def block_pair_semivariance(block_a, block_b, semivariogram_model):
    """
    Function calculates the average semivariance between two blocks based on the points inside each block.
    :param block_a: block A points in the form of array [[point x1A, point y1A, value v1A],
                                                         [point x2A, point y2A, value v2A],
                                                         [...]
                                                         [point xnA, point ynA, value vnA]]
        All coordinates from the array must be placed inside the block!
    :param block_b: block B points in the form of array [[point x1B, point y1B, value v1B],
                                                         [point x2B, point y2B, value v2B],
                                                         [...]
                                                         [point xnB, point ynB, value vnB]]
        All coordinates from the array must be placed inside the block!
    :param semivariogram_model: (TheoreticalSemivariogram) theoretical semivariance model from the
        TheoreticalSemivariogram class. The model must be fitted and calculated.
    :return semivariance_mean: (float) average semivariance between the two blocks' point pairs.
    """
    # Compute distances on coordinates only; the value column is dropped
    distances_between_points = calc_point_to_point_distance(block_a[:, :-1], block_b[:, :-1]).flatten()

    semivariances = []
    for dist in distances_between_points:
        semivariances.append(semivariogram_model.predict(dist))

    semivariance_mean = np.mean(semivariances)

    return semivariance_mean
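
# A minimal, self-contained sketch of the averaging above. A toy spherical
# model stands in for a fitted TheoreticalSemivariogram and numpy broadcasting
# stands in for calc_point_to_point_distance; every name below is hypothetical
# and for illustration only.

import numpy as np

def toy_spherical_model(h, sill=1.0, a_range=5.0):
    # Spherical semivariogram: gamma(h) = sill * (1.5*h/a - 0.5*(h/a)^3), capped at the sill
    h = np.minimum(np.asarray(h, dtype=float), a_range)
    return sill * (1.5 * h / a_range - 0.5 * (h / a_range) ** 3)

block_a_xy = np.array([[0.0, 0.0], [1.0, 0.0]])  # coordinates only, values dropped
block_b_xy = np.array([[4.0, 0.0], [5.0, 1.0]])

# Pairwise distances between every point of block A and every point of block B
dists = np.linalg.norm(block_a_xy[:, None, :] - block_b_xy[None, :, :], axis=-1)

# Average semivariance between the two blocks
gamma_ab = toy_spherical_model(dists).mean()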
# Example #2
def calculate_semivariance_within_blocks(points_within_area, semivariance_model):
    """
    Function calculates semivariances of points inside all given areal blocks.

    gamma(v, v) = 1 / (P * P) * SUM(s=1, P) SUM(s'=1, P) gamma(u_s, u_s')
    where:
    gamma(v, v) - average within-block semivariance,
    P - number of points used to discretize block v,
    u_s - point u within block v,
    gamma(u_s, u_s') - semivariance between points u_s and u_s' inside block v.

    :param points_within_area: (numpy array / list of lists) [area_id, array of points within area and their values],
    :param semivariance_model: (TheoreticalSemivariogram) Theoretical Semivariogram object,
    :return within_areas_semivariance: (numpy array) [area_id, semivariance]
    """

    within_areas_semivariance = []

    for block in points_within_area:

        # Get area id
        area_id = block[0]

        # Calculate inblock semivariance for a given id
        number_of_points_within_block = len(block[1])  # P
        p = number_of_points_within_block * number_of_points_within_block

        distances_between_points = calc_point_to_point_distance(block[1][:, :-1])

        semivariances = semivariance_model.predict(distances_between_points)

        avg_semivariance = np.sum(semivariances) / p
        within_areas_semivariance.append([area_id, avg_semivariance])

    return within_areas_semivariance
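
# A minimal numpy sketch of the gamma(v, v) average above for a single toy
# block; the linear model below is a hypothetical stand-in for a fitted
# TheoreticalSemivariogram, not part of the library.

import numpy as np

toy_gamma = lambda h: 0.5 * np.asarray(h, dtype=float)  # toy linear semivariogram

pts = np.array([[0.0, 0.0, 10.0],
                [1.0, 0.0, 12.0],
                [0.0, 1.0, 9.0]])  # [x, y, value]
p_count = len(pts)  # P

# P x P matrix of distances between all points inside the block (zeros on the diagonal)
d = np.linalg.norm(pts[:, None, :2] - pts[None, :, :2], axis=-1)

# gamma(v, v) = 1 / P^2 * double sum of model semivariances
inblock_semivariance = toy_gamma(d).sum() / p_count ** 2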
    def test_build_variogram_point_cloud(self):
        my_dir = os.path.dirname(__file__)
        path = os.path.join(my_dir,
                            '../sample_data/poland_dem_gorzow_wielkopolski')

        dataset = read_point_data(path, 'txt')

        # Set semivariance params
        distances = calc_point_to_point_distance(dataset[:, :-1])

        maximum_range = np.max(distances)
        number_of_divisions = 10
        step_size = maximum_range / number_of_divisions

        variogram_cloud = build_variogram_point_cloud(dataset, step_size,
                                                      maximum_range)
        variogram_number_of_points = [2608958, 6525694]

        for idx, k in enumerate(variogram_cloud.keys()):
            points_number = len(variogram_cloud[k])
            self.assertEqual(
                points_number, variogram_number_of_points[idx],
                f'Number of points {points_number} for distance {k} is not'
                f' equal {variogram_number_of_points[idx]}.')
            if idx == 1:
                break
# Example #4
    def test_calculate_weighted_semivariance(self):
        my_dir = os.path.dirname(__file__)
        path = os.path.join(my_dir, '../sample_data/poland_dem_gorzow_wielkopolski')

        dataset = read_point_data(path, 'txt')
        new_col = np.arange(1, len(dataset) + 1)

        dataset_weights = np.zeros((dataset.shape[0], dataset.shape[1] + 1))
        dataset_weights[:, :-1] = dataset
        dataset_weights[:, -1] = new_col

        # Set semivariance params
        distances = calc_point_to_point_distance(dataset_weights[:, :-2])

        maximum_range = np.max(distances)
        number_of_divisions = 10
        step_size = maximum_range / number_of_divisions

        gamma_w = calculate_weighted_semivariance(dataset_weights, step_size, maximum_range)

        output_int = [105, 290, 459, 536, 515, 456, 449, 503, 469]

        self.assertTrue((gamma_w[:, 1].astype(int) == np.array(output_int)).all(),
                        f"Integer part of output should be equal to {output_int}\n"
                        f"but it is equal to {gamma_w[:, 1].astype(int)}")
# Example #5
def build_variogram_point_cloud(data, step_size, max_range):
    """
    Function calculates the variogram point cloud of a given set of points for
        a given set of lags. The variogram cloud is built from the squared
        differences between each pair of points separated by a distance within
        the bin specified by the step_size parameter.

    INPUT:

    :param data: (numpy array) coordinates and their values,
    :param step_size: (float) width of the distance bin (lag step) within which point pairs are analyzed,
    :param max_range: (float) maximum range of analysis.

    OUTPUT:

    :return: variogram_cloud - dict with pairs {lag: list of squared differences}.
    """

    distances = calc_point_to_point_distance(data[:, :-1])
    lags = np.arange(step_size, max_range, step_size)
    variogram_cloud = OrderedDict()

    # Calculate squared differences
    # They are halved to be compatible with the semivariogram

    for h in lags:
        distances_in_range = select_values_in_range(distances, h, step_size)
        sem = (data[distances_in_range[0], 2] -
               data[distances_in_range[1], 2])**2
        sem = sem / 2
        variogram_cloud[h] = sem

    return variogram_cloud
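
# A self-contained sketch of the binning above: for each lag h, collect the
# halved squared differences of all point pairs whose separation falls into
# that lag's bin. The (h - step, h] bin below is an assumption standing in
# for select_values_in_range; arrays are illustrative.

import numpy as np
from collections import OrderedDict

rng = np.random.default_rng(0)
coords = rng.uniform(0, 10, size=(50, 2))
values = rng.normal(size=50)

dist_mtx = np.linalg.norm(coords[:, None, :] - coords[None, :, :], axis=-1)

step, max_rng = 2.0, 10.0
cloud = OrderedDict()
for h in np.arange(step, max_rng, step):
    i, j = np.where((dist_mtx > h - step) & (dist_mtx <= h))
    cloud[h] = 0.5 * (values[i] - values[j]) ** 2  # halved squared differences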
# Example #6
def calculate_covariance(data, step_size, max_range):
    """Function calculates covariance of a given set of points.

        Equation for calculation is:

            covariance = 1 / N * SUM(i=1, N) [z(x_i + h) * z(x_i)] - u^2

        where:

            - N - number of observation pairs,
            - h - distance (lag),
            - z(x_i) - value at location x_i,
            - (x_i + h) - location at a distance h from x_i,
            - u - mean of observations at a given lag distance.

        INPUT:

        :param data: (numpy array) coordinates and their values,
        :param step_size: (float) width of the distance bin (lag step) within which point pairs are analyzed,
        :param max_range: (float) maximum range of analysis.

        OUTPUT:

        :return: covariance: numpy array of lag and covariance pairs where:

            - covariance[:, 0] = array of lags,
            - covariance[:, 1] = array of covariance values per lag,
            - covariance[:, 2] = array of numbers of points in each lag.
    """

    distances = calc_point_to_point_distance(data[:, :-1])

    # Containers for per-lag covariance values
    covar = []
    covariance = []

    lags = np.arange(0, max_range, step_size)

    for idx, h in enumerate(lags):
        distances_in_range = select_values_in_range(distances, h, step_size)
        cov = (data[distances_in_range[0], 2] * data[distances_in_range[1], 2])
        u_mean = (data[distances_in_range[0], 2] +
                  data[distances_in_range[1], 2])
        u_mean = u_mean / (2 * len(u_mean))
        cov_value = np.sum(cov) / (len(cov)) - np.sum(u_mean)**2
        covar.append([cov_value, len(cov)])
        if covar[idx][0] > 0:
            covariance.append([h, covar[idx][0], covar[idx][1]])
        else:
            covariance.append([h, 0, 0])

    covariance = np.vstack(covariance)

    return covariance
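
# A compact numpy check of the covariance equation above on a regular 1-D
# transect, where pairs at lag h are simple array shifts; this mirrors the
# formula itself rather than the circular binning of select_values_in_range.

import numpy as np

z = np.array([1.0, 3.0, 2.0, 4.0, 3.0, 5.0])

def cov_at_lag(z, h):
    head, tail = z[:-h], z[h:]               # z(x_i), z(x_i + h)
    u = np.concatenate([head, tail]).mean()  # mean of observations at this lag
    return np.mean(head * tail) - u ** 2

print([round(cov_at_lag(z, h), 3) for h in (1, 2)])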
def interpolate_raster(data,
                       dim=1000,
                       number_of_neighbors=4,
                       semivariogram_model=None):
    """
    Function interpolates raster from data points using ordinary kriging.

    INPUT:

    :param data: (numpy array / list) [coordinate x, coordinate y, value],
    :param dim: (int) number of pixels (points) along the larger dimension (width or height),
    :param number_of_neighbors: (int) default=4, number of points used to interpolate data,
    :param semivariogram_model: (TheoreticalSemivariogram) default=None, Theoretical Semivariogram model;
        if not provided then it is estimated from a given dataset.

    OUTPUT:

    :return: (numpy array) [numpy array of interpolated values, numpy array of interpolation errors,
        [pixel size, min x, max x, min y, max y]]
    """

    # Set dimension

    if isinstance(data, list):
        data = np.array(data)

    cols_coords, rows_coords, props = _set_dims(data[:, 0], data[:, 1], dim)

    # Calculate semivariance if not provided

    if semivariogram_model is None:
        distances = calc_point_to_point_distance(data[:, :-1])

        maximum_range = np.max(distances)
        number_of_divisions = 100
        step_size = maximum_range / number_of_divisions

        semivariance = calculate_semivariance(data, step_size, maximum_range)

        ts = TheoreticalSemivariogram(data, semivariance, False)
        ts.find_optimal_model(False, number_of_neighbors)
    else:
        ts = semivariogram_model

    # Interpolate data point by point

    k = Krige(ts, data)

    kriged_matrix, kriged_errors = update_interpolation_matrix(
        rows_coords, cols_coords, k, number_of_neighbors)

    return [kriged_matrix, kriged_errors], props
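
# Hedged usage sketch, assuming this function's module (with its Krige and
# TheoreticalSemivariogram dependencies) is importable as-is; the input array
# below is illustrative only.

import numpy as np

points = np.array([[0.0, 0.0, 1.2],
                   [1.0, 0.5, 2.3],
                   [2.0, 1.5, 1.9],
                   [0.5, 2.0, 3.1]])

(kriged, errors), props = interpolate_raster(points, dim=50, number_of_neighbors=3)
# props = [pixel size, min x, max x, min y, max y]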
    def test_calc_point_to_point_distance(self):
        coords = [(10, -10), (11, -8.9711), (12.2, -13), (11, -10.0)]

        d = calc_point_to_point_distance(coords)

        test_arr = np.array([[0, 1, 3, 1], [1, 0, 4, 1], [3, 4, 0, 3],
                             [1, 1, 3, 0]])

        sum_d = np.sum(d.astype(int))
        sum_test_arr = np.sum(test_arr)
        self.assertEqual(
            sum_d, sum_test_arr,
            "Distances between points are not calculated correctly")
# Example #9
    def __init__(self, positions, number_of_steps):
        self.data = positions
        self.steps = number_of_steps

        distances = calc_point_to_point_distance(self.data[:, :-1])
        maximum_range = np.max(distances)
        step_size = maximum_range / self.steps

        self.semivariance = calculate_semivariance(self.data, step_size,
                                                   maximum_range)
        self.theoretical_semivariance = TheoreticalSemivariogram(
            self.data, self.semivariance)
        self.theoretical_semivariance.find_optimal_model(
            number_of_ranges=self.steps)
# Example #10
def calculate_semivariance(data, step_size, max_range):
    """Function calculates semivariance of a given set of points.

        Equation for calculation is:

            semivariance = 1 / (2 * N) * SUM(i=1, N) [z(x_i + h) - z(x_i)]^2

        where:

            - N - number of observation pairs,
            - h - distance (lag),
            - z(x_i) - value at location x_i,
            - (x_i + h) - location at a distance h from x_i.

        INPUT:

        :param data: (numpy array) coordinates and their values,
        :param step_size: (float) width of the distance bin (lag step) within which point pairs are analyzed,
        :param max_range: (float) maximum range of analysis.

        OUTPUT:

        :return: semivariance: numpy array of lag and semivariance pairs where:

            - semivariance[:, 0] = array of lags,
            - semivariance[:, 1] = array of semivariance values per lag,
            - semivariance[:, 2] = array of numbers of points in each lag.
    """

    distances = calc_point_to_point_distance(data[:, :-1])

    semivariance = []

    lags = np.arange(step_size, max_range, step_size)

    # Calculate semivariances
    for h in lags:
        distances_in_range = select_values_in_range(distances, h, step_size)
        sem = (data[distances_in_range[0], 2] -
               data[distances_in_range[1], 2])**2
        if len(sem) == 0:
            sem_value = 0
        else:
            sem_value = np.sum(sem) / (2 * len(sem))
        semivariance.append([h, sem_value, len(sem)])
    semivariance = np.vstack(semivariance)

    return semivariance
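
# A tiny numpy check of the semivariance equation above on a regular 1-D
# transect: pairs at lag h are array shifts, so the estimator reduces to a
# one-liner. This mirrors the formula, not the circular binning used by
# select_values_in_range.

import numpy as np

z = np.array([1.0, 2.0, 4.0, 3.0, 5.0, 4.0])

def gamma_at_lag(z, h):
    diffs = z[h:] - z[:-h]  # z(x_i + h) - z(x_i)
    return np.sum(diffs ** 2) / (2 * len(diffs))

print([round(gamma_at_lag(z, h), 3) for h in (1, 2, 3)])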
def prepare_kriging_data(unknown_position,
                         data_array,
                         neighbors_range,
                         min_number_of_neighbors=1,
                         max_number_of_neighbors=256):
    """
    Function prepares data for kriging - array of point position, value and distance to an unknown point.

    INPUT:

    :param unknown_position: (numpy array) position of unknown value,
    :param data_array: (numpy array) known positions and their values,
    :param neighbors_range: (float) range within which neighbors are included in the prediction,
    :param min_number_of_neighbors: (int) number of the n-closest neighbors used for interpolation if no neighbor
        is found within neighbors_range,
    :param max_number_of_neighbors: (int) maximum number of the n-closest neighbors used for interpolation if there
        are too many neighbors within neighbors_range. It speeds up calculations for large datasets.

    OUTPUT:
    :return: (numpy array) dataset with position, value and distance to the unknown point:
        [[x, y, value, distance to unknown position], [...]]
    """

    # Distances to unknown point
    r = np.array([unknown_position])

    known_pos = data_array[:, :-1]
    dists = calc_point_to_point_distance(r, known_pos)

    # Prepare data for kriging
    neighbors_and_dists = np.c_[data_array, dists.T]
    prepared_data = neighbors_and_dists[
        neighbors_and_dists[:, -1] <= neighbors_range, :]

    len_prep = len(prepared_data)

    if len_prep == 0:
        # Sort data
        sorted_neighbors_and_dists = neighbors_and_dists[
            neighbors_and_dists[:, -1].argsort()]
        prepared_data = sorted_neighbors_and_dists[:min_number_of_neighbors]
    elif len_prep > max_number_of_neighbors:
        sorted_neighbors_and_dists = neighbors_and_dists[
            neighbors_and_dists[:, -1].argsort()]
        prepared_data = sorted_neighbors_and_dists[:max_number_of_neighbors]

    return prepared_data
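
# A self-contained sketch of the selection policy above: take every known
# point within neighbors_range; if none qualifies, fall back to the
# min_number_of_neighbors closest points; if too many qualify, cap at
# max_number_of_neighbors. Pure numpy stand-ins, illustrative only.

import numpy as np

known = np.array([[0.0, 0.0, 1.0],
                  [1.0, 0.0, 2.0],
                  [5.0, 5.0, 3.0]])  # [x, y, value]
unknown = np.array([0.2, 0.1])

d = np.linalg.norm(known[:, :2] - unknown, axis=1)
candidates = np.c_[known, d]  # [x, y, value, distance to unknown]

selected = candidates[candidates[:, -1] <= 1.5]              # neighbors_range = 1.5
if len(selected) == 0:
    selected = candidates[candidates[:, -1].argsort()][:1]   # min_number_of_neighbors = 1
elif len(selected) > 2:
    selected = candidates[candidates[:, -1].argsort()][:2]   # max_number_of_neighbors = 2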
def prepare_distances_list_unknown_area(unknown_area_points):
    """
    Function prepares distances list of unknown (single) area.

    INPUT:

    :param unknown_area_points: [pt x, pt y, val].

    OUTPUT:

    :return: [point value 1, point value 2, distance between points].
    """
    dists = calc_point_to_point_distance(unknown_area_points[:, :-1])
    vals = unknown_area_points[:, -1]

    merged = _merge_vals_and_distances(vals, vals, dists)
    return np.array(merged)
# Example #13
    def test_calculate_semivariance(self):
        my_dir = os.path.dirname(__file__)
        path = os.path.join(my_dir, '../sample_data/poland_dem_gorzow_wielkopolski')

        dataset = read_point_data(path, 'txt')

        # Set semivariance params
        distances = calc_point_to_point_distance(dataset[:, :-1])

        maximum_range = np.max(distances)
        number_of_divisions = 10
        step_size = maximum_range / number_of_divisions

        gamma = calculate_semivariance(dataset, step_size, maximum_range)

        output_int = [116, 316, 496, 578, 566, 522, 543, 618, 591]

        self.assertTrue((gamma[:, 1].astype(int) == np.array(output_int)).all(),
                        f"Integer part of output should be equal to {output_int}")
def prepare_ata_known_areas(list_of_points_of_known_areas):
    """
    Function prepares known areas data for prediction.

    INPUT:

    :param list_of_points_of_known_areas: (numpy array) list of all areas' points and their values used for the
        prediction.

    OUTPUT:

    :return: (numpy array) list of arrays with areas and distances between them:
        [id base, [id other, [base point value, other point value, distance between points]]].
    """
    all_distances_list = []
    for pt1 in list_of_points_of_known_areas:

        id_base = pt1[0][0]
        list_of_distances_from_base = [id_base, []]

        points_in_base_area = pt1[0][1][:, :-1]
        vals_in_base_area = pt1[0][1][:, -1]

        for pt2 in list_of_points_of_known_areas:

            id_other = pt2[0][0]
            points_in_other_area = pt2[0][1][:, :-1]
            vals_in_other_area = pt2[0][1][:, -1]

            distances_array = calc_point_to_point_distance(
                points_in_base_area, points_in_other_area)
            merged = _merge_vals_and_distances(vals_in_base_area,
                                               vals_in_other_area,
                                               distances_array)

            list_of_distances_from_base[1].append([id_other, merged])
        all_distances_list.append(list_of_distances_from_base)

    # dtype=object keeps the ragged [id base, [...]] records intact on modern NumPy
    return np.array(all_distances_list, dtype=object)
# Example #15
    def test_calculate_covariance(self):
        my_dir = os.path.dirname(__file__)
        path = os.path.join(my_dir,
                            '../sample_data/poland_dem_gorzow_wielkopolski')

        dataset = read_point_data(path, 'txt')

        # Set semivariance params
        distances = calc_point_to_point_distance(dataset[:, :-1])

        maximum_range = np.max(distances)
        number_of_divisions = 8
        step_size = maximum_range / number_of_divisions

        gamma = calculate_covariance(dataset, step_size, maximum_range)

        output_int = [490, 324, 41, 0, 0, 0, 0, 0]

        check_equality = (gamma[:, 1].astype(int) == np.array(output_int)).all()
        self.assertTrue(
            check_equality,
            f"Integer part of output array is not equal to {output_int}")
    def ordinary_kriging(self,
                         unknown_location,
                         neighbors_range=None,
                         min_no_neighbors=1,
                         max_no_neighbors=256,
                         test_anomalies=True):
        """
        Function predicts value at unknown location with Ordinary Kriging technique.

        INPUT:

        :param unknown_location: (tuple) position of unknown location,
        :param neighbors_range: (float) maximum distance at which neighbors affect the interpolated point; if not
            given then it is set to the semivariogram range,
        :param min_no_neighbors: (int) minimum number of neighbors used for interpolation if no neighbor is found
            within the range specified by neighbors_range,
        :param max_no_neighbors: (int) maximum number of n-closest neighbors used for interpolation if there are too
            many neighbors in range. It speeds up calculations for large datasets.
        :param test_anomalies: (bool) check if weights are negative.

        OUTPUT:

        :return: predicted, error, estimated mean, weights
            [value_in_unknown_location, error, estimated_mean, weights]
        """

        # Check range
        if neighbors_range is None:
            neighbors_range = self.model.range

        prepared_data = prepare_kriging_data(
            unknown_position=unknown_location,
            data_array=self.dataset,
            neighbors_range=neighbors_range,
            min_number_of_neighbors=min_no_neighbors,
            max_number_of_neighbors=max_no_neighbors)
        n = len(prepared_data)
        unknown_distances = prepared_data[:, -1]
        k = self.model.predict(unknown_distances)
        k = np.r_[k.T, np.ones(1)]  # append entry for the Lagrange multiplier

        dists = calc_point_to_point_distance(prepared_data[:, :-2])

        predicted_weights = self.model.predict(dists.ravel())
        predicted = np.array(predicted_weights.reshape(n, n))
        p_ones = np.ones((predicted.shape[0], 1))
        predicted_with_ones_col = np.c_[predicted, p_ones]
        p_ones_row = np.ones((1, predicted_with_ones_col.shape[1]))
        p_ones_row[0][-1] = 0.
        weights = np.r_[predicted_with_ones_col, p_ones_row]

        w = np.linalg.solve(weights, k)
        zhat = prepared_data[:, -2].dot(w[:-1])

        # Test for anomalies
        if test_anomalies:
            if zhat < 0:
                raise ValueError(
                    f'Estimated value is below zero and it is: {zhat}.\n'
                    'Program is terminated. Try a different semivariogram model. '
                    '(Did you use the gaussian model? If so, try other models such as linear or exponential.) '
                    'Also analyze your data for clusters which may affect the final estimation.')

        sigma = np.matmul(w.T, k)
        return zhat, sigma, w[-1], w
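
# A numpy sketch of the ordinary kriging system assembled above: the matrix of
# semivariances between known points is bordered with a row and column of ones
# (zero in the corner) to enforce that the weights sum to one, then solved
# against the semivariances to the unknown point. The linear model here is a
# hypothetical stand-in for self.model.predict.

import numpy as np

gamma = lambda h: 0.5 * np.asarray(h, dtype=float)  # toy linear semivariogram

xy = np.array([[0.0, 0.0], [1.0, 0.0], [0.0, 1.0]])
vals = np.array([1.0, 2.0, 3.0])
unknown = np.array([0.5, 0.5])

a_mtx = gamma(np.linalg.norm(xy[:, None, :] - xy[None, :, :], axis=-1))
a_mtx = np.c_[a_mtx, np.ones(len(xy))]              # ones column
a_mtx = np.r_[a_mtx, np.ones((1, a_mtx.shape[1]))]  # ones row
a_mtx[-1, -1] = 0.0                                 # Lagrange corner

b_vec = np.r_[gamma(np.linalg.norm(xy - unknown, axis=1)), 1.0]

w = np.linalg.solve(a_mtx, b_vec)
zhat = vals.dot(w[:-1])  # prediction at the unknown point
sigma = w.dot(b_vec)     # estimation variance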
# Example #17
    def predict(self,
                unknown_area,
                unknown_area_points,
                number_of_neighbours,
                max_search_radius,
                weighted,
                test_anomalies=True):
        """
        Function predicts the areal value at an unknown location based on centroid-based Poisson Kriging.

        INPUT:

        :param unknown_area: (numpy array) unknown area data in the form:
            [area_id, polygon, centroid x, centroid y]
        :param unknown_area_points: (numpy array) points within an unknown area in the form:
            [area_id, [point_position_x, point_position_y, value]]
        :param number_of_neighbours: (int) minimum number of neighbours to include in the algorithm,
        :param max_search_radius: (float) maximum search radius (if number of neighbours within this search radius is
            smaller than number_of_neighbours parameter then additional neighbours are included up to
            the number_of_neighbours).
        :param weighted: (bool) distances weighted by population (True) or not (False),
        :param test_anomalies: (bool) check if weights are negative.

        OUTPUT:

        :return: prediction, error, estimated mean, weights:
            [value_in_unknown_area, error, estimated_mean, weights]
        """

        self.prepared_data = prepare_poisson_kriging_data(
            unknown_area=unknown_area,
            points_within_unknown_area=unknown_area_points,
            known_areas=self.known_areas,
            points_within_known_areas=self.known_areas_points,
            number_of_neighbours=number_of_neighbours,
            max_search_radius=max_search_radius,
            weighted=weighted
        )  # [id (known), coo_x, coo_y, val, dist_to_unknown, total population]

        n = self.prepared_data.shape[0]
        unknown_distances = self.prepared_data[:, -2]
        k = self.model.predict(
            unknown_distances)  # predicted values from distances to unknown
        k = np.r_[k.T, np.ones(1)]  # append entry for the Lagrange multiplier

        data_for_distance = self.prepared_data[:, 1:3]
        dists = calc_point_to_point_distance(data_for_distance)

        predicted_weights = self.model.predict(dists.ravel())
        predicted = np.array(predicted_weights.reshape(n, n))

        # Add weights to predicted values (diagonal)

        weights_mtx = self.calculate_weight_arr()
        predicted = predicted + weights_mtx

        # Prepare weights matrix

        p_ones = np.ones((predicted.shape[0], 1))
        predicted_with_ones_col = np.c_[predicted, p_ones]
        p_ones_row = np.ones((1, predicted_with_ones_col.shape[1]))
        p_ones_row[0][-1] = 0.
        weights = np.r_[predicted_with_ones_col, p_ones_row]

        # Solve Kriging system
        try:
            w = np.linalg.solve(weights, k)
        except TypeError:
            weights = weights.astype(float)
            k = k.astype(float)
            w = np.linalg.solve(weights, k)

        zhat = self.prepared_data[:, 3].dot(w[:-1])

        # Test for anomalies
        if test_anomalies:
            if zhat < 0:
                raise ValueError(
                    f'Estimated value is below zero and it is: {zhat}.\n'
                    'Program is terminated. Try a different semivariogram model. '
                    '(Did you use the gaussian model? If so, try other models such as linear or exponential.) '
                    'Also analyze your data for clusters which may affect the final estimation.')

        sigmasq = (w.T * k)[0]
        if sigmasq < 0:
            sigma = 0
        else:
            sigma = np.sqrt(sigmasq)
        return zhat, sigma, w[-1], w
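
# Relative to plain ordinary kriging, the distinctive step above is the
# diagonal correction: calculate_weight_arr() adds a population-derived error
# term to the diagonal of the semivariance matrix (predicted + weights_mtx)
# before the system is bordered and solved, which damps the influence of
# areas with unreliable (small-population) rates.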
# Example #18
def inverse_distance_weighting(known_points, unknown_location, number_of_neighbours=-1, power=2.):
    """
    Function performs Inverse Distance Weighting with a given set of points and an unknown location.

    INPUT:

    :param known_points: (numpy array) [x, y, value],
    :param unknown_location: (numpy array) [[ux, uy]],
    :param number_of_neighbours: (int) default=-1 which means that all known points will be used to estimate value at
        the unknown location. Can be any number within the limits [2, length(known_points)],
    :param power: (float) default=2, power value must be greater than or equal to 0; it controls the weight assigned
        to each known point.

    OUTPUT:

    :return: (float) value at unknown location.
    """

    # Test unknown points
    try:
        _ = unknown_location.shape[1]
    except IndexError:
        unknown_location = np.expand_dims(unknown_location, axis=0)

    # Test power

    if power < 0:
        raise ValueError('Power cannot be smaller than 0')

    # Test number of neighbours

    if number_of_neighbours == -1:
        number_of_closest = len(known_points)
    elif (number_of_neighbours >= 2) and (number_of_neighbours <= len(known_points)):
        number_of_closest = number_of_neighbours
    else:
        raise ValueError(f'Number of closest neighbours must be between 2 and the number of known points '
                         f'({len(known_points)}), but {number_of_neighbours} neighbours were given instead.')

    # Calculate distances

    distances = calc_point_to_point_distance(unknown_location, known_points[:, :-1])

    # Check if any distance is equal to 0
    if not np.all(distances[0]):

        zer_pos = np.where(distances == 0)
        unkn_value = known_points[zer_pos[1], -1][0]
        return unkn_value

    # Get n closest neighbours...

    sdists = distances.argsort()
    sdists = sdists[0, :number_of_closest]
    dists = distances[0, sdists]
    values = known_points[sdists].copy()
    values = values[:, -1]
    weights = 1 / dists**power

    unkn_value = np.sum(weights * values) / np.sum(weights)
    return unkn_value
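
# A self-contained numpy sketch of the same inverse distance weighting scheme,
# with broadcasting in place of calc_point_to_point_distance; arrays are
# illustrative only.

import numpy as np

known = np.array([[0.0, 0.0, 1.0],
                  [1.0, 0.0, 2.0],
                  [0.0, 1.0, 3.0]])  # [x, y, value]
unknown = np.array([0.4, 0.4])

d = np.linalg.norm(known[:, :2] - unknown, axis=1)
w = 1.0 / d ** 2  # power = 2
idw_estimate = np.sum(w * known[:, -1]) / np.sum(w)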
def prepare_poisson_kriging_data(unknown_area,
                                 points_within_unknown_area,
                                 known_areas,
                                 points_within_known_areas,
                                 number_of_neighbours,
                                 max_search_radius,
                                 weighted=False):
    """
    Function prepares data for centroid based Poisson Kriging.

    INPUT:

    :param unknown_area: (numpy array) unknown area in the form:
        [area_id, polygon, centroid x, centroid y],
    :param points_within_unknown_area: (numpy array) points and their values within the given area:
        [area_id, [point_position_x, point_position_y, value]],
    :param known_areas: (numpy array) known areas in the form:
        [area_id, polygon, centroid x, centroid y, aggregated value],
    :param points_within_known_areas: (numpy array) points and their values within the given area:
        [area_id, [point_position_x, point_position_y, value]],
    :param number_of_neighbours: (int) minimum number of neighbours to include in the algorithm,
    :param max_search_radius: (float) maximum search radius (if the number of neighbours within this search radius is
        smaller than the number_of_neighbours parameter then additional neighbours are included up to
        number_of_neighbours),
    :param weighted: (bool) distances weighted by population (True) or not (False).

    OUTPUT:

    :return: (numpy array) distances from known locations to the unknown location:
        [id_known, coordinate x, coordinate y, value, distance to unknown, aggregated points values within an area].
    """

    # Prepare data
    cx_cy = unknown_area[2:-1]
    r = np.array(cx_cy)

    known_centroids = known_areas.copy()
    kc_ids = known_centroids[:, 0]
    kc_vals = known_centroids[:, -1]
    kc_pos = known_centroids[:, 2:-1]

    # Build set for Poisson Kriging

    if weighted:
        known_areas_pts = points_within_known_areas.copy()

        dists = []  # [id_known, dist]

        for pt in known_areas_pts:
            d = calc_block_to_block_distance([pt, points_within_unknown_area])
            dists.append([d[0][0][1]])
        s = np.ravel(np.array(dists)).T
        kriging_data = np.c_[kc_ids, kc_pos, kc_vals,
                             s]  # [id, coo_x, coo_y, val, dist_to_unkn]
    else:
        dists = calc_point_to_point_distance(kc_pos, [r])
        dists = dists.ravel()
        s = dists.T
        kriging_data = np.c_[kc_ids, kc_pos, kc_vals,
                             s]  # [id, coo_x, coo_y, val, dist_to_unkn]

    # sort by distance
    kriging_data = kriging_data[kriging_data[:, -1].argsort()]

    # Get distances in max search radius
    max_search_pos = np.argmax(kriging_data[:, -1] > max_search_radius)
    output_data = kriging_data[:max_search_pos]

    # check number of observations

    if len(output_data) < number_of_neighbours:
        output_data = kriging_data[:number_of_neighbours]

    # get total points' value in each id from prepared datasets and append it to the array

    points_vals = []
    for rec in output_data:
        areal_id = rec[0]
        points_in_area = points_within_known_areas[
            points_within_known_areas[:, 0] == areal_id]
        total_val = get_total_value_of_area(points_in_area[0])
        points_vals.append(total_val)

    output_data = np.c_[output_data, np.array(points_vals)]
    return output_data
def prepare_atp_data(points_within_unknown_area, known_areas,
                     points_within_known_areas, number_of_neighbours,
                     max_search_radius):
    """
    Function prepares data for Area to Point Poisson Kriging.

    INPUT:

    :param points_within_unknown_area: (numpy array) points and their values within the given area:
        [area_id, [point_position_x, point_position_y, value of point]],
    :param known_areas: (numpy array) known areas in the form:
        [area_id, areal_polygon, centroid coordinate x, centroid coordinate y, value at specific location],
    :param points_within_known_areas: (numpy array) points and their values within the given area:
        [[area_id, [point_position_x, point_position_y, value of point]], ...],
    :param number_of_neighbours: (int) minimum number of neighbours to include in the algorithm,
    :param max_search_radius: (float) maximum search radius (if the number of neighbours within this search radius is
        smaller than the number_of_neighbours parameter then additional neighbours are included up to
        number_of_neighbours).

    OUTPUT:

    :return: (list) [output_data, array of unknown area points' coordinates] where each record of output_data is:
        [
            id_known,
            areal value (count),
            [[unknown point value, [known point values, distances between points]], ...],
            total point value count
        ]
    """

    # Initialize set

    kriging_areas_ids = known_areas[:, 0]
    kriging_areal_values = known_areas[:, -1]

    # Build set for Area to Area Poisson Kriging - sort areas with distance

    known_areas_pts = points_within_known_areas.copy()

    dists = []  # [id_known, dist to unknown]

    for pt in known_areas_pts:
        d = calc_block_to_block_distance([pt, points_within_unknown_area])
        dists.append([d[0][0][1]])
    s = np.ravel(np.array(dists)).T
    kriging_data = np.c_[kriging_areas_ids, kriging_areal_values,
                         s]  # [id, areal val, dist_to_unkn]

    # sort by distance
    kriging_data = kriging_data[kriging_data[:, -1].argsort()]

    # Get distances in max search radius
    max_search_pos = np.argmax(kriging_data[:, -1] > max_search_radius)
    output_data = kriging_data[:max_search_pos]

    # check number of observations

    if len(output_data) < number_of_neighbours:
        output_data = kriging_data[:number_of_neighbours]

    # for each of prepared id prepare distances list with points' weights for semivariogram calculation

    points_vals = []
    points_and_vals_in_unknown_area = points_within_unknown_area[1]
    for rec in output_data:
        areal_id = rec[0]
        areal_value = rec[1]
        known_area = points_within_known_areas[points_within_known_areas[:, 0]
                                               == areal_id]
        known_area = known_area[0]
        points_in_known_area = known_area[1][:, :-1]
        vals_in_known_area = known_area[1][:, -1]

        # Set distances array from each point of unknown area
        merged_points_array = []
        for u_point in points_and_vals_in_unknown_area:
            u_point_dists = calc_point_to_point_distance(
                points_in_known_area, [u_point[:-1]])
            u_point_val = u_point[-1]
            merged = _merge_point_val_and_distances(u_point_val,
                                                    vals_in_known_area,
                                                    u_point_dists)
            merged_points_array.append(merged)

        total_val = np.sum(known_area[1][:, 2])

        # generated_array =
        # [id, value,
        #  [[unknown point value,
        #    [known points values,
        #     distances between points]],
        #   ...],
        #  total known points value]

        generated_array = [
            areal_id, areal_value, merged_points_array, total_val
        ]
        points_vals.append(generated_array)

    # dtype=object keeps the ragged records intact on modern NumPy
    output_data = np.array(points_vals, dtype=object)
    return [output_data, points_within_unknown_area[1][:, :-1]]
# Example #21
def calculate_weighted_semivariance(data, step_size, max_range):
    """Function calculates weighted semivariance following Monestiez et al.:

        A. Monestiez P, Dubroca L, Bonnin E, Durbec JP, Guinet C: Comparison of model based geostatistical methods
        in ecology: application to fin whale spatial distribution in northwestern Mediterranean Sea.
        In Geostatistics Banff 2004 Volume 2. Edited by: Leuangthong O, Deutsch CV. Dordrecht, The Netherlands,
        Kluwer Academic Publishers; 2005:777-786.

        B. Monestiez P, Dubroca L, Bonnin E, Durbec JP, Guinet C: Geostatistical modelling of spatial distribution
        of Balenoptera physalus in the northwestern Mediterranean Sea from sparse count data and heterogeneous
        observation efforts. Ecological Modelling 2006 in press.

        Equation for calculation is:

        s(h) = [1 / (2 * SUM|a=1, N(h)| w(0))]
                    * SUM|a=1, N(h)| [w(0) * (z(u_a) - z(u_a + h))^2 - m']

        where:

        w(0) = (n(u_a) * n(u_a + h)) / (n(u_a) + n(u_a + h))

        - s(h) - semivariogram of the risk,
        - n(u_a) - size of the population at risk in the unit a,
        - z(u_a) - mortality rate at the unit a,
        - u_a + h - area at the distance (h) from the analyzed area,
        - m' - population-weighted mean of rates.

        INPUT:

        :param data: (numpy array) coordinates and their values and weights:
            [coordinate x, coordinate y, value, weight],
        :param step_size: (float) width of the distance bin (lag step) within which point pairs are analyzed,
        :param max_range: (float) maximum range of analysis.

        OUTPUT:

        :return: semivariance: numpy array of lag and semivariance pairs where:

            - semivariance[:, 0] = array of lags,
            - semivariance[:, 1] = array of semivariance values per lag,
            - semivariance[:, 2] = array of numbers of points in each lag.
    """

    # TEST: test if any 0-weight is inside the dataset

    _test_weights(data[:, -1])

    # Calculate distance

    distances = calc_point_to_point_distance(data[:, :-2])

    # Prepare semivariance arrays
    semivariance = []

    lags = np.arange(step_size, max_range, step_size)

    # Calculate semivariances
    for idx, h in enumerate(lags):
        distances_in_range = select_values_in_range(distances, h, step_size)

        # Any distance in a range?
        if len(distances_in_range[0]) > 0:
            # Weights
            ws_a = data[distances_in_range[0], 3]
            ws_ah = data[distances_in_range[1], 3]

            weights = (ws_a * ws_ah) / (ws_a + ws_ah)

            # Values
            z_a = data[distances_in_range[0], 2]
            z_ah = data[distances_in_range[1], 2]
            z_err = (z_a - z_ah)**2

            # m'
            _vals = data[distances_in_range[0], 2]
            _weights = data[distances_in_range[0], 3]
            mweighted_mean = np.average(_vals, weights=_weights)

            # numerator: w(0) * [(z(u_a) - z(u_a + h))^2] - m'
            numerator = weights * z_err - mweighted_mean

            # semivariance
            sem_value = 0.5 * (np.sum(numerator) / np.sum(weights))
            semivariance.append([h, sem_value, len(numerator)])
        else:
            semivariance.append([h, 0, 0])

    semivariance = np.vstack(semivariance)
    return semivariance
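
# A numpy check of the weighted estimator above for a single lag: given pair
# values and population sizes, it reproduces the s(h) expression implemented
# in the loop. Arrays are illustrative only.

import numpy as np

z_a = np.array([0.10, 0.20, 0.15])   # rates z(u_a)
z_ah = np.array([0.12, 0.18, 0.25])  # rates z(u_a + h)
n_a = np.array([100.0, 50.0, 80.0])  # populations n(u_a)
n_ah = np.array([90.0, 60.0, 70.0])  # populations n(u_a + h)

w = (n_a * n_ah) / (n_a + n_ah)   # w(0)
m = np.average(z_a, weights=n_a)  # m', population-weighted mean of rates
s_h = 0.5 * np.sum(w * (z_a - z_ah) ** 2 - m) / np.sum(w)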