Example no. 1
def test_check_distances_and_betas():
    betas = np.array([-0.02, -0.01, -0.005, -0.0025, -0.0])
    distances = np.array(networks.distance_from_beta(betas))

    # zero length arrays
    with pytest.raises(ValueError):
        checks.check_distances_and_betas(np.array([]), betas)
    with pytest.raises(ValueError):
        checks.check_distances_and_betas(distances, np.array([]))
    # mismatching array lengths
    with pytest.raises(ValueError):
        checks.check_distances_and_betas(np.array(distances[:-1]), betas)
    with pytest.raises(ValueError):
        checks.check_distances_and_betas(distances, betas[:-1])
    # check that duplicates are caught
    dup_betas = np.array([-0.02, -0.02])
    dup_distances = np.array(networks.distance_from_beta(dup_betas))
    with pytest.raises(ValueError):
        checks.check_distances_and_betas(dup_distances, dup_betas)
    # positive values of beta
    betas_pos = betas.copy()
    betas_pos[0] = 4
    with pytest.raises(ValueError):
        checks.check_distances_and_betas(distances, betas_pos)
    # negative values of distance
    distances_neg = distances.copy()
    distances_neg[0] = -100
    with pytest.raises(ValueError):
        checks.check_distances_and_betas(distances_neg, betas)
    # inconsistent distances <-> betas
    betas[1] = -0.03
    with pytest.raises(ValueError):
        checks.check_distances_and_betas(distances, betas)
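
A note on the fixture above: betas and distance thresholds are linked through negative-exponential decay, w = exp(beta * d) for negative betas. The sketch below shows the conversion under the assumption of a minimum weight cutoff of exp(-4); the actual cutoff used by networks.distance_from_beta may differ.

import numpy as np

# Minimal sketch of the beta <-> distance relationship, assuming a minimum
# weight cutoff of exp(-4) (an assumption, not necessarily the library default).
min_threshold_wt = 0.01831563888873418  # exp(-4)

def distance_from_beta_sketch(betas: np.ndarray) -> np.ndarray:
    # solve w = exp(beta * d) for d at w = min_threshold_wt:
    # d = ln(min_threshold_wt) / beta, e.g. beta = -0.02 gives -4 / -0.02 = 200m
    return np.log(min_threshold_wt) / betas

print(distance_from_beta_sketch(np.array([-0.02, -0.01, -0.005])))  # [200. 400. 800.]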
Example no. 2
def local_aggregator(
    node_data: np.ndarray,
    edge_data: np.ndarray,
    node_edge_map: Dict,
    data_map: np.ndarray,
    distances: np.ndarray,
    betas: np.ndarray,
    landuse_encodings: np.ndarray = np.array([]),
    qs: np.ndarray = np.array([]),
    mixed_use_hill_keys: np.ndarray = np.array([]),
    mixed_use_other_keys: np.ndarray = np.array([]),
    accessibility_keys: np.ndarray = np.array([]),
    cl_disparity_wt_matrix: np.ndarray = np.array(np.full((0, 0), np.nan)),
    numerical_arrays: np.ndarray = np.array(np.full((0, 0), np.nan)),
    angular: bool = False,
    suppress_progress: bool = False
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray,
           np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray,
           np.ndarray, np.ndarray]:
    '''
    NODE MAP:
    0 - x
    1 - y
    2 - live
    3 - ghosted
    EDGE MAP:
    0 - start node
    1 - end node
    2 - length in metres
    3 - sum of angular travel along length
    4 - impedance factor
    5 - in bearing
    6 - out bearing
    DATA MAP:
    0 - x
    1 - y
    2 - assigned network index - nearest
    3 - assigned network index - next-nearest
    '''
    checks.check_network_maps(node_data, edge_data, node_edge_map)
    checks.check_data_map(
        data_map, check_assigned=True
    )  # raises ValueError if data points are not assigned to a network
    checks.check_distances_and_betas(distances, betas)

    # check landuse encodings
    compute_landuses = False
    if len(landuse_encodings) == 0:
        if len(mixed_use_hill_keys) != 0 or len(
                mixed_use_other_keys) != 0 or len(accessibility_keys) != 0:
            raise ValueError(
                'Mixed use metrics or land-use accessibilities require an array of landuse labels.'
            )
    elif len(landuse_encodings) != len(data_map):
        raise ValueError(
            'The number of landuse encodings does not match the number of data points.'
        )
    else:
        checks.check_categorical_data(landuse_encodings)

    # catch completely missing metrics
    if len(mixed_use_hill_keys) == 0 and len(
            mixed_use_other_keys) == 0 and len(accessibility_keys) == 0:
        if len(numerical_arrays) == 0:
            raise ValueError(
                'No metrics specified, please specify at least one metric to compute.'
            )
    else:
        compute_landuses = True

    # catch missing qs
    if len(mixed_use_hill_keys) != 0 and len(qs) == 0:
        raise ValueError(
            'Hill diversity measures require that at least one value of q is specified.'
        )

    # negative qs caught by hill diversity methods

    # check various problematic key combinations
    if len(mixed_use_hill_keys) != 0:
        if (mixed_use_hill_keys.min() < 0 or mixed_use_hill_keys.max() > 3):
            raise ValueError('Mixed-use "hill" keys out of range of 0:4.')

    if len(mixed_use_other_keys) != 0:
        if (mixed_use_other_keys.min() < 0 or mixed_use_other_keys.max() > 2):
            raise ValueError('Mixed-use "other" keys out of range of 0:3.')

    if len(accessibility_keys) != 0:
        max_ac_key = landuse_encodings.max()
        if (accessibility_keys.min() < 0
                or accessibility_keys.max() > max_ac_key):
            raise ValueError(
                'Negative or out of range accessibility key encountered. Keys must match class encodings.'
            )

    for i in range(len(mixed_use_hill_keys)):
        for j in range(len(mixed_use_hill_keys)):
            if j > i:
                i_key = mixed_use_hill_keys[i]
                j_key = mixed_use_hill_keys[j]
                if i_key == j_key:
                    raise ValueError('Duplicate mixed-use "hill" key.')

    for i in range(len(mixed_use_other_keys)):
        for j in range(len(mixed_use_other_keys)):
            if j > i:
                i_key = mixed_use_other_keys[i]
                j_key = mixed_use_other_keys[j]
                if i_key == j_key:
                    raise ValueError('Duplicate mixed-use "other" key.')

    for i in range(len(accessibility_keys)):
        for j in range(len(accessibility_keys)):
            if j > i:
                i_key = accessibility_keys[i]
                j_key = accessibility_keys[j]
                if i_key == j_key:
                    raise ValueError('Duplicate accessibility key.')

    def disp_check(disp_matrix):
        # the length of the disparity matrix vis-a-vis unique landuses is tested in underlying diversity functions
        if disp_matrix.ndim != 2 or disp_matrix.shape[0] != disp_matrix.shape[
                1]:
            raise ValueError(
                'The disparity matrix must be a square NxN matrix.')
        if len(disp_matrix) == 0:
            raise ValueError(
                'Hill disparity and Rao pairwise measures require a class disparity weights matrix.'
            )

    # check that missing or malformed disparity weights matrices are caught
    for k in mixed_use_hill_keys:
        if k == 3:  # hill disparity
            disp_check(cl_disparity_wt_matrix)
    for k in mixed_use_other_keys:
        if k == 2:  # raos pairwise
            disp_check(cl_disparity_wt_matrix)

    compute_numerical = False
    # when passing an empty 2d array to numba, use: np.array(np.full((0, 0), np.nan))
    if len(numerical_arrays) != 0:
        compute_numerical = True
        if numerical_arrays.shape[1] != len(data_map):
            raise ValueError(
                'The length of the numerical data arrays does not match the length of the data map.'
            )
        checks.check_numerical_data(numerical_arrays)

    # establish variables
    netw_n = len(node_data)
    d_n = len(distances)
    q_n = len(qs)
    n_n = len(numerical_arrays)
    global_max_dist = distances.max()
    netw_nodes_live = node_data[:, 2]

    # setup data structures
    # hill mixed uses are structured separately to take values of q into account
    mixed_use_hill_data = np.full((4, q_n, d_n, netw_n), np.nan)  # 4 dim
    mixed_use_other_data = np.full((3, d_n, netw_n), np.nan)  # 3 dim

    accessibility_data = np.full((len(accessibility_keys), d_n, netw_n), 0.0)
    accessibility_data_wt = np.full((len(accessibility_keys), d_n, netw_n),
                                    0.0)

    # stats
    stats_sum = np.full((n_n, d_n, netw_n), np.nan)
    stats_sum_wt = np.full((n_n, d_n, netw_n), np.nan)

    stats_mean = np.full((n_n, d_n, netw_n), np.nan)
    stats_mean_wt = np.full((n_n, d_n, netw_n), np.nan)

    stats_count = np.full(
        (n_n, d_n, netw_n),
        np.nan)  # use np.nan instead of 0 to avoid division by zero issues
    stats_count_wt = np.full((n_n, d_n, netw_n), np.nan)

    stats_variance = np.full((n_n, d_n, netw_n), np.nan)
    stats_variance_wt = np.full((n_n, d_n, netw_n), np.nan)

    stats_max = np.full((n_n, d_n, netw_n), np.nan)
    stats_min = np.full((n_n, d_n, netw_n), np.nan)

    # iterate through each vert and aggregate
    steps = int(netw_n / 10000)
    for netw_src_idx in range(netw_n):
        if not suppress_progress:
            checks.progress_bar(netw_src_idx, netw_n, steps)
        # only compute for live nodes
        if not netw_nodes_live[netw_src_idx]:
            continue
        # generate the reachable classes and their respective distances
        # these are non-unique - i.e. simply the class of each data point within the maximum distance
        # the aggregate_to_src_idx method will choose the closer direction of approach to a data point
        # from the nearest or next-nearest network node (calculated once globally, prior to local_landuses method)
        reachable_data, reachable_data_dist, tree_preds = aggregate_to_src_idx(
            netw_src_idx, node_data, edge_data, node_edge_map, data_map,
            global_max_dist, angular)
        # LANDUSES
        if compute_landuses:
            mu_max_unique_cl = int(landuse_encodings.max() + 1)
            # counts of each class type (array length per max unique classes - not just those within max distance)
            classes_counts = np.full((d_n, mu_max_unique_cl), 0)
            # nearest of each class type (likewise)
            classes_nearest = np.full((d_n, mu_max_unique_cl), np.inf)
            # iterate the reachable indices and related distances
            for data_idx, (reachable, data_dist) in enumerate(
                    zip(reachable_data, reachable_data_dist)):
                if not reachable:
                    continue
                # get the class category in integer form
                # all class codes were encoded to sequential integers - these correspond to the array indices
                cl_code = int(landuse_encodings[int(data_idx)])
                # iterate the distance dimensions
                for d_idx, (d, b) in enumerate(zip(distances, betas)):
                    # increment class counts at respective distances if the distance is less than current d
                    if data_dist <= d:
                        classes_counts[d_idx, cl_code] += 1
                        # if distance is nearer, update the nearest distance array too
                        if data_dist < classes_nearest[d_idx, cl_code]:
                            classes_nearest[d_idx, cl_code] = data_dist
                        # if within distance, and if in accessibility keys, then aggregate accessibility too
                        for ac_idx, ac_code in enumerate(accessibility_keys):
                            if ac_code == cl_code:
                                accessibility_data[ac_idx, d_idx,
                                                   netw_src_idx] += 1
                                accessibility_data_wt[ac_idx, d_idx,
                                                      netw_src_idx] += np.exp(
                                                          b * data_dist)
                                # if a match was found, then no need to check others
                                break
            # mixed uses can be calculated now that the local class counts are aggregated
            # iterate the distances and betas
            for d_idx, b in enumerate(betas):
                cl_counts = classes_counts[d_idx]
                cl_nearest = classes_nearest[d_idx]
                # mu keys determine which metrics to compute
                # don't confuse with indices
                # previously used dynamic indices in data structures - but obtuse if irregularly ordered keys
                for mu_hill_key in mixed_use_hill_keys:
                    for q_idx, q_key in enumerate(qs):
                        if mu_hill_key == 0:
                            mixed_use_hill_data[0, q_idx, d_idx, netw_src_idx] = \
                                diversity.hill_diversity(cl_counts, q_key)
                        elif mu_hill_key == 1:
                            mixed_use_hill_data[1, q_idx, d_idx, netw_src_idx] = \
                                diversity.hill_diversity_branch_distance_wt(cl_counts, cl_nearest, q=q_key, beta=b)
                        elif mu_hill_key == 2:
                            mixed_use_hill_data[2, q_idx, d_idx, netw_src_idx] = \
                                diversity.hill_diversity_pairwise_distance_wt(cl_counts, cl_nearest, q=q_key, beta=b)
                        # land-use classification disparity hill diversity
                        # the wt matrix can be used without mapping because cl_counts is based on all classes
                        # regardless of whether they are reachable
                        elif mu_hill_key == 3:
                            mixed_use_hill_data[3, q_idx, d_idx, netw_src_idx] = \
                                diversity.hill_diversity_pairwise_matrix_wt(cl_counts,
                                                                            wt_matrix=cl_disparity_wt_matrix,
                                                                            q=q_key)
                for mu_other_key in mixed_use_other_keys:
                    if mu_other_key == 0:
                        mixed_use_other_data[0, d_idx, netw_src_idx] = \
                            diversity.shannon_diversity(cl_counts)
                    elif mu_other_key == 1:
                        mixed_use_other_data[1, d_idx, netw_src_idx] = \
                            diversity.gini_simpson_diversity(cl_counts)
                    elif mu_other_key == 2:
                        mixed_use_other_data[2, d_idx, netw_src_idx] = \
                            diversity.raos_quadratic_diversity(cl_counts, wt_matrix=cl_disparity_wt_matrix)
        # IDW
        # the order of the loops matters because the nested aggregations happen per distance per numerical array
        if compute_numerical:
            # iterate the reachable indices and related distances
            for data_idx, (reachable, data_dist) in enumerate(
                    zip(reachable_data, reachable_data_dist)):
                # some indices will be NaN if beyond max threshold distance - so check for infinity
                # this happens when within radial max distance, but beyond network max distance
                if not reachable:
                    continue
                # iterate the numerical arrays dimension
                for num_idx in range(n_n):
                    # some values will be NaN
                    num = numerical_arrays[num_idx, int(data_idx)]
                    if np.isnan(num):
                        continue
                    # iterate the distance dimensions
                    for d_idx, (d, b) in enumerate(zip(distances, betas)):
                        # increment mean aggregations at respective distances if the distance is less than current d
                        if data_dist <= d:
                            # aggregate
                            if np.isnan(stats_sum[num_idx, d_idx,
                                                  netw_src_idx]):
                                stats_sum[num_idx, d_idx, netw_src_idx] = num
                                stats_count[num_idx, d_idx, netw_src_idx] = 1
                                stats_sum_wt[num_idx, d_idx,
                                             netw_src_idx] = num * np.exp(
                                                 data_dist * b)
                                stats_count_wt[num_idx, d_idx,
                                               netw_src_idx] = np.exp(
                                                   data_dist * b)
                            else:
                                stats_sum[num_idx, d_idx, netw_src_idx] += num
                                stats_count[num_idx, d_idx, netw_src_idx] += 1
                                stats_sum_wt[num_idx, d_idx,
                                             netw_src_idx] += num * np.exp(
                                                 data_dist * b)
                                stats_count_wt[num_idx, d_idx,
                                               netw_src_idx] += np.exp(
                                                   data_dist * b)

                            if np.isnan(stats_max[num_idx, d_idx,
                                                  netw_src_idx]):
                                stats_max[num_idx, d_idx, netw_src_idx] = num
                            elif num > stats_max[num_idx, d_idx, netw_src_idx]:
                                stats_max[num_idx, d_idx, netw_src_idx] = num

                            if np.isnan(stats_min[num_idx, d_idx,
                                                  netw_src_idx]):
                                stats_min[num_idx, d_idx, netw_src_idx] = num
                            elif num < stats_min[num_idx, d_idx, netw_src_idx]:
                                stats_min[num_idx, d_idx, netw_src_idx] = num
            # finalise mean calculations - this is happening for a single netw_src_idx, so fairly fast
            for num_idx in range(n_n):
                for d_idx in range(d_n):
                    stats_mean[num_idx, d_idx, netw_src_idx] = \
                        stats_sum[num_idx, d_idx, netw_src_idx] / stats_count[num_idx, d_idx, netw_src_idx]
                    stats_mean_wt[num_idx, d_idx, netw_src_idx] = \
                        stats_sum_wt[num_idx, d_idx, netw_src_idx] / stats_count_wt[num_idx, d_idx, netw_src_idx]
            # calculate variances - counts are already computed per above
            # weighted version is IDW by division through equivalently weighted counts above
            # iterate the reachable indices and related distances
            for data_idx, (reachable, data_dist) in enumerate(
                    zip(reachable_data, reachable_data_dist)):
                # some indices will be NaN if beyond max threshold distance - so check for infinity
                # this happens when within radial max distance, but beyond network max distance
                if not reachable:
                    continue
                # iterate the numerical arrays dimension
                for num_idx in range(n_n):
                    # some values will be NaN
                    num = numerical_arrays[num_idx, int(data_idx)]
                    if np.isnan(num):
                        continue
                    # iterate the distance dimensions
                    for d_idx, (d, b) in enumerate(zip(distances, betas)):
                        # increment variance aggregations at respective distances if the distance is less than current d
                        if data_dist <= d:
                            # aggregate
                            if np.isnan(stats_variance[num_idx, d_idx,
                                                       netw_src_idx]):
                                stats_variance[num_idx, d_idx, netw_src_idx] = \
                                    np.square(num - stats_mean[num_idx, d_idx, netw_src_idx])
                                stats_variance_wt[num_idx, d_idx, netw_src_idx] = \
                                    np.square(num - stats_mean_wt[num_idx, d_idx, netw_src_idx]) * np.exp(data_dist * b)
                            else:
                                stats_variance[num_idx, d_idx, netw_src_idx] += \
                                    np.square(num - stats_mean[num_idx, d_idx, netw_src_idx])
                                stats_variance_wt[num_idx, d_idx, netw_src_idx] += \
                                    np.square(num - stats_mean_wt[num_idx, d_idx, netw_src_idx]) * np.exp(data_dist * b)
            # finalise variance calculations
            for num_idx in range(n_n):
                for d_idx in range(d_n):
                    stats_variance[num_idx, d_idx, netw_src_idx] = \
                        stats_variance[num_idx, d_idx, netw_src_idx] / stats_count[num_idx, d_idx, netw_src_idx]
                    stats_variance_wt[num_idx, d_idx, netw_src_idx] = \
                        stats_variance_wt[num_idx, d_idx, netw_src_idx] / stats_count_wt[num_idx, d_idx, netw_src_idx]
    # send the data back in the same types and same order as the original keys - convert to int for indexing
    mu_hill_k_int = np.full(len(mixed_use_hill_keys), 0)
    for i, k in enumerate(mixed_use_hill_keys):
        mu_hill_k_int[i] = k
    mu_other_k_int = np.full(len(mixed_use_other_keys), 0)
    for i, k in enumerate(mixed_use_other_keys):
        mu_other_k_int[i] = k

    return mixed_use_hill_data[mu_hill_k_int], \
           mixed_use_other_data[mu_other_k_int], \
           accessibility_data, accessibility_data_wt, \
           stats_sum, stats_sum_wt, \
           stats_mean, stats_mean_wt, \
           stats_variance, stats_variance_wt, \
           stats_max, stats_min
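
For reference, the mixed_use_hill_keys branches above dispatch to diversity.hill_diversity and its distance- and matrix-weighted variants. The sketch below follows the standard definition of a Hill number of order q, which is what key 0 conceptually computes; it is not the library's implementation.

import numpy as np

# Standard Hill number of order q: (sum_i p_i^q)^(1 / (1 - q)), with the q = 1
# case taken as the limit, i.e. the exponential of Shannon entropy.
def hill_diversity_sketch(class_counts: np.ndarray, q: float) -> float:
    total = class_counts.sum()
    if total == 0:
        return 0.0
    probs = class_counts[class_counts > 0] / total
    if np.isclose(q, 1.0):
        return float(np.exp(-np.sum(probs * np.log(probs))))
    return float(np.sum(probs ** q) ** (1 / (1 - q)))

print(hill_diversity_sketch(np.array([10, 10, 10]), q=0))  # 3.0: richness
print(hill_diversity_sketch(np.array([10, 10, 10]), q=2))  # 3.0: even distribution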
Example no. 3
def singly_constrained(
        node_data: np.ndarray,
        edge_data: np.ndarray,
        node_edge_map: Dict,
        distances: np.ndarray,
        betas: np.ndarray,
        i_data_map: np.ndarray,
        j_data_map: np.ndarray,
        i_weights: np.ndarray,
        j_weights: np.ndarray,
        angular: bool = False,
        suppress_progress: bool = False) -> Tuple[np.ndarray, np.ndarray]:
    '''
    - Calculates trips from i to j and returns the assigned trips and the network-assigned flows for the j nodes
    #TODO: consider enhanced numerical checks for single vs. multi dimensional numerical data

    - Kept separate from the local aggregator because singly constrained origin / destination models are computed separately
    - Requires two iterations: one to aggregate all reachable k nodes per i node, another to compute the ratio of each j's attractiveness to the aggregated k attractiveness
    - Assigns the resulting i -> j trips over the network as part of the second iteration
    NODE MAP:
    0 - x
    1 - y
    2 - live
    3 - ghosted
    EDGE MAP:
    0 - start node
    1 - end node
    2 - length in metres
    3 - sum of angular travel along length
    4 - impedance factor
    5 - entry bearing
    6 - exit bearing
    DATA MAP:
    0 - x
    1 - y
    2 - assigned network index - nearest
    3 - assigned network index - next-nearest
    '''
    checks.check_network_maps(node_data, edge_data, node_edge_map)
    checks.check_distances_and_betas(distances, betas)
    checks.check_data_map(i_data_map, check_assigned=True)
    checks.check_data_map(j_data_map, check_assigned=True)

    if len(i_weights) != len(i_data_map):
        raise ValueError(
            'The i_weights array must be the same length as the i_data_map.')

    if len(j_weights) != len(j_data_map):
        raise ValueError(
            'The j_weights array must be the same length as the j_data_map.')

    # establish variables
    netw_n = len(node_data)
    d_n = len(distances)
    global_max_dist = np.max(distances)
    netw_flows = np.full((d_n, netw_n), 0.0)

    i_n = len(i_data_map)
    k_agg = np.full((d_n, i_n), 0.0)

    j_n = len(j_data_map)
    j_assigned = np.full((d_n, j_n), 0.0)

    # iterate all i nodes
    # filter all reachable nodes k and aggregate k attractiveness * negative exponential of distance
    steps = int(i_n / 10000)
    for i_idx in range(i_n):
        if not suppress_progress:
            checks.progress_bar(i_idx, i_n, steps)
        # get the nearest node
        i_assigned_netw_idx = int(i_data_map[i_idx, 2])
        # calculate the base distance from the data point to the nearest assigned node
        i_x, i_y = i_data_map[i_idx, :2]
        n_x, n_y = node_data[i_assigned_netw_idx, :2]
        i_door_dist = np.hypot(i_x - n_x, i_y - n_y)

        # find the reachable j data points and their respective distances from the closest node
        reachable_j, reachable_j_dist, tree_preds = aggregate_to_src_idx(
            i_assigned_netw_idx, node_data, edge_data, node_edge_map,
            j_data_map, global_max_dist, angular)

        # aggregate the weighted j (all k) nodes
        # iterate the reachable indices and related distances
        for j_idx, (j_reachable,
                    j_dist) in enumerate(zip(reachable_j, reachable_j_dist)):
            if not j_reachable:
                continue
            # iterate the distance dimensions
            for d_idx, (d, b) in enumerate(zip(distances, betas)):
                total_dist = j_dist + i_door_dist
                # increment weighted k aggregations at respective distances if the distance is less than current d
                if total_dist <= d:
                    k_agg[d_idx,
                          i_idx] += j_weights[j_idx] * np.exp(total_dist * b)

    # this is the second step
    # this time, filter all reachable j vertices and aggregate the proportion of flow from i to j
    # this is done by dividing i-j flow through i-k_agg flow from previous step
    steps = int(i_n / 10000)
    for i_idx in range(i_n):
        if not suppress_progress:
            checks.progress_bar(i_idx, i_n, steps)

        # get the nearest node
        i_assigned_netw_idx = int(i_data_map[i_idx, 2])
        # calculate the base distance from the data point to the nearest assigned node
        i_x, i_y = i_data_map[i_idx, :2]
        n_x, n_y = node_data[i_assigned_netw_idx, :2]
        i_door_dist = np.hypot(i_x - n_x, i_y - n_y)

        # find the reachable j data points and their respective distances from the closest node
        reachable_j, reachable_j_dist, tree_preds = aggregate_to_src_idx(
            i_assigned_netw_idx, node_data, edge_data, node_edge_map,
            j_data_map, global_max_dist, angular)

        # aggregate j divided through all k nodes
        # iterate the reachable indices and related distances
        for j_idx, (j_reachable,
                    j_dist) in enumerate(zip(reachable_j, reachable_j_dist)):
            if not j_reachable:
                continue
            # iterate the distance dimensions
            for d_idx, (d, b) in enumerate(zip(distances, betas)):
                total_dist = j_dist + i_door_dist
                # if the distance is less than current d
                if total_dist <= d:
                    # aggregate all flows from reachable j's to i_idx
                    # divide through respective i-k_agg sums
                    # catch division by zero:
                    if k_agg[d_idx, i_idx] == 0:
                        assigned = 0
                    else:
                        assigned = i_weights[i_idx] * j_weights[j_idx] * np.exp(
                            total_dist * b) / k_agg[d_idx, i_idx]
                    j_assigned[d_idx, j_idx] += assigned
                    # assign trips to network
                    if assigned != 0:
                        # get the j assigned node
                        j_assigned_netw_idx = int(j_data_map[j_idx, 2])
                        # in this case start and end nodes are counted...!
                        netw_flows[d_idx, j_assigned_netw_idx] += assigned
                        # skip if same start / end node
                        if j_assigned_netw_idx == i_assigned_netw_idx:
                            continue
                        # aggregate to the network
                        inter_idx = int(tree_preds[j_assigned_netw_idx])
                        while True:
                            # end nodes counted, so place above break
                            netw_flows[d_idx, inter_idx] += assigned
                            # break out of while loop if the intermediary has reached the source node
                            if inter_idx == i_assigned_netw_idx:
                                break
                            # follow the chain
                            inter_idx = int(tree_preds[inter_idx])

    return j_assigned, netw_flows
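
The two passes above implement a singly constrained spatial interaction model: T_ij = O_i * W_j * exp(b * d_ij) / sum_k(W_k * exp(b * d_ik)), so the allocations across the reachable j's sum back to the origin weight O_i. Below is a toy sketch with a plain distance vector standing in for the network distances; the weights and distances are made up for illustration.

import numpy as np

# Toy singly constrained allocation for a single origin i over three destinations j.
i_weight = 100.0                          # origin weight O_i
j_weights = np.array([1.0, 2.0, 1.0])     # destination attractiveness W_j
d_ij = np.array([100.0, 200.0, 400.0])    # assumed i -> j distances in metres
b = -0.01                                 # negative beta, matching the convention above

k_agg = np.sum(j_weights * np.exp(b * d_ij))              # denominator over all reachable k
t_ij = i_weight * j_weights * np.exp(b * d_ij) / k_agg    # allocated trips per j
print(t_ij, t_ij.sum())                                   # allocations sum back to 100.0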
Example no. 4
def local_segment_centrality(node_data: np.ndarray,
                             edge_data: np.ndarray,
                             node_edge_map: Dict,
                             distances: np.ndarray,
                             betas: np.ndarray,
                             measure_keys: tuple,
                             jitter_scale: float = 0.0,
                             angular: bool = False,
                             progress_proxy=None) -> np.ndarray:
    # integrity checks
    checks.check_distances_and_betas(distances, betas)
    checks.check_network_maps(node_data, edge_data, node_edge_map)
    # gather functions
    close_funcs = List.empty_list(segment_func_proto)
    close_idxs = []
    betw_idxs = []
    for m_idx, m_key in enumerate(measure_keys):
        if not angular:
            # segment keys
            if m_key == 'segment_density':
                close_funcs.append(segment_density)
                close_idxs.append(m_idx)
            elif m_key == 'segment_harmonic':
                close_funcs.append(segment_harmonic)
                close_idxs.append(m_idx)
            elif m_key == 'segment_beta':
                close_funcs.append(segment_beta)
                close_idxs.append(m_idx)
            elif m_key == 'segment_betweenness':
                # only one version of shortest path betweenness - no need for func
                betw_idxs.append(m_idx)
            else:
                raise ValueError('''
                    Unable to match requested centrality measure key against available options.
                    Shortest-path measures can't be mixed with simplest-path measures.
                    Set angular=True if using simplest-path measures. 
                ''')
        else:
            # segment keys
            if m_key == 'segment_harmonic_hybrid':
                # only one version of simplest path closeness - no need for func
                close_idxs.append(m_idx)
            elif m_key == 'segment_betweeness_hybrid':
                # only one version of simplest path betweenness - no need for func
                betw_idxs.append(m_idx)
            else:
                raise ValueError('''
                    Unable to match requested centrality measure key against available options.
                    Shortest-path measures can't be mixed with simplest-path measures.
                    Set angular=False if using shortest-path measures. 
                ''')
    # prepare variables
    n = len(node_data)
    d_n = len(distances)
    k_n = len(measure_keys)
    measures_data = np.full((k_n, d_n, n), 0.0, dtype=np.float32)
    global_max_dist = float(np.nanmax(distances))
    nodes_live = node_data[:, 2]
    # iterate through each vert and calculate the shortest path tree
    for src_idx in prange(n):
        shadow_arr = np.full((k_n, d_n, n), 0.0, dtype=np.float32)
        # numba no object mode can only handle basic printing
        # note that progress bar adds a performance penalty
        if progress_proxy is not None:
            progress_proxy.update(1)
        # only compute for live nodes
        if not nodes_live[src_idx]:
            continue
        '''
        Shortest tree dijkstra        
        Predecessor map is based on impedance heuristic - i.e. angular vs not
        Shortest path distances in metres used for defining max distances regardless
        RETURNS A SHORTEST PATH TREE MAP:
        0 - processed nodes
        1 - predecessors
        2 - shortest path distance
        3 - simplest path angular distance
        4 - cycles
        5 - origin segments
        6 - last segments
        '''
        tree_map, tree_edges = shortest_path_tree(edge_data,
                                                  node_edge_map,
                                                  src_idx,
                                                  max_dist=global_max_dist,
                                                  jitter_scale=jitter_scale,
                                                  angular=angular)
        tree_nodes = np.where(tree_map[:, 0])[0]
        tree_preds = tree_map[:, 1]
        tree_short_dists = tree_map[:, 2]
        tree_simpl_dists = tree_map[:, 3]
        tree_origin_seg = tree_map[:, 5]
        tree_last_seg = tree_map[:, 6]
        '''
        can't do edge processing as part of shortest tree because all shortest paths have to be resolved first
        hence visiting all processed edges and extrapolating information
        NOTES:
        1. the above shortest tree algorithm only tracks edges in one direction - i.e. no duplication
        2. dijkstra sorts all active nodes by distance: explores from near to far: edges discovered accordingly
        '''
        # only build edge data if necessary
        if close_idxs:
            for edge_idx in np.where(tree_edges)[0]:
                # unpack the edge data
                seg_n_nd, seg_m_nd, seg_len, seg_ang, seg_imp_fact, seg_in_bear, seg_out_bear = edge_data[
                    edge_idx]
                n_nd_idx = int(seg_n_nd)
                m_nd_idx = int(seg_m_nd)
                n_simpl_dist = tree_simpl_dists[n_nd_idx]
                m_simpl_dist = tree_simpl_dists[m_nd_idx]
                n_short_dist = tree_short_dists[n_nd_idx]
                m_short_dist = tree_short_dists[m_nd_idx]
                # don't process unreachable segments
                if np.isinf(n_short_dist) and np.isinf(m_short_dist):
                    continue
                '''
                shortest path (non-angular) uses a split segment workflow
                the split workflow allows for non-shortest-path edges to be approached from either direction
                i.e. the shortest path to node "b" isn't necessarily via node "a"
                the edge is then split at the farthest point from either direction and apportioned either way
                if the segment is on the shortest path then the second segment will squash down to naught
                '''
                if not angular:
                    '''
                    dijkstra discovers edges from near to far (sorts before popping next node)
                    i.e. this sort may be unnecessary?
                    '''
                    # sort where a < b
                    if n_short_dist <= m_short_dist:
                        a = tree_short_dists[n_nd_idx]
                        a_imp = tree_short_dists[n_nd_idx]
                        b = tree_short_dists[m_nd_idx]
                        b_imp = tree_short_dists[m_nd_idx]
                    else:
                        a = tree_short_dists[m_nd_idx]
                        a_imp = tree_short_dists[m_nd_idx]
                        b = tree_short_dists[n_nd_idx]
                        b_imp = tree_short_dists[n_nd_idx]
                    # get the max distance along the segment: seg_len = (m - start_len) + (m - end_len)
                    # c and d variables can diverge per beneath
                    c = d = (seg_len + a + b) / 2
                    # c | d impedance should technically be the same if computed from either side
                    c_imp = d_imp = a_imp + (c - a) * seg_imp_fact
                    # iterate the distance and beta thresholds - from large to small for threshold snipping
                    for d_idx in range(len(distances) - 1, -1, -1):
                        dist_cutoff = distances[d_idx]
                        beta = betas[d_idx]
                        '''
                        if c or d are greater than the distance threshold, then the segments are "snipped"
                        '''
                        # a to c segment
                        if a <= dist_cutoff:
                            if c > dist_cutoff:
                                c = dist_cutoff
                                c_imp = a_imp + (dist_cutoff -
                                                 a) * seg_imp_fact
                            for m_idx, close_func in zip(
                                    close_idxs, close_funcs):
                                shadow_arr[m_idx, d_idx,
                                           src_idx] += close_func(
                                               a, c, a_imp, c_imp, beta)
                        # a to b segment - if on the shortest path then b == d, in which case, continue
                        if b == d:
                            continue
                        if b <= dist_cutoff:
                            if d > dist_cutoff:
                                d = dist_cutoff
                                d_imp = b_imp + (dist_cutoff -
                                                 b) * seg_imp_fact
                            for m_idx, close_func in zip(
                                    close_idxs, close_funcs):
                                shadow_arr[m_idx, d_idx,
                                           src_idx] += close_func(
                                               b, d, b_imp, d_imp, beta)
                else:
                    '''
                    there is a different workflow for angular - uses single segment (no segment splitting)
                    this is because the simplest path onto the entire length of segment is from the lower impedance end
                    this assumes segments are relatively straight; subdividing segments for splitting would be overly complex...
                    '''
                    # only a single case existing for angular version so no need for abstracted functions
                    # there are three scenarios:
                    # 1) e is the predecessor for f
                    if n_nd_idx == src_idx or tree_preds[m_nd_idx] == n_nd_idx:
                        e = tree_short_dists[n_nd_idx]
                        f = tree_short_dists[m_nd_idx]
                        # if travelling via n, then m = n_imp + seg_ang
                        # calculations are based on segment length / angle
                        # i.e. need to decide whether to base angular change on entry vs exit impedance
                        # else take midpoint of segment as ballpark for average, which is the course taken here
                        # i.e. exit impedance minus half segment impedance
                        ang = m_simpl_dist - seg_ang / 2
                    # 2) f is the predecessor for e
                    elif m_nd_idx == src_idx or tree_preds[
                            n_nd_idx] == m_nd_idx:
                        e = tree_short_dists[m_nd_idx]
                        f = tree_short_dists[n_nd_idx]
                        ang = n_simpl_dist - seg_ang / 2  # per above
                    # 3) neither of the above
                    # get the approach angles for either side and compare to find the least inwards impedance
                    # this involves impedance up to entrypoint either side plus respective turns onto the segment
                    else:
                        # get the out bearing from the predecessor and calculate the turn onto current seg's in bearing
                        # find n's predecessor
                        n_pred_idx = int(tree_preds[n_nd_idx])
                        # find the edge from n's predecessor to n
                        e_i = _find_edge_idx(node_edge_map, edge_data,
                                             n_pred_idx, n_nd_idx)
                        # get the predecessor edge's outwards bearing at index 6
                        n_pred_out_bear = edge_data[int(e_i), 6]
                        # calculating the turn into this segment from the predecessor's out bearing
                        n_turn_in = np.abs(
                            (seg_in_bear - n_pred_out_bear + 180) % 360 - 180)
                        # then add the turn-in to the aggregated impedance at n
                        # i.e. total angular impedance onto this segment
                        # as above two scenarios, adding half of angular impedance for segment as avg between in / out
                        n_ang = n_simpl_dist + n_turn_in + seg_ang / 2
                        # repeat for the other side
                        # per original n -> m edge destructuring: m is the node in the outwards bound direction
                        # i.e. need to first find the corresponding edge in the opposite m -> n direction of travel
                        # this gives the correct inwards bearing as if m were the entry point
                        opp_i = _find_edge_idx(node_edge_map, edge_data,
                                               m_nd_idx, n_nd_idx)
                        # now that the opposing edge is known, we can fetch the inwards bearing at index 5 (not 6)
                        opp_in_bear = edge_data[int(opp_i), 5]
                        # find m's predecessor
                        m_pred_idx = int(tree_preds[m_nd_idx])
                        # we can now go ahead and find m's predecessor edge
                        e_i = _find_edge_idx(node_edge_map, edge_data,
                                             m_pred_idx, m_nd_idx)
                        # get the predecessor edge's outwards bearing at index 6
                        m_pred_out_bear = edge_data[int(e_i), 6]
                        # and calculate the turn-in from m's predecessor onto the m inwards bearing
                        m_turn_in = np.abs(
                            (opp_in_bear - m_pred_out_bear + 180) % 360 - 180)
                        # then add to aggregated impedance at m
                        m_ang = m_simpl_dist + m_turn_in + seg_ang / 2
                        # the distance and angle are based on the smallest angular impedance onto the segment
                        # select by shortest distance in event angular impedances are identical from either direction
                        if n_ang == m_ang:
                            if n_short_dist <= m_short_dist:
                                e = tree_short_dists[n_nd_idx]
                                ang = n_ang
                            else:
                                e = tree_short_dists[m_nd_idx]
                                ang = m_ang
                        elif n_ang < m_ang:
                            e = tree_short_dists[n_nd_idx]
                            ang = n_ang
                        else:
                            e = tree_short_dists[m_nd_idx]
                            ang = m_ang
                        # f is the entry distance plus segment length
                        f = e + seg_len
                    # iterate the distance thresholds - from large to small for threshold snipping
                    for d_idx in range(len(distances) - 1, -1, -1):
                        dist_cutoff = distances[d_idx]
                        if e <= dist_cutoff:
                            if f > dist_cutoff:
                                f = dist_cutoff
                            # uses segment length as base (in this sense hybrid)
                            # intentionally not using integral because conflates harmonic shortest-path w. simplest
                            # there is only one case for angular - no need to abstract to func
                            for m_idx in close_idxs:
                                # transform - prevents division by zero
                                agg_ang = 1 + (ang / 180)
                                # then aggregate - angular uses distances explicitly
                                shadow_arr[m_idx, d_idx,
                                           src_idx] += (f - e) / agg_ang
        if betw_idxs:
            # prepare a list of neighbouring nodes
            nb_nodes = List.empty_list(types.int64)
            for edge_idx in node_edge_map[src_idx]:
                out_nd_idx = int(edge_data[edge_idx][1])  # to node is index 1
                nb_nodes.append(out_nd_idx)
            # betweenness keys computed per to_idx
            for to_idx in tree_nodes:
                # only process in one direction
                if to_idx < src_idx:
                    continue
                # skip self node
                if to_idx == src_idx:
                    continue
                # skip direct neighbours (no nodes between)
                if to_idx in nb_nodes:
                    continue
                # distance - do not proceed if no route available
                to_dist = tree_short_dists[to_idx]
                if np.isinf(to_dist):
                    continue
                '''
                BETWEENNESS
                segment versions only agg first and last segments
                the distance decay is based on the distance between the src segment and to segment
                i.e. willingness of people to walk between src and to segments

                betweenness is aggregated to intervening nodes based on above distances and decays
                other sections (in between current first and last) are respectively processed from other to nodes

                distance thresholds are computed using the inner as opposed to outer edges of the segments
                '''
                o_seg_len = edge_data[int(tree_origin_seg[to_idx])][2]
                l_seg_len = edge_data[int(tree_last_seg[to_idx])][2]
                min_span = to_dist - o_seg_len - l_seg_len
                # calculate traversal distances from opposing segments
                o_1 = min_span
                o_2 = min_span + o_seg_len
                l_1 = min_span
                l_2 = min_span + l_seg_len
                # betweenness - only counting truly between vertices, not starting and ending verts
                inter_idx = int(tree_preds[to_idx])
                while True:
                    # break out of while loop if the intermediary has reached the source node
                    if inter_idx == src_idx:
                        break
                    # iterate the distance thresholds - from large to small for threshold snipping
                    for d_idx in range(len(distances) - 1, -1, -1):
                        dist_cutoff = distances[d_idx]
                        beta = betas[d_idx]
                        if min_span <= dist_cutoff:
                            # prune if necessary
                            if o_2 > dist_cutoff:
                                o_2 = dist_cutoff
                            if l_2 > dist_cutoff:
                                l_2 = dist_cutoff
                            # only one version for betweenness for respective angular / non angular
                            # i.e. no need to abstract to function
                            for m_idx in betw_idxs:
                                if not angular:
                                    # catch division by zero
                                    if beta == 0.0:
                                        auc = o_2 - o_1 + l_2 - l_1
                                    else:
                                        auc = (np.exp(-beta * o_2) -
                                               np.exp(-beta * o_1)) / -beta + \
                                              (np.exp(-beta * l_2) -
                                               np.exp(-beta * l_1)) / -beta
                                    shadow_arr[m_idx, d_idx, inter_idx] += auc
                                else:
                                    bt_ang = 1 + tree_simpl_dists[to_idx] / 180
                                    pt_a = o_2 - o_1
                                    pt_b = l_2 - l_1
                                    shadow_arr[m_idx, d_idx,
                                               inter_idx] += (pt_a +
                                                              pt_b) / bt_ang
                    # follow the chain
                    inter_idx = int(tree_preds[inter_idx])

        # reduction
        measures_data += shadow_arr

    return measures_data
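
The non-angular betweenness branch above aggregates the analytic area under the decay curve: the integral of exp(-beta * x) over [x1, x2] is (exp(-beta * x2) - exp(-beta * x1)) / -beta. A quick numeric sanity check of that closed form, with arbitrarily chosen values:

import numpy as np

beta = 0.01
x1, x2 = 50.0, 150.0
# closed form, as used for the "auc" aggregation above
closed_form = (np.exp(-beta * x2) - np.exp(-beta * x1)) / -beta
# numeric check via a simple trapezoidal sum on a fine grid
xs = np.linspace(x1, x2, 10001)
ys = np.exp(-beta * xs)
numeric = float(np.sum((ys[1:] + ys[:-1]) / 2.0 * np.diff(xs)))
print(closed_form, numeric)  # agree to several decimal places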
Example no. 5
def local_node_centrality(node_data: np.ndarray,
                          edge_data: np.ndarray,
                          node_edge_map: Dict,
                          distances: np.ndarray,
                          betas: np.ndarray,
                          measure_keys: tuple,
                          jitter_scale: float = 0.0,
                          angular: bool = False,
                          progress_proxy=None) -> np.ndarray:
    # integrity checks
    checks.check_distances_and_betas(distances, betas)
    checks.check_network_maps(node_data, edge_data, node_edge_map)
    # gather functions
    close_funcs = List.empty_list(node_close_func_proto)
    close_idxs = []
    betw_funcs = List.empty_list(node_betw_func_proto)
    betw_idxs = []
    for m_idx, m_key in enumerate(measure_keys):
        if not angular:
            # closeness keys
            if m_key == 'node_density':
                close_funcs.append(node_density)
                close_idxs.append(m_idx)
            elif m_key == 'node_farness':
                close_funcs.append(node_farness)
                close_idxs.append(m_idx)
            elif m_key == 'node_cycles':
                close_funcs.append(node_cycles)
                close_idxs.append(m_idx)
            elif m_key == 'node_harmonic':
                close_funcs.append(node_harmonic)
                close_idxs.append(m_idx)
            elif m_key == 'node_beta':
                close_funcs.append(node_beta)
                close_idxs.append(m_idx)
            # betweenness keys
            elif m_key == 'node_betweenness':
                betw_funcs.append(node_betweenness)
                betw_idxs.append(m_idx)
            elif m_key == 'node_betweenness_beta':
                betw_funcs.append(node_betweenness_beta)
                betw_idxs.append(m_idx)
            else:
                raise ValueError('''
                Unable to match requested centrality measure key against available options.
                Shortest-path measures can't be mixed with simplest-path measures.
                Set angular=True if using simplest-path measures.''')
        else:
            # aggregative keys
            if m_key == 'node_harmonic_angular':
                close_funcs.append(node_harmonic_angular)
                close_idxs.append(m_idx)
            # betweenness keys
            elif m_key == 'node_betweenness_angular':
                betw_funcs.append(node_betweenness)
                betw_idxs.append(m_idx)
            else:
                raise ValueError('''
                Unable to match requested centrality measure key against available options.
                Shortest-path measures can't be mixed with simplest-path measures.
                Set angular=False if using shortest-path measures.''')
    # prepare variables
    n = len(node_data)
    d_n = len(distances)
    k_n = len(measure_keys)
    measures_data = np.full((k_n, d_n, n), 0.0, dtype=np.float32)
    global_max_dist = float(np.nanmax(distances))
    nodes_live = node_data[:, 2]
    # iterate through each vert and calculate the shortest path tree
    for src_idx in prange(n):
        shadow_arr = np.full((k_n, d_n, n), 0.0, dtype=np.float32)
        # numba no object mode can only handle basic printing
        # note that progress bar adds a performance penalty
        if progress_proxy is not None:
            progress_proxy.update(1)
        # only compute for live nodes
        if not nodes_live[src_idx]:
            continue
        '''
        Shortest tree dijkstra        
        Predecessor map is based on impedance heuristic - which can be different from metres
        Distance map in metres still necessary for defining max distances and computing equivalent distance measures
        RETURNS A SHORTEST PATH TREE MAP:
        0 - processed nodes
        1 - predecessors
        2 - shortest path distance
        3 - simplest path angular distance
        4 - cycles
        5 - origin segments
        6 - last segments
        '''
        tree_map, tree_edges = shortest_path_tree(edge_data,
                                                  node_edge_map,
                                                  src_idx,
                                                  max_dist=global_max_dist,
                                                  jitter_scale=jitter_scale,
                                                  angular=angular)
        tree_nodes = np.where(tree_map[:, 0])[0]
        tree_preds = tree_map[:, 1]
        tree_short_dists = tree_map[:, 2]
        tree_simpl_dists = tree_map[:, 3]
        tree_cycles = tree_map[:, 4]
        # process each reachable node
        for to_idx in tree_nodes:
            # skip self node
            if to_idx == src_idx:
                continue
            # unpack impedance and distance for to index
            to_short_dist = tree_short_dists[to_idx]
            to_simpl_dist = tree_simpl_dists[to_idx]
            cycles = tree_cycles[to_idx]
            # do not proceed if no route available
            if np.isinf(to_short_dist):
                continue
            # calculate closeness centralities
            if close_funcs:
                for d_idx in range(len(distances)):
                    dist_cutoff = distances[d_idx]
                    beta = betas[d_idx]
                    if to_short_dist <= dist_cutoff:
                        for m_idx, close_func in zip(close_idxs, close_funcs):
                            shadow_arr[m_idx, d_idx, src_idx] += close_func(
                                to_short_dist, to_simpl_dist, beta, cycles)
            # only process in one direction
            if to_idx < src_idx:
                continue
            # calculate betweenness centralities
            if betw_funcs:
                # only counting truly between vertices, not starting and ending verts
                inter_idx = int(tree_preds[to_idx])
                while True:
                    # break out of while loop if the intermediary has reached the source node
                    if inter_idx == src_idx:
                        break
                    # iterate the distance thresholds
                    for d_idx in range(len(distances)):
                        dist_cutoff = distances[d_idx]
                        beta = betas[d_idx]
                        # check threshold
                        if tree_short_dists[to_idx] <= dist_cutoff:
                            # iterate betweenness functions
                            for m_idx, betw_func in zip(betw_idxs, betw_funcs):
                                shadow_arr[m_idx, d_idx,
                                           inter_idx] += betw_func(
                                               to_short_dist, beta)
                    # follow the chain
                    inter_idx = int(tree_preds[inter_idx])
        # reduce
        measures_data += shadow_arr

    return measures_data
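
The close_funcs gathered above all share the signature close_func(to_short_dist, to_simpl_dist, beta, cycles), per the call site in the closeness loop. The bodies below are sketches following the standard definitions the key names suggest; they are assumptions about the underlying functions rather than the library's actual implementations.

import numpy as np

def node_density_sketch(to_short_dist, to_simpl_dist, beta, cycles):
    return 1.0  # a simple count of reachable nodes

def node_harmonic_sketch(to_short_dist, to_simpl_dist, beta, cycles):
    return 1.0 / to_short_dist  # harmonic closeness contribution

def node_beta_sketch(to_short_dist, to_simpl_dist, beta, cycles):
    return np.exp(-beta * to_short_dist)  # negative-exponential ("gravity") weighting

print(node_beta_sketch(100.0, 120.0, 0.01, 0.0))  # ~0.368 decay weight at 100m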
Example no. 6
def aggregate_stats(
    node_data: np.ndarray,
    edge_data: np.ndarray,
    node_edge_map: Dict,
    data_map: np.ndarray,
    distances: np.ndarray,
    betas: np.ndarray,
    numerical_arrays: np.ndarray = np.array(np.full((0, 0), np.nan)),
    jitter_scale: float = 0.0,
    angular: bool = False,
    progress_proxy=None
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray,
           np.ndarray, np.ndarray, np.ndarray]:
    """
    NODE MAP:
    0 - x
    1 - y
    2 - live
    EDGE MAP:
    0 - start node
    1 - end node
    2 - length in metres
    3 - sum of angular travel along length
    4 - impedance factor
    5 - in bearing
    6 - out bearing
    DATA MAP:
    0 - x
    1 - y
    2 - assigned network index - nearest
    3 - assigned network index - next-nearest
    """
    checks.check_network_maps(node_data, edge_data, node_edge_map)
    checks.check_data_map(
        data_map, check_assigned=True
    )  # raises ValueError if data points are not assigned to a network
    checks.check_distances_and_betas(distances, betas)
    # when passing an empty 2d array to numba, use: np.array(np.full((0, 0), np.nan))
    if numerical_arrays.shape[1] != len(data_map):
        raise ValueError(
            'The length of the numerical data arrays does not match the length of the data map.'
        )
    checks.check_numerical_data(numerical_arrays)
    # establish variables
    netw_n = len(node_data)
    d_n = len(distances)
    n_n = len(numerical_arrays)
    global_max_dist = float(np.nanmax(distances))
    netw_nodes_live = node_data[:, 2]
    # setup data structures
    stats_sum = np.full((n_n, d_n, netw_n), 0.0)
    stats_sum_wt = np.full((n_n, d_n, netw_n), 0.0)
    stats_mean = np.full((n_n, d_n, netw_n), np.nan)
    stats_mean_wt = np.full((n_n, d_n, netw_n), np.nan)
    stats_count = np.full((n_n, d_n, netw_n), 0.0)
    stats_count_wt = np.full((n_n, d_n, netw_n), 0.0)
    stats_variance = np.full((n_n, d_n, netw_n), np.nan)
    stats_variance_wt = np.full((n_n, d_n, netw_n), np.nan)
    stats_max = np.full((n_n, d_n, netw_n), np.nan)
    stats_min = np.full((n_n, d_n, netw_n), np.nan)
    # iterate through each vert and aggregate
    # parallelise over n nodes:
    # each distance or stat array index is therefore only touched by one thread at a time
    # i.e. no need to use inner array deductions as with centralities
    for netw_src_idx in prange(netw_n):
        if progress_proxy is not None:
            progress_proxy.update(1)
        # only compute for live nodes
        if not netw_nodes_live[netw_src_idx]:
            continue
        # generate the reachable classes and their respective distances
        # these are non-unique - i.e. simply the class of each data point within the maximum distance
        # the aggregate_to_src_idx method will choose the closer direction of approach to a data point
        # from the nearest or next-nearest network node (assigned once globally, prior to this method)
        reachable_data, reachable_data_dist, tree_preds = aggregate_to_src_idx(
            netw_src_idx,
            node_data,
            edge_data,
            node_edge_map,
            data_map,
            global_max_dist,
            jitter_scale=jitter_scale,
            angular=angular)
        # IDW
        # the order of the loops matters because the nested aggregations happen per distance per numerical array
        # iterate the reachable indices and related distances
        for data_idx, (reachable, data_dist) in enumerate(
                zip(reachable_data, reachable_data_dist)):
            # skip data points that are not reachable on the network within the max distance
            # (i.e. within the radial max distance but beyond the network max distance)
            if not reachable:
                continue
            # iterate the numerical arrays dimension
            for num_idx in range(n_n):
                # some values will be NaN
                num = numerical_arrays[num_idx, int(data_idx)]
                if np.isnan(num):
                    continue
                # iterate the distance dimensions
                for d_idx, (d, b) in enumerate(zip(distances, betas)):
                    # increment mean aggregations at respective distances if within the current threshold d
                    if data_dist <= d:
                        # aggregate
                        stats_sum[num_idx, d_idx, netw_src_idx] += num
                        stats_count[num_idx, d_idx, netw_src_idx] += 1
                        stats_sum_wt[num_idx, d_idx,
                                     netw_src_idx] += num * np.exp(
                                         -b * data_dist)
                        stats_count_wt[num_idx, d_idx,
                                       netw_src_idx] += np.exp(-b * data_dist)
                        # max
                        if np.isnan(stats_max[num_idx, d_idx, netw_src_idx]):
                            stats_max[num_idx, d_idx, netw_src_idx] = num
                        elif num > stats_max[num_idx, d_idx, netw_src_idx]:
                            stats_max[num_idx, d_idx, netw_src_idx] = num
                        # min
                        if np.isnan(stats_min[num_idx, d_idx, netw_src_idx]):
                            stats_min[num_idx, d_idx, netw_src_idx] = num
                        elif num < stats_min[num_idx, d_idx, netw_src_idx]:
                            stats_min[num_idx, d_idx, netw_src_idx] = num
        # finalise mean calculations - this is happening for a single netw_src_idx, so fairly fast
        for num_idx in range(n_n):
            for d_idx in range(d_n):
                # use np.divide so that division by zero does not raise
                stats_mean[num_idx, d_idx, netw_src_idx] = np.divide(
                    stats_sum[num_idx, d_idx, netw_src_idx],
                    stats_count[num_idx, d_idx, netw_src_idx])
                stats_mean_wt[num_idx, d_idx, netw_src_idx] = np.divide(
                    stats_sum_wt[num_idx, d_idx, netw_src_idx],
                    stats_count_wt[num_idx, d_idx, netw_src_idx])
        # calculate variances - counts are already computed per above
        # weighted version is IDW by division through equivalently weighted counts above
        # iterate the reachable indices and related distances
        for data_idx, (reachable, data_dist) in enumerate(
                zip(reachable_data, reachable_data_dist)):
            # skip data points that are not reachable on the network within the max distance
            # (i.e. within the radial max distance but beyond the network max distance)
            if not reachable:
                continue
            # iterate the numerical arrays dimension
            for num_idx in range(n_n):
                # some values will be NaN
                num = numerical_arrays[num_idx, int(data_idx)]
                if np.isnan(num):
                    continue
                # iterate the distance dimensions
                for d_idx, (d, b) in enumerate(zip(distances, betas)):
                    # increment variance aggregations at respective distances if within the current threshold d
                    if data_dist <= d:
                        # aggregate
                        if np.isnan(stats_variance[num_idx, d_idx,
                                                   netw_src_idx]):
                            stats_variance[num_idx, d_idx, netw_src_idx] = \
                                np.square(num - stats_mean[num_idx, d_idx, netw_src_idx])
                            stats_variance_wt[num_idx, d_idx, netw_src_idx] = \
                                np.square(num - stats_mean_wt[num_idx, d_idx, netw_src_idx]) * np.exp(-b * data_dist)
                        else:
                            stats_variance[num_idx, d_idx, netw_src_idx] += \
                                np.square(num - stats_mean[num_idx, d_idx, netw_src_idx])
                            stats_variance_wt[num_idx, d_idx, netw_src_idx] += \
                                np.square(num - stats_mean_wt[num_idx, d_idx, netw_src_idx]) * np.exp(-b * data_dist)
        # finalise variance calculations
        for num_idx in range(n_n):
            for d_idx in range(d_n):
                # use np.divide so that division by zero does not raise
                stats_variance[num_idx, d_idx, netw_src_idx] = np.divide(
                    stats_variance[num_idx, d_idx, netw_src_idx],
                    stats_count[num_idx, d_idx, netw_src_idx])
                stats_variance_wt[num_idx, d_idx, netw_src_idx] = np.divide(
                    stats_variance_wt[num_idx, d_idx, netw_src_idx],
                    stats_count_wt[num_idx, d_idx, netw_src_idx])

    return stats_sum, stats_sum_wt, stats_mean, stats_mean_wt, stats_variance, stats_variance_wt, stats_max, stats_min
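
The weighted branch above is an inverse-distance-weighted aggregation: each reachable data point contributes with weight exp(-beta * distance), and the weighted mean and variance divide through the summed weights (stats_count_wt) rather than the raw counts. A minimal numpy sketch of the same pattern with assumed values (not library code):

import numpy as np

beta = 0.01  # positive beta, matching the np.exp(-b * data_dist) form above
values = np.array([3.0, 5.0, 8.0])  # hypothetical numerical values at data points
dists = np.array([50.0, 120.0, 300.0])  # hypothetical network distances
weights = np.exp(-beta * dists)
mean_wt = (weights * values).sum() / weights.sum()  # stats_sum_wt / stats_count_wt
var_wt = (weights * (values - mean_wt) ** 2).sum() / weights.sum()  # two-pass variance
print(mean_wt, var_wt)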
Example n. 7
def aggregate_landuses(
    node_data: np.ndarray,
    edge_data: np.ndarray,
    node_edge_map: Dict,
    data_map: np.ndarray,
    distances: np.ndarray,
    betas: np.ndarray,
    landuse_encodings: np.ndarray = np.array([]),
    qs: np.ndarray = np.array([]),
    mixed_use_hill_keys: np.ndarray = np.array([]),
    mixed_use_other_keys: np.ndarray = np.array([]),
    accessibility_keys: np.ndarray = np.array([]),
    cl_disparity_wt_matrix: np.ndarray = np.array(np.full((0, 0), np.nan)),
    jitter_scale: float = 0.0,
    angular: bool = False,
    progress_proxy=None
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """
    NODE MAP:
    0 - x
    1 - y
    2 - live
    EDGE MAP:
    0 - start node
    1 - end node
    2 - length in metres
    3 - sum of angular travel along length
    4 - impedance factor
    5 - in bearing
    6 - out bearing
    DATA MAP:
    0 - x
    1 - y
    2 - assigned network index - nearest
    3 - assigned network index - next-nearest
    """
    checks.check_network_maps(node_data, edge_data, node_edge_map)
    checks.check_data_map(
        data_map, check_assigned=True
    )  # raises ValueError if data points are not assigned to a network
    checks.check_distances_and_betas(distances, betas)
    # check landuse encodings
    if len(landuse_encodings) == 0:
        raise ValueError(
            'Mixed use metrics or land-use accessibilities require an array of landuse labels.'
        )
    elif len(landuse_encodings) != len(data_map):
        raise ValueError(
            'The number of landuse encodings does not match the number of data points.'
        )
    else:
        checks.check_categorical_data(landuse_encodings)
    # catch completely missing metrics
    if len(mixed_use_hill_keys) == 0 and len(
            mixed_use_other_keys) == 0 and len(accessibility_keys) == 0:
        raise ValueError(
            'No metrics specified, please specify at least one metric to compute.'
        )
    # catch missing qs
    if len(mixed_use_hill_keys) != 0 and len(qs) == 0:
        raise ValueError(
            'Hill diversity measures require that at least one value of q is specified.'
        )
    # negative qs caught by hill diversity methods
    # check various problematic key combinations
    if len(mixed_use_hill_keys) != 0:
        if np.nanmin(mixed_use_hill_keys) < 0 or np.max(
                mixed_use_hill_keys) > 3:
            raise ValueError('Mixed-use "hill" keys out of range of 0:4.')
    if len(mixed_use_other_keys) != 0:
        if np.nanmin(mixed_use_other_keys) < 0 or np.max(
                mixed_use_other_keys) > 2:
            raise ValueError('Mixed-use "other" keys out of range of 0:3.')
    if len(accessibility_keys) != 0:
        max_ac_key = np.nanmax(landuse_encodings)
        if np.nanmin(accessibility_keys) < 0 or np.max(
                accessibility_keys) > max_ac_key:
            raise ValueError(
                'Negative or out of range accessibility key encountered. Keys must match class encodings.'
            )
    for i in range(len(mixed_use_hill_keys)):
        for j in range(len(mixed_use_hill_keys)):
            if j > i:
                i_key = mixed_use_hill_keys[i]
                j_key = mixed_use_hill_keys[j]
                if i_key == j_key:
                    raise ValueError('Duplicate mixed-use "hill" key.')
    for i in range(len(mixed_use_other_keys)):
        for j in range(len(mixed_use_other_keys)):
            if j > i:
                i_key = mixed_use_other_keys[i]
                j_key = mixed_use_other_keys[j]
                if i_key == j_key:
                    raise ValueError('Duplicate mixed-use "other" key.')
    for i in range(len(accessibility_keys)):
        for j in range(len(accessibility_keys)):
            if j > i:
                i_key = accessibility_keys[i]
                j_key = accessibility_keys[j]
                if i_key == j_key:
                    raise ValueError('Duplicate accessibility key.')

    def disp_check(disp_matrix):
        # the length of the disparity matrix vis-a-vis unique landuses is tested in underlying diversity functions
        if disp_matrix.ndim != 2 or disp_matrix.shape[0] != disp_matrix.shape[
                1]:
            raise ValueError(
                'The disparity matrix must be a square NxN matrix.')
        if len(disp_matrix) == 0:
            raise ValueError(
                'Hill disparity and Rao pairwise measures require a class disparity weights matrix.'
            )

    # check that missing or malformed disparity weights matrices are caught
    for k in mixed_use_hill_keys:
        if k == 3:  # hill disparity
            disp_check(cl_disparity_wt_matrix)
    for k in mixed_use_other_keys:
        if k == 2:  # raos pairwise
            disp_check(cl_disparity_wt_matrix)
    # establish variables
    netw_n = len(node_data)
    d_n = len(distances)
    q_n = len(qs)
    global_max_dist = float(np.nanmax(distances))
    netw_nodes_live = node_data[:, 2]
    # setup data structures
    # hill mixed uses are structured separately to take values of q into account
    mixed_use_hill_data = np.full((4, q_n, d_n, netw_n), 0.0)  # 4 dim
    mixed_use_other_data = np.full((3, d_n, netw_n), 0.0)  # 3 dim
    accessibility_data = np.full((len(accessibility_keys), d_n, netw_n), 0.0)
    accessibility_data_wt = np.full((len(accessibility_keys), d_n, netw_n),
                                    0.0)
    # iterate through each vert and aggregate
    # parallelise over n nodes:
    # each distance or stat array index is therefore only touched by one thread at a time
    # i.e. no need to use inner array deductions as with centralities
    for netw_src_idx in prange(netw_n):
        if progress_proxy is not None:
            progress_proxy.update(1)
        # only compute for live nodes
        if not netw_nodes_live[netw_src_idx]:
            continue
        # generate the reachable classes and their respective distances
        # these are non-unique - i.e. simply the class of each data point within the maximum distance
        # the aggregate_to_src_idx method will choose the closer direction of approach to a data point
        # from the nearest or next-nearest network node (assigned once globally, prior to this method)
        reachable_data, reachable_data_dist, tree_preds = aggregate_to_src_idx(
            netw_src_idx,
            node_data,
            edge_data,
            node_edge_map,
            data_map,
            global_max_dist,
            jitter_scale=jitter_scale,
            angular=angular)
        # LANDUSES
        mu_max_unique_cl = int(landuse_encodings.max() + 1)
        # counts of each class type (array length per max unique classes - not just those within max distance)
        classes_counts = np.full((d_n, mu_max_unique_cl), 0)
        # nearest of each class type (likewise)
        classes_nearest = np.full((d_n, mu_max_unique_cl), np.inf)
        # iterate the reachable indices and related distances
        for data_idx, (reachable, data_dist) in enumerate(
                zip(reachable_data, reachable_data_dist)):
            if not reachable:
                continue
            # get the class category in integer form
            # all class codes were encoded to sequential integers - these correspond to the array indices
            cl_code = int(landuse_encodings[int(data_idx)])
            # iterate the distance dimensions
            for d_idx, (d, b) in enumerate(zip(distances, betas)):
                # increment class counts at respective distances if within the current threshold d
                if data_dist <= d:
                    classes_counts[d_idx, cl_code] += 1
                    # if distance is nearer, update the nearest distance array too
                    if data_dist < classes_nearest[d_idx, cl_code]:
                        classes_nearest[d_idx, cl_code] = data_dist
                    # if within distance, and if in accessibility keys, then aggregate accessibility too
                    for ac_idx, ac_code in enumerate(accessibility_keys):
                        if ac_code == cl_code:
                            accessibility_data[ac_idx, d_idx,
                                               netw_src_idx] += 1
                            accessibility_data_wt[ac_idx, d_idx,
                                                  netw_src_idx] += np.exp(
                                                      -b * data_dist)
                            # if a match was found, then no need to check others
                            break
        # mixed uses can be calculated now that the local class counts are aggregated
        # iterate the distances and betas
        for d_idx, b in enumerate(betas):
            cl_counts = classes_counts[d_idx]
            cl_nearest = classes_nearest[d_idx]
            # mu keys determine which metrics to compute
            # don't confuse with indices
            # previously used dynamic indices in data structures - but obtuse if irregularly ordered keys
            for mu_hill_key in mixed_use_hill_keys:
                for q_idx, q_key in enumerate(qs):
                    if mu_hill_key == 0:
                        mixed_use_hill_data[0, q_idx, d_idx, netw_src_idx] = \
                            diversity.hill_diversity(cl_counts, q_key)
                    elif mu_hill_key == 1:
                        mixed_use_hill_data[1, q_idx, d_idx, netw_src_idx] = \
                            diversity.hill_diversity_branch_distance_wt(cl_counts, cl_nearest, q=q_key, beta=b)
                    elif mu_hill_key == 2:
                        mixed_use_hill_data[2, q_idx, d_idx, netw_src_idx] = \
                            diversity.hill_diversity_pairwise_distance_wt(cl_counts, cl_nearest, q=q_key, beta=b)
                    # land-use classification disparity hill diversity
                    # the wt matrix can be used without mapping because cl_counts is based on all classes
                    # regardless of whether they are reachable
                    elif mu_hill_key == 3:
                        mixed_use_hill_data[3, q_idx, d_idx, netw_src_idx] = \
                            diversity.hill_diversity_pairwise_matrix_wt(cl_counts,
                                                                        wt_matrix=cl_disparity_wt_matrix,
                                                                        q=q_key)
            for mu_other_key in mixed_use_other_keys:
                if mu_other_key == 0:
                    mixed_use_other_data[0, d_idx, netw_src_idx] = \
                        diversity.shannon_diversity(cl_counts)
                elif mu_other_key == 1:
                    mixed_use_other_data[1, d_idx, netw_src_idx] = \
                        diversity.gini_simpson_diversity(cl_counts)
                elif mu_other_key == 2:
                    mixed_use_other_data[2, d_idx, netw_src_idx] = \
                        diversity.raos_quadratic_diversity(cl_counts, wt_matrix=cl_disparity_wt_matrix)
    # send the data back in the same types and same order as the original keys - convert to int for indexing
    mu_hill_k_int = np.full(len(mixed_use_hill_keys), 0)
    for i, k in enumerate(mixed_use_hill_keys):
        mu_hill_k_int[i] = k
    mu_other_k_int = np.full(len(mixed_use_other_keys), 0)
    for i, k in enumerate(mixed_use_other_keys):
        mu_other_k_int[i] = k

    return mixed_use_hill_data[mu_hill_k_int], \
           mixed_use_other_data[mu_other_k_int], \
           accessibility_data, \
           accessibility_data_wt
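
For reference, the unweighted Hill diversity selected by mu_hill_key == 0 follows the standard formulation: D_q = (sum_i p_i ** q) ** (1 / (1 - q)), with the q -> 1 limit equal to the exponential of Shannon entropy. A minimal standalone sketch (not the library's diversity.hill_diversity implementation):

import numpy as np

def hill_diversity_sketch(class_counts: np.ndarray, q: float) -> float:
    counts = class_counts[class_counts > 0]
    if counts.sum() == 0:
        return 0.0
    probs = counts / counts.sum()
    if np.isclose(q, 1.0):
        # q -> 1 limit: exponential of Shannon entropy
        return float(np.exp(-np.sum(probs * np.log(probs))))
    return float(np.sum(probs ** q) ** (1 / (1 - q)))

# e.g. class counts aggregated at a single distance threshold
print(hill_diversity_sketch(np.array([4, 4, 2]), q=0))  # 3.0, i.e. class richness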
Example n. 8
def local_centrality(node_data: np.ndarray,
                     edge_data: np.ndarray,
                     node_edge_map: Dict,
                     distances: np.ndarray,
                     betas: np.ndarray,
                     measure_keys: tuple,
                     angular: bool = False,
                     suppress_progress: bool = False) -> np.ndarray:
    '''
    Call from "compute_centrality", which handles high level checks on keys and heuristic flag
    NODE MAP:
    0 - x
    1 - y
    2 - live
    3 - ghosted
    EDGE MAP:
    0 - start node
    1 - end node
    2 - length in metres
    3 - sum of angular travel along length
    4 - impedance factor
    5 - in bearing
    6 - out bearing
    '''
    checks.check_distances_and_betas(distances, betas)
    checks.check_network_maps(node_data, edge_data, node_edge_map)
    # string comparisons will substantially slow down nested loops
    # hence the out-of-loop strategy to map strings to indices corresponding to respective measures
    # keep name and index relationships explicit
    agg_keys = []
    agg_targets = []
    seg_keys = []
    seg_targets = []
    betw_keys = []
    betw_targets = []
    for m_idx, measure_name in enumerate(measure_keys):
        if not angular:
            # aggregating keys
            if measure_name == 'node_density':
                agg_keys.append(0)
                agg_targets.append(m_idx)
            elif measure_name == 'node_farness':
                agg_keys.append(1)
                agg_targets.append(m_idx)
            elif measure_name == 'node_cycles':
                agg_keys.append(2)
                agg_targets.append(m_idx)
            elif measure_name == 'node_harmonic':
                agg_keys.append(3)
                agg_targets.append(m_idx)
            elif measure_name == 'node_beta':
                agg_keys.append(4)
                agg_targets.append(m_idx)
            # segment keys (betweenness segments can be built during betweenness iters)
            elif measure_name == 'segment_density':
                seg_keys.append(0)
                seg_targets.append(m_idx)
            elif measure_name == 'segment_harmonic':
                seg_keys.append(1)
                seg_targets.append(m_idx)
            elif measure_name == 'segment_beta':
                seg_keys.append(2)
                seg_targets.append(m_idx)
            # betweenness keys
            elif measure_name == 'node_betweenness':
                betw_keys.append(0)
                betw_targets.append(m_idx)
            elif measure_name == 'node_betweenness_beta':
                betw_keys.append(1)
                betw_targets.append(m_idx)
            elif measure_name == 'segment_betweenness':
                betw_keys.append(2)
                betw_targets.append(m_idx)
            else:
                raise ValueError('''
                    Unable to match requested centrality measure key against available options.
                    Shortest-path measures can't be mixed with simplest-path measures.
                    Set angular=True if using simplest-path measures. 
                ''')
        else:
            # aggregating keys
            if measure_name == 'node_harmonic_angular':
                agg_keys.append(5)
                agg_targets.append(m_idx)
            # segment keys
            elif measure_name == 'segment_harmonic_hybrid':
                seg_keys.append(3)
                seg_targets.append(m_idx)
            # betweenness keys
            elif measure_name == 'node_betweenness_angular':
                betw_keys.append(3)
                betw_targets.append(m_idx)
            elif measure_name == 'segment_betweeness_hybrid':
                betw_keys.append(4)
                betw_targets.append(m_idx)
            else:
                raise ValueError('''
                    Unable to match requested centrality measure key against available options.
                    Shortest-path measures can't be mixed with simplest-path measures.
                    Set angular=False if using shortest-path measures. 
                ''')
    if len(agg_keys) != len(set(agg_keys)) or \
            len(seg_keys) != len(set(seg_keys)) or \
            len(betw_keys) != len(set(betw_keys)):
        raise ValueError('Please remove duplicate measure key.')
    # flags
    betw_nodes = (0 in betw_keys or 1 in betw_keys or 3 in betw_keys)
    betw_segs = (2 in betw_keys or 4 in betw_keys)
    # prepare data arrays
    # establish variables
    n = len(node_data)
    d_n = len(distances)
    k_n = len(measure_keys)
    global_max_dist = np.nanmax(distances)
    nodes_live = node_data[:, 2]
    nodes_ghosted = node_data[:, 3]
    # the shortest path is based on impedances -> be cognisant of cases where impedances are not based on true distance:
    # in such cases, distances are equivalent to the impedance heuristic shortest path, not shortest distance in metres
    measures_data = np.full((k_n, d_n, n), 0.0, dtype=np.float32)
    steps = int(n / 10000)
    # iterate through each vert and calculate the shortest path tree
    for src_idx in range(n):
        # numba no object mode can only handle basic printing
        # note that progress bar adds a performance penalty
        if not suppress_progress:
            checks.progress_bar(src_idx, n, steps)
        # only compute for live nodes
        if not nodes_live[src_idx]:
            continue
        '''
        run the shortest tree dijkstra
        keep in mind that predecessor map is based on impedance heuristic - which can be different from metres
        distance map in metres still necessary for defining max distances and computing equivalent distance measures
        RETURNS A SHORTEST PATH TREE MAP:
        0 - processed nodes
        1 - predecessors
        2 - distances
        3 - impedances
        4 - cycles
        5 - origin segments
        6 - last segments
        '''
        tree_map, tree_edges = shortest_path_tree(edge_data,
                                                  node_edge_map,
                                                  src_idx,
                                                  max_dist=global_max_dist,
                                                  angular=angular)
        tree_nodes = np.where(tree_map[:, 0])[0]
        tree_preds = tree_map[:, 1]
        tree_dists = tree_map[:, 2]
        tree_imps = tree_map[:, 3]
        tree_cycles = tree_map[:, 4]
        tree_origin_seg = tree_map[:, 5]
        tree_last_seg = tree_map[:, 6]
        # only build edge data if necessary
        if len(seg_keys) > 0:
            # can't do edge processing as part of shortest tree because all shortest paths have to be resolved first
            # visit all processed edges
            for edge_idx in np.where(tree_edges)[0]:
                # unpack
                seg_in_nd, seg_out_nd, seg_len, seg_ang, seg_imp_fact, seg_in_bear, seg_out_bear = edge_data[
                    edge_idx]
                in_nd_idx = int(seg_in_nd)
                out_nd_idx = int(seg_out_nd)
                in_imp = tree_imps[in_nd_idx]
                out_imp = tree_imps[out_nd_idx]
                in_dist = tree_dists[in_nd_idx]
                out_dist = tree_dists[out_nd_idx]
                # don't process unreachable segments
                if np.isinf(in_dist) and np.isinf(out_dist):
                    continue
                # for conceptual simplicity, separate angular and non-angular workflows
                # non angular uses a split segment workflow
                # if the segment is on the shortest path then the second segment will squash down to naught
                if not angular:
                    # sort where a < b
                    if in_imp <= out_imp:
                        a = tree_dists[in_nd_idx]
                        a_imp = tree_imps[in_nd_idx]
                        b = tree_dists[out_nd_idx]
                        b_imp = tree_imps[out_nd_idx]
                    else:
                        a = tree_dists[out_nd_idx]
                        a_imp = tree_imps[out_nd_idx]
                        b = tree_dists[in_nd_idx]
                        b_imp = tree_imps[in_nd_idx]
                    # the max distance m along the segment satisfies seg_len = (m - a) + (m - b), i.e. m = (seg_len + a + b) / 2
                    # c and d variables can diverge per beneath
                    c = d = (seg_len + a + b) / 2
                    # c / d impedance should technically be the same if computed from either side
                    c_imp = d_imp = a_imp + (c - a) * seg_imp_fact
                    # iterate the distance and beta thresholds - from large to small for threshold snipping
                    for d_idx in range(len(distances) - 1, -1, -1):
                        dist_cutoff = distances[d_idx]
                        beta = betas[d_idx]
                        # a-c segment
                        if a <= dist_cutoff:
                            if c > dist_cutoff:
                                c = dist_cutoff
                                c_imp = a_imp + (dist_cutoff -
                                                 a) * seg_imp_fact
                            for seg_idx, seg_key in enumerate(seg_keys):
                                m_idx = seg_targets[seg_idx]
                                if seg_key == 0:
                                    measures_data[m_idx, d_idx,
                                                  src_idx] += c - a
                                elif seg_key == 1:
                                    if a_imp < 1:
                                        measures_data[m_idx, d_idx,
                                                      src_idx] += np.log(c_imp)
                                    else:
                                        measures_data[
                                            m_idx, d_idx, src_idx] += np.log(
                                                c_imp) - np.log(a_imp)
                                elif seg_key == 2:
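                                    # catch division by zero
                                    # as beta approaches 0 the distance is weighted by 1 instead of < 1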
                                    if beta == -0.0:
                                        auc = c_imp - a_imp
                                    else:
                                        auc = (np.exp(beta * c_imp) -
                                               np.exp(beta * a_imp)) / beta
                                    measures_data[m_idx, d_idx, src_idx] += auc
                        # b-d segment - if on the shortest path then d == b - in which case, continue
                        if b == d:
                            continue
                        if b <= dist_cutoff:
                            if d > dist_cutoff:
                                d = dist_cutoff
                                d_imp = b_imp + (dist_cutoff -
                                                 b) * seg_imp_fact
                            for seg_idx, seg_key in enumerate(seg_keys):
                                m_idx = seg_targets[seg_idx]
                                if seg_key == 0:
                                    measures_data[m_idx, d_idx,
                                                  src_idx] += d - b
                                elif seg_key == 1:
                                    if b_imp < 1:
                                        measures_data[m_idx, d_idx,
                                                      src_idx] += np.log(d_imp)
                                    else:
                                        measures_data[
                                            m_idx, d_idx, src_idx] += np.log(
                                                d_imp) - np.log(b_imp)
                                elif seg_key == 2:
                                    # catch division by zero
                                    # as beta approaches 0 the distance is weighted by 1 instead of < 1
                                    if beta == -0.0:
                                        auc = d_imp - b_imp
                                    else:
                                        auc = (np.exp(beta * d_imp) -
                                               np.exp(beta * b_imp)) / beta
                                    measures_data[m_idx, d_idx, src_idx] += auc
                # different workflow for angular - uses single segment
                # splitting segments would otherwise require too many assumptions about angular vs. distance shortest-paths
                else:
                    # get the approach angles for either side
                    # this involves impedance up to that point plus the turn onto the segment
                    # also add half of the segment's length-wise angular impedance
                    in_ang = in_imp + seg_ang / 2
                    # the source node won't have a predecessor
                    if in_nd_idx != src_idx:
                        # get the out bearing from the predecessor and calculate the turn onto current seg's in bearing
                        in_pred_idx = int(tree_preds[in_nd_idx])
                        e_i = _find_edge_idx(node_edge_map, edge_data,
                                             in_pred_idx, in_nd_idx)
                        in_pred_out_bear = edge_data[int(e_i), 6]
                        in_ang += np.abs(
                            (seg_in_bear - in_pred_out_bear + 180) % 360 - 180)
                    # same for other side
                    out_ang = out_imp + seg_ang / 2
                    if out_nd_idx != src_idx:
                        out_pred_idx = int(tree_preds[out_nd_idx])
                        e_i = _find_edge_idx(node_edge_map, edge_data,
                                             out_pred_idx, out_nd_idx)
                        out_pred_out_bear = edge_data[int(e_i), 6]
                        out_ang += np.abs(
                            (seg_out_bear - out_pred_out_bear + 180) % 360 -
                            180)
                    # the distance and angle are based on the smallest angular impedance onto the segment
                    # shortest-path segments will have exit bearings equal to the entry bearings
                    # in this case, select the closest by shortest distance
                    if in_ang == out_ang:
                        if in_dist < out_dist:
                            e = tree_dists[in_nd_idx]
                            ang = in_ang
                        else:
                            e = tree_dists[out_nd_idx]
                            ang = out_ang
                    elif in_ang < out_ang:
                        e = tree_dists[in_nd_idx]
                        ang = in_ang
                    else:
                        e = tree_dists[out_nd_idx]
                        ang = out_ang
                    # f is the entry distance plus segment length
                    f = e + seg_len
                    # iterate the distance thresholds - from large to small for threshold snipping
                    for d_idx in range(len(distances) - 1, -1, -1):
                        dist_cutoff = distances[d_idx]
                        if e <= dist_cutoff:
                            if f > dist_cutoff:
                                f = dist_cutoff
                            # 3 - harmonic segments hybrid
                            # Uses integral of segment distances as a base - then weighted by angular
                            for seg_idx, seg_key in enumerate(seg_keys):
                                if seg_key == 3:
                                    m_idx = seg_targets[seg_idx]
                                    # transform - prevents division by zero
                                    agg_ang = 1 + (ang / 180)
                                    # then aggregate - angular uses distances explicitly
                                    measures_data[m_idx, d_idx,
                                                  src_idx] += (f - e) / agg_ang
        # aggregative and betweenness keys can be computed per to_idx
        for to_idx in tree_nodes:
            # skip self node
            if to_idx == src_idx:
                continue
            # unpack impedance and distance for to index
            to_imp = tree_imps[to_idx]
            to_dist = tree_dists[to_idx]
            # do not proceed if no route available
            if np.isinf(to_dist):
                continue
            # node weights removed since v0.10
            # switched to edge impedance factors
            # calculate centralities
            for d_idx in range(len(distances)):
                dist_cutoff = distances[d_idx]
                beta = betas[d_idx]
                if to_dist <= dist_cutoff:
                    # iterate aggregation functions
                    for agg_idx, agg_key in enumerate(agg_keys):
                        # fetch target index for writing data
                        # stored at equivalent index in agg_targets
                        m_idx = agg_targets[agg_idx]
                        # go through keys and write data
                        # 0 - simple node counts
                        if agg_key == 0:
                            measures_data[m_idx, d_idx, src_idx] += 1
                        # 1 - farness
                        elif agg_key == 1:
                            measures_data[m_idx, d_idx, src_idx] += to_dist
                        # 2 - cycles
                        elif agg_key == 2:
                            if tree_cycles[to_idx]:
                                measures_data[m_idx, d_idx, src_idx] += 1
                        # 3 - harmonic node
                        elif agg_key == 3:
                            measures_data[m_idx, d_idx, src_idx] += 1 / to_imp
                        # 4 - beta weighted node
                        elif agg_key == 4:
                            measures_data[m_idx, d_idx,
                                          src_idx] += np.exp(beta * to_dist)
                        # 5 - harmonic node - angular
                        elif agg_key == 5:
                            a = 1 + (to_imp / 180)  # transform angles
                            measures_data[m_idx, d_idx, src_idx] += 1 / a
            # check whether betweenness keys are present prior to proceeding
            if not betw_nodes and not betw_segs:
                continue
            # only process in one direction
            if to_idx < src_idx:
                continue
            # NODE WORKFLOW
            if betw_nodes:
                # betweenness - only counting truly between vertices, not starting and ending verts
                inter_idx = int(tree_preds[to_idx])
                while True:
                    # break out of while loop if the intermediary has reached the source node
                    if inter_idx == src_idx:
                        break
                    # iterate the distance thresholds
                    for d_idx in range(len(distances)):
                        dist_cutoff = distances[d_idx]
                        beta = betas[d_idx]
                        # check threshold
                        if tree_dists[to_idx] <= dist_cutoff:
                            # iterate betweenness functions
                            for betw_idx, betw_key in enumerate(betw_keys):
                                # fetch target index for writing data
                                # stored at equivalent index in betw_targets
                                m_idx = betw_targets[betw_idx]
                                # go through keys and write data
                                # simple count of nodes for betweenness
                                if betw_key == 0:
                                    measures_data[m_idx, d_idx, inter_idx] += 1
                                # 1 - beta weighted betweenness
                                # the weight is based on the distance between the from and to vertices
                                # and is attributed to the intermediary (between) vertex
                                elif betw_key == 1:
                                    measures_data[m_idx, d_idx,
                                                  inter_idx] += np.exp(
                                                      beta * to_dist)
                                # 3 - betweenness node count - angular heuristic version
                                elif betw_key == 3:
                                    measures_data[m_idx, d_idx, inter_idx] += 1
                    # follow the chain
                    inter_idx = int(tree_preds[inter_idx])
            if betw_segs:
                # segment versions only aggregate the first and last segments - intervening segments are processed via other to-nodes
                o_seg_len = edge_data[int(tree_origin_seg[to_idx])][2]
                l_seg_len = edge_data[int(tree_last_seg[to_idx])][2]
                min_seg_span = tree_dists[to_idx] - o_seg_len - l_seg_len
                o_1 = min_seg_span
                o_2 = min_seg_span + o_seg_len
                l_1 = min_seg_span
                l_2 = min_seg_span + l_seg_len
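                # o_1/o_2 and l_1/l_2 bracket the origin and last segments as
                # distance spans measured along the shortest path from the source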
                # betweenness - only counting truly between vertices, not starting and ending verts
                inter_idx = int(tree_preds[to_idx])
                while True:
                    # break out of while loop if the intermediary has reached the source node
                    if inter_idx == src_idx:
                        break
                    # iterate the distance thresholds - from large to small for threshold snipping
                    for d_idx in range(len(distances) - 1, -1, -1):
                        dist_cutoff = distances[d_idx]
                        beta = betas[d_idx]
                        if min_seg_span <= dist_cutoff:
                            # prune if necessary
                            if o_2 > dist_cutoff:
                                o_2 = dist_cutoff
                            if l_2 > dist_cutoff:
                                l_2 = dist_cutoff
                            for betw_idx, betw_key in enumerate(betw_keys):
                                m_idx = betw_targets[betw_idx]
                                # 2 - segment version of betweenness
                                if betw_key == 2:
                                    # catch division by zero
                                    if beta == -0.0:
                                        auc = o_2 - o_1 + l_2 - l_1
                                    else:
                                        auc = (np.exp(beta * o_2) -
                                               np.exp(beta * o_1)) / beta + \
                                              (np.exp(beta * l_2) -
                                               np.exp(beta * l_1)) / beta
                                    measures_data[m_idx, d_idx,
                                                  inter_idx] += auc
                                # 4 - betweenness segment hybrid version
                                elif betw_key == 4:
                                    bt_ang = 1 + tree_imps[to_idx] / 180
                                    pt_a = o_2 - o_1
                                    pt_b = l_2 - l_1
                                    measures_data[m_idx, d_idx,
                                                  inter_idx] += (pt_a +
                                                                 pt_b) / bt_ang
                    # follow the chain
                    inter_idx = int(tree_preds[inter_idx])

    return measures_data
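
The beta-weighted segment measures above rely on a closed-form area under the decay curve: the integral of exp(beta * x) from a to c equals (exp(beta * c) - exp(beta * a)) / beta, which is exactly the "auc" expression in the loops. A minimal standalone check with assumed values (not library code):

import numpy as np

beta, a, c = -0.01, 50.0, 350.0  # hypothetical beta and segment span
closed_form = (np.exp(beta * c) - np.exp(beta * a)) / beta
xs = np.linspace(a, c, 100001)
numeric = np.trapz(np.exp(beta * xs), xs)  # numerical integration for comparison
print(closed_form, numeric)  # the two values should agree closely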