def test_decomposed_local_centrality():
    """
    Compare centralities computed on a graph against those computed on its decomposed version.

    Both runs use the same betas / distances and the same measure keys. Values for the decomposed
    network are compared only at the nodes whose `node_data` column 3 equals 0. Only the measures at
    indices 5, 6 and 7 of `measure_keys` are asserted to match; mismatches for the remaining measures
    are merely printed.
    """
    betas = np.array([-0.02, -0.01, -0.005, -0.0008, -0.0])
    distances = networks.distance_from_beta(betas)
    measure_keys = ('node_density',
                    'node_farness',
                    'node_cycles',
                    'node_harmonic',
                    'node_beta',
                    'segment_density',
                    'segment_harmonic',
                    'segment_beta',
                    'node_betweenness',
                    'node_betweenness_beta',
                    'segment_betweenness')

    # baseline: the non-decomposed graph
    G = mock.mock_graph()
    G = graphs.nX_simple_geoms(G)
    node_uids, node_data, edge_data, node_edge_map = graphs.graph_maps_from_nX(G)
    measures_data = centrality.local_centrality(node_data,
                                                edge_data,
                                                node_edge_map,
                                                distances,
                                                betas,
                                                measure_keys,
                                                angular=False)

    # comparison: the same graph decomposed with a parameter of 20
    G_decomposed = graphs.nX_decompose(G, 20)
    node_uids, node_data, edge_data, node_edge_map = graphs.graph_maps_from_nX(G_decomposed)
    checks.check_network_maps(node_data, edge_data, node_edge_map)
    measures_data_decomposed = centrality.local_centrality(node_data,
                                                           edge_data,
                                                           node_edge_map,
                                                           distances,
                                                           betas,
                                                           measure_keys,
                                                           angular=False)

    # both result arrays are indexed as (measure, distance, node)
    d_range = len(distances)
    m_range = len(measure_keys)
    assert measures_data.shape == (m_range, d_range, len(G))
    assert measures_data_decomposed.shape == (m_range, d_range, len(G_decomposed))

    # node_data here belongs to the decomposed graph: pick out the rows where column 3 is 0
    original_node_idx = np.where(node_data[:, 3] == 0)

    # with increasing decomposition:
    # - node based measures will not match
    # - node based segment measures will match - these measure to the cut endpoints per thresholds
    # - betweenness based segment won't match - doesn't measure to cut endpoints
    for m_idx, measure_key in enumerate(measure_keys):
        print(m_idx)
        for d_idx, dist in enumerate(distances):
            expected = measures_data[m_idx][d_idx]
            observed = measures_data_decomposed[m_idx][d_idx][original_node_idx]
            # relaxed precision: absolute tolerance only
            match = np.allclose(expected, observed, atol=0.1, rtol=0)
            if not match:
                print('key', measure_key, 'dist:', dist, 'match:', match)
            if m_idx in [5, 6, 7]:
                assert match
def __init__(self,
             node_uids: list | tuple,
             node_data: np.ndarray,
             edge_data: np.ndarray,
             node_edge_map: Dict,
             distances: list | tuple | np.ndarray = None,
             betas: list | tuple | np.ndarray = None,
             min_threshold_wt: float = checks.def_min_thresh_wt):
    r"""
    Parameters
    ----------
    node_uids
        A `list` or `tuple` of node identifiers corresponding to each node. This list must be in the same order
        and of the same length as the `node_data`.
    node_data
        A 2d `numpy` array representing the graph's nodes. The indices of the second dimension correspond as
        follows:

        | idx | property |
        |-----|:---------|
        | 0 | `x` coordinate |
        | 1 | `y` coordinate |
        | 2 | `bool` describing whether the node is `live`. Metrics are only computed for `live` nodes. |

        The `x` and `y` node attributes determine the spatial coordinates of the node, and should be in a
        suitable projected (flat) coordinate reference system in metres.
        [`nX_wgs_to_utm`](/tools/graphs/#nx_wgs_to_utm) can be used for converting a `networkX` graph from
        WGS84 `lng`, `lat` geographic coordinates to the local UTM `x`, `y` projected coordinate system.

        When calculating local network centralities or land-use accessibilities, it is best-practice to buffer
        the network by a distance equal to the maximum distance threshold to be considered. This prevents
        problematic results arising due to boundary roll-off effects.

        The `live` node attribute identifies nodes falling within the areal boundary of interest as opposed to
        those that fall within the surrounding buffered area. Calculations are only performed for `live=True`
        nodes, thus reducing frivolous computation while also cleanly identifying which nodes are in the
        buffered roll-off area. If some other process will be used for filtering the nodes, or if boundary
        roll-off is not being considered, then set all nodes to `live=True`.
    edge_data
        A 2d `numpy` array representing the graph's edges. Each edge will be described separately for each
        direction of travel. The indices of the second dimension correspond as follows:

        | idx | property |
        |-----|:---------|
        | 0 | start node `idx` |
        | 1 | end node `idx` |
        | 2 | the segment length in metres |
        | 3 | the sum of segment's angular change |
        | 4 | an 'impedance factor' which can be applied to magnify or reduce the effect of the edge's impedance on shortest-path calculations. e.g. for gradients or other such considerations. Use with caution. |
        | 5 | the edge's entry angular bearing |
        | 6 | the edge's exit angular bearing |

        The start and end edge `idx` attributes point to the corresponding node indices in the `node_data`
        array.

        The `length` edge attribute (index 2) should always correspond to the edge lengths in metres. This is
        used when calculating the distances traversed by the shortest-path algorithm so that the respective
        $d_{max}$ maximum distance thresholds can be enforced: these distance thresholds are based on the
        actual network-paths traversed by the algorithm as opposed to crow-flies distances.

        The `angle_sum` edge attribute (index 3) should correspond to the total angular change along the length
        of the segment. This is used when calculating angular impedances for simplest-path measures. The
        `start_bearing` (index 5) and `end_bearing` (index 6) attributes respectively represent the starting
        and ending bearing of the segment. This is also used when calculating simplest-path measures when the
        algorithm steps from one edge to another.

        The `imp_factor` edge attribute (index 4) represents an impedance multiplier for increasing or
        diminishing the impedance of an edge. This is ordinarily set to 1, therefore not impacting
        calculations. By setting this to greater or less than 1, the edge will have a correspondingly higher
        or lower impedance. This can be used to take considerations such as street gradients into account, but
        should be used with caution.
    node_edge_map
        A `numba` `Dict` with `node_data` indices as keys and `numba` `List` types as values containing the
        out-edge indices for each node.
    distances
        A distance, or `list`, `tuple`, or `numpy` array of distances corresponding to the local $d_{max}$
        thresholds to be used for centrality (and land-use) calculations. The $\beta$ parameters (for
        distance-weighted metrics) will be determined implicitly. If the `distances` parameter is not
        provided, then the `betas` parameter must be provided instead. Use a distance of `np.inf` where no
        distance threshold should be enforced.
    betas
        A $\beta$, or `list`, `tuple`, or `numpy` array of $\beta$ to be used for the exponential decay
        function for weighted metrics. The `distance` parameters for unweighted metrics will be determined
        implicitly. If the `betas` parameter is not provided, then the `distances` parameter must be provided
        instead.
    min_threshold_wt
        The default `min_threshold_wt` parameter can be overridden to generate custom mappings between the
        `distance` and `beta` parameters. See [`distance_from_beta`](#distance_from_beta) for more
        information.

    Returns
    -------
    NetworkLayer
        A `NetworkLayer`.

    Raises
    ------
    ValueError
        If `node_uids` and `node_data` differ in length, if an empty collection of distances or betas is
        provided, or if `distances` and `betas` are both provided or both omitted.
    TypeError
        If `distances` or `betas` is neither a number nor a `list`, `tuple`, or `numpy.ndarray`.

    Notes
    -----
    :::tip Comment
    It is possible to represent unlimited $d_{max}$ distance thresholds by setting one of the specified
    `distance` parameter values to `np.inf`. Note that this may substantially increase the computational time
    required for the completion of the algorithms on large networks.
    :::

    Properties
    ----------
    """
    # NOTE: the docstring is a raw string so that `\beta` is not read as a backspace escape (`\b`)
    self._uids = node_uids
    self._node_data = node_data
    self._edge_data = edge_data
    self._node_edge_map = node_edge_map
    self._distances = distances
    self._betas = betas
    self._min_threshold_wt = min_threshold_wt
    # containers for computed results, keyed by metric family
    self.metrics = {
        'centrality': {},
        'mixed_uses': {},
        'accessibility': {
            'non_weighted': {},
            'weighted': {}
        },
        'stats': {},
        'models': {}
    }
    # for storing originating networkX graph
    self._networkX_multigraph = None
    # check the data structures
    if len(self._uids) != len(self._node_data):
        raise ValueError('The number of indices does not match the number of nodes.')
    # check network maps
    checks.check_network_maps(self._node_data, self._edge_data, self._node_edge_map)
    # exactly one of distances / betas must be provided: the other is derived from it
    if self._distances is not None and self._betas is None:
        # a lone number is wrapped so that downstream code always sees a collection
        if isinstance(self._distances, (int, float)):
            self._distances = [self._distances]
        if not isinstance(self._distances, (list, tuple, np.ndarray)):
            raise TypeError('Please provide a distance, or a list, tuple, or numpy.ndarray of distances.')
        if len(self._distances) == 0:
            raise ValueError('Please provide at least one distance.')
        # generate the betas
        self._betas = beta_from_distance(self._distances, min_threshold_wt=self._min_threshold_wt)
    elif self._betas is not None and self._distances is None:
        # accept int as well as float scalars, consistent with the handling of distances above
        if isinstance(self._betas, (int, float)):
            self._betas = [self._betas]
        if not isinstance(self._betas, (list, tuple, np.ndarray)):
            raise TypeError('Please provide a beta, or a list, tuple, or numpy.ndarray of betas.')
        if len(self._betas) == 0:
            raise ValueError('Please provide at least one beta.')
        # generate the distances
        self._distances = distance_from_beta(self._betas, min_threshold_wt=self._min_threshold_wt)
    else:
        # reached when both, or neither, of distances and betas were provided
        raise ValueError('Please provide either distances or betas, but not both.')
def test_check_network_maps():
    """
    Exercise `checks.check_network_maps` with empty, malformed and corrupted node / edge maps.

    A network layer built from the mock graph supplies the valid maps; each case swaps in one broken
    array and expects the check to raise.
    """
    # network maps
    G = graphs.nX_simple_geoms(mock.mock_graph())
    N = networks.Network_Layer_From_nX(G, distances=[500])

    # catch zero length node and edge arrays
    with pytest.raises(ValueError):
        checks.check_network_maps(np.full((0, 5), np.nan), N._edge_data, N._node_edge_map)
    with pytest.raises(ValueError):
        checks.check_network_maps(N._node_data, np.full((0, 4), np.nan), N._node_edge_map)

    # check that malformed node and data maps (last column dropped) throw errors
    with pytest.raises(ValueError):
        checks.check_network_maps(N._node_data[:, :-1], N._edge_data, N._node_edge_map)
    with pytest.raises(ValueError):
        checks.check_network_maps(N._node_data, N._edge_data[:, :-1], N._node_edge_map)

    # catch problematic edge map values:
    # (edge_data column corrupted on the first edge, exception expected from the check)
    corruption_cases = (
        (0, AssertionError),  # missing start node
        (1, KeyError),  # missing end node
        (2, ValueError),  # invalid length
        (3, ValueError),  # invalid angle_sum
        (4, ValueError),  # invalid imp_factor
    )
    for bad_val in [np.nan, -1]:
        for col_idx, expected_exc in corruption_cases:
            # corrupt a copy so that the layer's own edge map stays intact between cases
            corrupted_edges = N._edge_data.copy()
            corrupted_edges[0, col_idx] = bad_val
            with pytest.raises(expected_exc):
                checks.check_network_maps(N._node_data, corrupted_edges, N._node_edge_map)
def __init__(self,
             node_uids: Union[list, tuple],
             node_data: np.ndarray,
             edge_data: np.ndarray,
             node_edge_map: Dict,
             distances: Union[list, tuple, np.ndarray] = None,
             betas: Union[list, tuple, np.ndarray] = None,
             min_threshold_wt: float = checks.def_min_thresh_wt):
    '''
    Store the network maps, validate them, and derive whichever of distances / betas was not provided.

    NODE MAP:
    0 - x
    1 - y
    2 - live
    3 - ghosted

    EDGE MAP:
    0 - start node
    1 - end node
    2 - length in metres
    3 - sum of angular travel along length
    4 - impedance factor
    5 - in bearing
    6 - out bearing

    Parameters
    ----------
    node_uids
        Node identifiers; must have the same length as `node_data`.
    node_data
        Node array laid out per NODE MAP above.
    edge_data
        Edge array laid out per EDGE MAP above.
    node_edge_map
        Mapping passed, together with the node and edge arrays, to `checks.check_network_maps`.
    distances
        A number, or a `list`, `tuple` or `numpy.ndarray` of distances. If provided (without `betas`), the
        betas are generated with `beta_from_distance`.
    betas
        A number, or a `list`, `tuple` or `numpy.ndarray` of betas. If provided (without `distances`), the
        distances are generated with `distance_from_beta`.
    min_threshold_wt
        Forwarded as `min_threshold_wt` to `beta_from_distance` / `distance_from_beta`.

    Raises
    ------
    ValueError
        If `node_uids` and `node_data` differ in length, if an empty collection of distances or betas is
        provided, or if `distances` and `betas` are both provided or both omitted.
    TypeError
        If `distances` or `betas` is neither a number nor a `list`, `tuple`, or `numpy.ndarray`.
    '''
    self._uids = node_uids
    self._node_data = node_data
    self._edge_data = edge_data
    self._node_edge_map = node_edge_map
    self._distances = distances
    self._betas = betas
    self._min_threshold_wt = min_threshold_wt
    # containers for computed results, keyed by metric family
    self.metrics = {
        'centrality': {},
        'mixed_uses': {},
        'accessibility': {
            'non_weighted': {},
            'weighted': {}
        },
        'stats': {},
        'models': {}
    }
    # for storing originating networkX graph
    self._networkX = None
    # check the data structures
    if len(self._uids) != len(self._node_data):
        raise ValueError('The number of indices does not match the number of nodes.')
    # check network maps
    checks.check_network_maps(self._node_data, self._edge_data, self._node_edge_map)
    # exactly one of distances / betas must be provided: the other is derived from it
    if self._distances is not None and self._betas is None:
        # a lone number is wrapped so that downstream code always sees a collection
        if isinstance(self._distances, (int, float)):
            self._distances = [self._distances]
        if not isinstance(self._distances, (list, tuple, np.ndarray)):
            raise TypeError('Please provide a distance, or a list, tuple, or numpy.ndarray of distances.')
        if len(self._distances) == 0:
            raise ValueError('Please provide at least one distance.')
        # generate the betas
        self._betas = beta_from_distance(self._distances, min_threshold_wt=self._min_threshold_wt)
    elif self._betas is not None and self._distances is None:
        # accept int as well as float scalars, consistent with the handling of distances above
        if isinstance(self._betas, (int, float)):
            self._betas = [self._betas]
        if not isinstance(self._betas, (list, tuple, np.ndarray)):
            raise TypeError('Please provide a beta, or a list, tuple, or numpy.ndarray of betas.')
        if len(self._betas) == 0:
            raise ValueError('Please provide at least one beta.')
        # generate the distances
        self._distances = distance_from_beta(self._betas, min_threshold_wt=self._min_threshold_wt)
    else:
        # reached when both, or neither, of distances and betas were provided
        raise ValueError('Please provide either distances or betas, but not both.')
def test_graph_maps_from_nX(diamond_graph):
    """
    Check `graphs.graph_maps_from_nX` against the diamond graph and its dual.

    The node and edge maps are compared with the source graph's attributes and with hard-coded expected
    edge values; afterwards, malformed graphs (missing geoms, non-LineString geoms, missing node keys,
    invalid impedance factors) are checked to raise.
    """
    # expected (length, angle_sum, imp_factor, start_bearing, end_bearing) keyed by (start idx, end idx)
    expected_primal = {
        (0, 1): (100.0, 0.0, 1.0, 120.0, 120.0),
        (0, 2): (100.0, 0.0, 1.0, 60.0, 60.0),
        (1, 0): (100.0, 0.0, 1.0, -60.0, -60.0),
        (1, 2): (100.0, 0.0, 1.0, 0.0, 0.0),
        (1, 3): (100.0, 0.0, 1.0, 60.0, 60.0),
        (2, 0): (100.0, 0.0, 1.0, -120.0, -120.0),
        (2, 1): (100.0, 0.0, 1.0, 180.0, 180.0),
        (2, 3): (100.0, 0.0, 1.0, 120.0, 120.0),
        (3, 1): (100.0, 0.0, 1.0, -120.0, -120.0),
        (3, 2): (100.0, 0.0, 1.0, -60.0, -60.0),
    }
    # trailing comments give the dual node uids for the start and end index
    expected_dual = {
        (0, 1): (100.0, 120.0, 1.0, -60.0, 60.0),  # 0_1 0_2
        (0, 2): (100.0, 120.0, 1.0, 120.0, 0.0),  # 0_1 1_2
        (0, 3): (100.0, 60.0, 1.0, 120.0, 60.0),  # 0_1 1_3
        (1, 0): (100.0, 120.0, 1.0, -120.0, 120.0),  # 0_2 0_1
        (1, 2): (100.0, 120.0, 1.0, 60.0, 180.0),  # 0_2 1_2
        (1, 4): (100.0, 60.0, 1.0, 60.0, 120.0),  # 0_2 2_3
        (2, 0): (100.0, 120.0, 1.0, 180.0, -60.0),  # 1_2 0_1
        (2, 1): (100.0, 120.0, 1.0, 0.0, -120.0),  # 1_2 0_2
        (2, 3): (100.0, 120.0, 1.0, 180.0, 60.0),  # 1_2 1_3
        (2, 4): (100.0, 120.0, 1.0, 0.0, 120.0),  # 1_2 2_3
        (3, 0): (100.0, 60.0, 1.0, -120.0, -60.0),  # 1_3 0_1
        (3, 2): (100.0, 120.0, 1.0, -120.0, 0.0),  # 1_3 1_2
        (3, 4): (100.0, 120.0, 1.0, 60.0, -60.0),  # 1_3 2_3
        (4, 1): (100.0, 60.0, 1.0, -60.0, -120.0),  # 2_3 0_2
        (4, 2): (100.0, 120.0, 1.0, -60.0, 180.0),  # 2_3 1_2
        (4, 3): (100.0, 120.0, 1.0, 120.0, -120.0),  # 2_3 1_3
    }

    # test maps vs. networkX
    G_test = diamond_graph.copy()
    G_test_dual = graphs.nX_to_dual(G_test)
    for G, expected_edges in ((G_test, expected_primal), (G_test_dual, expected_dual)):
        # set some random 'live' statuses
        # randint's upper bound is exclusive: (0, 2) is needed to draw both 0 and 1
        for n in G.nodes():
            G.nodes[n]['live'] = bool(np.random.randint(0, 2))
        # generate test maps
        node_uids, node_data, edge_data, node_edge_map = graphs.graph_maps_from_nX(G)
        # run check (this checks node to edge maps internally)
        checks.check_network_maps(node_data, edge_data, node_edge_map)
        # check lengths
        assert len(node_uids) == len(node_data) == G.number_of_nodes()
        # edges = x2 (one entry per direction of travel)
        assert len(edge_data) == G.number_of_edges() * 2
        # check node maps (idx and label match in this case...)
        for n_idx, n_label in enumerate(node_uids):
            assert node_data[n_idx][0] == G.nodes[n_label]['x']
            assert node_data[n_idx][1] == G.nodes[n_label]['y']
            assert node_data[n_idx][2] == G.nodes[n_label]['live']
        # check edge maps (idx and label match in this case...)
        for start, end, length, angle, imp_fact, start_bear, end_bear in edge_data:
            edge_key = (int(start), int(end))
            if edge_key not in expected_edges:
                raise KeyError('Unmatched edge.')
            observed = (length, angle, imp_fact, start_bear, end_bear)
            assert observed == expected_edges[edge_key], \
                f'edge {node_uids[edge_key[0]]} -> {node_uids[edge_key[1]]}: {observed}'

    # check that missing geoms throw an error
    G_test = diamond_graph.copy()
    s, e, k = next(iter(G_test.edges(keys=True)))
    # delete the geom from the first edge only
    del G_test[s][e][k]['geom']
    with pytest.raises(KeyError):
        graphs.graph_maps_from_nX(G_test)

    # check that non-LineString geoms throw an error
    G_test = diamond_graph.copy()
    for s, e, k in G_test.edges(keys=True):
        G_test[s][e][k]['geom'] = geometry.Point([G_test.nodes[s]['x'], G_test.nodes[s]['y']])
    with pytest.raises(TypeError):
        graphs.graph_maps_from_nX(G_test)

    # check that missing node keys throw an error
    for node_key in ['x', 'y']:
        # use a fresh copy per key: otherwise the already-missing 'x' would mask the 'y' case
        G_test = diamond_graph.copy()
        first_node = next(iter(G_test.nodes()))
        del G_test.nodes[first_node][node_key]
        with pytest.raises(KeyError):
            graphs.graph_maps_from_nX(G_test)

    # check that invalid imp_factors are caught
    G_test = diamond_graph.copy()
    s, e, k = next(iter(G_test.edges(keys=True)))
    for corrupt_val in [-1, -np.inf, np.nan]:
        # corrupt the imp_factor of the first edge only
        G_test[s][e][k]['imp_factor'] = corrupt_val
        with pytest.raises(ValueError):
            graphs.graph_maps_from_nX(G_test)
def singly_constrained(node_data: np.ndarray,
                       edge_data: np.ndarray,
                       node_edge_map: Dict,
                       distances: np.ndarray,
                       betas: np.ndarray,
                       i_data_map: np.ndarray,
                       j_data_map: np.ndarray,
                       i_weights: np.ndarray,
                       j_weights: np.ndarray,
                       angular: bool = False,
                       suppress_progress: bool = False) -> Tuple[np.ndarray, np.ndarray]:
    '''
    - Calculates trips from i to j and returns the assigned trips and network assigned flows for j nodes
    #TODO: consider enhanced numerical checks for single vs. multi dimensional numerical data

    - Keeping separate from local aggregator because singly-constrained origin / destination models computed separately
    - Requires two iters, one to gather all k-nodes to per j node, then another to get the ratio of j / k attractiveness
    - Assigns j -> k trips over the network as part of second iter

    The first pass sums, per i point and per distance threshold, the j weights multiplied by
    exp(total distance * beta) for every reachable j within the threshold. The second pass divides each i-j
    term by that sum, scales it by the i weight, and adds the result to the j point as well as to every
    network node on the predecessor chain from j's assigned node back to i's assigned node (end nodes
    included).

    NODE MAP:
    0 - x
    1 - y
    2 - live
    3 - ghosted

    EDGE MAP:
    0 - start node
    1 - end node
    2 - length in metres
    3 - sum of angular travel along length
    4 - impedance factor
    5 - entry bearing
    6 - exit bearing

    DATA MAP:
    0 - x
    1 - y
    2 - assigned network index - nearest
    3 - assigned network index - next-nearest

    Returns a tuple of two arrays: the trips assigned to each j point, shaped (distances, j points), and the
    flows assigned to each network node, shaped (distances, nodes).

    Raises ValueError if the weights arrays differ in length from their respective data maps.
    '''
    # NOTE(review): kept as plain loops and builtin casts - presumably compiled with numba; confirm
    checks.check_network_maps(node_data, edge_data, node_edge_map)
    checks.check_distances_and_betas(distances, betas)
    checks.check_data_map(i_data_map, check_assigned=True)
    checks.check_data_map(j_data_map, check_assigned=True)

    if len(i_weights) != len(i_data_map):
        raise ValueError('The i_weights array must be the same length as the i_data_map.')

    if len(j_weights) != len(j_data_map):
        raise ValueError('The j_weights array must be the same length as the j_data_map.')

    # establish variables
    netw_n = len(node_data)
    d_n = len(distances)
    global_max_dist = np.max(distances)
    netw_flows = np.full((d_n, netw_n), 0.0)

    i_n = len(i_data_map)
    k_agg = np.full((d_n, i_n), 0.0)

    j_n = len(j_data_map)
    j_assigned = np.full((d_n, j_n), 0.0)

    # progress bar granularity, shared by both passes
    steps = int(i_n / 10000)

    # iterate all i nodes
    # filter all reachable nodes k and aggregate k attractiveness * negative exponential of distance
    for i_idx in range(i_n):
        if not suppress_progress:
            checks.progress_bar(i_idx, i_n, steps)
        # get the nearest node
        i_assigned_netw_idx = int(i_data_map[i_idx, 2])
        # calculate the base distance from the data point to the nearest assigned node
        i_x, i_y = i_data_map[i_idx, :2]
        n_x, n_y = node_data[i_assigned_netw_idx, :2]
        i_door_dist = np.hypot(i_x - n_x, i_y - n_y)
        # find the reachable j data points and their respective points from the closest node
        reachable_j, reachable_j_dist, tree_preds = aggregate_to_src_idx(i_assigned_netw_idx,
                                                                         node_data,
                                                                         edge_data,
                                                                         node_edge_map,
                                                                         j_data_map,
                                                                         global_max_dist,
                                                                         angular)
        # aggregate the weighted j (all k) nodes
        # iterate the reachable indices and related distances
        for j_idx, (j_reachable, j_dist) in enumerate(zip(reachable_j, reachable_j_dist)):
            if not j_reachable:
                continue
            # the total distance does not depend on the threshold, so compute it once per j
            total_dist = j_dist + i_door_dist
            # iterate the distance dimensions
            for d_idx, (d, b) in enumerate(zip(distances, betas)):
                # increment weighted k aggregations at respective distances if the distance is less than current d
                if total_dist <= d:
                    k_agg[d_idx, i_idx] += j_weights[j_idx] * np.exp(total_dist * b)

    # this is the second step
    # this time, filter all reachable j vertices and aggregate the proportion of flow from i to j
    # this is done by dividing i-j flow through i-k_agg flow from previous step
    for i_idx in range(i_n):
        if not suppress_progress:
            checks.progress_bar(i_idx, i_n, steps)
        # get the nearest node
        i_assigned_netw_idx = int(i_data_map[i_idx, 2])
        # calculate the base distance from the data point to the nearest assigned node
        i_x, i_y = i_data_map[i_idx, :2]
        n_x, n_y = node_data[i_assigned_netw_idx, :2]
        i_door_dist = np.hypot(i_x - n_x, i_y - n_y)
        # find the reachable j data points and their respective points from the closest node
        reachable_j, reachable_j_dist, tree_preds = aggregate_to_src_idx(i_assigned_netw_idx,
                                                                         node_data,
                                                                         edge_data,
                                                                         node_edge_map,
                                                                         j_data_map,
                                                                         global_max_dist,
                                                                         angular)
        # aggregate j divided through all k nodes
        # iterate the reachable indices and related distances
        for j_idx, (j_reachable, j_dist) in enumerate(zip(reachable_j, reachable_j_dist)):
            if not j_reachable:
                continue
            total_dist = j_dist + i_door_dist
            # iterate the distance dimensions
            for d_idx, (d, b) in enumerate(zip(distances, betas)):
                # if the distance is less than current d
                if total_dist <= d:
                    # aggregate all flows from reachable j's to i_idx
                    # divide through respective i-k_agg sums
                    # catch division by zero:
                    if k_agg[d_idx, i_idx] == 0:
                        assigned = 0
                    else:
                        assigned = i_weights[i_idx] * j_weights[j_idx] * np.exp(total_dist * b) / k_agg[d_idx, i_idx]
                    j_assigned[d_idx, j_idx] += assigned
                    # assign trips to network
                    if assigned != 0:
                        # get the j assigned node
                        j_assigned_netw_idx = int(j_data_map[j_idx, 2])
                        # in this case start and end nodes are counted...!
                        netw_flows[d_idx, j_assigned_netw_idx] += assigned
                        # skip if same start / end node
                        if j_assigned_netw_idx == i_assigned_netw_idx:
                            continue
                        # aggregate to the network
                        # builtin int: the np.int alias was removed in NumPy 1.24
                        inter_idx = int(tree_preds[j_assigned_netw_idx])
                        while True:
                            # end nodes counted, so place above break
                            netw_flows[d_idx, inter_idx] += assigned
                            # break out of while loop if the intermediary has reached the source node
                            if inter_idx == i_assigned_netw_idx:
                                break
                            # follow the chain
                            inter_idx = int(tree_preds[inter_idx])

    return j_assigned, netw_flows
def assign_to_network(data_map: np.ndarray,
                      node_data: np.ndarray,
                      edge_data: np.ndarray,
                      node_edge_map: Dict,
                      max_dist: float,
                      suppress_progress: bool = False) -> np.ndarray:
    '''
    Assign each data point to a nearest and next-nearest network node, writing both into the data map.

    To save unnecessary computation - this is done once and written to the data map.

    1 - find the closest network node from each data point
    2A - wind clockwise along the network to preferably find a block cycle surrounding the node
    2B - in event of topological traps, try anti-clockwise as well
    3A - select the closest block cycle node
    3B - if no enclosing cycle - simply use the closest node
    4 - find the neighbouring node that minimises the distance between the data point on "street-front"

    NODE MAP:
    0 - x
    1 - y
    2 - live
    3 - ghosted

    EDGE MAP:
    0 - start node
    1 - end node
    2 - length in metres
    3 - sum of angular travel along length
    4 - impedance factor
    5 - entry bearing
    6 - exit bearing

    DATA MAP:
    0 - x
    1 - y
    2 - assigned network index - nearest
    3 - assigned network index - next-nearest

    The network maps are validated with `checks.check_network_maps` before any work is done.

    `max_dist` is passed to `find_nearest` and also bounds the walk: once the next node is further than
    `max_dist` from the data point, the walk is evaluated and then retried once in the reverse direction.

    `data_map` is modified in place (columns 2 and 3) and the same array is returned. Rows for which
    `find_nearest` yields a NaN index are skipped and left as they were.
    '''
    checks.check_network_maps(node_data, edge_data, node_edge_map)

    def calculate_rotation(point_a, point_b):
        # returns the angle from point_b to point_a in degrees, wrapped to the range [0, 360)
        # https://stackoverflow.com/questions/37459121/calculating-angle-between-three-points-but-only-anticlockwise-in-python
        # these two points / angles are relative to the origin - so pass in difference between the points and origin as vectors
        ang_a = np.arctan2(point_a[1], point_a[0])  # arctan is in y/x order
        ang_b = np.arctan2(point_b[1], point_b[0])
        return np.rad2deg((ang_a - ang_b) % (2 * np.pi))

    def calculate_rotation_smallest(point_a, point_b):
        # smallest difference angle: absolute, in degrees, in the range [0, 180]
        ang_a = np.rad2deg(np.arctan2(point_a[1], point_a[0]))
        ang_b = np.rad2deg(np.arctan2(point_b[1], point_b[0]))
        return np.abs((ang_b - ang_a + 180) % 360 - 180)

    def road_distance(d_coords, netw_idx_a, netw_idx_b):
        # returns (distance, nearest node idx, next-nearest node idx) for the data point relative to the
        # segment between nodes a and b, or (inf, nan, nan) if the segment is rejected
        a_coords = node_data[netw_idx_a, :2]
        b_coords = node_data[netw_idx_b, :2]
        # get the angles from either intersection node to the data point
        ang_a = calculate_rotation_smallest(d_coords - a_coords, b_coords - a_coords)
        ang_b = calculate_rotation_smallest(d_coords - b_coords, a_coords - b_coords)
        # assume offset street segment if either is significantly greater than 90 (in which case sideways offset from road)
        if ang_a > 110 or ang_b > 110:
            return np.inf, np.nan, np.nan
        # calculate height from two sides and included angle
        side_a = np.hypot(d_coords[0] - a_coords[0], d_coords[1] - a_coords[1])
        side_b = np.hypot(d_coords[0] - b_coords[0], d_coords[1] - b_coords[1])
        base = np.hypot(a_coords[0] - b_coords[0], a_coords[1] - b_coords[1])
        # forestall potential division by zero
        if base == 0:
            return np.inf, np.nan, np.nan
        # heron's formula
        s = (side_a + side_b + base) / 2  # perimeter / 2
        a = np.sqrt(s * (s - side_a) * (s - side_b) * (s - base))
        # area is 1/2 base * h, so h = area / (0.5 * base)
        h = a / (0.5 * base)
        # NOTE - the height of the triangle may be less than the distance to the nodes
        # happens due to offset segments: can cause wrong assignment where adjacent segments have same triangle height
        # in this case, set to length of closest node so that h (minimum distance) is still meaningful
        # return indices in order of nearest then next nearest
        if side_a < side_b:
            if ang_a > 90:
                h = side_a
            return h, netw_idx_a, netw_idx_b
        else:
            if ang_b > 90:
                h = side_b
            return h, netw_idx_b, netw_idx_a

    def closest_intersections(d_coords, pr_map, end_node):
        # follows the predecessor chain in pr_map starting from end_node and returns the smallest
        # road_distance found along it, together with that segment's nearest and next-nearest node indices
        if len(pr_map) == 1:
            return np.inf, end_node, np.nan
        current_idx = end_node
        next_idx = int(pr_map[int(end_node)])
        if len(pr_map) == 2:
            return road_distance(d_coords, current_idx, next_idx)
        nearest_idx = np.nan
        next_nearest_idx = np.nan
        min_d = np.inf
        first_pred = next_idx  # for finding end of loop
        while True:
            h, n_idx, n_n_idx = road_distance(d_coords, current_idx, next_idx)
            if h < min_d:
                min_d = h
                nearest_idx = n_idx
                next_nearest_idx = n_n_idx
            # if the next in the chain is nan, then break
            if np.isnan(pr_map[next_idx]):
                break
            current_idx = next_idx
            next_idx = int(pr_map[next_idx])
            # the chain has looped back around to where it started
            if next_idx == first_pred:
                break
        return min_d, nearest_idx, next_nearest_idx

    # predecessor map for the walk: reused (re-filled with NaN) for every data point
    pred_map = np.full(len(node_data), np.nan)
    netw_coords = node_data[:, :2]
    netw_x_arr = node_data[:, 0]
    netw_y_arr = node_data[:, 1]
    data_coords = data_map[:, :2]
    data_x_arr = data_map[:, 0]
    data_y_arr = data_map[:, 1]
    total_count = len(data_map)
    # setup progress bar params
    steps = int(total_count / 10000)
    for data_idx in range(total_count):
        if not suppress_progress:
            checks.progress_bar(data_idx, total_count, steps)
        # find the nearest network node
        # NOTE(review): find_nearest is defined elsewhere - presumably returns NaN for the index when no
        # node lies within max_dist; confirm
        min_idx, min_dist = find_nearest(data_x_arr[data_idx], data_y_arr[data_idx], netw_x_arr, netw_y_arr, max_dist)
        # in some cases no network node will be within max_dist... so accept NaN
        if np.isnan(min_idx):
            continue
        # nearest is initially set for this nearest node, but if a nearer street-edge is found, it will be overridden
        nearest = min_idx
        next_nearest = np.nan
        # set start node to nearest network node
        node_idx = int(min_idx)
        # keep track of visited nodes
        pred_map.fill(np.nan)
        # state: False for the first winding direction, True once the walk is retried in reverse
        reversing = False
        # keep track of previous indices
        prev_idx = np.nan
        # iterate neighbours
        while True:
            # reset neighbour rotation and index counters
            rotation = np.nan
            nb_idx = np.nan
            # iterate the edges
            for edge_idx in node_edge_map[node_idx]:
                # get the edge's start and end node indices
                start, end = edge_data[edge_idx, :2]
                # cast to int for indexing
                new_idx = int(end)
                # don't follow self-loops
                if new_idx == node_idx:
                    continue
                # check that this isn't the previous node (already visited as neighbour from other direction)
                if np.isfinite(prev_idx) and new_idx == prev_idx:
                    continue
                # look for the new neighbour with the smallest rightwards (anti-clockwise arctan2) angle
                # measure the angle relative to the data point for the first node
                if np.isnan(prev_idx):
                    r = calculate_rotation(netw_coords[int(new_idx)] - netw_coords[node_idx],
                                           data_coords[data_idx] - netw_coords[node_idx])
                # else relative to the previous node
                else:
                    r = calculate_rotation(netw_coords[int(new_idx)] - netw_coords[node_idx],
                                           netw_coords[int(prev_idx)] - netw_coords[node_idx])
                # when reversing, mirror the angle so that the opposite winding direction is preferred
                if reversing:
                    r = 360 - r
                # if least angle, update
                if np.isnan(rotation) or r < rotation:
                    rotation = r
                    nb_idx = new_idx
            # allow backtracking if no neighbour is found - i.e. dead-ends
            if np.isnan(nb_idx):
                if np.isnan(pred_map[node_idx]):
                    # for isolated nodes: nb_idx == np.nan, pred_map[node_idx] == np.nan, and prev_idx == np.nan
                    if np.isnan(prev_idx):
                        break
                    # for isolated edges, the algorithm gets turned-around back to the starting node with nowhere to go
                    # nb_idx == np.nan, pred_map[node_idx] == np.nan
                    # in these cases, pass closest_intersections the prev idx so that it has a predecessor to follow
                    d, n, n_n = closest_intersections(data_coords[data_idx], pred_map, int(prev_idx))
                    if d < min_dist:
                        nearest = n
                        next_nearest = n_n
                    break
                # otherwise, go ahead and backtrack
                nb_idx = pred_map[node_idx]
            # if the distance is exceeded, reset and attempt in the other direction
            dist = np.hypot(netw_x_arr[int(nb_idx)] - data_x_arr[data_idx],
                            netw_y_arr[int(nb_idx)] - data_y_arr[data_idx])
            if dist > max_dist:
                pred_map[int(nb_idx)] = node_idx
                d, n, n_n = closest_intersections(data_coords[data_idx], pred_map, int(nb_idx))
                # if the distance to the street edge is less than the nearest node, or than the prior closest edge
                if d < min_dist:
                    min_dist = d
                    nearest = n
                    next_nearest = n_n
                # reverse and try in opposite direction
                if not reversing:
                    reversing = True
                    pred_map.fill(np.nan)
                    node_idx = int(min_idx)
                    prev_idx = np.nan
                    continue
                # both directions have been attempted
                break
            # ignore the following conditions while backtracking
            # (if backtracking, the current node's predecessor will be equal to the new neighbour)
            if nb_idx != pred_map[node_idx]:
                # if the new nb node has already been visited then terminate, this prevent infinite loops
                # or, if the algorithm has circled the block back to the original starting node
                if not np.isnan(pred_map[int(nb_idx)]) or nb_idx == min_idx:
                    # set the final predecessor, BUT ONLY if re-encountered the original node
                    # this would otherwise occlude routes (e.g. backtracks) that have passed the same node twice
                    # (such routes are still able to recover the closest edge)
                    if nb_idx == min_idx:
                        pred_map[int(nb_idx)] = node_idx
                    d, n, n_n = closest_intersections(data_coords[data_idx], pred_map, int(nb_idx))
                    if d < min_dist:
                        nearest = n
                        next_nearest = n_n
                    break
                # set predecessor (only if not backtracking)
                pred_map[int(nb_idx)] = node_idx
            # otherwise, keep going
            prev_idx = node_idx
            node_idx = int(nb_idx)
        # print(f'[{data_idx}, {nearest}, {next_nearest}],')
        # set in the data map
        data_map[data_idx, 2] = nearest  # adj_idx
        # in some cases next nearest will be NaN
        # this is mostly in situations where it works to leave as NaN - e.g. access off dead-ends...
        data_map[data_idx, 3] = next_nearest  # next_adj_idx

    return data_map
def local_aggregator(
        node_data: np.ndarray,
        edge_data: np.ndarray,
        node_edge_map: Dict,
        data_map: np.ndarray,
        distances: np.ndarray,
        betas: np.ndarray,
        landuse_encodings: np.ndarray = np.array([]),
        qs: np.ndarray = np.array([]),
        mixed_use_hill_keys: np.ndarray = np.array([]),
        mixed_use_other_keys: np.ndarray = np.array([]),
        accessibility_keys: np.ndarray = np.array([]),
        cl_disparity_wt_matrix: np.ndarray = np.array(np.full((0, 0), np.nan)),
        numerical_arrays: np.ndarray = np.array(np.full((0, 0), np.nan)),
        angular: bool = False,
        suppress_progress: bool = False
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray,
           np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    '''
    Aggregate land-use and numerical data to each live network node for every distance threshold.

    For each node flagged as live, ``aggregate_to_src_idx`` supplies the reachable data points and their
    distances (searched up to the largest entry in ``distances``). Reachable points are then folded into
    mixed-use diversity measures, land-use accessibility counts and numerical statistics, each computed
    per distance threshold (and its paired beta).

    NODE MAP:
    0 - x
    1 - y
    2 - live
    3 - ghosted

    EDGE MAP:
    0 - start node
    1 - end node
    2 - length in metres
    3 - sum of angular travel along length
    4 - impedance factor
    5 - in bearing
    6 - out bearing

    DATA MAP:
    0 - x
    1 - y
    2 - assigned network index - nearest
    3 - assigned network index - next-nearest

    Parameters:
        node_data, edge_data, node_edge_map: network maps, validated by ``checks.check_network_maps``.
        data_map: data points map, validated by ``checks.check_data_map`` with ``check_assigned=True``.
        distances, betas: paired thresholds and decay parameters, iterated in lockstep.
        landuse_encodings: one integer class code per data point; required for any mixed-use or
            accessibility key.
        qs: Hill-number ``q`` values; required when ``mixed_use_hill_keys`` is non-empty.
        mixed_use_hill_keys: keys in 0..3 selecting hill, branch-distance weighted, pairwise-distance
            weighted and disparity-matrix weighted Hill diversity respectively.
        mixed_use_other_keys: keys in 0..2 selecting Shannon, Gini-Simpson and Rao's quadratic diversity.
        accessibility_keys: class codes for which accessibility counts are accumulated.
        cl_disparity_wt_matrix: square class disparity matrix; required for hill key 3 and other key 2.
        numerical_arrays: 2d array (one row per numerical variable, one column per data point).
        angular: forwarded to ``aggregate_to_src_idx``.
        suppress_progress: when True, ``checks.progress_bar`` is not called.

    Returns:
        A 12-tuple, in order:
        mixed-use hill data (requested keys x qs x distances x nodes),
        mixed-use other data (requested keys x distances x nodes),
        accessibility counts and beta-weighted accessibility (keys x distances x nodes),
        then stats sum, weighted sum, mean, weighted mean, variance, weighted variance, max and min
        (each numerical arrays x distances x nodes). Unvisited stats entries remain NaN.

    Raises:
        ValueError: for missing land-use encodings, length mismatches, no requested metrics, missing qs,
            out-of-range or duplicate keys, or a missing / non-square disparity matrix. The ``checks``
            helpers are also invoked for validation.
    '''
    checks.check_network_maps(node_data, edge_data, node_edge_map)
    # raises ValueError if data points are not assigned to a network
    checks.check_data_map(data_map, check_assigned=True)
    checks.check_distances_and_betas(distances, betas)

    # check landuse encodings
    compute_landuses = False
    if len(landuse_encodings) == 0:
        if len(mixed_use_hill_keys) != 0 or len(mixed_use_other_keys) != 0 or len(accessibility_keys) != 0:
            raise ValueError(
                'Mixed use metrics or land-use accessibilities require an array of landuse labels.'
            )
    elif len(landuse_encodings) != len(data_map):
        raise ValueError(
            'The number of landuse encodings does not match the number of data points.'
        )
    else:
        checks.check_categorical_data(landuse_encodings)

    # catch completely missing metrics
    if len(mixed_use_hill_keys) == 0 and len(mixed_use_other_keys) == 0 and len(accessibility_keys) == 0:
        if len(numerical_arrays) == 0:
            raise ValueError(
                'No metrics specified, please specify at least one metric to compute.'
            )
    else:
        compute_landuses = True

    # catch missing qs
    if len(mixed_use_hill_keys) != 0 and len(qs) == 0:
        raise ValueError(
            'Hill diversity measures require that at least one value of q is specified.'
        )
    # negative qs caught by hill diversity methods

    # check various problematic key combinations
    if len(mixed_use_hill_keys) != 0:
        if (mixed_use_hill_keys.min() < 0 or mixed_use_hill_keys.max() > 3):
            raise ValueError('Mixed-use "hill" keys out of range of 0:4.')

    if len(mixed_use_other_keys) != 0:
        if (mixed_use_other_keys.min() < 0 or mixed_use_other_keys.max() > 2):
            raise ValueError('Mixed-use "other" keys out of range of 0:3.')

    if len(accessibility_keys) != 0:
        max_ac_key = landuse_encodings.max()
        if (accessibility_keys.min() < 0 or accessibility_keys.max() > max_ac_key):
            raise ValueError(
                'Negative or out of range accessibility key encountered. Keys must match class encodings.'
            )

    # pairwise duplicate scans - each unordered pair is compared exactly once
    for i in range(len(mixed_use_hill_keys)):
        for j in range(i + 1, len(mixed_use_hill_keys)):
            if mixed_use_hill_keys[i] == mixed_use_hill_keys[j]:
                raise ValueError('Duplicate mixed-use "hill" key.')

    for i in range(len(mixed_use_other_keys)):
        for j in range(i + 1, len(mixed_use_other_keys)):
            if mixed_use_other_keys[i] == mixed_use_other_keys[j]:
                raise ValueError('Duplicate mixed-use "other" key.')

    for i in range(len(accessibility_keys)):
        for j in range(i + 1, len(accessibility_keys)):
            if accessibility_keys[i] == accessibility_keys[j]:
                raise ValueError('Duplicate accessibility key.')

    def disp_check(disp_matrix):
        # the length of the disparity matrix vis-a-vis unique landuses is tested in underlying diversity functions
        if disp_matrix.ndim != 2 or disp_matrix.shape[0] != disp_matrix.shape[1]:
            raise ValueError(
                'The disparity matrix must be a square NxN matrix.')
        if len(disp_matrix) == 0:
            raise ValueError(
                'Hill disparity and Rao pairwise measures requires a class disparity weights matrix.'
            )

    # check that missing or malformed disparity weights matrices are caught
    for k in mixed_use_hill_keys:
        if k == 3:  # hill disparity
            disp_check(cl_disparity_wt_matrix)
    for k in mixed_use_other_keys:
        if k == 2:  # raos pairwise
            disp_check(cl_disparity_wt_matrix)

    compute_numerical = False
    # when passing an empty 2d array to numba, use: np.array(np.full((0, 0), np.nan))
    if len(numerical_arrays) != 0:
        compute_numerical = True
        if numerical_arrays.shape[1] != len(data_map):
            raise ValueError(
                'The length of the numerical data arrays do not match the length of the data map.'
            )
        checks.check_numerical_data(numerical_arrays)

    # establish variables
    netw_n = len(node_data)
    d_n = len(distances)
    q_n = len(qs)
    n_n = len(numerical_arrays)
    global_max_dist = distances.max()
    netw_nodes_live = node_data[:, 2]

    # the number of class slots is identical for every source node, so compute it once up front
    # (guarded because landuse_encodings may be empty when only numerical stats are requested)
    mu_max_unique_cl = 0
    if compute_landuses:
        mu_max_unique_cl = int(landuse_encodings.max() + 1)

    # setup data structures
    # hill mixed uses are structured separately to take values of q into account
    mixed_use_hill_data = np.full((4, q_n, d_n, netw_n), np.nan)  # 4 dim
    mixed_use_other_data = np.full((3, d_n, netw_n), np.nan)  # 3 dim
    accessibility_data = np.full((len(accessibility_keys), d_n, netw_n), 0.0)
    accessibility_data_wt = np.full((len(accessibility_keys), d_n, netw_n), 0.0)
    # stats
    stats_sum = np.full((n_n, d_n, netw_n), np.nan)
    stats_sum_wt = np.full((n_n, d_n, netw_n), np.nan)
    stats_mean = np.full((n_n, d_n, netw_n), np.nan)
    stats_mean_wt = np.full((n_n, d_n, netw_n), np.nan)
    # use np.nan instead of 0 to avoid division by zero issues
    stats_count = np.full((n_n, d_n, netw_n), np.nan)
    stats_count_wt = np.full((n_n, d_n, netw_n), np.nan)
    stats_variance = np.full((n_n, d_n, netw_n), np.nan)
    stats_variance_wt = np.full((n_n, d_n, netw_n), np.nan)
    stats_max = np.full((n_n, d_n, netw_n), np.nan)
    stats_min = np.full((n_n, d_n, netw_n), np.nan)

    # iterate through each vert and aggregate
    steps = int(netw_n / 10000)
    for netw_src_idx in range(netw_n):
        if not suppress_progress:
            checks.progress_bar(netw_src_idx, netw_n, steps)
        # only compute for live nodes
        if not netw_nodes_live[netw_src_idx]:
            continue
        # generate the reachable classes and their respective distances
        # these are non-unique - i.e. simply the class of each data point within the maximum distance
        # the aggregate_to_src_idx method will choose the closer direction of approach to a data point
        # from the nearest or next-nearest network node (calculated once globally, prior to local_landuses method)
        reachable_data, reachable_data_dist, tree_preds = aggregate_to_src_idx(
            netw_src_idx, node_data, edge_data, node_edge_map, data_map, global_max_dist, angular)

        # LANDUSES
        if compute_landuses:
            # counts of each class type (array length per max unique classes - not just those within max distance)
            classes_counts = np.full((d_n, mu_max_unique_cl), 0)
            # nearest of each class type (likewise)
            classes_nearest = np.full((d_n, mu_max_unique_cl), np.inf)
            # iterate the reachable indices and related distances
            for data_idx, (reachable, data_dist) in enumerate(zip(reachable_data, reachable_data_dist)):
                if not reachable:
                    continue
                # get the class category in integer form
                # all class codes were encoded to sequential integers - these correspond to the array indices
                cl_code = int(landuse_encodings[int(data_idx)])
                # iterate the distance dimensions
                for d_idx, (d, b) in enumerate(zip(distances, betas)):
                    # increment class counts at respective distances if the distance is less than current d
                    if data_dist <= d:
                        classes_counts[d_idx, cl_code] += 1
                        # if distance is nearer, update the nearest distance array too
                        if data_dist < classes_nearest[d_idx, cl_code]:
                            classes_nearest[d_idx, cl_code] = data_dist
                        # if within distance, and if in accessibility keys, then aggregate accessibility too
                        for ac_idx, ac_code in enumerate(accessibility_keys):
                            if ac_code == cl_code:
                                accessibility_data[ac_idx, d_idx, netw_src_idx] += 1
                                accessibility_data_wt[ac_idx, d_idx, netw_src_idx] += np.exp(b * data_dist)
                                # if a match was found, then no need to check others
                                break
            # mixed uses can be calculated now that the local class counts are aggregated
            # iterate the distances and betas
            for d_idx, b in enumerate(betas):
                cl_counts = classes_counts[d_idx]
                cl_nearest = classes_nearest[d_idx]
                # mu keys determine which metrics to compute
                # don't confuse with indices
                # previously used dynamic indices in data structures - but obtuse if irregularly ordered keys
                for mu_hill_key in mixed_use_hill_keys:
                    for q_idx, q_key in enumerate(qs):
                        if mu_hill_key == 0:
                            mixed_use_hill_data[0, q_idx, d_idx, netw_src_idx] = \
                                diversity.hill_diversity(cl_counts, q_key)
                        elif mu_hill_key == 1:
                            mixed_use_hill_data[1, q_idx, d_idx, netw_src_idx] = \
                                diversity.hill_diversity_branch_distance_wt(cl_counts, cl_nearest, q=q_key, beta=b)
                        elif mu_hill_key == 2:
                            mixed_use_hill_data[2, q_idx, d_idx, netw_src_idx] = \
                                diversity.hill_diversity_pairwise_distance_wt(cl_counts, cl_nearest, q=q_key, beta=b)
                        # land-use classification disparity hill diversity
                        # the wt matrix can be used without mapping because cl_counts is based on all classes
                        # regardless of whether they are reachable
                        elif mu_hill_key == 3:
                            mixed_use_hill_data[3, q_idx, d_idx, netw_src_idx] = \
                                diversity.hill_diversity_pairwise_matrix_wt(cl_counts,
                                                                            wt_matrix=cl_disparity_wt_matrix,
                                                                            q=q_key)
                for mu_other_key in mixed_use_other_keys:
                    if mu_other_key == 0:
                        mixed_use_other_data[0, d_idx, netw_src_idx] = \
                            diversity.shannon_diversity(cl_counts)
                    elif mu_other_key == 1:
                        mixed_use_other_data[1, d_idx, netw_src_idx] = \
                            diversity.gini_simpson_diversity(cl_counts)
                    elif mu_other_key == 2:
                        mixed_use_other_data[2, d_idx, netw_src_idx] = \
                            diversity.raos_quadratic_diversity(cl_counts, wt_matrix=cl_disparity_wt_matrix)

        # IDW
        # the order of the loops matters because the nested aggregations happen per distance per numerical array
        if compute_numerical:
            # iterate the reachable indices and related distances
            for data_idx, (reachable, data_dist) in enumerate(zip(reachable_data, reachable_data_dist)):
                # skip data points flagged as not reachable
                # this happens when within radial max distance, but beyond network max distance
                if not reachable:
                    continue
                # iterate the numerical arrays dimension
                for num_idx in range(n_n):
                    # some values will be NaN
                    num = numerical_arrays[num_idx, int(data_idx)]
                    if np.isnan(num):
                        continue
                    # iterate the distance dimensions
                    for d_idx, (d, b) in enumerate(zip(distances, betas)):
                        # increment mean aggregations at respective distances if the distance is less than current d
                        if data_dist <= d:
                            # distance decay weight - evaluated once and shared by the weighted sum and count
                            wt = np.exp(data_dist * b)
                            # aggregate - the first contribution replaces the NaN placeholder
                            if np.isnan(stats_sum[num_idx, d_idx, netw_src_idx]):
                                stats_sum[num_idx, d_idx, netw_src_idx] = num
                                stats_count[num_idx, d_idx, netw_src_idx] = 1
                                stats_sum_wt[num_idx, d_idx, netw_src_idx] = num * wt
                                stats_count_wt[num_idx, d_idx, netw_src_idx] = wt
                            else:
                                stats_sum[num_idx, d_idx, netw_src_idx] += num
                                stats_count[num_idx, d_idx, netw_src_idx] += 1
                                stats_sum_wt[num_idx, d_idx, netw_src_idx] += num * wt
                                stats_count_wt[num_idx, d_idx, netw_src_idx] += wt

                            if np.isnan(stats_max[num_idx, d_idx, netw_src_idx]):
                                stats_max[num_idx, d_idx, netw_src_idx] = num
                            elif num > stats_max[num_idx, d_idx, netw_src_idx]:
                                stats_max[num_idx, d_idx, netw_src_idx] = num

                            if np.isnan(stats_min[num_idx, d_idx, netw_src_idx]):
                                stats_min[num_idx, d_idx, netw_src_idx] = num
                            elif num < stats_min[num_idx, d_idx, netw_src_idx]:
                                stats_min[num_idx, d_idx, netw_src_idx] = num
            # finalise mean calculations - this is happening for a single netw_src_idx, so fairly fast
            for num_idx in range(n_n):
                for d_idx in range(d_n):
                    stats_mean[num_idx, d_idx, netw_src_idx] = \
                        stats_sum[num_idx, d_idx, netw_src_idx] / stats_count[num_idx, d_idx, netw_src_idx]
                    stats_mean_wt[num_idx, d_idx, netw_src_idx] = \
                        stats_sum_wt[num_idx, d_idx, netw_src_idx] / stats_count_wt[num_idx, d_idx, netw_src_idx]
            # calculate variances - counts are already computed per above
            # weighted version is IDW by division through equivalently weighted counts above
            # iterate the reachable indices and related distances
            for data_idx, (reachable, data_dist) in enumerate(zip(reachable_data, reachable_data_dist)):
                # skip data points flagged as not reachable
                # this happens when within radial max distance, but beyond network max distance
                if not reachable:
                    continue
                # iterate the numerical arrays dimension
                for num_idx in range(n_n):
                    # some values will be NaN
                    num = numerical_arrays[num_idx, int(data_idx)]
                    if np.isnan(num):
                        continue
                    # iterate the distance dimensions
                    for d_idx, (d, b) in enumerate(zip(distances, betas)):
                        # increment variance aggregations at respective distances if the distance is less than current d
                        if data_dist <= d:
                            wt = np.exp(data_dist * b)
                            sq_dev = np.square(num - stats_mean[num_idx, d_idx, netw_src_idx])
                            sq_dev_wt = np.square(num - stats_mean_wt[num_idx, d_idx, netw_src_idx]) * wt
                            # aggregate - the first contribution replaces the NaN placeholder
                            if np.isnan(stats_variance[num_idx, d_idx, netw_src_idx]):
                                stats_variance[num_idx, d_idx, netw_src_idx] = sq_dev
                                stats_variance_wt[num_idx, d_idx, netw_src_idx] = sq_dev_wt
                            else:
                                stats_variance[num_idx, d_idx, netw_src_idx] += sq_dev
                                stats_variance_wt[num_idx, d_idx, netw_src_idx] += sq_dev_wt
            # finalise variance calculations
            for num_idx in range(n_n):
                for d_idx in range(d_n):
                    stats_variance[num_idx, d_idx, netw_src_idx] = \
                        stats_variance[num_idx, d_idx, netw_src_idx] / stats_count[num_idx, d_idx, netw_src_idx]
                    stats_variance_wt[num_idx, d_idx, netw_src_idx] = \
                        stats_variance_wt[num_idx, d_idx, netw_src_idx] / stats_count_wt[num_idx, d_idx, netw_src_idx]

    # send the data back in the same types and same order as the original keys - convert to int for indexing
    mu_hill_k_int = np.full(len(mixed_use_hill_keys), 0)
    for i, k in enumerate(mixed_use_hill_keys):
        mu_hill_k_int[i] = k
    mu_other_k_int = np.full(len(mixed_use_other_keys), 0)
    for i, k in enumerate(mixed_use_other_keys):
        mu_other_k_int[i] = k

    return mixed_use_hill_data[mu_hill_k_int], \
           mixed_use_other_data[mu_other_k_int], \
           accessibility_data, accessibility_data_wt, \
           stats_sum, stats_sum_wt, \
           stats_mean, stats_mean_wt, \
           stats_variance, stats_variance_wt, \
           stats_max, stats_min
def test_graph_maps_from_nX():
    '''
    Check that graphs.graph_maps_from_nX produces node / edge maps consistent with the source networkX graph,
    and that malformed graphs (missing geoms, non-LineString geoms, missing coordinates, invalid
    imp_factors) raise the expected exceptions.
    '''
    # template graph
    G_template = mock.mock_graph()
    G_template = graphs.nX_simple_geoms(G_template)

    # test maps vs. networkX
    G_test = G_template.copy()
    # set some random 'live' statuses
    # NOTE: randint's upper bound is exclusive, so (0, 2) is required to draw both 0 and 1
    for n in G_test.nodes():
        G_test.nodes[n]['live'] = bool(np.random.randint(0, 2))
    # randomise the imp_factors
    for s, e in G_test.edges():
        G_test[s][e]['imp_factor'] = np.random.random() * 2
    # generate geom with angular change for edge 50-51 - turns of 45 + 135 + 90 should sum to 270
    angle_geom = geometry.LineString([
        [700700, 5719900],
        [700700, 5720000],
        [700750, 5720050],
        [700700, 5720050],
        [700700, 5720100]
    ])
    G_test[50][51]['geom'] = angle_geom

    # generate test maps
    node_uids, node_data, edge_data, node_edge_map = graphs.graph_maps_from_nX(G_test)
    # debug plot
    # plot.plot_graphs(primal=G_test)
    # plot.plot_graph_maps(node_uids, node_data, edge_data)

    # run check
    checks.check_network_maps(node_data, edge_data, node_edge_map)

    # check lengths
    assert len(node_uids) == len(node_data) == G_test.number_of_nodes()
    # no ghosted edges, so edges = x2
    assert len(edge_data) == G_test.number_of_edges() * 2

    # check node maps (idx and label match in this case...)
    for n_label in node_uids:
        assert node_data[n_label][0] == G_test.nodes[n_label]['x']
        assert node_data[n_label][1] == G_test.nodes[n_label]['y']
        assert node_data[n_label][2] == G_test.nodes[n_label]['live']
        assert node_data[n_label][3] == 0  # ghosted is False by default

    # check edge maps (idx and label match in this case...)
    for start, end, length, angle_sum, imp_factor, start_bearing, end_bearing in edge_data:
        assert np.allclose(length, G_test[start][end]['geom'].length, atol=0.001, rtol=0)
        if (start == 50 and end == 51) or (start == 51 and end == 50):
            # check that the angle is measured along the line of change
            # i.e. 45 + 135 + 90 (not 45 + 45 + 90)
            # angles are transformed per: 1 + (angle_sum / 180)
            assert angle_sum == 270
        else:
            assert angle_sum == 0
        assert np.allclose(imp_factor, G_test[start][end]['imp_factor'], atol=0.001, rtol=0)
        s_x, s_y = node_data[int(start)][:2]
        e_x, e_y = node_data[int(end)][:2]
        assert np.allclose(start_bearing, np.rad2deg(np.arctan2(e_y - s_y, e_x - s_x)), atol=0.001, rtol=0)
        assert np.allclose(end_bearing, np.rad2deg(np.arctan2(e_y - s_y, e_x - s_x)), atol=0.001, rtol=0)

    # check that missing geoms throw an error
    G_test = G_template.copy()
    for s, e in G_test.edges():
        # delete key from first edge and break
        del G_test[s][e]['geom']
        break
    with pytest.raises(KeyError):
        graphs.graph_maps_from_nX(G_test)

    # check that non-LineString geoms throw an error
    G_test = G_template.copy()
    for s, e in G_test.edges():
        G_test[s][e]['geom'] = geometry.Point([G_test.nodes[s]['x'], G_test.nodes[s]['y']])
    with pytest.raises(TypeError):
        graphs.graph_maps_from_nX(G_test)

    # check that missing node keys throw an error
    G_test = G_template.copy()
    for k in ['x', 'y']:
        for n in G_test.nodes():
            # delete key from first node and break
            del G_test.nodes[n][k]
            break
        with pytest.raises(KeyError):
            graphs.graph_maps_from_nX(G_test)

    # check that invalid imp_factors are caught
    G_test = G_template.copy()
    # corrupt imp_factor value on the first edge and break
    for corrupt_val in [-1, -np.inf, np.nan]:
        for s, e in G_test.edges():
            G_test[s][e]['imp_factor'] = corrupt_val
            break
        with pytest.raises(ValueError):
            graphs.graph_maps_from_nX(G_test)
def local_segment_centrality(node_data: np.ndarray,
                             edge_data: np.ndarray,
                             node_edge_map: Dict,
                             distances: np.ndarray,
                             betas: np.ndarray,
                             measure_keys: tuple,
                             jitter_scale: float = 0.0,
                             angular: bool = False,
                             progress_proxy=None) -> np.ndarray:
    '''
    Compute segment-based closeness and betweenness centralities for every live node.

    A shortest-path tree is built from each live source node (via ``shortest_path_tree``) out to the largest
    distance threshold. Closeness measures are accumulated against the source node from the tree's edges;
    betweenness measures are accumulated against the nodes lying between the source and each reachable node.

    Parameters:
        node_data, edge_data, node_edge_map: network maps, validated by ``checks.check_network_maps``.
            Column 2 of ``node_data`` is read as the live flag.
        distances, betas: paired thresholds, validated by ``checks.check_distances_and_betas``. Thresholds
            are iterated from the last index to the first for "snipping" - presumably they are expected in
            ascending order; confirm against callers.
        measure_keys: requested measures. With ``angular=False``: 'segment_density', 'segment_harmonic',
            'segment_beta', 'segment_betweenness'. With ``angular=True``: 'segment_harmonic_hybrid' and
            'segment_betweeness_hybrid' (legacy spelling; 'segment_betweenness_hybrid' is also accepted).
        jitter_scale: forwarded to ``shortest_path_tree``.
        angular: selects the simplest-path (angular) workflow instead of the shortest-path workflow.
        progress_proxy: optional object whose ``update(1)`` is called once per source node.

    Returns:
        float32 array of shape (len(measure_keys), len(distances), len(node_data)).

    Raises:
        ValueError: if a measure key does not match the options available for the selected ``angular`` mode.
    '''
    # integrity checks
    checks.check_distances_and_betas(distances, betas)
    checks.check_network_maps(node_data, edge_data, node_edge_map)
    # gather functions
    close_funcs = List.empty_list(segment_func_proto)
    close_idxs = []
    betw_idxs = []
    for m_idx, m_key in enumerate(measure_keys):
        if not angular:
            # segment keys
            if m_key == 'segment_density':
                close_funcs.append(segment_density)
                close_idxs.append(m_idx)
            elif m_key == 'segment_harmonic':
                close_funcs.append(segment_harmonic)
                close_idxs.append(m_idx)
            elif m_key == 'segment_beta':
                close_funcs.append(segment_beta)
                close_idxs.append(m_idx)
            elif m_key == 'segment_betweenness':
                # only one version of shortest path betweenness - no need for func
                betw_idxs.append(m_idx)
            else:
                raise ValueError('''
                    Unable to match requested centrality measure key against available options.
                    Shortest-path measures can't be mixed with simplest-path measures.
                    Set angular=True if using simplest-path measures.
                ''')
        else:
            # segment keys
            if m_key == 'segment_harmonic_hybrid':
                # only one version of simplest path closeness - no need for func
                close_idxs.append(m_idx)
            # the historic key is misspelled ("betweeness"); it is retained for existing callers and the
            # correctly spelled variant is accepted as an alias
            elif m_key == 'segment_betweeness_hybrid' or m_key == 'segment_betweenness_hybrid':
                # only one version of simplest path betweenness - no need for func
                betw_idxs.append(m_idx)
            else:
                raise ValueError('''
                    Unable to match requested centrality measure key against available options.
                    Shortest-path measures can't be mixed with simplest-path measures.
                    Set angular=False if using shortest-path measures.
                ''')
    # prepare variables
    n = len(node_data)
    d_n = len(distances)
    k_n = len(measure_keys)
    measures_data = np.full((k_n, d_n, n), 0.0, dtype=np.float32)
    global_max_dist = float(np.nanmax(distances))
    nodes_live = node_data[:, 2]
    # iterate through each vert and calculate the shortest path tree
    for src_idx in prange(n):
        # numba no object mode can only handle basic printing
        # note that progress bar adds a performance penalty
        if progress_proxy is not None:
            progress_proxy.update(1)
        # only compute for live nodes
        if not nodes_live[src_idx]:
            continue
        # per-iteration accumulator, merged into measures_data at the end of the iteration
        # (allocated after the live check so that skipped nodes don't pay for it)
        shadow_arr = np.full((k_n, d_n, n), 0.0, dtype=np.float32)
        # Shortest tree dijkstra
        # Predecessor map is based on impedance heuristic - i.e. angular vs not
        # Shortest path distances in metres used for defining max distances regardless
        # RETURNS A SHORTEST PATH TREE MAP:
        # 0 - processed nodes
        # 1 - predecessors
        # 2 - shortest path distance
        # 3 - simplest path angular distance
        # 4 - cycles
        # 5 - origin segments
        # 6 - last segments
        tree_map, tree_edges = shortest_path_tree(edge_data,
                                                  node_edge_map,
                                                  src_idx,
                                                  max_dist=global_max_dist,
                                                  jitter_scale=jitter_scale,
                                                  angular=angular)
        tree_nodes = np.where(tree_map[:, 0])[0]
        tree_preds = tree_map[:, 1]
        tree_short_dists = tree_map[:, 2]
        tree_simpl_dists = tree_map[:, 3]
        tree_origin_seg = tree_map[:, 5]
        tree_last_seg = tree_map[:, 6]
        # can't do edge processing as part of shortest tree because all shortest paths have to be resolved first
        # hence visiting all processed edges and extrapolating information
        # NOTES:
        # 1. the above shortest tree algorithm only tracks edges in one direction - i.e. no duplication
        # 2. dijkstra sorts all active nodes by distance: explores from near to far: edges discovered accordingly
        # only build edge data if necessary
        if close_idxs:
            for edge_idx in np.where(tree_edges)[0]:
                # unpack the edge data
                seg_n_nd, seg_m_nd, seg_len, seg_ang, seg_imp_fact, seg_in_bear, seg_out_bear = edge_data[edge_idx]
                n_nd_idx = int(seg_n_nd)
                m_nd_idx = int(seg_m_nd)
                n_simpl_dist = tree_simpl_dists[n_nd_idx]
                m_simpl_dist = tree_simpl_dists[m_nd_idx]
                n_short_dist = tree_short_dists[n_nd_idx]
                m_short_dist = tree_short_dists[m_nd_idx]
                # don't process unreachable segments
                if np.isinf(n_short_dist) and np.isinf(m_short_dist):
                    continue
                # shortest path (non-angular) uses a split segment workflow
                # the split workflow allows for non-shortest-path edges to be approached from either direction
                # i.e. the shortest path to node "b" isn't necessarily via node "a"
                # the edge is then split at the farthest point from either direction and apportioned either way
                # if the segment is on the shortest path then the second segment will squash down to naught
                if not angular:
                    # dijkstra discovers edges from near to far (sorts before popping next node)
                    # i.e. this sort may be unnecessary?
                    # sort where a < b
                    if n_short_dist <= m_short_dist:
                        a = tree_short_dists[n_nd_idx]
                        a_imp = tree_short_dists[n_nd_idx]
                        b = tree_short_dists[m_nd_idx]
                        b_imp = tree_short_dists[m_nd_idx]
                    else:
                        a = tree_short_dists[m_nd_idx]
                        a_imp = tree_short_dists[m_nd_idx]
                        b = tree_short_dists[n_nd_idx]
                        b_imp = tree_short_dists[n_nd_idx]
                    # get the max distance along the segment: seg_len = (m - start_len) + (m - end_len)
                    # c and d variables can diverge per beneath
                    c = d = (seg_len + a + b) / 2
                    # c | d impedance should technically be the same if computed from either side
                    c_imp = d_imp = a_imp + (c - a) * seg_imp_fact
                    # iterate the distance and beta thresholds - from large to small for threshold snipping
                    for d_idx in range(len(distances) - 1, -1, -1):
                        dist_cutoff = distances[d_idx]
                        beta = betas[d_idx]
                        # if c or d are greater than the distance threshold, then the segments are "snipped"
                        # a to c segment
                        if a <= dist_cutoff:
                            if c > dist_cutoff:
                                c = dist_cutoff
                                c_imp = a_imp + (dist_cutoff - a) * seg_imp_fact
                            for m_idx, close_func in zip(close_idxs, close_funcs):
                                shadow_arr[m_idx, d_idx, src_idx] += close_func(a, c, a_imp, c_imp, beta)
                        # b to d segment - if on the shortest path then b == d, in which case, continue
                        if b == d:
                            continue
                        if b <= dist_cutoff:
                            if d > dist_cutoff:
                                d = dist_cutoff
                                d_imp = b_imp + (dist_cutoff - b) * seg_imp_fact
                            for m_idx, close_func in zip(close_idxs, close_funcs):
                                shadow_arr[m_idx, d_idx, src_idx] += close_func(b, d, b_imp, d_imp, beta)
                else:
                    # there is a different workflow for angular - uses single segment (no segment splitting)
                    # this is because the simplest path onto the entire length of segment is from the lower
                    # impedance end; this assumes segments are relatively straight, overly complex to subdivide
                    # segments for splitting...
                    # only a single case existing for angular version so no need for abstracted functions
                    # there are three scenarios:
                    # 1) e is the predecessor for f
                    if n_nd_idx == src_idx or tree_preds[m_nd_idx] == n_nd_idx:
                        e = tree_short_dists[n_nd_idx]
                        f = tree_short_dists[m_nd_idx]
                        # if travelling via n, then m = n_imp + seg_ang
                        # calculations are based on segment length / angle
                        # i.e. need to decide whether to base angular change on entry vs exit impedance
                        # else take midpoint of segment as ballpark for average, which is the course taken here
                        # i.e. exit impedance minus half segment impedance
                        ang = m_simpl_dist - seg_ang / 2
                    # 2) f is the predecessor for e
                    elif m_nd_idx == src_idx or tree_preds[n_nd_idx] == m_nd_idx:
                        e = tree_short_dists[m_nd_idx]
                        f = tree_short_dists[n_nd_idx]
                        ang = n_simpl_dist - seg_ang / 2  # per above
                    # 3) neither of the above
                    # get the approach angles for either side and compare to find the least inwards impedance
                    # this involves impedance up to entrypoint either side plus respective turns onto the segment
                    else:
                        # get the out bearing from the predecessor and calculate the turn onto current seg's in bearing
                        # find n's predecessor
                        n_pred_idx = int(tree_preds[n_nd_idx])
                        # find the edge from n's predecessor to n
                        e_i = _find_edge_idx(node_edge_map, edge_data, n_pred_idx, n_nd_idx)
                        # get the predecessor edge's outwards bearing at index 6
                        n_pred_out_bear = edge_data[int(e_i), 6]
                        # calculating the turn into this segment from the predecessor's out bearing
                        n_turn_in = np.abs((seg_in_bear - n_pred_out_bear + 180) % 360 - 180)
                        # then add the turn-in to the aggregated impedance at n
                        # i.e. total angular impedance onto this segment
                        # as above two scenarios, adding half of angular impedance for segment as avg between in / out
                        n_ang = n_simpl_dist + n_turn_in + seg_ang / 2
                        # repeat for the other side
                        # per original n -> m edge destructuring: m is the node in the outwards bound direction
                        # i.e. need to first find the corresponding edge in the opposite m -> n direction of travel
                        # this gives the correct inwards bearing as if m were the entry point
                        opp_i = _find_edge_idx(node_edge_map, edge_data, m_nd_idx, n_nd_idx)
                        # now that the opposing edge is known, we can fetch the inwards bearing at index 5 (not 6)
                        opp_in_bear = edge_data[int(opp_i), 5]
                        # find m's predecessor
                        m_pred_idx = int(tree_preds[m_nd_idx])
                        # we can now go ahead and find m's predecessor edge
                        e_i = _find_edge_idx(node_edge_map, edge_data, m_pred_idx, m_nd_idx)
                        # get the predecessor edge's outwards bearing at index 6
                        m_pred_out_bear = edge_data[int(e_i), 6]
                        # and calculate the turn-in from m's predecessor onto the m inwards bearing
                        m_turn_in = np.abs((opp_in_bear - m_pred_out_bear + 180) % 360 - 180)
                        # then add to aggregated impedance at m
                        m_ang = m_simpl_dist + m_turn_in + seg_ang / 2
                        # the distance and angle are based on the smallest angular impedance onto the segment
                        # select by shortest distance in event angular impedances are identical from either direction
                        if n_ang == m_ang:
                            if n_short_dist <= m_short_dist:
                                e = tree_short_dists[n_nd_idx]
                                ang = n_ang
                            else:
                                e = tree_short_dists[m_nd_idx]
                                ang = m_ang
                        elif n_ang < m_ang:
                            e = tree_short_dists[n_nd_idx]
                            ang = n_ang
                        else:
                            e = tree_short_dists[m_nd_idx]
                            ang = m_ang
                        # f is the entry distance plus segment length
                        f = e + seg_len
                    # iterate the distance thresholds - from large to small for threshold snipping
                    for d_idx in range(len(distances) - 1, -1, -1):
                        dist_cutoff = distances[d_idx]
                        if e <= dist_cutoff:
                            if f > dist_cutoff:
                                f = dist_cutoff
                            # uses segment length as base (in this sense hybrid)
                            # intentionally not using integral because conflates harmonic shortest-path w. simplest
                            # there is only one case for angular - no need to abstract to func
                            for m_idx in close_idxs:
                                # transform - prevents division by zero
                                agg_ang = 1 + (ang / 180)
                                # then aggregate - angular uses distances explicitly
                                shadow_arr[m_idx, d_idx, src_idx] += (f - e) / agg_ang
        if betw_idxs:
            # prepare a list of neighbouring nodes
            nb_nodes = List.empty_list(types.int64)
            for edge_idx in node_edge_map[src_idx]:
                out_nd_idx = int(edge_data[edge_idx][1])  # to node is index 1
                nb_nodes.append(out_nd_idx)
            # betweenness keys computed per to_idx
            for to_idx in tree_nodes:
                # only process in one direction
                if to_idx < src_idx:
                    continue
                # skip self node
                if to_idx == src_idx:
                    continue
                # skip direct neighbours (no nodes between)
                if to_idx in nb_nodes:
                    continue
                # distance - do not proceed if no route available
                to_dist = tree_short_dists[to_idx]
                if np.isinf(to_dist):
                    continue
                # BETWEENNESS
                # segment versions only agg first and last segments
                # the distance decay is based on the distance between the src segment and to segment
                # i.e. willingness of people to walk between src and to segments
                # betweenness is aggregated to intervening nodes based on above distances and decays
                # other sections (in between current first and last) are respectively processed from other to nodes
                # distance thresholds are computed using the inner as opposed to outer edges of the segments
                o_seg_len = edge_data[int(tree_origin_seg[to_idx])][2]
                l_seg_len = edge_data[int(tree_last_seg[to_idx])][2]
                min_span = to_dist - o_seg_len - l_seg_len
                # calculate traversal distances from opposing segments
                o_1 = min_span
                o_2 = min_span + o_seg_len
                l_1 = min_span
                l_2 = min_span + l_seg_len
                # betweenness - only counting truly between vertices, not starting and ending verts
                inter_idx = int(tree_preds[to_idx])
                while True:
                    # break out of while loop if the intermediary has reached the source node
                    if inter_idx == src_idx:
                        break
                    # start each intermediary from the unsnipped segment ends: snipping mutates these values
                    # while descending through the thresholds, and must not carry over to the next intermediary
                    # (otherwise later intermediaries are credited with already-pruned spans at larger thresholds)
                    o_2_snip = o_2
                    l_2_snip = l_2
                    # iterate the distance thresholds - from large to small for threshold snipping
                    for d_idx in range(len(distances) - 1, -1, -1):
                        dist_cutoff = distances[d_idx]
                        beta = betas[d_idx]
                        if min_span <= dist_cutoff:
                            # prune if necessary
                            if o_2_snip > dist_cutoff:
                                o_2_snip = dist_cutoff
                            if l_2_snip > dist_cutoff:
                                l_2_snip = dist_cutoff
                            # only one version for betweenness for respective angular / non angular
                            # i.e. no need to abstract to function
                            for m_idx in betw_idxs:
                                if not angular:
                                    # catch division by zero
                                    if beta == 0.0:
                                        auc = o_2_snip - o_1 + l_2_snip - l_1
                                    else:
                                        auc = (np.exp(-beta * o_2_snip) - np.exp(-beta * o_1)) / -beta + \
                                              (np.exp(-beta * l_2_snip) - np.exp(-beta * l_1)) / -beta
                                    shadow_arr[m_idx, d_idx, inter_idx] += auc
                                else:
                                    bt_ang = 1 + tree_simpl_dists[to_idx] / 180
                                    pt_a = o_2_snip - o_1
                                    pt_b = l_2_snip - l_1
                                    shadow_arr[m_idx, d_idx, inter_idx] += (pt_a + pt_b) / bt_ang
                    # follow the chain
                    inter_idx = int(tree_preds[inter_idx])
        # reduction
        measures_data += shadow_arr
    return measures_data
def local_node_centrality(node_data: np.ndarray,
                          edge_data: np.ndarray,
                          node_edge_map: Dict,
                          distances: np.ndarray,
                          betas: np.ndarray,
                          measure_keys: tuple,
                          jitter_scale: float = 0.0,
                          angular: bool = False,
                          progress_proxy=None) -> np.ndarray:
    """
    Compute node-based closeness and betweenness centralities for every live node.

    A shortest-path tree is generated from each live source node (out to the largest distance threshold)
    and the requested measures are aggregated per distance threshold.

    NODE MAP (columns read here):
    2 - live

    Parameters
    ----------
    node_data
        Node map; only the "live" column (index 2) is read directly by this function.
    edge_data
        Edge map, passed through to ``shortest_path_tree``.
    node_edge_map
        Mapping from node index to edge indices, passed through to ``shortest_path_tree``.
    distances
        Distance thresholds; ``distances[i]`` is paired with ``betas[i]``.
    betas
        Decay parameters paired index-wise with ``distances``.
    measure_keys
        Measure names. If ``angular`` is False: 'node_density', 'node_farness', 'node_cycles',
        'node_harmonic', 'node_beta', 'node_betweenness', 'node_betweenness_beta'.
        If ``angular`` is True: 'node_harmonic_angular', 'node_betweenness_angular'.
    jitter_scale
        Passed through to ``shortest_path_tree``.
    angular
        Selects which family of keys is accepted and is passed through to ``shortest_path_tree``.
    progress_proxy
        Optional object whose ``update(1)`` method is called once per source node.

    Returns
    -------
    np.ndarray
        A float32 array of shape ``(len(measure_keys), len(distances), len(node_data))``. Rows follow the order
        of ``measure_keys``. Closeness entries for non-live nodes remain zero.

    Raises
    ------
    ValueError
        If a measure key is not valid for the selected ``angular`` mode. The ``checks`` helpers presumably
        raise for malformed inputs as well.
    """
    # integrity checks
    checks.check_distances_and_betas(distances, betas)
    checks.check_network_maps(node_data, edge_data, node_edge_map)
    # gather the measure functions alongside the row index each one writes to in the output array
    close_funcs = List.empty_list(node_close_func_proto)
    close_idxs = []
    betw_funcs = List.empty_list(node_betw_func_proto)
    betw_idxs = []
    for m_idx, m_key in enumerate(measure_keys):
        if not angular:
            # closeness keys
            if m_key == 'node_density':
                close_funcs.append(node_density)
                close_idxs.append(m_idx)
            elif m_key == 'node_farness':
                close_funcs.append(node_farness)
                close_idxs.append(m_idx)
            elif m_key == 'node_cycles':
                close_funcs.append(node_cycles)
                close_idxs.append(m_idx)
            elif m_key == 'node_harmonic':
                close_funcs.append(node_harmonic)
                close_idxs.append(m_idx)
            elif m_key == 'node_beta':
                close_funcs.append(node_beta)
                close_idxs.append(m_idx)
            # betweenness keys
            elif m_key == 'node_betweenness':
                betw_funcs.append(node_betweenness)
                betw_idxs.append(m_idx)
            elif m_key == 'node_betweenness_beta':
                betw_funcs.append(node_betweenness_beta)
                betw_idxs.append(m_idx)
            else:
                raise ValueError(''' Unable to match requested centrality measure key against available options. Shortest-path measures can't be mixed with simplest-path measures. Set angular=True if using simplest-path measures.''')
        else:
            # aggregative keys
            if m_key == 'node_harmonic_angular':
                close_funcs.append(node_harmonic_angular)
                close_idxs.append(m_idx)
            # betweenness keys - the angular variant reuses the plain node_betweenness function
            elif m_key == 'node_betweenness_angular':
                betw_funcs.append(node_betweenness)
                betw_idxs.append(m_idx)
            else:
                raise ValueError(''' Unable to match requested centrality measure key against available options. Shortest-path measures can't be mixed with simplest-path measures. Set angular=False if using shortest-path measures.''')
    # prepare variables
    n = len(node_data)
    d_n = len(distances)
    k_n = len(measure_keys)
    measures_data = np.full((k_n, d_n, n), 0.0, dtype=np.float32)
    global_max_dist = float(np.nanmax(distances))
    nodes_live = node_data[:, 2]
    # iterate through each vert and calculate the shortest path tree
    for src_idx in prange(n):
        # numba no object mode can only handle basic printing
        # note that progress bar adds a performance penalty
        if progress_proxy is not None:
            progress_proxy.update(1)
        # only compute for live nodes
        if not nodes_live[src_idx]:
            continue
        # per-iteration accumulator which is summed into measures_data at the end of the iteration
        # allocated only after the live check so that non-live nodes don't pay for an unused buffer
        shadow_arr = np.full((k_n, d_n, n), 0.0, dtype=np.float32)
        # Shortest tree dijkstra
        # Predecessor map is based on impedance heuristic - which can be different from metres
        # Distance map in metres still necessary for defining max distances and computing equivalent
        # distance measures
        # RETURNS A SHORTEST PATH TREE MAP:
        # 0 - processed nodes
        # 1 - predecessors
        # 2 - shortest path distance
        # 3 - simplest path angular distance
        # 4 - cycles
        # 5 - origin segments
        # 6 - last segments
        tree_map, tree_edges = shortest_path_tree(edge_data,
                                                  node_edge_map,
                                                  src_idx,
                                                  max_dist=global_max_dist,
                                                  jitter_scale=jitter_scale,
                                                  angular=angular)
        tree_nodes = np.where(tree_map[:, 0])[0]
        tree_preds = tree_map[:, 1]
        tree_short_dists = tree_map[:, 2]
        tree_simpl_dists = tree_map[:, 3]
        tree_cycles = tree_map[:, 4]
        # process each reachable node
        for to_idx in tree_nodes:
            # skip self node
            if to_idx == src_idx:
                continue
            # unpack impedance and distance for to index
            to_short_dist = tree_short_dists[to_idx]
            to_simpl_dist = tree_simpl_dists[to_idx]
            cycles = tree_cycles[to_idx]
            # do not proceed if no route available
            if np.isinf(to_short_dist):
                continue
            # calculate closeness centralities - these accrue to the source node
            if close_funcs:
                for d_idx in range(d_n):
                    dist_cutoff = distances[d_idx]
                    beta = betas[d_idx]
                    if to_short_dist <= dist_cutoff:
                        for m_idx, close_func in zip(close_idxs, close_funcs):
                            shadow_arr[m_idx, d_idx, src_idx] += close_func(
                                to_short_dist, to_simpl_dist, beta, cycles)
            # betweenness: only process each src / to pair in one direction
            if to_idx < src_idx:
                continue
            # calculate betweenness centralities - these accrue to the intermediary nodes
            if betw_funcs:
                # only counting truly between vertices, not starting and ending verts
                inter_idx = int(tree_preds[to_idx])
                while True:
                    # break out of while loop if the intermediary has reached the source node
                    if inter_idx == src_idx:
                        break
                    # iterate the distance thresholds
                    for d_idx in range(d_n):
                        dist_cutoff = distances[d_idx]
                        beta = betas[d_idx]
                        # check threshold
                        if to_short_dist <= dist_cutoff:
                            # iterate betweenness functions
                            for m_idx, betw_func in zip(betw_idxs, betw_funcs):
                                shadow_arr[m_idx, d_idx, inter_idx] += betw_func(
                                    to_short_dist, beta)
                    # follow the chain of predecessors back towards the source
                    inter_idx = int(tree_preds[inter_idx])
        # reduce
        measures_data += shadow_arr
    return measures_data
def aggregate_stats(
    node_data: np.ndarray,
    edge_data: np.ndarray,
    node_edge_map: Dict,
    data_map: np.ndarray,
    distances: np.ndarray,
    betas: np.ndarray,
    numerical_arrays: np.ndarray = np.array(np.full((0, 0), np.nan)),
    jitter_scale: float = 0.0,
    angular: bool = False,
    progress_proxy=None
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """
    Aggregate numerical data-point attributes to each live network node, per distance threshold.

    For every live node, the data points reported as reachable by ``aggregate_to_src_idx`` are folded into
    sum, mean, variance, max and min statistics. The "weighted" variants weight each data point by
    ``exp(-beta * distance)``. NaN values in ``numerical_arrays`` are skipped.

    NODE MAP:
    0 - x
    1 - y
    2 - live
    EDGE MAP:
    0 - start node
    1 - end node
    2 - length in metres
    3 - sum of angular travel along length
    4 - impedance factor
    5 - in bearing
    6 - out bearing
    DATA MAP:
    0 - x
    1 - y
    2 - assigned network index - nearest
    3 - assigned network index - next-nearest

    Parameters
    ----------
    node_data, edge_data, node_edge_map
        Network maps per the layouts above.
    data_map
        Data map per the layout above; must already be assigned to the network.
    distances
        Distance thresholds; ``distances[i]`` is paired with ``betas[i]``.
    betas
        Decay parameters used for the weighted statistics.
    numerical_arrays
        2d array with one row per numerical attribute and one column per data point.
    jitter_scale, angular
        Passed through to ``aggregate_to_src_idx``.
    progress_proxy
        Optional object whose ``update(1)`` method is called once per network node.

    Returns
    -------
    tuple of np.ndarray
        ``(stats_sum, stats_sum_wt, stats_mean, stats_mean_wt, stats_variance, stats_variance_wt, stats_max,
        stats_min)``, each of shape ``(len(numerical_arrays), len(distances), len(node_data))``.
        Sums default to 0 and the remaining statistics to NaN where nothing was aggregated.

    Raises
    ------
    ValueError
        If the second dimension of ``numerical_arrays`` does not match the length of ``data_map``.
        The ``checks`` helpers presumably raise for other malformed inputs.
    """
    checks.check_network_maps(node_data, edge_data, node_edge_map)
    # raises ValueError data points are not assigned to a network
    checks.check_data_map(data_map, check_assigned=True)
    checks.check_distances_and_betas(distances, betas)
    # when passing an empty 2d array to numba, use: np.array(np.full((0, 0), np.nan))
    if numerical_arrays.shape[1] != len(data_map):
        raise ValueError('The length of the numerical data arrays do not match the length of the data map.')
    checks.check_numerical_data(numerical_arrays)
    # establish variables
    netw_n = len(node_data)
    d_n = len(distances)
    n_n = len(numerical_arrays)
    global_max_dist = float(np.nanmax(distances))
    netw_nodes_live = node_data[:, 2]
    # setup data structures
    stats_sum = np.full((n_n, d_n, netw_n), 0.0)
    stats_sum_wt = np.full((n_n, d_n, netw_n), 0.0)
    stats_mean = np.full((n_n, d_n, netw_n), np.nan)
    stats_mean_wt = np.full((n_n, d_n, netw_n), np.nan)
    stats_count = np.full((n_n, d_n, netw_n), 0.0)
    stats_count_wt = np.full((n_n, d_n, netw_n), 0.0)
    stats_variance = np.full((n_n, d_n, netw_n), np.nan)
    stats_variance_wt = np.full((n_n, d_n, netw_n), np.nan)
    stats_max = np.full((n_n, d_n, netw_n), np.nan)
    stats_min = np.full((n_n, d_n, netw_n), np.nan)
    # iterate through each vert and aggregate
    # parallelise over n nodes:
    # each distance or stat array index is therefore only touched by one thread at a time
    # i.e. no need to use inner array deductions as with centralities
    for netw_src_idx in prange(netw_n):
        if progress_proxy is not None:
            progress_proxy.update(1)
        # only compute for live nodes
        if not netw_nodes_live[netw_src_idx]:
            continue
        # generate the reachable data points and their respective distances
        # the aggregate_to_src_idx method will choose the closer direction of approach to a data point
        # from the nearest or next-nearest network node (calculated once globally, prior to this method)
        # tree_preds is not used by this function
        reachable_data, reachable_data_dist, tree_preds = aggregate_to_src_idx(netw_src_idx,
                                                                               node_data,
                                                                               edge_data,
                                                                               node_edge_map,
                                                                               data_map,
                                                                               global_max_dist,
                                                                               jitter_scale=jitter_scale,
                                                                               angular=angular)
        # IDW
        # the order of the loops matters because the nested aggregations happen per distance per numerical array
        # first pass: sums, counts, max and min
        for data_idx, (reachable, data_dist) in enumerate(zip(reachable_data, reachable_data_dist)):
            # skip data points that are flagged as not reachable from this source node
            if not reachable:
                continue
            # iterate the numerical arrays dimension
            for num_idx in range(n_n):
                # some values will be NaN
                num = numerical_arrays[num_idx, int(data_idx)]
                if np.isnan(num):
                    continue
                # iterate the distance dimensions
                for d_idx, (d, b) in enumerate(zip(distances, betas)):
                    # aggregate only if the data point is within the current distance threshold
                    if data_dist <= d:
                        # distance decay weight - computed once and shared by the weighted sum and count
                        wt = np.exp(-b * data_dist)
                        stats_sum[num_idx, d_idx, netw_src_idx] += num
                        stats_count[num_idx, d_idx, netw_src_idx] += 1
                        stats_sum_wt[num_idx, d_idx, netw_src_idx] += num * wt
                        stats_count_wt[num_idx, d_idx, netw_src_idx] += wt
                        # max - NaN marks "not yet set"
                        cur_max = stats_max[num_idx, d_idx, netw_src_idx]
                        if np.isnan(cur_max) or num > cur_max:
                            stats_max[num_idx, d_idx, netw_src_idx] = num
                        # min - NaN marks "not yet set"
                        cur_min = stats_min[num_idx, d_idx, netw_src_idx]
                        if np.isnan(cur_min) or num < cur_min:
                            stats_min[num_idx, d_idx, netw_src_idx] = num
        # finalise mean calculations - this is happening for a single netw_src_idx, so fairly fast
        for num_idx in range(n_n):
            for d_idx in range(d_n):
                # use divide so that division through zero doesn't trigger
                stats_mean[num_idx, d_idx, netw_src_idx] = np.divide(stats_sum[num_idx, d_idx, netw_src_idx],
                                                                     stats_count[num_idx, d_idx, netw_src_idx])
                stats_mean_wt[num_idx, d_idx, netw_src_idx] = np.divide(stats_sum_wt[num_idx, d_idx, netw_src_idx],
                                                                        stats_count_wt[num_idx, d_idx, netw_src_idx])
        # calculate variances - counts are already computed per above
        # weighted version is IDW by division through equivalently weighted counts above
        # second pass: accumulate squared deviations from the (now known) means
        for data_idx, (reachable, data_dist) in enumerate(zip(reachable_data, reachable_data_dist)):
            # skip data points that are flagged as not reachable from this source node
            if not reachable:
                continue
            # iterate the numerical arrays dimension
            for num_idx in range(n_n):
                # some values will be NaN
                num = numerical_arrays[num_idx, int(data_idx)]
                if np.isnan(num):
                    continue
                # iterate the distance dimensions
                for d_idx, (d, b) in enumerate(zip(distances, betas)):
                    # aggregate only if the data point is within the current distance threshold
                    if data_dist <= d:
                        sq_dev = np.square(num - stats_mean[num_idx, d_idx, netw_src_idx])
                        sq_dev_wt = np.square(num - stats_mean_wt[num_idx, d_idx, netw_src_idx]) * \
                            np.exp(-b * data_dist)
                        # NaN marks "not yet set" - the first contribution overwrites rather than adds
                        if np.isnan(stats_variance[num_idx, d_idx, netw_src_idx]):
                            stats_variance[num_idx, d_idx, netw_src_idx] = sq_dev
                            stats_variance_wt[num_idx, d_idx, netw_src_idx] = sq_dev_wt
                        else:
                            stats_variance[num_idx, d_idx, netw_src_idx] += sq_dev
                            stats_variance_wt[num_idx, d_idx, netw_src_idx] += sq_dev_wt
        # finalise variance calculations
        for num_idx in range(n_n):
            for d_idx in range(d_n):
                # use divide so that division through zero doesn't trigger
                stats_variance[num_idx, d_idx, netw_src_idx] = np.divide(
                    stats_variance[num_idx, d_idx, netw_src_idx], stats_count[num_idx, d_idx, netw_src_idx])
                stats_variance_wt[num_idx, d_idx, netw_src_idx] = np.divide(
                    stats_variance_wt[num_idx, d_idx, netw_src_idx], stats_count_wt[num_idx, d_idx, netw_src_idx])
    return stats_sum, stats_sum_wt, stats_mean, stats_mean_wt, stats_variance, stats_variance_wt, stats_max, stats_min
def aggregate_landuses(
    node_data: np.ndarray,
    edge_data: np.ndarray,
    node_edge_map: Dict,
    data_map: np.ndarray,
    distances: np.ndarray,
    betas: np.ndarray,
    landuse_encodings: np.ndarray = np.array([]),
    qs: np.ndarray = np.array([]),
    mixed_use_hill_keys: np.ndarray = np.array([]),
    mixed_use_other_keys: np.ndarray = np.array([]),
    accessibility_keys: np.ndarray = np.array([]),
    cl_disparity_wt_matrix: np.ndarray = np.array(np.full((0, 0), np.nan)),
    jitter_scale: float = 0.0,
    angular: bool = False,
    progress_proxy=None
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """
    Compute mixed-use diversity measures and land-use accessibilities for each live network node.

    For every live node, the land-use classes of the data points reported as reachable by
    ``aggregate_to_src_idx`` are counted per distance threshold; the requested diversity measures are then
    computed from those counts and the requested accessibilities are aggregated directly.

    NODE MAP:
    0 - x
    1 - y
    2 - live
    EDGE MAP:
    0 - start node
    1 - end node
    2 - length in metres
    3 - sum of angular travel along length
    4 - impedance factor
    5 - in bearing
    6 - out bearing
    DATA MAP:
    0 - x
    1 - y
    2 - assigned network index - nearest
    3 - assigned network index - next-nearest

    Parameters
    ----------
    node_data, edge_data, node_edge_map
        Network maps per the layouts above.
    data_map
        Data map per the layout above; must already be assigned to the network.
    distances
        Distance thresholds; ``distances[i]`` is paired with ``betas[i]``.
    betas
        Decay parameters paired index-wise with ``distances``.
    landuse_encodings
        One integer class code per data point; codes are used directly as array indices.
    qs
        Values of q for the hill diversity measures.
    mixed_use_hill_keys
        Hill measures to compute: 0 - hill diversity, 1 - branch distance weighted, 2 - pairwise distance
        weighted, 3 - pairwise matrix (disparity) weighted.
    mixed_use_other_keys
        Other measures to compute: 0 - shannon, 1 - gini-simpson, 2 - rao's quadratic.
    accessibility_keys
        Class codes for which accessibilities are aggregated.
    cl_disparity_wt_matrix
        Square class disparity weights matrix; required for hill key 3 and other key 2.
    jitter_scale, angular
        Passed through to ``aggregate_to_src_idx``.
    progress_proxy
        Optional object whose ``update(1)`` method is called once per network node.

    Returns
    -------
    tuple of np.ndarray
        - hill measures, shape ``(len(mixed_use_hill_keys), len(qs), len(distances), len(node_data))``
        - other measures, shape ``(len(mixed_use_other_keys), len(distances), len(node_data))``
        - unweighted accessibilities, shape ``(len(accessibility_keys), len(distances), len(node_data))``
        - weighted accessibilities, same shape, weighted by ``exp(-beta * distance)``

        The first dimension of each follows the order of the corresponding keys argument.

    Raises
    ------
    ValueError
        If land-use encodings are missing or mismatched in length, if no metrics are requested, if hill keys
        are given without any q, if keys are out of range or duplicated, or if a required disparity matrix is
        missing or not square. The ``checks`` helpers presumably raise for other malformed inputs.
    """
    checks.check_network_maps(node_data, edge_data, node_edge_map)
    # raises ValueError data points are not assigned to a network
    checks.check_data_map(data_map, check_assigned=True)
    checks.check_distances_and_betas(distances, betas)
    # check landuse encodings
    if len(landuse_encodings) == 0:
        raise ValueError('Mixed use metrics or land-use accessibilities require an array of landuse labels.')
    elif len(landuse_encodings) != len(data_map):
        raise ValueError('The number of landuse encodings does not match the number of data points.')
    else:
        checks.check_categorical_data(landuse_encodings)
    # catch completely missing metrics
    if len(mixed_use_hill_keys) == 0 and len(mixed_use_other_keys) == 0 and len(accessibility_keys) == 0:
        raise ValueError('No metrics specified, please specify at least one metric to compute.')
    # catch missing qs
    if len(mixed_use_hill_keys) != 0 and len(qs) == 0:
        raise ValueError('Hill diversity measures require that at least one value of q is specified.')
    # negative qs caught by hill diversity methods
    # check various problematic key combinations
    if len(mixed_use_hill_keys) != 0:
        if np.nanmin(mixed_use_hill_keys) < 0 or np.max(mixed_use_hill_keys) > 3:
            raise ValueError('Mixed-use "hill" keys out of range of 0:4.')
    if len(mixed_use_other_keys) != 0:
        if np.nanmin(mixed_use_other_keys) < 0 or np.max(mixed_use_other_keys) > 2:
            raise ValueError('Mixed-use "other" keys out of range of 0:3.')
    if len(accessibility_keys) != 0:
        max_ac_key = np.nanmax(landuse_encodings)
        if np.nanmin(accessibility_keys) < 0 or np.max(accessibility_keys) > max_ac_key:
            raise ValueError(
                'Negative or out of range accessibility key encountered. Keys must match class encodings.')
    # pairwise duplicate checks - each unordered pair is compared once
    for i in range(len(mixed_use_hill_keys)):
        for j in range(i + 1, len(mixed_use_hill_keys)):
            if mixed_use_hill_keys[i] == mixed_use_hill_keys[j]:
                raise ValueError('Duplicate mixed-use "hill" key.')
    for i in range(len(mixed_use_other_keys)):
        for j in range(i + 1, len(mixed_use_other_keys)):
            if mixed_use_other_keys[i] == mixed_use_other_keys[j]:
                raise ValueError('Duplicate mixed-use "other" key.')
    for i in range(len(accessibility_keys)):
        for j in range(i + 1, len(accessibility_keys)):
            if accessibility_keys[i] == accessibility_keys[j]:
                raise ValueError('Duplicate accessibility key.')

    def disp_check(disp_matrix):
        # the length of the disparity matrix vis-a-vis unique landuses is tested in underlying diversity functions
        if disp_matrix.ndim != 2 or disp_matrix.shape[0] != disp_matrix.shape[1]:
            raise ValueError('The disparity matrix must be a square NxN matrix.')
        if len(disp_matrix) == 0:
            raise ValueError('Hill disparity and Rao pairwise measures requires a class disparity weights matrix.')

    # check that missing or malformed disparity weights matrices are caught
    for k in mixed_use_hill_keys:
        if k == 3:  # hill disparity
            disp_check(cl_disparity_wt_matrix)
    for k in mixed_use_other_keys:
        if k == 2:  # raos pairwise
            disp_check(cl_disparity_wt_matrix)
    # establish variables
    netw_n = len(node_data)
    d_n = len(distances)
    q_n = len(qs)
    global_max_dist = float(np.nanmax(distances))
    netw_nodes_live = node_data[:, 2]
    # number of class slots - loop invariant, so computed once here rather than once per source node
    # (landuse_encodings is known to be non-empty at this point)
    mu_max_unique_cl = int(landuse_encodings.max() + 1)
    # setup data structures
    # hill mixed uses are structured separately to take values of q into account
    mixed_use_hill_data = np.full((4, q_n, d_n, netw_n), 0.0)  # 4 dim
    mixed_use_other_data = np.full((3, d_n, netw_n), 0.0)  # 3 dim
    accessibility_data = np.full((len(accessibility_keys), d_n, netw_n), 0.0)
    accessibility_data_wt = np.full((len(accessibility_keys), d_n, netw_n), 0.0)
    # iterate through each vert and aggregate
    # parallelise over n nodes:
    # each distance or stat array index is therefore only touched by one thread at a time
    # i.e. no need to use inner array deductions as with centralities
    for netw_src_idx in prange(netw_n):
        if progress_proxy is not None:
            progress_proxy.update(1)
        # only compute for live nodes
        if not netw_nodes_live[netw_src_idx]:
            continue
        # generate the reachable classes and their respective distances
        # these are non-unique - i.e. simply the class of each data point within the maximum distance
        # the aggregate_to_src_idx method will choose the closer direction of approach to a data point
        # from the nearest or next-nearest network node (calculated once globally, prior to local_landuses method)
        # tree_preds is not used by this function
        reachable_data, reachable_data_dist, tree_preds = aggregate_to_src_idx(netw_src_idx,
                                                                               node_data,
                                                                               edge_data,
                                                                               node_edge_map,
                                                                               data_map,
                                                                               global_max_dist,
                                                                               jitter_scale=jitter_scale,
                                                                               angular=angular)
        # LANDUSES
        # counts of each class type (array length per max unique classes - not just those within max distance)
        classes_counts = np.full((d_n, mu_max_unique_cl), 0)
        # nearest of each class type (likewise)
        classes_nearest = np.full((d_n, mu_max_unique_cl), np.inf)
        # iterate the reachable indices and related distances
        for data_idx, (reachable, data_dist) in enumerate(zip(reachable_data, reachable_data_dist)):
            if not reachable:
                continue
            # get the class category in integer form
            # all class codes were encoded to sequential integers - these correspond to the array indices
            cl_code = int(landuse_encodings[int(data_idx)])
            # iterate the distance dimensions
            for d_idx, (d, b) in enumerate(zip(distances, betas)):
                # increment class counts at respective distances if the distance is less than current d
                if data_dist <= d:
                    classes_counts[d_idx, cl_code] += 1
                    # if distance is nearer, update the nearest distance array too
                    if data_dist < classes_nearest[d_idx, cl_code]:
                        classes_nearest[d_idx, cl_code] = data_dist
                    # if within distance, and if in accessibility keys, then aggregate accessibility too
                    for ac_idx, ac_code in enumerate(accessibility_keys):
                        if ac_code == cl_code:
                            accessibility_data[ac_idx, d_idx, netw_src_idx] += 1
                            accessibility_data_wt[ac_idx, d_idx, netw_src_idx] += np.exp(-b * data_dist)
                            # if a match was found, then no need to check others
                            break
        # mixed uses can be calculated now that the local class counts are aggregated
        # iterate the distances and betas
        for d_idx, b in enumerate(betas):
            cl_counts = classes_counts[d_idx]
            cl_nearest = classes_nearest[d_idx]
            # mu keys determine which metrics to compute
            # don't confuse with indices
            # previously used dynamic indices in data structures - but obtuse if irregularly ordered keys
            for mu_hill_key in mixed_use_hill_keys:
                for q_idx, q_key in enumerate(qs):
                    if mu_hill_key == 0:
                        mixed_use_hill_data[0, q_idx, d_idx, netw_src_idx] = \
                            diversity.hill_diversity(cl_counts, q_key)
                    elif mu_hill_key == 1:
                        mixed_use_hill_data[1, q_idx, d_idx, netw_src_idx] = \
                            diversity.hill_diversity_branch_distance_wt(cl_counts, cl_nearest, q=q_key, beta=b)
                    elif mu_hill_key == 2:
                        mixed_use_hill_data[2, q_idx, d_idx, netw_src_idx] = \
                            diversity.hill_diversity_pairwise_distance_wt(cl_counts, cl_nearest, q=q_key, beta=b)
                    # land-use classification disparity hill diversity
                    # the wt matrix can be used without mapping because cl_counts is based on all classes
                    # regardless of whether they are reachable
                    elif mu_hill_key == 3:
                        mixed_use_hill_data[3, q_idx, d_idx, netw_src_idx] = \
                            diversity.hill_diversity_pairwise_matrix_wt(cl_counts,
                                                                        wt_matrix=cl_disparity_wt_matrix,
                                                                        q=q_key)
            for mu_other_key in mixed_use_other_keys:
                if mu_other_key == 0:
                    mixed_use_other_data[0, d_idx, netw_src_idx] = \
                        diversity.shannon_diversity(cl_counts)
                elif mu_other_key == 1:
                    mixed_use_other_data[1, d_idx, netw_src_idx] = \
                        diversity.gini_simpson_diversity(cl_counts)
                elif mu_other_key == 2:
                    mixed_use_other_data[2, d_idx, netw_src_idx] = \
                        diversity.raos_quadratic_diversity(cl_counts, wt_matrix=cl_disparity_wt_matrix)
    # send the data back in the same types and same order as the original keys - convert to int for indexing
    mu_hill_k_int = np.full(len(mixed_use_hill_keys), 0)
    for i, k in enumerate(mixed_use_hill_keys):
        mu_hill_k_int[i] = k
    mu_other_k_int = np.full(len(mixed_use_other_keys), 0)
    for i, k in enumerate(mixed_use_other_keys):
        mu_other_k_int[i] = k
    return mixed_use_hill_data[mu_hill_k_int], \
        mixed_use_other_data[mu_other_k_int], \
        accessibility_data, \
        accessibility_data_wt
def assign_to_network(data_map: np.ndarray,
                      node_data: np.ndarray,
                      edge_data: np.ndarray,
                      node_edge_map: Dict,
                      max_dist: float,
                      progress_proxy=None) -> np.ndarray:
    """
    Assign each data point to a nearest and (where found) next-nearest network node.

    To save unnecessary computation - this is done once and written to the data map.
    1 - find the closest network node from each data point
    2A - wind clockwise along the network to preferably find a block cycle surrounding the node
    2B - in event of topological traps, try anti-clockwise as well
    3A - select the closest block cycle node
    3B - if no enclosing cycle - simply use the closest node
    4 - find the neighbouring node that minimises the distance between the data point on "street-front"

    NODE MAP:
    0 - x
    1 - y
    2 - live
    EDGE MAP:
    0 - start node
    1 - end node
    2 - length in metres
    3 - sum of angular travel along length
    4 - impedance factor
    5 - entry bearing
    6 - exit bearing
    DATA MAP:
    0 - x
    1 - y
    2 - assigned network index - nearest
    3 - assigned network index - next-nearest

    Parameters
    ----------
    data_map
        Data map per the layout above. Columns 2 and 3 are overwritten in place for every data point that has
        a network node within ``max_dist``; other rows are left untouched.
    node_data, edge_data, node_edge_map
        Network maps per the layouts above.
    max_dist
        Passed to ``find_nearest`` and also used as the cut-off distance while walking the network.
    progress_proxy
        Optional object whose ``update(1)`` method is called once per data point.

    Returns
    -------
    np.ndarray
        The same ``data_map`` array that was passed in, after the in-place updates.
    """
    checks.check_network_maps(node_data, edge_data, node_edge_map)
    # views onto the coordinate columns of the node and data maps
    netw_coords = node_data[:, :2]
    netw_x_arr = node_data[:, 0]
    netw_y_arr = node_data[:, 1]
    data_coords = data_map[:, :2]
    data_x_arr = data_map[:, 0]
    data_y_arr = data_map[:, 1]
    total_count = len(data_map)
    for data_idx in prange(total_count):
        if progress_proxy is not None:
            progress_proxy.update(1)
        # find the nearest network node
        min_idx, min_dist = find_nearest(data_x_arr[data_idx], data_y_arr[data_idx], netw_x_arr, netw_y_arr, max_dist)
        # in some cases no network node will be within max_dist... so accept NaN
        # (the data map row is left as-is in this case)
        if np.isnan(min_idx):
            continue
        # nearest is initially set for this nearest node, but if a nearer street-edge is found, it will be overriden
        nearest = min_idx
        next_nearest = np.nan
        # set start node to nearest network node
        node_idx = int(min_idx)
        # keep track of visited nodes: pred_map[i] holds the node from which node i was reached (NaN if unvisited)
        pred_map = np.full(len(node_data), np.nan)
        # state: False for the first winding direction, True once the opposite direction is being attempted
        reversing = False
        # keep track of previous indices (NaN while still at the starting node)
        prev_idx = np.nan
        # iterate neighbours
        while True:
            # reset neighbour rotation and index counters
            rotation = np.nan
            nb_idx = np.nan
            # iterate the edges
            for edge_idx in node_edge_map[node_idx]:
                # get the edge's start and end node indices
                start, end = edge_data[edge_idx, :2]
                # cast to int for indexing
                new_idx = int(end)
                # don't follow self-loops
                if new_idx == node_idx:
                    continue
                # check that this isn't the previous node (already visited as neighbour from other direction)
                if np.isfinite(prev_idx) and new_idx == prev_idx:
                    continue
                # look for the new neighbour with the smallest rightwards (anti-clockwise arctan2) angle
                # measure the angle relative to the data point for the first node
                if np.isnan(prev_idx):
                    r = _calculate_rotation(netw_coords[int(new_idx)] - netw_coords[node_idx],
                                            data_coords[data_idx] - netw_coords[node_idx])
                # else relative to the previous node
                else:
                    r = _calculate_rotation(netw_coords[int(new_idx)] - netw_coords[node_idx],
                                            netw_coords[int(prev_idx)] - netw_coords[node_idx])
                # when reversing, take the complementary angle so that the least angle selects the neighbour
                # in the opposite winding direction
                if reversing:
                    r = 360 - r
                # if least angle, update
                if np.isnan(rotation) or r < rotation:
                    rotation = r
                    nb_idx = new_idx
            # allow backtracking if no neighbour is found - i.e. dead-ends
            if np.isnan(nb_idx):
                if np.isnan(pred_map[node_idx]):
                    # for isolated nodes: nb_idx == np.nan, pred_map[node_idx] == np.nan, and prev_idx == np.nan
                    if np.isnan(prev_idx):
                        break
                    # for isolated edges, the algorithm gets turned-around back to the starting node with nowhere to go
                    # nb_idx == np.nan, pred_map[node_idx] == np.nan
                    # in these cases, pass _closest_intersections the prev idx so that it has a predecessor to follow
                    # NOTE(review): d, n, n_n presumably are distance, nearest index and next-nearest index
                    d, n, n_n = _closest_intersections(node_data, data_coords[data_idx], pred_map, int(prev_idx))
                    if d < min_dist:
                        nearest = n
                        next_nearest = n_n
                    break
                # otherwise, go ahead and backtrack
                nb_idx = pred_map[node_idx]
            # if the distance is exceeded, reset and attempt in the other direction
            dist = np.hypot(netw_x_arr[int(nb_idx)] - data_x_arr[data_idx],
                            netw_y_arr[int(nb_idx)] - data_y_arr[data_idx])
            if dist > max_dist:
                # record the predecessor so that the walked route, including this final node, can be followed
                pred_map[int(nb_idx)] = node_idx
                d, n, n_n = _closest_intersections(node_data, data_coords[data_idx], pred_map, int(nb_idx))
                # if the distance to the street edge is less than the nearest node, or than the prior closest edge
                if d < min_dist:
                    min_dist = d
                    nearest = n
                    next_nearest = n_n
                # reverse and try in opposite direction - restart from the original nearest node
                if not reversing:
                    reversing = True
                    pred_map.fill(np.nan)
                    node_idx = int(min_idx)
                    prev_idx = np.nan
                    continue
                # both directions have now been attempted
                break
            # ignore the following conditions while backtracking
            # (if backtracking, the current node's predecessor will be equal to the new neighbour)
            if nb_idx != pred_map[node_idx]:
                # if the new nb node has already been visited then terminate, this prevent infinite loops
                # or, if the algorithm has circled the block back to the original starting node
                if not np.isnan(pred_map[int(nb_idx)]) or nb_idx == min_idx:
                    # set the final predecessor, BUT ONLY if re-encountered the original node
                    # this would otherwise occlude routes (e.g. backtracks) that have passed the same node twice
                    # (such routes are still able to recover the closest edge)
                    if nb_idx == min_idx:
                        pred_map[int(nb_idx)] = node_idx
                    d, n, n_n = _closest_intersections(node_data, data_coords[data_idx], pred_map, int(nb_idx))
                    if d < min_dist:
                        nearest = n
                        next_nearest = n_n
                    break
                # set predecessor (only if not backtracking)
                pred_map[int(nb_idx)] = node_idx
            # otherwise, keep going: step to the selected neighbour
            prev_idx = node_idx
            node_idx = int(nb_idx)
        # set in the data map
        # no race condition in spite of direct indexing because each is set only once?
        data_map[data_idx, 2] = nearest  # adj_idx
        # in some cases next nearest will be NaN
        # this is mostly in situations where it works to leave as NaN
        # e.g. access off dead-ends...
        data_map[data_idx, 3] = next_nearest  # next_adj_idx
    return data_map
def local_centrality(node_data: np.ndarray,
                     edge_data: np.ndarray,
                     node_edge_map: Dict,
                     distances: np.ndarray,
                     betas: np.ndarray,
                     measure_keys: tuple,
                     angular: bool = False,
                     suppress_progress: bool = False) -> np.ndarray:
    '''
    Compute localised centrality measures for every live node, at every distance threshold.

    Call from "compute_centrality", which handles high level checks on keys and heuristic flag.

    For each live source node a shortest-path tree is built (bounded by the largest distance threshold) and the
    requested measures are accumulated from it:
    - aggregative node measures and segment measures are written at the source node's index;
    - betweenness measures are written at the index of each intermediary node on the path from source to target.

    NODE MAP (columns of node_data):
    0 - x
    1 - y
    2 - live
    3 - ghosted

    EDGE MAP (columns of edge_data):
    0 - start node
    1 - end node
    2 - length in metres
    3 - sum of angular travel along length
    4 - impedance factor
    5 - in bearing
    6 - out bearing

    :param node_data: node map, one row per node (see NODE MAP).
    :param edge_data: edge map, one row per edge (see EDGE MAP).
    :param node_edge_map: node to edge lookup; only handed on to the checks and to the path / edge helpers.
    :param distances: distance thresholds; indexed in parallel with betas.
    :param betas: decay parameters, one per distance threshold.
    :param measure_keys: names of the measures to compute. With angular=False: 'node_density', 'node_farness',
        'node_cycles', 'node_harmonic', 'node_beta', 'segment_density', 'segment_harmonic', 'segment_beta',
        'node_betweenness', 'node_betweenness_beta', 'segment_betweenness'. With angular=True:
        'node_harmonic_angular', 'segment_harmonic_hybrid', 'node_betweenness_angular',
        'segment_betweeness_hybrid'.
    :param angular: selects the angular (simplest-path) key set and workflow instead of the shortest-path one.
    :param suppress_progress: if True, the progress bar helper is not called.
    :return: float32 array of shape (len(measure_keys), len(distances), len(node_data)); rows follow the order
        of measure_keys. Non-live nodes are never used as a source.
    :raises ValueError: if a key does not belong to the key set selected by angular, or if a key is duplicated.
    '''
    checks.check_distances_and_betas(distances, betas)
    checks.check_network_maps(node_data, edge_data, node_edge_map)

    # string comparisons will substantially slow down nested loops
    # hence the out-of-loop strategy to map strings to indices corresponding to respective measures
    # keep name and index relationships explicit:
    # *_keys hold the internal integer code of a measure, *_targets hold its row in the output array
    agg_keys = []
    agg_targets = []
    seg_keys = []
    seg_targets = []
    betw_keys = []
    betw_targets = []
    for m_idx, measure_name in enumerate(measure_keys):
        if not angular:
            # aggregating keys
            if measure_name == 'node_density':
                agg_keys.append(0)
                agg_targets.append(m_idx)
            elif measure_name == 'node_farness':
                agg_keys.append(1)
                agg_targets.append(m_idx)
            elif measure_name == 'node_cycles':
                agg_keys.append(2)
                agg_targets.append(m_idx)
            elif measure_name == 'node_harmonic':
                agg_keys.append(3)
                agg_targets.append(m_idx)
            elif measure_name == 'node_beta':
                agg_keys.append(4)
                agg_targets.append(m_idx)
            # segment keys (betweenness segments can be built during betweenness iters)
            elif measure_name == 'segment_density':
                seg_keys.append(0)
                seg_targets.append(m_idx)
            elif measure_name == 'segment_harmonic':
                seg_keys.append(1)
                seg_targets.append(m_idx)
            elif measure_name == 'segment_beta':
                seg_keys.append(2)
                seg_targets.append(m_idx)
            # betweenness keys
            elif measure_name == 'node_betweenness':
                betw_keys.append(0)
                betw_targets.append(m_idx)
            elif measure_name == 'node_betweenness_beta':
                betw_keys.append(1)
                betw_targets.append(m_idx)
            elif measure_name == 'segment_betweenness':
                betw_keys.append(2)
                betw_targets.append(m_idx)
            else:
                raise ValueError(''' Unable to match requested centrality measure key against available options. Shortest-path measures can't be mixed with simplest-path measures. Set angular=True if using simplest-path measures. ''')
        else:
            # aggregating keys
            if measure_name == 'node_harmonic_angular':
                agg_keys.append(5)
                agg_targets.append(m_idx)
            # segment keys
            elif measure_name == 'segment_harmonic_hybrid':
                seg_keys.append(3)
                seg_targets.append(m_idx)
            # betweenness keys
            elif measure_name == 'node_betweenness_angular':
                betw_keys.append(3)
                betw_targets.append(m_idx)
            # NOTE(review): this key is spelled 'betweeness' (single 'n'); left untouched because callers
            # have to pass exactly this string
            elif measure_name == 'segment_betweeness_hybrid':
                betw_keys.append(4)
                betw_targets.append(m_idx)
            else:
                raise ValueError(''' Unable to match requested centrality measure key against available options. Shortest-path measures can't be mixed with simplest-path measures. Set angular=False if using shortest-path measures. ''')

    # every measure may only be requested once
    if len(agg_keys) != len(set(agg_keys)) or \
            len(seg_keys) != len(set(seg_keys)) or \
            len(betw_keys) != len(set(betw_keys)):
        raise ValueError('Please remove duplicate measure key.')

    # flags
    betw_nodes = (0 in betw_keys or 1 in betw_keys or 3 in betw_keys)
    betw_segs = (2 in betw_keys or 4 in betw_keys)

    # prepare data arrays
    # establish variables
    n = len(node_data)
    d_n = len(distances)
    k_n = len(measure_keys)
    global_max_dist = np.nanmax(distances)
    nodes_live = node_data[:, 2]
    # not referenced further down in this function; retained as in the original
    nodes_ghosted = node_data[:, 3]

    # the shortest path is based on impedances -> be cognisant of cases where impedances are not based on true distance:
    # in such cases, distances are equivalent to the impedance heuristic shortest path, not shortest distance in metres
    measures_data = np.full((k_n, d_n, n), 0.0, dtype=np.float32)

    # handed to the progress bar helper together with the current index and the node count
    steps = int(n / 10000)

    # iterate through each vert and calculate the shortest path tree
    for src_idx in range(n):

        # numba no object mode can only handle basic printing
        # note that progress bar adds a performance penalty
        if not suppress_progress:
            checks.progress_bar(src_idx, n, steps)

        # only compute for live nodes
        if not nodes_live[src_idx]:
            continue

        # run the shortest tree dijkstra
        # keep in mind that predecessor map is based on impedance heuristic - which can be different from metres
        # distance map in metres still necessary for defining max distances and computing equivalent distance measures
        # RETURNS A SHORTEST PATH TREE MAP:
        # 0 - processed nodes
        # 1 - predecessors
        # 2 - distances
        # 3 - impedances
        # 4 - cycles
        # 5 - origin segments
        # 6 - last segments
        tree_map, tree_edges = shortest_path_tree(edge_data,
                                                  node_edge_map,
                                                  src_idx,
                                                  max_dist=global_max_dist,
                                                  angular=angular)
        tree_nodes = np.where(tree_map[:, 0])[0]
        tree_preds = tree_map[:, 1]
        tree_dists = tree_map[:, 2]
        tree_imps = tree_map[:, 3]
        tree_cycles = tree_map[:, 4]
        tree_origin_seg = tree_map[:, 5]
        tree_last_seg = tree_map[:, 6]

        # only build edge data if necessary
        if len(seg_keys) > 0:
            # can't do edge processing as part of shortest tree because all shortest paths have to be resolved first
            # visit all processed edges
            for edge_idx in np.where(tree_edges)[0]:
                # unpack
                seg_in_nd, seg_out_nd, seg_len, seg_ang, seg_imp_fact, seg_in_bear, seg_out_bear = \
                    edge_data[edge_idx]
                in_nd_idx = int(seg_in_nd)
                out_nd_idx = int(seg_out_nd)
                in_imp = tree_imps[in_nd_idx]
                out_imp = tree_imps[out_nd_idx]
                in_dist = tree_dists[in_nd_idx]
                out_dist = tree_dists[out_nd_idx]

                # don't process unreachable segments
                if np.isinf(in_dist) and np.isinf(out_dist):
                    continue

                # for conceptual simplicity, separate angular and non-angular workflows
                # non angular uses a split segment workflow
                # if the segment is on the shortest path then the second segment will squash down to naught
                if not angular:
                    # sort where a < b
                    if in_imp <= out_imp:
                        a = tree_dists[in_nd_idx]
                        a_imp = tree_imps[in_nd_idx]
                        b = tree_dists[out_nd_idx]
                        b_imp = tree_imps[out_nd_idx]
                    else:
                        a = tree_dists[out_nd_idx]
                        a_imp = tree_imps[out_nd_idx]
                        b = tree_dists[in_nd_idx]
                        b_imp = tree_imps[in_nd_idx]

                    # get the max distance along the segment: seg_len = (m - start_len) + (m - end_len)
                    # c and d variables can diverge per beneath
                    c = d = (seg_len + a + b) / 2
                    # c / d impedance should technically be the same if computed from either side
                    c_imp = d_imp = a_imp + (c - a) * seg_imp_fact

                    # iterate the distance and beta thresholds - from large to small for threshold snipping
                    # (c / d are pruned in place, which is only valid because thresholds are visited descending)
                    for d_idx in range(len(distances) - 1, -1, -1):
                        dist_cutoff = distances[d_idx]
                        beta = betas[d_idx]

                        # a-c segment
                        if a <= dist_cutoff:
                            if c > dist_cutoff:
                                c = dist_cutoff
                                c_imp = a_imp + (dist_cutoff - a) * seg_imp_fact
                            for seg_idx, seg_key in enumerate(seg_keys):
                                m_idx = seg_targets[seg_idx]
                                # 0 - segment density: plain length of the reachable portion
                                if seg_key == 0:
                                    measures_data[m_idx, d_idx, src_idx] += c - a
                                # 1 - segment harmonic: log integral, lower bound dropped when a_imp < 1
                                elif seg_key == 1:
                                    if a_imp < 1:
                                        measures_data[m_idx, d_idx, src_idx] += np.log(c_imp)
                                    else:
                                        measures_data[m_idx, d_idx, src_idx] += np.log(c_imp) - np.log(a_imp)
                                # 2 - segment beta: integral of exp(beta * x) between a_imp and c_imp
                                elif seg_key == 2:
                                    # catch division by zero
                                    if beta == -0.0:
                                        auc = c_imp - a_imp
                                    else:
                                        auc = (np.exp(beta * c_imp) - np.exp(beta * a_imp)) / beta
                                    measures_data[m_idx, d_idx, src_idx] += auc

                        # a-b segment - if on the shortest path then d == b - in which case, continue
                        if b == d:
                            continue

                        if b <= dist_cutoff:
                            if d > dist_cutoff:
                                d = dist_cutoff
                                d_imp = b_imp + (dist_cutoff - b) * seg_imp_fact
                            for seg_idx, seg_key in enumerate(seg_keys):
                                m_idx = seg_targets[seg_idx]
                                if seg_key == 0:
                                    measures_data[m_idx, d_idx, src_idx] += d - b
                                elif seg_key == 1:
                                    if b_imp < 1:
                                        measures_data[m_idx, d_idx, src_idx] += np.log(d_imp)
                                    else:
                                        measures_data[m_idx, d_idx, src_idx] += np.log(d_imp) - np.log(b_imp)
                                elif seg_key == 2:
                                    # catch division by zero
                                    # as beta approaches 0 the distance is weighted by 1 instead of < 1
                                    if beta == -0.0:
                                        auc = d_imp - b_imp
                                    else:
                                        auc = (np.exp(beta * d_imp) - np.exp(beta * b_imp)) / beta
                                    measures_data[m_idx, d_idx, src_idx] += auc

                # different workflow for angular - uses single segment
                # otherwise many assumptions if splitting segments re: angular vs. distance shortest-paths...
                else:
                    # get the approach angles for either side
                    # this involves impedance up to that point plus the turn onto the segment
                    # also add half of the segment's length-wise angular impedance
                    in_ang = in_imp + seg_ang / 2
                    # the source node won't have a predecessor
                    if in_nd_idx != src_idx:
                        # get the out bearing from the predecessor and calculate the turn onto current seg's in bearing
                        in_pred_idx = int(tree_preds[in_nd_idx])
                        e_i = _find_edge_idx(node_edge_map, edge_data, in_pred_idx, in_nd_idx)
                        in_pred_out_bear = edge_data[int(e_i), 6]
                        in_ang += np.abs((seg_in_bear - in_pred_out_bear + 180) % 360 - 180)

                    # same for other side
                    out_ang = out_imp + seg_ang / 2
                    if out_nd_idx != src_idx:
                        out_pred_idx = int(tree_preds[out_nd_idx])
                        e_i = _find_edge_idx(node_edge_map, edge_data, out_pred_idx, out_nd_idx)
                        out_pred_out_bear = edge_data[int(e_i), 6]
                        out_ang += np.abs((seg_out_bear - out_pred_out_bear + 180) % 360 - 180)

                    # the distance and angle are based on the smallest angular impedance onto the segment
                    # shortest-path segments will have exit bearings equal to the entry bearings
                    # in this case, select the closest by shortest distance
                    if in_ang == out_ang:
                        if in_dist < out_dist:
                            e = tree_dists[in_nd_idx]
                            ang = in_ang
                        else:
                            e = tree_dists[out_nd_idx]
                            ang = out_ang
                    elif in_ang < out_ang:
                        e = tree_dists[in_nd_idx]
                        ang = in_ang
                    else:
                        e = tree_dists[out_nd_idx]
                        ang = out_ang

                    # f is the entry distance plus segment length
                    f = e + seg_len

                    # iterate the distance thresholds - from large to small for threshold snipping
                    for d_idx in range(len(distances) - 1, -1, -1):
                        dist_cutoff = distances[d_idx]
                        if e <= dist_cutoff:
                            if f > dist_cutoff:
                                f = dist_cutoff
                            # 3 - harmonic segments hybrid
                            # Uses integral of segment distances as a base - then weighted by angular
                            for seg_idx, seg_key in enumerate(seg_keys):
                                if seg_key == 3:
                                    m_idx = seg_targets[seg_idx]
                                    # transform - prevents division by zero
                                    agg_ang = 1 + (ang / 180)
                                    # then aggregate - angular uses distances explicitly
                                    measures_data[m_idx, d_idx, src_idx] += (f - e) / agg_ang

        # aggregative and betweenness keys can be computed per to_idx
        for to_idx in tree_nodes:

            # skip self node
            if to_idx == src_idx:
                continue

            # unpack impedance and distance for to index
            to_imp = tree_imps[to_idx]
            to_dist = tree_dists[to_idx]

            # do not proceed if no route available
            if np.isinf(to_dist):
                continue

            # node weights removed since v0.10
            # switched to edge impedance factors

            # calculate centralities
            for d_idx in range(len(distances)):
                dist_cutoff = distances[d_idx]
                beta = betas[d_idx]
                if to_dist <= dist_cutoff:
                    # iterate aggregation functions
                    for agg_idx, agg_key in enumerate(agg_keys):
                        # fetch target index for writing data
                        # stored at equivalent index in agg_targets
                        m_idx = agg_targets[agg_idx]
                        # go through keys and write data
                        # 0 - simple node counts
                        if agg_key == 0:
                            measures_data[m_idx, d_idx, src_idx] += 1
                        # 1 - farness
                        elif agg_key == 1:
                            measures_data[m_idx, d_idx, src_idx] += to_dist
                        # 2 - cycles
                        elif agg_key == 2:
                            if tree_cycles[to_idx]:
                                measures_data[m_idx, d_idx, src_idx] += 1
                        # 3 - harmonic node
                        elif agg_key == 3:
                            measures_data[m_idx, d_idx, src_idx] += 1 / to_imp
                        # 4 - beta weighted node
                        elif agg_key == 4:
                            measures_data[m_idx, d_idx, src_idx] += np.exp(beta * to_dist)
                        # 5 - harmonic node - angular
                        elif agg_key == 5:
                            a = 1 + (to_imp / 180)  # transform angles
                            measures_data[m_idx, d_idx, src_idx] += 1 / a

            # check whether betweenness keys are present prior to proceeding
            if not betw_nodes and not betw_segs:
                continue

            # only process in one direction
            if to_idx < src_idx:
                continue

            # NODE WORKFLOW
            if betw_nodes:
                # betweenness - only counting truly between vertices, not starting and ending verts
                inter_idx = int(tree_preds[to_idx])
                while True:
                    # break out of while loop if the intermediary has reached the source node
                    if inter_idx == src_idx:
                        break
                    # iterate the distance thresholds
                    for d_idx in range(len(distances)):
                        dist_cutoff = distances[d_idx]
                        beta = betas[d_idx]
                        # check threshold
                        if tree_dists[to_idx] <= dist_cutoff:
                            # iterate betweenness functions
                            for betw_idx, betw_key in enumerate(betw_keys):
                                # fetch target index for writing data
                                # stored at equivalent index in betw_targets
                                m_idx = betw_targets[betw_idx]
                                # go through keys and write data
                                # simple count of nodes for betweenness
                                if betw_key == 0:
                                    measures_data[m_idx, d_idx, inter_idx] += 1
                                # 1 - beta weighted betweenness
                                # distance is based on distance between from and to vertices
                                # thus potential spatial impedance via between vertex
                                elif betw_key == 1:
                                    measures_data[m_idx, d_idx, inter_idx] += np.exp(beta * to_dist) * 1
                                # 3 - betweenness node count - angular heuristic version
                                elif betw_key == 3:
                                    measures_data[m_idx, d_idx, inter_idx] += 1
                    # follow the chain
                    inter_idx = int(tree_preds[inter_idx])

            # SEGMENT WORKFLOW
            if betw_segs:
                # segment versions only agg first and last segments - intervening bits are processed from other to nodes
                o_seg_len = edge_data[int(tree_origin_seg[to_idx])][2]
                l_seg_len = edge_data[int(tree_last_seg[to_idx])][2]
                min_seg_span = tree_dists[to_idx] - o_seg_len - l_seg_len
                # lower integration bounds for the origin (o) and last (l) segments
                o_1 = min_seg_span
                l_1 = min_seg_span
                # betweenness - only counting truly between vertices, not starting and ending verts
                inter_idx = int(tree_preds[to_idx])
                while True:
                    # break out of while loop if the intermediary has reached the source node
                    if inter_idx == src_idx:
                        break
                    # upper integration bounds are re-derived for every intermediary node:
                    # the threshold loop below prunes them in place, so carrying them over would leave each
                    # subsequent intermediary with bounds already clipped to the smallest threshold
                    o_2 = min_seg_span + o_seg_len
                    l_2 = min_seg_span + l_seg_len
                    # iterate the distance thresholds - from large to small for threshold snipping
                    for d_idx in range(len(distances) - 1, -1, -1):
                        dist_cutoff = distances[d_idx]
                        beta = betas[d_idx]
                        if min_seg_span <= dist_cutoff:
                            # prune if necessary
                            if o_2 > dist_cutoff:
                                o_2 = dist_cutoff
                            if l_2 > dist_cutoff:
                                l_2 = dist_cutoff
                            for betw_idx, betw_key in enumerate(betw_keys):
                                m_idx = betw_targets[betw_idx]
                                # 2 - segment version of betweenness
                                if betw_key == 2:
                                    # catch division by zero
                                    if beta == -0.0:
                                        auc = o_2 - o_1 + l_2 - l_1
                                    else:
                                        auc = (np.exp(beta * o_2) - np.exp(beta * o_1)) / beta + \
                                              (np.exp(beta * l_2) - np.exp(beta * l_1)) / beta
                                    measures_data[m_idx, d_idx, inter_idx] += auc
                                # 4 - betweeenness segment hybrid version
                                elif betw_key == 4:
                                    bt_ang = 1 + tree_imps[to_idx] / 180
                                    pt_a = o_2 - o_1
                                    pt_b = l_2 - l_1
                                    measures_data[m_idx, d_idx, inter_idx] += (pt_a + pt_b) / bt_ang
                    # follow the chain
                    inter_idx = int(tree_preds[inter_idx])

    return measures_data
def nX_from_graph_maps(node_uids: Union[tuple, list],
                       node_data: np.ndarray,
                       edge_data: np.ndarray,
                       node_edge_map: Dict,
                       networkX_graph: nx.Graph = None,
                       metrics_dict: dict = None) -> nx.Graph:
    '''
    Write node and edge map data (and optionally metrics) onto a networkX graph and return it.

    If networkX_graph is supplied, a copy of it is used as the backbone and the input graph itself is left
    untouched; otherwise a fresh nx.Graph is created with one node per entry of node_uids.

    :param node_uids: node identifiers, positionally aligned with the rows of node_data.
    :param node_data: one row per node, unpacked as x, y, live, ghosted.
    :param edge_data: one row per edge, unpacked as start node index, end node index, length, angle sum,
        impedance factor, start bearing, end bearing.
    :param node_edge_map: only passed through to the network map checks.
    :param networkX_graph: optional existing graph to copy and reuse as backbone.
    :param metrics_dict: optional mapping of node uid to metrics, stored under each node's 'metrics' key.
    :return: the populated graph.
    :raises ValueError: if a backbone graph's node count differs from the number of rows in node_data.
    :raises KeyError: if a uid from node_uids (backbone case) or from metrics_dict is missing from the graph.
    '''
    logger.info('Populating node and edge map data to a networkX graph.')

    if networkX_graph is None:
        logger.info('No existing graph found, creating new.')
        g_copy = nx.Graph()
        for new_uid in node_uids:
            g_copy.add_node(new_uid)
    else:
        logger.info('Reusing existing graph as backbone.')
        node_count = networkX_graph.number_of_nodes()
        if node_count != len(node_data):
            raise ValueError(
                'The number of nodes in the graph does not match the number of nodes in the node map.'
            )
        g_copy = networkX_graph.copy()
        # every supplied uid has to exist on the backbone already
        for uid in node_uids:
            if uid in g_copy:
                continue
            raise KeyError(
                f'Node uid {uid} not found in graph. '
                f'If passing a graph as backbone, the uids must match those supplied with the node and edge maps.'
            )

    # after above so that errors caught first
    checks.check_network_maps(node_data, edge_data, node_edge_map)

    logger.info('Unpacking node data.')
    for node_uid, node_row in tqdm(zip(node_uids, node_data), disable=checks.quiet_mode):
        x_coord, y_coord, is_live, is_ghosted = node_row
        node_attrs = g_copy.nodes[node_uid]
        node_attrs.update(x=x_coord,
                          y=y_coord,
                          live=bool(is_live),
                          ghosted=bool(is_ghosted))

    logger.info('Unpacking edge data.')
    for edge_row in tqdm(edge_data, disable=checks.quiet_mode):
        start_idx, end_idx, seg_length, seg_angle_sum, seg_imp_factor, bearing_in, bearing_out = edge_row
        # node indices are stored as floats in the edge map, hence the int casts before the uid lookup
        uid_from = node_uids[int(start_idx)]
        uid_to = node_uids[int(end_idx)]
        # networkX will silently add new edges / data over existing edges
        edge_attrs = {
            'length': seg_length,
            'angle_sum': seg_angle_sum,
            'imp_factor': seg_imp_factor,
            'start_bearing': bearing_in,
            'end_bearing': bearing_out
        }
        g_copy.add_edge(uid_from, uid_to, **edge_attrs)

    if metrics_dict is None:
        return g_copy

    logger.info('Unpacking metrics to nodes.')
    for uid, metrics in tqdm(metrics_dict.items(), disable=checks.quiet_mode):
        if uid not in g_copy:
            raise KeyError(
                f'Node uid {uid} not found in graph. '
                f'Data dictionary uids must match those supplied with the node and edge maps.'
            )
        g_copy.nodes[uid]['metrics'] = metrics

    return g_copy