def test_check_data_map(primal_graph):
    """Validation behaviour of checks.check_data_map for unassigned vs assigned data maps."""
    netw_layer = networks.NetworkLayerFromNX(primal_graph, distances=[500])
    mocked = mock.mock_data_dict(primal_graph)
    d_uids, d_map = layers.data_map_from_dict(mocked)
    # an unassigned data map must be rejected by default...
    with pytest.raises(ValueError):
        checks.check_data_map(d_map)
    # ...but accepted when assignment checking is disabled
    checks.check_data_map(d_map, check_assigned=False)
    # once assigned to the network, the default check passes
    d_map = data.assign_to_network(d_map,
                                   netw_layer._node_data,
                                   netw_layer._edge_data,
                                   netw_layer._node_edge_map,
                                   max_dist=400)
    checks.check_data_map(d_map)
    # zero-length data arrays are invalid
    with pytest.raises(ValueError):
        checks.check_data_map(np.full((0, 4), np.nan))
    # wrong dimensionality (fewer than the expected columns) is invalid
    with pytest.raises(ValueError):
        checks.check_data_map(d_map[:, :-1])
def test_hill_branch_wt_diversity(primal_graph):
    """The hill_branch_wt_diversity convenience method must match explicit compute_landuses."""
    for distances, betas in network_generator():
        G = primal_graph.copy()
        data_dict = mock.mock_data_dict(G)
        lu_labels = mock.mock_categorical_data(len(data_dict))
        # route 1: the dedicated convenience method
        N_a = networks.NetworkLayerFromNX(G, distances=distances)
        D_a = layers.DataLayerFromDict(data_dict)
        D_a.assign_to_network(N_a, max_dist=500)
        D_a.hill_branch_wt_diversity(lu_labels, qs=[0, 1, 2])
        # route 2: the general landuse workflow
        N_b = networks.NetworkLayerFromNX(G, distances=distances)
        D_b = layers.DataLayerFromDict(data_dict)
        D_b.assign_to_network(N_b, max_dist=500)
        D_b.compute_landuses(lu_labels, mixed_use_keys=['hill_branch_wt'], qs=[0, 1, 2])
        # both routes should agree for every q / distance combination
        for q in [0, 1, 2]:
            for d in distances:
                assert np.allclose(N_a.metrics['mixed_uses']['hill_branch_wt'][q][d],
                                   N_b.metrics['mixed_uses']['hill_branch_wt'][q][d],
                                   atol=0.001,
                                   rtol=0)
def test_Data_Layer_From_Dict(primal_graph):
    """DataLayerFromDict should reproduce the manual data_map_from_dict workflow."""
    data_dict = mock.mock_data_dict(primal_graph)
    data_uids, data_map = layers.data_map_from_dict(data_dict)
    # build the layer directly from the dict and compare its internals
    D = layers.DataLayerFromDict(data_dict)
    assert D.uids == data_uids
    assert np.allclose(D._data, data_map, equal_nan=True)
    # x / y accessor arrays should match the raw map columns
    assert np.allclose(D.data_x_arr, data_map[:, 0], atol=0.001, rtol=0)
    assert np.allclose(D.data_y_arr, data_map[:, 1], atol=0.001, rtol=0)
def test_dict_wgs_to_utm(primal_graph):
    """Round-trip UTM -> WGS84 -> UTM conversion, plus error handling for bad input."""
    # source dictionary in UTM coordinates
    G_utm = mock.mock_graph()
    utm_dict = mock.mock_data_dict(G_utm)
    # cast a deep copy to lat / lng
    wgs_dict = copy.deepcopy(utm_dict)
    for key, entry in wgs_dict.items():
        # be cognisant of parameter and return order: utm.to_latlon returns (lat, lng)
        lat, lng = utm.to_latlon(entry['x'], entry['y'], 30, 'U')
        entry['x'] = lng
        entry['y'] = lat
    # convert back to UTM
    back_converted = layers.dict_wgs_to_utm(wgs_dict)
    # round-trip values should match within a relaxed tolerance (rounding can be tricky)
    for key in utm_dict.keys():
        assert np.allclose(utm_dict[key]['x'], back_converted[key]['x'], atol=0.1, rtol=0)
        assert np.allclose(utm_dict[key]['y'], back_converted[key]['y'], atol=0.1, rtol=0)
    # a missing x or y attribute on any entry should raise
    for attr in ['x', 'y']:
        G_wgs = mock.mock_graph(wgs84_coords=True)
        wgs_data = mock.mock_data_dict(G_wgs)
        # strip the attribute from the first entry only
        first_key = next(iter(wgs_data.keys()))
        del wgs_data[first_key][attr]
        with pytest.raises(AttributeError):
            layers.dict_wgs_to_utm(wgs_data)
    # passing non-WGS (UTM) coordinates should also raise
    with pytest.raises(AttributeError):
        layers.dict_wgs_to_utm(utm_dict)
def test_data_map_from_dict(primal_graph):
    """data_map_from_dict: uid ordering, coordinate copying, NaN init of assignment columns."""
    mocked = mock.mock_data_dict(primal_graph)
    uids, d_map = layers.data_map_from_dict(mocked)
    # one row per dictionary entry, in the same order as the uids
    assert len(uids) == len(d_map) == len(mocked)
    for uid, row in zip(uids, d_map):
        x, y, nearest, next_nearest = row
        assert x == mocked[uid]['x']
        assert y == mocked[uid]['y']
        # assignment columns start out unassigned
        assert np.isnan(nearest)
        assert np.isnan(next_nearest)
    # removing either coordinate attribute (from all entries) should raise
    for attr in ['x', 'y']:
        for k in mocked.keys():
            del mocked[k][attr]
        with pytest.raises(AttributeError):
            layers.data_map_from_dict(mocked)
def test_metrics_to_dict(primal_graph):
    """
    metrics_to_dict should mirror the network layer's metrics state at each stage:
    empty, after centrality, and after data-derived (landuse) metrics.
    """
    # create a network layer and run some metrics
    N = networks.NetworkLayerFromNX(primal_graph, distances=[500, 1000])
    # check with no metrics
    metrics_dict = N.metrics_to_dict()
    dict_check(metrics_dict, N)
    # check with centrality metrics
    N.node_centrality(measures=['node_harmonic'])
    metrics_dict = N.metrics_to_dict()
    dict_check(metrics_dict, N)
    # check with data metrics
    data_dict = mock.mock_data_dict(primal_graph)
    landuse_labels = mock.mock_categorical_data(len(data_dict))
    numerical_data = mock.mock_numerical_data(len(data_dict))
    # FIX: the mocked data was previously generated but never computed against the
    # network, so this branch exercised nothing beyond the centrality case — assign
    # and compute land-uses so the dict round-trip covers data-derived metrics too
    D = layers.DataLayerFromDict(data_dict)
    D.assign_to_network(N, max_dist=500)
    D.compute_landuses(landuse_labels,
                       mixed_use_keys=['hill', 'shannon'],
                       accessibility_keys=['a', 'c'],
                       qs=[0, 1])
    # TODO(review): numerical_data remains unused — a stats computation presumably
    # belongs here as well; confirm the intended API before adding it
    metrics_dict = N.metrics_to_dict()
    dict_check(metrics_dict, N)
def test_find_nearest(primal_graph):
    """find_nearest should return the closest network node and its distance."""
    netw = networks.NetworkLayerFromNX(primal_graph, distances=[100])
    # generate some data
    data_layer = layers.DataLayerFromDict(mock.mock_data_dict(primal_graph))
    # for each data point, verify the reported nearest node against a brute-force scan
    for row in data_layer._data:
        pt_x, pt_y = row[0], row[1]
        min_idx, min_dist = data.find_nearest(pt_x, pt_y, netw.node_x_arr, netw.node_y_arr, max_dist=500)
        for node_idx, node_row in enumerate(netw._node_data):
            gap = np.sqrt((pt_x - node_row[0]) ** 2 + (pt_y - node_row[1]) ** 2)
            if node_idx == min_idx:
                # the winning node's distance should agree to rounding precision
                assert round(gap, 8) == round(min_dist, 8)
            else:
                # every other node must be strictly further away
                assert gap > min_dist
def test_compute_accessibilities(primal_graph):
    """compute_accessibilities convenience method vs explicit compute_landuses call."""
    for distances, betas in network_generator():
        G = primal_graph.copy()
        data_dict = mock.mock_data_dict(G)
        lu_labels = mock.mock_categorical_data(len(data_dict))
        # route 1: the dedicated accessibility method
        N_a = networks.NetworkLayerFromNX(G, distances=distances)
        D_a = layers.DataLayerFromDict(data_dict)
        D_a.assign_to_network(N_a, max_dist=500)
        D_a.compute_accessibilities(lu_labels, ['c'])
        # route 2: the general landuse workflow
        N_b = networks.NetworkLayerFromNX(G, distances=distances)
        D_b = layers.DataLayerFromDict(data_dict)
        D_b.assign_to_network(N_b, max_dist=500)
        D_b.compute_landuses(lu_labels, accessibility_keys=['c'])
        # weighted and non-weighted accessibilities should agree across distances
        for wt in ['weighted', 'non_weighted']:
            for d in distances:
                assert np.allclose(N_a.metrics['accessibility'][wt]['c'][d],
                                   N_b.metrics['accessibility'][wt]['c'][d],
                                   atol=0.001,
                                   rtol=0)
def test_mock_data_dict(primal_graph):
    """Mock data points must carry numeric x / y attributes inside the graph's bounding box."""
    data = mock.mock_data_dict(primal_graph)
    # bounding box of the source graph's node coordinates
    xs = [d['x'] for _, d in primal_graph.nodes(data=True)]
    ys = [d['y'] for _, d in primal_graph.nodes(data=True)]
    min_x, max_x = min(xs), max(xs)
    min_y, max_y = min(ys), max(ys)
    for v in data.values():
        # check that attributes are present and numeric
        # FIX: the 'x' check previously validated isinstance(v['y'], ...) — a
        # copy-paste bug that left v['x']'s type untested
        assert 'x' in v and isinstance(v['x'], (int, float))
        assert 'y' in v and isinstance(v['y'], (int, float))
        # generated points must fall within the graph's extents
        assert min_x <= v['x'] <= max_x
        assert min_y <= v['y'] <= max_y
def test_compute_landuses(primal_graph):
    """
    DataLayer.compute_landuses should agree with the underlying data.aggregate_landuses
    for individual metrics (hill, gini-simpson, accessibility) and for shuffled
    assortments of metric keys; also checks input-validation errors.
    """
    betas = np.array([0.01, 0.005])
    distances = networks.distance_from_beta(betas)
    # network layer
    N = networks.NetworkLayerFromNX(primal_graph, distances=distances)
    node_map = N._node_data
    edge_map = N._edge_data
    node_edge_map = N._node_edge_map
    # data layer
    data_dict = mock.mock_data_dict(primal_graph)
    qs = np.array([0, 1, 2])
    D = layers.DataLayerFromDict(data_dict)
    # check single metrics independently against underlying for some use-cases, e.g. hill, non-hill, accessibility...
    D.assign_to_network(N, max_dist=500)
    # generate some mock landuse data
    landuse_labels = mock.mock_categorical_data(len(data_dict))
    landuse_classes, landuse_encodings = layers.encode_categorical(landuse_labels)
    # compute hill mixed uses
    D.compute_landuses(landuse_labels, mixed_use_keys=['hill_branch_wt'], qs=qs)
    # test against underlying method
    data_map = D._data
    # key 1 corresponds to hill_branch_wt in the underlying hill-keys ordering
    mu_data_hill, mu_data_other, ac_data, ac_data_wt = data.aggregate_landuses(node_map,
                                                                               edge_map,
                                                                               node_edge_map,
                                                                               data_map,
                                                                               distances,
                                                                               betas,
                                                                               landuse_encodings,
                                                                               qs=qs,
                                                                               mixed_use_hill_keys=np.array([1]))
    for q_idx, q_key in enumerate(qs):
        for d_idx, d_key in enumerate(distances):
            assert np.allclose(N.metrics['mixed_uses']['hill_branch_wt'][q_key][d_key],
                               mu_data_hill[0][q_idx][d_idx],
                               atol=0.001,
                               rtol=0)
    # gini simpson
    D.compute_landuses(landuse_labels, mixed_use_keys=['gini_simpson'])
    # test against underlying method
    data_map = D._data
    # key 1 corresponds to gini_simpson in the "other" (non-hill) keys ordering
    mu_data_hill, mu_data_other, ac_data, ac_data_wt = data.aggregate_landuses(node_map,
                                                                               edge_map,
                                                                               node_edge_map,
                                                                               data_map,
                                                                               distances,
                                                                               betas,
                                                                               landuse_encodings,
                                                                               mixed_use_other_keys=np.array([1]))
    for d_idx, d_key in enumerate(distances):
        assert np.allclose(N.metrics['mixed_uses']['gini_simpson'][d_key],
                           mu_data_other[0][d_idx],
                           atol=0.001,
                           rtol=0)
    # accessibilities
    D.compute_landuses(landuse_labels, accessibility_keys=['c'])
    # test against underlying method
    data_map = D._data
    mu_data_hill, mu_data_other, ac_data, ac_data_wt = data.aggregate_landuses(
        node_map,
        edge_map,
        node_edge_map,
        data_map,
        distances,
        betas,
        landuse_encodings,
        accessibility_keys=np.array([landuse_classes.index('c')]))
    for d_idx, d_key in enumerate(distances):
        assert np.allclose(N.metrics['accessibility']['non_weighted']['c'][d_key],
                           ac_data[0][d_idx],
                           atol=0.001,
                           rtol=0)
        assert np.allclose(N.metrics['accessibility']['weighted']['c'][d_key],
                           ac_data_wt[0][d_idx],
                           atol=0.001,
                           rtol=0)
    # also check the number of returned types for a few assortments of metrics
    mixed_uses_hill_types = np.array(['hill', 'hill_branch_wt', 'hill_pairwise_wt', 'hill_pairwise_disparity'])
    mixed_use_other_types = np.array(['shannon', 'gini_simpson', 'raos_pairwise_disparity'])
    ac_codes = np.array(landuse_classes)
    # mixed uses hill
    mu_hill_random = np.arange(len(mixed_uses_hill_types))
    np.random.shuffle(mu_hill_random)
    # mixed uses other
    mu_other_random = np.arange(len(mixed_use_other_types))
    np.random.shuffle(mu_other_random)
    # accessibility
    ac_random = np.arange(len(landuse_classes))
    np.random.shuffle(ac_random)
    # mock disparity matrix
    mock_disparity_wt_matrix = np.full((len(landuse_classes), len(landuse_classes)), 1)
    # not necessary to do all labels, first few should do
    for mu_h_min in range(3):
        mu_h_keys = np.array(mu_hill_random[mu_h_min:])
        for mu_o_min in range(3):
            mu_o_keys = np.array(mu_other_random[mu_o_min:])
            for ac_min in range(3):
                ac_keys = np.array(ac_random[ac_min:])
                # in the final case, set accessibility to a single code otherwise an error would be raised
                if len(mu_h_keys) == 0 and len(mu_o_keys) == 0 and len(ac_keys) == 0:
                    ac_keys = np.array([0])
                # randomise order of keys and metrics
                mu_h_metrics = mixed_uses_hill_types[mu_h_keys]
                mu_o_metrics = mixed_use_other_types[mu_o_keys]
                ac_metrics = ac_codes[ac_keys]
                # prepare network and compute
                N_temp = networks.NetworkLayerFromNX(primal_graph, distances=distances)
                D_temp = layers.DataLayerFromDict(data_dict)
                D_temp.assign_to_network(N_temp, max_dist=500)
                D_temp.compute_landuses(landuse_labels,
                                        mixed_use_keys=list(mu_h_metrics) + list(mu_o_metrics),
                                        accessibility_keys=ac_metrics,
                                        cl_disparity_wt_matrix=mock_disparity_wt_matrix,
                                        qs=qs)
                # test against underlying method
                mu_data_hill, mu_data_other, ac_data, ac_data_wt = \
                    data.aggregate_landuses(node_map,
                                            edge_map,
                                            node_edge_map,
                                            data_map,
                                            distances,
                                            betas,
                                            landuse_encodings,
                                            qs=qs,
                                            mixed_use_hill_keys=mu_h_keys,
                                            mixed_use_other_keys=mu_o_keys,
                                            accessibility_keys=ac_keys,
                                            cl_disparity_wt_matrix=mock_disparity_wt_matrix)
                # hill metrics are indexed per q and per distance
                for mu_h_idx, mu_h_met in enumerate(mu_h_metrics):
                    for q_idx, q_key in enumerate(qs):
                        for d_idx, d_key in enumerate(distances):
                            assert np.allclose(N_temp.metrics['mixed_uses'][mu_h_met][q_key][d_key],
                                               mu_data_hill[mu_h_idx][q_idx][d_idx],
                                               atol=0.001,
                                               rtol=0)
                # non-hill metrics are indexed per distance only
                for mu_o_idx, mu_o_met in enumerate(mu_o_metrics):
                    for d_idx, d_key in enumerate(distances):
                        assert np.allclose(N_temp.metrics['mixed_uses'][mu_o_met][d_key],
                                           mu_data_other[mu_o_idx][d_idx],
                                           atol=0.001,
                                           rtol=0)
                # accessibilities carry both weighted and non-weighted variants
                for ac_idx, ac_met in enumerate(ac_metrics):
                    for d_idx, d_key in enumerate(distances):
                        assert np.allclose(N_temp.metrics['accessibility']['non_weighted'][ac_met][d_key],
                                           ac_data[ac_idx][d_idx],
                                           atol=0.001,
                                           rtol=0)
                        assert np.allclose(N_temp.metrics['accessibility']['weighted'][ac_met][d_key],
                                           ac_data_wt[ac_idx][d_idx],
                                           atol=0.001,
                                           rtol=0)
    # most integrity checks happen in underlying method, though check here for mismatching labels length and typos
    with pytest.raises(ValueError):
        D.compute_landuses(landuse_labels[-1], mixed_use_keys=['shannon'])
    with pytest.raises(ValueError):
        D.compute_landuses(landuse_labels, mixed_use_keys=['spelling_typo'])
    # don't check accessibility_labels for typos - because only warning is triggered (not all labels will be in all data)
    # check that unassigned data layer flags
    with pytest.raises(ValueError):
        D_new = layers.DataLayerFromDict(data_dict)
        D_new.compute_landuses(landuse_labels, mixed_use_keys=['shannon'])
# Script fragment: builds the intro / example plots (graph image plus metric
# computations feeding the subsequent plotting code).
base_path = os.getcwd()
plt.style.use('matplotlibrc')
###
# INTRO PLOT
G = mock.mock_graph()
plot.plot_nX(G, labels=True, node_size=80, path='images/graph.png', dpi=150)
# INTRO EXAMPLE PLOTS
G = graphs.nX_simple_geoms(G)
G = graphs.nX_decompose(G, 20)
N = networks.NetworkLayerFromNX(G, distances=[400, 800])
N.segment_centrality(measures=['segment_harmonic'])
data_dict = mock.mock_data_dict(G, random_seed=25)
D = layers.DataLayerFromDict(data_dict)
D.assign_to_network(N, max_dist=400)
landuse_labels = mock.mock_categorical_data(len(data_dict), random_seed=25)
D.hill_branch_wt_diversity(landuse_labels, qs=[0])
G_metrics = N.to_networkX()
# collect per-node metric values for plotting
segment_harmonic_vals = []
mixed_uses_vals = []
# NOTE(review): the loop variable `data` shadows the imported `data` module for the
# remainder of this script — consider renaming if further `data.*` calls follow
for node, data in G_metrics.nodes(data=True):
    segment_harmonic_vals.append(data['metrics']['centrality']['segment_harmonic'][800])
    mixed_uses_vals.append(data['metrics']['mixed_uses']['hill_branch_wt'][0][400])
# custom colourmap
def test_nX_from_graph_maps(primal_graph):
    """
    Round-trip networkX -> graph maps -> networkX, with and without a backbone graph
    and a metrics dictionary; also exercises error conditions for corrupt inputs.
    """
    # also see test_networks.test_to_networkX for tests on implementation via Network layer
    # check round trip to and from graph maps results in same graph
    # explicitly set live params for equality checks
    # graph_maps_from_networkX generates these implicitly if missing
    for n in primal_graph.nodes():
        # FIX: was np.random.randint(0, 1), which always returns 0 (high bound is
        # exclusive) so every node was live=False — use (0, 2) for a genuine coin-flip
        primal_graph.nodes[n]['live'] = bool(np.random.randint(0, 2))
    # test directly from and to graph maps
    node_uids, node_data, edge_data, node_edge_map = graphs.graph_maps_from_nX(primal_graph)
    G_round_trip = graphs.nX_from_graph_maps(node_uids, node_data, edge_data, node_edge_map)
    assert list(G_round_trip.nodes) == list(primal_graph.nodes)
    assert list(G_round_trip.edges) == list(primal_graph.edges)
    # check with metrics dictionary
    N = networks.NetworkLayerFromNX(primal_graph, distances=[500, 1000])
    N.node_centrality(measures=['node_harmonic'])
    data_dict = mock.mock_data_dict(primal_graph)
    landuse_labels = mock.mock_categorical_data(len(data_dict))
    D = layers.DataLayerFromDict(data_dict)
    D.assign_to_network(N, max_dist=400)
    D.compute_landuses(landuse_labels,
                       mixed_use_keys=['hill', 'shannon'],
                       accessibility_keys=['a', 'c'],
                       qs=[0, 1])
    metrics_dict = N.metrics_to_dict()
    # without backbone
    G_round_trip_data = graphs.nX_from_graph_maps(node_uids,
                                                  node_data,
                                                  edge_data,
                                                  node_edge_map,
                                                  metrics_dict=metrics_dict)
    for uid, metrics in metrics_dict.items():
        assert G_round_trip_data.nodes[uid]['metrics'] == metrics
    # with backbone
    G_round_trip_data = graphs.nX_from_graph_maps(node_uids,
                                                  node_data,
                                                  edge_data,
                                                  node_edge_map,
                                                  networkX_multigraph=primal_graph,
                                                  metrics_dict=metrics_dict)
    for uid, metrics in metrics_dict.items():
        assert G_round_trip_data.nodes[uid]['metrics'] == metrics
    # test with decomposed
    G_decomposed = graphs.nX_decompose(primal_graph, decompose_max=20)
    # set live explicitly
    for n in G_decomposed.nodes():
        G_decomposed.nodes[n]['live'] = bool(np.random.randint(0, 2))
    node_uids_d, node_data_d, edge_data_d, node_edge_map_d = graphs.graph_maps_from_nX(G_decomposed)
    G_round_trip_d = graphs.nX_from_graph_maps(node_uids_d, node_data_d, edge_data_d, node_edge_map_d)
    assert list(G_round_trip_d.nodes) == list(G_decomposed.nodes)
    # FIX: this loop previously iterated G_round_trip (the primal round-trip) while
    # comparing against G_decomposed — it only passed because 'live' was uniformly
    # False; the decomposed round-trip G_round_trip_d is the graph under test here
    for n, iter_node_data in G_round_trip_d.nodes(data=True):
        assert n in G_decomposed
        assert iter_node_data['live'] == G_decomposed.nodes[n]['live']
        assert iter_node_data['x'] == G_decomposed.nodes[n]['x']
        assert iter_node_data['y'] == G_decomposed.nodes[n]['y']
    assert G_round_trip_d.edges == G_decomposed.edges
    # error checks for when using backbone graph:
    # mismatching numbers of nodes
    corrupt_G = primal_graph.copy()
    corrupt_G.remove_node(0)
    with pytest.raises(ValueError):
        graphs.nX_from_graph_maps(node_uids,
                                  node_data,
                                  edge_data,
                                  node_edge_map,
                                  networkX_multigraph=corrupt_G)
    # mismatching node uid
    with pytest.raises(KeyError):
        corrupt_node_uids = list(node_uids)
        corrupt_node_uids[0] = 'boo'
        graphs.nX_from_graph_maps(corrupt_node_uids,
                                  node_data,
                                  edge_data,
                                  node_edge_map,
                                  networkX_multigraph=primal_graph)
    # missing edge
    with pytest.raises(KeyError):
        corrupt_primal_graph = primal_graph.copy()
        corrupt_primal_graph.remove_edge(0, 1)
        graphs.nX_from_graph_maps(node_uids,
                                  node_data,
                                  edge_data,
                                  node_edge_map,
                                  networkX_multigraph=corrupt_primal_graph)
def test_local_agg_time(primal_graph):
    """
    Timing tests for landuse and stats aggregations.

    Skipped on CI (GITHUB_ACTIONS); each wrapper is primed once before timing so
    any JIT compilation cost is excluded from the measurement.
    """
    if 'GITHUB_ACTIONS' in os.environ:
        return
    # silence progress logging so printing doesn't distort timings
    os.environ['CITYSEER_QUIET_MODE'] = '1'
    # generate node and edge maps
    node_uids, node_data, edge_data, node_edge_map, = graphs.graph_maps_from_nX(primal_graph)
    # setup data
    data_dict = mock.mock_data_dict(primal_graph, random_seed=13)
    data_uids, data_map = layers.data_map_from_dict(data_dict)
    data_map = data.assign_to_network(data_map, node_data, edge_data, node_edge_map, 500)
    # needs a large enough beta so that distance thresholds aren't encountered
    distances = np.array([np.inf])
    betas = networks.beta_from_distance(distances)
    qs = np.array([0, 1, 2])
    mock_categorical = mock.mock_categorical_data(len(data_map))
    landuse_classes, landuse_encodings = layers.encode_categorical(mock_categorical)
    mock_numerical = mock.mock_numerical_data(len(data_dict), num_arrs=2, random_seed=0)

    def assign_wrapper():
        # timed target: data-to-network assignment
        data.assign_to_network(data_map, node_data, edge_data, node_edge_map, 500)

    # prime the function
    assign_wrapper()
    iters = 20000
    # time and report - roughly 5.675
    func_time = timeit.timeit(assign_wrapper, number=iters)
    print(f'node_cent_wrapper: {func_time} for {iters} iterations')
    assert func_time < 10

    def landuse_agg_wrapper():
        # timed target: hill mixed-use aggregation
        mu_data_hill, mu_data_other, ac_data, ac_data_wt = data.aggregate_landuses(node_data,
                                                                                   edge_data,
                                                                                   node_edge_map,
                                                                                   data_map,
                                                                                   distances,
                                                                                   betas,
                                                                                   mixed_use_hill_keys=np.array([0, 1]),
                                                                                   landuse_encodings=landuse_encodings,
                                                                                   qs=qs,
                                                                                   angular=False)

    # prime the function
    landuse_agg_wrapper()
    iters = 20000
    # time and report - roughly 10.10
    func_time = timeit.timeit(landuse_agg_wrapper, number=iters)
    print(f'node_cent_wrapper: {func_time} for {iters} iterations')
    assert func_time < 15

    def stats_agg_wrapper():
        # compute
        # timed target: numerical stats aggregation
        data.aggregate_stats(node_data,
                             edge_data,
                             node_edge_map,
                             data_map,
                             distances,
                             betas,
                             numerical_arrays=mock_numerical,
                             angular=False)

    # prime the function
    stats_agg_wrapper()
    iters = 20000
    # time and report - roughly 4.96
    func_time = timeit.timeit(stats_agg_wrapper, number=iters)
    print(f'segment_cent_wrapper: {func_time} for {iters} iterations')
    assert func_time < 10
def test_local_aggregator_numerical_components(primal_graph):
    """
    aggregate_stats: max / min / sum / mean / variance per connected component.

    Uses a distance threshold large enough that every node reaches all data points
    in its component, so the expected values reduce to simple numpy aggregations
    over the data indices assigned to each component.
    """
    # generate node and edge maps
    node_uids, node_data, edge_data, node_edge_map = graphs.graph_maps_from_nX(primal_graph)
    # setup data
    data_dict = mock.mock_data_dict(primal_graph, random_seed=13)
    data_uids, data_map = layers.data_map_from_dict(data_dict)
    data_map = data.assign_to_network(data_map, node_data, edge_data, node_edge_map, 500)
    # for debugging
    # from cityseer.tools import plot
    # plot.plot_graph_maps(node_uids, node_data, edge_data, data_map)
    # set parameters - use a large enough distance such that simple non-weighted checks can be run for max, mean, variance
    betas = np.array([0.00125])
    distances = networks.distance_from_beta(betas)
    mock_numerical = mock.mock_numerical_data(len(data_dict), num_arrs=2, random_seed=0)
    # compute
    stats_sum, stats_sum_wt, stats_mean, stats_mean_wt, stats_variance, stats_variance_wt, stats_max, stats_min = \
        data.aggregate_stats(node_data,
                             edge_data,
                             node_edge_map,
                             data_map,
                             distances,
                             betas,
                             numerical_arrays=mock_numerical,
                             angular=False)
    # non connected portions of the graph will have different stats
    # used manual data plots from test_assign_to_network() to see which nodes the data points are assigned to
    # connected graph is from 0 to 48 -> assigned data points are all except 5, 8, 17, 33, 48
    connected_nodes_idx = list(range(49))
    # and the respective data assigned to connected portion of the graph
    connected_data_idx = [i for i in range(len(data_dict)) if i not in [5, 8, 9, 17, 18, 29, 33, 38, 48]]
    # isolated node = 49 -> assigned no data points
    # isolated nodes = 50 & 51 -> assigned data points = 17, 33
    # isolated loop = 52, 53, 54, 55 -> assigned data points = 5, 8, 9, 18, 29, 38, 48
    isolated_nodes_idx = [52, 53, 54, 55]
    isolated_data_idx = [5, 8, 9, 18, 29, 38, 48]
    # stats arrays are indexed [numerical array, distance, node]
    for stats_idx in range(len(mock_numerical)):
        for d_idx in range(len(distances)):
            # max
            # node 49 reaches no data, so aggregates that lack an identity are NaN
            assert np.isnan(stats_max[stats_idx, d_idx, 49])
            assert np.allclose(stats_max[stats_idx, d_idx, [50, 51]],
                               mock_numerical[stats_idx, [17, 33]].max(),
                               atol=0.001,
                               rtol=0)
            assert np.allclose(stats_max[stats_idx, d_idx, isolated_nodes_idx],
                               mock_numerical[stats_idx, isolated_data_idx].max(),
                               atol=0.001,
                               rtol=0)
            assert np.allclose(stats_max[stats_idx, d_idx, connected_nodes_idx],
                               mock_numerical[stats_idx, connected_data_idx].max(),
                               atol=0.001,
                               rtol=0)
            # min
            assert np.isnan(stats_min[stats_idx, d_idx, 49])
            assert np.allclose(stats_min[stats_idx, d_idx, [50, 51]],
                               mock_numerical[stats_idx, [17, 33]].min(),
                               atol=0.001,
                               rtol=0)
            assert np.allclose(stats_min[stats_idx, d_idx, isolated_nodes_idx],
                               mock_numerical[stats_idx, isolated_data_idx].min(),
                               atol=0.001,
                               rtol=0)
            assert np.allclose(stats_min[stats_idx, d_idx, connected_nodes_idx],
                               mock_numerical[stats_idx, connected_data_idx].min(),
                               atol=0.001,
                               rtol=0)
            # sum
            # sums default to zero (not NaN) where nothing is reachable
            assert stats_sum[stats_idx, d_idx, 49] == 0
            assert np.allclose(stats_sum[stats_idx, d_idx, [50, 51]],
                               mock_numerical[stats_idx, [17, 33]].sum(),
                               atol=0.001,
                               rtol=0)
            assert np.allclose(stats_sum[stats_idx, d_idx, isolated_nodes_idx],
                               mock_numerical[stats_idx, isolated_data_idx].sum(),
                               atol=0.001,
                               rtol=0)
            assert np.allclose(stats_sum[stats_idx, d_idx, connected_nodes_idx],
                               mock_numerical[stats_idx, connected_data_idx].sum(),
                               atol=0.001,
                               rtol=0)
            # mean
            assert np.isnan(stats_mean[stats_idx, d_idx, 49])
            assert np.allclose(stats_mean[stats_idx, d_idx, [50, 51]],
                               mock_numerical[stats_idx, [17, 33]].mean(),
                               atol=0.001,
                               rtol=0)
            assert np.allclose(stats_mean[stats_idx, d_idx, isolated_nodes_idx],
                               mock_numerical[stats_idx, isolated_data_idx].mean(),
                               atol=0.001,
                               rtol=0)
            assert np.allclose(stats_mean[stats_idx, d_idx, connected_nodes_idx],
                               mock_numerical[stats_idx, connected_data_idx].mean(),
                               atol=0.001,
                               rtol=0)
            # variance
            assert np.isnan(stats_variance[stats_idx, d_idx, 49])
            assert np.allclose(stats_variance[stats_idx, d_idx, [50, 51]],
                               mock_numerical[stats_idx, [17, 33]].var(),
                               atol=0.001,
                               rtol=0)
            assert np.allclose(stats_variance[stats_idx, d_idx, isolated_nodes_idx],
                               mock_numerical[stats_idx, isolated_data_idx].var(),
                               atol=0.001,
                               rtol=0)
            assert np.allclose(stats_variance[stats_idx, d_idx, connected_nodes_idx],
                               mock_numerical[stats_idx, connected_data_idx].var(),
                               atol=0.001,
                               rtol=0)
def test_assign_to_network(primal_graph):
    """
    data.assign_to_network: nearest / next-nearest node assignment against a
    visually-verified target table, plus max_dist edge cases (0 and 2000).
    """
    # create additional dead-end scenario
    primal_graph.remove_edge(14, 15)
    primal_graph.remove_edge(15, 28)
    # G = graphs.nX_auto_edge_params(G)
    G = graphs.nX_decompose(primal_graph, 50)
    node_uids, node_data, edge_data, node_edge_map = graphs.graph_maps_from_nX(G)
    # generate data
    data_dict = mock.mock_data_dict(G, random_seed=25)
    data_uids, data_map = layers.data_map_from_dict(data_dict)
    # override data point locations for test cases vis-a-vis isolated nodes and isolated edges
    data_map[18, :2] = [701200, 5719400]
    data_map[39, :2] = [700750, 5720025]
    data_map[26, :2] = [700400, 5719525]
    # 500m visually confirmed in plots
    data_map_1600 = data_map.copy()
    data_map_1600 = data.assign_to_network(data_map_1600,
                                           node_data,
                                           edge_data,
                                           node_edge_map,
                                           max_dist=1600)
    # rows are [data idx, nearest assigned node, next nearest assigned node];
    # np.nan marks cases where no next-nearest assignment exists
    targets = np.array([
        [0, 164, 163],
        [1, 42, 241],
        [2, 236, 235],
        [3, 48, 262],
        [4, 211, 212],
        [5, 236, 235],
        [6, 58, 57],
        [7, 72, 5],
        [8, 75, 76],
        [9, 92, 9],
        [10, 61, 62],
        [11, 96, 13],
        [12, 0, 59],
        [13, 98, 99],
        [14, 203, 202],
        [15, 121, 120],
        [16, 48, 262],
        [17, 2, 70],
        [18, 182, 183],
        [19, 158, 157],
        [20, 83, 84],
        [21, 2, np.nan],
        [22, 171, 170],
        [23, 266, 52],
        [24, 83, 84],
        [25, 88, 11],
        [26, 49, np.nan],
        [27, 19, 138],
        [28, 134, 135],
        [29, 262, 46],
        [30, 78, 9],
        [31, 188, 189],
        [32, 180, 181],
        [33, 95, 94],
        [34, 226, 225],
        [35, 110, 111],
        [36, 39, 228],
        [37, 158, 25],
        [38, 88, 87],
        [39, 263, np.nan],
        [40, 120, 121],
        [41, 146, 21],
        [42, 10, 97],
        [43, 119, 118],
        [44, 82, 5],
        [45, 11, 88],
        [46, 100, 99],
        [47, 138, 19],
        [48, 14, np.nan],
        [49, 106, 105]
    ])
    # for debugging
    # from cityseer.tools import plot
    # plot.plot_graph_maps(node_data, edge_data, data_map)
    # assignment map includes data x, data y, nearest assigned, next nearest assigned
    assert np.allclose(data_map_1600[:, 2:], targets[:, 1:], equal_nan=True, atol=0, rtol=0)
    # max distance of 0 should return all NaN
    data_map_test_0 = data_map.copy()
    data_map_test_0 = data.assign_to_network(data_map_test_0,
                                             node_data,
                                             edge_data,
                                             node_edge_map,
                                             max_dist=0)
    assert np.all(np.isnan(data_map_test_0[:, 2]))
    assert np.all(np.isnan(data_map_test_0[:, 3]))
    # max distance of 2000 should return no NaN for nearest
    # there will be some NaN for next nearest
    data_map_test_2000 = data_map.copy()
    data_map_test_2000 = data.assign_to_network(data_map_test_2000,
                                                node_data,
                                                edge_data,
                                                node_edge_map,
                                                max_dist=2000)
    assert not np.any(np.isnan(data_map_test_2000[:, 2]))
def test_aggregate_landuses_categorical_components(primal_graph):
    """
    data.aggregate_landuses: validates each mixed-use and accessibility metric
    against a manual per-node re-computation via the diversity module, then checks
    that the angular flag is passed through (angular vs non-angular results differ).
    """
    # generate node and edge maps
    node_uids, node_data, edge_data, node_edge_map, = graphs.graph_maps_from_nX(primal_graph)
    # setup data
    data_dict = mock.mock_data_dict(primal_graph, random_seed=13)
    data_uids, data_map = layers.data_map_from_dict(data_dict)
    data_map = data.assign_to_network(data_map, node_data, edge_data, node_edge_map, 500)
    # set parameters
    betas = np.array([0.02, 0.01, 0.005, 0.0025])
    distances = networks.distance_from_beta(betas)
    qs = np.array([0, 1, 2])
    mock_categorical = mock.mock_categorical_data(len(data_map))
    landuse_classes, landuse_encodings = layers.encode_categorical(mock_categorical)
    mock_matrix = np.full((len(landuse_classes), len(landuse_classes)), 1)
    # set the keys - add shuffling to be sure various orders work
    hill_keys = np.arange(4)
    np.random.shuffle(hill_keys)
    non_hill_keys = np.arange(3)
    np.random.shuffle(non_hill_keys)
    ac_keys = np.array([1, 2, 5])
    np.random.shuffle(ac_keys)
    # generate
    mu_data_hill, mu_data_other, ac_data, ac_data_wt = data.aggregate_landuses(node_data,
                                                                               edge_data,
                                                                               node_edge_map,
                                                                               data_map,
                                                                               distances,
                                                                               betas,
                                                                               landuse_encodings=landuse_encodings,
                                                                               qs=qs,
                                                                               mixed_use_hill_keys=hill_keys,
                                                                               mixed_use_other_keys=non_hill_keys,
                                                                               accessibility_keys=ac_keys,
                                                                               cl_disparity_wt_matrix=mock_matrix,
                                                                               angular=False)
    # hill
    # un-shuffle the returned arrays back to a fixed order via the key positions
    hill = mu_data_hill[np.where(hill_keys == 0)][0]
    hill_branch_wt = mu_data_hill[np.where(hill_keys == 1)][0]
    hill_pw_wt = mu_data_hill[np.where(hill_keys == 2)][0]
    hill_disp_wt = mu_data_hill[np.where(hill_keys == 3)][0]
    # non hill
    shannon = mu_data_other[np.where(non_hill_keys == 0)][0]
    gini = mu_data_other[np.where(non_hill_keys == 1)][0]
    raos = mu_data_other[np.where(non_hill_keys == 2)][0]
    # access non-weighted
    ac_1_nw = ac_data[np.where(ac_keys == 1)][0]
    ac_2_nw = ac_data[np.where(ac_keys == 2)][0]
    ac_5_nw = ac_data[np.where(ac_keys == 5)][0]
    # access weighted
    ac_1_w = ac_data_wt[np.where(ac_keys == 1)][0]
    ac_2_w = ac_data_wt[np.where(ac_keys == 2)][0]
    ac_5_w = ac_data_wt[np.where(ac_keys == 5)][0]
    # test manual metrics against all nodes
    mu_max_unique = len(landuse_classes)
    # test against various distances
    for d_idx in range(len(distances)):
        dist_cutoff = distances[d_idx]
        beta = betas[d_idx]
        for src_idx in range(len(primal_graph)):
            reachable_data, reachable_data_dist, tree_preds = data.aggregate_to_src_idx(src_idx,
                                                                                        node_data,
                                                                                        edge_data,
                                                                                        node_edge_map,
                                                                                        data_map,
                                                                                        dist_cutoff)
            # counts of each class type (array length per max unique classes - not just those within max distance)
            cl_counts = np.full(mu_max_unique, 0)
            # nearest of each class type (likewise)
            cl_nearest = np.full(mu_max_unique, np.inf)
            # aggregate
            a_1_nw = 0
            a_2_nw = 0
            a_5_nw = 0
            a_1_w = 0
            a_2_w = 0
            a_5_w = 0
            # iterate reachable
            for data_idx, (reachable, data_dist) in enumerate(zip(reachable_data, reachable_data_dist)):
                if not reachable:
                    continue
                cl = landuse_encodings[data_idx]
                # double check distance is within threshold
                assert data_dist <= dist_cutoff
                # update the class counts
                cl_counts[cl] += 1
                # if distance is nearer, update the nearest distance array too
                if data_dist < cl_nearest[cl]:
                    cl_nearest[cl] = data_dist
                # aggregate accessibility codes
                if cl == 1:
                    a_1_nw += 1
                    a_1_w += np.exp(-beta * data_dist)
                elif cl == 2:
                    a_2_nw += 1
                    a_2_w += np.exp(-beta * data_dist)
                elif cl == 5:
                    a_5_nw += 1
                    a_5_w += np.exp(-beta * data_dist)
            # assertions
            assert ac_1_nw[d_idx, src_idx] == a_1_nw
            assert ac_2_nw[d_idx, src_idx] == a_2_nw
            assert ac_5_nw[d_idx, src_idx] == a_5_nw
            assert ac_1_w[d_idx, src_idx] == a_1_w
            assert ac_2_w[d_idx, src_idx] == a_2_w
            assert ac_5_w[d_idx, src_idx] == a_5_w
            assert hill[0, d_idx, src_idx] == diversity.hill_diversity(cl_counts, 0)
            assert hill[1, d_idx, src_idx] == diversity.hill_diversity(cl_counts, 1)
            assert hill[2, d_idx, src_idx] == diversity.hill_diversity(cl_counts, 2)
            assert hill_branch_wt[0, d_idx, src_idx] == \
                   diversity.hill_diversity_branch_distance_wt(cl_counts, cl_nearest, 0, beta)
            assert hill_branch_wt[1, d_idx, src_idx] == \
                   diversity.hill_diversity_branch_distance_wt(cl_counts, cl_nearest, 1, beta)
            assert hill_branch_wt[2, d_idx, src_idx] == \
                   diversity.hill_diversity_branch_distance_wt(cl_counts, cl_nearest, 2, beta)
            assert hill_pw_wt[0, d_idx, src_idx] == \
                   diversity.hill_diversity_pairwise_distance_wt(cl_counts, cl_nearest, 0, beta)
            assert hill_pw_wt[1, d_idx, src_idx] == \
                   diversity.hill_diversity_pairwise_distance_wt(cl_counts, cl_nearest, 1, beta)
            assert hill_pw_wt[2, d_idx, src_idx] == \
                   diversity.hill_diversity_pairwise_distance_wt(cl_counts, cl_nearest, 2, beta)
            assert hill_disp_wt[0, d_idx, src_idx] == \
                   diversity.hill_diversity_pairwise_matrix_wt(cl_counts, mock_matrix, 0)
            assert hill_disp_wt[1, d_idx, src_idx] == \
                   diversity.hill_diversity_pairwise_matrix_wt(cl_counts, mock_matrix, 1)
            assert hill_disp_wt[2, d_idx, src_idx] == \
                   diversity.hill_diversity_pairwise_matrix_wt(cl_counts, mock_matrix, 2)
            assert shannon[d_idx, src_idx] == diversity.shannon_diversity(cl_counts)
            assert gini[d_idx, src_idx] == diversity.gini_simpson_diversity(cl_counts)
            assert raos[d_idx, src_idx] == diversity.raos_quadratic_diversity(cl_counts, mock_matrix)
    # check that angular is passed-through
    # actual angular tests happen in test_shortest_path_tree()
    # here the emphasis is simply on checking that the angular instruction gets chained through
    # setup dual data
    G_dual = graphs.nX_to_dual(primal_graph)
    node_labels_dual, node_data_dual, edge_data_dual, node_edge_map_dual = graphs.graph_maps_from_nX(G_dual)
    data_dict_dual = mock.mock_data_dict(G_dual, random_seed=13)
    data_uids_dual, data_map_dual = layers.data_map_from_dict(data_dict_dual)
    data_map_dual = data.assign_to_network(data_map_dual, node_data_dual, edge_data_dual, node_edge_map_dual, 500)
    mock_categorical = mock.mock_categorical_data(len(data_map_dual))
    landuse_classes_dual, landuse_encodings_dual = layers.encode_categorical(mock_categorical)
    mock_matrix = np.full((len(landuse_classes_dual), len(landuse_classes_dual)), 1)
    mu_hill_dual, mu_other_dual, ac_dual, ac_wt_dual = data.aggregate_landuses(node_data_dual,
                                                                               edge_data_dual,
                                                                               node_edge_map_dual,
                                                                               data_map_dual,
                                                                               distances,
                                                                               betas,
                                                                               landuse_encodings_dual,
                                                                               qs=qs,
                                                                               mixed_use_hill_keys=hill_keys,
                                                                               mixed_use_other_keys=non_hill_keys,
                                                                               accessibility_keys=ac_keys,
                                                                               cl_disparity_wt_matrix=mock_matrix,
                                                                               angular=True)
    mu_hill_dual_sidestep, mu_other_dual_sidestep, ac_dual_sidestep, ac_wt_dual_sidestep = \
        data.aggregate_landuses(node_data_dual,
                                edge_data_dual,
                                node_edge_map_dual,
                                data_map_dual,
                                distances,
                                betas,
                                landuse_encodings_dual,
                                qs=qs,
                                mixed_use_hill_keys=hill_keys,
                                mixed_use_other_keys=non_hill_keys,
                                accessibility_keys=ac_keys,
                                cl_disparity_wt_matrix=mock_matrix,
                                angular=False)
    # angular and non-angular runs must produce different results if the flag is honoured
    assert not np.allclose(mu_hill_dual, mu_hill_dual_sidestep, atol=0.001, rtol=0)
    assert not np.allclose(mu_other_dual, mu_other_dual_sidestep, atol=0.001, rtol=0)
    assert not np.allclose(ac_dual, ac_dual_sidestep, atol=0.001, rtol=0)
    assert not np.allclose(ac_wt_dual, ac_wt_dual_sidestep, atol=0.001, rtol=0)
def test_aggregate_landuses_signatures(primal_graph):
    """Check that aggregate_landuses rejects malformed argument combinations."""
    # build the network maps from the primal graph
    node_uids, node_data, edge_data, node_edge_map = graphs.graph_maps_from_nX(primal_graph)
    # prepare a data layer and assign it to the network
    data_dict = mock.mock_data_dict(primal_graph, random_seed=13)
    data_uids, data_map = layers.data_map_from_dict(data_dict)
    data_map = data.assign_to_network(data_map, node_data, edge_data, node_edge_map, 500)
    # distance / beta parameters plus hill q values
    betas = np.array([0.02, 0.01, 0.005, 0.0025])
    distances = networks.distance_from_beta(betas)
    qs = np.array([0, 1, 2])
    mock_categorical = mock.mock_categorical_data(len(data_map))
    landuse_classes, landuse_encodings = layers.encode_categorical(mock_categorical)

    def agg(**kwargs):
        # shorthand wrapping the invariant positional arguments
        return data.aggregate_landuses(node_data, edge_data, node_edge_map, data_map,
                                       distances, betas, **kwargs)

    # empty land_use encodings should be caught
    with pytest.raises(ValueError):
        agg(mixed_use_hill_keys=np.array([0]))
    # land_use encodings shorter than the data map should be caught
    with pytest.raises(ValueError):
        agg(landuse_encodings=landuse_encodings[:-1], mixed_use_other_keys=np.array([0]))
    # requesting no metrics at all should be caught
    with pytest.raises(ValueError):
        agg(landuse_encodings=landuse_encodings)
    # hill keys without accompanying qs should be caught
    with pytest.raises(ValueError):
        agg(mixed_use_hill_keys=np.array([0]), landuse_encodings=landuse_encodings)
    # problematic mixed-use and accessibility keys: negatives, out of range, duplicates
    bad_key_combos = [
        # negatives
        ([-1], [1], [1]),
        ([1], [-1], [1]),
        ([1], [1], [-1]),
        # out of range
        ([4], [1], [1]),
        ([1], [3], [1]),
        ([1], [1], [max(landuse_encodings) + 1]),
        # duplicates
        ([1, 1], [1], [1]),
        ([1], [1, 1], [1]),
        ([1], [1], [1, 1]),
    ]
    for mu_h_key, mu_o_key, ac_key in bad_key_combos:
        with pytest.raises(ValueError):
            agg(landuse_encodings=landuse_encodings,
                qs=qs,
                mixed_use_hill_keys=np.array(mu_h_key),
                mixed_use_other_keys=np.array(mu_o_key),
                accessibility_keys=np.array(ac_key))
    for h_key, o_key in (([3], []), ([], [2])):
        # disparity-weighted indices require a disparity matrix
        with pytest.raises(ValueError):
            agg(landuse_encodings=landuse_encodings,
                qs=qs,
                mixed_use_hill_keys=np.array(h_key),
                mixed_use_other_keys=np.array(o_key))
        # and a non-square disparity matrix is rejected
        mock_matrix = np.full((len(landuse_classes), len(landuse_classes)), 1)
        with pytest.raises(ValueError):
            agg(landuse_encodings=landuse_encodings,
                qs=qs,
                mixed_use_hill_keys=np.array(h_key),
                mixed_use_other_keys=np.array(o_key),
                cl_disparity_wt_matrix=mock_matrix[:-1])
def test_aggregate_to_src_idx(primal_graph):
    """
    Check data.aggregate_to_src_idx against a manual recomputation of the
    nearest / next-nearest network-assignment distances for each data point.

    Refactors: the previously duplicated nearest / next-nearest distance
    computation (identical except for the data-map column index) is extracted
    into a single helper, and the loop-invariant coordinate arrays are hoisted
    out of the per-source loop.
    """
    node_uids, node_data, edge_data, node_edge_map = graphs.graph_maps_from_nX(primal_graph)
    # network node coordinates are invariant across all loops below
    netw_x_arr = node_data[:, 0]
    netw_y_arr = node_data[:, 1]
    # generate data
    data_dict = mock.mock_data_dict(primal_graph, random_seed=13)
    data_uids, data_map = layers.data_map_from_dict(data_dict)

    def assigned_dist(data_map_temp, d_idx, col, tree_dists, max_dist):
        """
        Distance from the source node to data point d_idx routed via the network
        node recorded in data-map column `col` (2 = nearest, 3 = next-nearest
        assignment). Returns np.inf when unassigned or when the assigned node
        lies at or beyond the max_dist cutoff.
        """
        # no network node assigned in this column
        if not np.isfinite(data_map_temp[d_idx, col]):
            return np.inf
        netw_idx = int(data_map_temp[d_idx, col])
        # the assigned network node must itself be within the cutoff distance
        if not tree_dists[netw_idx] < max_dist:
            return np.inf
        # crow-flies distance from the data point to the assigned network node...
        d_d = np.hypot(data_map_temp[d_idx, 0] - netw_x_arr[netw_idx],
                       data_map_temp[d_idx, 1] - netw_y_arr[netw_idx])
        # ...plus the network path distance from the source to the assigned node
        return d_d + tree_dists[netw_idx]

    for max_dist in [400, 750]:
        # in this case, use same assignment max dist as search max dist
        data_map_temp = data_map.copy()
        data_map_temp = data.assign_to_network(data_map_temp, node_data, edge_data, node_edge_map,
                                               max_dist=max_dist)
        for angular in [True, False]:
            for netw_src_idx in range(len(node_data)):
                # aggregate to src...
                reachable_data, reachable_data_dist, tree_preds = data.aggregate_to_src_idx(
                    netw_src_idx, node_data, edge_data, node_edge_map, data_map_temp, max_dist,
                    angular=angular)
                # for debugging
                # from cityseer.tools import plot
                # plot.plot_graph_maps(node_uids, node_data, edge_data, data_map)
                # compare against manually computed network distances
                tree_map, tree_edges = centrality.shortest_path_tree(edge_data,
                                                                     node_edge_map,
                                                                     netw_src_idx,
                                                                     max_dist=max_dist,
                                                                     angular=angular)
                tree_dists = tree_map[:, 2]
                # verify distances vs. the max for each data point
                for d_idx in range(len(data_map_temp)):
                    # check the integrity of the distances and classes
                    reachable = reachable_data[d_idx]
                    reachable_dist = reachable_data_dist[d_idx]
                    # candidate routes via the nearest (col 2) and next-nearest (col 3) assignments
                    nearest_dist = assigned_dist(data_map_temp, d_idx, 2, tree_dists, max_dist)
                    next_nearest_dist = assigned_dist(data_map_temp, d_idx, 3, tree_dists, max_dist)
                    # now check distance integrity
                    if np.isinf(reachable_dist):
                        # unreachable: both candidate routes must exceed the cutoff
                        assert not reachable
                        assert nearest_dist > max_dist and next_nearest_dist > max_dist
                    else:
                        # reachable: the reported distance is the shorter candidate route
                        assert reachable
                        assert reachable_dist <= max_dist
                        if nearest_dist < next_nearest_dist:
                            assert reachable_dist == nearest_dist
                        else:
                            assert reachable_dist == next_nearest_dist
def test_compute_stats(primal_graph):
    """
    Stats component: compare one-key-at-a-time computation, batched computation,
    and the underlying data.aggregate_stats method; then check that malformed
    key / array combinations raise.
    """
    betas = np.array([0.01, 0.005])
    distances = networks.distance_from_beta(betas)
    # two identical network layers: one for single-key runs, one for the batched run
    N_single = networks.NetworkLayerFromNX(primal_graph, distances=distances)
    N_multi = networks.NetworkLayerFromNX(primal_graph, distances=distances)
    node_map = N_multi._node_data
    edge_map = N_multi._edge_data
    node_edge_map = N_multi._node_edge_map
    # matching data layers, each assigned to its network layer
    data_dict = mock.mock_data_dict(primal_graph)
    D_single = layers.DataLayerFromDict(data_dict)
    D_multi = layers.DataLayerFromDict(data_dict)
    D_single.assign_to_network(N_single, max_dist=500)
    D_multi.assign_to_network(N_multi, max_dist=500)
    # two mock numerical data arrays
    mock_numeric = mock.mock_numerical_data(len(data_dict), num_arrs=2)
    # compute stats one key at a time vs. both keys at once
    D_single.compute_stats(stats_keys='boo', stats_data_arrs=mock_numeric[0])
    D_single.compute_stats(stats_keys='baa', stats_data_arrs=mock_numeric[1])
    D_multi.compute_stats(stats_keys=['boo', 'baa'], stats_data_arrs=mock_numeric)
    # reference computation via the underlying method
    data_map = D_single._data
    stats_sum, stats_sum_wt, stats_mean, stats_mean_wt, stats_variance, stats_variance_wt, stats_max, stats_min = \
        data.aggregate_stats(node_map, edge_map, node_edge_map, data_map, distances, betas,
                             numerical_arrays=mock_numeric)
    # metric label -> reference array, in the order the assertions iterate
    stats_lookup = {
        'max': stats_max,
        'min': stats_min,
        'sum': stats_sum,
        'sum_weighted': stats_sum_wt,
        'mean': stats_mean,
        'mean_weighted': stats_mean_wt,
        'variance': stats_variance,
        'variance_weighted': stats_variance_wt,
    }
    for num_idx, num_label in enumerate(['boo', 'baa']):
        for s_key, stats in stats_lookup.items():
            for d_idx, d_key in enumerate(distances):
                single_vals = N_single.metrics['stats'][num_label][s_key][d_key]
                multi_vals = N_multi.metrics['stats'][num_label][s_key][d_key]
                manual_vals = stats[num_idx][d_idx]
                # one-at-a-time computed vs. multiply computed
                assert np.allclose(single_vals, multi_vals, atol=0.001, rtol=0, equal_nan=True)
                # one-at-a-time vs. manual
                assert np.allclose(single_vals, manual_vals, atol=0.001, rtol=0, equal_nan=True)
                # multiply computed vs. manual
                assert np.allclose(multi_vals, manual_vals, atol=0.001, rtol=0, equal_nan=True)
    # problematic keys and data arrays should be caught
    for labels, arrs, err in (
            (['a'], mock_numeric, ValueError),  # mismatching lengths
            (['a', 'b'], None, TypeError),  # missing arrays
            (['a', 'b'], [], ValueError),  # missing arrays
            (None, mock_numeric, TypeError),  # missing labels
            ([], mock_numeric, ValueError)):  # missing labels
        with pytest.raises(err):
            D_multi.compute_stats(stats_keys=labels, stats_data_arrs=arrs)