# Consolidated imports for the functions below. The cityseer module paths
# follow the v1.x layout and may differ between library versions.
import logging
import os
import timeit

import asyncpg
import numpy as np
import pytest

from cityseer.algos import centrality, data
from cityseer.metrics import layers, networks
from cityseer.tools import graphs, mock

logger = logging.getLogger(__name__)


def test_beta_from_distance():
    # some basic checks
    for d, b in zip([100, 1600, np.inf], [0.04, 0.0025, 0.0]):
        # simple straight check against corresponding beta
        assert networks.beta_from_distance(d) == np.array([b])
        # circular check
        assert networks.distance_from_beta(networks.beta_from_distance(d)) == d
        # array form check
        assert networks.beta_from_distance(np.array([d])) == np.array([b])
    # check that custom min_threshold_wt works
    arr = networks.beta_from_distance(172.69388197455342, min_threshold_wt=0.001)
    assert np.allclose(arr, np.array([0.04]), atol=0.001, rtol=0)
    # check on array form
    arr = networks.beta_from_distance([100, 1600, np.inf])
    assert np.allclose(arr, np.array([0.04, 0.0025, 0.0]), atol=0.001, rtol=0)
    # check for type error
    with pytest.raises(TypeError):
        networks.beta_from_distance('boo')
    # check that invalid distance values raise an error
    for d in [-100, 0]:
        with pytest.raises(ValueError):
            networks.beta_from_distance(d)
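
# A minimal sketch (an addition for illustration, not part of the original
# tests) of the relationship the checks above assume: beta is found by solving
# min_threshold_wt = exp(-beta * distance) for beta, i.e.
# beta = -ln(min_threshold_wt) / distance. With the presumed default
# min_threshold_wt of exp(-4) ~= 0.01831563889, d = 100 gives beta = 0.04 and
# d = 1600 gives beta = 0.0025, matching the test expectations.
def _sketch_beta_from_distance(distance: float,
                               min_threshold_wt: float = np.exp(-4)) -> float:
    # invert the decay curve at the point where it crosses the cutoff weight
    return -np.log(min_threshold_wt) / distance


assert np.isclose(_sketch_beta_from_distance(100), 0.04)
assert np.isclose(_sketch_beta_from_distance(1600), 0.0025)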
def test_local_centrality_time():
    """
    Originally based on node_harmonic and node_betweenness.
    OLD VERSION with trim maps:
    Timing: 10.490865555 for 10000 iterations
    NEW VERSION with numba typed list - faster and removes the arcane full vs. trim maps workflow:
    8.242256040000001 for 10000 iterations
    VERSION with node_edge_map Dict - a tad slower but worthwhile for cleaner and more intuitive code:
    8.882408618 for 10000 iterations
    float64 - 17.881911942000002
    float32 - 13.612861239
    Segments of unreachable code add to timing regardless...
    possibly because of the high number of iterations relative to function prep and teardown...?
    14.4 -> 14.293403884 for the simpler ghost node workflow
    """
    # load the test graph
    G = mock.mock_graph()
    G = graphs.nX_simple_geoms(G)
    # generate node and edge maps
    node_uids, node_data, edge_data, node_edge_map = graphs.graph_maps_from_nX(G)
    # needs a large enough beta so that distance thresholds aren't encountered
    distances = np.array([np.inf])
    betas = networks.beta_from_distance(distances)

    # setup timing wrapper
    def wrapper_func():
        """
        node density invokes the aggregative workflow
        node betweenness invokes the betweenness workflow
        segment density invokes the segments workflow
        """
        return centrality.local_centrality(node_data,
                                           edge_data,
                                           node_edge_map,
                                           distances,
                                           betas,
                                           ('node_density',  # 7.16s
                                            'node_betweenness',  # 8.08s - adds around 1s
                                            'segment_density',  # 11.2s - adds around 3s
                                            'segment_betweenness'),
                                           angular=False,
                                           suppress_progress=True)

    # prime the function
    wrapper_func()
    iters = 10000
    # time and report
    func_time = timeit.timeit(wrapper_func, number=iters)
    print(f'Timing: {func_time} for {iters} iterations')
    if 'GITHUB_ACTIONS' not in os.environ:
        assert func_time < 20
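
# Why the test above "primes" before timing: for numba JIT-compiled functions,
# the first call triggers compilation, which would otherwise be folded into the
# measured time. A self-contained sketch of the pattern; the summing function
# is purely illustrative and stands in for any @njit-compiled routine.
from numba import njit


@njit
def _jit_sum(arr):
    total = 0.0
    for v in arr:
        total += v
    return total


def _sketch_prime_then_time() -> float:
    arr = np.random.rand(1000)
    _jit_sum(arr)  # priming call: triggers one-off JIT compilation
    # subsequent calls measure steady-state cost only
    return timeit.timeit(lambda: _jit_sum(arr), number=1000)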
def test_distance_from_beta():
    # some basic checks using float form
    for b, d in zip([0.04, 0.0025, 0.0], [100, 1600, np.inf]):
        # simple straight check against corresponding distance
        assert networks.distance_from_beta(b) == np.array([d])
        # circular check
        assert networks.beta_from_distance(networks.distance_from_beta(b)) == b
        # array form check
        assert networks.distance_from_beta(np.array([b])) == np.array([d])
    # check that custom min_threshold_wt works
    arr = networks.distance_from_beta(0.04, min_threshold_wt=0.001)
    assert np.allclose(arr, np.array([172.69388197455342]), atol=0.001, rtol=0)
    # check on array form
    arr = networks.distance_from_beta([0.04, 0.0025, 0.0])
    assert np.allclose(arr, np.array([100, 1600, np.inf]), atol=0.001, rtol=0)
    # check for type error
    with pytest.raises(TypeError):
        networks.distance_from_beta('boo')
    # check that invalid beta values raise an error
    # integer zero and negative forms should raise; positive float zero (tested above) should not
    for b in [-0.04, 0, -0, -0.0]:
        with pytest.raises(ValueError):
            networks.distance_from_beta(b)
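
# The inverse relationship, sketched for illustration (again assuming a
# default min_threshold_wt of exp(-4)): solving min_threshold_wt =
# exp(-beta * d) for d gives d = -ln(min_threshold_wt) / beta, which is why a
# custom cutoff of 0.001 maps beta = 0.04 to ~172.694m in the test above.
def _sketch_distance_from_beta(beta: float,
                               min_threshold_wt: float = np.exp(-4)) -> float:
    # distance at which the decay curve crosses the cutoff weight
    return -np.log(min_threshold_wt) / beta


assert np.isclose(_sketch_distance_from_beta(0.04, min_threshold_wt=0.001),
                  172.69388197455342)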
async def population_aggregator(db_config, nodes_table, census_table, city_pop_id):
    db_con = await asyncpg.connect(**db_config)
    distances = [100, 200, 300, 400, 500, 600, 700, 800,
                 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600]
    betas = networks.beta_from_distance(distances)
    # create the columns
    await db_con.execute(f'''
        ALTER TABLE {nodes_table}
            ADD COLUMN IF NOT EXISTS cens_tot_pop real[],
            ADD COLUMN IF NOT EXISTS cens_adults real[],
            ADD COLUMN IF NOT EXISTS cens_employed real[],
            ADD COLUMN IF NOT EXISTS cens_dwellings real[],
            ADD COLUMN IF NOT EXISTS cens_students real[];
    ''')
    # iterate the nodes and assign the new values to a list
    logger.info(f'fetching all ids for city {city_pop_id}')
    uids = []
    records = await db_con.fetch(f'''
        SELECT id
        FROM {nodes_table}
        WHERE city_pop_id::int = {city_pop_id} AND within = true;
    ''')
    for r in records:
        uids.append(r['id'])
    logger.info(f'processing population for {len(uids)} ids')
    agg_results = []
    count = 0
    for uid in uids:
        count += 1
        if count % 10000 == 0:
            completion = round((count / len(uids)) * 100, 2)
            logger.info(f'{completion}% completed')
        tot_pop = []
        adults = []
        employed = []
        dwellings = []
        students = []
        for dist, beta in zip(distances, betas):
            # NB -> use ST_DWithin (uses the spatial index)
            # and NOT ST_Distance, which calculates everything from scratch
            record = await db_con.fetchrow(f'''
                SELECT sum(d.totpop_w) as tot_pop,
                       sum(d.adults_w) as adults,
                       sum(d.employed_w) as employed,
                       sum(d.dwellings_w) as dwellings,
                       sum(d.students_w) as students
                FROM (
                    (SELECT geom FROM {nodes_table} WHERE id = $1) AS node
                    CROSS JOIN LATERAL
                    (SELECT totpop, totadult, totemploy, dwelling, stud18plus,
                            geom <-> node.geom as dist
                     FROM {census_table}
                     WHERE ST_DWithin(geom, node.geom, $2)) AS c
                    CROSS JOIN LATERAL
                    -- negative exponential distance-decay weighting
                    (SELECT exp(-$3 * c.dist) as weight) as w
                    CROSS JOIN LATERAL
                    (SELECT c.totpop * w.weight as totpop_w,
                            c.totadult * w.weight as adults_w,
                            c.totemploy * w.weight as employed_w,
                            c.dwelling * w.weight as dwellings_w,
                            c.stud18plus * w.weight as students_w) as w_c
                ) AS d;
            ''', uid, dist, float(beta))  # cast from np.float64 for asyncpg
            # convert to a list: tot_pop, adults, employed, dwellings, students
            data = list(record)
            # change None values to 0
            for i in range(len(data)):
                if data[i] is None:
                    data[i] = 0
            for data_point, arr in zip(data, [tot_pop, adults, employed, dwellings, students]):
                arr.append(data_point)
        # add to the main aggregation
        agg_results.append((uid, tot_pop, adults, employed, dwellings, students))
    assert len(agg_results) == len(uids)
    # write back to the db
    await db_con.executemany(f'''
        UPDATE {nodes_table}
        SET cens_tot_pop = $2,
            cens_adults = $3,
            cens_employed = $4,
            cens_dwellings = $5,
            cens_students = $6
        WHERE id = $1;
    ''', agg_results)
    await db_con.close()
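
# For reference, a minimal Python sketch (an illustration, not part of the
# original script) of the weighting the lateral-join SQL above performs for a
# single node and distance threshold: each census point within d_max
# contributes its population scaled by the decay weight exp(-beta * dist).
def _sketch_weighted_population(pops: np.ndarray,
                                dists: np.ndarray,
                                d_max: float,
                                beta: float) -> float:
    # mask points beyond the threshold (the SQL's ST_DWithin)
    within = dists <= d_max
    # negative exponential distance decay (the SQL's exp(-$3 * c.dist))
    weights = np.exp(-beta * dists[within])
    return float(np.sum(pops[within] * weights))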
def test_local_agg_time(primal_graph):
    """
    Timing tests for landuse and stats aggregations.
    """
    if 'GITHUB_ACTIONS' in os.environ:
        return
    os.environ['CITYSEER_QUIET_MODE'] = '1'
    # generate node and edge maps
    node_uids, node_data, edge_data, node_edge_map = graphs.graph_maps_from_nX(primal_graph)
    # setup data
    data_dict = mock.mock_data_dict(primal_graph, random_seed=13)
    data_uids, data_map = layers.data_map_from_dict(data_dict)
    data_map = data.assign_to_network(data_map, node_data, edge_data, node_edge_map, 500)
    # needs a large enough beta so that distance thresholds aren't encountered
    distances = np.array([np.inf])
    betas = networks.beta_from_distance(distances)
    qs = np.array([0, 1, 2])
    mock_categorical = mock.mock_categorical_data(len(data_map))
    landuse_classes, landuse_encodings = layers.encode_categorical(mock_categorical)
    mock_numerical = mock.mock_numerical_data(len(data_dict), num_arrs=2, random_seed=0)

    def assign_wrapper():
        data.assign_to_network(data_map, node_data, edge_data, node_edge_map, 500)

    # prime the function
    assign_wrapper()
    iters = 20000
    # time and report - roughly 5.675
    func_time = timeit.timeit(assign_wrapper, number=iters)
    print(f'assign_wrapper: {func_time} for {iters} iterations')
    assert func_time < 10

    def landuse_agg_wrapper():
        mu_data_hill, mu_data_other, ac_data, ac_data_wt = data.aggregate_landuses(node_data,
                                                                                   edge_data,
                                                                                   node_edge_map,
                                                                                   data_map,
                                                                                   distances,
                                                                                   betas,
                                                                                   mixed_use_hill_keys=np.array([0, 1]),
                                                                                   landuse_encodings=landuse_encodings,
                                                                                   qs=qs,
                                                                                   angular=False)

    # prime the function
    landuse_agg_wrapper()
    iters = 20000
    # time and report - roughly 10.10
    func_time = timeit.timeit(landuse_agg_wrapper, number=iters)
    print(f'landuse_agg_wrapper: {func_time} for {iters} iterations')
    assert func_time < 15

    def stats_agg_wrapper():
        # compute
        data.aggregate_stats(node_data,
                             edge_data,
                             node_edge_map,
                             data_map,
                             distances,
                             betas,
                             numerical_arrays=mock_numerical,
                             angular=False)

    # prime the function
    stats_agg_wrapper()
    iters = 20000
    # time and report - roughly 4.96
    func_time = timeit.timeit(stats_agg_wrapper, number=iters)
    print(f'stats_agg_wrapper: {func_time} for {iters} iterations')
    assert func_time < 10
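
# The qs array above parameterises Hill diversity for the mixed-use measures.
# A minimal sketch of the standard formula (not cityseer's implementation):
# for class proportions p_i, D_q = (sum_i p_i^q)^(1/(1-q)), where q = 0 gives
# richness, q = 2 the inverse Simpson index, and the q -> 1 limit is the
# exponential of the Shannon entropy.
def _sketch_hill_diversity(counts: np.ndarray, q: float) -> float:
    probs = counts / counts.sum()
    probs = probs[probs > 0]  # zero-count classes contribute nothing
    if np.isclose(q, 1.0):
        # q -> 1 limit: exp of Shannon entropy
        return float(np.exp(-np.sum(probs * np.log(probs))))
    return float(np.sum(probs ** q) ** (1 / (1 - q)))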
def test_local_centrality_time(primal_graph):
    """
    Keep in mind that there are several extraneous variables:
    e.g. there may be fairly dramatic differences in timing on larger graphs
    and at larger search distances.

    Originally based on node_harmonic and node_betweenness.
    OLD VERSION with trim maps:
    Timing: 10.490865555 for 10000 iterations
    version with numba typed list - faster and removes the arcane full vs. trim maps workflow:
    8.24 for 10000 iterations
    version with node_edge_map Dict - a tad slower but worthwhile for cleaner and more intuitive code:
    8.88 for 10000 iterations
    version with the shortest-path-tree algo simplified to nodes and non-angular only:
    8.19 for 10000 iterations

    If reducing floating-point precision:
    float64 - 17.881911942000002
    float32 - 13.612861239

    Notes:
    - Segments of unreachable code used to add to timing: this seems to have
      been fixed in more recent versions of numba.
    - Separating the logic into functions results in ever so slightly slower
      times... though this may be due to function setup at invocation (x10000),
      which wouldn't be incurred in real scenarios...?
    - Tests using a List(Dict('x', 'y', etc.)) structure proved almost four
      times slower, so sticking with arrays.
    - Experiments with golang proved too complex re: bindings...
    """
    if 'GITHUB_ACTIONS' in os.environ:
        return
    os.environ['CITYSEER_QUIET_MODE'] = '1'
    # load the test graph
    node_uids, node_data, edge_data, node_edge_map = graphs.graph_maps_from_nX(primal_graph)
    # needs a large enough beta so that distance thresholds aren't encountered
    distances = np.array([np.inf])
    betas = networks.beta_from_distance(distances)

    def node_cent_wrapper():
        centrality.local_node_centrality(node_data,
                                         edge_data,
                                         node_edge_map,
                                         distances,
                                         betas,
                                         ('node_harmonic', 'node_betweenness'),
                                         angular=False,
                                         progress_proxy=None)

    # prime the function
    node_cent_wrapper()
    iters = 20000
    # time and report - roughly 6.37s on a 4.2GHz i7
    func_time = timeit.timeit(node_cent_wrapper, number=iters)
    print(f'node_cent_wrapper: {func_time} for {iters} iterations')
    assert func_time < 10

    def segment_cent_wrapper():
        centrality.local_segment_centrality(node_data,
                                            edge_data,
                                            node_edge_map,
                                            distances,
                                            betas,
                                            ('segment_harmonic', 'segment_betweenness'),
                                            angular=False,
                                            progress_proxy=None)

    # prime the function
    segment_cent_wrapper()
    iters = 20000
    # time and report - roughly 9.36s on a 4.2GHz i7
    func_time = timeit.timeit(segment_cent_wrapper, number=iters)
    print(f'segment_cent_wrapper: {func_time} for {iters} iterations')
    assert func_time < 13
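
# Given the timing noise the docstring above describes, timeit.repeat with a
# min() over runs is a steadier alternative to a single timeit.timeit call,
# since the minimum approximates the least-interference cost. A sketch;
# wrapper_func stands in for any of the primed wrappers above.
def _sketch_robust_timing(wrapper_func, iters: int = 20000) -> float:
    runs = timeit.repeat(wrapper_func, number=iters, repeat=5)
    return min(runs)  # best-case estimate across the five runs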
def test_local_centrality(diamond_graph):
    """
    Manual checks for all methods against the diamond graph.
    measures_data is multidimensional in the form of measure_keys x distances x nodes.
    """
    # generate node and edge maps
    node_uids, node_data, edge_data, node_edge_map = graphs.graph_maps_from_nX(diamond_graph)
    # generate dual
    diamond_graph_dual = graphs.nX_to_dual(diamond_graph)
    node_uids_dual, node_data_dual, edge_data_dual, node_edge_map_dual = graphs.graph_maps_from_nX(diamond_graph_dual)
    # setup distances and betas
    distances = np.array([50, 150, 250])
    betas = networks.beta_from_distance(distances)

    # NODE SHORTEST
    # set the keys - add shuffling to be sure various orders work
    node_keys = [
        'node_density',
        'node_farness',
        'node_cycles',
        'node_harmonic',
        'node_beta',
        'node_betweenness',
        'node_betweenness_beta'
    ]
    np.random.shuffle(node_keys)  # in place
    measure_keys = tuple(node_keys)
    measures_data = centrality.local_node_centrality(node_data,
                                                     edge_data,
                                                     node_edge_map,
                                                     distances,
                                                     betas,
                                                     measure_keys)
    # node density
    # additive nodes
    m_idx = node_keys.index('node_density')
    assert np.allclose(measures_data[m_idx][0], [0, 0, 0, 0], atol=0.001, rtol=0)
    assert np.allclose(measures_data[m_idx][1], [2, 3, 3, 2], atol=0.001, rtol=0)
    assert np.allclose(measures_data[m_idx][2], [3, 3, 3, 3], atol=0.001, rtol=0)
    # node farness
    # additive distances
    m_idx = node_keys.index('node_farness')
    assert np.allclose(measures_data[m_idx][0], [0, 0, 0, 0], atol=0.001, rtol=0)
    assert np.allclose(measures_data[m_idx][1], [200, 300, 300, 200], atol=0.001, rtol=0)
    assert np.allclose(measures_data[m_idx][2], [400, 300, 300, 400], atol=0.001, rtol=0)
    # node cycles
    # additive cycles
    m_idx = node_keys.index('node_cycles')
    assert np.allclose(measures_data[m_idx][0], [0, 0, 0, 0], atol=0.001, rtol=0)
    assert np.allclose(measures_data[m_idx][1], [1, 2, 2, 1], atol=0.001, rtol=0)
    assert np.allclose(measures_data[m_idx][2], [2, 2, 2, 2], atol=0.001, rtol=0)
    # node harmonic
    # additive 1 / distances
    m_idx = node_keys.index('node_harmonic')
    assert np.allclose(measures_data[m_idx][0], [0, 0, 0, 0], atol=0.001, rtol=0)
    assert np.allclose(measures_data[m_idx][1], [0.02, 0.03, 0.03, 0.02], atol=0.001, rtol=0)
    assert np.allclose(measures_data[m_idx][2], [0.025, 0.03, 0.03, 0.025], atol=0.001, rtol=0)
    # node beta
    # additive exp(-beta * dist)
    m_idx = node_keys.index('node_beta')
    # beta = 0.08
    assert np.allclose(measures_data[m_idx][0], [0, 0, 0, 0], atol=0.001, rtol=0)
    # beta = 0.02666667
    assert np.allclose(measures_data[m_idx][1], [0.1389669, 0.20845035, 0.20845035, 0.1389669],
                       atol=0.001, rtol=0)
    # beta = 0.016
    assert np.allclose(measures_data[m_idx][2], [0.44455525, 0.6056895, 0.6056895, 0.44455522],
                       atol=0.001, rtol=0)
    # node betweenness
    # additive 1 per node en route
    m_idx = node_keys.index('node_betweenness')
    assert np.allclose(measures_data[m_idx][0], [0, 0, 0, 0], atol=0.001, rtol=0)
    assert np.allclose(measures_data[m_idx][1], [0, 0, 0, 0], atol=0.001, rtol=0)
    # takes the first of multiple equidistant routes
    assert np.allclose(measures_data[m_idx][2], [0, 1, 0, 0], atol=0.001, rtol=0)
    # node betweenness beta
    # additive exp(-beta * dist) en route
    m_idx = node_keys.index('node_betweenness_beta')
    assert np.allclose(measures_data[m_idx][0], [0, 0, 0, 0], atol=0.001, rtol=0)  # beta = 0.08
    assert np.allclose(measures_data[m_idx][1], [0, 0, 0, 0], atol=0.001, rtol=0)  # beta = 0.02666667
    # takes the first of multiple equidistant routes
    # beta evaluated over the 200m distance from 3 to 0 via node 1
    assert np.allclose(measures_data[m_idx][2], [0, 0.0407622, 0, 0], atol=0.001, rtol=0)  # beta = 0.016

    # NODE SIMPLEST
    node_keys_angular = ['node_harmonic_angular', 'node_betweenness_angular']
    np.random.shuffle(node_keys_angular)  # in place
    measure_keys = tuple(node_keys_angular)
    measures_data = centrality.local_node_centrality(node_data,
                                                     edge_data,
                                                     node_edge_map,
                                                     distances,
                                                     betas,
                                                     measure_keys,
                                                     angular=True)
    # node harmonic angular
    # additive 1 / (1 + (to_imp / 180))
    m_idx = node_keys_angular.index('node_harmonic_angular')
    assert np.allclose(measures_data[m_idx][0], [0, 0, 0, 0], atol=0.001, rtol=0)
    assert np.allclose(measures_data[m_idx][1], [2, 3, 3, 2], atol=0.001, rtol=0)
    assert np.allclose(measures_data[m_idx][2], [2.75, 3, 3, 2.75], atol=0.001, rtol=0)
    # node betweenness angular
    # additive 1 per node en simplest route
    m_idx = node_keys_angular.index('node_betweenness_angular')
    assert np.allclose(measures_data[m_idx][0], [0, 0, 0, 0], atol=0.001, rtol=0)
    assert np.allclose(measures_data[m_idx][1], [0, 0, 0, 0], atol=0.001, rtol=0)
    assert np.allclose(measures_data[m_idx][2], [0, 1, 0, 0], atol=0.001, rtol=0)

    # NODE SIMPLEST ON DUAL
    node_keys_angular = ['node_harmonic_angular', 'node_betweenness_angular']
    np.random.shuffle(node_keys_angular)  # in place
    measure_keys = tuple(node_keys_angular)
    measures_data = centrality.local_node_centrality(node_data_dual,
                                                     edge_data_dual,
                                                     node_edge_map_dual,
                                                     distances,
                                                     betas,
                                                     measure_keys,
                                                     angular=True)
    # node_uids_dual = ('0_1', '0_2', '1_2', '1_3', '2_3')
    # node harmonic angular
    # additive 1 / (1 + (to_imp / 180))
    m_idx = node_keys_angular.index('node_harmonic_angular')
    assert np.allclose(measures_data[m_idx][0], [0, 0, 0, 0, 0], atol=0.001, rtol=0)
    assert np.allclose(measures_data[m_idx][1], [1.95, 1.95, 2.4, 1.95, 1.95], atol=0.001, rtol=0)
    assert np.allclose(measures_data[m_idx][2], [2.45, 2.45, 2.4, 2.45, 2.45], atol=0.001, rtol=0)
    # node betweenness angular
    # additive 1 per node en simplest route
    m_idx = node_keys_angular.index('node_betweenness_angular')
    assert np.allclose(measures_data[m_idx][0], [0, 0, 0, 0, 0], atol=0.001, rtol=0)
    assert np.allclose(measures_data[m_idx][1], [0, 0, 0, 0, 0], atol=0.001, rtol=0)
    assert np.allclose(measures_data[m_idx][2], [0, 0, 0, 1, 1], atol=0.001, rtol=0)

    # SEGMENT SHORTEST
    segment_keys = [
        'segment_density',
        'segment_harmonic',
        'segment_beta',
        'segment_betweenness'
    ]
    np.random.shuffle(segment_keys)  # in place
    measure_keys = tuple(segment_keys)
    measures_data = centrality.local_segment_centrality(node_data,
                                                        edge_data,
                                                        node_edge_map,
                                                        distances,
                                                        betas,
                                                        measure_keys,
                                                        angular=False)
    # segment density
    # additive segment lengths
    m_idx = segment_keys.index('segment_density')
    assert np.allclose(measures_data[m_idx][0], [100, 150, 150, 100], atol=0.001, rtol=0)
    assert np.allclose(measures_data[m_idx][1], [400, 500, 500, 400], atol=0.001, rtol=0)
    assert np.allclose(measures_data[m_idx][2], [500, 500, 500, 500], atol=0.001, rtol=0)
    # segment harmonic
    # segments are potentially approached from two directions,
    # i.e. along the respective shortest paths to the intersection of the shortest routes;
    # in this case, the midpoint of the middle segment is apportioned in either direction
    # additive log(b) - log(a) + log(d) - log(c)
    # the nearer distance is capped at 1m to avert negative numbers
    m_idx = segment_keys.index('segment_harmonic')
    assert np.allclose(measures_data[m_idx][0], [7.824046, 11.736069, 11.736069, 7.824046], atol=0.001, rtol=0)
    assert np.allclose(measures_data[m_idx][1], [10.832201, 15.437371, 15.437371, 10.832201], atol=0.001, rtol=0)
    assert np.allclose(measures_data[m_idx][2], [11.407564, 15.437371, 15.437371, 11.407565], atol=0.001, rtol=0)
    # segment beta
    # additive (np.exp(-beta * b) - np.exp(-beta * a)) / -beta
    #        + (np.exp(-beta * d) - np.exp(-beta * c)) / -beta
    # beta = 0 resolves to b - a and avoids division by zero
    m_idx = segment_keys.index('segment_beta')
    assert np.allclose(measures_data[m_idx][0], [24.542109, 36.813164, 36.813164, 24.542109], atol=0.001, rtol=0)
    assert np.allclose(measures_data[m_idx][1], [77.46391, 112.358284, 112.358284, 77.46391], atol=0.001, rtol=0)
    assert np.allclose(measures_data[m_idx][2], [133.80205, 177.43903, 177.43904, 133.80205], atol=0.001, rtol=0)
    # segment betweenness
    # similar formulation to segment beta: the start and end segments of each
    # betweenness pair are assigned to the intervening nodes
    # distance thresholds are computed using the inside edges of the segments,
    # so if the segments are touching, they will count up to the threshold distance
    m_idx = segment_keys.index('segment_betweenness')
    assert np.allclose(measures_data[m_idx][0], [0, 24.542109, 0, 0], atol=0.001, rtol=0)
    assert np.allclose(measures_data[m_idx][1], [0, 69.78874, 0, 0], atol=0.001, rtol=0)
    assert np.allclose(measures_data[m_idx][2], [0, 99.76293, 0, 0], atol=0.001, rtol=0)

    # SEGMENT SIMPLEST ON PRIMAL (no double counting)
    segment_keys_angular = [
        'segment_harmonic_hybrid',
        'segment_betweeness_hybrid'
    ]
    np.random.shuffle(segment_keys_angular)  # in place
    measure_keys = tuple(segment_keys_angular)
    measures_data = centrality.local_segment_centrality(node_data,
                                                        edge_data,
                                                        node_edge_map,
                                                        distances,
                                                        betas,
                                                        measure_keys,
                                                        angular=True)
    # segment harmonic hybrid
    # additive segment lengths divided through angular impedance
    # (f - e) / (1 + (ang / 180))
    m_idx = segment_keys_angular.index('segment_harmonic_hybrid')
    assert np.allclose(measures_data[m_idx][0], [100, 150, 150, 100], atol=0.001, rtol=0)
    assert np.allclose(measures_data[m_idx][1], [305, 360, 360, 305], atol=0.001, rtol=0)
    assert np.allclose(measures_data[m_idx][2], [410, 420, 420, 410], atol=0.001, rtol=0)
    # segment betweenness hybrid
    # additive segment lengths / (1 + (ang / 180)) en route
    m_idx = segment_keys_angular.index('segment_betweeness_hybrid')
    assert np.allclose(measures_data[m_idx][0], [0, 75, 0, 0], atol=0.001, rtol=0)
    assert np.allclose(measures_data[m_idx][1], [0, 150, 0, 0], atol=0.001, rtol=0)
    assert np.allclose(measures_data[m_idx][2], [0, 150, 0, 0], atol=0.001, rtol=0)
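
# A worked spot-check (an addition for illustration) of two expected values in
# test_local_centrality, using the diamond graph's 100m edge lengths:
# - node_beta for node 0 at the 150m threshold: nodes 1 and 2 are each 100m
#   away, so 2 * exp(-0.02666667 * 100) ~= 0.1389669, matching the assert.
# - segment_beta for node 0 at the 50m threshold: two adjoining 100m segments
#   are each explored to 50m, giving 2 * (1 - exp(-0.08 * 50)) / 0.08
#   ~= 24.542109, matching the assert.
beta_150 = networks.beta_from_distance(np.array([150]))[0]
assert np.isclose(2 * np.exp(-beta_150 * 100), 0.1389669, atol=0.001)
beta_50 = networks.beta_from_distance(np.array([50]))[0]
assert np.isclose(2 * (1 - np.exp(-beta_50 * 50)) / beta_50, 24.542109, atol=0.001)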