Example #1
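These snippets are lifted from the cityseer test suite and omit their imports. A minimal sketch of what they assume (module paths shift between cityseer releases, so adjust to the installed version; Example #5 additionally relies on asyncpg, a logger, and project-local helpers such as postGIS_to_networkX and Accessibility_Codes that are not shown here):

import datetime
import os
import time
import timeit

import numpy as np
import pytest

from cityseer.algos import centrality, checks, data, diversity
from cityseer.metrics import layers, networks
from cityseer.tools import graphs, mock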
def test_check_data_map():
    G = mock.mock_graph()
    G = graphs.nX_simple_geoms(G)
    N = networks.Network_Layer_From_nX(G, distances=[500])
    data_dict = mock.mock_data_dict(G)
    data_uids, data_map = layers.data_map_from_dict(data_dict)

    # should throw error if not assigned
    with pytest.raises(ValueError):
        checks.check_data_map(data_map)

    # should work if flag set to False
    checks.check_data_map(data_map, check_assigned=False)

    # assign then check that it runs as intended
    data_map = data.assign_to_network(data_map,
                                      N._node_data,
                                      N._edge_data,
                                      N._node_edge_map,
                                      max_dist=400)
    checks.check_data_map(data_map)

    # catch zero length data arrays
    empty_2d_arr = np.full((0, 4), np.nan)
    with pytest.raises(ValueError):
        checks.check_data_map(empty_2d_arr)

    # catch invalid dimensionality
    with pytest.raises(ValueError):
        checks.check_data_map(data_map[:, :-1])
Example #2
def test_data_map_from_dict(primal_graph):
    # generate mock data
    data_dict = mock.mock_data_dict(primal_graph)
    data_uids, data_map = layers.data_map_from_dict(data_dict)
    assert len(data_uids) == len(data_map) == len(data_dict)
    for d_label, d in zip(data_uids, data_map):
        assert d[0] == data_dict[d_label]['x']
        assert d[1] == data_dict[d_label]['y']
        assert np.isnan(d[2])
        assert np.isnan(d[3])
    # check that missing attributes throw errors
    for attr in ['x', 'y']:
        for k in data_dict.keys():
            del data_dict[k][attr]
        with pytest.raises(AttributeError):
            layers.data_map_from_dict(data_dict)
Example #3
def test_Data_Layer_From_Dict(primal_graph):
    data_dict = mock.mock_data_dict(primal_graph)
    data_uids, data_map = layers.data_map_from_dict(data_dict)
    x_arr = data_map[:, 0]
    y_arr = data_map[:, 1]
    # test against DataLayerFromDict's internal process
    D = layers.DataLayerFromDict(data_dict)
    assert D.uids == data_uids
    assert np.allclose(D._data, data_map, equal_nan=True)
    assert np.allclose(D.data_x_arr, x_arr, atol=0.001, rtol=0)
    assert np.allclose(D.data_y_arr, y_arr, atol=0.001, rtol=0)
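For orientation before the following examples: a data_map row holds four values, [x, y, nearest assigned network node, next-nearest assigned network node], with the last two NaN until assignment runs. A minimal sketch, assuming the mock helpers used above:

# a minimal sketch of the data_map layout, using the mock helpers above
G = mock.mock_graph()
data_dict = mock.mock_data_dict(G)
data_uids, data_map = layers.data_map_from_dict(data_dict)
# each row: [x, y, nearest_assigned_idx, next_nearest_assigned_idx]
assert data_map.shape == (len(data_dict), 4)
assert np.all(np.isnan(data_map[:, 2:]))  # NaN until assign_to_network is run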
Example #4
def test_Data_Layer():
    G = mock.mock_graph()
    data_dict = mock.mock_data_dict(G)
    data_uids, data_map = layers.data_map_from_dict(data_dict)
    x_arr = data_map[:, 0]
    y_arr = data_map[:, 1]

    # test against Data_Layer internal process
    D = layers.Data_Layer(data_uids, data_map)
    assert D.uids == data_uids
    assert np.allclose(D._data, data_map, equal_nan=True, atol=0.001, rtol=0)
    assert np.allclose(D.x_arr, x_arr, atol=0.001, rtol=0)
    assert np.allclose(D.y_arr, y_arr, atol=0.001, rtol=0)
Example #5
async def accessibility_calc(db_config,
                             nodes_table,
                             links_table,
                             city_pop_id,
                             distances,
                             boundary_table='analysis.city_boundaries_150',
                             data_table='os.poi',
                             data_where=None,
                             rdm_flag=False,
                             dual_flag=False):
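    # overview: load the street network from PostGIS, build a cityseer network
    # layer, load POI landuse data, compute mixed-use diversity and
    # accessibility metrics, then write the per-node results back to the
    # nodes table; dual / randomised runs compute a reduced set of measures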
    if dual_flag or rdm_flag:
        if city_pop_id > 1:
            logger.warning(
                'Only do dual or randomised metrics for city_pop_id = 1')
            return

    if dual_flag:
        nodes_table += '_dual'
        links_table += '_dual'

    if rdm_flag:
        data_table += '_randomised'

    logger.info(
        f'Starting LU calcs for city id: {city_pop_id} on network table '
        f'{nodes_table} and data table {data_table}')
    logger.info('Loading network data')
    G = await postGIS_to_networkX(db_config, nodes_table, links_table,
                                  city_pop_id)
    N = networks.NetworkLayerFromNX(G, distances)
    logger.info(f'Loading POI data from data table: {data_table}')
    data_dict = await postGIS_to_landuses_dict(db_config,
                                               data_table,
                                               'urn',
                                               'class_code',
                                               boundary_table,
                                               city_pop_id,
                                               max_dist=max(distances),
                                               data_where=data_where)
    data_uids, data_map = layers.data_map_from_dict(data_dict)
    # derive the landuse labels, classes, encodings
    landuse_labels = [v['class'] for v in data_dict.values()]
    landuse_classes, landuse_encodings = layers.encode_categorical(
        landuse_labels)
    logger.info('Generating disparity weights matrix')
    cl_disparity_wt_matrix = disparity_wt_matrix(landuse_classes)
    logger.info('Creating data layer')
    D = layers.DataLayer(data_uids, data_map)

    start = time.localtime()
    logger.info('Assigning data points to the network')
    D.assign_to_network(N, max_dist=400)

    # generate the accessibility codes Class
    # this deduces codes and squashes results into categories
    logger.info('Generating POI accessibility codes')
    Acc_codes = Accessibility_Codes(landuse_classes,
                                    len(N.uids),
                                    distances,
                                    compact=(dual_flag or rdm_flag))

    mixed_use_metrics = [
        'hill', 'hill_branch_wt', 'hill_pairwise_wt',
        'hill_pairwise_disparity', 'shannon', 'gini_simpson',
        'raos_pairwise_disparity'
    ]
    # if dual or rdm only do first two
    if dual_flag or rdm_flag:
        mixed_use_metrics = mixed_use_metrics[:2]
        cl_disparity_wt_matrix = None
    # compute
    logger.info('Computing landuses')
    D.compute_aggregated(landuse_labels=landuse_labels,
                         mixed_use_keys=mixed_use_metrics,
                         accessibility_keys=Acc_codes.all_codes,
                         cl_disparity_wt_matrix=cl_disparity_wt_matrix,
                         qs=[0, 1, 2])
    time_duration = datetime.timedelta(seconds=time.mktime(time.localtime()) -
                                       time.mktime(start))
    logger.info(f'Algo duration: {time_duration}')

    # squash the accessibility data
    logger.info('Squashing accessibility data')
    Acc_codes.set_metrics(N.metrics['accessibility'])

    mu_q_keys = [
        'hill', 'hill_branch_wt', 'hill_pairwise_wt', 'hill_pairwise_disparity'
    ]
    if dual_flag or rdm_flag:
        mu_q_keys = mu_q_keys[:2]

    mu_keys = ['shannon', 'gini_simpson', 'raos_pairwise_disparity']
    if dual_flag or rdm_flag:
        mu_keys = []

    if not dual_flag and not rdm_flag:
        ac_keys = [
            'accommodation', 'eating', 'drinking', 'commercial', 'tourism',
            'entertainment', 'government', 'manufacturing', 'retail_food',
            'retail_other', 'transport', 'health', 'education', 'parks',
            'cultural', 'sports', 'total'
        ]
    else:
        ac_keys = [
            'eating', 'drinking', 'commercial', 'retail_food', 'retail_other',
            'transport', 'total'
        ]

    # aggregate the data
    logger.info('Aggregating results')
    bulk_data = []
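    # NB: the order of appends below must mirror the measure_cols ordering
    # used for the database UPDATE further down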
    for idx, uid in enumerate(N.uids):
        # first check that this is a live node (i.e. within the original city boundary)
        if not N.live[idx]:
            continue
        node_data = [uid]
        # mixed-use keys requiring q values
        for mu_key in mu_q_keys:
            for q_key, q_val in N.metrics['mixed_uses'][mu_key].items():
                inner_data = []
                for d_key, d_val in q_val.items():
                    inner_data.append(d_val[idx])
                node_data.append(inner_data)
        # mixed-use keys not requiring q values
        for mu_key in mu_keys:
            inner_data = []
            for d_key, d_val in N.metrics['mixed_uses'][mu_key].items():
                inner_data.append(d_val[idx])
            node_data.append(inner_data)
        # accessibility keys
        for ac_key in ac_keys:
            inner_data = []
            for d_key, d_val in Acc_codes.metrics['weighted'][ac_key].items():
                inner_data.append(d_val[idx])
            node_data.append(inner_data)
            # also write non-weighted variants of the following
            if ac_key in [
                    'eating', 'commercial', 'retail_food', 'retail_other',
                    'total'
            ]:
                inner_data = []
                for d_key, d_val in Acc_codes.metrics['non_weighted'][
                        ac_key].items():
                    inner_data.append(d_val[idx])
                node_data.append(inner_data)
        bulk_data.append(tuple(node_data))

    logger.info('Writing results to database')
    db_con = await asyncpg.connect(**db_config)
    if not dual_flag and not rdm_flag:
        measure_cols = [
            'mu_hill_0', 'mu_hill_1', 'mu_hill_2', 'mu_hill_branch_wt_0',
            'mu_hill_branch_wt_1', 'mu_hill_branch_wt_2',
            'mu_hill_pairwise_wt_0', 'mu_hill_pairwise_wt_1',
            'mu_hill_pairwise_wt_2', 'mu_hill_dispar_wt_0',
            'mu_hill_dispar_wt_1', 'mu_hill_dispar_wt_2', 'mu_shannon',
            'mu_gini', 'mu_raos', 'ac_accommodation', 'ac_eating',
            'ac_eating_nw', 'ac_drinking', 'ac_commercial', 'ac_commercial_nw',
            'ac_tourism', 'ac_entertainment', 'ac_government',
            'ac_manufacturing', 'ac_retail_food', 'ac_retail_food_nw',
            'ac_retail_other', 'ac_retail_other_nw', 'ac_transport',
            'ac_health', 'ac_education', 'ac_parks', 'ac_cultural',
            'ac_sports', 'ac_total', 'ac_total_nw'
        ]
    else:
        measure_cols = [
            'mu_hill_0', 'mu_hill_1', 'mu_hill_2', 'mu_hill_branch_wt_0',
            'mu_hill_branch_wt_1', 'mu_hill_branch_wt_2', 'ac_eating',
            'ac_eating_nw', 'ac_drinking', 'ac_commercial', 'ac_commercial_nw',
            'ac_retail_food', 'ac_retail_food_nw', 'ac_retail_other',
            'ac_retail_other_nw', 'ac_transport', 'ac_total', 'ac_total_nw'
        ]
    # add the _rdm extension if necessary
    if rdm_flag:
        measure_cols = [m + '_rdm' for m in measure_cols]
    # create the columns
    col_strings = []
    counter = 2
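    # $1 is reserved for the node id in the UPDATE statement below, so the
    # measure parameters are numbered from $2 onwards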
    for measure_col in measure_cols:
        await db_con.execute(f'''
        ALTER TABLE {nodes_table}
            ADD COLUMN IF NOT EXISTS {measure_col} real[];
        ''')
        col_strings.append(f'{measure_col} = ${counter}')
        counter += 1
    await db_con.executemany(
        f'UPDATE {nodes_table} SET ' + ', '.join(col_strings) +
        ' WHERE id = $1;', bulk_data)
    await db_con.close()
Example #6
def test_local_agg_time(primal_graph):
    """
    Timing tests for landuse and stats aggregations
    """
    if 'GITHUB_ACTIONS' in os.environ:
        return
    os.environ['CITYSEER_QUIET_MODE'] = '1'

    # generate node and edge maps
    node_uids, node_data, edge_data, node_edge_map = graphs.graph_maps_from_nX(primal_graph)
    # setup data
    data_dict = mock.mock_data_dict(primal_graph, random_seed=13)
    data_uids, data_map = layers.data_map_from_dict(data_dict)
    data_map = data.assign_to_network(data_map, node_data, edge_data, node_edge_map, 500)
    # needs a large enough distance (np.inf, i.e. a near-zero beta) so that distance thresholds aren't encountered
    distances = np.array([np.inf])
    betas = networks.beta_from_distance(distances)
    qs = np.array([0, 1, 2])
    mock_categorical = mock.mock_categorical_data(len(data_map))
    landuse_classes, landuse_encodings = layers.encode_categorical(mock_categorical)
    mock_numerical = mock.mock_numerical_data(len(data_dict), num_arrs=2, random_seed=0)

    def assign_wrapper():
        data.assign_to_network(data_map, node_data, edge_data, node_edge_map, 500)

    # prime the function
    assign_wrapper()
    iters = 20000
    # time and report - roughly 5.675
    func_time = timeit.timeit(assign_wrapper, number=iters)
    print(f'assign_wrapper: {func_time} for {iters} iterations')
    assert func_time < 10

    def landuse_agg_wrapper():
        mu_data_hill, mu_data_other, ac_data, ac_data_wt = data.aggregate_landuses(node_data,
                                                                                   edge_data,
                                                                                   node_edge_map,
                                                                                   data_map,
                                                                                   distances,
                                                                                   betas,
                                                                                   mixed_use_hill_keys=np.array([0, 1]),
                                                                                   landuse_encodings=landuse_encodings,
                                                                                   qs=qs,
                                                                                   angular=False)

    # prime the function
    landuse_agg_wrapper()
    iters = 20000
    # time and report - roughly 10.10
    func_time = timeit.timeit(landuse_agg_wrapper, number=iters)
    print(f'landuse_agg_wrapper: {func_time} for {iters} iterations')
    assert func_time < 15

    def stats_agg_wrapper():
        # compute
        data.aggregate_stats(node_data,
                             edge_data,
                             node_edge_map,
                             data_map,
                             distances,
                             betas,
                             numerical_arrays=mock_numerical,
                             angular=False)

    # prime the function
    stats_agg_wrapper()
    iters = 20000
    # time and report - roughly 4.96
    func_time = timeit.timeit(stats_agg_wrapper, number=iters)
    print(f'stats_agg_wrapper: {func_time} for {iters} iterations')
    assert func_time < 10
Example #7
def test_local_aggregator_numerical_components(primal_graph):
    # generate node and edge maps
    node_uids, node_data, edge_data, node_edge_map = graphs.graph_maps_from_nX(primal_graph)
    # setup data
    data_dict = mock.mock_data_dict(primal_graph, random_seed=13)
    data_uids, data_map = layers.data_map_from_dict(data_dict)
    data_map = data.assign_to_network(data_map, node_data, edge_data, node_edge_map, 500)
    # for debugging
    # from cityseer.tools import plot
    # plot.plot_graph_maps(node_uids, node_data, edge_data, data_map)
    # set parameters - use a large enough distance such that simple non-weighted checks can be run for max, mean, variance
    betas = np.array([0.00125])
    distances = networks.distance_from_beta(betas)
    mock_numerical = mock.mock_numerical_data(len(data_dict), num_arrs=2, random_seed=0)
    # compute
    stats_sum, stats_sum_wt, stats_mean, stats_mean_wt, stats_variance, stats_variance_wt, stats_max, stats_min = \
        data.aggregate_stats(node_data,
                             edge_data,
                             node_edge_map,
                             data_map,
                             distances,
                             betas,
                             numerical_arrays=mock_numerical,
                             angular=False)
    # non connected portions of the graph will have different stats
    # used manual data plots from test_assign_to_network() to see which nodes the data points are assigned to
    # connected graph is from 0 to 48 -> assigned data points are all except 5, 8, 9, 17, 18, 29, 33, 38, 48
    connected_nodes_idx = list(range(49))
    # and the respective data assigned to connected portion of the graph
    connected_data_idx = [i for i in range(len(data_dict)) if i not in [5, 8, 9, 17, 18, 29, 33, 38, 48]]
    # isolated node = 49 -> assigned no data points
    # isolated nodes = 50 & 51 -> assigned data points = 17, 33
    # isolated loop = 52, 53, 54, 55 -> assigned data points = 5, 8, 9, 18, 29, 38, 48
    isolated_nodes_idx = [52, 53, 54, 55]
    isolated_data_idx = [5, 8, 9, 18, 29, 38, 48]
    for stats_idx in range(len(mock_numerical)):
        for d_idx in range(len(distances)):
            # max
            assert np.isnan(stats_max[stats_idx, d_idx, 49])
            assert np.allclose(stats_max[stats_idx, d_idx, [50, 51]], mock_numerical[stats_idx, [17, 33]].max(),
                               atol=0.001, rtol=0)
            assert np.allclose(stats_max[stats_idx, d_idx, isolated_nodes_idx],
                               mock_numerical[stats_idx, isolated_data_idx].max(), atol=0.001, rtol=0)
            assert np.allclose(stats_max[stats_idx, d_idx, connected_nodes_idx],
                               mock_numerical[stats_idx, connected_data_idx].max(), atol=0.001, rtol=0)
            # min
            assert np.isnan(stats_min[stats_idx, d_idx, 49])
            assert np.allclose(stats_min[stats_idx, d_idx, [50, 51]], mock_numerical[stats_idx, [17, 33]].min(),
                               atol=0.001, rtol=0)
            assert np.allclose(stats_min[stats_idx, d_idx, isolated_nodes_idx],
                               mock_numerical[stats_idx, isolated_data_idx].min(), atol=0.001, rtol=0)
            assert np.allclose(stats_min[stats_idx, d_idx, connected_nodes_idx],
                               mock_numerical[stats_idx, connected_data_idx].min(), atol=0.001, rtol=0)
            # sum
            assert stats_sum[stats_idx, d_idx, 49] == 0
            assert np.allclose(stats_sum[stats_idx, d_idx, [50, 51]],
                               mock_numerical[stats_idx, [17, 33]].sum(), atol=0.001, rtol=0)
            assert np.allclose(stats_sum[stats_idx, d_idx, isolated_nodes_idx],
                               mock_numerical[stats_idx, isolated_data_idx].sum(), atol=0.001, rtol=0)
            assert np.allclose(stats_sum[stats_idx, d_idx, connected_nodes_idx],
                               mock_numerical[stats_idx, connected_data_idx].sum(), atol=0.001, rtol=0)
            # mean
            assert np.isnan(stats_mean[stats_idx, d_idx, 49])
            assert np.allclose(stats_mean[stats_idx, d_idx, [50, 51]], mock_numerical[stats_idx, [17, 33]].mean(),
                               atol=0.001, rtol=0)
            assert np.allclose(stats_mean[stats_idx, d_idx, isolated_nodes_idx],
                               mock_numerical[stats_idx, isolated_data_idx].mean(), atol=0.001, rtol=0)
            assert np.allclose(stats_mean[stats_idx, d_idx, connected_nodes_idx],
                               mock_numerical[stats_idx, connected_data_idx].mean(), atol=0.001, rtol=0)
            # variance
            assert np.isnan(stats_variance[stats_idx, d_idx, 49])
            assert np.allclose(stats_variance[stats_idx, d_idx, [50, 51]], mock_numerical[stats_idx, [17, 33]].var(),
                               atol=0.001, rtol=0)
            assert np.allclose(stats_variance[stats_idx, d_idx, isolated_nodes_idx],
                               mock_numerical[stats_idx, isolated_data_idx].var(), atol=0.001, rtol=0)
            assert np.allclose(stats_variance[stats_idx, d_idx, connected_nodes_idx],
                               mock_numerical[stats_idx, connected_data_idx].var(), atol=0.001, rtol=0)
Example #8
def test_assign_to_network(primal_graph):
    # create additional dead-end scenario
    primal_graph.remove_edge(14, 15)
    primal_graph.remove_edge(15, 28)
    # G = graphs.nX_auto_edge_params(G)
    G = graphs.nX_decompose(primal_graph, 50)
    node_uids, node_data, edge_data, node_edge_map = graphs.graph_maps_from_nX(G)
    # generate data
    data_dict = mock.mock_data_dict(G, random_seed=25)
    data_uids, data_map = layers.data_map_from_dict(data_dict)
    # override data point locations for test cases vis-a-vis isolated nodes and isolated edges
    data_map[18, :2] = [701200, 5719400]
    data_map[39, :2] = [700750, 5720025]
    data_map[26, :2] = [700400, 5719525]
    # assignments at max_dist=1600 visually confirmed in plots
    data_map_1600 = data_map.copy()
    data_map_1600 = data.assign_to_network(data_map_1600,
                                           node_data,
                                           edge_data,
                                           node_edge_map,
                                           max_dist=1600)
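    # targets: [data point idx, nearest assigned network node, next-nearest assigned network node]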
    targets = np.array([
        [0, 164, 163],
        [1, 42, 241],
        [2, 236, 235],
        [3, 48, 262],
        [4, 211, 212],
        [5, 236, 235],
        [6, 58, 57],
        [7, 72, 5],
        [8, 75, 76],
        [9, 92, 9],
        [10, 61, 62],
        [11, 96, 13],
        [12, 0, 59],
        [13, 98, 99],
        [14, 203, 202],
        [15, 121, 120],
        [16, 48, 262],
        [17, 2, 70],
        [18, 182, 183],
        [19, 158, 157],
        [20, 83, 84],
        [21, 2, np.nan],
        [22, 171, 170],
        [23, 266, 52],
        [24, 83, 84],
        [25, 88, 11],
        [26, 49, np.nan],
        [27, 19, 138],
        [28, 134, 135],
        [29, 262, 46],
        [30, 78, 9],
        [31, 188, 189],
        [32, 180, 181],
        [33, 95, 94],
        [34, 226, 225],
        [35, 110, 111],
        [36, 39, 228],
        [37, 158, 25],
        [38, 88, 87],
        [39, 263, np.nan],
        [40, 120, 121],
        [41, 146, 21],
        [42, 10, 97],
        [43, 119, 118],
        [44, 82, 5],
        [45, 11, 88],
        [46, 100, 99],
        [47, 138, 19],
        [48, 14, np.nan],
        [49, 106, 105]
    ])
    # for debugging
    # from cityseer.tools import plot
    # plot.plot_graph_maps(node_data, edge_data, data_map)
    # assignment map includes data x, data y, nearest assigned, next nearest assigned
    assert np.allclose(data_map_1600[:, 2:],
                       targets[:, 1:],
                       equal_nan=True,
                       atol=0,
                       rtol=0)
    # max distance of 0 should return all NaN
    data_map_test_0 = data_map.copy()
    data_map_test_0 = data.assign_to_network(data_map_test_0,
                                             node_data,
                                             edge_data,
                                             node_edge_map,
                                             max_dist=0)
    assert np.all(np.isnan(data_map_test_0[:, 2]))
    assert np.all(np.isnan(data_map_test_0[:, 3]))
    # max distance of 2000 should return no NaN for nearest
    # there will be some NaN for next nearest
    data_map_test_2000 = data_map.copy()
    data_map_test_2000 = data.assign_to_network(data_map_test_2000,
                                                node_data,
                                                edge_data,
                                                node_edge_map,
                                                max_dist=2000)
    assert not np.any(np.isnan(data_map_test_2000[:, 2]))
Example #9
def test_aggregate_landuses_categorical_components(primal_graph):
    # generate node and edge maps
    node_uids, node_data, edge_data, node_edge_map = graphs.graph_maps_from_nX(primal_graph)
    # setup data
    data_dict = mock.mock_data_dict(primal_graph, random_seed=13)
    data_uids, data_map = layers.data_map_from_dict(data_dict)
    data_map = data.assign_to_network(data_map, node_data, edge_data, node_edge_map, 500)
    # set parameters
    betas = np.array([0.02, 0.01, 0.005, 0.0025])
    distances = networks.distance_from_beta(betas)
    qs = np.array([0, 1, 2])
    mock_categorical = mock.mock_categorical_data(len(data_map))
    landuse_classes, landuse_encodings = layers.encode_categorical(mock_categorical)
    mock_matrix = np.full((len(landuse_classes), len(landuse_classes)), 1)
    # set the keys - add shuffling to be sure various orders work
    hill_keys = np.arange(4)
    np.random.shuffle(hill_keys)
    non_hill_keys = np.arange(3)
    np.random.shuffle(non_hill_keys)
    ac_keys = np.array([1, 2, 5])
    np.random.shuffle(ac_keys)
    # generate
    mu_data_hill, mu_data_other, ac_data, ac_data_wt = data.aggregate_landuses(node_data,
                                                                               edge_data,
                                                                               node_edge_map,
                                                                               data_map,
                                                                               distances,
                                                                               betas,
                                                                               landuse_encodings=landuse_encodings,
                                                                               qs=qs,
                                                                               mixed_use_hill_keys=hill_keys,
                                                                               mixed_use_other_keys=non_hill_keys,
                                                                               accessibility_keys=ac_keys,
                                                                               cl_disparity_wt_matrix=mock_matrix,
                                                                               angular=False)
    # hill
    hill = mu_data_hill[np.where(hill_keys == 0)][0]
    hill_branch_wt = mu_data_hill[np.where(hill_keys == 1)][0]
    hill_pw_wt = mu_data_hill[np.where(hill_keys == 2)][0]
    hill_disp_wt = mu_data_hill[np.where(hill_keys == 3)][0]
    # non hill
    shannon = mu_data_other[np.where(non_hill_keys == 0)][0]
    gini = mu_data_other[np.where(non_hill_keys == 1)][0]
    raos = mu_data_other[np.where(non_hill_keys == 2)][0]
    # access non-weighted
    ac_1_nw = ac_data[np.where(ac_keys == 1)][0]
    ac_2_nw = ac_data[np.where(ac_keys == 2)][0]
    ac_5_nw = ac_data[np.where(ac_keys == 5)][0]
    # access weighted
    ac_1_w = ac_data_wt[np.where(ac_keys == 1)][0]
    ac_2_w = ac_data_wt[np.where(ac_keys == 2)][0]
    ac_5_w = ac_data_wt[np.where(ac_keys == 5)][0]
    # test manual metrics against all nodes
    mu_max_unique = len(landuse_classes)
    # test against various distances
    for d_idx in range(len(distances)):
        dist_cutoff = distances[d_idx]
        beta = betas[d_idx]
        for src_idx in range(len(primal_graph)):
            reachable_data, reachable_data_dist, tree_preds = data.aggregate_to_src_idx(src_idx,
                                                                                        node_data,
                                                                                        edge_data,
                                                                                        node_edge_map,
                                                                                        data_map,
                                                                                        dist_cutoff)
            # counts of each class type (array length per max unique classes - not just those within max distance)
            cl_counts = np.full(mu_max_unique, 0)
            # nearest of each class type (likewise)
            cl_nearest = np.full(mu_max_unique, np.inf)
            # aggregate
            a_1_nw = 0
            a_2_nw = 0
            a_5_nw = 0
            a_1_w = 0
            a_2_w = 0
            a_5_w = 0
            # iterate reachable
            for data_idx, (reachable, data_dist) in enumerate(zip(reachable_data, reachable_data_dist)):
                if not reachable:
                    continue
                cl = landuse_encodings[data_idx]
                # double check distance is within threshold
                assert data_dist <= dist_cutoff
                # update the class counts
                cl_counts[cl] += 1
                # if distance is nearer, update the nearest distance array too
                if data_dist < cl_nearest[cl]:
                    cl_nearest[cl] = data_dist
                # aggregate accessibility codes
                if cl == 1:
                    a_1_nw += 1
                    a_1_w += np.exp(-beta * data_dist)
                elif cl == 2:
                    a_2_nw += 1
                    a_2_w += np.exp(-beta * data_dist)
                elif cl == 5:
                    a_5_nw += 1
                    a_5_w += np.exp(-beta * data_dist)
            # assertions
            assert ac_1_nw[d_idx, src_idx] == a_1_nw
            assert ac_2_nw[d_idx, src_idx] == a_2_nw
            assert ac_5_nw[d_idx, src_idx] == a_5_nw

            assert ac_1_w[d_idx, src_idx] == a_1_w
            assert ac_2_w[d_idx, src_idx] == a_2_w
            assert ac_5_w[d_idx, src_idx] == a_5_w

            assert hill[0, d_idx, src_idx] == diversity.hill_diversity(cl_counts, 0)
            assert hill[1, d_idx, src_idx] == diversity.hill_diversity(cl_counts, 1)
            assert hill[2, d_idx, src_idx] == diversity.hill_diversity(cl_counts, 2)

            assert hill_branch_wt[0, d_idx, src_idx] == \
                   diversity.hill_diversity_branch_distance_wt(cl_counts, cl_nearest, 0, beta)
            assert hill_branch_wt[1, d_idx, src_idx] == \
                   diversity.hill_diversity_branch_distance_wt(cl_counts, cl_nearest, 1, beta)
            assert hill_branch_wt[2, d_idx, src_idx] == \
                   diversity.hill_diversity_branch_distance_wt(cl_counts, cl_nearest, 2, beta)

            assert hill_pw_wt[0, d_idx, src_idx] == \
                   diversity.hill_diversity_pairwise_distance_wt(cl_counts, cl_nearest, 0, beta)
            assert hill_pw_wt[1, d_idx, src_idx] == \
                   diversity.hill_diversity_pairwise_distance_wt(cl_counts, cl_nearest, 1, beta)
            assert hill_pw_wt[2, d_idx, src_idx] == \
                   diversity.hill_diversity_pairwise_distance_wt(cl_counts, cl_nearest, 2, beta)

            assert hill_disp_wt[0, d_idx, src_idx] == \
                   diversity.hill_diversity_pairwise_matrix_wt(cl_counts, mock_matrix, 0)
            assert hill_disp_wt[1, d_idx, src_idx] == \
                   diversity.hill_diversity_pairwise_matrix_wt(cl_counts, mock_matrix, 1)
            assert hill_disp_wt[2, d_idx, src_idx] == \
                   diversity.hill_diversity_pairwise_matrix_wt(cl_counts, mock_matrix, 2)

            assert shannon[d_idx, src_idx] == diversity.shannon_diversity(cl_counts)
            assert gini[d_idx, src_idx] == diversity.gini_simpson_diversity(cl_counts)
            assert raos[d_idx, src_idx] == diversity.raos_quadratic_diversity(cl_counts, mock_matrix)

    # check that angular is passed-through
    # actual angular tests happen in test_shortest_path_tree()
    # here the emphasis is simply on checking that the angular instruction gets chained through

    # setup dual data
    G_dual = graphs.nX_to_dual(primal_graph)
    node_labels_dual, node_data_dual, edge_data_dual, node_edge_map_dual = graphs.graph_maps_from_nX(G_dual)
    data_dict_dual = mock.mock_data_dict(G_dual, random_seed=13)
    data_uids_dual, data_map_dual = layers.data_map_from_dict(data_dict_dual)
    data_map_dual = data.assign_to_network(data_map_dual, node_data_dual, edge_data_dual, node_edge_map_dual, 500)
    mock_categorical = mock.mock_categorical_data(len(data_map_dual))
    landuse_classes_dual, landuse_encodings_dual = layers.encode_categorical(mock_categorical)
    mock_matrix = np.full((len(landuse_classes_dual), len(landuse_classes_dual)), 1)

    mu_hill_dual, mu_other_dual, ac_dual, ac_wt_dual = data.aggregate_landuses(node_data_dual,
                                                                               edge_data_dual,
                                                                               node_edge_map_dual,
                                                                               data_map_dual,
                                                                               distances,
                                                                               betas,
                                                                               landuse_encodings_dual,
                                                                               qs=qs,
                                                                               mixed_use_hill_keys=hill_keys,
                                                                               mixed_use_other_keys=non_hill_keys,
                                                                               accessibility_keys=ac_keys,
                                                                               cl_disparity_wt_matrix=mock_matrix,
                                                                               angular=True)

    mu_hill_dual_sidestep, mu_other_dual_sidestep, ac_dual_sidestep, ac_wt_dual_sidestep = \
        data.aggregate_landuses(node_data_dual,
                                edge_data_dual,
                                node_edge_map_dual,
                                data_map_dual,
                                distances,
                                betas,
                                landuse_encodings_dual,
                                qs=qs,
                                mixed_use_hill_keys=hill_keys,
                                mixed_use_other_keys=non_hill_keys,
                                accessibility_keys=ac_keys,
                                cl_disparity_wt_matrix=mock_matrix,
                                angular=False)

    assert not np.allclose(mu_hill_dual, mu_hill_dual_sidestep, atol=0.001, rtol=0)
    assert not np.allclose(mu_other_dual, mu_other_dual_sidestep, atol=0.001, rtol=0)
    assert not np.allclose(ac_dual, ac_dual_sidestep, atol=0.001, rtol=0)
    assert not np.allclose(ac_wt_dual, ac_wt_dual_sidestep, atol=0.001, rtol=0)
Example #10
def test_aggregate_landuses_signatures(primal_graph):
    # generate node and edge maps
    node_uids, node_data, edge_data, node_edge_map = graphs.graph_maps_from_nX(primal_graph)
    # setup data
    data_dict = mock.mock_data_dict(primal_graph, random_seed=13)
    data_uids, data_map = layers.data_map_from_dict(data_dict)
    data_map = data.assign_to_network(data_map, node_data, edge_data, node_edge_map, 500)
    # set parameters
    betas = np.array([0.02, 0.01, 0.005, 0.0025])
    distances = networks.distance_from_beta(betas)
    qs = np.array([0, 1, 2])
    mock_categorical = mock.mock_categorical_data(len(data_map))
    landuse_classes, landuse_encodings = layers.encode_categorical(mock_categorical)
    # check that empty land_use encodings are caught
    with pytest.raises(ValueError):
        data.aggregate_landuses(node_data,
                                edge_data,
                                node_edge_map,
                                data_map,
                                distances,
                                betas,
                                mixed_use_hill_keys=np.array([0]))
    # check that unequal land_use encodings vs data map lengths are caught
    with pytest.raises(ValueError):
        data.aggregate_landuses(node_data,
                                edge_data,
                                node_edge_map,
                                data_map,
                                distances,
                                betas,
                                landuse_encodings=landuse_encodings[:-1],
                                mixed_use_other_keys=np.array([0]))
    # check that providing no metrics keys at all is caught
    with pytest.raises(ValueError):
        data.aggregate_landuses(node_data,
                                edge_data,
                                node_edge_map,
                                data_map,
                                distances,
                                betas,
                                landuse_encodings=landuse_encodings)
    # check that missing qs are caught when hill keys are requested
    with pytest.raises(ValueError):
        data.aggregate_landuses(node_data,
                                edge_data,
                                node_edge_map,
                                data_map,
                                distances,
                                betas,
                                mixed_use_hill_keys=np.array([0]),
                                landuse_encodings=landuse_encodings)
    # check that problematic mixed use and accessibility keys are caught
    for mu_h_key, mu_o_key, ac_key in [
        # negatives
        ([-1], [1], [1]),
        ([1], [-1], [1]),
        ([1], [1], [-1]),
        # out of range
        ([4], [1], [1]),
        ([1], [3], [1]),
        ([1], [1], [max(landuse_encodings) + 1]),
        # duplicates
        ([1, 1], [1], [1]),
        ([1], [1, 1], [1]),
        ([1], [1], [1, 1])]:
        with pytest.raises(ValueError):
            data.aggregate_landuses(node_data,
                                    edge_data,
                                    node_edge_map,
                                    data_map,
                                    distances,
                                    betas,
                                    landuse_encodings,
                                    qs=qs,
                                    mixed_use_hill_keys=np.array(mu_h_key),
                                    mixed_use_other_keys=np.array(mu_o_key),
                                    accessibility_keys=np.array(ac_key))
    for h_key, o_key in (([3], []), ([], [2])):
        # check that missing matrix is caught for disparity weighted indices
        with pytest.raises(ValueError):
            data.aggregate_landuses(node_data,
                                    edge_data,
                                    node_edge_map,
                                    data_map,
                                    distances,
                                    betas,
                                    landuse_encodings=landuse_encodings,
                                    qs=qs,
                                    mixed_use_hill_keys=np.array(h_key),
                                    mixed_use_other_keys=np.array(o_key))
        # check that non-square disparity matrix is caught
        mock_matrix = np.full((len(landuse_classes), len(landuse_classes)), 1)
        with pytest.raises(ValueError):
            data.aggregate_landuses(node_data,
                                    edge_data,
                                    node_edge_map,
                                    data_map,
                                    distances,
                                    betas,
                                    landuse_encodings=landuse_encodings,
                                    qs=qs,
                                    mixed_use_hill_keys=np.array(h_key),
                                    mixed_use_other_keys=np.array(o_key),
                                    cl_disparity_wt_matrix=mock_matrix[:-1])
Example #11
def test_aggregate_to_src_idx(primal_graph):
    node_uids, node_data, edge_data, node_edge_map = graphs.graph_maps_from_nX(primal_graph)
    # generate data
    data_dict = mock.mock_data_dict(primal_graph, random_seed=13)
    data_uids, data_map = layers.data_map_from_dict(data_dict)
    for max_dist in [400, 750]:
        # in this case, use same assignment max dist as search max dist
        data_map_temp = data_map.copy()
        data_map_temp = data.assign_to_network(data_map_temp,
                                               node_data,
                                               edge_data,
                                               node_edge_map,
                                               max_dist=max_dist)
        for angular in [True, False]:
            for netw_src_idx in range(len(node_data)):
                # aggregate to src...
                reachable_data, reachable_data_dist, tree_preds = data.aggregate_to_src_idx(netw_src_idx,
                                                                                            node_data,
                                                                                            edge_data,
                                                                                            node_edge_map,
                                                                                            data_map_temp,
                                                                                            max_dist,
                                                                                            angular=angular)
                # for debugging
                # from cityseer.tools import plot
                # plot.plot_graph_maps(node_uids, node_data, edge_data, data_map)
                # compare to manual checks on distances:
                netw_x_arr = node_data[:, 0]
                netw_y_arr = node_data[:, 1]
                data_x_arr = data_map_temp[:, 0]
                data_y_arr = data_map_temp[:, 1]
                # get the network distances
                tree_map, tree_edges = centrality.shortest_path_tree(edge_data,
                                                                     node_edge_map,
                                                                     netw_src_idx,
                                                                     max_dist=max_dist,
                                                                     angular=angular)
                tree_dists = tree_map[:, 2]
                # verify distances vs. the max
                for d_idx in range(len(data_map_temp)):
                    # check the integrity of the distances and classes
                    reachable = reachable_data[d_idx]
                    reachable_dist = reachable_data_dist[d_idx]
                    # get the distance via the nearest assigned index
                    nearest_dist = np.inf
                    # if a nearest node has been assigned
                    if np.isfinite(data_map_temp[d_idx, 2]):
                        # get the index for the assigned network node
                        netw_idx = int(data_map_temp[d_idx, 2])
                        # if this node is within the cutoff distance:
                        if tree_dists[netw_idx] < max_dist:
                            # get the distances from the data point to the assigned network node
                            d_d = np.hypot(data_x_arr[d_idx] - netw_x_arr[netw_idx],
                                           data_y_arr[d_idx] - netw_y_arr[netw_idx])
                            # and add it to the network distance path from the source to the assigned node
                            n_d = tree_dists[netw_idx]
                            nearest_dist = d_d + n_d
                    # also get the distance via the next nearest assigned index
                    next_nearest_dist = np.inf
                    # if a nearest node has been assigned
                    if np.isfinite(data_map_temp[d_idx, 3]):
                        # get the index for the assigned network node
                        netw_idx = int(data_map_temp[d_idx, 3])
                        # if this node is within the radial cutoff distance:
                        if tree_dists[netw_idx] < max_dist:
                            # get the distances from the data point to the assigned network node
                            d_d = np.hypot(data_x_arr[d_idx] - netw_x_arr[netw_idx],
                                           data_y_arr[d_idx] - netw_y_arr[netw_idx])
                            # and add it to the network distance path from the source to the assigned node
                            n_d = tree_dists[netw_idx]
                            next_nearest_dist = d_d + n_d
                    # now check distance integrity
                    if np.isinf(reachable_dist):
                        assert not reachable
                        assert nearest_dist > max_dist and next_nearest_dist > max_dist
                    else:
                        assert reachable
                        assert reachable_dist <= max_dist
                        if nearest_dist < next_nearest_dist:
                            assert reachable_dist == nearest_dist
                        else:
                            assert reachable_dist == next_nearest_dist