Python encode_categorical 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: cityseer.metrics.layers

메소드/함수: encode_categorical

hotexamples.com에서의 예제들: 9

Python encode_categorical - 9개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 cityseer.metrics.layers.encode_categorical에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

예제 #1

파일 보기

파일: test_layers.py 프로젝트: benchmark-urbanism/cityseer-api

def test_encode_categorical():
    """Check that ``layers.encode_categorical`` is consistent with its input labels.

    Every returned class must occur in the input labels, and every input label
    must be encoded as the index of its class within the returned classes.
    """
    # generate mock data
    labels = mock.mock_categorical_data(50)
    classes, class_encodings = layers.encode_categorical(labels)
    # no class may be invented: each one has to appear in the source labels
    for class_name in classes:
        assert class_name in labels
    # each label must be a known class and be encoded as that class's index
    for position, label in enumerate(labels):
        assert label in classes
        expected_code = classes.index(label)
        assert expected_code == class_encodings[position]

예제 #2

파일 보기

파일: test_checks.py 프로젝트: KalipheGTU/cityseer

def test_check_categorical_data():
    """Check that ``checks.check_categorical_data`` rejects malformed encodings.

    Negative, NaN and non-integer float values are each expected to raise
    ``ValueError``.
    """
    mock_categorical = mock.mock_categorical_data(50)
    _, data_encoding = layers.encode_categorical(mock_categorical)

    # check for malformed data
    # negatives
    with pytest.raises(ValueError):
        data_encoding[0] = -1
        checks.check_categorical_data(data_encoding)
    # NaN
    # NOTE(review): the assignment sits inside the context manager, so a
    # ValueError raised by the assignment itself (e.g. NaN written into an
    # integer array) would also satisfy this check - confirm that is intended
    with pytest.raises(ValueError):
        data_encoding[0] = np.nan
        checks.check_categorical_data(data_encoding)
    # floats
    # the builtin float is used because the np.float alias was removed in
    # NumPy 1.24; the setup stays outside the context manager so that only the
    # check itself can satisfy the expected ValueError
    data_encoding_float = data_encoding.astype(float)
    data_encoding_float[0] = 1.2345
    with pytest.raises(ValueError):
        checks.check_categorical_data(data_encoding_float)

예제 #3

파일 보기

async def accessibility_calc(db_config,
                             nodes_table,
                             links_table,
                             city_pop_id,
                             distances,
                             boundary_table='analysis.city_boundaries_150',
                             data_table='os.poi',
                             data_where=None,
                             rdm_flag=False,
                             dual_flag=False):
    """Compute land-use mixed-use and accessibility metrics and write them to the nodes table.

    The network and the POI data are loaded through ``postGIS_to_networkX`` and
    ``postGIS_to_landuses_dict``, the data points are assigned to the network,
    the aggregations are computed, and the per-node results are written back as
    ``real[]`` columns on ``nodes_table``.

    Args:
        db_config: keyword arguments forwarded to ``asyncpg.connect``.
        nodes_table: name of the nodes table; ``'_dual'`` is appended when
            ``dual_flag`` is set.
        links_table: name of the links table; ``'_dual'`` is appended when
            ``dual_flag`` is set.
        city_pop_id: city identifier passed to the loaders.
        distances: distance thresholds; the largest one is used as the
            ``max_dist`` for loading the POI data.
        boundary_table: boundary table name passed to the POI loader.
        data_table: POI table name; ``'_randomised'`` is appended when
            ``rdm_flag`` is set.
        data_where: optional filter passed through to the POI loader.
        rdm_flag: use the randomised data table and a reduced set of metrics;
            output columns get an ``'_rdm'`` suffix.
        dual_flag: use the dual network tables and a reduced set of metrics.

    Returns:
        None. Returns early (after a warning) when a dual or randomised run is
        requested for ``city_pop_id > 1``.
    """
    # dual and randomised runs both use the reduced ("compact") set of metrics
    compact = dual_flag or rdm_flag
    if compact and city_pop_id > 1:
        logger.warning(
            'Only do dual or randomised metrics for city_pop_id = 1')
        return

    if dual_flag:
        nodes_table += '_dual'
        links_table += '_dual'

    if rdm_flag:
        data_table += '_randomised'

    logger.info(
        f'Starting LU calcs for city id: {city_pop_id} on network table '
        f'{nodes_table} and data table {data_table}')
    logger.info('Loading network data')
    G = await postGIS_to_networkX(db_config, nodes_table, links_table,
                                  city_pop_id)
    N = networks.NetworkLayerFromNX(G, distances)
    logger.info(f'Loading POI data from data table: {data_table}')
    data_dict = await postGIS_to_landuses_dict(db_config,
                                               data_table,
                                               'urn',
                                               'class_code',
                                               boundary_table,
                                               city_pop_id,
                                               max_dist=max(distances),
                                               data_where=data_where)
    data_uids, data_map = layers.data_map_from_dict(data_dict)
    # derive the landuse labels and classes (the encodings are not needed here)
    landuse_labels = [v['class'] for v in data_dict.values()]
    landuse_classes, _ = layers.encode_categorical(landuse_labels)
    logger.info('Generating disparity weights matrix')
    cl_disparity_wt_matrix = disparity_wt_matrix(landuse_classes)
    logger.info('Creating data layer')
    D = layers.DataLayer(data_uids, data_map)

    start = time.localtime()
    logger.info('Assigning data points to the network')
    D.assign_to_network(N, max_dist=400)

    # generate the accessibility codes Class
    # this deduces codes and squashes results into categories
    logger.info('Generating POI accessibility codes')
    Acc_codes = Accessibility_Codes(landuse_classes,
                                    len(N.uids),
                                    distances,
                                    compact=compact)

    mixed_use_metrics = [
        'hill', 'hill_branch_wt', 'hill_pairwise_wt',
        'hill_pairwise_disparity', 'shannon', 'gini_simpson',
        'raos_pairwise_disparity'
    ]
    # if dual or rdm only do first two
    if compact:
        mixed_use_metrics = mixed_use_metrics[:2]
        cl_disparity_wt_matrix = None
    # compute
    logger.info('Computing landuses')
    D.compute_aggregated(landuse_labels=landuse_labels,
                         mixed_use_keys=mixed_use_metrics,
                         accessibility_keys=Acc_codes.all_codes,
                         cl_disparity_wt_matrix=cl_disparity_wt_matrix,
                         qs=[0, 1, 2])
    time_duration = datetime.timedelta(seconds=time.mktime(time.localtime()) -
                                       time.mktime(start))
    logger.info(f'Algo duration: {time_duration}')

    # squash the accessibility data
    logger.info('Squashing accessibility data')
    Acc_codes.set_metrics(N.metrics['accessibility'])

    # mixed-use keys whose results are nested per q value
    mu_q_keys = [
        'hill', 'hill_branch_wt', 'hill_pairwise_wt', 'hill_pairwise_disparity'
    ]
    # mixed-use keys without q values
    mu_keys = ['shannon', 'gini_simpson', 'raos_pairwise_disparity']
    if compact:
        mu_q_keys = mu_q_keys[:2]
        mu_keys = []
        ac_keys = [
            'eating', 'drinking', 'commercial', 'retail_food', 'retail_other',
            'transport', 'total'
        ]
    else:
        ac_keys = [
            'accommodation', 'eating', 'drinking', 'commercial', 'tourism',
            'entertainment', 'government', 'manufacturing', 'retail_food',
            'retail_other', 'transport', 'health', 'education', 'parks',
            'cultural', 'sports', 'total'
        ]
    # accessibility keys that are additionally written in non-weighted form
    non_weighted_keys = [
        'eating', 'commercial', 'retail_food', 'retail_other', 'total'
    ]

    # aggregate the data
    # each row is (uid, values...) where the order of the value lists must match
    # the order of measure_cols below
    logger.info('Aggregating results')
    mixed_uses = N.metrics['mixed_uses']
    bulk_data = []
    for idx, uid in enumerate(N.uids):
        # first check that this is a live node (i.e. within the original city boundary)
        if not N.live[idx]:
            continue
        node_data = [uid]
        # mixed-use keys requiring q values
        for mu_key in mu_q_keys:
            for q_val in mixed_uses[mu_key].values():
                node_data.append([d_val[idx] for d_val in q_val.values()])
        # mixed-use keys not requiring q values
        for mu_key in mu_keys:
            node_data.append(
                [d_val[idx] for d_val in mixed_uses[mu_key].values()])
        # accessibility keys
        for ac_key in ac_keys:
            node_data.append([
                d_val[idx]
                for d_val in Acc_codes.metrics['weighted'][ac_key].values()
            ])
            # also write non-weighted variants of the following
            if ac_key in non_weighted_keys:
                node_data.append([
                    d_val[idx] for d_val in Acc_codes.metrics['non_weighted']
                    [ac_key].values()
                ])
        bulk_data.append(tuple(node_data))

    if not compact:
        measure_cols = [
            'mu_hill_0', 'mu_hill_1', 'mu_hill_2', 'mu_hill_branch_wt_0',
            'mu_hill_branch_wt_1', 'mu_hill_branch_wt_2',
            'mu_hill_pairwise_wt_0', 'mu_hill_pairwise_wt_1',
            'mu_hill_pairwise_wt_2', 'mu_hill_dispar_wt_0',
            'mu_hill_dispar_wt_1', 'mu_hill_dispar_wt_2', 'mu_shannon',
            'mu_gini', 'mu_raos', 'ac_accommodation', 'ac_eating',
            'ac_eating_nw', 'ac_drinking', 'ac_commercial', 'ac_commercial_nw',
            'ac_tourism', 'ac_entertainment', 'ac_government',
            'ac_manufacturing', 'ac_retail_food', 'ac_retail_food_nw',
            'ac_retail_other', 'ac_retail_other_nw', 'ac_transport',
            'ac_health', 'ac_education', 'ac_parks', 'ac_cultural',
            'ac_sports', 'ac_total', 'ac_total_nw'
        ]
    else:
        measure_cols = [
            'mu_hill_0', 'mu_hill_1', 'mu_hill_2', 'mu_hill_branch_wt_0',
            'mu_hill_branch_wt_1', 'mu_hill_branch_wt_2', 'ac_eating',
            'ac_eating_nw', 'ac_drinking', 'ac_commercial', 'ac_commercial_nw',
            'ac_retail_food', 'ac_retail_food_nw', 'ac_retail_other',
            'ac_retail_other_nw', 'ac_transport', 'ac_total', 'ac_total_nw'
        ]
    # add the _rdm extension if necessary
    if rdm_flag:
        measure_cols = [m + '_rdm' for m in measure_cols]

    logger.info('Writing results to database')
    db_con = await asyncpg.connect(**db_config)
    # try/finally guarantees the connection is released even if a statement fails
    try:
        # create the columns
        # NOTE(review): table and column names are interpolated directly into
        # the SQL because identifiers cannot be bound as query parameters;
        # nodes_table must therefore come from a trusted source
        col_strings = []
        # placeholder $1 is reserved for the node id used in the WHERE clause
        for counter, measure_col in enumerate(measure_cols, start=2):
            await db_con.execute(f'''
            ALTER TABLE {nodes_table}
                ADD COLUMN IF NOT EXISTS {measure_col} real[];
            ''')
            col_strings.append(f'{measure_col} = ${counter}')
        await db_con.executemany(
            f'UPDATE {nodes_table} SET ' + ', '.join(col_strings) +
            ' WHERE id = $1;', bulk_data)
    finally:
        await db_con.close()

예제 #4

파일 보기

def plot_assignment(Network_Layer,
                    Data_Layer,
                    path: str = None,
                    node_colour: (list, tuple, np.ndarray) = None,
                    node_labels: bool = False,
                    data_labels: (list, tuple, np.ndarray) = None):
    """Plot a network, its data points, and the links from data points to assigned nodes.

    Args:
        Network_Layer: object providing ``to_networkX()`` and ``_node_data``.
        Data_Layer: object providing ``_data``; columns 0 and 1 are used as the
            x / y coordinates and columns 2 and 3 as the assigned network indices.
        path: if truthy, the figure is saved there; otherwise it is shown.
        node_colour: either a single colour (length 1) or one colour per graph
            node; the module default is used when omitted.
        node_labels: whether node labels are drawn.
        data_labels: optional categorical labels used to colour the data points.

    Raises:
        ValueError: if ``node_colour`` has a length other than 1 or the number
            of nodes in the graph.
    """
    # extract NetworkX
    nx_graph = Network_Layer.to_networkX()

    # fall back to the default colour, otherwise validate what was provided
    if node_colour is None:
        node_colour = secondary
    elif len(node_colour) != 1 and len(node_colour) != len(nx_graph):
        raise ValueError(
            'Node colours should either be a single colour or a list or tuple of colours matching '
            'the number of nodes in the graph.')

    # do a simple plot - don't provide path
    pos = {n: (d['x'], d['y']) for n, d in nx_graph.nodes(data=True)}
    nx.draw(nx_graph,
            pos,
            with_labels=node_labels,
            font_size=5,
            font_color='w',
            font_weight='bold',
            node_color=node_colour,
            node_size=30,
            node_shape='o',
            edge_color='w',
            width=1,
            alpha=0.75)

    if data_labels is not None:
        # generate categorical colormap
        _, encodings = layers.encode_categorical(data_labels)
        data_colour = colors.Normalize()(encodings)
        data_cmap = 'Dark2'  # Set1
    else:
        data_colour = info
        data_cmap = None

    # overlay data map
    points = Data_Layer._data
    plt.scatter(x=points[:, 0],
                y=points[:, 1],
                c=data_colour,
                cmap=data_cmap,
                s=30,
                edgecolors='white',
                lw=0.5,
                alpha=0.95)

    def _draw_link(netw_idx, x, y, colour):
        # NaN means the data point has not been assigned a network index
        if np.isnan(netw_idx):
            return
        parent = Network_Layer._node_data[int(netw_idx)]
        plt.plot([parent[0], x], [parent[1], y], c=colour, lw=0.5, ls='--')

    # draw assignment: dashed lines to the assigned nodes for easier viz
    for x, y, nearest_netw_idx, next_n_netw_idx in zip(points[:, 0],
                                                       points[:, 1],
                                                       points[:, 2],
                                                       points[:, 3]):
        _draw_link(nearest_netw_idx, x, y, '#64c1ff')
        _draw_link(next_n_netw_idx, x, y, '#888888')

    if not path:
        plt.gcf().set_facecolor(background)
        plt.show()
    else:
        plt.savefig(path, facecolor=background, dpi=150)

예제 #5

파일 보기

def test_local_agg_time(primal_graph):
    """
    Timing tests for network assignment, landuse aggregation and stats aggregation.

    Each wrapped function is called once to prime it and is then timed over a
    fixed number of iterations; the total time must stay below a fixed limit.
    The test is skipped when the GITHUB_ACTIONS environment variable is set.
    """
    if 'GITHUB_ACTIONS' in os.environ:
        return
    # NOTE(review): this environment variable is set for the rest of the process
    # and is not restored afterwards - confirm other tests do not depend on it
    os.environ['CITYSEER_QUIET_MODE'] = '1'

    # generate node and edge maps
    node_uids, node_data, edge_data, node_edge_map = graphs.graph_maps_from_nX(primal_graph)
    # setup data
    data_dict = mock.mock_data_dict(primal_graph, random_seed=13)
    data_uids, data_map = layers.data_map_from_dict(data_dict)
    data_map = data.assign_to_network(data_map, node_data, edge_data, node_edge_map, 500)
    # needs a large enough beta so that distance thresholds aren't encountered
    distances = np.array([np.inf])
    betas = networks.beta_from_distance(distances)
    qs = np.array([0, 1, 2])
    mock_categorical = mock.mock_categorical_data(len(data_map))
    landuse_classes, landuse_encodings = layers.encode_categorical(mock_categorical)
    mock_numerical = mock.mock_numerical_data(len(data_dict), num_arrs=2, random_seed=0)
    # the same iteration count is used for every timing below
    iters = 20000

    def assign_wrapper():
        data.assign_to_network(data_map, node_data, edge_data, node_edge_map, 500)

    # prime the function
    assign_wrapper()
    # time and report - roughly 5.675
    func_time = timeit.timeit(assign_wrapper, number=iters)
    print(f'assign_wrapper: {func_time} for {iters} iterations')
    assert func_time < 10

    def landuse_agg_wrapper():
        # results are discarded: only the run time matters here
        data.aggregate_landuses(node_data,
                                edge_data,
                                node_edge_map,
                                data_map,
                                distances,
                                betas,
                                mixed_use_hill_keys=np.array([0, 1]),
                                landuse_encodings=landuse_encodings,
                                qs=qs,
                                angular=False)

    # prime the function
    landuse_agg_wrapper()
    # time and report - roughly 10.10
    func_time = timeit.timeit(landuse_agg_wrapper, number=iters)
    print(f'landuse_agg_wrapper: {func_time} for {iters} iterations')
    assert func_time < 15

    def stats_agg_wrapper():
        # compute
        data.aggregate_stats(node_data,
                             edge_data,
                             node_edge_map,
                             data_map,
                             distances,
                             betas,
                             numerical_arrays=mock_numerical,
                             angular=False)

    # prime the function
    stats_agg_wrapper()
    # time and report - roughly 4.96
    func_time = timeit.timeit(stats_agg_wrapper, number=iters)
    print(f'stats_agg_wrapper: {func_time} for {iters} iterations')
    assert func_time < 10

예제 #6

파일 보기

def test_aggregate_landuses_categorical_components(primal_graph):
    """Check ``data.aggregate_landuses`` against metrics recomputed by hand.

    For every distance threshold and every source node, the reachable data
    points are gathered with ``data.aggregate_to_src_idx`` and the class counts,
    nearest distances and accessibility sums are rebuilt manually; these are
    then compared with the aggregated outputs and the ``diversity`` functions.
    A second part checks on the dual graph that ``angular=True`` and
    ``angular=False`` give different results.
    """
    # generate node and edge maps
    node_uids, node_data, edge_data, node_edge_map = graphs.graph_maps_from_nX(primal_graph)
    # setup data
    data_dict = mock.mock_data_dict(primal_graph, random_seed=13)
    data_uids, data_map = layers.data_map_from_dict(data_dict)
    data_map = data.assign_to_network(data_map, node_data, edge_data, node_edge_map, 500)
    # set parameters
    betas = np.array([0.02, 0.01, 0.005, 0.0025])
    distances = networks.distance_from_beta(betas)
    qs = np.array([0, 1, 2])
    categorical_labels = mock.mock_categorical_data(len(data_map))
    landuse_classes, landuse_encodings = layers.encode_categorical(categorical_labels)
    disparity_matrix = np.full((len(landuse_classes), len(landuse_classes)), 1)
    # set the keys - add shuffling to be sure various orders work
    hill_keys = np.arange(4)
    np.random.shuffle(hill_keys)
    non_hill_keys = np.arange(3)
    np.random.shuffle(non_hill_keys)
    ac_keys = np.array([1, 2, 5])
    np.random.shuffle(ac_keys)
    # generate
    mu_data_hill, mu_data_other, ac_data, ac_data_wt = data.aggregate_landuses(
        node_data,
        edge_data,
        node_edge_map,
        data_map,
        distances,
        betas,
        landuse_encodings=landuse_encodings,
        qs=qs,
        mixed_use_hill_keys=hill_keys,
        mixed_use_other_keys=non_hill_keys,
        accessibility_keys=ac_keys,
        cl_disparity_wt_matrix=disparity_matrix,
        angular=False)

    def _select(results, keys, key):
        # results are ordered like the (shuffled) keys, so look the key up by position
        return results[np.where(keys == key)][0]

    # the class codes that were requested as accessibility keys
    ac_codes = (1, 2, 5)
    # hill
    hill = _select(mu_data_hill, hill_keys, 0)
    hill_branch_wt = _select(mu_data_hill, hill_keys, 1)
    hill_pw_wt = _select(mu_data_hill, hill_keys, 2)
    hill_disp_wt = _select(mu_data_hill, hill_keys, 3)
    # non hill
    shannon = _select(mu_data_other, non_hill_keys, 0)
    gini = _select(mu_data_other, non_hill_keys, 1)
    raos = _select(mu_data_other, non_hill_keys, 2)
    # access non-weighted and weighted, keyed by class code
    ac_nw = {code: _select(ac_data, ac_keys, code) for code in ac_codes}
    ac_w = {code: _select(ac_data_wt, ac_keys, code) for code in ac_codes}
    # test manual metrics against all nodes
    mu_max_unique = len(landuse_classes)
    # test against various distances
    for d_idx, dist_cutoff in enumerate(distances):
        beta = betas[d_idx]
        for src_idx in range(len(primal_graph)):
            reachable_data, reachable_data_dist, tree_preds = data.aggregate_to_src_idx(
                src_idx, node_data, edge_data, node_edge_map, data_map, dist_cutoff)
            # counts of each class type (array length per max unique classes - not just those within max distance)
            cl_counts = np.full(mu_max_unique, 0)
            # nearest of each class type (likewise)
            cl_nearest = np.full(mu_max_unique, np.inf)
            # manual accessibility aggregates, keyed by class code
            manual_nw = {code: 0 for code in ac_codes}
            manual_w = {code: 0 for code in ac_codes}
            # iterate reachable
            for data_idx, (reachable, data_dist) in enumerate(zip(reachable_data, reachable_data_dist)):
                if not reachable:
                    continue
                cl = landuse_encodings[data_idx]
                # double check distance is within threshold
                assert data_dist <= dist_cutoff
                # update the class counts
                cl_counts[cl] += 1
                # if distance is nearer, update the nearest distance array too
                if data_dist < cl_nearest[cl]:
                    cl_nearest[cl] = data_dist
                # aggregate accessibility codes
                if cl in manual_nw:
                    manual_nw[cl] += 1
                    manual_w[cl] += np.exp(-beta * data_dist)
            # assertions
            for code in ac_codes:
                assert ac_nw[code][d_idx, src_idx] == manual_nw[code]
            for code in ac_codes:
                assert ac_w[code][d_idx, src_idx] == manual_w[code]

            for q in (0, 1, 2):
                assert hill[q, d_idx, src_idx] == diversity.hill_diversity(cl_counts, q)

            for q in (0, 1, 2):
                assert hill_branch_wt[q, d_idx, src_idx] == \
                       diversity.hill_diversity_branch_distance_wt(cl_counts, cl_nearest, q, beta)

            for q in (0, 1, 2):
                assert hill_pw_wt[q, d_idx, src_idx] == \
                       diversity.hill_diversity_pairwise_distance_wt(cl_counts, cl_nearest, q, beta)

            for q in (0, 1, 2):
                assert hill_disp_wt[q, d_idx, src_idx] == \
                       diversity.hill_diversity_pairwise_matrix_wt(cl_counts, disparity_matrix, q)

            assert shannon[d_idx, src_idx] == diversity.shannon_diversity(cl_counts)
            assert gini[d_idx, src_idx] == diversity.gini_simpson_diversity(cl_counts)
            assert raos[d_idx, src_idx] == diversity.raos_quadratic_diversity(cl_counts, disparity_matrix)

    # check that angular is passed-through
    # actual angular tests happen in test_shortest_path_tree()
    # here the emphasis is simply on checking that the angular instruction gets chained through

    # setup dual data
    G_dual = graphs.nX_to_dual(primal_graph)
    node_labels_dual, node_data_dual, edge_data_dual, node_edge_map_dual = graphs.graph_maps_from_nX(G_dual)
    data_dict_dual = mock.mock_data_dict(G_dual, random_seed=13)
    data_uids_dual, data_map_dual = layers.data_map_from_dict(data_dict_dual)
    data_map_dual = data.assign_to_network(data_map_dual, node_data_dual, edge_data_dual, node_edge_map_dual, 500)
    categorical_labels_dual = mock.mock_categorical_data(len(data_map_dual))
    landuse_classes_dual, landuse_encodings_dual = layers.encode_categorical(categorical_labels_dual)
    disparity_matrix_dual = np.full((len(landuse_classes_dual), len(landuse_classes_dual)), 1)

    # both dual runs share everything except the angular flag
    dual_args = (node_data_dual,
                 edge_data_dual,
                 node_edge_map_dual,
                 data_map_dual,
                 distances,
                 betas,
                 landuse_encodings_dual)
    dual_kwargs = dict(qs=qs,
                       mixed_use_hill_keys=hill_keys,
                       mixed_use_other_keys=non_hill_keys,
                       accessibility_keys=ac_keys,
                       cl_disparity_wt_matrix=disparity_matrix_dual)

    mu_hill_dual, mu_other_dual, ac_dual, ac_wt_dual = \
        data.aggregate_landuses(*dual_args, **dual_kwargs, angular=True)

    mu_hill_dual_sidestep, mu_other_dual_sidestep, ac_dual_sidestep, ac_wt_dual_sidestep = \
        data.aggregate_landuses(*dual_args, **dual_kwargs, angular=False)

    # every output must differ between the angular and non-angular runs
    for angular_result, sidestep_result in ((mu_hill_dual, mu_hill_dual_sidestep),
                                            (mu_other_dual, mu_other_dual_sidestep),
                                            (ac_dual, ac_dual_sidestep),
                                            (ac_wt_dual, ac_wt_dual_sidestep)):
        assert not np.allclose(angular_result, sidestep_result, atol=0.001, rtol=0)

예제 #7

파일 보기

def test_aggregate_landuses_signatures(primal_graph):
    """Check that ``data.aggregate_landuses`` raises ``ValueError`` for invalid arguments.

    Covered cases: missing land-use encodings, encodings whose length differs
    from the data map, no requested metrics, missing ``qs``, invalid key values
    (negative, out of range, duplicated), and a missing or non-square disparity
    matrix for the keys that need one.
    """
    # generate node and edge maps
    node_uids, node_data, edge_data, node_edge_map = graphs.graph_maps_from_nX(primal_graph)
    # setup data
    data_dict = mock.mock_data_dict(primal_graph, random_seed=13)
    data_uids, data_map = layers.data_map_from_dict(data_dict)
    data_map = data.assign_to_network(data_map, node_data, edge_data, node_edge_map, 500)
    # set parameters
    betas = np.array([0.02, 0.01, 0.005, 0.0025])
    distances = networks.distance_from_beta(betas)
    qs = np.array([0, 1, 2])
    mock_categorical = mock.mock_categorical_data(len(data_map))
    landuse_classes, landuse_encodings = layers.encode_categorical(mock_categorical)
    # the leading positional arguments are identical for every call below
    common = (node_data, edge_data, node_edge_map, data_map, distances, betas)

    # check that empty land_use encodings are caught
    with pytest.raises(ValueError):
        data.aggregate_landuses(*common, mixed_use_hill_keys=np.array([0]))
    # check that unequal land_use encodings vs data map lengths are caught
    with pytest.raises(ValueError):
        data.aggregate_landuses(*common,
                                landuse_encodings=landuse_encodings[:-1],
                                mixed_use_other_keys=np.array([0]))
    # check that no provided metrics flags
    with pytest.raises(ValueError):
        data.aggregate_landuses(*common, landuse_encodings=landuse_encodings)
    # check that missing qs flags
    with pytest.raises(ValueError):
        data.aggregate_landuses(*common,
                                mixed_use_hill_keys=np.array([0]),
                                landuse_encodings=landuse_encodings)

    # check that problematic mixed use and accessibility keys are caught
    # each entry is (hill keys, other keys, accessibility keys)
    bad_key_sets = [
        # negatives
        ([-1], [1], [1]),
        ([1], [-1], [1]),
        ([1], [1], [-1]),
        # out of range
        ([4], [1], [1]),
        ([1], [3], [1]),
        ([1], [1], [max(landuse_encodings) + 1]),
        # duplicates
        ([1, 1], [1], [1]),
        ([1], [1, 1], [1]),
        ([1], [1], [1, 1]),
    ]
    for mu_h_key, mu_o_key, ac_key in bad_key_sets:
        with pytest.raises(ValueError):
            data.aggregate_landuses(*common,
                                    landuse_encodings,
                                    qs=qs,
                                    mixed_use_hill_keys=np.array(mu_h_key),
                                    mixed_use_other_keys=np.array(mu_o_key),
                                    accessibility_keys=np.array(ac_key))

    # hill key 3 and other key 2 are the ones exercised with the disparity matrix
    for h_key, o_key in (([3], []), ([], [2])):
        matrix_kwargs = dict(landuse_encodings=landuse_encodings,
                             qs=qs,
                             mixed_use_hill_keys=np.array(h_key),
                             mixed_use_other_keys=np.array(o_key))
        # check that missing matrix is caught for disparity weighted indices
        with pytest.raises(ValueError):
            data.aggregate_landuses(*common, **matrix_kwargs)
        # check that non-square disparity matrix is caught
        square_matrix = np.full((len(landuse_classes), len(landuse_classes)), 1)
        with pytest.raises(ValueError):
            data.aggregate_landuses(*common,
                                    **matrix_kwargs,
                                    cl_disparity_wt_matrix=square_matrix[:-1])

예제 #8

파일 보기

파일: test_layers.py 프로젝트: KalipheGTU/cityseer

def test_compute_aggregated_A():
    """Check ``compute_aggregated`` against the underlying ``data.local_aggregator``.

    The test builds a mock graph, a network layer and a data layer, then:

    1. computes single metrics (hill branch weighted, gini simpson and the
       accessibility of class ``'c'``) through ``D.compute_aggregated`` and
       compares the values written to ``N.metrics`` with the arrays returned
       by ``data.local_aggregator`` (absolute tolerance 0.001);
    2. repeats the comparison for several shuffled assortments of mixed-use
       and accessibility keys, each on a freshly built network / data layer;
    3. checks that malformed input (mismatching number of labels, an unknown
       mixed-use key, an unassigned data layer) raises ``ValueError``.
    """
    G = mock.mock_graph()
    G = graphs.nX_simple_geoms(G)
    betas = np.array([-0.01, -0.005])
    distances = networks.distance_from_beta(betas)
    # network layer
    N = networks.Network_Layer_From_nX(G, distances)
    node_map = N._node_data
    edge_map = N._edge_data
    node_edge_map = N._node_edge_map
    # data layer
    data_dict = mock.mock_data_dict(G)
    qs = np.array([0, 1, 2])
    D = layers.Data_Layer_From_Dict(data_dict)
    # check single metrics independently against underlying for some use-cases, e.g. hill, non-hill, accessibility...
    D.assign_to_network(N, max_dist=500)
    # generate some mock landuse data
    landuse_labels = mock.mock_categorical_data(len(data_dict))
    landuse_classes, landuse_encodings = layers.encode_categorical(
        landuse_labels)

    # --- hill mixed uses ---
    D.compute_aggregated(landuse_labels,
                         mixed_use_keys=['hill_branch_wt'],
                         qs=qs)
    # test against underlying method
    data_map = D._data
    # unpacking into exactly twelve names also pins the number of returned arrays;
    # key 1 is the position of 'hill_branch_wt' in mixed_uses_hill_types (defined further down)
    (mu_data_hill, mu_data_other, ac_data, ac_data_wt,
     stats_sum, stats_sum_wt, stats_mean, stats_mean_wt,
     stats_variance, stats_variance_wt, stats_max, stats_min) = data.local_aggregator(
        node_map,
        edge_map,
        node_edge_map,
        data_map,
        distances,
        betas,
        landuse_encodings,
        qs=qs,
        mixed_use_hill_keys=np.array([1]))
    for q_idx, q_key in enumerate(qs):
        for d_idx, d_key in enumerate(distances):
            assert np.allclose(
                N.metrics['mixed_uses']['hill_branch_wt'][q_key][d_key],
                mu_data_hill[0][q_idx][d_idx],
                atol=0.001,
                rtol=0)

    # --- gini simpson ---
    D.compute_aggregated(landuse_labels, mixed_use_keys=['gini_simpson'])
    # test against underlying method
    data_map = D._data
    # key 1 is the position of 'gini_simpson' in mixed_use_other_types (defined further down)
    (mu_data_hill, mu_data_other, ac_data, ac_data_wt,
     stats_sum, stats_sum_wt, stats_mean, stats_mean_wt,
     stats_variance, stats_variance_wt, stats_max, stats_min) = data.local_aggregator(
        node_map,
        edge_map,
        node_edge_map,
        data_map,
        distances,
        betas,
        landuse_encodings,
        mixed_use_other_keys=np.array([1]))
    for d_idx, d_key in enumerate(distances):
        assert np.allclose(N.metrics['mixed_uses']['gini_simpson'][d_key],
                           mu_data_other[0][d_idx],
                           atol=0.001,
                           rtol=0)

    # --- accessibilities ---
    # NOTE(review): assumes the mock labels always contain class 'c' - confirm against mock_categorical_data
    D.compute_aggregated(landuse_labels, accessibility_keys=['c'])
    # test against underlying method
    data_map = D._data
    (mu_data_hill, mu_data_other, ac_data, ac_data_wt,
     stats_sum, stats_sum_wt, stats_mean, stats_mean_wt,
     stats_variance, stats_variance_wt, stats_max, stats_min) = data.local_aggregator(
        node_map,
        edge_map,
        node_edge_map,
        data_map,
        distances,
        betas,
        landuse_encodings,
        accessibility_keys=np.array([landuse_classes.index('c')]))
    for d_idx, d_key in enumerate(distances):
        assert np.allclose(
            N.metrics['accessibility']['non_weighted']['c'][d_key],
            ac_data[0][d_idx],
            atol=0.001,
            rtol=0)
        assert np.allclose(N.metrics['accessibility']['weighted']['c'][d_key],
                           ac_data_wt[0][d_idx],
                           atol=0.001,
                           rtol=0)

    # --- assortments of metrics ---
    # also check the number of returned types for a few assortments of metrics
    mixed_uses_hill_types = np.array([
        'hill', 'hill_branch_wt', 'hill_pairwise_wt', 'hill_pairwise_disparity'
    ])
    mixed_use_other_types = np.array(
        ['shannon', 'gini_simpson', 'raos_pairwise_disparity'])
    ac_codes = np.array(landuse_classes)

    # shuffled index arrays: the same indices select both the metric names
    # (for compute_aggregated) and the integer keys (for local_aggregator)
    mu_hill_random = np.arange(len(mixed_uses_hill_types))
    np.random.shuffle(mu_hill_random)

    mu_other_random = np.arange(len(mixed_use_other_types))
    np.random.shuffle(mu_other_random)

    ac_random = np.arange(len(landuse_classes))
    np.random.shuffle(ac_random)

    # mock disparity matrix
    mock_disparity_wt_matrix = np.full(
        (len(landuse_classes), len(landuse_classes)), 1)

    # not necessary to do all labels, first few should do
    for mu_h_min in range(3):
        mu_h_keys = np.array(mu_hill_random[mu_h_min:])

        for mu_o_min in range(3):
            mu_o_keys = np.array(mu_other_random[mu_o_min:])

            for ac_min in range(3):
                ac_keys = np.array(ac_random[ac_min:])

                # in the final case, set accessibility to a single code otherwise an error would be raised
                # (with the current range(3) offsets the hill / other key arrays always keep
                # at least two / one entries, so this guard only matters if the ranges are widened)
                if len(mu_h_keys) == 0 and len(mu_o_keys) == 0 and len(
                        ac_keys) == 0:
                    ac_keys = np.array([0])

                # randomise order of keys and metrics
                mu_h_metrics = mixed_uses_hill_types[mu_h_keys]
                mu_o_metrics = mixed_use_other_types[mu_o_keys]
                ac_metrics = ac_codes[ac_keys]

                # fresh layers so that results from earlier iterations cannot leak into this one
                N_temp = networks.Network_Layer_From_nX(G, distances)
                D_temp = layers.Data_Layer_From_Dict(data_dict)
                D_temp.assign_to_network(N_temp, max_dist=500)
                D_temp.compute_aggregated(
                    landuse_labels,
                    mixed_use_keys=list(mu_h_metrics) + list(mu_o_metrics),
                    accessibility_keys=ac_metrics,
                    cl_disparity_wt_matrix=mock_disparity_wt_matrix,
                    qs=qs)

                # test against underlying method
                # NOTE(review): node_map / edge_map / data_map come from N and D rather than
                # N_temp and D_temp; presumably equivalent because both pairs are built from
                # the same graph and data_dict - confirm
                (mu_data_hill, mu_data_other, ac_data, ac_data_wt,
                 stats_sum, stats_sum_wt, stats_mean, stats_mean_wt,
                 stats_variance, stats_variance_wt, stats_max, stats_min) = data.local_aggregator(
                    node_map,
                    edge_map,
                    node_edge_map,
                    data_map,
                    distances,
                    betas,
                    landuse_encodings,
                    qs=qs,
                    mixed_use_hill_keys=mu_h_keys,
                    mixed_use_other_keys=mu_o_keys,
                    accessibility_keys=ac_keys,
                    cl_disparity_wt_matrix=mock_disparity_wt_matrix)

                for mu_h_idx, mu_h_met in enumerate(mu_h_metrics):
                    for q_idx, q_key in enumerate(qs):
                        for d_idx, d_key in enumerate(distances):
                            assert np.allclose(
                                N_temp.metrics['mixed_uses'][mu_h_met][q_key]
                                [d_key],
                                mu_data_hill[mu_h_idx][q_idx][d_idx],
                                atol=0.001,
                                rtol=0)

                for mu_o_idx, mu_o_met in enumerate(mu_o_metrics):
                    for d_idx, d_key in enumerate(distances):
                        assert np.allclose(
                            N_temp.metrics['mixed_uses'][mu_o_met][d_key],
                            mu_data_other[mu_o_idx][d_idx],
                            atol=0.001,
                            rtol=0)

                for ac_idx, ac_met in enumerate(ac_metrics):
                    for d_idx, d_key in enumerate(distances):
                        assert np.allclose(N_temp.metrics['accessibility']
                                           ['non_weighted'][ac_met][d_key],
                                           ac_data[ac_idx][d_idx],
                                           atol=0.001,
                                           rtol=0)
                        assert np.allclose(N_temp.metrics['accessibility']
                                           ['weighted'][ac_met][d_key],
                                           ac_data_wt[ac_idx][d_idx],
                                           atol=0.001,
                                           rtol=0)

    # most integrity checks happen in underlying method, though check here for mismatching labels length and typos
    # drop the last label with a slice so that a sequence one element short is passed
    # (indexing with [-1] would hand over a single label instead of a shorter sequence)
    with pytest.raises(ValueError):
        D.compute_aggregated(landuse_labels[:-1], mixed_use_keys=['shannon'])
    with pytest.raises(ValueError):
        D.compute_aggregated(landuse_labels, mixed_use_keys=['spelling_typo'])
    # don't check accessibility_labels for typos - because only warning is triggered (not all labels will be in all data)
    # check that unassigned data layer flags
    # build the layer outside of the raises block so that only compute_aggregated
    # is allowed to produce the expected ValueError
    D_new = layers.Data_Layer_From_Dict(data_dict)
    with pytest.raises(ValueError):
        D_new.compute_aggregated(landuse_labels, mixed_use_keys=['shannon'])

예제 #9

파일 보기

    lu_flow_c = np.full((iters, spans), 0.0)
    # get the landuse encodings - note that the labels don't change (changes occur via assignments)
    landuse_labels = mock.mock_categorical_data(length=len(Landuse_Layer.uids),
                                                num_classes=3)
    if not randomised:
        l = len(Netw_Layer.uids)
        l_1 = int(l / 3)
        l_2 = l_1 * 2
        for d_idx, assigned_idx in enumerate(Landuse_Layer._data[:, 2]):
            if assigned_idx < l_1:
                landuse_labels[d_idx] = 'a'
            elif assigned_idx < l_2:
                landuse_labels[d_idx] = 'b'
            else:
                landuse_labels[d_idx] = 'c'
    landuse_classes, landuse_encodings = layers.encode_categorical(
        landuse_labels)

    # iterate
    for n in tqdm(range(iters)):
        # POPULATION
        # record current assignment state
        pop_map[n] = set_current_num(Pop_Layer._data, Netw_Layer._nodes)
        # calculate the effective density
        # each population point is a single unit
        # the state technically remains the same, it is the x, y and assignments that change!
        Pop_Layer.compute_stats_single('density', pop_state)
        dens = Netw_Layer.metrics['stats']['density']['sum_weighted'][800]
        dens[np.isnan(dens)] = 0
        # set the centrality weights accordingly
        Netw_Layer.weights = dens
        # calculate the density weighted centrality