Esempio n. 1
0
def test_pairwise_distance():
    """Euclidean distance is 0 for identical points and 1 for points one unit apart."""
    # Each input is a single 2-D row, i.e. shape (1, 2).
    reference = np.array([[1, 0]])

    # Identical points: the single distance entry must be zero.
    same_point = sim.pairwise_distances(np.array([[1, 0]]), reference, "euclidean")
    assert (same_point == 0)

    # Points differing by one unit along the first axis.
    unit_apart = sim.pairwise_distances(np.array([[2, 0]]), reference, "euclidean")
    assert unit_apart == 1
Esempio n. 2
0
def update_graph(origin_name, move_type,
                 range_km, n_most,
                 max_price, target):
    """Build the figure dict (``data`` and ``layout``) for the similar-areas map.

    The ``df`` frame and the ``pipe`` transformer are read from the enclosing
    scope. The frame is filtered by price, narrowed to the geographic area
    selected through ``move_type`` / ``range_km``, projected with
    ``pipe.transform`` and searched for the ``n_most`` areas closest to
    ``origin_name`` by euclidean distance in the projected space.

    Args:
        origin_name: Name of the origin area (matched against ``nimi``
            downstream -- presumably a postal-code area name; TODO confirm).
        move_type: Area-selection mode passed to
            ``map_fi_plot.get_included_area``. When it equals
            ``'difference'`` the target is replaced by ``origin_name``.
        range_km: Range passed to ``map_fi_plot.get_included_area``.
        n_most: Number of similar areas requested from
            ``similarity.get_similar_in_geo_area``.
        max_price: Price limit passed to ``similarity.filter_w_price``.
        target: Name of the target area.

    Returns:
        dict: ``{'data': ..., 'layout': ...}`` where ``data`` comes from
        ``set_fill_colors_for_origin_and_comp`` and ``layout`` from
        ``define_layout``.
    """
    # The price filter intentionally runs before the 'difference' override
    # below, so it always receives the caller-supplied target.
    df_filtered = similarity.filter_w_price(df, max_price, [origin_name, target])
    if move_type == 'difference':
        target = origin_name

    # Only the included rows are needed here; the area geometry is unused.
    _, included = map_fi_plot.get_included_area(df_filtered, move_type, origin_name, range_km, target)

    X, _, target_names = viz.get_pca_data(included, 2018, 5)
    # Re-index to 0..n-1; presumably so labels line up positionally with the
    # rows of the distance matrix -- verify against get_similar_in_geo_area.
    target_names.index = range(len(target_names))
    X_pca = pipe.transform(X)
    d = similarity.pairwise_distances(X_pca, X_pca, 'euclidean')
    similar = similarity.get_similar_in_geo_area(included, origin_name, d,
                                                 target_names, n_most)

    layout = define_layout()

    return {
        'data': set_fill_colors_for_origin_and_comp(make_graph_data(included), origin_name, similar, target),
        'layout': layout
    }
Esempio n. 3
0
def update_table(origin_name, move_type,
                 range_km, n_most,
                 max_price, target):
    """Build the figure dict holding a table of the areas most similar to the origin.

    The ``df`` frame and the ``pipe`` transformer are read from the enclosing
    scope. Similarity is computed on the price-filtered values, while the
    table itself is built from the rank-transformed copy of the same data.

    Args:
        origin_name: Name of the origin area.
        move_type: Area-selection mode passed to
            ``map_fi_plot.get_included_area``. When it equals
            ``'difference'`` the target is replaced by ``origin_name``.
        range_km: Range passed to ``map_fi_plot.get_included_area``.
        n_most: Number of similar areas requested from
            ``similarity.get_similar_in_geo_area``.
        max_price: Price limit passed to ``similarity.filter_w_price``.
        target: Name of the target area.

    Returns:
        dict: ``{'data': [go.Table trace], 'layout': {...}}`` with an
        auto-sized layout and all margins set to zero.
    """
    # The price filter intentionally runs before the 'difference' override
    # below, so it always receives the caller-supplied target.
    df_filtered = similarity.filter_w_price(df, max_price, [origin_name, target])
    # transform values to ranks for easy understanding
    df_filtered_ranks = similarity.full_df_to_ranks(df_filtered, bins=10)
    if move_type == 'difference':
        target = origin_name

    # Same area selection on raw and ranked data; only the included rows of
    # each are used, the area geometry is discarded.
    _, included = map_fi_plot.get_included_area(df_filtered, move_type, origin_name, range_km, target)
    _, included_ranks = map_fi_plot.get_included_area(df_filtered_ranks, move_type, origin_name, range_km, target)

    X, _, target_names = viz.get_pca_data(included, 2018, 5)
    # Re-index to 0..n-1; presumably so labels line up positionally with the
    # rows of the distance matrix -- verify against get_similar_in_geo_area.
    target_names.index = range(len(target_names))
    X_pca = pipe.transform(X)
    d = similarity.pairwise_distances(X_pca, X_pca, 'euclidean')
    similar = similarity.get_similar_in_geo_area(included, origin_name, d,
                                                 target_names, n_most)

    # 'te_as_valj' is requested here but is not among the displayed cells below.
    tb = viz.table_similar_with_names(included_ranks, origin_name, similar, target_names, X_pca,
                                      ['pono', 'nimi', 'he_kika',
                                       'ra_asunn', 'te_laps',
                                       'te_as_valj', 'tp_tyopy',
                                       'tr_mtu', 'yliopistot', 'amk'],
                                      tail=False)
    tb = tb.drop_duplicates()
    tb = format_numeric_table_cols(tb, numcols=['dist'])

    # NOTE(review): the align lists have 5 entries while the table has 10
    # columns -- confirm how plotly treats the remaining columns.
    trace = go.Table(
        header=dict(values=['Pono', 'Nimi', 'Keski-ikä', 'Asunnot', 'Lapsitaloudet', 'Työpaikat',
                            'Mediaanitulo', 'Yliopistot', 'AMK', 'Dist'],
                    fill=dict(color='#C2D4FF'),
                    align=['left'] * 5,
                    height=40),
        cells=dict(values=[tb.pono, tb.nimi, tb.he_kika, tb.ra_asunn, tb.te_laps, tb.tp_tyopy, tb.tr_mtu, tb.yliopistot,
                           tb.amk, tb.dist],
                   fill=dict(color='#F5F8FF'),
                   align=['left'] * 5,
                   height=30)
    )
    return {'data': [trace],
            'layout': dict(autosize=True, margin=dict(
                t=0,
                b=0,
                r=0,
                l=0
            )
                           )
            }
Esempio n. 4
0
def plot_similar_in_geo_area(data, orig_name, target, range_km, how, n_most, pipe, figsize=(12, 10)):
    """Plot the areas most similar to ``orig_name`` within a geographic selection.

    Args:
        data: Input data handed to ``merge_to_polygons_for_year`` and to the
            final map call.
        orig_name: Name of the origin area; must occur in the ``nimi_x``
            column of the merged data.
        target: Name of the target area, or ``None`` to use ``orig_name``.
        range_km: Range passed to ``get_included_area``.
        how: Either ``'intersection'`` or ``'difference'``.
        n_most: Number of similar areas to highlight.
        pipe: Object whose ``transform`` projects the PCA input data.
        figsize: Figure size forwarded to ``map_with_highlights_names``.

    Raises:
        ValueError: If ``how`` is not a supported method, or if the origin or
            target name is missing from the merged data.
    """
    if how not in ('intersection', 'difference'):
        raise ValueError('how should be either "intersection" or "difference"')

    target = orig_name if target is None else target

    polygons = merge_to_polygons_for_year(data, 2018)
    known_names = list(polygons['nimi_x'])
    if orig_name not in known_names:
        raise ValueError('origin_name not in data!')
    if target not in known_names:
        raise ValueError('target not in data!')

    area, included = get_included_area(polygons, how, orig_name, range_km, target)

    features, _, target_names = viz.get_pca_data(included, 2018, 5)
    target_names.index = range(len(target_names))
    projected = pipe.transform(features)
    distances = similarity.pairwise_distances(projected, projected, 'euclidean')
    similar = similarity.get_similar_in_geo_area(
        included, orig_name, distances, target_names, n_most)

    map_with_highlights_names(data, '', orig_name, similar, 2018, area=area, figsize=figsize)
Esempio n. 5
0
def test_get_similar_in_geo_area(get_sample_geodata):
    """The closest match to Vattuniemi depends on which areas are included."""
    geodata = get_sample_geodata

    def subset(postal_codes):
        # Rows of the sample data whose 'pono' is one of the given codes.
        return geodata.loc[geodata.pono.isin(postal_codes)]

    area_with_three = subset(['00180', '00200', '00210'])

    # Distances are computed once over the full sample and shared by both checks.
    x, _, target_names = viz.get_pca_data(geodata, 2018, 5)
    target_names.index = range(len(target_names))
    x_pca, _ = viz.do_pca(x, 5)
    d = sim.pairwise_distances(x_pca, x_pca, 'euclidean')

    def closest_to_vattuniemi(included):
        return sim.get_similar_in_geo_area(included,
                                           orig_name="Vattuniemi",
                                           d=d,
                                           target_names=target_names,
                                           n_most=1)

    assert closest_to_vattuniemi(area_with_three) == ['Lauttasaari']

    # Without '00200' in the included area, a different area is the closest.
    area_with_two = subset(['00180', '00210'])
    assert closest_to_vattuniemi(area_with_two) == ["Kamppi - Ruoholahti"]
Esempio n. 6
0
# Exploratory script: run PCA on `data`, report the components, then use
# euclidean distances in PCA space to find and map similar areas.

# Build the PCA inputs (the 2018 and 5 arguments are presumably the year and
# the postal-code level -- TODO confirm against viz.get_pca_data).
X, y, target_names = viz.get_pca_data(data, 2018, 5)
# Re-index the names to 0..n-1.
target_names.index = range(len(target_names))
viz.exploratory_pca(X, 20)

# Projected data plus the pipeline that produced it.
X_pca, pipe = viz.do_pca(X, 5)

# Report on the pipeline's 'pca' step, labelled with the PCA input columns.
pca_comp = viz.generate_pca_report(pipe.named_steps['pca'])
pca_comp['vars'] = viz.get_pca_cols(data)
print(pca_comp)

viz.pca_plot(X_pca, target_names, y.ravel())
# save pca to csv
# pca_comp.to_csv('pca.csv')

# similarity calculation
d = similarity.pairwise_distances(X_pca, X_pca, 'euclidean')
names = similarity.get_n_most_similar_with_name("Otaniemi", d, target_names,
                                                10)
print(names)

# Tag each 'pono.level' == 5 row with the index of its largest PCA component.
# NOTE(review): the assigned frame is indexed 0..n-1, so this relies on index
# alignment with the freshly reset index of `data` -- confirm that the rows of
# X_pca correspond to the level-5 rows.
data.reset_index(inplace=True, drop=True)
data_l5 = data.loc[data['pono.level'] == 5, :].assign(
    max_factor=pd.DataFrame(X_pca.argmax(axis=1)))
map_fi_plot.map_fi_postinumero(data_l5,
                               "Highest factors per area",
                               color_var='max_factor')

# Map highlighting Vattuniemi together with its 15 most similar areas.
map_fi_plot.map_with_highlights_names(
    data_l5, "How similar to Vattuniemi?", 'Vattuniemi',
    similarity.get_n_most_similar_with_name('Vattuniemi', d, target_names, 15))
map_fi_plot.map_with_highlights_names(