def test_do_pca(get_iris_pca):
    """Check ``viz.do_pca`` on the iris measurements.

    The four measurement columns of ``./tests/iris.csv`` are projected
    onto two components and the result is compared, within an absolute
    tolerance of 1e-6, to the value supplied by the ``get_iris_pca``
    fixture.
    """
    feature_cols = ['sepal length', 'sepal width',
                    'petal length', 'petal width']

    iris = pd.read_csv('./tests/iris.csv', index_col=False)
    iris = iris.drop('Unnamed: 0', axis=1)

    expected = get_iris_pca
    # The fitted pipeline returned alongside the projection is not needed.
    projected, _ = viz.do_pca(iris.loc[:, feature_cols], 2)

    assert np.allclose(projected, expected, atol=1e-6)
def test_generate_pca_report(get_iris_pca):
    """Check ``viz.generate_pca_report`` against hard-coded iris values.

    A two-component PCA is fitted on the four iris measurement columns,
    the ``'pca'`` step of the returned pipeline is passed to the report
    generator, and the result is compared (absolute tolerance 1e-6) to
    the expected ``C1``/``C2`` table below.

    The ``get_iris_pca`` fixture is requested but not used in the body.
    """
    feature_cols = ['sepal length', 'sepal width',
                    'petal length', 'petal width']

    iris = pd.read_csv('./tests/iris.csv', index_col=False)
    iris = iris.drop('Unnamed: 0', axis=1)

    # Only the fitted pipeline is needed here, not the projected data.
    _, pipeline = viz.do_pca(iris.loc[:, feature_cols], 2)
    report = viz.generate_pca_report(pipeline.named_steps['pca'])

    expected = pd.DataFrame(
        {
            "C1": [0.522372, -0.263355, 0.581254, 0.565611],
            "C2": [0.372318, 0.925556, 0.021095, 0.065416],
        },
        index=[0, 1, 2, 3],
    )

    assert np.allclose(report, expected, atol=1e-6)
def test_table_similar_with_names(get_sample_geodata):
    """Check ``viz.table_similar_with_names`` for the origin "Vattuniemi".

    PCA data is built from the sample geodata (arguments ``2018`` and
    ``5``), reduced with ``viz.do_pca(x, 5)``, and the resulting table is
    compared to the expected two-row frame: numeric columns within an
    absolute tolerance of 1e-6, names exactly.
    """
    numeric_cols = ["he_kika", "dist"]

    features, _, target_names = viz.get_pca_data(get_sample_geodata, 2018, 5)
    projected, _ = viz.do_pca(features, 5)

    # NOTE(review): this test spells the second comparison area
    # "Ruoholahti - Kamppi", whereas test_get_similar_in_geo_area expects
    # "Kamppi - Ruoholahti" -- confirm which spelling the data uses.
    compared_names = ["Lauttasaari", "Ruoholahti - Kamppi"]
    table = viz.table_similar_with_names(
        get_sample_geodata,
        "Vattuniemi",
        compared_names,
        target_names,
        projected,
        cols=["he_kika"],
    )

    expected = pd.DataFrame(
        {
            "nimi": ['Vattuniemi', 'Lauttasaari'],
            "he_kika": [42.0, 40.0],
            "dist": [0.0, 4.573346],
        },
        index=[16, 11],
    )

    assert np.allclose(table.loc[:, numeric_cols],
                       expected.loc[:, numeric_cols],
                       atol=1e-6)
    assert np.all(table["nimi"] == expected["nimi"])
def test_get_similar_in_geo_area(get_sample_geodata):
    """Check ``sim.get_similar_in_geo_area`` for two postcode selections.

    Euclidean pairwise distances are computed on the PCA projection of
    the sample geodata; the single most similar area to "Vattuniemi" is
    then requested for two different subsets of rows selected by their
    ``pono`` values.
    """

    def rows_with_pono(codes):
        # Subset of the sample geodata whose ``pono`` is in ``codes``.
        mask = get_sample_geodata.pono.isin(codes)
        return get_sample_geodata.loc[mask]

    # The first subset is selected before the PCA data is built, as in
    # the original statement order.
    area_with_00200 = rows_with_pono(['00180', '00200', '00210'])

    features, _, target_names = viz.get_pca_data(get_sample_geodata, 2018, 5)
    # Re-label the names with consecutive integers 0..n-1.
    target_names.index = range(len(target_names))

    projected, _ = viz.do_pca(features, 5)
    distances = sim.pairwise_distances(projected, projected, 'euclidean')

    query = dict(orig_name="Vattuniemi", d=distances,
                 target_names=target_names, n_most=1)

    closest = sim.get_similar_in_geo_area(area_with_00200, **query)
    assert closest == ['Lauttasaari']

    area_without_00200 = rows_with_pono(['00180', '00210'])
    closest_without = sim.get_similar_in_geo_area(area_without_00200, **query)
    assert closest_without == ["Kamppi - Ruoholahti"]
'hinta': 'int64', 'yliopistot': 'int64', 'amk': 'int64'}) df.drop(labels='Unnamed: 0', axis=1, inplace=True) df = map_fi_plot.merge_to_polygons_for_year(df, 2018) # df.drop(labels=['vuosi_y', 'nimi_y'], axis=1, inplace=True) df = df.rename(index=str, columns={'vuosi_y': 'vuosi', 'nimi_y': 'nimi'}) # get uni data # amk, yl = data_transforms.get_edu_data() # df['yliopistot'] = [yl[x] if x in yl else 0 for x in df.pono] # df['amk'] = [amk[x] if x in amk else 0 for x in df.pono] X, y, target_names = viz.get_pca_data(df, 2018, 5) target_names.index = range(len(target_names)) X_pca, pipe = viz.do_pca(X, 5) # plot_data4 = make_graph_data(df) # with open('plotly_plot_data4', 'wb') as f: # pickle.dump(plot_data4, f) with open('plotly_plot_data4', 'rb') as f: data_pickle = pickle.load(f) pono_name_dict = dict(zip(df.sort_values(by='pono').pono + ' ' + df.sort_values(by='pono').nimi, df.sort_values(by='pono').nimi)) app.layout = html.Div([ html.Div([ html.Div([ html.P('Select origin area:', style={'display': 'inline-block'}), dcc.Dropdown(