def test_splom_case():
    """Check ``px.scatter_matrix`` with DataFrame, dict and ndarray inputs."""
    # DataFrame input: one splom dimension per column.
    iris_df = px.data.iris()
    splom = px.scatter_matrix(iris_df).data[0]
    assert len(splom.dimensions) == len(iris_df.columns)

    # Dict-of-lists input: the first dimension carries the values of "a".
    columns = {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}
    first_dim = px.scatter_matrix(columns).data[0].dimensions[0]
    assert np.all(first_dim.values == np.array(columns["a"]))

    # 2-D array input: the first dimension carries the first array column.
    grid = np.arange(9).reshape((3, 3))
    first_dim = px.scatter_matrix(grid).data[0].dimensions[0]
    assert np.all(first_dim.values == grid[:, 0])
def display_scatterplot():
    """Return a scatter matrix of the five most inter-correlated columns.

    Reads the name ``df`` from the enclosing scope (presumably a
    module-level DataFrame -- confirm), restricts it to a fixed list of
    columns, ranks those columns by the sum of their absolute Pearson
    correlations with each other, and plots the top five.

    Returns:
        The Plotly Express scatter-matrix figure.
    """
    candidate_columns = [
        "age", "pop", "pov", "college", "urate", "county_income",
        "h_income", "share_white", "share_black", "share_hispanic",
    ]
    axis_labels = {
        "pov": "Poverty Rate",
        "h_income": "House Hold Income",
        "college": "College Rate",
        "urate": "Unemployment Rate",
        "share_black": "Black Population",
        "share_white": "White Population",
        "share_hispanic": "Hispanic Population",
        "age": "Age",
        "pop": "Population",
        "county_income": "County Income",
    }

    subset = df[candidate_columns]
    total_abs_corr = subset.corr(method='pearson').abs().sum(axis=0)
    ranked = total_abs_corr.sort_values(ascending=False)

    # Series.iteritems() was removed in pandas 2.0; the labels are all we
    # need, so read them straight from the sorted index.
    top_names = list(ranked.index[:5])

    return px.scatter_matrix(subset[top_names], height=800, labels=axis_labels)
def pcagrid(df, n_components=4, norm='none'):
    """Show a scatter matrix of the PCA components of ``df``.

    Every column except the last is used as a PCA feature; points are
    coloured by ``df["quality"]``.

    Args:
        df: DataFrame with feature columns followed by one trailing column
            that is excluded from the PCA. It must contain a "quality"
            column (presumably that trailing column -- confirm).
        n_components: Number of principal components to compute and plot.
        norm: 'z_score' standardises each feature, 'min_max' rescales each
            feature to [0, 1]; any other value leaves the features as-is.

    Returns:
        Whatever ``fig.show()`` returns.
    """
    features = df.iloc[:, :-1]
    if norm == 'z_score':
        features = (features - features.mean()) / features.std()
    elif norm == 'min_max':
        features = (features - features.min()) / (features.max() - features.min())

    pca = sklearn.decomposition.PCA(n_components=n_components)
    # Fit on the (optionally) normalised features. The original fitted on
    # the raw frame, which silently ignored the ``norm`` argument.
    components = pca.fit_transform(features)

    labels = {
        str(i): f"PC {i+1} ({var:.1f}%)"
        for i, var in enumerate(pca.explained_variance_ratio_ * 100)
    }
    labels['color'] = 'quality'

    total_variance = pca.explained_variance_ratio_.sum() * 100
    fig = px.scatter_matrix(
        components,
        labels=labels,
        dimensions=range(len(pca.explained_variance_ratio_)),
        color=df["quality"],
        title=f'Total Explained Variance: {total_variance:.2f}%',
    )
    fig.update_traces(diagonal_visible=False)
    return fig.show()
def plot_scattermatrix(dimensions, title, df):
    """Show a lower-triangle scatter matrix coloured by the default flag.

    Args:
        dimensions: Columns of ``df`` to include in the matrix.
        title: Figure title.
        df: Data to plot; must contain 'default.payment.next.month'.
    """
    palette = px.colors.qualitative.Set2
    target = 'default.payment.next.month'

    fig = px.scatter_matrix(
        df,
        dimensions=dimensions,
        color=target,
        color_discrete_sequence=palette,
        color_continuous_scale=palette,
        symbol=target,
    )

    legend_style = dict(yanchor="top", font=dict(size=16), xanchor="right")
    fig.update_layout(
        title=title,
        coloraxis_showscale=False,
        width=1700,
        height=1700,
        legend=legend_style,
    )

    marker_style = dict(
        colorscale=palette,
        line_color='white',
        line_width=0,
        size=2,
    )
    fig.update_traces(
        diagonal_visible=False,
        showupperhalf=False,
        marker=marker_style,
    )
    fig.show()


import plotly.figure_factory as ff
def plot_input(df: pd.DataFrame, corrs=False):
    """Show the input data as a scatter matrix or a correlation heatmap.

    Args:
        df: Input data.
        corrs: When truthy, plot the Pearson correlation matrix of ``df``
            as a heatmap; otherwise plot a scatter matrix of every column
            except 'class', coloured by 'class'.

    Returns:
        None; the figure is displayed with ``show()``.
    """
    if corrs:
        # Correlation view.
        heat = go.Heatmap(
            z=np.array(df.corr('pearson')),
            x=df.columns,
            y=df.columns,
            xgap=5,
            ygap=5,
            colorscale='ylorrd',
            reversescale=True,
        )
        layout = go.Layout(
            width=800,
            height=800,
            xaxis_showgrid=False,
            yaxis_showgrid=False,
        )
        go.Figure(data=heat, layout=layout).show()
        return

    # Distribution view.
    feature_columns = [c for c in df.columns if c != 'class']
    matrix = px.scatter_matrix(df, dimensions=feature_columns, color='class')
    matrix.update_traces(diagonal_visible=False, showupperhalf=False)
    matrix.show()
def scatter_matrix(self, df='default'):
    """Show a scatter matrix of one of the stored frames, coloured by attack type.

    Args:
        df: Which frame to plot: 'default', 'attacks', 'normal', 'dos',
            'probe', 'u2r' or 'r2l'. 'attacks' is the default frame
            restricted to rows whose 'Attack_Type(42)' is in
            ``self.attack_types``. Any other value prints a message and
            returns without plotting.
    """
    # Option name -> attribute holding the corresponding frame.
    sources = (
        ('default', 'df_default'),
        ('attacks', 'df_default'),
        ('normal', 'df_normal'),
        ('dos', 'df_dos'),
        ('probe', 'df_probe'),
        ('u2r', 'df_u2r'),
        ('r2l', 'df_r2l'),
    )
    attr_name = next((attr for option, attr in sources if df == option), None)
    if attr_name is None:
        print(
            'Invalid df option. Choose a df from one of the following: '
            'default, attacks, normal, dos, probe, u2r, r2l'
        )
        return

    frame = getattr(self, attr_name).copy()
    if df == 'attacks':
        frame = frame[frame['Attack_Type(42)'].isin(self.attack_types)]

    frame['Attack_Type(42)'] = frame['Attack_Type(42)'].apply(
        self.__replace_values)

    # Axis labels are the column names with underscores turned into spaces.
    pretty_labels = {col: col.replace('_', ' ') for col in frame.columns}
    fig = px.scatter_matrix(frame, color="Attack_Type(42)", labels=pretty_labels)
    fig.update_traces(diagonal_visible=False)
    fig.show()
def num_analysis(n_clicks, input, *args):
    """Render the numeric analysis selected by ``input``.

    Args:
        n_clicks: Unused by the body (presumably the Dash trigger -- confirm).
        input: Selected analysis: 'AAA' -> summary-statistics table,
            'BBB' -> scatter matrix, 'CCC' -> number of entries in
            ``cat_cols``. ``None`` or '' yields a prompt message.
        *args: Ignored.

    Returns:
        A Dash component, or ``None`` for an unrecognised ``input``.
    """
    if input is None or input == '':
        return html.H5('分析方法を選択し、実行を押してください')

    if input == 'AAA':
        describe = use_df.describe().reset_index()
        # Run the conversion once and reuse both parts of its result.
        processed = df_processor(describe)
        return dash_table.DataTable(
            column_selectable='multi',
            fixed_rows={'headers': True, 'data': 0},
            data=processed[0],
            columns=processed[1],
            style_table={
                'overflowX': 'scroll',
                'overflowY': 'scroll',
                'maxHeight': '350px',
                # Was misspelled 'maxWidht', so the width cap never applied.
                'maxWidth': '800px',
            },
            style_header={
                'fontWeight': 'bold',
                'textAlign': 'center',
            },
        )

    if input == 'BBB':
        fig = px.scatter_matrix(
            use_df,
            dimensions=num_cols,
            color=target_column,
        )
        return dcc.Graph(figure=fig)

    if input == 'CCC':
        return html.H5(len(cat_cols))

    return None
def app():
    """Streamlit page: upload a CSV, then explore it with a scatter matrix
    and a per-column distribution plot."""
    # Input 1: choose the dataset.
    uploaded = st.file_uploader("Data file")
    if uploaded is None:
        st.warning("No data loaded")
        return

    # Read the uploaded file, then rewind it to position 0.
    data = pd.read_csv(uploaded)
    uploaded.seek(0)

    # Input 2: scatter-matrix parameters.
    all_columns = list(data.columns)
    dimensions = st.multiselect("Scatter matrix dimensions",
                                all_columns,
                                default=list(data.columns))
    color = st.selectbox("Color", data.columns)
    opacity = st.slider("Opacity", 0.0, 1.0, 0.5)

    # Scatter matrix plot.
    matrix_fig = px.scatter_matrix(data,
                                   dimensions=dimensions,
                                   color=color,
                                   opacity=opacity)
    st.write(matrix_fig)

    # Choose the column whose distribution is shown.
    interesting_column = st.selectbox("Interesting column", data.columns)

    # Choose the plotting function used for the distribution.
    plot_functions = [px.box, px.histogram, px.violin]
    dist_plot = st.selectbox("Plot type", plot_functions)
    st.write(dist_plot(data, x=interesting_column, color=color))
def part_1(data):
    """Plot ``data`` as a heatmap, then chart its strongest off-diagonal pairs.

    ``data`` is unstacked into (first_attribute, second_attribute,
    correlation) rows sorted by descending value; rows pairing an attribute
    with itself are dropped and every second row of the first ten is kept
    (presumably to skip symmetric duplicates -- confirm). Finally
    ``print_word_cloud(skills)`` is called with names taken from the
    enclosing scope.
    """
    heatmap = go.Heatmap(z=data.values, x=data.keys(), y=data.keys())
    go.Figure(data=heatmap).show()

    pairs = data.unstack().sort_values(ascending=False).to_frame().reset_index()
    pairs.columns = ['first_attribute', 'second_attribute', 'correlation']
    print(pairs)

    off_diagonal = pairs.first_attribute != pairs.second_attribute
    pairs = pairs.loc[off_diagonal][0:10:2]
    print(pairs)

    pairs = pairs.sort_values(by='first_attribute')

    line_fig = exp.line(pairs,
                        x='first_attribute',
                        y='correlation',
                        text='second_attribute')
    line_fig.show()

    hist_fig = exp.histogram(pairs,
                             x='first_attribute',
                             y='correlation',
                             histfunc='max')
    hist_fig.show()

    matrix_fig = exp.scatter_matrix(pairs, color='second_attribute')
    matrix_fig.show()

    print_word_cloud(skills)
def generate_splom(df):
    """Return a scatter matrix of the four sepal/petal columns of ``df``."""
    measurement_columns = [
        "sepal_width",
        "sepal_length",
        "petal_width",
        "petal_length",
    ]
    return px.scatter_matrix(df, dimensions=measurement_columns)
def _generate_scatter(self):
    """Build the Dash layout holding the scatter matrix and its description.

    Returns:
        An ``html.Div`` with a centred heading, the scatter-matrix graph
        on the left and the ``markdown_text_scatter`` text on the right.
    """
    numeric_df = self.pp.get_numeric_df(self.settings['data']).copy()

    fig = px.scatter_matrix(numeric_df, height=700)
    fig.update_xaxes(tickangle=90)
    for note in fig['layout']['annotations']:
        note['textangle'] = -90

    heading = html.Div(
        html.H1(children='Матрица рассеяния'),
        style={'text-align': 'center'},
    )
    graph_panel = html.Div(
        dcc.Graph(id='scatter_matrix', figure=fig),
        style={
            'width': '78%',
            'display': 'inline-block',
            'border-color': 'rgb(220, 220, 220)',
            'border-style': 'solid',
            'padding': '5px',
        },
    )
    text_panel = html.Div(
        dcc.Markdown(children=markdown_text_scatter),
        style={
            'width': '18%',
            'float': 'right',
            'display': 'inline-block',
        },
    )

    return html.Div(
        [heading, html.Div([graph_panel, text_panel])],
        style={'margin': '100px'},
    )
def eda():
    """Streamlit page for exploratory analysis of the ``data`` frame.

    ``data`` is read from the enclosing scope. The page shows head/tail
    previews, a row/column count, a histogram of a chosen feature and a
    scatter matrix of chosen numeric features.
    """
    st.title('Exploratory data analysis')

    # Dataset preview.
    st.subheader("Preview DataFrame")
    st.write("Head", data.head())
    st.write("Tail", data.tail())

    # Size along the chosen axis.
    data_dim = st.radio('What Dimension Do You Want to Show',
                        ('Rows', 'Columns'))
    if data_dim == 'Rows':
        st.write("Showing Length of Rows", len(data))
    elif data_dim == 'Columns':
        st.write("Showing Length of Columns", data.shape[1])

    # Histogram of a single feature.
    st.subheader("Plot distribution of feature")
    histogram_features = [
        'year', 'mileage', 'volume', 'price', 'body', 'transmission',
        'wheel', 'drive', 'fuel',
    ]
    x_axis = st.selectbox('Which feature do you want to explore?',
                          histogram_features)
    st.plotly_chart(px.histogram(data, x=x_axis))

    # Scatter matrix over the selected features.
    st.subheader("Scatter plot")
    matrix_features = ['year', 'mileage', 'volume', 'price']
    dim = st.multiselect('Which feature do you want to explore?',
                         matrix_features,
                         default=['price', 'year'])
    st.plotly_chart(px.scatter_matrix(data, dimensions=dim))
def update_graph1(selected_values):
    """Build line, scatter-matrix and correlation figures for the selection.

    Args:
        selected_values: Column names of ``toyo_cropnavi`` (a frame read
            from the enclosing scope) to plot.

    Returns:
        Tuple ``(line_figure, scatter_matrix_figure, correlation_heatmap)``.

    Raises:
        PreventUpdate: When nothing is selected (empty or ``None``).
    """
    # ``not`` also covers None, which len() would reject with a TypeError.
    if not selected_values:
        raise PreventUpdate

    fig1 = px.line(toyo_cropnavi, x=toyo_cropnavi.index, y=selected_values)
    selected_df = toyo_cropnavi[selected_values]
    fig2 = px.scatter_matrix(selected_df)
    fig3 = px.imshow(selected_df.corr(), title="データ間の相関")
    return fig1, fig2, fig3
def scatter_matrix(self, form):
    """Return a 1400x800 scatter matrix of ``self.df``.

    Args:
        form: Mapping providing 'dimensions' (columns to plot) and
            'color' (column used for colouring).
    """
    return px.scatter_matrix(
        self.df,
        dimensions=form['dimensions'],
        color=form['color'],
        width=1400,
        height=800,
    )
def test_scatter_matrix_plot(self, data: pd.DataFrame):
    """Show a scatter matrix of the "Open" series of every configured stock.

    Stock codes come from ``params.get("STOCK_CODES")``; each column is
    renamed to "<Code> Open" with the code capitalised.
    """
    stock_codes = params.get("STOCK_CODES")
    open_series = [data[code]["Open"] for code in stock_codes]
    crypto_comp = pd.concat(open_series, axis=1)
    crypto_comp.columns = [f"{code.capitalize()} Open" for code in stock_codes]
    px.scatter_matrix(crypto_comp).show()
def g4(batch_id, x, y, z, color, colorset):
    """Return an HTML div with a three-dimension scatter matrix for a batch.

    Args:
        batch_id: Identifier passed to ``models.BatchInput``.
        x, y, z: Column names used as the matrix dimensions.
        color: Column used for colouring.
        colorset: Key passed to ``get_colorset`` to get the continuous scale.
    """
    frame = models.BatchInput(batch_id).as_pandas_dataframe()
    scale = get_colorset(colorset)
    fig = px.scatter_matrix(
        frame,
        dimensions=[x, y, z],
        color=color,
        color_continuous_scale=scale,
    )
    return opy.plot(fig, auto_open=False, output_type='div')
def scatterMatrix(self, df):
    """Create and show a scatter matrix of selected columns of ``df``.

    The column selection comes from the name ``columns``, which is resolved
    outside this method (NOTE(review): it is neither a parameter nor a
    local -- confirm where it is defined).
    """
    # Keep only the selected columns and drop rows with missing values.
    complete_rows = df[columns].dropna()
    px.scatter_matrix(complete_rows).show()
def plot_corr_mat(df, sentiment_col):
    """Return a scatter matrix of sentiment, volume, cases and deaths.

    Args:
        df: Frame with the columns 'volume', 'cases', 'deaths', 'country'
            and the column named by ``sentiment_col``.
        sentiment_col: Column plotted under the name 'sentiment'.
    """
    renamed = df.rename(columns={sentiment_col: 'sentiment'})
    plotted_columns = ['sentiment', 'volume', 'cases', 'deaths']
    fig = px.scatter_matrix(renamed,
                            dimensions=plotted_columns,
                            color='country')
    tight_margin = dict(b=5, t=20, l=5, r=5)
    fig.update_layout(autosize=True, height=500, margin=tight_margin)
    return fig
def display_graph(x1value, x2value, x3value, x4vaue):
    """Return a scatter matrix of four columns of ``df`` without the diagonal.

    ``df`` is read from the enclosing scope.

    Args:
        x1value, x2value, x3value, x4vaue: Column names used as the four
            matrix dimensions. The last parameter keeps its historical
            spelling so existing keyword callers are unaffected.
    """
    # The body used to read ``x4value``, which is not the parameter name.
    dimensions = [x1value, x2value, x3value, x4vaue]

    # Build the matrix.
    figure = px.scatter_matrix(df,
                               dimensions=dimensions,
                               title="Matrice de Corrélation : ")
    figure.update_traces(diagonal_visible=False)
    return figure
def _make_task_scatter_chart_and_corr(df, color, year, weekdays, categories, hour_interval): df = df.loc[df["year"].isin(year)] df = df.loc[df["category"].isin(categories)] df = df.loc[(df["weekday"].isin(weekdays))] df = df.loc[(df["start_hour"] >= hour_interval[0]) & (df["start_hour"] < hour_interval[1])] jittering_value = 0.35 df["attention_j"] = df["attention_score"].apply( lambda x: x + random.uniform(-jittering_value, jittering_value )) # Jittering df["happy_j"] = df["happy_score"].apply(lambda x: x + random.uniform( -jittering_value, jittering_value)) # Jittering df.dropna(inplace=True) df["size"] = 1 # NOTE: fixed value if len(df) == 0: return {}, {} scatter_matrix_fig = px.scatter_matrix( df, dimensions=["happy_j", "attention_j", "working_hours", "start_hour"], labels=labels, color=color, # color_discrete_map=colors, category_orders={ "category": data_handler.TASK_CATEGORIES, "year": ["2017", "2018", "2019", "2020"], "weekday": list(calendar.day_name), }, size="size", size_max=5, opacity=0.4, hover_data=[ "start_time", "end_time", "description", "attention_score", "happy_score", "working_hours_text" ], height=700, ) corr_df = df[[ "attention_score", "happy_score", "working_hours", "start_hour" ]].corr() corr_fig = px.imshow( corr_df.to_numpy(), x=list(corr_df.columns), y=list(corr_df.columns), labels=labels, color_continuous_scale=px.colors.sequential.Viridis, range_color=[-1, 1], ) return scatter_matrix_fig, corr_fig
def plot_scatter_matrix(n_clicks, data, columns, selected, ndxs):
    """Build the scatter-matrix figure and its config from table data.

    Args:
        n_clicks: Trigger value; ``None`` aborts the update.
        data: Records used to build the DataFrame. Must provide
            'DateAcquired', 'Use Downstream' and 'Flagged'.
        columns: Columns used as scatter-matrix dimensions.
        selected: Row labels to highlight; may be ``None`` or empty.
        ndxs: Row labels giving the row order (applied with ``reindex``);
            ``None`` keeps the frame's own order.

    Returns:
        Tuple ``(figure, config)``.

    Raises:
        PreventUpdate: When ``n_clicks`` is ``None``.
    """
    if n_clicks is None:
        raise PreventUpdate

    df = pd.DataFrame(data)
    df["DateAcquired"] = pd.to_datetime(df["DateAcquired"])
    if ndxs is not None:
        df = df.reindex(ndxs)

    fig = px.scatter_matrix(df, dimensions=columns)
    fig.update_layout(
        autosize=True,
        height=1200,
        showlegend=False,
        margin=dict(l=50, r=10, b=200, t=50, pad=0),
        hovermode="closest",
    )
    config = T.gen_figure_config(filename="PQC-scatter-matrix")

    # Plain list so the per-row overrides below are positional. Assigning
    # ``series[i]`` on the reindexed Series addressed rows by label and
    # could hit the wrong row or append a new one.
    marker_color = df["Use Downstream"].replace({
        True: C.colors["use_downstream"],
        False: C.colors["dont_use_downstream"],
    }).tolist()
    marker_line_color = df["Flagged"].replace({
        True: C.colors["flagged"],
        False: C.colors["not_flagged"],
    })

    selected_rows = set(selected) if selected else set()
    marker_symbol = [0] * len(df)
    # After reindex() the frame's index equals ``ndxs``, so iterating the
    # index also covers ``ndxs is None`` (which used to raise TypeError).
    for position, row_label in enumerate(df.index):
        if row_label in selected_rows:
            marker_color[position] = C.colors["selected"]
            marker_symbol[position] = 1

    fig.update_traces(
        marker_symbol=marker_symbol,
        marker_line_color=marker_line_color,
        marker_line_width=2,
        marker_color=marker_color,
        marker_size=20,
        opacity=0.8,
    )
    return fig, config
def num_analysis(n_clicks, input, *args):
    """Return the scatter-matrix graph for 'AAA', else the prompt message.

    Args:
        n_clicks: Unused by the body.
        input: Selected analysis; only 'AAA' produces a graph.
        *args: Ignored.
    """
    if input == 'AAA':
        fig = px.scatter_matrix(use_df,
                                dimensions=num_cols,
                                color=target_column)
        return dcc.Graph(figure=fig)

    # None, '' and every unrecognised choice show the same prompt.
    return html.H5('分析方法を選択し、実行を押してください')
def scatter_matrix(df, dimensions=['SEXO', 'EDAD'], color='MUNICIPIO'):
    """Return a scatter matrix of ``df`` with the diagonal hidden.

    Args:
        df: Data to plot.
        dimensions: Columns used as matrix dimensions.
        color: Column used for colouring.
    """
    # Axis labels are the selected column names with underscores as spaces.
    pretty_labels = {
        col: col.replace('_', ' ') for col in df[dimensions].columns
    }
    fig = px.scatter_matrix(
        df,
        dimensions=dimensions,
        color=color,
        color_continuous_scale=px.colors.diverging.Temps,
        title="Scatter matrix",
        labels=pretty_labels,
    )
    fig.update_traces(diagonal_visible=False)
    return fig
def num_analysis(n_clicks, input, *args):
    """Render the numeric analysis selected by ``input``.

    Args:
        n_clicks: Unused by the body (presumably the Dash trigger -- confirm).
        input: Selected analysis: 'AAA' -> summary-statistics table,
            'BBB' -> pair plot, 'CCC' -> annotated correlation heatmap.
        *args: Ignored.

    Returns:
        A prompt/warning ``html.H5``, a ``[component, html.Br()]`` list for
        a recognised choice, or ``None`` for an unrecognised one.
    """
    if input is None or input == '':
        return html.H5('分析方法を選択し、実行を押してください')
    if use_df is None:
        return html.H5('目的変数が選択されていません。')

    # Summary-statistics table.
    if input == 'AAA':
        describe = processed_df[num_cols].describe().round(4).reset_index()
        # Run the conversion once and reuse both parts of its result.
        processed = df_processor(describe)
        table = dash_table.DataTable(
            column_selectable='multi',
            fixed_rows={'headers': True, 'data': 0},
            data=processed[0],
            columns=processed[1],
            style_table={
                'overflowX': 'scroll',
                'overflowY': 'scroll',
                'maxHeight': '350px',
                # Was misspelled 'maxWidht', so the width cap never applied.
                'maxWidth': '800px',
            },
            style_header={
                'fontWeight': 'bold',
                'textAlign': 'center',
            },
        )
        return [table, html.Br()]

    # Pair plot.
    if input == 'BBB':
        fig = px.scatter_matrix(
            processed_df,
            dimensions=num_cols,
            color=target_column,
        )
        fig.update_layout(
            dragmode='select',
            width=1000,
            height=600,
            hovermode='closest',
        )
        return [dcc.Graph(figure=fig), html.Br()]

    # Correlation coefficients as an annotated heatmap.
    if input == 'CCC':
        corr = processed_df[num_cols].corr().round(4)
        fig = ff.create_annotated_heatmap(
            z=corr.values,
            x=list(corr.columns),
            y=list(corr.index),
            colorscale='Oranges',
            hoverinfo='none',
        )
        return [dcc.Graph(figure=fig), html.Br()]

    return None
def plot_pair_plots(data, outfile, folder_path, dimensions_, labels_=None,
                    color_=None):
    """Write a scatter matrix of ``data`` to ``<folder_path>/<outfile>.html``.

    Args:
        data: Data to plot.
        outfile: File name without the ".html" suffix.
        folder_path: Directory the HTML file is written to.
        dimensions_: Columns used as matrix dimensions.
        labels_: Optional mapping of column name to display label.
        color_: Optional column used for colouring.
    """
    target_path = os.path.join(folder_path, f"{outfile}.html")
    figure = px.scatter_matrix(
        data,
        dimensions=dimensions_,
        labels=labels_,
        color=color_,
    )
    figure.write_html(target_path, auto_open=False)
def update_graph2(selected_values, radio_value):
    """Build line, scatter-matrix and correlation figures for the places.

    Args:
        selected_values: Place names to include.
        radio_value: "10m" uses ``fire_rain`` directly; "daily" uses the
            two frames returned by ``make_daily_data(fire_rain)``.

    Returns:
        Tuple ``(line_figure, scatter_matrix_figure, correlation_heatmap)``.

    Raises:
        PreventUpdate: When nothing is selected (empty or ``None``) or
            ``radio_value`` is not one of the two known modes.
    """
    # ``not`` also covers None, which len() would reject with a TypeError.
    if not selected_values:
        raise PreventUpdate

    if radio_value == "10m":
        selected_df = fire_rain[fire_rain["place"].isin(selected_values)]
        pivot_df = selected_df.pivot_table(values="value",
                                           columns=["place"],
                                           index="dt")
        pivot_corr = pivot_df.corr()
        fig3 = px.line(selected_df, x="dt", y="value", color="place")
        fig4 = px.scatter_matrix(pivot_df)
        fig5 = px.imshow(pivot_corr)
        return fig3, fig4, fig5

    if radio_value == "daily":
        data1, data2 = make_daily_data(fire_rain)
        selected_df = data2[data2["place"].isin(selected_values)]
        matrix_data = data1[selected_values]
        fig6 = px.line(selected_df, x="dt", y="value", color="place")
        fig7 = px.scatter_matrix(matrix_data)
        fig8 = px.imshow(matrix_data.corr(), title="データ間の相関")
        return fig6, fig7, fig8

    raise PreventUpdate
def component_matrix(num_components=2, color_map="Item"):
    """Show a scatter matrix of the first ``num_components`` PCA components.

    ``pca``, ``components`` and ``df_categorical`` are read from the
    enclosing scope.

    Args:
        num_components: How many leading components to plot.
        color_map: Column of ``df_categorical`` used for colouring.
    """
    variance_pct = pca.explained_variance_ratio_ * 100
    axis_labels = {}
    for index, share in enumerate(variance_pct):
        axis_labels[str(index)] = f"PC {index+1} ({share:.1f}%)"

    fig = px.scatter_matrix(
        components,
        labels=axis_labels,
        dimensions=range(num_components),
        color=df_categorical[color_map],
    )
    fig.update_traces(diagonal_visible=False)
    fig.show()
def test_returns_scatter_matrix_plot(self, stock_data_returns: pd.DataFrame):
    """Show a scatter matrix of the "returns" series of every configured stock.

    Stock codes come from ``params.get("STOCK_CODES")``; each column is
    renamed to "<Code> returns" with the code capitalised.
    """
    stock_codes = params.get("STOCK_CODES")
    per_stock_returns = [
        stock_data_returns.query(f'stock_name=="{code}"')["returns"]
        for code in stock_codes
    ]
    returns_comp = pd.concat(per_stock_returns, axis=1)
    returns_comp.columns = [
        f"{code.capitalize()} returns" for code in stock_codes
    ]
    px.scatter_matrix(returns_comp).show()
def get_html(cfg, mode=None):
    """Return the satisfaction scatter matrix rendered as HTML.

    Args:
        cfg: Keyword arguments forwarded to ``fig.to_html``.
        mode: Unused by the body.
    """
    frame = ScatterMatrix.compute()
    rating_columns = ["Verhalten", "Kompetenz", "Information", "Vertrauen"]
    fig = px.scatter_matrix(
        frame,
        dimensions=rating_columns,
        color="Rolle-Kontext",
        size="Anzahl",
    )
    fig.update_traces(diagonal_visible=False)

    background = 'rgb(243, 243, 243)'
    fig.update_layout(
        title="Zufriedenheit unter den Teilnehmern",
        paper_bgcolor=background,
        plot_bgcolor=background,
    )
    return fig.to_html(**cfg)
def Principal_Comp_Reg(df, features, Y, Standardize=False, n_components=2):
    """Run PCA on ``features`` and plot the components and the reconstruction.

    Two scatter matrices are shown: the principal components together with
    the target, and the features reconstructed by ``inverse_transform``
    together with the target.

    Args:
        df: Source DataFrame.
        features: Column labels used as PCA inputs.
        Y: Label of the target column (appended to the plots, used as colour).
        Standardize: When true, features go through ``StandardScaler``
            before PCA. The reconstruction is then in the standardised
            space, because the scaler is not inverted.
        n_components: Number of principal components.

    Returns:
        Tuple ``(principaldf, explained_variance_ratio, reconsdf)``:
        components plus target as a DataFrame with columns ``P0..`` and
        ``Y``, the PCA explained-variance ratios, and the reconstructed
        features as a DataFrame with columns ``features``.
    """
    pca = PCA(n_components=n_components)

    # Separate the feature matrix from the target column (kept 2-D).
    x = df.loc[:, features].values
    y = df.loc[:, [Y]].values
    if Standardize:
        x = StandardScaler().fit_transform(x)

    scores = pca.fit_transform(x)
    principalComponents = np.hstack((scores, y))
    principaldf = pd.DataFrame(
        principalComponents,
        columns=[f"P{i}" for i in range(n_components)] + [Y])

    pc_labels = {
        str(i): f"PC {i+1} ({var:.1f}%)"
        for i, var in enumerate(pca.explained_variance_ratio_ * 100)
    }
    pc_labels[str(n_components)] = str(Y)
    # Plot every component plus the target. The old hard-coded range(3)
    # only matched the default n_components=2.
    fig = px.scatter_matrix(principalComponents,
                            labels=pc_labels,
                            dimensions=range(n_components + 1),
                            color=df[Y],
                            title="PCA of X+y")
    fig.update_traces(diagonal_visible=False)
    fig.show()

    recons = pca.inverse_transform(scores)
    reconsdf = pd.DataFrame(recons, columns=features)

    # Plot the reconstruction itself. The original re-plotted the principal
    # components under the "Inverse PCA" title.
    reconstructed = np.hstack((recons, y))
    recon_labels = {str(i): str(name) for i, name in enumerate(features)}
    recon_labels[str(len(features))] = str(Y)
    fig = px.scatter_matrix(reconstructed,
                            labels=recon_labels,
                            dimensions=range(len(features) + 1),
                            color=df[Y],
                            title="Inverse PCA of X+y")
    fig.update_traces(diagonal_visible=False)
    fig.show()

    return principaldf, pca.explained_variance_ratio_, reconsdf