def return_figures():
    """Creates five plotly visualizations

    Args:
        None

    Returns:
        list (dict): list containing the five plotly visualizations
    """
    x0 = data.loc[data.target == 0, 'age']
    x1 = data.loc[data.target == 1, 'age']
    group_labels = ["Heart Disease", "No Heart Disease"]

    # first chart: probability density of age by heart-disease status
    fig_one = ff.create_distplot([x0, x1], group_labels, bin_size=3, show_rug=False)
    fig_one.update_traces(opacity=0.55)
    fig_one.update_layout(
        title='Probability Density of Age versus Presence of Heart Disease',
        xaxis=dict(title='Age'),
        yaxis=dict(title='Probability Density'))

    # second chart: feature correlation heatmap
    fig_two = plotly_corr_heatmap(data, show_diagonal=False)
    fig_two.update_layout(title="Features Correlation Matrix")

    # third chart: proportion of each sex with heart disease
    proportions = data.groupby(['target', 'sex']).size().reset_index()
    proportions.columns = ['target', 'sex', 'number']
    proportions['totalsex'] = proportions.groupby('sex').number.transform('sum')
    proportions['proportion'] = proportions['number'] / proportions['totalsex']

    # turn the numeric codes into categorical labels
    proportions['sex'] = proportions['sex'].map(sex_map)
    proportions['target'] = proportions['target'].map(target_map)
    fig_three = px.bar(proportions, x='sex', y='proportion', color='target', barmode='group')
    fig_three.update_layout(title='Proportion of Sex with Heart Disease')

    # fourth chart: 3D scatter of cholesterol, resting blood pressure and max heart rate in males
    fig_four = px.scatter_3d(data.loc[data.sex == 1],
                             x="chol", y="trestbps", z="thalach",
                             color="target_name")
    fig_four.update_layout(
        showlegend=False,
        title='Max Heart Rate Achieved (thalach), Cholesterol (chol), '
              'and Resting Blood Pressure (trestbps) in Males')
    fig_four.update_traces(opacity=0.75, marker=dict(size=5))

    # fifth chart: scatter matrix over all features
    scatter_columns = list(data.drop('target', axis=1).columns)
    fig_five = px.scatter_matrix(
        data,
        dimensions=scatter_columns,
        color='target_name',
        symbol='target_name',
        title='Scatter matrix for Heart Disease Dataset')
    fig_five.update_traces(diagonal_visible=False, opacity=0.15)
    # fig_five.update_layout(xaxis=dict(showticklabels=False))

    # append all charts to the figures list
    figures = [fig_one, fig_two, fig_three, fig_four, fig_five]

    return figures
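# A minimal sketch of how the list returned by return_figures() is typically
# consumed: serializing the figures to JSON for a Flask/Plotly.js front end.
# The Flask app, route, and 'index.html' template here are illustrative
# assumptions, not part of the original code.
import json

import plotly
from flask import Flask, render_template

app = Flask(__name__)

@app.route('/')
def index():
    figures = return_figures()
    ids = [f'figure-{i}' for i, _ in enumerate(figures)]
    # convert each figure to JSON so the template can hand it to Plotly.js
    figures_json = json.dumps(figures, cls=plotly.utils.PlotlyJSONEncoder)
    return render_template('index.html', ids=ids, figuresJSON=figures_json)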
from sklearn.manifold import TSNE
import plotly.express as px

df = px.data.iris()
features = df.loc[:, :'petal_width']

tsne = TSNE(n_components=3, random_state=0)
projections = tsne.fit_transform(features)

fig = px.scatter_3d(projections, x=0, y=1, z=2,
                    color=df.species, labels={'color': 'species'})
fig.update_traces(marker_size=8)
fig.write_html("./tsne_example.html")
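# Since t-SNE is stochastic, a PCA initialization plus a fixed perplexity can
# make repeated runs more comparable. A small sketch; the parameter values are
# illustrative assumptions, not taken from the snippet above.
from sklearn.manifold import TSNE

tsne = TSNE(
    n_components=3,
    init='pca',       # PCA init tends to be more stable than random init
    perplexity=30,    # the sklearn default; tune to the dataset size
    random_state=0,
)
projections = tsne.fit_transform(features)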
# plt.plot(ref_points[:, 0], ref_points[:, 1], 'o', color=my_gray, markersize=10,
#          markeredgecolor='none', label='reference points' if i == 0 else '')
# draw dashed lines between reference and transformed points
# for i in range(n):
#     plt.plot([ref_points[i, 0], tsr_points[i, 0]],
#              [ref_points[i, 1], tsr_points[i, 1]], '--', color=colors[i])
# plt.legend(numpoints=1)
# plt.savefig('pointset_registration_2.png', format='png')

# compute the affine transform from the point set
translation, transformation = compute_affine_transform(ref_points, tsr_points)
invt = np.linalg.inv(transformation)
offset = -np.dot(invt, translation)
ref_centroid = np.mean(ref_points, axis=0)
tsr_centroid = np.mean(tsr_points, axis=0)

new_points = np.empty_like(ref_points)
for i in range(n):
    new_points[i] = ref_centroid + np.dot(transformation, tsr_points[i] - tsr_centroid)
    print('point %d will move to (%3.1f, %3.1f, %3.1f) to be compared with (%3.1f, %3.1f, %3.1f)'
          % (i, new_points[i, 0], new_points[i, 1], new_points[i, 2],
             ref_points[i, 0], ref_points[i, 1], ref_points[i, 2]))
    # plt.plot(new_points[i, 0], new_points[i, 1], 'x', color=colors[i], markersize=12,
    #          label='new points' if i == 0 else '')
# plt.legend(numpoints=1)
# plt.savefig('pointset_registration_3.png', format='png')
# plt.show()

df = pd.DataFrame(new_points, columns=['x', 'y', 'section'])
fig = px.scatter_3d(df, x='x', y='y', z='section', color='section')
fig.show()
names_numeric = df['Label'].unique()
print(names_numeric)

df_labels = df[['Label']]
df = df.drop(['Label'], axis=1)
y = df_labels.values

from sklearn import preprocessing
X = preprocessing.scale(df)
print(df)

from sklearn.decomposition import PCA
pca = PCA(n_components=3)
principalComponents = pca.fit_transform(X)
principalDf = pd.DataFrame(data=principalComponents, columns=['pc1', 'pc2', 'pc3'])
x = principalDf.values
print(pca.explained_variance_ratio_)

final = np.column_stack((x, y))
final_df = pd.DataFrame(final, columns=['pc1', 'pc2', 'pc3', 'labels'])
print(names_numeric)

import plotly.express as px
import plotly
fig = px.scatter_3d(final_df, x='pc1', y='pc2', z='pc3', color='labels')
plotly.offline.plot(fig, filename="3d.html")
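# Where the script only prints pca.explained_variance_ratio_, a quick bar chart
# often reads better. A minimal sketch; the figure styling is an assumption.
import plotly.express as px

evr = pca.explained_variance_ratio_
fig = px.bar(x=[f'pc{i + 1}' for i in range(len(evr))], y=evr,
             labels={'x': 'component', 'y': 'explained variance ratio'})
fig.show()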
def build_plot(is_anim, plot_type, df, progress=None, **kwargs) -> dict:
    params = dict(**kwargs)
    for k, v in params.items():
        if v == amp_consts.NONE_SELECTED:
            params[k] = filter_none(params[k])
    num_columns = df.select_dtypes(include=[np.number]).columns.to_list()

    if is_anim:
        time_column = params.pop("time_column", "")
        if time_column in df.select_dtypes(
            include=[np.datetime64, "datetime", "datetime64", "datetime64[ns, UTC]"]
        ).columns.to_list():
            df["time_step"] = df[time_column].dt.strftime("%Y/%m/%d %H:%M:%S")
            afc = "time_step"
        else:
            afc = time_column
        params["animation_frame"] = afc
        df = df.sort_values([afc])
        # freeze the axis ranges so they do not jump between animation frames
        if plot_type not in [
            amp_consts.PLOT_PCA_3D,
            amp_consts.PLOT_PCA_2D,
            amp_consts.PLOT_PCA_SCATTER,
            amp_consts.PLOT_LDA_2D,
            amp_consts.PLOT_QDA_2D,
            amp_consts.PLOT_NCA,
        ]:
            x = params.get("x")
            params["range_x"] = None if x not in num_columns else [df[x].min(), df[x].max()]
            y = params.get("y")
            params["range_y"] = None if y not in num_columns else [df[y].min(), df[y].max()]
            if plot_type in [amp_consts.PLOT_SCATTER_3D, amp_consts.PLOT_PCA_3D]:
                z = params.get("z")
                params["range_z"] = None if z not in num_columns else [df[z].min(), df[z].max()]

    params["data_frame"] = df
    fig = None
    model_data = None
    column_names = None
    class_names = None

    if plot_type == amp_consts.PLOT_SCATTER:
        fig = px.scatter(**params)
    elif plot_type == amp_consts.PLOT_SCATTER_3D:
        fig = px.scatter_3d(**params)
    elif plot_type == amp_consts.PLOT_LINE:
        fig = px.line(**params)
    elif plot_type == amp_consts.PLOT_BAR:
        fig = px.bar(**params)
    elif plot_type == amp_consts.PLOT_HISTOGRAM:
        if "orientation" in params and params.get("orientation") == "h":
            params["x"], params["y"] = None, params["x"]
        fig = px.histogram(**params)
    elif plot_type == amp_consts.PLOT_BOX:
        fig = px.box(**params)
    elif plot_type == amp_consts.PLOT_VIOLIN:
        fig = px.violin(**params)
    elif plot_type == amp_consts.PLOT_DENSITY_HEATMAP:
        fig = px.density_heatmap(**params)
    elif plot_type == amp_consts.PLOT_DENSITY_CONTOUR:
        fc = params.pop("fill_contours") is True
        fig = px.density_contour(**params)
        if fc:
            fig.update_traces(contours_coloring="fill", contours_showlabels=True)
    elif plot_type == amp_consts.PLOT_PARALLEL_CATEGORIES:
        fig = px.parallel_categories(**params)
    elif plot_type == amp_consts.PLOT_PARALLEL_COORDINATES:
        fig = px.parallel_coordinates(**params)
    elif plot_type == amp_consts.PLOT_SCATTER_MATRIX:
        fig = make_subplots(
            rows=len(num_columns),
            cols=len(num_columns),
            shared_xaxes=True,
            row_titles=num_columns,
        )
        color_column = params.get("color")
        if color_column is not None:
            template_colors = pio.templates[params.get("template")].layout["colorway"]
            if template_colors is None:
                template_colors = pio.templates[pio.templates.default].layout["colorway"]
            color_count = len(df[color_column].unique())
            # repeat the colorway if there are more categories than colors
            if len(template_colors) < color_count:
                template_colors = np.repeat(
                    template_colors, (color_count // len(template_colors)) + 1
                )
            template_colors = template_colors[:color_count]
        else:
            template_colors = 0
        legend_added = False
        step = 0
        total = len(num_columns) ** 2
        matrix_diag = params["matrix_diag"]
        matrix_up = params["matrix_up"]
        matrix_down = params["matrix_down"]
        for i, c in enumerate(num_columns):
            for j, l in enumerate(num_columns):
                if progress is not None:
                    progress(step, total)
                step += 1
                if i == j:
                    if matrix_diag == "Nothing":
                        continue
                    elif matrix_diag == "Histogram":
                        mtx_plot_kind = "Histogram"
                    else:
                        mtx_plot_kind = "Scatter"
                else:
                    if (i > j and matrix_up == "Scatter") or (i < j and matrix_down == "Scatter"):
                        mtx_plot_kind = "Scatter"
                    elif (i > j and matrix_up == "Nothing") or (
                        i < j and matrix_down == "Nothing"
                    ):
                        continue
                    elif (i > j and matrix_up == "2D histogram") or (
                        i < j and matrix_down == "2D histogram"
                    ):
                        mtx_plot_kind = "2D histogram"
                    else:
                        mtx_plot_kind = "Error"
                if isinstance(template_colors, int) or mtx_plot_kind == "2D histogram":
                    if mtx_plot_kind == "Histogram":
                        add_histogram(fig=fig, x=df[c], index=i + 1)
                    elif mtx_plot_kind == "Scatter":
                        add_scatter(fig=fig, x=df[c], y=df[l], row=j + 1, col=i + 1)
                    elif mtx_plot_kind == "2D histogram":
                        add_2d_hist(fig=fig, x=df[c], y=df[l], row=j + 1, col=i + 1)
                else:
                    for color_parse, cat in zip(template_colors, df[color_column].unique()):
                        df_cat = df[df[color_column] == cat]
                        if mtx_plot_kind == "Histogram":
                            add_histogram(
                                fig=fig,
                                x=df_cat[c],
                                index=i + 1,
                                name=cat,
                                marker=color_parse,
                                legend=not legend_added,
                            )
                        elif mtx_plot_kind == "Scatter":
                            add_scatter(
                                fig=fig,
                                x=df_cat[c],
                                y=df_cat[l],
                                row=j + 1,
                                col=i + 1,
                                name=cat,
                                marker=color_parse,
                                legend=not legend_added,
                            )
                    legend_added = True
                fig.update_xaxes(title_text=c, row=j + 1, col=i + 1)
                if i == 0:  # label the y axis on the first column only
                    fig.update_yaxes(title_text=l, row=j + 1, col=i + 1)
        fig.update_layout(barmode="stack")
    elif plot_type in [
        amp_consts.PLOT_PCA_2D,
        amp_consts.PLOT_PCA_3D,
        amp_consts.PLOT_PCA_SCATTER,
    ]:
        X = df.loc[:, num_columns]
        ignored_columns = params.pop("ignore_columns", [])
        if ignored_columns:
            X = X.drop(list(set(ignored_columns).intersection(set(X.columns.to_list()))), axis=1)
        column_names = X.columns.to_list()
        scaler = StandardScaler()
        scaler.fit(X)
        X = scaler.transform(X)
        model_data = PCA()
        x_new = model_data.fit_transform(X)
        pc1_lbl = f"PC1 ({model_data.explained_variance_ratio_[0] * 100:.2f}%)"
        pc2_lbl = f"PC2 ({model_data.explained_variance_ratio_[1] * 100:.2f}%)"
        x = x_new[:, 0]
        y = x_new[:, 1]
        df[pc1_lbl] = x * (1.0 / (x.max() - x.min()))
        df[pc2_lbl] = y * (1.0 / (y.max() - y.min()))
        params["x"] = pc1_lbl
        params["y"] = pc2_lbl
        if is_anim:
            params["range_x"] = [-1, 1]
            params["range_y"] = [-1, 1]
        sl = params.pop("show_loadings", None) is True
        if plot_type == amp_consts.PLOT_PCA_3D:
            z = x_new[:, 2]
            pc3_lbl = f"PC3 ({model_data.explained_variance_ratio_[2] * 100:.2f}%)"
            df[pc3_lbl] = z * (1.0 / (z.max() - z.min()))
            params["z"] = pc3_lbl
            if is_anim:
                params["range_z"] = [-1, 1]
            fig = px.scatter_3d(**params)
            if sl:
                loadings = np.transpose(model_data.components_[0:3, :])
                loadings = loadings * (1 / np.amax(loadings))
                xc, yc, zc = [], [], []
                for i in range(loadings.shape[0]):
                    xc.extend([0, loadings[i, 0], None])
                    yc.extend([0, loadings[i, 1], None])
                    zc.extend([0, loadings[i, 2], None])
                fig.add_trace(
                    go.Scatter3d(
                        x=xc,
                        y=yc,
                        z=zc,
                        mode="lines",
                        name="Loadings",
                        showlegend=False,
                        line=dict(color="black"),
                        opacity=0.3,
                    )
                )
                fig.add_trace(
                    go.Scatter3d(
                        x=loadings[:, 0],
                        y=loadings[:, 1],
                        z=loadings[:, 2],
                        mode="text",
                        text=column_names,
                        opacity=0.7,
                        name="Loadings",
                    )
                )
        elif plot_type == amp_consts.PLOT_PCA_2D:
            fig = px.scatter(**params)
            if sl:
                loadings = np.transpose(model_data.components_[0:2, :])
                loadings = loadings * (1 / np.amax(loadings))
                xc, yc = [], []
                for i in range(loadings.shape[0]):
                    xc.extend([0, loadings[i, 0], None])
                    yc.extend([0, loadings[i, 1], None])
                fig.add_trace(
                    go.Scatter(
                        x=xc,
                        y=yc,
                        mode="lines",
                        name="Loadings",
                        showlegend=False,
                        line=dict(color="black"),
                        opacity=0.3,
                    )
                )
                fig.add_trace(
                    go.Scatter(
                        x=loadings[:, 0],
                        y=loadings[:, 1],
                        mode="text",
                        text=column_names,
                        opacity=0.7,
                        name="Loadings",
                    )
                )
        elif plot_type == amp_consts.PLOT_PCA_SCATTER:
            params_ = {
                "data_frame": x_new,
                "labels": {str(i): f"PC {i + 1}" for i in range(x_new.shape[1] - 1)},
            }
            if params["color"] is not None:
                params_["color"] = df[params["color"]]
            if params["dimensions"] is not None:
                params_["dimensions"] = range(min(params["dimensions"], x_new.shape[1] - 1))
            if is_anim:
                params_["range_x"] = [-1, 1]
                params_["range_y"] = [-1, 1]
            fig = px.scatter_matrix(**params_)
            fig.update_traces(diagonal_visible=False)
    elif plot_type in [amp_consts.PLOT_LDA_2D, amp_consts.PLOT_QDA_2D]:
        X = df.loc[:, num_columns]
        ignored_columns = params.pop("ignore_columns", [])
        if ignored_columns:
            X = X.drop(list(set(ignored_columns).intersection(set(X.columns.to_list()))), axis=1)
        column_names = X.columns.to_list()
        if params["target"] in df.select_dtypes(include=["object"]).columns.to_list():
            t = df[params["target"]].astype("category").cat.codes
        elif params["target"] in df.select_dtypes(include=[float]).columns.to_list():
            t = df[params["target"]].astype("int")
        else:
            t = df[params["target"]]
        class_names = df[params["target"]].unique()
        scaler = StandardScaler()
        scaler.fit(X)
        X = scaler.transform(X)
        if plot_type == amp_consts.PLOT_LDA_2D:
            model_data = LinearDiscriminantAnalysis(solver=params.pop("solver", "svd"))
        elif plot_type == amp_consts.PLOT_QDA_2D:
            model_data = QuadraticDiscriminantAnalysis(store_covariance=True)
        x_new = model_data.fit(X, y=t).transform(X)
        label_root = "LD" if plot_type == amp_consts.PLOT_LDA_2D else "QD"
        pc1_lbl = f"{label_root}1 ({model_data.explained_variance_ratio_[0] * 100:.2f}%)"
        pc2_lbl = f"{label_root}2 ({model_data.explained_variance_ratio_[1] * 100:.2f}%)"
        x = x_new[:, 0]
        y = x_new[:, 1]
        df[pc1_lbl] = x / np.abs(x).max()
        df[pc2_lbl] = y / np.abs(y).max()
        params["x"] = pc1_lbl
        params["y"] = pc2_lbl
        if is_anim:
            params["range_x"] = [-1, 1]
            params["range_y"] = [-1, 1]
        params.pop("target")
        sl = params.pop("show_loadings") is True
        fig = px.scatter(**params)
        if sl:
            loadings = np.transpose(model_data.coef_[0:2, :])
            loadings[:, 0] = loadings[:, 0] / np.abs(loadings[:, 0]).max()
            loadings[:, 1] = loadings[:, 1] / np.abs(loadings[:, 1]).max()
            xc, yc = [], []
            for i in range(loadings.shape[0]):
                xc.extend([0, loadings[i, 0], None])
                yc.extend([0, loadings[i, 1], None])
            fig.add_trace(
                go.Scatter(
                    x=xc,
                    y=yc,
                    mode="lines",
                    name="Loadings",
                    showlegend=False,
                    line=dict(color="black"),
                    opacity=0.3,
                )
            )
            fig.add_trace(
                go.Scatter(
                    x=loadings[:, 0],
                    y=loadings[:, 1],
                    mode="text",
                    text=column_names,
                    opacity=0.7,
                    name="Loadings",
                )
            )
    elif plot_type == amp_consts.PLOT_NCA:
        X = df.loc[:, num_columns]
        ignored_columns = params.pop("ignore_columns", [])
        if ignored_columns:
            X = X.drop(list(set(ignored_columns).intersection(set(X.columns.to_list()))), axis=1)
        column_names = X.columns.to_list()
        if params["target"] in df.select_dtypes(include=["object"]).columns.to_list():
            t = df[params["target"]].astype("category").cat.codes
        elif params["target"] in df.select_dtypes(include=[float]).columns.to_list():
            t = df[params["target"]].astype("int")
        else:
            t = df[params["target"]]
        class_names = df[params["target"]].unique()
        scaler = StandardScaler()
        scaler.fit(X)
        X = scaler.transform(X)
        model_data = NeighborhoodComponentsAnalysis(
            init=params.pop("init", "auto"),
            n_components=min(len(column_names), params.pop("n_components", 2)),
        )
        x_new = model_data.fit(X, y=t).transform(X)
        df["x_nca"] = x_new[:, 0]
        df["y_nca"] = x_new[:, 1]
        params["x"] = "x_nca"
        params["y"] = "y_nca"
        if is_anim:
            params["range_x"] = [-1, 1]
            params["range_y"] = [-1, 1]
        params.pop("target")
        fig = px.scatter(**params)
    elif plot_type == amp_consts.PLOT_CORR_MATRIX:
        fig = px.imshow(
            df[num_columns].corr(method=params.get("corr_method")).values,
            x=num_columns,
            y=num_columns,
        )
    else:
        fig = None

    if fig is not None and plot_type in amp_consts.PLOT_IS_3D:
        fig.update_layout(scene={"aspectmode": "cube"})
    if fig is not None:
        fig.update_layout(
            height=params["height"],
            template=params["template"],
            legend={"traceorder": "normal"},
        )
        if ("size" not in params) or (
            (params["size"] is None) or (params["size"] == amp_consts.NONE_SELECTED)
        ):
            fig.update_traces(
                marker=dict(size=8, line=dict(width=2), opacity=0.7),
                selector=dict(mode="markers"),
            )

    return {
        k: v
        for k, v in zip(
            ["figure", "model_data", "column_names", "class_names"],
            [fig, model_data, column_names, class_names],
        )
        if v is not None
    }
        'y': output[:, 1] * 1000,
        'z': output[:, 2] * -1000,
        'Sdirect': output[:, 3],
        'Smises': output[:, 4] / 1e9
    })
print("Runtime: %f seconds" % (time.time() - start_time))

# locate the node with the maximum von Mises stress in each model
Smises_max_loc_model = dfmodel.loc[dfmodel['Smises'] == dfmodel['Smises'].max()][['x', 'y', 'z', 'Smises']]
Smises_max_loc_valid = dfjambent.loc[dfjambent['Smises'] == dfjambent['Smises'].max()][['x', 'y', 'z', 'Smises']]
print('Max von Mises stress numerical model =', '\n\n', Smises_max_loc_model, '\n')
print('Max von Mises stress validation model =', '\n\n', Smises_max_loc_valid, '\n')

fig = px.scatter_3d(dfmodel, x='x', y='y', z='z', color='Smises')
fig.update_layout(title='Von Mises stresses, numerical model')
fig.show()
fig = px.scatter_3d(dfjambent, x='x', y='y', z='z', color='Smises')
fig.update_layout(title='Von Mises stresses, validation model')
fig.show()

# HL_dy_bending = dfbending[dfbending.z == 0]
# HL_dy_bending = HL_dy_bending[HL_dy_bending.y == 0]
# HL_dy_bending = HL_dy_bending.sort_values(by=['x'])  # sort the nodes by x value (necessary for plotting)
#
# HL_dy_jambent = dfjambent[dfjambent.z == 0]
# HL_dy_jambent = HL_dy_jambent[HL_dy_jambent.y == 0]
# HL_dy_jambent = HL_dy_jambent.sort_values(by=['x'])  # sort the nodes by x value (necessary for plotting)
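# An equivalent single-row lookup using idxmax; a small sketch that assumes the
# maximum von Mises stress is unique (otherwise only the first hit is returned).
max_row = dfmodel.loc[dfmodel['Smises'].idxmax(), ['x', 'y', 'z', 'Smises']]
print(max_row)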
def scatterplot(
    df: pd.DataFrame,
    col: str,
    color: str = None,
    hover_name: str = None,
    hover_data: Optional[List[str]] = None,
    title="",
    return_figure=False,
):
    """
    Show scatterplot of DataFrame column using python plotly scatter.

    Plot the values in column col. For example, if every cell in df[col]
    is a list of three values (e.g. from doing PCA with 3 components),
    a 3D-Plot is created and every cell entry [x, y, z] is visualized
    as the point (x, y, z).

    Parameters
    ----------
    df: DataFrame with a column to be visualized.

    col: str
        The name of the column of the DataFrame to use for x and y (and z) axis.

    color: str, optional, default=None
        Name of the column to use for coloring (rows with same value get same color).

    hover_name: str, optional, default=None
        Name of the column to supply title of hover data when hovering over a point.

    hover_data: List[str], optional, default=None
        List of column names to supply data when hovering over a point.

    title: str, default to "".
        Title of the plot.

    return_figure: bool, optional, default=False
        Function returns the figure instead of showing it if set to True.

    Examples
    --------
    >>> import texthero as hero
    >>> import pandas as pd
    >>> df = pd.DataFrame(["Football, Sports, Soccer",
    ...                    "music, violin, orchestra", "football, fun, sports",
    ...                    "music, fun, guitar"], columns=["texts"])
    >>> df["texts"] = hero.clean(df["texts"]).pipe(hero.tokenize)
    >>> df["pca"] = (
    ...     hero.tfidf(df["texts"])
    ...     .pipe(hero.pca, n_components=3)
    ... )
    >>> df["topics"] = (
    ...     hero.tfidf(df["texts"])
    ...     .pipe(hero.kmeans, n_clusters=2)
    ... )
    >>> hero.scatterplot(df, col="pca", color="topics",
    ...                  hover_data=["texts"])  # doctest: +SKIP
    """
    plot_values = np.stack(df[col], axis=1)
    dimension = len(plot_values)

    if dimension < 2 or dimension > 3:
        raise ValueError(
            "The column you want to visualize has dimension < 2 or dimension > 3."
            " The function can only visualize 2- and 3-dimensional data.")

    if dimension == 2:
        x, y = plot_values[0], plot_values[1]
        fig = px.scatter(
            df, x=x, y=y, color=color,
            hover_data=hover_data, title=title, hover_name=hover_name,
        )
    else:
        x, y, z = plot_values[0], plot_values[1], plot_values[2]
        fig = px.scatter_3d(
            df, x=x, y=y, z=z, color=color,
            hover_data=hover_data, title=title, hover_name=hover_name,
        )

    if return_figure:
        return fig
    else:
        fig.show()
def plot_3d_diffusion_embedding(x1, x2, x3):
    fig = px.scatter_3d(x=x1, y=x2, z=x3)
    fig.show(renderer='iframe')
def viz_streamlit_entity_embed_manifold(
        pipe,  # nlu component_list
        default_texts: List[str] = ("Donald Trump likes to visit New York",
                                    "Angela Merkel likes to visit Berlin!",
                                    'Peter hates visiting Paris'),
        title: Optional[str] = "Lower dimensional Manifold visualization for Entity embeddings",
        sub_title: Optional[str] = "Apply any of the 10+ `Manifold` or `Matrix Decomposition` algorithms to reduce the dimensionality of `Entity Embeddings` to `1-D`, `2-D` and `3-D` ",
        default_algos_to_apply: List[str] = ("TSNE", "PCA"),
        target_dimensions: List[int] = (1, 2, 3),
        show_algo_select: bool = True,
        set_wide_layout_CSS: bool = True,
        num_cols: int = 3,
        model_select_position: str = 'side',  # side or main
        key: str = "NLU_streamlit",
        show_infos: bool = True,
        show_logo: bool = True,
        n_jobs: Optional[int] = 3,  # False
):
    from nlu.pipe.viz.streamlit_viz.streamlit_utils_OS import StreamlitUtilsOS
    StreamlitVizTracker.footer_displayed = False
    try:
        import plotly.express as px
        from sklearn.metrics.pairwise import distance_metrics
    except ImportError:
        st.error("You need the scikit-learn and plotly packages installed in your Python "
                 "environment for similarity visualizations. Run <pip install scikit-learn plotly>")
    if show_logo:
        StreamlitVizTracker.show_logo()
    if set_wide_layout_CSS:
        _set_block_container_style()
    if title:
        st.header(title)
    if sub_title:
        st.subheader(sub_title)

    if isinstance(default_texts, list):
        default_texts = '\n'.join(default_texts)
    data = st.text_area('Enter N texts, separated by new lines, to visualize Sentence Embeddings for',
                        default_texts).split('\n')
    output_level = 'chunk'
    ner_embed_pipe_algo_selection = []
    loaded_ner_embed_nlu_refs = []
    algos = ['TSNE']

    # A component_list should have a NER model and a Word Embedding
    if pipe not in StreamlitVizTracker.loaded_ner_word_embeding_pipes:
        StreamlitVizTracker.loaded_ner_word_embeding_pipes.append(pipe)
    if pipe not in StreamlitVizTracker.loaded_word_embeding_pipes:
        StreamlitVizTracker.loaded_word_embeding_pipes.append(pipe)

    if show_algo_select:
        # Manifold selection
        exp = st.expander("Select additional manifold and dimension reduction techniques to apply")
        algos = exp.multiselect(
            "Reduce embedding dimensionality to something visualizable",
            options=("TSNE", "ISOMAP", 'LLE', 'Spectral Embedding', 'MDS', 'PCA', 'SVD aka LSA',
                     'DictionaryLearning', 'FactorAnalysis', 'FastICA', 'KernelPCA',
                     'LatentDirichletAllocation'),
            default=default_algos_to_apply, )
        ner_emb_components_usable = [e for e in Discoverer.get_components('ner', True, include_aliases=True)
                                     if 'embed' not in e and 'sentence' not in e]
        # Find nlu_ref of currently loaded component_list
        for p in StreamlitVizTracker.loaded_ner_word_embeding_pipes:
            loaded_ner_embed_nlu_refs.append(p.nlu_ref)
        # NER model selection
        if model_select_position == 'side':
            ner_embed_pipe_algo_selection = st.sidebar.multiselect(
                "Pick additional NER Models for the Dimension Reduction",
                options=ner_emb_components_usable, default=loaded_ner_embed_nlu_refs, key=key)
        else:
            ner_embed_pipe_algo_selection = exp.multiselect(
                "Pick additional NER Models for the Dimension Reduction",
                options=ner_emb_components_usable, default=loaded_ner_embed_nlu_refs, key=key)
    for ner_nlu_ref in ner_embed_pipe_algo_selection:
        load = True
        for ner_p in StreamlitVizTracker.loaded_ner_word_embeding_pipes:
            if ner_p.nlu_ref == ner_nlu_ref:
                load = False
                break
        if not load:
            continue
        p = nlu.load(ner_nlu_ref)
        if p not in StreamlitVizTracker.loaded_ner_word_embeding_pipes:
            StreamlitVizTracker.loaded_ner_word_embeding_pipes.append(p)
        if p not in StreamlitVizTracker.loaded_word_embeding_pipes:
            StreamlitVizTracker.loaded_word_embeding_pipes.append(p)

    col_index = 0
    cols = st.columns(num_cols)

    def are_cols_full():
        return col_index == num_cols

    for p in StreamlitVizTracker.loaded_ner_word_embeding_pipes:
        p = EntityManifoldUtils.insert_chunk_embedder_to_pipe_if_missing(p)
        predictions = p.predict(data, metadata=True, output_level=output_level,
                                multithread=False).dropna()
        entity_cols = EntityManifoldUtils.get_ner_cols(predictions)
        chunk_embed_col = EntityManifoldUtils.find_chunk_embed_col(predictions)
        # TODO get cols for non-default NER or multi-NER setups?
        e_com = StreamlitUtilsOS.find_embed_component(p)
        e_com_storage_ref = StorageRefUtils.extract_storage_ref(e_com)
        emb = predictions[chunk_embed_col]
        mat = np.array([x for x in emb])
        for algo in algos:
            # LatentDirichletAllocation requires non-negative inputs
            if algo == 'LatentDirichletAllocation':
                mat = np.square(mat)
            if len(mat.shape) > 2:
                mat = mat.reshape(len(emb), mat.shape[-1])
            hover_data = entity_cols + ['text']
            # calculate reduced dimensionality with every algorithm
            feature_to_color_by = entity_cols[0]
            if 1 in target_dimensions:
                low_dim_data = StreamlitUtilsOS.get_manifold_algo(algo, 1, n_jobs).fit_transform(mat)
                x = low_dim_data[:, 0]
                y = np.zeros(low_dim_data[:, 0].shape)
                tsne_df = pd.DataFrame({**{'x': x, 'y': y},
                                        **{k: predictions[k] for k in entity_cols},
                                        **{'text': predictions[entity_cols[-1]]}})
                fig = px.scatter(tsne_df, x="x", y="y", color=feature_to_color_by,
                                 hover_data=hover_data)
                subh = f"""Word-Embeddings =`{e_com_storage_ref}`, NER-Model =`{p.nlu_ref}`, Manifold-Algo =`{algo}` for `D=1`"""
                cols[col_index].markdown(subh)
                cols[col_index].write(fig, key=key)
                col_index += 1
                if are_cols_full():
                    cols = st.columns(num_cols)
                    col_index = 0
            if 2 in target_dimensions:
                low_dim_data = StreamlitUtilsOS.get_manifold_algo(algo, 2, n_jobs).fit_transform(mat)
                x = low_dim_data[:, 0]
                y = low_dim_data[:, 1]
                tsne_df = pd.DataFrame({**{'x': x, 'y': y},
                                        **{k: predictions[k] for k in entity_cols},
                                        **{'text': predictions[entity_cols[-1]]}})
                fig = px.scatter(tsne_df, x="x", y="y", color=feature_to_color_by,
                                 hover_data=hover_data)
                subh = f"""Word-Embeddings =`{e_com_storage_ref}`, NER-Model =`{p.nlu_ref}`, Manifold-Algo =`{algo}` for `D=2`"""
                cols[col_index].markdown(subh)
                cols[col_index].write(fig, key=key)
                col_index += 1
                if are_cols_full():
                    cols = st.columns(num_cols)
                    col_index = 0
            if 3 in target_dimensions:
                low_dim_data = StreamlitUtilsOS.get_manifold_algo(algo, 3, n_jobs).fit_transform(mat)
                x = low_dim_data[:, 0]
                y = low_dim_data[:, 1]
                z = low_dim_data[:, 2]
                tsne_df = pd.DataFrame({**{'x': x, 'y': y, 'z': z},
                                        **{k: predictions[k] for k in entity_cols},
                                        **{'text': predictions[entity_cols[-1]]}})
                fig = px.scatter_3d(tsne_df, x="x", y="y", z='z',
                                    color=feature_to_color_by, hover_data=hover_data)
                subh = f"""Word-Embeddings =`{e_com_storage_ref}`, NER-Model =`{p.nlu_ref}`, Manifold-Algo =`{algo}` for `D=3`"""
                cols[col_index].markdown(subh)
                cols[col_index].write(fig, key=key)
                col_index += 1
                if are_cols_full():
                    cols = st.columns(num_cols)
                    col_index = 0
    # TODO: fancy embed infos, e.g. display_embed_vector_information(e_com, mat)
    # in an st.expander("Embedding vector information")

    if show_infos:
        StreamlitVizTracker.display_model_info(pipe.nlu_ref, pipes=[pipe])
        StreamlitVizTracker.display_footer()
def visualize(img, outputs, renderer):
    vert = outputs['verts'][0]
    cam = outputs['cam_pred'][0]
    texture = outputs['texture'][0]
    faces = outputs['faces'][0]
    shape_pred = renderer(vert, cam)
    img_pred = renderer(vert, cam, texture=texture)
    # renderer.saveMesh(vert, texture)

    vertex_seg_map = torch.argmax(outputs['vertex_seg_map'][0], dim=1).unsqueeze(1).type(torch.FloatTensor)
    x = torch.cat([vert.cpu(), vertex_seg_map], dim=1)
    print(outputs['vertex_seg_map'][0].max(1)[0].sum())
    df = pd.DataFrame(x.numpy())
    fig = px.scatter_3d(df, x=0, y=1, z=2, color=3)
    fig.write_html("./file.html")
    print("file.html written")

    # map each segmentation label to an RGB vertex color
    seg_colors = {
        0.0: [0.0, 0.0, 1.0],
        1.0: [0.0, 1.0, 0.0],
        2.0: [0.0, 1.0, 1.0],
        3.0: [1.0, 0.0, 0.0],
        4.0: [1.0, 0.0, 1.0],
        5.0: [1.0, 1.0, 0.0],
        6.0: [1.0, 0.5, 0.5],
        7.0: [0.5, 1.0, 1.0],
    }
    tex_seg = vertex_seg_map.repeat(1, 3)
    for i in range(642):
        label = tex_seg[i][0].item()
        if label in seg_colors:
            tex_seg[i] = torch.tensor(seg_colors[label])
    save_obj("demo_seg.obj", vert, outputs['faces'][0], tex_seg.contiguous(), texture_type='vertex')
    print("seg_obj file written")

    # Different viewpoints.
    vp1 = renderer.diff_vp(vert, cam, angle=30, axis=[0, 1, 0], texture=texture, extra_elev=True)
    vp2 = renderer.diff_vp(vert, cam, angle=60, axis=[0, 1, 0], texture=texture, extra_elev=True)
    vp3 = renderer.diff_vp(vert, cam, angle=90, axis=[0, 1, 0], texture=texture)

    img = np.transpose(img, (1, 2, 0))
    import matplotlib.pyplot as plt
    plt.ion()
    plt.figure(1)
    plt.clf()
    plt.subplot(231)
    plt.imshow(img)
    plt.title('input')
    plt.axis('off')
    plt.subplot(232)
    plt.imshow(shape_pred)
    plt.title('pred mesh')
    plt.axis('off')
    plt.subplot(233)
    plt.imshow(img_pred)
    plt.title('pred mesh w/texture')
    plt.axis('off')
    plt.subplot(234)
    plt.imshow(vp1)
    plt.title('different viewpoints')
    plt.axis('off')
    plt.subplot(235)
    plt.imshow(vp2)
    plt.axis('off')
    plt.subplot(236)
    plt.imshow(vp3)
    plt.axis('off')
    plt.draw()
    plt.show()
    print('saving file to demo_image.png')
    plt.savefig('demo_image.png')
# %%
print(df)

# %%
import plotly
import plotly.graph_objs as go
import plotly.express as px
from ipywidgets import interact, widgets

plotly.offline.init_notebook_mode()

# Configure the trace.
trace = px.scatter_3d(
    df,
    x='X',
    y='Y',
    z='Z',
    color='label',
    # title="Graph for Layer " + str(layer)
)
trace.show()

# %% ################ Explore Codebook Summation and Centroids #################
channel_labels = ['x', 'y', 'z']
fig = plt.figure(figsize=(40, 40))
for i, c in enumerate(codebook):
    color_index = 0
        reduced_data = tsne.fit_transform(features)
        clusters_data = reduced_data
    else:
        clusters_data = features
    k_means = KMeans(n_clusters=K)
    y = k_means.fit_predict(clusters_data)
    df['Cluster'] = y
    df['Cluster'] = df['Cluster'].apply(str)
    fig = px.scatter_3d(df, x=df['X'], y=df['Y'], z=df['Z'],
                        color=df['Cluster'], width=700, height=700)
    st.plotly_chart(fig)
    interpret(df, "k")
elif clustering_method == techniques[1]:
    st.write("Hierarchical results")
    features = normalize(features)
    fig = ff.create_dendrogram(features)
    fig.update_layout(width=800, height=600)
    st.plotly_chart(fig)
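# Since K is fixed upstream, a quick sanity check on the cluster count is the
# silhouette score. A minimal sketch; the candidate range 2..10 is an
# illustrative assumption.
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

scores = {}
for k in range(2, 11):
    labels = KMeans(n_clusters=k, n_init=10, random_state=0).fit_predict(clusters_data)
    scores[k] = silhouette_score(clusters_data, labels)
best_k = max(scores, key=scores.get)  # K with the highest silhouette score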
clustered_df = cleaned_crypto_df.merge(pcs_df, on='Unnamed: 0')
clustered_df = clustered_df.merge(coins_name, on='Unnamed: 0')
clustered_df['Class'] = model.labels_
clustered_df.set_index('Unnamed: 0', drop=True, inplace=True)
clustered_df.head(10)

# %% [markdown]
# # Visualizing Results

# %%
# 3D scatter plot
fig = px.scatter_3d(clustered_df, x='PC 1', y='PC 2', z='PC 3',
                    color='Class', symbol='Class', hover_name='CoinName',
                    hover_data=['Algorithm'])
fig.update_layout(legend={'x': 0, 'y': 1})
fig.show()

# %%
# create an hvplot table for all the currently tradable cryptocurrencies
obj_table = clustered_df.hvplot.table(columns=[
    'CoinName', 'Algorithm', 'ProofType', 'TotalCoinSupply',
    'TotalCoinsMined', 'Class'
], width=500)
hvplot.show(obj_table)
X_test['y_pred'] = lr.predict(X_test_array)
lr.coef_

import plotly.express as px
X_train.head()

"""# First visualization

I - The correlation function run at the start of the kernel shows that the correlation
between *Temperatura Minima (C)* and *Precipitacao (mm)* is low; however,

II - The chart below shows that the higher the temperature and the lower the precipitation,
***the HIGHER our beer consumption will be.***
"""

fig = px.scatter_3d(X_test, x='Precipitacao (mm)', y='Temperatura Minima (C)', z='y_pred')
fig.show()

"""# Second visualization

I - Here we can see that the higher the temperature, ***the HIGHER the beer consumption.***

II - We can also see that beer consumption is considerably higher on weekends.
"""

fig = px.scatter_3d(X_test, x='Final de Semana', y='Temperatura Maxima (C)', z='y_pred')
fig.show()

"""# Third visualization

I - This chart also shows that the higher the temperature and the lower the precipitation,
***the HIGHER the beer consumption.***
x_scaled2 = min_max_scaler.fit_transform(x)
df1 = pd.DataFrame(x_scaled2)

# ------------------------ k-means Clustering ---------------------
kmeans = KMeans(init="k-means++", n_clusters=4, random_state=15, max_iter=500).fit(x_scaled2)
df1['kmeans'] = kmeans.labels_
df1.columns = ['energy', 'instrumentalness', 'loudness', 'kmeans']
kmeans = df1['kmeans']
df['kmeans'] = kmeans

fig = px.scatter_3d(df, x='energy', y='instrumentalness', z='loudness', color='kmeans')
fig.show()

# ---------------------------------- Violin Plot for each cluster ------------------
c0 = df1[df1['kmeans'] == 0]
c1 = df1[df1['kmeans'] == 1]
c2 = df1[df1['kmeans'] == 2]
c3 = df1[df1['kmeans'] == 3]
# genre = df['genre']
# c0['genre'] = genre
# c1['genre'] = genre
# c2['genre'] = genre
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

iris = pd.read_csv("Iris.csv")
fig = px.scatter_3d(iris, x='SepalLengthCm', y='SepalWidthCm',
                    z='PetalWidthCm', color='Species')
fig.show()
import pandas as pd
import plotly.express as px
from sklearn.mixture import BayesianGaussianMixture
from sklearn.cluster import OPTICS
import matplotlib.pyplot as plt

dfw = pd.read_csv('./data/wilac_clustering_data.csv')
dfw.mean()
dfw

fig = px.scatter_3d(dfw, x='Gini2016', y='GDPpc2016', z='InsAcceptance',
                    color='quadraticEffect', hover_name='sample',
                    color_continuous_scale='RdBu', color_continuous_midpoint=0,
                    template="plotly_dark", opacity=0.9,
                    title='Scatterplot Quadratic Effect on Willingness '
                          'to Participate in Activism')
fig.write_html("./plots/wilacQua.html")

dfc = pd.read_csv('./data/colac_clustering_data.csv')
dfc.mean()
fig = px.scatter_3d(dfc, x='Gini2016', y='GDPpc2016', z='InsAcceptance',
                    color='quadraticEffect',
alphas = []
Res = []
CLs = []
CDs = []
for alpha in alpha_inputs:
    for Re in Re_inputs:
        alphas.append(alpha)
        Res.append(Re)
        CLs.append(Cl_e216(alpha, Re))
        CDs.append(Cd_profile_e216(alpha, Re))

px.scatter_3d(x=alphas, y=Res, z=CLs,
              size=np.ones_like(alphas), color=CLs, log_y=True,
              labels={"x": "alphas", "y": "Re", "z": "CL"}).show()
px.scatter_3d(x=alphas, y=Res, z=CDs,
              size=np.ones_like(alphas), color=CDs, log_y=True,
              labels={"x": "alphas", "y": "Re", "z": "CD"
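# The nested append loops above build the grid point by point. If Cl_e216 and
# Cd_profile_e216 broadcast over numpy arrays (an assumption, not confirmed by
# the snippet), a meshgrid version is shorter and faster:
import numpy as np

alpha_grid, Re_grid = np.meshgrid(alpha_inputs, Re_inputs, indexing="ij")
alphas = alpha_grid.ravel()
Res = Re_grid.ravel()
CLs = Cl_e216(alphas, Res)
CDs = Cd_profile_e216(alphas, Res)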
# iris_df['class'] = predictions
iris_df.head()

# %% [markdown]
# Data visualization of results

# %%
# use hvplot to plot a 2D scatter, use bokeh to viz (for web browsers)
import holoviews as hv
hv.extension('bokeh')
iris_df.hvplot.scatter(x='petal_width', y='sepal_length', by='class')
hvplot.show(
    iris_df.hvplot.scatter(x='petal_width', y='sepal_length', by='class'))

# %%
# use the plotly.express library
fig = px.scatter_3d(iris_df, x='petal_width', y='sepal_length', z='petal_length',
                    color='class', symbol='class', size='sepal_width', width=800)
fig.update_layout(legend=dict(x=0, y=1))  # position the legend at the top left
fig.show()

# %%

# %%
df = pd.read_excel('data.xlsx')
rands = np.random.normal(0, 0.1, size=(df.shape[0], 3))
num_cols = ['Data Source', 'Target Variable', 'Prediction Type']
df[num_cols] = df[num_cols] + rands

fig = px.scatter_3d(df, x='Data Source', y='Target Variable', z='Prediction Type',
                    color='Family', width=1000, height=700,
                    title='Machine Learning Exploration',
                    hover_data={
                        'Family': True,
                        'Technique': True,
                        'Data Source': False,
                        'Target Variable': False,
                        'Prediction Type': False,
                        'Description': False
                    },
                    range_x=[-.15, 1.15],
                    range_y=[-.15, 1.15],
                    range_z=[-1.15, 1.15])
fig.update_layout(
    scene={
        'xaxis': {
            'ticktext': ['Unstructured', 'Structured'],
            'tickvals': [0, 1],
"y": "Gamma" }).show() # # ER_from_P test chamber_pressure_inputs = np.logspace(5, 6, 200) exit_pressure_inputs = np.logspace(4, 5, 200) ox_for_test = 0 chamber_pressures = [] exit_pressures = [] ers = [] for chamber_pressure in chamber_pressure_inputs: for exit_pressure in exit_pressure_inputs: chamber_pressures.append(chamber_pressure) exit_pressures.append(exit_pressure) ers.append( expansion_ratio_from_pressure(chamber_pressure, exit_pressure, gamma(ox_for_test), ox_for_test)) data = pd.DataFrame({ 'chamber_pressure': chamber_pressures, 'exit_pressure': exit_pressures, 'ers': ers }) px.scatter_3d(data, x='chamber_pressure', y='exit_pressure', z='ers', color='ers', log_x=True, log_y=True, log_z=True).show()
affinity  # Display affinity matrix

affinity_sym = 0.5 * (affinity + affinity.T)  # Make 'affinity' symmetric by averaging it with its transpose
affinity_sym  # Display symmetric affinity matrix

# As you can see, the affinity matrix labeled 'affinity' is not symmetric. With an assigned
# n of 2, each point must find 2 nearest neighbors, including itself. A 1 in the matrix means
# that the point in that column is a nearest neighbor of the point the row pertains to. In the
# example of the first point (the first row of the matrix), its nearest neighbors are itself
# and the second point. For point 2 (the second row), its nearest neighbors are itself and
# point 3, not point 1. This happens because point 3 is closer to point 2 than point 1 is,
# in terms of Euclidean distance.
#
# The affinity matrix is made symmetric by averaging it with its transpose. As you can see
# from 'affinity_sym', some elements of the matrix now have a value of 0.5. For example, the
# first row (pertaining to point 1) has a value of 0.5 where it used to have a value of 1 in
# the second column. This tells us that point 1's nearest neighbor is point 2, but point 1 is
# not a nearest neighbor of point 2.
#
# Looking back at the matrix 'affinity', if we sum the matrix along its rows, each row will
# sum to 2 (the n value we assigned) because each point is required to find its 2 nearest
# neighbors. If we sum along its columns, we will get a different value for each column.
# Column 1 will sum to 1, telling us that point 1 is only a nearest neighbor to itself.
# Column 2, on the other hand, will sum to 3, telling us that point 2 is the nearest neighbor
# of 2 other points besides itself.

# ***
# ## Distance Correlation Matrix
# These cells test whether the spectral embedding can be computed using a distance matrix
# rather than an affinity matrix.

dis_corr_matrix = pairwise_distances(data_df[0:500], metric=dis_corr)  # Distance correlation matrix for the digit data (this matrix is symmetric)
dis_corr_matrix
hv.Image(dis_corr_matrix).opts(width=400, height=300, colorbar=True, cmap='jet')  # Plot distance matrix

D = 0.25  # Distance threshold
cond_matrix = np.where(dis_corr_matrix < D, 1, 0)  # Distances below 'D' are assigned a 1, all others a 0
hv.Image(cond_matrix).opts(width=400, height=300, colorbar=True, cmap='Greys')  # Plot conditional matrix 'cond_matrix'

embedding = SpectralEmbedding(n_components=3, affinity='precomputed', n_jobs=n_cpus,
                              eigen_solver='arpack')  # Compute 3D spectral embedding
data_transformed = embedding.fit_transform(np.abs(cond_matrix))  # Transform the data using 'cond_matrix' created above

plot_input = pd.DataFrame(data_transformed, columns=['x', 'y', 'z'])  # Save transformed data as a data frame
plot_input['Number'] = num_df.astype(str)  # Add a column of digit labels
plot = px.scatter_3d(plot_input, x='x', y='y', z='z', color='Number',
                     width=500, height=400, opacity=0.7)  # Create plot of embedding
# plot  # Display plot
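# For reference, the kind of k-nearest-neighbor affinity matrix described above
# can be built directly with scikit-learn. A minimal, self-contained sketch;
# the 1-D points are illustrative, and n_neighbors=2 mirrors the n=2 example
# in the text.
import numpy as np
from sklearn.neighbors import kneighbors_graph

points = np.array([[0.0], [1.0], [1.4], [3.0]])  # illustrative points
affinity = kneighbors_graph(points, n_neighbors=2, include_self=True).toarray()
affinity_sym = 0.5 * (affinity + affinity.T)  # symmetrize by averaging with the transpose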
        # count up the open buckets and measure the debt supply at each time step
        debt_supply[time_step] = sum([c["debt"] for c in cdps if c["open"] == True])
        # calculate the loss or gain per day as a function of the Dai supply on that day
        if liquidated_debt[time_step] != 0:
            loss_gain_perc[time_step] = loss_gain[time_step] / liquidated_debt[time_step]
        else:
            loss_gain_perc[time_step] = 0
    # record the total loss from the simulation
    result_array[simulation] = sum(loss_gain) / np.average(debt_supply)
    # record the total array of ETH prices from the simulation
    eth_price_record[simulation] = M

data_results += [{"sigma": sigma, "collateral_cutoff": collateral_cutoff,
                  "gain-loss": np.average(result_array)}]
print(sigma, collateral_cutoff, liquidation_penalty, np.average(result_array))

fig = px.scatter_3d(pd.DataFrame(data_results), x='sigma', y='collateral_cutoff',
                    z='gain-loss', color='gain-loss', size_max=40, opacity=0.7)
plot_url = py.plot(fig, filename="3d_gains.html")

# display a graph of the ETH price over time
data = [go.Scatter(x=x, y=M, mode='lines', line=dict(color="blue"))]
layout = go.Layout(xaxis=dict(title="Days"), yaxis=dict(title="ETH Price"))
fig = Figure(data=data, layout=layout)
plot_url = py.plot(fig, filename="ETH Price over time.html")

# display a graph of the collateralization ratios of each bucket over time
collateral_df = pd.DataFrame(collateralizations)
def main(start_data, end_data):
    ###### CUSTOMIZE COLOR THEME ######
    alt.themes.register("finastra", finastra_theme)
    alt.themes.enable("finastra")
    violet, fuchsia = ["#694ED6", "#C137A2"]

    ###### SET UP PAGE ######
    icon_path = os.path.join(".", "raw", "esg_ai_logo.png")
    st.set_page_config(page_title="INVESTECH", page_icon=icon_path,
                       layout='centered', initial_sidebar_state="collapsed")
    _, logo, _ = st.beta_columns(3)
    logo.image(icon_path, width=200)
    style = ("text-align:center; padding: 0px; font-family: arial black; "
             "font-size: 400%")
    title = f"<h1 style='{style}'>INVESTECH</h1><br><br>"
    st.write(title, unsafe_allow_html=True)

    ###### LOAD DATA ######
    with st.spinner(text="Fetching Data..."):
        data, companies = load_data(start_data, end_data)
    df_conn = data["conn"]
    df_data = data["data"]
    embeddings = data["embed"]

    ###### CREATE SIDEBAR CATEGORY FILTER ######
    st.sidebar.title("Filter Options")
    date_place = st.sidebar.empty()
    esg_categories = st.sidebar.multiselect("Select News Categories",
                                            ["E", "S", "G"], ["E", "S", "G"])
    pub = st.sidebar.empty()
    num_neighbors = st.sidebar.slider("Number of Connections", 1, 20, value=8)

    ###### RUN COMPUTATIONS WHEN A COMPANY IS SELECTED ######
    company = st.selectbox("Select a Company to Analyze", companies)
    if company and company != "Select a Company":
        ###### FILTER ######
        df_company = df_data[df_data.Organization == company]
        diff_col = f"{company.replace(' ', '_')}_diff"
        esg_keys = ["E_score", "S_score", "G_score"]
        esg_df = get_melted_frame(data, esg_keys, keepcol=diff_col)
        ind_esg_df = get_melted_frame(data, esg_keys, dropcol="industry_tone")
        tone_df = get_melted_frame(data, ["overall_score"], keepcol=diff_col)
        ind_tone_df = get_melted_frame(data, ["overall_score"], dropcol="industry_tone")

        ###### DATE WIDGET ######
        start = df_company.DATE.min()
        end = df_company.DATE.max()
        selected_dates = date_place.date_input("Select a Date Range", value=[start, end],
                                               min_value=start, max_value=end, key=None)
        time.sleep(0.8)  # Allow the user some time to select the two dates -- hacky :D
        start, end = selected_dates

        ###### FILTER DATA ######
        df_company = filter_company_data(df_company, esg_categories, start, end)
        esg_df = filter_on_date(esg_df, start, end)
        ind_esg_df = filter_on_date(ind_esg_df, start, end)
        tone_df = filter_on_date(tone_df, start, end)
        ind_tone_df = filter_on_date(ind_tone_df, start, end)
        date_filtered = filter_on_date(df_data, start, end)

        ###### PUBLISHER SELECT BOX ######
        publishers = df_company.SourceCommonName.sort_values().unique().tolist()
        publishers.insert(0, "all")
        publisher = pub.selectbox("Select Publisher", publishers)
        df_company = filter_publisher(df_company, publisher)

        ###### DISPLAY DATA ######
        URL_Expander = st.beta_expander(f"View {company.title()} Data:", True)
        URL_Expander.write(f"### {len(df_company):,d} Matching Articles for " + company.title())
        display_cols = ["DATE", "SourceCommonName", "Tone", "Polarity",
                        "NegativeTone", "PositiveTone"]  # "WordCount"
        URL_Expander.write(df_company[display_cols])

        URL_Expander.write(f"#### Sample Articles")
        link_df = df_company[["DATE", "URL"]].head(3).copy()
        link_df["ARTICLE"] = link_df.URL.apply(get_clickable_name)
        link_df = link_df[["DATE", "ARTICLE"]].to_markdown(index=False)
        URL_Expander.markdown(link_df)

        ###### CHART: METRIC OVER TIME ######
        st.markdown("---")
        col1, col2 = st.beta_columns((1, 3))
        metric_options = ["Tone", "NegativeTone", "PositiveTone", "Polarity",
                          "ActivityDensity", "WordCount", "Overall Score", "ESG Scores"]
        line_metric = col1.radio("Choose Metric", options=metric_options)

        if line_metric == "ESG Scores":
            # Get ESG scores
            esg_df["WHO"] = company.title()
            ind_esg_df["WHO"] = "Industry Average"
            esg_plot_df = pd.concat([esg_df, ind_esg_df]).reset_index(drop=True)
            esg_plot_df.replace({"E_score": "Environment", "S_score": "Social",
                                 "G_score": "Governance"}, inplace=True)
            metric_chart = alt.Chart(esg_plot_df, title="Trends Over Time").mark_line().encode(
                x=alt.X("yearmonthdate(DATE):O", title="DATE"),
                y=alt.Y("Score:Q"),
                color=alt.Color("ESG", sort=None,
                                legend=alt.Legend(title=None, orient="top")),
                strokeDash=alt.StrokeDash("WHO", sort=None,
                                          legend=alt.Legend(title=None,
                                                            symbolType="stroke",
                                                            symbolFillColor="gray",
                                                            symbolStrokeWidth=4,
                                                            orient="top")),
                tooltip=["DATE", "ESG", alt.Tooltip("Score", format=".5f")])
        else:
            if line_metric == "Overall Score":
                line_metric = "Score"
                tone_df["WHO"] = company.title()
                ind_tone_df["WHO"] = "Industry Average"
                plot_df = pd.concat([tone_df, ind_tone_df]).reset_index(drop=True)
            else:
                df1 = df_company.groupby("DATE")[line_metric].mean().reset_index()
                df2 = filter_on_date(df_data.groupby("DATE")[line_metric].mean().reset_index(),
                                     start, end)
                df1["WHO"] = company.title()
                df2["WHO"] = "Industry Average"
                plot_df = pd.concat([df1, df2]).reset_index(drop=True)
            metric_chart = alt.Chart(plot_df, title="Trends Over Time").mark_line().encode(
                x=alt.X("yearmonthdate(DATE):O", title="DATE"),
                y=alt.Y(f"{line_metric}:Q", scale=alt.Scale(type="linear")),
                color=alt.Color("WHO", legend=None),
                strokeDash=alt.StrokeDash("WHO", sort=None,
                                          legend=alt.Legend(title=None,
                                                            symbolType="stroke",
                                                            symbolFillColor="gray",
                                                            symbolStrokeWidth=4,
                                                            orient="top")),
                tooltip=["DATE", alt.Tooltip(line_metric, format=".3f")])
        metric_chart = metric_chart.properties(height=340, width=200).interactive()
        col2.altair_chart(metric_chart, use_container_width=True)

        ###### CHART: ESG RADAR ######
        col1, col2 = st.beta_columns((1, 2))
        avg_esg = data["ESG"]
        avg_esg.rename(columns={"Unnamed: 0": "Type"}, inplace=True)
        avg_esg.replace({"T": "Overall", "E": "Environment", "S": "Social",
                         "G": "Governance"}, inplace=True)
        avg_esg["Industry Average"] = avg_esg.mean(axis=1)
        radar_df = avg_esg[["Type", company, "Industry Average"]].melt(
            "Type", value_name="score", var_name="entity")
        radar = px.line_polar(radar_df, r="score", theta="Type", color="entity",
                              line_close=True, hover_name="Type",
                              hover_data={"Type": True, "entity": True, "score": ":.2f"},
                              color_discrete_map={"Industry Average": fuchsia,
                                                  company: violet})
        radar.update_layout(
            template=None,
            polar={"radialaxis": {"showticklabels": False, "ticks": ""},
                   "angularaxis": {"showticklabels": False, "ticks": ""}},
            legend={"title": None, "yanchor": "middle", "orientation": "h"},
            title={"text": "<b>ESG Scores</b>", "x": 0.5, "y": 0.8875,
                   "xanchor": "center", "yanchor": "top",
                   "font": {"family": "Futura", "size": 23}},
            margin={"l": 5, "r": 5, "t": 0, "b": 0},
        )
        radar.update_layout(showlegend=False)
        col1.plotly_chart(radar, use_container_width=True)

        ###### CHART: DOCUMENT TONE DISTRIBUTION ######
        # add overall average
        dist_chart = alt.Chart(df_company, title="Document Tone Distribution").transform_density(
            density='Tone', as_=["Tone", "density"]).mark_area(
            opacity=0.5, color="purple").encode(
            x=alt.X('Tone:Q', scale=alt.Scale(domain=(-10, 10))),
            y='density:Q',
            tooltip=[alt.Tooltip("Tone", format=".3f"),
                     alt.Tooltip("density:Q", format=".4f")]).properties(
            height=325).configure_title(dy=-20).interactive()
        col2.markdown("### <br>", unsafe_allow_html=True)
        col2.altair_chart(dist_chart, use_container_width=True)

        ###### CHART: SCATTER OF ARTICLES OVER TIME ######
        scatter = alt.Chart(df_company, title="Article Tone").mark_circle().encode(
            x="NegativeTone:Q",
            y="PositiveTone:Q",
            size="WordCount:Q",
            color=alt.Color("Polarity:Q", scale=alt.Scale()),
            tooltip=[alt.Tooltip("Polarity", format=".3f"),
                     alt.Tooltip("NegativeTone", format=".3f"),
                     alt.Tooltip("PositiveTone", format=".3f"),
                     alt.Tooltip("DATE"),
                     alt.Tooltip("WordCount", format=",d"),
                     alt.Tooltip("SourceCommonName", title="Site")]).properties(
            height=450).interactive()
        st.altair_chart(scatter, use_container_width=True)

        ###### NUMBER OF NEIGHBORS TO FIND ######
        neighbor_cols = [f"n{i}_rec" for i in range(num_neighbors)]
        company_df = df_conn[df_conn.company == company]
        neighbors = company_df[neighbor_cols].iloc[0]

        ###### CHART: 3D EMBEDDING WITH NEIGHBORS ######
        st.markdown("---")
        color_f = lambda f: (f"Company: {company.title()}" if f == company
                             else ("Connected Company" if f in neighbors.values
                                   else "Other Company"))
        embeddings["colorCode"] = embeddings.company.apply(color_f)
        point_colors = {company: violet, "Connected Company": fuchsia,
                        "Other Company": "lightgrey"}
        fig_3d = px.scatter_3d(embeddings, x="0", y="1", z="2", color='colorCode',
                               color_discrete_map=point_colors, opacity=0.4,
                               hover_name="company",
                               hover_data={c: False for c in embeddings.columns})
        fig_3d.update_layout(
            legend={"orientation": "h", "yanchor": "bottom", "title": None},
            title={"text": "<b>Company Connections</b>", "x": 0.5, "y": 0.9,
                   "xanchor": "center", "yanchor": "top",
                   "font": {"family": "Futura", "size": 23}},
            scene={"xaxis": {"visible": False}, "yaxis": {"visible": False},
                   "zaxis": {"visible": False}},
            margin={"l": 0, "r": 0, "t": 0, "b": 0},
        )
        st.plotly_chart(fig_3d, use_container_width=True)

        ###### CHART: NEIGHBOR SIMILARITY ######
        st.markdown("---")
        neighbor_conf = pd.DataFrame({
            "Neighbor": neighbors,
            "Confidence": company_df[[f"n{i}_conf" for i in range(num_neighbors)]].values[0]})
        conf_plot = alt.Chart(neighbor_conf, title="Connected Companies").mark_bar().encode(
            x="Confidence:Q",
            y=alt.Y("Neighbor:N", sort="-x"),
            tooltip=["Neighbor", alt.Tooltip("Confidence", format=".3f")],
            color=alt.Color("Confidence:Q", scale=alt.Scale(), legend=None)).properties(
            height=25 * num_neighbors + 100).configure_axis(grid=False)
        st.altair_chart(conf_plot, use_container_width=True)
x="col0", y="col1", labels={"col0": "dim 1", "col1": "dim 2"}, animation_frame="t", ) fig2d.show() # For Dimensionality Reduction to 3D reducer.setRds(query0=query0, query1=query1, dim=3) print(reducer.getRdsDf()) fig3d = px.scatter_3d( reducer.getRdsDf(), x="col0", y="col1", z="col2", labels={"col0": "dim 1", "col1": "dim 2", "col2": "dim 3"}, animation_frame="t", color="query0", ) fig3d.update_traces( marker=dict(size=2), ) fig3d.update_layout( title_text="post embedding", scene=dict( xaxis=dict( nticks=4, range=[-20, 20], # range=[min(embedder.em["col0"]), max(embedder.em["col0"])],
def _assemble_chart_object_from_filtered_df_and_chart_input_list(
        self,
        df: pd.DataFrame,
        chart_input_values_list: Tuple[Any],
        template: str = None,
) -> Any:
    """take a dataframe and a list of chart input values from the dash
    callback, produce a plotly figure

    1. create a dict with all the original (default) values and updated
       values (from the chart inputs)
    2. create and return the figure based on that data

    Args:
        df (pandas.DataFrame): dataframe we want to filter
        chart_input_values_list (Tuple[Any]): list of values we'll use to
            update the chart
        template (:obj: `str`, optional): layout template we want to use.
            Options include: ['default', 'turbo', 'turbo-dark']

    Returns:
        plotly.graph_objs._figure.Figure (plotly.express.bar, line, etc)

    Raises:
        ValueError if chart_input_values_list doesn't have the same length
        as self.chart_input_list
    """
    if len(chart_input_values_list) != len(self.chart_input_list):
        raise ValueError(
            '''chart_input_values_list ({}) and chart_input_list ({})
            must have the same length'''
            .format(chart_input_values_list, self.chart_input_list))

    # 1
    figure_values_dict = dict(self._chart_input_string_default_value_dict)
    for index, chart_input_value in enumerate(chart_input_values_list):
        figure_values_dict[self.chart_input_list[index]] = chart_input_value

    # 2
    if figure_values_dict['output_type'] == 'scatter':
        return px.scatter(
            data_frame=df,
            x=figure_values_dict['x'],
            y=figure_values_dict['y'],
            color=figure_values_dict['color'],
            size=figure_values_dict['size'],
            hover_name=figure_values_dict['hover_name'],
            hover_data=figure_values_dict['hover_data'],
            template=self._template_lookup_dict[template]['chart_template'],
        )
    if figure_values_dict['output_type'] == 'line':
        return px.line(
            data_frame=df,
            x=figure_values_dict['x'],
            y=figure_values_dict['y'],
            color=figure_values_dict['color'],
            hover_name=figure_values_dict['hover_name'],
            hover_data=figure_values_dict['hover_data'],
            template=self._template_lookup_dict[template]['chart_template'],
        )
    if figure_values_dict['output_type'] == 'area':
        return px.area(
            data_frame=df,
            x=figure_values_dict['x'],
            y=figure_values_dict['y'],
            color=figure_values_dict['color'],
            hover_name=figure_values_dict['hover_name'],
            hover_data=figure_values_dict['hover_data'],
            template=self._template_lookup_dict[template]['chart_template'],
        )
    if figure_values_dict['output_type'] == 'bar':
        return px.bar(
            data_frame=df,
            x=figure_values_dict['x'],
            y=figure_values_dict['y'],
            color=figure_values_dict['color'],
            hover_name=figure_values_dict['hover_name'],
            hover_data=figure_values_dict['hover_data'],
            template=self._template_lookup_dict[template]['chart_template'],
        )
    if figure_values_dict['output_type'] == 'violin':
        return px.violin(
            data_frame=df,
            x=figure_values_dict['x'],
            y=figure_values_dict['y'],
            color=figure_values_dict['color'],
            hover_name=figure_values_dict['hover_name'],
            hover_data=figure_values_dict['hover_data'],
            points='all',
            template=self._template_lookup_dict[template]['chart_template'],
        )
    if figure_values_dict['output_type'] == 'scatter_3d':
        return px.scatter_3d(
            data_frame=df,
            x=figure_values_dict['x'],
            y=figure_values_dict['y'],
            z=figure_values_dict['z'],
            color=figure_values_dict['color'],
            hover_name=figure_values_dict['hover_name'],
            hover_data=figure_values_dict['hover_data'],
            template=self._template_lookup_dict[template]['chart_template'],
        )
    if figure_values_dict['output_type'] == 'scatter_geo':
        return px.scatter_geo(
            data_frame=df,
            locations=figure_values_dict['locations'],
            locationmode=figure_values_dict['locationmode'],
            projection=figure_values_dict['projection'],
            color=figure_values_dict['color'],
            size=figure_values_dict['size'],
            hover_name=figure_values_dict['hover_name'],
            hover_data=figure_values_dict['hover_data'],
            template=self._template_lookup_dict[template]['chart_template'],
        )
    if figure_values_dict['output_type'] == 'choropleth':
        return px.choropleth(
            data_frame=df,
            locations=figure_values_dict['locations'],
            locationmode=figure_values_dict['locationmode'],
            projection=figure_values_dict['projection'],
            color=figure_values_dict['color'],
            hover_name=figure_values_dict['hover_name'],
            hover_data=figure_values_dict['hover_data'],
            template=self._template_lookup_dict[template]['chart_template'],
        )
    else:
        # who are you? who who, who who
        raise ValueError(
            """I don't know what to do with a "{}" output_type. Please add it to {}."""
            .format(figure_values_dict['output_type'], __file__))
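# Since every branch above differs mainly in which plotly.express function it
# calls, a dispatch table can replace the if-chain. A sketch of the idea only:
# real code would still need the per-chart-type argument filtering that the
# method above does explicitly.
import plotly.express as px

PX_DISPATCH = {
    'scatter': px.scatter,
    'line': px.line,
    'area': px.area,
    'bar': px.bar,
    'violin': px.violin,
    'scatter_3d': px.scatter_3d,
    'scatter_geo': px.scatter_geo,
    'choropleth': px.choropleth,
}

def make_figure(output_type, **kwargs):
    try:
        return PX_DISPATCH[output_type](**kwargs)
    except KeyError:
        raise ValueError(f'Unknown output_type: {output_type}')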
def random():
    # Load data
    pcf = pipe.load_pcf(r'W:\Neurophysiology-Storage1\Wahl\Hendrik\PhD\Data\Batch3\M41\20200511')
    # Neurons as samples, position bins as features
    raw_data = pcf.bin_avg_activity
    pc_idx = [x[0] for x in pcf.place_cells]
    labels = np.zeros(len(raw_data))
    labels[pc_idx] = 1

    # Standardize (z-score) data; the numerator must be parenthesized before dividing
    data = (raw_data - np.mean(raw_data, axis=0)) / np.std(raw_data, axis=0)

    # perform PCA (input has shape (n_samples, n_features))
    score, evectors, evals = pca(data)
    # plot the eigenvalues
    plot_eigenvalues(evals, limit=False)
    # plot variance explained
    plot_variance_explained(np.cumsum(evals) / np.sum(evals), cutoff=0.95)

    # visualize the weights (eigenvector entries) of the n-th principal component
    n_comp = 1
    plt.figure()
    for i in range(n_comp):
        plt.plot(evectors[:, i], label=f'Comp {i + 1}', linewidth=2)
    for zone in pcf.params['zone_borders']:
        plt.axvspan(zone[0], zone[1], color='red', alpha=0.1)
    plt.legend()

    perform_PCA(data, labels, 2, plot=True)

    # built-in PCA
    pca_model = PCA(n_components=80)  # Initializes PCA
    out = pca_model.fit(data)         # Performs PCA
    scores = pca_model.transform(data)
    weights = pca_model.components_

    # Plot first three components
    df = pd.DataFrame(np.vstack((scores.T, labels)).T)
    df.rename(columns=str, inplace=True)
    df.rename(columns={'80': 'labels'}, inplace=True)
    pio.renderers.default = 'browser'
    fig = px.scatter_3d(df, x='0', y='1', z='2', color='labels')
    fig.show()


def perform_PCA(data, labels, n_comp, plot=False):
    pca_model = PCA(n_components=80)  # Initializes PCA
    pca_model.fit(data)               # Performs PCA
    scores = pca_model.transform(data)
    nrows = 3
    ncols = 3
    if plot:
        fig, ax = plt.subplots(nrows, ncols)
        i = 0
        for row in range(nrows):
            for col in range(ncols):
                ax[row, col].scatter(x=scores[:, i], y=scores[:, i + 1], s=10, c=labels)
                ax[row, col].set_xlabel(f'Component {i + 1}')
                ax[row, col].set_ylabel(f'Component {i + 2}')
                i += 1

    # Plot PCA component with overlaying histogram
    plot_pc_with_hist(-score, evectors, (0, 1), labels, pcf.params)

    # t-SNE
    fig, ax = plt.subplots(2, 3)
    perplexities = [5, 30, 50, 75, 100, 500]
    count = 0
    for row in range(2):
        for col in range(3):
            pca_mod = PCA(n_components=50)
            pca_results = pca_mod.fit_transform(data)
            tsne_mod = TSNE(n_components=2, perplexity=perplexities[count], n_iter=5000)
            embed = tsne_mod.fit_transform(pca_results)
            ax[row, col].scatter(x=embed[:, 0], y=embed[:, 1], c=labels)
            ax[row, col].set_xlabel('Component 1')
            ax[row, col].set_ylabel('Component 2')
            ax[row, col].set_title(f'Perplexity {perplexities[count]}')
            count += 1

    # 3D
    for perp in perplexities:
        tsne_mod = TSNE(n_components=3, perplexity=perp, n_iter=5000)
        embed = tsne_mod.fit_transform(data)
        df = pd.DataFrame(np.vstack((embed.T, labels)).T)
        df.rename(columns=str, inplace=True)
        df.rename(columns={'3': 'labels'}, inplace=True)
        pio.renderers.default = 'browser'
        fig = px.scatter_3d(df, x='0', y='1', z='2', color='labels')
        fig.show()
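# The manual z-scoring above is easy to get wrong because of operator
# precedence; sklearn's StandardScaler does the same thing explicitly.
# A small equivalent sketch:
from sklearn.preprocessing import StandardScaler

data = StandardScaler().fit_transform(raw_data)  # (x - mean) / std, per feature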
)

# Building Scatter plot
# px.scatter_3d can consume the (n, 3) projection array directly, using the
# column indices 0, 1, 2 as the axes.
scatter_plot = px.scatter_3d(
    wordProjections,
    x=0,
    y=1,
    z=2,
    color=labels_word2vec,
    hover_name=labels_word2vec,
    labels={'color': ''}
)
scatter_plot.update_layout(
    plot_bgcolor="black",
    paper_bgcolor="black",
    font_color="#666699",
    height=800,
)

# Building Histogram
histogram = px.histogram(
    df,
    x='label',
ar = scipy.interpolate.griddata(points=(dfs.x, dfs.y), values=dfs.h_li, xi=(xi, yi))

# %%
plt.imshow(ar, extent=(dfs.x.min(), dfs.x.max(), dfs.y.min(), dfs.y.max()))

# %%
import plotly.express as px

# %%
px.scatter_3d(data_frame=dfs, x="longitude", y="latitude", z="h_li", color="laser")

# %% [markdown]
# ### Play using XrViz
#
# Install the PyViz JupyterLab extension first using the [extension manager](https://jupyterlab.readthedocs.io/en/stable/user/extensions.html#using-the-extension-manager) or via the commands below:
#
# ```bash
# jupyter labextension install @pyviz/[email protected] --no-build
# jupyter labextension list  # check to see that the extension is installed
# jupyter lab build --debug  # build the extension, printing debug messages
# ```
plot(
    fig,
    filename="tsne_plot.html",
    auto_open=False,
    config={
        "scrollZoom": True,
        "modeBarButtonsToRemove": ["lasso2d", "zoom2d"]
    },
)

#%% Plotly 3D tsne
fig = px.scatter_3d(
    data_frame=df_subset.dropna(),
    hover_data=["subcats", "mid"],
    size="dy",
    x="tsne1",
    y="tsne2",
    z="tsne3",
    color_discrete_sequence=bright,
    color="cattext",
)
fig.update_traces(marker=dict(size=2.3, opacity=1.0, line=dict(width=0)))  # size=2.3
plot(fig, filename="tsne_plot.html", auto_open=False)

#%% Save tsne vectors
df_sl_cols_keep = [
    "mid",
    "tsne1",
    "tsne2",
    "cat",
    "dx",