def plot_scores(self):
    """Show and export the modeller score figures.

    Three figures are built from ``self.models``: a scatter of ``molpdf``
    against ``dope`` (one facet per template, OLS trendline), and one box
    plot per score column. Each figure is displayed and then written to an
    HTML file in the current working directory.
    """

    def common_kwargs():
        # A fresh dict per figure, mirroring the separate literals used for
        # every plotly express call.
        return dict(
            hover_name='model',
            color='variant',
            category_orders={'template': self.template_order,
                             'variant': self.variant_order},
            color_discrete_sequence=px.colors.qualitative.G10,
            height=500,
            width=1000,
            template='presentation',
        )

    scatter = px.scatter(self.models,
                         x='dope',
                         y='molpdf',
                         trendline='ols',
                         facet_col='template',
                         range_y=[8000, 16000],
                         **common_kwargs())
    scatter.update_traces(opacity=0.66)
    scatter.show()
    scatter.write_html('scatter_multiModel_score.html')

    # (score column, y-axis range, output file) for the two box plots.
    box_specs = (
        ('molpdf', [8000, 16000], 'boxMol_multiModel_score.html'),
        ('dope', [-237000, -227000], 'boxDop_multiModel_score.html'),
    )
    for score_column, y_range, out_file in box_specs:
        box = px.box(self.models,
                     x='template',
                     y=score_column,
                     points='all',
                     range_y=y_range,
                     **common_kwargs())
        box.show()
        box.write_html(out_file)
def update_output(club1, club2):
    """Build the height and weight box plots for two clubs.

    Rows of the module-level ``df`` whose ``Club`` equals either argument
    are plotted. Returns a ``(height_figure, weight_figure)`` tuple.
    """

    def club_box(measure, heading):
        # One box per club for the requested measurement column.
        either_club = (df['Club'] == club1) | (df['Club'] == club2)
        figure = px.box(df[either_club],
                        x="Club",
                        y=measure,
                        color="Club",
                        points="all",
                        color_discrete_sequence=['#2BB3B0', '#B277A7'])
        centred_title = {
            'text': heading,
            'y': 0.95,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top'
        }
        return figure.update_layout(title=centred_title)

    height_figure = club_box("Height", "Height Distribution")
    weight_figure = club_box("Weight", "Weight Distribution")
    return (height_figure, weight_figure)
def corr_notes(value13, value14):
    """Return a Div holding a box plot of ``NOTE_m_sc`` against ``AGE``.

    Rows of ``final_note`` are kept when ``MPG_REAL`` exceeds ``value14``.
    Unless ``value13`` is ``"general"``, rows are additionally restricted to
    ``POSITION == value13``. The graph id is ``'g4'`` for the general case
    and ``'g5'`` otherwise.
    """
    if value13 == "general":
        keep = final_note["MPG_REAL"] > value14
        graph_id = 'g4'
    else:
        keep = (final_note["POSITION"] == value13) & (final_note["MPG_REAL"] > value14)
        graph_id = 'g5'

    figure = px.box(x=final_note[keep]["AGE"], y=final_note[keep]["NOTE_m_sc"])
    figure.update_layout(title=dict(text='Evolution of Players notes', x=0.5),
                         xaxis_title='Age',
                         yaxis_title='Notes')
    return html.Div([html.Br(), dcc.Graph(id=graph_id, figure=figure)])
def load_plot4(df, mintime, maxtime, sectorno):
    """Render a Streamlit box plot of the ``sectorno`` column.

    A radio button lets the user group the boxes by ``NAME`` ("Overall") or
    by ``TYRECOMPOUND`` ("Tyre Compound"). Boxes are coloured per ``NAME``
    using the ``COLORCODE`` column, and the y axis is clamped to
    ``[mintime, maxtime]``.
    """
    df = df.sort_values('N')
    colour_by_name = dict(zip(df.NAME, df.COLORCODE))

    choice = st.radio("By Tyre Compound or Overall?", ("Overall", "Tyre Compound"))
    if choice == "Overall":
        x_column, x_label = "NAME", 'Name'
    elif choice == "Tyre Compound":
        x_column, x_label = "TYRECOMPOUND", 'Tyre Compounds'

    fig = px.box(df,
                 x=x_column,
                 y=sectorno,
                 color="NAME",
                 width=1200,
                 height=600,
                 color_discrete_map=colour_by_name)
    fig.update_xaxes(title_text=x_label)
    fig.update_layout(plot_bgcolor='#eeeeee', legend_bgcolor='#eeeeee')
    fig.update_yaxes(range=[mintime, maxtime], title_text='Total sector time')
    st.plotly_chart(fig)
def boxplot_describe(df_plot, col_x="weekday", col_y="speed", n_variables=True):
    """Show a plotly box plot and optionally return grouped summary stats.

    Args:
        df_plot: DataFrame holding the data.
        col_x: Name of the grouping (factor) column.
        col_y: Name of the numerical column.
        n_variables: When true, draw one box per value of ``col_x``;
            when false, draw a single box of ``col_y``.

    Returns:
        When ``n_variables`` is true, the transposed ``describe()`` of
        ``col_y`` grouped by ``col_x``. Otherwise ``None``.
    """
    # No matplotlib figure is opened here: the plot is rendered by plotly,
    # so a pyplot figure would only be left open and empty.
    if not n_variables:
        px.box(df_plot, y=col_y).show()
        return None

    # Colour the boxes per category only when there are at most four of them.
    if df_plot[col_x].nunique() > 4:
        fig = px.box(df_plot, x=col_x, y=col_y)
    else:
        fig = px.box(df_plot, x=col_x, y=col_y, color=col_x)
    fig.show()
    return df_plot[[col_y, col_x]].groupby(col_x).describe().T
def update_wykres_1(marka, model, lata, marka2, model2):
    """Build a price-by-production-year box plot for up to two vehicles.

    Each (make, model) pair contributes a row filter when its make is not
    ``None``; the model narrows the filter only when it is given as well.
    Rows matching either pair are kept, then restricted to production years
    strictly between ``lata[0]`` and ``lata[1]``. Boxes are coloured by make
    when two makes remain, by model when one make with two models remains,
    and left uncoloured otherwise.
    """
    dff = df.copy()

    selections = []
    for make, vehicle_model in ((marka, model), (marka2, model2)):
        if make is None:
            continue
        matches = dff['Marka pojazdu'] == make
        if vehicle_model is not None:
            matches = matches & (dff['Model pojazdu'] == vehicle_model)
        selections.append(matches)

    if selections:
        combined = selections[0]
        for extra in selections[1:]:
            combined = combined | extra
        dff = dff[combined]

    year_from, year_to = lata[0], lata[1]
    dff = dff[dff['Rok produkcji'] > year_from]
    dff = dff[dff['Rok produkcji'] < year_to]

    make_count = dff['Marka pojazdu'].nunique()
    if make_count == 2:
        colour_column = 'Marka pojazdu'
    elif make_count == 1 and dff['Model pojazdu'].nunique() == 2:
        colour_column = 'Model pojazdu'
    else:
        colour_column = None

    fig = px.box(dff, x='Rok produkcji', y='Cena', height=380, color=colour_column)
    fig.update_layout({
        'plot_bgcolor': 'rgba(0, 0, 0, 0)',
        'paper_bgcolor': 'rgba(0, 0, 0, 0)',
        'font_color': 'rgb(127,175,223)'
    })
    return fig
def graph_boxplot_artists(value, col):
    """Box plot of final prices of sold artworks, grouped by ``col``.

    Groups are ordered by their number of auctions (descending), or by index
    when ``col`` is 'Auction Number'. Only the first 50 groups of that
    ordering and rows with ``Sold == 1`` are plotted. A non-empty ``value``
    restricts the ranking to rows whose '1 Author' is in ``value``.
    """
    auctions = df_cml.copy()
    by_auction_number = col == 'Auction Number'
    if by_auction_number:
        auctions['Auction Number'] = auctions['Auction Number'].apply(
            lambda x: 'Action ' + str(x))

    # Sold and unsold artwork per group, optionally limited to given authors.
    if value is None or len(value) == 0:
        ranked_source = auctions
    else:
        ranked_source = auctions[auctions['1 Author'].isin(value)]
    sale_record = ranked_source.groupby(col)['Sold'].agg(
        number_of_auctions='count',
        number_of_sold_artworks='sum',
        number_of_unsold_artworks=lambda x: x.count() - x.sum(),
        sale_rate='mean').sort_values('number_of_auctions', ascending=False)
    if by_auction_number:
        sale_record = sale_record.sort_index(ascending=True)

    # Reorder the rows so that groups follow the ranking computed above.
    rows = auctions.copy()
    rows['Temp'] = pd.CategoricalIndex(rows[col],
                                       ordered=True,
                                       categories=sale_record.index)
    rows = rows.sort_values('Temp', ascending=True)

    in_top_groups = rows[col].isin(sale_record[:50].index)
    plotted = rows[in_top_groups & (rows['Sold'] == 1)]

    if col != 'Dominant Colour Name':
        return px.box(plotted,
                      x=col,
                      y="Final Price",
                      title='Final sale prices box plot')

    # The index labels of the ranking are passed as the colour sequence.
    return px.box(
        plotted,
        x='Dominant Colour Name',
        y="Final Price",
        title='Boxplot of final sale price for dominant colours associated with 0+ artworks',
        color='Dominant Colour Name',
        color_discrete_sequence=list(sale_record[sale_record > 0].index),
        points=False,
        boxmode="overlay")
def appendBox(clicks, col1, col2, color):
    """Return a box plot of the module-level ``df`` once clicked.

    Args:
        clicks: Click counter; ``None`` or a non-positive value yields an
            empty figure.
        col1: Column used for the x axis.
        col2: Column used for the y axis.
        color: Optional column used to colour the boxes.

    Returns:
        A plotly express box figure (empty until the first click).
    """
    print(clicks, col1, col2, color)
    # A click counter that has never fired may be None; comparing None with
    # an int would raise TypeError, so treat it like zero clicks.
    if clicks is None or clicks <= 0:
        return px.box()
    # ``color=None`` is the plotly express default, so no branching is needed.
    return px.box(data_frame=df, x=col1, y=col2, color=color)
def update_statistics_charts(project, subject, gender, race, age):
    """Build the MAPE and data-availability box charts for the filters.

    ``project`` and ``gender`` are single values where ``'all'`` selects
    every value found in ``data``. ``subject`` and ``race`` are collections
    where containing ``'all'`` selects everything; an empty one returns two
    empty box figures. ``age`` is indexed as ``(min, max)``, inclusive.

    Returns a pair of figure dicts ``(mape_chart, da_chart)``.
    """
    projects = data.project.unique() if project == 'all' else [project]

    if not subject:
        return px.box(), px.box()
    subjects = data.person.unique() if 'all' in subject else subject

    genders = data.gender.unique() if gender == 'all' else [gender]

    if not race:
        return px.box(), px.box()
    races = data.race.unique() if 'all' in race else race

    mask = ((data.project.isin(projects)) & (data.person.isin(subjects)) &
            (data.gender.isin(genders)) & (data.race.isin(races)) &
            ((data.age <= age[1]) & (data.age >= age[0])))
    selected = data.loc[mask, :]

    def horizontal_box_chart(value_column, heading):
        # Horizontal boxes: one per activity, values along the x axis.
        return {
            "data": [
                go.Box(x=selected[value_column],
                       y=selected['activity'],
                       orientation='h')
            ],
            "layout": go.Layout(title=heading),
        }

    mape_chart_figure = horizontal_box_chart('mape', 'MAPE vs Activity')
    da_chart_figure = horizontal_box_chart('da', 'Data Availability vs Activity')
    return mape_chart_figure, da_chart_figure
def _plotly_express(cat_col, color, churn):
    """Box plot of ``cat_col`` grouped by ``color`` from the global ``df``.

    When ``churn`` equals ``"Churn"`` the boxes are additionally split and
    coloured by the mapped ``Churn_label`` column. Categories on the x axis
    follow the sorted unique values of ``color``.
    """
    x_order = {str(color): df[color].value_counts().sort_index().index}

    if churn == "Churn":
        churn_groups = df['Churn_label'].map({'Yes': 'Churn', 'No': 'NoChurn'})
        fig = px.box(df,
                     x=color,
                     y=cat_col,
                     color=churn_groups,
                     height=450,
                     color_discrete_map={
                         "Churn": "steelblue",
                         "NoChurn": "tomato"
                     },
                     category_orders=x_order)
        heading = f"{cat_col} distribution <br>by {color} & Churn"
    else:
        fig = px.box(df,
                     x=color,
                     y=cat_col,
                     height=450,
                     category_orders=x_order,
                     color_discrete_sequence=['mediumseagreen'])
        heading = f"Distribution of {cat_col} <br>by {color}"

    # Both variants share every layout setting except the title text.
    fig.update_layout(title=heading,
                      xaxis_title=dict(),
                      showlegend=True,
                      yaxis_title=f"{cat_col} Distribution",
                      title_x=.5,
                      legend_title='Churn:',
                      xaxis={'type': 'category'},
                      margin=dict(t=100, l=50))
    fig.update_xaxes(title='')
    return fig
def update_plots(dropdown_x, dropdown_y, button):
    """Build the scatter plot and the two per-species box plots.

    Args:
        dropdown_x: Column of ``data`` shown on the scatter x axis.
        dropdown_y: Column of ``data`` shown on the scatter y axis.
        button: ``'si'`` colours the scatter points by species; any other
            value leaves them uncoloured.

    Returns:
        ``[scatter_figure, box_figure_x, box_figure_y]``.

    Raises:
        KeyError: If a dropdown value has no entry in ``variables_dict``.
    """
    x_label = variables_dict[dropdown_x]
    y_label = variables_dict[dropdown_y]
    background = "#F8F9F9"

    # ``color=None`` is the plotly express default (no colouring).
    scatter_fig = px.scatter(data_frame=data,
                             x=dropdown_x,
                             y=dropdown_y,
                             color='species' if button == 'si' else None)
    # The same title format is used whether or not points are coloured, so
    # the heading always has its colon and spaces around "vs.".
    scatter_fig.update_layout(title='Scatterplot: ' + x_label + ' vs. ' + y_label,
                              xaxis_title=x_label,
                              yaxis_title=y_label,
                              paper_bgcolor=background)

    def species_box(column, label):
        # Distribution of one variable per species.
        figure = px.box(data_frame=data, x='species', y=column, color='species')
        figure.update_layout(showlegend=False,
                             xaxis_title='Species',
                             title='Boxplot: ' + label,
                             yaxis_title='cm',
                             paper_bgcolor=background)
        return figure

    return [scatter_fig,
            species_box(dropdown_x, x_label),
            species_box(dropdown_y, y_label)]
def _plotly_express(cat_col, color, churn):
    """Box plot of ``cat_col`` grouped by ``color`` from ``df_train``.

    With ``churn == "Churn"`` the boxes are split and coloured by the mapped
    ``Churn_label`` column and the legend is shown; otherwise a single
    colour is used and the legend is hidden.
    """
    box_kwargs = dict(
        x=color,
        y=cat_col,
        height=450,
        category_orders={
            str(color): df_train[color].value_counts().sort_index().index
        },
    )

    if churn == "Churn":
        box_kwargs["color"] = df_train['Churn_label'].map({'Yes': 'Churn',
                                                           'No': 'NoChurn'})
        box_kwargs["color_discrete_map"] = {"Churn": "seagreen",
                                            "NoChurn": "indianred"}
        heading = f"{cat_col} dist by <br>{color} & Churn"
        legend_visible = True
    else:
        box_kwargs["color_discrete_sequence"] = ['seagreen']
        heading = f"Distribution of {cat_col} <br>by {color}"
        legend_visible = False

    fig = px.box(df_train, **box_kwargs)
    fig.update_layout(
        title=heading,
        xaxis_title=dict(),
        showlegend=legend_visible,
        yaxis_title=f"{cat_col} Distribution",
        title_x=.5,
        legend_title='Churn:',
        xaxis={'type': 'category'},
        margin=dict(t=100, l=50),
    )
    fig.update_xaxes(title='')
    return fig
def boxplot_duration(df_act, y_scale='norm'):
    """Return a notched box plot of activity durations.

    ``y_scale`` must be ``'norm'`` or ``'log'``; with ``'log'`` the natural
    logarithm of the ``minutes`` column is plotted instead of the raw
    minutes. Individual points are drawn next to each box and carry the
    formatted start time and the minutes in their hover data.
    """
    assert y_scale in ['norm', 'log']

    durations = activities_duration_dist(df_act)
    # Formatted start time, shown when hovering over a data point.
    durations[START_TIME] = df_act[START_TIME].dt.strftime('%c')

    if y_scale == 'log':
        durations['log minutes'] = np.log(durations['minutes'])
        y_column = 'log minutes'
    else:
        y_column = 'minutes'

    return px.box(
        durations,
        x="activity",
        y=y_column,
        notched=True,
        labels={'minutes': y_column},
        points='all',
        title="Activity durations",
        hover_data=[START_TIME, 'minutes'],
    )
def update_figure(Val):
    """Return a univariate and a bivariate figure for column ``Val``.

    * ``"Death Event"``: pie chart, plus an empty bar figure.
    * string-valued column: pie chart, plus a bar chart of counts split by
      "Death Event".
    * anything else: histogram, plus a box plot against "Death Event".

    Both figures receive the same dark colour scheme.

    Returns:
        Tuple ``(univariate_figure, bivariate_figure)``.
    """
    if Val == "Death Event":
        uni = px.pie(data_frame=df_HF.groupby([Val]).count().reset_index(),
                     names=Val,
                     values="Count",
                     title=f"Pie chart of {Val}")
        bi = px.bar()
    # Positional access: a label lookup of 0 fails when the index does not
    # contain 0 (e.g. after filtering or re-indexing).
    elif isinstance(df_HF[Val].iloc[0], str):
        uni = px.pie(data_frame=df_HF.groupby([Val]).count().reset_index(),
                     names=Val,
                     values="Count",
                     title=f"Pie chart of {Val}")
        bi = px.bar(df_HF.groupby([Val, "Death Event"]).count().reset_index(),
                    x=Val,
                    y="Count",
                    color="Death Event",
                    title=f"Bar chart of {Val} vs death")
    else:
        uni = px.histogram(df_HF, x=Val, title=f"Histogram of {Val}")
        bi = px.box(df_HF, x="Death Event", y=Val, title=f"Box plot of {Val}")

    for figure in (uni, bi):
        figure.update_layout(font_color="rgb(255, 255, 255)",
                             paper_bgcolor='rgb(43, 63, 82)',
                             plot_bgcolor='rgb(43, 63, 82)')
    return uni, bi
def box_categorical(df,
                    y,
                    title='Box',
                    out_path=None,
                    max_col=2,
                    layout_kwargs=None,
                    to_image=False):
    """Draw one box plot of ``y`` per object-dtype column of ``df``.

    For every object column other than ``y`` the rows are sorted by the
    per-category median of ``y`` before plotting. The traces of all plots
    are handed to ``datagroups_subplots`` for arrangement into subplots.

    Args:
        df: Source DataFrame.
        y: Name of the column plotted on the y axis.
        title: Forwarded to ``datagroups_subplots``.
        out_path: Forwarded to ``datagroups_subplots``.
        max_col: Forwarded to ``datagroups_subplots``; also decides which
            subplots get a y-axis title (every ``max_col``-th one).
        layout_kwargs: Forwarded to ``datagroups_subplots``; ``None`` means
            an empty dict.
        to_image: Forwarded to ``datagroups_subplots``.
    """
    # A fresh dict per call: a mutable default would be shared by all calls.
    if layout_kwargs is None:
        layout_kwargs = {}

    median_column = 'BOX_CATEGORICAL_median'
    columns = [name for name in df.select_dtypes(include='object').columns
               if name != y]

    data_groups = []
    for column in columns:
        medians = df.groupby(column).agg(
            **{median_column: (y, 'median')}).reset_index()
        ordered = (df[[column, y]].copy()
                   .merge(medians, on=column, how='left')
                   .sort_values(by=median_column))
        data_groups.append(px.box(ordered, x=column, y=y)['data'])

    datagroups_subplots(
        data_groups,
        max_col=max_col,
        title=title,
        out_path=out_path,
        xaxis_titles=columns,
        yaxis_titles=[
            y if position % max_col == 0 else None
            for position, _ in enumerate(columns)
        ],
        layout_kwargs=layout_kwargs,
        to_image=to_image)
def plot_score_boxplot(dfe: DataFrame, per: config.Per) -> Figure:
    """
    Plot score boxplot.

    The precision, recall and f1-score columns of ``dfe`` are stacked into a
    long frame with a common ``value`` column and a ``score`` column naming
    the metric, then drawn as boxes over ``-log10(e-value)``.

    Parameters
    ----------
    dfe
        DataFrame holding the score columns named by ``config.label``.
    per
        Grouping level; selects the hover fields and the title wording.

    Returns
    -------
    Figure
        Box plot with its y range set to ``YRANGE``.
    """
    # (value written to the "score" column, source column)
    score_columns = (
        ("precision", config.label.precision),
        ("recall", config.label.recall),
        (config.label.f1score, config.label.f1score),
    )
    parts = []
    for score_name, source_column in score_columns:
        part = dfe.copy()
        part["value"] = part[source_column]
        del part[source_column]
        part["score"] = score_name
        parts.append(part)

    if per == config.Per.organism:
        hover_data = [
            "organism",
            "domain",
            config.label.auc,
            config.label.hmmer_hits,
        ]
    else:
        hover_data = [
            per.name,
            "clan",
            config.label.auc,
            config.label.hmmer_hits,
        ]
    # Drop duplicates while keeping a stable order, so the hover fields do
    # not change position between runs.
    hover_data = list(dict.fromkeys(hover_data))

    fig = px.box(
        pd.concat(parts),
        x="-log10(e-value)",
        color="score",
        y="value",
        title=f"Score boxplot, {per.name}-wise",
        hover_name=per.name,
        hover_data=hover_data,
    )
    fig.update_yaxes(range=YRANGE)
    return fig
def _plot_sparse(self, data_long_format) -> ReportOutput:
    """Write a box plot of the non-zero values to an HTML report file.

    Rows of ``data_long_format`` whose ``value`` is zero are dropped. The
    remaining values are plotted against ``self.x``, optionally split by
    ``self.color``, ``self.facet_row`` and ``self.facet_column``.

    Returns:
        A ``ReportOutput`` pointing at ``<result_path>/<result_name>.html``.
    """
    optional_columns = [column
                        for column in (self.color, self.facet_row, self.facet_column)
                        if column is not None]
    group_columns = [self.x] + optional_columns

    nonzero = data_long_format.loc[data_long_format.value != 0,
                                   [self.x, "value"] + optional_columns]
    total_counts = nonzero.groupby(group_columns, as_index=False).agg({"value": 'sum'})
    # NOTE(review): the totals are grouped by every grouping column but
    # joined on self.x only, and the resulting "*_sum" columns are not used
    # by the plot below - confirm whether this join is still needed.
    plot_data = nonzero.merge(total_counts, on=self.x, how="left", suffixes=('', '_sum')) \
        .fillna(0) \
        .sort_values(by=self.x) \
        .reset_index(drop=True)

    figure = px.box(plot_data,
                    x=self.x,
                    y="value",
                    color=self.color,
                    facet_row=self.facet_row,
                    facet_col=self.facet_column,
                    labels={
                        # Keyed on the plotted column so the configured
                        # y-axis title is actually applied.
                        "value": self.y_title,
                        self.x: self.x_title,
                    },
                    template='plotly_white',
                    color_discrete_sequence=px.colors.diverging.Tealrose)

    file_path = self.result_path / f"{self.result_name}.html"
    figure.write_html(str(file_path))
    return ReportOutput(path=file_path, name="feature boxplots")
def choosing_the_algorithm(df, x, y, color, max_y):
    """Return a legend-free, margin-free box plot with a fixed y range.

    Quartiles are computed with the "exclusive" method and the y axis spans
    ``[0, max_y]``.
    """
    figure = px.box(df, x=x, y=y, color=color)
    # Alternatives are "inclusive", or "linear" (the default).
    figure.update_traces(quartilemethod="exclusive")
    figure.update_layout(
        yaxis=dict(range=[0, max_y]),
        showlegend=False,
        margin=dict(l=0, r=0, t=0, b=0),
    )
    return figure
def plot10(df):
    """Box plot of ``pace`` per ``age``, coloured along the viridis scale.

    Each age gets the viridis colour at its min-max normalised position, so
    colours run from the youngest to the oldest age present in ``df``.

    Returns:
        The plotly figure (750 px high, y axis starting at zero).
    """
    df = df.sort_values("age")
    ages = df['age'].sort_values()

    span = ages.max() - ages.min()
    # With a single distinct age the span is zero; place every age at the
    # start of the colour scale instead of dividing by zero.
    positions = (ages - ages.min()) / span if span else ages * 0.0
    age_colors = {
        age: mpl.colors.rgb2hex(rgba)
        for age, rgba in zip(ages, matplotlib.cm.viridis(positions))
    }

    fig = px.box(df,
                 x="age",
                 y="pace",
                 color="age",
                 color_discrete_map=age_colors,
                 labels={
                     'pace': 'Ritmo',
                     'age': 'Idade'
                 })
    fig.update_yaxes(rangemode="tozero")
    fig.update_layout(
        height=750,
        # The bold tag is closed so the markup is well formed.
        title="<b>Influência da idade no ritmo do atleta</b>",
        title_font_size=20,
    )
    return fig
def plot_compare_stocks(stocks=None,
                        strategy=bollinger_bands,
                        date_from=None,
                        date_to=None,
                        period=365):
    """Plot hold vs. strategy ROI box plots for several stocks.

    For every stock the ROI series returned by ``return_on_hold`` and
    ``return_on_strategy`` are merged on their date and melted into long
    form. All stocks are drawn in one box plot (one group per stock,
    coloured by type) with a reference line at ROI = 1. The figure is
    written to ``<package parent>/plots`` and opened.

    Args:
        stocks: Iterable of stock objects exposing ``data``, ``fullname``
            and ``interval``.
        strategy: Callable applied to ``stock.data`` to get strategy data.
        date_from: Optional lower slice bound applied to both data sets.
        date_to: Optional upper slice bound applied to both data sets.
        period: Forwarded to the two return functions.

    Returns:
        Path of the written HTML file, as a string.

    Raises:
        ValueError: If no stocks are given.
    """
    stocks = [] if stocks is None else list(stocks)
    if not stocks:
        # Fail early with a clear message instead of an opaque ValueError
        # from concatenating an empty list further down.
        raise ValueError("plot_compare_stocks requires at least one stock")

    comparisons = []
    for stock in stocks:
        logger.info(f"adding {stock.fullname} to comparison")
        data = stock.data
        strategy_data = strategy(data)
        if date_from:
            strategy_data = strategy_data[date_from:]
            data = data[date_from:]
        if date_to:
            strategy_data = strategy_data[:date_to]
            data = data[:date_to]

        hold_returns = return_on_hold(data, period=period)
        strategy_returns = return_on_strategy(strategy_data, period=period)
        df_hold = pd.DataFrame(hold_returns.items(),
                               columns=["Date", "Hold"]).set_index("Date")
        df_strategy = pd.DataFrame(strategy_returns.items(),
                                   columns=["Date", "Strategy"]).set_index("Date")
        df_merged = df_hold.merge(df_strategy,
                                  left_index=True,
                                  right_index=True,
                                  how="left")
        df_long = pd.melt(
            df_merged.reset_index(),
            id_vars=["Date"],
            value_vars=["Hold", "Strategy"],
            value_name="ROI",
            var_name="Type",
        ).set_index("Date")
        df_long["Stock"] = f"{stock.fullname}_{stock.interval}"
        comparisons.append(df_long)

    df_compare = pd.concat(comparisons)
    fig = px.box(df_compare, x="Stock", y="ROI", color="Type")
    x_axis = df_compare["Stock"].unique()
    fig.add_trace(
        go.Scatter(x=x_axis,
                   y=[1] * len(x_axis),
                   mode="lines+markers",
                   name="profit line"))

    time_of_completion = datetime.datetime.now().strftime("%Y_%m_%d__%H_%M")
    plot_dir = Path(__file__).parent.parent / "plots"
    # Writing the HTML file fails when the target directory is missing.
    plot_dir.mkdir(parents=True, exist_ok=True)
    plot_path = str(plot_dir / f"plot_comparison_{time_of_completion}.html")
    plot(fig, filename=plot_path, auto_open=True)
    sleep(1)
    return plot_path
def plot_boxplot(data, col, parameter, title='Boxplot'):
    """Create a box plot of one column and print a short interpretation.

    Inputs:
        data = The dataframe on which you would like to plot the boxplot
        col = The column of the dataframe to plot
        parameter = Name of the quantity, used in the printed messages
        title = The title of the boxplot

    The median (ignoring NaN values) and a skewness-based remark are
    printed.

    Returns:
        The figure returned by ``ff.box``.
    """
    # NOTE(review): ``ff.box`` is taken as-is from the surrounding module;
    # confirm that ``ff`` really exposes a ``box`` function.
    fig = ff.box(data, y=col, title=title)

    # NaN-aware median: a single missing value would otherwise turn the
    # reported median into NaN, while ``skew()`` below already skips NaN.
    median = np.nanmedian(data[col])
    print(
        f'From this boxplot we can see that the median {parameter} of this stock {col} is {median}'
    )

    skewness = data[col].skew()
    if skewness > 0:
        print(
            f'From this boxplot we can see that there are more positive {parameter} in this stock'
        )
        print(
            f'This can be seen in the boxplot as there are more {parameter} above the median i.e there is more data above the median than below the median'
        )
    elif skewness < 0:
        print(
            f'From this boxplot we can see that there are more negative {parameter} in this stock'
        )
        print(
            f'This can be seen in the boxplot as there are more {parameter} below the median i.e there is more data below the median than above the median'
        )
    else:
        print(
            f'The {parameter} are not skewed and the positive returns and the negative returns are equally likely'
        )
    print('Please watch out for the outliers')
    return fig
def visualize_elapsed_time_per_ts(df: pd.DataFrame, relative=False):
    """Box plot of elapsed run time per timestep.

    ``run_time`` is measured relative to the row with ``timestep == 1`` and
    ``substep == 1`` of the same (simulation, run). For every (simulation,
    run, timestep) the largest elapsed time is kept.

    Args:
        df: Frame with ``simulation``, ``run``, ``timestep``, ``substep``
            and ``run_time`` columns.
        relative: When exactly ``True``, plot the difference between
            consecutive rows within each simulation instead of the elapsed
            time since start.

    Returns:
        The plotly express box figure (x = timestep).
    """
    index_columns = ['simulation', 'run', 'timestep', 'substep']
    indexed = df.set_index(index_columns)

    # Start time of each (simulation, run): drop the two innermost levels so
    # the subtraction below aligns on simulation/run.
    start_time = indexed.query('timestep == 1 & substep == 1').reset_index(
        [-1, -2]).run_time
    since_start = indexed.run_time - start_time
    since_start.name = 'time_since_start'
    indexed = indexed.join(since_start)

    per_timestep = indexed.groupby(index_columns[:-1]).time_since_start.max()
    fig_df = per_timestep.reset_index()

    if relative is True:
        duration = fig_df.groupby(index_columns[:-2]).time_since_start.diff()
        duration.name = 'psub_duration'
        fig_df = fig_df.join(duration)
        y_col = 'psub_duration'
    else:
        y_col = 'time_since_start'

    return px.box(fig_df, x='timestep', y=y_col)
def _plot(self, plotting_data, output_name):
    """Write a box plot of coefficients per ``max_seed_overlap`` to HTML.

    Returns a ``ReportOutput`` for ``<result_path>/<output_name>.html``, or
    ``None`` (after logging a warning) when ``plotting_data`` is empty.
    """
    if plotting_data.empty:
        logging.warning(
            f"Coefficients: empty data subset specified, skipping {output_name} plot..."
        )
        return None

    import plotly.express as px

    axis_labels = {
        "max_seed_overlap": self._x_axis_title,
        "coefficients": self._y_axis_title
    }
    target = self.result_path / f"{output_name}.html"
    box_figure = px.box(plotting_data,
                        x="max_seed_overlap",
                        y="coefficients",
                        labels=axis_labels,
                        template='plotly_white',
                        color_discrete_sequence=px.colors.diverging.Tealrose)
    box_figure.write_html(str(target))

    return ReportOutput(
        target,
        f"Overlap between implanted motif seeds and features versus {self._y_axis_title.lower()}"
    )
def outliers(request):
    """Render the outlier page with one box plot per numeric feature.

    On POST, the feature named by the ``outlier`` form field is first passed
    to ``ppd.remove_feature_outlier_data``. For every numeric feature the
    template context holds the box-plot HTML fragment, the outlier count and
    the missing-value check reported by ``ppd``.
    """
    print("outliers function")
    if request.method == 'POST':
        print("POST data")
        requested_feature = request.POST['outlier']
        print(requested_feature)
        ppd.remove_feature_outlier_data(requested_feature)

    numeric_data = ppd.get_numeric_data()
    feature_box_plot = {}
    for feature in ppd.get_numeric_features_name():
        # Only non-null values of the feature are plotted.
        present_values = numeric_data.loc[numeric_data[feature].notnull(), feature]
        fig = px.box(present_values, y=feature, points='all', width=600)
        plot_html = pio.to_html(fig=fig, full_html=False, include_plotlyjs=False)
        feature_box_plot[feature] = {
            'box_plot': plot_html,
            'num_outlier': ppd.get_feature_num_outlier(feature),
            'have_missing': ppd.check_feature_missing(feature)
        }

    return render(request,
                  'data_cleaning_app/outliers.html',
                  context={'feature_box_plot': feature_box_plot})
def box_plot(data, options):
    """Build a box plot of a resampled time series.

    The titles and the ``sample`` rule are popped from ``options``; what
    remains is layered over the default box options and passed to plotly
    express. ``data`` is resampled with ``sample`` (summed), unstacked and
    flattened into ``time`` / ``value`` columns before plotting.

    Raises:
        PlottingError: If plotly express rejects the arguments with a
            ``ValueError`` (the message is also flashed).
    """
    titles = {key: options.pop(key)
              for key in ("xaxis_title", "yaxis_title", "legend_title")}
    sample = options.pop("sample")

    defaults = GRAPHS_DEFAULT_OPTIONS["timeseries"]["box"].get_defaults(
        exclude_non_plotly_options=True)
    fig_options = ChainMap(options, defaults)
    fig_options["x"] = "time"
    fig_options["y"] = "value"

    resampled = data.resample(sample).sum()
    resampled.index.name = "time"
    unstacked = resampled.unstack()
    unstacked.name = "value"
    ts_flattened = unstacked.reset_index()

    try:
        fig = px.box(ts_flattened, points="outliers", **fig_options)
    except ValueError as ve:
        flash(f"Timeseries plot error: {ve}", category="error")
        raise PlottingError(f"Timeseries plot error: {ve}")

    fig.update_layout(xaxis_title=titles["xaxis_title"] or "Time",
                      yaxis_title=titles["yaxis_title"]
                      or add_unit_to_label(fig_options["y"], ts_flattened),
                      legend_title=titles["legend_title"],
                      template=GRAPHS_DEFAULT_TEMPLATE,
                      **GRAPHS_DEFAULT_LAYOUT)
    fig.update_xaxes(GRAPHS_DEFAULT_XAXES_LAYOUT)
    fig.update_yaxes(GRAPHS_DEFAULT_YAXES_LAYOUT)
    return fig
def _generate_box(self):
    """Return the Dash layout for the box-plot section.

    The section has a centred heading, the box plot of the numeric columns
    on the left and the explanatory markdown text on the right.
    """
    numeric_df = self.pp.get_numeric_df(self.settings['data'])
    box_figure = px.box(numeric_df)

    heading = html.Div(html.H1(children='Ящик с усами'),
                       style={'text-align': 'center'})
    graph_panel = html.Div(dcc.Graph(id='box', figure=box_figure),
                           style={
                               'width': '78%',
                               'display': 'inline-block',
                               'border-color': 'rgb(220, 220, 220)',
                               'border-style': 'solid',
                               'padding': '5px'
                           })
    text_panel = html.Div(dcc.Markdown(children=markdown_text_box),
                          style={
                              'width': '18%',
                              'float': 'right',
                              'display': 'inline-block'
                          })

    return html.Div([heading, html.Div([graph_panel, text_panel])],
                    style={'margin': '100px'})
def _generate_box(self):
    """Return the Dash layout for the box-plot section with axis titles.

    The section has a centred heading, the box plot of the numeric columns
    (with titled axes) on the left and the markdown description on the
    right.
    """
    figure = px.box(self.pp.get_numeric_df(self.settings['data']))
    figure.update_xaxes(title='Переменные')
    figure.update_yaxes(title='Значения квантилей')

    graph_style = {
        'width': '78%',
        'display': 'inline-block',
        'border-color': 'rgb(220, 220, 220)',
        'border-style': 'solid',
        'padding': '5px'
    }
    text_style = {
        'width': '18%',
        'float': 'right',
        'display': 'inline-block'
    }

    title_row = html.Div(html.H1(children='Ящик с усами'),
                         style={'text-align': 'center'})
    content_row = html.Div([
        html.Div(dcc.Graph(id='box', figure=figure), style=graph_style),
        html.Div(dcc.Markdown(children=markdown_text_box), style=text_style)
    ])
    return html.Div([title_row, content_row], style={'margin': '100px'})
def analyze_boxplot(df, col):
    '''
    Show a box plot and a short written analysis for every column of df.

    Inputs:
        df - the dataframe to analyze; each column is treated as one stock
        col - name of the analyzed quantity, used in titles and messages

    For each stock the NaN-aware median is reported, followed by a remark
    that depends on the sign of the column's skewness.
    '''
    for stock in df.columns:
        st.plotly_chart(
            px.box(df, y=stock, title=f'Boxplot for {col} of {stock}'))
        st.write(
            f'From this boxplot we can see that the median {col} of {stock} is {np.nanmedian(df[stock])}'
        )

        skewness = df[stock].skew()
        if skewness > 0:
            remarks = (
                f'From this boxplot we can see that there are more positive {col} in this stock',
                f'This can be seen in the boxplot as there are more {col} above the median i.e there is more data above the median than below the median',
            )
        elif skewness < 0:
            remarks = (
                f'From this boxplot we can see that there are more negative {col} in this stock',
                f'This can be seen in the boxplot as there are more {col} below the median i.e there is more data below the median than above the median',
            )
        else:
            remarks = (
                'This stock is not skewed at all',
                'This is shown by equal number of observations both above and below the median',
            )
        for remark in remarks:
            st.write(remark)
def get_feature_stats(dataframe):
    """Build one JSON-encoded plotly chart per column of ``dataframe``.

    Chart selection:
      * object column with at most 5 distinct values: histogram;
      * numeric (non-boolean) column with at most 2 distinct values:
        histogram;
      * numeric column whose minimum and median are 0 and that has at most
        5 distinct values: histogram;
      * any other numeric column: box plot;
      * everything else: no figure, encoded as JSON ``null``.

    A column whose chart cannot be built is logged and skipped; the
    remaining columns are still processed.

    Returns:
        DataFrame with the columns ``feature`` and ``chart``.
    """
    import logging

    log = logging.getLogger(__name__)
    charts = []
    for col in dataframe.keys():
        try:
            column = dataframe[col]
            distinct = column.nunique()
            # pandas dtype predicates are used because the ``np.object``
            # alias no longer exists in current NumPy releases.
            is_number = (pd.api.types.is_numeric_dtype(column)
                         and not pd.api.types.is_bool_dtype(column))

            fig = None
            if pd.api.types.is_object_dtype(column):
                if distinct <= 5:
                    fig = px.histogram(dataframe, x=column, color=column)
            elif is_number:
                mostly_zero = (column.min() == 0
                               and column.quantile(0.5) == 0
                               and distinct <= 5)
                if distinct <= 2 or mostly_zero:
                    fig = px.histogram(dataframe, x=column, color=column)
                else:
                    fig = px.box(dataframe, y=column)

            chart_json = json.dumps(fig, cls=plotly.utils.PlotlyJSONEncoder)
        except Exception:
            # Best effort per column: report the failure rather than
            # silently dropping every remaining feature.
            log.exception("Could not build a chart for feature %r", col)
            continue
        charts.append([col, chart_json])

    return pd.DataFrame(charts, columns=["feature", "chart"])
def display_boxes(clust, vari):
    """Box plot of ``vari`` per cluster label of the ``clust`` column.

    The cluster table is read from ``box_cluster.csv`` under ``path``. Its
    three cluster columns get a " clusters" suffix and their labels are
    shifted by one before plotting.
    """
    renamed = {
        name: f"{name} clusters"
        for name in ("Food", "Health", "Food & Health")
    }
    box_cluster = pd.read_csv(path + "box_cluster.csv").rename(columns=renamed)
    for cluster_column in renamed.values():
        box_cluster[cluster_column] = box_cluster[cluster_column] + 1
    box_cluster = box_cluster.sort_values(clust)

    palette = ['#fdca26', '#ed7953', '#bd3786', '#7201a8', '#0d0887']
    fig = px.box(box_cluster,
                 x=clust,
                 y=vari,
                 color=clust,
                 color_discrete_sequence=palette)

    axis_line = dict(showline=True, linewidth=2, linecolor='black')
    fig.update_yaxes(**axis_line)
    fig.update_xaxes(**axis_line)
    fig.update_layout(plot_bgcolor='white',
                      legend=dict(yanchor='bottom', y=0))
    return fig