def sdm_curve(BFWDF, score):
    """Build a split-violin figure comparing imposter and genuine scores.

    For every value of the ``subgroup`` column, the imposter distribution is
    drawn on the negative (left) side and the genuine distribution on the
    positive (right) side of the same violin.

    Args:
        BFWDF: DataFrame providing the ``subgroup`` and ``Tag`` columns and the
            column named by ``score``. Rows are selected by ``Tag`` being
            ``'Imposter'`` or ``'Genuine'``.
        score: Name of the column whose values are plotted on the y axis.

    Returns:
        The assembled ``go.Figure``.
    """
    # Read the score column directly instead of writing a 'score' column
    # back into the caller's DataFrame.
    scores = BFWDF[score]
    subgroups = BFWDF['subgroup']

    fig = go.Figure()
    halves = (
        ('Imposter', 'negative', 'blue'),
        ('Genuine', 'positive', 'red'),
    )
    for tag, side, color in halves:
        has_tag = BFWDF['Tag'] == tag
        fig.add_trace(
            go.Violin(x=subgroups[has_tag],
                      y=scores[has_tag],
                      legendgroup=tag,
                      scalegroup=tag,
                      name=tag,
                      side=side,
                      line_color=color))

    fig.update_traces(meanline_visible=True)
    fig.update_layout(violinmode='overlay',
                      title="SDM Curve by Subgroup",
                      xaxis_title="subgroup",
                      yaxis_title="score")
    fig.update_xaxes(categoryorder="category ascending")
    fig.update_layout(legend_title='')
    return fig
def index_dist_graph(col_name):
    """Plot the per-year distribution of one column for both indices.

    Reads the module-level ``hnx`` and ``hose`` frames (their index must
    expose ``.year``) and overlays one half-violin per year for each of them.

    Args:
        col_name: Column to read from both ``hose`` and ``hnx``.

    Returns:
        The assembled ``go.Figure``.
    """
    # (frame, legend/scale group, display name, line colour) - HNX is added
    # first so the trace order matches the legend order.
    index_series = (
        (hnx, 'HNX', 'HNX-INDEX', HNX_COLOR),
        (hose, 'HOSE', 'VN-INDEX', HOSE_COLOR),
    )

    fig = go.Figure()
    for frame, group, label, color in index_series:
        trace = go.Violin(x=frame.index.year,
                          y=frame[col_name],
                          legendgroup=group,
                          scalegroup=group,
                          name=label,
                          side='positive',
                          line_color=color,
                          opacity=0.6)
        fig.add_trace(trace)

    fig.update_traces(meanline_visible=True, width=3)
    fig.update_layout(
        violingap=0,
        violinmode='overlay',
        width=1000,
        title="Distribution of Daily Percentage Change by Index")
    return fig
def generate_fig(threshold, df, col, cutoff=None):
    """Draw the distribution of ``df[col]`` with threshold / cutoff lines.

    When the ``review`` column contains the value ``'positive'``, the violin is
    split: non-positive rows on the left (blue), positive rows on the right
    (orange). Otherwise a single violin with an inner box is drawn.

    Args:
        threshold: y value of the green horizontal line.
        df: DataFrame with a ``review`` column and the column named by ``col``.
        col: Name of the column to plot.
        cutoff: Optional y value of a second (red) horizontal line.

    Returns:
        The assembled ``go.Figure``; its y axis is fixed to [-0.1, 1.1].
    """
    fig = go.Figure()

    if 'positive' not in df['review'].unique().tolist():
        fig.add_trace(go.Violin(y=df[col], x=[1] * len(df), box_visible=True))
    else:
        is_positive = df['review'] == 'positive'
        halves = (
            (~is_positive, 'negative', 'blue', -1.5),
            (is_positive, 'positive', 'orange', 1.5),
        )
        for mask, side, color, pointpos in halves:
            values = df.loc[mask, col]
            # x has to be exactly as long as the filtered y, not as long as
            # the whole frame.
            fig.add_trace(
                go.Violin(y=values,
                          x=[1] * len(values),
                          side=side,
                          line_color=color,
                          pointpos=pointpos))

    # Must run before the Scatter lines are added: `points` is a violin-only
    # property and update_traces touches every trace in the figure.
    fig.update_traces(points='all')

    fig.add_trace(
        go.Scatter(y=[threshold] * 3, mode='lines',
                   line_color='mediumseagreen'))
    if cutoff is not None:
        fig.add_trace(
            go.Scatter(y=[cutoff] * 3, mode='lines', line_color='indianred'))

    fig.update_layout(margin=dict(l=0, r=0, t=0, b=0),
                      showlegend=False,
                      font=dict(size=14))
    fig.update_xaxes(showticklabels=False)
    fig.update_yaxes(range=[-0.1, 1.1])
    return fig
def update_graph(n_clicks, big_occupations, small_occupations, big_industries,
                 small_industries, technics):
    """Rebuild the min/max salary violin figure for the selected filters.

    Args:
        n_clicks: Click counter; when it equals 0 no update is produced.
        big_occupations, small_occupations, big_industries, small_industries,
        technics: Filter selections, forwarded unchanged to
            ``gen_filtered_min_df`` and ``gen_filtered_max_df``.

    Returns:
        A ``go.Figure`` with one violin group per ``IPO`` value.

    Raises:
        PreventUpdate: If ``n_clicks`` equals 0.
    """
    if n_clicks == 0:
        raise PreventUpdate

    selections = (big_occupations, small_occupations, big_industries,
                  small_industries, technics)
    min_salary_df = gen_filtered_min_df(*selections)
    max_salary_df = gen_filtered_max_df(*selections)

    # Draw the graph: minimum salaries first, then maximum salaries.
    min_trace = go.Violin(x=min_salary_df["IPO"],
                          y=min_salary_df['min_salary'],
                          legendgroup='min',
                          scalegroup='min',
                          name='各最低給与の分布',
                          line_color='blue')
    max_trace = go.Violin(x=max_salary_df["IPO"],
                          y=max_salary_df['max_salary'],
                          legendgroup='max',
                          scalegroup='max',
                          name='各最大給与の分布',
                          line_color='orange')

    fig = go.Figure()
    fig.add_trace(min_trace)
    fig.add_trace(max_trace)
    fig.update_traces(meanline_visible=True, jitter=0.05)
    fig.update_layout(violingap=0, violinmode='group')
    return fig
def reg_violins():
    """Render and save one violin figure of ΔΔG values per complex grouping.

    For each grouping the data is loaded with ``read_data``, the ``ddG`` column
    is limited to [-50, 50], and four violins are drawn: one per mutation set
    (module-level ``gnomad``, ``pathogenic``, ``clinvar``) plus one for the
    whole table. Each figure is written to ``violin_<grouping>.png`` and shown.
    """
    panels = (
        ('unpaired',
         'ΔΔG landscape of Gβ2 (GNB2) bound to Gγ1 (GNG1) subunit only'),
        ('wAlpha', 'ΔΔG landscape of Gβ2 bound to γ1 and αi (GNAI) subunits'),
        # The stray second closing parenthesis in this title was removed.
        ('wGRK2',
         'ΔΔG landscape of Gβ2 bound to γ1 and β-adrenergic receptor kinase 1 (GRK2)'
         ),
    )
    mutation_sets = (
        ('gnomAD', gnomad),
        ('Pathogenic', pathogenic),
        ('Clinvar_homologues', clinvar),
    )

    for grouping, title in panels:
        df = read_data(grouping)
        # Symmetric clamp. The previous formula,
        # min(|v|, 50) * |v| / (v + 0.0001), approximated this but divided by
        # zero at v == -0.0001 and distorted values close to zero.
        df = df.assign(ddG_limited=df.ddG.clip(lower=-50, upper=50))

        fig = go.Figure()
        for groupname, group in mutation_sets:
            fig.add_trace(
                go.Violin(y=df.loc[df.mutation.isin(group)].ddG_limited,
                          name=groupname,
                          box_visible=True,
                          meanline_visible=True))
        fig.add_trace(
            go.Violin(y=df.ddG_limited,
                      name='sequence-space',
                      box_visible=True,
                      meanline_visible=True))

        fig.update_layout(title_text=title, yaxis={'range': [-10, 51]})
        fig.write_image('violin_' + grouping + '.png', scale=3)
        fig.show()
def plot_violin_of(self, feature: str, np_hist: tuple):
    """Write an HTML violin plot of one feature, overall and per bin.

    The first violin (x position 0) shows all values of the feature. One more
    violin is added per entry of ``bins``: each covers the values strictly
    above the previous entry (starting from 0.) and up to and including the
    current entry, except the last one, which covers everything above the
    previous entry and is named ``'max'``.

    Args:
        feature: Key into ``self._features_data``.
        np_hist: Pair whose second element is the array of bin values; the
            first element is ignored.
    """
    _, bins = np_hist
    values = self._features_data[feature]
    n_bins = bins.shape[0]

    def make_violin(data, position, label):
        # All violins share the same box / mean-line styling.
        return go.Violin(
            y=data,
            x0=position,
            name=label,
            box_visible=True,
            meanline_visible=True,
        )

    fig = go.Figure()
    fig.add_trace(make_violin(values, 0, "global"))

    lower = 0.
    for position, upper in enumerate(bins, 1):
        if position == n_bins:
            # Last slot is open-ended.
            subset = values[(values > lower)]
            label = 'max'
        else:
            subset = values[(values > lower) & (values <= upper)]
            label = str(upper)
        fig.add_trace(make_violin(subset, position, label))
        lower = upper

    fig.update_layout(_LAYOUT)
    fig.update_layout({
        'title': f"Feature {feature}",
        'xaxis': {
            'tickmode': 'array',
            'tickvals': list(range(len(bins) + 1)),
            'ticktext': ['global'] + [str(cur_bin) for cur_bin in bins]
        }
    })

    # Only the HTML export is performed here.
    print(f"{STATUS_ARROW}Save violin plot of {feature} as html")
    target = self._output_folder.joinpath(f"feature_{feature}_violin.html")
    fig.write_html(target.as_posix())
def violin_compare(results: pd.DataFrame,
                   *,
                   x: str,
                   series: str,
                   y: str = 'score',
                   bandwidth: float = 0.025,
                   x_title: Optional[str] = None,
                   y_title: Optional[str] = None,
                   x_range: Optional[Tuple[float, float]] = None,
                   y_range: Optional[Tuple[float, float]] = None,
                   title: Optional[str] = None) -> go.Figure:
    """Compare exactly two series as back-to-back half violins.

    The data is reshaped by ``__preprocess_distribution``; its first resulting
    column is drawn on the negative side and the second on the positive side
    of each violin.

    Args:
        results: Input data.
        x: Name of the x dimension.
        series: Name of the dimension that must yield two columns.
        y: Name of the value dimension.
        bandwidth: Bandwidth passed to both violins.
        x_title, y_title, x_range, y_range, title: Forwarded to
            ``__create_figure``.

    Returns:
        The figure produced by ``__create_figure``.

    Raises:
        ValueError: If the preprocessed data does not have two columns.
    """
    data = __preprocess_distribution(results, x, y, series)
    if len(data.columns) != 2:
        raise ValueError(f"The input data is not suitable for violin_compare: The series dimension '{series}'"
                         f" should have 2 distinct values, but has {len(data.columns)}")

    def half_violin(position, side):
        # `position` picks both the data column and its colour.
        column = data.columns[position]
        return go.Violin(x=data.index,
                         y=data[column],
                         box=vl.Box(visible=False),
                         marker=vl.Marker(color=COLORS[position]),
                         meanline=vl.Meanline(visible=True),
                         bandwidth=bandwidth,
                         name=column.capitalize(),
                         side=side)

    traces = [half_violin(0, 'negative'), half_violin(1, 'positive')]
    return __create_figure(traces,
                           title,
                           series,
                           x_title,
                           x,
                           x_range,
                           y_title,
                           y,
                           y_range,
                           violingap=0,
                           violinmode='overlay')
def generate_violin_plot(df, column):
    """Return a Dash graph with one violin of ``Total`` per value of ``column``.

    Args:
        df: DataFrame containing ``column`` and a ``Total`` column.
        column: Grouping column; also used as the figure title.

    Returns:
        A ``dcc.Graph`` with id ``'violin_plot'``.
    """
    fig = go.Figure()
    for value in df[column].unique():
        selected = df[column] == value
        fig.add_trace(
            go.Violin(x=df[column][selected],
                      y=df['Total'][selected],
                      name=value,
                      box_visible=True,
                      meanline_visible=True))

    # Transparent backgrounds for both the paper and the plotting area.
    layout_options = dict(
        title=column,
        height=220,
        width=500,
        autosize=True,
        margin={"r": 0, "t": 50, "l": 0, "b": 0},
        showlegend=False,
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)',
    )
    fig.update_layout(**layout_options)
    fig.update_yaxes(automargin=True)
    return dcc.Graph(figure=fig, id='violin_plot')
def log_val_cosine(self, distances, step):
    """Send a violin plot of cosine distances to the visdom window.

    One violin is drawn per entry of ``distances`` and a dotted horizontal
    reference line is drawn at y = 0.5.

    Args:
        distances: Mapping of series name to its distance values.
        step: Not used by this method.
    """
    title_text = 'Cosine Distance Validation'

    fig = go.Figure()
    for name, distance in distances.items():
        fig.add_trace(
            go.Violin(
                y=distance,
                box_visible=True,
                meanline_visible=True,
                spanmode='hard',
                name=name,
            ))

    # Violins sit at x = 0 .. n-1, so the reference line runs from -0.5 to
    # n - 0.5. This equals the previous fixed 2.5 for three series and now
    # also covers any other number of series.
    line_end = len(distances) - 0.5

    # TODO: Visdom doesn't work with title in layout
    fig.update_layout(shapes=[
        # Horizontal line
        go.layout.Shape(
            type="line",
            x0=-0.5,
            y0=0.5,
            x1=line_end,
            y1=0.5,
            line=dict(
                width=2,
                dash="dot",
            ),
        ),
    ])
    self.viz.plotlyplot(fig, win=title_text)
def violinplot(df,
               title='Violin-Plot',
               out_path=None,
               layout_width=None,
               layout_height=None,
               numeric_only=True):
    """Draw one violin subplot per column of ``df`` via ``plot_subplots``.

    Args:
        df: Source DataFrame.
        title: Overall title passed to ``plot_subplots``.
        out_path: Forwarded to ``plot_subplots``.
        layout_width: Forwarded to ``plot_subplots``.
        layout_height: Forwarded to ``plot_subplots``.
        numeric_only: When true, keep only the columns whose dtype name
            contains ``'float'`` or ``'int'``; otherwise use every column.
    """
    if numeric_only:
        columns = [
            name for name, dtype in df.dtypes.items()
            if 'float' in str(dtype) or 'int' in str(dtype)
        ]
    else:
        columns = df.columns

    # points accepts: all, outliers, suspectedoutliers
    data = [
        go.Violin(y=df[name],
                  box_visible=True,
                  meanline_visible=True,
                  points='outliers') for name in columns
    ]

    # Subplots are laid out two per row and titled with the lower-cased
    # column name.
    max_col = 2
    subplot_titles = [name.lower() for name in columns]
    plot_subplots(data,
                  max_col,
                  title,
                  subplot_titles=subplot_titles,
                  out_path=out_path,
                  layout_width=layout_width,
                  layout_height=layout_height)
def generate_distribution_violin(data_path: str, num_ranks: int,
                                 timesteps: int, bw_value: float):
    """Show one half-violin of sampled energies per timestep.

    For every timestep the global data is read, 500 values are sampled (with
    replacement, via ``np.random.choice``), the share of samples below 0.5,
    below 4 and above 10 is printed, and the samples are transformed with
    ``log_tailed(x, 10)`` before plotting.

    Args:
        data_path: Location handed to ``VPICReader``.
        num_ranks: Number of ranks handed to ``VPICReader``. It used to be
            overwritten with 2 inside the function and is now honoured.
        timesteps: Number of timesteps to plot (indices 0 .. timesteps-1).
        bw_value: Bandwidth of every violin.
    """
    vpic_reader = VPICReader(data_path, num_ranks=num_ranks)

    head_cutoff = 0.5
    head_cutoff_2 = 4
    tail_cutoff = 10

    fig = go.Figure()
    for tsidx in range(timesteps):
        data = vpic_reader.read_global(tsidx)
        print(len(data))

        plotted_data = np.random.choice(data, 500)
        n_samples = len(plotted_data)

        # Vectorised counts instead of Python-level loops over the sample.
        percent_head = np.count_nonzero(
            plotted_data < head_cutoff) * 100.0 / n_samples
        percent_head_2 = np.count_nonzero(
            plotted_data < head_cutoff_2) * 100.0 / n_samples
        percent_tail = np.count_nonzero(
            plotted_data > tail_cutoff) * 100.0 / n_samples

        print('TS {0}, < {1}: {2:.2f}'.format(tsidx, head_cutoff,
                                              percent_head))
        print('TS {0}, < {1}: {2:.2f}'.format(tsidx, head_cutoff_2,
                                              percent_head_2))
        print('TS {0}, > {1}: {2:.2f}'.format(tsidx, tail_cutoff,
                                              percent_tail))

        plotted_data = [log_tailed(x, 10) for x in plotted_data]
        ts_name = 'Timestep {0}'.format(vpic_reader.get_ts(tsidx))

        fig.add_trace(
            go.Violin(y=plotted_data,
                      box_visible=False,
                      meanline_visible=False,
                      name=ts_name,
                      side='positive',
                      points=False,
                      bandwidth=bw_value,
                      scalemode='width',
                      line=dict(width=1)))

    fig.update_traces(width=1.8)

    # Tick labels show the original values behind the transformed positions.
    tick_positions = list(range(0, 18, 2))
    fig.update_layout(
        # The title reports the number of timesteps actually plotted; it was
        # hard-coded to 4 before.
        title_text='Energy distribution from {0} timesteps of a VPIC simulation'
        ' (tail is logarithmic)'.format(timesteps),
        yaxis=dict(tickmode='array',
                   tickvals=tick_positions,
                   ticktext=[
                       '{0:.0f}'.format(log_tailed_reverse(x, 10))
                       for x in tick_positions
                   ]),
    )
    fig.show()
def plot_summary_stat(stat_conds, stat_name="stat"):
    """Show and save a violin plot of one statistic across conditions.

    Args:
        stat_conds: Mapping of condition name to the values of the statistic
            for that condition.
        stat_name: Used as the y axis title and as the PNG file name.

    The figure is shown and then written to ``<stat_name>.png`` inside the
    module-level ``analysis_plot_dir``, which is created if missing.
    """
    fig = go.Figure()
    maximum = 0
    for condition in stat_conds:
        values = stat_conds[condition]
        fig.add_trace(
            go.Violin(y=values,
                      name=condition,
                      points='all',
                      box_visible=True,
                      meanline_visible=True))
        # `default` keeps an empty condition from raising ValueError.
        maximum = max(maximum, max(values, default=maximum))

    fig.update_layout(
        autosize=False,
        width=800,
        height=600,
        showlegend=False,
        plot_bgcolor='rgba(0,0,0,0)',
        xaxis=go.layout.XAxis(title_text="Condition", ),
        yaxis=go.layout.YAxis(title_text=stat_name,
                              range=[0, maximum],
                              ticks="outside",
                              gridcolor='rgba(0,0,0,.1)'))
    fig.show()

    # exist_ok avoids the check-then-create race and also creates parents.
    os.makedirs(analysis_plot_dir, exist_ok=True)
    # Joining gives the same path as before when the directory name ends in a
    # separator, and a correct one when it does not.
    fig.write_image(os.path.join(analysis_plot_dir, stat_name + ".png"))
def get_ridgeplot_fig(df, distance='total_distance', nvals='all'):
    """Build a ridge plot (stacked horizontal half-violins), one per club.

    Args:
        df: DataFrame with a ``club`` column and the column named by
            ``distance``.
        distance: Column whose values are plotted along the x axis.
        nvals: Forwarded to ``utils.get_values`` together with each club's
            values.

    Returns:
        The assembled ``go.Figure``.
    """
    clubs, xmin, xmax = utils.get_clubs(df)
    # Twelve-step gradient; zip() below stops at the shorter of clubs/palette.
    palette = n_colors('rgb(242, 139, 0)',
                       'rgb(206, 0, 0)',
                       12,
                       colortype='rgb')
    per_club = df.groupby('club')

    fig = go.Figure()
    for club, color in zip(clubs, palette):
        club_values = per_club.get_group(club)[distance].values
        fig.add_trace(
            go.Violin(x=utils.get_values(club_values, nvals),
                      name=utils.club_enum[club],
                      line_color=color))

    fig.update_traces(
        orientation='h',
        side='positive',
        width=3,
        points=False,
    )
    fig.update_layout(
        margin=dict(t=30, r=10, b=10, l=10),
        xaxis_showgrid=True,
        xaxis_zeroline=False,
        showlegend=False,
        xaxis=dict(
            range=[xmin - 10, xmax + 20],
            tickmode='linear',
            tick0=0,
            dtick=10,
        ),
    )
    return fig
def boxplot(self, points="outliers"):
    """Show and return overlaid violins of split durations, one per split.

    Args:
        points: Which sample points to draw; can be ``"all"``,
            ``"outliers"`` or ``"suspectedoutliers"``.

    Returns:
        The ``go.Figure`` that was shown.
    """
    df_sh = self.splits_filtered
    # Durations are divided by this factor before plotting
    # (presumably seconds to minutes - confirm against the data source).
    time_scale = 60

    fig = go.Figure()
    for raw_split_id in df_sh.split_id.unique():
        # Rows are selected by the id returned from get_split, not by the raw
        # value taken from the column.
        split, split_name = self.get_split(raw_split_id)
        durations = df_sh.loc[df_sh.split_id == split, 'split_duration']
        fig.add_trace(
            go.Violin(y=np.array(durations) / time_scale,
                      name=f'{split} - {split_name}',
                      pointpos=0))

    fig.update_traces(
        meanline_visible=True,
        points=points,
        jitter=0.5,  # spread the points for better visibility
        scalemode='count')  # scale violin area with the sample count
    fig.update_layout(title_text="Split times distribution",
                      violingap=0,
                      violingroupgap=0,
                      violinmode='overlay')
    fig.show()
    return fig
def graficar_violin_plot(x, y, df, titulo, y_label):
    """Show a violin plot of ``df[y]`` for every distinct value of ``df[x]``.

    Args:
        x: Name of the grouping column (one violin per distinct value).
        y: Name of the column whose distribution is drawn.
        df: Source DataFrame.
        titulo: Figure title, centred at the top.
        y_label: Title of the y axis.
    """
    fig = go.Figure()

    for nivel in df[x].unique():
        en_nivel = df[x] == nivel
        fig.add_trace(
            go.Violin(
                x=df[x][en_nivel],  # group label on the x axis
                y=df[y][en_nivel],  # values of this group on the y axis
                name=nivel,
                box_visible=True,  # draw the box plot inside the violin
                meanline_visible=True,  # draw the mean line
                points="all",  # draw every sample point
            ))

    title_options = {
        'text': titulo,
        'y': 0.9,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    }
    fig.update_layout(title=title_options, yaxis_title=y_label)
    fig.show()
def generate_violin_plot(df, col, filename):
    """Write an HTML violin plot of ``target`` for each group in ``df[col]``.

    Args:
        df: DataFrame containing ``col`` and a ``target`` column.
        col: Name of the grouping (categorical predictor) column.
        filename: Path of the HTML file to write.
    """
    fig_1 = go.Figure()

    for group in pd.unique(df[col]):
        in_group = df[col] == group
        fig_1.add_trace(
            go.Violin(
                y=df["target"][in_group],
                x=df[col][in_group],
                # str() works for numpy scalars and for plain Python strings;
                # the previous `.astype(str)` raised AttributeError for
                # string labels.
                name=str(group),
                box_visible=True,
                meanline_visible=True,
            ))

    fig_1.update_layout(
        title="Continuous Response by Categorical Predictor",
        xaxis_title="Groupings",
        yaxis_title="Response",
    )
    fig_1.write_html(
        file=filename,
        include_plotlyjs="cdn",
    )
def ridge_line(df_act, t_range='day', n=1000):
    """Build a ridge-line plot with one horizontal half-violin per column.

    See https://plotly.com/python/violin/ for the plot type. The data comes
    from ``activities_dist``, which is called on a copy of ``df_act``; each
    column of its result becomes one ridge.

    Args:
        df_act: Input frame; it is copied before use.
        t_range: Forwarded to ``activities_dist``.
        n: Forwarded to ``activities_dist``.

    Returns:
        The assembled ``go.Figure``.
    """
    df = activities_dist(df_act.copy(), t_range, n)
    palette = n_colors('rgb(5, 200, 200)',
                       'rgb(200, 10, 10)',
                       len(df.columns),
                       colortype='rgb')

    fig = go.Figure()
    # Transposing yields one row of samples per column of df.
    for samples, color, column in zip(df.values.T, palette, df.columns):
        fig.add_trace(go.Violin(x=samples, line_color=color, name=column))

    fig.update_traces(orientation='h', side='positive', width=3, points=False)
    fig.update_layout(xaxis_showgrid=False, xaxis_zeroline=False)
    return fig
def create_violinplot(score_type, lower_score, upper_score, title, figname):
    """Show share counts as three violins split by score band.

    Reads the module-level ``scored_df`` and plots its ``shares`` values for
    scores below ``lower_score``, between the two bounds (as selected by
    ``Series.between``), and above ``upper_score``. Every legend entry
    includes the non-null count of its band.

    Args:
        score_type: Name of the score column in ``scored_df``.
        lower_score: Lower band boundary.
        upper_score: Upper band boundary.
        title: Figure title (centred).
        figname: Not used by this function.
    """
    scores = scored_df[score_type]
    bands = (
        (scored_df.shares[scores < lower_score],
         'Scores < ' + str(lower_score)),
        (scored_df.shares[scores.between(lower_score, upper_score)],
         str(lower_score) + ' ≤ Scores ≤ ' + str(upper_score)),
        (scored_df.shares[scores > upper_score],
         'Scores >' + str(upper_score)),
    )

    # The y axis is fixed to the range 0..6000.
    fig = go.Figure(layout=go.Layout(yaxis=dict(range=[0, 6000])))
    for shares, label in bands:
        fig.add_trace(
            go.Violin(y=shares,
                      name=label + '<br>n = ' + str(shares.count())))

    fig.update_layout(title=title,
                      title_x=0.5,
                      yaxis_title='Number of Shares',
                      showlegend=True)
    fig.show()
def plot_hist(plot_df,
              cols=None,
              height=1600,
              width=1400,
              title="Contributions by State",
              out_file=None):
    """Plot Biden-vs-Trump split violins per state, one subplot per column.

    Args:
        plot_df: DataFrame with ``state`` and ``name`` columns plus every
            column listed in ``cols``.
        cols: Columns to plot, one subplot row each. Defaults to the standard
            set of polling / contribution columns.
        height: Figure height.
        width: Figure width.
        title: Figure title.
        out_file: When given, the figure is also written there as HTML.

    Returns:
        The assembled figure.
    """
    # Default built per call rather than as a shared mutable default argument.
    if cols is None:
        cols = [
            "poll_point", "total_contrib", "contrib_count",
            "contrib_per_person", "contrib_diff", "contrib_perc_change",
            "contrib_per_person_diff", "contrib_per_person_perc_change"
        ]

    # (candidate, side of the violin, line colour)
    candidates = (
        ('Biden', 'negative', '#002868'),
        ('Trump', 'positive', '#BF0A30'),
    )
    # Row masks do not depend on the plotted column, so build them once.
    masks = {name: plot_df['name'] == name for name, _, _ in candidates}

    fig = make_subplots(rows=len(cols), cols=1, subplot_titles=cols)
    for row, col in enumerate(cols, start=1):
        for name, side, color in candidates:
            fig.add_trace(go.Violin(x=plot_df['state'][masks[name]],
                                    y=plot_df[col][masks[name]],
                                    legendgroup=name,
                                    scalegroup=name,
                                    name=name,
                                    side=side,
                                    line_color=color,
                                    width=1),
                          row=row,
                          col=1)

    fig.update_traces(meanline_visible=True)
    fig.update_layout(height=height,
                      width=width,
                      title_text=title,
                      violingap=0,
                      violinmode='overlay')
    if out_file is not None:
        fig.write_html(out_file)
    return fig
def plot_occ_violin(
        merged_df: pd.DataFrame,
        height=1600,
        width=1400,
        out_dir=None):
    """Quick violin plots of donation amounts per occupation and state.

    The ten most frequent occupations are selected; for each one a figure is
    built with Biden donations on the negative side and Trump donations on the
    positive side of each state's violin. Only rows with a positive
    ``transaction_amt`` are plotted.

    Args:
        merged_df (pd.DataFrame): contains output of get_merged_df; must
            provide the ``state``, ``date``, ``transaction_amt``, ``name`` and
            ``occupation`` columns.
        height: Figure height.
        width: Figure width.
        out_dir: When given, each figure is written there as
            ``contrib_by_<occupation>.html``.

    Returns:
        The figure of the last occupation processed, or ``None`` when there is
        no occupation to plot.
    """
    merged_df = merged_df[[
        "state", "date", "transaction_amt", "name", "occupation"
    ]]
    top_10_jobs = list(
        merged_df.groupby("occupation")["occupation"].count().sort_values(
            ascending=False)[:10].index)
    merged_df = merged_df.query("occupation.isin(@top_10_jobs)")

    # Stays None if the loop never runs, instead of hitting an unbound name
    # at the return statement.
    fig = None
    for job in top_10_jobs:
        job_df = merged_df.query("occupation == @job & transaction_amt > 0")
        # Both halves share one manual span so they are comparable.
        amount_span = [
            job_df.transaction_amt.min(),
            job_df.transaction_amt.max()
        ]

        # Add both sides of the violin plot.
        fig = go.Figure()
        for cand in ["Biden", "Trump"]:
            is_cand = job_df['name'] == cand
            fig.add_trace(
                go.Violin(
                    x=job_df['state'][is_cand],
                    y=job_df["transaction_amt"][is_cand],
                    name=cand,
                    side='negative' if cand == "Biden" else "positive",
                    line_color='#002868' if cand == "Biden" else "#BF0A30",
                    width=1,
                    span=amount_span,
                    spanmode='manual'))
        fig.update_traces(meanline_visible=True)
        fig.update_layout(height=height,
                          width=width,
                          title_text=f"National Contribution by {job.lower()}",
                          violingap=0,
                          violinmode='overlay')
        if out_dir is not None:
            fig.write_html(
                os.path.join(out_dir, f"contrib_by_{job.lower()}.html"))
    return fig
def plot_density(fobject, other_objects, title):
    """Plot the prediction density per protected subgroup for each model.

    Args:
        fobject: Object exposing ``y``, ``y_hat``, ``protected`` and ``label``.
        other_objects: Optional iterable of further such objects, or ``None``.
        title: Figure title; ``"Density plot"`` is used when ``None``.

    Returns:
        A ``go.Figure`` with one horizontal violin per (model, subgroup) pair.
    """
    objects = [fobject]
    if other_objects is not None:
        objects.extend(other_objects)

    # Collect the per-subgroup frames and concatenate once. DataFrame.append
    # was removed in pandas 2.0 and copied the whole frame on every call.
    frames = []
    for obj in objects:
        for subgroup in np.unique(obj.protected):
            in_subgroup = obj.protected == subgroup
            y, y_hat = obj.y[in_subgroup], obj.y_hat[in_subgroup]
            frames.append(
                pd.DataFrame({
                    'y': y,
                    'y_hat': y_hat,
                    'subgroup': np.repeat(subgroup, len(y)),
                    'model': np.repeat(obj.label, len(y))
                }))
    if frames:
        data = pd.concat(frames)
    else:
        data = pd.DataFrame(columns=['y', 'y_hat', 'subgroup', 'model'])

    models = data.model.unique()
    subgroups = data.subgroup.unique()
    # One colour per subgroup, looked up once instead of once per trace.
    subgroup_colors = _theme.get_default_colors(len(subgroups), type='line')

    fig = go.Figure()
    counter = 0
    for model in models:
        for i, sub in enumerate(subgroups):
            counter += 1
            fig.add_trace(
                go.Violin(box_visible=True,
                          x=data.loc[(data.subgroup == sub)
                                     & (data.model == model)].y_hat,
                          y0=sub + model,
                          name=sub,
                          fillcolor=subgroup_colors[i],
                          opacity=0.9,
                          line_color='black'))

    # Place each model's tick label at the middle of its block of violins.
    violins_in_model = int(counter / len(models))
    starter_violins = np.arange(0, counter, violins_in_model)

    fig.update_xaxes(title='prediction')
    fig.update_yaxes(title='model',
                     tickvals=list(
                         (starter_violins + (violins_in_model - 1) / 2)),
                     ticktext=list(models))

    # Hide doubling entries in the legend: only the first trace of every
    # name keeps its legend entry.
    seen_names = set()
    for trace in fig['data']:
        if trace['name'] in seen_names:
            trace['showlegend'] = False
        else:
            seen_names.add(trace['name'])

    if title is None:
        title = "Density plot"
    fig.update_layout(utils._fairness_theme(title))
    return fig
def violin_plot():
    """Build the decorative ridge plot shown on the initial loading page.

    Twelve rows of seeded random samples are generated and drawn as
    half-violins with a colour gradient; axes, legend and hover are disabled.

    Returns:
        (dcc.Graph)
    """
    np.random.seed(1)

    n_rows = 12
    # The order of the two random draws is kept: randn first, then random.
    scale = np.linspace(1, 2, n_rows)[:, None]
    noise = np.random.randn(n_rows, 200)
    shift = (np.arange(n_rows) + 2 * np.random.random(n_rows))[:, None]
    points = scale * noise + shift

    # Append the global minimum and maximum to every row so all violins
    # share the same extent.
    lowest, highest = points.min(), points.max()
    padded_rows = np.array(
        [np.concatenate((row, [lowest, highest])) for row in points])

    palette = n_colors('rgb(32, 32, 41)',
                       'rgb(190, 155, 137)',
                       12,
                       colortype='rgb')
    data = [
        go.Violin(x=row,
                  line_color=color,
                  side='positive',
                  width=3,
                  points=False,
                  hoverinfo='skip') for row, color in zip(padded_rows, palette)
    ]

    hidden_axis = {
        'showgrid': False,
        'zeroline': False,
        'visible': False,
        'fixedrange': True,
    }
    layout = dict(title='u t i l s . p y',
                  xaxis=dict(hidden_axis),
                  yaxis=dict(hidden_axis),
                  showlegend=False,
                  margin=dict(l=0, r=0, t=80, b=0))

    removed_buttons = [
        'zoom2d', 'pan2d', 'select2d', 'lasso2d', 'zoomIn2d', 'zoomOut2d',
        'autoScale2d', 'resetScale2d', 'toggleSpikelines',
        'hoverClosestCartesian', 'hoverCompareCartesian'
    ]
    return dcc.Graph(figure=dict(data=data, layout=layout),
                     id='violin-plot',
                     config={'modeBarButtonsToRemove': removed_buttons},
                     style={
                         'margin-top': '15vh',
                         'height': '60vh'
                     })
def _get_violin_traces(
    col: str,
    filtered_data: pd.DataFrame,
    colour_axes_name: str,
    x_axes_name: str,
    x_encoding_dict: dict,
    colour_encoding_dict: dict,
):
    """Create the violin traces for one value column.

    Four layouts are produced depending on which axes are set:

    * neither axis: a single black violin named after ``col``;
    * x axis only: a single violin with encoded x values;
    * x axis and colour axis: one grouped violin per colour label;
    * colour axis only: one violin per colour label, using the encoded colour
      values as x.

    Args:
        col: Column holding the plotted values.
        filtered_data: Source DataFrame.
        colour_axes_name: Column used for colouring, or a falsy value.
        x_axes_name: Column used for the x axis, or a falsy value.
        x_encoding_dict: Replacement mapping applied to the x values.
        colour_encoding_dict: Mapping of colour label to display name.

    Returns:
        List of ``go.Violin`` traces.
    """
    values = filtered_data[col]

    if x_axes_name:
        x_values = filtered_data[x_axes_name]
        if not colour_axes_name:
            return [go.Violin(x=x_values.replace(x_encoding_dict), y=values)]

        grouped = []
        for label, display_name in colour_encoding_dict.items():
            has_label = filtered_data[colour_axes_name] == label
            grouped.append(
                go.Violin(
                    x=x_values[has_label].replace(x_encoding_dict),
                    y=values[has_label],
                    name=display_name,
                    legendgroup=display_name,
                    scalegroup=display_name,
                ))
        return grouped

    if not colour_axes_name:
        return [go.Violin(y=values, name=col, line_color="black")]

    by_colour = []
    for label, display_name in colour_encoding_dict.items():
        has_label = filtered_data[colour_axes_name] == label
        by_colour.append(
            go.Violin(
                x=filtered_data[colour_axes_name][has_label].replace(
                    colour_encoding_dict),
                y=values[has_label],
                name=display_name,
            ))
    return by_colour
def plot_ail_dstribution_by_year(data):
    """Build one violin of ``AIL_DEMAND`` for every year in the data's range.

    Args:
        data: DataFrame with ``year`` and ``AIL_DEMAND`` columns. Every year
            from the minimum to the maximum (inclusive) gets a trace.

    Returns:
        The assembled ``go.Figure``.
    """
    first_year = data.year.min()
    last_year = data.year.max()

    demand_dist_by_year = go.Figure()
    for y in range(first_year, last_year + 1):
        yearly_demand = data[data["year"] == y]["AIL_DEMAND"]
        demand_dist_by_year.add_trace(
            go.Violin(y=yearly_demand,
                      name=y,
                      box_visible=True,
                      meanline_visible=True))

    demand_dist_by_year.update_layout(
        title_text="Distribution of AIL by Year")
    return demand_dist_by_year
def box_plot(df, cols):
    """Build a figure with one violin (including inner box) per column.

    Args:
        df: Source DataFrame.
        cols: Iterable of column names to plot.

    Returns:
        The assembled ``go.Figure``.
    """
    traces = [
        go.Violin(y=df[name],
                  name=name,
                  box_visible=True,
                  meanline_visible=True) for name in cols
    ]
    fig = go.Figure(data=traces)
    fig.update_layout(title='Box and Violin Plots')
    return fig
def plot_quantile_returns_violin(return_by_q, ylim_percentiles=None):
    """
    Show a violin plot of period-wise returns for factor quantiles.

    Returns are multiplied by ``DECIMAL_TO_BPS`` before plotting, and one
    violin group is drawn per forward-return window.

    Parameters
    ----------
    return_by_q : pd.DataFrame - MultiIndex
        DataFrame with date and quantile as rows MultiIndex,
        forward return windows as columns, returns as values.
    ylim_percentiles : tuple of integers
        Percentiles of observed data to use as y limits for plot.
        When None, the y range is left unset.
    """
    return_by_q = return_by_q.copy()

    if ylim_percentiles is None:
        ymin = ymax = None
    else:
        ymin, ymax = (
            np.nanpercentile(return_by_q.values, ylim_percentiles[position]) *
            DECIMAL_TO_BPS for position in (0, 1))

    gf = make_subplots(rows=1,
                       cols=1,
                       x_title='分位数',
                       y_title='收益率(基点)',
                       shared_xaxes=True)

    # Reshape from wide (one column per window) to long format.
    returns_bps = return_by_q.multiply(DECIMAL_TO_BPS)
    returns_bps.columns = returns_bps.columns.set_names('forward_periods')
    long_returns = returns_bps.stack()
    long_returns.name = 'return'
    long_returns = long_returns.reset_index()

    for period in long_returns['forward_periods'].unique():
        in_period = long_returns['forward_periods'] == period
        gf.add_trace(
            go.Violin(x=long_returns['factor_quantile'][in_period],
                      y=long_returns['return'][in_period],
                      box_visible=False,
                      legendgroup=period,
                      scalegroup=period,
                      name=period))

    gf.update_traces(meanline_visible=True)
    gf.update_layout(violinmode='group',
                     title_text="因子分位数期间收益率",
                     legend=dict(yanchor="top",
                                 y=0.99,
                                 xanchor="left",
                                 x=0.01))
    gf.update_yaxes(range=[ymin, ymax])
    gf.show()
def create_violin_plot(df, selected_columns):
    """Violin plot for the selected columns of a data frame.

    Args:
        df: Source DataFrame.
        selected_columns: Sized collection of column names; when it is empty
            an empty figure is returned.

    Returns:
        A ``go.Figure`` with one violin per selected column.
    """
    fig = go.Figure()
    if len(selected_columns) != 0:
        for name in selected_columns:
            fig.add_trace(go.Violin(y=df[name], name=name))
    return fig
def q3Plotter(productCD, width=800, height=300):
    """Show a histogram and a violin plot of TransactionAmt for one product.

    Reads the module-level ``mergeData`` frame and plots the
    ``TransactionAmt`` values of the rows whose ``ProductCD`` equals
    ``productCD``, side by side.

    Args:
        productCD: Product code to select; it is concatenated into the title,
            so it must be a string.
        width: Figure width.
        height: Figure height.
    """
    enable_plotly_in_cell()

    # Filter once and reuse the result for both subplots.
    amounts = mergeData[mergeData.ProductCD == productCD].TransactionAmt

    fig = make_subplots(rows=1,
                        cols=2,
                        subplot_titles=['Histogram', 'Violin Plot'],
                        horizontal_spacing=0.1)
    # add_trace(row=, col=) replaces the deprecated append_trace.
    fig.add_trace(go.Histogram(x=amounts), row=1, col=1)
    fig.add_trace(go.Violin(x=amounts, box_visible=True), row=1, col=2)

    fig.update_layout(
        height=height,
        width=width,
        showlegend=False,
        title_text="Distribution of TransactionAmt for ProductCD: " +
        productCD)
    iplot(fig)
def generate_violin_plot(df_complex, df_random, colors):
    """Compare cyclomatic complexity per language for two samples.

    Args:
        df_complex: DataFrame with ``language`` and ``cyclomatic_complexity``
            columns; drawn as the 'Top' group in blue.
        df_random: DataFrame with the same columns; drawn as the 'Random'
            group in orange.
        colors: Mapping providing the ``'background'`` and ``'text'`` colours.

    Returns:
        A grouped violin ``go.Figure`` with a logarithmic y axis.
    """
    samples = (
        (df_complex, 'Top', 'blue'),
        (df_random, 'Random', 'orange'),
    )

    violin_plot = go.Figure()
    for frame, label, line_color in samples:
        violin_plot.add_trace(
            go.Violin(x=frame['language'],
                      y=frame['cyclomatic_complexity'],
                      legendgroup=label,
                      scalegroup=label,
                      name=label,
                      line_color=line_color,
                      points=False,
                      spanmode="hard"))

    violin_plot.update_traces(box_visible=True)
    violin_plot.update_layout(violinmode='group',
                              margin={
                                  'l': 40,
                                  'b': 40,
                                  't': 10,
                                  'r': 10
                              },
                              plot_bgcolor=colors['background'],
                              paper_bgcolor=colors['background'],
                              font_color=colors['text'])
    violin_plot.update_xaxes(title='Languages', showgrid=False)
    violin_plot.update_yaxes(type='log',
                             title='Cyclomatic Complexity',
                             showgrid=False)
    return violin_plot
def plot_cont_resp_cat_pred(self, feat, y, y_name, **kwargs):
    """Write a distribution plot and a violin plot of a binned feature.

    The feature values are cut into as many equal-width bins as there are
    distinct values, standard-normal noise is added to every value, and the
    noisy values are grouped per bin. Empty bins are dropped and the remaining
    groups are relabelled ``group_1 .. group_k``. Two HTML files are written
    into ``plots/``: ``<feat.name>_distr_cont_resp_plot.html`` and
    ``<feat.name>_violin_cont_resp_plot.html``.

    Args:
        feat: Series of feature values; its ``name`` is used in file names.
        y: Not used by this method.
        y_name: Response name shown in the axis titles.
        **kwargs: Must contain ``t_val`` and ``p_val``, which are shown in the
            titles.

    Returns:
        None.

    Raises:
        KeyError: If ``t_val`` or ``p_val`` is missing from ``kwargs``.
    """
    bin_labels = [f"group_{i}" for i in range(len(feat.unique()))]
    ele_group = pd.cut(feat.to_list(),
                       bins=len(bin_labels),
                       labels=bin_labels)

    temp_df = pd.DataFrame({"a": feat.values, "b": ele_group})
    # Add noise to the data.
    temp_df["noise"] = temp_df["a"].values + np.random.normal(
        0, 1, len(temp_df["a"]))
    temp_df = temp_df.groupby("b")["noise"].apply(list).reset_index(
        name="agg")
    # Keep only the bins that actually received values.
    temp_df = temp_df[temp_df["agg"].astype(bool)]
    group_list = temp_df["agg"].to_list()
    group_labels = [f"group_{i}" for i in range(1, len(group_list) + 1)]

    stat_text = f'(t-value={kwargs["t_val"]}) (p-value={kwargs["p_val"]})'
    title_text = "Continuous Response by Categorical Predictor"

    # Distribution plot with a custom bin size.
    fig_1 = ff.create_distplot(group_list, group_labels, bin_size=0.2)
    fig_1.update_layout(
        title=f"{title_text}: {stat_text}",
        xaxis_title=f"Response: {y_name}",
        yaxis_title="Distribution",
    )
    plot_file_1 = f"plots/{feat.name}_distr_cont_resp_plot.html"
    fig_1.write_html(file=plot_file_1, include_plotlyjs="cdn")

    fig_2 = go.Figure()
    for curr_hist, curr_group in zip(group_list, group_labels):
        fig_2.add_trace(
            go.Violin(
                # One x label per sample. The previous fixed count of 200
                # did not match the group size.
                x=np.repeat(curr_group, len(curr_hist)),
                y=curr_hist,
                name=curr_group,
                box_visible=True,
                meanline_visible=True,
            ))
    fig_2.update_layout(
        title=f"{title_text}: {stat_text}",
        xaxis_title="Groupings",
        yaxis_title=f"Response: {y_name}",
    )
    plot_file_2 = f"plots/{feat.name}_violin_cont_resp_plot.html"
    fig_2.write_html(file=plot_file_2, include_plotlyjs="cdn")
    return None