def Violin_Plot(df):
    """Return Dash components with two violin figures of the numeric columns.

    The first figure shows every numeric column standardised as
    ``(x - mean) / std``; the second shows it rescaled as
    ``(x - min) / (max - min)``.

    Args:
        df: DataFrame; only its numeric columns are plotted.

    Returns:
        list: ``[H5, Graph, H5, Graph]`` Dash components.
    """
    numeric_cols = list(df.select_dtypes(include=[np.number]).columns)

    def _build_figure(rescale):
        # One violin per numeric column, each rescaled by ``rescale``.
        figure = go.Figure()
        for col in numeric_cols:
            figure.add_trace(
                go.Violin(
                    x0=col,
                    y=rescale(df[col]),
                    name=col,
                    box_visible=True,
                    meanline_visible=True,
                )
            )
        return figure

    standardised = _build_figure(lambda s: (s - s.mean()) / s.std())
    min_max = _build_figure(lambda s: (s - s.min()) / (s.max() - s.min()))

    return [
        html.H5("Violin Plot:Outlier plot with mean normalization"),
        dcc.Graph(figure=standardised),
        html.H5("Violin Plot:Outlier plot with min-max normalization"),
        dcc.Graph(figure=min_max),
    ]
def gen_distribution_plot(df, feature):
    """Build a split horizontal violin figure comparing Expert and Novice users.

    Args:
        df: DataFrame with ``Value``, ``TaskType``, ``SegmentNumOverall`` and
            ``User`` columns. It is not filtered by ``feature`` here.
        feature: key used to look up the figure title and x-axis title in the
            module-level ``force_labels`` table.

    Returns:
        plotly Figure with one half-violin per user group.
    """
    axis_colour = "#737a8d"
    background = "#171b26"
    # (user label, violin side, colour)
    user_groups = (
        ('Expert', 'positive', 'lightseagreen'),
        ('Novice', 'negative', 'mediumpurple'),
    )

    def _axis(title_text):
        # Shared styling for both axes; only the title text differs.
        return dict(
            zeroline=False,
            automargin=True,
            showticklabels=True,
            title=dict(text=title_text, font=dict(color=axis_colour)),
            linecolor=axis_colour,
            showgrid=False,
        )

    figure = go.Figure()
    for user, side, colour in user_groups:
        rows = df['User'] == user
        figure.add_trace(
            go.Violin(
                x=df['Value'][rows],
                y=df['TaskType'][rows],
                customdata=df['SegmentNumOverall'][rows],
                legendgroup=user,
                scalegroup=user,
                name=user,
                side=side,
                line_color=colour,
                marker=dict(line=dict(width=1, color=colour), symbol='line-ns'),
                orientation='h',
                points='all',
                box=dict(visible=True),
                meanline=dict(visible=True),
            )
        )

    label_rows = force_labels['feature'] == feature
    figure.update_layout(
        title=force_labels['title'][label_rows].to_numpy()[0],
        margin=dict(t=50, b=10, l=50, r=50),
        hovermode='closest',
        xaxis=_axis(force_labels['x_title'][label_rows].to_numpy()[0]),
        yaxis=_axis('Task Type'),
        font=dict(color=axis_colour),
        plot_bgcolor=background,
        paper_bgcolor=background,
    )
    return figure
def render_large_graph(years, countries, cat):
    """Build a violin figure of ``cat`` per country over an inclusive year range.

    Args:
        years: two-element sequence ``(first_year, last_year)``.
        countries: iterable of country names to plot, one violin each.
        cat: name of the column in the module-level ``df`` to plot.

    Returns:
        plotly Figure.
    """
    first_year, last_year = years[0], years[1]
    wanted_years = list(range(first_year, last_year + 1))

    traces = []
    for country in countries:
        country_rows = df[df['Country'] == country]
        selection = country_rows[country_rows['Year'].isin(wanted_years)]
        traces.append(
            go.Violin(
                y=selection[cat],
                x=selection['Country'],
                name=country,
                box_visible=True,
                meanline_visible=True,
                jitter=0.3,
            )
        )

    grid = dict(showgrid=True, gridcolor='lightgray')
    layout = go.Layout(
        title=dict(text=f'Comparison of {cat} from {first_year} to {last_year}'),
        yaxis=dict(title=cat, **grid),
        xaxis=dict(**grid),
        margin=dict(t=80),
        paper_bgcolor='white',
        plot_bgcolor='white',
        showlegend=False,
    )
    return go.Figure(data=traces, layout=layout)
def sequencing_speed_over_time(dfs, path, title, settings, color="#4CB391"):
    """Create and save a violin plot of sequencing speed per time bin.

    Speed is ``lengths / duration``; rows whose ``duration`` is 0 are skipped.

    Args:
        dfs: DataFrame with ``timebin``, ``lengths`` and ``duration`` columns.
        path: prefix prepended to the output file name.
        title: figure title; the default plot title is used when falsy.
        settings: passed through to ``Plot.save``.
        color: fill colour of the violins.

    Returns:
        The ``Plot`` object with ``fig`` and ``html`` populated.
    """
    speed_plot = Plot(
        path=path + "TimeSequencingSpeed_ViolinPlot.html",
        title="Violin plot of sequencing speed over time",
    )

    nonzero_duration = dfs['duration'] != 0
    time_bins = dfs.loc[nonzero_duration, "timebin"]
    speed = dfs.loc[nonzero_duration, "lengths"] / dfs.loc[nonzero_duration, "duration"]

    violin = go.Violin(
        x=time_bins,
        y=speed,
        points=False,
        spanmode="hard",
        line_color='black',
        line_width=1.5,
        fillcolor=color,
        opacity=0.8,
    )
    fig = go.Figure()
    fig.add_trace(violin)
    fig.update_layout(
        xaxis_title='Interval (hours)',
        yaxis_title='Sequencing speed (nucleotides/second)',
        title=title or speed_plot.title,
        title_x=0.5,
    )
    fig.update_xaxes(tickangle=45)

    speed_plot.fig = fig
    speed_plot.html = fig.to_html(full_html=False, include_plotlyjs='cdn')
    speed_plot.save(settings)
    return speed_plot
def quality_over_time(dfs, path, settings, title=None, color="#4CB391"):
    """Create and save a violin plot of basecall quality per time bin.

    Args:
        dfs: DataFrame with ``quals`` and ``timebin`` columns.
        path: prefix prepended to the output file name.
        settings: passed through to ``Plot.save``.
        title: figure title; the default plot title is used when falsy.
        color: fill colour of the violins.

    Returns:
        The ``Plot`` object with ``fig`` and ``html`` populated.
    """
    quality_plot = Plot(
        path=path + "TimeQualityViolinPlot.html",
        title="Violin plot of quality over time",
    )

    violin = go.Violin(
        y=dfs["quals"],
        x=dfs["timebin"],
        points=False,
        spanmode="hard",
        line_color='black',
        line_width=1.5,
        fillcolor=color,
        opacity=0.8,
    )
    fig = go.Figure()
    fig.add_trace(violin)
    fig.update_layout(
        xaxis_title='Interval (hours)',
        yaxis_title='Basecall quality',
        title=title or quality_plot.title,
        title_x=0.5,
    )
    fig.update_xaxes(tickangle=45)

    quality_plot.fig = fig
    quality_plot.html = fig.to_html(full_html=False, include_plotlyjs='cdn')
    quality_plot.save(settings)
    return quality_plot
def plotly1():
    """Serialise a per-person scatter plot and 31 per-question violins to JSON.

    Returns:
        tuple[str, str]: JSON for the scatter trace list and JSON for the
        violin trace list, both encoded with ``PlotlyJSONEncoder``.
    """
    y_scale, x_scale, gender = takeallanswersbyperson()
    people = takeperson()

    # Marker size grows with the magnitude of the y-scale value.
    marker_sizes = [30 * abs(float(value)) for value in y_scale]
    gender_codes = [int(value) for value in gender]

    # The y-scale values are drawn on the x axis and vice versa.
    scatter = go.Scatter(
        mode='markers',
        x=y_scale,
        y=x_scale,
        hovertext=people,
        hoverinfo="text",
        marker_color=gender_codes,
        marker=dict(size=marker_sizes, colorscale='Viridis'),
        showlegend=False,
    )
    scatter_json = json.dumps([scatter], cls=plotly.utils.PlotlyJSONEncoder)

    def _question_violin(question_id):
        # Only the first value returned for the question is plotted.
        answers, _ = takeanswerswithquestionid(question_id)
        return go.Violin(
            y=answers,
            box_visible=True,
            line_color='black',
            points="all",
            meanline_visible=True,
            fillcolor='lightseagreen',
            opacity=0.6,
            x0='Total Bill',
            name='QUESTION' + str(question_id),
        )

    violins = [_question_violin(question_id) for question_id in range(31)]
    violins_json = json.dumps(violins, cls=plotly.utils.PlotlyJSONEncoder)
    return scatter_json, violins_json
def season_dist_plot(ssn: season, y_col: str):
    """Build the season distribution figure with one violin per player.

    Players are drawn in ascending order of their ``Place`` in ``ssn.stats``.

    Args:
        ssn: season object providing ``stats``, ``points.long`` and
            ``schedule.players``.
        y_col: name of the column of ``ssn.points.long`` to plot.

    Returns:
        plotly Figure.
    """
    standings = ssn.stats.sort_values(by='Place', ascending=True)
    fig = go.Figure()

    for player in standings['Player'].to_list():
        is_player = ssn.points.long['Player'] == player
        player_rows = ssn.points.long.loc[is_player, :]
        # Colour is fixed by the player's position in the schedule list.
        fill = COLORS[ssn.schedule.players.index(player)]
        fig.add_trace(
            go.Violin(
                x=player_rows.loc[:, 'Player'],
                y=player_rows.loc[:, y_col],
                fillcolor=fill,
                line_color='gray',
                name=player,
                legendgroup=player,
                box_visible=False,
                pointpos=0,
                meanline_visible=True,
                points='all',
            )
        )

    tick_font = {'size': 18}
    fig.update_xaxes(tickfont=tick_font)
    fig.update_yaxes(title_text=y_col, title_font={'size': 22}, tickfont=tick_font)
    fig.update_layout(
        autosize=False,
        height=400,
        margin=go.layout.Margin(l=50, r=50, b=25, t=25, pad=4),
    )
    return fig
def create_trace(settings):
    """Create the violin trace described by ``settings``.

    Args:
        settings: object exposing ``x``, ``y``, ``data_defined_legend_title``
            and a ``properties`` mapping with the plot options.

    Returns:
        list: a single-element list containing the ``Violin`` trace.
    """
    props = settings.properties

    # For horizontal violins the x and y inputs swap roles.
    if props['box_orientation'] == 'h':
        x_values, y_values = settings.y, settings.x
    else:
        x_values, y_values = settings.x, settings.y

    if settings.data_defined_legend_title != '':
        legend_name = settings.data_defined_legend_title
    else:
        legend_name = props['name']

    trace = graph_objs.Violin(
        x=x_values or None,
        y=y_values,
        name=legend_name,
        customdata=props['custom'],
        orientation=props['box_orientation'],
        points=props['box_outliers'],
        fillcolor=props['in_color'],
        line=dict(color=props['out_color'], width=props['marker_width']),
        opacity=props['opacity'],
        meanline=dict(visible=props['show_mean_line']),
        side=props['violin_side'],
        box_visible=props['violin_box'],
    )
    return [trace]
def plot_processing_QC():
    """Return the placeholder ``processing_QC_plot`` graph component.

    The graph holds a single empty violin labelled "NULL"; the layout uses
    the module-level ``margin``.
    """
    placeholder = go.Violin(
        y=None,
        text="NULL",
        opacity=0.7,
        name="NULL",
        box_visible=True,
        meanline_visible=True,
        points="all",
    )
    layout = go.Layout(
        xaxis={'title': 'QC factor'},
        yaxis={'title': "Counts"},
        margin=margin,
        legend={'x': 0, 'y': 1},
        hovermode='closest',
    )
    return dcc.Graph(
        id='processing_QC_plot',
        figure={'data': [placeholder], 'layout': layout},
    )
def violin():
    """Build the loss-variation violin figure from the module-level ``df``.

    One violin is drawn per loss column.

    Returns:
        dict: ``msgViolin`` (heading text) and ``figViolin`` (figure as JSON).
    """
    cols = [
        'IAM Loses',
        'Inverter Loses',
        'Limiting Loses',
        'Low Light Loses',
        'Ohmic Loses',
        'Power Threshold Losses',
        'Thermal Losses',
    ]
    data = [
        go.Violin(y=df[col], name=col, box_visible=True, meanline_visible=True)
        for col in cols
    ]

    small_font = dict(size=11)
    axis_style = dict(
        tickfont=small_font,
        color='#e6e6e6',
        gridwidth=0.5,
        gridcolor="#333",
        zerolinecolor="#333",
    )
    layout = go.Layout(
        xaxis=dict(**axis_style),
        # The axis title font lives under ``title.font``; the old ``titlefont``
        # shortcut is deprecated and rejected by current plotly releases.
        yaxis=dict(
            title=dict(text='Loss Amount (%)', font=small_font),
            **axis_style,
        ),
        legend=dict(x=0, y=1.2, orientation='h',
                    font=dict(size=12, color='#e6e6e6')),
        plot_bgcolor='#282828',
        paper_bgcolor='#222222',
        margin=dict(l=50, r=20, t=80, b=20),
    )
    figure = go.Figure(data=data, layout=layout)
    return dict(msgViolin='Loss Variations By Type (%)',
                figViolin=figure.to_json())
def DC_viz(df, charter='Plotly', output_chart=False, output_dir=None,
           resolution=150, discrete_first=True, display=False):
    """Generate a violin plot from one discrete and one continuous feature.

    Args:
        df: DataFrame whose first two columns are the features to plot.
        charter: ``'Plotly'`` for a plotly figure; any other value uses
            seaborn/matplotlib.
        output_chart: when true, write ``<col0>_<col1>.png`` into
            ``output_dir / 'charts'``.
        output_dir: directory (``str`` or ``Path``) holding the ``charts``
            folder; required when ``output_chart`` is true.
        resolution: DPI for matplotlib output; plotly uses
            ``resolution // 72`` as its scale factor.
        discrete_first: true when the first column is the discrete feature.
        display: when true, show the figure interactively.

    Raises:
        ValueError: if ``output_chart`` is true but ``output_dir`` is None.
    """
    from pathlib import Path

    first_col = df.columns[0]
    second_col = df.columns[1]
    if discrete_first:
        discrete, continuous = first_col, second_col
    else:
        discrete, continuous = second_col, first_col

    chart_path = None
    if output_chart:
        if output_dir is None:
            raise ValueError("output_dir is required when output_chart=True")
        # Path() accepts both str and Path, so either works for output_dir.
        chart_path = (Path(output_dir) / 'charts'
                      / (first_col + '_' + second_col + '.png'))

    discrete_label = discrete.replace('_', ' ').title()
    continuous_label = continuous.replace('_', ' ').title()

    if charter == 'Plotly':
        fig = go.Figure()
        for level in df[discrete].unique():
            series = df[df[discrete] == level][continuous]
            fig.add_trace(go.Violin(x=series, name=str(level)))
        fig.update_traces(orientation='h', side='positive', width=3,
                          points=False)
        fig.update_layout(xaxis_showgrid=False,
                          xaxis_zeroline=False,
                          xaxis_title=continuous_label,
                          yaxis_title=discrete_label,
                          plot_bgcolor="rgba(0, 0, 0, 0)",
                          paper_bgcolor="rgba(0, 0, 0, 0)",
                          showlegend=False)
        if display:
            fig.show()
        if output_chart:
            # Exported charts use white text on the transparent background.
            fig.update_xaxes(tickcolor='white', tickfont=dict(color='white'))
            fig.update_yaxes(tickcolor='white', tickfont=dict(color='white'))
            fig.update_layout(font=dict(color="white"))
            fig.write_image(str(chart_path), scale=resolution // 72)
    else:
        # x and y must be keywords: seaborn >= 0.12 rejects them positionally.
        sns.violinplot(x=df[discrete], y=df[continuous])
        # Only show a swarm plot if there are fewer than 500 data points.
        if len(df[discrete]) < 500:
            sns.swarmplot(x=df[discrete], y=df[continuous],
                          edgecolor="white", linewidth=1)
        plt.xlabel(discrete_label)
        plt.ylabel(continuous_label)
        if display:
            plt.show()
        if output_chart:
            plt.savefig(chart_path, dpi=resolution)
        plt.close('all')
def reset_tsnr_imgs(sub, task):
    """Build the tSNR distribution figure for one subject and task.

    Reads four echo-2 tSNR TSV files (brain, GM, WM, CSF) from the
    module-level ``data_dir`` and draws one horizontal half-violin per mask.

    Args:
        sub: subject identifier used as the file-name prefix.
        task: task name inserted after ``_task-`` in the file name.

    Returns:
        plotly Figure.
    """
    # (file-name suffix, legend name, index into sequential.Inferno)
    masks = (
        ('_echo-2_desc-rapreproc_braintsnr.tsv', 'Brain', 5),
        ('_echo-2_desc-rapreproc_GMtsnr.tsv', 'GM', 6),
        ('_echo-2_desc-rapreproc_WMtsnr.tsv', 'WM', 7),
        ('_echo-2_desc-rapreproc_CSFtsnr.tsv', 'CSF', 8),
    )

    frames = []
    for suffix, _, _ in masks:
        tsv_path = os.path.join(data_dir, sub + '_task-' + task + suffix)
        frames.append(pd.read_csv(tsv_path, sep='\t').dropna())
    tsnr_values = [frame['tsnr'].to_numpy() for frame in frames]

    layout = go.Layout(
        yaxis=dict(title='Masks'),
        xaxis=dict(title='Temporal signal-to-noise ratio (tSNR)',
                   range=[-20, 250]),
        margin={'t': 0},
    )
    fig3 = go.Figure(layout=layout)
    for (_, label, shade), values in zip(masks, tsnr_values):
        fig3.add_trace(
            go.Violin(x=values, line_color=sequential.Inferno[shade],
                      name=label))

    fig3.update_traces(orientation='h', side='positive', width=3,
                       points=False)
    fig3.update_layout(xaxis_showgrid=True,
                       yaxis_showgrid=True,
                       xaxis_zeroline=False,
                       legend={'traceorder': 'reversed'})
    return fig3
def generate_plot(combined):
    """Render grouped violins comparing audio features for three groups.

    Args:
        combined: DataFrame with ``feature``, ``value`` and ``user`` columns,
            where ``user`` is ``'user a'``, ``'user b'`` or ``'result'``.

    Returns:
        The value of ``plot(fig, output_type='div')``.
    """
    # (value in the ``user`` column, legend label)
    groups = (
        ('user a', "Friend 1"),
        ('user b', "Friend 2"),
        ('result', "Recommendations"),
    )

    fig = go.Figure()
    for user_key, label in groups:
        rows = combined['user'] == user_key
        fig.add_trace(
            go.Violin(
                x=combined['feature'][rows],
                y=combined['value'][rows],
                name=label,
            )
        )

    fig.update_traces(box_visible=True, meanline_visible=True)
    fig.update_layout(
        title={
            'text': "Comparison of Audio Features",
            'yanchor': 'middle',
            'x': 0.52,
        },
        xaxis_title="Audio Features",
        yaxis_title="Normalized Feature Values",
        violinmode='group',
        font=dict(size=12),
        autosize=False,
        width=700,
        height=400,
        margin=dict(l=50, r=10, b=10, t=40, pad=1),
        legend={'orientation': "h", 'y': -0.2, 'x': 0.22},
    )
    return plot(fig, output_type='div')
def render(self, redis_client, model_names, *args):
    """Build the distribution figure for the currently selected model/algo.

    The model and algorithm names are read from the ``model-name`` and
    ``algo-name`` redis keys; the distribution data from
    ``distrib-<dataset>-<model>-<algo>-data``. A distribution missing from
    the data still gets a trace, with its name on the x axis and no values.

    Args:
        redis_client: client whose ``get`` returns bytes or ``None``.
        model_names: unused here.
        *args: unused here.

    Returns:
        dict: figure dictionary with ``data`` and ``layout`` entries.
    """
    model_name = redis_client.get('model-name').decode('utf-8')
    algo_name = redis_client.get('algo-name').decode('utf-8')
    key = 'distrib-{dataset_name}-{model_name}-{algo_name}-data'.format(
        dataset_name=self.dataset_name,
        model_name=model_name,
        algo_name=algo_name)
    dataraw = redis_client.get(key)

    # TODO: Check timestamp to avoid updating if there is no changes
    # *Only check if the render is triggered by n_interval and not by user click
    if dataraw is not None:
        data = json.loads(dataraw.decode('utf-8'))['data']
    else:
        data = {}

    traces = []
    for distrib_name in config.distrib_names:
        if distrib_name in data:
            values = list(data[distrib_name].values())
            labels = [distrib_name] * len(values)
        else:
            values = []
            labels = [distrib_name]
        traces.append(
            go.Violin(
                x=labels,
                y=values,
                points='all',
                pointpos=-1.1,
                jitter=0,
                showlegend=False,
                box=dict(visible=True),
            )
        )

    return {
        'data': traces,
        'layout': dict(
            # The title font lives under ``title.font``; the old layout-level
            # ``titlefont`` shortcut is deprecated.
            title=dict(
                text='{} - {}'.format(model_name, algo_name),
                font=dict(color='#CCCCCC', size=14),
            ),
            autosize=True,
            height=250,
            font=dict(color='#CCCCCC'),
            margin=dict(l=35, r=35, b=35, t=45),
            hovermode="closest",
            plot_bgcolor="#191A1A",
            paper_bgcolor="#020202",
        ),
    }
def generate(self):
    """Build one violin subplot per feature column, split by ``Class``.

    Every column of ``self.dataframe`` except ``Class`` gets its own subplot
    row containing one violin per distinct class value.

    Returns:
        plotly Figure with ``len(columns)`` rows.

    Raises:
        ValueError: if the dataframe has no ``Class`` column.
    """
    columns = list(self.dataframe.columns)
    # remove Class column
    columns.remove('Class')

    # create figure as subplots
    fig = make_subplots(
        rows=len(columns),
        cols=1,
        shared_xaxes=False,
        subplot_titles=columns,
    )

    # map color to class
    colors = px.colors.qualitative.Plotly
    class_labels = self.dataframe['Class']
    color_map = get_color_map(colors, class_labels)

    # Iterate distinct classes only: looping over the raw column would add an
    # identical trace once per row.
    classes = class_labels.unique()

    for row, column in enumerate(columns, start=1):
        for clazz in classes:
            in_class = class_labels == clazz
            fig.add_trace(
                go.Violin(
                    x=class_labels[in_class],
                    y=self.dataframe[column][in_class],
                    name=clazz,
                    marker={'color': color_map[clazz], 'symbol': 'x',
                            'opacity': 0.3},
                    points='all',
                    spanmode='soft',
                ),
                row=row,
                col=1,
            )

    # layout of all traces
    fig.update_traces(box_visible=False, showlegend=True,
                      meanline_visible=True)

    # hide duplicate labels: each class keeps one legend entry across rows
    seen_names = set()
    for trace in fig.data:
        if trace.name in seen_names:
            trace.update(showlegend=False)
        else:
            seen_names.add(trace.name)

    # set specific layout
    fig.update_layout(showlegend=False, height=len(columns) * 200)
    return fig
def analyze(self, q=(0.025, 0.5, 0.975)):
    """Compute summary statistics and an interactive figure of the predictions.

    Parameters
    ----------
    q : tuple, optional
        Quantiles evaluated along axis 0 of the formula and functional group
        predictions, by default (0.025, 0.5, 0.975).

    Returns
    -------
    fig
        Plotly Figure with one violin per atom ("H", "C", "O", "N") taken
        from ``self.formulas``.
    results
        dict with "formula" and "functional" entries, each holding the
        "covariance" matrix and the "quantile" array of the predictions.
    """
    def _summarise(samples):
        # Covariance across columns plus the requested quantiles.
        return {
            "covariance": np.cov(samples, rowvar=False),
            "quantile": np.quantile(samples, q, axis=0),
        }

    results = {
        "formula": _summarise(self.formulas),
        "functional": _summarise(self.functional_groups),
    }

    n_samples = len(self.formulas)
    fig = go.Figure()
    for atom in ("H", "C", "O", "N"):
        fig.add_trace(
            go.Violin(
                x=[atom] * n_samples,
                y=self.formulas[atom],
                name=atom,
                meanline_visible=True,
                opacity=0.6,
            )
        )
    fig.update_layout(title_text="Predicted formula",
                      xaxis_title="Atom",
                      yaxis_title="Number")
    return fig, results
def smoker_graph():
    """Build the violin figure comparing charges of smokers and non-smokers.

    Uses the module-level ``df`` (``smoker`` and ``charges`` columns) and a
    deep copy of the module-level ``layout`` as the base layout.

    Returns:
        plotly Figure.
    """
    layout_count = copy.deepcopy(layout)
    fig = go.Figure(layout=layout_count)

    # (value in the ``smoker`` column, legend label, fill colour)
    groups = (
        ('yes', 'Smoker', '#e8871a'),
        ('no', 'Non-Smoker', '#00c0c7'),
    )
    for flag, label, fill in groups:
        rows = df['smoker'] == flag
        fig.add_trace(
            go.Violin(
                x=df['smoker'][rows],
                y=df['charges'][rows],
                box_visible=True,
                opacity=1,
                legendgroup=label,
                scalegroup='M',
                name=label,
                fillcolor=fill,
                line_color='black',
            )
        )

    fig.update_layout(
        title="Smoker and Non-Smoker Charges",
        title_x=0.5,
        yaxis_title="Charges",
        # ``title_font`` sets layout.title.font; the old ``titlefont`` shortcut
        # is deprecated and rejected by current plotly releases.
        title_font=dict(family='Open Sans', size=18, color="#ffffff"),
        showlegend=False,
        yaxis_zeroline=False,
    )
    return fig
def violin(
    y: Sequence[float],
    title: Optional[str] = None,
    xlabel: Optional[str] = None,
    ylabel: Optional[str] = None,
    name: Optional[str] = None,
    xlim: Optional[List[float]] = None,
    ylim: Optional[List[float]] = None,
    xscale: Optional[str] = None,
    yscale: Optional[str] = None,
    x_dtick: Optional[float] = None,
    y_dtick: Optional[float] = None,
) -> EZPlotlyPlot:
    """
    Make a single violin plot.

    :param y: The data for the violin plot as `Sequence[float]`
    :param title: The title of the plot as `Optional[str]`
    :param xlabel: The x-axis label as `Optional[str]`
    :param ylabel: The y-axis label as `Optional[str]`
    :param name: The name of the violin plot as `Optional[str]` (useful for plotting series).
        The trace is shown in the legend only when a name is given.
    :param xlim: The x-axis limits [x_left_lim, x_right_lim] as `Optional[List[float]]`
    :param ylim: The y-axis limits [y_lower_lim, y_upper_lim] as `Optional[List[float]]`
    :param xscale: The scale of the x-axis ('log', 'linear') as `Optional[str]`
    :param yscale: The scale of the y-axis ('log', 'linear') as `Optional[str]`
    :param x_dtick: The plotting delta tick (i.e. tick length) of the x-axis as `Optional[float]`
    :param y_dtick: The plotting delta tick (i.e. tick length) of the y-axis as `Optional[float]`
    :return: EZPlotlyPlot object representing violin plot
    """
    # plot type
    plot_type = "violin"

    # legend properties
    showlegend = name is not None

    # make violin object
    violin_obj = go.Violin(y=y, name=name, showlegend=showlegend)

    # return
    return EZPlotlyPlot(
        plot_type=plot_type,
        title=title,
        xlabel=xlabel,
        ylabel=ylabel,
        plot_obj=violin_obj,
        xlim=xlim,
        ylim=ylim,
        xscale=xscale,
        yscale=yscale,
        x_dtick=x_dtick,
        y_dtick=y_dtick,
    )
def update_yearplot(route_type, county_name, route_name, scale):
    """Build the per-year traffic violin plot with a median line overlay.

    An empty (or ``None``) filter, or the single value ``['ALL']``, selects
    every value of that field in the module-level ``traffic_df``.

    Args:
        route_type: list of route types to include.
        county_name: list of county names to include.
        route_name: list of route names to include.
        scale: ``'AADT'``, ``'Log10AADT'`` or ``'Percent Change'``.

    Returns:
        dict: figure dictionary with ``data`` and ``layout`` entries.

    Raises:
        ValueError: if ``scale`` is not one of the supported values.
    """
    if not route_type or route_type == ['ALL']:
        route_type = list(traffic_df.route_type.unique())
    if not county_name or county_name == ['ALL']:
        county_name = list(traffic_df.county_name.unique())
    if not route_name or route_name == ['ALL']:
        route_name = list(traffic_df.route.unique())

    filter_mask = (
        traffic_df.route_type.isin(route_type)
        & traffic_df.county_name.isin(county_name)
        & traffic_df.route.isin(route_name)
    )
    plot_df = traffic_df[filter_mask]
    by_year = plot_df.groupby('year')

    # map configurations
    if scale == 'AADT':
        vals = plot_df['average_daily_traffic']
        medians = by_year.average_daily_traffic.median().reset_index()
        title = 'Average Daily Traffic'
    elif scale == 'Log10AADT':
        # NOTE(review): the violins use log10 computed here while the medians
        # use the stored ``log10_adt`` column - confirm the two agree.
        vals = np.log10(plot_df['average_daily_traffic'])
        medians = by_year.log10_adt.median().reset_index()
        title = 'Log10(Average Daily Traffic)'
    elif scale == 'Percent Change':
        vals = plot_df['total_pct_change']
        medians = by_year.total_pct_change.median().reset_index()
        title = 'Total Pct Change'
    else:
        raise ValueError(
            f"Unknown scale {scale!r}; expected 'AADT', 'Log10AADT' "
            "or 'Percent Change'")
    medians.columns = ['year', 'vals']

    data = [
        go.Violin(y=vals,
                  x=plot_df["year"],
                  box_visible=True,
                  meanline_visible=True,
                  opacity=0.6,
                  line_color='mediumpurple'),
        go.Scatter(x=medians.year, y=medians.vals, line_color='lightblue'),
    ]
    layout = go.Layout(title=title,
                       showlegend=False,
                       font=dict(color="white"),
                       margin=go.layout.Margin(t=32, b=25, l=30, r=5),
                       plot_bgcolor="#323130",
                       paper_bgcolor="#323130")
    return {'data': data, 'layout': layout}
def length_over_time(dfs, path, title, settings, log_length=False, color="#4CB391"):
    """Create and save a violin plot of read lengths per time bin.

    Args:
        dfs: DataFrame with ``timebin`` and ``lengths`` columns, plus
            ``log_lengths`` when ``log_length`` is true. If a
            ``length_filter`` column exists, only rows where it is true are
            plotted.
        path: prefix prepended to the output file name.
        title: figure title; the default plot title is used when falsy.
        settings: passed through to ``Plot.save``.
        log_length: plot ``log_lengths`` and label the y axis with powers of
            ten instead of plotting raw lengths.
        color: fill colour of the violins.

    Returns:
        The ``Plot`` object with ``fig`` and ``html`` populated.
    """
    if log_length:
        file_name = "TimeLogLengthViolinPlot.html"
        default_title = "Violin plot of log read lengths over time"
        length_column = "log_lengths"
    else:
        file_name = "TimeLengthViolinPlot.html"
        default_title = "Violin plot of read lengths over time"
        length_column = "lengths"
    time_length = Plot(path=path + file_name, title=default_title)

    # produced by NanoPlot filtering of too long reads
    plotted = dfs[dfs["length_filter"]] if "length_filter" in dfs else dfs

    violin = go.Violin(
        y=plotted[length_column],
        x=plotted["timebin"],
        points=False,
        spanmode="hard",
        line_color='black',
        line_width=1.5,
        fillcolor=color,
        opacity=0.8,
    )
    fig = go.Figure()
    fig.add_trace(violin)
    fig.update_layout(
        xaxis_title='Interval (hours)',
        yaxis_title='Read length',
        title=title or time_length.title,
        title_x=0.5,
    )

    if log_length:
        # Powers of ten up to ten times the longest read (unfiltered data),
        # placed at their log10 position and labelled with the raw value.
        upper_bound = 10 * np.amax(dfs["lengths"])
        ticks = [10**exp for exp in range(10) if 10**exp <= upper_bound]
        fig.update_layout(
            yaxis=dict(tickmode='array',
                       tickvals=np.log10(ticks),
                       ticktext=ticks))

    fig.update_yaxes(tickangle=45)

    time_length.fig = fig
    time_length.html = fig.to_html(full_html=False, include_plotlyjs='cdn')
    time_length.save(settings)
    return time_length
def plotBox(df, listcol, prev_df=None):
    """Plot violins for the given columns, optionally split against a previous period.

    Without ``prev_df`` each column gets one violin coloured from
    ``colorSanity``. With ``prev_df`` each column gets a pair of half violins:
    previous data on the negative side in orange, current data on the
    positive side in blue.

    Args:
        df: DataFrame with the current data.
        listcol: column names to plot; the legend name is the part after the
            last underscore.
        prev_df: optional DataFrame with the previous period's data.
    """
    palette = colorSanity(len(listcol))
    traces = []

    for idx, column in enumerate(listcol):
        label = column.split('_')[-1]

        if prev_df is None:
            traces.append(
                go.Violin(y=df[column],
                          box_visible=True,
                          meanline_visible=True,
                          name=label,
                          opacity=0.6,
                          marker=dict(color=palette[idx])))
            continue

        traces.append(
            go.Violin(y=prev_df[column],
                      box_visible=False,
                      meanline_visible=True,
                      legendgroup='Prev Week',
                      scalegroup='Prev Week',
                      name=label,
                      opacity=0.6,
                      side='negative',
                      marker=dict(color='orange')))
        traces.append(
            go.Violin(y=df[column],
                      box_visible=False,
                      meanline_visible=True,
                      name=label,
                      opacity=0.6,
                      legendgroup='This Week',
                      scalegroup='This Week',
                      side='positive',
                      marker=dict(color='blue')))

    iplot(traces)
def plotViolinPrev(df, prev_df, list_col, name='', color=('orange', 'blue')):
    """Show overlaid violins of current versus previous data for each column.

    Each column gets two violins sharing a legend/scale group and a line
    colour from ``colorSanity``: the previous data at opacity 0.4 and the
    current data at opacity 1.

    Args:
        df: DataFrame with the current data.
        prev_df: DataFrame with the previous data.
        list_col: column names to plot; the legend name is the part after the
            last underscore.
        name: currently unused; kept for interface compatibility.
        color: currently unused - colours always come from ``colorSanity``.
            Kept for interface compatibility; the default is an immutable
            tuple so it cannot be shared and mutated between calls.
    """
    palette = colorSanity(len(list_col))
    fig = go.Figure()

    for idx, column in enumerate(list_col):
        label = column.split('_')[-1]
        fig.add_trace(
            go.Violin(y=prev_df[column],
                      legendgroup=column,
                      scalegroup=column,
                      name="previous " + label,
                      line_color=palette[idx],
                      opacity=0.4))
        fig.add_trace(
            go.Violin(y=df[column],
                      legendgroup=column,
                      scalegroup=column,
                      name=label,
                      line_color=palette[idx],
                      opacity=1))

    fig.update_traces(meanline_visible=True)
    fig.update_layout(violingap=0, violinmode='overlay')
    fig.show()
def create_technology_salary(df, tech_dict, encoded_tech, top_n=10):
    """Build a box plot of salaries for the best-paid technologies.

    Technologies are ranked by the median salary of the ads that reference
    them; only technologies referenced by more than 10 ads are considered.

    Args:
        df: DataFrame with a ``salary`` column.
        tech_dict: mapping of category to list of technology names. Its
            values are lower-cased IN PLACE (pre-existing behaviour, kept
            because callers may rely on it); it is not otherwise used.
        encoded_tech: one-hot encoded technology columns, joined to ``df`` on
            the index.
        top_n: number of technologies to show (default 10, matching the
            figure title).

    Returns:
        plotly Figure with one box per selected technology.
    """
    for key in tech_dict:
        tech_dict[key] = [x.lower() for x in tech_dict[key]]

    df_tech = df[['salary']].join(encoded_tech)
    min_ads = 10

    # NOTE(review): columns[2:] skips ``salary`` AND the first encoded
    # column - confirm the first encoded column is meant to be excluded.
    salaries_by_tech = {}
    for column in df_tech.columns[2:]:
        salaries = df_tech.loc[df_tech[column] == 1, 'salary']
        if salaries.count() > min_ads:
            salaries_by_tech[column] = salaries

    ranked = sorted(
        salaries_by_tech,
        key=lambda tech: salaries_by_tech[tech].median(),
        reverse=True,
    )

    box_fig = go.Figure()
    for tech in ranked[:top_n]:
        box_fig.add_trace(go.Box(y=salaries_by_tech[tech], name=tech))
    box_fig.update_layout(
        dict(title=f'Top {top_n} technologies ranked by highest median salary',
             showlegend=False,
             yaxis_title='salary [in €]'))
    return box_fig
def update_graphics2(RChoice2):
    """Update the bottom graph based on the 2nd RadioItems.

    Args:
        RChoice2: name of the ``base_df`` column to plot on the y axis.

    Returns:
        list: single figure dictionary with one violin per ``Fail_set``
        value (``True`` first, then ``False``).
    """
    fail_flags = base_df['Fail_set']
    traces = []
    for fail_set in (True, False):
        in_group = fail_flags == fail_set
        traces.append(
            go.Violin(
                x=fail_flags[in_group],
                y=base_df[RChoice2][in_group],
                name=str(fail_set),
                box_visible=True,
                meanline_visible=True,
            )
        )
    return [{'data': traces}]
def violin_plot(day_selected, hour_picked):
    """Build a violin figure of load (``carga``) split into five quantile levels.

    Rows are assigned to a level from the 20/40/60/80 % quantiles of
    ``log(carga + 1)``; one violin is drawn per level on a log y axis.

    Args:
        day_selected: optional day filter added to the query when not None.
        hour_picked: optional hour filter added to the query when not None.

    Returns:
        plotly Figure.
    """
    query = Prediccion_query(update_map=False)
    if day_selected is not None:
        query.add_day_filter(day_selected)
    if hour_picked is not None:
        query.add_hour_filter(hour_picked)
    df = Sql().request(query.query)

    df['log_carga'] = np.log(df['carga'] + 1)
    log_load = df['log_carga']
    q20, q40, q60, q80 = (log_load.quantile(p) for p in (0.2, 0.4, 0.6, 0.8))

    # Each band is open on the left and closed on the right.
    df.loc[log_load <= q20, 'cluster'] = 'baja'
    df.loc[(log_load > q20) & (log_load <= q40), 'cluster'] = 'media baja'
    df.loc[(log_load > q40) & (log_load <= q60), 'cluster'] = 'media'
    df.loc[(log_load > q60) & (log_load <= q80), 'cluster'] = 'media alta'
    df.loc[log_load > q80, 'cluster'] = 'alta'

    # Level name -> line colour, from lowest to highest load.
    level_colours = (
        ('baja', '#2dc937'),
        ('media baja', '#99c140'),
        ('media', '#e7b416'),
        ('media alta', '#db7b2b'),
        ('alta', '#cc3232'),
    )

    layout = go.Layout(
        title="Clasificación de niveles de carga",
        margin=go.layout.Margin(l=10, r=10, t=50, b=50),
        showlegend=False,
        paper_bgcolor="white",
        plot_bgcolor="white",
    )
    fig = go.Figure(layout=layout)
    for level, colour in level_colours:
        fig.add_trace(
            go.Violin(y=df['carga'][df['cluster'] == level],
                      name=level,
                      legendgroup=level,
                      line_color=colour))

    fig.update_traces(box_visible=True, meanline_visible=True)
    fig.update_yaxes(type="log")
    return fig
def create_plot_violin(dic_data, message='total'):
    """Serialise one violin trace per entry of ``dic_data`` to JSON.

    Each violin is positioned at an x label of the form
    ``"<message> = <sum of its values>"`` and filled from the module-level
    ``colors`` palette. The palette is cycled, so more series than palette
    entries no longer raises ``IndexError``.

    Args:
        dic_data: mapping of series name to a sequence of numbers.
        message: prefix of the x label.

    Returns:
        str: JSON-encoded list of violin traces.
    """
    traces = []
    for position, (key, values) in enumerate(dic_data.items()):
        total = sum(values)
        # Kept as a pandas Series so the encoded output matches the original.
        series = pd.DataFrame({'y': values})['y']
        traces.append(
            go.Violin(y=series,
                      box_visible=True,
                      line_color='black',
                      meanline_visible=True,
                      fillcolor=colors[position % len(colors)],
                      opacity=0.6,
                      x0=f"{message} = {total}",
                      name=key))
    return json.dumps(traces, cls=plotly.utils.PlotlyJSONEncoder)
def update_season_dist_plot(week: int, y_col: str):
    """Updates specified season distribution plot.

    Uses the rows of the module-level ``points`` table up to and including
    ``week``, recomputes the rank of points-against within each opponent
    group and the season standings, then draws one violin per player in
    ascending order of ``Place``.

    Args:
        week: int, current week
        y_col: str, name of column to plot

    Returns:
        distribution figure
    """
    # Explicit copy: a column is added below, and writing to a slice of the
    # shared ``points`` frame triggers SettingWithCopyWarning.
    temp_points = points.loc[points[WEEK_COL] <= week, :].copy()

    rank_col = COL_JOIN.format(AGAINST_COL, RANK_COL)
    against_points_col = COL_JOIN.format(POINTS_COL, AGAINST_COL)
    temp_points[rank_col] = (
        temp_points.groupby(AGAINST_COL)[against_points_col].rank())

    temp_season = collect_season_stats(temp_points, schedule, WEEK_COL,
                                       PLAYER_COL, AGAINST_COL, POINTS_COL,
                                       RANK_COL)
    temp_players = temp_season.sort_values(
        by="Place", ascending=True)[PLAYER_COL].to_list()

    fig = go.Figure()
    for player in temp_players:
        player_points = temp_points.loc[temp_points[PLAYER_COL] == player, :]
        fig.add_trace(
            go.Violin(
                x=player_points.loc[:, PLAYER_COL],
                y=player_points.loc[:, y_col],
                # Colour is fixed by the player's position in PLAYERS.
                fillcolor=COLORS[PLAYERS.index(player)],
                line_color="gray",
                name=player,
                legendgroup=player,
                box_visible=False,
                pointpos=0,
                meanline_visible=True,
                points="all",
            ))

    fig.update_xaxes(tickfont={"size": 18})
    fig.update_yaxes(title_text=y_col,
                     title_font={"size": 22},
                     tickfont={"size": 18})
    fig.update_layout(
        autosize=False,
        height=400,
        margin=go.layout.Margin(l=50, r=50, b=25, t=25, pad=4),
    )
    return fig
def update_graphics1(RChoice1):
    """Update the top graph based on the 1st RadioItems.

    Args:
        RChoice1: name of the ``base_df`` column to plot on the y axis.

    Returns:
        list: single figure dictionary with one violin per ``Fail_set``
        value (``True`` first, then ``False``).
    """
    # Click handling (clickData) is not implemented yet.
    def _trace_for(fail_set):
        selected = base_df['Fail_set'] == fail_set
        return go.Violin(
            x=base_df['Fail_set'][selected],
            y=base_df[RChoice1][selected],
            name=str(fail_set),
            box_visible=True,
            meanline_visible=True,
        )

    return [{'data': [_trace_for(True), _trace_for(False)]}]
def _graph_fitness_statistics(self):
    """Write ``Fitness Stats.html`` with violins of fitness scores per generation.

    A violin is drawn for every generation ``g`` where ``(g - 1)`` is a
    multiple of 10, plus the last recorded generation.
    """
    stats = self.overall_generational_stats
    # Sort once; the original re-sorted the keys on every loop iteration.
    generations = sorted(stats)

    data = [
        go.Violin(name='gen {0}'.format(generation),
                  y=stats[generation]['fitness scores'],
                  points='all',
                  jitter=0.3,
                  pointpos=-1.8,
                  meanline=dict(visible=True),
                  box=dict(visible=True))
        for generation in generations
        if (generation - 1) % 10 == 0 or generation == generations[-1]
    ]
    plotly.offline.plot(data, filename='Fitness Stats.html')
def monthly_distribution(sample_year, months, years, temps, monthly_path):
    '''
    Plot the monthly temperature distributions of one year and export them.

    Parameters
    ----------
    sample_year: integer
        year to plot (the data are expected to cover 2007 or 2008)
    months : numpy.ndarray (integer)
        1-dimensional numpy array with month identifiers
    years: numpy.ndarray (integer)
        year of the measurement
    temps: numpy.ndarray (integer)
        temperature (Celsius) of the measurement
    monthly_path: string
        path where to export the distribution in .html
    '''
    abbreviations = ('Jan.', 'Feb.', 'Mar.', 'Apr.', 'May', 'Jun.', 'Jul.',
                     'Aug.', 'Sept.', 'Oct.', 'Nov.', 'Dec.')
    labels = ['{m} {y}'.format(m=abbr, y=sample_year)
              for abbr in abbreviations]
    palette = n_colors('rgb(10, 200, 197)', 'rgb(10, 200, 197)', 12,
                       colortype='rgb')
    in_year = years == sample_year

    fig = go.Figure()
    # NOTE(review): months are paired with labels by position in the sorted
    # unique values, so a month missing from ``months`` shifts every later
    # label - confirm all twelve months are always present.
    for month, colour, label in zip(np.unique(months), palette, labels):
        selected = ((months == month) & in_year).nonzero()[0]
        fig.add_trace(
            go.Violin(x=temps[selected],
                      line=dict(color=colour),
                      orientation='h',
                      side='positive',
                      points=False,
                      name=label))

    fig.layout.update(
        title='Distribution of Monthly Temperatures {y} (kernel density estimation plot)'
        .format(y=sample_year),
        xaxis=dict(title="Temperature (C°)"))
    pyo.plot(fig, filename=monthly_path)