def plot_range_comparison(self, xlabel: str = '', figsize: Tuple[int] = (7, 3), add_text_label: bool = True, **kwargs):
    """Build a bar chart of comparison counts per category, filled by direction.

    Args:
        xlabel: Text used for the x-axis label.
        figsize: Value passed to ``p9.theme(figure_size=...)``.
        add_text_label: When true, bars of the 'Positive' and 'Negative'
            directions that have a count above zero are annotated with the
            ``label`` column.
        **kwargs: Forwarded unchanged to ``self.get_ranges_df``.

    Returns:
        The assembled plotnine plot object.
    """
    ranges = self.get_ranges_df(**kwargs)

    chart = p9.ggplot(ranges)
    chart = chart + p9.aes('cat_value', 'counts', fill='direction')
    chart = chart + p9.geom_col(alpha=.8)
    chart = chart + p9.theme(
        figure_size=figsize,
        axis_text_x=p9.element_text(rotation=45),
    )
    chart = chart + p9.scale_fill_manual(['#3f7f93', '#da3b46', '#4d4a4a'])
    chart = chart + p9.labs(x=xlabel, y='Number of Comparisons', fill='R')

    if not add_text_label:
        return chart

    # (direction, y-position expression, text colour) for each annotated group.
    annotated_groups = (
        ('Positive', 'n + max(n) * .15', '#3f7f93'),
        ('Negative', 'n + max(n) * .05', '#da3b46'),
    )
    for direction, y_expression, colour in annotated_groups:
        visible = ranges.loc[ranges.direction == direction].loc[ranges.counts > 0]
        # Skip the layer entirely when there is nothing to annotate.
        if visible.size > 0:
            chart += p9.geom_text(
                p9.aes(label='label', x='cat_value', y=y_expression),
                inherit_aes=False,
                size=9,
                data=visible,
                color=colour,
            )
    return chart
def plot_mem(df):
    """Bar chart of ``pcnt`` per column with the ``size`` text drawn on each bar.

    The input frame is copied, not modified. It must provide the columns
    ``col_name``, ``pcnt`` and ``size``. Bars taller than 30% of the tallest
    bar get a white label placed just inside the bar; the remaining bars get
    a gray label placed just above the bar.

    Returns:
        The assembled plotnine plot object.
    """
    x = df.copy()

    # Helper columns: an ordered categorical that keeps the incoming bar
    # order, and the label anchor positions just above / below the bar top.
    names = [str(value) for value in x['col_name']]
    x['new_cols'] = names
    x['new_cols'] = pd.Categorical(x['new_cols'], categories=x['new_cols'], ordered=True)
    offset = (np.max(x.pcnt.values)) / 70
    x['cnt_print_loc_pos'] = (x.pcnt.values) + offset
    x['cnt_print_loc_neg'] = (x.pcnt.values) - offset

    # Base bar chart.
    base_mapping = p9.aes(x='new_cols', y='pcnt', fill='new_cols')
    rotated_ticks = p9.theme(axis_text_x=p9.element_text(rotation=45, hjust=1))
    ggplt = (
        p9.ggplot(x, base_mapping)
        + p9.geom_bar(stat='identity')
        + p9.guides(fill=False)
        + p9.ylab('% of total size')
        + p9.xlab('')
        + rotated_ticks
    )

    # Labels for the tall bars go inside the bar.
    tall = x.copy()[x.pcnt > 0.3 * np.max(x.pcnt)]
    inside_mapping = p9.aes(x='new_cols', y='cnt_print_loc_neg', label='size', fill='col_name')
    ggplt = ggplt + p9.geom_text(
        inside_mapping, inherit_aes=False, data=tall,
        color='white', angle=90, vjust='top',
    )

    # Labels for the short bars go above the bar.
    short = x.copy()[x.pcnt <= 0.3 * np.max(x.pcnt)]
    above_mapping = p9.aes(x='new_cols', y='cnt_print_loc_pos', label='size', fill='col_name')
    ggplt = ggplt + p9.geom_text(
        above_mapping, inherit_aes=False, data=short,
        color='gray', angle=90, vjust='bottom',
    )

    return ggplt
def test_text_aesthetics():
    """Compare a plot exercising several geom_text aesthetics to its baseline."""
    text_layers = [
        geom_text(aes('x', label='label'), size=15, ha='left'),
        geom_text(aes('x+1', angle='angle'), size=15, va='top',
                  show_legend=False),
        geom_text(aes('x+2', label='label', alpha='z'), size=15,
                  show_legend=False),
        geom_text(aes('x+3', color='factor(z)'), size=15, show_legend=False),
        geom_text(aes('x+5', size='z'), ha='right', show_legend=False),
    ]

    p = ggplot(df, aes(y='y', label='label'))
    for text_layer in text_layers:
        p = p + text_layer
    p = p + scale_size_continuous(range=(12, 30))
    p = p + scale_y_continuous(limits=(-0.5, n - 0.5))

    assert p == 'text_aesthetics'
def test_text_aesthetics():
    """Check label, angle, alpha, color and size mappings of geom_text."""
    base = ggplot(df, aes(y='y', label='label'))

    plain = geom_text(aes('x', label='label'), size=15, ha='left')
    angled = geom_text(
        aes('x+1', angle='angle'), size=15, va='top', show_legend=False)
    faded = geom_text(
        aes('x+2', label='label', alpha='z'), size=15, show_legend=False)
    coloured = geom_text(
        aes('x+3', color='factor(z)'), size=15, show_legend=False)
    sized = geom_text(aes('x+5', size='z'), ha='right', show_legend=False)

    size_scale = scale_size_continuous(range=(12, 30))
    y_scale = scale_y_continuous(limits=(-0.5, n-0.5))

    p = (base + plain + angled + faded + coloured + sized
         + size_scale + y_scale)

    assert p == 'text_aesthetics'
def test_stat_count_float():
    """Weighted counts with float weights are shown as bars and as labels."""
    weighted = pd.DataFrame({
        'x': ['a', 'b'],
        'weight': [1.5, 2.5],
    })

    mapping = aes(x='x', weight='weight', fill='x')
    count_labels = geom_text(aes(label=after_stat('count')), stat='count')
    p = ggplot(weighted) + mapping + geom_bar() + count_labels

    assert p + _theme == 'stat-count-float'
def plot_significance_vs_ranking(
    summary_df, method_name, x_label, output_figure_filename
):
    """Scatter a method's real test statistic against its percentile rank.

    Points whose percentile rank is above 0.9 are redrawn in red and
    labelled with their index value. The figure is printed and then saved
    as an SVG to ``output_figure_filename``.

    Args:
        summary_df: Frame holding the "<statistic> (Real)" column for the
            method and a "Rank (simulated)" column.
        method_name: Key into ``method_stats_dict``; also used in the title.
        x_label: Label for the x axis.
        output_figure_filename: Destination path of the saved figure.
    """
    statistic_column = method_stats_dict[method_name] + " (Real)"

    # Format input dataframe
    plot_df = pd.DataFrame(
        data={
            "Test statistic": summary_df[statistic_column].values,
            "Percentile rank": summary_df["Rank (simulated)"].rank(pct=True).values,
        },
        index=summary_df.index,
    )

    # Only the top decile gets a visible text label; the rest get "".
    point_labels = [
        idx if plot_df.loc[idx, "Percentile rank"] > 0.9 else ""
        for idx in plot_df.index
    ]
    top_decile = plot_df[plot_df["Percentile rank"] > 0.9]

    sans = "sans-serif"
    text_theme = pn.theme(
        legend_title_align="center",
        plot_background=pn.element_rect(fill="white"),
        legend_key=pn.element_rect(fill="white", colour="white"),
        legend_title=pn.element_text(family=sans, size=15),
        legend_text=pn.element_text(family=sans, size=12),
        plot_title=pn.element_text(family=sans, size=15),
        axis_text=pn.element_text(family=sans, size=12),
        axis_title=pn.element_text(family=sans, size=15),
    )

    fig = (
        pn.ggplot(plot_df, pn.aes(x="Test statistic", y="Percentile rank"))
        + pn.geom_point()
        + pn.geom_point(
            top_decile,
            pn.aes(x="Test statistic", y="Percentile rank"),
            color="red",
        )
        + pn.geom_text(pn.aes(label=point_labels), ha="left", va="top", size=5)
        + pn.labs(
            x=x_label,
            y="Percentile of ranking",
            title=f"{method_name} pathway statistics vs ranking",
        )
        + pn.theme_bw()
        + text_theme
    )

    print(fig)

    # Save figure
    fig.save(
        output_figure_filename,
        format="svg",
        bbox_inches="tight",
        transparent=True,
        pad_inches=0,
        dpi=300,
    )
def getErrorPlot(self, msg="Error Occured"):
    """Return a bare plot whose only content is ``msg`` drawn in white text.

    Axis text and both sets of panel grid lines are blanked so only the
    message is visible on the themed background.
    """
    message_frame = DataFrame({"x": [10], "y": [2], "label": [msg]})
    blank_canvas = theme(
        figure_size=(20, 4),
        axis_text=element_blank(),
        panel_grid_major=element_blank(),
        panel_grid_minor=element_blank(),
    )
    return (
        ggplot(message_frame, aes(x="x", y="y", label="label"))
        + geom_text(color="white")
        + THEME.cat_colors_lines
        + THEME.mt
        + blank_canvas
    )
def test_stat_count_int():
    """Weighted counts with integer weights are shown as bars and as labels."""
    weighted = pd.DataFrame({
        'x': ['a', 'b'],
        'weight': [1, 2],
    })

    mapping = aes(x='x', weight='weight', fill='x')
    count_labels = geom_text(aes(label='stat(count)'), stat='count')
    p = ggplot(weighted) + mapping + geom_bar() + count_labels

    assert p + _theme == 'stat-count-int'
def create_length_plot(len_df, legend_position='right', legend_box='vertical'):
    """Faceted density histogram of ``x`` per method with mean markers.

    For every (Task, Method) pair the mean is drawn as a dashed vertical
    segment with its value printed above it.

    Args:
        len_df: Frame with ``Task``, ``Method`` and ``x`` columns.
        legend_position: Passed to ``theme(legend_position=...)``.
        legend_box: Passed to ``theme(legend_box=...)``.

    Returns:
        The assembled plotnine plot object.
    """
    mean_len_df = len_df.groupby(['Task', 'Method']).mean().reset_index()
    # The single-space column supplies a title-less linetype legend entry.
    mean_len_df[' '] = 'Mean Length'

    mean_value_text = geom_text(
        aes(x='x', y=.22, label='x', color='Method'),
        mean_len_df,
        inherit_aes=False,
        format_string='{:.1f}',
        show_legend=False,
    )
    mean_marker = geom_segment(
        aes(x='x', xend='x', y=0, yend=.205, linetype=' '),
        mean_len_df,
        inherit_aes=False,
        color='black',
    )
    legend_theme = theme(
        aspect_ratio=1,
        legend_title=element_blank(),
        legend_position=legend_position,
        legend_box=legend_box,
    )

    plt = ggplot(len_df) + aes(x='x', fill='Method', y='..density..')
    plt = plt + geom_histogram(binwidth=2, position='identity', alpha=.6)
    plt = plt + mean_value_text + mean_marker
    plt = plt + scale_linetype_manual(['dashed']) + facet_wrap('Task')
    plt = plt + xlim(0, 20) + ylim(0, .23)
    plt = plt + xlab('Example Length') + ylab('Frequency')
    plt = plt + scale_color_manual(values=COLORS) + scale_fill_manual(values=COLORS)
    plt = plt + theme_fs() + legend_theme
    return plt
def plot_vs_discrete(data_table,
                     discrete_metric_name,
                     metric_name,
                     segment_name,
                     title,
                     ylim=None,
                     aggregate="mean"
                     ):
    """Dodged bar chart of an aggregated metric per discrete value and segment.

    Rows where either ``metric_name`` or ``discrete_metric_name`` is null are
    dropped, the metric is cast to float, aggregated per
    (discrete value, segment) and rounded to three decimals. Each bar is
    labelled with its value.

    Args:
        data_table: Source DataFrame.
        discrete_metric_name: Column used for the x axis.
        metric_name: Column that is aggregated and plotted on the y axis.
        segment_name: Column used for the bar fill / dodge groups.
        title: Plot title.
        ylim: Optional y-axis limits handed to ``plot.ylim``. ``None`` or a
            scalar null (e.g. NaN) leaves the axis unconstrained.
        aggregate: Aggregation passed to ``DataFrame.agg``; it is also
            concatenated into the y-axis label, so it must be a string.

    Returns:
        The assembled plot object.
    """
    has_values = (pd.notnull(data_table[metric_name])
                  & pd.notnull(data_table[discrete_metric_name]))
    # Copy so the dtype cast below writes into an independent frame instead
    # of a chained-indexing slice of ``data_table``.
    data_filtered = data_table.loc[has_values][
        [discrete_metric_name, metric_name, segment_name]].copy()
    data_filtered[[metric_name]] = data_filtered[[metric_name]].astype(float)

    result = (data_filtered
              .groupby([discrete_metric_name, segment_name])
              .agg({metric_name: aggregate})
              .reset_index())
    result[metric_name] = round(result[metric_name], 3)

    gg_result = plot.ggplot(result) + plot.aes(x=discrete_metric_name,
                                               y=metric_name,
                                               fill=segment_name,
                                               label=metric_name
                                               ) + \
                plot.geom_bar(stat="identity", position="dodge") + \
                plot.geom_text(position=plot.position_dodge(width=.9), size=8) + \
                plot.labs(x=discrete_metric_name, y=aggregate + "(" + metric_name + ")", title=title)

    # ``pd.notnull`` returns an array for list input, which cannot be used as
    # an ``if`` condition; only treat None and scalar nulls as "no limit".
    ylim_missing = ylim is None or (pd.api.types.is_scalar(ylim) and pd.isnull(ylim))
    if not ylim_missing:
        gg_result = gg_result + plot.ylim(ylim)

    return gg_result
def make_sentiment_plot(sentiment_df, exclude_zero_bin=True, plot_text_labels=True):
    """Render a date-by-bin heat map of how many values fall in each bin.

    Every column named ``bin_<YYYY-MM-DD>`` is tallied with a Counter; each
    (date, bin, count) triple becomes one tile.

    Args:
        sentiment_df: Frame whose ``bin_*`` columns hold bin names.
        exclude_zero_bin: When true, the "0.0" bin and any non-string bin
            value are left out.
        plot_text_labels: When true, the count is printed on each tile.

    Returns:
        Whatever ``plot_as_inline_html_data`` returns for the plot.
    """
    rows = []
    print(
        "Sentiment plot: exclude zero bins? {} show text? {}".format(
            exclude_zero_bin, plot_text_labels
        )
    )

    bin_columns = [name for name in sentiment_df.columns if name.startswith("bin_")]
    for column in bin_columns:
        date = column[4:]  # strip the "bin_" prefix
        for bin_name, val in Counter(sentiment_df[column]).items():
            keep = not exclude_zero_bin or (
                isinstance(bin_name, str) and bin_name != "0.0"
            )
            if not keep:
                continue
            rows.append(
                {
                    "date": datetime.strptime(date, "%Y-%m-%d"),
                    "bin": str(bin_name),
                    "value": int(val),
                }
            )

    df = pd.DataFrame.from_records(rows)

    # HACK TODO FIXME: should get from price_change_bins()...
    order = [
        "-1000.0", "-100.0", "-10.0", "-5.0", "-3.0", "-2.0", "-1.0", "-1e-06",
        "1e-06", "1.0", "2.0", "3.0", "5.0", "10.0", "25.0", "100.0", "1000.0",
    ]
    df["bin_ordered"] = pd.Categorical(df["bin"], categories=order)

    plot = p9.ggplot(df, p9.aes("date", "bin_ordered", fill="value"))
    plot = plot + p9.geom_tile(show_legend=False)
    plot = plot + p9.theme_bw()
    plot = plot + p9.xlab("")
    plot = plot + p9.ylab("Percentage daily change")
    plot = plot + p9.theme(
        axis_text_x=p9.element_text(angle=30, size=7), figure_size=(10, 5)
    )
    if plot_text_labels:
        plot = plot + p9.geom_text(p9.aes(label="value"), size=8, color="white")
    return plot_as_inline_html_data(plot)
def plot_train_test(ags):
    """Plot test-time against train-time compute, one line per Elo level.

    Solid lines show the frontier data, dashed lines the fitted values
    (``test_flops_hat``). Each line is labelled with its Elo at its lowest
    ``train_flops`` row, and the fitted relationship is printed as an
    annotation.

    Returns:
        The assembled plotnine plot object.
    """
    frontiers, model = data.train_test_model(data.train_test(ags))

    # One label per Elo level, anchored at that level's smallest train_flops.
    by_train = frontiers.sort_values('train_flops')
    labs = by_train.groupby('elo').first().reset_index()

    desc = f'log₁₀(test) = {model.params[1]:.1f} · log₁₀(train) + {model.params[2]:.1g} · elo + {model.params[0]:.0f}'

    mapping = pn.aes(x='train_flops', y='test_flops', color='elo', group='elo')
    observed = pn.geom_line(size=.5, show_legend=False)
    fitted = pn.geom_line(
        pn.aes(y='test_flops_hat'),
        size=.25, show_legend=False, linetype='dashed')
    elo_labels = pn.geom_text(
        pn.aes(label='elo.astype(int)'), labs,
        show_legend=False, size=6, nudge_y=+.2)
    formula_note = pn.annotate(
        'text', 1.5e13, 5e9,
        label=desc, ha='left', size=6, family='serif')
    axis_labels = pn.labs(
        x='Train-time compute (FLOPS-seconds)',
        y='Test-time compute (FLOPS-seconds)')

    return (
        pn.ggplot(frontiers, mapping)
        + observed
        + fitted
        + elo_labels
        + pn.scale_color_cmap(limits=(-1500, 0))
        + pn.scale_x_continuous(trans='log10')
        + pn.scale_y_continuous(trans='log10')
        + formula_note
        + axis_labels
        + plot.IEEE())
def summary(tags, opts=None):
    """Count annotations per (tag, background) and save them as a bar chart.

    Prints the raw tags and the aggregated table, writes the chart to
    "tag_species_bg.png" and finally prints the plot object.

    Args:
        tags: Frame with at least ``tag`` and ``background`` columns.
        opts: Unused; kept so existing callers keep working.
    """
    print(tags)
    tags_summary = (
        tags.groupby(["tag", "background"])
        .agg({"tag": "count"})
        .rename(columns={"tag": "n_tags"})
        .reset_index()
        .astype({"background": "category", "tag": "category"})
    )
    print(tags_summary)

    plt = (
        ggplot(
            data=tags_summary,
            mapping=aes(x="tag", y="n_tags", fill="background"),
        )
        + geom_bar(stat="identity", show_legend=True, position=position_dodge())
        + xlab("Species")
        + ylab("Number of annotations")
        + geom_text(mapping=aes(label="n_tags"), nudge_y=15)
        + theme_classic()
        + theme(axis_text_x=element_text(angle=90, vjust=1, hjust=1, margin={"r": -30}))
    )
    # Keep the plot object and save it in a separate step: binding the name
    # to the result of ``.save()`` made the final print show ``None``.
    plt.save("tag_species_bg.png", width=10, height=8)
    print(plt)
def p(N=3):
    """Yield *N* plots that differ only in their "<i> of <N>" title."""
    mapping = aes(x='wt', y='mpg', label='name')
    template = ggplot(mapping, data=mtcars) + geom_text()

    position = 1
    while position <= N:
        title = '%d of %d' % (position, N)
        yield template + ggtitle(title)
        position += 1
def plot_revigo(
        rev,
        outline=2,
        expand_points=(1.05, 1.2),
        figure_size=(8, 8),
        font_size=8,
        point_size=3,
        point_alpha=0.7,
        palette='RdPu',
        dispensability_cutoff=1.,
        show_all_labels=False,
        text_column='name',
        term_size_limit=None,
):
    """Scatter plot of terms at (plot_X, plot_Y) with text labels.

    Points are filled by ``neglog10`` and sized by ``frequency``. Unless
    ``show_all_labels`` is set, only terms with ``eliminated == 0`` and a
    ``dispensability`` below ``dispensability_cutoff`` (and, if given, a
    ``term_size`` below ``term_size_limit``) are labelled. Labels are drawn
    with a white outline and passed through plotnine's ``adjust_text``
    option.

    ``outline`` and ``point_size`` are accepted but not used by this body.

    Returns:
        The assembled plotnine plot object.
    """
    import plotnine as p9
    import matplotlib.patheffects as path_effects

    # White halo behind the label glyphs.
    pe = [
        path_effects.Stroke(linewidth=2, foreground='white'),
        path_effects.Normal()
    ]

    if show_all_labels:
        lbl_df = rev
    else:
        keep = (rev.eliminated == 0) & (rev.dispensability < dispensability_cutoff)
        lbl_df = rev[keep]
        if term_size_limit is not None:
            lbl_df = lbl_df[lbl_df.term_size < term_size_limit]

    label_adjustment = {
        'expand_points': expand_points,
        'arrowprops': {
            'arrowstyle': '-'
        },
        'x': rev.plot_X.values,
        'y': rev.plot_Y.values
    }

    points = p9.geom_point(
        p9.aes(fill='neglog10', size='frequency'),
        color='black',
        alpha=point_alpha)
    term_labels = p9.geom_text(
        p9.aes(label=text_column),
        data=lbl_df,
        size=font_size,
        adjust_text=label_adjustment,
        path_effects=pe)
    legend_titles = p9.labs(
        x='Semantic similarity space',
        y='',
        fill='-log10(adj. p-value)',
        size='Term frequency')
    bare_axes = p9.theme(
        figure_size=figure_size,
        axis_text_x=p9.element_blank(),
        axis_text_y=p9.element_blank(),
        axis_ticks=p9.element_blank())

    g = p9.ggplot(p9.aes(x='plot_X', y='plot_Y'), data=rev)
    g = g + points + term_labels + p9.theme_bw()
    g = g + p9.scale_fill_distiller(type='seq', palette=palette, direction=1)
    g = g + legend_titles
    g = g + p9.scale_size_continuous(range=(2, 7), trans='log10')
    g = g + bare_axes
    return g
def test_dodge_preserve_single_text():
    """Count labels follow the bars when both share a preserve='single' dodge."""
    df1 = pd.DataFrame({
        'x': ['a', 'b', 'b', 'b'],
        'y': ['a', 'a', 'b', 'b'],
    })
    d = position_dodge(preserve='single', width=0.9)

    bars = geom_bar(position=d)
    counts = geom_text(
        aes(y=after_stat('count'), label=after_stat('count')),
        stat='count',
        position=d,
        va='bottom')
    p = ggplot(df1, aes('x', fill='y')) + bars + counts

    assert p + _theme == 'dodge_preserve_single_text'
def plotfreq(freqdf):
    '''
    Horizontal bar chart of a frequency table, bars ordered by frequency.

    Parameters
    ----------
    freqdf
        dataframe generated by freq(): exactly four columns, the last three
        named 'freq', 'perc' and 'cump', and at most 100 rows

    Returns
    -------
    Bar chart labelled with frequencies & percentages, or None (after
    printing a message) when the input fails one of the checks

    Example
    -------
    import exploretransform as et
    df, X, y = et.loadboston()
    et.plotfreq(et.freq(X['town']))

    Warning
    -------
    This function will likely not plot more than 100 unique levels properly.
    '''
    # input checks: each failure prints its message and returns None
    if not isinstance(freqdf, pd.core.frame.DataFrame):
        print("\nFunction only accetps dataframes\n")
        return None

    if len(freqdf.columns) != 4:
        print("\nInput must be a dataframe generated by freq()\n")
        return None

    matching_names = sum(freqdf.columns[1:4] == ['freq', 'perc', 'cump'])
    if matching_names != 3:
        print("\nInput must be a dataframe generated by freq()\n")
        return None

    if not len(freqdf) < 101:
        print("\nUnable to plot more than 100 items")
        return None

    # label for plot: "<freq> [ <perc>% ]"
    freq_text = freqdf['freq'].astype(str)
    perc_text = '[ ' + freqdf['perc'].astype(str) + '%' + ' ]'
    lbl = freq_text.str.cat(perc_text, sep=' ')

    # x aesthetic: first column reordered by frequency
    level_column = freqdf.columns[0]
    aesx = 'reorder(' + level_column + ', freq)'

    # build plot
    bar_theme = pn.theme(
        axis_text_y=pn.element_text(size=6, weight='bold'),
        legend_position='none')
    plot = pn.ggplot(freqdf)
    plot = plot + pn.aes(x=aesx, y='freq', fill='freq', label=lbl)
    plot = plot + pn.geom_bar(stat='identity')
    plot = plot + pn.coord_flip()
    plot = plot + bar_theme
    plot = plot + pn.labs(x=level_column, y="Freq")
    plot = plot + pn.scale_fill_gradient2(mid='bisque', high='blue')
    plot = plot + pn.geom_text(size=6, nudge_y=.7)

    return plot
def plot_optimal_model_size(ags):
    """Plot the best-performing model size against a train-compute budget.

    For each board size, ten compute budgets are taken log-evenly across
    that board's ``train_flops`` range; for each budget the parameter count
    (``width**2 * depth``) of the highest-Elo row within budget is recorded.
    A log-log OLS fit of params on compute is drawn as a dashed line and
    printed as an annotation.

    Returns:
        The assembled plotnine plot object.
    """
    from statsmodels.formula import api as smf

    results = {}
    for boardsize, group in ags.groupby('boardsize'):
        ordered = group.sort_values('elo').copy()
        ordered['params'] = group.width**2 * group.depth

        low = np.log10(group.train_flops.min())
        high = np.log10(group.train_flops.max())
        # Skip the first grid point: it is the minimum budget itself.
        for exponent in np.linspace(low, high, 11)[1:]:
            budget = 10**exponent
            subset = ordered[ordered.train_flops <= budget]
            # Rows are sorted by Elo, so the last one is the best in budget.
            results[boardsize, budget] = subset.params.iloc[-1]

    df = pd.Series(results).reset_index()
    df.columns = ['boardsize', 'approx_flops', 'params']

    model = smf.ols('np.log10(params) ~ np.log10(approx_flops) + 1', df).fit()

    left = np.log10(df.approx_flops.min())
    right = np.log10(df.approx_flops.max())
    preds = pd.DataFrame({'approx_flops': 10**np.linspace(left, right, 21)})
    preds['params'] = 10**model.predict(preds)

    # Last (largest-budget) row per board size, used for points and labels.
    last_per_board = df.sort_values('approx_flops').groupby('boardsize').last().reset_index()
    points = last_per_board
    labs = last_per_board.copy()
    labs['params'] = labs.apply(
        lambda r: df[df.approx_flops <= r.approx_flops].params.max(), axis=1)

    desc = f'log₁₀(params) = {model.params[1]:.2f} · log₁₀(compute) − {-model.params[0]:.1f}'

    per_board = pn.geom_line(
        pn.aes(color='factor(boardsize)', group='boardsize'),
        show_legend=False)
    trend = pn.geom_line(data=preds, linetype='dashed', size=.25)
    end_points = pn.geom_point(
        pn.aes(color='factor(boardsize)', group='boardsize'),
        data=points, size=.5, show_legend=False)
    board_labels = pn.geom_text(
        pn.aes(color='factor(boardsize)', group='boardsize', label='boardsize'),
        data=labs, nudge_y=+.5, show_legend=False, size=6)
    formula_note = pn.annotate(
        'text', 1e9, 2e7, label=desc, ha='left', size=6, family='serif')
    axis_labels = pn.labs(
        x='Train-time compute (FLOPS-seconds)',
        y='Optimal model size (params)')

    return (
        pn.ggplot(df, pn.aes(x='approx_flops', y='params'))
        + per_board
        + trend
        + end_points
        + board_labels
        + formula_note
        + pn.scale_x_continuous(trans='log10')
        + pn.scale_y_continuous(trans='log10')
        + pn.scale_color_hue(l=.4)
        + axis_labels
        + plot.IEEE())
def plot_test(ags):
    """Plot Elo against test-time compute for four selected 9x9 runs.

    For each run only the rows of its latest ``idx`` are kept. Four runs are
    then selected: for each of four thresholds spaced evenly from -2000 to
    -500, the cheapest run (by ``test_flops`` at 64 test nodes) whose
    ``ELO * elo`` exceeds the threshold. Every point is labelled with its
    ``test_nodes`` and every run with its "depth×width" architecture.

    Returns:
        The assembled plotnine plot object.
    """
    df = ags.query('boardsize == 9').groupby('run').apply(
        lambda grp: grp[grp.idx == grp.idx.max()]).copy()
    df['test_flops'] = df.test_nodes * (df.train_flops / df.samples)

    subset = df.query('test_nodes == 64').sort_values('test_flops')
    selection = [
        subset.loc[ELO * subset.elo > e].iloc[0].run
        for e in np.linspace(-2000, -500, 4)
    ]

    df = df[df.run.isin(selection)].copy()
    df['params'] = df.width**2 * df.depth
    df['arch'] = df.apply(lambda r: '{depth}×{width}'.format(**r), axis=1)

    # One architecture label per run, anchored at its cheapest point.
    labels = df.sort_values('test_flops').reset_index(
        drop=True).groupby('run').first().reset_index()

    # The test_nodes label layer is added once; it used to be added twice
    # with identical arguments, drawing every label on top of itself.
    return (pn.ggplot(
        df,
        pn.aes(x='test_flops', y='ELO*elo', color='params', group='run')) +
            pn.geom_point(size=.25, show_legend=False) +
            pn.geom_line(size=.5, show_legend=False) +
            pn.geom_text(pn.aes(label='test_nodes'),
                         nudge_y=-50,
                         show_legend=False,
                         size=4,
                         va='top') +
            pn.geom_text(pn.aes(label='arch'),
                         data=labels,
                         show_legend=False,
                         size=6,
                         nudge_x=-.1,
                         ha='right') +
            pn.scale_x_continuous(trans='log10') +
            pn.scale_color_cmap('plasma',
                                trans='log10',
                                limits=(df.params.min(),
                                        10 * df.params.max())) +
            pn.coord_cartesian((3.5, None)) +
            pn.labs(x='Test-time compute (FLOPS-seconds)',
                    y='Elo v. perfect play') +
            plot.IEEE())
def create(self, file_path: str) -> None:
    """Save a bar chart of how many case studies have each project count.

    The bars count rows of ``self._data`` per ``count`` value, each bar is
    labelled with its height, and the chart is written to ``file_path``.
    """
    size_order = [
        "1", "2", "3", "5", "26", "52", "97", "100", "300", "537"
    ]

    chart = ggplot(self._data, aes(x="count", label="..count.."))
    chart = chart + geom_bar(fill="#1e4f79")
    chart = chart + geom_text(stat="count", va='bottom', size=24)
    chart = chart + scale_x_discrete(limits=size_order)
    chart = chart + scale_y_continuous(breaks=[0, 5, 10], limits=[0, 10])
    chart = chart + ggtitle("Case Study Sizes")
    chart = chart + xlab("Number of Projects")
    chart = chart + ylab("Number of Case Studies")
    chart = chart + theme_classic(base_size=28, base_family="Helvetica")
    chart = chart + theme(text=element_text(size=28))

    chart.save(file_path, width=14, height=7)
def test_stack_negative():
    """Stacked bars and centred labels with the first and last y negated."""
    df = df1.copy()
    y_position = df.columns.get_loc('y')
    last_row = len(df) - 1

    # Flip the sign of the first and the last y value.
    for row in (0, last_row):
        df.iloc[row, y_position] *= -1

    bars = geom_col(
        aes('factor(x)', 'y', fill='factor(y)'), position='stack')
    values = geom_text(
        aes('factor(x)', 'y', label='y'),
        position=position_stack(vjust=0.5))
    p = ggplot(df) + bars + values

    assert p + _theme == 'stack-negative'
def create(self, file_path: str) -> None:
    """Save a bar chart of ``count`` per design pattern to ``file_path``.

    Bars keep the row order of ``self._data`` and are labelled with the
    ``fraction`` column formatted as a percentage.
    """
    tick_text = element_text(rotation=45, ha="right")

    chart = ggplot(self._data, aes(x="pattern", y="count", label="fraction"))
    chart = chart + geom_bar(stat="identity", fill="#1e4f79")
    chart = chart + geom_text(va='bottom', size=24, format_string='{:.1%}')
    chart = chart + scale_x_discrete(limits=self._data["pattern"])
    chart = chart + scale_y_continuous(labels=comma_format(), expand=[0.1, 0])
    chart = chart + ggtitle("Design Pattern Counts")
    chart = chart + xlab("Design Pattern")
    chart = chart + ylab("Count")
    chart = chart + theme_classic(base_size=32, base_family="Helvetica")
    chart = chart + theme(text=element_text(size=32), axis_text_x=tick_text)

    chart.save(file_path, width=24, height=8)
def plot_ambient_by_difference(adata, plot_name='cellbender_results'):
    """Plot counts removed per gene against ``ambient_expression``.

    Adds three columns to ``adata.var`` (raw total, cellbender total and
    their difference) and writes two 5x5 PNGs prefixed with ``plot_name``:
    a scatter of points, and the same plot with ``gene_symbols`` drawn as
    text instead of points.
    """
    # Compute the total amount of expression of each gene
    for column, layer in (
        ('total_gene_counts_raw', 'counts_raw'),
        ('total_gene_counts_cellbender', 'counts_cellbender'),
    ):
        adata.var[column] = np.array(
            adata.layers[layer].sum(axis=0)).squeeze()
    adata.var['difference_total_gene_counts_raw_cellbender'] = (
        adata.var['total_gene_counts_raw']
        - adata.var['total_gene_counts_cellbender']
    )

    x_column = 'ambient_expression'
    y_column = 'difference_total_gene_counts_raw_cellbender'

    # First the plain scatter, then the variant with gene names as text.
    variants = (
        (
            plt9.geom_point(plt9.aes(x=x_column, y=y_column), alpha=0.25),
            '{}-ambient_signature-scatter.png',
        ),
        (
            plt9.geom_text(
                plt9.aes(x=x_column, y=y_column, label='gene_symbols'),
                alpha=0.25),
            '{}-ambient_signature-scatter_genenames.png',
        ),
    )
    for layer, filename_template in variants:
        # Make the plot (a log10 y scale and explicit dpi are left disabled).
        gplt = (
            plt9.ggplot(adata.var)
            + plt9.theme_bw()
            + layer
            + plt9.labs(x='Ambient RNA signature',
                        y='Counts removed by cellbender',
                        title='Ambient RNA signature removal per gene')
        )
        gplt.save(filename_template.format(plot_name), width=5, height=5)
def create(self, file_path: str) -> None:
    """Save a bar chart of ``count`` per category to ``file_path``.

    Bars keep the row order of ``self._data`` and are labelled with the
    ``percent`` column.
    """
    tick_text = element_text(rotation=45, ha="right")

    chart = ggplot(self._data, aes(x="category", y="count", label="percent"))
    chart = chart + geom_bar(stat="identity", fill="#1e4f79")
    chart = chart + geom_text(va='bottom', size=24)
    chart = chart + scale_x_discrete(limits=self._data["category"])
    chart = chart + scale_y_continuous(labels=comma_format(), expand=[0.1, 0])
    chart = chart + ggtitle("Classes per Category")
    chart = chart + xlab("Category")
    chart = chart + ylab("Number of Classes")
    chart = chart + theme_classic(base_size=32, base_family="Helvetica")
    chart = chart + theme(text=element_text(size=32), axis_text_x=tick_text)

    chart.save(file_path, width=7, height=7)
def test_stack_negative():
    """Stacking must cope with negative values at both ends of the data."""
    df = df1.copy()
    y_col = df.columns.get_loc('y')

    # Negate the first and the last y value.
    df.iloc[0, y_col] *= -1
    df.iloc[len(df)-1, y_col] *= -1

    stacked_bars = geom_col(aes('factor(x)', 'y', fill='factor(y)'),
                            position='stack')
    centred_labels = geom_text(aes('factor(x)', 'y', label='y'),
                               position=position_stack(vjust=0.5))
    p = ggplot(df)
    p = p + stacked_bars
    p = p + centred_labels

    assert p + _theme == 'stack-negative'
def getErrorPlot(self, msg="Error Occured"):
    """
    Creates a plotnine plot that shows only an error message.
    To be used to display error messages across dashboards.

    parameters:
        - msg: the message to be displayed when an error occurs
    """
    message_frame = DataFrame({"x": [10], "y": [2], "label": [msg]})
    # Hide axis text and grid lines so only the message is visible.
    blank_canvas = theme(
        figure_size=(20, 4),
        axis_text=element_blank(),
        panel_grid_major=element_blank(),
        panel_grid_minor=element_blank(),
    )
    return (
        ggplot(message_frame, aes(x="x", y="y", label="label"))
        + geom_text(color="Black")
        + THEME.cat_colors_lines
        + THEME.mt
        + blank_canvas
    )
def plot_result_stats(results, title):
    """Dodged bar chart of the ``describe()`` statistics of each metric.

    The count, min and max rows are left out; the remaining statistics are
    drawn per metric with their value (rounded to two decimals) printed on
    top of each bar. The y axis is fixed to [0, 1].

    Returns:
        The assembled plotnine plot object.
    """
    column_names = {
        "level_0": "metric",
        "level_1": "group",
        0: "value"
    }
    stats = results.describe().unstack().reset_index().rename(columns=column_names)

    hidden = stats["group"].isin(["count", "min", "max"])
    stats = stats[~hidden]
    stats["value_presentation"] = round(stats["value"], 2)

    value_labels = p9.geom_text(
        p9.aes(label="value_presentation"),
        position=p9.position_dodge(width=0.9),
        va="bottom")

    plot = p9.ggplot(stats) + p9.aes("metric", "value", fill="group")
    plot = plot + p9.geom_col(position="dodge")
    plot = plot + p9.theme_bw()
    plot = plot + p9.coord_cartesian(ylim=[0, 1.0])
    plot = plot + p9.ggtitle(title)
    plot = plot + value_labels
    return plot
def setup_heatmap0(df: pd.DataFrame, format_string, axis_text):
    """Build a square-tiled heat map of ``scale`` with ``value`` printed per cell.

    Args:
        df: Frame with ``row``, ``col``, ``scale`` and ``value`` columns.
        format_string: Format applied to the printed ``value`` labels.
        axis_text: When truthy the axis text is shown in bold; otherwise it
            is hidden.

    Returns:
        The assembled plotnine plot object.
    """
    # https://stackoverflow.com/a/62161556/819272
    # Plotnine does not support changing the position of any axis.
    if axis_text:
        axis_text_style = p9.element_text(face='bold')
    else:
        axis_text_style = p9.element_blank()

    fill_scale = p9.scale_fill_gradientn(
        colors=['#63BE7B', '#FFEB84', '#F8696B'],
        na_value='#CCCCCC',
        guide=False)
    cell_labels = p9.geom_text(
        p9.aes(label='value'), format_string=format_string, size=7)
    minimal_theme = p9.theme(
        axis_text=axis_text_style,
        axis_ticks=p9.element_blank(),
        axis_title=p9.element_blank(),
        panel_grid=p9.element_blank())

    heatmap = p9.ggplot(df, p9.aes(y='row', x='col'))
    heatmap = heatmap + p9.coord_equal()
    heatmap = heatmap + p9.geom_tile(p9.aes(fill='scale'))
    heatmap = heatmap + cell_labels
    heatmap = heatmap + p9.scale_y_discrete(drop=False)
    heatmap = heatmap + p9.scale_x_discrete(drop=False)
    heatmap = heatmap + fill_scale
    heatmap = heatmap + minimal_theme
    return heatmap
def plot_company_rank(df):
    """Smoothed rank-over-time lines per company, one facet row per bin.

    Each company (``asx_code``) gets a smoothed line coloured by ``sector``
    and a text label placed at its ``x`` / ``y`` columns.

    Returns:
        Whatever ``plot_as_inline_html_data`` returns for the plot.
    """
    assert isinstance(df, pd.DataFrame)

    # One facet row per distinct bin value.
    n_bin = len(df['bin'].unique())

    mapping = p9.aes('date', 'rank', group='asx_code', color='sector')
    code_labels = p9.geom_text(
        p9.aes(label='asx_code', x='x', y='y'),
        nudge_x=1.2, size=6, show_legend=False)
    layout = p9.theme(
        axis_text_x=p9.element_text(angle=30, size=7),
        figure_size=(8, 20),
        subplots_adjust={'right': 0.8})

    plot = p9.ggplot(df, mapping)
    plot = plot + p9.geom_smooth(span=0.3, se=False)
    plot = plot + code_labels
    plot = plot + p9.xlab('')
    plot = plot + p9.facet_wrap('~bin', nrow=n_bin, ncol=1, scales="free_y")
    plot = plot + layout
    return plot_as_inline_html_data(plot)
def plot_vs_continuous(data_table,
                       continuous_metric_name,
                       breaks,
                       metric_name,
                       segment_name,
                       title,
                       aggregate="mean"):
    """Dodged bar chart of an aggregated metric per bucket of a continuous column.

    The bucketing and aggregation are delegated to
    ``_aggregate_vs_continuous``; its ``level_0`` column is used for the
    x axis. Each bar is labelled with its value.

    Returns:
        The assembled plot object.
    """
    result = _aggregate_vs_continuous(
        data_table, continuous_metric_name, breaks,
        metric_name, segment_name, aggregate)

    mapping = plot.aes(
        x="level_0", y=metric_name, fill=segment_name, label=metric_name)
    y_title = aggregate + "(" + metric_name + ")"

    gg_result = plot.ggplot(result) + mapping
    gg_result = gg_result + plot.geom_bar(stat="identity", position="dodge")
    gg_result = gg_result + plot.geom_text(
        position=plot.position_dodge(width=.9), size=8)
    gg_result = gg_result + plot.labs(
        x=continuous_metric_name, y=y_title, title=title)
    return gg_result
def add_mirna_g(g, df, str_name, str_start, str_end, dis_pos, l_s, l_e, l_score=None):
    """Add one miRNA track (name, spans, drop lines, optional scores) to ``g``.

    The layers are added with ``+=``, so ``g`` is expected to support
    in-place addition; nothing is returned. ``df`` is modified: the start,
    end and (when scores are given) score columns are written into it.

    Args:
        g: Plot object the layers are added to.
        df: Frame that receives the new columns and backs the layers; it
            must provide a ``mi_name`` column for the colour mapping.
        str_name: Track name; drawn as text at x=0 and used in the score
            column name.
        str_start: Name of the column that will hold ``l_s``.
        str_end: Name of the column that will hold ``l_e``.
        dis_pos: y position of the track.
        l_s: Start positions.
        l_e: End positions.
        l_score: Optional scores; when non-empty they are drawn as text
            rounded to zero decimals.
    """
    df[str_start] = pd.Series(l_s)
    df[str_end] = pd.Series(l_e)

    g += pt.annotate("text", x=0, y=dis_pos, label=str_name)
    g += pt.geom_errorbarh(df, pt.aes(xmin=str_start, y=(dis_pos), xmax=str_end, color='mi_name'))
    g += pt.geom_segment(df, pt.aes(x=str_start, y=(dis_pos), yend=0, xend=str_start, color='mi_name'))

    # Explicit length check so array-like scores work as well as lists.
    if l_score is not None and len(l_score) > 0:
        score_column_name = 'score' + str_name
        # Built-in float: the ``np.float`` alias no longer exists in NumPy.
        df[score_column_name] = pd.Series(l_score, dtype=float).map('{:.0f}'.format)
        g += pt.geom_text(df,
                          pt.aes(x=str_start, y=dis_pos, label=score_column_name, color='mi_name'),
                          nudge_x=0.1,
                          nudge_y=0.1)
def plot_distributions_bar_plot_grid(dataframe, figure_size=(14, 4)):
    """
    Build a grid of dodged count bar plots, one panel per ``iterations`` value.

    Bars count rows per ``threshold`` and are filled by ``value``
    (0 is shown as 'Stable', 1 as 'Unstable'); each bar is labelled with
    its count.
    """
    stability_names = {
        0: 'Stable',
        1: 'Unstable'
    }
    stability_colors = {
        0: '#009e73',
        1: '#d55e00'
    }

    # Bars plus their count labels, dodged by the same width.
    bars = p9.geom_bar(position='dodge')
    bar_counts = p9.geom_text(p9.aes(label='stat(count)'),
                              stat='count',
                              position=p9.position_dodge(0.9),
                              size=7,
                              va='bottom')

    # Axis names; the y axis gets extra room at the top for the labels
    # (mul_bottom, add_bottom, mul_top, add_top).
    x_axis = p9.scale_x_discrete(name='Threshold')
    y_axis = p9.scale_y_continuous(name='Count', expand=(0, 0, 0, 500))

    # Legend entries and bar colors.
    fill_scale = p9.scale_fill_manual(
        values=stability_colors,
        labels=lambda breaks: [stability_names[b] for b in breaks])

    # One column of panels per iteration count, with renamed strip labels.
    panels = p9.facet_grid('. ~ iterations',
                           labeller=p9.labeller(cols=lambda x: f'iters = {x}'))

    look = p9.theme(
        # No y axis title.
        axis_title_y=p9.element_blank(),
        # Tick label font sizes.
        axis_text_x=p9.element_text(size=7),
        axis_text_y=p9.element_text(size=7),
        # Legend on top, without title, with a reduced margin.
        legend_title=p9.element_blank(),
        legend_position='top',
        legend_box_margin=2,
        # Overall figure size.
        figure_size=figure_size,
    )

    grid = p9.ggplot(dataframe, p9.aes(x='threshold', fill='value'))
    grid = grid + bars + bar_counts
    grid = grid + x_axis + y_axis
    grid = grid + fill_scale
    grid = grid + panels
    grid = grid + look
    return grid
def create_length_plot(len_df, legend_position='right', legend_box='vertical'):
    """Per-task density histograms of ``x`` by method, with mean lines.

    The mean of each (Task, Method) group is marked with a dashed vertical
    segment and its value, formatted to one decimal, printed above it.

    Args:
        len_df: Frame with ``Task``, ``Method`` and ``x`` columns.
        legend_position: Forwarded to the theme's ``legend_position``.
        legend_box: Forwarded to the theme's ``legend_box``.

    Returns:
        The assembled plotnine plot object.
    """
    group_means = len_df.groupby(['Task', 'Method']).mean()
    mean_len_df = group_means.reset_index()
    # Single-space column name: gives the linetype legend an empty title.
    mean_len_df[' '] = 'Mean Length'

    layers = [
        aes(x='x', fill='Method', y='..density..'),
        geom_histogram(binwidth=2, position='identity', alpha=.6),
        geom_text(
            aes(x='x', y=.22, label='x', color='Method'),
            mean_len_df,
            inherit_aes=False,
            format_string='{:.1f}',
            show_legend=False
        ),
        geom_segment(
            aes(x='x', xend='x', y=0, yend=.205, linetype=' '),
            mean_len_df,
            inherit_aes=False,
            color='black'
        ),
        scale_linetype_manual(['dashed']),
        facet_wrap('Task'),
        xlim(0, 20),
        ylim(0, .23),
        xlab('Example Length'),
        ylab('Frequency'),
        scale_color_manual(values=COLORS),
        scale_fill_manual(values=COLORS),
        theme_fs(),
        theme(
            aspect_ratio=1,
            legend_title=element_blank(),
            legend_position=legend_position,
            legend_box=legend_box,
        ),
    ]

    plt = ggplot(len_df)
    for component in layers:
        plt = plt + component
    return plt
def main():
    """Generate and save every figure of the profit / odds analysis.

    Configures matplotlib's mathtext font, silences the routine
    plotting/saving warnings, then builds each contour-style line plot and
    hands it to ``save_both`` under a fixed name. The last figure shades
    the regions where the free and the qualifying bet are more profitable.
    """
    mpl.rc('mathtext', fontset='cm')
    warnings.filterwarnings('ignore', r'(geom|position)_\w+ ?: Removed \d+ rows')
    warnings.filterwarnings('ignore', r'Saving .+ x .+ in image')
    warnings.filterwarnings('ignore', r'Filename: .+\.png')

    df = concat_map(Pf_Ob_Ol, 'P_f', np.linspace(0.1, 1, 10))
    save_both(my_plot(df, 'O_b', 'O_l', 'P_f')
              + titles('P_f(O_b, O_l)')
              + limits((1, 10))
              + gg.geom_abline(slope=1, intercept=0, linetype='dashed', color='grey')
              + gg.geom_line()
              , 'Pf_Ob_Ol')

    df = concat_map(Pf_Ob_σ, 'P_f', np.linspace(0.1, 1, 10))
    save_both(my_plot(df, 'O_b', 'σ', 'P_f')
              + titles('P_f(O_b, σ)')
              + limits((1, 10), (0, 5))
              + gg.geom_line()
              , 'Pf_Ob_σ')

    df = concat_map(Pq_Ob_Ol, 'P_q', np.linspace(-0.9, 0, 10))
    save_both(my_plot(df, 'O_b', 'O_l', 'P_q')
              + titles('P_q(O_b, O_l)')
              + limits((1, 10))
              + gg.geom_abline(slope=1, intercept=0, linetype='dashed', color='grey')
              + gg.geom_line()
              , 'Pq_Ob_Ol')

    df = concat_map(Pq_Ob_σ, 'P_q', np.linspace(-0.9, 0, 10))
    save_both(my_plot(df, 'O_b', 'σ', 'P_q')
              + titles('P_q(O_b, σ)')
              + limits((1, 10), (0, 5))
              + gg.geom_line()
              , 'Pq_Ob_σ')

    df = concat_map(Opr_Ob_Ol, 'Opr', np.linspace(1, 5, 9))
    save_both(my_plot(df, 'O_b', 'O_l', 'Opr')
              + titles("O'(O_b, O_l)")
              + limits((1, 10), (1, 10))
              + gg.geom_line()
              + gg.geom_abline(slope=1, intercept=0, linetype='dashed', color='grey')
              , 'Opr_Ob_Ol')

    df = concat_map(Opr_Ob_σ, 'Opr', np.linspace(1, 5, 9))
    save_both(my_plot(df, 'O_b', 'σ', 'Opr')
              + titles("O'(O_b, σ)")
              + limits((1, 10), (0, 5))
              + gg.geom_line()
              , 'Opr_Ob_σ')

    df = (pd.DataFrame({'Opr': np.linspace(1, 21, 101)})
          .assign(Pf=lambda x: Opr_Pf(x.Opr)))
    save_both(my_plot(df, 'Opr', 'Pf')
              + titles("P_f(O')")
              + labs("O'", 'P_f')
              + limits((1, 20), (0, 1),
                       xbreaks=np.linspace(2, 20, 10),
                       ybreaks=np.linspace(0, 1, 11))
              + gg.geom_line()
              + gg.geom_hline(yintercept=C, linetype='dashed', color='grey')
              , 'Pf_Opr')

    df = concat_map(σpr_Ob_σ, 'σpr', np.linspace(0, 5, 11))
    save_both(my_plot(df, 'O_b', 'σ', 'σpr')
              + titles("σ'(O_b, σ)")
              + limits((1, 10), (0, 5))
              + gg.geom_line()
              , 'σpr_Ob_σ')

    df = (pd.DataFrame({'σpr': np.linspace(0, 21, 106)})
          .assign(Pq=lambda x: σpr_Pq(x.σpr)))
    save_both(my_plot(df, 'σpr', 'Pq')
              + titles("P_q(σ')")
              + labs("σ'", 'P_q')
              + limits((0, 20), (-1, 0),
                       xbreaks=np.linspace(0, 20, 11),
                       ybreaks=np.linspace(-1, 0, 11))
              + gg.geom_line()
              , 'Pq_σpr')

    df = concat_map(liab_Ob_Ol_free, 'liab', np.linspace(0, 10, 11))
    save_both(my_plot(df, 'O_b', 'O_l', 'liab', clab='-R_{bl}')
              + titles("-R_{bl}(O_b, O_l)",
                       "S_b = 1, C_b = 0, C_l = 0.02",
                       mathrm('Free bet', dollars=False))
              + limits((1,20), (1, 10))
              + gg.geom_line()
              + gg.geom_abline(slope=1, intercept=0, linetype='dashed', color='grey')
              , 'liab_Ob_Ol_free')

    # NOTE(review): same data source as the O_l plot above, plotted
    # against σ — confirm this reuse is intended.
    df = concat_map(liab_Ob_Ol_free, 'liab', np.linspace(0, 10, 11))
    save_both(my_plot(df, 'O_b', 'σ', 'liab', clab='-R_{bl}')
              + titles("-R_{bl}(O_b, σ)",
                       "S_b = 1, C_b = 0, C_l = 0.02",
                       mathrm('Free bet', dollars=False))
              + limits((1,20), (1, 10))
              + gg.geom_line()
              , 'liab_Ob_σ_free')

    df = concat_map(liab_Ob_Ol_qual, 'liab', np.linspace(0, 10, 11))
    save_both(my_plot(df, 'O_b', 'O_l', 'liab', clab='-R_{bl}')
              + titles("-R_{bl}(O_b, O_l)",
                       "S_b = 1, C_b = 0, C_l = 0.02",
                       mathrm('Qualifying bet', dollars=False))
              + limits((1,20), (1, 10))
              + gg.geom_line()
              + gg.geom_abline(slope=1, intercept=0, linetype='dashed', color='grey')
              , 'liab_Ob_Ol_qual')

    # NOTE(review): same data source as the O_l plot above, plotted
    # against σ — confirm this reuse is intended.
    df = concat_map(liab_Ob_Ol_qual, 'liab', np.linspace(0, 10, 11))
    save_both(my_plot(df, 'O_b', 'σ', 'liab', clab='-R_{bl}')
              + titles("-R_{bl}(O_b, σ)",
                       "S_b = 1, C_b = 0, C_l = 0.02",
                       mathrm('Qualifying bet', dollars=False))
              + limits((1,20), (1, 10))
              + gg.geom_line()
              , 'liab_Ob_σ_qual')

    df_Pf = Pf_Ob_σ(0.6).assign(profit=dollars('P_f'))
    df_Pq = Pq_Ob_σ(-0.3).assign(profit=dollars('P_q'))
    df = pd.concat((df_Pf, df_Pq), ignore_index=True)
    df.drop_duplicates('O_b', inplace=True)

    Opr = df_Pf.query('σ==0').O_b[0]
    σpr = df_Pq.query('O_b==1').σ[0]

    labels = pd.DataFrame({
        'x': [Opr+0.1, 1, 9.8],
        'y': [4.8, σpr, σpr + 0.3],
        'label': ["$O'$", "$σ'$", mathrm('More profit')]
    })
    lab_aes = gg.aes('x', 'y', label='label')

    # ``labels`` has the default integer index, so the label-based ``.loc``
    # slices below pick rows 0, 1 and 2 exactly as the removed ``.ix`` did.
    save_both(
        gg.ggplot(df, gg.aes(x='O_b', y='σ'))
        + gg.geom_area(gg.aes(fill='profit'), alpha=0.3)
        + gg.geom_vline(xintercept=Opr, linetype='dashed')
        + gg.geom_hline(yintercept=σpr, linetype='dashed')
        # text alignment can't be specified in an aes
        + gg.geom_text(lab_aes, data=labels.loc[:0], ha='left', va='top')
        + gg.geom_text(lab_aes, data=labels.loc[1:1], ha='left', va='bottom')
        + gg.geom_text(lab_aes, data=labels.loc[2:], ha='right', va='bottom')
        + gg.scale_fill_discrete(name=mathrm('Bet type'),
                                 labels=[mathrm('Free'), mathrm('Qualifying')])
        + limits((1, 10), (0, 5))
        + gg.ggtitle('%s "%s" %s' % (mathrm('Shape of the'),
                                     mathrm('more profitable'),
                                     mathrm('space')))
        + labs('O_b', 'σ')
        , 'Px_shapes')
from __future__ import absolute_import, division, print_function import os import matplotlib.pyplot as plt import pytest import six from plotnine import ggplot, aes, geom_text, ggsave from plotnine.data import mtcars from plotnine.exceptions import PlotnineError p = (ggplot(aes(x='wt', y='mpg', label='name'), data=mtcars) + geom_text()) def sequential_filenames(): """ Generate filenames for the tests """ for i in range(100): yield 'filename-{}.png'.format(i) filename_gen = sequential_filenames() def assert_file_exist(filename, msg=None): if not msg: msg = "File {} does not exist".format(filename) assert os.path.exists(filename), msg
# Embed the 'shen' samples in two dimensions with t-SNE and show them as a
# labelled scatter plot coloured by the 'System' annotation.
plt.ion()
import RestrictedData

xnorms = RestrictedData.xnorms
annots = RestrictedData.annots

# Fixed random_state keeps the embedding reproducible between runs.
tsne = TSNE(
    n_components=2,
    verbose=1,
    perplexity=10,
    method='barnes_hut',
    angle=0.5,
    init='pca',
    early_exaggeration=12,
    learning_rate=200,
    n_iter=1000,
    random_state=123,
)
tsneResults = tsne.fit_transform(xnorms['shen'].values)

# One row per sample: its name, its system annotation and both coordinates.
ggd = pd.DataFrame({
    'sample': xnorms['shen'].index,
    'system': annots['shen'].reindex(xnorms['shen'].index)['System'],
    'coord1': tsneResults[:, 0],
    'coord2': tsneResults[:, 1],
})

plt.close()
ggo = (
    gg.ggplot(ggd, gg.aes(x='coord1', y='coord2', color='system', label='sample'))
    + gg.geom_point()
    + gg.geom_text(nudge_y=9, show_legend=False)
    + gg.scale_color_manual(values=['firebrick', 'goldenrod', 'lightseagreen',
                                    'darkorchid', 'darkslategray', 'dodgerblue'])
    + gg.theme_bw()
    + gg.xlab('tSNE coordinate 1')
    + gg.ylab('tSNE coordinate 2')
)
print(ggo)