def plot_replicate_density(
    df,
    batch,
    plate,
    output_file_base=None,
    output_file_extensions=[".png", ".pdf", ".svg"],
    dpi=300,
    height=1.5,
    width=2,
):
    """Build a density plot of pairwise correlations split by replicate flag.

    The x axis is the ``pairwise_correlation`` column of ``df`` and the fill
    colour is its ``replicate_info`` column. The plot title is
    ``"<batch>: <plate>"``.

    When ``output_file_base`` is truthy, the plot is passed to ``save_figure``
    together with ``output_file_extensions``, ``dpi``, ``height`` and
    ``width``. The plot object is returned in either case.
    """
    # Two fill colours, labelled for the string values "True" / "False".
    replicate_fill = gg.scale_fill_manual(
        name="Replicate",
        labels={"True": "True", "False": "False"},
        values=["#B99638", "#2DB898"],
    )
    # Small font sizes; the strip_* entries only matter if facets are added.
    text_sizes = gg.theme(
        title=gg.element_text(size=9),
        axis_text=gg.element_text(size=5),
        axis_title=gg.element_text(size=8),
        legend_text=gg.element_text(size=6),
        legend_title=gg.element_text(size=7),
        strip_text=gg.element_text(size=4, color="black"),
        strip_background=gg.element_rect(colour="black", fill="#fdfff4"),
    )
    canvas = gg.ggplot(
        df, gg.aes(x="pairwise_correlation", fill="replicate_info")
    )
    density_gg = (
        canvas
        + gg.geom_density(alpha=0.3)
        + replicate_fill
        + gg.xlab("Pearson Correlation")
        + gg.ylab("Density")
        + gg.ggtitle(f"{batch}: {plate}")
        + gg.theme_bw()
        + text_sizes
    )

    if not output_file_base:
        return density_gg

    save_figure(
        density_gg, output_file_base, output_file_extensions, dpi, height, width
    )
    return density_gg
def plot_zmw_stats(self, **kwargs):
    """Plot of ZMW stats for all runs.

    Note
    ----
    Raises an error if :meth:`Summaries.has_zmw_stats` is not `True`.

    Parameters
    ----------
    ``**kwargs`` : dict
        Keyword arguments passed to :meth:`Summaries.zmw_stats`.

    Returns
    -------
    plotnine.ggplot.ggplot
        Stacked bar graph of ZMW stats for each run.

    """
    stats = self.zmw_stats(**kwargs)

    # Figure width grows with the number of distinct run names.
    n_names = len(stats['name'].unique())
    rotated_labels = p9.theme(
        axis_text_x=p9.element_text(angle=90, vjust=1, hjust=0.5),
        figure_size=(0.4 * n_names, 2.5),
    )
    bars = p9.geom_col(position=p9.position_stack(reverse=True), width=0.8)

    plot = (
        p9.ggplot(stats, p9.aes(x='name', y='number', fill='status'))
        + bars
        + rotated_labels
        + p9.ylab('number of ZMWs')
        + p9.xlab('')
    )

    # Use the palette (minus its first entry) only when it has enough colours.
    n_status = len(stats['status'].unique())
    if n_status < len(CBPALETTE):
        plot = plot + p9.scale_fill_manual(CBPALETTE[1:])

    return plot
class THEME:
    """Namespace of colour constants and ready-made plotnine theme/scales."""

    # Background colour. It is not used by ``mt`` below.
    bgcolor = "#293241"

    # Loader appearance settings.
    LOADER_COLOR = "#2a9d8f"
    LOADER_TYPE = "dot"

    # Categorical palettes.
    colors_light = [
        "#d88c9a",
        "#f2d0a9",
        "#f1e3d3",
        "#99c1b9",
        "#8e7dbe",
        "#50514f",
        "#f25f5c",
        "#ffe066",
        "#247ba0",
        "#70c1b3",
        "#c97c5d",
        "#b36a5e",
    ]
    colors_dark = [
        "#e07a5f",
        "#3d405b",
        "#81b29a",
        "#2b2d42",
        "#f77f00",
        "#6d597a",
    ]

    # NOTE: an earlier variant of ``mt`` filled the panel and plot backgrounds
    # with ``bgcolor``; it was disabled in favour of the theme below.
    mt = theme_bw() + theme(panel_border=element_blank())

    # Discrete fill / line-colour scales built from the light palette.
    cat_colors = scale_fill_manual(values=colors_light)
    cat_colors_lines = scale_color_manual(values=colors_light)

    # Continuous fill gradient between the two given colours.
    gradient_colors = scale_fill_gradient("#ce4257", "#aad576")

    # Integer flags; presumably select fill vs. colour aesthetics - confirm
    # against callers.
    FILL = 1
    COLOR = 2

    # Figure-size tuple; NOTE(review): looks like (width, height) - confirm.
    LONG_FIGURE = (10, 20)
def create_length_plot(len_df, legend_position='right', legend_box='vertical'):
    """Histogram of example lengths per task with per-method mean markers.

    ``len_df`` supplies the columns ``x``, ``Method`` and ``Task``. A density
    histogram of ``x`` is drawn per ``Task`` facet, filled by ``Method``. The
    per-(Task, Method) mean of ``x`` is annotated with a dashed vertical
    segment and a one-decimal text label.

    ``legend_position`` and ``legend_box`` are forwarded to ``theme``.
    Returns the plot object.
    """
    # One row per (Task, Method) holding the column means.
    group_means = len_df.groupby(['Task', 'Method']).mean().reset_index()
    # Constant column mapped to linetype so the segment gets a legend entry.
    group_means[' '] = 'Mean Length'

    mean_labels = geom_text(
        aes(x='x', y=.22, label='x', color='Method'),
        data=group_means,
        inherit_aes=False,
        format_string='{:.1f}',
        show_legend=False,
    )
    mean_markers = geom_segment(
        aes(x='x', xend='x', y=0, yend=.205, linetype=' '),
        data=group_means,
        inherit_aes=False,
        color='black',
    )
    legend_theme = theme(
        aspect_ratio=1,
        legend_title=element_blank(),
        legend_position=legend_position,
        legend_box=legend_box,
    )

    length_plot = (
        ggplot(len_df)
        + aes(x='x', fill='Method', y='..density..')
        + geom_histogram(binwidth=2, position='identity', alpha=.6)
        + mean_labels
        + mean_markers
        + scale_linetype_manual(['dashed'])
        + facet_wrap('Task')
        + xlim(0, 20)
        + ylim(0, .23)
        + xlab('Example Length')
        + ylab('Frequency')
        + scale_color_manual(values=COLORS)
        + scale_fill_manual(values=COLORS)
        + theme_fs()
        + legend_theme
    )
    return length_plot
def plot_downstream(clwe, table, output, ylim):
    """Save a dodged bar chart of accuracy per language and refinement.

    Reads the CSV at ``data_file(table)``, keeps the rows whose ``clwe``
    column equals ``clwe``, and plots ``accuracy`` by ``language`` with one
    bar per ``refine`` level. The y axis is restricted to ``ylim`` and the
    figure is written to ``output_file(output)``. Nothing is returned.
    """
    refine_levels = ['Original', '+retrofit', '+synthetic']
    language_levels = ['DE', 'ES', 'FR', 'IT', 'JA', 'RU', 'ZH', 'AVG']

    results = pd.read_csv(data_file(table))
    results = results[results.clwe == clwe]
    # Fix the category order so bars and legend follow the lists above.
    results = results.assign(
        refine=pd.Categorical(results['refine'], categories=refine_levels),
        language=pd.Categorical(results['language'],
                                categories=language_levels),
    )

    layout = p9.theme(
        plot_background=p9.element_rect(fill='white'),
        panel_grid_major_y=p9.element_line(),
        axis_text_x=p9.element_text(margin={'t': 10}),
        axis_text_y=p9.element_text(margin={'r': 8}),
        legend_position=(.7, .9),
        legend_direction='horizontal',
        legend_title=p9.element_blank(),
        legend_text=p9.element_text(size=FONT_SIZE),
        legend_box_margin=0,
        figure_size=(12, 3),
    )

    chart = (
        p9.ggplot(results, p9.aes(x='language', y='accuracy', fill='refine'))
        + p9.geom_bar(position='dodge', stat='identity', width=.8)
        + p9.coord_cartesian(ylim=ylim)
        + p9.scale_fill_manual(['#999999', '#EA5F94', '#FFB14E'])
        + p9.theme_void(base_size=FONT_SIZE, base_family='Arial')
        + layout
    )
    chart.save(filename=output_file(output))
def plot_range_comparison(self,
                          xlabel: str = '',
                          figsize: Tuple[int] = (7, 3),
                          add_text_label: bool = True,
                          **kwargs):
    """Bar chart of comparison counts per category, filled by direction.

    Parameters
    ----------
    xlabel
        Label for the x axis.
    figsize
        Figure size passed to ``p9.theme``.
    add_text_label
        When true, add text labels (the ``label`` column) above the bars for
        the 'Positive' and 'Negative' rows that have ``counts > 0``.
    kwargs
        Forwarded to ``self.get_ranges_df``.

    Returns
    -------
    The plotnine plot object.
    """
    ranges = self.get_ranges_df(**kwargs)

    fig = (
        p9.ggplot(ranges)
        + p9.aes('cat_value', 'counts', fill='direction')
        + p9.geom_col(alpha=.8)
        + p9.theme(figure_size=figsize,
                   axis_text_x=p9.element_text(rotation=45))
        + p9.scale_fill_manual(['#3f7f93', '#da3b46', '#4d4a4a'])
        + p9.labs(x=xlabel, y='Number of Comparisons', fill='R')
    )

    if not add_text_label:
        return fig

    # (direction value, y expression for the label, text colour)
    label_specs = (
        ('Positive', 'n + max(n) * .15', '#3f7f93'),
        ('Negative', 'n + max(n) * .05', '#da3b46'),
    )
    for direction_value, y_expression, text_colour in label_specs:
        labelled = (ranges.loc[ranges.direction == direction_value]
                    .loc[ranges.counts > 0])
        # Skip the layer entirely when there is nothing to label.
        if labelled.size > 0:
            fig += p9.geom_text(
                p9.aes(label='label', x='cat_value', y=y_expression),
                inherit_aes=False,
                size=9,
                data=labelled,
                color=text_colour)

    return fig
def plot_two_way_sdc(sdc_df: pd.DataFrame, alpha: float = .05, **kwargs):
    """
    Plots the results of a SDC analysis for a fixed window size in a 2D
    figure.

    In a similar fashion to a recurrence plot, the x and y axes represent the
    start index of the x and y sequences. Only rows with ``p_value < alpha``
    are drawn; tile transparency follows ``abs(r)`` and tile colour follows
    the sign of ``r``.

    Parameters
    ----------
    sdc_df
        Data frame as outputted by `compute_sdc` which will be used to plot
        the results.
    alpha
        Significance threshold. Only rows with a p-value < alpha are plotted.
    kwargs
        Keyword arguments to pass to `plotnine.theme` to customize the plot.

    Returns
    -------
    p9.ggplot.ggplot
        Plot
    """
    # Window length, taken from the first row of the frame.
    first_row = sdc_df.iloc[0]
    fragment_size = int(first_row['stop_1'] - first_row['start_1'])

    significant = sdc_df.loc[lambda dd: dd.p_value < alpha]
    # Legend label for the sign of the correlation (zero counts as negative).
    significant = significant.assign(
        r_str=lambda dd: dd['r'].apply(
            lambda x: '$r > 0$' if x > 0 else '$r < 0$'))

    title = (f'Two-Way SDC plot for $S = {fragment_size}$'
             + r' and $\alpha =$' + f'{alpha}')

    f = (
        p9.ggplot(significant)
        + p9.aes('start_1', 'start_2', fill='r_str', alpha='abs(r)')
        + p9.geom_tile()
        + p9.scale_fill_manual(['#da2421', 'black'])
        + p9.scale_y_reverse()
        + p9.theme(**kwargs)
        + p9.guides(alpha=False)
        + p9.labs(x='$X_i$', y='$Y_j$', fill='$r$', title=title)
    )
    return f
def bsuite_bar_plot(df_in: pd.DataFrame,
                    sweep_vars: Sequence[str] = None) -> gg.ggplot:
    """Output bar plot of bsuite data.

    ``df_in`` is cleaned with ``_clean_bar_plot_data`` and plotted as score
    per ``env``, coloured by ``type``, with a dashed reference line at 1.
    If ``sweep_vars`` is given, the plot is faceted by those columns, one
    facet per row, and the figure height grows with the number of distinct
    combinations.
    """
    data = _clean_bar_plot_data(df_in, sweep_vars)

    plot = (
        gg.ggplot(data)
        + gg.aes(x='env', y='score', colour='type', fill='type')
        + gg.geom_bar(position='dodge', stat='identity')
        + gg.geom_hline(yintercept=1., linetype='dashed', alpha=0.5)
        + gg.scale_colour_manual(plotting.CATEGORICAL_COLOURS)
        + gg.scale_fill_manual(plotting.CATEGORICAL_COLOURS)
        + gg.xlab('experiment')
        + gg.theme(axis_text_x=gg.element_text(angle=25, hjust=1))
    )

    # Fade the bars of jobs that have not finished.
    if not all(data.finished):
        plot += gg.aes(alpha='finished')
        plot += gg.scale_alpha_discrete(range=[0.3, 1.0])

    # Compute the necessary size of the plot.
    n_hypers = 1
    if sweep_vars:
        plot += gg.facet_wrap(sweep_vars, labeller='label_both', ncol=1)
        n_hypers = data[sweep_vars].drop_duplicates().shape[0]

    figure_height = 3 * n_hypers + 1
    return plot + gg.theme(figure_size=(14, figure_height))
def plot_hypothesis(hypothesis, file_name):
    """Write bar charts of hypothesis-test results to a multi-page PDF.

    ``hypothesis`` is indexed as ``hypothesis[bin_type][score]`` and each
    entry is an iterable of single-item mappings ``{bin_name: result}``.
    Each ``result`` exposes the attributes ``p1``/``p2`` (dataset labels),
    ``mean1``/``mean2``, ``t`` and ``p``. The score names are taken from the
    first bin type and assumed to be shared by all bin types.

    For every (bin type, score) pair two plots are produced:

    * the mean score of both datasets per bin, and
    * the p-value per bin, coloured by whether ``p <= 0.05``.

    All plots are saved to ``file_name`` via ``save_as_pdf_pages``.
    Nothing is returned.
    """
    bin_types = list(hypothesis)
    scores = list(hypothesis[bin_types[0]])
    plots = []

    for bin_type, score in product(bin_types, scores):
        mean_name = "Mean: " + score

        # Rows are gathered in plain lists and each frame is built once:
        # DataFrame.append no longer exists in pandas >= 2.0 and copied the
        # whole frame on every call.
        mean_rows = []
        test_rows = []
        for bin_ in hypothesis[bin_type][score]:
            bin_name, h = list(bin_.items())[0]

            for dataset, mean in ((h.p1, h.mean1), (h.p2, h.mean2)):
                mean_rows.append({
                    "Bin": bin_name,
                    "Dataset": dataset,
                    mean_name: str(round(float(mean), 3)),
                })

            p_value = h.p
            test_rows.append({
                "Bin": bin_name,
                "t-statistic": str(round(h.t, 3)),
                "p-value": str(p_value),
                "95% Confidence":
                    "Significant" if p_value <= 0.05 else "Not Significant",
            })

        df = pd.DataFrame(mean_rows, columns=["Bin", "Dataset", mean_name])
        df2 = pd.DataFrame(
            test_rows,
            columns=["Bin", "t-statistic", "p-value", "95% Confidence"])

        plots.append(
            ggplot(df, aes(x='Bin', y=mean_name, fill='Dataset'))
            + geom_col(stat='identity', position='dodge')
            + ggtitle("{0} bin distribution| {1}\nBin's Average Scores".format(
                bin_type, score)))
        plots.append(
            ggplot(df2, aes(x='Bin', y='p-value', fill='95% Confidence'))
            + geom_col(stat='identity', width=0.2)
            + ggtitle(
                "{0} bin distribution| {1}\nBin's 95% Confidence Level Test".
                format(bin_type, score))
            + scale_fill_manual(values={
                'Significant': "#214517",
                'Not Significant': '#c62f2d'
            }))

    save_as_pdf_pages(plots, file_name)
def _plot_regret_group(df: pd.DataFrame, group_col: str) -> gg.ggplot:
    """Plots the average regret through time when grouped.

    A categorical copy of ``group_col`` is added to ``df`` in place, under
    the column name with underscores replaced by spaces. That column drives
    the group, colour and fill of a smoothed mean line of ``average_regret``
    against ``episode``.
    """
    group_name = group_col.replace('_', ' ')
    # NOTE: this writes a new column into the caller's frame.
    df[group_name] = df[group_col].astype('category')

    mapping = gg.aes(
        x='episode',
        y='average_regret',
        group=group_name,
        colour=group_name,
        fill=group_name,
    )
    mean_line = gg.geom_smooth(
        method=smoothers.mean, span=0.1, size=1.75, alpha=0.1)

    return (
        gg.ggplot(df)
        + mapping
        + mean_line
        + gg.scale_colour_manual(values=FIVE_COLOURS)
        + gg.scale_fill_manual(values=FIVE_COLOURS)
    )
def plot_replicate_correlation(
    df,
    batch,
    plate,
    facet_string=None,
    split_samples=False,
    output_file_base=None,
    output_file_extensions=[".png", ".pdf", ".svg"],
    dpi=500,
    height=4,
    width=5,
    return_plot=False,
):
    """Box plot (with jittered points) of similarity by replicate group.

    ``similarity_metric`` is plotted against ``group_replicate``, which also
    drives the fill. The title is ``"<batch>: <plate>"``.

    If ``split_samples`` is true the plot is faceted by ``facet_string``,
    which must then be truthy (enforced with an ``assert``). If
    ``output_file_base`` is truthy the plot is passed to ``save_figure`` with
    the extension list, ``dpi``, ``height`` and ``width``.

    The plot is returned only when ``return_plot`` is true; otherwise the
    function returns ``None``.
    """
    boxes = gg.geom_boxplot(
        alpha=0.3, outlier_alpha=0, width=0.8, notchwidth=0.25, fatten=1.5
    )
    # Tiny points, jittered horizontally only.
    points = gg.geom_jitter(shape=".", size=0.001, alpha=0.3, width=0.3, height=0)
    replicate_fill = gg.scale_fill_manual(
        name="Replicate",
        labels={"True": "True", "False": "False"},
        values=["#B99638", "#2DB898"],
    )
    text_sizes = gg.theme(
        subplots_adjust={"wspace": 0.2},
        title=gg.element_text(size=5),
        axis_text=gg.element_text(size=4),
        axis_title=gg.element_text(size=5),
        legend_text=gg.element_text(size=4),
        legend_title=gg.element_text(size=5),
        strip_text=gg.element_text(size=4, color="black"),
        strip_background=gg.element_rect(colour="black", fill="#fdfff4"),
    )
    mapping = gg.aes(
        x="group_replicate", y="similarity_metric", fill="group_replicate"
    )

    correlation_gg = (
        gg.ggplot(df, mapping)
        + boxes
        + points
        + replicate_fill
        + gg.xlab("Replicates")
        + gg.ylab("Pearson Correlation")
        + gg.ggtitle(f"{batch}: {plate}")
        + gg.theme_bw()
        + text_sizes
    )

    if split_samples:
        assert facet_string, "To split samples, specify a facet_string"
        correlation_gg += gg.facet_wrap(facet_string)

    if output_file_base:
        save_figure(
            correlation_gg,
            output_file_base,
            output_file_extensions,
            dpi,
            height,
            width,
        )

    if return_plot:
        return correlation_gg
def create_confidence_plot(conf_df):
    """Density plot of confidence (``x``) per method, faceted by task.

    ``conf_df`` supplies the columns ``x``, ``Method`` and ``Task``. The
    facets are laid out on four rows, the y axis text, ticks and title are
    hidden, and the legend sits horizontally on top without a title.
    Returns the plot object.
    """
    hidden = element_blank()
    layout = theme(
        axis_text_y=hidden,
        axis_ticks_major_y=hidden,
        axis_title_y=hidden,
        legend_title=hidden,
        legend_position='top',
        legend_box='horizontal',
    )

    confidence_plot = (
        ggplot(conf_df)
        + aes(x='x', color='Method', fill='Method')
        + geom_density(alpha=.45)
        + facet_wrap('Task', nrow=4)
        + xlab('Confidence')
        + scale_color_manual(values=COLORS)
        + scale_fill_manual(values=COLORS)
        + theme_fs()
        + layout
    )
    return confidence_plot
def _bar_plot_compare(df: pd.DataFrame) -> gg.ggplot:
    """Bar plot of bsuite score data, comparing agents on each experiment.

    Draws ``score`` per ``agent`` with a dashed reference line at 1. When
    any row has a falsy ``finished`` value, bar opacity is mapped to that
    column.
    """
    palette = plotting.CATEGORICAL_COLOURS
    plot = (
        gg.ggplot(df)
        + gg.aes(x='agent', y='score', colour='agent', fill='agent')
        + gg.geom_bar(position='dodge', stat='identity')
        + gg.geom_hline(yintercept=1., linetype='dashed', alpha=0.5)
        + gg.theme(axis_text_x=gg.element_text(angle=25, hjust=1))
        + gg.scale_colour_manual(palette)
        + gg.scale_fill_manual(palette)
    )

    all_finished = all(df.finished)
    if all_finished:
        return plot

    # Add a layer of alpha for unfinished jobs.
    plot += gg.aes(alpha='finished')
    plot += gg.scale_alpha_discrete(range=[0.3, 1.0])
    return plot
def plot_regret_average(df_in: pd.DataFrame,
                        group_col: str,
                        episode: int,
                        sweep_vars: Optional[Sequence[str]] = None,
                        regret_col: str = 'total_regret') -> gg.ggplot:
    """Bar plot the average regret at end of learning.

    The input is prepared by ``_preprocess_ave_regret``. Bars show
    ``average_regret`` per group, where the group column is ``group_col``
    with underscores replaced by spaces. The result is passed through
    ``facet_sweep_plot`` together with ``sweep_vars``.
    """
    prepared = _preprocess_ave_regret(
        df_in, group_col, episode, sweep_vars, regret_col)
    group_name = group_col.replace('_', ' ')
    y_label = f'average regret after {episode} episodes'

    bars = (
        gg.ggplot(prepared)
        + gg.aes(x=group_name, y='average_regret', fill=group_name)
        + gg.geom_bar(stat='identity')
        + gg.scale_fill_manual(values=FIVE_COLOURS)
        + gg.ylab(y_label)
    )
    return facet_sweep_plot(bars, sweep_vars)
def plot_replicate_density(
    df,
    batch,
    plate,
    cutoff,
    percent_strong,
    output_file_base=None,
    output_file_extensions=[".png", ".pdf", ".svg"],
    dpi=300,
    height=1.5,
    width=2,
    return_plot=False,
):
    """Density of the similarity metric by replicate group, with a cutoff line.

    ``similarity_metric`` is plotted with fill ``group_replicate``. A dashed
    red vertical line marks ``cutoff``. The title shows the batch, the plate
    and ``percent_strong`` as a percentage rounded to two decimals.

    If ``output_file_base`` is truthy the plot is passed to ``save_figure``
    with the extension list, ``dpi``, ``height`` and ``width``.

    The plot is returned only when ``return_plot`` is true; otherwise the
    function returns ``None``.
    """
    percent_replicating = np.round(percent_strong * 100, 2)
    title = (
        f"{batch}; Plate: {plate}\n\nPercent Replicating: {percent_replicating}%"
    )

    replicate_fill = gg.scale_fill_manual(
        name="Replicate",
        labels={"True": "True", "False": "False"},
        values=["#B99638", "#2DB898"],
    )
    text_sizes = gg.theme(
        title=gg.element_text(size=3.5),
        axis_text=gg.element_text(size=4),
        axis_title=gg.element_text(size=4),
        legend_text=gg.element_text(size=4),
        legend_title=gg.element_text(size=4),
        strip_text=gg.element_text(size=4, color="black"),
        strip_background=gg.element_rect(colour="black", fill="#fdfff4"),
    )

    density_gg = (
        gg.ggplot(df, gg.aes(x="similarity_metric", fill="group_replicate"))
        + gg.geom_density(alpha=0.3)
        + replicate_fill
        + gg.xlab("Pearson Correlation")
        + gg.ylab("Density")
        + gg.geom_vline(xintercept=cutoff, color="red", linetype="dashed")
        + gg.ggtitle(title)
        + gg.theme_bw()
        + text_sizes
    )

    if output_file_base:
        save_figure(
            density_gg,
            output_file_base,
            output_file_extensions,
            dpi,
            height,
            width,
        )

    if return_plot:
        return density_gg
def plot_distributions_bar_plot_grid(dataframe, figure_size=(14, 4)):
    """Return a grid of dodged count bars per threshold, one panel per iteration.

    Rows of ``dataframe`` are counted per ``threshold`` and ``value``. The
    ``value`` levels 0 and 1 are shown as 'Stable' and 'Unstable'. Each bar
    carries its count as a text label, and the panels are laid out by the
    ``iterations`` column.
    """
    value_names = {0: 'Stable', 1: 'Unstable'}
    value_colours = {0: '#009e73', 1: '#d55e00'}

    # Count labels, dodged to line up with the bars.
    count_labels = p9.geom_text(
        p9.aes(label='stat(count)'),
        stat='count',
        position=p9.position_dodge(0.9),
        size=7,
        va='bottom',
    )
    # expand is (mul_bottom, add_bottom, mul_top, add_top): headroom on top
    # only.
    count_axis = p9.scale_y_continuous(name='Count', expand=(0, 0, 0, 500))
    # Legend text comes from value_names; bar colours from value_colours.
    fill_scale = p9.scale_fill_manual(
        values=value_colours,
        labels=lambda levels: [value_names[level] for level in levels],
    )
    panels = p9.facet_grid(
        '. ~ iterations',
        labeller=p9.labeller(cols=lambda x: f'iters = {x}'),
    )
    layout = p9.theme(
        # No y axis title.
        axis_title_y=p9.element_blank(),
        # Tick label font sizes.
        axis_text_x=p9.element_text(size=7),
        axis_text_y=p9.element_text(size=7),
        # Legend on top, untitled, with a small margin.
        legend_title=p9.element_blank(),
        legend_position='top',
        legend_box_margin=2,
        figure_size=figure_size,
    )

    return (
        p9.ggplot(dataframe, p9.aes(x='threshold', fill='value'))
        + p9.geom_bar(position='dodge')
        + count_labels
        + p9.scale_x_discrete(name='Threshold')
        + count_axis
        + fill_scale
        + panels
        + layout
    )
def cell_division(adata):
    """
    Plots the reference variable as a function of the principal circle nodes
    to visualize the moment of cell division.

    Parameters
    ----------------
    adata: AnnData
        The AnnData object being used for the analysis. Must be previously
        evaluated by `tl.celldiv_moment`.

    Returns
    ------------
    A plotnine line-plot to help visualize the moment of cell division and
    direction of the cell cycle. If method = 'counts' when tl.celldiv_moment
    was run, cell division is defined by the largest drop in total_counts.
    The changes in counts are represented by the bars at the bottom, and the
    suggested moment of cell division is marked in red. The cell cycle should
    follow an incremental increase in total counts until around the moment of
    cell division. Alternatively, if method='g2m' in tl.celldiv_moment, the
    G2-M signature dynamics are used to define the moment of cell division.

    Note: a boolean ``cell_div`` column is written into
    ``adata.uns['princirc_gr']['edges']`` as a side effect.
    """
    division_info = adata.uns['scycle']['cell_div_moment']
    ref_var = division_info['ref_var']
    edge_to_0 = division_info['cell_div_edge'][0]

    edges = adata.uns['princirc_gr']['edges']
    # Flag the edge at which division happens (stored on the shared frame).
    is_division_edge = edges['e1'] == edge_to_0
    edges['cell_div'] = is_division_edge
    cell_div_count = edges[is_division_edge]['mean_var']

    # Red marker on top of the curve at the division edge.
    division_marker = annotate(
        "point", x=edge_to_0, y=cell_div_count, color='red', size=2)

    cell_div_plot = (
        ggplot(edges, aes('e1', 'mean_var'))
        + geom_point(aes(y='mean_var'), size=2)
        + geom_path(aes(y='mean_var'))
        + geom_smooth(aes(y='mean_var'), method='lm', linetype='dashed')
        + division_marker
        + labs(x='Edge position', y=ref_var)
        + geom_col(aes(y='diff_var', fill='cell_div'))
        + scale_fill_manual(values=['darkgrey', 'red'], guide=False)
        + theme_std
    )
    return cell_div_plot
def create_confidence_plot(conf_df):
    """Density plot of confidence (``x``) per method, faceted by task.

    ``conf_df`` supplies the columns ``x``, ``Method`` and ``Task``. The
    facets are laid out on four rows, the y axis text, ticks and title are
    hidden, and the legend sits horizontally on top without a title.
    Returns the plot object.
    """
    method_mapping = aes(x='x', color='Method', fill='Method')
    layout = theme(
        axis_text_y=element_blank(),
        axis_ticks_major_y=element_blank(),
        axis_title_y=element_blank(),
        legend_title=element_blank(),
        legend_position='top',
        legend_box='horizontal',
    )

    confidence_plot = ggplot(conf_df) + method_mapping
    confidence_plot = (
        confidence_plot
        + geom_density(alpha=.45)
        + facet_wrap('Task', nrow=4)
        + xlab('Confidence')
        + scale_color_manual(values=COLORS)
        + scale_fill_manual(values=COLORS)
        + theme_fs()
        + layout
    )
    return confidence_plot
class THEME:
    """Namespace of colour constants and ready-made plotnine theme/scales."""

    # Background colour used for the panel and plot backgrounds in ``mt``.
    bgcolor = "#293241"

    # Loader appearance settings.
    LOADER_COLOR = "#2a9d8f"
    LOADER_TYPE = "dot"

    # Categorical palette.
    colors_light = [
        "#d88c9a",
        "#f2d0a9",
        "#f1e3d3",
        "#99c1b9",
        "#8e7dbe",
        "#2a9d8f",
        "#797d62",
        "#3a6ea5",
    ]

    # Theme: ``bgcolor`` backgrounds, black axis text, custom strip margins.
    mt = theme(
        panel_background=element_rect(fill=bgcolor),
        plot_background=element_rect(fill=bgcolor),
        axis_text_x=element_text(color="black"),
        axis_text_y=element_text(color="black"),
        strip_margin_y=0.05,
        strip_margin_x=0.5,
    )

    # Discrete fill / line-colour scales built from the light palette.
    cat_colors = scale_fill_manual(values=colors_light)
    cat_colors_lines = scale_color_manual(values=colors_light)

    # Continuous fill gradient between the two given colours.
    gradient_colors = scale_fill_gradient("#aad576", "#ce4257")

    # Integer flags; presumably select fill vs. colour aesthetics - confirm
    # against callers.
    FILL = 1
    COLOR = 2

    # Figure-size tuple; NOTE(review): looks like (width, height) - confirm.
    LONG_FIGURE = (10, 20)
def pictures(self, mode='bw', subset=None, n_random=10):
    """Returns a picture of the selected images.

    Creates either a colored or a black-white picture of the selected
    images.

    Args:
        mode: Should the picture be black-white ('bw') or in color
            ('color')?
        subset: Optional list of picture indices that should be included in
            the dataframe. If specified, n_random will be ignored.
        n_random: Optional number of randomly selected images. If neither
            subset nor n_random are specified, all images will be included.

    Returns:
        A plotnine object including all pictures with their label.

    Raises:
        NotImplementedError: mode must be either 'bw' or 'color'.
    """
    pixels = self.rgb_dataframe(subset=subset, n_random=n_random)

    if mode not in ('bw', 'color'):
        raise NotImplementedError("Pictures are either in black-white"
                                  "('bw') or in color ('color').")
    fill_key = 'rgb_bw' if mode == 'bw' else 'rgb'

    # Each distinct value in the fill column is used as its own fill colour.
    identity_colours = {key: key for key in pixels[fill_key].unique()}

    picture = (
        gg.ggplot(pixels, gg.aes(x='x', y='y', fill=fill_key))
        + gg.geom_tile()
        + gg.theme_void()
        + gg.theme(legend_position='none')
        + gg.scale_fill_manual(values=identity_colours)
        + gg.facet_wrap('image_id', labeller=self.labeller)
        + gg.scale_y_reverse()
        + gg.coord_fixed()
    )
    return picture
def create_length_plot(len_df, legend_position='right', legend_box='vertical'):
    """Histogram of example lengths per task with per-method mean markers.

    ``len_df`` supplies the columns ``x``, ``Method`` and ``Task``. A density
    histogram of ``x`` is drawn per ``Task`` facet, filled by ``Method``. The
    per-(Task, Method) mean of ``x`` is annotated with a dashed vertical
    segment and a one-decimal text label.

    ``legend_position`` and ``legend_box`` are forwarded to ``theme``.
    Returns the plot object.
    """
    # One row per (Task, Method) holding the column means.
    averages = len_df.groupby(['Task', 'Method']).mean().reset_index()
    # Constant column mapped to linetype so the segment gets a legend entry.
    averages[' '] = 'Mean Length'

    histogram = (
        ggplot(len_df)
        + aes(x='x', fill='Method', y='..density..')
        + geom_histogram(binwidth=2, position='identity', alpha=.6)
    )
    annotated = (
        histogram
        + geom_text(
            aes(x='x', y=.22, label='x', color='Method'),
            data=averages,
            inherit_aes=False,
            format_string='{:.1f}',
            show_legend=False,
        )
        + geom_segment(
            aes(x='x', xend='x', y=0, yend=.205, linetype=' '),
            data=averages,
            inherit_aes=False,
            color='black',
        )
        + scale_linetype_manual(['dashed'])
    )
    return (
        annotated
        + facet_wrap('Task')
        + xlim(0, 20)
        + ylim(0, .23)
        + xlab('Example Length')
        + ylab('Frequency')
        + scale_color_manual(values=COLORS)
        + scale_fill_manual(values=COLORS)
        + theme_fs()
        + theme(
            aspect_ratio=1,
            legend_title=element_blank(),
            legend_position=legend_position,
            legend_box=legend_box,
        )
    )
def pattern_research_plot(data):
    """Dodged bar chart of the D / W / MS series per hour category.

    ``data`` is melted from wide form (columns ``D``, ``W``, ``MS`` keyed by
    ``hour_category``) to long form. The series are ordered D, W, MS and
    drawn with a three-step blue gradient. The y axis is limited to 0-100
    and its tick labels are formatted as percentages.
    """
    from colour import Color

    series_order = ['D', 'W', 'MS']

    # Three colours from "#004996" to "#018ace", then reversed.
    gradient = Color("#004996").range_to(Color("#018ace"), 3)
    blue = [shade.hex_l for shade in gradient][::-1]

    long_form = data.melt(
        id_vars=['hour_category'],
        value_vars=list(series_order),
        var_name='series',
        value_name='count',
    )
    # Fix the series order for both the bars and the legend.
    long_form = long_form.assign(
        series=pd.Categorical(long_form['series'],
                              categories=list(series_order)))

    def as_percent(ticks):
        return ["%d%%" % (tick) for tick in ticks]

    plot = (
        p9.ggplot(data=long_form,
                  mapping=p9.aes(x='hour_category', y='count', fill='series'))
        + p9.geom_bar(stat='identity', position='dodge')
        + p9.scale_fill_manual(blue, labels=list(series_order))
        + p9.theme_classic()
        + p9.theme(axis_text=p9.element_text(size=8),
                   axis_title=p9.element_text(size=8, face='bold'))
        + p9.coord_cartesian(ylim=(0, 100))
        + p9.scale_y_continuous(labels=as_percent)
        + p9.labs(x='hour_category', y='Ratio of attacks')
    )
    return plot
# In[7]: g = (p9.ggplot(binned_df, p9.aes(x="precision", y="edges", color="in_hetionet")) + p9.geom_point() + p9.geom_line() + p9.scale_color_manual(values={ "Existing": color_map["Existing"], "Novel": color_map["Novel"] }) + p9.facet_wrap("relation") + p9.scale_y_log10() + p9.theme_bw()) print(g) # In[8]: g = (p9.ggplot(binned_df, p9.aes(x="precision", y="edges", fill="in_hetionet")) + p9.geom_bar(stat='identity', position='dodge') + p9.scale_fill_manual(values={ "Existing": color_map["Existing"], "Novel": color_map["Novel"] }) + p9.coord_flip() + p9.facet_wrap("relation") + p9.scale_y_log10() + p9.theme(figure_size=(12, 8), aspect_ratio=9) + p9.theme_bw()) print(g) # In[9]: combined_sen_tree = { "DaG": { "file": "../../../disease_gene/disease_associates_gene/edge_prediction_experiment/output/combined_predicted_dag_sentences.tsv.xz", "group": ["doid_id", "entrez_gene_id"] }, "CtD": { "file": "../../../compound_disease/compound_treats_disease/edge_prediction_experiment/output/combined_predicted_ctd_sentences.tsv.xz",
# Reorder plotting variables
filter_list = [
    'all_variant_count',
    'filter_common_var_count',
    'filter_min_depth_count',
    'filter_max_depth_count',
]
# An ordered categorical keeps the bars and legend in filtration-step order.
filter_list_cat = CategoricalDtype(categories=filter_list, ordered=True)
filter_melt_df['num_variants_cat'] = (
    filter_melt_df['num_variants'].astype(str).astype(filter_list_cat))

# Dodged bars of the variant count per lane, one panel per final_id.
p = (
    gg.ggplot(filter_melt_df,
              gg.aes(x='lane', y='filtration', fill='num_variants_cat'))
    + gg.geom_bar(stat='identity', position='dodge')
    + gg.facet_wrap('~ final_id')
    + gg.scale_fill_manual(
        name='Filtration Step',
        values=['#1b9e77', '#d95f02', '#7570b3', '#e7298a'],
        labels=[
            'All Variants',
            'Common Variants',
            'Depth (< {} reads)'.format(replicate_filter_min_depth_count),
            'Depth (> {} reads)'.format(replicate_filter_max_depth_count),
        ])
    + gg.xlab('Sample')
    + gg.ylab('Final Number of Variants')
    + gg.theme_bw()
    # The rotation must be a number: current matplotlib rejects the string
    # '90' when the text rotation is set.
    + gg.theme(axis_text_x=gg.element_text(angle=90),
               axis_text=gg.element_text(size=8),
               axis_title=gg.element_text(size=14))
)
p


# In[13]:


figure_file = os.path.join('figures', 'replicates_filtration_results.pdf')
gg.ggsave(p, figure_file, height=5.5, width=6.5, dpi=500)


# In[14]:
def density_plot(df,
                 x,
                 group=None,
                 facet_x=None,
                 facet_y=None,
                 position='overlay',
                 sort_groups=True,
                 base_size=10,
                 figure_size=(6, 3),
                 **stat_kwargs):
    '''
    Plot a 1-d density plot

    Parameters
    ----------
    df : pd.DataFrame
      input dataframe
    x : str
      quoted expression to be plotted on the x axis
    group : str
      quoted expression to be used as group (ie color)
    facet_x : str
      quoted expression to be used as facet
    facet_y : str
      quoted expression to be used as facet
    position : str
      if groups are present, choose between `stack` or `overlay`
    sort_groups : bool
      if True, the fill legend is drawn in reverse order
    base_size : int
      base size for theme_ez
    figure_size :tuple of int
      figure size
    stat_kwargs : kwargs
      kwargs for the density stat

    Returns
    -------
    g : EZPlot
      EZplot object

    Raises
    ------
    NotImplementedError
      if `position` is neither `overlay` nor `stack`

    '''

    if position not in ('overlay', 'stack'):
        log.error("position not recognized")
        raise NotImplementedError("position not recognized")

    # work on a copy of the data
    dataframe = df.copy()

    # split each expression into its display name and grouping expression;
    # no aggregated variables are needed for a density, so `variables`
    # stays empty
    names = {}
    groups = {}
    variables = {}
    labelled_expressions = (('x', x), ('group', group),
                            ('facet_x', facet_x), ('facet_y', facet_y))
    for label, var in labelled_expressions:
        names[label], groups[label] = unname(var)

    # special case: plot the dataframe index on the x axis
    if x == '.index':
        groups['x'] = '.index'
        names['x'] = (dataframe.index.name
                      if dataframe.index.name is not None else '')

    # aggregate data and keep the plotting columns in a fixed order
    gdata = agg_data(dataframe, variables, groups, None, fill_groups=False)
    ordered_columns = [
        c for c in ['x', 'group', 'facet_x', 'facet_y'] if c in gdata.columns
    ]
    gdata = gdata[ordered_columns]

    # start plotting
    g = EZPlot(gdata)

    # one colour per group
    colors = ez_colors(g.n_groups('group'))

    density_stat = p9.stats.stat_density(**stat_kwargs)
    if group is not None:
        g += p9.geom_density(p9.aes(x="x",
                                    group="factor(group)",
                                    colour="factor(group)",
                                    fill="factor(group)"),
                             stat=density_stat,
                             **POSITION_KWARGS[position])
        g += p9.scale_fill_manual(values=colors, reverse=False)
        g += p9.scale_color_manual(values=colors, reverse=False)
    else:
        g += p9.geom_density(p9.aes(x="x"),
                             stat=density_stat,
                             colour=ez_colors(1)[0],
                             fill=ez_colors(1)[0],
                             **POSITION_KWARGS[position])

    # facets: wrap on facet_x alone, grid when both are given
    if facet_x is not None:
        if facet_y is None:
            g += p9.facet_wrap('~facet_x')
        else:
            g += p9.facet_grid('facet_y~facet_x')

    # x scale depends on the column type
    if g.column_is_categorical('x'):
        g += p9.scale_x_discrete()
    else:
        g += p9.scale_x_continuous(labels=ez_labels)

    # y scale
    g += p9.scale_y_continuous(labels=ez_labels)

    # axis labels
    g += p9.xlab(names['x']) + p9.ylab('Density')

    # theme; the legend title carries the group's display name
    g += theme_ez(figure_size=figure_size,
                  base_size=base_size,
                  legend_title=p9.element_text(text=names['group'],
                                               size=base_size))

    if sort_groups:
        g += p9.guides(fill=p9.guide_legend(reverse=True))

    return g
def barchart_make(roi, df, list_rois, config, ylimit, save_function,
                  find_ylim_function):
    """Draw and save the bar chart for one ROI; return the y limit found.

    The ROI name is ``list_rois[roi]`` and the rows of ``df`` whose ``index``
    column equals that name are plotted: ``Mean`` with 95% confidence error
    bars, grouped along ``config.single_roi_fig_x_axis`` and filled by
    ``config.single_roi_fig_colour``.

    ``ylimit`` controls the pass:

    * truthy: the upper y limit is set to ``ylimit`` and ``'_same_ylim'`` is
      appended to the ROI name.
    * equal to 0: if ``config.use_same_axis_limits`` is 'Same limits' or
      'Create both', ``find_ylim_function`` is asked for the figure's y
      limit. With 'Same limits' that value is returned without saving.

    In all other cases the figure is saved through ``save_function`` into
    the 'Same_yaxis' folder (``ylimit != 0``) or the 'Different_yaxis'
    folder. Returns the value from ``find_ylim_function`` when it was
    called, else 0.
    """
    x_column = config.single_roi_fig_x_axis
    thisroi = list_rois[roi]

    current_df = df.loc[df['index'] == thisroi]
    current_df = current_df.sort_values([x_column])
    # Reset index to remove grouping.
    current_df = current_df.reset_index(drop=True)
    # Categorical in order of appearance, so bars keep the sorted order.
    current_df[x_column] = pd.Categorical(
        current_df[x_column], categories=current_df[x_column].unique())

    mapping = pltn.aes(
        x=x_column,
        y='Mean',
        ymin="Mean-Conf_Int_95",
        ymax="Mean+Conf_Int_95",
        fill=f'factor({config.single_roi_fig_colour})',
    )
    bars = pltn.geom_col(
        position=pltn.position_dodge(preserve='single', width=0.8),
        width=0.8,
        na_rm=True,
    )
    error_bars = pltn.geom_errorbar(
        size=1,
        position=pltn.position_dodge(preserve='single', width=0.8),
    )
    axis_labels = pltn.labs(
        x=config.single_roi_fig_label_x,
        y=config.single_roi_fig_label_y,
        fill=config.single_roi_fig_label_fill,
    )
    layout = pltn.theme(
        panel_grid_major_x=pltn.element_line(alpha=0),
        axis_title_x=pltn.element_text(weight='bold', color='black', size=20),
        axis_title_y=pltn.element_text(weight='bold', color='black', size=20),
        axis_text_y=pltn.element_text(size=20, color='black'),
        legend_title=pltn.element_text(size=20, color='black'),
        legend_text=pltn.element_text(size=18, color='black'),
        subplots_adjust={'right': 0.85},
        legend_position=(0.9, 0.8),
        dpi=config.plot_dpi,
    )
    # The x tick labels are blanked and the category text is drawn below the
    # axis instead.
    category_text = pltn.geom_text(
        pltn.aes(y=-.7, label=x_column), color='black', size=20, va='top')

    figure = (
        pltn.ggplot(current_df, mapping)
        + pltn.theme_538()
        + bars
        + error_bars
        + axis_labels
        + pltn.scale_x_discrete(labels=[])
        + layout
        + category_text
        + pltn.scale_fill_manual(values=config.colorblind_friendly_plot_colours)
    )

    if ylimit:
        # Set y limit of figure (used to make it the same for every barchart).
        figure += pltn.ylim(None, ylimit)
        thisroi += '_same_ylim'

    limits_mode = config.use_same_axis_limits
    no_limit_given = ylimit == 0

    returned_ylim = 0
    if limits_mode in ('Same limits', 'Create both') and no_limit_given:
        returned_ylim = find_ylim_function(thisroi, figure, 'yaxis')

    # In 'Same limits' mode the first pass only measures; nothing is saved.
    if limits_mode == 'Same limits' and no_limit_given:
        return returned_ylim

    if ylimit != 0:
        folder = 'Same_yaxis'
    else:
        folder = 'Different_yaxis'

    save_function(figure, thisroi, config, folder, 'barchart')
    return returned_ylim
colour=ez_colors(1)[0], na_rm=False) else: g += p9.geom_crossbar(p9.aes(x="x", y='center', ymin='low', ymax='high', group="factor(group_x)", colour="factor(group)", fill="factor(group)"), position=p9.position_dodge( 0.7, preserve='single'), na_rm=True, alpha=0.2) g += p9.scale_fill_manual(values=ez_colors(g.n_groups('group'))) g += p9.scale_colour_manual(values=ez_colors(g.n_groups('group'))) elif geom == 'ribbon': g = EZPlot(gdata.dropna()) # set groups if group is None: g += p9.geom_ribbon(p9.aes(x="x", y='center', ymin='low', ymax='high'), fill=ez_colors(1)[0], alpha=0.2, na_rm=False)
# In[9]:


# UMAP scatter of the DMSO-treated samples: fill by clone type, marker shape
# by batch, point size by cell count.
dataset = "dmso_treated"

umap_resistant_type_gg = gg.ggplot(embedding_df, gg.aes(x="x", y="y"))
umap_resistant_type_gg = (
    umap_resistant_type_gg
    + gg.geom_point(
        gg.aes(
            fill="Metadata_clone_type",
            shape="Metadata_batch",
            size="Metadata_cell_count",
        ),
        color='black',
        alpha=0.6,
    )
    + gg.theme_bw()
    + gg.xlab("UMAP (X)")
    + gg.ylab("UMAP (Y)")
    + gg.ggtitle("DMSO treated samples")
    + gg.scale_shape_manual(name="Batch", values=[".", "+", "x"])
    + gg.scale_fill_manual(name="Clone type", values=["#1F8AA5", "#E98831"])
    + gg.scale_size_continuous(name="Cell count")
    + gg.theme(
        strip_text=gg.element_text(size=6, color="black"),
        strip_background=gg.element_rect(colour="black", fill="#fdfff4"),
    )
)

# Save one file per configured extension under figures/umap/.
file = os.path.join("figures", "umap", f"{dataset}_umap_resistant_type")
for extension in save_file_extensions:
    umap_resistant_type_gg.save(
        filename=f"{file}{extension}", height=3, width=3.5, dpi=400
    )

umap_resistant_type_gg
), ) if check_if_write(cell_count_output_file, force, throw_warning=True): cell_count_df.to_csv(cell_count_output_file, sep="\t", index=False) # Graph: Cell count with all wells in same graph cell_count_gg = ( gg.ggplot(cell_count_df, gg.aes(x="site", y="cell_count")) + gg.geom_bar(gg.aes(fill="Cell_Quality"), stat="identity") + gg.theme_bw() + gg.theme(axis_text_x=gg.element_text(rotation=90, size=5)) + gg.xlab("Sites") + gg.ylab("Cell Count") + gg.scale_fill_manual( name="Cell Quality", labels=cell_category_list, values=cell_category_colors ) ) os.makedirs(output_figuresdir, exist_ok=True) output_file = pathlib.Path( output_figuresdir, "all_cellpainting_cellquality_across_sites.png" ) if check_if_write(output_file, force, throw_warning=True): cell_count_gg.save(output_file, dpi=300, width=10, height=7, verbose=False) # Same graph as above, separated by well. cell_count_gg_parsed = ( gg.ggplot(cell_count_df, gg.aes(x="site", y="cell_count")) + gg.geom_bar(gg.aes(fill="Cell_Quality"), stat="identity") + gg.theme_bw()
def line_plot(df,
              x,
              y,
              group=None,
              facet_x=None,
              facet_y=None,
              aggfun='sum',
              err=None,
              show_points=False,
              base_size=10,
              figure_size=(6, 3)):
    '''
    Aggregate ``df`` and draw one or several columns as a line chart.

    When ``y`` is a list with more than one entry, every entry becomes its
    own line and the legend is titled "Variable"; in that mode neither
    ``group`` nor ``err`` may be given.

    Parameters
    ----------
    df : pd.DataFrame
      input dataframe (copied before use)
    x : str
      quoted expression for the x axis; ``'.index'`` selects the dataframe
      index
    y : str or list of str
      quoted expression(s) for the y axis
    group : str
      quoted expression used as group (ie color)
    facet_x : str
      quoted expression used as facet
    facet_y : str
      quoted expression used as facet
    aggfun : str or fun
      aggregation function (eg sum, mean, median ...)
    err : str
      quoted expression drawn as a shaded band of ``y - err`` .. ``y + err``
    show_points : bool
      draw a marker on every data point
    base_size : int
      base size for theme_ez
    figure_size : tuple of int
      figure size

    Returns
    -------
    g : EZPlot
      EZplot object

    Raises
    ------
    ValueError
      if ``group`` or ``err`` is combined with more than one y column
    '''

    # group / err only make sense with a single y column
    several_ys = isinstance(y, list) and len(y) > 1
    if several_ys:
        exclusive_args = (
            (group,
             "groups can be specified only when a single y column is present"),
            (err,
             "err can be specified only when a single y column is present"),
        )
        for value, message in exclusive_args:
            if value is not None:
                log.error(message)
                raise ValueError(message)

    # a one-element list is handled exactly like a plain string
    if isinstance(y, list) and len(y) == 1:
        y = y[0]

    # never touch the caller's dataframe
    data = df.copy()

    # display names, grouping expressions and value expressions
    names, groups, variables = {}, {}, {}
    labelled_exprs = (('x', x), ('group', group), ('facet_x', facet_x),
                      ('facet_y', facet_y))
    for key, expr in labelled_exprs:
        names[key], groups[key] = unname(expr)

    # special case: plot against the index
    if x == '.index':
        groups['x'] = '.index'
        index_name = data.index.name
        names['x'] = '' if index_name is None else index_name

    if isinstance(y, list):
        # one synthetic variable (y_0, y_1, ...) per requested column
        y_keys = ['y_{}'.format(pos) for pos in range(len(y))]
        for key, expr in zip(y_keys, y):
            names[key], variables[key] = unname(expr)

        # aggregate in wide form, then melt so each column becomes a group
        wide = agg_data(data, variables, groups, aggfun, fill_groups=True)
        id_columns = [
            col for col in ('x', 'facet_x', 'facet_y') if col in wide.columns
        ]
        gdata = pd.melt(wide, id_columns, var_name='group', value_name='y')
        gdata['group'] = gdata['group'].replace(
            {key: names[key] for key in y_keys})

        # from here on the plot is a grouped one
        names['y'] = 'Value'
        names['group'] = 'Variable'
        group = 'Variable'
    else:
        names['y'], variables['y'] = unname(y)
        if err is not None:
            names['err'], variables['err'] = unname(err)

        gdata = agg_data(data, variables, groups, aggfun, fill_groups=True)

    # keep the known columns only, in canonical order
    canonical_order = ('x', 'y', 'err', 'group', 'facet_x', 'facet_y')
    gdata = gdata[[col for col in canonical_order if col in gdata.columns]]

    # bounds of the error band
    if err is not None:
        gdata['ymax'] = gdata['y'] + gdata['err']
        gdata['ymin'] = gdata['y'] - gdata['err']

    g = EZPlot(gdata)

    if group is None:
        # single series: everything uses the first palette colour
        base_colour = ez_colors(1)[0]
        g += p9.geom_line(p9.aes(x="x", y="y"), group=1, colour=base_colour)
        if show_points:
            g += p9.geom_point(p9.aes(x="x", y="y"),
                               group=1,
                               colour=base_colour)
        if err is not None:
            g += p9.geom_ribbon(p9.aes(x="x", ymax="ymax", ymin="ymin"),
                                group=1,
                                fill=base_colour,
                                alpha=0.2)
    else:
        # one colour per group, shared by lines, markers and bands
        g += p9.geom_line(
            p9.aes(x="x",
                   y="y",
                   group="factor(group)",
                   colour="factor(group)"))
        if show_points:
            g += p9.geom_point(p9.aes(x="x", y="y", colour="factor(group)"))
        if err is not None:
            g += p9.geom_ribbon(p9.aes(x="x",
                                       ymax="ymax",
                                       ymin="ymin",
                                       fill="factor(group)"),
                                alpha=0.2)
        group_count = g.n_groups('group')
        g += p9.scale_color_manual(values=ez_colors(group_count))
        g += p9.scale_fill_manual(values=ez_colors(group_count))

    # facets
    # NOTE(review): facet_y without facet_x adds no facet layer -- confirm
    # that this is intended.
    if facet_x is not None:
        if facet_y is None:
            g += p9.facet_wrap('~facet_x')
        else:
            g += p9.facet_grid('facet_y~facet_x')

    # x scale follows the type of the x column
    if g.column_is_timestamp('x'):
        g += p9.scale_x_datetime()
    elif g.column_is_categorical('x'):
        g += p9.scale_x_discrete()
    else:
        g += p9.scale_x_continuous(labels=ez_labels)

    # y scale
    g += p9.scale_y_continuous(labels=ez_labels)

    # axis labels
    g += p9.xlab(names['x']) + p9.ylab(names['y'])

    # theme; the legend title carries the group's display name
    legend_title = p9.element_text(text=names['group'], size=base_size)
    g += theme_ez(figure_size=figure_size,
                  base_size=base_size,
                  legend_title=legend_title)

    return g
def area_plot(df,
              x,
              y,
              group=None,
              facet_x=None,
              facet_y=None,
              aggfun='sum',
              fill=False,
              sort_groups=True,
              base_size=10,
              figure_size=(6, 3)):
    '''
    Aggregates data in df and plots as a stacked area chart.

    Parameters
    ----------
    df : pd.DataFrame
      input dataframe (copied before use)
    x : str
      quoted expression to be plotted on the x axis; ``'.index'`` selects
      the dataframe index
    y : str
      quoted expression to be plotted on the y axis
    group : str
      quoted expression to be used as group (ie color)
    facet_x : str
      quoted expression to be used as facet
    facet_y : str
      quoted expression to be used as facet
    aggfun : str or fun
      function to be used for aggregating (eg sum, mean, median ...)
    fill : bool
      plot shares for each group instead of absolute values
    sort_groups : bool
      sort groups by the sum of their value (otherwise alphabetical order is
      used)
    base_size : int
      base size for theme_ez
    figure_size : tuple of int
      figure size

    Returns
    -------
    g : EZPlot
      EZplot object
    '''

    # create a copy of the data
    dataframe = df.copy()

    # define groups and variables; remove and store (eventual) names
    names = {}
    groups = {}
    variables = {}

    for label, var in zip(['x', 'group', 'facet_x', 'facet_y'],
                          [x, group, facet_x, facet_y]):
        names[label], groups[label] = unname(var)
    names['y'], variables['y'] = unname(y)

    # fix special cases
    if x == '.index':
        groups['x'] = '.index'
        names['x'] = (dataframe.index.name
                      if dataframe.index.name is not None else '')

    # aggregate data and reorder columns
    gdata = agg_data(dataframe, variables, groups, aggfun, fill_groups=True)
    # Missing values count as zero. The result is assigned back instead of
    # calling ``fillna(..., inplace=True)`` on the column selection: that is
    # chained assignment, which warns on recent pandas and no longer updates
    # ``gdata`` once Copy-on-Write is enabled.
    gdata['y'] = gdata['y'].fillna(0)
    gdata = gdata[[
        c for c in ['x', 'y', 'group', 'facet_x', 'facet_y']
        if c in gdata.columns
    ]]

    if fill:
        # convert absolute values to shares of the total within each
        # x / facet combination
        groups_to_normalize = [
            c for c in ['x', 'facet_x', 'facet_y'] if c in gdata.columns
        ]
        total_values = gdata \
            .groupby(groups_to_normalize)['y'] \
            .sum() \
            .reset_index() \
            .rename(columns={'y': 'tot_y'})
        gdata = pd.merge(gdata, total_values, on=groups_to_normalize)
        # EPSILON is added to the denominator (presumably a small constant
        # guarding against division by zero -- confirm at its definition)
        gdata['y'] = gdata['y'] / (gdata['tot_y'] + EPSILON)
        gdata = gdata.drop(columns='tot_y')
        ylabeller = percent_labels
    else:
        ylabeller = ez_labels

    # get plot object
    g = EZPlot(gdata)

    # determine order and create a categorical type
    if sort_groups:
        sort_data_groups(g)

    # get colors (palette order is reversed)
    colors = np.flip(ez_colors(g.n_groups('group')))

    # set groups
    if group is None:
        g += p9.geom_area(p9.aes(x="x", y="y"),
                          colour=None,
                          fill=ez_colors(1)[0],
                          na_rm=True)
    else:
        g += p9.geom_area(p9.aes(x="x",
                                 y="y",
                                 group="factor(group)",
                                 fill="factor(group)"),
                          colour=None,
                          na_rm=True)
        g += p9.scale_fill_manual(values=colors)

    # set facets
    # NOTE(review): facet_y without facet_x adds no facet layer -- confirm
    # that this is intended.
    if facet_x is not None and facet_y is None:
        g += p9.facet_wrap('~facet_x')
    if facet_x is not None and facet_y is not None:
        g += p9.facet_grid('facet_y~facet_x')

    # set x scale
    if g.column_is_timestamp('x'):
        g += p9.scale_x_datetime()
    elif g.column_is_categorical('x'):
        g += p9.scale_x_discrete()
    else:
        g += p9.scale_x_continuous(labels=ez_labels)

    # set y scale; the third expand entry is 0.13 for absolute values and
    # 0.03 when plotting shares
    g += p9.scale_y_continuous(labels=ylabeller,
                               expand=[0, 0, 0.1 * (not fill) + 0.03, 0])

    # set axis labels
    g += \
        p9.xlab(names['x']) + \
        p9.ylab(names['y'])

    # set theme
    g += theme_ez(figure_size=figure_size,
                  base_size=base_size,
                  legend_title=p9.element_text(text=names['group'],
                                               size=base_size))

    # reverse the legend order when groups were sorted
    if sort_groups:
        g += p9.guides(fill=p9.guide_legend(reverse=True),
                       color=p9.guide_legend(reverse=True))

    return g
g = ( p9.ggplot(publish_rate_df.rename(index=str, columns={"label": "Label"})) + p9.aes( x="pub_month", y="rate", fill="Label", group="Label", color="Label", linetype="Label", shape="Label", ) + p9.geom_point(size=2) + p9.geom_line() + p9.scale_linetype_manual(["solid", "solid", "solid"]) + p9.scale_color_manual( [color_mapper["2020"], color_mapper["2020ML"], color_mapper["2018"]]) + p9.scale_fill_manual( [color_mapper["2020"], color_mapper["2020ML"], color_mapper["2018"]]) + p9.scale_shape_manual(["o", "o", "o"]) # plot the x axis titles + p9.geom_vline(xintercept=[2.5, 14.5, 26.5, 38.5, 50.5, 62.5, 74.5]) + p9.geom_text(label="2014", x=8.5, y=0, color="black", size=13) + p9.geom_text(label="2015", x=20.5, y=0, color="black", size=13) + p9.geom_text(label="2016", x=32.5, y=0, color="black", size=13) + p9.geom_text(label="2017", x=44.5, y=0, color="black", size=13) + p9.geom_text(label="2018", x=56.5, y=0, color="black", size=13) + p9.geom_text(label="2019", x=68.5, y=0, color="black", size=13) # Plot the overall proportion published + p9.geom_hline( yintercept=0.4196, linetype="solid", color=color_mapper["2018"]) + p9.geom_hline(yintercept=published / posted, linetype="solid", color=color_mapper["2020ML"]) +