def cell_cycle_phase_barplot(adata, palette='Set2'):
    """Draw a horizontal bar chart with the number of cells per cell cycle phase.

    See also: cell_cycle_phase_pieplot for the matplotlib pie chart

    Parameters
    -----------
    adata: AnnData
        The AnnData object being used for the analysis. Its ``obs`` table is
        copied and must hold a ``cell_cycle_phase`` column, i.e. the object
        must be previously evaluated by `tl.annotate_cell_cycle`.
    palette: str
        Name of the qualitative brewer palette used to fill the bars.

    Returns
    -----------
    A plotnine barplot with the total counts of cell in each phase of the
    cell cycle.
    """
    # Fixed category order so the bars always appear in the same sequence.
    phase_order = ['G1 post-mitotic', 'G1 pre-replication', 'S/G2/M']

    # Work on a copy: the caller's ``obs`` table is left untouched.
    obs = adata.obs.copy()
    obs['cell_cycle_phase'] = pd.Categorical(
        obs['cell_cycle_phase'], categories=phase_order)

    # After coord_flip the phases run along the vertical axis, so only the
    # grid lines of the count axis are kept (and thickened).
    grid_tweaks = theme(
        panel_grid_major_y=element_blank(),
        panel_grid_minor_y=element_blank(),
        panel_grid_major_x=element_line(size=1.5),
        panel_grid_minor_x=element_line(size=1.5))

    cycle_plot = ggplot(
        obs, aes('cell_cycle_phase', fill='cell_cycle_phase'))
    cycle_plot = cycle_plot + geom_bar()
    cycle_plot = cycle_plot + coord_flip()
    # The fill legend would only repeat the axis labels.
    cycle_plot = cycle_plot + guides(fill=False)
    cycle_plot = cycle_plot + labs(y='', x='Cell cycle phase')
    cycle_plot = cycle_plot + theme_light()
    cycle_plot = cycle_plot + grid_tweaks
    cycle_plot = cycle_plot + scale_fill_brewer(type='qual', palette=palette)
    return cycle_plot
def plot_revigo(
        rev,
        outline=2,
        expand_points=(1.05, 1.2),
        figure_size=(8, 8),
        font_size=8,
        point_size=3,
        point_alpha=0.7,
        palette='RdPu',
        dispensability_cutoff=1.,
        show_all_labels=False,
        text_column='name',
        term_size_limit=None,
):
    """Scatter plot of a REVIGO-style table with labelled terms.

    Every row of ``rev`` is drawn as a point at (``plot_X``, ``plot_Y``),
    filled by ``neglog10`` and sized by ``frequency``. Text labels are taken
    from ``text_column``.

    Unless ``show_all_labels`` is true, only rows with ``eliminated == 0`` and
    ``dispensability`` below ``dispensability_cutoff`` are labelled; when
    ``term_size_limit`` is given, rows whose ``term_size`` is not below it are
    left unlabelled as well.

    ``outline`` and ``point_size`` are accepted but not used by this function.

    Returns the plotnine plot object.
    """
    import plotnine as p9
    import matplotlib.patheffects as path_effects

    # White halo behind the label text so it stays readable over the points.
    halo = [
        path_effects.Stroke(linewidth=2, foreground='white'),
        path_effects.Normal()
    ]

    # Decide which rows receive a text label.
    if show_all_labels:
        labelled = rev
    else:
        keep = (rev.eliminated == 0) & (rev.dispensability
                                        < dispensability_cutoff)
        labelled = rev[keep]
        if term_size_limit is not None:
            labelled = labelled[labelled.term_size < term_size_limit]

    # Label repulsion settings; all points (not only labelled ones) are
    # passed as obstacles.
    repel = {
        'expand_points': expand_points,
        'arrowprops': {
            'arrowstyle': '-'
        },
        'x': rev.plot_X.values,
        'y': rev.plot_Y.values
    }

    points = p9.geom_point(p9.aes(fill='neglog10', size='frequency'),
                           color='black',
                           alpha=point_alpha)
    labels = p9.geom_text(p9.aes(label=text_column),
                          data=labelled,
                          size=font_size,
                          adjust_text=repel,
                          path_effects=halo)
    axis_free = p9.theme(figure_size=figure_size,
                         axis_text_x=p9.element_blank(),
                         axis_text_y=p9.element_blank(),
                         axis_ticks=p9.element_blank())

    g = p9.ggplot(p9.aes(x='plot_X', y='plot_Y'), data=rev)
    g = g + points
    g = g + labels
    g = g + p9.theme_bw()
    g = g + p9.scale_fill_distiller(type='seq', palette=palette, direction=1)
    g = g + p9.labs(x='Semantic similarity space',
                    y='',
                    fill='-log10(adj. p-value)',
                    size='Term frequency')
    g = g + p9.scale_size_continuous(range=(2, 7), trans='log10')
    g = g + axis_free
    return g
def plot_violin_plots(
    par_id: str,
    dims: List[str],
    draws: Dict,
    log_scale_variables: List[str],
    units: Dict[str, str],
    confidence_intervals,
    measurements,
):
    """Build a violin plot of the draws of one parameter.

    :param par_id: Name of the parameter plotted
    :param dims: Dimensions of the parameter
    :param draws: pd.Dataframe of parameter distribution indexed by dimensions
        and contains the population samples
    :param log_scale_variables: Parameters that are log-distributed
    :param units: Dictionary of units for each parameter
    :param confidence_intervals: Mapping from parameter name to a table with
        ``lower_ci`` and ``upper_ci`` columns, drawn as error bars when it
        has an entry for ``par_id``
    :param measurements: Mapping from parameter name to a table with a
        ``measurement`` column, drawn as points when the entry for ``par_id``
        is non-empty
    :return: The plotnine plot
    """
    unit_label = units[par_id]
    n_dims = len(dims)
    # With more than one dimension the x axis shows experiments and the
    # second dimension is used for faceting below.
    x = fill = dims[0] if n_dims <= 1 else "experiments"

    violins = p9.geom_violin(
        p9.aes(y=f"{par_id}", x=x, fill=fill),
        position="identity",
        color="None",
        size=0.5,
        alpha=0.7,
        weight=0.7,
        linetype="None",
    )
    plot = p9.ggplot(data=draws) + violins
    plot = plot + p9.labels.ylab(f"{par_id} {unit_label}")

    if par_id in confidence_intervals.keys():
        plot += p9.geoms.geom_errorbar(
            p9.aes(x=x, ymin="lower_ci", ymax="upper_ci"),
            data=confidence_intervals[par_id],
            width=0.1,
        )

    if par_id in measurements.keys() and len(measurements[par_id]) > 0:
        plot += p9.geoms.geom_point(
            p9.aes(y="measurement", x=x),
            data=measurements[par_id],
        )

    if n_dims == 1:
        plot += p9.themes.theme(axis_text_x=p9.element_text(angle=70), )
    elif n_dims > 1:
        # NOTE(review): facet and theme are summed with each other before
        # being added to the plot - confirm the theme overrides take effect.
        plot += p9.facet_wrap(f"~{dims[1]}") + p9.themes.theme(
            panel_spacing_y=0.05,
            panel_spacing_x=0.35,
            axis_title=p9.element_text(size=10),
            axis_text=p9.element_text(size=11),
            axis_text_y=p9.element_text(size=8, angle=45),
            axis_title_x=p9.element_blank(),
            axis_text_x=p9.element_blank(),
        )

    if par_id in log_scale_variables:
        plot += p9.scale_y_log10()
    return plot
def plot_metrics_comparison_lineplot_grid(dataframe,
                                          models_labels,
                                          metrics_labels,
                                          figure_size=(14, 4)):
    """
    Build a grid of line plots comparing metric values across thresholds.

    ``dataframe`` supplies the columns ``threshold``, ``value``, ``variable``,
    ``iterations`` and ``model``. Rows of the grid are iterations, columns are
    models. ``metrics_labels`` and ``models_labels`` are looked up by key to
    get the display names of the metrics (legend) and the models (column
    strips). ``figure_size`` is passed on to the theme.
    """

    def two_decimals(breaks):
        # Tick labels with 2 decimal digits.
        return ['{:.2f}'.format(x) for x in breaks]

    def metric_names(keys):
        # Replace the raw metric keys in the legend.
        return [metrics_labels[x] for x in keys]

    def row_title(x):
        return f'iters = {x}'

    def col_title(x):
        return f'{models_labels[x]}'

    mapping = p9.aes(x='threshold',
                     y='value',
                     group='variable',
                     color='variable',
                     shape='variable')

    look = p9.theme(
        # No y axis name.
        axis_title_y=p9.element_blank(),
        # Font size of the x and y tick labels.
        axis_text_x=p9.element_text(size=7),
        axis_text_y=p9.element_text(size=7),
        # Legend on top, without title, with a reduced margin.
        legend_title=p9.element_blank(),
        legend_position='top',
        legend_box_margin=2,
        figure_size=figure_size,
    )

    plot = p9.ggplot(dataframe, mapping)
    # Points and lines.
    plot = plot + p9.geom_point()
    plot = plot + p9.geom_line()
    # Axis names, with some space left around the data.
    plot = plot + p9.scale_x_discrete(name='Threshold', expand=(0, 0.2))
    plot = plot + p9.scale_y_continuous(
        name='Value', expand=(0, 0.05), labels=two_decimals)
    # Shape and color share one legend; colors are color-blind friendly.
    plot = plot + p9.scale_shape_discrete(name='Metric', labels=metric_names)
    plot = plot + p9.scale_color_brewer(
        name='Metric', labels=metric_names, type='qual', palette='Set2')
    # Grid layout with renamed row and column labels.
    plot = plot + p9.facet_grid(
        'iterations ~ model',
        labeller=p9.labeller(rows=row_title, cols=col_title))
    plot = plot + look
    return plot
def theme_energinet() -> p9.themes.theme:
    """Return a plain Energinet theme.

    White panel and legend backgrounds, black axis lines, the font family
    given by ``endktheme.style.font_family()``, and no plot background,
    legend keys, grid lines or axis ticks.
    """
    white_fill = {"fill": "white"}
    settings = {
        "text": p9.element_text(family=endktheme.style.font_family()),
        "axis_line": p9.element_line(color="black"),
        "plot_background": p9.element_blank(),
        "panel_background": p9.element_rect(**white_fill),
        "legend_background": p9.element_rect(**white_fill),
        "legend_key": p9.element_blank(),
        "panel_grid": p9.element_blank(),
        "axis_ticks": p9.element_blank(),
    }
    return p9.theme(**settings)
def _grid_elements_to_drop(grid_axis, grid_lines):
    """Return the names of the panel-grid theme elements that must be blanked."""
    if grid_axis is None or grid_lines is None:
        return {'panel_grid_major', 'panel_grid_minor'}

    dropped = set()
    if grid_axis in ('x', 'y'):
        # Hide every grid line belonging to the axis that was not requested.
        other_axis = 'y' if grid_axis == 'x' else 'x'
        dropped.update(['panel_grid_major_' + other_axis,
                        'panel_grid_minor_' + other_axis])
        suffix = '_' + grid_axis
    else:
        # 'both': address major/minor lines of the two axes at once.
        suffix = ''

    # Hide the line kind that was not requested on the remaining axis/axes.
    if grid_lines == 'major':
        dropped.add('panel_grid_minor' + suffix)
    elif grid_lines == 'minor':
        dropped.add('panel_grid_major' + suffix)
    return dropped


def _plot_theme(grid_axis='both', grid_lines='both', theme='bw'):
    """Internal function provides consistent theme across plots.

    Currently a slightly modified version of theme_bw() with configurable
    grid lines: the panel border is removed and black axis lines are drawn.

    Args:
      grid_axis: controls the axis on which to draw grid lines
        - Accepts: None, 'x', 'y', 'both'
        - Default: 'both'
      grid_lines: controls whether major or minor grid lines are drawn
        - Accepts: None, 'major', 'minor', 'both'
        - Default: 'both'
      theme: name of the plotnine base theme (``theme_<name>``)
        - Accepts: 'bw', 'classic', 'gray', 'grey', 'seaborn', '538', 'dark',
          'matplotlib', 'minimal', 'xkcd', 'light'
        - Default: 'bw'

    Returns:
      A theme object to be added to a plotnine.ggplot() object.

    Raises:
      AssertionError: if any argument is not one of the accepted values.
    """
    import plotnine as gg

    assert (grid_axis in [None, 'x', 'y', 'both'])
    assert (grid_lines in [None, 'major', 'minor', 'both'])
    assert (theme in [
        'bw', 'classic', 'gray', 'grey', 'seaborn', '538', 'dark',
        'matplotlib', 'minimal', 'xkcd', 'light'
    ])

    # Each unwanted grid element is switched off with its own blank element.
    grid_opt = {
        name: gg.element_blank()
        for name in _grid_elements_to_drop(grid_axis, grid_lines)
    }

    return getattr(gg, 'theme_' + theme)() + \
        gg.theme(panel_border=gg.element_blank(),
                 axis_line=gg.element_line(color="black"),
                 **grid_opt)
def create_confidence_plot(conf_df):
    """Build per-task density plots of confidence values, colored by method.

    ``conf_df`` supplies the columns ``x`` (the value whose density is drawn),
    ``Method`` (color and fill) and ``Task`` (one facet per task, arranged in
    4 rows). Returns the plotnine plot.
    """
    mapping = aes(x='x', color='Method', fill='Method')

    # The y axis is hidden entirely; the legend goes on top, laid out
    # horizontally and without a title.
    layout = theme(
        axis_text_y=element_blank(),
        axis_ticks_major_y=element_blank(),
        axis_title_y=element_blank(),
        legend_title=element_blank(),
        legend_position='top',
        legend_box='horizontal',
    )

    figure = ggplot(conf_df) + mapping
    figure = figure + geom_density(alpha=.45)
    figure = figure + facet_wrap('Task', nrow=4)
    figure = figure + xlab('Confidence')
    figure = figure + scale_color_manual(values=COLORS)
    figure = figure + scale_fill_manual(values=COLORS)
    figure = figure + theme_fs()
    figure = figure + layout
    return figure
def setup_heatmap0(df: pd.DataFrame, format_string, axis_text):
    """Build an annotated heatmap from a long-format table.

    ``df`` supplies the columns ``row`` (y), ``col`` (x), ``scale`` (tile
    fill) and ``value`` (tile text, rendered with ``format_string``). When
    ``axis_text`` is falsy the axis tick labels are hidden, otherwise they
    are shown in bold. Returns the plotnine plot.
    """
    # https://stackoverflow.com/a/62161556/819272
    # Plotnine does not support changing the position of any axis.
    if not axis_text:
        tick_labels = p9.element_blank()
    else:
        tick_labels = p9.element_text(face='bold')

    tiles = p9.geom_tile(p9.aes(fill='scale'))
    cell_text = p9.geom_text(
        p9.aes(label='value'), format_string=format_string, size=7)
    # Green-yellow-red gradient; missing values are grey; no legend.
    fill_scale = p9.scale_fill_gradientn(
        colors=['#63BE7B', '#FFEB84', '#F8696B'],
        na_value='#CCCCCC',
        guide=False)
    bare = p9.theme(axis_text=tick_labels,
                    axis_ticks=p9.element_blank(),
                    axis_title=p9.element_blank(),
                    panel_grid=p9.element_blank())

    heatmap = p9.ggplot(df, p9.aes(y='row', x='col'))
    heatmap = heatmap + p9.coord_equal()
    heatmap = heatmap + tiles
    heatmap = heatmap + cell_text
    # drop=False keeps unused categories on both axes.
    heatmap = heatmap + p9.scale_y_discrete(drop=False)
    heatmap = heatmap + p9.scale_x_discrete(drop=False)
    heatmap = heatmap + fill_scale
    heatmap = heatmap + bare
    return heatmap
def plot_preprocessing_boxplot_bymodel(dataframe,
                                       models_labels,
                                       metrics_labels,
                                       groups_labels,
                                       figure_size=(14, 4)):
    """
    Build dodged box plots of metric values, one grid row per model.

    ``dataframe`` supplies the columns ``variable`` (x), ``value`` (y),
    ``group`` (fill) and ``model`` (grid rows). ``metrics_labels``,
    ``groups_labels`` and ``models_labels`` are looked up by key to get the
    display names of the x ticks, the legend entries and the row strips.
    ``figure_size`` is passed on to the theme.
    """

    def metric_names(keys):
        return [metrics_labels[x] for x in keys]

    def group_names(keys):
        return [groups_labels[x] for x in keys]

    def two_decimals(breaks):
        return ['{:.2f}'.format(x) for x in breaks]

    def row_title(x):
        return f'{models_labels[x]}'

    # Fixed y range with tick labels shown with 2 decimal digits.
    y_scale = p9.scale_y_continuous(
        name='Value',
        expand=(0, 0.05),
        limits=[-0.25, 1],
        labels=two_decimals)

    look = p9.theme(
        # No x and y axis names.
        axis_title_x=p9.element_blank(),
        axis_title_y=p9.element_blank(),
        # Font size of the x and y tick labels.
        axis_text_x=p9.element_text(size=7),
        axis_text_y=p9.element_text(size=7),
        # Legend on top, without title, with a reduced margin.
        legend_title=p9.element_blank(),
        legend_position='top',
        legend_box_margin=2,
        figure_size=figure_size,
    )

    plot = p9.ggplot(dataframe,
                     p9.aes(x='variable', y='value', fill='group'))
    plot = plot + p9.geom_boxplot(position='dodge')
    plot = plot + p9.scale_x_discrete(name='Metric', labels=metric_names)
    plot = plot + y_scale
    # Color-blind friendly fill colors.
    plot = plot + p9.scale_fill_brewer(
        name='Group', labels=group_names, type='qual', palette='Set2')
    plot = plot + p9.facet_grid(
        'model ~ .',
        scales='free_y',
        labeller=p9.labeller(rows=row_title))
    plot = plot + look
    return plot
def getErrorPlot(self, msg="Error Occured"):
    """Return a plot that shows only the text ``msg``.

    The message is drawn in white at a single point; the shared THEME color
    scale and base theme are applied, and axis text and grid lines are
    hidden.
    """
    # One-row table: the position of the message and its text.
    message_df = DataFrame({"x": [10], "y": [2], "label": [msg]})

    bare = theme(figure_size=(20, 4),
                 axis_text=element_blank(),
                 panel_grid_major=element_blank(),
                 panel_grid_minor=element_blank())

    p = ggplot(message_df, aes(x="x", y="y", label="label"))
    p = p + geom_text(color="white")
    p = p + THEME.cat_colors_lines
    p = p + THEME.mt
    p = p + bare
    return p
def plot_downstream(clwe, table, output, ylim):
    """Save a dodged bar chart of accuracy per language and refinement.

    Reads the CSV located by ``data_file(table)``, keeps the rows whose
    ``clwe`` column equals ``clwe``, and plots ``accuracy`` per ``language``
    with one bar per ``refine`` value. ``ylim`` limits the visible y range.
    The figure is written to ``output_file(output)``; nothing is returned.
    """
    refine_order = ['Original', '+retrofit', '+synthetic']
    language_order = ['DE', 'ES', 'FR', 'IT', 'JA', 'RU', 'ZH', 'AVG']

    df = pd.read_csv(data_file(table))
    df = df[df.clwe == clwe]
    # Categoricals fix the order of the bars and of the x axis.
    df = df.assign(
        refine=pd.Categorical(df['refine'], refine_order),
        language=pd.Categorical(df['language'], language_order)
    )

    # Starting from the void theme, add back a white background, horizontal
    # major grid lines and padded tick labels; legend sits inside the panel.
    look = p9.theme(
        plot_background=p9.element_rect(fill='white'),
        panel_grid_major_y=p9.element_line(),
        axis_text_x=p9.element_text(margin={'t': 10}),
        axis_text_y=p9.element_text(margin={'r': 8}),
        legend_position=(.7, .9),
        legend_direction='horizontal',
        legend_title=p9.element_blank(),
        legend_text=p9.element_text(size=FONT_SIZE),
        legend_box_margin=0,
        figure_size=(12, 3)
    )

    g = (
        p9.ggplot(df, p9.aes(x='language', y='accuracy', fill='refine'))
        + p9.geom_bar(position='dodge', stat='identity', width=.8)
        + p9.coord_cartesian(ylim=ylim)
        + p9.scale_fill_manual(['#999999', '#EA5F94', '#FFB14E'])
        + p9.theme_void(base_size=FONT_SIZE, base_family='Arial')
        + look
    )
    g.save(filename=output_file(output))
def plot_pointplot(plot_df, y_axis_label="", use_log10=False, limits=[0, 3.2]):
    """
    Plots the pointplot

    Arguments:
        plot_df - the dataframe that contains the odds ratio and lemmas
                  (columns lemma, odds_ratio, lower_odds, upper_odds)
        y_axis_label - the label for the y axis
        use_log10 - use log10 for the y axis?
        limits - y axis limits, only applied when use_log10 is false
    """
    # Lemmas are ordered by increasing odds ratio.
    lemma_order = plot_df.sort_values(
        "odds_ratio", ascending=True).lemma.tolist()

    if use_log10:
        y_scale = p9.scale_y_log10()
    else:
        y_scale = p9.scale_y_continuous(limits=limits)

    ranges = p9.geom_pointrange(
        p9.aes(ymin="lower_odds", ymax="upper_odds"),
        position=p9.position_dodge(width=1),
        size=0.3,
        color="#253494")
    # Reference line at an odds ratio of 1.
    reference = p9.geom_hline(
        p9.aes(yintercept=1), linetype='--', color='grey')
    sizing = p9.theme(
        # 640 x 480
        figure_size=(6.66, 5),
        panel_grid_minor=p9.element_blank(),
        axis_title=p9.element_text(size=12),
        axis_text_x=p9.element_text(size=10))

    graph = p9.ggplot(plot_df, p9.aes(x="lemma", y="odds_ratio"))
    graph = graph + ranges
    graph = graph + p9.scale_x_discrete(limits=(lemma_order))
    graph = graph + y_scale
    graph = graph + reference
    graph = graph + p9.coord_flip()
    graph = graph + p9.theme_seaborn(
        context='paper', style="ticks", font_scale=1, font='Arial')
    graph = graph + sizing
    graph = graph + p9.labs(x=None, y=y_axis_label)
    return graph
def theme_cognoma(fontsize_mult=1):
    """Return the cognoma theme: theme_bw with dark-grey lines and text.

    ``fontsize_mult`` scales every font size set here (base size 14, axis
    text 12, axis titles 13).
    """
    dark_grey = "#4d4d4d"
    base = gg.theme_bw(base_size=14 * fontsize_mult)
    overrides = gg.theme(
        line=gg.element_line(color=dark_grey),
        rect=gg.element_rect(fill="white", color=None),
        text=gg.element_text(color="black"),
        axis_ticks=gg.element_line(color=dark_grey),
        legend_key=gg.element_rect(color=None),
        panel_border=gg.element_rect(color=dark_grey),
        panel_grid=gg.element_line(color="#b3b3b3"),
        # Only horizontal major grid lines remain.
        panel_grid_major_x=gg.element_blank(),
        panel_grid_minor=gg.element_blank(),
        strip_background=gg.element_rect(fill="#FEF2E2", color=dark_grey),
        axis_text=gg.element_text(size=12 * fontsize_mult, color=dark_grey),
        axis_title_x=gg.element_text(size=13 * fontsize_mult,
                                     color=dark_grey),
        axis_title_y=gg.element_text(size=13 * fontsize_mult,
                                     color=dark_grey))
    return base + overrides
def create_length_plot(len_df, legend_position='right', legend_box='vertical'):
    """Build per-task density histograms of ``x`` with the mean marked.

    ``len_df`` supplies the columns ``x``, ``Method`` and ``Task``. For every
    (Task, Method) pair the mean is drawn as a dashed vertical segment with
    its value printed above it. ``legend_position`` and ``legend_box`` are
    passed on to the theme. Returns the plotnine plot.
    """
    means = len_df.groupby(['Task', 'Method']).mean().reset_index()
    # A column with a blank name gives the linetype legend an entry
    # ('Mean Length') without a legend title.
    means[' '] = 'Mean Length'

    bars = geom_histogram(binwidth=2, position='identity', alpha=.6)
    mean_values = geom_text(aes(x='x', y=.22, label='x', color='Method'),
                            means,
                            inherit_aes=False,
                            format_string='{:.1f}',
                            show_legend=False)
    mean_lines = geom_segment(
        aes(x='x', xend='x', y=0, yend=.205, linetype=' '),
        means,
        inherit_aes=False,
        color='black')
    layout = theme(
        aspect_ratio=1,
        legend_title=element_blank(),
        legend_position=legend_position,
        legend_box=legend_box,
    )

    figure = ggplot(len_df) + aes(x='x', fill='Method', y='..density..')
    figure = figure + bars
    figure = figure + mean_values
    figure = figure + mean_lines
    figure = figure + scale_linetype_manual(['dashed'])
    figure = figure + facet_wrap('Task')
    figure = figure + xlim(0, 20)
    figure = figure + ylim(0, .23)
    figure = figure + xlab('Example Length')
    figure = figure + ylab('Frequency')
    figure = figure + scale_color_manual(values=COLORS)
    figure = figure + scale_fill_manual(values=COLORS)
    figure = figure + theme_fs()
    figure = figure + layout
    return figure
def plot_breakdown(cip_df: pd.DataFrame):
    """Stacked bar plot of increasing and decreasing stocks per sector in the specified df

    Columns whose name starts with ``bin_`` are ignored; the remaining
    columns are summed per row and a row counts as 'up' when that sum is
    >= 0, otherwise 'down'. Sectors come from ``stocks_by_sector()``, joined
    on ``asx_code`` against the index of ``cip_df``.

    Returns ``None`` when no row has a sector, otherwise the result of
    ``plot_as_inline_html_data`` for the plot.
    """

    def direction(row):
        return 'up' if row['sum'] >= 0.0 else 'down'

    bin_columns = [
        colname for colname in cip_df.columns if colname.startswith('bin_')
    ]
    totals = cip_df.drop(columns=bin_columns).sum(axis='columns')
    df = pd.DataFrame(totals, columns=['sum'])
    df = df.merge(stocks_by_sector(), left_index=True, right_on='asx_code')
    if len(df) == 0:  # no stock in cip_df have a sector? ie. ETF?
        return None

    assert set(df.columns) == set(['sum', 'asx_code', 'sector_name'])
    df['increasing'] = df.apply(direction, axis=1)

    # Category order follows value_counts(), i.e. sectors by number of stocks.
    sector_order = df['sector_name'].value_counts().index.tolist()
    df = df.assign(sector_name_cat=pd.Categorical(df['sector_name'],
                                                  categories=sector_order))

    look = p9.theme(axis_text_y=p9.element_text(size=7),
                    subplots_adjust={"left": 0.2, 'right': 0.85},
                    legend_title=p9.element_blank())

    plot = p9.ggplot(
        df, p9.aes(x='factor(sector_name_cat)', fill='factor(increasing)'))
    plot = plot + p9.geom_bar()
    plot = plot + p9.labs(x="Sector", y="Number of stocks")
    plot = plot + look
    plot = plot + p9.coord_flip()
    return plot_as_inline_html_data(plot)
class THEME:
    """Shared colors, plotnine scales and theme objects used by the plots."""

    # Dark background color.
    bgcolor = "#293241"

    # Loader appearance settings.
    LOADER_COLOR = "#2a9d8f"
    LOADER_TYPE = "dot"

    # Categorical palettes.
    colors_light = [
        "#d88c9a",
        "#f2d0a9",
        "#f1e3d3",
        "#99c1b9",
        "#8e7dbe",
        "#50514f",
        "#f25f5c",
        "#ffe066",
        "#247ba0",
        "#70c1b3",
        "#c97c5d",
        "#b36a5e",
    ]
    colors_dark = [
        "#e07a5f",
        "#3d405b",
        "#81b29a",
        "#2b2d42",
        "#f77f00",
        "#6d597a",
    ]

    # Base theme: theme_bw without the panel border.
    mt = theme_bw() + theme(panel_border=element_blank())

    # Manual scales built from the light palette (fill and line color).
    cat_colors = scale_fill_manual(values=colors_light)
    cat_colors_lines = scale_color_manual(values=colors_light)

    # Two-color fill gradient.
    gradient_colors = scale_fill_gradient("#ce4257", "#aad576")

    FILL = 1
    COLOR = 2

    # Figure size tuple for long figures.
    LONG_FIGURE = (10, 20)
def gen_fig():
    """Save one line plot per question showing length and buzzing position.

    Loads the pickle at ``data_dir``; its first two items are a sequence of
    question-edit lists and the matching sequence of buzz lists. For every
    pair with at least 5 buzzes that are not all ``'NULL'``, the word count
    of each edit and its buzzing position are plotted against the edit
    index, and the figure is saved as ``<k>.pdf`` in ``fig_dir``, where
    ``k`` is the index of the pair. A ``'NULL'`` buzz is replaced by the
    word count of that edit. The index of every plotted pair is printed.
    """
    X_title = 'Edit Iteration'
    Y_title = 'Position in Question'
    C_title = 'Type'

    # NOTE: unpickling executes arbitrary code - the file must be trusted.
    # The context manager closes the file handle even if loading fails.
    with open(data_dir, 'rb') as handle:
        stuff = pickle.load(handle)
    all_questions, all_buzzes = stuff[0], stuff[1]

    for k, (questions, buzzes) in enumerate(zip(all_questions, all_buzzes)):
        # Skip short histories and histories without any buzz.
        if len(buzzes) < 5:
            continue
        if all(x == 'NULL' for x in buzzes):
            continue
        print(k)

        # Long format: two rows (length, buzz position) per edit iteration.
        length_buzzing_positions = {X_title: [], Y_title: [], C_title: []}
        for i, (q, b) in enumerate(zip(questions, buzzes)):
            length = len(q.split())
            length_buzzing_positions[X_title].append(i)
            length_buzzing_positions[Y_title].append(length)
            length_buzzing_positions[C_title].append('Question Length')

            if b == 'NULL':
                b = length
            length_buzzing_positions[X_title].append(i)
            length_buzzing_positions[Y_title].append(b)
            length_buzzing_positions[C_title].append('Buzzing Position')

        df = pd.DataFrame(length_buzzing_positions)
        p = (ggplot(df)
             + geom_path(aes(x=X_title, y=Y_title, color=C_title), size=2)
             + theme(
                 legend_title=element_blank(),
                 legend_position='top',
             ))
        p.save(os.path.join(fig_dir, '{}.pdf'.format(k)))
def __init__(self, *args, **kwargs):
    """See main class docstring."""
    p9.theme_matplotlib.__init__(self, *args, **kwargs)

    # gray used in themes.theme_matplotlib
    gray = '#D9D9D9'
    thin_gray = {'color': gray, 'size': 0.7}

    overrides = p9.theme(
        panel_border=p9.element_rect(**thin_gray),
        # No axis lines or ticks.
        axis_line=p9.element_blank(),
        axis_ticks_length=0,
        axis_ticks=p9.element_blank(),
        # Major grid only.
        panel_grid_major=p9.element_line(**thin_gray),
        panel_grid_minor=p9.element_blank(),
        # plot panel on top of grid
        panel_ontop=True,
    )
    self.add_theme(overrides, inplace=True)
def __init__(self, args, display_title='Analysis'):
    """Initialise the base class, load fusion positions and set plot options.

    ``args.fusion_genomeDir`` is parsed with ``Mkref_fusion.parse_genomeDir``
    to find the ``fusion_pos`` file, which is read into ``self.pos_dict``.
    ``self.p9_theme`` holds plotnine theme keyword arguments: thick black
    axis lines, black tick labels, and no grid, border or panel background.
    """
    super().__init__(args, display_title)

    genome_info = Mkref_fusion.parse_genomeDir(args.fusion_genomeDir)
    self.pos_dict = Count_fusion.read_pos_file(genome_info['fusion_pos'])

    theme_options = {}
    for axis in ('x', 'y'):
        theme_options['axis_line_' + axis] = p9.element_line(
            size=2, colour="black")
    for hidden in ('panel_grid_major', 'panel_grid_minor', 'panel_border',
                   'panel_background'):
        theme_options[hidden] = p9.element_blank()
    for axis in ('x', 'y'):
        theme_options['axis_text_' + axis] = p9.element_text(colour="black")
    self.p9_theme = theme_options
def plot_distributions_bar_plot_grid(dataframe, figure_size=(14, 4)):
    """
    Build dodged bar plots of row counts per threshold, one column per
    iterations value.

    ``dataframe`` supplies the columns ``threshold`` (x), ``value`` (fill,
    with 0 shown as 'Stable' and 1 as 'Unstable') and ``iterations`` (grid
    columns). Each bar is annotated with its count. ``figure_size`` is
    passed on to the theme.
    """
    legend_names = {0: 'Stable', 1: 'Unstable'}
    bar_colors = {0: '#009e73', 1: '#d55e00'}

    def stability_names(keys):
        return [legend_names[x] for x in keys]

    def column_title(x):
        return f'iters = {x}'

    # Count printed just above each bar, dodged like the bars themselves.
    counts = p9.geom_text(p9.aes(label='stat(count)'),
                          stat='count',
                          position=p9.position_dodge(0.9),
                          size=7,
                          va='bottom')

    look = p9.theme(
        # No y axis name.
        axis_title_y=p9.element_blank(),
        # Font size of the x and y tick labels.
        axis_text_x=p9.element_text(size=7),
        axis_text_y=p9.element_text(size=7),
        # Legend on top, without title, with a reduced margin.
        legend_title=p9.element_blank(),
        legend_position='top',
        legend_box_margin=2,
        figure_size=figure_size,
    )

    plot = p9.ggplot(dataframe, p9.aes(x='threshold', fill='value'))
    plot = plot + p9.geom_bar(position='dodge')
    plot = plot + counts
    plot = plot + p9.scale_x_discrete(name='Threshold')
    # expand is (mul_bottom, add_bottom, mul_top, add_top): room on top only.
    plot = plot + p9.scale_y_continuous(name='Count', expand=(0, 0, 0, 500))
    plot = plot + p9.scale_fill_manual(values=bar_colors,
                                       labels=stability_names)
    plot = plot + p9.facet_grid('. ~ iterations',
                                labeller=p9.labeller(cols=column_title))
    plot = plot + look
    return plot
def plot_restaurants_per_neighborhood(filepath, restaurant_data_file,
                                      pittsburgh_shapefile):
    """Save a map of neighborhoods shaded by their number of restaurants.

    The restaurant CSV (``filepath + restaurant_data_file``) must have
    ``longitude`` and ``latitude`` columns; the shapefile must have ``hood``,
    ``hood_no`` and ``geometry``. Each restaurant is assigned to the
    neighborhood polygon it intersects, restaurants are counted per
    ``hood_no``, and the map is written to ``restaurant_map.png`` in the
    current working directory. Nothing is returned.
    """
    restaurants = pd.read_csv(filepath + restaurant_data_file)
    points = gpd.points_from_xy(restaurants.longitude, restaurants.latitude)
    restaurant_locations = gpd.GeoDataFrame(
        restaurants, geometry=points).filter(items=["geometry"])

    # import Pittsburgh neighborhood shapefile
    neighborhood_polygons = gpd.read_file(pittsburgh_shapefile).filter(
        items=["hood", "hood_no", "geometry"])

    # spatial join to figure out which neighborhood each restaurant is in
    # NOTE(review): newer geopandas names this argument `predicate` - confirm
    # against the installed version.
    located = gpd.sjoin(restaurant_locations,
                        neighborhood_polygons,
                        how="inner",
                        op="intersects")

    # After count(), the "hood" column holds the number of joined rows.
    per_hood = (located.groupby("hood_no").count().reset_index()
                .filter(items=["hood_no", "hood"])
                .rename(columns={"hood": "num_restaurants"}))

    # Left merge keeps neighborhoods without any restaurant.
    restaurants_per_shape = gpd.GeoDataFrame(
        pd.merge(neighborhood_polygons, per_hood, how="left"))

    bare = p.theme(
        panel_background=p.element_rect(fill="white"),
        axis_text_x=p.element_blank(),
        axis_text_y=p.element_blank(),
        axis_ticks_major_x=p.element_blank(),
        axis_ticks_major_y=p.element_blank(),
    )

    restaurant_map = p.ggplot(restaurants_per_shape)
    restaurant_map = restaurant_map + p.geom_map(
        p.aes(fill="num_restaurants"))
    restaurant_map = restaurant_map + p.scale_colour_gradient(low="white",
                                                              high="black")
    restaurant_map = restaurant_map + bare
    restaurant_map = restaurant_map + p.scale_fill_gradient(
        low="#efefef", high="#073763", name="# Restaurants")
    restaurant_map.save("restaurant_map.png")
def scatter_plot2(df1, df2, xcol, ycol, domain, color1='black', color2='red',
                  xname=None, yname=None, log=False, width=6, height=6,
                  clamp=True, tickCount=5):
    """Overlay two scatter plots of ``ycol`` against ``xcol``.

    ``df1`` is drawn in ``color1`` and ``df2`` in ``color2``, each with rug
    marks on the top and right sides. ``domain`` is a (low, high) pair used
    as the limits of both axes; with ``clamp`` every value above ``high`` is
    set to ``high`` (on copies, the inputs are not modified). Dashed guide
    lines mark the diagonal and the upper limit of each axis. ``xname`` and
    ``yname`` default to the column names, ``log`` selects log10 axes, and
    ``width``/``height`` give the figure size. ``tickCount`` is accepted but
    not used. Returns the plotnine plot.
    """
    assert len(domain) == 2

    POINT_SIZE = 1.5
    DASH_PATTERN = (0, (6, 2))
    upper = domain[1]

    xname = xcol if xname is None else xname
    yname = ycol if yname is None else yname

    # formatter for axes' labels
    ax_formatter = mizani.custom_format('{:n}')

    def clamped(frame):
        # clamp overflowing values on a deep copy, x column first
        frame = frame.copy(deep=True)
        for col in (xcol, ycol):
            frame.loc[frame[col] > upper, col] = upper
        return frame

    if clamp:
        df1 = clamped(df1)
        df2 = clamped(df2)

    if log:
        make_x_scale, make_y_scale = p9.scale_x_log10, p9.scale_y_log10
    else:
        make_x_scale = p9.scale_x_continuous
        make_y_scale = p9.scale_y_continuous

    parts = [
        p9.aes(x=xcol, y=ycol),
        # scatter layers
        p9.geom_point(size=POINT_SIZE, na_rm=True, color=color1, alpha=0.5),
        p9.geom_point(size=POINT_SIZE, na_rm=True, data=df2, color=color2,
                      alpha=0.5),
        p9.labs(x=xname, y=yname),
        # rug plots
        p9.geom_rug(na_rm=True, sides="tr", color=color1, alpha=0.05),
        p9.geom_rug(na_rm=True, sides="tr", data=df2, color=color2,
                    alpha=0.05),
        # axes
        make_x_scale(limits=domain, labels=ax_formatter),
        make_y_scale(limits=domain, labels=ax_formatter),
        # theme
        p9.theme_bw(),
        p9.theme(panel_grid_major=p9.element_line(color='#666666',
                                                  alpha=0.5)),
        p9.theme(panel_grid_minor=p9.element_blank()),
        p9.theme(figure_size=(width, height)),
        p9.theme(text=p9.element_text(size=24, color="black")),
        # guide lines: diagonal, vertical rule, horizontal rule
        p9.geom_abline(intercept=0, slope=1, linetype=DASH_PATTERN),
        p9.geom_vline(xintercept=upper, linetype=DASH_PATTERN),
        p9.geom_hline(yintercept=upper, linetype=DASH_PATTERN),
    ]

    scatter = p9.ggplot(df1)
    for part in parts:
        scatter = scatter + part
    return scatter
def theme_cognoma(fontsize_mult=1):
    """Return the cognoma plotnine theme.

    Built on theme_bw with dark-grey lines, ticks, borders and axis text, a
    light strip background and only horizontal major grid lines.
    ``fontsize_mult`` scales the base size (14), the axis text (12) and the
    axis titles (13).
    """
    import plotnine as gg

    line_color = "#4d4d4d"
    title_size = 13 * fontsize_mult

    overrides = {
        'line': gg.element_line(color=line_color),
        'rect': gg.element_rect(fill="white", color=None),
        'text': gg.element_text(color="black"),
        'axis_ticks': gg.element_line(color=line_color),
        'legend_key': gg.element_rect(color=None),
        'panel_border': gg.element_rect(color=line_color),
        'panel_grid': gg.element_line(color="#b3b3b3"),
        'panel_grid_major_x': gg.element_blank(),
        'panel_grid_minor': gg.element_blank(),
        'strip_background': gg.element_rect(fill="#FEF2E2",
                                            color=line_color),
        'axis_text': gg.element_text(size=12 * fontsize_mult,
                                     color=line_color),
        'axis_title_x': gg.element_text(size=title_size, color=line_color),
        'axis_title_y': gg.element_text(size=title_size, color=line_color),
    }
    return gg.theme_bw(base_size=14 * fontsize_mult) + gg.theme(**overrides)
def create_confidence_plot(conf_df):
    """Return faceted density curves of the ``x`` column of ``conf_df``.

    Curves are colored and filled by ``Method``; every ``Task`` gets its own
    facet (4 rows). The y axis is hidden and the untitled legend is placed
    on top.
    """
    components = [
        aes(x='x', color='Method', fill='Method'),
        geom_density(alpha=.45),
        facet_wrap('Task', nrow=4),
        xlab('Confidence'),
        scale_color_manual(values=COLORS),
        scale_fill_manual(values=COLORS),
        theme_fs(),
        theme(
            axis_text_y=element_blank(),
            axis_ticks_major_y=element_blank(),
            axis_title_y=element_blank(),
            legend_title=element_blank(),
            legend_position='top',
            legend_box='horizontal',
        ),
    ]

    plt = ggplot(conf_df)
    for component in components:
        plt = plt + component
    return plt
def __init__(self, base_size=11, base_family='DejaVu Sans'):
    """Initialise as theme_light, then apply this theme's overrides.

    ``base_size`` and ``base_family`` are handed to ``theme_light``. On top
    of it: light-grey ticks, a grey panel border, grey facet strips with
    black text, and no legend key background.
    """
    theme_light.__init__(self, base_size, base_family)

    border_color = '#838383'
    light_grey = '#DDDDDD'

    overrides = theme(
        axis_ticks=element_line(color=light_grey, size=0.5),
        panel_border=element_rect(fill='None', color=border_color, size=1),
        strip_background=element_rect(
            fill=light_grey, color=border_color, size=1),
        strip_text_x=element_text(color='black'),
        strip_text_y=element_text(color='black', angle=-90),
        legend_key=element_blank()
    )
    self.add_theme(overrides, inplace=True)
def getErrorPlot(self, msg="Error Occured"):
    """
    Creates a plotnine plot that shows an error message.
    To be used to display error messages across dashboards.

    parameters:
        - msg: the message to be displayed when error occurs

    returns the plot, with the message drawn in black and with axis text
    and grid lines hidden.
    """
    # A single labelled point carries the message.
    df = DataFrame({"x": [10], "y": [2], "label": [msg]})

    layers = (
        geom_text(color="Black"),
        THEME.cat_colors_lines,
        THEME.mt,
        theme(figure_size=(20, 4),
              axis_text=element_blank(),
              panel_grid_major=element_blank(),
              panel_grid_minor=element_blank()),
    )

    p = ggplot(df, aes(x="x", y="y", label="label"))
    for layer in layers:
        p = p + layer
    return p
def __init__(self, base_size=11, base_family="DejaVu Sans"):
    """Set up theme_light and layer the custom element settings on top.

    ``base_size`` and ``base_family`` are forwarded to ``theme_light``. The
    overrides cover axis ticks, the panel border, facet strip background and
    text, and remove the legend key background.
    """
    theme_light.__init__(self, base_size, base_family)

    settings = {
        "axis_ticks": element_line(color="#DDDDDD", size=0.5),
        "panel_border": element_rect(fill="None", color="#838383", size=1),
        "strip_background": element_rect(
            fill="#DDDDDD", color="#838383", size=1),
        "strip_text_x": element_text(color="black"),
        # Row strip text is rotated by -90 degrees.
        "strip_text_y": element_text(color="black", angle=-90),
        "legend_key": element_blank(),
    }
    self.add_theme(theme(**settings), inplace=True)
def ikuya_sys_plot():
    """Save the accuracy-versus-revealed-question plot as a PDF.

    Combines the records in ``2019_tacl_trick/data/ikuya_cdf.json`` with the
    frame returned by ``load_ikuya_nips()``, relabels the ``model`` column
    through ``relabel`` and orders it as Regular Test, IR Adversarial, RNN
    Adversarial. The figure is written to
    ``2019_tacl_trick/auto_fig/ikuya_cdf.pdf``; nothing is returned.
    """
    nips_df = load_ikuya_nips()
    with open('2019_tacl_trick/data/ikuya_cdf.json') as f:
        json_df = pd.DataFrame(json.load(f))
    df = pd.concat([json_df, nips_df])

    # Ordered categorical fixes the order of the legend entries.
    model_dtype = CategoricalDtype(
        ['Regular Test', 'IR Adversarial', 'RNN Adversarial'], ordered=True)
    df['model'] = df['model'].map(relabel)
    df['model'] = df['model'].astype(model_dtype)

    # Legend sits inside the panel, without background or title.
    legend_style = theme(
        legend_position=(.335, .7),
        legend_background=element_blank(),
        legend_box_margin=0,
        legend_title=element_blank())

    p = ggplot(df) + aes(x='x', y='y', color='model', xmin='x', xmax='x')
    p = p + geom_point(size=1.0, shape='.')
    p = p + xlab('Percent of Question Revealed')
    p = p + ylab('Accuracy')
    p = p + scale_y_continuous(breaks=np.linspace(0, 1, 6), limits=[0, 1])
    p = p + legend_style
    p.save('2019_tacl_trick/auto_fig/ikuya_cdf.pdf', width=3.5, height=2.5)
def plot_breakdown(ld: LazyDictionary) -> p9.ggplot:
    """Stacked bar plot of increasing and decreasing stocks per sector in the specified df

    Reads ``ld["cip_df"]`` and ``ld["stocks_by_sector"]``. Columns of
    ``cip_df`` whose name starts with ``bin_`` are ignored; the remaining
    columns are summed per row and a row counts as "up" when that sum is
    >= 0, otherwise "down". The two tables are joined on their indexes.

    Returns ``None`` when the join is empty, otherwise the result of
    ``user_theme`` applied to the plot.
    """

    def direction(row):
        return "up" if row["sum"] >= 0.0 else "down"

    cip_df = ld["cip_df"]
    bin_columns = [
        colname for colname in cip_df.columns if colname.startswith("bin_")
    ]
    totals = cip_df.drop(columns=bin_columns).sum(axis="columns")
    df = pd.DataFrame(totals, columns=["sum"])

    # ss is indexed by asx_code and has the columns asx_code and sector_name,
    # e.g. index 14D -> asx_code 14D, sector_name Industrials.
    ss = ld["stocks_by_sector"]
    df = df.merge(ss, left_index=True, right_index=True)

    if len(df) == 0:  # no stock in cip_df have a sector? ie. ETF?
        return None

    assert set(df.columns) == set(["sum", "asx_code", "sector_name"])
    df["increasing"] = df.apply(direction, axis=1)

    # Category order follows value_counts(), i.e. sectors by number of stocks.
    sector_order = df["sector_name"].value_counts().index.tolist()
    df = df.assign(sector_name_cat=pd.Categorical(df["sector_name"],
                                                  categories=sector_order))

    mapping = p9.aes(x="factor(sector_name_cat)", fill="factor(increasing)")
    plot = p9.ggplot(df, mapping)
    plot = plot + p9.geom_bar()
    plot = plot + p9.coord_flip()

    theme_options = {
        "x_axis_label": "Sector",
        "y_axis_label": "Number of stocks",
        "subplots_adjust": {
            "left": 0.2,
            "right": 0.85
        },
        "legend_title": p9.element_blank(),
        "asxtrade_want_fill_d": True,
    }
    return user_theme(plot, **theme_options)
def create_length_plot(len_df, legend_position='right', legend_box='vertical'):
    """Return faceted density histograms of ``x`` with per-method means.

    ``len_df`` supplies ``x``, ``Method`` and ``Task``. The histogram is
    filled by ``Method`` and faceted by ``Task``; the mean of each
    (Task, Method) group is shown as a dashed black segment labelled with
    its value. ``legend_position`` and ``legend_box`` configure the legend.
    """
    mean_len_df = len_df.groupby(['Task', 'Method']).mean().reset_index()
    # The blank column name keeps the linetype legend untitled.
    mean_len_df[' '] = 'Mean Length'

    components = [
        aes(x='x', fill='Method', y='..density..'),
        geom_histogram(binwidth=2, position='identity', alpha=.6),
        # Mean value printed above the segment.
        geom_text(
            aes(x='x', y=.22, label='x', color='Method'),
            mean_len_df,
            inherit_aes=False,
            format_string='{:.1f}',
            show_legend=False
        ),
        # Vertical segment at the mean.
        geom_segment(
            aes(x='x', xend='x', y=0, yend=.205, linetype=' '),
            mean_len_df,
            inherit_aes=False,
            color='black'
        ),
        scale_linetype_manual(['dashed']),
        facet_wrap('Task'),
        xlim(0, 20),
        ylim(0, .23),
        xlab('Example Length'),
        ylab('Frequency'),
        scale_color_manual(values=COLORS),
        scale_fill_manual(values=COLORS),
        theme_fs(),
        theme(
            aspect_ratio=1,
            legend_title=element_blank(),
            legend_position=legend_position,
            legend_box=legend_box,
        ),
    ]

    plt = ggplot(len_df)
    for component in components:
        plt = plt + component
    return plt
def theme_tufte(base_size=11, base_family='serif', lines=True, ticks=True):
    """
    Minimal theme after Chapter 6 'Data-Ink Maximization and Graphical
    Design` of Edward Tufte's 'The Visual Display of Quantitative
    Information`.

    Starts from theme_bw and removes the legend, panel and strip
    backgrounds and the panel grid, keeping thin axis lines and ticks on a
    white plot background.

    Parameters
    ----------
    base_size : int, optional
        Base font size. All text sizes are scaled versions of
        the base font size. Default is 11.
    base_family : str, optional
        Base font family. Default is 'serif'.
    lines : bool, optional
        Draw axis spines. Default is True.
    ticks : bool, optional
        Draw axis ticks. Default is True.

    Returns
    -------
    Plotnine theme.
    """
    thin = 0.5
    stripped = p9.theme(legend_background=p9.element_blank(),
                        legend_key=p9.element_blank(),
                        panel_background=p9.element_blank(),
                        strip_background=p9.element_blank(),
                        plot_background=p9.element_rect(fill='white'),
                        axis_line=p9.element_line(size=thin),
                        axis_ticks=p9.element_line(size=thin),
                        panel_grid=p9.element_blank())
    result = p9.theme_bw(base_size=base_size,
                         base_family=base_family) + stripped

    # Optional removals are layered on afterwards, ticks first.
    if not ticks:
        result = result + p9.theme(axis_ticks=p9.element_blank())
    if not lines:
        result = result + p9.theme(axis_line=p9.element_blank())
    return result
def rel_plot(sbs, variant, jitter=0.01):
    """Log-log scatter of ``ratio`` against ``base`` for one variant.

    Rows of ``sbs`` whose ``variant`` column equals ``variant`` are plotted
    as jittered points colored by ``dataset`` (``jitter`` is both the width
    and the height of the jitter). A y = 1/x curve and dashed guide lines at
    y = 0.9, y = 1.1, x = 0.1 and x = 0.3 are drawn on top. ``sbs`` must also
    provide ``session_index`` and ``base_session_index``. Returns the
    plotnine plot.
    """
    xcol = "base"
    ycol = "ratio"

    subset = sbs[sbs.variant == variant]
    # Helper columns: plotting coordinates, original index and a
    # "<session> vs. <base session>" description.
    plotdata = subset.assign(x=subset[xcol], y=subset[ycol])
    plotdata = plotdata.assign(sbs_index=plotdata.index.values)
    session_pairs = plotdata[["session_index", "base_session_index"]].apply(
        tuple, axis=1)
    plotdata = plotdata.assign(session_text=session_pairs.map(
        lambda tup: f"{tup[0]} vs. {tup[1]}"))

    # Reference curve y = 1 / x sampled at 5 log-spaced points.
    curve_x = np.geomspace(0.02, 1, num=5)
    diag_df = pd.DataFrame({"x": curve_x, "y": 1 / curve_x})

    dashed = {"linetype": "dashed", "alpha": 0.7}

    points = geom_jitter(
        aes(x="x", y="y", fill="dataset", color="dataset"),
        width=jitter,
        height=jitter,
        alpha=0.6,
        size=1.0,
    )
    layout = theme(
        figure_size=(8, 5),
        legend_position="top",
        subplots_adjust={"hspace": 0.5},
        legend_title=element_blank(),
        legend_box_margin=-1,
        legend_margin=0.0,
        axis_text=element_text(size=12, margin={
            "t": 0.2,
            "l": -0.3
        }),
        legend_text=element_text(size=11),
        axis_title=element_text(size=12,
                                margin={
                                    "r": -0.2,
                                    "b": 0.0,
                                    "l": 0,
                                    "t": 0.0
                                }),
    )

    scatterplot = ggplot(plotdata)
    scatterplot = scatterplot + points
    scatterplot = scatterplot + geom_line(aes(x="x", y="y"), data=diag_df)
    scatterplot = scatterplot + ylab(ycol)
    scatterplot = scatterplot + geom_hline(aes(yintercept=1.1), **dashed)
    scatterplot = scatterplot + geom_hline(aes(yintercept=0.9), **dashed)
    scatterplot = scatterplot + geom_vline(aes(xintercept=0.1, ), **dashed)
    scatterplot = scatterplot + geom_vline(aes(xintercept=0.3, ), **dashed)
    scatterplot = scatterplot + xlab(xcol)
    scatterplot = scatterplot + layout
    scatterplot = scatterplot + scale_x_log10(
        labels=make_labeler(brief_format), breaks=[0.01, 0.1, 0.3, 1.0])
    scatterplot = scatterplot + scale_y_log10(
        labels=make_labeler(brief_format),
        breaks=[0.5, 0.9, 1.1, 2.0, 3.0, 6, 12])
    return scatterplot
def ologram_merge_stats(inputfiles=None,
                        pdf_width=None,
                        pdf_height=None,
                        output=None,
                        labels=None):
    """Merge several tab-separated result tables into one heatmap PDF.

    Each input table is read with pandas, tagged with a dataset label and
    row-bound with the others. The merged table is drawn as a tile plot
    (``Feature`` x ``dataset``) filled by
    ``summed_bp_overlaps_log2_fold_change``, with a diamond per cell coloured
    by ``-log10(summed_bp_overlaps_pvalue)``.

    Parameters
    ----------
    inputfiles : iterable of file objects
        Open tables; each must expose ``.name`` and is passed to
        ``pandas.read_csv``. The columns ``feature_type``,
        ``summed_bp_overlaps_pvalue`` and
        ``summed_bp_overlaps_log2_fold_change`` are used.
    pdf_width, pdf_height : number or None
        Page size. When ``None`` the size is 0.6 per dataset (width) or per
        feature (height), capped at 100 and 500 respectively.
    output : file object
        Its ``.name`` is the path of the PDF that is written.
    labels : str or None
        Comma-separated dataset labels, one per input file. When ``None`` the
        name of each file's enclosing directory is used instead.

    Notes
    -----
    Invalid labels are reported through ``message(..., type="ERROR")``
    (presumably aborting the command -- confirm against ``message``).
    """
    # -------------------------------------------------------------------------
    # Check user provided labels
    # -------------------------------------------------------------------------

    if labels is not None:

        labels = labels.split(",")

        for elmt in labels:
            if not re.search("^[A-Za-z0-9_]+$", elmt):
                # The option checked here is --labels.
                message("Only alphanumeric characters and '_' allowed for --labels",
                        type="ERROR")

        if len(labels) != len(inputfiles):
            message("--labels: the number of labels should be"
                    " the same as the number of input files ",
                    type="ERROR")

        if len(labels) != len(set(labels)):
            message("Redundant labels not allowed.", type="ERROR")

    # -------------------------------------------------------------------------
    # Loop over input files
    # -------------------------------------------------------------------------

    pval_col = 'summed_bp_overlaps_pvalue'
    df_list = []
    df_label = []

    for pos, infile in enumerate(inputfiles):
        message("Reading file : " + infile.name)

        # Read the dataset into a temporary dataframe
        df_tmp = pd.read_csv(infile, sep='\t', header=0, index_col=None)

        # Change name of 'feature_type' column.
        df_tmp = df_tmp.rename(index=str, columns={"feature_type": "Feature"})

        # Assign the name of the dataset to a new column
        if labels is None:
            file_short_name = os.path.basename(
                os.path.normpath(os.path.dirname(infile.name)))
        else:
            file_short_name = labels[pos]
        df_label.append(file_short_name)

        df_tmp = df_tmp.assign(dataset=file_short_name)

        # Pval set to 0 or -1 are changed to 1e-320 and NaN respectively
        df_tmp.loc[df_tmp[pval_col] == 0, pval_col] = 1e-320
        df_tmp.loc[df_tmp[pval_col] == -1, pval_col] = np.nan

        # Compute -log10(pval)
        df_tmp = df_tmp.assign(**{"-log_10(pval)": -np.log10(df_tmp[pval_col])})

        # Which p-values are significant ?
        # TODO: For now, draws all p-values. Add Benjamini-Hochberg
        # correction, and distinguish between NaN and 0.
        # NaN compares False here, so only cells without a p-value are skipped.
        df_tmp = df_tmp.assign(pval_signif=df_tmp[pval_col] > 0)

        # Add the df to the list to be subsequently merged
        df_list.append(df_tmp)

    if len(set(df_label)) < len(df_label):
        message('Enclosing directories are ambiguous and cannot be used as labels. You may use "--labels".',
                type="ERROR")

    # -------------------------------------------------------------------------
    # Concatenate dataframes (row bind)
    # -------------------------------------------------------------------------

    message("Merging dataframes.")
    df_merged = pd.concat(df_list, axis=0)

    # -------------------------------------------------------------------------
    # Plotting
    # -------------------------------------------------------------------------

    message("Plotting")
    my_plot = ggplot(data=df_merged, mapping=aes(y='Feature', x='dataset'))
    my_plot += geom_tile(aes(fill='summed_bp_overlaps_log2_fold_change'))
    my_plot += scale_fill_gradient2()
    my_plot += labs(fill="log2(fold change) for summed bp overlaps")

    # Points for p-val. Must be after geom_tile()
    my_plot += geom_point(data=df_merged.loc[df_merged['pval_signif']],
                          mapping=aes(x='dataset', y='Feature', color='-log_10(pval)'),
                          size=4, shape='D', inherit_aes=False)
    my_plot += scale_color_gradientn(colors=["#160E00", "#FFB025", "#FFE7BD"])
    my_plot += labs(color="-log10(p-value)")

    # Theming
    my_plot += theme_bw()
    my_plot += theme(panel_grid_major=element_blank(),
                     axis_text_x=element_text(rotation=90),
                     panel_border=element_blank(),
                     axis_ticks=element_blank())

    # -------------------------------------------------------------------------
    # Page size
    # -------------------------------------------------------------------------

    message("Saving")

    nb_ft = len(df_merged['Feature'].unique())
    nb_datasets = len(df_merged['dataset'].unique())

    # The caps only apply to automatically computed sizes; an explicit
    # --pdf-width / --pdf-height is used as given.
    if pdf_width is None:
        panel_width = 0.6
        pdf_width = panel_width * nb_datasets

        if pdf_width > 100:
            pdf_width = 100
            message("Setting --pdf-width to 100 (limit)")

    if pdf_height is None:
        panel_height = 0.6
        pdf_height = panel_height * nb_ft

        if pdf_height > 500:
            pdf_height = 500
            message("Setting --pdf-height to 500 (limit)")

    message("Page width set to " + str(pdf_width))
    message("Page height set to " + str(pdf_height))
    figsize = (pdf_width, pdf_height)

    # -------------------------------------------------------------------------
    # Saving
    #
    # Both pandas and plotnine use warnings for deprecated functions; they
    # are silenced for the duration of the save only.
    # -------------------------------------------------------------------------

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        message("Saving diagram to file : " + output.name)
        message("Be patient. This may be long for large datasets.")

        # NOTE : We must manually specify figure size with save_as_pdf_pages
        save_as_pdf_pages(filename=output.name,
                          plots=[my_plot + theme(figure_size=figsize)],
                          width=pdf_width,
                          height=pdf_height)
theme2 = theme_gray() theme3 = theme1 + theme2 assert theme3 == theme2 def test_add_empty_theme_element(): # An empty theme element does not alter the theme theme1 = theme_gray() + theme(axis_line_x=element_line(color='red')) theme2 = theme1 + theme(axis_line_x=element_line()) assert theme1 == theme2 l1 = element_line(color='red', size=1, linewidth=1, linetype='solid') l2 = element_line(color='blue', size=2, linewidth=2) l3 = element_line(color='blue', size=2, linewidth=2, linetype='solid') blank = element_blank() def test_add_element_heirarchy(): # parent themeable modifies child themeable theme1 = theme_gray() + theme(axis_line_x=l1) # child theme2 = theme1 + theme(axis_line=l2) # parent theme3 = theme1 + theme(axis_line_x=l3) # child, for comparison assert theme2.themeables['axis_line_x'] == \ theme3.themeables['axis_line_x'] theme1 = theme_gray() + theme(axis_line_x=l1) # child theme2 = theme1 + theme(line=l2) # grand-parent theme3 = theme1 + theme(axis_line_x=l3) # child, for comparison assert theme2.themeables['axis_line_x'] == \ theme3.themeables['axis_line_x']