def plot_replicate_density(
    df,
    batch,
    plate,
    output_file_base=None,
    output_file_extensions=[".png", ".pdf", ".svg"],
    dpi=300,
    height=1.5,
    width=2,
):
    """Build a density plot of pairwise correlations filled by replicate status.

    Parameters
    ----------
    df
        Data handed to ``gg.ggplot``; the aesthetics read its
        ``pairwise_correlation`` and ``replicate_info`` columns.
    batch, plate
        Values formatted into the plot title as ``"<batch>: <plate>"``.
    output_file_base
        When truthy, the plot is passed to ``save_figure`` together with
        ``output_file_extensions``, ``dpi``, ``height`` and ``width``.
    output_file_extensions, dpi, height, width
        Forwarded unchanged to ``save_figure``.

    Returns
    -------
    The assembled plot object.
    """
    replicate_fill = gg.scale_fill_manual(
        name="Replicate",
        labels={"True": "True", "False": "False"},
        values=["#B99638", "#2DB898"],
    )
    font_sizes = gg.theme(
        title=gg.element_text(size=9),
        axis_text=gg.element_text(size=5),
        axis_title=gg.element_text(size=8),
        legend_text=gg.element_text(size=6),
        legend_title=gg.element_text(size=7),
        strip_text=gg.element_text(size=4, color="black"),
        strip_background=gg.element_rect(colour="black", fill="#fdfff4"),
    )

    # The partial theme is added after theme_bw() so its overrides apply.
    figure = (
        gg.ggplot(df, gg.aes(x="pairwise_correlation", fill="replicate_info"))
        + gg.geom_density(alpha=0.3)
        + replicate_fill
        + gg.xlab("Pearson Correlation")
        + gg.ylab("Density")
        + gg.ggtitle("{}: {}".format(batch, plate))
        + gg.theme_bw()
        + font_sizes
    )

    if output_file_base:
        save_figure(
            figure, output_file_base, output_file_extensions, dpi, height, width
        )

    return figure
Exemple #2
0
    def plot_zmw_stats(self, **kwargs):
        """Stacked bar chart of ZMW counts per run, filled by status.

        Note
        ----
        Raises an error if :meth:`Summaries.has_zmw_stats` is not `True`.

        Parameters
        ----------
        ``**kwargs`` : dict
            Keyword arguments passed to :meth:`Summaries.zmw_stats`.

        Returns
        -------
        plotnine.ggplot.ggplot
            One column per run (``name``), stacked in reverse order, with the
            figure width scaled by the number of distinct runs.

        """
        stats = self.zmw_stats(**kwargs)
        n_runs = len(stats['name'].unique())
        n_statuses = len(stats['status'].unique())

        vertical_labels = p9.element_text(angle=90, vjust=1, hjust=0.5)
        plot = (
            p9.ggplot(stats, p9.aes(x='name', y='number', fill='status'))
            + p9.geom_col(position=p9.position_stack(reverse=True), width=0.8)
            + p9.theme(axis_text_x=vertical_labels,
                       figure_size=(0.4 * n_runs, 2.5))
            + p9.ylab('number of ZMWs')
            + p9.xlab('')
        )

        # Use the custom palette (minus its first entry) only when it has
        # enough colours for every status; otherwise keep the default scale.
        if n_statuses < len(CBPALETTE):
            plot = plot + p9.scale_fill_manual(CBPALETTE[1:])

        return plot
class THEME:
    """Namespace of shared colours, scales and theme objects for plots."""

    # Background colour; not referenced by the active theme `mt` below.
    bgcolor = "#293241"
    # Loader settings -- presumably read by a UI spinner; confirm with callers.
    LOADER_COLOR = "#2a9d8f"
    LOADER_TYPE = "dot"

    # Categorical palette used by the manual fill/colour scales below.
    colors_light = [
        "#d88c9a",
        "#f2d0a9",
        "#f1e3d3",
        "#99c1b9",
        "#8e7dbe",
        "#50514f",
        "#f25f5c",
        "#ffe066",
        "#247ba0",
        "#70c1b3",
        "#c97c5d",
        "#b36a5e",
    ]
    colors_dark = [
        "#e07a5f",
        "#3d405b",
        "#81b29a",
        "#2b2d42",
        "#f77f00",
        "#6d597a",
    ]

    # Active theme: black-and-white base with the panel border removed.
    # (A dark-background variant built on `bgcolor` used to be sketched here.)
    mt = theme_bw() + theme(panel_border=element_blank())

    cat_colors = scale_fill_manual(values=colors_light)
    cat_colors_lines = scale_color_manual(values=colors_light)
    gradient_colors = scale_fill_gradient("#ce4257", "#aad576")

    # Integer tags -- presumably select between fill and colour scales; confirm.
    FILL = 1
    COLOR = 2

    LONG_FIGURE = (10, 20)
Exemple #4
0
def create_length_plot(len_df, legend_position='right', legend_box='vertical'):
    """Histogram of example lengths per task with each method's mean marked.

    ``len_df`` is grouped by ``Task`` and ``Method`` and averaged; the
    resulting means are drawn as dashed vertical segments with a numeric
    label on top of the per-method density histograms, one panel per task.

    ``legend_position`` and ``legend_box`` are forwarded to ``theme``.
    Returns the assembled plot.
    """
    group_means = len_df.groupby(['Task', 'Method']).mean().reset_index()
    # Constant column mapped to linetype so the segments get a legend entry.
    group_means[' '] = 'Mean Length'

    mean_labels = geom_text(
        aes(x='x', y=.22, label='x', color='Method'),
        group_means,
        inherit_aes=False,
        format_string='{:.1f}',
        show_legend=False,
    )
    mean_markers = geom_segment(
        aes(x='x', xend='x', y=0, yend=.205, linetype=' '),
        group_means,
        inherit_aes=False,
        color='black',
    )
    layout = theme(
        aspect_ratio=1,
        legend_title=element_blank(),
        legend_position=legend_position,
        legend_box=legend_box,
    )

    return (
        ggplot(len_df)
        + aes(x='x', fill='Method', y='..density..')
        + geom_histogram(binwidth=2, position='identity', alpha=.6)
        + mean_labels
        + mean_markers
        + scale_linetype_manual(['dashed'])
        + facet_wrap('Task')
        + xlim(0, 20)
        + ylim(0, .23)
        + xlab('Example Length')
        + ylab('Frequency')
        + scale_color_manual(values=COLORS)
        + scale_fill_manual(values=COLORS)
        + theme_fs()
        + layout
    )
Exemple #5
0
def plot_downstream(clwe, table, output, ylim):
    """Save a dodged bar chart of accuracy per language for one ``clwe``.

    Reads the CSV located by ``data_file(table)``, keeps the rows whose
    ``clwe`` column equals ``clwe``, fixes the display order of the
    ``refine`` and ``language`` categories, and writes the chart to
    ``output_file(output)``. ``ylim`` is passed to ``coord_cartesian``.
    Nothing is returned.
    """
    refine_order = ['Original', '+retrofit', '+synthetic']
    language_order = ['DE', 'ES', 'FR', 'IT', 'JA', 'RU', 'ZH', 'AVG']

    scores = pd.read_csv(data_file(table))
    scores = scores[scores.clwe == clwe]
    scores = scores.assign(
        refine=pd.Categorical(scores['refine'], refine_order),
        language=pd.Categorical(scores['language'], language_order),
    )

    layout = p9.theme(
        plot_background=p9.element_rect(fill='white'),
        panel_grid_major_y=p9.element_line(),
        axis_text_x=p9.element_text(margin={'t': 10}),
        axis_text_y=p9.element_text(margin={'r': 8}),
        legend_position=(.7, .9),
        legend_direction='horizontal',
        legend_title=p9.element_blank(),
        legend_text=p9.element_text(size=FONT_SIZE),
        legend_box_margin=0,
        figure_size=(12, 3),
    )

    # theme_void() comes first so that `layout` overrides it.
    chart = (
        p9.ggplot(scores, p9.aes(x='language', y='accuracy', fill='refine'))
        + p9.geom_bar(position='dodge', stat='identity', width=.8)
        + p9.coord_cartesian(ylim=ylim)
        + p9.scale_fill_manual(['#999999', '#EA5F94', '#FFB14E'])
        + p9.theme_void(base_size=FONT_SIZE, base_family='Arial')
        + layout
    )
    chart.save(filename=output_file(output))
    def plot_range_comparison(self,
                              xlabel: str = '',
                              figsize: Tuple[int] = (7, 3),
                              add_text_label: bool = True,
                              **kwargs):
        """Column chart of comparison counts per category, filled by direction.

        ``**kwargs`` are forwarded to ``self.get_ranges_df``. When
        ``add_text_label`` is true, a text layer is added for the
        ``'Positive'`` rows and another for the ``'Negative'`` rows, each
        only if that direction has rows with ``counts > 0``.

        Returns the plot object.
        """
        ranges = self.get_ranges_df(**kwargs)
        chart = (
            p9.ggplot(ranges)
            + p9.aes('cat_value', 'counts', fill='direction')
            + p9.geom_col(alpha=.8)
            + p9.theme(figure_size=figsize,
                       axis_text_x=p9.element_text(rotation=45))
            + p9.scale_fill_manual(['#3f7f93', '#da3b46', '#4d4a4a'])
            + p9.labs(x=xlabel, y='Number of Comparisons', fill='R')
        )

        if not add_text_label:
            return chart

        # (direction, y-position expression, text colour) for each label layer.
        label_layers = (
            ('Positive', 'n + max(n) * .15', '#3f7f93'),
            ('Negative', 'n + max(n) * .05', '#da3b46'),
        )
        for direction, y_expression, text_colour in label_layers:
            labelled = (ranges.loc[ranges.direction == direction]
                        .loc[ranges.counts > 0])
            if labelled.size > 0:
                chart += p9.geom_text(
                    p9.aes(label='label', x='cat_value', y=y_expression),
                    inherit_aes=False,
                    size=9,
                    data=labelled,
                    color=text_colour)

        return chart
def plot_two_way_sdc(sdc_df: pd.DataFrame, alpha: float = .05, **kwargs):
    """
    Draw the significant results of a fixed-window SDC analysis as a 2D tile map.

    The x and y axes are the start indices (``start_1`` and ``start_2``) of the
    two sequences. Only rows with ``p_value < alpha`` are drawn; the tile fill
    encodes the sign of ``r`` and the tile transparency encodes ``abs(r)``.

    Parameters
    ----------
    sdc_df
        Data frame as outputted by `compute_sdc`. Its first row supplies the
        window size (``stop_1 - start_1``) shown in the title.
    alpha
        Significance threshold applied to the ``p_value`` column.
    kwargs
        Keyword arguments to pass to `plotnine.theme` to customize the plot.
    Returns
    -------
    p9.ggplot.ggplot
        Plot
    """
    first_row = sdc_df.iloc[0]
    fragment_size = int(first_row['stop_1'] - first_row['start_1'])

    def _sign_label(r_value):
        return '$r > 0$' if r_value > 0 else '$r < 0$'

    significant = sdc_df.loc[sdc_df.p_value < alpha]
    significant = significant.assign(r_str=significant['r'].apply(_sign_label))

    title = (f'Two-Way SDC plot for $S = {fragment_size}$'
             + r' and $\alpha =$' + f'{alpha}')

    return (
        p9.ggplot(significant)
        + p9.aes('start_1', 'start_2', fill='r_str', alpha='abs(r)')
        + p9.geom_tile()
        + p9.scale_fill_manual(['#da2421', 'black'])
        + p9.scale_y_reverse()
        + p9.theme(**kwargs)
        + p9.guides(alpha=False)
        + p9.labs(x='$X_i$', y='$Y_j$', fill='$r$', title=title)
    )
Exemple #8
0
def bsuite_bar_plot(df_in: pd.DataFrame,
                    sweep_vars: Sequence[str] = None) -> gg.ggplot:
  """Bar plot of bsuite scores per environment, coloured by type.

  The data is first passed through `_clean_bar_plot_data`. If any row is
  not `finished`, alpha is mapped to `finished`. When `sweep_vars` is
  given, the plot is faceted by it in a single column and the figure
  height grows with the number of distinct sweep settings.
  """
  scores = _clean_bar_plot_data(df_in, sweep_vars)

  chart = (
      gg.ggplot(scores)
      + gg.aes(x='env', y='score', colour='type', fill='type')
      + gg.geom_bar(position='dodge', stat='identity')
      + gg.geom_hline(yintercept=1., linetype='dashed', alpha=0.5)
      + gg.scale_colour_manual(plotting.CATEGORICAL_COLOURS)
      + gg.scale_fill_manual(plotting.CATEGORICAL_COLOURS)
      + gg.xlab('experiment')
      + gg.theme(axis_text_x=gg.element_text(angle=25, hjust=1))
  )

  # Fade the bars of jobs that have not finished yet.
  every_job_finished = all(scores.finished)
  if not every_job_finished:
    chart += gg.aes(alpha='finished')
    chart += gg.scale_alpha_discrete(range=[0.3, 1.0])

  # One facet row per distinct sweep setting; a single row otherwise.
  n_hypers = 1
  if sweep_vars:
    chart += gg.facet_wrap(sweep_vars, labeller='label_both', ncol=1)
    n_hypers = scores[sweep_vars].drop_duplicates().shape[0]

  figure_height = 3 * n_hypers + 1
  return chart + gg.theme(figure_size=(14, figure_height))
def plot_hypothesis(hypothesis, file_name):
    """Plot per-bin means and significance tests and save them as PDF pages.

    ``hypothesis`` is indexed as ``hypothesis[bin_type][score]`` and each
    such entry is iterated as a sequence of single-item mappings
    ``{bin_name: result}``, where ``result`` exposes ``p1``, ``p2``,
    ``mean1``, ``mean2``, ``t`` and ``p``. The score names are taken from
    the first bin type and reused for every bin type.

    For every (bin type, score) pair two plots are produced: the two
    datasets' mean score per bin, and the p-value per bin coloured by
    whether it is significant at the 0.05 level. All plots are handed to
    ``save_as_pdf_pages`` together with ``file_name``.

    Rows are gathered in lists and each frame is built once, because
    ``DataFrame.append`` was removed in pandas 2.0 and copies the whole
    frame on every call.
    """
    bin_types = list(hypothesis)
    scores = list(hypothesis[bin_types[0]])
    plots = []
    for bin_type, score in product(bin_types, scores):
        mean_name = "Mean: " + score
        mean_rows = []
        test_rows = []
        for bin_ in hypothesis[bin_type][score]:
            # Only the first (bin name, result) pair of the mapping is used.
            bin_name, h = list(bin_.items())[0]
            mean_rows.append({
                "Bin": bin_name,
                'Dataset': h.p1,
                mean_name: str(round(float(h.mean1), 3)),
            })
            mean_rows.append({
                "Bin": bin_name,
                'Dataset': h.p2,
                mean_name: str(round(float(h.mean2), 3)),
            })
            p_value = h.p
            test_rows.append({
                "Bin": bin_name,
                't-statistic': str(round(h.t, 3)),
                'p-value': str(p_value),
                '95% Confidence':
                "Significant" if p_value <= 0.05 else "Not Significant",
            })

        # dtype=object matches the frames the append-based version built,
        # so every column is still treated the same way by the plots.
        df = pd.DataFrame(mean_rows,
                          columns=["Bin", "Dataset", mean_name],
                          dtype=object)
        df2 = pd.DataFrame(
            test_rows,
            columns=["Bin", "t-statistic", 'p-value', '95% Confidence'],
            dtype=object)

        plots.append(
            (ggplot(df, aes(x='Bin', y=mean_name, fill='Dataset')) +
             geom_col(stat='identity', position='dodge') +
             ggtitle("{0} bin distribution| {1}\nBin's Average Scores".format(
                 bin_type, score))))
        plots.append(
            (ggplot(df2, aes(x='Bin', y='p-value', fill='95% Confidence')) +
             geom_col(stat='identity', width=0.2) + ggtitle(
                 "{0} bin distribution| {1}\nBin's 95% Confidence Level Test".
                 format(bin_type, score)) +
             scale_fill_manual(values={
                 'Significant': "#214517",
                 'Not Significant': '#c62f2d'
             })))
    save_as_pdf_pages(plots, file_name)
Exemple #10
0
def _plot_regret_group(df: pd.DataFrame, group_col: str) -> gg.ggplot:
  """Plot smoothed average regret per episode, one curve per group.

  The legend label is `group_col` with underscores replaced by spaces.
  Note that a categorical column with that label is written into `df`
  itself, so the caller's frame is modified.
  """
  legend_label = group_col.replace('_', ' ')
  df[legend_label] = df[group_col].astype('category')

  grouping = gg.aes(x='episode', y='average_regret',
                    group=legend_label, colour=legend_label,
                    fill=legend_label)
  mean_curve = gg.geom_smooth(
      method=smoothers.mean, span=0.1, size=1.75, alpha=0.1)

  return (
      gg.ggplot(df)
      + grouping
      + mean_curve
      + gg.scale_colour_manual(values=FIVE_COLOURS)
      + gg.scale_fill_manual(values=FIVE_COLOURS)
  )
Exemple #11
0
def plot_replicate_correlation(
    df,
    batch,
    plate,
    facet_string=None,
    split_samples=False,
    output_file_base=None,
    output_file_extensions=[".png", ".pdf", ".svg"],
    dpi=500,
    height=4,
    width=5,
    return_plot=False,
):
    """Box plot with jittered points of ``similarity_metric`` by replicate group.

    Parameters
    ----------
    df
        Data handed to ``gg.ggplot``; the aesthetics read its
        ``group_replicate`` and ``similarity_metric`` columns.
    batch, plate
        Values formatted into the title as ``"<batch>: <plate>"``.
    facet_string
        Facet specification given to ``gg.facet_wrap``; required when
        ``split_samples`` is true (checked with an ``assert``).
    split_samples
        Whether to facet the plot by ``facet_string``.
    output_file_base
        When truthy, the plot is passed to ``save_figure`` together with
        ``output_file_extensions``, ``dpi``, ``height`` and ``width``.
    return_plot
        When true the plot is returned; otherwise the function returns
        ``None``.
    """
    boxes = gg.geom_boxplot(
        alpha=0.3, outlier_alpha=0, width=0.8, notchwidth=0.25, fatten=1.5
    )
    points = gg.geom_jitter(shape=".", size=0.001, alpha=0.3, width=0.3, height=0)
    replicate_fill = gg.scale_fill_manual(
        name="Replicate",
        labels={"True": "True", "False": "False"},
        values=["#B99638", "#2DB898"],
    )
    small_fonts = gg.theme(
        subplots_adjust={"wspace": 0.2},
        title=gg.element_text(size=5),
        axis_text=gg.element_text(size=4),
        axis_title=gg.element_text(size=5),
        legend_text=gg.element_text(size=4),
        legend_title=gg.element_text(size=5),
        strip_text=gg.element_text(size=4, color="black"),
        strip_background=gg.element_rect(colour="black", fill="#fdfff4"),
    )

    figure = (
        gg.ggplot(
            df,
            gg.aes(x="group_replicate", y="similarity_metric", fill="group_replicate"),
        )
        + boxes
        + points
        + replicate_fill
        + gg.xlab("Replicates")
        + gg.ylab("Pearson Correlation")
        + gg.ggtitle("{}: {}".format(batch, plate))
        + gg.theme_bw()
        + small_fonts
    )

    if split_samples:
        assert facet_string, "To split samples, specify a facet_string"
        figure += gg.facet_wrap(facet_string)

    if output_file_base:
        save_figure(
            figure, output_file_base, output_file_extensions, dpi, height, width
        )

    # Falls through to an implicit None when the caller did not ask for it.
    if return_plot:
        return figure
Exemple #12
0
def create_confidence_plot(conf_df):
    """Density of ``x`` per method, one panel per task laid out on four rows.

    The y axis text, ticks and title are blanked and the untitled legend is
    placed horizontally on top. Returns the assembled plot.
    """
    hidden_y_axis_and_top_legend = theme(
        axis_text_y=element_blank(),
        axis_ticks_major_y=element_blank(),
        axis_title_y=element_blank(),
        legend_title=element_blank(),
        legend_position='top',
        legend_box='horizontal',
    )
    return (
        ggplot(conf_df)
        + aes(x='x', color='Method', fill='Method')
        + geom_density(alpha=.45)
        + facet_wrap('Task', nrow=4)
        + xlab('Confidence')
        + scale_color_manual(values=COLORS)
        + scale_fill_manual(values=COLORS)
        + theme_fs()
        + hidden_y_axis_and_top_legend
    )
Exemple #13
0
def _bar_plot_compare(df: pd.DataFrame) -> gg.ggplot:
    """Bar plot of bsuite score data with one bar per agent.

    A dashed reference line is drawn at a score of 1. If any row of `df`
    is not `finished`, alpha is additionally mapped to `finished`.
    """
    tilted_labels = gg.theme(axis_text_x=gg.element_text(angle=25, hjust=1))
    chart = (
        gg.ggplot(df)
        + gg.aes(x='agent', y='score', colour='agent', fill='agent')
        + gg.geom_bar(position='dodge', stat='identity')
        + gg.geom_hline(yintercept=1., linetype='dashed', alpha=0.5)
        + tilted_labels
        + gg.scale_colour_manual(plotting.CATEGORICAL_COLOURS)
        + gg.scale_fill_manual(plotting.CATEGORICAL_COLOURS)
    )

    # Fade the bars of jobs that have not finished yet.
    if all(df.finished):
        return chart
    chart += gg.aes(alpha='finished')
    chart += gg.scale_alpha_discrete(range=[0.3, 1.0])
    return chart
Exemple #14
0
def plot_regret_average(df_in: pd.DataFrame,
                        group_col: str,
                        episode: int,
                        sweep_vars: Optional[Sequence[str]] = None,
                        regret_col: str = 'total_regret') -> gg.ggplot:
    """Bar plot of the average regret per group after `episode` episodes.

    The input is prepared by `_preprocess_ave_regret`; the group column is
    addressed by `group_col` with underscores replaced by spaces. The
    finished plot is passed through `facet_sweep_plot` with `sweep_vars`.
    """
    prepared = _preprocess_ave_regret(
        df_in, group_col, episode, sweep_vars, regret_col)
    legend_label = group_col.replace('_', ' ')
    y_title = 'average regret after {} episodes'.format(episode)

    bars = (
        gg.ggplot(prepared)
        + gg.aes(x=legend_label, y='average_regret', fill=legend_label)
        + gg.geom_bar(stat='identity')
        + gg.scale_fill_manual(values=FIVE_COLOURS)
        + gg.ylab(y_title)
    )
    return facet_sweep_plot(bars, sweep_vars)
Exemple #15
0
def plot_replicate_density(
    df,
    batch,
    plate,
    cutoff,
    percent_strong,
    output_file_base=None,
    output_file_extensions=[".png", ".pdf", ".svg"],
    dpi=300,
    height=1.5,
    width=2,
    return_plot=False,
):
    """Density of ``similarity_metric`` by replicate group with a cutoff line.

    Parameters
    ----------
    df
        Data handed to ``gg.ggplot``; the aesthetics read its
        ``similarity_metric`` and ``group_replicate`` columns.
    batch, plate
        Shown in the first line of the title.
    cutoff
        x position of the dashed red vertical line.
    percent_strong
        Fraction that is multiplied by 100, rounded to two decimals and
        reported in the title as "Percent Replicating".
    output_file_base
        When truthy, the plot is passed to ``save_figure`` together with
        ``output_file_extensions``, ``dpi``, ``height`` and ``width``.
    return_plot
        When true the plot is returned; otherwise the function returns
        ``None``.
    """
    heading = f"{batch}; Plate: {plate}\n\nPercent Replicating: {np.round(percent_strong * 100, 2)}%"
    replicate_fill = gg.scale_fill_manual(
        name="Replicate",
        labels={"True": "True", "False": "False"},
        values=["#B99638", "#2DB898"],
    )
    cutoff_line = gg.geom_vline(xintercept=cutoff, color="red", linetype="dashed")
    small_fonts = gg.theme(
        title=gg.element_text(size=3.5),
        axis_text=gg.element_text(size=4),
        axis_title=gg.element_text(size=4),
        legend_text=gg.element_text(size=4),
        legend_title=gg.element_text(size=4),
        strip_text=gg.element_text(size=4, color="black"),
        strip_background=gg.element_rect(colour="black", fill="#fdfff4"),
    )

    figure = (
        gg.ggplot(df, gg.aes(x="similarity_metric", fill="group_replicate"))
        + gg.geom_density(alpha=0.3)
        + replicate_fill
        + gg.xlab("Pearson Correlation")
        + gg.ylab("Density")
        + cutoff_line
        + gg.ggtitle(heading)
        + gg.theme_bw()
        + small_fonts
    )

    if output_file_base:
        save_figure(
            figure, output_file_base, output_file_extensions, dpi, height, width
        )

    # Falls through to an implicit None when the caller did not ask for it.
    if return_plot:
        return figure
def plot_distributions_bar_plot_grid(dataframe, figure_size=(14, 4)):
    """
    Grid of dodged count bars per threshold, one column of panels per
    ``iterations`` value, with the count printed above each bar.

    The fill values 0 and 1 are shown in the legend as 'Stable' and
    'Unstable'. ``figure_size`` is passed to the theme.
    """
    legend_names = {0: 'Stable', 1: 'Unstable'}
    bar_colours = {0: '#009e73', 1: '#d55e00'}

    def _legend_labels(breaks):
        # Translate the raw fill values into their legend names.
        return [legend_names[value] for value in breaks]

    def _panel_title(iterations):
        return f'iters = {iterations}'

    # Counts written on top of the bars, dodged like the bars themselves.
    count_labels = p9.geom_text(
        p9.aes(label='stat(count)'),
        stat='count',
        position=p9.position_dodge(0.9),
        size=7,
        va='bottom',
    )
    # No y title, small tick labels, untitled legend on top.
    layout = p9.theme(
        axis_title_y=p9.element_blank(),
        axis_text_x=p9.element_text(size=7),
        axis_text_y=p9.element_text(size=7),
        legend_title=p9.element_blank(),
        legend_position='top',
        legend_box_margin=2,
        figure_size=figure_size,
    )

    return (
        p9.ggplot(dataframe, p9.aes(x='threshold', fill='value'))
        + p9.geom_bar(position='dodge')
        + count_labels
        + p9.scale_x_discrete(name='Threshold')
        # expand is (mul_bottom, add_bottom, mul_top, add_top): room on top only.
        + p9.scale_y_continuous(name='Count', expand=(0, 0, 0, 500))
        + p9.scale_fill_manual(values=bar_colours, labels=_legend_labels)
        + p9.facet_grid('. ~ iterations',
                        labeller=p9.labeller(cols=_panel_title))
        + layout
    )
Exemple #17
0
def cell_division(adata):
    """ Plots the reference variable along the principal circle edges to
    visualize the moment of cell division.

    Parameters
    ----------------
    adata: AnnData
        The AnnData object being used for the analysis. Must be previously
        evaluated by `tl.celldiv_moment`.

    Returns
    ------------
    A plotnine line-plot to help visualize the moment of cell division and
    direction of the cell cycle.

    If method = 'counts' when tl.celldiv_moment was run,
    cell division is defined by the largest drop in total_counts. The changes
    are represented by the bars at the bottom, and the suggested moment of
    cell division is marked in red. The cell cycle should follow an
    incremental increase in total counts until around the moment of cell
    division.

    Alternatively, if method='g2m' in tl.celldiv_moment, the G2-M signature
    dynamics are used to define the moment of cell division.

    Note that a boolean `cell_div` column is written into
    `adata.uns['princirc_gr']['edges']`.
    """
    moment = adata.uns['scycle']['cell_div_moment']
    ref_var = moment['ref_var']
    edge_to_0 = moment['cell_div_edge'][0]

    edges = adata.uns['princirc_gr']['edges']
    at_division = edges['e1'] == edge_to_0
    edges['cell_div'] = at_division
    cell_div_count = edges[at_division]['mean_var']

    division_marker = annotate(
        "point", x = edge_to_0, y = cell_div_count, color = 'red', size = 2)
    trend_line = geom_smooth(
        aes(y = 'mean_var'), method = 'lm', linetype = 'dashed')

    return (
        ggplot(edges, aes('e1', 'mean_var'))
        + geom_point(aes(y = 'mean_var'), size = 2)
        + geom_path(aes(y = 'mean_var'))
        + trend_line
        + division_marker
        + labs(x = 'Edge position', y = ref_var)
        + geom_col(aes(y = 'diff_var', fill = 'cell_div'))
        + scale_fill_manual(values = ['darkgrey', 'red'], guide = False)
        + theme_std
    )
Exemple #18
0
def create_confidence_plot(conf_df):
    """Build per-task density panels of ``x``, coloured and filled by method.

    Panels are wrapped over four rows. The y axis carries no text, ticks or
    title, and the legend sits on top without a title.
    """
    base = ggplot(conf_df) + aes(x='x', color='Method', fill='Method')
    densities = base + geom_density(alpha=.45) + facet_wrap('Task', nrow=4)
    coloured = (densities + xlab('Confidence')
                + scale_color_manual(values=COLORS)
                + scale_fill_manual(values=COLORS))

    return coloured + theme_fs() + theme(
        axis_text_y=element_blank(),
        axis_ticks_major_y=element_blank(),
        axis_title_y=element_blank(),
        legend_title=element_blank(),
        legend_position='top',
        legend_box='horizontal',
    )
Exemple #19
0
class THEME:
    """Namespace of shared colours, scales and a dark-background theme."""

    # Fill colour of both the panel and the plot background in `mt`.
    bgcolor = "#293241"
    # Loader settings -- presumably read by a UI spinner; confirm with callers.
    LOADER_COLOR = "#2a9d8f"
    LOADER_TYPE = "dot"

    # Categorical palette used by the manual fill/colour scales below.
    colors_light = [
        "#d88c9a",
        "#f2d0a9",
        "#f1e3d3",
        "#99c1b9",
        "#8e7dbe",
        "#2a9d8f",
        "#797d62",
        "#3a6ea5",
    ]

    mt = theme(
        panel_background=element_rect(fill=bgcolor),
        plot_background=element_rect(fill=bgcolor),
        axis_text_x=element_text(color="black"),
        axis_text_y=element_text(color="black"),
        strip_margin_y=0.05,
        strip_margin_x=0.5,
    )

    cat_colors = scale_fill_manual(values=colors_light)
    cat_colors_lines = scale_color_manual(values=colors_light)
    gradient_colors = scale_fill_gradient("#aad576", "#ce4257")

    # Integer tags -- presumably select between fill and colour scales; confirm.
    FILL = 1
    COLOR = 2

    LONG_FIGURE = (10, 20)
Exemple #20
0
    def pictures(self, mode='bw', subset=None, n_random=10):
        """Render the selected images as a faceted tile plot.

        Each row of the RGB dataframe becomes one tile whose fill is the
        colour string stored in that row, so the plot shows the images
        either in black-white or in color.

        Args:
            mode: 'bw' fills tiles from the 'rgb_bw' column, 'color' from
                the 'rgb' column.
            subset: Optional list of picture indices, forwarded to
                `rgb_dataframe`. If specified, n_random will be ignored.
            n_random: Optional number of randomly selected images,
                forwarded to `rgb_dataframe`. If neither subset nor
                n_random are specified, all images will be included.

        Returns:
            A plotnine object including all pictures with their label.

        Raises:
            NotImplementedError: mode must be either 'bw' or 'color'."""
        dataframe = self.rgb_dataframe(subset=subset, n_random=n_random)

        if mode not in ('bw', 'color'):
            raise NotImplementedError("Pictures are either in black-white"
                                      "('bw') or in color ('color').")
        fill_key = 'rgb_bw' if mode == 'bw' else 'rgb'

        # Every distinct colour string maps onto itself, so tiles are drawn
        # in exactly the colour stored in the data.
        shades = dataframe[fill_key].unique()
        identity_palette = dict(zip(shades, shades))

        return (
            gg.ggplot(dataframe, gg.aes(x='x', y='y', fill=fill_key))
            + gg.geom_tile()
            + gg.theme_void()
            + gg.theme(legend_position='none')
            + gg.scale_fill_manual(values=identity_palette)
            + gg.facet_wrap('image_id', labeller=self.labeller)
            + gg.scale_y_reverse()
            + gg.coord_fixed()
        )
Exemple #21
0
def create_length_plot(len_df, legend_position='right', legend_box='vertical'):
    """Plot per-task length histograms and mark the mean length per method.

    The means come from averaging ``len_df`` within each (``Task``,
    ``Method``) group. They are drawn as dashed vertical segments, each
    with a one-decimal label above it. ``legend_position`` and
    ``legend_box`` are handed to ``theme``. Returns the plot.
    """
    averages = len_df.groupby(['Task', 'Method']).mean().reset_index()
    # A constant column for the linetype mapping gives the segments a
    # 'Mean Length' legend entry.
    averages[' '] = 'Mean Length'

    histogram = (ggplot(len_df)
                 + aes(x='x', fill='Method', y='..density..')
                 + geom_histogram(binwidth=2, position='identity', alpha=.6))

    annotated = (histogram
                 + geom_text(aes(x='x', y=.22, label='x', color='Method'),
                             averages,
                             inherit_aes=False,
                             format_string='{:.1f}',
                             show_legend=False)
                 + geom_segment(aes(x='x', xend='x', y=0, yend=.205,
                                    linetype=' '),
                                averages,
                                inherit_aes=False,
                                color='black')
                 + scale_linetype_manual(['dashed']))

    scaled = (annotated
              + facet_wrap('Task')
              + xlim(0, 20)
              + ylim(0, .23)
              + xlab('Example Length')
              + ylab('Frequency')
              + scale_color_manual(values=COLORS)
              + scale_fill_manual(values=COLORS))

    return scaled + theme_fs() + theme(
        aspect_ratio=1,
        legend_title=element_blank(),
        legend_position=legend_position,
        legend_box=legend_box,
    )
Exemple #22
0
def pattern_research_plot(data):
    """Dodged bar chart of the 'D', 'W' and 'MS' columns per ``hour_category``.

    ``data`` is melted to long form (``series`` / ``count``), the series are
    ordered D, W, MS, and the bars are filled with a three-step blue
    gradient. The y axis is limited to 0-100 and labelled as percentages.
    Returns the plot.
    """
    from colour import Color

    series_order = ['D', 'W', 'MS']

    # Three shades from "#004996" to "#018ace", then reversed.
    gradient = Color("#004996").range_to(Color("#018ace"), 3)
    blue = [shade.hex_l for shade in gradient][::-1]

    long_form = data.melt(id_vars=['hour_category'],
                          value_vars=series_order,
                          var_name='series',
                          value_name='count')
    long_form = long_form.assign(
        series=pd.Categorical(long_form['series'], categories=series_order))

    def _percent_labels(breaks):
        return ["%d%%" % (value) for value in breaks]

    return (
        p9.ggplot(data=long_form,
                  mapping=p9.aes(x='hour_category', y='count', fill='series'))
        + p9.geom_bar(stat='identity', position='dodge')
        + p9.scale_fill_manual(blue, labels=series_order)
        + p9.theme_classic()
        + p9.theme(axis_text=p9.element_text(size=8),
                   axis_title=p9.element_text(size=8, face='bold'))
        + p9.coord_cartesian(ylim=(0, 100))
        + p9.scale_y_continuous(labels=_percent_labels)
        + p9.labs(x='hour_category', y='Ratio of attacks')
    )
Exemple #23
0
# In[7]:

# Edge counts against precision, one line per "in_hetionet" status and one
# panel per relation, on a log10 y axis.
g = (
    p9.ggplot(binned_df, p9.aes(x="precision", y="edges", color="in_hetionet"))
    + p9.geom_point()
    + p9.geom_line()
    + p9.scale_color_manual(
        values={
            "Existing": color_map["Existing"],
            "Novel": color_map["Novel"],
        }
    )
    + p9.facet_wrap("relation")
    + p9.scale_y_log10()
    + p9.theme_bw()
)
print(g)

# In[8]:

# Same data as dodged, flipped bars: edge counts against precision, filled by
# "in_hetionet" status, one panel per relation, on a log10 scale.
#
# theme_bw() is a complete theme: adding it after p9.theme(...) replaces the
# plot's theme and silently discards figure_size and aspect_ratio. It is
# therefore added first and the size/aspect overrides are layered on top.
g = (
    p9.ggplot(binned_df, p9.aes(x="precision", y="edges", fill="in_hetionet"))
    + p9.geom_bar(stat='identity', position='dodge')
    + p9.scale_fill_manual(
        values={
            "Existing": color_map["Existing"],
            "Novel": color_map["Novel"],
        }
    )
    + p9.coord_flip()
    + p9.facet_wrap("relation")
    + p9.scale_y_log10()
    + p9.theme_bw()
    + p9.theme(figure_size=(12, 8), aspect_ratio=9)
)
print(g)

# In[9]:

combined_sen_tree = {
    "DaG": {
        "file":
        "../../../disease_gene/disease_associates_gene/edge_prediction_experiment/output/combined_predicted_dag_sentences.tsv.xz",
        "group": ["doid_id", "entrez_gene_id"]
    },
    "CtD": {
        "file":
        "../../../compound_disease/compound_treats_disease/edge_prediction_experiment/output/combined_predicted_ctd_sentences.tsv.xz",
Exemple #24
0
# Reorder plotting variables
# The categories are listed explicitly so the fill groups follow this order;
# values of `num_variants` that are not listed become missing after the cast.
filter_list = [
    'all_variant_count', 'filter_common_var_count', 'filter_min_depth_count',
    'filter_max_depth_count'
]
filter_list_cat = CategoricalDtype(categories=filter_list, ordered=True)
filter_melt_df['num_variants_cat'] = (
    filter_melt_df['num_variants'].astype(str).astype(filter_list_cat))

# Legend labels, in the same order as `filter_list` and the colour values.
filtration_labels = [
    'All Variants', 'Common Variants',
    'Depth (< {} reads)'.format(replicate_filter_min_depth_count),
    'Depth (> {} reads)'.format(replicate_filter_max_depth_count)
]

# Dodged bars per lane, one panel per `final_id`.
# The x tick angle is the number 90, not the string '90': current matplotlib
# accepts only 'vertical', 'horizontal' or a number as a text rotation.
p = (gg.ggplot(filter_melt_df,
               gg.aes(x='lane', y='filtration', fill='num_variants_cat')) +
     gg.geom_bar(stat='identity', position='dodge') +
     gg.facet_wrap('~ final_id') + gg.scale_fill_manual(
         name='Filtration Step',
         values=['#1b9e77', '#d95f02', '#7570b3', '#e7298a'],
         labels=filtration_labels) +
     gg.xlab('Sample') + gg.ylab('Final Number of Variants') +
     gg.theme_bw() + gg.theme(axis_text_x=gg.element_text(angle=90),
                              axis_text=gg.element_text(size=8),
                              axis_title=gg.element_text(size=14)))
p

# In[13]:

# Write the figure to figures/replicates_filtration_results.pdf.
figure_file = os.path.join('figures', 'replicates_filtration_results.pdf')
gg.ggsave(p, figure_file, height=5.5, width=6.5, dpi=500)

# In[14]:
Exemple #25
0
def density_plot(df,
                 x,
                 group=None,
                 facet_x=None,
                 facet_y=None,
                 position='overlay',
                 sort_groups=True,
                 base_size=10,
                 figure_size=(6, 3),
                 **stat_kwargs):
    '''
    Build a one-dimensional density chart from a dataframe.

    Parameters
    ----------
    df : pd.DataFrame
      source data; a copy is taken before any processing
    x : str
      quoted expression shown on the x axis (`.index` selects the index)
    group : str
      quoted expression used to split and colour the densities
    facet_x : str
      quoted expression used for faceting
    facet_y : str
      quoted expression used for faceting; only applied together with `facet_x`
    position : str
      either `overlay` or `stack`
    sort_groups : bool
      when true, the fill legend is drawn in reversed order
    base_size : int
      base size handed to theme_ez
    figure_size : tuple of int
      figure size handed to theme_ez
    stat_kwargs : kwargs
      forwarded to the density stat

    Returns
    -------
    g : EZPlot
      EZPlot object

    Raises
    ------
    NotImplementedError
      if `position` is neither `overlay` nor `stack`

    '''

    if position not in ('overlay', 'stack'):
        log.error("position not recognized")
        raise NotImplementedError("position not recognized")

    # never touch the caller's dataframe
    dataframe = df.copy()

    # split every quoted expression into its display name and its definition
    names = {}
    groups = {}
    variables = {}

    labelled_inputs = (('x', x), ('group', group), ('facet_x', facet_x),
                       ('facet_y', facet_y))
    for label, var in labelled_inputs:
        names[label], groups[label] = unname(var)

    # the index is requested through the special `.index` expression
    if x == '.index':
        groups['x'] = '.index'
        index_name = dataframe.index.name
        names['x'] = index_name if index_name is not None else ''

    # prepare the plotting data, keeping the known columns in a fixed order
    gdata = agg_data(dataframe, variables, groups, None, fill_groups=False)
    ordered_columns = [
        c for c in ['x', 'group', 'facet_x', 'facet_y'] if c in gdata.columns
    ]
    gdata = gdata[ordered_columns]

    # plot object
    g = EZPlot(gdata)

    # one colour per group
    colors = ez_colors(g.n_groups('group'))

    # density layer
    if group is None:
        layer_aes = p9.aes(x="x")
        layer_stat = p9.stats.stat_density(**stat_kwargs)
        outline = ez_colors(1)[0]
        interior = ez_colors(1)[0]
        g += p9.geom_density(layer_aes,
                             stat=layer_stat,
                             colour=outline,
                             fill=interior,
                             **POSITION_KWARGS[position])
    else:
        layer_aes = p9.aes(x="x",
                           group="factor(group)",
                           colour="factor(group)",
                           fill="factor(group)")
        layer_stat = p9.stats.stat_density(**stat_kwargs)
        g += p9.geom_density(layer_aes,
                             stat=layer_stat,
                             **POSITION_KWARGS[position])
        g += p9.scale_fill_manual(values=colors, reverse=False)
        g += p9.scale_color_manual(values=colors, reverse=False)

    # facets are only drawn when facet_x is given
    if facet_x is not None:
        if facet_y is None:
            g += p9.facet_wrap('~facet_x')
        else:
            g += p9.facet_grid('facet_y~facet_x')

    # x scale depends on the type of the x column
    g += (p9.scale_x_discrete() if g.column_is_categorical('x') else
          p9.scale_x_continuous(labels=ez_labels))

    # y scale
    g += p9.scale_y_continuous(labels=ez_labels)

    # axis labels
    axis_labels = p9.xlab(names['x']) + p9.ylab('Density')
    g += axis_labels

    # theme, with the group name as legend title
    legend_title = p9.element_text(text=names['group'], size=base_size)
    g += theme_ez(figure_size=figure_size,
                  base_size=base_size,
                  legend_title=legend_title)

    if sort_groups:
        g += p9.guides(fill=p9.guide_legend(reverse=True))

    return g
Exemple #26
0
    def barchart_make(roi, df, list_rois, config, ylimit, save_function,
                      find_ylim_function):
        """Build the bar chart for one ROI, then save it or report its y limit.

        Parameters
        ----------
        roi : index into ``list_rois`` selecting the ROI to plot.
        df : dataframe; only rows whose ``'index'`` column equals the selected
            ROI are plotted.
        list_rois : sequence of ROI names.
        config : object providing the ``single_roi_fig_*`` settings,
            ``colorblind_friendly_plot_colours``, ``plot_dpi`` and
            ``use_same_axis_limits``.
        ylimit : when truthy, used as the upper y limit and ``'_same_ylim'``
            is appended to the ROI name passed on to the callbacks.
        save_function : called as
            ``save_function(figure, name, config, folder, 'barchart')``.
        find_ylim_function : called as
            ``find_ylim_function(name, figure, 'yaxis')``.

        Returns
        -------
        The value of ``find_ylim_function`` when ``ylimit == 0`` and
        ``config.use_same_axis_limits`` is ``'Same limits'`` or
        ``'Create both'``; otherwise ``0``. With ``'Same limits'`` and
        ``ylimit == 0`` the figure is not saved.
        """
        roi_name = list_rois[roi]
        x_column = config.single_roi_fig_x_axis

        # Rows of the requested ROI, sorted along the x column
        roi_rows = df.loc[df['index'] == roi_name]
        roi_rows = roi_rows.sort_values([x_column]).reset_index(drop=True)

        # Categories follow the order of appearance after sorting
        roi_rows[x_column] = pd.Categorical(
            roi_rows[x_column], categories=roi_rows[x_column].unique())

        bar_aes = pltn.aes(
            x=x_column,
            y='Mean',
            ymin="Mean-Conf_Int_95",
            ymax="Mean+Conf_Int_95",
            fill='factor({colour})'.format(
                colour=config.single_roi_fig_colour))

        bold_title = dict(weight='bold', color='black', size=20)

        # Everything added to the base plot, in the order it is applied
        components = [
            pltn.theme_538(),
            pltn.geom_col(
                position=pltn.position_dodge(preserve='single', width=0.8),
                width=0.8,
                na_rm=True),
            pltn.geom_errorbar(
                size=1,
                position=pltn.position_dodge(preserve='single', width=0.8)),
            pltn.labs(x=config.single_roi_fig_label_x,
                      y=config.single_roi_fig_label_y,
                      fill=config.single_roi_fig_label_fill),
            pltn.scale_x_discrete(labels=[]),
            pltn.theme(
                panel_grid_major_x=pltn.element_line(alpha=0),
                axis_title_x=pltn.element_text(**bold_title),
                axis_title_y=pltn.element_text(**bold_title),
                axis_text_y=pltn.element_text(size=20, color='black'),
                legend_title=pltn.element_text(size=20, color='black'),
                legend_text=pltn.element_text(size=18, color='black'),
                subplots_adjust={'right': 0.85},
                legend_position=(0.9, 0.8),
                dpi=config.plot_dpi),
            # The x tick labels are blanked above; the x values are written
            # as text at y=-.7 instead
            pltn.geom_text(pltn.aes(y=-.7, label=x_column),
                           color='black',
                           size=20,
                           va='top'),
            pltn.scale_fill_manual(
                values=config.colorblind_friendly_plot_colours),
        ]

        figure = pltn.ggplot(roi_rows, bar_aes)
        for component in components:
            figure = figure + component

        if ylimit:
            # Shared y limit so every bar chart uses the same axis
            figure += pltn.ylim(None, ylimit)
            roi_name += '_same_ylim'

        no_limit_given = ylimit == 0
        limits_mode = config.use_same_axis_limits

        returned_ylim = 0
        if no_limit_given and limits_mode in ('Same limits', 'Create both'):
            returned_ylim = find_ylim_function(roi_name, figure, 'yaxis')

        # Only the limit is wanted in this mode, nothing is saved
        if no_limit_given and limits_mode == 'Same limits':
            return returned_ylim

        folder = 'Different_yaxis' if no_limit_given else 'Same_yaxis'
        save_function(figure, roi_name, config, folder, 'barchart')

        return returned_ylim
Exemple #27
0
                                  colour=ez_colors(1)[0],
                                  na_rm=False)
        else:
            g += p9.geom_crossbar(p9.aes(x="x",
                                         y='center',
                                         ymin='low',
                                         ymax='high',
                                         group="factor(group_x)",
                                         colour="factor(group)",
                                         fill="factor(group)"),
                                  position=p9.position_dodge(
                                      0.7, preserve='single'),
                                  na_rm=True,
                                  alpha=0.2)

            g += p9.scale_fill_manual(values=ez_colors(g.n_groups('group')))
            g += p9.scale_colour_manual(values=ez_colors(g.n_groups('group')))

    elif geom == 'ribbon':

        g = EZPlot(gdata.dropna())

        # set groups
        if group is None:
            g += p9.geom_ribbon(p9.aes(x="x",
                                       y='center',
                                       ymin='low',
                                       ymax='high'),
                                fill=ez_colors(1)[0],
                                alpha=0.2,
                                na_rm=False)
# In[9]:


dataset = "dmso_treated"

# UMAP scatter: fill by clone type, shape by batch, size by cell count
umap_resistant_type_gg = (
    gg.ggplot(embedding_df, gg.aes(x="x", y="y"))
    + gg.geom_point(
        gg.aes(
            fill="Metadata_clone_type",
            shape="Metadata_batch",
            size="Metadata_cell_count",
        ),
        color='black',
        alpha=0.6,
    )
    # scales
    + gg.scale_shape_manual(name="Batch", values=[".", "+", "x"])
    + gg.scale_fill_manual(name="Clone type", values=["#1F8AA5", "#E98831"])
    + gg.scale_size_continuous(name="Cell count")
    # labels
    + gg.ggtitle("DMSO treated samples")
    + gg.xlab("UMAP (X)")
    + gg.ylab("UMAP (Y)")
    # theme
    + gg.theme_bw()
    + gg.theme(
        strip_text=gg.element_text(size=6, color="black"),
        strip_background=gg.element_rect(colour="black", fill="#fdfff4"),
    )
)

# Output path without extension; one file is written per extension below
file = os.path.join("figures", "umap", f"{dataset}_umap_resistant_type")

for extension in save_file_extensions:
    umap_resistant_type_gg.save(
        filename=f"{file}{extension}",
        height=3,
        width=3.5,
        dpi=400,
    )

umap_resistant_type_gg

    ),
)

# Write the cell count table as a tab-separated file when allowed
if check_if_write(cell_count_output_file, force, throw_warning=True):
    cell_count_df.to_csv(cell_count_output_file, sep="\t", index=False)

# Graph: Cell count with all wells in same graph
cell_count_gg = (
    gg.ggplot(cell_count_df, gg.aes(x="site", y="cell_count"))
    + gg.geom_bar(gg.aes(fill="Cell_Quality"), stat="identity")
    + gg.scale_fill_manual(
        name="Cell Quality",
        labels=cell_category_list,
        values=cell_category_colors,
    )
    + gg.xlab("Sites")
    + gg.ylab("Cell Count")
    + gg.theme_bw()
    + gg.theme(axis_text_x=gg.element_text(rotation=90, size=5))
)

# Make sure the figure directory exists before saving into it
os.makedirs(output_figuresdir, exist_ok=True)
output_file = (
    pathlib.Path(output_figuresdir)
    / "all_cellpainting_cellquality_across_sites.png"
)
if check_if_write(output_file, force, throw_warning=True):
    cell_count_gg.save(
        output_file,
        dpi=300,
        width=10,
        height=7,
        verbose=False,
    )

# Same graph as above, separated by well.
cell_count_gg_parsed = (
    gg.ggplot(cell_count_df, gg.aes(x="site", y="cell_count"))
    + gg.geom_bar(gg.aes(fill="Cell_Quality"), stat="identity")
    + gg.theme_bw()
Exemple #30
0
def line_plot(df,
              x,
              y,
              group=None,
              facet_x=None,
              facet_y=None,
              aggfun='sum',
              err=None,
              show_points=False,
              base_size=10,
              figure_size=(6, 3)):
    '''
    Aggregate the data in df and draw one or more columns as lines.

    Parameters
    ----------
    df : pd.DataFrame
      source data; a copy is taken before any processing
    x : str
      quoted expression shown on the x axis (`.index` selects the index)
    y : str or list of str
      quoted expression(s) shown on the y axis; several expressions are
      melted into one `Variable` group with `Value` as y label
    group : str
      quoted expression used as group (ie color); single y only
    facet_x : str
      quoted expression used for faceting
    facet_y : str
      quoted expression used for faceting; only applied together with `facet_x`
    aggfun : str or fun
      aggregation handed to agg_data (eg sum, mean, median ...)
    err : str
      quoted expression drawn as a shaded band of y +/- err; single y only
    show_points : bool
      show/hide markers
    base_size : int
      base size handed to theme_ez
    figure_size : tuple of int
      figure size handed to theme_ez

    Returns
    -------
    g : EZPlot
      EZPlot object

    Raises
    ------
    ValueError
      if `group` or `err` is given together with more than one y column

    '''

    several_ys = isinstance(y, list) and len(y) > 1

    if several_ys and group is not None:
        message = "groups can be specified only when a single y column is present"
        log.error(message)
        raise ValueError(message)

    if several_ys and err is not None:
        message = "err can be specified only when a single y column is present"
        log.error(message)
        raise ValueError(message)

    # a one-element list is treated like a plain expression
    if isinstance(y, list) and len(y) == 1:
        y = y[0]

    # never touch the caller's dataframe
    dataframe = df.copy()

    # split every quoted expression into its display name and its definition
    names = {}
    groups = {}
    variables = {}

    labelled_inputs = (('x', x), ('group', group), ('facet_x', facet_x),
                       ('facet_y', facet_y))
    for label, var in labelled_inputs:
        names[label], groups[label] = unname(var)

    # the index is requested through the special `.index` expression
    if x == '.index':
        groups['x'] = '.index'
        index_name = dataframe.index.name
        names['x'] = index_name if index_name is not None else ''

    if isinstance(y, list):

        # one generated variable per requested y expression
        ys = []
        for i, var in enumerate(y):
            key = 'y_{}'.format(i)
            ys.append(key)
            names[key], variables[key] = unname(var)

        # aggregate, then go from wide to long format
        wide_data = agg_data(dataframe,
                             variables,
                             groups,
                             aggfun,
                             fill_groups=True)
        id_columns = [
            c for c in ['x', 'facet_x', 'facet_y'] if c in wide_data.columns
        ]
        gdata = pd.melt(wide_data,
                        id_vars=id_columns,
                        var_name='group',
                        value_name='y')

        # show the original names instead of the generated keys
        display_names = {var: names[var] for var in ys}
        gdata['group'] = gdata['group'].replace(display_names)

        # the melted variables now act as the group
        names['y'] = 'Value'
        names['group'] = 'Variable'
        group = 'Variable'

    else:

        names['y'], variables['y'] = unname(y)
        if err is not None:
            names['err'], variables['err'] = unname(err)

        gdata = agg_data(dataframe,
                         variables,
                         groups,
                         aggfun,
                         fill_groups=True)

    # keep the known columns in a fixed order
    ordered_columns = [
        c for c in ['x', 'y', 'err', 'group', 'facet_x', 'facet_y']
        if c in gdata.columns
    ]
    gdata = gdata[ordered_columns]

    # bounds of the error band
    if err is not None:
        gdata['ymax'] = gdata['y'] + gdata['err']
        gdata['ymin'] = gdata['y'] - gdata['err']

    # plot object
    g = EZPlot(gdata)

    # layers
    if group is None:
        g += p9.geom_line(p9.aes(x="x", y="y"),
                          group=1,
                          colour=ez_colors(1)[0])
        if show_points:
            g += p9.geom_point(p9.aes(x="x", y="y"),
                               group=1,
                               colour=ez_colors(1)[0])
        if err is not None:
            g += p9.geom_ribbon(p9.aes(x="x", ymax="ymax", ymin="ymin"),
                                group=1,
                                fill=ez_colors(1)[0],
                                alpha=0.2)
    else:
        line_aes = p9.aes(x="x",
                          y="y",
                          group="factor(group)",
                          colour="factor(group)")
        g += p9.geom_line(line_aes)
        if show_points:
            point_aes = p9.aes(x="x", y="y", colour="factor(group)")
            g += p9.geom_point(point_aes)
        if err is not None:
            band_aes = p9.aes(x="x",
                              ymax="ymax",
                              ymin="ymin",
                              fill="factor(group)")
            g += p9.geom_ribbon(band_aes, alpha=0.2)
        g += p9.scale_color_manual(values=ez_colors(g.n_groups('group')))
        g += p9.scale_fill_manual(values=ez_colors(g.n_groups('group')))

    # facets are only drawn when facet_x is given
    if facet_x is not None:
        if facet_y is None:
            g += p9.facet_wrap('~facet_x')
        else:
            g += p9.facet_grid('facet_y~facet_x')

    # x scale depends on the type of the x column
    if g.column_is_timestamp('x'):
        x_scale = p9.scale_x_datetime()
    elif g.column_is_categorical('x'):
        x_scale = p9.scale_x_discrete()
    else:
        x_scale = p9.scale_x_continuous(labels=ez_labels)
    g += x_scale

    # y scale
    g += p9.scale_y_continuous(labels=ez_labels)

    # axis labels
    axis_labels = p9.xlab(names['x']) + p9.ylab(names['y'])
    g += axis_labels

    # theme, with the group name as legend title
    legend_title = p9.element_text(text=names['group'], size=base_size)
    g += theme_ez(figure_size=figure_size,
                  base_size=base_size,
                  legend_title=legend_title)

    return g
Exemple #31
0
def area_plot(df,
              x,
              y,
              group=None,
              facet_x=None,
              facet_y=None,
              aggfun='sum',
              fill=False,
              sort_groups=True,
              base_size=10,
              figure_size=(6, 3)):
    '''
    Aggregates data in df and plots as a stacked area chart.

    Parameters
    ----------
    df : pd.DataFrame
      input dataframe; a copy is taken before any processing
    x : str
      quoted expression to be plotted on the x axis (`.index` selects the index)
    y : str
      quoted expression to be plotted on the y axis
    group : str
      quoted expression to be used as group (ie color)
    facet_x : str
      quoted expression to be used as facet
    facet_y : str
      quoted expression to be used as facet; only applied together with `facet_x`
    aggfun : str or fun
      function to be used for aggregating (eg sum, mean, median ...)
    fill : bool
      plot shares for each group instead of absolute values
    sort_groups : bool
      when true, sort_data_groups is applied to the plot and the fill and
      color legends are reversed
    base_size : int
      base size for theme_ez
    figure_size :tuple of int
      figure size

    Returns
    -------
    g : EZPlot
      EZplot object

    '''

    # create a copy of the data
    dataframe = df.copy()

    # define groups and variables; remove and store (eventual) names
    names = {}
    groups = {}
    variables = {}

    for label, var in zip(['x', 'group', 'facet_x', 'facet_y'],
                          [x, group, facet_x, facet_y]):
        names[label], groups[label] = unname(var)
    names['y'], variables['y'] = unname(y)

    # fix special cases
    if x == '.index':
        groups['x'] = '.index'
        names[
            'x'] = dataframe.index.name if dataframe.index.name is not None else ''

    # aggregate data and reorder columns
    gdata = agg_data(dataframe, variables, groups, aggfun, fill_groups=True)
    # Assign the filled column back explicitly: an in-place fillna on the
    # `gdata['y']` selection is a chained assignment, which pandas warns about
    # and which does not update `gdata` under Copy-on-Write.
    gdata['y'] = gdata['y'].fillna(0)
    gdata = gdata[[
        c for c in ['x', 'y', 'group', 'facet_x', 'facet_y']
        if c in gdata.columns
    ]]

    if fill:
        # normalize y by its total within each x / facet combination
        groups_to_normalize = [
            c for c in ['x', 'facet_x', 'facet_y'] if c in gdata.columns
        ]
        total_values = gdata \
            .groupby(groups_to_normalize)['y'] \
            .sum() \
            .reset_index() \
            .rename(columns={'y': 'tot_y'})
        gdata = pd.merge(gdata, total_values, on=groups_to_normalize)
        # EPSILON is added to the denominator before dividing
        gdata['y'] = gdata['y'] / (gdata['tot_y'] + EPSILON)
        gdata = gdata.drop(columns='tot_y')
        ylabeller = percent_labels
    else:
        ylabeller = ez_labels

    # get plot object
    g = EZPlot(gdata)

    # determine order and create a categorical type
    if sort_groups:
        sort_data_groups(g)

    # get colors (in reversed order)
    colors = np.flip(ez_colors(g.n_groups('group')))

    # set groups
    if group is None:
        g += p9.geom_area(p9.aes(x="x", y="y"),
                          colour=None,
                          fill=ez_colors(1)[0],
                          na_rm=True)
    else:
        g += p9.geom_area(p9.aes(x="x",
                                 y="y",
                                 group="factor(group)",
                                 fill="factor(group)"),
                          colour=None,
                          na_rm=True)
        g += p9.scale_fill_manual(values=colors)

    # set facets (facet_y is only used together with facet_x)
    if facet_x is not None and facet_y is None:
        g += p9.facet_wrap('~facet_x')
    if facet_x is not None and facet_y is not None:
        g += p9.facet_grid('facet_y~facet_x')

    # set x scale
    if g.column_is_timestamp('x'):
        g += p9.scale_x_datetime()
    elif g.column_is_categorical('x'):
        g += p9.scale_x_discrete()
    else:
        g += p9.scale_x_continuous(labels=ez_labels)

    # set y scale; the third expand entry evaluates to 0.13 when fill is
    # False and to 0.03 when fill is True
    g += p9.scale_y_continuous(labels=ylabeller,
                               expand=[0, 0, 0.1 * (not fill) + 0.03, 0])

    # set axis labels
    g += \
        p9.xlab(names['x']) + \
        p9.ylab(names['y'])

    # set theme
    g += theme_ez(figure_size=figure_size,
                  base_size=base_size,
                  legend_title=p9.element_text(text=names['group'],
                                               size=base_size))

    if sort_groups:
        g += p9.guides(fill=p9.guide_legend(reverse=True),
                       color=p9.guide_legend(reverse=True))

    return g
g = (
    p9.ggplot(publish_rate_df.rename(index=str, columns={"label": "Label"})) +
    p9.aes(
        x="pub_month",
        y="rate",
        fill="Label",
        group="Label",
        color="Label",
        linetype="Label",
        shape="Label",
    ) + p9.geom_point(size=2) + p9.geom_line() +
    p9.scale_linetype_manual(["solid", "solid", "solid"]) +
    p9.scale_color_manual(
        [color_mapper["2020"], color_mapper["2020ML"], color_mapper["2018"]]) +
    p9.scale_fill_manual(
        [color_mapper["2020"], color_mapper["2020ML"], color_mapper["2018"]]) +
    p9.scale_shape_manual(["o", "o", "o"])
    # plot the x axis titles
    + p9.geom_vline(xintercept=[2.5, 14.5, 26.5, 38.5, 50.5, 62.5, 74.5]) +
    p9.geom_text(label="2014", x=8.5, y=0, color="black", size=13) +
    p9.geom_text(label="2015", x=20.5, y=0, color="black", size=13) +
    p9.geom_text(label="2016", x=32.5, y=0, color="black", size=13) +
    p9.geom_text(label="2017", x=44.5, y=0, color="black", size=13) +
    p9.geom_text(label="2018", x=56.5, y=0, color="black", size=13) +
    p9.geom_text(label="2019", x=68.5, y=0, color="black", size=13)
    # Plot the overall proportion published
    + p9.geom_hline(
        yintercept=0.4196, linetype="solid", color=color_mapper["2018"]) +
    p9.geom_hline(yintercept=published / posted,
                  linetype="solid",
                  color=color_mapper["2020ML"]) +