Beispiel #1
0
def test_errorbarh_aesthetics():
    """Stack five horizontal error-bar layers, each mapping one aesthetic.

    Every layer shares the plot-level ``xmin``/``xmax`` mapping and is
    shifted along ``y`` (``x``, ``x+1`` ... ``x+4``) so the bars do not
    overlap. The final comparison against the string
    ``'errorbarh_aesthetics'`` relies on plot equality defined outside this
    block (presumably a baseline-image lookup -- confirm in the test suite).
    """
    plot = ggplot(df, aes(xmin='ymin', xmax='ymax'))

    # One layer per aesthetic under test: none, alpha, linetype, color, size.
    layers = (
        geom_errorbarh(aes(y='x'), size=2),
        geom_errorbarh(aes(y='x+1', alpha='z'), height=0.2, size=2),
        geom_errorbarh(aes(y='x+2', linetype='factor(z)'), size=2),
        geom_errorbarh(aes(y='x+3', color='factor(z)'), size=2),
        geom_errorbarh(aes(y='x+4', size='z')),
    )
    for layer in layers:
        plot = plot + layer

    assert plot + _theme == 'errorbarh_aesthetics'
def test_errorbarh_aesthetics():
    """Exercise the aesthetics accepted by ``geom_errorbarh``.

    The plot is assembled one layer at a time; each layer offsets ``y`` by
    one more unit and varies a single aesthetic. The resulting plot (with
    ``_theme`` added) is compared to the name ``'errorbarh_aesthetics'``;
    how that comparison is resolved is defined outside this block.
    """
    shared_bounds = aes(xmin='ymin', xmax='ymax')
    p = ggplot(df, shared_bounds)

    # Baseline bars with a fixed size only.
    p = p + geom_errorbarh(aes(y='x'), size=2)
    # Alpha mapped to z, with an explicit bar height.
    p = p + geom_errorbarh(aes(y='x+1', alpha='z'), height=0.2, size=2)
    # Linetype mapped to the discretised z.
    p = p + geom_errorbarh(aes(y='x+2', linetype='factor(z)'), size=2)
    # Color mapped to the discretised z.
    p = p + geom_errorbarh(aes(y='x+3', color='factor(z)'), size=2)
    # Size mapped to z (no fixed size on this layer).
    p = p + geom_errorbarh(aes(y='x+4', size='z'))

    assert p + _theme == 'errorbarh_aesthetics'
Beispiel #3
0
    def scatterplot(cls, df):
        """Save faceted scatter plots of ``Mean`` with horizontal error bars.

        Rows whose ``index`` is ``'Overall'`` or ``'No ROI'`` are dropped, the
        remaining rows are sorted by ``Mean`` within each
        (``config.table_cols``, ``config.table_rows``) group, and one PNG per
        ordering variant is written to ``Figures/Scatterplots``.

        Two variants exist:

        * ``roi_ordered``  - y positions follow the order in which ``index``
          values first appear in the sorted frame.
        * ``stat_ordered`` - y positions are the within-facet rank, so every
          facet is ordered independently.

        ``config.table_row_order`` selects which variants are produced:
        ``'roi'`` keeps only ``roi_ordered``, ``'statorder'`` keeps only
        ``stat_ordered``, any other value keeps both.

        Args:
            df: DataFrame with at least the columns ``index``, ``Mean``,
                ``Conf_Int_95`` and the two columns named by
                ``config.table_cols`` / ``config.table_rows``.
        """
        Utils.check_and_make_dir("Figures/Scatterplots")

        # Remove No ROI and Overall rows
        df = df[(df['index'] != 'Overall') & (df['index'] != 'No ROI')]

        # The same two columns drive grouping, per-facet ranking and faceting.
        facet_columns = [config.table_cols, config.table_rows]

        # Group by parameters and sort
        df = df.groupby(facet_columns).apply(
            lambda group: group.sort_values(['Mean']))
        df = df.reset_index(drop=True)  # Reset index to remove grouping

        plot_kinds = ['roi_ordered', 'stat_ordered']
        if config.table_row_order == 'roi':
            # Fixed: previously removed 'stat', which is not in the list and
            # therefore raised ValueError.
            plot_kinds.remove('stat_ordered')
        elif config.table_row_order == 'statorder':
            plot_kinds.remove('roi_ordered')

        facet_formula = f'{config.table_rows}~{config.table_cols}'

        for plot_kind in plot_kinds:
            if config.verbose:
                print(f"Saving {plot_kind} scatterplot!")

            if plot_kind == 'roi_ordered':
                # Order rows based on first facet
                roi_ord = pd.Categorical(df['index'],
                                         categories=df['index'].unique())
            else:
                # Order each facet individually. Uses the configured facet
                # columns instead of the previously hard-coded 'MB'/'SENSE'
                # so the ranking always matches the facets actually drawn.
                roi_ord = pd.Categorical(
                    df.groupby(facet_columns).cumcount())

            figure_table = (
                pltn.ggplot(df, pltn.aes(x="Mean", y=roi_ord))
                + pltn.geom_point(na_rm=True, size=1)
                + pltn.geom_errorbarh(
                    pltn.aes(xmin="Mean-Conf_Int_95", xmax="Mean+Conf_Int_95"),
                    na_rm=True,
                    height=None)
                + pltn.xlim(0, None)
                + pltn.scale_y_discrete(labels=[])
                + pltn.ylab(config.table_y_label)
                + pltn.xlab(config.table_x_label)
                + pltn.facet_grid(facet_formula,
                                  drop=True,
                                  labeller="label_both")
                + pltn.theme_538()  # Set theme
                + pltn.theme(
                    panel_grid_major_y=pltn.themes.element_line(alpha=0),
                    panel_grid_major_x=pltn.themes.element_line(alpha=1),
                    panel_background=pltn.element_rect(fill="gray", alpha=0.1),
                    dpi=config.plot_dpi))

            figure_table.save(
                f"Figures/Scatterplots/{plot_kind}_scatterplot.png",
                height=config.plot_scale,
                width=config.plot_scale * 3,
                verbose=False,
                limitsize=False)
Beispiel #4
0
def add_mirna_g(g, df, str_name, str_start, str_end, dis_pos, l_s, l_e, l_score=None):
    """Add one labelled row of horizontal interval bars to the plot ``g``.

    The function works by side effect: ``df`` gains new columns and ``g`` is
    extended with ``+=``. Nothing is returned, so the caller only sees the new
    layers if ``g`` supports in-place addition.

    Args:
        g: Plot object that layers are added to via ``+=``.
        df: DataFrame that receives the start/end (and optional score)
            columns; it must already contain a ``mi_name`` column, which is
            used for the ``color`` aesthetic.
        str_name: Text drawn at ``x=0`` on this row; also used as the suffix
            of the score column name.
        str_start: Name of the column to create from ``l_s`` (bar start).
        str_end: Name of the column to create from ``l_e`` (bar end).
        dis_pos: y position of this row.
        l_s: Sequence of start positions.
        l_e: Sequence of end positions.
        l_score: Optional sequence of scores. When non-empty, each score is
            formatted with no decimals and drawn as text at the bar start.
    """
    df[str_start] = pd.Series(l_s)
    df[str_end] = pd.Series(l_e)

    g += pt.annotate("text", x=0, y=dis_pos, label=str_name)
    g += pt.geom_errorbarh(
        df, pt.aes(xmin=str_start, y=dis_pos, xmax=str_end, color='mi_name'))
    # Vertical drop line from each bar start down to y=0.
    g += pt.geom_segment(
        df, pt.aes(x=str_start, y=dis_pos, yend=0, xend=str_start,
                   color='mi_name'))

    if l_score:
        score_column_name = 'score' + str_name
        # Builtin float replaces np.float, an alias NumPy has since removed.
        df[score_column_name] = pd.Series(
            l_score, dtype=float).map('{:.0f}'.format)

        g += pt.geom_text(
            df, pt.aes(x=str_start, y=dis_pos, label=score_column_name,
                       color='mi_name'),
            nudge_x=0.1, nudge_y=0.1)
def plot_pointgraph(
        plot_df,
        x_axis_label,
        left_arrow_label,
        right_arrow_label,
        left_arrow_start=-0.5,
        left_arrow_height=38.5,
        right_arrow_start=0.5,
        right_arrow_height=1.5,
        arrow_length=2,
        left_arrow_label_x=-1.5,
        left_arrow_label_y=-1.5,
        right_arrow_label_x=-1.5,
        right_arrow_label_y=-1.5,
        limits=(-3, 3),
):
    """
    Build a horizontal error-bar plot with one row per lemma.

    Rows are ordered by ascending ``odds_ratio``. A dashed grey vertical line
    is drawn at x = 0, and two annotated arrows (one pointing left, one
    pointing right) are added together with their text labels.
    Args:
        plot_df - data frame with the columns lemma, odds_ratio, lower_odds
                  and upper_odds,
        x_axis_label - the label of the x axis,
        left_arrow_label - the text shown for the left arrow,
        right_arrow_label - the text shown for the right arrow,
        left_arrow_start - x position where the left arrow starts
        left_arrow_height - y position of the left arrow
        right_arrow_start - x position where the right arrow starts
        right_arrow_height - y position of the right arrow
        arrow_length - horizontal length of both arrows
        left_arrow_label_x - x position of the left arrow's label
        left_arrow_label_y - y position of the left arrow's label
        right_arrow_label_x - x position of the right arrow's label
        right_arrow_label_y - y position of the right arrow's label
        limits - (min, max) limits passed to the continuous x scale
    Returns:
        the assembled plot object
    """

    def arrow_segment(x_from, x_to, height):
        # Horizontal black arrow drawn at a fixed height.
        return p9.annotate(
            "segment",
            x=x_from,
            xend=x_to,
            y=height,
            yend=height,
            colour="black",
            size=0.5,
            alpha=1,
            arrow=p9.arrow(length=0.1),
        )

    def arrow_caption(text, x_pos, y_pos):
        # Semi-transparent text label placed at the given coordinates.
        return p9.annotate(
            "text",
            label=text,
            x=x_pos,
            y=y_pos,
            size=12,
            alpha=0.7,
        )

    categorical_df = plot_df.assign(
        lemma=lambda x: pd.Categorical(x.lemma.tolist()))
    mapping = p9.aes(
        y="lemma",
        xmin="lower_odds",
        x="odds_ratio",
        xmax="upper_odds",
        yend="lemma",
    )
    # Lemmas listed from the smallest odds ratio to the largest.
    lemma_order = plot_df.sort_values(
        "odds_ratio", ascending=True).lemma.tolist()

    # Components are added strictly in this order.
    components = [
        p9.geom_errorbarh(color="#253494"),
        p9.scale_y_discrete(limits=lemma_order),
        p9.scale_x_continuous(limits=limits),
        p9.geom_vline(p9.aes(xintercept=0), linetype="--", color="grey"),
        arrow_segment(left_arrow_start,
                      left_arrow_start - arrow_length,
                      left_arrow_height),
        arrow_caption(left_arrow_label,
                      left_arrow_label_x,
                      left_arrow_label_y),
        arrow_segment(right_arrow_start,
                      right_arrow_start + arrow_length,
                      right_arrow_height),
        arrow_caption(right_arrow_label,
                      right_arrow_label_x,
                      right_arrow_label_y),
        p9.theme_seaborn(
            context="paper", style="ticks", font_scale=1, font="Arial"),
        p9.theme(
            figure_size=(11, 8.5),
            panel_grid_minor=p9.element_blank(),
            text=p9.element_text(size=12),
        ),
        p9.labs(y=None, x=x_axis_label),
    ]

    graph = p9.ggplot(categorical_df, mapping)
    for component in components:
        graph = graph + component

    return graph
                odds_ratio=lambda x: x.odds_ratio.apply(lambda x: np.log2(x)),
                lower_odds=lambda x: x.lower_odds.apply(lambda x: np.log2(x)),
                upper_odds=lambda x: x.upper_odds.apply(lambda x: np.log2(x)),
            ))
# Look at the first rows of plot_df; the returned frame is not stored
# (presumably a notebook cell whose value was displayed -- confirm).
plot_df.head()

g = (p9.ggplot(
    plot_df.assign(lemma=lambda x: pd.Categorical(x.lemma.tolist())),
    p9.aes(
        y="lemma",
        xmin="lower_odds",
        x="odds_ratio",
        xmax="upper_odds",
        yend="lemma",
    ),
) + p9.geom_errorbarh(color="#253494") + p9.scale_y_discrete(limits=(
    plot_df.sort_values("odds_ratio", ascending=True).lemma.tolist())) +
     p9.scale_x_continuous(limits=(-3, 3)) +
     p9.geom_vline(p9.aes(xintercept=0), linetype="--", color="grey") +
     p9.annotate(
         "segment",
         x=0.5,
         xend=2.5,
         y=1.5,
         yend=1.5,
         colour="black",
         size=0.5,
         alpha=1,
         arrow=p9.arrow(length=0.1),
     ) + p9.annotate(
         "text", label="bioRxiv Enriched", x=1.5, y=2.5, size=18, alpha=0.7) +