コード例 #1
0
def visualize_embedding(multinet, labels=None, verbose=True):
    """Project a network embedding to 2D with t-SNE and show it as a scatter plot.

    Args:
        multinet: Object whose ``embedding`` attribute is indexable; position 0
            is passed to t-SNE and position 1 is iterated as the matching
            node indices.
        labels: Optional container indexed by the node indices. When given and
            non-empty, each point is colored by ``labels[index]``.
        verbose: If true, print a banner before the visualization starts.
    """
    embedding = multinet.embedding
    X = embedding[0]
    indices = embedding[1]

    if verbose:
        print("------ Starting embedding visualization -------")

    # A plain ``if labels:`` raises for array-like containers (ambiguous truth
    # value), so test for "present and non-empty" explicitly instead.
    if labels is None:
        has_labels = False
    else:
        try:
            has_labels = len(labels) > 0
        except TypeError:
            # Unsized lookup objects were truthy before; keep treating them so.
            has_labels = True

    # Resolve labels before the (expensive) t-SNE fit so a bad lookup fails early.
    label_vector = [labels[x] for x in indices] if has_labels else None

    X_embedded = TSNE(n_components=2).fit_transform(X)
    dfr = pd.DataFrame(X_embedded, columns=['dim1', 'dim2'])
    if has_labels:
        dfr['labels'] = label_vector
        mapping = aes('dim1', 'dim2', color="labels")
    else:
        mapping = aes('dim1', 'dim2')
    print(dfr.head())

    gx = ggplot(dfr, mapping) + geom_point(size=0.5) + theme_bw()
    gx.draw()
    plt.show()
コード例 #2
0
def plot_ambient_by_difference(adata, plot_name='cellbender_results'):
    """Save two scatter plots of ambient expression vs. counts removed per gene.

    Per-gene totals of the ``counts_raw`` and ``counts_cellbender`` layers and
    their difference are written into ``adata.var``. The difference is then
    plotted against the ``ambient_expression`` column, once as points and once
    as text using the ``gene_symbols`` column.

    Parameters
    ----------
    adata
        Object providing ``layers`` and ``var`` as used below.
    plot_name : string
        Prefix for the two output PNG file names.
    """
    # Per-gene totals for each layer (sum over axis 0).
    raw_totals = np.array(adata.layers['counts_raw'].sum(axis=0)).squeeze()
    adata.var['total_gene_counts_raw'] = raw_totals
    cellbender_totals = np.array(
        adata.layers['counts_cellbender'].sum(axis=0)).squeeze()
    adata.var['total_gene_counts_cellbender'] = cellbender_totals

    adata.var['difference_total_gene_counts_raw_cellbender'] = (
        adata.var['total_gene_counts_raw']
        - adata.var['total_gene_counts_cellbender'])

    x_col = 'ambient_expression'
    y_col = 'difference_total_gene_counts_raw_cellbender'

    def save_scatter(layer, file_name):
        # Both figures share theme, labels and size; only the geom differs.
        figure = plt9.ggplot(adata.var)
        figure = figure + plt9.theme_bw()
        figure = figure + layer
        figure = figure + plt9.labs(
            x='Ambient RNA signature',
            y='Counts removed by cellbender',
            title='Ambient RNA signature removal per gene')
        figure.save(file_name, width=5, height=5)

    # Plain scatter.
    save_scatter(
        plt9.geom_point(plt9.aes(x=x_col, y=y_col), alpha=0.25),
        '{}-ambient_signature-scatter.png'.format(plot_name))

    # Same scatter with gene names drawn instead of points.
    save_scatter(
        plt9.geom_text(
            plt9.aes(x=x_col, y=y_col, label='gene_symbols'), alpha=0.25),
        '{}-ambient_signature-scatter_genenames.png'.format(plot_name))
コード例 #3
0
def plot_mass(calculated_cell_mass, plot_every_nth_point):
    """ Plots the resulting mass

    The caller's data frame is left untouched: the first column is relabelled
    'Time (h)' on a thinned copy only.

    Args:
        calculated_cell_mass (`pandas data frame`):  Pandas data frame [Nx3] with time and calculated cell mass and
                                                     rolling mean averaged cell mass
        plot_every_nth_point (`int`):                If 1 all data points are plotted. Otherwise every nth data point is
                                                     used for plotting.

    Returns:
        p (`ggplot object`):                         Returns a ggplot plot object

    """

    # Thin first, then copy, so that renaming the time column does not leak
    # back into the frame owned by the caller.
    thinned = calculated_cell_mass.iloc[::plot_every_nth_point, :].copy()
    col_names = list(thinned.columns)
    col_names[0] = 'Time (h)'
    thinned.columns = col_names

    # Points show the second column; the red line shows the third column.
    p = ggplot(aes(x=col_names[0], y=col_names[1]), data=thinned) + \
        geom_point(alpha=0.1) + \
        geom_line(aes(y=col_names[2]), color='red') + \
        theme_bw()
    return p
コード例 #4
0
def plot_categ_spatial(mod,
                       adata,
                       sample_col,
                       color,
                       n_columns=2,
                       figure_size=(24, 5.7),
                       point_size=0.8,
                       text_size=9):
    """Plot a categorical value per observation at its image coordinates.

    Args:
        mod: Not used by this function; kept for interface compatibility.
        adata: Object whose ``obs`` table has ``imagecol``, ``imagerow`` and
            ``sample_col`` columns.
        sample_col: Name of the ``obs`` column used to facet the plot.
        color: Values assigned as the ``color`` column; converted to an
            ordered categorical.
        n_columns: Number of facet columns.
        figure_size: Figure size passed to the plotnine theme.
        point_size: Point size.
        text_size: Font size of the facet strip text.

    Returns:
        The plotnine plot object.
    """
    # Copy the selection: assigning new columns onto a slice of ``adata.obs``
    # triggers pandas' chained-assignment warning and has ambiguous semantics.
    for_plot = adata.obs[["imagecol", "imagerow", sample_col]].copy()
    for_plot["color"] = color

    # fix types
    for_plot["color"] = pd.Categorical(for_plot["color"], ordered=True)
    for_plot["sample"] = pd.Categorical(for_plot[sample_col], ordered=False)
    for_plot["imagecol"] = pd.to_numeric(for_plot["imagecol"])
    # Row coordinate is negated -- presumably so the plot matches image
    # orientation (row 0 at the top); confirm against the image source.
    for_plot["imagerow"] = -pd.to_numeric(for_plot["imagerow"])

    ax = (
        plotnine.ggplot(
            for_plot, plotnine.aes(x="imagecol", y="imagerow", color="color"))
        + plotnine.geom_point(size=point_size)
        + plotnine.coord_fixed()
        + plotnine.theme_bw()
        + plotnine.theme(
            panel_background=plotnine.element_rect(
                fill="black", colour="black", size=0, linetype="solid"),
            panel_grid_major=plotnine.element_line(
                size=0, linetype="solid", colour="black"),
            panel_grid_minor=plotnine.element_line(
                size=0, linetype="solid", colour="black"),
            strip_text=plotnine.element_text(size=text_size),
        )
        + plotnine.facet_wrap("~sample", ncol=n_columns)
        + plotnine.theme(figure_size=figure_size)
    )

    return ax
コード例 #5
0
def accPlot(accsByNFeats):
    """Print an accuracy-vs-p line plot and return the long-format data.

    Args:
        accsByNFeats: Nested mapping ``{set_name: {p: accuracy}}``.

    Returns:
        DataFrame with columns ``p``, ``acc`` (cast to float) and ``set``,
        one row per (set, p) pair, indexed by ``str(p)``.
    """

    def one_row(set_name, p):
        # Single-row frame for one (set, p) accuracy.
        return pd.DataFrame(
            {"p": p, "acc": accsByNFeats[set_name][p], "set": set_name},
            index=[str(p)])

    per_set = [
        pd.concat([one_row(name, p) for p in accsByNFeats[name]], axis=0)
        for name in accsByNFeats
    ]
    ggd = pd.concat(per_set)
    ggd['acc'] = ggd['acc'].astype(float)

    ggo = gg.ggplot(ggd, gg.aes(x='p', y='acc', color='set'))
    for component in (
            gg.geom_line(alpha=0.5),
            gg.geom_point(),
            gg.theme_bw(),
            gg.scale_x_log10(breaks=[10, 100, 1000, 10000]),
            gg.scale_color_manual(
                values=['darkgray', 'black', 'red', 'dodgerblue']),
            gg.ylab('Accuracy (5-fold CV)'),
    ):
        ggo += component
    print(ggo)
    return ggd
コード例 #6
0
ファイル: plotting.py プロジェクト: yumbohorquez/bsuite
def facet_sweep_plot(base_plot: gg.ggplot,
                     sweep_vars: Sequence[str] = None,
                     tall_plot: bool = False) -> gg.ggplot:
  """Facet the plot by sweep_vars (if any) and apply a size-aware theme.

  The figure size is picked from the number of distinct sweep_vars
  combinations in the plot's data; tall_plot scales the height by 1.25.
  """
  plot_data = base_plot.data.copy()

  n_hypers = 1
  if sweep_vars:
    # One facet per distinct combination of the swept variables.
    n_hypers = plot_data[sweep_vars].drop_duplicates().shape[0]
    base_plot += gg.facet_wrap(sweep_vars, labeller='label_both')

  # Hand-picked sizes for the common small layouts.
  preset_sizes = {1: (7, 5), 2: (13, 5), 4: (13, 8)}
  if n_hypers in preset_sizes:
    fig_size = preset_sizes[n_hypers]
  elif n_hypers <= 12:
    fig_size = (15, 4 * np.divide(n_hypers, 3) + 1)
  else:
    print('WARNING - comparing {} agents at once is more than recommended.'
          .format(n_hypers))
    fig_size = (15, 12)

  if tall_plot:
    fig_width, fig_height = fig_size[0], fig_size[1]
    fig_size = (fig_width, fig_height * 1.25)

  plot_theme = gg.theme_bw(base_size=18, base_family='serif')
  plot_theme += gg.theme(
      figure_size=fig_size, panel_spacing_x=0.5, panel_spacing_y=0.5,)

  return base_plot + plot_theme
コード例 #7
0
def plot_predict(forecast):
    """Plot observed values against the forecast and save the chart as a PNG.

    Draws ``y`` as blue points and a blue line, ``yhat`` as a red line and the
    ``yhat_lower``/``yhat_upper`` band as a ribbon, all over ``ds``. The chart
    is written to ``png/predict_pressure_chart.png`` next to this module.

    Args:
        forecast: Data frame with ``ds``, ``y``, ``yhat``, ``yhat_lower`` and
            ``yhat_upper`` columns.

    Returns:
        The plot object.
    """
    p = (ggplot(data=forecast, mapping=aes(x='ds', y='y')) +
         geom_point(colour='blue', alpha=0.3, na_rm=True) +
         geom_line(colour='blue', na_rm=True) + geom_line(
             data=forecast, mapping=aes(x='ds', y='yhat'), colour='red') +
         geom_ribbon(data=forecast,
                     mapping=aes(ymin='yhat_lower', ymax='yhat_upper'),
                     fill='blue',
                     alpha=0.1) +
         scale_x_datetime(breaks='1 days', date_labels='%y-%m-%d %H:%M') +
         xlab('Time') + ylab('Pressure') + theme_bw() +
         theme(axis_text_x=element_text(
             angle=45, hjust=1, face='bold', color='black'),
               axis_text_y=element_text(face='bold', colour='black')))

    # Make sure the output directory exists; saving into a missing directory
    # would otherwise fail on a fresh checkout.
    out_dir = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'png')
    os.makedirs(out_dir, exist_ok=True)

    p.save(filename='predict_pressure_chart.png',
           path=out_dir,
           width=8,
           height=6,
           units='in',
           dpi=326,
           verbose=False)
    return p
コード例 #8
0
ファイル: plots.py プロジェクト: mappin/asxtrade
def make_sentiment_plot(sentiment_df, exclude_zero_bin=True, plot_text_labels=True):
    """Render a date-by-bin tile plot of how many rows fall in each bin.

    Every column named ``bin_<YYYY-MM-DD>`` is tallied; each (date, bin) count
    becomes one tile, optionally annotated with the count.

    Args:
        sentiment_df: Data frame with ``bin_<date>`` columns.
        exclude_zero_bin: If true, skip the ``"0.0"`` bin and any non-string
            bin value.
        plot_text_labels: If true, draw the count as text on each tile.

    Returns:
        Whatever ``plot_as_inline_html_data`` returns for the plot.
    """
    print(
        "Sentiment plot: exclude zero bins? {} show text? {}".format(
            exclude_zero_bin, plot_text_labels
        )
    )

    bin_columns = [name for name in sentiment_df.columns if name.startswith("bin_")]

    records = []
    for column in bin_columns:
        day = column[4:]  # strip the "bin_" prefix
        for bin_name, count in Counter(sentiment_df[column]).items():
            skip = exclude_zero_bin and (
                bin_name == "0.0" or not isinstance(bin_name, str)
            )
            if not skip:
                records.append(
                    {
                        "date": datetime.strptime(day, "%Y-%m-%d"),
                        "bin": str(bin_name),
                        "value": int(count),
                    }
                )

    df = pd.DataFrame.from_records(records)
    # HACK TODO FIXME: should get from price_change_bins()...
    negative_bins = ["-1000.0", "-100.0", "-10.0", "-5.0", "-3.0", "-2.0", "-1.0", "-1e-06"]
    positive_bins = ["1e-06", "1.0", "2.0", "3.0", "5.0", "10.0", "25.0", "100.0", "1000.0"]
    order = negative_bins + positive_bins
    df["bin_ordered"] = pd.Categorical(df["bin"], categories=order)

    plot = p9.ggplot(df, p9.aes("date", "bin_ordered", fill="value"))
    plot = plot + p9.geom_tile(show_legend=False)
    plot = plot + p9.theme_bw()
    plot = plot + p9.xlab("")
    plot = plot + p9.ylab("Percentage daily change")
    plot = plot + p9.theme(
        axis_text_x=p9.element_text(angle=30, size=7), figure_size=(10, 5)
    )
    if plot_text_labels:
        plot = plot + p9.geom_text(p9.aes(label="value"), size=8, color="white")
    return plot_as_inline_html_data(plot)
コード例 #9
0
def plot_replicate_density(
    df,
    batch,
    plate,
    output_file_base=None,
    output_file_extensions=None,
    dpi=300,
    height=1.5,
    width=2,
):
    """Density plot of pairwise correlations, filled by replicate status.

    Args:
        df: Data frame with ``pairwise_correlation`` and ``replicate_info``
            columns.
        batch: Batch identifier, shown in the title.
        plate: Plate identifier, shown in the title.
        output_file_base: If truthy, the figure is saved through
            ``save_figure`` using this base name.
        output_file_extensions: Extensions passed to ``save_figure``.
            Defaults to ``[".png", ".pdf", ".svg"]``.
        dpi: Passed to ``save_figure``.
        height: Passed to ``save_figure``.
        width: Passed to ``save_figure``.

    Returns:
        The plot object.
    """
    # Build the default per call: a list default in the signature would be
    # shared (and mutable) across calls.
    if output_file_extensions is None:
        output_file_extensions = [".png", ".pdf", ".svg"]

    density_gg = (
        gg.ggplot(df, gg.aes(x="pairwise_correlation", fill="replicate_info"))
        + gg.geom_density(alpha=0.3) + gg.scale_fill_manual(
            name="Replicate",
            labels={
                "True": "True",
                "False": "False"
            },
            values=["#B99638", "#2DB898"],
        ) + gg.xlab("Pearson Correlation") + gg.ylab("Density") +
        gg.ggtitle("{}: {}".format(batch, plate)) + gg.theme_bw() + gg.theme(
            title=gg.element_text(size=9),
            axis_text=gg.element_text(size=5),
            axis_title=gg.element_text(size=8),
            legend_text=gg.element_text(size=6),
            legend_title=gg.element_text(size=7),
            strip_text=gg.element_text(size=4, color="black"),
            strip_background=gg.element_rect(colour="black", fill="#fdfff4"),
        ))

    if output_file_base:
        save_figure(density_gg, output_file_base, output_file_extensions, dpi,
                    height, width)

    return density_gg
コード例 #10
0
def plot_umap_well(embedding_df, fig_file, well_column):
    """Scatter the ``x``/``y`` embedding colored by ``well_column`` and save it.

    Args:
        embedding_df: Data frame with ``x`` and ``y`` columns plus the column
            named by ``well_column``.
        fig_file: Output file name for the saved figure.
        well_column: Column mapped to point color.

    Returns:
        The plot object.
    """
    base = gg.ggplot(embedding_df, gg.aes(x="x", y="y"))
    points = gg.geom_point(
        gg.aes(color=well_column), size=0.2, shape=".", alpha=0.2)
    well_gg = base + points + gg.theme_bw()

    well_gg.save(filename=fig_file, height=4, width=5, dpi=500)
    return well_gg
コード例 #11
0
class THEME:
    # Shared colors, plotnine theme and scales, grouped as class attributes.

    # Background color and loader settings.
    bgcolor = "#293241"
    LOADER_COLOR = "#2a9d8f"
    LOADER_TYPE = "dot"

    # Categorical palettes.
    colors_light = [
        "#d88c9a",
        "#f2d0a9",
        "#f1e3d3",
        "#99c1b9",
        "#8e7dbe",
        "#50514f",
        "#f25f5c",
        "#ffe066",
        "#247ba0",
        "#70c1b3",
        "#c97c5d",
        "#b36a5e",
    ]
    colors_dark = [
        "#e07a5f",
        "#3d405b",
        "#81b29a",
        "#2b2d42",
        "#f77f00",
        "#6d597a",
    ]

    # Base theme: black-and-white without the panel border. (A variant that
    # filled the panel and plot backgrounds with `bgcolor` is currently
    # disabled.)
    mt = theme_bw() + theme(panel_border=element_blank())

    # Ready-made scales built on the light palette.
    cat_colors = scale_fill_manual(values=colors_light)
    cat_colors_lines = scale_color_manual(values=colors_light)
    gradient_colors = scale_fill_gradient("#ce4257", "#aad576")

    # Integer selector constants.
    FILL = 1
    COLOR = 2

    LONG_FIGURE = (10, 20)
コード例 #12
0
def plot_significance_vs_ranking(
    summary_df, method_name, x_label, output_figure_filename
):
    """Scatter a method's test statistic against the percentile of its ranking.

    Rows whose percentile rank exceeds 0.9 are drawn in red and labelled with
    their index value. The figure is printed and saved as SVG.

    Args:
        summary_df: Data frame with a ``"Rank (simulated)"`` column and a
            ``"<statistic> (Real)"`` column, where ``<statistic>`` is looked up
            in ``method_stats_dict`` by ``method_name``.
        method_name: Key into ``method_stats_dict``; also used in the title.
        x_label: Label for the x axis.
        output_figure_filename: Destination of the saved figure.
    """
    # Rows above this percentile are highlighted and labelled.
    top_cutoff = 0.9

    # Format input dataframe
    plot_df = pd.DataFrame(
        data={
            "Test statistic": summary_df[
                method_stats_dict[method_name] + " (Real)"
            ].values,
            "Percentile rank": summary_df["Rank (simulated)"].rank(pct=True).values,
        },
        index=summary_df.index,
    )

    # Compute the highlight mask once, positionally. A per-row ``.loc`` lookup
    # is slow and returns a Series (ambiguous truth value) when the index
    # contains duplicates.
    is_top = (plot_df["Percentile rank"] > top_cutoff).values
    point_labels = [
        name if flagged else "" for name, flagged in zip(plot_df.index, is_top)
    ]

    fig = pn.ggplot(plot_df, pn.aes(x="Test statistic", y="Percentile rank"))
    fig += pn.geom_point()
    fig += pn.geom_point(
        plot_df[is_top],
        pn.aes(x="Test statistic", y="Percentile rank"),
        color="red",
    )
    fig += pn.geom_text(
        pn.aes(label=point_labels),
        ha="left",
        va="top",
        size=5,
    )
    fig += pn.labs(
        x=x_label,
        y="Percentile of ranking",
        title=f"{method_name} pathway statistics vs ranking",
    )
    fig += pn.theme_bw()
    fig += pn.theme(
        legend_title_align="center",
        plot_background=pn.element_rect(fill="white"),
        legend_key=pn.element_rect(fill="white", colour="white"),
        legend_title=pn.element_text(family="sans-serif", size=15),
        legend_text=pn.element_text(family="sans-serif", size=12),
        plot_title=pn.element_text(family="sans-serif", size=15),
        axis_text=pn.element_text(family="sans-serif", size=12),
        axis_title=pn.element_text(family="sans-serif", size=15),
    )

    print(fig)

    # Save figure
    fig.save(
        output_figure_filename,
        format="svg",
        bbox_inches="tight",
        transparent=True,
        pad_inches=0,
        dpi=300,
    )
コード例 #13
0
def plot_auc(read_file_1, read_file_2, plot_dir, save_file, generate_auc):
    """Save a violin plot of AUROC per outcome for two sets of models.

    Args:
        read_file_1: CSV file name (under ``dir_output``) loaded as the
            'CPT specific' data.
        read_file_2: CSV file name (under ``dir_output``) loaded as the
            'Aggregate' data.
        plot_dir: Sub-directory of ``dir_figures`` that receives the figure.
        save_file: File name of the saved figure.
        generate_auc: If true, both tables are passed through ``get_auc``
            before plotting.
    """
    # Load both result tables.
    temp_sub = pd.read_csv(os.path.join(dir_output, read_file_1))
    temp_agg = pd.read_csv(os.path.join(dir_output, read_file_2))

    # Restrict the aggregate table to match the sub-model table, then recode
    # the outcome in both.
    temp_agg = subset_agg(temp_sub=temp_sub, temp_agg=temp_agg)
    temp_agg = recode_outcome(temp_dat=temp_agg)
    temp_sub = recode_outcome(temp_dat=temp_sub)

    if generate_auc:
        temp_sub = get_auc(temp_sub)
        temp_agg = get_auc(temp_agg)

    def tag_model(frame, model_name):
        # Drop incomplete rows and prepend a 'model' column naming the source.
        cleaned = frame.dropna().reset_index(drop=True)
        cleaned.insert(0, 'model', model_name)
        return cleaned

    temp_sub = tag_model(temp_sub, 'CPT specific')
    temp_agg = tag_model(temp_agg, 'Aggregate')

    # Output location and combined data.
    plot_output = os.path.join(dir_figures, plot_dir)
    dat = pd.concat([temp_agg, temp_sub], axis=0).reset_index(drop=True)

    # NOTE(review): draw_quantiles is passed as an aesthetic mapped to the
    # 'auc' column; it looks like it was meant as a geom_violin parameter --
    # confirm the intent.
    violins = geom_violin(aes(draw_quantiles='auc'))
    img = (ggplot(dat, aes(x='outcome', y='auc', fill='model'))
           + violins
           + labs(x='Outcome', y='AUROC')
           + theme_bw())
    img.save(os.path.join(plot_output, save_file))
コード例 #14
0
def scatter_plot(df,
                 xcol,
                 ycol,
                 domain,
                 xname=None,
                 yname=None,
                 log=False,
                 width=6,
                 height=6,
                 clamp=True,
                 tickCount=5):
    """Scatter ``ycol`` against ``xcol`` with a diagonal and upper-bound rules.

    Args:
        df: Data frame holding the two columns.
        xcol: Column plotted on the x axis.
        ycol: Column plotted on the y axis.
        domain: Two-element ``(low, high)`` limits applied to both axes.
        xname: x-axis label; defaults to ``xcol``.
        yname: y-axis label; defaults to ``ycol``.
        log: Use log10 scales instead of linear ones.
        width: Figure width.
        height: Figure height.
        clamp: If true, values above ``domain[1]`` are set to ``domain[1]``
            on a copy of ``df``.
        tickCount: Not used by this function.

    Returns:
        The plot object.
    """
    assert len(domain) == 2

    POINT_SIZE = 0.5
    DASH_PATTERN = (0, (3, 1))

    # Identity checks, consistent with scatter_plot2.
    if xname is None:
        xname = xcol
    if yname is None:
        yname = ycol

    # formatter for axes' labels
    ax_formatter = mizani.custom_format('{:n}')

    if clamp:  # clamp overflowing values if required
        df = df.copy(deep=True)
        df.loc[df[xcol] > domain[1], xcol] = domain[1]
        df.loc[df[ycol] > domain[1], ycol] = domain[1]

    # generate scatter plot
    scatter = p9.ggplot(df)
    scatter += p9.aes(x=xcol, y=ycol)
    scatter += p9.geom_point(size=POINT_SIZE, na_rm=True)
    scatter += p9.labs(x=xname, y=yname)

    if log:  # log scale
        scatter += p9.scale_x_log10(limits=domain, labels=ax_formatter)
        scatter += p9.scale_y_log10(limits=domain, labels=ax_formatter)
    else:
        scatter += p9.scale_x_continuous(limits=domain, labels=ax_formatter)
        scatter += p9.scale_y_continuous(limits=domain, labels=ax_formatter)

    scatter += p9.theme_bw()
    scatter += p9.theme(
        panel_grid_major=p9.element_line(color='#666666', alpha=0.5))
    scatter += p9.theme(figure_size=(width, height))

    # generate additional lines
    scatter += p9.geom_abline(intercept=0, slope=1,
                              linetype=DASH_PATTERN)  # diagonal
    scatter += p9.geom_vline(xintercept=domain[1],
                             linetype=DASH_PATTERN)  # vertical rule
    scatter += p9.geom_hline(yintercept=domain[1],
                             linetype=DASH_PATTERN)  # horizontal rule

    return scatter
コード例 #15
0
def plot(solu, k):
    """Build one animation frame of the four bar mechanism.

    Args:
        solu: Data frame of solutions; row ``k`` supplies the positions
            (``Ro4``, ``Ra``, ``Rpa``, ``Rba``), accelerations (``Aba``,
            ``Aa``, ``Apaa``) and ``time`` for the frame. Positions and
            accelerations are read through ``.real`` / ``.imag``.
        k: Frame index.

    Returns:
        The plot object for frame ``k``.
    """
    print("Frame: ", k)

    sol = solu[k:k + 1]

    # Values for this frame, looked up once.
    ro4 = sol.Ro4[k]
    ra = sol.Ra[k]
    rpa = sol.Rpa[k]
    rba = sol.Rba[k]
    aba = sol.Aba[k]
    aa = sol.Aa[k]
    apaa = sol.Apaa[k]

    def bar(x0, y0, x1, y1):
        # Straight link between two joints.
        return geom_segment(aes(x=x0, y=y0, xend=x1, yend=y1))

    def joint(x0, y0):
        # Marker for a joint.
        return geom_point(aes(x=x0, y=y0), shape='o', size=3)

    def accel_arrow(origin, accel):
        # Red arrow from a point along its acceleration, scaled by ACC_SCALE.
        return geom_segment(aes(x=origin.real, y=origin.imag,
                                xend=origin.real + accel.real * ACC_SCALE,
                                yend=origin.imag + accel.imag * ACC_SCALE),
                            colour='red', arrow=arrow())

    def accel_text(x0, y0, accel):
        # Text between '$ $' is rendered as LaTeX by the plotting library.
        return annotate("text", x=x0, y=y0,
                        label=f'${np.absolute(accel)/1000:.2f}~m/s^2$',
                        colour='red')

    layers = [
        # MAIN LINKAGE
        bar(0, 0, ro4.real, ro4.imag),
        joint(0, 0),
        joint(ro4.real, ro4.imag),
        # 2ND LINKAGE
        bar(0, 0, ra.real, ra.imag),
        joint(ra.real, ra.imag),
        # AP LINKAGE
        bar(ra.real, ra.imag, rpa.real, rpa.imag),
        joint(rpa.real, rpa.imag),
        # 3RD LINKAGE
        bar(ra.real, ra.imag, rba.real, rba.imag),
        joint(rba.real, rba.imag),
        # 4TH LINKAGE
        bar(rba.real, rba.imag, ro4.real, ro4.imag),
        joint(rba.real, rba.imag),
        # NODES IDENTIFICATION
        annotate("text", x=0, y=-20, label="$O_1$"),
        annotate("text", x=ro4.real, y=ro4.imag - 20, label="$O_4$"),
        annotate("text", x=ra.real + 10, y=ra.imag, label="$A$"),
        annotate("text", x=rba.real + 20, y=rba.imag - 10, label="$B$"),
        annotate("text", x=rpa.real, y=rpa.imag - 40, label="$P$"),
        # ACCELERATION ARROWS (remove to hide acceleration information)
        accel_arrow(rba, aba),   # Point B
        accel_arrow(ra, aa),     # Point A
        accel_arrow(rpa, apaa),  # Point P
        # ACCELERATION TEXTS (remove to hide acceleration information)
        accel_text(rba.real - 30, rba.imag + 10, aba),
        accel_text(ra.real + 20, ra.imag - 20, aa),
        accel_text(rpa.real + 10, rpa.imag + 20, apaa),
        # TIME IDENTIFICATION
        annotate("label", x=120, y=-80,
                 label=f'Time: ${sol.time[k]:.2f}~s$', alpha=1),
        labs(x='$x~[mm]$', y='$y~[mm]$'),
        # Fixed plot limits so the frame is no bigger than necessary.
        coord_cartesian(xlim=SCALE_X, ylim=SCALE_Y),
        theme_bw(),
    ]

    # Add the components in drawing order.
    p = ggplot(sol)
    for layer in layers:
        p = p + layer

    return p
コード例 #16
0
ファイル: revigo.py プロジェクト: gokceneraslan/sctoolkit
def plot_revigo(
    rev,
    outline=2,
    expand_points=(1.05, 1.2),
    figure_size=(8, 8),
    font_size=8,
    point_size=3,
    point_alpha=0.7,
    palette='RdPu',
    dispensability_cutoff=1.,
    show_all_labels=False,
    text_column='name',
    term_size_limit=None,
):
    """Scatter plot of a REVIGO-style table with outlined, adjusted labels.

    Args:
        rev: Data frame with ``plot_X``, ``plot_Y``, ``neglog10``,
            ``frequency``, ``eliminated``, ``dispensability`` columns, the
            column named by ``text_column`` and (if ``term_size_limit`` is
            given) ``term_size``.
        outline: Line width of the white stroke drawn around label text.
        expand_points: Passed as ``expand_points`` in ``adjust_text``.
        figure_size: Figure size for the theme.
        font_size: Label font size.
        point_size: Not used by this function (point size is mapped to
            ``frequency``).
        point_alpha: Point transparency.
        palette: Palette name for the fill scale.
        dispensability_cutoff: Only rows with dispensability below this value
            are labelled (unless ``show_all_labels``).
        show_all_labels: Label every row.
        text_column: Column used as the label text.
        term_size_limit: If given, only rows with ``term_size`` below it are
            labelled.

    Returns:
        The plot object.
    """

    import plotnine as p9
    import matplotlib.patheffects as path_effects

    # The stroke width follows ``outline``; it used to be hard-coded to 2,
    # which left the parameter without effect.
    pe = [
        path_effects.Stroke(linewidth=outline, foreground='white'),
        path_effects.Normal()
    ]
    if not show_all_labels:
        lbl_df = rev[(rev.eliminated == 0)
                     & (rev.dispensability < dispensability_cutoff)]
        if term_size_limit is not None:
            lbl_df = lbl_df[lbl_df.term_size < term_size_limit]
    else:
        lbl_df = rev

    g = (p9.ggplot(p9.aes(x='plot_X', y='plot_Y'), data=rev) +
         p9.geom_point(p9.aes(fill='neglog10', size='frequency'),
                       color='black',
                       alpha=point_alpha) +
         p9.geom_text(p9.aes(label=text_column),
                      data=lbl_df,
                      size=font_size,
                      adjust_text={
                          'expand_points': expand_points,
                          'arrowprops': {
                              'arrowstyle': '-'
                          },
                          # All point coordinates, so labels avoid every
                          # point and not only the labelled ones.
                          'x': rev.plot_X.values,
                          'y': rev.plot_Y.values
                      },
                      path_effects=pe) + p9.theme_bw() +
         p9.scale_fill_distiller(type='seq', palette=palette, direction=1) +
         p9.labs(x='Semantic similarity space',
                 y='',
                 fill='-log10(adj. p-value)',
                 size='Term frequency') +
         p9.scale_size_continuous(range=(2, 7), trans='log10') +
         p9.theme(figure_size=figure_size,
                  axis_text_x=p9.element_blank(),
                  axis_text_y=p9.element_blank(),
                  axis_ticks=p9.element_blank()))

    return g
コード例 #17
0
def plot_paired_ranking(
    method1_summary_df,
    method2_summary_df,
    method1_name,
    method2_name,
    output_figure_filename,
):
    """Scatter the simulated percentile of one method against another's.

    The two summary frames are joined on their index, the
    ``"Percentile (simulated)"`` columns are plotted against each other, and
    the figure is saved as SVG and then printed.

    Args:
        method1_summary_df: Summary frame for the first method.
        method2_summary_df: Summary frame for the second method.
        method1_name: Name of the first method (column suffix and labels).
        method2_name: Name of the second method (column suffix and labels).
        output_figure_filename: Destination of the saved figure.
    """
    suffix_1 = f"_{method1_name}"
    suffix_2 = f"_{method2_name}"

    # Index join keeps the rows of both methods aligned.
    merged = method1_summary_df.merge(
        method2_summary_df,
        left_index=True,
        right_index=True,
        suffixes=[suffix_1, suffix_2],
    )

    percentile_col = "Percentile (simulated)"
    plot_df = pd.DataFrame(
        data={
            "Method1 ranking": merged[percentile_col + suffix_1].values,
            "Method2 ranking": merged[percentile_col + suffix_2].values,
        },
        index=merged.index,
    )

    def sans(size):
        # Fresh sans-serif text element of the given size.
        return pn.element_text(family="sans-serif", size=size)

    white_key = pn.element_rect(fill="white", colour="white")

    fig = pn.ggplot(plot_df, pn.aes(x="Method1 ranking", y="Method2 ranking"))
    fig += pn.geom_point()
    fig += pn.labs(
        x=f"{method1_name} pathway ranking",
        y=f"{method2_name} pathway ranking",
        title=f"{method1_name} vs {method2_name} pathway ranking",
    )
    fig += pn.theme_bw()
    fig += pn.theme(
        legend_title_align="center",
        plot_background=pn.element_rect(fill="white"),
        legend_key=white_key,
        legend_title=sans(15),
        legend_text=sans(12),
        plot_title=sans(15),
        axis_text=sans(12),
        axis_title=sans(15),
    )

    # Save first, then echo the figure.
    save_options = {
        "format": "svg",
        "bbox_inches": "tight",
        "transparent": True,
        "pad_inches": 0,
        "dpi": 300,
    }
    fig.save(output_figure_filename, **save_options)
    print(fig)
コード例 #18
0
def plot_histogram(df_plot,
                   variable_column,
                   output_file='plot_distribution',
                   facet_column='none',
                   x_log10=False):
    """Plot a histogram of one column to a png file.

    The caller's data frame is not modified; the plotted ``x`` column is
    added to a copy.

    Parameters
    ----------
    df_plot : pandas.DataFrame
        DataFrame with <variable_column> as a column.
    variable_column : string
        Name of the column to plot.
    output_file : string
        Basename of output file.
    facet_column : string
        Column to facet the plot by; 'none' disables faceting.
    x_log10 : bool
        If true, plot log10 of the values. A pseudo-count of 1e-10 is added
        when any value is exactly 0.

    Returns
    -------
    int
        1 if x_log10 was requested but the column has negative values (no
        plot is written), otherwise 0.
    """
    # assign() returns a new frame, so the helper column never leaks into the
    # frame owned by the caller.
    df_plot = df_plot.assign(x=df_plot[variable_column])
    if x_log10:
        x_values = df_plot['x'].values
        if np.any(x_values < 0):
            return 1
        if np.any(x_values == 0):
            # Keep log10 finite for exact zeros.
            x_values = x_values + 1e-10
        df_plot['x'] = np.log10(x_values)
        variable_column = variable_column + ' (log10)'

    gplt = plt9.ggplot(df_plot, plt9.aes(x='x'))
    gplt = gplt + plt9.theme_bw()
    gplt = gplt + plt9.geom_histogram(alpha=0.8)
    gplt = gplt + plt9.scale_x_continuous(minor_breaks=0)
    gplt = gplt + plt9.scale_y_continuous(minor_breaks=0)
    gplt = gplt + plt9.labs(title='', x=variable_column)
    gplt = gplt + plt9.theme(axis_text_x=plt9.element_text(angle=-45, hjust=0))
    if facet_column != 'none':
        gplt = gplt + plt9.facet_wrap('~ {}'.format(facet_column), ncol=5)
        # Scale the figure with the number of facets.
        n_facets = df_plot[facet_column].nunique()
        gplt.save('{}.png'.format(output_file),
                  dpi=300,
                  width=6 * (n_facets / 4),
                  height=4 * (n_facets / 4),
                  limitsize=False)
    else:
        gplt.save('{}.png'.format(output_file), dpi=300, width=4, height=4)
    return 0
コード例 #19
0
def scatter_plot2(df1, df2, xcol, ycol, domain, color1='black', color2='red', xname=None, yname=None, log=False, width=6, height=6, clamp=True, tickCount=5):
    """Overlay two scatter plots (with rugs) of ``ycol`` against ``xcol``.

    ``df1`` is drawn in ``color1`` and ``df2`` in ``color2``. Both axes are
    limited to ``domain``; a diagonal and rules at ``domain[1]`` are added.
    With ``clamp`` set, values above ``domain[1]`` are set to ``domain[1]``
    on copies of the inputs. ``tickCount`` is not used.

    Returns the plot object.
    """
    assert len(domain) == 2

    POINT_SIZE = 1.5
    DASH_PATTERN = (0, (6, 2))

    xname = xcol if xname is None else xname
    yname = ycol if yname is None else yname

    # formatter for axes' labels
    ax_formatter = mizani.custom_format('{:n}')

    upper = domain[1]

    def clamped(frame):
        # Cap overflowing values on a deep copy, x column first.
        capped = frame.copy(deep=True)
        for col in (xcol, ycol):
            capped.loc[capped[col] > upper, col] = upper
        return capped

    if clamp:
        df1 = clamped(df1)
        df2 = clamped(df2)

    if log:
        make_x_scale, make_y_scale = p9.scale_x_log10, p9.scale_y_log10
    else:
        make_x_scale, make_y_scale = p9.scale_x_continuous, p9.scale_y_continuous

    scatter = p9.ggplot(df1)
    components = [
        p9.aes(x=xcol, y=ycol),
        # points
        p9.geom_point(size=POINT_SIZE, na_rm=True, color=color1, alpha=0.5),
        p9.geom_point(size=POINT_SIZE, na_rm=True, data=df2, color=color2, alpha=0.5),
        p9.labs(x=xname, y=yname),
        # rug plots
        p9.geom_rug(na_rm=True, sides="tr", color=color1, alpha=0.05),
        p9.geom_rug(na_rm=True, sides="tr", data=df2, color=color2, alpha=0.05),
        # axis scales
        make_x_scale(limits=domain, labels=ax_formatter),
        make_y_scale(limits=domain, labels=ax_formatter),
        # theme
        p9.theme_bw(),
        p9.theme(panel_grid_major=p9.element_line(color='#666666', alpha=0.5)),
        p9.theme(panel_grid_minor=p9.element_blank()),
        p9.theme(figure_size=(width, height)),
        p9.theme(text=p9.element_text(size=24, color="black")),
        # reference lines: diagonal, vertical rule, horizontal rule
        p9.geom_abline(intercept=0, slope=1, linetype=DASH_PATTERN),
        p9.geom_vline(xintercept=upper, linetype=DASH_PATTERN),
        p9.geom_hline(yintercept=upper, linetype=DASH_PATTERN),
    ]
    for component in components:
        scatter += component

    return scatter
コード例 #20
0
ファイル: lsq.py プロジェクト: DesiPilla/power_ranker
def plot_save_rank(df_ranks, df_teams, year, week, show=False):
    """Plot the ranking iterations for each team and summarise the final ranks

    The plot is saved to ``output/<year>/week<week>/lsq_iter_rankings.png``.

    :param df_ranks: data frame with team_id, and rankings for each iteration
    :param df_teams: data frame with team_id and owner info
    :param year: year for data
    :param week: current week
    :param show: flag to display the plot
    :return: data frame with columns team_id and lsq, where lsq is
        tanh(mean rank over iterations > 70 / 75) divided by its maximum
    """
    # Plot each iteration
    df_ranks_lsq = pd.merge(df_teams[['team_id', 'firstName']],
                            df_ranks,
                            on='team_id')
    # Space out labels on x-axis according to final rankings (column 99)
    df_ranks_lsq['label_x_pos'] = df_ranks_lsq.get(
        99).rank() * 100 / df_ranks_lsq.get(99).size
    # Convert to long format for plotting ease
    df_ranks_lsq_long = (df_ranks_lsq.rename({
        'ranks': '0'
    }, axis='columns').melt(id_vars=['team_id', 'firstName', 'label_x_pos']))
    # Convert iteration variable to int
    df_ranks_lsq_long.variable = df_ranks_lsq_long.variable.astype(int)
    # Make the plot
    p = (ggplot(aes(
        x='variable', y='value', color='factor(team_id)', group='team_id'),
                data=df_ranks_lsq_long) + geom_line() +
         geom_label(aes(label='firstName',
                        x='label_x_pos',
                        y='value',
                        color='factor(team_id)'),
                    data=df_ranks_lsq_long[df_ranks_lsq_long.variable == 99],
                    size=10) + labs(x='Iteration', y='LSQ rank') + theme_bw() +
         guides(color=False))
    # Show plot
    if show:
        p.draw()
    # make dir if it doesn't exist already
    out_dir = Path(f'output/{year}/week{week}')
    out_dir.mkdir(parents=True, exist_ok=True)
    out_name = out_dir / 'lsq_iter_rankings.png'
    # plotnine is throwing too many warnings: silence them only while saving.
    # catch_warnings restores the caller's filters afterwards, even if save()
    # raises, instead of overwriting them globally.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        p.save(out_name, width=9, height=6, dpi=300)
    logger.info(f'Saved LSQ rankings plot to local file: {out_name.resolve()}')
    # Average the iterations after the 70th to get the final rank
    df_final_ranks = (df_ranks_lsq_long.query('variable>70').groupby([
        'team_id'
    ])[['value'
        ]].agg(lambda x: np.tanh(np.mean(x) / 75.)).reset_index().rename(
            {'value': 'lsq'}, axis=1))
    # Normalize by max score
    df_final_ranks['lsq'] = df_final_ranks.get('lsq') / df_final_ranks.get(
        'lsq').max()
    return df_final_ranks
コード例 #21
0
ファイル: utils.py プロジェクト: DesiPilla/power_ranker
def calc_tiers(df_ranks, year, week, bw=0.09, order=4, show=False):
    """Assign each team to a tier using Gaussian kernel density estimation.

    A KDE is fit to the ``power`` column of ``df_ranks``. Relative minima of
    the estimated density act as tier boundaries; only the four highest
    boundaries are kept, so at most five tiers are produced. A diagnostic
    plot is saved to ``output/<year>/week<week>/tiers.png``.

    :param df_ranks: data frame with a ``power`` column holding the power
        ranking of each team; a ``tier`` column is added to it in place
    :param year: current year (used in the output directory)
    :param week: current week (used in the output directory and plot title)
    :param bw: bandwidth, passed to ``gaussian_kde`` as ``bw_method``
    :param order: ``order`` argument passed to ``argrelmin``
    :param show: flag to draw the plot before saving it
    :return: ``df_ranks`` with the added ``tier`` column
    """
    logger.info('Calculating tiers for power rankings')
    power = df_ranks.get('power')
    # Estimate the kernel using power rankings
    kde = gaussian_kde(power, bw_method=bw)
    # Create grid of points for plot, padded by 10 on both sides
    x_grid = np.linspace(power.min() - 10., power.max() + 10, power.size * 10)
    # Evaluate the density once; it feeds both the plot and the minima search
    density = kde(x_grid)
    df_kde = pd.DataFrame(dict(x=x_grid, kde=density))
    # Relative minimums of the density are the candidate tier boundaries
    rel_min = pd.DataFrame(
        dict(rel_min=x_grid[argrelmin(density, order=order)[0]]))
    # Keep the four highest boundaries, i.e. at most five tiers
    tier_mins = sorted(rel_min.rel_min.values, reverse=True)[:4]
    # Find position of power rank when added to list of minimums to get tier
    df_ranks['tier'] = df_ranks.apply(lambda x: sorted(
        tier_mins + [x.power], reverse=True).index(x.power) + 1,
                                      axis=1)
    # Plot KDE and overlay tiers and actual power rankings as vertical lines
    tier_plot = (
        ggplot(aes(x='x', y='kde'), data=df_kde) + geom_line(size=1.5) +
        geom_vline(
            aes(xintercept='rel_min'), data=rel_min, color='red', alpha=0.7) +
        geom_vline(aes(xintercept='power'),
                   data=df_ranks,
                   color='blue',
                   linetype='dashed',
                   alpha=0.4) + theme_bw() +
        labs(x='Power Rankings',
             y=f'KDE (bw: {bw}, order: {order})',
             title=f'Tiers for week {week}'))
    # Show plot
    if show:
        tier_plot.draw()
    # Create directory if it doesn't exist to save plot
    out_dir = Path(f'output/{year}/week{week}')
    out_dir.mkdir(parents=True, exist_ok=True)
    out_name = out_dir / 'tiers.png'
    # plotnine is noisy while saving; silence it only for the duration of the
    # save and let the context manager restore the caller's warning filters
    # instead of overwriting them globally.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        tier_plot.save(out_name, width=9, height=6, dpi=300)
    logger.info(f'Saved Tiers plot to local file: {out_name.resolve()}')
    return df_ranks
コード例 #22
0
def plot_umap_cell_line(embedding_df, fig_file, cell_line_column, color_labels,
                        color_values):
    """Scatter the embedding coloured by cell line, save it and return it.

    The ``x`` and ``y`` columns of ``embedding_df`` are plotted as points
    coloured by ``cell_line_column``; ``color_labels`` and ``color_values``
    are passed to the manual colour scale. The figure is written to
    ``fig_file`` and the plot object is returned.
    """
    point_layer = gg.geom_point(
        gg.aes(color=cell_line_column), size=0.2, shape=".", alpha=0.2)
    color_scale = gg.scale_color_manual(
        name="Cell Line", labels=color_labels, values=color_values)

    cell_line_gg = gg.ggplot(embedding_df, gg.aes(x="x", y="y"))
    cell_line_gg = cell_line_gg + point_layer + gg.theme_bw() + color_scale

    cell_line_gg.save(filename=fig_file, height=4, width=5, dpi=500)
    return cell_line_gg
コード例 #23
0
def plot_replicate_correlation(
    df,
    batch,
    plate,
    facet_string=None,
    split_samples=False,
    output_file_base=None,
    output_file_extensions=None,
    dpi=500,
    height=4,
    width=5,
    return_plot=False,
):
    """Box/jitter plot of ``similarity_metric`` grouped by ``group_replicate``.

    :param df: data with ``group_replicate`` and ``similarity_metric`` columns
    :param batch: batch name, used in the plot title
    :param plate: plate name, used in the plot title
    :param facet_string: facet specification passed to ``facet_wrap``;
        required when ``split_samples`` is true
    :param split_samples: if true, facet the plot by ``facet_string``
    :param output_file_base: if given, the figure is saved via ``save_figure``
    :param output_file_extensions: extensions handed to ``save_figure``;
        defaults to ``[".png", ".pdf", ".svg"]``
    :param dpi: resolution handed to ``save_figure``
    :param height: figure height handed to ``save_figure``
    :param width: figure width handed to ``save_figure``
    :param return_plot: if true, return the plot object
    :return: the plot when ``return_plot`` is true, otherwise ``None``
    """
    # Build a fresh list on every call so that the default can never be shared
    # (and accidentally mutated) between calls or by the save helper.
    if output_file_extensions is None:
        output_file_extensions = [".png", ".pdf", ".svg"]

    correlation_gg = (
        gg.ggplot(
            df,
            gg.aes(x="group_replicate", y="similarity_metric", fill="group_replicate"),
        )
        + gg.geom_boxplot(
            alpha=0.3, outlier_alpha=0, width=0.8, notchwidth=0.25, fatten=1.5
        )
        + gg.geom_jitter(shape=".", size=0.001, alpha=0.3, width=0.3, height=0)
        + gg.scale_fill_manual(
            name="Replicate",
            labels={"True": "True", "False": "False"},
            values=["#B99638", "#2DB898"],
        )
        + gg.xlab("Replicates")
        + gg.ylab("Pearson Correlation")
        + gg.ggtitle("{}: {}".format(batch, plate))
        + gg.theme_bw()
        + gg.theme(
            subplots_adjust={"wspace": 0.2},
            title=gg.element_text(size=5),
            axis_text=gg.element_text(size=4),
            axis_title=gg.element_text(size=5),
            legend_text=gg.element_text(size=4),
            legend_title=gg.element_text(size=5),
            strip_text=gg.element_text(size=4, color="black"),
            strip_background=gg.element_rect(colour="black", fill="#fdfff4"),
        )
    )

    if split_samples:
        # Kept as an assert so existing callers see the same exception type.
        assert facet_string, "To split samples, specify a facet_string"
        correlation_gg += gg.facet_wrap(facet_string)

    if output_file_base:
        save_figure(
            correlation_gg, output_file_base, output_file_extensions, dpi, height, width
        )
    if return_plot:
        return correlation_gg
コード例 #24
0
def plot_outcome_counts(read_file_1, read_file_2, save_file, plot_dir):
    """Plot per-model outcome counts as a dodged bar chart and save it.

    Both CSV files are read from ``dir_output``, passed through
    ``recode_outcome`` and stacked; rows are then counted per
    ``(outcome, model)`` pair. The figure is written to
    ``dir_figures/plot_dir/save_file``.
    """
    sub_raw = pd.read_csv(os.path.join(dir_output, read_file_1))
    agg_raw = pd.read_csv(os.path.join(dir_output, read_file_2))
    sub_recoded = recode_outcome(sub_raw)
    agg_recoded = recode_outcome(agg_raw)

    # Aggregate rows first, then the subset rows, with a fresh index
    combined = pd.concat([agg_recoded, sub_recoded], axis=0)
    combined = combined.reset_index(drop=True)
    counts = combined.groupby(['outcome', 'model']).size()
    counts = counts.reset_index(name='counts')

    bars = ggplot(counts, aes(x='outcome', y='counts', fill='model'))
    bars = bars + geom_bar(stat='identity', position='dodge')
    img = bars + labs(x='Outcome', y='Counts') + theme_bw()

    target_dir = os.path.join(dir_figures, plot_dir)
    img.save(os.path.join(target_dir, save_file))
コード例 #25
0
def estimate_cutoffs_plot(output_file,
                          df_plt,
                          df_cell_estimate_cutoff,
                          df_fit=None,
                          scale_x_log10=False,
                          save_plot=True):
    """Plot UMI counts by sorted cell barcodes.

    :param output_file: output path without extension; ``.png`` is appended
    :param df_plt: data with ``barcode`` and ``umi_counts`` columns
    :param df_cell_estimate_cutoff: data with ``n_cells`` and ``method``
        columns, drawn as one vertical line per row
    :param df_fit: optional data with ``x`` and ``y`` columns for a fitted
        curve overlay; skipped when ``None`` or empty
    :param scale_x_log10: if true, use a log10 x axis
    :param save_plot: if true, save the figure to ``<output_file>.png``
    :return: the plot object
    """
    min_umi = min(df_plt['umi_counts'])
    if min_umi <= 0:
        # The y axis is log10, so shift the counts until the minimum is 1.
        # ``1 - min`` is the correct offset for zero and for negative minima.
        # Work on a copy so the caller's frame is not modified.
        df_plt = df_plt.assign(umi_counts=df_plt['umi_counts'] + (1 - min_umi))
    gplt = plt9.ggplot()
    gplt = gplt + plt9.theme_bw()
    if len(df_plt) <= 50000:
        gplt = gplt + plt9.geom_point(mapping=plt9.aes(x='barcode',
                                                       y='umi_counts'),
                                      data=df_plt,
                                      alpha=0.05,
                                      size=0.1)
    else:
        # Larger inputs are drawn as a line instead of individual points
        gplt = gplt + plt9.geom_line(mapping=plt9.aes(x='barcode',
                                                      y='umi_counts'),
                                     data=df_plt,
                                     alpha=0.25,
                                     size=0.75,
                                     color='black')
    gplt = gplt + plt9.geom_vline(mapping=plt9.aes(xintercept='n_cells',
                                                   color='method'),
                                  data=df_cell_estimate_cutoff,
                                  alpha=0.75,
                                  linetype='dashdot')
    gplt = gplt + plt9.scale_color_brewer(palette='Dark2', type='qual')
    if scale_x_log10:
        gplt = gplt + plt9.scale_x_continuous(
            trans='log10', labels=comma_labels, minor_breaks=0)
    else:
        gplt = gplt + plt9.scale_x_continuous(labels=comma_labels,
                                              minor_breaks=0)
    gplt = gplt + plt9.scale_y_continuous(
        trans='log10', labels=comma_labels, minor_breaks=0)
    gplt = gplt + plt9.labs(title='',
                            y='UMI counts',
                            x='Barcode index, sorted by UMI count',
                            color='Cutoff')
    # Add the fit of the droplet utils model. A bare ``if df_fit:`` raises
    # ValueError for a DataFrame, so test for None / emptiness explicitly.
    if df_fit is not None and len(df_fit) > 0:
        gplt = gplt + plt9.geom_line(mapping=plt9.aes(x='x', y='y'),
                                     data=df_fit,
                                     alpha=1,
                                     color='yellow')
    if save_plot:
        gplt.save('{}.png'.format(output_file), dpi=300, width=5, height=4)
    return gplt
コード例 #26
0
    def plot_fusion(self):
        """
        Save one tSNE scatter plot per fusion reference.

        For every key of ``self.pos_dict`` that is also a column of
        ``self.df_tsne``, the points (``tSNE_1``, ``tSNE_2``) are coloured by
        that column on a lightgrey-to-blue gradient and written to
        ``<out_prefix>_<ref>_fusion.pdf``.
        """

        p9.theme_set(p9.theme_void())
        for ref in self.pos_dict:
            # References without a matching column are skipped
            if ref not in self.df_tsne.columns:
                continue
            out_plot_file = f'{self.out_prefix}_{ref}_fusion.pdf'
            mapping = p9.aes(x="tSNE_1", y="tSNE_2", color=ref)
            plot = (
                p9.ggplot(self.df_tsne, mapping)
                + p9.geom_point(size=0.2)
                + p9.theme_bw()
                + p9.scale_color_gradient(low="lightgrey", high="blue")
            )
            plot.save(out_plot_file)
コード例 #27
0
    def plot_replicates_greyscale(self):
        """
        Line plot of ``Current`` against ``Time`` per ``Channel`` in greyscale.

        Some journals require greyscale graphs, so the colour scale is
        replaced with a grey one. The plot is printed and then returned.
        """

        from plotnine import ggplot, ylab, xlab, geom_line, aes, theme_bw, scale_color_grey

        mapping = aes('Time', 'Current', color='Channel')
        plot = ggplot(self.data, mapping)
        plot = plot + ylab(u'Current (μA)') + xlab('Time (seconds)')
        plot = plot + geom_line() + theme_bw() + scale_color_grey()

        print(plot)
        return plot
コード例 #28
0
ファイル: model_plotting.py プロジェクト: jacalin1/REMARK-1
 def plot_base_temp(df):
     """Build the base line-and-point plot of ``value`` over ``mos_since_start``.

     Every ``variable`` gets its own group, colour, shape and line type. The
     shared styling objects (``aes_color``, ``aes_glyphs``, ``aes_fte_theme``,
     ``aes_model_xlab``) are added on top of ``theme_bw``.
     """
     mapping = p9.aes(x='mos_since_start',
                      y='value',
                      group='variable',
                      colour='variable',
                      shape='variable',
                      linetype='variable')
     return (
         p9.ggplot(df, mapping)
         + p9.geom_line(alpha=aes_color_alpha)
         + p9.geom_point(show_legend=True, alpha=aes_color_alpha)
         + aes_color
         + aes_glyphs
         + p9.theme_bw(base_size=9)
         + aes_fte_theme
         + aes_model_xlab
     )
コード例 #29
0
def qq_plot(df, limit=20000):
    """Return a QQ plot of observed against expected ``-log10`` P values.

    Rows are sorted by the ``P`` column. ``OBS`` is ``-log10(P)`` and ``EXP``
    is ``-log10(rank / n)`` for ranks ``1..n``. Only the first ``limit`` rows
    (the smallest P values) are plotted, together with a ``geom_abline``
    reference line.
    """
    ranked = df.sort_values('P')
    n_rows = len(ranked)
    observed = -np.log10(ranked['P'])
    expected = -np.log10(np.arange(1, n_rows + 1) / float(n_rows))
    plot_data = ranked.assign(OBS=observed).assign(EXP=expected).head(limit)

    qq = pn.ggplot(plot_data, pn.aes(x='EXP', y='OBS'))
    qq = qq + pn.geom_point() + pn.geom_abline() + pn.theme_bw()
    return qq
コード例 #30
0
def plot_result_stats(results, title):
    """Return a dodged bar chart of the summary statistics of ``results``.

    ``results.describe()`` is reshaped to long form with ``metric``, ``group``
    and ``value`` columns; the ``count``, ``min`` and ``max`` rows are dropped.
    Each bar is labelled with its value rounded to two decimals and the y axis
    is limited to ``[0, 1.0]``.
    """
    column_names = {"level_0": "metric", "level_1": "group", 0: "value"}
    stats = results.describe().unstack().reset_index()
    stats = stats.rename(columns=column_names)

    dropped = stats["group"].isin(["count", "min", "max"])
    stats = stats[~dropped]
    stats["value_presentation"] = stats["value"].round(2)

    bar_labels = p9.geom_text(p9.aes(label="value_presentation"),
                              position=p9.position_dodge(width=0.9),
                              va="bottom")
    return (
        p9.ggplot(stats)
        + p9.aes("metric", "value", fill="group")
        + p9.geom_col(position="dodge")
        + p9.theme_bw()
        + p9.coord_cartesian(ylim=[0, 1.0])
        + p9.ggtitle(title)
        + bar_labels
    )
コード例 #31
0
ファイル: utils.py プロジェクト: cognoma/machine-learning
def theme_cognoma(fontsize_mult=1):
    """Return the Cognoma plotnine theme.

    The theme is ``theme_bw`` with a base size of ``14 * fontsize_mult`` plus
    a set of element overrides; axis text and axis title sizes are scaled by
    ``fontsize_mult`` as well.
    """
    import plotnine as gg

    dark_grey = "#4d4d4d"
    base = gg.theme_bw(base_size=14 * fontsize_mult)
    overrides = gg.theme(
        line=gg.element_line(color=dark_grey),
        rect=gg.element_rect(fill="white", color=None),
        text=gg.element_text(color="black"),
        axis_ticks=gg.element_line(color=dark_grey),
        legend_key=gg.element_rect(color=None),
        panel_border=gg.element_rect(color=dark_grey),
        panel_grid=gg.element_line(color="#b3b3b3"),
        panel_grid_major_x=gg.element_blank(),
        panel_grid_minor=gg.element_blank(),
        strip_background=gg.element_rect(fill="#FEF2E2", color=dark_grey),
        axis_text=gg.element_text(size=12 * fontsize_mult, color=dark_grey),
        axis_title_x=gg.element_text(size=13 * fontsize_mult, color=dark_grey),
        axis_title_y=gg.element_text(size=13 * fontsize_mult, color=dark_grey),
    )
    return base + overrides
コード例 #32
0
ファイル: LogisticReal.py プロジェクト: denniscwylie/maclearn
def accPlot(accsByNFeats):
    """Print a plot of accuracy against ``p`` with one line per set.

    ``accsByNFeats`` maps a set name to a mapping from ``p`` to an accuracy.
    The values are flattened into one long frame with ``p``, ``acc`` and
    ``set`` columns and drawn on a log10 x axis.
    """
    def _frame_for_set(set_name):
        # One single-row frame per p, stacked for this set
        accs = accsByNFeats[set_name]
        rows = [DataFrame({"p": p, "acc": accs[p], "set": set_name},
                          index=[str(p)])
                for p in accs]
        return pd.concat(rows, axis=0)

    ggd = pd.concat([_frame_for_set(s) for s in accsByNFeats])
    ggd['acc'] = ggd['acc'].astype(float)

    ggo = (
        gg.ggplot(ggd, gg.aes(x='p', y='acc', color='set'))
        + gg.geom_line(alpha=0.5)
        + gg.geom_point()
        + gg.theme_bw()
        + gg.scale_x_log10(breaks=[10, 100, 1000, 10000])
        + gg.scale_color_manual(values=['darkgray', 'black',
                                        'red', 'dodgerblue'])
        + gg.ylab('Accuracy (5-fold CV)')
    )
    print(ggo)
コード例 #33
0
ファイル: test_theme.py プロジェクト: jwhendy/plotnine
    def test_theme_bw(self):
        """Compare the base plot with ``theme_bw`` against the 'theme_bw' baseline."""
        titled = self.g + labs(title='Theme BW')
        themed = titled + theme_bw()

        assert (themed + _theme) == 'theme_bw'
コード例 #34
0
ファイル: KnnSim.py プロジェクト: denniscwylie/maclearn
                                   x['k'],
                                   x['resubAccuracy'],
                                   x['testAccuracy'])
                                  for x in repeatedKnnResults],
                                 columns = ['p',
                                            'k',
                                            'resubAccuracy',
                                            'testAccuracy'])

# Stack the resubstitution and test accuracies into one long-format frame,
# tagging every row with the kind of accuracy it holds.
ggdata = pd.concat(
    [
        DataFrame({'p': knnResultsSimplified.p,
                   'k': knnResultsSimplified.k.apply(int),
                   'type': accType,
                   'Accuracy': knnResultsSimplified[accColumn]})
        for accType, accColumn in [('resub', 'resubAccuracy'),
                                   ('test', 'testAccuracy')]
    ],
    axis=0,
)

plt.close()
# Accuracy against p on a log10 axis, one facet per k, with points and a
# smoother for each accuracy type.
ggo = (
    gg.ggplot(ggdata, gg.aes(x='p', y='Accuracy',
                             color='type', group='type', linetype='type'))
    + gg.facet_wrap('~ k')
    + gg.scale_x_log10()
    + gg.geom_point(alpha=0.6)
    + gg.stat_smooth()
    + gg.theme_bw()
)
print(ggo)