コード例 #1
0
def plot_frequency(n=200):
    """
    Draw a histogram of the dates of n tweets, filled by author.

    Parameters
    ----------
    n: int
        How many tweets should be gathered and analysed.

    Returns
    -------
    None
        The histogram is saved as ``test.png`` in the ``static/`` folder.
    """
    # Imports stay local to the function, as in the original.
    from plotnine import (ggplot, aes, geom_histogram, scale_x_datetime,
                          labs, theme_minimal)
    from Mod_1_API import gather_tweets
    from mizani.breaks import date_breaks
    import pandas

    df = pandas.DataFrame(gather_tweets(n))

    plot1 = (ggplot(df, aes(x='Date', fill='Author')) +
             geom_histogram() +
             scale_x_datetime(breaks=date_breaks('1 week')) +
             labs(x="Time in weeks", y="Number of tweets by source") +
             theme_minimal()
             )
    # Save through the plot's own save() method; the unused date_format and
    # ggsave imports were dropped so fewer external names have to exist.
    plot1.save(filename="test.png", path="static/")
コード例 #2
0
ファイル: test_animation.py プロジェクト: jwhendy/plotnine
 def plot(i):
     """Return the scatter plot for index ``i``, colored by ``colors[i]``."""
     frame = qplot(x, y, color=colors[i], xlab='x', ylab='y')
     # Same color limits and legend title for every index.
     frame = frame + lims(color=(1, 7))
     frame = frame + labs(color='color')
     frame = frame + theme_minimal()
     return frame + _theme
コード例 #3
0
 def plot(i):
     """Return the scatter plot for index ``i``, colored by ``colors[i]``."""
     frame = qplot(x, y, color=colors[i], xlab='x', ylab='y')
     # Same color limits and legend title for every index.
     frame = frame + lims(color=(1, 7))
     frame = frame + labs(color='color')
     frame = frame + theme_minimal()
     return frame + _theme
コード例 #4
0
def _load_hist(entry, smooth=100, loss_padd=None):
    """Load one loss history and return ``(name, frame)``; frame is None on failure.

    Defined at module level (not nested) so that a process pool can pickle it.

    Args:
        entry: ``(name, file)`` pair; ``file`` is read with ``np.loadtxt``.
        smooth: Sigma of the Gaussian filter used for the ``sloss`` column,
            or ``False`` to skip smoothing.
        loss_padd: If not None and the smallest loss is negative, the losses
            are shifted so that the smallest one equals ``loss_padd``.
    """
    name, file = entry
    try:
        # atleast_1d keeps single-value files usable by the code below.
        hist = np.atleast_1d(np.loadtxt(file))
    except OSError:
        warn = "{} could not be loaded with np.loadtext({})."
        warnings.warn(warn.format(name, file), UserWarning)
        return name, None
    is_fine = np.isfinite(hist)
    if not is_fine.any():
        return name, None
    # Keep the original positions of the finite entries as iteration numbers.
    iters = np.where(is_fine)[0]
    hist = hist[is_fine]
    lb = hist.min()
    if loss_padd is not None and lb < 0:
        hist += loss_padd - lb
        lb = loss_padd
    ldf = pd.DataFrame({
        "loss": hist,
        "iteration": iters,
        "model": [name] * len(hist)
    })
    if smooth is not False:
        if lb > 0:
            # Strictly positive losses are smoothed in log space.
            ldf["sloss"] = np.exp(
                gaussian_filter1d(np.log(hist), sigma=smooth))
        else:
            ldf["sloss"] = gaussian_filter1d(hist, sigma=smooth)
    return name, ldf


def multiplot(files, smooth=100, alpha=0.6, loss_padd=None):
    """Plot the loss histories of several models on a log-scaled y axis.

    Args:
        files: Dict mapping model name to loss file. Any other value is
            treated as a single file and named after its string form.
        smooth: Sigma of the Gaussian smoothing, or ``False`` to plot only
            the raw losses.
        alpha: Line transparency.
        loss_padd: Forwarded to the loader; see ``_load_hist``.

    Returns:
        tuple: ``(plot, df)`` where ``df`` holds the concatenated histories
        (an empty DataFrame when nothing could be loaded).
    """
    import functools

    if not isinstance(files, dict):
        # The original wrapped the value in a list and then called .items()
        # on it, which always failed; build a one-entry mapping instead.
        files = {str(files): files}

    tasks = list(files.items())
    loader = functools.partial(_load_hist, smooth=smooth, loss_padd=loss_padd)
    frames = []
    with mp.Pool() as pool:
        for _name, ldf in tqdm(pool.imap(loader, tasks),
                               total=len(tasks),
                               desc="models"):
            if ldf is not None:
                frames.append(ldf)
    # One concat instead of repeated DataFrame.append (removed in pandas 2).
    df = pd.concat(frames) if frames else pd.DataFrame()

    pl = (pn.ggplot(pn.aes("iteration", "loss", color="model"), df) +
          pn.geom_line(alpha=alpha) + pn.scale_y_log10() + pn.theme_minimal())
    if smooth is not False:
        pl += pn.geom_line(pn.aes(y="sloss"), size=1, alpha=alpha)
    return pl, df
コード例 #5
0
ファイル: test_animation.py プロジェクト: jwhendy/plotnine
    def plot(i):
        """Return the plot for index ``i``; index 2 has no color mapping."""
        if i != 2:
            colored = qplot(x, y, color=colors[i], xlab='x', ylab='y')
            colored = colored + lims(color=(1, 7))
            colored = colored + labs(color='color')
            return colored + theme_minimal()

        # Index 2: plain points without color, limits or legend title.
        return qplot(x, y, xlab='x', ylab='y') + theme_minimal()
コード例 #6
0
    def plot(i):
        """Return the plot for index ``i``; index 2 has no color mapping."""
        if i != 2:
            colored = qplot(x, y, color=colors[i], xlab='x', ylab='y')
            colored = colored + lims(color=(1, 7))
            colored = colored + labs(color='color')
            return colored + theme_minimal()

        # Index 2: plain points without color, limits or legend title.
        return qplot(x, y, xlab='x', ylab='y') + theme_minimal()
コード例 #7
0
ファイル: test_animation.py プロジェクト: jwhendy/plotnine
 def plot(i):
     """Return the plot for index ``i``: color at index 2, stroke otherwise."""
     shade = colors[i]
     if i != 2:
         # Every index except 2 maps the value to the stroke aesthetic.
         stroked = qplot(x, y, stroke=shade, xlab='x', ylab='y')
         stroked = stroked + lims(stroke=(1, 7))
         stroked = stroked + labs(stroke='stroke')
         return stroked + theme_minimal()

     colored = qplot(x, y, color=shade, xlab='x', ylab='y')
     colored = colored + lims(color=(1, 7))
     colored = colored + labs(color='color')
     return colored + theme_minimal()
コード例 #8
0
ファイル: test_animation.py プロジェクト: jwhendy/plotnine
 def plot(i):
     """Return the plot for index ``i``; index 2 uses narrower color limits."""
     color_limits = lims(color=(3, 7)) if i == 2 else lims(color=(1, 7))
     base = qplot(x, y, color=colors[i], xlab='x', ylab='y')
     styled = base + color_limits + labs(color='color')
     return styled + theme_minimal() + _theme
コード例 #9
0
 def plot(i):
     """Return the plot for index ``i``; index 2 uses narrower color limits."""
     color_limits = lims(color=(3, 7)) if i == 2 else lims(color=(1, 7))
     base = qplot(x, y, color=colors[i], xlab='x', ylab='y')
     styled = base + color_limits + labs(color='color')
     return styled + theme_minimal() + _theme
コード例 #10
0
 def plot(i):
     """Return the plot for index ``i``: color at index 2, stroke otherwise."""
     shade = colors[i]
     if i != 2:
         # Every index except 2 maps the value to the stroke aesthetic.
         stroked = qplot(x, y, stroke=shade, xlab='x', ylab='y')
         stroked = stroked + lims(stroke=(1, 7))
         stroked = stroked + labs(stroke='stroke')
         return stroked + theme_minimal()

     colored = qplot(x, y, color=shade, xlab='x', ylab='y')
     colored = colored + lims(color=(1, 7))
     colored = colored + labs(color='color')
     return colored + theme_minimal()
コード例 #11
0
ファイル: plot_auto.py プロジェクト: OscarDeGar/py_grama
def plot_hists(df, out=None, **kwargs):
    r"""Construct histograms

    Build one histogram panel per requested column. Often used to visualize
    the results of random sampling for multiple outputs.

    Usually called as a dispatch from plot_auto().

    Args:
        df: Data to reshape and plot
        out (list of strings): Variables to plot
        **kwargs: Accepted but not used by this function

    Returns:
        ggplot: Histograms of each variable, one facet per variable

    Raises:
        ValueError: If out is not provided

    Examples:

        >>> import grama as gr
        >>> import matplotlib.pyplot as plt
        >>> from grama.models import make_cantilever_beam
        >>> md = make_cantilever_beam()
        >>> ## Dispatch from autoplotter
        >>> (
        >>>     md
        >>>     >> gr.ev_sample(n=100, df_det="nom")
        >>>     >> gr.pt_auto()
        >>> )
        >>> ## Re-create without metadata
        >>> (
        >>>     md
        >>>     >> gr.ev_sample(n=100, df_det="nom")
        >>>     >> gr.pt_hists(out=md.out)
        >>> )

    """
    if out is None:
        raise ValueError("Must provide input columns list as keyword out")

    ## Long form: one row per (variable, value) pair
    df_long = df >> tf_pivot_longer(
        columns=out,
        names_to="var",
        values_to="value",
    )

    ## Histogram of the values, faceted by variable with free scales
    histograms = ggplot(aes("value")) + geom_histogram(bins=30)
    histograms = histograms + facet_wrap("var", scales="free")
    histograms = histograms + theme_minimal()
    histograms = histograms + labs(x="Output Value", y="Count")

    return df_long >> histograms
コード例 #12
0
def gene_log_HR_plot(inFile, pcaFile=None, model=None):
    """Map per-component logHR estimates back to genes and plot tumor vs non-tumor.

    The rows of ``par["means"]["logHR"]`` are split in half: the first half is
    used as the tumor values, the second half as the non-tumor values. Both
    halves are passed through ``pca.inverse_transform`` to obtain one value
    per gene.

    Args:
        inFile: Parameter file handed to ``get_params``.
        pcaFile: Path of the pickled PCA object. Defaults to ``inFile`` with
            ``"_params.hdf5"`` replaced by ``"_pca.pkl"``.
        model: Optional object whose ``counts.index`` supplies the row labels
            of the result; without it rows are labelled "1", "2", ...

    Returns:
        tuple: ``(plot, logHR_df)`` - the scatter plot and the per-gene table
        with logHR, standard deviation, Z and two-sided p-value columns.
    """
    # get logHRs
    par = get_params(inFile)
    pca_components = par["means"]["logHR"].shape[0] >> 1  # half the rows
    components = range(pca_components)
    tf_components = slice(pca_components, 2 * pca_components)

    t_logHR = par["means"]["logHR"][components, 0]
    tf_logHR = par["means"]["logHR"][tf_components, 0]

    t_logHR_sd = par["stds"]["logHR"][components, 0]
    tf_logHR_sd = par["stds"]["logHR"][tf_components, 0]

    # get pca
    if pcaFile is None:
        pcaFile = inFile.replace("_params.hdf5", "_pca.pkl")
    # NOTE(review): pickle.load can execute arbitrary code from the file;
    # only load PCA files from trusted sources.
    with open(pcaFile, "rb") as buff:
        pca = pickle.load(buff)

    # prep dataframe
    n_genes = pca.components_.shape[1]
    if model is None:
        logHR_df = pd.DataFrame(index=[f"{i+1}" for i in range(n_genes)])
    else:
        logHR_df = pd.DataFrame(index=model.counts.index)
    logHR_df["tumor logHR"] = pca.inverse_transform(t_logHR)
    logHR_df["non-tumor logHR"] = pca.inverse_transform(tf_logHR)
    # Per-gene sd: root of the summed squares of the sd-weighted loadings.
    logHR_df["tumor logHR sd"] = np.sqrt(
        np.sum((pca.components_ * t_logHR_sd[:, None])**2, axis=0))
    logHR_df["non-tumor logHR sd"] = np.sqrt(
        np.sum((pca.components_ * tf_logHR_sd[:, None])**2, axis=0))
    logHR_df["tumor Z"] = logHR_df["tumor logHR"] / logHR_df["tumor logHR sd"]
    # Fixed: the non-tumor Z score was divided by the tumor sd.
    logHR_df["non-tumor Z"] = (logHR_df["non-tumor logHR"] /
                               logHR_df["non-tumor logHR sd"])
    logHR_df["tumor p-value"] = norm.sf(abs(logHR_df["tumor Z"])) * 2
    logHR_df["non-tumor p-value"] = norm.sf(abs(logHR_df["non-tumor Z"])) * 2

    # make plot; both axes share one range so the identity line is meaningful
    lb = min(logHR_df["non-tumor logHR"].min(), logHR_df["tumor logHR"].min())
    ub = max(logHR_df["non-tumor logHR"].max(), logHR_df["tumor logHR"].max())
    pl = (pn.ggplot(pn.aes("non-tumor logHR", "tumor logHR"), logHR_df) +
          pn.xlim(lb, ub) + pn.ylim(lb, ub) + pn.theme_minimal() +
          pn.geom_point(alpha=0.3, color="red") + pn.geom_abline())
    return pl, logHR_df
コード例 #13
0
def kernel_stats(inFile, log_scale=True):
    """Tabulate and plot the weight and covariance determinant of every kernel.

    Args:
        inFile: Parameter file handed to ``get_params``.
        log_scale: When true, both plot axes are log10-scaled.

    Returns:
        tuple: ``(plot, df)`` where ``df`` has one row per tissue and kernel
        with the columns ``tissue``, ``weight`` and ``volume``.
    """
    par = get_params(inFile)

    # One kernel per parameter name that contains "mus_f".
    n_kernel = sum("mus_f" in name for name in sorted(par["means"]))
    kernel_labels = [f"kernel {i}" for i in range(n_kernel)]

    tf = pm.distributions.transforms.StickBreaking()

    per_tissue = []
    for tissue_type, tissue_name in (("t", "tumor"), ("f", "non-tumor")):
        stick_key = f"w_{tissue_type}_stickbreaking__"
        weights = tf.backward(par["means"][stick_key]).eval()
        n_dim = par["means"][f"x_{tissue_type}"].shape[1]

        volumes = []
        for kernel in range(n_kernel):
            # Rebuild the covariance from its packed Cholesky factor.
            chol_key = f"packed_L_{tissue_type}_{kernel}_cholesky-cov-packed__"
            lower = pm.expand_packed_triangular(n_dim,
                                                par["means"][chol_key],
                                                lower=True).eval()
            volumes.append(np.linalg.det(np.dot(lower, lower.T)))

        per_tissue.append(
            pd.DataFrame(
                {
                    "tissue": tissue_name,
                    "weight": weights,
                    "volume": volumes,
                },
                index=kernel_labels,
            ))

    df = pd.concat(per_tissue)
    pl = pn.ggplot(pn.aes("volume", "weight", color="tissue"), df)
    pl += pn.geom_point()
    if log_scale:
        pl += pn.scale_y_log10()
        pl += pn.scale_x_log10()
    pl += pn.theme_minimal()
    return pl, df
コード例 #14
0
def lollipop(data):
    """Draw a horizontal lollipop chart of label probabilities.

    Args:
        data: DataFrame with a ``label`` and a ``probability`` column.

    Returns:
        tuple: ``(p, figdata_png)`` - the ggplot object and the rendered
        chart as a base64-encoded PNG string.
    """
    data = data.sort_values(by=['probability']).reset_index(drop=True)
    # Fix the category order to the sorted order so the axis follows it.
    custom_order = pd.Categorical(data['label'], categories=data.label)
    data = data.assign(label_custom=custom_order)

    p = (
        ggplot(data, aes('label_custom', 'probability'))
        + geom_point(color="#88aa88", size=4)
        + geom_segment(
            aes(x='label_custom', y=0, xend='label_custom', yend='probability'),
            color="#88aa88")
        + coord_flip(expand=True)
        + theme_minimal()
        + labs(x="", y="probability", title="Most Likely Object")
        + guides(title_position="left")
        + theme(plot_title=element_text(size=20, face="bold", ha="right"))
    )

    fig = p.draw()
    figfile = BytesIO()
    try:
        # Save the drawn figure itself rather than pyplot's "current" figure.
        fig.savefig(figfile, format='png', bbox_inches='tight')
    finally:
        # Release the figure; otherwise every call leaves one open in pyplot.
        plt.close(fig)
    figdata_png = base64.b64encode(figfile.getvalue()).decode()
    return p, figdata_png
コード例 #15
0
ファイル: timspy.py プロジェクト: hcji/timspy
    def plot_windows(self, query=""):
        """Plot selection windows with 'plotnine'.

        Install plotnine separately.

        Args:
           query (str): a query used for subselection in "self.windows";
               when empty, all rows of "self.windows" after the first are used
        Returns:
            ggplot: a plot with one rectangle per selection window
        """
        from plotnine import ggplot, aes, geom_rect, theme_minimal, xlab, ylab, labs

        if query:
            selected = self.windows.reset_index().query(query)
        else:
            selected = self.windows[1:].reset_index()

        # Rectangles span m/z horizontally and ion mobility vertically.
        rectangles = geom_rect(
            aes(xmin='mz_left', xmax='mz_right',
                ymin='IM_min', ymax='IM_max',
                fill='pd.Categorical(window_gr)'),
            alpha=.5, color='black')

        plot = ggplot(aes(), data=selected) + rectangles + theme_minimal()
        plot = plot + xlab('mass/charge') + ylab('1/K0')
        return plot + labs(fill='Window Group')
コード例 #16
0
ファイル: trainscvi.py プロジェクト: Munfred/scutils
def wraplot(adata=None,
            filename='wraplot',
            embedding='tsne',
            feature='sample_type_tech',
            size=(12, 12),
            color=None,
            save=False,
            draw=False,
            psize=1):
    """Facet an embedding scatter plot by ``feature`` and optionally save it.

    Args:
        adata: Object whose ``obs`` table holds the columns
            ``embedding + '0'``, ``embedding + '1'``, ``feature`` and
            ``color``. Required despite the ``None`` default.
        filename: Prefix of the output file name.
        embedding: Prefix of the two coordinate columns.
        feature: Column used for faceting.
        size: Figure size assigned to ``p.options.figure_size``.
        color: Column mapped to point color; defaults to ``feature``.
        save: When true, the plot is written as a PNG at 200 dpi.
        draw: Accepted but not used by this function.
        psize: Size of the colored points.

    Returns:
        None. Progress is printed; the plot is only written when ``save``.
    """
    start = datetime.datetime.now()
    p.options.figure_size = size

    # Without an explicit color the facet feature doubles as the color and
    # the file name gets a fixed 'wraplot' suffix instead of the color name.
    if color is None:
        color = feature
        suffix = 'wraplot'
    else:
        suffix = str(color)
    savename = filename + '.' + embedding + '.' + feature + '.' + suffix + '.png'
    print(
        start.strftime("%H:%M:%S"),
        'Starting ... \t',
        savename,
    )

    pt = (p.ggplot(p.aes(x=embedding + '0', y=embedding + '1', color=color),
                   adata.obs) +
          # Grey layer uses the data without the facet column - presumably so
          # that all points appear as background in every facet.
          p.geom_point(color='lightgrey',
                       shape='.',
                       data=adata.obs.drop(feature, axis=1)) +
          p.geom_point(shape='.', size=psize, alpha=1, stroke=0) +
          p.theme_minimal() + p.facet_wrap('~' + feature) +
          p.guides(color=p.guide_legend(override_aes={'size': 10})))

    if save:
        pt.save(savename, format='png', dpi=200)

    end = datetime.datetime.now()
    delta = end - start
    print(start.strftime("%H:%M:%S"), str(int(delta.total_seconds())),
          's to make: \t', savename)
コード例 #17
0
ファイル: plot_auto.py プロジェクト: OscarDeGar/py_grama
def plot_sinew_inputs(df, var=None, sweep_ind="sweep_ind"):
    r"""Inspect a sinew design

    Create a scatterplot matrix colored by sweep index. Often used to
    visualize a sinew design before evaluating the model functions.

    Usually called as a dispatch from plot_auto().

    Args:
        df (Pandas DataFrame): Input design data
        var (list of strings): Variables to plot
        sweep_ind (string): Sweep index column in df

    Returns:
        None: The assembled figure is displayed with fig.show() and is not
        returned.

    Raises:
        ValueError: If var is not provided

    Examples:

        >>> import grama as gr
        >>> import matplotlib.pyplot as plt
        >>> from grama.models import make_cantilever_beam
        >>> md = make_cantilever_beam()
        >>> ## Dispatch from autoplotter
        >>> (
        >>>     md
        >>>     >> gr.ev_sinews(df_det="swp", skip=True)
        >>>     >> gr.pt_auto()
        >>> )
        >>> ## Re-create without metadata
        >>> (
        >>>     md
        >>>     >> gr.ev_sinews(df_det="swp")
        >>>     >> gr.pt_sinew_inputs(var=md.var)
        >>> )

    """
    if var is None:
        raise ValueError("Must provide input columns list as keyword var")

    ## Define helpers
    # labels_blank: one empty label per break, hiding the tick labels
    labels_blank = lambda v: [""] * len(v)
    # breaks_min: three breaks only - lower limit, midpoint, upper limit
    breaks_min = lambda lims: (lims[0], 0.5 * (lims[0] + lims[1]), lims[1])

    ## Make blank figure
    # An empty plot is drawn once to obtain a figure that the panels below
    # are added to.
    fig = (
        df
        >> ggplot()
        + geom_blank()
        + theme_void()
    ).draw(show=False)

    # One grid cell per (row variable, column variable) pair
    gs = gridspec.GridSpec(len(var), len(var))
    for i, v1 in enumerate(var):
        for j, v2 in enumerate(var):
            ax = fig.add_subplot(gs[i, j])
            ## Switch labels
            # Tick labels are shown only in the first column (y) and the
            # last row (x); _sci_format is defined elsewhere in the file.
            if j == 0:
                labels_y = _sci_format
            else:
                labels_y = labels_blank
            if i == len(var) - 1:
                labels_x = _sci_format
            else:
                labels_x = labels_blank

            ## Label
            # Diagonal cells show only the variable name
            if i == j:
                p = (
                    df
                    >> ggplot()
                    + annotate(
                        "label",
                        x=0,
                        y=0,
                        label=v1,
                    )
                    + theme_void()
                    + guides(color=None)
                )

            ## Scatterplot
            # Off-diagonal cells: v2 on x, v1 on y, colored by sweep index
            else:
                p = (
                    df
                    >> ggplot(aes(v2, v1, color="factor("+sweep_ind+")"))
                    + geom_point(size=0.1)
                    + scale_x_continuous(
                        breaks=breaks_min,
                        labels=labels_x,
                    )
                    + scale_y_continuous(
                        breaks=breaks_min,
                        labels=labels_y,
                    )
                    + guides(color=None)
                    + theme_minimal()
                    + theme(
                        axis_title=element_text(va="top", size=12),
                    )
                )

            # NOTE(review): _draw_using_figure is a private plotnine method;
            # it may not exist in every plotnine version - confirm.
            _ = p._draw_using_figure(fig, [ax])


    ## Plot
    # NB Returning the figure causes a "double plot" in Jupyter....
    fig.show()
コード例 #18
0
ファイル: plot_auto.py プロジェクト: OscarDeGar/py_grama
def plot_sinew_outputs(
    df, var=None, out=None, sweep_ind="sweep_ind", sweep_var="sweep_var"
):
    r"""Construct sinew plot

    Draw one line per sweep in a grid of panels, outputs by rows and inputs
    by columns. Often used to visualize the outputs of a sinew design.

    Usually called as a dispatch from plot_auto().

    Args:
        df (Pandas DataFrame): Input design data with output results
        var (list of strings): Variables to plot
        out (list of strings): Outputs to plot
        sweep_ind (string): Sweep index column in df
        sweep_var (string): Swept variable column in df

    Returns:
        ggplot: Line plot faceted by output (rows) and input (columns)

    Raises:
        ValueError: If var or out is not provided

    Examples:

        >>> import grama as gr
        >>> import matplotlib.pyplot as plt
        >>> from grama.models import make_cantilever_beam
        >>> md = make_cantilever_beam()
        >>> ## Dispatch from autoplotter
        >>> (
        >>>     md
        >>>     >> gr.ev_sinews(df_det="swp")
        >>>     >> gr.pt_auto()
        >>> )
        >>> ## Re-create without metadata
        >>> (
        >>>     md
        >>>     >> gr.ev_sinews(df_det="swp")
        >>>     >> gr.pt_sinew_outputs(var=md.var, out=md.out)
        >>> )

    """
    if var is None:
        raise ValueError("Must provide input columns list as keyword arg var")
    if out is None:
        raise ValueError("Must provide output columns list as keyword arg out")

    def midpoint_breaks(lims):
        # Three breaks only: lower limit, midpoint, upper limit
        lower, upper = lims[0], lims[1]
        return (lower, 0.5 * (lower + upper), upper)

    ## Reshape: inputs to (_var, _x), then outputs to (_out, _y)
    non_inputs = [col for col in df.columns if col not in var]
    df_inputs = melt(df, non_inputs, var, "_var", "_x")

    non_outputs = [col for col in df_inputs.columns if col not in out]
    df_long = melt(df_inputs, non_outputs, out, "_out", "_y")

    ## Keep only rows where the melted input is the one being swept
    on_sweep = df_long[sweep_var] == df_long["_var"]
    df_plot = df_long[on_sweep]

    ## Assemble the plot
    sweep_factor = "factor(" + sweep_ind + ")"
    lines = ggplot(aes("_x", "_y", color=sweep_factor, group=sweep_factor))
    lines = lines + geom_line() + facet_grid("_out~_var", scales="free")
    lines = lines + scale_x_continuous(
        breaks=midpoint_breaks,
        labels=_sci_format,
    )
    lines = lines + scale_y_continuous(
        breaks=midpoint_breaks,
        labels=_sci_format,
    )
    lines = lines + guides(color=None) + theme_minimal()
    lines = lines + theme(
        strip_text_y=element_text(angle=0),
        panel_border=element_rect(color="black", size=0.5),
    )
    lines = lines + labs(x="Input Value", y="Output Value")

    return df_plot >> lines
コード例 #19
0
ファイル: run_graphdr.py プロジェクト: jzthree/quasildr
                metric=arguments['--metric'])

    pd.DataFrame(Z).to_csv(output + DOCSTR + '.graphdr',
                           sep='\t',
                           index_label=False)

    if arguments['--plot']:
        try:
            from plotnine import ggplot, aes, geom_point, theme_minimal

            if arguments['--anno_file']:
                df = pd.DataFrame({
                    'x':
                    Z[:, 0],
                    'y':
                    Z[:, 1],
                    'c':
                    anno[arguments['--anno_column']].map(str)
                })
                p = ggplot(df, aes('x', 'y', color='c')) + geom_point(
                    size=0.1) + theme_minimal()
            else:
                df = pd.DataFrame({'x': Z[:, 0], 'y': Z[:, 1]})
                p = ggplot(df, aes(
                    'x', 'y')) + geom_point(size=0.1) + theme_minimal()

            p.save(output + DOCSTR + '.pdf')
        except ImportError:
            warnings.warn(
                'plotnine needs to be installed for the plotting function.')
コード例 #20
0
ファイル: test_theme.py プロジェクト: jwhendy/plotnine
    def test_theme_minimal(self):
        """Check the minimal-themed plot against the name 'theme_minimal'."""
        themed = self.g + labs(title='Theme Minimal')
        themed = themed + theme_minimal()

        assert themed + _theme == 'theme_minimal'
コード例 #21
0
def expression_plot(
    inFile,
    cp1=1,
    cp2=2,
    model=None,
    draw_distribution=True,
    draw_points=True,
    max_kernel_alpha=0.5,
    color="expression",
):
    """Plot samples and kernel ellipses in the plane of two components.

    Args:
        inFile: Parameter file handed to ``get_params``.
        cp1: 1-based number of the component on the x axis.
        cp2: 1-based number of the component on the y axis.
        model: Optional object providing ``counts.columns`` as sample labels
            and ``pheno`` as extra columns merged onto the points.
        draw_distribution: When true, one ellipse per kernel is drawn.
        draw_points: When true, the per-sample positions are drawn.
        max_kernel_alpha: Alpha given to the densest kernel ellipse.
        color: Column mapped to color; anything other than "expression"
            requires ``model``.

    Returns:
        tuple: ``(plot, df, kdf)`` - the plot, the point table (None unless
        ``draw_points``) and the ellipse table (None unless
        ``draw_distribution``).

    Raises:
        Exception: If ``color`` is not "expression" and no model is given.
    """
    par = get_params(inFile)
    pl = (pn.ggplot(pn.aes(f"CP {cp1}", f"CP {cp2}", color=color)) +
          pn.theme_minimal())
    df = None
    kdf = None
    if draw_points:
        if model is None:
            index = [
                f"sample {i+1}" for i in range(par["means"]["x_t"].shape[0])
            ]
            if color != "expression":
                raise Exception(
                    "A model must be passed to color other that by expression."
                )
        else:
            index = model.counts.columns
        columns = [f"CP {i+1}" for i in range(par["means"]["x_t"].shape[1])]
        df_t = pd.DataFrame(par["means"]["x_t"], index=index, columns=columns)
        df_t["expression"] = "tumor"
        df_tf = pd.DataFrame(par["means"]["x_f"], index=index, columns=columns)
        df_tf["expression"] = "non-tumor"
        df = pd.concat([df_t, df_tf])
        if model is not None:
            df = df.merge(model.pheno,
                          "left",
                          left_index=True,
                          right_index=True)
        pl += pn.geom_point(data=df, alpha=0.3)
    if draw_distribution:
        # One kernel per parameter name that contains "mus_f".
        n_kernel = 0
        for name in sorted(par["means"]):
            n_kernel += "mus_f" in name

        if "altStick" in par["note"] and not par["note"]["altStick"]:
            tf = StickBreaking_legacy()
        else:
            tf = StickBreaking2()
        elipses = list()
        elipse_t = np.linspace(0, 2 * np.pi, 100)
        # Zero-based positions of the two plotted components.
        axes = [cp1 - 1, cp2 - 1]

        for tissue_type in ["t", "f"]:
            weights = tf.backward(
                par["means"][f"w_{tissue_type}_stickbreaking__"]).eval()
            n_dim = par["means"][f"x_{tissue_type}"].shape[1]
            for kernel in range(n_kernel):
                # get covariance elipse parameters
                packed_cov = par["means"][
                    f"packed_L_{tissue_type}_{kernel}_cholesky-cov-packed__"]
                lower = pm.expand_packed_triangular(n_dim,
                                                    packed_cov,
                                                    lower=True).eval()
                # 2x2 covariance restricted to the plotted components
                cov = np.dot(lower, lower.T)[axes, :][:, axes]
                eigvals, U = np.linalg.eig(cov)
                theta = np.arccos(np.abs(U[0, 0]))

                # parametrize elipse
                # 5.991 is presumably the 95% chi-square quantile for two
                # degrees of freedom - TODO confirm.
                width = 2 * np.sqrt(5.991 * eigvals[0])
                hight = 2 * np.sqrt(5.991 * eigvals[1])

                # Fixed: weight per unit of ellipse extent. The original
                # `weights[kernel] / width * hight` multiplied by the height
                # because of operator precedence.
                density = weights[kernel] / (width * hight)

                x = width * np.cos(elipse_t)
                y = hight * np.sin(elipse_t)

                # rotation
                c, s = np.cos(theta), np.sin(theta)
                R = np.array(((c, -s), (s, c)))
                path = np.dot(R, np.array([x, y]))

                # position
                pos = par["means"][f"mus_{tissue_type}_{kernel}"]
                path += pos[axes][:, None]

                # make data frame
                path_df = pd.DataFrame({
                    f"CP {cp1}": path[0, :],
                    f"CP {cp2}": path[1, :]
                })
                path_df["kernel"] = kernel
                path_df["density"] = density
                path_df["expression"] = ("tumor" if tissue_type == "t" else
                                         "non-tumor")
                path_df["expression-kernel"] = (f"tumor {kernel}"
                                                if tissue_type == "t" else
                                                f"non-tumor {kernel}")
                elipses.append(path_df)
        kdf = pd.concat(elipses)
        # Rescale so the densest kernel gets exactly max_kernel_alpha.
        density_scale = max_kernel_alpha / kdf["density"].max()
        kdf["density"] *= density_scale
        pl += pn.geom_polygon(
            pn.aes(fill="expression",
                   group="expression-kernel",
                   alpha="density"),
            data=kdf,
        )
        pl += pn.scale_alpha_continuous(range=(0, max_kernel_alpha))
    return pl, df, kdf
コード例 #22
0
# Gene symbol under inspection; used to build column names and plot labels.
g = 'T'
# Identifier of the gene in adata.var (first match on SYMBOL == "T").
ens = adata.var[adata.var.SYMBOL == "T"].ENSEMBL[0]
# Per-cell values of that gene taken from X and from two layers.
# NOTE(review): assumes X and the layers are sparse (they are converted with
# .toarray()) - confirm.
adata.obs['T_counts'] = adata[:, ens].X.toarray()[:, 0]
adata.obs['T_logcounts'] = adata[:, ens].layers.get("logcounts").toarray()[:,
                                                                           0]
adata.obs['T_smoothed'] = adata[:,
                                ens].layers.get("scvi_normalised").toarray()[:,
                                                                             0]
adata.obs['T_smoothed_lc'] = np.log2(adata.obs['T_smoothed'] + 1)

# Frequency table of the raw counts.
# NOTE(review): the rename relies on reset_index() naming the value column
# 'index'; presumably this depends on the pandas version - verify.
count_hist = adata.obs[f'{g}_counts'].value_counts().reset_index().rename(
    columns={'index': 'counts'})
p.options.figure_size = 6, 2
# Bar chart of the number of cells per count value, for counts in (0, 25).
plot_ = (p.ggplot(p.aes(x='counts', y=f'{g}_counts'),
                  count_hist.query('0 < counts < 25')) +
         p.geom_bar(stat='identity') + p.scale_x_log10() + p.theme_minimal() +
         p.labs(x=f'{g} UMI counts', y='Number cells'))
plot_.save('mgast_T_counts.pdf', verbose=False)

# Same table for the log-counts.
count_hist = adata.obs[f'{g}_logcounts'].value_counts().reset_index().rename(
    columns={'index': 'logcounts'})
p.options.figure_size = 6, 2
# NOTE(review): this histogram is built from the distinct logcount values in
# the table, not weighted by how many cells have each value - confirm that
# this is intended.
plot_ = (
    p.ggplot(p.aes(x='logcounts'), count_hist.query('0 < logcounts < 25')) +
    p.geom_histogram(bins=128, color="k", fill="w") + p.scale_x_log10() +
    p.theme_minimal() + p.labs(x=f'{g} UMI logcounts', y='Number cells'))
plot_.save('mgast_T_logcounts.pdf', verbose=False)

# Frequency table of the log2-transformed smoothed values.
count_hist = adata.obs[f'{g}_smoothed_lc'].value_counts().reset_index().rename(
    columns={'index': 'smoothed_lc'})
p.options.figure_size = 6, 2
コード例 #23
0
def log_HR_plot(inFile, label_unit=10, log_scale_color=True):
    """Plot tumor against non-tumor logHR per component, plus a p-value plot.

    The rows of ``par["means"]["logHR"]`` are split in half: the first half is
    used as the tumor values, the second half as the non-tumor values.

    Args:
        inFile: Parameter file handed to ``get_params``.
        label_unit: Components with zero-based position up to and including
            this value get a text label.
        log_scale_color: When true, color and fill of the first plot use a
            log-transformed colormap.

    Returns:
        tuple: ``(pl, pl_p, logHR_df)`` - the logHR scatter plot, the
        -log10(p-value) scatter plot and the table both are drawn from.
    """
    par = get_params(inFile)
    pca_components = par["means"]["logHR"].shape[0] >> 1  # half the rows
    components = range(pca_components)
    tf_components = slice(pca_components, 2 * pca_components)

    logHR_df = pd.DataFrame(index=[f"{i+1}" for i in components])
    logHR_df["tumor logHR"] = par["means"]["logHR"][components, 0]
    logHR_df["non-tumor logHR"] = par["means"]["logHR"][tf_components, 0]
    logHR_df["component"] = components
    # NOTE(review): `i <= label_unit` labels label_unit + 1 components;
    # confirm whether `<` was intended.
    logHR_df["label"] = [
        logHR_df.index[i] if i <= label_unit else "" for i in components
    ]
    logHR_df["tumor logHR sd"] = par["stds"]["logHR"][components, 0]
    logHR_df["non-tumor logHR sd"] = par["stds"]["logHR"][tf_components, 0]
    logHR_df["tumor Z"] = logHR_df["tumor logHR"] / logHR_df["tumor logHR sd"]
    # Fixed: the non-tumor Z score was divided by the tumor sd.
    logHR_df["non-tumor Z"] = (logHR_df["non-tumor logHR"] /
                               logHR_df["non-tumor logHR sd"])
    logHR_df["tumor p-value"] = norm.sf(abs(logHR_df["tumor Z"])) * 2
    logHR_df["non-tumor p-value"] = norm.sf(abs(logHR_df["non-tumor Z"])) * 2
    logHR_df["tumor -log10(p-value)"] = -np.log10(logHR_df["tumor p-value"])
    logHR_df["non-tumor -log10(p-value)"] = -np.log10(
        logHR_df["non-tumor p-value"])

    # Both axes share one range so the identity line is meaningful.
    lb = min(logHR_df["non-tumor logHR"].min(), logHR_df["tumor logHR"].min())
    ub = max(logHR_df["non-tumor logHR"].max(), logHR_df["tumor logHR"].max())
    pl = (pn.ggplot(
        pn.aes(
            "non-tumor logHR",
            "tumor logHR",
            color="non-tumor p-value",
            fill="tumor p-value",
            label="label",
        ),
        logHR_df,
    ) + pn.xlim(lb, ub) + pn.ylim(lb, ub) + pn.geom_abline() +
          pn.geom_point() + pn.theme_minimal() +
          pn.geom_text(ha="left", va="bottom", color="black"))
    if log_scale_color:
        pl += pn.scale_color_cmap(trans="log")
        pl += pn.scale_fill_cmap(trans="log")

    # Second plot: the same comparison on the -log10(p-value) scale.
    lb = min(
        logHR_df["non-tumor -log10(p-value)"].min(),
        logHR_df["tumor -log10(p-value)"].min(),
    )
    ub = max(
        logHR_df["non-tumor -log10(p-value)"].max(),
        logHR_df["tumor -log10(p-value)"].max(),
    )
    pl_p = (pn.ggplot(
        pn.aes(
            "non-tumor -log10(p-value)",
            "tumor -log10(p-value)",
            color="component",
            label="label",
        ),
        logHR_df,
    ) + pn.geom_point() + pn.xlim(lb, ub) + pn.ylim(lb, ub) +
            pn.theme_minimal() +
            pn.geom_text(ha="left", va="bottom", color="black"))
    return pl, pl_p, logHR_df
コード例 #24
0
ファイル: test_theme.py プロジェクト: wilmckoy/plotnine
    def test_theme_minimal(self):
        """Check the minimal-themed plot against the name 'theme_minimal'."""
        themed = self.g + labs(title='Theme Minimal')
        themed = themed + theme_minimal()

        assert themed + _theme == 'theme_minimal'
コード例 #25
0
        np.sqrt(np.diag(event_study_formula.cov_params().loc[lags][lags]))
    ]),
    'mean':
    np.concatenate([
        event_study_formula.params[leads],
        np.array([0]), event_study_formula.params[lags]
    ]),
    'label':
    np.arange(-9, 6)
})

# Interval bounds: mean minus/plus 1.96 standard deviations.
leadslags_plot['lb'] = leadslags_plot['mean'] - 1.96 * leadslags_plot['sd']
leadslags_plot['ub'] = leadslags_plot['mean'] + 1.96 * leadslags_plot['sd']

# Point-range variant: one point with an interval bar per estimated lead or
# lag. Whether to prefer this look is a matter of style.
(
    p.ggplot(leadslags_plot,
             p.aes(x='label', y='mean', ymin='lb', ymax='ub'))
    + p.geom_hline(yintercept=0.035169444, color="red")
    + p.geom_pointrange()
    + p.theme_minimal()
    + p.xlab("Years before and after castle doctrine expansion")
    + p.ylab("log(Homicide Rate)")
    + p.geom_hline(yintercept=0, linetype="dashed")
    + p.geom_vline(xintercept=0, linetype="dashed")
)
コード例 #26
0
ファイル: plot_auto.py プロジェクト: OscarDeGar/py_grama
def plot_scattermat(df, var=None):
    r"""Create a scatterplot matrix

    Create a scatterplot matrix with density plots on the diagonal. Often
    used to visualize a design (set of inputs points) before evaluating the
    functions.

    Usually called as a dispatch from plot_auto().

    Args:
        df: Data holding the columns named in var
        var (list of strings): Variables to plot

    Returns:
        None: The assembled figure is displayed with fig.show() and is not
        returned.

    Raises:
        ValueError: If var is not provided

    Examples:

        >>> import grama as gr
        >>> import matplotlib.pyplot as plt
        >>> from grama.models import make_cantilever_beam
        >>> md = make_cantilever_beam()
        >>> ## Dispatch from autoplotter
        >>> (
        >>>     md
        >>>     >> gr.ev_sample(n=100, df_det="nom", skip=True)
        >>>     >> gr.pt_auto()
        >>> )
        >>> ## Re-create plot without metadata
        >>> (
        >>>     md
        >>>     >> gr.ev_sample(n=100, df_det="nom")
        >>>     >> gr.pt_scattermat(var=md.var)
        >>> )

    """
    if var is None:
        raise ValueError("Must provide input columns list as keyword var")

    ## Define helpers
    # labels_blank: one empty label per break, hiding the tick labels
    labels_blank = lambda v: [""] * len(v)
    # breaks_min: three breaks only - lower limit, midpoint, upper limit
    breaks_min = lambda lims: (lims[0], 0.5 * (lims[0] + lims[1]), lims[1])

    ## Make blank figure
    # An empty plot is drawn once to obtain a figure that the panels below
    # are added to.
    fig = (
        df
        >> ggplot()
        + geom_blank()
        + theme_void()
    ).draw(show=False)

    # One grid cell per (row variable, column variable) pair
    gs = gridspec.GridSpec(len(var), len(var))
    for i, v1 in enumerate(var):
        for j, v2 in enumerate(var):
            ax = fig.add_subplot(gs[i, j])
            ## Switch labels
            # Tick labels are shown only in the first column (y) and the
            # last row (x); _sci_format is defined elsewhere in the file.
            if j == 0:
                labels_y = _sci_format
            else:
                labels_y = labels_blank
            if i == len(var) - 1:
                labels_x = _sci_format
            else:
                labels_x = labels_blank

            ## Density
            # Diagonal cells: density of the variable, with its name placed
            # at the middle of the data range
            if i == j:
                xmid = 0.5 * (
                    df[v1].min() + df[v1].max()
                )

                p = (
                    df
                    >> ggplot(aes(v1))
                    + geom_density()
                    + scale_x_continuous(
                        breaks=breaks_min,
                        labels=labels_x,
                    )
                    + scale_y_continuous(
                        breaks=breaks_min,
                        labels=labels_y,
                    )
                    + annotate(
                        "label",
                        x=xmid,
                        y=0,
                        label=v1,
                        va="bottom",
                    )
                    + theme_minimal()
                    + labs(title=v1)
                )

            ## Scatterplot
            # Off-diagonal cells: v2 on x, v1 on y
            else:
                p = (
                    df
                    >> ggplot(aes(v2, v1))
                    + geom_point()
                    + scale_x_continuous(
                        breaks=breaks_min,
                        labels=labels_x,
                    )
                    + scale_y_continuous(
                        breaks=breaks_min,
                        labels=labels_y,
                    )
                    + theme_minimal()
                    + theme(
                        axis_title=element_text(va="top", size=12),
                    )
                )

            # NOTE(review): _draw_using_figure is a private plotnine method;
            # it may not exist in every plotnine version - confirm.
            _ = p._draw_using_figure(fig, [ax])


    ## Plot
    # NB Returning the figure causes a "double plot" in Jupyter....
    fig.show()