Beispiel #1
0
def calc_tiers(df_ranks, year, week, bw=0.09, order=4, show=False):
    """Assign each team to a tier using Gaussian Kernel Density Estimation.

    A KDE is fitted to the ``power`` column, the relative minima of the
    density are used as tier boundaries, and a ``tier`` column (1 = highest
    power) is added to ``df_ranks`` in place.  A plot of the density, the
    boundaries and the individual power rankings is written to
    ``output/<year>/week<week>/tiers.png``.

    :param df_ranks: data frame with power rankings for each team; must
        contain a ``power`` column.  Modified in place (``tier`` is added).
    :param year: current year (used in the output path)
    :param week: current week (used in the output path and the plot title)
    :param bw: bandwidth for KDE
    :param order: order parameter passed to ``argrelmin``
    :param show: flag to show plot
    :return: ``df_ranks`` with the added ``tier`` column
    """
    logger.info('Calculating tiers for power rankings')
    power = df_ranks.get('power')
    # Estimate the kernel using power rankings
    kde = gaussian_kde(power, bw_method=bw)
    # Create grid of points for plot (10 grid points per team, padded by 10)
    x_grid = np.linspace(power.min() - 10., power.max() + 10, power.size * 10)
    # Evaluate the density once; it is used for both the plot and the minima
    density = kde(x_grid)
    df_kde = pd.DataFrame(dict(x=x_grid, kde=density))
    # Calculate relative minimums to determine tiers
    rel_min = pd.DataFrame(
        dict(rel_min=x_grid[argrelmin(density, order=order)[0]]))
    # Only keep 5 tiers: the 4 highest boundaries split the range in 5
    tier_mins = sorted(rel_min.rel_min.values, reverse=True)[:4]
    # Find position of power rank when added to list of minimums to get tier
    df_ranks['tier'] = df_ranks.apply(
        lambda x: sorted(tier_mins + [x.power],
                         reverse=True).index(x.power) + 1,
        axis=1)
    # Plot KDE and overlay tiers and actual power rankings as vertical lines
    tier_plot = (
        ggplot(aes(x='x', y='kde'), data=df_kde) + geom_line(size=1.5) +
        geom_vline(
            aes(xintercept='rel_min'), data=rel_min, color='red', alpha=0.7) +
        geom_vline(aes(xintercept='power'),
                   data=df_ranks,
                   color='blue',
                   linetype='dashed',
                   alpha=0.4) + theme_bw() +
        labs(x='Power Rankings',
             y=f'KDE (bw: {bw}, order: {order})',
             title=f'Tiers for week {week}'))
    # Show plot
    if show:
        tier_plot.draw()
    # Create directory if it doesn't exist to save plot
    out_dir = Path(f'output/{year}/week{week}')
    out_dir.mkdir(parents=True, exist_ok=True)
    out_name = out_dir / 'tiers.png'
    # Save plot (plotnine is throwing too many warnings...).  The filters are
    # silenced only inside this context so that whatever warning
    # configuration the application had before is restored afterwards.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        tier_plot.save(out_name, width=9, height=6, dpi=300)
    logger.info(f'Saved Tiers plot to local file: {out_name.resolve()}')
    return df_ranks
Beispiel #2
0
def test_aesthetics():
    """Check geom_vline rendering with alpha, linetype, color and size mapped."""
    vlines = [
        geom_vline(aes(xintercept='xintercept'), size=2),
        geom_vline(aes(xintercept='xintercept+.1', alpha='z'), size=2),
        geom_vline(aes(xintercept='xintercept+.2', linetype='factor(z)'),
                   size=2),
        geom_vline(aes(xintercept='xintercept+.3', color='factor(z)'),
                   size=2),
        geom_vline(aes(xintercept='xintercept+.4', size='z')),
    ]

    # Layers are added left to right, in the order listed above.
    p = ggplot(df) + geom_point(aes('x', 'y'))
    for vline in vlines:
        p = p + vline

    assert p + _theme == 'aesthetics'
Beispiel #3
0
def plot_estimate_distribution(dist):
    """Return a 25-bin histogram of ``dist['estimates']`` with two markers.

    A solid vertical line is drawn at the sum of the module-level
    ``pile['denomination']`` values and a dotted one at 3363400.
    """
    marker_color = "#FF5500"

    histogram = (pn.ggplot(dist, pn.aes(x='estimates')) +
                 pn.geom_histogram(bins=25))
    pile_total_line = pn.geom_vline(
        xintercept=sum(pile['denomination']),
        color=marker_color,
        size=2,
    )
    # NOTE(review): meaning of the constant 3363400 is not visible here.
    fixed_line = pn.geom_vline(
        xintercept=3363400,
        color=marker_color,
        size=2,
        linetype='dotted',
    )
    return histogram + pile_total_line + fixed_line
Beispiel #4
0
def mungle_plot(data_df,
                users=USERS,
                aggregation="7D",
                start="2018-12-31",
                end=None):
    """Aggregate tweet counts over time and plot them as a line chart.

    The input is sorted, de-duplicated on (User, Tweet), type-cast, and its
    dates are shifted back by three hours.  Rows with ``Type == "Tweet"``
    from the selected ``users`` are counted and summed per user and day,
    resampled per user with ``aggregation``, summed across users per date,
    and finally restricted to ``[start, end]``.

    :param data_df: frame with at least the columns User, Tweet, Date,
        Favorites, Retweets, ID and Type.  Not modified.
    :param users: users whose tweets are kept.
    :param aggregation: pandas resample rule for the final aggregation.
    :param start: first date kept in the result.
    :param end: last date kept in the result; defaults to the latest
        aggregated date.
    :return: tuple ``(plot, agg_data)`` with the plotnine plot and the
        aggregated frame it was built from.
    """
    data = data_df.sort_values(["User", "Date"])
    data.drop_duplicates(subset=["User", "Tweet"], inplace=True)
    # NOTE(review): "******" is not a valid pandas dtype and looks like a
    # redaction artifact -- restore the intended dtype for "User".
    data = data.astype({
        "User": "******",
        "Tweet": "str",
        "Date": "datetime64[ns]",
        "Favorites": "int",
        "Type": "category"
    })
    # NOTE(review): presumably a timezone adjustment -- confirm.
    data["Date"] = data["Date"] - dt.timedelta(hours=3)

    filtered_data = data.loc[(data.Type == "Tweet")].loc[data.User.isin(users)]

    # Daily index shared by every user.  Built once from the Date column
    # only, rather than reducing every column of the frame for each group.
    daily_index = pd.date_range(
        dt.datetime(2018, 12, 30), data["Date"].max(), freq="D")

    data_sums = (filtered_data.groupby("User").apply(
        lambda x: x.set_index("Date").resample("1D").sum().reindex(
            daily_index)).drop("ID", axis=1).rename_axis(
                ["User", "Date"]).reset_index())
    data_count = (filtered_data.groupby("User").apply(
        lambda x: x.set_index("Date").resample("1D").count().reindex(
            daily_index)).drop(
                ["User", "Favorites", "Retweets", "ID", "Type"],
                axis=1).rename_axis(["User", "Date"]).reset_index())

    full_data = data_count.merge(data_sums, how="left", on=["User", "Date"])
    resampled_data = (full_data.set_index("Date").groupby("User").resample(
        aggregation, label="right", closed="right").sum())

    # Collapse the per-user series into one total per date
    agg_data = resampled_data.groupby("Date").agg("sum").reset_index()

    if end is None:
        end = agg_data.Date.max()

    agg_data = agg_data.loc[(agg_data.Date >= start) & (agg_data.Date <= end)]

    plot = (p9.ggplot(agg_data, p9.aes("Date", "Tweet")) +
            p9.geom_line(color="red", size=1) +
            p9.geom_vline(xintercept="2019-06-30", linetype="dotted") +
            p9.geom_vline(xintercept="2019-10-27", linetype="dotted") +
            p9.theme(axis_text_x=p9.element_text(angle=90, hjust=-1)) +
            p9.labs(title="Tweets de cuentas oficiales del gobierno en 2019",
                    subtitle="Acumulados semanales",
                    y="",
                    x=""))

    return plot, agg_data
def test_annotation_stripes_coord_flip():
    """Stripes on a jittered categorical plot with flipped coordinates."""
    pdf = mtcars.assign(gear=pd.Categorical(mtcars.gear),
                        am=pd.Categorical(mtcars.am))

    stripes = annotation_stripes(
        fills=["#AAAAAA", "#FFFFFF", "#7F7FFF"], alpha=0.3)
    points = geom_jitter(
        aes("gear", "wt", shape="gear", color="am"), random_state=5)

    p = ggplot(pdf) + stripes + points
    # One black separator line at each of these x positions
    for boundary in (0.5, 1.5, 2.5, 3.5):
        p = p + geom_vline(xintercept=boundary, color="black")
    # work around #229
    p = p + scale_shape_discrete(guide=guide_legend(order=1))
    p = p + coord_flip()

    assert p == "annotation_stripes_coord_flip"
Beispiel #6
0
def test_aesthetics():
    """Check geom_vline with each supported aesthetic mapped in turn."""
    base = ggplot(df) + geom_point(aes('x', 'y'))

    plain = geom_vline(aes(xintercept='xintercept'), size=2)
    by_alpha = geom_vline(aes(xintercept='xintercept+.1', alpha='z'), size=2)
    by_linetype = geom_vline(
        aes(xintercept='xintercept+.2', linetype='factor(z)'), size=2)
    by_color = geom_vline(
        aes(xintercept='xintercept+.3', color='factor(z)'), size=2)
    by_size = geom_vline(aes(xintercept='xintercept+.4', size='z'))

    p = base + plain + by_alpha + by_linetype + by_color + by_size

    assert p + _theme == 'aesthetics'
def scatter_plot(df,
                 xcol,
                 ycol,
                 domain,
                 xname=None,
                 yname=None,
                 log=False,
                 width=6,
                 height=6,
                 clamp=True,
                 tickCount=5):
    """Build a scatter plot of ``df[xcol]`` against ``df[ycol]``.

    Both axes share the limits ``domain``.  A dashed diagonal and dashed
    rules at the upper limit of each axis are drawn on top of the points.

    :param df: data frame holding the two columns; it is not modified.
    :param xcol: name of the column plotted on the x axis.
    :param ycol: name of the column plotted on the y axis.
    :param domain: two-element sequence ``(lower, upper)`` used as the
        limits of both axes.
    :param xname: x axis label; defaults to ``xcol``.
    :param yname: y axis label; defaults to ``ycol``.
    :param log: use log10 scales instead of linear ones.
    :param width: figure width.
    :param height: figure height.
    :param clamp: replace values above ``domain[1]`` by ``domain[1]`` (on a
        copy) so that they are drawn on the upper rule.
    :param tickCount: currently unused.
    :return: the plotnine plot object.
    """
    assert len(domain) == 2

    POINT_SIZE = 0.5
    DASH_PATTERN = (0, (3, 1))

    # Identity comparison: ``== None`` would invoke a custom ``__eq__``
    if xname is None:
        xname = xcol
    if yname is None:
        yname = ycol

    # formatter for axes' labels
    ax_formatter = mizani.custom_format('{:n}')

    if clamp:  # clamp overflowing values if required
        df = df.copy(deep=True)
        df.loc[df[xcol] > domain[1], xcol] = domain[1]
        df.loc[df[ycol] > domain[1], ycol] = domain[1]

    # generate scatter plot
    scatter = p9.ggplot(df)
    scatter += p9.aes(x=xcol, y=ycol)
    scatter += p9.geom_point(size=POINT_SIZE, na_rm=True)
    scatter += p9.labs(x=xname, y=yname)

    if log:  # log scale
        scatter += p9.scale_x_log10(limits=domain, labels=ax_formatter)
        scatter += p9.scale_y_log10(limits=domain, labels=ax_formatter)
    else:
        scatter += p9.scale_x_continuous(limits=domain, labels=ax_formatter)
        scatter += p9.scale_y_continuous(limits=domain, labels=ax_formatter)

    scatter += p9.theme_bw()
    scatter += p9.theme(
        panel_grid_major=p9.element_line(color='#666666', alpha=0.5))
    scatter += p9.theme(figure_size=(width, height))

    # generate additional lines
    scatter += p9.geom_abline(intercept=0, slope=1,
                              linetype=DASH_PATTERN)  # diagonal
    scatter += p9.geom_vline(xintercept=domain[1],
                             linetype=DASH_PATTERN)  # vertical rule
    scatter += p9.geom_hline(yintercept=domain[1],
                             linetype=DASH_PATTERN)  # horizontal rule

    return scatter
def test_annotation_stripes_continuous_scale():
    """Stripes filling the range on a continuous x scale."""
    boundaries = [0.5, 1.5, 2.5, 3.5]

    p = ggplot(df) + annotation_stripes(fill_range=True)
    p = p + geom_point(aes('x', 'y'))
    p = p + geom_vline(xintercept=boundaries)

    assert p == 'annotation_stripes_continuous_scale'
def test_annotation_stripes_fill_range():
    """Stripes filling the range on a discrete (factor) x scale."""
    boundaries = [0.5, 1.5, 2.5, 3.5]

    p = ggplot(df) + annotation_stripes(fill_range=True)
    p = p + geom_point(aes('factor(x)', 'y'))
    p = p + geom_vline(xintercept=boundaries)

    assert p == 'annotation_stripes_fill_range'
Beispiel #10
0
def plot_dist_with_ci(dist):
    """Return a histogram of ``dist['estimates']`` with its 95% interval.

    Dotted vertical lines mark the 2.5% and 97.5% quantiles of the
    ``estimates`` column, and the title shows ``mean (lower, upper)``.

    The quantiles are taken from the ``estimates`` column only, so that the
    lines always agree with the title even when ``dist`` carries additional
    columns.
    """
    estimates = dist.estimates
    lower = estimates.quantile(0.025)
    upper = estimates.quantile(0.975)

    return (pn.ggplot(dist, pn.aes(x='estimates')) +
            pn.geom_histogram(bins=25) + pn.geom_vline(
                xintercept=lower,
                color="#FF5500",
                size=2,
                linetype='dotted',
            ) + pn.geom_vline(
                xintercept=upper,
                color="#FF5500",
                size=2,
                linetype='dotted',
            ) + pn.ggtitle("${0:,.0f} ({1:,.0f}, {2:,.0f})".format(
                np.mean(estimates),
                lower,
                upper,
            )))
def test_annotation_stripes_coord_flip():
    """Default stripes on a discrete x scale with flipped coordinates."""
    boundaries = [0.5, 1.5, 2.5, 3.5]

    p = ggplot(df) + annotation_stripes()
    p = p + geom_point(aes('factor(x)', 'y'))
    p = p + geom_vline(xintercept=boundaries)
    p = p + coord_flip()

    assert p == 'annotation_stripes_coord_flip'
def scatter_plot2(df1, df2, xcol, ycol, domain, color1='black', color2='red',
                  xname=None, yname=None, log=False, width=6, height=6,
                  clamp=True, tickCount=5):
    """Overlay two scatter plots (``df1`` and ``df2``) on shared axes.

    Both frames are plotted as ``xcol`` against ``ycol`` with limits
    ``domain`` on both axes, rug marks on the top and right sides, a dashed
    diagonal and dashed rules at the upper limit.  With ``clamp`` set,
    values above ``domain[1]`` are replaced by ``domain[1]`` on copies of
    the frames.  ``tickCount`` is not used.
    """
    assert len(domain) == 2

    point_size = 1.5
    dash_pattern = (0, (6, 2))
    upper = domain[1]

    xname = xcol if xname is None else xname
    yname = ycol if yname is None else yname

    # formatter for axes' labels
    ax_formatter = mizani.custom_format('{:n}')

    def _clamped(frame):
        # Work on a copy so the caller's frame keeps its original values
        frame = frame.copy(deep=True)
        for col in (xcol, ycol):
            frame.loc[frame[col] > upper, col] = upper
        return frame

    if clamp:  # clamp overflowing values if required
        df1 = _clamped(df1)
        df2 = _clamped(df2)

    if log:  # log scale
        x_scale = p9.scale_x_log10(limits=domain, labels=ax_formatter)
        y_scale = p9.scale_y_log10(limits=domain, labels=ax_formatter)
    else:
        x_scale = p9.scale_x_continuous(limits=domain, labels=ax_formatter)
        y_scale = p9.scale_y_continuous(limits=domain, labels=ax_formatter)

    components = [
        p9.aes(x=xcol, y=ycol),
        # points of both data sets
        p9.geom_point(size=point_size, na_rm=True, color=color1, alpha=0.5),
        p9.geom_point(size=point_size, na_rm=True, data=df2, color=color2,
                      alpha=0.5),
        p9.labs(x=xname, y=yname),
        # rug plots
        p9.geom_rug(na_rm=True, sides="tr", color=color1, alpha=0.05),
        p9.geom_rug(na_rm=True, sides="tr", data=df2, color=color2,
                    alpha=0.05),
        x_scale,
        y_scale,
        # theming
        p9.theme_bw(),
        p9.theme(panel_grid_major=p9.element_line(color='#666666',
                                                  alpha=0.5)),
        p9.theme(panel_grid_minor=p9.element_blank()),
        p9.theme(figure_size=(width, height)),
        p9.theme(text=p9.element_text(size=24, color="black")),
        # diagonal, vertical rule, horizontal rule
        p9.geom_abline(intercept=0, slope=1, linetype=dash_pattern),
        p9.geom_vline(xintercept=upper, linetype=dash_pattern),
        p9.geom_hline(yintercept=upper, linetype=dash_pattern),
    ]

    scatter = p9.ggplot(df1)
    for component in components:
        scatter += component

    return scatter
def density_plot1(num_matches_per_round: int,
                  match_lengths_from_one_round: list):
    """Save a density plot of the match lengths of one round.

    A thick black vertical line is drawn at x = 50 and the x axis is
    limited to [0, 55].  The figure is written to
    ``figures/match_length_density_plot.png``.  ``num_matches_per_round``
    is accepted but not used.
    """
    frame = pd.DataFrame({'Match length': match_lengths_from_one_round})

    density = plt.ggplot(frame, plt.aes(x='Match length'))
    density = density + plt.geom_density()
    density = density + plt.geom_vline(xintercept=50, color='black', size=2)
    density = density + plt.theme_classic()
    density = density + plt.xlim([0, 55])

    density.save(filename='figures/match_length_density_plot.png')
def plot_and_save(scale_data_df_cleaned, smooth_factor, temp_file_name):
    """Plot weight over time with a smoothed trend and save it to a file.

    The plot marks 2019-10-15 with a blue vertical line and places the
    label 'IF starts!' at 2019-11-30, at the height of the maximum weight.
    The figure is saved to ``temp_file_name``.
    """
    fasting_start = to_datetime('2019-10-15')
    label_x = to_datetime('2019-11-30')
    label_y = max(scale_data_df_cleaned.loc[:, 'weight'])

    plot_output = ggplot(scale_data_df_cleaned,
                         aes(x='timestamp', y='weight'))
    plot_output = plot_output + geom_point(size=0.5)
    plot_output = plot_output + geom_smooth(span=smooth_factor, color='red')
    plot_output = plot_output + geom_vline(
        aes(xintercept=fasting_start), color='blue', size=1.2)
    plot_output = plot_output + geom_label(
        aes(x=label_x, y=label_y), label='IF starts!', size=15)

    plot_output.save(temp_file_name, width=13, height=10, dpi=80)
Beispiel #15
0
def plot_elos():
    """Plot the win rate as a function of the Elo difference.

    The curve ``1 / (1 + 10 ** (-diff / 400))`` is evaluated on the default
    ``np.linspace`` grid between -1000 and +1000, with faint guide lines at
    a difference of 0 and a win rate of 0.5.
    """
    elo_diff = np.linspace(-1000, +1000)
    win_rate = 1 / (1 + 10**(-elo_diff / 400))
    curve = pd.DataFrame({'elo': elo_diff, 'winrate': win_rate})

    chart = pn.ggplot(curve) + pn.geom_line(pn.aes(x='elo', y='winrate'))
    extras = (
        pn.geom_vline(xintercept=0, alpha=.1),
        pn.geom_hline(yintercept=.5, alpha=.1),
        pn.labs(x='Own Elo relative to opponent\'s Elo',
                y='Win rate v. opponent'),
        pn.scale_y_continuous(labels=percent_format()),
        pn.coord_cartesian(expand=False),
        plot.IEEE(),
    )
    for extra in extras:
        chart = chart + extra

    return chart
Beispiel #16
0
def estimate_cutoffs_plot(output_file,
                          df_plt,
                          df_cell_estimate_cutoff,
                          df_fit=None,
                          scale_x_log10=False,
                          save_plot=True):
    """Plot UMI counts by sorted cell barcodes.

    Parameters
    ----------
    output_file : str
        Base name of the image; ``.png`` is appended when saving.
    df_plt : pandas.DataFrame
        Frame with the columns ``barcode`` and ``umi_counts``.  It is not
        modified.
    df_cell_estimate_cutoff : pandas.DataFrame
        Frame with the columns ``n_cells`` and ``method``; one vertical
        line is drawn per row, colored by ``method``.
    df_fit : pandas.DataFrame, optional
        Frame with the columns ``x`` and ``y``; drawn as a yellow line when
        given.
    scale_x_log10 : bool
        Use a log10 transformed x axis.
    save_plot : bool
        Write the plot to ``<output_file>.png``.

    Returns
    -------
    The plotnine plot object.
    """
    min_umi = min(df_plt['umi_counts'])
    if min_umi <= 0:
        # The y axis is log10 scaled, so every count must be positive.
        # Shift the data so that the minimum becomes 1 (for a minimum of 0
        # this is a shift of 1).  Work on a copy so that the caller's frame
        # is left untouched.
        df_plt = df_plt.copy()
        df_plt['umi_counts'] = df_plt['umi_counts'] + (1 - min_umi)
    gplt = plt9.ggplot()
    gplt = gplt + plt9.theme_bw()
    if len(df_plt) <= 50000:
        gplt = gplt + plt9.geom_point(mapping=plt9.aes(x='barcode',
                                                       y='umi_counts'),
                                      data=df_plt,
                                      alpha=0.05,
                                      size=0.1)
    else:
        # Large inputs are drawn as a line instead of individual points
        gplt = gplt + plt9.geom_line(mapping=plt9.aes(x='barcode',
                                                      y='umi_counts'),
                                     data=df_plt,
                                     alpha=0.25,
                                     size=0.75,
                                     color='black')
    gplt = gplt + plt9.geom_vline(mapping=plt9.aes(xintercept='n_cells',
                                                   color='method'),
                                  data=df_cell_estimate_cutoff,
                                  alpha=0.75,
                                  linetype='dashdot')
    gplt = gplt + plt9.scale_color_brewer(palette='Dark2', type='qual')
    if scale_x_log10:
        gplt = gplt + plt9.scale_x_continuous(
            trans='log10', labels=comma_labels, minor_breaks=0)
    else:
        gplt = gplt + plt9.scale_x_continuous(labels=comma_labels,
                                              minor_breaks=0)
    gplt = gplt + plt9.scale_y_continuous(
        trans='log10', labels=comma_labels, minor_breaks=0)
    gplt = gplt + plt9.labs(title='',
                            y='UMI counts',
                            x='Barcode index, sorted by UMI count',
                            color='Cutoff')
    # Add the fit of the droplet utils model.  Compare against None: the
    # truth value of a DataFrame is ambiguous and raises ValueError.
    if df_fit is not None:
        gplt = gplt + plt9.geom_line(mapping=plt9.aes(x='x', y='y'),
                                     data=df_fit,
                                     alpha=1,
                                     color='yellow')
    if save_plot:
        gplt.save('{}.png'.format(output_file), dpi=300, width=5, height=4)
    return gplt
def test_annotation_stripes_faceting():
    """Stripes on a plot faceted by a two-level grouping column."""
    n = len(df)

    # Stack the data twice and label the halves 'a' and 'b'
    df2 = pd.DataFrame({
        'x': np.hstack([df['x']] * 2),
        'y': np.hstack([df['y']] * 2),
        'g': ['a'] * n + ['b'] * n
    })

    p = ggplot() + annotation_stripes(fill_range='no')
    p = p + geom_point(df2, aes('factor(x)', 'y'))
    p = p + geom_vline(xintercept=[0.5, 1.5, 2.5, 3.5])
    p = p + facet_wrap('g')
    assert p == 'annotation_stripes_faceting'
Beispiel #18
0
    def plotLatentVsObserved(self,
                             value,
                             *,
                             latent_min=-15,
                             latent_max=10,
                             npoints=200,
                             wt_vline=True):
        """Plot the observed quantity against the latent phenotype.

        Parameters
        ----------
        value : {'enrichment', 'phenotype'}
            Which observed quantity to plot; passed on to
            `latentToObserved`.
        latent_min : float
            Lower end of the latent phenotype range.
        latent_max : float
            Upper end of the latent phenotype range.
        npoints : int
            Number of evenly spaced latent values the line is drawn through.
        wt_vline : bool
            Add a dashed vertical line at `self.wt_latent`.

        Returns
        -------
        plotnine.ggplot.ggplot
            Line plot of the observed quantity versus latent phenotype.

        """
        latent = numpy.linspace(latent_min, latent_max, npoints)
        curve = pd.DataFrame({
            "latent": latent,
            "observed": self.latentToObserved(latent, value),
        })

        p = p9.ggplot(curve, p9.aes("latent", "observed"))
        for component in (
                p9.geom_line(),
                p9.theme(figure_size=(3.5, 2.5)),
                p9.xlab("latent phenotype"),
                p9.ylab(f"observed {value}"),
        ):
            p = p + component

        if not wt_vline:
            return p

        return p + p9.geom_vline(xintercept=self.wt_latent,
                                 color=CBPALETTE[1],
                                 linetype="dashed")
Beispiel #19
0
def plot_replicate_density(
    df,
    batch,
    plate,
    cutoff,
    percent_strong,
    output_file_base=None,
    output_file_extensions=None,
    dpi=300,
    height=1.5,
    width=2,
    return_plot=False,
):
    """Plot the density of a similarity metric, split by replicate status.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with the columns ``similarity_metric`` and
        ``group_replicate``.
    batch, plate :
        Identifiers shown in the plot title.
    cutoff : float
        Position of the dashed red vertical line.
    percent_strong : float
        Value shown in the title after being multiplied by 100 and rounded
        to two decimals.
    output_file_base : str, optional
        When given, the figure is written through ``save_figure``.
    output_file_extensions : list of str, optional
        Extensions passed to ``save_figure``; defaults to
        ``[".png", ".pdf", ".svg"]``.
    dpi, height, width :
        Passed to ``save_figure``.
    return_plot : bool
        Return the plot object instead of ``None``.

    Returns
    -------
    The plot object when ``return_plot`` is true, otherwise ``None``.
    """
    # A fresh list per call: a list literal in the signature would be one
    # object shared by every call (and by whatever ``save_figure`` does).
    if output_file_extensions is None:
        output_file_extensions = [".png", ".pdf", ".svg"]

    density_gg = (
        gg.ggplot(df, gg.aes(x="similarity_metric", fill="group_replicate"))
        + gg.geom_density(alpha=0.3)
        + gg.scale_fill_manual(
            name="Replicate",
            labels={"True": "True", "False": "False"},
            values=["#B99638", "#2DB898"],
        )
        + gg.xlab("Pearson Correlation")
        + gg.ylab("Density")
        + gg.geom_vline(xintercept=cutoff, color="red", linetype="dashed")
        + gg.ggtitle(
            f"{batch}; Plate: {plate}\n\nPercent Replicating: {np.round(percent_strong * 100, 2)}%"
        )
        + gg.theme_bw()
        + gg.theme(
            title=gg.element_text(size=3.5),
            axis_text=gg.element_text(size=4),
            axis_title=gg.element_text(size=4),
            legend_text=gg.element_text(size=4),
            legend_title=gg.element_text(size=4),
            strip_text=gg.element_text(size=4, color="black"),
            strip_background=gg.element_rect(colour="black", fill="#fdfff4"),
        )
    )

    if output_file_base:
        save_figure(
            density_gg, output_file_base, output_file_extensions, dpi, height, width
        )

    if return_plot:
        return density_gg
    return None
Beispiel #20
0
    def plotMutsHistogram(self, value, *,
                          mutant_order=1, bins=30, wt_vline=True):
        """Histogram of a phenotype over all single mutants.

        Parameters
        ----------
        value : {'latentPhenotype', 'observedPhenotype', 'observedEnrichment'}
            Name of the method of this object used to compute the plotted
            quantity for each mutation.
        mutant_order : int
            Order of the mutants to plot; only 1 is implemented.
        bins : int
            Number of histogram bins.
        wt_vline : bool
            Add a dashed vertical line at the value computed for ``''``.

        Returns
        -------
        plotnine.ggplot.ggplot
            Histogram of the chosen quantity over the keys of
            `self.muteffects`.

        Raises
        ------
        ValueError
            If `mutant_order` is not 1 or `value` is not a supported name.

        """
        if mutant_order != 1:
            raise ValueError('only implemented for `mutant_order` of 1')

        supported = {'latentPhenotype', 'observedPhenotype',
                     'observedEnrichment'}
        if value not in supported:
            raise ValueError(f"invalid `value` of {value}")
        func = getattr(self, value)

        frame = pd.DataFrame(
            {value: [func(m) for m in self.muteffects.keys()]})

        p = p9.ggplot(frame, p9.aes(value))
        p = p + p9.geom_histogram(bins=bins)
        p = p + p9.theme(figure_size=(3.5, 2.5))
        p = p + p9.ylab(f"number of {mutant_order}-mutants")

        if not wt_vline:
            return p

        return p + p9.geom_vline(
                    xintercept=func(''),
                    color=CBPALETTE[1],
                    linetype='dashed')
Beispiel #21
0
def plot_pred_hist(label_list, pred_list, names=None, n_bins=10):
    """
    Draw histograms of true labels and predicted probabilities per model.

    :param label_list: sequence of true-label sequences, given as
        [(y1, y2, ...), (y1, y2, ...)]; entries correspond to ``pred_list``.
    :param pred_list: sequence of predicted-probability sequences; must
        have the same length as ``label_list``.
    :param names: model names; ``None`` or a sequence of the same length as
        ``label_list``.  When omitted, ('train', 'test') is used for two
        entries, ('train', 'valid', 'test') for three, and running numbers
        otherwise.
    :param n_bins: number of histogram bins.
    :return: plotnine object
    TODO: how geom_vline should be displayed
    """
    if names is None:
        if len(label_list) == 2:
            names = ('train', 'test')
        elif len(label_list) == 3:
            names = ('train', 'valid', 'test')
        else:
            names = list(range(len(label_list)))
    # name -> facet position, and the reverse lookup used by the labeller
    name_order = {k: v for v, k in enumerate(names)}
    name_order_rev = {str(k): v for v, k in name_order.items()}
    d = pd.DataFrame({
        'y': list(chain.from_iterable(label_list)),
        'prediction': list(chain.from_iterable(pred_list)),
    }).assign(
        model=list(chain.from_iterable(
            [[name] * len(labels) for name, labels in zip(names, label_list)]))
    ).melt(
        id_vars='model'
    ).assign(
        order=lambda x: x.model.replace(name_order)
    ).sort_values(['order', 'variable'])
    # Data for drawing the mean as a guide line
    d_mean = d.drop(columns='order').groupby(['variable', 'model']).mean(
            ).reset_index().rename(columns={'value': 'mean'})
    d = d.merge(d_mean, on=['variable', 'model'])
    return ggplot(
            d,
            aes(x='value', y='..density..', group='variable', fill='variable')
    ) + geom_histogram(
            # honor the caller's bin count (it used to be fixed at 10)
            position='identity', alpha=.5, bins=n_bins
    ) + geom_vline(
            aes(xintercept='mean', group='variable', color='variable',
                linetype='variable')
    ) + labs(x='prediction', fill='frequency', linetype='mean', color='mean'
    ) + facet_wrap(
            '~order', scales='free_y', labeller=lambda x: name_order_rev[x]
    ) + theme_classic() + theme(figure_size=(6, 4))
Beispiel #22
0
def main(argv: List[str]) -> None:
    """Simulate ability rolls, print summary statistics and save a bar plot.

    :param argv: command line arguments (without the program name):
        ``roll_rule`` plus the optional ``--num_iterations``, ``--seed`` and
        ``--plot_file``.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("roll_rule", type=RollRule, choices=list(RollRule))
    parser.add_argument("--num_iterations", type=int, default=10000)
    parser.add_argument("--seed", type=int, default=None)
    parser.add_argument("--plot_file", default="ability_roll_distribution.png")

    args = parser.parse_args(argv)

    if args.seed is not None:
        random.seed(args.seed)

    # Run the simulation and process the data
    roll_counts = simulate(args.roll_rule, args.num_iterations)
    data = process_data(roll_counts)

    # Calculate statistics
    mean = sum(data["value"] * data["percent"] / 100.0)
    # idxmax() returns an index *label*, so the row has to be looked up with
    # .loc; .iloc would only be right for a default 0..n-1 index.
    mode = data.loc[data["count"].idxmax()]["value"]
    stddev = math.sqrt(
        sum(data["percent"] / 100.0 * (data["value"] - mean)**2.0))
    skewness = pearson_first_skewness(mean, mode, stddev)

    # Print out result information
    print(data)
    print()
    print("Mean:", mean)
    print("Mode:", mode)
    print("Standard deviation:", stddev)
    print("Skewness:", skewness)

    # Plot the data
    plot = (plt9.ggplot(data, plt9.aes("value", "percent")) +
            plt9.geom_bar(stat="identity") +
            plt9.geom_vline(xintercept=mean, color="black") +
            plt9.xlim(0, 21) + plt9.ylab("Chance (%)") +
            plt9.xlab("Ability Score") +
            plt9.ggtitle("Ability Score Distribution ({} iterations)".format(
                args.num_iterations)))

    plot.save(args.plot_file, dpi=300)
    print("Wrote plot image to:", args.plot_file)
    def hist_residuals(self, figure_size=(8, 4), sample_frac=1.0):
        """Plot a histogram of the ``residual`` column of ``self.df``.

        Parameters
        ----------
        figure_size : tuple(int, int), optional default=(8, 4)
            Size of the figure as (width, height)

        sample_frac : float, optional default=1.0
            Fraction of the rows that is randomly sampled for plotting

        Returns
        -------
        plot : ggplot object
        """
        sampled = self.df.sample(frac=sample_frac)

        plot = ggplot(sampled, aes(x="residual"))
        plot = plot + geom_histogram(fill="lightblue", colour="grey")
        # dashed red reference line at a residual of zero
        plot = plot + geom_vline(xintercept=0, color="red", linetype="dashed")
        plot = plot + labs(title="Residuals", x="Residuals")
        plot = plot + theme(figure_size=figure_size)
        return plot
Beispiel #24
0
def plot_arima(df):
    """Plot two series over time with a ribbon and save the figure as PNG.

    The second column of ``df`` is drawn in blue and the third in red, both
    as points and lines.  A dashed green line marks the latest timestamp at
    which the second column is not missing, and the ``Lower``/``Upper``
    columns span a red ribbon.  ``df['Timestamp']`` is converted to datetime
    in place.  The figure is saved as ``<second column>_predict.png`` in the
    ``png`` directory next to this file, and the plot object is returned.
    """
    df['Timestamp'] = pd.to_datetime(df['Timestamp'])

    observed_col = df.columns.values[1]
    predicted_col = df.columns.values[2]
    # Latest timestamp that still has a value in the second column
    last_observed = max(
        df[['Timestamp', observed_col]].dropna(axis=0)['Timestamp'])

    observed_layers = (
        geom_point(colour='blue', alpha=0.3, na_rm=True) +
        geom_line(colour='blue', na_rm=True))
    p = ggplot(data=df, mapping=aes(x='Timestamp', y=observed_col))
    p = p + geom_point(colour='blue', alpha=0.3, na_rm=True)
    p = p + geom_line(colour='blue', na_rm=True)
    p = p + geom_point(mapping=aes(x='Timestamp', y=predicted_col),
                       colour='red',
                       alpha=0.3,
                       na_rm=True)
    p = p + geom_line(mapping=aes(x='Timestamp', y=predicted_col),
                      colour='red',
                      na_rm=True)
    p = p + geom_vline(xintercept=last_observed,
                       color='green',
                       linetype='dashed')
    p = p + geom_ribbon(data=df,
                        mapping=aes(ymin='Lower', ymax='Upper'),
                        fill='red',
                        alpha=0.1)
    p = p + scale_x_datetime(breaks='1 days',
                             date_labels='%y-%m-%d %H:%M')
    p = p + xlab('Time') + ylab(observed_col) + theme_bw()
    p = p + theme(axis_text_x=element_text(
        angle=45, hjust=1, face='bold', color='black'),
                  axis_text_y=element_text(face='bold', colour='black'))

    out_dir = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'png')
    ggplot.save(p,
                filename=observed_col + '_predict.png',
                path=out_dir,
                width=8,
                height=6,
                units='in',
                dpi=326,
                verbose=False)
    return p
def density_plot2(num_matches_per_round: int,
                  match_lengths_from_one_round: list,
                  match_lengths_from_one_round_with_blowouts: list):
    """Save density plots of match lengths with and without blowouts.

    The two samples are stacked into one frame; the first
    ``num_matches_per_round`` rows are labelled 'No' and the next
    ``num_matches_per_round`` rows 'Yes' in the ``Blowouts`` column, which
    is mapped to the line color.  The figure is written to
    ``figures/match_length_with_blowout_density_plot.png``.
    """
    lengths = np.concatenate([
        match_lengths_from_one_round,
        match_lengths_from_one_round_with_blowouts,
    ])
    blowout_flags = np.concatenate([
        np.repeat('No', num_matches_per_round),
        np.repeat('Yes', num_matches_per_round),
    ])
    match_lengths_blowout = pd.DataFrame({
        'Match length': lengths,
        'Blowouts': blowout_flags,
    })

    figure = plt.ggplot(match_lengths_blowout,
                        plt.aes(x='Match length', color='Blowouts'))
    figure = figure + plt.geom_density()
    figure = figure + plt.geom_vline(xintercept=50, color='black', size=2)
    figure = figure + plt.xlim([0, 55])
    figure = figure + plt.theme_classic()

    figure.save(
        filename='figures/match_length_with_blowout_density_plot.png')
Beispiel #26
0
        np.sqrt(np.diag(event_study_formula.cov_params().loc[lags][lags]))
    ]),
    'mean':
    np.concatenate([
        event_study_formula.params[leads],
        np.array([0]), event_study_formula.params[lags]
    ]),
    'label':
    np.arange(-9, 6)
})

# Lower and upper bounds: mean -/+ 1.96 * sd
# (presumably a 95% interval under a normal approximation -- confirm).
leadslags_plot['lb'] = leadslags_plot['mean'] - leadslags_plot['sd'] * 1.96
leadslags_plot['ub'] = leadslags_plot['mean'] + leadslags_plot['sd'] * 1.96

# This version draws a point-range (mean with lb/ub) at each estimated lead
# or lag; whether to use it comes down to stylistic preference.
# Dashed reference lines mark y = 0 and x = 0.
# NOTE(review): the origin of the red line at y = 0.035169444 is not
# visible in this part of the file -- confirm what it represents.
p.ggplot(leadslags_plot, p.aes(x = 'label', y = 'mean',
             ymin = 'lb',
             ymax = 'ub')) +\
    p.geom_hline(yintercept = 0.035169444, color = "red") +\
    p.geom_pointrange() +\
    p.theme_minimal() +\
    p.xlab("Years before and after castle doctrine expansion") +\
    p.ylab("log(Homicide Rate)") +\
    p.geom_hline(yintercept = 0,
             linetype = "dashed") +\
    p.geom_vline(xintercept = 0,
             linetype = "dashed")
Beispiel #27
0
def rel_plot(sbs, variant, jitter=0.01):
    """Build a log-log scatter of ``ratio`` against ``base`` for one variant.

    Rows of ``sbs`` whose ``variant`` column equals ``variant`` are drawn as
    jittered points coloured by ``dataset``.  A reference curve ``y = 1 / x``
    (five geometrically spaced points between 0.02 and 1) and dashed guide
    lines at y = 1.1 / 0.9 and x = 0.1 / 0.3 are overlaid, and both axes use
    a log10 scale.

    :param sbs: data frame providing the ``variant``, ``base``, ``ratio``,
        ``dataset``, ``session_index`` and ``base_session_index`` columns
    :param variant: value of the ``variant`` column to keep
    :param jitter: jitter width and height handed to ``geom_jitter``
    :return: the assembled plot object
    """
    xcol, ycol = "base", "ratio"

    # Plot data: the selected rows plus helper columns (x, y, the original
    # row index and a "<session> vs. <base session>" text label).
    selected = sbs[sbs.variant == variant]
    selected = selected.assign(
        x=selected[xcol],
        y=selected[ycol],
        sbs_index=selected.index.values,
    )
    session_pairs = selected[["session_index", "base_session_index"]].apply(
        tuple, axis=1)
    selected = selected.assign(
        session_text=session_pairs.map(
            lambda pair: f"{pair[0]} vs. {pair[1]}"))

    # Reference curve y = 1 / x.
    curve_x = np.geomspace(0.02, 1, num=5)
    reference = pd.DataFrame({"x": curve_x, "y": 1 / curve_x})

    guide_style = {"linetype": "dashed", "alpha": 0.7}
    title_margin = {"r": -0.2, "b": 0.0, "l": 0, "t": 0.0}

    # Components are added to the plot in exactly this order.
    components = [
        geom_jitter(
            aes(x="x", y="y", fill="dataset", color="dataset"),
            width=jitter,
            height=jitter,
            alpha=0.6,
            size=1.0,
        ),
        geom_line(aes(x="x", y="y"), data=reference),
        ylab(ycol),
        geom_hline(aes(yintercept=1.1), **guide_style),
        geom_hline(aes(yintercept=0.9), **guide_style),
        geom_vline(aes(xintercept=0.1), **guide_style),
        geom_vline(aes(xintercept=0.3), **guide_style),
        xlab(xcol),
        theme(
            figure_size=(8, 5),
            legend_position="top",
            subplots_adjust={"hspace": 0.5},
            legend_title=element_blank(),
            legend_box_margin=-1,
            legend_margin=0.0,
            axis_text=element_text(size=12, margin={"t": 0.2, "l": -0.3}),
            legend_text=element_text(size=11),
            axis_title=element_text(size=12, margin=title_margin),
        ),
        scale_x_log10(labels=make_labeler(brief_format),
                      breaks=[0.01, 0.1, 0.3, 1.0]),
        scale_y_log10(labels=make_labeler(brief_format),
                      breaks=[0.5, 0.9, 1.1, 2.0, 3.0, 6, 12]),
    ]

    scatterplot = ggplot(selected)
    for component in components:
        scatterplot = scatterplot + component

    return scatterplot
Beispiel #28
0
def scatter_cell_cycle(
    adata,
    scores="signatures",
    size=1.5,
    alpha=1,
    curvature_shrink=1,
    lab_ypos=2,
):
    """Plots cell cycle signatures vs pseudotime

    Parameters
    ----------------
    adata: AnnData
        The AnnData object being used for the analysis. Must be previously
        evaluated by `tl.cell_cycle_phase`.
    scores: str
        A string indicating what to plot as cell cycle scores against pseudotime.
        If 'signatures', standard S-phase, G2-M and Histones signatures are used;
        if 'components', the 4 cell cycle related components are used.
    size: float
        Controls the point size of the plot.
    alpha: float
        A value between 0 and 1. Controls point transparency.
    curvature_shrink: float
        Divisor applied to the z-scored curvature before it is shifted so
        that its maximum sits at 0. Larger values flatten the curvature
        track. Only used when cell cycle divisions are available.
    lab_ypos: float
        Controls the y-axis position of the cell cycle phase annotation.

    Returns
    --------------
    A plotnine scatter plot of pseudotime vs the selected cell cycle scores.
    When `adata.uns["scycle"]` holds a "cell_cycle_division" entry, the
    curvature track, peak segments and phase annotations are overlaid.

    Raises
    --------------
    ValueError
        If `scores` is neither 'signatures' nor 'components'.

    """
    if scores == "signatures":
        y = ["S-phase", "G2-M", "Histones"]
        colors = ["#66c2a5", "#fc8d62", "#8da0cb", "black"]
    elif scores == "components":
        _add_compScores(adata)
        y = ["G1/S comp", "G2/M+ comp", "G2/M- comp", "Histones comp"]
        colors = ["#66c2a5", "#fc8d62", "#8da0cb", "#e5c494", "black"]
    else:
        # Previously fell through and failed later on an unbound local.
        raise ValueError(
            f"invalid `scores` of {scores!r}; "
            "expected 'signatures' or 'components'")

    time_scatter = scatter_pseudotime(
        adata, y=y, size=size, alpha=alpha) + labs(
            x="Pseudotime", y="Signature scores", color="Signature")

    # Without cell cycle divisions there is nothing to annotate.
    if "cell_cycle_division" not in adata.uns["scycle"]:
        return time_scatter

    cc_divs = adata.uns["scycle"]["cell_cycle_division"]

    # -- Curvature data
    # Rescale for display only: z-score, shrink, then shift so the maximum
    # is 0.  `assign` returns a new frame, so the curvature table stored in
    # `adata.uns` is left untouched (it used to be overwritten in place).
    stored_curv = cc_divs["curvature"]
    cvz = zscore(stored_curv["curvature"].values) / curvature_shrink
    cvz = cvz - np.max(cvz)
    curv_data = stored_curv.assign(curvature=cvz, signature="Curvature")

    # -- Peak data (for segments)
    # Each peak gets a dotted segment from the lowest curvature value up to
    # the peak itself.
    gr_min = np.min(curv_data["curvature"])
    pk_data = curv_data[curv_data["ispeak"] == "peak"].assign(ymin=gr_min)

    # -- Cell cycle annotation
    # G1 has no start line (None); labels are centred within each phase,
    # taking pseudotime to span [0, 1].
    cc_phase = pd.DataFrame(
        dict(
            starts=[
                None,
                cc_divs["s_start"],
                cc_divs["g2_start"],
                cc_divs["m_start"],
            ],
            labels=["G1", "S", "G2", "M"],
            labpos=[
                np.mean([0, cc_divs["s_start"]]),
                np.mean([cc_divs["s_start"], cc_divs["g2_start"]]),
                np.mean([cc_divs["g2_start"], cc_divs["m_start"]]),
                np.mean([cc_divs["m_start"], 1]),
            ],
            y=lab_ypos,
        ))

    cell_cycle_plt = (
        time_scatter +
        geom_point(aes("pseudotime", "curvature", color="signature"),
                   data=curv_data) +
        geom_line(aes("pseudotime", "curvature"), data=curv_data) +
        scale_color_manual(values=colors) + geom_segment(
            aes(x="pseudotime",
                xend="pseudotime",
                y="ymin",
                yend="curvature"),
            linetype="dotted",
            data=pk_data,
        ) + geom_vline(
            aes(xintercept="starts"), linetype="dashed", data=cc_phase) +
        geom_text(aes(x="labpos", y="y", label="labels"), data=cc_phase))

    return cell_cycle_plt
Beispiel #29
0
def test_aes_inheritance():
    """Expect a PlotnineError for a ``geom_vline`` with no own ``xintercept``.

    ``xintercept`` is mapped only in the plot-level aesthetics, and the
    ``geom_vline`` layer is given neither a mapping nor an ``xintercept``
    value.  Building and drawing the plot happen inside the ``raises`` block,
    so the error may come from either step.
    """
    with pytest.raises(PlotnineError):
        plot_aes = aes('x', 'y', xintercept='xintercept')
        plot = ggplot(df, plot_aes) + geom_point() + geom_vline(size=2)
        plot.draw_test()
def main():
    """Render and save every figure of the analysis.

    Configures matplotlib's mathtext font, silences the warnings emitted
    while drawing and saving, then builds one plot per curve family and
    hands each to ``save_both`` together with its output name.  The last
    figure shades the regions belonging to one free-bet and one
    qualifying-bet profit curve and annotates them.

    :return: None
    """
    mpl.rc('mathtext', fontset='cm')

    # Warnings about removed rows and about the saved image size/filename
    # are expected here and only clutter the output.
    warnings.filterwarnings('ignore',
                            r'(geom|position)_\w+ ?: Removed \d+ rows')
    warnings.filterwarnings('ignore', r'Saving .+ x .+ in image')
    warnings.filterwarnings('ignore', r'Filename: .+\.png')

    # -- Free-bet profit P_f
    df = concat_map(Pf_Ob_Ol, 'P_f', np.linspace(0.1, 1, 10))
    save_both(my_plot(df, 'O_b', 'O_l', 'P_f')
              + titles('P_f(O_b, O_l)')
              + limits((1, 10))
              + gg.geom_abline(slope=1, intercept=0,
                               linetype='dashed', color='grey')
              + gg.geom_line()
              , 'Pf_Ob_Ol')

    df = concat_map(Pf_Ob_σ, 'P_f', np.linspace(0.1, 1, 10))
    save_both(my_plot(df, 'O_b', 'σ', 'P_f')
              + titles('P_f(O_b, σ)')
              + limits((1, 10), (0, 5))
              + gg.geom_line()
              , 'Pf_Ob_σ')

    # -- Qualifying-bet profit P_q
    df = concat_map(Pq_Ob_Ol, 'P_q', np.linspace(-0.9, 0, 10))
    save_both(my_plot(df, 'O_b', 'O_l', 'P_q')
              + titles('P_q(O_b, O_l)')
              + limits((1, 10))
              + gg.geom_abline(slope=1, intercept=0,
                               linetype='dashed', color='grey')
              + gg.geom_line()
              , 'Pq_Ob_Ol')

    df = concat_map(Pq_Ob_σ, 'P_q', np.linspace(-0.9, 0, 10))
    save_both(my_plot(df, 'O_b', 'σ', 'P_q')
              + titles('P_q(O_b, σ)')
              + limits((1, 10), (0, 5))
              + gg.geom_line()
              , 'Pq_Ob_σ')

    # -- O'
    df = concat_map(Opr_Ob_Ol, 'Opr', np.linspace(1, 5, 9))
    save_both(my_plot(df, 'O_b', 'O_l', 'Opr')
              + titles("O'(O_b, O_l)")
              + limits((1, 10), (1, 10))
              + gg.geom_line()
              + gg.geom_abline(slope=1, intercept=0,
                               linetype='dashed', color='grey')
              , 'Opr_Ob_Ol')

    df = concat_map(Opr_Ob_σ, 'Opr', np.linspace(1, 5, 9))
    save_both(my_plot(df, 'O_b', 'σ', 'Opr')
              + titles("O'(O_b, σ)")
              + limits((1, 10), (0, 5))
              + gg.geom_line()
              , 'Opr_Ob_σ')

    df = (pd.DataFrame({'Opr': np.linspace(1, 21, 101)})
            .assign(Pf=lambda x: Opr_Pf(x.Opr)))
    save_both(my_plot(df, 'Opr', 'Pf')
              + titles("P_f(O')")
              + labs("O'", 'P_f')
              + limits((1, 20), (0, 1),
                       xbreaks=np.linspace(2, 20, 10),
                       ybreaks=np.linspace(0, 1, 11))
              + gg.geom_line()
              + gg.geom_hline(yintercept=C, linetype='dashed', color='grey')
              , 'Pf_Opr')

    # -- σ'
    df = concat_map(σpr_Ob_σ, 'σpr', np.linspace(0, 5, 11))
    save_both(my_plot(df, 'O_b', 'σ', 'σpr')
              + titles("σ'(O_b, σ)")
              + limits((1, 10), (0, 5))
              + gg.geom_line()
              , 'σpr_Ob_σ')

    df = (pd.DataFrame({'σpr': np.linspace(0, 21, 106)})
            .assign(Pq=lambda x: σpr_Pq(x.σpr)))
    save_both(my_plot(df, 'σpr', 'Pq')
              + titles("P_q(σ')")
              + labs("σ'", 'P_q')
              + limits((0, 20), (-1, 0),
                       xbreaks=np.linspace(0, 20, 11),
                       ybreaks=np.linspace(-1, 0, 11))
              + gg.geom_line()
              , 'Pq_σpr')

    # -- Liability, free bet
    df = concat_map(liab_Ob_Ol_free, 'liab', np.linspace(0, 10, 11))
    save_both(my_plot(df, 'O_b', 'O_l', 'liab', clab='-R_{bl}')
              + titles("-R_{bl}(O_b, O_l)", "S_b = 1, C_b = 0, C_l = 0.02",
                       mathrm('Free bet', dollars=False))
              + limits((1,20), (1, 10))
              + gg.geom_line()
              + gg.geom_abline(slope=1, intercept=0,
                               linetype='dashed', color='grey')
              , 'liab_Ob_Ol_free')

    # NOTE(review): this σ plot reuses the O_l generator (liab_Ob_Ol_free)
    # and the O_l axis limits, unlike the other σ plots above - looks like a
    # copy-paste; confirm which generator and limits were intended.
    df = concat_map(liab_Ob_Ol_free, 'liab', np.linspace(0, 10, 11))
    save_both(my_plot(df, 'O_b', 'σ', 'liab', clab='-R_{bl}')
              + titles("-R_{bl}(O_b, σ)", "S_b = 1, C_b = 0, C_l = 0.02",
                       mathrm('Free bet', dollars=False))
              + limits((1,20), (1, 10))
              + gg.geom_line()
              , 'liab_Ob_σ_free')

    # -- Liability, qualifying bet
    df = concat_map(liab_Ob_Ol_qual, 'liab', np.linspace(0, 10, 11))
    save_both(my_plot(df, 'O_b', 'O_l', 'liab', clab='-R_{bl}')
              + titles("-R_{bl}(O_b, O_l)", "S_b = 1, C_b = 0, C_l = 0.02",
                       mathrm('Qualifying bet', dollars=False))
              + limits((1,20), (1, 10))
              + gg.geom_line()
              + gg.geom_abline(slope=1, intercept=0,
                               linetype='dashed', color='grey')
              , 'liab_Ob_Ol_qual')

    # NOTE(review): same pattern as the free-bet σ plot - the O_l generator
    # (liab_Ob_Ol_qual) is plotted against 'σ'; confirm this is intended.
    df = concat_map(liab_Ob_Ol_qual, 'liab', np.linspace(0, 10, 11))
    save_both(my_plot(df, 'O_b', 'σ', 'liab', clab='-R_{bl}')
              + titles("-R_{bl}(O_b, σ)", "S_b = 1, C_b = 0, C_l = 0.02",
                       mathrm('Qualifying bet', dollars=False))
              + limits((1,20), (1, 10))
              + gg.geom_line()
              , 'liab_Ob_σ_qual')

    # -- Shape of the "more profitable" space
    # One free-bet curve and one qualifying-bet curve, tagged by bet type
    # and stacked; only the first row per O_b value is kept.
    df_Pf = Pf_Ob_σ(0.6).assign(profit=dollars('P_f'))
    df_Pq = Pq_Ob_σ(-0.3).assign(profit=dollars('P_q'))
    df = pd.concat((df_Pf, df_Pq), ignore_index=True)
    df.drop_duplicates('O_b', inplace=True)

    # Where the free-bet curve has σ == 0 and where the qualifying curve
    # has O_b == 1; both are read from the row labelled 0.
    Opr = df_Pf.query('σ==0').O_b[0]
    σpr = df_Pq.query('O_b==1').σ[0]

    labels = pd.DataFrame({
        'x': [Opr+0.1, 1, 9.8], 'y': [4.8, σpr, σpr + 0.3],
        'label': ["$O'$", "$σ'$", mathrm('More profit')]
    })
    lab_aes = gg.aes('x', 'y', label='label')

    save_both(
        gg.ggplot(df, gg.aes(x='O_b', y='σ'))
        + gg.geom_area(gg.aes(fill='profit'), alpha=0.3)
        + gg.geom_vline(xintercept=Opr, linetype='dashed')
        + gg.geom_hline(yintercept=σpr, linetype='dashed')

        # text alignment can't be specified in an aes, so each label gets
        # its own layer.  `.loc` slices are label-based and inclusive; with
        # the default RangeIndex they pick rows 0, 1 and 2 respectively
        # (the `.ix` indexer used before was removed in pandas 1.0).
        + gg.geom_text(lab_aes, data=labels.loc[:0], ha='left', va='top')
        + gg.geom_text(lab_aes, data=labels.loc[1:1], ha='left', va='bottom')
        + gg.geom_text(lab_aes, data=labels.loc[2:], ha='right', va='bottom')

        + gg.scale_fill_discrete(name=mathrm('Bet type'),
                                 labels=[mathrm('Free'), mathrm('Qualifying')])
        + limits((1, 10), (0, 5))
        + gg.ggtitle('%s "%s" %s' % (mathrm('Shape of the'),
                                     mathrm('more profitable'),
                                     mathrm('space')))
        + labs('O_b', 'σ')
        , 'Px_shapes')
Beispiel #31
0
def pseudotime_scatter(adata, y, facet=True, size=1.5, alpha=1,
                       color='black', ncol=2, lab_ypos=2):
    """Plots a scatter plot of pseudotime vs one or multiple variables

    Parameters
    --------------
    adata: AnnData
        The AnnData object being used for the analysis. Must be previously
        evaluated by `tl.pseudotime`.
    y: str or list
        If `y` is a str, it must be a column of adata.obs or an entry of
        adata.var_names and is used as the y-axis. Otherwise it is treated
        as a collection of such names, plotted with different point colors
        (names that cannot be found are dropped with a warning).
    facet: bool
        Whether to return a facetted plot or all signatures in a single plot.
        Only used if y is a list.
    size: float
        Controls the point size of the plot.
    alpha: float
        A value between 0 and 1. Controls point transparency.
    color: str
        Either the name of a column in adata.obs, in which case points are
        colored by that column, or a supported color name applied to all
        points. Only used if `y` is a str.
    ncol: int
        Number of columns in the facetting, if facet=True. Ignored otherwise.
    lab_ypos: float
        Controls the y-axis position of the cell cycle phase annotation, if present.

    Returns
    -------------
    A plotnine scatter plot of pseudotime.

    Raises
    -------------
    ValueError
        If `y` (or every entry of `y`) is found neither in adata.obs nor in
        adata.var_names.
    """
    if isinstance(y, str):
        #-- Get data
        if y in adata.obs.columns:
            plot_df = pd.DataFrame({'x': adata.obs['pseudotime'],
                                    'y': adata.obs[y]})
        elif y in adata.var_names:
            plot_df = pd.DataFrame({'x': adata.obs['pseudotime'],
                                    'y': adata[:, y].X.flatten()})
        else:
            raise ValueError('`y` variable not found')

        #-- Make plot
        if color in adata.obs.columns:
            # The mapped column has to live in the plotted frame; it used to
            # be referenced without ever being copied over.  A fixed column
            # name avoids clashing with 'x'/'y', and the legend keeps the
            # original column name as its title.
            plot_df['color'] = adata.obs[color].values
            points = geom_point(aes(color='color'), size=size, alpha=alpha)
            axis_labels = labs(x='Pseudotime', y=y, color=color)
        else:
            points = geom_point(size=size, alpha=alpha, color=color)
            axis_labels = labs(x='Pseudotime', y=y)
        time_scatter = (ggplot(plot_df, aes(x='x', y='y'))
          + points
          + axis_labels
          + theme_std)

    else:
        #-- Make multiple color plot
        sannot = pd.DataFrame({'pseudotime': adata.obs['pseudotime']})
        sannot['id'] = range(sannot.shape[0])
        #-- Checks
        check1 = [var in adata.var_names for var in y]
        check2 = [var in adata.obs.columns.values for var in y]
        idx = np.array(check1) | np.array(check2)
        y_arr = np.array(y)
        if not np.any(idx):
            raise ValueError('No variables in `y` found.')
        if not np.all(idx):
            warnings.warn('Variable not found! Dropping: ' + ', '.join((y_arr[~idx])))
            y = y_arr[idx]
        #-- Get y from obs or matrix (obs takes precedence):
        for var in y:
            if var in adata.obs.columns:
                sannot[var] = adata.obs[var]
            elif var in adata.var_names:
                sannot[var] = adata[:, var].X.flatten()
        plot_df = pd.melt(sannot, id_vars=['id', 'pseudotime'],
                          var_name='signature', value_name='score')
        # Keep the legend/facet order equal to the order given in `y`.
        # `reorder_categories` returns a new Series (its `inplace` option no
        # longer exists in current pandas), so assign the result back.
        plot_df['signature'] = (plot_df['signature'].astype('category')
                                .cat.reorder_categories(list(y)))

        if facet:
            time_scatter = (ggplot(plot_df, aes('pseudotime', 'score'))
             + facet_wrap('signature', scales='free_y', ncol=ncol)
             + geom_point(aes(color='signature'), alpha=alpha, size=size)
             + theme_std)
        else:
            time_scatter = (ggplot(plot_df, aes('pseudotime', 'score'))
             + geom_point(aes(color='signature'), alpha=alpha, size=size)
             + theme_std)

    if "cell_cycle_division" in adata.uns["scycle"]:
        cc_divs = adata.uns["scycle"]["cell_cycle_division"]
        # -- Cell cycle annotation
        # The first phase has no start line (None); labels are centred
        # within each phase, taking pseudotime to span [0, 1].
        cc_phase = pd.DataFrame(
            dict(
                starts=[
                    None,
                    cc_divs["pr_start"],
                    cc_divs["rep_start"],
                ],
                labels=["G1 PM", "G1 PR", "S/G2/M"],
                labpos=[
                    np.mean([0, cc_divs["pr_start"]]),
                    np.mean([cc_divs["pr_start"], cc_divs["rep_start"]]),
                    np.mean([cc_divs["rep_start"], 1]),
                ],
                y=lab_ypos,
            )
        )
        time_scatter = (time_scatter
        + geom_vline(aes(xintercept="starts"), linetype="dashed", data=cc_phase)
        + geom_text(aes(x="labpos", y="y", label="labels"), data=cc_phase))

    return time_scatter
Beispiel #32
0
def ggpca(x,
          y=None,
          center='col',
          scale='none',
          rlab=False,
          clab=False,
          cshow=None,
          rsize=4,
          csize=2,
          lsize=10,
          lnudge=0.03,
          ralpha=0.6,
          calpha=1.0,
          clightalpha=0,
          rname='sample',
          cname='variable',
          lname='',
          grid=True,
          printit=False,
          xsvd=None,
          invert1=False,
          invert2=False,
          colscale=None,
          **kwargs):
    """Build a PCA biplot (rows and columns of `x` on PC1/PC2).

    Parameters
    ----------
    x : pandas.DataFrame
        Data to decompose. Columns containing any missing value are dropped
        before anything else is derived from `x`.
    y : optional
        Class labels for the rows, looked up with ``y.loc[x.index]``. When
        omitted every row gets the class `rname`.
    center, scale :
        Passed through to `svdForPca`.
    rlab, clab : bool or labels
        Row / column text labels. ``True`` uses ``x.index`` / ``x.columns``,
        ``False`` uses empty labels; any other value is used as given.
    cshow : int or None
        Number of columns to show, ranked by ``PC1**2 + PC2**2``; ``None``
        shows all, ``0`` shows none.
    rsize, csize : float
        Point sizes for rows and columns.
    lsize, lnudge : float
        Text size and vertical nudge of the labels.
    ralpha, calpha : float
        Point transparency for rows and columns.
    clightalpha : float
        If > 0, columns outside the top `cshow` are kept with this alpha and
        a blank label; otherwise they are removed.
    rname, cname : str
        Class names used for rows (when `y` is None) and for columns.
    lname : str
        Title of the color legend.
    grid : bool
        If False, the panel grid and background are blanked.
    printit : bool
        If True, the plot is printed before being returned.
    xsvd : optional
        Precomputed decomposition. Indexed as ``xsvd[0]`` (row scores, used
        via ``.iloc``), ``xsvd[1]`` (singular values) and ``xsvd[2]``
        (column loadings, used via ``.iloc``); computed with `svdForPca`
        when omitted.
    invert1, invert2 : bool
        Flip the sign of PC1 / PC2.
    colscale : list or None
        Colors for the manual color scale; a default is chosen when there
        are fewer than 8 classes.
    **kwargs :
        Accepted but not used.

    Returns
    -------
    The plotnine plot object.
    """
    # Drop incomplete columns first, so that the default column labels and
    # `cshow` below describe exactly the columns that get decomposed.
    # (Deriving them from the unfiltered frame gave a label vector longer
    # than the loadings whenever a column contained missing values.)
    x = x.loc[:, x.isnull().sum(axis=0) == 0]
    if cshow is None:
        cshow = x.shape[1]
    if isinstance(rlab, bool):
        rlab = x.index if rlab else ''
    if isinstance(clab, bool):
        clab = x.columns if clab else ''
    if xsvd is None:
        xsvd = svdForPca(x, center, scale)
    # Scale rows and columns by their own PC1 range so both point clouds
    # end up with a comparable extent.
    rsf = np.max(xsvd[0].iloc[:, 0]) - np.min(xsvd[0].iloc[:, 0])
    csf = np.max(xsvd[2].iloc[0, :]) - np.min(xsvd[2].iloc[0, :])
    sizeRange = sorted([csize, rsize])
    alphaRange = sorted([calpha, ralpha])
    ggd = pd.DataFrame({
        'PC1': xsvd[0].iloc[:, 0] / rsf,
        'PC2': xsvd[0].iloc[:, 1] / rsf,
        'label': rlab,
        'size': rsize,
        'alpha': ralpha
    })
    cclass = []
    if cshow > 0:
        cdata = pd.DataFrame({
            'PC1': xsvd[2].iloc[0, :] / csf,
            'PC2': xsvd[2].iloc[1, :] / csf,
            'label': clab,
            'size': csize,
            'alpha': calpha
        })
        if cshow < x.shape[1]:
            # Keep the `cshow` columns lying farthest from the origin.
            cscores = cdata['PC1']**2 + cdata['PC2']**2
            keep = cscores.sort_values(ascending=False).head(cshow).index
            if clightalpha > 0:
                # Fade the remaining columns instead of removing them.
                cdata.loc[~cdata.index.isin(keep), 'label'] = ''
                cdata.loc[~cdata.index.isin(keep), 'alpha'] = clightalpha
                alphaRange = [
                    np.min([alphaRange[0], clightalpha]),
                    np.max([alphaRange[1], clightalpha])
                ]
            else:
                cdata = cdata.loc[cdata.index.isin(keep)]
        # Columns come first, then rows; `class` below follows that order.
        ggd = pd.concat([cdata, ggd])
        cclass = [cname] * cdata.shape[0]
    if invert1:
        ggd['PC1'] = -ggd['PC1']
    if invert2:
        ggd['PC2'] = -ggd['PC2']
    if y is not None:
        ggd['class'] = cclass + list(y.loc[x.index])
    else:
        ggd['class'] = cclass + ([rname] * x.shape[0])
    ggo = gg.ggplot(
        ggd,
        gg.aes(x='PC1',
               y='PC2',
               color='class',
               size='size',
               alpha='alpha',
               label='label'))
    ggo += gg.geom_hline(yintercept=0, color='lightgray')
    ggo += gg.geom_vline(xintercept=0, color='lightgray')
    ggo += gg.geom_point()
    ggo += gg.theme_bw()
    ggo += gg.geom_text(nudge_y=lnudge, size=lsize, show_legend=False)
    n_classes = len(ggd['class'].unique())
    # NOTE(review): with 8 or more classes and no `colscale`, None is
    # passed to scale_color_manual below - confirm that is acceptable.
    if colscale is None and n_classes < 8:
        colscale = [
            'darkslategray', 'goldenrod', 'lightseagreen', 'orangered',
            'dodgerblue', 'darkorchid'
        ]
        colscale = colscale[0:(n_classes - 1)] + ['gray']
        # Dedicated palettes for the two- and three-class cases.
        if len(colscale) == 2 and cshow > 0:
            colscale = ['black', 'darkgray']
        if len(colscale) == 2 and cshow == 0:
            colscale = ['black', 'red']
        if len(colscale) == 3:
            colscale = ['black', 'red', 'darkgray']
    ggo += gg.scale_color_manual(values=colscale, name=lname)
    ggo += gg.scale_size_continuous(guide=False, range=sizeRange)
    ggo += gg.scale_alpha_continuous(guide=False, range=alphaRange)
    # Axis titles report each component's share of the summed squared
    # singular values.
    ggo += gg.xlab('PC1 (' +
                   str(np.round(100 * xsvd[1][0]**2 /
                                ((xsvd[1]**2).sum()), 1)) +
                   '% explained var.)')
    ggo += gg.ylab('PC2 (' +
                   str(np.round(100 * xsvd[1][1]**2 /
                                ((xsvd[1]**2).sum()), 1)) +
                   '% explained var.)')
    if not grid:
        ggo += gg.theme(panel_grid_minor=gg.element_blank(),
                        panel_grid_major=gg.element_blank(),
                        panel_background=gg.element_blank())
    ggo += gg.theme(axis_ticks=gg.element_blank(),
                    axis_text_x=gg.element_blank(),
                    axis_text_y=gg.element_blank())
    if printit:
        print(ggo)
    return ggo
Beispiel #33
0
    def plotMutsHistogram(self,
                          value, *,
                          k=None,
                          mutant_order=1,
                          bins=30,
                          wt_vline=True,
                          ):
        """Histogram of a phenotype over all mutants of a given order.

        Parameters
        ----------
        value : {'latentPhenotype', 'observedPhenotype', 'observedEnrichment'}
            Name of the method of this object that supplies the plotted
            values.
        k : int or None
            Only used when `value` is 'latentPhenotype': which latent
            phenotype to plot, with ``1 <= k <= self.n_latent_phenotypes``.
        mutant_order : int
            Order of the mutants to plot. Only 1 (single mutants) is
            implemented.
        bins : int
            Number of histogram bins.
        wt_vline : bool
            Add a dashed vertical line at the value obtained for the empty
            substitution string (the wildtype).

        Returns
        -------
        plotnine.ggplot.ggplot
            Histogram of the phenotype for all mutants.

        Raises
        ------
        ValueError
            For a `mutant_order` other than 1, an unknown `value`, or an
            invalid `k` when `value` is 'latentPhenotype'.

        """
        if mutant_order != 1:
            raise ValueError('only implemented for `mutant_order` of 1')

        allowed = {'latentPhenotype', 'observedPhenotype',
                   'observedEnrichment'}
        if value not in allowed:
            raise ValueError(f"invalid `value` of {value}")

        if value == 'latentPhenotype':
            k_is_valid = (isinstance(k, int)
                          and 1 <= k <= self.n_latent_phenotypes)
            if not k_is_valid:
                raise ValueError(f"invalid `k` of {k}")
            extra_args = {'k': k}
            axis_label = f"latentPhenotype {k}"
        else:
            extra_args = {}
            axis_label = value

        phenotype = getattr(self, value)
        frame = pd.DataFrame(
            {value: [phenotype(sub, **extra_args) for sub in self._all_subs]})

        histogram = p9.ggplot(frame, p9.aes(value))
        for component in (
                p9.geom_histogram(bins=bins),
                p9.theme(figure_size=(3.5, 2.5)),
                p9.ylab(f"number of {mutant_order}-mutants"),
                p9.xlab(axis_label),
        ):
            histogram = histogram + component

        if wt_vline:
            # The empty substitution string stands for the wildtype.
            wildtype = phenotype('', **extra_args)
            histogram = histogram + p9.geom_vline(xintercept=wildtype,
                                                  color=CBPALETTE[1],
                                                  linetype='dashed')

        return histogram