コード例 #1
0
def cum_regret_plot(experiment_name, data_path=_DEFAULT_DATA_PATH):
    """Plot the mean cumulative regret of each agent against the time period.

    A translucent ribbon spanning the `lower_interval` / `upper_interval`
    aggregates of `cum_regret` is drawn around each mean line.

    Args:
      experiment_name: string = name of experiment config.
      data_path: string = where to look for the files.

    Returns:
      plot_dict: {experiment_name + '_cum_regret': ggplot plot}
    """
    results = load_data(experiment_name, data_path)

    # Summarise cumulative regret for every (t, agent) pair.
    grouped = results.groupby(['t', 'agent'])
    summary = grouped.agg({
        'cum_regret': [np.mean, lower_interval, upper_interval]
    })
    summary = summary.reset_index()

    # Flatten the two-level column labels by joining them with '_'. The
    # grouping keys are referenced below as 't_' and 'agent_'.
    summary.columns = ['_'.join(label) for label in summary.columns.values]

    plot = gg.ggplot(summary)
    plot = plot + gg.aes('t_', 'cum_regret_mean', colour='agent_')
    plot = plot + gg.geom_line(size=1.25, alpha=0.75)
    plot = plot + gg.geom_ribbon(
        gg.aes(ymin='cum_regret_lower_interval',
               ymax='cum_regret_upper_interval',
               fill='agent_'),
        alpha=0.1)
    plot = plot + gg.xlab('time period (t)')
    plot = plot + gg.ylab('cumulative regret')
    plot = plot + gg.scale_colour_brewer(
        name='agent_', type='qual', palette='Set1')

    return {experiment_name + '_cum_regret': plot}
コード例 #2
0
ファイル: plot.py プロジェクト: yongduek/ts_tutorial
def misspecified_plot(experiment_name='finite_misspecified',
                      data_path=_DEFAULT_DATA_PATH):
  """Specialized plotting script for TS tutorial paper misspecified TS.

  Builds two plots from the experiment results: the average instantaneous
  regret per agent over time, and the per-agent average of the first three
  entries of `posterior_mean` over time.

  Args:
    experiment_name: string = name of experiment config.
    data_path: string = where to look for the files.

  Returns:
    plot_dict: {'misspecified_regret': ggplot plot,
                'misspecified_action': ggplot plot}
  """
  df = load_data(experiment_name, data_path)

  def _parse_np_array(np_string):
    # posterior_mean arrives as text; drop the square brackets and split on
    # whitespace. The resulting array still holds strings.
    return np.array(np_string.replace('[', '')
                    .replace(']', '')
                    .strip()
                    .split())
  df['posterior_mean'] = df.posterior_mean.apply(_parse_np_array)

  # Action means: one float column per entry of the parsed posterior mean.
  new_col_list = ['mean_0', 'mean_1', 'mean_2']
  for n, col in enumerate(new_col_list):
    df[col] = df['posterior_mean'].apply(lambda x: float(x[n]))

  # Average regret and action means for every (agent, t) pair.
  plt_df = (df.groupby(['agent', 't'])
            .agg({'instant_regret': np.mean,
                  'mean_0': np.mean,
                  'mean_1': np.mean,
                  'mean_2': np.mean})
            .reset_index())

  regret_plot = (gg.ggplot(plt_df)
                 + gg.aes('t', 'instant_regret', colour='agent')
                 + gg.geom_line(size=1.25, alpha=0.75)
                 + gg.xlab('Timestep (t)')
                 + gg.ylab('Average instantaneous regret')
                 + gg.scale_colour_brewer(name='Agent', type='qual', palette='Set1')
                 + gg.coord_cartesian(ylim=(0, 0.02)))

  # Long format: one row per (agent, t, mean_i). group_id keeps a separate
  # line per agent/action pair while colour is shared per agent.
  melt_df = pd.melt(plt_df, id_vars=['agent', 't'], value_vars=new_col_list)
  melt_df['group_id'] = melt_df.agent + melt_df.variable
  action_plot = (gg.ggplot(melt_df)
                 + gg.aes('t', 'value', colour='agent', group='group_id')
                 + gg.geom_line(size=1.25, alpha=0.75)
                 + gg.coord_cartesian(ylim=(0, 0.05))
                 + gg.xlab('Timestep (t)')
                 + gg.ylab('Expected mean reward')
                 + gg.scale_colour_brewer(name='Agent', type='qual', palette='Set1'))

  plot_dict = {}
  plot_dict['misspecified_regret'] = regret_plot
  plot_dict['misspecified_action'] = action_plot
  return plot_dict
コード例 #3
0
ファイル: plot.py プロジェクト: zheolong/ts_tutorial
def plot_action_proportion(df_agent):
    """Plot, per timestep, the fraction of rows in which each action was taken.

    Args:
      df_agent: sub-dataframe for a single agent, with 't' and 'action'
        columns. Actions are enumerated as 0 .. max(action).

    Returns:
      ggplot plot with one line per action.
    """
    n_action = np.max(df_agent.action) + 1

    def _proportion_of(action_idx):
        # Share of rows at each timestep whose action equals action_idx.
        share = df_agent.groupby('t').agg({
            'action': lambda taken: np.mean(taken == action_idx)
        })
        return share.rename(columns={'action': 'action_' + str(action_idx)})

    per_action = [_proportion_of(idx) for idx in range(n_action)]
    wide_df = pd.concat(per_action, axis=1).reset_index()
    long_df = pd.melt(wide_df, id_vars='t')

    plot = gg.ggplot(long_df)
    plot = plot + gg.aes('t', 'value', colour='variable', group='variable')
    plot = plot + gg.geom_line(size=1.25, alpha=0.75)
    plot = plot + gg.xlab('Timestep (t)')
    plot = plot + gg.ylab('Action probability')
    plot = plot + gg.ylim(0, 1)
    plot = plot + gg.scale_colour_brewer(
        name='Variable', type='qual', palette='Set1')
    return plot
コード例 #4
0
def plot_qq(df, color_var, facet_var=None, title=''):
    """Build a QQ plot of observed against expected -log10 p-values.

    Inspired by https://www.cureffi.org/2012/08/15/qq-plots-with-matplotlib/

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'pval_neglog10' and the grouping column(s).
    color_var : string
        Column used to group the data and to color the points.
    facet_var : string or None
        Optional second grouping column; when given the plot is faceted by it.
    title : string
        Plot title.

    Returns
    -------
    The plotnine ggplot object.
    """
    # The most significant (i.e. largest -log10) p-value sets both axis limits.
    axis_max = max(df['pval_neglog10'])

    # Expected p-values are computed within each group by
    # calculate_expected_pval; the group keys are then dropped from the index.
    group_keys = color_var if facet_var is None else [color_var, facet_var]
    pvals = df.groupby(by=group_keys).apply(calculate_expected_pval)
    pvals = pvals.reset_index(level=group_keys, drop=True)

    n_colors = pvals[color_var].nunique()

    # Collect the components in the order they are added to the plot.
    components = [
        plt9.geom_point(size=0.1, alpha=0.25),
        plt9.geom_abline(
            slope=1, intercept=0, color='black', linetype='dashed'),
        plt9.theme_bw(),
    ]
    if n_colors < 9:
        components.append(
            plt9.scale_colour_brewer(palette='Dark2', type='qual'))
    components.append(
        plt9.labs(x='Expected (-log10 p-value)',
                  y='Observed (-log10 p-value)',
                  title=title,
                  color=''))
    components.append(plt9.lims(x=(0, axis_max), y=(0, axis_max)))
    if facet_var is not None:
        components.append(plt9.facet_wrap('~ {}'.format(facet_var), ncol=5))
    components.append(
        plt9.theme(strip_text=plt9.element_text(size=5),
                   axis_text_x=plt9.element_text(angle=-45, hjust=0)))
    # Legend keys are drawn larger and fully opaque so they stay readable.
    components.append(
        plt9.guides(color=plt9.guide_legend(
            override_aes={'size': 2.0, 'alpha': 1.0})))

    qqplot = plt9.ggplot(
        pvals,
        plt9.aes(x='expected_pval_neglog10',
                 y='pval_neglog10',
                 color=color_var))
    for component in components:
        qqplot = qqplot + component
    return qqplot
コード例 #5
0
ファイル: plot.py プロジェクト: zheolong/ts_tutorial
def simple_algorithm_plot(experiment_name, data_path=_DEFAULT_DATA_PATH):
    """Plot the average instantaneous regret of each agent per timestep.

    Args:
      experiment_name: string = name of experiment config.
      data_path: string = where to look for the files.

    Returns:
      plot_dict: {experiment_name: ggplot plot}
    """
    results = load_data(experiment_name, data_path)

    # Mean regret for every (t, agent) pair.
    mean_regret = results.groupby(['t', 'agent']).agg(
        {'instant_regret': np.mean})
    mean_regret = mean_regret.reset_index()

    plot = gg.ggplot(mean_regret)
    plot = plot + gg.aes('t', 'instant_regret', colour='agent')
    plot = plot + gg.geom_line(size=1.25, alpha=0.75)
    plot = plot + gg.xlab('Timestep (t)')
    plot = plot + gg.ylab('Average instantaneous regret')
    plot = plot + gg.scale_colour_brewer(
        name='Agent', type='qual', palette='Set1')

    plot_dict = {experiment_name: plot}
    return plot_dict
コード例 #6
0
ファイル: plot.py プロジェクト: zuzannna/ts_tutorial
def simple_algorithm_plot(experiment_name, data_path=_DEFAULT_DATA_PATH):
    """Plot the average per-period regret of each agent against time.

    Reference: https://web.stanford.edu/~bvr/pubs/TS_Tutorial.pdf

    Args:
      experiment_name: string = name of experiment config.
      data_path: string = where to look for the files.

    Returns:
      plot_dict: {experiment_name + '_simple': ggplot plot}
    """
    results = load_data(experiment_name, data_path)

    # Mean instantaneous regret for every (t, agent) pair.
    mean_regret = results.groupby(['t', 'agent']).agg(
        {'instant_regret': np.mean})
    mean_regret = mean_regret.reset_index()

    plot = gg.ggplot(mean_regret)
    plot = plot + gg.aes('t', 'instant_regret', colour='agent')
    plot = plot + gg.geom_line(size=1.25, alpha=0.75)
    plot = plot + gg.xlab('time period (t)')
    plot = plot + gg.ylab('per-period regret')
    plot = plot + gg.scale_colour_brewer(
        name='agent', type='qual', palette='Set1')

    return {experiment_name + '_simple': plot}
コード例 #7
0
ファイル: plot.py プロジェクト: zheolong/ts_tutorial
def cumulative_travel_time_plot(experiment_name, data_path=_DEFAULT_DATA_PATH):
    """Plot the cumulative travel-time ratio relative to the optimal path.

    The plotted quantity is (cum_optimal - cum_regret) / cum_optimal, averaged
    per agent and timestep, with a dashed reference line at 1.

    Args:
      experiment_name: string = name of experiment config.
      data_path: string = where to look for the files.

    Returns:
      plot_dict: {experiment_name + '_cum': ggplot plot}
    """
    results = load_data(experiment_name, data_path)
    results['cum_ratio'] = (
        (results.cum_optimal - results.cum_regret) / results.cum_optimal)

    # Mean ratio for every (t, agent) pair.
    mean_ratio = results.groupby(['t', 'agent']).agg({'cum_ratio': np.mean})
    mean_ratio = mean_ratio.reset_index()

    plot = gg.ggplot(mean_ratio)
    plot = plot + gg.aes('t', 'cum_ratio', colour='agent')
    plot = plot + gg.geom_line(size=1.25, alpha=0.75)
    plot = plot + gg.xlab('Timestep (t)')
    plot = plot + gg.ylab('Total distance / optimal')
    plot = plot + gg.scale_colour_brewer(
        name='Agent', type='qual', palette='Set1')
    plot = plot + gg.aes(ymin=1)
    # Dashed horizontal reference at a ratio of 1.
    plot = plot + gg.geom_hline(
        yintercept=1, linetype='dashed', size=2, alpha=0.5)

    plot_dict = {experiment_name + '_cum': plot}
    return plot_dict
コード例 #8
0
def main():
    """Run CLI.

    Reads an H5 AnnData file, optionally keeps only cells flagged by
    'cell_passes_qc', sums the expression of the chosen chrX and chrY genes
    per cell, averages those sums per (experiment_id, sex) and saves a
    scatterplot of the two means to '<output_file>.png'.
    """
    parser = argparse.ArgumentParser(description="""
            Read anndata object. Read chrX and chrY genes, plot scatterplot
            of mean expression of those signature across samples. Anndata
            should have experiment_id and sex columns.
            """)
    parser.add_argument(
        '-h5', '--h5_anndata',
        dest='h5',
        required=True,
        help='H5 AnnData file.')
    parser.add_argument(
        '-Y', '--chrY_genes',
        default='',
        dest='Y',
        required=False,
        help='TSV file of Y genes. If none, uses all genes on Y chr.')
    parser.add_argument(
        '-X', '--chrX_genes',
        default='',
        dest='X',
        required=False,
        help='TSV file of X genes. If none, uses XIST.')
    parser.add_argument(
        '-o', '--output_file',
        default='scatterplot-sex_sample_swap_check',
        dest='o',
        help='Basename for output files.')
    args = parser.parse_args()

    # Load the AnnData file.
    adata = sc.read_h5ad(filename=args.h5)

    # When a QC flag is present, keep only the cells that pass it and drop
    # the flag column afterwards.
    if 'cell_passes_qc' in adata.obs:
        adata = adata[adata.obs['cell_passes_qc'], :]
        del adata.obs['cell_passes_qc']

    # chrX genes: the default is a single Ensembl id (hugo name: XIST).
    if args.X == '':
        x_genes = ['ENSG00000229807']
        x_label = "Mean XIST gene expression (counts)"
    else:
        x_genes = pd.read_csv(args.X, sep="\t")['ensembl_gene_id']
        x_label = "Mean X chr gene expression (counts)"

    # chrY genes: the default is a built-in list of Ensembl ids.
    if args.Y == '':
        # Hugo names of the ids below, in the same order:
        #   SRY, RPS4Y1, ZFY, TGIF2LY, PCDH11Y, TSPY2, AMELY, TBL1Y, PRKY,
        #   TSPY4, TSPY8, TSPY3, TSPY1, TSPY9P, TSPY10, USP9Y, DDX3Y, UTY,
        #   TMSB4Y, VCY, VCY1B, NLGN4Y, CDY2B, CDY2A, HSFY1, HSFY2,
        #   AC007244.1, KDM5D, EIF1AY, RPS4Y2, RBMY1B, RBMY1A1, RBMY1D,
        #   RBMY1E, PRY2, RBMY1F, RBMY1J, PRY, BPY2, DAZ1, DAZ2, PRYP3,
        #   CDY1B, BPY2B, DAZ3, DAZ4, BPY2C, CDY1
        y_genes = [
            "ENSG00000184895", "ENSG00000129824", "ENSG00000067646",
            "ENSG00000176679", "ENSG00000099715", "ENSG00000168757",
            "ENSG00000099721", "ENSG00000092377", "ENSG00000099725",
            "ENSG00000233803", "ENSG00000229549", "ENSG00000228927",
            "ENSG00000258992", "ENSG00000238074", "ENSG00000236424",
            "ENSG00000114374", "ENSG00000067048", "ENSG00000183878",
            "ENSG00000154620", "ENSG00000129864", "ENSG00000129862",
            "ENSG00000165246", "ENSG00000129873", "ENSG00000182415",
            "ENSG00000172468", "ENSG00000169953", "ENSG00000286265",
            "ENSG00000012817", "ENSG00000198692", "ENSG00000280969",
            "ENSG00000242875", "ENSG00000234414", "ENSG00000244395",
            "ENSG00000242389", "ENSG00000169807", "ENSG00000169800",
            "ENSG00000226941", "ENSG00000169789", "ENSG00000183753",
            "ENSG00000188120", "ENSG00000205944", "ENSG00000169763",
            "ENSG00000172352", "ENSG00000183795", "ENSG00000187191",
            "ENSG00000205916", "ENSG00000185894", "ENSG00000172288"
        ]
    else:
        y_genes = pd.read_csv(args.Y, sep="\t")['ensembl_gene_id']

    # Flag the selected genes, then sum their expression for every cell.
    adata.var['X_chr-gene'] = np.in1d(adata.var.index, x_genes)
    adata.var['Y_chr-gene'] = np.in1d(adata.var.index, y_genes)
    x_subset = adata[:, adata.var['X_chr-gene']]
    adata.obs['X_chr-sum'] = x_subset.X.todense().sum(axis=1)
    y_subset = adata[:, adata.var['Y_chr-gene']]
    adata.obs['Y_chr-sum'] = y_subset.X.todense().sum(axis=1)

    if 'sex' not in adata.obs.columns:
        adata.obs['sex'] = 'not reported'

    # Mean of the per-cell sums for every (experiment_id, sex) pair.
    per_sample = adata.obs[['experiment_id', 'sex', 'Y_chr-sum', 'X_chr-sum']]
    per_sample = per_sample.groupby(['experiment_id', 'sex']).mean()
    per_sample = per_sample.dropna().reset_index()

    # Save scatterplot with mean expression per sample.
    scatter = (plt9.ggplot(per_sample)
               + plt9.aes(x='X_chr-sum', y='Y_chr-sum', color='sex')
               + plt9.theme_bw()
               + plt9.scale_colour_brewer(type='qual', palette='Dark2')
               + plt9.geom_point(alpha=0.45)
               + plt9.ylab("Mean Y chr gene expression (counts)")
               + plt9.xlab(x_label))
    scatter.save('{}.png'.format(args.o), dpi=300, width=4, height=4)
コード例 #9
0
ファイル: plot.py プロジェクト: zuzannna/ts_tutorial
def concurrent_agents_plot(experiment_name='graph_indep_concurrent',
                           data_path=_DEFAULT_DATA_PATH,
                           paper_version=True):
    """Plot regret for concurrent agents, per action and per time period.

    Passing paper_version=True should be used to reproduce Fig. 14 of the
    paper for K = 1,10,20,50,100. In this case, the labels in the legend are
    manually ordered by the values of K. Otherwise, the labels are ordered
    alphabetically.

    Args:
      experiment_name: string = name of experiment config.
      data_path: string = where to look for the files.
      paper_version: bool = use the manually ordered legend and fixed colors.

    Returns:
      plot_dict: {'per_action_plot': ggplot plot,
                  'per_period_plot': ggplot plot}
    """
    # Argument order matches every other load_data call in this file:
    # experiment name first, data path second.
    df = load_data(experiment_name, data_path)

    plt_df_per_action = (df.groupby(['agent', 't', 'agent_id',
                                     'action_id']).agg({
                                         'instant_regret':
                                         np.mean
                                     }).reset_index())

    plt_df_per_period = (df.groupby(['agent', 't']).agg({
        'instant_regret':
        np.mean
    }).reset_index())

    if not paper_version:
        # Legend entries come straight from the 'agent' column.
        p_per_action = (
            gg.ggplot(plt_df_per_action) +
            gg.aes('action_id', 'instant_regret', colour='agent') +
            gg.geom_line() + gg.geom_line(size=1.25, alpha=0.75) +
            gg.xlim(0, 2.5 * len(plt_df_per_period.groupby('t'))) +
            gg.scale_colour_brewer(name='agent', type='qual', palette='Set1') +
            gg.labels.xlab('number of actions') +
            gg.labels.ylab('per-period regret'))

        p_per_period = (
            gg.ggplot(plt_df_per_period) +
            gg.aes('t', 'instant_regret', colour='agent') + gg.geom_line() +
            gg.geom_line(size=1.25, alpha=0.75) +
            gg.scale_colour_brewer(name='agent', type='qual', palette='Set1') +
            gg.labels.xlab('time period (t)') +
            gg.labels.ylab('per-period regret'))
    else:
        # Colour by the id that get_agent_id derives from the agent name so
        # that the manual labels below line up with the legend order.
        plt_df_per_action['agent_id'] = plt_df_per_action.agent.apply(
            get_agent_id)
        plt_df_per_period['agent_id'] = plt_df_per_period.agent.apply(
            get_agent_id)

        custom_labels = ['K = 1', 'K = 10', 'K = 20', 'K = 50', 'K = 100']
        custom_colors = ["#E41A1C", "#377EB8", "#4DAF4A", "#984EA3", "#FF7F00"]

        p_per_action = (
            gg.ggplot(plt_df_per_action) +
            gg.aes('action_id', 'instant_regret', colour='agent_id') +
            gg.geom_line() + gg.geom_line(size=1.25, alpha=0.75) +
            gg.xlim(0, 2.5 * len(plt_df_per_period.groupby('t'))) +
            gg.scale_color_manual(
                name='agent', labels=custom_labels, values=custom_colors) +
            gg.labels.xlab('number of actions') +
            gg.labels.ylab('per-action regret'))

        p_per_period = (
            gg.ggplot(plt_df_per_period) +
            gg.aes('t', 'instant_regret', colour='agent_id') + gg.geom_line() +
            gg.geom_line(size=1.25, alpha=0.75) + gg.scale_color_manual(
                name='agent', labels=custom_labels, values=custom_colors) +
            gg.labels.xlab('time period (t)') +
            gg.labels.ylab('per-period regret'))

    plot_dict = {}
    plot_dict['per_action_plot'] = p_per_action
    plot_dict['per_period_plot'] = p_per_period
    return plot_dict
コード例 #10
0
def plot_ecdf(df_plot,
              variable_column,
              color_column='none',
              output_file='plot_distribution',
              facet_column='none',
              x_log10=False):
    """Plot the empirical cumulative distribution of a column to png.

    Parameters
    ----------
    df_plot : pandas.DataFrame
        DataFrame with <variable_column> as a column.
    variable_column : string
        Name of the column whose distribution is plotted.
    color_column : string
        Column to color by; 'none' disables coloring.
    output_file : string
        Basename of output file ('.png' is appended).
    facet_column : string
        Column to facet the plot by; 'none' disables faceting.
    x_log10 : bool
        If true, the x axis uses a log10 transformation.

    Returns
    -------
    int
        Always 0.
    """
    if color_column == 'none':
        mapping = plt9.aes(x=variable_column)
        n_colors = 0
    else:
        mapping = plt9.aes(x=variable_column, color=color_column)
        n_colors = df_plot[color_column].nunique()

    gplt = plt9.ggplot(df_plot, mapping)
    gplt = gplt + plt9.theme_bw()
    gplt = gplt + plt9.stat_ecdf(alpha=0.8)

    # Both axes are drawn without minor breaks; only the x axis can be
    # log-transformed.
    x_scale_kwargs = {'trans': 'log10'} if x_log10 else {}
    x_scale_kwargs['minor_breaks'] = 0
    gplt = gplt + plt9.scale_x_continuous(**x_scale_kwargs)
    gplt = gplt + plt9.scale_y_continuous(minor_breaks=0)
    gplt = gplt + plt9.labs(y='Cumulative density', title='')

    # More than 20 colors: hide the legend. Fewer than 9: use the Dark2
    # qualitative palette. Otherwise the default color scale is kept.
    if n_colors > 20:
        gplt = gplt + plt9.theme(legend_position='none')
    elif 0 < n_colors < 9:
        gplt = gplt + plt9.scale_colour_brewer(palette='Dark2', type='qual')

    png_path = '{}.png'.format(output_file)
    if facet_column == 'none':
        gplt.save(png_path, dpi=300, width=4, height=4)
        return 0

    # Faceted output: the figure size scales with the number of facets.
    gplt = gplt + plt9.facet_wrap('~ {}'.format(facet_column), ncol=5)
    n_facets = df_plot[facet_column].nunique()
    gplt.save(png_path,
              dpi=300,
              width=6 * (n_facets / 4),
              height=4 * (n_facets / 4),
              limitsize=False)
    return 0
コード例 #11
0
def plot_umi_mt_density(df_plot,
                        output_file='plot_umi_mt_density',
                        facet_column='none',
                        color_var='density',
                        density_contour=False):
    """Plot total counts against percent mitochondrial counts to png.

    Parameters
    ----------
    df_plot : pandas.DataFrame
        DataFrame with the following keys 'total_counts',
        'pct_counts_gene_group__mito_transcript'. When color_var is
        'density', a column named '1251234_density' is written to this
        DataFrame in place.
    output_file : string
        Basename of output file ('.png' is appended).
    facet_column : string
        Column to facet the output by; 'none' disables faceting.
    color_var : string
        Column to color the points by, or 'density' to color by the value
        returned from calculate_density.
    density_contour : bool
        If true, overlay 2d density contours.

    Returns
    -------
    NULL
    """
    # Unlikely-to-collide column name for the computed density values.
    density_column = "1251234_density"
    mito_column = 'pct_counts_gene_group__mito_transcript'
    qc_column = 'cell_passes_qc'

    # Legend title; columns without a dedicated title use their own name.
    known_titles = {mito_column: '% MT\n', qc_column: 'Cell passed QC\n'}
    if color_var == 'density':
        color_title = 'Density\n'
        color_var = density_column
        df_plot[color_var] = calculate_density(df_plot, facet_column)
    else:
        color_title = known_titles.get(color_var, color_var)

    gplt = plt9.ggplot(
        df_plot,
        plt9.aes(x='total_counts', y=mito_column, color=color_var))
    gplt = gplt + plt9.theme_bw()
    gplt = gplt + plt9.geom_point(alpha=0.5, size=0.8)
    gplt = gplt + plt9.scale_x_continuous(
        trans='log10', labels=comma_labels, minor_breaks=0)

    # Pick a color scale that suits the variable being shown.
    if color_var == mito_column:
        gplt = gplt + plt9.scale_color_gradient2(low='#3B9AB2',
                                                 mid='#EBCC2A',
                                                 high='#F21A00',
                                                 midpoint=50,
                                                 limits=[0, 100])
        gplt = gplt + plt9.guides(color=plt9.guide_colorbar(ticks=False))
    elif color_var == qc_column:
        gplt = gplt + plt9.scale_colour_brewer(type='qual', palette='Dark2')
    elif color_var == density_column:
        gplt = gplt + plt9.scale_color_cmap(cmap_name='viridis')

    if density_contour:
        gplt = gplt + plt9.geom_density_2d(alpha=0.5)
    gplt = gplt + plt9.labs(x='Number of molecules',
                            y='Percent of molecules from MT genes',
                            title='',
                            color=color_title)

    png_path = '{}.png'.format(output_file)
    if facet_column == 'none':
        gplt.save(png_path, dpi=300, width=4, height=4)
    else:
        # Faceted output: the figure size scales with the number of facets.
        gplt = gplt + plt9.facet_wrap('~ {}'.format(facet_column), ncol=5)
        n_samples = df_plot[facet_column].nunique()
        gplt.save(png_path,
                  dpi=300,
                  width=4 * (n_samples / 2),
                  height=4 * (n_samples / 4),
                  limitsize=False)