Exemple #1
0
def _base_scaling(plt_df: pd.DataFrame,
                  sweep_vars: Optional[Sequence[str]] = None,
                  with_baseline: bool = True) -> gg.ggplot:
    """Build the shared point layer behind the deep sea scaling plots.

    Points are drawn at (size, episode) and coloured by the `solved` column.
    When some rows are not `finished`, the point shape encodes `finished` too.

    Args:
      plt_df: data to plot; the code reads its `size`, `episode`, `finished`
        and `solved` columns.
      sweep_vars: forwarded to `_make_baseline` when a baseline is drawn.
      with_baseline: whether to overlay the dashed baseline line.

    Returns:
      The assembled ggplot object.
    """
    all_finished = np.all(plt_df.finished)
    all_solved = np.all(plt_df.solved)

    # Shape only carries information when at least one run is unfinished.
    if all_finished:
        point_mapping = gg.aes(colour='solved')
    else:
        point_mapping = gg.aes(shape='finished', colour='solved')

    plot = gg.ggplot(plt_df) + gg.aes(x='size', y='episode')
    plot += gg.geom_point(point_mapping, size=3, alpha=0.75)
    if not all_finished:
        plot += gg.scale_shape_manual(values=['x', 'o'])

    # A single colour (blue) when everything is solved, otherwise [red, blue].
    palette = ['#313695'] if all_solved else ['#d73027', '#313695']
    plot += gg.scale_colour_manual(values=palette)

    if with_baseline:
        baseline_df = _make_baseline(plt_df, sweep_vars)
        plot += gg.geom_line(data=baseline_df,
                             colour='black',
                             linetype='dashed',
                             alpha=0.4,
                             size=1.5)
    return plot
Exemple #2
0
def ensemble_plot(experiment_name='ensemble_nn', data_path=_DEFAULT_DATA_PATH):
    """Specialized plotting script for TS tutorial paper ensemble NN.

    Loads the experiment data, averages `instant_regret` for every
    (agent, t) pair and builds one line plot per agent family.

    Args:
      experiment_name: name handed to `load_data`; also used as the prefix of
        every key in the returned dict.
      data_path: location handed to `load_data`.

    Returns:
      Dict mapping '<experiment_name>_<agent_family>' to the ggplot object
      drawn for that family.
    """
    df = load_data(experiment_name, data_path)
    # The string alias 'mean' selects the same groupby mean as passing np.mean,
    # without the callable-deprecation warning newer pandas emits.
    plt_df = (df.groupby(['agent', 't']).agg({
        'instant_regret': 'mean'
    }).reset_index())

    def _get_agent_family(agent_name):
        """Map a raw agent name onto the family label used for faceting."""
        lowered = agent_name.lower()
        if 'dropout' in lowered:
            return 'Dropout'
        elif 'ensemble' in lowered:
            return 'Ensemble'
        elif '/' in lowered:
            return 'Annealing epsilon'
        else:
            return 'Fixed epsilon'

    def _rename_ensemble(agent_name):
        """Rewrite '<n>-...ensemble...' names as 'ensemble=<n, zero-padded>'."""
        if 'ensemble' in agent_name:
            n_ensemble = agent_name.split('-')[0]
            # Zero-padding to width 3, presumably so the names sort in
            # numeric order when compared as strings.
            return 'ensemble=' + n_ensemble.zfill(3)
        else:
            return agent_name

    plt_df['agent_name'] = plt_df.agent.apply(_rename_ensemble)
    plt_df['agent_family'] = plt_df.agent.apply(_get_agent_family)

    custom_colors = ['#d53e4f', '#fdae61', '#a6d96a', '#66c2a5', '#5e4fa2']

    plot_dict = {}
    # Group by the scalar column name, not a one-element list: with a list,
    # pandas >= 2 yields 1-tuples as keys, which breaks both the comparison
    # with 'Ensemble' and the string concatenation for the dict key below.
    for agent_family, df_family in plt_df.groupby('agent_family'):
        if agent_family == 'Ensemble':
            # NOTE(review): the labels assume exactly these five ensemble
            # sizes are present in the data - confirm against the experiment.
            custom_labels = [
                'Ensemble 3', 'Ensemble 10', 'Ensemble 30', 'Ensemble 100',
                'Ensemble 300'
            ]
            gg_legend = gg.scale_colour_manual(values=custom_colors,
                                               labels=custom_labels,
                                               name='Agent')
        else:
            gg_legend = gg.scale_colour_manual(custom_colors, name='Agent')

        p = (gg.ggplot(df_family) +
             gg.aes('t', 'instant_regret', colour='agent_name') +
             gg.geom_line(size=1.25, alpha=0.75) +
             gg.facet_wrap('~ agent_family') + gg_legend +
             gg.coord_cartesian(ylim=(0, 60)) + gg.xlab('Timestep (t)') +
             gg.ylab('Average instantaneous regret') +
             gg.theme(figure_size=(6, 6)))
        plot_dict[experiment_name + '_' + agent_family] = p

    return plot_dict
Exemple #3
0
def bsuite_bar_plot(df_in: pd.DataFrame,
                    sweep_vars: Sequence[str] = None) -> gg.ggplot:
  """Draw the bsuite score bar plot, one bar per `env`, coloured by `type`.

  Args:
    df_in: raw data, cleaned with `_clean_bar_plot_data` before plotting.
    sweep_vars: optional columns to facet by (one facet row per setting).

  Returns:
    The ggplot object, sized according to the number of facet rows.
  """
  df = _clean_bar_plot_data(df_in, sweep_vars)
  palette = plotting.CATEGORICAL_COLOURS

  plot = gg.ggplot(df)
  plot += gg.aes(x='env', y='score', colour='type', fill='type')
  plot += gg.geom_bar(position='dodge', stat='identity')
  plot += gg.geom_hline(yintercept=1., linetype='dashed', alpha=0.5)
  plot += gg.scale_colour_manual(palette)
  plot += gg.scale_fill_manual(palette)
  plot += gg.xlab('experiment')
  plot += gg.theme(axis_text_x=gg.element_text(angle=25, hjust=1))

  # Unfinished jobs get an extra alpha mapping so they appear faded.
  if not all(df.finished):
    plot += gg.aes(alpha='finished')
    plot += gg.scale_alpha_discrete(range=[0.3, 1.0])

  # The figure height grows with the number of distinct sweep settings.
  n_hypers = 1
  if sweep_vars:
    plot += gg.facet_wrap(sweep_vars, labeller='label_both', ncol=1)
    n_hypers = df[sweep_vars].drop_duplicates().shape[0]
  return plot + gg.theme(figure_size=(14, 3 * n_hypers + 1))
Exemple #4
0
def plot_individual_returns(
        df_in: pd.DataFrame,
        max_episode: int,
        return_column: str = 'episode_return',
        colour_var: Optional[str] = None,
        yintercept: Optional[float] = None,
        sweep_vars: Optional[Sequence[str]] = None) -> gg.ggplot:
    """Plot individual learning curves: one curve per sweep setting.

    Args:
      df_in: input data; copied, so the caller's frame is not modified.
      max_episode: upper x-axis limit (the lower limit is 0).
      return_column: column plotted on the y axis.
      colour_var: optional column used to colour the lines. With five or fewer
        distinct values it is treated as categorical and drawn with
        `FIVE_COLOURS`.
      yintercept: optional height of a dashed horizontal reference line. Any
        value other than None draws the line, including 0.0.
      sweep_vars: columns identifying a sweep setting; used for grouping the
        curves and passed on to `facet_sweep_plot`.

    Returns:
      The ggplot object returned by `facet_sweep_plot`.
    """
    df = df_in.copy()
    df['unique_group'] = _make_unique_group_col(df, sweep_vars)

    # Cast to categorical BEFORE the frame is handed to ggplot, so the manual
    # (discrete) colour scale never depends on the plot seeing a later
    # in-place mutation of `df`.
    use_manual_colours = False
    if colour_var and len(df[colour_var].unique()) <= 5:
        df[colour_var] = df[colour_var].astype('category')
        use_manual_colours = True

    p = (gg.ggplot(df) +
         gg.aes(x='episode', y=return_column, group='unique_group') +
         gg.coord_cartesian(xlim=(0, max_episode)))
    if colour_var:
        p += gg.geom_line(gg.aes(colour=colour_var), size=1.1, alpha=0.75)
        if use_manual_colours:
            p += gg.scale_colour_manual(values=FIVE_COLOURS)
    else:
        p += gg.geom_line(size=1.1, alpha=0.75, colour='#313695')

    # `is not None` rather than truthiness: a reference line at 0.0 is valid.
    if yintercept is not None:
        p += gg.geom_hline(yintercept=yintercept,
                           alpha=0.5,
                           size=2,
                           linetype='dashed')
    return facet_sweep_plot(p, sweep_vars, tall_plot=True)
Exemple #5
0
    def plot_time_curve_with_threshold(self):
        """Plot bacteria and actin counts against time and show the figure.

        Reshapes `self.aggregated` to long form (one row per hour and object
        kind), draws points and lines coloured by object kind, stores the
        figure on `self.time_curve_fig` and displays it in `self.out_plot2`.
        """
        long_counts = self.aggregated.melt(
            id_vars='hour',
            value_vars=['number_bacteria', 'number_actin'],
            value_name='counts',
            var_name='Object')
        palette = self.create_color_list()

        # One x tick per distinct hour found in the results.
        hour_ticks = np.sort(self.result.hour.unique())

        colour_scale = pn.scale_colour_manual(
            values=palette,
            labels=list(self.sel_channel_time.value),
            name="")
        x_scale = pn.scale_x_continuous(
            breaks=hour_ticks,
            labels=list(hour_ticks.astype(str)))

        figure = (
            ggplot(long_counts, aes("hour", "counts", color="Object"))
            + geom_point()
            + geom_line()
            + labels.xlab("Time [hours]")
            + labels.ylab("Average number of objects/nuclei")
            + colour_scale
            + pn.labs(colour="")
            + x_scale
        )
        self.time_curve_fig = figure

        # Replace whatever the output widget currently shows.
        self.out_plot2.clear_output()
        with self.out_plot2:
            display(figure)
Exemple #6
0
    def plot_time_curve_by_channel(self, b=None):
        """Callback to plot the time curve of bacteria/nuclei per channel.

        Called by plot_time_curve_button. Aggregates the data first when that
        has not happened yet, then plots the `normalized` column against
        `hour` for every selected channel. Prints a hint and does nothing
        else when no channel is selected.

        Args:
          b: unused in the body; presumably the widget that fired the
            callback.
        """
        if self.aggregated is None:
            self.data_aggregation()

        selected = self.sel_channel_time.value
        if len(selected) == 0:
            print("Select at least one channel")
            return

        rows = self.aggregated.channel.isin(selected)
        subset = self.aggregated[rows].copy(deep=True)
        # Ordered categorical, so the channels keep the selection order.
        channel_dtype = pd.CategoricalDtype(selected, ordered=True)
        subset.loc[:, "channel"] = subset.channel.astype(channel_dtype)

        palette = self.create_color_list()
        hour_ticks = np.sort(self.result.hour.unique())

        figure = (
            ggplot(subset, aes("hour", "normalized", color="channel"))
            + geom_point()
            + geom_line()
            + labels.xlab("Time [hours]")
            + labels.ylab("Average number of bacteria/nuclei")
            + pn.scale_colour_manual(
                values=palette, labels=list(selected), name="")
            + pn.labs(colour="")
            + pn.scale_x_continuous(
                breaks=hour_ticks, labels=list(hour_ticks.astype(str)))
        )
        self.time_curve_fig = figure

        self.out_plot2.clear_output()
        with self.out_plot2:
            display(figure)
Exemple #7
0
    def __plot(
        self,
        plot_data,
        x,
        y,
        colour,
        lbl_x,
        lbl_y,
        facet,
        facet_scales,
        facet_by,
        smoothed,
        points,
        error_bars,
        save,
    ):
        """Assemble a line/point plot from `plot_data` and optionally save it.

        Args:
          plot_data: data handed to `ggplot`.
          x, y, colour: aesthetic mappings for the plot.
          lbl_x, lbl_y: axis labels.
          facet: when truthy, wrap the plot into facets by `facet_by`.
          facet_scales: `scales` argument forwarded to `facet_wrap`.
          facet_by: faceting specification; also passed to
            `self.get_facet_rows` to obtain the grid dimensions.
          smoothed: when truthy draw a moving-average smooth, else a line.
          points: when truthy add a point layer.
          error_bars: accepted but currently ignored (not implemented).
          save: mapping of keyword arguments forwarded to the plot's `save`;
            a falsy value skips saving.

        Returns:
          The assembled plot object.
        """
        palette = [
            "#000000",
            "#E69F00",
            "#56B4E9",
            "#009E73",
            "#0072B2",
            "#D55E00",
            "#CC79A7",
        ]
        moving_average = {"window": 4, "center": True, "min_periods": 1}

        figure = ggplot(data=plot_data, mapping=aes(x=x, y=y, colour=colour))
        figure += xlab(lbl_x)
        figure += ylab(lbl_y)

        if facet:
            # TODO: use facet as save
            n_rows, n_cols = self.get_facet_rows(plot_data, facet_by)
            figure += facet_wrap(
                facet_by, nrow=n_rows, ncol=n_cols, scales=facet_scales
            )
        if points:
            figure += geom_point()

        # TODO: error bars need a generic way to be computed; until then the
        # `error_bars` flag has no effect.

        # TODO: use smooth as save
        if smoothed:
            trend = geom_smooth(
                method="mavg", se=False, method_args=moving_average
            )
        else:
            trend = geom_line()
        figure += trend

        figure += scale_colour_manual(values=palette, guide=False)
        figure += scale_x_continuous(labels=label_x)
        figure += theme(figure_size=(15, 18), dpi=150)

        if save:
            figure.save(**save)
        return figure
Exemple #8
0
def _plot_regret_group(df: pd.DataFrame, group_col: str) -> gg.ggplot:
  """Plots the average regret through time when grouped.

  Args:
    df: data with `episode`, `average_regret` and `group_col` columns. It is
      not modified.
    group_col: column to group and colour by. The plot labels use this name
      with underscores replaced by spaces.

  Returns:
    The ggplot object with one smoothed curve per group.
  """
  group_name = group_col.replace('_', ' ')
  # assign() returns a new frame, so the categorical column is not written
  # into (or over a column of) the caller's DataFrame. The dict unpacking is
  # needed because `group_name` may contain spaces.
  plt_df = df.assign(**{group_name: df[group_col].astype('category')})
  p = (gg.ggplot(plt_df)
       + gg.aes(x='episode', y='average_regret',
                group=group_name, colour=group_name, fill=group_name)
       + gg.geom_smooth(method=smoothers.mean, span=0.1, size=1.75, alpha=0.1)
       + gg.scale_colour_manual(values=FIVE_COLOURS)
       + gg.scale_fill_manual(values=FIVE_COLOURS))
  return p
Exemple #9
0
def _bar_plot_compare(df: pd.DataFrame) -> gg.ggplot:
    """Bar plot of bsuite score data, comparing agents on each experiment.

    Args:
      df: data with `agent`, `score` and `finished` columns.

    Returns:
      The ggplot object; unfinished jobs are faded through an alpha mapping.
    """
    palette = plotting.CATEGORICAL_COLOURS
    tilted_labels = gg.element_text(angle=25, hjust=1)

    plot = gg.ggplot(df)
    plot += gg.aes(x='agent', y='score', colour='agent', fill='agent')
    plot += gg.geom_bar(position='dodge', stat='identity')
    plot += gg.geom_hline(yintercept=1., linetype='dashed', alpha=0.5)
    plot += gg.theme(axis_text_x=tilted_labels)
    plot += gg.scale_colour_manual(palette)
    plot += gg.scale_fill_manual(palette)

    if all(df.finished):
        return plot
    # At least one job is unfinished: map alpha onto `finished`.
    plot += gg.aes(alpha='finished')
    plot += gg.scale_alpha_discrete(range=[0.3, 1.0])
    return plot
Exemple #10
0
def plot_regret(df_in: pd.DataFrame,
                sweep_vars: Sequence[Text] = None) -> gg.ggplot:
    """Plot average regret of deep_sea through time by size.

    Only rows whose `size` is one of 10, 20, 30, 40, 50 and whose `episode`
    does not exceed `NUM_EPISODES` are plotted.

    Args:
      df_in: data with `size`, `episode` and `total_bad_episodes` columns. It
        is not modified.
      sweep_vars: optional columns passed to `plotting.facet_sweep_plot`.

    Returns:
      The ggplot object returned by `plotting.facet_sweep_plot`.
    """
    # Filter first, then take one explicit copy: the column assignments below
    # then write to an independent frame instead of a slice of a copy (which
    # is the pattern pandas flags with SettingWithCopyWarning), and only the
    # kept rows are duplicated.
    df = df_in[df_in['size'].isin([10, 20, 30, 40, 50])].copy()
    df['avg_bad'] = df.total_bad_episodes / df.episode
    df['size'] = df['size'].astype('category')
    p = (
        gg.ggplot(df[df.episode <= NUM_EPISODES]) +
        gg.aes('episode', 'avg_bad', group='size', colour='size') +
        gg.geom_line(size=2, alpha=0.75) + gg.geom_hline(
            gg.aes(yintercept=0.99), linetype='dashed', alpha=0.4, size=1.75) +
        # Invisible line at 0 so the y axis always includes zero (axis hack).
        gg.geom_hline(gg.aes(yintercept=0.0), alpha=0) +
        gg.ylab('average bad episodes') +
        gg.scale_colour_manual(values=plotting.FIVE_COLOURS))
    return plotting.facet_sweep_plot(p, sweep_vars)
Exemple #11
0
def plot_regret_ave_scaling(df_in: pd.DataFrame,
                            group_col: str,
                            episode: int,
                            regret_thresh: float,
                            sweep_vars: Sequence[str] = None,
                            regret_col: str = 'total_regret') -> gg.ggplot:
  """Point plot of average regret against the group value, on a log x axis.

  Points are coloured by whether `average_regret` is below `regret_thresh`.

  Args:
    df_in: raw data, run through `_preprocess_ave_regret` before plotting.
    group_col: grouping column; the x aesthetic uses this name with
      underscores replaced by spaces.
    episode: episode forwarded to the preprocessing and shown in the y label.
    regret_thresh: threshold used in the colour expression.
    sweep_vars: optional columns passed to `facet_sweep_plot`.
    regret_col: regret column forwarded to the preprocessing.

  Returns:
    The ggplot object returned by `facet_sweep_plot`.
  """
  df = _preprocess_ave_regret(df_in, group_col, episode, sweep_vars, regret_col)
  x_column = group_col.replace('_', ' ')
  below_threshold = 'average_regret < {}'.format(regret_thresh)
  y_label = 'average regret at {} episodes'.format(episode)

  plot = gg.ggplot(df)
  plot += gg.aes(x=x_column, y='average_regret', colour=below_threshold)
  plot += gg.geom_point(size=5, alpha=0.8)
  plot += gg.scale_x_log10(breaks=[1, 3, 10, 30, 100])
  plot += gg.scale_colour_manual(values=['#d73027', '#313695'])
  plot += gg.ylab(y_label)
  # Invisible line at 0 so the y axis always includes zero (axis hack).
  plot += gg.geom_hline(gg.aes(yintercept=0.0), alpha=0)
  return facet_sweep_plot(plot, sweep_vars)
Exemple #12
0
def plot_scale(df: pd.DataFrame,
               sweep_vars: Sequence[str] = None) -> gg.ggplot:
    """Plot the best episode observed for each height_threshold.

    Args:
      df: raw data, run through `cp_swingup_preprocess` before plotting.
      sweep_vars: optional extra grouping columns, also passed to
        `plotting.facet_sweep_plot`.

    Returns:
      The ggplot object returned by `plotting.facet_sweep_plot`.
    """
    df = cp_swingup_preprocess(df_in=df)

    group_vars = ['height_threshold']
    if sweep_vars:
        group_vars.extend(sweep_vars)
    best_per_group = df.groupby(group_vars)['best_episode'].max()
    plt_df = best_per_group.reset_index()

    # Colour expression: whether the best episode beats GOOD_EPISODE.
    is_good = 'best_episode > {}'.format(GOOD_EPISODE)
    y_label = 'best return in first {} episodes'.format(NUM_EPISODES)

    plot = gg.ggplot(plt_df)
    plot += gg.aes(x='factor(height_threshold)',
                   y='best_episode',
                   colour=is_good)
    plot += gg.geom_point(size=5, alpha=0.8)
    plot += gg.scale_colour_manual(values=['#d73027', '#313695'])
    # Invisible line at 0 so the y axis always includes zero (axis hack).
    plot += gg.geom_hline(gg.aes(yintercept=0.0), alpha=0)
    plot += gg.scale_x_discrete(breaks=[0, 0.25, 0.5, 0.75, 1.0])
    plot += gg.ylab(y_label)
    plot += gg.xlab('height threshold')
    return plotting.facet_sweep_plot(plot, sweep_vars)
Exemple #13
0
def label_x(dates):
    """Turn day offsets from 1 January 2018 into 'DD-MM' tick labels.

    Args:
      dates: iterable of day offsets (added to 2018-01-01 as timedelta days).

    Returns:
      List of day-month strings, one per offset. The list is also printed.
    """
    origin = datetime.datetime(2018, 1, 1)
    res = []
    for offset in dates:
        moment = origin + datetime.timedelta(offset)
        res.append(moment.strftime("%d-%m"))
    print(res)
    return res


# Mean detected songs per day, faceted by `type`, with a centred 4-point
# moving average; written straight to disk.
(
    ggplot(data=res, mapping=aes(x='julian', y='value', colour='type'))
    + xlab("Day")
    + ylab("Mean number of detected songs")
    + facet_grid("type~", scales="free")
    + geom_point()
    + geom_smooth(
        method="mavg",
        se=False,
        method_args={"window": 4, "center": True, "min_periods": 1},
    )
    + scale_colour_manual(values=cbbPalette, guide=False)
    + scale_x_continuous(labels=label_x)
).save(
    "figs/song_events_aci_BARROW_mean_smoothed.png",
    height=10,
    width=16,
    dpi=150,
)

# Total detected songs per day, coloured by `site` (no faceting), with the
# same moving-average smoothing; written straight to disk.
(
    ggplot(data=res, mapping=aes(x='julian', y='n_events_sum', colour='site'))
    + xlab("Day")
    + ylab("Total number of detected songs")
    + geom_point()
    + geom_smooth(
        method="mavg",
        se=False,
        method_args={"window": 4, "center": True, "min_periods": 1},
    )
    + scale_colour_manual(values=cbbPalette, guide=False)
    + scale_x_continuous(labels=label_x)
).save(
    "figs/song_events_BARW0_sum.png",
    height=10,
    width=16,
    dpi=150,
)

#################
### Denoising ###
Exemple #14
0
                                  na_rm=False)
        else:
            g += p9.geom_crossbar(p9.aes(x="x",
                                         y='center',
                                         ymin='low',
                                         ymax='high',
                                         group="factor(group_x)",
                                         colour="factor(group)",
                                         fill="factor(group)"),
                                  position=p9.position_dodge(
                                      0.7, preserve='single'),
                                  na_rm=True,
                                  alpha=0.2)

            g += p9.scale_fill_manual(values=ez_colors(g.n_groups('group')))
            g += p9.scale_colour_manual(values=ez_colors(g.n_groups('group')))

    elif geom == 'ribbon':

        g = EZPlot(gdata.dropna())

        # set groups
        if group is None:
            g += p9.geom_ribbon(p9.aes(x="x",
                                       y='center',
                                       ymin='low',
                                       ymax='high'),
                                fill=ez_colors(1)[0],
                                alpha=0.2,
                                na_rm=False)
            g += p9.geom_line(p9.aes(x="x", y='center'),
# Tag every row with its origin so the two embeddings stay distinguishable
# after they are stacked into one frame.
input_data_UMAPencoded_df['dataset'] = 'original'
simulated_data_UMAPencoded_df['dataset'] = 'simulated'

combined_data_df = pd.concat(
    [input_data_UMAPencoded_df, simulated_data_UMAPencoded_df])

# Scatter of embedding columns '1' and '2', coloured by origin.
g_input_sim = (
    ggplot(combined_data_df, aes(x='1', y='2'))
    + geom_point(aes(color='dataset'), alpha=0.3)
    + labs(x="UMAP 1",
           y="UMAP 2",
           title="UMAP of original and simulated data")
    + theme_bw()
    + theme(
        legend_title_align="center",
        plot_background=element_rect(fill='white'),
        legend_key=element_rect(fill='white', colour='white'),
        plot_title=element_text(weight='bold'),
    )
    # Legend keys are drawn fully opaque even though the points are not.
    + guides(colour=guide_legend(override_aes={'alpha': 1}))
    + scale_colour_manual(["grey", '#87CEFA'])
)

print(g_input_sim)
ggsave(plot=g_input_sim, filename=umap_overlay_file, dpi=300)

# ## 2. Visualize effects of multiple experiments in PCA space

# In[13]:

# Runs the cell source below under the IPython `time` cell magic. Per the
# embedded source, the cell reads Partition_1, then for every `i` in
# `lst_num_partitions` reads Partition_<i>, fits a 2-component PCA on the two
# stacked frames, appends the projection to `all_data_df` and prints a scatter
# plot of that comparison.
# NOTE(review): in this view the cell-source literal is split across two
# physical lines (inside its "Add back in batch labels" comment) - confirm the
# original file holds it as one logical string.
get_ipython().run_cell_magic(
    'time', '',
    '\nall_data_df = pd.DataFrame()\n\n# Get batch 1 data\npartition_1_file = os.path.join(\n    partition_dir,\n    "Partition_1.txt.xz")\n\npartition_1 = pd.read_table(\n    partition_1_file,\n    header=0,\n    index_col=0,\n    sep=\'\\t\')\n\n\nfor i in lst_num_partitions:\n    print(\'Plotting PCA of 1 partition vs {} partition...\'.format(i))\n    \n    # Simulated data with all samples in a single partition\n    original_data_df =  partition_1.copy()\n    \n    # Add grouping column for plotting\n    original_data_df[\'num_partitions\'] = \'1\'\n    \n    # Get data with additional partitions added\n    partition_other_file = os.path.join(\n        partition_dir,\n        "Partition_"+str(i)+".txt.xz")\n\n    partition_other = pd.read_table(\n        partition_other_file,\n        header=0,\n        index_col=0,\n        sep=\'\\t\')\n    \n    # Simulated data with i partitions\n    partition_data_df =  partition_other\n    \n    # Add grouping column for plotting\n    partition_data_df[\'num_partitions\'] = \'multiple\'\n    \n    # Concatenate datasets together\n    combined_data_df = pd.concat([original_data_df, partition_data_df])\n\n    # PCA projection\n    pca = PCA(n_components=2)\n\n    # Encode expression data into 2D PCA space\n    combined_data_numeric_df = combined_data_df.drop([\'num_partitions\'], axis=1)\n    combined_data_PCAencoded = pca.fit_transform(combined_data_numeric_df)\n\n\n    combined_data_PCAencoded_df = pd.DataFrame(combined_data_PCAencoded,\n                                               index=combined_data_df.index,\n                                               columns=[\'PC1\', \'PC2\']\n                                              )\n                                              \n    # Variance explained\n    print(pca.explained_variance_ratio_)  \n    \n    # Add back in batch labels (i.e. 
labels = "batch_"<how many batch effects were added>)\n    combined_data_PCAencoded_df[\'num_partitions\'] = combined_data_df[\'num_partitions\']\n    \n    # Add column that designates which batch effect comparision (i.e. comparison of 1 batch vs 5 batches\n    # is represented by label = 5)\n    combined_data_PCAencoded_df[\'comparison\'] = str(i)\n    \n    # Concatenate ALL comparisons\n    all_data_df = pd.concat([all_data_df, combined_data_PCAencoded_df])\n    \n    # Plot individual comparisons\n    print(ggplot(combined_data_PCAencoded_df, aes(x=\'PC1\', y=\'PC2\')) \\\n          + geom_point(aes(color=\'num_partitions\'), alpha=0.2) \\\n          + labs(x = "PC 1", y = "PC 2", title = "Partition 1 and Partition {}".format(i))\\\n          + theme_bw() \\\n          + theme(\n                legend_title_align = "center",\n                plot_background=element_rect(fill=\'white\'),\n                legend_key=element_rect(fill=\'white\', colour=\'white\'), \n                plot_title=element_text(weight=\'bold\')\n            ) \\\n          + guides(colour=guide_legend(override_aes={\'alpha\': 1})) \\\n          + scale_colour_manual(["grey", \'#b3e5fc\'])\n         )             '
)

# In[14]:
Exemple #16
0
#select_data = combined_data_df[combined_data_df['experiment_id'] != 'Not selected']

# Scatter of embedding columns '1' and '2', coloured by experiment id, with
# one panel per dataset.
(
    ggplot(combined_data_df, aes(x='1', y='2'))
    + geom_point(aes(color='experiment_id'), alpha=0.3)
    + facet_wrap('~dataset')
    + xlab('UMAP 1')
    + ylab('UMAP 2')
    + ggtitle('UMAP of original and simulated data (gene space)')
)
#+ xlim(3,12) \
#+ ylim(-7,10) \
#+ scale_colour_manual(values=["blue", "purple", "orange", "red", "magenta", "lightgrey"]) \

# In[12]:

# Overlay of original and simulated data in one panel, coloured by dataset
# with a manual grey/blue scale.
(
    ggplot(combined_data_df, aes(x='1', y='2'))
    + geom_point(aes(color='dataset'), alpha=0.3)
    + scale_colour_manual(values=["grey", "blue"])
    + xlab('UMAP 1')
    + ylab('UMAP 2')
    + ggtitle('UMAP of original and simulated data (gene space)')
)

# ## Visualize simulated data (gene space) projected into PCA space

# In[13]:

# PCA embedding of original input data

# Fit a two-component PCA model on the normalized data, then project that
# same data with the fitted model.
# NOTE(review): nothing in this span saves the model, despite the original
# "Get and save model" comment - confirm whether it is persisted further down.
pca = PCA(n_components=2)
pca.fit(normalized_data)

input_data_PCAencoded = pca.transform(normalized_data)
input_data_PCAencoded_df = pd.DataFrame(data=input_data_PCAencoded,
                                        index=normalized_data.index,