def _base_scaling(plt_df: pd.DataFrame,
                  sweep_vars: Optional[Sequence[str]] = None,
                  with_baseline: bool = True) -> gg.ggplot:
    """Build the common point layer shared by the deep sea scaling plots.

    Points are drawn at (`size`, `episode`) and coloured by the `solved`
    column. The `finished` column is mapped to point shape only when at least
    one row is unfinished.

    Args:
      plt_df: data to plot; the columns `size`, `episode`, `solved` and
        `finished` are read here.
      sweep_vars: forwarded unchanged to `_make_baseline`.
      with_baseline: when True, overlay the dashed line built from
        `_make_baseline(plt_df, sweep_vars)`.

    Returns:
      The assembled ggplot object.
    """
    point_style = dict(size=3, alpha=0.75)
    plot = gg.ggplot(plt_df) + gg.aes(x='size', y='episode')

    if not np.all(plt_df.finished):
        # Mixed finished/unfinished runs: distinguish them by marker shape.
        plot += gg.geom_point(gg.aes(shape='finished', colour='solved'),
                              **point_style)
        plot += gg.scale_shape_manual(values=['x', 'o'])
    else:
        plot += gg.geom_point(gg.aes(colour='solved'), **point_style)

    # A single colour level must get blue, otherwise it would take the first
    # (red) entry of the two-colour palette.
    if np.all(plt_df.solved):
        palette = ['#313695']  # blue
    else:
        palette = ['#d73027', '#313695']  # [red, blue]
    plot += gg.scale_colour_manual(values=palette)

    if not with_baseline:
        return plot

    baseline_df = _make_baseline(plt_df, sweep_vars)
    plot += gg.geom_line(data=baseline_df, colour='black', linetype='dashed',
                         alpha=0.4, size=1.5)
    return plot
def ensemble_plot(experiment_name='ensemble_nn', data_path=_DEFAULT_DATA_PATH):
    """Specialized plotting script for TS tutorial paper ensemble NN.

    Loads the experiment data, averages `instant_regret` per (`agent`, `t`),
    and builds one regret-over-time plot per agent family.

    Args:
      experiment_name: name passed to `load_data`; also used as the prefix of
        the keys in the returned dict.
      data_path: location passed to `load_data`.

    Returns:
      Dict mapping `'<experiment_name>_<agent_family>'` to a ggplot object.
    """
    df = load_data(experiment_name, data_path)
    plt_df = (df.groupby(['agent', 't'])
              .agg({'instant_regret': 'mean'})
              .reset_index())

    def _get_agent_family(agent_name):
        """Map a raw agent name to the family it is plotted under."""
        lowered = agent_name.lower()
        if 'dropout' in lowered:
            return 'Dropout'
        elif 'ensemble' in lowered:
            return 'Ensemble'
        elif '/' in lowered:
            return 'Annealing epsilon'
        else:
            return 'Fixed epsilon'

    def _rename_ensemble(agent_name):
        """Rewrite ensemble agent names as 'ensemble=NNN'; leave others as is."""
        if 'ensemble' in agent_name:
            # Zero-pad the prefix before the first '-' to three characters so
            # the names sort lexicographically in the same order as the counts.
            n_ensemble = agent_name.split('-')[0]
            return 'ensemble=' + n_ensemble.zfill(3)
        return agent_name

    plt_df['agent_name'] = plt_df.agent.apply(_rename_ensemble)
    plt_df['agent_family'] = plt_df.agent.apply(_get_agent_family)

    custom_colors = ['#d53e4f', '#fdae61', '#a6d96a', '#66c2a5', '#5e4fa2']
    plot_dict = {}
    # Group by the scalar column name, not a one-element list: with a list key
    # pandas >= 2.0 yields 1-tuples such as ('Ensemble',), which breaks both the
    # string comparison and the string concatenation below.
    for agent_family, df_family in plt_df.groupby('agent_family'):
        if agent_family == 'Ensemble':
            custom_labels = [
                'Ensemble 3', 'Ensemble 10', 'Ensemble 30', 'Ensemble 100',
                'Ensemble 300'
            ]
            gg_legend = gg.scale_colour_manual(
                values=custom_colors, labels=custom_labels, name='Agent')
        else:
            gg_legend = gg.scale_colour_manual(custom_colors, name='Agent')

        p = (gg.ggplot(df_family)
             + gg.aes('t', 'instant_regret', colour='agent_name')
             + gg.geom_line(size=1.25, alpha=0.75)
             + gg.facet_wrap('~ agent_family')
             + gg_legend
             + gg.coord_cartesian(ylim=(0, 60))
             + gg.xlab('Timestep (t)')
             + gg.ylab('Average instantaneous regret')
             + gg.theme(figure_size=(6, 6)))
        plot_dict[experiment_name + '_' + agent_family] = p

    return plot_dict
def bsuite_bar_plot(df_in: pd.DataFrame,
                    sweep_vars: Sequence[str] = None) -> gg.ggplot:
    """Output bar plot of bsuite data.

    Args:
      df_in: raw score data, cleaned via `_clean_bar_plot_data`.
      sweep_vars: optional columns to facet by (one facet row per setting).

    Returns:
      A ggplot bar chart of `score` per `env`, coloured by `type`, whose
      height grows with the number of distinct sweep settings.
    """
    scores = _clean_bar_plot_data(df_in, sweep_vars)

    layers = [
        gg.aes(x='env', y='score', colour='type', fill='type'),
        gg.geom_bar(position='dodge', stat='identity'),
        gg.geom_hline(yintercept=1., linetype='dashed', alpha=0.5),
        gg.scale_colour_manual(plotting.CATEGORICAL_COLOURS),
        gg.scale_fill_manual(plotting.CATEGORICAL_COLOURS),
        gg.xlab('experiment'),
        gg.theme(axis_text_x=gg.element_text(angle=25, hjust=1)),
    ]
    plot = gg.ggplot(scores)
    for layer in layers:
        plot = plot + layer

    if not all(scores.finished):
        # Fade the bars that belong to unfinished jobs.
        plot += gg.aes(alpha='finished')
        plot += gg.scale_alpha_discrete(range=[0.3, 1.0])

    # One facet row per distinct sweep setting; the figure height scales with it.
    n_hypers = 1
    if sweep_vars:
        plot += gg.facet_wrap(sweep_vars, labeller='label_both', ncol=1)
        n_hypers = scores[sweep_vars].drop_duplicates().shape[0]

    return plot + gg.theme(figure_size=(14, 3 * n_hypers + 1))
def plot_individual_returns(
        df_in: pd.DataFrame,
        max_episode: int,
        return_column: str = 'episode_return',
        colour_var: Optional[str] = None,
        yintercept: Optional[float] = None,
        sweep_vars: Optional[Sequence[str]] = None) -> gg.ggplot:
    """Plot individual learning curves: one curve per sweep setting.

    Args:
      df_in: input data; copied, so the caller's frame is not modified.
      max_episode: upper x-axis limit (the view is clipped to [0, max_episode]).
      return_column: column plotted on the y axis.
      colour_var: optional column used to colour the curves. When it has at
        most five distinct values it is treated as categorical and coloured
        with `FIVE_COLOURS`.
      yintercept: optional y value for a dashed horizontal reference line.
        Any value other than None, including 0.0, draws the line.
      sweep_vars: optional columns forwarded to `_make_unique_group_col` and
        `facet_sweep_plot`.

    Returns:
      The faceted ggplot object returned by `facet_sweep_plot`.
    """
    df = df_in.copy()
    df['unique_group'] = _make_unique_group_col(df, sweep_vars)

    # Decide on the categorical cast before the plot is built, so the result
    # does not depend on ggplot holding a live reference to `df`.
    use_manual_colours = bool(colour_var) and len(df[colour_var].unique()) <= 5
    if use_manual_colours:
        df[colour_var] = df[colour_var].astype('category')

    p = (gg.ggplot(df)
         + gg.aes(x='episode', y=return_column, group='unique_group')
         + gg.coord_cartesian(xlim=(0, max_episode)))

    if colour_var:
        p += gg.geom_line(gg.aes(colour=colour_var), size=1.1, alpha=0.75)
        if use_manual_colours:
            p += gg.scale_colour_manual(values=FIVE_COLOURS)
    else:
        p += gg.geom_line(size=1.1, alpha=0.75, colour='#313695')

    # `is not None` rather than truthiness: a reference line at 0.0 is valid.
    if yintercept is not None:
        p += gg.geom_hline(
            yintercept=yintercept, alpha=0.5, size=2, linetype='dashed')
    return facet_sweep_plot(p, sweep_vars, tall_plot=True)
def plot_time_curve_with_threshold(self):
    """Plot `number_bacteria` and `number_actin` from `self.aggregated` over time.

    The aggregated table is melted to long form (one row per hour and object
    type), drawn as points plus lines, stored in `self.time_curve_fig` and
    displayed in the `self.out_plot2` output area.
    """
    long_counts = self.aggregated.melt(
        id_vars='hour',
        value_vars=['number_bacteria', 'number_actin'],
        value_name='counts',
        var_name='Object')
    palette = self.create_color_list()

    # Put one tick on every hour present in the results.
    hours = np.sort(self.result.hour.unique())
    hour_axis = pn.scale_x_continuous(
        breaks=hours, labels=list(hours.astype(str)))

    # NOTE(review): legend labels come from the channel selection although the
    # colour aesthetic is the melted 'Object' column - confirm this is intended.
    colour_scale = pn.scale_colour_manual(
        values=palette, labels=list(self.sel_channel_time.value), name="")

    figure = (
        ggplot(long_counts, aes("hour", "counts", color="Object"))
        + geom_point()
        + geom_line()
        + labels.xlab("Time [hours]")
        + labels.ylab("Average number of objects/nuclei")
        + colour_scale
        + pn.labs(colour="")
        + hour_axis)

    self.time_curve_fig = figure
    self.out_plot2.clear_output()
    with self.out_plot2:
        display(figure)
def plot_time_curve_by_channel(self, b=None):
    """Callback to plot the time curve of bacteria/nuclei for each selected channel.

    Called by plot_time_curve_button. Runs `self.data_aggregation()` first if
    no aggregated table exists yet. Prints a hint and does nothing else when
    no channel is selected. Otherwise the figure is stored in
    `self.time_curve_fig` and displayed in `self.out_plot2`.

    Args:
      b: unused; accepted so the method can serve as a button callback.
    """
    if self.aggregated is None:
        self.data_aggregation()

    selected = self.sel_channel_time.value
    if len(selected) == 0:
        print("Select at least one channel")
        return

    subset = self.aggregated[
        self.aggregated.channel.isin(selected)].copy(deep=True)
    # Plain column assignment replaces the column and therefore its dtype.
    # `subset.loc[:, "channel"] = ...` sets values in place on pandas >= 2.0,
    # keeps the old dtype and loses the selection-ordered categorical.
    subset["channel"] = subset.channel.astype(
        pd.CategoricalDtype(selected, ordered=True))

    colors = self.create_color_list()
    # Put one tick on every hour present in the results.
    hours = np.sort(self.result.hour.unique())

    myfig = (
        ggplot(subset, aes("hour", "normalized", color="channel"))
        + geom_point()
        + geom_line()
        + labels.xlab("Time [hours]")
        + labels.ylab("Average number of bacteria/nuclei")
        + pn.scale_colour_manual(
            values=colors, labels=list(selected), name="")
        + pn.labs(colour="")
        + pn.scale_x_continuous(
            breaks=hours, labels=list(hours.astype(str))))

    self.time_curve_fig = myfig
    self.out_plot2.clear_output()
    with self.out_plot2:
        display(myfig)
def __plot(
    self,
    plot_data,
    x,
    y,
    colour,
    lbl_x,
    lbl_y,
    facet,
    facet_scales,
    facet_by,
    smoothed,
    points,
    error_bars,
    save,
):
    """Assemble a plot of `y` against `x`, coloured by `colour`.

    Optional pieces are switched on by the flags: `facet` wraps panels by
    `facet_by` (the grid shape comes from `self.get_facet_rows`), `points`
    adds a point layer, and `smoothed` selects a moving-average smoother
    instead of a plain line. `error_bars` is accepted but currently has no
    effect. When `save` is truthy it is unpacked as keyword arguments to the
    plot's `save` method.

    Returns:
      The assembled plot object.
    """
    palette = [
        "#000000",
        "#E69F00",
        "#56B4E9",
        "#009E73",
        "#0072B2",
        "#D55E00",
        "#CC79A7",
    ]

    fig = ggplot(data=plot_data, mapping=aes(x=x, y=y, colour=colour))
    fig += xlab(lbl_x)
    fig += ylab(lbl_y)

    if facet:
        # TODO: use facet as save
        n_rows, n_cols = self.get_facet_rows(plot_data, facet_by)
        fig += facet_wrap(facet_by, nrow=n_rows, ncol=n_cols,
                          scales=facet_scales)

    if points:
        fig += geom_point()

    if error_bars:
        # TODO use generic way to compute them, e.g.
        # geom_errorbar(aes(ymin="ACI_mean - ACI_std", ymax="ACI_mean + ACI_std"))
        pass

    # TODO: use smooth as save
    if smoothed:
        curve = geom_smooth(
            method="mavg",
            se=False,
            method_args={"window": 4, "center": True, "min_periods": 1},
        )
    else:
        curve = geom_line()
    fig += curve

    fig += scale_colour_manual(values=palette, guide=False)
    fig += scale_x_continuous(labels=label_x)
    fig += theme(figure_size=(15, 18), dpi=150)

    if save:
        fig.save(**save)
    return fig
def _plot_regret_group(df: pd.DataFrame, group_col: str) -> gg.ggplot:
    """Plots the average regret through time when grouped.

    Args:
      df: data with `episode`, `average_regret` and `group_col` columns. It
        is not modified; the categorical grouping column is added to a copy.
      group_col: column to group and colour by. The legend uses this name
        with underscores replaced by spaces.

    Returns:
      A ggplot with one smoothed mean curve per group.
    """
    group_name = group_col.replace('_', ' ')
    # Work on a copy so the helper column does not leak into the caller's frame
    # (and does not warn when the caller passes a filtered slice).
    plt_df = df.copy()
    plt_df[group_name] = plt_df[group_col].astype('category')
    p = (gg.ggplot(plt_df)
         + gg.aes(x='episode', y='average_regret',
                  group=group_name, colour=group_name, fill=group_name)
         + gg.geom_smooth(method=smoothers.mean, span=0.1, size=1.75, alpha=0.1)
         + gg.scale_colour_manual(values=FIVE_COLOURS)
         + gg.scale_fill_manual(values=FIVE_COLOURS))
    return p
def _bar_plot_compare(df: pd.DataFrame) -> gg.ggplot:
    """Bar plot of bsuite score data, comparing agents on each experiment.

    Args:
      df: score data with `agent`, `score` and `finished` columns.

    Returns:
      A ggplot bar chart with one bar per agent and a dashed line at score 1.
    """
    palette = plotting.CATEGORICAL_COLOURS
    base = gg.ggplot(df) + gg.aes(
        x='agent', y='score', colour='agent', fill='agent')
    bars = gg.geom_bar(position='dodge', stat='identity')
    unit_line = gg.geom_hline(yintercept=1., linetype='dashed', alpha=0.5)
    tilted_labels = gg.theme(axis_text_x=gg.element_text(angle=25, hjust=1))

    plot = (base + bars + unit_line + tilted_labels
            + gg.scale_colour_manual(palette)
            + gg.scale_fill_manual(palette))

    all_finished = all(df.finished)
    if not all_finished:
        # Fade the bars that belong to unfinished jobs.
        plot += gg.aes(alpha='finished')
        plot += gg.scale_alpha_discrete(range=[0.3, 1.0])
    return plot
def plot_regret(df_in: pd.DataFrame,
                sweep_vars: Sequence[Text] = None) -> gg.ggplot:
    """Plot average regret of deep_sea through time by size.

    Only rows whose `size` is one of 10, 20, 30, 40 or 50 and whose `episode`
    is at most `NUM_EPISODES` are drawn. The y value is
    `total_bad_episodes / episode`.

    Args:
      df_in: input data; not modified.
      sweep_vars: optional columns forwarded to `plotting.facet_sweep_plot`.

    Returns:
      The faceted ggplot object returned by `plotting.facet_sweep_plot`.
    """
    plotted_sizes = [10, 20, 30, 40, 50]
    # Filter first, then copy: assigning columns on a boolean-indexed frame
    # raises SettingWithCopyWarning.
    df = df_in[df_in['size'].isin(plotted_sizes)].copy()
    df['avg_bad'] = df.total_bad_episodes / df.episode
    df['size'] = df['size'].astype('category')
    p = (
        gg.ggplot(df[df.episode <= NUM_EPISODES])
        + gg.aes('episode', 'avg_bad', group='size', colour='size')
        + gg.geom_line(size=2, alpha=0.75)
        + gg.geom_hline(
            gg.aes(yintercept=0.99), linetype='dashed', alpha=0.4, size=1.75)
        + gg.geom_hline(gg.aes(yintercept=0.0), alpha=0)  # axis hack
        + gg.ylab('average bad episodes')
        + gg.scale_colour_manual(values=plotting.FIVE_COLOURS))
    return plotting.facet_sweep_plot(p, sweep_vars)
def plot_regret_ave_scaling(df_in: pd.DataFrame,
                            group_col: str,
                            episode: int,
                            regret_thresh: float,
                            sweep_vars: Sequence[str] = None,
                            regret_col: str = 'total_regret') -> gg.ggplot:
    """Point plot of average regret investigating scaling to threshold.

    Args:
      df_in: raw data, preprocessed via `_preprocess_ave_regret`.
      group_col: grouping column; the x axis uses this name with underscores
        replaced by spaces.
      episode: episode number, forwarded to the preprocessing and shown in
        the y-axis label.
      regret_thresh: points with `average_regret` below this value are blue,
        the others red.
      sweep_vars: optional columns forwarded to the preprocessing and to
        `facet_sweep_plot`.
      regret_col: regret column forwarded to the preprocessing.

    Returns:
      The faceted ggplot object returned by `facet_sweep_plot`.
    """
    df = _preprocess_ave_regret(df_in, group_col, episode, sweep_vars,
                                regret_col)
    group_name = group_col.replace('_', ' ')

    # Manual palettes are assigned to the sorted levels (False, True). If every
    # point is below the threshold there is only the True level, which would
    # take the first (red) colour, so pass blue alone in that case.
    if (df['average_regret'] < regret_thresh).all():
        colour_values = ['#313695']  # blue
    else:
        colour_values = ['#d73027', '#313695']  # [red, blue]

    p = (gg.ggplot(df)
         + gg.aes(x=group_name, y='average_regret',
                  colour='average_regret < {}'.format(regret_thresh))
         + gg.geom_point(size=5, alpha=0.8)
         + gg.scale_x_log10(breaks=[1, 3, 10, 30, 100])
         + gg.scale_colour_manual(values=colour_values)
         + gg.ylab('average regret at {} episodes'.format(episode))
         + gg.geom_hline(gg.aes(yintercept=0.0), alpha=0)  # axis hack
        )
    return facet_sweep_plot(p, sweep_vars)
def plot_scale(df: pd.DataFrame,
               sweep_vars: Sequence[str] = None) -> gg.ggplot:
    """Plots the best episode observed by height_threshold.

    Args:
      df: raw data, preprocessed via `cp_swingup_preprocess`.
      sweep_vars: optional extra grouping columns, also forwarded to
        `plotting.facet_sweep_plot`.

    Returns:
      The faceted ggplot object returned by `plotting.facet_sweep_plot`.
      Points whose `best_episode` exceeds `GOOD_EPISODE` are blue, the others
      red.
    """
    df = cp_swingup_preprocess(df_in=df)
    group_vars = ['height_threshold']
    if sweep_vars:
        group_vars += sweep_vars
    plt_df = df.groupby(group_vars)['best_episode'].max().reset_index()

    # Manual palettes are assigned to the sorted levels (False, True). If every
    # point is above GOOD_EPISODE there is only the True level, which would
    # take the first (red) colour, so pass blue alone in that case.
    if (plt_df['best_episode'] > GOOD_EPISODE).all():
        colour_values = ['#313695']  # blue
    else:
        colour_values = ['#d73027', '#313695']  # [red, blue]

    p = (
        gg.ggplot(plt_df)
        + gg.aes(x='factor(height_threshold)', y='best_episode',
                 colour='best_episode > {}'.format(GOOD_EPISODE))
        + gg.geom_point(size=5, alpha=0.8)
        + gg.scale_colour_manual(values=colour_values)
        + gg.geom_hline(gg.aes(yintercept=0.0), alpha=0)  # axis hack
        + gg.scale_x_discrete(breaks=[0, 0.25, 0.5, 0.75, 1.0])
        + gg.ylab('best return in first {} episodes'.format(NUM_EPISODES))
        + gg.xlab('height threshold'))
    return plotting.facet_sweep_plot(p, sweep_vars)
def label_x(dates):
    """Format numeric day offsets as "dd-mm" tick labels.

    Each value in `dates` is added, as a number of days, to 1 January 2018.
    The resulting labels are printed and returned as a list.
    """
    origin = datetime.datetime(2018, 1, 1)
    tick_labels = []
    for day_offset in dates:
        moment = origin + datetime.timedelta(day_offset)
        tick_labels.append(moment.strftime("%d-%m"))
    print(tick_labels)
    return tick_labels


# Mean detections per day: one free-scaled facet row per `type`, smoothed with
# a centred 4-point moving average.
(
    ggplot(data=res, mapping=aes(x='julian', y='value', colour='type'))
    + xlab("Day")
    + ylab("Mean number of detected songs")
    + facet_grid("type~", scales="free")
    # + geom_line()
    # + facet_wrap("type", nrow=2, ncol=1)
    + geom_point()
    # + geom_errorbar(aes(ymin="ACI_mean - ACI_std", ymax="ACI_mean + ACI_std"))
    + geom_smooth(
        method="mavg",
        se=False,
        method_args={"window": 4, "center": True, "min_periods": 1},
    )
    + scale_colour_manual(values=cbbPalette, guide=False)
    + scale_x_continuous(labels=label_x)
).save(
    "figs/song_events_aci_BARROW_mean_smoothed.png",
    height=10,
    width=16,
    dpi=150,
)

# Total detections per day, coloured by site (no faceting).
(
    ggplot(data=res, mapping=aes(x='julian', y='n_events_sum', colour='site'))
    + xlab("Day")
    + ylab("Total number of detected songs")
    # + facet_grid("site~", scales="free")
    # + facet_wrap("site", nrow=2, ncol=3)
    + geom_point()
    # + geom_errorbar(aes(ymin="ACI_mean - ACI_std", ymax="ACI_mean + ACI_std"))
    + geom_smooth(
        method="mavg",
        se=False,
        method_args={"window": 4, "center": True, "min_periods": 1},
    )
    + scale_colour_manual(values=cbbPalette, guide=False)
    + scale_x_continuous(labels=label_x)
).save(
    "figs/song_events_BARW0_sum.png",
    height=10,
    width=16,
    dpi=150,
)

#################
### Denoising ###
na_rm=False) else: g += p9.geom_crossbar(p9.aes(x="x", y='center', ymin='low', ymax='high', group="factor(group_x)", colour="factor(group)", fill="factor(group)"), position=p9.position_dodge( 0.7, preserve='single'), na_rm=True, alpha=0.2) g += p9.scale_fill_manual(values=ez_colors(g.n_groups('group'))) g += p9.scale_colour_manual(values=ez_colors(g.n_groups('group'))) elif geom == 'ribbon': g = EZPlot(gdata.dropna()) # set groups if group is None: g += p9.geom_ribbon(p9.aes(x="x", y='center', ymin='low', ymax='high'), fill=ez_colors(1)[0], alpha=0.2, na_rm=False) g += p9.geom_line(p9.aes(x="x", y='center'),
# Tag every row with the frame it came from so the two sets can still be told
# apart (and coloured separately) after they are stacked.
input_data_UMAPencoded_df['dataset'] = 'original'
simulated_data_UMAPencoded_df['dataset'] = 'simulated'

# Concatenate input and simulated dataframes together
combined_data_df = pd.concat(
    [input_data_UMAPencoded_df, simulated_data_UMAPencoded_df])

# Plot
# Scatter of the columns named '1' and '2' (labelled "UMAP 1" / "UMAP 2" on the
# axes), coloured by `dataset`. The points are semi-transparent (alpha=0.3);
# the guide override draws the legend keys fully opaque.
g_input_sim = ggplot(combined_data_df, aes(x='1', y='2')) + geom_point(
    aes(color='dataset'), alpha=0.3) + labs(
        x = "UMAP 1", y = "UMAP 2",
        title = "UMAP of original and simulated data") + theme_bw() + theme(
    legend_title_align = "center",
    plot_background=element_rect(fill='white'),
    legend_key=element_rect(fill='white', colour='white'),
    plot_title=element_text(weight='bold')
) \
    + guides(colour=guide_legend(override_aes={'alpha': 1})) \
    + scale_colour_manual(["grey", '#87CEFA'])

# Print the plot object, then write it to `umap_overlay_file` at 300 dpi.
print(g_input_sim)
ggsave(plot=g_input_sim, filename=umap_overlay_file, dpi=300)

# ## 2. Visualize effects of multiple experiments in PCA space

# In[13]:

# The cell body is passed to IPython's `%%time` cell magic as a single string.
# The string is code that runs at call time: keep it byte-for-byte.
get_ipython().run_cell_magic(
    'time', '',
    '\nall_data_df = pd.DataFrame()\n\n# Get batch 1 data\npartition_1_file = os.path.join(\n partition_dir,\n "Partition_1.txt.xz")\n\npartition_1 = pd.read_table(\n partition_1_file,\n header=0,\n index_col=0,\n sep=\'\\t\')\n\n\nfor i in lst_num_partitions:\n print(\'Plotting PCA of 1 partition vs {} partition...\'.format(i))\n \n # Simulated data with all samples in a single partition\n original_data_df = partition_1.copy()\n \n # Add grouping column for plotting\n original_data_df[\'num_partitions\'] = \'1\'\n \n # Get data with additional partitions added\n partition_other_file = os.path.join(\n partition_dir,\n "Partition_"+str(i)+".txt.xz")\n\n partition_other = pd.read_table(\n partition_other_file,\n header=0,\n index_col=0,\n sep=\'\\t\')\n \n # Simulated data with i partitions\n partition_data_df = partition_other\n \n # Add grouping column for plotting\n partition_data_df[\'num_partitions\'] = \'multiple\'\n \n # Concatenate datasets together\n combined_data_df = pd.concat([original_data_df, partition_data_df])\n\n # PCA projection\n pca = PCA(n_components=2)\n\n # Encode expression data into 2D PCA space\n combined_data_numeric_df = combined_data_df.drop([\'num_partitions\'], axis=1)\n combined_data_PCAencoded = pca.fit_transform(combined_data_numeric_df)\n\n\n combined_data_PCAencoded_df = pd.DataFrame(combined_data_PCAencoded,\n index=combined_data_df.index,\n columns=[\'PC1\', \'PC2\']\n )\n \n # Variance explained\n print(pca.explained_variance_ratio_) \n \n # Add back in batch labels (i.e. labels = "batch_"<how many batch effects were added>)\n combined_data_PCAencoded_df[\'num_partitions\'] = combined_data_df[\'num_partitions\']\n \n # Add column that designates which batch effect comparision (i.e. comparison of 1 batch vs 5 batches\n # is represented by label = 5)\n combined_data_PCAencoded_df[\'comparison\'] = str(i)\n \n # Concatenate ALL comparisons\n all_data_df = pd.concat([all_data_df, combined_data_PCAencoded_df])\n \n # Plot individual comparisons\n print(ggplot(combined_data_PCAencoded_df, aes(x=\'PC1\', y=\'PC2\')) \\\n + geom_point(aes(color=\'num_partitions\'), alpha=0.2) \\\n + labs(x = "PC 1", y = "PC 2", title = "Partition 1 and Partition {}".format(i))\\\n + theme_bw() \\\n + theme(\n legend_title_align = "center",\n plot_background=element_rect(fill=\'white\'),\n legend_key=element_rect(fill=\'white\', colour=\'white\'), \n plot_title=element_text(weight=\'bold\')\n ) \\\n + guides(colour=guide_legend(override_aes={\'alpha\': 1})) \\\n + scale_colour_manual(["grey", \'#b3e5fc\'])\n ) '
)

# In[14]:
#select_data = combined_data_df[combined_data_df['experiment_id'] != 'Not selected'] # Plot ggplot(combined_data_df, aes(x='1', y='2')) + geom_point( aes(color='experiment_id'), alpha=0.3) + facet_wrap('~dataset') + xlab('UMAP 1') + ylab( 'UMAP 2') + ggtitle('UMAP of original and simulated data (gene space)') #+ xlim(3,12) \ #+ ylim(-7,10) \ #+ scale_colour_manual(values=["blue", "purple", "orange", "red", "magenta", "lightgrey"]) \ # In[12]: # Overlay original and simulated data ggplot(combined_data_df, aes(x='1', y='2')) + geom_point( aes(color='dataset'), alpha=0.3) + scale_colour_manual( values=["grey", "blue"]) + xlab('UMAP 1') + ylab('UMAP 2') + ggtitle( 'UMAP of original and simulated data (gene space)') # ## Visualize simulated data (gene space) projected into PCA space # In[13]: # UMAP embedding of original input data # Get and save model pca = PCA(n_components=2) pca.fit(normalized_data) input_data_PCAencoded = pca.transform(normalized_data) input_data_PCAencoded_df = pd.DataFrame(data=input_data_PCAencoded, index=normalized_data.index,