Esempio n. 1
0
# pip install wrapt

import collections
import warnings

import numpy as np
import tensorflow as tf
import sonnet as snt

import plotnine as gg

from typing import Any, Callable, Dict, List, Tuple

# Plotnine themes: install a black-and-white base theme (size 16, serif
# family) as the default, then override the figure size and panel spacing.
gg.theme_set(gg.theme_bw(base_size=16, base_family='serif'))
gg.theme_update(figure_size=(12, 8), panel_spacing_x=0.5, panel_spacing_y=0.5)

# Filter meaningless pandas warnings
# NOTE(review): these filters are global and match on category only, so every
# UserWarning and ImportWarning is silenced, not just the ones from pandas.
warnings.filterwarnings(action="ignore", category=UserWarning)
warnings.filterwarnings(action="ignore", category=ImportWarning)

# @title (CODE) Define the _Deep Sea_ environment

# Immutable record with three positional fields, in this order:
# `observation`, `reward`, `pcont`.
TimeStep = collections.namedtuple(
    typename='TimeStep',
    field_names=('observation', 'reward', 'pcont'),
)


class DeepSea(object):
    def __init__(self, size: int, seed: int = None, randomize: bool = True):

        self._size = size
Esempio n. 2
0
def upper_interval(x):
    """Return the mean of `x` plus two (population) standard deviations."""
    center = np.mean(x)
    spread = np.std(x)
    return center + 2 * spread


#############################################################################
# Collating data with Pandas
# Merge the concatenated results with the parameter table on 'unique_id',
# then average 'instant_regret' for every (agent, t) pair.
params_df = config_lib.get_params_df(config)
df = pd.merge(pd.concat(results), params_df, on='unique_id')
# Use the string alias 'mean' rather than np.mean: pandas already maps the
# NumPy callable to its own groupby mean, but warns about it (FutureWarning
# in pandas >= 2.1) and will call it directly in a future version.
plt_df = (df.groupby(['agent', 't'])
          .agg({'instant_regret': 'mean'})
          .reset_index())

#############################################################################
# Plotting and analysis (uses plotnine by default)
gg.theme_set(gg.theme_bw(base_size=16, base_family='serif'))
gg.theme_update(figure_size=(12, 8))

# One line per agent: mean instant regret against t.
p = (gg.ggplot(plt_df) + gg.aes('t', 'instant_regret', colour='agent') +
     gg.geom_line())
print(p)

#############################################################################
# Collating data with Pandas
# Merge the concatenated results with the parameter table on 'unique_id',
# then average 'cum_reward' for every (agent, t) pair.
params_df = config_lib.get_params_df(config)
df = pd.merge(pd.concat(results), params_df, on='unique_id')
# Use the string alias 'mean' rather than np.mean: pandas already maps the
# NumPy callable to its own groupby mean, but warns about it (FutureWarning
# in pandas >= 2.1) and will call it directly in a future version.
plt_df = (df.groupby(['agent', 't'])
          .agg({'cum_reward': 'mean'})
          .reset_index())

#############################################################################
# Plotting and analysis (uses plotnine by default)
Esempio n. 3
0
def make_plots():
    # Setup plotting
    import pandas as pd
    import plotnine as gg
    import warnings

    pd.options.mode.chained_assignment = None
    gg.theme_set(gg.theme_bw(base_size=16, base_family='serif'))
    gg.theme_update(figure_size=(12, 8),
                    panel_spacing_x=0.5,
                    panel_spacing_y=0.5)
    warnings.filterwarnings('ignore')

    # Load Results
    experiments = {
        'Random': RANDOM_RESULTS_PATH,
        'TRPO': TRPO_RESULTS_PATH,
    }
    data_frame, sweep_vars = csv_load.load_bsuite(experiments)
    bsuite_score = summary_analysis.bsuite_score(data_frame, sweep_vars)
    bsuite_summary = summary_analysis.ave_score_by_tag(bsuite_score,
                                                       sweep_vars)

    # Generate general plots
    radar_fig = summary_analysis.bsuite_radar_plot(bsuite_summary, sweep_vars)
    radar_fig.savefig(PLOTS_PATH + 'radar_fig.png', bbox_inches='tight')
    bar_fig = summary_analysis.bsuite_bar_plot(bsuite_score, sweep_vars)
    bar_fig.save(PLOTS_PATH + 'bar_fig.png')
    compare_bar_fig = summary_analysis.bsuite_bar_plot_compare(
        bsuite_score, sweep_vars)
    compare_bar_fig.save(PLOTS_PATH + 'compare_bar_fig.png')

    # Generate specific analyses
    # Learning performance
    from bsuite.experiments.bandit import analysis as bandit_analysis
    bandit_df = data_frame[data_frame.bsuite_env == 'bandit'].copy()
    bandit_scores = summary_analysis.plot_single_experiment(
        bsuite_score, 'bandit', sweep_vars)
    bandit_scores.save(PLOTS_PATH + 'bandits_scores.png')
    bandit_convergence = bandit_analysis.plot_learning(bandit_df, sweep_vars)
    bandit_convergence.save(PLOTS_PATH + 'bandits_convergence.png')
    bandit_seeds = bandit_analysis.plot_seeds(bandit_df, sweep_vars)
    bandit_seeds.save(PLOTS_PATH + 'bandits_seeds.png')

    # Robustness to noise
    from bsuite.experiments.bandit_noise import analysis as bandit_noise_analysis
    bandit_noise_df = data_frame[data_frame.bsuite_env ==
                                 'bandit_noise'].copy()
    bandit_noise_overall = summary_analysis.plot_single_experiment(
        bsuite_score, 'bandit_noise', sweep_vars)
    bandit_noise_overall.save(PLOTS_PATH + 'bandits_noise_overall.png')
    bandit_noise_avg = bandit_noise_analysis.plot_average(
        bandit_noise_df, sweep_vars)
    bandit_noise_avg.save(PLOTS_PATH + 'bandits_noise_avg.png')
    bandit_noise_regret = bandit_noise_analysis.plot_learning(
        bandit_noise_df, sweep_vars)
    bandit_noise_regret.save(PLOTS_PATH + 'bandits_noise_regret.png')

    # Robustness to reward scaling
    from bsuite.experiments.bandit_scale import analysis as bandit_scale_analysis
    bandit_scale_df = data_frame[data_frame.bsuite_env ==
                                 'bandit_scale'].copy()
    bandit_scale_overall = summary_analysis.plot_single_experiment(
        bsuite_score, 'bandit_scale', sweep_vars)
    bandit_scale_overall.save(PLOTS_PATH + 'bandits_scale_overall.png')
    bandit_scale_avg = bandit_scale_analysis.plot_average(
        bandit_scale_df, sweep_vars)
    bandit_scale_avg.save(PLOTS_PATH + 'bandits_scale_avg.png')
    bandit_scale_learn = bandit_scale_analysis.plot_learning(
        bandit_scale_df, sweep_vars)
    bandit_scale_learn.save(PLOTS_PATH + 'bandits_scale_learn.png')
    """