Example #1
0
def _count_random_episodes(env, n_steps=1000):
    """
    Drive ``env`` with random actions and count the episodes that finish.

    The environment is reset once before stepping and again after every
    episode that ends.

    :param env: environment exposing ``reset``, ``step`` and ``action_space``
    :param n_steps: (int) number of ``step`` calls to perform
    :return: (int) number of times ``step`` reported ``done``
    """
    env.reset()
    finished = 0
    for _ in range(n_steps):
        _, _, done, _ = env.step(env.action_space.sample())
        if done:
            finished += 1
            env.reset()
    return finished


def test_monitor_load_results(tmp_path):
    """
    test load_results on log files produced by the monitor wrapper

    Two monitored CartPole environments write their logs into the same
    temporary folder; after each rollout the number of rows returned by
    ``load_results`` must match the number of episodes that were completed.
    """
    tmp_path = str(tmp_path)
    env1 = gym.make("CartPole-v1")
    env1.seed(0)
    monitor_file1 = os.path.join(tmp_path, "stable_baselines-test-{}.monitor.csv".format(uuid.uuid4()))
    monitor_env1 = Monitor(env1, monitor_file1)

    # The log file must exist as soon as the wrapper has been created.
    monitor_files = get_monitor_files(tmp_path)
    assert len(monitor_files) == 1
    assert monitor_file1 in monitor_files

    episode_count1 = _count_random_episodes(monitor_env1)

    results_size1 = len(load_results(tmp_path).index)
    assert results_size1 == episode_count1

    env2 = gym.make("CartPole-v1")
    env2.seed(0)
    monitor_file2 = os.path.join(tmp_path, "stable_baselines-test-{}.monitor.csv".format(uuid.uuid4()))
    monitor_env2 = Monitor(env2, monitor_file2)
    monitor_files = get_monitor_files(tmp_path)
    assert len(monitor_files) == 2
    assert monitor_file1 in monitor_files
    assert monitor_file2 in monitor_files

    episode_count2 = _count_random_episodes(monitor_env2)

    # load_results reads every monitor file in the folder, so the second
    # rollout adds its episodes on top of the first one.
    results_size2 = len(load_results(tmp_path).index)

    assert results_size2 == (results_size1 + episode_count2)

    # Release the log file handles before deleting the files; removing a file
    # that is still open fails on some platforms.
    monitor_env1.close()
    monitor_env2.close()

    os.remove(monitor_file1)
    os.remove(monitor_file2)
def plot_results(log_folder, title="Learning Curve", window=50):
    """
    Plot the smoothed reward curve of a run and save it as ``<title>.png``.

    Parameters
    ----------
    log_folder : str
        the save location of the results to plot
    title : str
        the title of the task to plot; also used as the figure label and as
        the stem of the saved image's file name
    window : int
        smoothing window forwarded to ``moving_average`` (defaults to 50, the
        value that used to be hard-coded)

    Returns
    -------
    None
        the figure is saved with ``plt.savefig`` and then closed
    """

    x, y = ts2xy(load_results(log_folder), "timesteps")
    y = moving_average(y, window=window)
    # Truncate x: the smoothed y may hold fewer points than x, so keep only
    # the trailing x values to make both sequences the same length.
    x = x[len(x) - len(y):]
    fig = plt.figure(title)
    plt.plot(x, y)
    plt.xlabel("Number of Timesteps")
    plt.ylabel("Rewards")
    plt.title(title + " Smoothed")
    plt.savefig(title + ".png")
    # Close exactly the figure created above so repeated calls do not leave
    # figures open.
    plt.close(fig)
Example #3
0
def get_learning_data(log_dirs, file_reader=load_results, verbose=1):
    """Load learning curve data from selected log directories.

    :param log_dirs: iterable of paths to the log directories to read
    :param file_reader: callable that takes a directory path and returns the
        learning data stored there (defaults to ``load_results``)
    :param verbose: (int) 0 prints nothing, 1 prints warnings and the final
        summary, values above 1 additionally print per-directory details
    :return: (dict) loaded data keyed by the last component of each directory
        path; directories that could not be read are left out
    :raises ValueError: if two readable directories share the same name
    """

    learning_curves = {}

    learning_filename = log_filenames['learning']
    if verbose > 1:
        print(f"\nLoading data from '{learning_filename}' files...\n")

    for dir_path in log_dirs:
        # normpath drops a trailing separator, which would otherwise make the
        # last path component (and therefore the dictionary key) empty.
        dir_name = os.path.basename(os.path.normpath(dir_path))
        try:
            # Honour the reader supplied by the caller instead of always
            # falling back to load_results.
            learning_data = file_reader(dir_path)
        except FileNotFoundError:
            if verbose > 0:
                print(f"{dir_name}: WARNING. No '{learning_filename}' "
                      f"file in: {dir_path}")
        except Exception:
            # Best effort: an unreadable directory is reported and skipped.
            # KeyboardInterrupt and SystemExit are deliberately not caught.
            if verbose > 0:
                print(
                    f"{dir_name}: WARNING. Could not read '{learning_filename}' "
                    f"file in: {dir_path}")
        else:
            if verbose > 1:
                print(f"{dir_name}: {len(learning_data)} data points")
            if dir_name in learning_curves:
                raise ValueError(f"Log directory {dir_name} is not unique.")
            learning_curves[dir_name] = learning_data

    if verbose > 0:
        print(f"{len(learning_curves)} '{learning_filename}' " f"files found")

    return learning_curves
Example #4
0
def plot_results(dirs, num_timesteps, xaxis, task_name, labels):
    """
    Load the results of several runs, cut each one at a timestep budget and
    plot the resulting curves.

    :param dirs: ([str]) the save location of the results to plot
    :param num_timesteps: (int) rows are kept only while the running sum of
        the ``l`` column is at or below this value
    :param xaxis: (str) the axis for the x and y output
        (can be X_TIMESTEPS='timesteps', X_EPISODES='episodes' or X_WALLTIME='walltime_hrs')
    :param task_name: (str) the title of the task to plot
    :param labels: forwarded unchanged to ``plot_curves``
    :return: whatever ``plot_curves`` returns
    """
    # First pass: read every folder and drop the rows beyond the budget.
    truncated_runs = []
    for run_dir in dirs:
        frame = load_results(run_dir)
        within_budget = frame.l.cumsum() <= num_timesteps
        truncated_runs.append(frame[within_budget])

    # Second pass: convert each truncated run into an (x, y) pair.
    curves = []
    for frame in truncated_runs:
        curves.append(ts2xy(frame, xaxis))

    return plot_curves(curves, labels, xaxis, task_name)
def plot_results(dirs, num_timesteps, xaxis, task_name, legend_names=None):
    """
    Load the results of several runs, optionally cut each one at a timestep
    budget, and hand the curves to ``plot_curves``.

    :param dirs: ([str]) the save location of the results to plot; entries
        that do not start with '/' are resolved relative to ``BASE``
    :param num_timesteps: (int or None) rows are kept only while the running
        sum of the ``l`` column is at or below this value; None keeps all rows
    :param xaxis: (str) the axis for the x and y output
        (can be X_TIMESTEPS='timesteps', X_EPISODES='episodes' or X_WALLTIME='walltime_hrs')
    :param task_name: (str) the title of the task to plot
    :param legend_names: forwarded to ``plot_curves`` as ``legend_names``
    """

    def _load_run(folder):
        # Relative folders live below BASE; paths starting with '/' are used as given.
        path = folder if folder.startswith('/') else os.path.join(BASE, folder)
        frame = load_results(path)
        if num_timesteps is None:
            return frame
        return frame[frame.l.cumsum() <= num_timesteps]

    frames = [_load_run(folder) for folder in dirs]
    curves = [ts2xy(frame, xaxis) for frame in frames]
    plot_curves(curves, xaxis, task_name, legend_names=legend_names)
Example #6
0
    def _on_step(self) -> bool:
        """
        Periodically print the environment, track the best mean training
        reward and write checkpoints.

        Three independent schedules are evaluated on every call, each keyed
        on ``self.n_calls``:

        * every ``interval`` calls the model's environment is pretty-printed;
        * every ``self.check_freq`` calls the mean reward of the last 100
          logged episodes is computed and, if it beats
          ``self.best_mean_reward``, the model is saved to ``self.save_path``;
        * every ``self.save_freq`` calls a checkpoint is saved below
          ``<self.log_dir>/checkpoints``.

        :return: (bool) always ``True``
        """
        # NOTE(review): ``interval`` and ``pp`` are not attributes of the
        # callback; they are resolved from an enclosing scope - confirm they
        # are defined wherever this class is used.
        if self.n_calls % interval == 0:
            pp.pprint(self.model.get_env())

        if self.n_calls % self.check_freq == 0:

            # Retrieve training reward
            x, y = ts2xy(load_results(self.log_dir), 'timesteps')
            if len(x) > 0:
                # Mean training reward over the last 100 episodes
                mean_reward = np.mean(y[-100:])
                if self.verbose > 0:
                    print("Num timesteps: {}".format(self.num_timesteps))
                    print(
                        "Best mean reward: {:.2f} - Last mean reward per episode: {:.2f}"
                        .format(self.best_mean_reward, mean_reward))

                # New best model, you could save the agent here
                if mean_reward > self.best_mean_reward:
                    self.best_mean_reward = mean_reward
                    # Example for saving best model
                    if self.verbose > 0:
                        print("Saving new best model to {}".format(
                            self.save_path))
                    self.model.save(self.save_path)

            print()

        if self.n_calls % self.save_freq == 0:
            # Use the callback's own log directory (the one the rewards are
            # read from above) rather than a same-named outer variable, so
            # checkpoints always land next to the logs of this run.
            path = self.log_dir + '/checkpoints/chk_{}'.format(
                int(self.n_calls / interval))
            if self.verbose > 0:
                print("Saving checkpoint to {}".format(path))
            self.model.save(path)

            print()

        return True
    # 'logdir/models_tensorflow/large_td3_256dim_2048x2_0sensors_10seed_5000000steps_0gd_100ms_scaling0.5/',
    'logdir/models_tensorflow/large_td3_256dim_2048x2_0sensors_11seed_5000000steps_0gd_100ms_scaling0.5/',
    # 'logdir/models_tensorflow/large_td3_256dim_2048x2_0sensors_12seed_5000000steps_0gd_100ms_scaling0.5/',
    #
    # # 'logdir/models_tensorflow/large_td3_256dim_2048x2_0sensors_13seed_5000000steps_0gd_1000ms_scaling0.5/',
    # 'logdir/models_tensorflow/large_td3_256dim_2048x2_0sensors_14seed_2500000steps_0gd_100ms_scaling0.5/',
    'logdir/models_tensorflow/large_td3_256dim_2048x2_0sensors_2seed_5000000steps_0gd_100ms_cur_scaling0.5/',
    # 'logdir/models_tensorflow/large_td3_256dim_2048x2_0sensors_3seed_5000000steps_0gd_100ms_cur_scaling0.5/',
    'logdir/models_tensorflow/large_td3_256dim_2048x2_0sensors_4seed_5000000steps_0gd_100ms_cur_scaling0.5/',

    # 'logdir/models_tensorflow/large_td3_256dim_2048x2_0sensors_13seed_2500000steps_0gd_100ms_cur_scaling0.5/',
]

# results_plotter.plot_results(fnames, 1e6, results_plotter.X_TIMESTEPS, "Results")

# Quick inspection: load the first log folder and show its content and type.
timesteps = load_results(fnames[0])

print(timesteps)
print(type(timesteps))

# Load every folder in fnames, keep only the rows within the timestep budget
# and convert each run into the (x, y) form produced by results_plotter.ts2xy.
tslist = []
num_timesteps = 1e7
xaxis = results_plotter.X_TIMESTEPS
for folder in fnames:
    timesteps = load_results(folder)
    if num_timesteps is not None:
        # Keep rows while the running sum of the `l` column stays within the
        # budget (presumably `l` is the episode length - TODO confirm).
        timesteps = timesteps[timesteps.l.cumsum() <= num_timesteps]
    tslist.append(timesteps)
xy_list = [
    results_plotter.ts2xy(timesteps_item, xaxis) for timesteps_item in tslist
]
Example #8
0
 df_iteration, df_len_mean, df_legend_iteration = [], [], []
 subfolders = [alg, 'sir', 'sil']
 if 'particle_random0.7' in folder_name:
     subfolders = ['ppo', 'sir', 'sil']
 elif 'particle_random1.0' in folder_name:
     subfolders = ['ppo', 'sir', 'sil']
 elif 'maze' in folder_name:
     subfolders = ['ppo', 'sir_re2']
 for subfolder in subfolders:
     last_success_len = []
     for i in range(3):
         if not os.path.exists(
                 os.path.join(folder_name, subfolder, str(i),
                              '0.monitor.csv')):
             continue
         monitor_df = load_results(
             os.path.join(folder_name, subfolder, str(i)))
         raw_len = monitor_df.l
         raw_success = monitor_df.is_success
         cum_len = raw_len.cumsum()
         masked_len = smooth(raw_len[raw_success > 0.5].values, 100)
         masked_cum_len = smooth(cum_len[raw_success > 0.5].values, 100)
         success_len_f = interpolate.interp1d(masked_cum_len,
                                              masked_len,
                                              fill_value="extrapolate")
         print(masked_cum_len[-1], max_timesteps[env_name])
         timesteps = np.arange(0, max_timesteps[env_name],
                               max_timesteps[env_name] // 500)
         success_len = success_len_f(timesteps)
         # iterations = timesteps / timesteps[-1] * max_iterationss[env_name]
         # iterations = smooth(iterations, 20)
         timesteps = smooth(timesteps, 20)
def _zero_order_hold(x_list, y_list, stride):
    """Resample all runs onto the union of their x values with a zero-order hold.

    :param x_list: per-run sequences of x values
    :param y_list: per-run sequences of y values, parallel to ``x_list``
    :param stride: (int) keep only every ``stride``-th resampled point
    :return: (list, list) the subsampled x and y values of all runs, flattened
    """
    if not x_list:
        # np.concatenate refuses an empty list of arrays.
        return [], []

    joint_x = sorted(set(np.concatenate(x_list)))
    held_x, held_y = [], []
    for xs, ys in zip(x_list, y_list):
        if len(xs) == 0:
            # A run without any data point has no value to hold.
            continue
        cur_ind = 0
        resampled = []
        for joint_value in joint_x:
            # Move on to the run's next sample once the grid has passed the
            # current one, but never step beyond the run's last sample.
            if joint_value > xs[cur_ind] and cur_ind < len(ys) - 1:
                cur_ind += 1
            resampled.append(ys[cur_ind])
        held_x.extend(joint_x[::stride])
        held_y.extend(resampled[::stride])
    return held_x, held_y


def multi_seed_plot_results(dirs,
                            num_timesteps,
                            xaxis,
                            task_name,
                            legend_names=None,
                            individual=False,
                            zero_shot=None,
                            zoh=True,
                            hiro=None,
                            clip=None):
    '''
    Plot reward curves for experiments that each consist of several runs.

    Directory structure is assumed as follows:
    /experiment_name/RunName/0.monitor.csv

    :param dirs: ([str]) experiment folders; entries that do not start with
        '/' are resolved relative to ``BASE``
    :param num_timesteps: (int or None) rows of a run are kept only while the
        running sum of its ``l`` column is at or below this value
    :param xaxis: (str) x-axis selector passed to ``ts2xy``; also used as the
        x column name of the aggregated data
    :param task_name: (str) title of the plot
    :param legend_names: (sequence or None) legend caption per experiment, in
        the order of ``dirs``; the experiment path is used where none is given
    :param individual: (bool) plot every run as its own line (labelled with
        the run folder name) instead of one aggregated line per experiment
    :param zero_shot: (float or None) if given, draw a dashed horizontal
        reference line at this value
    :param zoh: (bool) aggregate runs after zero-order-hold resampling onto a
        shared x grid; otherwise the raw points of all runs are pooled
    :param hiro: (str or None) folder searched recursively for ``train.csv``
        files that are plotted as an additional "Hiro" curve
    :param clip: (float or None) lower bound applied to the y values that go
        into the aggregated curves
    '''
    import pandas as pd
    sns.set_context(context="paper", font_scale=1.5)
    sns.set_style("darkgrid", {'font.family': 'serif'})

    graph = None
    # enumerate keeps the caption index aligned with the experiment in both
    # the individual and the aggregated mode.
    for index, experiment in enumerate(dirs):
        if experiment.startswith('/'):
            exp_dir = experiment
        else:
            exp_dir = os.path.join(BASE, experiment)
        runs = [
            run for run in os.listdir(exp_dir)
            if os.path.isdir(os.path.join(exp_dir, run))
        ]
        if legend_names and index < len(legend_names):
            legend_caption = str(legend_names[index])
        else:
            legend_caption = experiment
        # Captions containing 'Full' use a different window function and a
        # denser subsampling of the resampled curve.
        is_full = 'Full' in legend_caption

        x_list, y_list = [], []
        for run in runs:
            timesteps = load_results(os.path.join(exp_dir, run))
            if num_timesteps is not None:
                timesteps = timesteps[timesteps.l.cumsum() <= num_timesteps]
            x, y = ts2xy(timesteps, xaxis)
            # Apply the window function on episodes.
            if x.shape[0] >= EPISODES_WINDOW:
                # Compute rolling mean with window of size EPISODES_WINDOW
                window_fn = window_func_full if is_full else window_func
                x, y = window_fn(x, y, EPISODES_WINDOW, np.mean)
            if individual:
                graph = sns.lineplot(x=x, y=y, label=run)

            x_list.append(x)
            # Compare against None so that a bound of 0 is honoured as well.
            if clip is not None:
                y = np.clip(y, clip, np.inf)
            y_list.append(y)

        if individual:
            continue

        if zoh:
            # Zero Order Hold interpolate the data
            combined_x_list, combined_y_list = _zero_order_hold(
                x_list, y_list, 5 if is_full else 50)
        else:
            # Regular data
            combined_x_list, combined_y_list = [], []
            for xs, ys in zip(x_list, y_list):
                combined_x_list.extend(xs)
                combined_y_list.extend(ys)

        data = pd.DataFrame({
            xaxis: combined_x_list,
            "reward": combined_y_list
        })
        print(len(combined_x_list))
        graph = sns.lineplot(x=xaxis,
                             y="reward",
                             data=data,
                             ci="sd",
                             sort=True,
                             label=legend_caption)

    if hiro is not None:
        hiro_files = []
        # The directory list yielded by os.walk gets its own name so it does
        # not overwrite the ``dirs`` parameter.
        for root, _subdirs, files in os.walk(hiro):
            for file in files:
                if file == "train.csv":
                    hiro_files.append(os.path.join(root, file))
        print(hiro_files)
        combined_x_list, combined_y_list = [], []
        for hiro_file in hiro_files:
            df = pd.read_csv(hiro_file)
            combined_x_list.extend(df["total/steps"])
            combined_y_list.extend(df["rollout/return_history"])

        data = pd.DataFrame({
            xaxis: combined_x_list,
            "reward": combined_y_list
        })
        graph = sns.lineplot(x=xaxis,
                             y="reward",
                             data=data,
                             ci="sd",
                             n_boot=500,
                             sort=True,
                             label="Hiro")

    if zero_shot is not None:
        # Fall back to the current axes when no curve has been drawn, so the
        # reference line can still be added.
        axes = graph if graph is not None else plt.gca()
        axes.axhline(zero_shot,
                     c='purple',
                     linestyle='dashed',
                     label="Zero Shot")
    plt.title(task_name)
    plt.xlabel('Samples')
    plt.ylabel("Episode Rewards")
    plt.legend(loc='lower right')
    # graph.get_legend().remove()
    plt.tight_layout(pad=0)
    plt.ticklabel_format(axis='x', style='sci', scilimits=(6, 6))