def test_monitor_load_results(tmp_path):
    """
    Check load_results against the log files written by the Monitor wrapper.

    Two monitored CartPole environments are run one after the other inside the
    same folder. After each run the number of rows returned by load_results
    must equal the number of episodes finished so far.
    """
    log_folder = str(tmp_path)

    def make_monitored_env():
        # Seeded CartPole env wrapped in a Monitor that logs to a uniquely named file
        env = gym.make("CartPole-v1")
        env.seed(0)
        csv_path = os.path.join(log_folder, "stable_baselines-test-{}.monitor.csv".format(uuid.uuid4()))
        return Monitor(env, csv_path), csv_path

    def count_finished_episodes(monitored_env, n_steps=1000):
        # Take random actions and count how many times the env reports `done`
        monitored_env.reset()
        finished = 0
        for _ in range(n_steps):
            _obs, _reward, done, _info = monitored_env.step(monitored_env.action_space.sample())
            if done:
                finished += 1
                monitored_env.reset()
        return finished

    # First run: exactly one monitor file must be discoverable
    first_env, first_file = make_monitored_env()
    found_files = get_monitor_files(log_folder)
    assert len(found_files) == 1
    assert first_file in found_files

    first_episodes = count_finished_episodes(first_env)
    first_rows = len(load_results(log_folder).index)
    assert first_rows == first_episodes

    # Second run: both files are found and the loaded results accumulate
    second_env, second_file = make_monitored_env()
    found_files = get_monitor_files(log_folder)
    assert len(found_files) == 2
    assert first_file in found_files
    assert second_file in found_files

    second_episodes = count_finished_episodes(second_env)
    total_rows = len(load_results(log_folder).index)
    assert total_rows == (first_rows + second_episodes)

    os.remove(first_file)
    os.remove(second_file)
def plot_results(log_folder, title="Learning Curve"):
    """
    Plot the smoothed reward curve of a run and save it as a PNG file.

    Parameters
    ----------
    log_folder : str
        the save location of the results to plot
    title : str
        the title of the task to plot; also used as the figure identifier and
        as the stem of the saved image (``title + ".png"``)

    Returns
    -------
    None
    """
    steps, rewards = ts2xy(load_results(log_folder), "timesteps")
    smoothed = moving_average(rewards, window=50)

    # Keep only the trailing timesteps so x has as many entries as the
    # smoothed rewards
    n_dropped = len(steps) - len(smoothed)
    steps = steps[n_dropped:]

    plt.figure(title)
    plt.plot(steps, smoothed)
    plt.xlabel("Number of Timesteps")
    plt.ylabel("Rewards")
    plt.title(title + " Smoothed")
    plt.savefig(title + ".png")
    plt.close()
def get_learning_data(log_dirs, file_reader=load_results, verbose=1):
    """Load learning curve data from selected log directories.

    :param log_dirs: iterable of log directory paths to read
    :param file_reader: callable taking a directory path and returning the
        loaded data (defaults to ``load_results``)
    :param verbose: 0 prints nothing, 1 prints warnings and a final summary,
        2 additionally prints per-directory progress
    :return: dict mapping each directory's name (last path component) to the
        data returned by ``file_reader``; unreadable directories are omitted
    :raises ValueError: if two readable directories share the same name
    """
    learning_curves = {}
    learning_filename = log_filenames['learning']
    if verbose > 1:
        print(f"\nLoading data from '{learning_filename}' files...\n")

    for dir_path in log_dirs:
        # normpath strips a trailing separator; without it 'runs/a/' would
        # produce an empty name and collide with every other such path
        dir_name = os.path.basename(os.path.normpath(dir_path))
        try:
            learning_data = file_reader(dir_path)
        except FileNotFoundError:
            if verbose > 0:
                print(f"{dir_name}: WARNING. No '{learning_filename}' "
                      f"file in: {dir_path}")
        except Exception:
            # Best-effort loading: skip unreadable directories, but let
            # KeyboardInterrupt / SystemExit propagate
            if verbose > 0:
                print(f"{dir_name}: WARNING. Could not read '{learning_filename}' "
                      f"file in: {dir_path}")
        else:
            if verbose > 1:
                print(f"{dir_name}: {len(learning_data)} data points")
            if dir_name in learning_curves:
                raise ValueError(f"Log directory {dir_name} is not unique.")
            learning_curves[dir_name] = learning_data

    if verbose > 0:
        print(f"{len(learning_curves)} '{learning_filename}' "
              f"files found")
    return learning_curves
def plot_results(dirs, num_timesteps, xaxis, task_name, labels):
    """
    Load the results of several folders and plot them as curves.

    :param dirs: ([str]) the save location of the results to plot
    :param num_timesteps: (int) only plot the points below this value
    :param xaxis: (str) the axis for the x and y output
        (can be X_TIMESTEPS='timesteps', X_EPISODES='episodes' or X_WALLTIME='walltime_hrs')
    :param task_name: (str) the title of the task to plot
    :param labels: forwarded unchanged to plot_curves
    :return: the value returned by plot_curves
    """
    def load_truncated(folder):
        # Keep only the episodes whose cumulative length fits in the budget
        results = load_results(folder)
        within_budget = results.l.cumsum() <= num_timesteps
        return results[within_budget]

    truncated_results = [load_truncated(folder) for folder in dirs]
    xy_list = [ts2xy(results, xaxis) for results in truncated_results]
    return plot_curves(xy_list, labels, xaxis, task_name)
def plot_results(dirs, num_timesteps, xaxis, task_name, legend_names=None):
    """
    Load the results of several folders and plot them as curves.

    :param dirs: ([str]) the save location of the results to plot; entries that
        do not start with '/' are resolved relative to BASE
    :param num_timesteps: (int or None) only plot the points below this value,
        None keeps every point
    :param xaxis: (str) the axis for the x and y output
        (can be X_TIMESTEPS='timesteps', X_EPISODES='episodes' or X_WALLTIME='walltime_hrs')
    :param task_name: (str) the title of the task to plot
    :param legend_names: forwarded to plot_curves as ``legend_names``
    """
    def load_folder(folder):
        location = folder if folder.startswith('/') else os.path.join(BASE, folder)
        results = load_results(location)
        if num_timesteps is None:
            return results
        # Keep only the episodes whose cumulative length fits in the budget
        return results[results.l.cumsum() <= num_timesteps]

    loaded_results = [load_folder(folder) for folder in dirs]
    xy_list = [ts2xy(results, xaxis) for results in loaded_results]
    plot_curves(xy_list, xaxis, task_name, legend_names=legend_names)
def _on_step(self) -> bool:
    """
    Periodic bookkeeping executed on every callback step.

    - every ``interval`` calls: pretty-print the model's environment
    - every ``self.check_freq`` calls: average the last 100 logged rewards and
      save the model to ``self.save_path`` when that mean beats the best so far
    - every ``self.save_freq`` calls: save a checkpoint under
      ``log_dir + '/checkpoints/'``

    ``interval`` and ``log_dir`` are read from the enclosing scope, not from
    the instance.

    :return: (bool) always True
    """
    calls = self.n_calls

    if calls % interval == 0:
        pp.pprint(self.model.get_env())

    if calls % self.check_freq == 0:
        # Retrieve training reward
        steps, rewards = ts2xy(load_results(self.log_dir), 'timesteps')
        if len(steps) > 0:
            # Mean training reward over the last 100 episodes
            mean_reward = np.mean(rewards[-100:])
            chatty = self.verbose > 0
            if chatty:
                print("Num timesteps: {}".format(self.num_timesteps))
                print(
                    "Best mean reward: {:.2f} - Last mean reward per episode: {:.2f}"
                    .format(self.best_mean_reward, mean_reward))

            # New best model: remember its score and store it
            if mean_reward > self.best_mean_reward:
                self.best_mean_reward = mean_reward
                if chatty:
                    print("Saving new best model to {}".format(self.save_path))
                self.model.save(self.save_path)
                print()

    if calls % self.save_freq == 0:
        checkpoint_index = int(calls / interval)
        path = log_dir + '/checkpoints/chk_{}'.format(checkpoint_index)
        if self.verbose > 0:
            print("Saving checkpoint to {}".format(path))
        self.model.save(path)
        print()

    return True
# 'logdir/models_tensorflow/large_td3_256dim_2048x2_0sensors_10seed_5000000steps_0gd_100ms_scaling0.5/', 'logdir/models_tensorflow/large_td3_256dim_2048x2_0sensors_11seed_5000000steps_0gd_100ms_scaling0.5/', # 'logdir/models_tensorflow/large_td3_256dim_2048x2_0sensors_12seed_5000000steps_0gd_100ms_scaling0.5/', # # # 'logdir/models_tensorflow/large_td3_256dim_2048x2_0sensors_13seed_5000000steps_0gd_1000ms_scaling0.5/', # 'logdir/models_tensorflow/large_td3_256dim_2048x2_0sensors_14seed_2500000steps_0gd_100ms_scaling0.5/', 'logdir/models_tensorflow/large_td3_256dim_2048x2_0sensors_2seed_5000000steps_0gd_100ms_cur_scaling0.5/', # 'logdir/models_tensorflow/large_td3_256dim_2048x2_0sensors_3seed_5000000steps_0gd_100ms_cur_scaling0.5/', 'logdir/models_tensorflow/large_td3_256dim_2048x2_0sensors_4seed_5000000steps_0gd_100ms_cur_scaling0.5/', # 'logdir/models_tensorflow/large_td3_256dim_2048x2_0sensors_13seed_2500000steps_0gd_100ms_cur_scaling0.5/', ] # results_plotter.plot_results(fnames, 1e6, results_plotter.X_TIMESTEPS, "Results") timesteps = load_results(fnames[0]) print(timesteps) print(type(timesteps)) tslist = [] num_timesteps = 1e7 xaxis = results_plotter.X_TIMESTEPS for folder in fnames: timesteps = load_results(folder) if num_timesteps is not None: timesteps = timesteps[timesteps.l.cumsum() <= num_timesteps] tslist.append(timesteps) xy_list = [ results_plotter.ts2xy(timesteps_item, xaxis) for timesteps_item in tslist ]
df_iteration, df_len_mean, df_legend_iteration = [], [], [] subfolders = [alg, 'sir', 'sil'] if 'particle_random0.7' in folder_name: subfolders = ['ppo', 'sir', 'sil'] elif 'particle_random1.0' in folder_name: subfolders = ['ppo', 'sir', 'sil'] elif 'maze' in folder_name: subfolders = ['ppo', 'sir_re2'] for subfolder in subfolders: last_success_len = [] for i in range(3): if not os.path.exists( os.path.join(folder_name, subfolder, str(i), '0.monitor.csv')): continue monitor_df = load_results( os.path.join(folder_name, subfolder, str(i))) raw_len = monitor_df.l raw_success = monitor_df.is_success cum_len = raw_len.cumsum() masked_len = smooth(raw_len[raw_success > 0.5].values, 100) masked_cum_len = smooth(cum_len[raw_success > 0.5].values, 100) success_len_f = interpolate.interp1d(masked_cum_len, masked_len, fill_value="extrapolate") print(masked_cum_len[-1], max_timesteps[env_name]) timesteps = np.arange(0, max_timesteps[env_name], max_timesteps[env_name] // 500) success_len = success_len_f(timesteps) # iterations = timesteps / timesteps[-1] * max_iterationss[env_name] # iterations = smooth(iterations, 20) timesteps = smooth(timesteps, 20)
def multi_seed_plot_results(dirs, num_timesteps, xaxis, task_name, legend_names=None,
                            individual=False, zero_shot=None, zoh=True, hiro=None, clip=None):
    '''
    Plot reward curves for several experiments, each made of multiple runs.

    Directory structure is assumed as follows:
    /experiment_name/RunName/0.monitor.csv

    :param dirs: ([str]) experiment folders; entries that do not start with
        '/' are resolved relative to BASE. Every sub-directory is one run.
    :param num_timesteps: (int or None) only keep episodes whose cumulative
        length is below this value, None keeps everything
    :param xaxis: (str) x axis passed to ts2xy, also used as the x column name
    :param task_name: (str) the title of the plot
    :param legend_names: optional labels, one per entry of ``dirs``; the
        experiment path is used when omitted
    :param individual: (bool) draw one line per run instead of one aggregated
        line (with standard-deviation band) per experiment
    :param zero_shot: (float or None) y value of a dashed horizontal
        "Zero Shot" reference line
    :param zoh: (bool) resample every run onto the union of all runs' x values
        by holding values, then keep every 50th point (every 5th when the
        legend caption contains 'Full'); otherwise the raw points are pooled
    :param hiro: (str or None) directory searched recursively for "train.csv"
        files that are plotted as an additional "Hiro" curve
    :param clip: lower bound applied to the rewards with np.clip; ignored when
        falsy (None or 0)
    '''
    import pandas as pd

    sns.set_context(context="paper", font_scale=1.5)
    sns.set_style("darkgrid", {'font.family': 'serif'})
    graph = None

    for i, experiment in enumerate(dirs):
        x_list, y_list = [], []
        exp_dir = experiment if experiment.startswith('/') else os.path.join(BASE, experiment)
        runs = [
            run for run in os.listdir(exp_dir)
            if os.path.isdir(os.path.join(exp_dir, run))
        ]
        legend_caption = str(legend_names[i]) if legend_names else experiment
        is_full = 'Full' in legend_caption

        for run in runs:
            timesteps = load_results(os.path.join(exp_dir, run))
            if num_timesteps is not None:
                timesteps = timesteps[timesteps.l.cumsum() <= num_timesteps]
            x, y = ts2xy(timesteps, xaxis)

            # Apply the window function on episodes.
            # NOTE(review): runs shorter than EPISODES_WINDOW are kept
            # unsmoothed rather than skipped - confirm this is intended.
            if x.shape[0] >= EPISODES_WINDOW:
                smooth_fn = window_func_full if is_full else window_func
                x, y = smooth_fn(x, y, EPISODES_WINDOW, np.mean)

            if individual:
                graph = sns.lineplot(x=x, y=y, label=run)
            x_list.append(x)
            # Clipping happens after the individual line was drawn, so it only
            # affects the aggregated plot
            if clip:
                y = np.clip(y, clip, np.inf)
            y_list.append(y)

        if not individual:
            combined_x_list, combined_y_list = [], []
            if zoh:
                # Zero Order Hold interpolate the data onto a common x grid
                joint_x_list = sorted(set(np.concatenate(x_list)))
                stride = 5 if is_full else 50
                for xs, ys in zip(x_list, y_list):
                    cur_ind = 0
                    new_y_list = []
                    for joint_x in joint_x_list:
                        # Advance to the next sample once the grid has passed it
                        if joint_x > xs[cur_ind] and cur_ind < len(ys) - 1:
                            cur_ind += 1
                        new_y_list.append(ys[cur_ind])
                    combined_x_list.extend(joint_x_list[::stride])
                    combined_y_list.extend(new_y_list[::stride])
            else:
                # Regular data
                for xs, ys in zip(x_list, y_list):
                    combined_x_list.extend(xs)
                    combined_y_list.extend(ys)

            data = pd.DataFrame({
                xaxis: combined_x_list,
                "reward": combined_y_list
            })
            print(len(combined_x_list))
            graph = sns.lineplot(x=xaxis, y="reward", data=data, ci="sd",
                                 sort=True, label=legend_caption)

    if hiro is not None:
        # Loop names chosen so the `dirs` parameter is not rebound
        hiro_files = [
            os.path.join(root, file_name)
            for root, _subdirs, file_names in os.walk(hiro)
            for file_name in file_names
            if file_name == "train.csv"
        ]
        print(hiro_files)

        combined_x_list, combined_y_list = [], []
        for hiro_file in hiro_files:
            df = pd.read_csv(hiro_file)
            combined_x_list.extend(df["total/steps"])
            combined_y_list.extend(df["rollout/return_history"])
        data = pd.DataFrame({
            xaxis: combined_x_list,
            "reward": combined_y_list
        })
        graph = sns.lineplot(x=xaxis, y="reward", data=data, ci="sd",
                             n_boot=500, sort=True, label="Hiro")

    if zero_shot is not None:
        # sns.lineplot returns the axes it drew on; fall back to the current
        # axes when nothing has been plotted so far
        axes = graph if graph is not None else plt.gca()
        axes.axhline(zero_shot, c='purple', linestyle='dashed', label="Zero Shot")

    plt.title(task_name)
    plt.xlabel('Samples')
    plt.ylabel("Episode Rewards")
    plt.legend(loc='lower right')
    plt.tight_layout(pad=0)
    plt.ticklabel_format(axis='x', style='sci', scilimits=(6, 6))