def plot_results2(log_folder1, log_folder2, title='Learning Curve'):
    """
    Plot the results of two runs on one figure.

    :param log_folder1: (str) the save location of the PPO results to plot
    :param log_folder2: (str) the save location of the A2C results to plot
    :param title: (str) the title of the task to plot
    """
    x, y = ts2xy(load_results(log_folder1), 'timesteps')
    y = moving_average(y, window=50)
    # Truncate x to match the smoothed y
    x = x[len(x) - len(y):]

    x2, y2 = ts2xy(load_results(log_folder2), 'timesteps')
    y2 = moving_average(y2, window=50)
    # Truncate x2 to match the smoothed y2
    x2 = x2[len(x2) - len(y2):]

    fig = plt.figure(title)
    plt.plot(x, y, label='PPO')
    plt.plot(x2, y2, label='A2C')
    plt.xlabel('Number of Timesteps')
    plt.ylabel('Rewards')
    plt.title(title + " Smoothed")
    plt.legend()
    plt.show()
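# Several snippets in this collection call a `moving_average` helper without
# defining it. A minimal sketch, assuming the convolution-based smoothing used
# in the Stable Baselines examples:
import numpy as np

def moving_average(values, window):
    """Smooth `values` with a box filter of size `window`."""
    weights = np.repeat(1.0, window) / window
    # 'valid' mode yields len(values) - window + 1 points, which is why the
    # callers truncate x to match the smoothed y.
    return np.convolve(values, weights, 'valid')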
def plot_results(log_folder):
    results = load_results(log_folder)
    R = results['r']
    T = results['t']
    # Savitzky-Golay smoothing, currently disabled:
    # from scipy.signal import savgol_filter
    # _w = 7
    # _window_size = len(R) // _w if (len(R) // _w) % 2 != 0 else len(R) // _w + 1
    # filtered = savgol_filter(R, _window_size, 1)
    plt.title('Episode returns')
    plt.ylabel('Returns')
    plt.xlabel('time step')
    plt.plot(T, R)
    plt.grid()
    plt.show()
def plot_results(log_folder, title='Learning Curve', save_fig=False):
    """
    Plot the results.

    :param log_folder: (str) the save location of the results to plot
    :param title: (str) the title of the task to plot
    :param save_fig: (bool) whether to also save the figure as a PNG
    """
    x, y = ts2xy(load_results(log_folder), 'timesteps')
    y = moving_average(y, window=50)
    # Truncate x to match the smoothed y
    x = x[len(x) - len(y):]

    fig = plt.figure(title)
    plt.plot(x, y)
    plt.xlabel('Number of Timesteps')
    plt.ylabel('Rewards')
    plt.title(title + " Smoothed")
    if save_fig:
        # Save before plt.show() clears the figure; the original referenced an
        # undefined `save_dir`, so the plot is saved next to the logs instead.
        plt.savefig(os.path.join(log_folder, "learning_curve.png"))
    plt.show()
def main():
    # Create log dir
    args = parse_arguments()
    save_dir = args.log_dir
    logger = Logger(output_dir=save_dir)

    title = 'Learning Curve'
    x, y = logger.load_results(["EpLen", "EpRet"])
    x = cumulative_sum(x)
    y = moving_average(y, window=50)
    # Truncate x to match the smoothed y
    x = x[len(x) - len(y):]

    fig = plt.figure(title)
    plt.plot(x, y, label="Own implementation")

    if args.compare:
        log_dir = os.path.join(
            "Stable_Baselines", "logs",
            os.path.sep.join(args.log_dir.split(os.path.sep)[1:]))
        from stable_baselines3.common.results_plotter import load_results, ts2xy
        x2, y2 = ts2xy(load_results(log_dir), 'timesteps')
        y2 = moving_average(y2, window=50)
        # Truncate x2 to match the smoothed y2
        x2 = x2[len(x2) - len(y2):]
        x2, y2 = standardise_graph(x, y, x2, y2)
        plt.plot(x2, y2, label="Stable_Baselines3 implementation")

    plt.legend()
    plt.xlabel('Number of Timesteps')
    plt.ylabel('Rewards')
    plt.title(title + " Smoothed")
    if args.save:
        fname = "comparison.png" if args.compare else "learning_curve.png"
        plt.savefig(os.path.join(save_dir, fname))
    plt.show()
def _on_step(self):
    if self.n_calls % self.check_freq == 0:
        # Retrieve training reward
        x, y = ts2xy(load_results(self.log_dir), 'timesteps')
        if len(x) > 0:
            # Mean training reward over the last 50 episodes
            mean_reward = np.mean(y[-50:])
            if self.verbose > 0:
                print("Num timesteps: {}".format(self.num_timesteps))
                print("Best mean reward: {:.2f} - Last mean reward per episode: {:.2f}"
                      .format(self.best_mean_reward, mean_reward))
            # New best model, you could save the agent here
            if mean_reward > self.best_mean_reward:
                self.best_mean_reward = mean_reward
                # Example for saving best model
                if self.verbose > 0:
                    print("Saving new best model to {}".format(self.save_path))
                self.model.save(self.save_path)
    return True
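# A minimal sketch of how a reward-checking callback like the one above is
# typically wired into training. The class name
# `SaveOnBestTrainingRewardCallback` and the "/tmp/sb3_log/" directory are
# assumptions for illustration; the key point is that the environment must be
# wrapped in `Monitor` so that `load_results`/`ts2xy` have episode data to read.
import os

import gym
from stable_baselines3 import PPO
from stable_baselines3.common.monitor import Monitor

log_dir = "/tmp/sb3_log/"
os.makedirs(log_dir, exist_ok=True)
env = Monitor(gym.make("CartPole-v1"), log_dir)  # writes *.monitor.csv files
callback = SaveOnBestTrainingRewardCallback(check_freq=1000, log_dir=log_dir)
model = PPO("MlpPolicy", env, verbose=0)
model.learn(total_timesteps=50_000, callback=callback)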
def _on_step(self) -> bool:
    if self.n_calls % self.check_freq == 0:
        # Retrieve the training scores
        x, y = ts2xy(load_results(self.log_dir), 'timesteps')
        self.logger.record('timesteps', self.num_timesteps)
        self.logger.record('mean_reward', self.best_mean_reward)
        if self.usewandb:
            wandb.log({'mean_reward': self.best_mean_reward})
            wandb.log({'timesteps': self.num_timesteps})
        if len(x) > 0:
            # Mean score over the most recent episodes (last 2 * check_freq entries)
            mean_reward = np.mean(y[-2 * self.check_freq:])
            if self.verbose > 0:
                print(f"Num timesteps: {self.num_timesteps} : ", end='')
                print(f"Best mean reward: {self.best_mean_reward:.2f} - Last mean reward/ep: {mean_reward:.2f}")
            # Save the model whenever the recent mean reward improves
            if mean_reward > self.best_mean_reward:
                self.best_mean_reward = mean_reward
                if self.verbose > 0:
                    print(f"Saving new best model to {self.save_path}.zip")
                self.model.save(self.save_path)
    return True
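# The callback above logs through `self.logger` and, optionally, Weights &
# Biases. A minimal sketch of the wandb setup it assumes (the project name
# "sb3-training" is illustrative):
import wandb

wandb.init(project="sb3-training")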
def _on_step(self) -> bool:
    if self.n_calls % self.check_freq == 0:
        x, y = ts2xy(load_results(self.locals['tb_log_name']), 'timesteps')
        if len(x) > 0:
            # Mean training reward over the last 100 episodes
            mean_reward = np.mean(y[-100:])
            self.logger.record('rollout/mean reward of 100 steps', mean_reward)
    return True
def _on_rollout_end(self) -> None:
    """
    This event is triggered before updating the policy.
    """
    # Note: `load_results` returns the Monitor dataframe (columns r/l/t by
    # default), so these loss keys are only present if they were written into
    # the monitor files via custom info keywords.
    results = load_results(self.log_dir)  # renamed to avoid shadowing the builtin `dict`
    actor_loss = results.get("train/actor_loss")
    critic_loss = results.get("train/critic_loss")
    x, y = ts2xy(results, 'timesteps')
    # Compare against None explicitly: truth-testing a pandas Series raises.
    if len(x) > 0 and actor_loss is not None and critic_loss is not None:
        mean_reward = np.mean(y[-min(100, len(y)):])
        if self.wandb_logging:
            wandb.log({
                "episode_reward": y[-1],
                "mean_episode_reward": mean_reward,
                "actor_loss": actor_loss,
                "critic_loss": critic_loss,
            })
def _on_step(self) -> bool:
    if self.n_calls % self.check_freq == 0:
        # Retrieve training reward; x represents timesteps
        x, y = ts2xy(load_results(self.log_dir), 'timesteps')
        if len(x) > 0:
            # Mean training reward over the last 50 episodes
            mean_reward = np.mean(y[-50:])
            if self.verbose > 0:
                print("Num timesteps: {}".format(self.num_timesteps))
                print("Best mean reward: {:.2f} - Last mean reward per episode: {:.2f}".format(
                    self.best_mean_reward, mean_reward))
            writer.add_scalar("agent_reward/train/mean_episode_reward", mean_reward, self.n_calls)
    return True
def plot_results2(log_folder, title='Learning Curve'):
    """
    Plot the results.

    :param log_folder: (str) the save location of the results to plot
    :param title: (str) the title of the task to plot
    """
    x, y = ts2xy(load_results(log_folder), 'timesteps')
    y = moving_average(y, window=100)
    # Truncate x to match the smoothed y
    x = x[len(x) - len(y):]

    fig = plt.figure(title)
    plt.plot(x, y)
    plt.xlabel('Number of Timesteps')
    plt.ylabel('Rewards')
def _on_step(self) -> bool:
    # Get the monitor's data
    x, y = ts2xy(load_results(self.log_dir), 'timesteps')
    if self._plot is None:
        # Make the plot
        plt.ion()
        fig = plt.figure(figsize=(6, 3))
        ax = fig.add_subplot(111)
        line, = ax.plot(x, y)
        self._plot = (line, ax, fig)
        plt.show()
    else:
        # Update and rescale the plot
        self._plot[0].set_data(x, y)
        self._plot[-2].relim()
        self._plot[-2].set_xlim([self.locals["total_timesteps"] * -0.02,
                                 self.locals["total_timesteps"] * 1.02])
        self._plot[-2].autoscale_view(True, True, True)
        self._plot[-1].canvas.draw()
    return True  # _on_step must return a bool so training continues
def _on_step(self) -> bool:
    x, y = ts2xy(load_results(self.log_dir), 'timesteps')
    if len(x) > 0:
        mean_reward = np.mean(y[-100:])
        if self.verbose > 0:
            print("Num timesteps: {}".format(self.num_timesteps))
            print("Best mean reward: {:.2f} - Last mean reward per episode: {:.2f}"
                  .format(self.best_mean_reward, mean_reward))
        if mean_reward > self.best_mean_reward:
            self.best_mean_reward = mean_reward
            if self.verbose > 0:
                print("Saving new best model to {}".format(self.save_path))
            self.model.save(self.save_path)
    return True
def _on_step(self) -> bool:
    if self.n_calls % self.check_freq == 0:
        x, y = ts2xy(load_results(self.log_dir), 'timesteps')
        if len(x) > 0:
            mean_reward = np.mean(y[-100:])
            if self.verbose > 0:
                print(f"Num timesteps: {self.num_timesteps}")
                print(f"Best mean reward: {self.best_mean_reward:.2f} - Last mean reward per episode: {mean_reward:.2f}")
            if mean_reward > self.best_mean_reward:
                self.best_mean_reward = mean_reward
                if self.verbose > 0:
                    print(f"Saving new best model to {self.save_path}.zip")
                self.model.save(self.save_path)
    return True
def _on_step(self) -> bool:
    if self.n_calls % self.check_freq == 0:
        # Retrieve training reward
        x, y = ts2xy(load_results(self.log_dir), 'timesteps')
        if len(x) > 0:
            global episodes
            global mean_reward
            global best_reward
            episodes = len(y)
            mean_reward = np.mean(y[-50:])
            mean_reward = round(mean_reward, 0)
            if self.verbose > 0:
                print(f"Episodes: {episodes}")
                print(f"Num steps: {self.num_timesteps}")
                print(f"Mean reward: {mean_reward:.2f}")
                print("=================================")

            # Report intermediate objective value to Optuna and handle pruning
            trial.report(mean_reward, self.num_timesteps)
            if trial.should_prune():
                raise optuna.TrialPruned()

            # New best model, you could save the agent here
            if mean_reward > best_reward:
                best_reward = mean_reward
                if mean_reward > reward_threshold:
                    print("REWARD ACHIEVED")
                    model.save(f"{self.save_path}/reward_achieved_{mean_reward}")
                    return False
                else:
                    model.save(f"{self.save_path}/best_model")
            # if episodes > episodes_threshold:
            #     print("REWARD ACHIEVED")
            #     model.save(self.save_path)
            #     return False
    return True
def save_episode_rewards_as_csv(self, data_directory="data/", log_dir=None):
    if log_dir is None:
        log_dir = self.log_dir
    # ts2xy with 'timesteps' returns (timesteps, episode rewards)
    timesteps, rewards = ts2xy(load_results(log_dir), 'timesteps')
    filename = self.model_name + "_episode_rewards"
    location = data_directory + '{}.csv'.format(filename)
    with open(location, mode='w') as results_file:
        results_writer = csv.writer(results_file, delimiter=',',
                                    quotechar='"', quoting=csv.QUOTE_MINIMAL)
        results_writer.writerow(timesteps)
        results_writer.writerow(rewards)
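# Reading the two-row CSV written above back into arrays. The path
# "data/model_episode_rewards.csv" is illustrative:
import csv

import numpy as np

with open("data/model_episode_rewards.csv") as f:
    rows = list(csv.reader(f))
timesteps = np.array(rows[0], dtype=float)
rewards = np.array(rows[1], dtype=float)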
def _on_step(self) -> bool:
    if self.n_calls % self.check_freq == 0:
        # Retrieve training reward
        x, y = ts2xy(load_results(self.log_dir), 'timesteps')
        if len(x) > 0:
            # Mean training reward over the last 100 episodes
            mean_reward = np.mean(y[-100:])
            if self.verbose > 0:
                print(f'Num timesteps: {self.num_timesteps}')
                print(f'Best mean reward: {self.best_mean_reward:.2f} - Last mean reward per episode: {mean_reward:.2f}')
            # New best model, you could save the agent here
            if mean_reward > self.best_mean_reward:
                self.best_mean_reward = mean_reward
                # Example for saving best model
                if self.verbose > 0:
                    print(f'Saving new best model to {self.save_path}.zip')
                self.model.save(self.save_path)
    return True
def _on_step(self) -> bool:
    if self.n_calls % self.eval_freq == 0:
        # Retrieve training reward
        x, y = ts2xy(load_results(self.data_dir), 'timesteps')
        if len(x) > 0:
            # Mean training reward over the last 100 episodes
            mean_reward = np.mean(y[-100:])
        else:
            mean_reward = -np.inf
        if self.verbose > 0:
            print("Num timesteps: {}".format(self.num_timesteps))
            print("Best mean reward: {:.2f} - Last mean reward per episode: {:.2f}".format(
                self.best_mean_reward, mean_reward))
        # New best model, you could save the agent here
        if mean_reward > self.best_mean_reward:
            self.best_mean_reward = mean_reward
            # Example for saving best model
            if self.verbose > 0:
                print("Saving new best model.")
            self.model.save(self.data_dir + '/best_model')
    return True
def plot_training_results(self, title="Learning Curve", xlabel="timestep",
                          ylabel="cumulative reward", filename="reward",
                          log_dir=None, show=False):
    plt.clf()
    if log_dir is None:
        log_dir = self.log_dir
    # ts2xy with 'timesteps' puts timesteps (not episodes) on the x axis,
    # hence the default xlabel above.
    x, y = ts2xy(load_results(log_dir), 'timesteps')
    plt.figure(figsize=(20, 10))
    plt.title(title)
    plt.plot(x, y, "b", label="Cumulative Reward")
    plt.legend()
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.axhline(y=0, color='r', linestyle='-')
    plt.savefig("img/" + filename)
    if show:
        plt.show()
def plot_results(log_folder, title='Learning Curve'):
    """
    Plot the results.

    :param log_folder: (str) the save location of the results to plot
    :param title: (str) the title of the task to plot
    """
    x, y = ts2xy(load_results(log_folder), 'timesteps')
    y = moving_average(y, window=50)
    # Truncate x to match the smoothed y
    x = x[len(x) - len(y):]

    fig = plt.figure(title)
    plt.plot(x, y)
    plt.xlabel('Number of Timesteps')
    plt.ylabel('Rewards')
    plt.title(title + " Smoothed")
    # Built-in plotter
    results_plotter.plot_results([log_folder], 3e5, results_plotter.X_TIMESTEPS, "TD3 LunarLander")
    plt.show()
            str(args.num_threads),
            "--seed",
            str(args.seed),
            "--verbose",
            "0",
            "--reward-log",
            reward_log,
        ]
        if args.verbose >= 1:
            print(f"{idx + 1}/{n_experiments}")
            print(f"Evaluating {algo} on {env_id}...")

        skip_eval = False
        if os.path.isdir(reward_log):
            try:
                x, y = ts2xy(load_results(reward_log), "timesteps")
                skip_eval = len(x) > 0
            except (json.JSONDecodeError, pd.errors.EmptyDataError, TypeError):
                pass

        if skip_eval:
            print("Skipping eval...")
        else:
            return_code = subprocess.call(["python", "enjoy.py"] + arguments)
            if return_code != 0:
                print("Error during evaluation, skipping...")
                continue
            x, y = ts2xy(load_results(reward_log), "timesteps")

        if len(x) > 0:
            # Retrieve training timesteps from config
def generate_plots(dirs, xaxis=X_TIMESTEPS, yaxis='r', title=None, labels=None,
                   num_timesteps=None, subsample=None, individual=False):
    for i in range(len(dirs)):
        if not dirs[i].startswith('/'):
            dirs[i] = os.path.join(BASE, dirs[i])
    # If pointing to a single folder and that folder has many results, use that as dir
    if len(dirs) == 1 and len(get_subdirs(dirs[0])) > 1:
        dirs = [os.path.join(dirs[0], subdir) for subdir in get_subdirs(dirs[0])]
    dirs = sorted(dirs)  # Make everything reproducible by sorting. Can comment out later for organization.
    if labels is None:
        labels = [os.path.basename(os.path.normpath(d)) for d in dirs]
    dirs, labels = zip(*sorted(zip(dirs, labels), key=lambda x: x[0]))
    plt.gcf().dpi = 100.0
    print("Dirs", dirs)

    for i, folder in enumerate(dirs):
        if 'params.yaml' not in os.listdir(folder):
            # If the directory contains one folder and none of those folders
            # have params.yaml, move down a level.
            while True:
                contents = get_subdirs(folder)
                if any(['params.yaml' in os.listdir(os.path.join(folder, c)) for c in contents]):
                    break
                folder = os.path.join(folder, contents[0])
        if 'params.yaml' not in os.listdir(folder):
            runs = sorted([os.path.join(folder, r) for r in get_subdirs(folder)])
        else:
            runs = [folder]
        print("Different seeds for folder", folder, ":")
        print(runs)
        print("----")

        sns.set_context(context="paper", font_scale=1.5)
        sns.set_style("darkgrid", {'font.family': 'serif'})

        xlist, ylist = [], []
        for run in runs:
            timesteps = load_results(run)
            if num_timesteps is not None:
                timesteps = timesteps[timesteps.l.cumsum() <= num_timesteps]
            x, y = ts2xy(timesteps, xaxis, yaxis=yaxis)
            if x.shape[0] >= EPISODES_WINDOW:
                x, y = window_func(x, y, EPISODES_WINDOW, np.mean)
            xlist.append(x)
            ylist.append(y)

        if individual:
            # Note: no enumerate here, so the outer loop index `i` is not shadowed.
            for xs, ys, run in zip(xlist, ylist, runs):
                g = sns.lineplot(x=xs, y=ys, label=os.path.basename(run))
        else:
            # Zero-order hold to align the data for plotting
            joint_x = sorted(list(set(np.concatenate(xlist))))
            combined_x, combined_y = [], []
            for xs, ys in zip(xlist, ylist):
                cur_ind = 0
                zoh_y = []
                for x in joint_x:
                    # The next value matters
                    if cur_ind < len(ys) - 1 and x >= xs[cur_ind + 1]:
                        cur_ind += 1
                    zoh_y.append(ys[cur_ind])
                if subsample:
                    combined_x.extend(joint_x[::subsample])
                    combined_y.extend(zoh_y[::subsample])
                else:
                    combined_x.extend(joint_x)
                    combined_y.extend(zoh_y)
            data = pd.DataFrame({xaxis: combined_x, yaxis: combined_y})
            g = sns.lineplot(x=xaxis, y=yaxis, data=data, ci=None, sort=True, label=labels[i])
        print("Completed folder", folder)

    if title:
        plt.title(title)
    plt.xlabel(xaxis)
    plt.ylabel(yaxis)
    plt.tight_layout(pad=0)
def percentile_plot(dirs, title=None, labels=None, curve=False):
    for i in range(len(dirs)):
        if not dirs[i].startswith('/'):
            dirs[i] = os.path.join(BASE, dirs[i])
    if labels is None:
        labels = [os.path.basename(os.path.normpath(d)) for d in dirs]

    categorical_x = []
    categorical_y = []
    for i, directory in enumerate(dirs):
        folders = [os.path.join(directory, subdir) for subdir in get_subdirs(directory)]
        rewards = []
        for folder in folders:
            runs = []
            for item in os.listdir(folder):
                item_path = os.path.join(folder, item)
                if os.path.isdir(item_path) and '0.monitor.csv' in os.listdir(item_path):
                    runs.append(item_path)
            returns = []
            print("Different evals for folder", folder,
                  "Please ensure this has ALL intended environments or results may skew:")
            print(runs)
            print("----")
            for run in runs:
                timesteps = load_results(run)
                x, y = ts2xy(timesteps, X_TIMESTEPS, yaxis='r')
                if x.shape[0] >= EPISODES_WINDOW:
                    x, y = window_func(x, y, EPISODES_WINDOW, np.mean)
                if len(y) > 0:
                    returns.append(max(y))
                else:
                    print("There was an error on run", run)
            avg_reward = np.mean(returns)
            print(folder, avg_reward)
            rewards.append(avg_reward)

        print("Seeds for", directory, ":", len(rewards), "of", len(folders))
        print("Stats:")
        rounded_mean = round(np.mean(rewards), 1)
        rounded_confidence = round(np.std(rewards) / np.sqrt(len(rewards)), 1)
        print("$" + str(rounded_mean) + r" \pm " + str(rounded_confidence) + "$")

        if curve:
            x = np.sort(rewards)
            y = 1 - (np.arange(0, len(x)) / len(x))
            sns.set_context(context="paper", font_scale=1.5)
            sns.set_style("darkgrid", {'font.family': 'serif'})
            sns.scatterplot(x=x, y=y, label=labels[i])
        else:
            categorical_x.extend([labels[i]] * len(rewards))
            categorical_y.extend(rewards)

    if curve:
        plt.xlabel('reward')
        plt.ylabel('% above threshold')
    else:
        sns.set(rc={'figure.figsize': (6.4, 4.8)})
        sns.set_context(context="paper", font_scale=1.4)
        sns.set_style("darkgrid", {'font.family': 'serif'})
        sns.boxplot(x=categorical_x, y=categorical_y, whis=1.5, showcaps=False,
                    showfliers=True, saturation=0.7, width=0.9)
        sns.swarmplot(x=categorical_x, y=categorical_y, color="0.25")
        plt.ylabel('Reward')
    if title:
        plt.title(title)
    plt.tight_layout(pad=0)
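# `window_func`, `load_results`, and `X_TIMESTEPS` used by the two plotting
# utilities above match the helpers shipped with Stable Baselines3 and can be
# imported as below. Note that the stock `ts2xy` takes only
# (data_frame, x_axis); the `yaxis=` keyword used above suggests these
# functions rely on a locally modified variant.
from stable_baselines3.common.results_plotter import (
    X_TIMESTEPS, load_results, window_func)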
parser = argparse.ArgumentParser()
parser.add_argument('-f', '--log-folder', help='Log folder', type=str, default='logs')
parser.add_argument("--env", help="environment ID", type=str, default="CartPole-v1")
args = parser.parse_args()

log_dir = args.log_folder

# Load results
W = load_results(log_dir)

# Save walltime to stats.csv
df = pd.read_csv(log_dir + 'stats.csv')
df["Train walltime (s)"] = W["t"].max()
df.to_csv(log_dir + "stats.csv", index=False)

# Plot training rewards
TIMESTEPS = 1e10
plot_results([log_dir], TIMESTEPS, X_TIMESTEPS, args.env)
plt.savefig(log_dir + "reward_vs_timesteps.png")
# plt.show()
for path in Path(log_dir).rglob(args.env + '_*'):
    res_file_list.append(path)
res_file_list = sorted(res_file_list)

df_list = []
col_list = []
count = 1
for filename in res_file_list:
    filename = str(filename)  # convert from PosixPath to string
    W = load_results(filename)
    df_list.append(W['r'])
    col_list.append("seed " + str(count))
    count += 1

all_rewards = pd.concat(df_list, axis=1)
all_rewards.columns = col_list

all_rewards_copy = all_rewards.copy()
all_rewards["mean_reward"] = all_rewards_copy.mean(axis=1)
all_rewards["std_reward"] = all_rewards_copy.std(axis=1)
all_rewards["upper"] = all_rewards["mean_reward"] + \
    all_rewards["std_reward"]
all_rewards["lower"] = all_rewards["mean_reward"] - \
    all_rewards["std_reward"]
]
else:
    dirs = [
        os.path.join(log_path, folder)
        for folder in os.listdir(log_path)
        if (env in folder and os.path.isdir(os.path.join(log_path, folder)))
    ]

plt.figure(y_label, figsize=args.figsize)
plt.title(y_label, fontsize=args.fontsize)
plt.xlabel(f"{x_label}", fontsize=args.fontsize)
plt.ylabel(y_label, fontsize=args.fontsize)

for folder in dirs:
    try:
        data_frame = load_results(folder)
    except LoadMonitorResultsError:
        continue
    if args.max_timesteps is not None:
        data_frame = data_frame[data_frame.l.cumsum() <= args.max_timesteps]
    try:
        y = np.array(data_frame[y_axis])
    except KeyError:
        print(f"No data available for {folder}")
        continue
    x, _ = ts2xy(data_frame, x_axis)
    # Do not plot the smoothed curve at all if the timeseries is shorter than window size.
    if x.shape[0] >= args.episode_window:
        # Compute and plot rolling mean with window of size args.episode_window
        morphology=morphology, path=base_path, verbose=1)
eval_env = get_env(params, morphology=morphology)
if args.eval_ep > 0:
    fitness = eval_policy(model, eval_env, num_ep=args.eval_ep,
                          deterministic=True, gif=False, render=False,
                          verbose=0)[0]
else:
    # Determine fitness from training history
    x, y = ts2xy(load_results(model_path), 'timesteps')
    if len(x) > 0:
        # Mean training reward over the last |eval_ep| episodes
        # (eval_ep is non-positive here, so this slices from the end)
        ys = y[args.eval_ep:]
        fitness = np.mean(ys)
    else:
        fitness = -np.inf
# except Exception as e:
#     print("Encountered Error", e)
#     print("Assigning zero fitness")
#     model_path, tb_path = get_paths(params, path=base_path)
#     os.makedirs(model_path, exist_ok=True)
#     with open(os.path.join(model_path, 'fitness.tmp'), 'w+') as f:
#         f.write(str(-1 * float('inf')))
#     exit()  # Exit if we fail to train the model.
    '-f', args.log_dir,
    '--algo', algo,
    '--env', env_id,
    '--no-render',
    '--seed', str(args.seed),
    '--verbose', '0',
    '--reward-log', reward_log
]
if args.verbose >= 1:
    print(f"{idx + 1}/{n_experiments}")
    print(f"Evaluating {algo} on {env_id}...")

skip_eval = False
if os.path.isdir(reward_log):
    try:
        x, y = ts2xy(load_results(reward_log), 'timesteps')
        skip_eval = len(x) > 0
    except Exception:
        # Narrowed from a bare `except`: malformed or empty monitor files
        # should not abort the sweep.
        pass

if skip_eval:
    print("Skipping eval...")
else:
    return_code = subprocess.call(['python', 'enjoy.py'] + arguments)

x, y = ts2xy(load_results(reward_log), 'timesteps')
if len(x) > 0:
    mean_reward = np.mean(y)
    std_reward = np.std(y)
    results['algo'].append(algo)