def test_vec_monitor_load_results(tmp_path):
    """
    Check that ``load_results`` aggregates every VecMonitor log in a folder.

    Two monitored CartPole environments are run one after the other in the
    same directory; after each run the number of rows returned by
    ``load_results`` must equal the total number of finished episodes.
    """
    tmp_path = str(tmp_path)

    def _new_monitor_path():
        # A fresh uuid keeps the two monitor files from colliding.
        return os.path.join(tmp_path, f"stable_baselines-test-{uuid.uuid4()}.monitor.csv")

    def _count_finished_episodes(vec_env):
        # Drive the env with random actions and count how often it terminates.
        vec_env.reset()
        finished = 0
        for _ in range(1000):
            _obs, _rewards, dones, _infos = vec_env.step([vec_env.action_space.sample()])
            if dones[0]:
                finished += 1
                vec_env.reset()
        return finished

    # --- first monitored environment ---
    env1 = DummyVecEnv([lambda: gym.make("CartPole-v1")])
    env1.seed(0)
    monitor_file1 = _new_monitor_path()
    monitor_env1 = VecMonitor(env1, monitor_file1)

    found = get_monitor_files(tmp_path)
    assert len(found) == 1
    assert monitor_file1 in found

    episode_count1 = _count_finished_episodes(monitor_env1)
    results_size1 = len(load_results(tmp_path).index)
    assert results_size1 == episode_count1

    # --- second monitored environment, logging into the same folder ---
    env2 = DummyVecEnv([lambda: gym.make("CartPole-v1")])
    env2.seed(0)
    monitor_file2 = _new_monitor_path()
    monitor_env2 = VecMonitor(env2, monitor_file2)

    found = get_monitor_files(tmp_path)
    assert len(found) == 2
    assert monitor_file1 in found
    assert monitor_file2 in found

    episode_count2 = _count_finished_episodes(monitor_env2)
    results_size2 = len(load_results(tmp_path).index)
    # The folder now holds both logs, so the row count is cumulative.
    assert results_size2 == (results_size1 + episode_count2)

    for leftover in (monitor_file1, monitor_file2):
        os.remove(leftover)
def _on_step(self) -> bool:
    """
    Periodically report recent training statistics.

    Every ``check_freq`` calls, the monitor logs under ``constants.OUT_DIR``
    are loaded and the means of the last 100 reward and episode-length
    entries are printed (when verbose), sent to ``self.experiment`` and used
    to update ``self.best_mean_reward``.

    :return: (bool) always True
    """
    # Nothing to do between two checks.
    if self.n_calls % self.check_freq != 0:
        return True

    # Retrieve the training history
    monitor_log = load_results(constants.OUT_DIR)
    timesteps: np.ndarray[Any]
    rewards: np.ndarray[Any]
    timesteps, rewards = ts2xy(monitor_log, 'timesteps')  # type: ignore
    n_points = len(timesteps)
    if n_points == 0:
        return True

    # Averages over the 100 most recent entries
    recent = slice(-100, None)
    mean_reward = np.mean(rewards[recent])
    mean_steps = np.mean(monitor_log.l.values[recent])  # type: ignore

    if self.verbose > 0:
        print(f"Num timesteps: {self.num_timesteps}")
        print(
            f"Best mean reward: {self.best_mean_reward:.2f} - Last mean reward per episode: {mean_reward:.2f}"
        )
        print("Last mean time steps: {}".format(mean_steps))

    # Both metrics are logged with the number of loaded points as the step.
    for metric_name, metric_value in (('rewards', mean_reward), ('steps', mean_steps)):
        self.experiment.log_metric(metric_name, metric_value, n_points)

    # Only the best score is tracked here; the model itself is not saved.
    if mean_reward > self.best_mean_reward:  # type: ignore
        self.best_mean_reward = mean_reward  # type: ignore

    return True
def get_mean_reward_last_n_steps(n, log_dir):
    """
    Return the mean of the last ``n`` reward entries logged in ``log_dir``.

    The monitor logs are read with ``load_results`` and converted with
    ``ts2xy(..., 'timesteps')``; the mean is taken over the last ``n`` values
    of the returned ``y`` array (fewer when less than ``n`` are available).

    :param n: (int) number of most recent entries to average, must be >= 1
    :param log_dir: (str) directory handed to ``load_results``
    :return: (float or None) the mean over the window, or None (after logging
        a warning) when nothing has been logged yet
    :raises ValueError: if ``n`` is smaller than 1
    """
    # ``y[-0:]`` is the whole array and a negative ``n`` would drop the oldest
    # entries instead of selecting the newest, so reject those values up front.
    if n < 1:
        raise ValueError(f'n must be a positive integer, got {n!r}')

    x, y = ts2xy(load_results(log_dir), 'timesteps')
    if len(x) == 0:
        logging.warning(
            '%s called when the number of logged timesteps was 0',
            get_mean_reward_last_n_steps.__name__,
        )
        return None

    # Mean training reward over the last n entries
    return np.mean(y[-n:])
def plot_results(dirs: List[str], num_timesteps: Optional[int], x_axis: str, task_name: str,
                 figsize: Tuple[int, int] = (8, 2)) -> None:
    """
    Draw one curve per results folder from the ``Monitor`` csv logs.

    :param dirs: ([str]) folders containing the logs to plot
    :param num_timesteps: (int or None) when given, keep only the rows whose
        cumulative ``l`` column does not exceed this value
    :param x_axis: (str) x-axis selector forwarded to ``ts2xy`` and ``plot_curves``
        (can be X_TIMESTEPS='timesteps', X_EPISODES='episodes' or X_WALLTIME='walltime_hrs')
    :param task_name: (str) title forwarded to ``plot_curves``
    :param figsize: (Tuple[int, int]) Size of the figure (width, height)
    """

    def _load_capped(folder):
        # Load one folder and, if requested, cut it at the timestep budget.
        frame = load_results(folder)
        if num_timesteps is None:
            return frame
        return frame[frame.l.cumsum() <= num_timesteps]

    # Load everything first, then convert, exactly in that order.
    frames = [_load_capped(folder) for folder in dirs]
    curves = [ts2xy(frame, x_axis) for frame in frames]
    plot_curves(curves, x_axis, task_name, figsize)
def _on_step(self) -> bool:
    """
    Save the model whenever the recent mean reward reaches a new best.

    Every ``check_freq`` calls, the monitor logs in ``self.log_dir`` are
    loaded and the mean of the last 100 reward entries is compared with
    ``self.best_mean_reward``; on improvement the record is updated and the
    model is written to ``self.save_path``.

    :return: (bool) always True
    """
    # Only act on every ``check_freq``-th call.
    if self.n_calls % self.check_freq != 0:
        return True

    # Retrieve the training history
    timesteps, rewards = ts2xy(load_results(self.log_dir), 'timesteps')
    if len(timesteps) == 0:
        return True

    # Average over the 100 most recent entries
    mean_reward = np.mean(rewards[-100:])
    is_verbose = self.verbose > 0
    if is_verbose:
        print(f"Num timesteps: {self.num_timesteps}")
        print(
            f"Best mean reward: {self.best_mean_reward:.2f} - Last mean reward per episode: {mean_reward:.2f}"
        )

    # Nothing to save unless the record is beaten.
    if not mean_reward > self.best_mean_reward:
        return True

    self.best_mean_reward = mean_reward
    if is_verbose:
        print(f"Saving new best model to {self.save_path}.zip")
    self.model.save(self.save_path)
    return True
# Human-readable x-axis caption for the selected command-line option.
x_label = {
    'steps': 'Timesteps',
    'episodes': 'Episodes',
    'time': 'Walltime (in hours)'
}[args.x_axis]

# Every sub-directory of log_path whose name mentions the environment.
candidates = (
    os.path.join(log_path, name) for name in os.listdir(log_path) if env in name
)
dirs = [path for path in candidates if os.path.isdir(path)]

title = 'Training Success Rate'
plt.figure(title, figsize=args.figsize)
plt.title(title, fontsize=args.fontsize)
plt.xlabel(x_label, fontsize=args.fontsize)
plt.ylabel('Success Rate', fontsize=args.fontsize)

for folder in dirs:
    data_frame = load_results(folder)
    max_timesteps = args.max_timesteps
    if max_timesteps is not None:
        # Keep only the rows inside the timestep budget.
        data_frame = data_frame[data_frame.l.cumsum() <= max_timesteps]
    success = np.array(data_frame['is_success'])
    x, _ = ts2xy(data_frame, x_axis)

    # Series shorter than the smoothing window are not plotted at all.
    if x.shape[0] < args.episode_window:
        continue

    # Rolling mean of the success flag over args.episode_window points
    x, y_mean = window_func(x, success, args.episode_window, np.mean)
    plt.plot(x, y_mean, linewidth=2)

plt.tight_layout()
plt.show()