Example #1
def plot_results2(log_folder1, log_folder2, title='Learning Curve'):
    """
    plot the results

    :param log_folder1: (str) the save location of the first run's results (plotted as PPO)
    :param log_folder2: (str) the save location of the second run's results (plotted as A2C)
    :param title: (str) the title of the task to plot
    """
    x, y = ts2xy(load_results(log_folder1), 'timesteps')
    y = moving_average(y, window=50)
    # Truncate x
    x = x[len(x) - len(y):]

    x2, y2 = ts2xy(load_results(log_folder2), 'timesteps')
    y2 = moving_average(y2, window=50)
    # Truncate x
    x2 = x2[len(x2) - len(y2):]

    fig = plt.figure(title)

    plt.plot(x, y, label='PPO')
    plt.plot(x2, y2, label='A2C')
    plt.xlabel('Number of Timesteps')
    plt.ylabel('Rewards')
    plt.title(title + " Smoothed")
    plt.legend()
    plt.show()
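Most of the plotting examples on this page call a moving_average helper that is not shown. A minimal sketch, modeled on the smoothing helper from the Stable Baselines docs:

import numpy as np

def moving_average(values, window):
    # Smooth values with a simple moving average of the given window size
    weights = np.repeat(1.0, window) / window
    return np.convolve(values, weights, 'valid')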
Example #2
def main():
    # Create log dir
    args = parse_arguments()
    save_dir = args.log_dir
    logger = Logger(output_dir=save_dir)
    title = 'Learning Curve'
    x, y = logger.load_results(["EpLen", "EpRet"])

    x = cumulative_sum(x)
    y = moving_average(y, window=50)
    # Truncate x
    x = x[len(x) - len(y):]
    fig = plt.figure(title)
    plt.plot(x, y, label="Own implementation")
    if args.compare:
        log_dir = os.path.join(
            "Stable_Baselines", "logs",
            os.path.sep.join(args.log_dir.split(os.path.sep)[1:]))
        from stable_baselines3.common.results_plotter import load_results, ts2xy
        x2, y2 = ts2xy(load_results(log_dir), 'timesteps')
        y2 = moving_average(y2, window=50)
        # Truncate x
        x2 = x2[len(x2) - len(y2):]
        x2, y2 = standardise_graph(x, y, x2, y2)

        plt.plot(x2, y2, label="Stable_Baselines3 implementation")

    plt.legend()
    plt.xlabel('Number of Timesteps')
    plt.ylabel('Rewards')
    plt.title(title + " Smoothed")
    if args.save:
        fname = "comparison.png" if args.compare else "learning_curve.png"
        plt.savefig(os.path.join(save_dir, fname))
    plt.show()
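The cumulative_sum and standardise_graph helpers used in main() are project-specific and not shown. Assuming x holds per-episode lengths ("EpLen"), cumulative_sum can be a thin wrapper around NumPy's cumulative sum; a sketch:

import numpy as np

def cumulative_sum(values):
    # Convert per-episode lengths into cumulative timesteps for the x-axis
    return np.cumsum(values)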
Example #3
    def _on_step(self):
        if self.n_calls % self.check_freq == 0:

            # Retrieve training reward
            x, y = ts2xy(load_results(self.log_dir), 'timesteps')
            print(y)
            if len(x) > 0:
                # Mean training reward over the last 50 episodes
                mean_reward = np.mean(y[-50:])
                if self.verbose > 0:
                    print("Num timesteps: {}".format(self.num_timesteps))
                    print(
                        "Best mean reward: {:.2f} - Last mean reward per episode: {:.2f}"
                        .format(self.best_mean_reward, mean_reward))

                # New best model, you could save the agent here
                if mean_reward > self.best_mean_reward:
                    self.best_mean_reward = mean_reward
                    # Example for saving best model
                    if self.verbose > 0:
                        print("Saving new best model to {}".format(
                            self.save_path))
                    self.model.save(self.save_path)

        return True
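The _on_step snippets in these examples are methods of a custom callback. A minimal skeleton in the spirit of the SaveOnBestTrainingRewardCallback from the Stable Baselines3 docs, showing where check_freq, log_dir, save_path and best_mean_reward come from (a sketch, not any one project's class):

import os
import numpy as np
from stable_baselines3.common.callbacks import BaseCallback

class SaveOnBestTrainingRewardCallback(BaseCallback):
    def __init__(self, check_freq, log_dir, verbose=1):
        super().__init__(verbose)
        self.check_freq = check_freq
        self.log_dir = log_dir
        self.save_path = os.path.join(log_dir, 'best_model')
        self.best_mean_reward = -np.inf

    def _init_callback(self) -> None:
        # Create the folder for the best model if needed
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)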
Example #4
    def _on_step(self) -> bool:
        if self.n_calls % self.check_freq == 0:
            # Retrieve training reward
            x, y = ts2xy(load_results(self.log_dir), 'timesteps')
            self.logger.record('timesteps', self.num_timesteps)
            self.logger.record('mean_reward', self.best_mean_reward)
            if self.usewandb:
                wandb.log({'mean_reward': self.best_mean_reward})
                wandb.log({'timesteps': self.num_timesteps})
            if len(x) > 0:
                # Mean reward over the most recent episodes (last 2 * check_freq entries)
                mean_reward = np.mean(y[-2 * self.check_freq:])
                if self.verbose > 0:
                    print(f"Num timesteps: {self.num_timesteps} : ", end='')
                    print(
                        f"Best mean reward: {self.best_mean_reward:.2f} - Last mean reward/ep: {mean_reward:.2f}"
                    )

                # Save the model when the recent mean reward has improved
                if mean_reward > self.best_mean_reward:
                    self.best_mean_reward = mean_reward
                    if self.verbose > 0:
                        print(f"Saving new best model to {self.save_path}.zip")
                    self.model.save(self.save_path)
        return True
Example #5
 def _on_step(self) -> bool:
     if self.n_calls % self.check_freq == 0:
         # Retrieve training reward
         data_frame = load_results(constants.OUT_DIR)
         x: np.ndarray[Any]
         y: np.ndarray[Any]
         x, y = ts2xy(data_frame, 'timesteps')  # type: ignore
         len_x = len(x)
         if len_x > 0:
             # Mean training reward over the last 100 episodes
             mean_reward = np.mean(y[-100:])
             mean_steps = np.mean(
                 data_frame.l.values[-100:])  # type: ignore
             if self.verbose > 0:
                 print(f"Num timesteps: {self.num_timesteps}")
                 print(
                     f"Best mean reward: {self.best_mean_reward:.2f} - Last mean reward per episode: {mean_reward:.2f}"
                 )
                 print("Last mean time steps: {}".format(mean_steps))
             self.experiment.log_metric('rewards', mean_reward, len_x)
             self.experiment.log_metric('steps', mean_steps, len_x)
             # New best model, you could save the agent here
             if mean_reward > self.best_mean_reward:  # type: ignore
                 self.best_mean_reward = mean_reward  # type: ignore
                 # Example for saving best model
                 # if self.verbose > 0:
                 #     print(f"Saving new best model to {self.save_path}.zip")
                 # self.model.save(self.save_path)
     return True
Example #6
def plot_results(log_folder, title='Learning Curve', save_fig=False):
    """
    plot the results

    :param log_folder: (str) the save location of the results to plot
    :param title: (str) the title of the task to plot
    """
    x, y = ts2xy(load_results(log_folder), 'timesteps')
    y = moving_average(y, window=50)
    # Truncate x
    x = x[len(x) - len(y):]

    fig = plt.figure(title)
    plt.plot(x, y)
    plt.xlabel('Number of Timesteps')
    plt.ylabel('Rewards')
    plt.title(title + " Smoothed")
    if save_fig:
        # Save before show() so the rendered figure is written to disk
        plt.savefig(os.path.join(log_folder, "learning_curve.png"))
    plt.show()
Example #7
 def _on_step(self) -> bool:
     if self.n_calls % self.check_freq == 0:
         x, y = ts2xy(load_results(self.locals['tb_log_name']), 'timesteps')
         if len(x) > 0:
             # Mean training reward over the last 100 episodes
             mean_reward = np.mean(y[-100:])
             self.logger.record('rollout/mean reward of 100 steps', mean_reward)
     
     return True
Example #8
def get_mean_reward_last_n_steps(n, log_dir):
    x, y = ts2xy(load_results(log_dir), 'timesteps')
    if len(x) > 0:
        # Mean training reward over the last n episodes
        mean_reward = np.mean(y[-n:])
        return mean_reward
    else:
        logging.warning(
            f'{get_mean_reward_last_n_steps.__name__} called when the number of logged timesteps was 0'
        )
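A hypothetical call site for the helper above; it implicitly returns None when nothing has been logged yet, so guard for that ("./logs" is a placeholder Monitor directory):

mean_r = get_mean_reward_last_n_steps(100, "./logs")
if mean_r is not None:
    print(f"Mean reward over the last 100 episodes: {mean_r:.2f}")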
Example #9
    def _on_step(self) -> bool:
        if self.n_calls % self.check_freq == 0:

            # Retrieve training reward
            x, y = ts2xy(load_results(self.log_dir), 'timesteps')     # x represents timesteps
            if len(x) > 0:
                # Mean training reward over the last 50 episodes
                mean_reward = np.mean(y[-50:])
                if self.verbose > 0:
                    print("Num timesteps: {}".format(self.num_timesteps))
                    print("Best mean reward: {:.2f} - Last mean reward per episode: {:.2f}".format(self.best_mean_reward, mean_reward))

                writer.add_scalar("agent_reward/train/mean_episode_reward", mean_reward, self.n_calls)

        return True
Example #10
def plot_results2(log_folder, title='Learning Curve'):
    """
    plot the results

    :param log_folder: (str) the save location of the results to plot
    :param title: (str) the title of the task to plot
    """
    x, y = ts2xy(load_results(log_folder), 'timesteps')
    y = moving_average(y, window=100)
    # Truncate x
    x = x[len(x) - len(y):]

    fig = plt.figure(title)
    plt.plot(x, y)
    plt.xlabel('Number of Timesteps')
    plt.ylabel('Rewards')
    plt.title(title + " Smoothed")
    plt.show()
Example #11
    def _on_step(self) -> bool:
        x, y = ts2xy(load_results(self.log_dir), 'timesteps')
        if len(x) > 0:
            mean_reward = np.mean(y[-100:])
            if self.verbose > 0:
                print("Num timesteps: {}".format(self.num_timesteps))
                print(
                    "Best mean reward: {:.2f} - Last mean reward per episode: {:.2f}"
                    .format(self.best_mean_reward, mean_reward))

            if mean_reward > self.best_mean_reward:
                self.best_mean_reward = mean_reward
                if self.verbose > 0:
                    print("Saving new best model to {}".format(self.save_path))
                self.model.save(self.save_path)

        return True
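For context, a minimal sketch of how callbacks like the ones above are typically wired up, assuming Gymnasium's CartPole-v1 and the callback class sketched earlier; the Monitor wrapper writes the monitor.csv that load_results()/ts2xy() read:

import os
import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.monitor import Monitor

log_dir = "./logs/"
os.makedirs(log_dir, exist_ok=True)

env = Monitor(gym.make("CartPole-v1"), log_dir)  # logs episode rewards/lengths to log_dir

model = PPO("MlpPolicy", env, verbose=0)
callback = SaveOnBestTrainingRewardCallback(check_freq=1000, log_dir=log_dir)
model.learn(total_timesteps=100_000, callback=callback)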
Example #12
 def _on_step(self) -> bool:
     # get the monitor's data
     x, y = ts2xy(load_results(self.log_dir), 'timesteps')
     if self._plot is None: # make the plot
         plt.ion()
         fig = plt.figure(figsize=(6,3))
         ax = fig.add_subplot(111)
         line, = ax.plot(x, y)
         self._plot = (line, ax, fig)
         plt.show()
     else: # update and rescale the plot
         self._plot[0].set_data(x, y)
         self._plot[-2].relim()
         self._plot[-2].set_xlim([self.locals["total_timesteps"] * -0.02, 
                                  self.locals["total_timesteps"] * 1.02])
         self._plot[-2].autoscale_view(True,True,True)
         self._plot[-1].canvas.draw()
     return True
Example #13
    def _on_rollout_end(self) -> None:
        """
        This event is triggered before updating the policy.
        """
        log_dict = get_log_dict()
        actor_loss = log_dict.get("train/actor_loss")
        critic_loss = log_dict.get("train/critic_loss")

        x, y = ts2xy(load_results(self.log_dir), 'timesteps')
        if len(x) > 0 and actor_loss and critic_loss:
            mean_reward = np.mean(y[-min(100, len(y)):])
            wandb.log({
                "episode_reward": y[-1],
                "mean_episode_reward": mean_reward,
                "actor_loss": actor_loss,
                "critic_loss": critic_loss
            })
Example #14
    def _on_step(self) -> bool:
        if self.n_calls % self.check_freq == 0:
            x, y = ts2xy(load_results(self.log_dir), 'timesteps')
            if len(x) > 0:
                mean_reward = np.mean(y[-100:])
                if self.verbose > 0:
                    print(f"Num timesteps: {self.num_timesteps}")
                    print(
                        f"Best mean reward: {self.best_mean_reward:.2f} - Last mean reward per episode: {mean_reward:.2f}"
                    )

                if mean_reward > self.best_mean_reward:
                    self.best_mean_reward = mean_reward
                    if self.verbose > 0:
                        print(f"Saving new best drive to {self.save_path}.zip")
                    self.model.save(self.save_path)

        return True
Example #15
        def _on_step(self) -> bool:
            if self.n_calls % self.check_freq == 0:

                # Retrieve training reward
                x, y = ts2xy(load_results(self.log_dir), 'timesteps')
                if len(x) > 0:
                    global episodes
                    global mean_reward
                    global best_reward
                    episodes = len(y)
                    # print(episodes)
                    mean_reward = np.mean(y[-50:])
                    mean_reward = round(mean_reward, 0)
                    if self.verbose > 0:
                        print(f"Episodes: {episodes}")
                        print(f"Num steps: {self.num_timesteps}")
                        print(f"Mean reward: {mean_reward:.2f} ")
                        print("=================================")
                    # Report intermediate objective value to Optuna and handle pruning
                    trial.report(mean_reward, self.num_timesteps)
                    if trial.should_prune():
                        raise optuna.TrialPruned()

                    # New best model, you could save the agent here
                    if mean_reward > best_reward:
                        best_reward = mean_reward
                        if mean_reward > reward_threshold:
                            print("REWARD ACHIVED")
                            model.save(f"{self.save_path}/reward_achived_{str(mean_reward)}")
                            return False
                        else:
                            model.save(f"{self.save_path}/best_model")
                        

                    # New best model, you could save the agent here
                    # if episodes > episodes_threshold:
                    #     print("REWARD ACHIVED")
                    #     model.save(self.save_path)
                    #     return False



            return True
Example #16
    def save_episode_rewards_as_csv(self,
                                    data_directory="data/",
                                    log_dir=None):

        if log_dir is None:
            log_dir = self.log_dir

        episode, rewards = ts2xy(load_results(log_dir), 'timesteps')

        filename = self.model_name + "_episode_rewards"
        location = data_directory + '{}.csv'.format(filename)

        with open(location, mode='w') as results_file:
            results_writer = csv.writer(results_file,
                                        delimiter=',',
                                        quotechar='"',
                                        quoting=csv.QUOTE_MINIMAL)
            results_writer.writerow(episode)
            results_writer.writerow(rewards)
Example #17
    def _on_step(self) -> bool:
        if self.n_calls % self.check_freq == 0:

          # Retrieve training reward
          x, y = ts2xy(load_results(self.log_dir), 'timesteps')
          if len(x) > 0:
              # Mean training reward over the last 100 episodes
              mean_reward = np.mean(y[-100:])
              if self.verbose > 0:
                print(f'Num timesteps: {self.num_timesteps}')
                print(f'Best mean reward: {self.best_mean_reward:.2f} - Last mean reward per episode: {mean_reward:.2f}')

              # New best model, you could save the agent here
              if mean_reward > self.best_mean_reward:
                  self.best_mean_reward = mean_reward
                  # Example for saving best model
                  if self.verbose > 0:
                    print(f'Saving new best model to {self.save_path}.zip')
                  self.model.save(self.save_path)

        return True
Example #18
    def _on_step(self) -> bool:
        if self.n_calls % self.eval_freq == 0:
            # Retrieve training reward
            x, y = ts2xy(load_results(self.data_dir), 'timesteps')
            if len(x) > 0:
                # Mean training reward over the last 100 episodes
                mean_reward = np.mean(y[-100:])
            else:
                mean_reward = -np.inf
            if self.verbose > 0:
                print("Num timesteps: {}".format(self.num_timesteps))
                print("Best mean reward: {:.2f} - Last mean reward per episode: {:.2f}".format(self.best_mean_reward, mean_reward))
            # New best model, you could save the agent here
            if mean_reward > self.best_mean_reward:
                self.best_mean_reward = mean_reward
                # Example for saving best model
                if self.verbose > 0:
                    print("Saving new best model.")
                self.model.save(self.data_dir + '/best_model')

        return True
Example #19
    def plot_training_results(self,
                              title="Learning Curve",
                              xlabel="episode",
                              ylabel="cumulative reward",
                              filename="reward",
                              log_dir=None,
                              show=False):
        plt.clf()

        if log_dir is None:
            log_dir = self.log_dir
        x, y = ts2xy(load_results(log_dir), 'timesteps')

        plt.figure(figsize=(20, 10))
        plt.title(title)
        plt.plot(x, y, "b", label="Cumulative Reward")
        plt.legend()
        plt.xlabel(xlabel)
        plt.ylabel(ylabel)
        plt.axhline(y=0, color='r', linestyle='-')
        plt.savefig("img/" + filename)
        if show:
            plt.show()
Example #20
def plot_results(log_folder, title='Learning Curve'):
    """
    plot the results

    :param log_folder: (str) the save location of the results to plot
    :param title: (str) the title of the task to plot
    """
    x, y = ts2xy(load_results(log_folder), 'timesteps')
    y = moving_average(y, window=50)
    # Truncate x
    x = x[len(x) - len(y):]

    fig = plt.figure(title)
    plt.plot(x, y)
    plt.xlabel('Number of Timesteps')
    plt.ylabel('Rewards')
    plt.title(title + " Smoothed")

    # Built-in plotter from stable_baselines results_plotter
    results_plotter.plot_results([log_folder], 3e5,
                                 results_plotter.X_TIMESTEPS,
                                 "TD3 LunarLander")

    plt.show()
Example #21
        str(args.num_threads),
        "--seed",
        str(args.seed),
        "--verbose",
        "0",
        "--reward-log",
        reward_log,
    ]
    if args.verbose >= 1:
        print(f"{idx + 1}/{n_experiments}")
        print(f"Evaluating {algo} on {env_id}...")

    skip_eval = False
    if os.path.isdir(reward_log):
        try:
            x, y = ts2xy(load_results(reward_log), "timesteps")
            skip_eval = len(x) > 0
        except (json.JSONDecodeError, pd.errors.EmptyDataError, TypeError):
            pass

    if skip_eval:
        print("Skipping eval...")
    else:
        return_code = subprocess.call(["python", "enjoy.py"] + arguments)
        if return_code != 0:
            print("Error during evaluation, skipping...")
            continue
        x, y = ts2xy(load_results(reward_log), "timesteps")

    if len(x) > 0:
        # Retrieve training timesteps from config
Example #22
x_label = {
    'steps': 'Timesteps',
    'episodes': 'Episodes',
    'time': 'Walltime (in hours)'
}[args.x_axis]

dirs = [
    os.path.join(log_path, folder) for folder in os.listdir(log_path)
    if env in folder and os.path.isdir(os.path.join(log_path, folder))
]

plt.figure('Training Success Rate', figsize=args.figsize)
plt.title('Training Success Rate', fontsize=args.fontsize)
plt.xlabel(f'{x_label}', fontsize=args.fontsize)
plt.ylabel('Success Rate', fontsize=args.fontsize)
for folder in dirs:
    data_frame = load_results(folder)
    if args.max_timesteps is not None:
        data_frame = data_frame[data_frame.l.cumsum() <= args.max_timesteps]
    success = np.array(data_frame['is_success'])
    x, _ = ts2xy(data_frame, x_axis)

    # Do not plot the smoothed curve at all if the timeseries is shorter than window size.
    if x.shape[0] >= args.episode_window:
        # Compute and plot rolling mean with window of size args.episode_window
        x, y_mean = window_func(x, success, args.episode_window, np.mean)
        plt.plot(x, y_mean, linewidth=2)
plt.tight_layout()
plt.show()
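The window_func helper used above comes from stable_baselines3.common.results_plotter; if it is not available, a simple stand-in with the same contract (a sketch, not the library implementation) could be:

import numpy as np

def window_func(x, y, window, func):
    # Apply func over a trailing window of y and trim x so both arrays align
    y_rolled = np.array([func(y[i - window:i]) for i in range(window, len(y) + 1)])
    return x[window - 1:], y_rolled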
Example #23
        '-f', args.log_dir,
        '--algo', algo,
        '--env', env_id,
        '--no-render',
        '--seed', str(args.seed),
        '--verbose', '0',
        '--reward-log', reward_log
    ]
    if args.verbose >= 1:
        print(f"{idx + 1}/{n_experiments}")
        print(f"Evaluating {algo} on {env_id}...")

    skip_eval = False
    if os.path.isdir(reward_log):
        try:
            x, y = ts2xy(load_results(reward_log), 'timesteps')
            skip_eval = len(x) > 0
        except (json.JSONDecodeError, pd.errors.EmptyDataError, TypeError):
            pass

    if skip_eval:
        print("Skipping eval...")
    else:
        return_code = subprocess.call(['python', 'enjoy.py'] + arguments)
        x, y = ts2xy(load_results(reward_log), 'timesteps')

    if len(x) > 0:
        mean_reward = np.mean(y)
        std_reward = np.std(y)
        results['algo'].append(algo)
Example #24
                                 morphology=morphology,
                                 path=base_path,
                                 verbose=1)
    eval_env = get_env(params, morphology=morphology)

    if args.eval_ep > 0:
        fitness = eval_policy(model,
                              eval_env,
                              num_ep=args.eval_ep,
                              deterministic=True,
                              gif=False,
                              render=False,
                              verbose=0)[0]
    else:
        # Determine fitness from training history
        x, y = ts2xy(load_results(model_path), 'timesteps')
        if len(x) > 0:
            # Mean training reward over the last |eval_ep| episodes (eval_ep <= 0 in this branch)
            ys = y[args.eval_ep:]
            fitness = np.mean(ys)
        else:
            fitness = -np.inf
    # except Exception as e:
    #     print("Encountered Error", e)
    #     print("Assigning zero fitness")
    #     model_path, tb_path = get_paths(params, path=base_path)
    #     os.makedirs(model_path, exist_ok=True)
    #     with open(os.path.join(model_path, 'fitness.tmp'), 'w+') as f:
    #         f.write(str(-1*float('inf')))
    #     exit() # Exit if we fail to train the model.