Example no. 1
def plot_results2(log_folder1, log_folder2, title='Learning Curve'):
    """
    plot the results

    :param log_folder1: (str) the save location of the first set of results to plot (labelled PPO)
    :param log_folder2: (str) the save location of the second set of results to plot (labelled A2C)
    :param title: (str) the title of the task to plot
    """
    x, y = ts2xy(load_results(log_folder1), 'timesteps')
    y = moving_average(y, window=50)
    # Truncate x
    x = x[len(x) - len(y):]

    x2, y2 = ts2xy(load_results(log_folder2), 'timesteps')
    y2 = moving_average(y2, window=50)
    # Truncate x
    x2 = x2[len(x2) - len(y2):]

    fig = plt.figure(title)

    plt.plot(x, y, label='PPO')
    plt.plot(x2, y2, label='A2C')
    plt.xlabel('Number of Timesteps')
    plt.ylabel('Rewards')
    plt.title(title + " Smoothed")
    plt.legend()
    plt.show()
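Most of the plotting snippets in this collection assume a moving_average helper plus load_results and ts2xy imported from Stable-Baselines3. A minimal sketch of those assumed imports and of the helper, following the numpy-convolution form used in the Stable-Baselines3 documentation's plotting example:

import numpy as np
import matplotlib.pyplot as plt
from stable_baselines3.common.results_plotter import load_results, ts2xy

def moving_average(values, window):
    # Smooth values with a sliding-window mean; 'valid' mode shortens the
    # output, which is why the callers truncate x afterwards.
    weights = np.repeat(1.0, window) / window
    return np.convolve(values, weights, 'valid')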
Example no. 2
def plot_results(log_folder):
    from scipy.signal import savgol_filter
    results = load_results(log_folder)
    R = results['r']
    T = results['t']
    # _w = 7
    # _window_size = len(R) // _w if (len(R) // _w) % 2 != 0 else len(R) // _w + 1
    # filtered = savgol_filter(R, _window_size, 1)

    plt.title('smoothed returns')
    plt.ylabel('Returns')
    plt.xlabel('time step')
    plt.plot(T, R)
    plt.grid()
    plt.show()
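If the commented-out Savitzky-Golay smoothing were re-enabled, it would presumably look like the sketch below (assuming scipy is available; the window-size logic is taken directly from the comments above and keeps the window length odd, as savgol_filter requires):

import matplotlib.pyplot as plt
from scipy.signal import savgol_filter
from stable_baselines3.common.results_plotter import load_results

results = load_results(log_folder)  # log_folder as in the snippet above
R, T = results['r'], results['t']
_w = 7
# Keep the window length odd, as required by savgol_filter
_window_size = len(R) // _w if (len(R) // _w) % 2 != 0 else len(R) // _w + 1
filtered = savgol_filter(R, _window_size, 1)
plt.plot(T, filtered)
plt.show()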
Example no. 3
def plot_results(log_folder, title='Learning Curve', save_fig=False):
    """
    plot the results

    :param log_folder: (str) the save location of the results to plot
    :param title: (str) the title of the task to plot
    :param save_fig: (bool) whether to also save the figure to disk
    """
    x, y = ts2xy(load_results(log_folder), 'timesteps')
    y = moving_average(y, window=50)
    # Truncate x
    x = x[len(x) - len(y):]

    fig = plt.figure(title)
    plt.plot(x, y)
    plt.xlabel('Number of Timesteps')
    plt.ylabel('Rewards')
    plt.title(title + " Smoothed")
    if save_fig:
        # Save before showing; save_dir is expected to be defined elsewhere in the original script
        fig.savefig(os.path.join(save_dir, "learning_curve.png"))
    plt.show()
Example no. 4
def main():
    # Create log dir
    args = parse_arguments()
    save_dir = args.log_dir
    logger = Logger(output_dir=save_dir)
    title = 'Learning Curve'
    x, y = logger.load_results(["EpLen", "EpRet"])

    x = cumulative_sum(x)
    y = moving_average(y, window=50)
    # Truncate x
    x = x[len(x) - len(y):]
    fig = plt.figure(title)
    plt.plot(x, y, label="Own implementation")
    if args.compare:
        log_dir = os.path.join(
            "Stable_Baselines", "logs",
            os.path.sep.join(args.log_dir.split(os.path.sep)[1:]))
        from stable_baselines3.common.results_plotter import load_results, ts2xy
        x2, y2 = ts2xy(load_results(log_dir), 'timesteps')
        y2 = moving_average(y2, window=50)
        # Truncate x
        x2 = x2[len(x2) - len(y2):]
        x2, y2 = standardise_graph(x, y, x2, y2)

        plt.plot(x2, y2, label="Stable_Baselines3 implementation")

    plt.legend()
    plt.xlabel('Number of Timesteps')
    plt.ylabel('Rewards')
    plt.title(title + " Smoothed")
    if args.save:
        fname = "comparison.png" if args.compare else "learning_curve.png"
        plt.savefig(os.path.join(save_dir, fname))
    plt.show()
    def _on_step(self):
        if self.n_calls % self.check_freq == 0:

            # Retrieve training reward
            x, y = ts2xy(load_results(self.log_dir), 'timesteps')
            print(y)
            if len(x) > 0:
                # Mean training reward over the last 50 episodes
                mean_reward = np.mean(y[-50:])
                if self.verbose > 0:
                    print("Num timesteps: {}".format(self.num_timesteps))
                    print(
                        "Best mean reward: {:.2f} - Last mean reward per episode: {:.2f}"
                        .format(self.best_mean_reward, mean_reward))

                # New best model, you could save the agent here
                if mean_reward > self.best_mean_reward:
                    self.best_mean_reward = mean_reward
                    # Example for saving best model
                    if self.verbose > 0:
                        print("Saving new best model to {}".format(
                            self.save_path))
                    self.model.save(self.save_path)

        return True
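The _on_step callbacks in these examples all assume the same surrounding setup: a custom BaseCallback subclass, an environment wrapped in Monitor so that load_results has a monitor.csv to read, and the callback passed to model.learn. A minimal usage sketch under those assumptions (the class name SaveOnBestTrainingRewardCallback and the hyperparameter values are illustrative, not taken from the snippets; recent Stable-Baselines3 releases use gymnasium, older ones use gym):

import os
import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.callbacks import BaseCallback

log_dir = "./logs/"
os.makedirs(log_dir, exist_ok=True)

# Monitor writes the monitor.csv files that load_results() parses
env = Monitor(gym.make("CartPole-v1"), log_dir)

class SaveOnBestTrainingRewardCallback(BaseCallback):
    def __init__(self, check_freq, log_dir, verbose=1):
        super().__init__(verbose)
        self.check_freq = check_freq
        self.log_dir = log_dir
        self.save_path = os.path.join(log_dir, "best_model")
        self.best_mean_reward = -float("inf")

    def _on_step(self) -> bool:
        # Replace this body with one of the _on_step implementations shown in these examples
        return True

model = PPO("MlpPolicy", env, verbose=0)
model.learn(total_timesteps=50_000,
            callback=SaveOnBestTrainingRewardCallback(check_freq=1000, log_dir=log_dir))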
Example no. 6
    def _on_step(self) -> bool:
        if self.n_calls % self.check_freq == 0:
            # Retrieve the score
            x, y = ts2xy(load_results(self.log_dir), 'timesteps')
            self.logger.record('timesteps', self.num_timesteps)
            self.logger.record('mean_reward', self.best_mean_reward)
            if self.usewandb:
                wandb.log({'mean_reward': self.best_mean_reward})
                wandb.log({'timesteps': self.num_timesteps})
            if len(x) > 0:
                # Mean score over the last 100 lives
                mean_reward = np.mean(y[-2 * self.check_freq:])
                if self.verbose > 0:
                    print(f"Num timesteps: {self.num_timesteps} : ", end='')
                    print(
                        f"Best mean reward: {self.best_mean_reward:.2f} - Last mean reward/ep: {mean_reward:.2f}"
                    )

                # Save the model when the recent mean reward improves
                if mean_reward > self.best_mean_reward:
                    self.best_mean_reward = mean_reward
                    if self.verbose > 0:
                        print(f"Saving new best model to {self.save_path}.zip")
                    self.model.save(self.save_path)
        return True
Example no. 7
 def _on_step(self) -> bool:
     if self.n_calls % self.check_freq == 0:
         x, y = ts2xy(load_results(self.locals['tb_log_name']), 'timesteps')
         if len(x) > 0:
             # Mean training reward over the last 100 episodes
             mean_reward = np.mean(y[-100:])
             self.logger.record('rollout/mean reward of 100 steps', mean_reward)
     
     return True
Example no. 8
    def _on_rollout_end(self) -> None:
        """
        This event is triggered before updating the policy.
        """
        results = load_results(self.log_dir)  # avoid shadowing the built-in dict
        # print(results)
        actor_loss = results.get("train/actor_loss")
        critic_loss = results.get("train/critic_loss")

        x, y = ts2xy(load_results(self.log_dir), 'timesteps')
        if len(x) > 0 and actor_loss and critic_loss:
            mean_reward = np.mean(y[-min(100, len(y)):])
            if self.wandb_logging:
                wandb.log({
                    "episode_reward": y[-1],
                    "mean_episode_reward": mean_reward,
                    "actor_loss": actor_loss,
                    "critic_loss": critic_loss
                })
    def _on_step(self) -> bool:
        if self.n_calls % self.check_freq == 0:

            # Retrieve training reward
            x, y = ts2xy(load_results(self.log_dir), 'timesteps')  # x represents timesteps
            if len(x) > 0:
                # Mean training reward over the last 50 episodes
                mean_reward = np.mean(y[-50:])
                if self.verbose > 0:
                    print("Num timesteps: {}".format(self.num_timesteps))
                    print("Best mean reward: {:.2f} - Last mean reward per episode: {:.2f}".format(self.best_mean_reward, mean_reward))

                # writer: a TensorBoard SummaryWriter created elsewhere in the original script
                writer.add_scalar("agent_reward/train/mean_episode_reward", mean_reward, self.n_calls)

        return True

def plot_results2(log_folder, title='Learning Curve'):
    """
    plot the results

    :param log_folder: (str) the save location of the results to plot
    :param title: (str) the title of the task to plot
    """
    x, y = ts2xy(load_results(log_folder), 'timesteps')
    y = moving_average(y, window=100)
    # Truncate x
    x = x[len(x) - len(y):]

    fig = plt.figure(title)
    plt.plot(x, y)
    plt.xlabel('Number of Timesteps')
    plt.ylabel('Rewards')
Example no. 11
 def _on_step(self) -> bool:
     # get the monitor's data
     x, y = ts2xy(load_results(self.log_dir), 'timesteps')
     if self._plot is None: # make the plot
         plt.ion()
         fig = plt.figure(figsize=(6,3))
         ax = fig.add_subplot(111)
         line, = ax.plot(x, y)
         self._plot = (line, ax, fig)
         plt.show()
     else: # update and rescale the plot
         self._plot[0].set_data(x, y)
         self._plot[-2].relim()
         self._plot[-2].set_xlim([self.locals["total_timesteps"] * -0.02, 
                                  self.locals["total_timesteps"] * 1.02])
         self._plot[-2].autoscale_view(True,True,True)
         self._plot[-1].canvas.draw()
Example no. 12
    def _on_step(self) -> bool:
        x, y = ts2xy(load_results(self.log_dir), 'timesteps')
        if len(x) > 0:
            mean_reward = np.mean(y[-100:])
            if self.verbose > 0:
                print("Num timesteps: {}".format(self.num_timesteps))
                print(
                    "Best mean reward: {:.2f} - Last mean reward per episode: {:.2f}"
                    .format(self.best_mean_reward, mean_reward))

            if mean_reward > self.best_mean_reward:
                self.best_mean_reward = mean_reward
                if self.verbose > 0:
                    print("Saving new best model to {}".format(self.save_path))
                self.model.save(self.save_path)

        return True
Example no. 13
    def _on_step(self) -> bool:
        if self.n_calls % self.check_freq == 0:
            x, y = ts2xy(load_results(self.log_dir), 'timesteps')
            if len(x) > 0:
                mean_reward = np.mean(y[-100:])
                if self.verbose > 0:
                    print(f"Num timesteps: {self.num_timesteps}")
                    print(
                        f"Best mean reward: {self.best_mean_reward:.2f} - Last mean reward per episode: {mean_reward:.2f}"
                    )

                if mean_reward > self.best_mean_reward:
                    self.best_mean_reward = mean_reward
                    if self.verbose > 0:
                        print(f"Saving new best drive to {self.save_path}.zip")
                    self.model.save(self.save_path)

        return True
Example no. 14
        def _on_step(self) -> bool:
            if self.n_calls % self.check_freq == 0:

                # Retrieve training reward
                x, y = ts2xy(load_results(self.log_dir), 'timesteps')
                if len(x) > 0:
                    global episodes
                    global mean_reward
                    global best_reward
                    episodes = len(y)
                    # print(episodes)
                    mean_reward = np.mean(y[-50:])
                    mean_reward = round(mean_reward, 0)
                    if self.verbose > 0:
                        print(f"Episodes: {episodes}")
                        print(f"Num steps: {self.num_timesteps}")
                        print(f"Mean reward: {mean_reward:.2f} ")
                        print("=================================")
                    # Report intermediate objective value to Optuna and handle pruning
                    trial.report(mean_reward,self.num_timesteps)
                    if trial.should_prune():
                        raise optuna.TrialPruned()

                    # New best model, you could save the agent here
                    if mean_reward > best_reward:
                        best_reward = mean_reward
                        if mean_reward > reward_threshold:
                            print("REWARD ACHIVED")
                            model.save(f"{self.save_path}/reward_achived_{str(mean_reward)}")
                            return False
                        else:
                            model.save(f"{self.save_path}/best_model")
                        

                    # New best model, you could save the agent here
                    # if episodes > episodes_threshold:
                    #     print("REWARD ACHIEVED")
                    #     model.save(self.save_path)
                    #     return False



            return True
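The trial, reward_threshold and best_reward names in this snippet come from an enclosing Optuna objective. A rough, self-contained sketch of the Optuna pruning protocol this callback participates in (the loop and reward values are stand-ins for a real training run; only the trial.report / should_prune / TrialPruned usage mirrors the snippet above):

import optuna

def objective(trial):
    # In the example above these values come from training an RL agent whose
    # callback periodically reports the rolling mean episode reward.
    for step in range(5):
        mean_reward = float(step)  # stand-in for the rolling mean episode reward
        trial.report(mean_reward, step)
        if trial.should_prune():
            raise optuna.TrialPruned()
    return mean_reward

study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=20)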
Example no. 15
    def save_episode_rewards_as_csv(self,
                                    data_directory="data/",
                                    log_dir=None):

        if log_dir is None:
            log_dir = self.log_dir

        episode, rewards = ts2xy(load_results(log_dir), 'timesteps')

        filename = self.model_name + "_episode_rewards"
        location = data_directory + '{}.csv'.format(filename)

        with open(location, mode='w') as results_file:
            results_writer = csv.writer(results_file,
                                        delimiter=',',
                                        quotechar='"',
                                        quoting=csv.QUOTE_MINIMAL)
            results_writer.writerow(episode)
            results_writer.writerow(rewards)
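For reference, the two-row CSV written above (first row holds the x values from ts2xy, second row the episode rewards) could be read back with something like this sketch (the path is illustrative):

import csv

with open("data/my_model_episode_rewards.csv") as results_file:
    rows = list(csv.reader(results_file))

timesteps = [float(value) for value in rows[0]]
rewards = [float(value) for value in rows[1]]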
Example no. 16
    def _on_step(self) -> bool:
        if self.n_calls % self.check_freq == 0:

            # Retrieve training reward
            x, y = ts2xy(load_results(self.log_dir), 'timesteps')
            if len(x) > 0:
                # Mean training reward over the last 100 episodes
                mean_reward = np.mean(y[-100:])
                if self.verbose > 0:
                    print(f'Num timesteps: {self.num_timesteps}')
                    print(f'Best mean reward: {self.best_mean_reward:.2f} - Last mean reward per episode: {mean_reward:.2f}')

                # New best model, you could save the agent here
                if mean_reward > self.best_mean_reward:
                    self.best_mean_reward = mean_reward
                    # Example for saving best model
                    if self.verbose > 0:
                        print(f'Saving new best model to {self.save_path}.zip')
                    self.model.save(self.save_path)

        return True
Example no. 17
    def _on_step(self) -> bool:
        if self.n_calls % self.eval_freq == 0:
            # Retrieve training reward
            x, y = ts2xy(load_results(self.data_dir), 'timesteps')
            if len(x) > 0:
                # Mean training reward over the last 100 episodes
                mean_reward = np.mean(y[-100:])
            else:
                mean_reward = -np.inf
            if self.verbose > 0:
                print("Num timesteps: {}".format(self.num_timesteps))
                print("Best mean reward: {:.2f} - Last mean reward per episode: {:.2f}".format(self.best_mean_reward, mean_reward))
            # New best model, you could save the agent here
            if mean_reward > self.best_mean_reward:
                self.best_mean_reward = mean_reward
                # Example for saving best model
                if self.verbose > 0:
                    print("Saving new best model.")
                self.model.save(self.data_dir + '/best_model')

        return True
Example no. 18
    def plot_training_results(self,
                              title="Learning Curve",
                              xlabel="episode",
                              ylabel="cumulative reward",
                              filename="reward",
                              log_dir=None,
                              show=False):
        plt.clf()

        if log_dir is None:
            log_dir = self.log_dir
        x, y = ts2xy(load_results(log_dir), 'timesteps')

        plt.figure(figsize=(20, 10))
        plt.title(title)
        plt.plot(x, y, "b", label="Cumulative Reward")
        plt.legend()
        plt.xlabel(xlabel)
        plt.ylabel(ylabel)
        plt.axhline(y=0, color='r', linestyle='-')
        plt.savefig("img/" + filename)
        if show:
            plt.show()
Example no. 19
def plot_results(log_folder, title='Learning Curve'):
    """
    plot the results

    :param log_folder: (str) the save location of the results to plot
    :param title: (str) the title of the task to plot
    """
    x, y = ts2xy(load_results(log_folder), 'timesteps')
    y = moving_average(y, window=50)
    # Truncate x
    x = x[len(x) - len(y):]

    fig = plt.figure(title)
    plt.plot(x, y)
    plt.xlabel('Number of Timesteps')
    plt.ylabel('Rewards')
    plt.title(title + " Smoothed")

    # built-in
    results_plotter.plot_results([log_folder], 3e5,
                                 results_plotter.X_TIMESTEPS,
                                 "TD3 LunarLander")

    plt.show()
Example no. 20
        str(args.num_threads),
        "--seed",
        str(args.seed),
        "--verbose",
        "0",
        "--reward-log",
        reward_log,
    ]
    if args.verbose >= 1:
        print(f"{idx + 1}/{n_experiments}")
        print(f"Evaluating {algo} on {env_id}...")

    skip_eval = False
    if os.path.isdir(reward_log):
        try:
            x, y = ts2xy(load_results(reward_log), "timesteps")
            skip_eval = len(x) > 0
        except (json.JSONDecodeError, pd.errors.EmptyDataError, TypeError):
            pass

    if skip_eval:
        print("Skipping eval...")
    else:
        return_code = subprocess.call(["python", "enjoy.py"] + arguments)
        if return_code != 0:
            print("Error during evaluation, skipping...")
            continue
        x, y = ts2xy(load_results(reward_log), "timesteps")

    if len(x) > 0:
        # Retrieve training timesteps from config
Example no. 21
def generate_plots(dirs,
                   xaxis=X_TIMESTEPS,
                   yaxis='r',
                   title=None,
                   labels=None,
                   num_timesteps=None,
                   subsample=None,
                   individual=False):
    for i in range(len(dirs)):
        if not dirs[i].startswith('/'):
            dirs[i] = os.path.join(BASE, dirs[i])

    # If pointing to a single folder and that folder has many results, use that as dir
    if len(dirs) == 1 and len(get_subdirs(dirs[0])) > 1:
        dirs = [
            os.path.join(dirs[0], subdir) for subdir in get_subdirs(dirs[0])
        ]
        dirs = sorted(dirs)

    # Make everything reproducible by sorting. Can comment out later for organization.
    if labels is None:
        labels = [os.path.basename(os.path.normpath(d)) for d in dirs]

    dirs, labels = zip(*sorted(zip(dirs, labels), key=lambda x: x[0]))
    plt.gcf().dpi = 100.0
    print("Dirs", dirs)
    for i, folder in enumerate(dirs):
        if 'params.yaml' not in os.listdir(folder):
            # If directory contains 1 folder, and none of those folders have params.yaml, move down.
            while True:
                contents = get_subdirs(folder)
                if any([
                        'params.yaml' in os.listdir(os.path.join(folder, c))
                        for c in contents
                ]):
                    break
                folder = os.path.join(folder, contents[0])

        if 'params.yaml' not in os.listdir(folder):
            runs = sorted(
                [os.path.join(folder, r) for r in get_subdirs(folder)])
        else:
            runs = [folder]

        print("Different seeds for folder", folder, ":")
        print(runs)
        print("----")

        sns.set_context(context="paper", font_scale=1.5)
        sns.set_style("darkgrid", {'font.family': 'serif'})
        xlist, ylist = [], []
        for run in runs:
            timesteps = load_results(run)
            if num_timesteps is not None:
                timesteps = timesteps[timesteps.l.cumsum() <= num_timesteps]
            x, y = ts2xy(timesteps, xaxis, yaxis=yaxis)
            if x.shape[0] >= EPISODES_WINDOW:
                x, y = window_func(x, y, EPISODES_WINDOW, np.mean)
            xlist.append(x)
            ylist.append(y)
        if individual:
            for i, (xs, ys, run) in enumerate(zip(xlist, ylist, runs)):
                g = sns.lineplot(x=xs, y=ys, label=os.path.basename(run))
        else:
            # Zero-order hold to align the data for plotting
            joint_x = sorted(list(set(np.concatenate(xlist))))
            combined_x, combined_y = [], []
            for xs, ys in zip(xlist, ylist):
                cur_ind = 0
                zoh_y = []
                for x in joint_x:
                    # The next value matters
                    if cur_ind < len(ys) - 1 and x >= xs[cur_ind + 1]:
                        cur_ind += 1
                    zoh_y.append(ys[cur_ind])
                if subsample:
                    combined_x.extend(joint_x[::subsample])
                    combined_y.extend(zoh_y[::subsample])
                else:
                    combined_x.extend(joint_x)
                    combined_y.extend(zoh_y)
            data = pd.DataFrame({xaxis: combined_x, yaxis: combined_y})
            g = sns.lineplot(x=xaxis,
                             y=yaxis,
                             data=data,
                             ci=None,
                             sort=True,
                             label=labels[i])

        print("Completed folder", folder)

    if title:
        plt.title(title)
    plt.xlabel(xaxis)
    plt.ylabel(yaxis)
    plt.tight_layout(pad=0)
Example no. 22
def percentile_plot(dirs, title=None, labels=None, curve=False):
    for i in range(len(dirs)):
        if not dirs[i].startswith('/'):
            dirs[i] = os.path.join(BASE, dirs[i])

    if labels is None:
        labels = [os.path.basename(os.path.normpath(d)) for d in dirs]

    categorical_x = []
    categorical_y = []
    for i, directory in enumerate(dirs):
        all_runs = []
        folders = [
            os.path.join(directory, subdir)
            for subdir in get_subdirs(directory)
        ]
        rewards = []
        for folder in folders:
            runs = []
            for item in os.listdir(folder):
                item_path = os.path.join(folder, item)
                if os.path.isdir(item_path) and '0.monitor.csv' in os.listdir(
                        item_path):
                    runs.append(item_path)
            returns = []
            print(
                "Different evals for folder", folder,
                "Please ensure this has ALL intended environments or results may skew:"
            )
            print(runs)
            print("----")
            for run in runs:
                timesteps = load_results(run)
                x, y = ts2xy(timesteps, X_TIMESTEPS, yaxis='r')
                if x.shape[0] >= EPISODES_WINDOW:
                    x, y = window_func(x, y, EPISODES_WINDOW, np.mean)
                if len(y) > 0:
                    returns.append(max(y))
                else:
                    print("Was Error on run", run)
            avg_reward = np.mean(returns)
            print(folder, avg_reward)
            rewards.append(avg_reward)
        print("Seeds for", directory, ":", len(rewards), "of", len(folders))
        print("Stats:")
        rounded_mean = round(np.mean(rewards), 1)
        rounded_confidence = round(np.std(rewards) / np.sqrt(len(rewards)), 1)
        print("$" + str(rounded_mean) + " \pm " + str(rounded_confidence) +
              "$")

        if curve:
            x = np.sort(rewards)
            y = 1 - (np.arange(0, len(x)) / len(x))
            sns.set_context(context="paper", font_scale=1.5)
            sns.set_style("darkgrid", {'font.family': 'serif'})
            sns.scatterplot(x=x, y=y, label=labels[i])
        else:
            categorical_x.extend([labels[i]] * len(rewards))
            categorical_y.extend(rewards)

    if curve:
        plt.xlabel('reward')
        plt.ylabel('% Above threshold')
    else:
        sns.set(rc={'figure.figsize': (6.4, 4.8)})
        sns.set_context(context="paper", font_scale=1.4)
        sns.set_style("darkgrid", {'font.family': 'serif'})
        sns.boxplot(x=categorical_x,
                    y=categorical_y,
                    whis=1.5,
                    showcaps=False,
                    showfliers=True,
                    saturation=0.7,
                    width=0.9)
        sns.swarmplot(x=categorical_x, y=categorical_y, color="0.25")
        plt.ylabel('Reward')
    if title:
        plt.title(title)
    plt.tight_layout(pad=0)
Example no. 23
    parser = argparse.ArgumentParser()
    parser.add_argument('-f',
                        '--log-folder',
                        help='Log folder',
                        type=str,
                        default='logs')
    parser.add_argument("--env",
                        help="environment ID",
                        type=str,
                        default="CartPole-v1")
    args = parser.parse_args()

    log_dir = args.log_folder

    # Load results
    W = load_results(log_dir)
    # print("Results seed: ", W)

    # Save walltime to stats.csv
    df = pd.read_csv(log_dir + 'stats.csv')
    df["Train walltime (s)"] = W["t"].max()
    df.to_csv(log_dir + "stats.csv", index=False)
    # print(df)

    # Plot training rewards

    TIMESTEPS = 1e10

    plot_results([log_dir], TIMESTEPS, X_TIMESTEPS, args.env)
    plt.savefig(log_dir + "reward_vs_timesteps.png")
    # plt.show()
Example no. 24
    for path in Path(log_dir).rglob(args.env + '_*'):
        res_file_list.append(path)

    res_file_list = sorted(res_file_list)
    # print(res_file_list)

    df_list = []
    col_list = []
    count = 1

    for filename in res_file_list:
        # print(filename)
        filename = str(filename)  # convert from Posixpath to string

        W = load_results(filename)
        # print(W['r'])

        df_list.append(W['r'])
        col_list.append("seed " + str(count))
        count += 1

    all_rewards = pd.concat(df_list, axis=1)
    all_rewards.columns = col_list

    all_rewards_copy = all_rewards.copy()
    all_rewards["mean_reward"] = all_rewards_copy.mean(axis=1)
    all_rewards["std_reward"] = all_rewards_copy.std(axis=1)
    all_rewards["upper"] = all_rewards["mean_reward"] + \
        all_rewards["std_reward"]
    all_rewards["lower"] = all_rewards["mean_reward"] - \
    for path in Path(log_dir).rglob(args.env + '_*'):
        res_file_list.append(path)

    res_file_list = sorted(res_file_list)
    # print(res_file_list)

    df_list = []
    col_list = []
    COUNT = 1

    for filename in res_file_list:
        # print(filename)
        FILENAME = str(filename)  # convert from Posixpath to string

        W = load_results(FILENAME)
        # print(W['r'])

        df_list.append(W['r'])
        col_list.append("seed " + str(COUNT))
        COUNT += 1

    all_rewards = pd.concat(df_list, axis=1)
    all_rewards.columns = col_list

    all_rewards_copy = all_rewards.copy()
    all_rewards["mean_reward"] = all_rewards_copy.mean(axis=1)
    all_rewards["std_reward"] = all_rewards_copy.std(axis=1)
    all_rewards["upper"] = all_rewards["mean_reward"] + \
        all_rewards["std_reward"]
    all_rewards["lower"] = all_rewards["mean_reward"] - \
Example no. 26
    ]
else:
    dirs = [
        os.path.join(log_path, folder)
        for folder in os.listdir(log_path)
        if (env in folder and os.path.isdir(os.path.join(log_path, folder)))
    ]


plt.figure(y_label, figsize=args.figsize)
plt.title(y_label, fontsize=args.fontsize)
plt.xlabel(f"{x_label}", fontsize=args.fontsize)
plt.ylabel(y_label, fontsize=args.fontsize)
for folder in dirs:
    try:
        data_frame = load_results(folder)
    except LoadMonitorResultsError:
        continue
    if args.max_timesteps is not None:
        data_frame = data_frame[data_frame.l.cumsum() <= args.max_timesteps]
        
    try:
        y = np.array(data_frame[y_axis])
    except KeyError:
        print(f"No data available for {folder}")
        continue
    x, _ = ts2xy(data_frame, x_axis)
    print(x)
    # Do not plot the smoothed curve at all if the timeseries is shorter than window size.
    if x.shape[0] >= args.episode_window:
        # Compute and plot rolling mean with window of size args.episode_window
Example no. 27
                                 morphology=morphology,
                                 path=base_path,
                                 verbose=1)
    eval_env = get_env(params, morphology=morphology)

    if args.eval_ep > 0:
        fitness = eval_policy(model,
                              eval_env,
                              num_ep=args.eval_ep,
                              deterministic=True,
                              gif=False,
                              render=False,
                              verbose=0)[0]
    else:
        # Determine fitness from training history
        x, y = ts2xy(load_results(model_path), 'timesteps')
        if len(x) > 0:
            # Mean training reward over the last -eval_ep episodes
            ys = y[args.eval_ep:]
            fitness = np.mean(ys)
        else:
            fitness = -np.inf
    # except Exception as e:
    #     print("Encountered Error", e)
    #     print("Assigning zero fitness")
    #     model_path, tb_path = get_paths(params, path=base_path)
    #     os.makedirs(model_path, exist_ok=True)
    #     with open(os.path.join(model_path, 'fitness.tmp'), 'w+') as f:
    #         f.write(str(-1*float('inf')))
    #     exit() # Exit if we fail to train the model.
Example no. 28
        '-f', args.log_dir,
        '--algo', algo,
        '--env', env_id,
        '--no-render',
        '--seed', str(args.seed),
        '--verbose', '0',
        '--reward-log', reward_log
    ]
    if args.verbose >= 1:
        print(f"{idx + 1}/{n_experiments}")
        print(f"Evaluating {algo} on {env_id}...")

    skip_eval = False
    if os.path.isdir(reward_log):
        try:
            x, y = ts2xy(load_results(reward_log), 'timesteps')
            skip_eval = len(x) > 0
        # TODO: narrow this overly broad except
        except Exception:
            pass

    if skip_eval:
        print("Skipping eval...")
    else:
        return_code = subprocess.call(['python', 'enjoy.py'] + arguments)
        x, y = ts2xy(load_results(reward_log), 'timesteps')

    if len(x) > 0:
        mean_reward = np.mean(y)
        std_reward = np.std(y)
        results['algo'].append(algo)