Example #1
def plot_results(log_folder, title='Learning Curve', smoothing=True):
    """
    plot the results

    :param log_folder: (str) the save location of the results to plot
    :param title: (str) the title of the task to plot
    :param smoothing: (bool) whether to smooth the rewards with a moving average

    from stable-baselines example
    """
    x, y = ts2xy(load_results(log_folder), 'timesteps')

    if smoothing:
        y = movingAverage(y, window=50)
        title = title + ' (smoothed)'
    else:
        title = title + ' (no smoothing)'

    # Truncate x
    x = x[len(x) - len(y):]

    fig = plt.figure(title)
    plt.plot(x, y)
    plt.xlabel('Number of Timesteps')
    plt.ylabel('Rewards')
    plt.title(title)
    plt.show()
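The movingAverage helper used above is not shown in this example. A minimal sketch of such a helper, assuming the simple convolution-based smoothing used in the stable-baselines documentation:

import numpy as np

def movingAverage(values, window):
    """Smooth values with a rolling mean of the given window size."""
    weights = np.repeat(1.0, window) / window
    # 'valid' keeps only positions where the window fully overlaps the data,
    # which is why callers truncate x to match len(y) afterwards
    return np.convolve(values, weights, 'valid')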
Example #2
def _get_random_source_policies():

    opt_source_policies = []
    subopt_source_policies = []
    for source_path in os.listdir(sources_dir):
        if env_id in source_path and source_path[-1] == '1':
            path = sources_dir + source_path
            _, y = ts2xy(load_results(path), 'episodes')
            if np.mean(y[-100:]) > _OPT_THRESH[env_id]:
                opt_source_policies.append('{}/{}.pkl'.format(path, env_id))
            if np.mean(y[-100:]) < _SUBOPT_THRESH[env_id]:
                subopt_source_policies.append('{}/{}.pkl'.format(path, env_id))

    if len(opt_source_policies) < num_opt_sources:
        raise ValueError(
            'found {} optimal source policies, fewer than the requested {}'
            .format(len(opt_source_policies), num_opt_sources))
    if len(subopt_source_policies) < num_subopt_sources:
        raise ValueError(
            'found {} suboptimal source policies, fewer than the requested {}'
            .format(len(subopt_source_policies), num_subopt_sources))

    source_policies = np.random.choice(opt_source_policies,
                                       num_opt_sources,
                                       replace=False).tolist()
    source_policies += np.random.choice(subopt_source_policies,
                                        num_subopt_sources,
                                        replace=False).tolist()

    return source_policies
Example #3
def callback(_locals, _globals):
    """
    Callback called at each step (for DQN and others) or after n steps (see ACER or PPO2)
    :param _locals: (dict)
    :param _globals: (dict)
    """
    global n_steps, best_mean_reward
    # Print stats every 10 calls
    if (n_steps + 1) % 10 == 0:
        # Evaluate policy training performance
        x, y = ts2xy(load_results(log_dir), 'timesteps')
        if len(x) > 0:
            mean_reward = np.mean(y[-100:])
            print(x[-1], 'timesteps')
            print(
                "Best mean reward: {:.2f} - Last mean reward per episode: {:.2f}"
                .format(best_mean_reward, mean_reward))

            # New best model, you could save the agent here
            if mean_reward > best_mean_reward:
                best_mean_reward = mean_reward
                # Example for saving best model
                print("Saving new best model")
                _locals['self'].save(log_dir + 'best_model_prof.pkl')
    n_steps += 1
    # Returning False will stop training early
    return True
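These callbacks read global state (n_steps, best_mean_reward, log_dir) that is set up outside the snippet. A rough sketch of how such a callback is typically wired up with stable-baselines 2; the environment, algorithm and paths below are illustrative placeholders, not taken from the example:

import os
import gym
import numpy as np
from stable_baselines import DQN
from stable_baselines.bench import Monitor

log_dir = "/tmp/gym/"
os.makedirs(log_dir, exist_ok=True)
n_steps, best_mean_reward = 0, -np.inf  # globals read and updated by the callback

# Monitor writes the monitor.csv that load_results() / ts2xy() consume
env = Monitor(gym.make('CartPole-v1'), log_dir, allow_early_resets=True)
model = DQN('MlpPolicy', env, verbose=1)
model.learn(total_timesteps=int(1e5), callback=callback)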
Example #4
def callback(_locals, _globals):
    """
    Callback called after n steps
    :param _locals: (dict)
    :param _globals: (dict)
    """
    global best_mean_reward, n_episodes, saving_interval

    n_episodes += 1
    if n_episodes % saving_interval == 0:
        x, y = ts2xy(load_results(log_dir), 'episodes')
        if len(x) > 0:
            mean_reward = np.mean(y[-int(saving_interval):])
            logger.info("{}: Best mean reward: {:.2f} - Last mean reward per episode: {:.2f}\n".format(x[-1], best_mean_reward, mean_reward))

            with open("mean_reward.txt", "a") as text_file:
                print("{}: Best mean reward: {:.2f} - Last mean reward per episode: {:.2f}".format(x[-1], best_mean_reward, mean_reward), file=text_file)

            _locals['self'].save(pickle_dir + 'ppo2_recent_model.pkl')
            if mean_reward >= best_mean_reward:
                best_mean_reward = mean_reward
                logger.debug("Saving new best model")
                _locals['self'].save(pickle_dir + 'ppo2_best_model.pkl')

    return True
Example #5
def callback(_locals, _globals):
    def shift(arr, num, fill_value=np.nan):
        # Shift arr by num positions, padding the vacated entries with fill_value
        result = np.empty_like(arr)
        if num > 0:
            result[:num] = fill_value
            result[num:] = arr[:-num]
        elif num < 0:
            result[num:] = fill_value
            result[:num] = arr[-num:]
        else:
            result = arr
        return result

    global best_mean_reward
    # Evaluate policy training performance
    copyfile(
        "/tmp/monitor/{0}/{1}/monitor.csv".format(
            dir_dict['_hyper_weights_index'], 0),
        "{0}monitor.csv".format(dir_dict['log']))
    x, y = ts2xy(load_results(dir_dict['log']), 'timesteps')
    if len(x) > 0:
        # Mean of the per-episode change in reward over the last 100 entries
        mean_reward = np.mean((y[-100:] - shift(y[-100:], 1, fill_value=0.0)))
        print(x[-1], 'timesteps')
        print(
            "Best mean reward: {:.2f} - Last mean reward per episode: {:.2f}".
            format(best_mean_reward, mean_reward))

        # New best model, you could save the agent here
        if mean_reward > best_mean_reward:
            best_mean_reward = mean_reward
            # Example for saving best model
            print("Saving new best model")
            _locals['self'].save(dir_dict['model'] + 'best_model.pkl')
    return True
Example #6
def auto_save_callback(_locals, _globals):
    """
    Callback called at each step (for DQN and others) or after n steps (see ACER or PPO2)
    :param _locals: (dict)
    :param _globals: (dict)
    """
    # get callback variables, with default values if uninitialized
    callback_vars = get_callback_vars(_locals["self"],
                                      n_steps=0,
                                      best_mean_reward=-np.inf)
    # only evaluate every 20 steps
    if callback_vars["n_steps"] % 20 == 0:
        # Evaluate policy training performance
        x, y = ts2xy(load_results(log_dir), 'timesteps')
        if len(x) > 0:
            mean_reward = np.mean(y[-100:])

            # New best model, you could save the agent here
            if mean_reward > callback_vars["best_mean_reward"]:
                callback_vars["best_mean_reward"] = mean_reward
                # Example for saving best model
                print("Saving new best model at {} timesteps".format(x[-1]))
                _locals['self'].save(log_dir + 'best_model')
    callback_vars["n_steps"] += 1
    return True
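This example (and the plotting callbacks in Examples #17 and #19 below) relies on a get_callback_vars helper that is not shown. A minimal, hypothetical sketch, assuming the helper simply caches named state on the model instance so the callback stays stateless across calls:

def get_callback_vars(model, **kwargs):
    """Hypothetical helper: keep per-callback state on the model itself.
    The first call attaches the given defaults; later calls return the same
    dict so the callback can read and mutate it in place."""
    if not hasattr(model, '_callback_vars'):
        model._callback_vars = dict(kwargs)
    else:
        for key, default in kwargs.items():
            model._callback_vars.setdefault(key, default)
    return model._callback_vars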
Example #7
def callback(_locals, _globals):
    """
    Callback called at each step (for DQN and others) or after n steps (see ACER or PPO2)
    :param _locals: (dict)
    :param _globals: (dict)
    """
    global n_steps, best_mean_reward, save_path, log_dir
    # Print stats every args.save_interval calls
    if (n_steps + 1) % args.save_interval == 0:
        # Evaluate policy training performance
        x, y = ts2xy(load_results(log_dir), "timesteps")
        if len(x) > 0:
            mean_reward = np.mean(y[-100:])
            print(x[-1], 'timesteps')
            print(
                "Best mean reward: {:.2f} - Last mean reward per episode: {:.2f}"
                .format(best_mean_reward, mean_reward))

            # New best model, you could save the agent here
            if mean_reward > best_mean_reward:
                best_mean_reward = mean_reward
                # Example for saving best model
                print("Saving new best model")
                model.save(save_path)
    n_steps += 1
    return True
def multi_callback(_locals, _globals):
    """
    Callback called at each step (for DQN and others) or after n steps (see ACER or PPO2)
    :param _locals: (dict)
    :param _globals: (dict)
    """
    global n_steps, best_mean_reward, log_dir

    # Print stats every 1000 calls
    if (n_steps + 1) % 1000 == 0:
        seed = _locals['seed']
        experiment_dir = log_dir + 'seed_{}/'.format(seed)
        # Evaluate policy performance
        x, y = ts2xy(load_results(experiment_dir), 'timesteps')
        if len(x) > 0:
            mean_reward = np.mean(y[-100:])

            # New best model, you could save the agent here
            if mean_reward > best_mean_reward:
                print(x[-1], 'timesteps')
                print(
                    "Best mean reward: {:.2f} - Last mean reward per episode: {:.2f}"
                    .format(best_mean_reward, mean_reward))
                best_mean_reward = mean_reward
                # Example for saving best model
                print("Saving new best model")
                _locals['self'].save(experiment_dir +
                                     'best_model_{}_steps.pkl'.format(n_steps))

        print("Saving checkpoint model")
        _locals['self'].save(experiment_dir +
                             'model_{}_steps.pkl'.format(n_steps))

    n_steps += 1
    return True
Example #9
def callback(_locals, _globals):
    """
    Callback called at each step (for DQN and others) or after n steps (see ACER or PPO2)
    :param _locals: (dict)
    :param _globals: (dict)
    """
    global best_mean_reward, n_steps
    mean_reward = 0
    if (n_steps + 1) % 100000 == 0:
        print("Saving new best model")
        _locals['self'].save(model_directory + 'sac-model_' +
                             str(n_steps + 1) + '.pkl')
    if (n_steps + 1) % 1000 == 0:
        x, y = ts2xy(load_results(log_directory), 'timesteps')
        if len(x) > 0:
            mean_reward = numpy.mean(y[-100:])
            print(x[-1], 'timesteps')
            print(
                "Best mean reward: {:.2f} - Last mean reward per episode: {:.2f}"
                .format(best_mean_reward, mean_reward))

        if mean_reward > best_mean_reward:
            best_mean_reward = mean_reward
            print("Saving new best model")
            _locals['self'].save(model_directory + 'sac-model_' +
                                 str(n_steps + 1) + '.pkl')
    n_steps += 1
    return True
    def callback(_locals, _globals):
        """
        Callback called at each step (for DQN and others) or after n steps (see ACER or PPO2)
        :param _locals: (dict)
        :param _globals: (dict)
        """
        nonlocal n_steps, best_mean_reward, hist_rew
        # Print stats every 5 calls
        if (n_steps + 1) % 5 == 0:
            # Evaluate policy performance
            x, y = ts2xy(load_results(log_dir), 'timesteps')
            if len(x) > 0:
                # mean_rew_plot(y, len(x))
                hist_rew = y.copy()
                mean_reward = np.mean(y[-100:])
                if (n_steps + 1) % 100 == 0:
                    print(x[-1], 'timesteps')
                    print(
                        "Best mean reward: {:.2f} - Last mean reward per episode: {:.2f}"
                        .format(best_mean_reward, mean_reward))

                # New best model, you could save the agent here
                if mean_reward > best_mean_reward:
                    best_mean_reward = mean_reward
                    # Example for saving best model
                    print("Saving new best model")
                    _locals['self'].save(log_dir +
                                         "/deep_{0:.0E}.pkl".format(lr))

        n_steps += 1
        return False
Example #11
def log_callback(_locals, _globals):

    global n_steps, best_mean_reward, log_dir
    # Print stats every 3000 calls
    if n_steps % 3000 == 0:
        # Evaluate policy training performance
        x, y = ts2xy(load_results(log_dir), 'timesteps')
        if len(x) > 0:
            mean_reward = np.mean(y[-100:])
            print(x[-1], 'timesteps')
            print(
                "Best mean reward: {:.2f} - Last mean reward per episode: {:.2f}"
                .format(best_mean_reward, mean_reward))

            # Track the best mean reward seen so far
            if mean_reward > best_mean_reward:
                best_mean_reward = mean_reward
            # Save a checkpoint of the current model every 3000 calls
            print("Saving new model")
            _locals['self'].save(
                os.path.join(
                    output_dir,
                    str(n_steps) + '_model_r_' + str(best_mean_reward) +
                    '.pkl'))
    n_steps += 1
    return True
Example #12
def tsplot_result(log_dirs_dict, num_timesteps, title='Learning Curve'):
    # print('load_results', load_results(log_dir))
    import seaborn as sns
    import pandas as pd
    datas = []
    for key in log_dirs_dict:
        log_dirs = log_dirs_dict[key]
        for index, dir in enumerate(log_dirs):
            init_data = load_results(dir)
            init_data = init_data[init_data.l.cumsum() <= num_timesteps]
            x, y = ts2xy(init_data, 'timesteps')
            y = movingAverage(y, window=100)
            x = x[len(x) - len(y):]
            # x = x[:len(y)]
            print('y', y)
            x, y = subsample(t=x, vt=y, bins=np.linspace(0, num_timesteps, int(1000) + 1))
            x = np.append(x, np.array([0]))
            y = np.append(y, np.array([0]))
            print('y after subsample', y)

            # y = movingAverage(y, window=10)
            # # x = x[len(x) - len(y):]
            # x = x[:len(y)]
            data = pd.DataFrame({'Timesteps': x,  'Reward': y, 'subject': np.repeat(index, len(x)), 'Algorithm': np.repeat(key, len(x))})

            datas.append(data)

    data_df = pd.concat(datas, ignore_index=True)

    print('data', data_df)
    sns.tsplot(data=data_df, time='Timesteps', value='Reward', unit='subject', condition='Algorithm')
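Note that sns.tsplot has been deprecated and removed in recent seaborn releases. Assuming data_df is built as above, a rough modern equivalent (a mean curve per algorithm with a confidence band over the 'subject' runs) would be:

import seaborn as sns

# Rough replacement for the removed sns.tsplot call (seaborn >= 0.9):
# lineplot aggregates repeated x values into a mean curve with a confidence band.
sns.lineplot(data=data_df, x='Timesteps', y='Reward', hue='Algorithm')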
def plot_results(log_folder, title='Learning Curve'):
    """
    plot the results

    :param log_folder: (str) the save location of the results to plot
    :param title: (str) the title of the task to plot
    """

    x, y = ts2xy(load_results(log_folder), 'timesteps')
    # x, y = ts2xy(load_results(log_folder), 'episodes')
    # x, y = ts2xy(load_results(log_folder), 'walltime_hrs')

    print(x)
    print(y)

    y = moving_average(y, window=10)
    # Truncate x
    x = x[len(x) - len(y):]

    fig = plt.figure(title)
    plt.plot(x, y)
    plt.xlabel('Number of Timesteps')
    plt.ylabel('Rewards')
    plt.title(title + " Smoothed")
    plt.show()
Example #14
def plot_results(log_folder, plot_dir, title='Learning Curve'):
    """
    plot the results

    :param log_folder: (str) the save location of the results to plot
    :param title: (str) the title of the task to plot
    """
    os.makedirs(plot_dir, exist_ok=True)

    x, y = ts2xy(load_results(log_folder), 'timesteps')
    y = movingAverage(y, window=50)
    # Truncate x
    x = x[len(x) - len(y):]

    fig = plt.figure(title)
    plt.plot(x, y)
    plt.xlabel('Number of Timesteps')
    plt.ylabel('Rewards')
    plt.title(title + " Smoothed")

    if not plot_dir.endswith("/"):
        plot_dir += "/"

    plt.savefig(str(plot_dir) + "results_dqn_" + str(env_name) + "_trained_timesteps_" + str(train_timesteps))
def plot_results(log_folder, title="Learning Curve"):
    """

    Parameters
    ----------
    log_folder : str
        the save location of the results to plot
    title : str
        the title of the task to plot
    Returns
    -------

    """

    x, y = ts2xy(load_results(log_folder), "timesteps")
    y = moving_average(y, window=50)
    # Truncate x
    x = x[len(x) - len(y):]
    fig = plt.figure(title)
    plt.plot(x, y)
    plt.xlabel("Number of Timesteps")
    plt.ylabel("Rewards")
    plt.title(title + " Smoothed")
    plt.savefig(title + ".png")
    plt.close()
Example #16
def callback(_locals, _globals):
    global nupdates
    global best_mean_reward
    nupdates += 1

    if nupdates % period_check == 0:

        x, y = ts2xy(load_results(log_dir), 'timesteps')
        if len(y) > 0:

            mean_reward = np.mean(y[-period_check:])
            max_reward = max(y[-period_check:])
            min_reward = min(y[-period_check:])

            update_model = mean_reward > best_mean_reward
            if update_model:
                best_mean_reward = mean_reward
                _locals['self'].save('model')

            print(
                'time: {}, nupdates: {}, max_reward: {:.2f}, min_reward: {:.2f}, mean: {:.2f}, best_mean: {:.2f}, model_update: {}'
                .format(datetime.datetime.now(), nupdates - 1, max_reward,
                        min_reward, mean_reward, best_mean_reward,
                        update_model))

    return True
Example #17
def plotting_callback(_locals, _globals):
    """
    Callback called at each step (for DQN and others) or after n steps (see ACER or PPO2)
    :param _locals: (dict)
    :param _globals: (dict)
    """
    # get callback variables, with default values if uninitialized
    callback_vars = get_callback_vars(_locals["self"], plot=None)

    # get the monitor's data
    x, y = ts2xy(load_results(log_dir), 'timesteps')
    if callback_vars["plot"] is None:  # make the plot
        plt.ion()
        fig = plt.figure(figsize=(6, 3))
        ax = fig.add_subplot(111)
        line, = ax.plot(x, y)
        callback_vars["plot"] = (line, ax, fig)
        plt.show()
    else:  # update and rescale the plot
        callback_vars["plot"][0].set_data(x, y)
        callback_vars["plot"][-2].relim()
        callback_vars["plot"][-2].set_xlim([_locals["total_timesteps"] * -0.02,
                                            _locals["total_timesteps"] * 1.02])
        callback_vars["plot"][-2].autoscale_view(True, True, True)
        callback_vars["plot"][-1].canvas.draw()
Example #18
def train_callback(_locals, _globals):
    """
  Callback called at each step (for DQN an others) or after n steps (see ACER or PPO2)
  :param _locals: (dict)
  :param _globals: (dict)
  """
    global n_callback, best_mean_reward, agent_name, path_to_models
    # Print stats every 10 calls
    if (n_callback + 1) % 10 == 0:

        # Evaluate policy performance
        x, y = ts2xy(load_results('%s/%s/' % (path_to_models, agent_name)),
                     'timesteps')
        if len(x) > 0:
            mean_reward = np.mean(y[-100:])
            print(x[-1], 'timesteps')
            print(
                "Best mean reward: {:.2f} - Last mean reward per episode: {:.2f}"
                .format(best_mean_reward, mean_reward))

            # New best model, you could save the agent here
            if mean_reward > best_mean_reward:
                best_mean_reward = mean_reward
                # Example for saving best model
                print("Saving new best model")
                _locals['self'].save(path_to_models + '/%s/%s.pkl' %
                                     (agent_name, agent_name))
    n_callback += 1
    return True
Example #19
def plotting_callback(_locals, _globals):
    """
    Callback called at each step (for DQN an others) or after n steps (see ACER or PPO2)
    :param _locals: (dict)
    :param _globals: (dict)
if ENVIRONMENT == 'possensor':
  env = tm700_possensor_gym(renders=RENDERS, isDiscrete=DISCRETE)
  env = Monitor(env, os.path.join(log_dir, 'monitor.csv'), allow_early_resets=True)

    """
    # get callback variables, with default values if uninitialized
    callback_vars = get_callback_vars(_locals["self"], plot=None)

    # get the monitor's data
    x, y = ts2xy(load_results(log_dir), 'timesteps')
    if callback_vars["plot"] is None:  # make the plot
        plt.ion()
        fig = plt.figure(figsize=(6, 3))
        ax = fig.add_subplot(111)
        line, = ax.plot(x, y)
        callback_vars["plot"] = (line, ax, fig)
        plt.show()
    else:  # update and rescale the plot
        callback_vars["plot"][0].set_data(x, y)
        callback_vars["plot"][-2].relim()
        callback_vars["plot"][-2].set_xlim([
            _locals["total_timesteps"] * -0.02,
            _locals["total_timesteps"] * 1.02
        ])
        callback_vars["plot"][-2].autoscale_view(True, True, True)
        callback_vars["plot"][-1].canvas.draw()
Example #20
    def _on_step(self) -> bool:
        rospy.logdebug("on_step callback")
        if self.n_calls % self.check_freq == 0:
            # Retrieve training reward
            x, y = ts2xy(load_results(self.log_dir), 'timesteps')
            if len(x) > 0:
                # Mean training reward over the last 100 episodes
                mean_reward = np.mean(y[-100:])
                if self.verbose > 0:
                    print("Num timesteps: {}".format(self.num_timesteps))
                    print(
                        "Best mean reward: {:.2f} - Last mean reward per episode: {:.2f}"
                        .format(self.best_mean_reward, mean_reward))
                    print(
                        f"self.reward_bound is {self.reward_bound} and mean_ward is {mean_reward}"
                    )
                # New best model, you could save the agent here
                if mean_reward > self.best_mean_reward:
                    self.best_mean_reward = mean_reward
                    # Example for saving best model
                    if self.verbose > 0:
                        print("Saving new best model to {}".format(
                            self.save_path))
                    self.model.save(self.save_path)
                # early stop the training
                if self.reward_bound is not None and mean_reward > self.reward_bound:
                    print("early stop!")
                    return False

        return True
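This _on_step method (and the similar one in Example #21) belongs to a custom callback class whose body is not shown. A minimal sketch of what the surrounding class might look like with stable-baselines3; the constructor arguments and save_path layout are illustrative assumptions that mirror the attribute names used in the snippets:

import os
import numpy as np
from stable_baselines3.common.callbacks import BaseCallback

class SaveOnBestRewardCallback(BaseCallback):
    """Illustrative wrapper class; only the attributes used by _on_step are set."""

    def __init__(self, check_freq, log_dir, reward_bound=None, verbose=1):
        super().__init__(verbose)
        self.check_freq = check_freq
        self.log_dir = log_dir
        self.reward_bound = reward_bound
        self.best_mean_reward = -np.inf
        self.save_path = os.path.join(log_dir, 'best_model')

    def _init_callback(self) -> None:
        # Make sure the directory for the saved model exists
        os.makedirs(self.log_dir, exist_ok=True)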
Example #21
    def _on_step(self) -> bool:
        if self.n_calls % self.check_freq == 0:

            # Retrieve training reward
            x, y = ts2xy(load_results(self.log_dir), 'timesteps')
            if len(x) > 0:
                # Reward of the most recent episode (y[-1:])
                mean_reward = np.mean(y[-1:])
                if self.verbose > 0:
                    print("Num timesteps: {}".format(self.num_timesteps))
                    print(
                        "Best mean reward: {:.2f} - Last mean reward per episode: {:.2f}"
                        .format(self.best_mean_reward, mean_reward))
                    self.model.save(self.model_path)

                # New best model, you could save the agent here
                if mean_reward > self.best_mean_reward:
                    self.best_mean_reward = mean_reward
                    # Example for saving best model
                    if self.verbose > 0:
                        print("Saving new best model to {}".format(
                            self.save_path))
                    self.model.save(self.save_path)

        return True
def callback(_locals, _globals):
    """
    Callback called at each step (for DQN and others) or after n steps (see ACER or PPO2)
    :param _locals: (dict)
    :param _globals: (dict)
    """
    global n_steps, best_mean_reward
    # Print stats every 500 calls
    if (n_steps + 1) % 500 == 0:
        # Evaluate policy training performance
        x, y = ts2xy(load_results(os.path.join(output_dir,'log')), 'timesteps')
        if len(x) > 0:
            mean_reward = np.mean(y[-100:])
            print(x[-1], 'timesteps')
            print("Best mean reward: {:.2f} - Last mean reward per episode: {:.2f}".format(best_mean_reward, mean_reward))

            # New best model, you could save the agent here
            if mean_reward > best_mean_reward:
                best_mean_reward = mean_reward
            if (n_steps + 1) % 5000 == 0:
                # Save model
                print("Saving model at iter {}".format(x[-1]))
                _locals['self'].save(os.path.join(output_dir, str(x[-1])+'model.pkl'))
    n_steps += 1
    # Returning False will stop training early
    return True
Example #23
def plot_results(log_folder, model_name, plt_dir, title='Learning Curve'):
    """
    plot the results

    :param log_folder: (str) the save location of the results to plot
    :param model_name: (str) name used for the copied monitor csv and the saved plot files
    :param plt_dir: (str) the directory where the plots are saved
    :param title: (str) the title of the task to plot
    """
    m_name_csv = model_name + ".csv"
    old_file_name = os.path.join(log_folder, "monitor.csv")
    new_file_name = os.path.join(log_folder, m_name_csv)
    save_name = os.path.join(plt_dir, model_name)

    x, y = ts2xy(load_results(log_folder), 'timesteps')
    shutil.copy(old_file_name, new_file_name)
    y = moving_average(y, window=50)
    # Truncate x
    x = x[len(x) - len(y):]

    fig = plt.figure(title)
    plt.plot(x, y)
    plt.xlabel('Number of Timesteps')
    plt.ylabel('Rewards')
    plt.title(title + " Smoothed")
    plt.savefig(save_name + ".png")
    plt.savefig(save_name + ".eps")
    print("plots saved...")
    plt.show()
Example #24
def callback(_locals, _globals):
    """
    Callback called at each step (for DQN and others) or after n steps (see ACER or PPO2)
    :param _locals: (dict)
    :param _globals: (dict)
    """
    global n_updates
    global best_mean_reward
    # Print stats every 1000 calls
    if (n_updates + 1) % 1000 == 0:
        # Evaluate policy training performance
        x, y = ts2xy(load_results(log_dir), 'timesteps')
        if len(x) > 0:
            # Every 100 steps, compute the mean reward over the last 100 episodes and save the agent if it beats the best mean reward
            mean_reward = np.mean(y[-100:])
            #print(x[-1], 'timesteps')
            #print("Best mean reward: {:.2f} - Last mean reward per episode: {:.2f}".format(best_mean_reward, mean_reward))

            # New best model, you could save the agent here
            if mean_reward > best_mean_reward:
                best_mean_reward = mean_reward
                # Example for saving best model
                print("Saving new best model")
                _locals['self'].save(log_dir + 'best_model.pkl')

    n_updates += 1
    return True
def plot_results(log_folder, out_folder = None, title='Learning_Curve', save_plot = True):
    """
    plot the results

    :param log_folder: (str) the save location of the results to plot
    :param out_folder: (str) the location where the plot is saved (default: log_folder)
    :param title: (str) the title of the task to plot
    :param save_plot: (bool) save the plot as pdf?
    """
    x, y = ts2xy(load_results(log_folder), 'timesteps')
    y = moving_average(y, window=50)
    # Truncate x
    x = x[len(x) - len(y):]

    matplotlib.rc('font', size=14)
    matplotlib.rc('text', usetex=True)
    #fig1 = plt.figure() #figsize=(10, 10))
    plt.plot(x, y)
    plt.xlabel('Number of Timesteps')
    plt.ylabel('Cumulative reward')
    #plt.xscale('log')
    plt.yscale('symlog')
    plt.grid()
    #plt.title("Learning curve smoothed")
    if (save_plot):
        if (out_folder is None):
            plt.savefig(log_folder + title + ".pdf", dpi=300)
        else:
            plt.savefig(out_folder + title + ".pdf", dpi=300)
Example #26
def callback(_locals, _globals):
    global nupdates
    global best_mean_reward

    # Every 10 updates
    if (nupdates + 1) % 10 == 0:
        # Get the average episode length and average reward
        x, y = ts2xy(load_results(log_dir), 'timesteps')
        if len(y) > 0:
            # Mean reward over the last 10 episodes
            mean_reward = np.mean(y[-10:])

            # Save the model when the mean reward beats the best mean reward
            update_model = mean_reward > best_mean_reward
            if update_model:
                best_mean_reward = mean_reward
                _locals['self'].save('airstriker_model')

            # Log
            print(
                'time: {}, nupdates: {}, mean: {:.2f}, best_mean: {:.2f}, model_update: {}'
                .format(datetime.datetime.now(pytz.timezone('Asia/Tokyo')),
                        nupdates, mean_reward, best_mean_reward, update_model))

    nupdates += 1
    return True
def callback(_locals, _globals):
    """
    Callback called at each step (for DQN and others) or after n steps (see ACER or PPO2)
    :param _locals: (dict)
    :param _globals: (dict)
    """
    global n_steps, best_mean_reward, console_log_dir, model_file_name, log_name

    # This is invoked in every update
    if (n_steps + 1) % 1 == 0:
        # Evaluate policy performance
        x, y = ts2xy(load_results(console_log_dir), 'timesteps')
        if len(x) > 0:
            mean_reward = np.mean(y[-100:])
            print(x[-1], 'timesteps', file=sys.stderr)
            print(
                "Best mean reward: {:.2f} - Last mean reward per episode: {:.2f}"
                .format(best_mean_reward, mean_reward),
                file=sys.stderr)

            # New best model, you could save the agent here
            if mean_reward > best_mean_reward:
                best_mean_reward = mean_reward
                # Example for saving best model

    n_steps += 1
    return True
Example #28
def callback(_locals, _globals):
    """
    Callback called at each step (for DQN and others) or after n steps (see ACER or PPO2)
    :param _locals: (dict)
    :param _globals: (dict)
    """
    global n_steps, best_mean_reward
    # Print stats every 10 calls
    if (n_steps + 1) % 10 == 0:
        x, y = ts2xy(load_results(log_dir), 'timesteps')
        if len(x) > 100:
            #pdb.set_trace()
            mean_reward = np.mean(y[-100:])
            print(x[-1], 'timesteps')
            print(
                "Best mean reward: {:.2f} - Last mean reward per episode: {:.2f}"
                .format(best_mean_reward, mean_reward))

            # New best model, we save the agent here
            if mean_reward > best_mean_reward:
                best_mean_reward = mean_reward
                # Example for saving best model
                print("Saving new best model")
                _locals['self'].save(os.path.join(log_dir, 'best_model.pkl'))
            else:
                print("Saving latest model")
                _locals['self'].save(os.path.join(log_dir, 'latest_model.pkl'))
        else:
            print('{} monitor entries'.format(len(x)))
    n_steps += 1
    # Returning False will stop training early
    return True
    def _on_step(self) -> bool:
        if self.n_calls % self.check_freq == 0:
            # Retrieve training reward
            x, y = ts2xy(load_results(self.log_dir), 'timesteps')
            if len(x) > 0:
                # Mean training reward over the last 100 episodes
                mean_reward = np.mean(y[-100:])
                if self.verbose > 0:
                    print("Num timesteps: {}".format(self.num_timesteps))
                    print(
                        "Best mean reward: {:.2f} - Last mean reward per episode: {:.2f}"
                        .format(self.best_mean_reward, mean_reward))

                # New best model, you could save the agent here
                if mean_reward > self.best_mean_reward:
                    self.best_mean_reward = mean_reward
                    self.model_index += 1
                    # Example for saving best model
                    if self.verbose > 0:
                        print("Saving new best model at {} timesteps".format(
                            x[-1]))
                        print(Fore.YELLOW +
                              "Saving new best model to {}".format(
                                  self.save_path + str(self.model_index)) +
                              Style.RESET_ALL)
                    self.model.save(self.save_path + str(self.model_index) +
                                    ".zip")
        return True
def callback(_locals, _globals):
    """
    Callback called at each step
    Params:
        _locals: (dict)
        _globals: (dict)
    """
    global n_steps, best_mean_reward
    if (n_steps + 1) % 10 == 0:
        print('Saving the Model.. (every 10 updates)')
        _locals['self'].save('ppo_pong')
    n_steps += 1
    print('n_steps = ', n_steps)

    x, y = ts2xy(load_results(log_dir), 'timesteps')
    if len(x) > 0:
        # Mean reward over last 10 episodes
        if (len(x) % 10 == 0):
            mean_reward = np.mean(y[-10:])
            print(
                "Best mean reward over last 10 episodes: {:.2f} - Mean reward over last 10 episodes: {:.2f}"
                .format(best_mean_reward, mean_reward))
            if mean_reward > best_mean_reward:
                best_mean_reward = mean_reward
        # Timesteps passed
        print(x[-1], 'timesteps')
    # Returning False will stop training early
    return True