# imports assumed by the examples below (utils_env, agent, algo, utils_plot and const are project-local modules)
import numpy as np

import agent
import algo
import const
import utils_env
import utils_plot


def test_default_algo(use_ref_model: bool = False):
    env = utils_env.Environment()
    # use default params
    ag = agent.DRLAgent()
    if use_ref_model:
        print('... Test the agent using reference model ...')
        ag.set_model_path('ref')
    al = algo.DRLAlgo(env, ag)
    al.test()
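
A minimal usage sketch (not from the source): the same helper tests either the freshly trained model or, presumably, a pre-trained reference model, depending on the flag.

test_default_algo()                    # test the model saved under the default path
test_default_algo(use_ref_model=True)  # test the model stored under the 'ref' path
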
    def fit(self, i: int, env: utils_env.Environment):
        # state/action sizes are assumed to be attributes of the search class,
        # matching the other self.* hyper-parameters passed below
        self.ag = agent.DRLAgent(self.state_size, self.action_size,
                                 self.memory_size, self.gamma, self.batch_size,
                                 self.expl_noise, self.model_learning_rate,
                                 self.num_fc_actor, self.num_fc_critic)
        self.ag.set_model_path(i)  # save each candidate's model separately

        self.al = algo.DRLAlgo(env, self.ag, self.num_episodes)
        self.al.set_image_path(i)  # save each candidate's score separately

        history = self.al.train(with_close=False)  # do not close the Env so that other agents can be trained
        score = self._get_score(history)
        return score
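
The _get_score helper is not shown in this listing; a plausible sketch, assuming it mirrors the rolling-mean "Final score" computed in train_two_agents below (the const.rolling_mean_N window is taken from that example):

    def _get_score(self, history):
        # score a candidate by the rolling mean of its last N episode returns
        return float(np.mean(history[-const.rolling_mean_N:]))
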
Example 3
def test_default_algo(use_ref_model: bool = False):
    env = utils_env.Environment()
    model_name_suffix = ''
    if use_ref_model:
        print('... Test the agent using reference model ...')
        model_name_suffix = 'ref_'

    # use default params
    ag_1 = agent.DRLAgent()
    ag_1.set_model_path(model_name_suffix + str(1))
    ag_2 = agent.DRLAgent()
    ag_2.set_model_path(model_name_suffix + str(2))
    al = algo.DRLAlgo(env, ag_1, ag_2)
    al.test()
Example 4
def train_two_agents():
    env = utils_env.Environment()
    # use default params
    ag_1 = agent.DRLAgent()
    ag_1.set_model_path(1)
    ag_2 = agent.DRLAgent()
    ag_2.set_model_path(2)
    al = algo.DRLAlgo(env, ag_1, ag_2)
    history, best_e, best_score = al.train()
    print('\nFinal score: {:.3f}'.format(
        np.mean(history[-const.rolling_mean_N:])))
    print('Final memory length:', ag_1.memory.get_length())
    print('Best score in {:d} episodes, avg_score: {:.3f}'.format(
        best_e, best_score))

    # plot losses
    losses_lists = [
        ag_1.actor_loss_list, ag_2.actor_loss_list, ag_1.critic_loss_list,
        ag_2.critic_loss_list
    ]
    losses_labels = [
        'agent_1_actor', 'agent_2_actor', 'agent_1_critic', 'agent_2_critic'
    ]
    utils_plot.plot_loss(losses_lists, losses_labels)

    # plot noise
    utils_plot.plot_scatter(ag_1.noise_list,
                            title_text='Noise',
                            fp=const.file_path_img_noise)

    # plot memory actions
    memory_actions = np.array([t[1] for t in ag_1.memory.memory])
    utils_plot.plot_scatter(memory_actions,
                            title_text='Actions',
                            fp=const.file_path_img_actions)

    # show mean memory actions
    mean_a = np.mean(memory_actions, axis=0)
    std_a = np.std(memory_actions, axis=0)
    print('Mean/std actions agent_1:', mean_a[:2], std_a[:2])
    print('Mean/std actions agent_2:', mean_a[2:], std_a[2:])
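
The t[1] indexing above assumes a particular replay-memory layout; a minimal sketch of such a buffer, matching the memory.memory attribute and get_length() call used in this example (the field order is an assumption, not confirmed by the source):

from collections import deque, namedtuple

# hypothetical experience layout: index 1 is the joint action,
# which is what memory_actions extracts with t[1]
Experience = namedtuple('Experience',
                        ['state', 'action', 'reward', 'next_state', 'done'])


class ReplayMemory:
    def __init__(self, max_len: int):
        self.memory = deque(maxlen=max_len)

    def add(self, state, action, reward, next_state, done):
        self.memory.append(Experience(state, action, reward, next_state, done))

    def get_length(self) -> int:
        return len(self.memory)
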
    def fit(self, i: int, env: utils_env.Environment):
        self.ag_1 = agent.DRLAgent(self.num_states, self.num_actions,
                                   self.memory_size, self.gamma,
                                   self.batch_size, self.tau,
                                   self.model_learning_rate, self.num_fc_1,
                                   self.num_fc_2)
        self.ag_1.set_model_path(
            str(i) + '_0')  # save each candidate's model separately
        self.ag_2 = agent.DRLAgent(self.num_states, self.num_actions,
                                   self.memory_size, self.gamma,
                                   self.batch_size, self.tau,
                                   self.model_learning_rate, self.num_fc_1,
                                   self.num_fc_2)
        self.ag_2.set_model_path(
            str(i) + '_1')  # save each candidate's model separately

        self.al = algo.DRLAlgo(env, self.ag_1, self.ag_2, self.num_episodes)
        self.al.set_image_path(i)  # save each candidate's score separately

        # do not close the Env so that other agents can be trained
        history, _, best_score = self.al.train(
            with_close=False)  # todo use best score?
        score = self._get_score(history)
        return score
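
Neither fit method shows the loop that calls it; a hypothetical driver, sketched under the assumption that candidates are indexed 0..N-1 and share one environment (hence with_close=False above):

    def search(self, env: utils_env.Environment, num_candidates: int):
        # hypothetical driver: train every candidate on the same environment
        # and keep the index with the best rolling-mean score
        best_i, best_score = None, float('-inf')
        for i in range(num_candidates):
            score = self.fit(i, env)
            print('candidate {:d}: score {:.3f}'.format(i, score))
            if score > best_score:
                best_i, best_score = i, score
        return best_i, best_score
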
def train_default_algo():
    env = utils_env.Environment()
    # use default params
    ag = agent.DRLAgent()
    al = algo.DRLAlgo(env, ag)
    al.train()
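
A minimal script-style sketch chaining the helpers in this listing (the __main__ guard is not in the source): train the default agent, then test the model it saved.

if __name__ == '__main__':
    train_default_algo()
    test_default_algo()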