Example 1
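Trains a DQN agent on CartPole-v0 through the TF-Agents backend. The snippet assumes the usual easyagents imports (core, the tfagents backend module, and the duration / log callback modules).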
    def test_train(self):
        model_config = core.ModelConfig("CartPole-v0")
        tc = core.StepsTrainContext()
        dqn_agent = tfagents.TfDqnAgent(model_config=model_config)
        dqn_agent.train(train_context=tc,
                        callbacks=[duration.Fast(),
                                   log.Iteration()])
Example 2
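The same pattern with the TF-Agents SAC agent; _mountaincar_continuous_name is presumably a module-level constant holding the id of the continuous mountain-car gym environment.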
    def test_train(self):
        model_config = core.ModelConfig(_mountaincar_continuous_name)
        tc = core.StepsTrainContext()
        sac_agent = tfagents.TfSacAgent(model_config=model_config)
        sac_agent.train(
            train_context=tc,
            callbacks=[duration.Fast(),
                       log.Iteration(),
                       log.Agent()])
Example 3
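A dueling DQN trained through the Tensorforce backend. Here the StepsTrainContext is tuned field by field before being handed to train().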
    def test_dueling_dqn_train(self):
        model_config = core.ModelConfig("CartPole-v0", fc_layers=(100,))
        tc: core.StepsTrainContext = core.StepsTrainContext()
        tc.num_iterations = 20000
        tc.num_steps_buffer_preload = 1000
        tc.num_iterations_between_eval = 1000
        tc.max_steps_per_episode = 200
        dqn_agent = tforce.TforceDuelingDqnAgent(model_config=model_config)
        dqn_agent.train(train_context=tc,
                        callbacks=[log.Iteration(eval_only=True),
                                   log.Agent()])
Example 4
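A plain DQN through the Tensorforce backend, with an assertion on the result: after training, the (min, avg, max) rewards of the final evaluation are read from the train context, and the average must exceed 50. _cartpole_name presumably holds the "CartPole-v0" environment id.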
    def test_dqn_train(self):
        from easyagents.backends import tforce

        model_config = core.ModelConfig(_cartpole_name, fc_layers=(100, 100))
        tc: core.StepsTrainContext = core.StepsTrainContext()
        tc.num_iterations = 10000
        tc.num_steps_buffer_preload = 500
        tc.num_iterations_between_eval = 500
        tc.max_steps_per_episode = 200
        dqn_agent = tforce.TforceDqnAgent(model_config=model_config)
        dqn_agent.train(train_context=tc,
                        callbacks=[log.Iteration(eval_only=True), log.Agent()])
        (min_r, avg_r, max_r) = tc.eval_rewards[tc.episodes_done_in_training]
        assert avg_r > 50
Example 5
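The train() method behind the snippets above: each keyword argument is copied into a fresh StepsTrainContext unless a ready-made context is passed in, and the filled context is returned.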
    def train(self,
              callbacks: Union[List[core.AgentCallback], core.AgentCallback,
                               None] = None,
              num_iterations: int = 20000,
              max_steps_per_episode: int = 500,
              num_steps_per_iteration: int = 1,
              num_steps_buffer_preload: int = 1000,
              num_steps_sampled_from_buffer: int = 64,
              num_iterations_between_eval: int = 1000,
              num_episodes_per_eval: int = 10,
              learning_rate: float = 0.001,
              train_context: core.StepsTrainContext = None,
              default_plots: bool = None):
        """Trains a new model using the gym environment passed during instantiation.

        Args:
            callbacks: list of callbacks called during training and evaluation
            num_iterations: number of times the training is repeated (with additional data)
            max_steps_per_episode: maximum number of steps per episode
            num_steps_per_iteration: number of steps played per training iteration
            num_steps_buffer_preload: number of initial collect steps to preload the buffer
            num_steps_sampled_from_buffer: number of steps sampled from the buffer for each training iteration
            num_iterations_between_eval: number of training iterations before the current policy is evaluated.
                If 0, no evaluation is performed.
            num_episodes_per_eval: number of episodes played to estimate the average return and steps
            learning_rate: the learning rate used in the next iteration's policy training, in (0, 1]
            train_context: training configuration to be used. If set, overrides all other training arguments.
            default_plots: if set, adds a set of default plot callbacks (plot.State, plot.Rewards, plot.Loss, ...).
                If None, the default callbacks are added only if the callbacks list is empty.

        Returns:
            train_context: the training configuration containing the loss and sum of rewards encountered
                during training
        """
        if train_context is None:
            train_context = core.StepsTrainContext()
            train_context.num_iterations = num_iterations
            train_context.max_steps_per_episode = max_steps_per_episode
            train_context.num_steps_per_iteration = num_steps_per_iteration
            train_context.num_steps_buffer_preload = num_steps_buffer_preload
            train_context.num_steps_sampled_from_buffer = num_steps_sampled_from_buffer
            train_context.num_iterations_between_eval = num_iterations_between_eval
            train_context.num_episodes_per_eval = num_episodes_per_eval
            train_context.learning_rate = learning_rate

        super().train(train_context=train_context,
                      callbacks=callbacks,
                      default_plots=default_plots)
        return train_context
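
Given that signature, a minimal usage sketch; the agents.DqnAgent class and its constructor arguments are assumptions here, not taken from the snippets above:

    from easyagents import agents
    from easyagents.callbacks import log

    # Hypothetical agent class; train() fills a StepsTrainContext with the
    # keyword arguments below and returns it.
    dqn_agent = agents.DqnAgent("CartPole-v0", fc_layers=(100, 100))
    tc = dqn_agent.train(callbacks=[log.Iteration(eval_only=True)],
                         num_iterations=10000,
                         num_steps_buffer_preload=500,
                         num_iterations_between_eval=500,
                         max_steps_per_episode=200)
    # As in Example 4, tc.eval_rewards maps episodes-done-in-training to
    # (min, avg, max) evaluation rewards.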