def test_train(self): model_config = core.ModelConfig("CartPole-v0") tc = core.StepsTrainContext() dqn_agent = tfagents.TfDqnAgent(model_config=model_config) dqn_agent.train(train_context=tc, callbacks=[duration.Fast(), log.Iteration()])
def test_train(self):
    # Smoke test: a TF-Agents SAC agent on the continuous MountainCar task.
    model_config = core.ModelConfig(_mountaincar_continuous_name)
    tc = core.StepsTrainContext()
    sac_agent = tfagents.TfSacAgent(model_config=model_config)
    sac_agent.train(train_context=tc,
                    callbacks=[duration.Fast(), log.Iteration(), log.Agent()])
def test_dueling_dqn_train(self):
    # Train a Tensorforce dueling DQN on CartPole-v0 with an explicit StepsTrainContext.
    model_config = core.ModelConfig("CartPole-v0", fc_layers=(100,))
    tc = core.StepsTrainContext()
    tc.num_iterations = 20000
    tc.num_steps_buffer_preload = 1000
    tc.num_iterations_between_eval = 1000
    tc.max_steps_per_episode = 200
    dqn_agent = tforce.TforceDuelingDqnAgent(model_config=model_config)
    dqn_agent.train(train_context=tc,
                    callbacks=[log.Iteration(eval_only=True), log.Agent()])
def test_dqn_train(self):
    from easyagents.backends import tforce

    # Train a Tensorforce DQN on CartPole and assert that the final evaluation's
    # average reward exceeds 50.
    model_config = core.ModelConfig(_cartpole_name, fc_layers=(100, 100))
    tc = core.StepsTrainContext()
    tc.num_iterations = 10000
    tc.num_steps_buffer_preload = 500
    tc.num_iterations_between_eval = 500
    tc.max_steps_per_episode = 200
    dqn_agent = tforce.TforceDqnAgent(model_config=model_config)
    dqn_agent.train(train_context=tc,
                    callbacks=[log.Iteration(eval_only=True), log.Agent()])
    # eval_rewards is keyed by the number of training episodes completed at
    # evaluation time; each value is a (min, avg, max) reward tuple.
    (min_r, avg_r, max_r) = tc.eval_rewards[tc.episodes_done_in_training]
    assert avg_r > 50
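# A small sketch, under the same assumptions as the test above (tc.eval_rewards
# maps the number of training episodes completed at evaluation time to
# (min, avg, max) reward tuples, as the unpacking implies), for inspecting the
# full evaluation history rather than only the final entry:

def print_eval_history(tc: core.StepsTrainContext):
    for episodes_done in sorted(tc.eval_rewards):
        min_r, avg_r, max_r = tc.eval_rewards[episodes_done]
        print(f"eval after {episodes_done} episodes: "
              f"min={min_r:.1f} avg={avg_r:.1f} max={max_r:.1f}")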
def train(self,
          callbacks: Union[List[core.AgentCallback], core.AgentCallback, None] = None,
          num_iterations: int = 20000,
          max_steps_per_episode: int = 500,
          num_steps_per_iteration: int = 1,
          num_steps_buffer_preload: int = 1000,
          num_steps_sampled_from_buffer: int = 64,
          num_iterations_between_eval: int = 1000,
          num_episodes_per_eval: int = 10,
          learning_rate: float = 0.001,
          train_context: core.StepsTrainContext = None,
          default_plots: bool = None):
    """Trains a new model using the gym environment passed during instantiation.

    Args:
        callbacks: list of callbacks called during training and evaluation
        num_iterations: number of times the training is repeated (with additional data)
        max_steps_per_episode: maximum number of steps per episode
        num_steps_per_iteration: number of steps played per training iteration
        num_steps_buffer_preload: number of initial collect steps used to preload the buffer
        num_steps_sampled_from_buffer: number of steps sampled from the buffer for each
            training iteration
        num_iterations_between_eval: number of training iterations between evaluations of
            the current policy; if 0, no evaluation is performed
        num_episodes_per_eval: number of episodes played to estimate the average return and steps
        learning_rate: the learning rate used in the next iteration's policy training (0,1]
        train_context: training configuration to be used; if set, it overrides all other
            training arguments
        default_plots: if set, adds a set of default callbacks (plot.State, plot.Rewards,
            plot.Loss, ...); if None, the default callbacks are only added if the callbacks
            list is empty

    Returns:
        train_context: the training configuration containing the loss and sum of rewards
            encountered during training
    """
    if train_context is None:
        train_context = core.StepsTrainContext()
        train_context.num_iterations = num_iterations
        train_context.max_steps_per_episode = max_steps_per_episode
        train_context.num_steps_per_iteration = num_steps_per_iteration
        train_context.num_steps_buffer_preload = num_steps_buffer_preload
        train_context.num_steps_sampled_from_buffer = num_steps_sampled_from_buffer
        train_context.num_iterations_between_eval = num_iterations_between_eval
        train_context.num_episodes_per_eval = num_episodes_per_eval
        train_context.learning_rate = learning_rate
    super().train(train_context=train_context, callbacks=callbacks, default_plots=default_plots)
    return train_context
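# For reference, a minimal usage sketch of this train() signature. It assumes the
# method belongs to a public agent class such as easyagents.agents.DqnAgent and
# that the duration/log callbacks used in the tests above live under
# easyagents.callbacks; both import paths are assumptions, not confirmed here.
from easyagents.agents import DqnAgent          # assumed public agent exposing this train()
from easyagents.backends import core
from easyagents.callbacks import duration, log  # assumed location of the callback modules

agent = DqnAgent("CartPole-v0", fc_layers=(100,))

# Variant 1: let train() assemble the StepsTrainContext from keyword arguments.
tc = agent.train(num_iterations=5000,
                 num_iterations_between_eval=500,
                 callbacks=[duration.Fast(), log.Iteration()])

# Variant 2: pass a pre-configured context; it overrides all other training arguments.
tc = core.StepsTrainContext()
tc.num_iterations = 5000
tc.num_iterations_between_eval = 500
agent.train(train_context=tc, callbacks=[log.Iteration()])

# Keeping every tuning knob on one StepsTrainContext object makes a training
# configuration easy to log and reuse, which is presumably why a passed-in
# context overrides the individual keyword arguments.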