예제 #1
0
 def test_train(self):
     """Smoke-test ``TfPpoAgent.train`` with a model configured from ``_lineworld_name``.

     The test contains no assertions; it passes when agent construction and
     training finish without raising.
     """
     config = core.ModelConfig(_lineworld_name)
     train_context = core.PpoTrainContext()
     agent = tfagents.TfPpoAgent(model_config=config)
     # NOTE(review): duration.Fast presumably shortens the run for tests -- confirm.
     callbacks = [duration.Fast(), log.Iteration()]
     agent.train(train_context=train_context, callbacks=callbacks)
예제 #2
0
 def test_train(self):
     """Train a ``SacAgent`` on 'MountainCarContinuous-v0' for each backend.

     Backends are taken from ``get_backends(SacAgent)``. No assertions are
     made; the test passes when every training run completes without raising.
     """
     for current_backend in get_backends(SacAgent):
         agent: SacAgent = SacAgent('MountainCarContinuous-v0', backend=current_backend)
         # Fresh callback instances are built for every backend.
         callbacks = [log.Duration(), log.Iteration(), log.Agent(), duration.Fast()]
         agent.train(callbacks,
                     num_iterations=10,
                     max_steps_per_episode=200,
                     default_plots=False)
예제 #3
0
 def test_train(self):
     """Smoke-test ``TfDqnAgent.train`` on "CartPole-v0" with a ``StepsTrainContext``.

     The test contains no assertions; it passes when agent construction and
     training finish without raising.
     """
     config = core.ModelConfig("CartPole-v0")
     train_context = core.StepsTrainContext()
     agent = tfagents.TfDqnAgent(model_config=config)
     callbacks = [duration.Fast(), log.Iteration()]
     agent.train(train_context=train_context, callbacks=callbacks)
예제 #4
0
 def test_train(self):
     """Train a ``TfRandomAgent`` and check the per-iteration episode counter.

     After training with a plain ``TrainContext``, the context must report
     exactly one episode done in the iteration.
     """
     config = core.ModelConfig(_lineworld_name)
     train_context = core.TrainContext()
     agent = tfagents.TfRandomAgent(model_config=config)
     callbacks = [duration.Fast(), log.Iteration()]
     agent.train(train_context=train_context, callbacks=callbacks)
     assert train_context.episodes_done_in_iteration == 1
예제 #5
0
 def test_reinforce_train(self):
     """Smoke-test ``TforceReinforceAgent.train`` on "CartPole-v0".

     Uses an ``EpisodesTrainContext``. There are no assertions; the test
     passes when construction and training finish without raising.
     """
     config = core.ModelConfig("CartPole-v0")
     train_context = core.EpisodesTrainContext()
     reinforce_agent = tforce.TforceReinforceAgent(model_config=config)
     callbacks = [log.Iteration(), log.Agent(), duration.Fast()]
     reinforce_agent.train(train_context=train_context, callbacks=callbacks)
예제 #6
0
 def test_ppo_train(self):
     """Smoke-test ``TforcePpoAgent.train`` on "CartPole-v0".

     Uses a ``PpoTrainContext``. There are no assertions; the test passes
     when construction and training finish without raising.
     """
     config = core.ModelConfig("CartPole-v0")
     train_context = core.PpoTrainContext()
     ppo_agent = tforce.TforcePpoAgent(model_config=config)
     callbacks = [log.Iteration(), log.Agent(), duration.Fast()]
     ppo_agent.train(train_context=train_context, callbacks=callbacks)
예제 #7
0
 def test_train(self):
     """Train a ``TfReinforceAgent`` on "CartPole-v0" and verify the train context.

     Checks that the episode and iteration counters match their configured,
     positive targets, and that the third value of the evaluation-reward
     entry stored under ``episodes_done_in_training`` is at least 10.
     """
     config = core.ModelConfig("CartPole-v0")
     train_context = core.EpisodesTrainContext()
     reinforce_agent = tfagents.TfReinforceAgent(model_config=config)
     callbacks = [duration.Fast(), log.Iteration()]
     reinforce_agent.train(train_context=train_context, callbacks=callbacks)

     # Chained comparisons: counters equal their targets AND the targets are positive.
     assert train_context.episodes_done_in_iteration == train_context.num_episodes_per_iteration > 0
     assert train_context.iterations_done_in_training == train_context.num_iterations > 0

     # NOTE(review): the entry presumably holds (min, avg, max) rewards -- confirm.
     final_entry = train_context.eval_rewards[train_context.episodes_done_in_training]
     _min_reward, _avg_reward, max_reward = final_entry
     assert max_reward >= 10
예제 #8
0
 def test_train(self):
     """Train a ``TfReinforceAgent`` configured from ``_lineworld_name`` and check counters.

     After training, the episode and iteration counters of the
     ``EpisodesTrainContext`` must equal their configured, positive targets.
     """
     config = core.ModelConfig(_lineworld_name)
     train_context = core.EpisodesTrainContext()
     agent = tfagents.TfReinforceAgent(model_config=config)
     callbacks = [duration.Fast(), log.Iteration()]
     agent.train(train_context=train_context, callbacks=callbacks)

     # Chained comparisons: counters equal their targets AND the targets are positive.
     assert train_context.episodes_done_in_iteration == train_context.num_episodes_per_iteration > 0
     assert train_context.iterations_done_in_training == train_context.num_iterations > 0
예제 #9
0
 def test_train(self):
     """Smoke-test ``TfSacAgent.train`` with a model configured from ``_mountaincar_continuous_name``.

     Uses a ``StepsTrainContext``. There are no assertions; the test passes
     when construction and training finish without raising.
     """
     config = core.ModelConfig(_mountaincar_continuous_name)
     train_context = core.StepsTrainContext()
     sac_agent = tfagents.TfSacAgent(model_config=config)
     callbacks = [duration.Fast(), log.Iteration(), log.Agent()]
     sac_agent.train(train_context=train_context, callbacks=callbacks)
예제 #10
0
 def test_dqn(self):
     """Smoke-test ``KerasRlDqnAgent.train`` on "CartPole-v0" with the seed set to 0.

     There are no assertions; the test passes when construction and training
     finish without raising.
     """
     # Module-level attribute; this method does not restore it afterwards.
     # NOTE(review): presumably makes the run reproducible -- confirm.
     easyagents.agents.seed = 0
     config = core.ModelConfig("CartPole-v0")
     train_context = core.StepsTrainContext()
     dqn_agent = kerasrl.KerasRlDqnAgent(model_config=config)
     callbacks = [duration.Fast(), log.Agent(), log.Step(), log.Iteration()]
     dqn_agent.train(train_context=train_context, callbacks=callbacks)
예제 #11
0
 def test_log_agent(self):
     """Train a ``PpoAgent`` on ``_step_count_name`` with the ``log.Agent`` callback attached.

     There are no assertions; the test passes when training finishes
     without raising.
     """
     ppo_agent = agents.PpoAgent(_step_count_name)
     callbacks = [log.Agent(), duration.Fast()]
     ppo_agent.train(callbacks)
예제 #12
0
 def test_train_plotsteprewards(self):
     """Train a ``PpoAgent`` on 'CartPole-v0' with the ``plot.StepRewards`` callback attached.

     There are no assertions; the test passes when training finishes
     without raising.
     """
     ppo_agent = agents.PpoAgent('CartPole-v0')
     callbacks = [plot.StepRewards(), duration.Fast()]
     ppo_agent.train(callbacks)
예제 #13
0
 def test_default_plots_None_durationcallback(self):
     """Check that ``_add_plot_callbacks`` keeps an explicitly passed plot callback.

     Calls the private helper with a single duration callback, ``None`` as
     the second argument and a one-element list holding a ``plot.Loss``
     instance, then asserts that this instance is contained in the result.
     """
     ppo_agent = agents.PpoAgent("CartPole-v0")
     loss_plot = plot.Loss()
     # NOTE(review): the second positional argument is presumably `default_plots` -- confirm.
     resulting_callbacks = ppo_agent._add_plot_callbacks([duration.Fast()], None, [loss_plot])
     assert loss_plot in resulting_callbacks
예제 #14
0
 def test_train(self):
     """Train a ``SacAgent`` per backend and check the reward reported by ``max_avg_rewards``.

     For every backend from ``get_backends(SacAgent)`` the agent is trained
     with default plots disabled, and ``max_avg_rewards`` applied to the
     returned train context must be at least -1.
     """
     for current_backend in get_backends(SacAgent):
         agent: SacAgent = SacAgent(_mountaincart_continuous_name, backend=current_backend)
         # Fresh callback instances are built for every backend.
         callbacks = [log.Duration(), log.Iteration(eval_only=True), duration.Fast()]
         train_context: core.TrainContext = agent.train(callbacks, default_plots=False)
         best_avg_reward = max_avg_rewards(train_context)
         assert best_avg_reward >= -1