Beispiel #1
0
 def test_train(self):
     """Train a SacAgent on every backend reported by get_backends and check the reward.

     Each run uses the Duration and Iteration (eval only) log callbacks together
     with duration.Fast(), with default plots disabled. The value returned by
     max_avg_rewards for the resulting train context must be at least -1.
     """
     for backend_name in get_backends(SacAgent):
         agent = SacAgent(_mountaincart_continuous_name, backend=backend_name)
         callbacks = [log.Duration(), log.Iteration(eval_only=True), duration.Fast()]
         train_context = agent.train(callbacks, default_plots=False)
         assert max_avg_rewards(train_context) >= -1
Beispiel #2
0
 def test_train(self):
     """Smoke-test SacAgent training on 'CartPole-v0' for every available backend.

     Runs 10 iterations with at most 200 steps per episode and default plots
     disabled; the test passes if training completes without raising.
     """
     for backend_name in get_backends(SacAgent):
         agent = SacAgent('CartPole-v0', backend=backend_name)
         callbacks = [log.Duration(), log.Iteration(), log.Agent()]
         agent.train(callbacks,
                     num_iterations=10,
                     max_steps_per_episode=200,
                     default_plots=False)
Beispiel #3
0
 def train_and_assert(self, agent_type, is_v1: bool, num_iterations=100):
     """Train `agent_type` on 'CartPole-v0' for each selected backend and check the eval steps.

     Args:
         agent_type: agent class to instantiate; it is called with the gym
             environment name, `fc_layers` and `backend`.
         is_v1: if True, use the backends that get_backends only reports when
             `skip_v1` is not set; otherwise use those reported with
             `skip_v1=True`.
         num_iterations: number of training iterations passed to `train`.

     Raises:
         AssertionError: if, for any backend, the steps recorded for the final
             evaluation have a max below 100 or an average below 50.
     """
     v2_backends = list(get_backends(agent_type, skip_v1=True))
     v1_backends = [b for b in get_backends(agent_type) if b not in v2_backends]
     backends = v1_backends if is_v1 else v2_backends
     for backend in backends:
         # Emitted at warning level; presumably so the line is visible with the
         # default logging configuration during test runs.
         logging.warning(
             f'backend={backend} agent={agent_type}, num_iterations={num_iterations}'
         )
         cem_agent = agent_type('CartPole-v0',
                                fc_layers=(100, ),
                                backend=backend)
         tc: core.TrainContext = cem_agent.train(
             [log.Duration(),
              log.Iteration(eval_only=True),
              log.Agent()],
             num_iterations=num_iterations,
             num_iterations_between_eval=10,
             max_steps_per_episode=200,
             default_plots=False)
         # Look up the evaluation entry keyed by the total number of episodes
         # done in training.
         (_, avg_steps,
          max_steps) = tc.eval_steps[tc.episodes_done_in_training]
         # Include the backend in the message: the loop covers several backends
         # and a bare assert would not tell which one failed.
         assert max_steps >= 100, f'backend={backend}: max_steps={max_steps} < 100'
         assert avg_steps >= 50, f'backend={backend}: avg_steps={avg_steps} < 50'
Beispiel #4
0
 def test_train(self):
     """Smoke-test TfPpoAgent training on the line world environment.

     Uses a fresh PpoTrainContext with the duration.Fast() and log.Iteration()
     callbacks; the test passes if training completes without raising.
     """
     config = core.ModelConfig(_lineworld_name)
     train_context = core.PpoTrainContext()
     agent = tfagents.TfPpoAgent(model_config=config)
     callbacks = [duration.Fast(), log.Iteration()]
     agent.train(train_context=train_context, callbacks=callbacks)
Beispiel #5
0
 def test_train(self):
     """Smoke-test TfDqnAgent training on "CartPole-v0".

     Uses a fresh StepsTrainContext with the duration.Fast() and log.Iteration()
     callbacks; the test passes if training completes without raising.
     """
     config = core.ModelConfig("CartPole-v0")
     train_context = core.StepsTrainContext()
     agent = tfagents.TfDqnAgent(model_config=config)
     callbacks = [duration.Fast(), log.Iteration()]
     agent.train(train_context=train_context, callbacks=callbacks)
Beispiel #6
0
 def test_train(self):
     """Train a TfRandomAgent on the line world environment and check the episode count.

     After training with the duration.Fast() and log.Iteration() callbacks,
     the train context's `episodes_done_in_iteration` must be exactly 1.
     """
     config = core.ModelConfig(_lineworld_name)
     train_context = core.TrainContext()
     agent = tfagents.TfRandomAgent(model_config=config)
     callbacks = [duration.Fast(), log.Iteration()]
     agent.train(train_context=train_context, callbacks=callbacks)
     assert train_context.episodes_done_in_iteration == 1
Beispiel #7
0
 def test_train(self):
     """Train a TfReinforceAgent on the line world environment and check the counters.

     After training, `episodes_done_in_iteration` must equal
     `num_episodes_per_iteration` and `iterations_done_in_training` must equal
     `num_iterations`; both configured values must be positive.
     """
     config = core.ModelConfig(_lineworld_name)
     train_context = core.EpisodesTrainContext()
     agent = tfagents.TfReinforceAgent(model_config=config)
     callbacks = [duration.Fast(), log.Iteration()]
     agent.train(train_context=train_context, callbacks=callbacks)

     episodes_done = train_context.episodes_done_in_iteration
     episodes_expected = train_context.num_episodes_per_iteration
     assert episodes_done == episodes_expected > 0

     iterations_done = train_context.iterations_done_in_training
     iterations_expected = train_context.num_iterations
     assert iterations_done == iterations_expected > 0
Beispiel #8
0
 def test_train(self):
     """Train a ReinforceAgent on the line world environment for every backend.

     Runs 10 iterations with at most 200 steps per episode and default plots
     disabled. The value returned by max_avg_rewards for the resulting train
     context must be at least 5.
     """
     # Enumerate the backends of the agent type that is actually instantiated
     # below (the original queried RandomAgent's backends instead).
     for backend in get_backends(ReinforceAgent):
         reinforce_agent: ReinforceAgent = ReinforceAgent(_line_world_name, backend=backend)
         tc: core.TrainContext = reinforce_agent.train([log.Duration(), log.Iteration()],
                                                       num_iterations=10,
                                                       max_steps_per_episode=200,
                                                       default_plots=False)
         r = max_avg_rewards(tc)
         assert r >= 5
Beispiel #9
0
 def test_train(self):
     """Smoke-test TfSacAgent training on the continuous mountain car environment.

     Uses a fresh StepsTrainContext with the duration.Fast(), log.Iteration()
     and log.Agent() callbacks; the test passes if training completes without
     raising.
     """
     config = core.ModelConfig(_mountaincar_continuous_name)
     train_context = core.StepsTrainContext()
     sac_agent = tfagents.TfSacAgent(model_config=config)
     callbacks = [duration.Fast(), log.Iteration(), log.Agent()]
     sac_agent.train(train_context=train_context, callbacks=callbacks)
Beispiel #10
0
 def test_train(self):
     """Train a ReinforceAgent on 'CartPole-v0' for every backend and check the eval steps.

     Runs 10 iterations with at most 200 steps per episode and default plots
     disabled. The average step count of the evaluation entry keyed by
     `episodes_done_in_training` must be at least 10.
     """
     # Enumerate the backends of the agent type that is actually instantiated
     # below (the original queried RandomAgent's backends instead).
     for backend in get_backends(ReinforceAgent):
         reinforce_agent: ReinforceAgent = ReinforceAgent('CartPole-v0', backend=backend)
         tc: core.TrainContext = reinforce_agent.train([log.Duration(), log.Iteration()],
                                                       num_iterations=10,
                                                       max_steps_per_episode=200,
                                                       default_plots=False)
         # Only the middle (average) element of the 3-tuple is checked.
         (_, avg_steps, _) = tc.eval_steps[tc.episodes_done_in_training]
         assert avg_steps >= 10
Beispiel #11
0
 def test_ppo_train(self):
     """Smoke-test TforcePpoAgent training on "CartPole-v0".

     Uses a fresh PpoTrainContext with the log.Iteration(), log.Agent() and
     duration.Fast() callbacks; the test passes if training completes without
     raising.
     """
     config = core.ModelConfig("CartPole-v0")
     train_context = core.PpoTrainContext()
     ppo_agent = tforce.TforcePpoAgent(model_config=config)
     callbacks = [log.Iteration(), log.Agent(), duration.Fast()]
     ppo_agent.train(train_context=train_context, callbacks=callbacks)
Beispiel #12
0
 def test_reinforce_train(self):
     """Smoke-test TforceReinforceAgent training on "CartPole-v0".

     Uses a fresh EpisodesTrainContext with the log.Iteration(), log.Agent()
     and duration.Fast() callbacks; the test passes if training completes
     without raising.
     """
     config = core.ModelConfig("CartPole-v0")
     train_context = core.EpisodesTrainContext()
     reinforce_agent = tforce.TforceReinforceAgent(model_config=config)
     callbacks = [log.Iteration(), log.Agent(), duration.Fast()]
     reinforce_agent.train(train_context=train_context, callbacks=callbacks)
Beispiel #13
0
 def train_and_eval(self, agent_type, backend, num_iterations):
     """Train `agent_type` on the cartpole environment and return its reward summary.

     Args:
         agent_type: agent class to instantiate; it is called with the
             environment name, `fc_layers=(100,)` and `backend`.
         backend: backend identifier forwarded to the agent constructor.
         num_iterations: number of training iterations passed to `train`.

     Returns:
         The value of max_avg_rewards for the train context returned by `train`.
     """
     agent = agent_type(_cartpole_name, fc_layers=(100,), backend=backend)
     callbacks = [log.Duration(), log.Iteration(eval_only=True), log.Agent()]
     train_context = agent.train(
         callbacks,
         num_iterations=num_iterations,
         num_steps_buffer_preload=1000,
         num_iterations_between_eval=500,
         max_steps_per_episode=200,
         default_plots=False,
     )
     return max_avg_rewards(train_context)
Beispiel #14
0
 def test_train(self):
     """Train a TfReinforceAgent on "CartPole-v0" and check counters and rewards.

     After training, `episodes_done_in_iteration` must equal the positive
     `num_episodes_per_iteration`, `iterations_done_in_training` must equal the
     positive `num_iterations`, and the max reward of the evaluation entry
     keyed by `episodes_done_in_training` must be at least 10.
     """
     config = core.ModelConfig("CartPole-v0")
     train_context = core.EpisodesTrainContext()
     agent = tfagents.TfReinforceAgent(model_config=config)
     callbacks = [duration.Fast(), log.Iteration()]
     agent.train(train_context=train_context, callbacks=callbacks)

     episodes_done = train_context.episodes_done_in_iteration
     episodes_expected = train_context.num_episodes_per_iteration
     assert episodes_done == episodes_expected > 0

     iterations_done = train_context.iterations_done_in_training
     iterations_expected = train_context.num_iterations
     assert iterations_done == iterations_expected > 0

     final_rewards = train_context.eval_rewards[train_context.episodes_done_in_training]
     _reward_min, _reward_avg, reward_max = final_rewards
     assert reward_max >= 10
Beispiel #15
0
 def train_and_eval(self, agent_type, backend, num_iterations):
     """Train `agent_type` on 'CartPole-v0' and return the best average eval step count.

     Args:
         agent_type: agent class to instantiate; it is called with the
             environment name, `fc_layers=(100,)` and `backend`.
         backend: backend identifier forwarded to the agent constructor.
         num_iterations: number of training iterations passed to `train`.

     Returns:
         The largest middle (average) element over all 3-tuples stored in the
         train context's `eval_steps` values.
     """
     agent = agent_type('CartPole-v0', fc_layers=(100,), backend=backend)
     callbacks = [log.Duration(), log.Iteration(eval_only=True), log.Agent()]
     train_context = agent.train(
         callbacks,
         num_iterations=num_iterations,
         num_steps_buffer_preload=1000,
         num_iterations_between_eval=500,
         max_steps_per_episode=200,
         default_plots=False,
     )
     return max(avg for (_low, avg, _high) in train_context.eval_steps.values())
Beispiel #16
0
    def test_reinforce_train(self):
        """Train a TforceReinforceAgent on the cartpole environment for 50 iterations.

        The average reward of the evaluation entry keyed by
        `episodes_done_in_training` must be greater than 100.
        """
        from easyagents.backends import tforce

        config = core.ModelConfig(_cartpole_name)
        train_context = core.EpisodesTrainContext()
        train_context.num_iterations = 50
        agent = tforce.TforceReinforceAgent(model_config=config)
        callbacks = [log.Iteration(), log.Agent()]
        agent.train(train_context=train_context, callbacks=callbacks)

        final_rewards = train_context.eval_rewards[train_context.episodes_done_in_training]
        (_reward_min, reward_avg, _reward_max) = final_rewards
        assert reward_avg > 100
Beispiel #17
0
 def test_dueling_dqn_train(self):
     """Smoke-test TforceDuelingDqnAgent training on "CartPole-v0".

     Configures a StepsTrainContext with 20000 iterations, a 1000-step buffer
     preload, an evaluation every 1000 iterations and at most 200 steps per
     episode; the test passes if training completes without raising.
     """
     config = core.ModelConfig("CartPole-v0", fc_layers=(100, ))
     train_context = core.StepsTrainContext()
     train_context.num_iterations = 20000
     train_context.num_steps_buffer_preload = 1000
     train_context.num_iterations_between_eval = 1000
     train_context.max_steps_per_episode = 200
     dueling_agent = tforce.TforceDuelingDqnAgent(model_config=config)
     callbacks = [log.Iteration(eval_only=True), log.Agent()]
     dueling_agent.train(train_context=train_context, callbacks=callbacks)
Beispiel #18
0
 def test_train_cartpole(self):
     """Smoke-test PpoAgent training on "CartPole-v0" for every available backend.

     Each run uses a PpoTrainContext with 3 iterations of 10 episodes (at most
     500 steps each), 5 epochs per iteration and an evaluation of 5 episodes
     every 2 iterations; the test passes if training completes without raising.
     """
     for backend_name in get_backends(PpoAgent):
         agent = PpoAgent(gym_env_name="CartPole-v0", backend=backend_name)
         train_context = core.PpoTrainContext()
         train_context.num_iterations = 3
         train_context.num_episodes_per_iteration = 10
         train_context.max_steps_per_episode = 500
         train_context.num_epochs_per_iteration = 5
         train_context.num_iterations_between_eval = 2
         train_context.num_episodes_per_eval = 5
         callbacks = [log.Iteration()]
         agent.train(callbacks, train_context=train_context)
Beispiel #19
0
 def test_dqn(self):
     """Smoke-test KerasRlDqnAgent training on "CartPole-v0" with the seed set to 0.

     Uses a fresh StepsTrainContext with the duration.Fast(), log.Agent(),
     log.Step() and log.Iteration() callbacks; the test passes if training
     completes without raising.
     """
     easyagents.agents.seed = 0
     config = core.ModelConfig("CartPole-v0")
     train_context = core.StepsTrainContext()
     dqn_agent = kerasrl.KerasRlDqnAgent(model_config=config)
     callbacks = [duration.Fast(), log.Agent(), log.Step(), log.Iteration()]
     dqn_agent.train(train_context=train_context, callbacks=callbacks)
Beispiel #20
0
 def test_cem(self):
     """Smoke-test KerasRlCemAgent training on "CartPole-v0" with the seed set to 0.

     Configures a CemTrainContext with 100 iterations of 50 episodes (at most
     200 steps each), an elite set fraction of 0.1 and a 2000-step buffer
     preload; the test passes if training completes without raising.
     """
     easyagents.agents.seed = 0
     config = core.ModelConfig("CartPole-v0", fc_layers=(100, ))
     train_context = core.CemTrainContext()
     train_context.num_iterations = 100
     train_context.num_episodes_per_iteration = 50
     train_context.max_steps_per_episode = 200
     train_context.elite_set_fraction = 0.1
     train_context.num_steps_buffer_preload = 2000
     cem_agent = kerasrl.KerasRlCemAgent(model_config=config)
     callbacks = [log.Agent(), log.Iteration()]
     cem_agent.train(train_context=train_context, callbacks=callbacks)
Beispiel #21
0
 def test_train(self):
     """Train a PpoAgent on the cartpole environment for every backend and check the reward.

     The seed is set to 0 first. Each run uses a PpoTrainContext with 10
     iterations of 10 episodes (at most 200 steps each), 5 epochs per iteration
     and an evaluation of 5 episodes every 5 iterations, with default plots
     disabled. The value returned by max_avg_rewards must be at least 50.
     """
     agents.seed = 0
     for backend_name in get_backends(PpoAgent):
         agent = PpoAgent(gym_env_name=_cartpole_name, backend=backend_name)
         train_context = core.PpoTrainContext()
         train_context.num_iterations = 10
         train_context.num_episodes_per_iteration = 10
         train_context.max_steps_per_episode = 200
         train_context.num_epochs_per_iteration = 5
         train_context.num_iterations_between_eval = 5
         train_context.num_episodes_per_eval = 5
         callbacks = [log.Iteration()]
         agent.train(callbacks, train_context=train_context, default_plots=False)
         best_avg_reward = max_avg_rewards(train_context)
         assert best_avg_reward >= 50
Beispiel #22
0
    def test_dqn_train(self):
        """Train a TforceDqnAgent on the cartpole environment and check the final reward.

        Configures a StepsTrainContext with 10000 iterations, a 500-step buffer
        preload, an evaluation every 500 iterations and at most 200 steps per
        episode. The average reward of the evaluation entry keyed by
        `episodes_done_in_training` must be greater than 50.
        """
        from easyagents.backends import tforce

        config = core.ModelConfig(_cartpole_name, fc_layers=(100, 100))
        train_context = core.StepsTrainContext()
        train_context.num_iterations = 10000
        train_context.num_steps_buffer_preload = 500
        train_context.num_iterations_between_eval = 500
        train_context.max_steps_per_episode = 200
        agent = tforce.TforceDqnAgent(model_config=config)
        callbacks = [log.Iteration(eval_only=True), log.Agent()]
        agent.train(train_context=train_context, callbacks=callbacks)

        final_rewards = train_context.eval_rewards[train_context.episodes_done_in_training]
        (_reward_min, reward_avg, _reward_max) = final_rewards
        assert reward_avg > 50
Beispiel #23
0
    def test_save_(self):
        """Train a TforcePpoAgent briefly, save it to a temp directory and load it back.

        The agent is trained for 3 iterations on the cartpole environment,
        saved, and then loaded into a freshly constructed TforcePpoAgent. The
        test passes if none of these steps raises.
        """
        from easyagents.backends import tforce

        model_config = core.ModelConfig(_cartpole_name)
        tc = core.PpoTrainContext()
        tc.num_iterations = 3
        ppo_agent = tforce.TforcePpoAgent(model_config=model_config)
        ppo_agent.train(train_context=tc, callbacks=[log.Iteration(), log.Agent()])
        tempdir = bcore._get_temp_path()
        bcore._mkdir(tempdir)
        try:
            ppo_agent.save(tempdir, [])

            loaded_agent = tforce.TforcePpoAgent(model_config=model_config)
            loaded_agent.load(tempdir, [])
        finally:
            # Remove the temp directory even when save/load raises, so a failing
            # test does not leave files behind.
            bcore._rmpath(tempdir)
Beispiel #24
0
 def test_save_load(self):
     """Train a TfPpoAgent for one iteration, save it, reload it and play one episode.

     The agent is trained on the line world environment with
     duration._SingleIteration(), saved to a temp directory, loaded into a
     freshly constructed TfPpoAgent and then played for 1 episode of at most
     10 steps. The test passes if none of these steps raises.
     """
     model_config = core.ModelConfig(_lineworld_name)
     tc = core.PpoTrainContext()
     ppo_agent = tfagents.TfPpoAgent(model_config=model_config)
     ppo_agent.train(
         train_context=tc,
         callbacks=[duration._SingleIteration(),
                    log.Iteration()])
     tempdir = bcore._get_temp_path()
     bcore._mkdir(tempdir)
     try:
         ppo_agent.save(tempdir, [])
         # Use a new agent instance so that play() runs on the loaded state
         # rather than on the in-memory trained agent.
         ppo_agent = tfagents.TfPpoAgent(model_config=model_config)
         ppo_agent.load(tempdir, [])
         pc = core.PlayContext()
         pc.max_steps_per_episode = 10
         pc.num_episodes = 1
         ppo_agent.play(play_context=pc, callbacks=[])
     finally:
         # Remove the temp directory even when save/load/play raises, so a
         # failing test does not leave files behind.
         bcore._rmpath(tempdir)
Beispiel #25
0
 def test_cartpole_log_iteration(self):
     """Smoke-test a single training iteration of PpoAgent on "CartPole-v0" with log.Iteration.

     Uses the 'tfagents' backend; the test passes if training completes
     without raising.
     """
     agent = agents.PpoAgent(gym_env_name="CartPole-v0", backend='tfagents')
     callbacks = [log.Iteration(), duration._SingleIteration()]
     agent.train(callbacks)
Beispiel #26
0
 def test_log_iteration(self):
     """Smoke-test a single training iteration of PpoAgent with the log.Iteration callback.

     The agent is created for the environment named by `_step_count_name`; the
     test passes if training completes without raising.
     """
     ppo_agent = agents.PpoAgent(_step_count_name)
     callbacks = [log.Iteration(), duration._SingleIteration()]
     ppo_agent.train(callbacks)