def test_train(self):
    """Train SAC on the continuous mountain-cart env for every backend and check eval rewards."""
    for backend in get_backends(SacAgent):
        agent: SacAgent = SacAgent(_mountaincart_continuous_name, backend=backend)
        train_context: core.TrainContext = agent.train(
            [log.Duration(), log.Iteration(eval_only=True), duration.Fast()],
            default_plots=False)
        best_avg_reward = max_avg_rewards(train_context)
        assert best_avg_reward >= -1
def test_train(self):
    """Smoke-train SAC on CartPole-v0 for every backend; passes if training completes."""
    for backend in get_backends(SacAgent):
        agent: SacAgent = SacAgent('CartPole-v0', backend=backend)
        agent.train(
            [log.Duration(), log.Iteration(), log.Agent()],
            num_iterations=10,
            max_steps_per_episode=200,
            default_plots=False)
def train_and_assert(self, agent_type, is_v1: bool, num_iterations=100):
    """Train agent_type on CartPole-v0 over its v1 or v2 backends and assert eval step counts.

    Args:
        agent_type: the easyagents agent class to instantiate and train.
        is_v1: if True train on the tf-v1-only backends, otherwise on the v2 backends.
        num_iterations: number of training iterations per backend.
    """
    logger = logging.warning
    # list(...) instead of a pass-through comprehension
    v2_backends = list(get_backends(agent_type, skip_v1=True))
    # v1 backends are those reported for the agent but absent from the v2 set
    v1_backends = [b for b in get_backends(agent_type) if b not in v2_backends]
    backends = v1_backends if is_v1 else v2_backends
    for backend in backends:
        # lazy %-style args: the message is only built if the log record is emitted
        logger('backend=%s agent=%s, num_iterations=%s',
               backend, agent_type, num_iterations)
        cem_agent: CemAgent = agent_type('CartPole-v0', fc_layers=(100,), backend=backend)
        tc: core.TrainContext = cem_agent.train(
            [log.Duration(), log.Iteration(eval_only=True), log.Agent()],
            num_iterations=num_iterations,
            num_iterations_between_eval=10,
            max_steps_per_episode=200,
            default_plots=False)
        (min_steps, avg_steps, max_steps) = tc.eval_steps[tc.episodes_done_in_training]
        assert max_steps >= 100
        assert avg_steps >= 50
def test_train(self):
    """Smoke-train a tf-agents PPO agent on the line-world environment."""
    config = core.ModelConfig(_lineworld_name)
    context = core.PpoTrainContext()
    agent = tfagents.TfPpoAgent(model_config=config)
    agent.train(train_context=context,
                callbacks=[duration.Fast(), log.Iteration()])
def test_train(self):
    """Smoke-train a tf-agents DQN agent on CartPole-v0."""
    config = core.ModelConfig("CartPole-v0")
    context = core.StepsTrainContext()
    agent = tfagents.TfDqnAgent(model_config=config)
    agent.train(train_context=context,
                callbacks=[duration.Fast(), log.Iteration()])
def test_train(self):
    """Train the tf-agents random agent and verify one episode ran in the last iteration."""
    config = core.ModelConfig(_lineworld_name)
    context = core.TrainContext()
    agent = tfagents.TfRandomAgent(model_config=config)
    agent.train(train_context=context,
                callbacks=[duration.Fast(), log.Iteration()])
    assert context.episodes_done_in_iteration == 1
def test_train(self):
    """Train tf-agents REINFORCE on line-world and verify episode/iteration bookkeeping."""
    config = core.ModelConfig(_lineworld_name)
    context = core.EpisodesTrainContext()
    agent = tfagents.TfReinforceAgent(model_config=config)
    agent.train(train_context=context,
                callbacks=[duration.Fast(), log.Iteration()])
    assert context.episodes_done_in_iteration == context.num_episodes_per_iteration > 0
    assert context.iterations_done_in_training == context.num_iterations > 0
def test_train(self):
    """Train REINFORCE on the line-world env per backend and check the best eval reward.

    NOTE(review): backends are taken from RandomAgent while a ReinforceAgent is
    trained — confirm this mismatch is intentional.
    """
    for backend in get_backends(RandomAgent):
        agent: ReinforceAgent = ReinforceAgent(_line_world_name, backend=backend)
        context: core.TrainContext = agent.train(
            [log.Duration(), log.Iteration()],
            num_iterations=10,
            max_steps_per_episode=200,
            default_plots=False)
        best_avg_reward = max_avg_rewards(context)
        assert best_avg_reward >= 5
def test_train(self):
    """Smoke-train a tf-agents SAC agent on the continuous mountain-car environment."""
    model_config = core.ModelConfig(_mountaincar_continuous_name)
    tc = core.StepsTrainContext()
    # renamed from the misleading 'dqn_agent': the agent under test is SAC
    sac_agent = tfagents.TfSacAgent(model_config=model_config)
    sac_agent.train(
        train_context=tc,
        callbacks=[duration.Fast(), log.Iteration(), log.Agent()])
def test_train(self):
    """Train REINFORCE on CartPole-v0 per backend and check the final average eval steps.

    NOTE(review): backends are taken from RandomAgent while a ReinforceAgent is
    trained — confirm this mismatch is intentional.
    """
    for backend in get_backends(RandomAgent):
        agent: ReinforceAgent = ReinforceAgent('CartPole-v0', backend=backend)
        context: core.TrainContext = agent.train(
            [log.Duration(), log.Iteration()],
            num_iterations=10,
            max_steps_per_episode=200,
            default_plots=False)
        (min_steps, avg_steps, max_steps) = context.eval_steps[context.episodes_done_in_training]
        assert avg_steps >= 10
def test_ppo_train(self):
    """Smoke-train a Tensorforce PPO agent on CartPole-v0."""
    config = core.ModelConfig("CartPole-v0")
    context = core.PpoTrainContext()
    agent = tforce.TforcePpoAgent(model_config=config)
    agent.train(
        train_context=context,
        callbacks=[log.Iteration(), log.Agent(), duration.Fast()])
def test_reinforce_train(self):
    """Smoke-train a Tensorforce REINFORCE agent on CartPole-v0."""
    config = core.ModelConfig("CartPole-v0")
    context = core.EpisodesTrainContext()
    agent = tforce.TforceReinforceAgent(model_config=config)
    agent.train(
        train_context=context,
        callbacks=[log.Iteration(), log.Agent(), duration.Fast()])
def train_and_eval(self, agent_type, backend, num_iterations):
    """Train a DQN-style agent on CartPole and return its best average eval reward."""
    agent: DqnAgent = agent_type(_cartpole_name, fc_layers=(100,), backend=backend)
    context: core.TrainContext = agent.train(
        [log.Duration(), log.Iteration(eval_only=True), log.Agent()],
        num_iterations=num_iterations,
        num_steps_buffer_preload=1000,
        num_iterations_between_eval=500,
        max_steps_per_episode=200,
        default_plots=False)
    return max_avg_rewards(context)
def test_train(self):
    """Train tf-agents REINFORCE on CartPole-v0; verify bookkeeping and eval rewards."""
    config = core.ModelConfig("CartPole-v0")
    context = core.EpisodesTrainContext()
    agent = tfagents.TfReinforceAgent(model_config=config)
    agent.train(train_context=context,
                callbacks=[duration.Fast(), log.Iteration()])
    assert context.episodes_done_in_iteration == context.num_episodes_per_iteration > 0
    assert context.iterations_done_in_training == context.num_iterations > 0
    rmin, ravg, rmax = context.eval_rewards[context.episodes_done_in_training]
    assert rmax >= 10
def train_and_eval(self, agent_type, backend, num_iterations):
    """Train a DQN-style agent on CartPole and return the best average eval step count."""
    dqn_agent: DqnAgent = agent_type('CartPole-v0', fc_layers=(100,), backend=backend)
    tc: core.TrainContext = dqn_agent.train(
        [log.Duration(), log.Iteration(eval_only=True), log.Agent()],
        num_iterations=num_iterations,
        num_steps_buffer_preload=1000,
        num_iterations_between_eval=500,
        max_steps_per_episode=200,
        default_plots=False)
    # generator expression: no need to materialize a list just to take the max
    return max(avg_steps for (min_steps, avg_steps, max_steps) in tc.eval_steps.values())
def test_reinforce_train(self):
    """Train Tensorforce REINFORCE on CartPole for 50 iterations; check average reward."""
    from easyagents.backends import tforce
    config = core.ModelConfig(_cartpole_name)
    context = core.EpisodesTrainContext()
    context.num_iterations = 50
    agent = tforce.TforceReinforceAgent(model_config=config)
    agent.train(train_context=context,
                callbacks=[log.Iteration(), log.Agent()])
    (min_r, avg_r, max_r) = context.eval_rewards[context.episodes_done_in_training]
    assert avg_r > 100
def test_dueling_dqn_train(self):
    """Smoke-train a Tensorforce dueling DQN agent on CartPole-v0."""
    config = core.ModelConfig("CartPole-v0", fc_layers=(100,))
    context: core.StepsTrainContext = core.StepsTrainContext()
    context.num_iterations = 20000
    context.num_steps_buffer_preload = 1000
    context.num_iterations_between_eval = 1000
    context.max_steps_per_episode = 200
    agent = tforce.TforceDuelingDqnAgent(model_config=config)
    agent.train(train_context=context,
                callbacks=[log.Iteration(eval_only=True), log.Agent()])
def test_train_cartpole(self):
    """Train PPO on CartPole-v0 for every backend using a small explicit train context."""
    for backend in get_backends(PpoAgent):
        agent = PpoAgent(gym_env_name="CartPole-v0", backend=backend)
        context = core.PpoTrainContext()
        context.num_iterations = 3
        context.num_episodes_per_iteration = 10
        context.max_steps_per_episode = 500
        context.num_epochs_per_iteration = 5
        context.num_iterations_between_eval = 2
        context.num_episodes_per_eval = 5
        agent.train([log.Iteration()], train_context=context)
def test_dqn(self):
    """Smoke-train a keras-rl DQN agent on CartPole-v0 with a fixed seed."""
    easyagents.agents.seed = 0
    config = core.ModelConfig("CartPole-v0")
    context = core.StepsTrainContext()
    agent = kerasrl.KerasRlDqnAgent(model_config=config)
    agent.train(
        train_context=context,
        callbacks=[duration.Fast(), log.Agent(), log.Step(), log.Iteration()])
def test_cem(self):
    """Smoke-train a keras-rl CEM agent on CartPole-v0 with explicit train settings."""
    easyagents.agents.seed = 0
    config = core.ModelConfig("CartPole-v0", fc_layers=(100,))
    context = core.CemTrainContext()
    context.num_iterations = 100
    context.num_episodes_per_iteration = 50
    context.max_steps_per_episode = 200
    context.elite_set_fraction = 0.1
    context.num_steps_buffer_preload = 2000
    agent = kerasrl.KerasRlCemAgent(model_config=config)
    agent.train(train_context=context,
                callbacks=[log.Agent(), log.Iteration()])
def test_train(self):
    """Train PPO on CartPole for every backend and assert a minimum best average reward."""
    agents.seed = 0
    for backend in get_backends(PpoAgent):
        agent = PpoAgent(gym_env_name=_cartpole_name, backend=backend)
        context = core.PpoTrainContext()
        context.num_iterations = 10
        context.num_episodes_per_iteration = 10
        context.max_steps_per_episode = 200
        context.num_epochs_per_iteration = 5
        context.num_iterations_between_eval = 5
        context.num_episodes_per_eval = 5
        agent.train([log.Iteration()], train_context=context, default_plots=False)
        assert max_avg_rewards(context) >= 50
def test_dqn_train(self):
    """Train Tensorforce DQN on CartPole for 10000 iterations; check average reward."""
    from easyagents.backends import tforce
    config = core.ModelConfig(_cartpole_name, fc_layers=(100, 100))
    context: core.StepsTrainContext = core.StepsTrainContext()
    context.num_iterations = 10000
    context.num_steps_buffer_preload = 500
    context.num_iterations_between_eval = 500
    context.max_steps_per_episode = 200
    agent = tforce.TforceDqnAgent(model_config=config)
    agent.train(train_context=context,
                callbacks=[log.Iteration(eval_only=True), log.Agent()])
    (min_r, avg_r, max_r) = context.eval_rewards[context.episodes_done_in_training]
    assert avg_r > 50
def test_save_(self):
    """Train a Tensorforce PPO agent, save it to a temp dir, and load it into a new agent."""
    from easyagents.backends import tforce
    config = core.ModelConfig(_cartpole_name)
    context = core.PpoTrainContext()
    context.num_iterations = 3
    agent = tforce.TforcePpoAgent(model_config=config)
    agent.train(train_context=context,
                callbacks=[log.Iteration(), log.Agent()])
    tempdir = bcore._get_temp_path()
    bcore._mkdir(tempdir)
    agent.save(tempdir, [])
    restored_agent = tforce.TforcePpoAgent(model_config=config)
    restored_agent.load(tempdir, [])
    bcore._rmpath(tempdir)
def test_save_load(self):
    """Train a tf-agents PPO agent, save it, reload it, and play one short episode."""
    config = core.ModelConfig(_lineworld_name)
    context = core.PpoTrainContext()
    agent = tfagents.TfPpoAgent(model_config=config)
    agent.train(
        train_context=context,
        callbacks=[duration._SingleIteration(), log.Iteration()])
    tempdir = bcore._get_temp_path()
    bcore._mkdir(tempdir)
    agent.save(tempdir, [])
    # reload into a fresh agent instance from the saved directory
    agent = tfagents.TfPpoAgent(model_config=config)
    agent.load(tempdir, [])
    play_context = core.PlayContext()
    play_context.max_steps_per_episode = 10
    play_context.num_episodes = 1
    agent.play(play_context=play_context, callbacks=[])
    bcore._rmpath(tempdir)
def test_cartpole_log_iteration(self):
    """Run a single PPO training iteration on CartPole-v0 with iteration logging."""
    agent = agents.PpoAgent(gym_env_name="CartPole-v0", backend='tfagents')
    agent.train([log.Iteration(), duration._SingleIteration()])
def test_log_iteration(self):
    """Run a single PPO training iteration on the step-count env with iteration logging."""
    ppo_agent = agents.PpoAgent(_step_count_name)
    ppo_agent.train([log.Iteration(), duration._SingleIteration()])