def test_train(self):
    """Smoke-test ``tfagents.TfPpoAgent.train`` on the ``_lineworld_name`` env.

    No assertions are made; the test passes when ``train`` returns without
    raising.
    """
    config = core.ModelConfig(_lineworld_name)
    context = core.PpoTrainContext()
    agent = tfagents.TfPpoAgent(model_config=config)
    # Callbacks are created after the agent, as a named list for readability.
    train_callbacks = [duration.Fast(), log.Iteration()]
    agent.train(train_context=context, callbacks=train_callbacks)
def test_train(self):
    """Train a ``SacAgent`` once for every backend of ``get_backends(SacAgent)``.

    Each run uses 10 iterations, at most 200 steps per episode and no default
    plots. No assertions are made; the test passes when no run raises.
    """
    for sac_backend in get_backends(SacAgent):
        agent: SacAgent = SacAgent('MountainCarContinuous-v0', backend=sac_backend)
        # Fresh callback instances are built for every backend.
        train_callbacks = [
            log.Duration(),
            log.Iteration(),
            log.Agent(),
            duration.Fast(),
        ]
        agent.train(
            train_callbacks,
            num_iterations=10,
            max_steps_per_episode=200,
            default_plots=False,
        )
def test_train(self):
    """Smoke-test ``tfagents.TfDqnAgent.train`` on ``"CartPole-v0"``.

    Uses a ``core.StepsTrainContext``. No assertions are made; the test
    passes when ``train`` returns without raising.
    """
    config = core.ModelConfig("CartPole-v0")
    context = core.StepsTrainContext()
    agent = tfagents.TfDqnAgent(model_config=config)
    train_callbacks = [duration.Fast(), log.Iteration()]
    agent.train(train_context=context, callbacks=train_callbacks)
def test_train(self):
    """Train ``tfagents.TfRandomAgent`` and check the per-iteration episode count.

    After training with a plain ``core.TrainContext``, the context must
    report exactly one episode done in the iteration.
    """
    config = core.ModelConfig(_lineworld_name)
    context = core.TrainContext()
    agent = tfagents.TfRandomAgent(model_config=config)
    train_callbacks = [duration.Fast(), log.Iteration()]
    agent.train(train_context=context, callbacks=train_callbacks)

    assert context.episodes_done_in_iteration == 1
def test_reinforce_train(self):
    """Smoke-test ``tforce.TforceReinforceAgent.train`` on ``"CartPole-v0"``.

    Uses a ``core.EpisodesTrainContext``. No assertions are made; the test
    passes when ``train`` returns without raising.
    """
    config = core.ModelConfig("CartPole-v0")
    context = core.EpisodesTrainContext()
    reinforce_agent = tforce.TforceReinforceAgent(model_config=config)
    train_callbacks = [log.Iteration(), log.Agent(), duration.Fast()]
    reinforce_agent.train(train_context=context, callbacks=train_callbacks)
def test_ppo_train(self):
    """Smoke-test ``tforce.TforcePpoAgent.train`` on ``"CartPole-v0"``.

    Uses a ``core.PpoTrainContext``. No assertions are made; the test passes
    when ``train`` returns without raising.
    """
    config = core.ModelConfig("CartPole-v0")
    context = core.PpoTrainContext()
    ppo_agent = tforce.TforcePpoAgent(model_config=config)
    train_callbacks = [log.Iteration(), log.Agent(), duration.Fast()]
    ppo_agent.train(train_context=context, callbacks=train_callbacks)
def test_train(self):
    """Train ``tfagents.TfReinforceAgent`` on ``"CartPole-v0"`` and check progress.

    Verifies that the train context reports all configured episodes and
    iterations as done (and that both configured counts are positive), and
    that the third value of the final ``eval_rewards`` entry is at least 10.
    """
    config = core.ModelConfig("CartPole-v0")
    context = core.EpisodesTrainContext()
    reinforce_agent = tfagents.TfReinforceAgent(model_config=config)
    train_callbacks = [duration.Fast(), log.Iteration()]
    reinforce_agent.train(train_context=context, callbacks=train_callbacks)

    # Chained comparisons: "done == configured" and "configured > 0".
    assert context.episodes_done_in_iteration == context.num_episodes_per_iteration > 0
    assert context.iterations_done_in_training == context.num_iterations > 0

    # The entry is unpacked into exactly three values; only the last is
    # checked (presumably min/avg/max reward - confirm against TrainContext).
    _, _, highest_reward = context.eval_rewards[context.episodes_done_in_training]
    assert highest_reward >= 10
def test_train(self):
    """Train ``tfagents.TfReinforceAgent`` on ``_lineworld_name`` and check progress.

    Verifies that the train context reports all configured episodes and
    iterations as done, and that both configured counts are positive.
    """
    config = core.ModelConfig(_lineworld_name)
    context = core.EpisodesTrainContext()
    agent = tfagents.TfReinforceAgent(model_config=config)
    train_callbacks = [duration.Fast(), log.Iteration()]
    agent.train(train_context=context, callbacks=train_callbacks)

    # Chained comparisons: "done == configured" and "configured > 0".
    assert context.episodes_done_in_iteration == context.num_episodes_per_iteration > 0
    assert context.iterations_done_in_training == context.num_iterations > 0
def test_train(self):
    """Smoke-test ``tfagents.TfSacAgent.train`` on ``_mountaincar_continuous_name``.

    Uses a ``core.StepsTrainContext``. No assertions are made; the test
    passes when ``train`` returns without raising.
    """
    config = core.ModelConfig(_mountaincar_continuous_name)
    context = core.StepsTrainContext()
    sac_agent = tfagents.TfSacAgent(model_config=config)
    train_callbacks = [duration.Fast(), log.Iteration(), log.Agent()]
    sac_agent.train(train_context=context, callbacks=train_callbacks)
def test_dqn(self):
    """Smoke-test ``kerasrl.KerasRlDqnAgent.train`` on ``"CartPole-v0"``.

    Sets ``easyagents.agents.seed`` to 0 before building anything else. No
    assertions are made; the test passes when ``train`` returns without
    raising.
    """
    # Module-level attribute; it stays set after this test finishes.
    easyagents.agents.seed = 0

    config = core.ModelConfig("CartPole-v0")
    context = core.StepsTrainContext()
    dqn_agent = kerasrl.KerasRlDqnAgent(model_config=config)
    train_callbacks = [
        duration.Fast(),
        log.Agent(),
        log.Step(),
        log.Iteration(),
    ]
    dqn_agent.train(train_context=context, callbacks=train_callbacks)
def test_log_agent(self):
    """Smoke-test ``agents.PpoAgent.train`` with the ``log.Agent`` callback.

    The agent is built for ``_step_count_name``. No assertions are made; the
    test passes when ``train`` returns without raising.
    """
    ppo_agent = agents.PpoAgent(_step_count_name)
    train_callbacks = [log.Agent(), duration.Fast()]
    ppo_agent.train(train_callbacks)
def test_train_plotsteprewards(self):
    """Smoke-test ``agents.PpoAgent.train`` with the ``plot.StepRewards`` callback.

    No assertions are made; the test passes when ``train`` returns without
    raising.
    """
    ppo_agent = agents.PpoAgent('CartPole-v0')
    train_callbacks = [plot.StepRewards(), duration.Fast()]
    ppo_agent.train(train_callbacks)
def test_default_plots_None_durationcallback(self):
    """Check that ``_add_plot_callbacks`` returns the plot passed as third argument.

    Called with a single ``duration.Fast`` callback, ``None`` as second
    argument (presumably ``default_plots``, going by the test name - confirm
    against the agent implementation) and a list holding one ``plot.Loss``.
    """
    ppo_agent = agents.PpoAgent("CartPole-v0")
    loss_plot = plot.Loss()
    merged_callbacks = ppo_agent._add_plot_callbacks(
        [duration.Fast()],
        None,
        [loss_plot],
    )
    assert loss_plot in merged_callbacks
def test_train(self):
    """Train a ``SacAgent`` per backend and check the resulting reward.

    For every backend of ``get_backends(SacAgent)`` the agent is trained
    without default plots, and ``max_avg_rewards`` of the returned train
    context must be at least -1.
    """
    for sac_backend in get_backends(SacAgent):
        agent: SacAgent = SacAgent(_mountaincart_continuous_name, backend=sac_backend)
        # Fresh callback instances are built for every backend.
        train_callbacks = [
            log.Duration(),
            log.Iteration(eval_only=True),
            duration.Fast(),
        ]
        context: core.TrainContext = agent.train(train_callbacks, default_plots=False)
        best_avg_reward = max_avg_rewards(context)
        assert best_avg_reward >= -1