def train_and_assert(self, agent_type, is_v1: bool, num_iterations=100):
    """Train agent_type on CartPole-v0 for each matching backend and assert eval quality.

    Args:
        agent_type: agent class to instantiate (used here as a CemAgent).
        is_v1: if True train only on v1-only backends, otherwise on v2 backends.
        num_iterations: number of training iterations per backend.
    """
    logger = logging.warning
    # Partition the registered backends: v2 backends vs. backends that exist
    # only in the full (v1-inclusive) list.
    v2_backends = list(get_backends(agent_type, skip_v1=True))
    v1_backends = [b for b in get_backends(agent_type) if b not in v2_backends]
    backends = v1_backends if is_v1 else v2_backends
    for backend in backends:
        logger(
            f'backend={backend} agent={agent_type}, num_iterations={num_iterations}'
        )
        cem_agent: CemAgent = agent_type('CartPole-v0',
                                         fc_layers=(100,),
                                         backend=backend)
        tc: core.TrainContext = cem_agent.train(
            [log.Duration(), log.Iteration(eval_only=True), log.Agent()],
            num_iterations=num_iterations,
            num_iterations_between_eval=10,
            max_steps_per_episode=200,
            default_plots=False)
        # Eval stats recorded after the final training episode: (min, avg, max).
        (min_steps, avg_steps, max_steps) = tc.eval_steps[tc.episodes_done_in_training]
        assert max_steps >= 100
        assert avg_steps >= 50
def test_train(self):
    """Smoke test: SacAgent trains on CartPole-v0 for every registered backend."""
    for current_backend in get_backends(SacAgent):
        agent: SacAgent = SacAgent('CartPole-v0', backend=current_backend)
        # A short run is enough here; no assertions beyond "does not raise".
        agent.train(
            [log.Duration(), log.Iteration(), log.Agent()],
            num_iterations=10,
            max_steps_per_episode=200,
            default_plots=False)
def test_train(self):
    """Train SacAgent on the continuous mountain-car env and check the best avg reward."""
    for current_backend in get_backends(SacAgent):
        agent: SacAgent = SacAgent(_mountaincart_continuous_name,
                                   backend=current_backend)
        train_context: core.TrainContext = agent.train(
            [log.Duration(), log.Iteration(eval_only=True), duration.Fast()],
            default_plots=False)
        best_avg_reward = max_avg_rewards(train_context)
        assert best_avg_reward >= -1
def test_train(self):
    """Train ReinforceAgent on CartPole-v0 and assert a minimal average step count.

    Fix: backends were queried for RandomAgent although a ReinforceAgent is
    trained — query the backends of the agent class actually instantiated.
    """
    for backend in get_backends(ReinforceAgent):
        reinforce_agent: ReinforceAgent = ReinforceAgent('CartPole-v0',
                                                         backend=backend)
        tc: core.TrainContext = reinforce_agent.train(
            [log.Duration(), log.Iteration()],
            num_iterations=10,
            max_steps_per_episode=200,
            default_plots=False)
        # Eval stats recorded after the final training episode: (min, avg, max).
        (min_steps, avg_steps, max_steps) = tc.eval_steps[tc.episodes_done_in_training]
        assert avg_steps >= 10
def test_train(self):
    """Train ReinforceAgent on the line-world env and assert a minimal avg reward.

    Fix: backends were queried for RandomAgent although a ReinforceAgent is
    trained — query the backends of the agent class actually instantiated.
    """
    for backend in get_backends(ReinforceAgent):
        reinforce_agent: ReinforceAgent = ReinforceAgent(_line_world_name,
                                                         backend=backend)
        tc: core.TrainContext = reinforce_agent.train(
            [log.Duration(), log.Iteration()],
            num_iterations=10,
            max_steps_per_episode=200,
            default_plots=False)
        r = max_avg_rewards(tc)
        assert r >= 5
def train_and_eval(self, agent_type, backend, num_iterations):
    """Train a DQN-style agent on cartpole and return the best average eval reward.

    Args:
        agent_type: agent class to instantiate (used here as a DqnAgent).
        backend: backend implementation to train with.
        num_iterations: number of training iterations.

    Returns:
        the maximum average reward observed across all evaluations.
    """
    agent: DqnAgent = agent_type(_cartpole_name,
                                 fc_layers=(100,),
                                 backend=backend)
    callbacks = [log.Duration(), log.Iteration(eval_only=True), log.Agent()]
    train_context: core.TrainContext = agent.train(
        callbacks,
        num_iterations=num_iterations,
        num_steps_buffer_preload=1000,
        num_iterations_between_eval=500,
        max_steps_per_episode=200,
        default_plots=False)
    return max_avg_rewards(train_context)
def train_and_eval(self, agent_type, backend, num_iterations):
    """Train a DQN-style agent on CartPole-v0 and return the best average step count.

    Args:
        agent_type: agent class to instantiate (used here as a DqnAgent).
        backend: backend implementation to train with.
        num_iterations: number of training iterations.

    Returns:
        the maximum over all evaluations of the average steps per episode.
    """
    dqn_agent: DqnAgent = agent_type('CartPole-v0',
                                     fc_layers=(100,),
                                     backend=backend)
    tc: core.TrainContext = dqn_agent.train(
        [log.Duration(), log.Iteration(eval_only=True), log.Agent()],
        num_iterations=num_iterations,
        num_steps_buffer_preload=1000,
        num_iterations_between_eval=500,
        max_steps_per_episode=200,
        default_plots=False)
    # eval_steps values are (min, avg, max) tuples; feed max() a generator
    # instead of materializing a throwaway list, and skip the unused fields.
    return max(avg_steps for (_, avg_steps, _) in tc.eval_steps.values())