def test_train(self):
    """Smoke-test: a TF-Agents PPO agent completes a fast training run on lineworld."""
    config = core.ModelConfig(_lineworld_name)
    train_ctx = core.PpoTrainContext()
    agent = tfagents.TfPpoAgent(model_config=config)
    agent.train(train_context=train_ctx, callbacks=[duration.Fast(), log.Iteration()])
def test_train(self):
    """Smoke-test: a TF-Agents DQN agent completes a fast training run on CartPole."""
    config = core.ModelConfig("CartPole-v0")
    train_ctx = core.StepsTrainContext()
    agent = tfagents.TfDqnAgent(model_config=config)
    agent.train(train_context=train_ctx, callbacks=[duration.Fast(), log.Iteration()])
def test_save_load(self):
    """A random agent can round-trip through save() and load() on a temp directory."""
    config = core.ModelConfig(_lineworld_name)
    agent = tfagents.TfRandomAgent(model_config=config)
    temp_dir = bcore._get_temp_path()
    bcore._mkdir(temp_dir)
    agent.save(directory=temp_dir, callbacks=[])
    agent.load(directory=temp_dir, callbacks=[])
    # clean up the scratch directory created above
    bcore._rmpath(temp_dir)
def test_train(self):
    """Training a random agent runs one episode per iteration on lineworld."""
    config = core.ModelConfig(_lineworld_name)
    train_ctx = core.TrainContext()
    agent = tfagents.TfRandomAgent(model_config=config)
    agent.train(train_context=train_ctx, callbacks=[duration.Fast(), log.Iteration()])
    assert train_ctx.episodes_done_in_iteration == 1
def test_play(self):
    """play() honors the play context's episode count for a random agent."""
    config = core.ModelConfig("CartPole-v0")
    agent = tfagents.TfRandomAgent(model_config=config)
    play_ctx = core.PlayContext()
    play_ctx.max_steps_per_episode = 10
    play_ctx.num_episodes = 1
    agent.play(play_context=play_ctx, callbacks=[])
    assert play_ctx.num_episodes == 1
def test_train(self):
    """A TF-Agents Reinforce agent trains on CartPole and reaches a minimal reward."""
    config = core.ModelConfig("CartPole-v0")
    train_ctx = core.EpisodesTrainContext()
    agent = tfagents.TfReinforceAgent(model_config=config)
    agent.train(train_context=train_ctx, callbacks=[duration.Fast(), log.Iteration()])
    # iteration / training counters must have advanced as configured
    assert train_ctx.episodes_done_in_iteration == train_ctx.num_episodes_per_iteration > 0
    assert train_ctx.iterations_done_in_training == train_ctx.num_iterations > 0
    # eval_rewards maps episodes-done to (min, avg, max) reward tuples
    rmin, ravg, rmax = train_ctx.eval_rewards[train_ctx.episodes_done_in_training]
    assert rmax >= 10
def test_ppo_train(self):
    """Smoke-test: a Tensorforce PPO agent completes a fast training run on CartPole."""
    config = core.ModelConfig("CartPole-v0")
    train_ctx = core.PpoTrainContext()
    agent = tforce.TforcePpoAgent(model_config=config)
    agent.train(train_context=train_ctx,
                callbacks=[log.Iteration(), log.Agent(), duration.Fast()])
def test_train(self):
    """A TF-Agents Reinforce agent trains on lineworld and advances its counters."""
    config = core.ModelConfig(_lineworld_name)
    train_ctx = core.EpisodesTrainContext()
    agent = tfagents.TfReinforceAgent(model_config=config)
    agent.train(train_context=train_ctx, callbacks=[duration.Fast(), log.Iteration()])
    assert train_ctx.episodes_done_in_iteration == train_ctx.num_episodes_per_iteration > 0
    assert train_ctx.iterations_done_in_training == train_ctx.num_iterations > 0
def test_reinforce_train(self):
    """Smoke-test: a Tensorforce Reinforce agent completes a fast training run."""
    config = core.ModelConfig("CartPole-v0")
    train_ctx = core.EpisodesTrainContext()
    agent = tforce.TforceReinforceAgent(model_config=config)
    agent.train(train_context=train_ctx,
                callbacks=[log.Iteration(), log.Agent(), duration.Fast()])
def test_train(self):
    """Smoke-test: a TF-Agents SAC agent completes a fast training run on MountainCarContinuous."""
    config = core.ModelConfig(_mountaincar_continuous_name)
    train_ctx = core.StepsTrainContext()
    sac_agent = tfagents.TfSacAgent(model_config=config)
    sac_agent.train(train_context=train_ctx,
                    callbacks=[duration.Fast(), log.Iteration(), log.Agent()])
def test_reinforce_train(self):
    """Tensorforce Reinforce learns CartPole: average eval reward exceeds 100 after 50 iterations."""
    from easyagents.backends import tforce

    config = core.ModelConfig(_cartpole_name)
    train_ctx = core.EpisodesTrainContext()
    train_ctx.num_iterations = 50
    agent = tforce.TforceReinforceAgent(model_config=config)
    agent.train(train_context=train_ctx, callbacks=[log.Iteration(), log.Agent()])
    # eval_rewards maps episodes-done to (min, avg, max) reward tuples
    (min_r, avg_r, max_r) = train_ctx.eval_rewards[train_ctx.episodes_done_in_training]
    assert avg_r > 100
def test_dueling_dqn_train(self):
    """Trains a Tensorforce dueling-DQN agent on CartPole with a long step budget."""
    config = core.ModelConfig("CartPole-v0", fc_layers=(100, ))
    train_ctx: core.StepsTrainContext = core.StepsTrainContext()
    train_ctx.num_iterations = 20000
    train_ctx.num_steps_buffer_preload = 1000
    train_ctx.num_iterations_between_eval = 1000
    train_ctx.max_steps_per_episode = 200
    agent = tforce.TforceDuelingDqnAgent(model_config=config)
    # eval_only keeps the log output manageable over 20k iterations
    agent.train(train_context=train_ctx,
                callbacks=[log.Iteration(eval_only=True), log.Agent()])
def test_dqn_train(self):
    """Tensorforce DQN learns CartPole: average eval reward exceeds 50."""
    from easyagents.backends import tforce

    config = core.ModelConfig(_cartpole_name, fc_layers=(100, 100))
    train_ctx: core.StepsTrainContext = core.StepsTrainContext()
    train_ctx.num_iterations = 10000
    train_ctx.num_steps_buffer_preload = 500
    train_ctx.num_iterations_between_eval = 500
    train_ctx.max_steps_per_episode = 200
    agent = tforce.TforceDqnAgent(model_config=config)
    agent.train(train_context=train_ctx,
                callbacks=[log.Iteration(eval_only=True), log.Agent()])
    # eval_rewards maps episodes-done to (min, avg, max) reward tuples
    (min_r, avg_r, max_r) = train_ctx.eval_rewards[train_ctx.episodes_done_in_training]
    assert avg_r > 50
def test_save_(self):
    """A trained Tensorforce PPO agent can be saved and reloaded into a fresh agent."""
    from easyagents.backends import tforce

    config = core.ModelConfig(_cartpole_name)
    train_ctx = core.PpoTrainContext()
    train_ctx.num_iterations = 3
    trained = tforce.TforcePpoAgent(model_config=config)
    trained.train(train_context=train_ctx, callbacks=[log.Iteration(), log.Agent()])
    temp_dir = bcore._get_temp_path()
    bcore._mkdir(temp_dir)
    trained.save(temp_dir, [])
    # load into a brand-new agent instance to prove persistence works
    restored = tforce.TforcePpoAgent(model_config=config)
    restored.load(temp_dir, [])
    bcore._rmpath(temp_dir)
def __init__(self, gym_env_name: str, fc_layers: Union[Tuple[int, ...], int, None] = None,
             backend: str = None, seed: int = None):
    """Creates the agent for the given gym environment and initializes its backend.

    Fix: the original body passed ``seed=seed`` to ``core.ModelConfig`` although no
    ``seed`` name was in scope, raising ``NameError`` on every call. ``seed`` is now
    an explicit, optional parameter (appended last, so existing callers are
    unaffected).

    Args:
        gym_env_name: name of an OpenAI gym environment to be used for training
            and evaluation
        fc_layers: defines the neural network to be used, a sequence of fully
            connected layers of the given size. Eg (75,40) yields a neural network
            consisting out of 2 hidden layers, the first one containing 75 and the
            second layer containing 40 neurons.
        backend: the backend to be used (eg 'tfagents'), if None a default
            implementation is used. call get_backends() to get a list of the
            available backends.
        seed: optional seed forwarded to the model configuration; if None no
            explicit seeding is requested.
    """
    model_config = core.ModelConfig(gym_env_name=gym_env_name, fc_layers=fc_layers, seed=seed)
    self._initialize(model_config=model_config, backend_name=backend)
    return
def test_save_load(self):
    """A trained TF-Agents PPO agent survives a save/load round-trip and can still play."""
    config = core.ModelConfig(_lineworld_name)
    train_ctx = core.PpoTrainContext()
    agent = tfagents.TfPpoAgent(model_config=config)
    agent.train(train_context=train_ctx,
                callbacks=[duration._SingleIteration(), log.Iteration()])
    temp_dir = bcore._get_temp_path()
    bcore._mkdir(temp_dir)
    agent.save(temp_dir, [])
    # reload into a fresh agent instance and verify it can play an episode
    agent = tfagents.TfPpoAgent(model_config=config)
    agent.load(temp_dir, [])
    play_ctx = core.PlayContext()
    play_ctx.max_steps_per_episode = 10
    play_ctx.num_episodes = 1
    agent.play(play_context=play_ctx, callbacks=[])
    bcore._rmpath(temp_dir)
def _initialize(self, gym_env_name: str = None, fc_layers: Tuple[int, ...] = None,
                model_config: core.ModelConfig = None, backend_name: str = None):
    """Resolves the model configuration and backend, then creates the backend agent.

    Either pass a ready-made model_config, or gym_env_name/fc_layers from which
    one is built. If backend_name is None the default backend is used.
    """
    if model_config is None:
        model_config = core.ModelConfig(gym_env_name=gym_env_name, fc_layers=fc_layers)
    if backend_name is None:
        backend_name = easyagents.backends.default.BackendAgentFactory.backend_name
    factory: bcore.BackendAgentFactory = _get_backend(backend_name)
    assert model_config is not None, "model_config not set."
    assert factory, f'Backend "{backend_name}" not found. The registered backends are {get_backends()}.'
    self._model_config: core.ModelConfig = model_config
    agent_impl = factory.create_agent(easyagent_type=type(self), model_config=model_config)
    assert agent_impl, f'Backend "{backend_name}" does not implement "{type(self).__name__}". ' + \
                       f'Choose one of the following backend {get_backends(type(self))}.'
    self._backend_agent: Optional[bcore._BackendAgent] = agent_impl
    return
def test_create_agent(self):
    """The debug factory produces a backend agent for a supported agent type."""
    factory = BackendAgentFactoryTest.DebugAgentFactory()
    config = core.ModelConfig(gym_env_name="CartPole-v0")
    agent = factory.create_agent(easyagent_type=easyagents.agents.DqnAgent,
                                 model_config=config)
    assert agent is not None
def test_setbackendagent_twice(self):
    """Registering the same backend agent repeatedly (and then None) must not raise."""
    config = core.ModelConfig(self.env_name)
    agent = debug.DebugAgent(config)
    monitor._MonitorEnv._register_backend_agent(agent)
    monitor._MonitorEnv._register_backend_agent(agent)
    # deregistering via None must also be accepted
    monitor._MonitorEnv._register_backend_agent(None)
def test_create_agent_not_implemented(self):
    """The debug factory returns None for an agent type it does not implement."""
    factory = BackendAgentFactoryTest.DebugAgentFactory()
    config = core.ModelConfig(gym_env_name="CartPole-v0")
    agent = factory.create_agent(easyagent_type=easyagents.agents.ReinforceAgent,
                                 model_config=config)
    assert agent is None
def __init__(self):
    """Initializes the base class with the test environment and a fixed action of 1."""
    config = core.ModelConfig(gym_env_name=BackendAgentTest.env_name)
    super().__init__(config, action=1)