def setUpClass(cls):
    cls.batch_size = 32
    cls.memory_capacity = 32
    cls.on_policy_agent = OnPolicyAgent(
        name="OnPolicyAgent", batch_size=cls.batch_size)
    cls.off_policy_agent = OffPolicyAgent(
        name="OffPolicyAgent", memory_capacity=cls.memory_capacity)
    cls.discrete_env = make("CartPole-v0")
    cls.continuous_env = make("Pendulum-v0")
def test_empty_args(self):
    """Test empty args {}"""
    env = make("Pendulum-v0")
    test_env = make("Pendulum-v0")
    policy = DDPG(state_shape=env.observation_space.shape,
                  action_dim=env.action_space.high.size,
                  gpu=-1,
                  memory_capacity=1000,
                  max_action=env.action_space.high[0],
                  batch_size=32,
                  n_warmup=10)
    Trainer(policy, env, {}, test_env=test_env)
def test_invalid_args(self):
    """Test with invalid args"""
    env = make("Pendulum-v0")
    test_env = make("Pendulum-v0")
    policy = DDPG(state_shape=env.observation_space.shape,
                  action_dim=env.action_space.high.size,
                  gpu=-1,
                  memory_capacity=1000,
                  max_action=env.action_space.high[0],
                  batch_size=32,
                  n_warmup=10)
    with self.assertRaises(ValueError):
        Trainer(policy, env, {"NOT_EXISTING_OPTIONS": 1}, test_env=test_env)
def test_with_args(self):
    """Test with args"""
    max_steps = 400
    env = make("Pendulum-v0")
    test_env = make("Pendulum-v0")
    policy = DDPG(state_shape=env.observation_space.shape,
                  action_dim=env.action_space.high.size,
                  gpu=-1,
                  memory_capacity=1000,
                  max_action=env.action_space.high[0],
                  batch_size=32,
                  n_warmup=10)
    trainer = Trainer(policy, env, {"max_steps": max_steps}, test_env=test_env)
    self.assertEqual(trainer._max_steps, max_steps)
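# Hedged sketch (helper name is illustrative, not from the original tests):
# the dict given as Trainer's third argument appears to override the same
# options that Trainer.get_argument() exposes on the command line, with
# unknown keys rejected via ValueError, so the call below is assumed
# equivalent to parsing "--max-steps 400".
def _example_trainer_kwargs(policy, env, test_env):
    return Trainer(policy, env, {"max_steps": 400}, test_env=test_env)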
def setUpClass(cls):
    cls.env = make("CartPole-v0")
    policy = DQN(state_shape=cls.env.observation_space.shape,
                 action_dim=cls.env.action_space.n,
                 memory_capacity=2**4)
    cls.replay_buffer = get_replay_buffer(policy, cls.env)
    cls.output_dir = os.path.join(os.path.dirname(__file__), "tests")
    if not os.path.isdir(cls.output_dir):
        os.makedirs(cls.output_dir)
def test_wrap_dqn(self):
    env = wrap_dqn(make("SpaceInvadersNoFrameskip-v4"), wrap_ndarray=True)
    obs = env.reset()
    self.assertEqual(type(obs), np.ndarray)
    self.assertEqual(obs.shape, (84, 84, 4))
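# Hedged usage sketch (helper name and minimal DQN arguments are
# assumptions, not from the original tests): the wrapped environment's
# (84, 84, 4) observation shape can be passed directly to a policy's
# state_shape, as the DQN example further below does.
def _example_wrap_dqn_usage():
    env = wrap_dqn(make("SpaceInvadersNoFrameskip-v4"), wrap_ndarray=True)
    policy = DQN(state_shape=env.observation_space.shape,  # (84, 84, 4)
                 action_dim=env.action_space.n)
    return policy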
def env_fn(): return make("CartPole-v0")
def env_fn(): return make("Pendulum-v0")
if __name__ == '__main__':
    parser = Trainer.get_argument()
    parser = DQN.get_argument(parser)
    parser.add_argument('--env-name', type=str,
                        default="SpaceInvadersNoFrameskip-v4")
    parser.set_defaults(episode_max_steps=108000)
    parser.set_defaults(test_interval=10000)
    parser.set_defaults(max_steps=int(1e9))
    parser.set_defaults(save_model_interval=500000)
    parser.set_defaults(gpu=0)
    parser.set_defaults(show_test_images=True)
    parser.set_defaults(memory_capacity=int(1e6))
    args = parser.parse_args()

    env = wrap_dqn(make(args.env_name))
    test_env = wrap_dqn(make(args.env_name), reward_clipping=False)
    # The following parameters are equivalent to those in the DeepMind DQN paper:
    # https://www.nature.com/articles/nature14236
    policy = DQN(
        enable_double_dqn=args.enable_double_dqn,
        enable_dueling_dqn=args.enable_dueling_dqn,
        enable_noisy_dqn=args.enable_noisy_dqn,
        state_shape=env.observation_space.shape,
        action_dim=env.action_space.n,
        lr=0.0000625,  # This value is from Rainbow
        adam_eps=1.5e-4,  # This value is from Rainbow
        n_warmup=50000,
        target_replace_interval=10000,
        batch_size=32,
        memory_capacity=args.memory_capacity,
def make_atari(env_id, max_episode_steps=None):
    env = make(env_id)
    assert 'NoFrameskip' in env.spec.id
    # Sample initial states by taking a random number of no-ops on reset
    env = NoopResetEnv(env, noop_max=30)
    # Repeat each action over 4 frames and max-pool the last two observations
    env = MaxAndSkipEnv(env, skip=4)
    return env
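# Hedged usage sketch (helper name is an assumption): make_atari only
# applies the no-op reset and frame-skip wrappers, so observations here
# are still full-size Atari frames; resizing and stacking are layered on
# separately, e.g. by wrap_dqn above.
def _example_make_atari():
    env = make_atari("SpaceInvadersNoFrameskip-v4")
    obs = env.reset()  # raw-resolution frame, not yet 84x84 grayscale
    return env, obs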
def env_fn_continuous():
    return make('Pendulum-v0')
def test_is_discrete(self):
    discrete_space = make('CartPole-v0').action_space
    continuous_space = make('Pendulum-v0').action_space
    self.assertTrue(is_discrete(discrete_space))
    self.assertFalse(is_discrete(continuous_space))
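# Hedged sketch (helper name is an assumption): is_discrete is typically
# paired with get_act_dim (imported from tf2rl.envs.utils, as in the PPO
# example below) to configure a policy for either action-space type.
def _example_action_space_config(env):
    act_dim = get_act_dim(env.action_space)
    max_action = None if is_discrete(env.action_space) \
        else env.action_space.high[0]
    return act_dim, max_action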
def setUpClass(cls):
    # TODO: Remove dependencies on gym
    cls.discrete_env = make("CartPole-v0")
    cls.continuous_env = make("Pendulum-v0")
    cls.batch_size = 32
    cls.agent = None
def __call__(self):
    return make(self.env_name)
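# Hedged reconstruction (class name and constructor are assumptions): the
# __call__ above presumably belongs to a small factory object that stores
# an environment name and builds a fresh instance per call, keeping the
# factory itself cheap to pickle for multiprocess rollout workers.
class _ExampleEnvFactory:
    def __init__(self, env_name):
        self.env_name = env_name

    def __call__(self):
        return make(self.env_name)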
from tf2rl.algos.ppo import PPO
from tf2rl.envs.utils import is_discrete, get_act_dim, make
from tf2rl.experiments.me_trpo_trainer import MeTrpoTrainer
from examples.run_mpc import reward_fn_pendulum

if __name__ == "__main__":
    parser = MeTrpoTrainer.get_argument()
    parser = PPO.get_argument(parser)
    parser.set_defaults(episode_max_steps=100)
    parser.set_defaults(n_collect_steps=2048)
    parser.set_defaults(n_generate_steps=2048)
    args = parser.parse_args()
    args.n_generate_steps = args.horizon

    env = make("Pendulum-v0")
    test_env = make("Pendulum-v0")
    policy = PPO(
        state_shape=env.observation_space.shape,
        action_dim=get_act_dim(env.action_space),
        is_discrete=is_discrete(env.action_space),
        max_action=None if is_discrete(env.action_space)
        else env.action_space.high[0],
        batch_size=args.batch_size,
        actor_units=(32, 32),
        critic_units=(32, 32),
        n_epoch=10,
        lr_actor=3e-4,
        lr_critic=3e-4,
        hidden_activation_actor="tanh",
        hidden_activation_critic="tanh",
if __name__ == '__main__':
    parser = IRLTrainer.get_argument()
    parser = GAIfO.get_argument(parser)
    parser.add_argument('--env-name', type=str, default="Pendulum-v0")
    args = parser.parse_args()

    if args.expert_path_dir is None:
        print("Please generate demonstrations first")
        print(
            "python examples/run_sac.py --env-name=Pendulum-v0 --save-test-path --test-interval=50000"
        )
        exit()

    units = [400, 300]
    env = make(args.env_name)
    test_env = make(args.env_name)
    policy = DDPG(state_shape=env.observation_space.shape,
                  action_dim=env.action_space.high.size,
                  max_action=env.action_space.high[0],
                  gpu=args.gpu,
                  actor_units=units,
                  critic_units=units,
                  n_warmup=10000,
                  batch_size=100)
    irl = GAIfO(state_shape=env.observation_space.shape,
                units=units,
                enable_sn=args.enable_sn,
                batch_size=32,
                gpu=args.gpu)
    expert_trajs = restore_latest_n_traj(args.expert_path_dir,
def env_fn_discrete():
    return make("CartPole-v0")