Example 1
 @classmethod
 def setUpClass(cls):
     cls.batch_size = 32
     cls.memory_capacity = 32
     cls.on_policy_agent = OnPolicyAgent(name="OnPolicyAgent",
                                         batch_size=cls.batch_size)
     cls.off_policy_agent = OffPolicyAgent(
         name="OffPolicyAgent", memory_capacity=cls.memory_capacity)
     cls.discrete_env = make("CartPole-v0")
     cls.continuous_env = make("Pendulum-v0")
Example 2
 def test_empty_args(self):
     """
     Test empty args {}
     """
     env = make("Pendulum-v0")
     test_env = make("Pendulum-v0")
     policy = DDPG(state_shape=env.observation_space.shape,
                   action_dim=env.action_space.high.size,
                   gpu=-1,
                   memory_capacity=1000,
                   max_action=env.action_space.high[0],
                   batch_size=32,
                   n_warmup=10)
     Trainer(policy, env, {}, test_env=test_env)
Example 3
 def test_invalid_args(self):
     """
     Test with invalid args
     """
     env = make("Pendulum-v0")
     test_env = make("Pendulum-v0")
     policy = DDPG(state_shape=env.observation_space.shape,
                   action_dim=env.action_space.high.size,
                   gpu=-1,
                   memory_capacity=1000,
                   max_action=env.action_space.high[0],
                   batch_size=32,
                   n_warmup=10)
     with self.assertRaises(ValueError):
         Trainer(policy,
                 env, {"NOT_EXISTING_OPTIONS": 1},
                 test_env=test_env)
Example 4
 def test_with_args(self):
     """
     Test with args
     """
     max_steps = 400
     env = make("Pendulum-v0")
     test_env = make("Pendulum-v0")
     policy = DDPG(state_shape=env.observation_space.shape,
                   action_dim=env.action_space.high.size,
                   gpu=-1,
                   memory_capacity=1000,
                   max_action=env.action_space.high[0],
                   batch_size=32,
                   n_warmup=10)
     trainer = Trainer(policy,
                       env, {"max_steps": max_steps},
                       test_env=test_env)
     self.assertEqual(trainer._max_steps, max_steps)
Example 5
 @classmethod
 def setUpClass(cls):
     cls.env = make("CartPole-v0")
     policy = DQN(state_shape=cls.env.observation_space.shape,
                  action_dim=cls.env.action_space.n,
                  memory_capacity=2**4)
     cls.replay_buffer = get_replay_buffer(policy, cls.env)
     cls.output_dir = os.path.join(os.path.dirname(__file__), "tests")
     if not os.path.isdir(cls.output_dir):
         os.makedirs(cls.output_dir)
Example 6
    def test_wrap_dqn(self):
        env = wrap_dqn(make("SpaceInvadersNoFrameskip-v4"), wrap_ndarray=True)

        obs = env.reset()
        self.assertEqual(type(obs), np.ndarray)
        self.assertEqual(obs.shape, (84, 84, 4))  # 4 stacked 84x84 grayscale frames
Example 7
 def env_fn():
     return make("CartPole-v0")
Example 8
 def env_fn():
     return make("Pendulum-v0")
Example 9
if __name__ == '__main__':
    parser = Trainer.get_argument()
    parser = DQN.get_argument(parser)
    parser.add_argument('--env-name',
                        type=str,
                        default="SpaceInvadersNoFrameskip-v4")
    parser.set_defaults(episode_max_steps=108000)
    parser.set_defaults(test_interval=10000)
    parser.set_defaults(max_steps=int(1e9))
    parser.set_defaults(save_model_interval=500000)
    parser.set_defaults(gpu=0)
    parser.set_defaults(show_test_images=True)
    parser.set_defaults(memory_capacity=int(1e6))
    args = parser.parse_args()

    env = wrap_dqn(make(args.env_name))
    test_env = wrap_dqn(make(args.env_name), reward_clipping=False)
    # Following parameters are equivalent to DeepMind DQN paper
    # https://www.nature.com/articles/nature14236
    policy = DQN(
        enable_double_dqn=args.enable_double_dqn,
        enable_dueling_dqn=args.enable_dueling_dqn,
        enable_noisy_dqn=args.enable_noisy_dqn,
        state_shape=env.observation_space.shape,
        action_dim=env.action_space.n,
        lr=0.0000625,  # This value is from Rainbow
        adam_eps=1.5e-4,  # This value is from Rainbow
        n_warmup=50000,
        target_replace_interval=10000,
        batch_size=32,
        memory_capacity=args.memory_capacity,
Example 10
def make_atari(env_id, max_episode_steps=None):
    env = make(env_id)
    assert 'NoFrameskip' in env.spec.id
    env = NoopResetEnv(env, noop_max=30)  # take a random number of no-op actions at reset
    env = MaxAndSkipEnv(env, skip=4)      # repeat each action over 4 frames, max-pooling the last two
    return env
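The assert above restricts make_atari to frame-skip-free Atari ids; a hypothetical call (mirroring the id used in Examples 6 and 9):

env = make_atari("SpaceInvadersNoFrameskip-v4")  # accepted: the id contains 'NoFrameskip'
# make_atari("SpaceInvaders-v4")                 # would trip the assert above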
Example 11
def env_fn_continuous():
    return make('Pendulum-v0')
Example 12
 def test_is_discrete(self):
     discrete_space = make('CartPole-v0').action_space
     continuous_space = make('Pendulum-v0').action_space
     self.assertTrue(is_discrete(discrete_space))
     self.assertFalse(is_discrete(continuous_space))
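is_discrete here inspects the underlying gym space type; a minimal sketch of the same check built directly from gym spaces (assuming is_discrete accepts a space object, as it does in Example 12):

from gym import spaces

assert is_discrete(spaces.Discrete(2))                               # CartPole-style action space
assert not is_discrete(spaces.Box(low=-2.0, high=2.0, shape=(1,)))  # Pendulum-style action space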
Example 13
 @classmethod
 def setUpClass(cls):
     # TODO: Remove dependency on gym
     cls.discrete_env = make("CartPole-v0")
     cls.continuous_env = make("Pendulum-v0")
     cls.batch_size = 32
     cls.agent = None
Example 14
 def __call__(self):
     return make(self.env_name)
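Example 14 is a method of a small callable wrapper; a self-contained sketch of such a class (the class name and __init__ are assumptions, only __call__ comes from the snippet):

class EnvFactory:
    def __init__(self, env_name):
        # Store only the environment id so the factory stays lightweight.
        self.env_name = env_name

    def __call__(self):
        # Build a fresh environment each time the factory is invoked.
        return make(self.env_name)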
Example 15
from tf2rl.algos.ppo import PPO
from tf2rl.envs.utils import is_discrete, get_act_dim, make
from tf2rl.experiments.me_trpo_trainer import MeTrpoTrainer
from examples.run_mpc import reward_fn_pendulum

if __name__ == "__main__":
    parser = MeTrpoTrainer.get_argument()
    parser = PPO.get_argument(parser)
    parser.set_defaults(episode_max_steps=100)
    parser.set_defaults(n_collect_steps=2048)
    parser.set_defaults(n_generate_steps=2048)
    args = parser.parse_args()

    args.n_generate_steps = args.horizon

    env = make("Pendulum-v0")
    test_env = make("Pendulum-v0")

    policy = PPO(state_shape=env.observation_space.shape,
                 action_dim=get_act_dim(env.action_space),
                 is_discrete=is_discrete(env.action_space),
                 max_action=None if is_discrete(env.action_space) else
                 env.action_space.high[0],
                 batch_size=args.batch_size,
                 actor_units=(32, 32),
                 critic_units=(32, 32),
                 n_epoch=10,
                 lr_actor=3e-4,
                 lr_critic=3e-4,
                 hidden_activation_actor="tanh",
                 hidden_activation_critic="tanh",
Example 16
if __name__ == '__main__':
    parser = IRLTrainer.get_argument()
    parser = GAIfO.get_argument(parser)
    parser.add_argument('--env-name', type=str, default="Pendulum-v0")
    args = parser.parse_args()

    if args.expert_path_dir is None:
        print("Plaese generate demonstrations first")
        print(
            "python examples/run_sac.py --env-name=Pendulum-v0 --save-test-path --test-interval=50000"
        )
        exit()

    units = [400, 300]

    env = make(args.env_name)
    test_env = make(args.env_name)
    policy = DDPG(state_shape=env.observation_space.shape,
                  action_dim=env.action_space.high.size,
                  max_action=env.action_space.high[0],
                  gpu=args.gpu,
                  actor_units=units,
                  critic_units=units,
                  n_warmup=10000,
                  batch_size=100)
    irl = GAIfO(state_shape=env.observation_space.shape,
                units=units,
                enable_sn=args.enable_sn,
                batch_size=32,
                gpu=args.gpu)
    expert_trajs = restore_latest_n_traj(args.expert_path_dir,
Example 17
def env_fn_discrete():
    return make("CartPole-v0")