Example 1
def pong_model_free():
    """TODO(piotrmilos): Document this."""
    hparams = tf.contrib.training.HParams(
        epochs_num=4,
        eval_every_epochs=2,
        num_agents=2,
        optimization_epochs=3,
        epoch_length=30,
        entropy_loss_coef=0.003,
        learning_rate=8e-05,
        optimizer="Adam",
        policy_network=feed_forward_cnn_small_categorical_fun,
        gae_lambda=0.985,
        num_eval_agents=2,
        max_gradients_norm=0.5,
        gae_gamma=0.985,
        optimization_batch_size=4,
        clipping_coef=0.2,
        value_loss_coef=1,
        save_models_every_epochs=False,
        frame_stack_size=4,
        force_beginning_resets=False,
    )
    env = gym_env.T2TGymEnv("PongNoFrameskip-v4", batch_size=2)
    env.start_new_epoch(0)
    hparams.add_hparam("env_fn", make_real_env_fn(env))
    eval_env = gym_env.T2TGymEnv("PongNoFrameskip-v4", batch_size=2)
    eval_env.start_new_epoch(0)
    hparams.add_hparam("eval_env_fn", make_real_env_fn(eval_env))
    return hparams
Example 2
    def test_generating_and_loading_preserves_rollouts(self):
        env_name = TEST_ENV_NAME
        from_env = gym_env.T2TGymEnv(env_name, batch_size=1)
        from_env.start_new_epoch(0, self.out_dir)
        self.play(from_env, n_steps=20)
        from_env.generate_data(self.out_dir)

        to_env = gym_env.T2TGymEnv(env_name, batch_size=1)
        to_env.start_new_epoch(0, self.out_dir)

        self.assertEqual(from_env.current_epoch_rollouts(),
                         to_env.current_epoch_rollouts())
Example 3
def initialize_env_specs(hparams):
    """Initializes env_specs using T2TGymEnvs."""
    if getattr(hparams, "game", None):
        game_name = gym_env.camel_case_name(hparams.game)
        env = gym_env.T2TGymEnv("{}Deterministic-v4".format(game_name),
                                batch_size=hparams.batch_size)
        env.start_new_epoch(0)
        hparams.add_hparam("env_fn", rl.make_real_env_fn(env))
        eval_env = gym_env.T2TGymEnv("{}Deterministic-v4".format(game_name),
                                     batch_size=hparams.eval_batch_size)
        eval_env.start_new_epoch(0)
        hparams.add_hparam("eval_env_fn", rl.make_real_env_fn(eval_env))
    return hparams
Example 4
def initialize_env_specs(hparams):
  """Initializes env_specs using T2TGymEnvs."""
  if getattr(hparams, "game", None):
    game_name = gym_env.camel_case_name(hparams.game)
    env = gym_env.T2TGymEnv("{}Deterministic-v4".format(game_name),
                            batch_size=hparams.num_agents)
    env.start_new_epoch(0)
    hparams.add_hparam("environment_spec", rl.standard_atari_env_spec(env))
    eval_env = gym_env.T2TGymEnv("{}Deterministic-v4".format(game_name),
                                 batch_size=hparams.num_eval_agents)
    eval_env.start_new_epoch(0)
    hparams.add_hparam(
        "environment_eval_spec", rl.standard_atari_env_eval_spec(eval_env))
  return hparams
Example 5
def _define_batch_env(environment_spec, num_agents):
    """Create environments and apply all desired wrappers."""

    with tf.variable_scope("environments"):
        envs = [environment_spec.env_lambda() for _ in range(num_agents)]
        env = gym_env.T2TGymEnv(envs)
        return env
Example 6
def pong_model_free():
  """TODO(piotrmilos): Document this."""
  hparams = mfrl_base()
  hparams.batch_size = 2
  hparams.ppo_eval_every_epochs = 2
  hparams.ppo_epochs_num = 4
  hparams.add_hparam("ppo_optimization_epochs", 3)
  hparams.add_hparam("ppo_epoch_length", 30)
  hparams.add_hparam("ppo_learning_rate", 8e-05)
  hparams.add_hparam("ppo_optimizer", "Adam")
  hparams.add_hparam("ppo_optimization_batch_size", 4)
  hparams.add_hparam("ppo_save_models_every_epochs", 1000000)
  env = gym_env.T2TGymEnv("PongNoFrameskip-v4", batch_size=2)
  env.start_new_epoch(0)
  hparams.add_hparam("env_fn", make_real_env_fn(env))
  eval_env = gym_env.T2TGymEnv("PongNoFrameskip-v4", batch_size=2)
  eval_env.start_new_epoch(0)
  hparams.add_hparam("eval_env_fn", make_real_env_fn(eval_env))
  return hparams
Example 7
def _define_batch_env(environment_spec, num_agents, xvfb=False):
    """Create environments and apply all desired wrappers."""

    with tf.variable_scope("environments"):
        envs = [
            ExternalProcessEnv(environment_spec.env_lambda, xvfb)
            for _ in range(num_agents)
        ]
        env = gym_env.T2TGymEnv(envs)
        env = py_func_batch_env.PyFuncBatchEnv(env)
        return env
Example 8
  def init_batch_and_play(self, env_lambda, n_steps=1, **kwargs):
    raw_envs = [env_lambda(), env_lambda()]
    env = gym_env.T2TGymEnv(raw_envs, **kwargs)
    obs = list()
    rewards = list()
    obs.append(env.reset())
    for _ in range(n_steps):
      step_obs, step_rewards, dones = env.step(actions=[0, 0])
      obs.append(step_obs)
      rewards.append(step_rewards)
      for (i, done) in enumerate(dones):
        if done:
          env.reset([i])
    return env, obs, rewards
Example 9
    def test_generates(self):
        env = gym_env.T2TGymEnv([TestEnv(), TestEnv()])
        env.reset()
        for _ in range(20):
            (_, _, dones) = env.step([0, 0])
            for (i, done) in enumerate(dones):
                if done:
                    env.reset([i])
        env.generate_data(self.out_dir, tmp_dir=None)

        filenames = os.listdir(self.out_dir)
        self.assertTrue(filenames)
        path = os.path.join(self.out_dir, filenames[0])
        records = list(tf.python_io.tf_record_iterator(path))
        self.assertTrue(records)
Example 10
  def init_batch_and_play(self, env_name, steps_per_epoch=1,
                          epochs=(0,), generate_data=False, **kwargs):
    env = gym_env.T2TGymEnv(env_name, batch_size=2, **kwargs)
    obs = list()
    rewards = list()
    num_dones = 0
    for epoch in epochs:
      env.start_new_epoch(epoch, self.out_dir)
      _, epoch_obs, epoch_rewards, epoch_num_dones = \
          self.play(env, steps_per_epoch)
      if generate_data:
        env.generate_data(self.out_dir)
      obs.extend(epoch_obs)
      rewards.extend(epoch_rewards)
      num_dones += epoch_num_dones
    return env, obs, rewards, num_dones
Example 11
  def test_shards_per_epoch(self):
    def num_ending_with(filenames, suffix):
      return sum(
          1 for filename in filenames if filename.endswith(suffix)
      )

    env = gym_env.T2TGymEnv(TEST_ENV_NAME, batch_size=2)
    env.start_new_epoch(0, self.out_dir)
    self.play(env, n_steps=20)
    env.generate_data(self.out_dir)

    filenames = os.listdir(self.out_dir)
    num_shards_per_epoch = len(filenames)
    self.assertEqual(num_ending_with(filenames, ".0"), num_shards_per_epoch)

    env.start_new_epoch(1, self.out_dir)
    self.play(env, n_steps=20)
    env.generate_data(self.out_dir)

    filenames = os.listdir(self.out_dir)
    self.assertEqual(len(filenames), 2 * num_shards_per_epoch)
    for suffix in (".0", ".1"):
      self.assertEqual(num_ending_with(filenames, suffix), num_shards_per_epoch)
Example 12
  def init_batch_and_play(self, env_lambda, n_steps=1, **kwargs):
    raw_envs = [env_lambda(), env_lambda()]
    env = gym_env.T2TGymEnv(raw_envs, **kwargs)
    env.start_new_epoch(0)
    return self.play(env, n_steps)
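Taken together, the examples above follow one pattern: build a batched T2TGymEnv, start an epoch, step every environment in the batch with one action each, reset individual environments as they finish, and finally dump the collected rollouts to disk. The sketch below distills only the calls shown above into a standalone script; the import path is an assumption that may vary between tensor2tensor versions, and the output directory is just a throwaway temp dir.

import tempfile

# Import path assumed from tensor2tensor's layout; adjust to your version.
from tensor2tensor.data_generators import gym_env

out_dir = tempfile.mkdtemp()

# Two copies of Pong in one batched env, as in Examples 1 and 6.
env = gym_env.T2TGymEnv("PongNoFrameskip-v4", batch_size=2)
env.start_new_epoch(0, out_dir)

env.reset()
for _ in range(20):
  # One action per environment in the batch; action 0 is NOOP for Atari.
  _, _, dones = env.step(actions=[0, 0])
  for i, done in enumerate(dones):
    if done:
      env.reset([i])  # reset only the finished environment (Examples 8 and 9)

# Write the epoch's rollouts as TFRecord shards (Examples 10 and 11).
env.generate_data(out_dir)
print(len(env.current_epoch_rollouts()))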