def main(argv):
    del argv

    # Load the keyboard.
    keyboard = smart_module.SmartModuleImport(hub.Module(FLAGS.keyboard_path))

    # Create the task environment.
    base_env_config = configs.get_fig4_task_config()
    base_env = scavenger.Scavenger(**base_env_config)
    base_env = environment_wrappers.EnvironmentWithLogging(base_env)

    # Wrap the task environment with the keyboard.
    additional_discount = 0.9
    env = environment_wrappers.EnvironmentWithKeyboardDirect(
        env=base_env,
        keyboard=keyboard,
        keyboard_ckpt_path=None,
        additional_discount=additional_discount,
        call_and_return=False)

    # Create the player agent.
    agent = regressed_agent.Agent(
        batch_size=10,
        optimizer_name="AdamOptimizer",
        optimizer_kwargs=dict(learning_rate=3e-2, ),
        init_w=np.random.normal(size=keyboard.num_cumulants) * 0.1,
    )

    _, ema_returns = experiment.run(env,
                                    agent,
                                    num_episodes=FLAGS.num_episodes,
                                    report_every=FLAGS.report_every,
                                    num_eval_reps=100)
    if FLAGS.output_path:
        experiment.write_returns_to_file(FLAGS.output_path, ema_returns)
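# --- Hedged sketch (not part of the original source). The entry points above
# and below read FLAGS.keyboard_path, FLAGS.num_episodes, FLAGS.report_every
# and FLAGS.output_path; one plausible set of absl flag definitions, with
# illustrative names and defaults only, is:
from absl import app, flags

flags.DEFINE_string("keyboard_path", None, "Path to an exported keyboard hub module.")
flags.DEFINE_integer("num_episodes", 1000, "Number of episodes to run.")
flags.DEFINE_integer("report_every", 10, "Reporting interval, in episodes.")
flags.DEFINE_string("output_path", "", "Optional file for writing returns.")
FLAGS = flags.FLAGS

if __name__ == "__main__":
  app.run(main)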
def main(argv):
  del argv

  # Load the keyboard.
  keyboard = smart_module.SmartModuleImport(hub.Module(FLAGS.keyboard_path))

  # Create the task environment.
  base_env_config = configs.get_fig4_task_config()
  base_env = scavenger.Scavenger(**base_env_config)
  base_env = environment_wrappers.EnvironmentWithLogging(base_env)

  # Wrap the task environment with the keyboard.
  additional_discount = 0.9
  env = environment_wrappers.EnvironmentWithKeyboardDirect(
      env=base_env,
      keyboard=keyboard,
      keyboard_ckpt_path=None,
      additional_discount=additional_discount,
      call_and_return=False)

  # Create the player agent.
  agent = regressed_agent.Agent(
      batch_size=10,
      optimizer_name="AdamOptimizer",
      # Disable training.
      optimizer_kwargs=dict(learning_rate=0.0,),
      init_w=[1., -1.])

  returns = []
  for _ in range(FLAGS.num_episodes):
    returns.append(experiment.run_episode(env, agent))
  tf.logging.info("#" * 80)
  tf.logging.info(
      f"Avg. return over {FLAGS.num_episodes} episodes is {np.mean(returns)}")
  tf.logging.info("#" * 80)
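# Hedged sketch (an assumption, not the repository's experiment.run_episode):
# one way a single-episode rollout could look for a dm_env-style environment
# and an agent exposing a step(timestep) -> action method (hypothetical).
def run_episode_sketch(env, agent):
  timestep = env.reset()
  total_return = 0.0
  while not timestep.last():
    action = agent.step(timestep)  # hypothetical agent interface
    timestep = env.step(action)
    total_return += timestep.reward or 0.0
  return total_return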
Example 3
def main(argv):
    del argv

    # Pretrain the keyboard and save a checkpoint.
    if FLAGS.keyboard_path:
        keyboard_path = FLAGS.keyboard_path
    else:
        with tf.Graph().as_default():
            export_path = "/tmp/option_keyboard/keyboard"
            _ = keyboard_utils.create_and_train_keyboard(
                num_episodes=FLAGS.num_pretrain_episodes,
                export_path=export_path)
            keyboard_path = os.path.join(export_path, "tfhub")

    # Load the keyboard.
    keyboard = smart_module.SmartModuleImport(hub.Module(keyboard_path))

    # Create the task environment.
    base_env_config = configs.get_task_config()
    base_env = scavenger.Scavenger(**base_env_config)
    base_env = environment_wrappers.EnvironmentWithLogging(base_env)

    # Wrap the task environment with the keyboard.
    additional_discount = 0.9
    env = environment_wrappers.EnvironmentWithKeyboard(
        env=base_env,
        keyboard=keyboard,
        keyboard_ckpt_path=None,
        n_actions_per_dim=3,
        additional_discount=additional_discount,
        call_and_return=False)

    # Create the player agent.
    agent = dqn_agent.Agent(obs_spec=env.observation_spec(),
                            action_spec=env.action_spec(),
                            network_kwargs=dict(
                                output_sizes=(64, 128),
                                activate_final=True,
                            ),
                            epsilon=0.1,
                            additional_discount=additional_discount,
                            batch_size=10,
                            optimizer_name="AdamOptimizer",
                            optimizer_kwargs=dict(learning_rate=3e-4, ))

    _, ema_returns = experiment.run(env,
                                    agent,
                                    num_episodes=FLAGS.num_episodes,
                                    report_every=FLAGS.report_every)
    if FLAGS.output_path:
        experiment.write_returns_to_file(FLAGS.output_path, ema_returns)
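# Hedged illustration (an assumption about EnvironmentWithKeyboard's intent,
# not code from the repository): with n_actions_per_dim=3 the wrapper appears
# to expose one discrete action per combination of per-cumulant weights, e.g.
# weights drawn from {-1, 0, 1} for each of the keyboard's cumulant dimensions.
import itertools
import numpy as np

candidate_weights = np.array(
    list(itertools.product([-1.0, 0.0, 1.0], repeat=2)))
print(candidate_weights.shape)  # (9, 2): 3 ** num_cumulants candidate options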
def evaluate_keyboard(keyboard_path):
  """Evaluate a keyboard."""

  angles_to_sweep = np.deg2rad(np.linspace(-90, 180, num=19, endpoint=True))
  weights_to_sweep = np.stack(
      [np.cos(angles_to_sweep),
       np.sin(angles_to_sweep)], axis=-1)
  weights_to_sweep /= np.sum(
      np.maximum(weights_to_sweep, 0.0), axis=-1, keepdims=True)
  weights_to_sweep = np.clip(weights_to_sweep, -1000, 1000)
  tf.logging.info(weights_to_sweep)

  # Load the keyboard.
  keyboard = smart_module.SmartModuleImport(hub.Module(keyboard_path))

  # Create the task environment.
  all_returns = []
  for w_to_sweep in weights_to_sweep.tolist():
    base_env_config = configs.get_fig5_task_config(w_to_sweep)
    base_env = scavenger.Scavenger(**base_env_config)
    base_env = environment_wrappers.EnvironmentWithLogging(base_env)

    # Wrap the task environment with the keyboard.
    with tf.variable_scope(None, default_name="inner_loop"):
      additional_discount = 0.9
      env = environment_wrappers.EnvironmentWithKeyboardDirect(
          env=base_env,
          keyboard=keyboard,
          keyboard_ckpt_path=None,
          additional_discount=additional_discount,
          call_and_return=False)

      # Create the player agent.
      agent = regressed_agent.Agent(
          batch_size=10,
          optimizer_name="AdamOptimizer",
          # Disable training.
          optimizer_kwargs=dict(learning_rate=0.0,),
          init_w=w_to_sweep)

    returns = []
    for _ in range(FLAGS.num_episodes):
      returns.append(experiment.run_episode(env, agent))
    tf.logging.info(f"Task: {w_to_sweep}, mean returns over "
                    f"{FLAGS.num_episodes} episodes is {np.mean(returns)}")
    all_returns.append(returns)

  return all_returns, weights_to_sweep
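# Hedged worked example (not from the original): the sweep above places weight
# vectors on the unit circle from -90 to 180 degrees, then rescales each so
# its positive components sum to 1. For 45 degrees:
import numpy as np

angle = np.deg2rad(45.0)
w = np.array([np.cos(angle), np.sin(angle)])   # ~[0.707, 0.707]
w /= np.sum(np.maximum(w, 0.0))                # ~[0.5, 0.5]
# Near -90 degrees the positive part is ~0, so the division blows up;
# that is what the np.clip(..., -1000, 1000) above guards against.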
Example 5
def main(argv):
    del argv

    # Load the keyboard.
    keyboard = smart_module.SmartModuleImport(hub.Module(FLAGS.keyboard_path))

    # Create the task environment.
    base_env_config = configs.get_task_config()
    base_env = scavenger.Scavenger(**base_env_config)
    base_env = environment_wrappers.EnvironmentWithLogging(base_env)

    # Wrap the task environment with the keyboard.
    additional_discount = 0.9
    env = environment_wrappers.EnvironmentWithKeyboardDirect(
        env=base_env,
        keyboard=keyboard,
        keyboard_ckpt_path=None,
        additional_discount=additional_discount,
        call_and_return=False)

    # Create the player agent.
    agent = regressed_agent.Agent(
        batch_size=10,
        optimizer_name="AdamOptimizer",
        # Disable training.
        optimizer_kwargs=dict(learning_rate=0.0, ),
        init_w=[float(x) for x in FLAGS.test_w])

    returns = []
    for _ in range(FLAGS.num_episodes):
        returns.append(experiment.run_episode(env, agent))
    tf.logging.info("#" * 80)
    tf.logging.info(
        f"Avg. return over {FLAGS.num_episodes} episodes is {np.mean(returns)}"
    )
    tf.logging.info("#" * 80)

    if FLAGS.output_path:
        with gfile.GFile(FLAGS.output_path, "w") as file:
            writer = csv.writer(file, delimiter=" ", quoting=csv.QUOTE_MINIMAL)
            writer.writerow(["return"])
            for val in returns:
                writer.writerow([val])
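# Hedged sketch (assumption): FLAGS.test_w is consumed as a sequence of strings
# cast to float, so a comma-separated list flag would fit, e.g.
#   --test_w=1.0,-1.0
from absl import flags

flags.DEFINE_list("test_w", ["1.0", "-1.0"], "Task weight over the cumulants.")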
Example 6
  def __init__(self, env, model_path):
    self._env = env

    create_ph = lambda x: tf.placeholder(shape=x.shape, dtype=x.dtype)
    add_batch = lambda x: tf.expand_dims(x, axis=0)

    # Make session and callables.
    with tf.Graph().as_default():
      model = smart_module.SmartModuleImport(hub.Module(model_path))

      obs_spec = env.observation_spec()
      obs_ph = tree.map_structure(create_ph, obs_spec)
      action_ph = tf.placeholder(shape=(), dtype=tf.int32)
      phis = model(tree.map_structure(add_batch, obs_ph), add_batch(action_ph))

      self.num_phis = phis.shape.as_list()[-1]
      self._last_phis = np.zeros((self.num_phis,), dtype=np.float32)

      session = tf.Session()
      self._session = session
      self._phis_fn = session.make_callable(
          phis[0], tree.flatten([obs_ph, action_ph]))
      self._session.run(tf.global_variables_initializer())
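
  # Hedged usage sketch (not from the original class): how the callable built
  # above could be invoked, e.g. from a step() method, to compute the cumulant
  # vector for the latest (observation, action) pair.
  def _compute_phis_sketch(self, observation, action):
    # make_callable expects feeds in the same order as the flattened
    # placeholders: every observation leaf first, then the scalar action.
    flat_inputs = tree.flatten([observation, action])
    self._last_phis = self._phis_fn(*flat_inputs)
    return self._last_phis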