def testCreateHparams(self):
    # Get json_path
    pkg, _ = os.path.split(__file__)
    pkg, _ = os.path.split(pkg)
    json_path = os.path.join(
        pkg, "test_data", "transformer_test_ckpt", "hparams.json")

    # Create hparams
    hparams = trainer_lib.create_hparams("transformer_big", "hidden_size=1",
                                         hparams_path=json_path)
    self.assertEqual(2, hparams.num_hidden_layers)  # from json
    self.assertEqual(1, hparams.hidden_size)  # from hparams_overrides_str

    # Compare with base hparams
    base_hparams = trainer_lib.create_hparams("transformer_big")
    self.assertEqual(len(base_hparams.values()), len(hparams.values()))
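
For reference, a minimal standalone sketch of the same call, assuming tensor2tensor is installed and json_path points at a previously saved hparams.json; values are layered as base set, then the JSON file, then the overrides string, consistent with the assertions above:

# Minimal sketch of the layering exercised by the test above.
from tensor2tensor.utils import trainer_lib

hparams = trainer_lib.create_hparams(
    "transformer_big",        # registered base hparams set
    "hidden_size=1",          # overrides string, applied last
    hparams_path=json_path)   # hparams.json from a previous run, applied second
print(hparams.hidden_size)    # 1, taken from the overrides string
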
  def test_get_vis_data_from_string(self):
    visualizer = visualization.AttentionVisualizer(
        hparams_set, model_name, self.data_dir, problem_name, beam_size=8)

    input_sentence = 'I have two dogs.'
    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      _, inp_text, out_text, att_mats = (
          visualizer.get_vis_data_from_string(sess, input_sentence))

    self.assertAllEqual(
        [u'I_', u'have_', u'two_', u'dogs_', u'._', u'<EOS>'], inp_text)

    hparams = trainer_lib.create_hparams(
        hparams_set, data_dir=self.data_dir, problem_name=problem_name)

    enc_atts, dec_atts, encdec_atts = att_mats

    self.assertAllEqual(hparams.num_hidden_layers, len(enc_atts))

    enc_atts = enc_atts[0]
    dec_atts = dec_atts[0]
    encdec_atts = encdec_atts[0]

    batch_size = 1
    num_heads = hparams.num_heads
    inp_len = len(inp_text)
    out_len = len(out_text)

    self.assertAllEqual(
        (batch_size, num_heads, inp_len, inp_len), enc_atts.shape)
    self.assertAllEqual(
        (batch_size, num_heads, out_len, out_len), dec_atts.shape)
    self.assertAllEqual(
        (batch_size, num_heads, out_len, inp_len), encdec_atts.shape)
Example #3
  def testModel(self):
    # HParams
    hparams = trainer_lib.create_hparams(
        "transformer_tiny", data_dir=self.data_dir, problem_name="tiny_algo")

    # Dataset
    problem = hparams.problem
    dataset = problem.dataset(tf.estimator.ModeKeys.TRAIN, self.data_dir)
    dataset = dataset.repeat(None).padded_batch(10, dataset.output_shapes)
    features = dataset.make_one_shot_iterator().get_next()
    features = problem_lib.standardize_shapes(features)

    # Model
    model = registry.model("transformer")(hparams, tf.estimator.ModeKeys.TRAIN)
    logits, losses = model(features)

    self.assertTrue("training" in losses)
    loss = losses["training"]

    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      logits_val, loss_val = sess.run([logits, loss])
      logits_shape = list(logits_val.shape)
      logits_shape[1] = None
      self.assertAllEqual(logits_shape, [10, None, 1, 1, 4])
      self.assertEqual(loss_val.shape, tuple())
  def test_no_crash_pendulum(self):
    hparams = trainer_lib.create_hparams(
        "ppo_continuous_action_base",
        TrainTest.test_config)

    hparams.add_hparam("environment_spec", simple_gym_spec("Pendulum-v0"))
    rl_trainer_lib.train(hparams)
Example #5
def main(_):
  problem_name = FLAGS.problem
  if "video" not in problem_name and "gym" not in problem_name:
    print("This tool only works for video problems.")
    return

  mode = tf.estimator.ModeKeys.TRAIN
  hparams = trainer_lib.create_hparams(
      FLAGS.hparams_set,
      FLAGS.hparams,
      data_dir=os.path.expanduser(FLAGS.data_dir),
      problem_name=problem_name)

  dataset = hparams.problem.input_fn(mode, hparams)
  features = dataset.make_one_shot_iterator().get_next()

  tf.gfile.MakeDirs(FLAGS.output_dir)
  base_template = os.path.join(FLAGS.output_dir, FLAGS.problem)
  count = 0
  with tf.train.MonitoredTrainingSession() as sess:
    while not sess.should_stop():
      # TODO(mbz): figure out what the second output is.
      data, _ = sess.run(features)
      video_batch = np.concatenate((data["inputs"], data["targets"]), axis=1)

      for video in video_batch:
        print("Saving {}/{}".format(count, FLAGS.num_samples))
        name = "%s_%05d" % (base_template, count)
        decoding.save_video(video, name + "_{:05d}.png")
        create_gif(name)
        count += 1

        if count == FLAGS.num_samples:
          sys.exit(0)
  def test_no_crash_cartpole(self):
    hparams = trainer_lib.create_hparams(
        "ppo_discrete_action_base",
        TrainTest.test_config)

    hparams.add_hparam("environment_spec",
                       standard_atari_env_spec("CartPole-v0"))
    rl_trainer_lib.train(hparams)
Example #7
def create_hparams():
  """Create hyper-parameters object."""
  return trainer_lib.create_hparams(
      FLAGS.hparams_set,
      FLAGS.hparams,
      data_dir=os.path.expanduser(FLAGS.data_dir),
      problem_name=FLAGS.problem,
      hparams_path=_get_hparams_path())
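
_get_hparams_path() is not shown in this snippet; a plausible sketch, consistent with the create_hparams variant further down this page that inlines the same output_dir/hparams.json logic:

def _get_hparams_path():
  # Sketch only: mirrors the inline logic used in the later create_hparams
  # variant on this page.
  hparams_path = None
  if FLAGS.output_dir:
    hparams_path = os.path.join(FLAGS.output_dir, "hparams.json")
  return hparams_path
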
Example #8
 def __init__(self, config):
     self.translate_problem = problems.problem(config.PROBLEM)
     self.encoder = self.translate_problem.feature_encoders(
         config.VOCAB_DIR)
     self.hparams = trainer_lib.create_hparams(config.HPARAMS,
                                               data_dir=config.VOCAB_DIR,
                                               problem_name=config.PROBLEM)
     self.checkpoint_path = config.CHECKPOINT_PATH
     self.translate_model = registry.model(config.MODEL)(self.hparams,
                                                         Modes.PREDICT)
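
The constructor above only assumes a config object exposing a handful of attributes; a hypothetical config with the attribute names it reads (all values below are placeholders, not part of the original snippet) could look like:

class TranslateConfig(object):
    # Placeholder values; only the attribute names are taken from the snippet.
    PROBLEM = "translate_ende_wmt32k"      # registered problem name
    VOCAB_DIR = "/path/to/data_dir"        # directory holding the vocab files
    HPARAMS = "transformer_base"           # registered hparams set
    MODEL = "transformer"                  # registered model name
    CHECKPOINT_PATH = "/path/to/model.ckpt"
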
Example #9
def main(_):
    now = datetime.datetime.now()
    now_tag = now.strftime("%Y_%m_%d_%H_%M")
    loop_hparams = trainer_lib.create_hparams(FLAGS.loop_hparams_set,
                                              FLAGS.loop_hparams)
    if FLAGS.worker_to_game_map and FLAGS.total_num_workers > 1:
        loop_hparams.game = get_game_for_worker(FLAGS.worker_to_game_map,
                                                FLAGS.worker_id + 1)
        tf.logging.info("Set game to %s." % loop_hparams.game)
    loop_hparams.eval_rl_env_max_episode_steps = FLAGS.eval_step_limit
    loop_hparams.eval_batch_size = FLAGS.eval_batch_size
    planner_hparams = trainer_lib.create_hparams(FLAGS.planner_hparams_set,
                                                 FLAGS.planner_hparams)
    policy_dir = FLAGS.policy_dir
    model_dir = FLAGS.model_dir
    eval_metrics_dir = FLAGS.eval_metrics_dir
    if FLAGS.output_dir:
        cur_dir = FLAGS.output_dir
        if FLAGS.total_num_workers > 1:
            cur_dir = os.path.join(cur_dir, "%d" % (FLAGS.worker_id + 1))
        policy_dir = os.path.join(cur_dir, "policy")
        model_dir = os.path.join(cur_dir, "world_model")
        eval_dir_basename = "evaluator_"
        if FLAGS.agent == "planner":
            eval_dir_basename = "planner_"
        eval_metrics_dir = os.path.join(cur_dir, eval_dir_basename + now_tag)
        tf.logging.info("Writing metrics to %s." % eval_metrics_dir)
        if not tf.gfile.Exists(eval_metrics_dir):
            tf.gfile.MkDir(eval_metrics_dir)
    evaluate(
        loop_hparams,
        planner_hparams,
        policy_dir,
        model_dir,
        eval_metrics_dir,
        FLAGS.agent,
        FLAGS.mode,
        FLAGS.eval_with_learner,
        FLAGS.log_every_steps if FLAGS.log_every_steps > 0 else None,
        debug_video_path=FLAGS.debug_video_path,
        num_debug_videos=FLAGS.num_debug_videos,
        random_starts_step_limit=FLAGS.random_starts_step_limit,
    )
def get_problem_model_hparams(config):
    """Constructs problem, model, and hparams objects from a config."""
    hparams = trainer_lib.create_hparams(config.hparams_set,
                                         config.hparams,
                                         data_dir=os.path.expanduser(
                                             config.data_dir),
                                         problem_name=config.problem)
    problem = registry.problem(config.problem)
    model = registry.model(config.model)(hparams, tf.estimator.ModeKeys.EVAL)
    return (problem, model, hparams)
def create_hparams():
    hparams_path = None
    if FLAGS.output_dir:
        hparams_path = os.path.join(FLAGS.output_dir, "hparams.json")
    return trainer_lib.create_hparams(FLAGS.hparams_set,
                                      FLAGS.hparams,
                                      data_dir=os.path.expanduser(
                                          FLAGS.data_dir),
                                      problem_name=FLAGS.problem,
                                      hparams_path=hparams_path)
def train_agent(problem_name, agent_model_dir,
                event_dir, world_model_dir, epoch_data_dir, hparams, epoch=0,
                is_final_epoch=False):
  """Train the PPO agent in the simulated environment."""
  gym_problem = registry.problem(problem_name)
  ppo_hparams = trainer_lib.create_hparams(hparams.ppo_params)
  ppo_params_names = ["epochs_num", "epoch_length",
                      "learning_rate", "num_agents",
                      "optimization_epochs"]

  for param_name in ppo_params_names:
    ppo_param_name = "ppo_" + param_name
    if ppo_param_name in hparams:
      ppo_hparams.set_hparam(param_name, hparams.get(ppo_param_name))

  ppo_epochs_num = hparams.ppo_epochs_num
  if is_final_epoch:
    ppo_epochs_num *= 2
    ppo_hparams.epoch_length *= 2
  ppo_hparams.save_models_every_epochs = ppo_epochs_num
  ppo_hparams.world_model_dir = world_model_dir
  ppo_hparams.add_hparam("force_beginning_resets", True)

  # Adding model hparams for model-specific adjustments
  model_hparams = trainer_lib.create_hparams(hparams.generative_model_params)
  ppo_hparams.add_hparam("model_hparams", model_hparams)

  environment_spec = copy.copy(gym_problem.environment_spec)
  environment_spec.simulation_random_starts = hparams.simulation_random_starts
  do_flip = hparams.simulation_flip_first_random_for_beginning
  environment_spec.simulation_flip_first_random_for_beginning = do_flip
  environment_spec.intrinsic_reward_scale = hparams.intrinsic_reward_scale

  ppo_hparams.add_hparam("environment_spec", environment_spec)

  with temporary_flags({
      "problem": problem_name,
      "model": hparams.generative_model,
      "hparams_set": hparams.generative_model_params,
      "output_dir": world_model_dir,
      "data_dir": epoch_data_dir,
  }):
    rl_trainer_lib.train(ppo_hparams, event_dir, agent_model_dir, epoch=epoch)
Example #13
def create_hparams():
    """Create hparams."""
    if FLAGS.use_tpu and "tpu" not in FLAGS.hparams_set:
        tf.logging.warn("Not all hyperparameter sets work on TPU. "
                        "Prefer hparams_sets with a '_tpu' suffix, "
                        "e.g. transformer_tpu, if available for your model.")
    hparams_path = os.path.join(FLAGS.output_dir, "hparams.json")
    return trainer_lib.create_hparams(FLAGS.hparams_set,
                                      FLAGS.hparams,
                                      hparams_path=hparams_path)
Example #14
def train_agent_real_env(problem_name,
                         agent_model_dir,
                         event_dir,
                         world_model_dir,
                         epoch_data_dir,
                         hparams,
                         epoch=0,
                         is_final_epoch=False):
    """Train the PPO agent in the real environment."""
    global dumper_path, ppo_data_dumper_counter

    gym_problem = registry.problem(problem_name)
    ppo_hparams = trainer_lib.create_hparams(hparams.ppo_params)
    ppo_params_names = [
        "epochs_num", "epoch_length", "learning_rate", "num_agents",
        "eval_every_epochs", "optimization_epochs", "effective_num_agents"
    ]

    # This should be overridden.
    ppo_hparams.add_hparam("effective_num_agents", None)
    for param_name in ppo_params_names:
        ppo_param_name = "real_ppo_" + param_name
        if ppo_param_name in hparams:
            ppo_hparams.set_hparam(param_name, hparams.get(ppo_param_name))

    ppo_hparams.epochs_num = _ppo_training_epochs(hparams, epoch,
                                                  is_final_epoch, True)
    # We do not save the model, as that resets frames that we need at restarts.
    # But we do need to save at the last step, so we set this very high.
    ppo_hparams.save_models_every_epochs = 1000000

    environment_spec = copy.copy(gym_problem.environment_spec)

    if hparams.gather_ppo_real_env_data:
        # TODO(piotrmilos): This should be refactored
        assert hparams.real_ppo_num_agents == 1, (
            "It is required to use collect with pyfunc_wrapper")

        ppo_data_dumper_counter = 0
        dumper_path = os.path.join(epoch_data_dir, "dumper")
        tf.gfile.MakeDirs(dumper_path)
        dumper_spec = [PyFuncWrapper, {"process_fun": ppo_data_dumper}]
        environment_spec.wrappers.insert(2, dumper_spec)

    ppo_hparams.add_hparam("environment_spec", environment_spec)

    with temporary_flags({
            "problem": problem_name,
            "output_dir": world_model_dir,
            "data_dir": epoch_data_dir,
    }):
        rl_trainer_lib.train(ppo_hparams,
                             event_dir + "real",
                             agent_model_dir,
                             name_scope="ppo_real%d" % (epoch + 1))
Example #15
def main(_):
  decode_hp = decode_hparams(FLAGS.decode_hparams)
  trainer_lib.set_random_seed(FLAGS.random_seed)
  if FLAGS.output_dir is None:
    raise ValueError("Expected output_dir to be set to a valid path.")

  hparams = trainer_lib.create_hparams(
      FLAGS.hparams_set, FLAGS.hparams, data_dir=FLAGS.data_dir,
      problem_name=FLAGS.problem)
  if hparams.batch_size != 1:
    raise ValueError("Set batch-size to be equal to 1")

  # Prepare the dataset using PREDICT mode.
  dataset_split = "test" if FLAGS.eval_use_test_set else None
  dataset = hparams.problem.dataset(
      tf.estimator.ModeKeys.PREDICT, shuffle_files=False, hparams=hparams,
      data_dir=FLAGS.data_dir, dataset_split=dataset_split)
  dataset = dataset.batch(hparams.batch_size)
  dataset = dataset.make_one_shot_iterator().get_next()

  # Obtain frame interpolations.
  ops = [glow_ops.get_variable_ddi, glow_ops.actnorm, glow_ops.get_dropout]
  var_scope = tf.variable_scope("next_frame_glow/body", reuse=tf.AUTO_REUSE)
  with arg_scope(ops, init=False), var_scope:
    interpolations, first_frame, last_frame = interpolate(
        dataset, hparams, decode_hp)

  var_list = tf.global_variables()
  saver = tf.train.Saver(var_list)

  # Get latest checkpoints from model_dir.
  ckpt_path = tf.train.latest_checkpoint(FLAGS.output_dir)
  final_dir = get_summaries_log_dir(decode_hp, FLAGS.output_dir, dataset_split)
  summary_writer = tf.summary.FileWriter(final_dir)
  global_step = decoding.latest_checkpoint_step(FLAGS.output_dir)

  sample_ind = 0
  num_samples = decode_hp.num_samples
  all_summaries = []

  with tf.train.MonitoredTrainingSession() as sess:
    saver.restore(sess, ckpt_path)

    while not sess.should_stop() and sample_ind < num_samples:
      interp_np, first_frame_np, last_frame_np = sess.run(
          [interpolations, first_frame, last_frame])

      interp_summ = interpolations_to_summary(sample_ind, interp_np,
                                              first_frame_np[0],
                                              last_frame_np[0],
                                              hparams, decode_hp)
      all_summaries.extend(interp_summ)
      sample_ind += 1
    all_summaries = tf.Summary(value=list(all_summaries))
    summary_writer.add_summary(all_summaries, global_step)
def train_agent(real_env,
                agent_model_dir,
                event_dir,
                world_model_dir,
                data_dir,
                hparams,
                completed_epochs_num,
                epoch=0,
                is_final_epoch=False):
    """Train the PPO agent in the simulated environment."""
    del data_dir

    frame_stack_size = hparams.frame_stack_size
    initial_frame_rollouts = real_env.current_epoch_rollouts(
        split=tf.contrib.learn.ModeKeys.TRAIN,
        minimal_rollout_frames=frame_stack_size,
    )

    # TODO(koz4k): Move this to a different module.
    def initial_frame_chooser(batch_size):
        """Frame chooser."""

        deterministic_initial_frames =\
            initial_frame_rollouts[0][:frame_stack_size]
        if not hparams.simulation_random_starts:
            # Deterministic starts: repeat first frames from the first rollout.
            initial_frames = [deterministic_initial_frames] * batch_size
        else:
            # Random starts: choose random initial frames from random rollouts.
            initial_frames = random_rollout_subsequences(
                initial_frame_rollouts, batch_size, frame_stack_size)
            if hparams.simulation_flip_first_random_for_beginning:
                # Flip first entry in the batch for deterministic initial frames.
                initial_frames[0] = deterministic_initial_frames

        return np.stack(
            [[frame.observation.decode() for frame in initial_frame_stack]
             for initial_frame_stack in initial_frames])

    env_fn = make_simulated_env_fn(real_env, hparams, hparams.ppo_num_agents,
                                   initial_frame_chooser, world_model_dir)
    base_algo_str = hparams.base_algo
    train_hparams = trainer_lib.create_hparams(hparams.base_algo_params)

    _update_hparams_from_hparams(train_hparams, hparams, base_algo_str + "_")
    completed_epochs_num += sim_ppo_epoch_increment(hparams, is_final_epoch)
    learner = LEARNERS[base_algo_str](frame_stack_size, event_dir,
                                      agent_model_dir)
    learner.train(env_fn,
                  train_hparams,
                  completed_epochs_num,
                  simulated=True,
                  epoch=epoch)

    return completed_epochs_num
Example #17
  def __init__(self, environment_spec, length):
    """Batch of environments inside the TensorFlow graph."""

    observ_space = utils.get_observation_space(environment_spec)
    initial_frames_problem = environment_spec.initial_frames_problem
    observ_shape = (initial_frames_problem.frame_height,
                    initial_frames_problem.frame_width,
                    initial_frames_problem.num_channels)
    observ_space.shape = observ_shape
    action_space = utils.get_action_space(environment_spec)
    super(SimulatedBatchEnv, self).__init__(observ_space, action_space)

    self.length = length
    self._min_reward = initial_frames_problem.min_reward
    self._num_frames = environment_spec.video_num_input_frames
    self._intrinsic_reward_scale = environment_spec.intrinsic_reward_scale

    model_hparams = trainer_lib.create_hparams(
        FLAGS.hparams_set, problem_name=FLAGS.problem)
    model_hparams.force_full_predict = True
    self._model = registry.model(FLAGS.model)(
        model_hparams, tf.estimator.ModeKeys.PREDICT)

    hparams = HParams(video_num_input_frames=
                      environment_spec.video_num_input_frames,
                      video_num_target_frames=
                      environment_spec.video_num_target_frames,
                      environment_spec=environment_spec)

    # TODO(piotrmilos): check if this shouldn't be tf.estimator.ModeKeys.Predict
    initial_frames_dataset = initial_frames_problem.dataset(
        tf.estimator.ModeKeys.TRAIN, FLAGS.data_dir, shuffle_files=False,
        hparams=hparams).take(1)
    start_frame = None
    if environment_spec.simulation_random_starts:
      dataset = initial_frames_problem.dataset(tf.estimator.ModeKeys.TRAIN,
                                               FLAGS.data_dir,
                                               shuffle_files=True,
                                               hparams=hparams)
      dataset = dataset.shuffle(buffer_size=1000)
      if environment_spec.simulation_flip_first_random_for_beginning:
        # Later flip the first random frame in PPO batch for the true beginning.
        start = initial_frames_dataset.make_one_shot_iterator().get_next()
        start_frame = tf.expand_dims(start["inputs"], axis=0)
    else:
      dataset = initial_frames_dataset

    dataset = dataset.map(lambda x: x["inputs"]).repeat()
    self.history_buffer = HistoryBuffer(
        dataset, self.length, self.observ_dtype, start_frame=start_frame)

    self._observ = tf.Variable(
        tf.zeros((len(self),) + observ_shape, self.observ_dtype),
        trainable=False)
Example #18
def make_simulated_env_fn_from_hparams(
    real_env, hparams, batch_size, initial_frame_chooser, model_dir,
    sim_video_dir=None):
  """Creates a simulated env_fn."""
  model_hparams = trainer_lib.create_hparams(hparams.generative_model_params)
  if hparams.wm_policy_param_sharing:
    model_hparams.optimizer_zero_grads = True
  return make_simulated_env_fn(
      reward_range=real_env.reward_range,
      observation_space=real_env.observation_space,
      action_space=real_env.action_space,
      frame_stack_size=hparams.frame_stack_size,
      frame_height=real_env.frame_height, frame_width=real_env.frame_width,
      initial_frame_chooser=initial_frame_chooser, batch_size=batch_size,
      model_name=hparams.generative_model,
      model_hparams=model_hparams,
      model_dir=model_dir,
      intrinsic_reward_scale=hparams.intrinsic_reward_scale,
      sim_video_dir=sim_video_dir,
  )
def make_simulated_env_spec(real_env, hparams):
    """Creates a simulated environment_spec."""
    return rl.standard_atari_env_simulated_spec(
        real_env,
        intrinsic_reward_scale=hparams.intrinsic_reward_scale,
        model_name=hparams.generative_model,
        model_hparams=trainer_lib.create_hparams(
            hparams.generative_model_params),
        # Hardcoded for now. TODO(koz4k): Make it a hparam.
        video_num_input_frames=4,
        video_num_target_frames=1)
Example #20
    def __init__(self, model_dir, config):
        self._signatures = dict()

        self._graph = tf.Graph()
        with self._graph.as_default():
            tf.set_random_seed(1234)

            # initialize the hparams, problem and model
            self._hparams = trainer_lib.create_hparams(
                config['hparams_set'], config.get('hparams_overrides', ''),
                os.path.join(model_dir, 'assets.extra'), config['problem'])
            problem = self._hparams.problem

            decode_hp = decoding.decode_hparams(
                config.get('decode_hparams', ''))

            run_config = trainer_lib.create_run_config(self._hparams,
                                                       model_dir=model_dir,
                                                       schedule="decode")

            model_fn = t2t_model.T2TModel.make_estimator_model_fn(
                config['model'], self._hparams, decode_hparams=decode_hp)

            # create the prediction signatures (input/output ops)
            serving_receiver = problem.direct_serving_input_fn(self._hparams)
            estimator_spec = model_fn(serving_receiver.features,
                                      None,
                                      mode=tf.estimator.ModeKeys.PREDICT,
                                      params=None,
                                      config=run_config)

            for key, sig_spec in estimator_spec.export_outputs.items():
                # only PredictOutputs are supported; ClassificationOutput
                # and RegressionOutputs are weird artifacts of Google shipping
                # almost unmodified TensorFlow graphs through their Cloud ML
                # platform
                assert isinstance(sig_spec, tf.estimator.export.PredictOutput)

                sig = Signature(key, serving_receiver.receiver_tensors,
                                sig_spec.outputs)
                self._signatures[key] = sig

            # load the model & init the session

            scaffold = tf.train.Scaffold()
            checkpoint_filename = os.path.join(
                model_dir, tf.saved_model.constants.VARIABLES_DIRECTORY,
                tf.saved_model.constants.VARIABLES_FILENAME)
            session_creator = tf.train.ChiefSessionCreator(
                scaffold,
                config=run_config.session_config,
                checkpoint_filename_with_path=checkpoint_filename)
            self._session = tf.train.MonitoredSession(
                session_creator=session_creator)
Example #21
  def testSparseTransformer(self):
    """Test sparse transformer decode."""
    with self.cached_session() as sess:
      with tf.variable_scope("sparse_transformer", reuse=tf.AUTO_REUSE):
        hparams_set = "sparse_transformer_local"
        problem = ""
        hparams = trainer_lib.create_hparams(hparams_set, problem_name=problem)
        hparams.layer_prepostprocess_dropout = 0.
        hparams.dropout = 0.
        hparams.num_encoder_layers = 0
        hparams.num_decoder_layers = 2
        hparams.local_relative = False
        hparams.query_shape = (20,)
        hparams.memory_flange = (0,)
        hparams.max_length = 200
        sparse_transformer = sptf.SparseTransformer(hparams)
        sparse_transformer.set_mode(tf.estimator.ModeKeys.PREDICT)
        sparse_transformer.vocab_size = 50
        features = {}
        decode_step = 10
        cache = {}
        # Testing that changing target tokens beyond decode_step has no effect
        # i = 0 or less should have the next cell sum == 0
        i = -5
        targets_prefix = tf.random.stateless_uniform(
            [1, decode_step - i],
            minval=0,
            maxval=sparse_transformer.vocab_size,
            dtype=tf.dtypes.int32,
            seed=(75, 48))
        zeros = tf.zeros([1, hparams.max_length - decode_step + i],
                         dtype=tf.int32)
        features["targets"] = tf.concat([targets_prefix, zeros],
                                        axis=-1)
        output_step1 = sparse_transformer.body(features,
                                               decode_step=decode_step,
                                               cache=cache)
        features["targets"] = tf.concat([
            targets_prefix, tf.random.stateless_uniform(
                [1, hparams.max_length - decode_step + i],
                minval=0,
                maxval=sparse_transformer.vocab_size,
                dtype=tf.dtypes.int32,
                seed=(67, 89))], axis=-1)
        output_step2 = sparse_transformer.body(features,
                                               decode_step=decode_step,
                                               cache=cache)
        initializer = tf.global_variables_initializer()
        if initializer is not None:
          initializer.run()

        output1_np = sess.run(output_step1)
        output2_np = sess.run(output_step2)
        self.assertEqual(output1_np.shape, output2_np.shape)
Example #22
  def testCompatibility(self):
    model = "transformer"
    hp_set = "transformer_test"
    problem_name = "translate_ende_wmt8k"

    hp = trainer_lib.create_hparams(
        hp_set, data_dir=_DATA_DIR, problem_name=problem_name)
    run_config = trainer_lib.create_run_config(model_dir=_CKPT_DIR)
    estimator = trainer_lib.create_estimator(model, hp, run_config)

    for prediction in estimator.predict(self.input_fn):
      self.assertEqual(prediction["outputs"].dtype, np.int32)
Example #23
def encode_env_frames(problem_name, ae_problem_name, ae_hparams_set,
                      autoencoder_path, epoch_data_dir):
    """Encode all frames from problem_name and write out as ae_problem_name."""
    with tf.Graph().as_default():
        ae_hparams = trainer_lib.create_hparams(ae_hparams_set,
                                                problem_name=problem_name)
        problem = ae_hparams.problem
        model = registry.model("autoencoder_ordered_discrete")(
            ae_hparams, tf.estimator.ModeKeys.EVAL)

        ae_problem = registry.problem(ae_problem_name)
        ae_training_paths = ae_problem.training_filepaths(
            epoch_data_dir, 10, True)
        ae_eval_paths = ae_problem.dev_filepaths(epoch_data_dir, 1, True)

        skip_train = False
        skip_eval = False
        for path in ae_training_paths:
            if tf.gfile.Exists(path):
                skip_train = True
                break
        for path in ae_eval_paths:
            if tf.gfile.Exists(path):
                skip_eval = True
                break

        # Encode train data
        if not skip_train:
            dataset = problem.dataset(tf.estimator.ModeKeys.TRAIN,
                                      epoch_data_dir,
                                      shuffle_files=False,
                                      output_buffer_size=100,
                                      preprocess=False)
            encode_dataset(model,
                           dataset,
                           problem=problem,
                           ae_hparams=ae_hparams,
                           autoencoder_path=autoencoder_path,
                           out_files=ae_training_paths)

        # Encode eval data
        if not skip_eval:
            dataset = problem.dataset(tf.estimator.ModeKeys.EVAL,
                                      epoch_data_dir,
                                      shuffle_files=False,
                                      output_buffer_size=100,
                                      preprocess=False)
            encode_dataset(model,
                           dataset,
                           problem=problem,
                           ae_hparams=ae_hparams,
                           autoencoder_path=autoencoder_path,
                           out_files=ae_eval_paths)
def hparams_set_up(problem_name,
                   data_dir,
                   hparam_set=None,
                   hparams_override=None):
    if hparam_set:
        hparams = trainer_lib.create_hparams(
            hparam_set, hparams_overrides_str=hparams_override)
    else:
        hparams = common_hparams.basic_params1()
    hparams.data_dir = data_dir
    hparams_lib.add_problem_hparams(hparams, problem_name)
    return hparams, hparams.problem
Example #25
def build_model(hparams_set,
                hparamss,
                model_name,
                data_dir,
                problem_name,
                beam_size=1):
    """Build the graph required to fetch the attention weights.

  Args:
    hparams_set: HParams set to build the model with.
    hparamss: String of hparams overrides, applied on top of hparams_set.
    model_name: Name of model.
    data_dir: Path to directory containing training data.
    problem_name: Name of problem.
    beam_size: (Optional) Number of beams to use when decoding a translation.
        If set to 1 (default) then greedy decoding is used.

  Returns:
    Tuple of (
        inputs: Input placeholder to feed in ids to be translated.
        targets: Targets placeholder to feed to translation when fetching
            attention weights.
        samples: Tensor representing the ids of the translation.
        att_mats: Tensors representing the attention weights.
    )
  """
    hparams = trainer_lib.create_hparams(hparams_set,
                                         hparamss,
                                         data_dir=data_dir,
                                         problem_name=problem_name)
    translate_model = registry.model(model_name)(hparams,
                                                 tf.estimator.ModeKeys.EVAL)

    inputs = tf.placeholder(tf.int32, shape=(1, None, 1, 1), name='inputs')
    targets = tf.placeholder(tf.int32, shape=(1, None, 1, 1), name='targets')
    translate_model({
        'inputs': inputs,
        'targets': targets,
    })

    # Must be called after building the training graph, so that the dict will
    # have been filled with the attention tensors. BUT before creating the
    # inference graph otherwise the dict will be filled with tensors from
    # inside a tf.while_loop from decoding and are marked unfetchable.
    att_mats = get_att_mats(translate_model)

    with tf.variable_scope(tf.get_variable_scope(), reuse=True):
        samples = translate_model.infer({
            'inputs': inputs,
        },
                                        beam_size=beam_size)['outputs']

    return inputs, targets, samples, att_mats
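
A hypothetical usage sketch for build_model above; the hparams set, model and problem names are placeholders, and it assumes the problem's vocab files under data_dir plus a trained checkpoint to restore:

import numpy as np
import tensorflow as tf

inputs, targets, samples, att_mats = build_model(
    "transformer_base", "", "transformer",
    data_dir="/path/to/data_dir",
    problem_name="translate_ende_wmt32k")
with tf.Session() as sess:
    tf.train.Saver().restore(sess, "/path/to/model.ckpt")
    dummy_ids = np.ones((1, 5, 1, 1), dtype=np.int32)  # placeholder subword ids
    translated_ids = sess.run(samples, feed_dict={inputs: dummy_ids})
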
    def __init__(self, len, observ_shape, observ_dtype, action_shape,
                 action_dtype):
        """Batch of environments inside the TensorFlow graph.

    Args:
      len: Number of environments in the batch.
      observ_shape: Shape of a single observation.
      observ_dtype: Data type of observations.
      action_shape: Shape of a single action.
      action_dtype: Data type of actions.
    """

        self.length = len

        hparams = trainer_lib.create_hparams(FLAGS.hparams_set,
                                             problem_name=FLAGS.problems,
                                             data_dir="UNUSED")
        hparams.force_full_predict = True
        self._model = registry.model(FLAGS.model)(
            hparams, tf.estimator.ModeKeys.PREDICT)

        self.action_shape = action_shape
        self.action_dtype = action_dtype

        with open(
                pkg_resources.resource_filename("tensor2tensor.rl.envs",
                                                "frame1.png"), "rb") as f:
            png_frame_1_raw = f.read()

        with open(
                pkg_resources.resource_filename("tensor2tensor.rl.envs",
                                                "frame2.png"), "rb") as f:
            png_frame_2_raw = f.read()

        self.frame_1 = tf.expand_dims(
            tf.cast(tf.image.decode_png(png_frame_1_raw), tf.float32), 0)
        self.frame_2 = tf.expand_dims(
            tf.cast(tf.image.decode_png(png_frame_2_raw), tf.float32), 0)

        shape = (self.length, ) + observ_shape
        self._observ = tf.Variable(tf.zeros(shape, observ_dtype),
                                   trainable=False)
        self._prev_observ = tf.Variable(tf.zeros(shape, observ_dtype),
                                        trainable=False)
        self._starting_observ = tf.Variable(tf.zeros(shape, observ_dtype),
                                            trainable=False)

        observ_dtype = tf.int64
        self._observ_not_sure_why_we_need_this = tf.Variable(tf.zeros(
            (self.length, ) + observ_shape, observ_dtype),
                                                             name='observ_new',
                                                             trainable=False)

        self._reward_not_sure_why_we_need_this = tf.Variable(tf.zeros(
            (self.length, 1), observ_dtype),
                                                             name='reward_new',
                                                             trainable=False)
  def testCompatibility(self):
    model = "transformer"
    hp_set = "transformer_test"
    problem_name = "translate_ende_wmt8k"

    hp = trainer_lib.create_hparams(
        hp_set, data_dir=_DATA_DIR, problem_name=problem_name)
    run_config = trainer_lib.create_run_config(model, model_dir=_CKPT_DIR)
    estimator = trainer_lib.create_estimator(model, hp, run_config)

    for prediction in estimator.predict(self.input_fn):
      self.assertEqual(prediction["outputs"].dtype, np.int32)
Example #28
def example_apply_model(ckpt_path,
                        hparams_set="img2img_transformer2d_tiny",
                        problem_name="img2img_allen_brain_dim8to32",
                        model_name="img2img_transformer",
                        data_dir="/mnt/nfs-east1-d/data",
                        input_dim=8,
                        output_dim=32):

    # HACK: Avoid re-instantiating the model which causes problems...
    # TODO: Better way to handle this, e.g. delete from globals.
    if 'model' not in globals():

        hp = trainer_lib.create_hparams(hparams_set,
                                        data_dir=data_dir,
                                        problem_name=problem_name)

        model = registry.model(model_name)(hp, Modes.TRAIN)

    problem_object = problems.problem(problem_name)

    dataset = problem_object.dataset(Modes.TRAIN, data_dir)

    with tfe.restore_variables_on_create(ckpt_path):
        for count, example in enumerate(tfe.Iterator(dataset)):
            if count > 1234:
                break

        # Example input
        fig = plt.figure(figsize=(8, 8))
        example["inputs"] = tf.reshape(example["inputs"],
                                       [1, input_dim, input_dim, 3])
        fig.add_subplot(1, 3, 1)
        plt.imshow(example["inputs"].numpy()[0])

        # Example target
        fig.add_subplot(1, 3, 2)
        example["targets"] = tf.reshape(example["targets"],
                                        [1, output_dim, output_dim, 3])
        plt.imshow(example["targets"].numpy()[0])

        # Dummy target (expected by model)
        example["targets"] = tf.reshape(
            tf.zeros((1, output_dim, output_dim, 3), dtype=np.uint8),
            [1, output_dim, output_dim, 3])

        # Produce and display prediction
        predictions, _ = model(example)
        fig.add_subplot(1, 3, 3)
        inferred = demo.infer(predictions)
        plt.imshow(inferred)
        plt.show()

    return example, predictions, inferred
def train_agent(problem_name, agent_model_dir,
                event_dir, world_model_dir, epoch_data_dir, hparams,
                autoencoder_path=None, epoch=0):
  """Train the PPO agent in the simulated environment."""
  gym_problem = registry.problem(problem_name)
  ppo_hparams = trainer_lib.create_hparams(hparams.ppo_params)
  ppo_params_names = ["epochs_num", "epoch_length",
                      "learning_rate", "num_agents",
                      "optimization_epochs"]

  for param_name in ppo_params_names:
    ppo_param_name = "ppo_" + param_name
    if ppo_param_name in hparams:
      ppo_hparams.set_hparam(param_name, hparams.get(ppo_param_name))

  ppo_epochs_num = hparams.ppo_epochs_num
  ppo_hparams.save_models_every_epochs = ppo_epochs_num
  ppo_hparams.world_model_dir = world_model_dir
  ppo_hparams.add_hparam("force_beginning_resets", True)

  # Adding model hparams for model specific adjustments
  model_hparams = trainer_lib.create_hparams(hparams.generative_model_params)
  ppo_hparams.add_hparam("model_hparams", model_hparams)

  environment_spec = copy.copy(gym_problem.environment_spec)
  environment_spec.simulation_random_starts = hparams.simulation_random_starts
  environment_spec.intrinsic_reward_scale = hparams.intrinsic_reward_scale

  ppo_hparams.add_hparam("environment_spec", environment_spec)

  with temporary_flags({
      "problem": problem_name,
      "model": hparams.generative_model,
      "hparams_set": hparams.generative_model_params,
      "output_dir": world_model_dir,
      "data_dir": epoch_data_dir,
      "autoencoder_path": autoencoder_path,
  }):
    rl_trainer_lib.train(ppo_hparams, event_dir, agent_model_dir, epoch=epoch)
def train(hparams, output_dir, report_fn=None):
  hparams = initialize_env_specs(hparams)
  learner = LEARNERS[hparams.base_algo](
      hparams.frame_stack_size, FLAGS.output_dir, output_dir
  )
  policy_hparams = trainer_lib.create_hparams(hparams.base_algo_params)
  update_hparams_from_hparams(
      policy_hparams, hparams, hparams.base_algo + "_"
  )
  learner.train(
      hparams.env_fn, policy_hparams, simulated=False, save_continuously=True,
      epoch=0, eval_env_fn=hparams.eval_env_fn, report_fn=report_fn
  )
Example #31
def train_agent(real_env, learner, world_model_dir, hparams, epoch):
    """Train the PPO agent in the simulated environment."""
    frame_stack_size = hparams.frame_stack_size
    initial_frame_rollouts = real_env.current_epoch_rollouts(
        split=tf.contrib.learn.ModeKeys.TRAIN,
        minimal_rollout_frames=frame_stack_size,
    )

    # TODO(koz4k): Move this to a different module.
    def initial_frame_chooser(batch_size):
        """Frame chooser."""

        deterministic_initial_frames =\
            initial_frame_rollouts[0][:frame_stack_size]
        if not hparams.simulation_random_starts:
            # Deterministic starts: repeat first frames from the first rollout.
            initial_frames = [deterministic_initial_frames] * batch_size
        else:
            # Random starts: choose random initial frames from random rollouts.
            initial_frames = random_rollout_subsequences(
                initial_frame_rollouts, batch_size, frame_stack_size)
            if hparams.simulation_flip_first_random_for_beginning:
                # Flip first entry in the batch for deterministic initial frames.
                initial_frames[0] = deterministic_initial_frames

        return np.stack(
            [[frame.observation.decode() for frame in initial_frame_stack]
             for initial_frame_stack in initial_frames])

    env_fn = make_simulated_env_fn(
        real_env, hparams, hparams.simulated_batch_size, initial_frame_chooser,
        world_model_dir,
        os.path.join(learner.agent_model_dir, "sim_videos_{}".format(epoch)))
    base_algo_str = hparams.base_algo
    train_hparams = trainer_lib.create_hparams(hparams.base_algo_params)
    if hparams.wm_policy_param_sharing:
        train_hparams.optimizer_zero_grads = True

    rl_utils.update_hparams_from_hparams(train_hparams, hparams,
                                         base_algo_str + "_")

    final_epoch = hparams.epochs - 1
    is_special_epoch = (epoch + 3) == final_epoch or (epoch + 7) == final_epoch
    is_final_epoch = epoch == final_epoch
    env_step_multiplier = 3 if is_final_epoch else 2 if is_special_epoch else 1
    learner.train(env_fn,
                  train_hparams,
                  simulated=True,
                  save_continuously=True,
                  epoch=epoch,
                  env_step_multiplier=env_step_multiplier)
Example #32
def main(_):
    data_dir = os.path.expanduser(FLAGS.data_dir)
    ckpt_dir = FLAGS.ckpt_dir
    percent = float(FLAGS.threshold_percentile) / 100
    new_ckpt = os.path.join(ckpt_dir, 'pruned/pruned_{}'.format(percent))

    hparams = trainer_lib.create_hparams(hparams_set=FLAGS.hparams_set,
                                         data_dir=data_dir,
                                         problem_name=FLAGS.problem_name)

    convert_lib.prune_checkpoint(hparams,
                                 ckpt_dir=ckpt_dir,
                                 threshold_percentile=percent,
                                 new_ckpt=new_ckpt)
def train(hparams, output_dir, report_fn=None):
    hparams = initialize_env_specs(hparams)
    learner = LEARNERS[hparams.base_algo](hparams.frame_stack_size,
                                          FLAGS.output_dir, output_dir)
    policy_hparams = trainer_lib.create_hparams(hparams.base_algo_params)
    update_hparams_from_hparams(policy_hparams, hparams,
                                hparams.base_algo + "_")
    learner.train(hparams.env_fn,
                  policy_hparams,
                  simulated=False,
                  save_continuously=True,
                  epoch=0,
                  eval_env_fn=hparams.eval_env_fn,
                  report_fn=report_fn)
Example #34
def make_simulated_env_fn(
    real_env, hparams, batch_size, initial_frame_chooser, model_dir):
  """Creates a simulated env_fn."""
  return rl.make_simulated_env_fn(
      reward_range=real_env.reward_range,
      observation_space=real_env.observation_space,
      action_space=real_env.action_space,
      frame_stack_size=hparams.frame_stack_size,
      initial_frame_chooser=initial_frame_chooser, batch_size=batch_size,
      model_name=hparams.generative_model,
      model_hparams=trainer_lib.create_hparams(hparams.generative_model_params),
      model_dir=model_dir,
      intrinsic_reward_scale=hparams.intrinsic_reward_scale,
  )
Example #35
    def __init__(self, environment_spec, length):
        """Batch of environments inside the TensorFlow graph."""

        observ_space = utils.get_observation_space(environment_spec)
        initial_frames_problem = environment_spec.initial_frames_problem
        observ_shape = (initial_frames_problem.frame_height,
                        initial_frames_problem.frame_width,
                        initial_frames_problem.num_channels)
        observ_space.shape = observ_shape
        action_space = utils.get_action_space(environment_spec)
        super(SimulatedBatchEnv, self).__init__(observ_space, action_space)

        self.length = length
        self._min_reward = initial_frames_problem.min_reward
        self._num_frames = environment_spec.video_num_input_frames
        self._intrinsic_reward_scale = environment_spec.intrinsic_reward_scale

        model_hparams = trainer_lib.create_hparams(FLAGS.hparams_set,
                                                   problem_name=FLAGS.problem)
        model_hparams.force_full_predict = True
        self._model = registry.model(FLAGS.model)(
            model_hparams, tf.estimator.ModeKeys.PREDICT)

        hparams = HParams(
            video_num_input_frames=environment_spec.video_num_input_frames,
            video_num_target_frames=environment_spec.video_num_target_frames,
            environment_spec=environment_spec)

        if environment_spec.simulation_random_starts:
            dataset = initial_frames_problem.dataset(
                tf.estimator.ModeKeys.TRAIN,
                FLAGS.data_dir,
                shuffle_files=True,
                hparams=hparams)
            dataset = dataset.shuffle(buffer_size=1000)
        else:
            dataset = initial_frames_problem.dataset(
                tf.estimator.ModeKeys.TRAIN,
                FLAGS.data_dir,
                shuffle_files=False,
                hparams=hparams).take(1)

        dataset = dataset.map(lambda x: x["inputs"]).repeat()
        self.history_buffer = HistoryBuffer(dataset, self.length,
                                            self.observ_dtype)

        self._observ = tf.Variable(tf.zeros((len(self), ) + observ_shape,
                                            self.observ_dtype),
                                   trainable=False)
Example #36
    def __init__(self, environment_lambda, length, problem):
        """Batch of environments inside the TensorFlow graph."""
        self.length = length
        self._num_frames = problem.num_input_frames

        # TODO(piotrmilos): For the moment we are fine with that.
        assert self.length == 1, "Currently SimulatedBatchEnv supports only one env"
        initialization_env = environment_lambda()
        hparams = trainer_lib.create_hparams(FLAGS.hparams_set,
                                             problem_name=FLAGS.problem)
        hparams.force_full_predict = True
        self._model = registry.model(FLAGS.model)(
            hparams, tf.estimator.ModeKeys.PREDICT)

        self.action_space = initialization_env.action_space
        self.action_shape = list(initialization_env.action_space.shape)
        self.action_dtype = tf.int32

        obs = []
        if hasattr(initialization_env.env, "get_starting_data"):
            obs, _, _ = initialization_env.env.get_starting_data()
        else:
            # TODO(piotrmilos): Ancient method for environments not supporting
            # get_starting_data. This is probably not compatible with
            # self._num_frames != 2 and should be removed at some point.
            num_frames = self._num_frames
            initialization_env.reset()
            skip_frames = 20
            for _ in range(skip_frames):
                initialization_env.step(0)
            for _ in range(num_frames):
                obs.append(initialization_env.step(0)[0])

        initial_frames = tf.stack(obs)
        initial_frames = tf.cast(initial_frames, tf.float32)

        self.history_buffer = HistoryBuffer(initial_frames, problem=problem)

        height, width, channels = initialization_env.observation_space.shape
        # TODO(lukaszkaiser): remove this and just use Problem.frame_height.
        if FLAGS.autoencoder_path:
            height = problem.frame_height
            width = problem.frame_width
        shape = (self.length, height, width, channels)
        with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
            self._observ = tf.get_variable("observ",
                                           shape,
                                           initializer=tf.zeros_initializer,
                                           trainable=False)
def make_simulated_env_fn(
    real_env, hparams, batch_size, initial_frame_chooser, model_dir):
  """Creates a simulated env_fn."""
  return rl.make_simulated_env_fn(
      reward_range=real_env.reward_range,
      observation_space=real_env.observation_space,
      action_space=real_env.action_space,
      frame_stack_size=hparams.frame_stack_size,
      frame_height=real_env.frame_height, frame_width=real_env.frame_width,
      initial_frame_chooser=initial_frame_chooser, batch_size=batch_size,
      model_name=hparams.generative_model,
      model_hparams=trainer_lib.create_hparams(hparams.generative_model_params),
      model_dir=model_dir,
      intrinsic_reward_scale=hparams.intrinsic_reward_scale,
  )
Example #38
 def testBasicFcRelu(self):
   x = np.random.random_integers(0, high=255, size=(1, 28, 28, 1))
   y = np.random.random_integers(0, high=9, size=(1, 1))
   hparams = trainer_lib.create_hparams(
       "basic_fc_small", problem_name="image_mnist", data_dir=".")
   with self.test_session() as session:
     features = {
         "inputs": tf.constant(x, dtype=tf.int32),
         "targets": tf.constant(y, dtype=tf.int32),
     }
     model = basic.BasicFcRelu(hparams, tf.estimator.ModeKeys.TRAIN)
     logits, _ = model(features)
     session.run(tf.global_variables_initializer())
     res = session.run(logits)
   self.assertEqual(res.shape, (1, 1, 1, 1, 10))
Example #39
def train_agent(problem_name,
                agent_model_dir,
                event_dir,
                world_model_dir,
                epoch_data_dir,
                hparams,
                autoencoder_path=None,
                epoch=0):
    """Train the PPO agent in the simulated environment."""
    gym_problem = registry.problem(problem_name)
    ppo_hparams = trainer_lib.create_hparams(hparams.ppo_params)
    ppo_epochs_num = hparams.ppo_epochs_num
    ppo_hparams.epochs_num = ppo_epochs_num
    ppo_hparams.simulated_environment = True
    ppo_hparams.simulation_random_starts = hparams.simulation_random_starts
    ppo_hparams.intrinsic_reward_scale = hparams.intrinsic_reward_scale
    ppo_hparams.eval_every_epochs = 50
    ppo_hparams.save_models_every_epochs = ppo_epochs_num
    ppo_hparams.epoch_length = hparams.ppo_epoch_length
    ppo_hparams.num_agents = hparams.ppo_num_agents
    ppo_hparams.problem = gym_problem
    ppo_hparams.world_model_dir = world_model_dir
    if hparams.ppo_learning_rate:
        ppo_hparams.learning_rate = hparams.ppo_learning_rate
    # 4x for the StackAndSkipWrapper minus one to always finish for reporting.
    ppo_time_limit = (ppo_hparams.epoch_length - 1) * 4

    in_graph_wrappers = [(TimeLimitWrapper, {
        "timelimit": ppo_time_limit
    }), (StackAndSkipWrapper, {
        "skip": 4
    })]
    in_graph_wrappers += gym_problem.in_graph_wrappers
    ppo_hparams.add_hparam("in_graph_wrappers", in_graph_wrappers)

    with temporary_flags({
            "problem": problem_name,
            "model": hparams.generative_model,
            "hparams_set": hparams.generative_model_params,
            "output_dir": world_model_dir,
            "data_dir": epoch_data_dir,
            "autoencoder_path": autoencoder_path,
    }):
        rl_trainer_lib.train(ppo_hparams,
                             gym_problem.env_name,
                             event_dir,
                             agent_model_dir,
                             epoch=epoch)
Example #40
 def testBasicFcRelu(self):
     x = np.random.randint(256, size=(1, 28, 28, 1))
     y = np.random.randint(10, size=(1, 1))
     hparams = trainer_lib.create_hparams("basic_fc_small",
                                          problem_name="image_mnist",
                                          data_dir=".")
     with self.test_session() as session:
         features = {
             "inputs": tf.constant(x, dtype=tf.int32),
             "targets": tf.constant(y, dtype=tf.int32),
         }
         model = basic.BasicFcRelu(hparams, tf_estimator.ModeKeys.TRAIN)
         logits, _ = model(features)
         session.run(tf.global_variables_initializer())
         res = session.run(logits)
     self.assertEqual(res.shape, (1, 1, 1, 1, 10))
    def testMultipleTargetModalities(self):
        # Use existing hparams and override target modality.
        hparams = trainer_lib.create_hparams(
            "transformer_tiny",
            data_dir=algorithmic.TinyAlgo.data_dir,
            problem_name="tiny_algo")
        # Manually turn off sharing. It is not currently supported for multitargets.
        hparams.shared_embedding_and_softmax_weights = 0  # pylint: disable=line-too-long
        hparams.problem_hparams.modality = {
            "targets": hparams.problem_hparams.modality["targets"],
            "targets_A": hparams.problem_hparams.modality["targets"],
            "targets_B": hparams.problem_hparams.modality["targets"],
        }
        hparams.problem_hparams.vocab_size = {
            "targets": hparams.problem_hparams.vocab_size["targets"],
            "targets_A": hparams.problem_hparams.vocab_size["targets"],
            "targets_B": hparams.problem_hparams.vocab_size["targets"],
        }
        hparams.problem._hparams = hparams.problem_hparams

        # Dataset
        problem = hparams.problem
        dataset = problem.dataset(tf.estimator.ModeKeys.TRAIN,
                                  algorithmic.TinyAlgo.data_dir)
        dataset = dataset.repeat(None).padded_batch(10, dataset.output_shapes)
        features = dataset.make_one_shot_iterator().get_next()
        features = data_reader.standardize_shapes(features)
        features["targets_A"] = features["targets_B"] = features["targets"]

        # Model
        model = registry.model("transformer")(hparams,
                                              tf.estimator.ModeKeys.TRAIN)

        def body(args, mb=model.body):
            out = mb(args)
            return {"targets": out, "targets_A": out, "targets_B": out}

        model.body = body

        logits, losses = model(features)

        self.assertTrue("training" in losses)
        loss = losses["training"]

        with self.test_session() as sess:
            sess.run(tf.global_variables_initializer())
            sess.run([logits, loss])
Example #42
def make_simulated_env_kwargs(real_env, hparams, **extra_kwargs):
    """Extracts simulated env kwargs from real_env and loop hparams."""
    objs_and_attrs = [(real_env, [
        "reward_range", "observation_space", "action_space", "frame_height",
        "frame_width"
    ]), (hparams, ["frame_stack_size", "intrinsic_reward_scale"])]
    kwargs = {
        attr: getattr(obj, attr)  # pylint: disable=g-complex-comprehension
        for (obj, attrs) in objs_and_attrs for attr in attrs
    }
    kwargs["model_name"] = hparams.generative_model
    kwargs["model_hparams"] = trainer_lib.create_hparams(
        hparams.generative_model_params)
    if hparams.wm_policy_param_sharing:
        kwargs["model_hparams"].optimizer_zero_grads = True
    kwargs.update(extra_kwargs)
    return kwargs
def train_agent(
    real_env, learner, world_model_dir, hparams, epoch, is_final_epoch):
  """Train the PPO agent in the simulated environment."""
  frame_stack_size = hparams.frame_stack_size
  initial_frame_rollouts = real_env.current_epoch_rollouts(
      split=tf.contrib.learn.ModeKeys.TRAIN,
      minimal_rollout_frames=frame_stack_size,
  )
  # TODO(koz4k): Move this to a different module.
  def initial_frame_chooser(batch_size):
    """Frame chooser."""

    deterministic_initial_frames =\
        initial_frame_rollouts[0][:frame_stack_size]
    if not hparams.simulation_random_starts:
      # Deterministic starts: repeat first frames from the first rollout.
      initial_frames = [deterministic_initial_frames] * batch_size
    else:
      # Random starts: choose random initial frames from random rollouts.
      initial_frames = random_rollout_subsequences(
          initial_frame_rollouts, batch_size, frame_stack_size
      )
      if hparams.simulation_flip_first_random_for_beginning:
        # Flip first entry in the batch for deterministic initial frames.
        initial_frames[0] = deterministic_initial_frames

    return np.stack([
        [frame.observation.decode() for frame in initial_frame_stack]
        for initial_frame_stack in initial_frames
    ])
  env_fn = make_simulated_env_fn(
      real_env, hparams, hparams.simulated_batch_size, initial_frame_chooser,
      world_model_dir
  )
  base_algo_str = hparams.base_algo
  train_hparams = trainer_lib.create_hparams(hparams.base_algo_params)

  update_hparams_from_hparams(
      train_hparams, hparams, base_algo_str + "_"
  )

  env_step_multiplier = 2 if is_final_epoch else 1
  learner.train(
      env_fn, train_hparams, simulated=True, save_continuously=True,
      epoch=epoch, env_step_multiplier=env_step_multiplier
  )
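
# For reference, a minimal stand-in satisfying the initial_frame_chooser
# contract used above: given a batch size, return an array of decoded frame
# stacks, one per batch element. The frame shape (84x84 RGB, stack of 4) is an
# illustrative assumption, not taken from any hparams set.
def zero_frame_chooser(batch_size, frame_stack_size=4, height=84, width=84):
  frame_stack = np.zeros((frame_stack_size, height, width, 3), dtype=np.uint8)
  return np.stack([frame_stack] * batch_size)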
def train_agent_real_env(env, learner, hparams, epoch):
  """Train the PPO agent in the real environment."""
  base_algo_str = hparams.base_algo

  train_hparams = trainer_lib.create_hparams(hparams.base_algo_params)
  update_hparams_from_hparams(
      train_hparams, hparams, "real_" + base_algo_str + "_"
  )

  env_fn = rl.make_real_env_fn(env)
  num_env_steps = real_env_step_increment(hparams)
  learner.train(
      env_fn, train_hparams, simulated=False, save_continuously=False,
      epoch=epoch, num_env_steps=num_env_steps
  )
  # Save unfinished rollouts to history.
  env.reset()
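
# update_hparams_from_hparams is not defined in this snippet. A plausible
# reading of its behavior (an assumption, not the library's implementation):
# copy every hparam whose name starts with the given prefix onto the target
# hparams with the prefix stripped, e.g. "real_ppo_learning_rate" becomes
# "learning_rate" when the prefix is "real_ppo_".
def _update_from_prefixed(target_hparams, source_hparams, prefix):
  for name, value in source_hparams.values().items():
    if name.startswith(prefix):
      target_hparams.set_hparam(name[len(prefix):], value)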
Example #45
  def get_mnist_random_output(self, model_name, hparams_set=None,
                              mode=tf.estimator.ModeKeys.TRAIN):
    hparams_set = hparams_set or model_name
    # image_mnist_rev swaps inputs and targets, so the image goes in "targets".
    x = np.random.randint(0, 256, size=(1, 28, 28, 1))
    y = np.random.randint(0, 10, size=(1, 1))
    features = {
        "targets": tf.constant(x, dtype=tf.int32),
        "inputs": tf.constant(y, dtype=tf.int32),
    }
    hparams = trainer_lib.create_hparams(
        hparams_set, problem_name="image_mnist_rev", data_dir=".")
    model = registry.model(model_name)(hparams, mode)
    tf.train.create_global_step()
    logits, _ = model(features)
    with self.test_session() as session:
      session.run(tf.global_variables_initializer())
      res = session.run(logits)
    return res
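
  # Hypothetical follow-up test method (the model name and hparams set below
  # are placeholders and may not be registered in every tensor2tensor install):
  def test_mnist_output_batch_dim(self):
    res = self.get_mnist_random_output(
        "imagetransformer", hparams_set="imagetransformer_base")
    self.assertEqual(res.shape[0], 1)  # one random example in the batch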
Example #46
def build_model(hparams_set, model_name, data_dir, problem_name, beam_size=1):
  """Build the graph required to fetch the attention weights.

  Args:
    hparams_set: HParams set to build the model with.
    model_name: Name of model.
    data_dir: Path to directory containing training data.
    problem_name: Name of problem.
    beam_size: (Optional) Number of beams to use when decoding a translation.
        If set to 1 (default) then greedy decoding is used.

  Returns:
    Tuple of (
        inputs: Input placeholder to feed in ids to be translated.
        targets: Targets placeholder to feed to translation when fetching
            attention weights.
        samples: Tensor representing the ids of the translation.
        att_mats: Tensors representing the attention weights.
    )
  """
  hparams = trainer_lib.create_hparams(
      hparams_set, data_dir=data_dir, problem_name=problem_name)
  translate_model = registry.model(model_name)(
      hparams, tf.estimator.ModeKeys.EVAL)

  inputs = tf.placeholder(tf.int32, shape=(1, None, 1, 1), name='inputs')
  targets = tf.placeholder(tf.int32, shape=(1, None, 1, 1), name='targets')
  translate_model({
      'inputs': inputs,
      'targets': targets,
  })

  # Must be called after building the training graph, so that the dict will
  # have been filled with the attention tensors, BUT before creating the
  # inference graph; otherwise the dict would be filled with tensors from
  # inside the tf.while_loop used for decoding, which are marked unfetchable.
  att_mats = get_att_mats(translate_model)

  with tf.variable_scope(tf.get_variable_scope(), reuse=True):
    samples = translate_model.infer({
        'inputs': inputs,
    }, beam_size=beam_size)['outputs']

  return inputs, targets, samples, att_mats
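
# A minimal driver sketch (assumption, not part of the original file): feed
# already-encoded token ids into the placeholders returned by build_model and
# fetch a greedy translation. In practice a trained checkpoint would be
# restored first; with freshly initialized variables the output ids are
# meaningless.
def run_greedy_translation_sketch(hparams_set, model_name, data_dir,
                                  problem_name, encoded_ids):
  inputs, _, samples, _ = build_model(
      hparams_set, model_name, data_dir, problem_name, beam_size=1)
  with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # The placeholder shape is (1, None, 1, 1), so wrap each id accordingly.
    feed = {inputs: [[[[i]] for i in encoded_ids]]}
    return sess.run(samples, feed_dict=feed)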
def evaluate_single_config(hparams, stochastic, max_num_noops, agent_model_dir):
  """Evaluate the PPO agent in the real environment."""
  eval_hparams = trainer_lib.create_hparams(hparams.base_algo_params)
  env = setup_env(
      hparams, batch_size=hparams.eval_batch_size, max_num_noops=max_num_noops
  )
  env.start_new_epoch(0)
  env_fn = rl.make_real_env_fn(env)
  learner = LEARNERS[hparams.base_algo](
      hparams.frame_stack_size, base_event_dir=None,
      agent_model_dir=agent_model_dir
  )
  learner.evaluate(env_fn, eval_hparams, stochastic)
  rollouts = env.current_epoch_rollouts()
  env.close()

  return tuple(
      compute_mean_reward(rollouts, clipped) for clipped in (True, False)
  )
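
# Illustrative aggregation over a small evaluation grid (the particular
# stochastic/no-op combinations are assumptions, not prescribed by the code
# above); each entry holds the (clipped, unclipped) mean rewards returned.
def evaluate_grid_sketch(loop_hparams, agent_model_dir):
  scores = {}
  for stochastic in (True, False):
    for max_num_noops in (0, 8):
      scores[(stochastic, max_num_noops)] = evaluate_single_config(
          loop_hparams, stochastic, max_num_noops, agent_model_dir)
  return scores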
  def __init__(self, environment_spec, length, other_hparams):
    """Batch of environments inside the TensorFlow graph."""
    del other_hparams
    self.length = length
    initial_frames_problem = environment_spec.initial_frames_problem
    self._min_reward = initial_frames_problem.min_reward
    self._num_frames = environment_spec.video_num_input_frames
    self._intrinsic_reward_scale = environment_spec.intrinsic_reward_scale

    model_hparams = trainer_lib.create_hparams(
        FLAGS.hparams_set, problem_name=FLAGS.problem)
    model_hparams.force_full_predict = True
    self._model = registry.model(FLAGS.model)(
        model_hparams, tf.estimator.ModeKeys.PREDICT)

    _, self.action_shape, self.action_dtype = get_action_space(environment_spec)

    hparams = HParams(
        video_num_input_frames=environment_spec.video_num_input_frames,
        video_num_target_frames=environment_spec.video_num_target_frames,
        environment_spec=environment_spec)

    if environment_spec.simulation_random_starts:
      dataset = initial_frames_problem.dataset(tf.estimator.ModeKeys.TRAIN,
                                               FLAGS.data_dir,
                                               shuffle_files=True,
                                               hparams=hparams)
      dataset = dataset.shuffle(buffer_size=100)
    else:
      dataset = initial_frames_problem.dataset(tf.estimator.ModeKeys.TRAIN,
                                               FLAGS.data_dir,
                                               shuffle_files=False,
                                               hparams=hparams).take(1)

    dataset = dataset.map(lambda x: x["inputs"]).repeat()
    self.history_buffer = HistoryBuffer(dataset, self.length)

    shape = (self.length, initial_frames_problem.frame_height,
             initial_frames_problem.frame_width,
             initial_frames_problem.num_channels)
    self._observ = tf.Variable(tf.zeros(shape, tf.float32), trainable=False)
def encode_env_frames(problem_name, ae_problem_name, autoencoder_path,
                      epoch_data_dir):
  """Encode all frames from problem_name and write out as ae_problem_name."""
  with tf.Graph().as_default():
    ae_hparams = trainer_lib.create_hparams("autoencoder_discrete_pong",
                                            problem_name=problem_name)
    problem = ae_hparams.problem
    model = registry.model("autoencoder_ordered_discrete")(
        ae_hparams, tf.estimator.ModeKeys.EVAL)

    ae_problem = registry.problem(ae_problem_name)
    ae_training_paths = ae_problem.training_filepaths(epoch_data_dir, 10, True)
    ae_eval_paths = ae_problem.dev_filepaths(epoch_data_dir, 1, True)

    skip_train = False
    skip_eval = False
    for path in ae_training_paths:
      if tf.gfile.Exists(path):
        skip_train = True
        break
    for path in ae_eval_paths:
      if tf.gfile.Exists(path):
        skip_eval = True
        break

    # Encode train data
    if not skip_train:
      dataset = problem.dataset(tf.estimator.ModeKeys.TRAIN, epoch_data_dir,
                                shuffle_files=False, output_buffer_size=100,
                                preprocess=False)
      encode_dataset(model, dataset, problem, ae_hparams, autoencoder_path,
                     ae_training_paths)

    # Encode eval data
    if not skip_eval:
      dataset = problem.dataset(tf.estimator.ModeKeys.EVAL, epoch_data_dir,
                                shuffle_files=False, output_buffer_size=100,
                                preprocess=False)
      encode_dataset(model, dataset, problem, ae_hparams, autoencoder_path,
                     ae_eval_paths)
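
# The two skip loops above are a simple idempotency guard: if any output shard
# already exists, that split is not re-encoded. An equivalent, more compact
# helper (same behavior, shown only for clarity):
def _any_shard_exists(paths):
  return any(tf.gfile.Exists(path) for path in paths)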
Example #50
  def __init__(self, processor_configuration):
    """Creates the Transformer estimator.

    Args:
      processor_configuration: A ProcessorConfiguration protocol buffer with
        the transformer fields populated.
    """
    # Do the pre-setup tensor2tensor requires for flags and configurations.
    transformer_config = processor_configuration["transformer"]
    FLAGS.output_dir = transformer_config["model_dir"]
    usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)
    data_dir = os.path.expanduser(transformer_config["data_dir"])

    # Create the basic hyperparameters.
    self.hparams = trainer_lib.create_hparams(
        transformer_config["hparams_set"],
        transformer_config["hparams"],
        data_dir=data_dir,
        problem_name=transformer_config["problem"])

    decode_hp = decoding.decode_hparams()
    decode_hp.add_hparam("shards", 1)
    decode_hp.add_hparam("shard_id", 0)

    # Create the estimator and final hyperparameters.
    self.estimator = trainer_lib.create_estimator(
        transformer_config["model"],
        self.hparams,
        t2t_trainer.create_run_config(self.hparams),
        decode_hparams=decode_hp, use_tpu=False)

    # Fetch the vocabulary and other helpful variables for decoding.
    self.source_vocab = self.hparams.problem_hparams.vocabulary["inputs"]
    self.targets_vocab = self.hparams.problem_hparams.vocabulary["targets"]
    self.const_array_size = 10000

    # Prepare the Transformer's debug data directory.
    run_dirs = sorted(glob.glob(os.path.join("/tmp/t2t_server_dump", "run_*")))
    for run_dir in run_dirs:
      shutil.rmtree(run_dir)
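
  # Hedged sketch of a decode-time helper this class might pair with (an
  # assumption, not part of the original snippet): encode a query with the
  # stored source vocabulary, append the T2T EOS id (1), and pad out to
  # const_array_size as set above.
  def _encode_query_sketch(self, query_string):
    ids = self.source_vocab.encode(query_string) + [1]
    ids += [0] * (self.const_array_size - len(ids))
    return ids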
Example #51
  def testMultipleTargetModalities(self):
    # Use existing hparams and override target modality.
    hparams = trainer_lib.create_hparams(
        "transformer_tiny", data_dir=algorithmic.TinyAlgo.data_dir,
        problem_name="tiny_algo")
    # Manually turn off sharing. It is not currently supported for multitargets.
    hparams.shared_embedding_and_softmax_weights = 0  # pylint: disable=line-too-long
    hparams.problem_hparams.modality = {
        "targets": hparams.problem_hparams.modality["targets"],
        "targets_A": hparams.problem_hparams.modality["targets"],
        "targets_B": hparams.problem_hparams.modality["targets"],
    }
    hparams.problem._hparams = hparams.problem_hparams

    # Dataset
    problem = hparams.problem
    dataset = problem.dataset(tf.estimator.ModeKeys.TRAIN,
                              algorithmic.TinyAlgo.data_dir)
    dataset = dataset.repeat(None).padded_batch(10, dataset.output_shapes)
    features = dataset.make_one_shot_iterator().get_next()
    features = problem_lib.standardize_shapes(features)
    features["targets_A"] = features["targets_B"] = features["targets"]

    # Model
    model = registry.model("transformer")(hparams, tf.estimator.ModeKeys.TRAIN)

    def body(args, mb=model.body):
      out = mb(args)
      return {"targets": out, "targets_A": out, "targets_B": out}

    model.body = body

    logits, losses = model(features)

    self.assertTrue("training" in losses)
    loss = losses["training"]

    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      sess.run([logits, loss])
def train_world_model(
    env, data_dir, output_dir, hparams, world_model_steps_num, epoch
):
  """Train the world model on problem_name."""
  world_model_steps_num += world_model_step_increment(
      hparams, is_initial_epoch=(epoch == 0)
  )
  model_hparams = trainer_lib.create_hparams(hparams.generative_model_params)
  model_hparams.learning_rate = model_hparams.learning_rate_constant
  if epoch > 0:
    model_hparams.learning_rate *= hparams.learning_rate_bump

  train_supervised(
      problem=env,
      model_name=hparams.generative_model,
      hparams=model_hparams,
      data_dir=data_dir,
      output_dir=output_dir,
      train_steps=world_model_steps_num,
      eval_steps=100,
      local_eval_frequency=2000
  )

  return world_model_steps_num
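
# A concrete reading of the learning-rate adjustment above, using illustrative
# values (assumed, not taken from any generative_model_params set):
_example_lr_constant = 2e-4
_example_lr_bump = 0.1
_lr_first_epoch = _example_lr_constant                      # 2e-4 at epoch 0
_lr_later_epochs = _example_lr_constant * _example_lr_bump  # 2e-5 afterwards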
Example #53
  def testMultipleTargetModalities(self):
    # HParams
    hparams = trainer_lib.create_hparams(
        "transformer_tiny", data_dir=self.data_dir, problem_name="tiny_algo")
    tm = hparams.problem.get_hparams().target_modality
    hparams.problem.get_hparams().target_modality = {
        "targets": tm,
        "A": tm,
        "B": tm
    }

    # Dataset
    problem = hparams.problem
    dataset = problem.dataset(tf.estimator.ModeKeys.TRAIN, self.data_dir)
    dataset = dataset.repeat(None).padded_batch(10, dataset.output_shapes)
    features = dataset.make_one_shot_iterator().get_next()
    features = problem_lib.standardize_shapes(features)
    features["A"] = features["B"] = features["targets"]

    # Model
    model = registry.model("transformer")(hparams, tf.estimator.ModeKeys.TRAIN)

    def body(args, mb=model.body):
      out = mb(args)
      return {"targets": out, "A": out, "B": out}

    model.body = body

    logits, losses = model(features)

    self.assertTrue("training" in losses)
    loss = losses["training"]

    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      sess.run([logits, loss])
  def test_no_crash_cartpole(self):
    hparams = trainer_lib.create_hparams(
        "discrete_action_base", "epochs_num=11,video_during_eval=False")
    rl_trainer_lib.train(hparams, "CartPole-v0")

  def test_no_crash_pendulum(self):
    hparams = trainer_lib.create_hparams(
        "continuous_action_base", "epochs_num=11,video_during_eval=False")
    rl_trainer_lib.train(hparams, "Pendulum-v0")
def main(_):
  hparams = registry.hparams(FLAGS.loop_hparams_set)
  hparams.parse(FLAGS.loop_hparams)
  output_dir = FLAGS.output_dir

  subdirectories = ["data", "tmp", "world_model", "ppo"]
  using_autoencoder = hparams.autoencoder_train_steps > 0
  if using_autoencoder:
    subdirectories.append("autoencoder")
  directories = setup_directories(output_dir, subdirectories)

  if hparams.game in gym_env.ATARI_GAMES:
    game_with_mode = hparams.game + "_deterministic-v4"
  else:
    game_with_mode = hparams.game

  if using_autoencoder:
    simulated_problem_name = (
        "gym_simulated_discrete_problem_with_agent_on_%s_autoencoded"
        % game_with_mode)
  else:
    simulated_problem_name = ("gym_simulated_discrete_problem_with_agent_on_%s"
                              % game_with_mode)
    if simulated_problem_name not in registry.list_problems():
      tf.logging.info("Game Problem %s not found; dynamically registering",
                      simulated_problem_name)
      gym_env.register_game(hparams.game, game_mode="Deterministic-v4")

  epoch = hparams.epochs - 1
  epoch_data_dir = os.path.join(directories["data"], str(epoch))
  ppo_model_dir = directories["ppo"]

  world_model_dir = directories["world_model"]

  gym_problem = registry.problem(simulated_problem_name)

  model_hparams = trainer_lib.create_hparams(hparams.generative_model_params)
  environment_spec = copy.copy(gym_problem.environment_spec)
  environment_spec.simulation_random_starts = hparams.simulation_random_starts

  batch_env_hparams = trainer_lib.create_hparams(hparams.ppo_params)
  batch_env_hparams.add_hparam("model_hparams", model_hparams)
  batch_env_hparams.add_hparam("environment_spec", environment_spec)
  batch_env_hparams.num_agents = 1

  with temporary_flags({
      "problem": simulated_problem_name,
      "model": hparams.generative_model,
      "hparams_set": hparams.generative_model_params,
      "output_dir": world_model_dir,
      "data_dir": epoch_data_dir,
  }):
    sess = tf.Session()
    env = DebugBatchEnv(batch_env_hparams, sess)
    sess.run(tf.global_variables_initializer())
    env.initialize()

    env_model_loader = tf.train.Saver(
        tf.global_variables("next_frame*"))
    trainer_lib.restore_checkpoint(world_model_dir, env_model_loader, sess,
                                   must_restore=True)

    model_saver = tf.train.Saver(
        tf.global_variables(".*network_parameters.*"))
    trainer_lib.restore_checkpoint(ppo_model_dir, model_saver, sess)

    key_mapping = gym_problem.env.env.get_keys_to_action()
    # map special codes
    key_mapping[()] = 100
    key_mapping[(ord("r"),)] = 101
    key_mapping[(ord("p"),)] = 102

    play.play(env, zoom=2, fps=10, keys_to_action=key_mapping)
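
# temporary_flags is not defined in this snippet. A plausible sketch of the
# context manager it suggests (an assumption about its behavior, not the
# library's implementation): override FLAGS values for the duration of the
# block and restore the previous values afterwards.
import contextlib

@contextlib.contextmanager
def _temporary_flags_sketch(flag_settings):
  old_values = {name: getattr(FLAGS, name) for name in flag_settings}
  for name, value in flag_settings.items():
    setattr(FLAGS, name, value)
  try:
    yield
  finally:
    for name, value in old_values.items():
      setattr(FLAGS, name, value)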
def main(_):
  hparams = trainer_lib.create_hparams(FLAGS.hparams_set, FLAGS.hparams)
  train(hparams, FLAGS.output_dir)
Example #58
def create_t2t_hparams():
  return trainer_lib.create_hparams(
      FLAGS_hparams_set,
      FLAGS_hparams,
      data_dir=os.path.expanduser(FLAGS_data_dir),
      problem_name=FLAGS_problem)
Example #59
def create_surrogate_hparams():
  return trainer_lib.create_hparams(FLAGS.surrogate_hparams_set, None)
Example #60
def main(_):
  hparams = trainer_lib.create_hparams(FLAGS.hparams_set, FLAGS.hparams)
  rl_trainer_lib.train(hparams, FLAGS.problem, FLAGS.output_dir)