예제 #1
0
  def testModel(self):
    """Runs a tiny Transformer on tiny_algo and checks the output shapes."""
    hparams = trainer_lib.create_hparams(
        "transformer_tiny", data_dir=self.data_dir, problem_name="tiny_algo")
    problem = hparams.problem

    # Input pipeline: repeat forever and emit padded batches.
    batch_size = 10
    dataset = problem.dataset(tf.estimator.ModeKeys.TRAIN, self.data_dir)
    padded_shapes = dataset.output_shapes
    dataset = dataset.repeat(None).padded_batch(batch_size, padded_shapes)
    features = problem_lib.standardize_shapes(
        dataset.make_one_shot_iterator().get_next())

    # Build the model graph on top of the features.
    model_cls = registry.model("transformer")
    model = model_cls(hparams, tf.estimator.ModeKeys.TRAIN)
    logits, losses = model(features)

    self.assertIn("training", losses)
    training_loss = losses["training"]

    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      logits_val, loss_val = sess.run([logits, training_loss])
      # Axis 1 is masked out before comparing; every other axis is fixed.
      masked_shape = [
          None if axis == 1 else dim
          for axis, dim in enumerate(logits_val.shape)
      ]
      self.assertAllEqual(masked_shape, [batch_size, None, 1, 1, 4])
      # The training loss must be a scalar.
      self.assertEqual(loss_val.shape, ())
예제 #2
0
  def testT2TModelRegistration(self):
    """A decorated T2TModel subclass can be looked up as "my_model1"."""

    @registry.register_model
    class MyModel1(t2t_model.T2TModel):
      pass

    registered = registry.model("my_model1")
    self.assertIs(registered, MyModel1)
예제 #3
0
  def testNamedRegistration(self):
    """A model registered with an explicit name is found under that name."""

    @registry.register_model("model2")
    class MyModel1(t2t_model.T2TModel):
      pass

    registered = registry.model("model2")
    self.assertIs(registered, MyModel1)
예제 #4
0
  def testNonT2TModelRegistration(self):
    """A plain function can be registered and retrieved as a model."""

    @registry.register_model
    def model_fn():
      pass

    registered = registry.model("model_fn")
    self.assertIs(registered, model_fn)
예제 #5
0
 def nth_model(n):
   """Build the model for the n-th problem, plus some added variables.

   Besides the model graph, this creates non-trainable moving-average
   variables for every entry of the model's loss dict and for their sum, plus
   a per-problem step counter, and makes the returned total loss depend on
   the ops that update them.

   This is a closure: `model`, `hparams`, `mode`, `dp`, `devices`,
   `decode_hparams`, `decode_hp`, `features`, `worker_id`, `worker_replicas`,
   `is_training`, `eval_run_autoregressive` and `loss_variable_names` are all
   taken from the enclosing scope.

   Args:
     n: index into `hparams.problems` of the problem to build.

   Returns:
     In PREDICT mode, the result of `infer` on the model. Otherwise the list
     `[total_loss, logits]`, where `total_loss` has been reshaped to a scalar.
   """
   # NOTE: despite its name, `model_class` holds a constructed model instance.
   model_class = registry.model(model)(
       hparams,
       mode,
       hparams.problems[n],
       n,
       dp,
       devices.ps_devices(all_workers=True),
       decode_hparams=decode_hparams)
   if mode == tf.estimator.ModeKeys.PREDICT:
     # NOTE(review): decoding options are read from `decode_hp`, while the
     # constructor above is given `decode_hparams`; neither is defined in this
     # function -- confirm both names exist in the enclosing scope.
     return model_class.infer(
         features,
         beam_size=decode_hp.beam_size,
         top_beams=(decode_hp.beam_size if decode_hp.return_beams else 1),
         alpha=decode_hp.alpha,
         decode_length=decode_hp.extra_length)
   # In distributed mode, we build graph for problem=0 and problem=worker_id.
   skipping_is_on = hparams.problem_choice == "distributed" and is_training
   problem_worker_id = worker_id % len(hparams.problems)
   skip_this_one = n != 0 and n % worker_replicas != problem_worker_id
   # On worker 0 also build graph for problems <= 1.
   # TODO(lukaszkaiser): why is this hack needed for variables init? Repair.
   skip_this_one = skip_this_one and (worker_id != 0 or n > 1)
   if eval_run_autoregressive and mode == tf.estimator.ModeKeys.EVAL:
     logits, losses_dict = model_class.eval_autoregressive(features)
   else:
     logits, losses_dict = model_class(
         features, skip=(skipping_is_on and skip_this_one))
   with tf.variable_scope("losses_avg"):
     # `ops` collects every update op that must run with the loss.
     total_loss, ops = 0.0, []
     for loss_key, loss_value in six.iteritems(losses_dict):
       loss_name = "problem_%d/%s_loss" % (n, loss_key)
       loss_moving_avg = tf.get_variable(
           loss_name, initializer=100.0, trainable=False)
       # Record the variable name in the enclosing-scope list.
       loss_variable_names.append(loss_name)
       # Exponential moving average: 0.9 * old + 0.1 * new.
       ops.append(
           loss_moving_avg.assign(loss_moving_avg * 0.9 + loss_value * 0.1))
       total_loss += loss_value
     try:  # Total loss avg might be reused or not, we try both.
       with tf.variable_scope(tf.get_variable_scope(), reuse=True):
         # Total loss was already constructed on input.
         loss_moving_avg = tf.get_variable("problem_%d/total_loss" % n)
     except ValueError:
       # The variable does not exist yet under reuse=True, so create it.
       loss_moving_avg = tf.get_variable(
           "problem_%d/total_loss" % n, initializer=100.0, trainable=False)
     ops.append(
         loss_moving_avg.assign(loss_moving_avg * 0.9 + total_loss * 0.1))
   with tf.variable_scope("train_stats"):  # Count steps for this problem.
     problem_steps = tf.get_variable(
         "problem_%d_steps" % n, initializer=0, trainable=False)
     ops.append(problem_steps.assign_add(1))
   with tf.control_dependencies(ops):  # Make sure the ops run.
     # Ensure the loss is a scalar here.
     total_loss = tf.reshape(total_loss, [], name="total_loss_control_id")
   return [total_loss, logits]
예제 #6
0
def score_file(filename):
  """Score each line in a file and return the scores.

  Every line holds either just the targets, or the inputs and the targets
  separated by a single tab. The model named by FLAGS.model is restored from
  the checkpoint recorded in FLAGS.output_dir and evaluated one line at a
  time.

  Args:
    filename: path of the text file to score.

  Returns:
    A list with the evaluated "training" loss of each line, in file order.

  Raises:
    ValueError: if no checkpoint is found in FLAGS.output_dir, if a line has
      more than one tab separator, or if the problem has an "inputs" feature
      but a line provides no inputs.
  """
  # Prepare model.
  hparams = create_hparams()
  encoders = registry.problem(FLAGS.problem).feature_encoders(FLAGS.data_dir)
  has_inputs = "inputs" in encoders

  # Prepare features for feeding into the model.
  if has_inputs:
    inputs_ph = tf.placeholder(dtype=tf.int32)  # Just length dimension.
    batch_inputs = tf.reshape(inputs_ph, [1, -1, 1, 1])  # Make it 4D.
  targets_ph = tf.placeholder(dtype=tf.int32)  # Just length dimension.
  batch_targets = tf.reshape(targets_ph, [1, -1, 1, 1])  # Make it 4D.
  features = {
      "inputs": batch_inputs,
      "targets": batch_targets,
  } if has_inputs else {"targets": batch_targets}

  # Prepare the model and the graph when model runs on features.
  model = registry.model(FLAGS.model)(hparams, tf.estimator.ModeKeys.EVAL)
  _, losses = model(features)
  saver = tf.train.Saver()

  with tf.Session() as sess:
    # Load weights from checkpoint.
    ckpts = tf.train.get_checkpoint_state(FLAGS.output_dir)
    if ckpts is None or not ckpts.model_checkpoint_path:
      # Fail with a readable message instead of an AttributeError on None.
      raise ValueError("No checkpoint found in %s." % FLAGS.output_dir)
    saver.restore(sess, ckpts.model_checkpoint_path)
    # Run on each line.
    with tf.gfile.Open(filename) as f:
      lines = f.readlines()
    results = []
    for line in lines:
      tab_split = line.split("\t")
      if len(tab_split) > 2:
        raise ValueError("Each line must have at most one tab separator.")
      if len(tab_split) == 1:
        if has_inputs:
          # Without this check `inputs` would be unbound on the first line,
          # or silently reuse the inputs of the previous line.
          raise ValueError(
              "The problem expects inputs, but a line has no tab-separated "
              "inputs: %r" % line)
        targets = tab_split[0].strip()
      else:
        inputs = tab_split[0].strip()
        targets = tab_split[1].strip()
      # Run encoders and append EOS symbol.
      targets_numpy = encoders["targets"].encode(
          targets) + [text_encoder.EOS_ID]
      feed = {targets_ph: targets_numpy}
      if has_inputs:
        feed[inputs_ph] = (
            encoders["inputs"].encode(inputs) + [text_encoder.EOS_ID])
      # Get the score.
      results.append(sess.run(losses["training"], feed))
  return results
예제 #7
0
def main(_):
  """Prints reward confusion matrices for a model restored from a checkpoint.

  Runs the model named by FLAGS.model in PREDICT mode over the problem's
  dataset and accumulates a per-frame and a next-frame confusion matrix of
  ground-truth versus predicted "target_reward" values.
  """
  tf.logging.set_verbosity(tf.logging.INFO)
  trainer_lib.set_random_seed(FLAGS.random_seed)
  usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)

  hparams = create_hparams()
  hparams.force_full_predict = True
  batch_size = hparams.batch_size

  # Iterate over the dev/test partition of the data.
  # Change the data partition if necessary.
  problem = registry.problem(FLAGS.problem)
  dataset = problem.dataset(
      tf.estimator.ModeKeys.PREDICT, shuffle_files=False, hparams=hparams)
  dataset = dataset.apply(tf.contrib.data.batch_and_drop_remainder(batch_size))
  data = dataset.make_one_shot_iterator().get_next()
  input_data = {
      key: tensor for key, tensor in data.items() if key.startswith("input")
  }

  # Create the model and its prediction ops.
  model = registry.model(FLAGS.model)(hparams, tf.estimator.ModeKeys.PREDICT)
  prediction_ops = model.infer(input_data)

  # Confusion matrices, indexed as [ground truth, prediction].
  num_rewards = hparams.problem.num_rewards
  cm_shape = (num_rewards, num_rewards)
  cm_per_frame = np.zeros(cm_shape, dtype=np.uint64)
  cm_next_frame = np.zeros(cm_shape, dtype=np.uint64)

  saver = tf.train.Saver()
  with tf.train.SingularMonitoredSession() as sess:
    # Load the latest checkpoint into the underlying session.
    ckpt_state = tf.train.get_checkpoint_state(FLAGS.output_dir)
    saver.restore(sess.raw_session(), ckpt_state.model_checkpoint_path)

    fetches = [prediction_ops["target_reward"], data["target_reward"]]
    counter = 0
    while not sess.should_stop():
      counter += 1
      print(counter)  # Progress: one line per evaluated batch.

      # Predict rewards and fetch the matching ground truth.
      rew_pd, rew_gt = sess.run(fetches)

      for i in range(batch_size):
        gt_frames, pd_frames = rew_gt[i], rew_pd[i]
        cm_next_frame[gt_frames[0, 0], pd_frames[0, 0]] += 1
        for gt, pd in zip(gt_frames, pd_frames):
          cm_per_frame[gt, pd] += 1

  print_confusion_matrix("Per-frame Confusion Matrix", cm_per_frame)
  print_confusion_matrix("Next-frame Confusion Matrix", cm_next_frame)
예제 #8
0
  def __init__(self,
               hparams,
               mode=tf.estimator.ModeKeys.TRAIN,
               problem_hparams=None,
               data_parallelism=None,
               decode_hparams=None):
    """Builds the teacher and student models for the current distill phase.

    When the phase-specific learning rate (teacher's for "train", student's
    for "distill") is set, it overrides `hparams.learning_rate` in place.
    """
    assert hparams.distill_phase in ["train", "distill"]

    phase = hparams.distill_phase
    if phase == "train":
      learning_rate_override = hparams.teacher_learning_rate
    elif phase == "distill":
      learning_rate_override = hparams.student_learning_rate
    else:
      learning_rate_override = None
    if learning_rate_override:
      hparams.learning_rate = learning_rate_override

    # Arguments every constructed model shares besides its own hparams.
    shared_args = (mode, problem_hparams, data_parallelism, decode_hparams)

    self.teacher_hparams = registry.hparams(hparams.teacher_hparams)
    teacher_cls = registry.model(hparams.teacher_model)
    self.teacher_model = teacher_cls(self.teacher_hparams, *shared_args)

    self.student_hparams = registry.hparams(hparams.student_hparams)
    student_cls = registry.model(hparams.student_model)
    self.student_model = student_cls(self.student_hparams, *shared_args)

    super(Distillation, self).__init__(hparams, *shared_args)
예제 #9
0
  def testSingleTrainStepCall(self):
    """Illustrate how to run a T2T model in a raw session."""

    # Settings that would normally be given on the command line.
    model_name = "transformer"
    FLAGS.hparams_set = "transformer_test"
    FLAGS.problems = "tiny_algo"
    data_dir = "/tmp"  # Used only when a vocab file or such like is needed.

    # Problem encoders and hparams.
    encoders = registry.problem(FLAGS.problems).feature_encoders(data_dir)
    hparams = trainer_utils.create_hparams(FLAGS.hparams_set, data_dir)
    trainer_utils.add_problem_hparams(hparams, FLAGS.problems)

    # Pick a mode and instantiate the model.
    mode = tf.estimator.ModeKeys.TRAIN
    model = registry.model(model_name)(hparams, mode)

    def make_batched_placeholder():
      """Returns an int32 placeholder and a 4D, batch-of-one view of it."""
      placeholder = tf.placeholder(dtype=tf.int32)  # Just length dimension.
      return placeholder, tf.reshape(placeholder, [1, -1, 1, 1])

    inputs_ph, batch_inputs = make_batched_placeholder()
    targets_ph, batch_targets = make_batched_placeholder()
    features = {
        "inputs": batch_inputs,
        "targets": batch_targets,
        "target_space_id": tf.constant(hparams.problems[0].target_space_id)
    }

    # Build the graph twice; the second call must not add variables.
    predictions, _ = model(features)
    nvars = len(tf.trainable_variables())
    model(features)
    self.assertEqual(nvars, len(tf.trainable_variables()))

    # With the graph in place, run it on some data.
    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      raw_inputs = "0 1 0"
      raw_targets = "0 1 0"
      # The problem encoders turn raw strings into id sequences to feed.
      feed = {
          inputs_ph: encoders["inputs"].encode(raw_inputs),
          targets_ph: encoders["targets"].encode(raw_targets),
      }
      np_predictions = sess.run(predictions, feed)
      # Expected shape: batch = 1, length = 3, two singleton axes, and a
      # vocabulary of size 4.
      self.assertEqual(np_predictions.shape, (1, 3, 1, 1, 4))
예제 #10
0
  def make_estimator_model_fn(model_name,
                              hparams,
                              decode_hparams=None,
                              use_tpu=False):
    """Returns a model_fn that delegates to the registered model class.

    The returned function forwards its arguments, together with `hparams`,
    `decode_hparams` and `use_tpu`, to the class's `estimator_model_fn`.
    """
    model_cls = registry.model(model_name)
    # Options fixed at creation time and forwarded on every call.
    fixed_kwargs = dict(decode_hparams=decode_hparams, use_tpu=use_tpu)

    def wrapping_model_fn(features, labels, mode, params=None, config=None):
      return model_cls.estimator_model_fn(
          hparams, features, labels, mode,
          config=config, params=params, **fixed_kwargs)

    return wrapping_model_fn
예제 #11
0
 def get_mnist_random_output(self, model_name, hparams_set=None,
                             mode=tf.estimator.ModeKeys.TRAIN):
   """Runs a model once on a random MNIST-shaped example.

   Args:
     model_name: registered name of the model to build.
     hparams_set: name of the hparams set; defaults to `model_name`.
     mode: the tf.estimator.ModeKeys value the model is built with.

   Returns:
     The evaluated logits of the model.
   """
   hparams_set = hparams_set or model_name
   # np.random.random_integers is deprecated; randint draws the same values
   # but takes an exclusive upper bound, hence 256 and 10.
   x = np.random.randint(0, high=256, size=(1, 28, 28, 1))
   y = np.random.randint(0, high=10, size=(1, 1))
   # The image is fed as "targets" and the label as "inputs"; presumably
   # because the problem is the reversed "image_mnist_rev".
   features = {
       "targets": tf.constant(x, dtype=tf.int32),
       "inputs": tf.constant(y, dtype=tf.int32),
   }
   hparams = trainer_lib.create_hparams(
       hparams_set, problem_name="image_mnist_rev", data_dir=".")
   model = registry.model(model_name)(hparams, mode)
   tf.train.create_global_step()
   logits, _ = model(features)
   with self.test_session() as session:
     session.run(tf.global_variables_initializer())
     res = session.run(logits)
   return res
예제 #12
0
def build_model(hparams_set, model_name, data_dir, problem_name, beam_size=1):
  """Build the graph required to fetch the attention weights.

  Args:
    hparams_set: HParams set to build the model with.
    model_name: Name of model.
    data_dir: Path to directory containing training data.
    problem_name: Name of problem.
    beam_size: (Optional) Number of beams to use when decoding a translation.
        If set to 1 (default) then greedy decoding is used.

  Returns:
    Tuple of (
        inputs: Input placeholder to feed in ids to be translated.
        targets: Targets placeholder to feed to translation when fetching
            attention weights.
        samples: Tensor representing the ids of the translation.
        att_mats: Tensors representing the attention weights.
    )
  """
  hparams = trainer_lib.create_hparams(
      hparams_set, data_dir=data_dir, problem_name=problem_name)
  model_cls = registry.model(model_name)
  translate_model = model_cls(hparams, tf.estimator.ModeKeys.EVAL)

  # Both placeholders take one sequence of ids of unknown length.
  ids_shape = (1, None, 1, 1)
  inputs = tf.placeholder(tf.int32, shape=ids_shape, name='inputs')
  targets = tf.placeholder(tf.int32, shape=ids_shape, name='targets')
  teacher_forced_features = {'inputs': inputs, 'targets': targets}
  translate_model(teacher_forced_features)

  # Order matters: the attention tensors must be collected after the
  # training graph has been built (so the dict is filled) but before the
  # inference graph exists, otherwise the dict is filled with tensors from
  # inside the decoding tf.while_loop, which are marked unfetchable.
  att_mats = get_att_mats(translate_model)

  with tf.variable_scope(tf.get_variable_scope(), reuse=True):
    decoded = translate_model.infer({'inputs': inputs}, beam_size=beam_size)
    samples = decoded['outputs']

  return inputs, targets, samples, att_mats
예제 #13
0
  def __init__(self, environment_spec, length, other_hparams):
    """Batch of environments inside the TensorFlow graph."""
    del other_hparams  # Unused.
    self.length = length
    frames_problem = environment_spec.initial_frames_problem
    self._min_reward = frames_problem.min_reward
    self._num_frames = environment_spec.video_num_input_frames
    self._intrinsic_reward_scale = environment_spec.intrinsic_reward_scale

    # The model that stands in for the environment, built in PREDICT mode.
    model_hparams = trainer_lib.create_hparams(
        FLAGS.hparams_set, problem_name=FLAGS.problem)
    model_hparams.force_full_predict = True
    model_cls = registry.model(FLAGS.model)
    self._model = model_cls(model_hparams, tf.estimator.ModeKeys.PREDICT)

    _, self.action_shape, self.action_dtype = get_action_space(environment_spec)

    hparams = HParams(
        video_num_input_frames=environment_spec.video_num_input_frames,
        video_num_target_frames=environment_spec.video_num_target_frames,
        environment_spec=environment_spec)

    # Random starts shuffle the initial frames; otherwise only the first
    # dataset element is ever used.
    random_starts = bool(environment_spec.simulation_random_starts)
    dataset = frames_problem.dataset(
        tf.estimator.ModeKeys.TRAIN,
        FLAGS.data_dir,
        shuffle_files=random_starts,
        hparams=hparams)
    if random_starts:
      dataset = dataset.shuffle(buffer_size=100)
    else:
      dataset = dataset.take(1)

    dataset = dataset.map(lambda example: example["inputs"]).repeat()
    self.history_buffer = HistoryBuffer(dataset, self.length)

    observ_shape = (self.length,
                    frames_problem.frame_height,
                    frames_problem.frame_width,
                    frames_problem.num_channels)
    initial_observ = tf.zeros(observ_shape, tf.float32)
    self._observ = tf.Variable(initial_observ, trainable=False)
예제 #14
0
def encode_env_frames(problem_name, ae_problem_name, autoencoder_path,
                      epoch_data_dir):
  """Encode all frames from problem_name and write out as ae_problem_name.

  A split (train or eval) is skipped when any of its output files already
  exists.
  """
  with tf.Graph().as_default():
    ae_hparams = trainer_lib.create_hparams(
        "autoencoder_discrete_pong", problem_name=problem_name)
    problem = ae_hparams.problem
    model_cls = registry.model("autoencoder_ordered_discrete")
    model = model_cls(ae_hparams, tf.estimator.ModeKeys.EVAL)

    ae_problem = registry.problem(ae_problem_name)
    ae_training_paths = ae_problem.training_filepaths(epoch_data_dir, 10, True)
    ae_eval_paths = ae_problem.dev_filepaths(epoch_data_dir, 1, True)

    splits = [
        (tf.estimator.ModeKeys.TRAIN, ae_training_paths),
        (tf.estimator.ModeKeys.EVAL, ae_eval_paths),
    ]
    # Decide what to encode up front, before any output file is written.
    pending = [
        (mode, paths) for mode, paths in splits
        if not any(tf.gfile.Exists(path) for path in paths)
    ]

    for mode, paths in pending:
      dataset = problem.dataset(
          mode, epoch_data_dir,
          shuffle_files=False, output_buffer_size=100, preprocess=False)
      encode_dataset(
          model, dataset, problem, ae_hparams, autoencoder_path, paths)
예제 #15
0
  def testMultipleTargetModalities(self):
    """Runs a Transformer whose body emits three target features."""
    data_dir = algorithmic.TinyAlgo.data_dir
    target_names = ("targets", "targets_A", "targets_B")

    # Start from existing hparams and override the target modality.
    hparams = trainer_lib.create_hparams(
        "transformer_tiny", data_dir=data_dir, problem_name="tiny_algo")
    # Sharing is not currently supported for multitargets; turn it off.
    hparams.shared_embedding_and_softmax_weights = 0
    target_modality = hparams.problem_hparams.modality["targets"]
    hparams.problem_hparams.modality = {
        name: target_modality for name in target_names
    }
    hparams.problem._hparams = hparams.problem_hparams

    # Dataset: every extra target feature aliases the original targets.
    problem = hparams.problem
    dataset = problem.dataset(tf.estimator.ModeKeys.TRAIN, data_dir)
    padded_shapes = dataset.output_shapes
    dataset = dataset.repeat(None).padded_batch(10, padded_shapes)
    features = data_reader.standardize_shapes(
        dataset.make_one_shot_iterator().get_next())
    features["targets_A"] = features["targets"]
    features["targets_B"] = features["targets"]

    # Model whose body output is replicated for every target feature.
    model_cls = registry.model("transformer")
    model = model_cls(hparams, tf.estimator.ModeKeys.TRAIN)

    def body(args, single_body=model.body):
      out = single_body(args)
      return {name: out for name in target_names}

    model.body = body

    logits, losses = model(features)

    self.assertIn("training", losses)
    training_loss = losses["training"]

    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      sess.run([logits, training_loss])
예제 #16
0
    def __init__(self, environment_lambda, length):
        """Batch of environments inside the TensorFlow graph."""
        self.length = length
        initialization_env = environment_lambda()

        # The model that stands in for the environment, in PREDICT mode.
        hparams = trainer_lib.create_hparams(
            FLAGS.hparams_set, problem_name=FLAGS.problem, data_dir="UNUSED")
        hparams.force_full_predict = True
        model_cls = registry.model(FLAGS.model)
        self._model = model_cls(hparams, tf.estimator.ModeKeys.PREDICT)

        self.action_space = initialization_env.action_space
        self.action_shape = list(initialization_env.action_space.shape)
        self.action_dtype = tf.int32

        # Obtain two starting observations from the real environment.
        if hasattr(initialization_env.env, "get_starting_data"):
            starting_observations, _, _ = (
                initialization_env.env.get_starting_data())
            obs_1, obs_2 = starting_observations[0], starting_observations[1]
        else:
            # Ancient method for environments not supporting
            # get_starting_data: step through 20 frames with action 0 and
            # keep the two observations that follow.
            initialization_env.reset()
            for _ in range(20):
                initialization_env.step(0)
            obs_1 = initialization_env.step(0)[0]
            obs_2 = initialization_env.step(0)[0]

        self.frame_1 = tf.expand_dims(tf.cast(obs_1, tf.float32), 0)
        self.frame_2 = tf.expand_dims(tf.cast(obs_2, tf.float32), 0)

        observ_shape = (self.length, ) + initialization_env.observation_space.shape
        # TODO(blazej0): make this more generic -- allow a higher number of
        # previous observations and make it compatible with
        # NUMBER_OF_HISTORY_FRAMES.
        self._observ = tf.Variable(
            tf.zeros(observ_shape, tf.float32), trainable=False)
        self._prev_observ = tf.Variable(
            tf.zeros(observ_shape, tf.float32), trainable=False)
예제 #17
0
  def testMultipleTargetModalities(self):
    """Checks that a model with several target features builds and runs."""
    tiny_algo_dir = algorithmic.TinyAlgo.data_dir

    # Reuse existing hparams, then give every target the same modality.
    hparams = trainer_lib.create_hparams(
        "transformer_tiny", data_dir=tiny_algo_dir, problem_name="tiny_algo")
    # Multitarget models do not currently support weight sharing.
    hparams.shared_embedding_and_softmax_weights = 0
    shared_modality = hparams.problem_hparams.modality["targets"]
    hparams.problem_hparams.modality = dict.fromkeys(
        ("targets", "targets_A", "targets_B"), shared_modality)
    hparams.problem._hparams = hparams.problem_hparams

    # Dataset
    problem = hparams.problem
    dataset = problem.dataset(tf.estimator.ModeKeys.TRAIN, tiny_algo_dir)
    shapes = dataset.output_shapes
    dataset = dataset.repeat(None)
    dataset = dataset.padded_batch(10, shapes)
    iterator = dataset.make_one_shot_iterator()
    features = problem_lib.standardize_shapes(iterator.get_next())
    # The extra target features are aliases of the real targets.
    features["targets_A"] = features["targets"]
    features["targets_B"] = features["targets"]

    # Model
    model = registry.model("transformer")(hparams, tf.estimator.ModeKeys.TRAIN)

    def body(args, wrapped=model.body):
      # Fan the single body output out to every target feature.
      out = wrapped(args)
      return dict.fromkeys(("targets", "targets_A", "targets_B"), out)

    model.body = body

    logits, losses = model(features)

    self.assertIn("training", losses)

    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      sess.run([logits, losses["training"]])
예제 #18
0
 def __init__(self, *args, **kwargs):
   """Builds this model and the wrapped model named by `glue_model`.

   The wrapped model is constructed from deep copies of the constructor
   arguments with `problem_hparams` cleared. When `hparams.glue_is_lm` is
   set, the wrapped model is a subclass whose `has_input` is always False.

   Args:
     *args: positional arguments forwarded to the parent constructor; the
       first one, if present, is taken to be the hparams.
     **kwargs: keyword arguments forwarded to the parent constructor.
   """
   # Copies are taken before the parent constructor runs; presumably so the
   # wrapped model receives the arguments as they were passed in.
   model_args = copy.deepcopy(args)
   model_kwargs = copy.deepcopy(kwargs)
   super(Glue, self).__init__(*args, **kwargs)
   try:
     self._glue_symbol = self._hparams.glue_symbol
   except AttributeError:
     # Only a missing hparam falls back to the default; other errors
     # propagate instead of being silently swallowed.
     self._glue_symbol = 2
   # hparams may arrive positionally or by keyword; avoid an IndexError when
   # no positional arguments were given.
   glue_hparams = model_args[0] if model_args else model_kwargs.get("hparams")
   if hasattr(glue_hparams, "problem_hparams"):
     glue_hparams.problem_hparams = None
   model_kwargs["problem_hparams"] = None
   model_class = registry.model(self._hparams.glue_model)
   if self._hparams.glue_is_lm:
     class ModelWithoutInput(model_class):
       # This hack is necessary because has_input() always returns
       # true if no problem hparams are provided
       @property
       def has_input(self):
         return False
     self._glue_model = ModelWithoutInput(*model_args, **model_kwargs)
   else:
     self._glue_model = model_class(*model_args, **model_kwargs)
예제 #19
0
    def __init__(self,
                 environment_lambda,
                 length,
                 problem,
                 simulation_random_starts=False,
                 intrinsic_reward_scale=0.):
        """Batch of environments inside the TensorFlow graph."""
        self.length = length
        self._min_reward = problem.min_reward
        self._num_frames = problem.num_input_frames
        self._intrinsic_reward_scale = intrinsic_reward_scale

        initialization_env = environment_lambda()

        # The model that stands in for the environment, in PREDICT mode.
        hparams = trainer_lib.create_hparams(
            FLAGS.hparams_set, problem_name=FLAGS.problem)
        hparams.force_full_predict = True
        model_cls = registry.model(FLAGS.model)
        self._model = model_cls(hparams, tf.estimator.ModeKeys.PREDICT)

        self.action_space = initialization_env.action_space
        self.action_shape = list(initialization_env.action_space.shape)
        self.action_dtype = tf.int32

        # Without random starts only the first dataset element is used.
        random_starts = bool(simulation_random_starts)
        dataset = problem.dataset(
            tf.estimator.ModeKeys.TRAIN,
            FLAGS.data_dir,
            shuffle_files=random_starts)
        if not random_starts:
            dataset = dataset.take(1)

        dataset = dataset.map(lambda example: example["inputs"]).repeat()
        self.history_buffer = HistoryBuffer(dataset, self.length)

        observ_shape = (self.length,
                        problem.frame_height,
                        problem.frame_width,
                        problem.num_channels)
        initial_observ = tf.zeros(observ_shape, tf.float32)
        self._observ = tf.Variable(initial_observ, trainable=False)
예제 #20
0
    def testMultipleTargetModalities(self):
        """Runs a Transformer with the target features "targets", "A", "B"."""
        target_names = ("targets", "A", "B")

        # HParams: reuse the existing target modality for every target.
        hparams = trainer_lib.create_hparams(
            "transformer_tiny",
            data_dir=self.data_dir,
            problem_name="tiny_algo")
        tm = hparams.problem.get_hparams().target_modality
        hparams.problem.get_hparams().target_modality = {
            name: tm for name in target_names
        }

        # Dataset: "A" and "B" alias the real targets.
        problem = hparams.problem
        dataset = problem.dataset(tf.estimator.ModeKeys.TRAIN, self.data_dir)
        padded_shapes = dataset.output_shapes
        dataset = dataset.repeat(None).padded_batch(10, padded_shapes)
        features = problem_lib.standardize_shapes(
            dataset.make_one_shot_iterator().get_next())
        features["A"] = features["targets"]
        features["B"] = features["targets"]

        # Model whose body output is replicated for every target feature.
        model_cls = registry.model("transformer")
        model = model_cls(hparams, tf.estimator.ModeKeys.TRAIN)

        def body(args, single_body=model.body):
            out = single_body(args)
            return {name: out for name in target_names}

        model.body = body

        logits, losses = model(features)

        self.assertIn("training", losses)
        training_loss = losses["training"]

        with self.test_session() as sess:
            sess.run(tf.global_variables_initializer())
            sess.run([logits, training_loss])
def main(_):
    """Interactive loop: read a sentence from stdin and print its translation.

    Prompts with "> " until the user enters "q", enters a blank line, or
    stdin reaches end of file. Each sentence is stripped of surrounding
    whitespace before it is compared or translated.
    """
    # Fetch the problem
    wmt_problem = problems.problem(FLAGS.problem)

    # Declare the path we need
    data_dir = FLAGS.data_dir

    # NOTE: despite its name, this holds what tf.train.latest_checkpoint
    # returns for FLAGS.model_dir, not a directory.
    ckpt_dir = tf.train.latest_checkpoint(FLAGS.model_dir)

    # Create hparams and the model
    hparams = trainer_lib.create_hparams(FLAGS.hparams_set,
                                         data_dir=data_dir,
                                         problem_name=FLAGS.problem)

    # Get the encoders from the problem
    encoders = wmt_problem.feature_encoders(data_dir)

    translate_model = registry.model(FLAGS.model)(hparams, Modes.EVAL)

    while True:
        sys.stdout.write('> ')
        sys.stdout.flush()
        # Strip on every read so that 'q' and blank lines are recognised on
        # every iteration, not only on the first one.
        sentence_en = sys.stdin.readline().strip()
        if not sentence_en:
            break
        if sentence_en == 'q':
            print("Close this process")
            break
        outputs = translate(encoders, translate_model, ckpt_dir, sentence_en)
        print(outputs)
예제 #22
0
    def __init__(self,
                 reward_range,
                 observation_space,
                 action_space,
                 frame_stack_size,
                 initial_frame_chooser,
                 batch_size,
                 model_name,
                 model_hparams,
                 model_dir,
                 intrinsic_reward_scale=0.0):
        """Batch of environments inside the TensorFlow graph."""
        super(SimulatedBatchEnv, self).__init__(
            observation_space, action_space)

        self.batch_size = batch_size
        self._min_reward = reward_range[0]
        self._num_frames = frame_stack_size
        self._intrinsic_reward_scale = intrinsic_reward_scale

        # Work on a shallow copy so the caller's hparams object is not
        # rebound; the problem hparams and predict flag are set on the copy.
        world_model_hparams = copy.copy(model_hparams)
        problem = DummyWorldModelProblem(action_space, reward_range)
        trainer_lib.add_problem_hparams(world_model_hparams, problem)
        world_model_hparams.force_full_predict = True
        model_cls = registry.model(model_name)
        self._model = model_cls(
            world_model_hparams, tf.estimator.ModeKeys.PREDICT)

        self.history_buffer = HistoryBuffer(
            initial_frame_chooser,
            self.observ_shape,
            self.observ_dtype,
            self._num_frames,
            self.batch_size)

        # Current observations, one per environment in the batch.
        initial_observ = tf.zeros(
            (batch_size, ) + self.observ_shape, self.observ_dtype)
        self._observ = tf.Variable(initial_observ, trainable=False)

        self._model_dir = model_dir
예제 #23
0
def initialize_model(problem_name, data_dir, hparam_set, hparams, model_name,
                     ckpt_dir, split=Modes.TRAIN):
    """Build a PREDICT-mode model, run it once and load its checkpoint.

    The default graph is reset first. Hparams are created from `hparam_set`
    with the `hparams` overrides, the registered model is instantiated in
    PREDICT mode, and a batch-size-1 dataset iterator is created for the
    requested split. The model is called on the first batch, then
    `initialize_from_ckpt(ckpt_dir)` is invoked on it.

    Args:
      problem_name: name of the registered problem.
      data_dir: data directory passed to hparams creation and the dataset.
      hparam_set: name of the hparams set to create.
      hparams: second positional argument of trainer_lib.create_hparams.
      model_name: name of the registered model.
      ckpt_dir: directory handed to model.initialize_from_ckpt.
      split: dataset split to read; defaults to Modes.TRAIN.

    Returns:
      Tuple (model, iterator, hparams) where hparams is the object returned
      by trainer_lib.create_hparams.
    """
    tf.reset_default_graph()

    # Hparams and problem definition.
    full_hparams = trainer_lib.create_hparams(
        hparam_set, hparams, data_dir=data_dir, problem_name=problem_name)
    problem = registry.problem(problem_name)

    # Model definition.
    model = registry.model(model_name)(
        full_hparams,
        mode=Modes.PREDICT,
        problem_hparams=full_hparams.problem_hparams)

    # Dataset iterator built from the problem definition, one example per
    # batch, files not shuffled.
    unbatched = problem.dataset(
        Modes.PREDICT,
        dataset_split=split,
        data_dir=data_dir,
        shuffle_files=False,
        hparams=full_hparams)
    iterator = tfe.Iterator(unbatched.batch(1))

    # Calling the model once creates the ops that the checkpoint load then
    # initializes; the outputs themselves are not used.
    _unused_output, _unused_losses = model(iterator.next())
    model.initialize_from_ckpt(ckpt_dir)

    return model, iterator, full_hparams
예제 #24
0
  def testMultipleTargetModalities(self):
    """Runs a transformer whose body returns three identical target heads."""
    # Hyperparameters: reuse the single target modality for three keys.
    hparams = trainer_lib.create_hparams(
        "transformer_tiny", data_dir=self.data_dir, problem_name="tiny_algo")
    modality = hparams.problem.get_hparams().target_modality
    hparams.problem.get_hparams().target_modality = dict.fromkeys(
        ("targets", "A", "B"), modality)

    # Input pipeline: endlessly repeated, padded batches of 10.
    problem = hparams.problem
    dataset = problem.dataset(tf.estimator.ModeKeys.TRAIN, self.data_dir)
    batched = dataset.repeat(None).padded_batch(10, dataset.output_shapes)
    features = problem_lib.standardize_shapes(
        batched.make_one_shot_iterator().get_next())
    shared_targets = features["targets"]
    features["A"] = shared_targets
    features["B"] = shared_targets

    # Model: wrap the original body so it emits one output per target key.
    model = registry.model("transformer")(hparams, tf.estimator.ModeKeys.TRAIN)

    def body(args, wrapped_body=model.body):
      shared_out = wrapped_body(args)
      return dict.fromkeys(("targets", "A", "B"), shared_out)

    model.body = body

    logits, losses = model(features)

    self.assertTrue("training" in losses)
    training_loss = losses["training"]

    # Only checks that the graph can be initialised and evaluated.
    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      sess.run([logits, training_loss])
예제 #25
0
  def __init__(self, reward_range, observation_space, action_space,
               frame_stack_size, frame_height, frame_width,
               initial_frame_chooser, batch_size, model_name, model_hparams,
               model_dir, intrinsic_reward_scale=0.0):
    """Create the world model, frame history and state variables.

    Args:
      reward_range: indexable; element 0 is stored as the minimum reward and
        the whole value is handed to DummyWorldModelProblem.
      observation_space: forwarded to the parent constructor.
      action_space: forwarded to the parent constructor and to
        DummyWorldModelProblem.
      frame_stack_size: stored as the number of frames and passed to the
        history buffer.
      frame_height: forwarded to DummyWorldModelProblem.
      frame_width: forwarded to DummyWorldModelProblem.
      initial_frame_chooser: forwarded to HistoryBuffer.
      batch_size: stored on the instance; also the leading dimension of the
        observation variable.
      model_name: name looked up in the model registry.
      model_hparams: hyperparameters for the model; a shallow copy
        (copy.copy) is what gets modified and passed on.
      model_dir: stored on the instance as _model_dir.
      intrinsic_reward_scale: stored on the instance.
    """
    super(SimulatedBatchEnv, self).__init__(observation_space, action_space)

    self.batch_size = batch_size
    self._min_reward = reward_range[0]
    self._num_frames = frame_stack_size
    self._intrinsic_reward_scale = intrinsic_reward_scale

    # Work on a copy of the hparams, attach the dummy problem to it and
    # instantiate the registered model in PREDICT mode.
    hparams_copy = copy.copy(model_hparams)
    dummy_problem = DummyWorldModelProblem(
        action_space, reward_range, frame_height, frame_width)
    trainer_lib.add_problem_hparams(hparams_copy, dummy_problem)
    hparams_copy.force_full_predict = True
    model_cls = registry.model(model_name)
    self._model = model_cls(hparams_copy, tf.estimator.ModeKeys.PREDICT)

    # observ_shape / observ_dtype are read from the instance; they are
    # defined outside this method.
    self.history_buffer = HistoryBuffer(
        initial_frame_chooser, self.observ_shape, self.observ_dtype,
        self._num_frames, self.batch_size)

    # Non-trainable variable holding one zero-initialised observation per
    # batch entry.
    zero_observ = tf.zeros((batch_size,) + self.observ_shape,
                           self.observ_dtype)
    self._observ = tf.Variable(zero_observ, trainable=False)

    # Non-trainable scalar, zero-initialised.
    self._reset_model = tf.get_variable(
        "reset_model", [], trainable=False,
        initializer=tf.zeros_initializer())

    self._model_dir = model_dir
# -*- coding: utf-8 -*-
"""
@author: 代码医生工作室 
@公众号:xiangyuejiqiren   (内有更多优秀文章及学习资料)
@来源: <深度学习之TensorFlow工程化项目实战>配套代码 (700+页)
@配套代码技术支持:bbs.aianaconda.com      (有问必答)
"""

#6-19

import tensorflow as tf
from tensor2tensor import models

from tensor2tensor.utils import t2t_model
from tensor2tensor.utils import registry

# Number of registered models followed by the full list of their names.
registered_models = registry.list_models()
print(len(registered_models), registered_models)

# Whatever is registered under the model name 'transformer'.
print(registry.model('transformer'))

# Count of everything list_hparams() returns, then the result of
# list_hparams('transformer') (presumably the 'transformer'-prefixed
# subset -- confirm against the registry API).
registered_hparams = registry.list_hparams()
transformer_hparams = registry.list_hparams('transformer')
print(len(registered_hparams), transformer_hparams)

# The hparams registered as 'transformer_base_v1'.
print(registry.hparams('transformer_base_v1'))
    def __init__(self, reward_range, observation_space, action_space,
                 frame_stack_size, frame_height, frame_width,
                 initial_frame_chooser, batch_size, model_name, model_hparams,
                 model_dir, intrinsic_reward_scale=0.0, sim_video_dir=None):
        """Create the world model, frame history, counters and video state.

        Args:
          reward_range: indexable; element 0 is stored as the minimum reward
            and the whole value is handed to DummyWorldModelProblem.
          observation_space: forwarded to the parent constructor.
          action_space: forwarded to the parent constructor and to
            DummyWorldModelProblem.
          frame_stack_size: stored as the number of frames and passed to the
            history buffer.
          frame_height: forwarded to DummyWorldModelProblem.
          frame_width: forwarded to DummyWorldModelProblem.
          initial_frame_chooser: forwarded to HistoryBuffer.
          batch_size: stored on the instance; also the leading dimension of
            the observation variable.
          model_name: name looked up in the model registry.
          model_hparams: hyperparameters for the model; a shallow copy
            (copy.copy) is what gets modified and passed on.
          model_dir: stored on the instance as _model_dir.
          intrinsic_reward_scale: stored on the instance.
          sim_video_dir: when truthy, the directory is created and the video
            condition becomes true whenever the episode counter is a
            multiple of 100; otherwise the condition is constantly False.
        """
        super(SimulatedBatchEnv, self).__init__(observation_space,
                                                action_space)

        self._ffmpeg_works = common_video.ffmpeg_works()
        self.batch_size = batch_size
        self._min_reward = reward_range[0]
        self._num_frames = frame_stack_size
        self._intrinsic_reward_scale = intrinsic_reward_scale

        # Non-trainable int32 scalar starting at zero.
        counter_init = tf.zeros((), dtype=tf.int32)
        self._episode_counter = tf.get_variable(
            "episode_counter",
            initializer=counter_init,
            trainable=False,
            dtype=tf.int32)

        if not sim_video_dir:
            self._video_condition = tf.constant(False, dtype=tf.bool, shape=())
        else:
            self._video_every_epochs = 100
            self._video_dir = sim_video_dir
            self._video_writer = None
            self._video_counter = 0
            tf.gfile.MakeDirs(self._video_dir)
            counter_mod = (self._episode_counter.read_value() %
                           self._video_every_epochs)
            self._video_condition = tf.equal(counter_mod, 0)

        # Work on a copy of the hparams, attach the dummy problem to it and
        # instantiate the registered model in PREDICT mode.
        hparams_copy = copy.copy(model_hparams)
        dummy_problem = DummyWorldModelProblem(
            action_space, reward_range, frame_height, frame_width)
        trainer_lib.add_problem_hparams(hparams_copy, dummy_problem)
        hparams_copy.force_full_predict = True
        model_cls = registry.model(model_name)
        self._model = model_cls(hparams_copy, tf.estimator.ModeKeys.PREDICT)

        # observ_shape / observ_dtype are read from the instance; they are
        # defined outside this method.
        self.history_buffer = HistoryBuffer(
            initial_frame_chooser, self.observ_shape, self.observ_dtype,
            self._num_frames, self.batch_size)

        # Non-trainable variable holding one zero-initialised observation
        # per batch entry.
        zero_observ = tf.zeros((batch_size,) + self.observ_shape,
                               self.observ_dtype)
        self._observ = tf.Variable(zero_observ, trainable=False)

        # Non-trainable scalar, zero-initialised.
        self._reset_model = tf.get_variable(
            "reset_model", [], trainable=False,
            initializer=tf.zeros_initializer())

        self._model_dir = model_dir
예제 #28
0
def get_policy(observations, hparams, action_space,
               distributional_size=1, epoch=-1):
  """Build the policy network and return its action logits and value.

  A dummy policy problem is attached to `hparams` (which is modified in
  place), the model registered as `hparams.policy_network` is instantiated
  in TRAIN mode and called on a feature dict whose only real content is
  `observations`; every other feature is filled with zeros.

  Args:
    observations: observation tensor; dimensions 2 and 3 of its shape are
      used as frame height and width.
    hparams: hyperparameters; must provide `policy_problem_name`,
      `policy_network` and `use_epochs`.
    action_space: a gym.spaces.Discrete action space.
    distributional_size: number of buckets for distributional RL; values
      above 1 add a trailing dimension to the value target.
    epoch: epoch number; `epoch + 1` is fed to the model as a constant.

  Returns:
    Tuple (action logits, value).

  Raises:
    ValueError: if `action_space` is not a gym.spaces.Discrete.
  """
  if not isinstance(action_space, gym.spaces.Discrete):
    raise ValueError("Expecting discrete action space.")

  shape = common_layers.shape_list(observations)
  (frame_height, frame_width) = shape[2:4]

  # TODO(afrozm): We have these dummy problems mainly for hparams, so cleanup
  # when possible and do this properly.
  if hparams.policy_problem_name != "dummy_policy_problem_ttt":
    tf.logging.info("Using DummyPolicyProblem for the policy.")
    policy_problem = DummyPolicyProblem(action_space, frame_height, frame_width)
  else:
    tf.logging.info("Using DummyPolicyProblemTTT for the policy.")
    policy_problem = tic_tac_toe_env.DummyPolicyProblemTTT()

  trainer_lib.add_problem_hparams(hparams, policy_problem)
  hparams.force_full_predict = True
  policy_cls = registry.model(hparams.policy_network)
  model = policy_cls(hparams, tf.estimator.ModeKeys.TRAIN)

  # Fall back to a single target frame when the hparams do not define it.
  num_target_frames = getattr(hparams, "video_num_target_frames", 1)
  value_suffix = (
      [num_target_frames, distributional_size]
      if distributional_size > 1 else [num_target_frames])

  batch_dims = shape[:1]
  input_dims = shape[:2]
  target_dims = batch_dims + [num_target_frames]
  features = {
      "inputs": observations,
      "epoch": tf.constant(epoch + 1),
      "input_action": tf.zeros(input_dims + [1], dtype=tf.int32),
      "input_reward": tf.zeros(input_dims + [1], dtype=tf.int32),
      "targets": tf.zeros(target_dims + shape[2:]),
      "target_action": tf.zeros(target_dims + [1], dtype=tf.int32),
      "target_reward": tf.zeros(target_dims + [1], dtype=tf.int32),
      "target_policy": tf.zeros(target_dims + [action_space.n]),
      "target_value": tf.zeros(batch_dims + value_suffix)
  }
  model.distributional_value_size = max(distributional_size, 1)
  model.use_epochs = hparams.use_epochs
  with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
    t2t_model.create_dummy_vars()
    (predictions, _) = model(features)

  # Scalar value head takes the first target frame; the distributional head
  # keeps both trailing dimensions.
  value_out = predictions["target_value"][:, 0]
  if distributional_size > 1:
    value_out = predictions["target_value"][:, :]
  return (predictions["target_policy"][:, 0, :], value_out)
    def _init_env(self):
        """Build the model graph, open a session and restore a checkpoint.

        Steps, in order:
          1. Create the model hparams and the decode hparams.
          2. Create int32 placeholders for inputs and targets and collect
             them in ``self._features``.
          3. Instantiate the registered model in EVAL mode and build
             ``self.logits``.
          4. Switch the model to PREDICT mode and build
             ``self.outputs_scores`` via ``infer`` with variable reuse.
          5. Create ``self._sess`` and restore the checkpoint found in
             ``self._model_dir``.

        The graph is always built in EVAL mode. Earlier the mode was only
        assigned when ``self.predict_or_eval == 'and'``, so any other value
        failed with an unbound local at model construction; such values now
        log a warning and fall back to EVAL.
        """
        FLAGS.use_tpu = False
        tf.logging.set_verbosity(tf.logging.DEBUG)
        tf.logging.info("Import usr dir from %s", self._usr_dir)
        if self._usr_dir is not None:
            # NOTE(review): the guard checks self._usr_dir but the import
            # reads FLAGS.t2t_usr_dir -- confirm the two are kept in sync.
            usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)
        tf.logging.info("Start to create hparams,for %s of %s", self._problem,
                        self._hparams_set)

        # Model hyperparameters.
        self._hparams = create_hparams()
        # Hyperparameters used for decoding.
        self._hparams_decode = create_decode_hparams(
            extra_length=self._extra_length,
            batch_size=self._batch_size,
            beam_size=self._beam_size,
            alpha=self._alpha,
            return_beams=self._return_beams,
            write_beam_scores=self._write_beam_scores,
            force_decode_length=self._force_decode_length)

        tf.logging.info("Finish intialize environment")

        # Input placeholder. This first shape-less placeholder is replaced
        # right below; it is kept so the set of ops created here is the same
        # as before.
        self._inputs_ph = tf.placeholder(dtype=tf.int32)

        x = tf.placeholder(dtype=tf.int32)
        x.set_shape([None, None])  # unknown rank -> (?, ?)
        x = tf.expand_dims(x, axis=[2])  # -> (?, ?, 1)
        x = tf.to_int32(x)
        self._inputs_ph = x
        batch_inputs = x

        self._features = {
            "inputs": batch_inputs,
            "problem_choice": 0,  # We run on the first problem here.
            "input_space_id": self._hparams.problem_hparams.input_space_id,
            "target_space_id": self._hparams.problem_hparams.target_space_id
        }
        # Variable decode length; the original note says it is not needed
        # for classification. It is removed from the features again below.
        self.input_extra_length_ph = tf.placeholder(dtype=tf.int32)
        self._features['decode_length'] = self.input_extra_length_ph
        # Target placeholder.
        self._targets_ph = tf.placeholder(tf.int32,
                                          shape=(None, None, None, None),
                                          name='targets')
        self._features['targets'] = self._targets_ph

        # Drop the plain-integer entries (and decode_length) so that only
        # "inputs" and "targets" are passed to the model.
        del self._features["problem_choice"]
        del self._features["input_space_id"]
        del self._features["target_space_id"]
        del self._features['decode_length']

        # 'and' is the only value the original code handled. Anything else
        # used to leave `mode` unassigned; warn and use EVAL instead.
        if self.predict_or_eval != 'and':
            tf.logging.warning(
                "Unrecognized predict_or_eval=%r; building the graph in "
                "EVAL mode.", self.predict_or_eval)
        mode = tf.estimator.ModeKeys.EVAL

        translate_model = registry.model(self._model_name)(
            hparams=self._hparams,
            decode_hparams=self._hparams_decode,
            mode=mode)

        self.predict_dict = {}

        # Logits, built while the model is in EVAL mode.
        self.logits, _ = translate_model(self._features)
        # Inference output, built after switching the model to PREDICT mode
        # and reusing the variables created above.
        translate_model.set_mode(tf.estimator.ModeKeys.PREDICT)
        with tf.variable_scope(tf.get_variable_scope(), reuse=True):
            self.outputs_scores = translate_model.infer(
                features=self._features,
                decode_length=50,
                beam_size=self._beam_size,
                top_beams=self._beam_size,
                alpha=self._alpha)

        tf.logging.info("Start to init tf session")
        if self._isGpu:
            print('Using GPU in Decoder')
            gpu_options = tf.GPUOptions(
                per_process_gpu_memory_fraction=self._fraction)
            self._sess = tf.Session(
                config=tf.ConfigProto(allow_soft_placement=True,
                                      log_device_placement=False,
                                      gpu_options=gpu_options))
        else:
            print('Using CPU in Decoder')
            gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0)
            config = tf.ConfigProto(gpu_options=gpu_options)
            config.allow_soft_placement = True
            config.log_device_placement = False
            self._sess = tf.Session(config=config)
        with self._sess.as_default():
            # NOTE(review): presumably `ckpt` is None when the directory has
            # no checkpoint state, which would fail on the attribute access
            # below -- confirm and handle if that can happen.
            ckpt = saver_mod.get_checkpoint_state(self._model_dir)
            saver = tf.train.Saver()
            tf.logging.info("Start to restore the parameters from %s",
                            ckpt.model_checkpoint_path)
            saver.restore(self._sess, ckpt.model_checkpoint_path)
        tf.logging.info("Finish intialize environment")
    def _init_env(self):
        """Build the model graph, open a session and restore a checkpoint.

        Steps, in order:
          1. Create the model hparams, the decode hparams and
             ``self.estimator``.
          2. Create placeholders for inputs, targets and the extra decode
             length and collect them in ``self._features``.
          3. Instantiate the registered model in EVAL mode and build
             ``self.logits`` and ``self.att_mats``.
          4. Switch the model to PREDICT mode and build
             ``self.outputs_scores`` via ``infer`` with variable reuse.
          5. Create ``self._sess`` and restore the checkpoint found in
             ``self._model_dir``.
        """
        FLAGS.use_tpu = False
        tf.logging.set_verbosity(tf.logging.DEBUG)
        tf.logging.info("Import usr dir from %s", self._usr_dir)
        if self._usr_dir is not None:
            # NOTE(review): the guard checks self._usr_dir but the import
            # reads FLAGS.t2t_usr_dir -- confirm the two are kept in sync.
            usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)
        tf.logging.info("Start to create hparams,for %s of %s", self._problem,
                        self._hparams_set)

        self._hparams = create_hparams()
        self._hparams_decode = create_decode_hparams(
            extra_length=self._extra_length,
            batch_size=self._batch_size,
            beam_size=self._beam_size,
            alpha=self._alpha,
            return_beams=self._return_beams,
            write_beam_scores=self._write_beam_scores,
            force_decode_length=self._force_decode_length)

        self.estimator = trainer_lib.create_estimator(
            FLAGS.model,
            self._hparams,
            t2t_trainer.create_run_config(self._hparams),
            decode_hparams=self._hparams_decode,
            use_tpu=False)

        tf.logging.info("Finish intialize environment")

        # Placeholders fed at run time: 4-D inputs and targets plus a scalar
        # extra decode length.
        self.inputs_ph = tf.placeholder(tf.int32,
                                        shape=(None, None, 1, 1),
                                        name='inputs')
        self.targets_ph = tf.placeholder(tf.int32,
                                         shape=(None, None, None, None),
                                         name='targets')
        self.input_extra_length_ph = tf.placeholder(dtype=tf.int32, shape=[])

        self._features = {
            "inputs": self.inputs_ph,
            "problem_choice": 0,  # We run on the first problem here.
            "input_space_id": self._hparams.problem_hparams.input_space_id,
            "target_space_id": self._hparams.problem_hparams.target_space_id
        }
        # Variable decode length.
        self._features['decode_length'] = self.input_extra_length_ph
        # Targets.
        self._features['targets'] = self.targets_ph

        # Drop the plain-integer entries so that only "inputs", "targets"
        # and "decode_length" are passed to the model.
        del self._features["problem_choice"]
        del self._features["input_space_id"]
        del self._features["target_space_id"]

        mode = tf.estimator.ModeKeys.EVAL

        translate_model = registry.model(self._model_name)(
            hparams=self._hparams,
            decode_hparams=self._hparams_decode,
            mode=mode)

        self.predict_dict = {}

        # Logits and attention matrices, built while the model is in EVAL
        # mode. The original note gives the logits shape as
        # [?, ?, ?, 1, vocab_size].
        self.logits, _ = translate_model(self._features)
        from visualization import get_att_mats
        # The original note lists the result as: enc, dec, encdec.
        self.att_mats = get_att_mats(translate_model, self._model_name)

        # Inference output, built after switching the model to PREDICT mode
        # and reusing the variables created above.
        translate_model.set_mode(tf.estimator.ModeKeys.PREDICT)
        with tf.variable_scope(tf.get_variable_scope(), reuse=True):
            self.outputs_scores = translate_model.infer(
                features=self._features,
                decode_length=self._extra_length,
                beam_size=self._beam_size,
                top_beams=self._beam_size,
                alpha=self._alpha)

        tf.logging.info("Start to init tf session")
        if self._isGpu:
            print('Using GPU in Decoder')
            gpu_options = tf.GPUOptions(
                per_process_gpu_memory_fraction=self._fraction)
            self._sess = tf.Session(
                config=tf.ConfigProto(allow_soft_placement=True,
                                      log_device_placement=False,
                                      gpu_options=gpu_options))
        else:
            print('Using CPU in Decoder')
            gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0)
            config = tf.ConfigProto(gpu_options=gpu_options)
            config.allow_soft_placement = True
            config.log_device_placement = False
            self._sess = tf.Session(config=config)
        with self._sess.as_default():
            # NOTE(review): presumably `ckpt` is None when the directory has
            # no checkpoint state, which would fail on the attribute access
            # below -- confirm and handle if that can happen.
            ckpt = saver_mod.get_checkpoint_state(self._model_dir)
            saver = tf.train.Saver()
            tf.logging.info("Start to restore the parameters from %s",
                            ckpt.model_checkpoint_path)
            saver.restore(self._sess, ckpt.model_checkpoint_path)
        tf.logging.info("Finish intialize environment")
예제 #31
0
    def benchmark(self, ckpt_dir, outer_steps=100, inner_steps=1000):
        """Run repeatedly on dummy data to benchmark inference.

        The model is evaluated on an all-zero int32 batch of shape
        ``[hparams.batch_size, 3 * image_size * image_size]`` inside a
        ``tf.while_loop`` of ``inner_steps`` iterations; that loop is executed
        ``outer_steps`` times and the throughput of every execution is logged.
        The summary statistics skip the first execution.

        Args:
            ckpt_dir: passed to ``self.restore_model`` together with the
                session.
            outer_steps: number of timed ``sess.run`` calls.
            inner_steps: number of loop iterations inside each timed call.
        """
        # Turn off Grappler optimizations.
        options = {"disable_meta_optimizer": True}
        tf.config.optimizer.set_experimental_options(options)

        # Create the model outside the loop body.
        hparams = registry.hparams(self.hparams_set)
        hparams_lib.add_problem_hparams(hparams, self.problem_name)
        model_cls = registry.model(self.model_name)
        model = model_cls(hparams, tf.estimator.ModeKeys.EVAL)

        # Run only the model body (no data pipeline) on device.
        tokens_per_example = 3 * self.image_size * self.image_size
        feature_shape = [hparams.batch_size, tokens_per_example]
        features = {"targets": tf.zeros(feature_shape, dtype=tf.int32)}

        # Call the model once to initialize the variables. Note that
        # this should never execute.
        with tf.variable_scope(self.model_name) as vso:
            transformed_features = model.bottom(features)
            with tf.variable_scope("body") as vsi:
                body_out = model.body(transformed_features)
            logits = model.top(body_out, features)
            model.loss(logits, features)

        def call_model(features):
            # Re-enter the scopes captured above so the variables are reused.
            with tf.variable_scope(vso, reuse=tf.AUTO_REUSE):
                transformed_features = model.bottom(features)
                with tf.variable_scope(vsi, reuse=tf.AUTO_REUSE):
                    body_out = model.body(transformed_features)
                logits = model.top(body_out, features)
                return model.loss(logits, features)

        # Run the function body in a loop to amortize session overhead.
        loop_index = tf.zeros([], dtype=tf.int32)
        initial_loss = (tf.zeros([]), tf.zeros([]))

        def loop_cond(idx, _):
            return tf.less(idx, tf.constant(inner_steps, dtype=tf.int32))

        def loop_body(idx, _):
            return idx + 1, call_model(features)

        benchmark_op = tf.while_loop(loop_cond,
                                     loop_body, [loop_index, initial_loss],
                                     parallel_iterations=1,
                                     back_prop=False)

        session_config = tf.ConfigProto(gpu_options=tf.GPUOptions(
            allow_growth=False, per_process_gpu_memory_fraction=0.95))
        run_metadata = tf.RunMetadata()
        # Elements processed by one timed run. Derived from the same
        # per-example size as the features instead of a fixed 64 * 64 * 3,
        # so the figure stays correct for other image sizes.
        tokens_per_run = (inner_steps * hparams.batch_size *
                          tokens_per_example)
        with tf.Session(config=session_config) as sess:
            self.restore_model(sess, ckpt_dir)
            tps = []
            for idx in range(outer_steps):
                start_time = time.time()
                sess.run(benchmark_op, run_metadata=run_metadata)
                elapsed_time = time.time() - start_time
                tps.append(tokens_per_run / elapsed_time)
                logging.error("Iterations %d processed %f TPS.", idx, tps[-1])
            # Skip the first iteration where all the setup and allocation happens.
            tps = np.asarray(tps[1:])
            logging.error("Mean/Std/Max/Min throughput = %f / %f / %f / %f",
                          np.mean(tps), np.std(tps), tps.max(), tps.min())
예제 #32
0
    def __init__(self, src_vocab_size, trg_vocab_size, model_name,
                 problem_name, hparams_set_name, t2t_usr_dir, checkpoint_dir,
                 t2t_unk_id=None, single_cpu_thread=False,
                 max_terminal_id=-1, pop_id=-1):
        """Set up a simultaneous T2T predictor ready for predict_next().

        Inside a fresh ``tf.Graph`` the constructor

        - creates the hyper parameters for the requested set and problem,
        - creates placeholders for the source sequence and target prefix,
        - instantiates the registered T2T model in PREDICT mode,
        - builds the log-probability tensor from the model logits and keeps
          references to the model's encoder output, attention bias and
          decoder output,
        - creates the session via ``self.create_session()``.

        Args:
            src_vocab_size (int): Size of the source vocabulary.
            trg_vocab_size (int): Size of the target vocabulary.
            model_name (string): Name of the T2T model.
            problem_name (string): Name of the T2T problem.
            hparams_set_name (string): Name of the T2T hparams set.
            t2t_usr_dir (string): Same meaning as --t2t_usr_dir in
                                  tensor2tensor.
            checkpoint_dir (string): T2T checkpoint directory; the top
                                     most checkpoint listed in its
                                     `checkpoints` file is loaded.
            t2t_unk_id (int): ID used to obtain UNK scores. With None,
                              UNK always scores -inf.
            single_cpu_thread (bool): When true, tensorflow is kept from
                                      multithreading.
            max_terminal_id (int): Maximum terminal ID if positive.
                Required for syntax-based T2T models.
            pop_id (int): ID of the POP / closing bracket symbol if
                positive. Required for syntax-based T2T models.
        """
        super(SimT2TPredictor_v2, self).__init__(t2t_usr_dir, checkpoint_dir,
                                                 t2t_unk_id, single_cpu_thread)
        # Per-sentence state.
        self.consumed = []
        self.src_sentence = []
        self.pop_id = pop_id
        self.max_terminal_id = max_terminal_id
        self.previous_encode = -1
        self.previous_decode = -1

        predictor_graph = tf.Graph()
        with predictor_graph.as_default():
            hparams = self._create_hparams(src_vocab_size, trg_vocab_size,
                                           hparams_set_name, problem_name)
            problem_hparams = hparams.problems[0]

            # 1-D int32 placeholders for source tokens and target prefix.
            self._inputs_var = tf.placeholder(
                dtype=tf.int32, shape=[None], name="sgnmt_inputs")
            self._targets_var = tf.placeholder(
                dtype=tf.int32, shape=[None], name="sgnmt_targets")

            features = {
                "problem_choice": tf.constant(0),
                "input_space_id": tf.constant(problem_hparams.input_space_id),
                "target_space_id": tf.constant(
                    problem_hparams.target_space_id),
                "inputs": expand_input_dims_for_t2t(self._inputs_var),
                "targets": expand_input_dims_for_t2t(self._targets_var)
            }

            model_cls = registry.model(model_name)
            model = model_cls(hparams,
                              tf.estimator.ModeKeys.PREDICT,
                              problem_hparams,
                              0,
                              devices.data_parallelism(),
                              devices.ps_devices(all_workers=True))
            sharded_logits, _ = model.model_fn(features)

            # Only the first shard of the logits is used.
            self._log_probs = log_prob_from_logits(sharded_logits[0])
            self._encoder_output = model.encoder_output
            self._encoder_decoder_attention_bias = model.attention_bias
            self._decoder_output = model.decoder_output

            self.mon_sess = self.create_session()
예제 #33
0
def main(_):
    """Roll out a video model with random actions and save the frames.

    The model named by ``FLAGS.model`` is built in PREDICT mode and restored
    from the latest checkpoint in ``FLAGS.output_dir``. One example from the
    training dataset supplies the initial frames, actions and rewards. For
    ``FLAGS.num_steps`` steps the model predicts the next frame and reward
    from the most recent ``hparams.video_num_input_frames`` entries, a random
    action is drawn, and the predicted frame is written to the video given
    by ``FLAGS.output_gif``.
    """
    tf.logging.set_verbosity(tf.logging.INFO)
    trainer_lib.set_random_seed(FLAGS.random_seed)
    usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)

    # Create hparams
    hparams = create_hparams()
    hparams.force_full_predict = True
    hparams.scheduled_sampling_k = -1

    # Params
    num_agents = 1  # TODO(mbz): fix the code for more agents
    num_steps = FLAGS.num_steps
    num_actions = hparams.problem.num_actions
    frame_shape = hparams.problem.frame_shape
    if hparams.preprocess_resize_frames is not None:
        # Build a new list. `+=` on the hparams value would extend that list
        # in place and leave hparams.preprocess_resize_frames with an extra
        # channel entry for the dataset and model built from hparams below.
        frame_shape = (list(hparams.preprocess_resize_frames) +
                       [hparams.problem.num_channels])

    dataset = registry.problem(FLAGS.problem).dataset(
        tf.estimator.ModeKeys.TRAIN, shuffle_files=True, hparams=hparams)

    dataset = dataset.apply(
        tf.contrib.data.batch_and_drop_remainder(num_agents))
    data = dataset.make_one_shot_iterator().get_next()
    # Setup input placeholders
    input_size = [num_agents, hparams.video_num_input_frames]
    placeholders = {
        "inputs": tf.placeholder(tf.float32, input_size + frame_shape),
        "input_action": tf.placeholder(tf.int64, input_size + [1]),
        "input_reward": tf.placeholder(tf.int64, input_size + [1]),
    }
    # Create model
    model_cls = registry.model(FLAGS.model)
    model = model_cls(hparams, tf.estimator.ModeKeys.PREDICT)
    prediction_ops = model.infer(placeholders)

    # Bounded queues holding the most recent input frames, actions, rewards.
    states_q = Queue(maxsize=hparams.video_num_input_frames)
    actions_q = Queue(maxsize=hparams.video_num_input_frames)
    rewards_q = Queue(maxsize=hparams.video_num_input_frames)
    all_qs = (states_q, actions_q, rewards_q)

    writer = common_video.WholeVideoWriter(fps=10,
                                           output_path=FLAGS.output_gif)

    saver = tf.train.Saver()
    with tf.train.SingularMonitoredSession() as sess:
        # Load latest checkpoint
        ckpt = tf.train.get_checkpoint_state(
            FLAGS.output_dir).model_checkpoint_path
        saver.restore(sess.raw_session(), ckpt)

        # get init frames from the dataset
        data_np = sess.run(data)

        # Split along axis 1 into one entry per input frame and fill the
        # queues; the initial frames are also written to the video.
        frames = np.split(data_np["inputs"], hparams.video_num_input_frames, 1)
        for frame in frames:
            frame = np.squeeze(frame, 1)
            states_q.put(frame)
            writer.write(frame[0].astype(np.uint8))

        actions = np.split(data_np["input_action"],
                           hparams.video_num_input_frames, 1)
        for action in actions:
            actions_q.put(np.squeeze(action, 1))

        rewards = np.split(data_np["input_reward"],
                           hparams.video_num_input_frames, 1)
        for reward in rewards:
            rewards_q.put(np.squeeze(reward, 1))

        for step in range(num_steps):
            print(">>>>>>> ", step)

            # NOTE(review): np.random.randint(high) excludes `high`, so this
            # draws from [0, num_actions - 2] and never picks the last
            # action -- confirm whether that is intended.
            random_actions = np.random.randint(num_actions - 1)
            random_actions = np.expand_dims(random_actions, 0)
            random_actions = np.tile(random_actions, (num_agents, 1))

            # Shape inputs and targets
            inputs, input_action, input_reward = (np.stack(list(q.queue),
                                                           axis=1)
                                                  for q in all_qs)

            # Predict next frames
            feed = {
                placeholders["inputs"]: inputs,
                placeholders["input_action"]: input_action,
                placeholders["input_reward"]: input_reward,
            }
            predictions = sess.run(prediction_ops, feed_dict=feed)

            predicted_states = predictions["targets"][:, 0]
            predicted_reward = predictions["target_reward"][:, 0]

            # Update queues: drop the oldest entry, append the newest.
            new_data = (predicted_states, random_actions, predicted_reward)
            for q, d in zip(all_qs, new_data):
                q.get()
                q.put(d.copy())

            writer.write(np.round(predicted_states[0]).astype(np.uint8))

        video = writer.finish()
        writer.save_to_disk(video)
예제 #34
0
# Fetch the next batch from the iterator and cast both sides to int32.
x_y = iterator.get_next()
x = tf.cast(x_y['inputs'], tf.int32)
y = tf.cast(x_y['targets'], tf.int32)

# Feature dict for the model: inputs and targets get two extra axes
# (at positions 2 and 3) appended.
inputs = {
    "inputs": tf.expand_dims(tf.expand_dims(x, 2), 3),
    "target_space_id": p_hparams.target_space_id,
    "targets": tf.expand_dims(tf.expand_dims(y, 2), 3)
}

# version 1.3.0 introduced changes in the API
from pkg_resources import get_distribution as get_version
# The first two version components are concatenated into one integer
# (e.g. '1.3.0' -> 13, '1.10.0' -> 110) and compared against 13.
# NOTE(review): this is not a general version ordering -- confirm it holds
# for every tensor2tensor release this script must support.
t2t_version = int(''.join(get_version('tensor2tensor').version.split('.')[:2]))
is_t2t130_or_greater = t2t_version >= 13
if is_t2t130_or_greater:
    # Newer API: the model comes from the registry and is called directly.
    translate_model = registry.model(model_name)(hparams, mode)
    inputs['target_space_id'] = tf.convert_to_tensor(inputs['target_space_id'])
    logits, losses = translate_model(inputs)
    logits = tf.squeeze(logits, [2, 3])
else:
    # Older API: build the Transformer explicitly, set its target modality
    # and concatenate the sharded logits along axis 0.
    from tensor2tensor.models.transformer import Transformer
    translate_model = Transformer(hparams, mode, p_hparams)
    translate_model._hparams.problems[0].target_modality = SymbolModality(
        hparams, encoders['targets'].vocab_size)
    sharded_logits, losses = translate_model.model_fn(features=inputs)
    logits = tf.concat(sharded_logits, 0)
    logits = tf.squeeze(logits, [2, 3])

# When training, predictions are the arg-max over the last logits axis.
with tf.variable_scope(tf.get_variable_scope(), reuse=True):
    if args.training:
        preds = tf.to_int32(tf.arg_max(logits, dimension=-1))
예제 #35
0
 def testUnknownModel(self):
   """Looking up a model name that is not registered raises LookupError."""
   expect_lookup_failure = self.assertRaisesRegexp(
       LookupError, "never registered")
   with expect_lookup_failure:
     registry.model("not_registered")
예제 #36
0
def main(_):
  """Roll out a trained model on its own predictions and save a video.

  Builds hparams and the problem dataset from FLAGS, restores the checkpoint
  referenced by the checkpoint state in FLAGS.output_dir, seeds the input
  queues with one batch taken from the dataset and then, for FLAGS.num_steps
  steps, feeds the queued frames to the model and pushes the predicted frame
  back into the queue. Every seed frame and every predicted frame of the
  first agent is handed to the video writer, which is flushed to
  FLAGS.output_gif at the end.

  When the problem exposes `num_actions`, actions and rewards are fed as
  well: actions are drawn at random, rewards are the model's own predictions.

  Args:
    _: Unused; the function never reads it.
  """
  tf.logging.set_verbosity(tf.logging.INFO)
  trainer_lib.set_random_seed(FLAGS.random_seed)
  usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)

  # Create hparams
  hparams = trainer_lib.create_hparams(
      FLAGS.hparams_set,
      FLAGS.hparams,
      data_dir=os.path.expanduser(FLAGS.data_dir),
      problem_name=FLAGS.problem)
  hparams.force_full_predict = True
  hparams.scheduled_sampling_k = -1

  # Params
  num_agents = 1  # TODO(mbz): fix the code for more agents
  num_steps = FLAGS.num_steps
  # Problems without a `num_actions` attribute are rolled out on frames only.
  num_actions = getattr(hparams.problem, "num_actions", None)
  has_actions = num_actions is not None

  frame_shape = hparams.problem.frame_shape
  if hparams.preprocess_resize_frames is not None:
    # Build a fresh list. Using `+=` on the hparams value would append the
    # channel count to hparams.preprocess_resize_frames itself.
    frame_shape = (list(hparams.preprocess_resize_frames) +
                   [hparams.problem.num_channels])

  dataset = registry.problem(FLAGS.problem).dataset(
      tf.estimator.ModeKeys.TRAIN,
      shuffle_files=True,
      data_dir=os.path.expanduser(FLAGS.data_dir),
      hparams=hparams)

  dataset = dataset.apply(tf.contrib.data.batch_and_drop_remainder(num_agents))
  data = dataset.make_one_shot_iterator().get_next()

  # Setup input placeholders
  input_size = [num_agents, hparams.video_num_input_frames]
  placeholders = {
      "inputs": tf.placeholder(tf.float32, input_size + frame_shape)
  }
  if has_actions:
    placeholders.update({
        "input_action": tf.placeholder(tf.int64, input_size + [1]),
        "input_reward": tf.placeholder(tf.int64, input_size + [1]),
        "reset_internal_states": tf.placeholder(tf.float32, []),
    })

  # Create model.
  model_cls = registry.model(FLAGS.model)
  model = model_cls(hparams, tf.estimator.ModeKeys.PREDICT)
  prediction_ops = model.infer(placeholders)

  # Fixed-size FIFO queues holding the most recent input frames (and, when
  # the problem has actions, the matching actions and rewards).
  states_q = Queue(maxsize=hparams.video_num_input_frames)
  actions_q = Queue(maxsize=hparams.video_num_input_frames)
  rewards_q = Queue(maxsize=hparams.video_num_input_frames)
  if has_actions:
    all_qs = [states_q, actions_q, rewards_q]
  else:
    all_qs = [states_q]

  writer = common_video.WholeVideoWriter(
      fps=FLAGS.fps, output_path=FLAGS.output_gif)

  saver = tf.train.Saver(tf.trainable_variables())
  with tf.train.SingularMonitoredSession() as sess:
    # Load latest checkpoint
    ckpt = tf.train.get_checkpoint_state(FLAGS.output_dir).model_checkpoint_path
    saver.restore(sess.raw_session(), ckpt)

    # get init frames from the dataset
    data_np = sess.run(data)

    # Split along axis 1 into one chunk per input frame and queue each chunk
    # with that axis squeezed away.
    frames = np.split(data_np["inputs"], hparams.video_num_input_frames, 1)
    for frame in frames:
      frame = np.squeeze(frame, 1)
      states_q.put(frame)
      writer.write(frame[0].astype(np.uint8))

    if has_actions:
      actions = np.split(data_np["input_action"],
                         hparams.video_num_input_frames, 1)
      for action in actions:
        actions_q.put(np.squeeze(action, 1))

      rewards = np.split(data_np["input_reward"],
                         hparams.video_num_input_frames, 1)
      for reward in rewards:
        rewards_q.put(np.squeeze(reward, 1))

    for step in range(num_steps):
      print(">>>>>>> ", step)

      if has_actions:
        # NOTE(review): randint's upper bound is exclusive, so the action
        # index `num_actions - 1` is never drawn - confirm this is intended.
        random_actions = np.random.randint(num_actions-1)
        random_actions = np.expand_dims(random_actions, 0)
        random_actions = np.tile(random_actions, (num_agents, 1))

        # Shape inputs and targets
        inputs, input_action, input_reward = (
            np.stack(list(q.queue), axis=1) for q in all_qs)
      else:
        assert len(all_qs) == 1
        elems = list(all_qs[0].queue)
        # Defensive: give any queued frame with fewer than 4 dims a leading
        # axis before stacking.
        for i, e in enumerate(elems):
          if len(e.shape) < 4:
            elems[i] = np.expand_dims(e, axis=0)
        inputs = np.stack(elems, axis=1)

      # Predict next frames
      if has_actions:
        feed = {
            placeholders["inputs"]: inputs,
            placeholders["input_action"]: input_action,
            placeholders["input_reward"]: input_reward,
            # 1.0 on the first step only, 0.0 afterwards.
            placeholders["reset_internal_states"]: float(step == 0),
        }
      else:
        feed = {placeholders["inputs"]: inputs}
      predictions = sess.run(prediction_ops, feed_dict=feed)

      # Update queues: drop the oldest entry of every queue and append the
      # newly predicted one.
      if has_actions:
        predicted_states = predictions["targets"][:, 0]
        predicted_reward = predictions["target_reward"][:, 0]
        new_data = (predicted_states, random_actions, predicted_reward)
      else:
        predicted_states = predictions[:, 0]
        # One-element tuple, so that zip() pairs the single queue with the
        # whole predicted batch rather than with its first element.
        new_data = (predicted_states,)
      for q, d in zip(all_qs, new_data):
        q.get()
        q.put(d.copy())

      writer.write(np.round(predicted_states[0]).astype(np.uint8))

    writer.finish_to_disk()
예제 #37
0
def t2t_score_file(filename):
  """
  Build the T2T scoring graph and run it on the first line of a file.

  The model named by FLAGS_model is built in EVAL mode on two int32
  placeholders and its weights are restored from the checkpoint state found
  in FLAGS_output_dir. The first line of `filename` is split on its tab into
  an input and a target sentence, both are encoded with the problem's
  feature encoders (EOS appended), and the losses and final output for that
  pair are evaluated and pretty-printed.

  NOTE: the function returns from inside the line loop, so only the first
  line is ever scored; later lines are not read.

  :param str filename: Path of a text file with one `input<TAB>target` pair
      per line.
  :returns: Tuple `(sess, tvars, inputs_ph, targets_ph, losses)`; `None` when
      the file has no lines.
  :raises ValueError: If the line contains more than one tab.
  :raises AssertionError: If the line contains no tab, if the model is not a
      Transformer, or if the model's losses are not a dict.
  """
  # Prepare model.
  hparams = create_t2t_hparams()
  encoders = registry.problem(FLAGS_problem).feature_encoders(FLAGS_data_dir)

  # Prepare features for feeding into the model.
  inputs_ph = tf.placeholder(dtype=tf.int32, shape=(None, None))  # Just length dimension.
  targets_ph = tf.placeholder(dtype=tf.int32, shape=(None, None))  # Just length dimension.

  features = {
      "inputs": inputs_ph,
      "targets": targets_ph,
  }

  # Prepare the model and the graph when model runs on features.
  model = registry.model(FLAGS_model)(hparams, tf.estimator.ModeKeys.EVAL)
  assert isinstance(model, tensor2tensor.models.transformer.Transformer)
  #       final_output: tensor of logits with shape [batch_size, O, P, body_output_size.
  #       losses: either single loss as a scalar, a list, a tensor (to be averaged)
  #               or a dictionary of losses.
  final_output, losses = model(features)
  assert isinstance(losses, dict)
  saver = tf.train.Saver()

  # The session is handed back to the caller, so it is deliberately left open.
  sess = tf.Session()
  # Load weights from checkpoint.
  ckpts = tf.train.get_checkpoint_state(FLAGS_output_dir)
  ckpt = ckpts.model_checkpoint_path
  saver.restore(sess, ckpt)

  # Run on the first line. The context manager closes the file even though
  # the function returns from inside the loop.
  with open(filename) as lines:
    for line in lines:
      tab_split = line.split("\t")
      if len(tab_split) > 2:
        raise ValueError("Each line must have at most one tab separator.")
      assert len(tab_split) == 2
      targets = tab_split[1].strip()
      inputs = tab_split[0].strip()
      # Run encoders and append EOS symbol.
      targets_numpy = encoders["targets"].encode(targets) + [text_encoder.EOS_ID]
      inputs_numpy = encoders["inputs"].encode(inputs) + [text_encoder.EOS_ID]
      # Prepare the feed (batch of one).
      feed = {
          inputs_ph: [inputs_numpy],
          targets_ph: [targets_numpy]
      }

      np_res = sess.run({"losses": losses, "final_output": final_output}, feed_dict=feed)
      pprint(np_res)

      tvars = tf.trainable_variables()

      print('t2t inputs_ph:', inputs_ph, inputs_numpy)
      print('t2t targets_ph:', targets_ph, targets_numpy)

      return sess, tvars, inputs_ph, targets_ph, losses
예제 #38
0
파일: tf_t2t.py 프로젝트: ucam-smt/sgnmt
 def __init__(self,
              src_vocab_size,
              trg_vocab_size,
              model_name,
              problem_name,
              hparams_set_name,
              t2t_usr_dir,
              checkpoint_dir,
              t2t_unk_id=None,
              n_cpu_threads=-1,
              max_terminal_id=-1,
              pop_id=-1):
     """Set up a T2T predictor and its TensorFlow session.

     The constructor performs every step needed before predict_next()
     can be called:
     - load the hyper parameter set and add the problem hparams,
     - instantiate the registered T2T model in PREDICT mode,
     - create placeholders for the source sentence and target prefix,
     - build the graph that turns the model logits into log probs,
     - create the session via create_session().

     Args:
         src_vocab_size (int): Source vocabulary size.
         trg_vocab_size (int): Target vocabulary size.
         model_name (string): T2T model name.
         problem_name (string): T2T problem name.
         hparams_set_name (string): T2T hparams set name.
         t2t_usr_dir (string): See --t2t_usr_dir in tensor2tensor.
         checkpoint_dir (string): Path to the T2T checkpoint directory.
         t2t_unk_id (int): If set, use this ID to get UNK scores. If
                           None, UNK is always scored with -inf.
         n_cpu_threads (int): Number of TensorFlow CPU threads.
         max_terminal_id (int): If positive, maximum terminal ID. Needs
             to be set for syntax-based T2T models.
         pop_id (int): If positive, ID of the POP or closing bracket
             symbol. Needs to be set for syntax-based T2T models.

     Raises:
         AttributeError: If the model, problem or hparams set name is
             empty.
     """
     super(T2TPredictor, self).__init__(
         t2t_usr_dir, checkpoint_dir, src_vocab_size, trg_vocab_size,
         t2t_unk_id, n_cpu_threads, max_terminal_id, pop_id)
     # All three names are mandatory.
     if not (model_name and problem_name and hparams_set_name):
         logging.fatal(
             "Please specify t2t_model, t2t_problem, and t2t_hparams_set!")
         raise AttributeError
     self.consumed = []
     self.src_sentence = []
     graph = tf.Graph()
     with graph.as_default():
         hp = trainer_lib.create_hparams(hparams_set_name)
         self._add_problem_hparams(hp, problem_name)
         model_cls = registry.model(model_name)
         t2t_model = model_cls(hp, tf.estimator.ModeKeys.PREDICT)
         # Rank-1 int32 feeds of unknown length.
         self._inputs_var = tf.placeholder(
             dtype=tf.int32, shape=[None], name="sgnmt_inputs")
         self._targets_var = tf.placeholder(
             dtype=tf.int32, shape=[None], name="sgnmt_targets")
         feats = {
             "inputs": expand_input_dims_for_t2t(self._inputs_var),
             "targets": expand_input_dims_for_t2t(self._targets_var),
         }
         t2t_model.prepare_features_for_infer(feats)
         t2t_model._fill_problem_hparams_features(feats)
         raw_logits, _ = t2t_model(feats)
         # Remove the first four axes before normalising.
         squeezed_logits = tf.squeeze(raw_logits, [0, 1, 2, 3])
         self._log_probs = log_prob_from_logits(squeezed_logits)
         self.mon_sess = self.create_session()
예제 #39
0
 def nth_model(n):
     """Build the model for the n-th problem, plus some added variables.

     Uses names defined outside this function: model, hparams, mode, dp,
     features, decode_hp, worker_id, worker_replicas, is_training,
     eval_run_autoregressive and loss_variable_names.

     Args:
         n: Index into hparams.problems.

     Returns:
         In PREDICT mode, whatever model_class.infer returns. Otherwise a
         two-element list [total_loss, logits]: total_loss is the sum of
         the problem's losses reshaped to a scalar, logits is the
         concatenation of the sharded logits along axis 0.
     """
     # Despite its name, model_class holds the instantiated model.
     model_class = registry.model(model)(
         hparams, mode, hparams.problems[n], n, dp,
         devices.ps_devices(all_workers=True))
     if mode == tf.estimator.ModeKeys.PREDICT:
         # Return as many beams as were searched only when return_beams is
         # set; otherwise a single one.
         return model_class.infer(
             features,
             beam_size=decode_hp.beam_size,
             top_beams=(decode_hp.beam_size
                        if decode_hp.return_beams else 1),
             last_position_only=decode_hp.use_last_position_only,
             alpha=decode_hp.alpha,
             decode_length=decode_hp.extra_length)
     # In distributed mode, we build graph for problem=0 and problem=worker_id.
     skipping_is_on = hparams.problem_choice == "distributed" and is_training
     problem_worker_id = worker_id % len(hparams.problems)
     skip_this_one = n != 0 and n % worker_replicas != problem_worker_id
     # On worker 0 also build graph for problems <= 1.
     # TODO(lukaszkaiser): why is this hack needed for variables init? Repair.
     skip_this_one = skip_this_one and (worker_id != 0 or n > 1)
     if eval_run_autoregressive and mode == tf.estimator.ModeKeys.EVAL:
         sharded_logits, losses_dict = model_class.eval_autoregressive(
             features)
     else:
         sharded_logits, losses_dict = model_class.model_fn(
             features, skip=(skipping_is_on and skip_this_one))
     with tf.variable_scope("losses_avg"):
         # One non-trainable moving-average variable per loss, each updated
         # as 0.9 * old + 0.1 * new; the update ops are collected in `ops`.
         total_loss, ops = 0.0, []
         for loss_key, loss_value in six.iteritems(losses_dict):
             loss_name = "problem_%d/%s_loss" % (n, loss_key)
             loss_moving_avg = tf.get_variable(loss_name,
                                               initializer=100.0,
                                               trainable=False)
             # Record the variable name in the list kept outside this
             # function.
             loss_variable_names.append(loss_name)
             ops.append(
                 loss_moving_avg.assign(loss_moving_avg * 0.9 +
                                        loss_value * 0.1))
             total_loss += loss_value
         try:  # Total loss avg might be reused or not, we try both.
             with tf.variable_scope(tf.get_variable_scope(), reuse=True):
                 # Total loss was already constructed on input.
                 loss_moving_avg = tf.get_variable("problem_%d/total_loss" %
                                                   n)
         except ValueError:
             # Reuse failed, so create the total-loss average here.
             loss_moving_avg = tf.get_variable("problem_%d/total_loss" % n,
                                               initializer=100.0,
                                               trainable=False)
         ops.append(
             loss_moving_avg.assign(loss_moving_avg * 0.9 +
                                    total_loss * 0.1))
     with tf.variable_scope("train_stats"):  # Count steps for this problem.
         problem_steps = tf.get_variable("problem_%d_steps" % n,
                                         initializer=0,
                                         trainable=False)
         ops.append(problem_steps.assign_add(1))
     with tf.control_dependencies(ops):  # Make sure the ops run.
         # Ensure the loss is a scalar here.
         total_loss = tf.reshape(total_loss, [],
                                 name="total_loss_control_id")
     return [total_loss, tf.concat(sharded_logits, 0)]
예제 #40
0
def main(_):
    """Roll out a trained model on its own predictions and save a video.

    Builds hparams and the problem dataset from FLAGS, restores the
    checkpoint referenced by the checkpoint state in FLAGS.output_dir, seeds
    the input queues with one batch taken from the dataset and then, for
    FLAGS.num_steps steps, feeds the queued frames to the model and pushes
    the predicted frame back into the queue. Every seed frame and every
    predicted frame of the first agent is handed to the video writer, which
    is flushed to FLAGS.output_gif at the end.

    When the problem exposes `num_actions`, actions and rewards are fed as
    well: actions are drawn at random, rewards are the model's own
    predictions.

    Args:
        _: Unused; the function never reads it.
    """
    tf.logging.set_verbosity(tf.logging.INFO)
    trainer_lib.set_random_seed(FLAGS.random_seed)
    usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)

    # Create hparams
    hparams = trainer_lib.create_hparams(FLAGS.hparams_set,
                                         FLAGS.hparams,
                                         data_dir=os.path.expanduser(
                                             FLAGS.data_dir),
                                         problem_name=FLAGS.problem)
    hparams.force_full_predict = True
    hparams.scheduled_sampling_k = -1

    # Params
    num_agents = 1  # TODO(mbz): fix the code for more agents
    num_steps = FLAGS.num_steps
    # Problems without a `num_actions` attribute are rolled out on frames
    # only.
    num_actions = getattr(hparams.problem, "num_actions", None)
    has_actions = num_actions is not None

    frame_shape = hparams.problem.frame_shape
    if hparams.preprocess_resize_frames is not None:
        # Build a fresh list. Using `+=` on the hparams value would append
        # the channel count to hparams.preprocess_resize_frames itself.
        frame_shape = (list(hparams.preprocess_resize_frames) +
                       [hparams.problem.num_channels])

    dataset = registry.problem(FLAGS.problem).dataset(
        tf.estimator.ModeKeys.TRAIN,
        shuffle_files=True,
        data_dir=os.path.expanduser(FLAGS.data_dir),
        hparams=hparams)

    dataset = dataset.batch(num_agents, drop_remainder=True)
    data = dataset.make_one_shot_iterator().get_next()

    # Setup input placeholders
    input_size = [num_agents, hparams.video_num_input_frames]
    placeholders = {
        "inputs": tf.placeholder(tf.float32, input_size + frame_shape)
    }
    if has_actions:
        placeholders.update({
            "input_action": tf.placeholder(tf.int64, input_size + [1]),
            "input_reward": tf.placeholder(tf.int64, input_size + [1]),
            "reset_internal_states": tf.placeholder(tf.float32, []),
        })

    # Create model.
    model_cls = registry.model(FLAGS.model)
    model = model_cls(hparams, tf.estimator.ModeKeys.PREDICT)
    prediction_ops = model.infer(placeholders)

    # Fixed-size FIFO queues holding the most recent input frames (and, when
    # the problem has actions, the matching actions and rewards).
    states_q = Queue(maxsize=hparams.video_num_input_frames)
    actions_q = Queue(maxsize=hparams.video_num_input_frames)
    rewards_q = Queue(maxsize=hparams.video_num_input_frames)
    if has_actions:
        all_qs = [states_q, actions_q, rewards_q]
    else:
        all_qs = [states_q]

    writer = common_video.WholeVideoWriter(fps=FLAGS.fps,
                                           output_path=FLAGS.output_gif)

    saver = tf.train.Saver(tf.trainable_variables())
    with tf.train.SingularMonitoredSession() as sess:
        # Load latest checkpoint
        ckpt = tf.train.get_checkpoint_state(
            FLAGS.output_dir).model_checkpoint_path
        saver.restore(sess.raw_session(), ckpt)

        # get init frames from the dataset
        data_np = sess.run(data)

        # Split along axis 1 into one chunk per input frame and queue each
        # chunk with that axis squeezed away.
        frames = np.split(data_np["inputs"], hparams.video_num_input_frames, 1)
        for frame in frames:
            frame = np.squeeze(frame, 1)
            states_q.put(frame)
            writer.write(frame[0].astype(np.uint8))

        if has_actions:
            actions = np.split(data_np["input_action"],
                               hparams.video_num_input_frames, 1)
            for action in actions:
                actions_q.put(np.squeeze(action, 1))

            rewards = np.split(data_np["input_reward"],
                               hparams.video_num_input_frames, 1)
            for reward in rewards:
                rewards_q.put(np.squeeze(reward, 1))

        for step in range(num_steps):
            print(">>>>>>> ", step)

            if has_actions:
                # NOTE(review): randint's upper bound is exclusive, so the
                # action index `num_actions - 1` is never drawn - confirm
                # this is intended.
                random_actions = np.random.randint(num_actions - 1)
                random_actions = np.expand_dims(random_actions, 0)
                random_actions = np.tile(random_actions, (num_agents, 1))

                # Shape inputs and targets
                inputs, input_action, input_reward = (np.stack(list(q.queue),
                                                               axis=1)
                                                      for q in all_qs)
            else:
                assert len(all_qs) == 1
                elems = list(all_qs[0].queue)
                # Defensive: give any queued frame with fewer than 4 dims a
                # leading axis before stacking.
                for i, e in enumerate(elems):
                    if len(e.shape) < 4:
                        elems[i] = np.expand_dims(e, axis=0)
                inputs = np.stack(elems, axis=1)

            # Predict next frames
            if has_actions:
                feed = {
                    placeholders["inputs"]: inputs,
                    placeholders["input_action"]: input_action,
                    placeholders["input_reward"]: input_reward,
                    # 1.0 on the first step only, 0.0 afterwards.
                    placeholders["reset_internal_states"]: float(step == 0),
                }
            else:
                feed = {placeholders["inputs"]: inputs}
            predictions = sess.run(prediction_ops, feed_dict=feed)

            # Update queues: drop the oldest entry of every queue and append
            # the newly predicted one.
            if has_actions:
                predicted_states = predictions["targets"][:, 0]
                predicted_reward = predictions["target_reward"][:, 0]
                new_data = (predicted_states, random_actions, predicted_reward)
            else:
                predicted_states = predictions[:, 0]
                # One-element tuple, so that zip() pairs the single queue
                # with the whole predicted batch rather than with its first
                # element.
                new_data = (predicted_states,)
            for q, d in zip(all_qs, new_data):
                q.get()
                q.put(d.copy())

            writer.write(np.round(predicted_states[0]).astype(np.uint8))

        writer.finish_to_disk()
    def __init__(self,
                 t2t_usr_dir,
                 src_vocab_size,
                 trg_vocab_size,
                 model_name,
                 problem_name,
                 hparams_set_name,
                 checkpoint_dir,
                 t2t_unk_id=None,
                 single_cpu_thread=False):
        """Set up a T2T predictor and its TensorFlow session.

        The constructor performs every step needed before predict_next()
        can be called:
        - build the hparams via _create_hparams(),
        - create placeholders for the source sentence and target prefix,
        - instantiate the registered T2T model in PREDICT mode,
        - build the graph that turns the first logits shard into log
          probs,
        - create the session via create_session().

        Args:
            t2t_usr_dir (string): See --t2t_usr_dir in tensor2tensor.
            src_vocab_size (int): Source vocabulary size.
            trg_vocab_size (int): Target vocabulary size.
            model_name (string): T2T model name.
            problem_name (string): T2T problem name.
            hparams_set_name (string): T2T hparams set name.
            checkpoint_dir (string): Path to the T2T checkpoint
                                     directory.
            t2t_unk_id (int): If set, use this ID to get UNK scores. If
                              None, UNK is always scored with -inf.
            single_cpu_thread (bool): If true, prevent tensorflow from
                                      doing multithreading.
        """
        super(T2TPredictor, self).__init__(
            t2t_usr_dir, checkpoint_dir, t2t_unk_id, single_cpu_thread)
        self.consumed = []
        self.src_sentence = []
        graph = tf.Graph()
        with graph.as_default():
            hp = self._create_hparams(
                src_vocab_size, trg_vocab_size, hparams_set_name,
                problem_name)
            problem_hp = hp.problems[0]
            # Rank-1 int32 feeds of unknown length.
            self._inputs_var = tf.placeholder(
                dtype=tf.int32, shape=[None], name="sgnmt_inputs")
            self._targets_var = tf.placeholder(
                dtype=tf.int32, shape=[None], name="sgnmt_targets")

            def expand_input_dims_for_t2t(t):
                # One leading axis (batch), then two trailing axes.
                with_batch = tf.expand_dims(t, 0)
                with_modality = tf.expand_dims(with_batch, -1)
                return tf.expand_dims(with_modality, -1)

            feats = {
                "problem_choice": tf.constant(0),
                "input_space_id": tf.constant(problem_hp.input_space_id),
                "target_space_id": tf.constant(problem_hp.target_space_id),
                "inputs": expand_input_dims_for_t2t(self._inputs_var),
                "targets": expand_input_dims_for_t2t(self._targets_var),
            }

            model_cls = registry.model(model_name)
            t2t_model = model_cls(
                hp,
                tf.estimator.ModeKeys.PREDICT,
                hp.problems[0],
                0,
                devices.data_parallelism(),
                devices.ps_devices(all_workers=True))
            logits_shards, _ = t2t_model.model_fn(
                feats, last_position_only=True)
            # Only the first shard is turned into log probs.
            self._log_probs = log_prob_from_logits(logits_shards[0])
            self.mon_sess = self.create_session()
예제 #42
0
파일: tf_t2t.py 프로젝트: ucam-smt/sgnmt
 def __init__(self,
              src_vocab_size,
              trg_vocab_size,
              model_name,
              problem_name,
              hparams_set_name,
              t2t_usr_dir,
              checkpoint_dir,
              t2t_unk_id=None,
              n_cpu_threads=-1,
              max_terminal_id=-1,
              pop_id=-1):
     """Set up a document-level T2T predictor.

     Besides the token placeholders for inputs and targets, this
     predictor feeds segment ("*_seg") and position ("*_pos")
     placeholders for both sides into the model.

     Args:
         src_vocab_size (int): Source vocabulary size.
         trg_vocab_size (int): Target vocabulary size.
         model_name (string): T2T model name.
         problem_name (string): T2T problem name.
         hparams_set_name (string): T2T hparams set name.
         t2t_usr_dir (string): See --t2t_usr_dir in tensor2tensor.
         checkpoint_dir (string): Path to the T2T checkpoint directory.
         t2t_unk_id (int): If set, use this ID to get UNK scores. If
                           None, UNK is always scored with -inf.
         n_cpu_threads (int): Number of TensorFlow CPU threads.
         max_terminal_id (int): If positive, maximum terminal ID. Needs
             to be set for syntax-based T2T models.
         pop_id (int): If positive, ID of the POP or closing bracket
             symbol. Needs to be set for syntax-based T2T models.

     Raises:
         AttributeError: If the model, problem or hparams set name is
             empty.
     """
     super(SegT2TPredictor, self).__init__(
         t2t_usr_dir, checkpoint_dir, src_vocab_size, trg_vocab_size,
         t2t_unk_id, n_cpu_threads, max_terminal_id, pop_id)
     # All three names are mandatory.
     if not (model_name and problem_name and hparams_set_name):
         logging.fatal(
             "Please specify t2t_model, t2t_problem, and t2t_hparams_set!")
         raise AttributeError
     self.begin_margin = 4
     self.end_margin = 1
     self.max_sentences = self.begin_margin + self.end_margin
     # The margin-based value above is immediately replaced by this one.
     self.max_sentences = 10000

     def int_vector(name):
         # Rank-1 int32 feed of unknown length.
         return tf.placeholder(dtype=tf.int32, shape=[None], name=name)

     graph = tf.Graph()
     with graph.as_default():
         hp = trainer_lib.create_hparams(hparams_set_name)
         self._add_problem_hparams(hp, problem_name)
         model_cls = registry.model(model_name)
         t2t_model = model_cls(hp, tf.estimator.ModeKeys.PREDICT)
         self._inputs_var = int_vector("sgnmt_inputs")
         self._targets_var = int_vector("sgnmt_targets")
         self._inputs_seg_var = int_vector("sgnmt_inputs_seg")
         self._targets_seg_var = int_vector("sgnmt_targets_seg")
         self._inputs_pos_var = int_vector("sgnmt_inputs_pos")
         self._targets_pos_var = int_vector("sgnmt_targets_pos")
         # Token features go through expand_input_dims_for_t2t; segment
         # and position features only get a leading axis.
         feats = {
             "inputs": expand_input_dims_for_t2t(self._inputs_var),
             "targets": expand_input_dims_for_t2t(self._targets_var),
             "inputs_seg": tf.expand_dims(self._inputs_seg_var, 0),
             "targets_seg": tf.expand_dims(self._targets_seg_var, 0),
             "inputs_pos": tf.expand_dims(self._inputs_pos_var, 0),
             "targets_pos": tf.expand_dims(self._targets_pos_var, 0),
         }
         t2t_model.prepare_features_for_infer(feats)
         t2t_model._fill_problem_hparams_features(feats)
         raw_logits, _ = t2t_model(feats)
         # Remove the first four axes before normalising.
         squeezed_logits = tf.squeeze(raw_logits, [0, 1, 2, 3])
         self._log_probs = log_prob_from_logits(squeezed_logits)
         self.mon_sess = self.create_session()
예제 #43
0
 def testUnknownModel(self):
     """Looking up a model name that is not registered raises LookupError."""
     expect_lookup_failure = self.assertRaisesRegexp(
         LookupError, "never registered")
     with expect_lookup_failure:
         registry.model("not_registered")
예제 #44
0
    def _init_env(self):
        FLAGS.use_tpu = False
        tf.logging.set_verbosity(tf.logging.DEBUG)
        tf.logging.info("Import usr dir from %s", self._usr_dir)
        if self._usr_dir != None:
            usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)
        tf.logging.info("Start to create hparams,for %s of %s", self._problem,
                        self._hparams_set)

        self._hparams = create_hparams()
        self._hparams_decode = create_decode_hparams(
            extra_length=self._extra_length,
            batch_size=self._batch_size,
            beam_size=self._beam_size,
            alpha=self._alpha,
            return_beams=self._return_beams,
            write_beam_scores=self._write_beam_scores,
            force_decode_length=self._force_decode_length)

        # self.estimator_spec = t2t_model.T2TModel.make_estimator_model_fn(
        #     self._model_name, self._hparams, decode_hparams=self._hparams_decode, use_tpu=False)

        self.estimator = trainer_lib.create_estimator(
            FLAGS.model,
            self._hparams,
            t2t_trainer.create_run_config(self._hparams),
            decode_hparams=self._hparams_decode,
            use_tpu=False)

        tf.logging.info("Finish intialize environment")

        #######

        ### make input placeholder
        self._inputs_ph = tf.placeholder(
            dtype=tf.int32)  # shape not specified,any shape

        x = tf.placeholder(dtype=tf.int32)
        x.set_shape([None, None])  # ? -> (?,?)
        x = tf.expand_dims(x, axis=[2])  # -> (?,?,1)
        x = tf.to_int32(x)
        self._inputs_ph = x

        #batch_inputs = tf.reshape(self._inputs_ph, [self._batch_size, -1, 1, 1])
        batch_inputs = x
        ###

        # batch_inputs = tf.reshape(self._inputs_ph, [-1, -1, 1, 1])

        #targets_ph = tf.placeholder(dtype=tf.int32)
        #batch_targets = tf.reshape(targets_ph, [1, -1, 1, 1])

        #self.inputs_ph = tf.placeholder(tf.int32, shape=(None, None, 1, 1), name='inputs')
        #self.targets_ph = tf.placeholder(tf.int32, shape=(None, None, None, None), name='targets')
        self.inputs_ph = tf.placeholder(tf.int32,
                                        shape=(None, None, 1, 1),
                                        name='inputs')
        self.targets_ph = tf.placeholder(tf.int32,
                                         shape=(None, None, 1, 1),
                                         name='targets')
        self.targets_ph_2 = tf.placeholder(tf.int32,
                                           shape=(None, None, 1, 1),
                                           name='targets')

        self._features = {
            "inputs": self.inputs_ph,
            "problem_choice": 0,  # We run on the first problem here.
            "input_space_id": self._hparams.problem_hparams.input_space_id,
            "target_space_id": self._hparams.problem_hparams.target_space_id
        }
        ### 加入 decode length  变长的
        self.input_extra_length_ph = tf.placeholder(dtype=tf.int32)
        self._features['decode_length'] = self.input_extra_length_ph
        ## target
        #self._targets_ph= tf.placeholder(tf.int32, shape=(None, None, None, None), name='targets')
        self._features['targets'] = self.targets_ph
        self._features['targets2'] = self.targets_ph_2
        target_pretend = np.zeros((1, 1, 1, 1))

        ## 去掉 整数的
        del self._features["problem_choice"]
        del self._features["input_space_id"]
        del self._features["target_space_id"]
        del self._features['decode_length']
        ####
        #mode = tf.estimator.ModeKeys.PREDICT # affect last_only  t2t_model._top_single  ,[1,?,1,512]->[1,1,1,1,64]
        # if self.predict_or_eval=='EVAL':
        #     mode = tf.estimator.ModeKeys.EVAL # affect last_only  t2t_model._top_single  ,[1,?,1,512]->[1,?,1,1,64]
        # # estimator_spec = model_builder.model_fn(self._model_name, features, mode, self._hparams,
        # #                                         problem_names=[self._problem], decode_hparams=self._hparams_dc)
        # if self.predict_or_eval=='PREDICT':
        #     mode = tf.estimator.ModeKeys.PREDICT

        if self.predict_or_eval == 'and':
            mode = tf.estimator.ModeKeys.TRAIN

        ###########
        # registry.model
        ############
        translate_model = registry.model(self._model_name)(
            hparams=self._hparams,
            decode_hparams=self._hparams_decode,
            mode=mode)

        self.predict_dict = {}
        # if self.predict_or_eval == 'EVAL':
        #     self.logits,_=translate_model(self._features)
        #     self.predict_dict['scores']=self.logits
        #
        # if self.predict_or_eval == 'PREDICT':
        #
        #     self.predict_dict=translate_model.infer(features=self._features,
        #                             decode_length=50,
        #                             beam_size=1,
        #                             top_beams=1)
        #     print ''
        if self.predict_or_eval == 'and':
            ### get logit EVAL mode
            #self._features['targets'] = [[self._targets_ph]] # function body()
            self.logits, self.ret2 = translate_model(self._features)

        ##################
        ##  model_fn fetch logits FAIL : key not found
        #############
        # logits,_=translate_model.model_fn(self._features)

        # self._beam_result = model_i._fast_decode(self._features, decode_length=5, beam_size=10, top_beams=10,
        #                                          alpha=0.6) #fail
        # self._beam_result = model_i._beam_decode(self._features,
        #                                          decode_length=5,
        #                                          beam_size=self._beam_size,
        #                                          top_beams=self._beam_size,
        #                                          alpha=0.6)

        ##########

        # logits,_=model_i.model_fn(self._features)
        # assert len(logits.shape) == 5
        # logits = tf.squeeze(logits, [2, 3])
        # # Compute the log probabilities
        # from tensor2tensor.layers import common_layers
        # self.log_probs = common_layers.log_prob_from_logits(logits)

        ######

        #self._predictions = self._predictions_dict["outputs"]
        # self._scores=predictions_dict['scores'] not return when greedy search
        tf.logging.info("Start to init tf session")
        if self._isGpu:
            print('Using GPU in Decoder')
            gpu_options = tf.GPUOptions(
                per_process_gpu_memory_fraction=self._fraction)
            self._sess = tf.Session(
                config=tf.ConfigProto(allow_soft_placement=True,
                                      log_device_placement=False,
                                      gpu_options=gpu_options))
        else:
            print('Using CPU in Decoder')
            gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0)
            config = tf.ConfigProto(gpu_options=gpu_options)
            config.allow_soft_placement = True
            config.log_device_placement = False
            self._sess = tf.Session(config=config)
        with self._sess.as_default():
            #ckpt = saver_mod.get_checkpoint_state(self._model_dir)

            self._sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver()
            #train_handle = sess.run(train_iterator.string_handle())
            #dev_handle = sess.run(dev_iterator.string_handle())
            if os.path.exists(os.path.join(self._model_dir, "checkpoint")):
                saver.restore(self._sess,
                              tf.train.latest_checkpoint(self._model_dir))
            global_step = max(sess.run(translate_model.global_step), 1)

            for _ in tqdm(range(global_step, 1000 + 1)):
                global_step = sess.run(translate_model.global_step) + 1
                loss, train_op = sess.run([self.ret2, model.train_op],
                                          feed_dict={
                                              handle: train_handle,
                                              model.dropout: config.dropout
                                          })
                if global_step % config.period == 0:
                    loss_sum = tf.Summary(value=[
                        tf.Summary.Value(tag="model/loss", simple_value=loss),
                    ])
                    writer.add_summary(loss_sum, global_step)
                if global_step % config.checkpoint == 0:
                    _, summ = evaluate_batch(model, config.val_num_batches,
                                             train_eval_file, sess, "train",
                                             handle, train_handle)
                    for s in summ:
                        writer.add_summary(s, global_step)

                    metrics, summ = evaluate_batch(
                        model, dev_total // config.batch_size + 1,
                        dev_eval_file, sess, "dev", handle, dev_handle)

                    dev_f1 = metrics["f1"]
                    dev_em = metrics["exact_match"]
                    if dev_f1 < best_f1 and dev_em < best_em:
                        patience += 1
                        if patience > config.early_stop:
                            break
                    else:
                        patience = 0
                        best_em = max(best_em, dev_em)
                        best_f1 = max(best_f1, dev_f1)

                    for s in summ:
                        writer.add_summary(s, global_step)
                    writer.flush()
                    filename = os.path.join(
                        config.save_dir, "model_{}.ckpt".format(global_step))
                    saver.save(sess, filename)
예제 #45
0
    def model_fn(features, labels, mode, params, config):
        """Build the TPUEstimatorSpec for evaluation or training.

        The raw features go through the problem's modality "bottoms", the
        model body runs on the result, and the target modality produces the
        logits and the loss. EVAL mode returns an eval spec; any other mode
        is asserted to be TRAIN and gets a train op.
        """
        del params  # Not used by this model function.
        hparams = copy.deepcopy(hp)
        problem_hp = hparams.problems[0]
        raw_features = features

        # Build the model and fetch its modalities. Autoregressive models
        # have no input modality, so that lookup may yield None.
        t2t_model = registry.model(model)(hparams, mode, problem_hp)
        inputs_modality = problem_hp.input_modality.get("inputs")
        targets_modality = problem_hp.target_modality

        # Modality bottoms plus the constant bookkeeping features.
        bottom_features = {}
        if inputs_modality is not None:
            bottom_features["inputs"] = inputs_modality.bottom(
                features["inputs"])
        bottom_features["targets"] = targets_modality.targets_bottom(
            features["targets"])
        bottom_features["problem_choice"] = tf.constant(0)
        bottom_features["input_space_id"] = tf.constant(
            problem_hp.input_space_id)
        bottom_features["target_space_id"] = tf.constant(
            problem_hp.target_space_id)

        # Body and top.
        body_outputs = t2t_model.model_fn_body(bottom_features)
        logits = targets_modality.top(body_outputs, labels)

        # Pin the length dimension so it is known statically.
        static_shape = [None] * logits.get_shape().ndims
        static_shape[1] = hparams.max_length
        logits.set_shape(logits.get_shape().merge_with(static_shape))

        # Loss = numerator / max(1, denominator).
        numerator, denominator = targets_modality.loss(logits, labels)
        loss = numerator / tf.maximum(1.0, denominator)

        if mode == tf.estimator.ModeKeys.EVAL:
            eval_problem = hp.problem_instances[0]
            metrics_fn = create_eval_metrics_fn(eval_problem)
            _remove_summaries()
            metric_args = [logits, raw_features["targets"]]
            return tf.contrib.tpu.TPUEstimatorSpec(
                mode, eval_metrics=(metrics_fn, metric_args), loss=loss)

        assert mode == tf.estimator.ModeKeys.TRAIN

        # Learning rate: decayed base rate, divided by sqrt(num_shards).
        shard_count = config.tpu_config.num_shards
        learning_rate = hparams.learning_rate * model_builder.learning_rate_decay(
            hparams, num_worker_replicas=shard_count)
        learning_rate = learning_rate / math.sqrt(float(shard_count))

        # Optimizer, wrapped for cross-shard aggregation when on TPU.
        optimizer = model_builder.ConditionalOptimizer(
            hparams.optimizer, learning_rate, hparams)
        if use_tpu:
            optimizer = tf.contrib.tpu.CrossShardOptimizer(optimizer)

        # Gradients, optional clipping, and the update op.
        grads_and_vars = optimizer.compute_gradients(
            loss, tf.trainable_variables())
        if hparams.clip_grad_norm:
            grads_and_vars = _clip_gradients_by_norm(
                grads_and_vars, hparams.clip_grad_norm)
        apply_op = optimizer.apply_gradients(
            grads_and_vars, global_step=tf.train.get_or_create_global_step())
        # The returned train op is the loss tensor, gated on the update.
        with tf.control_dependencies([apply_op]):
            train_op = tf.identity(loss)

        _remove_summaries()
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode, loss=loss, train_op=train_op)
예제 #46
0
# Make sure the working directories exist before anything is written there.
tf.gfile.MakeDirs(train_dir)
tf.gfile.MakeDirs(checkpoint_dir)

# Bucket path string; not referenced again in the lines shown here.
gs_ckpt_dir = "gs://tensor2tensor-checkpoints/"

# Look up the problem by name and fetch its feature encoders.
# `None` is passed as the data_dir argument of feature_encoders.
problem_name = "librispeech_clean"
asr_problem = problems.problem(problem_name)
encoders = asr_problem.feature_encoders(None)

model_name = "transformer"
hparams_set = "transformer_librispeech_tpu"

# Build the hparams for this problem and instantiate the registered model
# class in PREDICT mode.
hparams = trainer_lib.create_hparams(hparams_set,
                                     data_dir=data_dir,
                                     problem_name=problem_name)
asr_model = registry.model(model_name)(hparams, Modes.PREDICT)


def encode(x):
    """Turn `x` into a batched feature dict with "inputs" and "targets".

    `x` is encoded with the "waveforms" encoder, run through the problem's
    `preprocess_example` in PREDICT mode together with an empty target list,
    and both resulting features get a leading axis of size 1.
    """
    example = {
        "waveforms": encoders["waveforms"].encode(x),
        "targets": [],
    }
    preprocessed = asr_problem.preprocess_example(example, Modes.PREDICT,
                                                  hparams)

    batched = {}
    for key in ("inputs", "targets"):
        # Prepend the batch axis.
        batched[key] = tf.expand_dims(preprocessed[key], 0)
    return batched
예제 #47
0
def score_file(filename):
    """Score each line in a file and return the scores.

    Each line is either ``targets`` or ``inputs<TAB>targets``. The line is
    encoded with the problem's feature encoders, an EOS id is appended, and
    the model's ``losses["training"]`` value is evaluated for it.

    Args:
        filename: Path of the text file to score, opened with tf.gfile.

    Returns:
        A list with one loss value per line, in file order.

    Raises:
        ValueError: If no checkpoint is found in ``FLAGS.output_dir``, if a
            line has more than one tab, or if the problem has an "inputs"
            feature and a line provides no inputs field.
    """
    # Prepare model.
    hparams = create_hparams()
    encoders = registry.problem(FLAGS.problem).feature_encoders(FLAGS.data_dir)
    has_inputs = "inputs" in encoders

    # Prepare features for feeding into the model.
    if has_inputs:
        inputs_ph = tf.placeholder(dtype=tf.int32)  # Just length dimension.
        batch_inputs = tf.reshape(inputs_ph, [1, -1, 1, 1])  # Make it 4D.
    targets_ph = tf.placeholder(dtype=tf.int32)  # Just length dimension.
    batch_targets = tf.reshape(targets_ph, [1, -1, 1, 1])  # Make it 4D.
    if has_inputs:
        features = {"inputs": batch_inputs, "targets": batch_targets}
    else:
        features = {"targets": batch_targets}

    # Prepare the model and the graph when model runs on features.
    model = registry.model(FLAGS.model)(hparams, tf.estimator.ModeKeys.EVAL)
    _, losses = model(features)
    saver = tf.train.Saver()

    with tf.Session() as sess:
        # Load weights from checkpoint. get_checkpoint_state returns None
        # when the directory holds no checkpoint; fail with a clear message
        # instead of an AttributeError on None.
        ckpts = tf.train.get_checkpoint_state(FLAGS.output_dir)
        if ckpts is None or not ckpts.model_checkpoint_path:
            raise ValueError(
                "No checkpoint found in {}".format(FLAGS.output_dir))
        saver.restore(sess, ckpts.model_checkpoint_path)

        # Run on each line.
        with tf.gfile.Open(filename) as f:
            lines = f.readlines()
        results = []
        for ix, line in enumerate(lines):
            if ix % 10000 == 0:
                print('id: {}'.format(ix))
            tab_split = line.split("\t")
            if len(tab_split) > 2:
                raise ValueError(
                    "Each line must have at most one tab separator.")
            if len(tab_split) == 1:
                # Reset explicitly so a previous line's inputs can never be
                # reused for this one.
                inputs = None
                targets = tab_split[0].strip()
            else:
                inputs = tab_split[0].strip()
                targets = tab_split[1].strip()
            if has_inputs and inputs is None:
                raise ValueError(
                    "Line {} has no tab-separated inputs field, but the "
                    "problem requires inputs.".format(ix))

            # Run encoders, append EOS symbol and prepare the feed.
            feed = {
                targets_ph:
                    encoders["targets"].encode(targets) +
                    [text_encoder.EOS_ID]
            }
            if has_inputs:
                feed[inputs_ph] = encoders["inputs"].encode(inputs) + [
                    text_encoder.EOS_ID
                ]

            # Get the score.
            results.append(sess.run(losses["training"], feed))
    return results
def score_file(filename):
    """Clean every file in $DIR_FOR_DECODE with the word elimination model.

    Despite its name, this variant ignores `filename` (kept only so existing
    callers keep working). It reads each file in the directory named by the
    DIR_FOR_DECODE environment variable, runs `word_elimination` on every
    line, keeps the candidate with the lowest score, and writes the chosen
    sentences to a file of the same name under the directory named by
    DIR_TO_DECODE.

    Args:
        filename: Unused.

    Returns:
        The score-sorted candidate items of the last processed line, or an
        empty list when no line was processed at all.

    Raises:
        KeyError: If DIR_FOR_DECODE or DIR_TO_DECODE is not set.
    """
    # Prepare model.
    hparams = create_hparams()
    encoders = registry.problem(FLAGS.problem).feature_encoders(FLAGS.data_dir)
    has_inputs = "inputs" in encoders

    # Prepare features for feeding into the model.
    if has_inputs:
        inputs_ph = tf.placeholder(dtype=tf.int32)  # Just length dimension.
        batch_inputs = tf.reshape(inputs_ph, [1, -1, 1, 1])  # Make it 4D.
    targets_ph = tf.placeholder(dtype=tf.int32)  # Just length dimension.
    batch_targets = tf.reshape(targets_ph, [1, -1, 1, 1])  # Make it 4D.
    if has_inputs:
        features = {"inputs": batch_inputs, "targets": batch_targets}
    else:
        features = {"targets": batch_targets}

    # Prepare the model and the graph when model runs on features.
    model = registry.model(FLAGS.model)(hparams, tf.estimator.ModeKeys.EVAL)
    _, losses = model(features)
    saver = tf.train.Saver()

    # Defined up front so the final `return` cannot hit an unbound name when
    # the input directory (or every file in it) is empty.
    results = []

    with tf.Session() as sess:
        # Load weights from checkpoint; an explicit path wins over the
        # latest checkpoint in the output directory.
        if FLAGS.checkpoint_path is None:
            ckpts = tf.train.get_checkpoint_state(FLAGS.output_dir)
            ckpt = ckpts.model_checkpoint_path
        else:
            ckpt = FLAGS.checkpoint_path
        saver.restore(sess, ckpt)

        DIR_FOR_DECODE = os.environ['DIR_FOR_DECODE']
        DIR_TO_DECODE = os.environ['DIR_TO_DECODE']

        for file_name in os.listdir(DIR_FOR_DECODE):
            file_path = os.path.join(DIR_FOR_DECODE, file_name)
            cleaned_text = []
            with tf.gfile.Open(file_path) as f:
                for line in f:
                    # Print and clean the line.
                    print(line)
                    targets = reformat(line.strip())

                    # Run the word elimination model; the line is passed as
                    # a one-element list of (targets, None), not a string.
                    results = word_elimination([(targets, None)],
                                               encoders,
                                               targets_ph,
                                               sess,
                                               losses,
                                               top_k=5,
                                               min_sent_length=10)

                    # Drop duplicates and sort by score, ascending.
                    results = sorted(dict(results).items(),
                                     key=operator.itemgetter(1))

                    # Keep the best sentence.
                    # NOTE(review): assumes word_elimination always returns
                    # at least one candidate - confirm.
                    cleaned_text.append(reformat2(results[0][0]))

            # Save the corrected text once the source file has been read.
            with open(os.path.join(DIR_TO_DECODE, file_name),
                      'w',
                      encoding='utf-8') as new_file:
                new_file.write("\n".join(cleaned_text))

    return results
    """Input str to features dict, ready for inference"""
    inputs = encoders["inputs"].encode(input_str) + [1]  # add EOS id
    batch_inputs = tf.reshape(inputs, [1, -1, 1])  # Make it 3D.
    return {"inputs": batch_inputs}

def decode(integers):
    """List of ints to str.

    `integers` may be any nesting of ids (for example a model output with
    extra size-1 axes); it is flattened to a plain list, cut at the first
    occurrence of id 1 (used as the EOS id in this script), and decoded with
    the "inputs" encoder.

    Args:
        integers: Array-like of token ids.

    Returns:
        Whatever `encoders["inputs"].decode` returns for the ids before EOS.
    """
    # Flatten instead of squeeze: squeezing a single-element array yields a
    # 0-d array that cannot be turned into a list, and re-squeezing a
    # one-element list has the same problem.
    ids = np.asarray(integers).reshape(-1).tolist()
    if 1 in ids:
        ids = ids[:ids.index(1)]
    return encoders["inputs"].decode(ids)

# Create hparams and the model
model_name = "transformer"
hparams_set = "transformer_base"

# NOTE(review): data_dir is given the checkpoint path here - confirm this is
# intended (it presumes the problem data lives next to the checkpoint).
hparams = tpu_trainer_lib.create_hparams(hparams_set, data_dir=ckpt_path, problem_name="translate_ende_wmt32k")

# NOTE: Only create the model once when restoring from a checkpoint; it's a
# Layer and so subsequent instantiations will have different variable scopes
# that will not match the checkpoint.
translate_model = registry.model(model_name)(hparams, Modes.EVAL)

# Translate the input file line by line. Each stripped line is encoded, run
# through `infer` with decode_length=100, decoded back to text and written
# to the output file, which is flushed after every line.
with open(fin_name, encoding='utf-8') as fin, open(fout_name, mode='w', encoding='utf-8') as fout, tfe.restore_variables_on_create(ckpt_path):
    for inputs in fin:
        encoded_inputs = encode(inputs.strip())
        model_output = translate_model.infer(encoded_inputs, decode_length=100)
        res = decode(model_output)
        print(res, file=fout)
        fout.flush()
예제 #50
0
파일: tf_t2t.py 프로젝트: ucam-smt/sgnmt
 def __init__(self,
              src_vocab_size,
              trg_vocab_size,
              model_name,
              problem_name,
              hparams_set_name,
              trg_test_file,
              beam_size,
              t2t_usr_dir,
              checkpoint_dir,
              t2t_unk_id=None,
              n_cpu_threads=-1,
              max_terminal_id=-1,
              pop_id=-1):
     """Set up an edit T2T predictor.

     Works like the T2TPredictor constructor, but the computation graph
     built here exposes scores at every target position rather than only
     at the last one.

     Args:
         src_vocab_size (int): Source vocabulary size.
         trg_vocab_size (int): Target vocabulary size. Must be smaller
             than ``EditT2TPredictor.POS_FACTOR``.
         model_name (string): T2T model name.
         problem_name (string): T2T problem name.
         hparams_set_name (string): T2T hparams set name.
         trg_test_file (string): Path to a plain text file with initial
             target sentences (whitespace separated integer IDs, one
             sentence per line). Can be empty.
         beam_size (int): Determines how many substitutions and
             insertions are considered at each position.
         t2t_usr_dir (string): See --t2t_usr_dir in tensor2tensor.
         checkpoint_dir (string): Path to the T2T checkpoint directory.
             The predictor will load the top most checkpoint in the
             `checkpoints` file.
         t2t_unk_id (int): If set, use this ID to get UNK scores. If
             None, UNK is always scored with -inf.
         n_cpu_threads (int): Number of TensorFlow CPU threads.
         max_terminal_id (int): If positive, maximum terminal ID. Needs
             to be set for syntax-based T2T models.
         pop_id (int): If positive, ID of the POP or closing bracket
             symbol. Needs to be set for syntax-based T2T models.

     Raises:
         AttributeError: If a model, problem or hparams set name is
             missing, or if the target vocabulary is too large.
     """
     super(EditT2TPredictor, self).__init__(
         t2t_usr_dir, checkpoint_dir, src_vocab_size, trg_vocab_size,
         t2t_unk_id, n_cpu_threads, max_terminal_id, pop_id)

     # Validate the configuration before any graph is built.
     if not (model_name and problem_name and hparams_set_name):
         logging.fatal(
             "Please specify t2t_model, t2t_problem, and t2t_hparams_set!")
         raise AttributeError
     if trg_vocab_size >= EditT2TPredictor.POS_FACTOR:
         logging.fatal("Target vocabulary size (%d) must be less than %d!"
                       % (trg_vocab_size, EditT2TPredictor.POS_FACTOR))
         raise AttributeError

     self.beam_size = max(1, beam_size // 10) + 1
     self.batch_size = 2048 # TODO(fstahlberg): Move to config

     # Optional initial target sentences: integer IDs with EOS appended,
     # passed through utils.oov_to_unk.
     self.initial_trg_sentences = None
     if trg_test_file:
         self.initial_trg_sentences = []
         with open(trg_test_file) as sentence_file:
             for raw_line in sentence_file:
                 token_ids = [int(tok) for tok in raw_line.strip().split()]
                 token_ids = token_ids + [utils.EOS_ID]
                 self.initial_trg_sentences.append(
                     utils.oov_to_unk(token_ids,
                                      self.trg_vocab_size,
                                      self._t2t_unk_id))

     predictor_graph = tf.Graph()
     with predictor_graph.as_default():
         hparams = trainer_lib.create_hparams(hparams_set_name)
         self._add_problem_hparams(hparams, problem_name)
         translate_model = registry.model(model_name)(
             hparams, tf.estimator.ModeKeys.EVAL)

         # One source sentence, a batch of target hypotheses.
         self._inputs_var = tf.placeholder(
             dtype=tf.int32, shape=[None], name="sgnmt_inputs")
         self._targets_var = tf.placeholder(
             dtype=tf.int32, shape=[None, None], name="sgnmt_targets")
         targets_shape = tf.shape(self._targets_var)
         n_rows = targets_shape[0]

         # Repeat the source sentence once per target row.
         tiled_inputs = tf.tile(
             tf.expand_dims(self._inputs_var, 0), [n_rows, 1])
         t2t_inputs = expand_input_dims_for_t2t(tiled_inputs, batched=True)
         t2t_targets = expand_input_dims_for_t2t(self._targets_var,
                                                 batched=True)
         features = {"inputs": t2t_inputs, "targets": t2t_targets}
         translate_model.prepare_features_for_infer(features)
         translate_model._fill_problem_hparams_features(features)

         logits, _ = translate_model(features)
         logits = tf.squeeze(logits, [2, 3])
         self._log_probs = log_prob_from_logits(logits)

         # Row i gathered at index i.
         row_indices = tf.expand_dims(tf.range(n_rows), 1)
         diag_logits = gather_2d(logits, row_indices)
         self._diag_log_probs = log_prob_from_logits(diag_logits)

         # Per-word scores of the given targets, zeroed at PAD positions,
         # then summed per row.
         no_pad = tf.cast(
             tf.not_equal(self._targets_var, text_encoder.PAD_ID),
             tf.float32)
         n_flat = targets_shape[0] * targets_shape[1]
         flat_log_probs = tf.reshape(self._log_probs, [n_flat, -1])
         flat_targets = tf.reshape(self._targets_var, [n_flat, 1])
         word_scores = gather_2d(flat_log_probs, flat_targets)
         word_scores = tf.reshape(
             word_scores, (targets_shape[0], targets_shape[1])) * no_pad
         self._sentence_scores = tf.reduce_sum(word_scores, -1)
     self.mon_sess = self.create_session()
예제 #51
0
def build_model(hparams_set, model_name, data_dir, problem_name, return_beams,
                beam_size, custom_problem_type, force_decode_len):
  """Build the graph used to fetch attention weights and decoded samples.

  Args:
    hparams_set: HParams set to build the model with.
    model_name: Name of model.
    data_dir: Path to directory containing training data.
    problem_name: Name of problem.
    return_beams: Forwarded to `create_decode_hparams`.
    beam_size: Beam size; used in the decode hparams and as both `beam_size`
        and `top_beams` of `infer`.
    custom_problem_type: Forwarded to `get_att_mats`.
    force_decode_len: Forwarded to `create_decode_hparams` as
        `force_decode_length`.

  Returns:
    Tuple of (
        inputs: Input placeholder of shape (1, None, 1, 1).
        targets: Targets placeholder of shape (1, None, 1, 1).
        input_extra_length_ph: Scalar int32 placeholder fed to the model as
            the `decode_length` feature.
        samples: Value returned by `translate_model.infer`.
        att_mats: Value returned by `get_att_mats`.
    )
  """
  hparams = trainer_lib.create_hparams(
      hparams_set, data_dir=data_dir, problem_name=problem_name)
  hparams.add_hparam("force_decode_length", True)

  # Decode hparams come from a project-local helper.
  from problem_util_yr.t2t162.ProblemDecoder_predict import create_decode_hparams
  decode_settings = dict(
      extra_length=111,
      batch_size=1,
      beam_size=beam_size,
      alpha=0.4,
      return_beams=return_beams,
      write_beam_scores=False,
      force_decode_length=force_decode_len)
  hparams_decode = create_decode_hparams(**decode_settings)

  translate_model = registry.model(model_name)(
      hparams=hparams,
      decode_hparams=hparams_decode,
      mode=tf.estimator.ModeKeys.EVAL)

  inputs = tf.placeholder(tf.int32, shape=(1, None, 1, 1), name='inputs')
  targets = tf.placeholder(tf.int32, shape=(1, None, 1, 1), name='targets')
  input_extra_length_ph = tf.placeholder(dtype=tf.int32, shape=[])

  features = dict(
      inputs=inputs,
      targets=targets,
      decode_length=input_extra_length_ph)
  # Build the EVAL graph; the outputs themselves are not needed here.
  translate_model(features)

  # Must be called after building the training graph, so that the dict will
  # have been filled with the attention tensors. BUT before creating the
  # inference graph otherwise the dict will be filled with tensors from
  # inside a tf.while_loop from decoding and are marked unfetchable.
  att_mats = get_att_mats(translate_model, custom_problem_type, model_name)

  # The inference graph reuses the variables created above.
  with tf.variable_scope(tf.get_variable_scope(), reuse=True):
    samples = translate_model.infer(
        features, beam_size=beam_size, top_beams=beam_size)

  return inputs, targets, input_extra_length_ph, samples, att_mats
예제 #52
0
  """Input str to features dict, ready for inference"""
  inputs = encoders["inputs"].encode(input_str) + [1]  # add EOS id
  batch_inputs = tf.reshape(inputs, [1, -1, 1])  # Make it 3D.
  return {"inputs": batch_inputs}

def decode(integers):
  """List of ints to str.

  `integers` may be any nesting of ids (for example a model output with
  extra size-1 axes); it is flattened to a plain list, cut at the first
  occurrence of id 1 (used as the EOS id in this script), and decoded with
  the "inputs" encoder.

  Args:
    integers: Array-like of token ids.

  Returns:
    Whatever `encoders["inputs"].decode` returns for the ids before EOS.
  """
  # Flatten instead of squeeze: squeezing a single-element array yields a
  # 0-d array that cannot be turned into a list, and re-squeezing a
  # one-element list has the same problem.
  ids = np.asarray(integers).reshape(-1).tolist()
  if 1 in ids:
    ids = ids[:ids.index(1)]
  return encoders["inputs"].decode(ids)

# Predict: build the model once, then translate test.zh line by line.

hparams = trainer_lib.create_hparams(HPARAMS, data_dir=DATA_DIR, problem_name=PROBLEM)
translate_model = registry.model(MODEL)(hparams, Modes.PREDICT)

# Single-sentence usage example, kept for reference (the `ref` string is
# only a reference translation for judging output quality):
#inputs = "那是一条狗"
#ref = "Đó là một con chó"
#outputs = translate(inputs)
with open("test.zh", "r", encoding="utf8") as f:
  lineList = f.readlines()
outputs = []
print(len(lineList))
# `i` is only a progress counter printed before each translation.
i = 0
for line in lineList:
  print(i)
  outputs.append(translate(line))
  i = i + 1
with open("outputs.vi", "w+", encoding="utf8") as fo:
  for output in outputs:
def model(name):
    """Return the result of `registry.model(name)` for the given name."""
    return registry.model(name)
예제 #54
0
def model(name):
  """Return the result of `registry.model(name)` for the given name."""
  return registry.model(name)