Example #1
    def testGreedyWithCornerCase(self):
        batch_size = 1
        beam_size = 1
        vocab_size = 3
        decode_length = 2

        initial_ids = tf.constant([0] * batch_size)  # GO
        # Transition probabilities indexed by decode position (position x vocab).
        probabilities = tf.constant([[0.2, 0.1, 0.7], [0.4, 0.1, 0.5]])

        def symbols_to_logits(ids):
            pos = tf.shape(ids)[1]
            logits = tf.to_float(tf.log(probabilities[pos - 1, :]))
            return logits

        final_ids, final_probs = beam_search.beam_search(symbols_to_logits,
                                                         initial_ids,
                                                         beam_size,
                                                         decode_length,
                                                         vocab_size,
                                                         0.0,
                                                         eos_id=1)

        with self.test_session():
            ids = final_ids.eval()
            probs = final_probs.eval()
        self.assertAllEqual([[[0, 2, 2]]], ids)
        self.assertAllClose([[0.7 * 0.5]], np.exp(probs))
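These test methods are lifted from a larger test class, so run standalone they need numpy, TensorFlow 1.x, and the beam_search module on the path. A minimal harness might look like the sketch below; the import path is an assumption based on the tensor2tensor layout.

    import numpy as np
    import tensorflow as tf
    from tensor2tensor.utils import beam_search  # assumed import path

    class BeamSearchTest(tf.test.TestCase):
        # Paste any of the test methods from these examples here.
        ...

    if __name__ == "__main__":
        tf.test.main()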
Example #2
    def testNotGreedyBeamTwo(self):
        batch_size = 1
        beam_size = 2
        vocab_size = 3
        decode_length = 3

        initial_ids = tf.constant([0] * batch_size)  # GO
        # Transition probabilities indexed by position x beam x vocab.
        probabilities = tf.constant([[[0.1, 0.1, 0.8], [0.1, 0.1, 0.8]],
                                     [[0.4, 0.5, 0.1], [0.2, 0.4, 0.4]],
                                     [[0.05, 0.9, 0.05], [0.4, 0.4, 0.2]]])

        def symbols_to_logits(ids):
            pos = tf.shape(ids)[1]
            logits = tf.to_float(tf.log(probabilities[pos - 1, :]))
            return logits

        final_ids, final_probs = beam_search.beam_search(symbols_to_logits,
                                                         initial_ids,
                                                         beam_size,
                                                         decode_length,
                                                         vocab_size,
                                                         0.0,
                                                         eos_id=1)

        with self.test_session():
            ids = final_ids.eval()
            probs = final_probs.eval()
        self.assertAllEqual([[[0, 2, 1, 0], [0, 2, 0, 1]]], ids)
        self.assertAllClose([[0.8 * 0.5, 0.8 * 0.4 * 0.9]], np.exp(probs))
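A quick hand check of why these two hypotheses win (a sketch, not part of the test): with eos_id=1, the two finished candidates reachable under these per-beam probabilities are [0, 2, 1] and [0, 2, 0, 1]; finished hypotheses are padded with 0 up to the full decode length, hence the trailing 0 in [0, 2, 1, 0].

    # Step 1: both beams put 0.8 on token 2, so every hypothesis starts [0, 2].
    # Step 2: one beam row sees [0.4, 0.5, 0.1]; picking token 1 (EOS) finishes
    # [0, 2, 1], while continuing with token 0 keeps [0, 2, 0] alive.
    hyp_a = 0.8 * 0.5          # [0, 2, 1]     -> 0.400
    # Step 3: the surviving beam sees [0.05, 0.9, 0.05]; token 1 (EOS) finishes
    # [0, 2, 0, 1].
    hyp_b = 0.8 * 0.4 * 0.9    # [0, 2, 0, 1]  -> 0.288
    assert hyp_a > hyp_b       # matches the asserted beam ordering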
Example #3
    def testGreedyBatchOne(self):
        batch_size = 1
        beam_size = 1
        vocab_size = 2
        decode_length = 3

        initial_ids = tf.constant([0] * batch_size)  # GO

        # Test that beam search finds the most probable sequence.
        # These probabilities represent the following search
        #
        #               G0 (0)
        #                  / \
        #                /     \
        #              /         \
        #            /             \
        #         0(0.7)          1(0.3)
        #           / \
        #          /   \
        #         /     \
        #     0(0.4) 1(0.6)
        #        /\
        #       /  \
        #      /    \
        #    0(0.5) 1(0.5)
        # and the following decoding probabilities
        # 0000 - 0.7 * 0.4 * 0.5
        # 0001 - 0.7 * 0.4 * 0.5
        # 001 - 0.7 * 0.6 (Best)
        # 01 - 0.3
        #
        # 001 is the most likely sequence under these probabilities.
        probabilities = tf.constant([[[0.7, 0.3]], [[0.4, 0.6]], [[0.5, 0.5]]])

        def symbols_to_logits(ids):
            pos = tf.shape(ids)[1]
            logits = tf.to_float(tf.log(probabilities[pos - 1, :]))
            return logits

        final_ids, final_probs = beam_search.beam_search(symbols_to_logits,
                                                         initial_ids,
                                                         beam_size,
                                                         decode_length,
                                                         vocab_size,
                                                         0.0,
                                                         eos_id=1)

        with self.test_session():
            ids = final_ids.eval()
            probs = final_probs.eval()
        self.assertAllEqual([[[0, 0, 1]]], ids)
        self.assertAllClose([[0.7 * 0.6]], np.exp(probs))
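Checking the arithmetic from the tree above (pure arithmetic, not part of the test):

    import numpy as np

    candidates = {
        "0000": 0.7 * 0.4 * 0.5,  # 0.14
        "0001": 0.7 * 0.4 * 0.5,  # 0.14
        "001": 0.7 * 0.6,         # 0.42  <- best; ends in EOS (id 1)
        "01": 0.3,                # 0.30
    }
    assert max(candidates, key=candidates.get) == "001"
    assert np.isclose(candidates["001"], 0.42)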
Example #4
    def testNotGreedyBatchTwoBeamTwoWithAlpha(self):
        batch_size = 2
        beam_size = 2
        vocab_size = 3
        decode_length = 3

        initial_ids = tf.constant([0] * batch_size)  # GO
        # Probabilities indexed by position x batch x beam x vocab.
        # The probabilities are set up so that with alpha = 3.5 the less
        # probable but longer sequence scores better than the shorter sequence
        # with the higher log prob in batch 1, while the order is reversed in
        # batch 2: there the shorter sequence keeps the higher score in spite
        # of the length penalty.
        probabilities = tf.constant([[[[0.1, 0.1, 0.8], [0.1, 0.1, 0.8]],
                                      [[0.1, 0.1, 0.8], [0.1, 0.1, 0.8]]],
                                     [[[0.4, 0.5, 0.1], [0.2, 0.4, 0.4]],
                                      [[0.3, 0.6, 0.1], [0.2, 0.4, 0.4]]],
                                     [[[0.05, 0.9, 0.05], [0.4, 0.4, 0.2]],
                                      [[0.05, 0.9, 0.05], [0.4, 0.4, 0.2]]]])

        def symbols_to_logits(ids):
            pos = tf.shape(ids)[1]
            logits = tf.to_float(tf.log(probabilities[pos - 1, :]))
            return logits

        final_ids, final_scores = beam_search.beam_search(symbols_to_logits,
                                                          initial_ids,
                                                          beam_size,
                                                          decode_length,
                                                          vocab_size,
                                                          3.5,
                                                          eos_id=1)

        with self.test_session():
            ids = final_ids.eval()
            scores = final_scores.eval()
        self.assertAllEqual(
            [[[0, 2, 0, 1], [0, 2, 1, 0]], [[0, 2, 1, 0], [0, 2, 0, 1]]], ids)
        self.assertAllClose([[
            np.log(0.8 * 0.4 * 0.9) / (8. / 6.)**3.5,
            np.log(0.8 * 0.5) / (7. / 6.)**3.5
        ],
                             [
                                 np.log(0.8 * 0.6) / (7. / 6.)**3.5,
                                 np.log(0.8 * 0.3 * 0.9) / (8. / 6.)**3.5
                             ]], scores)
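The (7/6)**3.5 and (8/6)**3.5 divisors in the expected scores are consistent with a GNMT-style length penalty, ((5 + length) / 6) ** alpha, applied to the log probability with hypothesis lengths 2 and 3. A hand check of both orderings:

    import numpy as np

    alpha = 3.5

    def score(log_prob, length):
        # Length penalty form inferred from the divisors asserted above.
        return log_prob / ((5. + length) / 6.) ** alpha

    # Batch 1: the longer, less probable hypothesis outscores the shorter one.
    assert score(np.log(0.8 * 0.4 * 0.9), 3) > score(np.log(0.8 * 0.5), 2)
    # Batch 2: the shorter hypothesis keeps the higher score despite the penalty.
    assert score(np.log(0.8 * 0.6), 2) > score(np.log(0.8 * 0.3 * 0.9), 3)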
Example #5
    def testShapes(self):
        batch_size = 2
        beam_size = 3
        vocab_size = 4
        decode_length = 10

        initial_ids = tf.constant([0, 0])  # GO

        def symbols_to_logits(_):
            # Just return random logits
            return tf.random_uniform((batch_size * beam_size, vocab_size))

        final_ids, final_probs = beam_search.beam_search(
            symbols_to_logits, initial_ids, beam_size, decode_length,
            vocab_size, 0.)

        self.assertEqual(final_ids.get_shape().as_list(),
                         [None, beam_size, None])

        self.assertEqual(final_probs.get_shape().as_list(), [None, beam_size])
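The None entries in the expected static shapes are not an error: TF1 static shape inference reports None for any dimension that is only known at run time, and beam_search derives both the batch and length dimensions dynamically, leaving only the beam dimension static. A tiny illustration of the same behavior:

    import tensorflow as tf

    x = tf.placeholder(tf.float32, [None, 3])  # dynamic batch dimension
    y = tf.concat([x, x], axis=1)
    print(y.get_shape().as_list())             # [None, 6] -- batch still unknown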
Example #6
  def _beam_decode(self, features, decode_length, beam_size, top_beams,
                   last_position_only, alpha):
    """Beam search decoding.

    Args:
      features: a map of string to `Tensor`.
      decode_length: an integer.  How many additional timesteps to decode.
      beam_size: number of beams.
      top_beams: an integer. How many of the beams to return.
      last_position_only: a boolean; speed up decoding by computing logits for
        the last position only.
      alpha: a float that controls the length penalty. The larger the alpha,
        the stronger the preference for longer translations.

    Returns:
       samples: an integer `Tensor`. Top samples from the beam search.
    """

    batch_size = tf.shape(features["inputs"])[0]
    batch_size = tf.Print(batch_size, [batch_size], "beam_decode batch_size=")

    def symbols_to_logits_fn(ids):
      """Go from ids to logits."""
      ids = tf.expand_dims(tf.expand_dims(ids, axis=2), axis=3)
      ids = tf.pad(ids[:, 1:], [[0, 0], [0, 1], [0, 0], [0, 0]])
      if "partial_targets" in features:
        pt = features["partial_targets"]
        pt_length = tf.shape(pt)[1]
        pt = tf.tile(pt, [1, beam_size])
        pt = tf.reshape(pt, [batch_size * beam_size, pt_length, 1, 1])
        ids = tf.concat([pt, ids], axis=1)

      features["targets"] = ids
      self._coverage = None
      sharded_logits, _ = self.model_fn(
          features, False, last_position_only=last_position_only)
      # now self._coverage is a coverage tensor for the first datashard.
      # it has shape [batch_size] and contains floats between 0 and
      # source_length.
      logits = sharded_logits[0]  # Assuming we have one shard.
      if last_position_only:
        return tf.squeeze(logits, axis=[1, 2, 3])
      current_output_position = tf.shape(ids)[1] - 1  # -1 due to the pad above.
      logits = logits[:, current_output_position, :, :]
      return tf.squeeze(logits, axis=[1, 2])

    initial_ids = tf.zeros([batch_size], dtype=tf.int32)

    inputs_old = features["inputs"]
    features["inputs"] = tf.expand_dims(features["inputs"], 1)
    if len(features["inputs"].shape) < 5:
      features["inputs"] = tf.expand_dims(features["inputs"], 4)
    # Expand the inputs into the beam size.
    features["inputs"] = tf.tile(features["inputs"], [1, beam_size, 1, 1, 1])
    s = tf.shape(features["inputs"])
    features["inputs"] = tf.reshape(features["inputs"],
                                    [s[0] * s[1], s[2], s[3], s[4]])

    target_modality = self._hparams.problems[self._problem_idx].target_modality
    vocab_size = target_modality.top_dimensionality
    # Setting decode length to input length + decode_length
    decode_length = tf.constant(decode_length)
    if "partial_targets" not in features:
      decode_length += tf.shape(features["inputs"])[1]
    ids, scores = beam_search.beam_search(symbols_to_logits_fn, initial_ids,
                                          beam_size, decode_length, vocab_size,
                                          alpha)

    # Set inputs back to the unexpanded inputs so as not to confuse the Estimator!
    features["inputs"] = inputs_old

    # Return `top_beams` decodings (also remove initial id from the beam search)
    return_scores = False  # TODO(lukaszkaiser): make it work multi-problem.
    if top_beams == 1:
      if return_scores:
        return {"outputs": ids[:, 0, 1:], "scores": scores}
      return ids[:, 0, 1:]
    else:
      if return_scores:
        return {"outputs": ids[:, :top_beams, 1:], "scores": scores}
      return ids[:, :top_beams, 1:]
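The expand/tile/reshape sequence on features["inputs"] above is the standard trick for replicating each batch entry beam_size times so that the model sees a flat [batch * beam, ...] batch. A minimal sketch of the same transformation on toy shapes (not the model's real features):

    import tensorflow as tf

    batch_size, beam_size, length = 2, 3, 4
    inputs = tf.reshape(tf.range(batch_size * length), [batch_size, length, 1, 1])

    expanded = tf.expand_dims(inputs, 1)                # [batch, 1, len, 1, 1]
    tiled = tf.tile(expanded, [1, beam_size, 1, 1, 1])  # [batch, beam, len, 1, 1]
    s = tf.shape(tiled)
    flat = tf.reshape(tiled, [s[0] * s[1], s[2], s[3], s[4]])  # [batch*beam, len, 1, 1]

    with tf.Session() as sess:
        # Rows 0-2 are copies of batch entry 0, rows 3-5 of batch entry 1.
        print(sess.run(flat)[:, :, 0, 0])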