Example #1
0
    def testTopKTerminatedHypsOp(self):
        """Checks `top_k_terminated_hyps` on the output of two beam search steps.

        Runs `beam_search_step` for steps 0 and 1 (eos_id=2), feeds the resulting
        done-hyps tensor to `top_k_terminated_hyps` with k=2, unpacks the result
        with `unpack_hyp`, and compares everything against hard-coded values.
        """
        with self.session(use_gpu=False) as sess:
            b_size = 8
            num_beams = 2
            # Floor division keeps this an int on Python 3 as well; true
            # division would yield 4.0, and the value is passed as the
            # `num_hyps_per_beam` argument of the ops below.
            num_hyps_per_beam = b_size // num_beams
            seq_len = 6
            # NOTE(review): these random inputs carry no op-level seed although
            # exact values are asserted below; presumably a graph-level seed is
            # set elsewhere -- confirm.
            scores = tf.random_uniform([b_size, 5])
            atten_probs = tf.random_uniform([b_size, 3])
            src_seq_lengths = [3, 3]
            best_scores = tf.zeros([num_beams])
            cumulative_scores = tf.zeros([b_size])
            in_scores = tf.zeros([seq_len, b_size])
            in_hyps = tf.zeros([seq_len, b_size], dtype=tf.int32)
            in_prev_hyps = tf.zeros([seq_len, b_size], dtype=tf.int32)
            in_done_hyps = tf.as_string(
                tf.zeros([seq_len, b_size], dtype=tf.int32))
            in_atten_probs = tf.zeros([seq_len, b_size, 3])

            # Step 0: start from the all-zero state built above.
            (out_best_scores_0, out_cumulative_scores_0, out_scores_0,
             out_hyps_0, out_prev_hyps_0, out_done_hyps_0, out_atten_probs_0,
             _) = py_x_ops.beam_search_step(
                 scores,
                 atten_probs,
                 best_scores,
                 cumulative_scores,
                 in_scores,
                 in_hyps,
                 in_prev_hyps,
                 in_done_hyps,
                 in_atten_probs, [],
                 0,
                 eos_id=2,
                 beam_size=3.0,
                 num_hyps_per_beam=num_hyps_per_beam)

            # Step 1: continue from the outputs of step 0.
            outputs = py_x_ops.beam_search_step(
                scores,
                atten_probs,
                out_best_scores_0,
                out_cumulative_scores_0,
                out_scores_0,
                out_hyps_0,
                out_prev_hyps_0,
                out_done_hyps_0,
                out_atten_probs_0, [],
                1,
                eos_id=2,
                beam_size=3.0,
                num_hyps_per_beam=num_hyps_per_beam)

            # Get the topk terminated hyps.
            in_done_hyps = outputs[5]
            topk_hyps = py_x_ops.top_k_terminated_hyps(
                in_done_hyps,
                src_seq_lengths,
                k=2,
                num_hyps_per_beam=num_hyps_per_beam,
                length_normalization=0.2,
                coverage_penalty=0.2,
                target_seq_length_ratio=1.0)
            seq_ids, seq_lens, seq_scores = py_x_ops.unpack_hyp(
                tf.reshape(topk_hyps, [-1]), max_seq_length=5)

            k1, k2, k3, k4 = sess.run(
                [topk_hyps, seq_ids, seq_lens, seq_scores])
            print(np.array_repr(k1))
            # A real test assertion: not stripped under `python -O` and it
            # reports both values on failure.
            self.assertEqual(k1.size, 4)

            expected_top1_for_beam_0 = """
      beam_id: 0
      ids: 3
      ids: 2
      scores: 0.897901892662
      scores: 0.997961401939
      atten_vecs {
        prob: 0.857856750488
        prob: 0.608582258224
        prob: 0.725398182869
      }
      atten_vecs {
        prob: 0.857856750488
        prob: 0.608582258224
        prob: 0.725398182869
      }
      normalized_score: 1.35659193993
      """
            expected_top2_for_beam_1 = """
      beam_id: 1
      ids: 0
      ids: 2
      scores: 0.753268480301
      scores: 0.789751410484
      atten_vecs {
        prob: 0.689820885658
        prob: 0.216090679169
        prob: 0.40637075901
      }
      atten_vecs {
        prob: 0.452527046204
        prob: 0.374898076057
        prob: 0.127457261086
      }
      normalized_score: 1.02671170235
      """
            self._SameHyp(expected_top1_for_beam_0, k1[0, 0])
            self._SameHyp(expected_top2_for_beam_1, k1[1, 1])

            self.assertAllClose(k2, [[3, 2, 0, 0, 0], [0, 2, 0, 0, 0],
                                     [4, 2, 0, 0, 0], [0, 2, 0, 0, 0]])
            self.assertAllClose(k3, [2, 2, 2, 2])
            self.assertAllClose(
                k4, [1.35659194, 1.02759778, 1.21130753, 1.0267117])
    def BeamSearchDecode(self,
                         theta,
                         source_encs,
                         source_paddings,
                         num_hyps_per_beam_override=0,
                         init_beam_search_state=None,
                         pre_beam_search_step_callback=None,
                         post_beam_search_step_callback=None,
                         additional_source_info=None,
                         max_steps=None):
        """Decodes with beam search and returns the top terminated hypotheses.

        Builds the initial beam search state, repeatedly applies
        `self._BeamSearchStep` inside a `tf.while_loop` until the step limit is
        reached or the step reports that everything is done, and finally
        extracts the top-k terminated hypotheses from the last state.

        Args:
          theta: A NestedMap object containing weights' values of the decoder
            layer and its children layers.
          source_encs: source encoding, of shape [time, batch, depth]. For
            multi-source decoding, a `.NestedMap` of source encoding tensors,
            each of shape [time, batch, depth].
          source_paddings: source encoding's padding, of shape [time, batch].
            For multi-source decoding, a `.NestedMap` of source padding
            tensors, each of shape [time, batch].
          num_hyps_per_beam_override: Ignored when <= 0. When > 0, used in
            place of `p.num_hyps_per_beam`.
          init_beam_search_state: The `InitBeamSearchState` callback. Please
            refer to the class header comments for more details.
          pre_beam_search_step_callback: The `PreBeamSearchStepCallback`
            callback. Please refer to the class header comments for more
            details.
          post_beam_search_step_callback: The `PostBeamSearchStepCallback`
            callback. Please refer to the class header comments for more
            details.
          additional_source_info: a `.NestedMap` of tensors containing extra
            context information about the source that may be useful for
            decoding.
          max_steps: maximum number of beam search steps. If None,
            `self.params.target_seq_len` is used.

        Returns:
          A `BeamSearchDecodeOutput`.
        """
        params = self.params
        default_hyps_per_beam = params.num_hyps_per_beam
        hyps_per_beam = (num_hyps_per_beam_override
                         if num_hyps_per_beam_override > 0 else
                         default_hyps_per_beam)
        step_limit = params.target_seq_len if max_steps is None else max_steps

        # A NestedMap of encodings means multi-source decoding; the first
        # flattened entry is then used to read off the number of beams.
        multi_source = isinstance(source_encs, py_utils.NestedMap)
        reference_encs = (source_encs.Flatten()[0]
                          if multi_source else source_encs)
        num_beams = tf.shape(reference_encs)[1]
        total_hyps = num_beams * hyps_per_beam

        initial_results, other_states = init_beam_search_state(
            theta, source_encs, source_paddings, hyps_per_beam,
            additional_source_info)

        # Source lengths are the per-batch counts of non-padded positions.
        if multi_source and isinstance(source_paddings, py_utils.NestedMap):
            reference_paddings = source_paddings.Flatten()[0]
        else:
            reference_paddings = source_paddings
        non_padded = 1.0 - tf.transpose(reference_paddings)
        source_seq_lengths = tf.to_int32(tf.reduce_sum(non_padded, 1))

        dtype = params.dtype
        sos_id = tf.constant(params.target_sos_id, dtype=tf.int32)
        step_ids = tf.fill([total_hyps, 1], sos_id)
        min_score = -1e36
        best_scores = tf.zeros(shape=[num_beams], dtype=dtype) + min_score
        cumulative_scores = tf.zeros(shape=[total_hyps], dtype=dtype)
        history_shape = [step_limit, total_hyps]
        in_scores = tf.zeros(history_shape, dtype=dtype)
        in_hyps = tf.zeros(history_shape, dtype=tf.int32)
        in_prev_hyps = tf.zeros(history_shape, dtype=tf.int32)
        in_done_hyps = tf.zeros(history_shape, dtype=tf.string)
        atten_dim = tf.shape(initial_results.atten_probs)[1]
        bs_atten_probs = tf.zeros([step_limit, total_hyps, atten_dim],
                                  dtype=dtype)
        cur_step = tf.constant(0, dtype=tf.int32)
        all_done = tf.constant(False, dtype=tf.bool)
        core_bs_states = (best_scores, cumulative_scores, in_scores, in_hyps,
                          in_prev_hyps, in_done_hyps, bs_atten_probs)

        def _KeepGoing(step, done, unused_ids, unused_bs_states,
                       unused_flat_states):
            # Continue while below the step limit and not yet all done.
            below_limit = step < step_limit
            return tf.logical_and(below_limit, tf.logical_not(done))

        def _RunStep(step, unused_done, ids, bs_states, flat_states):
            # The loop carries `other_states` flattened; re-pack for the step
            # and flatten its updated version again for the next iteration.
            step_outputs = self._BeamSearchStep(
                theta, source_encs, source_paddings,
                step, ids, bs_states,
                other_states.Pack(flat_states), hyps_per_beam,
                pre_beam_search_step_callback, post_beam_search_step_callback,
                additional_source_info)
            (next_step, done, next_ids, next_bs_states,
             next_other_states) = step_outputs
            return (next_step, done, next_ids, next_bs_states,
                    next_other_states.Flatten())

        flat_other_states = other_states.Flatten()
        loop_vars = (cur_step, all_done, step_ids, core_bs_states,
                     flat_other_states)
        invariants = (tf.TensorShape(cur_step.get_shape()),
                      tf.TensorShape(all_done.get_shape()),
                      tf.TensorShape(step_ids.get_shape()),
                      _GetShapes(core_bs_states),
                      _GetShapes(flat_other_states, none_shapes=True))
        loop_outputs = tf.while_loop(
            _KeepGoing,
            _RunStep,
            loop_vars=loop_vars,
            parallel_iterations=10,
            back_prop=False,
            swap_memory=False,
            shape_invariants=invariants)
        final_bs_states, flat_final_other_states = loop_outputs[3:5]

        # Done hyps: [target_seq_len, num_beams * num_hyps_per_beam].
        final_done_hyps = final_bs_states[5]
        final_other_states = other_states.Pack(flat_final_other_states)

        # Top-k hyps: [num_beams, num_hyps_per_beam].
        topk_hyps = py_x_ops.top_k_terminated_hyps(
            final_done_hyps,
            source_seq_lengths,
            k=hyps_per_beam,
            num_hyps_per_beam=hyps_per_beam,
            length_normalization=params.length_normalization,
            coverage_penalty=params.coverage_penalty,
            target_seq_length_ratio=params.target_seq_length_ratio,
            eoc_id=params.target_eoc_id,
            merge_paths=params.merge_paths)

        # Unpacked outputs: [num_beams * num_hyps_per_beam, ...]. A tensor
        # step limit cannot be passed as `max_seq_length`, so 0 is used then.
        if isinstance(step_limit, tf.Tensor):
            max_seq_length = 0
        else:
            max_seq_length = step_limit
        flat_topk_hyps = tf.reshape(topk_hyps, [-1])
        topk_ids, topk_lens, topk_scores = py_x_ops.unpack_hyp(
            flat_topk_hyps, max_seq_length=max_seq_length)
        # Scores back to [num_beams, num_hyps_per_beam].
        topk_scores = tf.reshape(topk_scores, tf.shape(topk_hyps))

        return BeamSearchDecodeOutput(final_done_hyps, topk_hyps, topk_ids,
                                      topk_lens, topk_scores, None,
                                      final_other_states)
Example #3
0
  def testTopKTerminatedHypsOp(self):
    """Checks `top_k_terminated_hyps` on the output of two beam search steps.

    Runs `beam_search_step` for steps 0 and 1 (eos_id=2) on seeded random
    inputs, feeds the resulting done-hyps tensor to `top_k_terminated_hyps`
    with k=2, unpacks the result with `unpack_hyp`, and compares everything
    against hard-coded values.
    """
    with self.session(use_gpu=False) as sess:
      b_size = 8
      num_beams = 2
      # Floor division keeps this an int on Python 3 as well; true division
      # would yield 4.0, and the value is passed as the `num_hyps_per_beam`
      # argument of the ops below.
      num_hyps_per_beam = b_size // num_beams
      seq_len = 6
      scores = tf.random_uniform([b_size, 5], seed=12345)
      atten_probs = tf.random_uniform([b_size, 3], seed=12345)
      src_seq_lengths = [3, 3]
      best_scores = tf.zeros([num_beams])
      cumulative_scores = tf.zeros([b_size])
      in_scores = tf.zeros([seq_len, b_size])
      in_hyps = tf.zeros([seq_len, b_size], dtype=tf.int32)
      in_prev_hyps = tf.zeros([seq_len, b_size], dtype=tf.int32)
      in_done_hyps = tf.as_string(tf.zeros([seq_len, b_size], dtype=tf.int32))
      in_atten_probs = tf.zeros([seq_len, b_size, 3])

      # Step 0: start from the all-zero state built above.
      (out_best_scores_0, out_cumulative_scores_0, out_scores_0, out_hyps_0,
       out_prev_hyps_0, out_done_hyps_0, out_atten_probs_0,
       _) = py_x_ops.beam_search_step(
           scores,
           atten_probs,
           best_scores,
           cumulative_scores,
           in_scores,
           in_hyps,
           in_prev_hyps,
           in_done_hyps,
           in_atten_probs, [],
           0,
           eos_id=2,
           beam_size=3.0,
           num_hyps_per_beam=num_hyps_per_beam)

      # Step 1: continue from the outputs of step 0.
      outputs = py_x_ops.beam_search_step(
          scores,
          atten_probs,
          out_best_scores_0,
          out_cumulative_scores_0,
          out_scores_0,
          out_hyps_0,
          out_prev_hyps_0,
          out_done_hyps_0,
          out_atten_probs_0, [],
          1,
          eos_id=2,
          beam_size=3.0,
          num_hyps_per_beam=num_hyps_per_beam)

      # Get the topk terminated hyps.
      in_done_hyps = outputs[5]
      topk_hyps = py_x_ops.top_k_terminated_hyps(
          in_done_hyps,
          src_seq_lengths,
          k=2,
          num_hyps_per_beam=num_hyps_per_beam,
          length_normalization=0.2,
          coverage_penalty=0.2,
          target_seq_length_ratio=1.0)
      seq_ids, seq_lens, seq_scores = py_x_ops.unpack_hyp(
          tf.reshape(topk_hyps, [-1]), max_seq_length=5)

      k1, k2, k3, k4 = sess.run([topk_hyps, seq_ids, seq_lens, seq_scores])
      print(np.array_repr(k1))
      # A real test assertion: not stripped under `python -O` and it reports
      # both values on failure.
      self.assertEqual(k1.size, 4)

      expected_top1_for_beam_0 = """
      beam_id: 0
      ids: 1
      ids: 2
      scores: 0.86230338
      scores: 0.65504861
      atten_vecs {
        prob: 0.45372832
        prob: 0.86230338
        prob: 0.65504861
      }
      atten_vecs {
        prob: 0.45372832
        prob: 0.86230338
        prob: 0.65504861
      }
      normalized_score: 1.002714
      """
      expected_top2_for_beam_1 = """
      beam_id: 1
      ids: 3
      ids: 2
      scores: 0.38127339
      scores: 0.57700801
      atten_vecs {
        prob: 0.38612545
        prob: 0.42067075
        prob: 0.84442794
      }
      atten_vecs {
        prob: 0.18693292
        prob: 0.17821217
        prob: 0.66380036
      }
      normalized_score: 0.480028
      """
      self._SameHyp(expected_top1_for_beam_0, k1[0, 0])
      self._SameHyp(expected_top2_for_beam_1, k1[1, 1])

      self.assertAllClose(
          k2,
          [[1, 2, 0, 0, 0], [4, 2, 0, 0, 0], [4, 2, 0, 0, 0], [3, 2, 0, 0, 0]])
      self.assertAllClose(k3, [2, 2, 2, 2])
      self.assertAllClose(k4, [1.002714, 0.684296, 0.522484, 0.480028])