Ejemplo n.º 1
0
    def test_get_decoder_self_attention_bias(self):
        """Check the decoder self-attention bias generated for a length of 5.

        The evaluated bias must equal a nested [1, 1, 5, 5] list in which row
        ``r`` holds 0 in columns ``0..r`` and ``NEG_INF`` in every later column.
        """
        seq_len = 5
        bias_tensor = model_utils.get_decoder_self_attention_bias(seq_len)
        with self.test_session() as session:
            actual = session.run(bias_tensor)

        # One row per query position; entries past the diagonal are NEG_INF.
        expected = [[[
            [0, NEG_INF, NEG_INF, NEG_INF, NEG_INF],
            [0, 0, NEG_INF, NEG_INF, NEG_INF],
            [0, 0, 0, NEG_INF, NEG_INF],
            [0, 0, 0, 0, NEG_INF],
            [0, 0, 0, 0, 0],
        ]]]
        self.assertAllEqual(expected, actual)
    def _get_symbols_to_logits_fn(self, max_decode_length):
        """Build the per-step callback that turns decoded ids into logits.

        Args:
          max_decode_length: number of decoding steps to prepare for. It sizes
            the precomputed position encoding (``max_decode_length + 1``
            positions) and the decoder self-attention bias.

        Returns:
          A function ``symbols_to_logits_fn(ids, i, cache)`` that closes over
          the precomputed position encoding and self-attention bias.
        """
        # Both tables are computed once here and sliced per step in the closure.
        position_table = model_utils.get_position_encoding(
            max_decode_length + 1, self.params["hidden_size"])
        full_self_bias = model_utils.get_decoder_self_attention_bias(
            max_decode_length)

        def symbols_to_logits_fn(ids, i, cache):
            """Compute logits for the token that follows the current sequences.

            Args:
              ids: sequences decoded so far.
                int tensor with shape [batch_size * beam_size, i + 1]
              i: index of the current decoding step.
              cache: dictionary holding the encoder output, the
                encoder-decoder attention biases, the encoder inputs and
                previous decoder attention values.

            Returns:
              Tuple of
                (logits with shape [batch_size * beam_size, vocab_size],
                 the cache that was passed in)
            """
            # Only the most recently generated id of each sequence is decoded.
            step_input = ids[:, -1:]

            # Embed that id and add the position encoding of step i.
            step_input = self.embedding_softmax_layer(step_input)
            step_input += position_table[i:i + 1]

            # Keep row i of the precomputed bias, restricted to columns 0..i.
            step_self_bias = full_self_bias[:, :, i:i + 1, :i + 1]

            # Everything else the decoder needs is read from the cache.
            enc_dec_bias = cache.get("encoder_decoder_attention_bias")
            bias_query = cache.get("attention_bias_query")
            bias_content = cache.get("attention_bias_content")
            memory = cache.get("encoder_outputs")
            source_inputs = cache.get("inputs")  # encoder inputs

            d_out, c_query, c_content, m_out = self.decoder_stack(
                step_input, memory, step_self_bias, enc_dec_bias,
                bias_query, bias_content, cache)
            step_logits = self.distribute_layer(
                d_out, c_query, c_content, m_out, memory,
                bias_query, bias_content, source_inputs)

            # Drop the length-1 step axis so logits are [batch * beam, vocab].
            step_logits = tf.squeeze(step_logits, axis=[1])
            return step_logits, cache

        return symbols_to_logits_fn
    def decode(self, targets, encoder_outputs, attention_bias,
               attention_bias_query, attention_bias_content, inputs):
        """Compute logits for every position of the target sequence.

        Args:
          targets: target values for the output sequence.
            int tensor with shape [batch_size, target_length]
          encoder_outputs: continuous representation of the input sequence.
            float tensor with shape [batch_size, input_length, hidden_size]
          attention_bias: float tensor with shape
            [batch_size, 1, 1, input_length]
          attention_bias_query: bias passed on to the decoder stack and the
            distribute layer (shape not documented here -- TODO confirm).
          attention_bias_content: bias passed on to the decoder stack and the
            distribute layer (shape not documented here -- TODO confirm).
          inputs: passed on unchanged to the distribute layer; presumably the
            encoder inputs -- confirm against the caller.

        Returns:
          float32 tensor with shape [batch_size, target_length, vocab_size]
        """
        with tf.name_scope("decode"):
            # Embed the targets, then shift, add positions and apply dropout
            # before they enter the decoder stack.
            embedded = self.embedding_softmax_layer(targets)
            with tf.name_scope("shift_targets"):
                # Pad one step at the front of axis 1 and cut the last step,
                # which shifts the targets one position to the right.
                padded = tf.pad(embedded, [[0, 0], [1, 0], [0, 0]])
                embedded = padded[:, :-1, :]
            with tf.name_scope("add_pos_encoding"):
                target_length = tf.shape(embedded)[1]
                embedded += model_utils.get_position_encoding(
                    target_length, self.params["hidden_size"])
            if self.train:
                # Passed as the second positional argument of tf.nn.dropout;
                # computed as 1 - configured dropout, i.e. a keep fraction.
                # NOTE(review): assumes the TF1 `keep_prob` signature.
                keep_fraction = 1 - self.params["layer_postprocess_dropout"]
                embedded = tf.nn.dropout(embedded, keep_fraction)

            # Run the decoder stack over the whole (shifted) target sequence.
            self_bias = model_utils.get_decoder_self_attention_bias(
                target_length)
            d_out, c_query, c_content, m_out = self.decoder_stack(
                embedded, encoder_outputs, self_bias, attention_bias,
                attention_bias_query, attention_bias_content)

            # Turn the decoder results into the output distribution.
            return self.distribute_layer(
                d_out, c_query, c_content, m_out, encoder_outputs,
                attention_bias_query, attention_bias_content, inputs)
Ejemplo n.º 4
0
  def _get_symbols_to_logits_fn(self, max_decode_length):
    """Build the per-step callback that turns decoded ids into logits.

    Args:
      max_decode_length: number of decoding steps to prepare for. It sizes the
        precomputed position encoding (``max_decode_length + 1`` positions)
        and the decoder self-attention bias.

    Returns:
      A function ``symbols_to_logits_fn(ids, i, cache)``.
    """
    # Position encoding for every step that may be decoded.
    position_table = model_utils.get_position_encoding(
        max_decode_length + 1, self.params["hidden_size"])
    # Decoding mask; per the original author's note its upper triangle is -INF.
    full_self_bias = model_utils.get_decoder_self_attention_bias(
        max_decode_length)

    def symbols_to_logits_fn(ids, i, cache):
      """Compute logits for the token that follows the current sequences.

      Args:
        ids: sequences decoded so far.
          int tensor with shape [batch_size * beam_size, i + 1]
        i: index of the current decoding step.
        cache: dictionary holding the encoder output, the encoder-decoder
          attention bias, and previous decoder attention values.

      Returns:
        Tuple of
          (logits with shape [batch_size * beam_size, vocab_size],
           the cache that was passed in)
      """
      # Only the most recently generated id of each sequence is decoded.
      step_input = ids[:, -1:]

      # Embed that id and add the position signal of step i to the embedding.
      step_input = self.embedding_softmax_layer(step_input)
      step_input += position_table[i:i + 1]

      # Keep row i of the precomputed mask, restricted to columns 0..i, so the
      # single query at step i is paired with bias entries for steps 0..i only.
      step_self_bias = full_self_bias[:, :, i:i + 1, :i + 1]

      memory = cache.get("encoder_outputs")
      enc_dec_bias = cache.get("encoder_decoder_attention_bias")
      stack_output = self.decoder_stack(
          step_input, memory, step_self_bias, enc_dec_bias, cache)

      step_logits = self.embedding_softmax_layer.linear(stack_output)
      # Drop the length-1 step axis so logits are [batch * beam, vocab].
      step_logits = tf.squeeze(step_logits, axis=[1])
      return step_logits, cache

    return symbols_to_logits_fn