Example #1
  def decode(self, decoder_inputs, encoder_outputs, train, attention_bias):
    """Generate logits for each value in the target sequence.

    Args:
      decoder_inputs: embedded target values for the output sequence.
        float tensor with shape [batch_size, target_length, hidden_size]
      encoder_outputs: continuous representation of input sequence.
        float tensor with shape [batch_size, input_length, hidden_size]
      train: bool, whether the layer is being called in training mode.
      attention_bias: float tensor with shape [batch_size, 1, 1, input_length]

    Returns:
      float32 tensor with shape [batch_size, target_length, vocab_size]
    """
    with tf.name_scope("decode"):
      # Prepare inputs to decoder layers by shifting targets, adding positional
      # encoding and applying dropout.
      with tf.name_scope("add_pos_encoding"):
        length = tf.shape(decoder_inputs)[1]
        pos_encoding = model_utils.get_position_encoding(
            length, self.params["hidden_size"])
        decoder_inputs += pos_encoding
      if self.train:
        decoder_inputs = tf.nn.dropout(
            decoder_inputs, 1 - self.params["layer_postprocess_dropout"])

      # Run values
      outputs = self.decoder_stack(
          decoder_inputs, encoder_outputs, train,
          decoder_self_attention_bias=None, attention_bias=None)
      return outputs
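For reference, `model_utils.get_position_encoding` in these examples is the sinusoidal position encoding from the original Transformer. Below is a minimal sketch of what such a helper typically computes; the function name, signature, and default timescale values are assumptions rather than code taken from these repositories.

import math
import tensorflow as tf

def get_position_encoding(length, hidden_size, min_timescale=1.0, max_timescale=1.0e4):
  # Sinusoidal position encoding with shape [length, hidden_size] (sketch only).
  position = tf.cast(tf.range(length), tf.float32)
  num_timescales = hidden_size // 2
  log_timescale_increment = (
      math.log(float(max_timescale) / float(min_timescale)) /
      (float(num_timescales) - 1))
  inv_timescales = min_timescale * tf.exp(
      tf.cast(tf.range(num_timescales), tf.float32) * -log_timescale_increment)
  # Outer product: every position is scaled by every inverse timescale.
  scaled_time = tf.expand_dims(position, 1) * tf.expand_dims(inv_timescales, 0)
  # Sines fill the first half of the channels, cosines the second half.
  return tf.concat([tf.sin(scaled_time), tf.cos(scaled_time)], axis=1)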
Example #2
  def encode(self, inputs, train, attention_bias):
    """Generate continuous representation for inputs.

    Args:
      inputs: continuous input features.
        float tensor with shape [batch_size, input_length, hidden_size]
      train: bool, whether the layer is being called in training mode.
      attention_bias: float tensor with shape [batch_size, 1, 1, input_length]

    Returns:
      float tensor with shape [batch_size, input_length, hidden_size]
    """
    with tf.name_scope("encode"):
      # Prepare inputs to the layer stack by adding positional encodings and
      # applying dropout.

      with tf.name_scope("add_pos_encoding"):
        length = tf.shape(inputs)[1]
        pos_encoding = model_utils.get_position_encoding(
            length, self.params["hidden_size"], max_timescale=500)
        encoder_inputs = self.input_normalization(inputs) + pos_encoding

      if self.train:
        encoder_inputs = tf.nn.dropout(
            encoder_inputs, 1 - self.params["layer_postprocess_dropout"])

      return self.encoder_stack(encoder_inputs, train, attention_bias, inputs_padding=None)
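The `attention_bias` argument with shape [batch_size, 1, 1, input_length] is normally built from the padding mask: padded positions get a large negative bias so that they receive essentially zero attention weight after the softmax. A minimal sketch of such a helper follows; the name `get_padding_bias` and the padding value 0 are assumptions.

import tensorflow as tf

_NEG_INF = -1e9

def get_padding_bias(x, padding_value=0):
  # x: int tensor [batch_size, input_length] of token ids.
  padding = tf.cast(tf.equal(x, padding_value), tf.float32)  # 1.0 at padded positions
  bias = padding * _NEG_INF
  # Reshape to [batch_size, 1, 1, input_length] so it broadcasts over heads and query positions.
  return tf.expand_dims(tf.expand_dims(bias, axis=1), axis=1)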
Example #3
  def encode(self, inputs, attention_bias):
    """Generate continuous representation for inputs.

    Args:
      inputs: int tensor with shape [batch_size, input_length].
      attention_bias: float tensor with shape [batch_size, 1, 1, input_length]

    Returns:
      float tensor with shape [batch_size, input_length, hidden_size]
    """
    with tf.name_scope("encode"):
      # Prepare inputs to the layer stack by adding positional encodings and
      # applying dropout.

      # Embed the inputs and compute the padding mask.
      embedded_inputs = self.embedding_softmax_layer(inputs)  # [batch_size, length, embedding_size]
      inputs_padding = model_utils.get_padding(inputs)  # e.g. [[0, 0, 0, 1, 1],
                                                        #       [0, 0, 1, 1, 1]]

      # Add the position encoding to the embeddings.
      with tf.name_scope("add_pos_encoding"):
        length = tf.shape(embedded_inputs)[1]
        pos_encoding = model_utils.get_position_encoding(
            length, self.params["hidden_size"])  # [length, hidden_size]
        encoder_inputs = embedded_inputs + pos_encoding

      # Apply dropout to the embedded inputs during training.
      if self.train:
        encoder_inputs = tf.nn.dropout(
            encoder_inputs, 1 - self.params["layer_postprocess_dropout"])

      return self.encoder_stack(encoder_inputs, attention_bias, inputs_padding)
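The padding mask illustrated in the comment above (1.0 at padded positions, 0.0 elsewhere) can be produced with a one-liner. This is a minimal sketch of what `model_utils.get_padding` most likely does, assuming 0 is the padding id.

import tensorflow as tf

def get_padding(x, padding_value=0):
  # x: int tensor [batch_size, input_length]; returns a float mask of the same shape,
  # e.g. [[0, 0, 0, 1, 1], [0, 0, 1, 1, 1]] for two sequences padded with zeros.
  return tf.cast(tf.equal(x, padding_value), tf.float32)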
Example #4
    def encode(self, inputs, segments, attention_bias):
        """Generate continuous representation for inputs.

        Args:
          inputs: int tensor with shape [batch_size, input_length].
          segments: int tensor with shape [batch_size, input_length].
          attention_bias: float tensor with shape [batch_size, 1, 1, input_length]

        Returns:
          float tensor with shape [batch_size, input_length, hidden_size]
        """
        with tf.name_scope("encode"):
            # Prepare inputs to the layer stack by adding positional encodings and
            # applying dropout.
            encoder_inputs = self.embedding_softmax_layer(inputs)
            inputs_padding = model_utils.get_padding(inputs)

            with tf.name_scope("add_segment_encoding"):
                segment_inputs = self.segment_embedding_layer(segments)
                encoder_inputs += segment_inputs

            with tf.name_scope("add_pos_encoding"):
                length = tf.shape(encoder_inputs)[1]
                pos_encoding = model_utils.get_position_encoding(
                    length, self.params["hidden_size"])
                encoder_inputs += pos_encoding

            if self.train:
                encoder_inputs = tf.nn.dropout(
                    encoder_inputs,
                    1 - self.params["layer_postprocess_dropout"])

            return self.encoder_stack(encoder_inputs, attention_bias,
                                      inputs_padding)
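The `segment_embedding_layer` used above is not shown in this example. As a rough illustration, here is a minimal sketch of such a layer as a plain embedding lookup; the variable name, initializer, and `num_segments` parameter are assumptions.

import tensorflow as tf

def segment_embedding(segments, num_segments, hidden_size):
  # segments: int tensor [batch_size, input_length] with values in [0, num_segments).
  # Returns [batch_size, input_length, hidden_size], which is added to the token embeddings.
  segment_table = tf.get_variable(
      "segment_embeddings", [num_segments, hidden_size],
      initializer=tf.random_normal_initializer(0.0, hidden_size ** -0.5))
  return tf.gather(segment_table, segments)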
Example #5
    def _get_symbols_to_logits_fn(self, max_decode_length):
        """Returns a decoding function that calculates logits of the next tokens."""

        timing_signal = model_utils.get_position_encoding(
            max_decode_length + 1, self.params["hidden_size"])
        decoder_self_attention_bias = model_utils.get_decoder_self_attention_bias(
            max_decode_length)

        def symbols_to_logits_fn(ids, i, cache):
            """Generate logits for next potential IDs.

            Args:
              ids: Current decoded sequences.
                int tensor with shape [batch_size * beam_size, i + 1]
              i: Loop index
              cache: dictionary of values storing the encoder output, encoder-decoder
                attention bias, and previous decoder attention values.

            Returns:
              Tuple of
                (logits with shape [batch_size * beam_size, vocab_size],
                 updated cache values)
            """
            # Set decoder input to the last generated IDs
            decoder_input = ids[:, -1:]

            # Preprocess decoder input by getting embeddings and adding timing signal.
            decoder_input = self.embedding_softmax_layer(decoder_input)
            decoder_input += timing_signal[i:i + 1]

            self_attention_bias = decoder_self_attention_bias[:, :,
                                                              i:i + 1, :i + 1]
            encdec_attention_bias = cache.get("encoder_decoder_attention_bias")
            encdec_attention_bias_query = cache.get("attention_bias_query")
            encdec_attention_bias_content = cache.get("attention_bias_content")
            encoder_outputs = cache.get("encoder_outputs")
            inputs = cache.get("inputs")  # encoder inputs
            D, C_query, C_content, M = self.decoder_stack(
                decoder_input, encoder_outputs, self_attention_bias,
                encdec_attention_bias, encdec_attention_bias_query,
                encdec_attention_bias_content, cache)
            logits = self.distribute_layer(D, C_query, C_content, M,
                                           encoder_outputs,
                                           encdec_attention_bias_query,
                                           encdec_attention_bias_content,
                                           inputs)
            logits = tf.squeeze(logits, axis=[1])
            return logits, cache

        return symbols_to_logits_fn
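In the original models this closure is driven by beam search (hence the [batch_size * beam_size, ...] shapes in the docstring). Purely as a usage illustration, here is a minimal greedy-decoding sketch that calls the returned `symbols_to_logits_fn` step by step; the function and variable names in the sketch are assumptions.

import tensorflow as tf

def greedy_decode(symbols_to_logits_fn, initial_ids, cache, max_decode_length):
  # initial_ids: int tensor [batch_size, 1], usually all zeros (the start symbol).
  ids = initial_ids
  for i in range(max_decode_length):
    logits, cache = symbols_to_logits_fn(ids, i, cache)  # [batch_size, vocab_size]
    next_id = tf.argmax(logits, axis=-1, output_type=tf.int32)
    ids = tf.concat([ids, tf.expand_dims(next_id, axis=1)], axis=1)
  return ids  # [batch_size, max_decode_length + 1]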
Example #6
    def decode(self, targets, encoder_outputs, attention_bias,
               attention_bias_query, attention_bias_content, inputs):
        """Generate logits for each value in the target sequence.

        Args:
          targets: target values for the output sequence.
            int tensor with shape [batch_size, target_length]
          encoder_outputs: continuous representation of input sequence.
            float tensor with shape [batch_size, input_length, hidden_size]
          attention_bias: float tensor with shape [batch_size, 1, 1, input_length]
          attention_bias_query: float tensor, encoder-decoder attention bias
            derived from the query part of the input.
          attention_bias_content: float tensor, encoder-decoder attention bias
            derived from the content part of the input.
          inputs: int tensor with shape [batch_size, input_length]. Encoder input IDs.

        Returns:
          float32 tensor with shape [batch_size, target_length, vocab_size]
        """
        with tf.name_scope("decode"):
            # Prepare inputs to decoder layers by shifting targets, adding positional
            # encoding and applying dropout.
            decoder_inputs = self.embedding_softmax_layer(targets)
            with tf.name_scope("shift_targets"):
                # Shift targets to the right, and remove the last element
                decoder_inputs = tf.pad(decoder_inputs,
                                        [[0, 0], [1, 0], [0, 0]])[:, :-1, :]
            with tf.name_scope("add_pos_encoding"):
                length = tf.shape(decoder_inputs)[1]
                decoder_inputs += model_utils.get_position_encoding(
                    length, self.params["hidden_size"])
            if self.train:
                decoder_inputs = tf.nn.dropout(
                    decoder_inputs,
                    1 - self.params["layer_postprocess_dropout"])

            # Run values
            decoder_self_attention_bias = model_utils.get_decoder_self_attention_bias(
                length)
            D, C_query, C_content, M = self.decoder_stack(
                decoder_inputs, encoder_outputs, decoder_self_attention_bias,
                attention_bias, attention_bias_query, attention_bias_content)
            # Output distribution layer
            logits = self.distribute_layer(D, C_query, C_content, M,
                                           encoder_outputs,
                                           attention_bias_query,
                                           attention_bias_content, inputs)
            return logits
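`model_utils.get_decoder_self_attention_bias` builds the causal mask used above. A minimal sketch of the usual construction (a lower-triangular matrix of ones turned into a bias with a large negative value above the diagonal) follows; treat it as an assumption about the helper rather than the repositories' exact code.

import tensorflow as tf

_NEG_INF = -1e9

def get_decoder_self_attention_bias(length):
  # Returns [1, 1, length, length]: 0 where key position j <= query position i,
  # -1e9 where j > i, so the decoder cannot attend to future positions.
  valid_locs = tf.linalg.band_part(tf.ones([length, length]), -1, 0)  # lower triangle
  valid_locs = tf.reshape(valid_locs, [1, 1, length, length])
  return _NEG_INF * (1.0 - valid_locs)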
Example #7
  def _get_symbols_to_logits_fn(self, max_decode_length):
    """Returns a decoding function that calculates logits of the next tokens."""

    timing_signal = model_utils.get_position_encoding(
        max_decode_length + 1, self.params["hidden_size"])  # position encoding signal
    decoder_self_attention_bias = model_utils.get_decoder_self_attention_bias(
        max_decode_length)  # causal mask: -inf bias above the diagonal (future positions)

    def symbols_to_logits_fn(ids, i, cache):
      """Generate logits for next potential IDs.

      Args:
        ids: Current decoded sequences.
          int tensor with shape [batch_size * beam_size, i + 1]
        i: Loop index
        cache: dictionary of values storing the encoder output, encoder-decoder
          attention bias, and previous decoder attention values.

      Returns:
        Tuple of
          (logits with shape [batch_size * beam_size, vocab_size],
           updated cache values)
      """
      # Set decoder input to the last generated IDs
      decoder_input = ids[:, -1:]

      # Preprocess decoder input by getting embeddings and adding timing signal.
      decoder_input = self.embedding_softmax_layer(decoder_input)
      decoder_input += timing_signal[i:i + 1]  # add the position signal for step i

      # Slice the causal mask to the current step: query row i, keys at positions [0, i].
      self_attention_bias = decoder_self_attention_bias[:, :, i:i + 1, :i + 1]
      decoder_outputs = self.decoder_stack(
          decoder_input, cache.get("encoder_outputs"), self_attention_bias,
          cache.get("encoder_decoder_attention_bias"), cache)
      logits = self.embedding_softmax_layer.linear(decoder_outputs)
      logits = tf.squeeze(logits, axis=[1])
      return logits, cache
    return symbols_to_logits_fn
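The `embedding_softmax_layer.linear` call above typically reuses the shared embedding matrix as the pre-softmax projection (weight tying). A minimal sketch of that projection, assuming a shared weight matrix of shape [vocab_size, hidden_size]:

import tensorflow as tf

def presoftmax_linear(x, shared_weights):
  # x: [batch_size, length, hidden_size]; shared_weights: [vocab_size, hidden_size].
  batch_size = tf.shape(x)[0]
  length = tf.shape(x)[1]
  hidden_size = tf.shape(x)[2]
  vocab_size = tf.shape(shared_weights)[0]
  x = tf.reshape(x, [-1, hidden_size])
  logits = tf.matmul(x, shared_weights, transpose_b=True)  # [batch * length, vocab_size]
  return tf.reshape(logits, [batch_size, length, vocab_size])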