Example #1
  def encode(self, inputs, attention_bias):
    """Generate continuous representation for inputs.

    Args:
      inputs: int tensor with shape [batch_size, input_length].
      attention_bias: float tensor with shape [batch_size, 1, 1, input_length].

    Returns:
      float tensor with shape [batch_size, input_length, hidden_size].
    """
    # Prepare inputs to the layer stack by adding positional encodings and
    # applying dropout.
    embedded_inputs = self.embedding_softmax_layer(inputs)

    length = tf.shape(embedded_inputs)[1]
    pos_encoding = positional_encoding(length, self.params.hidden_size)
    encoder_inputs = embedded_inputs + pos_encoding

    if self.is_train:
      encoder_inputs = tf.nn.dropout(encoder_inputs,
                                     rate=1 - self.params.keep_prob)
    return self.encoder_stack(encoder_inputs, input_mask=attention_bias)
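
The helpers get_input_mask and positional_encoding are not defined in this example. A minimal sketch of both, assuming padding id 0 and the standard sinusoidal encoding from "Attention Is All You Need" (the real helpers may differ), could look like this:

import numpy as np
import tensorflow as tf

def get_input_mask(inputs, padding_id=0):
  """Return an attention bias with shape [batch_size, 1, 1, input_length]:
  a large negative value at padding positions, 0 elsewhere (padding_id assumed)."""
  padding = tf.cast(tf.equal(inputs, padding_id), tf.float32)
  bias = padding * -1e9
  return tf.expand_dims(tf.expand_dims(bias, axis=1), axis=1)

def positional_encoding(length, hidden_size):
  """Return [length, hidden_size] sinusoidal position encodings."""
  positions = np.arange(length)[:, np.newaxis]              # [length, 1]
  dims = np.arange(hidden_size)[np.newaxis, :]              # [1, hidden_size]
  angle_rates = 1.0 / np.power(
      10000.0, (2 * (dims // 2)) / np.float32(hidden_size))
  angles = positions * angle_rates                          # [length, hidden_size]
  angles[:, 0::2] = np.sin(angles[:, 0::2])                 # sine on even dimensions
  angles[:, 1::2] = np.cos(angles[:, 1::2])                 # cosine on odd dimensions
  return tf.cast(angles, tf.float32)
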
  def forward(self, inputs, targets=None):
    """Calculate target logits or inferred target sequences.

    Args:
      inputs: int tensor with shape [batch_size, input_length].
      targets: None or int tensor with shape [batch_size, target_length].

    Returns:
      If targets is defined, returns logits for each word in the target
      sequence: a float tensor with shape
      [batch_size, target_length, vocab_size].
      If targets is None, generates the output sequence one token at a time
      and returns a dictionary {
        outputs: int tensor with shape [batch_size, decoded_length],
        scores: float tensor with shape [batch_size]}.
    """
    # Calculate attention bias for encoder self-attention and decoder
    # multi-headed attention layers.
    attention_bias = get_input_mask(inputs)

    # Run the inputs through the encoder layer to map the symbol
    # representations to continuous representations.
    encoder_outputs = self.encode(inputs, attention_bias)
    # Generate output sequence if targets is None, or return logits if target
    # sequence is known.
    if targets is None:
      return self.predict(encoder_outputs, attention_bias)
    else:
      logits = self.decode(targets, encoder_outputs, attention_bias)
    return logits
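
A short usage check of the bias shape that drives both branches of forward(), using the get_input_mask sketch above (the token values and padding id are illustrative, not taken from this example):

example_inputs = tf.constant([[7, 12, 5, 0, 0]])    # [batch_size=1, input_length=5]
bias = get_input_mask(example_inputs)
print(bias.shape)                                   # (1, 1, 1, 5); padded positions carry -1e9

# With targets given, forward() returns logits shaped
# [batch_size, target_length, vocab_size]; with targets=None it decodes
# autoregressively and returns a dict with "outputs" and "scores".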