Example #1
def transformer_text_encoder(inputs,
                             target_space,
                             hparams,
                             name=None):
  """Transformer text encoder over inputs with unmasked full attention.

  Args:
    inputs: Tensor of shape [batch, length, 1, hparams.hidden_size].
    target_space: int. Used for encoding inputs under a target space id.
    hparams: tf.contrib.training.HParams.
    name: string, variable scope.

  Returns:
    encoder_output: Tensor of shape [batch, length, hparams.hidden_size].
    ed: Tensor of shape [batch, 1, 1, length]. Encoder-decoder attention bias
      for any padded tokens.
  """
  with tf.variable_scope(name, default_name="transformer_text_encoder"):
    inputs = common_layers.flatten4d3d(inputs)
    [
        encoder_input,
        encoder_self_attention_bias,
        ed,
    ] = transformer_layers.transformer_prepare_encoder(
        inputs, target_space=target_space, hparams=hparams)
    encoder_input = tf.nn.dropout(encoder_input, 1.0 - hparams.dropout)
    encoder_output = transformer_layers.transformer_encoder(
        encoder_input, encoder_self_attention_bias, hparams)
    return encoder_output, ed
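A minimal usage sketch for the encoder above. It is not from the example's source project: the imports, the stock transformer_base() hparams, the target_space id of 1, and the random stand-in embeddings are all assumptions for illustration, and TF1-style graph mode is assumed because the function builds variables under tf.variable_scope.

import tensorflow.compat.v1 as tf
from tensor2tensor.layers import common_layers
from tensor2tensor.layers import transformer_layers
from tensor2tensor.models import transformer

tf.disable_v2_behavior()  # the encoder uses variable_scope / get_variable

hparams = transformer.transformer_base()
batch, length = 8, 32

# Inputs in the 4-D [batch, length, 1, hidden_size] layout the encoder expects;
# random values stand in for real (non-zero) token embeddings so that no
# position is treated as padding when the attention bias is computed.
inputs = tf.random_normal([batch, length, 1, hparams.hidden_size])

encoder_output, ed = transformer_text_encoder(
    inputs, target_space=1, hparams=hparams, name="text_encoder")
# encoder_output: [batch, length, hidden_size]
# ed: [batch, 1, 1, length] encoder-decoder attention bias for padded tokens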
Example #2
def transformer_text_encoder(inputs, target_space, hparams, name=None):
    """Transformer text encoder over inputs with unmasked full attention.

    Args:
      inputs: Tensor of shape [batch, length, 1, hparams.hidden_size].
      target_space: int. Used for encoding inputs under a target space id.
      hparams: HParams.
      name: string, variable scope.

    Returns:
      encoder_output: Tensor of shape [batch, length, hparams.hidden_size].
      ed: Tensor of shape [batch, 1, 1, length]. Encoder-decoder attention bias
        for any padded tokens.
    """
    with tf.variable_scope(name, default_name="transformer_text_encoder"):
        inputs = common_layers.flatten4d3d(inputs)
        [
            encoder_input,
            encoder_self_attention_bias,
            ed,
        ] = transformer_layers.transformer_prepare_encoder(
            inputs, target_space=target_space, hparams=hparams)
        encoder_input = tf.nn.dropout(encoder_input, 1.0 - hparams.dropout)
        encoder_output = transformer_layers.transformer_encoder(
            encoder_input, encoder_self_attention_bias, hparams)
        return encoder_output, ed
Example #3
    def model(self, x, sen_len):
        x, enc_self_att_bias, _ = transformer_prepare_encoder(
            x, self.target_space, self.hparams)
        # x shape (batch_size, sequence_len, hidden_size)
        # enc_self_att_bias shape (batch_size, 1, 1, sequence_len)

        x = transformer_encoder(x, enc_self_att_bias, self.hparams)
        # shape (batch_size, sequence_length, hidden_size)

        indices = tf.stack(
            [tf.range(0, tf.shape(x)[0]), sen_len - 1],  # the last index of each sentence
            axis=1)

        final_output_state = tf.gather_nd(x,
                                          indices,
                                          name="select_last_word_in_sentence")
        # shape: (batch_size, hidden_size)

        return final_output_state
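The indices/gather_nd step above picks out the representation of each sentence's last real token. A tiny standalone sketch of that pattern, with made-up shapes and sentence lengths, might look like this:

import tensorflow as tf

batch_size, seq_len, hidden = 3, 5, 4
# Fake encoder outputs of shape (batch_size, seq_len, hidden).
x = tf.reshape(tf.range(batch_size * seq_len * hidden, dtype=tf.float32),
               [batch_size, seq_len, hidden])
sen_len = tf.constant([5, 2, 4])  # true length of each sentence

# One (row, position) pair per sentence, pointing at its last real token.
indices = tf.stack([tf.range(0, tf.shape(x)[0]), sen_len - 1], axis=1)
# indices == [[0, 4], [1, 1], [2, 3]]

final_output_state = tf.gather_nd(x, indices)  # shape (batch_size, hidden)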