Example #1
    def encode(self, inputs, attention_bias):
        """Generate continuous representation for inputs.

    Args:
      inputs: int tensor with shape [batch_size, input_length].
      attention_bias: float tensor with shape [batch_size, 1, 1, input_length]

    Returns:
      float tensor with shape [batch_size, input_length, hidden_size]
    """
        with tf.name_scope("encode"):
            # Prepare inputs to the layer stack by adding positional encodings and
            # applying dropout.
            embedded_inputs = self.embedding_softmax_layer(inputs)
            inputs_padding = model_utils.get_padding(inputs)

            with tf.name_scope("add_pos_encoding"):
                length = tf.shape(embedded_inputs)[1]
                pos_encoding = model_utils.get_position_encoding(
                    length, self.params["hidden_size"])
                encoder_inputs = embedded_inputs + pos_encoding

            if self.train:
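                # TF1-style dropout: the second positional argument is the
                # keep probability, hence 1 - dropout rate.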
                encoder_inputs = tf.nn.dropout(
                    encoder_inputs,
                    1 - self.params["layer_postprocess_dropout"])

            return self.encoder_stack(encoder_inputs, attention_bias,
                                      inputs_padding)
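Example #1 leans on model_utils.get_position_encoding for the additive positional signal. Below is a minimal, self-contained sketch of the standard sinusoidal encoding that helper presumably computes; the function name and keyword defaults here are assumptions, not the library's exact API.

import math
import tensorflow as tf

def sinusoidal_position_encoding(length, hidden_size,
                                 min_timescale=1.0, max_timescale=1.0e4):
    """Standard Transformer sinusoidal encoding, shape [length, hidden_size]."""
    position = tf.cast(tf.range(length), tf.float32)
    num_timescales = hidden_size // 2
    log_timescale_increment = (
        math.log(max_timescale / min_timescale) /
        (float(num_timescales) - 1))
    inv_timescales = min_timescale * tf.exp(
        tf.cast(tf.range(num_timescales), tf.float32) * -log_timescale_increment)
    scaled_time = position[:, tf.newaxis] * inv_timescales[tf.newaxis, :]
    # Sine channels in the first half, cosine channels in the second half.
    return tf.concat([tf.sin(scaled_time), tf.cos(scaled_time)], axis=1)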
Example #2
    def encode(self, inputs, attention_bias, training):
        """Generate continuous representation for inputs.

    Args:
      inputs: int tensor with shape [batch_size, input_length].
      attention_bias: float tensor with shape [batch_size, 1, 1, input_length].
      training: boolean, whether in training mode or not.

    Returns:
      float tensor with shape [batch_size, input_length, hidden_size]
    """
        with tf.name_scope("encode"):
            # Prepare inputs to the layer stack by adding positional encodings and
            # applying dropout.
            embedded_inputs = self.embedding_softmax_layer(inputs)
            embedded_inputs = tf.cast(embedded_inputs, self.params["dtype"])
            inputs_padding = model_utils.get_padding(inputs)
            attention_bias = tf.cast(attention_bias, self.params["dtype"])

            with tf.name_scope("add_pos_encoding"):
                pos_encoding = self.position_embedding(inputs=embedded_inputs)
                pos_encoding = tf.cast(pos_encoding, self.params["dtype"])
                encoder_inputs = embedded_inputs + pos_encoding

            if training:
                encoder_inputs = tf.nn.dropout(
                    encoder_inputs,
                    rate=self.params["layer_postprocess_dropout"])

            return self.encoder_stack(encoder_inputs,
                                      attention_bias,
                                      inputs_padding,
                                      training=training)
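Both encode variants expect attention_bias already shaped [batch_size, 1, 1, input_length]. As a rough, standalone sketch of how such a bias is typically derived from the int inputs (this helper and its name are illustrative, not the model_utils implementation):

import tensorflow as tf

_NEG_INF = -1e9

def padding_bias_from_inputs(inputs, padding_value=0):
    """Additive attention bias: 0.0 at real tokens, -1e9 at padding.

    Output shape is [batch_size, 1, 1, input_length] so it broadcasts over
    attention heads and query positions when added to the attention logits.
    """
    padding = tf.cast(tf.equal(inputs, padding_value), tf.float32)
    bias = padding * _NEG_INF
    return tf.expand_dims(tf.expand_dims(bias, axis=1), axis=1)

# Token id 0 is treated as padding in this toy batch.
inputs = tf.constant([[7, 12, 0, 0], [3, 0, 0, 0]])
bias = padding_bias_from_inputs(inputs)  # shape (2, 1, 1, 4)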
Example #3
def get_attention_bias(input_tensor,
                       bias_type,
                       padding_value=0,
                       max_length=None):
    """A helper function to get various attention bias tensors."""
    if bias_type not in ("single_cross", "multi_cross", "decoder_self"):
        raise ValueError("Invalid attention bias type: %s" % bias_type)
    if bias_type == "single_cross":
        length = tf_utils.get_shape_list(input_tensor, expected_rank=2)[1]
        bias = transformer_utils.get_padding_bias(input_tensor,
                                                  padding_value=padding_value)
    elif bias_type == "multi_cross":
        length = tf_utils.get_shape_list(input_tensor, expected_rank=3)[2]
        padding = transformer_utils.get_padding(input_tensor,
                                                padding_value=padding_value)
        bias = padding * -1e9
    else:
        if max_length is not None:
            length = max_length
        else:
            length = tf_utils.get_shape_list(input_tensor, expected_rank=2)[1]
        bias = transformer_utils.get_decoder_self_attention_bias(length)

    return tf.where(bias < 0, tf.zeros_like(bias), tf.ones_like(bias))
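The final tf.where in Example #3 collapses an additive bias (0.0 for attended positions, a large negative value for masked ones) into a 0/1 mask. A tiny self-contained illustration with made-up values:

import tensorflow as tf

# Additive bias: 0.0 where attention is allowed, -1e9 where it is masked.
bias = tf.constant([[0.0, 0.0, -1e9, -1e9]])

# Negative entries become 0.0, everything else becomes 1.0.
mask = tf.where(bias < 0, tf.zeros_like(bias), tf.ones_like(bias))
# mask == [[1.0, 1.0, 0.0, 0.0]]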
Example #4
  def test_get_padding(self):
    x = tf.constant([[1, 0, 0, 0, 2], [3, 4, 0, 0, 0], [0, 5, 6, 0, 7]])
    padding = model_utils.get_padding(x, padding_value=0)

    self.assertAllEqual([[0, 1, 1, 1, 0], [0, 0, 1, 1, 1], [1, 0, 0, 1, 0]],
                        padding)
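The test pins down the contract of model_utils.get_padding: 1 where the input equals padding_value, 0 elsewhere, with the same shape as the input. A minimal sketch consistent with that expectation (the real helper may differ in dtype handling or name scoping):

import tensorflow as tf

def get_padding(x, padding_value=0, dtype=tf.float32):
    """Return a tensor shaped like x with 1 at padding positions, 0 elsewhere."""
    return tf.cast(tf.equal(x, padding_value), dtype)

x = tf.constant([[1, 0, 0, 0, 2], [3, 4, 0, 0, 0], [0, 5, 6, 0, 7]])
print(get_padding(x))  # matches the assertAllEqual expectation above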