Example #1
  def encode(self, inputs, attention_bias):
    """Generate continuous representation for inputs.

    Args:
      inputs: int tensor with shape [batch_size, input_length].
      attention_bias: float tensor with shape [batch_size, 1, 1, input_length]

    Returns:
      float tensor with shape [batch_size, input_length, hidden_size]
    """
    with tf.name_scope("encode"):
      # Prepare inputs to the layer stack by adding positional encodings and
      # applying dropout.
      embedded_inputs = self.embedding_softmax_layer(inputs)
      inputs_padding = model_utils.get_padding(inputs)

      with tf.name_scope("add_pos_encoding"):
        length = tf.shape(embedded_inputs)[1]
        pos_encoding = model_utils.get_position_encoding(
            length, self.params.hidden_size)
        encoder_inputs = embedded_inputs + pos_encoding

      if self.train:
        encoder_inputs = tf.nn.dropout(
            encoder_inputs, 1 - self.params.layer_postprocess_dropout)

      return self.encoder_stack(encoder_inputs, attention_bias, inputs_padding)
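The encode method above takes an attention_bias of shape [batch_size, 1, 1, input_length] alongside the raw inputs. A minimal sketch of how such a bias can be built from the same padding information (the helper name and the -1e9 constant are assumptions, not taken from these projects):

import tensorflow as tf

_NEG_INF = -1e9

def get_padding_bias(x, padding_value=0):
  # 1.0 at padding positions, 0.0 at real tokens.
  padding = tf.cast(tf.equal(x, padding_value), tf.float32)
  # Large negative bias so softmax gives padded positions ~zero attention weight.
  bias = padding * _NEG_INF
  # Reshape to [batch_size, 1, 1, input_length] so it broadcasts over heads
  # and query positions when added to the attention logits.
  return tf.expand_dims(tf.expand_dims(bias, axis=1), axis=1)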
Example #2
    def encode(self, inputs, attention_bias):
        """
        :param inputs: int tensor with shape [batch_size, input_length]
        :param attention_bias: float tensor with shape [batch_size, 1, 1, input_length]
        :return: float tensor with shape [batch_size, input_length, hidden_size]
        """
        with tf.name_scope('encode'):
            #   [batch_size, length, hidden_size]
            embedded_inputs = self.embedding_layer(inputs)
            #   [batch_size, length]
            inputs_padding = model_utils.get_padding(inputs)

            with tf.name_scope('add_pos_embedding'):
                length = tf.shape(embedded_inputs)[1]
                #   use sin cos calculate position embeddings
                pos_encoding = model_utils.get_position_encoding(
                    length, self.params.get('hidden_size'))

                encoder_inputs = tf.add(embedded_inputs, pos_encoding)

            if self.train:
                encoder_inputs = tf.nn.dropout(
                    encoder_inputs,
                    1 - self.params.get('encoder_decoder_dropout'))

            return self.encoder_stack(encoder_inputs, attention_bias,
                                      inputs_padding)
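The "use sin cos calculate position embeddings" comment refers to the fixed sinusoidal encoding from the original Transformer paper. A plausible sketch of get_position_encoding under that assumption (not copied from either project; hidden_size is assumed even):

import math
import tensorflow as tf

def get_position_encoding(length, hidden_size,
                          min_timescale=1.0, max_timescale=1.0e4):
    # First half of the channels holds sines, second half cosines.
    position = tf.cast(tf.range(length), tf.float32)
    num_timescales = hidden_size // 2
    log_timescale_increment = (
        math.log(float(max_timescale) / float(min_timescale)) /
        max(num_timescales - 1, 1))
    inv_timescales = min_timescale * tf.exp(
        tf.cast(tf.range(num_timescales), tf.float32) * -log_timescale_increment)
    scaled_time = tf.expand_dims(position, 1) * tf.expand_dims(inv_timescales, 0)
    # Shape [length, hidden_size]; added to the embedded inputs by broadcasting.
    return tf.concat([tf.sin(scaled_time), tf.cos(scaled_time)], axis=1)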
Example #3
    def call(self, x):
        """Get token embeddings of x.

    Args:
      x: An int64 tensor with shape [batch_size, length]
    Returns:
      embeddings: float32 tensor with shape [batch_size, length, embedding_size]
      padding: float32 tensor with shape [batch_size, length] indicating the
        locations of the padding tokens in x.
    """
        with tf.name_scope("embedding"):
            # Look up the embedding for each token id in the shared weight matrix.
            embeddings = tf.gather(self.shared_weights, x)

            # Scale embeddings by the square root of the hidden size.
            embeddings *= self.hidden_size ** 0.5

            # Create binary array of size [batch_size, length]
            # where 1 = padding, 0 = not padding
            padding = model_utils.get_padding(x)

            # Set all padding embedding values to 0
            embeddings *= tf.expand_dims(1 - padding, -1)
            return embeddings
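A toy illustration of the zero-out step at the end of call(): multiplying by the expanded (1 - padding) mask blanks the embedding rows of padding tokens (shapes and the padding id 0 are assumptions):

import tensorflow as tf

x = tf.constant([[7, 3, 0, 0]])                 # 0 acts as the padding id
padding = tf.cast(tf.equal(x, 0), tf.float32)   # [[0., 0., 1., 1.]]
embeddings = tf.ones([1, 4, 8])                 # stand-in for the gathered embeddings
embeddings *= tf.expand_dims(1 - padding, -1)   # rows 2 and 3 become all zeros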
Example #4
  def encode(self, inputs, attention_bias):
    """Generate continuous representation for inputs.

    Args:
      inputs: int tensor with shape [batch_size, input_length].
      attention_bias: float tensor with shape [batch_size, 1, 1, input_length]

    Returns:
      float tensor with shape [batch_size, input_length, hidden_size]
    """
    with tf.compat.v1.name_scope("encode"):
      # Prepare inputs to the layer stack by adding positional encodings and
      # applying dropout.
      embedded_inputs = self.embedding_softmax_layer(inputs)
      inputs_padding = model_utils.get_padding(inputs)

      with tf.compat.v1.name_scope("add_pos_encoding"):
        length = tf.shape(input=embedded_inputs)[1]
        pos_encoding = model_utils.get_position_encoding(
            length, self.params.hidden_size)
        encoder_inputs = embedded_inputs + pos_encoding

      with tf.compat.v1.tpu.bfloat16_scope():
        encoder_inputs = tf.cast(encoder_inputs, tf.bfloat16)
        #attention_bias = tf.cast(attention_bias, tf.bfloat16)
        inputs_padding = tf.cast(inputs_padding, tf.bfloat16)
        if self.train:
          mlperf_log.transformer_print(
              key=mlperf_log.MODEL_HP_LAYER_POSTPROCESS_DROPOUT,
              value=self.params.layer_postprocess_dropout)
          # Drop layer_postprocess_dropout fraction of the activations
          # (TF2 dropout takes the drop rate directly).
          encoder_inputs = tf.nn.dropout(
              encoder_inputs, rate=self.params.layer_postprocess_dropout)
        return self.encoder_stack(encoder_inputs, attention_bias, inputs_padding)
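TF1-style dropout is parameterized by the keep probability, while TF2's tf.nn.dropout takes the fraction to drop; that is why some examples pass 1 - layer_postprocess_dropout and the converted code above passes the dropout value itself. A short side-by-side for illustration (x and p are placeholders):

import tensorflow as tf

x = tf.ones([2, 3])
p = 0.1  # e.g. layer_postprocess_dropout
y_v1 = tf.compat.v1.nn.dropout(x, keep_prob=1 - p)  # TF1: probability of keeping
y_v2 = tf.nn.dropout(x, rate=p)                     # TF2: probability of dropping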
Example #5
    def encode(self, inputs, attention_bias):
        with tf.name_scope("encode"):
            embedded_inputs = self.encoder_embedding_layer(
                inputs, not ModeKeys.is_predict_one(self.mode))
            if ModeKeys.is_predict_one(self.mode):
                inputs_padding = None
            else:
                inputs_padding = model_utils.get_padding(inputs)

            with tf.name_scope("add_pos_encoding"):
                length = tf.shape(embedded_inputs)[1]
                if ModeKeys.is_predict_one(self.mode):
                    pos_encoding = model_utils.get_position_encoding(
                        self.params.max_length, self.params.hidden_size)
                    pos_encoding = tf.slice(pos_encoding, [0, 0],
                                            [length, self.params.hidden_size],
                                            name='slice_pos_encoding')
                else:
                    pos_encoding = model_utils.get_position_encoding(
                        length, self.params.hidden_size)

                encoder_inputs = embedded_inputs + pos_encoding

            if self.is_train:
                encoder_inputs = tf.nn.dropout(
                    encoder_inputs, 1 - self.params.layer_postprocess_dropout)

            return self.encoder_stack(encoder_inputs, attention_bias,
                                      inputs_padding)
Example #6
    def test_get_padding(self):
        x = tf.constant([[1, 0, 0, 0, 2], [3, 4, 0, 0, 0], [0, 5, 6, 0, 7]])
        padding = model_utils.get_padding(x, padding_value=0)
        with self.test_session() as sess:
            padding = sess.run(padding)

        self.assertAllEqual(
            [[0, 1, 1, 1, 0], [0, 0, 1, 1, 1], [1, 0, 0, 1, 0]], padding)
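The test above pins down the contract of get_padding: 1 where the input equals padding_value and 0 elsewhere. A minimal implementation consistent with it (a sketch, not the project's actual code):

import tensorflow as tf

def get_padding(x, padding_value=0, dtype=tf.float32):
    # 1.0 at padding positions, 0.0 at real tokens, same shape as x.
    with tf.name_scope("padding"):
        return tf.cast(tf.equal(x, padding_value), dtype)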
Example #7
    def _encode(self, inputs, attention_bias):
        embedded_inputs = self.embedding_layer(inputs)
        inputs_padding = model_utils.get_padding(inputs)

        if self.is_train:
            encoder_inputs = self.encoder_embedding_dropout(embedded_inputs)
        else:
            # No dropout outside training; pass the embeddings through unchanged.
            encoder_inputs = embedded_inputs
        return self.encoder_stack(encoder_inputs, attention_bias,
                                  inputs_padding)
Example #8
    def _encode(self, inputs, attention_bias):
        embedded_inputs = self.embedding_layer(inputs)
        embedded_inputs += model_utils.get_position_encoding(
            self.hparams['max_length'], self.hparams['num_units'])
        inputs_padding = model_utils.get_padding(inputs)

        if self.is_train:
            encoder_inputs = self.encoder_embedding_dropout(embedded_inputs)
        else:
            # No dropout outside training; pass the embeddings through unchanged.
            encoder_inputs = embedded_inputs
        return self.encoder_stack(encoder_inputs, attention_bias,
                                  inputs_padding)
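Examples #7 and #8 route dropout through a layer object (encoder_embedding_dropout) rather than calling tf.nn.dropout inline, so the rate is fixed once at construction time. A sketch of that pattern (the rate value and placeholder shapes here are assumptions):

import tensorflow as tf

# Constructed once, e.g. in the model's __init__, so the rate is fixed up front.
encoder_embedding_dropout = tf.keras.layers.Dropout(rate=0.1)  # assumed rate

# Applied to the embedded inputs only when training.
embedded_inputs = tf.ones([2, 5, 64])  # placeholder for the real embeddings
encoder_inputs = encoder_embedding_dropout(embedded_inputs, training=True)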
Example #9
    def encode(self, inputs, attention_bias):
        """Generate continuous representation for inputs.

        Args:
          inputs: int tensor with shape [batch_size, input_length].
          attention_bias: float tensor with shape [batch_size, 1, 1, input_length]

        Returns:
          float tensor with shape [batch_size, input_length, hidden_size]
        """
        with tf.name_scope("encode"):
            # Prepare inputs to the layer stack by adding positional encodings and
            # applying dropout.
            embedded_inputs = self.encoder_embedding_layer(
                inputs, not ModeKeys.is_predict_one(self.mode))
            if ModeKeys.is_predict_one(self.mode):
                inputs_padding = None
            else:
                inputs_padding = model_utils.get_padding(inputs)

            # add_pos_encoding
            with tf.name_scope("add_pos_encoding"):
                length = tf.shape(embedded_inputs)[1]
                if ModeKeys.is_predict_one(self.mode):
                    pos_encoding = model_utils.get_position_encoding(
                        self.params.max_length, self.params.hidden_size)
                    pos_encoding = tf.slice(pos_encoding, [0, 0],
                                            [length, self.params.hidden_size],
                                            name='slice_pos_encoding')
                else:
                    pos_encoding = model_utils.get_position_encoding(
                        length, self.params.hidden_size)

                encoder_inputs = embedded_inputs + pos_encoding

            if self.is_train:
                encoder_inputs = tf.nn.dropout(
                    encoder_inputs, 1 - self.params.layer_postprocess_dropout)

            return self.encoder_stack(encoder_inputs, attention_bias,
                                      inputs_padding)
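In the predict-one branch above, the position encoding is computed once for max_length and then sliced to the actual length; both branches produce identical values because the sinusoidal table depends only on absolute position. A toy check, reusing the get_position_encoding sketch given after Example #2:

full = get_position_encoding(64, 512)        # [max_length, hidden_size]
short = get_position_encoding(10, 512)       # [length, hidden_size]
sliced = tf.slice(full, [0, 0], [10, 512], name='slice_pos_encoding')
# short and sliced agree elementwise (up to float rounding).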
Example #10
    def encode(self, inputs, attention_bias):
        """Generate continuous representation for inputs.

        Args:
            inputs: int tensor with shape [batch_size, input_length].
            attention_bias: float tensor with shape [batch_size, 1, 1, input_length]

        Returns:
            float tensor with shape [batch_size, input_length, hidden_size]
        """
        embedded_inputs = self.embedding_softmax_layer(inputs)
        inputs_padding = model_utils.get_padding(inputs)

        length = embedded_inputs.shape[1]
        pos_encoding = model_utils.get_position_encoding(
            length, self.param.hidden_size, inputs.context)
        encoder_inputs = embedded_inputs + pos_encoding

        if self.train:
            encoder_inputs = self.dropout_input(encoder_inputs)

        return self.encoder_stack(encoder_inputs, attention_bias,
                                  inputs_padding)
Example #11
  def call(self, x):
    """Get token embeddings of x.

    Args:
      x: An int64 tensor with shape [batch_size, length]
    Returns:
      embeddings: float32 tensor with shape [batch_size, length, embedding_size];
        rows corresponding to padding tokens in x are zeroed out.
    """
    with tf.name_scope("embedding"):
      embeddings = tf.gather(self.shared_weights, x)

      # Scale embedding by the sqrt of the hidden size
      embeddings *= self.hidden_size ** 0.5

      # Create binary array of size [batch_size, length]
      # where 1 = padding, 0 = not padding
      padding = model_utils.get_padding(x)

      # Set all padding embedding values to 0
      embeddings *= tf.expand_dims(1 - padding, -1)
      return embeddings
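For context, a minimal owning layer for the call() method in Examples #3 and #11; the constructor and initializer details here are assumptions rather than either project's actual definition:

import tensorflow as tf

class EmbeddingSharedWeights(tf.keras.layers.Layer):
    """Hosts the shared [vocab_size, hidden_size] table used by call() above."""

    def __init__(self, vocab_size, hidden_size):
        super().__init__()
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size

    def build(self, input_shape):
        # Single embedding table, also reusable as the softmax projection.
        self.shared_weights = self.add_weight(
            name="weights",
            shape=[self.vocab_size, self.hidden_size],
            initializer=tf.random_normal_initializer(
                mean=0.0, stddev=self.hidden_size ** -0.5))
        super().build(input_shape)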