Example #1
    def decode(self, start_tokens, targets, encoder_outputs, attention_bias):
        with tf.name_scope("decode"):
            with tf.name_scope("shift_targets"):
                decoder_inputs = tf.concat(
                    [tf.expand_dims(start_tokens, axis=1), targets[:, :-1]],
                    axis=1)
            decoder_inputs = self.decoder_embedding_layer(decoder_inputs)
            with tf.name_scope("add_pos_encoding"):
                length = tf.shape(decoder_inputs)[1]
                decoder_inputs += model_utils.get_position_encoding(
                    length, self.params["hidden_size"])
            if self.train:
                decoder_inputs = tf.nn.dropout(
                    decoder_inputs,
                    1 - self.params["layer_postprocess_dropout"])

            # Run values
            decoder_self_attention_bias = model_utils.get_decoder_self_attention_bias(
                length)
            decoder_outputs = self.decoder_stack(decoder_inputs,
                                                 encoder_outputs,
                                                 decoder_self_attention_bias,
                                                 attention_bias)
            outputs = self.output_embedding_layer(decoder_outputs)
            return outputs
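Every example in this section calls model_utils.get_position_encoding(length, hidden_size). For reference, the sketch below shows the standard sinusoidal implementation used by the TensorFlow official Transformer; the min_timescale/max_timescale defaults are assumptions based on that reference code, so treat it as illustrative rather than the exact helper each project ships.

import math
import tensorflow as tf

def get_position_encoding(length, hidden_size,
                          min_timescale=1.0, max_timescale=1.0e4):
    """Sinusoidal position encoding with shape [length, hidden_size]."""
    position = tf.cast(tf.range(length), tf.float32)
    num_timescales = hidden_size // 2
    # Geometric progression of wavelengths from min_timescale to max_timescale.
    log_timescale_increment = (
        math.log(float(max_timescale) / float(min_timescale)) /
        (tf.cast(num_timescales, tf.float32) - 1))
    inv_timescales = min_timescale * tf.exp(
        tf.cast(tf.range(num_timescales), tf.float32) * -log_timescale_increment)
    # Outer product: [length, num_timescales].
    scaled_time = tf.expand_dims(position, 1) * tf.expand_dims(inv_timescales, 0)
    # First half sine, second half cosine -> [length, hidden_size] (hidden_size even).
    return tf.concat([tf.sin(scaled_time), tf.cos(scaled_time)], axis=1)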
Example #2
  def encode(self, inputs, attention_bias):
    """Generate continuous representation for inputs.

    Args:
      inputs: int tensor with shape [batch_size, input_length].
      attention_bias: float tensor with shape [batch_size, 1, 1, input_length]

    Returns:
      float tensor with shape [batch_size, input_length, hidden_size]
    """
    with tf.name_scope("encode"):
      # Prepare inputs to the layer stack by adding positional encodings and
      # applying dropout.
      embedded_inputs = self.embedding_softmax_layer(inputs)
      inputs_padding = model_utils.get_padding(inputs)

      with tf.name_scope("add_pos_encoding"):
        length = tf.shape(embedded_inputs)[1]
        pos_encoding = model_utils.get_position_encoding(
            length, self.params.hidden_size)
        encoder_inputs = embedded_inputs + pos_encoding

      if self.train:
        encoder_inputs = tf.nn.dropout(
            encoder_inputs, 1 - self.params.layer_postprocess_dropout)

      return self.encoder_stack(encoder_inputs, attention_bias, inputs_padding)
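The encoder examples also rely on model_utils.get_padding (and, in Examples #7 and #24, model_utils.get_padding_bias). A minimal sketch of what these helpers conventionally compute, assuming pad ID 0 and a -1e9 masking value, is:

def get_padding(x, padding_value=0):
    """Float tensor with 1.0 at padding positions and 0.0 elsewhere."""
    return tf.cast(tf.equal(x, padding_value), tf.float32)

def get_padding_bias(x):
    """Attention bias of shape [batch_size, 1, 1, length]: 0 for real tokens and
    a large negative value for padding, so softmax ignores padded positions."""
    attention_bias = get_padding(x) * -1e9
    return tf.expand_dims(tf.expand_dims(attention_bias, axis=1), axis=1)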
Example #3
    def decode(self, targets, encoder_outputs, attention_bias):
        """Generate logits for each value in the target sequence.

    Args:
      targets: target values for the output sequence.
        int tensor with shape [batch_size, target_length]
      encoder_outputs: continuous representation of input sequence.
        float tensor with shape [batch_size, input_length, hidden_size]
      attention_bias: float tensor with shape [batch_size, 1, 1, input_length]

    Returns:
      float32 tensor with shape [batch_size, target_length, vocab_size]
    """
        with tf.name_scope("decode"):
            # Prepare inputs to decoder layers by shifting targets, adding positional
            # encoding and applying dropout.
            decoder_inputs = self.embedding_softmax_layer(targets)
            with tf.name_scope("shift_targets"):
                # Shift targets to the right, and remove the last element
                decoder_inputs = tf.pad(decoder_inputs, [[0, 0], [1, 0], [0, 0]])[:, :-1, :]
            with tf.name_scope("add_pos_encoding"):
                length = tf.shape(decoder_inputs)[1]
                decoder_inputs += model_utils.get_position_encoding(length, self.params["hidden_size"])
            if self.train:
                decoder_inputs = tf.nn.dropout(
                    decoder_inputs, 1 - self.params["layer_postprocess_dropout"])

            # Run values
            decoder_self_attention_bias = model_utils.get_decoder_self_attention_bias(length)
            outputs = self.decoder_stack(decoder_inputs, encoder_outputs, decoder_self_attention_bias, attention_bias)
            logits = self.embedding_softmax_layer.linear(outputs)
            return logits
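The decoder examples additionally build a causal mask with model_utils.get_decoder_self_attention_bias(length). A sketch of the usual construction, again assuming a -1e9 masking value, returns a [1, 1, length, length] bias that lets position i attend only to positions <= i:

def get_decoder_self_attention_bias(length):
    """Causal bias: 0.0 where attention is allowed, -1e9 where it is masked."""
    # Lower-triangular matrix of ones: row i has ones in columns 0..i.
    valid_locs = tf.linalg.band_part(tf.ones([length, length]), -1, 0)
    valid_locs = tf.reshape(valid_locs, [1, 1, length, length])
    return -1e9 * (1.0 - valid_locs)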
Example #4
    def encode(self, embedded_inputs, attention_bias):
        """Generate continuous representation for inputs.

    Args:
      embedded_inputs: float tensor with shape [batch_size, input_length, hidden_size].
      attention_bias: float tensor with shape [batch_size, 1, 1, input_length]

    Returns:
      float tensor with shape [batch_size, input_length, hidden_size]
    """
        with tf.name_scope("encode"):
            # Prepare inputs to the layer stack by adding positional encodings and
            # applying dropout.

            # Not needed as we use our own embeddings
            # embedded_inputs = self.embedding_softmax_layer(inputs)

            inputs_padding = model_utils.get_embedding_padding(embedded_inputs)
            encoder_inputs = embedded_inputs

            if self.params["use_positional_encoding"]:
                with tf.name_scope("add_pos_encoding"):
                    length = tf.shape(embedded_inputs)[1]
                    pos_encoding = model_utils.get_position_encoding(
                        length, self.params["hidden_size"])
                    encoder_inputs = embedded_inputs + pos_encoding

            if self.train:
                encoder_inputs = tf.nn.dropout(
                    encoder_inputs,
                    1 - self.params["layer_postprocess_dropout"])

            return self.encoder_stack(encoder_inputs, attention_bias,
                                      inputs_padding)
Example #5
    def encode(self, inputs, attention_bias):
        """Generate continuous representation for inputs. 生成输入的向量表示,即representation

    Args:
      inputs: int tensor with shape [batch_size, input_length].
      attention_bias: float tensor with shape [batch_size, 1, 1, input_length]

    Returns:
      float tensor with shape [batch_size, input_length, hidden_size]
    """
        with tf.name_scope("encode"):
            # Prepare inputs to the layer stack by adding positional encodings and
            # applying dropout.
            # Add positional encodings to the inputs for the encoder, then apply dropout.
            embedded_inputs = self.embedding_softmax_layer(
                inputs)  # convert the input IDs into embeddings
            inputs_padding = model_utils.get_padding(
                inputs)  # get the padded positions of the inputs

            with tf.name_scope("add_pos_encoding"):
                length = tf.shape(embedded_inputs)[1]
                pos_encoding = model_utils.get_position_encoding(  # get the position encoding
                    length, self.params["hidden_size"])
                encoder_inputs = embedded_inputs + pos_encoding  # sum them to form the encoder input

            if self.train:
                encoder_inputs = tf.nn.dropout(  # apply dropout before the encoder stack
                    encoder_inputs,
                    1 - self.params["layer_postprocess_dropout"])

            return self.encoder_stack(encoder_inputs, attention_bias,
                                      inputs_padding)
Example #6
    def _get_symbols_to_logits_fn(self, max_decode_length, training):
        """Returns a decoding function that calculates logits of the next tokens."""

        timing_signal = model_utils.get_position_encoding(
            max_decode_length + 1, self.params["hidden_size"])
        timing_signal = tf.cast(timing_signal, self.params["dtype"])
        decoder_self_attention_bias = model_utils.get_decoder_self_attention_bias(
            max_decode_length, dtype=self.params["dtype"])

        # TODO(b/139770046): Refactor code with better naming of i.
        def symbols_to_logits_fn(ids, i, cache):
            """Generate logits for next potential IDs.

      Args:
        ids: Current decoded sequences. int tensor with shape [batch_size *
          beam_size, i + 1].
        i: Loop index.
        cache: dictionary of values storing the encoder output, encoder-decoder
          attention bias, and previous decoder attention values.

      Returns:
        Tuple of
          (logits with shape [batch_size * beam_size, vocab_size],
           updated cache values)
      """
            # Set decoder input to the last generated IDs
            decoder_input = ids[:, -1:]

            # Preprocess decoder input by getting embeddings and adding timing signal.
            decoder_input = self.embedding_softmax_layer(decoder_input)

            if self.params["padded_decode"]:
                timing_signal_shape = timing_signal.shape.as_list()
                decoder_input += tf.slice(timing_signal, [i, 0],
                                          [1, timing_signal_shape[1]])

                bias_shape = decoder_self_attention_bias.shape.as_list()
                self_attention_bias = tf.slice(
                    decoder_self_attention_bias, [0, 0, i, 0],
                    [bias_shape[0], bias_shape[1], 1, bias_shape[3]])
            else:
                decoder_input += timing_signal[i:i + 1]

                self_attention_bias = decoder_self_attention_bias[:, :, i:i +
                                                                  1, :i + 1]

            decoder_outputs = self.decoder_stack(
                decoder_input,
                cache.get("encoder_outputs"),
                self_attention_bias,
                cache.get("encoder_decoder_attention_bias"),
                training=training,
                cache=cache,
                decode_loop_step=i if self.params["padded_decode"] else None)
            logits = self.embedding_softmax_layer(decoder_outputs,
                                                  mode="linear")
            logits = tf.squeeze(logits, axis=[1])
            return logits, cache

        return symbols_to_logits_fn
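To make the role of the returned closure concrete, a hypothetical greedy decoding loop that consumes symbols_to_logits_fn is sketched below. The real models drive this function from a beam-search routine with a pre-built cache; start_ids, max_decode_length, and the cache contents here are assumptions for illustration only.

ids = start_ids[:, tf.newaxis]                      # [batch_size, 1], e.g. all start tokens
for i in range(max_decode_length):
    # logits: [batch_size, vocab_size] for the token at position i.
    logits, cache = symbols_to_logits_fn(ids, i, cache)
    next_ids = tf.argmax(logits, axis=-1, output_type=tf.int32)
    ids = tf.concat([ids, next_ids[:, tf.newaxis]], axis=1)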
Example #7
File: transformers.py  Project: r-mal/sacar
    def encode_no_lookup(self, embedded_inputs, inputs_mask):
        """Encoder step for transformer given already-embedded inputs

      Args:
        embedded_inputs: float tensor with shape [batch_size, input_length, emb_size].
        inputs_mask: int tensor with shape [batch_size, input_length]

      Returns:
        float tensor with shape [batch_size, input_length, hidden_size]
      """
        with tf.name_scope("encode"):
            # Prepare inputs to the layer stack by adding positional encodings and
            # applying dropout.
            inputs_padding = model_utils.get_padding(inputs_mask)
            attention_bias = model_utils.get_padding_bias(inputs_mask)

            with tf.name_scope("add_pos_encoding"):
                length = tf.shape(embedded_inputs)[1]
                pos_encoding = model_utils.get_position_encoding(
                    length, self.params.hidden_size)
                encoder_inputs = embedded_inputs + pos_encoding

            if self.train:
                encoder_inputs = tf.nn.dropout(
                    encoder_inputs, 1 - self.params.layer_postprocess_dropout)

            return self.encoder_stack(encoder_inputs, attention_bias,
                                      inputs_padding)
Example #8
  def decode(self, _, inputs, encoder_outputs, attention_bias):
    """Generate logits for each value in the target sequence.

    Args:
      inputs:
        int tensor (old dst sentence) with shape [batch_size, input_length].
      encoder_outputs: continuous representation of diff sequence.
        float tensor with shape [batch_size, input_length, hidden_size]
      attention_bias: float tensor with shape [batch_size, 1, 1, input_length]

    Returns:
      float32 tensor with shape [batch_size, target_length, vocab_size]
    """
    with tf.name_scope("decode"):
      # Prepare inputs to decoder layers by adding positional
      # encoding and applying dropout.
      decoder_inputs = self.embedding_softmax_layer(inputs)

      with tf.name_scope("add_pos_encoding"):
        length = tf.shape(decoder_inputs)[1]
        decoder_inputs += model_utils.get_position_encoding(
            length, self.params["hidden_size"])
      if self.train:
        decoder_inputs = tf.nn.dropout(
            decoder_inputs, 1 - self.params["layer_postprocess_dropout"])
      
      # Run values
      decoder_self_attention_bias = model_utils.get_decoder_self_attention_bias(
          length)
      outputs = self.decoder_stack(
          decoder_inputs, encoder_outputs, decoder_self_attention_bias,
          attention_bias)
      logits = self.embedding_softmax_layer.linear(outputs)
      return logits
Example #9
    def encode(self, inputs, attention_bias, training):
        """Generate continuous representation for inputs.

    Args:
      inputs: int tensor with shape [batch_size, input_length].
      attention_bias: float tensor with shape [batch_size, 1, 1, input_length].
      training: boolean, whether in training mode or not.

    Returns:
      float tensor with shape [batch_size, input_length, hidden_size]
    """
        with tf.name_scope("encode"):
            # Prepare inputs to the layer stack by adding positional encodings and
            # applying dropout.
            embedded_inputs = self.embedding_softmax_layer(inputs)
            inputs_padding = model_utils.get_padding(inputs)

            with tf.name_scope("add_pos_encoding"):
                length = tf.shape(embedded_inputs)[1]
                pos_encoding = model_utils.get_position_encoding(
                    length, self.params["hidden_size"])
                encoder_inputs = embedded_inputs + pos_encoding

            if training:
                encoder_inputs = tf.nn.dropout(
                    encoder_inputs,
                    rate=self.params["layer_postprocess_dropout"])

            return self.encoder_stack(encoder_inputs,
                                      attention_bias,
                                      inputs_padding,
                                      training=training)
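Note that Example #9 passes rate= to tf.nn.dropout while the earlier examples pass 1 - dropout as the second positional argument: the TF1 signature is tf.nn.dropout(x, keep_prob) whereas the TF2 signature is tf.nn.dropout(x, rate), so the two calls below (shown for an assumed dropout probability of 0.1) drop activations with the same probability.

x_tf1 = tf.nn.dropout(encoder_inputs, 0.9)        # TF1 style: keep_prob = 1 - dropout
x_tf2 = tf.nn.dropout(encoder_inputs, rate=0.1)   # TF2 style: rate = dropout probability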
Example #10
    def decode(self, targets, encoder_outputs, attention_bias, training):
        """Generate logits for each value in the target sequence.

        Args:
          targets: target values for the output sequence. int tensor with shape
            [batch_size, target_length]
          encoder_outputs: continuous representation of input sequence. float tensor
            with shape [batch_size, input_length, hidden_size]
          attention_bias: float tensor with shape [batch_size, 1, 1, input_length]
          training: boolean, whether in training mode or not.

        Returns:
          float32 tensor with shape [batch_size, target_length, vocab_size]
        """
        with tf.name_scope("decode"):
            # Prepare inputs to decoder layers by shifting targets, adding positional
            # encoding and applying dropout.
            decoder_inputs = self.embedding_layer(targets)
            decoder_inputs = tf.cast(decoder_inputs, self.params["dtype"])
            attention_bias = tf.cast(attention_bias, self.params["dtype"])
            with tf.name_scope("shift_targets"):
                # Shift targets to the right, and remove the last element
                decoder_inputs = tf.pad(decoder_inputs,
                                        [[0, 0], [1, 0], [0, 0]])[:, :-1, :]
            with tf.name_scope("add_pos_encoding"):
                length = tf.shape(decoder_inputs)[1]
                pos_encoding = model_utils.get_position_encoding(
                    length, self.params["hidden_size"])
                pos_encoding = tf.cast(pos_encoding, self.params["dtype"])
                decoder_inputs += pos_encoding
            if training:
                decoder_inputs = tf.nn.dropout(
                    decoder_inputs, rate=self.params["layer_postprocess_dropout"])

            # Run values
            # decoder_self_attention_bias = model_utils.get_decoder_self_attention_bias(
            #     length, dtype=self.params["dtype"])
            decoder_self_attention_bias = tf.ones([1, 1, length, length], dtype=self.params["dtype"])
            logits = self.decoder_stack(
                decoder_inputs,
                encoder_outputs,
                decoder_self_attention_bias,
                attention_bias,
                training=training)
            batch_size = tf.shape(logits)[0]
            length = tf.shape(logits)[1]
            hidden_size = tf.shape(logits)[2]
            # logits = tf.reduce_mean(logits, axis=1)
            logits = tf.reshape(logits, [batch_size, length * hidden_size])
            logits = tf.reshape(logits, [batch_size, self.params['max_length'] * hidden_size])
            logits = self.embedding_layer(logits, mode="linear")

            # logits = tf.reshape(logits, [batch_size, 2])
            logits = tf.cast(logits, tf.float32)
            return logits
Example #11
    def _get_symbols_to_logits_fn(self, max_decode_length):
        """Returns a decoding function that calculates logits of the next tokens."""
        # That is, return a decode function that can predict the next token.

        timing_signal = model_utils.get_position_encoding(  # timing signal, shape [length, hidden_size]
            max_decode_length + 1, self.params["hidden_size"])
        decoder_self_attention_bias = model_utils.get_decoder_self_attention_bias(
            max_decode_length)  # self-attention bias, shape [1, 1, length, length]

        def symbols_to_logits_fn(ids, i, cache):
            """Generate logits for next potential IDs.
      Given the IDs of the tokens predicted so far, this function uses the decoder
      and the encoder information to predict the next token. `ids` holds the tokens
      predicted so far and `i` is the position currently being predicted. `cache` is
      needed because decoding runs once during training but many times during
      inference (one token at a time), and every step reuses the same encoder
      information, so it is stored ahead of time.

      Args:
        ids: Current decoded sequences.
          int tensor with shape [batch_size * beam_size, i + 1]. Ignoring batch_size,
          note that these are not the IDs of the whole sentence but the candidate
          token IDs from the start up to the current position.
        i: Loop index
        cache: dictionary of values storing the encoder output, encoder-decoder
          attention bias, and previous decoder attention values.

      Returns:
        Tuple of
          (logits with shape [batch_size * beam_size, vocab_size],
           updated cache values)
      """
            # Set decoder input to the last generated IDs, i.e. the candidate token
            # IDs at the current position, shape [batch_size * beam_size, 1].
            decoder_input = ids[:, -1:]

            # Preprocess decoder input by getting embeddings and adding timing signal.
            # Embedding gives shape [batch_size * beam_size, 1, hidden_size]: at
            # inference time the decoder input is just the last predicted token,
            # treated as a length-1 sequence, embedded, and fed to the decoder.
            decoder_input = self.embedding_softmax_layer(decoder_input)
            decoder_input += timing_signal[i:i + 1]  # add the timing signal for position i

            # Self-attention bias for this step, shape [1, 1, 1, i + 1].
            self_attention_bias = decoder_self_attention_bias[:, :, i:i + 1, :i + 1]
            # Run the decoder; the output has the same shape as decoder_input,
            # i.e. [batch_size * beam_size, 1, hidden_size].
            decoder_outputs = self.decoder_stack(
                decoder_input,
                cache.get("encoder_outputs"), self_attention_bias,
                cache.get("encoder_decoder_attention_bias"), cache)
            # Project from [batch_size * beam_size, 1, hidden_size] to
            # [batch_size * beam_size, 1, vocab_size].
            logits = self.embedding_softmax_layer.linear(decoder_outputs)
            # Squeeze out the length-1 dimension:
            # [batch_size * beam_size, 1, vocab_size] -> [batch_size * beam_size, vocab_size].
            logits = tf.squeeze(logits, axis=[1])
            return logits, cache

        return symbols_to_logits_fn
Example #12
    def _get_symbols_to_logits_fn(self, max_decode_length):
        """Returns a decoding function that calculates logits of the next tokens."""

        timing_signal = model_utils.get_position_encoding(
            max_decode_length + 1, self.params["hidden_size"])
        decoder_self_attention_bias = model_utils.get_decoder_self_attention_bias(
            max_decode_length)

        def symbols_to_logits_fn(ids, i, cache):
            """Generate logits for next potential IDs.

      Args:
        ids: Current decoded sequences.
          int tensor with shape [batch_size * beam_size, i + 1]
        i: Loop index
        cache: dictionary of values storing the encoder output, encoder-decoder
          attention bias, and previous decoder attention values.

      Returns:
        Tuple of
          (logits with shape [batch_size * beam_size, vocab_size],
           updated cache values)
      """
            # Set decoder input to the last generated IDs
            decoder_input = ids[:, -1:]

            ### domyounglee 2020.2.12
            cls_dec_bias = model_utils.get_cls_dec_attention_bias(
                tf.cast(tf.equal(decoder_input, 2), tf.int64))
            #self.cls_attention_bias=None
            # Preprocess decoder input by getting embeddings and adding timing signal.
            decoder_input = self.embedding_softmax_layer(decoder_input)
            decoder_input += timing_signal[i:i + 1]

            self_attention_bias = decoder_self_attention_bias[:, :,
                                                              i:i + 1, :i + 1]
            decoder_outputs = self.decoder_stack(
                decoder_input,
                cache.get("encoder_outputs"),
                self_attention_bias,
                cache.get("encoder_decoder_attention_bias"),
                cls_attention_bias=None,
                cls_dec_attention_bias=None,
                identity_mask=None,
                cache=cache)
            logits = self.embedding_softmax_layer.linear(decoder_outputs)
            logits = tf.squeeze(logits, axis=[1])
            return logits, cache

        return symbols_to_logits_fn
Example #13
  def encode(self, seq, seq_len=None, output_method='all'):
    with tf.variable_scope(self.scope, reuse=tf.AUTO_REUSE):
      if self.use_position_encoding:
        hidden_size = melt.get_shape(seq, -1)
        # Scale embedding by the sqrt of the hidden size
        seq *= hidden_size ** 0.5

        # Create binary mask of size [batch_size, length]
        # where 1 = not padding, 0 = padding (tf.sequence_mask marks real tokens)
        padding = tf.to_float(tf.sequence_mask(seq_len))

        # Set all padding embedding values to 0
        seq *= tf.expand_dims(padding, -1)

        pos_encoding = model_utils.get_position_encoding(
            tf.shape(seq)[1], tf.shape(seq)[-1])
        seq = seq + pos_encoding

      num_filters = self.num_filters
      seqs = [seq]
      #batch_size = melt.get_batch_size(seq)
     
      #kernel_sizes = [3, 5, 7, 9, 11, 13]
      kernel_sizes = [3] * 7
      assert self.num_layers <= len(kernel_sizes)

      for layer in range(self.num_layers):
        #input_size_ = melt.get_shape(seq, -1) if layer == 0 else num_filters
        seq = melt.dropout(seq, self.keep_prob, self.is_train)
        seq = tf.layers.conv1d(seqs[-1], num_filters, kernel_size=kernel_sizes[layer], padding='same', activation=tf.nn.relu)
        # mask = melt.dropout(tf.ones([batch_size, 1, input_size_], dtype=tf.float32),
        #                   keep_prob=self.keep_prob, is_train=self.is_train, mode=None)
        #seq = tf.layers.conv1d(seqs[-1] * mask, num_filters, kernel_size=3, padding='same', activation=tf.nn.relu)
        #seq = tf.layers.conv1d(seqs[-1] * mask, num_filters, kernel_size=kernel_sizes[layer], padding='same', activation=tf.nn.relu)
        
        # if self.is_train and self.keep_prob < 1:
        #   seq = tf.nn.dropout(seq, self.keep_prob)
        #seq = melt.layers.batch_norm(seq, self.is_train, name='layer_%d' % layer)
        seqs.append(seq)
      
      outputs = tf.concat(seqs[1:], 2)
      # Do not apply any dropout inside the convnet; dropout is applied outside.
      # if self.is_train and self.keep_prob < 1:
      #   outputs = tf.nn.dropout(outputs, self.keep_prob)

      # For compatibility with RNN encoders that also return a state.
      return melt.rnn.encode_outputs(outputs, seq_len, output_method)
Example #14
 def __call__(self, inputs, embedded_inputs):
   """1.get padding; 2.add position encoding.
     Args:
       inputs:          size with [batch_size, length]
       embedded_inputs: size with [batch_size, length, hidden_size]
     return: 
       encoder_inputs:  size with [batch_size, length, hidden_size]
       inputs_padding:  size with [batch_size, length]
   """
   with tf.name_scope("stack_input"):
     inputs_padding = model_utils.get_padding(inputs)
     length = tf.shape(inputs)[1]
     pos_encoding = model_utils.get_position_encoding(length, self.params["hidden_size"])
     encoder_inputs = embedded_inputs + pos_encoding
     if self.train:
       encoder_inputs = tf.nn.dropout(encoder_inputs, 1-self.params["layer_postprocess_dropout"])
   return encoder_inputs, inputs_padding
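A hypothetical call site for this input-preparation layer might look like the following; embedding_layer, input_stack, and the token IDs are invented here purely for illustration.

inputs = tf.constant([[12, 5, 7, 0, 0]])           # [batch_size, length], 0 = padding ID
embedded_inputs = embedding_layer(inputs)          # [batch_size, length, hidden_size]
encoder_inputs, inputs_padding = input_stack(inputs, embedded_inputs)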
Example #15
    def get_pointer_encodings(self,
                              images,
                              words,
                              tags,
                              word_paddings=None,
                              training=False):

        batch_size, image_locations, length = (tf.shape(images)[0],
                                               tf.shape(images)[1],
                                               tf.shape(words)[1])
        if word_paddings is None:
            word_paddings = tf.cast(tf.ones_like(words), self.params["dtype"])

        # Pass the image features [BATCH, 64, 2048] into an encoder
        images = self.image_layer(images)
        image_attention_bias = tf.zeros([batch_size, 1, 1, image_locations])
        image_attention_bias = tf.cast(image_attention_bias,
                                       self.params["dtype"])
        image_padding = tf.zeros_like(images)
        encoder_outputs = self.encoder(images,
                                       image_attention_bias,
                                       image_padding,
                                       training=training)

        # Add a positional encoding to the word embeddings
        pos_encoding = tf.cast(
            model_utils.get_position_encoding(length,
                                              self.params["hidden_size"]),
            self.params["dtype"])
        decoder_inputs = pos_encoding + self.merge_embeddings(
            tf.concat([
                self.word_embeddings(
                    words, mode="embedding", training=training),
                self.tag_embeddings(tags, mode="embedding", training=training)
            ], -1),
            training=training)

        # Use the decoder to merge image and word features
        word_attention_bias = -1e9 * (
            1.0 - word_paddings[:, tf.newaxis, tf.newaxis, :])
        return self.decoder(decoder_inputs,
                            encoder_outputs,
                            word_attention_bias,
                            image_attention_bias,
                            training=training)
Example #16
    def encode(self, inputs, attention_bias):
        with tf.name_scope("encode"):
            # Prepare inputs to the layer stack by adding positional encodings and
            # applying dropout.
            embedded_inputs = self.encoder_embedding_layer(inputs)
            inputs_padding = seoul_get_padding(inputs)

            with tf.name_scope("add_pos_encoding"):
                length = tf.shape(embedded_inputs)[1]
                pos_encoding = model_utils.get_position_encoding(
                    length, self.params["hidden_size"])
                encoder_inputs = embedded_inputs + pos_encoding

            if self.train:
                encoder_inputs = tf.nn.dropout(
                    encoder_inputs, 1 - self.params["layer_postprocess_dropout"])

            return self.encoder_stack(encoder_inputs, attention_bias, inputs_padding)
Example #17
    def predict(self, start_tokens, encoder_outputs,
                encoder_decoder_attention_bias):
        """Return predicted sequence."""
        with tf.name_scope('decode'):
            batch_size = tf.shape(encoder_outputs)[0]
            max_decode_length = self.params['sequence_length']
            timing_signal = model_utils.get_position_encoding(
                max_decode_length, self.params['hidden_size'])
            decoder_self_attention_bias = model_utils.get_decoder_self_attention_bias(
                max_decode_length)

            # Create cache storing decoder attention values for each layer.
            cache = {
                'layer_%d' % layer: {
                    'k': tf.zeros([batch_size, 0, self.params['hidden_size']]),
                    'v': tf.zeros([batch_size, 0, self.params['hidden_size']])
                }
                for layer in range(self.params['num_hidden_layers'])
            }

            # Add encoder output and attention bias to the cache.
            cache['encoder_outputs'] = encoder_outputs
            cache[
                'encoder_decoder_attention_bias'] = encoder_decoder_attention_bias

            # Forward decoder_inputs to decoder_stack max_decode_length times instead of applying beam search.
            decoder_outputs = tf.zeros(
                [batch_size, 0, self.params['output_size']])
            decoder_inputs = tf.expand_dims(start_tokens, axis=1)
            for i in range(max_decode_length):
                decoder_inputs = self.decoder_embedding_layer(decoder_inputs)
                decoder_inputs += timing_signal[i:i + 1]
                self_attention_bias = decoder_self_attention_bias[:, :, i:i +
                                                                  1, :i + 1]
                decoder_inputs = self.decoder_stack(
                    decoder_inputs, cache.get('encoder_outputs'),
                    self_attention_bias,
                    cache.get('encoder_decoder_attention_bias'), cache)
                decoder_inputs = self.output_embedding_layer(decoder_inputs)
                decoder_outputs = tf.concat([decoder_outputs, decoder_inputs],
                                            axis=1)
        return decoder_outputs
Example #18
  def _get_symbols_to_logits_fn(self, max_decode_length):
    """Returns a decoding function that calculates logits of the next tokens."""

    timing_signal = model_utils.get_position_encoding(
        max_decode_length + 1, self.params.hidden_size)
    decoder_self_attention_bias = model_utils.get_decoder_self_attention_bias(
        max_decode_length)

    def symbols_to_logits_fn(ids, i, cache):
      """Generate logits for next potential IDs.

      Args:
        ids: Current decoded sequences.
          int tensor with shape [batch_size * beam_size, i + 1]
        i: Loop index
        cache: dictionary of values storing the encoder output, encoder-decoder
          attention bias, and previous decoder attention values.

      Returns:
        Tuple of
          (logits with shape [batch_size * beam_size, vocab_size],
           updated cache values)
      """
      # Set decoder input to the last generated IDs
      decoder_input = ids[:, -1:]

      # Preprocess decoder input by getting embeddings and adding timing signal.
      decoder_input = self.embedding_softmax_layer(decoder_input)
      decoder_input += timing_signal[i:i + 1]

      self_attention_bias = decoder_self_attention_bias[:, :, i:i + 1, :i + 1]
      decoder_outputs = self.decoder_stack(
          decoder_input, cache.get("encoder_outputs"), self_attention_bias,
          cache.get("encoder_decoder_attention_bias"), cache)
      logits = self.embedding_softmax_layer.linear(decoder_outputs)
      logits = tf.squeeze(logits, axis=[1])
      return logits, cache
    return symbols_to_logits_fn
Example #19
  def call(self, seq, seq_len=None, masks=None, 
           output_method=OutputMethod.all, 
           training=False):
    if self.use_position_encoding:
      hidden_size = melt.get_shape(seq, -1)
      # Scale embedding by the sqrt of the hidden size
      seq *= hidden_size ** 0.5

      # Create binary mask of size [batch_size, length]
      # where 1 = not padding, 0 = padding (tf.sequence_mask marks real tokens)
      padding = tf.to_float(tf.sequence_mask(seq_len))

      # Set all padding embedding values to 0
      seq *= tf.expand_dims(padding, -1)

      pos_encoding = model_utils.get_position_encoding(
          tf.shape(seq)[1], tf.shape(seq)[-1])
      seq = seq + pos_encoding

    num_filters = self.num_filters
    seqs = [seq]
    #batch_size = melt.get_batch_size(seq)

    for layer in range(self.num_layers):
      if masks is None:
        seq_ = melt.dropout(seq, self.keep_prob, training)
      else:
        seq_ = seq * masks[layer]
      seq = self.conv1ds[layer](seq_)
      seqs.append(seq)
    
    outputs = tf.concat(seqs[1:], 2)
    # Do not apply any dropout inside the convnet; dropout is applied outside.
    # if self.is_train and self.keep_prob < 1:
    #   outputs = tf.nn.dropout(outputs, self.keep_prob)

    # For compatibility with RNN encoders that also return a state.
    return melt.rnn.encode_outputs(outputs, seq_len, output_method)
Example #20
    def encode(self, inputs, attention_bias):
        """Generate continuous representation for inputs.

    Args:
      inputs: int tensor with shape [batch_size, input_length].
      attention_bias: float tensor with shape [batch_size, 1, 1, input_length]
      # In this tensor, non-padding positions are 0 and padding positions are a large negative value.

    Returns:
      float tensor with shape [batch_size, input_length, hidden_size]
    """
        with tf.name_scope("encode"):
            # Prepare inputs to the layer stack by adding positional encodings and
            # applying dropout.
            embedded_inputs = self.embedding_softmax_layer(
                inputs)  # embed the input IDs

            # Padding-information tensor: 1 at padding positions, 0 elsewhere,
            # with the same shape as inputs.
            inputs_padding = model_utils.get_padding(inputs)

            # Add the positional encoding, i.e. timing information, to embedded_inputs.
            with tf.name_scope("add_pos_encoding"):
                length = tf.shape(embedded_inputs)[1]
                pos_encoding = model_utils.get_position_encoding(
                    length, self.params["hidden_size"])
                encoder_inputs = embedded_inputs + pos_encoding

            if self.train:  # apply dropout in training mode
                encoder_inputs = tf.nn.dropout(
                    encoder_inputs,
                    1 - self.params["layer_postprocess_dropout"])

            # encoder_inputs should have shape [batch_size, input_length, hidden_size],
            # attention_bias should have shape [batch_size, 1, 1, input_length], and
            # inputs_padding should have shape [batch_size, input_length].
            return self.encoder_stack(encoder_inputs, attention_bias,
                                      inputs_padding)  # return the encoder output
Example #21
    def encode(self, inputs, attention_bias):
        """Generate continuous representation for inputs.

    Args:
      inputs: int tensor with shape [batch_size, input_length].
      attention_bias: float tensor with shape [batch_size, 1, 1, input_length]

    Returns:
      float tensor with shape [batch_size, input_length, hidden_size]
    """
        print('LOOK AT ME')
        print(inputs.get_shape().as_list())
        print(attention_bias.get_shape().as_list())
        with tf.name_scope("encode"):
            # Prepare inputs to the layer stack by adding positional encodings and
            # applying dropout.
            embedded_inputs = self.embedding_softmax_layer(inputs)
            inputs_padding = model_utils.get_padding(inputs)

            with tf.name_scope("add_pos_encoding"):
                length = tf.shape(embedded_inputs)[1]
                pos_encoding = model_utils.get_position_encoding(
                    length, self.params["hidden_size"])
                encoder_inputs = embedded_inputs + pos_encoding

            if self.train:
                encoder_inputs = tf.nn.dropout(
                    encoder_inputs,
                    1 - self.params["layer_postprocess_dropout"])

            print('YOOO')
            print(encoder_inputs)
            print(attention_bias)
            print(inputs_padding)

            return self.encoder_stack(encoder_inputs, attention_bias,
                                      inputs_padding)
Example #22
  def decode(self, targets, encoder_outputs, attention_bias):
    """Generate logits for each value in the target sequence.

    Args:
      targets: target values for the output sequence.
        int tensor with shape [batch_size, target_length]
      encoder_outputs: continuous representation of input sequence.
        float tensor with shape [batch_size, input_length, hidden_size]
      attention_bias: float tensor with shape [batch_size, 1, 1, input_length]

    Returns:
      float32 tensor with shape [batch_size, target_length, vocab_size]
    """
    with tf.name_scope("decode"):
      # Prepare inputs to decoder layers by shifting targets, adding positional
      # encoding and applying dropout.
      decoder_inputs = self.embedding_softmax_layer(targets)
      with tf.name_scope("shift_targets"):
        # Shift targets to the right, and remove the last element
        decoder_inputs = tf.pad(
            decoder_inputs, [[0, 0], [1, 0], [0, 0]])[:, :-1, :]
      with tf.name_scope("add_pos_encoding"):
        length = tf.shape(decoder_inputs)[1]
        decoder_inputs += model_utils.get_position_encoding(
            length, self.params.hidden_size)
      if self.train:
        decoder_inputs = tf.nn.dropout(
            decoder_inputs, 1 - self.params.layer_postprocess_dropout)

      # Run values
      decoder_self_attention_bias = model_utils.get_decoder_self_attention_bias(
          length)
      outputs = self.decoder_stack(
          decoder_inputs, encoder_outputs, decoder_self_attention_bias,
          attention_bias)
      logits = self.embedding_softmax_layer.linear(outputs)
      return logits
Example #23
    def encode(self, inputs, attention_bias):
        """Generate continuous representation for inputs.
    Args:
      inputs: int tensor with shape [batch_size, input_length].
      attention_bias: float tensor with shape [batch_size, 1, 1, input_length]
    Returns:
      float tensor with shape [batch_size, input_length, hidden_size]
    """
        with tf.name_scope("encode"):
            # Prepare inputs to the layer stack by adding positional encodings and
            # applying dropout.
            embedded_inputs = self.embedding_softmax_layer(inputs)
            inputs_padding = model_utils.get_padding(inputs)

            with tf.name_scope("add_pos_encoding"):
                length = tf.shape(embedded_inputs)[1]
                pos_encoding = model_utils.get_position_encoding(
                    length, self.params["hidden_size"])
                encoder_inputs = embedded_inputs + pos_encoding  # shape (batch_size, input_len, h_size)

            with tf.name_scope("add_vir_entities"):
                encoder_inputs = self.add_vir_entities(
                    encoder_inputs
                )  # shape (batch_size, input_len + num_ve, h_size)

            if self.train:
                encoder_inputs = tf.nn.dropout(
                    encoder_inputs,
                    1 - self.params["layer_postprocess_dropout"])

            x = self.encoder_stack(
                encoder_inputs, attention_bias,
                inputs_padding)  # shape (-1, length, h_size)
            # Remove virtual entities from the encoder output
            x = x[:, :-self.params["num_vir_entities"], :]
            return x  # shape (batch_size, input_length, hidden_size)
Example #24
    tf_sess.run(tf.global_variables_initializer())
    tf_assign_list = get_assign_list(tf_transformer)
    assert len(tf_assign_list) == len(list(set(tf_assign_list)))
    tf_sess.run(tf_assign_list)

    tf_res = tf_sess.run(tf_output,
                         feed_dict={
                             tf_input_x_raw: my_input_x_raw,
                             tf_input_y_raw: my_input_y_raw
                         })
    print("tf output:")
    with printoptions(precision=3, suppress=True):
        print(tf_res)

    tf_embedded_inputs = tf_transformer.embedding_softmax_layer(tf_input_x_raw)
    tf_pos_encoding = tf_model_utils.get_position_encoding(
        seq_len_x, tf_transformer.params.hidden_size)
    tf_embedding_inputs = tf_embedded_inputs + tf_pos_encoding

    tf_attention_bias = tf_model_utils.get_padding_bias(tf_input_x_raw)
    tf_encoder_outputs = tf_transformer.encode(tf_input_x_raw,
                                               tf_attention_bias)

    tf_pred = tf_transformer(tf_input_x_raw)["outputs"]
    tf_pred_res = tf_sess.run(tf_pred,
                              feed_dict={tf_input_x_raw: my_input_x_raw})
    print("tf prediction:")
    with printoptions(threshold=2000):
        print(tf_pred_res)

    k_transformer = KTransformer(params)
    k_input_x_raw = Input(shape=(_seq_len_x, ))