Example #1
0
  def test_get_padding(self):
    """get_padding marks padding positions (value 0) with 1 and real tokens with 0."""
    inputs = tf.constant([[1, 0, 0, 0, 2], [3, 4, 0, 0, 0], [0, 5, 6, 0, 7]])
    padding_op = model_utils.get_padding(inputs, padding_value=0)
    with self.test_session() as sess:
      result = sess.run(padding_op)

    expected = [[0, 1, 1, 1, 0], [0, 0, 1, 1, 1], [1, 0, 0, 1, 0]]
    self.assertAllEqual(expected, result)
    def test_get_padding(self):
        """Positions equal to padding_value (0) map to 1; all others map to 0."""
        expected_mask = [[0, 1, 1, 1, 0], [0, 0, 1, 1, 1], [1, 0, 0, 1, 0]]
        source = tf.constant([[1, 0, 0, 0, 2], [3, 4, 0, 0, 0], [0, 5, 6, 0, 7]])
        mask_op = model_utils.get_padding(source, padding_value=0)
        with self.test_session() as sess:
            mask = sess.run(mask_op)

        self.assertAllEqual(expected_mask, mask)
Example #3
0
  def _encode(self, input_dict):
    """Build (once) and run the Transformer encoder stack.

    On the first call the embedding layer, the per-layer attention/FFN
    sublayers and the final normalization are created and cached on self;
    subsequent calls only execute the graph part.

    Args:
      input_dict: dict whose 'source_tensors' entry holds
        [input token ids, source lengths].

    Returns:
      dict with the encoded sequence, attention bias, source lengths,
      the shared embedding layer and the raw encoder input ids.
    """
    is_training = self.mode == "train"

    # Lazily construct graph components on the first call only.
    if not self.layers:
      self.embedding_softmax_layer = embedding_layer.EmbeddingSharedWeights(
        self.params["src_vocab_size"], self.params["hidden_size"],
        pad_vocab_to_eight=self.params.get('pad_embeddings_2_eight', False))

      for _ in range(self.params['encoder_layers']):
        # Each encoder layer is a self-attention sublayer followed by a
        # feed-forward sublayer, both wrapped with pre/post processing.
        attention = attention_layer.SelfAttention(
          self.params["hidden_size"], self.params["num_heads"],
          self.params["attention_dropout"], is_training)
        ffn = ffn_layer.FeedFowardNetwork(
          self.params["hidden_size"], self.params["filter_size"],
          self.params["relu_dropout"], is_training)
        self.layers.append([
          PrePostProcessingWrapper(attention, self.params, is_training),
          PrePostProcessingWrapper(ffn, self.params, is_training)])

      # Normalization applied to the output of the final layer.
      self.output_normalization = LayerNormalization(self.params["hidden_size"])

    with tf.name_scope("encode"):
      inputs = input_dict['source_tensors'][0]
      embedded_inputs = self.embedding_softmax_layer(inputs)
      inputs_padding = utils.get_padding(inputs)
      inputs_attention_bias = utils.get_padding_bias(inputs)

      # Add sinusoidal position information before entering the stack.
      with tf.name_scope("add_pos_encoding"):
        seq_len = tf.shape(embedded_inputs)[1]
        pos_encoding = utils.get_position_encoding(
            seq_len, self.params["hidden_size"])
        encoder_inputs = embedded_inputs + tf.cast(
            x=pos_encoding, dtype=embedded_inputs.dtype)

      if is_training:
        # TF1-style dropout: the second argument is keep_prob.
        encoder_inputs = tf.nn.dropout(
            encoder_inputs, 1 - self.params["layer_postprocess_dropout"])

      encoded = self._call(encoder_inputs, inputs_attention_bias,
                           inputs_padding)
      return {
          'outputs': encoded,
          'inputs_attention_bias': inputs_attention_bias,
          'state': None,
          'src_lengths': input_dict['source_tensors'][1],
          'embedding_softmax_layer': self.embedding_softmax_layer,
          'encoder_input': inputs,
      }
Example #4
0
  def decode_pass(self, targets, encoder_outputs, encoder_outputs_b,
                  inputs_attention_bias):
    """Generate logits for each value in the target sequence.

    Args:
      targets: target values for the output sequence.
        int tensor with shape [batch_size, target_length]
      encoder_outputs: continuous representation of input sequence.
        float tensor with shape [batch_size, input_length, hidden_size]
      encoder_outputs_b: continuous representation of input sequence
        which includes the source embeddings.
        float tensor with shape [batch_size, input_length, hidden_size]
      inputs_attention_bias: float tensor with shape
        [batch_size, 1, input_length]

    Returns:
      float32 tensor with shape [batch_size, target_length, vocab_size]
    """
    # Prepare inputs to decoder layers by applying embedding
    # and adding positional encoding.
    decoder_inputs = self.embedding_softmax_layer(targets)

    if self.position_embedding_layer is not None:
      with tf.name_scope("add_pos_encoding"):
        # Learned positional embeddings indexed by absolute position.
        pos_input = tf.range(
            0,
            tf.shape(decoder_inputs)[1],
            delta=1,
            dtype=tf.int32,
            name='range')
        pos_encoding = self.position_embedding_layer(pos_input)
        decoder_inputs = decoder_inputs + tf.cast(
            x=pos_encoding, dtype=decoder_inputs.dtype)

    if self.mode == "train":
      # TF1-style dropout: the second argument is keep_prob.
      decoder_inputs = tf.nn.dropout(decoder_inputs,
                                     self.params["embedding_dropout_keep_prob"])

    # Zero out embeddings at padded target positions so they do not
    # contribute to the decoder stack.
    inputs_padding = get_padding(
        targets, padding_value=self._pad_sym, dtype=decoder_inputs.dtype)
    decoder_inputs *= tf.expand_dims(1.0 - inputs_padding, 2)

    # Run the decoder stack to produce per-position vocabulary logits.
    logits = self._call(
        decoder_inputs=decoder_inputs,
        encoder_outputs_a=encoder_outputs,
        encoder_outputs_b=encoder_outputs_b,
        input_attention_bias=inputs_attention_bias)

    return logits
Example #5
0
    def _encode(self, input_dict):
        """Encode the source batch with a Transformer encoder stack.

        Sublayers are constructed lazily on the first call and cached in
        ``self.layers``; subsequent calls only execute the graph part.

        Args:
            input_dict: dict whose 'source_tensors' entry unpacks to
                (input token ids, source lengths).

        Returns:
            dict with keys 'outputs', 'inputs_attention_bias', 'state',
            'src_lengths', 'embedding_softmax_layer', 'encoder_input'.
        """
        training = (self.mode == "train")

        if len(self.layers) == 0:
            # prepare encoder graph (first call only)
            self.embedding_softmax_layer = embedding_layer.EmbeddingSharedWeights(
                self.params["src_vocab_size"],
                self.params["hidden_size"],
                pad_vocab_to_eight=self.params.get('pad_embeddings_2_eight',
                                                   False),
            )

            for _ in range(self.params['encoder_layers']):
                # Create sublayers for each layer.
                self_attention_layer = attention_layer.SelfAttention(
                    hidden_size=self.params["hidden_size"],
                    num_heads=self.params["num_heads"],
                    attention_dropout=self.params["attention_dropout"],
                    train=training,
                    regularizer=self.regularizer,
                    batch_size=self.batch_size,
                    num_feature=self.num_features)
                feed_forward_network = ffn_layer.FeedFowardNetwork(
                    hidden_size=self.params["hidden_size"],
                    filter_size=self.params["filter_size"],
                    relu_dropout=self.params["relu_dropout"],
                    train=training,
                    #num_features=self.num_features,
                    #batch_size=self.batch_size,
                    regularizer=self.regularizer)

                # Pair each attention sublayer with its FFN sublayer, both
                # wrapped with pre/post processing.
                self.layers.append([
                    PrePostProcessingWrapper(self_attention_layer, self.params,
                                             training),
                    PrePostProcessingWrapper(feed_forward_network, self.params,
                                             training)
                ])

            # final normalization layer: batch norm or layer norm,
            # selected by norm_params["type"].
            print("Encoder:", self.norm_params["type"], self.mode)
            if self.norm_params["type"] == "batch_norm":
                self.output_normalization = Transformer_BatchNorm(
                    training=training, params=self.norm_params)
            else:
                self.output_normalization = LayerNormalization(
                    hidden_size=self.params["hidden_size"],
                    params=self.norm_params)

        # actual encoder part
        with tf.name_scope("encode"):
            inputs, src_lengths = input_dict['source_tensors']
            #inputs = input_dict['source_tensors'][0]
            # Prepare inputs to the layer stack by adding positional encodings and
            # applying dropout.
            embedded_inputs = self.embedding_softmax_layer(inputs)
            if self.params["remove_padding"]:
                inputs_padding = utils.get_padding(inputs)
                #inputs_padding = utils.get_padding(inputs,dtype=self._params["dtype"])
            else:
                # None tells downstream code not to re-mask paddings.
                inputs_padding = None
            inputs_attention_bias = utils.get_padding_bias(inputs)
            # NOTE(review): this permutation assumes the bias is rank 5 and
            # swaps axes 2 and 3 — confirm the layout produced by
            # utils.get_padding_bias in this project.
            inputs_attention_bias = tf.transpose(inputs_attention_bias,
                                                 [0, 1, 3, 2, 4])
            # inputs_attention_bias = utils.get_padding_bias(inputs, dtype=self._params["dtype"])

            with tf.name_scope("add_pos_encoding"):
                length = tf.shape(embedded_inputs)[1]
                pos_encoding = utils.get_position_encoding(
                    length,
                    self.params["hidden_size"],
                )
                #encoder_inputs = embedded_inputs + tf.cast(x=pos_encoding,
                #                                           dtype=embedded_inputs.dtype)
                pos_encoding = tf.cast(x=pos_encoding,
                                       dtype=embedded_inputs.dtype)
                # Insert singleton axes so the encoding broadcasts over the
                # batch and the extra feature axis; presumably
                # embedded_inputs is rank 4 here — verify against caller.
                pos_encoding_exp = pos_encoding[None, :, None, :]
                encoder_inputs = embedded_inputs + pos_encoding_exp

            if self.mode == "train":
                # TF1-style dropout: keep_prob = 1 - dropout rate.
                encoder_inputs = tf.nn.dropout(
                    encoder_inputs,
                    keep_prob=1.0 - self.params["layer_postprocess_dropout"],
                )

            encoded = self._call(encoder_inputs, inputs_attention_bias,
                                 inputs_padding)
            return {
                'outputs': encoded,
                'inputs_attention_bias': inputs_attention_bias,
                'state': None,
                'src_lengths': src_lengths,
                #'src_lengths': input_dict['source_tensors'][1],
                'embedding_softmax_layer': self.embedding_softmax_layer,
                'encoder_input': inputs
            }
    def _encode(self, input_dict):
        """Encode the source batch with a convolutional (ConvS2S-style) encoder.

        The layer stack — token embeddings, learned positional embeddings,
        a linear projection into the first conv width, the conv blocks, and
        a final projection back to the embedding size — is built lazily on
        the first call inside the "encode" variable scope and reused on
        later calls.

        Args:
            input_dict: dict whose 'source_tensors' entry holds
                [input token ids, source lengths].

        Returns:
            dict with keys 'outputs', 'outputs_b',
            'inputs_attention_bias_cs2s', 'state', 'src_lengths',
            'embedding_softmax_layer', 'encoder_input'.
        """
        inputs = input_dict['source_tensors'][0]
        source_length = input_dict['source_tensors'][1]

        with tf.variable_scope("encode"):
            # prepare encoder graph (first call only)
            if len(self.layers) == 0:
                # conv_nchannels_kwidth is a sequence of
                # (num_channels, kernel_width) pairs; split it into two
                # parallel lists.
                knum_list = list(
                    zip(*self.params.get("conv_nchannels_kwidth")))[0]
                kwidth_list = list(
                    zip(*self.params.get("conv_nchannels_kwidth")))[1]

                with tf.variable_scope("embedding"):
                    self.embedding_softmax_layer = embedding_layer.EmbeddingSharedWeights(
                        vocab_size=self._src_vocab_size,
                        hidden_size=self._src_emb_size,
                        pad_vocab_to_eight=self._pad2eight,
                        init_var=0.1,
                        embed_scale=False,
                        pad_sym=self._pad_sym,
                        mask_paddings=True)

                with tf.variable_scope("pos_embedding"):
                    # Learned positional embeddings; vocab size bounds the
                    # maximum supported input length.
                    self.position_embedding_layer = embedding_layer.EmbeddingSharedWeights(
                        vocab_size=self.params.get("max_input_length",
                                                   MAX_INPUT_LENGTH),
                        hidden_size=self._src_emb_size,
                        pad_vocab_to_eight=self._pad2eight,
                        init_var=0.1,
                        embed_scale=False,
                        pad_sym=self._pad_sym,
                        mask_paddings=True)

                # linear projection before cnn layers
                self.layers.append(
                    ffn_wn_layer.FeedFowardNetworkNormalized(
                        self._src_emb_size,
                        knum_list[0],
                        dropout=self.params["embedding_dropout_keep_prob"],
                        var_scope_name="linear_mapping_before_cnn_layers",
                        mode=self.mode,
                        normalization_type=self.normalization_type,
                        regularizer=self.regularizer,
                        init_var=self.init_var))

                for i in range(len(knum_list)):
                    in_dim = knum_list[i] if i == 0 else knum_list[i - 1]
                    out_dim = knum_list[i]

                    # linear projection is needed for residual connections if
                    # input and output of a cnn layer do not match
                    if in_dim != out_dim:
                        linear_proj = ffn_wn_layer.FeedFowardNetworkNormalized(
                            in_dim,
                            out_dim,
                            var_scope_name="linear_mapping_cnn_" + str(i + 1),
                            dropout=1.0,
                            mode=self.mode,
                            normalization_type=self.normalization_type,
                            regularizer=self.regularizer,
                            init_var=self.init_var)
                    else:
                        linear_proj = None

                    conv_layer = conv_wn_layer.Conv1DNetworkNormalized(
                        in_dim,
                        out_dim,
                        kernel_width=kwidth_list[i],
                        mode=self.mode,
                        layer_id=i + 1,
                        hidden_dropout=self.params["hidden_dropout_keep_prob"],
                        conv_padding="SAME",
                        decode_padding=False,
                        activation=self.conv_activation,
                        normalization_type=self.normalization_type,
                        regularizer=self.regularizer,
                        init_var=self.init_var)

                    # Each stack entry is (optional residual projection,
                    # conv layer); linear_proj is None when dims match.
                    self.layers.append([linear_proj, conv_layer])

                # linear projection after cnn layers
                self.layers.append(
                    ffn_wn_layer.FeedFowardNetworkNormalized(
                        knum_list[-1],
                        self._src_emb_size,
                        dropout=1.0,
                        var_scope_name="linear_mapping_after_cnn_layers",
                        mode=self.mode,
                        normalization_type=self.normalization_type,
                        regularizer=self.regularizer,
                        init_var=self.init_var))

            encoder_inputs = self.embedding_softmax_layer(inputs)
            inputs_attention_bias = get_padding_bias(inputs,
                                                     res_rank=3,
                                                     pad_sym=self._pad_sym)

            with tf.name_scope("add_pos_encoding"):
                # Absolute positions 0..T-1 looked up in the learned
                # positional embedding table.
                pos_input = tf.range(0,
                                     tf.shape(encoder_inputs)[1],
                                     delta=1,
                                     dtype=tf.int32,
                                     name='range')
                pos_encoding = self.position_embedding_layer(pos_input)
                encoder_inputs = encoder_inputs + tf.cast(
                    x=pos_encoding, dtype=encoder_inputs.dtype)

            if self.mode == "train":
                # TF1-style dropout: the second argument is keep_prob.
                encoder_inputs = tf.nn.dropout(
                    encoder_inputs, self.params["embedding_dropout_keep_prob"])

            # mask the paddings in the input given to cnn layers
            inputs_padding = get_padding(inputs,
                                         self._pad_sym,
                                         dtype=encoder_inputs.dtype)
            padding_mask = tf.expand_dims(1 - inputs_padding, 2)
            encoder_inputs *= padding_mask

            outputs, outputs_b, final_state = self._call(
                encoder_inputs, padding_mask)

        return {
            'outputs': outputs,
            'outputs_b': outputs_b,
            'inputs_attention_bias_cs2s': inputs_attention_bias,
            'state': final_state,
            'src_lengths': source_length,  # should it include paddings or not?
            'embedding_softmax_layer': self.embedding_softmax_layer,
            'encoder_input': inputs
        }