def _encode(self, input_dict):
        """Create the encoder layers on first use and encode the source batch.

        While ``self.layers`` is still empty, the token embedding, the position
        embedding and the stack of projection / convolution layers are created
        and stored on ``self``. The inputs are then embedded, summed with
        position embeddings, passed through dropout in ``"train"`` mode, masked
        at padding positions and handed to ``self._call``.

        Args:
            input_dict: mapping with a ``'source_tensors'`` entry. Element 0 is
                fed to the embedding layer (presumably source token ids --
                confirm against the data layer) and element 1 is returned
                unchanged as ``'src_lengths'``.

        Returns:
            dict with the keys ``'outputs'``, ``'outputs_b'`` and ``'state'``
            (the three values returned by ``self._call``),
            ``'inputs_attention_bias_cs2s'``, ``'src_lengths'``,
            ``'embedding_softmax_layer'`` and ``'encoder_input'``.
        """
        source_tensors = input_dict['source_tensors']
        inputs = source_tensors[0]
        source_length = source_tensors[1]

        with tf.variable_scope("encode"):
            # The layer objects are built only once and cached on self.layers.
            if not self.layers:
                # "conv_nchannels_kwidth" is a sequence of tuples; transpose it
                # so that row 0 holds the channel counts and row 1 the widths.
                transposed_spec = list(
                    zip(*self.params.get("conv_nchannels_kwidth")))
                channel_sizes = transposed_spec[0]
                kernel_widths = transposed_spec[1]

                # Settings common to the token and the position embedding.
                embedding_kwargs = dict(
                    hidden_size=self._src_emb_size,
                    pad_vocab_to_eight=self._pad2eight,
                    init_var=0.1,
                    embed_scale=False,
                    pad_sym=self._pad_sym,
                    mask_paddings=True)

                with tf.variable_scope("embedding"):
                    self.embedding_softmax_layer = \
                        embedding_layer.EmbeddingSharedWeights(
                            vocab_size=self._src_vocab_size,
                            **embedding_kwargs)

                with tf.variable_scope("pos_embedding"):
                    # One embedding row per position up to the configured
                    # maximum input length.
                    max_positions = self.params.get("max_input_length",
                                                    MAX_INPUT_LENGTH)
                    self.position_embedding_layer = \
                        embedding_layer.EmbeddingSharedWeights(
                            vocab_size=max_positions,
                            **embedding_kwargs)

                # Settings common to every normalized layer of the encoder.
                norm_kwargs = dict(
                    mode=self.mode,
                    normalization_type=self.normalization_type,
                    regularizer=self.regularizer,
                    init_var=self.init_var)

                # Projection from embedding size to the first conv width.
                input_projection = ffn_wn_layer.FeedFowardNetworkNormalized(
                    self._src_emb_size,
                    channel_sizes[0],
                    dropout=self.params["embedding_dropout_keep_prob"],
                    var_scope_name="linear_mapping_before_cnn_layers",
                    **norm_kwargs)
                self.layers.append(input_projection)

                # The first conv layer consumes channel_sizes[0]; every later
                # one consumes the output size of its predecessor.
                in_channels = channel_sizes[0]
                layer_specs = zip(channel_sizes, kernel_widths)
                for layer_id, (out_channels, width) in enumerate(layer_specs,
                                                                 start=1):
                    # A projection is only required for the residual
                    # connection when the conv layer changes the channel
                    # count.
                    residual_projection = None
                    if in_channels != out_channels:
                        residual_projection = \
                            ffn_wn_layer.FeedFowardNetworkNormalized(
                                in_channels,
                                out_channels,
                                var_scope_name=(
                                    "linear_mapping_cnn_" + str(layer_id)),
                                dropout=1.0,
                                **norm_kwargs)

                    convolution = conv_wn_layer.Conv1DNetworkNormalized(
                        in_channels,
                        out_channels,
                        kernel_width=width,
                        layer_id=layer_id,
                        hidden_dropout=self.params["hidden_dropout_keep_prob"],
                        conv_padding="SAME",
                        decode_padding=False,
                        activation=self.conv_activation,
                        **norm_kwargs)

                    self.layers.append([residual_projection, convolution])
                    in_channels = out_channels

                # Projection from the last conv width back to embedding size.
                output_projection = ffn_wn_layer.FeedFowardNetworkNormalized(
                    channel_sizes[-1],
                    self._src_emb_size,
                    dropout=1.0,
                    var_scope_name="linear_mapping_after_cnn_layers",
                    **norm_kwargs)
                self.layers.append(output_projection)

            encoder_inputs = self.embedding_softmax_layer(inputs)
            inputs_attention_bias = get_padding_bias(
                inputs, res_rank=3, pad_sym=self._pad_sym)

            with tf.name_scope("add_pos_encoding"):
                # Positions 0 .. (size of axis 1 of the embedded inputs) - 1;
                # axis 1 is presumably the time axis -- confirm.
                pos_input = tf.range(
                    0,
                    tf.shape(encoder_inputs)[1],
                    delta=1,
                    dtype=tf.int32,
                    name='range')
                pos_encoding = self.position_embedding_layer(pos_input)
                pos_encoding = tf.cast(x=pos_encoding,
                                       dtype=encoder_inputs.dtype)
                encoder_inputs = encoder_inputs + pos_encoding

            # Dropout on the summed embeddings is applied during training only.
            if self.mode == "train":
                keep_prob = self.params["embedding_dropout_keep_prob"]
                encoder_inputs = tf.nn.dropout(encoder_inputs, keep_prob)

            # Zero the padded positions before they reach the cnn layers.
            inputs_padding = get_padding(
                inputs, self._pad_sym, dtype=encoder_inputs.dtype)
            padding_mask = tf.expand_dims(1 - inputs_padding, 2)
            encoder_inputs = encoder_inputs * padding_mask

            outputs, outputs_b, final_state = self._call(
                encoder_inputs, padding_mask)

        encoder_result = {
            'outputs': outputs,
            'outputs_b': outputs_b,
            'inputs_attention_bias_cs2s': inputs_attention_bias,
            'state': final_state,
            'src_lengths': source_length,  # should it include paddings or not?
            'embedding_softmax_layer': self.embedding_softmax_layer,
            'encoder_input': inputs
        }
        return encoder_result
Example #2
0
  def _decode(self, input_dict):
    """Create the decoder layers on first use and decode.

    While ``self.layers`` is still empty, the embedding layers (optionally
    taken from the encoder output), the projection / convolution / attention
    stack and the final vocabulary projection are created and stored on
    ``self``. Decoding then goes through ``self.decode_pass`` when targets
    are supplied and through ``self.predict`` otherwise.

    Args:
      input_dict: mapping with an ``'encoder_output'`` dict (must contain
        ``'outputs'``; ``'outputs_b'``, ``'inputs_attention_bias_cs2s'``,
        ``'embedding_softmax_layer'`` and ``'position_embedding_layer'`` are
        optional) and, optionally, ``'target_tensors'`` whose element 0 is
        used as the targets.

    Returns:
      Without targets, whatever ``self.predict`` returns. With targets, a
      dict with ``"logits"``, ``"outputs"`` (a one-element list holding the
      argmax of the logits over the last axis), and ``"final_state"`` /
      ``"final_sequence_lengths"`` both set to ``None``.
    """
    if 'target_tensors' in input_dict:
      targets = input_dict['target_tensors'][0]
    else:
      targets = None

    encoder_output = input_dict['encoder_output']
    encoder_outputs = encoder_output['outputs']
    # Fall back to the plain encoder outputs when 'outputs_b' is absent.
    encoder_outputs_b = encoder_output.get('outputs_b', encoder_outputs)
    inputs_attention_bias = encoder_output.get(
        'inputs_attention_bias_cs2s', None)

    with tf.name_scope("decode"):
      # The layer objects are built only once and cached on self.layers.
      if not self.layers:
        # "conv_nchannels_kwidth" is a sequence of tuples; transpose it so
        # that row 0 holds the channel counts and row 1 the kernel widths.
        transposed_spec = list(zip(*self.params.get("conv_nchannels_kwidth")))
        channel_sizes = transposed_spec[0]
        kernel_widths = transposed_spec[1]

        normalization_type = self.params.get("normalization_type",
                                             "weight_norm")
        conv_activation = self.params.get("conv_activation", gated_linear_units)

        def new_embedding(vocab_size):
          # Fresh embedding table using the decoder-side settings.
          return embedding_layer.EmbeddingSharedWeights(
              vocab_size=vocab_size,
              hidden_size=self._tgt_emb_size,
              pad_vocab_to_eight=self._pad2eight,
              init_var=0.1,
              embed_scale=False,
              pad_sym=self._pad_sym,
              mask_paddings=True)

        # Token embedding: reuse the encoder's layer when it is provided and
        # sharing is enabled, otherwise create a decoder-specific one.
        with tf.variable_scope("embedding"):
          reuse_token_embedding = (
              'embedding_softmax_layer' in encoder_output
              and self.params['shared_embed'])
          if reuse_token_embedding:
            self.embedding_softmax_layer = \
              encoder_output['embedding_softmax_layer']
          else:
            self.embedding_softmax_layer = new_embedding(self._tgt_vocab_size)

        # Position embedding is optional and follows the same sharing rule.
        if not self.params.get("pos_embed", True):
          self.position_embedding_layer = None
        else:
          with tf.variable_scope("pos_embedding"):
            reuse_position_embedding = (
                'position_embedding_layer' in encoder_output
                and self.params['shared_embed'])
            if reuse_position_embedding:
              self.position_embedding_layer = \
                encoder_output['position_embedding_layer']
            else:
              self.position_embedding_layer = new_embedding(
                  self.params.get("max_input_length", MAX_INPUT_LENGTH))

        # Projection from embedding size to the first conv width.
        input_projection = ffn_wn_layer.FeedFowardNetworkNormalized(
            self._tgt_emb_size,
            channel_sizes[0],
            dropout=self.params["embedding_dropout_keep_prob"],
            var_scope_name="linear_mapping_before_cnn_layers",
            mode=self.mode,
            normalization_type=normalization_type)
        self.layers.append(input_projection)

        num_layers = self.params['decoder_layers']
        # The first conv layer consumes channel_sizes[0]; every later one
        # consumes the output size of its predecessor.
        in_channels = channel_sizes[0]
        for layer_id in range(1, num_layers + 1):
          out_channels = channel_sizes[layer_id - 1]

          # A projection is only required for the residual connection when
          # the conv layer changes the channel count.
          residual_projection = None
          if in_channels != out_channels:
            residual_projection = ffn_wn_layer.FeedFowardNetworkNormalized(
                in_channels,
                out_channels,
                var_scope_name="linear_mapping_cnn_" + str(layer_id),
                dropout=1.0,
                mode=self.mode,
                normalization_type=normalization_type)

          convolution = conv_wn_layer.Conv1DNetworkNormalized(
              in_channels,
              out_channels,
              kernel_width=kernel_widths[layer_id - 1],
              mode=self.mode,
              layer_id=layer_id,
              hidden_dropout=self.params["hidden_dropout_keep_prob"],
              conv_padding="VALID",
              decode_padding=True,
              activation=conv_activation,
              normalization_type=normalization_type)

          attention = attention_wn_layer.AttentionLayerNormalized(
              out_channels,
              embed_size=self._tgt_emb_size,
              layer_id=layer_id,
              add_res=True,
              mode=self.mode)

          self.layers.append([residual_projection, convolution, attention])
          in_channels = out_channels

        # Projection from the last used conv width to the output embedding
        # size (defaults to the target embedding size).
        out_emb_size = self.params.get("out_emb_size", self._tgt_emb_size)
        output_projection = ffn_wn_layer.FeedFowardNetworkNormalized(
            channel_sizes[num_layers - 1],
            out_emb_size,
            dropout=1.0,
            var_scope_name="linear_mapping_after_cnn_layers",
            mode=self.mode,
            normalization_type=normalization_type)
        self.layers.append(output_projection)

        if self.params['shared_embed']:
          # if embedding is shared,
          # the shared embedding is used as the final linear projection to
          # vocab space, so only a placeholder is stored here
          self.layers.append(None)
        else:
          vocab_projection = ffn_wn_layer.FeedFowardNetworkNormalized(
              out_emb_size,
              self._tgt_vocab_size,
              dropout=self.params["out_dropout_keep_prob"],
              var_scope_name="linear_mapping_to_vocabspace",
              mode=self.mode,
              normalization_type=normalization_type)
          self.layers.append(vocab_projection)

      # With targets available run a single decode pass; otherwise predict.
      if targets is not None:
        logits = self.decode_pass(targets, encoder_outputs, encoder_outputs_b,
                                  inputs_attention_bias)
        return {
            "logits": logits,
            "outputs": [tf.argmax(logits, axis=-1)],
            "final_state": None,
            "final_sequence_lengths": None
        }

      return self.predict(encoder_outputs, encoder_outputs_b,
                          inputs_attention_bias)