Example #1
    def __init__(self, params, is_train, mode=None):
        self.is_train = is_train
        self.params = params

        if mode is not None:
            self.mode = mode
        elif self.is_train:
            self.mode = ModeKeys.TRAIN
        else:
            self.mode = ModeKeys.PREDICT

        if params.shared_embedding_softmax_weights:
            print("sharing embedding!!!")
            self.embedding_softmax_layer = embedding_layer.EmbeddingSharedWeights(
                params.vocab_size, params.hidden_size)
            self.encoder_embedding_layer = self.embedding_softmax_layer
            self.decoder_embedding_layer = self.embedding_softmax_layer
            self.decoder_softmax_layer = self.embedding_softmax_layer
        else:
            print("not sharing embedding!!!")
            self.encoder_embedding_layer = embedding_layer.EmbeddingWeights(
                params.source_vocab_size, params.hidden_size, "source_embedding")
            self.decoder_embedding_layer = embedding_layer.EmbeddingWeights(
                params.target_vocab_size, params.hidden_size, "target_embedding")
            self.decoder_softmax_layer = embedding_layer.EmbeddingWeights(
                params.target_vocab_size, params.hidden_size, 'soft_max')
        # done
        self.encoder_stack = EncoderDecoder.EncoderStack(params, is_train, self.mode)
        self.decoder_stack = EncoderDecoder.DecoderStack(params, is_train, self.mode)
        self._initializer = tf.variance_scaling_initializer(
            self.params.initializer_gain, mode="fan_avg", distribution="uniform")
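The branch above either ties the encoder embedding, decoder embedding, and softmax projection to a single EmbeddingSharedWeights instance or builds three separate tables. A minimal sketch of the weight-tying idea itself (hypothetical sizes, not the repository's embedding_layer module):

import tensorflow as tf

# One variable plays both roles: input embedding lookup and output softmax weights.
vocab_size, hidden_size = 100, 32  # hypothetical sizes
with tf.variable_scope("shared_embedding", reuse=tf.AUTO_REUSE):
    shared = tf.get_variable("weights", [vocab_size, hidden_size])

ids = tf.constant([[1, 2, 3]], dtype=tf.int32)
embedded = tf.gather(shared, ids)                    # [batch, length, hidden]
logits = tf.tensordot(embedded, shared, [[2], [1]])  # same matrix reused as softmax projection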
Example #2
    def __init__(self, params, is_train, mode=None, scope=None):
        """Initialize layers to build Transformer model.

        Args:
          params: hyperparameter object defining layer sizes, dropout values, etc.
          is_train: boolean indicating whether the model is in training mode. Used to
            determine if dropout layers should be added.
          mode: optional ModeKeys value overriding the mode derived from is_train.
          scope: optional variable scope name under which the embedding and
            softmax weights are created.
        """
        self.dropout_rate = tf.placeholder_with_default(0.0,
                                                        shape=[],
                                                        name="dropout_rate")

        self.is_train = is_train
        self.params = params
        self.name_scope = scope

        # Override the dropout rates with the placeholder: at inference the
        # default of 0.0 is used, while training feeds 0.1 through feed_dict.
        self.params.layer_postprocess_dropout = self.dropout_rate
        self.params.attention_dropout = self.dropout_rate
        self.params.relu_dropout = self.dropout_rate

        if mode is not None:
            self.mode = mode
        elif self.is_train:
            self.mode = ModeKeys.TRAIN
        else:
            self.mode = ModeKeys.PREDICT

        self.initializer = tf.variance_scaling_initializer(
            self.params.initializer_gain,
            mode="fan_avg",
            distribution="uniform")
        # done
        self.encoder_stack = EncoderStack(params, is_train, self.mode)
        self.decoder_stack = DecoderStack(params, is_train, self.mode)

        with tf.variable_scope(self.name_scope):
            if params.shared_embedding_softmax_weights:
                self.embedding_softmax_layer = embedding_layer.EmbeddingSharedWeights(
                    params.vocab_size, params.hidden_size)
                self.encoder_embedding_layer = self.embedding_softmax_layer
                self.decoder_embedding_layer = self.embedding_softmax_layer
                self.decoder_softmax_layer = self.embedding_softmax_layer
            else:
                self.encoder_embedding_layer = embedding_layer.EmbeddingWeights(
                    params.source_vocab_size, params.hidden_size,
                    "source_embedding")
                self.decoder_embedding_layer = embedding_layer.EmbeddingWeights(
                    params.target_vocab_size, params.hidden_size,
                    "target_embedding")
                self.decoder_softmax_layer = embedding_layer.EmbeddingWeights(
                    params.target_vocab_size, params.hidden_size, 'soft_max')
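The placeholder_with_default trick above lets a single graph serve both training and inference: the dropout rate defaults to 0.0 and is only overridden when explicitly fed. A minimal self-contained sketch of that pattern (hypothetical tensors, not this model's graph):

import tensorflow as tf

dropout_rate = tf.placeholder_with_default(0.0, shape=[], name="dropout_rate")
x = tf.ones([2, 4])
y = tf.nn.dropout(x, keep_prob=1.0 - dropout_rate)

with tf.Session() as sess:
    train_out = sess.run(y, feed_dict={dropout_rate: 0.1})  # training: dropout active
    infer_out = sess.run(y)                                 # inference: default 0.0, no dropout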
Example #3
    def init_embed(self, name_scope):
        with tf.variable_scope(name_scope, initializer=self._initializer, reuse=tf.AUTO_REUSE):
            if self.params.shared_embedding_softmax_weights:
                print("sharing embedding!!!")
                self.embedding_softmax_layer = embedding_layer.EmbeddingSharedWeights(
                    self.params.vocab_size, self.params.hidden_size)
                self.encoder_embedding_layer = self.embedding_softmax_layer
                self.decoder_embedding_layer = self.embedding_softmax_layer
                self.decoder_softmax_layer = self.embedding_softmax_layer
            else:
                print("not sharing embedding!!!")
                self.encoder_embedding_layer = embedding_layer.EmbeddingWeights(
                    self.params.source_vocab_size, self.params.hidden_size, "source_embedding")
                self.decoder_embedding_layer = embedding_layer.EmbeddingWeights(
                    self.params.target_vocab_size, self.params.hidden_size, "target_embedding")
                self.decoder_softmax_layer = embedding_layer.EmbeddingWeights(
                    self.params.target_vocab_size, self.params.hidden_size, 'soft_max')
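Because the scope is opened with reuse=tf.AUTO_REUSE, init_embed can be called more than once and the same embedding variables are returned each time. A minimal sketch of that behavior with a hypothetical embed helper (not the repository's EmbeddingWeights):

import tensorflow as tf

def embed(ids, vocab_size, hidden_size, scope):
    # With AUTO_REUSE, a second call under the same scope name reuses the
    # existing variable instead of raising "variable already exists".
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        table = tf.get_variable("weights", [vocab_size, hidden_size])
        return tf.gather(table, ids)

ids = tf.constant([[1, 2, 3]], dtype=tf.int32)
a = embed(ids, 100, 32, "source_embedding")
b = embed(ids, 100, 32, "source_embedding")  # reuses source_embedding/weights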
Example #4
    def __init__(self, params, is_train, mode=None):
        self.is_train = is_train
        self.params = params

        if mode is not None:
            self.mode = mode
        elif self.is_train:
            self.mode = ModeKeys.TRAIN
        else:
            self.mode = ModeKeys.PREDICT
        
        #with tf.device('/cpu:0'):
        #   self.dropout_pl = tf.placeholder(dtype=tf.float32, shape=[], name="dropout_pl")
        #   self.params.layer_postprocess_dropout = self.dropout_pl
        #   self.params.attention_dropout = self.dropout_pl
        #   self.relu_dropout = self.dropout_pl

        if params.shared_embedding_softmax_weights:
            print("sharing embedding!!!")
            self.embedding_softmax_layer = embedding_layer.EmbeddingSharedWeights(
                params.vocab_size, params.hidden_size)
            self.encoder_embedding_layer = self.embedding_softmax_layer
            self.decoder_embedding_layer = self.embedding_softmax_layer
            self.decoder_softmax_layer = self.embedding_softmax_layer
        else:
            print("not sharing embedding!!!")
            self.encoder_embedding_layer = embedding_layer.EmbeddingWeights(
                params.source_vocab_size, params.hidden_size, "source_embedding")
            self.decoder_embedding_layer = embedding_layer.EmbeddingWeights(
                params.target_vocab_size, params.hidden_size, "target_embedding")
            self.decoder_softmax_layer = embedding_layer.EmbeddingWeights(
                params.target_vocab_size, params.hidden_size, 'soft_max')
        # done
        self.encoder_stack = EncoderDecoder.EncoderStack(params, is_train, self.mode)
        self.decoder_stack = EncoderDecoder.DecoderStack(params, is_train, self.mode)
        self._initializer = tf.variance_scaling_initializer(
            self.params.initializer_gain, mode="fan_avg", distribution="uniform")
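The variance_scaling_initializer configured here ("fan_avg", uniform) is a Glorot-style scheme scaled by initializer_gain. A minimal sketch of applying it through a variable scope (the gain of 1.0 is a hypothetical stand-in for params.initializer_gain):

import tensorflow as tf

initializer = tf.variance_scaling_initializer(
    1.0, mode="fan_avg", distribution="uniform")
with tf.variable_scope("demo", initializer=initializer):
    # Weights are drawn uniformly from
    # [-sqrt(6 / (fan_in + fan_out)), +sqrt(6 / (fan_in + fan_out))].
    w = tf.get_variable("w", shape=[512, 512])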
Example #5
        variance = tf.reduce_mean(tf.square(x - mean),
                                  axis=[-1],
                                  keepdims=True)
        norm_x = (x - mean) * tf.rsqrt(variance + epsilon)
        return norm_x * self.scale + self.bias
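# The lines above are the tail of a layer-normalization call. Below is a
# plausible reconstruction of the enclosing class: a sketch assuming the usual
# learnable scale/bias parameters, which may differ from this repository's
# exact code, and which assumes `import tensorflow as tf` at the top of the
# file (as the __main__ block below also does).
class LayerNormalization(tf.layers.Layer):
    def __init__(self, hidden_size):
        super(LayerNormalization, self).__init__()
        self.hidden_size = hidden_size

    def build(self, _):
        # Learnable per-feature scale and bias, initialized to the identity transform.
        self.scale = tf.get_variable("layer_norm_scale", [self.hidden_size],
                                     initializer=tf.ones_initializer())
        self.bias = tf.get_variable("layer_norm_bias", [self.hidden_size],
                                    initializer=tf.zeros_initializer())
        self.built = True

    def call(self, x, epsilon=1e-6):
        # Normalize over the last (hidden) dimension, then rescale and shift.
        mean = tf.reduce_mean(x, axis=[-1], keepdims=True)
        variance = tf.reduce_mean(tf.square(x - mean),
                                  axis=[-1],
                                  keepdims=True)
        norm_x = (x - mean) * tf.rsqrt(variance + epsilon)
        return norm_x * self.scale + self.bias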


if __name__ == "__main__":
    import os
    tf.enable_eager_execution()
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    params = model_params.TransformerBaseParams()
    x_inputs = tf.constant([[1, 2, 3, 0, 0], [3, 4, 5, 6, 8]], dtype=tf.int32)

    Enc_Embedding = embedding_layer.EmbeddingWeights(params.source_vocab_size,
                                                     params.hidden_size,
                                                     "source_embedding")
    embedded_inputs = Enc_Embedding(
        x_inputs, not ModeKeys.is_predict_one(ModeKeys.TRAIN))
    print(embedded_inputs.shape)
    attention_bias = model_utils.get_padding_bias(x_inputs)
    print(attention_bias.shape)
    encoder_stack = EncoderStack(params, is_train=True, mode=ModeKeys.TRAIN)
    enc_out = encoder_stack(embedded_inputs, attention_bias, None)
    print(enc_out.shape)
    decoder_self_attention_bias = model_utils.get_decoder_self_attention_bias(
        10)
    self_attention_bias = decoder_self_attention_bias[:, :, 0:1, :1]
    print(self_attention_bias)
    attention_bias = model_utils.get_padding_bias(x_inputs)
    cache = {