    def __init__(self, params, is_train, mode=None):
        self.is_train = is_train
        self.params = params

        if mode is not None:
            self.mode = mode
        elif self.is_train:
            self.mode = ModeKeys.TRAIN
        else:
            self.mode = ModeKeys.PREDICT

        if params.shared_embedding_softmax_weights:
            print("sharing embedding!!!")
            self.embedding_softmax_layer = embedding_layer.EmbeddingSharedWeights(
                params.vocab_size, params.hidden_size)
            self.encoder_embedding_layer = self.embedding_softmax_layer
            self.decoder_embedding_layer = self.embedding_softmax_layer
            self.decoder_softmax_layer = self.embedding_softmax_layer
        else:
            print("not sharing embedding!!!")
            self.encoder_embedding_layer = embedding_layer.EmbeddingWeights(
                params.source_vocab_size, params.hidden_size, "source_embedding")
            self.decoder_embedding_layer = embedding_layer.EmbeddingWeights(
                params.target_vocab_size, params.hidden_size, "target_embedding")
            self.decoder_softmax_layer = embedding_layer.EmbeddingWeights(
                params.target_vocab_size, params.hidden_size, 'soft_max')
        # done
        self.encoder_stack = EncoderDecoder.EncoderStack(params, is_train, self.mode)
        self.decoder_stack = EncoderDecoder.DecoderStack(params, is_train, self.mode)
        self._initializer = tf.variance_scaling_initializer(
            self.params.initializer_gain, mode="fan_avg", distribution="uniform")
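In the shared-weights branch above, the encoder embedding, the decoder embedding, and the softmax projection all point at one EmbeddingSharedWeights instance, i.e. a single weight matrix plays all three roles. A minimal NumPy sketch of that tying (the lookup/project helpers and the toy sizes are illustrative, not part of embedding_layer):

import numpy as np

vocab_size, hidden_size = 8, 4
shared_weights = np.random.randn(vocab_size, hidden_size)  # one matrix for all three roles

def lookup(ids):
    # Embedding: [batch, length] token ids -> [batch, length, hidden] vectors.
    return shared_weights[ids]

def project(hidden_states):
    # Softmax projection: the same matrix, transposed, produces vocabulary logits.
    return hidden_states @ shared_weights.T

ids = np.array([[1, 2, 3]])
print(project(lookup(ids)).shape)  # (1, 3, 8) -> [batch, length, vocab]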
Example #2
    def __init__(self, params, train):
        """Initialize layers to build Transformer model.

        Args:
          params: hyperparameter object defining layer sizes, dropout values, etc.
          train: boolean indicating whether the model is in training mode. Used to
            determine if dropout layers should be added.
        """
        self.train = train
        self.params = params

        self.source_embedding_layer = embedding_layer.EmbeddingSharedWeights(
            params.vocab_size_src, params.hidden_size)
        self.target_embedding_layer = embedding_layer.EmbeddingSharedWeights(
            params.vocab_size_tar, params.hidden_size)
        self.encoder_stack = EncoderStack(params, train)
        self.decoder_stack = DecoderStack(params, train)
Example #3
    def __init__(self, params, is_train, mode=None, scope=None):
        """Initialize layers to build Transformer model.

        Args:
          params: hyperparameter object defining layer sizes, dropout values, etc.
          is_train: boolean indicating whether the model is in training mode. Used to
            determine if dropout layers should be added.
          mode: optional ModeKeys value; overrides the mode derived from is_train.
          scope: optional name for the variable scope that wraps the embedding layers.
        """
        self.dropout_rate = tf.placeholder_with_default(0.0,
                                                        shape=[],
                                                        name="dropout_rate")

        self.is_train = is_train
        self.params = params
        self.name_scope = scope

        # Override the dropout rates with the placeholder: at inference time the
        # rate defaults to 0.0, while during training it is fed (e.g. 0.1).
        self.params.layer_postprocess_dropout = self.dropout_rate
        self.params.attention_dropout = self.dropout_rate
        self.params.relu_dropout = self.dropout_rate

        if mode is not None:
            self.mode = mode
        elif self.is_train:
            self.mode = ModeKeys.TRAIN
        else:
            self.mode = ModeKeys.PREDICT

        self.initializer = tf.variance_scaling_initializer(
            self.params.initializer_gain,
            mode="fan_avg",
            distribution="uniform")
        # done
        self.encoder_stack = EncoderStack(params, is_train, self.mode)
        self.decoder_stack = DecoderStack(params, is_train, self.mode)

        with tf.variable_scope(self.name_scope):
            if params.shared_embedding_softmax_weights:
                self.embedding_softmax_layer = embedding_layer.EmbeddingSharedWeights(
                    params.vocab_size, params.hidden_size)
                self.encoder_embedding_layer = self.embedding_softmax_layer
                self.decoder_embedding_layer = self.embedding_softmax_layer
                self.decoder_softmax_layer = self.embedding_softmax_layer
            else:
                self.encoder_embedding_layer = embedding_layer.EmbeddingWeights(
                    params.source_vocab_size, params.hidden_size,
                    "source_embedding")
                self.decoder_embedding_layer = embedding_layer.EmbeddingWeights(
                    params.target_vocab_size, params.hidden_size,
                    "target_embedding")
                self.decoder_softmax_layer = embedding_layer.EmbeddingWeights(
                    params.target_vocab_size, params.hidden_size, 'soft_max')
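The dropout_rate placeholder above lets one graph serve both training and inference: when nothing is fed the default of 0.0 applies (no dropout), and the training rate is supplied through feed_dict. A standalone sketch of that pattern, assuming TF 1.x style sessions via tf.compat.v1; it does not use the model's params object:

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

dropout_rate = tf.placeholder_with_default(0.0, shape=[], name="dropout_rate")
x = tf.ones([2, 4])
y = tf.nn.dropout(x, rate=dropout_rate)

with tf.Session() as sess:
    print(sess.run(y))                                 # inference: default rate 0.0, input passes through
    print(sess.run(y, feed_dict={dropout_rate: 0.1}))  # training: feed the desired rate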
Example #4
    def __init__(self, params, train):
        """Initialize layers to build Transformer model.

    Args:
      params: hyperparameter object defining layer sizes, dropout values, etc.
      train: boolean indicating whether the model is in training mode. Used to
        determine if dropout layers should be added.
    """
        self.train = train
        self.params = params

        self.embedding_softmax_layer = embedding_layer.EmbeddingSharedWeights(
            params["vocab_size"],
            params["hidden_size"],
            method="matmul" if params["tpu"] else "gather")
        self.embedding_en_softmax_layer = embedding_layer.EmbeddingSharedWeights(
            params['vocab_size_en'],
            params["hidden_size"],
            method="matmul" if params["tpu"] else "gather")

        self.encoder_stack = EncoderStack(params, train)
        self.decoder_stack = DecoderStack(params, train)
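This variant reads hyperparameters from a plain dict and switches the embedding method for TPUs (one-hot matmul instead of gather). A hypothetical params dict covering only the keys referenced above; the values are illustrative, with 33708 and 512 borrowed from the vocab/hidden sizes mentioned elsewhere on this page:

params = {
    "vocab_size": 33708,     # primary vocabulary size (illustrative)
    "vocab_size_en": 32000,  # second, English-side vocabulary size (illustrative)
    "hidden_size": 512,
    "tpu": False,            # True selects method="matmul"; False selects "gather"
}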
Example #5
    def __init__(self, params, train, **kwargs):
        super(Transformer, self).__init__(**kwargs)
        self.train = train
        self.param = params

        with self.name_scope():
            self.embedding_softmax_layer = embedding_layer.EmbeddingSharedWeights(
                params.vocab_size, params.hidden_size)
            self.encoder_stack = EncoderStack(params, train)
            self.decoder_stack = DecoderStack(params, train)
            self.dropout_input = nn.Dropout(
                1 - self.param.layer_postprocess_dropout)
            self.dropout_output = nn.Dropout(
                1 - self.param.layer_postprocess_dropout)
Example #6
    def init_embed(self, name_scope):
        with tf.variable_scope(name_scope, initializer=self._initializer, reuse=tf.AUTO_REUSE):
            if self.params.shared_embedding_softmax_weights:
                print("sharing embedding!!!")
                self.embedding_softmax_layer = embedding_layer.EmbeddingSharedWeights(
                    self.params.vocab_size, self.params.hidden_size)
                self.encoder_embedding_layer = self.embedding_softmax_layer
                self.decoder_embedding_layer = self.embedding_softmax_layer
                self.decoder_softmax_layer = self.embedding_softmax_layer
            else:
                print("not sharing embedding!!!")
                self.encoder_embedding_layer = embedding_layer.EmbeddingWeights(
                    self.params.source_vocab_size, self.params.hidden_size, "source_embedding")
                self.decoder_embedding_layer = embedding_layer.EmbeddingWeights(
                    self.params.target_vocab_size, self.params.hidden_size, "target_embedding")
                self.decoder_softmax_layer = embedding_layer.EmbeddingWeights(
                    self.params.target_vocab_size, self.params.hidden_size, 'soft_max')
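Because the scope is opened with reuse=tf.AUTO_REUSE, init_embed can be called more than once under the same name_scope and later calls return the already-created variables instead of raising. A minimal TF 1.x sketch of that behaviour (the make_table helper is illustrative):

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

def make_table(scope):
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        return tf.get_variable("weights", shape=[100, 16])

first = make_table("embeddings")
second = make_table("embeddings")  # reused, not recreated
print(first is second)             # True: both names resolve to the same variable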
Example #7
  def __init__(self, params, train):
    """Initialize layers to build Transformer model.

    Args:
      params: hyperparameter object defining layer sizes, dropout values, etc.
      train: boolean indicating whether the model is in training mode. Used to
        determine if dropout layers should be added.
    """
    self.train = train
    self.params = params
    # SSY 1: transformer/model/embedding_layer.py transforms each input word into an embedding vector.
    # SSY: vocab_size (33708) and hidden_size (512) come from transformer/model/model_params.py.
    # The input to this embedding layer is [batch_size, seq_length];
    # the output is [batch_size, seq_length, emb_dim].
    self.embedding_softmax_layer = embedding_layer.EmbeddingSharedWeights(
        params.vocab_size, params.hidden_size)
    # SSY 2: see below; only matmul and Dense.
    self.encoder_stack = EncoderStack(params, train)
    # SSY 3: see below.
    self.decoder_stack = DecoderStack(params, train)
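The shape change described in the SSY comments ([batch_size, seq_length] ids in, [batch_size, seq_length, emb_dim] vectors out) can be checked with a plain NumPy table lookup; the sizes here are toy values, not the real 33708/512:

import numpy as np

batch, length, vocab, hidden = 2, 5, 10, 4
table = np.random.randn(vocab, hidden)
ids = np.random.randint(0, vocab, size=(batch, length))
print(table[ids].shape)  # (2, 5, 4) -> [batch_size, seq_length, emb_dim]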
Example #8
    def __init__(self, params, is_train, mode=None):
        self.is_train = is_train
        self.params = params

        if mode is not None:
            self.mode = mode
        elif self.is_train:
            self.mode = ModeKeys.TRAIN
        else:
            self.mode = ModeKeys.PREDICT
        
        #with tf.device('/cpu:0'):
        #   self.dropout_pl = tf.placeholder(dtype=tf.float32, shape=[], name="dropout_pl")
        #   self.params.layer_postprocess_dropout = self.dropout_pl
        #   self.params.attention_dropout = self.dropout_pl
        #   self.relu_dropout = self.dropout_pl

        if params.shared_embedding_softmax_weights:
            print("sharing embedding!!!")
            self.embedding_softmax_layer = embedding_layer.EmbeddingSharedWeights(
                params.vocab_size, params.hidden_size)
            self.encoder_embedding_layer = self.embedding_softmax_layer
            self.decoder_embedding_layer = self.embedding_softmax_layer
            self.decoder_softmax_layer = self.embedding_softmax_layer
        else:
            print("not sharing embedding!!!")
            self.encoder_embedding_layer = embedding_layer.EmbeddingWeights(
                params.source_vocab_size, params.hidden_size, "source_embedding")
            self.decoder_embedding_layer = embedding_layer.EmbeddingWeights(
                params.target_vocab_size, params.hidden_size, "target_embedding")
            self.decoder_softmax_layer = embedding_layer.EmbeddingWeights(
                params.target_vocab_size, params.hidden_size, 'soft_max')
        # done
        self.encoder_stack = EncoderDecoder.EncoderStack(params, is_train, self.mode)
        self.decoder_stack = EncoderDecoder.DecoderStack(params, is_train, self.mode)
        self._initializer = tf.variance_scaling_initializer(
            self.params.initializer_gain, mode="fan_avg", distribution="uniform")