def __init__(self, params, name=None):
  super(Transformer, self).__init__(name=name)
  self.params = params
  self.encoder_stack = EncoderStack(params)
  self.decoder_stack = DecoderStack(params)
  self.input_embedding_layer = embedding_layer.EmbeddingSharedWeights(
      params['input_vocab_size'], params['hidden_size'])
  self.target_embedding_layer = embedding_layer.EmbeddingSharedWeights(
      params['target_vocab_size'], params['hidden_size'])
def __init__(self, params, name=None):
  super().__init__(name=name)
  self.params = params
  self.embedding_softmax_layer = embedding_layer.EmbeddingSharedWeights(
      params["vocab_size"], params["hidden_size"])
  self.encoder_stack = EncoderStack(params)
  self.decoder_stack = DecoderStack(params)
def Embedding(self, x):
  """Embed inputs and add positional encoding.

  Args:
    x: int tensor of shape [batch_size, length].

  Returns:
    float tensor of shape [batch_size, length, hidden_size].
  """
  hparams = self.hparams
  if hparams['embedding_model'] == 'transformer':
    self.embedding_layer = embedding_layer.EmbeddingSharedWeights(
        hparams["vocab_size"], hparams["hidden_size"])
    embedded_inputs = self.embedding_layer(x)
    with tf.name_scope("add_pos_encoding"):
      length = tf.shape(embedded_inputs)[1]
      pos_encoding = model_utils.get_position_encoding(
          length, hparams["hidden_size"])
      encoder_inputs = embedded_inputs + pos_encoding
    if self.hparams['train']:
      encoder_inputs = tf.nn.dropout(
          encoder_inputs, rate=self.hparams["layer_postprocess_dropout"])
    self.inputs_padding = model_utils.get_padding(x)
    self.attention_bias = model_utils.get_padding_bias(x)
    return encoder_inputs
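# The snippet above calls model_utils.get_position_encoding. Below is a
# minimal sketch of what such a helper could compute, assuming the standard
# sinusoidal encoding (sines and cosines at geometrically spaced timescales).
# The name, signature, and default timescales mirror the call above but are
# otherwise assumptions, not the definitive model_utils implementation.
import math

import tensorflow as tf


def get_position_encoding(length, hidden_size,
                          min_timescale=1.0, max_timescale=1.0e4):
  """Return a [length, hidden_size] tensor of sinusoidal position signals."""
  position = tf.cast(tf.range(length), tf.float32)
  num_timescales = hidden_size // 2
  # Geometric progression of wavelengths from min_timescale to max_timescale.
  log_timescale_increment = (
      math.log(float(max_timescale) / float(min_timescale)) /
      (tf.cast(num_timescales, tf.float32) - 1))
  inv_timescales = min_timescale * tf.exp(
      tf.cast(tf.range(num_timescales), tf.float32) * -log_timescale_increment)
  scaled_time = tf.expand_dims(position, 1) * tf.expand_dims(inv_timescales, 0)
  # First half of the channels holds sines, second half cosines.
  return tf.concat([tf.sin(scaled_time), tf.cos(scaled_time)], axis=1)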
def __init__(self, params, train):
  """Initialize layers to build Transformer model.

  Args:
    params: hyperparameter object initialized in model_params.
    train: boolean that determines whether dropout is applied.
  """
  super(Transformer, self).__init__()
  self.train = train
  self.params = params
  # encoder embedding
  self.embedding_layer_encoder = embedding_layer.EmbeddingSharedWeights(
      params["vocab_size_input"], params["hidden_size"], name="enc-embed")
  # decoder embedding
  self.embedding_softmax_layer_decoder = embedding_layer.EmbeddingSharedWeights(
      params["vocab_size_output"], params["hidden_size"], name="dec-embed")
  # encoder / decoder stacks
  self.encoder_stack = EncoderStack(params, train, name="Enc-stack")
  self.decoder_stack = DecoderStack(params, train, name="Dec-stack")
def __init__(self, params, name=None): """Initialize layers to build Transformer model. Args: params: hyperparameter object defining layer sizes, dropout values, etc. name: name of the model. """ super(Transformer, self).__init__(name=name) self.params = params self.embedding_softmax_layer = embedding_layer.EmbeddingSharedWeights( params["vocab_size"], params["hidden_size"]) self.encoder_stack = EncoderStack(params) self.decoder_stack = DecoderStack(params)
def __init__(self, params, train): """Initialize layers to build Transformer model. Args: params: hyperparameter object defining layer sizes, dropout values, etc. train: boolean indicating whether the model is in training mode. Used to determine if dropout layers should be added. """ self.train = train self.params = params self.embedding_softmax_layer = embedding_layer.EmbeddingSharedWeights( params.vocab_size, params.hidden_size) self.encoder_stack = EncoderStack(params, train) self.decoder_stack = DecoderStack(params, train)
def test_embedding_layer(self):
  vocab_size = 50
  batch_size = 32
  hidden_size = 256
  embedding_size = 64
  length = 2
  layer = embedding_layer.EmbeddingSharedWeights(
      vocab_size, embedding_size, hidden_size, 0.02)
  inputs = tf.ones([batch_size, length], dtype="int32")
  y = layer(inputs)
  self.assertEqual(y.shape, (batch_size, length, hidden_size))
  x = tf.ones([1, length, embedding_size])
  output = layer.linear(x)
  self.assertEqual(output.shape, (1, length, vocab_size))
def test_embedding_shared_weights(self):
  vocab_size = 50
  hidden_size = 64
  length = 2
  layer = embedding_layer.EmbeddingSharedWeights(vocab_size, hidden_size)
  self.assertDictEqual(layer.get_config(), {
      "vocab_size": 50,
      "hidden_size": 64,
  })
  idx = tf.ones([1, length], dtype="int32")
  y = layer(idx)
  self.assertEqual(y.shape, (1, length, hidden_size))
  x = tf.ones([1, length, hidden_size])
  output = layer(x, "linear")
  self.assertEqual(output.shape, (1, length, vocab_size))
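# The two tests above exercise an EmbeddingSharedWeights layer that acts both
# as the input embedding and, via a "linear" mode, as the pre-softmax
# projection that reuses the same weight matrix. Below is a minimal sketch
# consistent with the second test (two-argument constructor, get_config, and
# call(x, "linear")); it is an assumption of how such a layer could look, not
# the exact implementation under test, and it does not cover the factorized
# four-argument variant used in the first test.
import tensorflow as tf


class EmbeddingSharedWeights(tf.keras.layers.Layer):
  """Embedding layer whose weights are shared with the output projection."""

  def __init__(self, vocab_size, hidden_size, **kwargs):
    super().__init__(**kwargs)
    self.vocab_size = vocab_size
    self.hidden_size = hidden_size

  def build(self, input_shape):
    # Single [vocab_size, hidden_size] matrix used by both modes.
    self.shared_weights = self.add_weight(
        name="weights",
        shape=[self.vocab_size, self.hidden_size],
        initializer=tf.keras.initializers.RandomNormal(
            mean=0.0, stddev=self.hidden_size ** -0.5))
    super().build(input_shape)

  def get_config(self):
    return {"vocab_size": self.vocab_size, "hidden_size": self.hidden_size}

  def call(self, inputs, mode="embedding"):
    if mode == "embedding":
      # Look up token embeddings and scale, as in the original Transformer.
      embeddings = tf.gather(self.shared_weights, inputs)
      return embeddings * self.hidden_size ** 0.5
    elif mode == "linear":
      # Project hidden states back onto the vocabulary to produce logits.
      batch_size = tf.shape(inputs)[0]
      length = tf.shape(inputs)[1]
      x = tf.reshape(inputs, [-1, self.hidden_size])
      logits = tf.matmul(x, self.shared_weights, transpose_b=True)
      return tf.reshape(logits, [batch_size, length, self.vocab_size])
    raise ValueError("mode {} is not valid.".format(mode))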