def _decode(self, input_dict):
  if 'target_tensors' in input_dict:
    targets = input_dict['target_tensors'][0]
  else:
    targets = None
  encoder_outputs = input_dict['encoder_output']['outputs']
  inputs_attention_bias = (
      input_dict['encoder_output']['inputs_attention_bias'])
  self.embedding_softmax_layer = (
      input_dict['encoder_output']['embedding_softmax_layer'])

  with tf.name_scope("decode"):
    # prepare decoder layers
    if len(self.layers) == 0:
      for _ in range(self.params["num_hidden_layers"]):
        self_attention_layer = attention_layer.SelfAttention(
            self.params["hidden_size"],
            self.params["num_heads"],
            self.params["attention_dropout"],
            self.mode == "train",
        )
        enc_dec_attention_layer = attention_layer.Attention(
            self.params["hidden_size"],
            self.params["num_heads"],
            self.params["attention_dropout"],
            self.mode == "train",
        )
        feed_forward_network = ffn_layer.FeedFowardNetwork(
            self.params["hidden_size"],
            self.params["filter_size"],
            self.params["relu_dropout"],
            self.mode == "train",
        )
        self.layers.append([
            PrePostProcessingWrapper(self_attention_layer, self.params,
                                     self.mode == "train"),
            PrePostProcessingWrapper(enc_dec_attention_layer, self.params,
                                     self.mode == "train"),
            PrePostProcessingWrapper(feed_forward_network, self.params,
                                     self.mode == "train")
        ])
      self.output_normalization = LayerNormalization(
          self.params["hidden_size"])

    if targets is None:
      return self.predict(encoder_outputs, inputs_attention_bias)
    else:
      logits = self.decode_pass(targets, encoder_outputs,
                                inputs_attention_bias)
      return {
          "logits": logits,
          "outputs": [tf.argmax(logits, axis=-1)],
          "final_state": None,
          "final_sequence_lengths": None
      }
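# Each decoder sublayer above is wrapped in PrePostProcessingWrapper. The snippet
# below is only a minimal NumPy sketch of the standard pre-norm residual pattern
# such a wrapper is commonly assumed to apply (layer-norm -> sublayer -> dropout ->
# residual add); it is not the project's actual wrapper implementation.
import numpy as np

def pre_post_process_sketch(x, sublayer, dropout_keep_prob=0.9, train=True, eps=1e-6):
  # Layer-normalize the input before calling the sublayer.
  mean = x.mean(axis=-1, keepdims=True)
  var = x.var(axis=-1, keepdims=True)
  y = sublayer((x - mean) / np.sqrt(var + eps))
  # Dropout on the sublayer output (training only), then the residual connection.
  if train:
    mask = (np.random.rand(*y.shape) < dropout_keep_prob) / dropout_keep_prob
    y = y * mask
  return x + y

# Example: x = np.random.randn(2, 5, 8); pre_post_process_sketch(x, lambda t: t * 0.5)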
def _encode(self, input_dict):
  if len(self.layers) == 0:
    # prepare encoder graph
    self.embedding_softmax_layer = embedding_layer.EmbeddingSharedWeights(
        self.params["src_vocab_size"], self.params["hidden_size"],
        pad_vocab_to_eight=self.params.get('pad_embeddings_2_eight', False))

    for _ in range(self.params['encoder_layers']):
      # Create sublayers for each layer.
      self_attention_layer = attention_layer.SelfAttention(
          self.params["hidden_size"], self.params["num_heads"],
          self.params["attention_dropout"], self.mode == "train")
      feed_forward_network = ffn_layer.FeedFowardNetwork(
          self.params["hidden_size"], self.params["filter_size"],
          self.params["relu_dropout"], self.mode == "train")

      self.layers.append([
          PrePostProcessingWrapper(self_attention_layer, self.params,
                                   self.mode == "train"),
          PrePostProcessingWrapper(feed_forward_network, self.params,
                                   self.mode == "train")])

    # Create final layer normalization layer.
    self.output_normalization = LayerNormalization(self.params["hidden_size"])

  # actual encoder part
  with tf.name_scope("encode"):
    inputs = input_dict['source_tensors'][0]

    # Prepare inputs to the layer stack by adding positional encodings and
    # applying dropout.
    embedded_inputs = self.embedding_softmax_layer(inputs)
    inputs_padding = utils.get_padding(inputs)
    inputs_attention_bias = utils.get_padding_bias(inputs)

    with tf.name_scope("add_pos_encoding"):
      length = tf.shape(embedded_inputs)[1]
      pos_encoding = utils.get_position_encoding(
          length, self.params["hidden_size"])
      encoder_inputs = embedded_inputs + tf.cast(x=pos_encoding,
                                                 dtype=embedded_inputs.dtype)

    if self.mode == "train":
      encoder_inputs = tf.nn.dropout(
          encoder_inputs, 1 - self.params["layer_postprocess_dropout"])

    encoded = self._call(encoder_inputs, inputs_attention_bias, inputs_padding)
    return {'outputs': encoded,
            'inputs_attention_bias': inputs_attention_bias,
            'state': None,
            'src_lengths': input_dict['source_tensors'][1],
            'embedding_softmax_layer': self.embedding_softmax_layer,
            'encoder_input': inputs}
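# utils.get_padding / utils.get_padding_bias are not shown in this section; the
# following is a minimal NumPy sketch of what they are assumed to compute for
# id-0 padding: a 0/1 padding indicator, and a large negative bias (broadcastable
# against the attention logits) that masks padded positions out of the softmax.
import numpy as np

PAD_ID = 0      # assumed padding token id
NEG_INF = -1e9  # assumed "minus infinity" constant

def get_padding_sketch(ids):
  # 1.0 where the input is padding, 0.0 elsewhere; shape [batch, length].
  return (ids == PAD_ID).astype(np.float32)

def get_padding_bias_sketch(ids):
  # Shape [batch, 1, 1, length] so it broadcasts over heads and query positions.
  return get_padding_sketch(ids)[:, np.newaxis, np.newaxis, :] * NEG_INF

# ids = np.array([[5, 7, 2, 0], [4, 9, 0, 0]])
# get_padding_bias_sketch(ids)[1, 0, 0]  ->  [0., 0., -1e9, -1e9]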
def _encode(self, input_dict):
  training = (self.mode == "train")

  if len(self.layers) == 0:
    # prepare encoder graph
    self.embedding_softmax_layer = embedding_layer.EmbeddingSharedWeights(
        self.params["src_vocab_size"], self.params["hidden_size"],
        pad_vocab_to_eight=self.params.get('pad_embeddings_2_eight', False),
    )

    for _ in range(self.params['encoder_layers']):
      # Create sublayers for each layer.
      self_attention_layer = attention_layer.SelfAttention(
          hidden_size=self.params["hidden_size"],
          num_heads=self.params["num_heads"],
          attention_dropout=self.params["attention_dropout"],
          train=training,
          regularizer=self.regularizer,
          batch_size=self.batch_size,
          num_feature=self.num_features)
      feed_forward_network = ffn_layer.FeedFowardNetwork(
          hidden_size=self.params["hidden_size"],
          filter_size=self.params["filter_size"],
          relu_dropout=self.params["relu_dropout"],
          train=training,
          regularizer=self.regularizer)

      self.layers.append([
          PrePostProcessingWrapper(self_attention_layer, self.params, training),
          PrePostProcessingWrapper(feed_forward_network, self.params, training)
      ])

    # Final normalization layer: batch norm or layer norm, per norm_params.
    print("Encoder:", self.norm_params["type"], self.mode)
    if self.norm_params["type"] == "batch_norm":
      self.output_normalization = Transformer_BatchNorm(
          training=training, params=self.norm_params)
    else:
      self.output_normalization = LayerNormalization(
          hidden_size=self.params["hidden_size"], params=self.norm_params)

  # actual encoder part
  with tf.name_scope("encode"):
    inputs, src_lengths = input_dict['source_tensors']

    # Prepare inputs to the layer stack by adding positional encodings and
    # applying dropout.
    embedded_inputs = self.embedding_softmax_layer(inputs)
    if self.params["remove_padding"]:
      inputs_padding = utils.get_padding(inputs)
    else:
      inputs_padding = None
    inputs_attention_bias = utils.get_padding_bias(inputs)
    # Rearrange the padding bias to match this encoder's 5-D attention layout.
    inputs_attention_bias = tf.transpose(inputs_attention_bias, [0, 1, 3, 2, 4])

    with tf.name_scope("add_pos_encoding"):
      length = tf.shape(embedded_inputs)[1]
      pos_encoding = utils.get_position_encoding(
          length, self.params["hidden_size"],
      )
      pos_encoding = tf.cast(x=pos_encoding, dtype=embedded_inputs.dtype)
      # Broadcast the [length, hidden] table over the batch and feature dims.
      pos_encoding_exp = pos_encoding[None, :, None, :]
      encoder_inputs = embedded_inputs + pos_encoding_exp

    if self.mode == "train":
      encoder_inputs = tf.nn.dropout(
          encoder_inputs,
          keep_prob=1.0 - self.params["layer_postprocess_dropout"],
      )

    encoded = self._call(encoder_inputs, inputs_attention_bias, inputs_padding)
    return {
        'outputs': encoded,
        'inputs_attention_bias': inputs_attention_bias,
        'state': None,
        'src_lengths': src_lengths,
        'embedding_softmax_layer': self.embedding_softmax_layer,
        'encoder_input': inputs
    }
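# utils.get_position_encoding is assumed to return the standard [length, hidden_size]
# sinusoidal table from "Attention Is All You Need". The NumPy sketch below shows that
# computation, plus the broadcast used above, where pos_encoding[None, :, None, :]
# lets a 2-D table be added to an assumed 4-D [batch, time, feature, hidden] input.
import numpy as np

def position_encoding_sketch(length, hidden_size,
                             min_timescale=1.0, max_timescale=1.0e4):
  position = np.arange(length, dtype=np.float32)
  num_timescales = hidden_size // 2
  log_increment = np.log(max_timescale / min_timescale) / max(num_timescales - 1, 1)
  inv_timescales = min_timescale * np.exp(
      -log_increment * np.arange(num_timescales, dtype=np.float32))
  scaled_time = position[:, None] * inv_timescales[None, :]
  # First half sines, second half cosines, concatenated along the hidden dim.
  return np.concatenate([np.sin(scaled_time), np.cos(scaled_time)], axis=1)

# embedded = np.zeros((2, 50, 3, 512), dtype=np.float32)  # [batch, time, feature, hidden]
# pos = position_encoding_sketch(50, 512)                 # [time, hidden]
# out = embedded + pos[None, :, None, :]                  # broadcasts to embedded's shape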