def decode(self, decoder_inputs, encoder_outputs, train, attention_bias):
  """Generate logits for each value in the target sequence.

  Args:
    decoder_inputs: continuous representation of the target sequence.
      float tensor with shape [batch_size, target_length, hidden_size]
    encoder_outputs: continuous representation of input sequence.
      float tensor with shape [batch_size, input_length, hidden_size]
    train: boolean, whether the model is in training mode.
    attention_bias: float tensor with shape [batch_size, 1, 1, input_length]

  Returns:
    float32 tensor with shape [batch_size, target_length, vocab_size]
  """
  with tf.name_scope("decode"):
    # Prepare inputs to decoder layers by adding positional encoding and
    # applying dropout.
    with tf.name_scope("add_pos_encoding"):
      length = tf.shape(decoder_inputs)[1]
      pos_encoding = model_utils.get_position_encoding(
          length, self.params["hidden_size"])
      decoder_inputs += pos_encoding
    if self.train:
      decoder_inputs = tf.nn.dropout(
          decoder_inputs, 1 - self.params["layer_postprocess_dropout"])

    # Run the decoder stack. Note: the self-attention and encoder-decoder
    # attention biases are passed as None in this variant.
    outputs = self.decoder_stack(
        decoder_inputs, encoder_outputs, train,
        decoder_self_attention_bias=None, attention_bias=None)
    return outputs
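# For reference, a minimal sketch of the sinusoidal signal that
# model_utils.get_position_encoding is assumed to compute (following the
# standard Transformer formulation); the actual helper lives in model_utils
# and may differ in details such as its default timescales.
import math

import tensorflow as tf


def get_position_encoding_sketch(length, hidden_size,
                                 min_timescale=1.0, max_timescale=1e4):
  """Returns a [length, hidden_size] tensor of sine/cosine position signals."""
  position = tf.cast(tf.range(length), tf.float32)
  num_timescales = hidden_size // 2
  # Geometrically spaced wavelengths between min_timescale and max_timescale.
  log_timescale_increment = (
      math.log(float(max_timescale) / float(min_timescale)) /
      (float(num_timescales) - 1))
  inv_timescales = min_timescale * tf.exp(
      tf.cast(tf.range(num_timescales), tf.float32) * -log_timescale_increment)
  scaled_time = tf.expand_dims(position, 1) * tf.expand_dims(inv_timescales, 0)
  # First half of the channels use sine, second half cosine.
  return tf.concat([tf.sin(scaled_time), tf.cos(scaled_time)], axis=1)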
def encode(self, inputs, train, attention_bias):
  """Generate continuous representation for inputs.

  Args:
    inputs: int tensor with shape [batch_size, input_length].
    train: boolean, whether the model is in training mode.
    attention_bias: float tensor with shape [batch_size, 1, 1, input_length]

  Returns:
    float tensor with shape [batch_size, input_length, hidden_size]
  """
  with tf.name_scope("encode"):
    # Prepare inputs to the layer stack by normalizing them, adding positional
    # encodings and applying dropout.
    with tf.name_scope("add_pos_encoding"):
      length = tf.shape(inputs)[1]
      pos_encoding = model_utils.get_position_encoding(
          length, self.params["hidden_size"], max_timescale=500)
      encoder_inputs = self.input_normalization(inputs) + pos_encoding

    if self.train:
      encoder_inputs = tf.nn.dropout(
          encoder_inputs, 1 - self.params["layer_postprocess_dropout"])

    return self.encoder_stack(encoder_inputs, train, attention_bias,
                              inputs_padding=None)
def encode(self, inputs, attention_bias):
  """Generate continuous representation for inputs.

  Args:
    inputs: int tensor with shape [batch_size, input_length].
    attention_bias: float tensor with shape [batch_size, 1, 1, input_length]

  Returns:
    float tensor with shape [batch_size, input_length, hidden_size]
  """
  with tf.name_scope("encode"):
    # Prepare inputs to the layer stack by adding positional encodings and
    # applying dropout.

    # Embed the input token ids. ---by zsw 2018.12.4
    embedded_inputs = self.embedding_softmax_layer(
        inputs)  # [batch_size, input_length, hidden_size]
    # Padding mask, e.g. [[0, 0, 0, 1, 1], [0, 0, 1, 1, 1]]: 1 at padded positions.
    inputs_padding = model_utils.get_padding(inputs)

    # Add the position encoding to the embeddings. ---by zsw 2018.12.4
    with tf.name_scope("add_pos_encoding"):
      length = tf.shape(embedded_inputs)[1]
      pos_encoding = model_utils.get_position_encoding(
          length, self.params["hidden_size"])  # [length, hidden_size]
      encoder_inputs = embedded_inputs + pos_encoding

    # Trick: dropout on the embedding layer output. ---by zsw 2018.12.4
    if self.train:
      encoder_inputs = tf.nn.dropout(
          encoder_inputs, 1 - self.params["layer_postprocess_dropout"])

    return self.encoder_stack(encoder_inputs, attention_bias, inputs_padding)
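# A minimal sketch of what model_utils.get_padding is assumed to return for
# the example in the comment above: 1.0 at padded positions and 0.0 at real
# tokens. A padding id of 0 is an assumption here.
import tensorflow as tf


def get_padding_sketch(x, padding_value=0):
  """Returns a float tensor with 1.0 where x equals padding_value, else 0.0."""
  with tf.name_scope("padding"):
    return tf.cast(tf.equal(x, padding_value), tf.float32)


# Example: get_padding_sketch([[7, 5, 3, 0, 0], [9, 2, 0, 0, 0]])
# -> [[0., 0., 0., 1., 1.], [0., 0., 1., 1., 1.]]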
def encode(self, inputs, segments, attention_bias):
  """Generate continuous representation for inputs.

  Args:
    inputs: int tensor with shape [batch_size, input_length].
    segments: int tensor with shape [batch_size, input_length].
    attention_bias: float tensor with shape [batch_size, 1, 1, input_length]

  Returns:
    float tensor with shape [batch_size, input_length, hidden_size]
  """
  with tf.name_scope("encode"):
    # Prepare inputs to the layer stack by adding segment and positional
    # encodings and applying dropout.
    encoder_inputs = self.embedding_softmax_layer(inputs)
    inputs_padding = model_utils.get_padding(inputs)

    with tf.name_scope("add_segment_encoding"):
      segment_inputs = self.segment_embedding_layer(segments)
      encoder_inputs += segment_inputs

    with tf.name_scope("add_pos_encoding"):
      length = tf.shape(encoder_inputs)[1]
      pos_encoding = model_utils.get_position_encoding(
          length, self.params["hidden_size"])
      encoder_inputs += pos_encoding

    if self.train:
      encoder_inputs = tf.nn.dropout(
          encoder_inputs, 1 - self.params["layer_postprocess_dropout"])

    return self.encoder_stack(encoder_inputs, attention_bias, inputs_padding)
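# self.segment_embedding_layer is defined elsewhere in the model; a minimal,
# hypothetical sketch of such a layer is a plain lookup table mapping each
# segment id (e.g. 0/1, as in BERT-style inputs) to a hidden_size vector. The
# class name and num_segments argument below are illustrative, not this
# repo's API.
import tensorflow as tf


class SegmentEmbeddingSketch(tf.keras.layers.Layer):
  """Hypothetical segment-id embedding lookup."""

  def __init__(self, num_segments, hidden_size):
    super(SegmentEmbeddingSketch, self).__init__()
    self.table = tf.keras.layers.Embedding(num_segments, hidden_size)

  def call(self, segments):
    # segments: int tensor [batch_size, input_length]
    # returns: float tensor [batch_size, input_length, hidden_size]
    return self.table(segments)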
def _get_symbols_to_logits_fn(self, max_decode_length):
  """Returns a decoding function that calculates logits of the next tokens."""
  timing_signal = model_utils.get_position_encoding(
      max_decode_length + 1, self.params["hidden_size"])
  decoder_self_attention_bias = model_utils.get_decoder_self_attention_bias(
      max_decode_length)

  def symbols_to_logits_fn(ids, i, cache):
    """Generate logits for next potential IDs.

    Args:
      ids: Current decoded sequences. int tensor with shape
        [batch_size * beam_size, i + 1]
      i: Loop index
      cache: dictionary of values storing the encoder output, encoder-decoder
        attention bias, and previous decoder attention values.

    Returns:
      Tuple of (logits with shape [batch_size * beam_size, vocab_size],
        updated cache values)
    """
    # Set decoder input to the last generated IDs.
    decoder_input = ids[:, -1:]

    # Preprocess decoder input by getting embeddings and adding timing signal.
    decoder_input = self.embedding_softmax_layer(decoder_input)
    decoder_input += timing_signal[i:i + 1]

    self_attention_bias = decoder_self_attention_bias[:, :, i:i + 1, :i + 1]
    encdec_attention_bias = cache.get("encoder_decoder_attention_bias")
    encdec_attention_bias_query = cache.get("attention_bias_query")
    encdec_attention_bias_content = cache.get("attention_bias_content")
    encoder_outputs = cache.get("encoder_outputs")
    inputs = cache.get("inputs")  # encoder inputs

    D, C_query, C_content, M = self.decoder_stack(
        decoder_input, encoder_outputs, self_attention_bias,
        encdec_attention_bias, encdec_attention_bias_query,
        encdec_attention_bias_content, cache)
    logits = self.distribute_layer(D, C_query, C_content, M, encoder_outputs,
                                   encdec_attention_bias_query,
                                   encdec_attention_bias_content, inputs)
    logits = tf.squeeze(logits, axis=[1])
    return logits, cache

  return symbols_to_logits_fn
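# A minimal sketch of the causal bias that
# model_utils.get_decoder_self_attention_bias is assumed to produce: zeros on
# and below the diagonal and a large negative value above it, so position i
# can only attend to positions <= i. The shape [1, 1, length, length]
# broadcasts over batch and heads; the exact negative constant is an
# assumption.
import tensorflow as tf

_NEG_INF = -1e9


def get_decoder_self_attention_bias_sketch(length):
  """Returns a [1, 1, length, length] bias masking out future positions."""
  with tf.name_scope("decoder_self_attention_bias"):
    valid_locs = tf.linalg.band_part(tf.ones([length, length]), -1, 0)
    valid_locs = tf.reshape(valid_locs, [1, 1, length, length])
    return _NEG_INF * (1.0 - valid_locs)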
def decode(self, targets, encoder_outputs, attention_bias,
           attention_bias_query, attention_bias_content, inputs):
  """Generate logits for each value in the target sequence.

  Args:
    targets: target values for the output sequence. int tensor with shape
      [batch_size, target_length]
    encoder_outputs: continuous representation of input sequence. float tensor
      with shape [batch_size, input_length, hidden_size]
    attention_bias: float tensor with shape [batch_size, 1, 1, input_length]
    attention_bias_query: float tensor with shape
      [batch_size, 1, 1, input_length]; additional encoder-decoder attention
      bias consumed by the decoder stack and the distribution layer.
    attention_bias_content: float tensor with shape
      [batch_size, 1, 1, input_length]; additional encoder-decoder attention
      bias consumed by the decoder stack and the distribution layer.
    inputs: int tensor with shape [batch_size, input_length]; the encoder
      input ids, passed to the distribution layer.

  Returns:
    float32 tensor with shape [batch_size, target_length, vocab_size]
  """
  with tf.name_scope("decode"):
    # Prepare inputs to decoder layers by shifting targets, adding positional
    # encoding and applying dropout.
    decoder_inputs = self.embedding_softmax_layer(targets)
    with tf.name_scope("shift_targets"):
      # Shift targets to the right, and remove the last element.
      decoder_inputs = tf.pad(
          decoder_inputs, [[0, 0], [1, 0], [0, 0]])[:, :-1, :]
    with tf.name_scope("add_pos_encoding"):
      length = tf.shape(decoder_inputs)[1]
      decoder_inputs += model_utils.get_position_encoding(
          length, self.params["hidden_size"])
    if self.train:
      decoder_inputs = tf.nn.dropout(
          decoder_inputs, 1 - self.params["layer_postprocess_dropout"])

    # Run the decoder stack.
    decoder_self_attention_bias = model_utils.get_decoder_self_attention_bias(
        length)
    D, C_query, C_content, M = self.decoder_stack(
        decoder_inputs, encoder_outputs, decoder_self_attention_bias,
        attention_bias, attention_bias_query, attention_bias_content)

    # Output distribution layer.
    logits = self.distribute_layer(D, C_query, C_content, M, encoder_outputs,
                                   attention_bias_query,
                                   attention_bias_content, inputs)
    return logits
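# The attention_bias arguments above have shape [batch_size, 1, 1,
# input_length]. A minimal sketch of how such a bias is assumed to be built
# from the encoder input ids (a padding id of 0 is an assumption): -1e9 at
# padded positions, 0 elsewhere, with two singleton axes added so it
# broadcasts over heads and query positions inside the attention softmax.
import tensorflow as tf


def get_padding_bias_sketch(inputs, padding_value=0):
  """Returns a [batch_size, 1, 1, input_length] bias from int input ids."""
  with tf.name_scope("attention_bias"):
    padding = tf.cast(tf.equal(inputs, padding_value), tf.float32)
    attention_bias = padding * -1e9
    return tf.expand_dims(tf.expand_dims(attention_bias, axis=1), axis=1)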
def _get_symbols_to_logits_fn(self, max_decode_length):
  """Returns a decoding function that calculates logits of the next tokens."""
  # Position (timing) signal for every possible decoding step.
  timing_signal = model_utils.get_position_encoding(
      max_decode_length + 1, self.params["hidden_size"])
  # Causal decoding mask: a bias matrix whose upper triangle is -inf so each
  # position can only attend to earlier positions.
  decoder_self_attention_bias = model_utils.get_decoder_self_attention_bias(
      max_decode_length)

  def symbols_to_logits_fn(ids, i, cache):
    """Generate logits for next potential IDs.

    Args:
      ids: Current decoded sequences. int tensor with shape
        [batch_size * beam_size, i + 1]
      i: Loop index
      cache: dictionary of values storing the encoder output, encoder-decoder
        attention bias, and previous decoder attention values.

    Returns:
      Tuple of (logits with shape [batch_size * beam_size, vocab_size],
        updated cache values)
    """
    # Set decoder input to the last generated IDs.
    decoder_input = ids[:, -1:]

    # Preprocess decoder input by getting embeddings and adding timing signal.
    decoder_input = self.embedding_softmax_layer(decoder_input)
    decoder_input += timing_signal[i:i + 1]

    # Slice the causal bias for step i: the current query position may attend
    # to positions 0..i only.
    self_attention_bias = decoder_self_attention_bias[:, :, i:i + 1, :i + 1]
    decoder_outputs = self.decoder_stack(
        decoder_input, cache.get("encoder_outputs"), self_attention_bias,
        cache.get("encoder_decoder_attention_bias"), cache)
    logits = self.embedding_softmax_layer.linear(decoder_outputs)
    logits = tf.squeeze(logits, axis=[1])
    return logits, cache

  return symbols_to_logits_fn
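# For context, a minimal sketch of the cache dictionary that
# symbols_to_logits_fn expects during beam search. The per-layer key format
# and empty initial key/value shapes are assumptions modeled on the standard
# Transformer decoding loop, not taken verbatim from this repo.
import tensorflow as tf


def build_decode_cache_sketch(encoder_outputs, encoder_decoder_attention_bias,
                              batch_size, hidden_size, num_hidden_layers):
  """Builds an initial cache for incremental (step-by-step) decoding."""
  cache = {
      "layer_%d" % layer: {
          # Self-attention keys/values accumulated over decoded steps.
          "k": tf.zeros([batch_size, 0, hidden_size]),
          "v": tf.zeros([batch_size, 0, hidden_size]),
      } for layer in range(num_hidden_layers)
  }
  # Static values reused at every decoding step.
  cache["encoder_outputs"] = encoder_outputs
  cache["encoder_decoder_attention_bias"] = encoder_decoder_attention_bias
  return cache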