def _decode(self, batch_size, helper, z, max_length=None, x_input=None):
  initial_state = initial_cell_state_from_embedding(
      self._dec_cell, z, batch_size, name='decoder/z_to_initial_state')
  if (isinstance(helper, seq2seq.TrainingHelper) and
      self._cudnn_dec_lstm):
    rnn_output, _ = self._cudnn_dec_lstm(
        tf.transpose(x_input, [1, 0, 2]),
        initial_state=_cudnn_lstm_state(initial_state),
        training=self._is_training)
    with tf.variable_scope('decoder'):
      rnn_output = self._output_layer(rnn_output)
    final_output = seq2seq.BasicDecoderOutput(
        rnn_output=tf.transpose(rnn_output, [1, 0, 2]), sample_id=None)
  else:
    decoder = seq2seq.BasicDecoder(
        self._dec_cell,
        helper,
        initial_state=initial_state,
        output_layer=self._output_layer)
    final_output, _, _ = seq2seq.dynamic_decode(
        decoder,
        maximum_iterations=max_length,
        swap_memory=True,
        scope='decoder')
  return final_output
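# Side note on the transposes in the Cudnn branch above: CudnnLSTM consumes
# time-major tensors, so the batch-major decoder inputs are flipped to
# [time, batch, depth] going in and flipped back coming out. A tiny
# self-contained illustration (shapes are made up for the example):
import tensorflow as tf

x = tf.zeros([8, 32, 512])                 # [batch, time, depth], batch-major
x_time_major = tf.transpose(x, [1, 0, 2])  # [time, batch, depth], Cudnn layout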
def step(self, time, inputs, state, name=None):
    cell_outputs, cell_state = self.cell(
        inputs, state)  # cell_outputs: [batch_size, num_units]

    # Additive attention over the encoder outputs.
    attn1 = self.Wh(self.encoder_outputs) + tf.expand_dims(
        tf.matmul(cell_outputs, self.Ws), 1)  # [batch_size, enc_seq, num_units]
    attn2 = tf.squeeze(self.v(tf.tanh(attn1)),
                       axis=[2])  # [batch_size, enc_seq]
    # Bias padded positions by -1e6 so the softmax assigns them ~0 weight.
    encoded_mask = (tf.sequence_mask(self.encoder_sequence_length,
                                     dtype=tf.float32,
                                     name='encoded_mask') - 1) * 1e6
    attention_weight = tf.nn.softmax(attn2 +
                                     encoded_mask)  # [batch_size, enc_seq]
    context = tf.reduce_sum(
        self.encoder_outputs * tf.expand_dims(attention_weight, 2),
        1)  # [batch_size, num_units]

    # Second additive attention, over the decoder's own previous outputs.
    attn3 = self.Wh2(self.decoder_outputs) + tf.expand_dims(
        tf.matmul(cell_outputs, self.Ws2), 1)  # [batch_size, dec_seq, num_units]
    attn4 = tf.squeeze(self.v2(tf.tanh(attn3)),
                       axis=[2])  # [batch_size, dec_seq]
    decoded_mask = (tf.sequence_mask(self.decoder_sequence_length,
                                     dtype=tf.float32,
                                     name='decoded_mask') - 1) * 1e6
    attention_weight2 = tf.nn.softmax(attn4 +
                                      decoded_mask)  # [batch_size, dec_seq]
    context2 = tf.reduce_sum(
        self.decoder_outputs * tf.expand_dims(attention_weight2, 2),
        1)  # [batch_size, num_units]

    # Vocabulary distribution from the cell output and both context vectors.
    p_vocab = tf.matmul(
        tf.concat([cell_outputs, context, context2], axis=-1),
        self.V1) + self.b1  # [batch_size, target_vocab_size]
    p_vocab = tf.nn.softmax(p_vocab)
    # Zero-pad to the full vocabulary so the distribution lines up with the
    # extended (copy-capable) vocab.
    p_vocab_to_mix = tf.concat([
        p_vocab,
        tf.zeros(
            shape=[self.batch_size, self.vocab_size - self.target_vocab_size])
    ], 1)
    p_mix = p_vocab_to_mix
    # Floor at 1e-12 to keep any downstream log() numerically safe.
    p_mix = tf.maximum(p_mix, tf.zeros_like(p_mix, tf.float32) + 1e-12)

    sample_ids = self.helper.sample(time=time, outputs=p_mix)
    (finished, next_inputs, next_state) = self.helper.next_inputs(
        time=time, outputs=p_mix, state=cell_state, sample_ids=sample_ids)
    outputs = seq2seq.BasicDecoderOutput(p_mix, sample_ids)
    return (outputs, next_state, next_inputs, finished)
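# Self-contained sketch (not part of the class above) of the masking trick:
# subtracting 1 from a {0,1} sequence mask and scaling by 1e6 adds a -1e6 bias
# to padded positions, so the softmax gives them ~0 attention weight.
import tensorflow as tf

logits = tf.constant([[1.0, 2.0, 3.0, 4.0]])  # [batch=1, enc_seq=4]
lengths = tf.constant([2])                    # only the first 2 steps are real
mask = (tf.sequence_mask(lengths, maxlen=4, dtype=tf.float32) - 1) * 1e6
weights = tf.nn.softmax(logits + mask)        # ~[[0.27, 0.73, 0.0, 0.0]]

with tf.Session() as sess:                    # TF 1.x, matching the code above
    print(sess.run(weights))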
def _decode(self, z, helper, max_length=None, x_input=None):
  """Decodes the given batch of latent vectors, which may be 0-length.

  Args:
    z: Batch of latent vectors, sized `[batch_size, z_size]`, where `z_size`
      may be 0 for unconditioned decoding.
    helper: A seq2seq.Helper to use. If a TrainingHelper is passed and a
      CudnnLSTM has previously been defined, it will be used instead.
    max_length: (Optional) The maximum number of iterations to decode.
    x_input: (Optional) The inputs to the decoder for teacher forcing.
      Required if CudnnLSTM is to be used.

  Returns:
    final_output: The final seq2seq.BasicDecoderOutput.
    final_state: The final states of the decoder, or None if using Cudnn.
  """
  initial_state = initial_cell_state_from_embedding(
      self._dec_cell, z, name='decoder/z_to_initial_state')

  # CudnnLSTM does not support sampling, so it can only replace TrainingHelper.
  if self._cudnn_dec_lstm and type(helper) is seq2seq.TrainingHelper:  # pylint:disable=unidiomatic-typecheck
    rnn_output, _ = self._cudnn_dec_lstm(
        tf.transpose(x_input, [1, 0, 2]),
        initial_state=_cudnn_lstm_state(initial_state),
        training=self._is_training)
    with tf.variable_scope('decoder'):
      rnn_output = self._output_layer(rnn_output)
    final_output = seq2seq.BasicDecoderOutput(
        rnn_output=tf.transpose(rnn_output, [1, 0, 2]), sample_id=None)
    # TODO(adarob): Return a final state for fixed-length outputs.
    final_state = None
  else:
    if self._cudnn_dec_lstm:
      tf.logging.warning(
          'CudnnLSTM does not support sampling. Using `dynamic_decode` '
          'instead.')
    decoder = seq2seq.BasicDecoder(
        self._dec_cell,
        helper,
        initial_state=initial_state,
        output_layer=self._output_layer)
    final_output, final_state, _ = seq2seq.dynamic_decode(
        decoder,
        maximum_iterations=max_length,
        swap_memory=True,
        scope='decoder')
  return final_output, final_state
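# Hedged usage sketch for `_decode` above; `model`, the placeholder shapes,
# and the depth/latent sizes are illustrative assumptions, not values from the
# original code. With a TrainingHelper (teacher forcing), the CudnnLSTM fast
# path is taken when one was built; any sampling helper falls back to
# `dynamic_decode`.
import tensorflow as tf
from tensorflow.contrib import seq2seq

x_input = tf.placeholder(tf.float32, [None, 32, 90])  # [batch, time, depth]
lengths = tf.placeholder(tf.int32, [None])
z = tf.placeholder(tf.float32, [None, 256])           # latent codes

helper = seq2seq.TrainingHelper(inputs=x_input, sequence_length=lengths)
final_output, final_state = model._decode(            # `model` owns the decoder
    z, helper, max_length=32, x_input=x_input)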
def step(self, time, inputs, state, name=None):
    cell_outputs, cell_state = self.cell(
        inputs, state)  # cell_outputs: [batch_size, num_units]

    # Additive attention over the encoder outputs, with padded positions
    # masked out before the softmax.
    attn1 = self.Wh(self.encoder_outputs) + tf.expand_dims(
        tf.matmul(cell_outputs, self.Ws), 1)  # [batch_size, enc_seq, num_units]
    attn2 = tf.squeeze(self.v(tf.tanh(attn1)),
                       axis=[2])  # [batch_size, enc_seq]
    encoded_mask = (tf.sequence_mask(self.encoder_sequence_length,
                                     dtype=tf.float32,
                                     name='encoded_mask') - 1) * 1e6
    attention_weight = tf.nn.softmax(attn2 +
                                     encoded_mask)  # [batch_size, enc_seq]
    context = tf.reduce_sum(
        self.encoder_outputs * tf.expand_dims(attention_weight, 2),
        1)  # [batch_size, num_units]

    # Copy mechanism: scatter the attention weights onto the vocabulary, so
    # the probability of emitting a source token is its accumulated attention
    # mass (repeated source tokens add up).
    p_copy = attention_weight  # [batch_size, enc_seq]
    expand_p_copy = tf.expand_dims(p_copy, 2)  # [batch_size, enc_seq, 1]
    encoder_inputs_mask = tf.one_hot(
        self.encoder_inputs_ids,
        self.vocab_size)  # [batch_size, enc_seq, vocab_size]
    p_copy_to_mix = tf.reduce_sum(encoder_inputs_mask * expand_p_copy,
                                  1)  # [batch_size, vocab_size]
    p_mix = p_copy_to_mix
    # Floor at 1e-12 to keep any downstream log() numerically safe.
    p_mix = tf.maximum(p_mix, tf.zeros_like(p_mix, tf.float32) + 1e-12)

    sample_ids = self.helper.sample(time=time, outputs=p_mix)
    (finished, next_inputs, next_state) = self.helper.next_inputs(
        time=time, outputs=p_mix, state=cell_state, sample_ids=sample_ids)
    outputs = seq2seq.BasicDecoderOutput(
        tf.concat([p_mix, cell_outputs], -1),  # [batch_size, vocab_size + num_units]
        sample_ids)
    return (outputs, next_state, next_inputs, finished)
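# Self-contained numeric sketch (values are made up) of the copy "scatter"
# above: one_hot maps each source token id to a vocabulary row, and the
# attention-weighted sum drops each position's mass onto its token id.
import tensorflow as tf

vocab_size = 5
ids = tf.constant([[3, 1, 3]])         # [batch=1, enc_seq=3] source token ids
attn = tf.constant([[0.5, 0.2, 0.3]])  # attention over source positions
one_hot = tf.one_hot(ids, vocab_size)  # [1, 3, 5]
p_copy = tf.reduce_sum(one_hot * tf.expand_dims(attn, 2), 1)
# p_copy == [[0.0, 0.2, 0.0, 0.8, 0.0]]: token 3 accumulates 0.5 + 0.3.

with tf.Session() as sess:
    print(sess.run(p_copy))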
@property  # seq2seq.Decoder defines `output_dtype` as an abstract property.
def output_dtype(self):
    return seq2seq.BasicDecoderOutput(rnn_output=tf.float32,
                                      sample_id=tf.int32)
@property  # Likewise a property on seq2seq.Decoder.
def output_size(self):
    return seq2seq.BasicDecoderOutput(rnn_output=self.vocab_size,
                                      sample_id=tf.TensorShape([]))
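# Why both properties exist: before its decode loop, seq2seq.dynamic_decode
# pairs `output_size` with `output_dtype` to allocate one TensorArray per
# field of the per-step BasicDecoderOutput that `step` returns. A runnable
# sketch of that pairing (the size 200 is an arbitrary stand-in for
# vocab_size; TF 1.x contrib APIs assumed):
import tensorflow as tf
from tensorflow.contrib import seq2seq
from tensorflow.python.util import nest

output_size = seq2seq.BasicDecoderOutput(rnn_output=200,
                                         sample_id=tf.TensorShape([]))
output_dtype = seq2seq.BasicDecoderOutput(rnn_output=tf.float32,
                                          sample_id=tf.int32)
tas = nest.map_structure(
    lambda size, dtype: tf.TensorArray(dtype, size=0, dynamic_size=True),
    output_size, output_dtype)  # one write-per-step array per output field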