Example #1
    def _decode(self, batch_size, helper, z, max_length=None, x_input=None):
        initial_state = initial_cell_state_from_embedding(
            self._dec_cell, z, batch_size, name='decoder/z_to_initial_state')

        if (isinstance(helper, seq2seq.TrainingHelper)
                and self._cudnn_dec_lstm):
            rnn_output, _ = self._cudnn_dec_lstm(
                tf.transpose(x_input, [1, 0, 2]),
                initial_state=_cudnn_lstm_state(initial_state),
                training=self._is_training)
            with tf.variable_scope('decoder'):
                rnn_output = self._output_layer(rnn_output)
            final_output = seq2seq.BasicDecoderOutput(
                rnn_output=tf.transpose(rnn_output, [1, 0, 2]),
                sample_id=None)
        else:
            decoder = seq2seq.BasicDecoder(self._dec_cell,
                                           helper,
                                           initial_state=initial_state,
                                           output_layer=self._output_layer)
            final_output, _, _ = seq2seq.dynamic_decode(
                decoder,
                maximum_iterations=max_length,
                swap_memory=True,
                scope='decoder')
        return final_output
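
The helper initial_cell_state_from_embedding used above is not part of this listing. Below is a minimal sketch of what it presumably does, assuming TF 1.x and an LSTM (possibly multi-layer) cell: project z through a tanh dense layer and split the result across the cell's flattened state tensors. Treat the body as an assumption, not the original implementation.

import tensorflow as tf
from tensorflow.python.util import nest

def initial_cell_state_from_embedding(cell, z, batch_size, name=None):
    """Hypothetical sketch: derive the decoder's initial state from z."""
    flat_state_sizes = nest.flatten(cell.state_size)
    # One dense projection of z, split into the cell's c/h state tensors.
    initial = tf.layers.dense(
        z, sum(flat_state_sizes), activation=tf.tanh, name=name)
    return nest.pack_sequence_as(
        cell.zero_state(batch_size, tf.float32),
        tf.split(initial, flat_state_sizes, axis=1))
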
    def step(self, time, inputs, state, name=None):
        cell_outputs, cell_state = self.cell(
            inputs, state)  # cell_outputs: [batch_size, num_units]

        # additive attention over the encoder outputs
        attn1 = self.Wh(self.encoder_outputs) + tf.expand_dims(
            tf.matmul(cell_outputs, self.Ws),
            1)  #[batch_size,enc_seq,num_units]
        attn2 = tf.squeeze(self.v(tf.tanh(attn1)),
                           axis=[2])  #[batch_size,enc_seq]
        encoded_mask = (tf.sequence_mask(self.encoder_sequence_length,
                                         dtype=tf.float32,
                                         name='encoded_mask') - 1) * 1e6
        attention_weight = tf.nn.softmax(attn2 +
                                         encoded_mask)  #[batch_size,enc_seq]
        context = tf.reduce_sum(self.encoder_outputs *
                                tf.expand_dims(attention_weight, 2),
                                1)  #[batch_size,num_units]

        # additive attention over previously decoded outputs (intra-decoder attention)
        attn3 = self.Wh2(self.decoder_outputs) + tf.expand_dims(
            tf.matmul(cell_outputs, self.Ws2),
            1)  # [batch_size, dec_seq, num_units]
        attn4 = tf.squeeze(self.v2(tf.tanh(attn3)),
                           axis=[2])  # [batch_size, dec_seq]
        decoded_mask = (tf.sequence_mask(self.decoder_sequence_length,
                                         dtype=tf.float32,
                                         name='decoded_mask') - 1) * 1e6
        attention_weight2 = tf.nn.softmax(attn4 +
                                          decoded_mask)  # [batch_size, dec_seq]
        context2 = tf.reduce_sum(self.decoder_outputs *
                                 tf.expand_dims(attention_weight2, 2),
                                 1)  #[batch_size,num_units]

        p_vocab = tf.matmul(
            tf.concat([cell_outputs, context, context2], axis=-1),
            self.V1) + self.b1  # [batch_size,target_vocab_size]
        p_vocab = tf.nn.softmax(p_vocab)
        # pad the vocabulary distribution with zeros so it spans the full
        # (extended) vocabulary of size self.vocab_size
        p_vocab_to_mix = tf.concat([
            p_vocab,
            tf.zeros(shape=[
                self.batch_size, self.vocab_size - self.target_vocab_size
            ])
        ], 1)
        p_mix = p_vocab_to_mix
        # floor the probabilities at 1e-12 for numerical stability downstream
        p_mix = tf.maximum(p_mix, tf.zeros_like(p_mix, tf.float32) + 1e-12)

        sample_ids = self.helper.sample(time=time, outputs=p_mix)
        (finished, next_inputs,
         next_state) = self.helper.next_inputs(time=time,
                                               outputs=p_mix,
                                               state=cell_state,
                                               sample_ids=sample_ids)

        outputs = seq2seq.BasicDecoderOutput(p_mix, sample_ids)
        return (outputs, next_state, next_inputs, finished)
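
The (tf.sequence_mask(...) - 1) * 1e6 idiom above adds roughly -1e6 to the logits of padded positions, so the softmax assigns them effectively zero attention weight. A tiny standalone sketch of the effect (TF 1.x, toy values):

import tensorflow as tf

logits = tf.constant([[2.0, 1.0, 3.0, 5.0]])  # one example, 4 time steps
# valid length 2 of 4: mask is [0, 0, -1e6, -1e6]
mask = (tf.sequence_mask([2], maxlen=4, dtype=tf.float32) - 1) * 1e6
weights = tf.nn.softmax(logits + mask)  # padded positions get ~0 weight

with tf.Session() as sess:
    print(sess.run(weights))  # approx. [[0.73, 0.27, 0.00, 0.00]]
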
Example #3
    def _decode(self, z, helper, max_length=None, x_input=None):
        """Decodes the given batch of latent vectors vectors, which may be 0-length.

    Args:
      z: Batch of latent vectors, sized `[batch_size, z_size]`, where `z_size`
        may be 0 for unconditioned decoding.
      helper: A seq2seq.Helper to use. If a TrainingHelper is passed and a
        CudnnLSTM has previously been defined, it will be used instead.
      max_length: (Optional) The maximum number of iterations to decode.
      x_input: (Optional) The inputs to the decoder for teacher forcing.
        Required if CudnnLSTM is to be used.

    Returns:
      final_output: The final seq2seq.BasicDecoderOutput.
      final_state: The final states of the decoder, or None if using Cudnn.
    """
        initial_state = initial_cell_state_from_embedding(
            self._dec_cell, z, name='decoder/z_to_initial_state')

        # CudnnLSTM does not support sampling so it can only replace TrainingHelper.
        if self._cudnn_dec_lstm and type(helper) is seq2seq.TrainingHelper:  # pylint:disable=unidiomatic-typecheck
            rnn_output, _ = self._cudnn_dec_lstm(
                tf.transpose(x_input, [1, 0, 2]),
                initial_state=_cudnn_lstm_state(initial_state),
                training=self._is_training)
            with tf.variable_scope('decoder'):
                rnn_output = self._output_layer(rnn_output)
            final_output = seq2seq.BasicDecoderOutput(
                rnn_output=tf.transpose(rnn_output, [1, 0, 2]),
                sample_id=None)
            # TODO(adarob): Return a final state for fixed-length outputs.
            final_state = None
        else:
            if self._cudnn_dec_lstm:
                tf.logging.warning(
                    'CudnnLSTM does not support sampling. Using `dynamic_decode` '
                    'instead.')
            decoder = seq2seq.BasicDecoder(self._dec_cell,
                                           helper,
                                           initial_state=initial_state,
                                           output_layer=self._output_layer)
            final_output, final_state, _ = seq2seq.dynamic_decode(
                decoder,
                maximum_iterations=max_length,
                swap_memory=True,
                scope='decoder')
        return final_output, final_state
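
A hedged usage sketch for the method above; the names model, x_input, x_length, and max_seq_len are placeholders, not part of the original code. Teacher forcing goes through a TrainingHelper (which also enables the CudnnLSTM path), while free-running generation would pass a sampling helper instead.

from tensorflow.contrib import seq2seq

# Teacher-forced training pass.
helper = seq2seq.TrainingHelper(inputs=x_input, sequence_length=x_length)
final_output, final_state = model._decode(
    z, helper, max_length=max_seq_len, x_input=x_input)
logits = final_output.rnn_output  # [batch_size, time, output_depth]
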
    def step(self, time, inputs, state, name=None):
        cell_outputs, cell_state = self.cell(
            inputs, state)  # cell_outputs: [batch_size, num_units]

        # additive attention over the encoder outputs
        attn1 = self.Wh(self.encoder_outputs) + tf.expand_dims(
            tf.matmul(cell_outputs, self.Ws),
            1)  #[batch_size,enc_seq,num_units]
        attn2 = tf.squeeze(self.v(tf.tanh(attn1)),
                           axis=[2])  #[batch_size,enc_seq]
        encoded_mask = (tf.sequence_mask(self.encoder_sequence_length,
                                         dtype=tf.float32,
                                         name='encoded_mask') - 1) * 1e6
        attention_weight = tf.nn.softmax(attn2 +
                                         encoded_mask)  #[batch_size,enc_seq]
        context = tf.reduce_sum(self.encoder_outputs *
                                tf.expand_dims(attention_weight, 2),
                                1)  #[batch_size,num_units]

        # scatter the attention weights into vocabulary space so each source
        # token id receives the attention mass of its positions (copy distribution)
        p_copy = attention_weight  # [batch_size, enc_seq]
        expand_p_copy = tf.expand_dims(p_copy, 2)  # [batch_size, enc_seq, 1]
        encoder_inputs_mask = tf.one_hot(
            self.encoder_inputs_ids,
            self.vocab_size)  # [batch_size, enc_seq, vocab_size]
        p_copy_to_mix = tf.reduce_sum(encoder_inputs_mask * expand_p_copy,
                                      1)  # [batch_size, vocab_size]

        p_mix = p_copy_to_mix
        # floor the probabilities at 1e-12 for numerical stability downstream
        p_mix = tf.maximum(p_mix, tf.zeros_like(p_mix, tf.float32) + 1e-12)

        sample_ids = self.helper.sample(time=time, outputs=p_mix)
        (finished, next_inputs,
         next_state) = self.helper.next_inputs(time=time,
                                               outputs=p_mix,
                                               state=cell_state,
                                               sample_ids=sample_ids)

        outputs = seq2seq.BasicDecoderOutput(
            tf.concat([p_mix, cell_outputs], -1), sample_ids)
        return (outputs, next_state, next_inputs, finished)
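
The one_hot/reduce_sum scatter above turns per-position attention weights into a copy distribution over vocabulary ids; repeated source tokens accumulate their attention mass. A small numeric sketch of that step alone (toy values, TF 1.x):

import tensorflow as tf

vocab_size = 5
encoder_inputs_ids = tf.constant([[3, 1, 3]])       # [batch=1, enc_seq=3]
attention_weight = tf.constant([[0.5, 0.2, 0.3]])   # sums to 1 over enc_seq

one_hot = tf.one_hot(encoder_inputs_ids, vocab_size)  # [1, 3, 5]
p_copy_to_mix = tf.reduce_sum(
    one_hot * tf.expand_dims(attention_weight, 2), 1)  # [1, 5]

with tf.Session() as sess:
    # id 3 appears twice and collects 0.5 + 0.3 = 0.8; id 1 gets 0.2.
    print(sess.run(p_copy_to_mix))  # [[0.0, 0.2, 0.0, 0.8, 0.0]]
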
    @property
    def output_dtype(self):
        return seq2seq.BasicDecoderOutput(rnn_output=tf.float32,
                                          sample_id=tf.int32)

    @property
    def output_size(self):
        return seq2seq.BasicDecoderOutput(rnn_output=self.vocab_size,
                                          sample_id=tf.TensorShape([]))
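
These two properties, together with step, initialize, and batch_size, make up the tf.contrib.seq2seq.Decoder interface that dynamic_decode drives; the structure returned by output_size/output_dtype must mirror the BasicDecoderOutput that step emits. Below is a minimal, self-contained sketch of such a decoder (a hypothetical class written for illustration, not taken from the original code; it is essentially what seq2seq.BasicDecoder already provides):

import tensorflow as tf
from tensorflow.contrib import seq2seq

class MinimalDecoder(seq2seq.Decoder):
    """Hypothetical minimal Decoder illustrating the interface."""

    def __init__(self, cell, helper, initial_state, output_layer):
        self._cell = cell
        self._helper = helper
        self._initial_state = initial_state
        self._output_layer = output_layer  # assumed to be a tf.layers.Dense

    @property
    def batch_size(self):
        return self._helper.batch_size

    @property
    def output_size(self):
        return seq2seq.BasicDecoderOutput(rnn_output=self._output_layer.units,
                                          sample_id=tf.TensorShape([]))

    @property
    def output_dtype(self):
        return seq2seq.BasicDecoderOutput(rnn_output=tf.float32,
                                          sample_id=tf.int32)

    def initialize(self, name=None):
        finished, first_inputs = self._helper.initialize()
        return finished, first_inputs, self._initial_state

    def step(self, time, inputs, state, name=None):
        cell_outputs, cell_state = self._cell(inputs, state)
        logits = self._output_layer(cell_outputs)
        sample_ids = self._helper.sample(time=time, outputs=logits,
                                         state=cell_state)
        finished, next_inputs, next_state = self._helper.next_inputs(
            time=time, outputs=logits, state=cell_state, sample_ids=sample_ids)
        return (seq2seq.BasicDecoderOutput(logits, sample_ids),
                next_state, next_inputs, finished)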