Example #1
def decoding_layer_infer(encoder_state, dec_cell, dec_embeddings, start_of_sequence_id,
                         end_of_sequence_id, max_target_sequence_length,
                         vocab_size, output_layer, batch_size, keep_prob):
    """
    Create a decoding layer for inference
    :param encoder_state: Encoder state
    :param dec_cell: Decoder RNN Cell
    :param dec_embeddings: Decoder embeddings
    :param start_of_sequence_id: GO ID
    :param end_of_sequence_id: EOS Id
    :param max_target_sequence_length: Maximum length of target sequences
    :param vocab_size: Size of decoder/target vocabulary
    :param output_layer: Function to apply the output layer
    :param batch_size: Batch size
    :param keep_prob: Dropout keep probability
    :return: BasicDecoderOutput containing inference logits and sample_id
    """
    # TODO: Implement Function


    start_tokens = tf.tile(tf.constant([start_of_sequence_id], dtype=tf.int32),
                           multiples=[batch_size])
    greedy_embeddinghelper = seq2seq.GreedyEmbeddingHelper(dec_embeddings,
                                                           start_tokens=start_tokens,
                                                           end_token=end_of_sequence_id)
    basic_decoder = seq2seq.BasicDecoder(dec_cell,
                                         helper=greedy_embeddinghelper,
                                         initial_state=encoder_state,
                                         output_layer=output_layer)
    dynamic_decode_output = seq2seq.dynamic_decode(basic_decoder,
                                                   impute_finished=True,
                                                   maximum_iterations=max_target_sequence_length)[0]

    return dynamic_decode_output
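The function above chains GreedyEmbeddingHelper, BasicDecoder, and dynamic_decode. Below is a minimal, self-contained sketch of the same inference pipeline, assuming TensorFlow 1.x with tf.contrib.seq2seq; the vocabulary size, dimensions, token ids, and the zero initial state are toy assumptions, not values from the example.

# Minimal sketch (TensorFlow 1.x, tf.contrib.seq2seq); sizes and ids are toy values.
import tensorflow as tf
from tensorflow.contrib import seq2seq

vocab_size, embed_dim, hidden_dim = 50, 16, 32
batch_size, go_id, eos_id = 4, 1, 2

dec_embeddings = tf.get_variable('dec_embeddings', [vocab_size, embed_dim])
dec_cell = tf.nn.rnn_cell.BasicLSTMCell(hidden_dim)
output_layer = tf.layers.Dense(vocab_size)

start_tokens = tf.fill([batch_size], go_id)
helper = seq2seq.GreedyEmbeddingHelper(dec_embeddings, start_tokens, eos_id)

# In the snippets above the initial_state comes from the encoder; a zero state
# is used here only to keep the sketch runnable on its own.
decoder = seq2seq.BasicDecoder(dec_cell,
                               helper=helper,
                               initial_state=dec_cell.zero_state(batch_size, tf.float32),
                               output_layer=output_layer)
outputs, final_state, lengths = seq2seq.dynamic_decode(decoder,
                                                       impute_finished=True,
                                                       maximum_iterations=20)
logits = outputs.rnn_output      # [batch_size, <=20, vocab_size]
predictions = outputs.sample_id  # [batch_size, <=20]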
Example #2
def get_DecoderHelper(embedding_lookup,
                      seq_lengths,
                      token_dim,
                      gt_tokens=None,
                      unroll_type='teacher_forcing'):
    if unroll_type == 'teacher_forcing':
        if gt_tokens is None:
            raise ValueError('teacher_forcing requires gt_tokens')
        embedding = embedding_lookup(gt_tokens)
        helper = seq2seq.TrainingHelper(embedding, seq_lengths)
    elif unroll_type == 'scheduled_sampling':
        if gt_tokens is None:
            raise ValueError('scheduled_sampling requires gt_tokens')
        embedding = embedding_lookup(gt_tokens)
        # sample_prob 1.0: always sample from ground truth
        # sample_prob 0.0: always sample from prediction
        helper = seq2seq.ScheduledEmbeddingTrainingHelper(
            embedding,
            seq_lengths,
            embedding_lookup,
            1.0 - self.sample_prob,
            seed=None,
            scheduling_seed=None)
    elif unroll_type == 'greedy':
        # during evaluation, we perform greedy unrolling.
        start_token = tf.zeros([self.batch_size],
                               dtype=tf.int32) + token_dim
        end_token = token_dim - 1
        helper = seq2seq.GreedyEmbeddingHelper(embedding_lookup,
                                               start_token, end_token)
    else:
        raise ValueError('Unknown unroll type')
    return helper
Example #3
def get_helper(encoder_output, input_emb, input_len, embedding, mode, params):
    batch_size = tf.shape(encoder_output.output)[0]

    if mode == tf.estimator.ModeKeys.TRAIN:
        if params['conditional']:
            # conditional train helper with encoder output state as direct input
            # Reshape encoder state as auxiliary input: batch_size * hidden -> batch_size * max_len * hidden
            decoder_length = tf.shape(input_emb)[1]
            state_shape = tf.shape(encoder_output.state)
            encoder_state = tf.tile(
                tf.reshape(encoder_output.state,
                           [state_shape[1], state_shape[0], state_shape[2]]),
                [1, decoder_length, 1])

            input_emb = tf.concat([encoder_state, input_emb], axis=-1)

        helper = seq2seq.TrainingHelper(
            inputs=input_emb,  # batch_size * max_len-1 * emb_size
            sequence_length=input_len - 1,  # exclude last token
            time_major=False,
            name='training_helper')
    else:
        helper = seq2seq.GreedyEmbeddingHelper(
            embedding=embedding_func(embedding),
            start_tokens=tf.fill([batch_size], params['start_token']),
            end_token=params['end_token'])

    return helper
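get_helper above passes embedding_func(embedding) as the embedding argument. GreedyEmbeddingHelper accepts either an embedding matrix (it calls tf.nn.embedding_lookup internally) or a callable mapping ids to embeddings; the sketch below shows both forms, assuming TensorFlow 1.x and using made-up names and sizes.

# Hedged sketch: the two accepted forms of the embedding argument.
import tensorflow as tf
from tensorflow.contrib import seq2seq

embedding_matrix = tf.get_variable('embedding', [1000, 64])
start_tokens = tf.fill([8], 1)   # assumed start-token id 1, batch size 8
end_token = 2                    # assumed end-token id

# Form 1: pass the matrix directly.
helper_a = seq2seq.GreedyEmbeddingHelper(embedding_matrix, start_tokens, end_token)

# Form 2: pass a callable, useful when the lookup needs extra processing
# (as embed_and_input_proj does in the build_decoder examples further down).
helper_b = seq2seq.GreedyEmbeddingHelper(
    lambda ids: tf.nn.embedding_lookup(embedding_matrix, ids),
    start_tokens, end_token)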
Example #4
def language_decoder(inputs,
                     embed_seq,
                     seq_len,
                     embedding_lookup,
                     dim,
                     start_tokens,
                     end_token,
                     max_seq_len,
                     unroll_type='teacher_forcing',
                     output_layer=None,
                     is_train=True,
                     scope='language_decoder',
                     reuse=tf.AUTO_REUSE):
    """
    Args:
        seq: sequence of token (usually ground truth sequence)
        embed_seq: pre-embedded sequence of token for teacher forcing
        embedding_lookup: embedding lookup function for greedy unrolling
        start_token: tensor for start token [<s>] * bs
        end_token: integer for end token <e>
    """
    with tf.variable_scope(scope, reuse=reuse) as scope:
        init_c = fc_layer(inputs,
                          dim,
                          use_bias=True,
                          use_bn=False,
                          activation_fn=None,
                          is_training=is_train,
                          scope='Linear_c',
                          reuse=reuse)
        init_h = fc_layer(inputs,
                          dim,
                          use_bias=True,
                          use_bn=False,
                          activation_fn=None,
                          is_training=is_train,
                          scope='Linear_h',
                          reuse=reuse)
        init_state = rnn.LSTMStateTuple(init_c, init_h)
        log.warning(scope.name)
        if unroll_type == 'teacher_forcing':
            helper = seq2seq.TrainingHelper(embed_seq, seq_len)
        elif unroll_type == 'greedy':
            helper = seq2seq.GreedyEmbeddingHelper(embedding_lookup,
                                                   start_tokens, end_token)
        else:
            raise ValueError('Unknown unroll_type')

        cell = rnn.BasicLSTMCell(num_units=dim, state_is_tuple=True)
        decoder = seq2seq.BasicDecoder(cell,
                                       helper,
                                       init_state,
                                       output_layer=output_layer)
        outputs, _, pred_length = seq2seq.dynamic_decode(
            decoder, maximum_iterations=max_seq_len, scope='dynamic_decoder')

        output = outputs.rnn_output
        pred = outputs.sample_id

        return output, pred, pred_length
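Several of these snippets feed TrainingHelper with GO-prefixed, right-shifted targets (see seq_with_start in decode_L further below, or the input_len - 1 trick in Example #3). Below is a hedged sketch of that preparation, assuming TensorFlow 1.x; the GO id, shapes, and placeholder names are assumptions for illustration only.

# Hedged sketch of teacher-forcing input preparation for TrainingHelper.
import tensorflow as tf
from tensorflow.contrib import seq2seq

go_id = 1  # assumed GO token id
embedding_matrix = tf.get_variable('embedding', [300, 32])
targets = tf.placeholder(tf.int32, [None, None])    # [batch, max_len], ends with EOS
target_lengths = tf.placeholder(tf.int32, [None])   # lengths including the EOS token

batch_size = tf.shape(targets)[0]
go_tokens = tf.fill(tf.stack([batch_size, 1]), go_id)
decoder_inputs = tf.concat([go_tokens, targets[:, :-1]], axis=1)  # shift right

helper = seq2seq.TrainingHelper(
    inputs=tf.nn.embedding_lookup(embedding_matrix, decoder_inputs),
    sequence_length=target_lengths)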
Example #5
    def _build_decoder_test_greedy(self):
        r"""
        Builds the greedy test decoder, which feeds the most likely decoded symbol as input for the
        next timestep
        """
        self._helper_greedy = seq2seq.GreedyEmbeddingHelper(
            embedding=self._embedding_matrix,
            start_tokens=tf.tile([self._GO_ID], [self._batch_size]),
            end_token=self._EOS_ID)

        if self._hparams.enable_attention is True:
            cells, initial_state = self._add_attention(decoder_cells=self._decoder_cells, beam_search=False)
        else:
            cells = self._decoder_cells
            initial_state = self._decoder_initial_state

        self._decoder_inference = seq2seq.BasicDecoder(
            cell=cells,
            helper=self._helper_greedy,
            initial_state=initial_state,
            output_layer=self._dense_layer)

        outputs, states, lengths = seq2seq.dynamic_decode(
            self._decoder_inference,
            impute_finished=True,
            swap_memory=False,
            maximum_iterations=self._hparams.max_label_length)

        self.inference_outputs = outputs.rnn_output
        self.inference_predicted_ids = outputs.sample_id

        if self._hparams.write_attention_alignment is True:
            self.attention_summary = self._create_attention_alignments_summary(states)
Example #6
    def _response_generator(self):
        with tf.name_scope('response_generator'):
            batch_size, _ = tf.unstack(tf.shape(self.inputs))

            logits_projection = Dense(self._word_embeddings_shape[0],
                                      name='logits_projection')
            decoder_cell, decoder_initial_state = self._decoder_cell()

            if self._decoder_helper_initializer is not None:
                helper = self._decoder_helper_initializer(
                    self._word_embeddings)
            else:
                helper = seq2seq.GreedyEmbeddingHelper(
                    embedding=self._word_embeddings,
                    start_tokens=tf.tile([0], [batch_size]),
                    end_token=1)

            decoder = seq2seq.BasicDecoder(decoder_cell,
                                           helper=helper,
                                           initial_state=decoder_initial_state,
                                           output_layer=logits_projection)

            decoder_outputs, _, _ = seq2seq.dynamic_decode(
                decoder=decoder, impute_finished=True)

            self._decoder_logits = decoder_outputs.rnn_output
            self.decoder_token_ids = tf.argmax(self._decoder_logits,
                                               -1,
                                               output_type=tf.int32)
Example #7
        def get_DecoderHelper(embedding_lookup, seq_lengths, token_dim,
                              gt_tokens=None, sequence_type='program',
                              unroll_type='teacher_forcing'):
            if unroll_type == 'teacher_forcing':
                if gt_tokens is None:
                    raise ValueError('teacher_forcing requires gt_tokens')
                embedding = embedding_lookup(gt_tokens)
                helper = seq2seq.TrainingHelper(embedding, seq_lengths)
            elif unroll_type == 'scheduled_sampling':
                if gt_tokens is None:
                    raise ValueError('scheduled_sampling requires gt_tokens')
                embedding = embedding_lookup(gt_tokens)
                # sample_prob 1.0: always sample from ground truth
                # sample_prob 0.0: always sample from prediction
                helper = seq2seq.ScheduledEmbeddingTrainingHelper(
                    embedding, seq_lengths, embedding_lookup,
                    1.0 - self.sample_prob, seed=None,
                    scheduling_seed=None)
            elif unroll_type == 'greedy':
                # during evaluation, we perform greedy unrolling.
                start_token = tf.zeros([self.batch_size], dtype=tf.int32) + \
                              token_dim
                if sequence_type == 'program':
                    end_token = self.vocab.token2int['m)']
                elif sequence_type == 'action':
                    end_token = token_dim - 1
                else:
                    # Hack to have no end token, greater than number of perceptions
                    end_token = 11
                helper = seq2seq.GreedyEmbeddingHelper(
                    embedding_lookup, start_token, end_token)
            else:
                raise ValueError('Unknown unroll type')

            return helper
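In the scheduled_sampling branches above, 1.0 - self.sample_prob is passed because sampling_probability in ScheduledEmbeddingTrainingHelper is the probability of feeding back a sampled model prediction instead of the ground-truth token (0.0 reduces to plain teacher forcing). A minimal sketch, assuming TensorFlow 1.x; the placeholder names and sizes below are illustrative only.

# Hedged sketch of scheduled sampling with tf.contrib.seq2seq.
import tensorflow as tf
from tensorflow.contrib import seq2seq

embedding_matrix = tf.get_variable('embedding', [200, 32])
gt_tokens = tf.placeholder(tf.int32, [None, None])   # ground-truth ids (assumed)
seq_lengths = tf.placeholder(tf.int32, [None])
# Probability of feeding back a *sampled* model prediction instead of the
# ground-truth token; 0.0 is plain teacher forcing.
sampling_probability = tf.placeholder_with_default(0.25, [])

helper = seq2seq.ScheduledEmbeddingTrainingHelper(
    inputs=tf.nn.embedding_lookup(embedding_matrix, gt_tokens),
    sequence_length=seq_lengths,
    embedding=embedding_matrix,
    sampling_probability=sampling_probability)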
Example #8
    def _init_decoder(self):
        lstm_decoder = tf.contrib.rnn.DropoutWrapper(
            tf.contrib.rnn.LSTMCell(self.rnn_size),
            output_keep_prob=self.keep_prob)
        attention_mechanism = tf.contrib.seq2seq.LuongAttention(
            self.rnn_size, self.encoder_outputs, name='LuongAttention')
        self.decoder_cell = tf.contrib.seq2seq.AttentionWrapper(
            lstm_decoder,
            attention_mechanism,
            attention_layer_size=self.rnn_size,
            name="AttentionWrapper")
        batch_size = tf.shape(self.encoder_inputs)[0]
        attn_zero = self.decoder_cell.zero_state(batch_size=batch_size,
                                                 dtype=tf.float32)
        init_state = attn_zero.clone(cell_state=self.encoder_final_state)
        with tf.variable_scope(
                "decoder"
        ) as scope:  # Need to understand why we aren't using the dynamic_rnn method here
            output_layer = layers_core.Dense(units=self.effective_vocab_size,
                                             activation=None)

            # Train decoding
            train_helper = seq2seq.TrainingHelper(
                inputs=self.decoder_train_inputs_embedded,
                sequence_length=self.decoder_train_length,
                time_major=False)
            train_decoder = seq2seq.BasicDecoder(cell=self.decoder_cell,
                                                 helper=train_helper,
                                                 initial_state=init_state)
            self.decoder_outputs_train, _, _ = seq2seq.dynamic_decode(
                decoder=train_decoder,
                output_time_major=False,
                impute_finished=True,
                maximum_iterations=self.max_decoder_seq_length,
                scope=scope)
            self.decoder_logits_train = output_layer.apply(
                self.decoder_outputs_train.rnn_output)
            self.decoder_prediction_train = tf.argmax(
                self.decoder_logits_train, 2)

            # Greedy decoding
            scope.reuse_variables()
            greedy_helper = seq2seq.GreedyEmbeddingHelper(
                embedding=self.embedding_matrix,
                start_tokens=self.decoder_start_tokens,
                end_token=self.eos)
            greedy_decoder = seq2seq.BasicDecoder(cell=self.decoder_cell,
                                                  helper=greedy_helper,
                                                  initial_state=init_state,
                                                  output_layer=output_layer)
            self.decoder_outputs_inference, _, _ = seq2seq.dynamic_decode(
                decoder=greedy_decoder,
                output_time_major=False,
                impute_finished=True,
                maximum_iterations=self.max_decoder_seq_length,  # Figure out a better way of setting this
                scope=scope)
            self.decoder_prediction_inference = tf.argmax(
                self.decoder_outputs_inference.rnn_output, 2)
Example #9
def greedy_eval_decoder(agenda,
                        embeddings,
                        extended_base_words,
                        oov,
                        start_token_id,
                        stop_token_id,
                        base_sent_hiddens,
                        insert_word_embeds,
                        delete_word_embeds,
                        base_length,
                        iw_length,
                        dw_length,
                        vocab_size,
                        attn_dim,
                        hidden_dim,
                        num_layer,
                        max_sentence_length,
                        swap_memory,
                        enable_dropout=False,
                        dropout_keep=1.,
                        no_insert_delete_attn=False):
    with tf.variable_scope(OPS_NAME, 'decoder', reuse=True):
        batch_size = tf.shape(base_sent_hiddens)[0]

        start_token_id = tf.cast(start_token_id, tf.int32)
        stop_token_id = tf.cast(stop_token_id, tf.int32)

        helper = seq2seq.GreedyEmbeddingHelper(
            create_embedding_fn(vocab_size),
            tf.fill([batch_size], start_token_id), stop_token_id)

        cell, zero_states = create_decoder_cell(
            agenda,
            extended_base_words,
            oov,
            base_sent_hiddens,
            insert_word_embeds,
            delete_word_embeds,
            base_length,
            iw_length,
            dw_length,
            vocab_size,
            attn_dim,
            hidden_dim,
            num_layer,
            enable_dropout=enable_dropout,
            dropout_keep=dropout_keep,
            no_insert_delete_attn=no_insert_delete_attn)

        decoder = seq2seq.BasicDecoder(cell, helper, zero_states)

        outputs, state, lengths = seq2seq.dynamic_decode(
            decoder,
            maximum_iterations=max_sentence_length,
            swap_memory=swap_memory)

        return outputs, state, lengths
Example #10
    def build_predict_decoder(self):
        # start_tokens: [batch_size,]
        start_tokens = tf.ones([self.batch_size], tf.int32) * self.start_token
        end_token = self.end_token

        if not self.use_beamsearch_decode:

            # Helper to feed inputs for greedy decoding: use the argmax of the output
            if self.predict_mode == 'sample':
                print('Building sample decoder...')
                decoding_helper = seq2seq.SampleEmbeddingHelper(
                    start_tokens=start_tokens,
                    end_token=end_token,
                    embedding=lambda inputs: tf.nn.embedding_lookup(
                        self.embedding, inputs))
            elif self.predict_mode == 'greedy':
                print('Building greedy decoder...')
                # embedding: the params argument for embedding_lookup, i.e. just pass in the embedding variable defined earlier
                # start_tokens: the token id that starts each sequence in the batch
                # end_token: the token id that terminates a sequence
                decoding_helper = seq2seq.GreedyEmbeddingHelper(
                    start_tokens=start_tokens,
                    end_token=end_token,
                    embedding=lambda inputs: tf.nn.embedding_lookup(
                        self.embedding, inputs))
            else:
                raise NotImplementedError(
                    'Predict mode: {} is not yet implemented'.format(
                        self.predict_mode))

            inference_decoder = seq2seq.BasicDecoder(
                cell=self.decoder_cell,
                helper=decoding_helper,
                initial_state=self.decoder_initial_state,
                output_layer=self.output_layer)
        else:
            raise NotImplementedError(
                'Beamsearch decode is not yet implemented.')

        # rnn_output: [batch_size, decoder_targets_length, vocab_size], the per-step vocabulary scores produced while decoding; can be used to compute the loss
        # sample_id: [batch_size, decoder_targets_length], tf.int32, the final decoded token ids
        self.decoder_outputs_decode, self.decoder_last_state_decode, self.decoder_outputs_length_decode = seq2seq.dynamic_decode(
            decoder=inference_decoder,
            output_time_major=False,
            maximum_iterations=self.max_decode_step)

        if not self.use_beamsearch_decode:
            self.decoder_pred_decode = tf.expand_dims(
                self.decoder_outputs_decode.sample_id,
                -1)  # append a trailing dimension of size 1 (column-vector form)
        else:
            raise NotImplementedError('{} mode is not recognized.'.format(
                self.mode))
Example #11
    def _helper(self, train_test_predict, embeded_inputs, sequences_lengths,
                start_tokens, end_token):
        if train_test_predict == 'train' or train_test_predict == 'test':
            helper = seq2seq.TrainingHelper(embeded_inputs, sequences_lengths)
        elif train_test_predict == 'predict':
            helper = seq2seq.GreedyEmbeddingHelper(self.embedding_vector,
                                                   start_tokens, end_token)
        else:
            raise TypeError(
                "train_test_predict should be 'train', 'test', or 'predict'")
        return helper
Example #12
    def __init__(self, config, w2i_target):
        self.build_inputs()
        with tf.variable_scope('encoder'):
            encoder_embedding = tf.Variable(tf.random_uniform([config.source_vocab_size, config.embedding_dim]),
                                            dtype=tf.float32, name='encoder_embedding')
            encoder_inputs_embedded = tf.nn.embedding_lookup(encoder_embedding, self.seq_inputs)

            with tf.variable_scope("gru_cell"):
                encoder_cell = tf.nn.rnn_cell.GRUCell(config.hidden_dim)

            ((encoder_fw_outputs, encoder_bw_outputs),
             (encoder_fw_final_state, encoder_bw_final_state)) = tf.nn.bidirectional_dynamic_rnn(cell_fw=encoder_cell,
                                                                                                 cell_bw=encoder_cell,
                                                                                                 inputs=encoder_inputs_embedded,
                                                                                                 sequence_length=self.seq_inputs_length,
                                                                                                 dtype=tf.float32,
                                                                                                 time_major=False)
            encoder_state = tf.add(encoder_fw_final_state, encoder_bw_final_state)
            encoder_outputs = tf.add(encoder_fw_outputs, encoder_bw_outputs)

        with tf.variable_scope('decoder'):
            decoder_embedding = tf.Variable(tf.random_normal([config.target_vocab_size, config.embedding_dim]),
                                            dtype=tf.float32, name='decoder_embedding')
            token_go = tf.ones([self.batch_size], dtype=tf.int32, name='token_go') * w2i_target['_GO']

            # helper object
            helper = seq2seq_contrib.GreedyEmbeddingHelper(decoder_embedding, token_go, w2i_target["_EOS"])

            with tf.variable_scope('gru_cell'):
                decoder_cell = tf.nn.rnn_cell.GRUCell(config.hidden_dim)

                decoder_initial_state = encoder_state

            # build the decoder
            decoder = seq2seq_contrib.BasicDecoder(decoder_cell, helper, decoder_initial_state,
                                                   output_layer=tf.layers.Dense(config.target_vocab_size))
            decoder_outputs, decoder_state, final_sequence_lengths = seq2seq_contrib.dynamic_decode(decoder,
                                                                                                    maximum_iterations=tf.reduce_max(
                                                                                                        self.seq_targets_length))

            self.decoder_logits = decoder_outputs.rnn_output
            self.out = tf.argmax(self.decoder_logits, 2)

            # mask out the padded zeros so they do not contribute to the loss
            sequence_mask = tf.sequence_mask(self.seq_targets_length, dtype=tf.float32)
            self.loss = seq2seq_contrib.sequence_loss(logits=self.decoder_logits, targets=self.seq_targets,
                                                      weights=sequence_mask)
            # clip gradients to guard against vanishing/exploding gradients
            opt = tf.train.AdamOptimizer(config.learning_rate)
            gradients = opt.compute_gradients(self.loss)
            capped_gradients = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in gradients if grad is not None]
            self.train_op = opt.apply_gradients(capped_gradients)
Example #13
    def build_predict_decoder(self):
        # start_tokens: [batch_size,]
        start_tokens = tf.ones([self.batch_size], tf.int32) * self.start_token
        end_token = self.end_token

        if not self.use_beamsearch_decode:

            # Helper to feed inputs for greedy decoding: use the argmax of the output
            if self.predict_mode == 'sample':
                print('Building sample decoder...')
                decoding_helper = seq2seq.SampleEmbeddingHelper(
                    start_tokens=start_tokens,
                    end_token=end_token,
                    embedding=lambda inputs: tf.nn.embedding_lookup(
                        self.embedding, inputs),
                    seed=random.randint(0, 1000))
            elif self.predict_mode == 'greedy':
                print('Building greedy decoder...')
                decoding_helper = seq2seq.GreedyEmbeddingHelper(
                    start_tokens=start_tokens,
                    end_token=end_token,
                    embedding=lambda inputs: tf.nn.embedding_lookup(
                        self.embedding, inputs))
            else:
                raise NotImplementedError(
                    'Predict mode: {} is not yet implemented'.format(
                        self.predict_mode))

            inference_decoder = seq2seq.BasicDecoder(
                cell=self.decoder_cell,
                helper=decoding_helper,
                initial_state=self.decoder_initial_state,
                output_layer=self.output_layer)
        else:
            raise NotImplementedError(
                'Beamsearch decode is not yet implemented.')

        self.decoder_outputs_decode, self.decoder_last_state_decode, self.decoder_outputs_length_decode = seq2seq.dynamic_decode(
            decoder=inference_decoder,
            output_time_major=False,
            maximum_iterations=self.max_decode_step)

        if not self.use_beamsearch_decode:
            self.decoder_pred_decode = tf.expand_dims(
                self.decoder_outputs_decode.sample_id, -1)
        else:
            raise NotImplementedError('{} mode is not recognized.'.format(
                self.mode))
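The two build_predict_decoder variants above switch between SampleEmbeddingHelper and GreedyEmbeddingHelper. The sketch below shows that both take the same start_tokens/end_token/embedding arguments, plus SampleEmbeddingHelper's optional softmax_temperature; it assumes TensorFlow 1.x, and the names and sizes are illustrative only.

# Hedged sketch: greedy vs. sampled decoding differ only in the helper.
import tensorflow as tf
from tensorflow.contrib import seq2seq

embedding_matrix = tf.get_variable('embedding', [500, 64])
embedding_fn = lambda ids: tf.nn.embedding_lookup(embedding_matrix, ids)
start_tokens = tf.fill([16], 1)  # assumed start-token id 1, batch size 16
end_token = 2                    # assumed end-token id

greedy_helper = seq2seq.GreedyEmbeddingHelper(
    embedding=embedding_fn, start_tokens=start_tokens, end_token=end_token)

# SampleEmbeddingHelper draws the next input from the output distribution;
# softmax_temperature > 1.0 flattens it, < 1.0 sharpens it (None means 1.0).
sample_helper = seq2seq.SampleEmbeddingHelper(
    embedding=embedding_fn, start_tokens=start_tokens, end_token=end_token,
    softmax_temperature=0.8, seed=42)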
Example #14
    def inference_decoding_layer(self, embeddings, start_token, end_token, dec_cell,
                                 initial_state, output_layer, max_summary_len):
        start_tokens = tf.tile(tf.constant([start_token], dtype=tf.int32),
                               [self.batch_size], name='start_token')

        inference_helper = seq2seq.GreedyEmbeddingHelper(embeddings, start_tokens, end_token)

        inference_decoder = seq2seq.BasicDecoder(dec_cell,
                                                 inference_helper,
                                                 initial_state,
                                                 output_layer)

        inference_logits, _, _ = seq2seq.dynamic_decode(inference_decoder, impute_finished=True,
                                                        maximum_iterations=max_summary_len)

        return inference_logits
Example #15
    def _init_decoder(self):
        self.decoder_cell = tf.contrib.rnn.BasicLSTMCell(self.rnn_size)
        with tf.variable_scope(
                "decoder"
        ) as scope:  # Need to understand why we aren't using the dynamic_rnn method here
            output_layer = layers_core.Dense(units=self.effective_vocab_size,
                                             activation=None)

            # Train decoding
            train_helper = seq2seq.TrainingHelper(
                inputs=self.decoder_train_inputs_embedded,
                sequence_length=self.decoder_train_length,
                time_major=False)
            train_decoder = seq2seq.BasicDecoder(
                cell=self.decoder_cell,
                helper=train_helper,
                initial_state=self.encoder_final_state)
            self.decoder_outputs_train, _, _ = seq2seq.dynamic_decode(
                decoder=train_decoder,
                output_time_major=False,
                impute_finished=True,
                maximum_iterations=self.max_decoder_seq_length,
                scope=scope)
            self.decoder_logits_train = output_layer.apply(
                self.decoder_outputs_train.rnn_output)
            self.decoder_prediction_train = tf.argmax(
                self.decoder_logits_train, 2)

            # Greedy decoding
            scope.reuse_variables()
            greedy_helper = seq2seq.GreedyEmbeddingHelper(
                embedding=self.embedding_matrix,
                start_tokens=self.decoder_start_tokens,
                end_token=self.eos)
            greedy_decoder = seq2seq.BasicDecoder(
                cell=self.decoder_cell,
                helper=greedy_helper,
                initial_state=self.encoder_final_state,
                output_layer=output_layer)
            self.decoder_outputs_inference, _, _ = seq2seq.dynamic_decode(
                decoder=greedy_decoder,
                output_time_major=False,
                impute_finished=True,
                maximum_iterations=self.max_decoder_seq_length,  # Figure out a better way of setting this
                scope=scope)
            self.decoder_prediction_inference = tf.argmax(
                self.decoder_outputs_inference.rnn_output, 2)
Example #16
    def _build_decoder_greedy(self):

        batch_size, _ = tf.unstack(tf.shape(self._labels))
        self._helper_greedy = seq2seq.GreedyEmbeddingHelper(
            embedding=self._embedding_matrix,
            start_tokens=tf.tile([self._GO_ID], [batch_size]),
            end_token=self._EOS_ID)

        if self._hparams.enable_attention is True:
            attention_mechanisms, layer_sizes = self._create_attention_mechanisms()

            attention_cells = seq2seq.AttentionWrapper(
                cell=self._decoder_cells,
                attention_mechanism=attention_mechanisms,
                attention_layer_size=layer_sizes,
                initial_cell_state=self._decoder_initial_state,
                alignment_history=self._hparams.write_attention_alignment,
                output_attention=self._output_attention
            )
            attn_zero = attention_cells.zero_state(
                dtype=self._hparams.dtype, batch_size=batch_size
            )
            initial_state = attn_zero.clone(
                cell_state=self._decoder_initial_state
            )
            cells = attention_cells
        else:
            cells = self._decoder_cells
            initial_state = self._decoder_initial_state

        self._decoder_inference = seq2seq.BasicDecoder(
            cell=cells,
            helper=self._helper_greedy,
            initial_state=initial_state,
            output_layer=self._dense_layer)

        outputs, states, lengths = seq2seq.dynamic_decode(
            self._decoder_inference,
            impute_finished=True,
            swap_memory=False,
            maximum_iterations=self._hparams.max_label_length)

        # self._result = outputs, states, lengths
        self.inference_outputs = outputs.rnn_output
        self.inference_predicted_ids = outputs.sample_id

        if self._hparams.write_attention_alignment is True:
            self.attention_summary = self._create_attention_alignments_summary(states)
Example #17
    def inference_decoding_layer(self, embeddings, decoder_cell, initial_state,
                                 output_layer):
        start_token = self.vocab_dictionary['<GO>']
        end_token = self.vocab_dictionary['<EOS>']
        start_tokens = tf.tile(tf.constant([start_token], dtype=tf.int32),
                               [self.batch_size],
                               name='start_tokens')

        helper = seq2seq.GreedyEmbeddingHelper(embeddings, start_tokens,
                                               end_token)
        decoder = seq2seq.BasicDecoder(decoder_cell, helper, initial_state,
                                       output_layer)

        logits, _, _ = seq2seq.dynamic_decode(
            decoder, output_time_major=False, impute_finished=True,
            maximum_iterations=self.out_max_length)
        return logits
Example #18
    def _model(self, embed):
        graph = tf.Graph()
        with graph.as_default():
            embedding = tf.Variable(embed, trainable=False, name='embedding')  # word embeddings
            lr = tf.placeholder(tf.float32, [], name='learning_rate')
            # input data
            x_input = tf.placeholder(tf.int32, [None, None], name='x_input')  # source inputs X
            x_sequence_length = tf.placeholder(tf.int32, [None], name='x_length')  # length of each input sequence
            x_embedding = tf.nn.embedding_lookup(embedding, x_input)  # map input token ids to embedding vectors
            y_input = tf.placeholder(tf.int32, [None, None], name='y_input')  # target inputs Y
            y_sequence_length = tf.placeholder(tf.int32, [None], name='y_length')  # length of each target sequence
            y_embedding = tf.nn.embedding_lookup(embedding, y_input)  # embed the targets
            batch_size = tf.placeholder(tf.int32, [], name='batch_size')
            keep_prob = tf.placeholder(tf.float32, [], name='keep_prob')

            encoder_output, encoder_state = self._encoder(keep_prob, x_embedding, x_sequence_length, batch_size)

            training_helper = seq2seq.TrainingHelper(inputs=y_embedding, sequence_length=y_sequence_length)
            predict_helper = seq2seq.GreedyEmbeddingHelper(embedding, tf.fill([batch_size], self.word2index['GO']),
                                                           self.word2index['EOS'])
            train_output = self._decoder(keep_prob, encoder_output, encoder_state, batch_size, 'decode',
                                         training_helper)
            predict_output = self._decoder(keep_prob, encoder_output, encoder_state, batch_size, 'decode',
                                           predict_helper, True)

            # loss function
            training_logits = tf.identity(train_output.rnn_output, name='training_logits')
            predicting_logits = tf.identity(predict_output.rnn_output, name='predicting')

            # target = tf.slice(y_input, [0, 1], [-1, -1])
            # target = tf.concat([tf.fill([batch_size, 1], self.word2index['GO']), y_input], 1)
            target = y_input

            masks = tf.sequence_mask(y_sequence_length, dtype=tf.float32, name='mask')

            loss = seq2seq.sequence_loss(training_logits, target, masks)
            optimizer = tf.train.AdamOptimizer(lr)
            gradients = optimizer.compute_gradients(loss)
            capped_gradients = [(tf.clip_by_value(grad, -5., 5.), var) for grad, var in gradients if
                                grad is not None]
            train_op = optimizer.apply_gradients(capped_gradients)
            # predicting_logits = tf.nn.softmax(predicting_logits, axis=1)
            tf.summary.scalar('loss', loss)
            tf.summary.scalar('learning rate', lr)
            # tf.summary.tensor_summary('learning rate', lr)

        return graph, loss, train_op, predicting_logits
Example #19
def decode_L(inputs, dim, embed_map, start_token,
             unroll_type='teacher_forcing', seq=None, seq_len=None,
             end_token=None, max_seq_len=None, output_layer=None,
             is_train=True, scope='decode_L', reuse=tf.AUTO_REUSE):

    with tf.variable_scope(scope, reuse=reuse) as scope:
        init_c = fc_layer(inputs, dim, use_bias=True, use_bn=False,
                          activation_fn=tf.nn.tanh, is_training=is_train,
                          scope='Linear_c', reuse=reuse)
        init_h = fc_layer(inputs, dim, use_bias=True, use_bn=False,
                          activation_fn=tf.nn.tanh, is_training=is_train,
                          scope='Linear_h', reuse=reuse)
        init_state = rnn.LSTMStateTuple(init_c, init_h)
        log.warning(scope.name)

        start_tokens = tf.zeros(
            [tf.shape(inputs)[0]], dtype=tf.int32) + start_token
        if unroll_type == 'teacher_forcing':
            if seq is None: raise ValueError('seq is None')
            if seq_len is None: raise ValueError('seq_len is None')
            seq_with_start = tf.concat([tf.expand_dims(start_tokens, axis=1),
                                        seq[:, :-1]], axis=1)
            helper = seq2seq.TrainingHelper(
                tf.nn.embedding_lookup(embed_map, seq_with_start), seq_len)
        elif unroll_type == 'greedy':
            if end_token is None: raise ValueError('end_token is None')
            helper = seq2seq.GreedyEmbeddingHelper(
                lambda e: tf.nn.embedding_lookup(embed_map, e),
                start_tokens, end_token)
        else:
            raise ValueError('Unknown unroll_type')

        cell = rnn.BasicLSTMCell(num_units=dim, state_is_tuple=True)
        decoder = seq2seq.BasicDecoder(cell, helper, init_state,
                                       output_layer=output_layer)
        outputs, _, pred_length = seq2seq.dynamic_decode(
            decoder, maximum_iterations=max_seq_len,
            scope='dynamic_decoder')

        output = outputs.rnn_output
        pred = outputs.sample_id

        return output, pred, pred_length
Example #20
    def decoding_layer_inference(self, num_units, max_time, batch_size,
                                 char2numY, output_embedding, encoder_output,
                                 last_state, bidirectional):
        if not bidirectional:
            decoder_cell = rnn.LSTMCell(num_units)
        else:
            decoder_cell = rnn.LSTMCell(2 * num_units)
        infer_helper = seq2seq.GreedyEmbeddingHelper(
            output_embedding,  # note: this differs from data_output_embed
            tf.fill([batch_size], char2numY['<GO>']),
            char2numY['<EOS>'])
        attention_mechanism = seq2seq.BahdanauAttention(
            num_units=num_units,
            memory=encoder_output,
            memory_sequence_length=[max_time] * batch_size)
        attention_cell = seq2seq.AttentionWrapper(
            cell=decoder_cell,
            attention_mechanism=attention_mechanism,
            attention_layer_size=num_units,
        )
        state = attention_cell.zero_state(batch_size=batch_size,
                                          dtype=tf.float32)
        state = state.clone(cell_state=last_state)
        output_layer = tf.layers.Dense(
            len(char2numY) - 2,
            kernel_initializer=tf.truncated_normal_initializer(mean=0.0,
                                                               stddev=0.1))
        decoder = seq2seq.BasicDecoder(cell=attention_cell,
                                       helper=infer_helper,
                                       initial_state=state,
                                       output_layer=output_layer)

        infer_outputs, _, _ = seq2seq.dynamic_decode(
            decoder=decoder, impute_finished=True, maximum_iterations=max_time)

        return infer_outputs
Example #21
    saver.save(session, "model", epoch)

batch = next(test_data())
batch = collapse_documents(batch)

answers = session.run(
    answer_tags, {
        document_tokens: batch["document_tokens"],
        document_lengths: batch["document_lengths"],
    })
answers = np.argmax(answers, 2)

batch = expand_answers(batch, answers)

helper = seq2seq.GreedyEmbeddingHelper(embedding,
                                       tf.fill([batch["size"]], START_TOKEN),
                                       END_TOKEN)
decoder = seq2seq.BasicDecoder(decoder_cell,
                               helper,
                               encoder_state,
                               output_layer=projection)
decoder_outputs, _, _ = seq2seq.dynamic_decode(decoder, maximum_iterations=16)
decoder_outputs = decoder_outputs.rnn_output

questions = session.run(
    decoder_outputs, {
        document_tokens: batch["document_tokens"],
        document_lengths: batch["document_lengths"],
        answer_labels: batch["answer_labels"],
        encoder_input_mask: batch["answer_masks"],
        encoder_lengths: batch["answer_lengths"],
    })

Example #22
    def build_decoder(self):
        print("building decoder and attention..")
        with tf.variable_scope('decoder'):
            # Building decoder_cell and decoder_initial_state
            self.decoder_cell, self.decoder_initial_state = self.build_decoder_cell(
            )

            # Initialize decoder embeddings to have variance=1.
            sqrt3 = math.sqrt(3)  # Uniform(-sqrt(3), sqrt(3)) has variance=1.
            initializer = tf.random_uniform_initializer(-sqrt3,
                                                        sqrt3,
                                                        dtype=self.dtype)

            self.decoder_embeddings = tf.get_variable(
                name='embedding',
                shape=[self.num_decoder_symbols, self.embedding_size],
                initializer=initializer,
                dtype=self.dtype)

            # Input projection layer to feed embedded inputs to the cell
            # ** Essential when use_residual=True to match input/output dims
            input_layer = Dense(self.hidden_units,
                                dtype=self.dtype,
                                name='input_projection')

            # Output projection layer to convert cell_outputs to logits
            output_layer = Dense(self.num_decoder_symbols,
                                 name='output_projection')

            if self.mode == 'train':
                # decoder_inputs_embedded: [batch_size, max_time_step + 1, embedding_size]
                self.decoder_inputs_embedded = tf.nn.embedding_lookup(
                    params=self.decoder_embeddings,
                    ids=self.decoder_inputs_train)

                # Embedded inputs having gone through input projection layer
                self.decoder_inputs_embedded = input_layer(
                    self.decoder_inputs_embedded)

                # Helper to feed inputs for training: read inputs from dense ground truth vectors
                training_helper = seq2seq.TrainingHelper(
                    inputs=self.decoder_inputs_embedded,
                    sequence_length=self.decoder_inputs_length_train,
                    time_major=False,
                    name='training_helper')

                training_decoder = seq2seq.BasicDecoder(
                    cell=self.decoder_cell,
                    helper=training_helper,
                    initial_state=self.decoder_initial_state,
                    output_layer=output_layer)
                #output_layer=None)

                # Maximum decoder time_steps in current batch
                max_decoder_length = tf.reduce_max(
                    self.decoder_inputs_length_train)

                # decoder_outputs_train: BasicDecoderOutput
                #                        namedtuple(rnn_outputs, sample_id)
                # decoder_outputs_train.rnn_output: [batch_size, max_time_step + 1, num_decoder_symbols] if output_time_major=False
                #                                   [max_time_step + 1, batch_size, num_decoder_symbols] if output_time_major=True
                # decoder_outputs_train.sample_id: [batch_size], tf.int32
                (self.decoder_outputs_train, self.decoder_last_state_train,
                 self.decoder_outputs_length_train) = (seq2seq.dynamic_decode(
                     decoder=training_decoder,
                     output_time_major=False,
                     impute_finished=True,
                     maximum_iterations=max_decoder_length))

                # More efficient to do the projection on the batch-time-concatenated tensor
                # logits_train: [batch_size, max_time_step + 1, num_decoder_symbols]
                # self.decoder_logits_train = output_layer(self.decoder_outputs_train.rnn_output)
                self.decoder_logits_train = tf.identity(
                    self.decoder_outputs_train.rnn_output)
                # Use argmax to extract decoder symbols to emit
                self.decoder_pred_train = tf.argmax(self.decoder_logits_train,
                                                    axis=-1,
                                                    name='decoder_pred_train')

                # masks: masking for valid and padded time steps, [batch_size, max_time_step + 1]
                masks = tf.sequence_mask(
                    lengths=self.decoder_inputs_length_train,
                    maxlen=max_decoder_length,
                    dtype=self.dtype,
                    name='masks')

                # Computes per word average cross-entropy over a batch
                # Internally calls 'nn_ops.sparse_softmax_cross_entropy_with_logits' by default
                self.loss = seq2seq.sequence_loss(
                    logits=self.decoder_logits_train,
                    targets=self.decoder_targets_train,
                    weights=masks,
                    average_across_timesteps=True,
                    average_across_batch=True,
                )
                # Training summary for the current batch_loss
                tf.summary.scalar('loss', self.loss)

                # Construct graphs for minimizing loss
                self.init_optimizer()

            elif self.mode == 'decode':

                # Start_tokens: [batch_size,] `int32` vector
                start_tokens = tf.ones([
                    self.batch_size,
                ], tf.int32) * data_utils.start_token
                end_token = data_utils.end_token

                def embed_and_input_proj(inputs):
                    return input_layer(
                        tf.nn.embedding_lookup(self.decoder_embeddings,
                                               inputs))

                if not self.use_beamsearch_decode:
                    # Helper to feed inputs for greedy decoding: uses the argmax of the output
                    decoding_helper = seq2seq.GreedyEmbeddingHelper(
                        start_tokens=start_tokens,
                        end_token=end_token,
                        embedding=embed_and_input_proj)
                    # Basic decoder performs greedy decoding at each time step
                    print("building greedy decoder..")
                    inference_decoder = seq2seq.BasicDecoder(
                        cell=self.decoder_cell,
                        helper=decoding_helper,
                        initial_state=self.decoder_initial_state,
                        output_layer=output_layer)
                else:
                    # Beamsearch is used to approximately find the most likely translation
                    print("building beamsearch decoder..")
                    inference_decoder = beam_search_decoder.BeamSearchDecoder(
                        cell=self.decoder_cell,
                        embedding=embed_and_input_proj,
                        start_tokens=start_tokens,
                        end_token=end_token,
                        initial_state=self.decoder_initial_state,
                        beam_width=self.beam_width,
                        output_layer=output_layer,
                    )
                # For GreedyDecoder, return
                # decoder_outputs_decode: BasicDecoderOutput instance
                #                         namedtuple(rnn_outputs, sample_id)
                # decoder_outputs_decode.rnn_output: [batch_size, max_time_step, num_decoder_symbols] 	if output_time_major=False
                #                                    [max_time_step, batch_size, num_decoder_symbols] 	if output_time_major=True
                # decoder_outputs_decode.sample_id: [batch_size, max_time_step], tf.int32		if output_time_major=False
                #                                   [max_time_step, batch_size], tf.int32               if output_time_major=True

                # For BeamSearchDecoder, return
                # decoder_outputs_decode: FinalBeamSearchDecoderOutput instance
                #                         namedtuple(predicted_ids, beam_search_decoder_output)
                # decoder_outputs_decode.predicted_ids: [batch_size, max_time_step, beam_width] if output_time_major=False
                #                                       [max_time_step, batch_size, beam_width] if output_time_major=True
                # decoder_outputs_decode.beam_search_decoder_output: BeamSearchDecoderOutput instance
                #                                                    namedtuple(scores, predicted_ids, parent_ids)

                (self.decoder_outputs_decode, self.decoder_last_state_decode,
                 self.decoder_outputs_length_decode) = (
                     seq2seq.dynamic_decode(
                         decoder=inference_decoder,
                         output_time_major=False,
                         #impute_finished=True,	# error occurs
                         maximum_iterations=self.max_decode_step))

                if not self.use_beamsearch_decode:
                    # decoder_outputs_decode.sample_id: [batch_size, max_time_step]
                    # Or use argmax to find decoder symbols to emit:
                    # self.decoder_pred_decode = tf.argmax(self.decoder_outputs_decode.rnn_output,
                    #                                      axis=-1, name='decoder_pred_decode')

                    # Here, we use expand_dims to be compatible with the result of the beamsearch decoder
                    # decoder_pred_decode: [batch_size, max_time_step, 1] (output_major=False)
                    self.decoder_pred_decode = tf.expand_dims(
                        self.decoder_outputs_decode.sample_id, -1)

                else:
                    # Use beam search to approximately find the most likely translation
                    # decoder_pred_decode: [batch_size, max_time_step, beam_width] (output_major=False)
                    self.decoder_pred_decode = self.decoder_outputs_decode.predicted_ids
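Several examples gate on use_beamsearch_decode but either raise NotImplementedError or only hint at the beam-search path. Below is a hedged sketch of what that path typically looks like with tf.contrib.seq2seq (TensorFlow 1.x); the encoder state, cell, embedding, token ids, and sizes are stand-ins, and impute_finished is left off, consistent with the "# error occurs" comments above.

# Hedged sketch of the beam-search inference path; all names and sizes are assumptions.
import tensorflow as tf
from tensorflow.contrib import seq2seq

vocab_size, embed_dim, hidden_dim = 100, 32, 64
batch_size, beam_width, go_id, eos_id = 4, 5, 1, 2

embedding_matrix = tf.get_variable('embedding', [vocab_size, embed_dim])
decoder_cell = tf.nn.rnn_cell.GRUCell(hidden_dim)
output_layer = tf.layers.Dense(vocab_size)

# Stand-in for an encoder final state; in the examples above it comes from the encoder.
encoder_state = tf.zeros([batch_size, hidden_dim])

# Every tensor in the initial state must be tiled beam_width times.
tiled_state = seq2seq.tile_batch(encoder_state, multiplier=beam_width)

decoder = seq2seq.BeamSearchDecoder(
    cell=decoder_cell,
    embedding=embedding_matrix,
    start_tokens=tf.fill([batch_size], go_id),
    end_token=eos_id,
    initial_state=tiled_state,
    beam_width=beam_width,
    output_layer=output_layer,
    length_penalty_weight=0.0)

# impute_finished is typically left at its default (False) with BeamSearchDecoder.
outputs, state, lengths = seq2seq.dynamic_decode(
    decoder, maximum_iterations=30)
predicted_ids = outputs.predicted_ids  # [batch_size, <=30, beam_width]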
Example #23
    def build_decoder(self, encoder_outputs, encoder_state):
        """构建解码器
        """
        with tf.variable_scope('decoder') as decoder_scope:
            #创建解码器单元
            (self.decoder_cell,self.decoder_initial_state)\
            = self.build_decoder_cell(encoder_outputs, encoder_state)

            # decoder embedding: place on CPU or GPU depending on the vocabulary size
            with tf.device(_get_embed_device(self.target_vocab_size)):
                # if the embedding is shared, reuse the encoder's; otherwise load
                # a pretrained one or initialize it for further training
                if self.share_embedding:
                    self.decoder_embeddings = self.encoder_embeddings
                # pretrained embedding
                elif self.pretrained_embedding:

                    self.decoder_embeddings = tf.Variable(
                        tf.constant(
                            0.0,
                            shape=(self.target_vocab_size, self.embedding_size)),
                        trainable=True,  # whether the embedding is trainable
                        name='embeddings')
                    self.decoder_embeddings_placeholder = tf.placeholder(
                        tf.float32,
                        (self.target_vocab_size, self.embedding_size))
                    self.decoder_embeddings_init = self.decoder_embeddings.assign(
                            self.decoder_embeddings_placeholder)
                else:
                    self.decoder_embeddings = tf.get_variable(
                        name='embeddings',
                        shape=(self.target_vocab_size, self.embedding_size),
                        initializer=self.initializer,
                        dtype=tf.float32
                    )
            
            # decoder output projection
            self.decoder_output_projection = layers.Dense(
                self.target_vocab_size,  # one output unit per vocabulary entry
                dtype=tf.float32,
                use_bias=False,
                name='decoder_output_projection'
            )

            if self.mode == 'train':
                self.decoder_inputs_embedded = tf.nn.embedding_lookup(
                    params=self.decoder_embeddings,
                    ids=self.decoder_inputs_train   # set when the placeholders are created
                )
                inputs = self.decoder_inputs_embedded
                
                if self.time_major:
                    inputs = tf.transpose(inputs, (1, 0, 2))
                
                # helper that feeds the ground-truth inputs; normally used to drive the decoder during training
                training_helper = seq2seq.TrainingHelper(
                    inputs=inputs,
                    sequence_length=self.decoder_inputs_length,
                    time_major=self.time_major,
                    name='training_helper'
                )

                # do not apply output_layer here during training,
                # because projecting at every time step is slow;
                # note that this trick only works if the scope argument of dynamic_decode is set
                training_decoder = seq2seq.BasicDecoder(
                    cell=self.decoder_cell,
                    helper=training_helper,
                    initial_state=self.decoder_initial_state,  # initialize with the decoder cell state defined above
                )

                # maximum number of decoder time steps in the current batch
                max_decoder_length = tf.reduce_max(self.decoder_inputs_length)
                
                # run dynamic decoding
                (outputs, self.final_state, _) = seq2seq.dynamic_decode(  # dynamic decoder
                    decoder=training_decoder,
                    output_time_major=self.time_major,  # True: time (seq_length) is the leading dimension; False: batch_size is
                    impute_finished=True,  # once a sequence is finished, its remaining outputs are zeroed
                    maximum_iterations=max_decoder_length,  # maximum iterations, i.e. the most tokens the decoder may emit
                    parallel_iterations=self.parallel_iterations,  # parallelism of the underlying while_loop
                    swap_memory=True,  # on OOM, allow swapping tensors from GPU memory to host memory
                    scope=decoder_scope)
                # at training time, project all outputs through the dense layer in one pass; this is faster and is the recommended approach
                self.decoder_logits_train = self.decoder_output_projection(
                    outputs.rnn_output  # decoder outputs from above
                )

                # masks: masking for valid and padded time steps;
                # tf.sequence_mask builds a boolean mask from the sequence lengths
                """
                tf.sequence_mask([1,2], 4)
                -->
                [[ True False False False]
                 [ True  True False False]]
                """
                # [batch_size, max_time_step + 1]
                self.masks = tf.sequence_mask(
                    lengths=self.decoder_inputs_length,
                    maxlen=max_decoder_length,
                    dtype=tf.float32, 
                    name='masks'
                )

                decoder_logits_train = self.decoder_logits_train
                if self.time_major:
                    decoder_logits_train = tf.transpose(decoder_logits_train,
                                                        (1, 0, 2))
                # training-time predictions: decoder_logits_train has one score per vocabulary entry; the argmax index is the predicted token id
                self.decoder_pred_train = tf.argmax(
                    decoder_logits_train, 
                    axis=-1,
                    name='decoder_pred_train')

                # the variables below support a specialized training scheme
                # with custom rewards (essentially modified masks / losses)
                # train_entropy = cross entropy
                self.train_entropy = \
                    tf.nn.sparse_softmax_cross_entropy_with_logits(
                        labels=self.decoder_inputs,  # labels
                        logits=decoder_logits_train)  # predictions

                self.masks_rewards = self.masks * self.rewards
                
                # seq2seq sequence loss: a weighted average of the per-time-step losses, using the mask as weights; padded positions get weight 0
                self.loss_rewards = seq2seq.sequence_loss(
                    logits=decoder_logits_train,
                    targets=self.decoder_inputs,
                    weights=self.masks_rewards,  # these weights differ from the ones used below
                    average_across_timesteps=True,  # divide the loss by the total weight
                    average_across_batch=True,  # divide the total loss by the batch size
                )

                self.loss = seq2seq.sequence_loss(
                    logits=decoder_logits_train,
                    targets=self.decoder_inputs,
                    weights=self.masks,
                    average_across_timesteps=True,
                    average_across_batch=True,
                )

                self.loss_add = self.loss + self.add_loss

            elif self.mode == 'decode':
                # inference mode (not training)

                # tile the start token across the batch; see
                # https://blog.csdn.net/tsyccnh/article/details/82459859
                start_tokens = tf.tile(
                    [WordSequence.START],
                    [self.batch_size]
                )
                end_token = WordSequence.END

                def embed_and_input_proj(inputs):
                    """输入层的投影层wrapper
                    将输入转换成对应词表对应下的embedding
                    """
                    return tf.nn.embedding_lookup(
                        self.decoder_embeddings,
                        inputs
                    )
                
                # if beam search is not used, define the greedy helper and decoder here
                if not self.use_beamsearch_decode:
                    # Helper to feed inputs for greedy decoding:
                    # uses the argmax of the output
                    # greedy decoding
                    decoding_helper = seq2seq.GreedyEmbeddingHelper(
                        start_tokens=start_tokens,  # start tokens
                        end_token=end_token,  # end token
                        embedding=embed_and_input_proj  # maps token ids to embeddings
                    )
                    # Basic decoder performs greedy decoding at each time step
                    # print("building greedy decoder..")
                    inference_decoder = seq2seq.BasicDecoder(
                        cell=self.decoder_cell,
                        helper=decoding_helper,
                        initial_state=self.decoder_initial_state,
                        output_layer=self.decoder_output_projection
                    )
                else:
                    # Decode with beam search.
                    # Beamsearch is used to approximately
                    # find the most likely translation
                    # print("building beamsearch decoder..")
                    inference_decoder = BeamSearchDecoder(
                        cell=self.decoder_cell,
                        embedding=embed_and_input_proj,
                        start_tokens=start_tokens,
                        end_token=end_token,
                        initial_state=self.decoder_initial_state,
                        beam_width=self.beam_width,
                        output_layer=self.decoder_output_projection,
                    )
                
                
                # Cap the number of decoding steps.
                if self.max_decode_step is not None:
                    max_decode_step = self.max_decode_step
                else:
                    # Default: decode up to 4x the maximum input length.
                    # tf.round returns a float, so cast to int32 for
                    # dynamic_decode's maximum_iterations.
                    max_decode_step = tf.to_int32(tf.round(tf.reduce_max(
                        self.encoder_inputs_length) * 4))

                
                (
                    self.decoder_outputs_decode,  # outputs
                    self.final_state,             # final state
                    _  # self.decoder_outputs_length_decode
                ) = seq2seq.dynamic_decode(
                    decoder=inference_decoder,  # greedy or beam search decoder
                    output_time_major=self.time_major,
                    # impute_finished=True,  # error occurs
                    maximum_iterations=max_decode_step,
                    parallel_iterations=self.parallel_iterations,
                    swap_memory=True,
                    scope=decoder_scope
                )
                
                # Without beam search, greedy decoding is used.
                # dynamic_decode returns decoder_outputs_decode, a namedtuple
                # with two fields (rnn_output, sample_id):
                # rnn_output: [batch_size, decoder_targets_length, vocab_size],
                #     the per-step scores over the vocabulary (usable for a loss)
                # sample_id: [batch_size, decoder_targets_length], tf.int32,
                #     the decoded ids, i.e. the final answer

                if not self.use_beamsearch_decode:

                    dod = self.decoder_outputs_decode
                    self.decoder_pred_decode = dod.sample_id  # the final predicted ids

                    if self.time_major:
                        self.decoder_pred_decode = tf.transpose(
                            self.decoder_pred_decode, (1, 0))
                # When beam search is used (see
                # https://blog.csdn.net/liuchonge/article/details/79021938),
                # decoder_outputs_decode contains two fields
                # (predicted_ids, beam_search_decoder_output):
                # predicted_ids: [batch_size, decoder_targets_length, beam_size],
                #     the decoded ids
                # beam_search_decoder_output: BeamSearchDecoderOutput instance,
                #     namedtuple(scores, predicted_ids, parent_ids)
                # So returning predicted_ids (or sample_id in the greedy case)
                # is enough to recover the final result.
                else:
                    self.decoder_pred_decode = \
                        self.decoder_outputs_decode.predicted_ids

                    if self.time_major:
                        self.decoder_pred_decode = tf.transpose(
                            self.decoder_pred_decode, (1, 0, 2))

                    self.decoder_pred_decode = tf.transpose(
                        self.decoder_pred_decode,
                        perm=[0, 2, 1])
                    dod = self.decoder_outputs_decode
                    self.beam_prob = dod.beam_search_decoder_output.scores
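
A note on the masked loss above: seq2seq.sequence_loss with weights from tf.sequence_mask is just the padding-aware average of the per-token cross entropy. The toy sketch below (TensorFlow 1.x, tf.contrib.seq2seq imported as seq2seq as in these snippets; all shapes and values are made up for illustration) checks that equivalence:

import tensorflow as tf
from tensorflow.contrib import seq2seq

# Toy batch: 2 sentences, at most 4 steps, vocabulary of 5 symbols.
logits = tf.random_normal([2, 4, 5])
targets = tf.constant([[1, 2, 3, 0],
                       [4, 1, 0, 0]], dtype=tf.int32)
lengths = tf.constant([3, 2], dtype=tf.int32)

# 1.0 for valid steps, 0.0 for padding -- same role as self.masks above.
masks = tf.sequence_mask(lengths, maxlen=4, dtype=tf.float32)

loss = seq2seq.sequence_loss(
    logits=logits, targets=targets, weights=masks,
    average_across_timesteps=True, average_across_batch=True)

# Hand-rolled version: sum of masked cross entropy / sum of mask weights.
xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
    labels=targets, logits=logits)
manual = tf.reduce_sum(xent * masks) / tf.reduce_sum(masks)

with tf.Session() as sess:
    print(sess.run([loss, manual]))  # the two values agree (up to a tiny epsilon)
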
Exemple #24
0
    def __init__(self, mode, vocab_size, target_vocab_size, emb_dim,
                 encoder_num_units, encoder_num_layers, decoder_num_units,
                 decoder_num_layers, dropout_emb, dropout_hidden, tgt_sos_id,
                 tgt_eos_id, learning_rate, clip_norm, attention_option,
                 beam_size, optimizer, maximum_iterations):

        assert mode in ["train", "infer"], "invalid mode!"
        assert encoder_num_units == decoder_num_units, "encoder num_units **must** match decoder num_units"
        self.target_vocab_size = target_vocab_size

        # inputs
        self.encoder_inputs = tf.placeholder(tf.int32,
                                             shape=[None, None],
                                             name='encoder_inputs')
        self.decoder_inputs = tf.placeholder(tf.int32,
                                             shape=[None, None],
                                             name='decoder_inputs')
        self.decoder_outputs = tf.placeholder(tf.int32,
                                              shape=[None, None],
                                              name='decoder_outputs')
        self.encoder_lengths = tf.placeholder(tf.int32,
                                              shape=[None],
                                              name='encoder_lengths')
        self.decoder_lengths = tf.placeholder(tf.int32,
                                              shape=[None],
                                              name='decoder_lengths')

        # cell
        def cell(num_units):
            cell = rnn.BasicLSTMCell(num_units=num_units)
            if mode == 'train':
                cell = rnn.DropoutWrapper(cell=cell,
                                          output_keep_prob=1 - dropout_hidden)
            return cell

        # embeddings
        self.embeddings = tf.get_variable('embeddings',
                                          shape=[vocab_size, emb_dim],
                                          dtype=tf.float32)

        # Encoder
        with tf.variable_scope('encoder'):
            # embeddings
            encoder_inputs_emb = tf.nn.embedding_lookup(
                self.embeddings, self.encoder_inputs)
            if mode == 'train':
                encoder_inputs_emb = tf.nn.dropout(encoder_inputs_emb,
                                                   1 - dropout_emb)

            # encoder_rnn_cell
            fw_encoder_cell = cell(encoder_num_units)
            bw_encoder_cell = cell(encoder_num_units)

            # bi_lstm encoder
            (encoder_outputs_fw, encoder_outputs_bw), (
                encoder_state_fw,
                encoder_state_bw) = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw=fw_encoder_cell,
                    cell_bw=bw_encoder_cell,
                    inputs=encoder_inputs_emb,
                    sequence_length=self.encoder_lengths,
                    dtype=tf.float32)
            encoder_outputs = tf.concat(
                [encoder_outputs_fw, encoder_outputs_bw], 2)

            # A linear layer to reduce the encoder's final FW and BW state into a single initial state for the decoder.
            # This is needed because the encoder is bidirectional but the decoder is not.
            encoder_states_c = tf.layers.dense(inputs=tf.concat(
                [encoder_state_fw.c, encoder_state_bw.c], axis=-1),
                                               units=encoder_num_units,
                                               activation=None,
                                               use_bias=False)
            encoder_states_h = tf.layers.dense(inputs=tf.concat(
                [encoder_state_fw.h, encoder_state_bw.h], axis=-1),
                                               units=encoder_num_units,
                                               activation=None,
                                               use_bias=False)
            encoder_states = rnn.LSTMStateTuple(encoder_states_c,
                                                encoder_states_h)

            encoder_lengths = self.encoder_lengths

        # Decoder
        with tf.variable_scope('decoder'):
            decoder_inputs_emb = tf.nn.embedding_lookup(
                self.embeddings, self.decoder_inputs)
            if mode == 'train':
                decoder_inputs_emb = tf.nn.dropout(decoder_inputs_emb,
                                                   1 - dropout_emb)
            # decoder_rnn_cell
            decoder_cell = cell(decoder_num_units)

            batch_size = tf.shape(self.encoder_inputs)[0]
            decoder_initial_state = decoder_cell.zero_state(
                batch_size=batch_size, dtype=tf.float32)
            decoder_initial_state = encoder_states

            projection_layer = layers_core.Dense(units=target_vocab_size,
                                                 use_bias=False)

            # train/infer
            if mode == 'train':
                # helper
                helper = seq2seq.TrainingHelper(
                    inputs=decoder_inputs_emb,
                    sequence_length=self.decoder_lengths)
                # decoder
                decoder = seq2seq.BasicDecoder(
                    cell=decoder_cell,
                    helper=helper,
                    initial_state=decoder_initial_state,
                    output_layer=projection_layer)
                # dynamic decoding
                self.final_outputs, self.final_state, self.final_sequence_lengths = seq2seq.dynamic_decode(
                    decoder=decoder, swap_memory=True)
            else:
                start_tokens = tf.fill([batch_size], tgt_sos_id)
                end_token = tgt_eos_id

                # helper
                helper = seq2seq.GreedyEmbeddingHelper(
                    embedding=self.embeddings,
                    start_tokens=start_tokens,
                    end_token=end_token)
                # decoder
                decoder = seq2seq.BasicDecoder(
                    cell=decoder_cell,
                    helper=helper,
                    initial_state=decoder_initial_state,
                    output_layer=projection_layer)

                # dynamic decoding
                self.final_outputs, self.final_state, self.final_sequence_lengths = seq2seq.dynamic_decode(
                    decoder=decoder,
                    maximum_iterations=maximum_iterations,
                    swap_memory=True)

            self.logits = self.final_outputs.rnn_output
            self.sample_id = self.final_outputs.sample_id

        if mode == 'train':
            # loss
            with tf.variable_scope('loss'):
                cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=self.decoder_outputs, logits=self.logits)
                masks = tf.sequence_mask(lengths=self.decoder_lengths,
                                         dtype=tf.float32)
                self.loss = tf.reduce_sum(
                    cross_entropy * masks) / tf.to_float(batch_size)
                tf.summary.scalar('loss', self.loss)

            # summaries
            self.merged = tf.summary.merge_all()

            # train_op
            self.learning_rate = tf.Variable(learning_rate, trainable=False)
            self.global_step = tf.Variable(0, dtype=tf.int32, trainable=False)
            tvars = tf.trainable_variables()
            clipped_gradients, _ = tf.clip_by_global_norm(tf.gradients(
                self.loss, tvars),
                                                          clip_norm=clip_norm)
            optimizer = tf.train.AdamOptimizer(
                learning_rate=self.learning_rate)
            self.train_op = optimizer.apply_gradients(
                zip(clipped_gradients, tvars), global_step=self.global_step)
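
The train_op above is the standard clip-by-global-norm recipe. As a standalone sketch of that recipe only (a toy variable and loss stand in for the model's graph; nothing here is taken from the snippet beyond the pattern itself):

import tensorflow as tf

w = tf.Variable([1.0, -2.0])
toy_loss = tf.reduce_sum(tf.square(w))        # stands in for self.loss

tvars = tf.trainable_variables()
grads = tf.gradients(toy_loss, tvars)
clipped, _ = tf.clip_by_global_norm(grads, clip_norm=5.0)

global_step = tf.Variable(0, dtype=tf.int32, trainable=False)
train_op = tf.train.AdamOptimizer(0.001).apply_gradients(
    zip(clipped, tvars), global_step=global_step)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(train_op)
    print(sess.run([w, global_step]))         # w updated, global_step == 1
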
    def build_decoder(self):
        print("building decoder and attention..")
        with tf.variable_scope('decoder'):
            # Building decoder_cell and decoder_initial_state
            self.decoder_cell, self.decoder_initial_state = self.build_decoder_cell(
            )

            # Initialize decoder embeddings to have variance=1.
            sqrt3 = math.sqrt(3)  # Uniform(-sqrt(3), sqrt(3)) has variance=1.
            initializer = tf.random_uniform_initializer(-sqrt3,
                                                        sqrt3,
                                                        dtype=self.dtype)

            self.decoder_embeddings = tf.get_variable(
                name='embedding',
                shape=[self.num_decoder_symbols, self.embedding_size],
                initializer=initializer,
                dtype=self.dtype)

            # Input projection layer to feed embedded inputs to the cell
            # ** Essential when use_residual=True to match input/output dims
            input_layer = Dense(self.hidden_units,
                                dtype=self.dtype,
                                name='input_projection')

            # Output projection layer to convert cell_outputs to logits
            output_layer = Dense(self.num_decoder_symbols,
                                 name='output_projection')

            if self.mode == 'train':
                # decoder_inputs_embedded: [batch_size, max_time_step + 1, embedding_size]
                self.decoder_inputs_embedded = tf.nn.embedding_lookup(
                    params=self.decoder_embeddings,
                    ids=self.decoder_inputs_train)

                # Embedded inputs having gone through input projection layer
                self.decoder_inputs_embedded = input_layer(
                    self.decoder_inputs_embedded)

                # Helper to feed inputs for training: read inputs from dense ground truth vectors
                training_helper = seq2seq.TrainingHelper(
                    inputs=self.decoder_inputs_embedded,
                    sequence_length=self.decoder_inputs_length_train,
                    time_major=False,
                    name='training_helper')

                training_decoder = seq2seq.BasicDecoder(
                    cell=self.decoder_cell,
                    helper=training_helper,
                    initial_state=self.decoder_initial_state,
                    output_layer=output_layer)
                #output_layer=None)

                # Maximum decoder time_steps in current batch
                max_decoder_length = tf.reduce_max(
                    self.decoder_inputs_length_train)

                # decoder_outputs_train: BasicDecoderOutput
                #                        namedtuple(rnn_outputs, sample_id)
                # decoder_outputs_train.rnn_output: [batch_size, max_time_step + 1, num_decoder_symbols] if output_time_major=False
                #                                   [max_time_step + 1, batch_size, num_decoder_symbols] if output_time_major=True
                # decoder_outputs_train.sample_id: [batch_size, max_time_step + 1], tf.int32
                (self.decoder_outputs_train, self.decoder_last_state_train,
                 self.decoder_outputs_length_train) = (seq2seq.dynamic_decode(
                     decoder=training_decoder,
                     output_time_major=False,
                     impute_finished=True,
                     maximum_iterations=max_decoder_length))

                # More efficient to do the projection on the batch-time-concatenated tensor
                # logits_train: [batch_size, max_time_step + 1, num_decoder_symbols]
                # self.decoder_logits_train = output_layer(self.decoder_outputs_train.rnn_output)
                self.decoder_logits_train = tf.identity(
                    self.decoder_outputs_train.rnn_output)
                # Use argmax to extract decoder symbols to emit
                self.decoder_pred_train = tf.argmax(self.decoder_logits_train,
                                                    axis=-1,
                                                    name='decoder_pred_train')

                # masks: masking for valid and padded time steps, [batch_size, max_time_step + 1]
                masks = tf.sequence_mask(
                    lengths=self.decoder_inputs_length_train,
                    maxlen=max_decoder_length,
                    dtype=self.dtype,
                    name='masks')

                def class_weighted_loss(labels, logits):
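                    # Per-class loss weights indexed by vocabulary id: tf.gather
                    # below picks the weight for each target label, and the
                    # standard sparse cross entropy is scaled by that weight.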
                    class_weights = tf.constant([
                        0.00017234778799135608, 0.00017234778799135608,
                        0.00017234778799135608, 1.6821366229319637e-05,
                        4.898869308918329e-05, 7.106575604186823e-05,
                        7.126891354944498e-05, 7.514392550863835e-05,
                        7.719102618435312e-05, 8.89973910758995e-05,
                        0.00010430076292140834, 0.00010567508046918493,
                        0.00011254233356378444, 0.00013745981039146453,
                        0.00015365550520395147, 0.00016343173716428013,
                        0.00016623641703291143, 0.00018462654135821253,
                        0.0001873476479039208, 0.00018800477750021655,
                        0.00020981274294876723, 0.00021602805964389768,
                        0.00024354484846033354, 0.00024936107032012903,
                        0.0002495739348066665, 0.000319111899575184,
                        0.00033594586064125193, 0.0003818581956683335,
                        0.0003838636576651593, 0.0005417806138677063,
                        0.0006711205600832021, 0.0006750650134170244,
                        0.0006953534538202605, 0.0007032603813511271,
                        0.0007207552048226591, 0.0007264535179396215,
                        0.0007633538390502503, 0.000891602363160162,
                        0.0009813883808113227, 0.0010641991144668115,
                        0.0011028839931134101, 0.0012656472742694626,
                        0.0013067898106130453, 0.0013988733031399323,
                        0.0016671901108961662, 0.0017748398034871436,
                        0.0022286969673726295, 0.0022647955802244397,
                        0.0022727983914619817, 0.002481488984505173,
                        0.002566647824356508, 0.0026578592759658715,
                        0.002682243306020604, 0.002818588715090889,
                        0.002964064261676225, 0.0029888566207422903,
                        0.0030339714376591553, 0.0032127969269917125,
                        0.0032616731479905726, 0.0033361096721148385,
                        0.00424275689171333, 0.004594299605598149,
                        0.004750383639466329, 0.005306946739139776,
                        0.005497452519519153, 0.005911782580732912,
                        0.007162605175765489, 0.007194652626216341,
                        0.007496526162980663, 0.007960420108709664,
                        0.007960420108709664, 0.008691918172753256,
                        0.009110509132914177, 0.011323977901122198,
                        0.011652209144632988, 0.012711500885054168,
                        0.013180367720978298, 0.015169857188295775,
                        0.016242473353124773, 0.022971498027990745,
                        0.024000072566557496, 0.024549692548997745,
                        0.029504676366226647, 0.035733441376874495,
                        0.03828583004665124, 0.03874710510745427,
                        0.058472904071249165, 0.0630590141944844,
                        0.08040024309796762, 0.3573344137687449
                    ])
                    weights = tf.gather(class_weights, labels)
                    unweighted_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
                        labels=labels, logits=logits)
                    return unweighted_losses * weights

                # Computes per word average cross-entropy over a batch
                # Internally calls 'nn_ops.sparse_softmax_cross_entropy_with_logits' by default
                if self.loss_type == 'weighted':
                    print('using weighted loss!')
                    self.loss = seq2seq.sequence_loss(
                        logits=self.decoder_logits_train,
                        targets=self.decoder_targets_train,
                        weights=masks,
                        average_across_timesteps=True,
                        average_across_batch=True,
                        softmax_loss_function=class_weighted_loss,
                    )
                else:
                    self.loss = seq2seq.sequence_loss(
                        logits=self.decoder_logits_train,
                        targets=self.decoder_targets_train,
                        weights=masks,
                        average_across_timesteps=True,
                        average_across_batch=True,
                    )
                # Training summary for the current batch_loss
                tf.summary.scalar('loss', self.loss)

                # Contruct graphs for minimizing loss
                self.init_optimizer()

            elif self.mode == 'decode':

                # Start_tokens: [batch_size,] `int32` vector
                start_tokens = tf.ones([
                    self.batch_size,
                ], tf.int32) * data_utils.start_token
                end_token = data_utils.end_token

                def embed_and_input_proj(inputs):
                    return input_layer(
                        tf.nn.embedding_lookup(self.decoder_embeddings,
                                               inputs))

                if not self.use_beamsearch_decode:
                    # Helper to feed inputs for greedy decoding: uses the argmax of the output
                    decoding_helper = seq2seq.GreedyEmbeddingHelper(
                        start_tokens=start_tokens,
                        end_token=end_token,
                        embedding=embed_and_input_proj)
                    # Basic decoder performs greedy decoding at each time step
                    print("building greedy decoder..")
                    inference_decoder = seq2seq.BasicDecoder(
                        cell=self.decoder_cell,
                        helper=decoding_helper,
                        initial_state=self.decoder_initial_state,
                        output_layer=output_layer)
                else:
                    # Beamsearch is used to approximately find the most likely translation
                    print("building beamsearch decoder..")
                    inference_decoder = beam_search_decoder.BeamSearchDecoder(
                        cell=self.decoder_cell,
                        embedding=embed_and_input_proj,
                        start_tokens=start_tokens,
                        end_token=end_token,
                        initial_state=self.decoder_initial_state,
                        beam_width=self.beam_width,
                        output_layer=output_layer,
                    )
                # For GreedyDecoder, return
                # decoder_outputs_decode: BasicDecoderOutput instance
                #                         namedtuple(rnn_outputs, sample_id)
                # decoder_outputs_decode.rnn_output: [batch_size, max_time_step, num_decoder_symbols] 	if output_time_major=False
                #                                    [max_time_step, batch_size, num_decoder_symbols] 	if output_time_major=True
                # decoder_outputs_decode.sample_id: [batch_size, max_time_step], tf.int32		if output_time_major=False
                #                                   [max_time_step, batch_size], tf.int32               if output_time_major=True

                # For BeamSearchDecoder, return
                # decoder_outputs_decode: FinalBeamSearchDecoderOutput instance
                #                         namedtuple(predicted_ids, beam_search_decoder_output)
                # decoder_outputs_decode.predicted_ids: [batch_size, max_time_step, beam_width] if output_time_major=False
                #                                       [max_time_step, batch_size, beam_width] if output_time_major=True
                # decoder_outputs_decode.beam_search_decoder_output: BeamSearchDecoderOutput instance
                #                                                    namedtuple(scores, predicted_ids, parent_ids)

                (self.decoder_outputs_decode, self.decoder_last_state_decode,
                 self.decoder_outputs_length_decode) = (
                     seq2seq.dynamic_decode(
                         decoder=inference_decoder,
                         output_time_major=False,
                         #impute_finished=True,	# error occurs
                         maximum_iterations=self.max_decode_step))

                if not self.use_beamsearch_decode:
                    # decoder_outputs_decode.sample_id: [batch_size, max_time_step]
                    # Or use argmax to find decoder symbols to emit:
                    # self.decoder_pred_decode = tf.argmax(self.decoder_outputs_decode.rnn_output,
                    #                                      axis=-1, name='decoder_pred_decode')

                    # Here, we use expand_dims to be compatible with the result of the beamsearch decoder
                    # decoder_pred_decode: [batch_size, max_time_step, 1] (output_major=False)
                    self.decoder_pred_decode = tf.expand_dims(
                        self.decoder_outputs_decode.sample_id, -1)

                else:
                    # Use beam search to approximately find the most likely translation
                    # decoder_pred_decode: [batch_size, max_time_step, beam_width] (output_major=False)
                    self.decoder_pred_decode = self.decoder_outputs_decode.predicted_ids
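
As the comments above explain, expand_dims makes the greedy sample_id ([batch_size, max_time_step]) shape-compatible with the beam-search predicted_ids ([batch_size, max_time_step, beam_width]). A tiny standalone illustration with made-up ids (not wired into the model):

import tensorflow as tf

greedy_ids = tf.constant([[5, 7, 2],
                          [9, 2, 0]])                 # [batch, time]
beam_ids = tf.constant([[[5, 4], [7, 6], [2, 2]],
                        [[9, 8], [2, 3], [0, 0]]])    # [batch, time, beam_width]

# After expand_dims the greedy result looks like beam output with beam_width == 1,
# so downstream code can always index [:, :, k].
greedy_as_beam = tf.expand_dims(greedy_ids, -1)       # [batch, time, 1]

with tf.Session() as sess:
    print(sess.run(tf.shape(greedy_as_beam)))         # [2 3 1]
    print(sess.run(beam_ids[:, :, 0]))                # best beam, same rank as greedy_ids
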
Exemple #26
0
  def call(self, inputs, training=None, mask=None):
    dec_emb_fn = lambda ids: self.embed(ids)
    if self.is_infer:
      enc_outputs, enc_state, enc_seq_len = inputs
      batch_size = tf.shape(enc_outputs)[0]
      helper = seq2seq.GreedyEmbeddingHelper(embedding=dec_emb_fn,
                                             start_tokens=tf.fill([batch_size],
                                                                  self.dec_start_id),
                                             end_token=self.dec_end_id)
    else:
      dec_inputs, dec_seq_len, enc_outputs, enc_state, \
      enc_seq_len = inputs
      batch_size = tf.shape(enc_outputs)[0]
      dec_inputs = self.embed(dec_inputs)
      helper = seq2seq.TrainingHelper(inputs=dec_inputs,
                                      sequence_length=dec_seq_len)

    if self.is_infer and self.beam_size > 1:
      tiled_enc_outputs = seq2seq.tile_batch(enc_outputs,
                                             multiplier=self.beam_size)
      tiled_seq_len = seq2seq.tile_batch(enc_seq_len,
                                         multiplier=self.beam_size)
      attn_mech = self._build_attention(enc_outputs=tiled_enc_outputs,
                                        enc_seq_len=tiled_seq_len)
      dec_cell = seq2seq.AttentionWrapper(self.cell, attn_mech)
      tiled_enc_last_state = seq2seq.tile_batch(enc_state,
                                                multiplier=self.beam_size)
      tiled_dec_init_state = dec_cell.zero_state(batch_size=batch_size * self.beam_size,
                                                 dtype=tf.float32)
      if self.initial_decode_state:
        tiled_dec_init_state = tiled_dec_init_state.clone(cell_state=tiled_enc_last_state)

      dec = seq2seq.BeamSearchDecoder(cell=dec_cell,
                                      embedding=dec_emb_fn,
                                      start_tokens=tf.tile([self.dec_start_id],
                                                           [batch_size]),
                                      end_token=self.dec_end_id,
                                      initial_state=tiled_dec_init_state,
                                      beam_width=self.beam_size,
                                      output_layer=tf.layers.Dense(self.vocab_size),
                                      length_penalty_weight=self.length_penalty)
    else:
      attn_mech = self._build_attention(enc_outputs=enc_outputs,
                                        enc_seq_len=enc_seq_len)
      dec_cell = seq2seq.AttentionWrapper(cell=self.cell,
                                          attention_mechanism=attn_mech)
      dec_init_state = dec_cell.zero_state(batch_size=batch_size, dtype=tf.float32)
      if self.initial_decode_state:
        dec_init_state = dec_init_state.clone(cell_state=enc_state)
      dec = seq2seq.BasicDecoder(cell=dec_cell,
                                 helper=helper,
                                 initial_state=dec_init_state,
                                 output_layer=tf.layers.Dense(self.vocab_size))
    if self.is_infer:
      dec_outputs, _, _ = \
        seq2seq.dynamic_decode(decoder=dec,
                               maximum_iterations=self.max_dec_len,
                               swap_memory=self.swap_memory,
                               output_time_major=self.time_major)
      if self.beam_size > 1:
        # BeamSearchDecoder output: keep the first beam.
        return dec_outputs.predicted_ids[:, :, 0]
      # Greedy BasicDecoder output has sample_id instead of predicted_ids.
      return dec_outputs.sample_id
    else:
      dec_outputs, _, _ = \
        seq2seq.dynamic_decode(decoder=dec,
                               maximum_iterations=tf.reduce_max(dec_seq_len),
                               swap_memory=self.swap_memory,
                               output_time_major=self.time_major)
    return dec_outputs.rnn_output
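
The tile_batch calls above are what make beam search compose with attention: every encoder-side tensor has to be repeated beam_size times along the batch axis before it reaches the AttentionWrapper / BeamSearchDecoder. A minimal sketch of what tile_batch does (toy values, TF 1.x contrib API):

import tensorflow as tf
from tensorflow.contrib import seq2seq

enc_outputs = tf.constant([[[1.0, 1.0]],
                           [[2.0, 2.0]]])    # [batch=2, time=1, units=2]
enc_seq_len = tf.constant([1, 1])
beam_size = 3

tiled_outputs = seq2seq.tile_batch(enc_outputs, multiplier=beam_size)
tiled_lengths = seq2seq.tile_batch(enc_seq_len, multiplier=beam_size)

with tf.Session() as sess:
    print(sess.run(tf.shape(tiled_outputs)))  # [6 1 2]: each example repeated 3 times
    print(sess.run(tiled_lengths))            # [1 1 1 1 1 1]
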
Exemple #27
0
    def build_decoder(self, encoder_outputs, encoder_state):
        with tf.variable_scope('decoder') as decoder_scope:
            (
                self.decoder_cell,
                self.decoder_initial_state
            ) = self.build_decoder_cell(encoder_outputs, encoder_state)

            with tf.device(_get_embed_device(self.target_vocab_size)):
                if self.share_embedding:
                    self.decoder_embeddings = self.encoder_embeddings
                elif self.pretrained_embedding:

                    self.decoder_embeddings = tf.Variable(
                        tf.constant(0.0, shape=(self.target_vocab_size, self.embedding_size)),
                        trainable=True,
                        name='embeddings'
                    )

                    self.decoder_embeddings_placeholder =\
                        tf.placeholder(tf.float32, (self.target_vocab_size,
                                                    self.embedding_size))

                    self.decoder_embeddings_init = self.decoder_embeddings.assign(self.decoder_embeddings_placeholder)
                else:
                    self.decoder_embeddings = tf.get_variable(
                        name='embedding',
                        shape=(self.target_vocab_size, self.embedding_size),
                        initializer=self.initializer,
                        dtype=tf.float32
                    )

            self.decoder_output_projection = layers.Dense(self.target_vocab_size,
                                                          dtype=tf.float32,
                                                          use_bias=False,
                                                          name='decoder_output_projection')

            if self.mode == 'train':
                self.decoder_inputs_embedded = tf.nn.embedding_lookup(
                    params=self.decoder_embeddings,
                    ids=self.decoder_inputs_train
                )

                inputs = self.decoder_inputs_embedded

                if self.time_major:
                    inputs = tf.transpose(inputs, (1, 0, 2))

                training_helper = seq2seq.TrainingHelper(
                    inputs=inputs,
                    sequence_length=self.decoder_inputs_length,
                    time_major=self.time_major,
                    name='training_helper'
                )

                training_decoder = seq2seq.BasicDecoder(
                    cell=self.decoder_cell,
                    helper=training_helper,
                    initial_state=self.decoder_initial_state
                )

                max_decoder_length = tf.reduce_max(
                    self.decoder_inputs_length
                )

                (
                    outputs,
                    self.final_state,
                    _
                ) = seq2seq.dynamic_decode(
                    decoder=training_decoder,
                    output_time_major=self.time_major,
                    impute_finished=True,
                    maximum_iterations=max_decoder_length,
                    parallel_iterations=self.parallel_iterations,
                    swap_memory=True,
                    scope=decoder_scope
                )

                self.decoder_logits_train = self.decoder_output_projection(
                    outputs.rnn_output
                )

                self.masks = tf.sequence_mask(
                    lengths=self.decoder_inputs_length,
                    maxlen=max_decoder_length,
                    dtype=tf.float32,
                    name='masks'
                )

                decoder_logits_train = self.decoder_logits_train
                if self.time_major:
                    decoder_logits_train = tf.transpose(decoder_logits_train, (1, 0, 2))

                self.decoder_pred_train = tf.argmax(
                    decoder_logits_train, axis=-1, name='decoder_pred_train'
                )

                self.train_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=self.decoder_inputs,
                    logits=decoder_logits_train)

                self.masks_rewards = self.masks * self.rewards

                self.loss_rewards = seq2seq.sequence_loss(
                    logits=decoder_logits_train,
                    targets=self.decoder_inputs,
                    weights=self.masks_rewards,
                    average_across_timesteps=True,
                    average_across_batch=True
                )

                self.loss = seq2seq.sequence_loss(
                    logits=decoder_logits_train,
                    targets=self.decoder_inputs,
                    weights=self.masks,
                    average_across_timesteps=True,
                    average_across_batch=True
                )

                self.loss_add = self.loss + self.add_loss

            elif self.mode == 'decode':
                start_token = tf.tile(
                    [WordSequence.START],
                    [self.batch_size]
                )
                end_token = WordSequence.END

                def embed_and_input_proj(inputs):
                    return tf.nn.embedding_lookup(
                        self.decoder_embeddings,
                        inputs
                    )

                if not self.use_beamsearch_decode:
                    decoder_helper = seq2seq.GreedyEmbeddingHelper(
                        start_tokens=start_token,
                        end_token=end_token,
                        embedding=embed_and_input_proj
                    )

                    inference_decoder = seq2seq.BasicDecoder(
                        cell=self.decoder_cell,
                        helper=decoder_helper,
                        initial_state=self.decoder_initial_state,
                        output_layer=self.decoder_output_projection
                    )
                else:
                    inference_decoder = BeamSearchDecoder(
                        cell=self.decoder_cell,
                        embedding=embed_and_input_proj,
                        start_tokens=start_token,
                        end_token=end_token,
                        initial_state=self.decoder_initial_state,
                        beam_width=self.beam_width,
                        output_layer=self.decoder_output_projection
                    )
                if self.max_decode_step is not None:
                    max_decoder_step = self.max_decode_step
                else:
                    # Cast to int32: maximum_iterations must be an integer.
                    max_decoder_step = tf.to_int32(tf.round(tf.reduce_max(
                        self.encoder_inputs_length
                    ) * 4))
                (
                    self.decoder_outputs_decode,
                    self.final_state,
                    _  # decoder_outputs_length_decode
                ) = (seq2seq.dynamic_decode(
                    decoder=inference_decoder,
                    output_time_major=self.time_major,
                    maximum_iterations=max_decoder_step,
                    parallel_iterations=self.parallel_iterations,
                    swap_memory=True,
                    scope=decoder_scope
                ))

                if not self.use_beamsearch_decode:
                    dod = self.decoder_outputs_decode
                    self.decoder_pred_decode = dod.sample_id

                    if self.time_major:
                        self.decoder_pred_decode = tf.transpose(
                            self.decoder_pred_decode, (1, 0)
                        )
                else:
                    self.decoder_pred_decode = self.decoder_outputs_decode.predicted_ids

                    if self.time_major:
                        self.decoder_pred_decode = tf.transpose(
                            self.decoder_pred_decode, (1, 0, 2)
                        )
                    self.decoder_pred_decode = tf.transpose(
                        self.decoder_pred_decode,
                        perm=[0, 2, 1]
                    )
                    dod = self.decoder_outputs_decode
                    self.beam_prob = dod.beam_search_decoder_output.scores
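
The last transpose above, with perm=[0, 2, 1], rearranges the beam-search predicted_ids from [batch, time, beam] into [batch, beam, time], so every row of the result is one complete candidate sequence. A toy illustration with made-up ids:

import tensorflow as tf

# predicted_ids as returned by beam search: [batch=1, time=3, beam=2]
predicted_ids = tf.constant([[[5, 4],
                              [7, 6],
                              [2, 2]]])

per_beam = tf.transpose(predicted_ids, perm=[0, 2, 1])   # [batch, beam, time]

with tf.Session() as sess:
    print(sess.run(per_beam))
    # [[[5 7 2]      beam 0: one candidate sequence per row
    #   [4 6 2]]]    beam 1
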
    def build_decoder(self, encoder_outputs, encoder_state):
        """构建解码器
        """
        with tf.variable_scope('decoder') as decoder_scope:
            # Building decoder_cell and decoder_initial_state
            (self.decoder_cell,
             self.decoder_initial_state) = self.build_decoder_cell(
                 encoder_outputs, encoder_state)

            # decoder embeddings
            with tf.device(_get_embed_device(self.target_vocab_size)):
                if self.share_embedding:
                    self.decoder_embeddings = self.encoder_embeddings
                elif self.pretrained_embedding:

                    self.decoder_embeddings = tf.Variable(tf.constant(
                        0.0,
                        shape=(self.target_vocab_size, self.embedding_size)),
                                                          trainable=True,
                                                          name='embeddings')
                    self.decoder_embeddings_placeholder = tf.placeholder(
                        tf.float32,
                        (self.target_vocab_size, self.embedding_size))
                    self.decoder_embeddings_init = \
                        self.decoder_embeddings.assign(
                            self.decoder_embeddings_placeholder)
                else:
                    self.decoder_embeddings = tf.get_variable(
                        name='embeddings',
                        shape=(self.target_vocab_size, self.embedding_size),
                        initializer=self.initializer,
                        dtype=tf.float32)

            self.decoder_output_projection = layers.Dense(
                self.target_vocab_size,
                dtype=tf.float32,
                use_bias=False,
                name='decoder_output_projection')

            if self.mode == 'train':
                # decoder_inputs_embedded:
                # [batch_size, max_time_step + 1, embedding_size]
                self.decoder_inputs_embedded = tf.nn.embedding_lookup(
                    params=self.decoder_embeddings,
                    ids=self.decoder_inputs_train)

                # Helper to feed inputs for training:
                # read inputs from dense ground truth vectors
                inputs = self.decoder_inputs_embedded

                if self.time_major:
                    inputs = tf.transpose(inputs, (1, 0, 2))

                training_helper = seq2seq.TrainingHelper(
                    inputs=inputs,
                    sequence_length=self.decoder_inputs_length,
                    time_major=self.time_major,
                    name='training_helper')

                # Do not apply output_layer here during training, because that
                # would run the output projection at every time step, which is slow.
                # Note: for this trick to work, the scope argument of
                # dynamic_decode must be set.
                training_decoder = seq2seq.BasicDecoder(
                    cell=self.decoder_cell,
                    helper=training_helper,
                    initial_state=self.decoder_initial_state,
                    # output_layer=self.decoder_output_projection
                )

                # Maximum decoder time_steps in current batch
                max_decoder_length = tf.reduce_max(self.decoder_inputs_length)

                # decoder_outputs_train: BasicDecoderOutput
                #     namedtuple(rnn_outputs, sample_id)
                # decoder_outputs_train.rnn_output:
                #     if output_time_major=False:
                #         [batch_size, max_time_step + 1, num_decoder_symbols]
                #     if output_time_major=True:
                #         [max_time_step + 1, batch_size, num_decoder_symbols]
                # decoder_outputs_train.sample_id: [batch_size, max_time_step + 1], tf.int32

                (
                    outputs,
                    self.final_state,  # contain attention
                    _  # self.final_sequence_lengths
                ) = seq2seq.dynamic_decode(
                    decoder=training_decoder,
                    output_time_major=self.time_major,
                    impute_finished=True,
                    maximum_iterations=max_decoder_length,
                    parallel_iterations=self.parallel_iterations,
                    swap_memory=True,
                    scope=decoder_scope)

                # More efficient to do the projection
                # on the batch-time-concatenated tensor
                # logits_train:
                # [batch_size, max_time_step + 1, num_decoder_symbols]
                # At training time, apply the output_layer projection to all
                # outputs in one pass. The official NMT code reports a 10~20%
                # speedup from this; in practice the speedup here was even larger.
                self.decoder_logits_train = self.decoder_output_projection(
                    outputs.rnn_output)

                # masks: masking for valid and padded time steps,
                # [batch_size, max_time_step + 1]
                self.masks = tf.sequence_mask(
                    lengths=self.decoder_inputs_length,
                    maxlen=max_decoder_length,
                    dtype=tf.float32,
                    name='masks')

                # Computes per word average cross-entropy over a batch
                # Internally calls
                # 'nn_ops.sparse_softmax_cross_entropy_with_logits' by default

                decoder_logits_train = self.decoder_logits_train
                if self.time_major:
                    decoder_logits_train = tf.transpose(
                        decoder_logits_train, (1, 0, 2))

                self.decoder_pred_train = tf.argmax(decoder_logits_train,
                                                    axis=-1,
                                                    name='decoder_pred_train')

                # The variables below are used for special training schemes:
                # custom rewards, which in effect reweight the masks.
                # train_entropy = cross entropy
                self.train_entropy = \
                    tf.nn.sparse_softmax_cross_entropy_with_logits(
                        labels=self.decoder_inputs,
                        logits=decoder_logits_train)

                self.masks_rewards = self.masks * self.rewards

                self.loss_rewards = seq2seq.sequence_loss(
                    logits=decoder_logits_train,
                    targets=self.decoder_inputs,
                    weights=self.masks_rewards,
                    average_across_timesteps=True,
                    average_across_batch=True,
                )

                self.loss = seq2seq.sequence_loss(
                    logits=decoder_logits_train,
                    targets=self.decoder_inputs,
                    weights=self.masks,
                    average_across_timesteps=True,
                    average_across_batch=True,
                )

                self.loss_add = self.loss + self.add_loss

            elif self.mode == 'decode':
                # Inference mode (not training).

                start_tokens = tf.tile([WordSequence.START], [self.batch_size])
                end_token = WordSequence.END

                def embed_and_input_proj(inputs):
                    """输入层的投影层wrapper
                    """
                    return tf.nn.embedding_lookup(self.decoder_embeddings,
                                                  inputs)

                if not self.use_beamsearch_decode:
                    # Helper to feed inputs for greedy decoding:
                    # uses the argmax of the output
                    decoding_helper = seq2seq.GreedyEmbeddingHelper(
                        start_tokens=start_tokens,
                        end_token=end_token,
                        embedding=embed_and_input_proj)
                    # Basic decoder performs greedy decoding at each time step
                    # print("building greedy decoder..")
                    inference_decoder = seq2seq.BasicDecoder(
                        cell=self.decoder_cell,
                        helper=decoding_helper,
                        initial_state=self.decoder_initial_state,
                        output_layer=self.decoder_output_projection)
                else:
                    # Beamsearch is used to approximately
                    # find the most likely translation
                    # print("building beamsearch decoder..")
                    inference_decoder = BeamSearchDecoder(
                        cell=self.decoder_cell,
                        embedding=embed_and_input_proj,
                        start_tokens=start_tokens,
                        end_token=end_token,
                        initial_state=self.decoder_initial_state,
                        beam_width=self.beam_width,
                        output_layer=self.decoder_output_projection,
                    )

                # For GreedyDecoder, return
                # decoder_outputs_decode: BasicDecoderOutput instance
                #     namedtuple(rnn_outputs, sample_id)
                # decoder_outputs_decode.rnn_output:
                # if output_time_major=False:
                #     [batch_size, max_time_step, num_decoder_symbols]
                # if output_time_major=True
                #     [max_time_step, batch_size, num_decoder_symbols]
                # decoder_outputs_decode.sample_id:
                # if output_time_major=False
                #     [batch_size, max_time_step], tf.int32
                # if output_time_major=True
                #     [max_time_step, batch_size], tf.int32

                # For BeamSearchDecoder, return
                # decoder_outputs_decode: FinalBeamSearchDecoderOutput instance
                #     namedtuple(predicted_ids, beam_search_decoder_output)
                # decoder_outputs_decode.predicted_ids:
                # if output_time_major=False:
                #     [batch_size, max_time_step, beam_width]
                # if output_time_major=True
                #     [max_time_step, batch_size, beam_width]
                # decoder_outputs_decode.beam_search_decoder_output:
                #     BeamSearchDecoderOutput instance
                #     namedtuple(scores, predicted_ids, parent_ids)

                # A possible maximum length suggested by the official docs
                # (changed here to * 4):
                # maximum_iterations = tf.round(tf.reduce_max(source_sequence_length) * 2)
                # https://www.tensorflow.org/tutorials/seq2seq

                if self.max_decode_step is not None:
                    max_decode_step = self.max_decode_step
                else:
                    # Default: decode up to 4x the maximum input length.
                    # Cast to int32: maximum_iterations must be an integer.
                    max_decode_step = tf.to_int32(tf.round(
                        tf.reduce_max(self.encoder_inputs_length) * 4))

                (
                    self.decoder_outputs_decode,
                    self.final_state,
                    _  # self.decoder_outputs_length_decode
                ) = (
                    seq2seq.dynamic_decode(
                        decoder=inference_decoder,
                        output_time_major=self.time_major,
                        # impute_finished=True,	# error occurs
                        maximum_iterations=max_decode_step,
                        parallel_iterations=self.parallel_iterations,
                        swap_memory=True,
                        scope=decoder_scope))

                if not self.use_beamsearch_decode:
                    # decoder_outputs_decode.sample_id:
                    #     [batch_size, max_time_step]
                    # Or use argmax to find decoder symbols to emit:
                    # self.decoder_pred_decode = tf.argmax(
                    #     self.decoder_outputs_decode.rnn_output,
                    #     axis=-1, name='decoder_pred_decode')

                    # Here, we use expand_dims to be compatible with
                    # the result of the beamsearch decoder
                    # decoder_pred_decode:
                    #     [batch_size, max_time_step, 1] (output_major=False)

                    # self.decoder_pred_decode = tf.expand_dims(
                    #     self.decoder_outputs_decode.sample_id,
                    #     -1
                    # )

                    dod = self.decoder_outputs_decode
                    self.decoder_pred_decode = dod.sample_id

                    if self.time_major:
                        self.decoder_pred_decode = tf.transpose(
                            self.decoder_pred_decode, (1, 0))

                else:
                    # Use beam search to approximately
                    # find the most likely translation
                    # decoder_pred_decode:
                    # [batch_size, max_time_step, beam_width] (output_major=False)
                    self.decoder_pred_decode = \
                        self.decoder_outputs_decode.predicted_ids

                    if self.time_major:
                        self.decoder_pred_decode = tf.transpose(
                            self.decoder_pred_decode, (1, 0, 2))

                    self.decoder_pred_decode = tf.transpose(
                        self.decoder_pred_decode, perm=[0, 2, 1])
                    dod = self.decoder_outputs_decode
                    self.beam_prob = dod.beam_search_decoder_output.scores
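
loss_rewards above is simply the ordinary sequence loss with the mask multiplied element-wise by a rewards tensor, so all-ones rewards recover self.loss exactly. A toy sketch of that relationship (here rewards stands in for self.rewards, which is assumed to be a [batch, time] float tensor; its definition is not part of this excerpt):

import tensorflow as tf
from tensorflow.contrib import seq2seq

logits = tf.random_normal([2, 3, 5])
targets = tf.constant([[1, 2, 0],
                       [3, 0, 0]], dtype=tf.int32)
masks = tf.sequence_mask([2, 1], maxlen=3, dtype=tf.float32)
rewards = tf.ones_like(masks)                 # neutral rewards

plain = seq2seq.sequence_loss(logits, targets, weights=masks)
rewarded = seq2seq.sequence_loss(logits, targets, weights=masks * rewards)

with tf.Session() as sess:
    print(sess.run([plain, rewarded]))        # identical when rewards are all 1.0
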
Exemple #29
0
    def _init_decoder(self):
        data_y = process_decoding_input(self.data_y, self.vocab_to_int_y,
                                        self.batch_size)

        self.dec_embeddings = tf.Variable(tf.random_uniform(
            [self.vocab_size_y, self.embedding_size], -1.0, 1.0),
                                          dtype=tf.float32)

        dec_embedded = tf.nn.embedding_lookup(self.dec_embeddings, data_y)

        with tf.variable_scope("decoder"):
            dec_cell = rnn_cell(self.cell_size, self.dec_num_layers,
                                self.dec_keep_prob)

        out_layer = Dense(self.vocab_size_y,
                          kernel_initializer=tf.truncated_normal_initializer(
                              mean=0.0, stddev=0.1))

        att_mechanism = seq2seq.BahdanauAttention(self.cell_size,
                                                  self.enc_outputs,
                                                  self.x_length,
                                                  normalize=False)

        dec_cell = seq2seq.DynamicAttentionWrapper(
            dec_cell, att_mechanism, attention_size=self.cell_size)

        init_state = seq2seq.DynamicAttentionWrapperState(
            cell_state=self.enc_states[0],
            attention=_zero_state_tensors(self.cell_size, self.batch_size,
                                          tf.float32))

        with tf.variable_scope("decoding"):
            train_helper = seq2seq.TrainingHelper(
                dec_embedded, sequence_length=self.y_length, time_major=False)

            train_decoder = seq2seq.BasicDecoder(dec_cell, train_helper,
                                                 init_state, out_layer)

            train_out, _ = seq2seq.dynamic_decode(
                train_decoder,
                output_time_major=False,
                impute_finished=True,
                maximum_iterations=self.max_length,
                swap_memory=True)

            self.decoder_train = train_out.rnn_output

        with tf.variable_scope("decoding", reuse=True):
            start_tokens = tf.tile(
                tf.constant([self.vocab_to_int_y[START]], dtype=tf.int32),
                [self.batch_size])

            infer_helper = seq2seq.GreedyEmbeddingHelper(
                embedding=self.dec_embeddings,
                start_tokens=start_tokens,
                end_token=self.vocab_to_int_y[STOP])

            infer_decoder = seq2seq.BasicDecoder(dec_cell, infer_helper,
                                                 init_state, out_layer)

            infer_out, _ = seq2seq.dynamic_decode(
                infer_decoder,
                output_time_major=False,
                impute_finished=True,
                maximum_iterations=self.max_length)

            self.decoder_inference = infer_out.sample_id

        tf.identity(self.decoder_train, 'decoder_train')
        tf.identity(self.decoder_inference, 'decoder_inference')
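
The second tf.variable_scope("decoding", reuse=True) block above is what makes the inference decoder share its weights with the training decoder instead of creating a second copy. The reuse mechanism in isolation (a minimal TF 1.x sketch; the variable name is arbitrary):

import tensorflow as tf

with tf.variable_scope("decoding"):
    w_train = tf.get_variable("w", shape=[2])   # created here

with tf.variable_scope("decoding", reuse=True):
    w_infer = tf.get_variable("w", shape=[2])   # looked up, not re-created

print(w_train.name == w_infer.name)             # True: both are 'decoding/w:0'
print(len(tf.trainable_variables()))            # 1 -- a single shared variable
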
Exemple #30
0
    def __init__(self, lstm_size, lstm_layers, source_vocab_size,
                 enc_embedding_size, tgt_word_to_int, dec_embedding_size,
                 tgt_max_length):

        #-----------------------------------------------------------------------
        # Placeholders
        #-----------------------------------------------------------------------
        self.inputs = tf.placeholder(tf.int32, [None, None], name='inputs')
        self.targets = tf.placeholder(tf.int32, [None, None], name='targets')
        self.batch_size = tf.placeholder(tf.int32, [], name='batch_size')
        self.tgt_seq_length = tf.placeholder(tf.int32, [None],
                                             name='tgt_seq_length')
        self.src_seq_length = tf.placeholder(tf.int32, [None],
                                             name='src_seq_length')

        #-----------------------------------------------------------------------
        # Encoder
        #-----------------------------------------------------------------------
        with tf.variable_scope('encoder'):
            with tf.variable_scope('embedding'):
                enc_embed = tf.contrib.layers.embed_sequence(
                    self.inputs, source_vocab_size, enc_embedding_size)
            with tf.variable_scope('rnn'):
                enc_cell = tf.contrib.rnn.MultiRNNCell(
                         [tf.contrib.rnn.BasicLSTMCell(lstm_size) \
                          for _ in range(lstm_layers)])

            self.initial_state = enc_cell.zero_state(self.batch_size,
                                                     tf.float32)

            _, self.enc_state = tf.nn.dynamic_rnn(
                enc_cell,
                enc_embed,
                sequence_length=self.src_seq_length,
                initial_state=self.initial_state)

        #-----------------------------------------------------------------------
        # Decoder
        #-----------------------------------------------------------------------
        target_vocab_size = len(tgt_word_to_int)
        with tf.variable_scope('decoder'):

            #-------------------------------------------------------------------
            # Embedding
            #-------------------------------------------------------------------
            with tf.variable_scope('embedding'):
                self.dec_embed = tf.Variable(
                    tf.random_uniform([target_vocab_size, dec_embedding_size]))

            #-------------------------------------------------------------------
            # Final classifier
            #-------------------------------------------------------------------
            with tf.variable_scope('classifier') as classifier_scope:
                self.output_layer = Dense(target_vocab_size,
                                      kernel_initializer = \
                                        tf.truncated_normal_initializer(
                                          mean = 0.0, stddev=0.1))

            #-------------------------------------------------------------------
            # RNN
            #-------------------------------------------------------------------
            with tf.variable_scope('rnn'):
                self.dec_cell = tf.contrib.rnn.MultiRNNCell(
                                  [tf.contrib.rnn.BasicLSTMCell(lstm_size) \
                                   for _ in range(lstm_layers)])

            #-------------------------------------------------------------------
            # Inference decoder
            #-------------------------------------------------------------------
            with tf.variable_scope('decoder'):
                start_tokens = tf.tile([tgt_word_to_int['<s>']],
                                       [self.batch_size])

                helper = seq2seq.GreedyEmbeddingHelper(self.dec_embed,
                                                       start_tokens,
                                                       tgt_word_to_int['</s>'])

                decoder = seq2seq.BasicDecoder(self.dec_cell, helper,
                                               self.enc_state,
                                               self.output_layer)
                outputs, _, _ = seq2seq.dynamic_decode(
                    decoder,
                    impute_finished=True,
                    maximum_iterations=tgt_max_length)

        self.outputs = tf.identity(outputs.sample_id, 'predictions')
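
One detail worth noting across these snippets: GreedyEmbeddingHelper accepts either an embedding matrix (as with self.dec_embed here) or a callable (as with the embed_and_input_proj wrappers earlier); when given a matrix it performs the embedding lookup itself. A small sketch of the two equivalent constructions (toy sizes and token ids):

import tensorflow as tf
from tensorflow.contrib import seq2seq

embeddings = tf.get_variable('demo_embeddings', shape=[10, 4])
start_tokens = tf.fill([2], 1)   # batch of 2, start id 1
end_token = 2

# Form 1: pass the embedding matrix directly (as in the snippet above).
helper_a = seq2seq.GreedyEmbeddingHelper(embeddings, start_tokens, end_token)

# Form 2: pass a callable, handy when an input projection must also be applied.
helper_b = seq2seq.GreedyEmbeddingHelper(
    lambda ids: tf.nn.embedding_lookup(embeddings, ids),
    start_tokens, end_token)
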