Example #1
    def decoder_decode(self, decoder_cell, decoder_initial_state,
                       output_layer):
        start_tokens = tf.ones([
            self.batch_size,
        ], tf.int32) * self.word_to_idx['<GO>']
        end_token = self.word_to_idx['<EOS>']

        if self.beam_search:
            inference_decoder = BeamSearchDecoder(
                cell=decoder_cell,
                embedding=self.embedding,
                start_tokens=start_tokens,
                end_token=end_token,
                initial_state=decoder_initial_state,
                beam_width=self.beam_size,
                output_layer=output_layer)
        else:
            decoding_helper = GreedyEmbeddingHelper(embedding=self.embedding,
                                                    start_tokens=start_tokens,
                                                    end_token=end_token)
            inference_decoder = BasicDecoder(
                cell=decoder_cell,
                helper=decoding_helper,
                initial_state=decoder_initial_state,
                output_layer=output_layer)

        decoder_outputs, _, _ = dynamic_decode(decoder=inference_decoder,
                                               maximum_iterations=50)
        if self.beam_search:
            decoder_predict_decode = decoder_outputs.predicted_ids
        else:
            decoder_predict_decode = tf.expand_dims(decoder_outputs.sample_id,
                                                    -1)
        return decoder_predict_decode
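One caveat for the beam-search branch above: BeamSearchDecoder expects decoder_initial_state (and any attention memory) to already be tiled beam_size times along the batch axis before this method is called, as Example #9 does with tile_batch. A minimal sketch of that preparation step on the caller's side; encoder_outputs, encoder_states, and encoder_length are assumed names, not part of this example:

        # Hypothetical caller-side preparation before building decoder_initial_state.
        if self.beam_search:
            encoder_outputs = tile_batch(encoder_outputs, multiplier=self.beam_size)
            encoder_states = tile_batch(encoder_states, multiplier=self.beam_size)
            encoder_length = tile_batch(encoder_length, multiplier=self.beam_size)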
Example #2
    def __build_decoder(self, n_decoder_layers, hidden_size, vocab_size,
                        max_iter, start_symbol_id, end_symbol_id):
        # Use start symbols as the decoder inputs at the first time step
        batch_size = tf.shape(self.input_batch)[0]
        start_tokens = tf.fill([batch_size], start_symbol_id)
        ground_truth_as_input = tf.concat(
            [tf.expand_dims(start_tokens, 1), self.ground_truth], 1)

        # Use the embedding layer defined before to lookup embeddings for ground_truth_as_input
        self.ground_truth_embedded = tf.nn.embedding_lookup(
            self.embeddings, ground_truth_as_input)

        # Create TrainingHelper for the train stage
        train_helper = TrainingHelper(self.ground_truth_embedded,
                                      self.ground_truth_lengths)

        # Create GreedyEmbeddingHelper for the inference stage
        infer_helper = GreedyEmbeddingHelper(self.embeddings, start_tokens,
                                             end_symbol_id)

        def decode(helper, scope, reuse=None):
            with tf.variable_scope(scope, reuse=reuse):
                rnn_layers = []
                for i in range(n_decoder_layers):
                    # Create GRUCell with dropout. Do not forget to set the reuse flag properly.
                    cell = tf.nn.rnn_cell.GRUCell(hidden_size, reuse=reuse)
                    cell = tf.nn.rnn_cell.DropoutWrapper(
                        cell, input_keep_prob=self.dropout_ph)
                    rnn_layers.append(cell)

                decoder_cell = MultiRNNCell(rnn_layers)

                # Create a projection wrapper
                decoder_cell = OutputProjectionWrapper(decoder_cell,
                                                       vocab_size,
                                                       reuse=reuse)

                # Create BasicDecoder, pass the defined cell, a helper, and initial state
                # The initial state should be equal to the final state of the encoder!
                initial_state = decoder_cell.zero_state(batch_size=batch_size,
                                                        dtype=tf.float32)
                decoder = BasicDecoder(decoder_cell,
                                       helper,
                                       initial_state=initial_state)

                # The first returning argument of dynamic_decode contains two fields:
                #   * rnn_output (predicted logits)
                #   * sample_id (predictions)
                max_iters = tf.reduce_max(self.ground_truth_lengths)
                # max_iters = max_iter
                outputs, _, _ = dynamic_decode(decoder=decoder,
                                               maximum_iterations=max_iters,
                                               output_time_major=False,
                                               impute_finished=True)

                return outputs

        self.train_outputs = decode(train_helper, 'decode')
        self.infer_outputs = decode(infer_helper, 'decode', reuse=True)
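A possible follow-up to the block above (a sketch, not part of the original example, and assuming self.ground_truth is padded to the in-batch maximum length): the training logits feed a masked sequence loss, while the inference branch supplies greedy predictions.

        # train_outputs.rnn_output: [batch_size, T, vocab_size] logits
        # infer_outputs.sample_id:  [batch_size, T'] greedy token ids
        weights = tf.cast(tf.sequence_mask(self.ground_truth_lengths),
                          dtype=tf.float32)
        self.loss = tf.contrib.seq2seq.sequence_loss(
            logits=self.train_outputs.rnn_output,
            targets=self.ground_truth,
            weights=weights)
        self.train_predictions = self.train_outputs.sample_id
        self.infer_predictions = self.infer_outputs.sample_id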
Example #3
def inference_decoding_layer(embeddings, start_token, end_token, decoding_cell,
                             initial_state, op_layer, max_en_len, batch_size):

    start_tokens = tf.tile(tf.constant([start_token], dtype=tf.int32),
                           [batch_size],
                           name='start_tokens')
    inf_helper = GreedyEmbeddingHelper(embeddings, start_tokens, end_token)
    inf_decoder = BasicDecoder(decoding_cell, inf_helper, initial_state,
                               op_layer)
    inf_logits, _, _ = dynamic_decode(inf_decoder,
                                      output_time_major=False,
                                      impute_finished=True,
                                      maximum_iterations=max_en_len)
    return inf_logits
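Despite its name, inf_logits above is the full BasicDecoderOutput returned by dynamic_decode, so both the logits and the greedy token ids can be read from it. A small usage sketch, assuming the caller has already built the arguments:

decoder_output = inference_decoding_layer(embeddings, start_token, end_token,
                                          decoding_cell, initial_state,
                                          op_layer, max_en_len, batch_size)
inference_logits = decoder_output.rnn_output   # [batch_size, steps, vocab_size]
predicted_ids = decoder_output.sample_id       # [batch_size, steps]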
Example #4
    def decoder_decode(self, decoder_cell, decoder_initial_state,
                       output_layer):
        # Each sentence starts with the <GO> token
        start_tokens = tf.ones([
            self.batch_size,
        ], tf.int32) * self.word_to_idx['<GO>']
        # Each sentence ends with the <EOS> token
        end_token = self.word_to_idx['<EOS>']

        # If beam search is enabled, decode with BeamSearchDecoder.
        if self.beam_search:
            inference_decoder = BeamSearchDecoder(
                cell=decoder_cell,
                embedding=self.embedding,
                start_tokens=start_tokens,
                end_token=end_token,
                initial_state=decoder_initial_state,
                beam_width=self.beam_size,
                output_layer=output_layer)
        else:  # Without beam search, use the GreedyEmbeddingHelper helper class.
            decoding_helper = GreedyEmbeddingHelper(embedding=self.embedding,
                                                    start_tokens=start_tokens,
                                                    end_token=end_token)
            # Decode with BasicDecoder.
            inference_decoder = BasicDecoder(
                cell=decoder_cell,
                helper=decoding_helper,
                initial_state=decoder_initial_state,
                output_layer=output_layer)

        # dynamic_decode
        # Arguments:
        # decoder: a BasicDecoder, BeamSearchDecoder, or user-defined decoder instance
        # output_time_major: as in the RNN API; True yields [step, batch_size, ...], False yields [batch_size, step, ...]
        # impute_finished: Boolean. If True, once a sequence finishes, its last state is copied forward and its outputs are zeroed, which keeps the final states and outputs correct and skips finished steps during backpropagation, at the cost of slower execution.
        # maximum_iterations: maximum number of decoding steps; usually decoder_inputs_length for training, and whatever maximum sequence length is desired for inference. Decoding stops once <eos> is produced or the maximum number of steps is reached.
        decoder_outputs, _, _ = dynamic_decode(decoder=inference_decoder,
                                               maximum_iterations=50)
        if self.beam_search:  # With beam search, the predictions come back as predicted_ids
            decoder_predict_decode = decoder_outputs.predicted_ids
        else:  # Expand a trailing dimension so the greedy output matches the beam-search [batch, steps, beam_width] layout
            decoder_predict_decode = tf.expand_dims(decoder_outputs.sample_id,
                                                    -1)
        return decoder_predict_decode
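The maximum_iterations comment above implies different settings for training and inference; Example #2 applies the same rule. A minimal sketch of the two calls, where training_decoder and decoder_inputs_length are assumed names rather than part of this example:

        # Training: stop at the longest target sequence in the batch.
        train_outputs, _, _ = dynamic_decode(
            decoder=training_decoder,
            impute_finished=True,
            maximum_iterations=tf.reduce_max(decoder_inputs_length))

        # Inference: cap decoding at a fixed length; decoding also stops early
        # once every sequence in the batch has emitted <EOS>.
        infer_outputs, _, _ = dynamic_decode(decoder=inference_decoder,
                                             maximum_iterations=50)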
Example #5
    def build_predict_decoder(self):
        start_tokens = tf.ones([
            self.batch_size,
        ], tf.int32) * self.word_to_id['<GO>']
        end_token = self.word_to_id['<EOS>']

        decoder_cell, decoder_initial_state = self.build_decoder_cell()
        output_layer = tf.layers.Dense(
            self.vocab_size,
            kernel_initializer=tf.truncated_normal_initializer(mean=0.0,
                                                               stddev=0.1))

        if self.beam_search:
            inference_decoder = BeamSearchDecoder(
                cell=decoder_cell,
                embedding=self.embedding,
                start_tokens=start_tokens,
                end_token=end_token,
                initial_state=decoder_initial_state,
                beam_width=self.beam_size,
                output_layer=output_layer)

        else:
            decoding_helper = GreedyEmbeddingHelper(embedding=self.embedding,
                                                    start_tokens=start_tokens,
                                                    end_token=end_token)
            inference_decoder = BasicDecoder(
                cell=decoder_cell,
                helper=decoding_helper,
                initial_state=decoder_initial_state,
                output_layer=output_layer)

        decoder_outputs, _, _ = dynamic_decode(decoder=inference_decoder,
                                               maximum_iterations=50)

        if self.beam_search:
            decoder_predict_decode = decoder_outputs.predicted_ids
        else:
            decoder_predict_decode = tf.expand_dims(decoder_outputs.sample_id,
                                                    -1)

        return decoder_predict_decode
Example #6
    def decode(self, cell_dec, enc_final_state, output_size, output_embed_matrix, training, grammar_helper=None):
        linear_layer = tf_core_layers.Dense(output_size, use_bias=False)

        go_vector = tf.ones((self.batch_size,), dtype=tf.int32) * self.config.grammar.start
        if training:
            output_ids_with_go = tf.concat([tf.expand_dims(go_vector, axis=1), self.output_placeholder], axis=1)
            outputs = tf.nn.embedding_lookup([output_embed_matrix], output_ids_with_go)
            helper = TrainingHelper(outputs, self.output_length_placeholder+1)
        else:
            helper = GreedyEmbeddingHelper(output_embed_matrix, go_vector, self.config.grammar.end)
        
        if self.config.use_grammar_constraints:
            decoder = GrammarBasicDecoder(
                self.config.grammar, cell_dec, helper, enc_final_state,
                output_layer=linear_layer,
                training_output=self.output_placeholder if training else None,
                grammar_helper=grammar_helper)
        else:
            decoder = BasicDecoder(cell_dec, helper, enc_final_state,
                                   output_layer=linear_layer)

        final_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(decoder, impute_finished=True, maximum_iterations=self.max_length)
        
        return final_outputs
Example #7
    def build_predict_decoder(self):
        print('Building predict decoder...')

        start_tokens = tf.ones([self.batch_size, ], tf.int32) * self.word_to_id['<GO>']
        end_token = self.word_to_id['<EOS>']

        if self.beam_search:
            inference_decoder = BeamSearchDecoder(
                cell=self.decoder_cell,
                embedding=self.embedding,
                start_tokens=start_tokens,
                end_token=end_token,
                initial_state=self.decoder_initial_state,
                beam_width=self.beam_size,
                output_layer=self.output_layer
            )

        else:
            decoding_helper = GreedyEmbeddingHelper(
                embedding=self.embedding,
                start_tokens=start_tokens,
                end_token=end_token
            )
            ##Uses the argmax of the output (treated as logits) and passes the result through an embedding layer to get the next input.
            ##embedding: A callable that takes a vector tensor of ids (argmax ids), or the params argument for embedding_lookup. The returned tensor will be passed to the decoder input.
            ##start_tokens: int32 vector shaped [batch_size], the start tokens.
            ##end_token: int32 scalar, the token that marks end of decoding.
            inference_decoder = BasicDecoder(
                cell=self.decoder_cell,
                helper=decoding_helper,
                initial_state=self.decoder_initial_state,
                output_layer=self.output_layer
            )

        decoder_outputs, _, _ = dynamic_decode(decoder=inference_decoder, maximum_iterations=50)
        ##predicted_ids: Final outputs returned by the beam search after all decoding is finished. A tensor of shape [batch_size, num_steps, beam_width] (or [num_steps, batch_size, beam_width] if output_time_major is True). Beams are ordered from best to worst.
        if self.beam_search:
            self.decoder_predict_decode = decoder_outputs.predicted_ids
        else:
            self.decoder_predict_decode = tf.expand_dims(decoder_outputs.sample_id, -1)
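Because predicted_ids is [batch_size, num_steps, beam_width] with beams ordered best to worst (see the comment above), the single best hypothesis can be sliced from the first beam; in the greedy branch the trailing axis has size 1, so the same slice works. A short sketch:

        # Keep only the top-scoring beam (or the single greedy sequence).
        self.decoder_best_predict = self.decoder_predict_decode[:, :, 0]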
Example #8
def pointer_net(inputs, input_lengths, n_pointers, word_matrix, cell_type, n_layers, n_units,
                dropout_prob, is_training=True):
    """Pointer network.

    Args:
        inputs (tensor):        Inputs to pointer network (typically output of previous RNN)
        input_lengths (tensor): Actual non-padded lengths of each input sequence
        n_pointers (int):       Number of pointers to generate
        word_matrix (tensor):   Embedding matrix of word vectors
        cell_type (method):     Cell type to use
        n_layers (int):         Number of layers in RNN (same for encoder & decoder)
        n_units (int):          Number of units in RNN cell (same for encoder & decoder)
        dropout_prob (float):   Dropout probability
        is_training (bool):     Whether the model is training or testing
    """
    batch_size, seq_length, _ = inputs.get_shape().as_list()
    vocab_size = word_matrix.get_shape().as_list()[0]

    # instantiate RNN cell; only use dropout during training
    def _rnn_cell():
        keep_prob = 1 - dropout_prob if is_training else 1
        return DropoutWrapper(cell_type(n_units), output_keep_prob=keep_prob)

    enc_cell = MultiRNNCell([_rnn_cell() for _ in range(n_layers)]) if n_layers > 1 else _rnn_cell()
    encoded, _ = tf.nn.dynamic_rnn(enc_cell, inputs, input_lengths, dtype=tf.float32)

    attention = BahdanauAttention(n_units, encoded, memory_sequence_length=input_lengths)
    # TODO: find permanent solution (InferenceHelper?)
    start_tokens = tf.constant(START_TOKEN, shape=[batch_size], dtype=tf.int32)
    helper = GreedyEmbeddingHelper(word_matrix, start_tokens, END_TOKEN)

    dec_cell = MultiRNNCell([_rnn_cell() for _ in range(n_layers)]) if n_layers > 1 else _rnn_cell()
    attn_cell = AttentionWrapper(dec_cell, attention, alignment_history=True)
    out_cell = tf.contrib.rnn.OutputProjectionWrapper(attn_cell, vocab_size)
    decoder = BasicDecoder(out_cell, helper, attn_cell.zero_state(batch_size, tf.float32))
    _, states, _ = dynamic_decode(decoder, maximum_iterations=n_pointers, impute_finished=True)
    probs = tf.reshape(states.alignment_history.stack(), [n_pointers, batch_size, seq_length])
    return probs
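The stacked alignment_history above acts as one attention distribution per pointer over the input positions, so the pointer indices themselves are typically its argmax. A hedged usage sketch with hypothetical argument values:

# probs: [n_pointers, batch_size, seq_length] attention distributions.
probs = pointer_net(inputs, input_lengths, n_pointers=2,
                    word_matrix=word_matrix, cell_type=tf.nn.rnn_cell.GRUCell,
                    n_layers=1, n_units=128, dropout_prob=0.2,
                    is_training=False)
pointer_positions = tf.argmax(probs, axis=-1)   # [n_pointers, batch_size]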
Example #9
    def build_model(self):
        print('building model... ...')
        with tf.variable_scope('seq2seq_placeholder'):
            self.encoder_inputs = tf.placeholder(tf.int32, [None, None],
                                                 name="encoder_inputs")
            self.decoder_inputs = tf.placeholder(tf.int32, [None, None],
                                                 name="decoder_inputs")
            self.decoder_targets = tf.placeholder(tf.int32, [None, None],
                                                  name="decoder_targets")
            self.decoder_targets_masks = tf.placeholder(tf.float32,
                                                        [None, None],
                                                        name="mask")
            self.encoder_length = tf.placeholder(tf.int32, [None],
                                                 name="encoder_length")
            self.decoder_length = tf.placeholder(tf.int32, [None],
                                                 name="decoder_length")
            self.max_target_sequence_length = tf.reduce_max(
                self.decoder_length, name='max_target_len')

        with tf.variable_scope('seq2seq_embedding'):
            self.embedding = self.init_embedding(self.vocab_size,
                                                 self.embedding_size)

        with tf.variable_scope('seq2seq_encoder'):
            encoder_outputs, encoder_states = build_encoder(
                self.embedding,
                self.encoder_inputs,
                self.encoder_length,
                self.enc_num_layers,
                self.enc_num_units,
                self.enc_cell_type,
                bidir=self.enc_bidir)

        with tf.variable_scope('seq2seq_decoder'):
            encoder_length = self.encoder_length
            if self.beam_search:
                print("use beamsearch decoding..")
                encoder_outputs = tile_batch(encoder_outputs,
                                             multiplier=self.beam_size)
                encoder_states = tile_batch(encoder_states,
                                            multiplier=self.beam_size)
                encoder_length = tile_batch(encoder_length,
                                            multiplier=self.beam_size)

            attention_mechanism = BahdanauAttention(
                num_units=self.attn_num_units,
                memory=encoder_outputs,
                memory_sequence_length=encoder_length)

            decoder_cell = create_rnn_cell(self.dec_num_layers,
                                           self.dec_num_units,
                                           self.dec_cell_type)
            decoder_cell = AttentionWrapper(
                cell=decoder_cell,
                attention_mechanism=attention_mechanism,
                attention_layer_size=self.dec_num_units,
                name='Attention_Wrapper')

            batch_size = self.batch_size if not self.beam_search else self.batch_size * self.beam_size

            decoder_initial_state = decoder_cell.zero_state(
                batch_size=batch_size,
                dtype=tf.float32).clone(cell_state=encoder_states)

            output_layer = tf.layers.Dense(self.vocab_size,
                                           use_bias=False,
                                           name='output_projection')

            if self.mode == 'train':
                decoder_inputs_embedded = tf.nn.embedding_lookup(
                    self.embedding, self.decoder_inputs)
                # TrainingHelper feeds the given decoder inputs at each time step instead of the previous step's output
                training_helper = tf.contrib.seq2seq.TrainingHelper(
                    inputs=decoder_inputs_embedded,
                    sequence_length=self.decoder_length,
                    name='training_helper')

                training_decoder = tf.contrib.seq2seq.BasicDecoder(
                    cell=decoder_cell,
                    helper=training_helper,
                    initial_state=decoder_initial_state,
                    output_layer=output_layer)

                decoder_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
                    decoder=training_decoder,
                    impute_finished=True,
                    maximum_iterations=self.max_target_sequence_length)

                self.decoder_logits_train = decoder_outputs.rnn_output

                self.loss = tf.contrib.seq2seq.sequence_loss(
                    logits=self.decoder_logits_train,
                    targets=self.decoder_targets,
                    weights=self.decoder_targets_masks)

                optimizer = tf.train.AdamOptimizer(self.learning_rate)
                trainable_params = tf.trainable_variables()
                gradients = tf.gradients(self.loss, trainable_params)
                clip_gradients, _ = tf.clip_by_global_norm(
                    gradients, self.max_gradient_norm)
                self.train_op = optimizer.apply_gradients(
                    zip(clip_gradients, trainable_params))

            elif self.mode == 'infer':
                start_tokens = tf.ones([
                    self.batch_size,
                ], tf.int32) * SOS_ID  # the batch_size here does not need to be tiled
                end_token = EOS_ID

                if self.beam_search:
                    inference_decoder = BeamSearchDecoder(
                        cell=decoder_cell,
                        embedding=self.embedding,
                        start_tokens=start_tokens,
                        end_token=end_token,
                        initial_state=decoder_initial_state,
                        beam_width=self.beam_size,
                        output_layer=output_layer)
                else:
                    decoding_helper = GreedyEmbeddingHelper(
                        embedding=self.embedding,
                        start_tokens=start_tokens,
                        end_token=end_token)

                    inference_decoder = BasicDecoder(
                        cell=decoder_cell,
                        helper=decoding_helper,
                        initial_state=decoder_initial_state,
                        output_layer=output_layer)

                decoder_outputs, _, _ = dynamic_decode(
                    decoder=inference_decoder,
                    maximum_iterations=self.infer_max_iter)
                if self.beam_search:
                    infer_outputs = decoder_outputs.predicted_ids  # [batch_size, decoder_targets_length, beam_size]
                    self.infer_outputs = tf.transpose(
                        infer_outputs,
                        [0, 2, 1])  # [batch_size, beam_size, decoder_targets_length]
                else:
                    self.infer_outputs = decoder_outputs.sample_id  # [batch_size, decoder_targets_length]

        self.saver = tf.train.Saver(tf.global_variables(),
                                    max_to_keep=self.max_to_keep)
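In the 'infer' branch only the encoder placeholders need to be fed; decoding runs from SOS_ID until EOS_ID or infer_max_iter. A hedged sketch of running inference, where sess, model, batch_inputs, and batch_lengths are assumptions:

predicted = sess.run(model.infer_outputs,
                     feed_dict={
                         model.encoder_inputs: batch_inputs,    # [batch_size, max_src_len]
                         model.encoder_length: batch_lengths,   # [batch_size]
                     })
# beam search: predicted has shape [batch_size, beam_size, decoder_targets_length]
# greedy:      predicted has shape [batch_size, decoder_targets_length]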
Example #10
    def build_decoder(self, phase):
        print("building decoder and attention..")
        with tf.variable_scope('decoder'):
            # Building decoder_cell and decoder_initial_state
            decoder_cells, decoder_initial_state = self.build_decoder_cell()

            # Initialize decoder embeddings to have variance=1.
            initializer = tf.random_uniform_initializer(-sqrt(3),
                                                        sqrt(3),
                                                        dtype=tf.float32)

            self.decoder_embeddings = tf.get_variable(
                name='embedding',
                shape=(self.config.decoder_symbols_num,
                       self.config.embedding_size),
                initializer=initializer,
                dtype=tf.float32)

            # Input projection layer to feed embedded inputs to the cell
            # ** Essential when use_residual=True to match input/output dims
            input_layer = Dense(self.config.hidden_units,
                                dtype=tf.float32,
                                name='input_projection')

            # Output projection layer to convert cell_outputs to logits
            output_layer = Dense(self.config.decoder_symbols_num,
                                 name='output_projection')

            if phase == 'train':
                # decoder_inputs_embedded: [batch_size, max_time_step + 1, embedding_size]
                decoder_inputs_embedded = embedding_lookup(
                    params=self.decoder_embeddings,
                    ids=self.decoder_inputs_train)

                # Embedded inputs having gone through input projection layer
                decoder_inputs_embedded = input_layer(decoder_inputs_embedded)

                # Helper to feed inputs for training: read inputs from dense ground truth vectors
                training_helper = TrainingHelper(
                    inputs=decoder_inputs_embedded,
                    sequence_length=self.decoder_inputs_length_train,
                    time_major=False,
                    name='training_helper')

                training_decoder = BasicDecoder(
                    cell=decoder_cells,
                    helper=training_helper,
                    initial_state=decoder_initial_state,
                    output_layer=output_layer)

                # Maximum decoder time_steps in current batch
                max_decoder_length = tf.reduce_max(
                    self.decoder_inputs_length_train)

                # decoder_outputs_train: BasicDecoderOutput
                #                        namedtuple(rnn_outputs, sample_id)
                # decoder_outputs_train.rnn_output: [batch_size, max_time_step + 1, num_decoder_symbols] if output_time_major=False
                #                                   [max_time_step + 1, batch_size, num_decoder_symbols] if output_time_major=True
                # decoder_outputs_train.sample_id: [batch_size], tf.int32
                self.decoder_outputs_train, self.decoder_last_state_train, \
                self.decoder_outputs_length_train = dynamic_decode(
                    decoder=training_decoder,
                    output_time_major=False,
                    impute_finished=True,
                    maximum_iterations=max_decoder_length)

                # More efficient to do the projection on the batch-time-concatenated tensor
                # logits_train: (batch_size, max_time_step + 1, num_decoder_symbols)
                # self.decoder_logits_train = output_layer(self.decoder_outputs_train.rnn_output)
                self.decoder_logits_train = tf.identity(
                    self.decoder_outputs_train.rnn_output)

                # Use argmax to extract decoder symbols to emit
                self.decoder_pred_train = tf.argmax(self.decoder_logits_train,
                                                    axis=-1,
                                                    name='decoder_pred_train')

                # masks: masking for valid and padded time steps, (batch_size, max_time_step + 1)
                masks = tf.sequence_mask(
                    lengths=self.decoder_inputs_length_train,
                    maxlen=max_decoder_length,
                    dtype=tf.float32,
                    name='masks')

                # Computes per word average cross-entropy over a batch
                # Internally calls 'nn_ops.sparse_softmax_cross_entropy_with_logits' by default
                self.loss = sequence_loss(logits=self.decoder_logits_train,
                                          targets=self.decoder_targets_train,
                                          weights=masks,
                                          average_across_timesteps=True,
                                          average_across_batch=True)

                # Training summary for the current batch_loss
                tf.summary.scalar('loss', self.loss)

                # Construct graphs for minimizing loss
                self.build_optimizer()

            elif phase == 'decode':

                # Start_tokens: [batch_size,] `int32` vector
                start_tokens = tf.ones(
                    (self.batch_size, ), tf.int32) * self.config._GO
                end_token = self.config._EOS

                def embed_and_input_proj(inputs):
                    return input_layer(
                        tf.nn.embedding_lookup(self.decoder_embeddings,
                                               inputs))

                # Helper to feed inputs for greedy decoding: uses the argmax of the output
                decoding_helper = GreedyEmbeddingHelper(
                    start_tokens=start_tokens,
                    end_token=end_token,
                    embedding=embed_and_input_proj)

                # Basic decoder performs greedy decoding at each time step
                inference_decoder = BasicDecoder(
                    cell=decoder_cells,
                    helper=decoding_helper,
                    initial_state=decoder_initial_state,
                    output_layer=output_layer)

                # For GreedyDecoder, return
                # decoder_outputs_decode: BasicDecoderOutput instance
                #                         namedtuple(rnn_outputs, sample_id)
                # decoder_outputs_decode.rnn_output: [batch_size, max_time_step, num_decoder_symbols] 	if output_time_major=False
                #                                    [max_time_step, batch_size, num_decoder_symbols] 	if output_time_major=True
                # decoder_outputs_decode.sample_id: [batch_size, max_time_step], tf.int32		if output_time_major=False
                #                                   [max_time_step, batch_size], tf.int32               if output_time_major=True

                # For BeamSearchDecoder, return
                # decoder_outputs_decode: FinalBeamSearchDecoderOutput instance
                #                         namedtuple(predicted_ids, beam_search_decoder_output)
                # decoder_outputs_decode.predicted_ids: [batch_size, max_time_step, beam_width] if output_time_major=False
                #                                       [max_time_step, batch_size, beam_width] if output_time_major=True
                # decoder_outputs_decode.beam_search_decoder_output: BeamSearchDecoderOutput instance
                #                                                    namedtuple(scores, predicted_ids, parent_ids)

                self.decoder_outputs_decode, self.decoder_last_state_decode, \
                self.decoder_outputs_length_decode = dynamic_decode(
                    decoder=inference_decoder,
                    output_time_major=False,
                    # impute_finished=True,	# error occurs??
                    maximum_iterations=self.config.max_decode_step)

                # decoder_outputs_decode.sample_id: [batch_size, max_time_step]
                # Or use argmax to find decoder symbols to emit:
                # self.decoder_pred_decode = tf.argmax(self.decoder_outputs_decode.rnn_output,
                #                                      axis=-1, name='decoder_pred_decode')

                # Here, we use expand_dims to be compatible with the result of the beamsearch decoder
                # decoder_pred_decode: [batch_size, max_time_step, 1] (output_major=False)
                self.decoder_pred_decode = tf.expand_dims(
                    self.decoder_outputs_decode.sample_id, -1)
Example #11
    def __init__(self,
                 inputs,
                 targets,
                 src_vocab_size,
                 src_max_length,
                 tgt_vocab_size,
                 tgt_max_length,
                 emb_dim,
                 num_units,
                 batch_size,
                 eos_token,
                 is_train,
                 share_embeddings=False,
                 teacher_forcing=False):

        xavier = tf.contrib.layers.xavier_initializer
        start_tokens = tf.zeros([batch_size], dtype=tf.int32)
        input_lengths = tf.argmin(tf.abs(inputs - eos_token), axis=-1, output_type=tf.int32)

        target_lengths = tf.argmin(tf.abs(targets - eos_token), axis=-1, output_type=tf.int32)

        input_embedding_table = tf.get_variable("encoder_embedding", [src_vocab_size, emb_dim], initializer=xavier(), dtype=tf.float32)
        input_embedding = tf.nn.embedding_lookup(input_embedding_table, inputs)
        encoder_cell = tf.nn.rnn_cell.BasicLSTMCell(num_units, state_is_tuple=False)
        encoder_cell = tf.nn.rnn_cell.DropoutWrapper(cell=encoder_cell,
                                                     input_keep_prob=0.8,
                                                     output_keep_prob=1.0)

        #   encoder_outputs: [max_time, batch_size, num_units]
        #   encoder_state: [batch_size, num_units]
        (encoder_output,
         encoder_state) = tf.nn.bidirectional_dynamic_rnn(cell_fw=encoder_cell,
                                                          cell_bw=encoder_cell,
                                                          inputs=input_embedding,
                                                          sequence_length=input_lengths,
                                                          dtype=tf.float32,
                                                          time_major=False)

        encoder_output = tf.concat(encoder_output, axis=2)
        encoder_state = tf.concat([encoder_state[0], encoder_state[1]], axis=1)

        if share_embeddings:
            assert src_vocab_size == tgt_vocab_size
            target_embedding_table = input_embedding_table
        else:
            target_embedding_table = tf.get_variable("decoder_embedding", [src_vocab_size, emb_dim], initializer=xavier(), dtype=tf.float32)

        prefixed_targets = tf.concat([tf.expand_dims(start_tokens, 1), targets], axis=1)
        target_embedding = tf.nn.embedding_lookup(target_embedding_table, prefixed_targets)

        if teacher_forcing:
            helper = TrainingHelper(target_embedding,
                                    target_lengths + 1,
                                    time_major=False)
        else:
            helper = GreedyEmbeddingHelper(target_embedding_table, start_tokens, eos_token)

        decoder_cell = tf.nn.rnn_cell.BasicLSTMCell(num_units * 2, state_is_tuple=False)
        projection_layer = tf.layers.Dense(tgt_vocab_size, use_bias=False)

        attention_mechanism = BahdanauAttention(num_units,
                                                encoder_output,
                                                memory_sequence_length=input_lengths)

        decoder_cell = AttentionWrapper(decoder_cell,
                                        attention_mechanism,
                                        attention_layer_size=num_units)
        #decoder_cell = tf.nn.rnn_cell.DropoutWrapper(cell=decoder_cell,
        #                                             input_keep_prob=0.8,
        #                                             output_keep_prob=1.0)

        encoder_state = decoder_cell.zero_state(batch_size, tf.float32).clone(cell_state=encoder_state)
        decoder = BasicDecoder(cell=decoder_cell,
                               helper=helper,
                               initial_state=encoder_state,
                               output_layer=projection_layer)

        decoder_outputs, states, lengths = dynamic_decode(decoder,
                                                          output_time_major=False,
                                                          impute_finished=True,
                                                          maximum_iterations=tgt_max_length)
        unpadded_logits = decoder_outputs.rnn_output
        missing_elems = tgt_max_length - tf.shape(unpadded_logits)[1]
        padding = [[0, 0], [0, missing_elems], [0, 0]]
        logits = tf.pad(unpadded_logits, padding, 'CONSTANT', constant_values=0.)

        weights = tf.sequence_mask(target_lengths + 1, # the "+1" is to include EOS
                                   maxlen=tgt_max_length,
                                   dtype=tf.float32)
        #self.mle_loss = sequence_loss(targets=targets,
        #                              logits=logits,
        #                              weights=weights,
        #                              average_across_batch=True)

        crossent = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=targets, logits=logits)
        mle_loss = (tf.reduce_sum(crossent * weights) / batch_size)
        preds = decoder_outputs.sample_id

        self.preds = preds
        self.logits = logits
        self.mle_loss = mle_loss
Example #12
def model_fn(features, labels, mode, params):
    embedding_encoder = tf.get_variable('embedding_encoder',
                                        shape=(params.vocab_size,
                                               params.emb_size))
    table = lookup_ops.index_to_string_table_from_file(params.word_vocab_file)

    question_emb = tf.nn.embedding_lookup(embedding_encoder,
                                          features['question_words'])
    passage_emb = tf.nn.embedding_lookup(embedding_encoder,
                                         features['passage_words'])

    question_words_length = features['question_length']
    passage_words_length = features['passage_length']

    answer_start, answer_end = features['answer_start'], features['answer_end']
    answer_start = tf.concat([tf.expand_dims(answer_start, -1)] * 50, -1)
    answer_end = tf.concat([tf.expand_dims(answer_end, -1)] * 50, -1)

    with tf.variable_scope('passage_encoding'):
        passage_enc, (_, passage_bw_state) = biGRU(tf.concat(
            [passage_emb, answer_start, answer_end], -1),
                                                   passage_words_length,
                                                   params,
                                                   layers=params.layers)

    with tf.variable_scope('question_encoding'):
        question_enc, (_, question_bw_state) = biGRU(question_emb,
                                                     question_words_length,
                                                     params,
                                                     layers=params.layers)

    # output_enc = masked_concat(question_enc, passage_enc, question_words_length, passage_words_length)

    decoder_state_layer = Dense(params.units,
                                activation=tf.tanh,
                                use_bias=True,
                                name='decoder_state_init')
    decoder_init_state = tuple(
        decoder_state_layer(
            tf.concat([passage_bw_state[i], question_bw_state[i]], -1))
        for i in range(params.layers))

    question_att = BahdanauAttention(
        params.units,
        question_enc,
        memory_sequence_length=question_words_length)
    passage_att = BahdanauAttention(
        params.units, passage_enc, memory_sequence_length=passage_words_length)

    decoder_cell = AttentionWrapper(MultiRNNCell(
        [GRUCell(params.units) for _ in range(params.layers)]),
                                    [question_att, passage_att],
                                    initial_cell_state=decoder_init_state)

    batch_size = params.batch_size  # if mode != tf.estimator.ModeKeys.PREDICT else 1

    if mode == tf.estimator.ModeKeys.TRAIN:
        answer_emb = tf.nn.embedding_lookup(embedding_encoder,
                                            features['answer_words'])
        helper = TrainingHelper(answer_emb, features['answer_length'])
    else:
        helper = GreedyEmbeddingHelper(
            embedding_encoder, tf.fill([batch_size], params.tgt_sos_id),
            params.tgt_eos_id)

    projection_layer = Dense(params.vocab_size, use_bias=False)

    decoder = SNetDecoder(decoder_cell,
                          helper,
                          decoder_cell.zero_state(batch_size, tf.float32),
                          output_layer=projection_layer,
                          params=params)

    outputs, _, outputs_length = dynamic_decode(
        decoder, maximum_iterations=params.answer_max_words)
    logits = outputs.rnn_output

    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            'answer': table.lookup(tf.cast(outputs.sample_id, tf.int64))
        }
        export_outputs = {
            'prediction': tf.estimator.export.PredictOutput(predictions)
        }

        return tf.estimator.EstimatorSpec(mode,
                                          predictions=predictions,
                                          export_outputs=export_outputs)

    # logits = tf.Print(logits, [outputs.sample_id, labels], summarize=1000)

    labels = tf.stop_gradient(labels[:, :tf.reduce_max(outputs_length)])

    crossent = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
                                                              logits=logits)
    target_weights = tf.sequence_mask(outputs_length, dtype=logits.dtype)
    loss = tf.reduce_sum(crossent * target_weights) / params.batch_size

    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.AdadeltaOptimizer(learning_rate=1)
        global_step = tf.train.get_or_create_global_step()

        grads = optimizer.compute_gradients(loss)
        gradients, variables = zip(*grads)
        capped_grads, _ = tf.clip_by_global_norm(gradients, params.grad_clip)
        train_op = optimizer.apply_gradients(zip(capped_grads, variables),
                                             global_step=global_step)

        return EstimatorSpec(
            mode,
            loss=loss,
            train_op=train_op,
        )

    if mode == tf.estimator.ModeKeys.EVAL:
        return EstimatorSpec(mode,
                             loss=loss,
                             eval_metric_ops={
                                 'rouge-l':
                                 rouge_l(outputs.sample_id, labels,
                                         outputs_length,
                                         features['answer_length'], params,
                                         table),
                             })
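Since this model_fn returns an EstimatorSpec for every mode, it can be handed directly to tf.estimator.Estimator. A minimal sketch, where hparams, train_input_fn, and predict_input_fn are assumptions not defined in the example:

estimator = tf.estimator.Estimator(model_fn=model_fn,
                                   model_dir='/tmp/answer_generation',
                                   params=hparams)
estimator.train(train_input_fn, steps=10000)
for prediction in estimator.predict(predict_input_fn):
    print(prediction['answer'])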
Example #13
    def _build_model(self):
        with tf.variable_scope("embeddings"):
            self.source_embs = tf.get_variable(
                name="source_embs",
                shape=[self.cfg.source_vocab_size, self.cfg.emb_dim],
                dtype=tf.float32,
                trainable=True)
            self.target_embs = tf.get_variable(
                name="embeddings",
                shape=[self.cfg.vocab_size, self.cfg.emb_dim],
                dtype=tf.float32,
                trainable=True)
            source_emb = tf.nn.embedding_lookup(self.source_embs,
                                                self.enc_source)
            target_emb = tf.nn.embedding_lookup(self.target_embs,
                                                self.dec_target_in)
            print("source embedding shape: {}".format(
                source_emb.get_shape().as_list()))
            print("target input embedding shape: {}".format(
                target_emb.get_shape().as_list()))

        with tf.variable_scope("encoder"):
            if self.cfg.use_bi_rnn:
                with tf.variable_scope("bi-directional_rnn"):
                    cell_fw = GRUCell(self.cfg.num_units) if self.cfg.cell_type == "gru" else \
                        LSTMCell(self.cfg.num_units)
                    cell_bw = GRUCell(self.cfg.num_units) if self.cfg.cell_type == "gru" else \
                        LSTMCell(self.cfg.num_units)
                    bi_outputs, _ = bidirectional_dynamic_rnn(
                        cell_fw,
                        cell_bw,
                        source_emb,
                        dtype=tf.float32,
                        sequence_length=self.enc_seq_len)
                    source_emb = tf.concat(bi_outputs, axis=-1)
                    print("bi-directional rnn output shape: {}".format(
                        source_emb.get_shape().as_list()))
            input_project = tf.layers.Dense(units=self.cfg.num_units,
                                            dtype=tf.float32,
                                            name="input_projection")
            source_emb = input_project(source_emb)
            print("encoder input projection shape: {}".format(
                source_emb.get_shape().as_list()))
            enc_cells = self._create_encoder_cell()
            self.enc_outputs, self.enc_states = dynamic_rnn(
                enc_cells,
                source_emb,
                sequence_length=self.enc_seq_len,
                dtype=tf.float32)
            print("encoder output shape: {}".format(
                self.enc_outputs.get_shape().as_list()))

        with tf.variable_scope("decoder"):
            self.max_dec_seq_len = tf.reduce_max(self.dec_seq_len,
                                                 name="max_dec_seq_len")
            self.dec_cells, self.dec_init_states = self._create_decoder_cell()
            # define input and output projection layer
            input_project = tf.layers.Dense(units=self.cfg.num_units,
                                            name="input_projection")
            self.dense_layer = tf.layers.Dense(units=self.cfg.vocab_size,
                                               name="output_projection")
            if self.mode == "train":  # either "train" or "decode"
                # for training
                target_emb = input_project(target_emb)
                train_helper = TrainingHelper(target_emb,
                                              sequence_length=self.dec_seq_len,
                                              name="train_helper")
                train_decoder = BasicDecoder(
                    self.dec_cells,
                    helper=train_helper,
                    output_layer=self.dense_layer,
                    initial_state=self.dec_init_states)
                self.dec_output, _, _ = dynamic_decode(
                    train_decoder,
                    impute_finished=True,
                    maximum_iterations=self.max_dec_seq_len)
                print("decoder output shape: {} (vocab size)".format(
                    self.dec_output.rnn_output.get_shape().as_list()))

                # for decode
                start_token = tf.ones(
                    shape=[
                        self.batch_size,
                    ], dtype=tf.int32) * self.cfg.target_dict[GO]
                end_token = self.cfg.target_dict[EOS]

                def inputs_project(inputs):
                    return input_project(
                        tf.nn.embedding_lookup(self.target_embs, inputs))

                dec_helper = GreedyEmbeddingHelper(embedding=inputs_project,
                                                   start_tokens=start_token,
                                                   end_token=end_token)
                infer_decoder = BasicDecoder(
                    self.dec_cells,
                    helper=dec_helper,
                    initial_state=self.dec_init_states,
                    output_layer=self.dense_layer)
                infer_dec_output, _, _ = dynamic_decode(
                    infer_decoder,
                    maximum_iterations=self.cfg.maximum_iterations)
                self.dec_predicts = infer_dec_output.sample_id
Example #14
    def model(self,inputs,targets,en_len_sequence,zh_len_sequence):
        # global step
        with tf.device(self.cpu_device):
            global_step = tf.contrib.framework.get_or_create_global_step()

            start_tokens = tf.tile([0],[self.batch_size])
            end_token = tf.convert_to_tensor(0)

            en_embedding_matrix = tf.get_variable(name='embedding_matrix',
                                                  shape=(FLAGS.en_vocab_size, FLAGS.en_embedded_size),
                                                  dtype=tf.float32,
                                                  # regularizer=tf.nn.l2_loss,
                                                  initializer=tf.truncated_normal_initializer(mean=0, stddev=0.01)
                                                  )
            zh_embedding_matrix = tf.get_variable(name='zh_embedding_matrix',
                                                  shape=(FLAGS.zh_vocab_size, FLAGS.zh_embedded_size),
                                                  dtype=tf.float32,
                                                  # regularizer=tf.nn.l2_loss,
                                                  initializer=tf.truncated_normal_initializer(mean=0, stddev=0.01))

            tf.add_to_collection(tf.GraphKeys.LOSSES, tf.nn.l2_loss(en_embedding_matrix))
            tf.add_to_collection(tf.GraphKeys.LOSSES, tf.nn.l2_loss(zh_embedding_matrix))

            tf.summary.histogram('zh_embedding_matrix', zh_embedding_matrix)  # should this summary be kept?
            tf.summary.histogram('en_embedding_matrix', en_embedding_matrix)

            en_embedded = tf.nn.embedding_lookup(en_embedding_matrix, inputs)
            zh_embedded = tf.nn.embedding_lookup(zh_embedding_matrix, targets)

        # inference
        with tf.name_scope('encoder'):
            cells_fw = [DeviceWrapper(LayerNormBasicLSTMCell(num), self.devices[i]) for i,num in enumerate(config.encoder_fw_units)]
            cells_bw = [DeviceWrapper(LayerNormBasicLSTMCell(num), self.devices[i]) for i,num in enumerate(config.encoder_bw_units)]

            # outputs with shape [batch_size,max_len,output_size]
            # states_fw and states_bw is a list with length len(cells_fw)
            # [LSTMStateTuple_1,...,LSTMStateTuple_n]
            # LSTMStateTuple has attribute of c and h
            outputs, states_fw,states_bw = \
                stack_bidirectional_dynamic_rnn(cells_fw,
                                                cells_bw,
                                                en_embedded,
                                                dtype = tf.float32,
                                                sequence_length = en_len_sequence)

            # Concatenate the fw and bw states layer by layer to form the decoder initial_state
            states = [LSTMStateTuple(c=tf.concat([states_fw[i].c,states_bw[i].c],1),
                                     h=tf.concat([states_fw[i].h,states_bw[i].h],1))
                      for i in range(len(states_fw))]
            tf.summary.histogram('encoder_state', states)


        with tf.name_scope('decoder'):
            # Compute attention over the encoder outputs
            attention_m = BahdanauAttention(FLAGS.attention_size,
                                            outputs,
                                            en_len_sequence)


            # Use layer normalization and dropout
            cells_out = [DeviceWrapper(LayerNormBasicLSTMCell(num,
                                                              dropout_keep_prob=FLAGS.dropout_keep_prob),
                                       self.devices[-1]) for num in config.decoder_units]
            # attention wrapper
            cells_attention = [AttentionWrapper(cells_out[i],attention_m) for i in range(len(config.decoder_units))]

            # stack wrappers
            cells = MultiRNNCell(cells_attention)

            initial_cell_states = cells.zero_state(dtype=tf.float32,batch_size=self.batch_size)

            initial_states = tuple(initial_cell_states[i].clone(cell_state=states[i]) for i in range(len(states)))

            # # beam search
            # decoder = BeamSearchDecoder(cells,zh_embedding_matrix,start_tokens,end_token,initial_state=initial_states,beam_width=12)
            # beam search has some problems here; it may need to be implemented by ourselves.

            # basic_decoder_helper

            if FLAGS.is_inference:
                helper = GreedyEmbeddingHelper(zh_embedding_matrix, start_tokens, end_token)
            else:
                helper = TrainingHelper(zh_embedded,zh_len_sequence)
            dense = Dense(FLAGS.zh_vocab_size, use_bias=False)

            # basic decoder
            decoder = BasicDecoder(cells, helper, initial_states, dense)  # the cell state is initialized here

            # dynamic decode
            logits, final_states, final_sequence_lengths = dynamic_decode(decoder)

            # loss
            max_zh_len = tf.reduce_max(zh_len_sequence)
            weights = tf.sequence_mask(zh_len_sequence, max_zh_len, dtype=tf.float32)
            inference_losses = tf.contrib.seq2seq.sequence_loss(logits.rnn_output, targets, weights)
            tf.summary.scalar('inference_loss', inference_losses)
            tf.add_to_collection(tf.GraphKeys.LOSSES, inference_losses)
            losses = tf.add_n(tf.get_collection(tf.GraphKeys.LOSSES))
            tf.summary.scalar('losses', losses)


            # train detail
            learning_rate = tf.train.exponential_decay(FLAGS.learning_rate,
                                                       global_step,
                                                       FLAGS.decay_step,
                                                       FLAGS.decay_rate)
            tf.summary.scalar('learning_rate', learning_rate)

            opt = tf.train.GradientDescentOptimizer(learning_rate)

            # using clipped gradient
            grads_and_vars = opt.compute_gradients(losses)
            clipped_grads_and_vars = tf.contrib.training.clip_gradient_norms(grads_and_vars, FLAGS.max_gradient)
            apply_grads_op = opt.apply_gradients(clipped_grads_and_vars, global_step)

            if FLAGS.is_inference:
                return logits.sample_id, [inputs, en_len_sequence, start_tokens, end_token]
            elif FLAGS.is_train:
                return {'loss': losses, 'train_op': apply_grads_op}
            else:
                return [global_step, losses]