def _build_train(self, config):
     """Build the training-time output graph and store it in ``self.logits``.

     For the flat models (``"fasttext_flat"`` and ``"RCNN_flat"``) a single
     linear layer with ``config.fn_classes`` outputs is applied to the
     sentence representation. Every other model name gets a decoder driven
     by ``TrainingHelper``, wrapped with Bahdanau attention when
     ``config.use_att`` is truthy.

     :param config: configuration object; this method reads ``model_name``,
         ``fn_classes``, ``use_att``, ``decode_size`` and ``batch_size``.
     """
     model_name = config.model_name

     # Flat classifiers: one linear projection, no decoder at all.
     if model_name in ("fasttext_flat", "RCNN_flat"):
         if model_name == "fasttext_flat":
             features = self.first_attention
         else:
             features = self.xx_final
         flat_logits = tf.contrib.layers.fully_connected(
             features, config.fn_classes, activation_fn=None)
         print("logits:", flat_logits.get_shape())
         self.logits = tf.reshape(flat_logits, [-1, config.fn_classes])
         return

     # Sequence decoder: the final encoder vector is used for both fields of
     # the LSTM state tuple.
     encoder_state = rnn.LSTMStateTuple(self.xx_final, self.xx_final)
     if config.use_att:
         attention = BahdanauAttention(
             config.decode_size,
             memory=self.xx_context,
             memory_sequence_length=self.x_seq_length)
         decoder_cell = AttentionWrapper(self.lstm,
                                         attention,
                                         output_attention=False)
         # Start from the wrapper's zero state, then plug in the encoder
         # state and the precomputed first attention vector.
         zero_state = decoder_cell.zero_state(dtype=tf.float32,
                                              batch_size=config.batch_size)
         initial_state = zero_state.clone(cell_state=encoder_state,
                                          attention=self.first_attention)
     else:
         decoder_cell = self.lstm
         initial_state = encoder_state

     # Teacher forcing: the decoder is fed self.yy at every step.
     helper = TrainingHelper(self.yy, self.y_seq_length)
     decoder = BasicDecoder(decoder_cell,
                            helper,
                            initial_state,
                            output_layer=self.output_l)
     self.decoder_outputs_train, _, _ = dynamic_decode(decoder,
                                                       impute_finished=True)
     self.logits = self.decoder_outputs_train.rnn_output
     print("logits:", self.logits.get_shape())
Ejemplo n.º 2
0
    def decoder_train(self, decoder_cell, decoder_initial_state, output_layer):
        '''
        Build the training branch of the decoder.

        The decoder is fed the targets shifted right by one step: the last
        column of ``self.decoder_targets`` is dropped and a ``<GO>`` id is
        prepended to every row.

        :param decoder_cell: RNN cell used by the decoder
        :param decoder_initial_state: initial state handed to the decoder
        :param output_layer: layer passed to ``BasicDecoder`` as ``output_layer``
        :return: the ``rnn_output`` field of the decoded sequence
        '''
        # Every row of the targets without its last column.
        targets_without_last = tf.strided_slice(
            self.decoder_targets, [0, 0], [self.batch_size, -1], [1, 1])
        go_column = tf.fill([self.batch_size, 1], self.word_to_idx['<GO>'])
        shifted_targets = tf.concat([go_column, targets_without_last], 1)
        embedded_inputs = tf.nn.embedding_lookup(self.embedding,
                                                 shifted_targets)

        teacher_helper = TrainingHelper(
            embedded_inputs,
            self.decoder_targets_length,
            time_major=False,
            name='training_helper')
        train_decoder = BasicDecoder(decoder_cell,
                                     teacher_helper,
                                     decoder_initial_state,
                                     output_layer=output_layer)
        final_outputs, _, _ = dynamic_decode(
            train_decoder,
            impute_finished=True,
            maximum_iterations=self.max_target_sequence_length)
        return tf.identity(final_outputs.rnn_output)
Ejemplo n.º 3
0
    def __build_decoder(self, n_decoder_layers, hidden_size, vocab_size,
                        max_iter, start_symbol_id, end_symbol_id):
        """Build the train and inference decoders over one shared variable scope.

        Results are stored in ``self.train_outputs`` (teacher forcing through
        ``TrainingHelper``) and ``self.infer_outputs`` (greedy decoding through
        ``GreedyEmbeddingHelper``). The embedded decoder inputs are kept in
        ``self.ground_truth_embedded``.

        Args:
            n_decoder_layers: number of stacked GRU layers.
            hidden_size: number of units in every GRU layer.
            vocab_size: output size of the projection wrapper.
            max_iter: currently unused; the step limit is taken from
                ``self.ground_truth_lengths`` instead.
            start_symbol_id: id fed to the decoder at the first time step.
            end_symbol_id: end token handed to the greedy helper.
        """
        batch_size = tf.shape(self.input_batch)[0]

        # Decoder inputs are the ground truth with a start symbol prepended.
        go_ids = tf.fill([batch_size], start_symbol_id)
        decoder_input_ids = tf.concat(
            [tf.expand_dims(go_ids, 1), self.ground_truth], 1)
        self.ground_truth_embedded = tf.nn.embedding_lookup(
            self.embeddings, decoder_input_ids)

        teacher_helper = TrainingHelper(self.ground_truth_embedded,
                                        self.ground_truth_lengths)
        greedy_helper = GreedyEmbeddingHelper(self.embeddings, go_ids,
                                              end_symbol_id)

        def run_decoder(helper, scope, reuse=None):
            """Unroll one decoder with ``helper`` inside variable scope ``scope``."""
            with tf.variable_scope(scope, reuse=reuse):
                # One dropout-wrapped GRU per layer; `reuse` is forwarded so
                # the second call picks up the variables of the first.
                gru_layers = [
                    tf.nn.rnn_cell.DropoutWrapper(
                        tf.nn.rnn_cell.GRUCell(hidden_size, reuse=reuse),
                        input_keep_prob=self.dropout_ph)
                    for _ in range(n_decoder_layers)
                ]
                projected_cell = OutputProjectionWrapper(
                    MultiRNNCell(gru_layers), vocab_size, reuse=reuse)

                # NOTE(review): the decoder starts from a zero state, so the
                # initial state carries nothing from an encoder - confirm
                # this is intended.
                zero_state = projected_cell.zero_state(batch_size=batch_size,
                                                       dtype=tf.float32)
                basic_decoder = BasicDecoder(projected_cell,
                                             helper,
                                             initial_state=zero_state)

                # NOTE(review): this limit comes from the ground-truth lengths
                # for the inference decoder as well - verify against callers.
                step_limit = tf.reduce_max(self.ground_truth_lengths)
                # The first element has `rnn_output` and `sample_id` fields.
                outputs, _, _ = dynamic_decode(decoder=basic_decoder,
                                               maximum_iterations=step_limit,
                                               output_time_major=False,
                                               impute_finished=True)
                return outputs

        self.train_outputs = run_decoder(teacher_helper, 'decode')
        self.infer_outputs = run_decoder(greedy_helper, 'decode', reuse=True)
Ejemplo n.º 4
0
 def build_decoder(self, encoder_outputs, encoder_final_state):
     """
     Build the complete decoder.

     In ``'train'`` mode this sets ``self.masks`` and ``self.loss``; in any
     other mode it sets ``self.decoder_pred_decode`` from beam search.

     :param encoder_outputs: forwarded to ``self.build_decoder_cell``
     :param encoder_final_state: forwarded to ``self.build_decoder_cell``
     :return: None
     """
     with tf.variable_scope("decode"):
         cell, initial_state = self.build_decoder_cell(
             encoder_outputs, encoder_final_state, self.hidden_size,
             self.cell_type, self.layer_size)
         # Projection from the cell output onto the decoder vocabulary.
         vocab_projection = layers.Dense(
             self.decoder_vocab_size,
             dtype=tf.float32,
             use_bias=False,
             kernel_initializer=tf.truncated_normal_initializer(mean=0.0,
                                                                stddev=0.1),
             name='decoder_output_projection')

         if self.mode != 'train':
             # Prediction mode: beam search from the start token.
             def embed(ids):
                 return tf.nn.embedding_lookup(self.decoder_embeddings, ids)

             beam_decoder = BeamSearchDecoder(
                 cell=cell,
                 embedding=embed,
                 start_tokens=[DataUnit.START_INDEX] * self.batch_size,
                 end_token=DataUnit.END_INDEX,
                 initial_state=initial_state,
                 beam_width=self.beam_width,
                 output_layer=vocab_projection)
             beam_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
                 beam_decoder, maximum_iterations=self.max_decode_step)
             # Swap the last two axes of the predicted ids.
             self.decoder_pred_decode = tf.transpose(
                 beam_output.predicted_ids, perm=[0, 2, 1])
             return

         # Training mode: teacher forcing on the embedded decoder inputs.
         embedded_inputs = tf.nn.embedding_lookup(
             self.decoder_embeddings, self.decoder_inputs_train)
         teacher_helper = TrainingHelper(
             inputs=embedded_inputs,
             sequence_length=self.decoder_inputs_length,
             name='training_helper')
         train_decoder = BasicDecoder(cell, teacher_helper, initial_state,
                                      vocab_projection)
         longest = tf.reduce_max(self.decoder_inputs_length)
         train_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
             train_decoder, maximum_iterations=longest)
         # Zero weight on every position past a sequence's own length.
         self.masks = tf.sequence_mask(self.decoder_inputs_length,
                                       maxlen=longest,
                                       dtype=tf.float32,
                                       name='masks')
         self.loss = tf.contrib.seq2seq.sequence_loss(
             logits=train_output.rnn_output,
             targets=self.decoder_inputs,
             weights=self.masks,
             average_across_timesteps=True,
             average_across_batch=True)
Ejemplo n.º 5
0
    def build_train_decoder(self):
        """Build the training decoder, its loss and the summary op.

        Sets ``self.decoder_logits_train``, ``self.loss`` and
        ``self.summary_op``, then calls ``self.build_optimizer()``.
        """
        print('Building train decoder...')

        # Decoder inputs: targets without their last column, prefixed by <GO>.
        targets_without_last = tf.strided_slice(
            self.decoder_targets, [0, 0], [self.batch_size, -1], [1, 1])
        go_column = tf.fill([self.batch_size, 1], self.word_to_id['<GO>'])
        shifted_targets = tf.concat([go_column, targets_without_last], 1)
        embedded_inputs = tf.nn.embedding_lookup(self.embedding, shifted_targets)

        if not self.teacher_forcing:
            helper = TrainingHelper(
                inputs=embedded_inputs,
                sequence_length=self.decoder_targets_length,
                time_major=False,
                name='training_helper'
            )
        else:
            # When the flag is set, the scheduled-sampling helper is used
            # with `self.teacher_forcing_probability` as sampling probability.
            helper = ScheduledEmbeddingTrainingHelper(
                inputs=embedded_inputs,
                sequence_length=self.decoder_targets_length,
                embedding=self.embedding,
                sampling_probability=self.teacher_forcing_probability,
                time_major=False,
                name='teacher_forcing_training_helper'
            )

        train_decoder = BasicDecoder(
            cell=self.decoder_cell,
            helper=helper,
            initial_state=self.decoder_initial_state,
            output_layer=self.output_layer
        )
        final_outputs, _, _ = dynamic_decode(
            decoder=train_decoder,
            impute_finished=True,
            maximum_iterations=self.max_target_sequence_length
        )
        self.decoder_logits_train = tf.identity(final_outputs.rnn_output)

        # Weighted cross-entropy over the sequence of logits:
        #   logits:  [batch_size, sequence_length, num_decoder_symbols]
        #   targets: [batch_size, sequence_length], true class per time step
        #   weights: [batch_size, sequence_length], weight of each prediction
        self.loss = sequence_loss(
            logits=self.decoder_logits_train,
            targets=self.decoder_targets,
            weights=self.mask
        )

        # Scalar summary of the loss, merged with every other summary
        # collected in the default graph.
        tf.summary.scalar('loss', self.loss)
        self.summary_op = tf.summary.merge_all()

        self.build_optimizer()
Ejemplo n.º 6
0
def decoder(x, decoder_inputs, keep_prob, sequence_length, memory,
            memory_length, first_attention):
    """Build an attention decoder for training and for beam-search inference.

    Both decoders share the label embedding table, the LSTM cell and the
    output projection. Sizes and token ids (``n_classes``, ``embedding_size``,
    ``n_hidden``, ``train_batch_size``, ``test_batch_size``, ``test_len``,
    ``beam_width``, ``GO``, ``EOS``) are read from names defined outside this
    function.

    Args:
        x: tensor used for both fields of the initial LSTM state tuple.
        decoder_inputs: label ids fed to the training decoder.
        keep_prob: dropout keep probability of the LSTM cell.
        sequence_length: lengths handed to ``TrainingHelper``.
        memory: tensor the attention mechanism attends over.
        memory_length: lengths of ``memory``.
        first_attention: attention value placed in the initial wrapper state.

    Returns:
        ``(train_outputs, infer_outputs, infer_state)``: the first element of
        the training ``dynamic_decode`` call, then the first two elements of
        the inference ``dynamic_decode`` call.
    """
    with tf.variable_scope("Decoder"):
        # Pieces shared by the training and the inference decoder.
        label_table = tf.get_variable(name="embeddings",
                                      shape=[n_classes, embedding_size],
                                      dtype=tf.float32)
        embedded_inputs = tf.nn.embedding_lookup(label_table, decoder_inputs)
        lstm_cell = rnn.LayerNormBasicLSTMCell(n_hidden,
                                               dropout_keep_prob=keep_prob)
        projection = layers_core.Dense(n_classes, use_bias=True)

        # Training decoder (teacher forcing).
        train_encoder_state = rnn.LSTMStateTuple(x, x)
        train_attention = BahdanauAttention(
            embedding_size,
            memory=memory,
            memory_sequence_length=memory_length)
        train_cell = AttentionWrapper(lstm_cell,
                                      train_attention,
                                      output_attention=False)
        train_state = train_cell.zero_state(
            dtype=tf.float32, batch_size=train_batch_size).clone(
                cell_state=train_encoder_state, attention=first_attention)
        teacher_helper = TrainingHelper(embedded_inputs, sequence_length)
        train_decoder = BasicDecoder(train_cell,
                                     teacher_helper,
                                     train_state,
                                     output_layer=projection)
        train_outputs, _, _ = dynamic_decode(train_decoder,
                                             impute_finished=True)

        # Inference decoder: every per-example tensor is tiled beam_width
        # times before it reaches the beam search.
        beam_memory = tile_batch(memory, multiplier=beam_width)
        beam_memory_length = tile_batch(memory_length, multiplier=beam_width)
        beam_first_attention = tile_batch(first_attention,
                                          multiplier=beam_width)
        infer_attention = BahdanauAttention(
            embedding_size,
            memory=beam_memory,
            memory_sequence_length=beam_memory_length)
        beam_x = tile_batch(x, beam_width)
        infer_encoder_state = rnn.LSTMStateTuple(beam_x, beam_x)
        infer_cell = AttentionWrapper(lstm_cell,
                                      infer_attention,
                                      output_attention=False)
        infer_state = infer_cell.zero_state(
            dtype=tf.float32,
            batch_size=test_batch_size * beam_width).clone(
                cell_state=infer_encoder_state,
                attention=beam_first_attention)
        # NOTE(review): start_tokens is sized by `test_len` while the state is
        # sized by `test_batch_size` - presumably equal; confirm.
        beam_decoder = BeamSearchDecoder(infer_cell,
                                         embedding=label_table,
                                         start_tokens=[GO] * test_len,
                                         end_token=EOS,
                                         initial_state=infer_state,
                                         beam_width=beam_width,
                                         output_layer=projection)
        infer_outputs, infer_final_state, _ = dynamic_decode(
            beam_decoder, maximum_iterations=4)
        return train_outputs, infer_outputs, infer_final_state
Ejemplo n.º 7
0
def training_decoding_layer(decoding_embed_input, en_len, decoding_cell,
                            initial_state, op_layer, v_size, max_en_len):
    """Run a teacher-forced decoder and return its decoded outputs.

    Args:
        decoding_embed_input: embedded decoder inputs (batch-major).
        en_len: sequence lengths given to the training helper.
        decoding_cell: RNN cell used for decoding.
        initial_state: initial state of the decoder.
        op_layer: output layer handed to ``BasicDecoder``.
        v_size: unused.
        max_en_len: maximum number of decoding steps.

    Returns:
        The first element returned by ``dynamic_decode``, i.e. the decoder
        output structure rather than a bare logits tensor.
    """
    teacher_helper = TrainingHelper(decoding_embed_input,
                                    en_len,
                                    time_major=False)
    train_decoder = BasicDecoder(decoding_cell, teacher_helper, initial_state,
                                 op_layer)
    final_outputs, _, _ = dynamic_decode(train_decoder,
                                         output_time_major=False,
                                         impute_finished=True,
                                         maximum_iterations=max_en_len)
    return final_outputs
Ejemplo n.º 8
0
    def decoder_train(self, decoder_cell, decoder_initial_state, output_layer):
        '''
        Build the training branch of the decoder.

        :param decoder_cell: RNN cell used by the decoder
        :param decoder_initial_state: initial state handed to the decoder
        :param output_layer: layer passed to ``BasicDecoder`` as ``output_layer``
        :return: the ``rnn_output`` field of the decoded sequence
        '''
        # tf.strided_slice(data, begin, end, strides) slices with a stride,
        # one entry per dimension. Here it keeps the first batch_size rows
        # and drops the last column, because a <GO> id is prepended below.
        targets_without_last = tf.strided_slice(
            self.decoder_targets, [0, 0], [self.batch_size, -1], [1, 1])
        # tf.fill(dims, value): one <GO> id per row.
        go_column = tf.fill([self.batch_size, 1], self.word_to_idx['<GO>'])
        shifted_targets = tf.concat([go_column, targets_without_last], 1)
        # Embed every row of decoder input ids.
        embedded_inputs = tf.nn.embedding_lookup(self.embedding,
                                                 shifted_targets)

        # TrainingHelper reads the decoder inputs from the supplied sequence
        # instead of from the decoder's own output.
        # time_major=False means the inputs are laid out as
        # (batch_size, max_input_length, embedding_size); True would mean
        # (max_input_length, batch_size, embedding_size).
        teacher_helper = TrainingHelper(
            embedded_inputs,
            self.decoder_targets_length,
            time_major=False,
            name='training_helper')

        # BasicDecoder arguments:
        #   cell:          an RNNCell instance
        #   helper:        a Helper instance
        #   initial_state: (possibly nested) tensors, the cell's start state
        #   output_layer:  optional layer (e.g. tf.layers.Dense) applied to
        #                  the RNN output before storing or sampling
        # It raises TypeError when cell, helper or output_layer has the
        # wrong type.
        train_decoder = BasicDecoder(decoder_cell,
                                     teacher_helper,
                                     decoder_initial_state,
                                     output_layer=output_layer)

        # dynamic_decode arguments:
        #   impute_finished:    copy the state through and zero the outputs
        #                       of finished entries; slower per step, but
        #                       final values are correct and backprop
        #                       ignores finished steps
        #   maximum_iterations: upper bound on the number of decoding steps
        final_outputs, _, _ = dynamic_decode(
            train_decoder,
            impute_finished=True,
            maximum_iterations=self.max_target_sequence_length)
        # TODO: what is the identity op needed for here?
        return tf.identity(final_outputs.rnn_output)
Ejemplo n.º 9
0
    def build_train_decoder(self):
        """Build the training decoder, its loss, summaries and the optimizer.

        Sets ``self.loss``, ``self.summary_op`` and ``self.writer`` (a
        ``FileWriter`` on ``log/train`` for ``self.sess.graph``), then calls
        ``self.build_optimizer()``.
        """
        print('Building train decoder...')

        # Decoder inputs: targets without their last column, prefixed by <GO>.
        targets_without_last = tf.strided_slice(
            self.decoder_targets, [0, 0], [self.batch_size, -1], [1, 1])
        go_column = tf.fill([self.batch_size, 1], self.word_to_id['<GO>'])
        shifted_targets = tf.concat([go_column, targets_without_last], 1)
        embedded_inputs = tf.nn.embedding_lookup(self.embedding, shifted_targets)

        if not self.teacher_forcing:
            helper = TrainingHelper(
                inputs=embedded_inputs,
                sequence_length=self.decoder_targets_length,
                time_major=False,
                name='training_helper'
            )
        else:
            # When the flag is set, the scheduled-sampling helper is used
            # with `self.teacher_forcing_probability` as sampling probability.
            helper = ScheduledEmbeddingTrainingHelper(
                inputs=embedded_inputs,
                sequence_length=self.decoder_targets_length,
                embedding=self.embedding,
                sampling_probability=self.teacher_forcing_probability,
                time_major=False,
                name='teacher_forcing_training_helper'
            )

        train_decoder = BasicDecoder(
            cell=self.decoder_cell,
            helper=helper,
            initial_state=self.decoder_initial_state,
            output_layer=self.output_layer
        )
        final_outputs, _, _ = dynamic_decode(
            decoder=train_decoder,
            impute_finished=True,
            maximum_iterations=self.max_target_sequence_length
        )
        train_logits = tf.identity(final_outputs.rnn_output)

        # Sequence loss of the logits against the targets, weighted by the mask.
        self.loss = sequence_loss(
            logits=train_logits,
            targets=self.decoder_targets,
            weights=self.mask
        )

        # Loss summary plus a writer for the session graph.
        tf.summary.scalar('loss', self.loss)
        self.summary_op = tf.summary.merge_all()
        self.writer = tf.summary.FileWriter('log/train', self.sess.graph)

        self.build_optimizer()
Ejemplo n.º 10
0
    def build_train_decoder(self, decoder_targets, decoder_targets_length, max_target_sequence_length, mask, name):
        """Build a training decoder for one target stream and return its loss.

        Args:
            decoder_targets: target ids; also used, shifted, as decoder input.
            decoder_targets_length: lengths handed to the training helper.
            max_target_sequence_length: maximum number of decoding steps.
            mask: weights passed to ``sequence_loss``.
            name: suffix appended to the helper's name.

        Returns:
            The sequence loss of the decoder logits against ``decoder_targets``.
        """
        # Decoder inputs: targets without their last column, prefixed by <GO>.
        targets_without_last = tf.strided_slice(
            decoder_targets, [0, 0], [self.batch_size, -1], [1, 1])
        go_column = tf.fill([self.batch_size, 1], self.word_to_id['<GO>'])
        shifted_targets = tf.concat([go_column, targets_without_last], 1)
        embedded_inputs = tf.nn.embedding_lookup(self.embedding, shifted_targets)

        cell, initial_state = self.build_decoder_cell()
        vocab_projection = tf.layers.Dense(
            self.vocab_size,
            kernel_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1)
        )

        if not self.teacher_forcing:
            helper = TrainingHelper(
                inputs=embedded_inputs,
                sequence_length=decoder_targets_length,
                time_major=False,
                name='training_helper_' + name
            )
        else:
            # When the flag is set, the scheduled-sampling helper is used
            # with `self.teacher_forcing_probility` as sampling probability.
            helper = ScheduledEmbeddingTrainingHelper(
                inputs=embedded_inputs,
                sequence_length=decoder_targets_length,
                embedding=self.embedding,
                sampling_probability=self.teacher_forcing_probility,
                time_major=False,
                name='teacher_forcing_training_helper_' + name
            )

        train_decoder = BasicDecoder(
            cell=cell,
            helper=helper,
            initial_state=initial_state,
            output_layer=vocab_projection
        )
        final_outputs, _, _ = dynamic_decode(
            decoder=train_decoder,
            impute_finished=True,
            maximum_iterations=max_target_sequence_length
        )
        train_logits = tf.identity(final_outputs.rnn_output)

        return sequence_loss(
            logits=train_logits,
            targets=decoder_targets,
            weights=mask
        )
Ejemplo n.º 11
0
    def _init(self, sequence, targets, authors):
        """Build the author-conditioned sequence model.

        Args:
            sequence: integer id matrix fed to the decoder.
            targets: ids the loss is computed against; returned unchanged.
            authors: ids looked up in the context embedding table.

        Returns:
            ``(targets, loss, out)`` where ``out`` is the decoder's
            ``sample_id``.
        """
        batch_size = tf.shape(sequence)[0]

        # Length = number of non-zero ids per row.
        # NOTE(review): presumably id 0 is padding - confirm.
        sequence_lengths = tf.cast(tf.count_nonzero(sequence, axis=1), tf.int32)
        char_table = tf.Variable(
            tf.random_normal((self._vocab_size, self._embed_size)),
            name='char_embedding'
        )
        author_table = tf.Variable(
            tf.random_normal((self._author_size, self._ctx_size)),
            name='ctx_embedding'
        )

        embedded_sequence = tf.nn.embedding_lookup(char_table, sequence)
        embedded_authors = tf.nn.embedding_lookup(author_table, authors)

        if self._training:
            helper = TrainingHelper(embedded_sequence, sequence_lengths)
        else:
            # Sampling starts from the first column of `sequence`; 2 is
            # passed as the helper's end token.
            helper = SampleEmbeddingHelper(char_table, sequence[:,0], 2)

        def make_layer(index):
            """Create layer ``index``: context, optional dropout, then device."""
            layer = ContextWrapper(self._cell(self._cell_size), embedded_authors)
            if self._training:
                layer = DropoutWrapper(
                    layer, 1.0-self._input_dropout, 1.0-self._output_dropout)
            # Layers are assigned to GPUs round-robin.
            device = '/gpu:{}'.format(index % self._num_gpu)
            return DeviceWrapper(layer, device)

        stacked_cell = MultiRNNCell(
            [make_layer(index) for index in range(self._cell_num)])

        init_state = stacked_cell.zero_state(batch_size, tf.float32)
        projection = tf.layers.Dense(
            self._vocab_size, self._activation, name='fully_connected'
        )
        basic_decoder = BasicDecoder(stacked_cell, helper, init_state, projection)
        decoded, _, _ = dynamic_decode(basic_decoder, swap_memory=True)

        # Positions past each sequence's length get zero weight in the loss.
        weights = tf.sequence_mask(sequence_lengths, dtype=tf.float32)
        loss = tf.contrib.seq2seq.sequence_loss(
            decoded.rnn_output,
            targets,
            weights
        )

        return targets, loss, decoded.sample_id
Ejemplo n.º 12
0
 def decoder(self, encoder_outputs, encoder_states):
     """Build the decoder for the current ``self.mode``.

     In ``'train'`` mode this sets ``self.d_masks``, ``self.prob`` and
     ``self.loss``; in any other mode it sets ``self.decoder_output`` from
     beam search.

     :param encoder_outputs: forwarded to ``self.add_decoder_cell``
     :param encoder_states: forwarded to ``self.add_decoder_cell``
     :return: None
     """
     cell, init_state = self.add_decoder_cell(
         encoder_outputs, encoder_states, self.hidden_size, self.cell_type,
         self.num_layers)
     # Bias-free projection onto the target vocabulary.
     vocab_projection = tf.layers.Dense(
         self.tgt_vcb_size,
         dtype=tf.float32,
         use_bias=False,
         kernel_initializer=tf.truncated_normal_initializer(stddev=0.1),
         name='output_proj')

     if self.mode == 'train':
         embedded_targets = tf.nn.embedding_lookup(self.decoder_embeddings,
                                                   self.decoder_input_train)
         teacher_helper = TrainingHelper(embedded_targets,
                                         self.target_len,
                                         name='training_helper')
         train_decoder = BasicDecoder(cell, teacher_helper, init_state,
                                      vocab_projection)
         longest = tf.reduce_max(self.target_len)
         train_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
             train_decoder, maximum_iterations=longest)
         self.d_masks = tf.sequence_mask(self.target_len,
                                         longest,
                                         dtype=tf.float32,
                                         name='d_masks')
         # Despite its name, this holds the decoder's rnn_output, which is
         # fed to sequence_loss as logits.
         self.prob = train_output.rnn_output
         self.loss = tf.contrib.seq2seq.sequence_loss(
             logits=self.prob,
             targets=self.target,
             weights=self.d_masks,
             average_across_timesteps=True,
             average_across_batch=True)
     else:
         def embed(ids):
             return tf.nn.embedding_lookup(self.decoder_embeddings, ids)

         beam_decoder = BeamSearchDecoder(
             cell=cell,
             embedding=embed,
             start_tokens=[DataUnit.START_INDEX] * self.batch_size,
             end_token=DataUnit.END_INDEX,
             initial_state=init_state,
             beam_width=self.beam_size,
             output_layer=vocab_projection)
         beam_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
             beam_decoder, maximum_iterations=self.max_decode_step)
         # Swap the last two axes of the predicted ids.
         self.decoder_output = tf.transpose(beam_output.predicted_ids,
                                            perm=[0, 2, 1])
Ejemplo n.º 13
0
def attention_alignment(inputs, input_lengths, memory, memory_lengths, n_layers, n_units,
                        dropout_prob, cell_type=GRUCell, attention_mechanism=BahdanauAttention, is_training=True):
    """Re-encode ``inputs`` with an RNN that attends over ``memory``.

    Args:
        inputs (tensor):              Input sequence, with the shape of [Batch x seq_length x dimension]
        input_lengths (tensor):       The length of input sequences. Used for dynamic unrolling
        memory (tensor):              Sequence to attend
        memory_lengths (tensor):      The length of memory. Used for dynamic unrolling
        n_layers (int):               Number of layers in RNN
        n_units  (int):               Number of units in RNN
        dropout_prob (float):         Drop out rate for RNN cell; ignored when not training
        cell_type (method):           Type of RNN cell, GRU by default
        attention_mechanism (method): Type of attention mechanism, Bahdanau by default
        is_training (bool):           Whether the model is training or testing

    returns:
        (tensor, tensor, tensor): the decoder outputs padded along the time
        axis up to ``seq_length``, the final decoder state, and the alignment
        history stacked and transposed with ``perm=[1, 0, 2]``.
    """
    # Static shape of the inputs; all three entries are used below.
    # NOTE(review): this relies on the shape being statically known - confirm.
    batch_size, seq_length, dim = inputs.get_shape().as_list()
    memory_attention = attention_mechanism(n_units, memory, memory_sequence_length=memory_lengths, dtype=tf.float32)

    # Dropout is switched off outside of training.
    drop_rate = dropout_prob if is_training else 0

    def dropout_cell():
        return DropoutWrapper(cell_type(n_units), output_keep_prob=1-drop_rate)

    if n_layers > 1:
        recurrent_cell = MultiRNNCell([dropout_cell() for _ in range(n_layers)])
    else:
        recurrent_cell = dropout_cell()

    # Wire the attention mechanism into the cell and keep the alignments.
    attentive_cell = AttentionWrapper(recurrent_cell, memory_attention, alignment_history=True)
    # TODO: Do we ever feed an init state?
    start_state = attentive_cell.zero_state(batch_size, dtype=tf.float32)

    # Read the inputs while attending over the memory.
    input_helper = TrainingHelper(inputs=inputs, sequence_length=input_lengths)
    basic_decoder = BasicDecoder(attentive_cell, input_helper, start_state)
    decoded, states, _ = tf.contrib.seq2seq.dynamic_decode(
        basic_decoder, maximum_iterations=seq_length, impute_finished=True)

    # Pad the time axis by the gap between seq_length and the longest input.
    missing_steps = seq_length - tf.reduce_max(input_lengths)
    padded = tf.pad(decoded.rnn_output, [[0, 0], [0, missing_steps], [0, 0]])
    # NOTE(review): the reshape reuses the input depth `dim`, so it only
    # succeeds when the decoder output depth equals it - confirm with callers.
    outputs = tf.reshape(padded, [batch_size, seq_length, dim])

    # Attention matrix for visualizing a heatmap.
    aligned = tf.transpose(states.alignment_history.stack(), [1, 0, 2])
    return outputs, states, aligned
Ejemplo n.º 14
0
    def decode(self, cell_dec, enc_final_state, output_size, output_embed_matrix, training, grammar_helper=None):
        """Decode from the encoder state, teacher-forced or greedily.

        Args:
            cell_dec: RNN cell used by the decoder.
            enc_final_state: used as the decoder's initial state.
            output_size: number of units of the bias-free output layer.
            output_embed_matrix: embedding matrix of the output ids.
            training: truthy to feed ``self.output_placeholder`` (teacher
                forcing); otherwise greedy decoding is used.
            grammar_helper: forwarded to ``GrammarBasicDecoder`` when grammar
                constraints are enabled.

        Returns:
            The first element returned by ``dynamic_decode``.
        """
        projection = tf_core_layers.Dense(output_size, use_bias=False)

        # One start-symbol id per batch entry.
        go_ids = tf.ones((self.batch_size,), dtype=tf.int32) * self.config.grammar.start
        if not training:
            helper = GreedyEmbeddingHelper(output_embed_matrix, go_ids, self.config.grammar.end)
        else:
            # Prepend the start symbol, hence the extra step in the lengths.
            ids_with_go = tf.concat(
                [tf.expand_dims(go_ids, axis=1), self.output_placeholder], axis=1)
            embedded_inputs = tf.nn.embedding_lookup([output_embed_matrix], ids_with_go)
            helper = TrainingHelper(embedded_inputs, self.output_length_placeholder+1)

        if not self.config.use_grammar_constraints:
            decoder = BasicDecoder(cell_dec, helper, enc_final_state, output_layer = projection)
        else:
            decoder = GrammarBasicDecoder(
                self.config.grammar, cell_dec, helper, enc_final_state,
                output_layer = projection,
                training_output = self.output_placeholder if training else None,
                grammar_helper=grammar_helper)

        final_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
            decoder, impute_finished=True, maximum_iterations=self.max_length)

        return final_outputs
def training_decoding_layer(decoding_embed_input, en_len, decoding_cell,
                            encoding_op, encoding_st, op_layer, v_size, fr_len,
                            max_en_len):
    """Build the teacher-forced training decoder inside the "decoder" scope.

    Args:
        decoding_embed_input: decoder inputs given to ``TrainingHelper``
            (batch-major, since ``time_major=False``).
        en_len: per-example lengths of the decoder inputs.
        decoding_cell: decoder RNN cell; replaced by the attention-wrapped
            cell when ``args.attention_architecture`` is not None.
        encoding_op: encoder outputs, forwarded to ``create_attention``.
        encoding_st: encoder state used as the decoder's initial state.
        op_layer: output layer handed to ``BasicDecoder``.
        v_size: not used by this function.
        fr_len: forwarded to ``create_attention``.
        max_en_len: upper bound on the number of decoding steps.

    Returns:
        The first element of the ``dynamic_decode`` result.
    """
    scope_initializer = init_ops.constant_initializer(0.1)
    with variable_scope.variable_scope("decoder",
                                       initializer=scope_initializer):
        # `args` is a module-level object defined outside this function.
        print("args:", args)
        if args.attention_architecture is not None:
            decoding_cell, encoding_st = create_attention(
                decoding_cell, encoding_op, encoding_st, fr_len)

        teacher_forcing = TrainingHelper(inputs=decoding_embed_input,
                                         sequence_length=en_len,
                                         time_major=False)
        train_decoder = BasicDecoder(decoding_cell, teacher_forcing,
                                     encoding_st, op_layer)
        decoded = dynamic_decode(train_decoder,
                                 output_time_major=False,
                                 impute_finished=True,
                                 maximum_iterations=max_en_len)
        logits, _, _ = decoded
        return logits
Ejemplo n.º 16
0
    def decoder_ops(self, decoder_emb_inp, encoder_outputs, encoder_state,
                    hparams):
        """Build the teacher-forced training decoder and return its logits.

        :param decoder_emb_inp: decoder inputs given to ``TrainingHelper``
            with ``time_major=True`` (presumably embedded target tokens laid
            out as [time, batch, dim] -- TODO confirm against the caller).
        :param encoder_outputs: not used by this method.
        :param encoder_state: state used as the decoder's initial state.
        :param hparams: not used by this method.
        :return: the ``rnn_output`` field of the decoded sequence.
        """
        decoder_cell = self._build_cell(self.cell_type, self.num_units,
                                        self.num_layers)
        helper = TrainingHelper(decoder_emb_inp,
                                self.target_seq_length,
                                time_major=True)
        # NOTE(review): `project_layer` is not defined in this method; it must
        # be provided at module scope -- confirm.
        decoder = BasicDecoder(decoder_cell,
                               helper,
                               encoder_state,
                               output_layer=project_layer)

        # Dynamic decoding. dynamic_decode returns a 3-tuple
        # (final_outputs, final_state, final_sequence_lengths); unpacking it
        # into two names raised ValueError.
        outputs, _, _ = dynamic_decode(decoder)
        logits = outputs.rnn_output
        # The stray argument-less `core.Dense()` call was removed (Dense
        # requires `units`), and the logits are now returned rather than
        # silently discarded.
        return logits
Ejemplo n.º 17
0
    def build_mmi_decoder(self):
        """Build the MMI scoring graph and store per-example scores.

        Runs an ``MMIDecoder`` teacher-forced on ``self.inputs_dense`` and
        gathers, for every target position, the score of the target token.

        Sets ``self.last_state``, ``self.outputs_length``,
        ``self.scores_raw`` and ``self.mmi_scores``.
        """
        with tf.name_scope('mmi_scorer'):
            helper = TrainingHelper(inputs=self.inputs_dense,
                                    sequence_length=self.inputs_length,
                                    time_major=False,
                                    name='mmi_training_helper')
            with tf.name_scope('mmi_basic_decoder'):
                mmi_decoder = MMIDecoder(cell=self.cell,
                                         helper=helper,
                                         initial_state=self.initial_state,
                                         output_layer=self.output_layer)
            with tf.name_scope('mmi_dynamic_decoder'):
                decoded = seq2seq.dynamic_decode(
                    decoder=mmi_decoder,
                    output_time_major=False,
                    impute_finished=True,
                    maximum_iterations=self.inputs_max_length)
                outputs, self.last_state, self.outputs_length = decoded

            # Axes permuted with [1, 2, 0] so that the index triples built
            # below address (position, token id, batch row).
            permuted_scores = tf.transpose(outputs.scores, [1, 2, 0])
            self.scores_raw = tf.identity(permuted_scores)

            targets = tf.cast(self.features["targets"], dtype=tf.int32)
            # Length = number of entries that differ from the end token id.
            non_end_count = tf.count_nonzero(
                targets - self.vocab.end_token_id, -1)
            target_len = tf.cast(non_end_count, dtype=tf.int32)
            max_target_len = tf.reduce_max(target_len)
            pruned_targets = tf.slice(targets, [0, 0], [-1, max_target_len])

            # Position index broadcast to one row per batch element.
            batch_column = tf.ones(shape=[self.batch_size, 1], dtype=tf.int32)
            position_idx = tf.range(0, max_target_len, 1) * batch_column
            # Batch-row index broadcast to one column per position.
            position_column = tf.ones(shape=(max_target_len, 1),
                                      dtype=tf.int32)
            batch_idx = tf.transpose(
                tf.range(0, self.batch_size, 1) * position_column)
            indices = tf.stack([position_idx, pruned_targets, batch_idx],
                               axis=2)

            # Pick the score of each target token, then sum over positions.
            target_scores = tf.gather_nd(self.scores_raw, indices)
            self.mmi_scores = tf.reduce_sum(target_scores, axis=1)
Ejemplo n.º 18
0
 def build_train_decoder(self):
     """Build the teacher-forced training decoder.

     Sets ``self.last_state``, ``self.outputs_length``, ``self.logits``,
     ``self.log_probs`` (log-softmax of the logits) and
     ``self.gs_hypotheses`` (argmax over the last axis).
     """
     with tf.name_scope('train_decoder'):
         helper = TrainingHelper(inputs=self.inputs_dense,
                                 sequence_length=self.inputs_length,
                                 time_major=False,
                                 name='training_helper')
         with tf.name_scope('basic_decoder'):
             decoder = BasicDecoder(cell=self.cell,
                                    helper=helper,
                                    initial_state=self.initial_state,
                                    output_layer=self.output_layer)
         with tf.name_scope('dynamic_decode'):
             decoded = seq2seq.dynamic_decode(
                 decoder=decoder,
                 output_time_major=False,
                 impute_finished=True,
                 maximum_iterations=self.inputs_max_length)
             outputs, self.last_state, self.outputs_length = decoded
             self.logits = tf.identity(outputs.rnn_output)
             self.log_probs = tf.nn.log_softmax(self.logits)
             # Highest-scoring token at every step.
             self.gs_hypotheses = tf.argmax(self.log_probs, -1)
Ejemplo n.º 19
0
def model_fn(features, labels, mode, params):
    """Estimator model function: bi-GRU encoders plus an attentive decoder.

    Args:
        features: mapping read with the keys 'question_words',
            'passage_words', 'question_length', 'passage_length',
            'answer_start' and 'answer_end'; 'answer_words' is read in TRAIN
            mode and 'answer_length' in TRAIN and EVAL modes.
        labels: target token ids, cut on axis 1 to the longest decoded
            length. Not touched in PREDICT mode.
        mode: one of ``tf.estimator.ModeKeys``.
        params: hyper-parameters; the attributes read are vocab_size,
            emb_size, word_vocab_file, layers, units, batch_size,
            tgt_sos_id, tgt_eos_id, answer_max_words and grad_clip.

    Returns:
        An ``EstimatorSpec`` for PREDICT, TRAIN or EVAL; ``None`` for any
        other mode.
    """
    mode_keys = tf.estimator.ModeKeys

    embedding_encoder = tf.get_variable(
        'embedding_encoder', shape=(params.vocab_size, params.emb_size))
    table = lookup_ops.index_to_string_table_from_file(params.word_vocab_file)

    question_emb = tf.nn.embedding_lookup(embedding_encoder,
                                          features['question_words'])
    passage_emb = tf.nn.embedding_lookup(embedding_encoder,
                                         features['passage_words'])

    question_words_length = features['question_length']
    passage_words_length = features['passage_length']

    # Each answer-boundary feature gets a new last axis and is repeated 50
    # times along it before being concatenated to the passage embeddings.
    start_feature = features['answer_start']
    end_feature = features['answer_end']
    answer_start = tf.concat([tf.expand_dims(start_feature, -1)] * 50, -1)
    answer_end = tf.concat([tf.expand_dims(end_feature, -1)] * 50, -1)

    with tf.variable_scope('passage_encoding'):
        passage_inputs = tf.concat([passage_emb, answer_start, answer_end],
                                   -1)
        passage_enc, (_, passage_bw_state) = biGRU(passage_inputs,
                                                   passage_words_length,
                                                   params,
                                                   layers=params.layers)

    with tf.variable_scope('question_encoding'):
        question_enc, (_, question_bw_state) = biGRU(question_emb,
                                                     question_words_length,
                                                     params,
                                                     layers=params.layers)

    # One shared dense layer maps the concatenated backward states of each
    # encoder layer to that decoder layer's initial state.
    decoder_state_layer = Dense(params.units,
                                activation=tf.tanh,
                                use_bias=True,
                                name='decoder_state_init')
    per_layer_states = []
    for layer_idx in range(params.layers):
        merged_state = tf.concat(
            [passage_bw_state[layer_idx], question_bw_state[layer_idx]], -1)
        per_layer_states.append(decoder_state_layer(merged_state))
    decoder_init_state = tuple(per_layer_states)

    question_att = BahdanauAttention(
        params.units,
        question_enc,
        memory_sequence_length=question_words_length)
    passage_att = BahdanauAttention(
        params.units,
        passage_enc,
        memory_sequence_length=passage_words_length)

    stacked_gru = MultiRNNCell(
        [GRUCell(params.units) for _ in range(params.layers)])
    decoder_cell = AttentionWrapper(stacked_gru,
                                    [question_att, passage_att],
                                    initial_cell_state=decoder_init_state)

    batch_size = params.batch_size

    if mode == mode_keys.TRAIN:
        # Teacher forcing on the embedded gold answer.
        answer_emb = tf.nn.embedding_lookup(embedding_encoder,
                                            features['answer_words'])
        helper = TrainingHelper(answer_emb, features['answer_length'])
    else:
        # Greedy decoding from the start-of-sequence id.
        sos_ids = tf.fill([batch_size], params.tgt_sos_id)
        helper = GreedyEmbeddingHelper(embedding_encoder, sos_ids,
                                       params.tgt_eos_id)

    projection_layer = Dense(params.vocab_size, use_bias=False)

    decoder = SNetDecoder(decoder_cell,
                          helper,
                          decoder_cell.zero_state(batch_size, tf.float32),
                          output_layer=projection_layer,
                          params=params)

    outputs, _, outputs_length = dynamic_decode(
        decoder, maximum_iterations=params.answer_max_words)
    logits = outputs.rnn_output

    if mode == mode_keys.PREDICT:
        # Map decoded ids back to strings through the vocabulary table.
        decoded_ids = tf.cast(outputs.sample_id, tf.int64)
        predictions = {'answer': table.lookup(decoded_ids)}
        export_outputs = {
            'prediction': tf.estimator.export.PredictOutput(predictions)
        }
        return tf.estimator.EstimatorSpec(mode,
                                          predictions=predictions,
                                          export_outputs=export_outputs)

    # Cut the labels to the longest decoded sequence in the batch.
    longest_decoded = tf.reduce_max(outputs_length)
    labels = tf.stop_gradient(labels[:, :longest_decoded])

    crossent = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
                                                              logits=logits)
    # Mask out steps past each sequence's decoded length.
    target_weights = tf.sequence_mask(outputs_length, dtype=logits.dtype)
    masked_total = tf.reduce_sum(crossent * target_weights)
    loss = masked_total / params.batch_size

    if mode == mode_keys.TRAIN:
        optimizer = tf.train.AdadeltaOptimizer(learning_rate=1)
        global_step = tf.train.get_or_create_global_step()

        grads_and_vars = optimizer.compute_gradients(loss)
        gradients, variables = zip(*grads_and_vars)
        capped_grads, _ = tf.clip_by_global_norm(gradients, params.grad_clip)
        train_op = optimizer.apply_gradients(zip(capped_grads, variables),
                                             global_step=global_step)

        return EstimatorSpec(mode, loss=loss, train_op=train_op)

    if mode == mode_keys.EVAL:
        rouge_metric = rouge_l(outputs.sample_id, labels, outputs_length,
                               features['answer_length'], params, table)
        return EstimatorSpec(mode,
                             loss=loss,
                             eval_metric_ops={'rouge-l': rouge_metric})
Ejemplo n.º 20
0
    def build_model(self):
        """Build the ECM seq2seq graph: placeholders, embeddings, encoder, decoder.

        Always creates the placeholders, the word and emotion embeddings, the
        encoder, the ``ECMWrapper`` decoder cell, the three output projections
        and ``self.saver``.

        When ``self.mode == 'train'`` it additionally sets ``self.loss``,
        ``self.CE`` and ``self.train_op`` (plus the intermediate tensors
        stored on ``self`` along the way). When ``self.mode == 'infer'`` it
        sets ``self.infer_outputs`` from an ``ECMBeamSearchDecoder``.
        """
        print('building model... ...')
        with tf.variable_scope('seq2seq_placeholder'):
            self.encoder_inputs = tf.placeholder(tf.int32, [None, None],
                                                 name="encoder_inputs")
            self.decoder_inputs = tf.placeholder(tf.int32, [None, None],
                                                 name="decoder_inputs")
            self.decoder_targets = tf.placeholder(tf.int32, [None, None],
                                                  name="decoder_targets")
            self.decoder_targets_masks = tf.placeholder(tf.bool, [None, None],
                                                        name="mask")
            self.encoder_length = tf.placeholder(tf.int32, [None],
                                                 name="encoder_length")
            self.decoder_length = tf.placeholder(tf.int32, [None],
                                                 name="decoder_length")
            # ECM placeholder
            self.choice_qs = tf.placeholder(tf.float32, [None, None],
                                            name="choice")
            self.emo_cat = tf.placeholder(tf.int32, [None],
                                          name="emotion_category")
            self.max_target_sequence_length = tf.reduce_max(
                self.decoder_length, name='max_target_len')

        with tf.variable_scope('seq2seq_embedding'):
            self.embedding = self.init_embedding(self.vocab_size,
                                                 self.embedding_size)
            # create emotion category embeddings
            emo_initializer = tf.contrib.layers.xavier_initializer()
            emo_cat_embeddings = tf.get_variable(
                "emo_cat_embeddings",
                [self.num_emotion, self.emo_cat_emb_size],
                initializer=emo_initializer,
                dtype=tf.float32)
            self.emo_internal_memory_embedding = tf.get_variable(
                "emo_internal_memory_embedding",
                [self.num_emotion, self.emo_internal_memory_units],
                initializer=emo_initializer,
                dtype=tf.float32)
            self.emo_cat_embs = tf.nn.embedding_lookup(emo_cat_embeddings,
                                                       self.emo_cat)

        with tf.variable_scope('seq2seq_encoder'):
            encoder_outputs, encoder_states = build_encoder(
                self.embedding,
                self.encoder_inputs,
                self.encoder_length,
                self.enc_num_layers,
                self.enc_num_units,
                self.enc_cell_type,
                bidir=self.enc_bidir)

        with tf.variable_scope('seq2seq_decoder'):
            encoder_length = self.encoder_length
            emo_cat = self.emo_cat
            emo_cat_embs = self.emo_cat_embs
            if self.beam_search:
                # Beam search: repeat every per-example tensor beam_size times.
                print("use beamsearch decoding..")
                encoder_outputs = tile_batch(encoder_outputs,
                                             multiplier=self.beam_size)
                encoder_states = tile_batch(encoder_states,
                                            multiplier=self.beam_size)
                encoder_length = tile_batch(encoder_length,
                                            multiplier=self.beam_size)
                emo_cat = tile_batch(emo_cat, multiplier=self.beam_size)
                emo_cat_embs = tile_batch(emo_cat_embs,
                                          multiplier=self.beam_size)

            attention_mechanism = BahdanauAttention(
                num_units=self.attn_num_units,
                memory=encoder_outputs,
                memory_sequence_length=encoder_length)

            decoder_cell = create_rnn_cell(self.dec_num_layers,
                                           self.dec_num_units,
                                           self.dec_cell_type)

            # Bias-free gate layers handed to ECMWrapper as read/write gates.
            self.read_g = tf.layers.Dense(self.emo_internal_memory_units,
                                          use_bias=False,
                                          name="internal_read_gate")

            self.write_g = tf.layers.Dense(self.emo_internal_memory_units,
                                           use_bias=False,
                                           name="internal_write_gate")

            decoder_cell = ECMWrapper(
                cell=decoder_cell,
                attention_mechanism=attention_mechanism,
                emo_cat_embs=emo_cat_embs,  # emotion category embedding
                emo_cat=emo_cat,  # emotion category
                emo_internal_memory_units=self.
                emo_internal_memory_units,  # emotion memory size
                emo_internal_memory_embedding=self.
                emo_internal_memory_embedding,  # variable of shape [num_emotion, emo_internal_memory_units]
                read_gate=self.read_g,
                write_gate=self.write_g,
                attention_layer_size=self.dec_num_units,
                name='ECMWrapper')

            # The zero state must match the tiled batch when beam search is on.
            batch_size = self.batch_size if not self.beam_search else self.batch_size * self.beam_size

            decoder_initial_state = decoder_cell.zero_state(
                batch_size=batch_size,
                dtype=tf.float32).clone(cell_state=encoder_states)

            output_layer = tf.layers.Dense(
                self.vocab_size, use_bias=False,
                name='output_projection')  # generic vocabulary projection

            # ECM external memory module
            emo_output_layer = tf.layers.Dense(
                self.vocab_size, use_bias=False,
                name="emo_output_projection")  # emotion vocabulary projection

            emo_choice_layer = tf.layers.Dense(
                1, use_bias=False,
                name="emo_choice_alpha")  # projection for the probability of choosing an emotion word

            if self.mode == 'train':
                decoder_inputs_embedded = tf.nn.embedding_lookup(
                    self.embedding, self.decoder_inputs)
                # The training helper feeds the given decoder inputs as the next
                # time step's input, rather than the previous step's output.
                training_helper = TrainingHelper(
                    inputs=decoder_inputs_embedded,
                    sequence_length=self.decoder_length,
                    name='training_helper')

                # No output_layer here: the projections are applied below.
                training_decoder = BasicDecoder(
                    cell=decoder_cell,
                    helper=training_helper,
                    initial_state=decoder_initial_state)

                self.decoder_outputs, self.final_state, self.final_sequence_length = dynamic_decode(
                    decoder=training_decoder,
                    impute_finished=True,
                    maximum_iterations=self.max_target_sequence_length)

                # Despite the name, this is the unprojected rnn_output.
                self.decoder_logits_train = tf.identity(
                    self.decoder_outputs.rnn_output)

                with tf.variable_scope('decoder'):
                    self.generic_logits = output_layer(
                        self.decoder_logits_train)  # logits of the generic-word distribution
                    self.emo_ext_logits = emo_output_layer(
                        self.decoder_logits_train)  # logits of the emotion-word distribution
                    self.alphas = tf.nn.sigmoid(
                        emo_choice_layer(
                            self.decoder_logits_train))  # probability of choosing an emotion word
                    self.int_M_emo = self.final_state.internal_memory  # final state of the internal memory

                # Mix the two softmax distributions, weighted by alpha.
                g_probs = tf.nn.softmax(
                    self.generic_logits) * (1 - self.alphas)
                e_probs = tf.nn.softmax(self.emo_ext_logits) * self.alphas
                train_log_probs = tf.log(g_probs + e_probs)

                # compute losses
                self.alphas = tf.squeeze(self.alphas, axis=-1)

                self.g_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=self.generic_logits,
                    labels=self.decoder_targets) - tf.log(1 - self.alphas)

                self.e_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=self.emo_ext_logits,
                    labels=self.decoder_targets) - tf.log(self.alphas)

                # choice_qs selects, per position, between the two losses.
                losses = self.g_losses * (
                    1 - self.choice_qs) + self.e_losses * self.choice_qs

                # alpha and internal memory regularizations
                self.alpha_reg = tf.reduce_mean(self.choice_qs *
                                                -tf.log(self.alphas))
                self.int_mem_reg = tf.reduce_mean(
                    tf.norm(self.int_M_emo + 1e-7, axis=1))
                # Keep only positions where decoder_targets_masks is true.
                losses = tf.boolean_mask(losses, self.decoder_targets_masks)
                self.loss = tf.reduce_mean(
                    losses) + self.alpha_reg + self.int_mem_reg

                # prepare for perplexity computations
                CE = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=train_log_probs, labels=self.decoder_targets)
                CE = tf.boolean_mask(CE, self.decoder_targets_masks)
                self.CE = tf.reduce_mean(CE)

                # Adam with gradients clipped by global norm.
                optimizer = tf.train.AdamOptimizer(self.learning_rate)
                trainable_params = tf.trainable_variables()
                gradients = tf.gradients(self.loss, trainable_params)
                clip_gradients, _ = tf.clip_by_global_norm(
                    gradients, self.max_gradient_norm)
                self.train_op = optimizer.apply_gradients(
                    zip(clip_gradients, trainable_params))

            elif self.mode == 'infer':
                # NOTE(review): this branch always builds a beam search
                # decoder, whether or not self.beam_search is set -- confirm.
                start_tokens = tf.ones([
                    self.batch_size,
                ], tf.int32) * SOS_ID
                end_token = EOS_ID

                inference_decoder = ECMBeamSearchDecoder(
                    cell=decoder_cell,
                    embedding=self.embedding,
                    start_tokens=start_tokens,
                    end_token=end_token,
                    initial_state=decoder_initial_state,
                    beam_width=self.beam_size,
                    output_layer=output_layer,
                    emo_output_layer=emo_output_layer,
                    emo_choice_layer=emo_choice_layer)

                decoder_outputs, _, _ = dynamic_decode(
                    decoder=inference_decoder,
                    maximum_iterations=self.infer_max_iter)

                infer_outputs = decoder_outputs.predicted_ids  # [batch_size, decoder_targets_length, beam_size]
                self.infer_outputs = tf.transpose(
                    infer_outputs, [0, 2, 1], name='infer_outputs'
                )  # [batch_size, beam_size, decoder_targets_length]

        self.saver = tf.train.Saver(tf.global_variables(),
                                    max_to_keep=self.max_to_keep)
Ejemplo n.º 21
0
    def construct(self):
        """Build the encoder/decoder graph over real-valued sequences.

        Creates the placeholders, an encoder run with ``dynamic_rnn``, a
        teacher-forced training decoder and an inference decoder that feeds
        each output back as the next input. Both decoders are built inside
        the "decode" variable scope; the second opens it with ``reuse=True``.

        Sets, among others, ``self.enc_state``, ``self.enc_state_centre``,
        ``self.training_decoder_output`` and
        ``self.inference_decoder_output``.
        """
        self.saved_session_name = os.path.join(self.tmp_folder, self.uuid_code)

        # --- Placeholders and constants -----------------------------------
        self.input_data = tf.placeholder(tf.float32,
                                         [None, None, self.input_dim])
        self.output_data = tf.placeholder(tf.float32,
                                          [None, None, self.output_dim])
        self.start_tokens = tf.placeholder(tf.float32, [None, self.output_dim])
        self.go_tokens = tf.placeholder(tf.float32, [None, 1, self.output_dim])
        self.sequence_length = tf.placeholder(tf.int32, [None])
        self.mask = tf.placeholder(tf.float32, [None, None])
        self.target_sequence_length = tf.placeholder(
            tf.int32, (None, ), name='target_sequence_length')
        self.max_target_sequence_length = tf.reduce_max(
            self.target_sequence_length, name='max_target_len')
        self.source_sequence_length = tf.placeholder(
            tf.int32, (None, ), name='source_sequence_length')
        # Blocks of stop_pad_length rows filled with the stop token value.
        stop_rows = self.stop_pad_length
        self.x_stopping = np.full((stop_rows, self.input_dim),
                                  self.stop_pad_token,
                                  dtype=np.float32)
        self.y_stopping = np.full((stop_rows, self.output_dim),
                                  self.stop_pad_token,
                                  dtype=np.float32)
        self.learning_rate = tf.placeholder(tf.float32)
        self.batch_size = tf.placeholder(tf.float32)

        # --- Encoder ------------------------------------------------------
        encoder_cell = make_cell(self.layer_sizes, self.keep_prob)

        # Every length is extended by stop_pad_length so that the stopping
        # token is part of what the encoder reads and the decoder must emit.
        self.source_sequence_length_padded = self.source_sequence_length + self.stop_pad_length
        self.target_sequence_length_padded = self.target_sequence_length + self.stop_pad_length
        max_decode_steps = self.max_target_sequence_length + self.stop_pad_length

        _, self.enc_state = dynamic_rnn(
            encoder_cell,
            self.input_data,
            sequence_length=self.source_sequence_length_padded,
            dtype=tf.float32,
            time_major=False,
            swap_memory=True)
        self.enc_state_centre = self.enc_state[-1]

        # --- Decoder cell -------------------------------------------------
        # In the symmetric set-up the decoder mirrors the encoder: both the
        # layer sizes and the encoder state are reversed.
        decoder_layer_sizes = self.layer_sizes
        if self.symmetric:
            self.enc_state = self.enc_state[::-1]
            decoder_layer_sizes = self.layer_sizes[::-1]
        decoder_cell = make_cell(decoder_layer_sizes, self.keep_prob)

        # Dense layer mapping each decoder step to an output_dim vector.
        kernel_init = tf.truncated_normal_initializer(mean=0.0, stddev=0.1)
        output_projection = tf.layers.Dense(units=self.output_dim,
                                            kernel_initializer=kernel_init)

        # --- Training decoder ---------------------------------------------
        with tf.variable_scope("decode"):
            # Teacher forcing: the go token followed by the expected output.
            teacher_inputs = tf.concat([self.go_tokens, self.output_data], 1)
            teacher_helper = TrainingHelper(
                inputs=teacher_inputs,
                sequence_length=self.target_sequence_length_padded,
                time_major=False)

            train_decoder = BasicDecoder(decoder_cell, teacher_helper,
                                         self.enc_state, output_projection)

            # impute_finished because sequences have different end points.
            train_decoded = dynamic_decode(
                train_decoder,
                impute_finished=True,
                maximum_iterations=max_decode_steps)
            self.training_decoder_output = train_decoded[0]

        # --- Inference decoder (same variables as the training decoder) ---
        with tf.variable_scope("decode", reuse=True):

            def never_finished(time_step_value):
                # A produced stop token is never exactly equal to the modelled
                # one, and there is no embedding layer that could learn it,
                # so no early stop is signalled: decoding always runs up to
                # maximum_iterations.
                return False

            def feed_back(sample):
                # The raw output is used unchanged as the next input.
                return sample

            feedback_helper = InferenceHelper(sample_fn=feed_back,
                                              sample_shape=[self.output_dim],
                                              sample_dtype=dtypes.float32,
                                              start_inputs=self.start_tokens,
                                              end_fn=never_finished)

            infer_decoder = BasicDecoder(decoder_cell, feedback_helper,
                                         self.enc_state, output_projection)

            infer_decoded = dynamic_decode(
                infer_decoder,
                impute_finished=True,
                maximum_iterations=max_decode_steps)
            self.inference_decoder_output = infer_decoded[0]
Ejemplo n.º 22
0
    def __init__(self,
                 vocab_size,
                 learning_rate,
                 encoder_size,
                 max_length,
                 embedding_size,
                 sos_token,
                 eos_token,
                 unk_token,
                 beam_size=5):
        self.vocab_size = vocab_size
        self.lr = learning_rate
        self.encoder_size = encoder_size
        self.max_length = max_length
        self.embedding_size = embedding_size
        self.SOS_token = sos_token
        self.EOS_token = eos_token
        self.UNK_token = unk_token
        self.beam_search_size = beam_size
        with tf.variable_scope('placeholder_and_embedding'):
            self.query = tf.placeholder(shape=(None, None), dtype=tf.int32)
            self.query_length = tf.placeholder(shape=(None, ), dtype=tf.int32)
            self.reply = tf.placeholder(shape=(None, None), dtype=tf.int32)
            self.reply_length = tf.placeholder(shape=(None, ), dtype=tf.int32)
            self.decoder_inputs = tf.placeholder(shape=(None, None),
                                                 dtype=tf.int32)
            self.decoder_target = tf.placeholder(shape=(None, None),
                                                 dtype=tf.int32)
            self.decoder_length = tf.placeholder(shape=(None, ),
                                                 dtype=tf.int32)
            self.batch_size = tf.placeholder(shape=(), dtype=tf.int32)
            self.embedding_pl = tf.placeholder(dtype=tf.float32,
                                               shape=(self.vocab_size,
                                                      embedding_size),
                                               name='embedding_source_pl')
            word_embedding = tf.get_variable(name='word_embedding',
                                             shape=(self.vocab_size,
                                                    embedding_size),
                                             dtype=tf.float32,
                                             trainable=True)
            self.init_embedding = word_embedding.assign(self.embedding_pl)
            self.max_target_sequence_length = tf.reduce_max(
                self.decoder_length, name='max_target_len')
            self.mask = tf.sequence_mask(self.decoder_length,
                                         self.max_target_sequence_length,
                                         dtype=tf.float32,
                                         name='masks')

        with tf.variable_scope("query_encoder"):
            self.query_encoder = deep_components.gru_encoder(
                word_embedding, self.encoder_size)
            query_out, query_state = self.query_encoder(
                seq_index=self.query, seq_len=self.query_length)
        with tf.variable_scope("reply_encoder"):
            self.reply_encoder = deep_components.gru_encoder(
                word_embedding, self.encoder_size)
            reply_out, reply_state = self.reply_encoder(
                seq_index=self.reply, seq_len=self.reply_length)
        with tf.variable_scope("decoder"):
            combined_encoder_state = tf.concat([query_state, reply_state],
                                               axis=1)
            tiled_encoder_final_state = tf.contrib.seq2seq.tile_batch(
                combined_encoder_state, multiplier=self.beam_search_size)
            tiled_encoder_outputs = tf.contrib.seq2seq.tile_batch(
                query_out, multiplier=self.beam_search_size)
            tiled_sequence_length = tf.contrib.seq2seq.tile_batch(
                self.query_length, multiplier=self.beam_search_size)
            decoder_cell = deep_components.AttentionGRUCell(
                memory=tiled_encoder_outputs,
                memory_size=self.encoder_size,
                attention_size=self.encoder_size,
                embedding_dims=self.embedding_size,
                rnn_units=self.encoder_size * 2)
            '''decoder_gru = GRUCell(self.encoder_size * 2)
            attention_mechanism = BahdanauAttention(
                num_units=self.encoder_size,
                memory=tiled_encoder_outputs,
                memory_sequence_length=tiled_sequence_length)
            attention_cell = AttentionWrapper(decoder_gru, attention_mechanism,
                                              attention_layer_size=self.encoder_size)
            decoder_initial_state_beam = attention_cell.zero_state(
                dtype=tf.float32, batch_size=tf.cast(self.batch_size * self.beam_search_size,dtype=tf.int32)).clone(
                cell_state=tiled_encoder_final_state)'''
            #############################
            #attention_cell=decoder_gru
            #decoder_initial_state_beam = tiled_encoder_final_state
            ##############################
            decode_out_layer = tf.layers.Dense(self.vocab_size,
                                               name='output_layer',
                                               _reuse=tf.AUTO_REUSE)
        with tf.variable_scope("seq2seq-train"):
            # train
            self.tiled_d_in = tile_batch(self.decoder_inputs,
                                         multiplier=self.beam_search_size)
            self.tiled_d_tgt = tile_batch(self.decoder_target,
                                          multiplier=self.beam_search_size)
            train_helper = TrainingHelper(
                tf.contrib.seq2seq.tile_batch(
                    tf.nn.embedding_lookup(word_embedding,
                                           self.decoder_inputs),
                    multiplier=self.beam_search_size),
                sequence_length=tile_batch(self.decoder_length,
                                           multiplier=self.beam_search_size),
                name="train_helper")
            train_decoder = BasicDecoder(
                decoder_cell,
                train_helper,
                initial_state=tiled_encoder_final_state,
                output_layer=decode_out_layer)
            self.dec_output, _, self.gen_len = dynamic_decode(
                train_decoder,
                impute_finished=True,
                maximum_iterations=self.max_target_sequence_length)
            #self.gen_max_len=tf.reduce_max(self.gen_len)
            #self.padding=tf.zeros(shape=(self.batch_size,self.max_length-self.gen_max_len,self.vocab_size),dtype=tf.float32)
            #self.padding=tile_batch(self.padding,multiplier=self.beam_search_size)
            self.dec_logits = tf.identity(self.dec_output.rnn_output)
            #self.dec_logits = tf.concat((self.dec_logits,self.padding),axis=1)
            self.decoder_target_mask = tile_batch(
                self.mask, multiplier=self.beam_search_size)
            self.cost = sequence_loss(
                self.dec_logits,
                tile_batch(self.decoder_target,
                           multiplier=self.beam_search_size),
                self.decoder_target_mask)
            self.optimizer = tf.train.AdamOptimizer(
                learning_rate=self.lr).minimize(self.cost)
        with tf.variable_scope("seq2seq_beam_search_generate"):
            start_tokens = tf.ones([
                self.batch_size,
            ], tf.int32) * self.SOS_token
            beam_infer_decoder = BeamSearchDecoder(
                decoder_cell,
                embedding=word_embedding,
                end_token=self.EOS_token,
                start_tokens=start_tokens,
                initial_state=tiled_encoder_final_state,
                beam_width=self.beam_search_size,
                output_layer=decode_out_layer)
            self.bs_outputs, _, _ = dynamic_decode(
                beam_infer_decoder, maximum_iterations=self.max_length)
        with tf.variable_scope("greedy_generate"):
            decoding_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
                embedding=word_embedding,
                start_tokens=start_tokens,
                end_token=self.EOS_token)
            inference_decoder = tf.contrib.seq2seq.BasicDecoder(
                cell=decoder_cell,
                helper=decoding_helper,
                initial_state=tiled_encoder_final_state,
                output_layer=decode_out_layer)
            self.greedy_outputs, _, _ = dynamic_decode(
                inference_decoder, maximum_iterations=self.max_length)
Ejemplo n.º 23
0
    def build_decoder(self, phase):
        """Build the decoder part of the graph for the given phase.

        The decoder embedding variable and the input/output projection layers
        are always created inside the ``'decoder'`` variable scope.

        * ``phase == 'train'``: wires a teacher-forced decoder and sets
          ``decoder_outputs_train``, ``decoder_last_state_train``,
          ``decoder_outputs_length_train``, ``decoder_logits_train``,
          ``decoder_pred_train`` and ``loss`` on ``self``, records a
          ``'loss'`` scalar summary and calls ``self.build_optimizer()``.
        * ``phase == 'decode'``: wires a greedy decoder and sets
          ``decoder_outputs_decode``, ``decoder_last_state_decode``,
          ``decoder_outputs_length_decode`` and ``decoder_pred_decode``.

        Any other value of ``phase`` adds nothing beyond the shared pieces.

        :param phase: ``'train'`` or ``'decode'``.
        """
        print("building decoder and attention..")
        with tf.variable_scope('decoder'):
            # Decoder cell(s) together with the state decoding starts from.
            cells, start_state = self.build_decoder_cell()

            # A uniform distribution on [-sqrt(3), sqrt(3)] has variance 1,
            # so the embeddings start out with unit variance.
            limit = sqrt(3)
            self.decoder_embeddings = tf.get_variable(
                name='embedding',
                shape=(self.config.decoder_symbols_num,
                       self.config.embedding_size),
                initializer=tf.random_uniform_initializer(-limit,
                                                          limit,
                                                          dtype=tf.float32),
                dtype=tf.float32)

            # Projects embedded symbols to hidden_units before they reach the
            # cell (per the original note this is essential when
            # use_residual=True so cell input and output widths agree).
            project_in = Dense(self.config.hidden_units,
                               dtype=tf.float32,
                               name='input_projection')

            # Projects cell outputs to one logit per decoder symbol.
            project_out = Dense(self.config.decoder_symbols_num,
                                name='output_projection')

            if phase == 'train':
                # Ground-truth decoder inputs: embedding lookup followed by
                # the input projection.
                teacher_inputs = project_in(
                    embedding_lookup(params=self.decoder_embeddings,
                                     ids=self.decoder_inputs_train))

                # Teacher forcing: each step is fed the ground-truth input
                # rather than the previous prediction.
                teacher_helper = TrainingHelper(
                    inputs=teacher_inputs,
                    sequence_length=self.decoder_inputs_length_train,
                    time_major=False,
                    name='training_helper')
                teacher_decoder = BasicDecoder(cell=cells,
                                               helper=teacher_helper,
                                               initial_state=start_state,
                                               output_layer=project_out)

                # Longest sequence in the current batch; it bounds the decode
                # loop and fixes the width of the loss mask below.
                longest = tf.reduce_max(self.decoder_inputs_length_train)

                # dynamic_decode returns (outputs, final_state, lengths).
                # outputs.rnn_output is batch-major here and already carries
                # the projected logits because project_out is the decoder's
                # output layer.
                (self.decoder_outputs_train,
                 self.decoder_last_state_train,
                 self.decoder_outputs_length_train) = dynamic_decode(
                     decoder=teacher_decoder,
                     output_time_major=False,
                     impute_finished=True,
                     maximum_iterations=longest)

                self.decoder_logits_train = tf.identity(
                    self.decoder_outputs_train.rnn_output)

                # Most likely symbol at every step.
                self.decoder_pred_train = tf.argmax(
                    self.decoder_logits_train,
                    axis=-1,
                    name='decoder_pred_train')

                # 1.0 on real time steps, 0.0 on padding.
                step_weights = tf.sequence_mask(
                    lengths=self.decoder_inputs_length_train,
                    maxlen=longest,
                    dtype=tf.float32,
                    name='masks')

                # Masked cross-entropy, averaged over time steps and batch.
                self.loss = sequence_loss(
                    logits=self.decoder_logits_train,
                    targets=self.decoder_targets_train,
                    weights=step_weights,
                    average_across_timesteps=True,
                    average_across_batch=True)

                # Expose the batch loss to the summary writer.
                tf.summary.scalar('loss', self.loss)

                # Add the ops that minimise the loss.
                self.build_optimizer()

            elif phase == 'decode':
                # One GO symbol per batch entry starts each decoded sequence.
                go_tokens = tf.ones(
                    (self.batch_size, ), tf.int32) * self.config._GO
                eos_token = self.config._EOS

                def embed_then_project(symbol_ids):
                    # Same embedding + projection path as used in training.
                    return project_in(
                        tf.nn.embedding_lookup(self.decoder_embeddings,
                                               symbol_ids))

                # Greedy decoding: the argmax of each step is fed back in.
                greedy_helper = GreedyEmbeddingHelper(
                    start_tokens=go_tokens,
                    end_token=eos_token,
                    embedding=embed_then_project)
                greedy_decoder = BasicDecoder(cell=cells,
                                              helper=greedy_helper,
                                              initial_state=start_state,
                                              output_layer=project_out)

                # impute_finished is left at its default; the original author
                # noted an error when enabling it here.
                (self.decoder_outputs_decode,
                 self.decoder_last_state_decode,
                 self.decoder_outputs_length_decode) = dynamic_decode(
                     decoder=greedy_decoder,
                     output_time_major=False,
                     maximum_iterations=self.config.max_decode_step)

                # sample_id holds the chosen symbol ids; a trailing axis of
                # size 1 is appended so the result has the same rank as a
                # beam-search decoder's predicted_ids.
                self.decoder_pred_decode = tf.expand_dims(
                    self.decoder_outputs_decode.sample_id, -1)
Ejemplo n.º 24
0
    def build_train_graph(self, train_examples):
        """
        Assemble the training decoder and a weight-sharing beam-search decoder.

        :param train_examples: 7-tuple ``(subject, len_subject, content,
            len_content, target_input, target_output, len_target)``
        :return: 8-tuple ``(target_output, final_outputs, final_seq_len,
            generator_params, decoder_cell, attn_zero_state, beam_outputs,
            beam_out_len)`` where ``target_output`` is cut to the decoded
            length and the beam results are those stored at beam index 0
        """

        # Split the example tuple into its seven components.
        (subject, len_subject, content, len_content,
         answers_in, answers_out, answers_len) = train_examples

        # Keep only the answer stored at index 0 for every question (the
        # original comment describes this as the best answer).
        answers_in = answers_in[:, 0, :]
        answers_out = answers_out[:, 0, :]
        answers_len = tf.to_int32(answers_len[:, 0])

        # Word vectors fed to the training decoder.
        answers_embedded = tf.nn.embedding_lookup(self.embeddings_english,
                                                  answers_in)

        # Encoder input: self.concat_seqs is mapped over the batch and the
        # first element of its result is used as the joined sequence.
        encoder_input = tf.map_fn(
            self.concat_seqs, [subject, len_subject, content, len_content])[0]
        encoder_len = len_subject + len_content

        # The initial graph is built twice: once for training and once, with
        # reuse=True and a beam width, for beam-search output.
        decoder_cell, attn_zero_state = self.build_initial_graph(
            encoder_input, encoder_len)
        beam_cell, beam_zero_state = self.build_initial_graph(
            encoder_input,
            encoder_len,
            reuse=True,
            beam_width=self.beam_width)

        # Training decoder driven by the embedded target inputs.
        train_helper = TrainingHelper(answers_embedded,
                                      answers_len,
                                      time_major=False)
        train_decoder = BasicDecoder(decoder_cell,
                                     train_helper,
                                     attn_zero_state,
                                     output_layer=self.projection_layer)

        # Beam-search decoder using the same projection layer.
        beam_decoder = BeamSearchDecoder(beam_cell,
                                         self.embeddings_english,
                                         self.start_token,
                                         self.end_token,
                                         beam_zero_state,
                                         self.beam_width,
                                         output_layer=self.projection_layer)

        # Both decode loops run inside the "train_decoder" scope; the second
        # one re-enters it with reuse=True.
        with tf.variable_scope("train_decoder"):
            final_outputs, _, final_seq_len = dynamic_decode(
                train_decoder, output_time_major=False)
        with tf.variable_scope("train_decoder", reuse=True):
            beam_result, _, beam_lengths = dynamic_decode(
                beam_decoder,
                output_time_major=False,
                maximum_iterations=self.max_seq_len)

        # Cut the targets to the number of steps the training decoder
        # actually produced.
        decoded_steps = tf.shape(final_outputs.sample_id)[1]
        answers_out = answers_out[:, :decoded_steps]

        # Move the beam axis to the front and keep beam 0 only.
        beam_major_ids = tf.transpose(beam_result.predicted_ids, [2, 0, 1])
        beam_outputs = tf.reshape(beam_major_ids[0, :, :],
                                  [self.batch_size, -1])
        beam_major_len = tf.transpose(beam_lengths)
        beam_out_len = tf.reshape(beam_major_len[0, :], [-1])

        # Generator parameters: every trainable variable whose name does not
        # contain "discriminator".
        generator_params = [
            var for var in tf.trainable_variables()
            if "discriminator" not in var.name
        ]

        return (answers_out, final_outputs, final_seq_len, generator_params,
                decoder_cell, attn_zero_state, beam_outputs, beam_out_len)
Ejemplo n.º 25
0
    def _build_model(self):
        """Build the embeddings, the encoder and, in train mode, the decoders.

        Sets ``source_embs``, ``target_embs``, ``enc_outputs``,
        ``enc_states``, ``max_dec_seq_len``, ``dec_cells``,
        ``dec_init_states`` and ``dense_layer`` on ``self``.  Only when
        ``self.mode == "train"`` are the teacher-forced decoder
        (``self.dec_output``) and the greedy decoder (``self.dec_predicts``)
        added; both are built in that same branch.  Tensor shapes are printed
        while the graph is assembled.
        """
        cfg = self.cfg

        with tf.variable_scope("embeddings"):
            # Separate trainable tables for the source and target sides.
            self.source_embs = tf.get_variable(
                name="source_embs",
                shape=[cfg.source_vocab_size, cfg.emb_dim],
                dtype=tf.float32,
                trainable=True)
            self.target_embs = tf.get_variable(
                name="embeddings",
                shape=[cfg.vocab_size, cfg.emb_dim],
                dtype=tf.float32,
                trainable=True)
            src_embedded = tf.nn.embedding_lookup(self.source_embs,
                                                  self.enc_source)
            tgt_embedded = tf.nn.embedding_lookup(self.target_embs,
                                                  self.dec_target_in)
            print("source embedding shape: {}".format(
                src_embedded.get_shape().as_list()))
            print("target input embedding shape: {}".format(
                tgt_embedded.get_shape().as_list()))

        with tf.variable_scope("encoder"):
            enc_inputs = src_embedded
            if cfg.use_bi_rnn:
                # Optional bi-directional layer ahead of the main encoder;
                # forward and backward outputs are concatenated feature-wise.
                with tf.variable_scope("bi-directional_rnn"):
                    cell_cls = GRUCell if cfg.cell_type == "gru" else LSTMCell
                    fw_cell = cell_cls(cfg.num_units)
                    bw_cell = cell_cls(cfg.num_units)
                    both_directions, _ = bidirectional_dynamic_rnn(
                        fw_cell,
                        bw_cell,
                        src_embedded,
                        dtype=tf.float32,
                        sequence_length=self.enc_seq_len)
                    enc_inputs = tf.concat(both_directions, axis=-1)
                    print("bi-directional rnn output shape: {}".format(
                        enc_inputs.get_shape().as_list()))

            # Whatever came before, project to num_units for the encoder.
            enc_projection = tf.layers.Dense(units=cfg.num_units,
                                             dtype=tf.float32,
                                             name="input_projection")
            enc_inputs = enc_projection(enc_inputs)
            print("encoder input projection shape: {}".format(
                enc_inputs.get_shape().as_list()))

            self.enc_outputs, self.enc_states = dynamic_rnn(
                self._create_encoder_cell(),
                enc_inputs,
                sequence_length=self.enc_seq_len,
                dtype=tf.float32)
            print("encoder output shape: {}".format(
                self.enc_outputs.get_shape().as_list()))

        with tf.variable_scope("decoder"):
            # Longest target in the batch; caps the training decode loop.
            self.max_dec_seq_len = tf.reduce_max(self.dec_seq_len,
                                                 name="max_dec_seq_len")
            self.dec_cells, self.dec_init_states = self._create_decoder_cell()

            # Decoder-side input projection and vocabulary projection.
            dec_projection = tf.layers.Dense(units=cfg.num_units,
                                             name="input_projection")
            self.dense_layer = tf.layers.Dense(units=cfg.vocab_size,
                                               name="output_projection")

            if self.mode == "train":  # either "train" or "decode"
                # Teacher-forced decoder over the projected target inputs.
                teacher_helper = TrainingHelper(
                    dec_projection(tgt_embedded),
                    sequence_length=self.dec_seq_len,
                    name="train_helper")
                teacher_decoder = BasicDecoder(
                    self.dec_cells,
                    helper=teacher_helper,
                    output_layer=self.dense_layer,
                    initial_state=self.dec_init_states)
                self.dec_output, _, _ = dynamic_decode(
                    teacher_decoder,
                    impute_finished=True,
                    maximum_iterations=self.max_dec_seq_len)
                print("decoder output shape: {} (vocab size)".format(
                    self.dec_output.rnn_output.get_shape().as_list()))

                # Greedy decoder over the same cells and projection layers;
                # it starts from GO and stops at EOS.
                go_tokens = tf.ones(
                    shape=[
                        self.batch_size,
                    ], dtype=tf.int32) * cfg.target_dict[GO]
                eos_token = cfg.target_dict[EOS]

                def embed_and_project(token_ids):
                    # Embedding lookup followed by the decoder projection.
                    return dec_projection(
                        tf.nn.embedding_lookup(self.target_embs, token_ids))

                greedy_helper = GreedyEmbeddingHelper(
                    embedding=embed_and_project,
                    start_tokens=go_tokens,
                    end_token=eos_token)
                greedy_decoder = BasicDecoder(
                    self.dec_cells,
                    helper=greedy_helper,
                    initial_state=self.dec_init_states,
                    output_layer=self.dense_layer)
                greedy_output, _, _ = dynamic_decode(
                    greedy_decoder,
                    maximum_iterations=cfg.maximum_iterations)
                self.dec_predicts = greedy_output.sample_id
Ejemplo n.º 26
0
    def buildModel(self):
        """Build the attention seq2seq graph and return its key tensors.

        Hyper-parameters are read from ``self.args``.  The graph contains the
        input placeholders, a shared embedding, a multi-layer bidirectional
        LSTM encoder, a teacher-forced attention decoder used for the loss,
        a beam-search decoder (fixed beam width 3) that reuses the decoder
        cell and projection layer, and an Adam train op with global-norm
        gradient clipping.

        Returns:
            Tuple ``(x, y, x_len, y_len, logits, loss, prediction,
            beam_decoder_result_ids, global_step, train_op, summaries)``.
            ``logits`` is the softmax of the training decoder output, i.e.
            probabilities rather than raw logits.
        """
        # Short aliases for the hyper-parameters used below.
        T_in = self.args.T_in
        T_out = self.args.T_out
        D_in = self.args.D_in
        D_out = self.args.D_out
        E = self.args.embedding_dim
        H = self.args.hidden_dim
        SOS = self.args.SOS
        EOS = self.args.EOS
        PAD = self.args.PAD
        beam_width = 3

        # Input
        with tf.name_scope('input'):
            # N, T_in
            x = tf.placeholder(shape=(None, T_in),
                               dtype=tf.int32,
                               name='encoder_inputs')
            # N, T_out
            y = tf.placeholder(shape=(None, T_out),
                               dtype=tf.int32,
                               name='decoder_inputs')
            # N
            x_len = tf.placeholder(shape=(None, ), dtype=tf.int32)
            # N
            y_len = tf.placeholder(shape=(None, ), dtype=tf.int32)
            # dynamic sample num
            batch_size = tf.shape(x)[0]

            # One column (N, 1) filled with each special symbol id.
            # `eos` is not referenced again in this method.
            sos = tf.ones(shape=(batch_size, 1), dtype=tf.int32) * SOS
            eos = tf.ones(shape=(batch_size, 1), dtype=tf.int32) * EOS
            pad = tf.ones(shape=(batch_size, 1), dtype=tf.int32) * PAD

            # input mask
            # `x_mask` and `y_with_sos_mask` are not referenced again in this
            # method.
            x_mask = tf.sequence_mask(x_len, T_in, dtype=tf.float32)
            y_with_sos_mask = tf.sequence_mask(y_len,
                                               T_out + 1,
                                               dtype=tf.float32)
            # Append one PAD column so the target has T_out + 1 steps.
            y_with_pad = tf.concat([y, pad], axis=1)
            # EOS id at position y_len of each row, zeros elsewhere.
            eos_mask = tf.one_hot(y_len, depth=T_out + 1, dtype=tf.int32) * EOS

            # masked inputs
            # Adding eos_mask only produces EOS at position y_len if the
            # value already there is 0 -- presumably PAD == 0 and y is
            # zero-padded; TODO confirm.
            y_with_eos = y_with_pad + eos_mask
            # Decoder input: SOS followed by y (N, T_out + 1).
            y_with_sos = tf.concat([sos, y], axis=1)

        ## Embedding
        with tf.name_scope('embedding'):
            if self.args.use_pretrained:
                # Raw float32 file reshaped to rows of width E; the table is
                # frozen (trainable=False).
                embedding_pretrained = np.fromfile(self.args.pretrained_file,
                                                   dtype=np.float32).reshape(
                                                       (-1, E))
                embedding = tf.Variable(embedding_pretrained, trainable=False)
            else:
                embedding = tf.get_variable(name='embedding',
                                            shape=(D_in, E),
                                            dtype=tf.float32,
                                            initializer=xavier_initializer())
            # The same table embeds encoder and decoder inputs.
            e_x = tf.nn.embedding_lookup(embedding, x)
            e_y = tf.nn.embedding_lookup(embedding, y_with_sos)
            # Dropout is applied to the encoder inputs only, and only in
            # train mode.
            if self.args.mode == 'train':
                e_x = tf.nn.dropout(e_x, self.args.keep_prob)

        ## Encoder
        with tf.name_scope('encoder'):
            ## Multi-BiLSTM
            fw_cell = rnn.MultiRNNCell([
                rnn.BasicLSTMCell(num_units=H)
                for i in range(self.args.layer_size)
            ])
            bw_cell = rnn.MultiRNNCell([
                rnn.BasicLSTMCell(num_units=H)
                for i in range(self.args.layer_size)
            ])
            bi_encoder_output, bi_encoder_state = tf.nn.bidirectional_dynamic_rnn(
                fw_cell,
                bw_cell,
                e_x,
                sequence_length=x_len,
                dtype=tf.float32,
                time_major=False,
                scope=None)
            # Forward and backward outputs are summed (not concatenated), so
            # the attention memory keeps width H.
            encoder_output = bi_encoder_output[0] + bi_encoder_output[1]
            # Only the forward direction's final state seeds the decoder.
            encoder_final_state = bi_encoder_state[0]

        ## Decoder
        with tf.name_scope('decoder'):
            # One cell stack shared by the training and beam-search decoders.
            decoder_cell = rnn.MultiRNNCell([
                rnn.BasicLSTMCell(num_units=H)
                for i in range(self.args.layer_size)
            ])
            # Every sequence is decoded for the full T_out + 1 steps during
            # training; padding is handled by the loss mask instead.
            decoder_lengths = tf.ones(shape=[batch_size],
                                      dtype=tf.int32) * (T_out + 1)

            ## Training decoder
            with tf.variable_scope('attention'):
                attention_mechanism = LuongAttention(
                    num_units=H,
                    memory=encoder_output,
                    memory_sequence_length=x_len,
                    name='attention_fn')
            # Maps decoder outputs to D_out scores; shared with beam search.
            projection_layer = Dense(units=D_out,
                                     kernel_initializer=xavier_initializer())

            train_decoder_cell = AttentionWrapper(
                cell=decoder_cell,
                attention_mechanism=attention_mechanism,
                attention_layer_size=H)
            # Zero attention state with the cell state replaced by the
            # encoder's final (forward) state.
            train_decoder_init_state = train_decoder_cell.zero_state(
                batch_size=batch_size,
                dtype=tf.float32).clone(cell_state=encoder_final_state)
            training_helper = TrainingHelper(e_y,
                                             decoder_lengths,
                                             time_major=False)
            train_decoder = BasicDecoder(
                cell=train_decoder_cell,
                helper=training_helper,
                initial_state=train_decoder_init_state,
                output_layer=projection_layer)
            train_decoder_outputs, _, _ = dynamic_decode(
                train_decoder,
                impute_finished=True,
                maximum_iterations=T_out + 1)
            # N, T_out+1, D_out
            # `ln` is defined outside this method -- presumably layer
            # normalization; confirm.
            train_decoder_outputs = ln(train_decoder_outputs.rnn_output)

            ## Beam_search decoder
            # Encoder tensors repeated beam_width times per batch entry.
            beam_memory = tile_batch(encoder_output, beam_width)
            beam_memory_state = tile_batch(encoder_final_state, beam_width)
            beam_memory_length = tile_batch(x_len, beam_width)

            # Re-enter the 'attention' scope with reuse=True so the attention
            # mechanism's variables are shared with the training decoder.
            with tf.variable_scope('attention', reuse=True):
                beam_attention_mechanism = LuongAttention(
                    num_units=H,
                    memory=beam_memory,
                    memory_sequence_length=beam_memory_length,
                    name='attention_fn')
            # NOTE(review): attention_layer_size is None here but H for the
            # training wrapper above, so the two wrappers are configured
            # differently -- confirm that inference is meant to skip the
            # attention layer that training uses.
            beam_decoder_cell = AttentionWrapper(
                cell=decoder_cell,
                attention_mechanism=beam_attention_mechanism,
                attention_layer_size=None)
            beam_decoder_init_state = beam_decoder_cell.zero_state(
                batch_size=batch_size * beam_width,
                dtype=tf.float32).clone(cell_state=beam_memory_state)
            start_tokens = tf.ones((batch_size), dtype=tf.int32) * SOS
            beam_decoder = BeamSearchDecoder(
                cell=beam_decoder_cell,
                embedding=embedding,
                start_tokens=start_tokens,
                end_token=EOS,
                initial_state=beam_decoder_init_state,
                beam_width=beam_width,
                output_layer=projection_layer)
            # NOTE(review): this decode passes the current variable scope
            # explicitly while the training decode above uses the default
            # scope -- verify that variables are shared as intended.
            beam_decoder_outputs, _, _ = dynamic_decode(
                beam_decoder,
                scope=tf.get_variable_scope(),
                maximum_iterations=T_out + 1)
            beam_decoder_result_ids = beam_decoder_outputs.predicted_ids

        with tf.name_scope('loss'):
            # Despite the name, `logits` holds softmax probabilities; they
            # are passed to the Keras loss without from_logits.
            logits = tf.nn.softmax(train_decoder_outputs)
            cross_entropy = tf.keras.losses.sparse_categorical_crossentropy(
                y_with_eos, logits)
            # Count y_len + 1 steps per row so the EOS position is included.
            loss_mask = tf.sequence_mask(y_len + 1,
                                         T_out + 1,
                                         dtype=tf.float32)
            # Sum over all unmasked steps, divided by the batch size (not by
            # the number of tokens).
            loss = tf.reduce_sum(cross_entropy * loss_mask) / tf.cast(
                batch_size, dtype=tf.float32)
            # Most probable symbol id at every step.
            prediction = tf.argmax(logits, 2)

        ## train_op
        with tf.name_scope('train'):
            global_step = tf.train.get_or_create_global_step()
            # `noam_scheme` is defined outside this method -- presumably a
            # warm-up learning-rate schedule; confirm.
            lr = noam_scheme(self.args.lr, global_step, self.args.warmup_steps)
            optimizer = tf.train.AdamOptimizer(lr)

            ## gradient clips
            # Clip by global norm over all trainable variables, then apply.
            trainable_params = tf.trainable_variables()
            gradients = tf.gradients(loss, trainable_params)
            clip_gradients, _ = tf.clip_by_global_norm(
                gradients, self.args.gradient_clip_num)
            train_op = optimizer.apply_gradients(zip(clip_gradients,
                                                     trainable_params),
                                                 global_step=global_step)

        # Summary
        with tf.name_scope('summary'):
            tf.summary.scalar('lr', lr)
            tf.summary.scalar('loss', loss)
            tf.summary.scalar('global_step', global_step)
            # merge_all collects every summary registered in the graph.
            summaries = tf.summary.merge_all()
        return x, y, x_len, y_len, logits, loss, prediction, beam_decoder_result_ids, global_step, train_op, summaries
Ejemplo n.º 27
0
    def Tensor_Generate(self):
        placeholder_Dict = self.pattern_Feeder.placeholder_Dict

        with tf.variable_scope('speaker_Embedding') as scope:
            batch_Size = tf.shape(placeholder_Dict["Mel"])[0]

            input_Activation = tf.layers.dense(
                inputs=placeholder_Dict["Mel"],
                units=speaker_Embedding_Parameters.embedding_Size)

            rnn_Cell = MultiRNNCell(cells=[
                ResidualWrapper(
                    MultiRNNCell(cells=[
                        ResidualWrapper(
                            LSTMCell(num_units=768,
                                     num_proj=speaker_Embedding_Parameters.
                                     embedding_Size,
                                     activation=tf.nn.tanh)),
                        ResidualWrapper(
                            LSTMCell(num_units=768,
                                     num_proj=speaker_Embedding_Parameters.
                                     embedding_Size,
                                     activation=tf.nn.tanh)),
                    ])),
                LSTMCell(num_units=768,
                         num_proj=speaker_Embedding_Parameters.embedding_Size,
                         activation=tf.nn.tanh),
            ])

            helper = TrainingHelper(
                inputs=input_Activation,
                sequence_length=placeholder_Dict["Mel_Length"],
                time_major=False)

            decoder_Initial_State = rnn_Cell.zero_state(batch_size=batch_Size,
                                                        dtype=tf.float32)

            final_Outputs, final_States, final_Sequence_Lengths = dynamic_decode(
                decoder=BasicDecoder(rnn_Cell, helper, decoder_Initial_State),
                maximum_iterations=speaker_Embedding_Parameters.
                pattern_Frame_Range[1],
            )

            #hidden_Activation = tf.nn.sigmoid(final_Outputs.rnn_output[:, -1, :]);
            hidden_Activation = final_Outputs.rnn_output[:, -1, :]
            embedding_Activation = tf.nn.l2_normalize(hidden_Activation,
                                                      axis=1)

            self.averaged_Embedding_Tensor = tf.reduce_mean(
                embedding_Activation, axis=0)
            #For single wav

            if not self.is_Training:
                self.tf_Session.run(tf.global_variables_initializer())
                return

        #Back-prob.
        with tf.variable_scope('training_Loss') as scope:
            speaker_Size = tf.cast(
                batch_Size /
                speaker_Embedding_Parameters.batch_Pattern_per_Speaker,
                tf.int32)

            reshaped_Embedding_Activation = tf.reshape(
                embedding_Activation,
                shape=(
                    speaker_Size,
                    speaker_Embedding_Parameters.batch_Pattern_per_Speaker,
                    speaker_Embedding_Parameters.embedding_Size,
                )  #[speaker, pattern_per_Speaker, embedding]
            )

            centroid_for_Within = (
                tf.tile(
                    tf.reduce_sum(reshaped_Embedding_Activation,
                                  axis=1,
                                  keepdims=True),  #[speaker, 1, embedding]
                    multiples=[
                        1, speaker_Embedding_Parameters.
                        batch_Pattern_per_Speaker, 1
                    ]  #[speaker, pattern_per_Speaker, embedding]
                ) - reshaped_Embedding_Activation) / (
                    speaker_Embedding_Parameters.batch_Pattern_per_Speaker - 1
                )  #[speaker, pattern_per_Speaker, embedding]
            centroid_for_Between = tf.reduce_mean(
                reshaped_Embedding_Activation, axis=1)  #[speaker, embedding]

            cosine_Similarity_Weight = tf.Variable(
                10.0, name='cosine_Similarity_Weight', trainable=True)
            cosine_Similarity_Bias = tf.Variable(-5.0,
                                                 name='cosine_Similarity_Bias',
                                                 trainable=True)

            within_Cosine_Similarity = cosine_Similarity_Weight * Cosine_Similarity(
                reshaped_Embedding_Activation, centroid_for_Within
            ) - cosine_Similarity_Bias  #[speaker, pattern_per_Speaker]

            between_Cosine_Similarity_Filter = 1 - tf.tile(
                tf.expand_dims(tf.eye(speaker_Size), axis=1),
                multiples=[
                    1, speaker_Embedding_Parameters.batch_Pattern_per_Speaker,
                    1
                ])  #[speaker, pattern_per_Speaker, Speaker]
            between_Cosine_Similarity = tf.reshape(
                cosine_Similarity_Weight * Cosine_Similarity2D(
                    embedding_Activation, centroid_for_Between) -
                cosine_Similarity_Bias,  #[speaker * pattern_per_Speaker, speaker]
                shape=(
                    speaker_Size,
                    speaker_Embedding_Parameters.batch_Pattern_per_Speaker,
                    speaker_Size,
                ))  #[speaker, pattern_per_Speaker, Speaker]

            between_Cosine_Similarity = tf.reshape(
                tf.boolean_mask(between_Cosine_Similarity,
                                between_Cosine_Similarity_Filter),
                shape=(
                    speaker_Size,
                    speaker_Embedding_Parameters.batch_Pattern_per_Speaker,
                    speaker_Size - 1,
                )
            )  #[speaker, pattern_per_Speaker, Speaker - 1]     Same speaker of first dimension was removed at last dimension.

            ##softmax_Loss = within_Cosine_Similarity - tf.log(tf.reduce_sum(tf.exp(tf.concat([tf.expand_dims(within_Cosine_Similarity, axis=2), between_Cosine_Similarity], axis=2)), axis = 2));
            softmax_Loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=tf.concat([
                    tf.expand_dims(within_Cosine_Similarity, axis=2),
                    between_Cosine_Similarity
                ],
                                 axis=2),
                labels=tf.zeros(shape=(
                    speaker_Size,
                    speaker_Embedding_Parameters.batch_Pattern_per_Speaker),
                                dtype=tf.int32))  #Almost same

            contrast_Loss = 1 - tf.nn.sigmoid(
                within_Cosine_Similarity) + tf.reduce_max(
                    between_Cosine_Similarity, axis=2)

            if speaker_Embedding_Parameters.loss_Method.upper(
            ) == "Softmax".upper():
                loss = tf.reduce_mean(softmax_Loss)
            elif speaker_Embedding_Parameters.loss_Method.upper(
            ) == "Contrast".upper():
                loss = tf.reduce_mean(contrast_Loss)

            global_Step = tf.Variable(0, name='global_Step', trainable=False)

            #Noam decay of learning rate
            step = tf.cast(global_Step + 1, dtype=tf.float32)
            warmup_Steps = 4000.0
            learning_Rate = speaker_Embedding_Parameters.learning_Rate * warmup_Steps**0.5 * tf.minimum(
                step * warmup_Steps**-1.5, step**-0.5)

            #Weight update. We use the ADAM optimizer
            optimizer = tf.train.AdamOptimizer(learning_Rate)
            gradients, variables = zip(*optimizer.compute_gradients(loss))
            clipped_Gradients, global_Norm = tf.clip_by_global_norm(
                gradients, 1.0)
            optimize = optimizer.apply_gradients(zip(clipped_Gradients,
                                                     variables),
                                                 global_step=global_Step)

        self.training_Tensor_List = [
            global_Step, learning_Rate, loss, optimize
        ]
        self.test_Tensor_List = [global_Step, embedding_Activation]

        if not os.path.exists(speaker_Embedding_Parameters.extract_Path +
                              "/Summary"):
            os.makedirs(speaker_Embedding_Parameters.extract_Path + "/Summary")
        graph_Writer = tf.summary.FileWriter(
            speaker_Embedding_Parameters.extract_Path + "/Summary",
            self.tf_Session.graph)
        graph_Writer.close()
        self.tf_Session.run(tf.global_variables_initializer())
Ejemplo n.º 28
0
    def __init__(self,
                 inputs,
                 targets,
                 src_vocab_size,
                 src_max_length,
                 tgt_vocab_size,
                 tgt_max_length,
                 emb_dim,
                 num_units,
                 batch_size,
                 eos_token,
                 is_train,
                 share_embeddings=False,
                 teacher_forcing=False):
        """Build a bidirectional-LSTM encoder / attention-decoder graph.

        The whole graph is wired here; the results are exposed as
        ``self.preds``, ``self.logits`` and ``self.mle_loss``.

        Args:
            inputs: Integer tensor of source token ids; assumed to be
                [batch_size, src_max_length] -- TODO confirm with caller.
            targets: Integer tensor of target token ids; assumed to be
                [batch_size, tgt_max_length] so that it lines up with the
                padded logits -- TODO confirm with caller.
            src_vocab_size: Number of rows of the encoder embedding table.
            src_max_length: Not used by this constructor; kept so the
                signature stays compatible with existing callers.
            tgt_vocab_size: Number of rows of the decoder embedding table
                (when it is not shared) and width of the output projection.
            tgt_max_length: Maximum number of decoding steps; logits are
                zero-padded along the time axis up to this length.
            emb_dim: Width of both embedding tables.
            num_units: Units of each encoder LSTM direction; the decoder LSTM
                uses ``2 * num_units``.
            batch_size: Batch size used for the start tokens, the decoder
                zero state and the loss normalisation.
            eos_token: Id of the end-of-sequence token; used to derive the
                sequence lengths and to stop greedy decoding.
            is_train: Currently not consulted (see the NOTE(review) on the
                encoder dropout below).
            share_embeddings: If true, the decoder reuses the encoder
                embedding table; requires equal vocabulary sizes.
            teacher_forcing: If true, the decoder is fed the ground-truth
                targets; otherwise it is fed its own greedy predictions.
        """
        xavier = tf.contrib.layers.xavier_initializer

        # Token id 0 is fed to the decoder as the start-of-sequence symbol.
        start_tokens = tf.zeros([batch_size], dtype=tf.int32)

        # Sequence length = index along the last axis where |token - eos_token|
        # is minimal, i.e. the position of EOS when one is present.
        # NOTE(review): for a row without EOS this yields the index of the
        # numerically closest id instead of the full length -- callers
        # presumably always append EOS; confirm.
        input_lengths = tf.argmin(tf.abs(inputs - eos_token), axis=-1, output_type=tf.int32)
        target_lengths = tf.argmin(tf.abs(targets - eos_token), axis=-1, output_type=tf.int32)

        # ------------------------------------------------------------ encoder
        input_embedding_table = tf.get_variable("encoder_embedding", [src_vocab_size, emb_dim], initializer=xavier(), dtype=tf.float32)
        input_embedding = tf.nn.embedding_lookup(input_embedding_table, inputs)

        encoder_cell = tf.nn.rnn_cell.BasicLSTMCell(num_units, state_is_tuple=False)
        # NOTE(review): `is_train` is never consulted, so this input dropout
        # is also active when the graph is built for inference -- confirm
        # whether it should be gated on `is_train`.
        encoder_cell = tf.nn.rnn_cell.DropoutWrapper(cell=encoder_cell,
                                                     input_keep_prob=0.8,
                                                     output_keep_prob=1.0)

        # With time_major=False each direction's output is batch-major:
        #   encoder_output: (fw, bw), each [batch_size, max_time, num_units]
        #   encoder_state:  (fw, bw) final states
        # NOTE(review): the same cell object is passed for both directions;
        # whether fw/bw then share weights depends on the TF version -- confirm
        # this is intended.
        (encoder_output,
         encoder_state) = tf.nn.bidirectional_dynamic_rnn(cell_fw=encoder_cell,
                                                          cell_bw=encoder_cell,
                                                          inputs=input_embedding,
                                                          sequence_length=input_lengths,
                                                          dtype=tf.float32,
                                                          time_major=False)

        # Join both directions along the feature axis.
        encoder_output = tf.concat(encoder_output, axis=2)
        # NOTE(review): with state_is_tuple=False each direction's state is
        # already a flat concatenation, so this appears to produce
        # [c_fw, h_fw, c_bw, h_bw] while the decoder cell presumably reads its
        # flat state as [c, h] halves -- confirm the layout is intended.
        encoder_state = tf.concat([encoder_state[0], encoder_state[1]], axis=1)

        # ------------------------------------------------------------ decoder
        if share_embeddings:
            assert src_vocab_size == tgt_vocab_size
            target_embedding_table = input_embedding_table
        else:
            # The decoder table is indexed by *target* ids, so it must have
            # tgt_vocab_size rows (it was previously sized with src_vocab_size).
            target_embedding_table = tf.get_variable("decoder_embedding", [tgt_vocab_size, emb_dim], initializer=xavier(), dtype=tf.float32)

        # Decoder inputs are the targets shifted right by one start token.
        prefixed_targets = tf.concat([tf.expand_dims(start_tokens, 1), targets], axis=1)
        target_embedding = tf.nn.embedding_lookup(target_embedding_table, prefixed_targets)

        if teacher_forcing:
            # "+ 1" accounts for the prepended start token.
            helper = TrainingHelper(target_embedding,
                                    target_lengths + 1,
                                    time_major=False)
        else:
            helper = GreedyEmbeddingHelper(target_embedding_table, start_tokens, eos_token)

        decoder_cell = tf.nn.rnn_cell.BasicLSTMCell(num_units * 2, state_is_tuple=False)
        projection_layer = tf.layers.Dense(tgt_vocab_size, use_bias=False)

        attention_mechanism = BahdanauAttention(num_units,
                                                encoder_output,
                                                memory_sequence_length=input_lengths)

        decoder_cell = AttentionWrapper(decoder_cell,
                                        attention_mechanism,
                                        attention_layer_size=num_units)

        # Start from the wrapper's zero state, replacing only the inner cell
        # state with the encoder's final state.
        decoder_initial_state = decoder_cell.zero_state(batch_size, tf.float32).clone(cell_state=encoder_state)
        decoder = BasicDecoder(cell=decoder_cell,
                               helper=helper,
                               initial_state=decoder_initial_state,
                               output_layer=projection_layer)

        decoder_outputs, states, lengths = dynamic_decode(decoder,
                                                          output_time_major=False,
                                                          impute_finished=True,
                                                          maximum_iterations=tgt_max_length)

        # Decoding may stop before tgt_max_length steps; zero-pad the time
        # axis so the logits have tgt_max_length steps, like the loss mask.
        unpadded_logits = decoder_outputs.rnn_output
        missing_elems = tgt_max_length - tf.shape(unpadded_logits)[1]
        padding = [[0, 0], [0, missing_elems], [0, 0]]
        logits = tf.pad(unpadded_logits, padding, 'CONSTANT', constant_values=0.)

        # ---------------------------------------------------------------- loss
        weights = tf.sequence_mask(target_lengths + 1,  # the "+1" is to include EOS
                                   maxlen=tgt_max_length,
                                   dtype=tf.float32)

        # Masked token-level cross entropy, summed over time and divided by
        # the batch size.
        crossent = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=targets, logits=logits)
        mle_loss = (tf.reduce_sum(crossent * weights) / batch_size)
        preds = decoder_outputs.sample_id

        self.preds = preds
        self.logits = logits
        self.mle_loss = mle_loss
Ejemplo n.º 29
0
    def decode_train(self,
                     dec_input_tokens,
                     dec_lengths,
                     init_state,
                     *attention_args,
                     decoder_class=BasicDecoder,
                     decoder_kwoptions=None):
        """Build the training-time decoding graph and return its logits.

        Side effects: sets ``self.input_project``, ``self.output_project``
        and ``self.train_decoder``.

        Args:
            dec_input_tokens: Token ids fed to the decoder; they are looked
                up in ``self.embeddings``.
            dec_lengths: Per-example decoder input lengths passed to
                ``TrainingHelper``; their maximum bounds the number of
                decoding steps.
            init_state: Initial decoder state, forwarded to
                ``self.setup_decoder_cell``.
            *attention_args: Extra positional arguments forwarded unchanged
                to ``self.setup_decoder_cell``.
            decoder_class: Decoder constructor, called as
                ``decoder_class(cell, helper, init_state, output_layer=...,
                **decoder_kwoptions)``.
            decoder_kwoptions: Optional dict of extra keyword arguments for
                ``decoder_class``; ``None`` means no extra arguments.

        Returns:
            A ``(logits, final_state)`` tuple. ``logits`` is the decoder's
            ``rnn_output`` shifted by 1e-9 and clipped to [-20, 20];
            ``final_state`` is the final state returned by
            ``dynamic_decode``.
        """
        # Avoid a shared mutable default: build a fresh dict per call.
        if decoder_kwoptions is None:
            decoder_kwoptions = {}

        with tf.variable_scope(self.scope or "Decoder") as scope:
            # NOTE(review): the meaning of the literal `False` (third
            # argument) is defined by setup_decoder_cell, which is not
            # visible here.
            train_cell, init_state = self.setup_decoder_cell(
                self.config, self.keep_prob, False, init_state,
                *attention_args)

            self.input_project = tf.layers.Dense(units=self.config.hidden_size,
                                                 name="input_projection",
                                                 activation=self.activation)

            # Equality (not truthiness) is kept on purpose so that non-bool
            # config values behave exactly as before.
            use_emb_as_out_proj = getattr(self.config, 'use_emb_as_out_proj',
                                          False) == True

            if use_emb_as_out_proj:
                # Tie the output layer to the embeddings: project the
                # embedding matrix to hidden_size and use its transpose as
                # the kernel of the output projection.
                emb_project = tf.layers.Dense(units=self.config.hidden_size,
                                              use_bias=False,
                                              activation=None,
                                              name='emb_projection')
                output_kernel = emb_project(self.embeddings)
                output_kernel = tf.transpose(output_kernel)

                self.output_project = SharedKernelDense(
                    units=shape(self.embeddings, 0),
                    shared_kernel=output_kernel,
                    use_bias=False,
                    activation=None,
                    name='output_projection')
            else:
                # Independent linear projection; shape(self.embeddings, 0) is
                # presumably the vocabulary size -- confirm against `shape`.
                self.output_project = tf.layers.Dense(units=shape(
                    self.embeddings, 0),
                                                      name='output_projection',
                                                      use_bias=False,
                                                      activation=None)

            with tf.name_scope('Train'):
                # Embed, project and apply dropout to the decoder inputs.
                inputs = tf.nn.embedding_lookup(self.embeddings,
                                                dec_input_tokens)
                inputs = self.input_project(inputs)
                inputs = tf.nn.dropout(inputs, self.keep_prob)

                helper = TrainingHelper(inputs,
                                        sequence_length=dec_lengths,
                                        time_major=False)
                train_decoder = decoder_class(train_cell,
                                              helper,
                                              init_state,
                                              output_layer=self.output_project,
                                              **decoder_kwoptions)

                # Decode no further than the longest sequence in the batch.
                max_dec_len = tf.reduce_max(dec_lengths, name="max_dec_len")
                outputs, final_state, _ = dynamic_decode(
                    train_decoder,
                    impute_finished=True,
                    maximum_iterations=max_dec_len,
                    scope=scope)
                logits = outputs.rnn_output

                # To prevent the training loss from becoming NaN.
                logits += 1e-9
                logits = tf.clip_by_value(logits,
                                          -20.0,
                                          20.0,
                                          name='clip_logits')
                self.train_decoder = train_decoder

        return logits, final_state
Ejemplo n.º 30
0
 def build_decoder(self, encoder_outputs, encoder_final_state):
     """
     Build the complete decoder.

     In ``'train'`` mode a teacher-forced decoder is built and ``self.masks``
     and ``self.loss`` are set; in any other mode a beam-search decoder is
     built and ``self.decoder_pred_decode`` is set.

     :return: None
     """
     with tf.variable_scope("decode"):
         cell, initial_state = self.build_decoder_cell(
             encoder_outputs, encoder_final_state, self.hidden_size,
             self.cell_type, self.layer_size)

         # Output projection layer: maps decoder outputs to vocabulary logits.
         projection_init = tf.truncated_normal_initializer(mean=0.0,
                                                           stddev=0.1)
         output_projection = layers.Dense(
             self.decoder_vocab_size,
             dtype=tf.float32,
             use_bias=False,
             kernel_initializer=projection_init,
             name='decoder_output_projection')

         if self.mode == 'train':
             # Training mode.
             embedded_inputs = tf.nn.embedding_lookup(
                 self.decoder_embeddings, self.decoder_inputs_train)

             # TrainingHelper feeds the next input from the `inputs` given at
             # construction rather than from the decoder's own output.
             # time_major defaults to False, so `inputs` is read as
             # batch-major; `sequence_length` holds the length of each
             # sequence in the current batch.
             helper = TrainingHelper(
                 inputs=embedded_inputs,
                 sequence_length=self.decoder_inputs_length,
                 name='training_helper')

             # BasicDecoder bundles cell, helper, initial state and the
             # optional output layer; the helper determines whether outputs
             # are fed back as inputs. The initial state must have the same
             # structure as the cell's state (e.g. a tuple for LSTM).
             train_decoder = BasicDecoder(cell=cell,
                                          helper=helper,
                                          initial_state=initial_state,
                                          output_layer=output_projection)
             longest_target = tf.reduce_max(self.decoder_inputs_length)

             # dynamic_decode repeatedly calls the decoder's step function to
             # produce the output sequence (similar to tf.nn.dynamic_rnn).
             train_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
                 train_decoder, maximum_iterations=longest_target)

             # Mask: 1 inside each sequence's length, 0 on the padding up to
             # the longest sequence of the batch.
             loss_mask = tf.sequence_mask(self.decoder_inputs_length,
                                          maxlen=longest_target,
                                          dtype=tf.float32,
                                          name='masks')
             self.masks = loss_mask

             # sequence_loss takes logits [batch, time, vocab], integer
             # targets [batch, time] (no one-hot needed) and weights
             # [batch, time]; the mask removes padding from the loss.
             self.loss = tf.contrib.seq2seq.sequence_loss(
                 logits=train_output.rnn_output,
                 targets=self.decoder_inputs,
                 weights=loss_mask,
                 average_across_timesteps=True,
                 average_across_batch=True)
         else:
             # Inference mode.
             batch_start_ids = [DataUnit.START_INDEX] * self.batch_size

             def embed_ids(ids):
                 # Embedding callable used by the beam-search decoder.
                 return tf.nn.embedding_lookup(self.decoder_embeddings, ids)

             # BeamSearchDecoder arguments used here: the RNN cell, an
             # embedding callable, int32 start tokens of shape [batch_size],
             # the scalar end token, the initial state, the beam width and
             # the optional output layer applied to the RNN output.
             beam_decoder = BeamSearchDecoder(
                 cell=cell,
                 embedding=embed_ids,
                 start_tokens=batch_start_ids,
                 end_token=DataUnit.END_INDEX,
                 initial_state=initial_state,
                 beam_width=self.beam_width,
                 output_layer=output_projection)

             # Step the beam-search decoder for at most max_decode_step steps.
             beam_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
                 beam_decoder, maximum_iterations=self.max_decode_step)

             # Swap the last two axes of the predicted ids (per the
             # tf.contrib.seq2seq docs: [batch, time, beam] becomes
             # [batch, beam, time]).
             self.decoder_pred_decode = tf.transpose(
                 beam_output.predicted_ids, perm=[0, 2, 1])