Example #1
    def build_train_decoder(self):
        print('Building train decoder...')

        ending = tf.strided_slice(self.decoder_targets, [0, 0], [self.batch_size, -1], [1, 1])
        decoder_input = tf.concat([tf.fill([self.batch_size, 1], self.word_to_id['<GO>']), ending], 1)
        decoder_inputs_embedded = tf.nn.embedding_lookup(self.embedding, decoder_input)

        if self.teacher_forcing:
            training_helper = ScheduledEmbeddingTrainingHelper(
                inputs=decoder_inputs_embedded,
                sequence_length=self.decoder_targets_length,
                embedding=self.embedding,
                sampling_probability=self.teacher_forcing_probability,
                time_major=False,
                name='teacher_forcing_training_helper'
            )
        else:
            training_helper = TrainingHelper(
                inputs=decoder_inputs_embedded,
                sequence_length=self.decoder_targets_length,
                time_major=False,
                name='training_helper'
            )

        training_decoder = BasicDecoder(
            cell=self.decoder_cell,
            helper=training_helper,
            initial_state=self.decoder_initial_state,
            output_layer=self.output_layer
        )

        decoder_outputs, _, _ = dynamic_decode(
            decoder=training_decoder,
            impute_finished=True,
            maximum_iterations=self.max_target_sequence_length
        )

        self.decoder_logits_train = tf.identity(decoder_outputs.rnn_output)

        # loss
        # sequence_loss is the weighted cross-entropy loss for a sequence of logits.
        # Args:
        #     logits:  [batch_size, sequence_length, num_decoder_symbols];
        #              the prediction over all classes at each timestep.
        #     targets: [batch_size, sequence_length]; the true class at each timestep.
        #     weights: [batch_size, sequence_length]; the weight of each prediction in the sequence.

        self.loss = sequence_loss(
            logits=self.decoder_logits_train,
            targets=self.decoder_targets,
            weights=self.mask
        )

        # summary
        tf.summary.scalar('loss', self.loss) #Outputs a Summary protocol buffer containing a single scalar value.
        self.summary_op = tf.summary.merge_all() #Merges all summaries collected in the default graph.

        self.build_optimizer()
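
The weights argument of sequence_loss (here self.mask) is normally a padding mask built from the true target lengths, so padded positions do not contribute to the loss. A minimal sketch of how such a mask could be constructed, using placeholder names that mirror the fields used above (decoder_targets_length, max_target_sequence_length); this is an illustration, not part of the original example:

import tensorflow as tf  # TF 1.x, matching the examples on this page

# hypothetical placeholder for the true length of every target sentence in the batch
decoder_targets_length = tf.placeholder(tf.int32, [None], name='decoder_targets_length')
max_target_sequence_length = tf.reduce_max(decoder_targets_length, name='max_target_len')

# 1.0 for real tokens, 0.0 for padding; shape [batch_size, max_target_sequence_length]
mask = tf.sequence_mask(decoder_targets_length,
                        max_target_sequence_length,
                        dtype=tf.float32,
                        name='mask')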
Example #2
    def build_train_decoder(self):
        print('Building train decoder...')

        ending = tf.strided_slice(self.decoder_targets, [0, 0], [self.batch_size, -1], [1, 1])
        decoder_input = tf.concat([tf.fill([self.batch_size, 1], self.word_to_id['<GO>']), ending], 1)
        decoder_inputs_embedded = tf.nn.embedding_lookup(self.embedding, decoder_input)

        if self.teacher_forcing:
            training_helper = ScheduledEmbeddingTrainingHelper(
                inputs=decoder_inputs_embedded,
                sequence_length=self.decoder_targets_length,
                embedding=self.embedding,
                sampling_probability=self.teacher_forcing_probability,
                time_major=False,
                name='teacher_forcing_training_helper'
            )
        else:
            training_helper = TrainingHelper(
                inputs=decoder_inputs_embedded,
                sequence_length=self.decoder_targets_length,
                time_major=False,
                name='training_helper'
            )

        training_decoder = BasicDecoder(
            cell=self.decoder_cell,
            helper=training_helper,
            initial_state=self.decoder_initial_state,
            output_layer=self.output_layer
        )

        decoder_outputs, _, _ = dynamic_decode(
            decoder=training_decoder,
            impute_finished=True,
            maximum_iterations=self.max_target_sequence_length
        )

        decoder_logits_train = tf.identity(decoder_outputs.rnn_output)

        # loss
        self.loss = sequence_loss(
            logits=decoder_logits_train,
            targets=self.decoder_targets,
            weights=self.mask
        )

        # summary
        tf.summary.scalar('loss', self.loss)
        self.summary_op = tf.summary.merge_all()
        self.writer = tf.summary.FileWriter('log/train', self.sess.graph)

        self.build_optimizer()
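
For reference, the summary op and FileWriter built in Example #2 would typically be driven from a training loop roughly as below. This is only a sketch: model, train_op, feed_dict and num_steps are assumed names, not part of the example above.

# sketch of a TF 1.x training loop around the graph built in Example #2
for step in range(num_steps):                      # num_steps: assumed number of training steps
    _, loss_val, summary = model.sess.run(
        [model.train_op, model.loss, model.summary_op],
        feed_dict=feed_dict)                       # feed_dict: one batch of encoder/decoder data
    model.writer.add_summary(summary, global_step=step)  # appears in TensorBoard under log/train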
Example #3
    def build_train_decoder(self, decoder_targets, decoder_targets_length, max_target_sequence_length, mask, name):
        ending = tf.strided_slice(decoder_targets, [0, 0], [self.batch_size, -1], [1, 1])
        decoder_input = tf.concat([tf.fill([self.batch_size, 1], self.word_to_id['<GO>']), ending], 1)
        decoder_inputs_embedded = tf.nn.embedding_lookup(self.embedding, decoder_input)

        decoder_cell, decoder_initial_state = self.build_decoder_cell()
        output_layer = tf.layers.Dense(
            self.vocab_size,
            kernel_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1)
        )

        if self.teacher_forcing:
            training_helper = ScheduledEmbeddingTrainingHelper(
                inputs=decoder_inputs_embedded,
                sequence_length=decoder_targets_length,
                embedding=self.embedding,
                sampling_probability=self.teacher_forcing_probility,
                time_major=False,
                name='teacher_forcing_training_helper_' + name
            )
        else:
            training_helper = TrainingHelper(
                inputs=decoder_inputs_embedded,
                sequence_length=decoder_targets_length,
                time_major=False,
                name='training_helper_' + name
            )

        training_decoder = BasicDecoder(
            cell=decoder_cell,
            helper=training_helper,
            initial_state=decoder_initial_state,
            output_layer=output_layer
        )

        decoder_outputs, _, _ = dynamic_decode(
            decoder=training_decoder,
            impute_finished=True,
            maximum_iterations=max_target_sequence_length
        )

        decoder_logits_train = tf.identity(decoder_outputs.rnn_output)

        # loss
        loss = sequence_loss(
            logits=decoder_logits_train,
            targets=decoder_targets,
            weights=mask
        )

        return loss
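
Example #3 depends on a build_decoder_cell() method that is not shown here. A minimal sketch of what such a method could return, assuming a single-layer LSTM of size self.rnn_size initialised from an assumed encoder state self.encoder_final_state:

import tensorflow as tf

def build_decoder_cell(self):
    # single-layer LSTM decoder; rnn_size and encoder_final_state are assumed attributes
    decoder_cell = tf.nn.rnn_cell.LSTMCell(self.rnn_size)
    # start decoding from the final encoder state
    decoder_initial_state = self.encoder_final_state
    return decoder_cell, decoder_initial_state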
Example #4
    def setup_decoder(self):
        output_embed = tf.nn.embedding_lookup([self.embedding],
                                              self.att_label[:, :-1])
        decoder_lengths = tf.tile([self.max_dec_iteration[0] - 1],
                                  [self.batch_size])
        helper = ScheduledEmbeddingTrainingHelper(output_embed,
                                                  decoder_lengths,
                                                  self.embedding, 0.1)

        output_layer = Dense(units=self.vocab_size)
        self.decoder = BasicDecoder(cell=self.cell,
                                    helper=helper,
                                    initial_state=self.cell.zero_state(
                                        dtype=tf.float32,
                                        batch_size=self.batch_size),
                                    output_layer=output_layer)
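
Example #4 only constructs the decoder object; it still has to be unrolled with dynamic_decode, as in the other examples. A sketch of that call, reusing the names from Example #4 (max_dec_iteration is taken from the snippet above):

# sketch: unrolling the decoder built in setup_decoder()
outputs, final_state, final_lengths = dynamic_decode(
    decoder=self.decoder,
    impute_finished=True,
    maximum_iterations=self.max_dec_iteration[0] - 1)
logits = outputs.rnn_output   # [batch_size, time, vocab_size]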
Example #5
    def build_train_decoder(self):
        print('Building train decoder...')

        # tf.strided_slice(data, begin, end, strides) is a strided-slice op; each slice interval is closed on the left
        # and open on the right (worth looking up separately if the op is unfamiliar).
        # Here data is decoder_targets, the ground-truth target sentences. Because end is exclusive, the -1 in end
        # stops before the last element of that dimension, so ending holds each target sentence of the batch
        # with its final token dropped.
        ending = tf.strided_slice(self.decoder_targets, [0, 0],
                                  [self.batch_size, -1], [1, 1])
        # tf.fill(dims, value) creates a tensor of shape dims filled with value.
        # tf.concat(values, axis) concatenates values along the axis dimension.
        # Here a <GO> token is prepended to every target sentence.
        decoder_input = tf.concat(
            [tf.fill([self.batch_size, 1], self.word_to_id['<GO>']), ending],
            1)
        # tf.nn.embedding_lookup() gathers the rows of a tensor at the given indices:
        # the first argument is the embedding tensor, the second is the indices.
        # It converts the word ids in decoder_input into embedding vectors.
        decoder_inputs_embedded = tf.nn.embedding_lookup(
            self.embedding, decoder_input)

        # Define a Helper. It is part of the Decoder and decides what the Decoder receives as input at each step.
        # The official docs list the following Helper classes:
        # "Helper": the basic abstract class.
        # "TrainingHelper": the Helper most often used during training; the next input is the ground-truth target of the previous step.
        # "GreedyEmbeddingHelper": the Helper most often used at inference time; the next input is the embedding of the most probable word from the previous step.
        # "SampleEmbeddingHelper": inference Helper, subclass of GreedyEmbeddingHelper; the next input is the embedding of a word sampled from the previous step's output distribution.
        # "CustomHelper": the most basic helper; user-defined helpers are usually built on it, and you define how the next input is derived from the output.
        # "ScheduledEmbeddingTrainingHelper": training Helper, subclass of TrainingHelper; adds a Bernoulli-style schedule that samples between the ground-truth embedding and the embedded model output.
        # "ScheduledOutputTrainingHelper": training Helper, subclass of TrainingHelper; samples directly from the outputs.
        # "InferenceHelper": a special case of CustomHelper, used only at inference time; you also define how the next input is obtained.
        # (A greedy inference decoder built with GreedyEmbeddingHelper is sketched after this example.)

        if self.teacher_forcing:  # if teacher forcing is used
            training_helper = ScheduledEmbeddingTrainingHelper(  # define a ScheduledEmbeddingTrainingHelper
                inputs=decoder_inputs_embedded,  # decoder inputs
                sequence_length=self.decoder_targets_length,  # lengths of the inputs
                embedding=self.embedding,  # embedding matrix
                sampling_probability=self.teacher_forcing_probability,  # probability of feeding the sampled model output instead of the target
                # time_major says whether time is the first dimension: if True the inputs must be T x B x E, otherwise B x T x E,
                # where T is the sequence length, B the batch size and E the embedding dimension.
                time_major=False,
                name='teacher_forcing_training_helper')
        else:  # without teacher forcing
            training_helper = TrainingHelper(  # define a TrainingHelper
                inputs=decoder_inputs_embedded,  # decoder inputs
                sequence_length=self.decoder_targets_length,  # lengths of the inputs
                # time_major says whether time is the first dimension: if True the inputs must be T x B x E, otherwise B x T x E,
                # where T is the sequence length, B the batch size and E the embedding dimension.
                time_major=False,
                name='training_helper')

        training_decoder = BasicDecoder(  # basic sampling decoder
            cell=self.decoder_cell,  # the RNN cell to use
            helper=training_helper,  # the helper to use
            initial_state=self.decoder_initial_state,  # the initial state h0
            output_layer=self.output_layer  # the output (projection) layer
        )

        decoder_outputs, _, _ = dynamic_decode(  # dynamic decoding loop
            decoder=training_decoder,  # the decoder instance
            # With impute_finished=True, once a sequence finishes its last state is carried forward and its outputs are
            # zeroed, so the final states and outputs stay correct and finished steps are ignored during back-propagation,
            # at some cost in speed.
            impute_finished=True,
            maximum_iterations=self.max_target_sequence_length  # maximum number of decoding steps, set to the longest target length
        )

        # tf.identity returns a tensor with the same contents; here it mainly gives the training logits
        # their own named node in the graph.
        self.decoder_logits_train = tf.identity(decoder_outputs.rnn_output)

        # define the loss function
        self.loss = sequence_loss(  # use sequence_loss as the loss
            logits=self.decoder_logits_train,  # output logits
            targets=self.decoder_targets,  # ground-truth targets
            weights=self.mask  # the mask, which excludes padded positions so the loss is computed only over real tokens
        )

        # summary, for visualisation
        tf.summary.scalar('loss', self.loss)
        self.summary_op = tf.summary.merge_all()

        # continue in build_optimizer()
        self.build_optimizer()
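
The helper list translated in Example #5 mentions GreedyEmbeddingHelper for inference. A minimal sketch of the matching inference decoder, assuming the same attributes as Example #5 (embedding, decoder_cell, decoder_initial_state, output_layer, word_to_id), plus an assumed '<EOS>' token and max_decode_steps limit:

from tensorflow.contrib.seq2seq import GreedyEmbeddingHelper, BasicDecoder, dynamic_decode
import tensorflow as tf

def build_inference_decoder(self, max_decode_steps):
    # greedy decoding: at every step the most probable word is embedded and fed back in
    start_tokens = tf.fill([self.batch_size], self.word_to_id['<GO>'])
    inference_helper = GreedyEmbeddingHelper(
        embedding=self.embedding,
        start_tokens=start_tokens,
        end_token=self.word_to_id['<EOS>'])  # '<EOS>' assumed to be in word_to_id

    inference_decoder = BasicDecoder(
        cell=self.decoder_cell,
        helper=inference_helper,
        initial_state=self.decoder_initial_state,
        output_layer=self.output_layer)

    inference_outputs, _, _ = dynamic_decode(
        decoder=inference_decoder,
        maximum_iterations=max_decode_steps)  # hard limit on decoded length
    self.decoder_predictions = inference_outputs.sample_id  # [batch_size, time] word ids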