Example #1
    def train_step(enc_inp, enc_extended_inp, dec_inp, dec_tar, batch_oov_len,
                   enc_padding_mask, padding_mask):
        with tf.GradientTape() as tape:
            enc_output, enc_hidden = model.call_encoder(enc_inp)
            dec_hidden = enc_hidden
            # During training, the reference-summary tokens are fed in step by
            # step (at inference time, the previous step's generated token is
            # used instead).
            outputs = model(
                enc_output,  # shape=(3, 200, 256)
                dec_hidden,  # shape=(3, 256)
                enc_inp,  # shape=(3, 200)
                enc_extended_inp,  # shape=(3, 200)
                dec_inp,  # shape=(3, 50)
                batch_oov_len,  # shape=()
                enc_padding_mask,  # shape=(3, 200)
                params['is_coverage'],
                prev_coverage=None)
            loss = loss_function(dec_tar, outputs, padding_mask,
                                 params["cov_loss_wt"], params['is_coverage'])

        # Collect each sub-module's trainable variables explicitly (the
        # commented-out model.trainable_variables would yield the same set
        # if these are the model's only sub-modules).
        variables = (model.encoder.trainable_variables
                     + model.attention.trainable_variables
                     + model.decoder.trainable_variables
                     + model.pointer.trainable_variables)
        gradients = tape.gradient(loss, variables)
        optimizer.apply_gradients(zip(gradients, variables))
        return loss
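For context, a loop like the one below could drive this train_step. This is a minimal sketch, not code from the original source: the dataset variable and the batch field names (enc_input, extended_enc_input, and so on) are assumptions about how the batcher packages its output.

def train(dataset, epochs):
    for epoch in range(epochs):
        epoch_losses = []
        for batch in dataset:
            # Batch field names below are assumed, not from the source.
            loss = train_step(batch['enc_input'],           # (batch, enc_len)
                              batch['extended_enc_input'],  # (batch, enc_len)
                              batch['dec_input'],           # (batch, dec_len)
                              batch['dec_target'],          # (batch, dec_len)
                              batch['max_oov_len'],         # scalar
                              batch['enc_mask'],            # (batch, enc_len)
                              batch['dec_mask'])            # (batch, dec_len)
            epoch_losses.append(float(loss))
        print('epoch {}: mean loss {:.4f}'.format(
            epoch, sum(epoch_losses) / len(epoch_losses)))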
Example #2
    def train_step(enc_inp, enc_extended_inp, dec_inp, dec_tar, batch_oov_len,
                   enc_padding_mask, padding_mask):
        # Input shapes: enc_inp (16, 200), dec_tar (16, 40), dec_inp (16, 40)
        with tf.GradientTape() as tape:
            # Run the encoder; its initial hidden state is all zeros.
            enc_output, enc_hidden = model.call_encoder(enc_inp)
            # enc_output: (16, 200, 200), enc_hidden: (16, 200)

            # Initialize the decoder state with the final encoder state.
            dec_hidden = enc_hidden
            outputs = model(
                enc_output,  # shape=(16, 200, 200)
                dec_hidden,  # shape=(16, 200)
                enc_inp,  # shape=(16, 200)
                enc_extended_inp,  # shape=(16, 200)
                dec_inp,  # shape=(16, 40)
                batch_oov_len,  # shape=()
                enc_padding_mask,  # shape=(16, 200)
                params['is_coverage'],
                prev_coverage=None)
            loss = loss_function(dec_tar, outputs, padding_mask,
                                 params["cov_loss_wt"], params['is_coverage'])

        # Collect each sub-module's trainable variables explicitly (the
        # commented-out model.trainable_variables would yield the same set
        # if these are the model's only sub-modules).
        variables = (model.encoder.trainable_variables
                     + model.attention.trainable_variables
                     + model.decoder.trainable_variables
                     + model.pointer.trainable_variables)
        gradients = tape.gradient(loss, variables)
        #print("gradients",len(gradients),gradients[0].get_shape)#len=epoch(sample_num//batch_size)-1,shape(256, 128+128+128)
        optimizer.apply_gradients(zip(gradients, variables))
        return loss
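Both examples call loss_function, which is not shown on this page. The sketch below illustrates what a padding-masked negative log-likelihood with the coverage penalty of See et al. (2017) typically looks like; it is an assumption, not the original implementation, and in particular the keys 'final_dists' and 'attentions' presume a dict-like model output.

import tensorflow as tf

def loss_function(dec_tar, outputs, padding_mask, cov_loss_wt, is_coverage):
    # Sketch only: assumes outputs['final_dists'] has shape
    # (batch, dec_len, extended_vocab) and outputs['attentions'] has
    # shape (batch, dec_len, enc_len).
    final_dists = outputs['final_dists']
    # Probability assigned to each gold target token at each decoding step.
    probs = tf.gather(final_dists, dec_tar, axis=2, batch_dims=2)
    losses = -tf.math.log(tf.clip_by_value(probs, 1e-10, 1.0))

    # Mask out padded target positions and average over real tokens.
    mask = tf.cast(padding_mask, dtype=losses.dtype)
    loss = tf.reduce_mean(tf.reduce_sum(losses * mask, axis=1)
                          / tf.reduce_sum(mask, axis=1))

    if is_coverage:
        # Coverage penalty: sum(min(attention, coverage)) at each step
        # discourages re-attending to already-covered source positions.
        attentions = outputs['attentions']
        coverage = tf.cumsum(attentions, axis=1, exclusive=True)
        step_cov = tf.reduce_sum(tf.minimum(attentions, coverage), axis=2)
        cov_loss = tf.reduce_mean(tf.reduce_sum(step_cov * mask, axis=1)
                                  / tf.reduce_sum(mask, axis=1))
        loss += cov_loss_wt * cov_loss
    return loss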