Example #1
    def call(self, enc_output, dec_hidden, enc_inp,
             enc_extended_inp, dec_inp, batch_oov_len,
             enc_padding_mask, use_coverage, prev_coverage):
        predictions = []
        attentions = []
        coverages = []
        p_gens = []
        """
        通过调用attention得到decoder第一步所需的context_vector,coverage等值
        your code
        """
        context_vector, attn_dist, coverage_next = self.attention(dec_hidden, 
                                                                  enc_output,
                                                                  enc_padding_mask,
                                                                  use_coverage,
                                                                  prev_coverage)

        for t in range(dec_inp.shape[1]):
            # Teacher forcing: feed the ground-truth token at step t
            dec_x, pred, dec_hidden = self.decoder(tf.expand_dims(dec_inp[:, t], 1),
                                                   dec_hidden,
                                                   enc_output,
                                                   context_vector)
            context_vector, attn_dist, coverage_next = self.attention(dec_hidden,
                                                                      enc_output,
                                                                      enc_padding_mask,
                                                                      use_coverage,
                                                                      coverage_next)
            p_gen = self.pointer(context_vector, dec_hidden, tf.squeeze(dec_x, axis=1))
            predictions.append(pred)
            coverages.append(coverage_next)
            attentions.append(attn_dist)
            p_gens.append(p_gen)
        
        """
        调用calc_final_dist函数完成PGN最终预测概率输出
        your code
        """
        final_dists = decoding.calc_final_dist(enc_extended_inp,
                                               predictions,
                                               attentions,
                                               p_gens,
                                               batch_oov_len,
                                               self.params['vocab_size'],
                                               self.params['batch_size'])
        
        if self.params['mode'] == "train":
            outputs = dict(logits=final_dists, dec_hidden=dec_hidden, attentions=attentions, coverages=coverages, p_gens=p_gens)
        else:
            outputs = dict(logits=tf.stack(final_dists, 1),
                           dec_hidden=dec_hidden,
                           attentions=tf.stack(attentions, 1),
                           coverages=tf.stack(coverages, 1),
                           p_gens=tf.stack(p_gens, 1))
        
        return outputs
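
A note on the helper these examples share: decoding.calc_final_dist is where the pointer-generator mixing happens. Below is a minimal sketch of that computation, following See et al. (2017); the body is an assumption about what the helper does, not the repository's actual implementation.

import tensorflow as tf

def calc_final_dist_sketch(enc_extended_inp, vocab_dists, attn_dists, p_gens,
                           batch_oov_len, vocab_size, batch_size):
    """Mix generation and copy distributions over the extended vocabulary."""
    # Scale the generation distribution by p_gen and the copy (attention)
    # distribution by (1 - p_gen), step by step.
    vocab_dists = [p_gen * dist for p_gen, dist in zip(p_gens, vocab_dists)]
    attn_dists = [(1 - p_gen) * dist for p_gen, dist in zip(p_gens, attn_dists)]

    # Extend each vocab distribution with zeros for the in-batch OOV ids.
    extra_zeros = tf.zeros((batch_size, batch_oov_len))
    vocab_dists_extended = [tf.concat([dist, extra_zeros], axis=1)
                            for dist in vocab_dists]

    # Scatter each attention weight onto the extended-vocab id of the source
    # token it points at (ids assumed int32); scatter_nd sums the weights of
    # repeated source tokens.
    attn_len = tf.shape(enc_extended_inp)[1]
    batch_nums = tf.tile(tf.expand_dims(tf.range(batch_size), 1), [1, attn_len])
    indices = tf.stack([batch_nums, enc_extended_inp], axis=2)
    shape = [batch_size, vocab_size + batch_oov_len]
    attn_dists_projected = [tf.scatter_nd(indices, dist, shape)
                            for dist in attn_dists]

    # Final per-step distribution: generation part plus copy part.
    return [gen + copy for gen, copy in
            zip(vocab_dists_extended, attn_dists_projected)]
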
Example #2
    def call(self, enc_output, dec_hidden, enc_inp, enc_extended_inp, dec_inp,
             batch_oov_len, enc_padding_mask, use_coverage, prev_coverage):
        predictions = []
        attentions = []
        coverages = []
        p_gens = []
        """
        通过调用attention得到decoder第一步所需的context_vector,coverage等值
        """
        context_vector, _, coverage_next = self.attention(
            dec_hidden, enc_output, enc_padding_mask, use_coverage,
            prev_coverage)
        for t in range(dec_inp.shape[1]):
            # Teacher forcing: feed the ground-truth token at step t
            dec_x, pred, dec_hidden = self.decoder(
                tf.expand_dims(dec_inp[:, t], 1), dec_hidden, enc_output,
                context_vector)
            context_vector, attn_dist, coverage_next = self.attention(
                dec_hidden, enc_output, enc_padding_mask, use_coverage,
                coverage_next)
            p_gen = self.pointer(context_vector, dec_hidden,
                                 tf.squeeze(dec_x, axis=1))
            predictions.append(pred)  # each pred: (batch_size, vocab_size)
            coverages.append(coverage_next)
            attentions.append(attn_dist)
            p_gens.append(p_gen)
        """
        调用calc_final_dist函数完成PGN最终预测概率输出
        """
        final_dists = decoding.calc_final_dist(
            _enc_batch_extend_vocab=enc_extended_inp,  # encoder input with OOVs mapped to extended-vocab ids rather than UNK
            vocab_dists=predictions,
            attn_dists=attentions,
            p_gens=p_gens,
            batch_oov_len=batch_oov_len,
            vocab_size=self.params["vocab_size"],
            batch_size=self.params["batch_size"])
        # print("predictions", len(predictions))
        # print("final_dists", len(final_dists))
        # outputs = dict(logits=tf.stack(final_dists, 1), dec_hidden=dec_hidden, attentions=attentions, coverages=coverages)
        if self.params['mode'] == "train":
            outputs = dict(logits=final_dists,
                           dec_hidden=dec_hidden,
                           attentions=attentions,
                           coverages=coverages,
                           p_gens=p_gens)
        else:
            outputs = dict(
                logits=tf.stack(final_dists, 1),
                dec_hidden=dec_hidden,
                attentions=tf.stack(attentions, 1),
                coverages=tf.stack(coverages, 1),
                p_gens=tf.stack(p_gens, 1))

        return outputs
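
The self.pointer layer above yields the generation probability p_gen. Assuming the standard formulation p_gen = sigmoid(w_c·c_t + w_s·s_t + w_x·x_t), a sketch of such a layer follows; the class and its details are illustrative, not the repository's own code.

import tensorflow as tf

class Pointer(tf.keras.layers.Layer):
    """p_gen = sigmoid(w_c . c_t + w_s . s_t + w_x . x_t + b)."""

    def __init__(self):
        super().__init__()
        self.w_c = tf.keras.layers.Dense(1)  # projects the context vector
        self.w_s = tf.keras.layers.Dense(1)  # projects the decoder state
        self.w_x = tf.keras.layers.Dense(1)  # projects the decoder input

    def call(self, context_vector, dec_hidden, dec_inp):
        # Each term maps to one scalar per batch element; the sigmoid of
        # their sum is the probability of generating rather than copying.
        return tf.nn.sigmoid(self.w_c(context_vector) +
                             self.w_s(dec_hidden) +
                             self.w_x(dec_inp))
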
Example #3
    def call(self, enc_output, dec_hidden, enc_inp,
             enc_extended_inp, dec_inp, batch_oov_len,
             enc_padding_mask, use_coverage, prev_coverage):
        predictions = []
        attentions = []
        coverages = []
        p_gens = []
        context_vector, attn_dist, coverage_next = self.attention(
            dec_hidden, enc_output, enc_padding_mask,
            use_coverage=use_coverage, prev_coverage=prev_coverage)

        for t in range(dec_inp.shape[1]):
            # Teacher forcing: feed the ground-truth token into the next time
            # step instead of the model's own prediction
            dec_x, pred, dec_hidden = self.decoder(tf.expand_dims(dec_inp[:, t], 1),
                                                   dec_hidden,
                                                   enc_output,
                                                   context_vector)
            context_vector, attn_dist, coverage_next = self.attention(dec_hidden,
                                                                      enc_output,
                                                                      enc_padding_mask,
                                                                      use_coverage,
                                                                      coverage_next)
            p_gen = self.pointer(context_vector, dec_hidden, tf.squeeze(dec_x, axis=1))
            predictions.append(pred)
            coverages.append(coverage_next)
            attentions.append(attn_dist)
            p_gens.append(p_gen)

        final_dists = decoding.calc_final_dist(
            _enc_batch_extend_vocab=enc_extended_inp,
            vocab_dists=predictions,
            attn_dists=attentions,
            p_gens=p_gens,
            batch_oov_len=batch_oov_len,
            vocab_size=self.params["vocab_size"],
            batch_size=self.params["batch_size"])
        
        if self.params['mode'] == "train":
            outputs = dict(logits=final_dists, dec_hidden=dec_hidden, attentions=attentions, coverages=coverages, p_gens=p_gens)
        else:
            outputs = dict(logits=tf.stack(final_dists, 1),
                           dec_hidden=dec_hidden,
                           attentions=tf.stack(attentions, 1),
                           coverages=tf.stack(coverages, 1),
                           p_gens=tf.stack(p_gens, 1))
        
        return outputs
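
Every example calls self.attention with a coverage vector and gets an updated one back. Here is a sketch of Bahdanau-style attention with coverage in the sense of See et al. (2017); this class is an assumption about the module's shape, not its actual code.

import tensorflow as tf

class CoverageAttention(tf.keras.layers.Layer):
    """Additive attention with an optional coverage term."""

    def __init__(self, units):
        super().__init__()
        self.W_h = tf.keras.layers.Dense(units)  # encoder features
        self.W_s = tf.keras.layers.Dense(units)  # decoder state
        self.W_c = tf.keras.layers.Dense(units)  # coverage features
        self.V = tf.keras.layers.Dense(1)

    def call(self, dec_hidden, enc_output, enc_padding_mask,
             use_coverage=False, prev_coverage=None):
        # dec_hidden: (batch, units) -> (batch, 1, units) for broadcasting
        hidden_with_time = tf.expand_dims(dec_hidden, 1)
        score = self.W_h(enc_output) + self.W_s(hidden_with_time)
        if use_coverage and prev_coverage is not None:
            # prev_coverage: (batch, attn_len, 1), sum of past attention
            score += self.W_c(prev_coverage)
        score = self.V(tf.nn.tanh(score))  # (batch, attn_len, 1)

        # Zero out padding positions and renormalise.
        attn_dist = tf.nn.softmax(tf.squeeze(score, -1))  # (batch, attn_len)
        attn_dist *= enc_padding_mask
        attn_dist /= tf.reduce_sum(attn_dist, axis=1, keepdims=True)

        # Coverage accumulates attention mass across decoding steps.
        coverage = tf.expand_dims(attn_dist, -1)
        if use_coverage and prev_coverage is not None:
            coverage += prev_coverage

        # Context vector: attention-weighted sum of encoder states.
        context_vector = tf.reduce_sum(
            tf.expand_dims(attn_dist, -1) * enc_output, axis=1)
        return context_vector, attn_dist, coverage
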
Example #4
    def call(self, inp, tar, training, enc_padding_mask,
             look_ahead_mask, dec_padding_mask):

        enc_output = self.encoder(inp, training, enc_padding_mask)  # (batch_size, inp_seq_len, d_model)
        
        # dec_output.shape == (batch_size, tar_seq_len, d_model)
        dec_output, attention_weights = self.decoder(tar, enc_output, training, look_ahead_mask, dec_padding_mask)
        
        final_output = self.final_layer(dec_output)  # (batch_size, tar_seq_len, target_vocab_size)
        if self.params["pointer_gen"]:
            # As written this branch cannot run: enc_extended_inp, predictions,
            # attentions, p_gens and batch_oov_len are never defined in this
            # scope; they would have to be passed into call() or produced by
            # the decoder.
            final_dists = calc_final_dist(enc_extended_inp,
                                          predictions,
                                          attentions,
                                          p_gens,
                                          batch_oov_len,
                                          self.params["vocab_size"],
                                          self.params["batch_size"])
            outputs = dict(logits=tf.stack(final_dists, 1), attentions=attention_weights)
        else:
            # Without the pointer mechanism the projected decoder output is
            # already the per-step distribution over the vocabulary.
            outputs = dict(logits=final_output, attentions=attention_weights)
        return outputs

    def _decode_target(self, enc_output, dec_hidden, enc_extended_inp, dec_inp,
                       batch_oov_len, enc_padding_mask, use_coverage,
                       prev_coverage):
        context_vector, attn_dist, coverage_next = self.attention(
            dec_hidden,        # (batch_size, units), e.g. (16, 256)
            enc_output,        # (batch_size, attn_len, units), e.g. (16, 200, 256)
            enc_padding_mask,  # (batch_size, attn_len), e.g. (16, 200)
            use_coverage,
            prev_coverage)     # None on the first decoding step
        predictions = []
        attentions = []
        coverages = []
        p_gens = []
        for t in range(dec_inp.shape[1]):
            # Teacher forcing: feed the ground-truth token at step t
            dec_x, pred, dec_hidden = self.decoder(
                tf.expand_dims(dec_inp[:, t], 1), dec_hidden, enc_output,
                context_vector)
            context_vector, attn_dist, coverage_next = self.attention(
                dec_hidden, enc_output, enc_padding_mask, use_coverage,
                coverage_next)
            p_gen = self.pointer(context_vector, dec_hidden,
                                 tf.squeeze(dec_x, axis=1))
            predictions.append(pred)
            coverages.append(coverage_next)
            attentions.append(attn_dist)
            p_gens.append(p_gen)

        final_dists = decoding.calc_final_dist(enc_extended_inp, predictions,
                                               attentions, p_gens,
                                               batch_oov_len,
                                               self.params["vocab_size"],
                                               self.params["batch_size"])

        outputs = dict(logits=tf.stack(final_dists, 1),
                       dec_hidden=dec_hidden,
                       attentions=attentions,
                       coverages=coverages)
        return outputs
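
In train mode the examples return attentions and coverages precisely so that a coverage penalty can be added to the loss. A sketch of that penalty, assuming attentions is the per-step list accumulated above and dec_padding_mask (a hypothetical name here) masks padded target positions:

import tensorflow as tf

def coverage_loss(attentions, dec_padding_mask):
    """covloss_t = sum_i min(a_i^t, c_i^t), with c^t the attention mass
    accumulated before step t (See et al., 2017, Eq. 12)."""
    coverage = tf.zeros_like(attentions[0])  # (batch_size, attn_len)
    step_losses = []
    for attn_dist in attentions:
        # Penalise attending again to source positions already covered.
        step_losses.append(
            tf.reduce_sum(tf.minimum(attn_dist, coverage), axis=1))
        coverage += attn_dist
    # (batch_size, dec_len): zero out padded target steps, then average.
    masked = tf.stack(step_losses, axis=1) * dec_padding_mask
    return tf.reduce_mean(tf.reduce_sum(masked, axis=1))
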