Ejemplo n.º 1
0
def bleu_score_enc_dec(encoder, decoder, src, tar, batch_size=64):
    """Run batched NMT inference and return a smoothed BLEU score.

    Predictions are produced batch by batch via ``nmt_infer``; only full
    batches are scored (trailing rows that do not fill a batch are
    dropped).  Trailing zero padding is stripped from both references
    and hypotheses before computing BLEU.
    """
    n_full = src.shape[0] // batch_size
    predictions = np.zeros((n_full * batch_size, tar.shape[1]),
                           dtype=np.int32)
    for idx, (batch_src, _) in enumerate(
            nmt_infer_generator(src, tar, batch_size)):
        lo = idx * batch_size
        predictions[lo:lo + batch_size] = nmt_infer(
            encoder, decoder, batch_src)

    references = [np.trim_zeros(row, trim='b') for row in tar]
    hypotheses = [np.trim_zeros(row, trim='b') for row in predictions]
    return bleu_score(references, hypotheses, smooth=True)
Ejemplo n.º 2
0
def bleu_score_enc_dec(encoder, decoder, src, tar, batch_size=64):
    """Evaluate an encoder/decoder pair with a smoothed BLEU score.

    Inference runs in fixed-size batches drawn from
    ``nmt_infer_generator``; the prediction buffer holds only the rows
    belonging to complete batches.  Zero padding at the end of each
    sequence is trimmed from targets and predictions prior to scoring.
    """
    full_batches = src.shape[0] // batch_size
    pred = np.zeros((full_batches * batch_size, tar.shape[1]),
                    dtype=np.int32)
    offset = 0
    for batch_src, _ in nmt_infer_generator(src, tar, batch_size):
        pred[offset:offset + batch_size] = nmt_infer(encoder, decoder,
                                                     batch_src)
        offset += batch_size

    refs = [np.trim_zeros(seq, trim='b') for seq in tar]
    hyps = [np.trim_zeros(seq, trim='b') for seq in pred]
    return bleu_score(refs, hyps, smooth=True)
Ejemplo n.º 3
0
def get_pairwise_edits(text_before, text_after, tokenizer):
    """Align sentences across two text versions and collect edit pairs.

    For each sentence of ``text_before``, the best-matching sentence in
    a +/-4-sentence window of ``text_after`` is chosen by BLEU score.
    A pair is kept as an edit when the match is similar but not
    identical (BLEU strictly between 0.5 and 1.0) and the Levenshtein
    distance exceeds 3.  Each edit tuple carries the preceding and
    following sentences from the original text as context ('NA' at the
    document boundaries).
    """
    min_bleu = 0.5
    min_leven = 3
    window = 4

    before = [s.strip() for s in tokenizer.tokenize(text_before)]
    after = [s.strip() for s in tokenizer.tokenize(text_after)]

    edits = set()
    for i, sentence in enumerate(before):
        lo = max(0, i - window)
        hi = min(i + window, len(after))
        src_tokens = word_tokenize(sentence)
        # (score, candidate index) for every sentence in the window.
        scored = [(bleu_score(src_tokens, word_tokenize(after[j])), j)
                  for j in range(lo, hi)]
        if not scored:
            continue
        # max() returns the first maximal item, matching the original
        # "first index of the max score" tie-breaking.
        best_bleu, best_j = max(scored, key=lambda pair: pair[0])
        distance = Levenshtein.distance(sentence, after[best_j])
        if min_bleu < best_bleu < 1.0 and distance > min_leven:
            context_before = 'NA' if i == 0 else before[i - 1]
            context_after = ('NA' if i == len(before) - 1
                             else before[i + 1])
            edits.add((sentence, after[best_j],
                       context_before, context_after))

    return list(edits)
Ejemplo n.º 4
0
    def run(self) -> None:
        """Evaluate the model over the test iterator and print metrics.

        Runs one pass over ``self.test_iter`` with gradients disabled,
        accumulating loss, BLEU, F1 and distinct-1/2 per batch, then
        prints the per-batch averages along with perplexity derived
        from the mean loss.  Results are printed only; nothing is
        returned.
        """
        # Switch off dropout/batch-norm training behavior.
        self.model.eval()

        # Per-metric accumulators, summed over batches and averaged at
        # the end.
        total_bleu = 0
        total_f1 = 0
        total_dist1 = 0
        total_dist2 = 0
        total_loss = 0

        print('Run eval...')
        with torch.no_grad():
            for batch_idx, feature in enumerate(self.test_iter):
                # Move all tensors in the batch onto the eval device.
                utils.feature_to_device(feature, self.device)

                # Model produces two outputs: the main response logits
                # and a language-model head's logits (presumably an
                # auxiliary LM objective — confirm against models.AR).
                out, out_lm = self.model(feature)
                # Debug peek: argmax token at position [3, 0] of each
                # output, decoded back to a string.
                print(self.vocab.itos(out[3, 0].argmax(dim=0).item()),
                      self.vocab.itos(out_lm[3, 0].argmax(dim=0).item()))
                loss, loss_lm = models.AR.loss(self.out_loss_fn, out, out_lm,
                                               feature.resp, feature.lm.y)
                print(loss, loss_lm)
                # Combine the two losses; alpha weights the LM term.
                loss = loss + self.model_config.alpha * loss_lm
                total_loss += loss.item()

                # target include w1, w2...[EOS], len: max_seq_length + 1
                target = copy.deepcopy(feature.resp[1:])
                # feature will be changed
                pred, pred_padded = utils.sample_sequence(
                    feature, self.vocab, self.model, self.args)

                # Decode ids to token strings.  Targets are wrapped in
                # an extra list (one reference per hypothesis) as the
                # BLEU metric expects.
                pred_tokens = [[self.vocab.itos(k) for k in ks] for ks in pred]
                target_tokens = [[[self.vocab.itos(k) for k in ks]]
                                 for ks in target.T.tolist()]
                # Human-readable dump of the first and last examples in
                # the batch for eyeballing generation quality.
                print('----------------------------------')
                print(
                    'Context: ', ''.join([
                        self.vocab.itos(k)
                        for k in feature.context.T.tolist()[0]
                    ]))
                print(
                    'LM x: ', ''.join([
                        self.vocab.itos(k) for k in feature.lm.x.T.tolist()[0]
                    ]))
                print(
                    'LM y: ', ''.join([
                        self.vocab.itos(k) for k in feature.lm.y.T.tolist()[0]
                    ]))
                print(
                    'Pred: ', ''.join([
                        self.vocab.itos(k) for k in pred_padded.T.tolist()[0]
                    ]))
                print('Target: ', ''.join(target_tokens[0][0]))
                print(
                    'Pred: ', ''.join([
                        self.vocab.itos(k) for k in pred_padded.T.tolist()[-1]
                    ]))
                print('Target: ', ''.join(target_tokens[-1][0]))
                print('----------------------------------')
                bleu = metrics.bleu_score(pred_tokens, target_tokens)
                f1 = metrics.f1_score(pred_padded.T.to('cpu'),
                                      target.T.to('cpu'))
                # dist1 = metrics.distinct_score([v[:-1] for v in pred])
                dist1 = metrics.distinct_score(pred_tokens)
                dist2 = metrics.distinct_score(pred_tokens, 2)

                total_bleu += bleu
                total_f1 += f1
                total_dist1 += dist1
                total_dist2 += dist2

        # Average every metric over the number of batches.
        l = len(self.test_iter)
        bleu = total_bleu / l
        f1 = total_f1 / l
        dist1 = total_dist1 / l
        dist2 = total_dist2 / l
        # https://stackoverflow.com/questions/59209086/calculate-perplexity-in-pytorch
        # see per-word perplexity:
        # https://github.com/huggingface/transfer-learning-conv-ai/blob/master/convai_evaluation.py#L161
        # https://github.com/facebookresearch/ParlAI/blob/56d46551190a7ffaedccd13534412d43bc7076e5/parlai/scripts/eval_ppl.py
        # Perplexity as exp(mean combined loss per batch); note this is
        # per-batch, not per-word (see links above for the distinction).
        ppl = math.exp(total_loss / l)

        print(f'\tBleu: {bleu:.8f} | F1: {f1:.8f} | '
              f'Dist1: {dist1:.3f} | Dist2: {dist2:.3f} | PPL: {ppl:7.3f}')