Code example #1
File: eval.py Project: wly-thu/QGforQA
# Assumed surrounding context (not shown in this snippet): `import numpy as np`
# plus the project's own helpers Meteor, compute_bleu, compute_rouge_L and
# diverse_bleu.
def evaluate(eval_file, answer_dict):
    """Score ranked generation candidates against reference questions.

    The top-ranked candidate gives the main scores; oracle scores come
    from re-ranking all candidates by each metric.
    """
    reference_corpus = []
    translation_corpus = []
    translation_corpus_rouge_oracle = []  # built below but never read
    translation_corpus_bleu_oracle = []
    rouges = []
    div_bleus = []
    rouges_oracle = []
    meteor = Meteor()
    res, res_oracle, gts = [], [], []
    for key, answers in answer_dict.items():
        # Candidates are (score, text) pairs; sort best-first.
        answers = sorted(answers, key=lambda x: x[0], reverse=True)
        # Lowercased reference tokens; [1:-1] drops the boundary tokens
        # stored around the question (presumably GO/EOS markers).
        ground_truths = [
            list(map(lambda x: x.lower(), eval_file[key]["question"][1:-1]))
        ]
        prediction = answers[0][1].lower().split()
        # Score every candidate with all three metrics so metric-specific
        # oracles can be selected below.
        answers_tmp = []
        for answer in answers:
            rouge = compute_rouge_L(answer[1].lower().split(), ground_truths)
            mete = meteor.compute_score(
                [[' '.join(ground_truth) for ground_truth in ground_truths]],
                [' '.join(answer[1].lower().split())])
            bleu = compute_bleu([ground_truths], [answer[1].lower().split()],
                                smooth=True)
            answers_tmp.append((rouge, mete[0], bleu[0], answer[0], answer[1]))
        # Oracle = the candidate that maximizes each metric.
        answers_rouge = sorted(answers_tmp, key=lambda x: x[0], reverse=True)
        answers_mete = sorted(answers_tmp, key=lambda x: x[1], reverse=True)
        answers_bleu = sorted(answers_tmp, key=lambda x: x[2], reverse=True)
        prediction_rouge_oracle = answers_rouge[0][4].lower().split()
        prediction_mete_oracle = answers_mete[0][4].lower().split()
        prediction_bleu_oracle = answers_bleu[0][4].lower().split()
        translation_corpus.append(prediction)
        translation_corpus_rouge_oracle.append(prediction_rouge_oracle)
        translation_corpus_bleu_oracle.append(prediction_bleu_oracle)
        reference_corpus.append(ground_truths)
        # Sentence-level ROUGE-L for the top candidate and its oracle.
        rouge = compute_rouge_L(prediction, ground_truths)
        rouge_oracle = compute_rouge_L(prediction_rouge_oracle, ground_truths)
        rouges.append(rouge)
        rouges_oracle.append(rouge_oracle)
        res.append(' '.join(prediction))
        res_oracle.append(' '.join(prediction_mete_oracle))  # METEOR oracle
        gts.append([' '.join(ground_truth) for ground_truth in ground_truths])
        # Diversity score over this key's candidate set (project helper).
        div_bleus.append(diverse_bleu(answers))
    # Corpus-level BLEU and METEOR over top candidates and oracles.
    bleu = compute_bleu(reference_corpus, translation_corpus)
    bleu_oracle = compute_bleu(reference_corpus,
                               translation_corpus_bleu_oracle)
    mete = meteor.compute_score(gts, res)
    mete_oracle = meteor.compute_score(gts, res_oracle)
    return {
        "bleu": bleu[0] * 100,
        "meteor": mete[0] * 100,
        "rougeL": np.mean(rouges) * 100,
        "bleu_oracle": bleu_oracle[0] * 100,
        "meteor_oracle": mete_oracle[0] * 100,
        "rougeL_oracle": np.mean(rouges_oracle) * 100,
        "diverse_bleu": np.mean(div_bleus) * 100
    }
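
A minimal sketch of how evaluate() might be driven. The input shapes are
inferred from the snippet above; the toy ids, tokens and scores are
illustrative, not from the project, and the helper functions
(Meteor, compute_bleu, compute_rouge_L, diverse_bleu) must be in scope.

# eval_file maps ids to records whose "question" includes boundary tokens;
# answer_dict maps ids to (model score, generated question) candidates.
eval_file = {
    "q1": {"question": ["<GO>", "what", "is", "bleu", "<EOS>"]}
}
answer_dict = {
    "q1": [
        (0.9, "what is BLEU"),
        (0.4, "what does bleu measure"),
    ]
}
scores = evaluate(eval_file, answer_dict)
print(scores)  # {"bleu": ..., "meteor": ..., "rougeL": ..., ...}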
Code example #2
File: eval.py Project: wly-thu/QGforQA
def evaluate_simple(eval_file, answer_dict):
    """Like evaluate(), but score only the top-ranked candidate and skip
    the oracle and diversity computations."""
    reference_corpus = []
    translation_corpus = []
    rouges = []
    meteor = Meteor()
    res, gts = [], []
    for key, answers in answer_dict.items():
        answers = sorted(answers, key=lambda x: x[0], reverse=True)  # best-first
        # Lowercased reference tokens, boundary markers stripped.
        ground_truths = [
            list(map(lambda x: x.lower(), eval_file[key]["question"][1:-1]))
        ]
        prediction = answers[0][1].lower().split()
        translation_corpus.append(prediction)
        reference_corpus.append(ground_truths)
        rouge = compute_rouge_L(prediction, ground_truths)
        rouges.append(rouge)
        res.append(' '.join(prediction))
        gts.append([' '.join(ground_truth) for ground_truth in ground_truths])
    bleu = compute_bleu(reference_corpus, translation_corpus)
    mete = meteor.compute_score(gts, res)
    return {
        "bleu": bleu[0] * 100,
        "meteor": mete[0] * 100,
        "rougeL": np.mean(rouges) * 100
    }
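
compute_rouge_L is a project helper whose implementation is not shown here.
For orientation, the standard ROUGE-L definition (an LCS-based F-measure
taken over the best-matching reference) can be sketched as below. This is a
generic reconstruction under that assumption, not the project's code, and
the beta default is a guess.

def rouge_l_sketch(prediction, ground_truths, beta=1.2):
    # prediction: token list; ground_truths: list of token lists.
    def lcs_len(a, b):
        # Dynamic-programming longest-common-subsequence length.
        dp = [[0] * (len(b) + 1) for _ in range(len(a) + 1)]
        for i, x in enumerate(a):
            for j, y in enumerate(b):
                dp[i + 1][j + 1] = (dp[i][j] + 1 if x == y
                                    else max(dp[i][j + 1], dp[i + 1][j]))
        return dp[-1][-1]

    best = 0.0
    for ref in ground_truths:
        if not prediction or not ref:
            continue
        lcs = lcs_len(prediction, ref)
        if lcs == 0:
            continue
        prec, rec = lcs / len(prediction), lcs / len(ref)
        # F_lcs = (1 + beta^2) * P * R / (R + beta^2 * P)   (Lin, 2004)
        best = max(best, (1 + beta ** 2) * prec * rec / (rec + beta ** 2 * prec))
    return best

# rouge_l_sketch("what is bleu".split(), [["what", "is", "bleu"]]) -> 1.0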
Code example #3
File: eval.py Project: swidi/poemo-generation
# Assumed surrounding context (not shown here): `import numpy as np` plus the
# project's helpers Meteor, compute_bleu, compute_edit_distance_v2,
# compute_emotion_distance and get_emotion_prob.
def evaluate_full(gts, gen_res, gen_dict, gts_arc, emo_feat, method=None):
    """Score generations with corpus BLEU/METEOR plus a method-specific reward."""
    assert len(gts) == len(gen_res)
    reward = 0
    if method == "comet":
        # Reward from the edit distance between generated and reference arcs.
        assert len(gen_dict) == len(gts_arc)
        reward = compute_edit_distance_v2(gen_dict, gts_arc)
    elif method == "clf":
        # Assemble per-example predicted distributions, then compare them
        # with the reference emotion features.
        pred_dist = np.zeros_like(emo_feat)
        for k, v in gen_dict.items():
            pred_dist[k, :] = v
        reward = compute_emotion_distance(pred_dist,
                                          emo_feat,
                                          batch_normalize=True,
                                          method=method)
    elif method == 'clf_prob':
        pred_dist = np.zeros_like(emo_feat)
        for k, v in gen_dict.items():
            pred_dist[k, :] = v
        reward = get_emotion_prob(pred_dist, gts_arc, batch_normalize=True)

    translation_corpus = gen_res
    reference_corpus = gts
    rouges = []  # declared but never used in this function
    meteor = Meteor()
    # compute_bleu expects nested token lists: one list of references
    # (each itself tokenized) per hypothesis.
    bleu = compute_bleu([[j.split()] for j in reference_corpus],
                        [i.split() for i in translation_corpus])
    mete = meteor.compute_score([[r] for r in reference_corpus],
                                translation_corpus)

    return {
        "bleu": bleu[0] * 100,
        "meteor": mete[0] * 100,
        "best_reward": reward
    }
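
A hypothetical call to evaluate_full(). With method=None the reward branches
are skipped, so only compute_bleu and Meteor are exercised; the argument
shapes are inferred from the snippet and all names and values here are
illustrative placeholders, not project data.

import numpy as np

gts = ["the moon rises slowly", "rain falls on the roof"]   # references
gen_res = ["the moon rises", "rain falls on a roof"]        # generations
# gen_dict / gts_arc / emo_feat feed only the reward branches; with
# method=None they can be empty placeholders.
scores = evaluate_full(gts, gen_res, gen_dict={}, gts_arc=[],
                       emo_feat=np.zeros((2, 4)), method=None)
print(scores)  # {"bleu": ..., "meteor": ..., "best_reward": 0}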
Code example #4
File: eval.py Project: wly-thu/QGforQA
def evaluate_rl(eval_file,
                qa_id,
                symbols,
                symbols_rl,
                id2word,
                metric="rouge"):
    """Compute self-critical-style RL rewards: policy score minus baseline.

    `symbols` holds the baseline decoder ids and `symbols_rl` the RL-policy
    ids, one array per decoding step.
    """
    meteor = Meteor()
    ques_limit = len(symbols)  # number of decoding steps
    batch_size, _ = symbols[0].shape
    rewards = np.zeros([batch_size], dtype=np.float32)       # reward deltas
    rewards_rl = np.zeros([batch_size], dtype=np.float32)    # policy scores
    rewards_base = np.zeros([batch_size], dtype=np.float32)  # baseline scores
    ques_rl = np.zeros([batch_size, ques_limit], dtype=np.int32)
    for i, (qid, syms,
            syms_rl) in enumerate(zip(qa_id, zip(*symbols), zip(*symbols_rl))):
        # Per-example id sequences across decoding steps.
        syms = list(np.reshape(syms, [-1]))
        syms_rl = list(syms_rl)
        ground_truths = [
            list(
                map(lambda x: x.lower(),
                    eval_file[str(qid)]["question"][1:-1]))
        ]
        context_tokens = eval_file[str(qid)]["paragraph"]
        # Truncate each sequence at the EOS id (3).
        if 3 in syms:
            syms = syms[:syms.index(3)]
        if 3 in syms_rl:
            syms_rl = syms_rl[:syms_rl.index(3)]
        # Map ids back to words; ids at or above len(id2word) index into
        # the paragraph (a pointer/copy-style vocabulary extension).
        prediction = [
            id2word[sym] if sym in id2word
            else context_tokens[sym - len(id2word)]
            for sym in syms
        ]
        prediction_rl = [
            id2word[sym] if sym in id2word
            else context_tokens[sym - len(id2word)]
            for sym in syms_rl
        ]
        if metric == "rouge":
            reward_base = compute_rouge_L(prediction, ground_truths)
            reward_rl = compute_rouge_L(prediction_rl, ground_truths)
        elif metric == "bleu":
            reward_base, _ = compute_bleu([ground_truths], [prediction],
                                          smooth=True)
            reward_rl, _ = compute_bleu([ground_truths], [prediction_rl],
                                        smooth=True)
        elif metric == "meteor":
            reward_base = meteor.compute_score(
                [[' '.join(ground_truth) for ground_truth in ground_truths]],
                [' '.join(prediction)])[0]
            reward_rl = meteor.compute_score(
                [[' '.join(ground_truth) for ground_truth in ground_truths]],
                [' '.join(prediction_rl)])[0]
        else:
            raise ValueError("unsupported metric: %s" % metric)
        # Reward delta: RL-policy score minus baseline score.
        rewards[i] = reward_rl - reward_base
        rewards_rl[i] = reward_rl
        rewards_base[i] = reward_base
        # Re-wrap with GO (2) and EOS (3), truncating to fit ques_limit.
        syms_rl = [2] + syms_rl[:ques_limit - 2] + [3]
        for j, sym_rl in enumerate(syms_rl):
            ques_rl[i, j] = sym_rl
    return rewards, rewards_rl, rewards_base, ques_rl
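
A hypothetical driver for evaluate_rl(). The shapes follow the zip/reshape
logic above: each step in `symbols` is a [batch_size, 1] array and each step
in `symbols_rl` a [batch_size] array; ids 2/3 are GO/EOS and ids at or above
len(id2word) index into the paragraph. All names and values are toy data,
and the project helpers (Meteor, compute_rouge_L, compute_bleu) must be in
scope.

import numpy as np

id2word = {0: "<PAD>", 1: "<UNK>", 2: "<GO>", 3: "<EOS>",
           4: "what", 5: "is", 6: "bleu"}
eval_file = {"7": {"question": ["<GO>", "what", "is", "bleu", "<EOS>"],
                   "paragraph": ["bleu", "is", "a", "metric"]}}
qa_id = [7]
# Baseline ids per decoding step, shape [batch_size, 1].
symbols = [np.array([[4]]), np.array([[5]]), np.array([[6]]),
           np.array([[3]]), np.array([[0]])]
# RL-policy ids per decoding step, shape [batch_size]; id 7 copies
# paragraph[7 - len(id2word)] = "bleu".
symbols_rl = [np.array([4]), np.array([5]), np.array([7]),
              np.array([3]), np.array([0])]
rewards, rewards_rl, rewards_base, ques_rl = evaluate_rl(
    eval_file, qa_id, symbols, symbols_rl, id2word, metric="rouge")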