Example #1
def validate(self):
    # Predict labels for the held-out set, then score against the gold labels.
    self.pred_labels = self.classify()
    acc = accuracy(self.true_labels, self.pred_labels)
    ematch = exact_match(self.true_labels, self.pred_labels)
    pre_micro, rec_micro, f_micro = f_score_micro(self.true_labels, self.pred_labels)
    pre_label, rec_label, f_label, prec_result, recall_result = f_score_by_label(
        self.true_labels, self.pred_labels, len(self.label_dict))
    return (acc, ematch, pre_micro, rec_micro, f_micro,
            pre_label, rec_label, f_label, prec_result, recall_result)
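The metric helpers called here (accuracy, exact_match, f_score_micro, f_score_by_label) are defined elsewhere in the project. As a rough guide, exact_match might look like the following minimal sketch; the signature and behavior are assumptions for illustration, not the project's actual code.

def exact_match(true_labels, pred_labels):
    """Fraction of samples whose prediction equals the gold target exactly.

    Relies only on equality, so it works for answer strings (Examples #2
    and #4) as well as label-id lists (Examples #1 and #5). Illustrative
    sketch only; the real helper may compare sets instead of lists.
    """
    assert len(true_labels) == len(pred_labels)
    matches = sum(t == p for t, p in zip(true_labels, pred_labels))
    return matches / len(true_labels)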
Example #2
def evaluate():
    """
    Compute the BLEU score using the metrics provided by the instructor.
    :return: evaluation results
    """
    with open(TRAIN_DATA, 'r', encoding='utf-8') as f:
        train_data = [json.loads(line.strip()) for line in f]
    with open(TRAIN_ANSWER, 'r', encoding='utf-8') as f:
        train_ans = [json.loads(line.strip()) for line in f]
    cnt = len(train_data)
    all_prediction = []
    all_ground_truth = []
    bleu = p = r = f1 = 0.0
    # Accumulate per-sample BLEU-1 and P/R/F1, then average over the set.
    for i in range(cnt):
        bleu += bleu1(train_ans[i]['answer'], train_data[i]['answer'])
        p_1, r_1, f1_1 = precision_recall_f1(train_ans[i]['answer'],
                                             train_data[i]['answer'])
        p += p_1
        r += r_1
        f1 += f1_1
        all_prediction.append(train_ans[i]['answer'])
        all_ground_truth.append(train_data[i]['answer'])
    # Exact match is computed once over the full prediction/reference lists.
    em = exact_match(all_prediction, all_ground_truth)
    print("bleu1:{}, exact_match:{},\np:{}, r:{}, f1:{}".format(
        bleu / cnt, em, p / cnt, r / cnt, f1 / cnt))
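precision_recall_f1 here is the per-sample token-overlap metric common in reading-comprehension evaluation. A minimal character-level sketch follows; this is an assumption (the instructor-provided metric may tokenize differently, and Example #3 calls a list-level variant):

from collections import Counter

def precision_recall_f1(prediction, ground_truth):
    """SQuAD-style overlap P/R/F1 for one prediction/reference pair.
    Character-level to suit Chinese text; illustrative sketch only."""
    pred_tokens = list(prediction)
    gold_tokens = list(ground_truth)
    common = Counter(pred_tokens) & Counter(gold_tokens)
    num_same = sum(common.values())
    if num_same == 0:
        return 0.0, 0.0, 0.0
    p = num_same / len(pred_tokens)
    r = num_same / len(gold_tokens)
    f1 = 2 * p * r / (p + r)
    return p, r, f1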
Example #3
def main():
    # lcs_list = lcs(['1', '2', '3', '3', '3'], ['1', '3', '3'])
    # print(lcs_list)
    text, question, ans = read_data()
    stopwords = read_stopwords()
    most_lcs_sentences = []
    # Earlier experiment: pick the sentence sharing the longest common
    # subsequence with the question, then strip stopwords from both sides.
    # for i in range(len(text)):
    #     most_lcs_sentence = get_most_lcs_sentence(text[i], question[i])
    #     most_lcs_sentences.append(most_lcs_sentence)
    # most_lcs_sentences = [[word for word in sen if (word not in stopwords)] for sen in most_lcs_sentences]
    # question = [[word for word in sen if (word not in stopwords)] for sen in question]
    #
    # assert len(most_lcs_sentences) == len(question)
    # for i in range(len(question)):
    #     print(i)
    #     print(most_lcs_sentences[i])
    #     print(question[i])

    # Extract an answer span for every question.
    answers = []
    for i in range(len(question)):
        answer = extract_answer(text[i], question[i])
        answers.append(answer)

    bleu = 0.0
    bleu2 = 0.0  # BLEU-1 of the full text against the answer, kept as a baseline
    a = []
    b = []
    with open(data_path.train_output, 'w', encoding='utf-8') as f:
        for i in range(len(answers)):
            # if len(answers[i]) == 0:
            #     answers[i] = most_lcs_sentences[i]
            bleu += bleu1(''.join(answers[i]), ''.join(ans[i]))
            bleu2 += bleu1(''.join(text[i]), ''.join(ans[i]))
            f.write('<qid_{}> ||| {} ||| {}\n'.format(
                i, ''.join(answers[i]), ''.join(ans[i])))
            a.append(''.join(answers[i]))
            b.append(''.join(ans[i]))
    print("bleu : ", bleu / len(question))
    # print("bleu2 : ", bleu2 / 5352)
    print("exact_match: ", metric.exact_match(a, b))
    print("p, r, f1: ", metric.precision_recall_f1(a, b))
Example #4
def answer_the_questions():
    train_dicts = read_json_data('data/train_new.json')
    num = 0
    bleu1_sum = 0.0
    answer_list = []
    ground_truth_list = []
    for train_dict in train_dicts:
        num += 1
        question = train_dict['question']
        ground_truth = train_dict['answer']
        answer_sentence = ''.join(train_dict['answer_sentence'])
        answer = answerB1(question, answer_sentence)
        print('Question:', train_dict['question'])
        print('Answer vs. ground truth:', answer, ground_truth)
        c = metric.bleu1(answer, ground_truth)
        answer_list.append(answer)
        ground_truth_list.append(ground_truth)
        bleu1_sum += c
    print('bleu1 value:', bleu1_sum / num)
    print('exact match rate:',
          metric.exact_match(answer_list, ground_truth_list))
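A quick sanity check of these aggregate metrics with hypothetical data; the expected values assume the sketched implementations above, not the project's actual metric module:

preds = ['北京', '上海']
golds = ['北京', '广州']
print(metric.exact_match(preds, golds))  # 0.5: one of two answers matches
print(metric.bleu1(preds[0], golds[0]))  # 1.0: identical strings
print(metric.bleu1(preds[1], golds[1]))  # 0.0: no overlapping characters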
Example #5
    for i, row in enumerate(pred_sheet.get_rows()):
        if i == 0:  # (Optionally) skip headers
            print(row)
            continue
        # Each cell holds newline-separated label names; map each to its id.
        sample_true_labels.append([
            lab_name.strip() and label_dict[lab_name.strip()]
            for lab_name in pred_sheet.cell_value(i, 0).split('\n')
        ])
        sample_pred_labels.append([
            label_dict[lab_name.strip()]
            for lab_name in pred_sheet.cell_value(i, 1).split('\n')
        ])
    print("Accuracy is ",
          accuracy(sample_true_labels, sample_pred_labels))
    print("Ematch is ",
          exact_match(sample_true_labels, sample_pred_labels))
    print("F_micro is ",
          f_score_micro(sample_true_labels, sample_pred_labels))
    pre_label, rec_label, f_label, prec_result, recall_result = f_score_by_label(
        sample_true_labels, sample_pred_labels, len(label_dict))
    print("F_label is ", pre_label, rec_label, f_label)
    print("Precision by label result:")
    for val in prec_result:
        print(val)
    print("Recall by label result:")
    for val in recall_result:
        print(val)
else:
    print(label_dict)
    print("No. of test reviews is ", len(ad_review))
    print("No. of reviews labeled with other is ", count_other)