def validate(self):
    """Classify the data and score the predictions against the gold labels."""
    self.pred_labels = self.classify()
    acc = accuracy(self.true_labels, self.pred_labels)
    ematch = exact_match(self.true_labels, self.pred_labels)
    pre_micro, rec_micro, f_micro = f_score_micro(self.true_labels, self.pred_labels)
    pre_label, rec_label, f_label, prec_result, recall_result = f_score_by_label(
        self.true_labels, self.pred_labels, len(self.label_dict))
    return (acc, ematch, pre_micro, rec_micro, f_micro,
            pre_label, rec_label, f_label, prec_result, recall_result)
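# For reference, a minimal sketch of what a micro-averaged F-score helper such as
# f_score_micro might compute for multi-label data (one label list per sample).
# This is an assumption about its behavior, not the actual implementation used above.
def f_score_micro_sketch(true_labels, pred_labels):
    """Micro-averaged precision/recall/F1 over per-sample label lists."""
    tp = fp = fn = 0
    for true, pred in zip(true_labels, pred_labels):
        true_set, pred_set = set(true), set(pred)
        tp += len(true_set & pred_set)  # labels predicted and correct
        fp += len(pred_set - true_set)  # labels predicted but wrong
        fn += len(true_set - pred_set)  # labels missed
    precision = tp / (tp + fp) if tp + fp else 0.0
    recall = tp / (tp + fn) if tp + fn else 0.0
    f1 = 2 * precision * recall / (precision + recall) if precision + recall else 0.0
    return precision, recall, f1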
def evaluate():
    """
    Compute the BLEU score (plus exact match and P/R/F1) with the metrics
    provided by the instructor, and print the evaluation results.
    """
    with open(TRAIN_DATA, 'r', encoding='utf-8') as f:
        train_data = [json.loads(line.strip()) for line in f]
    with open(TRAIN_ANSWER, 'r', encoding='utf-8') as f:
        train_ans = [json.loads(line.strip()) for line in f]
    cnt = len(train_data)
    all_prediction = []
    all_ground_truth = []
    bleu = 0.0
    p = 0.0
    r = 0.0
    f1 = 0.0
    for i in range(cnt):
        bleu += bleu1(train_ans[i]['answer'], train_data[i]['answer'])
        p_1, r_1, f1_1 = precision_recall_f1(train_ans[i]['answer'], train_data[i]['answer'])
        p += p_1
        r += r_1
        f1 += f1_1
        all_prediction.append(train_ans[i]['answer'])
        all_ground_truth.append(train_data[i]['answer'])
    em = exact_match(all_prediction, all_ground_truth)
    print("bleu1:{}, exact_match:{},\np:{}, r:{}, f1:{}".format(
        bleu / cnt, em, p / cnt, r / cnt, f1 / cnt))
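# precision_recall_f1 above scores a single prediction string against a single
# ground-truth string; for Chinese answers this is typically character-overlap
# P/R/F1. A minimal sketch under that assumption (hypothetical reconstruction,
# not the instructor-provided implementation):
from collections import Counter

def precision_recall_f1_sketch(prediction, ground_truth):
    """Character-level overlap precision/recall/F1 between two strings."""
    common = Counter(prediction) & Counter(ground_truth)
    num_same = sum(common.values())
    if num_same == 0:
        return 0.0, 0.0, 0.0
    p = num_same / len(prediction)
    r = num_same / len(ground_truth)
    return p, r, 2 * p * r / (p + r)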
def main():
    # lcs_list = lcs(['1', '2', '3', '3', '3'], ['1', '3', '3'])
    # print(lcs_list)
    text, question, ans = read_data()
    stopwords = read_stopwords()
    most_lcs_sentences = []
    # for i in range(len(text)):
    #     most_lcs_sentence = get_most_lcs_sentence(text[i], question[i])
    #     most_lcs_sentences.append(most_lcs_sentence)
    # most_lcs_sentences = [[word for word in sen if (word not in stopwords)] for sen in most_lcs_sentences]
    # question = [[word for word in sen if (word not in stopwords)] for sen in question]
    #
    # assert len(most_lcs_sentences) == len(question)
    # for i in range(len(question)):
    #     print(i)
    #     print(most_lcs_sentences[i])
    #     print(question[i])
    answers = []
    for i in range(len(question)):
        answer = extract_answer(text[i], question[i])
        answers.append(answer)
    bleu = 0
    bleu2 = 0
    a = []
    b = []
    with open(data_path.train_output, 'w', encoding='utf-8') as f:
        for i in range(len(answers)):
            # if len(answers[i]) == 0:
            #     answers[i] = most_lcs_sentences[i]
            bleu += bleu1(''.join(answers[i]), ''.join(ans[i]))
            bleu2 += bleu1(''.join(text[i]), ''.join(ans[i]))
            f.write('<qid_' + str(i) + '> ||| ' + ''.join(answers[i]) +
                    ' ||| ' + ''.join(ans[i]) + '\n')
            a.append(''.join(answers[i]))
            b.append(''.join(ans[i]))
    print("bleu : ", bleu / len(question))
    # print("bleu2 : ", bleu2 / 5352)
    print("exact_match: ", metric.exact_match(a, b))
    print("p, r, f1: ", metric.precision_recall_f1(a, b))
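# The commented-out call in main() suggests lcs returns a longest-common-subsequence
# token list (e.g. lcs(['1','2','3','3','3'], ['1','3','3']) -> ['1','3','3']).
# A standard DP sketch under that assumption (hypothetical; the repository's lcs
# may differ in signature or return type):
def lcs_sketch(a, b):
    """Return one longest common subsequence of token lists a and b."""
    m, n = len(a), len(b)
    dp = [[0] * (n + 1) for _ in range(m + 1)]  # dp[i][j] = LCS length of a[:i], b[:j]
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            if a[i - 1] == b[j - 1]:
                dp[i][j] = dp[i - 1][j - 1] + 1
            else:
                dp[i][j] = max(dp[i - 1][j], dp[i][j - 1])
    out, i, j = [], m, n  # backtrack to recover one LCS
    while i and j:
        if a[i - 1] == b[j - 1]:
            out.append(a[i - 1])
            i -= 1
            j -= 1
        elif dp[i - 1][j] >= dp[i][j - 1]:
            i -= 1
        else:
            j -= 1
    return out[::-1]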
def answer_the_questions():
    train_dicts = read_json_data('data/train_new.json')
    num = 0
    bleu1_sum = 0
    answer_list = []
    ground_truth_list = []
    for train_dict in train_dicts:
        num += 1
        question = train_dict['question']
        ground_truth = train_dict['answer']
        answer_sentence = ''.join(train_dict['answer_sentence'])
        answer = answerB1(question, answer_sentence)
        print('Question:', question)
        print('Answer vs. ground truth:', answer, ground_truth)
        bleu1_sum += metric.bleu1(answer, ground_truth)
        answer_list.append(answer)
        ground_truth_list.append(ground_truth)
    print('bleu1 value:', bleu1_sum / num)
    print('exact match rate:', metric.exact_match(answer_list, ground_truth_list))
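# bleu1, used throughout these scripts, is assumed to be unigram BLEU: clipped
# unigram (character) precision with a brevity penalty. A minimal sketch under
# that assumption (hypothetical reconstruction, not metric.bleu1 itself):
import math
from collections import Counter

def bleu1_sketch(prediction, reference):
    """Unigram BLEU of a prediction string against one reference string."""
    if not prediction:
        return 0.0
    ref_counts = Counter(reference)
    clipped = sum(min(c, ref_counts[ch]) for ch, c in Counter(prediction).items())
    precision = clipped / len(prediction)
    # Brevity penalty discourages trivially short answers.
    bp = (1.0 if len(prediction) > len(reference)
          else math.exp(1 - len(reference) / len(prediction)))
    return bp * precision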
sample_true_labels = []  # assumed initialized here; the excerpt appends without setup
sample_pred_labels = []
for i, row in enumerate(pred_sheet.get_rows()):
    if i == 0:
        # (Optionally) skip headers
        print(row)
        continue
    # The comprehension variable is renamed from `i` to `lab` so it no longer
    # shadows the row index.
    sample_true_labels.append([
        label_dict[lab.strip()] for lab in pred_sheet.cell_value(i, 0).split('\n')
    ])
    sample_pred_labels.append([
        label_dict[lab.strip()] for lab in pred_sheet.cell_value(i, 1).split('\n')
    ])
print("Accuracy is ", accuracy(sample_true_labels, sample_pred_labels))
print("Ematch is ", exact_match(sample_true_labels, sample_pred_labels))
print("F_micro is ", f_score_micro(sample_true_labels, sample_pred_labels))
pre_label, rec_label, f_label, prec_result, recall_result = f_score_by_label(
    sample_true_labels, sample_pred_labels, len(label_dict))
print("F_label is ", pre_label, rec_label, f_label)
print("Precision by label result:")
for i in prec_result:
    print(i)
print("Recall by label result:")
for i in recall_result:
    print(i)
print(label_dict)
print("No. of test reviews is ", len(ad_review))
print("No. of reviews labeled with other is ", count_other)
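# f_score_by_label is assumed to return macro-averaged precision/recall/F1 plus
# the per-label precision and recall breakdowns printed above. A minimal sketch
# under that assumption (hypothetical, not the actual implementation):
def f_score_by_label_sketch(true_labels, pred_labels, num_labels):
    tp, fp, fn = [0] * num_labels, [0] * num_labels, [0] * num_labels
    for true, pred in zip(true_labels, pred_labels):
        true_set, pred_set = set(true), set(pred)
        for lab in pred_set:
            if lab in true_set:
                tp[lab] += 1   # predicted and correct for this label
            else:
                fp[lab] += 1   # predicted but wrong for this label
        for lab in true_set - pred_set:
            fn[lab] += 1       # missed for this label
    prec_result = [tp[l] / (tp[l] + fp[l]) if tp[l] + fp[l] else 0.0 for l in range(num_labels)]
    recall_result = [tp[l] / (tp[l] + fn[l]) if tp[l] + fn[l] else 0.0 for l in range(num_labels)]
    pre_label = sum(prec_result) / num_labels    # macro precision
    rec_label = sum(recall_result) / num_labels  # macro recall
    f_label = (2 * pre_label * rec_label / (pre_label + rec_label)
               if pre_label + rec_label else 0.0)
    return pre_label, rec_label, f_label, prec_result, recall_result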