def show_prediction_for_dureader(paths, outpath, reader_exp_name, para_selection_method, decoder_dict=None):
    """Run a reader over DuReader samples and write the ranked predictions to a text report.

    Args:
        paths: Data source handed to ``DureaderLoader``.
        outpath: Destination of the UTF-8 text report; opened in ``'w'`` mode.
        reader_exp_name: Experiment name passed to ``ReaderFactory.from_exp_name``.
        para_selection_method: Paragraph selection setting passed to the loader.
        decoder_dict: Optional decoder configuration forwarded to the reader factory.
    """
    print('show_prediction_for_dureader')
    wanted_fields = ['question', 'answers', 'question_id', 'question_type']
    samples = DureaderLoader(
        paths, para_selection_method, sample_fields=wanted_fields).sample_list

    reader = ReaderFactory.from_exp_name(reader_exp_name, decoder_dict=decoder_dict)
    raw_predictions = reader.evaluate_on_records(samples, batch_size=128)

    # Group the per-record predictions by question, let the judger pick among
    # them, then turn the grouped result back into a flat record list.
    per_question = group_dict_list(raw_predictions, 'question_id')
    judged = MaxAllJudger().judge(per_question)
    answer_records = RecordGrouper.from_group_dict('question_id', judged).records

    print('bidaf evaluation')
    report_body = QARankedListFormater(answer_records).format_result()

    with open(outpath, 'w', encoding='utf-8') as report:
        # Header describing the experiment, followed by the formatted results.
        header_chunks = (
            'experiment settings\n',
            'reader_exp_name : %s\n' % (reader_exp_name),
            'para_selection_method : %s\n' % (str(para_selection_method)),
            'decoder : %s\n' % (str(decoder_dict)),
            '##' * 20,
            'Content:\n\n',
        )
        for chunk in header_chunks:
            report.write(chunk)
        report.write(report_body)
def get_answer_list(self, mrc_input, k=3):
    """Return up to ``k`` answer candidates for a single MRC request.

    The request's paragraphs are scored and filtered by ``self.selector``
    and the survivors are read by ``self.reader``.

    Args:
        mrc_input: Request mapping; NOTE: it is modified in place (an
            ``'id'`` key set to 0 is added before processing).
        k: Value handed to ``TopKJudger``.

    Returns:
        A list of dicts with the keys ``paragraph``, ``answer``, ``title``
        and ``url``.
    """
    # A single request is processed, so it gets the fixed id 0.
    mrc_input['id'] = 0
    example = DureaderRawExample(mrc_input)
    paragraph_records = example.flatten(['question', 'id'], ['url', 'title'])

    scored_records = self.selector.evaluate_scores(paragraph_records)
    kept_records = self.selector.select_top_k_each_doc(scored_records)

    predictions = self.reader.evaluate_on_records(kept_records, batch_size=128)
    predictions_by_id = group_dict_list(predictions, 'id')
    # Entry 0 of the judged result -- presumably the group for id 0 set above.
    top_answers = TopKJudger(k=k).judge(predictions_by_id)[0]

    return [
        {
            'paragraph': answer['passage'],
            'answer': answer['span'],
            'title': answer['title'],
            'url': answer['url'],
        }
        for answer in top_answers
    ]
def get_answer_list(self, mrc_input, k=3):
    """Return up to ``k`` answer candidates for a single MRC request.

    Paragraph records are scored by ``self.ranker``; the best-ranked ones
    (as many as the request has documents) are read by ``self.reader``.

    Args:
        mrc_input: Request mapping with a ``'documents'`` entry; NOTE: it
            is modified in place (an ``'id'`` key set to 0 is added).
        k: Value handed to ``TopKJudger``.

    Returns:
        A list of dicts with the keys ``paragraph``, ``answer``, ``title``
        and ``url``.
    """
    # A single request is processed, so it gets the fixed id 0.
    mrc_input['id'] = 0
    example = DureaderRawExample(mrc_input)
    paragraph_records = example.flatten(['question', 'id'], ['url', 'title'])
    document_count = len(mrc_input['documents'])

    scored_records = self.ranker.evaluate_on_records(paragraph_records, 64)
    grouper = RecordGrouper(scored_records)
    # First group of the id/rank_score sort, cut to one record per document.
    best_records = grouper.group_sort('id', 'rank_score')[0][:document_count]

    predictions = self.reader.evaluate_on_records(best_records, 64)
    predictions_by_id = group_dict_list(predictions, 'id')
    # Entry 0 of the judged result -- presumably the group for id 0 set above.
    top_answers = TopKJudger(k=k).judge(predictions_by_id)[0]

    return [
        {
            'paragraph': answer['passage'],
            'answer': answer['span'],
            'title': answer['title'],
            'url': answer['url'],
        }
        for answer in top_answers
    ]
def test_dureader_bert_rc(test_path, reader_exp_name, para_selection_method, decoder_dict=None):
    """Evaluate a reader on DuReader samples with ``evaluate_mrc_bidaf``.

    Args:
        test_path: Data source handed to ``DureaderLoader``.
        reader_exp_name: Experiment name passed to ``ReaderFactory.from_exp_name``.
        para_selection_method: Paragraph selection setting passed to the loader.
        decoder_dict: Optional decoder configuration forwarded to the reader factory.
    """
    print('test_dureader_bert_rc loading samples...')
    wanted_fields = ['question', 'answers', 'question_id', 'question_type']
    samples = DureaderLoader(
        test_path, para_selection_method, sample_fields=wanted_fields).sample_list

    reader = ReaderFactory.from_exp_name(reader_exp_name, decoder_dict=decoder_dict)
    raw_predictions = reader.evaluate_on_records(samples, batch_size=128)

    # Group predictions per question and let the judger pick among them.
    per_question = group_dict_list(raw_predictions, 'question_id')
    judged = MaxAllJudger().judge(per_question)

    print('bidaf evaluation')
    evaluate_mrc_bidaf(judged)
def evaluate3(evaluate_files, bert_config_path, weight_path, metric_dir, eval_method='bidaf_script'):
    """Evaluate the default BERT reader on DuReader files via ``mrc_eval.evaluate``.

    Args:
        evaluate_files: Data source handed to ``DureaderLoader``.
        bert_config_path: Not used by this function.
        weight_path: Not used by this function.
        metric_dir: Not used by this function.
        eval_method: Not used by this function.

    Note:
        Reads ``args.max_query_length``, ``args.max_seq_length`` and
        ``args.device`` from a name ``args`` defined outside this function.
    """
    from common.util import group_dict_list

    print('load model')
    # NOTE(review): assumes the no_grad context covers the whole evaluation.
    with torch.no_grad():
        reader = ReaderFactory.from_exp_name('reader/bert_default', READER_CLASS='bert_reader')
        loader = DureaderLoader(
            evaluate_files,
            'most_related_para',
            sample_fields=['question', 'answers', 'question_id', 'question_type'],
        )
        dataset = BertRCDataset(
            loader.sample_list,
            args.max_query_length,
            args.max_seq_length,
            device=args.device,
        )
        batches = dataset.make_batchiter(batch_size=128)

        print('Iterate Batch')
        predictions = reader.evaluate_on_batch(batches)
        by_question = group_dict_list(predictions, 'question_id')

        pred_result = {}
        ref_result = {}
        # For every question keep only the prediction with the highest span score.
        for qid, candidates in by_question.items():
            best = max(candidates, key=lambda d: d['span_score'])
            best['entity_answers'] = [[]]
            best['yesno_answers'] = []
            # The order must not be swapped: the reference copy has to be
            # taken before 'answers' is overwritten with the predicted span.
            ref_result[qid] = dict(best)
            best['answers'] = [best['span']]
            pred_result[qid] = best

        mrc_eval.evaluate(pred_result, ref_result)
def test_mrc_baseline():
    """Regression test: the default BERT reader must reproduce the baseline metrics.

    Runs the ``reader/bert_default`` experiment (decoder ``default``, ``k=1``)
    on the demo dev set and compares the BLEU / ROUGE-L values returned by
    ``evaluate_mrc_bidaf`` with the recorded baseline.

    The metrics are floats, so they are compared with a tolerance rather
    than exact equality; last-digit drift would otherwise fail the test
    without any real regression.
    """
    import math

    print('test_dureader_bert_rc loading samples...')
    from dataloader.dureader import DureaderLoader
    from qa.reader import ReaderFactory, BertRCDataset
    from qa.judger import MaxAllJudger
    from common.util import group_dict_list, evaluate_mrc_bidaf

    loader = DureaderLoader(
        ['./data/demo/devset/search.dev.2.json'],
        'most_related_para',
        sample_fields=['question', 'answers', 'question_id', 'question_type'],
    )
    sample_list = loader.sample_list
    reader = ReaderFactory.from_exp_name(
        'reader/bert_default',
        decoder_dict={'class': 'default', 'kwargs': {'k': 1}},
    )
    reader_config = reader.config
    dataset = BertRCDataset(
        sample_list,
        reader_config.MAX_QUERY_LEN,
        reader_config.MAX_SEQ_LEN,
        device=reader.device,
    )
    print('make batch')
    iterator = dataset.make_batchiter(batch_size=128)
    _preds = reader.evaluate_on_batch(iterator)
    _preds = group_dict_list(_preds, 'question_id')
    pred_answers = MaxAllJudger().judge(_preds)
    res_dict = evaluate_mrc_bidaf(pred_answers)

    expected = {
        'Bleu-1': 0.19711538461443695,
        'Bleu-2': 0.15154174071281326,
        'Bleu-3': 0.11637351097094059,
        'Bleu-4': 0.0983666932134996,
        'Rouge-L': 0.260079879764384,
    }
    # Same metric names as before, no extras and none missing.
    assert set(res_dict) == set(expected), (
        'unexpected metric names: %s' % sorted(res_dict))
    for metric, want in expected.items():
        got = res_dict[metric]
        assert math.isclose(got, want, rel_tol=1e-6, abs_tol=1e-12), (
            '%s: got %r, expected %r' % (metric, got, want))
records = [] for json_obj in jsonl_reader(path): question = json_obj['question'] if len(question) == 0: continue for paragraph in json_obj['paragraphs']: records.append({'question': question, 'passage': paragraph}) return records if __name__ == '__main__': from qa.reader import ReaderFactory from qa.judger import MaxAllJudger from common.util import group_dict_list, RecordGrouper from qa.eval import QARankedListFormater reader_exp_name = 'reader/bert_default' sample_list = load_beta_file('./data/news2paragraph.jsonl') reader = ReaderFactory.from_exp_name(reader_exp_name) _preds = reader.evaluate_on_records(sample_list, batch_size=32) _preds = group_dict_list(_preds, 'question') pred_answers = MaxAllJudger().judge(_preds) pred_answer_list = RecordGrouper.from_group_dict('question', pred_answers).records ranked_list_formatter = QARankedListFormater(pred_answer_list) formated_result = ranked_list_formatter.format_result() with open('news_beta.txt', 'w', encoding='utf-8') as f: f.write('experiment settings\n') f.write('reader_exp_name : %s\n' % (reader_exp_name)) f.write('##' * 20) f.write('Content:\n\n') f.write(formated_result)