def multi_doc_model_factory(config):
    if config['model_type'] == 'mock':
        return MockMRCModel()
    elif config['model_type'] == 'pipeline':
        name2cls = {
            'RankerReaderModel': RankerReaderModel,
            'SelectorReaderModel': SelectorReaderModel
        }
        _cls = name2cls[config['class']]
        # Resolve the target device; fall back to the default device when the
        # config omits it or asks for anything other than 'cpu'.
        if 'device' in config and config['device'] == 'cpu':
            import torch
            device = torch.device('cpu')
        else:
            device = get_default_device()
        run_time_kwargs = {}
        if 'ranker_config_path' in config:
            ranker = RankerFactory.from_config_path(config['ranker_config_path'])
            try:
                ranker.model = ranker.model.to(device)
            except AttributeError:
                pass  # ranker has no torch model to move
            run_time_kwargs['ranker'] = ranker
        if 'reader_config_path' in config:
            reader = ReaderFactory.from_config_path(config['reader_config_path'])
            try:
                reader.model = reader.model.to(device)
            except AttributeError:
                pass  # reader has no torch model to move
            run_time_kwargs['reader'] = reader
        if 'selector' in config:
            run_time_kwargs['selector'] = ParagraphSelectorFactory.create_selector(config['selector'])
        kwargs = config['kwargs']
        kwargs.update(run_time_kwargs)
        return _cls(**kwargs)
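
# A sketch of a config the factory above accepts; the config paths and the
# constructor kwargs are hypothetical placeholders, not files in the repo:
def _example_build_pipeline_model():
    config = {
        'model_type': 'pipeline',
        'class': 'RankerReaderModel',
        'device': 'cpu',  # anything else falls back to get_default_device()
        'ranker_config_path': 'experiments/ranker/config.json',  # hypothetical
        'reader_config_path': 'experiments/reader/config.json',  # hypothetical
        'kwargs': {},  # extra constructor arguments for RankerReaderModel
    }
    return multi_doc_model_factory(config)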
def show_prediction_for_dureader(paths, outpath, reader_exp_name,
                                 para_selection_method, decoder_dict=None):
    print('show_prediction_for_dureader')
    loader = DureaderLoader(
        paths,
        para_selection_method,
        sample_fields=['question', 'answers', 'question_id', 'question_type'])
    sample_list = loader.sample_list
    reader = ReaderFactory.from_exp_name(reader_exp_name,
                                         decoder_dict=decoder_dict)
    _preds = reader.evaluate_on_records(sample_list, batch_size=128)
    _preds = group_dict_list(_preds, 'question_id')
    pred_answers = MaxAllJudger().judge(_preds)
    pred_answer_list = RecordGrouper.from_group_dict('question_id',
                                                     pred_answers).records
    print('format ranked answers')
    ranked_list_formatter = QARankedListFormater(pred_answer_list)
    formated_result = ranked_list_formatter.format_result()
    with open(outpath, 'w', encoding='utf-8') as f:
        f.write('experiment settings\n')
        f.write('reader_exp_name : %s\n' % (reader_exp_name))
        f.write('para_selection_method : %s\n' % (str(para_selection_method)))
        f.write('decoder : %s\n' % (str(decoder_dict)))
        f.write('##' * 20 + '\n')
        f.write('Content:\n\n')
        f.write(formated_result)
def test_dureader_bert_rc(test_path, reader_exp_name, para_selection_method,
                          decoder_dict=None):
    print('test_dureader_bert_rc loading samples...')
    loader = DureaderLoader(
        test_path,
        para_selection_method,
        sample_fields=['question', 'answers', 'question_id', 'question_type'])
    sample_list = loader.sample_list
    reader = ReaderFactory.from_exp_name(reader_exp_name,
                                         decoder_dict=decoder_dict)
    _preds = reader.evaluate_on_records(sample_list, batch_size=128)
    _preds = group_dict_list(_preds, 'question_id')
    pred_answers = MaxAllJudger().judge(_preds)
    print('bidaf evaluation')
    evaluate_mrc_bidaf(pred_answers)
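
# Usage sketches for the two entry points above; the dev file mirrors the one
# used in test_mrc_baseline below, and the output path is hypothetical:
def _example_dureader_evaluation():
    decoder_dict = {'class': 'default', 'kwargs': {'k': 1}}
    test_dureader_bert_rc(['./data/demo/devset/search.dev.2.json'],
                          'reader/bert_default',
                          'most_related_para',
                          decoder_dict=decoder_dict)
    show_prediction_for_dureader(['./data/demo/devset/search.dev.2.json'],
                                 'dureader_preds.txt',  # hypothetical output
                                 'reader/bert_default',
                                 'most_related_para',
                                 decoder_dict=decoder_dict)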
def evaluate3(evaluate_files, bert_config_path, weight_path, metric_dir,
              eval_method='bidaf_script'):
    from common.util import group_dict_list
    print('load model')
    with torch.no_grad():
        reader = ReaderFactory.from_exp_name('reader/bert_default',
                                             READER_CLASS='bert_reader')
        loader = DureaderLoader(
            evaluate_files,
            'most_related_para',
            sample_fields=['question', 'answers', 'question_id', 'question_type'])
        dataset = BertRCDataset(loader.sample_list,
                                args.max_query_length,
                                args.max_seq_length,
                                device=args.device)
        iterator = dataset.make_batchiter(batch_size=128)
        print('Iterate Batch')
        preds = reader.evaluate_on_batch(iterator)
        tmp = group_dict_list(preds, 'question_id')
        pred_result, ref_result = {}, {}
        # keep the max-score prediction (a dict) for each question id
        for qid in tmp:
            candidates = tmp[qid]
            max_answer = max(candidates, key=lambda d: d['span_score'])
            max_answer.update({'entity_answers': [[]], 'yesno_answers': []})
            # the order matters: copy the reference entry before 'answers'
            # is overwritten with the predicted span
            ref = {k: v for k, v in max_answer.items()}
            ref_result[qid] = ref
            max_answer['answers'] = [max_answer['span']]
            pred_result[qid] = max_answer
        mrc_eval.evaluate(pred_result, ref_result)
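
# evaluate3 (and evaluate2 below) read max_query_length, max_seq_length and
# device from a module-level `args`; a minimal argparse sketch that would
# satisfy those accesses (the defaults here are assumptions, not the repo's
# values):
def _example_build_args():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--max_query_length', type=int, default=64)  # assumed
    parser.add_argument('--max_seq_length', type=int, default=512)   # assumed
    parser.add_argument('--device', default='cpu')
    return parser.parse_args([])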
def show_prediction_for_demo_examples(reader_name, decoder_dict,
                                      test_path='./data/examples.txt',
                                      out_path='demo_mrc.txt'):
    samples = demo_files.read_from_demo_txt_file(test_path)
    reader = ReaderFactory.from_exp_name(reader_name, decoder_dict=decoder_dict)
    _preds = reader.evaluate_on_records(samples, batch_size=128)
    with open(out_path, 'w', encoding='utf-8') as f:
        for sample in _preds:
            print('Question', file=f)
            print(sample['question'], file=f)
            print('Passage', file=f)
            print('%s' % (sample['passage']), file=f)
            print('--' * 20, file=f)
            print('Answer:', file=f)
            print('%s' % (sample['span']), file=f)
            print('# # #' * 20, file=f)
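
# A usage sketch for the demo helper, reusing the decoder settings that
# appear in test_mrc_baseline below:
def _example_demo_run():
    show_prediction_for_demo_examples(
        'reader/bert_default',
        {'class': 'default', 'kwargs': {'k': 1}})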
def test_mrc_baseline():
    print('test_mrc_baseline loading samples...')
    from dataloader.dureader import DureaderLoader
    from qa.reader import ReaderFactory, BertRCDataset
    from qa.judger import MaxAllJudger
    from common.util import group_dict_list, evaluate_mrc_bidaf
    loader = DureaderLoader(
        ['./data/demo/devset/search.dev.2.json'],
        'most_related_para',
        sample_fields=['question', 'answers', 'question_id', 'question_type'])
    sample_list = loader.sample_list
    reader = ReaderFactory.from_exp_name(
        'reader/bert_default',
        decoder_dict={'class': 'default', 'kwargs': {'k': 1}})
    reader_config = reader.config
    dataset = BertRCDataset(sample_list,
                            reader_config.MAX_QUERY_LEN,
                            reader_config.MAX_SEQ_LEN,
                            device=reader.device)
    print('make batch')
    iterator = dataset.make_batchiter(batch_size=128)
    _preds = reader.evaluate_on_batch(iterator)
    _preds = group_dict_list(_preds, 'question_id')
    pred_answers = MaxAllJudger().judge(_preds)
    res_dict = evaluate_mrc_bidaf(pred_answers)
    assert res_dict == {
        'Bleu-1': 0.19711538461443695,
        'Bleu-2': 0.15154174071281326,
        'Bleu-3': 0.11637351097094059,
        'Bleu-4': 0.0983666932134996,
        'Rouge-L': 0.260079879764384
    }
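
# For orientation: the grouping/judging steps above reshape predictions
# roughly like this (assumed contracts of group_dict_list and MaxAllJudger,
# inferred from usage, not the library code itself):
#   preds = [{'question_id': 1, 'span': 'A', 'span_score': 0.9},
#            {'question_id': 1, 'span': 'B', 'span_score': 0.4}]
#   group_dict_list(preds, 'question_id')
#       -> {1: [{'span': 'A', ...}, {'span': 'B', ...}]}
#   MaxAllJudger().judge(...) keeps the top-scoring prediction(s) per id.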
def load_beta_file(path):
    records = []
    for json_obj in jsonl_reader(path):
        question = json_obj['question']
        if len(question) == 0:
            continue
        for paragraph in json_obj['paragraphs']:
            records.append({'question': question, 'passage': paragraph})
    return records


if __name__ == '__main__':
    from qa.reader import ReaderFactory
    from qa.judger import MaxAllJudger
    from common.util import group_dict_list, RecordGrouper
    from qa.eval import QARankedListFormater
    reader_exp_name = 'reader/bert_default'
    sample_list = load_beta_file('./data/news2paragraph.jsonl')
    reader = ReaderFactory.from_exp_name(reader_exp_name)
    _preds = reader.evaluate_on_records(sample_list, batch_size=32)
    _preds = group_dict_list(_preds, 'question')
    pred_answers = MaxAllJudger().judge(_preds)
    pred_answer_list = RecordGrouper.from_group_dict('question',
                                                     pred_answers).records
    ranked_list_formatter = QARankedListFormater(pred_answer_list)
    formated_result = ranked_list_formatter.format_result()
    with open('news_beta.txt', 'w', encoding='utf-8') as f:
        f.write('experiment settings\n')
        f.write('reader_exp_name : %s\n' % (reader_exp_name))
        f.write('##' * 20 + '\n')
        f.write('Content:\n\n')
        f.write(formated_result)
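
# load_beta_file expects one JSON object per line with 'question' and
# 'paragraphs' fields; a sketch that writes a tiny file in that assumed
# format (the file name here is just an example):
def _example_write_beta_file(path='news2paragraph.example.jsonl'):
    import json
    with open(path, 'w', encoding='utf-8') as f:
        f.write(json.dumps({
            'question': 'example question',
            'paragraphs': ['first candidate paragraph',
                           'second candidate paragraph']
        }, ensure_ascii=False) + '\n')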
print('preprocessing span for train data')
train_loader.sample_list = list(
    filter(lambda x: len(x['answers']) > 0 and len(x['answer_docs']) > 0,
           train_loader.sample_list))
for sample in train_loader.sample_list:
    if sample['doc_id'] == sample['answer_docs'][0]:
        preprocessing_charspan(sample)
    else:
        sample['char_spans'] = [0, 0]
    del sample['answer_spans']
    del sample['segmented_paragraphs']
print('load ranker')
ranker = RankerFactory.from_exp_name(experiment.config.ranker_name,
                                     eval_flag=False)
print('load reader')
reader = ReaderFactory.from_exp_name(experiment.config.reader_name,
                                     eval_flag=False)
tokenizer = Tokenizer()
reader_optimizer = SGD(reader.model.parameters(), lr=0.00001, momentum=0.9)
ranker_optimizer = SGD(ranker.model.parameters(), lr=0.00001, momentum=0.9)
BATCH_SIZE = 12
print('ranker performance before training')
ranker.model = ranker.model.eval()
evaluate_dureader_ranker(DEV_PATH, ranker, BATCH_SIZE, print_detail=False)
ranker.model = ranker.model.train()
for epoch in range(EPOCH):
    print('start of epoch %d' % (epoch))
    reader_loss, ranker_loss, reward_tracer = (MetricTracer(), MetricTracer(),
                                               MetricTracer())
    print('start training loop')
def evaluate2(evaluate_files, bert_config_path, weight_path, metric_dir,
              eval_method='bidaf_script'):
    print('load model')
    with torch.no_grad():
        model = ReaderFactory.from_exp_name('reader/bert_default',
                                            READER_CLASS='bert_reader').model
        model = model.eval()
        loader = DureaderLoader(
            evaluate_files,
            'most_related_para',
            sample_fields=['question', 'answers', 'question_id', 'question_type'])
        dataset = BertRCDataset(loader.sample_list,
                                args.max_query_length,
                                args.max_seq_length,
                                device=args.device)
        iterator = dataset.make_batchiter(batch_size=128)
        print('Iterate Batch')
        preds = []
        for i, batch in enumerate(iterator):
            if i % 20 == 0:
                print('evaluate on %d batch' % (i))
            start_probs, end_probs = model(batch.input_ids,
                                           token_type_ids=batch.segment_ids,
                                           attention_mask=batch.input_mask)
            # use a separate index so the batch counter above is not shadowed
            for j in range(len(start_probs)):
                sb, eb = start_probs[j].unsqueeze(0), end_probs[j].unsqueeze(0)
                span, score = find_best_span_from_probs(sb, eb)
                # the output score is not a probability, so it is not bounded
                # to [0, 1]
                score = score.item()
                answer = extact_answer_from_span(batch.question[j],
                                                 batch.passage[j], span)
                preds.append({
                    'question_id': batch.question_id[j],
                    'question': batch.question[j],
                    'question_type': batch.question_type[j],
                    'answers': [answer],
                    'entity_answers': [[]],
                    'yesno_answers': [],
                    'score': score,
                    'gold': batch.answers[j]
                })
        tmp = {}
        for pred in preds:
            qid = pred['question_id']
            if qid not in tmp:
                tmp[qid] = []
            tmp[qid].append(pred)
        pred_result, ref_result = {}, {}
        # keep the max-score prediction (a dict) for each question id
        for qid in tmp:
            candidates = tmp[qid]
            max_answer = max(candidates, key=lambda d: d['score'])
            pred_result[qid] = max_answer
            ref = {k: v for k, v in max_answer.items()}
            ref['answers'] = max_answer['gold']
            ref_result[qid] = ref
        mrc_eval.evaluate(pred_result, ref_result)
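
# find_best_span_from_probs is called above with single-row start/end scores;
# a minimal sketch of one common implementation (argmax over start/end pairs
# with start <= end and a bounded span length), offered as an assumed reading,
# not the repo's helper:
def _sketch_find_best_span(start_probs, end_probs, max_span_len=50):
    import torch
    # start_probs, end_probs: tensors of shape (1, seq_len)
    start = start_probs.squeeze(0)
    end = end_probs.squeeze(0)
    scores = start.unsqueeze(1) + end.unsqueeze(0)  # scores[i, j] = start[i] + end[j]
    seq_len = scores.size(0)
    # keep only spans with i <= j < i + max_span_len
    valid = torch.triu(torch.ones(seq_len, seq_len)) \
        - torch.triu(torch.ones(seq_len, seq_len), diagonal=max_span_len)
    scores = scores.masked_fill(valid == 0, float('-inf'))
    flat = scores.view(-1).argmax().item()
    span = (flat // seq_len, flat % seq_len)
    # the score is a sum of logits, not a probability
    return span, scores.view(-1).max()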