def evaluate(documents, dictionary, dictionary_reverse, model):
    model.eval()

    ct_predicted = 0
    ct_gold = 0
    ct_correct = 0

    for document in documents:

        # copy entities from gold entities
        pred_entities = []
        for gold in document.entities:
            pred = Entity()
            pred.id = gold.id
            pred.type = gold.type
            pred.spans = gold.spans
            pred.section = gold.section
            pred.name = gold.name
            pred_entities.append(pred)

        model.process_one_doc(document, pred_entities, dictionary, dictionary_reverse)

        p1, p2, p3 = evaluate_for_ehr(document.entities, pred_entities, dictionary)

        ct_gold += p1
        ct_predicted += p2
        ct_correct += p3

    # micro-averaged precision/recall/F1 with zero-division guards
    if ct_gold == 0 or ct_predicted == 0:
        precision = 0
        recall = 0
    else:
        precision = ct_correct * 1.0 / ct_predicted
        recall = ct_correct * 1.0 / ct_gold

    if precision + recall == 0:
        f_measure = 0
    else:
        f_measure = 2 * precision * recall / (precision + recall)

    return precision, recall, f_measure
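# The micro-averaged precision/recall/F1 block above is repeated in several
# functions in this file. A minimal helper sketch that factors it out; the
# name prf1 is mine, not part of the original code:
def prf1(ct_gold, ct_predicted, ct_correct):
    # precision = correct / predicted, recall = correct / gold
    precision = ct_correct * 1.0 / ct_predicted if ct_predicted > 0 else 0.0
    recall = ct_correct * 1.0 / ct_gold if ct_gold > 0 else 0.0
    # F1 is the harmonic mean of precision and recall
    f_measure = 2 * precision * recall / (precision + recall) if precision + recall > 0 else 0.0
    return precision, recall, f_measure

# e.g. prf1(100, 90, 80) -> (0.888..., 0.8, 0.842...)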
def metamap_ner_my_norm(d):
    print("load umls ...")
    UMLS_dict, UMLS_dict_reverse = umls.load_umls_MRCONSO(d.config['norm_dict'])

    predict_dir = "/Users/feili/Desktop/umass/CancerADE_SnoM_30Oct2017_test/metamap"
    annotation_dir = os.path.join(opt.test_file, 'bioc')
    corpus_dir = os.path.join(opt.test_file, 'txt')
    annotation_files = [f for f in os.listdir(annotation_dir)
                        if os.path.isfile(os.path.join(annotation_dir, f))]

    if opt.test_in_cpu:
        model = torch.load(os.path.join(opt.output, 'norm_neural.pkl'), map_location='cpu')
    else:
        model = torch.load(os.path.join(opt.output, 'norm_neural.pkl'))
    model.eval()

    ct_norm_predict = 0
    ct_norm_gold = 0
    ct_norm_correct = 0

    correct_counter = Counter()
    wrong_counter = Counter()

    for gold_file_name in annotation_files:
        print("# begin {}".format(gold_file_name))
        gold_document = parse_one_gold_file(annotation_dir, corpus_dir, gold_file_name)

        predict_document = metamap.load_metamap_result_from_file(
            os.path.join(predict_dir, gold_file_name[:gold_file_name.find('.')] + ".field.txt"))

        # copy entities from metamap entities
        pred_entities = []
        for gold in predict_document.entities:
            pred = Entity()
            pred.id = gold.id
            pred.type = gold.type
            pred.spans = gold.spans
            pred.section = gold.section
            pred.name = gold.name
            pred_entities.append(pred)

        model.process_one_doc(gold_document, pred_entities, UMLS_dict, UMLS_dict_reverse)

        p1, p2, p3 = evaluate_for_ehr(gold_document.entities, pred_entities, UMLS_dict,
                                      predict_document.entities, correct_counter, wrong_counter)

        ct_norm_gold += p1
        ct_norm_predict += p2
        ct_norm_correct += p3

    # dump the most frequent correctly and wrongly normalized mentions for inspection
    sorted_correct_entities = OrderedDict(correct_counter.most_common())
    with codecs.open("sorted_correct_entities.txt", 'w', 'UTF-8') as fp:
        fp.write(json.dumps(sorted_correct_entities, indent=4))

    sorted_wrong_entities = OrderedDict(wrong_counter.most_common())
    with codecs.open("sorted_wrong_entities.txt", 'w', 'UTF-8') as fp:
        fp.write(json.dumps(sorted_wrong_entities, indent=4))

    # guard against division by zero when nothing is predicted or annotated
    p = ct_norm_correct * 1.0 / ct_norm_predict if ct_norm_predict != 0 else 0
    r = ct_norm_correct * 1.0 / ct_norm_gold if ct_norm_gold != 0 else 0
    f1 = 2.0 * p * r / (p + r) if p + r != 0 else 0
    print("NORM p: %.4f | r: %.4f | f1: %.4f" % (p, r, f1))
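# The counter dump above works because json.dumps preserves OrderedDict
# insertion order, so the output files list mentions from most to least
# frequent. A tiny standalone check of the same pattern (the mention
# strings and counts below are made up):
#
#   demo = Counter({"rash": 7, "nausea": 3, "qt prolongation": 1})
#   json.dumps(OrderedDict(demo.most_common()), indent=4)
#   # -> '{\n    "rash": 7,\n    "nausea": 3,\n    "qt prolongation": 1\n}'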
print("# begin {}".format(gold_file_name)) if file_count < 1: file_count += 1 continue file_count += 1 gold_document = parse_one_gold_file(annotation_dir, corpus_dir, gold_file_name) pred_entities = [] for gold in gold_document.entities: pred = Entity() pred.id = gold.id pred.type = gold.type pred.spans = gold.spans pred.section = gold.section pred.name = gold.name pred_entities.append(pred) Xs, Ys = generate_instances_ehr(pred_entities, model.dict_alphabet, UMLS_dict_reverse) data_loader = DataLoader(MyDataset(Xs, Ys), opt.batch_size, shuffle=False, collate_fn=my_collate) data_iter = iter(data_loader) num_iter = len(data_loader) entity_start = 0 for i in range(num_iter): x, mask, sentences, _, lengths = next(data_iter)
def evaluate(documents, dictionary, dictionary_reverse, vsm_model, neural_model, ensemble_model, d, isMeddra_dict):
    if vsm_model is not None:
        vsm_model.eval()
    if neural_model is not None:
        neural_model.eval()
    if ensemble_model is not None:
        ensemble_model.eval()

    ct_predicted = 0
    ct_gold = 0
    ct_correct = 0

    for document in documents:

        # copy entities from gold entities
        pred_entities = []
        for gold in document.entities:
            pred = Entity()
            pred.id = gold.id
            pred.type = gold.type
            pred.spans = gold.spans
            pred.section = gold.section
            pred.name = gold.name
            pred_entities.append(pred)

        if opt.norm_rule and opt.norm_vsm and opt.norm_neural:
            if opt.ensemble == 'learn':
                ensemble_model.process_one_doc(document, pred_entities, dictionary, dictionary_reverse, isMeddra_dict)
            else:
                # run the three systems on separate copies, then merge below
                pred_entities2 = copy.deepcopy(pred_entities)
                pred_entities3 = copy.deepcopy(pred_entities)
                merge_entities = copy.deepcopy(pred_entities)
                multi_sieve.runMultiPassSieve(document, pred_entities, dictionary, isMeddra_dict)
                vsm_model.process_one_doc(document, pred_entities2, dictionary, dictionary_reverse, isMeddra_dict)
                neural_model.process_one_doc(document, pred_entities3, dictionary, dictionary_reverse, isMeddra_dict)
        elif opt.norm_rule:
            multi_sieve.runMultiPassSieve(document, pred_entities, dictionary, isMeddra_dict)
        elif opt.norm_vsm:
            vsm_model.process_one_doc(document, pred_entities, dictionary, dictionary_reverse, isMeddra_dict)
        elif opt.norm_neural:
            neural_model.process_one_doc(document, pred_entities, dictionary, dictionary_reverse, isMeddra_dict)
        else:
            raise RuntimeError("wrong configuration")

        if opt.norm_rule and opt.norm_vsm and opt.norm_neural:
            # Upper-bound diagnostic (disabled): per mention, check which
            # combination of the rule, vsm, and neural systems predicted a
            # correct concept id (rule only, vsm only, neural only, pairs, all);
            # counting a mention as correct when at least one system is right
            # gives an upper bound for the ensemble.
            if opt.ensemble == 'learn':
                if isMeddra_dict:
                    p1, p2, p3 = evaluate_for_fda(document.entities, pred_entities)
                else:
                    p1, p2, p3 = evaluate_for_ehr(document.entities, pred_entities, dictionary)
                ct_gold += p1
                ct_predicted += p2
                ct_correct += p3
            else:
                ensemble.merge_result(pred_entities, pred_entities2, pred_entities3, merge_entities,
                                      dictionary, isMeddra_dict, vsm_model.dict_alphabet, d)
                if isMeddra_dict:
                    p1, p2, p3 = evaluate_for_fda(document.entities, merge_entities)
                else:
                    p1, p2, p3 = evaluate_for_ehr(document.entities, merge_entities, dictionary)
                ct_gold += p1
                ct_predicted += p2
                ct_correct += p3
        else:
            if isMeddra_dict:
                p1, p2, p3 = evaluate_for_fda(document.entities, pred_entities)
            else:
                p1, p2, p3 = evaluate_for_ehr(document.entities, pred_entities, dictionary)
            ct_gold += p1
            ct_predicted += p2
            ct_correct += p3

    # micro-averaged precision/recall/F1 with zero-division guards
    if ct_gold == 0 or ct_predicted == 0:
        precision = 0
        recall = 0
    else:
        precision = ct_correct * 1.0 / ct_predicted
        recall = ct_correct * 1.0 / ct_gold

    if precision + recall == 0:
        f_measure = 0
    else:
        f_measure = 2 * precision * recall / (precision + recall)

    return precision, recall, f_measure
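# A hedged usage sketch for the ensemble evaluate() above (the variable names
# and flag settings here are illustrative assumptions, not fixed by this code):
# with all three norm flags set, either the learned ensemble model handles
# normalization (opt.ensemble == 'learn') or the three systems run separately
# and ensemble.merge_result combines their outputs.
#
#   opt.norm_rule = opt.norm_vsm = opt.norm_neural = True
#   opt.ensemble = 'learn'   # anything else routes through ensemble.merge_result
#   p, r, f = evaluate(dev_documents, dictionary, dictionary_reverse,
#                      vsm_model, neural_model, ensemble_model, d, isMeddra_dict=False)
#   logging.info("Dev: p: %.4f, r: %.4f, f: %.4f" % (p, r, f))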
def metamap_ner_my_norm(d):
    print("load umls ...")
    UMLS_dict, UMLS_dict_reverse = umls.load_umls_MRCONSO(d.config['norm_dict'])

    predict_dir = "/Users/feili/Desktop/umass/CancerADE_SnoM_30Oct2017_test/metamap"
    annotation_dir = os.path.join(opt.test_file, 'bioc')
    corpus_dir = os.path.join(opt.test_file, 'txt')
    annotation_files = [f for f in listdir(annotation_dir) if isfile(join(annotation_dir, f))]

    if opt.norm_rule:
        multi_sieve.init(opt, None, d, UMLS_dict, UMLS_dict_reverse, False)
    elif opt.norm_neural:
        logging.info("use neural-based normer")
        if opt.test_in_cpu:
            neural_model = torch.load(os.path.join(opt.output, 'norm_neural.pkl'), map_location='cpu')
        else:
            neural_model = torch.load(os.path.join(opt.output, 'norm_neural.pkl'))
        neural_model.eval()
    elif opt.norm_vsm:
        logging.info("use vsm-based normer")
        if opt.test_in_cpu:
            vsm_model = torch.load(os.path.join(opt.output, 'vsm.pkl'), map_location='cpu')
        else:
            vsm_model = torch.load(os.path.join(opt.output, 'vsm.pkl'))
        vsm_model.eval()

    ct_norm_predict = 0
    ct_norm_gold = 0
    ct_norm_correct = 0

    for gold_file_name in annotation_files:
        print("# begin {}".format(gold_file_name))
        gold_document = parse_one_gold_file(annotation_dir, corpus_dir, gold_file_name)
        predict_document = metamap.load_metamap_result_from_file(
            join(predict_dir, gold_file_name[:gold_file_name.find('.')] + ".field.txt"))

        # copy entities from metamap entities
        pred_entities = []
        for gold in predict_document.entities:
            pred = Entity()
            pred.id = gold.id
            pred.type = gold.type
            pred.spans = gold.spans
            pred.section = gold.section
            pred.name = gold.name
            pred_entities.append(pred)

        if opt.norm_rule:
            multi_sieve.runMultiPassSieve(gold_document, pred_entities, UMLS_dict, False)
        elif opt.norm_neural:
            neural_model.process_one_doc(gold_document, pred_entities, UMLS_dict, UMLS_dict_reverse, False)
        elif opt.norm_vsm:
            vsm_model.process_one_doc(gold_document, pred_entities, UMLS_dict, UMLS_dict_reverse, False)
        else:
            raise RuntimeError("wrong configuration")

        p1, p2, p3 = evaluate_for_ehr(gold_document.entities, pred_entities, UMLS_dict)

        ct_norm_gold += p1
        ct_norm_predict += p2
        ct_norm_correct += p3

    # guard against division by zero when nothing is predicted or annotated
    p = ct_norm_correct * 1.0 / ct_norm_predict if ct_norm_predict != 0 else 0
    r = ct_norm_correct * 1.0 / ct_norm_gold if ct_norm_gold != 0 else 0
    f1 = 2.0 * p * r / (p + r) if p + r != 0 else 0
    print("NORM p: %.4f | r: %.4f | f1: %.4f" % (p, r, f1))
def generate_instances(document, word_alphabet, dict_alphabet, dictionary, dictionary_reverse, isMeddra_dict):
    Xs = []
    Ys = []

    # copy entities from gold entities
    pred_entities = []
    for gold in document.entities:
        pred = Entity()
        pred.id = gold.id
        pred.type = gold.type
        pred.spans = gold.spans
        pred.section = gold.section
        pred.name = gold.name
        pred_entities.append(pred)

    # let the rule-based sieve label the copies; its predictions become a feature
    multi_sieve.runMultiPassSieve(document, pred_entities, dictionary, isMeddra_dict)

    for idx, entity in enumerate(document.entities):
        if isMeddra_dict:
            if len(entity.norm_ids) > 0:
                Y = norm_utils.get_dict_index(dict_alphabet, entity.norm_ids[0])
                if 0 <= Y < norm_utils.get_dict_size(dict_alphabet):
                    Ys.append(Y)
                else:
                    continue
            else:
                Ys.append(0)
        else:
            if len(entity.norm_ids) > 0:
                if entity.norm_ids[0] in dictionary_reverse:
                    cui_list = dictionary_reverse[entity.norm_ids[0]]
                    # use the first id to generate the instance
                    Y = norm_utils.get_dict_index(dict_alphabet, cui_list[0])
                    if 0 <= Y < norm_utils.get_dict_size(dict_alphabet):
                        Ys.append(Y)
                    else:
                        raise RuntimeError("entity {}, {}, cui not in dict_alphabet".format(entity.id, entity.name))
                else:
                    logging.info("entity {}, {}, can't map to umls, ignored".format(entity.id, entity.name))
                    continue
            else:
                Ys.append(0)

        X = dict()

        tokens = my_tokenize(entity.name)
        word_ids = []
        for token in tokens:
            token = norm_utils.word_preprocess(token)
            word_id = word_alphabet.get_index(token)
            word_ids.append(word_id)
        X['word'] = word_ids

        # one-hot feature over the concept alphabet for the sieve's prediction;
        # all-zero when the sieve produced no rule_id
        X['rule'] = [0] * norm_utils.get_dict_size(dict_alphabet)
        if pred_entities[idx].rule_id is not None:
            X['rule'][norm_utils.get_dict_index(dict_alphabet, pred_entities[idx].rule_id)] = 1

        Xs.append(X)

    return Xs, Ys
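# The 'rule' feature above is a one-hot vector over the concept alphabet that
# marks the sieve's predicted concept, or stays all-zero when the sieve made
# no prediction. A self-contained sketch of the same encoding (the helper
# name one_hot and the sizes below are mine, for illustration only):
def one_hot(dict_size, index=None):
    vec = [0] * dict_size
    if index is not None:
        vec[index] = 1
    return vec

# one_hot(5)    -> [0, 0, 0, 0, 0]  # sieve produced no rule_id
# one_hot(5, 2) -> [0, 0, 1, 0, 0]  # sieve predicted concept index 2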
def error_analysis(d, dictionary, dictionary_reverse, opt, isMeddra_dict):
    logging.info("error_analysis ...")

    test_data = loadData(opt.test_file, False, opt.types, opt.type_filter)

    logging.info("use my tokenizer")
    nlp_tool = None

    logging.info("use neural-based normer")
    if opt.test_in_cpu:
        neural_model = torch.load(os.path.join(opt.output, 'norm_neural.pkl'), map_location='cpu')
    else:
        neural_model = torch.load(os.path.join(opt.output, 'norm_neural.pkl'))
    neural_model.eval()

    ct_predicted = 0
    ct_gold = 0
    ct_correct = 0

    for document in test_data:
        logging.info("###### begin {}".format(document.name))

        # copy entities from gold entities
        pred_entities = []
        for gold in document.entities:
            pred = Entity()
            pred.id = gold.id
            pred.type = gold.type
            pred.spans = gold.spans
            pred.name = gold.name
            pred_entities.append(pred)

        neural_model.process_one_doc(document, pred_entities, dictionary, dictionary_reverse, isMeddra_dict)

        ct_norm_gold = len(document.entities)
        ct_norm_predict = len(pred_entities)
        ct_norm_correct = 0

        for predict_entity in pred_entities:
            for gold_entity in document.entities:
                if predict_entity.equals_span(gold_entity):
                    b_right = False

                    if len(gold_entity.norm_ids) == 0:
                        # if gold_entity is not annotated, we count it as a TP
                        b_right = True
                        ct_norm_correct += 1
                    else:
                        if len(predict_entity.norm_ids) != 0 and predict_entity.norm_ids[0] in dictionary:
                            concept = dictionary[predict_entity.norm_ids[0]]
                            if gold_entity.norm_ids[0] in concept.codes:
                                ct_norm_correct += 1
                                b_right = True

                    # log every wrongly normalized mention with its gold and predicted concepts
                    if not b_right:
                        if len(predict_entity.norm_ids) != 0 and predict_entity.norm_ids[0] in dictionary:
                            concept = dictionary[predict_entity.norm_ids[0]]
                            logging.info("entity name: {} | gold id, name: {}, {} | pred cui, codes, names: {}, {}, {}"
                                         .format(predict_entity.name, gold_entity.norm_ids[0], gold_entity.norm_names[0],
                                                 concept.cui, concept.codes, concept.names))

                    break

        ct_predicted += ct_norm_predict
        ct_gold += ct_norm_gold
        ct_correct += ct_norm_correct

    # micro-averaged precision/recall/F1 with zero-division guards
    if ct_gold == 0 or ct_predicted == 0:
        precision = 0
        recall = 0
    else:
        precision = ct_correct * 1.0 / ct_predicted
        recall = ct_correct * 1.0 / ct_gold

    if precision + recall == 0:
        f_measure = 0
    else:
        f_measure = 2 * precision * recall / (precision + recall)

    logging.info("Dev: p: %.4f, r: %.4f, f: %.4f" % (precision, recall, f_measure))