def clean_ner_result(result_file):
    """Parse a tab-separated NER result file into a merged list of mentions.

    Every non-blank line is split on tabs. Fields 0 and 1 are parsed as the
    integer span bounds, field 3 is the mention type, and an optional field 4
    may carry a ``MESH``- or ``CHEBI``-prefixed identifier.

    Args:
        result_file: Path of the NER result file to read.

    Returns:
        The list populated by ``Mention.merge_mention_list``: mentions from
        rows that have an identifier field are merged in first, then the
        mentions from plain four-field rows.

    Raises:
        IndexError: If a non-blank line has fewer than four fields.
        ValueError: If a span bound or a CHEBI id is not an integer.
    """
    ord_mention_list = []
    med_mention_list = []

    # NOTE(review): the file is read in binary mode, so under Python 3 each
    # line would be ``bytes`` and the str-based split below would fail; this
    # code appears to target Python 2 -- confirm before porting.
    # The context manager guarantees the handle is closed even when a
    # malformed row raises part-way through the file.
    with open(result_file, 'rb') as fin:
        for line in fin:
            line = line.strip()
            if not line:
                continue

            vals = line.split('\t')
            mtype = vals[3]
            begin, end = int(vals[0]), int(vals[1])
            # TODO
            # Disease/Chemical rows get their end offset decremented by one;
            # presumably those rows use an exclusive end -- confirm.
            if mtype in ('Disease', 'Chemical'):
                end -= 1

            mention = Mention()
            mention.span = (begin, end)
            mention.mtype = mtype

            if len(vals) == 4:
                ord_mention_list.append(mention)
                continue

            ref = vals[4]
            if ref.startswith('MESH'):
                # Skip the 'MESH' prefix plus one separator character.
                mention.mesh_id = ref[5:]
            elif ref.startswith('CHEBI'):
                # Skip the 'CHEBI' prefix plus one separator character.
                mention.chebi_id = int(ref[6:])
            # Rows with an unrecognised prefix are still kept, without an id.
            med_mention_list.append(mention)

    merged_mention_list = []
    Mention.merge_mention_list(med_mention_list, merged_mention_list)
    Mention.merge_mention_list(ord_mention_list, merged_mention_list)
    return merged_mention_list
def link_text(self, text, mention_detection_result):
    """Link the mentions of ``text`` and serialise the outcome as JSON.

    The detector output and the mentions returned by
    ``__find_mesh_mentions`` are merged into one list, passed to
    ``__link_mention_to_wiki`` and then indexed by
    ``MedLink.__asign_indices``.

    Args:
        text: The text being processed.
        mention_detection_result: Mentions produced by an earlier detection
            step; merged before the dictionary-derived ones.

    Returns:
        A JSON string (``indent=2``) with the keys ``entities``, ``spans``,
        ``idx`` and ``type``.
    """
    dictionary_mentions = self.__find_mesh_mentions(text)

    mentions = []
    Mention.merge_mention_list(mention_detection_result, mentions)
    Mention.merge_mention_list(dictionary_mentions, mentions)

    self.__link_mention_to_wiki(text, mentions)
    mesh_indices, wiki_indices, chebi_indices, indices = MedLink.__asign_indices(
        mentions)

    # The helpers receive the dict and their return values are ignored, so
    # they presumably fill it in place.
    entities = {}
    self.__add_wiki_mention_info(wiki_indices, entities)
    self.__add_mesh_mention_info(mesh_indices, entities)
    self.__add_chebi_mention_info(chebi_indices, entities)

    spans = [m.span for m in mentions]
    types = [m.mtype for m in mentions]

    payload = {
        'entities': entities,
        'spans': spans,
        'idx': indices,
        'type': types,
    }
    return json.dumps(payload, indent=2)
def link_mentions_info(self, text, mention_detection_result, find_mesh_mentions_by_dict=False):
    """Link mentions of ``text`` and return the result as a JSON string.

    Args:
        text: The text being processed.
        mention_detection_result: Mentions produced by an earlier detection
            step. They are always merged into the working list.
        find_mesh_mentions_by_dict: When true, mentions returned by
            ``__find_mesh_mentions`` are merged in as well, and after linking
            the MeSH/ChEBI ids of a mention are copied to every mention whose
            name matches case-insensitively.

    Returns:
        A JSON string (``indent=2``) with the keys ``entities``, ``spans``,
        ``idx`` and ``type``.
    """
    merged_mention_list = []

    mesh_mention_list = None
    if find_mesh_mentions_by_dict:
        mesh_mention_list = self.__find_mesh_mentions(text)
    # The detector output is merged regardless of the flag, so that the
    # default call (flag off) still links the detected mentions instead of
    # working on an empty list.
    Mention.merge_mention_list(mention_detection_result, merged_mention_list)
    if find_mesh_mentions_by_dict:
        Mention.merge_mention_list(mesh_mention_list, merged_mention_list)

    linked_mentions = self.link_mentions(merged_mention_list, text)

    if find_mesh_mentions_by_dict:
        for mention in merged_mention_list:
            # Only mentions that carry a MeSH id or a non-negative ChEBI id
            # act as a source for propagation.
            if not (mention.mesh_id or mention.chebi_id > -1):
                continue
            name_key = mention.name.lower()
            for other in merged_mention_list:
                if other.name.lower() == name_key:
                    other.mesh_id = mention.mesh_id
                    other.chebi_id = mention.chebi_id

    mesh_idx_dict, wiki_idx_dict, chebi_idx_dict, idx_list = MedLink.__asign_indices(
        linked_mentions)

    # The helpers receive the dict and their return values are ignored, so
    # they presumably fill it in place.
    entities_dict = {}
    self.__add_wiki_mention_info(wiki_idx_dict, entities_dict)
    self.__add_mesh_mention_info(mesh_idx_dict, entities_dict)
    self.__add_chebi_mention_info(chebi_idx_dict, entities_dict)

    result_span_list = [mention.span for mention in linked_mentions]
    mention_type_list = [mention.mtype for mention in linked_mentions]
    # Return value is ignored, so __fix_types presumably adjusts
    # mention_type_list in place; it must run before serialisation.
    self.__fix_types(mesh_idx_dict, idx_list, mention_type_list)

    result_dict = {
        'entities': entities_dict,
        'spans': result_span_list,
        'idx': idx_list,
        'type': mention_type_list,
    }
    return json.dumps(result_dict, indent=2)