def clean_ner_result(result_file):
    """Parse a tab-separated NER result file into a merged mention list.

    Each non-blank line is split on tabs. Fields 0 and 1 are parsed as
    integer span offsets, field 3 is the mention type, and an optional
    field 4 carries an identifier prefixed with 'MESH' or 'CHEBI'.

    Mentions from lines with exactly four fields are collected separately
    from the others; both groups are then passed, identifier-bearing
    group first, to ``Mention.merge_mention_list`` together with one
    shared output list, which is returned.

    Args:
        result_file: Path of the NER result file to read.

    Returns:
        The list populated by ``Mention.merge_mention_list``.

    Raises:
        IndexError: If a non-blank line has fewer than four fields.
        ValueError: If an offset (or a CHEBI id) is not a valid integer.
    """
    ord_mention_list = []
    med_mention_list = []

    # The context manager closes the file even when a malformed line
    # raises part-way through parsing.
    with open(result_file, 'rb') as fin:
        for line in fin:
            line = line.strip()
            if not line:
                continue

            vals = line.split('\t')

            # TODO
            # For 'Disease' and 'Chemical' the end offset is reduced by one;
            # presumably those rows use an exclusive end -- confirm.
            if vals[3] == 'Disease' or vals[3] == 'Chemical':
                span = (int(vals[0]), int(vals[1]) - 1)
            else:
                span = (int(vals[0]), int(vals[1]))

            mention = Mention()
            mention.span = span
            mention.mtype = vals[3]

            if len(vals) == 4:
                ord_mention_list.append(mention)
            else:
                # Slice off the prefix plus one following character
                # (presumably a separator such as ':' -- confirm).
                if vals[4].startswith('MESH'):
                    mention.mesh_id = vals[4][5:]
                elif vals[4].startswith('CHEBI'):
                    mention.chebi_id = int(vals[4][6:])
                med_mention_list.append(mention)

    merged_mention_list = []
    # Identifier-bearing mentions are merged before the ordinary ones.
    Mention.merge_mention_list(med_mention_list, merged_mention_list)
    Mention.merge_mention_list(ord_mention_list, merged_mention_list)
    return merged_mention_list
def __find_mesh_mentions(self, text):
    """Build 'MISC' mentions for every term the MeSH matcher finds in text.

    ``self.mesh_match.find_all_terms(text)`` is unpacked into two values,
    spans and ids, which are walked in lockstep. One ``Mention`` is
    created per (span, id) pair.

    Args:
        text: The text handed to the matcher.

    Returns:
        A list of ``Mention`` objects with ``span``, ``mtype`` ('MISC')
        and ``mesh_id`` set.
    """
    def build(span, term_id):
        # Attributes are assigned in the order span, mtype, mesh_id.
        found = Mention()
        found.span = span
        found.mtype = 'MISC'
        found.mesh_id = term_id
        return found

    spans, term_ids = self.mesh_match.find_all_terms(text)
    return [build(span, term_id) for span, term_id in izip(spans, term_ids)]