コード例 #1
0
def clean_ner_result(result_file):
    """Parse a tab-separated NER result file into one merged mention list.

    Every non-blank line is split on tabs. Fields 0 and 1 are parsed as the
    integer span bounds, field 3 is stored as the mention type, and a fifth
    field, when present, carries an identifier prefixed with ``MESH`` or
    ``CHEBI``.

    Args:
        result_file: Path of the NER output file to read.

    Returns:
        The list filled by ``Mention.merge_mention_list``: mentions that have
        an identifier column are merged in first, then the ones without.

    Raises:
        IndexError: If a non-blank line has fewer than four fields.
        ValueError: If a span bound or a CHEBI id is not an integer.
    """
    ord_mention_list = []
    med_mention_list = []

    # Text mode keeps the str separator '\t' valid on Python 3 (a bytes line
    # cannot be split by a str), and the context manager closes the file even
    # when a malformed line raises part-way through.
    with open(result_file, 'r') as fin:
        for line in fin:
            line = line.strip()
            if not line:
                continue

            vals = line.split('\t')
            mtype = vals[3]
            begin = int(vals[0])
            end = int(vals[1])
            # TODO (carried over from the original): the end offset is
            # decremented for Disease/Chemical rows only; presumably those
            # rows report an exclusive end position -- confirm against the
            # NER tool's output format.
            if mtype in ('Disease', 'Chemical'):
                end -= 1

            mention = Mention()
            mention.span = (begin, end)
            mention.mtype = mtype

            if len(vals) == 4:
                # No identifier column: an ordinary (non-linked) mention.
                ord_mention_list.append(mention)
                continue

            ident = vals[4]
            # The slice offsets skip the prefix plus one separator character.
            if ident.startswith('MESH'):
                mention.mesh_id = ident[5:]
            elif ident.startswith('CHEBI'):
                mention.chebi_id = int(ident[6:])
            med_mention_list.append(mention)

    merged_mention_list = []
    # Mentions with identifiers are merged first, so they are already in the
    # target list when the ordinary mentions are merged.
    Mention.merge_mention_list(med_mention_list, merged_mention_list)
    Mention.merge_mention_list(ord_mention_list, merged_mention_list)

    return merged_mention_list
コード例 #2
0
ファイル: medlink.py プロジェクト: s0sbazinga/demo
    def link_text(self, text, mention_detection_result):
        """Link the mentions of ``text`` and return the outcome as JSON.

        Args:
            text: The text being processed; passed to the MeSH mention finder
                and to the wiki linker.
            mention_detection_result: Mentions produced by an earlier
                detection step; merged in before the MeSH mentions.

        Returns:
            A JSON string (indent 2) with the keys ``entities``, ``spans``,
            ``idx`` and ``type``. ``spans`` and ``type`` hold one entry per
            merged mention, in list order.
        """
        mesh_found = self.__find_mesh_mentions(text)

        # Detected mentions go in first, dictionary-found MeSH mentions second.
        combined = []
        for source in (mention_detection_result, mesh_found):
            Mention.merge_mention_list(source, combined)

        self.__link_mention_to_wiki(text, combined)

        mesh_idx, wiki_idx, chebi_idx, indices = MedLink.__asign_indices(
            combined)

        # Entity details are added in wiki, MeSH, ChEBI order.
        entities = {}
        self.__add_wiki_mention_info(wiki_idx, entities)
        self.__add_mesh_mention_info(mesh_idx, entities)
        self.__add_chebi_mention_info(chebi_idx, entities)

        payload = {
            'entities': entities,
            'spans': [m.span for m in combined],
            'idx': indices,
            'type': [m.mtype for m in combined],
        }
        return json.dumps(payload, indent=2)
コード例 #3
0
ファイル: medlink.py プロジェクト: XYHHH/ks-studio-el
    def link_mentions_info(self, text, mention_detection_result, find_mesh_mentions_by_dict=False):
        """Link mentions of ``text`` and return the outcome as JSON.

        Args:
            text: The text being processed.
            mention_detection_result: Mentions produced by an earlier
                detection step. They are always merged into the working list.
            find_mesh_mentions_by_dict: When true, mentions found by the MeSH
                mention finder are merged in as well, and MeSH/ChEBI ids are
                copied between mentions that share a case-insensitive name.

        Returns:
            A JSON string (indent 2) with the keys ``entities``, ``spans``,
            ``idx`` and ``type``. ``spans`` and ``type`` hold one entry per
            mention returned by ``link_mentions``.
        """
        merged_mention_list = []
        # Always start from the detector's mentions. This merge used to run
        # only when find_mesh_mentions_by_dict was set, so the default call
        # linked an empty list and dropped mention_detection_result entirely.
        Mention.merge_mention_list(mention_detection_result, merged_mention_list)
        if find_mesh_mentions_by_dict:
            mesh_mention_list = self.__find_mesh_mentions(text)
            Mention.merge_mention_list(mesh_mention_list, merged_mention_list)

        linked_mentions = self.link_mentions(merged_mention_list, text)

        if find_mesh_mentions_by_dict:
            # Copy ids from every mention that has one to all mentions with the
            # same lower-cased name. The loop order is kept as-is because each
            # pass mutates the mentions later passes read.
            # NOTE(review): assumes chebi_id is an int with -1 meaning "unset";
            # confirm against the Mention class.
            for mention in merged_mention_list:
                if mention.mesh_id or mention.chebi_id > -1:
                    for mention1 in merged_mention_list:
                        if mention.name.lower() == mention1.name.lower():
                            mention1.mesh_id = mention.mesh_id
                            mention1.chebi_id = mention.chebi_id

        mesh_idx_dict, wiki_idx_dict, chebi_idx_dict, idx_list = MedLink.__asign_indices(linked_mentions)

        result_dict = dict()
        result_dict['entities'] = entities_dict = dict()
        # Entity details are added in wiki, MeSH, ChEBI order.
        self.__add_wiki_mention_info(wiki_idx_dict, entities_dict)
        self.__add_mesh_mention_info(mesh_idx_dict, entities_dict)
        self.__add_chebi_mention_info(chebi_idx_dict, entities_dict)

        result_span_list = [mention.span for mention in linked_mentions]
        mention_type_list = [mention.mtype for mention in linked_mentions]

        # Called before the type list is stored in the result; whether it
        # edits the list in place is not visible from here.
        self.__fix_types(mesh_idx_dict, idx_list, mention_type_list)

        result_dict['spans'] = result_span_list
        result_dict['idx'] = idx_list
        result_dict['type'] = mention_type_list

        return json.dumps(result_dict, indent=2)