Beispiel #1
0
    def query_all_kb(self, kb: KnowledgeBase):
        """
        Enrich every entity of the given KB in place, then dump the KB.

        For each entity, synonyms are requested from ``self.syn_enricher`` using
        the entity's aliases, and wiki entities plus a definition are requested
        from ``self.wiki_enricher`` using the entity's canonical name. The
        results are stored in the entity's ``additional_details`` under the keys
        ``mesh_synonyms``, ``dbpedia_synonyms`` and ``wiki_entities``.
        :param kb: knowledge base whose ``entities`` are enriched
        :return: None; the KB is handed to ``kb.dump`` with ``self.out_path``
        """
        progress = tqdm.tqdm(kb.entities, total=len(kb.entities))
        for entity in progress:
            synonym_result = self.syn_enricher.get_synonyms_to_entity(
                entity.aliases)
            mesh_synonyms, dbpedia_synonyms = synonym_result

            wiki_result = self.wiki_enricher.get_definition_to_entity(
                entity.canonical_name)
            wiki_entities, wiki_definition = wiki_result

            details = entity.additional_details
            details['mesh_synonyms'] = mesh_synonyms
            details['dbpedia_synonyms'] = dbpedia_synonyms
            details['wiki_entities'] = wiki_entities

            # an existing definition is only replaced when its length is
            # below 5; longer definitions are kept as they are
            if len(entity.definition) < 5:
                entity.definition = wiki_definition

        kb.dump(kb, self.out_path)
    def create_umls_kbs(self, entities):
        """
        Build one KnowledgeBase per name in ``constants.TRAINING_KBS`` and dump it.

        Each entity record must provide the keys ``research_entity_id``,
        ``canonical_name``, ``aliases``, ``definition`` (an iterable of strings,
        joined with single spaces) and ``relations`` (an iterable of
        ``(ent1_id, ent2_id, rel_type, symmetric)`` tuples whose ids are
        indexable with at least two parts).
        :param entities: dict mapping KB name to a dict of entity records
        :return: None; each KB is dumped as ``kb-<name>.json`` under
                 ``self.OUTPUT_KB_DIR`` and then passed to ``add_context_to_kb``
        """
        for kb_name in constants.TRAINING_KBS:
            sys.stdout.write("\tCreating KB %s\n" % kb_name)
            kb = KnowledgeBase()
            kb.name = kb_name

            for record in entities[kb_name].values():
                definition_text = ' '.join(record['definition'])
                entity = KBEntity(record['research_entity_id'],
                                  record['canonical_name'],
                                  record['aliases'],
                                  definition_text)

                for source_id, target_id, rel_type, symmetric in record[
                        'relations']:
                    # relation endpoints are the first two id parts joined by ':'
                    endpoints = [
                        '{}:{}'.format(source_id[0], source_id[1]),
                        '{}:{}'.format(target_id[0], target_id[1]),
                    ]
                    kb.add_relation(KBRelation(rel_type, endpoints, symmetric))
                    # the entity refers to the relation by its position in
                    # kb.relations, i.e. the index of the one just added
                    entity.relation_ids.append(len(kb.relations) - 1)

                kb.add_entity(entity)

            # write plain KB to json
            plain_kb_path = os.path.join(self.OUTPUT_KB_DIR,
                                         'kb-{}.json'.format(kb_name))
            kb.dump(kb, plain_kb_path)

            # hand the KB over for context enrichment (expected to write its
            # own output as well)
            self.add_context_to_kb(kb)