Esempio n. 1
0
def expand_disont_disease(orangeboard, node):
    """Expand a Disease Ontology disease node with its neighbouring nodes.

    Three groups of nodes/relationships are added to ``orangeboard``:

    * every child disease reported by ``QueryDisont`` for this disease,
      linked as ``node -is_parent_of-> child``;
    * every UniProt protein reported by ``QueryDisGeNet`` for each MeSH ID
      of this disease, linked as ``protein -gene_assoc_with-> node``;
    * every phenotype reported by ``QueryBioLink`` for this disease,
      linked as ``phenotype -phenotype_assoc_with-> node``.

    Params:
        orangeboard - object exposing ``add_node`` and ``add_rel``
        node - the disease node to expand; ``node.name`` is used as the
            disease identifier for all queries
    """
    doid = node.name

    # child diseases of this disease
    children = QueryDisont.query_disont_to_child_disonts_desc(doid)
    for child_id, child_desc in children.items():
        child_node = orangeboard.add_node('disont_disease', child_id,
                                          desc=child_desc)
        orangeboard.add_rel('is_parent_of', 'DiseaseOntology', node,
                            child_node)

    # proteins associated with the disease through its MeSH IDs
    for mesh_id in QueryDisont.query_disont_to_mesh_id(doid):
        proteins = QueryDisGeNet.query_mesh_id_to_uniprot_ids_desc(mesh_id)
        for protein_id, protein_desc in proteins.items():
            protein_node = orangeboard.add_node('uniprot_protein', protein_id,
                                                desc=protein_desc)
            orangeboard.add_rel('gene_assoc_with', 'DisGeNet', protein_node,
                                node)

    # phenotypes associated with this disease
    phenotypes = QueryBioLink.get_phenotypes_for_disease_desc(doid)
    for phenotype_id, phenotype_desc in phenotypes.items():
        phenotype_node = orangeboard.add_node('phenont_phenotype',
                                              phenotype_id,
                                              desc=phenotype_desc)
        orangeboard.add_rel('phenotype_assoc_with', 'BioLink', phenotype_node,
                            node)
Esempio n. 2
0
    def expand_disease(self, node):
        """Expand a ``disease`` node with gene-ontology, disease, protein and phenotype neighbours.

        Gene-ontology terms from ``QuerySciGraph`` are always added first.
        After that, nodes whose name contains ``"OMIM:"`` or ``"MONDO:"`` are
        handed to ``expand_genetic_condition`` / ``expand_mondo_disease`` and
        nothing else is done here. Any other node is treated as a Disease
        Ontology disease and gets its child diseases, DisGeNet proteins
        (via MeSH IDs) and BioLink phenotypes added.

        A relationship is only created when ``add_node_smart`` returns a
        node (i.e. not ``None``).

        Params:
            node - the node to expand; its ``nodetype`` must be "disease"
        """
        assert node.nodetype == "disease"
        curie_id = node.name

        go_terms = QuerySciGraph.get_gene_ontology_curie_ids_for_disease_curie_id(curie_id)
        for go_curie_id, go_term in go_terms.items():
            go_node = self.add_node_smart(go_term["ontology"].replace(" ", "_"),
                                          go_curie_id,
                                          desc=go_term["name"])
            if go_node is None:
                continue
            self.orangeboard.add_rel("affects", "Monarch_SciGraph", node, go_node,
                                     extended_reltype=go_term["predicate"].replace(" ", "_"))

        # OMIM and MONDO identifiers have their own dedicated expanders
        if "OMIM:" in curie_id:
            self.expand_genetic_condition(node)
            return
        if "MONDO:" in curie_id:
            self.expand_mondo_disease(node)
            return

        # anything reaching this point is a Disease Ontology disease
        children = QueryDisont.query_disont_to_child_disonts_desc(curie_id)
        for child_id, child_desc in children.items():
            child_node = self.add_node_smart('disease', child_id, desc=child_desc)
            if child_node is None:
                continue
            self.orangeboard.add_rel('subclass_of', 'DiseaseOntology',
                                     child_node, node, extended_reltype="subclass_of")

        for mesh_id in QueryDisont.query_disont_to_mesh_id(curie_id):
            proteins = QueryDisGeNet.query_mesh_id_to_uniprot_ids_desc(mesh_id)
            for protein_id, protein_desc in proteins.items():
                assert '-' not in protein_id
                protein_node = self.add_node_smart('protein', protein_id, desc=protein_desc)
                if protein_node is None:
                    continue
                self.orangeboard.add_rel("gene_associated_with_condition", "DisGeNet", protein_node,
                                         node, extended_reltype="gene_associated_with_condition")

        # phenotypes associated with this disease
        phenotypes = QueryBioLink.get_phenotypes_for_disease_desc(curie_id)
        for phenotype_id, phenotype_desc in phenotypes.items():
            phenotype_node = self.add_node_smart("phenotypic_feature", phenotype_id,
                                                 desc=phenotype_desc)
            if phenotype_node is None:
                continue
            self.orangeboard.add_rel("has_phenotype", 'BioLink', node, phenotype_node,
                                     extended_reltype="has_phenotype")
Esempio n. 3
0
    def test_query_disont_to_child_disonts_desc(self):
        """Child diseases of DOID:9352 are returned as an id -> description dict."""
        # DOID:9352 is type 2 diabetes mellitus
        actual = QD.query_disont_to_child_disonts_desc("DOID:9352")
        expected = {
            'DOID:1837': 'diabetic ketoacidosis',
            'DOID:10182': 'diabetic peripheral angiopathy',
            'DOID:11712': 'lipoatrophic diabetes',
        }
        self.assertDictEqual(actual, expected)
 def get_mesh_term_for_all(curie_id, description):
     """
     Takes a curie ID, detects the ontology from the curie id prefix, and tries to find
     matching MeSH term(s) / search names for it.

     Params:
         curie_id - A string containing the curie id of the node. Formatted <source abbreviation>:<number> e.g. DOID:8398
         description - The English name for the node (converted with str() if it is not already a string)

     Lookup order:
         1. If QueryNCBIeUtils.is_mesh_term(description) is truthy, the description itself is
            returned as [description + '[MeSH Terms]'].
         2. Otherwise NormGoogleDistance.get_mesh_from_oxo(curie_id) is tried.
         3. If that returns None, a prefix-specific lookup is used:
             "Reactome" (any prefix starting with "react", case-insensitive) - QueryNCBIeUtils.get_reactome_names
             "GO"       - no lookup is performed
             "UniProt"  (any prefix starting with "UniProt") - QueryNCBIeUtils.get_uniprot_names
             "HP"       - QueryNCBIeUtils.get_mesh_terms_for_hp_id
             "UBERON"   - QueryEBIOLS.get_mesh_id_for_uberon_id, then MeSH uid lookups
             "NCBIGene" - QueryNCBIeUtils.get_pubmed_from_ncbi_gene
             "DOID"     - QueryDisont.query_disont_to_mesh_id, then MeSH uid lookups
             "OMIM"     - QueryNCBIeUtils.get_mesh_terms_for_omim_id
             "ChEMBL"   - QueryMyChem.get_mesh_id, then a MeSH uid lookup
            Any other prefix (e.g. "CL") has no branch and falls through to the fallback.

     Returns:
         - a one-element list holding the first name that ends with '[MeSH Terms]', when the
           lookup produced a list containing such a name;
         - otherwise the lookup result unchanged, when it is not None;
         - otherwise [description] with every ';' replaced by '|'.
     """
     if type(description) != str:
         description = str(description)
     curie_list = curie_id.split(':')
     names = None
     # The description itself is already a MeSH term: no further lookup needed.
     if QueryNCBIeUtils.is_mesh_term(description):
         return [description + '[MeSH Terms]']
     names = NormGoogleDistance.get_mesh_from_oxo(curie_id)
     if names is None:
         # Prefix-specific fallbacks, used only when the OxO lookup returned None.
         # NOTE(review): curie_list[1] raises IndexError when curie_id has no ':'.
         if curie_list[0].lower().startswith("react"):
             res = QueryNCBIeUtils.get_reactome_names(curie_list[1])
             if res is not None:
                 names = res.split('|')
         elif curie_list[0] == "GO":
             # Deliberately no lookup for GO ids; names stays None.
             pass
         elif curie_list[0].startswith("UniProt"):
             res = QueryNCBIeUtils.get_uniprot_names(curie_list[1])
             if res is not None:
                 names = res.split('|')
         elif curie_list[0] == "HP":
             names = QueryNCBIeUtils.get_mesh_terms_for_hp_id(curie_id)
         elif curie_list[0] == "UBERON":
             # Strip a trailing 'PHENOTYPE' suffix (9 characters) before querying.
             if curie_id.endswith('PHENOTYPE'):
                 curie_id = curie_id[:-9]
             mesh_id = QueryEBIOLS.get_mesh_id_for_uberon_id(curie_id)
             names = []
             for entry in mesh_id:
                 if len(entry.split('.')) > 1:
                     # Entry contains a '.'; the part after ':' is passed to the
                     # MeSH-tree lookup (presumably a MeSH tree number - confirm).
                     uids=QueryNCBIeUtils.get_mesh_uids_for_mesh_tree(entry.split(':')[1])
                     for uid in uids:
                         # 68000000 is added to the digits that follow the leading letter;
                         # presumably this maps a MeSH descriptor id to its NCBI uid - TODO confirm.
                         # NOTE(review): the try also wraps the lookup call, so an IndexError
                         # raised inside it would trigger the fallback as well.
                         try:
                             uid_num = int(uid.split(':')[1][1:]) + 68000000
                             names += QueryNCBIeUtils.get_mesh_terms_for_mesh_uid(uid_num)
                         except IndexError:
                             # No ':' in uid: treat the whole string as a numeric uid.
                             uid_num = int(uid)
                             names += QueryNCBIeUtils.get_mesh_terms_for_mesh_uid(uid_num)
                 else:
                     try:
                         uid = entry.split(':')[1]
                         uid_num = int(uid[1:]) + 68000000
                         names += QueryNCBIeUtils.get_mesh_terms_for_mesh_uid(uid_num)
                     except IndexError:
                         # No ':' in entry: treat the whole string as a numeric uid.
                         uid_num = int(entry)
                         names += QueryNCBIeUtils.get_mesh_terms_for_mesh_uid(uid_num)
             if len(names) == 0:
                 names = None
             else:
                 # Tag only the first name; the final loop below then returns just that one.
                 names[0] = names[0] + '[MeSH Terms]'
         elif curie_list[0] == "NCBIGene":
             gene_id = curie_id.split(':')[1]
             names = QueryNCBIeUtils.get_pubmed_from_ncbi_gene(gene_id)
         elif curie_list[0] == "DOID":
             mesh_id = QueryDisont.query_disont_to_mesh_id(curie_id)
             names = []
             for uid in mesh_id:
                 # Same leading-letter strip and 68000000 offset as in the UBERON branch.
                 uid_num = int(uid[1:]) + 68000000
                 name = QueryNCBIeUtils.get_mesh_terms_for_mesh_uid(uid_num)
                 if name is not None:
                     names += name
             if len(names) == 0:
                 names = None
             else:
                 # Tag only the first name; the final loop below then returns just that one.
                 names[0] = names[0] + '[MeSH Terms]'
         elif curie_list[0] == "OMIM":
             names = QueryNCBIeUtils.get_mesh_terms_for_omim_id(curie_list[1])
         elif curie_list[0] == "ChEMBL":
             # The id is queried without the ':' and upper-cased, e.g. 'ChEMBL:123' -> 'CHEMBL123'.
             chembl_id = curie_id.replace(':', '').upper()
             mesh_id = QueryMyChem.get_mesh_id(chembl_id)
             if mesh_id is not None:
                 mesh_id = int(mesh_id[1:]) + 68000000
                 names = QueryNCBIeUtils.get_mesh_terms_for_mesh_uid(mesh_id)
     if names is not None:
         if type(names) == list:
             # Prefer the first name explicitly tagged as a MeSH term, if there is one.
             for name in names:
                 if name.endswith('[MeSH Terms]'):
                     return [name]
         return names
     # Nothing found: fall back to the description, with ';' separators turned into '|'.
     return [description.replace(';', '|')]
Esempio n. 5
0
def seed_and_expand_kg_q2(num_expansions=3, seed_parts=None):
    """Seed the knowledge graph with the Q2 conditions and/or drugs, then expand it.

    The drug/condition list is read from
    ``../../data/q2/q2-drugandcondition-list.txt`` (tab separated).

    * ``'conditions'`` part: each distinct value of the ``Condition`` column
      is mapped to curie IDs via ``get_curie_ont_ids_for_mesh_term``; DOID and
      HP ids are added as nodes on ``ob``. The last curie id added for each
      condition is written to a new ``CURIE_ID`` column and the table is
      saved to ``q2-drugandcondition-list-mapped-output.txt``.
    * ``'drugs'`` part: lower-cased drug names from the ``Drug`` column plus the
      ``NAME`` column of ``../../data/q2/drugset2017_filt.txt`` are added as
      ``pharos_drug`` nodes, described by one of their ChEMBL ids (or ``''``).

    In each part only the first node added is flagged as a seed node, and
    ``bne.expand_all_nodes()`` is run ``num_expansions`` times afterwards.

    Params:
        num_expansions - how many times to expand all nodes after each part
        seed_parts - container of part names ('conditions', 'drugs') to run;
            ``None`` runs both
    """
    drug_dis_df = pandas.read_csv('../../data/q2/q2-drugandcondition-list.txt',
                                  sep='\t')
    run_all_parts = seed_parts is None

    if run_all_parts or 'conditions' in seed_parts:

        print('=====================> seeding disease nodes for Q2')
        is_seed = True
        seen_mesh_terms = set()
        curie_id_by_mesh_term = dict()
        curie_id_column = []
        for _, row in drug_dis_df.iterrows():
            mesh_term = row['Condition']
            if mesh_term not in seen_mesh_terms:
                curie_id_by_mesh_term[mesh_term] = None
                seen_mesh_terms.add(mesh_term)
                curie_ids = get_curie_ont_ids_for_mesh_term(mesh_term)
                if len(curie_ids) > 0:
                    assert type(curie_ids) == list
                    for curie_id in curie_ids:
                        if 'DOID:' in curie_id:
                            label = QueryDisont.query_disont_to_label(curie_id)
                            ob.add_node('disont_disease',
                                        curie_id,
                                        desc=label,
                                        seed_node_bool=is_seed)
                            curie_id_by_mesh_term[mesh_term] = curie_id
                            is_seed = False
                        elif 'HP:' in curie_id:
                            ob.add_node('phenont_phenotype',
                                        curie_id,
                                        desc=mesh_term,
                                        seed_node_bool=is_seed)
                            curie_id_by_mesh_term[mesh_term] = curie_id
                            is_seed = False
                        else:
                            assert False  ## should never get here
            # one entry per row, repeated conditions reuse the stored mapping
            curie_id_column.append(curie_id_by_mesh_term[mesh_term])
        drug_dis_df['CURIE_ID'] = pandas.Series(curie_id_column,
                                                index=drug_dis_df.index)
        drug_dis_df.to_csv(
            '../../data/q2/q2-drugandcondition-list-mapped-output.txt',
            sep='\t')
        ## expand the knowledge graph num_expansions times
        for _ in range(num_expansions):
            bne.expand_all_nodes()

    if run_all_parts or 'drugs' in seed_parts:
        print('=====================> seeding drug nodes for Q2')
        all_drugs = set()

        for _, row in drug_dis_df.iterrows():
            all_drugs.add(row['Drug'].lower())

        fda_drug_df = pandas.read_csv('../../data/q2/drugset2017_filt.txt',
                                      sep='\t')

        for _, row in fda_drug_df.iterrows():
            all_drugs.add(row['NAME'].lower())

        for position, drug_name in enumerate(all_drugs):
            print(drug_name)
            chembl_ids = QueryChEMBL.get_chembl_ids_for_drug(drug_name)
            has_ids = chembl_ids is not None and len(chembl_ids) > 0
            chembl_id = next(iter(chembl_ids)) if has_ids else ''
            # only the first drug processed is marked as a seed node
            ob.add_node('pharos_drug',
                        drug_name,
                        desc=chembl_id,
                        seed_node_bool=(position == 0))

        ## expand the knowledge graph num_expansions times
        for _ in range(num_expansions):
            bne.expand_all_nodes()
Esempio n. 6
0
 def setUpClass(cls):
     """Create one QueryDisont instance and store it on the class as ``disont``."""
     cls.disont = QueryDisont()
Esempio n. 7
0
 def test_query_disont_to_child_disonts(self):
     """Child diseases of DOID:9352 are returned as a set of numeric ids."""
     children = QD.query_disont_to_child_disonts('DOID:9352')
     expected_children = {1837, 10182, 11712}
     self.assertSetEqual(children, expected_children)
Esempio n. 8
0
 def test_query_disont_to_label(self):
     """DOID:0050741 resolves to the label "alcohol dependence"."""
     label = QD.query_disont_to_label("DOID:0050741")
     expected_label = "alcohol dependence"
     self.assertEqual(label, expected_label)