예제 #1
0
class ChemBioKS(Service):
    """ Generic service endpoints for medical and bio-chemical data. This set
        comprises portions of chem2bio2rdf (CTD, KEGG, PubChem, DRUGBANK) """
    def __init__(self, context):
        """Register this service as "chembio" and open its triple store.

        :param context: Service context, handed through to the ``Service``
            base-class initializer.
        """
        super().__init__("chembio", context)
        # NOTE(review): self.url is presumably set by the base initializer
        # above -- confirm against Service.
        self.triplestore = TripleStore(self.url)

    def query_chembio(self, query):
        """Run a raw SPARQL query against this service's triple store.

        :param query: SPARQL query text.
        :return: Whatever ``self.triplestore.execute_query`` returns.
        """
        store = self.triplestore
        return store.execute_query(query)

    def get_exposure_conditions(self, chemicals):
        """Look up gene and pathway rows for the given stressor chemicals.

        Fills the stored "ctd_gene_expo_disease" query template with the
        chemicals (each rendered as a ``( mesh:<id> )`` value row), executes
        it and flattens every result binding into a plain dict.

        :param chemicals: List of IDs for substances of interest.
        :type chemicals: list of MeSH IDs, eg. D052638
        :return: List of dicts with the keys chemical, gene, pathway,
            pathName, pathID and human (True when the pathway name contains
            "(human)").
        """
        mesh_rows = ' '.join(
            "( mesh:{0} )".format(chemical) for chemical in chemicals)
        template = self.triplestore.get_template("ctd_gene_expo_disease")
        query = template.safe_substitute(chemicals=mesh_rows)
        response = self.triplestore.execute_query(query)

        rows = []
        for binding in response.bindings:
            row = {
                "chemical": binding['chemical'].value,
                "gene": binding['gene'].value,
                "pathway": binding['kegg_pathway'].value,
                "pathName": binding['pathway_name'].value,
                "pathID": binding['pathway_id'].value,
            }
            # Flag human pathways based on the pathway name just read.
            row["human"] = '(human)' in row["pathName"]
            rows.append(row)
        return rows

    def get_drugs_by_condition(self, conditions):
        """ Get drugs associated with a set of conditions.

        :param conditions: Conditions to find associated drugs for.
        :type conditions: List of MeSH IDs for conditions, eg.: D001249
        """
        if not isinstance(conditions, list):
            conditions = [conditions]

        conditions = list(
            map(lambda v: v.replace("MESH:", "mesh:"), conditions))
        prefix = "mesh:"
        if any(map(lambda v: v.startswith(prefix), conditions)):
            prefix = ""
        condition_list = ', '.join(
            list(map(lambda d: " {0}{1} ".format(prefix, d), conditions)))
        result = self.triplestore.query_template(
            inputs={"diseaseIds": condition_list.lower()},
            outputs=[
                'drugID', 'drugGenericName', 'pubChemCID', 'diseasePMIDs'
            ],
            template_text="""
            prefix mesh:           <http://bio2rdf.org/mesh:> 
            prefix ctd:            <http://chem2bio2rdf.org/ctd/resource/>
            prefix db_resource:    <http://chem2bio2rdf.org/drugbank/resource/>
            select ?drugID ?drugGenericName ?diseasePMIDs ?ctdChemDis ?pubChemCID where {
               values ( ?diseaseId ) { ( $diseaseIds ) }
               ?ctdChemDis  ctd:cid                        ?pubChemCID;
                            ctd:diseaseid                  ?diseaseId;
                            ctd:pubmedids                  ?diseasePMIDs.
               ?dbInter     db_resource:Name               ?name ;
	                    db_resource:DBID               ?drugID .
               ?drugID      db_resource:CID                ?pubChemCID ;
  	                    db_resource:Generic_Name       ?drugGenericName .
            }""")
        return result

    def get_drugs_by_condition_graph(self, conditions):
        """Return (edge, node) pairs for the drugs linked to a condition.

        :param conditions: Object whose ``identifier`` attribute is passed
            to ``get_drugs_by_condition``.
        :return: List of ``(KEdge, KNode)`` tuples, one per drug row; the
            node id is the last path segment of the row's drugID.
        """
        pairs = []
        for row in self.get_drugs_by_condition(conditions.identifier):
            properties = {
                'cid': row['pubChemCID'],
                'pmids': row['diseasePMIDs']
            }
            edge = KEdge('c2b2r', 'conditionToDrug', properties)
            drug_node = KNode(row['drugID'].split('/')[-1],
                              node_types.DRUG,
                              row['drugGenericName'])
            pairs.append((edge, drug_node))
        return pairs

    def get_genes_pathways_by_disease(self, diseases):
        """Get genes and pathways associated with specified conditions.

        Fills the stored "genes_pathways_by_disease" template with the
        diseases (each rendered as a ``( mesh:<id> )`` value row) and
        flattens the result bindings.

        :param diseases: List of conditions designated by MeSH ID.
        :return: List of dicts with the keys uniprotGene, keggPath, pathName
            and human (True when the pathway name contains "(human)").
        """
        mesh_rows = ' '.join(
            "( mesh:{0} )".format(disease) for disease in diseases)
        template = self.triplestore.get_template("genes_pathways_by_disease")
        query = template.safe_substitute(diseaseMeshIDList=mesh_rows)
        response = self.triplestore.execute_query(query)

        rows = []
        for binding in response.bindings:
            row = {
                "uniprotGene": binding['uniprotGeneID'].value,
                "keggPath": binding['keggPath'].value,
                "pathName": binding['pathwayName'].value,
            }
            row["human"] = '(human)' in row["pathName"]
            rows.append(row)
        return rows

    def get_drug_gene_disease(self, disease_name, drug_name):
        """Identify targets and diseases associated with a drug name.

        Fills the stored "drug_gene_disease" template with both names and
        flattens the result bindings.

        :param disease_name: MeSH name of a disease condition.
        :param drug_name: Name of a drug.
        :return: List of dicts with the keys uniprotSymbol and diseaseId.
        """
        template = self.triplestore.get_template("drug_gene_disease")
        query = template.safe_substitute(diseaseName=disease_name,
                                         drugName=drug_name)
        response = self.triplestore.execute_query(query)
        return [{
            "uniprotSymbol": binding['uniprotSym'].value,
            "diseaseId": binding['diseaseID'].value
        } for binding in response.bindings]

    def pubchem_to_ncbigene(self, pubchemID):
        result = self.triplestore.query_template(
            inputs={"pubchemID": "pubchem:{}".format(pubchemID)},
            outputs=[
                'NCBIGene', 'meshID', 'interaction', 'interactionTypes',
                'pubmedids'
            ],
            template_text="""
            prefix pubchem:        <http://chem2bio2rdf.org/pubchem/resource/pubchem_compound/>
            prefix ctd:            <http://chem2bio2rdf.org/ctd/resource/>
	    select distinct ?NCBIGene ?meshID ?interaction ?interactionTypes ?pubmedids where {
  		?ctdChemGene 	ctd:cid                     $pubchemID;
               			ctd:chemicalid              ?meshID ;
                                ctd:geneid                  ?NCBIGene;
                                ctd:interaction             ?interaction;
                                ctd:interactiontypes        ?interactionTypes;
                                ctd:pubmedids               ?pubmedids.
            }""")
        return list(
            map(
                lambda r: {
                    'NCBIGene': r['NCBIGene'],
                    'meshID': r['meshID'],
                    'interaction': r['interaction'],
                    'interactionTypes': r['interactionTypes'],
                    'pubmedids': r['pubmedids']
                }, result))

    def drug_name_to_gene_symbol(self, drug_name):
        result = self.triplestore.query_template(
            inputs={"drugName": drug_name},
            outputs=['uniprotSym', 'pmids', 'drugID'],
            template_text="""
            prefix ctd:            <http://chem2bio2rdf.org/ctd/resource/>
            prefix db_resource:    <http://chem2bio2rdf.org/drugbank/resource/>
            select ?drugGenericName ?pmids ?drugID ?uniprotSym where {
               values ( ?drugName ) { ( "$drugName" ) }
               ?ctdChemGene ctd:cid                        ?pubChemCID;
                            ctd:pubmedids                  ?pmids;
                            ctd:gene                       ?uniprotSym .
               ?drugID      db_resource:CID                ?pubChemCID ;
  	                    db_resource:Generic_Name       ?drugGenericName .
               filter regex(lcase(str(?drugGenericName)), lcase(?drugName))
            }""")
        return list(
            map(
                lambda r: {
                    'uniprotSym': r['uniprotSym'],
                    'pmids': r.get('pmids', None),
                    'drugID': r['drugID']
                }, result))

    def drugname_to_pubchem(self, drug_name):
        result = self.triplestore.query_template(
            inputs={"drugName": drug_name},
            outputs=['pubChemID', 'drugGenericName'],
            template_text="""
            prefix db_resource:    <http://chem2bio2rdf.org/drugbank/resource/>
            select distinct ?pubChemID ?drugGenericName where {
               values ( ?drugName ) { ( "$drugName" ) }
               ?drugID      db_resource:CID                ?pubChemID ;
  	                    db_resource:Generic_Name       ?drugGenericName .
               filter regex(lcase(str(?drugGenericName)), lcase(?drugName))
            }""")
        return list(
            map(
                lambda r: {
                    'drugID': r['pubChemID'],
                    'drugName': r['drugGenericName']
                }, result))

    def gene_symbol_to_pathway(self, uniprot_symbol):
        """Find KEGG pathways for a gene (at most 500 rows).

        :param uniprot_symbol: Gene identifier; it is substituted inside
            angle brackets, i.e. used as an IRI in the query.
        :return: Result of ``triplestore.query_template`` for the single
            output keggPath.
        """
        query = """
            prefix kegg:           <http://chem2bio2rdf.org/kegg/resource/>
            prefix pharmgkb:       <http://chem2bio2rdf.org/pharmgkb/resource/>
            prefix ctd:            <http://chem2bio2rdf.org/ctd/resource/>
            select ?ctdGene ?uniprotID ?pathwayName ?keggPath where {
               values ( ?ctdGene ) { ( <$uniprotSymbol> ) }
               ?keggPath    kegg:protein    ?uniprotID ; kegg:Pathway_name ?pathwayName .
               ?pharmGene   pharmgkb:Symbol ?ctdGene ; pharmgkb:UniProt_Id ?uniprotID.
               ?ctdChemGene ctd:gene        ?ctdGene.
            } LIMIT 500
            """
        pathways = self.triplestore.query_template(
            inputs={"uniprotSymbol": uniprot_symbol},
            outputs=["keggPath"],
            template_text=query)
        return pathways

    def uniprot_to_hgnc(self, uniprot_symbol):
        """Find ``owl:sameAs`` gene ids for a UniProt gene.

        Only ids whose IRI starts with "http://bio2rdf.org/gene:" are kept.

        :param uniprot_symbol: Object whose ``identifier`` attribute is a
            CURIE; ``Text.un_curie`` is applied to it before substitution.
        :return: Result of ``triplestore.query_template`` for the single
            output hgncID.
        """
        uniprot_id = Text.un_curie(uniprot_symbol.identifier)
        query = """
            prefix uniprot:    <http://chem2bio2rdf.org/uniprot/resource/gene/>
            prefix owl:        <http://www.w3.org/2002/07/owl#>
            prefix hgnc:       <http://chem2bio2rdf.org/rdf/resource/hgnc/>
            select distinct ?hgncID where {
               values ( ?uniprotID ) { ( uniprot:${uniprotID} ) }
               ?uniprotID <http://www.w3.org/2002/07/owl#sameAs> ?hgncID.
               filter ( strstarts (str(?hgncID), "http://bio2rdf.org/gene:"))
            }
            """
        return self.triplestore.query_template(
            inputs={"uniprotID": uniprot_id},
            outputs=["hgncID"],
            template_text=query)

    def graph_uniprot_to_hgnc(self, uniprot_symbol):
        """Return (edge, node) pairs mapping a UniProt gene to HGNC genes.

        :param uniprot_symbol: Node passed on to ``uniprot_to_hgnc``.
        :return: List of ``(edge, KNode)`` tuples; each node id is "HGNC:"
            plus the text after the last ':' of the row's hgncID.
        """
        pairs = []
        for row in self.uniprot_to_hgnc(uniprot_symbol):
            edge = self.get_edge(row, predicate='synonym')
            hgnc_suffix = row['hgncID'].split(':')[-1]
            gene_node = KNode('HGNC:{0}'.format(hgnc_suffix), node_types.GENE)
            pairs.append((edge, gene_node))
        return pairs

    def graph_get_genes_by_disease(self, disease):  #reasoner
        """Return (edge, node) pairs for genes associated with a disease.

        :param disease: Node whose ``identifier`` is a CURIE; the part after
            the first ':' is lower-cased and used as the MeSH id.
        :return: List of ``(KEdge, KNode)`` tuples; each edge carries the
            row's keggPath and each node id is "UNIPROT:" plus the last path
            segment of the row's uniprotGene.
        """
        mesh_id = disease.identifier.split(':')[1].lower()
        pairs = []
        for row in self.get_genes_pathways_by_disease([mesh_id]):
            edge = KEdge('c2b2r', 'diseaseToGene',
                         {'keggPath': row['keggPath']})
            gene_id = row['uniprotGene'].split('/')[-1]
            gene_node = KNode("UNIPROT:{0}".format(gene_id), node_types.GENE)
            pairs.append((edge, gene_node))
        return pairs

    @cachier(stale_after=datetime.timedelta(days=20))
    def graph_get_pathways_by_gene(self, gene):  #reasoner
        """Return (edge, node) pairs for KEGG pathways linked to a gene.

        NOTE(review): a second method with this same name is defined later
        in this class. The later definition replaces this one in the class
        namespace, so this cached variant is not the one callers reach.

        :param gene: Node whose ``identifier`` is a CURIE; the part after
            the first ':' is upper-cased and used as the UniProt gene id.
        :return: List of ``(KEdge, KNode)`` tuples; each node id is "KEGG:"
            plus the last path segment of the row's keggPath.
        """
        gene_symbol = gene.identifier.split(':')[1].upper()
        rows = self.triplestore.query_template(
            inputs={"gene": gene_symbol},
            outputs=['keggPath'],
            template_text="""
            prefix kegg:      <http://chem2bio2rdf.org/kegg/resource/>
            prefix drugbank:  <http://chem2bio2rdf.org/drugbank/resource/>
            prefix uniprot:   <http://chem2bio2rdf.org/uniprot/resource/gene/>
            prefix ctd:       <http://chem2bio2rdf.org/ctd/resource/>
            prefix mesh:      <http://bio2rdf.org/mesh:>
            select ?drugGenericName ?uniprotGeneID ?pathwayName ?keggPath where {
               ?keggPath    kegg:protein                ?swissProtID ;
                            kegg:Pathway_name           ?pathwayName .
               ?keggInter   kegg:cid                    ?pubchemCID .
               ?dbInter     drugbank:GeneBank_ID        ?geneBankID ;
                            drugbank:SwissProt_ID       ?swissProtID ;
                            drugbank:gene               ?uniprotGeneID .
               ?drugID      drugbank:CID                ?pubchemCID ;
                            drugbank:Generic_Name       ?drugGenericName .
               ?ctd_disease ctd:diseaseid               ?diseaseID ;
                            ctd:cid                     ?pubchemCID .
               values ( ?uniprotGeneID ) {
                  ( uniprot:$gene )
               }
            } LIMIT 2000""")
        pairs = []
        for row in rows:
            pathway_id = row['keggPath'].split('/')[-1]
            pairs.append((KEdge('c2b2r', 'geneToPathway', {}),
                          KNode("KEGG:{0}".format(pathway_id),
                                node_types.PATHWAY)))
        return pairs

    def graph_drugname_to_gene_symbol(self, drug_name_node):
        """Return (edge, node) pairs for the genes targeted by a named drug.

        :param drug_name_node: Node whose ``identifier`` is a CURIE holding
            the drug name.
        :return: List of ``(edge, KNode)`` tuples; each node id is
            "UNIPROT:" plus ``Text.path_last`` of the row's uniprotSym.
        """
        drug_name = Text.un_curie(drug_name_node.identifier)
        pairs = []
        for row in self.drug_name_to_gene_symbol(drug_name):
            edge = self.get_edge(row, predicate="targets")
            gene_id = "UNIPROT:{0}".format(Text.path_last(row['uniprotSym']))
            pairs.append((edge, KNode(gene_id, node_types.GENE)))
        return pairs

    def graph_name_to_drugbank(self, drug_name_node):
        """Return (edge, node) pairs mapping a drug name to DrugBank drugs.

        :param drug_name_node: Node whose ``identifier`` is a CURIE holding
            the drug name.
        :return: List of ``(edge, KNode)`` tuples; each node id is
            "DRUGBANK:" plus ``Text.path_last`` of the row's drugID.
        """
        drug_name = Text.un_curie(drug_name_node.identifier)
        response = self.drug_name_to_gene_symbol(drug_name)
        results = []
        for r in response:
            edge = self.get_edge(r, predicate="drugname")
            # Rows from drug_name_to_gene_symbol only carry uniprotSym,
            # pmids and drugID, so indexing r['drugName'] raised KeyError on
            # every row. Fall back to the name that was searched for.
            label = r.get('drugName', drug_name)
            node = KNode("DRUGBANK:{0}".format(Text.path_last(r['drugID'])),
                         node_types.DRUG,
                         label=label)
            results.append((edge, node))
        return results

    def graph_get_pathways_by_gene(self, gene):  #reasoner
        """Return (edge, node) pairs for KEGG pathways linked to a gene.

        :param gene: Node whose ``identifier`` is a CURIE; the part after
            the first ':' is upper-cased and used as the UniProt gene id.
        :return: List of ``(KEdge, KNode)`` tuples (at most 2000 rows are
            queried); each node id is "KEGG:" plus the last path segment of
            the row's keggPath.
        """
        uniprot_gene = gene.identifier.split(':')[1].upper()
        pathway_rows = self.triplestore.query_template(
            inputs={"gene": uniprot_gene},
            outputs=['keggPath'],
            template_text="""
            prefix kegg:      <http://chem2bio2rdf.org/kegg/resource/>
            prefix drugbank:  <http://chem2bio2rdf.org/drugbank/resource/>
            prefix uniprot:   <http://chem2bio2rdf.org/uniprot/resource/gene/>
            prefix ctd:       <http://chem2bio2rdf.org/ctd/resource/>
            prefix mesh:      <http://bio2rdf.org/mesh:>
            select ?drugGenericName ?uniprotGeneID ?pathwayName ?keggPath where {
               ?keggPath    kegg:protein                ?swissProtID ;
                            kegg:Pathway_name           ?pathwayName .
               ?keggInter   kegg:cid                    ?pubchemCID .
               ?dbInter     drugbank:GeneBank_ID        ?geneBankID ;
                            drugbank:SwissProt_ID       ?swissProtID ;
                            drugbank:gene               ?uniprotGeneID .
               ?drugID      drugbank:CID                ?pubchemCID ;
                            drugbank:Generic_Name       ?drugGenericName .
               ?ctd_disease ctd:diseaseid               ?diseaseID ;
                            ctd:cid                     ?pubchemCID .
               values ( ?uniprotGeneID ) {
                  ( uniprot:$gene )
               }
            } LIMIT 2000""")
        pairs = []
        for row in pathway_rows:
            edge = KEdge('c2b2r', 'geneToPathway', {})
            kegg_id = "KEGG:{0}".format(row['keggPath'].split('/')[-1])
            pairs.append((edge, KNode(kegg_id, node_types.PATHWAY)))
        return pairs

    def graph_drugbank_to_uniprot(self, drugbank):
        """Return (edge, node) pairs for the genes linked to a DrugBank drug.

        :param drugbank: Node whose ``identifier`` is a CURIE; "DB" is
            prepended to its ``Text.un_curie`` value to form the drug id.
        :return: List of ``(edge, KNode)`` tuples; each node id is
            "UNIPROT:" plus the last path segment of the row's
            uniprotGeneID.
        """
        drug_id = "DB{0}".format(Text.un_curie(drugbank.identifier))
        rows = self.triplestore.query_template(
            inputs={"drugID": drug_id},
            outputs=["uniprotGeneID"],
            template_text="""
            prefix drugbank:      <http://chem2bio2rdf.org/drugbank/resource/>
            prefix drugbank_drug: <http://chem2bio2rdf.org/drugbank/resource/drugbank_drug/>
            prefix ctd:           <http://chem2bio2rdf.org/ctd/resource/>
            select distinct ?uniprotGeneID where {
               values ( ?drugID ) { ( drugbank_drug:${drugID} ) }
               ?dbInter     drugbank:GeneBank_ID        ?geneBankID ;
                            drugbank:gene               ?uniprotGeneID .
               ?drugID      drugbank:CID                ?pubchemCID ;
                            drugbank:Generic_Name       ?drugGenericName .
               ?ctd_disease ctd:diseaseid               ?diseaseID ;
                            ctd:cid                     ?pubchemCID .
            }""")
        pairs = []
        for row in rows:
            edge = self.get_edge(row, predicate='targets')
            gene_id = row['uniprotGeneID'].split('/')[-1]
            pairs.append(
                (edge, KNode("UNIPROT:{0}".format(gene_id), node_types.GENE)))
        return pairs

    def graph_diseasename_to_uniprot(self, disease):
        """Return (edge, node) pairs for genes linked to a named disease.

        A cheap one-row probe first checks that the name matches a CTD
        disease record at all; only then is the full gene query (at most
        500 rows) executed.

        :param disease: Node whose ``identifier`` is a CURIE holding the
            disease name.
        :return: List of ``(edge, KNode)`` tuples; each edge carries the
            chemical and disease PMIDs joined with "|" and each node id is
            "UNIPROT:" plus the last path segment of the row's uniprotSym.
            Empty when the name matches no disease.
        """
        disease_name = Text.un_curie(disease.identifier)
        probe = self.triplestore.query_template(
            inputs={"diseaseName": disease_name},
            outputs=["pubChemCID"],
            template_text="""
            prefix ctd: <http://chem2bio2rdf.org/ctd/resource/>
            select distinct ?pubChemCID where {
               values ( ?diseaseName ) { ( "$diseaseName" ) }
               ?ctdChemDis  ctd:cid         ?pubChemCID;
                            ctd:diseasename ?diseaseNameRec.
               filter regex(lcase(str(?diseaseNameRec)), lcase(?diseaseName))
            } LIMIT 1""")
        if len(probe) == 0:  # Not a known disease name.
            return []

        response = self.triplestore.query_template(
            inputs={"diseaseName": disease_name},
            outputs=["disPmids", "chemPmids", "uniprotSym"],
            template_text="""
                prefix ctd: <http://chem2bio2rdf.org/ctd/resource/>
                select ?disPmids ?ctdChemDis ?chemPmids ?uniprotSym ?diseaseId where {
                  values ( ?diseaseName ) { ( "$diseaseName" ) }
                  ?ctdChemGene ctd:cid         ?pubChemCID;
                               ctd:pubmedids   ?chemPmids;
                               ctd:gene        ?uniprotSym.
                  ?ctdChemDis  ctd:cid         ?pubChemCID;
                               ctd:diseaseid   ?diseaseId;
                               ctd:diseasename ?diseaseNameRec;
                               ctd:pubmedids   ?disPmids.
                  filter regex(lcase(str(?diseaseNameRec)), lcase(?diseaseName))
                } LIMIT 500""")
        results = []
        for r in response:
            pmids = r['chemPmids'] + "|" + r['disPmids']
            # No trailing comma here: the original one wrapped the edge in a
            # 1-tuple, so callers received ((edge,), node) pairs.
            edge = self.get_edge(r, predicate='caused_by', pmids=pmids)
            node = KNode(
                "UNIPROT:{0}".format(r['uniprotSym'].split('/')[-1]),
                node_types.GENE)
            results.append((edge, node))
        return results

    def graph_diseaseid_to_uniprot(self, drugbank):
        """Return (edge, node) pairs for genes linked to a disease id.

        :param drugbank: Node whose lower-cased ``identifier`` is used as
            the disease id in the query (the parameter name is kept for
            backward compatibility).
        :return: List of ``(edge, KNode)`` tuples; each node id is
            "UNIPROT:" plus the last path segment of the row's
            uniprotGeneID.
        """
        # Local import so this method does not depend on the module's
        # import block.
        import logging

        disease_id = drugbank.identifier.lower()
        # Debug-level log instead of an unconditional print to stdout.
        logging.getLogger(__name__).debug(
            "chembio disease id to uniprot: %s", disease_id)
        response = self.triplestore.query_template(
            inputs={"diseaseID": disease_id},
            outputs=["uniprotGeneID"],
            template_text="""
            prefix drugbank:      <http://chem2bio2rdf.org/drugbank/resource/>
            prefix drugbank_drug: <http://chem2bio2rdf.org/drugbank/resource/drugbank_drug/>
            prefix ctd:           <http://chem2bio2rdf.org/ctd/resource/>
            prefix mesh.disease:          <http://bio2rdf.org/mesh:> 
            select distinct ?uniprotGeneID where {
               values ( ?diseaseID ) { ( $diseaseID ) }
               ?dbInter     drugbank:gene               ?uniprotGeneID .
               ?drugID      drugbank:CID                ?pubchemCID.
               ?ctd_disease ctd:diseaseid               ?diseaseID ;
                            ctd:cid                     ?pubchemCID .
            }""")
        return [
            (self.get_edge(r, predicate='targets'),
             KNode("UNIPROT:{0}".format(r['uniprotGeneID'].split('/')[-1]),
                   node_types.GENE)) for r in response
        ]

    def graph_drugname_to_pubchem(self, drugname_node):
        """Return (edge, node) pairs mapping a drug name to PubChem drugs.

        :param drugname_node: Node whose ``identifier`` is a CURIE holding
            the drug name.
        :return: List of ``(edge, KNode)`` tuples; each node id is
            "PUBCHEM:" plus the last path segment of the row's drugID, and
            is labelled with the row's drugName.
        """
        drug_name = Text.un_curie(drugname_node.identifier)
        pairs = []
        for row in self.drugname_to_pubchem(drug_name):
            edge = self.get_edge(row, predicate='drugname_to_pubchem')
            compound_id = "PUBCHEM:{}".format(row['drugID'].split('/')[-1])
            drug_node = KNode(compound_id,
                              node_types.DRUG,
                              label=row['drugName'])
            pairs.append((edge, drug_node))
        return pairs

    #       'NCBIGene'   : r['NCBIGene'],
    #        'meshID'     : r['meshID'],
    #        'interaction': r['interaction'],
    #        'interactionTypes': r['interactionTypes']
    #        'pubmedids'  : r['pubmedids']
    def graph_pubchem_to_ncbigene(self, pubchem_node):
        """Return (edge, node) pairs for genes interacting with a compound.

        :param pubchem_node: Node whose ``identifier`` is a PubChem CURIE.
        :return: List of ``(edge, KNode)`` tuples; each edge carries the
            interaction, interactionTypes and publications (pubmedids split
            on '|') and each node id is "NCBIGene:" plus the row's NCBIGene.
        """
        # The compound mesh coming back from here is very out of date, so
        # the meshID column is deliberately not used.
        compound_id = Text.un_curie(pubchem_node.identifier)
        pairs = []
        for row in self.pubchem_to_ncbigene(compound_id):
            edge_props = {
                'interaction': row['interaction'],
                'interactionTypes': row['interactionTypes'],
                'publications': row['pubmedids'].split('|'),
            }
            edge = self.get_edge(edge_props, predicate='pubchem_to_ncbigene')
            gene_node = KNode("NCBIGene:{}".format(row['NCBIGene']),
                              node_types.GENE)
            pairs.append((edge, gene_node))
        return pairs
class UberonGraphKS(Service):
    """A knowledge source created by 1) Combining cell ontology, uberon, and
    HPO, 2) Reasoning over the total graph to realize many implicit edges.
    Created by Jim Balhoff"""
    def __init__(self, context):
        """Register this service as "uberongraph" and open its triple store.

        Also builds ``class_defs``, the map from node type to the CURIE of
        the ontology class used as that type's root in ``get_edges``.

        :param context: Service context, handed through to the ``Service``
            base-class initializer.
        """
        super().__init__("uberongraph", context)
        # NOTE(review): self.url is presumably set by the base initializer
        # above -- confirm against Service.
        self.triplestore = TripleStore(self.url)
        #TODO: Pull this from the biolink model?
        root_classes = {}
        root_classes[node_types.CELL] = 'CL:0000000'
        root_classes[node_types.ANATOMICAL_ENTITY] = 'UBERON:0001062'
        root_classes[node_types.BIOLOGICAL_PROCESS] = 'GO:0008150'
        root_classes[node_types.MOLECULAR_ACTIVITY] = 'GO:0003674'
        root_classes[node_types.CHEMICAL_SUBSTANCE] = 'CHEBI:24431'
        root_classes[node_types.DISEASE] = 'MONDO:0000001'
        root_classes[node_types.PHENOTYPIC_FEATURE] = 'UPHENO:0001002'
        self.class_defs = root_classes

    def query_uberongraph(self, query):
        """Run a raw SPARQL query against this service's triple store.

        :param query: SPARQL query text.
        :return: Whatever ``self.triplestore.execute_query`` returns.
        """
        store = self.triplestore
        return store.execute_query(query)

    def get_edges(self, source_type, obj_type):
        """List the predicates that connect two node types.

        Finds every predicate (with its label) that links a subclass of the
        source type's root class to a subclass of the object type's root
        class.

        :param source_type: Node type of the subject; must be a key of
            ``self.class_defs``.
        :param obj_type: Node type of the object; must be a key of
            ``self.class_defs``.
        :return: Result of ``triplestore.query_template`` for the outputs p
            and pLabel.
        :raises KeyError: If either type is not in ``self.class_defs``.
        """
        query = """
        prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        prefix UBERON: <http://purl.obolibrary.org/obo/UBERON_>
        prefix CL: <http://purl.obolibrary.org/obo/CL_>
        prefix GO: <http://purl.obolibrary.org/obo/GO_>
        prefix CHEBI: <http://purl.obolibrary.org/obo/CHEBI_>
        prefix MONDO: <http://purl.obolibrary.org/obo/MONDO_>
        prefix UPHENO: <http://purl.obolibrary.org/obo/UPHENO_>
        prefix BFO: <http://purl.obolibrary.org/obo/BFO_>
        select distinct ?p ?pLabel
        from <http://reasoner.renci.org/ontology>
        where {
            graph <http://reasoner.renci.org/redundant> {
                ?sourceID ?p ?objID .
            }
            graph <http://reasoner.renci.org/ontology/closure> {
                ?sourceID rdfs:subClassOf $sourcedefclass .
            }
            graph <http://reasoner.renci.org/ontology/closure> {
                ?objID rdfs:subClassOf $objdefclass .
                hint:Prior hint:runFirst true .
            }
            ?p rdfs:label ?pLabel .
        }
        """
        substitutions = {
            'sourcedefclass': self.class_defs[source_type],
            'objdefclass': self.class_defs[obj_type]
        }
        return self.triplestore.query_template(inputs=substitutions,
                                               outputs=['p', 'pLabel'],
                                               template_text=query)

    def get_label(self, identifier):
        """Return the ``rdfs:label`` of an ontology term.

        :param identifier: CURIE of the term; converted with
            ``Text.curie_to_obo`` before being placed in the query.
        :return: The label of the first result row, or '' when the query
            returns no rows.
        """
        obo_id = Text.curie_to_obo(identifier)
        query = """
        prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        select distinct ?label
        from <http://reasoner.renci.org/ontology>
        where {
            $obo_id rdfs:label ?label .
        }
        """
        rows = self.triplestore.query_template(inputs={'obo_id': obo_id},
                                               outputs=['label'],
                                               template_text=query)
        return rows[0]['label'] if len(rows) >= 1 else ''

    def cell_get_cellname(self, cell_identifier):
        """Look up the label(s) of a cell type.

        :param cell_identifier: CL identifier for the cell type, placed
            into the query as given.
        :return: Result of ``triplestore.query_template`` for the single
            output cellLabel.
        """
        query = """
        prefix CL: <http://purl.obolibrary.org/obo/CL_>
        select distinct ?cellLabel
        from <http://reasoner.renci.org/nonredundant>
        from <http://reasoner.renci.org/ontology>
        where {
                  $cellID rdfs:label ?cellLabel .
              }
        """
        labels = self.triplestore.query_template(
            inputs={'cellID': cell_identifier},
            outputs=['cellLabel'],
            template_text=query)
        return labels

    def get_anatomy_parts(self, anatomy_identifier):
        """Given an UBERON id, find other UBERON terms that are parts of it.

        :param anatomy_identifier: Identifier of the anatomical entity; it
            is wrapped in angle brackets, i.e. used as an IRI in the query.
        :return: The query result rows (part, partlabel), each extended in
            place with a 'curie' key computed by ``Text.obo_to_curie`` from
            the row's part.
        """
        anatomy_iri = "<{}>".format(anatomy_identifier)
        query = """
        prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        prefix UBERON: <http://purl.obolibrary.org/obo/UBERON_>
        prefix BFO: <http://purl.obolibrary.org/obo/BFO_>
        select distinct ?part ?partlabel
        from <http://reasoner.renci.org/nonredundant> 
        from <http://reasoner.renci.org/ontology>
        where {
                $anatomy_id BFO:0000051 ?part .
                graph <http://reasoner.renci.org/ontology/closure> {
                  ?part rdfs:subClassOf UBERON:0001062 .
                }
                ?part rdfs:label ?partlabel .
        }
        """
        parts = self.triplestore.query_template(
            inputs={'anatomy_id': anatomy_iri},
            outputs=['part', 'partlabel'],
            template_text=query)
        for part in parts:
            part['curie'] = Text.obo_to_curie(part['part'])
        return parts

    def get_neighbor(self, input_id, output_type, subject=True):
        parents = {
            node_types.ANATOMICAL_ENTITY:
            "<http://purl.obolibrary.org/obo/UBERON_0001062>",
            node_types.DISEASE:
            "<http://purl.obolibrary.org/obo/MONDO_0000001>",
            node_types.MOLECULAR_ACTIVITY:
            "<http://purl.obolibrary.org/obo/GO_0003674>",
            node_types.BIOLOGICAL_PROCESS:
            "<http://purl.obolibrary.org/obo/GO_0008150>",
            node_types.CHEMICAL_SUBSTANCE:
            "<http://purl.obolibrary.org/obo/CHEBI_24431>",
            node_types.PHENOTYPIC_FEATURE:
            "<http://purl.obolibrary.org/obo/HP_0000118>"
        }
        pref = Text.get_curie(input_id)
        obo_prefix = f'PREFIX {pref}: <http://purl.obolibrary.org/obo/{pref}_>'
        text = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        """ + obo_prefix + """
        select distinct ?output_id ?output_label ?p ?pLabel 
        from <http://reasoner.renci.org/nonredundant>
        from <http://reasoner.renci.org/ontology>
        where {
            graph <http://reasoner.renci.org/nonredundant> {
        """
        if subject:
            text += '	 $input_id ?p ?output_id .'
        else:
            text += '  ?output_id ?p $input_id .'
        text += """
            }
            graph <http://reasoner.renci.org/ontology/closure> {
                ?output_id rdfs:subClassOf $parent .
            }
            ?output_id rdfs:label ?output_label .
  			?p rdfs:label ?pLabel .
        }
        """
        results = self.triplestore.query_template(
            inputs={
                'input_id': input_id,
                'parent': parents[output_type]
            },
            outputs=['output_id', 'output_label', 'p', 'pLabel'],
            template_text=text)
        return results

    def anatomy_to_anatomy(self, identifier):
        """Find anatomical entities related to the given term, both ways.

        Runs one query with the term as subject and one with it as object.

        :param identifier: Identifier of the anatomical term, substituted
            for ``$input_id`` in the query (the query declares the UBERON
            prefix).
        :return: Dict with the keys 'subject' and 'object', each a list of
            result rows with output_id, output_label, p and pLabel.
        """
        results = {'subject': [], 'object': []}
        # These must be $input_id template placeholders. The original used
        # the SPARQL variable ?input_id, so the identifier passed in
        # ``inputs`` was never substituted and the query was unconstrained.
        for direction,query in \
            (('subject','      $input_id ?p ?output_id .'),
             ('object','       ?output_id ?p $input_id .')):
            text=""" PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            prefix UBERON: <http://purl.obolibrary.org/obo/UBERON_>
            select distinct ?output_id ?output_label ?p ?pLabel 
            from <http://reasoner.renci.org/nonredundant>
            from <http://reasoner.renci.org/ontology>
            where {
                graph <http://reasoner.renci.org/redundant> {
            """ + query + \
            """
                }
                graph <http://reasoner.renci.org/ontology/closure> {
                    ?output_id rdfs:subClassOf UBERON:0001062 . 
                }
                ?output_id rdfs:label ?output_label .
                ?p rdfs:label ?pLabel .
            }
            """
            results[direction] += self.triplestore.query_template(
                inputs={'input_id': identifier},
                outputs=['output_id', 'output_label', 'p', 'pLabel'],
                template_text=text)
        return results

    def anatomy_to_go(self, anatomy_identifier):
        """Identify processes and functions related to an anatomical term.

        Four queries are run: for each of the two GO root classes
        (GO:0008150 and GO:0003674), one with the anatomical term as
        subject and one with it as object.

        :param anatomy_identifier: Identifier of the anatomical term
            (anatomy, cell, component), substituted for ``$anatID``.
        :return: Dict with the keys 'subject' and 'object', each a list of
            result rows with goID, goLabel, p and pLabel.
        """
        go_roots = ('GO:0008150', 'GO:0003674')
        triple_patterns = (('subject', '      $anatID ?p ?goID'),
                           ('object', '        ?goID ?p $anatID'))
        query_head = """
                prefix GO: <http://purl.obolibrary.org/obo/GO_>
                select distinct ?goID ?goLabel ?p ?pLabel
                from <http://reasoner.renci.org/nonredundant>
                from <http://reasoner.renci.org/ontology>
                where {
                    graph <http://reasoner.renci.org/redundant> {
                """
        query_tail = """
                    }
                    graph <http://reasoner.renci.org/ontology/closure> {
                        ?goID rdfs:subClassOf $goParent .
                    }
                    ?goID rdfs:label ?goLabel .
                    ?p rdfs:label ?pLabel
                }
                """
        found = {'subject': [], 'object': []}
        for go_root in go_roots:
            for direction, triple in triple_patterns:
                rows = self.triplestore.query_template(
                    inputs={'anatID': anatomy_identifier,
                            'goParent': go_root},
                    outputs=['goID', 'goLabel', 'p', 'pLabel'],
                    template_text=query_head + triple + query_tail)
                found[direction].extend(rows)
        return found

    def go_to_anatomy(self, input_identifier):
        """ Find anatomy terms related to a process/function term.

        Two queries are run, one per position of the anatomy term in the
        statement; results are restricted to subclasses of UBERON:0001062.

        :param input_identifier: identifier substituted for ``$goID`` in the
            query template (the process/function term).
        :return: dict with the keys 'subject' and 'object'; each value holds
            the rows (anatID, anatLabel, p, pLabel) returned by
            ``self.triplestore.query_template`` for that query.
        """
        # The statement may point in either direction, so try both patterns.
        patterns_by_position = (
            ('subject', '      ?anatID ?p $goID'),
            ('object', '        $goID ?p ?anatID'),
        )
        matches = {'subject': [], 'object': []}
        for position, triple_pattern in patterns_by_position:
            sparql = """
            prefix UBERON: <http://purl.obolibrary.org/obo/UBERON_>
            prefix GO: <http://purl.obolibrary.org/obo/GO_>
            select distinct ?anatID ?anatLabel ?p ?pLabel
            from <http://reasoner.renci.org/nonredundant>
            from <http://reasoner.renci.org/ontology>
            where {
                graph <http://reasoner.renci.org/redundant> {
            """ + triple_pattern + """
                }
                graph <http://reasoner.renci.org/ontology/closure> {
                    ?anatID rdfs:subClassOf UBERON:0001062 .
                }
                ?anatID rdfs:label ?anatLabel .
                ?p rdfs:label ?pLabel
            }
            """
            rows = self.triplestore.query_template(
                template_text=sparql,
                inputs={'goID': input_identifier},
                outputs=['anatID', 'anatLabel', 'p', 'pLabel'])
            matches[position] += rows
        return matches

    def pheno_or_disease_to_go(self, identifier):
        """ Find GO terms that a phenotype or disease points to.

        Only statements with the input as subject are queried. Results are
        restricted to subclasses of GO:0008150 or GO:0003674.

        :param identifier: CURIE substituted for ``$input_id`` in the query;
            the template declares the MONDO and HP prefixes for it.
        :return: rows (goID, goLabel, p, pLabel) as returned by
            ``self.triplestore.query_template``.
        """
        # The HP prefix must expand to the HP namespace; it was previously
        # bound to the MONDO namespace, so HP CURIEs resolved to MONDO IRIs.
        text = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        prefix BFO: <http://purl.obolibrary.org/obo/BFO_>
        prefix GO: <http://purl.obolibrary.org/obo/GO_>
        prefix MONDO: <http://purl.obolibrary.org/obo/MONDO_>
        prefix HP: <http://purl.obolibrary.org/obo/HP_>
        select distinct ?goID ?goLabel ?p ?pLabel
        from <http://reasoner.renci.org/nonredundant>
        from <http://reasoner.renci.org/ontology>
        where {
            graph <http://reasoner.renci.org/redundant> {
                $input_id ?p ?goID .
            }
            graph <http://reasoner.renci.org/ontology/closure> {
                { ?goID rdfs:subClassOf GO:0008150 . }
                UNION
                { ?goID rdfs:subClassOf GO:0003674 . }
            }
            ?goID rdfs:label ?goLabel .
            ?p rdfs:label ?pLabel .
        }
        """
        return self.triplestore.query_template(
            inputs={'input_id': identifier},
            outputs=['goID', 'goLabel', 'p', 'pLabel'],
            template_text=text)

    def phenotype_to_anatomy(self, hp_identifier):
        """ Identify anatomies related to a phenotype.

        The ``rdfs:subClassOf UBERON:0001062`` clause ensures each result is
        an anatomical entity. Only the phenotype-as-subject direction is
        queried because nothing in ubergraph goes the other way.

        :param hp_identifier: HP identifier for the phenotype; substituted for
            ``$HPID`` in the query.
        :return: rows (anatomy_id, anatomy_label, predicate, predicate_label)
            as returned by ``self.triplestore.query_template``; the predicate
            label is OPTIONAL in the query.
        """
        sparql = """
            PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            PREFIX UBERON: <http://purl.obolibrary.org/obo/UBERON_>
            SELECT DISTINCT ?anatomy_id ?anatomy_label ?predicate ?predicate_label             
            FROM <http://reasoner.renci.org/ontology>
            WHERE {
                graph <http://reasoner.renci.org/redundant>{
                    $HPID ?predicate ?anatomy_id.
                }                
                graph <http://reasoner.renci.org/ontology/closure>{
                    ?anatomy_id rdfs:subClassOf UBERON:0001062.
                }
                ?anatomy_id rdfs:label ?anatomy_label .
                OPTIONAL {?predicate rdfs:label ?predicate_label.}
            }
        """
        wanted = ['anatomy_id', 'anatomy_label', 'predicate', 'predicate_label']
        return self.triplestore.query_template(
            template_text=sparql,
            inputs={'HPID': hp_identifier},
            outputs=wanted)

    def anatomy_to_phenotype(self, uberon_id):
        """ Identify phenotypes related to an anatomical entity.

        The SPARQL mirrors ``phenotype_to_anatomy``: no anatomical entity was
        found in ubergraph as the subject of a statement about a subclass of
        HP:0000118, so this is the same phenotype -> anatomy statement, read
        from the anatomy side when the anatomical entity is the known term.

        :param uberon_id: identifier substituted for ``$UBERONID``.
        :return: rows (pheno_id, pheno_label, predicate, predicate_label) as
            returned by ``self.triplestore.query_template``.
        """
        sparql = """
            PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            PREFIX HP:<http://purl.obolibrary.org/obo/HP_>
            SELECT DISTINCT ?pheno_id ?pheno_label ?predicate ?predicate_label 
            FROM <http://reasoner.renci.org/ontology>
            WHERE {
                graph <http://reasoner.renci.org/redundant> {
                    ?pheno_id ?predicate $UBERONID.
                }                
                graph <http://reasoner.renci.org/ontology/closure>{
                    ?pheno_id rdfs:subClassOf HP:0000118.
                }
                ?pheno_id rdfs:label ?pheno_label.
                OPTIONAL {?predicate rdfs:label ?predicate_label.}
            }"""
        wanted = ['pheno_id', 'pheno_label', 'predicate', 'predicate_label']
        return self.triplestore.query_template(
            template_text=sparql,
            inputs={'UBERONID': uberon_id},
            outputs=wanted)

    def biological_process_or_activity_to_chemical(self, go_id):
        """
        Given a GO term, find the chemical entities it points to.

        The GO term is the subject of the statement; objects are restricted to
        subclasses of CHEBI:24431 and predicates to those whose label is typed
        as ``xsd:string``.

        :param go_id: identifier substituted for ``$GO_ID`` in the query.
        :return: rows (chebi_id, predicate, label_predicate, chebi_label) as
            returned by ``self.triplestore.query_template``.
        """
        # xsd: is declared explicitly so the FILTER does not depend on the
        # endpoint pre-declaring that prefix.
        text = """
            PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
            PREFIX GO:  <http://purl.obolibrary.org/obo/GO_>
            PREFIX RO: <http://purl.obolibrary.org/obo/RO_>
            PREFIX chemical_entity: <http://purl.obolibrary.org/obo/CHEBI_24431>
            PREFIX chemical_class: <http://purl.obolibrary.org/obo/CHEBI_24431>
            SELECT DISTINCT ?chebi_id ?predicate ?label_predicate ?chebi_label
            from <http://reasoner.renci.org/ontology>
            from <http://reasoner.renci.org/nonredundant>
            where {
            $GO_ID ?predicate ?chebi_id.
            ?chebi_id rdfs:label ?chebi_label.
            GRAPH <http://reasoner.renci.org/ontology/closure>
                { ?chebi_id rdfs:subClassOf chemical_class:.}
            ?predicate rdfs:label ?label_predicate.
            FILTER ( datatype(?label_predicate) = xsd:string)
            }
        """
        return self.triplestore.query_template(
            template_text=text,
            outputs=['chebi_id', 'predicate', 'label_predicate', 'chebi_label'],
            inputs={'GO_ID': go_id})

    def pheno_to_biological_activity(self, pheno_id):
        """
        Find GO terms related to a phenotype.

        The phenotype is the subject of the statement; objects are restricted
        to subclasses of GO:0008150 or GO:0003674.

        :param pheno_id: phenotype identifier, substituted for ``$pheno_type``.
        :return: rows (go_id, predicate, predicate_label, go_label) as
            returned by ``self.triplestore.query_template``.
        """
        sparql = """
            PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            PREFIX owl: <http://www.w3.org/2002/07/owl#>
            PREFIX GO: <http://purl.obolibrary.org/obo/GO_>
            PREFIX has_phenotype_affecting: <http://purl.obolibrary.org/obo/UPHENO_0000001>
            PREFIX RO: <http://purl.obolibrary.org/obo/RO_>
            prefix HP: <http://purl.obolibrary.org/obo/HP_>

            SELECT DISTINCT ?go_id ?predicate ?predicate_label ?go_label
            from <http://reasoner.renci.org/nonredundant>
            from <http://reasoner.renci.org/ontology>
            WHERE {
            $pheno_type ?predicate  ?go_id.
            ?go_id rdfs:label ?go_label.
            graph <http://reasoner.renci.org/ontology/closure> {
                { ?go_id rdfs:subClassOf GO:0008150 . }
                UNION
                { ?go_id rdfs:subClassOf GO:0003674 . }
            }
            ?predicate rdfs:label ?predicate_label.
            }
        """
        wanted = ['go_id', 'predicate', 'predicate_label', 'go_label']
        return self.triplestore.query_template(
            inputs={'pheno_type': pheno_id},
            outputs=wanted,
            template_text=sparql)

    def disease_to_anatomy(self, disease_id):
        """ Find anatomical entities that a disease points to.

        Only the disease-as-subject direction is queried: there are no
        anatomy-(predicate)->disease triples.

        :param disease_id: identifier substituted for ``$diseaseID``.
        :return: rows (anatomyID, predicate, predicate_label, anatomy_label)
            as returned by ``self.triplestore.query_template``.
        """
        sparql = """
            PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            PREFIX anatomicalEntity: <http://purl.obolibrary.org/obo/UBERON_0001062>
            SELECT DISTINCT ?anatomyID ?predicate ?predicate_label ?anatomy_label
            FROM <http://reasoner.renci.org/nonredundant>
            FROM <http://reasoner.renci.org/ontology>
            WHERE {
            graph <http://reasoner.renci.org/redundant> {
                $diseaseID ?predicate ?anatomyID.
            }
            ?anatomyID rdfs:label ?anatomy_label.
            graph <http://reasoner.renci.org/ontology/closure> {
                ?anatomyID rdfs:subClassOf anatomicalEntity: .
            }
            ?predicate rdfs:label ?predicate_label.
            }
        """
        wanted = ['anatomyID', 'predicate', 'predicate_label', 'anatomy_label']
        return self.triplestore.query_template(
            inputs={'diseaseID': disease_id},
            outputs=wanted,
            template_text=sparql)

    def anatomy_to_chemical_substance(self, anatomy_id):
        """ Find chemical entities that an anatomical term points to.

        Only the anatomy-as-subject direction is queried: there is no
        chemical-(predicate)->anatomy statement. Objects are restricted to
        subclasses of CHEBI:24431.

        :param anatomy_id: identifier substituted for ``$anatomy_id``.
        :return: rows (predicate, predicate_label, chemical_entity,
            chemical_label) as returned by
            ``self.triplestore.query_template``.
        """
        sparql = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX chemical_entity: <http://purl.obolibrary.org/obo/CHEBI_24431>
        SELECT DISTINCT ?predicate ?predicate_label ?chemical_entity ?chemical_label
        FROM <http://reasoner.renci.org/ontology>
        FROM <http://reasoner.renci.org/redundant>
        WHERE {
            $anatomy_id ?predicate ?chemical_entity.
            graph <http://reasoner.renci.org/ontology/closure> 
            {
                ?chemical_entity rdfs:subClassOf chemical_entity:.
            }
            ?predicate rdfs:label ?predicate_label .
            ?chemical_entity rdfs:label ?chemical_label.
        }
        """
        wanted = [
            'predicate', 'predicate_label', 'chemical_entity', 'chemical_label'
        ]
        return self.triplestore.query_template(
            inputs={'anatomy_id': anatomy_id},
            outputs=wanted,
            template_text=sparql)

    def anatomy_to_disease(self, anatomy_id):
        """ Find diseases that point to an anatomical term.

        The anatomical term is the object of the statement; subjects are
        restricted to subclasses of MONDO:0000001. The query text omits the
        ``WHERE`` keyword, which SPARQL treats as optional.

        :param anatomy_id: identifier substituted for ``$anatomy_id``.
        :return: rows (predicate, predicate_label, disease, disease_label) as
            returned by ``self.triplestore.query_template``.
        """
        sparql = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX disease: <http://purl.obolibrary.org/obo/MONDO_0000001>
        SELECT DISTINCT  ?predicate ?predicate_label ?disease ?disease_label
        FROM <http://reasoner.renci.org/ontology>
        FROM <http://reasoner.renci.org/redundant>{
        ?disease ?predicate $anatomy_id.
        graph <http://reasoner.renci.org/ontology/closure> 
        {
            ?disease rdfs:subClassOf disease:.
        }
        ?predicate rdfs:label ?predicate_label .
        ?disease rdfs:label ?disease_label.
        }
        """
        wanted = ['predicate', 'predicate_label', 'disease', 'disease_label']
        return self.triplestore.query_template(
            inputs={'anatomy_id': anatomy_id},
            outputs=wanted,
            template_text=sparql)

    def create_phenotype_anatomy_edge(self, node_id, node_label, input_id,
                                      phenotype_node):
        """ Build an anatomy node and the edge joining it to a phenotype node.

        The edge runs from the new anatomy node to ``phenotype_node`` with the
        fixed predicate GAMMA:0000002.

        :param node_id: identifier of the anatomy term; passed through
            ``Text.obo_to_curie`` before the node is built.
        :param node_label: name given to the new anatomy node.
        :param input_id: input identifier recorded on the edge.
        :param phenotype_node: existing phenotype node (edge target).
        :return: tuple ``(edge, anatomy_node)``.
        """
        anatomy_curie = Text.obo_to_curie(node_id)
        anatomy_node = KNode(anatomy_curie,
                             type=node_types.ANATOMICAL_ENTITY,
                             name=node_label)
        relation = LabeledID(identifier='GAMMA:0000002',
                             label='inverse of has phenotype affecting')
        provenance = 'uberongraph.get_anatomy_by_phenotype_graph'
        new_edge = self.create_edge(anatomy_node, phenotype_node, provenance,
                                    input_id, relation)
        return new_edge, anatomy_node

    def create_anatomy_phenotype_edge(self, node_id, node_label, input_id,
                                      anatomy_node):
        """ Build a phenotype node and the edge joining it to an anatomy node.

        The edge runs from ``anatomy_node`` to the new phenotype node with the
        fixed predicate GAMMA:0000002.

        :param node_id: identifier of the phenotype term; passed through
            ``Text.obo_to_curie`` before the node is built.
        :param node_label: name given to the new phenotype node.
        :param input_id: input identifier recorded on the edge.
        :param anatomy_node: existing anatomy node (edge source).
        :return: tuple ``(edge, phenotype_node)``.
        """
        phenotype_curie = Text.obo_to_curie(node_id)
        phenotype_node = KNode(phenotype_curie,
                               type=node_types.PHENOTYPIC_FEATURE,
                               name=node_label)
        relation = LabeledID(identifier='GAMMA:0000002',
                             label='inverse of has phenotype affecting')
        provenance = 'uberongraph.get_phenotype_by_anatomy_graph'
        new_edge = self.create_edge(anatomy_node, phenotype_node, provenance,
                                    input_id, relation)
        return new_edge, phenotype_node

    def dep_get_anatomy_by_phenotype_graph(self, phenotype_node):
        """ Find anatomies (and their parts) related to a phenotype node.

        For every HP synonym of ``phenotype_node`` the anatomies returned by
        ``phenotype_to_anatomy`` are turned into nodes and edges; the parts of
        each anatomy (``get_anatomy_parts``) are then added with the same
        predicate, because the direct hits tend to be very high level terms.

        NOTE(review): the ``dep_`` prefix presumably marks this method as
        deprecated -- confirm before relying on it.

        :param phenotype_node: node providing the HP synonyms and the edge
            source.
        :return: list of ``(edge, node)`` tuples.
        """
        provenance = 'uberongraph.get_anatomy_by_phenotype_graph'
        results = []
        for curie in phenotype_node.get_synonyms_by_prefix('HP'):
            for r in self.phenotype_to_anatomy(curie):
                node = KNode(r['anatomy_id'],
                             type=node_types.ANATOMICAL_ENTITY,
                             name=r['anatomy_label'])
                # try to derive the label from the relation for the new ubergraph axioms
                predicate_label = r['predicate_label'] or '_'.join(
                    r['predicate'].split('#')[-1].split('.'))
                predicate = LabeledID(Text.obo_to_curie(r['predicate']),
                                      predicate_label)
                edge = self.create_edge(phenotype_node, node, provenance,
                                        phenotype_node.id, predicate)
                if phenotype_node.name is None:
                    # phenotype_to_anatomy does not request an 'input_label'
                    # column, so the key may be absent; leave the name unset
                    # in that case instead of failing with KeyError.
                    try:
                        phenotype_node.name = r['input_label']
                    except KeyError:
                        pass
                results.append((edge, node))
                #These tend to be very high level terms.  Let's also get their parts to
                #be more inclusive.
                #TODO: there ought to be a more principled way to take care of this, but
                #it highlights the uneasy relationship between the high level world of
                #smartapi and the low-level sparql-vision.
                for pr in self.get_anatomy_parts(r['anatomy_id']):
                    pnode = KNode(pr['part'],
                                  type=node_types.ANATOMICAL_ENTITY,
                                  name=pr['partlabel'])
                    pedge = self.create_edge(phenotype_node, pnode, provenance,
                                             phenotype_node.id, predicate)
                    results.append((pedge, pnode))
        return results

    def get_out_by_in(self,
                      input_node,
                      output_type,
                      prefixes,
                      subject=True,
                      object=True):
        """ Collect (edge, node) pairs for the neighbours of ``input_node``.

        Every synonym of ``input_node`` carrying one of ``prefixes`` is looked
        up with ``get_neighbor`` twice (``subject=True`` and
        ``subject=False``). Within each direction, every distinct
        (predicate, output id) row becomes a new node of ``output_type`` plus
        an edge: input -> output for 'subject' rows, output -> input for
        'object' rows. The edge provenance is the name of the calling
        function, so callers must invoke this method directly.

        :param input_node: node whose synonyms are queried.
        :param output_type: node type requested from ``get_neighbor`` and
            assigned to the created nodes.
        :param prefixes: iterable of CURIE prefixes selecting the synonyms.
        :param subject: accepted for interface compatibility; not consulted,
            both directions are always queried.
        :param object: accepted for interface compatibility; not consulted.
        :return: list of ``(edge, output_node)`` tuples.
        """
        # context=0 skips reading source lines; only the caller's name is used.
        caller = f'uberongraph.{inspect.stack(0)[1][3]}'
        curies = set()
        for pre in prefixes:
            curies.update(input_node.get_synonyms_by_prefix(pre))
        # Keep each row together with the curie that produced it, so the edge
        # records the right input identifier (previously the last curie of the
        # loop was used for every edge).
        hits = {'subject': [], 'object': []}
        for curie in curies:
            for direction, as_subject in (('subject', True), ('object', False)):
                rows = self.get_neighbor(curie, output_type, subject=as_subject)
                hits[direction].extend((curie, row) for row in rows)
        returnresults = []
        for direction in ('subject', 'object'):
            done = set()
            for curie, r in hits[direction]:
                key = (r['p'], r['output_id'])
                if key in done:
                    continue
                done.add(key)
                predicate_curie = Text.obo_to_curie(r['p'])
                prefix = Text.get_curie(predicate_curie)
                # Prefixes are upper-cased, except the ubergraph axiom prefix.
                if prefix != 'ubergraph-axioms.ofn':
                    prefix = prefix.upper()
                upper_cased_predicate_curie = prefix + ":" + Text.un_curie(
                    predicate_curie)
                predicate = LabeledID(upper_cased_predicate_curie, r['pLabel'])
                output_node = KNode(r['output_id'],
                                    type=output_type,
                                    name=r['output_label'])
                if direction == 'subject':
                    edge = self.create_edge(input_node, output_node, caller,
                                            curie, predicate)
                else:
                    edge = self.create_edge(output_node, input_node, caller,
                                            curie, predicate)
                returnresults.append((edge, output_node))
        return returnresults

    #Don't get confused.  There is the direction of the statement (who is the subject
    # and who is the object) and which of them we are querying by.  We want to query
    # independent of direction i.e. let the input node be either the subject or the object.

    def get_anatomy_by_anatomy_graph(self, anatomy_node):
        """ Anatomical entities related to ``anatomy_node``.

        Delegates to ``get_out_by_in`` with the node's UBERON, CL and GO
        synonyms; called directly because the callee records this method's
        name as provenance.
        """
        return self.get_out_by_in(input_node=anatomy_node,
                                  output_type=node_types.ANATOMICAL_ENTITY,
                                  prefixes=['UBERON', 'CL', 'GO'])

    def get_phenotype_by_anatomy_graph(self, anatomy_node):
        """ Phenotypic features related to ``anatomy_node``.

        Delegates to ``get_out_by_in`` with the node's UBERON, CL and GO
        synonyms; called directly because the callee records this method's
        name as provenance.
        """
        return self.get_out_by_in(input_node=anatomy_node,
                                  output_type=node_types.PHENOTYPIC_FEATURE,
                                  prefixes=['UBERON', 'CL', 'GO'])

    def get_chemical_substance_by_anatomy(self, anatomy_node):
        """ Chemical substances related to ``anatomy_node``.

        Delegates to ``get_out_by_in`` with the node's UBERON, CL and GO
        synonyms; called directly because the callee records this method's
        name as provenance.
        """
        return self.get_out_by_in(input_node=anatomy_node,
                                  output_type=node_types.CHEMICAL_SUBSTANCE,
                                  prefixes=['UBERON', 'CL', 'GO'])

    def get_process_by_anatomy(self, anatomy_node):
        """ Biological processes related to ``anatomy_node``.

        Delegates to ``get_out_by_in`` with the node's UBERON, CL and GO
        synonyms; called directly because the callee records this method's
        name as provenance.
        """
        return self.get_out_by_in(input_node=anatomy_node,
                                  output_type=node_types.BIOLOGICAL_PROCESS,
                                  prefixes=['UBERON', 'CL', 'GO'])

    def get_activity_by_anatomy(self, anatomy_node):
        """ Molecular activities related to ``anatomy_node``.

        Delegates to ``get_out_by_in`` with the node's UBERON, CL and GO
        synonyms; called directly because the callee records this method's
        name as provenance.
        """
        return self.get_out_by_in(input_node=anatomy_node,
                                  output_type=node_types.MOLECULAR_ACTIVITY,
                                  prefixes=['UBERON', 'CL', 'GO'])

    def get_disease_by_anatomy_graph(self, anatomy_node):
        """ Diseases related to ``anatomy_node``.

        Delegates to ``get_out_by_in`` with the node's UBERON, CL and GO
        synonyms; called directly because the callee records this method's
        name as provenance.
        """
        return self.get_out_by_in(input_node=anatomy_node,
                                  output_type=node_types.DISEASE,
                                  prefixes=['UBERON', 'CL', 'GO'])

    def get_anatomy_by_process_or_activity(self, go_node):
        """ Anatomical entities related to ``go_node``.

        Delegates to ``get_out_by_in`` with the node's GO synonyms; called
        directly because the callee records this method's name as provenance.
        """
        return self.get_out_by_in(input_node=go_node,
                                  output_type=node_types.ANATOMICAL_ENTITY,
                                  prefixes=['GO'])

    def get_chemical_entity_by_process_or_activity(self, go_node):
        """ Chemical substances related to ``go_node``.

        Delegates to ``get_out_by_in`` with the node's GO synonyms; called
        directly because the callee records this method's name as provenance.
        """
        return self.get_out_by_in(input_node=go_node,
                                  output_type=node_types.CHEMICAL_SUBSTANCE,
                                  prefixes=['GO'])

    def get_process_by_disease(self, disease_node):
        """ Biological processes related to ``disease_node``.

        Delegates to ``get_out_by_in`` with the node's MONDO synonyms; called
        directly because the callee records this method's name as provenance.
        """
        return self.get_out_by_in(input_node=disease_node,
                                  output_type=node_types.BIOLOGICAL_PROCESS,
                                  prefixes=['MONDO'])

    def get_activity_by_disease(self, disease_node):
        """ Molecular activities related to ``disease_node``.

        Delegates to ``get_out_by_in`` with the node's MONDO synonyms; called
        directly because the callee records this method's name as provenance.
        """
        return self.get_out_by_in(input_node=disease_node,
                                  output_type=node_types.MOLECULAR_ACTIVITY,
                                  prefixes=['MONDO'])

    def get_anatomy_by_disease(self, disease_node):
        """ Anatomical entities related to ``disease_node``.

        Delegates to ``get_out_by_in`` with the node's MONDO synonyms; called
        directly because the callee records this method's name as provenance.
        """
        return self.get_out_by_in(input_node=disease_node,
                                  output_type=node_types.ANATOMICAL_ENTITY,
                                  prefixes=['MONDO'])

    def get_chemical_by_disease(self, disease_node):
        """ Chemical substances related to ``disease_node``.

        Delegates to ``get_out_by_in`` with the node's MONDO synonyms; called
        directly because the callee records this method's name as provenance.
        """
        return self.get_out_by_in(input_node=disease_node,
                                  output_type=node_types.CHEMICAL_SUBSTANCE,
                                  prefixes=['MONDO'])

    def get_process_by_phenotype(self, pheno_node):
        """ Biological processes related to ``pheno_node``.

        Delegates to ``get_out_by_in`` with the node's HP synonyms; called
        directly because the callee records this method's name as provenance.
        """
        return self.get_out_by_in(input_node=pheno_node,
                                  output_type=node_types.BIOLOGICAL_PROCESS,
                                  prefixes=['HP'])

    def get_chemical_by_phenotype(self, pheno_node):
        """ Chemical substances related to ``pheno_node``.

        Delegates to ``get_out_by_in`` with the node's HP synonyms; called
        directly because the callee records this method's name as provenance.
        """
        return self.get_out_by_in(input_node=pheno_node,
                                  output_type=node_types.CHEMICAL_SUBSTANCE,
                                  prefixes=['HP'])

    def get_activity_by_phenotype(self, pheno_node):
        """ Molecular activities related to ``pheno_node``.

        Delegates to ``get_out_by_in`` with the node's HP synonyms; called
        directly because the callee records this method's name as provenance.
        """
        return self.get_out_by_in(input_node=pheno_node,
                                  output_type=node_types.MOLECULAR_ACTIVITY,
                                  prefixes=['HP'])

    def get_anatomy_by_phenotype_graph(self, pheno_node):
        """ Anatomical entities related to ``pheno_node``.

        Delegates to ``get_out_by_in`` with the node's HP synonyms; called
        directly because the callee records this method's name as provenance.
        """
        return self.get_out_by_in(input_node=pheno_node,
                                  output_type=node_types.ANATOMICAL_ENTITY,
                                  prefixes=['HP'])

    def get_chemical_by_chemical(self, chem_node):
        """ Chemical substances related to ``chem_node``.

        Delegates to ``get_out_by_in`` with the node's CHEBI synonyms; called
        directly because the callee records this method's name as provenance.
        """
        return self.get_out_by_in(input_node=chem_node,
                                  output_type=node_types.CHEMICAL_SUBSTANCE,
                                  prefixes=['CHEBI'])

    def disease_get_ancestors(self, disease_node):
        """ Return the MONDO ancestors of a disease as subclass edges.

        Nodes whose id does not carry the MONDO prefix yield an empty list.
        Ancestors are the superclasses of the disease in the closure graph
        that are themselves subclasses of MONDO:0000001; the disease itself is
        left out.

        :param disease_node: node whose ``id`` is the disease CURIE.
        :return: list of ``(edge, ancestor_node)`` tuples, the edge running
            from ``disease_node`` to the ancestor with predicate
            ``rdfs:subClassOf``.
        """
        disease_curie = disease_node.id
        if Text.get_curie(disease_curie) != "MONDO":
            return []
        sparql = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        prefix MONDO: <http://purl.obolibrary.org/obo/MONDO_>
        select distinct ?output_id ?label
        where {
          graph <http://reasoner.renci.org/ontology/closure> {
            $disease_id  rdfs:subClassOf ?output_id .
            ?output_id rdfs:subClassOf MONDO:0000001 .
          }      
          
          graph <http://reasoner.renci.org/ontology>{
          ?output_id rdfs:label ?label.
          }
        }
        """
        rows = self.triplestore.query_template(
            template_text=sparql,
            inputs={'disease_id': disease_curie},
            outputs=['output_id', 'label'])
        pairs = []
        for row in rows:
            # NOTE(review): other KNode call sites in this file pass ``name=``
            # rather than ``label=`` -- confirm KNode accepts this keyword.
            ancestor = KNode(row['output_id'],
                             label=row['label'],
                             type=node_types.DISEASE_OR_PHENOTYPIC_FEATURE)
            # Skip the disease itself: no self-referencing edge.
            if ancestor.id != disease_node.id:
                subclass_of = LabeledID(identifier='rdfs:subClassOf',
                                        label='subclass of')
                edge = self.create_edge(
                    source_node=disease_node,
                    target_node=ancestor,
                    predicate=subclass_of,
                    provided_by='uberongraph.disease_get_ancestors',
                    input_id=disease_node.id)
                pairs.append((edge, ancestor))
        return pairs
예제 #3
0
class OmniCorp(Service):
    """ Service backed by the OmniCorp triplestore of PubMed references.

    The queries match ``?pubmed dct:references <term>`` statements in order to
    count the articles referencing a term and to list the articles shared by
    two or more terms.
    """
    def __init__(self, context):
        """ Register as the "omnicorp" service and set up the triplestore.

        :param context: passed through to the ``Service`` constructor.
        """
        super(OmniCorp, self).__init__("omnicorp", context)
        self.triplestore = TripleStore(self.url)
        # CURIE prefix -> IRI prefix. Keys are bare prefixes (no trailing
        # colon); the ENVO entry used to be keyed 'ENVO:' and so could never
        # match the other, colon-free lookups.
        self.prefix_to_uri = {
            'UBERON': 'http://purl.obolibrary.org/obo/UBERON_',
            'BSPO': 'http://purl.obolibrary.org/obo/BSPO_',
            'PATO': 'http://purl.obolibrary.org/obo/PATO_',
            'GO': 'http://purl.obolibrary.org/obo/GO_',
            'MONDO': 'http://purl.obolibrary.org/obo/MONDO_',
            'HP': 'http://purl.obolibrary.org/obo/HP_',
            'ENVO': 'http://purl.obolibrary.org/obo/ENVO_',
            'OBI': 'http://purl.obolibrary.org/obo/OBI_',
            'CL': 'http://purl.obolibrary.org/obo/CL_',
            'SO': 'http://purl.obolibrary.org/obo/SO_',
            'CHEBI': 'http://purl.obolibrary.org/obo/CHEBI_',
            'HGNC': 'http://identifiers.org/hgnc/HGNC:',
            'MESH': 'http://id.nlm.nih.gov/mesh/'
        }

    def get_omni_identifier(self, node):
        """ Translate a node's id into the IRI used by OmniCorp.

        Only the node's own ('best') identifier is considered.

        :param node: node with ``id``, ``type`` and ``synonyms`` attributes.
        :return: the IRI string, or ``None`` (after logging warnings) when the
            id's prefix is not in ``self.prefix_to_uri``.
        """
        prefix = Text.get_curie(node.id)
        if prefix not in self.prefix_to_uri:
            logger.warning("What kinda tomfoolery is this?")
            logger.warning(f"{node.id} {node.type}")
            logger.warning(f"{node.synonyms}")
            return None
        return f'{self.prefix_to_uri[prefix]}{Text.un_curie(node.id)}'

    def query_omnicorp(self, query):
        """ Execute and return the result of a SPARQL query. """
        return self.triplestore.execute_query(query)

    def sparql_get_all_shared_pmids(self, identifier_list):
        """ Find articles referencing any two distinct terms of a list.

        :param identifier_list: SPARQL VALUES block text (e.g.
            ``{ <iri1> <iri2>}``) substituted for both term lists.
        :return: rows (term1, term2, pubmed) from the triplestore; the FILTER
            keeps each unordered pair once (term1 < term2 as strings).
        """
        text = """
        PREFIX dct: <http://purl.org/dc/terms/>
        SELECT DISTINCT ?pubmed ?term1 ?term2
        WHERE {
          hint:Query hint:analytic true .
          VALUES ?term1 $id_list_a
          VALUES ?term2 $id_list_b
          ?pubmed dct:references ?term1 .
          ?pubmed dct:references ?term2 .
          FILTER(STR(?term1) < STR(?term2))
        }
        """
        start = datetime.datetime.now()
        results = self.triplestore.query_template(
            inputs={
                'id_list_a': identifier_list,
                'id_list_b': identifier_list
            },
            outputs=['term1', 'term2', 'pubmed'],
            template_text=text,
            post=True)
        end = datetime.datetime.now()
        logger.debug(f'Completed in: {end-start}')
        return results

    def sparql_count_pmids(self, identifier):
        """ Count the distinct articles referencing one term.

        :param identifier: IRI of the term (without angle brackets).
        :return: rows with a single ``count`` column.
        """
        text = """
        PREFIX dct: <http://purl.org/dc/terms/>
        SELECT (COUNT(DISTINCT ?pubmed) as ?count)
        WHERE {
          hint:Query hint:analytic true .
          ?pubmed dct:references <$identifier> .
        }
        """
        return self.triplestore.query_template(
            inputs={'identifier': identifier},
            outputs=['count'],
            template_text=text,
        )

    def sparql_get_shared_pmids(self, identifier_a, identifier_b):
        """ Find the articles referencing both of two terms.

        :param identifier_a: IRI of the first term (without angle brackets).
        :param identifier_b: IRI of the second term (without angle brackets).
        :return: rows with a single ``pubmed`` column.
        """
        text = """
        PREFIX dct: <http://purl.org/dc/terms/>
        SELECT DISTINCT ?pubmed
        WHERE {
          hint:Query hint:analytic true .
          ?pubmed dct:references <$id_a> .
          ?pubmed dct:references <$id_b> .
        }
        """
        return self.triplestore.query_template(
            inputs={
                'id_a': identifier_a,
                'id_b': identifier_b
            },
            outputs=['pubmed'],
            template_text=text,
            post=True)

    def get_all_shared_pmids(self, nodes):
        """ Map every pair of nodes to the PMIDs that reference both.

        :param nodes: list of nodes; nodes without an OmniCorp identifier are
            left out of the query.
        :return: ``defaultdict(list)`` keyed by node pairs. Pairs returned by
            the query map to ``PMID:...`` strings; every other pair of
            ``nodes`` is present with an empty list.
        """
        oiddict = {}
        for node in nodes:
            oid = self.get_omni_identifier(node)
            if oid is not None:
                oiddict[oid] = node
        oids = [f'<{x}>' for x in oiddict]
        oidsstring = '{ ' + ' '.join(oids) + '}'
        results = self.sparql_get_all_shared_pmids(oidsstring)
        pubmeds = defaultdict(list)
        for r in results:
            k = (oiddict[r['term1']], oiddict[r['term2']])
            pubmeds[k].append(f"PMID:{r['pubmed'].split('/')[-1]}")
        # Make sure every pair has an entry, whichever order it was keyed in.
        for i, node_i in enumerate(nodes):
            for node_j in nodes[:i]:
                k_ij = (node_i, node_j)
                k_ji = (node_j, node_i)
                if k_ij not in pubmeds and k_ji not in pubmeds:
                    pubmeds[k_ij] = []
        return pubmeds

    def call_with_retries(self, fnc, args, maxtries=100, rest_time=10):
        """ Call ``fnc(*args)``, retrying after a pause whenever it raises.

        :param fnc: callable to invoke.
        :param args: positional arguments for ``fnc``.
        :param maxtries: maximum number of attempts.
        :param rest_time: seconds to sleep after each failed attempt.
        :return: the first successful result, or ``None`` when every attempt
            raised.
        """
        start = datetime.datetime.now()
        for ntries in range(maxtries):
            try:
                result = fnc(*args)
            except Exception:
                # Exception, not a bare except: KeyboardInterrupt/SystemExit
                # must still be able to stop the retry loop.
                logger.warning("OmniCorp error, retrying")
                time.sleep(rest_time)
            else:
                end = datetime.datetime.now()
                logger.debug(f'Total call ntries: {ntries}, time: {end-start}')
                return result
        return None

    def count_pmids(self, node):
        """ Count the articles referencing ``node``.

        :return: 0 when the node has no OmniCorp identifier, ``None`` when the
            query kept failing, otherwise the ``count`` value of the first
            result row.
        """
        identifier = self.get_omni_identifier(node)
        if identifier is None:
            return 0
        res = self.call_with_retries(self.sparql_count_pmids, [identifier])
        if res is None:
            return None
        logger.debug(f"Returned {res[0]['count']}")
        return res[0]['count']

    def get_shared_pmids(self, node1, node2):
        """ List the articles referencing both nodes.

        :return: ``[]`` when either node has no OmniCorp identifier, ``None``
            when the query kept failing, otherwise the list of ``pubmed``
            values.
        """
        id1 = self.get_omni_identifier(node1)
        id2 = self.get_omni_identifier(node2)
        if id1 is None or id2 is None:
            return []
        pmids = self.call_with_retries(self.sparql_get_shared_pmids,
                                       [id1, id2])
        if pmids is None:
            logger.error("OmniCorp gave up")
            return None
        return [p['pubmed'] for p in pmids]
예제 #4
0
class UberonGraphKS(Service):
    """A knowledge source created by 1) Combining cell ontology, uberon, and
    HPO, 2) Reasoning over the total graph to realize many implicit edges.
    Created by Jim Balhoff"""

    def __init__(self, context):
        """Register the service as "uberongraph" and create its triple store.

        :param context: Passed through to the ``Service`` initializer.
        """
        super().__init__("uberongraph", context)
        self.triplestore = TripleStore(self.url)

    def query_uberongraph(self, query):
        """ Execute and return the result of a SPARQL query. """
        return self.triplestore.execute_query(query)

    def cell_get_cellname(self, cell_identifier):
        """ Look up the rdfs:label of a cell type.

        :param cell_identifier: CL identifier for the cell type
        :return: query_template results with the output 'cellLabel'
        """
        # rdfs is declared explicitly so the query does not depend on the
        # endpoint supplying default prefixes.
        text = """
        prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        prefix CL: <http://purl.obolibrary.org/obo/CL_>
        select distinct ?cellLabel
        from <http://reasoner.renci.org/nonredundant>
        from <http://example.org/uberon-hp-cl.ttl>
        where {
                  $cellID rdfs:label ?cellLabel .
              }
        """
        results = self.triplestore.query_template(
            inputs={'cellID': cell_identifier},
            outputs=['cellLabel'],
            template_text=text,
        )
        return results

    def get_anatomy_parts(self, anatomy_identifier):
        """Given an UBERON id, find other UBERONS that are parts of the query

        :param anatomy_identifier: UBERON identifier; a value starting with
            'http' is first converted with Text.obo_to_curie.
        :return: query_template results with outputs 'part' and 'partlabel';
            each result also gets a 'curie' entry derived from 'part'.
        """
        if anatomy_identifier.startswith('http'):
            anatomy_identifier = Text.obo_to_curie(anatomy_identifier)
        text = """
        prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        prefix UBERON: <http://purl.obolibrary.org/obo/UBERON_>
        prefix BFO: <http://purl.obolibrary.org/obo/BFO_>
        select distinct ?part ?partlabel
        from <http://reasoner.renci.org/nonredundant> 
        from <http://example.org/uberon-hp-cl.ttl>
        where {
                $anatomy_id BFO:0000051 ?part .
                graph <http://reasoner.renci.org/redundant> {
                  ?part rdfs:subClassOf UBERON:0001062 .
                }
                ?part rdfs:label ?partlabel .
        }
        """
        results = self.triplestore.query_template(
            inputs={'anatomy_id': anatomy_identifier},
            outputs=['part', 'partlabel'],
            template_text=text,
        )
        for result in results:
            result['curie'] = Text.obo_to_curie(result['part'])
        return results

    def anatomy_to_cell(self, anatomy_identifier):
        """ Identify cell types that are part of (BFO:0000050) an anatomy term.

        :param anatomy_identifier: identifier of the anatomy term
        :return: query_template results with outputs 'cellID' and 'cellLabel'
        """
        # rdfs is declared explicitly so the query does not depend on the
        # endpoint supplying default prefixes.
        text = """
        prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        prefix UBERON: <http://purl.obolibrary.org/obo/UBERON_>
        prefix CL: <http://purl.obolibrary.org/obo/CL_>
        prefix BFO: <http://purl.obolibrary.org/obo/BFO_>
        select distinct ?cellID ?cellLabel
        from <http://reasoner.renci.org/nonredundant>
        from <http://example.org/uberon-hp-cl.ttl>
        where {
            graph <http://reasoner.renci.org/redundant> {
                ?cellID rdfs:subClassOf CL:0000000 .
                ?cellID BFO:0000050 $anatomyID .
            }
            ?cellID rdfs:label ?cellLabel .
        }

        """
        results = self.triplestore.query_template(
            inputs={'anatomyID': anatomy_identifier},
            outputs=['cellID', 'cellLabel'],
            template_text=text,
        )
        return results

    def cell_to_anatomy(self, cell_identifier):
        """ Identify anatomy terms that a cell type is part of (BFO:0000050).

        :param cell_identifier: CL identifier for cell type
        :return: query_template results with outputs 'anatomyID' and
            'anatomyLabel'
        """
        # rdfs is declared explicitly so the query does not depend on the
        # endpoint supplying default prefixes.
        text = """
        prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        prefix CL: <http://purl.obolibrary.org/obo/CL_>
        prefix BFO: <http://purl.obolibrary.org/obo/BFO_>
        prefix UBERON: <http://purl.obolibrary.org/obo/UBERON_>
        select distinct ?anatomyID ?anatomyLabel
        from <http://reasoner.renci.org/nonredundant>
        from <http://example.org/uberon-hp-cl.ttl>
        where {
            graph <http://reasoner.renci.org/redundant> {
                ?anatomyID rdfs:subClassOf UBERON:0001062 .
                $cellID BFO:0000050 ?anatomyID .
            }
            ?anatomyID rdfs:label ?anatomyLabel .
        }
        """
        results = self.triplestore.query_template(
            inputs={'cellID': cell_identifier},
            outputs=['anatomyID', 'anatomyLabel'],
            template_text=text,
        )
        return results

    def phenotype_to_anatomy(self, hp_identifier):
        """ Identify anatomy terms related to a phenotype.

        :param hp_identifier: HP identifier for phenotype
        :return: query_template results with outputs 'anatomy_id',
            'anatomy_label' and 'input_label' (the label of the input term)
        """

        #The subclassof uberon:0001062 ensures that the result
        #is an anatomical entity.
        text = """
        prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        prefix UBERON: <http://purl.obolibrary.org/obo/UBERON_>
        prefix HP: <http://purl.obolibrary.org/obo/HP_>
        prefix part_of: <http://purl.obolibrary.org/obo/BFO_0000050>
        prefix has_part: <http://purl.obolibrary.org/obo/BFO_0000051>
        prefix depends_on: <http://purl.obolibrary.org/obo/RO_0002502>
        prefix phenotype_of: <http://purl.obolibrary.org/obo/UPHENO_0000001>
        select distinct ?anatomy_id ?anatomy_label ?input_label
        from <http://reasoner.renci.org/nonredundant>
        from <http://example.org/uberon-hp-cl.ttl>
        where {
                  graph <http://reasoner.renci.org/redundant> {
                    ?anatomy_id rdfs:subClassOf UBERON:0001062 .
                  }
                  ?anatomy_id rdfs:label ?anatomy_label .
                  graph <http://reasoner.renci.org/nonredundant> {
                       ?phenotype phenotype_of: ?anatomy_id .
                  }
                  graph <http://reasoner.renci.org/redundant> {
                    $HPID rdfs:subClassOf ?phenotype .
                  }
                  $HPID rdfs:label ?input_label .
              }
        """
        results = self.triplestore.query_template(
            inputs={'HPID': hp_identifier},
            outputs=['anatomy_id', 'anatomy_label', 'input_label'],
            template_text=text,
        )
        return results

    def anatomy_to_phenotype(self, uberon_id):
        """ Identify phenotypes related to an anatomy term.

        :param uberon_id: UBERON identifier for the anatomy term
        :return: query_template results with outputs 'pheno_id',
            'anatomy_label' (the label of the input term) and 'pheno_label'
        """
        text = """
        prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        prefix UBERON: <http://purl.obolibrary.org/obo/UBERON_>
        prefix HP: <http://purl.obolibrary.org/obo/HP_>
        prefix part_of: <http://purl.obolibrary.org/obo/BFO_0000050>
        prefix has_part: <http://purl.obolibrary.org/obo/BFO_0000051>
        prefix depends_on: <http://purl.obolibrary.org/obo/RO_0002502>
        prefix phenotype_of: <http://purl.obolibrary.org/obo/UPHENO_0000001>
        select distinct ?pheno_id ?anatomy_label ?pheno_label
        from <http://reasoner.renci.org/nonredundant>
        from <http://example.org/uberon-hp-cl.ttl>
        where {
                  $UBERONID rdfs:label ?anatomy_label .
                  graph <http://reasoner.renci.org/nonredundant> {
                       ?phenotype phenotype_of: $UBERONID .
                  }
                  graph <http://reasoner.renci.org/redundant> {
                    ?pheno_id rdfs:subClassOf ?phenotype .
                  }
                  ?pheno_id rdfs:label ?pheno_label .
              }
        """
        # NOTE: unlike phenotype_to_anatomy, this query has no
        # subClassOf UBERON:0001062 restriction on its results.
        results = self.triplestore.query_template(
            inputs={'UBERONID': uberon_id},
            outputs=['pheno_id', 'anatomy_label', 'pheno_label'],
            template_text=text,
        )
        return results

    def get_anatomy_by_cell_graph(self, cell_node):
        """Build part_of edges from a cell node to its anatomy terms.

        :param cell_node: node whose ``id`` is passed to cell_to_anatomy
        :return: list of (edge, anatomy_node) tuples
        """
        anatomies = self.cell_to_anatomy(cell_node.id)
        results = []
        predicate = LabeledID(identifier='BFO:0000050', label='part_of')
        for r in anatomies:
            anatomy_node = KNode(Text.obo_to_curie(r['anatomyID']),
                                 type=node_types.ANATOMY,
                                 name=r['anatomyLabel'])
            edge = self.create_edge(cell_node, anatomy_node,
                                    'uberongraph.get_anatomy_by_cell_graph',
                                    cell_node.id, predicate)
            results.append((edge, anatomy_node))
        return results

    def get_cell_by_anatomy_graph(self, anatomy_node):
        """Build part_of edges from cell types to an anatomy node.

        :param anatomy_node: node whose ``id`` is passed to anatomy_to_cell
        :return: list of (edge, cell_node) tuples
        """
        cells = self.anatomy_to_cell(anatomy_node.id)
        results = []
        predicate = LabeledID(identifier='BFO:0000050', label='part_of')
        for r in cells:
            cell_node = KNode(Text.obo_to_curie(r['cellID']),
                              type=node_types.CELL,
                              name=r['cellLabel'])
            # The edge still runs cell -> anatomy; only the input id differs.
            edge = self.create_edge(cell_node, anatomy_node,
                                    'uberongraph.get_cell_by_anatomy_graph',
                                    anatomy_node.id, predicate)
            results.append((edge, cell_node))
        return results

    def create_phenotype_anatomy_edge(self, node_id, node_label, input_id,
                                      phenotype_node):
        """Create an anatomy node and its edge to an existing phenotype node.

        :param node_id: identifier of the anatomy term, converted with
            Text.obo_to_curie
        :param node_label: name given to the new anatomy node
        :param input_id: identifier recorded on the edge as its input
        :param phenotype_node: existing phenotype node (edge target)
        :return: (edge, anatomy_node)
        """
        predicate = LabeledID(identifier='GAMMA:0000002',
                              label='inverse of has phenotype affecting')
        anatomy_node = KNode(Text.obo_to_curie(node_id),
                             type=node_types.ANATOMY,
                             name=node_label)
        edge = self.create_edge(anatomy_node, phenotype_node,
                                'uberongraph.get_anatomy_by_phenotype_graph',
                                input_id, predicate)
        return edge, anatomy_node

    def create_anatomy_phenotype_edge(self, node_id, node_label, input_id,
                                      anatomy_node):
        """Create a phenotype node and the edge to it from an anatomy node.

        :param node_id: identifier of the phenotype term, converted with
            Text.obo_to_curie
        :param node_label: name given to the new phenotype node
        :param input_id: identifier recorded on the edge as its input
        :param anatomy_node: existing anatomy node (edge source)
        :return: (edge, phenotype_node)
        """
        predicate = LabeledID(identifier='GAMMA:0000002',
                              label='inverse of has phenotype affecting')
        phenotype_node = KNode(Text.obo_to_curie(node_id),
                               type=node_types.PHENOTYPE,
                               name=node_label)
        edge = self.create_edge(anatomy_node, phenotype_node,
                                'uberongraph.get_phenotype_by_anatomy_graph',
                                input_id, predicate)
        return edge, phenotype_node

    def get_anatomy_by_phenotype_graph(self, phenotype_node):
        """Find anatomy terms, and their parts, for a phenotype node.

        Runs phenotype_to_anatomy for every HP synonym of the node. Fills in
        ``phenotype_node.name`` from the query's input label when it is None.

        :param phenotype_node: node providing get_synonyms_by_prefix('HP')
        :return: list of (edge, anatomy_node) tuples
        """
        results = []
        for curie in phenotype_node.get_synonyms_by_prefix('HP'):
            anatomies = self.phenotype_to_anatomy(curie)
            for r in anatomies:
                edge, node = self.create_phenotype_anatomy_edge(
                    r['anatomy_id'], r['anatomy_label'], curie, phenotype_node)
                if phenotype_node.name is None:
                    phenotype_node.name = r['input_label']
                results.append((edge, node))
                #These tend to be very high level terms.  Let's also get their parts to
                #be more inclusive.
                #TODO: there ought to be a more principled way to take care of this, but
                #it highlights the uneasy relationship between the high level world of
                #smartapi and the low-level sparql-vision.
                part_results = self.get_anatomy_parts(r['anatomy_id'])
                for pr in part_results:
                    pedge, pnode = self.create_phenotype_anatomy_edge(
                        pr['part'], pr['partlabel'], curie, phenotype_node)
                    results.append((pedge, pnode))
        return results

    def get_phenotype_by_anatomy_graph(self, anatomy_node):
        """Find phenotypes for an anatomy node.

        Runs anatomy_to_phenotype for every UBERON synonym of the node. Fills
        in ``anatomy_node.name`` from the query's anatomy label when it is
        None.

        :param anatomy_node: node providing get_synonyms_by_prefix('UBERON')
        :return: list of (edge, phenotype_node) tuples
        """
        results = []
        for curie in anatomy_node.get_synonyms_by_prefix('UBERON'):
            phenotypes = self.anatomy_to_phenotype(curie)
            for r in phenotypes:
                edge, node = self.create_anatomy_phenotype_edge(
                    r['pheno_id'], r['pheno_label'], curie, anatomy_node)
                if anatomy_node.name is None:
                    anatomy_node.name = r['anatomy_label']
                results.append((edge, node))
        return results