class ChemBioKS(Service):
    """Generic service endpoints for medical and bio-chemical data.

    This set comprises portions of chem2bio2rdf (CTD, KEGG, PubChem,
    DRUGBANK). All lookups are SPARQL queries run through ``self.triplestore``.
    Methods prefixed with ``graph_`` take node objects (read through their
    ``identifier`` attribute) and return a list of ``(edge, node)`` tuples;
    the other methods return plain query results.
    """

    def __init__(self, context):
        """Register the service as ``"chembio"`` and open its triple store.

        :param context: Service context handed to the ``Service`` base class.
        """
        super(ChemBioKS, self).__init__("chembio", context)
        self.triplestore = TripleStore(self.url)

    def query_chembio(self, query):
        """Execute and return the result of a SPARQL query.

        :param query: SPARQL query text.
        """
        return self.triplestore.execute_query(query)

    def get_exposure_conditions(self, chemicals):
        """Identify conditions (MeSH IDs) triggered by the specified stressor
        agent ids (also MeSH IDs).

        :param chemicals: List of IDs for substances of interest.
        :type chemicals: list of MeSH IDs, eg. D052638
        :return: One dict per binding with the keys ``chemical``, ``gene``,
            ``pathway``, ``pathName``, ``pathID`` and ``human`` (True when
            the pathway name contains ``(human)``).
        """
        id_list = ' '.join("( mesh:{0} )".format(d) for d in chemicals)
        text = self.triplestore.get_template(
            "ctd_gene_expo_disease").safe_substitute(chemicals=id_list)
        results = self.triplestore.execute_query(text)
        return [{
            "chemical": b['chemical'].value,
            "gene": b['gene'].value,
            "pathway": b['kegg_pathway'].value,
            "pathName": b['pathway_name'].value,
            "pathID": b['pathway_id'].value,
            "human": '(human)' in b['pathway_name'].value
        } for b in results.bindings]

    def get_drugs_by_condition(self, conditions):
        """Get drugs associated with a set of conditions.

        :param conditions: Conditions to find associated drugs for. A single
            value that is not a list is treated as a one-element list.
        :type conditions: List of MeSH IDs for conditions, eg.: D001249
        :return: Whatever ``query_template`` yields for the outputs
            ``drugID``, ``drugGenericName``, ``pubChemCID``, ``diseasePMIDs``.
        """
        if not isinstance(conditions, list):
            conditions = [conditions]
        prefix = "mesh:"
        disease_ids = []
        for condition in conditions:
            condition = condition.replace("MESH:", prefix)
            # Decide per item whether the prefix is missing, so a mix of
            # prefixed and bare IDs is handled correctly.
            if not condition.startswith(prefix):
                condition = prefix + condition
            disease_ids.append(condition)
        # The template wraps $diseaseIds in a single "( ... )" row. A
        # one-variable VALUES block takes one parenthesised row per value
        # (no commas), so rows are separated with ") (".
        condition_list = ' ) ( '.join(disease_ids)
        return self.triplestore.query_template(
            inputs={"diseaseIds": condition_list.lower()},
            outputs=[
                'drugID', 'drugGenericName', 'pubChemCID', 'diseasePMIDs'
            ],
            template_text="""
            prefix mesh: <http://bio2rdf.org/mesh:>
            prefix ctd: <http://chem2bio2rdf.org/ctd/resource/>
            prefix db_resource: <http://chem2bio2rdf.org/drugbank/resource/>
            select ?drugID ?drugGenericName ?diseasePMIDs ?ctdChemDis ?pubChemCID where {
               values ( ?diseaseId ) { ( $diseaseIds ) }
               ?ctdChemDis ctd:cid ?pubChemCID;
                           ctd:diseaseid ?diseaseId;
                           ctd:pubmedids ?diseasePMIDs.
               ?dbInter db_resource:Name ?name ;
                        db_resource:DBID ?drugID .
               ?drugID db_resource:CID ?pubChemCID ;
                       db_resource:Generic_Name ?drugGenericName .
            }""")

    def get_drugs_by_condition_graph(self, conditions):
        """Return drugs for a condition node as ``(edge, node)`` tuples.

        :param conditions: Node whose ``identifier`` is passed to
            :meth:`get_drugs_by_condition`.
        :return: List of ``(KEdge, KNode)``; the node id is the last path
            segment of the returned ``drugID``.
        """
        drugs = self.get_drugs_by_condition(conditions.identifier)
        results = []
        for r in drugs:
            edge = KEdge('c2b2r', 'conditionToDrug', {
                'cid': r['pubChemCID'],
                'pmids': r['diseasePMIDs']
            })
            node = KNode(r['drugID'].split('/')[-1], node_types.DRUG,
                         r['drugGenericName'])
            results.append((edge, node))
        return results

    def get_genes_pathways_by_disease(self, diseases):
        """Get genes and pathways associated with specified conditions.

        :param diseases: List of conditions designated by MeSH ID.
        :return: Returns a list of dicts containing gene and path
            information (keys ``uniprotGene``, ``keggPath``, ``pathName``,
            ``human``).
        """
        disease_mesh_ids = ' '.join(
            "( mesh:{0} )".format(d) for d in diseases)
        text = self.triplestore.get_template(
            "genes_pathways_by_disease").safe_substitute(
                diseaseMeshIDList=disease_mesh_ids)
        results = self.triplestore.execute_query(text)
        return [{
            "uniprotGene": b['uniprotGeneID'].value,
            "keggPath": b['keggPath'].value,
            "pathName": b['pathwayName'].value,
            "human": '(human)' in b['pathwayName'].value
        } for b in results.bindings]

    def get_drug_gene_disease(self, disease_name, drug_name):
        """Identify targets and diseases associated with a drug name.

        :param disease_name: MeSH name of a disease condition.
        :type disease_name: str
        :param drug_name: Name of a drug.
        :type drug_name: str
        :return: List of dicts with the keys ``uniprotSymbol`` and
            ``diseaseId``.
        """
        text = self.triplestore.get_template(
            "drug_gene_disease").safe_substitute(diseaseName=disease_name,
                                                 drugName=drug_name)
        results = self.triplestore.execute_query(text)
        return [{
            "uniprotSymbol": b['uniprotSym'].value,
            "diseaseId": b['diseaseID'].value
        } for b in results.bindings]

    def pubchem_to_ncbigene(self, pubchemID):
        """Look up CTD chemical-gene interactions for a PubChem compound.

        :param pubchemID: PubChem compound id, substituted into the query as
            ``pubchem:<pubchemID>``.
        :return: List of dicts with the keys ``NCBIGene``, ``meshID``,
            ``interaction``, ``interactionTypes`` and ``pubmedids``.
        """
        result = self.triplestore.query_template(
            inputs={"pubchemID": "pubchem:{}".format(pubchemID)},
            outputs=[
                'NCBIGene', 'meshID', 'interaction', 'interactionTypes',
                'pubmedids'
            ],
            template_text="""
            prefix pubchem: <http://chem2bio2rdf.org/pubchem/resource/pubchem_compound/>
            prefix ctd: <http://chem2bio2rdf.org/ctd/resource/>
            select distinct ?NCBIGene ?meshID ?interaction ?interactionTypes ?pubmedids where {
                ?ctdChemGene ctd:cid $pubchemID;
                             ctd:chemicalid ?meshID ;
                             ctd:geneid ?NCBIGene;
                             ctd:interaction ?interaction;
                             ctd:interactiontypes ?interactionTypes;
                             ctd:pubmedids ?pubmedids.
            }""")
        return [{
            'NCBIGene': r['NCBIGene'],
            'meshID': r['meshID'],
            'interaction': r['interaction'],
            'interactionTypes': r['interactionTypes'],
            'pubmedids': r['pubmedids']
        } for r in result]

    def drug_name_to_gene_symbol(self, drug_name):
        """Find gene symbols linked through CTD to drugs matching a name.

        The drug's generic name is matched with a case-insensitive regex
        against ``drug_name``.

        :param drug_name: Drug name (or name fragment) to search for.
        :return: List of dicts with the keys ``uniprotSym``, ``pmids``
            (``None`` when absent) and ``drugID``.
        """
        result = self.triplestore.query_template(
            inputs={"drugName": drug_name},
            outputs=['uniprotSym', 'pmids', 'drugID'],
            template_text="""
            prefix ctd: <http://chem2bio2rdf.org/ctd/resource/>
            prefix db_resource: <http://chem2bio2rdf.org/drugbank/resource/>
            select ?drugGenericName ?pmids ?drugID ?uniprotSym where {
               values ( ?drugName ) { ( "$drugName" ) }
               ?ctdChemGene ctd:cid ?pubChemCID;
                            ctd:pubmedids ?pmids;
                            ctd:gene ?uniprotSym .
               ?drugID db_resource:CID ?pubChemCID ;
                       db_resource:Generic_Name ?drugGenericName .
               filter regex(lcase(str(?drugGenericName)), lcase(?drugName))
            }""")
        return [{
            'uniprotSym': r['uniprotSym'],
            'pmids': r.get('pmids', None),
            'drugID': r['drugID']
        } for r in result]

    def drugname_to_pubchem(self, drug_name):
        """Find PubChem ids of drugs whose generic name matches a name.

        :param drug_name: Drug name (or name fragment) to search for.
        :return: List of dicts with the keys ``drugID`` (the PubChem id from
            the query) and ``drugName`` (the matched generic name).
        """
        result = self.triplestore.query_template(
            inputs={"drugName": drug_name},
            outputs=['pubChemID', 'drugGenericName'],
            template_text="""
            prefix db_resource: <http://chem2bio2rdf.org/drugbank/resource/>
            select distinct ?pubChemID ?drugGenericName where {
               values ( ?drugName ) { ( "$drugName" ) }
               ?drugID db_resource:CID ?pubChemID ;
                       db_resource:Generic_Name ?drugGenericName .
               filter regex(lcase(str(?drugGenericName)), lcase(?drugName))
            }""")
        return [{
            'drugID': r['pubChemID'],
            'drugName': r['drugGenericName']
        } for r in result]

    def gene_symbol_to_pathway(self, uniprot_symbol):
        """Find KEGG pathways for a gene symbol (at most 500 rows).

        :param uniprot_symbol: Gene IRI; it is wrapped in ``<...>`` by the
            query template.
        :return: Result of ``query_template`` for the output ``keggPath``.
        """
        return self.triplestore.query_template(
            inputs={"uniprotSymbol": uniprot_symbol},
            outputs=["keggPath"],
            template_text="""
            prefix kegg: <http://chem2bio2rdf.org/kegg/resource/>
            prefix pharmgkb: <http://chem2bio2rdf.org/pharmgkb/resource/>
            prefix ctd: <http://chem2bio2rdf.org/ctd/resource/>
            select ?ctdGene ?uniprotID ?pathwayName ?keggPath where {
               values ( ?ctdGene ) { ( <$uniprotSymbol> ) }
               ?keggPath kegg:protein ?uniprotID ;
                         kegg:Pathway_name ?pathwayName .
               ?pharmGene pharmgkb:Symbol ?ctdGene ;
                          pharmgkb:UniProt_Id ?uniprotID.
               ?ctdChemGene ctd:gene ?ctdGene.
            } LIMIT 500
            """)

    def uniprot_to_hgnc(self, uniprot_symbol):
        """Find ``owl:sameAs`` gene ids for a UniProt gene node.

        Only results whose IRI starts with ``http://bio2rdf.org/gene:`` are
        kept.

        :param uniprot_symbol: Node whose ``identifier`` is un-curied and
            used as the UniProt id.
        :return: Result of ``query_template`` for the output ``hgncID``.
        """
        return self.triplestore.query_template(
            inputs={"uniprotID": Text.un_curie(uniprot_symbol.identifier)},
            outputs=["hgncID"],
            template_text="""
            prefix uniprot: <http://chem2bio2rdf.org/uniprot/resource/gene/>
            prefix owl: <http://www.w3.org/2002/07/owl#>
            prefix hgnc: <http://chem2bio2rdf.org/rdf/resource/hgnc/>
            select distinct ?hgncID where {
               values ( ?uniprotID ) { ( uniprot:${uniprotID} ) }
               ?uniprotID <http://www.w3.org/2002/07/owl#sameAs> ?hgncID.
               filter ( strstarts (str(?hgncID), "http://bio2rdf.org/gene:"))
            }
            """)

    def graph_uniprot_to_hgnc(self, uniprot_symbol):
        """Return HGNC gene nodes synonymous with a UniProt gene node.

        :param uniprot_symbol: Node passed on to :meth:`uniprot_to_hgnc`.
        :return: List of ``(edge, KNode)`` with predicate ``synonym``.
        """
        result = self.uniprot_to_hgnc(uniprot_symbol)
        return [(self.get_edge(r, predicate='synonym'),
                 KNode('HGNC:{0}'.format(r['hgncID'].split(':')[-1]),
                       node_types.GENE)) for r in result]

    def graph_get_genes_by_disease(self, disease):  #reasoner
        """Return gene nodes associated with a disease node.

        :param disease: Node with a ``prefix:id`` identifier; the lower-cased
            id part is used as the MeSH id.
        :return: List of ``(KEdge, KNode)`` with ``UNIPROT:`` gene nodes.
        """
        disease = disease.identifier.split(':')[1].lower()
        response = self.get_genes_pathways_by_disease([disease])
        results = []
        for r in response:
            edge = KEdge('c2b2r', 'diseaseToGene',
                         {'keggPath': r['keggPath']})
            node = KNode(
                "UNIPROT:{0}".format(r['uniprotGene'].split('/')[-1]),
                node_types.GENE)
            results.append((edge, node))
        return results

    def graph_drugname_to_gene_symbol(self, drug_name_node):
        """Return gene nodes targeted by drugs matching a drug-name node.

        :param drug_name_node: Node whose un-curied ``identifier`` is the
            drug name.
        :return: List of ``(edge, KNode)`` with predicate ``targets``.
        """
        drug_name = Text.un_curie(drug_name_node.identifier)
        response = self.drug_name_to_gene_symbol(drug_name)
        results = []
        for r in response:
            edge = self.get_edge(r, predicate="targets")
            node = KNode(
                "UNIPROT:{0}".format(Text.path_last(r['uniprotSym'])),
                node_types.GENE)
            results.append((edge, node))
        return results

    def graph_name_to_drugbank(self, drug_name_node):
        """Return DrugBank drug nodes for a drug-name node.

        :param drug_name_node: Node whose un-curied ``identifier`` is the
            drug name.
        :return: List of ``(edge, KNode)`` with predicate ``drugname``.
        """
        drug_name = Text.un_curie(drug_name_node.identifier)
        response = self.drug_name_to_gene_symbol(drug_name)
        results = []
        for r in response:
            edge = self.get_edge(r, predicate="drugname")
            # drug_name_to_gene_symbol() emits no 'drugName' key, so indexing
            # it raised KeyError; fall back to the name that was queried.
            node = KNode("DRUGBANK:{0}".format(Text.path_last(r['drugID'])),
                         node_types.DRUG,
                         label=r.get('drugName', drug_name))
            results.append((edge, node))
        return results

    # NOTE: this method used to be defined twice with identical bodies. The
    # first copy carried @cachier(stale_after=datetime.timedelta(days=20))
    # but was shadowed by the second, so caching was never in effect. Only
    # the effective (undecorated) definition is kept.
    def graph_get_pathways_by_gene(self, gene):  #reasoner
        """Return KEGG pathway nodes for a gene node (at most 2000 rows).

        :param gene: Node with a ``prefix:id`` identifier; the upper-cased id
            part is used as the UniProt gene id.
        :return: List of ``(KEdge, KNode)`` with ``KEGG:`` pathway nodes.
        """
        response = self.triplestore.query_template(
            inputs={"gene": gene.identifier.split(':')[1].upper()},
            outputs=['keggPath'],
            template_text="""
            prefix kegg: <http://chem2bio2rdf.org/kegg/resource/>
            prefix drugbank: <http://chem2bio2rdf.org/drugbank/resource/>
            prefix uniprot: <http://chem2bio2rdf.org/uniprot/resource/gene/>
            prefix ctd: <http://chem2bio2rdf.org/ctd/resource/>
            prefix mesh: <http://bio2rdf.org/mesh:>
            select ?drugGenericName ?uniprotGeneID ?pathwayName ?keggPath where {
               ?keggPath kegg:protein ?swissProtID ;
                         kegg:Pathway_name ?pathwayName .
               ?keggInter kegg:cid ?pubchemCID .
               ?dbInter drugbank:GeneBank_ID ?geneBankID ;
                        drugbank:SwissProt_ID ?swissProtID ;
                        drugbank:gene ?uniprotGeneID .
               ?drugID drugbank:CID ?pubchemCID ;
                       drugbank:Generic_Name ?drugGenericName .
               ?ctd_disease ctd:diseaseid ?diseaseID ;
                            ctd:cid ?pubchemCID .
               values ( ?uniprotGeneID ) { ( uniprot:$gene ) }
            } LIMIT 2000""")
        results = []
        for r in response:
            edge = KEdge('c2b2r', 'geneToPathway', {})
            node = KNode("KEGG:{0}".format(r['keggPath'].split('/')[-1]),
                         node_types.PATHWAY)
            results.append((edge, node))
        return results

    def graph_drugbank_to_uniprot(self, drugbank):
        """Return gene nodes targeted by a DrugBank drug node.

        :param drugbank: Node whose un-curied ``identifier`` is prefixed with
            ``DB`` to form the DrugBank drug id.
        :return: List of ``(edge, KNode)`` with predicate ``targets``.
        """
        response = self.triplestore.query_template(
            inputs={
                "drugID": "DB{0}".format(Text.un_curie(drugbank.identifier))
            },
            outputs=["uniprotGeneID"],
            template_text="""
            prefix drugbank: <http://chem2bio2rdf.org/drugbank/resource/>
            prefix drugbank_drug: <http://chem2bio2rdf.org/drugbank/resource/drugbank_drug/>
            prefix ctd: <http://chem2bio2rdf.org/ctd/resource/>
            select distinct ?uniprotGeneID where {
               values ( ?drugID ) { ( drugbank_drug:${drugID} ) }
               ?dbInter drugbank:GeneBank_ID ?geneBankID ;
                        drugbank:gene ?uniprotGeneID .
               ?drugID drugbank:CID ?pubchemCID ;
                       drugbank:Generic_Name ?drugGenericName .
               ?ctd_disease ctd:diseaseid ?diseaseID ;
                            ctd:cid ?pubchemCID .
            }""")
        return [(self.get_edge(r, predicate='targets'),
                 KNode("UNIPROT:{0}".format(
                     r['uniprotGeneID'].split('/')[-1]), node_types.GENE))
                for r in response]

    def graph_diseasename_to_uniprot(self, disease):
        """Return gene nodes linked to a disease name via shared chemicals.

        A cheap ``LIMIT 1`` query first checks that the name matches a CTD
        disease; only then is the larger (``LIMIT 500``) join executed.

        :param disease: Node whose un-curied ``identifier`` is the disease
            name (matched with a case-insensitive regex).
        :return: List of ``(edge, KNode)`` with predicate ``caused_by``; the
            edge's ``pmids`` joins chemical and disease PMIDs with ``|``.
        """
        results = []
        disease_name = Text.un_curie(disease.identifier)
        response = self.triplestore.query_template(
            inputs={"diseaseName": disease_name},
            outputs=["pubChemCID"],
            template_text="""
            prefix ctd: <http://chem2bio2rdf.org/ctd/resource/>
            select distinct ?pubChemCID where {
               values ( ?diseaseName ) { ( "$diseaseName" ) }
               ?ctdChemDis ctd:cid ?pubChemCID;
                           ctd:diseasename ?diseaseNameRec.
               filter regex(lcase(str(?diseaseNameRec)), lcase(?diseaseName))
            } LIMIT 1""")
        if len(response) == 0:
            # Not a known disease name: nothing to look up.
            return results
        response = self.triplestore.query_template(
            inputs={"diseaseName": disease_name},
            outputs=["disPmids", "chemPmids", "uniprotSym"],
            template_text="""
            prefix ctd: <http://chem2bio2rdf.org/ctd/resource/>
            select ?disPmids ?ctdChemDis ?chemPmids ?uniprotSym ?diseaseId where {
               values ( ?diseaseName ) { ( "$diseaseName" ) }
               ?ctdChemGene ctd:cid ?pubChemCID;
                            ctd:pubmedids ?chemPmids;
                            ctd:gene ?uniprotSym.
               ?ctdChemDis ctd:cid ?pubChemCID;
                           ctd:diseaseid ?diseaseId;
                           ctd:diseasename ?diseaseNameRec;
                           ctd:pubmedids ?disPmids.
               filter regex(lcase(str(?diseaseNameRec)), lcase(?diseaseName))
            } LIMIT 500""")
        for r in response:
            pmids = r['chemPmids'] + "|" + r['disPmids']
            # A stray trailing comma here used to wrap the edge in a 1-tuple.
            edge = self.get_edge(r, predicate='caused_by', pmids=pmids)
            node = KNode(
                "UNIPROT:{0}".format(r['uniprotSym'].split('/')[-1]),
                node_types.GENE)
            results.append((edge, node))
        return results

    def graph_diseaseid_to_uniprot(self, drugbank):
        """Return gene nodes linked to a disease id via shared chemicals.

        :param drugbank: Despite its name, a disease node; its lower-cased
            ``identifier`` is substituted verbatim as the disease id
            (presumably a ``mesh.disease:`` curie, matching the prefix the
            query declares -- confirm against callers).
        :return: List of ``(edge, KNode)`` with predicate ``targets``.
        """
        response = self.triplestore.query_template(
            inputs={"diseaseID": drugbank.identifier.lower()},
            outputs=["uniprotGeneID"],
            template_text="""
            prefix drugbank: <http://chem2bio2rdf.org/drugbank/resource/>
            prefix drugbank_drug: <http://chem2bio2rdf.org/drugbank/resource/drugbank_drug/>
            prefix ctd: <http://chem2bio2rdf.org/ctd/resource/>
            prefix mesh.disease: <http://bio2rdf.org/mesh:>
            select distinct ?uniprotGeneID where {
               values ( ?diseaseID ) { ( $diseaseID ) }
               ?dbInter drugbank:gene ?uniprotGeneID .
               ?drugID drugbank:CID ?pubchemCID.
               ?ctd_disease ctd:diseaseid ?diseaseID ;
                            ctd:cid ?pubchemCID .
            }""")
        return [(self.get_edge(r, predicate='targets'),
                 KNode("UNIPROT:{0}".format(
                     r['uniprotGeneID'].split('/')[-1]), node_types.GENE))
                for r in response]

    def graph_drugname_to_pubchem(self, drugname_node):
        """Return PubChem drug nodes for a drug-name node.

        :param drugname_node: Node whose un-curied ``identifier`` is the
            drug name.
        :return: List of ``(edge, KNode)`` with predicate
            ``drugname_to_pubchem``; nodes are labelled with the matched
            generic name.
        """
        drug_name = Text.un_curie(drugname_node.identifier)
        response = self.drugname_to_pubchem(drug_name)
        return [(self.get_edge(r, predicate='drugname_to_pubchem'),
                 KNode("PUBCHEM:{}".format(r['drugID'].split('/')[-1]),
                       node_types.DRUG,
                       label=r['drugName'])) for r in response]

    def graph_pubchem_to_ncbigene(self, pubchem_node):
        """Return NCBI gene nodes interacting with a PubChem compound node.

        :param pubchem_node: Node whose un-curied ``identifier`` is the
            PubChem compound id.
        :return: List of ``(edge, KNode)`` with predicate
            ``pubchem_to_ncbigene``. Edge properties are ``interaction``,
            ``interactionTypes`` and ``publications`` (``pubmedids`` split
            on ``|``).
        """
        #The compound mesh coming back from here is very out of date. Ignore.
        pubchemid = Text.un_curie(pubchem_node.identifier)
        response = self.pubchem_to_ncbigene(pubchemid)
        retvals = []
        for r in response:
            props = {
                'interaction': r['interaction'],
                'interactionTypes': r['interactionTypes'],
                'publications': r['pubmedids'].split('|')
            }
            retvals.append(
                (self.get_edge(props, predicate='pubchem_to_ncbigene'),
                 KNode("NCBIGene:{}".format(r['NCBIGene']),
                       node_types.GENE)))
        return retvals
class UberonGraphKS(Service): """A knowledge source created by 1) Combining cell ontology, uberon, and HPO, 2) Reasoning over the total graph to realize many implicit edges. Created by Jim Balhoff""" def __init__(self, context): #triplestore): super(UberonGraphKS, self).__init__("uberongraph", context) self.triplestore = TripleStore(self.url) #TODO: Pull this from the biolink model? self.class_defs = { node_types.CELL: 'CL:0000000', node_types.ANATOMICAL_ENTITY: 'UBERON:0001062', node_types.BIOLOGICAL_PROCESS: 'GO:0008150', node_types.MOLECULAR_ACTIVITY: 'GO:0003674', node_types.CHEMICAL_SUBSTANCE: 'CHEBI:24431', node_types.DISEASE: 'MONDO:0000001', node_types.PHENOTYPIC_FEATURE: 'UPHENO:0001002' } def query_uberongraph(self, query): """ Execute and return the result of a SPARQL query. """ return self.triplestore.execute_query(query) def get_edges(self, source_type, obj_type): """Given an UBERON id, find other UBERONS that are parts of the query""" text = """ prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> prefix UBERON: <http://purl.obolibrary.org/obo/UBERON_> prefix CL: <http://purl.obolibrary.org/obo/CL_> prefix GO: <http://purl.obolibrary.org/obo/GO_> prefix CHEBI: <http://purl.obolibrary.org/obo/CHEBI_> prefix MONDO: <http://purl.obolibrary.org/obo/MONDO_> prefix UPHENO: <http://purl.obolibrary.org/obo/UPHENO_> prefix BFO: <http://purl.obolibrary.org/obo/BFO_> select distinct ?p ?pLabel from <http://reasoner.renci.org/ontology> where { graph <http://reasoner.renci.org/redundant> { ?sourceID ?p ?objID . } graph <http://reasoner.renci.org/ontology/closure> { ?sourceID rdfs:subClassOf $sourcedefclass . } graph <http://reasoner.renci.org/ontology/closure> { ?objID rdfs:subClassOf $objdefclass . hint:Prior hint:runFirst true . } ?p rdfs:label ?pLabel . 
} """ results = self.triplestore.query_template( inputs = { 'sourcedefclass': self.class_defs[source_type], 'objdefclass': self.class_defs[obj_type] }, \ outputs = [ 'p', 'pLabel' ], \ template_text = text \ ) return results def get_label(self, identifier): obo_id = Text.curie_to_obo(identifier) text = """ prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> select distinct ?label from <http://reasoner.renci.org/ontology> where { $obo_id rdfs:label ?label . } """ results = self.triplestore.query_template(inputs={'obo_id': obo_id}, outputs=['label'], template_text=text) if len(results) < 1: return '' return results[0]['label'] def cell_get_cellname(self, cell_identifier): """ Identify label for a cell type :param cell: CL identifier for cell type """ text = """ prefix CL: <http://purl.obolibrary.org/obo/CL_> select distinct ?cellLabel from <http://reasoner.renci.org/nonredundant> from <http://reasoner.renci.org/ontology> where { $cellID rdfs:label ?cellLabel . } """ results = self.triplestore.query_template( inputs = { 'cellID': cell_identifier }, \ outputs = [ 'cellLabel' ], \ template_text = text \ ) return results def get_anatomy_parts(self, anatomy_identifier): """Given an UBERON id, find other UBERONS that are parts of the query""" anatomy_identifier = f"<{anatomy_identifier}>" text = """ prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> prefix UBERON: <http://purl.obolibrary.org/obo/UBERON_> prefix BFO: <http://purl.obolibrary.org/obo/BFO_> select distinct ?part ?partlabel from <http://reasoner.renci.org/nonredundant> from <http://reasoner.renci.org/ontology> where { $anatomy_id BFO:0000051 ?part . graph <http://reasoner.renci.org/ontology/closure> { ?part rdfs:subClassOf UBERON:0001062 . } ?part rdfs:label ?partlabel . 
} """ results = self.triplestore.query_template( inputs = { 'anatomy_id': anatomy_identifier }, \ outputs = [ 'part', 'partlabel' ], \ template_text = text \ ) for result in results: result['curie'] = Text.obo_to_curie(result['part']) return results def get_neighbor(self, input_id, output_type, subject=True): parents = { node_types.ANATOMICAL_ENTITY: "<http://purl.obolibrary.org/obo/UBERON_0001062>", node_types.DISEASE: "<http://purl.obolibrary.org/obo/MONDO_0000001>", node_types.MOLECULAR_ACTIVITY: "<http://purl.obolibrary.org/obo/GO_0003674>", node_types.BIOLOGICAL_PROCESS: "<http://purl.obolibrary.org/obo/GO_0008150>", node_types.CHEMICAL_SUBSTANCE: "<http://purl.obolibrary.org/obo/CHEBI_24431>", node_types.PHENOTYPIC_FEATURE: "<http://purl.obolibrary.org/obo/HP_0000118>" } pref = Text.get_curie(input_id) obo_prefix = f'PREFIX {pref}: <http://purl.obolibrary.org/obo/{pref}_>' text = """ PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> """ + obo_prefix + """ select distinct ?output_id ?output_label ?p ?pLabel from <http://reasoner.renci.org/nonredundant> from <http://reasoner.renci.org/ontology> where { graph <http://reasoner.renci.org/nonredundant> { """ if subject: text += ' $input_id ?p ?output_id .' else: text += ' ?output_id ?p $input_id .' text += """ } graph <http://reasoner.renci.org/ontology/closure> { ?output_id rdfs:subClassOf $parent . } ?output_id rdfs:label ?output_label . ?p rdfs:label ?pLabel . 
} """ results = self.triplestore.query_template( inputs={ 'input_id': input_id, 'parent': parents[output_type] }, outputs=['output_id', 'output_label', 'p', 'pLabel'], template_text=text) return results def anatomy_to_anatomy(self, identifier): results = {'subject': [], 'object': []} for direction,query in \ (('subject',' ?input_id ?p ?output_id .'), ('object',' ?output_id ?p ?input_id .')): text=""" PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> prefix UBERON: <http://purl.obolibrary.org/obo/UBERON_> select distinct ?output_id ?output_label ?p ?pLabel from <http://reasoner.renci.org/nonredundant> from <http://reasoner.renci.org/ontology> where { graph <http://reasoner.renci.org/redundant> { """ + query + \ """ } graph <http://reasoner.renci.org/ontology/closure> { ?output_id rdfs:subClassOf UBERON:0001062 . } ?output_id rdfs:label ?output_label . ?p rdfs:label ?pLabel . } """ results[direction] += self.triplestore.query_template( inputs={'input_id': identifier}, outputs=['output_id', 'output_label', 'p', 'pLabel'], template_text=text) return results def anatomy_to_go(self, anatomy_identifier): """ Identify process and functions related to anatomical terms (anatomy, cell, components). """ #This is a bit messy, but we need to do 4 things. We are looking for go terms # that are either biological processes or activities and we are looking for predicates # that point either direction. results = {'subject': [], 'object': []} for goParent in ('GO:0008150', 'GO:0003674'): for direction, query in (('subject', ' $anatID ?p ?goID'), ('object', ' ?goID ?p $anatID')): text = """ prefix GO: <http://purl.obolibrary.org/obo/GO_> select distinct ?goID ?goLabel ?p ?pLabel from <http://reasoner.renci.org/nonredundant> from <http://reasoner.renci.org/ontology> where { graph <http://reasoner.renci.org/redundant> { """ + query + """ } graph <http://reasoner.renci.org/ontology/closure> { ?goID rdfs:subClassOf $goParent . } ?goID rdfs:label ?goLabel . 
?p rdfs:label ?pLabel } """ results[direction] += self.triplestore.query_template( inputs = { 'anatID': anatomy_identifier, 'goParent': goParent }, \ outputs = [ 'goID', 'goLabel', 'p', 'pLabel' ], \ template_text = text \ ) return results def go_to_anatomy(self, input_identifier): """ Identify anatomy terms related to process/functions. :param input_identifier: identifier for anatomy (including cell and cellular component) """ # we are looking for predicates that point either direction. results = {'subject': [], 'object': []} for direction, query in (('subject', ' ?anatID ?p $goID'), ('object', ' $goID ?p ?anatID')): text = """ prefix UBERON: <http://purl.obolibrary.org/obo/UBERON_> prefix GO: <http://purl.obolibrary.org/obo/GO_> select distinct ?anatID ?anatLabel ?p ?pLabel from <http://reasoner.renci.org/nonredundant> from <http://reasoner.renci.org/ontology> where { graph <http://reasoner.renci.org/redundant> { """ + query + """ } graph <http://reasoner.renci.org/ontology/closure> { ?anatID rdfs:subClassOf UBERON:0001062 . } ?anatID rdfs:label ?anatLabel . ?p rdfs:label ?pLabel } """ results[direction] += self.triplestore.query_template( inputs={'goID': input_identifier}, outputs=['anatID', 'anatLabel', 'p', 'pLabel'], template_text=text) return results def pheno_or_disease_to_go(self, identifier): text = """ PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> prefix BFO: <http://purl.obolibrary.org/obo/BFO_> prefix GO: <http://purl.obolibrary.org/obo/GO_> prefix MONDO: <http://purl.obolibrary.org/obo/MONDO_> prefix HP: <http://purl.obolibrary.org/obo/MONDO_> select distinct ?goID ?goLabel ?p ?pLabel from <http://reasoner.renci.org/nonredundant> from <http://reasoner.renci.org/ontology> where { graph <http://reasoner.renci.org/redundant> { $input_id ?p ?goID . } graph <http://reasoner.renci.org/ontology/closure> { { ?goID rdfs:subClassOf GO:0008150 . } UNION { ?goID rdfs:subClassOf GO:0003674 . } } ?goID rdfs:label ?goLabel . ?p rdfs:label ?pLabel . 
} """ results = self.triplestore.query_template( inputs={'input_id': identifier}, outputs=['goID', 'goLabel', 'p', 'pLabel'], template_text=text) return results def phenotype_to_anatomy(self, hp_identifier): """ Identify anatomies related to phenotypes. :param cell: HP identifier for phenotype """ #The subclassof uberon:0001062 ensures that the result #is an anatomical entity. #We don't need to do the subject/object game because there's nothing in ubergraph # that goes that direction text = """ PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX UBERON: <http://purl.obolibrary.org/obo/UBERON_> SELECT DISTINCT ?anatomy_id ?anatomy_label ?predicate ?predicate_label FROM <http://reasoner.renci.org/ontology> WHERE { graph <http://reasoner.renci.org/redundant>{ $HPID ?predicate ?anatomy_id. } graph <http://reasoner.renci.org/ontology/closure>{ ?anatomy_id rdfs:subClassOf UBERON:0001062. } ?anatomy_id rdfs:label ?anatomy_label . OPTIONAL {?predicate rdfs:label ?predicate_label.} } """ results = self.triplestore.query_template( inputs = { 'HPID': hp_identifier }, \ outputs = [ 'anatomy_id', 'anatomy_label', 'predicate', 'predicate_label'],\ template_text = text \ ) return results def anatomy_to_phenotype(self, uberon_id): #sparql very identical to phenotype_to_anatomy. could not find any anatomical # entity that is a subject of subclass of HP:0000118, in ubergraph at this point. # treating this as another version of pheno -> anatomical_entity but when # anatomical_entity is known an # we want to go back to a phenotype. text = """ PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX HP:<http://purl.obolibrary.org/obo/HP_> SELECT DISTINCT ?pheno_id ?pheno_label ?predicate ?predicate_label FROM <http://reasoner.renci.org/ontology> WHERE { graph <http://reasoner.renci.org/redundant> { ?pheno_id ?predicate $UBERONID. } graph <http://reasoner.renci.org/ontology/closure>{ ?pheno_id rdfs:subClassOf HP:0000118. } ?pheno_id rdfs:label ?pheno_label. 
OPTIONAL {?predicate rdfs:label ?predicate_label.} }""" results = self.triplestore.query_template( inputs = { 'UBERONID': uberon_id }, \ outputs = [ 'pheno_id', 'pheno_label', 'predicate', 'predicate_label' ],\ template_text = text \ ) return results def biological_process_or_activity_to_chemical(self, go_id): """ Given a chemical finds associated GO Molecular Activities. """ results = [] text = """ PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX GO: <http://purl.obolibrary.org/obo/GO_> PREFIX RO: <http://purl.obolibrary.org/obo/RO_> PREFIX chemical_entity: <http://purl.obolibrary.org/obo/CHEBI_24431> PREFIX chemical_class: <http://purl.obolibrary.org/obo/CHEBI_24431> SELECT DISTINCT ?chebi_id ?predicate ?label_predicate ?chebi_label from <http://reasoner.renci.org/ontology> from <http://reasoner.renci.org/nonredundant> where { $GO_ID ?predicate ?chebi_id. ?chebi_id rdfs:label ?chebi_label. GRAPH <http://reasoner.renci.org/ontology/closure> { ?chebi_id rdfs:subClassOf chemical_class:.} ?predicate rdfs:label ?label_predicate. FILTER ( datatype(?label_predicate) = xsd:string) } """ results = self.triplestore.query_template(template_text=text, outputs=[ 'chebi_id', 'predicate', 'label_predicate', 'chebi_label' ], inputs={'GO_ID': go_id}) return results def pheno_to_biological_activity(self, pheno_id): """ Finds biological activities related to a phenotype :param :pheno_id phenotype identifier """ text = """ PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX owl: <http://www.w3.org/2002/07/owl#> PREFIX GO: <http://purl.obolibrary.org/obo/GO_> PREFIX has_phenotype_affecting: <http://purl.obolibrary.org/obo/UPHENO_0000001> PREFIX RO: <http://purl.obolibrary.org/obo/RO_> prefix HP: <http://purl.obolibrary.org/obo/HP_> SELECT DISTINCT ?go_id ?predicate ?predicate_label ?go_label from <http://reasoner.renci.org/nonredundant> from <http://reasoner.renci.org/ontology> WHERE { $pheno_type ?predicate ?go_id. ?go_id rdfs:label ?go_label. 
graph <http://reasoner.renci.org/ontology/closure> { { ?go_id rdfs:subClassOf GO:0008150 . } UNION { ?go_id rdfs:subClassOf GO:0003674 . } } ?predicate rdfs:label ?predicate_label. } """ results = self.triplestore.query_template( template_text=text, inputs={'pheno_type': pheno_id}, outputs=['go_id', 'predicate', 'predicate_label', 'go_label']) return results def disease_to_anatomy(self, disease_id): #THere are no anatomy-(predicate)->disease triples text = """ PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX anatomicalEntity: <http://purl.obolibrary.org/obo/UBERON_0001062> SELECT DISTINCT ?anatomyID ?predicate ?predicate_label ?anatomy_label FROM <http://reasoner.renci.org/nonredundant> FROM <http://reasoner.renci.org/ontology> WHERE { graph <http://reasoner.renci.org/redundant> { $diseaseID ?predicate ?anatomyID. } ?anatomyID rdfs:label ?anatomy_label. graph <http://reasoner.renci.org/ontology/closure> { ?anatomyID rdfs:subClassOf anatomicalEntity: . } ?predicate rdfs:label ?predicate_label. } """ results = [] results = self.triplestore.query_template( template_text=text, outputs=[ 'anatomyID', 'predicate', 'predicate_label', 'anatomy_label' ], inputs={'diseaseID': disease_id}) return results def anatomy_to_chemical_substance(self, anatomy_id): #There's no chemical-(predicate)->anatomy text = """ PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX chemical_entity: <http://purl.obolibrary.org/obo/CHEBI_24431> SELECT DISTINCT ?predicate ?predicate_label ?chemical_entity ?chemical_label FROM <http://reasoner.renci.org/ontology> FROM <http://reasoner.renci.org/redundant> WHERE { $anatomy_id ?predicate ?chemical_entity. graph <http://reasoner.renci.org/ontology/closure> { ?chemical_entity rdfs:subClassOf chemical_entity:. } ?predicate rdfs:label ?predicate_label . ?chemical_entity rdfs:label ?chemical_label. 
} """ results = [] results = self.triplestore.query_template( template_text=text, outputs=[ 'predicate', 'predicate_label', 'chemical_entity', 'chemical_label' ], inputs={'anatomy_id': anatomy_id}) return results def anatomy_to_disease(self, anatomy_id): text = """ PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX disease: <http://purl.obolibrary.org/obo/MONDO_0000001> SELECT DISTINCT ?predicate ?predicate_label ?disease ?disease_label FROM <http://reasoner.renci.org/ontology> FROM <http://reasoner.renci.org/redundant>{ ?disease ?predicate $anatomy_id. graph <http://reasoner.renci.org/ontology/closure> { ?disease rdfs:subClassOf disease:. } ?predicate rdfs:label ?predicate_label . ?disease rdfs:label ?disease_label. } """ results = [] results = self.triplestore.query_template( template_text=text, outputs=[ 'predicate', 'predicate_label', 'disease', 'disease_label' ], inputs={'anatomy_id': anatomy_id}) return results def create_phenotype_anatomy_edge(self, node_id, node_label, input_id, phenotype_node): predicate = LabeledID(identifier='GAMMA:0000002', label='inverse of has phenotype affecting') anatomy_node = KNode(Text.obo_to_curie(node_id), type=node_types.ANATOMICAL_ENTITY, name=node_label) edge = self.create_edge(anatomy_node, phenotype_node, 'uberongraph.get_anatomy_by_phenotype_graph', input_id, predicate) #node.name = node_label return edge, anatomy_node def create_anatomy_phenotype_edge(self, node_id, node_label, input_id, anatomy_node): predicate = LabeledID(identifier='GAMMA:0000002', label='inverse of has phenotype affecting') phenotype_node = KNode(Text.obo_to_curie(node_id), type=node_types.PHENOTYPIC_FEATURE, name=node_label) edge = self.create_edge(anatomy_node, phenotype_node, 'uberongraph.get_phenotype_by_anatomy_graph', input_id, predicate) #node.name = node_label return edge, phenotype_node def dep_get_anatomy_by_phenotype_graph(self, phenotype_node): results = [] for curie in phenotype_node.get_synonyms_by_prefix('HP'): anatomies = 
self.phenotype_to_anatomy(curie) for r in anatomies: node = KNode(r['anatomy_id'], type=node_types.ANATOMICAL_ENTITY, name=r['anatomy_label']) # try to derive the label from the relation for the new ubergraph axioms predicate_label = r['predicate_label'] or '_'.join( r['predicate'].split('#')[-1].split('.')) predicate = LabeledID(Text.obo_to_curie(r['predicate']), predicate_label) edge = self.create_edge( phenotype_node, node, 'uberongraph.get_anatomy_by_phenotype_graph', phenotype_node.id, predicate) # edge, node = self.create_phenotype_anatomy_edge(r['anatomy_id'],r['anatomy_label'],curie,phenotype_node) if phenotype_node.name is None: phenotype_node.name = r['input_label'] results.append((edge, node)) #These tend to be very high level terms. Let's also get their parts to #be more inclusive. #TODO: there ought to be a more principled way to take care of this, but #it highlights the uneasy relationship between the high level world of #smartapi and the low-level sparql-vision. part_results = self.get_anatomy_parts(r['anatomy_id']) for pr in part_results: # pedge, pnode = self.create_phenotype_anatomy_edge(pr['part'],pr['partlabel'],curie,phenotype_node) pnode = KNode(pr['part'], type=node_types.ANATOMICAL_ENTITY, name=pr['partlabel']) pedge = self.create_edge( phenotype_node, pnode, 'uberongraph.get_anatomy_by_phenotype_graph', phenotype_node.id, predicate) results.append((pedge, pnode)) return results def get_out_by_in(self, input_node, output_type, prefixes, subject=True, object=True): returnresults = [] caller = f'uberongraph.{inspect.stack()[1][3]}' results = {'subject': [], 'object': []} curies = set() for pre in prefixes: curies.update(input_node.get_synonyms_by_prefix(pre)) for curie in curies: results['subject'] += self.get_neighbor(curie, output_type, subject=True) results['object'] += self.get_neighbor(curie, output_type, subject=False) for direction in ['subject', 'object']: done = set() for r in results[direction]: key = (r['p'], r['output_id']) if key 
in done: continue predicate_curie = Text.obo_to_curie(r['p']) prefix = Text.get_curie(predicate_curie) prefix = prefix if prefix == 'ubergraph-axioms.ofn' else prefix.upper( ) upper_cased_predicate_curie = prefix + ":" + Text.un_curie( predicate_curie) predicate = LabeledID(upper_cased_predicate_curie, r['pLabel']) output_node = KNode(r['output_id'], type=output_type, name=r['output_label']) if direction == 'subject': edge = self.create_edge(input_node, output_node, caller, curie, predicate) else: edge = self.create_edge(output_node, input_node, caller, curie, predicate) done.add(key) returnresults.append((edge, output_node)) return returnresults #Don't get confused. There is the direction of the statement (who is the subject # and who is the object) and which of them we are querying by. We want to query # independent of direction i.e. let the input node be either the subject or the object. def get_anatomy_by_anatomy_graph(self, anatomy_node): return self.get_out_by_in(anatomy_node, node_types.ANATOMICAL_ENTITY, ['UBERON', 'CL', 'GO']) def get_phenotype_by_anatomy_graph(self, anatomy_node): return self.get_out_by_in(anatomy_node, node_types.PHENOTYPIC_FEATURE, ['UBERON', 'CL', 'GO']) def get_chemical_substance_by_anatomy(self, anatomy_node): return self.get_out_by_in(anatomy_node, node_types.CHEMICAL_SUBSTANCE, ['UBERON', 'CL', 'GO']) def get_process_by_anatomy(self, anatomy_node): return self.get_out_by_in(anatomy_node, node_types.BIOLOGICAL_PROCESS, ['UBERON', 'CL', 'GO']) def get_activity_by_anatomy(self, anatomy_node): return self.get_out_by_in(anatomy_node, node_types.MOLECULAR_ACTIVITY, ['UBERON', 'CL', 'GO']) def get_disease_by_anatomy_graph(self, anatomy_node): return self.get_out_by_in(anatomy_node, node_types.DISEASE, ['UBERON', 'CL', 'GO']) def get_anatomy_by_process_or_activity(self, go_node): return self.get_out_by_in(go_node, node_types.ANATOMICAL_ENTITY, ['GO']) def get_chemical_entity_by_process_or_activity(self, go_node): return 
self.get_out_by_in(go_node, node_types.CHEMICAL_SUBSTANCE, ['GO']) def get_process_by_disease(self, disease_node): return self.get_out_by_in(disease_node, node_types.BIOLOGICAL_PROCESS, ['MONDO']) def get_activity_by_disease(self, disease_node): return self.get_out_by_in(disease_node, node_types.MOLECULAR_ACTIVITY, ['MONDO']) def get_anatomy_by_disease(self, disease_node): return self.get_out_by_in(disease_node, node_types.ANATOMICAL_ENTITY, ['MONDO']) def get_chemical_by_disease(self, disease_node): return self.get_out_by_in(disease_node, node_types.CHEMICAL_SUBSTANCE, ['MONDO']) def get_process_by_phenotype(self, pheno_node): return self.get_out_by_in(pheno_node, node_types.BIOLOGICAL_PROCESS, ['HP']) def get_chemical_by_phenotype(self, pheno_node): return self.get_out_by_in(pheno_node, node_types.CHEMICAL_SUBSTANCE, ['HP']) def get_activity_by_phenotype(self, pheno_node): return self.get_out_by_in(pheno_node, node_types.MOLECULAR_ACTIVITY, ['HP']) def get_anatomy_by_phenotype_graph(self, pheno_node): return self.get_out_by_in(pheno_node, node_types.ANATOMICAL_ENTITY, ['HP']) def get_chemical_by_chemical(self, chem_node): return self.get_out_by_in(chem_node, node_types.CHEMICAL_SUBSTANCE, ['CHEBI']) def disease_get_ancestors(self, disease_node): curie = disease_node.id prefix = Text.get_curie(curie) if "MONDO" != prefix: return [] query = f""" PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> prefix MONDO: <http://purl.obolibrary.org/obo/MONDO_> select distinct ?output_id ?label where {{ graph <http://reasoner.renci.org/ontology/closure> {{ $disease_id rdfs:subClassOf ?output_id . ?output_id rdfs:subClassOf MONDO:0000001 . }} graph <http://reasoner.renci.org/ontology>{{ ?output_id rdfs:label ?label. 
}} }} """ results = self.triplestore.query_template( template_text=query, inputs={'disease_id': curie}, outputs=['output_id', 'label']) outputs = [] for row in results: ancestor_node = KNode( row['output_id'], label=row['label'], type=node_types.DISEASE_OR_PHENOTYPIC_FEATURE) if ancestor_node.id == disease_node.id: # refrain from adding edge to the node itself continue predicate = LabeledID(identifier='rdfs:subClassOf', label='subclass of') edge = self.create_edge( source_node=disease_node, target_node=ancestor_node, predicate=predicate, provided_by='uberongraph.disease_get_ancestors', input_id=disease_node.id) outputs.append((edge, ancestor_node)) return outputs
class OmniCorp(Service):
    """Service wrapper that asks the OmniCorp triplestore which PubMed
    articles reference given ontology terms."""

    def __init__(self, context):
        super(OmniCorp, self).__init__("omnicorp", context)
        self.triplestore = TripleStore(self.url)
        # Curie prefix -> IRI stem. Keys are bare prefixes (no trailing
        # colon) because they are compared against Text.get_curie(node.id).
        self.prefix_to_uri = {
            'UBERON': 'http://purl.obolibrary.org/obo/UBERON_',
            'BSPO': 'http://purl.obolibrary.org/obo/BSPO_',
            'PATO': 'http://purl.obolibrary.org/obo/PATO_',
            'GO': 'http://purl.obolibrary.org/obo/GO_',
            'MONDO': 'http://purl.obolibrary.org/obo/MONDO_',
            'HP': 'http://purl.obolibrary.org/obo/HP_',
            # Was 'ENVO:' -- the stray colon meant ENVO curies never matched.
            'ENVO': 'http://purl.obolibrary.org/obo/ENVO_',
            'OBI': 'http://purl.obolibrary.org/obo/OBI_',
            'CL': 'http://purl.obolibrary.org/obo/CL_',
            'SO': 'http://purl.obolibrary.org/obo/SO_',
            'CHEBI': 'http://purl.obolibrary.org/obo/CHEBI_',
            'HGNC': 'http://identifiers.org/hgnc/HGNC:',
            'MESH': 'http://id.nlm.nih.gov/mesh/',
        }

    def get_omni_identifier(self, node):
        """Translate a node's id into the IRI used by OmniCorp.

        Only the node's primary identifier is considered (synonyms are not
        tried).

        :param node: Node exposing ``id``, ``type`` and ``synonyms``.
        :return: IRI string, or ``None`` if the curie prefix is not in
            ``prefix_to_uri``.
        """
        prefix = Text.get_curie(node.id)
        if prefix not in self.prefix_to_uri:
            logger.warning("What kinda tomfoolery is this?")
            logger.warning(f"{node.id} {node.type}")
            logger.warning(f"{node.synonyms}")
            return None
        return f'{self.prefix_to_uri[prefix]}{Text.un_curie(node.id)}'

    def query_omnicorp(self, query):
        """ Execute and return the result of a SPARQL query. """
        return self.triplestore.execute_query(query)

    def sparql_get_all_shared_pmids(self, identifier_list):
        """Query every article that references two distinct terms of a list.

        :param identifier_list: SPARQL ``VALUES`` data block as a string,
            e.g. ``'{ <iri1> <iri2>}'``; used for both ``?term1`` and
            ``?term2``.
        :return: Whatever ``query_template`` returns for the outputs
            ``term1``, ``term2`` and ``pubmed``.
        """
        text = """
        PREFIX dct: <http://purl.org/dc/terms/>
        SELECT DISTINCT ?pubmed ?term1 ?term2
        WHERE {
            hint:Query hint:analytic true .
            VALUES ?term1 $id_list_a
            VALUES ?term2 $id_list_b
            ?pubmed dct:references ?term1 .
            ?pubmed dct:references ?term2 .
            FILTER(STR(?term1) < STR(?term2))
        }
        """
        start = datetime.datetime.now()
        results = self.triplestore.query_template(
            inputs={
                'id_list_a': identifier_list,
                'id_list_b': identifier_list
            },
            outputs=['term1', 'term2', 'pubmed'],
            template_text=text,
            post=True)
        end = datetime.datetime.now()
        logger.debug(f'Completed in: {end-start}')
        return results

    def sparql_count_pmids(self, identifier):
        """Query the number of distinct articles referencing one term.

        :param identifier: Term IRI, without angle brackets.
        :return: Whatever ``query_template`` returns for the output ``count``.
        """
        text = """
        PREFIX dct: <http://purl.org/dc/terms/>
        SELECT (COUNT(DISTINCT ?pubmed) as ?count)
        WHERE {
            hint:Query hint:analytic true .
            ?pubmed dct:references <$identifier> .
        }
        """
        return self.triplestore.query_template(
            inputs={'identifier': identifier},
            outputs=['count'],
            template_text=text,
        )

    def sparql_get_shared_pmids(self, identifier_a, identifier_b):
        """Query the articles that reference both of two terms.

        :param identifier_a: First term IRI, without angle brackets.
        :param identifier_b: Second term IRI, without angle brackets.
        :return: Whatever ``query_template`` returns for the output
            ``pubmed``.
        """
        text = """
        PREFIX dct: <http://purl.org/dc/terms/>
        SELECT DISTINCT ?pubmed
        WHERE {
            hint:Query hint:analytic true .
            ?pubmed dct:references <$id_a> .
            ?pubmed dct:references <$id_b> .
        }
        """
        return self.triplestore.query_template(
            inputs={
                'id_a': identifier_a,
                'id_b': identifier_b
            },
            outputs=['pubmed'],
            template_text=text,
            post=True)

    def get_all_shared_pmids(self, nodes):
        """Map every pair of nodes to the PMIDs that reference both.

        :param nodes: Sequence of nodes (it is sliced, so it must support
            slicing).
        :return: ``defaultdict(list)`` keyed by ``(node, node)`` tuples with
            lists of ``"PMID:<n>"`` strings. Pairs without shared articles
            are present with an empty list.
        """
        oiddict = {self.get_omni_identifier(n): n for n in nodes}
        # Nodes without an OmniCorp identifier end up under the None key and
        # are left out of the query.
        oids = [f'<{oid}>' for oid in oiddict if oid is not None]
        oidsstring = '{ ' + ' '.join(oids) + '}'
        results = self.sparql_get_all_shared_pmids(oidsstring)
        pubmeds = defaultdict(list)
        for r in results:
            k = (oiddict[r['term1']], oiddict[r['term2']])
            pubmeds[k].append(f"PMID:{r['pubmed'].split('/')[-1]}")
        # Make sure every unordered pair has an entry, even if empty.
        for i, node_i in enumerate(nodes):
            for node_j in nodes[:i]:
                k_ij = (node_i, node_j)
                k_ji = (node_j, node_i)
                if k_ij not in pubmeds and k_ji not in pubmeds:
                    pubmeds[k_ij] = []
        return pubmeds

    def call_with_retries(self, fnc, args):
        """Call ``fnc(*args)``, retrying on failure.

        Up to 100 attempts are made with a 10 second pause after each
        failure.

        :param fnc: Callable to invoke.
        :param args: Iterable of positional arguments for ``fnc``.
        :return: The result of ``fnc``, or ``None`` if every attempt raised.
        """
        maxtries = 100
        rest_time = 10  # seconds
        start = datetime.datetime.now()
        for ntries in range(maxtries):
            try:
                result = fnc(*args)
            except Exception:
                # Only ordinary errors are retried; KeyboardInterrupt and
                # SystemExit must be allowed to stop the process.
                logger.warning("OmniCorp error, retrying", exc_info=True)
                time.sleep(rest_time)
            else:
                end = datetime.datetime.now()
                logger.debug(
                    f'Total call ntries: {ntries}, time: {end-start}')
                return result
        return None

    def count_pmids(self, node):
        """Count the articles that reference a node.

        :param node: Node to look up.
        :return: ``0`` if the node has no OmniCorp identifier, ``None`` if
            the query kept failing, otherwise the ``count`` value of the
            first result row.
        """
        identifier = self.get_omni_identifier(node)
        if identifier is None:
            return 0
        res = self.call_with_retries(self.sparql_count_pmids, [identifier])
        if res is None:
            return None
        logger.debug(f"Returned {res[0]['count']}")
        return res[0]['count']

    def get_shared_pmids(self, node1, node2):
        """List the articles that reference both nodes.

        :param node1: First node.
        :param node2: Second node.
        :return: ``[]`` if either node has no OmniCorp identifier, ``None``
            if the query kept failing, otherwise the list of ``pubmed``
            values.
        """
        id1 = self.get_omni_identifier(node1)
        id2 = self.get_omni_identifier(node2)
        if id1 is None or id2 is None:
            return []
        pmids = self.call_with_retries(self.sparql_get_shared_pmids,
                                       [id1, id2])
        if pmids is None:
            logger.error("OmniCorp gave up")
            return None
        return [p['pubmed'] for p in pmids]
class UberonGraphKS(Service):
    """A knowledge source created by 1) Combining cell ontology, uberon, and
    HPO, 2) Reasoning over the total graph to realize many implicit edges.
    Created by Jim Balhoff"""

    def __init__(self, context):
        super(UberonGraphKS, self).__init__("uberongraph", context)
        self.triplestore = TripleStore(self.url)

    def query_uberongraph(self, query):
        """ Execute and return the result of a SPARQL query. """
        return self.triplestore.execute_query(query)

    def cell_get_cellname(self, cell_identifier):
        """ Identify label for a cell type
        :param cell_identifier: CL identifier for cell type
        :return: Whatever ``query_template`` returns for the output
            ``cellLabel``.
        """
        # rdfs is declared explicitly so the query does not depend on the
        # endpoint predefining it.
        text = """
        prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        prefix CL: <http://purl.obolibrary.org/obo/CL_>
        select distinct ?cellLabel
        from <http://reasoner.renci.org/nonredundant>
        from <http://example.org/uberon-hp-cl.ttl>
        where {
            $cellID rdfs:label ?cellLabel .
        }
        """
        return self.triplestore.query_template(
            inputs={'cellID': cell_identifier},
            outputs=['cellLabel'],
            template_text=text)

    def get_anatomy_parts(self, anatomy_identifier):
        """Given an UBERON id, find other UBERONS that are parts of the query

        :param anatomy_identifier: Curie or ``http`` IRI of the anatomy term;
            IRIs are converted with ``Text.obo_to_curie`` first.
        :return: Query results for ``part`` and ``partlabel``, each with an
            added ``curie`` entry derived from ``part``.
        """
        if anatomy_identifier.startswith('http'):
            anatomy_identifier = Text.obo_to_curie(anatomy_identifier)
        text = """
        prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        prefix UBERON: <http://purl.obolibrary.org/obo/UBERON_>
        prefix BFO: <http://purl.obolibrary.org/obo/BFO_>
        select distinct ?part ?partlabel
        from <http://reasoner.renci.org/nonredundant>
        from <http://example.org/uberon-hp-cl.ttl>
        where {
            $anatomy_id BFO:0000051 ?part .
            graph <http://reasoner.renci.org/redundant> {
                ?part rdfs:subClassOf UBERON:0001062 .
            }
            ?part rdfs:label ?partlabel .
        }
        """
        results = self.triplestore.query_template(
            inputs={'anatomy_id': anatomy_identifier},
            outputs=['part', 'partlabel'],
            template_text=text)
        for result in results:
            result['curie'] = Text.obo_to_curie(result['part'])
        return results

    def anatomy_to_cell(self, anatomy_identifier):
        """ Identify cell types that are part of an anatomy term.
        :param anatomy_identifier: identifier of the anatomy term
        :return: Whatever ``query_template`` returns for the outputs
            ``cellID`` and ``cellLabel``.
        """
        text = """
        prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        prefix UBERON: <http://purl.obolibrary.org/obo/UBERON_>
        prefix CL: <http://purl.obolibrary.org/obo/CL_>
        prefix BFO: <http://purl.obolibrary.org/obo/BFO_>
        select distinct ?cellID ?cellLabel
        from <http://reasoner.renci.org/nonredundant>
        from <http://example.org/uberon-hp-cl.ttl>
        where {
            graph <http://reasoner.renci.org/redundant> {
                ?cellID rdfs:subClassOf CL:0000000 .
                ?cellID BFO:0000050 $anatomyID .
            }
            ?cellID rdfs:label ?cellLabel .
        }
        """
        return self.triplestore.query_template(
            inputs={'anatomyID': anatomy_identifier},
            outputs=['cellID', 'cellLabel'],
            template_text=text)

    def cell_to_anatomy(self, cell_identifier):
        """ Identify anatomy terms related to cells.
        :param cell_identifier: CL identifier for cell type
        :return: Whatever ``query_template`` returns for the outputs
            ``anatomyID`` and ``anatomyLabel``.
        """
        text = """
        prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        prefix CL: <http://purl.obolibrary.org/obo/CL_>
        prefix BFO: <http://purl.obolibrary.org/obo/BFO_>
        prefix UBERON: <http://purl.obolibrary.org/obo/UBERON_>
        select distinct ?anatomyID ?anatomyLabel
        from <http://reasoner.renci.org/nonredundant>
        from <http://example.org/uberon-hp-cl.ttl>
        where {
            graph <http://reasoner.renci.org/redundant> {
                ?anatomyID rdfs:subClassOf UBERON:0001062 .
                $cellID BFO:0000050 ?anatomyID .
            }
            ?anatomyID rdfs:label ?anatomyLabel .
        }
        """
        return self.triplestore.query_template(
            inputs={'cellID': cell_identifier},
            outputs=['anatomyID', 'anatomyLabel'],
            template_text=text)

    def phenotype_to_anatomy(self, hp_identifier):
        """ Identify anatomy terms related to a phenotype.
        :param hp_identifier: HP identifier for phenotype
        :return: Whatever ``query_template`` returns for the outputs
            ``anatomy_id``, ``anatomy_label`` and ``input_label``.
        """
        # The subclassof uberon:0001062 ensures that the result
        # is an anatomical entity.
        text = """
        prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        prefix UBERON: <http://purl.obolibrary.org/obo/UBERON_>
        prefix HP: <http://purl.obolibrary.org/obo/HP_>
        prefix part_of: <http://purl.obolibrary.org/obo/BFO_0000050>
        prefix has_part: <http://purl.obolibrary.org/obo/BFO_0000051>
        prefix depends_on: <http://purl.obolibrary.org/obo/RO_0002502>
        prefix phenotype_of: <http://purl.obolibrary.org/obo/UPHENO_0000001>
        select distinct ?anatomy_id ?anatomy_label ?input_label
        from <http://reasoner.renci.org/nonredundant>
        from <http://example.org/uberon-hp-cl.ttl>
        where {
            graph <http://reasoner.renci.org/redundant> {
                ?anatomy_id rdfs:subClassOf UBERON:0001062 .
            }
            ?anatomy_id rdfs:label ?anatomy_label .
            graph <http://reasoner.renci.org/nonredundant> {
                ?phenotype phenotype_of: ?anatomy_id .
            }
            graph <http://reasoner.renci.org/redundant> {
                $HPID rdfs:subClassOf ?phenotype .
            }
            $HPID rdfs:label ?input_label .
        }
        """
        return self.triplestore.query_template(
            inputs={'HPID': hp_identifier},
            outputs=['anatomy_id', 'anatomy_label', 'input_label'],
            template_text=text)

    def anatomy_to_phenotype(self, uberon_id):
        """ Identify phenotypes related to an anatomy term.
        :param uberon_id: identifier of the anatomy term
        :return: Whatever ``query_template`` returns for the outputs
            ``pheno_id``, ``anatomy_label`` and ``pheno_label``.
        """
        text = """
        prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        prefix UBERON: <http://purl.obolibrary.org/obo/UBERON_>
        prefix HP: <http://purl.obolibrary.org/obo/HP_>
        prefix part_of: <http://purl.obolibrary.org/obo/BFO_0000050>
        prefix has_part: <http://purl.obolibrary.org/obo/BFO_0000051>
        prefix depends_on: <http://purl.obolibrary.org/obo/RO_0002502>
        prefix phenotype_of: <http://purl.obolibrary.org/obo/UPHENO_0000001>
        select distinct ?pheno_id ?anatomy_label ?pheno_label
        from <http://reasoner.renci.org/nonredundant>
        from <http://example.org/uberon-hp-cl.ttl>
        where {
            $UBERONID rdfs:label ?anatomy_label .
            graph <http://reasoner.renci.org/nonredundant> {
                ?phenotype phenotype_of: $UBERONID .
            }
            graph <http://reasoner.renci.org/redundant> {
                ?pheno_id rdfs:subClassOf ?phenotype .
            }
            ?pheno_id rdfs:label ?pheno_label .
        }
        """
        return self.triplestore.query_template(
            inputs={'UBERONID': uberon_id},
            outputs=['pheno_id', 'anatomy_label', 'pheno_label'],
            template_text=text)

    def get_anatomy_by_cell_graph(self, cell_node):
        """Return ``(edge, anatomy_node)`` pairs for the anatomy terms that
        ``cell_to_anatomy`` finds for ``cell_node.id``."""
        anatomies = self.cell_to_anatomy(cell_node.id)
        results = []
        predicate = LabeledID(identifier='BFO:0000050', label='part_of')
        for r in anatomies:
            anatomy_node = KNode(Text.obo_to_curie(r['anatomyID']),
                                 type=node_types.ANATOMY,
                                 name=r['anatomyLabel'])
            edge = self.create_edge(cell_node, anatomy_node,
                                    'uberongraph.get_anatomy_by_cell_graph',
                                    cell_node.id, predicate)
            results.append((edge, anatomy_node))
        return results

    def get_cell_by_anatomy_graph(self, anatomy_node):
        """Return ``(edge, cell_node)`` pairs for the cell types that
        ``anatomy_to_cell`` finds for ``anatomy_node.id``."""
        cells = self.anatomy_to_cell(anatomy_node.id)
        results = []
        predicate = LabeledID(identifier='BFO:0000050', label='part_of')
        for r in cells:
            cell_node = KNode(Text.obo_to_curie(r['cellID']),
                              type=node_types.CELL,
                              name=r['cellLabel'])
            # The edge runs cell -> anatomy even though the cell is the
            # output, matching the part_of predicate.
            edge = self.create_edge(cell_node, anatomy_node,
                                    'uberongraph.get_cell_by_anatomy_graph',
                                    anatomy_node.id, predicate)
            results.append((edge, cell_node))
        return results

    def create_phenotype_anatomy_edge(self, node_id, node_label, input_id,
                                      phenotype_node):
        """Build an anatomy node and the edge linking it to a phenotype node.

        :param node_id: Identifier of the anatomy term; passed through
            ``Text.obo_to_curie`` before use.
        :param node_label: Name given to the new anatomy node.
        :param input_id: Identifier recorded on the edge as its input.
        :param phenotype_node: Existing phenotype node used as edge target.
        :return: ``(edge, anatomy_node)``.
        """
        predicate = LabeledID(identifier='GAMMA:0000002',
                              label='inverse of has phenotype affecting')
        anatomy_node = KNode(Text.obo_to_curie(node_id),
                             type=node_types.ANATOMY,
                             name=node_label)
        edge = self.create_edge(anatomy_node, phenotype_node,
                                'uberongraph.get_anatomy_by_phenotype_graph',
                                input_id, predicate)
        return edge, anatomy_node

    def create_anatomy_phenotype_edge(self, node_id, node_label, input_id,
                                      anatomy_node):
        """Build a phenotype node and the edge linking an anatomy node to it.

        :param node_id: Identifier of the phenotype term; passed through
            ``Text.obo_to_curie`` before use.
        :param node_label: Name given to the new phenotype node.
        :param input_id: Identifier recorded on the edge as its input.
        :param anatomy_node: Existing anatomy node used as edge source.
        :return: ``(edge, phenotype_node)``.
        """
        predicate = LabeledID(identifier='GAMMA:0000002',
                              label='inverse of has phenotype affecting')
        phenotype_node = KNode(Text.obo_to_curie(node_id),
                               type=node_types.PHENOTYPE,
                               name=node_label)
        edge = self.create_edge(anatomy_node, phenotype_node,
                                'uberongraph.get_phenotype_by_anatomy_graph',
                                input_id, predicate)
        return edge, phenotype_node

    def get_anatomy_by_phenotype_graph(self, phenotype_node):
        """Collect anatomy nodes, and their parts, for a phenotype node.

        Fills in ``phenotype_node.name`` from the query when it is ``None``.

        :param phenotype_node: Node whose ``HP`` synonyms are looked up.
        :return: List of ``(edge, node)`` tuples.
        """
        results = []
        for curie in phenotype_node.get_synonyms_by_prefix('HP'):
            anatomies = self.phenotype_to_anatomy(curie)
            for r in anatomies:
                edge, node = self.create_phenotype_anatomy_edge(
                    r['anatomy_id'], r['anatomy_label'], curie,
                    phenotype_node)
                if phenotype_node.name is None:
                    phenotype_node.name = r['input_label']
                results.append((edge, node))
                # These tend to be very high level terms. Let's also get
                # their parts to be more inclusive.
                # TODO: there ought to be a more principled way to take care
                # of this, but it highlights the uneasy relationship between
                # the high level world of smartapi and the low-level
                # sparql-vision.
                part_results = self.get_anatomy_parts(r['anatomy_id'])
                for pr in part_results:
                    pedge, pnode = self.create_phenotype_anatomy_edge(
                        pr['part'], pr['partlabel'], curie, phenotype_node)
                    results.append((pedge, pnode))
        return results

    def get_phenotype_by_anatomy_graph(self, anatomy_node):
        """Collect phenotype nodes for an anatomy node.

        Fills in ``anatomy_node.name`` from the query when it is ``None``.

        :param anatomy_node: Node whose ``UBERON`` synonyms are looked up.
        :return: List of ``(edge, node)`` tuples.
        """
        results = []
        for curie in anatomy_node.get_synonyms_by_prefix('UBERON'):
            phenotypes = self.anatomy_to_phenotype(curie)
            for r in phenotypes:
                edge, node = self.create_anatomy_phenotype_edge(
                    r['pheno_id'], r['pheno_label'], curie, anatomy_node)
                if anatomy_node.name is None:
                    anatomy_node.name = r['anatomy_label']
                results.append((edge, node))
        return results