class MeshKS(Service):
    """Service wrapper that looks up MeSH term labels through a triplestore."""

    def __init__(self, context, url):
        """Register the service under the name "mesh" and open the triplestore.

        :param context: Service context handed to the ``Service`` base class.
        :param url: Accepted by the signature; the triplestore is built from
            ``self.url`` rather than from this argument.
        """
        super().__init__("mesh", context)
        self.triplestore = TripleStore(self.url)

    def get_label_by_id(self, term_id):
        """Return the labels bound to a MeSH term.

        :param term_id: Term reference substituted for ``$term_id`` in the query.
        :return: List of ``{'label': ...}`` dicts, one per result row.
        """
        # Adjacent literals are concatenated by the parser, so the query text
        # sent to the triplestore is a single line of space-separated clauses.
        label_query = (
            " PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>"
            " PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>"
            " PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>"
            " PREFIX owl: <http://www.w3.org/2002/07/owl#>"
            " PREFIX meshv: <http://id.nlm.nih.gov/mesh/vocab#>"
            " PREFIX mesh: <http://id.nlm.nih.gov/mesh/>"
            " PREFIX mesh2015: <http://id.nlm.nih.gov/mesh/2015/>"
            " PREFIX mesh2016: <http://id.nlm.nih.gov/mesh/2016/>"
            " PREFIX mesh2017: <http://id.nlm.nih.gov/mesh/2017/>"
            " PREFIX mesh2018: <http://id.nlm.nih.gov/mesh/2018/>"
            " SELECT DISTINCT ?label"
            " FROM <http://id.nlm.nih.gov/mesh>"
            " WHERE {"
            " VALUES (?id) { ( $term_id ) }"
            " ?id rdfs:label ?label ."
            " }"
            " ORDER BY ?label "
        )
        rows = self.triplestore.query_template(
            inputs={"term_id": term_id},
            outputs=['label'],
            template_text=label_query)
        return [{'label': row['label']} for row in rows]
class MeSH(object): def __init__(self, uri="http://id.nlm.nih.gov/mesh/sparql"): self.triplestore = TripleStore (uri) def get_broader (self, term): return self.triplestore.query_template ( inputs={ "term" : term, "prefixes" : self.get_prefixes () }, outputs= [ "obj", "name" ], template_text=""" $prefixes SELECT DISTINCT ?obj ?name ?itemName FROM <http://id.nlm.nih.gov/mesh> WHERE { ?item meshv:broaderDescriptor ?obj ; rdfs:label ?itemName. ?obj rdfs:label ?name . filter (regex(lcase(str(?itemName)), lcase(str("$term")))) } ORDER BY ?p """) """ SELECT DISTINCT ?obj ?name FROM <http://id.nlm.nih.gov/mesh> WHERE { $term meshv:broaderDescriptor ?obj . ?obj rdfs:label ?name . } ORDER BY ?p """ def get_prefixes (self): return """
class GenericOntology(Service):
    """Ontology lookups answered by SPARQL queries against an Uberongraph endpoint.

    Sure, don't just dig around in obo files they say. But when the SPARQL is
    dry, we will drink straight from the obo if need be.
    """

    def __init__(self, context, obo):
        """Create the SPARQL clients and the shared PREFIX declarations.

        :param context: Service context; ``context.config`` supplies the URL of
            the ``uberongraph`` service.
        :param obo: Kept in the signature for existing callers; this
            implementation does not read it.
        """
        super(GenericOntology, self).__init__("go", context)
        self.url = context.config.get_service('uberongraph').get("url", None)
        self.triplestore = TripleStore(self.url)
        self.sparql_wrapper = SPARQLWrapper(self.url)
        curie_prefix_map = Curie_Resolver.get_curie_to_uri_map()
        self.resolve_uri = Curie_Resolver.uri_to_curie
        # One PREFIX line per known CURIE prefix; prepended to every query.
        self.ontology_prefixes = [
            f'PREFIX {prefix}: <{curie_prefix_map[prefix]}>'
            for prefix in curie_prefix_map
        ]

    def _curies(self, rows, id_key, uri_key):
        """Return one identifier per row: the bound id when present, else the URI turned into a CURIE."""
        return [
            row[id_key] if id_key in row else self.resolve_uri(row[uri_key])
            for row in rows
        ]

    def add_sparql_prefixes(self, query_template):
        """Return ``query_template`` with the ontology PREFIX lines prepended."""
        return '\n'.join(self.ontology_prefixes) + '\n' + query_template

    def run_sparql_query_raw(self, query):
        """Run ``query`` through SPARQLWrapper and return the converted JSON result."""
        query = self.add_sparql_prefixes(query)
        self.sparql_wrapper.setQuery(query)
        self.sparql_wrapper.setReturnFormat(JSON)
        return self.sparql_wrapper.query().convert()

    def query_sparql(self, query_template, inputs, outputs):
        """Run a templated query through the triplestore.

        :param query_template: SPARQL text containing ``$name`` placeholders.
        :param inputs: Mapping of placeholder name to value.
        :param outputs: Names of the result variables to return.
        :return: Whatever ``TripleStore.query_template`` returns for the query.
        """
        # Prefixes are prepended here so individual methods don't repeat them.
        q = self.add_sparql_prefixes(query_template)
        # Routine query tracing is diagnostic output, not an error condition.
        logger.debug(q)
        return self.triplestore.query_template(template_text=q,
                                               inputs=inputs,
                                               outputs=outputs)

    def label(self, identifier):
        """Return the first label found for ``identifier``, or ``''`` if none."""
        query_text = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        SELECT DISTINCT ?labels
        FROM <http://reasoner.renci.org/ontology>
        WHERE {
            $identifier rdfs:label ?labels.
        }
        """
        results = self.query_sparql(query_text,
                                    inputs={'identifier': identifier},
                                    outputs=['labels'])
        return results[0]["labels"] if len(results) > 0 else ''

    def is_a(self, identifier, ancestors):
        """Determine whether a term has any of the given ancestors.

        :param identifier: Term reference placed in the subject position.
        :param ancestors: Comma-separated string of candidate ancestor terms.
        :return: Tuple ``(matched_any, matched_ancestors)``.
        """
        matched = []
        for ancestor in ancestors.split(','):
            ancestor = ancestor.strip(' ')
            query = f"""
            PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            ASK
            FROM <http://reasoner.renci.org/ontology/closure>
            FROM <http://reasoner.renci.org/ontology>
            WHERE {{
                {identifier} rdfs:subClassOf {ancestor}.
            }}
            """
            response = self.run_sparql_query_raw(query)
            if response['boolean']:
                matched.append(ancestor)
        return len(matched) > 0, matched

    def single_level_is_a(self, identifier):
        """Get single-level 'is_a' descendants (direct subclasses)."""
        query_text = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX ID: <http://www.geneontology.org/formats/oboInOwl#id>
        SELECT DISTINCT ?descendant ?descendant_id
        FROM <http://reasoner.renci.org/ontology>
        WHERE {
            ?descendant rdfs:subClassOf $identifier
            OPTIONAL {
                ?descendant ID: ?descendant_id
            }
        }
        """
        results = self.query_sparql(query_text,
                                    inputs={'identifier': identifier},
                                    outputs=['descendant', 'descendant_id'])
        return self._curies(results, 'descendant_id', 'descendant')

    def descendants(self, identifier):
        """Return every subclass of ``identifier`` found in the closure graph."""
        query_text = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX ID: <http://www.geneontology.org/formats/oboInOwl#id>
        SELECT DISTINCT ?descendant ?descendant_id
        FROM <http://reasoner.renci.org/ontology/closure>
        WHERE {
            ?descendant rdfs:subClassOf $identifier.
            OPTIONAL {
                ?descendant ID: ?descendant_id.
            }
        }
        """
        results = self.query_sparql(query_template=query_text,
                                    inputs={'identifier': identifier},
                                    outputs=['descendant', 'descendant_id'])
        return self._curies(results, 'descendant_id', 'descendant')

    def ancestors(self, identifier):
        """Return every superclass of ``identifier`` found in the closure graph."""
        query_text = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX ID: <http://www.geneontology.org/formats/oboInOwl#id>
        SELECT DISTINCT ?ancestor ?ancestor_id
        FROM <http://reasoner.renci.org/ontology/closure>
        WHERE {
            $identifier rdfs:subClassOf ?ancestor.
            OPTIONAL {
                ?ancestor ID: ?ancestor_id.
            }
        }
        """
        results = self.query_sparql(query_template=query_text,
                                    inputs={'identifier': identifier},
                                    outputs=['ancestor', 'ancestor_id'])
        return self._curies(results, 'ancestor_id', 'ancestor')

    def xrefs(self, identifier):
        """Get external references (``hasDbXref`` values) of ``identifier``."""
        query_text = """prefix xref: <http://www.geneontology.org/formats/oboInOwl#hasDbXref>
        SELECT DISTINCT ?xrefs
        FROM <http://reasoner.renci.org/ontology>
        WHERE {
            $identifier xref: ?xrefs
        }
        """
        results = self.query_sparql(query_template=query_text,
                                    inputs={'identifier': identifier},
                                    outputs=['xrefs'])
        return [row['xrefs'] for row in results]

    def synonyms(self, identifier, curie_pattern=None):
        """Get exact and related synonyms.

        :param identifier: Term reference placed in the subject position.
        :param curie_pattern: Accepted for compatibility; not used here.
        :return: Exact-synonym rows followed by related-synonym rows, each
            tagged with a ``'scope'`` of ``'EXACT'`` or ``'RELATED'``.
        """
        rows = []
        scoped_predicates = (('EXACT', 'EXACT_SYNONYM:'),
                             ('RELATED', 'RELATED_SYNONYM:'))
        for scope, predicate in scoped_predicates:
            query_text = f"""
            PREFIX RELATED_SYNONYM: <http://www.geneontology.org/formats/oboInOwl#hasRelatedSynonym>
            PREFIX EXACT_SYNONYM: <http://www.geneontology.org/formats/oboInOwl#hasExactSynonym>
            PREFIX XREF: <http://www.geneontology.org/formats/oboInOwl#hasDbXref>
            PREFIX DEFENITION: <http://purl.obolibrary.org/obo/IAO_0000115>
            SELECT DISTINCT ?desc ?xref ?defn
            WHERE {{
                $identifier {predicate} ?desc.
                OPTIONAL {{
                    ?desc XREF: ?xref.
                    ?desc DEFENITION: ?defn.
                }}
            }}
            """
            found = self.query_sparql(query_template=query_text,
                                      inputs={'identifier': identifier},
                                      outputs=['desc', 'xref', 'defn'])
            for row in found:
                row['scope'] = scope
            rows += found
        return rows

    def search(self, text, is_regex=False, ignore_case=True):
        """Search labels, definitions and synonyms for ``text``.

        :param text: Text to look for.
        :param is_regex: Treat ``text`` as a regular expression when true;
            otherwise require an exact string match.
        :param ignore_case: Match case-insensitively when true.
        :return: Result rows with ``'id'`` converted to a CURIE.
        """
        # NOTE(review): ``text`` lands inside a quoted SPARQL literal without
        # escaping; callers passing untrusted input should sanitise it first.
        search_string = text
        if is_regex and ignore_case:
            filtr = """
            (
                regex(str(?definition), "$search_string","i") ||
                regex(str(?label), "$search_string","i") ||
                regex(str(?related_synonym), "$search_string","i") ||
                regex(str(?exact_synonym), "$search_string","i")
            )"""
        elif is_regex and not ignore_case:
            filtr = """
            (
                regex(str(?definition), "$search_string") ||
                regex(str(?label), "$search_string") ||
                regex(str(?related_synonym), "$search_string") ||
                regex(str(?exact_synonym), "$search_string")
            )
            """
        elif not is_regex and ignore_case:
            # The stored values are lower-cased in the query, so the needle
            # must be lower-cased too.
            search_string = search_string.lower()
            filtr = """
            (
                lcase(str(?label))= "$search_string" ||
                lcase(str(?definition))= "$search_string" ||
                lcase(str(?related_synonym))= "$search_string" ||
                lcase(str(?exact_synonym))= "$search_string"
            )
            """
        else:
            filtr = """
            (
                str(?label) = "$search_string" ||
                str(?definition) = "$search_string" ||
                str(?exact_synonym) = "$search_string" ||
                str(?related_synonym) = "$search_string"
            )"""
        query_text = f"""
        PREFIX DEFINED_BY: <http://www.w3.org/2000/01/rdf-schema#isDefinedBy>
        PREFIX DEFINITION: <http://purl.obolibrary.org/obo/IAO_0000115>
        PREFIX RELATED_SYNONYM: <http://www.geneontology.org/formats/oboInOwl#hasRelatedSynonym>
        PREFIX EXACT_SYNONYM: <http://www.geneontology.org/formats/oboInOwl#hasExactSynonym>
        SELECT DISTINCT ?id ?label ?definition ?defined_by
        WHERE {{
            OPTIONAL{{
                ?id EXACT_SYNONYM: ?exact_synonym.
            }}
            OPTIONAL {{
                ?id RELATED_SYNONYM: ?related_synonym.
            }}
            OPTIONAL {{
                ?id rdfs:label ?label.
            }}
            OPTIONAL {{
                ?id DEFINITION: ?definition.
            }}
            OPTIONAL {{
                ?id DEFINED_BY: ?defined_by.
            }}
            FILTER {filtr}.
        }}
        """
        response = self.query_sparql(
            query_template=query_text,
            inputs={'search_string': search_string},
            outputs=['id', 'label', 'defined_by', 'definition'])
        for row in response:
            row['id'] = Curie_Resolver.uri_to_curie(row['id'])
        return response

    def lookup(self, identifier):
        """Given an identifier, find ids in the ontology for which it is an xref.

        :param identifier: CURIE of the form ``<PREFIX>:<ID>``.
        :return: List of ``{'id', 'label', 'xrefs'}`` dicts, one per term.
        :raises AssertionError: If ``identifier`` is empty or has no ``':'``.
        """
        # Raised explicitly (rather than via ``assert``) so the check is not
        # stripped when Python runs with -O; the exception type is unchanged.
        if not (identifier and ':' in identifier):
            raise AssertionError(
                "Must provide a valid curie. Curie must have format "
                "<PREFIX>:<ID>")
        query_template = """
        PREFIX XREF: <http://www.geneontology.org/formats/oboInOwl#hasDbXref>
        PREFIX LABEL: <http://www.w3.org/2000/01/rdf-schema#label>
        PREFIX ID: <http://www.geneontology.org/formats/oboInOwl#id>
        SELECT DISTINCT ?xrefs ?term_id ?term_label
        FROM <http://reasoner.renci.org/ontology>
        WHERE {
            ?term XREF: ?o;
                  XREF: ?xrefs;
                  ID: ?term_id;
                  LABEL: ?term_label.
            FILTER(?o = '$identifier' && !isBlank(?term)).
        }
        """
        result = self.query_sparql(query_template=query_template,
                                   inputs={'identifier': identifier},
                                   outputs=['xrefs', 'term_id', 'term_label'])
        # Collapse the one-row-per-xref result into one entry per term.
        by_term = {}
        for row in result:
            entry = by_term.setdefault(row['term_id'], {
                'label': row['term_label'],
                'xrefs': []
            })
            if row['xrefs'] not in entry['xrefs']:
                entry['xrefs'].append(row['xrefs'])
        return [{
            'id': term_id,
            'label': entry['label'],
            'xrefs': entry['xrefs']
        } for term_id, entry in by_term.items()]

    def id_list(self, identifier):
        """List the ids of all classes whose IRI starts with a prefix's base URI.

        :param identifier: CURIE prefix; upper-cased before it is looked up in
            the CURIE-to-URI map.
        :return: List of term ids, or ``[]`` when the prefix is unknown.
        """
        identifier_uri = Curie_Resolver.get_curie_to_uri_map().get(
            identifier.upper(), None)
        if identifier_uri is None:
            return []
        # The id is OPTIONAL in the query; when it is unbound the term IRI is
        # converted to a CURIE instead. (Kept as a Python comment: a SPARQL
        # '#' comment would swallow the rest of the query if line breaks in
        # the template were ever lost.)
        query = """
        PREFIX TYPE: <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>
        PREFIX CLASS: <http://www.w3.org/2002/07/owl#Class>
        PREFIX ID: <http://www.geneontology.org/formats/oboInOwl#id>
        SELECT DISTINCT ?term ?term_id
        FROM <http://reasoner.renci.org/ontology>
        WHERE {
            ?term TYPE: CLASS:.
            FILTER (strstarts(lcase(str(?term)), lcase("$identifier")))
            OPTIONAL {
                ?term ID: ?term_id
            }
        }
        """
        result = self.query_sparql(query_template=query,
                                   inputs={'identifier': identifier_uri},
                                   outputs=['term', 'term_id'])
        return self._curies(result, 'term_id', 'term')

    def exactMatch(self, identifier):
        """Return skos:exactMatch targets followed by unseen owl:equivalentClass targets."""
        # If ontologies are missing we should be looking here.
        # Blank nodes are filtered out because the store sometimes returns them.
        def matches_for(predicate):
            query_text = f"""
            PREFIX EXACT_MATCH: <http://www.w3.org/2004/02/skos/core#exactMatch>
            PREFIX EQUIVALENT_CLASS: <http://www.w3.org/2002/07/owl#equivalentClass>
            PREFIX ID: <http://www.geneontology.org/formats/oboInOwl#id>
            SELECT DISTINCT ?match ?match_id
            FROM <http://reasoner.renci.org/ontology>
            WHERE {{
                $identifier {predicate} ?match.
                OPTIONAL {{
                    ?match ID: ?match_id.
                }}
                FILTER (!isBlank(?match))
            }}
            """
            rows = self.query_sparql(query_template=query_text,
                                     inputs={'identifier': identifier},
                                     outputs=['match', 'match_id'])
            return self._curies(rows, 'match_id', 'match')

        exact = matches_for('EXACT_MATCH:')
        equivalent = matches_for('EQUIVALENT_CLASS:')
        return exact + [match for match in equivalent if match not in exact]

    def closeMatch(self, identifier):
        """Return the skos:closeMatch targets of ``identifier``."""
        # Blank nodes are filtered out because the store sometimes returns them.
        query_template = """
        PREFIX CLOSE_MATCH: <http://www.w3.org/2004/02/skos/core#closeMatch>
        PREFIX ID: <http://www.geneontology.org/formats/oboInOwl#id>
        SELECT DISTINCT ?match ?match_id
        FROM <http://reasoner.renci.org/ontology>
        WHERE {
            $identifier CLOSE_MATCH: ?match.
            OPTIONAL {
                ?match ID: ?match_id.
            }
            FILTER (!isBlank(?match))
        }
        """
        rows = self.query_sparql(query_template=query_template,
                                 inputs={'identifier': identifier},
                                 outputs=['match', 'match_id'])
        return self._curies(rows, 'match_id', 'match')

    def subterms(self, identifier):
        """Alias for :meth:`descendants`."""
        return self.descendants(identifier)

    def superterms(self, identifier):
        """Alias for :meth:`ancestors`."""
        return self.ancestors(identifier)

    def parents(self, identifier):
        """First generation ancestors."""
        query_template = """PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX ID: <http://www.geneontology.org/formats/oboInOwl#id>
        SELECT DISTINCT ?parent ?parent_id
        FROM <http://reasoner.renci.org/ontology>
        WHERE {
            $identifier rdfs:subClassOf ?parent.
            OPTIONAL {
                ?parent ID: ?parent_id
            }
            FILTER(!isBlank(?parent))
        }"""
        rows = self.query_sparql(query_template=query_template,
                                 inputs={'identifier': identifier},
                                 outputs=['parent', 'parent_id'])
        return self._curies(rows, 'parent_id', 'parent')

    def children(self, identifier):
        """First generation descendants."""
        return self.single_level_is_a(identifier)

    def siblings(self, identifier):
        """Return terms that share a parent with ``identifier``."""
        siblings = []
        for parent in self.parents(identifier):
            # Full IRIs must be wrapped in angle brackets to be valid SPARQL.
            parent_ref = parent if 'http' not in parent else f'<{parent}>'
            siblings += [
                child for child in self.children(parent_ref)
                if child != identifier and child not in siblings
            ]
        return siblings

    def property_value(self, identifier, property_key):
        """Get the value(s) of one property.

        :param identifier: Term reference placed in the subject position.
        :param property_key: Predicate IRI (without angle brackets).
        :return: The single value when exactly one is found, otherwise the
            list of values (possibly empty).
        """
        query_template = """
        SELECT ?property_value
        WHERE {
            $identifier <$property_key> ?property_value.
        }
        """
        result = self.query_sparql(query_template=query_template,
                                   inputs={
                                       'identifier': identifier,
                                       'property_key': property_key
                                   },
                                   outputs=['property_value'])
        values = [row['property_value'] for row in result]
        return values[0] if len(values) == 1 else values

    def all_properties(self, identifier):
        """Get ALL properties for a CURIE, grouped by property key.

        :return: List of dicts with ``'property_label'`` (``None`` when the
            property has no label), ``'property_values'`` and ``'property_key'``.
        """
        query_template = """
        SELECT ?property_key ?property_value ?property_label
        FROM <http://reasoner.renci.org/ontology>
        WHERE {
            $identifier ?property_key ?property_value.
            OPTIONAL {
                ?property_key rdfs:label ?property_label.
            }
            FILTER (!isBlank(?property_value))
        }
        """
        # A list, like every other call site, so the output order is stable.
        results = self.query_sparql(
            query_template=query_template,
            inputs={'identifier': identifier},
            outputs=['property_value', 'property_key', 'property_label'])
        grouped = {}
        for row in results:
            key = row['property_key']
            if key not in grouped:
                grouped[key] = {
                    'property_label':
                    row['property_label'] if 'property_label' in row else None,
                    'property_values': [],
                    'property_key': key
                }
            values = grouped[key]['property_values']
            if row['property_value'] not in values:
                values.append(row['property_value'])
        return list(grouped.values())
class ChemBioKS(Service):
    """ Generic service endpoints for medical and bio-chemical data. This set
        comprises portions of chem2bio2rdf (CTD, KEGG, PubChem, DRUGBANK) """

    def __init__(self, context):
        """Register the service under the name "chembio" and open the triplestore."""
        super(ChemBioKS, self).__init__("chembio", context)
        self.triplestore = TripleStore(self.url)

    def query_chembio(self, query):
        """ Execute and return the result of a SPARQL query. """
        return self.triplestore.execute_query(query)

    def get_exposure_conditions(self, chemicals):
        """ Identify conditions (MeSH IDs) triggered by the specified stressor
        agent ids (also MeSH IDs).

        :param chemicals: List of IDs for substances of interest.
        :type chemicals: list of MeSH IDs, eg. D052638
        :return: List of dicts with chemical, gene, pathway, pathName, pathID
            and a ``human`` flag set when the pathway name contains "(human)".
        """
        id_list = ' '.join(
            "( mesh:{0} )".format(chemical) for chemical in chemicals)
        text = self.triplestore.get_template(
            "ctd_gene_expo_disease").safe_substitute(chemicals=id_list)
        results = self.triplestore.execute_query(text)
        return [{
            "chemical": b['chemical'].value,
            "gene": b['gene'].value,
            "pathway": b['kegg_pathway'].value,
            "pathName": b['pathway_name'].value,
            "pathID": b['pathway_id'].value,
            "human": '(human)' in b['pathway_name'].value
        } for b in results.bindings]

    def get_drugs_by_condition(self, conditions):
        """ Get drugs associated with a set of conditions.

        :param conditions: Conditions to find associated drugs for.
        :type conditions: List of MeSH IDs for conditions, eg.: D001249
            (a single ID is also accepted; a ``MESH:``/``mesh:`` prefix is optional)
        :return: Rows with drugID, drugGenericName, pubChemCID and diseasePMIDs.
        """
        if not isinstance(conditions, list):
            conditions = [conditions]
        prefix = "mesh:"
        terms = []
        for condition in conditions:
            condition = condition.replace("MESH:", prefix)
            # Prefix each id on its own merits so a mixed list stays valid.
            if not condition.startswith(prefix):
                condition = prefix + condition
            terms.append(condition)
        # The template wraps $diseaseIds in one pair of parentheses; joining
        # with ') (' yields one VALUES row per condition, which is what SPARQL
        # requires (commas are not allowed inside a VALUES row).
        condition_list = ' ) ( '.join(terms)
        return self.triplestore.query_template(
            inputs={"diseaseIds": condition_list.lower()},
            outputs=[
                'drugID', 'drugGenericName', 'pubChemCID', 'diseasePMIDs'
            ],
            template_text="""
            prefix mesh: <http://bio2rdf.org/mesh:>
            prefix ctd: <http://chem2bio2rdf.org/ctd/resource/>
            prefix db_resource: <http://chem2bio2rdf.org/drugbank/resource/>
            select ?drugID ?drugGenericName ?diseasePMIDs ?ctdChemDis ?pubChemCID where {
               values ( ?diseaseId ) { ( $diseaseIds ) }
               ?ctdChemDis ctd:cid ?pubChemCID;
                           ctd:diseaseid ?diseaseId;
                           ctd:pubmedids ?diseasePMIDs.
               ?dbInter db_resource:Name ?name ;
                        db_resource:DBID ?drugID .
               ?drugID db_resource:CID ?pubChemCID ;
                       db_resource:Generic_Name ?drugGenericName .
            }""")

    def get_drugs_by_condition_graph(self, conditions):
        """Return ``(edge, node)`` pairs for drugs linked to ``conditions.identifier``."""
        drugs = self.get_drugs_by_condition(conditions.identifier)
        results = []
        for r in drugs:
            edge = KEdge('c2b2r', 'conditionToDrug', {
                'cid': r['pubChemCID'],
                'pmids': r['diseasePMIDs']
            })
            # Only the last path segment of the drug IRI is used as the node id.
            node = KNode(r['drugID'].split('/')[-1], node_types.DRUG,
                         r['drugGenericName'])
            results.append((edge, node))
        return results

    def get_genes_pathways_by_disease(self, diseases):
        """ Get genes and pathways associated with specified conditions.

        :param diseases: List of conditions designated by MeSH ID.
        :return: Returns a list of dicts containing gene and path information.
        """
        diseaseMeshIDList = ' '.join(
            "( mesh:{0} )".format(disease) for disease in diseases)
        text = self.triplestore.get_template(
            "genes_pathways_by_disease").safe_substitute(
                diseaseMeshIDList=diseaseMeshIDList)
        results = self.triplestore.execute_query(text)
        return [{
            "uniprotGene": b['uniprotGeneID'].value,
            "keggPath": b['keggPath'].value,
            "pathName": b['pathwayName'].value,
            "human": '(human)' in b['pathwayName'].value
        } for b in results.bindings]

    def get_drug_gene_disease(self, disease_name, drug_name):
        """ Identify targets and diseases associated with a drug name.

        :param disease_name: MeSH name of a disease condition.
        :type str: String
        :param drug_name: Name of a drug.
        :type str: String
        :return: List of dicts with ``uniprotSymbol`` and ``diseaseId``.
        """
        text = self.triplestore.get_template(
            "drug_gene_disease").safe_substitute(diseaseName=disease_name,
                                                 drugName=drug_name)
        results = self.triplestore.execute_query(text)
        return [{
            "uniprotSymbol": b['uniprotSym'].value,
            "diseaseId": b['diseaseID'].value
        } for b in results.bindings]

    def pubchem_to_ncbigene(self, pubchemID):
        """Return CTD chemical-gene interaction rows for a PubChem compound id."""
        result = self.triplestore.query_template(
            inputs={"pubchemID": "pubchem:{}".format(pubchemID)},
            outputs=[
                'NCBIGene', 'meshID', 'interaction', 'interactionTypes',
                'pubmedids'
            ],
            template_text="""
            prefix pubchem: <http://chem2bio2rdf.org/pubchem/resource/pubchem_compound/>
            prefix ctd: <http://chem2bio2rdf.org/ctd/resource/>
            select distinct ?NCBIGene ?meshID ?interaction ?interactionTypes ?pubmedids where {
               ?ctdChemGene ctd:cid $pubchemID;
                            ctd:chemicalid ?meshID ;
                            ctd:geneid ?NCBIGene;
                            ctd:interaction ?interaction;
                            ctd:interactiontypes ?interactionTypes;
                            ctd:pubmedids ?pubmedids.
            }""")
        return [{
            'NCBIGene': r['NCBIGene'],
            'meshID': r['meshID'],
            'interaction': r['interaction'],
            'interactionTypes': r['interactionTypes'],
            'pubmedids': r['pubmedids']
        } for r in result]

    def drug_name_to_gene_symbol(self, drug_name):
        """Find genes linked to drugs whose generic name matches ``drug_name``.

        :return: List of dicts with ``uniprotSym``, ``pmids``, ``drugID`` and
            ``drugName`` (the matched generic name, ``None`` if not returned).
        """
        result = self.triplestore.query_template(
            inputs={"drugName": drug_name},
            # drugGenericName is selected by the query; requesting it lets
            # callers label the drug node with the matched name.
            outputs=['uniprotSym', 'pmids', 'drugID', 'drugGenericName'],
            template_text="""
            prefix ctd: <http://chem2bio2rdf.org/ctd/resource/>
            prefix db_resource: <http://chem2bio2rdf.org/drugbank/resource/>
            select ?drugGenericName ?pmids ?drugID ?uniprotSym where {
               values ( ?drugName ) { ( "$drugName" ) }
               ?ctdChemGene ctd:cid ?pubChemCID;
                            ctd:pubmedids ?pmids;
                            ctd:gene ?uniprotSym .
               ?drugID db_resource:CID ?pubChemCID ;
                       db_resource:Generic_Name ?drugGenericName .
               filter regex(lcase(str(?drugGenericName)), lcase(?drugName))
            }""")
        return [{
            'uniprotSym': r['uniprotSym'],
            'pmids': r.get('pmids', None),
            'drugID': r['drugID'],
            'drugName': r.get('drugGenericName', None)
        } for r in result]

    def drugname_to_pubchem(self, drug_name):
        """Find PubChem ids for drugs whose generic name matches ``drug_name``.

        :return: List of dicts with ``drugID`` (the PubChem id) and ``drugName``.
        """
        result = self.triplestore.query_template(
            inputs={"drugName": drug_name},
            outputs=['pubChemID', 'drugGenericName'],
            template_text="""
            prefix db_resource: <http://chem2bio2rdf.org/drugbank/resource/>
            select distinct ?pubChemID ?drugGenericName where {
               values ( ?drugName ) { ( "$drugName" ) }
               ?drugID db_resource:CID ?pubChemID ;
                       db_resource:Generic_Name ?drugGenericName .
               filter regex(lcase(str(?drugGenericName)), lcase(?drugName))
            }""")
        return [{
            'drugID': r['pubChemID'],
            'drugName': r['drugGenericName']
        } for r in result]

    def gene_symbol_to_pathway(self, uniprot_symbol):
        """Return KEGG pathway rows for a gene IRI (at most 500)."""
        return self.triplestore.query_template(
            inputs={"uniprotSymbol": uniprot_symbol},
            outputs=["keggPath"],
            template_text="""
            prefix kegg: <http://chem2bio2rdf.org/kegg/resource/>
            prefix pharmgkb: <http://chem2bio2rdf.org/pharmgkb/resource/>
            prefix ctd: <http://chem2bio2rdf.org/ctd/resource/>
            select ?ctdGene ?uniprotID ?pathwayName ?keggPath where {
               values ( ?ctdGene ) { ( <$uniprotSymbol> ) }
               ?keggPath kegg:protein ?uniprotID ;
                         kegg:Pathway_name ?pathwayName .
               ?pharmGene pharmgkb:Symbol ?ctdGene ;
                          pharmgkb:UniProt_Id ?uniprotID.
               ?ctdChemGene ctd:gene ?ctdGene.
            } LIMIT 500
            """)

    def uniprot_to_hgnc(self, uniprot_symbol):
        """Return ``hgncID`` rows that are owl:sameAs the given UniProt gene node."""
        return self.triplestore.query_template(
            inputs={"uniprotID": Text.un_curie(uniprot_symbol.identifier)},
            outputs=["hgncID"],
            template_text="""
            prefix uniprot: <http://chem2bio2rdf.org/uniprot/resource/gene/>
            prefix owl: <http://www.w3.org/2002/07/owl#>
            prefix hgnc: <http://chem2bio2rdf.org/rdf/resource/hgnc/>
            select distinct ?hgncID where {
               values ( ?uniprotID ) { ( uniprot:${uniprotID} ) }
               ?uniprotID <http://www.w3.org/2002/07/owl#sameAs> ?hgncID.
               filter ( strstarts (str(?hgncID), "http://bio2rdf.org/gene:"))
            }
            """)

    def graph_uniprot_to_hgnc(self, uniprot_symbol):
        """Return ``(edge, HGNC gene node)`` pairs for a UniProt gene node."""
        result = self.uniprot_to_hgnc(uniprot_symbol)
        return [(self.get_edge(r, predicate='synonym'),
                 KNode('HGNC:{0}'.format(r['hgncID'].split(':')[-1]),
                       node_types.GENE)) for r in result]

    def graph_get_genes_by_disease(self, disease):  # reasoner
        """Return ``(edge, UniProt gene node)`` pairs for a disease node."""
        disease = disease.identifier.split(':')[1].lower()
        response = self.get_genes_pathways_by_disease([disease])
        results = []
        for r in response:
            edge = KEdge('c2b2r', 'diseaseToGene', {'keggPath': r['keggPath']})
            node = KNode(
                "UNIPROT:{0}".format(r['uniprotGene'].split('/')[-1]),
                node_types.GENE)
            results.append((edge, node))
        return results

    # NOTE(review): this method used to be defined twice with identical
    # bodies; the first copy carried @cachier(stale_after=20 days) but was
    # silently replaced by the later, undecorated copy. The single definition
    # below keeps the behaviour that was actually in effect (no caching);
    # re-apply the decorator here if caching is wanted.
    def graph_get_pathways_by_gene(self, gene):  # reasoner
        """Return ``(edge, KEGG pathway node)`` pairs for a gene node (at most 2000 rows)."""
        response = self.triplestore.query_template(
            inputs={"gene": gene.identifier.split(':')[1].upper()},
            outputs=['keggPath'],
            template_text="""
            prefix kegg: <http://chem2bio2rdf.org/kegg/resource/>
            prefix drugbank: <http://chem2bio2rdf.org/drugbank/resource/>
            prefix uniprot: <http://chem2bio2rdf.org/uniprot/resource/gene/>
            prefix ctd: <http://chem2bio2rdf.org/ctd/resource/>
            prefix mesh: <http://bio2rdf.org/mesh:>
            select ?drugGenericName ?uniprotGeneID ?pathwayName ?keggPath where {
               ?keggPath kegg:protein ?swissProtID ;
                         kegg:Pathway_name ?pathwayName .
               ?keggInter kegg:cid ?pubchemCID .
               ?dbInter drugbank:GeneBank_ID ?geneBankID ;
                        drugbank:SwissProt_ID ?swissProtID ;
                        drugbank:gene ?uniprotGeneID .
               ?drugID drugbank:CID ?pubchemCID ;
                       drugbank:Generic_Name ?drugGenericName .
               ?ctd_disease ctd:diseaseid ?diseaseID ;
                            ctd:cid ?pubchemCID .
               values ( ?uniprotGeneID ) { ( uniprot:$gene ) }
            } LIMIT 2000""")
        results = []
        for r in response:
            edge = KEdge('c2b2r', 'geneToPathway', {})
            node = KNode("KEGG:{0}".format(r['keggPath'].split('/')[-1]),
                         node_types.PATHWAY)
            results.append((edge, node))
        return results

    def graph_drugname_to_gene_symbol(self, drug_name_node):
        """Return ``(edge, UniProt gene node)`` pairs for a drug-name node."""
        drug_name = Text.un_curie(drug_name_node.identifier)
        response = self.drug_name_to_gene_symbol(drug_name)
        results = []
        for r in response:
            edge = self.get_edge(r, predicate="targets")
            node = KNode("UNIPROT:{0}".format(Text.path_last(r['uniprotSym'])),
                         node_types.GENE)
            results.append((edge, node))
        return results

    def graph_name_to_drugbank(self, drug_name_node):
        """Return ``(edge, DrugBank drug node)`` pairs for a drug-name node."""
        drug_name = Text.un_curie(drug_name_node.identifier)
        response = self.drug_name_to_gene_symbol(drug_name)
        results = []
        for r in response:
            edge = self.get_edge(r, predicate="drugname")
            # Fall back to the name that was searched for when the row carries
            # no generic name, instead of raising KeyError.
            label = r.get('drugName') or drug_name
            node = KNode("DRUGBANK:{0}".format(Text.path_last(r['drugID'])),
                         node_types.DRUG,
                         label=label)
            results.append((edge, node))
        return results

    def graph_drugbank_to_uniprot(self, drugbank):
        """Return ``(edge, UniProt gene node)`` pairs for a DrugBank drug node."""
        response = self.triplestore.query_template(
            inputs={
                "drugID": "DB{0}".format(Text.un_curie(drugbank.identifier))
            },
            outputs=["uniprotGeneID"],
            template_text="""
            prefix drugbank: <http://chem2bio2rdf.org/drugbank/resource/>
            prefix drugbank_drug: <http://chem2bio2rdf.org/drugbank/resource/drugbank_drug/>
            prefix ctd: <http://chem2bio2rdf.org/ctd/resource/>
            select distinct ?uniprotGeneID where {
               values ( ?drugID ) { ( drugbank_drug:${drugID} ) }
               ?dbInter drugbank:GeneBank_ID ?geneBankID ;
                        drugbank:gene ?uniprotGeneID .
               ?drugID drugbank:CID ?pubchemCID ;
                       drugbank:Generic_Name ?drugGenericName .
               ?ctd_disease ctd:diseaseid ?diseaseID ;
                            ctd:cid ?pubchemCID .
            }""")
        return [(self.get_edge(r, predicate='targets'),
                 KNode("UNIPROT:{0}".format(
                     r['uniprotGeneID'].split('/')[-1]), node_types.GENE))
                for r in response]

    def graph_diseasename_to_uniprot(self, disease):
        """Return ``(edge, UniProt gene node)`` pairs for a disease-name node.

        A one-row probe query first checks that the name matches a CTD
        disease; only then is the larger gene query (at most 500 rows) run.
        """
        results = []
        disease_name = Text.un_curie(disease.identifier)
        response = self.triplestore.query_template(
            inputs={"diseaseName": disease_name},
            outputs=["pubChemCID"],
            template_text="""
            prefix ctd: <http://chem2bio2rdf.org/ctd/resource/>
            select distinct ?pubChemCID where {
               values ( ?diseaseName ) { ( "$diseaseName" ) }
               ?ctdChemDis ctd:cid ?pubChemCID;
                           ctd:diseasename ?diseaseNameRec.
               filter regex(lcase(str(?diseaseNameRec)), lcase(?diseaseName))
            } LIMIT 1""")
        if len(response) > 0:  # This is a disease.
            response = self.triplestore.query_template(
                inputs={"diseaseName": disease_name},
                outputs=["disPmids", "chemPmids", "uniprotSym"],
                template_text="""
                prefix ctd: <http://chem2bio2rdf.org/ctd/resource/>
                select ?disPmids ?ctdChemDis ?chemPmids ?uniprotSym ?diseaseId where {
                   values ( ?diseaseName ) { ( "$diseaseName" ) }
                   ?ctdChemGene ctd:cid ?pubChemCID;
                                ctd:pubmedids ?chemPmids;
                                ctd:gene ?uniprotSym.
                   ?ctdChemDis ctd:cid ?pubChemCID;
                               ctd:diseaseid ?diseaseId;
                               ctd:diseasename ?diseaseNameRec;
                               ctd:pubmedids ?disPmids.
                   filter regex(lcase(str(?diseaseNameRec)), lcase(?diseaseName))
                } LIMIT 500""")
            for r in response:
                pmids = r['chemPmids'] + "|" + r['disPmids']
                # No trailing comma here: it would wrap the edge in a 1-tuple.
                edge = self.get_edge(r, predicate='caused_by', pmids=pmids)
                node = KNode(
                    "UNIPROT:{0}".format(r['uniprotSym'].split('/')[-1]),
                    node_types.GENE)
                results.append((edge, node))
        return results

    def graph_diseaseid_to_uniprot(self, drugbank):
        """Return ``(edge, UniProt gene node)`` pairs for a disease-id node.

        :param drugbank: Node whose lower-cased ``identifier`` is substituted
            as the disease id (the parameter name is historical).
        """
        response = self.triplestore.query_template(
            inputs={"diseaseID": drugbank.identifier.lower()},
            outputs=["uniprotGeneID"],
            template_text="""
            prefix drugbank: <http://chem2bio2rdf.org/drugbank/resource/>
            prefix drugbank_drug: <http://chem2bio2rdf.org/drugbank/resource/drugbank_drug/>
            prefix ctd: <http://chem2bio2rdf.org/ctd/resource/>
            prefix mesh.disease: <http://bio2rdf.org/mesh:>
            select distinct ?uniprotGeneID where {
               values ( ?diseaseID ) { ( $diseaseID ) }
               ?dbInter drugbank:gene ?uniprotGeneID .
               ?drugID drugbank:CID ?pubchemCID.
               ?ctd_disease ctd:diseaseid ?diseaseID ;
                            ctd:cid ?pubchemCID .
            }""")
        return [(self.get_edge(r, predicate='targets'),
                 KNode("UNIPROT:{0}".format(
                     r['uniprotGeneID'].split('/')[-1]), node_types.GENE))
                for r in response]

    def graph_drugname_to_pubchem(self, drugname_node):
        """Return ``(edge, PubChem drug node)`` pairs for a drug-name node."""
        drug_name = Text.un_curie(drugname_node.identifier)
        response = self.drugname_to_pubchem(drug_name)
        return [(self.get_edge(r, predicate='drugname_to_pubchem'),
                 KNode("PUBCHEM:{}".format(r['drugID'].split('/')[-1]),
                       node_types.DRUG,
                       label=r['drugName'])) for r in response]

    def graph_pubchem_to_ncbigene(self, pubchem_node):
        """Return ``(edge, NCBIGene node)`` pairs for a PubChem compound node.

        Each row from :meth:`pubchem_to_ncbigene` carries NCBIGene, meshID,
        interaction, interactionTypes and pubmedids; the edge properties keep
        the interaction fields and the ``|``-separated pubmed ids as a list.
        """
        # The compound mesh coming back from here is very out of date. Ignore.
        pubchemid = Text.un_curie(pubchem_node.identifier)
        response = self.pubchem_to_ncbigene(pubchemid)
        retvals = []
        for r in response:
            props = {
                'interaction': r['interaction'],
                'interactionTypes': r['interactionTypes'],
                'publications': r['pubmedids'].split('|')
            }
            retvals.append(
                (self.get_edge(props, predicate='pubchem_to_ncbigene'),
                 KNode("NCBIGene:{}".format(r['NCBIGene']), node_types.GENE)))
        return retvals
class OntologicalHeirarchy(Service):
    """
    Service that makes call to uberongraph to resolve subclass relationships between ontological terms
    """

    def __init__(self):
        """Set up the SPARQL endpoint, the per-type root IRIs and the writer."""
        # NOTE(review): Service.__init__ is not invoked here, unlike the other
        # services in this file -- confirm that is intentional.
        self.url = "https://stars-app.renci.org/uberongraph/sparql"
        self.triplestore = TripleStore(self.url)
        # CURIE prefixes whose OBO namespaces are declared in the query.
        self.prefix_set = {
            node_types.DISEASE_OR_PHENOTYPIC_FEATURE: ['HP', 'MONDO'],
            node_types.CELLULAR_COMPONENT: ['CL'],
            node_types.BIOLOGICAL_PROCESS_OR_ACTIVITY: ['GO'],
            node_types.ANATOMICAL_ENTITY: ['UBERON'],
            node_types.CHEMICAL_SUBSTANCE: ['CHEBI']
        }
        # Root class IRI (already wrapped in <>) for each node type walked by
        # runner().
        self.root_uris = {
            node_types.ANATOMICAL_ENTITY:
            "<http://purl.obolibrary.org/obo/UBERON_0001062>",
            node_types.DISEASE:
            "<http://purl.obolibrary.org/obo/MONDO_0000001>",
            node_types.MOLECULAR_ACTIVITY:
            "<http://purl.obolibrary.org/obo/GO_0003674>",
            node_types.BIOLOGICAL_PROCESS:
            "<http://purl.obolibrary.org/obo/GO_0008150>",
            node_types.CHEMICAL_SUBSTANCE:
            "<http://purl.obolibrary.org/obo/CHEBI_24431>",
            node_types.PHENOTYPIC_FEATURE:
            "<http://purl.obolibrary.org/obo/HP_0000118>",
            node_types.CELL:
            "<http://purl.obolibrary.org/obo/CL_0000000>",
            # Host was misspelled "purl.orolibrary.org", which can never match
            # an OBO IRI.
            node_types.CELLULAR_COMPONENT:
            "<http://purl.obolibrary.org/obo/GO_0005575>"
        }
        # Sorted so the generated query text is the same on every run.
        unique_prefixes = sorted({
            pref
            for prefs in self.prefix_set.values()
            for pref in prefs
        })
        obo_prefixes = '\n'.join(
            f'PREFIX {pref}: <http://purl.obolibrary.org/obo/{pref}_>'
            for pref in unique_prefixes)
        # $root_uri is left as a placeholder for query_template to fill in.
        self.query = f"""
        {obo_prefixes}
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        select ?parent_id ?parent_label ?child_id ?child_label
        where {{
            ?parent_id rdfs:subClassOf $root_uri .
            ?child_id rdfs:subClassOf ?parent_id.
            OPTIONAL {{
                ?parent_id rdfs:label ?parent_label.
                ?child_id rdfs:label ?child_label.
            }}
        }}
        """
        rosetta = Rosetta()
        self.wdg = WriterDelegator(rosetta)

    def runner(self):
        """Write the subclass nodes and edges found under every root IRI."""
        for node_type, root_iri in self.root_uris.items():
            nodes, edges = self.term_get_ancestors(node_type, root_iri)
            for index, n in enumerate(nodes):
                self.wdg.write_node(n, annotate=False)
                if ((index / len(nodes)) * 100) % 10 == 0:
                    print((index / len(nodes)) * 100, '% complete')
            for index, e in enumerate(edges):
                self.wdg.write_edge(e)
                if index % 100 == 0:
                    self.wdg.flush()
                if ((index / len(edges)) * 100) % 10 == 0:
                    print((index / len(edges)) * 100, '% complete')
            # The loop above only flushes on every 100th edge; flush once more
            # so the items written after the last multiple of 100 are not
            # left pending.
            self.wdg.flush()
        return

    def term_get_ancestors(self, node_type, root_iri):
        """Query parent/child class pairs under ``root_iri``.

        :param node_type: type assigned to every node that is created
        :param root_iri: root class IRI substituted for ``$root_uri``
        :return: ``(nodes, edges)`` as two sets
        """
        results = self.triplestore.query_template(
            template_text=self.query,
            inputs={'root_uri': root_iri},
            outputs=['parent_id', 'parent_label', 'child_id', 'child_label'])
        print('found total ', len(results), ' results.')
        nodes = set()
        edges = set()
        for row in results:
            # Output type would be same as input type?
            ancestor_node = KNode(Text.obo_to_curie(row['parent_id']),
                                  name=row['parent_label'],
                                  type=node_type)
            child_node = KNode(Text.obo_to_curie(row['child_id']),
                               name=row['child_label'],
                               type=node_type)
            if ancestor_node.id == child_node.id:
                # refrain from adding edge to the node itself
                continue
            predicate = LabeledID(identifier='rdfs:subClassOf',
                                  label='subclass of')
            edge = self.create_edge(
                source_node=child_node,
                target_node=ancestor_node,
                predicate=predicate,
                provided_by='uberongraph.term_get_ancestors',
                input_id=child_node.id)
            nodes.add(child_node)
            nodes.add(ancestor_node)
            edges.add(edge)
        return nodes, edges
class OmniCorp(Service):
    """Knowledge source that looks up PubMed ids referencing ontology terms
    through the OmniCorp SPARQL endpoint."""

    def __init__(self, context):
        """Register as the "omnicorp" service and connect to its triplestore."""
        super(OmniCorp, self).__init__("omnicorp", context)
        # NOTE(review): self.url is presumably populated by Service.__init__.
        self.triplestore = TripleStore(self.url)
        # CURIE prefix -> IRI stem used to build OmniCorp identifiers.
        self.prefix_to_uri = {
            'UBERON': 'http://purl.obolibrary.org/obo/UBERON_',
            'BSPO': 'http://purl.obolibrary.org/obo/BSPO_',
            'PATO': 'http://purl.obolibrary.org/obo/PATO_',
            'GO': 'http://purl.obolibrary.org/obo/GO_',
            'MONDO': 'http://purl.obolibrary.org/obo/MONDO_',
            'HP': 'http://purl.obolibrary.org/obo/HP_',
            # Was 'ENVO:' -- the stray colon made it the only key that did not
            # follow the bare-prefix form of the other entries.
            'ENVO': 'http://purl.obolibrary.org/obo/ENVO_',
            'OBI': 'http://purl.obolibrary.org/obo/OBI_',
            'CL': 'http://purl.obolibrary.org/obo/CL_',
            'SO': 'http://purl.obolibrary.org/obo/SO_',
            'CHEBI': 'http://purl.obolibrary.org/obo/CHEBI_',
            'HGNC': 'http://identifiers.org/hgnc/HGNC:',
            'MESH': 'http://id.nlm.nih.gov/mesh/'
        }

    def get_omni_identifier(self, node):
        """Return the IRI for ``node.id``, or None when its prefix is unknown."""
        #Let's start with just the 'best' identifier
        prefix = Text.get_curie(node.id)
        if prefix not in self.prefix_to_uri:
            logger.warning("What kinda tomfoolery is this?")
            logger.warning(f"{node.id} {node.type}")
            logger.warning(f"{node.synonyms}")
            return None
        return f'{self.prefix_to_uri[prefix]}{Text.un_curie(node.id)}'

    def query_omnicorp(self, query):
        """ Execute and return the result of a SPARQL query. """
        return self.triplestore.execute_query(query)

    def sparql_get_all_shared_pmids(self, identifier_list):
        """Query the publications shared by every pair of terms in a list.

        :param identifier_list: SPARQL VALUES block text, e.g. ``{ <a> <b> }``
        :return: rows with ``term1``, ``term2`` and ``pubmed``
        """
        text = """
        PREFIX dct: <http://purl.org/dc/terms/>
        SELECT DISTINCT ?pubmed ?term1 ?term2
        WHERE {
          hint:Query hint:analytic true .
          VALUES ?term1 $id_list_a
          VALUES ?term2 $id_list_b
          ?pubmed dct:references ?term1 .
          ?pubmed dct:references ?term2 .
          FILTER(STR(?term1) < STR(?term2))
        }
        """
        start = datetime.datetime.now()
        results = self.triplestore.query_template(
            inputs={
                'id_list_a': identifier_list,
                'id_list_b': identifier_list
            },
            outputs=['term1', 'term2', 'pubmed'],
            template_text=text,
            post=True)
        end = datetime.datetime.now()
        logger.debug(f'Completed in: {end-start}')
        return results

    def sparql_count_pmids(self, identifier):
        """Query the number of distinct publications referencing ``identifier``."""
        text = """
        PREFIX dct: <http://purl.org/dc/terms/>
        SELECT (COUNT(DISTINCT ?pubmed) as ?count)
        WHERE {
          hint:Query hint:analytic true .
          ?pubmed dct:references <$identifier> .
        }
        """
        results = self.triplestore.query_template(
            inputs={'identifier': identifier},
            outputs=['count'],
            template_text=text,
        )
        return results

    def sparql_get_shared_pmids(self, identifier_a, identifier_b):
        """Query the publications that reference both identifiers."""
        text = """
        PREFIX dct: <http://purl.org/dc/terms/>
        SELECT DISTINCT ?pubmed
        WHERE {
          hint:Query hint:analytic true .
          ?pubmed dct:references <$id_a> .
          ?pubmed dct:references <$id_b> .
        }
        """
        results = self.triplestore.query_template(
            inputs={
                'id_a': identifier_a,
                'id_b': identifier_b
            },
            outputs=['pubmed'],
            template_text=text,
            post=True)
        return results

    def get_all_shared_pmids(self, nodes):
        """Map node pairs to the PMIDs that reference both nodes.

        :param nodes: sequence of nodes (it is sliced, so it must be a list)
        :return: defaultdict keyed by ``(node, node)`` with lists of
            ``PMID:...`` strings; pairs with no shared publication get ``[]``
        """
        # Nodes without a resolvable identifier are left out of the query.
        oiddict = {}
        for n in nodes:
            oid = self.get_omni_identifier(n)
            if oid is not None:
                oiddict[oid] = n
        oids = [f'<{x}>' for x in oiddict]
        oidsstring = '{ ' + ' '.join(oids) + '}'
        results = self.sparql_get_all_shared_pmids(oidsstring)
        pubmeds = defaultdict(list)
        for r in results:
            k = (oiddict[r['term1']], oiddict[r['term2']])
            pubmeds[k].append(f"PMID:{r['pubmed'].split('/')[-1]}")
        # Give every unordered pair an entry, even when nothing was found.
        for i, node_i in enumerate(nodes):
            for node_j in nodes[:i]:
                k_ij = (node_i, node_j)
                k_ji = (node_j, node_i)
                if k_ij not in pubmeds and k_ji not in pubmeds:
                    pubmeds[k_ij] = []
        return pubmeds

    def call_with_retries(self, fnc, args):
        """Call ``fnc(*args)``, retrying after a pause whenever it raises.

        :param fnc: callable to invoke
        :param args: positional arguments for ``fnc``
        :return: the result of ``fnc``, or None once the retries run out
        """
        done = False
        ntries = 0
        maxtries = 100
        rest_time = 10  #seconds
        result = None
        start = datetime.datetime.now()
        while not done and ntries < maxtries:
            try:
                result = fnc(*args)
                done = True
            except Exception:
                # Not a bare except: KeyboardInterrupt and SystemExit must be
                # able to stop a loop that can otherwise run for many minutes.
                logger.warning("OmniCorp error, retrying")
                time.sleep(rest_time)
                ntries += 1
        if not done:
            return None
        end = datetime.datetime.now()
        logger.debug(f'Total call ntries: {ntries}, time: {end-start}')
        return result

    def count_pmids(self, node):
        """Return the publication count for ``node``.

        Returns 0 when the node has no OmniCorp identifier and None when the
        query kept failing.
        """
        identifier = self.get_omni_identifier(node)
        if identifier is None:
            return 0
        res = self.call_with_retries(self.sparql_count_pmids, [identifier])
        if res is None:
            return None
        logger.debug(f"Returned {res[0]['count']}")
        return res[0]['count']

    def get_shared_pmids(self, node1, node2):
        """Return the publications that reference both nodes.

        Returns ``[]`` when either node has no OmniCorp identifier and None
        when the query kept failing.
        """
        id1 = self.get_omni_identifier(node1)
        id2 = self.get_omni_identifier(node2)
        if id1 is None or id2 is None:
            return []
        pmids = self.call_with_retries(self.sparql_get_shared_pmids,
                                       [id1, id2])
        if pmids is None:
            logger.error("OmniCorp gave up")
            return None
        return [p['pubmed'] for p in pmids]
class UberonGraphKS(Service):
    """A knowledge source created by 1) Combining cell ontology, uberon, and
    HPO, 2) Reasoning over the total graph to realize many implicit edges.
    Created by Jim Balhoff"""

    def __init__(self, context):
        """Register as the "uberongraph" service and connect to its triplestore."""
        super(UberonGraphKS, self).__init__("uberongraph", context)
        # NOTE(review): self.url is presumably populated by Service.__init__.
        self.triplestore = TripleStore(self.url)
        #TODO: Pull this from the biolink model?
        # Root class CURIE for each node type, used by get_edges.
        self.class_defs = {
            node_types.CELL: 'CL:0000000',
            node_types.ANATOMICAL_ENTITY: 'UBERON:0001062',
            node_types.BIOLOGICAL_PROCESS: 'GO:0008150',
            node_types.MOLECULAR_ACTIVITY: 'GO:0003674',
            node_types.CHEMICAL_SUBSTANCE: 'CHEBI:24431',
            node_types.DISEASE: 'MONDO:0000001',
            node_types.PHENOTYPIC_FEATURE: 'UPHENO:0001002'
        }

    def query_uberongraph(self, query):
        """ Execute and return the result of a SPARQL query. """
        return self.triplestore.execute_query(query)

    def get_edges(self, source_type, obj_type):
        """Find the predicates that link subclasses of two node types.

        :param source_type: node type of the subject (a key of class_defs)
        :param obj_type: node type of the object (a key of class_defs)
        :return: rows with ``p`` and ``pLabel``
        """
        text = """
        prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        prefix UBERON: <http://purl.obolibrary.org/obo/UBERON_>
        prefix CL: <http://purl.obolibrary.org/obo/CL_>
        prefix GO: <http://purl.obolibrary.org/obo/GO_>
        prefix CHEBI: <http://purl.obolibrary.org/obo/CHEBI_>
        prefix MONDO: <http://purl.obolibrary.org/obo/MONDO_>
        prefix UPHENO: <http://purl.obolibrary.org/obo/UPHENO_>
        prefix BFO: <http://purl.obolibrary.org/obo/BFO_>
        select distinct ?p ?pLabel
        from <http://reasoner.renci.org/ontology>
        where {
            graph <http://reasoner.renci.org/redundant> {
                ?sourceID ?p ?objID .
            }
            graph <http://reasoner.renci.org/ontology/closure> {
                ?sourceID rdfs:subClassOf $sourcedefclass .
            }
            graph <http://reasoner.renci.org/ontology/closure> {
                ?objID rdfs:subClassOf $objdefclass .
                hint:Prior hint:runFirst true .
            }
            ?p rdfs:label ?pLabel .
        }
        """
        results = self.triplestore.query_template(
            inputs={
                'sourcedefclass': self.class_defs[source_type],
                'objdefclass': self.class_defs[obj_type]
            },
            outputs=['p', 'pLabel'],
            template_text=text)
        return results

    def get_label(self, identifier):
        """Return the rdfs:label of ``identifier``, or '' when none is found."""
        obo_id = Text.curie_to_obo(identifier)
        text = """
        prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        select distinct ?label
        from <http://reasoner.renci.org/ontology>
        where {
            $obo_id rdfs:label ?label .
        }
        """
        results = self.triplestore.query_template(inputs={'obo_id': obo_id},
                                                  outputs=['label'],
                                                  template_text=text)
        if len(results) < 1:
            return ''
        return results[0]['label']

    def cell_get_cellname(self, cell_identifier):
        """ Identify label for a cell type
        :param cell: CL identifier for cell type
        """
        text = """
        prefix CL: <http://purl.obolibrary.org/obo/CL_>
        select distinct ?cellLabel
        from <http://reasoner.renci.org/nonredundant>
        from <http://reasoner.renci.org/ontology>
        where {
            $cellID rdfs:label ?cellLabel .
        }
        """
        results = self.triplestore.query_template(
            inputs={'cellID': cell_identifier},
            outputs=['cellLabel'],
            template_text=text)
        return results

    def get_anatomy_parts(self, anatomy_identifier):
        """Given an UBERON id, find other UBERONS that are parts of the query

        :param anatomy_identifier: IRI of the anatomy term; it is wrapped in
            ``<>`` before being placed in the query
        :return: rows with ``part``, ``partlabel`` and an added ``curie``
        """
        anatomy_identifier = f"<{anatomy_identifier}>"
        text = """
        prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        prefix UBERON: <http://purl.obolibrary.org/obo/UBERON_>
        prefix BFO: <http://purl.obolibrary.org/obo/BFO_>
        select distinct ?part ?partlabel
        from <http://reasoner.renci.org/nonredundant>
        from <http://reasoner.renci.org/ontology>
        where {
            $anatomy_id BFO:0000051 ?part .
            graph <http://reasoner.renci.org/ontology/closure> {
                ?part rdfs:subClassOf UBERON:0001062 .
            }
            ?part rdfs:label ?partlabel .
        }
        """
        results = self.triplestore.query_template(
            inputs={'anatomy_id': anatomy_identifier},
            outputs=['part', 'partlabel'],
            template_text=text)
        for result in results:
            result['curie'] = Text.obo_to_curie(result['part'])
        return results

    def get_neighbor(self, input_id, output_type, subject=True):
        """Find terms of ``output_type`` directly linked to ``input_id``.

        :param input_id: CURIE of the known term
        :param output_type: node type of the wanted neighbours; must be a key
            of the ``parents`` table below
        :param subject: when True ``input_id`` is the subject of the triple,
            otherwise it is the object
        :return: rows with ``output_id``, ``output_label``, ``p``, ``pLabel``
        """
        parents = {
            node_types.ANATOMICAL_ENTITY:
            "<http://purl.obolibrary.org/obo/UBERON_0001062>",
            node_types.DISEASE:
            "<http://purl.obolibrary.org/obo/MONDO_0000001>",
            node_types.MOLECULAR_ACTIVITY:
            "<http://purl.obolibrary.org/obo/GO_0003674>",
            node_types.BIOLOGICAL_PROCESS:
            "<http://purl.obolibrary.org/obo/GO_0008150>",
            node_types.CHEMICAL_SUBSTANCE:
            "<http://purl.obolibrary.org/obo/CHEBI_24431>",
            node_types.PHENOTYPIC_FEATURE:
            "<http://purl.obolibrary.org/obo/HP_0000118>"
        }
        # Declare the input's own prefix so its CURIE resolves in the query.
        pref = Text.get_curie(input_id)
        obo_prefix = f'PREFIX {pref}: <http://purl.obolibrary.org/obo/{pref}_>'
        text = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        """ + obo_prefix + """
        select distinct ?output_id ?output_label ?p ?pLabel
        from <http://reasoner.renci.org/nonredundant>
        from <http://reasoner.renci.org/ontology>
        where {
            graph <http://reasoner.renci.org/nonredundant> {
        """
        if subject:
            text += ' $input_id ?p ?output_id .'
        else:
            text += ' ?output_id ?p $input_id .'
        text += """
            }
            graph <http://reasoner.renci.org/ontology/closure> {
                ?output_id rdfs:subClassOf $parent .
            }
            ?output_id rdfs:label ?output_label .
            ?p rdfs:label ?pLabel .
        }
        """
        results = self.triplestore.query_template(
            inputs={
                'input_id': input_id,
                'parent': parents[output_type]
            },
            outputs=['output_id', 'output_label', 'p', 'pLabel'],
            template_text=text)
        return results

    def anatomy_to_anatomy(self, identifier):
        """Find anatomical entities linked to ``identifier`` in either direction.

        :return: dict with ``subject`` and ``object`` lists of rows
        """
        results = {'subject': [], 'object': []}
        # The patterns used to say ?input_id, which is a free SPARQL variable,
        # so the identifier passed in was never applied to the query.
        for direction, query in (('subject', ' $input_id ?p ?output_id .'),
                                 ('object', ' ?output_id ?p $input_id .')):
            text = """
            PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            prefix UBERON: <http://purl.obolibrary.org/obo/UBERON_>
            select distinct ?output_id ?output_label ?p ?pLabel
            from <http://reasoner.renci.org/nonredundant>
            from <http://reasoner.renci.org/ontology>
            where {
                graph <http://reasoner.renci.org/redundant> {
            """ + query + """
                }
                graph <http://reasoner.renci.org/ontology/closure> {
                    ?output_id rdfs:subClassOf UBERON:0001062 .
                }
                ?output_id rdfs:label ?output_label .
                ?p rdfs:label ?pLabel .
            }
            """
            results[direction] += self.triplestore.query_template(
                inputs={'input_id': identifier},
                outputs=['output_id', 'output_label', 'p', 'pLabel'],
                template_text=text)
        return results

    def anatomy_to_go(self, anatomy_identifier):
        """ Identify process and functions related to anatomical terms (anatomy, cell, components).

        :return: dict with ``subject`` and ``object`` lists of rows
        """
        #This is a bit messy, but we need to do 4 things.  We are looking for go terms
        # that are either biological processes or activities and we are looking for predicates
        # that point either direction.
        results = {'subject': [], 'object': []}
        for goParent in ('GO:0008150', 'GO:0003674'):
            for direction, query in (('subject', ' $anatID ?p ?goID'),
                                     ('object', ' ?goID ?p $anatID')):
                text = """
                prefix GO: <http://purl.obolibrary.org/obo/GO_>
                select distinct ?goID ?goLabel ?p ?pLabel
                from <http://reasoner.renci.org/nonredundant>
                from <http://reasoner.renci.org/ontology>
                where {
                    graph <http://reasoner.renci.org/redundant> {
                """ + query + """
                    }
                    graph <http://reasoner.renci.org/ontology/closure> {
                        ?goID rdfs:subClassOf $goParent .
                    }
                    ?goID rdfs:label ?goLabel .
                    ?p rdfs:label ?pLabel
                }
                """
                results[direction] += self.triplestore.query_template(
                    inputs={
                        'anatID': anatomy_identifier,
                        'goParent': goParent
                    },
                    outputs=['goID', 'goLabel', 'p', 'pLabel'],
                    template_text=text)
        return results

    def go_to_anatomy(self, input_identifier):
        """ Identify anatomy terms related to process/functions.

        :param input_identifier: identifier substituted for ``$goID``
        :return: dict with ``subject`` and ``object`` lists of rows
        """
        # we are looking for predicates that point either direction.
        results = {'subject': [], 'object': []}
        for direction, query in (('subject', ' ?anatID ?p $goID'),
                                 ('object', ' $goID ?p ?anatID')):
            text = """
            prefix UBERON: <http://purl.obolibrary.org/obo/UBERON_>
            prefix GO: <http://purl.obolibrary.org/obo/GO_>
            select distinct ?anatID ?anatLabel ?p ?pLabel
            from <http://reasoner.renci.org/nonredundant>
            from <http://reasoner.renci.org/ontology>
            where {
                graph <http://reasoner.renci.org/redundant> {
            """ + query + """
                }
                graph <http://reasoner.renci.org/ontology/closure> {
                    ?anatID rdfs:subClassOf UBERON:0001062 .
                }
                ?anatID rdfs:label ?anatLabel .
                ?p rdfs:label ?pLabel
            }
            """
            results[direction] += self.triplestore.query_template(
                inputs={'goID': input_identifier},
                outputs=['anatID', 'anatLabel', 'p', 'pLabel'],
                template_text=text)
        return results

    def pheno_or_disease_to_go(self, identifier):
        """Find GO processes and activities that ``identifier`` points to.

        :param identifier: CURIE substituted for ``$input_id``
        :return: rows with ``goID``, ``goLabel``, ``p``, ``pLabel``
        """
        # The HP prefix used to be bound to the MONDO namespace, so an HP
        # CURIE expanded to a MONDO IRI.
        text = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        prefix BFO: <http://purl.obolibrary.org/obo/BFO_>
        prefix GO: <http://purl.obolibrary.org/obo/GO_>
        prefix MONDO: <http://purl.obolibrary.org/obo/MONDO_>
        prefix HP: <http://purl.obolibrary.org/obo/HP_>
        select distinct ?goID ?goLabel ?p ?pLabel
        from <http://reasoner.renci.org/nonredundant>
        from <http://reasoner.renci.org/ontology>
        where {
            graph <http://reasoner.renci.org/redundant> {
                $input_id ?p ?goID .
            }
            graph <http://reasoner.renci.org/ontology/closure> {
                { ?goID rdfs:subClassOf GO:0008150 . }
                UNION
                { ?goID rdfs:subClassOf GO:0003674 . }
            }
            ?goID rdfs:label ?goLabel .
            ?p rdfs:label ?pLabel .
        }
        """
        results = self.triplestore.query_template(
            inputs={'input_id': identifier},
            outputs=['goID', 'goLabel', 'p', 'pLabel'],
            template_text=text)
        return results

    def phenotype_to_anatomy(self, hp_identifier):
        """ Identify anatomies related to phenotypes.
        :param hp_identifier: HP identifier for phenotype
        """
        #The subclassof uberon:0001062 ensures that the result
        #is an anatomical entity.
        #We don't need to do the subject/object game because there's nothing in ubergraph
        # that goes that direction
        text = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX UBERON: <http://purl.obolibrary.org/obo/UBERON_>
        SELECT DISTINCT ?anatomy_id ?anatomy_label ?predicate ?predicate_label
        FROM <http://reasoner.renci.org/ontology>
        WHERE {
            graph <http://reasoner.renci.org/redundant>{
                $HPID ?predicate ?anatomy_id.
            }
            graph <http://reasoner.renci.org/ontology/closure>{
                ?anatomy_id rdfs:subClassOf UBERON:0001062.
            }
            ?anatomy_id rdfs:label ?anatomy_label .
            OPTIONAL {?predicate rdfs:label ?predicate_label.}
        }
        """
        results = self.triplestore.query_template(
            inputs={'HPID': hp_identifier},
            outputs=[
                'anatomy_id', 'anatomy_label', 'predicate', 'predicate_label'
            ],
            template_text=text)
        return results

    def anatomy_to_phenotype(self, uberon_id):
        """Find phenotypes (subclasses of HP:0000118) that point at ``uberon_id``."""
        #sparql very identical to phenotype_to_anatomy. could not find any anatomical
        # entity that is a subject of subclass of HP:0000118, in ubergraph at this point.
        # treating this as another version of pheno -> anatomical_entity but when
        # anatomical_entity is known an
        # we want to go back to a phenotype.
        text = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX HP:<http://purl.obolibrary.org/obo/HP_>
        SELECT DISTINCT ?pheno_id ?pheno_label ?predicate ?predicate_label
        FROM <http://reasoner.renci.org/ontology>
        WHERE {
            graph <http://reasoner.renci.org/redundant> {
                ?pheno_id ?predicate $UBERONID.
            }
            graph <http://reasoner.renci.org/ontology/closure>{
                ?pheno_id rdfs:subClassOf HP:0000118.
            }
            ?pheno_id rdfs:label ?pheno_label.
            OPTIONAL {?predicate rdfs:label ?predicate_label.}
        }"""
        results = self.triplestore.query_template(
            inputs={'UBERONID': uberon_id},
            outputs=[
                'pheno_id', 'pheno_label', 'predicate', 'predicate_label'
            ],
            template_text=text)
        return results

    def biological_process_or_activity_to_chemical(self, go_id):
        """
        Given a GO term, find the chemical entities it points to.

        :param go_id: identifier substituted for ``$GO_ID``
        :return: rows with ``chebi_id``, ``predicate``, ``label_predicate``
            and ``chebi_label``
        """
        # NOTE(review): xsd: is used without a PREFIX declaration; this
        # presumably relies on the endpoint pre-declaring it -- confirm.
        text = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX GO: <http://purl.obolibrary.org/obo/GO_>
        PREFIX RO: <http://purl.obolibrary.org/obo/RO_>
        PREFIX chemical_entity: <http://purl.obolibrary.org/obo/CHEBI_24431>
        PREFIX chemical_class: <http://purl.obolibrary.org/obo/CHEBI_24431>
        SELECT DISTINCT ?chebi_id ?predicate ?label_predicate ?chebi_label
        from <http://reasoner.renci.org/ontology>
        from <http://reasoner.renci.org/nonredundant>
        where {
            $GO_ID ?predicate ?chebi_id.
            ?chebi_id rdfs:label ?chebi_label.
            GRAPH <http://reasoner.renci.org/ontology/closure>
                { ?chebi_id rdfs:subClassOf chemical_class:.}
            ?predicate rdfs:label ?label_predicate.
            FILTER ( datatype(?label_predicate) = xsd:string)
        }
        """
        results = self.triplestore.query_template(
            template_text=text,
            outputs=['chebi_id', 'predicate', 'label_predicate', 'chebi_label'],
            inputs={'GO_ID': go_id})
        return results

    def pheno_to_biological_activity(self, pheno_id):
        """
        Finds biological activities related to a phenotype
        :param pheno_id: phenotype identifier
        """
        text = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX owl: <http://www.w3.org/2002/07/owl#>
        PREFIX GO: <http://purl.obolibrary.org/obo/GO_>
        PREFIX has_phenotype_affecting: <http://purl.obolibrary.org/obo/UPHENO_0000001>
        PREFIX RO: <http://purl.obolibrary.org/obo/RO_>
        prefix HP: <http://purl.obolibrary.org/obo/HP_>
        SELECT DISTINCT ?go_id ?predicate ?predicate_label ?go_label
        from <http://reasoner.renci.org/nonredundant>
        from <http://reasoner.renci.org/ontology>
        WHERE {
            $pheno_type ?predicate ?go_id.
            ?go_id rdfs:label ?go_label.
            graph <http://reasoner.renci.org/ontology/closure> {
                { ?go_id rdfs:subClassOf GO:0008150 . }
                UNION
                { ?go_id rdfs:subClassOf GO:0003674 . }
            }
            ?predicate rdfs:label ?predicate_label.
        }
        """
        results = self.triplestore.query_template(
            template_text=text,
            inputs={'pheno_type': pheno_id},
            outputs=['go_id', 'predicate', 'predicate_label', 'go_label'])
        return results

    def disease_to_anatomy(self, disease_id):
        """Find anatomical entities that ``disease_id`` points to."""
        #THere are no anatomy-(predicate)->disease triples
        text = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX anatomicalEntity: <http://purl.obolibrary.org/obo/UBERON_0001062>
        SELECT DISTINCT ?anatomyID ?predicate ?predicate_label ?anatomy_label
        FROM <http://reasoner.renci.org/nonredundant>
        FROM <http://reasoner.renci.org/ontology>
        WHERE {
            graph <http://reasoner.renci.org/redundant> {
                $diseaseID ?predicate ?anatomyID.
            }
            ?anatomyID rdfs:label ?anatomy_label.
            graph <http://reasoner.renci.org/ontology/closure> {
                ?anatomyID rdfs:subClassOf anatomicalEntity: .
            }
            ?predicate rdfs:label ?predicate_label.
        }
        """
        results = self.triplestore.query_template(
            template_text=text,
            outputs=[
                'anatomyID', 'predicate', 'predicate_label', 'anatomy_label'
            ],
            inputs={'diseaseID': disease_id})
        return results

    def anatomy_to_chemical_substance(self, anatomy_id):
        """Find chemical entities that ``anatomy_id`` points to."""
        #There's no chemical-(predicate)->anatomy
        text = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX chemical_entity: <http://purl.obolibrary.org/obo/CHEBI_24431>
        SELECT DISTINCT ?predicate ?predicate_label ?chemical_entity ?chemical_label
        FROM <http://reasoner.renci.org/ontology>
        FROM <http://reasoner.renci.org/redundant>
        WHERE {
            $anatomy_id ?predicate ?chemical_entity.
            graph <http://reasoner.renci.org/ontology/closure> {
                ?chemical_entity rdfs:subClassOf chemical_entity:.
            }
            ?predicate rdfs:label ?predicate_label .
            ?chemical_entity rdfs:label ?chemical_label.
        }
        """
        results = self.triplestore.query_template(
            template_text=text,
            outputs=[
                'predicate', 'predicate_label', 'chemical_entity',
                'chemical_label'
            ],
            inputs={'anatomy_id': anatomy_id})
        return results

    def anatomy_to_disease(self, anatomy_id):
        """Find diseases that point at ``anatomy_id``."""
        text = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX disease: <http://purl.obolibrary.org/obo/MONDO_0000001>
        SELECT DISTINCT ?predicate ?predicate_label ?disease ?disease_label
        FROM <http://reasoner.renci.org/ontology>
        FROM <http://reasoner.renci.org/redundant>{
            ?disease ?predicate $anatomy_id.
            graph <http://reasoner.renci.org/ontology/closure> {
                ?disease rdfs:subClassOf disease:.
            }
            ?predicate rdfs:label ?predicate_label .
            ?disease rdfs:label ?disease_label.
        }
        """
        results = self.triplestore.query_template(
            template_text=text,
            outputs=[
                'predicate', 'predicate_label', 'disease', 'disease_label'
            ],
            inputs={'anatomy_id': anatomy_id})
        return results

    def create_phenotype_anatomy_edge(self, node_id, node_label, input_id,
                                      phenotype_node):
        """Build an anatomy node and the edge from it to ``phenotype_node``.

        :return: ``(edge, anatomy_node)``
        """
        predicate = LabeledID(identifier='GAMMA:0000002',
                              label='inverse of has phenotype affecting')
        anatomy_node = KNode(Text.obo_to_curie(node_id),
                             type=node_types.ANATOMICAL_ENTITY,
                             name=node_label)
        edge = self.create_edge(anatomy_node, phenotype_node,
                                'uberongraph.get_anatomy_by_phenotype_graph',
                                input_id, predicate)
        return edge, anatomy_node

    def create_anatomy_phenotype_edge(self, node_id, node_label, input_id,
                                      anatomy_node):
        """Build a phenotype node and the edge from ``anatomy_node`` to it.

        :return: ``(edge, phenotype_node)``
        """
        predicate = LabeledID(identifier='GAMMA:0000002',
                              label='inverse of has phenotype affecting')
        phenotype_node = KNode(Text.obo_to_curie(node_id),
                               type=node_types.PHENOTYPIC_FEATURE,
                               name=node_label)
        edge = self.create_edge(anatomy_node, phenotype_node,
                                'uberongraph.get_phenotype_by_anatomy_graph',
                                input_id, predicate)
        return edge, phenotype_node

    def dep_get_anatomy_by_phenotype_graph(self, phenotype_node):
        """Find anatomies, and their parts, for each HP synonym of a phenotype.

        :return: list of ``(edge, node)`` tuples
        """
        results = []
        for curie in phenotype_node.get_synonyms_by_prefix('HP'):
            anatomies = self.phenotype_to_anatomy(curie)
            for r in anatomies:
                node = KNode(r['anatomy_id'],
                             type=node_types.ANATOMICAL_ENTITY,
                             name=r['anatomy_label'])
                # try to derive the label from the relation for the new ubergraph axioms
                predicate_label = r['predicate_label'] or '_'.join(
                    r['predicate'].split('#')[-1].split('.'))
                predicate = LabeledID(Text.obo_to_curie(r['predicate']),
                                      predicate_label)
                edge = self.create_edge(
                    phenotype_node, node,
                    'uberongraph.get_anatomy_by_phenotype_graph',
                    phenotype_node.id, predicate)
                # phenotype_to_anatomy does not select an input_label column,
                # so only use it when the row really carries one.
                if phenotype_node.name is None and 'input_label' in r:
                    phenotype_node.name = r['input_label']
                results.append((edge, node))
                #These tend to be very high level terms.  Let's also get their parts to
                #be more inclusive.
                #TODO: there ought to be a more principled way to take care of this, but
                #it highlights the uneasy relationship between the high level world of
                #smartapi and the low-level sparql-vision.
                part_results = self.get_anatomy_parts(r['anatomy_id'])
                for pr in part_results:
                    pnode = KNode(pr['part'],
                                  type=node_types.ANATOMICAL_ENTITY,
                                  name=pr['partlabel'])
                    pedge = self.create_edge(
                        phenotype_node, pnode,
                        'uberongraph.get_anatomy_by_phenotype_graph',
                        phenotype_node.id, predicate)
                    results.append((pedge, pnode))
        return results

    def get_out_by_in(self,
                      input_node,
                      output_type,
                      prefixes,
                      subject=True,
                      object=True):
        """Find neighbours of ``input_node`` that have type ``output_type``.

        Every synonym of the input node carrying one of ``prefixes`` is
        queried both as the subject and as the object of a triple.

        :param input_node: node whose synonyms are queried
        :param output_type: node type of the wanted neighbours
        :param prefixes: CURIE prefixes selecting which synonyms to use
        :param subject: currently unused
        :param object: currently unused
        :return: list of ``(edge, output_node)`` tuples
        """
        # NOTE(review): subject/object are accepted but never consulted; both
        # directions are always queried. Left as is because callers outside
        # this view may rely on it.
        # Must stay in this frame: the edge source is named after our caller.
        caller = f'uberongraph.{inspect.stack()[1][3]}'
        curies = set()
        for pre in prefixes:
            curies.update(input_node.get_synonyms_by_prefix(pre))
        # Keep each row together with the curie that produced it. Edges used
        # to be tagged with whichever curie the loop happened to see last.
        results = {'subject': [], 'object': []}
        for curie in curies:
            results['subject'] += [
                (curie, r)
                for r in self.get_neighbor(curie, output_type, subject=True)
            ]
            results['object'] += [
                (curie, r)
                for r in self.get_neighbor(curie, output_type, subject=False)
            ]
        returnresults = []
        for direction in ['subject', 'object']:
            done = set()
            for curie, r in results[direction]:
                key = (r['p'], r['output_id'])
                if key in done:
                    continue
                predicate_curie = Text.obo_to_curie(r['p'])
                prefix = Text.get_curie(predicate_curie)
                # Upper-case every prefix except the ubergraph axioms one.
                if prefix != 'ubergraph-axioms.ofn':
                    prefix = prefix.upper()
                upper_cased_predicate_curie = prefix + ":" + Text.un_curie(
                    predicate_curie)
                predicate = LabeledID(upper_cased_predicate_curie,
                                      r['pLabel'])
                output_node = KNode(r['output_id'],
                                    type=output_type,
                                    name=r['output_label'])
                if direction == 'subject':
                    edge = self.create_edge(input_node, output_node, caller,
                                            curie, predicate)
                else:
                    edge = self.create_edge(output_node, input_node, caller,
                                            curie, predicate)
                done.add(key)
                returnresults.append((edge, output_node))
        return returnresults

    #Don't get confused.  There is the direction of the statement (who is the subject
    # and who is the object) and which of them we are querying by.  We want to query
    # independent of direction i.e. let the input node be either the subject or the object.

    def get_anatomy_by_anatomy_graph(self, anatomy_node):
        return self.get_out_by_in(anatomy_node, node_types.ANATOMICAL_ENTITY,
                                  ['UBERON', 'CL', 'GO'])

    def get_phenotype_by_anatomy_graph(self, anatomy_node):
        return self.get_out_by_in(anatomy_node, node_types.PHENOTYPIC_FEATURE,
                                  ['UBERON', 'CL', 'GO'])

    def get_chemical_substance_by_anatomy(self, anatomy_node):
        return self.get_out_by_in(anatomy_node, node_types.CHEMICAL_SUBSTANCE,
                                  ['UBERON', 'CL', 'GO'])

    def get_process_by_anatomy(self, anatomy_node):
        return self.get_out_by_in(anatomy_node, node_types.BIOLOGICAL_PROCESS,
                                  ['UBERON', 'CL', 'GO'])

    def get_activity_by_anatomy(self, anatomy_node):
        return self.get_out_by_in(anatomy_node, node_types.MOLECULAR_ACTIVITY,
                                  ['UBERON', 'CL', 'GO'])

    def get_disease_by_anatomy_graph(self, anatomy_node):
        return self.get_out_by_in(anatomy_node, node_types.DISEASE,
                                  ['UBERON', 'CL', 'GO'])

    def get_anatomy_by_process_or_activity(self, go_node):
        return self.get_out_by_in(go_node, node_types.ANATOMICAL_ENTITY,
                                  ['GO'])

    def get_chemical_entity_by_process_or_activity(self, go_node):
        return self.get_out_by_in(go_node, node_types.CHEMICAL_SUBSTANCE,
                                  ['GO'])

    def get_process_by_disease(self, disease_node):
        return self.get_out_by_in(disease_node, node_types.BIOLOGICAL_PROCESS,
                                  ['MONDO'])

    def get_activity_by_disease(self, disease_node):
        return self.get_out_by_in(disease_node, node_types.MOLECULAR_ACTIVITY,
                                  ['MONDO'])

    def get_anatomy_by_disease(self, disease_node):
        return self.get_out_by_in(disease_node, node_types.ANATOMICAL_ENTITY,
                                  ['MONDO'])

    def get_chemical_by_disease(self, disease_node):
        return self.get_out_by_in(disease_node, node_types.CHEMICAL_SUBSTANCE,
                                  ['MONDO'])

    def get_process_by_phenotype(self, pheno_node):
        return self.get_out_by_in(pheno_node, node_types.BIOLOGICAL_PROCESS,
                                  ['HP'])

    def get_chemical_by_phenotype(self, pheno_node):
        return self.get_out_by_in(pheno_node, node_types.CHEMICAL_SUBSTANCE,
                                  ['HP'])

    def get_activity_by_phenotype(self, pheno_node):
        return self.get_out_by_in(pheno_node, node_types.MOLECULAR_ACTIVITY,
                                  ['HP'])

    def get_anatomy_by_phenotype_graph(self, pheno_node):
        return self.get_out_by_in(pheno_node, node_types.ANATOMICAL_ENTITY,
                                  ['HP'])

    def get_chemical_by_chemical(self, chem_node):
        return self.get_out_by_in(chem_node, node_types.CHEMICAL_SUBSTANCE,
                                  ['CHEBI'])

    def disease_get_ancestors(self, disease_node):
        """Find the MONDO ancestors of a disease node.

        :param disease_node: node whose ``id`` is a MONDO CURIE; any other
            prefix yields an empty list
        :return: list of ``(edge, ancestor_node)`` tuples
        """
        curie = disease_node.id
        prefix = Text.get_curie(curie)
        if "MONDO" != prefix:
            return []
        query = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        prefix MONDO: <http://purl.obolibrary.org/obo/MONDO_>
        select distinct ?output_id ?label
        where {
            graph <http://reasoner.renci.org/ontology/closure> {
                $disease_id rdfs:subClassOf ?output_id .
                ?output_id rdfs:subClassOf MONDO:0000001 .
            }
            graph <http://reasoner.renci.org/ontology>{
                ?output_id rdfs:label ?label.
            }
        }
        """
        results = self.triplestore.query_template(
            template_text=query,
            inputs={'disease_id': curie},
            outputs=['output_id', 'label'])
        outputs = []
        for row in results:
            # NOTE(review): the rest of this class passes name= to KNode;
            # confirm that label= is still an accepted keyword.
            ancestor_node = KNode(
                row['output_id'],
                label=row['label'],
                type=node_types.DISEASE_OR_PHENOTYPIC_FEATURE)
            if ancestor_node.id == disease_node.id:
                # refrain from adding edge to the node itself
                continue
            predicate = LabeledID(identifier='rdfs:subClassOf',
                                  label='subclass of')
            edge = self.create_edge(
                source_node=disease_node,
                target_node=ancestor_node,
                predicate=predicate,
                provided_by='uberongraph.disease_get_ancestors',
                input_id=disease_node.id)
            outputs.append((edge, ancestor_node))
        return outputs
class UberonGraphKS(Service):
    """A knowledge source created by 1) Combining cell ontology, uberon, and
    HPO, 2) Reasoning over the total graph to realize many implicit edges.
    Created by Jim Balhoff"""

    def __init__(self, context):
        """Register as the "uberongraph" service and open a triplestore on ``self.url``.

        NOTE(review): ``self.url`` is presumably set by ``Service.__init__``
        from the service configuration -- confirm.
        """
        super(UberonGraphKS, self).__init__("uberongraph", context)
        self.triplestore = TripleStore(self.url)

    def query_uberongraph(self, query):
        """Execute and return the result of a SPARQL query."""
        return self.triplestore.execute_query(query)

    def cell_get_cellname(self, cell_identifier):
        """Look up the label(s) of a cell type.

        :param cell_identifier: identifier substituted for ``$cellID``
            (a CL term, per the declared prefix).
        :return: query rows, each carrying ``cellLabel``.
        """
        # rdfs is declared explicitly so the query does not rely on the
        # endpoint predefining that prefix.
        text = """
        prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        prefix CL: <http://purl.obolibrary.org/obo/CL_>
        select distinct ?cellLabel
        from <http://reasoner.renci.org/nonredundant>
        from <http://example.org/uberon-hp-cl.ttl>
        where {
          $cellID rdfs:label ?cellLabel .
        }
        """
        return self.triplestore.query_template(
            inputs={'cellID': cell_identifier},
            outputs=['cellLabel'],
            template_text=text)

    def get_anatomy_parts(self, anatomy_identifier):
        """Given an UBERON id, find other UBERONS that are parts of the query.

        :param anatomy_identifier: UBERON CURIE, or an ``http...`` IRI which
            is first converted with ``Text.obo_to_curie``.
        :return: query rows with ``part`` and ``partlabel``, each extended
            with a ``curie`` key holding the CURIE form of ``part``.
        """
        if anatomy_identifier.startswith('http'):
            anatomy_identifier = Text.obo_to_curie(anatomy_identifier)
        text = """
        prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        prefix UBERON: <http://purl.obolibrary.org/obo/UBERON_>
        prefix BFO: <http://purl.obolibrary.org/obo/BFO_>
        select distinct ?part ?partlabel
        from <http://reasoner.renci.org/nonredundant>
        from <http://example.org/uberon-hp-cl.ttl>
        where {
          $anatomy_id BFO:0000051 ?part .
          graph <http://reasoner.renci.org/redundant> {
            ?part rdfs:subClassOf UBERON:0001062 .
          }
          ?part rdfs:label ?partlabel .
        }
        """
        results = self.triplestore.query_template(
            inputs={'anatomy_id': anatomy_identifier},
            outputs=['part', 'partlabel'],
            template_text=text)
        for result in results:
            result['curie'] = Text.obo_to_curie(result['part'])
        return results

    def anatomy_to_cell(self, anatomy_identifier):
        """Identify cell types that are part of an anatomical entity.

        The query keeps ``?cellID`` values that are subclasses of CL:0000000
        and are related by BFO:0000050 to the given anatomy term.

        :param anatomy_identifier: identifier substituted for ``$anatomyID``.
        :return: query rows with ``cellID`` and ``cellLabel``.
        """
        text = """
        prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        prefix UBERON: <http://purl.obolibrary.org/obo/UBERON_>
        prefix CL: <http://purl.obolibrary.org/obo/CL_>
        prefix BFO: <http://purl.obolibrary.org/obo/BFO_>
        select distinct ?cellID ?cellLabel
        from <http://reasoner.renci.org/nonredundant>
        from <http://example.org/uberon-hp-cl.ttl>
        where {
          graph <http://reasoner.renci.org/redundant> {
            ?cellID rdfs:subClassOf CL:0000000 .
            ?cellID BFO:0000050 $anatomyID .
          }
          ?cellID rdfs:label ?cellLabel .
        }
        """
        return self.triplestore.query_template(
            inputs={'anatomyID': anatomy_identifier},
            outputs=['cellID', 'cellLabel'],
            template_text=text)

    def cell_to_anatomy(self, cell_identifier):
        """Identify anatomy terms that a cell type is part of.

        The query keeps ``?anatomyID`` values that are subclasses of
        UBERON:0001062 and to which the cell is related by BFO:0000050.

        :param cell_identifier: identifier substituted for ``$cellID``.
        :return: query rows with ``anatomyID`` and ``anatomyLabel``.
        """
        text = """
        prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        prefix CL: <http://purl.obolibrary.org/obo/CL_>
        prefix BFO: <http://purl.obolibrary.org/obo/BFO_>
        prefix UBERON: <http://purl.obolibrary.org/obo/UBERON_>
        select distinct ?anatomyID ?anatomyLabel
        from <http://reasoner.renci.org/nonredundant>
        from <http://example.org/uberon-hp-cl.ttl>
        where {
          graph <http://reasoner.renci.org/redundant> {
            ?anatomyID rdfs:subClassOf UBERON:0001062 .
            $cellID BFO:0000050 ?anatomyID .
          }
          ?anatomyID rdfs:label ?anatomyLabel .
        }
        """
        return self.triplestore.query_template(
            inputs={'cellID': cell_identifier},
            outputs=['anatomyID', 'anatomyLabel'],
            template_text=text)

    def phenotype_to_anatomy(self, hp_identifier):
        """Identify anatomy terms related to a phenotype.

        :param hp_identifier: HP identifier substituted for ``$HPID``.
        :return: query rows with ``anatomy_id``, ``anatomy_label`` and
            ``input_label`` (the label of the input phenotype).
        """
        # The subclassof uberon:0001062 ensures that the result
        # is an anatomical entity.
        text = """
        prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        prefix UBERON: <http://purl.obolibrary.org/obo/UBERON_>
        prefix HP: <http://purl.obolibrary.org/obo/HP_>
        prefix part_of: <http://purl.obolibrary.org/obo/BFO_0000050>
        prefix has_part: <http://purl.obolibrary.org/obo/BFO_0000051>
        prefix depends_on: <http://purl.obolibrary.org/obo/RO_0002502>
        prefix phenotype_of: <http://purl.obolibrary.org/obo/UPHENO_0000001>
        select distinct ?anatomy_id ?anatomy_label ?input_label
        from <http://reasoner.renci.org/nonredundant>
        from <http://example.org/uberon-hp-cl.ttl>
        where {
          graph <http://reasoner.renci.org/redundant> {
            ?anatomy_id rdfs:subClassOf UBERON:0001062 .
          }
          ?anatomy_id rdfs:label ?anatomy_label .
          graph <http://reasoner.renci.org/nonredundant> {
            ?phenotype phenotype_of: ?anatomy_id .
          }
          graph <http://reasoner.renci.org/redundant> {
            $HPID rdfs:subClassOf ?phenotype .
          }
          $HPID rdfs:label ?input_label .
        }
        """
        return self.triplestore.query_template(
            inputs={'HPID': hp_identifier},
            outputs=['anatomy_id', 'anatomy_label', 'input_label'],
            template_text=text)

    def anatomy_to_phenotype(self, uberon_id):
        """Identify phenotypes related to an anatomy term.

        The query finds phenotypes that are ``phenotype_of`` the given term
        and returns those phenotypes' subclasses along with their labels.

        :param uberon_id: identifier substituted for ``$UBERONID``.
        :return: query rows with ``pheno_id``, ``anatomy_label`` and
            ``pheno_label``.
        """
        text = """
        prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        prefix UBERON: <http://purl.obolibrary.org/obo/UBERON_>
        prefix HP: <http://purl.obolibrary.org/obo/HP_>
        prefix part_of: <http://purl.obolibrary.org/obo/BFO_0000050>
        prefix has_part: <http://purl.obolibrary.org/obo/BFO_0000051>
        prefix depends_on: <http://purl.obolibrary.org/obo/RO_0002502>
        prefix phenotype_of: <http://purl.obolibrary.org/obo/UPHENO_0000001>
        select distinct ?pheno_id ?anatomy_label ?pheno_label
        from <http://reasoner.renci.org/nonredundant>
        from <http://example.org/uberon-hp-cl.ttl>
        where {
          $UBERONID rdfs:label ?anatomy_label .
          graph <http://reasoner.renci.org/nonredundant> {
            ?phenotype phenotype_of: $UBERONID .
          }
          graph <http://reasoner.renci.org/redundant> {
            ?pheno_id rdfs:subClassOf ?phenotype .
          }
          ?pheno_id rdfs:label ?pheno_label .
        }
        """
        return self.triplestore.query_template(
            inputs={'UBERONID': uberon_id},
            outputs=['pheno_id', 'anatomy_label', 'pheno_label'],
            template_text=text)

    def get_anatomy_by_cell_graph(self, cell_node):
        """Return ``(edge, anatomy_node)`` pairs for anatomy the cell is part of.

        :param cell_node: node whose ``id`` is passed to ``cell_to_anatomy``.
        """
        predicate = LabeledID(identifier='BFO:0000050', label='part_of')
        results = []
        for row in self.cell_to_anatomy(cell_node.id):
            anatomy_node = KNode(Text.obo_to_curie(row['anatomyID']),
                                 type=node_types.ANATOMY,
                                 name=row['anatomyLabel'])
            edge = self.create_edge(cell_node, anatomy_node,
                                    'uberongraph.get_anatomy_by_cell_graph',
                                    cell_node.id, predicate)
            results.append((edge, anatomy_node))
        return results

    def get_cell_by_anatomy_graph(self, anatomy_node):
        """Return ``(edge, cell_node)`` pairs for cells that are part of the anatomy.

        The edge runs from the cell to the anatomy node (cell part_of anatomy).

        :param anatomy_node: node whose ``id`` is passed to ``anatomy_to_cell``.
        """
        predicate = LabeledID(identifier='BFO:0000050', label='part_of')
        results = []
        for row in self.anatomy_to_cell(anatomy_node.id):
            cell_node = KNode(Text.obo_to_curie(row['cellID']),
                              type=node_types.CELL,
                              name=row['cellLabel'])
            edge = self.create_edge(cell_node, anatomy_node,
                                    'uberongraph.get_cell_by_anatomy_graph',
                                    anatomy_node.id, predicate)
            results.append((edge, cell_node))
        return results

    def create_phenotype_anatomy_edge(self, node_id, node_label, input_id, phenotype_node):
        """Build an anatomy node and the anatomy -> phenotype edge for it.

        :param node_id: IRI of the anatomy term (converted to a CURIE).
        :param node_label: name given to the new anatomy node.
        :param input_id: identifier recorded as the edge's input id.
        :param phenotype_node: existing phenotype node used as edge target.
        :return: ``(edge, anatomy_node)``.
        """
        predicate = LabeledID(identifier='GAMMA:0000002',
                              label='inverse of has phenotype affecting')
        anatomy_node = KNode(Text.obo_to_curie(node_id),
                             type=node_types.ANATOMY,
                             name=node_label)
        edge = self.create_edge(anatomy_node, phenotype_node,
                                'uberongraph.get_anatomy_by_phenotype_graph',
                                input_id, predicate)
        return edge, anatomy_node

    def create_anatomy_phenotype_edge(self, node_id, node_label, input_id, anatomy_node):
        """Build a phenotype node and the anatomy -> phenotype edge for it.

        :param node_id: IRI of the phenotype term (converted to a CURIE).
        :param node_label: name given to the new phenotype node.
        :param input_id: identifier recorded as the edge's input id.
        :param anatomy_node: existing anatomy node used as edge source.
        :return: ``(edge, phenotype_node)``.
        """
        predicate = LabeledID(identifier='GAMMA:0000002',
                              label='inverse of has phenotype affecting')
        phenotype_node = KNode(Text.obo_to_curie(node_id),
                               type=node_types.PHENOTYPE,
                               name=node_label)
        edge = self.create_edge(anatomy_node, phenotype_node,
                                'uberongraph.get_phenotype_by_anatomy_graph',
                                input_id, predicate)
        return edge, phenotype_node

    def get_anatomy_by_phenotype_graph(self, phenotype_node):
        """Return ``(edge, anatomy_node)`` pairs for anatomy affected by a phenotype.

        Every HP synonym of ``phenotype_node`` is queried. For each anatomy
        hit, its parts (``get_anatomy_parts``) are also linked to the
        phenotype. As a side effect, ``phenotype_node.name`` is filled from
        the query's ``input_label`` when it is ``None``.
        """
        results = []
        for curie in phenotype_node.get_synonyms_by_prefix('HP'):
            for row in self.phenotype_to_anatomy(curie):
                edge, node = self.create_phenotype_anatomy_edge(
                    row['anatomy_id'], row['anatomy_label'], curie, phenotype_node)
                if phenotype_node.name is None:
                    phenotype_node.name = row['input_label']
                results.append((edge, node))
                # These tend to be very high level terms. Let's also get their
                # parts to be more inclusive.
                # TODO: there ought to be a more principled way to take care of
                # this, but it highlights the uneasy relationship between the
                # high level world of smartapi and the low-level sparql-vision.
                for part in self.get_anatomy_parts(row['anatomy_id']):
                    pedge, pnode = self.create_phenotype_anatomy_edge(
                        part['part'], part['partlabel'], curie, phenotype_node)
                    results.append((pedge, pnode))
        return results

    def get_phenotype_by_anatomy_graph(self, anatomy_node):
        """Return ``(edge, phenotype_node)`` pairs for phenotypes of an anatomy term.

        Every UBERON synonym of ``anatomy_node`` is queried. As a side
        effect, ``anatomy_node.name`` is filled from the query's
        ``anatomy_label`` when it is ``None``.
        """
        results = []
        for curie in anatomy_node.get_synonyms_by_prefix('UBERON'):
            for row in self.anatomy_to_phenotype(curie):
                edge, node = self.create_anatomy_phenotype_edge(
                    row['pheno_id'], row['pheno_label'], curie, anatomy_node)
                if anatomy_node.name is None:
                    anatomy_node.name = row['anatomy_label']
                results.append((edge, node))
        return results
class OntologicalHeirarchy(Service):
    """
    Service that makes call to uberongraph to resolve subclass relationships
    between ontological terms
    """

    def __init__(self, context):
        """Register the service and precompute the ancestor query template.

        ``prefix_set`` maps a node type to the CURIE prefixes looked up for
        it; ``root_uris`` maps a node type to the IRI of the root class that
        every returned ancestor must be a subclass of.
        """
        super(OntologicalHeirarchy, self).__init__("ontological_hierarchy", context)
        self.triplestore = TripleStore(self.url)
        self.prefix_set = {
            node_types.DISEASE_OR_PHENOTYPIC_FEATURE: ['HP', 'MONDO'],
            # NOTE(review): CL is the prefix used for cell terms elsewhere in
            # this file; confirm it is intended for CELLULAR_COMPONENT.
            node_types.CELLULAR_COMPONENT: ['CL'],
            node_types.BIOLOGICAL_PROCESS_OR_ACTIVITY: ['GO'],
            node_types.ANATOMICAL_ENTITY: ['UBERON'],
            node_types.CHEMICAL_SUBSTANCE: ['CHEBI'],
        }
        # Each value is substituted verbatim for $root_uri in the SPARQL
        # below, so it must be a complete IRI reference including the
        # enclosing angle brackets.
        self.root_uris = {
            node_types.ANATOMICAL_ENTITY: "<http://purl.obolibrary.org/obo/UBERON_0001062>",
            node_types.DISEASE: "<http://purl.obolibrary.org/obo/MONDO_0000001>",
            node_types.MOLECULAR_ACTIVITY: "<http://purl.obolibrary.org/obo/GO_0003674>",
            node_types.BIOLOGICAL_PROCESS: "<http://purl.obolibrary.org/obo/GO_0008150>",
            node_types.CHEMICAL_SUBSTANCE: "<http://purl.obolibrary.org/obo/CHEBI_24431>",
            node_types.PHENOTYPIC_FEATURE: "<http://purl.obolibrary.org/obo/HP_0000118>",
            node_types.CELL: "<http://purl.obolibrary.org/obo/CL_0000000>",
            node_types.CELLULAR_COMPONENT: "<http://purl.obolibrary.org/obo/GO_0005575>",
        }
        # One PREFIX declaration per distinct prefix; sorted so the generated
        # query text is deterministic.
        distinct_prefixes = sorted(
            {pref for prefixes in self.prefix_set.values() for pref in prefixes})
        obo_prefixes = '\n'.join(
            f'PREFIX {pref}: <http://purl.obolibrary.org/obo/{pref}_>'
            for pref in distinct_prefixes)
        self.query = f"""
        {obo_prefixes}
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        select distinct ?parent_id ?label
        where {{
          graph <http://reasoner.renci.org/ontology/closure> {{
            $child_curie rdfs:subClassOf ?parent_id .
            ?parent_id rdfs:subClassOf $root_uri .
          }}
          graph <http://reasoner.renci.org/ontology>{{
            ?parent_id rdfs:label ?label.
          }}
        }}
        """

    def term_get_ancestors(self, child_node):
        """Return ``(edge, ancestor_node)`` pairs for the superclasses of a term.

        :param child_node: node whose ``type`` selects the root class and
            whose ``export_labels`` select which synonym prefixes are queried.
        :return: list of ``(edge, ancestor_node)`` tuples; empty when no root
            IRI is registered for ``child_node.type``.
        """
        root_uri = self.root_uris.get(child_node.type, None)
        if not root_uri:
            return []

        # Step 1: collect the node's synonyms for every prefix supported by
        # any of its exported labels.
        curie_set = set()
        for node_type in child_node.export_labels:
            for prefix in self.prefix_set.get(node_type, []):
                curie_set.update(child_node.get_synonyms_by_prefix(prefix))

        # Step 2: get parents for those curies from uberongraph; the query is
        # bounded above by root_uri.
        predicate = LabeledID(identifier='rdfs:subClassOf', label='subclass of')
        outputs = []
        for curie in curie_set:
            rows = self.triplestore.query_template(
                template_text=self.query,
                inputs={'child_curie': curie, 'root_uri': root_uri},
                outputs=['parent_id', 'label'])
            for row in rows:
                # NOTE(review): ancestors are given the same type as the
                # input node -- the original author flagged this as uncertain.
                ancestor_node = KNode(Text.obo_to_curie(row['parent_id']),
                                      name=row['label'],
                                      type=child_node.type)
                if ancestor_node.id == child_node.id:
                    # refrain from adding edge to the node itself
                    continue
                edge = self.create_edge(
                    source_node=child_node,
                    target_node=ancestor_node,
                    predicate=predicate,
                    provided_by='uberongraph.term_get_ancestors',
                    input_id=child_node.id)
                outputs.append((edge, ancestor_node))
        return outputs