Python TripleStore.query_templateの例

プログラミング言語: Python

名前空間/パッケージ名: greent.triplestore

クラス/型: TripleStore

メソッド/関数: query_template

hotexamples.comのコード掲載数: 9

Python TripleStore.query_template - 9件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのgreent.triplestore.TripleStore.query_templateの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示 / 非表示

TripleStore(9)

query_template(9)

execute_query(4)

get_template(1)

コード例 #1

ファイルを表示

class MeshKS(Service):
    """Service that resolves MeSH term identifiers to labels over SPARQL."""

    def __init__(self, context, url):
        """Register as the "mesh" service and create the triple store client.

        NOTE(review): the ``url`` argument is never read; the client is built
        from ``self.url``, which is presumably populated by
        ``Service.__init__`` -- confirm against the base class.
        """
        super(MeshKS, self).__init__("mesh", context)
        self.triplestore = TripleStore(self.url)

    def get_label_by_id(self, term_id):
        """Return ``[{'label': ...}, ...]`` for the rdfs:label values of *term_id*.

        *term_id* is substituted for ``$term_id`` in the query text as-is.
        """
        sparql = """
            PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
            PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
            PREFIX owl: <http://www.w3.org/2002/07/owl#>
            PREFIX meshv: <http://id.nlm.nih.gov/mesh/vocab#>
            PREFIX mesh: <http://id.nlm.nih.gov/mesh/>
            PREFIX mesh2015: <http://id.nlm.nih.gov/mesh/2015/>
            PREFIX mesh2016: <http://id.nlm.nih.gov/mesh/2016/>
            PREFIX mesh2017: <http://id.nlm.nih.gov/mesh/2017/>
            PREFIX mesh2018: <http://id.nlm.nih.gov/mesh/2018/>
            SELECT DISTINCT ?label
            FROM <http://id.nlm.nih.gov/mesh>
            WHERE {
               VALUES (?id) { ( $term_id ) }
               ?id rdfs:label ?label .
            }
            ORDER BY ?label
            """
        rows = self.triplestore.query_template(inputs={"term_id": term_id},
                                               outputs=['label'],
                                               template_text=sparql)
        # Keep only the label column of each returned row.
        return [{'label': row['label']} for row in rows]

コード例 #2

ファイルを表示

class MeSH(object):
    def __init__(self, uri="http://id.nlm.nih.gov/mesh/sparql"):
        """Create the triple store client for the SPARQL endpoint at *uri*."""
        self.triplestore = TripleStore (uri)
    def get_broader(self, term):
        """Query broader MeSH descriptors of items whose label matches *term*.

        *term* is substituted into a case-insensitive ``regex`` filter on the
        item label.  Returns whatever ``query_template`` yields for the
        ``obj`` and ``name`` outputs.

        NOTE(review): the query orders by ``?p``, which is not bound anywhere
        in the pattern -- confirm which variable was intended.
        """
        sparql = """
            $prefixes
            SELECT DISTINCT ?obj ?name ?itemName FROM  <http://id.nlm.nih.gov/mesh>
            WHERE {
               ?item  meshv:broaderDescriptor ?obj ;
                      rdfs:label              ?itemName.
               ?obj   rdfs:label              ?name .
              filter (regex(lcase(str(?itemName)), lcase(str("$term"))))
            } 
            ORDER BY ?p
            """
        bindings = {"term": term, "prefixes": self.get_prefixes()}
        return self.triplestore.query_template(inputs=bindings,
                                               outputs=["obj", "name"],
                                               template_text=sparql)

    """
    SELECT DISTINCT ?obj ?name FROM  <http://id.nlm.nih.gov/mesh>
            WHERE {
               $term   meshv:broaderDescriptor ?obj .
               ?obj    rdfs:label              ?name .
            } 
            ORDER BY ?p
            """
    def get_prefixes (self):
        return """

コード例 #3

ファイルを表示

ファイル: ontology.py プロジェクト: NCATS-Tangerine/reasoner-tools

class GenericOntology(Service):
    """ Sure, don't just dig around in obo files they say. But when the SPARQL is dry, we will drink straight from the obo if need be. """
    def __init__(self, context, obo):
        """Configure SPARQL access from the 'uberongraph' service settings.

        Builds a ``TripleStore`` and a ``SPARQLWrapper`` on the configured
        URL and caches one ``PREFIX`` declaration per entry of the CURIE map.

        NOTE(review): ``obo`` is accepted but never read in this method.
        """
        super(GenericOntology, self).__init__("go", context)
        service_conf = context.config.get_service('uberongraph')
        self.url = service_conf.get("url", None)
        self.triplestore = TripleStore(self.url)
        self.sparql_wrapper = SPARQLWrapper(self.url)
        prefix_to_uri = Curie_Resolver.get_curie_to_uri_map()
        self.resolve_uri = Curie_Resolver.uri_to_curie
        # One SPARQL PREFIX line per known CURIE prefix.
        self.ontology_prefixes = [
            f'PREFIX {prefix}: <{prefix_to_uri[prefix]}>'
            for prefix in prefix_to_uri
        ]

    def add_sparql_prefixes(self, query_template):
        return '\n'.join(self.ontology_prefixes) + '\n' + query_template

    def run_sparql_query_raw(self, query):
        """Run *query* through the SPARQLWrapper client and return the converted result.

        The cached PREFIX lines are prepended first and the return format is
        set to ``JSON`` before the query is sent.
        """
        wrapper = self.sparql_wrapper
        wrapper.setQuery(self.add_sparql_prefixes(query))
        wrapper.setReturnFormat(JSON)
        return wrapper.query().convert()

    def query_sparql(self, query_template, inputs, outputs):
        """Run a templated query through the triple store.

        The cached PREFIX lines are prepended here so individual callers do
        not have to repeat them.  *inputs* and *outputs* are forwarded
        unchanged to ``TripleStore.query_template``, whose result is returned.
        """
        full_query = self.add_sparql_prefixes(query_template)
        # Tracing the query text is diagnostics, not a failure: log at DEBUG
        # so routine queries do not flood the error log.
        logger.debug(full_query)
        return self.triplestore.query_template(template_text=full_query,
                                               inputs=inputs,
                                               outputs=outputs)

    def label(self, identifier):
        """Return the label for an identifier"""
        query_text = f"""
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            SELECT DISTINCT ?labels
            FROM     <http://reasoner.renci.org/ontology>
            WHERE {{    
               $identifier rdfs:label ?labels.               
            }}
        """
        results = self.query_sparql(query_text,
                                    inputs={'identifier': identifier},
                                    outputs=['labels'])
        return results[0]["labels"] if len(results) > 0 else ''

    def is_a(self, identifier, ancestors):
        """Determine whether a term has a particular ancestor"""
        query_template = lambda ancestor: f"""
            PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            ASK
            FROM <http://reasoner.renci.org/ontology/closure>
            FROM <http://reasoner.renci.org/ontology>
            WHERE {{    
                {identifier} rdfs:subClassOf {ancestor}.               
            }}
            """
        result = []
        for ancestor in ancestors.split(','):
            ancestor = ancestor.strip(' ')
            response = self.run_sparql_query_raw(query_template(ancestor))
            if response['boolean']:
                result.append(ancestor)
        is_a = len(result) > 0
        return is_a, result

    def single_level_is_a(self, identifier):
        """ Get single-level 'is_a' descendants. """
        query_text = """
            PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            PREFIX ID: <http://www.geneontology.org/formats/oboInOwl#id>
            SELECT DISTINCT ?descendant ?descendant_id
            FROM     <http://reasoner.renci.org/ontology>
            WHERE {    
              ?descendant rdfs:subClassOf  $identifier
              OPTIONAL {                
                ?descendant ID: ?descendant_id
              }
            }
            """
        results = self.query_sparql(query_text,
                                    inputs={'identifier': identifier},
                                    outputs=['descendant', 'descendant_id'])
        response_curies = reduce(
            lambda x, y: x + [
                y['descendant_id']
                if 'descendant_id' in y else self.resolve_uri(y['descendant'])
            ], results, [])
        return response_curies

    def descendants(self, identifier):
        """ This is also known as a recursive-'is_a' function, returning all levels below the input"""
        query_text = f"""
            PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            PREFIX ID: <http://www.geneontology.org/formats/oboInOwl#id>
            SELECT DISTINCT ?descendant ?descendant_id
            FROM <http://reasoner.renci.org/ontology/closure>
            WHERE {{    
                ?descendant rdfs:subClassOf $identifier.
                OPTIONAL {{ ?descendant ID: ?descendant_id. }}
            }}
            """
        results = self.query_sparql(query_template=query_text,
                                    inputs={'identifier': identifier},
                                    outputs=['descendant', 'descendant_id'])

        result_list = reduce(
            lambda x, y: x + [
                y['descendant_id']
                if 'descendant_id' in y else self.resolve_uri(y['descendant'])
            ], results, [])
        return result_list

    def ancestors(self, identifier):
        """ This is also known as a recursive-'is_a' function, returning all levels below the input"""
        query_text = f"""
            PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            PREFIX ID: <http://www.geneontology.org/formats/oboInOwl#id>
            SELECT DISTINCT ?ancestor ?ancestor_id
            FROM <http://reasoner.renci.org/ontology/closure>
            WHERE {{    
                $identifier  rdfs:subClassOf ?ancestor.
                OPTIONAL {{
                    ?ancestor ID: ?ancestor_id.
                }}
            }}
            """
        results = self.query_sparql(query_template=query_text,
                                    inputs={'identifier': identifier},
                                    outputs=['ancestor', 'ancestor_id'])
        result_list = reduce(
            lambda x, y: x + [
                y['ancestor_id']
                if 'ancestor_id' in y else self.resolve_uri(y['ancestor'])
            ], results, [])
        return result_list

    def xrefs(self, identifier):
        """ Get external references. """
        query_text = f"""prefix xref: <http://www.geneontology.org/formats/oboInOwl#hasDbXref>
        SELECT DISTINCT ?xrefs
            FROM     <http://reasoner.renci.org/ontology>
            WHERE {{    
                $identifier xref: ?xrefs
            }}
        """
        results = self.query_sparql(query_template=query_text,
                                    inputs={'identifier': identifier},
                                    outputs=['xrefs'])
        results = reduce(lambda x, y: x + [y['xrefs']], results, [])
        return results

    def synonyms(self, identifier, curie_pattern=None):
        """ Get synonyms. """
        query_template = lambda predicate: f"""
        PREFIX RELATED_SYNONYM: <http://www.geneontology.org/formats/oboInOwl#hasRelatedSynonym>
        PREFIX EXACT_SYNONYM: <http://www.geneontology.org/formats/oboInOwl#hasExactSynonym>
        PREFIX XREF: <http://www.geneontology.org/formats/oboInOwl#hasDbXref>
        PREFIX DEFENITION: <http://purl.obolibrary.org/obo/IAO_0000115>
        SELECT DISTINCT ?desc ?xref ?defn
        WHERE {{
            $identifier {predicate} ?desc.
            OPTIONAL {{
                ?desc XREF: ?xref.
                ?desc DEFENITION: ?defn.
             }}
        }}
        """
        exact = self.query_sparql(
            query_template=query_template("EXACT_SYNONYM:"),
            inputs={'identifier': identifier},
            outputs=['desc', 'xref', 'defn'])
        for row in exact:
            row['scope'] = 'EXACT'
        related = self.query_sparql(
            query_template=query_template("RELATED_SYNONYM:"),
            inputs={'identifier': identifier},
            outputs=['desc', 'xref', 'defn'])
        for row in related:
            row['scope'] = 'RELATED'
        return exact + related

    def search(self, text, is_regex=False, ignore_case=True):
        """ Search for the text, treating it as a regular expression if indicated. """
        search_string = text
        if is_regex and ignore_case:
            filtr = f"""
                (
                   regex(str(?definition), "$search_string","i") || 
                   regex(str(?label), "$search_string","i") ||
                   regex(str(?related_synonym), "$search_string","i") ||
                   regex(str(?exact_synonym), "$search_string","i")
                )"""
        elif is_regex and not ignore_case:
            filtr = f"""
                (
                   regex(str(?definition), "$search_string") || 
                   regex(str(?label), "$search_string") ||
                   regex(str(?related_synonym), "$search_string") ||
                   regex(str(?exact_synonym), "$search_string")
                )
            """
        elif not is_regex and ignore_case:
            search_string = search_string.lower()
            filtr = f"""
                (
                    lcase(str(?label))= "$search_string" ||
                    lcase(str(?definition))= "$search_string" ||
                    lcase(str(?related_synonym))= "$search_string" ||
                    lcase(str(?exact_synonym))= "$search_string" 
                )
                    """
        else:
            filtr = f"""
                (
                    str(?label) = "$search_string" ||
                    str(?definition) = "$search_string" ||
                    str(?exact_synonym) = "$search_string" ||
                    str(?related_synonym) = "$search_string" 
                )"""

        query_text = f"""
        PREFIX DEFINED_BY: <http://www.w3.org/2000/01/rdf-schema#isDefinedBy>
        PREFIX DEFINITION: <http://purl.obolibrary.org/obo/IAO_0000115>
        PREFIX RELATED_SYNONYM: <http://www.geneontology.org/formats/oboInOwl#hasRelatedSynonym>
        PREFIX EXACT_SYNONYM: <http://www.geneontology.org/formats/oboInOwl#hasExactSynonym>        
        SELECT DISTINCT ?id ?label ?definition ?defined_by        
        WHERE {{
            OPTIONAL{{
                ?id EXACT_SYNONYM: ?exact_synonym.                
            }}
            OPTIONAL {{
                ?id RELATED_SYNONYM: ?related_synonym.
            }}          
            OPTIONAL {{
                ?id rdfs:label ?label.
            }}
            OPTIONAL {{
                ?id DEFINITION: ?definition.
            }}
            OPTIONAL {{
                ?id DEFINED_BY: ?defined_by.
            }}
            FILTER {filtr}.
        }}
        """
        response = self.query_sparql(
            query_template=query_text,
            inputs={'search_string': search_string},
            outputs=['id', 'label', 'defined_by', 'definition'])
        for row in response:
            row['id'] = Curie_Resolver.uri_to_curie(row['id'])
        return response

    def lookup(self, identifier):
        """ Given an identifier, find ids in the ontology for which it is an xref. """
        assert identifier and ':' in identifier, "Must provide a valid curie. Curie must have format " \
                                                 "<PREFIX>:<ID>"
        query_template = """
        PREFIX XREF: <http://www.geneontology.org/formats/oboInOwl#hasDbXref>
        PREFIX LABEL: <http://www.w3.org/2000/01/rdf-schema#label>
        PREFIX ID: <http://www.geneontology.org/formats/oboInOwl#id>
                SELECT DISTINCT  ?xrefs ?term_id ?term_label
                    FROM     <http://reasoner.renci.org/ontology>
                    WHERE {
                    ?term XREF: ?o;
                            XREF: ?xrefs;
                            ID: ?term_id;
                            LABEL: ?term_label.
                    FILTER(?o = '$identifier' && !isBlank(?term)).               
                    } 
        """
        result = self.query_sparql(query_template=query_template,
                                   inputs={'identifier': identifier},
                                   outputs=['xrefs', 'term_id', 'term_label'])
        response = []
        buffer = {}
        for row in result:
            if row['term_id'] not in buffer:
                buffer[row['term_id']] = {
                    'label': row['term_label'],
                    'xrefs': []
                }
            xref = row['xrefs']
            buffer[row['term_id']]['xrefs'] += [
                row['xrefs']
            ] if row['xrefs'] not in buffer[row['term_id']]['xrefs'] else []
        for term_id in buffer:
            response.append({
                'id': term_id,
                'label': buffer[term_id]['label'],
                'xrefs': buffer[term_id]['xrefs']
            })
        return response

    def id_list(self, identifier):
        """List ids of OWL classes whose URI starts with the URI mapped to *identifier*.

        *identifier* is upper-cased and looked up in the CURIE-to-URI map; an
        unknown prefix yields ``[]``.  Each row contributes its ``term_id``
        when present, otherwise the term URI converted by ``self.resolve_uri``.
        """
        identifier_uri = Curie_Resolver.get_curie_to_uri_map().get(
            identifier.upper(), None)
        if identifier_uri is None:
            return []
        query = f"""
                PREFIX TYPE: <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>
                PREFIX CLASS: <http://www.w3.org/2002/07/owl#Class>
                PREFIX ID: <http://www.geneontology.org/formats/oboInOwl#id>
                SELECT DISTINCT ?term ?term_id
                FROM <http://reasoner.renci.org/ontology>
                        WHERE {{
                        ?term TYPE: CLASS:.
                        FILTER (strstarts(lcase(str(?term)), lcase("$identifier")))
                        OPTIONAL {{
                            ?term ID: ?term_id #try to get the id from sparql else parse ?? 
                        }}
                        }} 
                """
        result = self.query_sparql(query_template=query,
                                   inputs={'identifier': identifier_uri},
                                   outputs=['term', 'term_id'])
        # A comprehension avoids rebuilding the list on every row.
        return [
            row['term_id'] if 'term_id' in row
            else self.resolve_uri(row['term'])
            for row in result
        ]

    def exactMatch(self, identifier):
        #if ontolgies are missing we should be looking here
        query_string = lambda predicate: f"""
            PREFIX EXACT_MATCH: <http://www.w3.org/2004/02/skos/core#exactMatch>
            PREFIX EQUIVALENT_CLASS: <http://www.w3.org/2002/07/owl#equivalentClass>
            PREFIX ID: <http://www.geneontology.org/formats/oboInOwl#id>
            SELECT DISTINCT ?match ?match_id
            FROM <http://reasoner.renci.org/ontology>
                    WHERE {{
                     $identifier {predicate} ?match.      
                     OPTIONAL {{
                         ?match ID: ?match_id.
                     }} 
                     FILTER (!isBlank(?match)) #This sometimes returns blank nodes         
                    }} 
            """
        result = reduce(
            lambda x, y: x + [
                y['match_id']
                if 'match_id' in y else self.resolve_uri(y['match'])
            ],
            self.query_sparql(query_template=query_string('EXACT_MATCH:'),
                              inputs={'identifier': identifier},
                              outputs=['match', 'match_id']), [])
        result += list(
            filter(
                lambda x: x not in result,
                reduce(
                    lambda x, y: x + [
                        y['match_id']
                        if 'match_id' in y else self.resolve_uri(y['match'])
                    ],
                    self.query_sparql(
                        query_template=query_string('EQUIVALENT_CLASS:'),
                        inputs={'identifier': identifier},
                        outputs=['match', 'match_id']), [])))
        return result

    def closeMatch(self, identifier):
        query_template = """
            PREFIX CLOSE_MATCH: <http://www.w3.org/2004/02/skos/core#closeMatch>
            PREFIX ID: <http://www.geneontology.org/formats/oboInOwl#id>
            SELECT DISTINCT ?match ?match_id
            FROM <http://reasoner.renci.org/ontology>
                    WHERE {
                     $identifier CLOSE_MATCH: ?match.      
                     OPTIONAL {
                         ?match ID: ?match_id.
                     } 
                     FILTER (!isBlank(?match)) #This sometimes returns blank nodes         
                    } 
        """
        results = reduce(
            lambda x, y: x + [
                y['match_id']
                if 'match_id' in y else self.resolve_uri(y['match'])
            ],
            self.query_sparql(query_template=query_template,
                              inputs={'identifier': identifier},
                              outputs=['match', 'match_id']), [])
        return results

    def subterms(self, identifier):
        return self.descendants(identifier)

    def superterms(self, identifier):
        return self.ancestors(identifier)

    def parents(self, identifier):
        """First generation ancestors"""
        query_template = f"""PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            PREFIX ID: <http://www.geneontology.org/formats/oboInOwl#id>
            SELECT DISTINCT ?parent ?parent_id
            FROM     <http://reasoner.renci.org/ontology>
            WHERE {{    
             $identifier  rdfs:subClassOf  ?parent.
             OPTIONAL {{
                 ?parent ID: ?parent_id
             }} 
            FILTER(!isBlank(?parent))
            }}"""
        result = reduce(
            lambda x, y: x + [
                y['parent_id']
                if 'parent_id' in y else self.resolve_uri(y['parent'])
            ],
            self.query_sparql(query_template=query_template,
                              inputs={'identifier': identifier},
                              outputs=['parent', 'parent_id']), [])
        return result

    def children(self, identifier):
        """first generation descedants"""
        result = self.single_level_is_a(identifier)
        return result

    def siblings(self, identifier):
        """
        Common parents 
        """
        parents = self.parents(identifier)
        sibilings = []
        for parent in parents:
            sibilings += list(
                filter(
                    lambda x: x != identifier and x not in sibilings,
                    self.children(parent if 'http' not in
                                  parent else f'<{parent}>')))
        return sibilings

    def property_value(self, identifier, property_key):
        """ Get properties """
        query_template = """
        SELECT ?property_value 
        WHERE {
            $identifier <$property_key> ?property_value.
        }
        """
        result = self.query_sparql(query_template=query_template,
                                   inputs={
                                       'identifier': identifier,
                                       'property_key': property_key
                                   },
                                   outputs=['property_value'])
        response = reduce(lambda x, y: x + [y['property_value']], result, [])
        if len(response) == 1:
            return response[0]
        else:
            return response

    def all_properties(self, identifier):
        """ Get ALL properties for a CURIE """
        query_template = """
        SELECT ?property_key ?property_value ?property_label
        FROM <http://reasoner.renci.org/ontology>
        WHERE
         {
            $identifier ?property_key ?property_value.
            OPTIONAL {
                ?property_key rdfs:label ?property_label.
                }
            FILTER (!isBlank(?property_value))
        }
        """
        results = self.query_sparql(
            query_template=query_template,
            inputs={'identifier': identifier},
            outputs={'property_value', 'property_key', 'property_label'})
        # group it by property label for those which have label
        grouped = {}
        for row in results:
            label = row['property_label'] if 'property_label' in row else None
            key = row['property_key']
            if key not in grouped:
                grouped[key] = {'property_label': label, 'property_values': []}
            if row['property_value'] not in grouped[key]['property_values']:
                grouped[key]['property_values'].append(row['property_value'])
            for key in grouped:
                grouped[key].update({'property_key': key})
        return list(map(lambda x: grouped[x], grouped))

コード例 #4

ファイルを表示

class ChemBioKS(Service):
    """ Generic service endpoints for medical and bio-chemical data. This set
        comprises portions of chem2bio2rdf (CTD, KEGG, PubChem, DRUGBANK) """
    def __init__(self, context):  #triplestore):
        """Register as the "chembio" service and create the triple store client.

        NOTE(review): ``self.url`` is presumably set by ``Service.__init__``
        -- confirm against the base class.
        """
        super(ChemBioKS, self).__init__("chembio", context)
        self.triplestore = TripleStore(self.url)

    def query_chembio(self, query):
        """ Execute and return the result of a SPARQL query. """
        return self.triplestore.execute_query(query)

    def get_exposure_conditions(self, chemicals):
        """ Identify conditions (MeSH IDs) triggered by the specified stressor
            agent ids (also MeSH IDs).

        :param chemicals: List of IDs for substances of interest.
        :type chemicals: list of MeSH IDs, eg. D052638
        """
        id_list = ' '.join(
            list(map(lambda d: "( mesh:{0} )".format(d), chemicals)))
        text = self.triplestore.get_template ("ctd_gene_expo_disease").\
            safe_substitute (chemicals=id_list)
        results = self.triplestore.execute_query(text)
        return list(
            map(
                lambda b: {
                    "chemical": b['chemical'].value,
                    "gene": b['gene'].value,
                    "pathway": b['kegg_pathway'].value,
                    "pathName": b['pathway_name'].value,
                    "pathID": b['pathway_id'].value,
                    "human": '(human)' in b['pathway_name'].value
                }, results.bindings))

    def get_drugs_by_condition(self, conditions):
        """ Get drugs associated with a set of conditions.

        :param conditions: Conditions to find associated drugs for.
        :type conditions: List of MeSH IDs for conditions, eg.: D001249
        """
        if not isinstance(conditions, list):
            conditions = [conditions]

        conditions = list(
            map(lambda v: v.replace("MESH:", "mesh:"), conditions))
        prefix = "mesh:"
        if any(map(lambda v: v.startswith(prefix), conditions)):
            prefix = ""
        condition_list = ', '.join(
            list(map(lambda d: " {0}{1} ".format(prefix, d), conditions)))
        result = self.triplestore.query_template(
            inputs={"diseaseIds": condition_list.lower()},
            outputs=[
                'drugID', 'drugGenericName', 'pubChemCID', 'diseasePMIDs'
            ],
            template_text="""
            prefix mesh:           <http://bio2rdf.org/mesh:> 
            prefix ctd:            <http://chem2bio2rdf.org/ctd/resource/>
            prefix db_resource:    <http://chem2bio2rdf.org/drugbank/resource/>
            select ?drugID ?drugGenericName ?diseasePMIDs ?ctdChemDis ?pubChemCID where {
               values ( ?diseaseId ) { ( $diseaseIds ) }
               ?ctdChemDis  ctd:cid                        ?pubChemCID;
                            ctd:diseaseid                  ?diseaseId;
                            ctd:pubmedids                  ?diseasePMIDs.
               ?dbInter     db_resource:Name               ?name ;
	                    db_resource:DBID               ?drugID .
               ?drugID      db_resource:CID                ?pubChemCID ;
  	                    db_resource:Generic_Name       ?drugGenericName .
            }""")
        return result

    def get_drugs_by_condition_graph(self, conditions):
        """Return ``(KEdge, KNode)`` pairs for drugs linked to ``conditions.identifier``.

        The node id is the last ``/``-separated segment of each ``drugID``.
        """
        pairs = []
        for drug in self.get_drugs_by_condition(conditions.identifier):
            edge_props = {
                'cid': drug['pubChemCID'],
                'pmids': drug['diseasePMIDs']
            }
            edge = KEdge('c2b2r', 'conditionToDrug', edge_props)
            drug_id = drug['drugID'].split('/')[-1]
            node = KNode(drug_id, node_types.DRUG, drug['drugGenericName'])
            pairs.append((edge, node))
        return pairs

    def get_genes_pathways_by_disease(self, diseases):
        """ Get genes and pathways associated with specified conditions.

        :param diseases: List of conditions designated by MeSH ID.
        :return: Returns a list of dicts containing gene and path information.
        """
        diseaseMeshIDList = ' '.join(
            list(map(lambda d: "( mesh:{0} )".format(d), diseases)))
        text = self.triplestore.get_template(
            "genes_pathways_by_disease").safe_substitute(
                diseaseMeshIDList=diseaseMeshIDList)
        results = self.triplestore.execute_query(text)
        return list(
            map(
                lambda b: {
                    "uniprotGene": b['uniprotGeneID'].value,
                    "keggPath": b['keggPath'].value,
                    "pathName": b['pathwayName'].value,
                    "human": '(human)' in b['pathwayName'].value
                }, results.bindings))

    def get_drug_gene_disease(self, disease_name, drug_name):
        """ Identify targets and diseases assocaited with a drug name.
        :param disease_name: MeSH name of a disease condition.
        :type str: String
        :param drug_name: Name of a drug.
        :type str: String
        """
        text = self.triplestore.get_template(
            "drug_gene_disease").safe_substitute(diseaseName=disease_name,
                                                 drugName=drug_name)
        results = self.triplestore.execute_query(text)
        return list(
            map(
                lambda b: {
                    "uniprotSymbol": b['uniprotSym'].value,
                    "diseaseId": b['diseaseID'].value
                }, results.bindings))

    def pubchem_to_ncbigene(self, pubchemID):
        result = self.triplestore.query_template(
            inputs={"pubchemID": "pubchem:{}".format(pubchemID)},
            outputs=[
                'NCBIGene', 'meshID', 'interaction', 'interactionTypes',
                'pubmedids'
            ],
            template_text="""
            prefix pubchem:        <http://chem2bio2rdf.org/pubchem/resource/pubchem_compound/>
            prefix ctd:            <http://chem2bio2rdf.org/ctd/resource/>
	    select distinct ?NCBIGene ?meshID ?interaction ?interactionTypes ?pubmedids where {
  		?ctdChemGene 	ctd:cid                     $pubchemID;
               			ctd:chemicalid              ?meshID ;
                                ctd:geneid                  ?NCBIGene;
                                ctd:interaction             ?interaction;
                                ctd:interactiontypes        ?interactionTypes;
                                ctd:pubmedids               ?pubmedids.
            }""")
        return list(
            map(
                lambda r: {
                    'NCBIGene': r['NCBIGene'],
                    'meshID': r['meshID'],
                    'interaction': r['interaction'],
                    'interactionTypes': r['interactionTypes'],
                    'pubmedids': r['pubmedids']
                }, result))

    def drug_name_to_gene_symbol(self, drug_name):
        result = self.triplestore.query_template(
            inputs={"drugName": drug_name},
            outputs=['uniprotSym', 'pmids', 'drugID'],
            template_text="""
            prefix ctd:            <http://chem2bio2rdf.org/ctd/resource/>
            prefix db_resource:    <http://chem2bio2rdf.org/drugbank/resource/>
            select ?drugGenericName ?pmids ?drugID ?uniprotSym where {
               values ( ?drugName ) { ( "$drugName" ) }
               ?ctdChemGene ctd:cid                        ?pubChemCID;
                            ctd:pubmedids                  ?pmids;
                            ctd:gene                       ?uniprotSym .
               ?drugID      db_resource:CID                ?pubChemCID ;
  	                    db_resource:Generic_Name       ?drugGenericName .
               filter regex(lcase(str(?drugGenericName)), lcase(?drugName))
            }""")
        return list(
            map(
                lambda r: {
                    'uniprotSym': r['uniprotSym'],
                    'pmids': r.get('pmids', None),
                    'drugID': r['drugID']
                }, result))

    def drugname_to_pubchem(self, drug_name):
        result = self.triplestore.query_template(
            inputs={"drugName": drug_name},
            outputs=['pubChemID', 'drugGenericName'],
            template_text="""
            prefix db_resource:    <http://chem2bio2rdf.org/drugbank/resource/>
            select distinct ?pubChemID ?drugGenericName where {
               values ( ?drugName ) { ( "$drugName" ) }
               ?drugID      db_resource:CID                ?pubChemID ;
  	                    db_resource:Generic_Name       ?drugGenericName .
               filter regex(lcase(str(?drugGenericName)), lcase(?drugName))
            }""")
        return list(
            map(
                lambda r: {
                    'drugID': r['pubChemID'],
                    'drugName': r['drugGenericName']
                }, result))

    def gene_symbol_to_pathway(self, uniprot_symbol):
        """Query KEGG pathways linked to a gene through PharmGKB and CTD.

        :param uniprot_symbol: IRI text substituted inside ``<...>`` for
            ``$uniprotSymbol`` in the template.
        :return: whatever ``query_template`` returns for the ``keggPath``
            output (the query is capped at 500 rows).
        """
        sparql = """
            prefix kegg:           <http://chem2bio2rdf.org/kegg/resource/>
            prefix pharmgkb:       <http://chem2bio2rdf.org/pharmgkb/resource/>
            prefix ctd:            <http://chem2bio2rdf.org/ctd/resource/>
            select ?ctdGene ?uniprotID ?pathwayName ?keggPath where {
               values ( ?ctdGene ) { ( <$uniprotSymbol> ) }
               ?keggPath    kegg:protein    ?uniprotID ; kegg:Pathway_name ?pathwayName .
               ?pharmGene   pharmgkb:Symbol ?ctdGene ; pharmgkb:UniProt_Id ?uniprotID.
               ?ctdChemGene ctd:gene        ?ctdGene.
            } LIMIT 500
            """
        bindings = {"uniprotSymbol": uniprot_symbol}
        return self.triplestore.query_template(template_text=sparql,
                                               outputs=["keggPath"],
                                               inputs=bindings)

    def uniprot_to_hgnc(self, uniprot_symbol):
        """Query the ``owl:sameAs`` targets of a UniProt gene resource.

        Only targets whose string form starts with
        ``http://bio2rdf.org/gene:`` are kept by the query's filter.

        :param uniprot_symbol: object whose ``identifier`` is passed through
            ``Text.un_curie`` and substituted for ``${uniprotID}``.
        :return: whatever ``query_template`` returns for the ``hgncID`` output.
        """
        sparql = """
            prefix uniprot:    <http://chem2bio2rdf.org/uniprot/resource/gene/>
            prefix owl:        <http://www.w3.org/2002/07/owl#>
            prefix hgnc:       <http://chem2bio2rdf.org/rdf/resource/hgnc/>
            select distinct ?hgncID where {
               values ( ?uniprotID ) { ( uniprot:${uniprotID} ) }
               ?uniprotID <http://www.w3.org/2002/07/owl#sameAs> ?hgncID.
               filter ( strstarts (str(?hgncID), "http://bio2rdf.org/gene:"))
            }
            """
        local_id = Text.un_curie(uniprot_symbol.identifier)
        return self.triplestore.query_template(template_text=sparql,
                                               outputs=["hgncID"],
                                               inputs={"uniprotID": local_id})

    def graph_uniprot_to_hgnc(self, uniprot_symbol):
        """Build (edge, node) pairs linking a UniProt gene to HGNC gene nodes.

        :param uniprot_symbol: forwarded unchanged to ``uniprot_to_hgnc``.
        :return: list of ``(edge, KNode)`` tuples; each node id is ``HGNC:``
            followed by the text after the last ``:`` of the row's ``hgncID``.
        """
        pairs = []
        for row in self.uniprot_to_hgnc(uniprot_symbol):
            edge = self.get_edge(row, predicate='synonym')
            hgnc_number = row['hgncID'].split(':')[-1]
            node = KNode('HGNC:{0}'.format(hgnc_number), node_types.GENE)
            pairs.append((edge, node))
        return pairs

    def graph_get_genes_by_disease(self, disease):  #reasoner
        """Build (edge, node) pairs for the genes reported for a disease.

        :param disease: object whose ``identifier`` is a ``prefix:local``
            string; the lower-cased local part is what gets queried.
        :return: list of ``(KEdge, KNode)`` tuples, one per row returned by
            ``get_genes_pathways_by_disease``; each edge carries the row's
            ``keggPath`` and each node id is ``UNIPROT:`` plus the last path
            segment of the row's ``uniprotGene``.
        """
        disease_local_id = disease.identifier.split(':')[1].lower()
        rows = self.get_genes_pathways_by_disease([disease_local_id])
        return [(KEdge('c2b2r', 'diseaseToGene', {'keggPath': row['keggPath']}),
                 KNode("UNIPROT:{0}".format(row['uniprotGene'].split('/')[-1]),
                       node_types.GENE)) for row in rows]

    @cachier(stale_after=datetime.timedelta(days=20))
    def graph_get_pathways_by_gene(self, gene):  #reasoner
        """Build (edge, node) pairs for the KEGG pathways linked to a gene.

        NOTE(review): a second method with this exact name appears later in
        the same class body and replaces this definition, so the cachier
        decorator applied here does not take effect as the file stands.

        :param gene: object whose ``identifier`` is a ``prefix:local``
            string; the upper-cased local part is substituted for ``$gene``.
        :return: list of ``(KEdge, KNode)`` tuples, one per result row; each
            node id is ``KEGG:`` plus the last path segment of ``keggPath``.
        """
        gene_symbol = gene.identifier.split(':')[1].upper()
        sparql = """
            prefix kegg:      <http://chem2bio2rdf.org/kegg/resource/>
            prefix drugbank:  <http://chem2bio2rdf.org/drugbank/resource/>
            prefix uniprot:   <http://chem2bio2rdf.org/uniprot/resource/gene/>
            prefix ctd:       <http://chem2bio2rdf.org/ctd/resource/>
            prefix mesh:      <http://bio2rdf.org/mesh:>
            select ?drugGenericName ?uniprotGeneID ?pathwayName ?keggPath where {
               ?keggPath    kegg:protein                ?swissProtID ;
                            kegg:Pathway_name           ?pathwayName .
               ?keggInter   kegg:cid                    ?pubchemCID .
               ?dbInter     drugbank:GeneBank_ID        ?geneBankID ;
                            drugbank:SwissProt_ID       ?swissProtID ;
                            drugbank:gene               ?uniprotGeneID .
               ?drugID      drugbank:CID                ?pubchemCID ;
                            drugbank:Generic_Name       ?drugGenericName .
               ?ctd_disease ctd:diseaseid               ?diseaseID ;
                            ctd:cid                     ?pubchemCID .
               values ( ?uniprotGeneID ) {
                  ( uniprot:$gene )
               }
            } LIMIT 2000"""
        rows = self.triplestore.query_template(
            template_text=sparql,
            outputs=['keggPath'],
            inputs={"gene": gene_symbol})
        return [(KEdge('c2b2r', 'geneToPathway', {}),
                 KNode("KEGG:{0}".format(row['keggPath'].split('/')[-1]),
                       node_types.PATHWAY)) for row in rows]

    def graph_drugname_to_gene_symbol(self, drug_name_node):
        """Build (edge, node) pairs for the genes linked to a named drug.

        :param drug_name_node: object whose ``identifier`` is passed through
            ``Text.un_curie`` to obtain the drug name to search for.
        :return: list of ``(edge, KNode)`` tuples, one per row returned by
            ``drug_name_to_gene_symbol``; node ids are ``UNIPROT:`` plus
            ``Text.path_last`` of the row's ``uniprotSym``.
        """
        searched_name = Text.un_curie(drug_name_node.identifier)
        pairs = []
        for row in self.drug_name_to_gene_symbol(searched_name):
            edge = self.get_edge(row, predicate="targets")
            gene_id = "UNIPROT:{0}".format(Text.path_last(row['uniprotSym']))
            pairs.append((edge, KNode(gene_id, node_types.GENE)))
        return pairs

    def graph_name_to_drugbank(self, drug_name_node):
        """Build (edge, node) pairs mapping a drug name to DrugBank drug nodes.

        :param drug_name_node: object whose ``identifier`` is passed through
            ``Text.un_curie`` to obtain the drug name to search for.
        :return: list of ``(edge, KNode)`` tuples, one per row returned by
            ``drug_name_to_gene_symbol``; node ids are ``DRUGBANK:`` plus
            ``Text.path_last`` of the row's ``drugID``.
        """
        drug_name = Text.un_curie(drug_name_node.identifier)
        results = []
        for row in self.drug_name_to_gene_symbol(drug_name):
            edge = self.get_edge(row, predicate="drugname")
            # Rows produced by drug_name_to_gene_symbol only carry the keys
            # 'uniprotSym', 'pmids' and 'drugID', so indexing row['drugName']
            # raised KeyError on every non-empty result. Fall back to the
            # name that was searched for when the row has no 'drugName'.
            label = row.get('drugName', drug_name)
            node = KNode("DRUGBANK:{0}".format(Text.path_last(row['drugID'])),
                         node_types.DRUG,
                         label=label)
            results.append((edge, node))
        return results

    def graph_get_pathways_by_gene(self, gene):  #reasoner
        """Build (edge, node) pairs for the KEGG pathways linked to a gene.

        NOTE(review): this is the second definition of this method name in
        the class body; it overrides the earlier cachier-decorated one, so
        results are not cached. Confirm which definition is intended.

        :param gene: object whose ``identifier`` is a ``prefix:local``
            string; the upper-cased local part is substituted for ``$gene``.
        :return: list of ``(KEdge, KNode)`` tuples, one per result row; each
            node id is ``KEGG:`` plus the last path segment of ``keggPath``.
        """
        bindings = {"gene": gene.identifier.split(':')[1].upper()}
        pathway_query = """
            prefix kegg:      <http://chem2bio2rdf.org/kegg/resource/>
            prefix drugbank:  <http://chem2bio2rdf.org/drugbank/resource/>
            prefix uniprot:   <http://chem2bio2rdf.org/uniprot/resource/gene/>
            prefix ctd:       <http://chem2bio2rdf.org/ctd/resource/>
            prefix mesh:      <http://bio2rdf.org/mesh:>
            select ?drugGenericName ?uniprotGeneID ?pathwayName ?keggPath where {
               ?keggPath    kegg:protein                ?swissProtID ;
                            kegg:Pathway_name           ?pathwayName .
               ?keggInter   kegg:cid                    ?pubchemCID .
               ?dbInter     drugbank:GeneBank_ID        ?geneBankID ;
                            drugbank:SwissProt_ID       ?swissProtID ;
                            drugbank:gene               ?uniprotGeneID .
               ?drugID      drugbank:CID                ?pubchemCID ;
                            drugbank:Generic_Name       ?drugGenericName .
               ?ctd_disease ctd:diseaseid               ?diseaseID ;
                            ctd:cid                     ?pubchemCID .
               values ( ?uniprotGeneID ) {
                  ( uniprot:$gene )
               }
            } LIMIT 2000"""
        found = self.triplestore.query_template(inputs=bindings,
                                                outputs=['keggPath'],
                                                template_text=pathway_query)
        pairs = []
        for record in found:
            pathway_edge = KEdge('c2b2r', 'geneToPathway', {})
            # The pathway id is the last path segment of the keggPath IRI.
            kegg_id = record['keggPath'].split('/')[-1]
            pathway_node = KNode("KEGG:{0}".format(kegg_id),
                                 node_types.PATHWAY)
            pairs.append((pathway_edge, pathway_node))
        return pairs

    def graph_drugbank_to_uniprot(self, drugbank):
        """Build (edge, node) pairs for the genes queried for a DrugBank drug.

        :param drugbank: object whose ``identifier`` is passed through
            ``Text.un_curie``; the result is prefixed with ``DB`` and
            substituted for ``${drugID}`` in the template.
        :return: list of ``(edge, KNode)`` tuples, one per result row; node
            ids are ``UNIPROT:`` plus the last path segment of
            ``uniprotGeneID``.
        """
        drug_id = "DB{0}".format(Text.un_curie(drugbank.identifier))
        sparql = """
            prefix drugbank:      <http://chem2bio2rdf.org/drugbank/resource/>
            prefix drugbank_drug: <http://chem2bio2rdf.org/drugbank/resource/drugbank_drug/>
            prefix ctd:           <http://chem2bio2rdf.org/ctd/resource/>
            select distinct ?uniprotGeneID where {
               values ( ?drugID ) { ( drugbank_drug:${drugID} ) }
               ?dbInter     drugbank:GeneBank_ID        ?geneBankID ;
                            drugbank:gene               ?uniprotGeneID .
               ?drugID      drugbank:CID                ?pubchemCID ;
                            drugbank:Generic_Name       ?drugGenericName .
               ?ctd_disease ctd:diseaseid               ?diseaseID ;
                            ctd:cid                     ?pubchemCID .
            }"""
        rows = self.triplestore.query_template(
            template_text=sparql,
            outputs=["uniprotGeneID"],
            inputs={"drugID": drug_id})
        pairs = []
        for row in rows:
            edge = self.get_edge(row, predicate='targets')
            gene_id = "UNIPROT:{0}".format(row['uniprotGeneID'].split('/')[-1])
            pairs.append((edge, KNode(gene_id, node_types.GENE)))
        return pairs

    def graph_diseasename_to_uniprot(self, disease):
        """Build (edge, node) pairs for genes linked to a disease by name.

        A first query (LIMIT 1) checks whether any CTD chemical-disease
        record has a disease name regex-matching the given name. Only when it
        does is the second query run, which joins chemical-gene and
        chemical-disease records on their shared PubChem CID (LIMIT 500).

        :param disease: object whose ``identifier`` is passed through
            ``Text.un_curie`` to obtain the disease name.
        :return: list of ``(edge, KNode)`` tuples; empty when the first query
            finds nothing. Each edge is built with the chemical and disease
            pubmed id strings joined by ``|``; node ids are ``UNIPROT:`` plus
            the last path segment of ``uniprotSym``.
        """
        disease_name = Text.un_curie(disease.identifier)
        probe = self.triplestore.query_template(
            inputs={"diseaseName": disease_name},
            outputs=["pubChemCID"],
            template_text="""
            prefix ctd: <http://chem2bio2rdf.org/ctd/resource/>
            select distinct ?pubChemCID where {
               values ( ?diseaseName ) { ( "$diseaseName" ) }
               ?ctdChemDis  ctd:cid         ?pubChemCID;
                            ctd:diseasename ?diseaseNameRec.
               filter regex(lcase(str(?diseaseNameRec)), lcase(?diseaseName))
            } LIMIT 1""")
        if not probe:
            # No matching disease name: nothing to link.
            return []

        response = self.triplestore.query_template(
            inputs={"diseaseName": disease_name},
            outputs=["disPmids", "chemPmids", "uniprotSym"],
            template_text="""
                prefix ctd: <http://chem2bio2rdf.org/ctd/resource/>
                select ?disPmids ?ctdChemDis ?chemPmids ?uniprotSym ?diseaseId where {
                  values ( ?diseaseName ) { ( "$diseaseName" ) }
                  ?ctdChemGene ctd:cid         ?pubChemCID;
                               ctd:pubmedids   ?chemPmids;
                               ctd:gene        ?uniprotSym.
                  ?ctdChemDis  ctd:cid         ?pubChemCID;
                               ctd:diseaseid   ?diseaseId;
                               ctd:diseasename ?diseaseNameRec;
                               ctd:pubmedids   ?disPmids.
                  filter regex(lcase(str(?diseaseNameRec)), lcase(?diseaseName))
                } LIMIT 500""")
        results = []
        for r in response:
            pmids = r['chemPmids'] + "|" + r['disPmids']
            # The original line ended with a stray comma, which wrapped the
            # edge in a 1-tuple and produced ((edge,), node) pairs.
            edge = self.get_edge(r, predicate='caused_by', pmids=pmids)
            node = KNode(
                "UNIPROT:{0}".format(r['uniprotSym'].split('/')[-1]),
                node_types.GENE)
            results.append((edge, node))
        return results

    def graph_diseaseid_to_uniprot(self, drugbank):
        """Build (edge, node) pairs for the genes queried for a disease id.

        The lower-cased identifier is printed to stdout and substituted
        verbatim for ``$diseaseID`` in the template.

        :param drugbank: object with an ``identifier`` string.
            NOTE(review): despite the parameter name, the value is used as a
            disease id in the query — confirm against callers.
        :return: list of ``(edge, KNode)`` tuples, one per result row; node
            ids are ``UNIPROT:`` plus the last path segment of
            ``uniprotGeneID``.
        """
        disease_id = drugbank.identifier.lower()
        print(disease_id)
        sparql = """
            prefix drugbank:      <http://chem2bio2rdf.org/drugbank/resource/>
            prefix drugbank_drug: <http://chem2bio2rdf.org/drugbank/resource/drugbank_drug/>
            prefix ctd:           <http://chem2bio2rdf.org/ctd/resource/>
            prefix mesh.disease:          <http://bio2rdf.org/mesh:> 
            select distinct ?uniprotGeneID where {
               values ( ?diseaseID ) { ( $diseaseID ) }
               ?dbInter     drugbank:gene               ?uniprotGeneID .
               ?drugID      drugbank:CID                ?pubchemCID.
               ?ctd_disease ctd:diseaseid               ?diseaseID ;
                            ctd:cid                     ?pubchemCID .
            }"""
        rows = self.triplestore.query_template(
            template_text=sparql,
            outputs=["uniprotGeneID"],
            inputs={"diseaseID": disease_id})
        pairs = []
        for row in rows:
            edge = self.get_edge(row, predicate='targets')
            gene_id = "UNIPROT:{0}".format(row['uniprotGeneID'].split('/')[-1])
            pairs.append((edge, KNode(gene_id, node_types.GENE)))
        return pairs

    def graph_drugname_to_pubchem(self, drugname_node):
        """Build (edge, node) pairs mapping a drug name to PubChem drug nodes.

        :param drugname_node: object whose ``identifier`` is passed through
            ``Text.un_curie`` to obtain the drug name to search for.
        :return: list of ``(edge, KNode)`` tuples, one per row returned by
            ``drugname_to_pubchem``; node ids are ``PUBCHEM:`` plus the last
            path segment of the row's ``drugID``, labelled with ``drugName``.
        """
        searched_name = Text.un_curie(drugname_node.identifier)
        pairs = []
        for row in self.drugname_to_pubchem(searched_name):
            edge = self.get_edge(row, predicate='drugname_to_pubchem')
            pubchem_id = "PUBCHEM:{}".format(row['drugID'].split('/')[-1])
            node = KNode(pubchem_id, node_types.DRUG, label=row['drugName'])
            pairs.append((edge, node))
        return pairs

    #       'NCBIGene'   : r['NCBIGene'],
    #        'meshID'     : r['meshID'],
    #        'interaction': r['interaction'],
    #        'interactionTypes': r['interactionTypes']
    #        'pubmedids'  : r['pubmedids']
    def graph_pubchem_to_ncbigene(self, pubchem_node):
        """Build (edge, node) pairs linking a PubChem compound to NCBI genes.

        :param pubchem_node: object whose ``identifier`` is passed through
            ``Text.un_curie`` to obtain the PubChem id to query.
        :return: list of ``(edge, KNode)`` tuples, one per row returned by
            ``pubchem_to_ncbigene``. Each edge is built from the row's
            ``interaction``, ``interactionTypes`` and its ``pubmedids`` split
            on ``|`` (stored as ``publications``).
        """
        #The compound mesh coming back from here is very out of date.  Ignore.
        pubchemid = Text.un_curie(pubchem_node.identifier)
        pairs = []
        for row in self.pubchem_to_ncbigene(pubchemid):
            edge_props = {
                'interaction': row['interaction'],
                'interactionTypes': row['interactionTypes'],
                'publications': row['pubmedids'].split('|'),
            }
            edge = self.get_edge(edge_props, predicate='pubchem_to_ncbigene')
            node = KNode("NCBIGene:{}".format(row['NCBIGene']),
                         node_types.GENE)
            pairs.append((edge, node))
        return pairs

コード例 #5

ファイルを表示

class OntologicalHeirarchy(Service):
    """
    Service that makes calls to uberongraph to resolve subclass relationships
    between ontological terms and writes the resulting nodes and edges through
    a WriterDelegator.
    """
    def __init__(self):
        """Set up the SPARQL client, the root IRIs and the query template."""
        self.url = "https://stars-app.renci.org/uberongraph/sparql"
        self.triplestore = TripleStore(self.url)
        # OBO prefixes that get a PREFIX declaration in the query.
        self.prefix_set = {
            node_types.DISEASE_OR_PHENOTYPIC_FEATURE: ['HP', 'MONDO'],
            node_types.CELLULAR_COMPONENT: ['CL'],
            node_types.BIOLOGICAL_PROCESS_OR_ACTIVITY: ['GO'],
            node_types.ANATOMICAL_ENTITY: ['UBERON'],
            node_types.CHEMICAL_SUBSTANCE: ['CHEBI']
        }
        # Root class IRI per node type; runner() walks each of these.
        self.root_uris = {
            node_types.ANATOMICAL_ENTITY:
            "<http://purl.obolibrary.org/obo/UBERON_0001062>",
            node_types.DISEASE:
            "<http://purl.obolibrary.org/obo/MONDO_0000001>",
            node_types.MOLECULAR_ACTIVITY:
            "<http://purl.obolibrary.org/obo/GO_0003674>",
            node_types.BIOLOGICAL_PROCESS:
            "<http://purl.obolibrary.org/obo/GO_0008150>",
            node_types.CHEMICAL_SUBSTANCE:
            "<http://purl.obolibrary.org/obo/CHEBI_24431>",
            node_types.PHENOTYPIC_FEATURE:
            "<http://purl.obolibrary.org/obo/HP_0000118>",
            node_types.CELL:
            "<http://purl.obolibrary.org/obo/CL_0000000>",
            # Host was misspelled "purl.orolibrary.org", unlike every other
            # root IRI here, so this root could not match any OBO term.
            node_types.CELLULAR_COMPONENT:
            "<http://purl.obolibrary.org/obo/GO_0005575>"
        }
        # De-duplicate the prefixes; sorting keeps the query text stable.
        distinct_prefixes = sorted(
            {pref for prefs in self.prefix_set.values() for pref in prefs})
        obo_prefixes = '\n'.join(
            f'PREFIX {pref}: <http://purl.obolibrary.org/obo/{pref}_>'
            for pref in distinct_prefixes)
        # $root_uri is filled in per call by query_template; the doubled
        # braces are f-string escapes for literal SPARQL braces.
        self.query = f"""
                    {obo_prefixes}
                    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
                    select  ?parent_id ?parent_label ?child_id ?child_label
                    where {{
                        ?parent_id rdfs:subClassOf $root_uri .
                        ?child_id rdfs:subClassOf ?parent_id.
                   OPTIONAL {{
                    ?parent_id rdfs:label ?parent_label.
                    ?child_id rdfs:label ?child_label.
                    }}
                    }}
                        """
        rosetta = Rosetta()
        self.wdg = WriterDelegator(rosetta)

    def runner(self):
        """Fetch and write the nodes and edges for every configured root.

        Progress is printed whenever the completed percentage is an exact
        multiple of 10.
        """
        for node_type, root_iri in self.root_uris.items():
            nodes, edges = self.term_get_ancestors(node_type, root_iri)
            for index, n in enumerate(nodes):
                self.wdg.write_node(n, annotate=False)
                if ((index / len(nodes)) * 100) % 10 == 0:
                    print((index / len(nodes)) * 100, '% complete')
            for index, e in enumerate(edges):
                self.wdg.write_edge(e)
                # NOTE(review): edges are flushed every 100 writes; whether
                # the writer flushes the remainder on its own is not visible
                # here — confirm.
                if index % 100 == 0:
                    self.wdg.flush()
                if ((index / len(edges)) * 100) % 10 == 0:
                    print((index / len(edges)) * 100, '% complete')

    def term_get_ancestors(self, node_type, root_iri):
        """Query parent/child subclass pairs under a root and build graph items.

        :param node_type: type assigned to every node that is created.
        :param root_iri: value substituted for ``$root_uri`` in the query.
        :return: ``(nodes, edges)`` as two sets; rows whose parent and child
            resolve to the same node id contribute nothing.
        """
        results = self.triplestore.query_template(
            template_text=self.query,
            inputs={'root_uri': root_iri},
            outputs=['parent_id', 'parent_label', 'child_id', 'child_label'])
        print('found total ', len(results), ' results.')
        nodes = set()
        edges = set()
        for row in results:
            # Output type would be same as input type?
            ancestor_node = KNode(Text.obo_to_curie(row['parent_id']),
                                  name=row['parent_label'],
                                  type=node_type)
            child_node = KNode(Text.obo_to_curie(row['child_id']),
                               name=row['child_label'],
                               type=node_type)
            if ancestor_node.id == child_node.id:
                # refrain from adding edge to the node itself
                continue
            predicate = LabeledID(identifier='rdfs:subClassOf',
                                  label='subclass of')
            edge = self.create_edge(
                source_node=child_node,
                target_node=ancestor_node,
                predicate=predicate,
                provided_by='uberongraph.term_get_ancestors',
                input_id=child_node.id)
            nodes.add(child_node)
            nodes.add(ancestor_node)
            edges.add(edge)
        return nodes, edges

コード例 #6

ファイルを表示

class OmniCorp(Service):
    """Client for the OmniCorp triple store.

    The queries here look up PubMed resources that ``dct:references`` one or
    more term IRIs; CURIEs are expanded to IRIs via ``prefix_to_uri``.
    """
    def __init__(self, context):  #triplestore):
        super(OmniCorp, self).__init__("omnicorp", context)
        self.triplestore = TripleStore(self.url)
        # CURIE prefix -> IRI prefix used to build the term IRIs to query.
        self.prefix_to_uri = {
            'UBERON': 'http://purl.obolibrary.org/obo/UBERON_',
            'BSPO': 'http://purl.obolibrary.org/obo/BSPO_',
            'PATO': 'http://purl.obolibrary.org/obo/PATO_',
            'GO': 'http://purl.obolibrary.org/obo/GO_',
            'MONDO': 'http://purl.obolibrary.org/obo/MONDO_',
            'HP': 'http://purl.obolibrary.org/obo/HP_',
            # Was 'ENVO:' — the only key with a trailing colon, which
            # presumably never matched the prefix from Text.get_curie.
            'ENVO': 'http://purl.obolibrary.org/obo/ENVO_',
            'OBI': 'http://purl.obolibrary.org/obo/OBI_',
            'CL': 'http://purl.obolibrary.org/obo/CL_',
            'SO': 'http://purl.obolibrary.org/obo/SO_',
            'CHEBI': 'http://purl.obolibrary.org/obo/CHEBI_',
            'HGNC': 'http://identifiers.org/hgnc/HGNC:',
            'MESH': 'http://id.nlm.nih.gov/mesh/'
        }

    def get_omni_identifier(self, node):
        """Expand a node's CURIE id into the IRI used in OmniCorp queries.

        :param node: object with ``id``, ``type`` and ``synonyms`` attributes.
        :return: the IRI string, or ``None`` (after logging warnings) when
            the id's prefix is not in ``prefix_to_uri``.
        """
        #Let's start with just the 'best' identifier
        prefix = Text.get_curie(node.id)
        if prefix not in self.prefix_to_uri:
            logger.warning("What kinda tomfoolery is this?")
            logger.warning(f"{node.id} {node.type}")
            logger.warning(f"{node.synonyms}")
            return None
        return f'{self.prefix_to_uri[prefix]}{Text.un_curie(node.id)}'

    def query_omnicorp(self, query):
        """ Execute and return the result of a SPARQL query. """
        return self.triplestore.execute_query(query)

    def sparql_get_all_shared_pmids(self, identifier_list):
        """Query every pubmed/term pair shared within one set of terms.

        :param identifier_list: text substituted after both ``VALUES``
            clauses, e.g. a ``{ <iri> <iri> }`` block.
        :return: result of ``query_template`` with outputs ``term1``,
            ``term2`` and ``pubmed``; the STR(?term1) < STR(?term2) filter
            keeps one ordering of each pair.
        """
        text = """
        PREFIX dct: <http://purl.org/dc/terms/>
        SELECT DISTINCT ?pubmed ?term1 ?term2
        WHERE {
          hint:Query hint:analytic true .
          VALUES ?term1 $id_list_a
          VALUES ?term2 $id_list_b
          ?pubmed dct:references ?term1 .
          ?pubmed dct:references ?term2 .
          FILTER(STR(?term1) < STR(?term2))
        }
        """
        start = datetime.datetime.now()
        results = self.triplestore.query_template(
            inputs={
                'id_list_a': identifier_list,
                'id_list_b': identifier_list
            },
            outputs=['term1', 'term2', 'pubmed'],
            template_text=text,
            post=True)
        end = datetime.datetime.now()
        logger.debug(f'Completed in: {end-start}')
        return results

    def sparql_count_pmids(self, identifier):
        """Query the number of distinct pubmed resources referencing a term.

        :param identifier: IRI text placed inside ``<...>`` in the query.
        :return: result of ``query_template`` with the single output ``count``.
        """
        text = """
        PREFIX dct: <http://purl.org/dc/terms/>
        SELECT (COUNT(DISTINCT ?pubmed) as ?count)
        WHERE {
          hint:Query hint:analytic true .
          ?pubmed dct:references <$identifier> .
        }
        """
        return self.triplestore.query_template(
            inputs={'identifier': identifier},
            outputs=['count'],
            template_text=text,
        )

    def sparql_get_shared_pmids(self, identifier_a, identifier_b):
        """Query the pubmed resources that reference both given terms.

        :param identifier_a: IRI text of the first term.
        :param identifier_b: IRI text of the second term.
        :return: result of ``query_template`` with the single output ``pubmed``.
        """
        text = """
        PREFIX dct: <http://purl.org/dc/terms/>
        SELECT DISTINCT ?pubmed
        WHERE {
          hint:Query hint:analytic true .
          ?pubmed dct:references <$id_a> .
          ?pubmed dct:references <$id_b> .
        }
        """
        return self.triplestore.query_template(
            inputs={
                'id_a': identifier_a,
                'id_b': identifier_b
            },
            outputs=['pubmed'],
            template_text=text,
            post=True)

    def get_all_shared_pmids(self, nodes):
        """Collect shared PMIDs for every pair among ``nodes``.

        :param nodes: sequence of nodes (it is sliced, so it must support
            slicing).
        :return: ``defaultdict(list)`` keyed by node pairs, each holding
            ``PMID:<n>`` strings; pairs with no result in either order get an
            explicit empty list.
        """
        oiddict = {self.get_omni_identifier(n): n for n in nodes}
        # Nodes with no OmniCorp IRI map to the key None; leave them out.
        oids = [f'<{x}>' for x in oiddict if x is not None]
        oidsstring = '{ ' + ' '.join(oids) + '}'
        results = self.sparql_get_all_shared_pmids(oidsstring)
        pubmeds = defaultdict(list)
        for r in results:
            k = (oiddict[r['term1']], oiddict[r['term2']])
            pubmeds[k].append(f"PMID:{r['pubmed'].split('/')[-1]}")
        for i, node_i in enumerate(nodes):
            for node_j in nodes[:i]:
                k_ij = (node_i, node_j)
                k_ji = (node_j, node_i)
                if k_ij not in pubmeds and k_ji not in pubmeds:
                    pubmeds[k_ij] = []
        return pubmeds

    def call_with_retries(self, fnc, args):
        """Call ``fnc(*args)``, retrying up to 100 times on failure.

        Sleeps 10 seconds after each failed attempt.

        :param fnc: callable to invoke.
        :param args: positional arguments for ``fnc``.
        :return: the first successful result, or ``None`` when every attempt
            raised.
        """
        maxtries = 100
        rest_time = 10  #seconds
        start = datetime.datetime.now()
        for ntries in range(maxtries):
            try:
                result = fnc(*args)
            except Exception:
                # Was a bare ``except:``, which also swallowed
                # KeyboardInterrupt/SystemExit during the retry loop.
                logger.warning("OmniCorp error, retrying")
                time.sleep(rest_time)
            else:
                end = datetime.datetime.now()
                logger.debug(f'Total call ntries: {ntries}, time: {end-start}')
                return result
        return None

    def count_pmids(self, node):
        """Count the pubmed resources that reference ``node``.

        :return: ``0`` when the node has no OmniCorp IRI, ``None`` when all
            retries failed, otherwise the ``count`` of the first result row.
        """
        identifier = self.get_omni_identifier(node)
        if identifier is None:
            return 0
        res = self.call_with_retries(self.sparql_count_pmids, [identifier])
        if res is None:
            return None
        logger.debug(f"Returned {res[0]['count']}")
        return res[0]['count']

    def get_shared_pmids(self, node1, node2):
        """Return the pubmed values that reference both nodes.

        :return: ``[]`` when either node has no OmniCorp IRI, ``None`` when
            all retries failed, otherwise the list of ``pubmed`` values.
        """
        id1 = self.get_omni_identifier(node1)
        id2 = self.get_omni_identifier(node2)
        if id1 is None or id2 is None:
            return []
        pmids = self.call_with_retries(self.sparql_get_shared_pmids,
                                       [id1, id2])
        if pmids is None:
            logger.error("OmniCorp gave up")
            return None
        return [p['pubmed'] for p in pmids]

コード例 #7

ファイルを表示

ファイル: uberongraph.py プロジェクト: TranslatorIIPrototypes/robo-commons

class UberonGraphKS(Service):
    """A knowledge source created by 1) Combining cell ontology, uberon, and
    HPO, 2) Reasoning over the total graph to realize many implicit edges.
    Created by Jim Balhoff"""
    def __init__(self, context):  #triplestore):
        """Initialise the service, its SPARQL client and the root-class table.

        :param context: forwarded to the ``Service`` initialiser together
            with the service name ``"uberongraph"``.
        """
        super(UberonGraphKS, self).__init__("uberongraph", context)
        self.triplestore = TripleStore(self.url)
        #TODO: Pull this from the biolink model?
        # Root ontology class (as a CURIE) for each supported node type.
        root_classes = [
            (node_types.CELL, 'CL:0000000'),
            (node_types.ANATOMICAL_ENTITY, 'UBERON:0001062'),
            (node_types.BIOLOGICAL_PROCESS, 'GO:0008150'),
            (node_types.MOLECULAR_ACTIVITY, 'GO:0003674'),
            (node_types.CHEMICAL_SUBSTANCE, 'CHEBI:24431'),
            (node_types.DISEASE, 'MONDO:0000001'),
            (node_types.PHENOTYPIC_FEATURE, 'UPHENO:0001002'),
        ]
        self.class_defs = dict(root_classes)

    def query_uberongraph(self, query):
        """Run a raw SPARQL query against the triple store.

        :param query: query text handed to ``execute_query`` unchanged.
        :return: whatever ``execute_query`` returns.
        """
        store = self.triplestore
        return store.execute_query(query)

    def get_edges(self, source_type, obj_type):
        """Find the predicates that connect terms of two node types.

        The query selects distinct predicates (with their labels) for which
        some subject under the source type's root class is related to some
        object under the object type's root class.

        :param source_type: key into ``self.class_defs`` for the subject side.
        :param obj_type: key into ``self.class_defs`` for the object side.
        :return: result of ``query_template`` with outputs ``p`` and ``pLabel``.
        """
        sparql = """
        prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        prefix UBERON: <http://purl.obolibrary.org/obo/UBERON_>
        prefix CL: <http://purl.obolibrary.org/obo/CL_>
        prefix GO: <http://purl.obolibrary.org/obo/GO_>
        prefix CHEBI: <http://purl.obolibrary.org/obo/CHEBI_>
        prefix MONDO: <http://purl.obolibrary.org/obo/MONDO_>
        prefix UPHENO: <http://purl.obolibrary.org/obo/UPHENO_>
        prefix BFO: <http://purl.obolibrary.org/obo/BFO_>
        select distinct ?p ?pLabel
        from <http://reasoner.renci.org/ontology>
        where {
            graph <http://reasoner.renci.org/redundant> {
                ?sourceID ?p ?objID .
            }
            graph <http://reasoner.renci.org/ontology/closure> {
                ?sourceID rdfs:subClassOf $sourcedefclass .
            }
            graph <http://reasoner.renci.org/ontology/closure> {
                ?objID rdfs:subClassOf $objdefclass .
                hint:Prior hint:runFirst true .
            }
            ?p rdfs:label ?pLabel .
        }
        """
        bindings = {
            'sourcedefclass': self.class_defs[source_type],
            'objdefclass': self.class_defs[obj_type],
        }
        return self.triplestore.query_template(inputs=bindings,
                                               outputs=['p', 'pLabel'],
                                               template_text=sparql)

    def get_label(self, identifier):
        """Fetch the ``rdfs:label`` of a term.

        :param identifier: passed through ``Text.curie_to_obo`` and then
            substituted for ``$obo_id`` in the query.
        :return: the ``label`` of the first result row, or ``''`` when the
            query returns no rows.
        """
        sparql = """
        prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        select distinct ?label
        from <http://reasoner.renci.org/ontology>
        where {
            $obo_id rdfs:label ?label .
        }
        """
        rows = self.triplestore.query_template(
            template_text=sparql,
            outputs=['label'],
            inputs={'obo_id': Text.curie_to_obo(identifier)})
        return rows[0]['label'] if len(rows) >= 1 else ''

    def cell_get_cellname(self, cell_identifier):
        """Query the label(s) of a cell type.

        :param cell_identifier: text substituted for ``$cellID`` in the
            query; the template declares the ``CL:`` prefix.
        :return: result of ``query_template`` with the single output
            ``cellLabel``.
        """
        sparql = """
        prefix CL: <http://purl.obolibrary.org/obo/CL_>
        select distinct ?cellLabel
        from <http://reasoner.renci.org/nonredundant>
        from <http://reasoner.renci.org/ontology>
        where {
                  $cellID rdfs:label ?cellLabel .
              }
        """
        bindings = {'cellID': cell_identifier}
        return self.triplestore.query_template(inputs=bindings,
                                               outputs=['cellLabel'],
                                               template_text=sparql)

    def get_anatomy_parts(self, anatomy_identifier):
        """Find the anatomical parts of an anatomy term.

        The query follows ``BFO:0000051`` from the given term and keeps the
        targets that are subclasses of ``UBERON:0001062``.

        :param anatomy_identifier: IRI text; it is wrapped in ``<...>``
            before being substituted for ``$anatomy_id``.
        :return: the query result rows (outputs ``part`` and ``partlabel``),
            each extended in place with a ``curie`` key computed by
            ``Text.obo_to_curie`` from ``part``.
        """
        wrapped_iri = f"<{anatomy_identifier}>"
        sparql = """
        prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        prefix UBERON: <http://purl.obolibrary.org/obo/UBERON_>
        prefix BFO: <http://purl.obolibrary.org/obo/BFO_>
        select distinct ?part ?partlabel
        from <http://reasoner.renci.org/nonredundant> 
        from <http://reasoner.renci.org/ontology>
        where {
                $anatomy_id BFO:0000051 ?part .
                graph <http://reasoner.renci.org/ontology/closure> {
                  ?part rdfs:subClassOf UBERON:0001062 .
                }
                ?part rdfs:label ?partlabel .
        }
        """
        rows = self.triplestore.query_template(
            template_text=sparql,
            outputs=['part', 'partlabel'],
            inputs={'anatomy_id': wrapped_iri})
        for row in rows:
            row['curie'] = Text.obo_to_curie(row['part'])
        return rows

    def get_neighbor(self, input_id, output_type, subject=True):
        parents = {
            node_types.ANATOMICAL_ENTITY:
            "<http://purl.obolibrary.org/obo/UBERON_0001062>",
            node_types.DISEASE:
            "<http://purl.obolibrary.org/obo/MONDO_0000001>",
            node_types.MOLECULAR_ACTIVITY:
            "<http://purl.obolibrary.org/obo/GO_0003674>",
            node_types.BIOLOGICAL_PROCESS:
            "<http://purl.obolibrary.org/obo/GO_0008150>",
            node_types.CHEMICAL_SUBSTANCE:
            "<http://purl.obolibrary.org/obo/CHEBI_24431>",
            node_types.PHENOTYPIC_FEATURE:
            "<http://purl.obolibrary.org/obo/HP_0000118>"
        }
        pref = Text.get_curie(input_id)
        obo_prefix = f'PREFIX {pref}: <http://purl.obolibrary.org/obo/{pref}_>'
        text = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        """ + obo_prefix + """
        select distinct ?output_id ?output_label ?p ?pLabel 
        from <http://reasoner.renci.org/nonredundant>
        from <http://reasoner.renci.org/ontology>
        where {
            graph <http://reasoner.renci.org/nonredundant> {
        """
        if subject:
            text += '	 $input_id ?p ?output_id .'
        else:
            text += '  ?output_id ?p $input_id .'
        text += """
            }
            graph <http://reasoner.renci.org/ontology/closure> {
                ?output_id rdfs:subClassOf $parent .
            }
            ?output_id rdfs:label ?output_label .
  			?p rdfs:label ?pLabel .
        }
        """
        results = self.triplestore.query_template(
            inputs={
                'input_id': input_id,
                'parent': parents[output_type]
            },
            outputs=['output_id', 'output_label', 'p', 'pLabel'],
            template_text=text)
        return results

    def anatomy_to_anatomy(self, identifier):
        """Find anatomy terms related to ``identifier`` in either direction.

        Two queries are run: one with the identifier as the subject of the
        triple pattern and one with it as the object. In both, the other end
        must be a subclass of ``UBERON:0001062``.

        :param identifier: text substituted for ``$input_id``; the template
            declares the ``UBERON:`` prefix, so presumably an ``UBERON:``
            CURIE is expected — confirm against callers.
        :return: dict with keys ``'subject'`` and ``'object'``, each a list of
            result rows (outputs ``output_id``, ``output_label``, ``p``,
            ``pLabel``).
        """
        prologue = """ PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            prefix UBERON: <http://purl.obolibrary.org/obo/UBERON_>
            select distinct ?output_id ?output_label ?p ?pLabel
            from <http://reasoner.renci.org/nonredundant>
            from <http://reasoner.renci.org/ontology>
            where {
                graph <http://reasoner.renci.org/redundant> {
            """
        epilogue = """
                }
                graph <http://reasoner.renci.org/ontology/closure> {
                    ?output_id rdfs:subClassOf UBERON:0001062 .
                }
                ?output_id rdfs:label ?output_label .
                ?p rdfs:label ?pLabel .
            }
            """
        # The patterns used to say ``?input_id`` (a free SPARQL variable), so
        # the supplied identifier was never substituted and the queries were
        # unconstrained. ``$input_id`` is the template placeholder that the
        # 'input_id' input fills in.
        patterns = (('subject', '      $input_id ?p ?output_id .'),
                    ('object', '       ?output_id ?p $input_id .'))
        results = {'subject': [], 'object': []}
        for direction, pattern in patterns:
            results[direction] += self.triplestore.query_template(
                inputs={'input_id': identifier},
                outputs=['output_id', 'output_label', 'p', 'pLabel'],
                template_text=prologue + pattern + epilogue)
        return results

    def anatomy_to_go(self, anatomy_identifier):
        """Identify processes and functions related to anatomical terms.

        Four queries are run: for each of the GO parents GO:0008150 and
        GO:0003674, the anatomy term is tried once as the subject and once as
        the object of the statement.

        :param anatomy_identifier: CURIE of the anatomy, cell or cellular
            component term, substituted for ``$anatID``.
        :return: dict with the keys 'subject' and 'object'; each holds the
            rows returned by ``query_template`` for the outputs 'goID',
            'goLabel', 'p' and 'pLabel'.
        """
        #This is a bit messy, but we need to do 4 things.  We are looking for go terms
        # that are either biological processes or activities and we are looking for predicates
        # that point either direction.
        results = {'subject': [], 'object': []}
        for goParent in ('GO:0008150', 'GO:0003674'):
            for direction, query in (('subject', '      $anatID ?p ?goID'),
                                     ('object', '        ?goID ?p $anatID')):
                # rdfs: is used below and $anatID may be a UBERON or CL CURIE,
                # so those prefixes are declared next to GO:.
                text = """
                prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
                prefix UBERON: <http://purl.obolibrary.org/obo/UBERON_>
                prefix CL: <http://purl.obolibrary.org/obo/CL_>
                prefix GO: <http://purl.obolibrary.org/obo/GO_>
                select distinct ?goID ?goLabel ?p ?pLabel
                from <http://reasoner.renci.org/nonredundant>
                from <http://reasoner.renci.org/ontology>
                where {
                    graph <http://reasoner.renci.org/redundant> {
                """ + query + """
                    }
                    graph <http://reasoner.renci.org/ontology/closure> {
                        ?goID rdfs:subClassOf $goParent .
                    }
                    ?goID rdfs:label ?goLabel .
                    ?p rdfs:label ?pLabel
                }
                """
                results[direction] += self.triplestore.query_template(
                    inputs={'anatID': anatomy_identifier, 'goParent': goParent},
                    outputs=['goID', 'goLabel', 'p', 'pLabel'],
                    template_text=text)
        return results

    def go_to_anatomy(self, input_identifier):
        """Identify anatomy terms related to a GO process/function.

        Two queries are run so that predicates pointing in either direction
        are found.  Results are limited to subclasses of UBERON:0001062.

        NOTE(review): the 'subject' bucket here holds rows where the anatomy
        term (not the input) is the subject of the statement — confirm this
        is what callers expect.

        :param input_identifier: GO CURIE substituted for ``$goID``.
        :return: dict with the keys 'subject' and 'object'; each holds the
            rows returned by ``query_template`` for the outputs 'anatID',
            'anatLabel', 'p' and 'pLabel'.
        """
        # we are looking for predicates that point either direction.
        results = {'subject': [], 'object': []}
        for direction, query in (('subject', '      ?anatID ?p $goID'),
                                 ('object', '        $goID ?p ?anatID')):
            # rdfs: is used in the pattern below, so it is declared explicitly.
            text = """
            prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            prefix UBERON: <http://purl.obolibrary.org/obo/UBERON_>
            prefix GO: <http://purl.obolibrary.org/obo/GO_>
            select distinct ?anatID ?anatLabel ?p ?pLabel
            from <http://reasoner.renci.org/nonredundant>
            from <http://reasoner.renci.org/ontology>
            where {
                graph <http://reasoner.renci.org/redundant> {
            """ + query + """
                }
                graph <http://reasoner.renci.org/ontology/closure> {
                    ?anatID rdfs:subClassOf UBERON:0001062 .
                }
                ?anatID rdfs:label ?anatLabel .
                ?p rdfs:label ?pLabel
            }
            """
            results[direction] += self.triplestore.query_template(
                inputs={'goID': input_identifier},
                outputs=['anatID', 'anatLabel', 'p', 'pLabel'],
                template_text=text)
        return results

    def pheno_or_disease_to_go(self, identifier):
        text = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        prefix BFO: <http://purl.obolibrary.org/obo/BFO_>
        prefix GO: <http://purl.obolibrary.org/obo/GO_>
        prefix MONDO: <http://purl.obolibrary.org/obo/MONDO_>
        prefix HP: <http://purl.obolibrary.org/obo/MONDO_>
        select distinct ?goID ?goLabel ?p ?pLabel 
        from <http://reasoner.renci.org/nonredundant>
        from <http://reasoner.renci.org/ontology>
        where {
            graph <http://reasoner.renci.org/redundant> {
    			$input_id ?p ?goID .
            }
            graph <http://reasoner.renci.org/ontology/closure> {
                { ?goID rdfs:subClassOf GO:0008150 . }
                UNION
                { ?goID rdfs:subClassOf GO:0003674 . }
            }
            ?goID rdfs:label ?goLabel .
  			?p rdfs:label ?pLabel .
        }
        """
        results = self.triplestore.query_template(
            inputs={'input_id': identifier},
            outputs=['goID', 'goLabel', 'p', 'pLabel'],
            template_text=text)
        return results

    def phenotype_to_anatomy(self, hp_identifier):
        """Identify anatomies related to a phenotype.

        :param hp_identifier: HP CURIE for the phenotype, substituted for
            ``$HPID``.
        :return: rows returned by ``query_template`` for the outputs
            'anatomy_id', 'anatomy_label', 'predicate' and 'predicate_label'.
            The predicate label is matched in an OPTIONAL clause, so a
            predicate without a label does not exclude the row.
        """

        #The subclassof uberon:0001062 ensures that the result
        #is an anatomical entity.
        #We don't need to do the subject/object game because there's nothing in ubergraph
        # that goes that direction
        # HP: is declared because $HPID is replaced by an HP CURIE.
        text = """
            PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            PREFIX UBERON: <http://purl.obolibrary.org/obo/UBERON_>
            PREFIX HP: <http://purl.obolibrary.org/obo/HP_>
            SELECT DISTINCT ?anatomy_id ?anatomy_label ?predicate ?predicate_label             
            FROM <http://reasoner.renci.org/ontology>
            WHERE {
                graph <http://reasoner.renci.org/redundant>{
                    $HPID ?predicate ?anatomy_id.
                }                
                graph <http://reasoner.renci.org/ontology/closure>{
                    ?anatomy_id rdfs:subClassOf UBERON:0001062.
                }
                ?anatomy_id rdfs:label ?anatomy_label .
                OPTIONAL {?predicate rdfs:label ?predicate_label.}
            }
        """
        results = self.triplestore.query_template(
            inputs={'HPID': hp_identifier},
            outputs=['anatomy_id', 'anatomy_label', 'predicate', 'predicate_label'],
            template_text=text)
        return results

    def anatomy_to_phenotype(self, uberon_id):
        """Find phenotypes whose statements point at an anatomical entity.

        :param uberon_id: UBERON CURIE substituted for ``$UBERONID``.
        :return: rows returned by ``query_template`` for the outputs
            'pheno_id', 'pheno_label', 'predicate' and 'predicate_label'.
            The predicate label is matched in an OPTIONAL clause.
        """
        #sparql very identical to phenotype_to_anatomy. could not find any anatomical
        # entity that is a subject of subclass of HP:0000118, in ubergraph at this point.
        # treating this as another version of pheno -> anatomical_entity but when
        # anatomical_entity is known and
        # we want to go back to  a phenotype.
        # UBERON: is declared because $UBERONID is replaced by a UBERON CURIE.
        text = """
            PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            PREFIX HP:<http://purl.obolibrary.org/obo/HP_>
            PREFIX UBERON: <http://purl.obolibrary.org/obo/UBERON_>
            SELECT DISTINCT ?pheno_id ?pheno_label ?predicate ?predicate_label 
            FROM <http://reasoner.renci.org/ontology>
            WHERE {
                graph <http://reasoner.renci.org/redundant> {
                    ?pheno_id ?predicate $UBERONID.
                }                
                graph <http://reasoner.renci.org/ontology/closure>{
                    ?pheno_id rdfs:subClassOf HP:0000118.
                }
                ?pheno_id rdfs:label ?pheno_label.
                OPTIONAL {?predicate rdfs:label ?predicate_label.}
            }"""
        results = self.triplestore.query_template(
            inputs={'UBERONID': uberon_id},
            outputs=['pheno_id', 'pheno_label', 'predicate', 'predicate_label'],
            template_text=text)
        return results

    def biological_process_or_activity_to_chemical(self, go_id):
        """
        Given a chemical finds associated GO Molecular Activities.
        """
        results = []

        text = """
            PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            PREFIX GO:  <http://purl.obolibrary.org/obo/GO_>
            PREFIX RO: <http://purl.obolibrary.org/obo/RO_>
            PREFIX chemical_entity: <http://purl.obolibrary.org/obo/CHEBI_24431>
            PREFIX chemical_class: <http://purl.obolibrary.org/obo/CHEBI_24431>
            SELECT DISTINCT ?chebi_id ?predicate ?label_predicate ?chebi_label
            from <http://reasoner.renci.org/ontology>
            from <http://reasoner.renci.org/nonredundant>
            where {
            $GO_ID ?predicate ?chebi_id. 
            ?chebi_id rdfs:label ?chebi_label.
            GRAPH <http://reasoner.renci.org/ontology/closure>
  	            { ?chebi_id rdfs:subClassOf chemical_class:.} 
            ?predicate rdfs:label ?label_predicate.
            FILTER ( datatype(?label_predicate) = xsd:string) 
            }
        """
        results = self.triplestore.query_template(template_text=text,
                                                  outputs=[
                                                      'chebi_id', 'predicate',
                                                      'label_predicate',
                                                      'chebi_label'
                                                  ],
                                                  inputs={'GO_ID': go_id})
        return results

    def pheno_to_biological_activity(self, pheno_id):
        """Look up the GO terms that a phenotype points to.

        The query keeps statements whose subject is the phenotype and whose
        object is a subclass of GO:0008150 or GO:0003674; both the GO term
        and the predicate must carry an rdfs:label.

        :param pheno_id: phenotype identifier, substituted for ``$pheno_type``
        :return: rows returned by ``query_template`` for the outputs 'go_id',
            'predicate', 'predicate_label' and 'go_label'
        """
        sparql = """
            PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            PREFIX owl: <http://www.w3.org/2002/07/owl#>
            PREFIX GO: <http://purl.obolibrary.org/obo/GO_>
            PREFIX has_phenotype_affecting: <http://purl.obolibrary.org/obo/UPHENO_0000001>
            PREFIX RO: <http://purl.obolibrary.org/obo/RO_>
            prefix HP: <http://purl.obolibrary.org/obo/HP_>

            SELECT DISTINCT ?go_id ?predicate ?predicate_label ?go_label
            from <http://reasoner.renci.org/nonredundant>
            from <http://reasoner.renci.org/ontology>
            WHERE {
            $pheno_type ?predicate  ?go_id.
            ?go_id rdfs:label ?go_label.
            graph <http://reasoner.renci.org/ontology/closure> {
                { ?go_id rdfs:subClassOf GO:0008150 . }
                UNION
                { ?go_id rdfs:subClassOf GO:0003674 . }
            }
            ?predicate rdfs:label ?predicate_label.
            }
        """
        columns = ['go_id', 'predicate', 'predicate_label', 'go_label']
        bindings = {'pheno_type': pheno_id}
        return self.triplestore.query_template(inputs=bindings,
                                               outputs=columns,
                                               template_text=sparql)

    def disease_to_anatomy(self, disease_id):
        """Find anatomical entities that a disease term points to.

        The object must be a subclass of UBERON_0001062 and both the anatomy
        term and the predicate must have an rdfs:label.

        :param disease_id: MONDO CURIE substituted for ``$diseaseID``.
        :return: rows returned by ``query_template`` for the outputs
            'anatomyID', 'predicate', 'predicate_label' and 'anatomy_label'.
        """
        #THere are no anatomy-(predicate)->disease triples
        # MONDO: is declared because $diseaseID is replaced by a MONDO CURIE.
        text = """
            PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            PREFIX MONDO: <http://purl.obolibrary.org/obo/MONDO_>
            PREFIX anatomicalEntity: <http://purl.obolibrary.org/obo/UBERON_0001062>
            SELECT DISTINCT ?anatomyID ?predicate ?predicate_label ?anatomy_label
            FROM <http://reasoner.renci.org/nonredundant>
            FROM <http://reasoner.renci.org/ontology>
            WHERE {
            graph <http://reasoner.renci.org/redundant> {
                $diseaseID ?predicate ?anatomyID.
            }
            ?anatomyID rdfs:label ?anatomy_label.
            graph <http://reasoner.renci.org/ontology/closure> {
                ?anatomyID rdfs:subClassOf anatomicalEntity: .
            }
            ?predicate rdfs:label ?predicate_label.
            }
        """
        results = self.triplestore.query_template(
            template_text=text,
            outputs=[
                'anatomyID', 'predicate', 'predicate_label', 'anatomy_label'
            ],
            inputs={'diseaseID': disease_id})
        return results

    def anatomy_to_chemical_substance(self, anatomy_id):
        """Find chemical entities that an anatomy term points to.

        The object must be a subclass of CHEBI_24431 and both the chemical
        and the predicate must have an rdfs:label.

        :param anatomy_id: CURIE substituted for ``$anatomy_id``; the UBERON,
            CL and GO prefixes are declared.
        :return: rows returned by ``query_template`` for the outputs
            'predicate', 'predicate_label', 'chemical_entity' and
            'chemical_label'.
        """
        #There's no chemical-(predicate)->anatomy
        # The input is substituted as a CURIE, so its prefix has to be declared.
        text = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX UBERON: <http://purl.obolibrary.org/obo/UBERON_>
        PREFIX CL: <http://purl.obolibrary.org/obo/CL_>
        PREFIX GO: <http://purl.obolibrary.org/obo/GO_>
        PREFIX chemical_entity: <http://purl.obolibrary.org/obo/CHEBI_24431>
        SELECT DISTINCT ?predicate ?predicate_label ?chemical_entity ?chemical_label
        FROM <http://reasoner.renci.org/ontology>
        FROM <http://reasoner.renci.org/redundant>
        WHERE {
            $anatomy_id ?predicate ?chemical_entity.
            graph <http://reasoner.renci.org/ontology/closure> 
            {
                ?chemical_entity rdfs:subClassOf chemical_entity:.
            }
            ?predicate rdfs:label ?predicate_label .
            ?chemical_entity rdfs:label ?chemical_label.
        }
        """
        results = self.triplestore.query_template(
            template_text=text,
            outputs=[
                'predicate', 'predicate_label', 'chemical_entity',
                'chemical_label'
            ],
            inputs={'anatomy_id': anatomy_id})
        return results

    def anatomy_to_disease(self, anatomy_id):
        """Find diseases whose statements point at an anatomy term.

        The subject must be a subclass of MONDO_0000001 and both the disease
        and the predicate must have an rdfs:label.

        :param anatomy_id: CURIE substituted for ``$anatomy_id``; the UBERON,
            CL and GO prefixes are declared.
        :return: rows returned by ``query_template`` for the outputs
            'predicate', 'predicate_label', 'disease' and 'disease_label'.
        """
        # The input is substituted as a CURIE, so its prefix has to be declared.
        # The group pattern opens right after the last FROM clause; SPARQL
        # allows the WHERE keyword to be omitted.
        text = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX UBERON: <http://purl.obolibrary.org/obo/UBERON_>
        PREFIX CL: <http://purl.obolibrary.org/obo/CL_>
        PREFIX GO: <http://purl.obolibrary.org/obo/GO_>
        PREFIX disease: <http://purl.obolibrary.org/obo/MONDO_0000001>
        SELECT DISTINCT  ?predicate ?predicate_label ?disease ?disease_label
        FROM <http://reasoner.renci.org/ontology>
        FROM <http://reasoner.renci.org/redundant>{
        ?disease ?predicate $anatomy_id.
        graph <http://reasoner.renci.org/ontology/closure> 
        {
            ?disease rdfs:subClassOf disease:.
        }
        ?predicate rdfs:label ?predicate_label .
        ?disease rdfs:label ?disease_label.
        }
        """
        results = self.triplestore.query_template(
            template_text=text,
            outputs=[
                'predicate', 'predicate_label', 'disease', 'disease_label'
            ],
            inputs={'anatomy_id': anatomy_id})
        return results

    def create_phenotype_anatomy_edge(self, node_id, node_label, input_id,
                                      phenotype_node):
        """Build an anatomy node and the edge joining it to a phenotype node.

        :param node_id: identifier of the anatomy term; converted with
            ``Text.obo_to_curie`` before the node is created
        :param node_label: name given to the new anatomy node
        :param input_id: passed to ``create_edge`` as the input identifier
        :param phenotype_node: existing phenotype node, used as the second
            node of the edge
        :return: tuple ``(edge, anatomy_node)``
        """
        gamma_predicate = LabeledID(
            identifier='GAMMA:0000002',
            label='inverse of has phenotype affecting')
        anatomy_curie = Text.obo_to_curie(node_id)
        new_anatomy = KNode(anatomy_curie,
                            type=node_types.ANATOMICAL_ENTITY,
                            name=node_label)
        # The anatomy node comes first in the edge, the phenotype second.
        provenance = 'uberongraph.get_anatomy_by_phenotype_graph'
        new_edge = self.create_edge(new_anatomy, phenotype_node, provenance,
                                    input_id, gamma_predicate)
        return new_edge, new_anatomy

    def create_anatomy_phenotype_edge(self, node_id, node_label, input_id,
                                      anatomy_node):
        """Build a phenotype node and the edge joining it to an anatomy node.

        :param node_id: identifier of the phenotype term; converted with
            ``Text.obo_to_curie`` before the node is created
        :param node_label: name given to the new phenotype node
        :param input_id: passed to ``create_edge`` as the input identifier
        :param anatomy_node: existing anatomy node, used as the first node of
            the edge
        :return: tuple ``(edge, phenotype_node)``
        """
        gamma_predicate = LabeledID(
            identifier='GAMMA:0000002',
            label='inverse of has phenotype affecting')
        phenotype_curie = Text.obo_to_curie(node_id)
        new_phenotype = KNode(phenotype_curie,
                              type=node_types.PHENOTYPIC_FEATURE,
                              name=node_label)
        # The anatomy node comes first in the edge, the phenotype second.
        provenance = 'uberongraph.get_phenotype_by_anatomy_graph'
        new_edge = self.create_edge(anatomy_node, new_phenotype, provenance,
                                    input_id, gamma_predicate)
        return new_edge, new_phenotype

    def dep_get_anatomy_by_phenotype_graph(self, phenotype_node):
        """Return (edge, node) pairs for anatomies related to a phenotype node.

        For every HP synonym of ``phenotype_node`` the related anatomies are
        fetched with ``phenotype_to_anatomy``; each anatomy and each of its
        parts (from ``get_anatomy_parts``) yields one pair.

        NOTE(review): the ``dep_`` prefix suggests this method is deprecated
        — confirm before relying on it.

        :param phenotype_node: node providing ``get_synonyms_by_prefix``,
            ``id`` and ``name``; its name may be filled in as a side effect.
        :return: list of ``(edge, anatomy_node)`` tuples.
        """
        results = []
        for curie in phenotype_node.get_synonyms_by_prefix('HP'):
            anatomies = self.phenotype_to_anatomy(curie)
            for r in anatomies:
                node = KNode(r['anatomy_id'],
                             type=node_types.ANATOMICAL_ENTITY,
                             name=r['anatomy_label'])
                # try to derive the label from the relation for the new ubergraph axioms
                predicate_label = r['predicate_label'] or '_'.join(
                    r['predicate'].split('#')[-1].split('.'))
                predicate = LabeledID(Text.obo_to_curie(r['predicate']),
                                      predicate_label)
                edge = self.create_edge(
                    phenotype_node, node,
                    'uberongraph.get_anatomy_by_phenotype_graph',
                    phenotype_node.id, predicate)
                # phenotype_to_anatomy in this class does not request an
                # 'input_label' column, so indexing it directly raised
                # KeyError.  Only use the label when the row provides one.
                input_label = r.get('input_label')
                if phenotype_node.name is None and input_label is not None:
                    phenotype_node.name = input_label
                results.append((edge, node))
                #These tend to be very high level terms.  Let's also get their parts to
                #be more inclusive.
                #TODO: there ought to be a more principled way to take care of this, but
                #it highlights the uneasy relationship between the high level world of
                #smartapi and the low-level sparql-vision.
                part_results = self.get_anatomy_parts(r['anatomy_id'])
                for pr in part_results:
                    pnode = KNode(pr['part'],
                                  type=node_types.ANATOMICAL_ENTITY,
                                  name=pr['partlabel'])
                    # Parts reuse the predicate of the anatomy they belong to.
                    pedge = self.create_edge(
                        phenotype_node, pnode,
                        'uberongraph.get_anatomy_by_phenotype_graph',
                        phenotype_node.id, predicate)
                    results.append((pedge, pnode))
        return results

    def get_out_by_in(self,
                      input_node,
                      output_type,
                      prefixes,
                      subject=True,
                      object=True):
        """Collect (edge, node) pairs for the neighbours of ``input_node``.

        Every synonym of ``input_node`` that has one of ``prefixes`` is passed
        to ``get_neighbor``, once with the input as subject and once as
        object.  Within one direction, rows repeating an already seen
        (predicate, output id) pair are dropped.  The edge source name is
        derived from the name of the calling function.

        :param input_node: node whose synonyms are queried
        :param output_type: node type handed to ``get_neighbor`` and to each
            created output node
        :param prefixes: iterable of CURIE prefixes selecting the synonyms
        :param subject: include statements where the input is the subject.
            Previously ignored; the default keeps the old behaviour.
        :param object: include statements where the input is the object.
            Previously ignored; the default keeps the old behaviour.
        :return: list of ``(edge, output_node)`` tuples
        """
        # stack()[1] is the frame that called this method, so this lookup has
        # to stay in this body rather than move into a helper.
        caller = f'uberongraph.{inspect.stack()[1][3]}'
        curies = set()
        for pre in prefixes:
            curies.update(input_node.get_synonyms_by_prefix(pre))
        # Keep every row together with the curie that produced it; the edge
        # used to record whichever curie happened to be iterated last.
        rows = {'subject': [], 'object': []}
        for curie in curies:
            if subject:
                rows['subject'] += [
                    (curie, r) for r in self.get_neighbor(
                        curie, output_type, subject=True)
                ]
            if object:
                rows['object'] += [
                    (curie, r) for r in self.get_neighbor(
                        curie, output_type, subject=False)
                ]
        returnresults = []
        for direction in ('subject', 'object'):
            done = set()
            for curie, r in rows[direction]:
                key = (r['p'], r['output_id'])
                if key in done:
                    continue
                done.add(key)
                predicate_curie = Text.obo_to_curie(r['p'])
                prefix = Text.get_curie(predicate_curie)
                # Prefixes are upper-cased, except for the ubergraph axioms one.
                if prefix != 'ubergraph-axioms.ofn':
                    prefix = prefix.upper()
                upper_cased_predicate_curie = prefix + ":" + Text.un_curie(
                    predicate_curie)
                predicate = LabeledID(upper_cased_predicate_curie, r['pLabel'])
                output_node = KNode(r['output_id'],
                                    type=output_type,
                                    name=r['output_label'])
                # The edge follows the direction of the underlying statement.
                if direction == 'subject':
                    edge = self.create_edge(input_node, output_node, caller,
                                            curie, predicate)
                else:
                    edge = self.create_edge(output_node, input_node, caller,
                                            curie, predicate)
                returnresults.append((edge, output_node))
        return returnresults

    #Don't get confused.  There is the direction of the statement (who is the subject
    # and who is the object) and which of them we are querying by.  We want to query
    # independent of direction i.e. let the input node be either the subject or the object.

    def get_anatomy_by_anatomy_graph(self, anatomy_node):
        """Return (edge, node) pairs linking ``anatomy_node`` to anatomical entities.

        Neighbours are looked up for the node's UBERON, CL and GO synonyms.
        """
        # Call get_out_by_in directly: it reads this method's name off the stack.
        synonym_prefixes = ['UBERON', 'CL', 'GO']
        return self.get_out_by_in(anatomy_node,
                                  output_type=node_types.ANATOMICAL_ENTITY,
                                  prefixes=synonym_prefixes)

    def get_phenotype_by_anatomy_graph(self, anatomy_node):
        """Return (edge, node) pairs linking ``anatomy_node`` to phenotypic features.

        Neighbours are looked up for the node's UBERON, CL and GO synonyms.
        """
        # Call get_out_by_in directly: it reads this method's name off the stack.
        synonym_prefixes = ['UBERON', 'CL', 'GO']
        return self.get_out_by_in(anatomy_node,
                                  output_type=node_types.PHENOTYPIC_FEATURE,
                                  prefixes=synonym_prefixes)

    def get_chemical_substance_by_anatomy(self, anatomy_node):
        """Return (edge, node) pairs linking ``anatomy_node`` to chemical substances.

        Neighbours are looked up for the node's UBERON, CL and GO synonyms.
        """
        # Call get_out_by_in directly: it reads this method's name off the stack.
        synonym_prefixes = ['UBERON', 'CL', 'GO']
        return self.get_out_by_in(anatomy_node,
                                  output_type=node_types.CHEMICAL_SUBSTANCE,
                                  prefixes=synonym_prefixes)

    def get_process_by_anatomy(self, anatomy_node):
        """Return (edge, node) pairs linking ``anatomy_node`` to biological processes.

        Neighbours are looked up for the node's UBERON, CL and GO synonyms.
        """
        # Call get_out_by_in directly: it reads this method's name off the stack.
        synonym_prefixes = ['UBERON', 'CL', 'GO']
        return self.get_out_by_in(anatomy_node,
                                  output_type=node_types.BIOLOGICAL_PROCESS,
                                  prefixes=synonym_prefixes)

    def get_activity_by_anatomy(self, anatomy_node):
        """Return (edge, node) pairs linking ``anatomy_node`` to molecular activities.

        Neighbours are looked up for the node's UBERON, CL and GO synonyms.
        """
        # Call get_out_by_in directly: it reads this method's name off the stack.
        synonym_prefixes = ['UBERON', 'CL', 'GO']
        return self.get_out_by_in(anatomy_node,
                                  output_type=node_types.MOLECULAR_ACTIVITY,
                                  prefixes=synonym_prefixes)

    def get_disease_by_anatomy_graph(self, anatomy_node):
        """Return (edge, node) pairs linking ``anatomy_node`` to diseases.

        Neighbours are looked up for the node's UBERON, CL and GO synonyms.
        """
        # Call get_out_by_in directly: it reads this method's name off the stack.
        synonym_prefixes = ['UBERON', 'CL', 'GO']
        return self.get_out_by_in(anatomy_node,
                                  output_type=node_types.DISEASE,
                                  prefixes=synonym_prefixes)

    def get_anatomy_by_process_or_activity(self, go_node):
        """Return (edge, node) pairs linking ``go_node`` to anatomical entities.

        Neighbours are looked up for the node's GO synonyms.
        """
        # Call get_out_by_in directly: it reads this method's name off the stack.
        synonym_prefixes = ['GO']
        return self.get_out_by_in(go_node,
                                  output_type=node_types.ANATOMICAL_ENTITY,
                                  prefixes=synonym_prefixes)

    def get_chemical_entity_by_process_or_activity(self, go_node):
        """Return (edge, node) pairs linking ``go_node`` to chemical substances.

        Neighbours are looked up for the node's GO synonyms.
        """
        # Call get_out_by_in directly: it reads this method's name off the stack.
        synonym_prefixes = ['GO']
        return self.get_out_by_in(go_node,
                                  output_type=node_types.CHEMICAL_SUBSTANCE,
                                  prefixes=synonym_prefixes)

    def get_process_by_disease(self, disease_node):
        """Return (edge, node) pairs linking ``disease_node`` to biological processes.

        Neighbours are looked up for the node's MONDO synonyms.
        """
        # Call get_out_by_in directly: it reads this method's name off the stack.
        synonym_prefixes = ['MONDO']
        return self.get_out_by_in(disease_node,
                                  output_type=node_types.BIOLOGICAL_PROCESS,
                                  prefixes=synonym_prefixes)

    def get_activity_by_disease(self, disease_node):
        """Return (edge, node) pairs linking ``disease_node`` to molecular activities.

        Neighbours are looked up for the node's MONDO synonyms.
        """
        # Call get_out_by_in directly: it reads this method's name off the stack.
        synonym_prefixes = ['MONDO']
        return self.get_out_by_in(disease_node,
                                  output_type=node_types.MOLECULAR_ACTIVITY,
                                  prefixes=synonym_prefixes)

    def get_anatomy_by_disease(self, disease_node):
        """Return (edge, node) pairs linking ``disease_node`` to anatomical entities.

        Neighbours are looked up for the node's MONDO synonyms.
        """
        # Call get_out_by_in directly: it reads this method's name off the stack.
        synonym_prefixes = ['MONDO']
        return self.get_out_by_in(disease_node,
                                  output_type=node_types.ANATOMICAL_ENTITY,
                                  prefixes=synonym_prefixes)

    def get_chemical_by_disease(self, disease_node):
        """Return (edge, node) pairs linking ``disease_node`` to chemical substances.

        Neighbours are looked up for the node's MONDO synonyms.
        """
        # Call get_out_by_in directly: it reads this method's name off the stack.
        synonym_prefixes = ['MONDO']
        return self.get_out_by_in(disease_node,
                                  output_type=node_types.CHEMICAL_SUBSTANCE,
                                  prefixes=synonym_prefixes)

    def get_process_by_phenotype(self, pheno_node):
        """Return (edge, node) pairs linking ``pheno_node`` to biological processes.

        Neighbours are looked up for the node's HP synonyms.
        """
        # Call get_out_by_in directly: it reads this method's name off the stack.
        synonym_prefixes = ['HP']
        return self.get_out_by_in(pheno_node,
                                  output_type=node_types.BIOLOGICAL_PROCESS,
                                  prefixes=synonym_prefixes)

    def get_chemical_by_phenotype(self, pheno_node):
        """Return (edge, node) pairs linking ``pheno_node`` to chemical substances.

        Neighbours are looked up for the node's HP synonyms.
        """
        # Call get_out_by_in directly: it reads this method's name off the stack.
        synonym_prefixes = ['HP']
        return self.get_out_by_in(pheno_node,
                                  output_type=node_types.CHEMICAL_SUBSTANCE,
                                  prefixes=synonym_prefixes)

    def get_activity_by_phenotype(self, pheno_node):
        """Return (edge, node) pairs linking ``pheno_node`` to molecular activities.

        Neighbours are looked up for the node's HP synonyms.
        """
        # Call get_out_by_in directly: it reads this method's name off the stack.
        synonym_prefixes = ['HP']
        return self.get_out_by_in(pheno_node,
                                  output_type=node_types.MOLECULAR_ACTIVITY,
                                  prefixes=synonym_prefixes)

    def get_anatomy_by_phenotype_graph(self, pheno_node):
        """Return (edge, node) pairs linking ``pheno_node`` to anatomical entities.

        Neighbours are looked up for the node's HP synonyms.
        """
        # Call get_out_by_in directly: it reads this method's name off the stack.
        synonym_prefixes = ['HP']
        return self.get_out_by_in(pheno_node,
                                  output_type=node_types.ANATOMICAL_ENTITY,
                                  prefixes=synonym_prefixes)

    def get_chemical_by_chemical(self, chem_node):
        """Return (edge, node) pairs linking ``chem_node`` to chemical substances.

        Neighbours are looked up for the node's CHEBI synonyms.
        """
        # Call get_out_by_in directly: it reads this method's name off the stack.
        synonym_prefixes = ['CHEBI']
        return self.get_out_by_in(chem_node,
                                  output_type=node_types.CHEMICAL_SUBSTANCE,
                                  prefixes=synonym_prefixes)

    def disease_get_ancestors(self, disease_node):
        """Return subclass-of edges from a MONDO disease to its ancestors.

        Ancestors are the terms the disease is a subclass of that are
        themselves subclasses of MONDO:0000001 and have a label.  The node
        itself is skipped if it shows up among its own ancestors.

        :param disease_node: node whose ``id`` is the disease CURIE
        :return: list of ``(edge, ancestor_node)`` tuples; an empty list when
            the CURIE prefix is not MONDO
        """
        disease_curie = disease_node.id
        if Text.get_curie(disease_curie) != "MONDO":
            return []
        query = f"""
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        prefix MONDO: <http://purl.obolibrary.org/obo/MONDO_>
        select distinct ?output_id ?label
        where {{
          graph <http://reasoner.renci.org/ontology/closure> {{
            $disease_id  rdfs:subClassOf ?output_id .
            ?output_id rdfs:subClassOf MONDO:0000001 .
          }}      
          
          graph <http://reasoner.renci.org/ontology>{{
          ?output_id rdfs:label ?label.
          }}
        }}
        """
        rows = self.triplestore.query_template(
            template_text=query,
            inputs={'disease_id': disease_curie},
            outputs=['output_id', 'label'])
        pairs = []
        for row in rows:
            ancestor = KNode(row['output_id'],
                             label=row['label'],
                             type=node_types.DISEASE_OR_PHENOTYPIC_FEATURE)
            # Only link to real ancestors, never to the node itself.
            if ancestor.id != disease_node.id:
                subclass_of = LabeledID(identifier='rdfs:subClassOf',
                                        label='subclass of')
                link = self.create_edge(
                    source_node=disease_node,
                    target_node=ancestor,
                    predicate=subclass_of,
                    provided_by='uberongraph.disease_get_ancestors',
                    input_id=disease_node.id)
                pairs.append((link, ancestor))
        return pairs

コード例 #8

ファイルを表示

ファイル: uberongraph.py プロジェクト: patlsc/robokop-interfaces

class UberonGraphKS(Service):
    """A knowledge source created by 1) Combining cell ontology, uberon, and
    HPO, 2) Reasoning over the total graph to realize many implicit edges.
    Created by Jim Balhoff"""
    def __init__(self, context):  #triplestore):
        """Initialise the service and create its triple store client.

        :param context: passed through unchanged to the base initialiser
        """
        # Hands the service name "uberongraph" and the context to the base class.
        super(UberonGraphKS, self).__init__("uberongraph", context)
        # self.url is not assigned here; presumably the base initialiser
        # provides it — confirm.
        self.triplestore = TripleStore(self.url)

    def query_uberongraph(self, query):
        """Run a SPARQL query on the triple store and return its result."""
        store = self.triplestore
        return store.execute_query(query)

    def cell_get_cellname(self, cell_identifier):
        """Identify the label of a cell type.

        :param cell_identifier: CL CURIE for the cell type, substituted for
            ``$cellID``.
        :return: rows returned by ``query_template`` for the output
            'cellLabel'.
        """
        # rdfs: is used in the pattern below, so it is declared explicitly.
        text = """
        prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        prefix CL: <http://purl.obolibrary.org/obo/CL_>
        select distinct ?cellLabel
        from <http://reasoner.renci.org/nonredundant>
        from <http://example.org/uberon-hp-cl.ttl>
        where {
                  $cellID rdfs:label ?cellLabel .
              }
        """
        results = self.triplestore.query_template(
            inputs={'cellID': cell_identifier},
            outputs=['cellLabel'],
            template_text=text)
        return results

    def get_anatomy_parts(self, anatomy_identifier):
        """Find the UBERON terms that are parts of the given anatomy term.

        An identifier starting with 'http' is first converted with
        ``Text.obo_to_curie``.  Every returned row gains a 'curie' entry
        holding the converted form of its 'part' value.

        :param anatomy_identifier: UBERON identifier, substituted for
            ``$anatomy_id``
        :return: rows for the outputs 'part' and 'partlabel', each with the
            extra 'curie' key
        """
        anatomy_curie = anatomy_identifier
        if anatomy_curie.startswith('http'):
            anatomy_curie = Text.obo_to_curie(anatomy_curie)
        sparql = """
        prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        prefix UBERON: <http://purl.obolibrary.org/obo/UBERON_>
        prefix BFO: <http://purl.obolibrary.org/obo/BFO_>
        select distinct ?part ?partlabel
        from <http://reasoner.renci.org/nonredundant> 
        from <http://example.org/uberon-hp-cl.ttl>
        where {
                $anatomy_id BFO:0000051 ?part .
                graph <http://reasoner.renci.org/redundant> {
                  ?part rdfs:subClassOf UBERON:0001062 .
                }
                ?part rdfs:label ?partlabel .
        }
        """
        rows = self.triplestore.query_template(
            template_text=sparql,
            inputs={'anatomy_id': anatomy_curie},
            outputs=['part', 'partlabel'])
        # Rows are annotated in place before being returned.
        for row in rows:
            row['curie'] = Text.obo_to_curie(row['part'])
        return rows

    def anatomy_to_cell(self, anatomy_identifier):
        """Identify cell types that are part of an anatomy term.

        The query keeps subclasses of CL:0000000 that are related to the
        anatomy term through BFO:0000050 and that have an rdfs:label.

        :param anatomy_identifier: anatomy CURIE substituted for
            ``$anatomyID``; the UBERON, CL and BFO prefixes are declared.
        :return: rows returned by ``query_template`` for the outputs
            'cellID' and 'cellLabel'.
        """
        # rdfs: is used in the pattern below, so it is declared explicitly.
        text = """
        prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        prefix UBERON: <http://purl.obolibrary.org/obo/UBERON_>
        prefix CL: <http://purl.obolibrary.org/obo/CL_>
        prefix BFO: <http://purl.obolibrary.org/obo/BFO_>
        select distinct ?cellID ?cellLabel
        from <http://reasoner.renci.org/nonredundant>
        from <http://example.org/uberon-hp-cl.ttl>
        where {
            graph <http://reasoner.renci.org/redundant> {
                ?cellID rdfs:subClassOf CL:0000000 .
                ?cellID BFO:0000050 $anatomyID .
            }
            ?cellID rdfs:label ?cellLabel .
        }

        """
        results = self.triplestore.query_template(
            inputs={'anatomyID': anatomy_identifier},
            outputs=['cellID', 'cellLabel'],
            template_text=text)
        return results

    def cell_to_anatomy(self, cell_identifier):
        """Identify anatomy terms that a cell type is part of.

        The query keeps subclasses of UBERON:0001062 that the cell is related
        to through BFO:0000050 and that have an rdfs:label.

        :param cell_identifier: CL CURIE for the cell type, substituted for
            ``$cellID``.
        :return: rows returned by ``query_template`` for the outputs
            'anatomyID' and 'anatomyLabel'.
        """
        # rdfs: is used in the pattern below, so it is declared explicitly.
        text = """
        prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        prefix CL: <http://purl.obolibrary.org/obo/CL_>
        prefix BFO: <http://purl.obolibrary.org/obo/BFO_>
        prefix UBERON: <http://purl.obolibrary.org/obo/UBERON_>
        select distinct ?anatomyID ?anatomyLabel
        from <http://reasoner.renci.org/nonredundant>
        from <http://example.org/uberon-hp-cl.ttl>
        where {
            graph <http://reasoner.renci.org/redundant> {
                ?anatomyID rdfs:subClassOf UBERON:0001062 .
                $cellID BFO:0000050 ?anatomyID .
            }
            ?anatomyID rdfs:label ?anatomyLabel .
        }
        """
        results = self.triplestore.query_template(
            inputs={'cellID': cell_identifier},
            outputs=['anatomyID', 'anatomyLabel'],
            template_text=text)
        return results

    def phenotype_to_anatomy(self, hp_identifier):
        """Identify anatomy terms related to a phenotype.

        The query looks for a ``?phenotype`` that the input is a subclass of
        and that is linked to an anatomy term through ``phenotype_of:``.
        Requiring the anatomy term to be a subclass of UBERON:0001062 keeps
        only anatomical entities.

        :param hp_identifier: HP identifier for the phenotype, substituted
            for ``$HPID``
        :return: rows returned by ``query_template`` for the outputs
            'anatomy_id', 'anatomy_label' and 'input_label'
        """
        sparql = """
        prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        prefix UBERON: <http://purl.obolibrary.org/obo/UBERON_>
        prefix HP: <http://purl.obolibrary.org/obo/HP_>
        prefix part_of: <http://purl.obolibrary.org/obo/BFO_0000050>
        prefix has_part: <http://purl.obolibrary.org/obo/BFO_0000051>
        prefix depends_on: <http://purl.obolibrary.org/obo/RO_0002502>
        prefix phenotype_of: <http://purl.obolibrary.org/obo/UPHENO_0000001>
        select distinct ?anatomy_id ?anatomy_label ?input_label
        from <http://reasoner.renci.org/nonredundant>
        from <http://example.org/uberon-hp-cl.ttl>
        where {
                  graph <http://reasoner.renci.org/redundant> {
                    ?anatomy_id rdfs:subClassOf UBERON:0001062 .
                  }
                  ?anatomy_id rdfs:label ?anatomy_label .
                  graph <http://reasoner.renci.org/nonredundant> {
                       ?phenotype phenotype_of: ?anatomy_id .
                  }
                  graph <http://reasoner.renci.org/redundant> {
                    $HPID rdfs:subClassOf ?phenotype .
                  }
                  $HPID rdfs:label ?input_label .
              }
        """
        columns = ['anatomy_id', 'anatomy_label', 'input_label']
        return self.triplestore.query_template(template_text=sparql,
                                               inputs={'HPID': hp_identifier},
                                               outputs=columns)

    def anatomy_to_phenotype(self, uberon_id):
        """Identify phenotypes related to an anatomy term.

        The query takes every ``?phenotype`` linked to the anatomy term
        through ``phenotype_of:`` and returns the labelled terms that are
        subclasses of it.

        :param uberon_id: anatomy identifier, substituted for ``$UBERONID``
        :return: rows returned by ``query_template`` for the outputs
            'pheno_id', 'anatomy_label' and 'pheno_label'
        """
        sparql = """
        prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        prefix UBERON: <http://purl.obolibrary.org/obo/UBERON_>
        prefix HP: <http://purl.obolibrary.org/obo/HP_>
        prefix part_of: <http://purl.obolibrary.org/obo/BFO_0000050>
        prefix has_part: <http://purl.obolibrary.org/obo/BFO_0000051>
        prefix depends_on: <http://purl.obolibrary.org/obo/RO_0002502>
        prefix phenotype_of: <http://purl.obolibrary.org/obo/UPHENO_0000001>
        select distinct ?pheno_id ?anatomy_label ?pheno_label
        from <http://reasoner.renci.org/nonredundant>
        from <http://example.org/uberon-hp-cl.ttl>
        where {
                  $UBERONID rdfs:label ?anatomy_label .
                  graph <http://reasoner.renci.org/nonredundant> {
                       ?phenotype phenotype_of: $UBERONID .
                  }
                  graph <http://reasoner.renci.org/redundant> {
                    ?pheno_id rdfs:subClassOf ?phenotype .
                  }
                  ?pheno_id rdfs:label ?pheno_label .
              }
        """
        columns = ['pheno_id', 'anatomy_label', 'pheno_label']
        return self.triplestore.query_template(template_text=sparql,
                                               inputs={'UBERONID': uberon_id},
                                               outputs=columns)

    def get_anatomy_by_cell_graph(self, cell_node):
        """Build (edge, anatomy node) pairs for a cell node.

        Each row returned by ``self.cell_to_anatomy(cell_node.id)`` is turned
        into an ANATOMY ``KNode`` (id from ``anatomyID``, name from
        ``anatomyLabel``) and joined to ``cell_node`` with a ``part_of``
        (BFO:0000050) edge created via ``self.create_edge``.

        :param cell_node: node whose ``id`` is used for the lookup and as the
            edge's input id.
        :return: list of ``(edge, anatomy_node)`` tuples, one per row.
        """
        rows = self.cell_to_anatomy(cell_node.id)
        part_of = LabeledID(identifier='BFO:0000050', label='part_of')
        pairs = []
        for row in rows:
            anatomy = KNode(Text.obo_to_curie(row['anatomyID']),
                            type=node_types.ANATOMY,
                            name=row['anatomyLabel'])
            link = self.create_edge(
                cell_node, anatomy,
                'uberongraph.get_anatomy_by_cell_graph',
                cell_node.id, part_of)
            pairs.append((link, anatomy))
        return pairs

    def get_cell_by_anatomy_graph(self, anatomy_node):
        """Build (edge, cell node) pairs for an anatomy node.

        Each row returned by ``self.anatomy_to_cell(anatomy_node.id)`` is
        turned into a CELL ``KNode`` (id from ``cellID``, name from
        ``cellLabel``). The edge is created with the cell node as the first
        node argument and ``anatomy_node`` as the second, using the
        ``part_of`` (BFO:0000050) predicate.

        :param anatomy_node: node whose ``id`` is used for the lookup and as
            the edge's input id.
        :return: list of ``(edge, cell_node)`` tuples, one per row.
        """
        rows = self.anatomy_to_cell(anatomy_node.id)
        part_of = LabeledID(identifier='BFO:0000050', label='part_of')
        pairs = []
        for row in rows:
            cell = KNode(Text.obo_to_curie(row['cellID']),
                         type=node_types.CELL,
                         name=row['cellLabel'])
            link = self.create_edge(
                cell, anatomy_node,
                'uberongraph.get_cell_by_anatomy_graph',
                anatomy_node.id, part_of)
            pairs.append((link, cell))
        return pairs

    def create_phenotype_anatomy_edge(self, node_id, node_label, input_id,
                                      phenotype_node):
        """Create an anatomy node and the edge tying it to a phenotype node.

        :param node_id: identifier of the anatomy term; converted with
            ``Text.obo_to_curie`` before use as the new node's id.
        :param node_label: name given to the new anatomy node.
        :param input_id: passed through to ``self.create_edge`` as the
            input id.
        :param phenotype_node: existing node passed as the second node
            argument of the edge.
        :return: ``(edge, anatomy_node)`` tuple.
        """
        anatomy = KNode(Text.obo_to_curie(node_id),
                        type=node_types.ANATOMY,
                        name=node_label)
        relation = LabeledID(identifier='GAMMA:0000002',
                             label='inverse of has phenotype affecting')
        link = self.create_edge(
            anatomy, phenotype_node,
            'uberongraph.get_anatomy_by_phenotype_graph',
            input_id, relation)
        return link, anatomy

    def create_anatomy_phenotype_edge(self, node_id, node_label, input_id,
                                      anatomy_node):
        """Create a phenotype node and the edge tying it to an anatomy node.

        :param node_id: identifier of the phenotype term; converted with
            ``Text.obo_to_curie`` before use as the new node's id.
        :param node_label: name given to the new phenotype node.
        :param input_id: passed through to ``self.create_edge`` as the
            input id.
        :param anatomy_node: existing node passed as the first node argument
            of the edge.
        :return: ``(edge, phenotype_node)`` tuple.
        """
        phenotype = KNode(Text.obo_to_curie(node_id),
                          type=node_types.PHENOTYPE,
                          name=node_label)
        relation = LabeledID(identifier='GAMMA:0000002',
                             label='inverse of has phenotype affecting')
        link = self.create_edge(
            anatomy_node, phenotype,
            'uberongraph.get_phenotype_by_anatomy_graph',
            input_id, relation)
        return link, phenotype

    def get_anatomy_by_phenotype_graph(self, phenotype_node):
        """Collect anatomy nodes (and their parts) related to a phenotype.

        For every ``HP`` synonym of ``phenotype_node`` the anatomies returned
        by ``self.phenotype_to_anatomy`` are converted to (edge, node) pairs.
        The parts of each anatomy, as reported by ``self.get_anatomy_parts``,
        are added as well. If ``phenotype_node.name`` is still ``None`` it is
        filled in from the ``input_label`` of a result row.

        :param phenotype_node: node providing ``get_synonyms_by_prefix`` and
            a ``name`` attribute.
        :return: list of ``(edge, anatomy_node)`` tuples.
        """
        collected = []
        for hp_curie in phenotype_node.get_synonyms_by_prefix('HP'):
            for row in self.phenotype_to_anatomy(hp_curie):
                pair = self.create_phenotype_anatomy_edge(
                    row['anatomy_id'], row['anatomy_label'],
                    hp_curie, phenotype_node)
                if phenotype_node.name is None:
                    phenotype_node.name = row['input_label']
                collected.append(pair)
                # The direct hits tend to be very high level terms, so their
                # parts are pulled in too in order to be more inclusive.
                # TODO: there ought to be a more principled way to take care
                # of this, but it highlights the uneasy relationship between
                # the high level world of smartapi and the low-level
                # sparql-vision.
                collected.extend(
                    self.create_phenotype_anatomy_edge(
                        part['part'], part['partlabel'],
                        hp_curie, phenotype_node)
                    for part in self.get_anatomy_parts(row['anatomy_id']))
        return collected

    def get_phenotype_by_anatomy_graph(self, anatomy_node):
        """Collect phenotype nodes related to an anatomy node.

        For every ``UBERON`` synonym of ``anatomy_node`` the rows returned by
        ``self.anatomy_to_phenotype`` are converted to (edge, node) pairs via
        ``self.create_anatomy_phenotype_edge``. If ``anatomy_node.name`` is
        still ``None`` it is filled in from the ``anatomy_label`` of a
        result row.

        :param anatomy_node: node providing ``get_synonyms_by_prefix`` and a
            ``name`` attribute.
        :return: list of ``(edge, phenotype_node)`` tuples.
        """
        collected = []
        for uberon_curie in anatomy_node.get_synonyms_by_prefix('UBERON'):
            for row in self.anatomy_to_phenotype(uberon_curie):
                pair = self.create_anatomy_phenotype_edge(
                    row['pheno_id'], row['pheno_label'],
                    uberon_curie, anatomy_node)
                if anatomy_node.name is None:
                    anatomy_node.name = row['anatomy_label']
                collected.append(pair)
        return collected

コード例 #9

ファイルを表示

ファイル: ontological_heirarchy.py プロジェクト: TranslatorIIPrototypes/robo-commons

class OntologicalHeirarchy(Service):
    """
    Service that calls uberongraph to resolve subclass relationships
    between ontological terms.
    """

    def __init__(self, context):
        """Set up the triplestore client, lookup tables and query template.

        :param context: forwarded unchanged to the ``Service`` constructor.
        """
        super(OntologicalHeirarchy, self).__init__("ontological_hierarchy",
                                                   context)
        # self.url is not assigned in this class; presumably it is set by
        # Service.__init__ -- confirm against the base class.
        self.triplestore = TripleStore(self.url)
        # Node type -> CURIE prefixes whose synonyms are used as query input.
        # NOTE(review): CELLULAR_COMPONENT maps to 'CL' here while its root
        # term below is a GO id -- confirm this is intended.
        self.prefix_set = {
            node_types.DISEASE_OR_PHENOTYPIC_FEATURE: ['HP', 'MONDO'],
            node_types.CELLULAR_COMPONENT: ['CL'],
            node_types.BIOLOGICAL_PROCESS_OR_ACTIVITY: ['GO'],
            node_types.ANATOMICAL_ENTITY: ['UBERON'],
            node_types.CHEMICAL_SUBSTANCE: ['CHEBI']
        }
        # Node type -> root term that bounds the ancestor search. The value
        # is substituted textually for $root_uri in the query, so every entry
        # must be a complete SPARQL IRI reference, angle brackets included.
        self.root_uris = {
            node_types.ANATOMICAL_ENTITY:
            "<http://purl.obolibrary.org/obo/UBERON_0001062>",
            node_types.DISEASE:
            "<http://purl.obolibrary.org/obo/MONDO_0000001>",
            node_types.MOLECULAR_ACTIVITY:
            "<http://purl.obolibrary.org/obo/GO_0003674>",
            node_types.BIOLOGICAL_PROCESS:
            "<http://purl.obolibrary.org/obo/GO_0008150>",
            node_types.CHEMICAL_SUBSTANCE:
            "<http://purl.obolibrary.org/obo/CHEBI_24431>",
            node_types.PHENOTYPIC_FEATURE:
            "<http://purl.obolibrary.org/obo/HP_0000118>",
            node_types.CELL:
            "<http://purl.obolibrary.org/obo/CL_0000000>",
            node_types.CELLULAR_COMPONENT:
            "<http://purl.obolibrary.org/obo/GO_0005575>"
        }
        # One PREFIX declaration per distinct supported prefix; sorted so the
        # generated query text is the same on every run.
        distinct_prefixes = sorted({
            pref
            for prefixes in self.prefix_set.values()
            for pref in prefixes
        })
        obo_prefixes = '\n'.join(
            f'PREFIX {pref}: <http://purl.obolibrary.org/obo/{pref}_>'
            for pref in distinct_prefixes)
        # $child_curie and $root_uri are left as template placeholders to be
        # filled in per query; doubled braces are literal braces in the
        # f-string.
        self.query = f"""
                    {obo_prefixes}
                    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>        
                    select distinct ?parent_id ?label
                    where {{
                      graph <http://reasoner.renci.org/ontology/closure> {{
                        $child_curie  rdfs:subClassOf ?parent_id .
                        ?parent_id rdfs:subClassOf $root_uri .
                      }}
                      graph <http://reasoner.renci.org/ontology>{{
                      ?parent_id rdfs:label ?label.
                      }}
                    }}
                    """

    def term_get_ancestors(self, child_node):
        """Return the ancestors of ``child_node`` as (edge, node) pairs.

        The search is bounded above by the root term registered for
        ``child_node.type`` in ``self.root_uris``: only parents that are
        themselves subclasses of that root are returned.

        :param child_node: node whose ``type``, ``export_labels``, ``id`` and
            ``get_synonyms_by_prefix`` are used to build the queries.
        :return: list of ``(edge, ancestor_node)`` tuples; empty when no root
            term is registered for the node's type.
        """
        root_uri = self.root_uris.get(child_node.type)
        if not root_uri:
            return []
        # Step 1: gather the synonyms of the input node that use a prefix
        # supported for any of its export labels.
        curie_set = set()
        for node_type in child_node.export_labels:
            for prefix in self.prefix_set.get(node_type, []):
                curie_set.update(child_node.get_synonyms_by_prefix(prefix))
        # Step 2: get the parents of each supported curie from uberongraph.
        outputs = []
        for curie in curie_set:
            results = self.triplestore.query_template(
                template_text=self.query,
                inputs={
                    'child_curie': curie,
                    'root_uri': root_uri
                },
                outputs=['parent_id', 'label'])

            for row in results:
                # NOTE(review): the ancestor is given the same type as the
                # input node; the original author flagged this as uncertain.
                ancestor_node = KNode(Text.obo_to_curie(row['parent_id']),
                                      name=row['label'],
                                      type=child_node.type)
                if ancestor_node.id == child_node.id:
                    # Refrain from adding an edge from the node to itself.
                    continue
                predicate = LabeledID(identifier='rdfs:subClassOf',
                                      label='subclass of')
                edge = self.create_edge(
                    source_node=child_node,
                    target_node=ancestor_node,
                    predicate=predicate,
                    provided_by='uberongraph.term_get_ancestors',
                    input_id=child_node.id)
                outputs.append((edge, ancestor_node))
        return outputs