Example #1
def test2():
    from service import ServiceContext
    oxo = OXO(ServiceContext.create_context())
    r = oxo.efo_to_doid(KNode('EFO:0000764', node_types.DISEASE))
    print(r)
def test_metabolite_to_enzyme(hmdb):
    hete = KNode('HMDB:HMDB0011134', type=node_types.CHEMICAL_SUBSTANCE)
    results = hmdb.metabolite_to_enzyme(hete)
    assert len(results) > 0
    node_ids = [node.id for edge, node in results]
    assert 'UniProtKB:Q96SL4' in node_ids
    def get(self, api_name, node, method_metadata):
        """ Invoke a GET requests on the specified API for value node with the given metadata. """
        result = []
        try:
            """ Find synonym in the input node of an appropriate input type for this operation. """
            input_arg = None
            input_type = None
            for synonym in node.synonyms:
                #print (f"synonym -> {synonym}")
                syn = self.identifiers.curie_instance2id(synonym)
                print(f"syn -> {syn}")
                print(f"syn -> {method_metadata.in_types}")
                for t in method_metadata.in_types:
                    if input_arg:
                        break
                    if t in syn:
                        input_arg = synonym.split(':')[1]
                        input_type = t
                        break
            """ Fail if no supplied synonym is of an appropriate type to make the call. """
            if not input_arg:
                raise ValueError(
                    f"Node {node} contains no synonyms of type {method_metadata.in_types} required by operation {method_metadata.op}"
                )
            """ Get the service metadata """
            service_metadata = self.get_service_metadata(
                api_name, input_type, method_metadata.out_type)
            logger.debug(
                "* Executing translator registry method: {0} in: {1} out: {2} template: {3} value: {4} "
                .format(api_name, input_type, method_metadata.out_type,
                        service_metadata.get_url, node))
            """ Parameterize and execute the HTTP request. """
            url = Template(service_metadata.get_url).render(input=input_arg)
            response = requests.get(url).json()
            #with open ("a.txt", "w") as stream:
            #    stream.write (json.dumps (response, indent=2))
            """ Expand the context with JSON-LD """
            jsonld_context = json.loads(json.dumps(service_metadata.jsonld),
                                        parse_float=lambda v: str(v))
            del jsonld_context['@context']['@version']
            expanded = jsonld.expand(
                response, {"expandContext": jsonld_context['@context']})
            """ Extract data from the returned JSON object. """
            """ TODO: Responses are complex. Figure out how to generalize
                         * Traversal of the response
                         * Decisions about how to create nodes and edges
                         * What to say about the semantic types of returned identifiers
            """
            print(json.dumps(expanded, indent=2))
            for obj in expanded:
                for predicate, v in obj.items():
                    if isinstance(v, list):
                        for item in v:
                            val = item["@id"] if "@id" in item else None
                            if val:
                                curie = self.identifiers.instance2curie(val)
                                #print (f"val: {val} curie: {curie}")
                                out_concept = method_metadata.out_concept
                                node_type = get_node_type(out_concept)
                                if curie and node_type:
                                    #print (f" ------> node type {node_type} id {val} ")
                                    new_node = KNode(curie, type=node_type)
                                    result.append(
                                        (self.new_edge(source=self.name,
                                                       function=predicate,
                                                       properties=response,
                                                       source_node=node,
                                                       target_node=new_node),
                                         new_node))

        except Exception as e:
            traceback.print_exc()
            exc_type, exc_value, exc_tb = sys.exc_info()
            exception_text = traceback.format_exception(
                exc_type, exc_value, exc_tb)
            logger.error(exception_text)
        return result
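
The get() method above renders a templated URL with jinja2 and expands the JSON response with pyld. A minimal, standalone sketch of just those two steps follows; the template string and context a caller would pass in are illustrative assumptions, not entries from a real registry.

import json
import requests
from jinja2 import Template
from pyld import jsonld

def fetch_and_expand(get_url_template, input_arg, jsonld_doc):
    """ Render the templated URL, issue the GET, and JSON-LD-expand the response.
    jsonld_doc is assumed to be a dict of the form {'@context': {...}}, mirroring
    service_metadata.jsonld in get() above. """
    url = Template(get_url_template).render(input=input_arg)
    response = requests.get(url).json()
    # Coerce floats to strings before expansion, as get() does.
    context = json.loads(json.dumps(jsonld_doc), parse_float=lambda v: str(v))
    context['@context'].pop('@version', None)
    return jsonld.expand(response, {"expandContext": context['@context']})
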
def x_test_diabetes_to_metabolite(rosetta, hmdb):
    diabetes = KNode('MONDO:0005148', type=node_types.DISEASE)
    rosetta.synonymizer.synonymize(diabetes)
    print(diabetes.synonyms)
    results = hmdb.disease_to_metabolite(diabetes)
    assert len(results) > 0
def test_enzyme_to_metabolite(hmdb):
    enzyme = KNode('UniProtKB:Q96SL4', type=node_types.GENE)
    results = hmdb.enzyme_to_metabolite(enzyme)
    assert len(results) > 0
    node_labels = [node.name for edge, node in results]
    assert '5-HETE' in node_labels
Example #6
 def name_to_efo (self, name):
     result = []
     response = self.request_concept (name)
     seen = {}
     for r in response:
         for a in r['aliases']:
             if a.startswith ("EFO:"):
                 if a not in seen:
                     logger.debug ("      -- appending a {}".format (a))
                     result.append ( ( self.get_edge (r, predicate='name_to_efo'), KNode(a, node_types.DISEASE ) ) )
                     seen[a] = a
     return result
Example #7
 def name_to_mesh (self, name):
     result = []
     response = self.request_concept (name)
     seen = {}
     for r in response:
         for a in r['aliases']:
             if a.startswith ("MESH:"):
                 if a not in seen:
                     #TODO: not sure what node type should be here...
                     result.append ( ( self.get_edge (r, predicate='name_to_mesh'), KNode(a, name.node_type) ) )
                     seen[a] = a
     return list(set(result))
Example #8
def test():
    from greent.service import ServiceContext
    hgnc = HGNC(ServiceContext.create_context())
    input_knode = KNode('NCBIGENE:3815', node_type=node_types.GENE)
    print(hgnc.ncbigene_to_uniprotkb(input_knode))
def test_imatinib_asthma(omnicorpus):
    drug_node = KNode('CHEBI:45783', type=node_types.DRUG)
    disease_node = KNode('MONDO:0004979', type=node_types.DISEASE)
    pmids = omnicorpus.get_shared_pmids(drug_node, disease_node)
    assert len(pmids) > 0
    assert 'PMID:15374841' in pmids
def test_non_HP_pheno_to_anatomy(uberon):
    #Arrhythmia occurs in...
    k = KNode('xx:0011675', type=node_types.PHENOTYPIC_FEATURE)
    results = uberon.get_anatomy_by_phenotype_graph(k)
    assert len(results) == 0
def test_disease_to_anatomy_rc_face(uberon):
    k = KNode('MONDO:0022407', type=node_types.DISEASE)
    results = uberon.get_anatomy_by_disease(k)
    newnodes = [node.id for edge, node in results]
    print(newnodes)
    assert 'UBERON:0001456' in newnodes
def test_pheno_to_anatomy_7354(uberon):
    #Arrhythmia occurs in...
    k = KNode('HP:0007354', type=node_types.PHENOTYPIC_FEATURE)
    results = uberon.get_anatomy_by_phenotype_graph(k)
    assert len(results) > 0
Example #13
    def execute_knowledge_graph_program(self, inputs, program):
        """ Construct a knowledge graph given a set of input nodes and a program - a list
        of frames, each of which contains the name of a concept, a collector containing a list of edges and
        nodes where all target nodes are instances of the frame's concept, and a list of operations for 
        transitioning from one frame's concept space to the next frame's.

        This method assumes a linear path.
        """
        """ Convert inputs to be structured like edges-and-nodes returned by a previous services. """
        next_nodes = {
            key: [(None, KNode(val, type=key)) for val in val_list]
            for key, val_list in inputs.items()
        }
        logger.debug(f"inputs: {next_nodes}")
        """ Validated the input program. """
        if len(program) == 0:
            logger.info(f"No program found for {query}")
            return []
        logger.info(f"program> {program}")
        result = []
        """ Each frame's name is a concept. We use the top frame's as a key to index the arguments. """
        top_frame = program[0]
        inputs = next_nodes[top_frame.name]
        for i in inputs:
            self.synonymizer.synonymize(i[1])
        """ Stack is the overall executable. We prepend a base frame with a collector primed with input arguments. """
        stack = [Frame(collector=inputs)] + program
        """ Execute the program frame by frame. """
        for index, frame in enumerate(program):
            # logger.debug (f"--inputs: {stack[index].collector}")
            for k, o in frame.ops.items():
                logger.debug(f"-- frame-index--> {frame} {index} {k}=>{o.op}")
            """ Process each node in the collector. """
            index = 0
            for edge, source_node in stack[index].collector:
                """ Process each operator in the frame. """
                for op_name, operator in frame.ops.items():
                    """ Generate a cache key. """
                    key = f"{operator.op}({source_node.id})"
                    try:
                        logger.debug(f"  --op: {key}")
                        """ Load the object from cache. """
                        response = self.cache.get(key)
                        if not response:
                            """ Invoke the knowledge source with the given input. """
                            op = self.get_ops(operator.op)
                            if not op:
                                raise Exception(
                                    f"Unable to find op: {operator.op}")
                            response = op(source_node)
                            for edge, node in response:
                                """ Process the edge adding metadata. """
                                if isinstance(edge, KEdge):
                                    edge.predicate = operator.predicate
                                    edge.source_node = source_node
                                    self.synonymizer.synonymize(node)
                                    edge.target_node = node
                                """ Validate the id space of the returned data maps to the target concept. """
                                if index < len(program) - 1:
                                    target_concept_name = program[index + 1].name
                                    prefixes = self.type_graph.concept_model.get(
                                        target_concept_name).id_prefixes
                                    valid = any([
                                        node.id.upper().startswith(p.upper())
                                        for p in prefixes
                                    ])
                                    if not valid:
                                        logger.debug(
                                            f"Operator {operator} wired to type: {concept_name} returned node with id: {node.id}"
                                        )
                            """ Cache the annotated and validated response. """
                            self.cache.set(key, response)
                        """ Add processed edges to the overall result. """
                        result += [edge for edge, node in response]
                        logger.debug(f"{key} => {Text.short(response)}")
                        """ Response edges go in the collector to become input for the next operation. """
                        frame.collector += response
                    except Exception as e:
                        traceback.print_exc()
                        logger.warning(f"Error invoking> {key}")
        logger.debug(f"returning {len(result)} values.")
        return result
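
The prefix validation buried in the loop above (checking that a returned node's identifier belongs to the next frame's concept) reads more clearly in isolation. A minimal restatement, assuming the allowed prefixes are supplied as a plain list rather than looked up from the concept model:

def node_id_matches_concept(node_id, id_prefixes):
    """ True if node_id starts with any allowed CURIE prefix, case-insensitively. """
    return any(node_id.upper().startswith(p.upper()) for p in id_prefixes)

# e.g. node_id_matches_concept('UBERON:0001456', ['UBERON', 'CL']) -> True
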
Example #14
 def compile_results(self, fname, ntype, searchResults):
     result = []
     for other in searchResults:
         result.append((KEdge('oxo', fname, is_synonym=True),
                        KNode(identifier=other['curie'], node_type=ntype)))
     return result
def test_huntington_is_genetic(mondo2):
    huntington = KNode('OMIM:143100', type=node_types.DISEASE)
    assert mondo2.is_genetic_disease(huntington)
def test_two_disease(omnicorpus):
    disease1 = KNode('MONDO:0005090', type=node_types.DISEASE)
    disease2 = KNode('MONDO:0003425', type=node_types.DISEASE)
    pmids = omnicorpus.get_shared_pmids(disease1, disease2)
    assert len(pmids) > 0
Example #17
 def name_to_doid (self, name):
     result = []
     response = self.request_concept (name, node_types.DISEASE)
     seen = {}
     for r in response:
         got_doid = False
         for a in r['aliases']:
             if a.startswith ("DOID:"):
                 got_doid = True
                 if a not in seen:
                     logger.debug ("      -- appending a {}".format (a))
                     result.append ( ( self.get_edge (r, predicate='name_to_doid'), KNode(a, node_types.DISEASE ) ) )
                     seen[a] = a
     logger.info("Returning {} doids".format(len(result)))
     return result
def test_list_returns_zero(omnicorpus):
    disease_node = KNode('UBERON:0013694', type=node_types.ANATOMY)
    go_node = KNode('GO:0045892', type=node_types.PROCESS)
    results = omnicorpus.get_shared_pmids(disease_node, go_node)
    assert len(results) == 0
Example #19
 def name_to_drugbank (self, name):
     response = self.request_concept (name)
     result = []
     seen = {}
     for r in response:
         for a in r['aliases']:
             if a.startswith ("DRUGBANK:"):
                 if a not in seen:
                     result.append ( ( self.get_edge (r, predicate='name_to_drugbank'), KNode(a, node_types.DRUG) ) )
                     seen[a] = a
     return list(set(result))
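
Examples #6, #7, #17 and #19 above all follow the same shape: request concepts by name, keep aliases that carry a particular CURIE prefix, and de-duplicate. A generic sketch of that pattern, with the service object passed explicitly instead of being self (an assumption for illustration; request_concept and get_edge mirror the calls in the code above):

def aliases_with_prefix(service, name, prefix, predicate, node_type):
    result = []
    seen = set()
    for r in service.request_concept(name):
        for a in r['aliases']:
            if a.startswith(prefix) and a not in seen:
                result.append((service.get_edge(r, predicate=predicate), KNode(a, node_type)))
                seen.add(a)
    return result

# e.g. aliases_with_prefix(service, 'asthma', 'EFO:', 'name_to_efo', node_types.DISEASE)
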
def test_pmid_count(omnicorpus):
    uberon_node = KNode('UBERON:0013694', type=node_types.ANATOMY)
    n = omnicorpus.count_pmids(uberon_node)
    #Checked by hand in the database
    assert n == 2058
def test_metabolite_to_enzyme_with_syn(rosetta, hmdb):
    chem = KNode('CHEBI:27732', type=node_types.CHEMICAL_SUBSTANCE)
    rosetta.synonymizer.synonymize(chem)
    print(chem.synonyms)
    results = hmdb.metabolite_to_enzyme(chem)
    assert len(results) > 0
Example #22
    def sequence_variant_to_gene(self, variant_node):

        flanking_region_size = 500000
        results = []

        found_valid_robokop_key = False
        robokop_ids = variant_node.get_synonyms_by_prefix('ROBO_VARIANT')
        if not robokop_ids:
            logger.debug(
                f'ensembl: robokop variant key not found for variant: {variant_node.id}'
            )
            return results
        else:
            try:
                for robokop_key in robokop_ids:
                    robokop_data = Text.un_curie(robokop_key).split('|')
                    reference_genome = robokop_data[0]
                    if reference_genome == 'HG38':
                        found_valid_robokop_key = True
                    else:
                        continue
                    chromosome = robokop_data[1]
                    start_position = int(robokop_data[2])
                    end_position = int(robokop_data[3])
            except IndexError as e:
                logger.debug(
                    f'ensembl: robokop variant key not set properly for variant: {variant_node.id} - {robokop_ids[0]}'
                )
                return results

        if not found_valid_robokop_key:
            logger.debug(
                f'ensembl: latest robokop variant key not found for variant: {variant_node.id}'
            )
            return results

        flanking_min = start_position - flanking_region_size
        if flanking_min < 0:
            flanking_min = 0
        flanking_max = end_position + flanking_region_size

        db_conn = self.create_or_connect_to_genes_db()
        db_cursor = db_conn.cursor()

        #logger.info(f'looking for genes overlapping {flanking_min}-{flanking_max}')

        db_cursor.execute(
            self.gene_range_select_sql,
            (chromosome, flanking_min, flanking_min, flanking_max,
             flanking_max, flanking_min, flanking_max))

        genes_in_region = db_cursor.fetchall()
        for gene_id_text, gene_start, gene_end in genes_in_region:
            #cast this to make neo4j happy
            gene_id = str(gene_id_text)
            #logger.info(f'Found matching gene: {gene_id},{gene_start},{gene_end}')
            gene_node = KNode(f'ENSEMBL:{gene_id}',
                              name=f'{gene_id}',
                              type=node_types.GENE)
            if start_position < gene_start:
                distance = gene_start - start_position
            elif end_position > gene_end:
                distance = end_position - gene_end
            else:
                distance = 0
            props = {'distance': distance}
            edge = self.create_edge(variant_node,
                                    gene_node,
                                    'ensembl.sequence_variant_to_gene',
                                    variant_node.id,
                                    self.var_to_gene_predicate,
                                    url=self.gene_batch_url,
                                    properties=props)
            results.append((edge, gene_node))

        logger.info(
            f'ensembl sequence_variant_to_gene found {len(results)} results for {variant_node.id}'
        )

        return results
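
A minimal sketch of the ROBO_VARIANT synonym parsing performed above: the un-curied value is expected to be pipe-delimited as reference_genome|chromosome|start|end. The literal in the usage comment is an assumption for illustration only.

def parse_robokop_key(robokop_value):
    """ Split an un-curied ROBO_VARIANT value into (reference_genome, chromosome, start, end). """
    parts = robokop_value.split('|')
    return parts[0], parts[1], int(parts[2]), int(parts[3])

# e.g. parse_robokop_key('HG38|17|43044295|43044300') -> ('HG38', '17', 43044295, 43044300)
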
def test_disease_to_metabolite(hmdb):
    asthma = KNode('UMLS:C0004096', type=node_types.DISEASE)
    results = hmdb.disease_to_metabolite(asthma)
    assert len(results) > 0
    node_labels = [node.name for edge, node in results]
    assert '5-HETE' in node_labels
Example #24
    def sequence_variant_to_sequence_variant(self, variant_node):
        ld_url = '/ld/human/'
        options_url = '?r2=0.8'
        population = '1000GENOMES:phase_3:MXL'

        return_results = []
        # with self.redis.pipeline() as redis_pipe:
        dbsnp_curie_ids = variant_node.get_synonyms_by_prefix('DBSNP')
        for dbsnp_curie in dbsnp_curie_ids:
            variant_id = Text.un_curie(dbsnp_curie)
            query_url = f'{self.url}{ld_url}{variant_id}/{population}{options_url}'
            query_response = requests.get(
                query_url, headers={"Content-Type": "application/json"})
            if query_response.status_code == 200:
                query_json = query_response.json()
                variant_results = self.parse_ld_variants_from_ensembl(
                    query_json)
                for variant_info in variant_results:
                    new_variant_id = variant_info[0]
                    r_squared = variant_info[1]
                    props = {'r2': r_squared}
                    new_variant_curie = f'DBSNP:{new_variant_id}'
                    new_variant_node = KNode(new_variant_curie,
                                             type=node_types.SEQUENCE_VARIANT)
                    new_variant_node.add_export_labels(
                        [node_types.SEQUENCE_VARIANT])
                    edge = self.create_edge(
                        variant_node,
                        new_variant_node,
                        'ensembl.sequence_variant_to_sequence_variant',
                        dbsnp_curie,
                        self.var_to_var_predicate,
                        url=query_url,
                        properties=props)
                    return_results.append((edge, new_variant_node))
                    # new_rsid_node = None
                    # is_new_dbsnp = False
                    # synonyms = self.cache.get(f'synonymize({new_variant_curie})')
                    # if synonyms is None:
                    #     new_rsid_node = KNode(new_variant_curie, name=f'{new_variant_id}', type=node_types.SEQUENCE_VARIANT)
                    #     synonyms = self.clingen.get_synonyms_by_other_ids(new_rsid_node)
                    #     redis_pipe.set(f'synonymize({new_variant_curie})', pickle.dumps(synonyms))
                    #     is_new_dbsnp = True
                    # caid_count = 0
                    # caid_node = None
                    # for synonym in synonyms:
                    #     if Text.get_curie(synonym.identifier) == 'CAID':
                    #         caid_count += 1
                    #         caid_node = KNode(synonym.identifier, name=f'{synonym.label}', type=node_types.SEQUENCE_VARIANT)
                    #         edge = self.create_edge(variant_node, caid_node, 'ensembl.sequence_variant_to_sequence_variant', dbsnp_curie, self.var_to_var_predicate, url=query_url, properties=props)
                    #         return_results.append((edge, caid_node))
                    #         found_caid = True
                    # if caid_count > 2 we can't cache it easily right now so we skip it and let synonymizer do it later
                    # if caid_count == 1 and is_new_dbsnp:
                    # assume we didn't cache the CAID yet if the dbsnp is new and do it if needed
                    # if self.cache.get(f'synonymize({caid_node.id})') is None:
                    #     redis_pipe.set(f'synonymize({caid_node.id})',  pickle.dumps(synonyms))
                    # elif caid_count == 0:
                    #     if not new_rsid_node:
                    #         new_rsid_node = KNode(new_variant_curie, name=f'{new_variant_id}', type=node_types.SEQUENCE_VARIANT)
                    #     edge = self.create_edge(variant_node, new_rsid_node, 'ensembl.sequence_variant_to_sequence_variant', dbsnp_curie, self.var_to_var_predicate, url=query_url, properties=props)
                    #     return_results.append((edge, new_rsid_node))

                #elif query_response.status_code == 429:
                #   handle the rate limiting by waiting and retrying
                #
            else:
                logger.error(
                    f'Ensembl returned a non-200 response for {variant_node.id}: {query_response.status_code}'
                )
            # redis_pipe.execute()

        return return_results
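
The LD lookup above issues a GET against Ensembl's /ld/human endpoint. A minimal standalone sketch of that request, assuming the public Ensembl REST base URL (the original code takes the base from self.url); the rsid in the usage comment is illustrative.

import requests

def ld_neighbors(rsid, population='1000GENOMES:phase_3:MXL', r2=0.8):
    """ Return raw LD records for a variant, or an empty list on a non-200 response. """
    url = f'https://rest.ensembl.org/ld/human/{rsid}/{population}?r2={r2}'
    response = requests.get(url, headers={"Content-Type": "application/json"})
    return response.json() if response.status_code == 200 else []

# e.g. ld_neighbors('rs56116432')
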
def test_metabolite_to_disease(hmdb):
    hete = KNode('HMDB:HMDB0011134', type=node_types.CHEMICAL_SUBSTANCE)
    results = hmdb.metabolite_to_disease(hete)
    assert len(results) > 0
    node_labels = [node.name for edge, node in results]
    assert 'Asthma' in node_labels
Example #26
 def process_associations(self,
                          associations,
                          relationship_id,
                          function,
                          target_node_type,
                          input_identifier,
                          url,
                          input_node,
                          reverse=False):
     """Given a response from biolink, create our edge and node structures.
     Sometimes (as in pathway->Genes) biolink returns the query as the object, rather
     than the subject.  reverse=True will handle this case, bringing back the subject
     of the response, rather than the object.  Fortunately, it looks like this is just per-function.
     We could instead try to see if the subject id matched our input id, etc... if the same
     function sometimes spun things around."""
     edge_nodes = []
     for association in associations:
          # We only want direct links. When we query for entity A, biolink also returns associations
          # whose subjects are subclasses of A ((new_subject)-is_a->A), so relations of subclasses get
          # pushed up to the parent class. Check that the subject is actually the node we asked for.
         if association['subject']['id'] != input_node.id:
             continue
         pubs = []
         if 'publications' in association and association[
                 'publications'] is not None:
             for pub in association['publications']:
                 # Sometimes, we get back something like "uniprotkb" instead of a PMID.  We don't want it.
                 pubid_prefix = pub['id'][:4].upper()
                 if pubid_prefix == 'PMID':
                     # Sometimes, there is something like: 'id': 'PMID:9557891PMID:9557891' !?
                     # Oh, and even better, sometimes there is this: 'id': 'PMID:12687501:PMID:17918734'
                     # I will refrain from cursing in code.
                     ids = pub['id'].split('PMID:')
                     for n in ids[1:]:
                         while n.endswith(':'):
                             n = n[:-1]
                         pubs.append(f'PMID:{n}')
         inverse = False
         if 'relation' in association:
             inverse = association['relation'].get('inverse', False)
         if reverse or inverse:
             source_node = KNode(association['object']['id'],
                                 type=target_node_type,
                                 name=association['object']['label'])
             target_node = input_node
             newnode = source_node
         else:
             target_node = KNode(association['object']['id'],
                                 type=target_node_type,
                                 name=association['object']['label'])
             source_node = input_node
             newnode = target_node
         #Deal with biolink's occasional propensity to return Null relations
         # This basically happens only with the gene_get_function call, so if that gets fixed, we might be
         # able to make this a little nicer
         predicate_id = association['relation']['id']
         if (predicate_id is None):
             predicate_id = relationship_id
         elif (':' not in predicate_id):
             if predicate_id in self.label2id:
                 predicate_id = self.label2id[predicate_id]
             else:
                 logging.getLogger('application').error(
                     f'Relationship Missing: {predicate_id}')
                 predicate_id = relationship_id
         predicate_label = association['relation']['label']
         #now back to the show
         predicate = LabeledID(identifier=predicate_id,
                               label=predicate_label)
         try:
             edge = self.create_edge(source_node,
                                     target_node,
                                     f'biolink.{function}',
                                     input_identifier,
                                     predicate,
                                     publications=pubs,
                                     url=url)
         except Exception as e:
             print(e)
             print(association['publications'])
             print(pubs)
             raise e
         edge_nodes.append((edge, newnode))
     return edge_nodes
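
The publication-id cleanup above guards against biolink occasionally concatenating identifiers (e.g. 'PMID:9557891PMID:9557891' or 'PMID:12687501:PMID:17918734'). The same logic as a small standalone helper:

def clean_pmids(raw_id):
    """ Split possibly-concatenated PMID strings and strip stray trailing colons. """
    pubs = []
    if raw_id[:4].upper() == 'PMID':
        for n in raw_id.split('PMID:')[1:]:
            while n.endswith(':'):
                n = n[:-1]
            pubs.append(f'PMID:{n}')
    return pubs

# clean_pmids('PMID:12687501:PMID:17918734') -> ['PMID:12687501', 'PMID:17918734']
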
def test_metabolite_to_pathway(hmdb):
    hete = KNode('HMDB:HMDB0011134', type=node_types.CHEMICAL_SUBSTANCE)
    results = hmdb.metabolite_to_pathway(hete)
    assert len(results) > 0
    node_ids = [node.id for edge, node in results]
    assert 'SMPDB:SMP00710' in node_ids
def test_is_genetic_diabetes_genetic(mondo2):
    rgd = KNode('MONDO:0015967',
                name='rare genetic disease',
                type=node_types.DISEASE)
    assert mondo2.is_genetic_disease(rgd)
Example #29
 def graph_uniprot_to_hgnc(self, uniprot_symbol):
     result = self.uniprot_to_hgnc(uniprot_symbol)
     return [(self.get_edge(r, predicate='synonym'),
              KNode('HGNC:{0}'.format(r['hgncID'].split(':')[-1]),
                    node_types.GENE)) for r in result]
Example #30
def test():
    m = Mondo(ServiceContext.create_context())
    huntington = KNode('OMIM:143100', node_types.DISEASE)
    print(m.is_genetic_disease(huntington))
    print('------')