コード例 #1
0
 def get_anatomy_parts(self, anatomy_identifier):
     """Find the terms that are parts of the given anatomical term.

     An identifier starting with ``http`` is first converted to a CURIE with
     ``Text.obo_to_curie``.  The identifier is then bound to ``$anatomy_id``
     in a SPARQL template that selects every ``?part`` linked to it through
     ``BFO:0000051``, keeps the parts that are subclasses of
     ``UBERON:0001062`` and fetches each part's label.

     Returns the rows produced by ``self.triplestore.query_template`` (each
     carrying ``part`` and ``partlabel``), with a ``curie`` entry added to
     every row.
     """
     sparql = """
     prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
     prefix UBERON: <http://purl.obolibrary.org/obo/UBERON_>
     prefix BFO: <http://purl.obolibrary.org/obo/BFO_>
     select distinct ?part ?partlabel
     from <http://reasoner.renci.org/nonredundant> 
     from <http://example.org/uberon-hp-cl.ttl>
     where {
             $anatomy_id BFO:0000051 ?part .
             graph <http://reasoner.renci.org/redundant> {
               ?part rdfs:subClassOf UBERON:0001062 .
             }
             ?part rdfs:label ?partlabel .
     }
     """
     query_id = anatomy_identifier
     if query_id.startswith('http'):
         query_id = Text.obo_to_curie(query_id)
     rows = self.triplestore.query_template(
         template_text=sparql,
         inputs={'anatomy_id': query_id},
         outputs=['part', 'partlabel'],
     )
     # Attach the CURIE form of each part IRI next to the raw IRI.
     for row in rows:
         row['curie'] = Text.obo_to_curie(row['part'])
     return rows
コード例 #2
0
def build_exact_sets(o,u):
    """Build one identifier set per id known to ``o``, using exact matches only.

    For every id returned by ``o.get_ids()`` the exact matches from
    ``o.get_exact_matches`` are passed through ``Text.upper_curie``, entries
    starting with ``ICD`` are dropped, and a ``LabeledID`` for the id itself
    (label taken from ``u.get_label``) is added.  Progress and per-id debug
    information are printed to stdout.

    Returns a list of sets, one per id, in ``get_ids()`` order.
    """
    sets = []
    mids = o.get_ids()
    total = len(mids)
    print(total)
    started = dt.now()
    for n, mid in enumerate(mids):
        if n % 100 == 0 and n > 0:
            # total_seconds() covers the whole interval; timedelta.seconds
            # wraps around once a run lasts longer than a day, which made the
            # progress report and the time estimate wrong on long runs.
            delt = int((dt.now() - started).total_seconds())
            f = n / total
            print(f'{n}/{total} = {f} in {delt} s')
            print(f'  estimated time remaining = {delt * (1-f)/(f)}')
        #FWIW, ICD codes tend to be mapped to multiple MONDO identifiers, leading to mass confusion. So we
        #just excise them here.  It's possible that we'll want to revisit this decision in the future.  If so,
        #then we probably will want to set a 'glommable' and 'not glommable' set.
        print(mid)
        dbx = [ Text.upper_curie(x) for x in o.get_exact_matches(mid) ]
        print(dbx)
        dbx = { x for x in dbx if not x.startswith('ICD') }
        # The label is looked up with the id as given, before it is upper-cased.
        label = u.get_label(mid)
        print(label)
        dbx.add(LabeledID(Text.upper_curie(mid), label))
        sets.append(dbx)
    return sets
コード例 #3
0
 def term_to_term(self,node_a,node_b,limit = 10000):
     """Build a co-occurrence edge between two nodes from their ICD9 synonyms.

     Every ICD9 synonym of ``node_a`` is paired with every ICD9 synonym of
     ``node_b``.  Codes missing from ``self.icd9_codes`` are skipped with a
     debug message.  For a pair found in ``self.icd9_paircounts`` the stored
     record is used; otherwise a record is synthesised from the individual
     counts with ``'c': '<11'`` and no p-value.

     Returns ``self.make_edge(...)`` when at least one pair was collected,
     otherwise ``None``.  ``limit`` is accepted but not used here.
     """
     codes_a = [syn for syn in node_a.synonyms if syn.startswith('ICD9')]
     codes_b = [syn for syn in node_b.synonyms if syn.startswith('ICD9')]
     if not codes_a or not codes_b:
         # Co-occurrence cannot be computed unless both sides have ICD9 codes.
         return
     pairs = []
     for curie_a in codes_a:
         code_a = Text.un_curie(curie_a)
         if code_a not in self.icd9_codes:
             logging.getLogger('application').debug('Dont have data for {}'.format(code_a))
             continue
         for curie_b in codes_b:
             code_b = Text.un_curie(curie_b)
             if code_b not in self.icd9_codes:
                 logging.getLogger('application').debug('Dont have data for {}'.format(code_b))
                 continue
             # Both codes are known to our data at this point.
             pair = (code_a, code_b)
             if pair in self.icd9_paircounts:
                 pairs.append( (pair, self.icd9_paircounts[pair]) )
                 continue
             # No stored pair count: there were fewer than 11 shared counts.
             count_a = self.icd9_codes[code_a]
             count_b = self.icd9_codes[code_b]
             expected = float(count_a) * float(count_b) / self.total
             pairs.append( (pair, {'c1': count_a, 'c2': count_b, 'c': '<11', 'e': expected, 'p':None}) )
     if pairs:
         return self.make_edge(pairs, node_a, node_b)
     return None
コード例 #4
0
 def gene_to_drug_expanded(self, gene_node):
     """Collect (edge, drug node) pairs for a gene from the expanded CTD table.

     For every NCBIGENE synonym of ``gene_node`` the CTD service is queried
     and each row accepted by ``self.check_expanded_gene_chemical_row`` is
     turned into an edge.  The row's ``direction`` decides which node is the
     subject.  Within one gene identifier, only the first edge for each
     (drug id, original predicate label) pair is kept.
     """
     pairs = []
     for gene_curie in gene_node.get_synonyms_by_prefix('NCBIGENE'):
         seen = set()
         geneid = Text.un_curie(gene_curie)
         url = f"{self.url}CTD_chem_gene_expanded_geneID/ncbigene:{geneid}/"
         rows = requests.get (url).json ()
         for row in rows:
             good_row, predicate_label, props, pmids = self.check_expanded_gene_chemical_row(row)
             if not good_row:
                 continue
             predicate = self.normalize_predicate(
                 LabeledID(identifier=f"CTD:{Text.snakify(predicate_label)}", label=predicate_label)
             )
             #Should this be substance?
             drug_node = KNode(Text.upper_curie(row['chemicalID']), type=node_types.CHEMICAL_SUBSTANCE, name=row['chem_label'])
             if row['direction'] == '->':
                 source, target = drug_node, gene_node
             else:
                 source, target = gene_node, drug_node
             edge = self.create_edge(source,target,'ctd.gene_to_drug_expanded',gene_curie,predicate,properties = props,url=url,publications=pmids)
             # Ideally the key would use edge.standard_predicate.label, but the
             # standard predicates are not specific enough yet.
             key = (drug_node.id, edge.original_predicate.label)
             if key in seen:
                 continue
             seen.add(key)
             pairs.append( (edge,drug_node) )
     return pairs
コード例 #5
0
 def term_get_ancestors(self, node_type, root_iri):
     """Run ``self.query`` for ``root_iri`` and build subclass nodes and edges.

     Each result row yields a child node and a parent node (both typed
     ``node_type``) joined by an ``rdfs:subClassOf`` edge from child to
     parent.  Rows whose child and parent share an id are skipped.

     Returns a ``(nodes, edges)`` tuple of sets.
     """
     rows = self.triplestore.query_template(
         inputs={'root_uri': root_iri},
         outputs=['parent_id', 'parent_label', 'child_id', 'child_label'],
         template_text=self.query)
     print('found total ', len(rows), ' results.')
     nodes, edges = set(), set()
     for row in rows:
         # Output type would be same as input type?
         parent = KNode(Text.obo_to_curie(row['parent_id']),
                        name=row['parent_label'],
                        type=node_type)
         child = KNode(Text.obo_to_curie(row['child_id']),
                       name=row['child_label'],
                       type=node_type)
         if parent.id != child.id:
             subclass_of = LabeledID(identifier='rdfs:subClassOf',
                                     label='subclass of')
             edge = self.create_edge(
                 source_node=child,
                 target_node=parent,
                 predicate=subclass_of,
                 provided_by='uberongraph.term_get_ancestors',
                 input_id=child.id)
             nodes.add(child)
             nodes.add(parent)
             edges.add(edge)
         # else: a node is never linked to itself
     return nodes, edges
コード例 #6
0
 def normalize(self, node):
     """Given a node, which will have many potential identifiers, choose the best identifier to be the node ID,
     where 'best' is defined by the order in which identifiers appear in the id prefix configurations within the concept model.

     The node is modified in place (``id``, ``name`` and ``synonyms``); nothing is returned.

     Steps:
       1. When an identifier occurs more than once among the synonyms, drop its copies labelled
          ``None`` or ``''``.
       2. Walk the id prefixes configured for ``node.type`` in order; the first prefix that has
          synonyms supplies the node id (and the name, unless that label is ``''``).
       3. Remove every synonym whose prefix is not configured for the node type.
     """
     #If we have two synonyms with the same id, but one has no label, chuck it
     labels_by_id = defaultdict(list)
     for labeledid in node.synonyms:
         labels_by_id[labeledid.identifier].append(labeledid.label)
     for lid, labels in labels_by_id.items():
         if len(labels) > 1 and (None in labels):
             node.synonyms.remove(LabeledID(identifier=lid, label=None))
         if len(labels) > 1 and ('' in labels):
             node.synonyms.remove(LabeledID(identifier=lid, label=''))
     #Now find the best one for an id
     type_curies = self.concepts.get(node.type).id_prefixes
     #Now start looking for the best curies
     synonyms_by_curie = defaultdict(list)
     for s in node.synonyms:
         synonyms_by_curie[Text.get_curie(s.identifier)].append(s)
     for type_curie in type_curies:
         potential_identifiers = synonyms_by_curie[type_curie]
         if len(potential_identifiers) > 0:
             if len(potential_identifiers) > 1:
                 # Prefer candidates that carry a label, then take the first in sort order.
                 ids_with_labels = [pi for pi in potential_identifiers if pi.label is not None]
                 if len(ids_with_labels) > 0:
                     potential_identifiers = ids_with_labels
                 potential_identifiers.sort()
             node.id = potential_identifiers[0].identifier
             #Only replace the label if we have a label.
             if potential_identifiers[0].label != '':
                 node.name = potential_identifiers[0].label
             break
     #Remove any synonyms with extraneous prefixes.  The point of this is not so much to remove
     # unknown prefixes, as to make sure that if we got e.g. a meddra, and we downcast it to a disease,
     # that we don't end up with HP's in the equivalent ids.
     bad_synonyms = set()
     for synonym in node.synonyms:
         if isinstance(synonym, LabeledID):
             prefix = Text.get_curie(synonym.identifier)
         else:
             prefix = Text.get_curie(synonym)
         if prefix not in type_curies:
             bad_synonyms.add(synonym)
     for bs in bad_synonyms:
         node.synonyms.remove(bs)
     if node.id.startswith('DOID'):
         # Logger.warn is a deprecated alias; Logger.warning is the supported spelling.
         logger.warning("We are ending up with a DOID here")
         logger.warning(node.id)
         logger.warning(node.synonyms)
         logger.warning(node.type)
コード例 #7
0
 def graph_drugname_to_gene_symbol(self, drug_name_node):
     """Map a drug-name node to (edge, gene node) pairs.

     The bare drug name is taken from the node's identifier and passed to
     ``self.drug_name_to_gene_symbol``.  Every returned record becomes a
     ``targets`` edge plus a ``UNIPROT:`` gene node built from the last path
     segment of the record's ``uniprotSym`` value.
     """
     drug_name = Text.un_curie(drug_name_node.identifier)
     edge_node_pairs = []
     for record in self.drug_name_to_gene_symbol(drug_name):
         targets_edge = self.get_edge(record, predicate="targets")
         gene_curie = "UNIPROT:{0}".format(Text.path_last(record['uniprotSym']))
         edge_node_pairs.append((targets_edge, KNode(gene_curie, node_types.GENE)))
     return edge_node_pairs
コード例 #8
0
 def graph_name_to_drugbank(self, drug_name_node):
     """Map a drug-name node to (edge, DrugBank node) pairs.

     The bare drug name is taken from the node's identifier and passed to
     ``self.drug_name_to_gene_symbol``.  Every returned record becomes a
     ``drugname`` edge plus a ``DRUGBANK:`` node built from the last path
     segment of ``drugID`` and labelled with ``drugName``.
     """
     drug_name = Text.un_curie(drug_name_node.identifier)
     edge_node_pairs = []
     for record in self.drug_name_to_gene_symbol(drug_name):
         name_edge = self.get_edge(record, predicate="drugname")
         drugbank_curie = "DRUGBANK:{0}".format(Text.path_last(record['drugID']))
         drug_node = KNode(drugbank_curie,
                           node_types.DRUG,
                           label=record['drugName'])
         edge_node_pairs.append((name_edge, drug_node))
     return edge_node_pairs
コード例 #9
0
def build_sets(o, ignore_list = ('ICD',)):
    """Build one set of cross-referenced identifiers per id known to ``o``.

    For every id from ``o.get_ids()``, the xrefs from ``o.get_xrefs`` that do
    not start with any prefix in ``ignore_list`` are passed through
    ``Text.upper_curie`` and ``norm``, and a ``LabeledID`` for the id itself
    (label from ``o.get_label``) is added.

    ``ignore_list`` may be any iterable of string prefixes.  The default is
    an immutable tuple so the default cannot be mutated between calls.

    Returns a list of sets, one per id, in ``get_ids()`` order.
    """
    # str.startswith accepts a tuple of prefixes; an empty tuple matches nothing.
    ignored_prefixes = tuple(ignore_list)
    sets = []
    for mid in o.get_ids():
        #FWIW, ICD codes tend to be mapped to multiple MONDO identifiers, leading to mass confusion. So we
        #just excise them here.  It's possible that we'll want to revisit this decision in the future.  If so,
        #then we probably will want to set a 'glommable' and 'not glommable' set.
        dbx = {Text.upper_curie(x) for x in o.get_xrefs(mid) if not x.startswith(ignored_prefixes)}
        dbx = {norm(x) for x in dbx}
        # The label is looked up with the id as given, before it is upper-cased.
        label = o.get_label(mid)
        dbx.add(LabeledID(Text.upper_curie(mid), label))
        sets.append(dbx)
    return sets
コード例 #10
0
def synonymize(node,gt):
    """Return HGNC-derived synonyms for a gene node, resolving UniProtKB ids first.

    Raises ``Exception`` if the node is not of type ``node_types.GENE``.

    When the node id has the ``UNIPROTKB`` prefix, ``gt.uniprot.get_synonyms``
    is consulted; if it returns anything, those ids are added to the node's
    synonyms (with empty labels) and the first one becomes the node id.
    If the id still has the ``UNIPROTKB`` prefix afterwards an empty set is
    returned, otherwise the result of ``gt.hgnc.get_synonyms(node.id)``.
    """
    if not (node.type == node_types.GENE):
        raise Exception("Incorrect node type")

    def has_uniprot_prefix():
        return Text.get_curie(node.id).upper() == 'UNIPROTKB'

    if has_uniprot_prefix():
        uniprot_synonyms = gt.uniprot.get_synonyms(node.id)
        if len(uniprot_synonyms) > 0:
            relabeled = [ LabeledID(identifier=syn, label='') for syn in uniprot_synonyms ]
            node.synonyms.update(relabeled)
            node.id = uniprot_synonyms[0]
    # The id may have changed above, so the prefix is checked again.
    if has_uniprot_prefix():
        return set()
    return gt.hgnc.get_synonyms(node.id)
コード例 #11
0
 def add_chemotext_terms(self,nodes):
     """For each mesh term in a node, find out what chemotext calls that thing so we can query for it.

     Every MESH synonym of every node is looked up with
     ``self.ctext.get_chemotext_term_from_meshid``.  Terms that are found are
     appended to ``self.identifier_to_label[node.id]``; missing terms are
     logged as warnings.  Returns nothing.
     """
     log = logging.getLogger('application')
     log.debug('{} nodes'.format(len(nodes) ))
     for node in nodes:
         log.debug('node: {}'.format(node.id) )
         mesh_identifiers = [syn for syn in node.synonyms if Text.get_curie(syn) == 'MESH']
         for mesh_id in mesh_identifiers:
             log.debug('  mesh_id: {}'.format(mesh_id) )
             bare_id = Text.un_curie(mesh_id)
             cterm = self.ctext.get_chemotext_term_from_meshid( bare_id )
             if cterm is None:
                 # Logger.warn is a deprecated alias of Logger.warning; the
                 # arguments are passed separately so formatting only happens
                 # when the record is actually emitted.
                 log.warning("  Cannot find chemotext synonym for %s (%s) %s", bare_id, mesh_id, node.id)
             else:
                 log.debug('  node: {}, label: {}, chemotext: {}'.format(node.id, bare_id, cterm) )
                 self.identifier_to_label[node.id].append(cterm)
コード例 #12
0
 def gene_get_drug(self, gene_node):
     """ Get drugs for a gene from Pharos.

     For every UNIPROTKB synonym of ``gene_node`` the Pharos targets endpoint
     is queried.  Each ``ix.idg.models.Ligand`` link whose ``refid`` resolves
     through ``self.drugid_to_identifiers`` to a non-None id yields a
     (edge, drug node) pair.  A failure on one identifier is logged at debug
     level and does not stop processing of the others.

     Returns the list of (edge, drug node) pairs.
     """
     resolved_edge_nodes = []
     for s in gene_node.get_synonyms_by_prefix('UNIPROTKB'):
         try:
             logger.debug(f'Call with {s}')
             pharosid = Text.un_curie(s)
             url = 'https://pharos.nih.gov/idg/api/v1/targets(%s)?view=full' % pharosid
             r = requests.get(url)
             try:
                 result = r.json()
                 logger.debug('back')
             except ValueError:
                 #If pharos doesn't know the identifier, it just 404s and the body
                 # is not JSON (decode errors are ValueErrors).  Move to the next.
                 logger.debug('404')
                 continue
             predicate = LabeledID(identifier='PHAROS:drug_targets', label='is_target')
             for link in result['links']:
                 if link['kind'] != 'ix.idg.models.Ligand':
                     continue
                 chembl_id, label = self.drugid_to_identifiers(link['refid'])
                 if chembl_id is None:
                     continue
                 drug_node = KNode(chembl_id, type=node_types.CHEMICAL_SUBSTANCE, name=label)
                 edge = self.create_edge(drug_node,gene_node, 'pharos.gene_get_drug',
                         pharosid,predicate, url=url)
                 resolved_edge_nodes.append( (edge,drug_node) )
         except Exception:
             # Best effort: keep going with the next identifier.  The message
             # needs a %s placeholder, otherwise logging itself reports a
             # formatting error instead of the identifier.
             logger.debug("Error encountered calling pharos with %s", s, exc_info=True)
         logger.debug('ok')
     return resolved_edge_nodes
コード例 #13
0
ファイル: chembio.py プロジェクト: patlsc/robokop-interfaces
 def graph_drugbank_to_uniprot(self, drugbank):
     """Map a DrugBank node to (edge, gene node) pairs via the chem2bio2rdf triplestore.

     The bare id from ``drugbank.identifier`` is prefixed with ``DB`` and
     substituted into a SPARQL template that selects the distinct
     ``?uniprotGeneID`` values for that drug.  Each result becomes a
     ``UNIPROT:`` gene node (named after the last path segment of the
     returned IRI) and an edge from the drug to that gene.

     Returns a list of ``(edge, node)`` tuples.
     """
     response = self.triplestore.query_template(
         inputs={"drugID": "DB{0}".format(Text.un_curie(drugbank.identifier))},
         outputs=["uniprotGeneID"],
         template_text="""
         prefix drugbank:      <http://chem2bio2rdf.org/drugbank/resource/>
         prefix drugbank_drug: <http://chem2bio2rdf.org/drugbank/resource/drugbank_drug/>
         prefix ctd:           <http://chem2bio2rdf.org/ctd/resource/>
         select distinct ?uniprotGeneID where {
            values ( ?drugID ) { ( drugbank_drug:${drugID} ) }
            ?dbInter     drugbank:GeneBank_ID        ?geneBankID ;
                         drugbank:gene               ?uniprotGeneID .
            ?drugID      drugbank:CID                ?pubchemCID ;
                         drugbank:Generic_Name       ?drugGenericName .
            ?ctd_disease ctd:diseaseid               ?diseaseID ;
                         ctd:cid                     ?pubchemCID .
         }""")
     predicate = LabeledID(identifier='SIO:001257',
                           label='chemical to gene association')
     results = []
     for r in response:
         gene_id = r['uniprotGeneID'].rsplit('/', 1)[-1]
         node = KNode("UNIPROT:{0}".format(gene_id),
                      type=node_types.GENE)
         # NOTE(review): argument order (predicate before the input id) is kept
         # as it was; verify it against create_edge's signature.
         edge = self.create_edge(drugbank, node,
                                 'chembio.graph_drugbank_to_uniprot',
                                 predicate, drugbank.id)
         # list.append takes exactly one argument, so the pair must be a tuple;
         # append(edge, node) raised TypeError on the first result.
         results.append((edge, node))
     return results
コード例 #14
0
 def disease_get_gene(self, disease_node):
     """ Get genes associated with a disease from the Pharos database.

     The disease node's DOID, UMLS, MESH, OMIM and ORPHANET synonyms are
     rewritten into the id spellings used by the Pharos disease table and
     each one is looked up in ``self.db``.  Every distinct HGNC value found
     yields one (edge, gene node) pair; repeats are skipped.

     Returns the list of (edge, gene node) pairs.
     """
     resolved_edge_nodes = []
     hgncs = set()
     # WD:P2293 gene assoc with condition.
     # domain is gene and range is disease or phenotype for this relationship
     predicate = LabeledID(identifier='WD:P2293', label='gene_involved')
     #Pharos contains multiple kinds of disease identifiers in its disease table:
     # For OMIM identifiers, they can have either prefix OMIM or MIM
     # UMLS doesn't have any prefixes.... :(
     pharos_predicates = {'DOID':('DOID',),'UMLS':(None,),'MESH':('MESH',),'OMIM':('OMIM','MIM'),'ORPHANET':('Orphanet',)}
     # The disease id is bound as a query parameter instead of being pasted
     # into the SQL text, so quoting in the id cannot change the statement.
     query = ("select distinct x.value, p.sym  from disease d "
              "join xref x on x.protein_id = d.target_id "
              "join protein p on d.target_id = p.id "
              "where x.xtype = 'HGNC' and d.dtype <> 'Expression Atlas' and d.did=%s;")
     for ppred,dbpreds in pharos_predicates.items():
         pharos_candidates = [Text.un_curie(x) for x in disease_node.get_synonyms_by_prefix(ppred)]
         for dbpred in dbpreds:
             if dbpred is None:
                 pharos_ids = pharos_candidates
             else:
                 pharos_ids = [f'{dbpred}:{x}' for x in pharos_candidates]
             # This loop must run for both branches above.  It used to sit
             # inside the else branch, so unprefixed (UMLS) ids were never queried.
             for pharos_id in pharos_ids:
                 cursor = self.db.cursor(dictionary = True, buffered = True)
                 try:
                     cursor.execute(query, (pharos_id,))
                     for result in cursor:
                         label = result['sym']
                         hgnc = result['value']
                         if hgnc not in hgncs:
                             hgncs.add(hgnc)
                             gene_node = KNode(hgnc, type=node_types.GENE, name=label)
                             edge = self.create_edge(gene_node, disease_node, 'pharos.disease_get_gene', pharos_id, predicate)
                             resolved_edge_nodes.append((edge, gene_node))
                 finally:
                     # Release the cursor even if the query or edge creation fails.
                     cursor.close()
     return resolved_edge_nodes
コード例 #15
0
ファイル: hetio.py プロジェクト: stevencox/robokop-interfaces
 def gene_to_disease(self, gene):
     """Return (edge, disease node) pairs for a gene.

     Only identifiers with the prefix HGNC, UNIPROT or PHAROS are handled;
     anything else yields an empty list.  The bare identifier is matched
     against ``g.name`` in a Cypher query run through ``self.query``, and each
     result row becomes an ``affects`` edge plus a disease node.  The rows are
     also printed to stdout.
     """
     prefix = Text.get_curie(gene.identifier)
     if prefix not in ['HGNC', 'UNIPROT', 'PHAROS']:
         return []
     # NOTE(review): the identifier is interpolated straight into the Cypher
     # text; confirm whether self.query supports parameters.
     cypher = "MATCH (d:Disease)-[a1]-(g:Gene) WHERE g.name='{0}' RETURN a1,d".format(
         Text.un_curie(gene.identifier))
     result = self.query(cypher, labels=['Disease'])
     for row in result:
         print(row)
         print(result)
         print(type(result))
     pairs = []
     for row in result:
         edge = self.get_edge({'res': row}, predicate='affects')
         pairs.append((edge, KNode(row['identifier'], node_types.DISEASE)))
     return pairs
コード例 #16
0
    def drug_get_gene(self, subject):
        """ Get the genes targeted by a drug, using the Pharos ligands endpoint.

        The bare id from ``subject.identifier`` is looked up in Pharos.  Each
        ``ix.idg.models.Target`` link is turned into a ``KEdge`` carrying the
        link's properties, and the Pharos target id is translated with
        ``self.target_to_hgnc``.  Targets without an HGNC id are logged as
        warnings and skipped.

        Returns a list of (edge, gene node) pairs.
        """
        pharosid = Text.un_curie (subject.identifier)
        r = requests.get('https://pharos.nih.gov/idg/api/v1/ligands(%s)?view=full' % pharosid)
        result = r.json()
        resolved_edge_nodes = []
        for link in result['links']:
            if link['kind'] != 'ix.idg.models.Target':
                continue
            pharos_target_id = int(link['refid'])
            pharos_edge = KEdge( 'pharos', 'drug_get_gene', {'properties': link['properties']} )
            #Pharos returns target ids in its own numbering system. Collect other names for it.
            hgnc = self.target_to_hgnc (pharos_target_id)
            if hgnc is not None:
                resolved_edge_nodes.append( (pharos_edge, KNode (hgnc, node_types.GENE)) )
            else:
                # Logger.warn is a deprecated alias of Logger.warning.
                logging.getLogger('application').warning('Did not get HGNC for pharosID %d', pharos_target_id)
        return resolved_edge_nodes
コード例 #17
0
 def gene_to_drug(self, gene_node):
     """Collect (edge, drug node) pairs for a gene from the CTD interactions table.

     For every NCBIGENE synonym of ``gene_node`` the CTD service is queried.
     Rows for a different ``GeneID`` and rows rejected by
     ``self.check_gene_chemical_row`` are skipped.  The gene is the subject
     when the normalized predicate identifier contains any of
     ``self.g2d_strings``, otherwise the drug is.  Within one gene identifier,
     only the first edge for each (drug id, original predicate label) pair is
     kept.
     """
     pairs = []
     for gene_curie in gene_node.get_synonyms_by_prefix('NCBIGENE'):
         seen = set()
         geneid = Text.un_curie(gene_curie)
         url = f"{self.url}/CTD_chem_gene_ixns_GeneID/{geneid}/"
         rows = requests.get (url).json ()
         for row in rows:
             if row['GeneID'] != geneid:
                 continue
             good_row, predicate_label, props = self.check_gene_chemical_row(row)
             if not good_row:
                 continue
             predicate = self.normalize_predicate(
                 LabeledID(identifier=f'CTD:{predicate_label}', label=predicate_label)
             )
             #Should this be substance?
             drug_node = KNode(f"MESH:{row['ChemicalID']}", type=node_types.CHEMICAL_SUBSTANCE, name=f"{row['ChemicalName']}")
             gene_acts_on_drug = any(s in predicate.identifier for s in self.g2d_strings)
             if gene_acts_on_drug:
                 source, target = gene_node, drug_node
             else:
                 source, target = drug_node, gene_node
             edge = self.create_edge(source,target,'ctd.gene_to_drug',gene_curie,predicate,
                                     publications=[f"PMID:{x}" for x in row['PubMedIDs'].split('|') ],url=url,properties=props)
             # Ideally the key would use edge.standard_predicate.label, but the
             # standard predicates are not specific enough yet.
             key = (drug_node.id, edge.original_predicate.label)
             if key in seen:
                 continue
             seen.add(key)
             pairs.append( (edge,drug_node) )
     return pairs
コード例 #18
0
 def go_term_to_cell_xontology_relationships(self, go_node):
     """Return (edge, cell node) pairs from a GO term's cross-ontology relations.

     Queries the QuickGO ``xontologyrelations`` endpoint for the GO term and
     keeps the relations whose id starts with ``CL:`` and whose relation maps
     to a predicate through ``self.get_predicate``.  Returns an empty list
     when the response has no ``results`` key.
     """
     #This call is not paged!
     url = "{0}/QuickGO/services/ontology/go/terms/GO:{1}/xontologyrelations".format(
         self.url, Text.un_curie(go_node.id))
     response = self.query(url)
     if 'results' not in response:
         return []
     pairs = []
     for entry in response['results']:
         if 'xRelations' not in entry:
             continue
         for xrel in entry['xRelations']:
             if not xrel['id'].startswith('CL:'):
                 continue
             predicate = self.get_predicate(xrel['relation'])
             if predicate is None:
                 continue
             cell_node = KNode(xrel['id'],
                               type=node_types.CELL,
                               name=xrel['term'])
             edge = self.create_edge(
                 go_node,
                 cell_node,
                 'quickgo.go_term_to_cell_xontology_relationships',
                 go_node.id,
                 predicate,
                 url=url)
             pairs.append((edge, cell_node))
     return pairs
コード例 #19
0
    def __init__(self, *args, **kwargs):
        """Initialise the node from a positional identifier and/or keyword arguments.

        A single positional argument that is a string, or a list whose first
        element is a string, is taken as the node id and is NOT forwarded to
        the superclass initializer.  All other positional and keyword
        arguments are forwarded to ``super().__init__``.

        After the superclass call, a list-valued ``name`` is reduced to its
        first element, an id starting with ``http`` is converted to a CURIE
        with ``Text.obo_to_curie``, and ``synonyms`` is seeded with a
        ``LabeledID`` built from the final id and name.
        """
        # Defaults are assigned before the superclass initializer runs.
        # presumably super().__init__ fills these in from kwargs — TODO confirm
        self.id = None
        self.name = None
        self.type = None
        self.original_curie = None
        self.properties = {}

        # A lone positional string is the identifier; consume it so it is not
        # passed on to the superclass.
        if args and len(args) == 1 and isinstance(args[0], str):
            self.id = args[0]
            args = []
        # TODO: Currently hack to only utilize the 1st curie in a list if multiple curies provided
        elif args and len(args) == 1 and isinstance(
                args[0], list) and isinstance(args[0][0], str):
            self.id = args[0][0]
            args = []

        super().__init__(*args, **kwargs)

        # Another hack to keep things running: keep only the first name if a list was given.
        if isinstance(self.name, list):
            self.name = self.name[0]

        # NOTE(review): self.id is still None here unless it was set above or by
        # the superclass initializer, in which case .startswith raises AttributeError.
        if self.id.startswith('http'):
            self.id = Text.obo_to_curie(self.id)

        #Synonyms is just for CURIEs
        self.synonyms = set()
        self.synonyms.add(LabeledID(identifier=self.id, label=self.name))

        #List of labels to attach to exports
        self.export_labels = []
コード例 #20
0
 async def get_kegg_data(self, kegg_id):
     """Fetch the KEGG flat-file entry for a compound and extract the configured keys.

     The bare id from ``kegg_id`` is appended to the URL configured for the
     ``KEGG.COMPOUND`` prefix.  The downloaded text is parsed with
     ``self.parse_flat_file_to_dict`` and reduced by ``self.extract_kegg_data``
     using the configured ``keys``.
     """
     prefix_config = self.get_prefix_config('KEGG.COMPOUND')
     compound_id = Text.un_curie(kegg_id)
     flat_file = await self.async_get_text(prefix_config['url'] + compound_id)
     parsed = self.parse_flat_file_to_dict(flat_file)
     return self.extract_kegg_data(parsed, prefix_config['keys'])
コード例 #21
0
 async def get_pubchem_data(self, pubchem_id, retries = 0):
     """
     Gets pubchem annotations.

     The part of ``pubchem_id`` after the last ``:`` is appended to the URL
     configured for the ``PUBCHEM`` prefix.  The ``X-Throttling-Control``
     response header is inspected after the request: a Yellow or Red status
     makes the coroutine sleep before returning, and a Black status makes it
     sleep ``3 * (retries + 1)`` seconds and call itself again, up to three
     retries.

     Returns the result of ``self.extract_pubchem_data`` on the response JSON,
     or ``{}`` when the retry limit is exceeded.
     """
     conf = self.get_prefix_config('PUBCHEM')
     url = conf['url'] + pubchem_id.split(':')[-1]
     headers = {
         'Accept': 'application/json'
     }
     result = await self.async_get_raw_response(url, headers= headers)
     result_json = result['json']
     # The PubChem API blocks clients that send too many requests; it reports
     # its current state in the X-Throttling-Control response header.
     throttle = result['headers']['X-Throttling-Control']
     throttle_warnings = { Text.snakify(value.split(':')[0].lower()) : value.split(':')[1] for value in throttle.split(',') if ':' in value }

     def throttled_at(level):
         # True when either the time status or the count status mentions `level`.
         return (level in throttle_warnings['request_time_status']
                 or level in throttle_warnings['request_count_status'])

     # Logger.warn is a deprecated alias; Logger.warning is used throughout.
     if throttled_at('Yellow'):
         logger.warning('Pubchem requests reached Yellow')
         await asyncio.sleep(0.5)
     if throttled_at('Red'):
         logger.warning('Pubchem requests reached RED')
         await asyncio.sleep(2)
     if throttled_at('Black'):
         sleep_sec = 3 * ( retries + 1 )
         logger.error(f'Pubchem request blocked, sleeping {sleep_sec} seconds, no of retries {retries}')
         await asyncio.sleep(sleep_sec)
         # Repeat the call until three retries have been made.
         if retries < 3:
             return await self.get_pubchem_data(pubchem_id, retries + 1)
         else:
             # Retry limit exceeded: give up and return an empty result.
             logger.warning(f'retry limit exceed for {pubchem_id} , returning empty')
             return {}
     return self.extract_pubchem_data(result_json, conf['keys'])
コード例 #22
0
 async def get_chemical_roles(self, chebi_id):
     """
     Look up the roles assigned to a chebi id.

     The labels of the matching roles are fetched with a SPARQL template and
     each label is passed through ``Text.snakify``.  The result is returned as
     ``{chebi_id: rows}`` so that callers issuing many requests concurrently
     can tell which id each answer belongs to.
     """
     sparql = """
     PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
     PREFIX has_role: <http://purl.obolibrary.org/obo/RO_0000087>
     PREFIX chemical_entity: <http://purl.obolibrary.org/obo/CHEBI_24431>
     PREFIX CHEBI: <http://purl.obolibrary.org/obo/CHEBI_>
     SELECT DISTINCT ?role_label
     from <http://reasoner.renci.org/ontology>
     from <http://reasoner.renci.org/redundant>
     where {
         $chebi_id has_role: ?role.
         ?role rdfs:label ?role_label.
         GRAPH <http://reasoner.renci.org/ontology/closure> {
             ?role rdfs:subClassOf CHEBI:50906.
         }
     }
     """
     rows = await self.tripleStore.async_query_template(
         template_text=sparql,
         inputs={'chebi_id': chebi_id},
         outputs=['role_label'],
     )
     for row in rows:
         row['role_label'] = Text.snakify(row['role_label'])
     return {chebi_id: rows}
コード例 #23
0
 def drug_get_gene(self, subject):
     """ Get genes targeted by a drug, using the Pharos ligands endpoint.

     For every CHEMBL.COMPOUND synonym of ``subject`` Pharos is queried.
     Each ``ix.idg.models.Target`` link whose Pharos target id translates to
     an HGNC id through ``self.target_to_hgnc`` yields an (edge, gene node)
     pair; targets without an HGNC id are logged as warnings.  Identifiers
     whose response is not JSON are skipped.

     Returns the list of (edge, gene node) pairs.
     """
     resolved_edge_nodes = []
     for s in subject.get_synonyms_by_prefix('CHEMBL.COMPOUND'):
         pharosid = Text.un_curie(s)
         url = 'https://pharos.nih.gov/idg/api/v1/ligands(%s)?view=full' % pharosid
         r = requests.get(url)
         try:
             result = r.json()
         except ValueError:
             #Pharos returns a 404 if it doesn't recognize the identifier, which ends up producing
             # errors in turning into json (decode errors are ValueErrors). Skip to next identifier
             continue
         predicate = LabeledID(identifier='PHAROS:drug_targets', label='is_target')
         for link in result['links']:
             if link['kind'] != 'ix.idg.models.Target':
                 continue
             pharos_target_id = int(link['refid'])
             hgnc = self.target_to_hgnc(pharos_target_id)
             if hgnc is not None:
                 hgnc_node = KNode(hgnc, type=node_types.GENE)
                 edge = self.create_edge(subject,hgnc_node,'pharos.drug_get_gene',pharosid,predicate,url=url)
                 resolved_edge_nodes.append((edge, hgnc_node))
             else:
                 # Logger.warn is a deprecated alias of Logger.warning.
                 logging.getLogger('application').warning('Did not get HGNC for pharosID %d', pharos_target_id)
     return resolved_edge_nodes
コード例 #24
0
 def drug_to_gene_expanded(self, drug):
     """Collect (edge, gene node) pairs for a drug from the expanded CTD table.

     For every MESH synonym of ``drug`` the CTD service is queried and each
     row accepted by ``self.check_expanded_gene_chemical_row`` is turned into
     an edge.  The row's ``direction`` decides which node is the subject.
     """
     pairs = []
     for mesh_curie in drug.get_synonyms_by_prefix('MESH'):
         url=f"{self.url}CTD_chem_gene_expanded_chemicalID/mesh:{Text.un_curie(mesh_curie)}/"
         rows = requests.get(url).json()
         for row in rows:
             good_row, predicate_label, props, pmids = self.check_expanded_gene_chemical_row(row)
             if not good_row:
                 continue
             predicate = self.normalize_predicate(
                 LabeledID(identifier=f"CTD:{Text.snakify(predicate_label)}", label=predicate_label)
             )
             gene_node = KNode(Text.upper_curie(row['geneID']), name=row['gene_label'],type=node_types.GENE)
             if row['direction'] == '->':
                 source, target = drug, gene_node
             else:
                 source, target = gene_node, drug
             edge = self.create_edge(source,target,'ctd.drug_to_gene_expanded',mesh_curie,predicate,publications=pmids,properties=props,url=url )
             pairs.append( (edge,gene_node) )
     return pairs
コード例 #25
0
    def map_concept_types(self, thing, object_type=None):
        """ Expand high level concepts into concrete types our data sources understand.

        Resolution order:
          1. Guess the type from ``thing.identifier`` with ``self.guess_type``.
          2. Failing that, look ``thing.node_type`` up in ``self.concepts`` and
             map each entry through ``self.vocab``.
          3. Failing that (or when ``thing`` is falsy), fall back to
             ``self.concepts.get(object_type, [object_type])``.
        Whenever ``thing`` is given and its identifier has a CURIE prefix, the
        result is replaced by ``[self.make_up_curie(prefix)]``.

        Returns a list.
        """
        # Try the CURIE approach.
        if thing and thing.identifier:
            the_type = self.guess_type(thing.identifier)
        else:
            the_type = None

        # If that didn't work, get candidate types based on the (abstract) node type.
        if thing and not the_type:
            the_type = self.concepts.get(thing.node_type, None)
            if the_type:
                # Attempt to map them down to IRIs
                the_type = [self.vocab.get(t, t) for t in the_type]

        # Systematize this:
        # If the concept type is disease but the curie is NAME, we don't have a DOID.
        if isinstance(the_type, str):
            # A single string is wrapped so the return type is always a list.
            the_type = [the_type]

        if the_type:
            result = the_type
        else:
            result = self.concepts.get(object_type, [object_type])

        if thing:
            curie = Text.get_curie(thing.identifier)
        else:
            curie = None
        if curie:
            result = [self.make_up_curie(curie)]

        return result
コード例 #26
0
 async def get_inchikey_data(self, inchikey_id):
     """Fetch the flat-file entry for an InChIKey and extract the configured keys.

     The bare id from ``inchikey_id`` is appended to the URL configured for
     the ``INCHIKEY`` prefix.  The downloaded text is parsed with
     ``self.parse_flat_file_to_dict`` and reduced by
     ``self.extract_inchikey_data`` using the configured ``keys``.
     """
     prefix_config = self.get_prefix_config('INCHIKEY')
     bare_key = Text.un_curie(inchikey_id)
     flat_file = await self.async_get_text(prefix_config['url'] + bare_key)
     parsed = self.parse_flat_file_to_dict(flat_file)
     return self.extract_inchikey_data(parsed, prefix_config['keys'])
コード例 #27
0
 def execute(self):
     """Execute the query that defines the graph.

     The user query supplies parallel sequences of cypher queries, start
     nodes, reverse flags and lookup nodes.  For each group a start node is
     built and linked to its lookup node with a ``lookup`` edge, the cypher is
     sent to ``self.rosetta.graph``, and the de-duplicated result is handed to
     ``self.add_edges``.
     """
     self.logger.debug('Executing Query')
     #GreenT wants a cypherquery to find transitions, and a starting point
     plan = zip(
         self.userquery.generate_cypher(),
         self.userquery.get_start_node(),
         self.userquery.get_reversed(),
         self.userquery.get_lookups(),
     )
     for cypher, start, reverse, lookup in plan:
         input_name = Text.un_curie(lookup.identifier)
         for message in (start, 'CYPHER', cypher):
             self.logger.debug(message)
         identifier, ntype = start
         start_node = KNode( identifier, ntype, label=input_name )
         lookup_edge = KEdge( 'lookup', 'lookup' )
         lookup_edge.source_node = lookup
         lookup_edge.target_node = start_node
         self.add_nonsynonymous_edge( lookup_edge )
         #Fire this to rosetta, collect the result
         raw_graph = self.rosetta.graph([(None, start_node)],query=cypher)
         #The result contains duplicate edges.  Remove them, while preserving order:
         unique_graph = list(OrderedDict.fromkeys( raw_graph ) )
         self.add_edges( unique_graph , reverse )
     self.logger.debug('Query Complete')
コード例 #28
0
 def go_term_to_cell_annotation_extensions(self, go_node):
     """Collect the cell types named in occurs_in(CL) annotation extensions of a GO term.

     Caveat: this plays a little fast and loose with annotations. An
     annotation relates a gene to a GO term, and an extension such as
     occurs_in(celltype) strictly applies only to that gene/GO pairing.
     It is used here anyway because it is the only currently available way
     to traverse from e.g. neurotransmitter release to neurons.

     Returns a list of (edge, cell_node) pairs, one per distinct CL id whose
     qualifier maps to a predicate.
     """
     template = '{0}/QuickGO/services/annotation/search?includeFields=goName&goId=GO:{1}&taxonId=9606&extension=occurs_in(CL)'
     url = template.format(self.url, Text.un_curie(go_node.id))
     seen_cells = set()
     pairs = []
     for annotation in self.page_calls(url):
         for extension in annotation['extensions']:
             for xref in extension['connectedXrefs']:
                 # Only cell-ontology references we have not emitted yet.
                 if xref['db'] != 'CL' or xref['id'] in seen_cells:
                     continue
                 predicate = self.get_predicate(xref['qualifier'])
                 if predicate is None:
                     # Not marked as seen, so a later xref for the same
                     # cell can still produce an edge.
                     continue
                 # The response carries no name for the cell, only its id.
                 cell_node = KNode('CL:{}'.format(xref['id']),
                                   type=node_types.CELL)
                 edge = self.create_edge(
                     go_node,
                     cell_node,
                     'quickgo.go_term_to_cell_annotation_extensions',
                     go_node.id,
                     predicate,
                     url=url)
                 pairs.append((edge, cell_node))
                 seen_cells.add(xref['id'])
     return pairs
コード例 #29
0
 def chemical_get_enzyme(self, chemnode):
     """Find enzymes acting on a chemical, via the reactions it takes part in.

     The chemical's reactions are looked up first; each reaction record then
     gives (1) the enzymes and (2) whether the chemical is a reactant or a
     product. A reactant yields an 'increases degradation of' edge, a
     product an 'increases synthesis of' edge. Reactions in which none of
     the chemical's KEGG ids appear on either side are logged and skipped.

     Returns a list of (edge, enzyme_node) pairs.
     """
     reaction_ids = self.chemical_get_reaction(chemnode)
     kegg_ids = {Text.un_curie(syn) for syn in chemnode.get_synonyms_by_prefix('KEGG')}
     pairs = []
     for reaction_id in reaction_ids:
         for rxn in self.get_reaction(reaction_id):
             if 'enzyme' not in rxn:
                 continue
             for gene_id in rxn['enzyme']:
                 enzyme = KNode(gene_id, type=node_types.GENE)
                 consumed = kegg_ids.intersection(rxn['reactants'])
                 if consumed:
                     predicate = LabeledID('CTD:increases_degradation_of', label='increases degradation of')
                     input_identifier = consumed.pop()
                 else:
                     produced = kegg_ids.intersection(rxn['products'])
                     if not produced:
                         logger.error(f"Mismatch between query and answer: {rxn} {kegg_ids}")
                         continue
                     predicate = LabeledID('CTD:increases_synthesis_of', label='increases synthesis of')
                     input_identifier = produced.pop()
                 edge = self.create_edge(enzyme, chemnode, 'kegg.chemical_get_enzyme',  input_identifier, predicate)
                 pairs.append((edge, enzyme))
     return pairs
コード例 #30
0
    def get_transitions(self, graph, query):
        """ Execute a cypher query and walk the results to build a set of transitions to execute.

        The query should be such that it returns a path (node0-relation0-node1-relation1-node2), and
        an array of the relation start nodes.  For the path above, start nodes like (node0,node1) would
        indicate a unidirectional path, while (node0,node2) would indicate an end-based path meeting in
        the middle.
        Each node in the path can be described with an arbitrary node index.  Note that this index does not
        have to correspond to the order of calling or any structural property of the graph.  It simply points
        to a particular node in the call map.

        Args:
            graph: object exposing ``driver.session()`` as a context manager.
            query: cypher text; every returned row must provide 'nodes' and 'edges'.

        Returns:
            A list of plans, one per result row.  Each plan is a nested map
            ``plan[source_id][target_id]`` holding a list of transition dicts with
            the keys "op", "link" and "predicate".

        Raises:
            StopIteration: if a returned edge id has no matching entry in
                ``self.query_graph['edges']``.
        """
        # Pull every row while the session is still open.  The result is tied to
        # the session; reading it after the ``with`` block has closed the session
        # is not reliable across driver versions.
        with graph.driver.session() as session:
            rows = list(session.run(query))

        plans = []
        for row in rows:
            nodes = row['nodes']
            edges = row['edges']

            # Start with an empty transition list for every (source, target) pair.
            transitions = {
                source_id: {target_id: [] for target_id in nodes}
                for source_id in nodes
            }
            for edge_id in edges:
                edge = edges[edge_id]
                source_id = edge['source']
                target_id = edge['target']
                qedge = next(candidate
                             for candidate in self.query_graph['edges']
                             if candidate.id == edge_id)
                qedge_type = qedge.type
                # A non-empty list of types is snakified element-wise, a single
                # string directly; anything else (None, empty list) means no
                # predicate constraint.
                if isinstance(qedge_type, list) and qedge_type:
                    predicate = [Text.snakify(one_type) for one_type in qedge_type]
                elif isinstance(qedge_type, str):
                    predicate = Text.snakify(qedge_type)
                else:
                    predicate = None
                transitions[source_id][target_id].append({
                    "op": edge['op'],
                    "link": edge['predicate'],
                    "predicate": predicate
                })

            plans.append(transitions)
        return plans