Code example #1
 async def get_pubchem_data(self, pubchem_id, retries = 0):
     """
     Gets pubchem annotations.
     """ 
     conf = self.get_prefix_config('PUBCHEM')
     url = conf['url'] + pubchem_id.split(':')[-1]
     headers = {
         'Accept': 'application/json'
     }
     result = await self.async_get_raw_response(url, headers=headers)
     result_json = result['json']
     # the PubChem API throttles clients that send too many requests;
     # parse the X-Throttling-Control header into a status map
     throttle = result['headers']['X-Throttling-Control']
     throttle_warnings = { Text.snakify(value.split(':')[0].lower()): value.split(':')[1] for value in throttle.split(',') if ':' in value }
     if 'Yellow' in throttle_warnings['request_time_status'] or 'Yellow' in throttle_warnings['request_count_status']:
         logger.warning('Pubchem requests reached Yellow')
         await asyncio.sleep(0.5) 
     if 'Red' in throttle_warnings['request_time_status'] or 'Red' in throttle_warnings['request_count_status']:
         logger.warning('Pubchem requests reached RED')
         await asyncio.sleep(2)
     if 'Black' in throttle_warnings['request_time_status'] or 'Black' in throttle_warnings['request_count_status']:
         sleep_sec = 3 * (retries + 1)  # back off longer on each retry
         logger.error(f'Pubchem request blocked, sleeping {sleep_sec} seconds, no of retries {retries}')
         await asyncio.sleep(sleep_sec)
         # retry the call until the retry limit (3) is reached
         if retries < 3:
             return await self.get_pubchem_data(pubchem_id, retries + 1)
         else:
             # retry limit exceeded; return an empty result
             logger.warning(f'retry limit exceeded for {pubchem_id}, returning empty')
             return {}
     return self.extract_pubchem_data(result_json, conf['keys'])
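As a standalone illustration of the throttle parsing above, the sketch below uses an assumed sample X-Throttling-Control value and a simplified stand-in for Text.snakify; neither is taken from the code itself.

# Hypothetical illustration of turning a throttling header value into the status map used above.
sample = 'Request Count status: Green (0%), Request Time status: Yellow (65%), Service status: Green (20%)'

def snakify(text):
    # simplified stand-in for Text.snakify: lower-case and join words with underscores
    return '_'.join(text.strip().lower().split())

throttle_warnings = {
    snakify(value.split(':')[0]): value.split(':')[1]
    for value in sample.split(',') if ':' in value
}
print(throttle_warnings)
# {'request_count_status': ' Green (0%)', 'request_time_status': ' Yellow (65%)', 'service_status': ' Green (20%)'}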
Code example #2
 async def get_chemical_roles(self, chebi_id):
     """
     Gets all the roles assigned to a CHEBI id. The result is returned keyed by the chebi_id,
     which is useful for keeping track when making bulk requests concurrently.
     """
     text = """
     PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
     PREFIX has_role: <http://purl.obolibrary.org/obo/RO_0000087>
     PREFIX chemical_entity: <http://purl.obolibrary.org/obo/CHEBI_24431>
     PREFIX CHEBI: <http://purl.obolibrary.org/obo/CHEBI_>
     SELECT DISTINCT ?role_label
     from <http://reasoner.renci.org/ontology>
     from <http://reasoner.renci.org/redundant>
     where {
         $chebi_id has_role: ?role.
         ?role rdfs:label ?role_label.
         GRAPH <http://reasoner.renci.org/ontology/closure> {
             ?role rdfs:subClassOf CHEBI:50906.
         }
     }
     """
     query_result = await self.tripleStore.async_query_template(
         inputs = {'chebi_id': chebi_id},
         outputs = [ 'role_label' ],
         template_text = text
     )        
     for r in query_result:
         r['role_label'] = Text.snakify(r['role_label'])
     return {chebi_id: query_result}
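A minimal usage sketch, assuming an annotator object that exposes the method above and placeholder CHEBI curies; because each result is keyed by its chebi_id, merging the dicts keeps track of which roles belong to which input even when the calls run concurrently.

import asyncio

async def fetch_all_roles(annotator, chebi_ids):
    # fire the queries concurrently; each result comes back keyed by its chebi_id
    results = await asyncio.gather(*(annotator.get_chemical_roles(c) for c in chebi_ids))
    merged = {}
    for result in results:
        merged.update(result)
    return merged

# roles = asyncio.run(fetch_all_roles(annotator, ['CHEBI:15365', 'CHEBI:27732']))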
Code example #3
    def get_transitions(self, graph, query):
        """ Execute a cypher query and walk the results to build a set of transitions to execute.
        The query should be such that it returns a path (node0-relation0-node1-relation1-node2), and
        an array of the relation start nodes.  For the path above, start nodes like (node0,node1) would
        indicate a unidirectional path, while (node0,node2) would indicate an end-based path meeting in
        the middle.
        Each node in the path can be described with an arbitrary node index.  Note that this index does not
        have to correspond to the order of calling or any structural property of the graph.  It simply points
        to a particular node in the call map.
        Returns:
            plans: a list with one entry per result row, each mapping a source node index to a
                target node index to the list of transitions (op, link, predicate) between them.
        """
        with graph.driver.session() as session:
            # materialize the result while the session is still open
            result = list(session.run(query))
        plans = []
        for row in result:
            nodes = row['nodes']
            edges = row['edges']

            # extract transitions
            # transitions[source][target] collects the ops linking each pair of nodes
            transitions = {
                source_id: {target_id: [] for target_id in nodes}
                for source_id in nodes
            }
            for e in edges:
                edge = edges[e]
                source_id = edge['source']
                target_id = edge['target']
                qedge = next(e2 for e2 in self.query_graph['edges']
                             if e2.id == e)
                qedge_type = qedge.type
                # the query-graph edge type may be a list, a string, or absent
                if isinstance(qedge_type, list) and qedge_type:
                    predicate = [Text.snakify(e2type) for e2type in qedge_type]
                elif isinstance(qedge_type, str):
                    predicate = Text.snakify(qedge_type)
                else:
                    predicate = None
                trans = {
                    "op": edge['op'],
                    "link": edge['predicate'],
                    "predicate": predicate
                }
                transitions[source_id][target_id].append(trans)

            plans.append(transitions)
        return plans
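For orientation, one entry of plans for a two-node query might look like the sketch below; the node ids and operation name are made-up placeholders.

# Illustrative plan entry for a path n0 -[e0]-> n1 (values are placeholders):
example_plan = {
    'n0': {
        'n0': [],
        'n1': [{'op': 'some_service.some_op', 'link': 'related_to', 'predicate': 'related_to'}],
    },
    'n1': {'n0': [], 'n1': []},
}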
Code example #4
 def write_edge(self, edge):
     # skip edges already written for this source/target pair
     if edge in self.written_edges[edge.source_id][edge.target_id]:
         return
     self.written_edges[edge.source_id][edge.target_id].add(edge)
     # queue the edge under its snakified predicate label
     label = Text.snakify(edge.standard_predicate.label)
     typed_edges = self.edge_queues[label]
     typed_edges.append(edge)
     # flush once this label's queue reaches the buffer size
     if len(typed_edges) >= self.edge_buffer_size:
         self.flush()
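The surrounding class is not shown, so the sketch below is an assumed initialization of the state write_edge relies on; the attribute names match the method, but the defaultdict layout and buffer size are guesses.

from collections import defaultdict

class BufferedEdgeWriterSketch:
    """Hypothetical container for the state used by write_edge above."""
    def __init__(self, edge_buffer_size=100):
        # nested map: source_id -> target_id -> set of already-written edges
        self.written_edges = defaultdict(lambda: defaultdict(set))
        # per-predicate-label queues, flushed once they reach edge_buffer_size
        self.edge_queues = defaultdict(list)
        self.edge_buffer_size = edge_buffer_size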
Code example #5
 async def get_mondo_properties(self, mondo_curie):
     """
     Gets the ascestors from onto and maps them to the ones we are intereseted in.
     """
     conf = self.get_prefix_config('MONDO')
     ancestors_url = conf['url'] + mondo_curie
     response = await self.async_get_json(ancestors_url)
     if 'superterms' not in response:
         return {}
     ancestors = response['superterms']
     properties = {
         Text.snakify(conf['keys'][x]): True
         for x in ancestors if x in conf['keys']
     }
     return properties
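A small illustration of the ancestor-to-property mapping, using made-up curies and key labels and a simplified stand-in for Text.snakify.

# Hypothetical conf['keys'] mapping and ancestor list (curies and labels are placeholders):
conf_keys = {'MONDO:0000123': 'some property label', 'MONDO:0000456': 'another property'}
ancestors = ['MONDO:0000456', 'MONDO:9999999']

def snakify(label):
    # simplified stand-in for Text.snakify
    return '_'.join(label.strip().lower().split())

properties = {snakify(conf_keys[x]): True for x in ancestors if x in conf_keys}
# -> {'another_property': True}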
Code example #6
 def grab_edge_props(self, result):
     # use the reported predicate when present, otherwise fall back to a generic one
     if result['pred'] is not None and len(result['pred']) > 1:
         rel = Text.snakify(result['pred']).lower()
     else:
         rel = 'interacts_with'
     predicate = LabeledID(identifier=f'GAMMA:{rel}', label=rel)
     if 'pubmed_ids' in result and result['pubmed_ids'] is not None:
         pmids = [ f'PMID:{r}' for r in result['pubmed_ids'].split('|')]
     else:
         pmids = []
     props = {}
     if result['affinity'] is not None:
         props['affinity'] = float(result['affinity'])
         props['affinity_parameter'] = result['affinity_parameter']
     return predicate, pmids, props
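An illustrative call with a made-up result row; service is a placeholder for whatever object holds the method above.

# Hypothetical result row (column names taken from the method above, values made up):
row = {'pred': 'Inhibitor', 'pubmed_ids': '12345|67890',
       'affinity': '7.2', 'affinity_parameter': 'pKi'}
# predicate, pmids, props = service.grab_edge_props(row)   # `service` is a placeholder instance
# predicate -> LabeledID(identifier='GAMMA:inhibitor', label='inhibitor')
# pmids     -> ['PMID:12345', 'PMID:67890']
# props     -> {'affinity': 7.2, 'affinity_parameter': 'pKi'}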
Code example #7
    def process_op(self, link, source_node, history):
        op_name = link['op']
        key = f"{op_name}({Text.upper_curie(source_node.id)})"
        maxtime = timedelta(minutes=2)
        try:
            try:
                results = self.rosetta.cache.get(key)
            except Exception as e:
                # logger.warning(e)
                results = None
            if results is not None:
                logger.debug(f"cache hit: {key} size:{len(results)}")
            else:
                logger.debug(f"exec op: {key}")
                op = self.rosetta.get_ops(op_name)
                start = dt.now()
                results = op(source_node)
                end = dt.now()
                logger.debug(f'Call {key} took {end-start}')
                if (end - start) > maxtime:
                    logger.warn(f"Call {key} exceeded {maxtime}")
                self.rosetta.cache.set(key, results)
                logger.debug(f"cache.set-> {key} length:{len(results)}")
                logger.debug(f"    {[node for _, node in results]}")
            results = list(
                filter(lambda x: x[1].id not in self.excluded_identifiers,
                       results))
            for edge, node in results:
                edge_label = Text.snakify(edge.original_predicate.label)
                # follow the edge if no predicate filter is set, or if the edge's label
                # matches the filter (a single predicate or a list of predicates)
                if (link['predicate'] is None
                        or edge_label == link['predicate']
                        or (isinstance(link['predicate'], list)
                            and edge_label in link['predicate'])):
                    self.process_node(node, history, edge)

        except pika.exceptions.ChannelClosed:
            traceback.print_exc()
            raise
        except Exception as e:
            traceback.print_exc()
            log_text = f"  -- {key}"
            logger.warning(f"Error invoking> {log_text}: {e}")
Code example #8
 def make_edge(self, chem, gene, r, identifier, url):
     rel = Text.snakify(r['type']).lower()
     predicate = LabeledID(identifier=f'GAMMA:{rel}', label=rel)
     #if r['type'] == 'Agonist':
     #    predicate = LabeledID(identifier='CTD:increases_activity_of', label='increases activity of')
     #elif r['type'] in ['Antagonist','Channel blocker', 'Inhibitor', 'Gating inhibitor']:
     #    predicate = LabeledID(identifier='CTD:decreases_activity_of', label='decreases activity of')
     #else:
     #    predicate = LabeledID(identifier='RO:0002434', label='interacts with')
     props = {x: r[x] for x in ['primaryTarget', 'affinityParameter', 'endogenous'] }
     try:
         # affinity may be a single value or a range like "5.5 - 6.5"; use the mean
         affins = [float(x.strip()) for x in r['affinity'].split('-')]
         if len(affins) > 0:
             props['affinity'] = sum(affins) / len(affins)
     except (AttributeError, KeyError, ValueError):
         logger.debug(f"Can't parse affinity {r.get('affinity')}")
     edge = self.create_edge(chem, gene, 'gtopdb.ligand_to_gene', identifier, predicate,
         publications=[f"PMID:{x['pmid']}" for x in r['refs'] if x['pmid']], url=url, properties=props)
     return edge
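The affinity handling above averages range values; a tiny standalone illustration with a made-up range string:

# Illustrative affinity parsing: a range such as "5.5 - 6.5" is averaged (value made up)
affinity = '5.5 - 6.5'
affins = [float(x.strip()) for x in affinity.split('-')]
mean_affinity = sum(affins) / len(affins)  # -> 6.0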
Code example #9
 def get_transitions_disconnected(self, graph, op_filter):
     """
     Function adjusted for the crawler. It works on the assumption that the question contains
     uniform types of node pairs with no pair-to-pair connections,
     i.e. (a)->(b) and (c)->(d) but no (b)->(c).
     """
     source_node = self.query_graph['nodes'][0].concept_cypher_signature(
         'n0')
     target_node = self.query_graph['nodes'][1].concept_cypher_signature(
         'n1')
     cypher = [f'MATCH {source_node}-[e]-> {target_node}']
     cypher += ['WHERE Exists(e.op) RETURN Collect(e) as edges']
     query = '\n'.join(cypher)
     with graph.driver.session() as session:
         # materialize the result while the session is still open
         result = list(session.run(query))
     edges = []
     for row in result:
         for edge in row['edges']:
             e = {
                 "op": edge['op'],
                 "link": edge['predicate'],
                 "predicate": Text.snakify(edge['type']) if edge['type'] else None
             }
             edges.append(e)
     p = {}
     for edge in self.query_graph['edges']:
         p[edge.source_id] = {}
         p[edge.source_id][edge.target_id] = {
             e['op']: e
             for e in edges if op_filter(e['op'])
         }.values()
     return p
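The returned map has roughly the shape sketched below for a single (n0)->(n1) pair; the ids and op name are placeholders.

# Illustrative shape of the returned transition map (ids and op name are placeholders;
# the inner value is actually a dict_values view in the code above):
p = {
    'n0': {
        'n1': [
            {'op': 'some_service.some_op', 'link': 'related_to', 'predicate': 'related_to'},
        ],
    },
}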
Code example #10
def sort_edges_by_label(edges):
    """Group edges by the snakified label of their standard predicate."""
    el = defaultdict(list)
    for edge in edges:
        el[Text.snakify(edge[2]['object'].standard_predicate.label)].append(edge)
    return el
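A hypothetical usage sketch; the edge tuple layout is inferred from the indexing above, not documented in the snippet.

# Hypothetical usage; each edge is assumed to be a tuple whose third element
# holds the edge object under the 'object' key, as the indexing above implies.
edges = []  # placeholder list of (source, target, {'object': edge}) tuples
grouped = sort_edges_by_label(edges)
for label, group in grouped.items():
    print(label, len(group))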
Code example #11
 def get_gene_by_drug(self, input_node):
     drugbank_ids = input_node.get_synonyms_by_prefix('DRUGBANK')
     response = []
     for drugbank_id in drugbank_ids:
         drugbank_id = Text.un_curie(drugbank_id)
         url = f'{self.url}chem/{drugbank_id}?fields=drugbank.enzymes,drugbank.targets,drugbank.carriers,drugbank.transporters'
         logger.debug(url)
         results = self.query(url)
         if 'drugbank' in results:
             # genes can appear under several drugbank keys; collect the human entries from each
             genes = []
             for key in ['enzymes', 'transporters', 'carriers', 'targets']:
                 if key in results['drugbank']:
                     logger.debug(f'found {key}')
                     # keep only human proteins (this filter may not be strictly necessary)
                     genes += [x for x in results['drugbank'][key]
                               if 'organism' in x and x['organism'] == 'Humans']
             for gene in genes:
                 # Actions describe what the drug does to the enzyme,
                 # so we can treat actions as relationship types.
                 # e.g. Alfuzosin (DB00346) is a substrate for CYP3A4 (UniProtKB:P08684),
                 # which implies it is metabolized by that enzyme.
                 # We might have (a drug) that (inhibits) a gene, where the action is 'inhibitor',
                 # so it is safe to generalize: the action is what the drug is to the enzyme,
                 # or how the enzyme acts on the drug, i.e. (Drug) - is a/an (action) for -> (Enzyme/gene).
                 # Some actions point the other way, so the `reverse` list below captures direction.
                 #action_to_predicate_map = {
                 #    'substrate': (LabeledID(identifier='CTD:increases_degradation_of', label= 'substrate'), True), #(label, direction where true means reverse)
                 #    'inhibitor': (LabeledID(identifier= 'CTD:decreases_activity_of', label = "inhibitor"), False),
                 #    'inducer': (LabeledID(identifier = 'CTD:increases_activity_of', label="inducer"), False),
                 #    'antagonist': (LabeledID(identifier= 'CTD:decreases_activity_of', label = "antagonist"), False),
                 #    'weak inhibitor': (LabeledID(identifier= 'CTD:decreases_activity_of', label = "weak_inhibitor"), False),
                 #    'partial antagonist': (LabeledID(identifier= 'CTD:decreases_activity_of', label = "partial_antagonist"), False),
                 #    'blocker': (LabeledID(identifier= 'CTD:decreases_activity_of', label = "blocker"), False),
                 #    'inverse agonist': (LabeledID(identifier= 'CTD:decreases_activity_of', label = "inverse_agonist"), False),
                 #    'binder': (LabeledID(identifier='CTD:molecularly_interacts_with', label= 'binder'), False),
                 #    'activator': (LabeledID(identifier = 'CTD:increases_activity_of', label="activator"), False),
                 #    'agonist': (LabeledID(identifier = 'CTD:increases_activity_of', label="agonist"), False),
                 #    'partial agonist': (LabeledID(identifier = 'CTD:increases_activity_of', label="partial_agonist"), False),
                 #    'potentiator': (LabeledID(identifier = 'CTD:increases_activity_of', label="potentiator"), False),
                 #    'carrier': (LabeledID(identifier = 'CTD:increases_transport_of', label="potentiator"), True),
                 #    'product of': (LabeledID(identifier= 'CTD:increases_synthesis_of', label = "product_of"), False),
                 #    'inhibition of synthesis': (LabeledID(identifier= 'CTD:decreases_synthesis_of', label = "inhibition_of_synthesis"), False),
                 #    'inactivator': (LabeledID(identifier= 'CTD:decreases_activity_of', label = "inactivator"), False),
                 #}
                 reverse = ['substrate', 'carrier']
                 # Some genes are more like gene families, and we don't want them
                 if 'actions' in gene and 'uniprot' in gene:
                     actions = gene['actions'] if isinstance(gene['actions'], list) else [gene['actions']]
                     # create the gene node
                     nm = gene.get('gene_name', '')
                     gene_node = KNode(f"UNIPROTKB:{gene['uniprot']}",
                                       name=nm,
                                       type=node_types.GENE)
                     publications = [f'PMID:{x}' for x in gene['pmids']] if 'pmids' in gene else []
                     for action in actions:
                         # actions listed in `reverse` flip the edge direction (gene -> drug)
                         direction = action in reverse
                         #predicate,direction = action_to_predicate_map.get(action, (LabeledID(identifier= 'CTD:interacts_with', label=action),False))
                         rel = Text.snakify(action)
                         rel = Text.normalize_predicate(rel)
                         predicate = LabeledID(identifier=f'GAMMA:{rel}',
                                               label=rel)
                         source_node = input_node
                         target_node = gene_node
                         if direction:  # swap input and target nodes
                             source_node = gene_node
                             target_node = input_node
                         if predicate:
                             edge = self.create_edge(
                                 source_node,
                                 target_node,
                                 'mychem.get_gene_by_drug',
                                 source_node.id,
                                 predicate,
                                 publications=publications)
                             response.append((edge, gene_node))
     return response
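A standalone illustration of the direction handling above: actions listed in reverse flip the edge so it runs from the gene to the drug; the node labels are placeholders.

# Illustrative direction swap for a 'substrate' action (node labels are placeholders):
reverse = ['substrate', 'carrier']
action = 'substrate'
source, target = 'drug_node', 'gene_node'
if action in reverse:
    source, target = target, source
# -> the edge runs gene_node -[substrate]-> drug_node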