Example #1
0
 def term_to_term(self, node_a, node_b, limit=10000):
     """Score the semantic similarity of two nodes' labels and return the best match as a KEdge.

     Every phrase generated from ``node_a.label`` is compared with every phrase
     generated from ``node_b.label`` (identical phrases are skipped) through
     ``self.chemotext2.get_semantic_similarity``.  The best-scoring pair is
     attached to a support edge running from ``node_a`` to ``node_b``.

     ``limit`` is accepted but not used by this implementation.

     Returns None when no pair scores above -1."""
     log = logging.getLogger('application')
     log.debug('chemotext2: "{}" to "{}"'.format(node_a.label, node_b.label))
     candidates_a = self.generate_phrases(node_a.label)
     candidates_b = self.generate_phrases(node_b.label)
     # -1 is the "nothing found yet" sentinel; only a strictly better score replaces it.
     top_score, top_a, top_b = -1, '', ''
     for phrase_a in candidates_a:
         for phrase_b in candidates_b:
             if phrase_a != phrase_b:
                 score = self.chemotext2.get_semantic_similarity(phrase_a, phrase_b)
                 if score > top_score:
                     top_score, top_a, top_b = score, phrase_a, phrase_b
                 log.debug('  "{}"-"{}": {} ({})'.format(
                     phrase_a, phrase_b, score, top_score))
     log.debug(' "{}"-"{}": {}'.format(top_a, top_b, top_score))
     if not (top_score > -1):
         return None
     similarity = {'similarity': top_score, 'terms': [top_a, top_b]}
     ke = KEdge('chemotext2', 'term_to_term', similarity, is_support=True)
     ke.source_node = node_a
     ke.target_node = node_b
     return ke
Example #2
0
 def term_to_term(self, node_a, node_b, limit=10000):
     """Find chemotext articles linking two nodes and return them on a support KEdge.

     Each MeSH label of ``node_a`` is paired with each MeSH label of ``node_b``
     and one Cypher query per pair is sent through ``self.ctext.query``; at most
     ``limit`` articles are requested per query.  All returned rows are pooled.

     Returns None when no article was found."""
     from datetime import datetime
     cypher = "MATCH (d:Term)-[r1]-(a:Art)-[r2]-(t:Term) WHERE d.name='%s' AND t.name='%s' RETURN a LIMIT %d"
     mesh_labels_a = self.get_mesh_labels(node_a)
     mesh_labels_b = self.get_mesh_labels(node_b)
     found = []
     started = datetime.now()
     for mesh_a in mesh_labels_a:
         for mesh_b in mesh_labels_b:
             reply = self.ctext.query(query=cypher % (mesh_a, mesh_b, limit))
             for result in reply['results']:
                 for data in result['data']:
                     found.extend(data['row'])
     elapsed = datetime.now() - started
     logging.getLogger('application').debug(
         'chemotext: {} to {}: {} ({})'.format(mesh_labels_a, mesh_labels_b,
                                               len(found), elapsed))
     if not found:
         return None
     ke = KEdge('chemotext',
                'term_to_term', {'publications': found},
                is_support=True)
     ke.source_node = node_a
     ke.target_node = node_b
     return ke
 def term_to_term(self, node_a, node_b, limit=10000):
     """Find chemotext articles linking two nodes.

     Each MeSH label of ``node_a`` is paired with each MeSH label of ``node_b``
     and queried through ``self.ctext.query`` (at most ``limit`` rows each).

     Returns None when nothing is found.  When articles ARE found the method
     currently raises RuntimeError on purpose: the KEdge construction below the
     guard has been flagged as suspect and is unreachable until it is reviewed."""
     from datetime import datetime
     log = logging.getLogger('application')
     log.debug('identifiers: {} to {}'.format(node_a.id, node_b.id))
     mesh_labels_a = self.get_mesh_labels(node_a)
     mesh_labels_b = self.get_mesh_labels(node_b)
     hits = []
     started = datetime.now()
     for mesh_a in mesh_labels_a:
         for mesh_b in mesh_labels_b:
             cypher = "MATCH (d:Term)-[r1]-(a:Art)-[r2]-(t:Term) WHERE d.name='%s' AND t.name='%s' RETURN a LIMIT %d" % (mesh_a, mesh_b, limit)
             reply = self.ctext.query(query=cypher)
             for result in reply['results']:
                 for data in result['data']:
                     hits.extend(data['row'])
     elapsed = datetime.now() - started
     log.debug('chemotext: {} to {}: {} ({})'.format(mesh_labels_a, mesh_labels_b, len(hits), elapsed))
     if not hits:
         return None
     pmids = [f'PMID:{hit["pmid"]}' for hit in hits]
     # Deliberate guard: everything after this line is unreachable for now.
     raise RuntimeError('The following KEdge constructor syntax looks very suspect.')
     ke = KEdge('chemotext.term_to_term', dt.now(), 'chemotext:1', 'literature_co-occurence',
                f'{node_a.id},{node_b.id}', 'chemotext:1', 'literature_co-occurence', publications=pmids,
                is_support=True)
     ke.source_node = node_a
     ke.target_node = node_b
     return ke
 def execute(self):
     """Run every query plan produced by the user query and add the results to the graph.

     The user query supplies four parallel sequences (cypher, start node spec,
     reverse flag, lookup node).  For each plan a start KNode is built, linked
     to its lookup node by a 'lookup' edge, and handed to rosetta together with
     the cypher; the de-duplicated result edges are then added."""
     self.logger.debug('Executing Query')
     # GreenT wants a cypher query to find transitions, and a starting point.
     plans = zip(self.userquery.generate_cypher(),
                 self.userquery.get_start_node(),
                 self.userquery.get_reversed(),
                 self.userquery.get_lookups())
     for cypher, start, reverse, lookup in plans:
         input_name = Text.un_curie(lookup.identifier)
         for message in (start, 'CYPHER', cypher):
             self.logger.debug(message)
         identifier, ntype = start
         start_node = KNode(identifier, ntype, label=input_name)
         lookup_edge = KEdge('lookup', 'lookup')
         lookup_edge.source_node = lookup
         lookup_edge.target_node = start_node
         self.add_nonsynonymous_edge(lookup_edge)
         # Fire this to rosetta and collect the result.
         raw_edges = self.rosetta.graph([(None, start_node)], query=cypher)
         # The result contains duplicate edges; drop them while preserving order.
         unique_edges = list(OrderedDict.fromkeys(raw_edges))
         self.add_edges(unique_edges, reverse)
     self.logger.debug('Query Complete')
Example #5
0
 def make_edge(self, cooc_list, node_a, node_b):
     """Build a 'cdw' support edge between two nodes from the first co-occurrence entry.

     Only ``cooc_list[0]`` is used.  It is unpacked as ``(key, properties)``;
     the key is stored as a list under ``'icd9'`` in the properties mapping
     (which is modified in place) and that mapping becomes the edge properties."""
     icd9_key, cooc_props = cooc_list[0]
     #TODO: fix this up with details
     cooc_props['icd9'] = [*icd9_key]
     support_edge = KEdge('cdw', 'term_to_term', cooc_props, is_support=True)
     support_edge.source_node = node_a
     support_edge.target_node = node_b
     return support_edge
 def make_edge(self, cooc_list, node_a, node_b):
     """Prepare a 'cdw' support edge from the first co-occurrence entry.

     ``cooc_list[0]`` is unpacked as ``(key, properties)`` and the key is stored
     as a list under ``'icd9'`` in the properties mapping (modified in place).

     The method then raises RuntimeError on purpose: the KEdge construction
     below the guard has been flagged as suspect and is currently unreachable."""
     icd9_key, cooc_props = cooc_list[0]
     #TODO: fix this up with details
     cooc_props['icd9'] = [*icd9_key]
     # Deliberate guard: everything after this line is unreachable for now.
     raise RuntimeError(
         'The following KEdge constructor looks somewhat suspect.')
     support_edge = KEdge('cdw', 'term_to_term', cooc_props, is_support=True)
     support_edge.source_node = node_a
     support_edge.target_node = node_b
     return support_edge
 def new_edge(self, source, function, properties,
              source_node=None, target_node=None):
     """Intended to build a KEdge and attach its endpoints; currently always raises.

     The RuntimeError is deliberate: the KEdge construction below the guard has
     been flagged as suspect, so nothing after the ``raise`` is reachable."""
     raise RuntimeError(
         'The following KEdge constructor looks very suspect.')
     built = KEdge(source, function, properties)
     built.source_node = source_node
     built.target_node = target_node
     return built
Example #8
0
 def term_to_term(self, node_a, node_b):
     """Return a literature co-occurrence support edge between two nodes, or None.

     The shared articles come from ``self.omnicorp.get_shared_pmids``; the last
     '/'-separated segment of each is turned into a ``PMID:`` curie and stored
     as the edge's publications."""
     shared = self.omnicorp.get_shared_pmids(node_a, node_b)
     if len(shared) == 0:
         return None
     pmids = ['PMID:{}'.format(ref.split("/")[-1]) for ref in shared]
     ke = KEdge(
         'omnicorp.term_to_term',
         dt.now(),
         'omnicorp:1',
         'literature_co-occurence',
         f'{node_a.identifier},{node_b.identifier}',
         'omnicorp:1',
         'literature_co-occurence',
         publications=pmids,
         is_support=True,
     )
     ke.source_node = node_a
     ke.target_node = node_b
     return ke
Example #9
0
 def process_associations(self,
                          r,
                          predicate,
                          target_node_type,
                          reverse=False):
     """Turn a biolink association response into a list of (KEdge, KNode) pairs.

     For each entry of ``r['associations']`` the far-end node is built from the
     association's object.  Some queries (e.g. pathway->genes) come back with
     the queried entity as the object instead; pass ``reverse=True`` to build
     the node from the subject in that case.

     Only publication ids starting with 'PMID' (case-insensitive) are kept."""
     far_end = 'subject' if reverse else 'object'
     edge_nodes = []
     for association in r['associations']:
         pubs = []
         if 'publications' in association and association['publications'] is not None:
             # Sometimes we get something like "uniprotkb" instead of a PMID; skip those.
             pubs = [pub['id'] for pub in association['publications']
                     if pub['id'][:4].upper() == 'PMID']
         endpoint = association[far_end]
         obj = KNode(endpoint['id'], target_node_type, endpoint['label'])
         relation = association['relation']
         props = {
             'publications': pubs,
             'relation': {'typeid': relation['id'], 'label': relation['label']},
         }
         edge_nodes.append((KEdge('biolink', predicate, props), obj))
     return edge_nodes
Example #10
0
 def graph_get_pathways_by_gene(self, gene):  #reasoner
     """Return (KEdge, KNode) pairs for the KEGG pathways linked to a gene.

     The part of ``gene.identifier`` after the first ':' is upper-cased and
     substituted into the SPARQL template below; each result row yields a
     'geneToPathway' edge and a pathway node whose id is ``KEGG:`` plus the
     last path segment of the row's ``keggPath``."""
     gene_symbol = gene.identifier.split(':')[1].upper()
     rows = self.triplestore.query_template(
         inputs={"gene": gene_symbol},
         outputs=['keggPath'],
         template_text="""
         prefix kegg:      <http://chem2bio2rdf.org/kegg/resource/>
         prefix drugbank:  <http://chem2bio2rdf.org/drugbank/resource/>
         prefix uniprot:   <http://chem2bio2rdf.org/uniprot/resource/gene/>
         prefix ctd:       <http://chem2bio2rdf.org/ctd/resource/>
         prefix mesh:      <http://bio2rdf.org/mesh:>
         select ?drugGenericName ?uniprotGeneID ?pathwayName ?keggPath where {
            ?keggPath    kegg:protein                ?swissProtID ;
                         kegg:Pathway_name           ?pathwayName .
            ?keggInter   kegg:cid                    ?pubchemCID .
            ?dbInter     drugbank:GeneBank_ID        ?geneBankID ;
                         drugbank:SwissProt_ID       ?swissProtID ;
                         drugbank:gene               ?uniprotGeneID .
            ?drugID      drugbank:CID                ?pubchemCID ;
                         drugbank:Generic_Name       ?drugGenericName .
            ?ctd_disease ctd:diseaseid               ?diseaseID ;
                         ctd:cid                     ?pubchemCID .
            values ( ?uniprotGeneID ) {
               ( uniprot:$gene )
            }
         } LIMIT 2000""")
     return [(KEdge('c2b2r', 'geneToPathway', {}),
              KNode("KEGG:{0}".format(row['keggPath'].split('/')[-1]),
                    node_types.PATHWAY))
             for row in rows]
Example #11
0
    def process_variant_to_gene_relationships(self, variant_nodes: list, writer: WriterDelegator):
        """Fetch variant-to-gene relationships and write the resulting nodes and edges.

        ``self.genetics_services.get_variant_to_gene`` returns a mapping of source
        variant id to (edge, node) pairs.  Each pair is converted to a KNode/KEdge
        and written through ``writer``; a gene node is written only the first
        time its id is seen (tracked in ``self.written_genes``)."""
        relationships = self.genetics_services.get_variant_to_gene(self.crawl_for_service, variant_nodes)
        for variant_id, edge_node_pairs in relationships.items():
            # Convert the simple edges and nodes to graph objects and write them out.
            for simple_edge, simple_node in edge_node_pairs:
                gene_node = KNode(simple_node.id,
                                  type=simple_node.type,
                                  name=simple_node.name,
                                  properties=simple_node.properties)
                if self.recreate_sv_node:
                    variant_node = KNode(variant_id, type=node_types.SEQUENCE_VARIANT)
                    variant_node.add_export_labels([node_types.SEQUENCE_VARIANT])
                    writer.write_node(variant_node)
                gene_is_new = gene_node.id not in self.written_genes
                if gene_is_new:
                    writer.write_node(gene_node)
                    self.written_genes.add(gene_node.id)

                predicate = LabeledID(identifier=simple_edge.predicate_id,
                                      label=simple_edge.predicate_label)
                # No standard_predicate is supplied here; only the original one.
                gene_edge = KEdge(source_id=variant_id,
                                  target_id=gene_node.id,
                                  provided_by=simple_edge.provided_by,
                                  ctime=simple_edge.ctime,
                                  original_predicate=predicate,
                                  input_id=simple_edge.input_id,
                                  properties=simple_edge.properties)
                writer.write_edge(gene_edge)
            logger.info(f'added {len(edge_node_pairs)} variant relationships for {variant_id}')
Example #12
0
    def drug_get_gene(self, subject):
        """Get the gene targets of a Pharos ligand (drug).

        The curie prefix is stripped from ``subject.identifier`` and the ligand
        is fetched from the Pharos API.  Every link of kind
        'ix.idg.models.Target' is resolved to an HGNC id through
        ``self.target_to_hgnc``; targets that cannot be resolved are logged
        and skipped.

        Returns a list of (KEdge, KNode) pairs, one per resolved target."""
        pharosid = Text.un_curie(subject.identifier)
        r = requests.get('https://pharos.nih.gov/idg/api/v1/ligands(%s)?view=full' % pharosid)
        result = r.json()
        resolved_edge_nodes = []
        for link in result['links']:
            if link['kind'] != 'ix.idg.models.Target':
                continue
            pharos_target_id = int(link['refid'])
            pharos_edge = KEdge('pharos', 'drug_get_gene', {'properties': link['properties']})
            # Pharos returns target ids in its own numbering system; map to HGNC.
            hgnc = self.target_to_hgnc(pharos_target_id)
            if hgnc is None:
                # Logger.warn is a deprecated alias; use warning().
                logging.getLogger('application').warning(
                    'Did not get HGNC for pharosID %d', pharos_target_id)
                continue
            resolved_edge_nodes.append((pharos_edge, KNode(hgnc, node_types.GENE)))
        return resolved_edge_nodes
Example #13
0
 def drugname_to_pharos(self, namenode):
     """Map a drug-name node to Pharos drug nodes.

     The curie prefix is stripped from ``namenode.identifier`` and the bare
     name is looked up; each (id, label) pair found becomes a DRUG KNode
     paired with a 'drugname_to_pharos' KEdge.

     Returns a list of (KEdge, KNode) pairs (empty when nothing matched)."""
     drugname = Text.un_curie(namenode.identifier)
     # NOTE(review): resolved as a module-level name, not a method on self --
     # confirm it is in scope where this class is defined.
     pharosids = drugname_string_to_pharos_string(drugname)
     # The accumulator was never initialised, so the first append raised NameError.
     results = []
     for pharosid, pharoslabel in pharosids:
         newnode = KNode(pharosid, node_types.DRUG, label=pharoslabel)
         newedge = KEdge('pharos', 'drugname_to_pharos', {})
         results.append((newedge, newnode))
     return results
Example #14
0
 def drugname_to_ctd(self, namenode):
     """Map a drug-name node to CTD drug nodes.

     The curie prefix is stripped from ``namenode.identifier`` and the bare
     name is looked up with ``self.drugname_string_to_ctd_string``; each CTD id
     found becomes a DRUG KNode (labelled with the drug name) paired with a
     'drugname_to_ctd' KEdge.

     Returns a list of (KEdge, KNode) pairs (empty when nothing matched)."""
     drugname = Text.un_curie(namenode.identifier)
     ctdids = self.drugname_string_to_ctd_string(drugname)
     # The accumulator was never initialised, and the loop variable did not
     # match the name used in the body (`ctd` vs `ctdid`); both raised NameError.
     results = []
     for ctdid in ctdids:
         newnode = KNode(ctdid, node_types.DRUG, label=drugname)
         newedge = KEdge('CTD', 'drugname_to_ctd', {})
         results.append((newedge, newnode))
     return results
Example #15
0
 def get_anatomy_by_cell_graph(self, cell_node):
     """Return (KEdge, KNode) pairs for the anatomy terms linked to a cell node.

     Rows come from ``self.cell_to_anatomy``; each row's ``anatomyID`` is
     converted to a curie for the node id and its ``anatomyLabel`` becomes the
     node label."""

     def to_pair(row):
         edge = KEdge('uberongraph', 'cellToAnatomy')
         node = KNode(Text.obo_to_curie(row['anatomyID']), node_types.ANATOMY)
         node.label = row['anatomyLabel']
         return (edge, node)

     return [to_pair(row) for row in self.cell_to_anatomy(cell_node.identifier)]
Example #16
0
 def graph_get_genes_by_disease(self, disease):  #reasoner
     """Return (KEdge, KNode) pairs for the genes linked to a disease.

     The part of ``disease.identifier`` after the first ':' is lower-cased and
     passed (as a one-element list) to ``self.get_genes_pathways_by_disease``.
     Each row yields a 'diseaseToGene' edge carrying the row's ``keggPath`` and
     a gene node whose id is ``UNIPROT:`` plus the last path segment of the
     row's ``uniprotGene``."""
     disease_key = disease.identifier.split(':')[1].lower()
     rows = self.get_genes_pathways_by_disease([disease_key])
     return [(KEdge('c2b2r', 'diseaseToGene', {'keggPath': row['keggPath']}),
              KNode("UNIPROT:{0}".format(row['uniprotGene'].split('/')[-1]),
                    node_types.GENE))
             for row in rows]
Example #17
0
def callback(ch, method, properties, body):
    """Handle one queue message: flush the writer, or write the graph it carries.

    The decoded body is either the literal string 'flush', which flushes the
    module-level ``writer``, or a JSON document with 'nodes' and 'edges' lists
    whose entries are wrapped in KNode / KEdge and written out."""
    payload = body.decode()
    is_flush_request = isinstance(payload, str) and payload == 'flush'
    if is_flush_request:
        writer.flush()
    else:
        graph = json.loads(payload)
        for node_record in graph['nodes']:
            writer.write_node(KNode(node_record))
        for edge_record in graph['edges']:
            writer.write_edge(KEdge(edge_record))
 def drugname_to_pharos(self, namenode):
     """Map a drug-name node to Pharos chemical-substance nodes.

     The curie prefix is stripped from ``namenode.id`` and the name is looked up
     with ``self.drugname_string_to_pharos_info``.  With no matches an empty
     list is returned.

     With at least one match the method raises RuntimeError on purpose: the
     KEdge construction below the guard passes ``namenode.id`` where a ctime
     appears to be expected, so it is unreachable until reviewed."""
     drugname = Text.un_curie(namenode.id)
     matches = self.drugname_string_to_pharos_info(drugname)
     results = []
     identifies = LabeledID(identifier='RDFS:id', label='identifies')
     for pharosid, pharoslabel in matches:
         newnode = KNode(pharosid, type=node_types.CHEMICAL_SUBSTANCE, name=pharoslabel)
         # Deliberate guard: the rest of the loop body is unreachable for now.
         raise RuntimeError('namenode.id is probably not a ctime...')
         newedge = KEdge(namenode, newnode, 'pharos.drugname_to_pharos', namenode.id, identifies)
         results.append((newedge, newnode))
     return results
Example #19
0
 def process_pharos_response(r):
     """Collect (KEdge, Pharos target id) pairs from one Pharos disease response.

     ``r`` must provide ``json()``.  Every link of kind 'ix.idg.models.Target'
     becomes a 'disease_get_gene' KEdge paired with the link's integer refid and
     is appended to ``original_edge_nodes``, which is not defined here and must
     come from an enclosing scope.  Links of any other kind are logged.

     A body that is not valid JSON is logged and skipped rather than raised,
     so one bad response does not abort the caller."""
     # Only decoding can raise JSONDecodeError, so keep the try to that call.
     try:
         result = r.json()
     except JSONDecodeError as e:
         logger.warning('Could not decode Pharos response as JSON: %s', e)
         return
     for link in result['links']:
         if link['kind'] != 'ix.idg.models.Target':
             logger.info('Pharos disease returning new kind: %s' % link['kind'])
             continue
         pharos_target_id = int(link['refid'])
         pharos_edge = KEdge('pharos', 'disease_get_gene', {'properties': link['properties']})
         original_edge_nodes.append((pharos_edge, pharos_target_id))
Example #20
0
def test_write_edges():
    """Writing the same edge twice queues it once unless force_create is set."""
    writer = BufferedWriter(rosetta_mock)
    edge_fields = {
        'source_id': 'source:1',
        'target_id': 'target:1',
        'provided_by': 'test_write_edges',
    }
    edge = KEdge(edge_fields)
    edge.original_predicate = LabeledID(identifier='SEMMEDDB:CAUSES',
                                        label='semmed:causes')

    writer.write_edge(edge)
    recorded = writer.written_edges[edge.source_id][edge.target_id]
    assert recorded == {edge.original_predicate.identifier}
    assert len(writer.edge_queues) == 1

    # A second write of the same edge must not grow the queue.
    writer.write_edge(edge)
    assert len(writer.edge_queues) == 1

    # Forcing creation queues it again.
    writer.write_edge(edge, force_create=True)
    assert len(writer.edge_queues) == 2
Example #21
0
    def get_edge(self, props=None, predicate=None, pmids=None):
        """Generate a graph edge in a standard way, propagating the information
        needed for scoring and semantic context.

        props: dict of edge properties; a 'stdprop' entry is added to it in
            place.  Omitted or None means a fresh empty dict.
        predicate: describes the connection between subject and object; the
            edge is flagged as a synonym when it equals 'synonym'.
        pmids: publication ids passed up for provenance and confidence scoring.
            Omitted or None means a fresh empty list.

        Raises ValueError when props is neither None nor a dict."""
        # Fresh containers per call: the previous mutable defaults were shared
        # (and mutated) across every call that relied on them.
        if props is None:
            props = {}
        elif not isinstance(props, dict):
            raise ValueError("Properties must be a dict")
        if pmids is None:
            pmids = []

        props['stdprop'] = {
            'predicate': predicate,
            'pmids': pmids,
        }
        return KEdge(self.name, predicate, props, is_synonym=(predicate == 'synonym'))
Example #22
0
 def create_edge(self, source_node, target_node, provided_by, input_id, predicate, analysis_id=None, publications=None, url=None, properties=None):
     """Build a KEdge between two nodes, stamped with the current time.

     source_node / target_node: objects exposing ``id``.
     provided_by: name of the edge source; required.
     predicate: stored as the edge's original predicate (standardization is
         not done here).
     analysis_id: accepted but not used by this implementation.
     publications / properties: omitted or None means a fresh empty list / dict.

     Raises ValueError when provided_by is None."""
     # Raising a plain string is itself a TypeError in Python 3; raise a real
     # exception, and do it before any other work.
     if provided_by is None:
         raise ValueError('missing edge source')
     return KEdge(source_id=source_node.id,
                  target_id=target_node.id,
                  provided_by=provided_by,
                  ctime=time.time(),
                  original_predicate=predicate,
                  input_id=input_id,
                  # Fresh containers per edge instead of shared mutable defaults.
                  publications=[] if publications is None else publications,
                  url=url,
                  properties={} if properties is None else properties)
Example #23
0
def test_edge_changing_node_ids():
    """An edge written with the original node ids is flushed with the ids
    recorded in the writer's synonym map."""
    bf = BufferedWriter(rosetta_mock)

    # Stand-in for session.write_transaction during the edge flush: it checks
    # the arguments the writer passes instead of touching a database.
    def write_transaction_mock_edge(export_func, edges, edge_label,
                                    merge_edges):
        import os
        assert os.environ.get('MERGE_EDGES', False) == merge_edges
        # the edge is flushed under the 'causes' label
        assert edge_label == 'causes'
        # make sure this is the right export function
        assert export_func == export_edge_chunk
        edge = edges[0]
        # make sure the edge carries the replaced node ids
        assert edge.source_id == 'CHEBI:127682'
        assert edge.target_id == 'NCBIGene:84125'
        print(edges)

    # build an edge that still refers to the original node ids
    source_node = KNode('PUBCHEM:44490445')
    target_node = KNode('HGNC:25708')
    edge = KEdge({
        'source_id':
        source_node.id,
        'target_id':
        target_node.id,
        'provided_by':
        'test_write_edges',
        'original_predicate':
        LabeledID(identifier='SEMMEDDB:CAUSES', label='semmed:causes'),
        'standard_predicate':
        None,
        'input_id':
        'PUBCHEM:44490445',
        'publications': [],
    })
    bf.write_node(source_node)
    bf.write_node(target_node)
    # a mock session for writing nodes; its write_transaction does nothing
    session_for_node = Mock()
    session_for_node.write_transaction = lambda export_func, node_list, labels: None
    # we are not testing for nodes here, only that flushing fills the synonym map
    bf.flush_nodes(session_for_node)
    assert bf.synonym_map == {
        'PUBCHEM:44490445': 'CHEBI:127682',
        'HGNC:25708': 'NCBIGene:84125'
    }
    # the edge is written only after the nodes were flushed; the assertions
    # run inside write_transaction_mock_edge during flush_edges
    session_for_edge = Mock()
    session_for_edge.write_transaction = write_transaction_mock_edge
    bf.write_edge(edge)
    bf.flush_edges(session_for_edge)
Example #24
0
 def create_edge(self, source_node, target_node, provided_by, input_id, predicate, publications=None, url=None, properties=None):
     """Build a KEdge between two nodes, stamped with the current time.

     source_node / target_node: objects exposing ``id``.
     provided_by: name of the edge source; required.
     predicate: stored as the original predicate and also passed through
         ``self.standardize_predicate`` (with both node ids) to obtain the
         standard predicate.
     publications / url / properties: passed to KEdge unchanged.

     Raises ValueError when provided_by is None."""
     # Raising a plain string is itself a TypeError in Python 3; raise a real
     # exception, and validate before doing any other work.
     if provided_by is None:
         raise ValueError('missing edge source')
     ctime = time.time()
     standard_predicate = self.standardize_predicate(predicate, source_node.id, target_node.id)
     return KEdge(source_id=source_node.id,
                  target_id=target_node.id,
                  provided_by=provided_by,
                  ctime=ctime,
                  original_predicate=predicate,
                  standard_predicate=standard_predicate,
                  input_id=input_id,
                  publications=publications,
                  url=url,
                  properties=properties)
Example #25
0
 def get_drugs_by_condition_graph(self, conditions):
     """Return (KEdge, KNode) pairs for the drugs linked to a condition.

     Rows come from ``self.get_drugs_by_condition``.  Each row yields a
     'conditionToDrug' edge carrying the row's PubChem CID and disease PMIDs,
     and a drug node whose id is the last path segment of the row's ``drugID``
     and whose label is the generic drug name."""
     rows = self.get_drugs_by_condition(conditions.identifier)
     return [(KEdge('c2b2r', 'conditionToDrug',
                    {'cid': row['pubChemCID'], 'pmids': row['diseasePMIDs']}),
              KNode(row['drugID'].split('/')[-1],
                    node_types.DRUG,
                    row['drugGenericName']))
             for row in rows]
Example #26
0
 def generate_all_edges(self, nodelist):
     """Build one literature co-occurrence support edge per node pair with shared PMIDs.

     ``self.omnicorp.get_all_shared_pmids`` returns a mapping whose keys are
     node pairs and whose values are the shared publications.  The same
     predicate is used as both predicate arguments of every edge."""
     shared_pmids = self.omnicorp.get_all_shared_pmids(nodelist)
     cooccurrence = LabeledID(identifier='omnicorp:1',
                              label='literature_co-occurrence')
     edges = []
     for pair, pmids in shared_pmids.items():
         first, second = pair[0], pair[1]
         edges.append(KEdge(first,
                            second,
                            'omnicorp.term_to_term',
                            time.time(),
                            cooccurrence,
                            cooccurrence,
                            f'{first.id},{second.id}',
                            publications=pmids,
                            is_support=True))
     return edges
Example #27
0
 def disease_get_gene0(self, subject):
     """ Collect (edge, Pharos target id) pairs for a disease via the Pharos API.

     The subject is translated to Pharos disease ids with self.translate; each
     id is fetched from the Pharos diseases endpoint and every link of kind
     'ix.idg.models.Target' becomes a KEdge paired with the link's integer refid.

     NOTE(review): as visible here the accumulated list is never returned, so a
     call yields None -- the definition may be truncated; confirm against the
     full source. """
     pharosids = self.translate (subject)
     print ("pharos ids: {}".format (pharosids))
     original_edge_nodes=[]
     for pharosid in pharosids:
         logger.debug ('pharos> https://pharos.nih.gov/idg/api/v1/diseases(%s)?view=full' % pharosid)
         r = requests.get('https://pharos.nih.gov/idg/api/v1/diseases(%s)?view=full' % pharosid)
         result = r.json()
         for link in result['links']:
             if link['kind'] != 'ix.idg.models.Target':
                 # anything that is not a target is only reported, not collected
                 logger.info('Pharos disease returning new kind: %s' % link['kind'])
             else:
                 # refid is Pharos' own target id; it is kept as an int next to the edge
                 pharos_target_id = int(link['refid'])
                 pharos_edge = KEdge( 'pharos', 'disease_get_gene', {'properties': link['properties']} )
                 original_edge_nodes.append( (pharos_edge, pharos_target_id) )
def generate_graph(size: int):
    """Yield ``size`` synthetic (source node, edge, target node) triplets.

    Triplet ``i`` links chemical-substance nodes ``SOURCE:i`` and ``TARGER:i``
    with an 'affects' edge attributed to 'stress_tester'."""
    for index in range(size):
        source_node = KNode(f'SOURCE:{index}', type=node_types.CHEMICAL_SUBSTANCE)
        target_node = KNode(f'TARGER:{index}', type=node_types.CHEMICAL_SUBSTANCE)
        edge_fields = dict(
            source_id=source_node.id,
            target_id=target_node.id,
            provided_by='stress_tester',
            ctime='now',
            original_predicate=LabeledID('RO:0000052', 'affects'),
            input_id=source_node.id,
        )
        yield (source_node, KEdge(**edge_fields), target_node)
Example #29
0
 def hgnc_to_uniprotkb(self, node):
     """Given a gene node carrying an HGNC identifier, retrieve its UniProtKB identifiers.

     Returns a list of (synonym KEdge, gene KNode) pairs, one per UniProt id in
     the first document of the service response, or an empty list when the
     response has no such entry.

     Raises ValueError when the node is not a gene or its identifier prefix is
     not HGNC."""
     if node.node_type != node_types.GENE:
         raise ValueError('Node must be a gene')
     prefix_and_id = node.identifier.split(':')
     if prefix_and_id[0].upper() != 'HGNC':
         raise ValueError('Node must represent an HGNC identifier.')
     hgnc_id = prefix_and_id[1]
     lookup_url = '{0}/hgnc_id/{1}'.format(self.url, hgnc_id)
     payload = requests.get(lookup_url, headers={'Accept':'application/json'}).json()
     try:
         uniprots = payload['response']['docs'][0]['uniprot_ids']
         pairs = []
         for uniprot in uniprots:
             synonym_edge = KEdge('hgnc', 'ncbigene_to_uniprotkb', is_synonym=True)
             uniprot_node = KNode(identifier='UNIPROTKB:{}'.format(uniprot),
                                  node_type=node_types.GENE)
             pairs.append((synonym_edge, uniprot_node))
         return pairs
     except (IndexError, KeyError):
         # No results back
         return []
Example #30
0
 def drug_to_gene(self, subject):
     """Get the genes linked to a CTD drug id.

     The curie prefix is stripped from ``subject.identifier`` and the result is
     used as a key into ``self.drug_genes``.  Each link yields a
     'drug_get_gene' edge carrying the link's actions and publications, paired
     with a gene node for the link's ``gene_id``."""
     ctdid = Text.un_curie(subject.identifier)
     # Union of every action seen; it is not part of the return value.
     seen_actions = set()
     edge_nodes = []
     for link in self.drug_genes[ctdid]:
         gene_id = link['gene_id']
         link_props = dict(actions=link['actions'],
                           publications=link['publications'])
         seen_actions.update(link['actions'])
         pair = (KEdge('ctd', 'drug_get_gene', {'properties': link_props}),
                 KNode(gene_id, node_types.GENE))
         edge_nodes.append(pair)
     return edge_nodes