Ejemplo n.º 1
0
def build_exact_sets(o,u):
    """Build one set of exact-match identifiers per ID returned by ``o.get_ids()``.

    For each ID the exact matches from ``o.get_exact_matches`` are passed
    through ``Text.upper_curie``, entries starting with ``ICD`` are dropped,
    and a ``LabeledID`` made of the upper-cased ID and its label from
    ``u.get_label`` is added to the set.

    Progress (every 100 IDs) and per-ID debugging output are printed to stdout.

    Args:
        o: object providing ``get_ids()`` and ``get_exact_matches(id)``.
        u: object providing ``get_label(id)``.

    Returns:
        A list of sets, one per ID, in the order ``o.get_ids()`` yields them.
    """
    sets = []
    mids = o.get_ids()
    total = len(mids)
    print(total)
    start = dt.now()
    for n, mid in enumerate(mids):
        if n % 100 == 0 and n > 0:
            # total_seconds() covers the whole elapsed interval; the .seconds
            # attribute only holds the sub-day remainder and wraps every 24h,
            # which would corrupt the estimate on long runs.
            # (assumes `dt` is datetime.datetime, so the difference is a timedelta)
            delt = int((dt.now() - start).total_seconds())
            f = n / total
            print(f'{n}/{total} = {f} in {delt} s')
            print(f'  estimated time remaining = {delt * (1-f)/(f)}')
        #FWIW, ICD codes tend to be mapped to multiple MONDO identifiers, leading to mass confusion. So we
        #just excise them here.  It's possible that we'll want to revisit this decision in the future.  If so,
        #then we probably will want to set a 'glommable' and 'not glommable' set.
        print(mid)
        dbx = [ Text.upper_curie(x) for x in o.get_exact_matches(mid) ]
        print(dbx)
        # The ICD check runs on the already upper-cased identifiers.
        dbx = { x for x in dbx if not x.startswith('ICD') }
        # The label is looked up with the original (not upper-cased) ID.
        label = u.get_label(mid)
        print(label)
        dbx.add(LabeledID(Text.upper_curie(mid),label))
        sets.append(dbx)
    return sets
Ejemplo n.º 2
0
def build_sets(o, ignore_list=('ICD',)):
    """Build one set of normalized cross-references per ID returned by ``o.get_ids()``.

    For each ID the xrefs from ``o.get_xrefs`` are filtered by prefix,
    upper-cased with ``Text.upper_curie`` and passed through ``norm``; a
    ``LabeledID`` made of the upper-cased ID and its label from
    ``o.get_label`` is then added to the set.

    Args:
        o: object providing ``get_ids()``, ``get_xrefs(id)`` and ``get_label(id)``.
        ignore_list: iterable of string prefixes; any xref whose raw
            (not yet upper-cased) form starts with one of them is dropped.
            Defaults to dropping ``ICD`` xrefs.

    Returns:
        A list of sets, one per ID, in the order ``o.get_ids()`` yields them.
    """
    # str.startswith accepts a tuple of prefixes (an empty tuple matches
    # nothing), so one call replaces the reduce/lambda "any prefix" fold.
    ignored_prefixes = tuple(ignore_list)
    sets = []
    for mid in o.get_ids():
        #FWIW, ICD codes tend to be mapped to multiple MONDO identifiers, leading to mass confusion. So we
        #just excise them here.  It's possible that we'll want to revisit this decision in the future.  If so,
        #then we probably will want to set a 'glommable' and 'not glommable' set.
        xrefs = {Text.upper_curie(x) for x in o.get_xrefs(mid) if not x.startswith(ignored_prefixes)}
        dbx = {norm(x) for x in xrefs}
        # The label is looked up with the original (not upper-cased) ID.
        label = o.get_label(mid)
        dbx.add(LabeledID(Text.upper_curie(mid), label))
        sets.append(dbx)
    return sets
Ejemplo n.º 3
0
 def gene_to_drug_expanded(self, gene_node):
     """Collect chemical edges for a gene from the expanded chemical-gene endpoint.

     For every ``NCBIGENE`` synonym of ``gene_node`` this fetches
     ``CTD_chem_gene_expanded_geneID`` under ``self.url`` and turns each row
     accepted by ``check_expanded_gene_chemical_row`` into an edge.  Within a
     single identifier, only the first edge per
     (drug node id, original predicate label) pair is kept.

     Args:
         gene_node: node providing ``get_synonyms_by_prefix``.

     Returns:
         A list of ``(edge, drug_node)`` tuples.
     """
     results = []
     for ncbi_curie in gene_node.get_synonyms_by_prefix('NCBIGENE'):
         # De-duplication is scoped to one identifier, not to the whole call.
         seen_keys = set()
         gene_id = Text.un_curie(ncbi_curie)
         url = f"{self.url}CTD_chem_gene_expanded_geneID/ncbigene:{gene_id}/"
         rows = requests.get(url).json()
         for row in rows:
             keep, predicate_label, props, pmids = self.check_expanded_gene_chemical_row(row)
             if not keep:
                 continue
             labeled_predicate = LabeledID(identifier=f"CTD:{Text.snakify(predicate_label)}", label=predicate_label)
             predicate = self.normalize_predicate(labeled_predicate)
             #Should this be substance?
             drug_node = KNode(Text.upper_curie(row['chemicalID']), type=node_types.CHEMICAL_SUBSTANCE, name=row['chem_label'])
             # A '->' direction makes the chemical the subject; anything else makes the gene the subject.
             if row['direction'] == '->':
                 source, target = drug_node, gene_node
             else:
                 source, target = gene_node, drug_node
             edge = self.create_edge(source, target, 'ctd.gene_to_drug_expanded', ncbi_curie, predicate,
                                     properties=props, url=url, publications=pmids)
             #Ideally the key would use edge.standard_predicate.label, but right now there's not enough
             #real specificity on the predicates, so the original predicate label is used instead.
             key = (drug_node.id, edge.original_predicate.label)
             if key in seen_keys:
                 continue
             results.append((edge, drug_node))
             seen_keys.add(key)
     return results
Ejemplo n.º 4
0
 def drug_to_gene_expanded(self, drug):
     """Collect gene edges for a drug from the expanded chemical-gene endpoint.

     For every ``MESH`` synonym of ``drug`` this fetches
     ``CTD_chem_gene_expanded_chemicalID`` under ``self.url`` and turns each
     row accepted by ``check_expanded_gene_chemical_row`` into an edge.  No
     de-duplication is performed.

     Args:
         drug: node providing ``get_synonyms_by_prefix``.

     Returns:
         A list of ``(edge, gene_node)`` tuples.
     """
     results = []
     for mesh_curie in drug.get_synonyms_by_prefix('MESH'):
         url = f"{self.url}CTD_chem_gene_expanded_chemicalID/mesh:{Text.un_curie(mesh_curie)}/"
         response = requests.get(url)
         for row in response.json():
             keep, predicate_label, props, pmids = self.check_expanded_gene_chemical_row(row)
             if not keep:
                 continue
             labeled_predicate = LabeledID(identifier=f"CTD:{Text.snakify(predicate_label)}", label=predicate_label)
             predicate = self.normalize_predicate(labeled_predicate)
             gene_node = KNode(Text.upper_curie(row['geneID']), name=row['gene_label'], type=node_types.GENE)
             # A '->' direction makes the drug the subject; anything else makes the gene the subject.
             if row['direction'] == '->':
                 source, target = drug, gene_node
             else:
                 source, target = gene_node, drug
             edge = self.create_edge(source, target, 'ctd.drug_to_gene_expanded', mesh_curie, predicate,
                                     publications=pmids, properties=props, url=url)
             results.append((edge, gene_node))
     return results
Ejemplo n.º 5
0
 def check_expanded_gene_chemical_row(self, r):
     """Decide whether an expanded gene-chemical row is worth keeping.

     Args:
         r: mapping with the keys ``interaction``, ``taxonID`` and ``PMID``
             (``PMID`` being a ``|``-separated string).

     Returns:
         A tuple ``(good_row, predicate_label, props, pmids)``: whether the
         row passed the publication-count filter, the interaction text, a
         dict with ``description`` and ``taxon`` entries, and the PMIDs
         passed through ``Text.upper_curie``.
     """
     predicate_label = r['interaction']
     props = {"description": predicate_label, 'taxon': f"taxon:{r['taxonID']}"}
     raw_pmids = r['PMID'].split('|')
     publication_count = len(raw_pmids)

     # Poorly supported rows (e.g. microarray results backed by a single
     # paper) clutter the output, so these interaction types must have a
     # minimum number of publications to be kept.
     needs_three_pubs = ('affects expression of', 'increases expression of',
                         'decreases expression of', 'affects methylation of',
                         'increases methylation of', 'decreases methylation of',
                         'affects molecular modification of',
                         'increases molecular modification of',
                         'decreases molecular modification of')
     needs_two_pubs = ('affects splicing of', 'increases splicing of', 'decreases splicing of')

     under_supported = (
         (publication_count < 3 and predicate_label in needs_three_pubs)
         or (publication_count < 2 and predicate_label in needs_two_pubs)
     )
     good_row = not under_supported
     pmids = [Text.upper_curie(pmid) for pmid in raw_pmids]
     return good_row, predicate_label, props, pmids