def test():
    """Show what Biolink and Mondo report for HBB (HGNC:4827).

    For every (edge, disease) pair Biolink returns for the gene, print the
    disease id together with the two values Mondo's ``is_genetic_disease``
    yields for it.
    """
    new_context = ServiceContext.create_context
    biolink = Biolink(new_context())
    gene_disease_pairs = biolink.gene_get_disease(('HGNC:4827', ))
    mondo = Mondo(new_context())
    for _edge, disease in gene_disease_pairs:
        first, second = mondo.is_genetic_disease(disease)
        print(disease['id'], first, second)
def test_one(infname, outfname, fieldnum):
    """Look up each distinct disease name of a TSV column in Mondo and report it.

    The first line of ``infname`` is skipped as a header and lines starting
    with ``#`` are ignored. For every distinct value found in column
    ``fieldnum`` one line is written to ``outfname``: the name, the Mondo
    search hits, and the DOID, UMLS and EFO cross references of those hits,
    each list joined with ``;``. Names without any hit get empty columns.
    A good/bad tally is printed at the end.

    Args:
        infname: Path of the tab-separated input file.
        outfname: Path of the report file to (over)write.
        fieldnum: Zero-based index of the column holding the disease name.
    """
    m = Mondo(ServiceContext.create_context())
    n_good = 0
    n_bad = 0
    diseases = set()

    def joined_xrefs(lookup, mondo_ids):
        # Flatten the per-id lists lazily instead of sum(list_of_lists, []),
        # which copies the accumulated list on every addition.
        return ';'.join(xref for mid in mondo_ids for xref in lookup(mid))

    with open(infname, 'r') as inf, open(outfname, 'w') as outf:
        inf.readline()  # discard the header line
        for line in inf:
            if line.startswith('#'):
                continue
            x = line.strip().split('\t')[fieldnum]
            if x in diseases:
                continue
            diseases.add(x)
            result = m.search(x)
            if len(result) == 0:
                n_bad += 1
                mondos = doids = umlss = efos = ''
            else:
                n_good += 1
                mondos = ';'.join(result)
                # The label list that used to be built here was never written
                # to the report, so the per-hit label lookups are dropped.
                doids = joined_xrefs(m.mondo_get_doid, result)
                umlss = joined_xrefs(m.mondo_get_umls, result)
                efos = joined_xrefs(m.mondo_get_efo, result)
            outf.write('{}\t{}\t{}\t{}\t{}\n'.format(x, mondos, doids, umlss, efos))
    print('Good: {} Bad: {}'.format(n_good, n_bad))
def test2():
    """Check how many drug names resolve to a PubChem node and then to genes.

    Reads the distinct drug names from the first column of
    'q2-drugandcondition-list.txt' (header line skipped), resolves each one
    through ChemBioKS, prints one line per drug and a summary at the end.
    """
    from greent.service import ServiceContext
    cb = ChemBioKS(ServiceContext.create_context())
    with open('q2-drugandcondition-list.txt', 'r') as inf:
        inf.readline()  # discard the header line
        uniq = {line.split('\t')[0] for line in inf}
    n_no_pub = 0
    n_no_ncbi = 0
    for name in uniq:
        input_node = KNode("DRUG_NAME:{}".format(name), node_types.DRUG_NAME)
        try:
            drug_node = cb.graph_drugname_to_pubchem(input_node)[0][1]
            ident = drug_node.identifier
            ncbi_nodes = cb.graph_pubchem_to_ncbigene(drug_node)
            if len(ncbi_nodes) == 0:
                n_no_ncbi += 1
        except Exception:
            # Best effort: any lookup failure counts as "no pubchem id".
            # Exception (not a bare except) so Ctrl-C still stops the run.
            n_no_pub += 1
            ident = ''
            ncbi_nodes = []
        print('{}\t{}\t{}'.format(name, ident, len(ncbi_nodes)))
    print('{} drugs'.format(len(uniq)))
    print('{} without pubchem id'.format(n_no_pub))
    print('{} without genes'.format(n_no_ncbi))
    ngood = len(uniq) - n_no_pub - n_no_ncbi
    # An input file with no data rows must not end in ZeroDivisionError.
    ratio = ngood / len(uniq) if uniq else 0.0
    print('{} good ({})'.format(ngood, ratio))
def test_q2_diseases():
    """Report DOID matches for every distinct disease name in the q1 list.

    Reads 'q1-disease-list.txt' (first line skipped, name taken from the
    first tab-separated column) and writes 'q1_disease_report.txt' with one
    row per distinct name: the name, its DOIDs joined with ';', and - only
    when no DOID was found - whatever ``name_to_anything`` returns.

    NOTE: despite the function name, the q1 files are the ones in use. To
    run against the q2 list instead, read 'q2-drugandcondition-list.txt'
    (disease name in column 1) and write 'q2_disease_report.txt'.
    """
    aggregator = TranslatorKnowledgeBeaconAggregator (ServiceContext.create_context ())
    n_good = 0
    n_bad = 0
    seen = set()
    with open('q1-disease-list.txt','r') as inf, open('q1_disease_report.txt','w') as outf:
        inf.readline()  # header line is not a disease
        outf.write('OriginalName\tDOIDs\tOthers\n')
        for raw_line in inf:
            disease_name = raw_line.strip().split('\t')[0]
            if disease_name in seen:
                continue
            seen.add(disease_name)
            node = KNode("NAME.DISEASE:{}".format(disease_name), node_types.DISEASE)
            matches = aggregator.name_to_doid (node)
            if len(matches) > 0:
                n_good += 1
                doids = ';'.join(match[1].identifier for match in matches)
                aliases = ''
            else:
                doids = ''
                aliases = aggregator.name_to_anything(node)
                n_bad += 1
            outf.write('{}\t{}\t{}\n'.format(disease_name, doids, aliases))
    print( 'Good: {} Bad: {}'.format(n_good, n_bad) )
def __init__(self, config=None, override=None):
    """Build one service context and attach every knowledge-service client to it.

    Args:
        config: Passed straight to ``ServiceContext.create_context``.
        override: Optional mapping. It is not read by the statements
            below; it is kept for interface compatibility.
    """
    # Avoid a shared mutable default ({}): normalise to a fresh dict per call.
    override = {} if override is None else override
    self.service_context = ServiceContext.create_context(config)
    service_context = self.service_context
    self.clinical = Clinical(service_context)
    # Temporarily taken out because of http errors.
    #self.exposures = CMAQ (service_context)
    self.endotype = Endotype(service_context)
    self.chembio = ChemBioKS(self.service_context)
    self.chemotext = Chemotext(self.service_context)
    self.disease_ontology = DiseaseOntology(self.service_context)
    self.pharos = Pharos(self.service_context)
    self.oxo = OXO(self.service_context)
    self.hpo = HPO(self.service_context)
    self.hetio = HetIO(self.service_context)
    self.biolink = Biolink(self.service_context)
    self.mondo = Mondo(self.service_context)
    self.go = GO(self.service_context)
    self.tkba = TranslatorKnowledgeBeaconAggregator(self.service_context)
    self.translator_registry = TranslatorRegistry(self.service_context)
    self.quickgo = QuickGo(self.service_context)
    # Translator receives this object itself; the attributes assigned above
    # already exist at this point, the ones below do not yet.
    self.translator = Translator(core=self)
    self.hgnc = HGNC(self.service_context)
    self.uberongraph = UberonGraphKS(self.service_context)
    self.ctd = CTD(self.service_context)
def test_all_drugs_ctd():
    """Check how many drug names resolve through CTD and then to genes.

    Reads the distinct drug names from the first column of
    'q2-drugandcondition-list.txt' (header line skipped), prints one line
    per drug (name, identifier, number of CTD hits, number of genes) and a
    summary at the end.
    """
    from greent.service import ServiceContext
    ctd = CTD(ServiceContext.create_context())
    with open('q2-drugandcondition-list.txt', 'r') as inf:
        inf.readline()  # discard the header line
        uniq = {line.split('\t')[0] for line in inf}
    n_no_ctd = 0
    n_no_gene = 0
    for name in uniq:
        input_node = KNode("DRUG_NAME:{}".format(name), type=node_types.CHEMICAL_SUBSTANCE_NAME)
        results = ctd.drugname_to_ctd(input_node)
        # Reset per drug so the report never shows the previous drug's genes.
        gene_nodes = []
        try:
            drug_node = results[0][1]
            ident = drug_node.identifier
        except (IndexError, KeyError, TypeError, AttributeError):
            # No usable first hit: nothing to index or no identifier on it.
            n_no_ctd += 1
            ident = ''
        if ident != '':
            gene_nodes = ctd.drug_to_gene(drug_node)
            if len(gene_nodes) == 0:
                n_no_gene += 1
        print('{}\t{}\t{}\t{}'.format(name, ident, len(results), len(gene_nodes)))
    print('{} drugs'.format(len(uniq)))
    print('{} without pubchem id'.format(n_no_ctd))
    print('{} without genes'.format(n_no_gene))
    ngood = len(uniq) - n_no_ctd - n_no_gene
    # An input file with no data rows must not end in ZeroDivisionError.
    ratio = ngood / len(uniq) if uniq else 0.0
    print('{} good ({})'.format(ngood, ratio))
def test_all_drugs_pharos():
    """Check how many drug names resolve through Pharos and then to genes.

    Reads the distinct drug names from the first column of
    'q2-drugandcondition-list.txt' (header line skipped), prints one line
    per drug (name, identifier, number of Pharos hits, number of genes) and
    a summary at the end.
    """
    from greent.service import ServiceContext
    pharos = Pharos(ServiceContext.create_context())
    with open('q2-drugandcondition-list.txt', 'r') as inf:
        inf.readline()  # discard the header line
        uniq = {line.split('\t')[0] for line in inf}
    n_no_pharos = 0
    n_no_hgnc = 0
    for name in uniq:
        input_node = KNode("DRUG_NAME:{}".format(name), type=node_types.CHEMICAL_SUBSTANCE_NAME)
        # Bind before the try: if the lookup itself raises, the report line
        # below would otherwise hit an unbound name on the first drug, or
        # show the previous drug's hit count on later ones.
        results = []
        try:
            results = pharos.drugname_to_pharos(input_node)
            drug_node = results[0][1]
            ident = drug_node.identifier
            hgnc_nodes = pharos.drug_get_gene(drug_node)
            if len(hgnc_nodes) == 0:
                n_no_hgnc += 1
        except Exception:
            # Best effort: any lookup failure counts as "not in Pharos".
            # Exception (not a bare except) so Ctrl-C still stops the run.
            n_no_pharos += 1
            ident = ''
            hgnc_nodes = []
        print('{}\t{}\t{}\t{}'.format(name, ident, len(results), len(hgnc_nodes)))
    print('{} drugs'.format(len(uniq)))
    print('{} without pubchem id'.format(n_no_pharos))
    print('{} without genes'.format(n_no_hgnc))
    ngood = len(uniq) - n_no_pharos - n_no_hgnc
    # An input file with no data rows must not end in ZeroDivisionError.
    ratio = ngood / len(uniq) if uniq else 0.0
    print('{} good ({})'.format(ngood, ratio))
def test_gc():
    """Print the diseases, then the genetic conditions, Biolink has for HGNC:4851."""
    biolink = Biolink(ServiceContext.create_context())
    gene = KNode('HGNC:4851', node_type=node_types.GENE)
    # gene_get_disease yields (edge, node) pairs; only the node is shown.
    for _edge, disease in biolink.gene_get_disease(gene):
        print(disease)
    print(biolink.gene_get_genetic_condition(gene))
class TestHetIO(unittest.TestCase):
    """Smoke tests that pretty-print HetIO gene lookups (no assertions)."""

    # One client shared by all tests, created when the class body runs.
    h = HetIO(ServiceContext.create_context())

    def test_anatomy(self):
        """Pretty-print the anatomy results for HGNC:TP53."""
        gene = KNode('HGNC:TP53', node_types.GENE)
        pprint(self.h.gene_to_anatomy(gene))

    def test_cell(self):
        """Pretty-print the cell results for HGNC:7121."""
        gene = KNode('HGNC:7121', node_types.GENE)
        pprint(self.h.gene_to_cell(gene))
def basic_test():
    """Print three numbered disease-name lookups from the beacon aggregator."""
    t = TranslatorKnowledgeBeaconAggregator (ServiceContext.create_context ())
    # Asthma lookups (name_to_mesh_disease, name_to_doid) are disabled.
    cases = (
        ('1.', t.name_to_doid, "NAME.DISEASE:Osteoporosis"),
        ('2.', t.name_to_doid, "NAME.DISEASE:HIV infection"),
        ('3.', t.name_to_efo, "NAME.DISEASE:HIV infection"),
    )
    for label, lookup, curie in cases:
        print (label)
        print (lookup (KNode(curie, node_types.DISEASE)))
def test():
    """Print the anatomy UberonGraphKS links to phenotype HP:0011675."""
    uberon = UberonGraphKS(ServiceContext.create_context())
    # The cell -> anatomy check is disabled; to re-enable it, call
    # get_anatomy_by_cell_graph with KNode('CL:0000097', node_types.CELL).
    phenotype = KNode('HP:0011675', node_types.PHENOTYPE)
    print(uberon.get_anatomy_by_phenotype_graph(phenotype))
def test():
    """Pretty-print three QuickGo lookups for GO process terms."""
    q = QuickGo(ServiceContext.create_context())
    lookups = (
        (q.go_term_xontology_relationships, "GO:0002551"),
        (q.go_term_xontology_relationships, "GO.BIOLOGICAL_PROCESS:0042626"),
        (q.go_term_annotation_extensions, "GO.BIOLOGICAL_PROCESS:0007269"),
    )
    for lookup, curie in lookups:
        pprint.pprint(lookup(KNode(curie, node_types.PROCESS)))
def __init__(self,greent):
    """Attach a Chemotext2 service to ``greent`` and set up the bad-word set.

    A new Chemotext2 instance (built on a freshly created service context)
    is stored on ``greent.chemotext2`` and the same attribute is mirrored on
    this object. ``badwords`` is a fixed set of lower-case words.
    """
    greent.chemotext2 = chemotext2.Chemotext2( ServiceContext.create_context() )
    self.chemotext2 = greent.chemotext2
    self.badwords = {
        'absence', 'adult', 'and', 'autosomal',
        'cell', 'central', 'chronic', 'complete', 'congenital', 'continuous',
        'defect', 'deficiency', 'deficit', 'disease', 'disorder', 'distal',
        'dominant', 'due', 'during',
        'early', 'essential', 'extensive',
        'familial', 'female', 'fever', 'form',
        'gene', 'group',
        'hemorrhagic', 'hereditary',
        'infantile', 'infection', 'isolated',
        'juvenile',
        'large', 'late', 'like',
        'male', 'middle', 'morbid', 'mutation',
        'negative', 'non', 'nonsyndromic',
        'onset',
        'partial', 'perinatal', 'plus', 'positive', 'pro',
        'rare', 'receptor', 'recessive', 'rhythm',
        'severe', 'single', 'small', 'status', 'susceptibility', 'syndrome',
        'syndromic', 'system',
        'tissue', 'type',
        'virus',
        'with', 'without',
    }
def test_pathways():
    """Print KEGG and Reactome pathways for HGNC:5013, then one pathway's genes.

    The gene list is fetched for the first Reactome pathway returned.
    """
    biolink = Biolink(ServiceContext.create_context())
    gene = KNode('HGNC:5013', node_type=node_types.GENE)
    for _edge, pathway in biolink.gene_get_kegg_pathway(gene):
        print(pathway)
    reactome_hits = biolink.gene_get_react_pathway(gene)
    for _edge, pathway in reactome_hits:
        print(pathway)
    # Each hit is an (edge, node) pair; take the node of the first one.
    first_pathway = reactome_hits[0][1]
    for _edge, member in biolink.pathway_get_gene(first_pathway):
        print(member)
def build_synonym_cache(ctext=None):
    """Write the term/synonym lookup table to the Chemotext cache file.

    Queries ``ctext`` for all ``Term`` nodes and writes a tab-separated file
    at ``ctext.cache`` with the header ``QUERY<TAB>KEY``. Every term
    contributes one line mapping its upper-cased name to its name, plus one
    line per synonym mapping the upper-cased synonym to the term name.

    Args:
        ctext: Object exposing ``query(query=...)`` and a ``cache`` path.
            When omitted, a ``Chemotext`` on a new service context is used.
    """
    if ctext is None:
        from greent.service import ServiceContext
        ctext = Chemotext(ServiceContext.create_context())
    response = ctext.query(query="MATCH (d:Term) RETURN d")
    with open(ctext.cache, 'w') as outfile:
        outfile.write('QUERY\tKEY\n')
        res = response['results'][0]
        for datum in res['data']:
            for row in datum['row']:
                # Only 'name' and the optional 'synonyms' are needed; a row
                # lacking other keys (e.g. 'type') must not abort the build.
                meshname = row['name']
                outfile.write('{}\t{}\n'.format(meshname.upper(), meshname))
                for syn in row.get('synonyms', []):
                    outfile.write('{}\t{}\n'.format(syn.upper(), meshname))
def test():
    """Print the HPO search results for two thyroid-neoplasm phrasings."""
    hpo = HPO (ServiceContext.create_context ())
    # 'Arrhythmias, Cardiac' is a further query that is currently disabled.
    for phrase in ('Thyroid Neoplasms', 'Neoplasm of the thyroid gland'):
        print( hpo.search(phrase) )
node_types.PHENOTYPE))) return edge_node #return [ ( self.get_edge ({ 'res' : r }, predicate='affects'), KNode("MESH:{0}".format (r['identifier']), 'PH') ) for r in result ] class TestHetIO(unittest.TestCase): h = HetIO(ServiceContext.create_context()) def test_anatomy(self): pprint(self.h.gene_to_anatomy(KNode('HGNC:TP53', node_types.GENE))) def test_cell(self): pprint(self.h.gene_to_cell(KNode('HGNC:7121', node_types.GENE))) if __name__ == '__main__': het = HetIO(ServiceContext.create_context()) print(het.disease_to_phenotype(KNode('DOID:2841', node_types.DISEASE))) ''' with open('hgnc-entrez', 'r') as stream: for line in stream: h, e, u = line.split ('\t') het.gene_to_anatomy (KNode('SOMETHING:{}'.format (e), node_types.GENE)) ''' #unittest.main () #MATCH (g:Gene)-[r]-(c:CellularComponent) WHERE g.name='HGNC:3263' RETURN g, r, c LIMIT 200
def test():
    """Print the PubChem lookup for imatinib, by plain name and by graph node."""
    from greent.service import ServiceContext
    chembio = ChemBioKS(ServiceContext.create_context())
    by_name = chembio.drugname_to_pubchem('imatinib')
    print(by_name)
    drug_name_node = KNode("DRUG_NAME:imatinib", node_types.DRUG_NAME)
    by_node = chembio.graph_drugname_to_pubchem(drug_name_node)
    print(by_node)
def test():
    """Print the UniProtKB mapping HGNC returns for NCBIGENE:3815."""
    from greent.service import ServiceContext
    context = ServiceContext.create_context()
    service = HGNC(context)
    gene = KNode('NCBIGENE:3815', node_type=node_types.GENE)
    mapped = service.ncbigene_to_uniprotkb(gene)
    print(mapped)
def test_name():
    """Print what ``cell_get_cellname`` returns for CL:0000097."""
    uberon = UberonGraphKS(ServiceContext.create_context())
    # cell -> name lookup
    print(uberon.cell_get_cellname('CL:0000097'))
return { "exposures" : exposures, "icd_codes" : icd_codes, "lat" : lat, "lon" : lon, "time" : time, "visit_type" : visit_type } def get_endotype (self, request): print (json.dumps (request)) r = self.client.endotypes.endotypes_post (input=request).result() print (r) return r['output'] if 'output' in r else None if __name__ == "__main__": e = Endotype (ServiceContext.create_context ()) exposures = list(map(lambda exp : e.create_exposure (**exp), [{ "exposure_type": "pm25", "units" : "", "value" : 2 }])) visits = list(map(lambda v : e.create_visit(**v), [{ "icd_codes" : "ICD9:V12,ICD9:E002", "lat" : "20", "lon" : "20", "time" : "2017-10-12 21:12:29", "visit_type" : "INPATIENT", "exposures" : exposures }])) request = e.create_request (dob= "2017-10-04", model_type="M0", race="1", sex="M", visits = visits) print (json.dumps (e.get_endotype (request), indent=2))
rosetta_config = Resource.get_resource_obj("rosetta.yml", format='yaml') semantics = rosetta_config['@translator-semantics'] subscriptions = [] for api in self.op_map: for in_type, in_params in self.op_map[api].items(): for out_type, out_vals in self.op_map[api][in_type].items(): predicate = semantics.get(api, {}).get(in_type, {}).get(out_type, None) #if not predicate: # predicate = '*-missing-*' subscriptions.append((in_type, out_type, { "link": predicate, "op": self.add_method(TranslatorRegistry, api, in_type, out_type) })) return subscriptions if __name__ == "__main__": """ Load the registry """ treg = TranslatorRegistry(ServiceContext.create_context()) """ Generate subscriptions """ subscriptions = treg.get_subscriptions() r = treg.myvariantinfo__uniprot_to_hgvs( KNode('UNIPROT:AKT1', node_types.GENE)) pprint(r)
def __init__(self, context):
    """Register as the "biolink" service and create the helper services.

    ``context`` is handed to the base-class initialiser only; the Mondo
    checker and the GO helper each get a newly created service context.
    """
    super().__init__("biolink", context)
    # TODO: can we just use the Mondo that's in the core already?
    mondo_context = ServiceContext.create_context()
    self.checker = Mondo(mondo_context)
    go_context = ServiceContext.create_context()
    self.go = GO(go_context)
def test_go():
    """Print every (edge, node) pair Biolink's ``gene_get_process`` returns for UniProtKB:P10721."""
    protein = KNode('UniProtKB:P10721', node_types.GENE)
    biolink = Biolink(ServiceContext.create_context())
    for edge, process in biolink.gene_get_process(protein):
        print(edge, process)
def __init__(self):
    """Set up a GreenT service context and a Chemotext service bound to it."""
    context = ServiceContext()
    self.service_context = context
    self.chemotext = Chemotext(context)
def test_phenotypes():
    """Print every (edge, node) pair Biolink's ``disease_get_phenotype`` returns for DOID:2841."""
    disease = KNode('DOID:2841', node_types.DISEASE)
    biolink = Biolink(ServiceContext.create_context())
    for edge, phenotype in biolink.disease_get_phenotype(disease):
        print(edge, phenotype)
def test_parts():
    """Print what ``get_anatomy_parts`` returns for UBERON:0004535."""
    uberon = UberonGraphKS(ServiceContext.create_context())
    parts = uberon.get_anatomy_parts('UBERON:0004535')
    print(parts)
raise ValueError ("We don't have a word embedding model for {0} word phrases".format (term_a.count(' ') + 1)) model = self.bigram_model term_a = term_a.replace (' ', '_') term_b = term_b.replace (' ', '_') elif term_a.count (' ') == 0: model = self.model else: raise ValueError ("We don't have a word embedding model for {0} word phrases".format (term_a.count(' ') + 1)) return model.similarity (term_a, term_b) if term_a in model.vocab and term_b in model.vocab else -1.0 #return self.model.similarity (term_a, term_b) if term_a in self.model.vocab and term_b in self.model.vocab else -1.0 if __name__ == "__main__": ct2 = Chemotext2 (ServiceContext.create_context ()) print (ct2.get_semantic_similarity ("lung cancer", "p53")) print (ct2.get_semantic_similarity ("cell line", "disease")) print (ct2.get_semantic_similarity ("cellular component", "nucleus")) print (ct2.get_semantic_similarity ("cell cycle", "krebbs")) print (ct2.get_semantic_similarity ("MAPK2", "P53")) w = [ "albuterol", "imatinib", "aspirin", "atrovent", "decadron", "medrol", "rayos" , "abemaciclib", "abraxane"] pairs = zip (w, w[1:]) for k, v in pairs: print ((" k %s -> v %s : sim: %s" % (k, v, ct2.get_semantic_similarity (k, v)))) print (ct2.model.most_similar (positive=['aspirin' ])) print (ct2.model.most_similar (positive=['p53' ])) print (ct2.model.most_similar (positive=['kit' ])) print (ct2.model.most_similar (positive=['asthma' ]))
def test():
    """Print Mondo's genetic-disease result for OMIM:143100, then a separator."""
    mondo = Mondo(ServiceContext.create_context())
    disease = KNode('OMIM:143100', node_types.DISEASE)
    verdict = mondo.is_genetic_disease(disease)
    print(verdict)
    print('------')