def CpdActivity(base_uri,fout,cid,verbose=0): tags=['bardExptId','sid','cid','outcome'] ## mucho data; minimize storage fout.write('%s\n'%(','.join(tags))) n_expd=0; n_sub_act=0; n_exp_act=0; n_sam_act=0; n_sam_tst=0; eids={}; sids={}; link=('/compounds/%d/exptdata'%cid) while True: rval=rest_utils.GetURL(base_uri+link,{},parse_json=True,verbose=verbose) expdlinks=rval['collection'] if not expdlinks or len(expdlinks)==0: break for expdlink in expdlinks: expd=rest_utils.GetURL(base_uri+expdlink,{},parse_json=True,verbose=verbose) if not expd: break n_expd+=1 eid=int(expd['bardExptId']) sid=int(expd['sid']) cid=int(expd['cid']) outcome=Expd2PCOutcome(expd) fout.write('%d,%d,%d,%d\n'%(eid,sid,cid,outcome)) fout.flush() n_sam_tst+=1 if not sids.has_key(sid): sids[sid]=False if not eids.has_key(eid): eids[eid]=False if outcome==2: n_sam_act+=1 eids[eid]=True sids[sid]=True link=rval['link'] if not link: break ## END of expdata IDs for eid,act in eids.items(): if act: n_exp_act+=1 for sid,act in sids.items(): if act: n_sub_act+=1 print >>sys.stderr, 'n_expd: %d'%n_expd print >>sys.stderr, 'n_exp_tst: %d'%len(eids) print >>sys.stderr, 'n_exp_act: %d'%n_exp_act print >>sys.stderr, 'n_sub_tst: %d'%n_sub_tst print >>sys.stderr, 'n_sub_act: %d'%len(sids) print >>sys.stderr, 'n_sam_tst: %d'%n_sam_tst print >>sys.stderr, 'n_sam_act: %d'%n_sam_act return
def ListBiologyOfType(base_uri,fout,btype,verbose=0): n_all=0; n_out=0; n_err=0; tags=[ 'biology', 'name', 'dictId', 'dictLabel', 'entity', 'entityId', 'extRef', 'extId', 'serial' ] fout.write(','.join(tags)+'\n') link=('/biology/types/%s?expand=true'%btype) rval=rest_utils.GetURL(base_uri+link,{},parse_json=True,verbose=verbose) if not rval: n_err+=1 if verbose: print >>sys.stderr, 'Error: no biologys, type: %s'%btype else: bios=rval for bio in bios: n_all+=1 vals=[] for tag in tags: vals.append(rest_utils.ToStringForCSV(bio[tag])) fout.write((','.join(vals))+'\n') n_out+=1 return n_all,n_out,n_err
def ListCompoundsFromSource(base_uri,fout,source,nskip=0,nmax=0,nchunk=500,verbose=0): ## For given source, write all compounds to smiles file. n_all=0; n_out=0; n_err=0; cids={} t0=time.time() link=('/substances?filter=%s[source_name]&expand=true&skip=%d&top=%d'%(source,nskip,nchunk)) while True: rval=rest_utils.GetURL(base_uri+link,{},parse_json=True,verbose=verbose) if not rval: continue ## ERROR substances=rval['collection'] if not substances: break ## may be link past end of SIDs (feature) for substance in substances: if verbose>0: if n_all%1000==0: print >>sys.stderr, 'n_all: %d n_out: %d n_err: %d'%(n_all,n_out,n_err), print >>sys.stderr, 'elapsed time: %s'%time.strftime('%Hh:%Mm:%Ss',time.gmtime(time.time()-t0)) n_all+=1 cid=substance['cid'] if cids.has_key(cid): continue cids[cid]=True #sid=substance['sid'] smi=substance['smiles'] fout.write('%s %d\n'%(smi,cid)) fout.flush() n_out+=1 if nmax>0 and n_out>=nmax: break link=rval['link'] if not link: break ## END of SIDs return n_all,n_out,n_err
def ListCompoundsTested(base_uri,fout,nskip=0,nmax=0,nchunk=500,verbose=0): n_all=0; n_out=0; n_err=0; t0=time.time() link=('/compounds?filter=[tested]&expand=true&skip=%d&top=%d'%(nskip,nchunk)) done=False while True: rval=rest_utils.GetURL(base_uri+link,{},parse_json=True,verbose=verbose) if not rval: print >>sys.stderr, ('ERROR: no response: %s'%link) break compounds=rval['collection'] for compound in compounds: if not compound: continue if verbose>0: if n_all%100==0: print >>sys.stderr, 'n_all: %d n_out: %d n_err: %d'%(n_all,n_out,n_err), print >>sys.stderr, 'elapsed time: %s'%time.strftime('%Hh:%Mm:%Ss',time.gmtime(time.time()-t0)) n_all+=1 cid=compound['cid'] smi=compound['smiles'] if not smi: print >>sys.stderr, ('ERROR: (cid=%d) SMILES missing.'%cid) n_err+=1 continue fout.write("%s %d\n"%(smi,cid)) n_out+=1 if nmax>0 and n_out>=nmax: done=True break if done: break link=rval['link'] if not link: break ## END of CIDs return n_all,n_out,n_err
def ListSubstancesTested(base_uri,fout,nskip=0,nmax=0,nchunk=500,verbose=0): n_all=0; n_out=0; n_err=0; t0=time.time() link=('/substances?filter=[tested]&skip=%d&top=%d&expand=true'%(nskip,nchunk)) done=False while True: rval=rest_utils.GetURL(base_uri+link,{},parse_json=True,verbose=verbose) if not type(rval) is dict: print >>sys.stderr, ('DEBUG: uri="%s" ; rval="%s"'%(base_uri+link,str(rval))) break substances=rval['collection'] for substance in substances: if not substance: continue if verbose>0: if n_all%100==0: print >>sys.stderr, 'n_all: %d n_out: %d n_err: %d'%(n_all,n_out,n_err), print >>sys.stderr, 'elapsed time: %s'%time.strftime('%Hh:%Mm:%Ss',time.gmtime(time.time()-t0)) n_all+=1 sid=substance['sid'] smi=substance['smiles'] cid=substance['cid'] #source=substance['source'] if not smi: print >>sys.stderr, ('ERROR: (sid=%d) SMILES missing.'%sid) n_err+=1 continue fout.write("%s %d %d\n"%(smi,sid,cid)) n_out+=1 if nmax>0 and n_out>=nmax: done=True break if done: break link=rval['link'] if not link: break ## END of SIDs return n_all,n_out,n_err
def EID2PCAID(base_uri,eid,verbose=0): rval=rest_utils.GetURL(base_uri+'/experiments/%d'%eid,{},parse_json=True,verbose=verbose) aid=None try: aid=rval['pubchemAid'] aid=int(aid) return aid except Exception, e: print >>sys.stderr, 'Error (Exception): %s'%e
def Describe(base_uri,verbose=0): txt='' for res in RESOURCES: try: rval=rest_utils.GetURL(base_uri+'/%s/_info'%res,{},parse_json=True,verbose=verbose) txt+=('%s\n'%(str(rval))) except urllib2.HTTPError, e: print >>sys.stderr, 'HTTP Error (%s): %s'%(res,e) continue
def Pid2CapPid(base_uri,pid): project=rest_utils.GetURL(base_uri+'/projects/%d'%pid,{},parse_json=True,verbose=0) #print >>sys.stderr, 'DEBUG: %s/projects/%d'%(base_uri,pid) if not project: return None if project.has_key('capProjectId'): return project['capProjectId'] else: return None
def AID2Targets(base_uri,aid,verbose=0): link=('/assays/%d'%aid) link=('/assays/%d?expand=true'%aid) #link=('/assays/%d/targets'%aid) assay=rest_utils.GetURL(base_uri+link,{},parse_json=True,verbose=verbose) if not assay: return None elif not assay.has_key('targets'): return [] return assay['targets']
def ListPlugins(base_uri,fout,verbose=0): plugins = rest_utils.GetURL(base_uri+'/plugins/registry/list',{},parse_json=True,verbose=verbose) fout.write('%18s\t%-8s\t%-9s\t%s\n'%('name','version','available','title')) for plugin in plugins: name = os.path.basename(plugin['path']) title = plugin['title'] version = plugin['version'] available = plugin['available'] fout.write('%18s\t%-8s\t%-9s\t%s\n'%(name,version,available,title)) fout.flush() return
def Counts(base_uri,verbose=0): txt='' for res in RESOURCES: try: n=rest_utils.GetURL(base_uri+'/%s/_count'%res,{},parse_json=True,verbose=verbose) txt+=('%s count: %d\n'%(res,n)) except urllib2.HTTPError, e: print >>sys.stderr, 'HTTP Error (%s): %s'%(res,e) continue except Exception, e: print >>sys.stderr, 'Exception (%s): %s'%(res,e) continue
def ListBiologyCounts(base_uri,fout,verbose=0): btypes=ListBiologyTypes(base_uri,verbose) for btype in btypes: link=('/biology/types/%s/_count'%btype) rval=rest_utils.GetURL(base_uri+link,{},parse_json=True,verbose=verbose) try: count=int(rval) except Exception, e: if verbose>0: print >>sys.stderr, 'Error (Exception): %s'%e print >>sys.stderr, 'DEBUG: rval=%s'%rval count=0 print >>sys.stderr, '\tbiology/%-10s: %6d'%(btype,count)
def Project2Labname(base_uri,pid): link='/projects/%d/annotations'%pid #print >>sys.stderr, 'DEBUG: %s/projects/%d/annotations'%(base_uri,pid) rval=rest_utils.GetURL(base_uri+link,{},parse_json=True,verbose=0) if not rval: return None contexts=rval['contexts'] for context in contexts: if context.has_key('name') and context['name']=='project management': if context.has_key('comps') and context['comps']: annos=context['comps'] for anno in annos: if anno.has_key('key') and anno['key']=='laboratory name': val=anno['value'] if val: return val else: return anno['display'] return None
def ListTargets(base_uri,fout,verbose=0): n_all=0; n_out=0; n_err=0; n_classified=0; tags=[ 'acc', 'name', 'status', 'url', 'taxId', 'geneId', 'description', 'classes' ] fout.write(','.join(tags)+'\n') link=('/targets?expand=true') while True: rval=rest_utils.GetURL(base_uri+link,{},parse_json=True,verbose=verbose) if not rval: n_err+=1 continue ## ERROR targets=rval['collection'] for target in targets: n_all+=1 vals=[] for tag in tags: if tag=='classes' and len(target['classes'])>0: n_classified+=1 vals.append('"['+((','.join(map(lambda x:x['source']+':'+x['id']+':'+x['name'],target['classes']))))+']"') else: vals.append(rest_utils.ToStringForCSV(target[tag])) fout.write((','.join(vals))+'\n') n_out+=1 link=rval['link'] if not link: break ## END of PIDs print >>sys.stderr, ('n_classified: %d'%n_classified) return n_all,n_out,n_err
def ListExperiments(base_uri,fout,mincpds=0,verbose=0): n_all=0; n_out=0; n_err=0; etags=[ 'bardExptId', 'capExptId', 'name', 'description', 'status', 'hasProbe', 'confidenceLevel', 'pubchemAid', 'bardAssayId', 'capAssayId', 'substances', #number of substances 'compounds', #number of compounds 'activeCompounds' ] ### experiment-annotations discontinued??? eatags={'project lead name':None} ##experiment-annotation tags (context name:"project lead name") ttags=[ 'biology', #PROTEIN|PROCESS|GENE|etc. 'name', 'dictLabel', 'dictId', 'extId', 'resourcePath' ] tags=etags[:] tags.extend(['projectId']) # only 1st, possibly more tags.extend(['capProjectId']) # only 1st, possibly more tags.extend(eatags.keys()) tags.extend(map(lambda s:'target:'+s,ttags)) tags.extend(['targetCount']) tags.extend(['project:labName']) fout.write((','.join(tags))+'\n') link=('/experiments?expand=true&top=100') n_experiment_total=0; n_experiment_notarget=0; n_experiment_filtered=0; aids={}; bids={}; #biology IDs pids={}; #project IDs n_target_total=0; while True: rval=rest_utils.GetURL(base_uri+link,{},parse_json=True,verbose=verbose) if not rval: n_err+=1 continue ## ERROR experiments=rval['collection'] for experiment in experiments: n_all+=1 n_target=0; eid=experiment['bardExptId'] aid=experiment['bardAssayId'] aids[aid]=True for tag in eatags.keys(): eatags[tag]=None if mincpds: cpd_count=experiment['compounds'] if cpd_count<mincpds: #print >>sys.stderr, 'DEBUG: expt cpd_count < %d'%mincpds n_experiment_filtered+=1 continue targets=AID2Targets(base_uri,aid,verbose) ##broken? if targets and len(targets)>0: n_target=len(targets) target=PreferredBiology(targets) ##only 1 for now... for bid in Biologys2BIDs(targets): bids[bid]=True else: target=None if not target: n_experiment_notarget+=1 vals=[] pids_this=None for etag in etags: vals.append(rest_utils.ToStringForCSV(experiment[etag])) etag='projectIdList' if experiment.has_key('projectIdList'): pids_this=experiment[etag] if pids_this: pid=pids_this[0] else: pid=None cap_pids_this=map(lambda x: Pid2CapPid(base_uri,x),pids_this) if cap_pids_this: cap_pid=cap_pids_this[0] else: cap_pid=None vals.append(rest_utils.ToStringForCSV(pid)) vals.append(rest_utils.ToStringForCSV(cap_pid)) ### Broken in latest, ok in straw (May 2014)... link2=('/experiments/%d/annotations'%eid) rval2=rest_utils.GetURL(base_uri+link2,{},parse_json=True,verbose=verbose) if not rval2: n_err+=1 continue ## ERROR contexts=rval2['contexts'] for context in contexts: if context.has_key('name') and context['name']=='project lead name': if context.has_key('comps') and context['comps']: annos=context['comps'] for anno in annos: for tag in eatags.keys(): if anno.has_key('key') and anno['key']==tag: val=anno['value'] if not val: val=anno['display'] if val: eatags[tag]=val for tag in eatags.keys(): vals.append(rest_utils.ToStringForCSV(eatags[tag])) for ttag in ttags: if target: vals.append(rest_utils.ToStringForCSV(target[ttag])) else: vals.append('') vals.append('%d'%n_target) if pids_this: vals.append(rest_utils.ToStringForCSV(Project2Labname(base_uri,pids_this[0]))) fout.write((','.join(vals))+'\n') n_out+=1 n_experiment_total+=1 n_target_total+=n_target link=rval['link'] if not link: break ## END of EIDs print >>sys.stderr, 'n_experiment_total: %d'%n_all print >>sys.stderr, 'n_experiment_out: %d'%n_out print >>sys.stderr, 'n_assay_total_uniq: %d'%len(aids.keys()) print >>sys.stderr, 'n_target_total: %d'%n_target_total print >>sys.stderr, 'n_target_total_uniq: %d'%len(bids.keys()) print >>sys.stderr, 'n_experiment_notarget: %d'%n_experiment_notarget print >>sys.stderr, 'n_experiment_filtered: %d'%n_experiment_filtered return n_all,n_out,n_err
def SearchProjects(base_uri,qstr,verbose=0): print rest_utils.GetURL(base_uri+'/projects/_count?filter=%s[description]'%qstr,{},parse_json=True,verbose=verbose) return
def ListProbes(base_uri,fout,verbose=0): n_all=0; n_out=0; n_err=0; n_htc=0; pids=set(); cids=set(); aids=set(); ctags=[ #compound-tag 'smiles', 'name', 'tpsa', 'exactMass', 'probeId', 'mwt', 'complexity', 'rotatable', 'highlight', 'compoundClass', 'iupacName', 'probeAnnotations', 'numAssay', 'hbondAcceptor', 'hbondDonor', 'numActiveAssay', 'xlogp' ] cstags=[ #compound-summary-tag #'hitTarget', 'hitAssays', #'testedAssays', #'hitExptdata', #'testedExptdata', 'ntest', 'nhit' ] tags=['PID','CID']+ctags+cstags fout.write(','.join(tags)+'\n') link=('/projects') n_project_total=0; rval=rest_utils.GetURL(base_uri+link,{},parse_json=True,verbose=verbose) projects=rval['collection'] for link2 in projects: #e.g. "/projects/17" pid=re.sub(r'^.*/','',link2) pids.add(int(pid)) n_project_total+=1 rval2=rest_utils.GetURL(base_uri+link2+'/probes',{},parse_json=True,verbose=verbose) probes=rval2 for link3 in probes: #e.g. "/compounds/5716367" cid=re.sub(r'^.*/','',link3) cids.add(int(cid)) rval2=rest_utils.GetURL(base_uri+link3,{},parse_json=True,verbose=verbose) cpds=rval2 if not cpds: print >>sys.stderr, ('ERROR: no cpds: %s'%link2) n_err+=1 continue cpd=cpds[0] rval3=rest_utils.GetURL(base_uri+link3+'/summary',{},parse_json=True,verbose=verbose) cpdsum=rval3 if not cpdsum: print >>sys.stderr, ('ERROR: no summary: %s'%link2+'/summary') n_err+=1 continue vals=[pid,cid] for tag in ctags: if tag=='probeAnnotations' and cpd.has_key(tag) and cpd[tag]: urls=map(lambda x:x['url'],cpd[tag]) urls=list(set(urls)) ## uniquify if None in urls: urls.remove(None) ## remove JSON nulls. try: vals.append(rest_utils.ToStringForCSV(','.join(urls),maxlen=0)) except Exception, e: print >>sys.stderr, 'DEBUG: problem with urls: CID=%s'%(cid) print >>sys.stderr, 'Error (Exception): %s'%e vals.append('ERROR') elif cpd.has_key(tag): vals.append(rest_utils.ToStringForCSV(cpd[tag])) else: vals.append(rest_utils.ToStringForCSV(None)) for tag in cstags: if tag=='hitTargetClasses' and cpdsum.has_key(tag) and cpdsum[tag]: vals.append(rest_utils.ToStringForCSV(','.join(cpdsum[tag].keys()))) n_htc+=1 elif tag=='hitAssays' and cpdsum.has_key(tag) and cpdsum[tag]: aid_strs=map(lambda x:x.replace('/assays/',''),cpdsum[tag]) for aid in aid_strs: aids.add(int(aid)) #print >>sys.stderr, 'DEBUG: "%s"'%(','.join(aid_strs)) vals.append('"'+(','.join(aid_strs))+'"') elif cpdsum.has_key(tag): vals.append(rest_utils.ToStringForCSV(cpdsum[tag])) else: vals.append(rest_utils.ToStringForCSV(None)) fout.write((','.join(vals))+'\n') n_out+=1
def ListAssays(base_uri,fout,mincpds=0,verbose=0): n_all=0; n_out=0; n_err=0; cats=[ 'source', 'designedBy' ] atags=['bardAssayId','capAssayId','name','source','designedBy','title', 'assayType','assayStatus','deposited','updated','grantNo' ##,'description','comments' ## (verbose) ] ttags=['biology','name','dictLabel','dictId','extId','resourcePath'] minann_tags=['assay type','assay format','detection method type'] counts={}; for cat in cats+minann_tags: counts[cat]={} tags=(atags+minann_tags+map(lambda s:'target:'+s,ttags)) tags.extend(['targetCount']) fout.write(','.join(tags)+'\n') ### This now broken... (No more paginated list of all assay entities?) link=('/assays?expand=true&top=100') n_assay_total=0; n_target_total=0; bids={}; #biology IDs while True: rval=rest_utils.GetURL(base_uri+link,{},parse_json=True,verbose=verbose) if not rval: n_err+=1 continue ## ERROR assays=rval['collection'] for assay in assays: n_all+=1 #aid=assay['bardAssayId'] #experiments=assay['experiments'] minanns=assay['minimumAnnotations'] targets=assay['targets'] n_target=len(targets) if len(targets)>0: target=PreferredBiology(targets) ##1 only for now... for bid in Biologys2BIDs(targets): bids[bid]=True else: target=None vals=[] for atag in atags: vals.append(rest_utils.ToStringForCSV(assay[atag])) for minann_tag in minann_tags: try: vals.append(rest_utils.ToStringForCSV(minanns[minann_tag])) except Exception, e: #print >>sys.stderr, 'DEBUG: Error (Exception): %s'%e #print >>sys.stderr, 'DEBUG: minanns=%s'%str(minanns) vals.append('') for ttag in ttags: if target: vals.append(rest_utils.ToStringForCSV(target[ttag])) else: vals.append('') vals.append('%d'%n_target) fout.write((','.join(vals))+'\n') n_out+=1 for cat in cats: if assay.has_key(cat): if not counts[cat].has_key(assay[cat]): counts[cat][assay[cat]]=0 counts[cat][assay[cat]]+=1 for cat in minann_tags: if minanns.has_key(cat): if not counts[cat].has_key(minanns[cat]): counts[cat][minanns[cat]]=0 counts[cat][minanns[cat]]+=1 n_assay_total+=1 n_target_total+=n_target link=rval['link'] if not link: break ## END of AIDs
def ListProjects(base_uri,fout,verbose=0): n_all=0; n_out=0; n_err=0; ptags=[ 'bardProjectId', 'capProjectId', 'name', 'description', 'source', 'category', 'type', 'classification', 'experimentCount' ] ## project-annotations discontinued!? patags={'assay provider name':None,'laboratory name':None,'grant number':None} ##project-annotation tags (context name:"project management") ttags=['biology','name','dictLabel','dictId','extId','resourcePath'] tags=(ptags+map(lambda s:'target:'+s,ttags)) tags+=(map(lambda s:'projann:'+s,patags.keys())) tags.extend(['assayCount','targetCount','probeCount']) fout.write(','.join(tags)+'\n') link=('/projects?expand=true') bids={}; n_project_total=0; while True: rval=rest_utils.GetURL(base_uri+link,{},parse_json=True,verbose=verbose) if not rval: n_err+=1 continue ## ERROR projects=rval['collection'] for project in projects: n_all+=1 n_target=0; n_assay=0; n_probe=0; for tag in patags.keys(): patags[tag]=None pid=project['bardProjectId'] targets=project['targets'] if targets and len(targets)>0: n_target=len(targets) target=PreferredBiology(targets) ##1 only for now... for bid in Biologys2BIDs(targets): bids[bid]=True else: target=None aids=project['aids'] if aids and len(aids)>0: n_assay=len(aids) probes=project['probes'] if probes and len(probes)>0: n_probe=len(probes) ### This stopped working... # link2='/projects/%d/annotations'%pid # rval2=rest_utils.GetURL(base_uri+link2,{},parse_json=True,verbose=verbose) # if not rval2: # n_err+=1 # continue ## ERROR # contexts=rval2['contexts'] # for context in contexts: # if context.has_key('name') and context['name']=='project management': # if context.has_key('comps') and context['comps']: # annos=context['comps'] # for anno in annos: # for tag in patags.keys(): # if anno.has_key('key') and anno['key']==tag: # val=anno['value'] # if not val: val=anno['display'] # if val: # patags[tag]=val vals=[] for tag in ptags: vals.append(rest_utils.ToStringForCSV(project[tag])) for ttag in ttags: if target: vals.append(rest_utils.ToStringForCSV(target[ttag])) else: vals.append('') for tag in patags.keys(): vals.append(rest_utils.ToStringForCSV(patags[tag])) vals.append('%d'%n_assay) vals.append('%d'%n_target) vals.append('%d'%n_probe) fout.write((','.join(vals))+'\n') n_out+=1 link=rval['link'] if not link: break ## END of PIDs print >>sys.stderr, 'n_project_total: %d'%n_all print >>sys.stderr, 'n_target_total_uniq: %d'%len(bids.keys()) return n_all,n_out,n_err
def ListProteins_classified(base_uri,fout,verbose=0): n_all=0; n_out=0; n_err=0; n_classified=0; btags=[ 'biology', 'name', 'dictId', 'dictLabel', 'entity', 'entityId', 'extRef', 'extId', 'serial' ] ctags=[ ## protein classification tags 'source', 'id', 'name', 'levelIdentifier' ] tags=(btags+map(lambda s:'targetclass:'+s,ctags)) fout.write(','.join(tags)+'\n') link=('/biology/types/PROTEIN?expand=true') rval=rest_utils.GetURL(base_uri+link,{},parse_json=True,verbose=verbose) if not rval: n_err+=1 if verbose: print >>sys.stderr, 'Error: no biologys, type: %s'%btype else: bios=rval for bio in bios: n_all+=1 vals=[] for tag in btags: vals.append(rest_utils.ToStringForCSV(bio[tag])) if bio['dictId']==1398 or bio['dictLabel']=='Uniprot accession number': uid=bio['extId'] ### NOTE: OK in v17.2. ### NOTE: Not OK in v17.3, since /targets deprecated. ### NOTE: Now we must instead use the "protclass" plugin. # # link2=('/targets/accession/%s'%uid) # tgt=rest_utils.GetURL(base_uri+link2,{},parse_json=True,verbose=verbose) # clevel=sys.maxint # if tgt and tgt.has_key('classes'): # n_classified+=1 # for protclass in tgt['classes']: # if protclass.has_key('source') and protclass['source']=='panther': # ##Select higher levelIdentifier. # if PantherLevel(protclass['levelIdentifier'])<clevel: # clevel=PantherLevel(protclass['levelIdentifier']) # while len(vals)>len(btags): # vals.pop() # for tag in ctags: # vals.append(rest_utils.ToStringForCSV(protclass[tag])) link2=('/plugins/protclass/panther/%s'%uid) rval=rest_utils.GetURL(base_uri+link2,{},parse_json=True,verbose=verbose) clevel=sys.maxint if rval.has_key(uid) and rval[uid]: n_classified+=1 for protclass in rval[uid]: if protclass.has_key('source') and protclass['source']=='panther': ##Select higher levelIdentifier. if PantherLevel(protclass['levelIdentifier'])<clevel: clevel=PantherLevel(protclass['levelIdentifier']) while len(vals)>len(btags): vals.pop() for tag in ctags: vals.append(rest_utils.ToStringForCSV(protclass[tag])) fout.write((','.join(vals))+'\n') n_out+=1 print >>sys.stderr, 'n_classified: %d'%n_classified return n_all,n_out,n_err
def Cid2Smiles(base_uri,id,verbose=0): smi=rest_utils.GetURL(base_uri+'/compounds/%d/smiles'%id,{},parse_json=True,verbose=verbose) smi=re.sub(r'\s.*$','',smi) return smi
def ListBiologyTypes(base_uri,verbose=0): link=('/biology/types') rval=rest_utils.GetURL(base_uri+link,{},parse_json=True,verbose=verbose) return rval
def ExpActivity(base_uri,fout,eid,sids_query,verbose=0): tags=['bardExptId','sid','outcome'] ## mucho data; minimize storage fout.write('%s\n'%(','.join(tags))) n_expd=0; n_sub_act=0; n_cpd_act=0; n_sam_act=0; n_sam_tst=0; cids={}; sids={}; nskip=0; nchunk=500; link=None while True: ## process sids in chunks: if sids_query: print >>sys.stderr, 'DEBUG: len(sids_query)=%d'%len(sids_query) if len(sids_query)<nskip: break sidstr=(','.join(map(lambda x:str(x),sids_query[nskip:nskip+nchunk]))) d={'sids':sidstr,'eids':str(eid)} expds=rest_utils.PostURL(base_uri+'/exptdata',{},d,parse_json=True,verbose=verbose) else: if not link: link='/experiments/%d/exptdata?expand=true&top=%d'%(eid,nchunk) print >>sys.stderr, 'DEBUG: link: %s'%link rval=rest_utils.GetURL(base_uri+link,{},parse_json=True,verbose=verbose) expds=rval['collection'] for expd in expds: n_expd+=1 eid=int(expd['bardExptId']) sid=int(expd['sid']) cid=int(expd['cid']) if sids_query: outcome=Expd2PCOutcome(expd) else: outcome=int(expd['outcome']) fout.write('%d,%d,%d\n'%(eid,sid,outcome)) fout.flush() n_sam_tst+=1 if not sids.has_key(sid): sids[sid]=False if not cids.has_key(cid): cids[cid]=False if outcome==2: n_sam_act+=1 sids[sid]=True cids[cid]=True if sids_query: nskip+=nchunk else: link=rval['link'] if not link: break ## END of expdata IDs for cid,act in cids.items(): if act: n_cpd_act+=1 for sid,act in sids.items(): if act: n_sub_act+=1 print >>sys.stderr, ('n_expd: %d'%n_expd) print >>sys.stderr, ('n_cpd_tst: %d'%len(cids)) print >>sys.stderr, ('n_cpd_act: %d'%n_cpd_act) print >>sys.stderr, ('n_sub_tst: %d'%len(sids)) print >>sys.stderr, ('n_sub_act: %d'%n_sub_act) print >>sys.stderr, ('n_sam_tst: %d'%n_sam_tst) print >>sys.stderr, ('n_sam_act: %d'%n_sam_act) return
def ExpSummary(base_uri,fout,eid,verbose=0): rval=rest_utils.GetURL(base_uri+'/experiments/%d?expand=true'%eid,{},parse_json=True,verbose=verbose) return rval