def results(self, hid, *args, **kwargs):
    """Render the results page for a previously submitted request.

    The session stored under ``hid`` is updated in place with the session
    data held by the worker registered for the same ``hid`` and with the
    contents of that worker's ``'motif_enrich'`` entry.  The merged values
    are then passed as template variables to ``adipo_site_results.html``.

    Arguments:
    hid -- session identifier; coerced to ``str`` so lookups use the same
           key form that ``processing`` uses when it registers threads
    kwargs -- request parameters, run through ``self.parse_kwargs``; the
              ``'sortby'`` entry is forwarded to the template as a string
              (the literal ``'None'`` when it is absent)

    Returns the rendered page, or a short error message when the session,
    the worker thread, or the enrichment results cannot be found.
    """
    error_msg = 'Error occurred, please try submitting again'

    hid = str(hid)
    params = self.parse_kwargs(kwargs)

    session_d = self.sessions.get(hid)
    if session_d is None:
        log.error('hid passed but no session found, aborting')
        return error_msg

    # results may be requested before processing ever started for this hid;
    # report that instead of failing with a KeyError
    thread = self.threads.get(hid)
    if thread is None:
        log.error('hid is valid but no processing thread was found, aborting')
        return error_msg

    thread_session_d = thread.thread.session_d
    motif_enrich = thread_session_d.get('motif_enrich')
    if motif_enrich is None:
        # the worker has not stored its results (yet)
        log.error('hid is valid but no motif enrichment results were found, aborting')
        return error_msg

    # the stored session is deliberately updated in place, as before
    session_d.update(thread_session_d)
    session_d.update(motif_enrich)

    templ_d = dict(session_d)

    # check for sortby argument from user
    templ_d['sortby'] = str(params.get('sortby'))

    top_template = self.template_env.get_template('adipo_site_results.html')
    return top_template.render(**templ_d)
def processing(self, hid, *args, **kwargs):
    """Start (if necessary) and report on background processing for ``hid``.

    On the first call for a session a new ``AdipoThread`` is created, handed
    an ``EnrichThread`` built from the session, and registered in
    ``self.threads`` under ``str(hid)``.  While the thread reports that it is
    running, a page that refreshes itself every 5 seconds is returned; once
    it reports that it is done, the client is redirected to the results page.

    Arguments:
    hid -- session identifier; coerced to ``str`` before any lookup

    Returns the rendered "please wait" page, or a short error message when
    the session or its gene list is missing, or when the thread is neither
    running nor done.

    Raises ``cherrypy.HTTPRedirect`` (303) once processing is done.
    """
    error_msg = 'Error occurred, please try submitting again'

    hid = str(hid)
    cherrypy.log('processing hid %s (%s)'%(hid,type(hid)))

    session_d = self.sessions.get(hid)
    if session_d is None:
        log.error('hid passed but no session found, aborting')
        return error_msg

    # we use a thread to process in the bg
    thread = self.threads.get(hid)

    # processing hasn't begun yet
    if thread is None:
        gene_list = session_d.get('gene_list')
        self.gene_name_map = session_d.get('gene_name_map')
        if gene_list is None:
            log.error('hid is valid but no gene list was found, aborting')
            return error_msg

        thread = AdipoThread()
        enrich_thread = EnrichThread(session_d, self.db_session, self.args)
        thread.run_thread(enrich_thread)
        self.threads[hid] = thread

    if thread.is_running():
        template = self.template_env.get_template('adipo_site.html')
        # meta is a void element: close the start tag and emit no end tag
        top = ('Request is processing, please wait.<br/>This page refreshes automatically every 5 seconds.<br/>'
               '<meta http-equiv="refresh" content="5; URL=processing?hid=%s">')%hid
        return template.render(top_content=top, bottom_content='')

    if thread.is_done():
        raise cherrypy.HTTPRedirect("http://fraenkel.mit.edu/adipo_sight/results?hid=%s"%hid,status=303)

    # neither running nor done: report it rather than silently returning None
    log.error('processing thread is neither running nor done, aborting')
    return error_msg
def run(self):
    """Compute per-condition motif enrichment for the session's gene list.

    Reads ``self.session_d['gene_list']``, queries ``self.db_session`` for
    the 'motif scores' sequence data of regions whose distance to the
    feature lies between ``-self.args['upstream']`` and
    ``self.args['downstream']``, and groups the unpickled scores by
    condition name.  For every condition the scores are compared against a
    seeded random sample of that condition's background score file using
    ``mww_multiprocess``; motifs whose value falls below
    ``self.args['diff_hyp_pval']`` are recorded per motif cluster.

    Results are written to ``self.session_d``:
    'found' -- set of region set names seen on the returned regions
    'missing' -- requested genes that are not in 'found'
    'motif_enrich' -- dict with 'motifs' (per-cluster names and the
                      smallest value per condition), 'hs_regions' (number
                      of score rows per condition) and 'enriched_motifs'
                      (number of clusters below threshold per condition)

    Returns None, or a short error message when ``self.session_d`` is None.
    """
    session_d = self.session_d
    if session_d is None:
        log.error('hid passed but no session found, aborting')
        return 'Error occurred, please try submitting again'

    gene_list = session_d['gene_list']

    # get motif scores
    cherrypy.log('getting motif scores')

    # sqlite has a limitation of 999 SQL variables, need to split up
    # this query
    batch_size = 500
    gene_list_batches = [gene_list[i:i+batch_size]
                         for i in xrange(0, len(gene_list), batch_size)]
    cherrypy.log('gene_list_batches len()s: %s'%str([len(b) for b in gene_list_batches]))

    scores = []
    for batch in gene_list_batches:
        score_q = (self.db_session.query(db.Region, db.SeqData)
                   .join(db.RegionMembership)
                   .join(db.RegionSet)
                   .join(db.SeqData)
                   .filter(db.SeqData.seq_type.has(db.SeqType.name=='motif scores'))
                   .filter(db.RegionSet.name.in_(batch))
                   .filter(db.RegionMembership.dist_to_feature.between(
                       -int(self.args['upstream']),
                       int(self.args['downstream'])
                   ))
                  )
        scores.extend(score_q.all())

    gene_names = set()
    condition_scores = defaultdict(list)
    cherrypy.log('loading motif scores')
    # NOTE(review): the nesting below was reconstructed from source that had
    # lost its indentation; confirm that scores are meant to be collected
    # once per (region, seqdata) row and not once per region membership.
    for region, seqdata in scores:
        for region_membership in region.membership:
            gene_names.add(region_membership.region_set.name)
        condition = seqdata.condition.name
        # NOTE(review): unpickling runs arbitrary code for crafted input;
        # this is only safe while SeqData values come from a trusted writer.
        motif_scores = cPickle.loads(seqdata.value)
        condition_scores[condition].append(motif_scores)
    cherrypy.log('found conditions: %s'%condition_scores.keys())

    session_d['found'] = gene_names
    session_d['missing'] = [g for g in gene_list if g not in gene_names]

    # compare motif scores of requested genes to all hypersensitive regions
    # in the dataset
    cherrypy.log('loading motif background')

    # pick n random background sequences, but seed so the same indices are
    # picked for every different # of input DHS regions
    random.seed("jo mama")

    # motif names and cluster assignments do not depend on the condition,
    # so read them once and close the files afterwards
    motif_name_fn = resource_filename('adipo_sight','data/motif_names.txt')
    with open(motif_name_fn) as motif_name_f:
        motif_names = np.array(motif_name_f.readlines())
    motif_cluster_fn = resource_filename('adipo_sight','data/motif_clusters.txt')
    with open(motif_cluster_fn) as motif_cluster_f:
        motif_cluster_map = dict((i, int(m)) for i, m in enumerate(motif_cluster_f))

    # walk through the conditions and compute scores
    sig_scores = defaultdict(dict)
    hs_regions = {}
    enriched_motifs = {}
    for c, cond_scores in condition_scores.items():
        score_mat = np.array(cond_scores)
        hs_regions[c] = len(cond_scores)

        # get the background out for this condition
        motif_bg_fn = resource_filename('adipo_sight','data/%s_hypersensitive_peaks_bg_motif_scores.npy'%c)
        all_scores = np.load(motif_bg_fn).T
        # never ask for more background rows than the file provides
        n_bg = min(score_mat.shape[0], all_scores.shape[0])
        bg_inds = random.sample(xrange(all_scores.shape[0]), n_bg)
        all_scores = all_scores[bg_inds,:]

        cherrypy.log('score_mat.shape = %s'%str(score_mat.shape))
        cherrypy.log('all_scores.shape = %s'%str(all_scores.shape))
        cherrypy.log('done loading motif background')

        # calculate MWW
        pvals = mww_multiprocess(score_mat.T, all_scores.T, True)

        log.debug('motif scores for condition: %s'%c)
        log.debug('pvals: %s'%str(pvals.shape))

        thresh = pvals < self.args.get('diff_hyp_pval')
        thresh_inds = np.where(thresh)[0]
        thresh_names, thresh_pvals = motif_names[thresh], pvals[thresh]

        cluster_set = set()
        for i, n, p in zip(thresh_inds, thresh_names, thresh_pvals):
            cluster_i = motif_cluster_map[i]
            cluster_d = sig_scores[cluster_i]
            cluster_d.setdefault('name', set()).add(n.strip())
            # keep the smallest value seen for this cluster and condition
            cluster_d[c] = min(cluster_d.get(c, 1.), p)
            cluster_set.add(cluster_i)
        enriched_motifs[c] = len(cluster_set)

    session_d['motif_enrich'] = {
        'motifs': dict(sig_scores),
        'hs_regions': hs_regions,
        'enriched_motifs': enriched_motifs,
    }