def clustersamples(exp,minreads=0):
    """
    reorder the samples of an experiment so that samples with similar behavior are adjacent
    (single linkage hierarchical clustering on the bray-curtis distance between the
    log2 transformed sample profiles)

    input:
    exp : Experiment
    minreads : int
        the minimal original number of reads per sample to keep it

    output:
    newexp : Experiment
        the filtered and clustered experiment
    """
    # must stay the first statement so the snapshot holds only the call arguments
    params = locals()

    newexp = hs.filterorigreads(exp, minreads)

    # transpose a copy of the data so each row is one sample
    # (assumes newexp.data is bacteria x samples - TODO confirm)
    profiles = np.transpose(copy.copy(newexp.data))
    # floor every value at 2 and log transform, so the smallest transformed value is 1
    profiles[profiles <= 2] = 2
    profiles = np.log2(profiles)

    # cluster the rows and use the dendrogram leaf order as the new sample order
    distances = spatial.distance.pdist(profiles, metric='braycurtis')
    leaf_order = cluster.hierarchy.leaves_list(cluster.hierarchy.single(distances))
    newexp = hs.reordersamples(newexp, leaf_order)

    hs.addcommand(newexp, "clustersamples", params=params, replaceparams={'exp': exp})
    newexp.filters.append("cluster samples minreads=%d" % minreads)
    return newexp
def filterorigreads(self):
    """
    GUI action: filter the selected experiment by the original number of reads per sample

    requires exactly one item to be selected in self.bMainList (otherwise prints
    "Need 1 item" and does nothing). asks the user for the minimal number of reads
    per sample with an integer input dialog; if the dialog is accepted, the
    experiment returned by hs.filterorigreads gets '_for' appended to its studyname
    and is registered with self.addexp.
    """
    selected = self.bMainList.selectedItems()
    if len(selected) != 1:
        print("Need 1 item")
        return

    # the selected item's text is the key of the experiment in self.explist
    expname = str(selected[0].text())
    source_exp = self.explist[expname]

    # the numeric arguments are presumably initial value, minimum and maximum
    # (QInputDialog.getInt signature) - TODO confirm
    threshold, accepted = QtGui.QInputDialog.getInt(
        self,
        'Filter Original Reads',
        'Minimal number of reads per sample',
        5000,
        0,
        100000,
    )
    if not accepted:
        return

    filtered = hs.filterorigreads(source_exp, minreads=threshold)
    filtered.studyname = filtered.studyname + '_for'
    self.addexp(filtered)
def subsample(expdat,numreads=10000,inplace=False):
    """
    subsample (rarify) reads from all samples in an experiment

    the experiment is first passed through hs.filterorigreads (with numreads) and
    hs.toorigreads, then subsampled via a biom table. the data is afterwards
    renormalized with normalizereads and origreads of every sample is set to numreads.

    input:
    expdat
        the experiment to subsample
    numreads
        number of reads to subsample to
    inplace
        true to replace current experiment

    output:
    newexp
        the new subsampled experiment
    """
    # snapshot the call arguments BEFORE importing biom - otherwise the module object
    # becomes a local and ends up in the params recorded by hs.addcommand
    params = locals()
    import biom

    newexp = hs.filterorigreads(expdat, numreads, inplace)
    newexp = hs.toorigreads(newexp, inplace=True)

    table = biom.table.Table(newexp.data, newexp.seqs, newexp.samples)
    # NOTE(review): the docstring describes per-sample rarefaction but axis='observation'
    # is passed here - confirm against the installed biom version that the counts are
    # subsampled within each sample
    table = table.subsample(numreads, axis='observation')

    # sanity check: the table must keep the samples in the experiment's order
    # (best effort - a mismatch is only reported, not raised)
    for idx, cid in enumerate(table.ids(axis='sample')):
        if cid != newexp.samples[idx]:
            print('problem with sample ids!!!!')

    # reorder the bacteria to match the observations kept in the subsampled table
    newpos = [newexp.seqdict[cseq] for cseq in table.ids(axis='observation')]
    newexp = hs.reorderbacteria(newexp, newpos, inplace=True)
    newexp.data = table.matrix_data.todense().A

    # NOTE(review): rescales to a fixed 10000 regardless of numreads - presumably the
    # project-wide normalization scale; confirm this is intended
    newexp = normalizereads(newexp, numreads=10000, inplace=True, fixorig=False)

    # every remaining sample now has exactly numreads original reads
    for cidx in range(len(newexp.samples)):
        newexp.origreads[cidx] = numreads
    newexp = updateorigreads(newexp)

    newexp.filters.append("subsample to %d" % numreads)
    hs.addcommand(newexp, "subsample", params=params, replaceparams={'expdat': expdat})
    return newexp