def updateinfo(self,csamp,cseq):
    """
    refresh the information widgets for the selected sample/bacteria

    input:
    csamp
        position of the selected sample (index into self.cexp.samples and the data columns)
    cseq
        position of the selected bacteria (index into self.cexp.tax/sids/seqs and the data rows)
    """
    self.csamp=csamp
    self.cseq=cseq

    # labels describing the current selection
    sampname=self.cexp.samples[self.csamp]
    self.lSample.setText(sampname)
    self.lTaxonomy.setText(self.cexp.tax[self.cseq])
    self.lID.setText(str(self.cexp.sids[self.cseq]))
    reads=float(self.cexp.data[self.cseq,self.csamp])/100
    self.lReads.setText('%f' % reads)
    sampinfo=self.cexp.smap[sampname]
    fieldname=str(self.cSampleField.currentText())
    self.lSampleFieldVal.setText(sampinfo[fieldname])

    # database statistics (only when a sequence database is attached)
    seqdb=self.cexp.seqdb
    if seqdb:
        self.lStudies.clear()
        totappear,numstudies,allstudies,studysamples,totdbsamples=hs.bactdb.GetSeqInfo(seqdb,self.cexp.seqs[self.cseq])
        dbsizek=int(totdbsamples/1000)
        if not totappear>0:
            self.lNumSamples.setText(str('%d/%dK' % (0,dbsizek)))
            self.lNumStudies.setText("0")
        else:
            self.lNumSamples.setText(str('%d/%dK' % (totappear,dbsizek)))
            self.lNumStudies.setText(str(numstudies))
            perstudy=list(studysamples.items())
            # fraction of each study's samples in which the sequence appears
            fractions=[float(len(found))/len(hs.bactdb.SamplesInStudy(seqdb,studyid)) for studyid,found in perstudy]
            _,order=hs.isort(fractions,reverse=True)
            for pos in order:
                studyname=hs.bactdb.StudyNameFromID(seqdb,perstudy[pos][0])
                self.lStudies.addItem('%s (%f)' % (studyname,fractions[pos]))

    # redraw whichever extra tab is currently showing
    if self.FigureTab.currentIndex()==2:
        self.plotxgraph()
    if self.FigureTab.currentIndex()==1:
        self.plotontology()
def sortbycentermass(expdat,field=False,numeric=True,uselog=True):
    """
    sort the bacteria of an experiment along a 1d gradient using each bacteria's center of mass
    input:
    expdat
    field : string
        name of the field to sort the samples by before computing the center of mass, or False to keep the sample order
    numeric : bool
        True if the sort field is numeric (ignored if no sort field)
    uselog : bool
        True to log2 transform the data (values below 1 are set to 1 first) before the center of mass calculation

    output:
    newexp - expdat with the bacteria reordered by center of mass
    """
    params=locals()

    # temporary experiment used only for computing the centers of mass
    workexp=hs.sortsamples(expdat,field,numeric=numeric) if field else hs.copyexp(expdat)
    vals=workexp.data
    if uselog:
        vals[vals<1]=1
        vals=np.log2(vals)
    positions=np.arange(len(workexp.samples))
    # weighted mean sample position for every bacteria
    # NOTE(review): a row whose values sum to 0 divides by zero here - confirm how hs.isort handles the result
    centers=[np.dot(vals[rowidx,:],positions)/np.sum(vals[rowidx,:]) for rowidx in range(len(workexp.seqs))]
    _,order=hs.isort(centers)
    # the reordering is applied to the original experiment, not to the temporary one
    newexp=hs.reorderbacteria(expdat,order)
    newexp.filters.append("sort by center of mass field=%s, uselog=%s" % (field,uselog))
    hs.addcommand(newexp,"sortbycentermass",params=params,replaceparams={'expdat':expdat})
    return newexp
def sortbacteria(exp,inplace=False,logit=True):
    """
    reorder the bacteria of an experiment by their taxonomy string
    input:
    exp : experiment
        the experiment to sort
    inplace : bool
        passed on to hs.reorderbacteria (True to replace the current experiment, False to create a new one)
    logit : bool
        True to record the operation in the filters list and the command log, False to skip
        (e.g. when called from another heatsequer function)
    output:
    newexp : experiment
        the experiment with bacteria ordered by exp.tax
    """
    params=locals()

    _,order=hs.isort(exp.tax)
    newexp=hs.reorderbacteria(exp,order,inplace=inplace)
    if not logit:
        return newexp
    newexp.filters.append('sorted bacteria by taxonomy')
    hs.addcommand(newexp,"sortbacteria",params=params,replaceparams={'exp':exp})
    return newexp
def sortbyvariance(expdat,field=False,value=False,exact=False,norm=False):
    """
    sort bacteria by how much they vary across samples
    the score is computed on a subset of samples (field/value/exact) and then
    the whole experiment is reordered according to it
    input:
    expdat : Experiment
    field : string
        name of the field used to select the samples for the score, or False to use all samples
    value : string
        value (in field) of the samples to use
    exact : bool
        is the value exact or partial string
    norm : bool
        False to sort by the standard deviation (np.std) of each bacteria,
        True to sort by standard deviation divided by the mean
    output:
    newexp : Experiment
        expdat with bacteria reordered by the score
    """
    params=locals()

    # experiment holding only the samples the score is based on
    subexp=hs.filtersamples(expdat,field,value,exact=exact) if field else copy.deepcopy(expdat)
    spread=np.std(subexp.data,axis=1)
    if norm:
        spread=spread/np.mean(subexp.data,axis=1)
    _,order=hs.isort(spread)
    newexp=hs.reorderbacteria(expdat,order)
    newexp.filters.append("sort by variance field=%s value=%s normalize=%s" % (field,value,norm))
    hs.addcommand(newexp,"sortbyvariance",params=params,replaceparams={'expdat':expdat})
    return newexp
def sortsamples(exp,field,numeric=False,logit=True):
    """
    reorder the samples of an experiment by the values of a field
    input:
    exp : Experiment
    field : string
        name of the field to sort by
    numeric : bool
        True to convert the field values with hs.tofloat before sorting, False to sort them as they are
    logit : bool
        True to record the operation in the command log and the filters list, False to skip
    output:
    newexp : Experiment
        the experiment with reordered samples
    """
    params=locals()

    keys=hs.getfieldvals(exp,field)
    if numeric:
        keys=hs.tofloat(keys)
    _,order=hs.isort(keys)
    newexp=hs.reordersamples(exp,order)
    if not logit:
        return newexp
    hs.addcommand(newexp,"sortsamples",params=params,replaceparams={'exp':exp})
    newexp.filters.append('sorted samples by field %s' % field)
    return newexp
def sortbyfreq(expdat,field=False,value=False,exact=False,exclude=False,logscale=True,useabs=False):
    """
    sort the bacteria of an experiment by their mean frequency
    the mean is computed on a subset of samples (field/value/exact/exclude) and then
    the whole experiment is reordered according to it
    input:
    expdat : Experiment
    field : string
        name of the field used to select the samples for the mean, or False to use all samples
    value : string
        value (in field) of the samples to use
    exact : bool
        is the value exact or partial string
    exclude : bool
        True to use all samples except the field/value ones, False (default) to use only the field/value samples
    logscale : bool
        True (default) to set values below 2 to 2 and log2 transform before taking the mean, False to use the original values
    useabs : bool
        True to take the mean of the absolute values, False (default) to take the mean of the values

    output:
    newexp : Experiment
        expdat with bacteria reordered by the subgroup mean
    """
    params=locals()

    # experiment holding only the samples the mean is based on
    if not field:
        subexp=copy.deepcopy(expdat)
    else:
        subexp=hs.filtersamples(expdat,field,value,exact=exact,exclude=exclude)
    if logscale:
        subexp.data[subexp.data<2]=2
        subexp.data=np.log2(subexp.data)
    scored=np.abs(subexp.data) if useabs else subexp.data
    rowmeans=np.mean(scored,axis=1)
    _,order=hs.isort(rowmeans)

    newexp=hs.reorderbacteria(expdat,order)
    newexp.filters.append("sort by freq field=%s value=%s" % (field,value))
    hs.addcommand(newexp,"sortbyfreq",params=params,replaceparams={'expdat':expdat})
    return newexp
def sortbygroupdiff(expdat,field,val1,val2):
    """
    sort the bacteria of an experiment by the normalized difference between the
    mean log2 frequency in two groups of samples (val1 and val2 in field)
    input:
    expdat
    field - the name of the field defining the 2 groups
    val1,val2 - the (exact) values in field of the 2 groups

    output:
    newexp - expdat with bacteria reordered by the group difference
    """
    params=locals()

    groups=[hs.filtersamples(expdat,field,cval,exact=True) for cval in (val1,val2)]
    # mean of log2(reads+2) per bacteria in each group
    mean1,mean2=[np.mean(np.log2(cgroup.data+2),axis=1) for cgroup in groups]
    # the constant 20 in the denominator damps the score of low-frequency bacteria
    score=(mean1-mean2)/(mean1+mean2+20)
    _,order=hs.isort(score)
    newexp=hs.reorderbacteria(expdat,order)
    newexp.filters.append("sort by group difference field=%s val1=%s val2=%s" % (field,val1,val2))
    hs.addcommand(newexp,"sortbygroupdiff",params=params,replaceparams={'expdat':expdat})
    return newexp
def plotseqfreq(expdat,seqs,toaxis=False,xfield=False,normalizey=False):
    """
    plot the frequency of the sequences in seqs across the samples
    input:
    expdat : Experiment
    seqs : list of sequence strings
        the sequences (acgt) to plot. sequences not present in expdat.seqdict are skipped
    toaxis : matplotlib axis
        the axis to plot to, or False to open a new figure
    xfield : string
        name of a (numeric) field whose values position the samples on the x axis,
        or False to use the sample order
    normalizey : bool
        True to divide each sequence's values by their sum, False for no normalization
        (a sequence whose values sum to 0 is left unnormalized)
    """
    if xfield:
        # use the hs. prefix like the rest of the function - the bare name is not defined here
        xdat=hs.tofloat(hs.getfieldvals(expdat,xfield))
    else:
        xdat=range(len(expdat.samples))
    sv,si=hs.isort(xdat)
    if not toaxis:
        plt.figure()
        toaxis=plt.gca()
    labels=[]
    for cseq in seqs:
        if cseq not in expdat.seqdict:
            continue
        spos=expdat.seqdict[cseq]
        cdat=expdat.data[spos,si]
        if normalizey:
            total=sum(cdat)
            # avoid dividing by zero for a sequence absent from all samples
            if total!=0:
                cdat=cdat/total
        toaxis.plot(sv,cdat)
        labels.append(str(expdat.sids[spos])+'-'+expdat.tax[spos])
    labels=hs.clipstrings(labels,20,reverse=True)
    toaxis.legend(labels,prop={'size':6})
    toaxis.set_xticks(xdat)
def plotexp(exp,sortby=False,numeric=False,minreads=4,rangeall=False,seqdb=None,cdb=None,showline=True,ontofig=False,usegui=True,showxall=False,showcolorbar=False,ptitle=False,lowcutoff=1,uselog=True,showxlabel=True,colormap=False,colorrange=False,linewidth=2,subline='',showhline=True,newfig=True,fixfont=False,fontsize=None,nosort=False,zeroisnone=False,xlabelrotation=45,showtaxnames=False):
    """
    Plot an experiment
    input:
    exp - from load()
    sortby - name of mapping file field to sort by or False to not sort
    numeric - True if the field is numeric
    minreads - minimum number of reads per bacteria in order to show it or 0 to show all
    rangeall - True to show all frequencies in image scale, false to saturate at 10%
    seqdb - the SRBactDB database (from bactdb.load), or None (default) to use hs.bdb
    cdb - the cool sequences database (from cooldb.load), or None (default) to use the heatsequer loaded cdb
    showline - if True plot lines between category values
    ontofig - name of ontology to plot for bactdb or false to no plot
    usegui - True use a gui for otu summary, False just print
    showxall - True to show all sample names when not showing lines, False to show them only for 10 samples or less
    showcolorbar - True to plot the colorbar. False to not plot
    ptitle : str (optional)
        '' or False to show the processing history as the title, None to not show a title, or str of name of the figure
    lowcutoff - minimal value for read (for 0 log transform) - the minimal resolution - could be 10000*2/origreads
    uselog : bool
        True (default) to clip the data at lowcutoff and plot log2 of it, False to plot the data as is
    showxlabel : bool
        True to show the x label (default), False to hide it
    colormap : string or False
        name of colormap or False (default) to use mpl default colormap
    colorrange : [min,max] or False
        [min,max] to set the colormap range, False to use data min,max (default) as specified in rangeall
    linewidth : int
        width of the lines separating the category values
    subline : str
        Name of category for subline plotting or '' (Default) for no sublines
    showhline : bool
        True (default) to plot the horizontal lines listed in exp.hlines. False to not plot them
    newfig : bool
        True (default) to open figure in new window, False to use current
    fixfont : bool (optional)
        False (default) to keep the default font, True to use a fixed width (monospace) font for the y tick labels
    fontsize : int or None (optional)
        None (default) to use default font size, number to use that font size
    nosort : bool (optional)
        False (default) to sort by the sort field, True to skip the sorting
    zeroisnone : bool (optional)
        False (default) to plot zeros as 0, True to assign None (white color)
    xlabelrotation : int (optional)
        the rotation of the xtick labels
    showtaxnames : bool (optional)
        False (default) to not show tax names (need to press 'h' to show)
        True to show the taxonomy names

    output:
    newexp - the plotted experiment (sorted and filtered)
    ax - the plot axis
    """
    hs.Debug(1,"Plot experiment %s" % exp.studyname)
    hs.Debug(1,"Commands:")
    for ccommand in exp.commands:
        hs.Debug(1,"%s" % ccommand)

    if exp.sparse:
        hs.Debug(9,'Sparse matrix - converting to dense')
        exp=hs.copyexp(exp,todense=True)

    if cdb is None:
        cdb=hs.cdb
    if seqdb is None:
        seqdb=hs.bdb

    # choose the sample order and the per-sample values used for the x axis labels/lines
    if sortby:
        if not nosort:
            hs.Debug(1,"Sorting by field %s" % sortby)
            vals=[exp.smap[csamp][sortby] for csamp in exp.samples]
            if numeric:
                hs.Debug(1,"(numeric sort)")
                vals=hs.tofloat(vals)
            svals,sidx=hs.isort(vals)
            newexp=hs.reordersamples(exp,sidx)
        else:
            hs.Debug(1,"no sorting but showing columns")
            svals=hs.getfieldvals(exp,sortby)
            newexp=hs.copyexp(exp)
    else:
        hs.Debug(1,"No sample sorting")
        svals=hs.getfieldvals(exp,'#SampleID')
        newexp=hs.copyexp(exp)

    hs.Debug(1,"Filtering min reads. original bacteria - %d" % len(newexp.seqs))
    if minreads>0:
        # NOTE(review): uselog is passed as the logit argument - confirm this is intended
        newexp=hs.filterminreads(newexp,minreads,logit=uselog)
    hs.Debug(1,"New number of bacteria %d" % len(newexp.seqs))
    newexp.seqdb=seqdb
    newexp.cdb=cdb
    newexp.scdb=hs.scdb

    # the clipping below is done in place on newexp.data
    ldat=newexp.data
    if zeroisnone:
        ldat[ldat==0]=None
    if uselog:
        hs.Debug(1,"Using log, cutoff at %f" % lowcutoff)
        ldat[np.where(ldat<lowcutoff)]=lowcutoff
        ldat=np.log2(ldat)

    # NOTE: oldparams refers to the same object as plt.rcParams (no copy is made),
    # so the re-assignment further down does not undo the keymap changes
    oldparams=plt.rcParams
    # free the keys used by the plot key handler from the default matplotlib shortcuts
    for keyname,binding in (('back','c, backspace'),('forward','v'),('all_axes','A')):
        try:
            mpl.rc('keymap',**{keyname:binding})
        except KeyError:
            # the keymap entry does not exist in the installed matplotlib version
            hs.Debug(1,"keymap.%s not available in this matplotlib version" % keyname)

    if newfig:
        f=plt.figure(tight_layout=True)
    else:
        f=plt.gcf()

    # set the colormap to default if not supplied
    if not colormap:
        colormap=plt.rcParams['image.cmap']

    # plot the image
    if colorrange:
        hs.Debug(1,"colormap range is %s" % (colorrange,))
        iax=plt.imshow(ldat,interpolation='nearest',aspect='auto',clim=colorrange,cmap=plt.get_cmap(colormap))
    elif rangeall:
        hs.Debug(1,"colormap range is all")
        iax=plt.imshow(ldat,interpolation='nearest',aspect='auto',cmap=plt.get_cmap(colormap))
    else:
        hs.Debug(1,"colormap range is 0,10")
        iax=plt.imshow(ldat,interpolation='nearest',aspect='auto',clim=[0,10],cmap=plt.get_cmap(colormap))

    if ptitle is not None:
        if not ptitle:
            hs.Debug(1,"Showing filters in title")
            if (len(newexp.filters))>4:
                cfilters=[newexp.filters[0],'...',newexp.filters[-2],newexp.filters[-1]]
            else:
                cfilters=newexp.filters
            cfilters=hs.clipstrings(cfilters,30)
            ptitle='\n'.join(cfilters)
        plt.title(ptitle,fontsize=10)

    # Artist.get_axes() was removed from matplotlib - the axes attribute works in old and new versions
    ax=iax.axes
    ax.autoscale(False)

    numrows=np.size(ldat,0)

    # plot the sublines (smaller category lines)
    if subline:
        slval=hs.getfieldvals(newexp,subline)
        prevval=slval[0]
        for idx,cval in enumerate(slval):
            if cval!=prevval:
                xpos=idx-0.5
                plt.plot([xpos,xpos],[-0.5,numrows-0.5],'w:')
                prevval=cval

    if showline:
        hs.Debug(1,"Showing lines")
        labs=[]
        labpos=[]
        linepos=[]
        minpos=0
        # sentinel so the last group is closed as well (on a copy, svals itself is not modified)
        padded=list(svals)+['end']
        for idx,cval in enumerate(padded[:-1]):
            if cval==padded[idx+1]:
                continue
            # label in the middle of the group, line after its last sample
            labpos.append(minpos-0.5+float(idx+1-minpos)/2)
            minpos=idx+1
            linepos.append(idx+0.5)
            labs.append(cval)
        hs.Debug(1,"number of lines is %d" % len(linepos))
        if showxlabel:
            ax.set_xticks(labpos)
            ax.set_xticklabels(labs,rotation=xlabelrotation,ha='right')
        for cx in linepos:
            # black line with a white dotted line on top so it is visible on any background
            plt.plot([cx,cx],[-0.5,numrows-0.5],'k',linewidth=linewidth)
            plt.plot([cx,cx],[-0.5,numrows-0.5],'w:',linewidth=linewidth)
    else:
        hs.Debug(1,"Not showing lines")
        if showxall or len(newexp.samples)<=10:
            hs.Debug(1,"less than 10 samples, showing all sample names")
            # positions first, then labels, so both always match
            ax.set_xticks(range(len(newexp.samples)))
            ax.set_xticklabels(svals,rotation=90)

    ax.set_ylim(-0.5,numrows-0.5)

    if fixfont:
        fontProperties = {'family':'monospace'}
        ax.set_yticklabels(ax.get_yticks(), fontProperties)

    if showcolorbar:
        hs.Debug(1,"Showing colorbar")
        cb=plt.colorbar(ticks=list(np.log2([2,10,100,500,1000])))
        cb.ax.set_yticklabels(['<0.02%','0.1%','1%','5%','>10%'])

    # attach the state used by the mouse/key handlers to the axis
    ax.expdat=newexp
    ax.lastselect=-1
    ax.sampline=''
    ax.ofig=f
    ax.labelson=False
    ax.labelnames=[]
    f.canvas.mpl_connect('button_press_event', onplotmouseclick)
    f.canvas.mpl_connect('key_press_event', onplotkeyclick)
    plt.rcParams=oldparams

    # if want the ontology analysis for a given category:
    if ontofig:
        hs.Debug(1,"Ontofig is set")
        newexp.ontofigname=ontofig
    else:
        newexp.ontofigname=False

    # if we want gui, open it
    if usegui:
        hs.Debug(1,"Using the GUI window")
        # create the window only once (it used to be constructed twice, discarding the first)
        from heatsequer.plots import plotwingui
        guiwin = plotwingui.PlotGUIWindow(newexp)
        ax.guiwin=guiwin
        guiwin.plotfig=f
        guiwin.plotax=ax
        guiwin.show()
    else:
        ax.guiwin=False
        hs.Debug(7,'Not using gui')

    ax.plot_labelsize=fontsize
    if newexp.plotmetadata:
        hs.Debug(1,"Experiment has metadata attached for plotting (%d points)" % len(newexp.plotmetadata))
        for cmet in newexp.plotmetadata:
            addplotmetadata(newexp,field=cmet[0],value=cmet[1],color=cmet[2],inverse=cmet[3],beforesample=cmet[4])
    if showhline:
        if newexp.hlines:
            for cpos in newexp.hlines:
                plt.plot([0,np.shape(newexp.data)[1]],[cpos-0.5,cpos-0.5],'g')
    plt.show()
    if showtaxnames:
        showtaxonomies(newexp,ax,showdb=False,showcontam=False)

    return newexp,ax
def plotexp(exp,sortby=False,numeric=False,minreads=4,rangeall=False,seqdb=None,cdb=None,showline=True,ontofig=False,usegui=True,showxall=False,showcolorbar=False,ptitle=False,lowcutoff=1,uselog=True,showxlabel=True,colormap=False,colorrange=False):
    """
    Plot an experiment
    input:
    exp - from load()
    sortby - name of mapping file field to sort by or False to not sort
    numeric - True if the field is numeric
    minreads - minimum number of reads per bacteria in order to show it or 0 to show all
    rangeall - True to show all frequencies in image scale, false to saturate at 10%
    seqdb - the SRBactDB database (from bactdb.load)
    cdb - the cool sequences database (from cooldb.load)
    showline - if True plot lines between category values
    ontofig - name of ontology to plot for bactdb or false to no plot
    usegui - True use a gui for otu summary, False just print
    showxall - True to show all sample names when not showing lines, False to show them only for 10 samples or less
    showcolorbar - True to plot the colorbar. False to not plot
    ptitle - name of the figure or False to show processing history as name
    lowcutoff - minimal value for read (for 0 log transform) - the minimal resolution - could be 10000*2/origreads
    uselog : bool
        True (default) to clip the data at lowcutoff and plot log2 of it, False to plot the data as is
    showxlabel : bool
        True to show the x label (default), False to hide it
    colormap : string or False
        name of colormap or False (default) to use mpl default colormap
    colorrange : [min,max] or False
        [min,max] to set the colormap range, False to use data min,max (default) as specified in rangeall

    output:
    newexp - the plotted experiment (sorted and filtered)
    ax - the plot axis
    """
    hs.Debug(1,"Plot experiment %s" % exp.studyname)
    hs.Debug(1,"Commands:")
    for ccommand in exp.commands:
        hs.Debug(1,"%s" % ccommand)

    # choose the sample order and the per-sample values used for the x axis labels/lines
    if sortby:
        hs.Debug(1,"Sorting by field %s" % sortby)
        vals=[exp.smap[csamp][sortby] for csamp in exp.samples]
        if numeric:
            hs.Debug(1,"(numeric sort)")
            vals=hs.tofloat(vals)
        svals,sidx=hs.isort(vals)
        newexp=hs.reordersamples(exp,sidx)
    else:
        hs.Debug(1,"No sample sorting")
        svals=hs.getfieldvals(exp,'#SampleID')
        newexp=hs.copyexp(exp)

    hs.Debug(1,"Filtering min reads. original bacteria - %d" % len(newexp.seqs))
    if minreads>0:
        # NOTE(review): uselog is passed as the logit argument - confirm this is intended
        newexp=hs.filterminreads(newexp,minreads,logit=uselog)
    hs.Debug(1,"New number of bacteria %d" % len(newexp.seqs))
    newexp.seqdb=seqdb
    newexp.cdb=cdb

    # the clipping below is done in place on newexp.data
    ldat=newexp.data
    if uselog:
        hs.Debug(1,"Using log, cutoff at %f" % lowcutoff)
        ldat[np.where(ldat<lowcutoff)]=lowcutoff
        ldat=np.log2(ldat)

    # NOTE: oldparams refers to the same object as plt.rcParams (no copy is made),
    # so the re-assignment further down does not undo the keymap changes
    oldparams=plt.rcParams
    # free the keys used by the plot key handler from the default matplotlib shortcuts
    for keyname,binding in (('back','c, backspace'),('forward','v'),('all_axes','A')):
        try:
            mpl.rc('keymap',**{keyname:binding})
        except KeyError:
            # the keymap entry does not exist in the installed matplotlib version
            hs.Debug(1,"keymap.%s not available in this matplotlib version" % keyname)
    f=plt.figure()

    # set the colormap to default if not supplied
    if not colormap:
        colormap=plt.rcParams['image.cmap']

    # plot the image
    if colorrange:
        hs.Debug(1,"colormap range is %s" % (colorrange,))
        iax=plt.imshow(ldat,interpolation='nearest',aspect='auto',clim=colorrange,cmap=plt.get_cmap(colormap))
    elif rangeall:
        hs.Debug(1,"colormap range is all")
        iax=plt.imshow(ldat,interpolation='nearest',aspect='auto',cmap=plt.get_cmap(colormap))
    else:
        hs.Debug(1,"colormap range is 0,10")
        iax=plt.imshow(ldat,interpolation='nearest',aspect='auto',clim=[0,10],cmap=plt.get_cmap(colormap))

    if not ptitle:
        hs.Debug(1,"Showing filters in title")
        if (len(newexp.filters))>4:
            cfilters=[newexp.filters[0],'...',newexp.filters[-2],newexp.filters[-1]]
        else:
            cfilters=newexp.filters
        cfilters=hs.clipstrings(cfilters,30)
        ptitle='\n'.join(cfilters)
    plt.title(ptitle,fontsize=10)

    # Artist.get_axes() was removed from matplotlib - the axes attribute works in old and new versions
    ax=iax.axes
    ax.autoscale(False)

    numrows=np.size(ldat,0)

    if showline:
        hs.Debug(1,"Showing lines")
        labs=[]
        labpos=[]
        linepos=[]
        minpos=0
        # sentinel so the last group is closed as well (on a copy, svals itself is not modified)
        padded=list(svals)+['end']
        for idx,cval in enumerate(padded[:-1]):
            if cval==padded[idx+1]:
                continue
            # label in the middle of the group, line after its last sample
            labpos.append(minpos-0.5+float(idx+1-minpos)/2)
            minpos=idx+1
            linepos.append(idx+0.5)
            labs.append(cval)
        hs.Debug(1,"number of lines is %d" % len(linepos))
        if showxlabel:
            ax.set_xticks(labpos)
            ax.set_xticklabels(labs,rotation=45,ha='right')
        for cx in linepos:
            plt.plot([cx,cx],[-0.5,numrows-0.5],'k',linewidth=2)
    else:
        hs.Debug(1,"Not showing lines")
        if showxall or len(newexp.samples)<=10:
            hs.Debug(1,"less than 10 samples, showing all sample names")
            # positions first, then labels, so both always match
            ax.set_xticks(range(len(newexp.samples)))
            ax.set_xticklabels(svals,rotation=90)
    plt.tight_layout()
    # the image rows span -0.5..numrows-0.5 (same range as the category lines)
    ax.set_ylim(-0.5,numrows-0.5)

    if showcolorbar:
        hs.Debug(1,"Showing colorbar")
        cb=plt.colorbar(ticks=list(np.log2([2,10,100,500,1000])))
        cb.ax.set_yticklabels(['<0.02%','0.1%','1%','5%','>10%'])

    # attach the state used by the mouse/key handlers to the axis
    ax.expdat=newexp
    ax.lastselect=-1
    ax.sampline=''
    ax.ofig=f
    ax.labelson=False
    ax.labelnames=[]
    f.canvas.mpl_connect('button_press_event', onplotmouseclick)
    f.canvas.mpl_connect('key_press_event', onplotkeyclick)
    plt.rcParams=oldparams

    # if want the ontology analysis for a given category:
    if ontofig:
        hs.Debug(1,"Ontofig is set")
        newexp.ontofigname=ontofig
    else:
        newexp.ontofigname=False

    # if we want gui, open it
    if usegui:
        hs.Debug(1,"Using the GUI window")
        import heatsequer.plots.plotwingui
        guiwin = heatsequer.plots.plotwingui.PlotGUIWindow(newexp)
        ax.guiwin=guiwin
        guiwin.plotfig=f
        guiwin.plotax=ax
        guiwin.show()
    else:
        ax.guiwin=False
        hs.Debug(7,'Not using gui')

    if newexp.plotmetadata:
        hs.Debug(1,"Experiment has metadata attached for plotting (%d points)" % len(newexp.plotmetadata))
        for cmet in newexp.plotmetadata:
            addplotmetadata(newexp,field=cmet[0],value=cmet[1],color=cmet[2],inverse=cmet[3],beforesample=cmet[4])
    plt.show()
    return newexp,ax