def normalizeprctile(expdat,percent=80):
    """
    normalize the reads of each sample so that the given percentile (rather than the mean)
    is the same in all samples
    used to reduce the effect of outliers (compositionality correction)
    note the percentile is calculated on the same set of bacteria for all samples
    (the bacteria kept by hs.filterminreads with a threshold of 1*number of samples)

    input:
    expdat : Experiment
    percent : float
        the percentile to normalize (0-100)

    output:
    newexp : Experiment
        a copy of expdat with each sample column divided by
        (sample percentile / mean percentile over all samples)
    """
    params=locals()

    # select the bacteria to use - don't want to include very low freq. bacteria
    filtexp=hs.filterminreads(expdat,1*len(expdat.samples))
    percvals=np.percentile(filtexp.data,percent,axis=0)
    # relative size of the percentile in each sample compared to the mean over all samples
    percvals=percvals/np.mean(percvals)

    newexp=hs.copyexp(expdat)
    for idx,cfactor in enumerate(percvals):
        if not cfactor>0:
            # zero (or NaN) percentile - no meaningful scaling factor, so leave the sample unchanged
            hs.Debug(9,"sample %d has percentile factor %s - not normalized" % (idx,cfactor))
            continue
        # divide (not multiply) so that all samples end up with the same percentile value
        newexp.data[:,idx]=newexp.data[:,idx]/cfactor
    newexp.filters.append("normalize percentile %f" % percent)
    hs.addcommand(newexp,"normalizeprctile",params=params,replaceparams={'expdat':expdat})
    return newexp
def clusterbacteria(exp,minreads=0,uselog=True):
    """
    reorder the bacteria of an experiment so bacteria with similar behavior are adjacent
    (single linkage hierarchical clustering on euclidean distances)

    input:
    exp : Experiment
    minreads : int
        the minimal number of reads to keep before clustering (to make faster)
    uselog : bool
        True to log2 transform the reads (values <=2 are set to 2 first) before scaling,
        False to use the reads as they are

    output:
    newexp : Experiment
        the filtered experiment with bacteria reordered according to the clustering
    """
    # must be first so only the call arguments are recorded
    params=locals()

    filtered=hs.filterminreads(exp,minreads,logit=False)

    # work on a copy so the experiment data is not modified by the transformations
    profiles=copy.copy(filtered.data)
    if uselog:
        profiles=np.log2(np.maximum(profiles,2))
    # per-bacteria (row) scaling - presumably sklearn.preprocessing.scale; TODO confirm
    profiles=scale(profiles,axis=1,copy=False)

    # leaf order of the single linkage tree gives the new bacteria order
    distances=spatial.distance.pdist(profiles,metric='euclidean')
    linkage=cluster.hierarchy.single(distances)
    leaforder=cluster.hierarchy.leaves_list(linkage)

    clustered=hs.reorderbacteria(filtered,leaforder)
    hs.addcommand(clustered,"clusterbacteria",params=params,replaceparams={'exp':exp})
    clustered.filters.append("cluster bacteria minreads=%d" % minreads)
    return clustered
def filterminreads(self):
    """
    GUI action: filter the selected experiment keeping bacteria with a minimal number of reads
    requires exactly one item selected in self.bMainList
    asks the user for the threshold, and if confirmed adds the filtered experiment
    (study name suffixed with '_fmr') using self.addexp
    """
    selected=self.bMainList.selectedItems()
    if len(selected)!=1:
        print("Need 1 item")
        return

    expname=str(selected[0].text())
    cexp=self.explist[expname]
    minreads,accepted=QtGui.QInputDialog.getInt(self,'Filter min reads','Minimal number of reads per bacteria',10,0,10000)
    if not accepted:
        # dialog was cancelled - nothing to do
        return

    filtered=hs.filterminreads(cexp,minreads=minreads)
    filtered.studyname+='_fmr'
    self.addexp(filtered)
def plotexp(exp,sortby=False,numeric=False,minreads=4,rangeall=False,seqdb=None,cdb=None,showline=True,ontofig=False,usegui=True,showxall=False,showcolorbar=False,ptitle=False,lowcutoff=1,uselog=True,showxlabel=True,colormap=False,colorrange=False,linewidth=2,subline='',showhline=True,newfig=True,fixfont=False,fontsize=None,nosort=False,zeroisnone=False,xlabelrotation=45,showtaxnames=False):
    """
    Plot an experiment as an interactive heatmap (bacteria x samples)

    input:
    exp : Experiment
        the experiment to plot (sparse experiments are converted to dense first)
    sortby : str or False
        name of mapping file field to sort by or False to not sort
    numeric : bool
        True if the sortby field is numeric
    minreads : int
        minimum number of reads per bacteria in order to show it or 0 to show all
    rangeall : bool
        True to use the full data range for the color scale, False to use the range [0,10]
        (ignored if colorrange is supplied)
    seqdb :
        the SRBactDB database (from bactdb.load), or None (default) to use hs.bdb
    cdb :
        the cool sequences database (from cooldb.load), or None (default) to use hs.cdb
    showline : bool
        True to plot lines between category values
    ontofig : str or False
        name of ontology to plot for bactdb or False to not plot
    usegui : bool
        True to open the gui window for the plot, False to not use a gui
    showxall : bool
        True to show all sample names when not showing lines, False to show them only if there are
        no more than 10 samples
    showcolorbar : bool
        True to plot the colorbar. False to not plot
    ptitle : str, False or None (optional)
        '' or False (default) to show the processing history (filters) as title,
        None to not show a title, or str for the title of the figure
    lowcutoff : float
        minimal value for read (for 0 log transform) - the minimal resolution - could be 10000*2/origreads
    uselog : bool
        True (default) to plot the log2 of the reads, False to plot the reads
    showxlabel : bool
        True to show the x label (default), False to hide it
    colormap : string or False
        name of colormap or False (default) to use mpl default colormap
    colorrange : [min,max] or False
        [min,max] to set the colormap range, False to use the range as specified in rangeall
    linewidth : int
        the width of the lines separating the category values
    subline : str
        Name of category for subline plotting or '' (Default) for no sublines
    showhline : bool
        True (default) to plot the horizontal lines listed in exp.hlines. False to not plot them
    newfig : bool
        True (default) to open figure in new window, False to use current
    fixfont : bool (optional)
        False (default) to use the default font, True to use fixed width font for the y tick labels
    fontsize : int or None (optional)
        None (default) to use default font size, number to use that font size
    nosort : bool (optional)
        False (default) to sort by the sort field, True to skip the sorting
        (the sortby field values are still used for the lines/labels)
    zeroisnone : bool (optional)
        False (default) to plot zeros as 0, True to assign None (white color)
    xlabelrotation : int (optional)
        the rotation of the xtick labels
    showtaxnames : bool (optional)
        False (default) to not show tax names (need to press 'h' to show)
        True to show the taxonomy names

    output:
    newexp : Experiment
        the plotted experiment (sorted and filtered)
    ax :
        the plot axis
    """
    hs.Debug(1,"Plot experiment %s" % exp.studyname)
    hs.Debug(1,"Commands:")
    for ccommand in exp.commands:
        hs.Debug(1,"%s" % ccommand)

    if exp.sparse:
        hs.Debug(9,'Sparse matrix - converting to dense')
        exp=hs.copyexp(exp,todense=True)

    vals=[]
    if cdb is None:
        cdb=hs.cdb
    if seqdb is None:
        seqdb=hs.bdb

    # order the samples and get the per-sample values used for the lines/labels
    if sortby:
        if not nosort:
            hs.Debug(1,"Sorting by field %s" % sortby)
            for csamp in exp.samples:
                vals.append(exp.smap[csamp][sortby])
            if numeric:
                hs.Debug(1,"(numeric sort)")
                vals=hs.tofloat(vals)
            svals,sidx=hs.isort(vals)
            newexp=hs.reordersamples(exp,sidx)
        else:
            hs.Debug(1,"no sorting but showing columns")
            svals=hs.getfieldvals(exp,sortby)
            newexp=hs.copyexp(exp)
    else:
        hs.Debug(1,"No sample sorting")
        svals=hs.getfieldvals(exp,'#SampleID')
        newexp=hs.copyexp(exp)

    hs.Debug(1,"Filtering min reads. original bacteria - %d" % len(newexp.seqs))
    if minreads>0:
        newexp=hs.filterminreads(newexp,minreads,logit=uselog)
    hs.Debug(1,"New number of bacteria %d" % len(newexp.seqs))
    newexp.seqdb=seqdb
    newexp.cdb=cdb
    newexp.scdb=hs.scdb

    # prepare the data to plot
    # note the in-place changes below modify newexp.data (ldat is the same array until the log2)
    ldat=newexp.data
    if zeroisnone:
        ldat[ldat==0]=None
    if uselog:
        hs.Debug(1,"Using log, cutoff at %f" % lowcutoff)
        ldat[np.where(ldat<lowcutoff)]=lowcutoff
        ldat=np.log2(ldat)

    # NOTE(review): plt.rcParams is not copied here, so assigning it back at the end restores
    # nothing and the keymap changes persist. Kept as is since the key handlers may rely on the
    # remapped keys - confirm before changing
    oldparams=plt.rcParams
    for ckey,cbinding in (('back','c, backspace'),('forward','v'),('all_axes','A')):
        try:
            mpl.rc('keymap',**{ckey:cbinding})
        except KeyError:
            # this keymap entry does not exist in the installed matplotlib version
            hs.Debug(1,"keymap.%s not available in this matplotlib version" % ckey)

    if newfig:
        f=plt.figure(tight_layout=True)
    else:
        f=plt.gcf()

    # set the colormap to default if not supplied
    if not colormap:
        colormap=plt.rcParams['image.cmap']

    # plot the image
    if colorrange:
        hs.Debug(1,"colormap range is 0,10")
        iax=plt.imshow(ldat,interpolation='nearest',aspect='auto',clim=colorrange,cmap=plt.get_cmap(colormap))
    elif rangeall:
        hs.Debug(1,"colormap range is all")
        iax=plt.imshow(ldat,interpolation='nearest',aspect='auto',cmap=plt.get_cmap(colormap))
    else:
        hs.Debug(1,"colormap range is 0,10")
        iax=plt.imshow(ldat,interpolation='nearest',aspect='auto',clim=[0,10],cmap=plt.get_cmap(colormap))

    if ptitle is not None:
        if not ptitle:
            hs.Debug(1,"Showing filters in title")
            if (len(newexp.filters))>4:
                cfilters=[newexp.filters[0],'...',newexp.filters[-2],newexp.filters[-1]]
            else:
                cfilters=newexp.filters
            cfilters=hs.clipstrings(cfilters,30)
            ptitle='\n'.join(cfilters)
        plt.title(ptitle,fontsize=10)

    # the axes attribute replaces the get_axes() method removed from matplotlib artists
    ax=iax.axes
    ax.autoscale(False)

    # plot the sublines (smaller category lines)
    if subline:
        slval=hs.getfieldvals(newexp,subline)
        prevval=slval[0]
        for idx,cval in enumerate(slval):
            if cval!=prevval:
                xpos=idx-0.5
                plt.plot([xpos,xpos],[-0.5,np.size(ldat,0)-0.5],'w:')
                prevval=cval

    if showline:
        hs.Debug(1,"Showing lines")
        labs=[]
        labpos=[]
        linepos=[]
        minpos=0
        # sentinel so the last group of identical values is also closed
        svals.append('end')
        for idx,cval in enumerate(svals[:-1]):
            if cval==svals[idx+1]:
                continue
            # label at the middle of the group, line after its last sample
            labpos.append(minpos-0.5+float(idx+1-minpos)/2)
            minpos=idx+1
            linepos.append(idx+0.5)
            labs.append(cval)
        hs.Debug(1,"number of lines is %d" % len(linepos))
        if showxlabel:
            ax.set_xticks(labpos)
            ax.set_xticklabels(labs,rotation=xlabelrotation,ha='right')
        for cx in linepos:
            # black line with a white dotted line on top so it is visible on any background
            plt.plot([cx,cx],[-0.5,np.size(ldat,0)-0.5],'k',linewidth=linewidth)
            plt.plot([cx,cx],[-0.5,np.size(ldat,0)-0.5],'w:',linewidth=linewidth)
    else:
        hs.Debug(1,"Not showing lines")
        if showxall or len(newexp.samples)<=10:
            hs.Debug(1,"less than 10 samples, showing all sample names")
            # set the tick positions before the labels so each label is attached to its tick
            ax.set_xticks(range(len(newexp.samples)))
            ax.set_xticklabels(svals,rotation=90)

    ax.set_ylim(-0.5,np.size(ldat,0)-0.5)

    if fixfont:
        fontProperties = {'family':'monospace'}
        ax.set_yticklabels(ax.get_yticks(), fontProperties)

    if showcolorbar:
        hs.Debug(1,"Showing colorbar")
        cb=plt.colorbar(ticks=list(np.log2([2,10,100,500,1000])))
        cb.ax.set_yticklabels(['<0.02%','0.1%','1%','5%','>10%'])

    # attach the state used by the interactive callbacks to the axis
    ax.expdat=newexp
    ax.lastselect=-1
    ax.sampline=''
    ax.ofig=f
    ax.labelson=False
    ax.labelnames=[]
    f.canvas.mpl_connect('button_press_event', onplotmouseclick)
    f.canvas.mpl_connect('key_press_event', onplotkeyclick)
    plt.rcParams=oldparams

    # if want the ontology analysis for a given category:
    if ontofig:
        hs.Debug(1,"Ontofig is set")
        newexp.ontofigname=ontofig
    else:
        newexp.ontofigname=False

    # if we want gui, open it (the window is created only once)
    if usegui:
        hs.Debug(1,"Using the GUI window")
        import heatsequer.plots.plotwingui
        guiwin = heatsequer.plots.plotwingui.PlotGUIWindow(newexp)
        ax.guiwin=guiwin
        guiwin.plotfig=f
        guiwin.plotax=ax
        guiwin.show()
    else:
        ax.guiwin=False
        hs.Debug(7,'Not using gui')

    ax.plot_labelsize=fontsize

    if newexp.plotmetadata:
        hs.Debug(1,"Experiment has metadata attached for plotting (%d points)" % len(newexp.plotmetadata))
        for cmet in newexp.plotmetadata:
            addplotmetadata(newexp,field=cmet[0],value=cmet[1],color=cmet[2],inverse=cmet[3],beforesample=cmet[4])

    if showhline:
        if newexp.hlines:
            for cpos in newexp.hlines:
                plt.plot([0,np.shape(newexp.data)[1]],[cpos-0.5,cpos-0.5],'g')

    plt.show()

    if showtaxnames:
        showtaxonomies(newexp,ax,showdb=False,showcontam=False)
    return newexp,ax
def plotexp(exp,sortby=False,numeric=False,minreads=4,rangeall=False,seqdb=None,cdb=None,showline=True,ontofig=False,usegui=True,showxall=False,showcolorbar=False,ptitle=False,lowcutoff=1,uselog=True,showxlabel=True,colormap=False,colorrange=False):
    """
    Plot an experiment as an interactive heatmap (bacteria x samples)

    input:
    exp : Experiment
        the experiment to plot
    sortby : str or False
        name of mapping file field to sort by or False to not sort
    numeric : bool
        True if the sortby field is numeric
    minreads : int
        minimum number of reads per bacteria in order to show it or 0 to show all
    rangeall : bool
        True to use the full data range for the color scale, False to use the range [0,10]
        (ignored if colorrange is supplied)
    seqdb :
        the SRBactDB database (from bactdb.load) - stored in newexp.seqdb
    cdb :
        the cool sequences database (from cooldb.load) - stored in newexp.cdb
    showline : bool
        True to plot lines between category values
    ontofig : str or False
        name of ontology to plot for bactdb or False to not plot
    usegui : bool
        True to open the gui window for the plot, False to not use a gui
    showxall : bool
        True to show all sample names when not showing lines, False to show them only if there are
        no more than 10 samples
    showcolorbar : bool
        True to plot the colorbar. False to not plot
    ptitle : str or False
        name of the figure or False (or any empty value) to show processing history as name
    lowcutoff : float
        minimal value for read (for 0 log transform) - the minimal resolution - could be 10000*2/origreads
    uselog : bool
        True (default) to plot the log2 of the reads, False to plot the reads
    showxlabel : bool
        True to show the x label (default), False to hide it
    colormap : string or False
        name of colormap or False (default) to use mpl default colormap
    colorrange : [min,max] or False
        [min,max] to set the colormap range, False to use the range as specified in rangeall

    output:
    newexp : Experiment
        the plotted experiment (sorted and filtered)
    ax :
        the plot axis
    """
    # NOTE(review): figure/imshow/title/plot/tight_layout/colorbar/show are used without a module
    # prefix - presumably from a pylab-style star import at the top of the file; confirm
    hs.Debug(1,"Plot experiment %s" % exp.studyname)
    hs.Debug(1,"Commands:")
    for ccommand in exp.commands:
        hs.Debug(1,"%s" % ccommand)

    # order the samples and get the per-sample values used for the lines/labels
    vals=[]
    if sortby:
        hs.Debug(1,"Sorting by field %s" % sortby)
        for csamp in exp.samples:
            vals.append(exp.smap[csamp][sortby])
        if numeric:
            hs.Debug(1,"(numeric sort)")
            vals=hs.tofloat(vals)
        svals,sidx=hs.isort(vals)
        newexp=hs.reordersamples(exp,sidx)
    else:
        hs.Debug(1,"No sample sorting")
        svals=hs.getfieldvals(exp,'#SampleID')
        newexp=hs.copyexp(exp)

    hs.Debug(1,"Filtering min reads. original bacteria - %d" % len(newexp.seqs))
    if minreads>0:
        newexp=hs.filterminreads(newexp,minreads,logit=uselog)
    hs.Debug(1,"New number of bacteria %d" % len(newexp.seqs))
    newexp.seqdb=seqdb
    newexp.cdb=cdb

    # prepare the data to plot
    # note the cutoff below modifies newexp.data in place (ldat is the same array until the log2)
    ldat=newexp.data
    if uselog:
        hs.Debug(1,"Using log, cutoff at %f" % lowcutoff)
        ldat[np.where(ldat<lowcutoff)]=lowcutoff
        ldat=np.log2(ldat)

    # NOTE(review): plt.rcParams is not copied here, so assigning it back at the end restores
    # nothing and the keymap changes persist. Kept as is since the key handlers may rely on the
    # remapped keys - confirm before changing
    oldparams=plt.rcParams
    for ckey,cbinding in (('back','c, backspace'),('forward','v'),('all_axes','A')):
        try:
            mpl.rc('keymap',**{ckey:cbinding})
        except KeyError:
            # this keymap entry does not exist in the installed matplotlib version
            hs.Debug(1,"keymap.%s not available in this matplotlib version" % ckey)

    f=figure()

    # set the colormap to default if not supplied
    if not colormap:
        colormap=plt.rcParams['image.cmap']

    # plot the image
    if colorrange:
        hs.Debug(1,"colormap range is 0,10")
        iax=imshow(ldat,interpolation='nearest',aspect='auto',clim=colorrange,cmap=plt.get_cmap(colormap))
    elif rangeall:
        hs.Debug(1,"colormap range is all")
        iax=imshow(ldat,interpolation='nearest',aspect='auto',cmap=plt.get_cmap(colormap))
    else:
        hs.Debug(1,"colormap range is 0,10")
        iax=imshow(ldat,interpolation='nearest',aspect='auto',clim=[0,10],cmap=plt.get_cmap(colormap))

    if not ptitle:
        hs.Debug(1,"Showing filters in title")
        if (len(newexp.filters))>4:
            cfilters=[newexp.filters[0],'...',newexp.filters[-2],newexp.filters[-1]]
        else:
            cfilters=newexp.filters
        cfilters=hs.clipstrings(cfilters,30)
        ptitle='\n'.join(cfilters)
    title(ptitle,fontsize=10)

    # the axes attribute replaces the get_axes() method removed from matplotlib artists
    ax=iax.axes
    ax.autoscale(False)

    if showline:
        hs.Debug(1,"Showing lines")
        labs=[]
        labpos=[]
        linepos=[]
        minpos=0
        # sentinel so the last group of identical values is also closed
        svals.append('end')
        for idx,cval in enumerate(svals[:-1]):
            if cval==svals[idx+1]:
                continue
            # label at the middle of the group, line after its last sample
            labpos.append(minpos-0.5+float(idx+1-minpos)/2)
            minpos=idx+1
            linepos.append(idx+0.5)
            labs.append(cval)
        hs.Debug(1,"number of lines is %d" % len(linepos))
        if showxlabel:
            ax.set_xticks(labpos)
            ax.set_xticklabels(labs,rotation=45,ha='right')
        for cx in linepos:
            plot([cx,cx],[-0.5,np.size(ldat,0)-0.5],'k',linewidth=2)
    else:
        hs.Debug(1,"Not showing lines")
        if showxall or len(newexp.samples)<=10:
            hs.Debug(1,"less than 10 samples, showing all sample names")
            # set the tick positions before the labels so each label is attached to its tick
            ax.set_xticks(range(len(newexp.samples)))
            ax.set_xticklabels(svals,rotation=90)

    tight_layout()
    # the image rows span -0.5 .. nrows-0.5 (same range used for the separating lines above)
    ax.set_ylim(-0.5,np.size(ldat,0)-0.5)

    if showcolorbar:
        hs.Debug(1,"Showing colorbar")
        cb=colorbar(ticks=list(np.log2([2,10,100,500,1000])))
        cb.ax.set_yticklabels(['<0.02%','0.1%','1%','5%','>10%'])

    # attach the state used by the interactive callbacks to the axis
    ax.expdat=newexp
    ax.lastselect=-1
    ax.sampline=''
    ax.ofig=f
    ax.labelson=False
    ax.labelnames=[]
    f.canvas.mpl_connect('button_press_event', onplotmouseclick)
    f.canvas.mpl_connect('key_press_event', onplotkeyclick)
    plt.rcParams=oldparams

    # if want the ontology analysis for a given category:
    if ontofig:
        hs.Debug(1,"Ontofig is set")
        newexp.ontofigname=ontofig
    else:
        newexp.ontofigname=False

    # if we want gui, open it
    if usegui:
        hs.Debug(1,"Using the GUI window")
        import heatsequer.plots.plotwingui
        guiwin = heatsequer.plots.plotwingui.PlotGUIWindow(newexp)
        ax.guiwin=guiwin
        guiwin.plotfig=f
        guiwin.plotax=ax
        guiwin.show()
    else:
        ax.guiwin=False
        hs.Debug(7,'Not using gui')

    if newexp.plotmetadata:
        hs.Debug(1,"Experiment has metadata attached for plotting (%d points)" % len(newexp.plotmetadata))
        for cmet in newexp.plotmetadata:
            addplotmetadata(newexp,field=cmet[0],value=cmet[1],color=cmet[2],inverse=cmet[3],beforesample=cmet[4])

    show()
    return newexp,ax