Example #1
0
def normalizeprctile(expdat,percent=80):
	"""
	normalize reads per experiment so the given percentile (rather than the mean) is equal in all samples
	used to reduce effect of outliers (compositionality correction)
	note the percentile is calculated on the same set of bacteria for all samples
	(the bacteria kept by hs.filterminreads called with 1*len(expdat.samples)),
	while the scaling is applied to all the bacteria of the experiment

	input:
	expdat : Experiment
	percent : float
		the percentile to normalize (0-100)

	output:
	newexp : Experiment
		the new normalized experiment (created from expdat using hs.copyexp)
	"""
	params=locals()

	# select the bacteria to use - don't want to include very low freq. bacteria
	refexp=hs.filterminreads(expdat,1*len(expdat.samples))

	# one percentile value per column (sample)
	percvals=np.percentile(refexp.data,percent,axis=0)
	meanperc=np.mean(percvals)
	newexp=hs.copyexp(expdat)
	# if the mean percentile is 0 there is no scale to normalize to, so keep the data as is
	if meanperc!=0:
		# relative size of the percentile of each sample compared to the mean over all samples
		factors=percvals/meanperc
		for idx in range(len(expdat.samples)):
			# a sample with a zero percentile cannot be rescaled - leave it unchanged
			if factors[idx]==0:
				continue
			# divide (not multiply) so a sample with a high percentile is scaled down
			# and all samples end with the same percentile value
			newexp.data[:,idx]=newexp.data[:,idx]/factors[idx]
	newexp.filters.append("normalize percentile %f" % percent)
	hs.addcommand(newexp,"normalizeprctile",params=params,replaceparams={'expdat':expdat})

	return newexp
Example #2
0
def clusterbacteria(exp,minreads=0,uselog=True):
	"""
	reorder the bacteria of an experiment so bacteria with similar behavior across samples are adjacent
	(single linkage hierarchical clustering on the euclidean distance between the scaled rows)

	input:
	exp : Experiment
	minreads : int
		passed to hs.filterminreads before clustering (less bacteria - faster clustering)
	uselog : bool
		True to log2 transform the reads (values of 2 or less are set to 2 first) before scaling,
		False to use the reads as is

	output:
	newexp : Experiment
		the filtered experiment with the bacteria reordered according to the clustering
	"""
	params=locals()

	filtexp=hs.filterminreads(exp,minreads,logit=False)
	# work on a copy so the data of the filtered experiment is not modified
	mat=copy.copy(filtexp.data)
	if uselog:
		# floor at 2 so the minimal value after the log is 1
		mat[mat<=2]=2
		mat=np.log2(mat)
	# scale each row (axis=1) - presumably sklearn.preprocessing.scale; verify against the file imports
	mat=scale(mat,axis=1,copy=False)

	# hierarchical clustering of the rows, the leaf order is the new bacteria order
	linkage=cluster.hierarchy.single(spatial.distance.pdist(mat,metric='euclidean'))
	leaforder=cluster.hierarchy.leaves_list(linkage)

	newexp=hs.reorderbacteria(filtexp,leaforder)
	hs.addcommand(newexp,"clusterbacteria",params=params,replaceparams={'exp':exp})
	newexp.filters.append("cluster bacteria minreads=%d" % minreads)
	return newexp
Example #3
0
	def filterminreads(self):
		"""
		GUI action: filter the single selected experiment by minimal number of reads
		asks the user for the threshold, runs hs.filterminreads on the selected experiment
		and adds the result (studyname suffixed with '_fmr') using self.addexp
		does nothing if the selection is not exactly one item or the dialog is cancelled
		"""
		selected=self.bMainList.selectedItems()
		if len(selected)!=1:
			print("Need 1 item")
			return
		# exactly one item is selected - look up its experiment by the item text
		expname=str(selected[0].text())
		cexp=self.explist[expname]
		val,ok=QtGui.QInputDialog.getInt(self,'Filter min reads','Minimal number of reads per bacteria',10,0,10000)
		if not ok:
			return
		newexp=hs.filterminreads(cexp,minreads=val)
		newexp.studyname+='_fmr'
		self.addexp(newexp)
Example #4
0
def plotexp(exp,sortby=False,numeric=False,minreads=4,rangeall=False,seqdb=None,cdb=None,showline=True,ontofig=False,usegui=True,showxall=False,showcolorbar=False,ptitle=False,lowcutoff=1,uselog=True,showxlabel=True,colormap=False,colorrange=False,linewidth=2,subline='',showhline=True,newfig=True,fixfont=False,fontsize=None,nosort=False,zeroisnone=False,xlabelrotation=45,showtaxnames=False):
	"""
	Plot an experiment as an interactive heatmap (one column per sample)
	input:
	exp - from load()
	sortby - name of mapping file field to sort by or False to not sort
	numeric - True if the field is numeric
	minreads - minimum number of reads per bacteria in order to show it or 0 to show all
	rangeall - True to show all frequencies in image scale, false to saturate at 10%
	seqdb - the SRBactDB database (from bactdb.load), or None (default) to use the heatsequer loaded bdb
	cdb - the cool sequences database (from cooldb.load), or None (default) to use the heatsequer loaded cdb
	showline - if True plot lines between category values
	ontofig - name of ontology to plot for bactdb or false to no plot
	usegui - True use a gui for otu summary, False just print
	showxall - True to show all sample names when not showing lines, False to show no more than 10
	showcolorbar - True to plot the colorbar. False to not plot
	ptitle : str, False or None (optional)
		'' or False (default) to show the processing history (filters) as title, None to not show a title,
		or str for the title of the figure
	lowcutoff - minimal value for read (for 0 log transform) - the minimal resolution - could be 10000*2/origreads
	uselog : bool
		True (default) to plot the log2 of the reads (after applying lowcutoff), False to plot the reads as is
	showxlabel : bool
		True to show the x label (default), False to hide it
	colormap : string or False
		name of colormap or False (default) to use mpl default colormap
	colorrange : [min,max] or False
		[min,max] to set the colormap range, False to use data min,max (default) as specified in rangeall
	linewidth : int
		the width of the lines plotted between category values
	subline : str
		Name of category for subline plotting or '' (Default) for no sublines
	showhline : bool
		True (default) to plot the horizontal lines listed in exp.hlines. False to not plot them
	newfig : bool
		True (default) to open figure in new window, False to use current
	fixfont : bool (optional)
		False (default) to use the default font, True to use a fixed width (monospace) font for the y tick labels
	fontsize : int or None (optional)
		None (default) to use default font size, number to use that font size
	nosort : bool (optional)
		False (default) to sort by the sort field, True to skip the sorting (but still show the field values)
	zeroisnone : bool (optional)
		False (default) to plot zeros as 0, True to assign None (white color)
	xlabelrotation : int (optional)
		the rotation of the xtick labels
	showtaxnames : bool (optional)
		False (default) to not show tax names (need to press 'h' to show)
		True to show the taxonomy names

	output:
	newexp - the plotted experiment (sorted and filtered)
	ax - the plot axis
	"""

	hs.Debug(1,"Plot experiment %s" % exp.studyname)
	hs.Debug(1,"Commands:")
	for ccommand in exp.commands:
		hs.Debug(1,"%s" % ccommand)

	if exp.sparse:
		hs.Debug(9,'Sparse matrix - converting to dense')
		exp=hs.copyexp(exp,todense=True)

	# use the databases loaded by heatsequer if none supplied
	if cdb is None:
		cdb=hs.cdb
	if seqdb is None:
		seqdb=hs.bdb
	if sortby:
		if not nosort:
			hs.Debug(1,"Sorting by field %s" % sortby)
			vals=[exp.smap[csamp][sortby] for csamp in exp.samples]
			if numeric:
				hs.Debug(1,"(numeric sort)")
				vals=hs.tofloat(vals)
			svals,sidx=hs.isort(vals)
			newexp=hs.reordersamples(exp,sidx)
		else:
			hs.Debug(1,"no sorting but showing columns")
			svals=hs.getfieldvals(exp,sortby)
			newexp=hs.copyexp(exp)
	else:
		hs.Debug(1,"No sample sorting")
		svals=hs.getfieldvals(exp,'#SampleID')
		newexp=hs.copyexp(exp)
	hs.Debug(1,"Filtering min reads. original bacteria - %d" % len(newexp.seqs))
	if minreads>0:
		newexp=hs.filterminreads(newexp,minreads,logit=uselog)
	hs.Debug(1,"New number of bacteria %d" % len(newexp.seqs))
	newexp.seqdb=seqdb
	newexp.cdb=cdb
	newexp.scdb=hs.scdb

	# note ldat is the data array of newexp, so the changes below are also seen in newexp.data
	ldat=newexp.data
	if zeroisnone:
		ldat[ldat==0]=None
	if uselog:
		hs.Debug(1,"Using log, cutoff at %f" % lowcutoff)
		ldat[np.where(ldat<lowcutoff)]=lowcutoff
		ldat=np.log2(ldat)
	# NOTE(review): oldparams refers to the live rcParams object (it is not a copy), so the
	# assignment back at the end does not undo the keymap changes made here. Kept as is since
	# the key press handler may rely on the remapped keys - confirm before changing
	oldparams=plt.rcParams
	mpl.rc('keymap',back='c, backspace')
	mpl.rc('keymap',forward='v')
	mpl.rc('keymap',all_axes='A')
	if newfig:
		f=plt.figure(tight_layout=True)
	else:
		f=plt.gcf()
	# set the colormap to default if not supplied
	if not colormap:
		colormap=plt.rcParams['image.cmap']
	# plot the image
	imparams={'interpolation':'nearest','aspect':'auto','cmap':plt.get_cmap(colormap)}
	if colorrange:
		hs.Debug(1,"colormap range is %s" % str(colorrange))
		imparams['clim']=colorrange
	elif rangeall:
		# no clim - use the full range of the data
		hs.Debug(1,"colormap range is all")
	else:
		hs.Debug(1,"colormap range is 0,10")
		imparams['clim']=[0,10]
	iax=plt.imshow(ldat,**imparams)

	if ptitle is not None:
		if not ptitle:
			hs.Debug(1,"Showing filters in title")
			# for a long history show only the first and the last two filters
			if (len(newexp.filters))>4:
				cfilters=[newexp.filters[0],'...',newexp.filters[-2],newexp.filters[-1]]
			else:
				cfilters=newexp.filters
			cfilters=hs.clipstrings(cfilters,30)
			ptitle='\n'.join(cfilters)
		plt.title(ptitle,fontsize=10)

	# use the axes attribute (get_axes() is not available in newer matplotlib versions)
	ax=iax.axes
	ax.autoscale(False)

	# plot the sublines (smaller category lines)
	if subline:
		slval=hs.getfieldvals(newexp,subline)
		prevval=slval[0]
		for idx,cval in enumerate(slval):
			if cval!=prevval:
				xpos=idx-0.5
				plt.plot([xpos,xpos],[-0.5,np.size(ldat,0)-0.5],'w:')
				prevval=cval

	if showline:
		hs.Debug(1,"Showing lines")
		labs=[]
		labpos=[]
		linepos=[]
		minpos=0
		# sentinel so the last group of values is also closed
		svals.append('end')
		for idx,cval in enumerate(svals[:-1]):
			if cval==svals[idx+1]:
				continue
			# the value changes after this sample - label at the middle of the group, line after it
			labpos.append(minpos-0.5+float(idx+1-minpos)/2)
			minpos=idx+1
			linepos.append(idx+0.5)
			labs.append(cval)
		hs.Debug(1,"number of lines is %d" % len(linepos))
		if showxlabel:
			ax.set_xticks(labpos)
			ax.set_xticklabels(labs,rotation=xlabelrotation,ha='right')
		for cx in linepos:
			# black line with a white dotted line on top so it is visible on any background
			plt.plot([cx,cx],[-0.5,np.size(ldat,0)-0.5],'k',linewidth=linewidth)
			plt.plot([cx,cx],[-0.5,np.size(ldat,0)-0.5],'w:',linewidth=linewidth)
	else:
		hs.Debug(1,"Not showing lines")
		if showxall or len(newexp.samples)<=10:
			hs.Debug(1,"less than 10 samples, showing all sample names")
			# set the tick positions before the labels so each label is attached to its tick
			ax.set_xticks(range(len(newexp.samples)))
			ax.set_xticklabels(svals,rotation=90)
	# f.tight_layout()
	ax.set_ylim(-0.5,np.size(ldat,0)-0.5)

	if fixfont:
		fontProperties = {'family':'monospace'}
		ax.set_yticklabels(ax.get_yticks(), fontProperties)

	if showcolorbar:
		hs.Debug(1,"Showing colorbar")
		cb=plt.colorbar(ticks=list(np.log2([2,10,100,500,1000])))
		cb.ax.set_yticklabels(['<0.02%','0.1%','1%','5%','>10%'])

	# attach the state used by the mouse/keyboard callbacks to the axis
	ax.expdat=newexp
	ax.lastselect=-1
	ax.sampline=''
	ax.ofig=f
	ax.labelson=False
	ax.labelnames=[]
	f.canvas.mpl_connect('button_press_event', onplotmouseclick)
	f.canvas.mpl_connect('key_press_event', onplotkeyclick)
	plt.rcParams=oldparams

	# if want the ontology analysis for a given category:
	if ontofig:
		hs.Debug(1,"Ontofig is set")
		newexp.ontofigname=ontofig
	else:
		newexp.ontofigname=False

	# if we want gui, open it
	if usegui:
		hs.Debug(1,"Using the GUI window")
		from heatsequer.plots import plotwingui
		# create a single gui window for the plot (and link it to the figure and axis)
		guiwin = plotwingui.PlotGUIWindow(newexp)
		ax.guiwin=guiwin
		guiwin.plotfig=f
		guiwin.plotax=ax
		guiwin.show()
	else:
		ax.guiwin=False
		hs.Debug(7,'Not using gui')

	ax.plot_labelsize=fontsize
	if newexp.plotmetadata:
		hs.Debug(1,"Experiment has metadata attached for plotting (%d points)" % len(newexp.plotmetadata))
		for cmet in newexp.plotmetadata:
			addplotmetadata(newexp,field=cmet[0],value=cmet[1],color=cmet[2],inverse=cmet[3],beforesample=cmet[4])
	if showhline:
		if newexp.hlines:
			for cpos in newexp.hlines:
				plt.plot([0,np.shape(newexp.data)[1]],[cpos-0.5,cpos-0.5],'g')
	plt.show()
	if showtaxnames:
		showtaxonomies(newexp,ax,showdb=False,showcontam=False)

	return newexp,ax
Example #5
0
def plotexp(exp,sortby=False,numeric=False,minreads=4,rangeall=False,seqdb=None,cdb=None,showline=True,ontofig=False,usegui=True,showxall=False,showcolorbar=False,ptitle=False,lowcutoff=1,uselog=True,showxlabel=True,colormap=False,colorrange=False):
	"""
	Plot an experiment as an interactive heatmap (one column per sample)
	input:
	exp - from load()
	sortby - name of mapping file field to sort by or False to not sort
	numeric - True if the field is numeric
	minreads - minimum number of reads per bacteria in order to show it or 0 to show all
	rangeall - True to show all frequencies in image scale, false to saturate at 10%
	seqdb - the SRBactDB database (from bactdb.load)
	cdb - the cool sequences database (from cooldb.load)
	showline - if True plot lines between category values
	ontofig - name of ontology to plot for bactdb or false to no plot
	usegui - True use a gui for otu summary, False just print
	showxall - True to show all sample names when not showing lines, False to show no more than 10
	showcolorbar - True to plot the colorbar. False to not plot
	ptitle - name of the figure or False to show processing history as name
	lowcutoff - minimal value for read (for 0 log transform) - the minimal resolution - could be 10000*2/origreads
	uselog : bool
		True (default) to plot the log2 of the reads (after applying lowcutoff), False to plot the reads as is
	showxlabel : bool
		True to show the x label (default), False to hide it
	colormap : string or False
		name of colormap or False (default) to use mpl default colormap
	colorrange : [min,max] or False
		[min,max] to set the colormap range, False to use data min,max (default) as specified in rangeall

	output:
	newexp - the plotted experiment (sorted and filtered)
	ax - the plot axis
	"""

	hs.Debug(1,"Plot experiment %s" % exp.studyname)
	hs.Debug(1,"Commands:")
	for ccommand in exp.commands:
		hs.Debug(1,"%s" % ccommand)
	if sortby:
		hs.Debug(1,"Sorting by field %s" % sortby)
		vals=[exp.smap[csamp][sortby] for csamp in exp.samples]
		if numeric:
			hs.Debug(1,"(numeric sort)")
			vals=hs.tofloat(vals)
		svals,sidx=hs.isort(vals)
		newexp=hs.reordersamples(exp,sidx)
	else:
		hs.Debug(1,"No sample sorting")
		svals=hs.getfieldvals(exp,'#SampleID')
		newexp=hs.copyexp(exp)
	hs.Debug(1,"Filtering min reads. original bacteria - %d" % len(newexp.seqs))
	if minreads>0:
		newexp=hs.filterminreads(newexp,minreads,logit=uselog)
	hs.Debug(1,"New number of bacteria %d" % len(newexp.seqs))
	newexp.seqdb=seqdb
	newexp.cdb=cdb

	# note ldat is the data array of newexp, so the cutoff below is also seen in newexp.data
	ldat=newexp.data
	if uselog:
		hs.Debug(1,"Using log, cutoff at %f" % lowcutoff)
		ldat[np.where(ldat<lowcutoff)]=lowcutoff
		ldat=np.log2(ldat)
	# NOTE(review): oldparams refers to the live rcParams object (it is not a copy), so the
	# assignment back at the end does not undo the keymap changes made here. Kept as is since
	# the key press handler may rely on the remapped keys - confirm before changing
	oldparams=plt.rcParams
	mpl.rc('keymap',back='c, backspace')
	mpl.rc('keymap',forward='v')
	mpl.rc('keymap',all_axes='A')
	f=plt.figure()
	# set the colormap to default if not supplied
	if not colormap:
		colormap=plt.rcParams['image.cmap']
	# plot the image
	imparams={'interpolation':'nearest','aspect':'auto','cmap':plt.get_cmap(colormap)}
	if colorrange:
		hs.Debug(1,"colormap range is %s" % str(colorrange))
		imparams['clim']=colorrange
	elif rangeall:
		# no clim - use the full range of the data
		hs.Debug(1,"colormap range is all")
	else:
		hs.Debug(1,"colormap range is 0,10")
		imparams['clim']=[0,10]
	iax=plt.imshow(ldat,**imparams)

	if not ptitle:
		hs.Debug(1,"Showing filters in title")
		# for a long history show only the first and the last two filters
		if (len(newexp.filters))>4:
			cfilters=[newexp.filters[0],'...',newexp.filters[-2],newexp.filters[-1]]
		else:
			cfilters=newexp.filters
		cfilters=hs.clipstrings(cfilters,30)
		ptitle='\n'.join(cfilters)
	plt.title(ptitle,fontsize=10)

	# use the axes attribute (get_axes() is not available in newer matplotlib versions)
	ax=iax.axes
	ax.autoscale(False)
	if showline:
		hs.Debug(1,"Showing lines")
		labs=[]
		labpos=[]
		linepos=[]
		minpos=0
		# sentinel so the last group of values is also closed
		svals.append('end')
		for idx,cval in enumerate(svals[:-1]):
			if cval==svals[idx+1]:
				continue
			# the value changes after this sample - label at the middle of the group, line after it
			labpos.append(minpos-0.5+float(idx+1-minpos)/2)
			minpos=idx+1
			linepos.append(idx+0.5)
			labs.append(cval)
		hs.Debug(1,"number of lines is %d" % len(linepos))
		if showxlabel:
			ax.set_xticks(labpos)
			ax.set_xticklabels(labs,rotation=45,ha='right')
		for cx in linepos:
			plt.plot([cx,cx],[-0.5,np.size(ldat,0)-0.5],'k',linewidth=2)
	else:
		hs.Debug(1,"Not showing lines")
		if showxall or len(newexp.samples)<=10:
			hs.Debug(1,"less than 10 samples, showing all sample names")
			# set the tick positions before the labels so each label is attached to its tick
			ax.set_xticks(range(len(newexp.samples)))
			ax.set_xticklabels(svals,rotation=90)
	plt.tight_layout()
	# the image rows span -0.5 to (number of rows)-0.5 (same range as the category lines),
	# so end the axis there and do not leave an empty row at the edge
	ax.set_ylim(-0.5,np.size(ldat,0)-0.5)

	if showcolorbar:
		hs.Debug(1,"Showing colorbar")
		cb=plt.colorbar(ticks=list(np.log2([2,10,100,500,1000])))
		cb.ax.set_yticklabels(['<0.02%','0.1%','1%','5%','>10%'])

	# attach the state used by the mouse/keyboard callbacks to the axis
	ax.expdat=newexp
	ax.lastselect=-1
	ax.sampline=''
	ax.ofig=f
	ax.labelson=False
	ax.labelnames=[]
	f.canvas.mpl_connect('button_press_event', onplotmouseclick)
	f.canvas.mpl_connect('key_press_event', onplotkeyclick)
	plt.rcParams=oldparams

	# if want the ontology analysis for a given category:
	if ontofig:
		hs.Debug(1,"Ontofig is set")
		newexp.ontofigname=ontofig
	else:
		newexp.ontofigname=False

	# if we want gui, open it
	if usegui:
		hs.Debug(1,"Using the GUI window")
		import heatsequer.plots.plotwingui
		# link the gui window to the figure and axis
		guiwin = heatsequer.plots.plotwingui.PlotGUIWindow(newexp)
		ax.guiwin=guiwin
		guiwin.plotfig=f
		guiwin.plotax=ax
		guiwin.show()
	else:
		ax.guiwin=False
		hs.Debug(7,'Not using gui')

	if newexp.plotmetadata:
		hs.Debug(1,"Experiment has metadata attached for plotting (%d points)" % len(newexp.plotmetadata))
		for cmet in newexp.plotmetadata:
			addplotmetadata(newexp,field=cmet[0],value=cmet[1],color=cmet[2],inverse=cmet[3],beforesample=cmet[4])
	plt.show()
	return newexp,ax