Example #1
0
def samplemeanpervalue(expdat,field):
	"""
	Collapse an experiment to one sample per unique value of a field, where each
	resulting sample holds the mean (per row of the data matrix) of its group

	input:
	expdat : Experiment
		the experiment to collapse
	field : string
		the field to group the samples by (i.e. 'ENV_MATTER')

	output:
	newexp : Experiment
		The new experiment with 1 sample per unique value of field
		(built from the first sample of each group, data replaced by the group mean)
	"""
	params=locals()

	groupvals=hs.getfieldvals(expdat,field,ounique=True)
	allvals=hs.getfieldvals(expdat,field,ounique=False)

	# positions[value] is used below as the sample (column) indices holding that value
	positions=hs.listtodict(allvals)

	# the first sample of every group acts as the placeholder for the whole group
	firstsamples=[positions[gval][0] for gval in groupvals]
	newexp=hs.reordersamples(expdat,firstsamples)

	# overwrite each placeholder column with the mean over all samples of its group
	for col,gval in enumerate(groupvals):
		newexp.data[:,col]=np.mean(expdat.data[:,positions[gval]],axis=1)

	newexp.filters.append('samplemeanpervalue for field %s' % field)
	hs.addcommand(newexp,"samplemeanpervalue",params=params,replaceparams={'expdat':expdat})
	return newexp
Example #2
0
def clustersamples(exp,minreads=0):
	"""
	Reorder the samples of an experiment so that samples with similar profiles are adjacent
	(single linkage hierarchical clustering on Bray-Curtis distances of the log2 data)
	input:
	exp : Experiment
	minreads : int
		the minimal original number of reads per sample to keep it (passed to hs.filterorigreads)
	output:
	newexp : Experiment
		the filtered and clustered experiment
	"""
	params=locals()

	newexp=hs.filterorigreads(exp,minreads)

	# work on a transposed copy (one row per sample) so the experiment data is untouched
	logdat=np.transpose(copy.copy(newexp.data))
	# floor the values at 2 before the log transform
	logdat[logdat<=2]=2
	logdat=np.log2(logdat)

	# pairwise sample distances -> single linkage tree -> leaf order of the tree
	distances=spatial.distance.pdist(logdat,metric='braycurtis')
	linkage=cluster.hierarchy.single(distances)
	leaforder=cluster.hierarchy.leaves_list(linkage)

	newexp=hs.reordersamples(newexp,leaforder)
	hs.addcommand(newexp,"clustersamples",params=params,replaceparams={'exp':exp})
	newexp.filters.append("cluster samples minreads=%d" % minreads)
	return newexp
Example #3
0
def sortsamples(exp,field,numeric=False,logit=True):
	"""
	Reorder the samples of an experiment by the values of a mapping file field
	input:
	exp : Experiment
	field : string
		name of the field to sort by
	numeric : bool
		True to convert the field values with hs.tofloat before sorting, False to sort them as they are
	logit : bool
		True (default) to record the command and a filter description in the new experiment,
		False to skip the bookkeeping
	output:
	newexp : Experiment
		the sorted experiment
	"""
	params=locals()

	sortkeys=hs.getfieldvals(exp,field)
	if numeric:
		sortkeys=hs.tofloat(sortkeys)
	# isort gives back the sorted values and the sort order - only the order is used
	_sortedkeys,order=hs.isort(sortkeys)
	newexp=hs.reordersamples(exp,order)

	if not logit:
		return newexp
	hs.addcommand(newexp,"sortsamples",params=params,replaceparams={'exp':exp})
	newexp.filters.append('sorted samples by field %s' % field)
	return newexp
Example #4
0
def filtersamples(expdat,field,filtval,exact=True,exclude=False,numexpression=False,shownumoutput=True):
	"""
	filter samples in experiment according to value in field
	input:
	expdat : Experiment
	field : string
		name of the field to filter by
	filtval : string or list of strings
		the string to filter (if a list of strings, filter if any in the list)
	exact : bool
		True for exact match, False for substring
	exclude : bool
		False to keep only matching samples, True to exclude matching samples
	numexpression : bool
		True if val is a python expression, False if just a value. For an expression assume value is the beginning of the line (i.e. '<=5')
		Samples with an empty value in field never match an expression
	shownumoutput : bool
		True (default) to report the number of samples remaining at debug level 6, False to report it at debug level 1
	output:
	newexp : Experiment
		the experiment containing only the samples that passed the filter
	"""
	params=locals()
	if not isinstance(filtval,list):
		filtval=[filtval]

	keep=[]
	for cidx,csamp in enumerate(expdat.samples):
		keepit=False
		for filt in filtval:
			cval=expdat.smap[csamp][field]
			if numexpression:
				if len(cval)==0:
					continue
				# NOTE(security): eval runs mapping file text joined with the filter as python code.
				# Only use numexpression with mapping files and filter values you trust
				if eval(cval+filt):
					keepit=True
			elif exact:
				if cval==filt:
					keepit=True
			elif filt in cval:
				keepit=True
		# if exclude reverse the decision
		if exclude:
			keepit=not keepit
		if keepit:
			keep.append(cidx)
	newexp=hs.reordersamples(expdat,keep)

	# describe ALL the filter values (not just the last one tested in the loop above).
	# for a single value this gives the same text as before
	shownvals=','.join('%s' % (cfilt,) for cfilt in filtval)
	fstr="filter data %s in %s " % (shownvals,field)
	if exact:
		fstr=fstr+"(exact)"
	else:
		fstr=fstr+"(substr)"
	if exclude:
		fstr+=" (exclude)"
	newexp.filters.append(fstr)
	hs.addcommand(newexp,"filtersamples",params=params,replaceparams={'expdat':expdat})
	hs.Debug(6 if shownumoutput else 1,'%d Samples left' % len(newexp.samples))
	return newexp
Example #5
0
	def view(self):
		"""
		Plot the current experiment with this object's samples and sequences moved to the front

		The samples in self.samples come first, followed by all the other samples.
		Likewise the sequences in self.seqs (translated to positions via cexp.seqdict)
		come first, followed by all the other sequences.
		The reordered experiment is then shown using hs.plotexp (with the gui).
		"""
		cexp=self.cexp

		# selected samples first, then all the samples not selected
		othersamples=np.setdiff1d(np.arange(len(cexp.samples)),self.samples)
		sampleorder=np.concatenate((self.samples,othersamples))

		# same for the bacteria - translate the selected sequences to their positions
		selectedrows=[cexp.seqdict[cseq] for cseq in self.seqs]
		otherrows=np.setdiff1d(np.arange(len(cexp.seqs)),selectedrows)
		bactorder=np.concatenate((selectedrows,otherrows))

		reordered=hs.reorderbacteria(cexp,bactorder)
		reordered=hs.reordersamples(reordered,sampleorder,inplace=True)
		hs.plotexp(reordered,seqdb=self.bactdb,sortby=False,numeric=False,usegui=True,cdb=self.cooldb,showline=False)
Example #6
0
def filterorigreads(expdat,minreads,inplace=False):
	"""
	filter away all samples that contained originally (before normalization) less than minreads
	input:
	expdat : Experiment
	minreads : float
		the minimum number of reads of the sample in the biom table to filter if less (usually int)
	inplace : Bool
		True to replace current experiment, False (default) to create a new one
		(forwarded to hs.reordersamples)
	output:
	newexp : Experiment
		the filtered experiment
	"""
	params=locals()

	numreads=np.array(expdat.origreads)
	# positions of the samples with enough original reads
	keep=np.where(numreads>=minreads)[0]
	# pass inplace on - it was documented but silently ignored before
	newexp=hs.reordersamples(expdat,keep,inplace=inplace)
	hs.Debug(6,'%d Samples left' % len(newexp.samples))
	hs.addcommand(newexp,"filterorigreads",params=params,replaceparams={'expdat':expdat})
	return newexp
Example #7
0
def filtersamplesfromfile(expdat,filename,field='#SampleID',exclude=False):
	"""
	filter samples based on a list in a file (one line per sample)
	input:
	expdat : Experiment
	filename : string
		name of the text file (1 line per sample). Leading/trailing whitespace of each line is ignored
	field : string
		field for the experiment that the file contains (default '#SampleID')
	exclude : bool
		True to throw away the samples listed in the file instead of keeping them
	output:
	newexp : Experiment
		the filtered experiment
	"""
	params=locals()

	# the 'rU' mode was removed in python 3.11 (text mode already handles all newline types),
	# and the with block closes the file even if reading fails
	with open(filename) as fl:
		# a set, since it is only used for membership tests
		vals=set(cline.strip() for cline in fl)

	keep=[]
	for cidx,csamp in enumerate(expdat.samples):
		cval=expdat.smap[csamp][field]
		keepit=False
		if cval in vals:
			hs.Debug(3,'found file value %s in sample %s' % (cval,csamp))
			keepit=True
		# if exclude reverse the decision
		if exclude:
			keepit=not keepit
		if keepit:
			keep.append(cidx)
	newexp=hs.reordersamples(expdat,keep)

	fstr="filter data from file %s in %s " % (filename,field)
	if exclude:
		fstr+=" (exclude)"
	newexp.filters.append(fstr)
	hs.addcommand(newexp,"filtersamplesfromfile",params=params,replaceparams={'expdat':expdat})
	hs.Debug(6,'%d Samples left' % len(newexp.samples))
	return newexp
Example #8
0
def load(tablename, mapname='map.txt', taxfile='', nameisseq=True,studyname=False,tabletype='biom',normalize=True,addsname='',keepzero=False,removefrom=False,removenum=1,mapsampletolowercase=False,sortit=True,useseqnamefortax=True,rawreads=False,usesparse=False):
	"""
	Load an experiment - a biom table and a mapping file
	input:
	tablename - the name of the biom table file
	mapname - name of the mapping file, or empty/False to use only the sample names from the table
	taxfile - empty ('') to load taxonomy from biom table, non-empty to load
	from rdp output file (web)
	NOTE(review): taxfile is not referenced in the body of this function - confirm if still supported
	nameisseq - False to keep otu name as sid without hashing it, True to treat otuid as sequence
	addsname - a string to add to each table sample name (or empty to not add)
	studyname - False to assign from table file name, otherwise string to store as study name
	tabletype:
		'biom' - a biom table
		'meta' - a metabolomics table (row per sample, col per metabolite, can contain duplicate metaboliteids)
	normalize - True to normalize to 10k reads per sample, False to not normalize (change to mean 10k reads/sample)
		(always treated as False for tabletype 'meta')
	keepzero : bool
		False (default) to throw away samples with 0 reads (including mapping file samples
		missing from the table), True to keep them (missing samples are added as all-zero columns)
	removefrom : string
		if non empty - cut table sample name after (and including) the first occurance of removefrom
	removenum : int
		passed to hs.findn together with removefrom (presumably which occurance to cut at - verify in hs.findn)
	mapsampletolowercase : bool
		True to convert the mapping file sample id to lower case (for EMP data). default=False
	sortit : bool
		True (default) to sort sequences by taxonomy, False to not sort
	useseqnamefortax : bool
		True (default) to use the sequence as taxonomy if no taxonomy supplied, False to use 'unknown'
	rawreads : bool
		True in combination with normalize=False - do not modify read count to mean 10k
	usesparse : bool
		True to use sparse matrix representation, False to use non-sparse (default)

	output:
	an experiment class for the current experiment (or False if tabletype is not supported)
	"""

	params=locals()

	# load the table
	if tabletype=='biom':
		hs.Debug(6,'Loading biom table')
		table=biom.load_table(tablename)
	elif tabletype=='meta':
		hs.Debug(6,'Loading metabolite table')
		table=loadmetabuckettable(tablename)
	else:
		hs.Debug(9,'Table type %s not supported' % tabletype)
		return False

	# md5 of the raw table data (as loaded, before any sample renaming or filtering)
	datamd5g=hashlib.md5()
	datamd5g.update(table.matrix_data.todense().A.view(np.uint8))
	datamd5=datamd5g.hexdigest()
	print(datamd5)
	# if need to cut table sample names
	if removefrom:
		# idtable maps original table sample id -> new (cut) sample id
		idtable={}
		# foundids counts how many times each cut id was seen (to make duplicates unique)
		foundids={}
		ids=table.ids(axis='sample')
		if len(set(ids))!=len(ids):
			hs.Debug(8,'non unique ids identified')
		for cid in ids:
			if removefrom in cid:
				fpos=hs.findn(cid,removefrom,removenum)
				if fpos==-1:
					hs.Debug(6,'Did not find enough %s in %s' % (removefrom,cid))
					tid=cid
				else:
					tid=cid[:fpos]
			else:
				hs.Debug(6,'%s not found in sample name %s (removefrom)' % (removefrom,cid))
				tid=cid
			if tid in foundids:
				# the cut name already exists - add a replicate suffix to keep ids unique
				hs.Debug(6,'already have id %s' % cid)
				foundids[tid]+=1
				idtable[cid]=tid+'-rep-'+str(foundids[tid])
				print(idtable[cid])
			else:
				foundids[tid]=1
				idtable[cid]=tid
		hs.Debug(6,'found %d keys %d values' % (len(set(idtable.keys())),len(set(idtable.values()))))
		table=table.update_ids(idtable,axis='sample')

	# if need to add constant string to sample names in table
	if addsname!='':
		idtable={}
		ids=table.ids(axis='sample')
		for cid in ids:
			idtable[cid]=addsname+cid
		table=table.update_ids(idtable,axis='sample')


	smap = {}
	mapsamples = []
	mapmd5=''
	if mapname:
		# if mapping file supplied, load it
		mapsamples,smap,fields,mapmd5=loadmap(mapname,mapsampletolowercase=mapsampletolowercase)
	else:
		# no mapping file, so just create the #SampleID field
		hs.Debug(6,'No mapping file supplied - using just sample names')
		tablesamples = table.ids(axis='sample')
		for cid in tablesamples:
			smap[cid]={'#SampleID':cid}
			mapsamples.append(cid)
		fields=['#SampleID']

	# remove table samples not in mapping file
	tablesamples = table.ids(axis='sample')
	hs.Debug(6,'number of samples in table is %d' % len(tablesamples))
	removelist=[]
	for cid in tablesamples:
		if cid not in mapsamples:
			removelist.append(cid)
			hs.Debug(6,'Table sample %s not found in mapping file' % cid)
	hs.Debug(6,'removing %s samples' % len(removelist))
	if len(removelist)>0:
		table=table.filter(removelist,axis='sample',invert=True)

	tablesamples = table.ids(axis='sample')
	hs.Debug(6,'deleted. number of samples in table is now %d' % len(tablesamples))

	# remove samples not in table from mapping file
	# (or, if keepzero, remember them in addlist so they are added as empty samples later)
	removemap=[]
	addlist=[]
	for idx,cmap in enumerate(mapsamples):
		if cmap not in tablesamples:
			hs.Debug(2,'Map sample %s not in table' % cmap)
			if not keepzero:
				removemap.append(idx)
				try:
					del smap[cmap]
				except KeyError:
					# the id was already deleted, so it appears more than once in the mapping file
					hs.Debug(8,'Duplicate SampleID %s in mapping file' % cmap)
			else:
				addlist.append(cmap)
	if len(removemap)>0:
		hs.Debug(7,'removing %d samples from mapping file' % len(removemap))
		mapsamples=hs.delete(mapsamples,removemap)
	hs.Debug(7,'number of samples in mapping file is now %d' % len(mapsamples))

	# get info about the sequences
	tableseqs = table.ids(axis='observation')
	sids = []
	tax = []
	osnames=[]
	for cid in tableseqs:
		# get the original observation (otu) name
		osnames.append(cid)
		# get the sid (hash )
		if nameisseq:
			sids.append(hs.hashseq(cid))
		else:
			sids.append(cid)
		# get the taxonomy string
		ctax=gettaxfromtable(table,cid,useseqname=useseqnamefortax)
		tax.append(ctax)

	if not studyname:
		studyname=os.path.basename(tablename)

	exp=hs.Experiment()
	exp.datatype=tabletype
	if usesparse:
		exp.data=scipy.sparse.dok_matrix(table.matrix_data)
	else:
		exp.data=table.matrix_data.todense().A
	# check if need to add the 0 read samples to the data
	if len(addlist)>0:
		tablesamples=list(tablesamples)
		tablesamples=tablesamples+addlist
		exp.data=np.hstack([exp.data,np.zeros([np.shape(exp.data)[0],len(addlist)])])
	exp.smap=smap
	exp.samples=tablesamples
	exp.seqs=tableseqs
	for idx,cseq in enumerate(exp.seqs):
		exp.seqdict[cseq]=idx
	exp.sids=sids
	exp.origotunames=osnames
	exp.tax=tax
	exp.tablefilename=tablename
	exp.studyname=studyname
	# store the mapping file name (the table file name was stored here by mistake)
	exp.mapfilename=mapname
	exp.filters = [tablename]
	exp.fields = fields
	exp.datamd5 = datamd5
	exp.mapmd5 = mapmd5
	colsum=np.sum(exp.data,axis=0,keepdims=False)
	exp.origreads=list(colsum)
	# add the original number of reads as a field to the experiment
	exp.fields.append('origReads')
	for idx,csamp in enumerate(exp.samples):
		exp.smap[csamp]['origReads']=str(exp.origreads[idx])

	# normalize samples to 10k reads per samples
	colsum=np.sum(exp.data,axis=0,keepdims=True)
	okreads=np.where(colsum>0)
	if np.size(colsum)-np.size(okreads[1])>0:
		print("Samples with 0 reads: %d" % (np.size(colsum)-np.size(okreads[1])))
		if not keepzero:
			exp=hs.reordersamples(exp,okreads[1])
		colsum=np.sum(exp.data,axis=0,keepdims=True)
	if tabletype=='meta':
		normalize=False

	if normalize:
		# NOTE(review): with keepzero=True the 0 read samples are divided by a 0 column sum here - confirm intended
		exp.data=10000*exp.data/colsum
	else:
		if not rawreads:
			exp.data=10000*exp.data/np.mean(colsum)

	exp.uniqueid=exp.getexperimentid()
	if sortit:
		exp=hs.sortbacteria(exp,logit=False)
	hs.addcommand(exp,"load",params=params)
	exp.filters.append('loaded table=%s, map=%s' % (tablename,mapname))
	return(exp)
Example #9
0
def plotexp(exp,sortby=False,numeric=False,minreads=4,rangeall=False,seqdb=None,cdb=None,showline=True,ontofig=False,usegui=True,showxall=False,showcolorbar=False,ptitle=False,lowcutoff=1,uselog=True,showxlabel=True,colormap=False,colorrange=False,linewidth=2,subline='',showhline=True,newfig=True,fixfont=False,fontsize=None,nosort=False,zeroisnone=False,xlabelrotation=45,showtaxnames=False):
	"""
	Plot an experiment
	input:
	exp - from load()
	sortby - name of mapping file field to sort by or False to not sort
	numeric - True if the field is numeric
	minreads - minimum number of reads per bacteria in order to show it or 0 to show all
	rangeall - True to show all frequencies in image scale, false to saturate at 10%
	seqdb - the SRBactDB database (from bactdb.load), or None (default) to use hs.bdb
	cdb - the cool sequences database (from cooldb.load), or None (default) to use the heatsequer loaded cdb
	showline - if True plot lines between category values
	ontofig - name of ontology to plot for bactdb or false to no plot
	usegui - True use a gui for otu summary, False just print
	showxall - True to show all sample names when not sorting, False to show no more than 10
	showcolorbar - True to plot the colorbar. False to not plot
	ptitle : str (optional)
		False or '' (default) to show the processing history as title, None to not show a title, or str of name of the figure
	lowcutoff - minimal value for read (for 0 log transform) - the minimal resolution - could be 10000*2/origreads
	uselog : bool
		True (default) to plot log2 of the data (after raising values below lowcutoff to lowcutoff), False to plot the data as is
		NOTE(review): also passed as the logit parameter of hs.filterminreads - confirm this is intended
	showxlabel : bool
		True to show the x label (default), False to hide it
	colormap : string or False
		name of colormap or False (default) to use mpl default colormap
	colorrange : [min,max] or False
		[min,max] to set the colormap range, False to use data min,max (default) as specified in rangeall
	linewidth : number
		the width of the lines between category values (used when showline is True)
	subline : str
		Name of category for subline plotting or '' (Default) for no sublines
	showhline : bool
		True (default) to plot the horizontal lines listed in exp.hlines. False to not plot them
	newfig : bool
		True (default) to open figure in new window, False to use current
	fixfont : bool (optional)
		False (default) to leave the y tick labels font as is, True to use a fixed width (monospace) font
	fontsize : int or None (optional)
		None (default) to use default font size, number to use that font size
	nosort : bool (optional)
		False (default) to sort by the sort field, True to skip the sorting
	zeroisnone : bool (optional)
		False (default) to plot zeros as 0, True to assign None (white color)
	xlabelrotation : int (optional)
		the rotation of the xtick labels
	showtaxnames : bool (optional)
		False (default) to not show tax names (need to press 'h' to show)
		True to show the taxonomy names

	output:
	newexp - the plotted experiment (sorted and filtered)
	ax - the plot axis
	"""

	hs.Debug(1,"Plot experiment %s" % exp.studyname)
	hs.Debug(1,"Commands:")
	for ccommand in exp.commands:
		hs.Debug(1,"%s" % ccommand)

	if exp.sparse:
		hs.Debug(9,'Sparse matrix - converting to dense')
		exp=hs.copyexp(exp,todense=True)

	vals=[]
	if cdb is None:
		cdb=hs.cdb
	if seqdb is None:
		seqdb=hs.bdb
	if sortby:
		if not nosort:
			hs.Debug(1,"Sorting by field %s" % sortby)
			for csamp in exp.samples:
				vals.append(exp.smap[csamp][sortby])
			if numeric:
				hs.Debug(1,"(numeric sort)")
				vals=hs.tofloat(vals)
			svals,sidx=hs.isort(vals)
			newexp=hs.reordersamples(exp,sidx)
		else:
			hs.Debug(1,"no sorting but showing columns")
			svals=hs.getfieldvals(exp,sortby)
			newexp=hs.copyexp(exp)
	else:
		hs.Debug(1,"No sample sorting")
		svals=hs.getfieldvals(exp,'#SampleID')
		newexp=hs.copyexp(exp)
	hs.Debug(1,"Filtering min reads. original bacteria - %d" % len(newexp.seqs))
	if minreads>0:
		newexp=hs.filterminreads(newexp,minreads,logit=uselog)
	hs.Debug(1,"New number of bacteria %d" % len(newexp.seqs))
	newexp.seqdb=seqdb
	newexp.cdb=cdb
	newexp.scdb=hs.scdb

	# ldat refers to the data of newexp, so the in-place changes below also change newexp.data
	ldat=newexp.data
	if zeroisnone:
		ldat[ldat==0]=None
	if uselog:
		hs.Debug(1,"Using log, cutoff at %f" % lowcutoff)
		ldat[np.where(ldat<lowcutoff)]=lowcutoff
		ldat=np.log2(ldat)
	# NOTE(review): oldparams is the same object as plt.rcParams (not a copy), so assigning it back
	# at the end does not undo the keymap changes made by mpl.rc below - confirm before changing,
	# since the plot key handler may rely on the remapped keys
	oldparams=plt.rcParams
	mpl.rc('keymap',back='c, backspace')
	mpl.rc('keymap',forward='v')
	mpl.rc('keymap',all_axes='A')
	if newfig:
		f=plt.figure(tight_layout=True)
	else:
		f=plt.gcf()
	# set the colormap to default if not supplied
	if not colormap:
		colormap=plt.rcParams['image.cmap']
	# plot the image
	if colorrange:
		hs.Debug(1,"colormap range is %s" % str(colorrange))
		iax=plt.imshow(ldat,interpolation='nearest',aspect='auto',clim=colorrange,cmap=plt.get_cmap(colormap))
	elif rangeall:
		hs.Debug(1,"colormap range is all")
		iax=plt.imshow(ldat,interpolation='nearest',aspect='auto',cmap=plt.get_cmap(colormap))
	else:
		hs.Debug(1,"colormap range is 0,10")
		iax=plt.imshow(ldat,interpolation='nearest',aspect='auto',clim=[0,10],cmap=plt.get_cmap(colormap))

	if ptitle is not None:
		if not ptitle:
			hs.Debug(1,"Showing filters in title")
			# for a long history show only the first and the last two filters
			if (len(newexp.filters))>4:
				cfilters=[newexp.filters[0],'...',newexp.filters[-2],newexp.filters[-1]]
			else:
				cfilters=newexp.filters
			cfilters=hs.clipstrings(cfilters,30)
			ptitle='\n'.join(cfilters)
		plt.title(ptitle,fontsize=10)

	# use the axes attribute of the image (the get_axes() method is no longer available in current matplotlib)
	ax=iax.axes
	ax.autoscale(False)

	# plot the sublines (smaller category lines)
	if subline:
		slval=hs.getfieldvals(newexp,subline)
		prevval=slval[0]
		for idx,cval in enumerate(slval):
			if cval!=prevval:
				xpos=idx-0.5
				plt.plot([xpos,xpos],[-0.5,np.size(ldat,0)-0.5],'w:')
				prevval=cval

	if showline:
		hs.Debug(1,"Showing lines")
		labs=[]
		labpos=[]
		linepos=[]
		minpos=0
		# sentinel so the last group of values is also closed in the loop below
		svals.append('end')
		for idx,cval in enumerate(svals[:-1]):
			if cval==svals[idx+1]:
				continue
			# end of a run of identical values - label at the middle of the run, line after it
			labpos.append(minpos-0.5+float(idx+1-minpos)/2)
			minpos=idx+1
			linepos.append(idx+0.5)
			labs.append(cval)
		hs.Debug(1,"number of lines is %d" % len(linepos))
		if showxlabel:
			ax.set_xticks(labpos)
			ax.set_xticklabels(labs,rotation=xlabelrotation,ha='right')
		for cx in linepos:
			# black line with a white dotted line on top
			plt.plot([cx,cx],[-0.5,np.size(ldat,0)-0.5],'k',linewidth=linewidth)
			plt.plot([cx,cx],[-0.5,np.size(ldat,0)-0.5],'w:',linewidth=linewidth)
	else:
		hs.Debug(1,"Not showing lines")
		if showxall or len(newexp.samples)<=10:
			hs.Debug(1,"less than 10 samples, showing all sample names")
			ax.set_xticklabels(svals,rotation=90)
			ax.set_xticks(range(len(newexp.samples)))
	ax.set_ylim(-0.5,np.size(ldat,0)-0.5)

	if fixfont:
		fontProperties = {'family':'monospace'}
		ax.set_yticklabels(ax.get_yticks(), fontProperties)

	if showcolorbar:
		hs.Debug(1,"Showing colorbar")
		cb=plt.colorbar(ticks=list(np.log2([2,10,100,500,1000])))
		cb.ax.set_yticklabels(['<0.02%','0.1%','1%','5%','>10%'])

	# attach the state used by the mouse/key handlers to the axis
	ax.expdat=newexp
	ax.lastselect=-1
	ax.sampline=''
	ax.ofig=f
	ax.labelson=False
	ax.labelnames=[]
	f.canvas.mpl_connect('button_press_event', onplotmouseclick)
	f.canvas.mpl_connect('key_press_event', onplotkeyclick)
	plt.rcParams=oldparams

	# if want the ontology analysis for a given category:
	if ontofig:
		hs.Debug(1,"Ontofig is set")
		newexp.ontofigname=ontofig
	else:
		newexp.ontofigname=False

	# if we want gui, open it
	if usegui:
		hs.Debug(1,"Using the GUI window")
		from heatsequer.plots import plotwingui
		# construct the gui window exactly once
		guiwin = plotwingui.PlotGUIWindow(newexp)
		ax.guiwin=guiwin
		guiwin.plotfig=f
		guiwin.plotax=ax
		guiwin.show()
	else:
		ax.guiwin=False
		hs.Debug(7,'Not using gui')

	ax.plot_labelsize=fontsize
	if newexp.plotmetadata:
		hs.Debug(1,"Experiment has metadata attached for plotting (%d points)" % len(newexp.plotmetadata))
		for cmet in newexp.plotmetadata:
			addplotmetadata(newexp,field=cmet[0],value=cmet[1],color=cmet[2],inverse=cmet[3],beforesample=cmet[4])
	if showhline:
		if newexp.hlines:
			for cpos in newexp.hlines:
				plt.plot([0,np.shape(newexp.data)[1]],[cpos-0.5,cpos-0.5],'g')
	plt.show()
	if showtaxnames:
		showtaxonomies(newexp,ax,showdb=False,showcontam=False)

	return newexp,ax
Example #10
0
def plotexp(exp,sortby=False,numeric=False,minreads=4,rangeall=False,seqdb=None,cdb=None,showline=True,ontofig=False,usegui=True,showxall=False,showcolorbar=False,ptitle=False,lowcutoff=1,uselog=True,showxlabel=True,colormap=False,colorrange=False):
	"""
	Plot an experiment
	input:
	exp - from load()
	sortby - name of mapping file field to sort by or False to not sort
	numeric - True if the field is numeric
	minreads - minimum number of reads per bacteria in order to show it or 0 to show all
	rangeall - True to show all frequencies in image scale, false to saturate at 10%
	seqdb - the SRBactDB database (from bactdb.load) - stored in the plotted experiment as given
	cdb - the cool sequences database (from cooldb.load) - stored in the plotted experiment as given
	showline - if True plot lines between category values
	ontofig - name of ontology to plot for bactdb or false to no plot
	usegui - True use a gui for otu summary, False just print
	showxall - True to show all sample names when not sorting, False to show no more than 10
	showcolorbar - True to plot the colorbar. False to not plot
	ptitle - name of the figure or False to show processing history as name
	lowcutoff - minimal value for read (for 0 log transform) - the minimal resolution - could be 10000*2/origreads
	uselog : bool
		True (default) to plot log2 of the data (after raising values below lowcutoff to lowcutoff), False to plot the data as is
		NOTE(review): also passed as the logit parameter of hs.filterminreads - confirm this is intended
	showxlabel : bool
		True to show the x label (default), False to hide it
	colormap : string or False
		name of colormap or False (default) to use mpl default colormap
	colorrange : [min,max] or False
		[min,max] to set the colormap range, False to use data min,max (default) as specified in rangeall

	output:
	newexp - the plotted experiment (sorted and filtered)
	ax - the plot axis
	"""

	hs.Debug(1,"Plot experiment %s" % exp.studyname)
	hs.Debug(1,"Commands:")
	for ccommand in exp.commands:
		hs.Debug(1,"%s" % ccommand)
	vals=[]
	if sortby:
		hs.Debug(1,"Sorting by field %s" % sortby)
		for csamp in exp.samples:
			vals.append(exp.smap[csamp][sortby])
		if numeric:
			hs.Debug(1,"(numeric sort)")
			vals=hs.tofloat(vals)
		svals,sidx=hs.isort(vals)
		newexp=hs.reordersamples(exp,sidx)
	else:
		hs.Debug(1,"No sample sorting")
		svals=hs.getfieldvals(exp,'#SampleID')
		newexp=hs.copyexp(exp)
	hs.Debug(1,"Filtering min reads. original bacteria - %d" % len(newexp.seqs))
	if minreads>0:
		newexp=hs.filterminreads(newexp,minreads,logit=uselog)
	hs.Debug(1,"New number of bacteria %d" % len(newexp.seqs))
	newexp.seqdb=seqdb
	newexp.cdb=cdb

	# ldat refers to the data of newexp, so the in-place clipping below also changes newexp.data
	ldat=newexp.data
	if uselog:
		hs.Debug(1,"Using log, cutoff at %f" % lowcutoff)
		ldat[np.where(ldat<lowcutoff)]=lowcutoff
		ldat=np.log2(ldat)
	# NOTE(review): oldparams is the same object as plt.rcParams (not a copy), so assigning it back
	# at the end does not undo the keymap changes made by mpl.rc below - confirm before changing,
	# since the plot key handler may rely on the remapped keys
	oldparams=plt.rcParams
	mpl.rc('keymap',back='c, backspace')
	mpl.rc('keymap',forward='v')
	mpl.rc('keymap',all_axes='A')
	f=figure()
	# set the colormap to default if not supplied
	if not colormap:
		colormap=plt.rcParams['image.cmap']
	# plot the image
	if colorrange:
		hs.Debug(1,"colormap range is %s" % str(colorrange))
		iax=imshow(ldat,interpolation='nearest',aspect='auto',clim=colorrange,cmap=plt.get_cmap(colormap))
	elif rangeall:
		hs.Debug(1,"colormap range is all")
		iax=imshow(ldat,interpolation='nearest',aspect='auto',cmap=plt.get_cmap(colormap))
	else:
		hs.Debug(1,"colormap range is 0,10")
		iax=imshow(ldat,interpolation='nearest',aspect='auto',clim=[0,10],cmap=plt.get_cmap(colormap))

	if not ptitle:
		hs.Debug(1,"Showing filters in title")
		# for a long history show only the first and the last two filters
		if (len(newexp.filters))>4:
			cfilters=[newexp.filters[0],'...',newexp.filters[-2],newexp.filters[-1]]
		else:
			cfilters=newexp.filters
		cfilters=hs.clipstrings(cfilters,30)
		ptitle='\n'.join(cfilters)
	title(ptitle,fontsize=10)

	# use the axes attribute of the image (the get_axes() method is no longer available in current matplotlib)
	ax=iax.axes
	ax.autoscale(False)
	if showline:
		hs.Debug(1,"Showing lines")
		labs=[]
		labpos=[]
		linepos=[]
		minpos=0
		# sentinel so the last group of values is also closed in the loop below
		svals.append('end')
		for idx,cval in enumerate(svals[:-1]):
			if cval==svals[idx+1]:
				continue
			# end of a run of identical values - label at the middle of the run, line after it
			labpos.append(minpos-0.5+float(idx+1-minpos)/2)
			minpos=idx+1
			linepos.append(idx+0.5)
			labs.append(cval)
		hs.Debug(1,"number of lines is %d" % len(linepos))
		if showxlabel:
			ax.set_xticks(labpos)
			ax.set_xticklabels(labs,rotation=45,ha='right')
		for cx in linepos:
			plot([cx,cx],[-0.5,np.size(ldat,0)-0.5],'k',linewidth=2)
	else:
		hs.Debug(1,"Not showing lines")
		if showxall or len(newexp.samples)<=10:
			hs.Debug(1,"less than 10 samples, showing all sample names")
			ax.set_xticklabels(svals,rotation=90)
			ax.set_xticks(range(len(newexp.samples)))
	tight_layout()
	ax.set_ylim(-0.5,np.size(ldat,0)+0.5)

	if showcolorbar:
		hs.Debug(1,"Showing colorbar")
		cb=colorbar(ticks=list(np.log2([2,10,100,500,1000])))
		cb.ax.set_yticklabels(['<0.02%','0.1%','1%','5%','>10%'])

	# attach the state used by the mouse/key handlers to the axis
	ax.expdat=newexp
	ax.lastselect=-1
	ax.sampline=''
	ax.ofig=f
	ax.labelson=False
	ax.labelnames=[]
	f.canvas.mpl_connect('button_press_event', onplotmouseclick)
	f.canvas.mpl_connect('key_press_event', onplotkeyclick)
	plt.rcParams=oldparams

	# if want the ontology analysis for a given category:
	if ontofig:
		hs.Debug(1,"Ontofig is set")
		newexp.ontofigname=ontofig
	else:
		newexp.ontofigname=False

	# if we want gui, open it
	if usegui:
		hs.Debug(1,"Using the GUI window")
		import heatsequer.plots.plotwingui
		guiwin = heatsequer.plots.plotwingui.PlotGUIWindow(newexp)
		ax.guiwin=guiwin
		guiwin.plotfig=f
		guiwin.plotax=ax
		guiwin.show()
	else:
		ax.guiwin=False
		hs.Debug(7,'Not using gui')

	if newexp.plotmetadata:
		hs.Debug(1,"Experiment has metadata attached for plotting (%d points)" % len(newexp.plotmetadata))
		for cmet in newexp.plotmetadata:
			addplotmetadata(newexp,field=cmet[0],value=cmet[1],color=cmet[2],inverse=cmet[3],beforesample=cmet[4])
	show()
	return newexp,ax
Example #11
0
def filtersimilarsamples(expdat,field,method='mean'):
	"""
	join similar samples into one sample (i.e. to remove samples of same individual)
	input:
	expdat : Experiment
		the experiment to join samples in (its data and mapping information are not modified)
	field : string
		Name of the field containing the values (for which similar values will be joined)
	method : string
		What to do with samples with similar value. options:
		'mean' - replace with a sample containing the mean of the samples
		'median'- replace with a sample containing the median of the samples
		'random' - replace with a single random sample out of these samples
		'sum' - replace with sum of original reads in all samples, renormalized after to 10k and orignumreads updated
		'fracpres' - replace with fraction of samples where the bacteria is present
	output:
	newexp : Experiment
		like the input experiment but only one sample per unique value in field.
		For joined samples, mapping fields that differ between the joined samples are set to 'NA'
		(not for method 'random', where the chosen sample is kept as is).
		False is returned if an unsupported method is encountered when joining samples
	"""
	params=locals()

	newexp=hs.copyexp(expdat)
	if method=='sum':
		newexp=hs.toorigreads(newexp)
	uvals=hs.getfieldvals(expdat,field,ounique=True)
	keep=[]
	for cval in uvals:
		cpos=hs.findsamples(expdat,field,cval)
		# a single sample with this value - nothing to join
		if len(cpos)==1:
			keep.append(cpos[0])
			continue
		if method=='random':
			keep.append(cpos[np.random.randint(len(cpos))])
			continue
		# set the mapping file values
		firstsamp=expdat.samples[cpos[0]]
		# work on a copy so the 'NA' values are not written into the input experiment
		cmap=expdat.smap[firstsamp].copy()
		for ccpos in cpos[1:]:
			othermap=expdat.smap[expdat.samples[ccpos]]
			for cfield in cmap:
				if cmap[cfield]!=othermap[cfield]:
					cmap[cfield]='NA'
		# the joined data is stored in the column of the first sample of the group
		if method=='mean':
			newexp.data[:,cpos[0]]=np.mean(expdat.data[:,cpos],axis=1)
		elif method=='median':
			newexp.data[:,cpos[0]]=np.median(expdat.data[:,cpos],axis=1)
		elif method=='sum':
			# newexp was converted with hs.toorigreads above, so sum its data (not the input data)
			newexp.data[:,cpos[0]]=np.sum(newexp.data[:,cpos],axis=1)
			newexp.origreads[cpos[0]]=np.sum(hs.reorder(expdat.origreads,cpos))
		elif method=='fracpres':
			numpresent=np.sum(expdat.data[:,cpos]>0,axis=1)
			# float division so the fraction is not truncated to 0 by integer division
			newexp.data[:,cpos[0]]=numpresent/float(len(cpos))
		else:
			hs.Debug(9,'method %s not supported' % method)
			return False
		keep.append(cpos[0])
		newexp.smap[firstsamp]=cmap
	newexp=hs.reordersamples(newexp,keep)
	if method=='sum':
		newexp=hs.normalizereads(newexp)
	newexp.filters.append('Filter similar samples field %s method %s' % (field,method))
	hs.addcommand(newexp,"filtersimilarsamples",params=params,replaceparams={'expdat':expdat})
	hs.Debug(6,'%d samples before filtering, %d after' % (len(expdat.samples),len(newexp.samples)))
	return newexp