Python getfieldvalsの例、heatsequer.getfieldvals Pythonの例

コード例 #1

0

ファイルを表示

ファイル: expclass.py プロジェクト: tanaes/heatsequer

def samplemeanpervalue(expdat,field):
	"""
	Collapse an experiment to one sample per unique value of a field

	Each output sample holds the mean (over axis 1 of the data matrix) of all
	input samples that share the same value in field. The output samples are
	created by hs.reordersamples from the first sample of each group.

	input:
	expdat : Experiment
	field : string
		name of the field to group by (i.e. 'ENV_MATTER')

	output:
	newexp : Experiment
		the new experiment, with 1 sample per unique value of field
	"""
	# must stay the first statement so only the call arguments are recorded
	params=locals()

	uniquevals=hs.getfieldvals(expdat,field,ounique=True)
	allvals=hs.getfieldvals(expdat,field,ounique=False)

	# presumably maps each value to the list of sample positions holding it - confirm in hs.listtodict
	valpos=hs.listtodict(allvals)
	# the first sample of every group is the placeholder column of that group
	firstpos=[valpos[cval][0] for cval in uniquevals]
	newexp=hs.reordersamples(expdat,firstpos)
	for col,cval in enumerate(uniquevals):
		groupdat=expdat.data[:,valpos[cval]]
		newexp.data[:,col]=np.mean(groupdat,axis=1)
	newexp.filters.append('samplemeanpervalue for field %s' % field)
	hs.addcommand(newexp,"samplemeanpervalue",params=params,replaceparams={'expdat':expdat})
	return newexp

コード例 #2

0

ファイルを表示

ファイル: expclass.py プロジェクト: tanaes/heatsequer

def fieldtobact(expdat,field,bactname='',meanreads=1000,cutoff=0):
	"""
	Add a new bacteria whose reads are the (rescaled) values of a map file field

	Values >= cutoff are divided by their mean and multiplied by meanreads.
	Values < cutoff are replaced by meanreads. Values for which both
	comparisons are false (e.g. NaN) are left unchanged.

	input:
	expdat : Experiment
	field : string
		name of the field to convert
	bactname : string
		name of the new bacteria (empty to use the field name)
	meanreads : int
		the mean number of reads for the new field bacteria
	cutoff : int
		the minimal value of the field per sample (otherwise replace with meanreads)

	output:
	newexp : Experiment
		copy of expdat with the added bacteria holding the field vals as reads
	"""
	# must stay the first statement so only the call arguments are recorded
	params=locals()

	if not len(bactname):
		bactname=field
	vals=np.array(hs.tofloat(hs.getfieldvals(expdat,field)))
	# classify the samples before any value is rescaled
	valid=vals>=cutoff
	invalid=vals<cutoff
	scalefactor=np.mean(vals[valid])
	vals[valid]=(vals[valid]/scalefactor)*meanreads
	vals[invalid]=meanreads
	newexp=hs.copyexp(expdat)
	hs.insertbacteria(newexp,vals,bactname,bactname,logit=False)
	newexp.filters.append('add bacteria from map field %s' % field)
	hs.addcommand(newexp,"fieldtobact",params=params,replaceparams={'expdat':expdat})
	return newexp

コード例 #3

0

ファイルを表示

ファイル: mislabels.py プロジェクト: tanaes/heatsequer

def findmislabels(expdat,field,distmetric='bc'):
	"""
	find mislabelled samples according to field

	For every sample, calculate the mean distance to the samples of each group
	(unique value of field, excluding the sample itself) and show the
	sample x group matrix as a heatmap in a new figure.

	input:
	expdat : Experiment
	field : string
		name of the field to examine (i.e. subjectid)
	distmetric : string
		the distance metric to use (passed to hs.calcdist)
	"""

	expdat=hs.sortsamples(expdat,field)
	fvals=hs.getfieldvals(expdat,field)
	ufvals=list(set(fvals))
	# row labels: sample id followed by its field value
	onames=[csamp+';'+fvals[idx] for idx,csamp in enumerate(expdat.samples)]
	omat=np.zeros([len(fvals),len(ufvals)])
	for groupidx,groupval in enumerate(ufvals):
		cexp=hs.filtersamples(expdat,field,groupval,exact=True)
		for aidx,aval in enumerate(expdat.samples):
			cdist=[]
			for gidx,gval in enumerate(cexp.samples):
				# don't measure distance to ourselves
				if gval==aval:
					continue
				cdist.append(hs.calcdist(cexp.data[:,gidx],expdat.data[:,aidx],distmetric=distmetric))
			# no distances when the sample is the only member of the group:
			# store nan explicitly instead of taking the mean of an empty list
			omat[aidx,groupidx]=np.mean(cdist) if cdist else np.nan
	figure()
	iax=imshow(omat,interpolation='nearest',aspect='auto')
	# Artist.get_axes() was removed from matplotlib; the axes attribute replaces it
	ax=iax.axes
	ax.set_xticks(range(len(ufvals)))
	ax.set_xticklabels(ufvals,rotation=90)
	ax.set_yticks(range(len(onames)))
	ax.set_yticklabels(onames)

コード例 #4

0

ファイルを表示

ファイル: hsgui.py プロジェクト: amnona/heatsequer

 def values(self):
     """Let the user pick one of the values of the selected field and show it in the value box."""
     cfield = str(self.cField.currentText())
     # offer every distinct value of the field found in the current experiment
     choices = list(set(hs.getfieldvals(self.cexp, cfield)))
     val, ok = QtWidgets.QInputDialog.getItem(self, "Select field value", "Field=%s" % cfield, choices)
     if not ok:
         # dialog was cancelled - leave the value box unchanged
         return
     self.tValue.setText(val)

コード例 #5

0

ファイルを表示

ファイル: sorting.py プロジェクト: mortonjt/heatsequer

def sortsamples(exp,field,numeric=False,logit=True):
	"""
	Reorder the samples of an experiment according to the values of a field

	input:
	exp : Experiment
	field : string
		name of the field to sort by
	numeric : bool
		True to convert the field values with hs.tofloat before sorting, False to sort them as they are
	logit : bool
		True to add the command and a filter description to the new experiment, False to skip

	output:
	newexp : Experiment
		the sorted experiment
	"""
	# must stay the first statement so only the call arguments are recorded
	params=locals()

	keys=hs.getfieldvals(exp,field)
	if numeric:
		keys=hs.tofloat(keys)
	# presumably isort returns (sorted values, sort order); only the order is used here
	_,order=hs.isort(keys)
	newexp=hs.reordersamples(exp,order)

	if not logit:
		return newexp
	hs.addcommand(newexp,"sortsamples",params=params,replaceparams={'exp':exp})
	newexp.filters.append('sorted samples by field %s' % field)
	return newexp

コード例 #6

0

ファイルを表示

ファイル: plotwingui.py プロジェクト: amnona/heatsequer

	def prepstudyinfo(self):
		"""
		Add study info entries from the mapping file, if available

		For each known study field present in the experiment, an entry is added
		only when all samples share a single value in that field.
		"""
		cexp=self.cexp
		# (mapping file field, entry type passed to addentry)
		studyfields=(
			('SRA_Study_s','sra'),
			('project_name_s','name'),
			('experiment_title','name'),
			('experiment_design_description','name'),
			('BioProject_s','sra'),
		)
		for cfield,infofield in studyfields:
			if cfield not in cexp.fields:
				continue
			uvals=hs.getfieldvals(cexp,cfield,ounique=True)
			if len(uvals)!=1:
				continue
			self.addentry(fromdb=False,ctype=infofield,value=uvals[0].lower(),color='black')

コード例 #7

0

ファイルを表示

ファイル: metrics.py プロジェクト: amnona/heatsequer

def getgroupgroupdist(expdat,field,distmat,dsamp,uvals=False,subfield='host_subject_id',vmin=0,vmax=1):
	"""
	Calculate the group distance matrix separately for each subfield value and average the results

	The samples are split by the values of subfield, hs.getgroupdist is run on
	each subset (grouping by field) and the resulting matrices are averaged and
	plotted with plotdistheatmap.
	(written for the Amina skin cosmetics study)

	input:
	expdat : Experiment
	field : string
		name of the field to group by
	distmat : numpy 2d array
		the distance matrix (from calcdistmat or loaddistmat)
	dsamp : dict
		the mapping of each sample id to the distance matrix position (from calcdistmat or loaddistmat)
	uvals : list or False
		False/empty to use all values of field, or a list of values (in field) to use only them
	subfield : str
		name of the field used to split the samples (i.e. 'host_subject_id')
	vmin, vmax :
		passed to plotdistheatmap

	output:
	omat : numpy 2d array
		the averaged group distance matrix
	"""
	allvals=hs.getfieldvals(expdat,field)
	if not uvals:
		uvals=list(set(allvals))
	subvals=hs.getfieldvals(expdat,subfield,ounique=True)
	ngroups=len(uvals)
	total=np.zeros([ngroups,ngroups])
	numok=0
	for subval in subvals:
		subexp=hs.filtersamples(expdat,subfield,subval)
		gdist,uvals=hs.getgroupdist(subexp,field,distmat,dsamp,plotit=False,uvals=uvals)
		# entries that are nan in this subset contribute 0 to the sum
		gdist[np.isnan(gdist)]=0
		# NOTE(review): the nans were zeroed on the previous line, so this test can
		# only fail if the sum itself becomes nan (e.g. infinities of both signs) - confirm intent
		if not np.isnan(np.sum(np.sum(gdist))):
			total=total+gdist
			numok+=1
	omat=total/numok
	plotdistheatmap(omat,uvals,vmin=vmin,vmax=vmax)
	return omat

コード例 #8

0

ファイルを表示

ファイル: filtering.py プロジェクト: mortonjt/heatsequer

def filterwinperid(expdat,idfield,field,val1,val2,mineffect=1):
	"""
	Run filterfieldwave on each individual (based on idfield) and join the resulting bacteria

	input:
	expdat : Experiment
	idfield : string
		name of the field identifying the individual
	field, val1, val2, mineffect :
		passed to hs.filterfieldwave for the samples of each individual

	output:
	newexp : Experiment
		expdat filtered (hs.filterseqs) to the sequences kept for at least one individual
	"""
	params=locals()

	keptseqs=[]
	for cid in hs.getfieldvals(expdat,idfield,ounique=True):
		idexp=hs.filtersamples(expdat,idfield,cid)
		waveexp=hs.filterfieldwave(idexp,field,val1,val2,mineffect=mineffect)
		keptseqs.extend(waveexp.seqs)
	# drop sequences found in more than one individual
	keptseqs=list(set(keptseqs))
	return hs.filterseqs(expdat,keptseqs)

コード例 #9

0

ファイルを表示

ファイル: plotwingui.py プロジェクト: amnona/heatsequer

	def prefillinfo(self):
		"""
		prefill "ALL" data fields based on mapping file
		if all samples have same info

		For every mapping file field with a single unique value, the value is
		converted to an ontology lookup key and, if found in self.ontology, the
		matching term is added to the 'ALL' list via self.addtolist.
		"""
		hs.Debug(1,'prefill info')
		ontologyfromid=self.ontologyfromid
		# the pickle is read from the heatsequer directory (hs.heatsequerdir);
		# the with block closes the file even if loading fails
		with open(os.path.join(hs.heatsequerdir,'db/ncbitaxontofromid.pickle'),'rb') as fl:
			ncbitax=pickle.load(fl)

		cexp=self.cexp
		for cfield in cexp.fields:
			uvals=hs.getfieldvals(cexp,cfield,ounique=True)
			# only fields where all samples share one value are used
			if len(uvals)!=1:
				hs.Debug(1,'found %d values' % len(uvals))
				continue
			cval=uvals[0]
			hs.Debug(1,'found 1 value %s' % cval)
			if cfield=='HOST_TAXID' or cfield=='host_taxid':
				hs.Debug(2,'%s field has 1 value %s' % (cfield,cval))
				# if ncbi taxonomy (field used differently)
				cval='NCBITaxon:'+cval
				if cval in ncbitax:
					hs.Debug(2,'found in ncbitax %s' % cval)
					cval=ncbitax[cval]
			else:
				# convert 'ENVO:XXX' to 'XXX :ENVO' (values without ':' are used as is)
				uvalspl=cval.split(':',1)
				if len(uvalspl)>1:
					cval=uvalspl[1]+' :'+uvalspl[0]
			if cval in self.ontology:
				cval=ontologyfromid[self.ontology[cval]]
				hs.Debug(2,'term %s found in ontologyfromid' % cval)
				hs.Debug(1,'add prefill %s' % cval)
				self.addtolist('ALL',cval)
			else:
				hs.Debug(3,'term %s NOT found in ontologyfromid' % uvals[0])

コード例 #10

0

ファイルを表示

ファイル: metrics.py プロジェクト: tanaes/heatsequer

def getgroupdist(expdat,field,distmat,dsamp,plotit=True,plottype='heatmap',uvals=False):
	"""
	calculate the distance matrix based on groups of samples according to field
	using a distance matrix and mapping

	Entry [i,j] is the mean of the distances between all pairs of different
	samples taken from group i and group j (nan if there are no such pairs).
	Samples missing from dsamp are ignored.

	input:
	expdat : Experiment
	field : string
		name of the field to group by
	distmat : numpy 2d array
		the distance matrix (from calcdistmat or loaddistmat)
	dsamp : dict
		the mapping of each sample id to the distance matrix position (from calcdistmat or loaddistmat)
	plotit : bool
		True to plot (in a new figure), False to not plot
	plottype: string
		'heatmap' - to plot heatmap of pairwise values
		'hist' - to plot histogram of pairwise values
	uvals : list or False
		False/empty to use all values, or a list of values to use only them (in field)
	output:
	gdist : numpy 2d array
		the group distance matrix
	uvals : list
		group names in the matrix (ordered)
	"""

	vals=hs.getfieldvals(expdat,field)
	if not uvals:
		uvals=list(set(vals))
	gdist=np.empty([len(uvals),len(uvals)])
	# np.nan (lowercase) is the spelling available in all numpy versions
	gdist.fill(np.nan)
	# sample positions of each field value
	gmap=defaultdict(list)
	for idx,cval in enumerate(vals):
		gmap[cval].append(idx)
	distdict={}
	for idx1,cg1 in enumerate(uvals):
		pos1=gmap[cg1]
		for idx2,cg2 in enumerate(uvals):
			pos2=gmap[cg2]
			adist=[]
			for p1 in pos1:
				samp1=expdat.samples[p1]
				if samp1 not in dsamp:
					continue
				row=dsamp[samp1]
				for p2 in pos2:
					# don't measure the distance of a sample to itself
					if p1==p2:
						continue
					samp2=expdat.samples[p2]
					if samp2 not in dsamp:
						continue
					adist.append(distmat[row,dsamp[samp2]])
			distdict[(cg1,cg2)]=adist
			# keep the nan fill value when there are no pairs (avoids the empty-mean warning)
			if adist:
				gdist[idx1,idx2]=np.mean(adist)
	if plotit:
		figure()
		if plottype=='heatmap':
			plotdistheatmap(gdist,uvals)
			title(expdat.studyname+' '+field)
		elif plottype=='hist':
			pl=[]
			names=[]
			# (a,b) and (b,a) hold the same distances - plot each unordered pair once
			seenpairs=set()
			for k,v in distdict.items():
				ks=frozenset(k)
				if ks in seenpairs:
					continue
				seenpairs.add(ks)
				pl.append(v)
				names.append(k)
			# 'density' replaces the 'normed' argument removed from matplotlib
			hist(pl,alpha=0.5,density=True,bins=50,range=[0,1])
			legend(names)
	return gdist,uvals

コード例 #11

0

ファイルを表示

ファイル: plotwin.py プロジェクト: amnona/heatsequer

def plotexp(exp,sortby=False,numeric=False,minreads=4,rangeall=False,seqdb=None,cdb=None,showline=True,ontofig=False,usegui=True,showxall=False,showcolorbar=False,ptitle=False,lowcutoff=1,uselog=True,showxlabel=True,colormap=False,colorrange=False,linewidth=2,subline='',showhline=True,newfig=True,fixfont=False,fontsize=None,nosort=False,zeroisnone=False,xlabelrotation=45,showtaxnames=False):
	"""
	Plot an experiment as an interactive heatmap
	input:
	exp - from load()
	sortby - name of mapping file field to sort by or False to not sort
	numeric - True if the field is numeric
	minreads - minimum number of reads per bacteria in order to show it or 0 to show all
	rangeall - True to show all frequencies in image scale, False to use the color limits [0,10]
	seqdb - the SRBactDB database (from bactdb.load), or None (default) to use hs.bdb
	cdb - the cool sequences database (from cooldb.load), or None (default) to use the heatsequer loaded cdb
	showline - if True plot lines between category values
	ontofig - name of ontology to plot for bactdb or False to not plot
	usegui - True to open the gui window (PlotGUIWindow), False to not open it
	showxall - True to show all sample names when not showing lines, False to show them only for 10 samples or less
	showcolorbar - True to plot the colorbar. False to not plot
	ptitle : str (optional)
		'' or False (default) to show the processing history as name, None to not show a title, or str of the name of the figure
	lowcutoff - minimal value for read (for 0 log transform) - the minimal resolution - could be 10000*2/origreads
	uselog : bool
		True (default) to clip the data at lowcutoff and plot log2 of it, False to plot the data as is
	showxlabel : bool
		True to show the x label (default), False to hide it
	colormap : string or False
		name of colormap or False (default) to use mpl default colormap
	colorrange : [min,max] or False
		[min,max] to set the colormap range, False to use data min,max (default) as specified in rangeall
	linewidth : int
		width of the lines between category values
	subline : str
		Name of category for subline plotting or '' (Default) for no sublines
	showhline : bool
		True (default) to plot the horizontal lines listed in exp.hlines. False to not plot them
	newfig : bool
		True (default) to open figure in new window, False to use current
	fixfont : bool (optional)
		False (default) to use the default font, True to use fixed width font for the y tick labels
	fontsize : int or None (optional)
		None (default) to use default font size, number to use that font size
	nosort : bool (optional)
		False (default) to sort by the sort field, True to skip the sorting
	zeroisnone : bool (optional)
		False (default) to plot zeros as 0, True to assign None (white color)
	xlabelrotation : int (optional)
		the rotation of the xtick labels
	showtaxnames : bool (optional)
		False (default) to not show tax names (need to press 'h' to show)
		True to show the taxonomy names

	output:
	newexp - the plotted experiment (sorted and filtered)
	ax - the plot axis
	"""

	hs.Debug(1,"Plot experiment %s" % exp.studyname)
	hs.Debug(1,"Commands:")
	for ccommand in exp.commands:
		hs.Debug(1,"%s" % ccommand)

	if exp.sparse:
		hs.Debug(9,'Sparse matrix - converting to dense')
		exp=hs.copyexp(exp,todense=True)

	if cdb is None:
		cdb=hs.cdb
	if seqdb is None:
		seqdb=hs.bdb
	if sortby:
		if not nosort:
			hs.Debug(1,"Sorting by field %s" % sortby)
			vals=[exp.smap[csamp][sortby] for csamp in exp.samples]
			if numeric:
				hs.Debug(1,"(numeric sort)")
				vals=hs.tofloat(vals)
			svals,sidx=hs.isort(vals)
			newexp=hs.reordersamples(exp,sidx)
		else:
			hs.Debug(1,"no sorting but showing columns")
			svals=hs.getfieldvals(exp,sortby)
			newexp=hs.copyexp(exp)
	else:
		hs.Debug(1,"No sample sorting")
		svals=hs.getfieldvals(exp,'#SampleID')
		newexp=hs.copyexp(exp)
	hs.Debug(1,"Filtering min reads. original bacteria - %d" % len(newexp.seqs))
	if minreads>0:
		newexp=hs.filterminreads(newexp,minreads,logit=uselog)
	hs.Debug(1,"New number of bacteria %d" % len(newexp.seqs))
	newexp.seqdb=seqdb
	newexp.cdb=cdb
	newexp.scdb=hs.scdb

	# note: the zero replacement and the low cutoff are applied in place to newexp.data
	ldat=newexp.data
	if zeroisnone:
		ldat[ldat==0]=None
	if uselog:
		hs.Debug(1,"Using log, cutoff at %f" % lowcutoff)
		ldat[np.where(ldat<lowcutoff)]=lowcutoff
		ldat=np.log2(ldat)
	# NOTE(review): oldparams references the same rcParams object changed by mpl.rc below,
	# so assigning it back at the end does not undo the keymap changes. The keymap changes
	# look intended to stay active for the key handler of the plot - confirm before changing
	oldparams=plt.rcParams
	mpl.rc('keymap',back='c, backspace')
	mpl.rc('keymap',forward='v')
	mpl.rc('keymap',all_axes='A')
	if newfig:
		f=plt.figure(tight_layout=True)
	else:
		f=plt.gcf()
	# set the colormap to default if not supplied
	if not colormap:
		colormap=plt.rcParams['image.cmap']
	# plot the image
	if colorrange:
		hs.Debug(1,"colormap range is %s" % str(colorrange))
		iax=plt.imshow(ldat,interpolation='nearest',aspect='auto',clim=colorrange,cmap=plt.get_cmap(colormap))
	elif rangeall:
		hs.Debug(1,"colormap range is all")
		iax=plt.imshow(ldat,interpolation='nearest',aspect='auto',cmap=plt.get_cmap(colormap))
	else:
		hs.Debug(1,"colormap range is 0,10")
		iax=plt.imshow(ldat,interpolation='nearest',aspect='auto',clim=[0,10],cmap=plt.get_cmap(colormap))

	if ptitle is not None:
		if not ptitle:
			hs.Debug(1,"Showing filters in title")
			if (len(newexp.filters))>4:
				cfilters=[newexp.filters[0],'...',newexp.filters[-2],newexp.filters[-1]]
			else:
				cfilters=newexp.filters
			cfilters=hs.clipstrings(cfilters,30)
			ptitle='\n'.join(cfilters)
		plt.title(ptitle,fontsize=10)

	# Artist.get_axes() was removed from matplotlib; the axes attribute replaces it
	ax=iax.axes
	ax.autoscale(False)

	# plot the sublines (smaller category lines)
	if subline:
		slval=hs.getfieldvals(newexp,subline)
		prevval=slval[0]
		for idx,cval in enumerate(slval):
			if cval!=prevval:
				xpos=idx-0.5
				plt.plot([xpos,xpos],[-0.5,np.size(ldat,0)-0.5],'w:')
				prevval=cval

	if showline:
		hs.Debug(1,"Showing lines")
		labs=[]
		labpos=[]
		linepos=[]
		minpos=0
		# each value is compared to its successor; the 'end' sentinel closes the last group.
		# a new list is built so the list returned by isort/getfieldvals is not modified
		bounds=list(svals)+['end']
		for idx,cval in enumerate(svals):
			if cval==bounds[idx+1]:
				continue
			# label at the middle of the group, line after its last sample
			labpos.append(minpos-0.5+float(idx+1-minpos)/2)
			minpos=idx+1
			linepos.append(idx+0.5)
			labs.append(cval)
		hs.Debug(1,"number of lines is %d" % len(linepos))
		if showxlabel:
			ax.set_xticks(labpos)
			ax.set_xticklabels(labs,rotation=xlabelrotation,ha='right')
		for cx in linepos:
			# black line with a white dotted line on top so it is visible on any background
			plt.plot([cx,cx],[-0.5,np.size(ldat,0)-0.5],'k',linewidth=linewidth)
			plt.plot([cx,cx],[-0.5,np.size(ldat,0)-0.5],'w:',linewidth=linewidth)
	else:
		hs.Debug(1,"Not showing lines")
		if showxall or len(newexp.samples)<=10:
			hs.Debug(1,"less than 10 samples, showing all sample names")
			# tick positions must be set before the labels so each label matches its sample
			ax.set_xticks(range(len(newexp.samples)))
			ax.set_xticklabels(svals,rotation=90)
	ax.set_ylim(-0.5,np.size(ldat,0)-0.5)

	if fixfont:
		fontProperties = {'family':'monospace'}
		ax.set_yticklabels(ax.get_yticks(), fontProperties)

	if showcolorbar:
		hs.Debug(1,"Showing colorbar")
		cb=plt.colorbar(ticks=list(np.log2([2,10,100,500,1000])))
		cb.ax.set_yticklabels(['<0.02%','0.1%','1%','5%','>10%'])

	# attach the state used by the mouse/keyboard handlers to the axis
	ax.expdat=newexp
	ax.lastselect=-1
	ax.sampline=''
	ax.ofig=f
	ax.labelson=False
	ax.labelnames=[]
	f.canvas.mpl_connect('button_press_event', onplotmouseclick)
	f.canvas.mpl_connect('key_press_event', onplotkeyclick)
	plt.rcParams=oldparams

	# if want the ontology analysis for a given category:
	if ontofig:
		hs.Debug(1,"Ontofig is set")
		newexp.ontofigname=ontofig
	else:
		newexp.ontofigname=False

	# if we want gui, open it
	if usegui:
		hs.Debug(1,"Using the GUI window")
		from heatsequer.plots import plotwingui
		# a single window instance is created for the plot
		guiwin = plotwingui.PlotGUIWindow(newexp)
		ax.guiwin=guiwin
		guiwin.plotfig=f
		guiwin.plotax=ax
		guiwin.show()
	else:
		ax.guiwin=False
		hs.Debug(7,'Not using gui')

	ax.plot_labelsize=fontsize
	if newexp.plotmetadata:
		hs.Debug(1,"Experiment has metadata attached for plotting (%d points)" % len(newexp.plotmetadata))
		for cmet in newexp.plotmetadata:
			addplotmetadata(newexp,field=cmet[0],value=cmet[1],color=cmet[2],inverse=cmet[3],beforesample=cmet[4])
	if showhline:
		if newexp.hlines:
			for cpos in newexp.hlines:
				plt.plot([0,np.shape(newexp.data)[1]],[cpos-0.5,cpos-0.5],'g')
	plt.show()
	if showtaxnames:
		showtaxonomies(newexp,ax,showdb=False,showcontam=False)

	return newexp,ax

コード例 #12

0

ファイルを表示

ファイル: hsgui.py プロジェクト: mortonjt/heatsequer

	def values(self):
		"""Let the user pick one of the values of the selected field and show it in the value box."""
		cfield=str(self.cField.currentText())
		# offer every distinct value of the field found in the current experiment
		choices=list(set(hs.getfieldvals(self.cexp,cfield)))
		val,ok=QtGui.QInputDialog.getItem(self,'Select field value','Field=%s' % cfield,choices)
		if not ok:
			# dialog was cancelled - leave the value box unchanged
			return
		self.tValue.setText(val)

コード例 #13

0

ファイルを表示

ファイル: plotwin.py プロジェクト: tanaes/heatsequer

def plotexp(exp,sortby=False,numeric=False,minreads=4,rangeall=False,seqdb=None,cdb=None,showline=True,ontofig=False,usegui=True,showxall=False,showcolorbar=False,ptitle=False,lowcutoff=1,uselog=True,showxlabel=True,colormap=False,colorrange=False):
	"""
	Plot an experiment as an interactive heatmap
	input:
	exp - from load()
	sortby - name of mapping file field to sort by or False to not sort
	numeric - True if the field is numeric
	minreads - minimum number of reads per bacteria in order to show it or 0 to show all
	rangeall - True to show all frequencies in image scale, False to use the color limits [0,10]
	seqdb - the SRBactDB database (from bactdb.load)
	cdb - the cool sequences database (from cooldb.load)
	showline - if True plot lines between category values
	ontofig - name of ontology to plot for bactdb or False to not plot
	usegui - True to open the gui window (PlotGUIWindow), False to not open it
	showxall - True to show all sample names when not showing lines, False to show them only for 10 samples or less
	showcolorbar - True to plot the colorbar. False to not plot
	ptitle - name of the figure or False to show processing history as name
	lowcutoff - minimal value for read (for 0 log transform) - the minimal resolution - could be 10000*2/origreads
	uselog : bool
		True (default) to clip the data at lowcutoff and plot log2 of it, False to plot the data as is
	showxlabel : bool
		True to show the x label (default), False to hide it
	colormap : string or False
		name of colormap or False (default) to use mpl default colormap
	colorrange : [min,max] or False
		[min,max] to set the colormap range, False to use data min,max (default) as specified in rangeall

	output:
	newexp - the plotted experiment (sorted and filtered)
	ax - the plot axis
	"""

	hs.Debug(1,"Plot experiment %s" % exp.studyname)
	hs.Debug(1,"Commands:")
	for ccommand in exp.commands:
		hs.Debug(1,"%s" % ccommand)
	if sortby:
		hs.Debug(1,"Sorting by field %s" % sortby)
		vals=[exp.smap[csamp][sortby] for csamp in exp.samples]
		if numeric:
			hs.Debug(1,"(numeric sort)")
			vals=hs.tofloat(vals)
		svals,sidx=hs.isort(vals)
		newexp=hs.reordersamples(exp,sidx)
	else:
		hs.Debug(1,"No sample sorting")
		svals=hs.getfieldvals(exp,'#SampleID')
		newexp=hs.copyexp(exp)
	hs.Debug(1,"Filtering min reads. original bacteria - %d" % len(newexp.seqs))
	if minreads>0:
		newexp=hs.filterminreads(newexp,minreads,logit=uselog)
	hs.Debug(1,"New number of bacteria %d" % len(newexp.seqs))
	newexp.seqdb=seqdb
	newexp.cdb=cdb

	# note: the low cutoff is applied in place to newexp.data
	ldat=newexp.data
	if uselog:
		hs.Debug(1,"Using log, cutoff at %f" % lowcutoff)
		ldat[np.where(ldat<lowcutoff)]=lowcutoff
		ldat=np.log2(ldat)
	# NOTE(review): oldparams references the same rcParams object changed by mpl.rc below,
	# so assigning it back at the end does not undo the keymap changes. The keymap changes
	# look intended to stay active for the key handler of the plot - confirm before changing
	oldparams=plt.rcParams
	mpl.rc('keymap',back='c, backspace')
	mpl.rc('keymap',forward='v')
	mpl.rc('keymap',all_axes='A')
	f=figure()
	# set the colormap to default if not supplied
	if not colormap:
		colormap=plt.rcParams['image.cmap']
	# plot the image
	if colorrange:
		hs.Debug(1,"colormap range is %s" % str(colorrange))
		iax=imshow(ldat,interpolation='nearest',aspect='auto',clim=colorrange,cmap=plt.get_cmap(colormap))
	elif rangeall:
		hs.Debug(1,"colormap range is all")
		iax=imshow(ldat,interpolation='nearest',aspect='auto',cmap=plt.get_cmap(colormap))
	else:
		hs.Debug(1,"colormap range is 0,10")
		iax=imshow(ldat,interpolation='nearest',aspect='auto',clim=[0,10],cmap=plt.get_cmap(colormap))

	if not ptitle:
		hs.Debug(1,"Showing filters in title")
		if (len(newexp.filters))>4:
			cfilters=[newexp.filters[0],'...',newexp.filters[-2],newexp.filters[-1]]
		else:
			cfilters=newexp.filters
		cfilters=hs.clipstrings(cfilters,30)
		ptitle='\n'.join(cfilters)
	title(ptitle,fontsize=10)

	# Artist.get_axes() was removed from matplotlib; the axes attribute replaces it
	ax=iax.axes
	ax.autoscale(False)
	if showline:
		hs.Debug(1,"Showing lines")
		labs=[]
		labpos=[]
		linepos=[]
		minpos=0
		# each value is compared to its successor; the 'end' sentinel closes the last group.
		# a new list is built so the list returned by isort/getfieldvals is not modified
		bounds=list(svals)+['end']
		for idx,cval in enumerate(svals):
			if cval==bounds[idx+1]:
				continue
			# label at the middle of the group, line after its last sample
			labpos.append(minpos-0.5+float(idx+1-minpos)/2)
			minpos=idx+1
			linepos.append(idx+0.5)
			labs.append(cval)
		hs.Debug(1,"number of lines is %d" % len(linepos))
		if showxlabel:
			ax.set_xticks(labpos)
			ax.set_xticklabels(labs,rotation=45,ha='right')
		for cx in linepos:
			plot([cx,cx],[-0.5,np.size(ldat,0)-0.5],'k',linewidth=2)
	else:
		hs.Debug(1,"Not showing lines")
		if showxall or len(newexp.samples)<=10:
			hs.Debug(1,"less than 10 samples, showing all sample names")
			# tick positions must be set before the labels so each label matches its sample
			ax.set_xticks(range(len(newexp.samples)))
			ax.set_xticklabels(svals,rotation=90)
	tight_layout()
	# the image rows span -0.5 .. nrows-0.5 (same range as the category lines above)
	ax.set_ylim(-0.5,np.size(ldat,0)-0.5)

	if showcolorbar:
		hs.Debug(1,"Showing colorbar")
		cb=colorbar(ticks=list(np.log2([2,10,100,500,1000])))
		cb.ax.set_yticklabels(['<0.02%','0.1%','1%','5%','>10%'])

	# attach the state used by the mouse/keyboard handlers to the axis
	ax.expdat=newexp
	ax.lastselect=-1
	ax.sampline=''
	ax.ofig=f
	ax.labelson=False
	ax.labelnames=[]
	f.canvas.mpl_connect('button_press_event', onplotmouseclick)
	f.canvas.mpl_connect('key_press_event', onplotkeyclick)
	plt.rcParams=oldparams

	# if want the ontology analysis for a given category:
	if ontofig:
		hs.Debug(1,"Ontofig is set")
		newexp.ontofigname=ontofig
	else:
		newexp.ontofigname=False

	# if we want gui, open it
	if usegui:
		hs.Debug(1,"Using the GUI window")
		import heatsequer.plots.plotwingui
		guiwin = heatsequer.plots.plotwingui.PlotGUIWindow(newexp)
		ax.guiwin=guiwin
		guiwin.plotfig=f
		guiwin.plotax=ax
		guiwin.show()
	else:
		ax.guiwin=False
		hs.Debug(7,'Not using gui')

	if newexp.plotmetadata:
		hs.Debug(1,"Experiment has metadata attached for plotting (%d points)" % len(newexp.plotmetadata))
		for cmet in newexp.plotmetadata:
			addplotmetadata(newexp,field=cmet[0],value=cmet[1],color=cmet[2],inverse=cmet[3],beforesample=cmet[4])
	show()
	return newexp,ax

コード例 #14

0

ファイルを表示

ファイル: filtering.py プロジェクト: mortonjt/heatsequer

def filtersimilarsamples(expdat,field,method='mean'):
	"""
	join similar samples into one sample (i.e. to remove samples of same individual)

	The joined sample replaces the first sample of each group. Mapping file
	fields that differ between the joined samples are set to 'NA' in the new
	experiment (the input experiment metadata is not modified).

	input:
	expdat : Experiment
	field : string
		Name of the field containing the values (for which similar values will be joined)
	method : string
		What to do with samples with similar value. options:
		'mean' - replace with a sample containing the mean of the samples
		'median'- replace with a sample containing the median of the samples
		'random' - replace with a single random sample out of these samples
		'sum' - replace with sum of original reads in all samples, renormalized after to 10k and orignumreads updated
		'fracpres' - replace with fraction of samples where the bacteria is present
	output:
	newexp : Experiment
		like the input experiment but only one sample per unique value in field
		(False if method is not supported and a group with more than one sample is encountered)
	"""
	# must stay the first statement so only the call arguments are recorded
	params=locals()

	newexp=hs.copyexp(expdat)
	if method=='sum':
		newexp=hs.toorigreads(newexp)
	uvals=hs.getfieldvals(expdat,field,ounique=True)
	keep=[]
	for cval in uvals:
		cpos=hs.findsamples(expdat,field,cval)
		if len(cpos)==1:
			keep.append(cpos[0])
			continue
		if method=='random':
			keep.append(cpos[np.random.randint(len(cpos))])
			continue
		# set the mapping file values.
		# work on a copy so the metadata of the input experiment is not modified
		firstsamp=expdat.samples[cpos[0]]
		cmap=expdat.smap[firstsamp].copy()
		for ccpos in cpos[1:]:
			othermap=expdat.smap[expdat.samples[ccpos]]
			for cfield in cmap:
				if cmap[cfield]!=othermap[cfield]:
					cmap[cfield]='NA'
		if method=='mean':
			joined=np.mean(expdat.data[:,cpos],axis=1)
		elif method=='median':
			joined=np.median(expdat.data[:,cpos],axis=1)
		elif method=='sum':
			# newexp holds the original reads (hs.toorigreads above)
			joined=np.sum(newexp.data[:,cpos],axis=1)
			newexp.origreads[cpos[0]]=np.sum(hs.reorder(expdat.origreads,cpos))
		elif method=='fracpres':
			# mean of the presence indicator = fraction of samples where present
			# (does not depend on the integer division rules of the python version)
			joined=np.mean(expdat.data[:,cpos]>0,axis=1)
		else:
			hs.Debug(9,'method %s not supported' % method)
			return False
		newexp.data[:,cpos[0]]=joined
		keep.append(cpos[0])
		newexp.smap[firstsamp]=cmap
	newexp=hs.reordersamples(newexp,keep)
	if method=='sum':
		newexp=hs.normalizereads(newexp)
	newexp.filters.append('Filter similar samples field %s method %s' % (field,method))
	hs.addcommand(newexp,"filtersimilarsamples",params=params,replaceparams={'expdat':expdat})
	hs.Debug(6,'%d samples before filtering, %d after' % (len(expdat.samples),len(newexp.samples)))
	return newexp