Example #1
0
def reorderbacteria(exp,order,inplace=False):
	"""
	Rearrange the bacteria of an experiment (bacteria whose position is not listed are dropped)
	input:
	exp - the experiment to reorder
	order - the positions of the bacteria to keep, in their new order
	inplace - True to modify exp itself, False to work on a copy made with copyexp
	output:
	newexp - the reordered experiment (exp itself when inplace is True)
	"""
	newexp=exp if inplace else copyexp(exp)

	# the data matrix is indexed along its first axis, the per-bacteria lists via hs.reorder
	newexp.data=newexp.data[order,:]
	newexp.seqs=hs.reorder(newexp.seqs,order)
	# sequence -> position lookup, rebuilt from scratch for the new order
	newexp.seqdict={seq:pos for pos,seq in enumerate(newexp.seqs)}
	newexp.tax=hs.reorder(newexp.tax,order)
	newexp.sids=hs.reorder(newexp.sids,order)

	# nothing more to do when the experiment carries no sequence annotations
	if exp.seqannotations is None:
		return newexp

	# keep only the annotations of the remaining sequences and rebuild
	# the reverse lookup (annotation -> list of sequences carrying it)
	keptannotations={}
	seqsperannotation=collections.defaultdict(list)
	for seq in newexp.seqs:
		infos=newexp.seqannotations[seq]
		keptannotations[seq]=infos
		for info in infos:
			seqsperannotation[info].append(seq)
	newexp.seqannotations=keptannotations
	newexp.annotationseqs=seqsperannotation
	return newexp
Example #2
0
def plotdistheatmap(gdist,uvals,neworder=False,vmin=0,vmax=1):
	"""
	plot a distance heat map in a new figure and add axis labels and a colorbar
	input:
	gdist : numpy array of float
		the distance matrix (from getgroupdist)
	uvals : list of strings
		the names of the categories (from getgroupdist)
	neworder : list of integers or False
		if not False, the order by which to sort the matrix (rows and columns) and labels prior to plotting
	vmin,vmax : int (optional)
		the range for the heatmap
	"""
	if neworder:
		# apply the same permutation to rows, columns and labels
		gdist=gdist[neworder,:]
		gdist=gdist[:,neworder]
		uvals=hs.reorder(uvals,neworder)
	plt.figure()
	iax=plt.imshow(gdist,interpolation='nearest',aspect='auto',vmin=vmin,vmax=vmax)
	# use the axes attribute: Artist.get_axes() was deprecated and later removed from matplotlib
	ax=iax.axes
	# one tick per category on both axes
	ax.set_xticks(range(len(uvals)))
	ax.set_xticklabels(uvals,rotation=90)
	ax.set_yticks(range(len(uvals)))
	ax.set_yticklabels(uvals)
	plt.tight_layout()
	plt.draw()
	plt.colorbar()
Example #3
0
def filterfieldwave(expdat,field,val1,val2=False,mineffect=1,method='mean',uselog=True):
	"""
	find all sequences which show an effect size of at least mineffect between val1 and val2 samples in field
	no statistical significance testing is performed
	the data of expdat is not modified (all transformations are applied to a copy)

	input:
	expdat : Experiment
	field : string
		name of field to use for group separation
	val1 : string
		value in field for group1
	val2 : string
		value in field for group2 or False for all the other samples except val1
	mineffect : float
		min absolute difference between the group means per OTU in order to keep
	method: string
		'mean' (default) - compare the group means of the (optionally log transformed) data
		'ranksum' - replace the values of each sequence by their ranks over all samples before comparing the group means
	uselog : bool
		True to log transform the data (values below 1 are set to 1 before taking log2)

	output:
	newexp : Experiment
		only with sequences showing a mineffect difference, sorted by the difference (group1 - group2)
	"""
	params=locals()

	numseqs=len(expdat.seqs)
	numsamples=len(expdat.samples)
	# work on a private copy: the clipping and ranking below assign in place
	# and would otherwise overwrite the data of the input experiment
	dat=expdat.data.copy()
	if uselog:
		dat[dat<1]=1
		dat=np.log2(dat)
	if method=='ranksum':
		for idx in range(numseqs):
			dat[idx,:]=stats.rankdata(dat[idx,:])

	pos1=hs.findsamples(expdat,field,val1)
	if val2:
		pos2=hs.findsamples(expdat,field,val2)
	else:
		# group2 is every sample not in group1
		pos2=np.setdiff1d(np.arange(numsamples),pos1,assume_unique=True)

	# collect the sequences whose group mean difference is large enough
	outpos=[]
	odif=[]
	for idx in range(numseqs):
		cdif=np.mean(dat[idx,pos1])-np.mean(dat[idx,pos2])
		if abs(cdif)>=mineffect:
			outpos.append(idx)
			odif.append(cdif)

	# order the kept sequences by increasing difference
	si=np.argsort(odif)
	outpos=hs.reorder(outpos,si)
	newexp=hs.reorderbacteria(expdat,outpos)
	newexp.filters.append('filterfieldwave field %s val1 %s val2 %s' % (field,val1,val2))
	hs.addcommand(newexp,"filterfieldwave",params=params,replaceparams={'expdat':expdat})
	return newexp
Example #4
0
def reordersamples(exp,newpos,inplace=False):
	"""
	Rearrange the samples of an experiment
	input:
	exp - the experiment to reorder
	newpos - array - the positions of the samples to keep, in their new order (positions left out are deleted)
	inplace - True to modify exp itself, False to work on a copy made with copyexp
	output:
	newexp - the reordered experiment (exp itself when inplace is True)
	"""
	newexp=exp if inplace else copyexp(exp)

	# the data matrix is indexed along its second axis
	reordereddata=newexp.data[:,newpos]
	newexp.data=reordereddata
	# the per-sample lists follow the same order
	reorderedsamples=hs.reorder(newexp.samples,newpos)
	newexp.samples=reorderedsamples
	reorderedreads=hs.reorder(newexp.origreads,newpos)
	newexp.origreads=reorderedreads
	return newexp
Example #5
0
def reorderbacteria(exp,order,inplace=False):
	"""
	Rearrange the bacteria of an experiment (bacteria whose position is not listed are dropped)
	input:
	exp - the experiment to reorder
	order - the positions of the bacteria to keep, in their new order
	inplace - True to modify exp itself, False to work on a copy made with copyexp
	output:
	newexp - the reordered experiment (exp itself when inplace is True)
	"""
	newexp=exp if inplace else copyexp(exp)

	# the data matrix is indexed along its first axis, the per-bacteria lists via hs.reorder
	newexp.data=newexp.data[order,:]
	newexp.seqs=hs.reorder(newexp.seqs,order)
	# sequence -> position lookup, rebuilt from scratch for the new order
	newexp.seqdict={seq:pos for pos,seq in enumerate(newexp.seqs)}
	newexp.tax=hs.reorder(newexp.tax,order)
	newexp.sids=hs.reorder(newexp.sids,order)
	return newexp
Example #6
0
def plotdistbar(gdist,uvals,crow=0,neworder=False):
	"""
	plot a single row of a distance matrix as a bar plot in a new figure and label the bars
	input:
	gdist : numpy array of float
		the distance matrix (from getgroupdist)
	uvals : list of strings
		the names of the categories (from getgroupdist)
	crow : int
		the row of the (reordered, if neworder is used) distance matrix to plot
	neworder : list of integers or False
		if not False, the order by which to sort the matrix and labels prior to plotting
	"""
	if neworder:
		# same permutation for the rows, then the columns, then the labels
		gdist=gdist[neworder,:][:,neworder]
		uvals=hs.reorder(uvals,neworder)

	numcats=len(uvals)
	figure()
	# bar positions are shifted by half a unit relative to the tick positions
	barpos=np.arange(numcats)-0.5
	bar(barpos,gdist[crow,:])
	ax=gca()
	ax.set_xticks(range(numcats))
	ax.set_xticklabels(uvals,rotation=90)
	tight_layout()
	draw()
Example #7
0
def plotdiffsummary(expdatlist,seqs,field,val1,val2=False,method='mean',sortit=True,threshold=0.1,ptitle=False,showallfirstexp=True):
	"""
	plot a heat map for the fold change in each experiment in expdatlist
	for the log2 foldchange between the 2 groups (val1,val2 values in field)
	for zech chinese ibd paper
	input:
	expdatlist - a non-empty list of experiments to plot (one heatmap column per experiment - all must contain field and val1,val2 in it)
	seqs - the sequences to examine
	field - name of the field dividing the 2 groups
	val1 - value of the field for group 1 (or a list of values 1 per experiment)
	val2 - value of the field for group 2 or False for all the rest (not val1) (or a list of values 1 per experiment)
	method:
		- mean - calculate the difference in the mean of the 2 groups
		(passed as is to hs.getdiffsummary)
	sortit - True to sort according to difference in the first expdat, False to use the order in seqs
	threshold - minimum value of stat for ratio calculation (otherwise rounded up to threshold)
	ptitle - name of figure or False for auto title
	showallfirstexp : bool
		True - show all sequences which are not NaN in at least one experiment
		False - show only sequences which are not NaN in at least one experiment other than the first

	output:
	diff - the plotted heatmap values (row per otu, column per experiment)
	expnames - names (studyname) of the experiments plotted (for label)
	otus - the otu sequences for the rows
	"""

	if len(expdatlist)==0:
		raise IndexError('expdatlist must contain at least one experiment')

	# a single value is used for all the experiments
	if not isinstance(val1,list):
		val1=[val1]*len(expdatlist)
	if not isinstance(val2,list):
		val2=[val2]*len(expdatlist)

	# one row of differences per experiment. stacking all rows at once gives a
	# 2d array also when only one experiment is supplied
	rows=[]
	for cidx,cexp in enumerate(expdatlist):
		rows.append(np.array(hs.getdiffsummary(cexp,seqs,field,val1[cidx],val2[cidx],method,threshold=threshold)))
	diff=np.vstack(rows)

	# remove all NaN lines (not enough reads for threshold)
	if showallfirstexp:
		keeplines=np.where(~np.isnan(diff).all(axis=0))[0]
	else:
		keeplines=np.where(~np.isnan(diff[1:,:]).all(axis=0))[0]
	diff=diff[:,keeplines]
	otus=hs.reorder(seqs,keeplines)

	if sortit:
		si=np.argsort(diff[0,:])
		diff=diff[:,si]
		otus=hs.reorder(otus,si)
	figure()
	# symmetric color limits so that no change falls on the middle of the colormap
	maxdiff=np.nanmax(np.abs(diff))
	# transpose so that each otu is a row and each experiment a column
	diff=np.transpose(diff)
	imshow(diff,interpolation='nearest',aspect='auto',cmap=plt.get_cmap("coolwarm"),clim=[-maxdiff,maxdiff])
	colorbar()
	if ptitle:
		title(ptitle)
	else:
		title("log2 fold change between %s and %s in field %s" % (val1,val2,field))
	expnames=[cexp.studyname for cexp in expdatlist]
	xticks(np.arange(len(expnames)),expnames,rotation=45)
	tight_layout()
	show()
	return diff,expnames,otus
Example #8
0
def filtersimilarsamples(expdat,field,method='mean'):
	"""
	join similar samples into one sample (i.e. to remove samples of same individual)
	the mapping entries of the input experiment are not edited in place (the joined entry is a copy)
	input:
	expdat : Experiment
	field : string
		Name of the field containing the values (for which similar values will be joined)
	method : string
		What to do with samples with similar value. options:
		'mean' - replace with a sample containing the mean of the samples
		'median'- replace with a sample containing the median of the samples
		'random' - replace with a single random sample out of these samples
		'sum' - replace with sum of original reads in all samples, renormalized after (hs.normalizereads) and origreads updated
		'fracpres' - replace with fraction of samples where the bacteria is present
	output:
	newexp : Experiment
		like the input experiment but only one sample per unique value in field
		(False if method is not supported and some value appears in more than one sample)
	"""
	params=locals()

	newexp=hs.copyexp(expdat)
	if method=='sum':
		newexp=hs.toorigreads(newexp)
	uvals=hs.getfieldvals(expdat,field,ounique=True)
	keep=[]
	for cval in uvals:
		cpos=hs.findsamples(expdat,field,cval)
		# a value appearing in a single sample - nothing to join
		if len(cpos)==1:
			keep.append(cpos[0])
			continue
		if method=='random':
			keep.append(cpos[np.random.randint(len(cpos))])
			continue
		# set the mapping file values: start from a copy of the first sample entry
		# (so the entry of the input experiment is left untouched) and mark
		# every field which differs between the joined samples as 'NA'
		cmap=dict(expdat.smap[expdat.samples[cpos[0]]])
		for ccpos in cpos[1:]:
			othermap=expdat.smap[expdat.samples[ccpos]]
			for cfield in cmap:
				if cmap[cfield]!=othermap[cfield]:
					cmap[cfield]='NA'
		if method=='mean':
			joined=np.mean(expdat.data[:,cpos],axis=1)
		elif method=='median':
			joined=np.median(expdat.data[:,cpos],axis=1)
		elif method=='sum':
			# newexp holds the original reads at this point (see hs.toorigreads above)
			joined=np.sum(newexp.data[:,cpos],axis=1)
			newexp.origreads[cpos[0]]=np.sum(hs.reorder(expdat.origreads,cpos))
		elif method=='fracpres':
			# float divisor so the fraction is not truncated by integer division
			joined=np.sum(expdat.data[:,cpos]>0,axis=1)/float(len(cpos))
		else:
			hs.Debug(9,'method %s not supported' % method)
			return False
		# the joined sample replaces the first sample of the group
		newexp.data[:,cpos[0]]=joined
		keep.append(cpos[0])
		newexp.smap[expdat.samples[cpos[0]]]=cmap
	newexp=hs.reordersamples(newexp,keep)
	if method=='sum':
		newexp=hs.normalizereads(newexp)
	newexp.filters.append('Filter similar samples field %s method %s' % (field,method))
	hs.addcommand(newexp,"filtersimilarsamples",params=params,replaceparams={'expdat':expdat})
	hs.Debug(6,'%d samples before filtering, %d after' % (len(expdat.samples),len(newexp.samples)))
	return newexp