Esempio n. 1
0
	def renormalize(self):
		"""
		Normalize the reads of the selected experiment and add the result as a new experiment.

		Only acts when exactly one entry is selected in bMainList; otherwise
		prints "Need 1 item" and returns without doing anything.
		The new experiment keeps the studyname of the normalized experiment
		with '_norm' appended and is registered through self.addexp().
		"""
		selected=self.bMainList.selectedItems()
		if len(selected)==1:
			# exactly one entry, so it can be taken directly instead of looping
			expname=str(selected[0].text())
			normexp=hs.normalizereads(self.explist[expname])
			normexp.studyname=normexp.studyname+'_norm'
			self.addexp(normexp)
			return
		print("Need 1 item")
Esempio n. 2
0
def filterandnormalize(expdat,seqs,exclude=False,subseq=False,numreads=10000):
	"""
	filter the experiment by a list of sequences and then renormalize it
	(normalizereads is called with fixorig=True - per the original notes this
	recalculates origreads per sample)

	input:
	expdat - the experiment to filter
	seqs - a list of sequences (ACGT)
	exclude - False (default) to remove seqs, True to only keep seqs
		(the flag is passed inverted to hs.filterseqs)
	subseq - False to look for exact match only, True to look for subsequence match (slower)
	numreads - the number of reads per sample to normalize to (default 10000)

	output:
	newexp - the filtered experiment, renormalized to numreads reads/sample
	"""
	# must stay the first statement so that only the call arguments are captured
	params=locals()

	filtered=hs.filterseqs(expdat,seqs,exclude=not exclude,subseq=subseq)
	normalized=hs.normalizereads(filtered,fixorig=True,numreads=numreads)
	normalized.filters.append("filter sequences and normalize to numreads %d" % numreads)
	hs.addcommand(normalized,"filterandnormalize",params=params,replaceparams={'expdat':expdat})
	return normalized
Esempio n. 3
0
	def cleantaxonomy(self):
		"""
		Show the clean-taxonomy dialog for the selected experiment and apply the chosen filters.

		Only acts when exactly one entry is selected in bMainList; otherwise
		prints "Need 1 item" and returns. If the dialog is accepted, a copy of
		the experiment is passed through hs.filtertaxonomy (exclude=True) for
		every checked box, normalized with hs.normalizereads, given the
		original studyname with '.ct' appended and registered via self.addexp().
		"""
		selected=self.bMainList.selectedItems()
		if len(selected)!=1:
			print("Need 1 item")
			return
		for entry in selected:
			srcexp=self.explist[str(entry.text())]
			dialog=CleanTaxonomyWindow(srcexp)
			if dialog.exec_()!=QtGui.QDialog.Accepted:
				# dialog was not accepted - nothing to do for this entry
				continue
			cleaned=hs.copyexp(srcexp)
			# (checkbox, taxonomy string, extra filtertaxonomy arguments), in application order
			steps=(
				(dialog.cMitochondria,'mitochondria',{}),
				(dialog.cChloroplast,'Streptophyta',{}),
				(dialog.cChloroplast,'Chloroplast',{}),
				(dialog.cUnknown,'nknown',{}),
				(dialog.cBacteria,'Bacteria;',{'exact':True}),
			)
			for checkbox,taxname,extra in steps:
				if checkbox.checkState():
					cleaned=hs.filtertaxonomy(cleaned,taxname,exclude=True,**extra)
			cleaned=hs.normalizereads(cleaned)
			cleaned.studyname=srcexp.studyname+'.ct'
			self.addexp(cleaned)
Esempio n. 4
0
def filtern(expdat):
	"""
	remove every sequence containing "N" or "n" from the experiment and renormalize
	input:
	expdat : Experiment
	output:
	newexp : Experiment
		the experiment without the sequences containing "N"/"n",
		after hs.normalizereads()
	"""
	# must stay the first statement so that only the call arguments are captured
	params=locals()

	# positions of the sequences that contain neither an upper nor a lower case N
	keeplist=[idx for idx,cseq in enumerate(expdat.seqs) if not ("N" in cseq or "n" in cseq)]
	filtered=hs.reorderbacteria(expdat,keeplist)
	filtered=hs.normalizereads(filtered)
	filtered.filters.append('Filter sequences containing N')
	hs.addcommand(filtered,"filtern",params=params,replaceparams={'expdat':expdat})
	hs.Debug(6,'%d sequences before filtering, %d after' % (len(expdat.seqs),len(filtered.seqs)))
	return filtered
Esempio n. 5
0
def cleantaxonomy(expdat,mitochondria=True,chloroplast=True,bacteria=True,unknown=True,exclude=True):
	"""
	remove (or keep only) common non-16s sequences from the experiment

	input:
	expdat : Experiment
	mitochondria : bool
		select mitochondrial sequences (taxonomy 'mitochondria')
	chloroplast : bool
		select chloroplast sequences (taxonomy 'Streptophyta' or 'Chloroplast')
	bacteria : bool
		select sequences only identified as "Bacteria" (no finer identification)
	unknown : bool
		select unknown sequences (taxonomy 'Unknown' or exactly 'Unclassified;')
	exclude : bool
		True (default) to remove the selected sequences, False to keep them and throw away all others

	output:
	newexp : Experiment
		exclude=True: the experiment without the selected sequences, renormalized with hs.normalizereads()
		exclude=False: the experiment restricted (via hs.filterseqs) to the selected sequences.
		Note the reads are NOT renormalized in this case.
	"""
	# must stay the first statement so that only the call arguments are captured
	params=locals()

	# (enabled, taxonomy string, extra filtertaxonomy arguments), in application order
	steps=(
		(mitochondria,'mitochondria',{}),
		(chloroplast,'Streptophyta',{}),
		(chloroplast,'Chloroplast',{}),
		(unknown,'Unknown',{}),
		(unknown,'Unclassified;',{'exact':True}),
		(bacteria,'Bacteria;',{'exact':True}),
	)

	newexp=hs.copyexp(expdat)
	# Sequences matched by the enabled filters (only used when exclude is False).
	# Collected as we go so that a disabled filter simply contributes nothing,
	# instead of leaving a result variable unbound.
	keepseqs=[]
	for enabled,taxname,extra in steps:
		if not enabled:
			continue
		if exclude:
			# removal is cumulative: each filter works on the output of the previous one
			newexp=hs.filtertaxonomy(newexp,taxname,exclude=True,**extra)
		else:
			# keeping is a union: every filter is applied to the full (unfiltered) copy
			matched=hs.filtertaxonomy(newexp,taxname,exclude=False,**extra)
			keepseqs+=matched.seqs
	if exclude:
		newexp=hs.normalizereads(newexp)
	else:
		# a sequence can match more than one filter - pass each one only once
		newexp=hs.filterseqs(newexp,list(set(keepseqs)))
	newexp.filters.append('Clean Taxonomy (remove mitochondria etc.)')
	hs.addcommand(newexp,"cleantaxonomy",params=params,replaceparams={'expdat':expdat})
	hs.Debug(6,'%d sequences before filtering, %d after' % (len(expdat.seqs),len(newexp.seqs)))
	return newexp
Esempio n. 6
0
def filtersimilarsamples(expdat,field,method='mean'):
	"""
	join similar samples into one sample (i.e. to remove samples of same individual)
	the input experiment (including its mapping file values) is not modified

	input:
	expdat : Experiment
	field : string
		Name of the field containing the values (for which similar values will be joined)
	method : string
		What to do with samples with similar value. options:
		'mean' - replace with a sample containing the mean of the samples
		'median'- replace with a sample containing the median of the samples
		'random' - replace with a single random sample out of these samples
		'sum' - replace with sum of original reads in all samples (data converted with hs.toorigreads),
			renormalized afterwards with hs.normalizereads and origreads updated to the sum
		'fracpres' - replace with fraction of samples where the bacteria is present (data>0)
	output:
	newexp : Experiment
		like the input experiment but only one sample per unique value in field.
		For joined samples (all methods except 'random'), mapping file fields that
		differ between the joined samples are set to 'NA'.
		False is returned if an unsupported method is encountered while joining samples.
	"""
	# must stay the first statement so that only the call arguments are captured
	params=locals()

	newexp=hs.copyexp(expdat)
	if method=='sum':
		newexp=hs.toorigreads(newexp)
	uvals=hs.getfieldvals(expdat,field,ounique=True)
	keep=[]
	for cval in uvals:
		cpos=hs.findsamples(expdat,field,cval)
		if len(cpos)==1:
			keep.append(cpos[0])
			continue
		if method=='random':
			keep.append(cpos[np.random.randint(len(cpos))])
			continue
		# the joined sample is stored in the position of the first sample of the group
		firstpos=cpos[0]
		if method=='mean':
			joined=np.mean(expdat.data[:,cpos],axis=1)
		elif method=='median':
			joined=np.median(expdat.data[:,cpos],axis=1)
		elif method=='sum':
			# newexp (not expdat) holds the data converted by toorigreads
			joined=np.sum(newexp.data[:,cpos],axis=1)
		elif method=='fracpres':
			# float() so the fraction is not truncated by integer division
			joined=np.sum(expdat.data[:,cpos]>0,axis=1)/float(len(cpos))
		else:
			hs.Debug(9,'method %s not supported' % method)
			return False
		# set the mapping file values - work on a copy so the mapping
		# of the input experiment is left untouched
		cmap=expdat.smap[expdat.samples[firstpos]].copy()
		for ccpos in cpos[1:]:
			othermap=expdat.smap[expdat.samples[ccpos]]
			for cfield in cmap:
				if cmap[cfield]!=othermap[cfield]:
					cmap[cfield]='NA'
		newexp.data[:,firstpos]=joined
		if method=='sum':
			newexp.origreads[firstpos]=np.sum(hs.reorder(expdat.origreads,cpos))
		newexp.smap[expdat.samples[firstpos]]=cmap
		keep.append(firstpos)
	newexp=hs.reordersamples(newexp,keep)
	if method=='sum':
		newexp=hs.normalizereads(newexp)
	newexp.filters.append('Filter similar samples field %s method %s' % (field,method))
	hs.addcommand(newexp,"filtersimilarsamples",params=params,replaceparams={'expdat':expdat})
	hs.Debug(6,'%d samples before filtering, %d after' % (len(expdat.samples),len(newexp.samples)))
	return newexp