Ejemplo n.º 1
0
	def updateinfo(self,csamp,cseq):
		"""
		Update the displayed information about the selected sample/bacteria.

		Stores the selection on the instance, refreshes the sample, taxonomy,
		id, reads and sample-field labels and, if the experiment has a sequence
		database (self.cexp.seqdb), refills the database statistics and the
		per-study list. Finally redraws the plot of the currently selected
		figure tab (index 2 -> plotxgraph, index 1 -> plotontology).

		input:
		csamp - index of the selected sample (indexes self.cexp.samples)
		cseq - index of the selected sequence (indexes self.cexp.tax, sids and seqs)
		"""
		self.csamp=csamp
		self.cseq=cseq
		cexp=self.cexp
		samplename=cexp.samples[csamp]
		self.lSample.setText(samplename)
		self.lTaxonomy.setText(cexp.tax[cseq])
		self.lID.setText(str(cexp.sids[cseq]))
		# the stored data value is divided by 100 before being displayed
		self.lReads.setText('%f' % (float(cexp.data[cseq,csamp])/100))
		fieldname=str(self.cSampleField.currentText())
		self.lSampleFieldVal.setText(cexp.smap[samplename][fieldname])
		# update the stats about the database:
		if cexp.seqdb:
			self.lStudies.clear()
			totappear,numstudies,_,studysamples,totdbsamples=bactdb.GetSeqInfo(cexp.seqdb,cexp.seqs[cseq])
			# NOTE(review): when totappear is 0 the lNumSamples/lNumStudies labels keep
			# their previous text (only the studies list is cleared) - confirm this is intended
			if totappear>0:
				self.lNumSamples.setText('%d/%dK' % (totappear,int(totdbsamples/1000)))
				self.lNumStudies.setText(str(numstudies))
				# materialize the items so they can be indexed below
				# (a dict view is not subscriptable in python 3)
				res=list(studysamples.items())
				# for each study: (number of entries listed for the sequence) / (number of samples in the study)
				vlens=[]
				for studyid,seqsamples in res:
					totsamps=bactdb.SamplesInStudy(cexp.seqdb,studyid)
					vlens.append(float(len(seqsamples))/len(totsamps))
				# the second value returned by au.isort is used as indices into res/vlens
				# (sorted with reverse=True)
				_,si=au.isort(vlens,reverse=True)
				for cind in si:
					studyname=bactdb.StudyNameFromID(cexp.seqdb,res[cind][0])
					self.lStudies.addItem('%s (%f)' % (studyname,vlens[cind]))
		if self.FigureTab.currentIndex()==2:
			self.plotxgraph()
		if self.FigureTab.currentIndex()==1:
			self.plotontology()
Ejemplo n.º 2
0
def GetSeqListInfo(db,seqs,info='samples'):
	"""
	Get information about a sequence list.
	input:
	db
	seqs - a list of sequences (ACGT)
	info - the info type to collect:
		'samples' - the samples for each sequence
		'studies' - the studies where each seq appears
		'types' - env_matter+host_taxid

	output:
	res - a dict keyed by the collected entries (sample index / study id / 'matter-host' string,
		depending on info), each containing an array (1 entry per sequence):
		'samples' - the value from the sequence vector for that sample
		'studies' - 1 if the sequence is listed in more than 25% of the study samples
		'types' - the number of samples of that type containing the sequence
		False is returned if a sequence is processed with an unsupported info type
		(so an empty seqs list always gives an empty dict)
	"""

	res={}
	numseqs=len(seqs)
	# cache of SamplesInStudy() results, so each study is looked up once
	studysampcache={}
	# cache of the 'matter-host' key per sample, so each sample is queried once
	samptypecache={}
	for idx,cseq in enumerate(seqs):
		if info=='samples':
			# Get the read vector for the samples
			pv=GetSeqVec(db,cseq)
			if len(pv)==0:
				continue
			# positions in the vector where the value is above the db threshold
			nz=np.where(pv>db.MINFREQ)
			totalappear=len(nz[0])
			au.Debug(2,"--------Seq",cseq)
			au.Debug(3,"Found in",totalappear,"Samples")
			au.Debug(3,"Fraction=",float(totalappear)/len(pv))
			for csamp in nz[0]:
				res.setdefault(csamp,np.zeros(numseqs))[idx]=pv[csamp]
		elif info=='studies':
			totappear,_,_,studysamples,_=GetSeqInfo(db,cseq)
			if totappear>0:
				# materialize the items so they can be indexed below
				# (a dict view is not subscriptable in python 3)
				sres=list(studysamples.items())
				# for each study: (number of entries listed for the sequence) / (number of samples in the study)
				vlens=[]
				for cstudy,seqsamples in sres:
					if cstudy not in studysampcache:
						studysampcache[cstudy]=SamplesInStudy(db,cstudy)
					totsamps=studysampcache[cstudy]
					vlens.append(float(len(seqsamples))/len(totsamps))
				# iterate in the order given by au.isort (reverse=True) so that
				# new keys are added to res in the same order as before
				_,si=au.isort(vlens,reverse=True)
				for cind in si:
					studyid=sres[cind][0]
					if vlens[cind]>0.25:
						res.setdefault(studyid,np.zeros(numseqs))[idx]+=1
		elif info=='types':
			pv=GetSeqVec(db,cseq)
			if len(pv)==0:
				continue
			nz=np.where(pv>db.MINFREQ)
			for csamp in nz[0]:
				if csamp not in samptypecache:
					# the Maps table is queried with the vector position + 1 as the SampleID
					db.cur.execute("SELECT Field,Value FROM Maps WHERE SampleID=?",[int(csamp)+1])
					matter='NA'
					host='NA'
					for cfield,cval in db.cur.fetchall():
						cfield=cfield.lower()
						if cfield=='env_matter':
							matter=cval
						if cfield=='host_taxid':
							host=cval
					samptypecache[csamp]=matter+'-'+host
				res.setdefault(samptypecache[csamp],np.zeros(numseqs))[idx]+=1

		else:
			au.Debug(9,"info type not supported",info)
			return False
	return res