Beispiel #1
0
def dbEntropy(inFileName,wordsize):
	"""
	deprecated
	"""
	vec_len=[]
	stat=Stat()
	file_db=open(inFileName)
	seq=Bioseq()
	numseq=0
	while 1:
		seq.read(file_db)
		if seq.sequence==None:
			break
		i=seq.entropy(wordsize)
		stat.add(i)

		numseq=numseq+1
		print 'sequence #',numseq,'=',seq.getLength(),'[',seq.header[0:40],'...] entropy',i
		vec_len.append((-i,numseq,seq.header))

	file_db.close()
	vec_len.sort()
	for s in vec_len:
		print 'I=',-s[0],'=> #',s[1], s[2]
	print stat.string()
	return vec_len
Beispiel #2
0
def dbRelEntropy(inFileName,wordsize):
	"""
	deprecated
	"""
	file_db=open(inFileName)
	seq=Bioseq()
	refocc={}
	sumlen=0
	while 1:
		seq.read(file_db)
		if seq.sequence==None:
			break
		sumlen=sumlen+seq.getLength()-wordsize
		occ=seq.occ_word(wordsize)
		if(len(refocc)==0):
			refocc=occ
		else:
			for w in occ.keys():
				if refocc.has_key(w):
					refocc[w]=refocc[w]+occ[w]
				else:
					refocc[w]=occ[w]
       	file_db.close()
	reffreq={}
	for w in refocc.keys():
		reffreq[w]=float(refocc[w]+1)/sumlen


	vec_len=[]
	stat=Stat()

	file_db=open(inFileName)
	numseq=0
	while 1:
		seq.read(file_db)
		if seq.sequence==None:
			break
		i=seq.rel_entropy(reffreq)
		stat.add(i)
		numseq=numseq+1
		print 'sequence #',numseq,'=',seq.getLength(),'[',seq.header[0:40],'...] entropy',i
		vec_len.append((i,numseq,seq.header))

	file_db.close()
	vec_len.sort()
	for s in vec_len:
		print 'H=',s[0],'=> #',s[1], s[2]
	print stat.string()
	return vec_len