def dbEntropy(inFileName, wordsize):
    """
    deprecated

    Print and return the entropy of every sequence read from a file.

    Sequences are read one after another from inFileName with Bioseq.read()
    until the sequence attribute of the reader is None. For each sequence,
    seq.entropy(wordsize) is computed, printed together with the sequence
    number, length and the first 40 characters of the header, and added to a
    Stat accumulator. A ranking is then printed, followed by stat.string().

    @param inFileName: path of the sequence file to read
    @param wordsize: word size forwarded to Bioseq.entropy()
    @return: list of (-entropy, sequence number, header) tuples, sorted in
             ascending order (so the highest entropy comes first); sequence
             numbers start at 1 and follow the reading order
    """
    vec_len = []
    stat = Stat()
    seq = Bioseq()
    numseq = 0

    file_db = open(inFileName)
    try:
        while True:
            seq.read(file_db)
            if seq.sequence is None:
                # Bioseq signals "nothing more to read" with a None sequence.
                break
            i = seq.entropy(wordsize)
            stat.add(i)
            numseq = numseq + 1
            print('sequence # %s = %s [ %s ...] entropy %s'
                  % (numseq, seq.getLength(), seq.header[0:40], i))
            # The entropy is stored negated so that the plain ascending sort
            # below ranks sequences by decreasing entropy.
            vec_len.append((-i, numseq, seq.header))
    finally:
        # Release the handle even when reading or scoring a sequence fails.
        file_db.close()

    vec_len.sort()
    for s in vec_len:
        print('I= %s => # %s %s' % (-s[0], s[1], s[2]))
    print(stat.string())
    return vec_len
def dbRelEntropy(inFileName, wordsize):
    """
    deprecated

    Print and return the relative entropy of every sequence read from a file.

    The file is read twice with the same Bioseq reader:
      1. the word occurrences returned by seq.occ_word(wordsize) are summed
         over all sequences and turned into reference frequencies computed as
         (occurrences + 1) / sum over sequences of (length - wordsize);
      2. seq.rel_entropy() is called on each sequence with these reference
         frequencies; the value is printed and added to a Stat accumulator.
    A ranking is then printed, followed by stat.string().

    NOTE(review): the frequency computation divides by the accumulated
    (length - wordsize) total; if that total is 0 while words were counted,
    a ZeroDivisionError is raised - confirm whether callers can hit this.

    @param inFileName: path of the sequence file to read
    @param wordsize: word size forwarded to Bioseq.occ_word()
    @return: list of (relative entropy, sequence number, header) tuples
             sorted in ascending order; sequence numbers start at 1 and
             follow the reading order
    """
    seq = Bioseq()

    # First pass: accumulate word occurrences over the whole file.
    refocc = {}
    sumlen = 0
    file_db = open(inFileName)
    try:
        while True:
            seq.read(file_db)
            if seq.sequence is None:
                # Bioseq signals "nothing more to read" with a None sequence.
                break
            sumlen = sumlen + seq.getLength() - wordsize
            occ = seq.occ_word(wordsize)
            # Always merge into our own dict so the mapping returned by
            # occ_word() is never aliased and mutated afterwards.
            for w in occ.keys():
                refocc[w] = refocc.get(w, 0) + occ[w]
    finally:
        # Release the handle even when reading or counting fails.
        file_db.close()

    reffreq = {}
    for w in refocc.keys():
        reffreq[w] = float(refocc[w] + 1) / sumlen

    # Second pass: score each sequence against the reference frequencies.
    vec_len = []
    stat = Stat()
    numseq = 0
    file_db = open(inFileName)
    try:
        while True:
            seq.read(file_db)
            if seq.sequence is None:
                break
            i = seq.rel_entropy(reffreq)
            stat.add(i)
            numseq = numseq + 1
            print('sequence # %s = %s [ %s ...] entropy %s'
                  % (numseq, seq.getLength(), seq.header[0:40], i))
            vec_len.append((i, numseq, seq.header))
    finally:
        file_db.close()

    vec_len.sort()
    for s in vec_len:
        print('H= %s => # %s %s' % (s[0], s[1], s[2]))
    print(stat.string())
    return vec_len