# Earlier on-disk variant of this experiment, kept for reference:
#with gzip.open('test.txt.gz', 'wb') as f:
#    for i in range(10):
#        f.write('111222333444555666777888999000'*100)

# Running statistics: bstat collects the raw payload lengths,
# sstat collects the lengths of the gzip-compressed output.
bstat = RunningStats()
sstat = RunningStats()


def test():
    """Gzip one payload from build_x() in memory and record both sizes.

    The length of the payload is pushed to ``bstat``; the length of the
    gzip output (compression level 9, written to an in-memory buffer) is
    pushed to ``sstat``.
    """
    payload = build_x()
    bstat.push(len(payload))

    buf = StringIO()
    gz = gzip.GzipFile(filename="", mode="wb", compresslevel=9, fileobj=buf)
    gz.write(payload)
    # Close the gzip stream before measuring, so that whatever it emits on
    # close is part of the measured size.
    gz.close()

    compressed_size = len(buf.getvalue())
    sstat.push(compressed_size)
    buf.close()


# Collect 1005 samples.
for i in range(1005):
    test()

# One summary line per collector (raw sizes first, then compressed sizes):
# min, mean, +- two standard deviations, max.
for summary in (bstat, sstat):
    print("%.4g %.3g +-%.3g %.4g" % (
        summary.min(),
        summary.mean(),
        2 * summary.stdev(),
        summary.max(),
    ))
shuffledCrcs.add(calcCrc(shufSeq)) seq1shufftrans = Seq(shufSeq, generic_dna).translate() shuffledTransCrcs.add(calcCrc(seq1shufftrans)) print("Shuffled distinct: %d" % (len(shuffledCrcs))) print("Shuffled trans distinct: %d" % len(shuffledTransCrcs)) #for s in range(len(shuffles)): # shuf = cds.getShuffledSeq(s) # if( len(cds.sequence()) != cds.length() ): # print("WARNING: incorrect shuffled sequence length detected for record (taxid=%d, protId=%s, seqId=%d); real-length=%d, recorded-length=%d." % (taxId, protId, shuffles[s], len(shuff.sequence()), cds.length())) # warningsCount += 1 if (rl()): print("processed %d records (%.2g%%)" % (recordsCount, float(recordsCount) / total * 100)) print(statsLength.count()) print("%.3g %.3g +-%.3g %.3g" % (statsLength.min(), statsLength.mean(), 2 * statsLength.stdev(), statsLength.max())) print(statsShuffles.count()) print("%.3g %.3g +-%.3g %.3g" % (statsShuffles.min(), statsShuffles.mean(), 2 * statsShuffles.stdev(), statsShuffles.max())) print("Done - Processed %d records, found %d warnings" % (recordsCount, warningsCount)) if (warningsCount > 0): print("%d warnings found!" % (warningsCount, ))
assert (len(fragment) == windowWidth) # Calculate the RNA folding energy. This is the computation-heavy part. strct, energy = RNA.fold(fragment) assert (energy <= 0.0) stats.push(energy) if (stats.mean() > -3.2 and stats.min() > (-6.32 - 0.50)): weakFasta.append( SeqRecord( Seq(seq, alphabet=generic_dna), id=("rand%d" % i), description=( "Random synthetic sequence, mean MFE=%.4g, MFA>=%.4g" % (stats.mean(), stats.min())))) elif (stats.mean() < -8.03 and stats.max() < (-6.32 + 0.50)): strongFasta.append( SeqRecord( Seq(seq, alphabet=generic_dna), id=("rand%d" % i), description=( "Random synthetic sequence, mean MFE=%.4g, MFA<=%.4g" % (stats.mean(), stats.max())))) gstats.push(stats.mean()) if (i % 500 == 499): print("weak: %d strong: %d" % (len(weakFasta), len(strongFasta))) print( "Totals: weak folding: %d seqs (%.2g%%) strong folding: %d seqs (%.2g%%)" % (len(weakFasta), float(len(weakFasta)) / i * 100, len(strongFasta),