Esempio n. 1
0

#with gzip.open('test.txt.gz', 'wb') as f:
#     for i in range(10):
#         f.write('111222333444555666777888999000'*100)

# Running statistics fed by test(): bstat receives the length of each payload
# before compression, sstat the length of the same payload after gzip.
bstat, sstat = RunningStats(), RunningStats()


def test():
    """Record the raw and gzip-compressed size of one build_x() payload.

    Pushes ``len(build_x())`` onto ``bstat`` and the length of that same
    payload after gzip compression (level 9, into an in-memory buffer) onto
    ``sstat``.

    The gzip stream and the buffer are released even when writing or
    recording fails; on the success path the behavior is unchanged.
    """
    big = build_x()
    bstat.push(len(big))

    # NOTE(review): on Python 3 the gzip stream writes bytes, so the buffer
    # must be bytes-capable (e.g. io.BytesIO) - confirm what StringIO is
    # bound to in this file's imports.
    sio = StringIO()
    try:
        # Leaving the with-block closes the gzip stream, which finishes the
        # compressed output; the size must only be read after that point.
        with gzip.GzipFile("", "wb", 9, sio) as f:
            f.write(big)

        sstat.push(len(sio.getvalue()))
    finally:
        sio.close()


# Collect 1005 samples; each call to test() pushes one value onto bstat and
# one onto sstat.
for i in range(1005):
    test()

# Print one summary line per accumulator (min, mean, +-2*stdev, max):
# first the uncompressed sizes (bstat), then the compressed sizes (sstat).
for _sizeStats in (bstat, sstat):
    _summary = (_sizeStats.min(), _sizeStats.mean(),
                2 * _sizeStats.stdev(), _sizeStats.max())
    print("%.4g %.3g +-%.3g %.4g" % _summary)
        shuffledCrcs.add(calcCrc(shufSeq))
        seq1shufftrans = Seq(shufSeq, generic_dna).translate()
        shuffledTransCrcs.add(calcCrc(seq1shufftrans))

    print("Shuffled distinct: %d" % (len(shuffledCrcs)))
    print("Shuffled trans distinct: %d" % len(shuffledTransCrcs))

    #for s in range(len(shuffles)):
    #    shuf = cds.getShuffledSeq(s)
    #    if( len(cds.sequence()) != cds.length() ):
    #        print("WARNING: incorrect shuffled sequence length detected for record (taxid=%d, protId=%s, seqId=%d); real-length=%d, recorded-length=%d." % (taxId, protId, shuffles[s], len(shuff.sequence()), cds.length()))
    #        warningsCount += 1

    if (rl()):
        print("processed %d records (%.2g%%)" %
              (recordsCount, float(recordsCount) / total * 100))

# For each accumulator print the sample count, then a summary line with
# min, mean, +-2*stdev and max (statsLength first, then statsShuffles).
for _accumulator in (statsLength, statsShuffles):
    print(_accumulator.count())
    _summary = (_accumulator.min(), _accumulator.mean(),
                2 * _accumulator.stdev(), _accumulator.max())
    print("%.3g %.3g +-%.3g %.3g" % _summary)

# Closing totals; the warning count is repeated on its own line when non-zero
# so it stands out at the end of the output.
_totals = (recordsCount, warningsCount)
print("Done - Processed %d records, found %d warnings" % _totals)
if warningsCount > 0:
    print("%d warnings found!" % (warningsCount, ))
i = 0  # keeps `i` bound even when the loop below runs zero times
for i in range(totalSequncesToTry):
    # Draw one random candidate and start a fresh accumulator for its
    # per-window folding energies.
    seq = randseq(sequenceLength)
    stats = RunningStats()

    # Visit every full-width window; the last valid start offset is
    # len(seq) - windowWidth (no windows at all if seq is shorter).
    lastStart = len(seq) - windowWidth
    for start in range(lastStart + 1):
        fragment = seq[start:start + windowWidth]
        assert len(fragment) == windowWidth

        # Calculate the RNA folding energy. This is the computation-heavy
        # part. Only the energy is used; the structure is discarded.
        strct, energy = RNA.fold(fragment)
        assert energy <= 0.0
        stats.push(energy)

    # Keep the candidate only if it falls in one of the two extreme classes:
    #  - "weak":   mean energy above -3.2 and every window above -6.32 - 0.50
    #  - "strong": mean energy below -8.03 and every window below -6.32 + 0.50
    # Candidates matching neither class are dropped.
    if stats.mean() > -3.2 and stats.min() > (-6.32 - 0.50):
        weakRecord = SeqRecord(
            Seq(seq, alphabet=generic_dna),
            id="rand%d" % i,
            description=(
                "Random synthetic sequence, mean MFE=%.4g, MFA>=%.4g" %
                (stats.mean(), stats.min())))
        weakFasta.append(weakRecord)
    elif stats.mean() < -8.03 and stats.max() < (-6.32 + 0.50):
        strongRecord = SeqRecord(
            Seq(seq, alphabet=generic_dna),
            id="rand%d" % i,
            description=(
                "Random synthetic sequence, mean MFE=%.4g, MFA<=%.4g" %
                (stats.mean(), stats.max())))
        strongFasta.append(strongRecord)