# One value per filtering round; each entry is handed to
# khmer.new_hashtable() as its second argument (presumably the table
# size -- confirm against the khmer API).
primes = [
    75000007,
    75000017,
    75000031,
    75000047,
    75000071,
    75000083,
    75000097,
    75000103,
    75000113,
    75000143,
    75000157,
    75000169,
    75000173,
    75000179,
    75000181,
    75000187,
    75000197,
    75000227,
    75000241,
]

###

# Chain the filtering rounds: round N reads the file written by round N-1
# (round 0 reads the original input) and writes '<filename>.roundN'.
# NOTE(review): `fp` must be an open, writable file object created elsewhere
# in the script -- it is not defined in this section.
this_filename = filename

for round_no, table_arg in enumerate(primes):
    # A fresh 15-mer table per round, built with this round's `primes` entry.
    ht = khmer.new_hashtable(15, table_arg)
    next_filename = filename + '.round%d' % round_no

    # Load the current input into the table, then filter it into the next
    # file. The trailing 5 is passed through unchanged (presumably an
    # abundance cutoff -- confirm against khmer.filter_fasta_file).
    total_reads, n_consumed = ht.consume_fasta(this_filename)
    _, n_seq_kept = khmer.filter_fasta_file(
        ht, this_filename, total_reads, next_filename, 5)

    print('%d: ate %d k-mers of %d reads' % (
        round_no, n_consumed, total_reads))

    # Computed only after the first progress line has been printed, so a
    # zero read count still fails at the same point as before.
    pct_kept = n_seq_kept / float(total_reads) * 100
    print('%d: kept %d of %d (%.1f%%)' % (
        round_no, n_seq_kept, total_reads, pct_kept))

    # One record per round: round index, reads kept, occupied table slots.
    fp.write('%d %d %d\n' % (round_no, n_seq_kept, ht.n_occupied()))
    fp.flush()

    # This round's output becomes the next round's input.
    this_filename = next_filename

###

# vim: set ft=python ts=4 sts=4 sw=4 et tw=79:
#! /usr/bin/env python
import khmer

# Input FASTA file used by everything below.
filename = '1m.fa'

# Baseline pass: load every read into a 15-mer table created with 4 ** 15
# as its second argument (presumably one slot per possible 15-mer over a
# 4-letter alphabet, hence "exactly" -- confirm against the khmer API).
ht_full = khmer.new_hashtable(15, 4 ** 15)
print('filtering 15-mers exactly:')
total_reads, n_consumed = ht_full.consume_fasta(filename)
print('ate %d k-mers of %d reads' % (n_consumed, total_reads))
print('filtering...')

# The exact filtering pass is deliberately switched off; only the occupancy
# count below is reported.
if 0:
    exact_output = filename + '.15.exact'
    total_reads_2, n_seq_kept = khmer.filter_fasta_file(
        ht_full, filename, total_reads, exact_output, 5)

    pct_kept = n_seq_kept / float(total_reads) * 100
    print('kept %d of %d (%.1f%%)' % (n_seq_kept, total_reads, pct_kept))

print('counting!')
print('%d total k-mers' % ht_full.n_occupied())
          75000181,
          75000187,
          75000197,
          75000227,
          75000241]

###

# Run the filter repeatedly, feeding each round's output file into the next
# round; round 0 starts from the original input file.
# NOTE(review): `fp` is expected to be an open, writable file object set up
# elsewhere in the script -- it is not defined in this section.
this_filename = filename

for round_no, table_arg in enumerate(primes):
    # New 15-mer table for this round, created with the matching `primes`
    # entry as its second argument.
    ht = khmer.new_hashtable(15, table_arg)
    next_filename = filename + '.round%d' % round_no

    # Count the current input, then write the filtered reads to
    # next_filename. The final argument 5 is forwarded as-is (presumably a
    # cutoff -- confirm against khmer.filter_fasta_file).
    total_reads, n_consumed = ht.consume_fasta(this_filename)
    filter_result = khmer.filter_fasta_file(
        ht, this_filename, total_reads, next_filename, 5)
    _, n_seq_kept = filter_result

    print('%d: ate %d k-mers of %d reads' % (
        round_no, n_consumed, total_reads))

    # Kept as a separate step after the first print so the order of output
    # and any division error is unchanged.
    pct_kept = n_seq_kept / float(total_reads) * 100
    print('%d: kept %d of %d (%.1f%%)' % (
        round_no, n_seq_kept, total_reads, pct_kept))

    # Log line: round index, reads kept, occupied table slots.
    fp.write('%d %d %d\n' % (round_no, n_seq_kept, ht.n_occupied()))
    fp.flush()

    # Next round consumes what this round produced.
    this_filename = next_filename

###

# vim: set ft=python ts=4 sts=4 sw=4 et tw=79:
#! /usr/bin/env python
import khmer

# FASTA file that the rest of the script operates on.
filename = '1m.fa'

# Reference pass over the unfiltered input using a 15-mer table created
# with 4 ** 15 as its second argument (presumably large enough to hold every
# possible 15-mer without collisions -- confirm against the khmer API).
ht_full = khmer.new_hashtable(15, 4 ** 15)
print('filtering 15-mers exactly:')
total_reads, n_consumed = ht_full.consume_fasta(filename)
print('ate %d k-mers of %d reads' % (n_consumed, total_reads))
print('filtering...')

# Disabled on purpose: the exact filter run is skipped and only the table
# occupancy is printed afterwards.
if 0:
    exact_output = filename + '.15.exact'
    exact_result = khmer.filter_fasta_file(
        ht_full, filename, total_reads, exact_output, 5)
    total_reads_2, n_seq_kept = exact_result

    pct_kept = n_seq_kept / float(total_reads) * 100
    print('kept %d of %d (%.1f%%)' % (n_seq_kept, total_reads, pct_kept))

print('counting!')
print('%d total k-mers' % ht_full.n_occupied())

# vim: set ft=python ts=4 sts=4 sw=4 et tw=79: