# Hashtable sizes, one per filtering round: each value is passed as the
# second argument to khmer.new_hashtable() in the loop below.
primes = [
    75000007, 75000017, 75000031, 75000047, 75000071, 75000083, 75000097,
    75000103, 75000113, 75000143, 75000157, 75000169, 75000173, 75000179,
    75000181, 75000187, 75000197, 75000227, 75000241,
]

###

# Iterative filtering: round n loads the previous round's output into a
# fresh hashtable of size primes[n], filters that same file into
# '<filename>.round<n>', and appends one summary line to `fp`.
#
# `filename`, `fp` and `khmer` are defined earlier in the script (outside
# this section).
# NOTE(review): the trailing 5 passed to filter_fasta_file is presumably a
# minimum k-mer count threshold -- confirm against the khmer API.
this_filename = filename
for n, prime in enumerate(primes):
    ht = khmer.new_hashtable(15, prime)
    next_filename = filename + '.round%d' % n

    total_reads, n_consumed = ht.consume_fasta(this_filename)
    _, n_seq_kept = khmer.filter_fasta_file(
        ht, this_filename, total_reads, next_filename, 5)

    # A round whose input contains no reads would otherwise divide by zero
    # while formatting the percentage.
    if total_reads:
        pct_kept = n_seq_kept / float(total_reads) * 100
    else:
        pct_kept = 0.0

    # Single-argument parenthesised print behaves the same on Python 2 and
    # is valid syntax on Python 3.
    print('%d: ate %d k-mers of %d reads' % (n, n_consumed, total_reads))
    print('%d: kept %d of %d (%.1f%%)' % (
        n, n_seq_kept, total_reads, pct_kept))

    # One line per round: round index, sequences kept, occupied table slots.
    fp.write('%d %d %d\n' % (n, n_seq_kept, ht.n_occupied()))
    fp.flush()

    # The next round filters what this round kept.
    this_filename = next_filename

###

# vim: set ft=python ts=4 sts=4 sw=4 et tw=79:
#! /usr/bin/env python
# Load all 15-mers from a FASTA file into a khmer hashtable and report how
# many table entries end up occupied.
import khmer

filename = '1m.fa'

# 4 ** 15 is the number of distinct 15-mers over a 4-letter alphabet.
# NOTE(review): presumably this sizing is what makes the counting "exact"
# (see the message printed below) -- confirm against the khmer API.
ht_full = khmer.new_hashtable(15, 4 ** 15)

# Single-argument parenthesised print behaves the same on Python 2 and is
# valid syntax on Python 3.
print('filtering 15-mers exactly:')
total_reads, n_consumed = ht_full.consume_fasta(filename)
print('ate %d k-mers of %d reads' % (n_consumed, total_reads))

print('filtering...')
# The filtering pass is deliberately switched off; change 0 to 1 to write
# the filtered reads to '<filename>.15.exact'.
# NOTE(review): the original indentation was lost; the 'kept' report is
# placed inside this branch because n_seq_kept is only bound here.
if 0:
    (total_reads_2, n_seq_kept) = khmer.filter_fasta_file(
        ht_full, filename, total_reads, filename + '.15.exact', 5)
    print('kept %d of %d (%.1f%%)' % (
        n_seq_kept, total_reads, n_seq_kept / float(total_reads) * 100))

print('counting!')
print('%d total k-mers' % (ht_full.n_occupied()))
75000181, 75000187, 75000197, 75000227, 75000241] ### this_filename = filename for n, prime in enumerate(primes): ht = khmer.new_hashtable(15, prime) next_filename = filename + '.round%d' % n total_reads, n_consumed = ht.consume_fasta(this_filename) x = khmer.filter_fasta_file(ht, this_filename, total_reads, next_filename, 5) _, n_seq_kept = x print '%d: ate %d k-mers of %d reads' % (n, n_consumed, total_reads) print '%d: kept %d of %d (%.1f%%)' % ( n, n_seq_kept, total_reads, n_seq_kept / float(total_reads) * 100) fp.write('%d %d %d\n' % (n, n_seq_kept, ht.n_occupied())) fp.flush() this_filename = next_filename ### # vim: set ft=python ts=4 sts=4 sw=4 et tw=79:
#! /usr/bin/env python
# Count 15-mers from a FASTA file in a khmer hashtable and print the number
# of occupied table entries.
import khmer

filename = '1m.fa'

# The table is sized 4 ** 15: one entry per possible 15-mer over a 4-letter
# alphabet.
# NOTE(review): presumably that is why the run is described as "exact"
# below -- confirm against the khmer API.
ht_full = khmer.new_hashtable(15, 4 ** 15)

# print is written in the single-argument parenthesised form so the script
# behaves identically on Python 2 and also parses on Python 3.
print('filtering 15-mers exactly:')
total_reads, n_consumed = ht_full.consume_fasta(filename)
print('ate %d k-mers of %d reads' % (n_consumed, total_reads))

print('filtering...')
# Filtering is intentionally disabled; set the condition to 1 to produce
# '<filename>.15.exact'.
# NOTE(review): block extent reconstructed -- the 'kept' report must live
# inside this branch since n_seq_kept is assigned only here.
if 0:
    (total_reads_2, n_seq_kept) = khmer.filter_fasta_file(
        ht_full, filename, total_reads, filename + '.15.exact', 5)
    print('kept %d of %d (%.1f%%)' % (
        n_seq_kept, total_reads, n_seq_kept / float(total_reads) * 100))

print('counting!')
print('%d total k-mers' % (ht_full.n_occupied()))

# vim: set ft=python ts=4 sts=4 sw=4 et tw=79: