def test_very_short_read():
    """Consume 'test-short.fa' with K=9 and K=8 and check the reported counts.

    The file is expected to yield one read in both cases, with zero k-mers
    consumed at K=9 and exactly one k-mer consumed at K=8.
    """
    path = utils.get_test_data('test-short.fa')

    # (k-mer size, expected number of k-mers consumed)
    for ksize, expected_kmers in ((9, 0), (8, 1)):
        table = khmer.new_hashtable(ksize, 4)
        n_reads, n_kmers = table.consume_fasta(path)
        assert n_reads == 1
        assert n_kmers == expected_kmers
def test_very_short_read():
    """Consume 'test-short.fa' (next to this file) with K=9 and K=8.

    One read is expected in both cases, with zero k-mers consumed at K=9
    and exactly one k-mer consumed at K=8.
    """
    path = os.path.join(thisdir, 'test-short.fa')
    table_size = 4**4 + 1

    # (k-mer size, expected number of k-mers consumed)
    for ksize, expected_kmers in ((9, 0), (8, 1)):
        table = khmer.new_hashtable(ksize, table_size)
        n_reads, n_kmers = table.consume_fasta(path)
        assert n_reads == 1
        assert n_kmers == expected_kmers
def test_very_short_read():
    """Consume 'test-short.fa' with K=9 and K=8 and check the reported counts.

    One read is expected in both cases, with zero k-mers consumed at K=9
    and exactly one k-mer consumed at K=8.  Each assertion reports the
    offending value on failure.
    """
    path = utils.get_test_data('test-short.fa')

    # (k-mer size, expected number of k-mers consumed)
    for ksize, expected_kmers in ((9, 0), (8, 1)):
        table = khmer.new_hashtable(ksize, 4)
        n_reads, n_kmers = table.consume_fasta(path)
        assert n_reads == 1, n_reads
        assert n_kmers == expected_kmers, n_kmers
def test_complete_no_collision():
    """Count every 4-mer once, then tally string vs. integer lookups.

    Every k-mer produced by the ktable is counted in a hashtable of size
    4**4.  Afterwards each k-mer is looked up both by string and by its
    integer index, and the number of non-zero results is checked.
    """
    table = khmer.new_hashtable(4, 4**4)
    ktable = khmer.new_ktable(4)

    # Fill the table with every k-mer the ktable can enumerate.
    for idx in range(ktable.n_entries()):
        table.count(ktable.reverse_hash(idx))

    n_palindromes = n_rc_filled = n_fwd_filled = 0

    for idx in range(ktable.n_entries()):
        kmer = ktable.reverse_hash(idx)

        # string hashing is rc aware
        if table.get(kmer):
            n_rc_filled += 1

        # palindromes are singular
        if table.get(kmer) == 1:
            n_palindromes += 1

        # int hashing is not rc aware
        if table.get(idx):
            n_fwd_filled += 1

    assert n_rc_filled == ktable.n_entries(), n_rc_filled
    assert n_palindromes == 16, n_palindromes  # @CTB check this
    assert n_fwd_filled == ktable.n_entries() / 2 + n_palindromes / 2, \
        n_fwd_filled
def test_abund(self):
    """Exercise consume_fasta and output_fasta_kmer_pos_freq on one read.

    Checks that both methods raise TypeError when called without
    arguments, that consume_fasta raises IOError for the path
    "nonexistent", and that the generated output file holds a single line
    of 114 - 10 + 1 whitespace-separated '1' entries.
    """
    ht = khmer.new_hashtable(10, 4 ** 10)

    filename = utils.get_test_data('test-abund-read.fa')
    outname = utils.get_temp_filename('test_abund.out')

    ht.consume_fasta(filename)

    try:
        ht.consume_fasta()
    except TypeError as err:
        print(str(err))
    else:
        assert 0, "should fail"

    try:
        ht.consume_fasta("nonexistent")
    except IOError as err:
        print(str(err))
    else:
        assert 0, "should fail"

    ht.output_fasta_kmer_pos_freq(filename, outname)

    try:
        ht.output_fasta_kmer_pos_freq()
    except TypeError as err:
        print(str(err))
    else:
        assert 0, "should fail"

    # Read inside a context manager so the handle is closed even when one
    # of the assertions below fails.
    with open(outname, "r") as fd:
        output = fd.readlines()

    assert len(output) == 1

    counts = output[0].strip().split()
    assert ['1'] * (114 - 10 + 1) == counts
def test_64bitshift():
    """With K=25, a substring of a consumed sequence has a non-zero min count."""
    table = khmer.new_hashtable(25, 4)

    fullstr = "GTATGCCAGCTCCAACTGGGCCGGTACGAGCAGGCCATTGCCTCTTGCCGCGATGCGTCGGCG"
    substr = "ATGCCAGCTCCAACTGGGCCGGTACGAGCAGGCCATTGCCTCTTGC"

    table.consume(fullstr)

    # The min count is re-queried for the failure message, as before.
    assert 0 < table.get_min_count(substr), table.get_min_count(substr)
def test_hashtable_n_entries():
    """n_entries() must raise TypeError when passed an argument."""
    countingtable = khmer.new_hashtable(4, 4)

    try:
        countingtable.n_entries("nope")
    except TypeError as err:
        # Parenthesised form is valid under both Python 2 and Python 3.
        print(str(err))
    else:
        assert 0, "n_entries should accept no arguments"
def test_abund(self):
    """Exercise consume_fasta and output_fasta_kmer_pos_freq on one read.

    Checks that both methods raise TypeError when called without
    arguments, that consume_fasta raises IOError for the path
    "nonexistent", and that the generated output file holds a single line
    of 114 - 10 + 1 whitespace-separated '1' entries.
    """
    ht = khmer.new_hashtable(10, 4**10)

    filename = utils.get_test_data('test-abund-read.fa')
    outname = utils.get_temp_filename('test_abund.out')

    ht.consume_fasta(filename)

    try:
        ht.consume_fasta()
    except TypeError as err:
        print(str(err))
    else:
        assert 0, "should fail"

    try:
        ht.consume_fasta("nonexistent")
    except IOError as err:
        print(str(err))
    else:
        assert 0, "should fail"

    ht.output_fasta_kmer_pos_freq(filename, outname)

    try:
        ht.output_fasta_kmer_pos_freq()
    except TypeError as err:
        print(str(err))
    else:
        assert 0, "should fail"

    # Read inside a context manager so the handle is closed even when one
    # of the assertions below fails.
    with open(outname, "r") as fd:
        output = fd.readlines()

    assert len(output) == 1

    counts = output[0].strip().split()
    assert ['1'] * (114 - 10 + 1) == counts
def test_filter_limit_n(self):
    """Run khmer.filter_fasta_file_limit_n twice on 'simple_3.fa'.

    After consuming the file (two reads expected), the filter is called
    with final arguments (2, 7) and then (2, 4); one and then two
    sequences are expected to be kept.
    """
    ht = khmer.new_hashtable(4, 4**4)

    filename = utils.get_test_data('simple_3.fa')
    outname = utils.get_temp_filename('test_filter.out')

    total_reads, n_consumed = ht.consume_fasta(filename)
    assert total_reads == 2, total_reads

    # (last positional argument, expected number of sequences kept);
    # total_reads from each call is fed into the next one, as before.
    for last_arg, expected_kept in ((7, 1), (4, 2)):
        total_reads, n_seq_kept = khmer.filter_fasta_file_limit_n(
            ht, filename, total_reads, outname, 2, last_arg)
        assert total_reads == 2
        assert n_seq_kept == expected_kept
def test_filter_limit_n(self):
    """Run khmer.filter_fasta_file_limit_n twice on 'test-data/simple_3.fa'.

    After consuming the file (two reads expected), the filter is called
    with final arguments (2, 7) and then (2, 4); one and then two
    sequences are expected to be kept.  Output goes under self.tempdir.
    """
    ht = khmer.new_hashtable(4, 4**4)

    filename = os.path.join(thisdir, 'test-data/simple_3.fa')
    outname = os.path.join(self.tempdir, 'test_filter.out')

    total_reads, n_consumed = ht.consume_fasta(filename)
    assert total_reads == 2, total_reads

    # (last positional argument, expected number of sequences kept);
    # total_reads from each call is fed into the next one, as before.
    for last_arg, expected_kept in ((7, 1), (4, 2)):
        total_reads, n_seq_kept = khmer.filter_fasta_file_limit_n(
            ht, filename, total_reads, outname, 2, last_arg)
        assert total_reads == 2
        assert n_seq_kept == expected_kept
def test_badcount():
    """count() must raise TypeError when called without an argument."""
    countingtable = khmer.new_hashtable(4, 4)

    try:
        countingtable.count()
    except TypeError as err:
        # 'as' binding and parenthesised print work on Python 2.6+ and 3.
        print(str(err))
    else:
        assert 0, "count should require one argument"
def test_hashtable_n_entries():
    """n_entries() must raise TypeError when passed an argument."""
    countingtable = khmer.new_hashtable(4, 4)

    try:
        countingtable.n_entries("nope")
    except TypeError as err:
        # 'as' binding and parenthesised print work on Python 2.6+ and 3.
        print(str(err))
    else:
        assert 0, "n_entries should accept no arguments"
def test_no_collision():
    """Count 'AAAA' and then 'TTTT'; the second lookup must report 2."""
    table = khmer.new_hashtable(4, 4)

    table.count('AAAA')
    assert table.get('AAAA') == 1

    # reverse complement
    table.count('TTTT')
    assert table.get('TTTT') == 2
def test_collision():
    """Count "AAAA" and then "TTTT"; the second lookup must report 2."""
    table = khmer.new_hashtable(4, 4)

    table.count("AAAA")
    first = table.get("AAAA")
    assert first == 1

    table.count("TTTT")
    second = table.get("TTTT")
    assert second == 2
def test_no_collision():
    """Count "AAAA" and then "TTTT"; the second lookup must report 2."""
    table = khmer.new_hashtable(4, 4)

    table.count("AAAA")
    assert table.get("AAAA") == 1

    # reverse complement
    table.count("TTTT")
    assert table.get("TTTT") == 2
def test_collision():
    """Count 'AAAA' and then 'TTTT'; the second lookup must report 2."""
    table = khmer.new_hashtable(4, 4)

    table.count('AAAA')
    first = table.get('AAAA')
    assert first == 1

    table.count('TTTT')
    second = table.get('TTTT')
    assert second == 2
def test_64bitshift_2():
    """Every 25-mer window of a consumed sequence must have a positive count."""
    ksize = 25
    table = khmer.new_hashtable(ksize, 4)
    fullstr = "GTATGCCAGCTCCAACTGGGCCGGTACGAGCAGGCCATTGCCTCTTGCCGCGATGCGTCGGCG"

    table.consume(fullstr)

    # Slide a window of width ksize across the whole sequence.
    for start in range(len(fullstr) - ksize + 1):
        window = fullstr[start:start + ksize]
        assert table.get(window) > 0
def test_consume_uniqify_first():
    """After consuming "TTTT", looking up "AAAA" must report a count of 1."""
    table = khmer.new_hashtable(4, 4)

    seq = "TTTT"
    seq_rc = "AAAA"

    table.consume(seq)

    assert table.get(seq_rc) == 1
def test_maxcount_consume():
    """Consuming 10000 'A's must leave the 'AAAA' count at MAX_COUNT."""
    # hashtable should saturate at some point so as not to overflow counter
    table = khmer.new_hashtable(4, 4)

    table.consume("A" * 10000)

    count = table.get('AAAA')
    assert count == MAX_COUNT, count  # this will depend on HashcountType...
def consume_fasta_if_intersect(ht, filename, total_reads, orig_readmask):
    """Build a readmask with filter_fasta_file_run and consume through it.

    A readmask is obtained from ``ht.filter_fasta_file_run(filename,
    total_reads, 1, 5)``, its ``n_kept()`` value is printed, and it is
    saved to the file 'the_readmask'.  A fresh hashtable of size
    (K, HTSIZE) then consumes ``filename`` using that readmask.

    ``orig_readmask`` is accepted but not used by the current
    implementation.

    Returns a tuple of the new hashtable and the second value returned by
    its ``consume_fasta`` call.
    """
    # Earlier approach, kept for reference:
    #mmt = ht.fasta_file_to_minmax(filename, total_reads, orig_readmask)
    #new_readmask = ht.filter_fasta_file_any(mmt, 2)

    new_readmask = ht.filter_fasta_file_run(filename, total_reads, 1, 5)

    # Single formatted string prints the same under Python 2 and Python 3.
    print('XXX %s' % (new_readmask.n_kept(),))
    new_readmask.save('the_readmask')

    ht = khmer.new_hashtable(K, HTSIZE)
    (t, n) = ht.consume_fasta(filename, 0, 0, new_readmask, False)

    return ht, n
def test_maxcount_consume_with_bigcount():
    """With bigcount enabled, 10000 'A's must give 'AAAA' a count of 9997."""
    # use the bigcount hack to avoid saturating the hashtable.
    table = khmer.new_hashtable(4, 4)
    table.set_use_bigcount(True)

    table.consume("A" * 10000)

    count = table.get('AAAA')
    assert count == 10000 - 3, count
def test_nonbool_in_consume_fasta():
    """consume_fasta must raise TypeError when "hi" is its fourth argument."""
    kh = khmer.new_hashtable(4, 4**4)

    # The former bare `except: raise` only re-raised, so it is dropped;
    # any other exception still propagates unchanged.
    try:
        kh.consume_fasta(reads_filename, 0, 0, "hi", False, callback_raise)
    except TypeError:
        pass
    else:
        assert 0
def test_raise_in_consume_fasta_build_readmask():
    """GoodException must propagate out of consume_fasta_build_readmask.

    The call is made with ``callback_raise`` as the callback argument; the
    test fails if no exception is raised.
    """
    kh = khmer.new_hashtable(4, 4**4)

    # The former bare `except: raise` only re-raised, so it is dropped;
    # any other exception still propagates unchanged.
    try:
        kh.consume_fasta_build_readmask(reads_filename, 0, 0, callback_raise)
        assert 0
    except GoodException:
        pass
def test_bad_mmt_in_filter_fasta_file_max():
    """filter_fasta_file_any must raise TypeError when given "hi" as its first argument."""
    ht = khmer.new_hashtable(4, 4**4)

    # The former bare `except: raise` only re-raised, so it is dropped;
    # any other exception still propagates unchanged.
    try:
        ht.filter_fasta_file_any("hi", 2)
    except TypeError:
        pass  # expected
    else:
        assert 0
def test_bad_readmask_in_consume_fasta():
    """consume_fasta must raise TypeError when "hi" is its fifth argument."""
    kh = khmer.new_hashtable(4, 4**4)

    # The former bare `except: raise` only re-raised, so it is dropped;
    # any other exception still propagates unchanged.
    try:
        kh.consume_fasta(reads_filename, 0, 0, None, "hi", callback_raise)
    except TypeError:
        pass
    else:
        assert 0
def test_raise_in_fasta_file_to_minmax():
    """GoodException must propagate out of fasta_file_to_minmax.

    The call is made with ``callback_raise`` as the callback argument; the
    test fails if no exception is raised.
    """
    ht = khmer.new_hashtable(4, 4**4)

    # The former bare `except: raise` only re-raised, so it is dropped;
    # any other exception still propagates unchanged.
    try:
        ht.fasta_file_to_minmax(reads_filename, N_READS, None, callback_raise)
        assert 0
    except GoodException:
        pass
def test_raise_in_consume_fasta_build_readmask():
    """Disabled test: GoodException should propagate out of consume_fasta_build_readmask.

    The unconditional early return below keeps the body from running; the
    remaining code records the intended check.
    """
    return  ## @CTB

    kh = khmer.new_hashtable(4, 4**4)

    # The former bare `except: raise` only re-raised, so it is dropped.
    try:
        kh.consume_fasta_build_readmask(reads_filename, 0, 0, callback_raise)
        assert 0
    except GoodException:
        pass
def test_raise_in_consume_fasta():
    """GoodException must propagate out of consume_fasta.

    The call is made with ``callback_raise`` as the callback argument; if
    it returns instead of raising, the first returned value is printed and
    the test fails.
    """
    kh = khmer.new_hashtable(4, 4**4)

    # The former bare `except: raise` only re-raised, so it is dropped;
    # any other exception still propagates unchanged.
    try:
        n, _ = kh.consume_fasta(reads_filename, 0, 0, None, False,
                                callback_raise)
        # Parenthesised form is valid under both Python 2 and Python 3.
        print(n)
        assert 0
    except GoodException:
        pass
def test_bad_readmask_in_filter_fasta_file_limit_n():
    """filter_fasta_file_limit_n must raise TypeError when "hi" is its fourth argument."""
    ht = khmer.new_hashtable(4, 4**4)
    mmt = ht.fasta_file_to_minmax(reads_filename, N_READS)

    # The former bare `except: raise` only re-raised, so it is dropped;
    # any other exception still propagates unchanged.
    try:
        ht.filter_fasta_file_limit_n(mmt, 2, 2, "hi")
    except TypeError:
        pass
    else:
        assert 0
def test_raise_in_fasta_file_to_minmax():
    """Disabled test: GoodException should propagate out of fasta_file_to_minmax.

    The unconditional early return below keeps the body from running; the
    remaining code records the intended check.
    """
    return  # @@CTB fix

    ht = khmer.new_hashtable(4, 4**4)

    # The former bare `except: raise` only re-raised, so it is dropped.
    try:
        ht.fasta_file_to_minmax(reads_filename, N_READS, None, callback_raise)
        assert 0
    except GoodException:
        pass
def test_raise_in_filter_fasta_file_max():
    """Disabled test: GoodException should propagate out of filter_fasta_file_any.

    The unconditional early return below keeps the body from running; the
    remaining code records the intended check.
    """
    return  ## @CTB

    ht = khmer.new_hashtable(4, 4**4)
    mmt = ht.fasta_file_to_minmax(reads_filename, N_READS)

    # The former bare `except: raise` only re-raised, so it is dropped.
    try:
        ht.filter_fasta_file_any(mmt, 2, None, callback_raise)
        assert 0
    except GoodException:
        pass
def test_raise_in_consume_fasta():
    """Disabled test: GoodException should propagate out of consume_fasta.

    The unconditional early return below keeps the body from running; the
    remaining code records the intended check.
    """
    return  # @CTB

    kh = khmer.new_hashtable(4, 4**4)

    # The former bare `except: raise` only re-raised, so it is dropped.
    try:
        n, _ = kh.consume_fasta(reads_filename, 0, 0, callback_raise)
        # Parenthesised form is valid under both Python 2 and Python 3.
        print(n)
        assert 0
    except GoodException:
        pass
def test_abund(self):
    """consume_fasta must raise TypeError when called without arguments.

    The hashtable first consumes 'test-abund-read.fa' normally; a temp
    filename is also requested but not used further in this block.
    """
    ht = khmer.new_hashtable(10, 4 ** 10)

    filename = utils.get_test_data('test-abund-read.fa')
    outname = utils.get_temp_filename('test_abund.out')

    ht.consume_fasta(filename)

    try:
        ht.consume_fasta()
    except TypeError as err:
        # 'as' binding and parenthesised print work on Python 2.6+ and 3.
        print(str(err))
    else:
        assert 0, "should fail"
def test_get_maxcount():
    """get_max_count on "AAAAACGT" must be 2 after one consume and 4 after two."""
    table = khmer.new_hashtable(4, 4)
    seq = "AAAAACGT"

    # Expected max count after each successive consume of the sequence.
    for expected in (2, 4):
        table.consume(seq)
        observed = table.get_max_count(seq)
        assert observed == expected
def test_abund(self):
    """consume_fasta must raise TypeError when called without arguments.

    The hashtable first consumes 'test-abund-read.fa' normally; a temp
    filename is also requested but not used further in this block.
    """
    ht = khmer.new_hashtable(10, 4**10)

    filename = utils.get_test_data('test-abund-read.fa')
    outname = utils.get_temp_filename('test_abund.out')

    ht.consume_fasta(filename)

    try:
        ht.consume_fasta()
    except TypeError as err:
        # 'as' binding and parenthesised print work on Python 2.6+ and 3.
        print(str(err))
    else:
        assert 0, "should fail"
def test_badcount():
    """count() must reject a missing argument and a wrong-length k-mer.

    Calling count() with no argument must raise TypeError; calling it with
    the five-character string 'ABCDE' on a K=4 table must raise ValueError.
    """
    countingtable = khmer.new_hashtable(4, 4)

    try:
        countingtable.count()
    except TypeError as err:
        # Parenthesised form is valid under both Python 2 and Python 3.
        print(str(err))
    else:
        assert 0, "count should require one argument"

    try:
        countingtable.count('ABCDE')
    except ValueError as err:
        print(str(err))
    else:
        assert 0, "count should require k-mer size to be equal"
def test_get_mincount():
    """get_min_count on "AAAAACGT" must be 1 after one consume and 2 after two."""
    table = khmer.new_hashtable(4, 4**4)
    seq = "AAAAACGT"

    # Expected min count after each successive consume of the sequence.
    for expected in (1, 2):
        table.consume(seq)
        observed = table.get_min_count(seq)
        assert observed == expected
def test_nonbool_in_consume_fasta():
    """Disabled test: consume_fasta should raise TypeError for "hi" as fourth argument.

    The unconditional early return below keeps the body from running; the
    remaining code records the intended check.
    """
    return  ## @CTB

    kh = khmer.new_hashtable(4, 4**4)

    # The former bare `except: raise` only re-raised, so it is dropped.
    try:
        kh.consume_fasta(reads_filename, 0, 0, "hi", False, callback_raise)
    except TypeError:
        pass
    else:
        assert 0
def test_get_mincount_rc():
    """Consuming "ACGTTTTT" after "AAAAACGT" must raise the latter's min count to 2."""
    table = khmer.new_hashtable(4, 4)

    s = "AAAAACGT"
    src = "ACGTTTTT"

    # (sequence to consume, expected min count of `s` afterwards)
    for to_consume, expected in ((s, 1), (src, 2)):
        table.consume(to_consume)
        observed = table.get_min_count(s)
        assert observed == expected
def test_bad_readmask_in_filter_fasta_file_max():
    """filter_fasta_file_any must raise TypeError when "hi" is its third argument.

    The reporting callback is reset before the minmax table is built.
    """
    ht = khmer.new_hashtable(4, 4**4)

    khmer.reset_reporting_callback()
    mmt = ht.fasta_file_to_minmax(reads_filename, N_READS)

    # The former bare `except: raise` only re-raised, so it is dropped;
    # any other exception still propagates unchanged.
    try:
        ht.filter_fasta_file_any(mmt, 2, "hi")
    except TypeError:
        pass  # expected
    else:
        assert 0
def test_raise_in_filter_fasta_file_max():
    """Disabled test: GoodException should propagate out of filter_fasta_file_any.

    The unconditional early return below keeps the body from running; the
    remaining code records the intended check.
    """
    return  ## @CTB

    ht = khmer.new_hashtable(4, 4**4)

    khmer.reset_reporting_callback()
    mmt = ht.fasta_file_to_minmax(reads_filename, N_READS)

    # The former bare `except: raise` only re-raised, so it is dropped.
    try:
        ht.filter_fasta_file_any(mmt, 2, None, callback_raise)
        assert 0
    except GoodException:
        pass
def test_badget():
    """get() must count present 6-mers, report 0 for absent ones, and reject 5-mers.

    After consuming the DNA string on a K=6 table, "AGCTTT" must have
    count 1 and "GATGAG" count 0; looking up the five-character string
    "AGCTT" must raise ValueError.
    """
    kh = khmer.new_hashtable(6, 4**10)

    DNA = "AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAG"

    kh.consume(DNA)

    assert kh.get("AGCTTT") == 1
    assert kh.get("GATGAG") == 0

    try:
        kh.get("AGCTT")
    except ValueError as err:
        # 'as' binding and parenthesised print work on Python 2.6+ and 3.
        print(str(err))
    else:
        assert 0, "this should fail"
def test_consume_build_readmask(self):
    """Check the readmask built by consume_fasta_build_readmask on 'simple_2.fa'.

    Four reads and 63 consumed k-mers are expected; the readmask must be
    set for indices 0-2 and unset for index 3.
    """
    ht = khmer.new_hashtable(10, 4**10)

    filename = utils.get_test_data('simple_2.fa')
    outname = utils.get_temp_filename('test_filter.out')

    # sequence #4 (index 3) is bad; the new readmask should have that.
    total_reads, n_consumed, readmask = \
        ht.consume_fasta_build_readmask(filename)

    assert total_reads == 4, total_reads
    assert n_consumed == 63, n_consumed

    for good_index in (0, 1, 2):
        assert readmask.get(good_index)
    assert not readmask.get(3)
def test_maxcount():
    """Repeated count('AAAA') must stop increasing at MAX_COUNT.

    The k-mer is counted up to 10000 times; the loop stops as soon as the
    reported count no longer changes.  The final count must not be 10000
    and must equal MAX_COUNT.
    """
    # hashtable should saturate at some point so as not to overflow counter
    kh = khmer.new_hashtable(4, 4)

    last_count = None
    for _ in range(0, 10000):
        kh.count('AAAA')
        c = kh.get('AAAA')

        # Single formatted string prints the same under Python 2 and 3.
        print('%s %s' % (last_count, c))
        if c == last_count:
            break
        last_count = c

    assert c != 10000, "should not be able to count to 10000"
    assert c == MAX_COUNT  # this will depend on HashcountType...
def test_maxcount_with_bigcount():
    """With bigcount enabled, count('AAAA') must reach 10000.

    The k-mer is counted up to 10000 times; the loop stops early if the
    reported count stops changing.  The final count must be 10000 and must
    differ from MAX_COUNT.
    """
    # hashtable should not saturate, if use_bigcount is set.
    kh = khmer.new_hashtable(4, 4)
    kh.set_use_bigcount(True)

    last_count = None
    for _ in range(0, 10000):
        kh.count('AAAA')
        c = kh.get('AAAA')

        # Single formatted string prints the same under Python 2 and 3.
        print('%s %s' % (last_count, c))
        if c == last_count:
            break
        last_count = c

    assert c == 10000, "should be able to count to 10000"
    assert c != MAX_COUNT