def test_load_gz():
    """Load a counting hash from a gzip file produced outside of khmer.

    The table is saved uncompressed, compressed by hand with ``gzip``, and
    then loaded from the compressed copy. The loaded table must report the
    same abundance distribution over the input file as the original.
    """
    inpath = utils.get_test_data('random-20-a.fa')

    savepath = utils.get_temp_filename('tempcountingsave1.ht')
    loadpath = utils.get_temp_filename('tempcountingsave1.ht.gz')

    sizes = list(PRIMES_1m)
    sizes.append(1000005)

    # save uncompressed hashtable.
    hi = khmer._new_counting_hash(12, sizes)
    hi.consume_fasta(inpath)
    hi.save(savepath)

    # compress.  Both handles are released even if the copy fails part-way;
    # the gzip handle uses try/finally rather than ``with`` so this also
    # works where GzipFile is not a context manager.
    with open(savepath, 'rb') as in_file:
        out_file = gzip.open(loadpath, 'wb')
        try:
            out_file.writelines(in_file)
        finally:
            out_file.close()

    # load compressed hashtable.
    ht = khmer._new_counting_hash(12, sizes)
    ht.load(loadpath)

    # each distribution gets its own fresh tracking table.
    tracking = khmer._new_hashbits(12, sizes)
    x = hi.abundance_distribution(inpath, tracking)

    tracking = khmer._new_hashbits(12, sizes)
    y = ht.abundance_distribution(inpath, tracking)

    assert sum(x) == 3966, sum(x)
    assert x == y, (x, y)
def test_3_tables():
    """Check that the count for a k-mer needs a collision in all 3 tables.

    Three k-mers are chosen so that each one's forward hash equals the
    forward hash of ``GG`` modulo one of the three table sizes (see the
    comments below). The count reported for ``GG`` must stay at 1 after
    the first two are consumed and become 2 only after the third.
    """
    x = list(PRIMES_1m)
    x.append(1000005)

    hi = khmer._new_counting_hash(12, x)

    GG = 'G' * 12                   # forward_hash: 11184810
    assert khmer.forward_hash(GG, 12) == 11184810

    # Plain integer literals: the Python 2-only ``L`` suffix is a syntax
    # error on Python 3, and int/long compare equal on Python 2.
    collision_1 = 'AAACGTATGACT'
    assert khmer.forward_hash(collision_1, 12) == 184777

    collision_2 = 'AAATACCGAGCG'
    assert khmer.forward_hash(collision_2, 12) == 76603

    collision_3 = 'AAACGTATCGAG'
    assert khmer.forward_hash(collision_3, 12) == 184755

    # hash(GG) % 1000003 == hash(collision_1)
    # hash(GG) % 1009837 == hash(collision_2)
    # hash(GG) % 1000005 == hash(collision_3)

    hi.consume(GG)
    assert hi.get(GG) == 1

    hi.consume(collision_1)
    assert hi.get(GG) == 1

    hi.consume(collision_2)
    assert hi.get(GG) == 1

    hi.consume(collision_3)
    assert hi.get(GG) == 2
def test_save_load_gz():
    """Round-trip a counting hash through a save path ending in ``.gz``.

    A table filled from the test FASTA file is saved and loaded into a
    second table; both must yield the same abundance distribution.
    """
    fasta_path = utils.get_test_data('random-20-a.fa')
    gz_path = utils.get_temp_filename('tempcountingsave2.ht.gz')

    table_sizes = list(PRIMES_1m) + [1000005]

    # Fill the original table and write it out.
    original = khmer._new_counting_hash(12, table_sizes)
    original.consume_fasta(fasta_path)
    original.save(gz_path)

    # Read it back into a fresh table of the same geometry.
    reloaded = khmer._new_counting_hash(12, table_sizes)
    reloaded.load(gz_path)

    # A new tracking table is used for each distribution.
    dist_original = original.abundance_distribution(
        fasta_path, khmer._new_hashbits(12, table_sizes))
    dist_reloaded = reloaded.abundance_distribution(
        fasta_path, khmer._new_hashbits(12, table_sizes))

    assert sum(dist_original) == 3966, sum(dist_original)
    assert dist_original == dist_reloaded, (dist_original, dist_reloaded)
def test_save_load_gz():
    """Round-trip a counting hash through a save path ending in ``.gz``.

    A table filled from the test FASTA file is saved next to this test
    module and loaded into a second table; both must yield the same
    abundance distribution. The saved file is removed afterwards so the
    test does not leave artifacts in the source tree.
    """
    thisdir = os.path.dirname(__file__)
    inpath = os.path.join(thisdir, 'test-data/random-20-a.fa')
    savepath = os.path.join(thisdir, 'tempcountingsave2.ht.gz')

    sizes = list(PRIMES_1m)
    sizes.append(1000005)

    try:
        hi = khmer._new_counting_hash(12, sizes)
        hi.consume_fasta(inpath)
        hi.save(savepath)

        ht = khmer._new_counting_hash(12, sizes)
        ht.load(savepath)
    finally:
        # Guarded so that a failure before the file exists is not masked
        # by an error from os.remove.
        if os.path.exists(savepath):
            os.remove(savepath)

    # each distribution gets its own fresh tracking table.
    tracking = khmer._new_hashbits(12, sizes)
    x = hi.abundance_distribution(inpath, tracking)

    tracking = khmer._new_hashbits(12, sizes)
    y = ht.abundance_distribution(inpath, tracking)

    assert sum(x) == 3966, sum(x)
    assert x == y, (x, y)
def test_counting_bad_primes_list():
    """A list of strings as table sizes must be rejected with TypeError."""
    try:
        khmer._new_counting_hash(12, ["a", "b", "c"], 1)
    except TypeError as e:
        # ``as`` and the call form of print parse on Python 2.6+ and 3.
        print(str(e))
    else:
        assert 0, "bad list of primes should fail"
def setup(self):
    """Create the counting hash stored on ``self.hi``."""
    counting_table = khmer._new_counting_hash(12, PRIMES_1m)
    self.hi = counting_table