def test_split_byte_murmur():
    """Check the byte is correctly split when using the murmur hash.

    One k-mer is added and its own count must become 1 while the count of
    the second k-mer stays at 0.
    """
    table = SmallCounttable(4, 4, 1)

    # These k-mers were carefully chosen to have hash values that produce
    # consecutive indices in the count table.
    added, neighbour = "AAAC", "AAAG"
    expected_hashes = (
        (added, [11898086063751343884]),
        (neighbour, [10548630838975263317]),
    )
    # Pin the hash values first, so a change in hashing is reported as such
    # rather than as a counting failure below.
    for kmer, hashes in expected_hashes:
        assert table.get_kmer_hashes(kmer) == hashes

    table.add(added)

    assert table.get(added) == 1
    assert table.get(neighbour) == 0
def test_save_load(Tabletype):
    """Save a table holding one k-mer, reload it, and check the count.

    ``Tabletype`` is called with a k-mer size of 5 to build the table
    (presumably supplied by a pytest fixture defined elsewhere -- confirm).
    The concrete type of the resulting object selects which loader is used.
    """

    def reload_table(table_cls, path):
        # should we provide a single load function here? yes, probably. @CTB
        if table_cls == _Countgraph:
            return khmer.load_countgraph(path)
        if table_cls == Counttable:
            return Counttable.load(path)
        if table_cls == _SmallCountgraph:
            return khmer.load_countgraph(path, small=True)
        if table_cls == SmallCounttable:
            return SmallCounttable.load(path)
        if table_cls == _Nodegraph:
            return khmer.load_nodegraph(path)
        if table_cls == Nodetable:
            return Nodetable.load(path)
        raise Exception("unknown tabletype")

    table = Tabletype(5)
    table_cls = type(table)
    savefile = utils.get_temp_filename('tablesave.out')

    # test add(dna)
    table.add("ATGGC")
    assert table.get("ATGGC") == 1

    table.save(savefile)
    loaded = reload_table(table_cls, savefile)

    # The reloaded table must report the same count as before saving.
    assert loaded.get('ATGGC') == 1
def test_read_write():
    """Save a populated SmallCounttable and compare it with a reloaded copy.

    400 random 20-mers (fixed seed, so the run is reproducible) are added,
    the table is written to a temporary file, and the table loaded back
    from that file must agree on the k-mer size and on every k-mer's count.
    """
    rng = random.Random(1)
    original = SmallCounttable(20, 1e2, 4)

    kmers = []
    for _ in range(400):
        bases = [rng.choice("ACGT") for _ in range(20)]
        kmers.append("".join(bases))

    for kmer in kmers:
        original.add(kmer)

    fname = utils.get_temp_filename('zzz')
    original.save(fname)

    # The copy is built from the file alone: no k-mer size or table
    # dimensions are passed to load().
    reloaded = SmallCounttable.load(fname)

    assert original.ksize() == reloaded.ksize()
    for kmer in kmers:
        assert original.get(kmer) == reloaded.get(kmer)
def test_overflow():
    """Check that we do not overflow into the other part of the byte.

    A k-mer is added 17 times; its count must stop at 15 and the count of
    the partner k-mer must stay at 0. The check is then repeated with the
    roles of the two k-mers swapped, on a fresh table.
    """
    # Each pair is (k-mer to saturate, k-mer that must stay untouched).
    # The second pair repeats the check with the k-mer that hashes to the
    # other half of the byte.
    scenarios = (
        ("AAAA", "AAAT"),
        ("AAAT", "AAAA"),
    )

    for saturated, untouched in scenarios:
        table = SmallCounttable(4, 1e6, 4)

        # try to overflow our 4bit counter
        for _ in range(17):
            table.add(saturated)

        assert table.get(saturated) == 15
        assert table.get(untouched) == 0
def test_random_kmers():
    """Check for out-of-bounds errors and similar with random k-mers.

    400 random 20-mers (fixed seed) are added to a SmallCounttable and then
    looked up again. Nothing is asserted about the counts: the test passes
    as long as no call raises.
    """
    rng = random.Random(1)
    table = SmallCounttable(20, 1e2, 4)

    def random_kmer():
        # Draw 20 bases, one rng.choice() call per base.
        return "".join(rng.choice("ACGT") for _ in range(20))

    kmers = [random_kmer() for _ in range(400)]

    # Insert everything first, then query everything.
    for kmer in kmers:
        table.add(kmer)
    for kmer in kmers:
        table.get(kmer)
def test_single_add():
    """A k-mer added once to a new SmallCounttable has a count of 1."""
    kmer = "AAAA"
    table = SmallCounttable(4, 1e6, 4)

    table.add(kmer)

    assert table.get(kmer) == 1