def test_lookup3():
    """Check ``KmerSignatureIndex.lookup`` on an index built from two filters.

    Two Bloom filters (size 250, one hash function) are populated from two
    kmer lists that differ only in their last kmer ("ATT" vs "TTT"). For
    every storage yielded by ``get_storages()`` the storage is cleared, an
    index is created from the filters, and ``lookup`` is expected to return
    one entry per distinct queried kmer:

    * "ATC" (in both lists)        -> ``bitarray("11")``
    * "ATT" (first list only)      -> ``bitarray("10")``
    * "TTT" (second list only)     -> ``bitarray("01")``

    A kmer repeated in the query ("ATC" twice) appears once in the result.
    """
    size = 250
    num_hashes = 1
    sample_kmers = (
        ["ATC", "ATG", "ATA", "ATT"],
        ["ATC", "ATG", "ATA", "TTT"],
    )
    filters = [
        BloomFilter(size, num_hashes).update(convert_query_kmers(kmers))
        for kmers in sample_kmers
    ]

    for storage in get_storages():
        # Start from an empty backend so earlier runs cannot leak into this one.
        storage.delete_all()
        index = KmerSignatureIndex.create(storage, filters, size, num_hashes)

        # (query, expected) pairs; rebuilt per storage so each lookup gets a
        # fresh query list, and checked in this order.
        cases = (
            (["ATC"], {"ATC": bitarray("11")}),
            (
                ["ATC", "ATC", "ATT"],
                {"ATC": bitarray("11"), "ATT": bitarray("10")},
            ),
            (
                ["ATC", "ATC", "ATT", "TTT"],
                {
                    "ATC": bitarray("11"),
                    "ATT": bitarray("10"),
                    "TTT": bitarray("01"),
                },
            ),
        )
        for query, expected in cases:
            assert index.lookup(query) == expected
def bloom(cls, config, kmers):
    """Build a Bloom filter from ``kmers`` and return its bit array.

    NOTE(review): the first parameter is ``cls`` and is not used in the body;
    presumably this is decorated as a classmethod outside this view.

    Args:
        config: Mapping read at keys ``"m"`` and ``"h"``; the values are
            forwarded to ``BloomFilter`` as its ``m`` and ``h`` arguments
            (presumably filter size and number of hash functions - confirm
            against ``BloomFilter``).
        kmers: Kmers to insert; they are first passed through
            ``convert_query_kmers``.

    Returns:
        The ``bitarray`` attribute of the populated ``BloomFilter``.
    """
    # Convert to canonical kmers before hashing them into the filter.
    canonical_kmers = convert_query_kmers(kmers)
    bf = BloomFilter(m=config["m"], h=config["h"])
    bf.update(canonical_kmers)
    return bf.bitarray