def test_count():
    """Check that ``count()`` grows only when a not-yet-seen element is added."""
    bloom = CountingBloomFilter(8000, 3)

    # A freshly constructed filter reports zero elements.
    assert bloom.count() == 0

    # Each step adds one element and gives the count expected right after it;
    # adding "test" a second time must leave the count unchanged.
    steps = (
        ("test", 1),
        ("test", 1),
        ("test2", 2),
    )
    for element, expected in steps:
        bloom.add(element)
        assert bloom.count() == expected
def test_count_when_full():
    """Check that a saturated filter reports ``length / num_of_hashes`` elements."""
    size = 8
    hash_count = 2
    bloom = CountingBloomFilter(size, hash_count)

    # 20 distinct strings are added so that a filter of length 8 is almost
    # certainly full afterwards.
    # NOTE: in the ideal case 4 items would be enough, but there is no way
    # to know in advance which ones they are.
    for key in map("test{}".format, range(20)):
        bloom.add(key)

    expected = size / hash_count
    assert bloom.count() == expected
# Sample text for the demo below, written one sentence (or clause) per
# literal; adjacent literals are concatenated into a single string.
LOREM_IPSUM = (
    "Lorem ipsum dolor sit amet, consectetur adipiscing elit. "
    "Mauris consequat leo ut vehicula placerat. "
    "In lacinia, nisl id maximus auctor, sem elit interdum urna, "
    "at efficitur tellus turpis at quam. "
    "Pellentesque eget iaculis turpis. "
    "Nam ac ligula ut nunc porttitor pharetra in non lorem. "
    "In purus metus, sollicitudin tristique sapien."
)


if __name__ == "__main__":
    # Demo: inspect an empty filter, load the sample words, inspect it again.
    bf = CountingBloomFilter(80000, 4)
    print(bf)

    memory_used = bf.sizeof()
    print("Bloom filter uses {} bytes in the memory".format(memory_used))
    estimate = bf.count()
    print("Filter contains approx. {} unique elements".format(estimate))

    # Membership check before anything has been added.
    verdict = "is" if bf.test("Lorem") else "is not"
    print("'Lorem' {} in the filter".format(verdict))

    # Tokens are de-duplicated before stripping, so "turpis" and "turpis."
    # are separate entries in `words` but reach the filter as the same key.
    words = set(LOREM_IPSUM.split())
    for word in words:
        cleaned = word.strip(" .,")
        bf.add(cleaned)

    added = len(words)
    estimate = bf.count()
    print("Added {} words, in the filter approx. {} unique elements".format(
        added, estimate))

    # Same membership check, now that the sample words are loaded.
    verdict = "is" if bf.test("Lorem") else "is not"
    print("'Lorem' {} in the filter".format(verdict))

    print("Delete 'Lorem' from the filter")