def test_add_item(self, item, bit_array_size, nb_salt):
    """Check that ``add_item`` sets every bit addressed by the salted hashes.

    For each salt in ``range(nb_salt)`` the expected address is
    ``hash(str(salt) + str(item)) % bit_array_size``. Those bits must be
    ``False`` on a fresh filter and truthy once the item has been added.
    """

    def address_for(salt):
        # Address formula the test expects the filter to use for this salt.
        return hash(str(salt) + str(item)) % bit_array_size

    bloom_filter = BloomFilter(bit_array_size=bit_array_size, nb_salt=nb_salt)

    # Before insertion: none of the item's bits may be set.
    for salt in range(nb_salt):
        assert bloom_filter.bit_array[address_for(salt)] is False

    bloom_filter.add_item(item=item)

    # After insertion: every one of the item's bits must be set.
    for salt in range(nb_salt):
        assert bloom_filter.bit_array[address_for(salt)]
def test_all_bits_should_be_set_to_false(self, bit_array_size, nb_salt):
    """Check that a freshly built filter has every bit equal to ``False``."""
    fresh_filter = BloomFilter(bit_array_size=bit_array_size, nb_salt=nb_salt)
    # Identity check on purpose: each entry must be the ``False`` singleton,
    # not merely a falsy value.
    assert all(bit is False for bit in fresh_filter.bit_array)
def test_bit_array_size(self, bit_array_size, nb_salt):
    """Check that the filter honours the requested bit-array size.

    Both the actual length of ``bit_array`` and the ``bit_array_size``
    attribute must equal the value passed to the constructor.
    """
    sized_filter = BloomFilter(bit_array_size=bit_array_size, nb_salt=nb_salt)

    actual_length = len(sized_filter.bit_array)
    assert actual_length == bit_array_size

    reported_size = sized_filter.bit_array_size
    assert reported_size == bit_array_size
def benchmark(bit_array_size, nb_salt, input_cardinal):
    """Measure false negative and false positive rates of a ``BloomFilter``.

    The integers ``0 .. input_cardinal - 1`` are inserted into a new filter.
    Each of them is then looked up again (any miss is a false negative), and
    the next ``100 * input_cardinal`` integers, none of which were inserted,
    are looked up (any hit is a false positive).

    Args:
        bit_array_size: Forwarded to ``BloomFilter`` and to
            ``false_positive_rate``.
        nb_salt: Forwarded to ``BloomFilter`` and to ``false_positive_rate``.
        input_cardinal: Number of integers inserted into the filter.

    Returns:
        A tuple ``(false_negative_rate, false_positive_rate)`` holding the
        experimentally observed rates.

    Raises:
        AssertionError: If any inserted item is not retrieved, or if the
            observed false positive rate exceeds the value returned by
            ``false_positive_rate(...)``.
        ZeroDivisionError: If ``input_cardinal`` is 0, because both rates are
            divided by a multiple of it.
    """
    logger = logging.getLogger(__name__)
    bloom_filter = BloomFilter(bit_array_size=bit_array_size, nb_salt=nb_salt)

    for item in range(input_cardinal):
        bloom_filter.add_item(item=item)

    # Every inserted item must be found again.
    false_negative_count = sum(
        1
        for item in range(input_cardinal)
        if not bloom_filter.retrieve_item(item=item)
    )
    exp_false_negative_rate = false_negative_count / input_cardinal
    the_false_negative_rate = 0
    logger.info(
        'False negative rate: %s should be %s',
        exp_false_negative_rate,
        the_false_negative_rate,
    )
    assert exp_false_negative_rate == 0

    # Probe 100x as many never-inserted items as were inserted.
    probe_count = 100 * input_cardinal
    false_positive_count = sum(
        1
        for item in range(input_cardinal, input_cardinal + probe_count)
        if bloom_filter.retrieve_item(item=item)
    )
    exp_false_positive_rate = false_positive_count / probe_count
    # Reference rate from the module's helper (presumably the theoretical
    # false positive probability -- confirm against its definition).
    the_false_positive_rate = false_positive_rate(
        bit_array_size=bit_array_size,
        nb_salt=nb_salt,
        input_cardinal=input_cardinal,
    )
    # The message states "<=" so that it matches the assertion below.
    logger.info(
        'False positive rate: %s should be <= %s',
        exp_false_positive_rate,
        the_false_positive_rate,
    )
    assert exp_false_positive_rate <= the_false_positive_rate

    return exp_false_negative_rate, exp_false_positive_rate
def test_inserted_element(self):
    """Check that every word added to the filter is reported as a member."""
    inserted_words = ('test', 'another')
    bf = BloomFilter(1000, 2)

    for word in inserted_words:
        bf.add(word)

    # Membership is checked only after all insertions are done.
    for word in inserted_words:
        assert word in bf
def test_retrieve_item(self, item, bit_array_size, nb_salt):
    """Check ``retrieve_item`` before and after the item is inserted.

    On a fresh filter the lookup must return exactly ``False``; after
    ``add_item`` the lookup must return a truthy value.
    """
    bloom_filter = BloomFilter(bit_array_size=bit_array_size, nb_salt=nb_salt)

    found_before_insert = bloom_filter.retrieve_item(item=item)
    assert found_before_insert is False

    bloom_filter.add_item(item=item)

    found_after_insert = bloom_filter.retrieve_item(item=item)
    assert found_after_insert
def test_bloom_filter():
    """Check ``not_exist`` for inserted and never-inserted date strings."""
    inserted_dates = ("10.17.2013", "10.17.2014")
    bloom_filter = BloomFilter(100)

    for date in inserted_dates:
        bloom_filter.add(date)

    # Inserted values must never be reported as absent.
    for date in inserted_dates:
        assert not bloom_filter.not_exist(date)

    # A value that was never added is expected to be reported as absent.
    assert bloom_filter.not_exist("10.17.2021")
def test_minimal_memory_error_rate(self, input_cardinal, error_rate):
    """Check that the minimal-memory filter hits the requested error rate.

    A filter is built with ``BloomFilter.minimal_memory_bloom_filter``, and
    the value returned by ``false_positive_rate`` for its ``bit_array_size``
    and ``nb_salt`` must lie within 5% of ``error_rate``, in either direction.
    """
    bloom_filter = BloomFilter.minimal_memory_bloom_filter(
        input_cardinal=input_cardinal,
        error_rate=error_rate,
    )
    observed_error_rate = false_positive_rate(
        bit_array_size=bloom_filter.bit_array_size,
        nb_salt=bloom_filter.nb_salt,
        input_cardinal=input_cardinal,
    )

    tolerance = error_rate / 20  # 5% interval around the requested rate

    # The check is two-sided, so the message must not claim the observed
    # rate is "higher": it can equally fail by being too low.
    assert observed_error_rate == pytest.approx(
        expected=error_rate, abs=tolerance
    ), (
        f"Observed error rate {observed_error_rate} is not within "
        f"{tolerance} of expected {error_rate}"
    )
def test_minimal_false_positive_rate(self, bit_array_size, input_cardinal):
    """Check that the chosen ``nb_salt`` is a strict local minimum.

    The filter built by ``minimal_false_positive_rate_bloom_filter`` must
    yield a strictly lower ``false_positive_rate`` than the same
    configuration with one salt fewer or one salt more.
    """
    bloom_filter = BloomFilter.minimal_false_positive_rate_bloom_filter(
        bit_array_size=bit_array_size,
        input_cardinal=input_cardinal,
    )
    chosen_salts = bloom_filter.nb_salt

    def rate_with(salt_count):
        # Only the salt count varies; size and cardinality stay fixed.
        return false_positive_rate(
            bit_array_size=bit_array_size,
            nb_salt=salt_count,
            input_cardinal=input_cardinal,
        )

    # Strictly better than using one salt fewer ...
    assert rate_with(chosen_salts) < rate_with(chosen_salts - 1)
    # ... and strictly better than using one salt more.
    assert rate_with(chosen_salts) < rate_with(chosen_salts + 1)
def test_zero_fp(self):
    """Check that ``build_for_target_fp`` rejects a target rate of 2.0.

    The call must raise ``ValueError``.
    """
    # NOTE(review): the test name says "zero" but the rate passed is 2.0;
    # confirm which invalid value this test is meant to cover.
    with self.assertRaises(ValueError):
        # The result is deliberately not bound: only the raised exception
        # matters here.
        BloomFilter.build_for_target_fp(10000, 2.0)
def test_inserted_element_build_with_target_fp(self):
    """Check that a word that was never added is reported as absent.

    The filter is built via ``build_for_target_fp(10000, .01)`` and only
    ``'word'`` is inserted before ``'some_other_word'`` is looked up.
    """
    target_fp_filter = BloomFilter.build_for_target_fp(10000, .01)
    target_fp_filter.add('word')

    is_member = 'some_other_word' in target_fp_filter
    assert not is_member
def test_inserteds_element_build_with_target_fp(self):
    """Check that an added word is reported as a member.

    The filter is built via ``build_for_target_fp(1000, .01)``.
    """
    inserted_word = 'word'
    target_fp_filter = BloomFilter.build_for_target_fp(1000, .01)
    target_fp_filter.add(inserted_word)

    is_member = inserted_word in target_fp_filter
    assert is_member