def test_add_item(self, item, bit_array_size, nb_salt):
     """Check that ``add_item`` turns on every bit addressed by the item.

     For each salt in ``range(nb_salt)`` the expected address is
     ``hash(str(salt) + str(item)) % bit_array_size``. Those bits must be
     ``False`` on a fresh filter and truthy once the item has been added.
     """
     def expected_address(salt):
         # Presumably mirrors the filter's own addressing scheme -- confirm
         # against the BloomFilter implementation.
         return hash(str(salt) + str(item)) % bit_array_size

     bloom = BloomFilter(bit_array_size=bit_array_size, nb_salt=nb_salt)

     # Nothing has been inserted yet: every addressed bit is still unset.
     for salt in range(nb_salt):
         assert bloom.bit_array[expected_address(salt)] is False

     bloom.add_item(item=item)

     # After insertion the very same addresses must be set.
     for salt in range(nb_salt):
         assert bloom.bit_array[expected_address(salt)]
 def test_all_bits_should_be_set_to_false(self, bit_array_size, nb_salt):
     """A freshly constructed filter must contain only ``False`` bits."""
     fresh_filter = BloomFilter(bit_array_size=bit_array_size, nb_salt=nb_salt)
     # Identity check on purpose: each entry must be the ``False`` singleton.
     assert all(bit is False for bit in fresh_filter.bit_array)
 def test_bit_array_size(self, bit_array_size, nb_salt):
     """The requested size must show up both as the length of ``bit_array``
     and as the ``bit_array_size`` attribute of the filter."""
     bf = BloomFilter(bit_array_size=bit_array_size, nb_salt=nb_salt)

     actual_length = len(bf.bit_array)
     assert actual_length == bit_array_size

     reported_size = bf.bit_array_size
     assert reported_size == bit_array_size
def benchmark(bit_array_size, nb_salt, input_cardinal):
    """Measure the false negative and false positive rates of a ``BloomFilter``.

    The integers ``0 .. input_cardinal - 1`` are inserted, then:

    * the same integers are looked up again to count false negatives, and
      that rate is asserted to be zero;
    * the next ``100 * input_cardinal`` integers, none of which was inserted,
      are looked up to count false positives, and that rate is asserted not
      to exceed the value returned by ``false_positive_rate``.

    Args:
        bit_array_size: forwarded to ``BloomFilter`` and
            ``false_positive_rate``.
        nb_salt: forwarded to ``BloomFilter`` and ``false_positive_rate``.
        input_cardinal: number of integers inserted. Both rates are divided
            by a multiple of it, so ``0`` raises ``ZeroDivisionError``.

    Returns:
        The tuple ``(false negative rate, false positive rate)`` as measured.

    Raises:
        AssertionError: if an inserted item is not retrieved, or if the
            measured false positive rate exceeds the reference value.
    """
    logger = logging.getLogger(__name__)
    bloom_filter = BloomFilter(bit_array_size=bit_array_size, nb_salt=nb_salt)

    for item in range(input_cardinal):
        bloom_filter.add_item(item=item)

    # Every inserted item must be found again.
    false_negative_count = sum(
        1
        for item in range(input_cardinal)
        if not bloom_filter.retrieve_item(item=item)
    )
    exp_false_negative_rate = false_negative_count / input_cardinal
    the_false_negative_rate = 0
    logger.info(f'False negative rate: {exp_false_negative_rate} '
                f'should be {the_false_negative_rate}')
    assert exp_false_negative_rate == the_false_negative_rate

    # Probe 100 * input_cardinal items that were never inserted.
    false_positive_count = sum(
        1
        for item in range(input_cardinal, 101 * input_cardinal)
        if bloom_filter.retrieve_item(item=item)
    )
    exp_false_positive_rate = false_positive_count / (100 * input_cardinal)
    the_false_positive_rate = false_positive_rate(
        bit_array_size=bit_array_size,
        nb_salt=nb_salt,
        input_cardinal=input_cardinal)
    # The message states the same bound the assertion below enforces.
    logger.info(f'False positive rate: {exp_false_positive_rate} '
                f'should be <= {the_false_positive_rate}')
    assert exp_false_positive_rate <= the_false_positive_rate

    return exp_false_negative_rate, exp_false_positive_rate
예제 #5
0
    def test_inserted_element(self):
        """Every word added to the filter must be reported as a member."""
        bf = BloomFilter(1000, 2)
        words = ('test', 'another')

        for word in words:
            bf.add(word)

        for word in words:
            assert word in bf
 def test_retrieve_item(self, item, bit_array_size, nb_salt):
     """``retrieve_item`` must be ``False`` before the item is added and
     truthy afterwards."""
     bloom = BloomFilter(bit_array_size=bit_array_size, nb_salt=nb_salt)

     found_before = bloom.retrieve_item(item=item)
     assert found_before is False

     bloom.add_item(item=item)

     found_after = bloom.retrieve_item(item=item)
     assert found_after
예제 #7
0
def test_bloom_filter():
    """Added strings must not be reported as absent; an unseen one must be."""
    bloom_filter = BloomFilter(100)
    inserted = ("10.17.2013", "10.17.2014")

    for entry in inserted:
        bloom_filter.add(entry)

    for entry in inserted:
        assert not bloom_filter.not_exist(entry)

    # This string was never added, so the filter must report it as absent.
    assert bloom_filter.not_exist("10.17.2021")
 def test_minimal_memory_error_rate(self, input_cardinal, error_rate):
     """Check that ``minimal_memory_bloom_filter`` meets the requested rate.

     The rate is not measured empirically: it is the value returned by
     ``false_positive_rate`` for the generated filter's ``bit_array_size``
     and ``nb_salt``. It must lie within ``error_rate / 20`` (5% of the
     requested rate) of ``error_rate``, above or below.
     """
     bloom_filter = BloomFilter.minimal_memory_bloom_filter(
         input_cardinal=input_cardinal, error_rate=error_rate)
     computed_error_rate = false_positive_rate(
         bit_array_size=bloom_filter.bit_array_size,
         nb_salt=bloom_filter.nb_salt,
         input_cardinal=input_cardinal
     )
     tolerance = error_rate / 20  # 5% of the requested rate
     # approx() fails for deviations in either direction, so the message
     # must not claim the rate is specifically "higher".
     assert computed_error_rate == pytest.approx(
         expected=error_rate,
         abs=tolerance
     ), "Computed error rate {observed} is not within 5% of expected {expected}".format(
         observed=computed_error_rate,
         expected=error_rate
     )
 def test_minimal_false_positive_rate(self, bit_array_size, input_cardinal):
     """The ``nb_salt`` chosen by ``minimal_false_positive_rate_bloom_filter``
     must give a strictly lower ``false_positive_rate`` than ``nb_salt - 1``
     and ``nb_salt + 1`` for the same size and cardinality."""
     def rate_with(salt_count):
         # Same size and cardinality every time; only the salt count varies.
         return false_positive_rate(
             bit_array_size=bit_array_size,
             nb_salt=salt_count,
             input_cardinal=input_cardinal
         )

     optimal = BloomFilter.minimal_false_positive_rate_bloom_filter(
         bit_array_size=bit_array_size,
         input_cardinal=input_cardinal
     )

     assert rate_with(optimal.nb_salt) < rate_with(optimal.nb_salt - 1)
     assert rate_with(optimal.nb_salt) < rate_with(optimal.nb_salt + 1)
예제 #10
0
 def test_zero_fp(self):
     """``build_for_target_fp(10000, 2.0)`` must raise ``ValueError``."""
     with self.assertRaises(ValueError):
         # Only the exception matters here; the return value is never used.
         BloomFilter.build_for_target_fp(10000, 2.0)
예제 #11
0
    def test_inserted_element_build_with_target_fp(self):
        """A word that was never added must not be reported as a member of a
        filter built with ``build_for_target_fp(10000, .01)``."""
        target_fp_filter = BloomFilter.build_for_target_fp(10000, .01)
        target_fp_filter.add('word')

        is_member = 'some_other_word' in target_fp_filter
        assert not is_member
예제 #12
0
    def test_inserteds_element_build_with_target_fp(self):
        """A word added to a filter built with ``build_for_target_fp(1000, .01)``
        must be reported as a member."""
        inserted_word = 'word'
        target_fp_filter = BloomFilter.build_for_target_fp(1000, .01)
        target_fp_filter.add(inserted_word)

        assert inserted_word in target_fp_filter