def test_decay(self):
     """Check that a key stops being reported after the decay time has passed.

     A key is added to a filter created with ``decay_time=4``; the test then
     waits four seconds through ``self.wait`` and asserts that the key is no
     longer reported by ``contains``.
     """
     tbf = TimingBloomFilter(500, decay_time=4, ioloop=self.io_loop).start()
     tbf += "hello"
     assert tbf.contains("hello")
     try:
         self.wait(timeout=4)
     except Exception:
         # NOTE(review): wait() is used here only to let four seconds pass;
         # presumably it raises when the timeout is reached -- confirm
         # against the test base class.  Catching Exception (instead of a
         # bare except) keeps KeyboardInterrupt/SystemExit from being
         # swallowed.
         pass
     assert not tbf.contains("hello")
    def test_save(self):
        """Round-trip a filter through ``tofile``/``fromfile`` and compare it.

        A key is added to a small filter, the filter is written to
        ``test.tbf``, read back with ``TimingBloomFilter.fromfile`` and the
        reloaded copy is checked for the key and for the same number of
        non-zero entries as the original.
        """
        tbf = TimingBloomFilter(5, decay_time=30, ioloop=self.io_loop).start()
        tbf += "hello"

        assert "hello" in tbf
        prev_num_nonzero = tbf.num_non_zero

        # Close the output file deterministically so everything is flushed
        # before the file is read back (the original leaked the handle).
        with open("test.tbf", "w+") as out:
            tbf.tofile(out)

        with TestFile("test.tbf") as fd:
            tbf2 = TimingBloomFilter.fromfile(fd)
        # Check the reloaded filter; the original asserted on ``tbf`` again,
        # which repeated the earlier check and never exercised the reload.
        assert "hello" in tbf2
        assert prev_num_nonzero == tbf2.num_non_zero
Example #3
0
def get_bloom(**overrides):
    """Build a TimingBloomFilter for tests.

    The constructor arguments are a copy of ``BLOOM_DEFAULTS`` with any
    keyword arguments passed to this helper applied on top.
    """
    settings = copy(BLOOM_DEFAULTS)
    settings.update(overrides)
    bloom = TimingBloomFilter(**settings)
    return bloom
def test_bloom_repeat_saves_with_optimization(tmpdir):
    """Save a bloom, reload and extend it, save again and verify the result.

    The second save must advance the mtime of both ``bloom.npy`` and
    ``meta.json``, and a further reload must report the keys checked at the
    end of the test.
    """
    # Work inside a fresh temporary directory
    bloom_dir = tmpdir.mkdir('bloom_test')
    bloom_path = str(bloom_dir)

    # Build a bloom through the helper and write it out for the first time
    first_gen = get_bloom(bloom_path)
    first_gen.save()

    # Remember when each save file was first written
    data_file = bloom_dir.join('bloom.npy')
    meta_file = bloom_dir.join('meta.json')
    initial_data_mtime = data_file.mtime()
    initial_meta_mtime = meta_file.mtime()

    # Pause for a second so that a later write gets a strictly newer mtime
    time.sleep(1)

    # Reload from disk, add three more keys and save again
    second_gen = TimingBloomFilter.load(bloom_path)
    for new_key in ('101', '102', '103'):
        second_gen.add(new_key)
    second_gen.save()

    # Both files must have been rewritten
    assert data_file.mtime() > initial_data_mtime
    assert meta_file.mtime() > initial_meta_mtime

    # A third load must see a key expected from the first generation
    # ('50'), a key added by the second ('103'), and still reject a key
    # that this test never added ('105')
    final_gen = TimingBloomFilter.load(bloom_path)
    assert final_gen.contains('50')
    assert final_gen.contains('103')
    assert not final_gen.contains('105')
def test_bloom_repeat_saves_with_optimization(tmpdir):
    """Verify that saving a reloaded, extended bloom rewrites its files.

    Steps: save a helper-built bloom, record the mtimes of ``bloom.npy``
    and ``meta.json``, reload, add keys, save again, check both mtimes
    moved forward, then reload once more and check membership.
    """
    # Temporary directory that holds the save files
    work_dir = tmpdir.mkdir('bloom_test')
    work_path = str(work_dir)

    # Initial bloom and initial save
    original = get_bloom(work_path)
    original.save()

    # mtimes right after the first save
    npy_file = work_dir.join('bloom.npy')
    json_file = work_dir.join('meta.json')
    npy_mtime_before = npy_file.mtime()
    json_mtime_before = json_file.mtime()

    # Wait one second so the next save gets a strictly newer mtime
    time.sleep(1)

    # Reload, extend with three more keys, save again
    extended = TimingBloomFilter.load(work_path)
    for extra_key in ('101', '102', '103'):
        extended.add(extra_key)
    extended.save()

    # The second save must have touched both files
    assert npy_file.mtime() > npy_mtime_before
    assert json_file.mtime() > json_mtime_before

    # Final reload: check the expected members and one non-member
    latest = TimingBloomFilter.load(work_path)
    assert latest.contains('50')
    assert latest.contains('103')
    assert not latest.contains('105')
Example #6
0
def test_init_with_bloom_data(exists_mock, load_mock):
    """Construct a bloom while the mocks report existing data and check it.

    NOTE(review): ``exists_mock`` and ``load_mock`` are presumably injected
    by patch decorators that are not visible in this snippet -- confirm
    what they replace.
    """
    # Arrange: the existence check succeeds and loading yields a sentinel
    load_mock.return_value = sentinel.data
    exists_mock.return_value = True

    init_kwargs = {
        'capacity': 1000,
        'error': 0.002,
        'decay_time': 86400,
        'data_path': '/does/not/exist',
    }

    # Act
    bloom = TimingBloomFilter(**init_kwargs)

    # Assert: attributes checked on the constructed bloom
    expected = {
        'capacity': init_kwargs['capacity'],
        'error': init_kwargs['error'],
        'data_path': init_kwargs['data_path'],
        'id': None,
        'num_bytes': 12935,
        'num_hashes': 9,
        'ring_size': 15,
        'dN': 7,
        'seconds_per_tick': 12342.857142857143,
        '_optimize': True,
        'data': sentinel.data,
    }
    assert_bloom_values(bloom, expected)

    # The paths are derived from the configured data path
    data_dir, meta_path, bloom_path = bloom._get_paths(None)
    assert data_dir == '/does/not/exist'
    assert meta_path == '/does/not/exist/meta.json'
    assert bloom_path == '/does/not/exist/bloom.npy'

    # Both mocks must have been called exactly once with the bloom file path
    exists_mock.assert_called_once_with(bloom_path)
    load_mock.assert_called_once_with(bloom_path)
def test_init_with_bloom_data(exists_mock, load_mock):
    """Check the state of a bloom constructed while data is reported present.

    NOTE(review): the two mock arguments are presumably supplied by patch
    decorators outside this snippet -- confirm what they stand in for.
    """
    # Mocks: the existence check returns True, the loader returns a sentinel
    load_mock.return_value = sentinel.data
    exists_mock.return_value = True

    # Constructor arguments
    ctor_args = {
        'capacity': 1000,
        'error': 0.002,
        'decay_time': 86400,
        'data_path': '/does/not/exist',
    }

    bloom = TimingBloomFilter(**ctor_args)

    # Attribute values the constructed bloom is expected to have
    wanted = {
        'capacity': ctor_args['capacity'],
        'error': ctor_args['error'],
        'data_path': ctor_args['data_path'],
        'id': None,
        'num_bytes': 12935,
        'num_hashes': 9,
        'ring_size': 15,
        'dN': 7,
        'seconds_per_tick': 12342.857142857143,
        '_optimize': True,
        'data': sentinel.data,
    }
    assert_bloom_values(bloom, wanted)

    # Directory, metadata file and bloom file paths
    paths = bloom._get_paths(None)
    base_dir, meta_json, bloom_npy = paths
    assert base_dir == '/does/not/exist'
    assert meta_json == '/does/not/exist/meta.json'
    assert bloom_npy == '/does/not/exist/bloom.npy'

    # Each mock is expected to be hit once, with the bloom file path
    exists_mock.assert_called_once_with(bloom_npy)
    load_mock.assert_called_once_with(bloom_npy)
    def test_holistic(self):
        """Exercise a filter end to end while timing every phase.

        The test creates a filter with capacity ``n`` and decay time ``T``
        seconds, adds ``N`` keys, checks that all of them are found, checks
        that the false-positive rate on ``N`` unseen keys does not exceed
        ``tbf.error``, waits until ``T + 1`` seconds have passed since the
        last insert and finally checks that the inserted keys are gone and
        that ``num_non_zero`` is 0.
        """
        n = int(2e4)
        N = int(1e4)
        T = 3
        # Parenthesised single-argument print works as a statement on
        # Python 2 and as a function call on Python 3.
        print("TimingBloom with capacity %e and expiration time %ds" % (n, T))

        with TimingBlock("Initialization"):
            tbf = TimingBloomFilter(n, decay_time=T, ioloop=self.io_loop)

        # Wrap decay() so that every call to it is timed too.
        orig_decay = tbf.decay

        def new_decay(*args, **kwargs):
            with TimingBlock("Decaying"):
                return orig_decay(*args, **kwargs)

        tbf.decay = new_decay
        # NOTE(review): _setup_decay() is called after swapping in the
        # wrapper, presumably so the scheduled callback picks it up --
        # confirm against TimingBloomFilter.
        tbf._setup_decay()
        tbf.start()

        print("num_hashes = %d, num_bytes = %d" % (tbf.num_hashes, tbf.num_bytes))
        print("sizeof(TimingBloom) = %d bytes" % (tbf.num_bytes))

        with TimingBlock("Adding %d values" % N, N):
            for i in range(N):
                tbf.add(str(i))
        last_insert = time.time()

        with TimingBlock("Testing %d positive values" % N, N):
            for i in range(N):
                assert str(i) in tbf

        with TimingBlock("Testing %d negative values" % N, N):
            # Keys N..2N-1 were never added; every hit is a false positive.
            err = sum(1 for i in range(N, 2 * N) if str(i) in tbf)
            tot_err = err / float(N)
            assert tot_err <= tbf.error, "Error is too high: %f > %f" % (tot_err, tbf.error)

        # Wait until one second past the expiration of the last insert.
        t = T - (time.time() - last_insert) + 1
        if t > 0:
            try:
                self.wait(timeout=t)
            except Exception:
                # NOTE(review): wait() is used here only to let time pass;
                # presumably it raises when the timeout is reached --
                # confirm against the test base class.  Exception (not a
                # bare except) lets KeyboardInterrupt/SystemExit propagate.
                pass

        with TimingBlock("Testing %d expired values" % N, N):
            # After expiry, any inserted key still found counts as an error.
            err = sum(1 for i in range(N) if str(i) in tbf)
            tot_err = err / float(N)
            assert tot_err <= tbf.error, "Error is too high: %f > %f" % (tot_err, tbf.error)

        assert tbf.num_non_zero == 0, "All entries in the bloom should be zero: %d non-zero entries" % tbf.num_non_zero
Example #9
0
def test_init_no_bloom_data():
    """Construct a bloom with an explicit id and check its resulting state.

    Besides the attribute values and the derived paths, the test passes
    the new bloom to ``assert_empty_bloom``.
    """
    # Constructor arguments
    init_kwargs = {
        'capacity': 1000,
        'error': 0.002,
        'decay_time': 86400,
        'data_path': '/does/not/exist',
        'id': 5,
    }

    bloom = TimingBloomFilter(**init_kwargs)

    # Attribute values the constructed bloom is expected to have
    expected = {
        'capacity': init_kwargs['capacity'],
        'error': init_kwargs['error'],
        'data_path': init_kwargs['data_path'],
        'id': init_kwargs['id'],
        'num_bytes': 12935,
        'num_hashes': 9,
        'ring_size': 15,
        'dN': 7,
        'seconds_per_tick': 12342.857142857143,
        '_optimize': True,
    }
    assert_bloom_values(bloom, expected)

    # Directory, metadata file and bloom file paths
    data_dir, meta_path, bloom_path = bloom._get_paths(None)
    assert data_dir == '/does/not/exist'
    assert meta_path == '/does/not/exist/meta.json'
    assert bloom_path == '/does/not/exist/bloom.npy'

    assert_empty_bloom(bloom)
def test_init_no_bloom_data():
    """Check attribute values, paths and emptiness of a newly built bloom.

    The bloom is constructed with an explicit ``id`` and a data path of
    ``/does/not/exist``.
    """
    # Inputs (``bloom_id`` avoids shadowing the ``id`` builtin locally)
    capacity, error, decay_time = 1000, 0.002, 86400
    data_path = '/does/not/exist'
    bloom_id = 5

    bloom = TimingBloomFilter(
        capacity=capacity,
        error=error,
        decay_time=decay_time,
        data_path=data_path,
        id=bloom_id,
    )

    # Attribute values the constructed bloom is expected to have
    wanted = {
        'capacity': capacity,
        'error': error,
        'data_path': data_path,
        'id': bloom_id,
        'num_bytes': 12935,
        'num_hashes': 9,
        'ring_size': 15,
        'dN': 7,
        'seconds_per_tick': 12342.857142857143,
        '_optimize': True,
    }
    assert_bloom_values(bloom, wanted)

    # Directory, metadata file and bloom file paths
    paths = bloom._get_paths(None)
    base_dir, meta_json, bloom_npy = paths
    assert base_dir == '/does/not/exist'
    assert meta_json == '/does/not/exist/meta.json'
    assert bloom_npy == '/does/not/exist/bloom.npy'

    assert_empty_bloom(bloom)
Example #11
0
def get_bloom(temp_path, disable_optimizations=False):
    """Return a TimingBloomFilter pre-filled with the keys '0' through '99'.

    The filter is created with ``data_path=temp_path`` and the given
    ``disable_optimizations`` flag; a few membership checks are run before
    it is returned.
    """
    populated = TimingBloomFilter(
        capacity=1000,
        decay_time=86400,
        data_path=temp_path,
        disable_optimizations=disable_optimizations,
    )

    # Insert the string form of every integer below 100
    for number in range(100):
        populated.add(str(number))

    # Sanity checks: two inserted keys are found, one absent key is not
    for present_key in ('1', '50'):
        assert populated.contains(present_key)
    assert not populated.contains('101')

    return populated
def test_bloom_initial_save_and_load_without_optimization(tmpdir):
    """Save a bloom built with optimizations disabled and load it back.

    The save must create ``bloom.npy`` and ``meta.json``, and the reloaded
    bloom must give the expected answers for three membership checks.
    """
    # Temporary directory that holds the save files
    bloom_dir = tmpdir.mkdir('bloom_test')
    bloom_path = str(bloom_dir)

    # Build the bloom through the helper and write it out
    unoptimized = get_bloom(bloom_path, disable_optimizations=True)
    unoptimized.save()

    # Both save files must now exist
    for filename in ('bloom.npy', 'meta.json'):
        assert bloom_dir.join(filename).check()

    # Load from disk and repeat the membership checks
    restored = TimingBloomFilter.load(bloom_path)
    for present_key in ('1', '50'):
        assert restored.contains(present_key)
    assert not restored.contains('101')
Example #13
0
def test_bloom_initial_save_and_load_without_optimization(tmpdir):
    """Check a first save/load cycle for a bloom with optimizations disabled.

    After saving, ``bloom.npy`` and ``meta.json`` must exist in the target
    directory, and a bloom loaded from that directory must answer the
    membership checks below as expected.
    """
    # Fresh directory for this test's files
    work_dir = tmpdir.mkdir('bloom_test')
    work_path = str(work_dir)

    # Create the bloom via the helper and save it
    source_bloom = get_bloom(work_path, disable_optimizations=True)
    source_bloom.save()

    # The save must have produced both files
    npy_file = work_dir.join('bloom.npy')
    json_file = work_dir.join('meta.json')
    assert npy_file.check()
    assert json_file.check()

    # Round-trip: load and verify membership answers
    loaded_bloom = TimingBloomFilter.load(work_path)
    assert loaded_bloom.contains('1')
    assert loaded_bloom.contains('50')
    assert not loaded_bloom.contains('101')
def get_bloom(temp_path, disable_optimizations=False):
    """Create a TimingBloomFilter at *temp_path* holding the keys '0'..'99'.

    ``disable_optimizations`` is forwarded to the constructor.  The helper
    asserts a few membership results before handing the filter back.
    """
    test_bloom = TimingBloomFilter(
        capacity=1000,
        decay_time=86400,
        data_path=temp_path,
        disable_optimizations=disable_optimizations,
    )

    # Fill the filter with one hundred string keys
    for value in range(100):
        test_bloom.add(str(value))

    # Verify the filter before returning it
    assert test_bloom.contains('1')
    assert test_bloom.contains('50')
    assert not test_bloom.contains('101')

    return test_bloom