def test_decay(self):
    """An added item must no longer be reported present after decay_time passes."""
    stbf = ScalingTimingBloomFilter(500, decay_time=4, ioloop=self.io_loop).start()
    stbf += "hello"
    assert stbf.contains("hello")
    try:
        # Let the ioloop run long enough for the 4s decay window to expire the
        # item; self.wait() raising on timeout is the expected exit here, so
        # catch narrowly instead of a bare `except:`.
        self.wait(timeout=4)
    except Exception:
        pass
    assert not stbf.contains("hello")
    def test_save(self):
        """Round-trip a filter through tofile()/fromfile() and verify state survives."""
        stbf = ScalingTimingBloomFilter(5, decay_time=30, ioloop=self.io_loop).start()
        stbf += "hello"

        assert "hello" in stbf
        prev_num_nonzero = stbf.blooms[0]['bloom'].num_non_zero
        # Close the file handle after writing instead of leaking it.
        with open("test.stbf", "w+") as out:
            stbf.tofile(out)

        with TestFile("test.stbf") as fd:
            stbf2 = ScalingTimingBloomFilter.fromfile(fd)

        # BUG FIX: the membership check must run against the *reloaded* filter
        # (stbf2); the original asserted on stbf, which trivially passes.
        assert "hello" in stbf2
        assert prev_num_nonzero == stbf2.blooms[0]['bloom'].num_non_zero
    def test_size_stability(self):
        """The filter should scale up under load and scale back down as items decay."""
        stbf = ScalingTimingBloomFilter(10, decay_time=5, min_fill_factor=0.2, growth_factor=2, ioloop=self.io_loop).start()
        for i in xrange(100):
            stbf.add("FOO%d" % i)

        # BUG FIX: there is always at least one bloom, so `> 0` could never
        # fail; with capacity 10 and 100 inserts there must be more than one.
        assert len(stbf.blooms) > 1, "Did not scale up"

        for i in xrange(100, 130):
            stbf.add("FOO%d" % i)
            try:
                # Give the ioloop time to run decay; wait() raising on timeout
                # is expected.
                self.wait(timeout=.5)
            except Exception:
                pass
            # Scaled back down to a single (second-generation) bloom: success.
            if len(stbf.blooms) == 1 and stbf.blooms[0]['id'] == 1:
                return
        # BUG FIX: `assert "Did not scale down"` always passed because a
        # non-empty string is truthy; fail explicitly instead.
        assert False, "Did not scale down"
def test_full_init():
    """Construct a ScalingTimingBloomFilter with every argument supplied
    explicitly and verify they are all reflected on the instance, that the
    save paths derive correctly, and that the ticker is wired and started."""
    ticker = MagicMock(NoOpTicker)
    timing_bloom = MagicMock(TimingBloomFilter)
    timing_bloom.seconds_per_tick = 100

    init_kwargs = dict(
        error=0.0002,
        capacity=1000,
        decay_time=86400,
        ticker=ticker,
        data_path='/foo/bar/baz',
        error_tightening_ratio=0.4,
        growth_factor=3,
        min_fill_factor=0.1,
        max_fill_factor=0.9,
        insert_tail=False,
        blooms=[timing_bloom],
        disable_optimizations=True,
    )

    # Create the bloom under test.
    bloom = ScalingTimingBloomFilter(**init_kwargs)

    # Every constructor argument except the ticker should land on the
    # instance, plus the derived tightened error and the tick length taken
    # from the supplied sub-bloom.
    passthrough_keys = (
        'error', 'capacity', 'decay_time', 'error_tightening_ratio',
        'growth_factor', 'max_fill_factor', 'min_fill_factor', 'insert_tail',
        'data_path', 'blooms', 'disable_optimizations',
    )
    expected = dict((key, init_kwargs[key]) for key in passthrough_keys)
    expected['error_initial'] = 0.00012
    expected['seconds_per_tick'] = timing_bloom.seconds_per_tick
    assert_bloom_values(bloom, expected)

    data_path, meta_filename, blooms_path = _get_paths(bloom.data_path, None)
    assert meta_filename == '/foo/bar/baz/meta.json'
    assert blooms_path == '/foo/bar/baz/blooms'

    # The ticker must have been hooked up to decay and started exactly once.
    ticker.setup.assert_called_once_with(bloom.decay,
                                         timing_bloom.seconds_per_tick)
    ticker.start.assert_called_once_with()
def test_save_and_load_with_scaling(tmpdir):
    """Save, scale after a reload, save again, and check every bloom persists."""
    # Work inside a throwaway directory provided by pytest.
    testing_dir = tmpdir.mkdir('bloom_test')
    temp_path = str(testing_dir)

    # Get a bloom for testing and persist it.
    bloom = get_bloom(data_path=temp_path,
                      disable_optimizations=True,
                      capacity=200)
    bloom.save()

    # The first save should produce one sub-bloom directory plus metadata.
    blooms_path = testing_dir.join('blooms')
    meta_file = testing_dir.join('meta.json')
    assert blooms_path.check()
    assert meta_file.check()
    assert 1 == len(blooms_path.listdir())

    # Reload and insert enough new keys to force a scale-up.
    second_gen_bloom = ScalingTimingBloomFilter.load(temp_path)
    for i in range(101, 201):
        second_gen_bloom.add(str(i))
    second_gen_bloom.save()

    # After scaling there should now be two sub-bloom directories on disk.
    assert blooms_path.check()
    assert meta_file.check()
    assert 2 == len(blooms_path.listdir())

    # A final reload must see the post-scale keys (and not an absent one).
    third_gen_bloom = ScalingTimingBloomFilter.load(temp_path)
    assert third_gen_bloom.contains('101')
    assert third_gen_bloom.contains('150')
    assert not third_gen_bloom.contains('201')
    def test_holistic(self):
        """End-to-end exercise: fill past capacity, query positives/negatives,
        let items decay, and verify error rates and pruning, timing each phase.
        """
        n = int(1e4)  # configured capacity
        N = int(2e4)  # items inserted/queried -- double capacity to force scaling
        T = 3         # decay time in seconds
        print "ScalingTimingBloom with capacity %e and expiration time %ds" % (n, T)

        with TimingBlock("Initialization"):
            stbf = ScalingTimingBloomFilter(n, decay_time=T, ioloop=self.io_loop)

        # Wrap decay() so that every decay pass is timed as well; re-run
        # _setup_decay() so the wrapper is the callback that gets scheduled.
        orig_decay = stbf.decay
        def new_decay(*args, **kwargs):
            with TimingBlock("Decaying"):
                val = orig_decay(*args, **kwargs)
            return val
        setattr(stbf, "decay", new_decay)
        stbf._setup_decay()
        stbf.start()

        print "State of blooms: %d blooms with expected error %.2f%%" % (len(stbf.blooms), stbf.expected_error()*100.)

        # Phase 1: insert 2x capacity, which should trigger scale-ups.
        with TimingBlock("Adding %d values" % N, N):
            for i in xrange(N):
                stbf.add(str(i))
        last_insert = time.time()

        print "State of blooms: %d blooms with expected error %.2f%%" % (len(stbf.blooms), stbf.expected_error()*100.)

        # Phase 2: every inserted key must still be reported present.
        with TimingBlock("Testing %d positive values" % N, N):
            for i in xrange(N):
                assert str(i) in stbf

        # Phase 3: never-inserted keys may false-positive, but only within
        # the filter's configured error rate.
        with TimingBlock("Testing %d negative values" % N, N):
            err = 0
            for i in xrange(N, 2*N):
                if str(i) in stbf:
                    err += 1
            tot_err = err / float(N)
            assert tot_err <= stbf.error, "Error is too high: %f > %f" % (tot_err, stbf.error)

        # Wait out whatever remains of the decay window (plus 1s of slack);
        # self.wait() raising on timeout is the expected way out of the wait.
        try:
            t = T - (time.time() - last_insert) + 1
            if t > 0:
                self.wait(timeout = t)
        except:
            pass

        print "State of blooms: %d blooms with expected error %.2f%%" % (len(stbf.blooms), stbf.expected_error()*100.)

        # Phase 4: after decay, expired keys should mostly be gone -- again
        # allow false positives only up to the configured error.
        with TimingBlock("Testing %d expired values" % N, N):
            err = 0
            for i in xrange(N):
                if str(i) in stbf:
                    err += 1
            tot_err = err / float(N)
            assert tot_err <= stbf.error, "Error is too high: %f > %f" % (tot_err, stbf.error)

        # Decay should also have scaled the filter back down to one bloom.
        assert len(stbf.blooms) == 1, "Decay should have pruned all but one bloom filters: %d blooms left" % len(stbf.blooms)
def test_bloom_repeat_saves_with_optimization(tmpdir):
    """A reloaded bloom's save() must rewrite both the data and metadata files."""
    testing_dir = tmpdir.mkdir('bloom_test')
    # Setup a temporary directory and a populated bloom inside it.
    temp_path = str(testing_dir)
    bloom = get_bloom(data_path=temp_path)
    bloom.save()

    # Remember when each file was first written.
    bloom_file = testing_dir.join('blooms/0/bloom.npy')
    meta_file = testing_dir.join('meta.json')
    first_bloom_save = bloom_file.mtime()
    first_meta_save = meta_file.mtime()

    # mtime resolution can be a full second; make sure the clock advances.
    time.sleep(1)

    # Reload, add a few more keys, and save the second generation.
    second_gen_bloom = ScalingTimingBloomFilter.load(temp_path)
    for key in ('101', '102', '103'):
        second_gen_bloom.add(key)
    second_gen_bloom.save()

    # Both files must have been rewritten by the second save.
    assert bloom_file.mtime() > first_bloom_save
    assert meta_file.mtime() > first_meta_save

    # A final load must see both original and newly added keys.
    third_gen_bloom = ScalingTimingBloomFilter.load(temp_path)
    assert third_gen_bloom.contains('50')
    assert third_gen_bloom.contains('103')
    assert not third_gen_bloom.contains('105')
def test_save_and_load_with_scaling(tmpdir):
    """Persist a bloom, scale it up after reloading, and verify both saves.

    NOTE(review): this file defines test_save_and_load_with_scaling twice;
    this later definition shadows the earlier one at import time.
    """
    testing_dir = tmpdir.mkdir('bloom_test')
    temp_path = str(testing_dir)

    # Build a small-capacity bloom so that later inserts force a scale-up.
    bloom = get_bloom(data_path=temp_path, disable_optimizations=True, capacity=200)
    bloom.save()

    blooms_path = testing_dir.join('blooms')
    meta_file = testing_dir.join('meta.json')

    # First save: exactly one sub-bloom directory plus the metadata file.
    assert blooms_path.check()
    assert meta_file.check()
    assert len(blooms_path.listdir()) == 1

    # Reload and push in 100 fresh keys to trigger scaling.
    second_gen_bloom = ScalingTimingBloomFilter.load(temp_path)
    for i in range(101, 201):
        second_gen_bloom.add(str(i))
    second_gen_bloom.save()

    # Second save: two sub-bloom directories now exist.
    assert blooms_path.check()
    assert meta_file.check()
    assert len(blooms_path.listdir()) == 2

    # Final reload should contain the post-scale keys but not an absent one.
    third_gen_bloom = ScalingTimingBloomFilter.load(temp_path)
    assert third_gen_bloom.contains('101')
    assert third_gen_bloom.contains('150')
    assert not third_gen_bloom.contains('201')
def test_bloom_repeat_saves_with_optimization(tmpdir):
    """Re-saving after a reload must touch both bloom.npy and meta.json.

    NOTE(review): this file defines test_bloom_repeat_saves_with_optimization
    twice; this later definition shadows the earlier one at import time.
    """
    testing_dir = tmpdir.mkdir('bloom_test')
    temp_path = str(testing_dir)

    # Create, populate, and persist the initial bloom.
    bloom = get_bloom(data_path=temp_path)
    bloom.save()

    # Snapshot the modification times of the files the save produced.
    bloom_file = testing_dir.join('blooms/0/bloom.npy')
    meta_file = testing_dir.join('meta.json')
    initial_mtimes = (bloom_file.mtime(), meta_file.mtime())

    # Filesystem mtimes may only have 1s resolution, so wait a full second.
    time.sleep(1)

    # Second generation: reload, add keys, save again.
    second_gen_bloom = ScalingTimingBloomFilter.load(temp_path)
    second_gen_bloom.add('101')
    second_gen_bloom.add('102')
    second_gen_bloom.add('103')
    second_gen_bloom.save()

    # Both files must carry newer mtimes than the first save.
    assert initial_mtimes[0] < bloom_file.mtime()
    assert initial_mtimes[1] < meta_file.mtime()

    # Third generation load: original and new keys present, absent key not.
    third_gen_bloom = ScalingTimingBloomFilter.load(temp_path)
    assert third_gen_bloom.contains('50')
    assert third_gen_bloom.contains('103')
    assert not third_gen_bloom.contains('105')
def test_discover_blooms(listdir_mock, isdir_mock):
    """discover_blooms should return only the directory entries under a path."""
    candidates = ['/path/1', '/path/2', '/path/3']
    listdir_mock.return_value = candidates
    # The middle entry is a plain file and must be filtered out.
    isdir_mock.side_effect = [True, False, True]

    paths = ScalingTimingBloomFilter.discover_blooms('/path')

    assert paths == ['/path/1', '/path/3']

    # The directory was listed once and every entry was checked with isdir.
    listdir_mock.assert_called_once_with('/path')
    for candidate in candidates:
        isdir_mock.assert_any_call(candidate)
def test_discover_blooms(listdir_mock, isdir_mock):
    """Non-directory entries are excluded from the discovered bloom paths.

    NOTE(review): this file defines test_discover_blooms twice; this later
    definition shadows the earlier one at import time.
    """
    # Three candidate entries, of which only the first and last are dirs.
    listdir_mock.return_value = ['/path/1', '/path/2', '/path/3']
    isdir_mock.side_effect = [True, False, True]

    paths = ScalingTimingBloomFilter.discover_blooms('/path')

    expected_paths = ['/path/1', '/path/3']
    assert expected_paths == paths

    # Exactly one listdir call, and an isdir check per candidate entry.
    listdir_mock.assert_called_once_with('/path')
    for path in listdir_mock.return_value:
        isdir_mock.assert_any_call(path)
def test_init_without_optimizations(timing_bloom_mock):
    """Constructing with only the required arguments should apply the
    documented defaults, build a single initial sub-bloom, and fall back to
    a NoOpTicker when none is supplied."""
    error = 0.0002
    capacity = 1000
    decay_time = 86400

    bloom = ScalingTimingBloomFilter(
        error=error,
        capacity=capacity,
        decay_time=decay_time,
        disable_optimizations=True,
    )

    # Defaults, plus the derived initial error (error * tightening ratio).
    assert_bloom_values(
        bloom, {
            'error': error,
            'capacity': capacity,
            'decay_time': decay_time,
            'error_tightening_ratio': 0.5,
            'error_initial': 0.0001,
            'growth_factor': 2,
            'max_fill_factor': 0.8,
            'min_fill_factor': 0.2,
            'insert_tail': True,
            'data_path': None,
            'seconds_per_tick': bloom.blooms[0].seconds_per_tick,
            'disable_optimizations': True,
        })

    # Exactly one sub-bloom, created with the tightened error and a reduced
    # per-bloom capacity.
    assert len(bloom.blooms) == 1
    timing_bloom_mock.assert_called_once_with(
        capacity=693,
        decay_time=decay_time,
        error=0.0001,
        id=0,
        disable_optimizations=True,
    )

    # With no ticker supplied, a NoOpTicker is used.
    assert isinstance(bloom.ticker, NoOpTicker)
def get_bloom(n=100, **updates):
    """Build a ScalingTimingBloomFilter populated with ``n`` string keys.

    ``updates`` overrides entries in BLOOM_DEFAULTS before construction.
    Sanity-checks membership before returning the bloom.
    """
    kwargs = copy(BLOOM_DEFAULTS)
    kwargs.update(updates)

    # Create a bloom
    bloom = ScalingTimingBloomFilter(**kwargs)

    # BUG FIX: insert n keys (was hard-coded to 100) so the sanity checks
    # below, which are phrased in terms of n, hold for any n.
    for i in range(n):
        bloom.add(str(i))

    # Check that the bloom is working as expected
    assert bloom.contains('1')
    assert bloom.contains(str(n // 2))
    assert not bloom.contains(str(n + 1))

    return bloom
def get_bloom(n=100, **updates):
    """Build a ScalingTimingBloomFilter holding ``n`` string keys for tests.

    NOTE(review): this file defines get_bloom twice; this later definition
    shadows the earlier one at import time.
    """
    kwargs = copy(BLOOM_DEFAULTS)
    kwargs.update(updates)

    # Create a bloom
    bloom = ScalingTimingBloomFilter(**kwargs)

    # BUG FIX: the loop previously added a fixed 100 keys regardless of n,
    # breaking the n-based assertions below for any other n.
    for i in range(n):
        bloom.add(str(i))

    # Check that the bloom is working as expected
    assert bloom.contains('1')
    assert bloom.contains(str(n // 2))
    assert not bloom.contains(str(n + 1))

    return bloom
def test_load__with_blooms(timing_bloom_mock):
    """load() should read meta.json, apply its values to the new instance,
    and load each sub-bloom path returned by discover_blooms."""
    test_data = {
        'capacity': 500,
        'decay_time': 30,
        'error': 0.5,
        'error_tightening_ratio': 0.2,
        'growth_factor': 5,
        'min_fill_factor': 0.1,
        'max_fill_factor': 0.9,
        'insert_tail': False,
        'disable_optimizations': True,
    }
    open_mock = mock_open(read_data=json.dumps(test_data))
    data_path = '/test/foo/bar'
    bloom_paths = ['/test/foo/bar/blooms/1', '/test/foo/bar/blooms/2']

    # BUG FIX: patch discover_blooms with a context manager instead of
    # assigning a MagicMock onto the class, which permanently replaced the
    # real method and leaked into every subsequently-run test.
    with patch.object(ScalingTimingBloomFilter, 'discover_blooms',
                      return_value=bloom_paths):
        # Call load with open() mocked so meta.json comes from test_data.
        with patch('__builtin__.open', open_mock, create=True):
            loaded = ScalingTimingBloomFilter.load(data_path)

    # Check that metadata was opened as expected
    open_mock.assert_called_once_with(data_path + '/meta.json', 'r')

    # Every metadata value must land on the loaded instance.
    for key, value in test_data.iteritems():
        assert value == getattr(loaded, key)
    expected_bloom_count = 2
    assert expected_bloom_count == len(loaded.blooms)

    # Each discovered path must have been loaded exactly once.
    for path in bloom_paths:
        timing_bloom_mock.load.assert_any_call(path)
    expected_load_calls = len(bloom_paths)
    assert expected_load_calls == timing_bloom_mock.load.call_count
def test_load__with_blooms(timing_bloom_mock):
    """load() rebuilds a filter from meta.json plus the discovered sub-blooms.

    NOTE(review): this file defines test_load__with_blooms twice; this later
    definition shadows the earlier one at import time.
    """
    test_data = {
        'capacity': 500,
        'decay_time': 30,
        'error': 0.5,
        'error_tightening_ratio': 0.2,
        'growth_factor': 5,
        'min_fill_factor': 0.1,
        'max_fill_factor': 0.9,
        'insert_tail': False,
        'disable_optimizations': True,
    }
    open_mock = mock_open(read_data=json.dumps(test_data))
    data_path = '/test/foo/bar'
    bloom_paths = ['/test/foo/bar/blooms/1', '/test/foo/bar/blooms/2']

    # BUG FIX: the original assigned a MagicMock directly onto the class
    # attribute, clobbering discover_blooms for every later test. Scope the
    # replacement with patch.object so it is restored on exit.
    with patch.object(ScalingTimingBloomFilter, 'discover_blooms',
                      return_value=bloom_paths):
        with patch('__builtin__.open', open_mock, create=True):
            loaded = ScalingTimingBloomFilter.load(data_path)

    # Check that metadata was opened as expected
    open_mock.assert_called_once_with(data_path + '/meta.json', 'r')

    # Check that the loaded bloom looks as expected
    for key, value in test_data.iteritems():
        assert value == getattr(loaded, key)
    expected_bloom_count = 2
    assert expected_bloom_count == len(loaded.blooms)

    # Check that the sub blooms were loaded as expected
    for path in bloom_paths:
        timing_bloom_mock.load.assert_any_call(path)
    expected_load_calls = len(bloom_paths)
    assert expected_load_calls == timing_bloom_mock.load.call_count
def test_bloom_initial_save_and_load_with_optimization(tmpdir):
    """A first save should lay down the expected files and reload intact."""
    testing_dir = tmpdir.mkdir('bloom_test')
    temp_path = str(testing_dir)

    # Build and persist a populated bloom.
    bloom = get_bloom(data_path=temp_path)
    bloom.save()

    # One sub-bloom directory plus the metadata file must now exist.
    blooms_path = testing_dir.join('blooms')
    meta_file = testing_dir.join('meta.json')
    assert blooms_path.check()
    assert meta_file.check()
    assert len(blooms_path.listdir()) == 1

    # Reloading must reproduce the original membership.
    reloaded = ScalingTimingBloomFilter.load(temp_path)
    assert reloaded.contains('1')
    assert reloaded.contains('50')
    assert not reloaded.contains('101')
def test_bloom_initial_save_and_load_with_optimization(tmpdir):
    """Initial save creates blooms/ and meta.json; load restores membership.

    NOTE(review): this file defines this test twice; this later definition
    shadows the earlier one at import time.
    """
    # Setup a temporary directory and a populated bloom within it.
    testing_dir = tmpdir.mkdir('bloom_test')
    temp_path = str(testing_dir)
    bloom = get_bloom(data_path=temp_path)

    # Save the bloom
    bloom.save()

    # Verify the on-disk layout: a single sub-bloom and the metadata file.
    blooms_path = testing_dir.join('blooms')
    meta_file = testing_dir.join('meta.json')
    assert blooms_path.check()
    assert 1 == len(blooms_path.listdir())
    assert meta_file.check()

    # Reload the bloom and confirm membership survived the round trip.
    reloaded = ScalingTimingBloomFilter.load(temp_path)
    assert reloaded.contains('1')
    assert reloaded.contains('50')
    assert not reloaded.contains('101')
def get_bloom(bloom_mocks=None, **overrides):
    '''
    Helper function to easily get a bloom for testing.

    ``overrides`` replaces entries in BLOOM_DEFAULTS; ``bloom_mocks`` is an
    optional list of specs, each with optional 'attrs' (plain attributes) and
    'return_values' (canned method returns) for a TimingBloomFilter mock.
    '''
    kwargs = copy(BLOOM_DEFAULTS)
    kwargs.update(overrides)

    if bloom_mocks:
        mocked_blooms = []
        for spec in bloom_mocks:
            fake_bloom = MagicMock(TimingBloomFilter)

            # Plain attributes: defaults overlaid with per-mock overrides.
            attrs = copy(TIMING_BLOOM_DEFAULTS)
            attrs.update(spec.get('attrs', {}))
            for name, value in attrs.iteritems():
                setattr(fake_bloom, name, value)

            # Canned return values for the mock's methods.
            for name, value in spec.get('return_values', {}).iteritems():
                getattr(fake_bloom, name).return_value = value

            mocked_blooms.append(fake_bloom)
        kwargs['blooms'] = mocked_blooms

    return ScalingTimingBloomFilter(**kwargs)