import json
import time

# These tests target Python 2: `mock` is the standalone package and
# `__builtin__.open` is patched below. On Python 3 the equivalents live in
# unittest.mock and builtins.
from mock import MagicMock, mock_open, patch

# Assumed import path for the class under test.
from fuggetaboutit import ScalingTimingBloomFilter


def test_save_and_load_with_scaling(tmpdir):
    # Setup a temporary directory
    testing_dir = tmpdir.mkdir('bloom_test')
    temp_path = str(testing_dir)

    # Get a bloom for testing
    bloom = get_bloom(data_path=temp_path, disable_optimizations=True,
                      capacity=200)

    # Save the bloom
    bloom.save()

    # Check that the expected files were created
    blooms_path = testing_dir.join('blooms')
    meta_file = testing_dir.join('meta.json')
    assert blooms_path.check()
    assert 1 == len(blooms_path.listdir())
    assert meta_file.check()

    # Reload the bloom
    second_gen_bloom = ScalingTimingBloomFilter.load(temp_path)

    # Add enough items to trigger a scale
    for i in range(101, 201):
        second_gen_bloom.add(str(i))

    # Call save
    second_gen_bloom.save()

    # Check that a second sub-bloom was created alongside the first
    assert blooms_path.check()
    assert 2 == len(blooms_path.listdir())
    assert meta_file.check()

    # Load again and make sure the new keys are found
    third_gen_bloom = ScalingTimingBloomFilter.load(temp_path)
    assert third_gen_bloom.contains('101')
    assert third_gen_bloom.contains('150')
    assert not third_gen_bloom.contains('201')
def test_bloom_repeat_saves_with_optimization(tmpdir):
    # Setup a temporary directory
    testing_dir = tmpdir.mkdir('bloom_test')
    temp_path = str(testing_dir)

    # Get a bloom for testing
    bloom = get_bloom(data_path=temp_path)

    # Save the bloom
    bloom.save()

    # Capture the mtimes on the saved files
    bloom_file = testing_dir.join('blooms/0/bloom.npy')
    meta_file = testing_dir.join('meta.json')
    first_bloom_save = bloom_file.mtime()
    first_meta_save = meta_file.mtime()

    # Sleep for 1 second to ensure the mtimes will advance
    time.sleep(1)

    # Reload the bloom
    second_gen_bloom = ScalingTimingBloomFilter.load(temp_path)

    # Add a few more keys to the reloaded bloom
    second_gen_bloom.add('101')
    second_gen_bloom.add('102')
    second_gen_bloom.add('103')

    # Save the second generation bloom
    second_gen_bloom.save()

    # Check that the mtimes have changed
    assert first_bloom_save < bloom_file.mtime()
    assert first_meta_save < meta_file.mtime()

    # Load the bloom one more time
    third_gen_bloom = ScalingTimingBloomFilter.load(temp_path)

    # Check that the loaded data is as expected
    assert third_gen_bloom.contains('50')
    assert third_gen_bloom.contains('103')
    assert not third_gen_bloom.contains('105')
def test_load__with_blooms(timing_bloom_mock):
    # Setup test data
    test_data = {
        'capacity': 500,
        'decay_time': 30,
        'error': 0.5,
        'error_tightening_ratio': 0.2,
        'growth_factor': 5,
        'min_fill_factor': 0.1,
        'max_fill_factor': 0.9,
        'insert_tail': False,
        'disable_optimizations': True,
    }
    open_mock = mock_open(read_data=json.dumps(test_data))
    data_path = '/test/foo/bar'
    bloom_paths = ['/test/foo/bar/blooms/1', '/test/foo/bar/blooms/2']
    ScalingTimingBloomFilter.discover_blooms = MagicMock(ScalingTimingBloomFilter)
    ScalingTimingBloomFilter.discover_blooms.return_value = bloom_paths

    # Call load
    with patch('__builtin__.open', open_mock, create=True):
        loaded = ScalingTimingBloomFilter.load(data_path)

    # Check that the metadata was opened as expected
    open_mock.assert_called_once_with(data_path + '/meta.json', 'r')

    # Check that the loaded bloom looks as expected
    for key, value in test_data.iteritems():
        assert value == getattr(loaded, key)
    expected_bloom_count = 2
    assert expected_bloom_count == len(loaded.blooms)

    # Check that the sub-blooms were loaded as expected
    for path in bloom_paths:
        timing_bloom_mock.load.assert_any_call(path)
    expected_load_calls = len(bloom_paths)
    assert expected_load_calls == timing_bloom_mock.load.call_count
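# The timing_bloom_mock fixture used above is not shown in this file. A
# minimal sketch of what it is assumed to look like: it patches the
# TimingBloomFilter class that ScalingTimingBloomFilter.load() uses to
# restore its sub-blooms, so the test never touches disk. The patch target
# below is an assumption about the module layout.
import pytest


@pytest.fixture
def timing_bloom_mock():
    with patch('fuggetaboutit.scaling_timing_bloom_filter.TimingBloomFilter') as mock_bloom:
        yield mock_bloom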
def test_bloom_initial_save_and_load_with_optimization(tmpdir):
    # Setup a temporary directory
    testing_dir = tmpdir.mkdir('bloom_test')
    temp_path = str(testing_dir)

    # Get a bloom for testing
    bloom = get_bloom(data_path=temp_path)

    # Save the bloom
    bloom.save()

    # Check that the expected files were created
    blooms_path = testing_dir.join('blooms')
    meta_file = testing_dir.join('meta.json')
    assert blooms_path.check()
    assert 1 == len(blooms_path.listdir())
    assert meta_file.check()

    # Reload the bloom
    reloaded = ScalingTimingBloomFilter.load(temp_path)

    # Check that the reloaded bloom is working as expected
    assert reloaded.contains('1')
    assert reloaded.contains('50')
    assert not reloaded.contains('101')
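# The get_bloom helper used throughout these tests is also not shown here.
# A minimal sketch, assuming the ScalingTimingBloomFilter constructor accepts
# the keyword arguments the tests pass through (data_path, capacity,
# disable_optimizations, ...) plus a decay_time; the default values below are
# assumptions. The assertions above rely on the helper pre-populating the
# filter with the string keys '1' through '100'.
def get_bloom(**kwargs):
    kwargs.setdefault('capacity', 1000)
    kwargs.setdefault('decay_time', 24 * 60 * 60)
    bloom = ScalingTimingBloomFilter(**kwargs)
    for i in range(1, 101):
        bloom.add(str(i))
    return bloom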