class FileBloomFilterDeduper(Deduper):
    """Deduper that delegates key checks to a ``FileBloomFilter``.

    The filter's sync file is ``BLOOM_FILETER_STATUS_FILENAME`` inside the
    deduper's working directory.  ``shutdown()`` syncs and closes the filter
    and is safe to call more than once.
    """

    def __init__(self, working_dir, capacity, false_positive_rate=0.01):
        """Create the filter backing this deduper.

        Args:
            working_dir: Forwarded to the base ``Deduper`` initializer; the
                sync file path is then built from ``self.working_dir``.
            capacity: Passed through to ``FileBloomFilter``.
            false_positive_rate: Passed through to ``FileBloomFilter`` as a
                keyword argument.
        """
        super(FileBloomFilterDeduper, self).__init__(working_dir)

        sync_file = os.path.join(
            self.working_dir, BLOOM_FILETER_STATUS_FILENAME)
        self.filter = FileBloomFilter(
            sync_file, capacity, false_positive_rate=false_positive_rate)
        # Assigned last on purpose: if this attribute exists, so does
        # ``self.filter``; shutdown() relies on that ordering.
        self.is_shutdown = False

    def exist(self, key):
        """Return whatever ``self.filter.verify(key)`` returns.

        NOTE(review): presumably truthy when ``key`` was seen before --
        confirm against ``FileBloomFilter.verify``.
        """
        return self.filter.verify(key)

    def shutdown(self):
        """Sync the filter and close it; later calls are no-ops.

        ``close()`` runs even when ``sync()`` raises, and the exception from
        ``sync()`` still propagates to the caller.
        """
        # Default of True covers an instance whose __init__ raised before
        # ``is_shutdown`` was set: there is no filter to release, and
        # __del__ must not fail with AttributeError during finalization.
        if getattr(self, 'is_shutdown', True):
            return
        self.is_shutdown = True
        try:
            self.filter.sync()
        finally:
            self.filter.close()

    def __del__(self):
        """Release the filter if the owner never called ``shutdown()``."""
        self.shutdown()
class FileBloomFilterDeduper(Deduper):
    """Deduper that delegates key checks to a ``FileBloomFilter``.

    The filter is synced and closed when the instance is finalized.
    """

    def __init__(self, sync_file, capacity):
        """Create the backing filter.

        Args:
            sync_file: Passed through to ``FileBloomFilter`` as its first
                argument.
            capacity: Passed through to ``FileBloomFilter`` as its second
                argument.
        """
        # NOTE(review): the base ``Deduper`` initializer is not invoked
        # here -- confirm that is intentional.
        self.filter = FileBloomFilter(sync_file, capacity)

    def exist(self, key):
        """Return whatever ``self.filter.verify(key)`` returns.

        NOTE(review): presumably truthy when ``key`` was seen before --
        confirm against ``FileBloomFilter.verify``.
        """
        return self.filter.verify(key)

    def __del__(self):
        """Sync then close the filter, if one was ever created."""
        # __del__ also runs for instances whose __init__ raised; in that
        # case ``filter`` was never assigned and there is nothing to do.
        bloom = getattr(self, 'filter', None)
        if bloom is None:
            return
        try:
            bloom.sync()
        finally:
            # Close even when sync() fails so the filter is not leaked.
            bloom.close()
def __init__(self, working_dir, capacity, false_positive_rate=0.01):
    """Initialize the base deduper and create its ``FileBloomFilter``.

    Args:
        working_dir: Forwarded to the base-class initializer.
        capacity: Passed through to ``FileBloomFilter``.
        false_positive_rate: Passed through to ``FileBloomFilter`` as a
            keyword argument.
    """
    super(FileBloomFilterDeduper, self).__init__(working_dir)

    # The sync file sits in the working directory under a fixed name.
    status_path = os.path.join(
        self.working_dir, BLOOM_FILETER_STATUS_FILENAME)
    bloom = FileBloomFilter(
        status_path, capacity, false_positive_rate=false_positive_rate)

    self.filter = bloom
    self.is_shutdown = False
def _init_bloom_filter(self):
    """Build the ``FileBloomFilter`` stored as ``bloomfilter`` under ``self.root``.

    Capacity is derived from ``self.job.context.job.size``:

    * size > 0: ``size * factor * base``, where ``factor`` is 10 when the
      file does not exist yet and 2 when it does, and ``base`` is 1000 for
      bundle jobs and 1 otherwise;
    * otherwise: ``UNLIMIT_BLOOM_FILTER_CAPACITY``.

    Returns:
        The ``FileBloomFilter`` created for that path and capacity.
    """
    job_size = self.job.context.job.size
    scale = 1000 if self.job.is_bundle else 1
    filter_path = os.path.join(self.root, 'bloomfilter')

    # A file that is not there yet gets the larger growth factor.
    factor = 2 if os.path.exists(filter_path) else 10

    if job_size > 0:
        capacity = job_size * factor * scale
    else:
        capacity = UNLIMIT_BLOOM_FILTER_CAPACITY

    return FileBloomFilter(filter_path, capacity)
def testPutGet(self):
    """Check ``put`` results before and after reopening with the bloom filter file."""
    first = str(12345)
    # The first put returns the value itself; repeating it returns ''.
    self.assertEqual(self.node.put(first), first)
    self.assertEqual(self.node.put(first), '')

    second = str(67890)
    # For a list, only the not-yet-put value is returned.
    self.assertEqual(self.node.put([first, second]), [second])

    self.node.shutdown()

    # After shutdown the bloom filter file must be non-empty.
    filter_path = os.path.join(self.dir_, 'bloomfilter')
    self.assertGreater(os.path.getsize(filter_path), 0)

    # Reopen the node with a filter built on that same file as its hook.
    hook = FileBloomFilter(filter_path, 5)
    self.node = Node(self.node_dir, verify_exists_hook=hook)

    third = str(13579)
    self.assertEqual(self.node.put([first, second, third]), [third])
def __init__(self, sync_file, capacity):
    """Create the ``FileBloomFilter`` and store it on ``self.filter``.

    Args:
        sync_file: Passed through to ``FileBloomFilter`` as its first
            argument.
        capacity: Passed through to ``FileBloomFilter`` as its second
            argument.
    """
    bloom = FileBloomFilter(sync_file, capacity)
    self.filter = bloom