Example #1
0
class FileBloomFilterDeduper(Deduper):
    """Deduper whose membership checks are delegated to a ``FileBloomFilter``.

    The filter is created on a file named ``BLOOM_FILETER_STATUS_FILENAME``
    inside the deduper's working directory. Call :meth:`shutdown` to sync and
    close the filter; it is also invoked from ``__del__``.
    """

    def __init__(self, working_dir, capacity, false_positive_rate=0.01):
        """Create the bloom filter under ``working_dir``.

        :param working_dir: passed to the ``Deduper`` base initializer; the
            filter file is then located via ``self.working_dir``.
        :param capacity: forwarded to ``FileBloomFilter``.
        :param false_positive_rate: forwarded to ``FileBloomFilter``.
        """
        super(FileBloomFilterDeduper, self).__init__(working_dir)
        sync_file = os.path.join(self.working_dir,
                                 BLOOM_FILETER_STATUS_FILENAME)

        self.filter = FileBloomFilter(sync_file, capacity,
                                      false_positive_rate=false_positive_rate)
        self.is_shutdown = False

    def exist(self, key):
        """Return the result of the filter's ``verify`` call for ``key``."""
        return self.filter.verify(key)

    def shutdown(self):
        """Sync the filter and close it; repeated calls are no-ops.

        ``close`` runs even when ``sync`` raises.
        """
        # __del__ also runs on instances whose __init__ raised before
        # ``is_shutdown`` (and possibly ``filter``) was assigned. In that
        # case there is nothing to release, so a missing flag is treated
        # as "already shut down" instead of raising AttributeError.
        if getattr(self, 'is_shutdown', True):
            return
        self.is_shutdown = True

        try:
            self.filter.sync()
        finally:
            self.filter.close()

    def __del__(self):
        """Make sure the filter is synced and closed on garbage collection."""
        self.shutdown()
Example #2
0
File: dedup.py Project: 0pengl/cola
class FileBloomFilterDeduper(Deduper):
    """Deduper whose membership checks are delegated to a ``FileBloomFilter``.

    The filter is synced and closed when the instance is garbage collected.
    """

    def __init__(self, sync_file, capacity):
        """Create the underlying filter.

        :param sync_file: forwarded to ``FileBloomFilter`` as first argument.
        :param capacity: forwarded to ``FileBloomFilter`` as second argument.
        """
        self.filter = FileBloomFilter(sync_file, capacity)

    def exist(self, key):
        """Return the result of the filter's ``verify`` call for ``key``."""
        return self.filter.verify(key)

    def __del__(self):
        """Sync then close the filter; ``close`` runs even if ``sync`` raises."""
        # __del__ is also invoked when __init__ raised before ``self.filter``
        # was assigned; there is nothing to sync or close in that case.
        bloom_filter = getattr(self, 'filter', None)
        if bloom_filter is None:
            return
        try:
            bloom_filter.sync()
        finally:
            bloom_filter.close()
Example #3
0
 def __init__(self, working_dir, capacity, false_positive_rate=0.01):
     """Initialise the base deduper, then build the file-backed bloom filter.

     :param working_dir: handed to the base class initializer.
     :param capacity: forwarded to ``FileBloomFilter``.
     :param false_positive_rate: forwarded to ``FileBloomFilter``.
     """
     super(FileBloomFilterDeduper, self).__init__(working_dir)

     # The filter file sits directly inside the working directory.
     status_path = os.path.join(
         self.working_dir, BLOOM_FILETER_STATUS_FILENAME)
     self.filter = FileBloomFilter(
         status_path, capacity, false_positive_rate=false_positive_rate)

     # Flag starts out False once the filter has been created.
     self.is_shutdown = False
Example #4
0
    def _init_bloom_filter(self):
        """Build the ``FileBloomFilter`` stored as ``<root>/bloomfilter``.

        For a positive job size the capacity is ``size * factor * base``,
        where ``factor`` is 10 when the filter file does not exist yet and
        2 when it already does, and ``base`` is 1000 for bundle jobs and 1
        otherwise. For a non-positive size the capacity is
        ``UNLIMIT_BLOOM_FILTER_CAPACITY``.
        """
        job_size = self.job.context.job.size
        multiplier = 1000 if self.job.is_bundle else 1
        filter_path = os.path.join(self.root, 'bloomfilter')

        already_on_disk = os.path.exists(filter_path)
        if size_is_limited(job_size) if False else job_size > 0:
            factor = 2 if already_on_disk else 10
            capacity = job_size * factor * multiplier
        else:
            capacity = UNLIMIT_BLOOM_FILTER_CAPACITY

        return FileBloomFilter(filter_path, capacity)
Example #5
0
    def testPutGet(self):
        """Check ``put`` results before and after reloading the bloom filter.

        A first ``put`` of a value returns it, a repeated ``put`` returns
        ``''``, and a list ``put`` returns only the entries not put before.
        After ``shutdown`` the ``bloomfilter`` file must be non-empty, and a
        new ``Node`` using a ``FileBloomFilter`` on that file must still
        report only the new value.
        """
        first = str(12345)

        self.assertEqual(self.node.put(first), first)
        self.assertEqual(self.node.put(first), '')

        second = str(67890)
        self.assertEqual(self.node.put([first, second]), [second])

        self.node.shutdown()
        filter_path = os.path.join(self.dir_, 'bloomfilter')
        self.assertGreater(os.path.getsize(filter_path), 0)

        # Re-open the file written on shutdown and plug it into a new node.
        reloaded_filter = FileBloomFilter(filter_path, 5)
        self.node = Node(self.node_dir, verify_exists_hook=reloaded_filter)

        third = str(13579)
        self.assertEqual(self.node.put([first, second, third]), [third])
Example #6
0
File: dedup.py Project: 0pengl/cola
 def __init__(self, sync_file, capacity):
     """Create the ``FileBloomFilter`` used for membership checks.

     :param sync_file: forwarded to ``FileBloomFilter`` as first argument.
     :param capacity: forwarded to ``FileBloomFilter`` as second argument.
     """
     self.filter = FileBloomFilter(sync_file, capacity)