Example #1
0
class StoreBitArrayTests(unittest.TestCase):
    'Whenever we change a BloomFilter, ensure that we Memcache our changes.'

    def setUp(self):
        super(self.__class__, self).setUp()
        self.dilberts = BloomFilter({'rajiv', 'raj'}, key='dilberts')

    def tearDown(self):
        self.dilberts.memcache.delete(self.dilberts.key)
        super(self.__class__, self).tearDown()

    def test_init_gets_stored(self):
        'When we __init__() on an iterable, ensure we Memcache the bit array'
        office_space = BloomFilter(key='dilberts')
        assert office_space._bit_array == self.dilberts._bit_array

    def test_add_gets_stored(self):
        'When we add() an element, ensure that we Memcache the bit array'
        self.dilberts.add('dan')
        office_space = BloomFilter(key='dilberts')
        assert office_space._bit_array == self.dilberts._bit_array

    def test_update_gets_stored(self):
        'When we update() with elements, ensure that we Memcache the bit array'
        self.dilberts.update({'dan', 'eric'})
        office_space = BloomFilter(key='dilberts')
        assert office_space._bit_array == self.dilberts._bit_array

    def test_clear_gets_stored(self):
        'When we clear() all elements, ensure that we Memcache the bit array'
        self.dilberts.clear()
        office_space = BloomFilter(key='dilberts')
        assert office_space._bit_array == self.dilberts._bit_array
Example #2
0
    def test_clear(self):
        'A populated BloomFilter reports no members and zero length after clear()'
        members = ('rajiv', 'raj')
        strangers = ('dan', 'eric')
        dilberts = BloomFilter({'rajiv', 'raj'})

        # Before clearing: only the names we constructed the filter with.
        for name in members:
            assert name in dilberts
        for name in strangers:
            assert name not in dilberts
        assert len(dilberts) == 2

        dilberts.clear()

        # After clearing: nobody is a member any more.
        for name in members + strangers:
            assert name not in dilberts
        assert len(dilberts) == 0
Example #3
0
class CheckAndSetTests(unittest.TestCase):
    def setUp(self):
        super(self.__class__, self).setUp()
        self.thread1 = BloomFilter(key='dilberts')
        self.thread1.clear()
        self.thread2 = BloomFilter(key='dilberts')

    def tearDown(self):
        self.thread1.memcache.delete(self.thread1.key)
        super(self.__class__, self).tearDown()

    def test_check_and_set(self):
        "Ensure that multiple threads don't stomp each other's changes"

        # Let's simulate instantiating BloomFilters in two threads, both
        # pointed at the same Memcache key.  I've named these BloomFilters
        # self.thread1 and self.thread2 for clarity's sake.

        # When we update the BloomFilter in thread 1, ...
        self.thread1.update({'rajiv', 'raj'})

        # ... notice that the BloomFilter in thread 2 doesn't automatically get
        # updated:
        assert 'rajiv' not in self.thread2
        assert 'raj' not in self.thread2

        # But now when we update the BloomFilter in thread 2, ...
        self.thread2.update({'dan', 'eric'})

        # ... notice that this BloomFilter in thread 2 first pulls in thread
        # 1's changes, then applies its own:
        assert 'rajiv' in self.thread2
        assert 'raj' in self.thread2
        assert 'dan' in self.thread2
        assert 'eric' in self.thread2

        # So even though our local BloomFilter objects might get out of sync,
        # ...
        assert 'dan' not in self.thread1
        assert 'eric' not in self.thread1

        # ... whenever we update them, we first merge in changes from Memcache,
        # which is always in sync:
        self.thread1.update({'jenny', 'will'})
        assert 'dan' in self.thread1
        assert 'eric' in self.thread1
Example #4
0
class RecentlyConsumedSimulationTests(unittest.TestCase):
    "Simulate reddit's recently consumed problem to test our Bloom filter."

    def setUp(self):
        super(self.__class__, self).setUp()

        # Construct a set of links that the user has seen.
        self.seen_links = set()
        while len(self.seen_links) < 100:
            fullname = self.random_fullname()
            self.seen_links.add(fullname)

        # Construct a set of links that the user hasn't seen.  Ensure that
        # there's no intersection between the seen set and the unseen set.
        self.unseen_links = set()
        while len(self.unseen_links) < 100:
            fullname = self.random_fullname()
            if fullname not in self.seen_links:
                self.unseen_links.add(fullname)

        # Initialize the recently consumed Bloom filter on the seen set.
        self.recently_consumed = BloomFilter(
            num_values=1000,
            false_positives=0.001,
            key='recently-consumed',
        )
        self.recently_consumed.clear()
        self.recently_consumed.update(self.seen_links)

    def tearDown(self):
        self.recently_consumed.memcache.delete(self.recently_consumed.key)
        super(self.__class__, self).tearDown()

    @staticmethod
    def random_fullname(prefix='t3_', size=6):
        alphabet36, id36 = string.digits + string.ascii_lowercase, []
        for _ in xrange(size):
            id36.append(random.choice(alphabet36))
        return prefix + ''.join(id36)

    @staticmethod
    def round(number, sig_digits=1):
        '''Round a float to the specified number of significant digits.

        Reference implementation:
            https://github.com/ActiveState/code/blob/3b27230f418b714bc9a0f897cb8ea189c3515e99/recipes/Python/578114_Round_number_specified_number_significant/recipe-578114.py
        '''
        try:
            ndigits = sig_digits - 1 - int(math.floor(math.log10(abs(number))))
        except ValueError:
            # math.log10(number) raised a ValueError, so number must be 0.0.
            # No need to round 0.0.
            return number
        else:
            return round(number, ndigits)

    def test_zero_false_negatives(self):
        'Ensure that we produce zero false negatives'
        for seen_link in self.seen_links:
            assert seen_link in self.recently_consumed

    def test_acceptable_false_positives(self):
        'Ensure that we produce false positives at an acceptable rate'
        acceptable, actual = self.recently_consumed.false_positives, 0

        for unseen_link in self.unseen_links:
            actual += unseen_link in self.recently_consumed
        actual /= float(len(self.unseen_links))
        actual = self.round(actual, sig_digits=1)

        message = 'acceptable: {}; actual: {}'.format(acceptable, actual)
        assert actual <= acceptable, message