Exemple #1
0
    def test_size_and_num_hashes(self):
        'Test BloomFilter.size() and BloomFilter.num_hashes()'
        # Each row is (num_elements, false_positives, expected size(),
        # expected num_hashes()).  Rows are checked in order, one freshly
        # constructed filter per row.
        cases = (
            (100, 0.1, 480, 4),
            (1000, 0.1, 4793, 4),
            (100, 0.01, 959, 7),
            (1000, 0.01, 9586, 7),
        )
        for capacity, error_rate, expected_size, expected_hashes in cases:
            dilberts = BloomFilter(
                redis=self.redis,
                num_elements=capacity,
                false_positives=error_rate,
            )
            assert dilberts.size() == expected_size
            assert dilberts.num_hashes() == expected_hashes
Exemple #2
0
    def setUp(self):
        'Build disjoint seen/unseen link sets and a Bloom filter of seen links'
        super().setUp()
        # Delete the same key that the Bloom filter below is created with.
        self.redis.delete('recently-consumed')

        # Collect 100 distinct fullnames representing links the user has
        # seen.  The set deduplicates, so keep drawing until it is full.
        seen = self.seen_links = set()
        while len(seen) < 100:
            seen.add(self.random_fullname())

        # Collect 100 distinct fullnames representing links the user hasn't
        # seen.  Any draw that is already in the seen set is discarded, so
        # the two sets never intersect.
        unseen = self.unseen_links = set()
        while len(unseen) < 100:
            candidate = self.random_fullname()
            if candidate in seen:
                continue
            unseen.add(candidate)

        # Seed the recently consumed Bloom filter with the seen set only.
        self.recently_consumed = BloomFilter(
            seen,
            num_values=1000,
            false_positives=0.001,
            key='recently-consumed',
        )
Exemple #3
0
 def test_repr(self):
     'Test that BloomFilter.__repr__() shows the key the filter was given'
     key = self._KEY
     dilberts = BloomFilter(num_elements=100, false_positives=0.01, key=key)
     # Capture the actual representation before building the expected one.
     actual = repr(dilberts)
     expected = f'<BloomFilter key={self._KEY}>'
     assert actual == expected
Exemple #4
0
    def test_size_and_num_hashes(self):
        'Test BloomFilter.size() and BloomFilter.num_hashes()'
        # Each row is (num_values, false_positives, expected size(),
        # expected num_hashes()).  Rows are checked in order, one freshly
        # constructed filter per row.
        cases = (
            (100, 0.1, 480, 4),
            (1000, 0.1, 4793, 4),
            (100, 0.01, 959, 7),
            (1000, 0.01, 9586, 7),
        )
        for capacity, error_rate, expected_size, expected_hashes in cases:
            dilberts = BloomFilter(
                num_values=capacity,
                false_positives=error_rate,
            )
            assert dilberts.size() == expected_size
            assert dilberts.num_hashes() == expected_hashes
Exemple #5
0
 def test_repr(self):
     'Test that BloomFilter.__repr__() shows the key the filter was given'
     params = dict(num_values=100, false_positives=0.01, key='dilberts')
     dilberts = BloomFilter(**params)
     actual = repr(dilberts)
     assert actual == '<BloomFilter key=dilberts>'
Exemple #6
0
 def test_repr(self):
     'Test that BloomFilter.__repr__() shows the key the filter was given'
     key = self._KEY
     dilberts = BloomFilter(num_values=100, false_positives=0.01, key=key)
     # Capture the actual representation before building the expected one.
     actual = repr(dilberts)
     expected = '<BloomFilter key={}>'.format(self._KEY)
     assert actual == expected
Exemple #7
0
 def test_init_without_iterable(self):
     'Test that a BloomFilter built without an iterable starts out empty'
     dilberts = BloomFilter(
         num_elements=100,
         false_positives=0.01,
     )

     # The constructor arguments are exposed as attributes.
     assert dilberts.num_elements == 100
     assert dilberts.false_positives == 0.01

     # Nothing was inserted, so no probe may report membership...
     for name in ('rajiv', 'raj', 'dan', 'eric'):
         assert name not in dilberts

     # ...and both the set-bit count and the length must be zero.
     assert dilberts._num_bits_set() == 0
     assert len(dilberts) == 0
Exemple #8
0
    def test_add(self):
        'Test BloomFilter add(), __contains__(), and __len__()'
        dilberts = BloomFilter(
            redis=self.redis,
            num_elements=100,
            false_positives=0.01,
        )

        # Names probed after every step, always in this order.
        probes = ('rajiv', 'raj', 'dan', 'eric')

        # Each step is (name to add, expected len() afterwards).  The leading
        # None step checks the freshly constructed filter before any add().
        # 'rajiv' and 'raj' are each added twice; the repeat must not change
        # the length.
        steps = (
            (None, 0),
            ('rajiv', 1),
            ('raj', 2),
            ('rajiv', 2),
            ('raj', 2),
            ('dan', 3),
            ('eric', 4),
        )

        # A plain set mirrors what the filter is expected to contain.
        expected_members = set()
        for name, expected_len in steps:
            if name is not None:
                dilberts.add(name)
                expected_members.add(name)
            for probe in probes:
                assert (probe in dilberts) == (probe in expected_members)
            assert len(dilberts) == expected_len
Exemple #9
0
    def test_update(self):
        'Test BloomFilter update(), __contains__(), and __len__()'
        dilberts = BloomFilter(
            redis=self.redis,
            num_elements=100,
            false_positives=0.01,
        )

        # Names probed after every round, always in this order.  'rhodes' is
        # never inserted, so it must stay absent throughout.
        probes = ('rajiv', 'raj', 'dan', 'eric', 'jenny', 'will', 'rhodes')

        # Each round is (positional arguments for update(), expected len()
        # afterwards).  The leading None round checks the freshly constructed
        # filter before any update(); the rounds then cover several
        # iterables at once, a single iterable, and an empty iterable.
        rounds = (
            (None, 0),
            (({'rajiv', 'raj'}, {'dan', 'eric'}), 4),
            (({'jenny', 'will'},), 6),
            ((set(),), 6),
        )

        # A plain set mirrors what the filter is expected to contain.
        expected_members = set()
        for iterables, expected_len in rounds:
            if iterables is not None:
                dilberts.update(*iterables)
                expected_members.update(*iterables)
            for probe in probes:
                assert (probe in dilberts) == (probe in expected_members)
            assert len(dilberts) == expected_len
Exemple #10
0
 def test_init_with_iterable(self):
     'Test BloomFilter.__init__() with an iterable for initialization'
     dilberts = BloomFilter(
         {'rajiv', 'raj'},
         num_elements=100,
         false_positives=0.01,
     )

     # The constructor arguments are exposed as attributes.
     assert dilberts.num_elements == 100
     assert dilberts.false_positives == 0.01

     # Only the names from the initial iterable are reported as members.
     assert 'rajiv' in dilberts
     assert 'raj' in dilberts
     assert 'dan' not in dilberts
     assert 'eric' not in dilberts

     # We've inserted two elements into dilberts: 'rajiv' and 'raj'.  So
     # unless dilberts._bit_offsets('rajiv') and
     # dilberts._bit_offsets('raj') perfectly collide/overlap, they differ
     # by at least 1 bit, so at least dilberts.num_hashes() + 1 bits are
     # set.  That bound is inclusive, hence >= rather than a strict >
     # (which would demand num_hashes() + 2 bits and contradict this
     # reasoning).
     # NOTE(review): this assumes a single element sets num_hashes()
     # distinct bits -- confirm against _bit_offsets().
     assert dilberts._num_bits_set() >= dilberts.num_hashes() + 1
     assert len(dilberts) == 2