def testHLL(self):
    """Check HyperLogLog cardinality estimates after adds and a merge.

    Three stages are verified, each by asserting that the ratio of the
    estimate to the true distinct count is close to 1:

    1. 10,000 distinct strings are added (tolerance 0.1).
    2. A further 100,000 distinct strings are added, for 110,000 in
       total (tolerance 0.2).
    3. A second buffer holding the first 10,000 strings again is merged
       in; these are duplicates, so the expected count stays 110,000.
    """
    # NOTE(review): 0.05 is presumably the target error rate -- confirm
    # against the HyperLogLog constructor.
    hll = HyperLogLog(0.05)
    buf = hll.buffer()

    for i in xrange(10000):
        hll(buf, str(i))
    self.assertAlmostEqual(hll.getvalue(buf) / float(10000), 1, delta=0.1)

    for i in xrange(100000, 200000):
        hll(buf, str(i))
    # Divide by a float, as the first assertion does: with an int estimate
    # Python 2 would floor the ratio to 0 or 1 and the delta would be
    # meaningless.
    self.assertAlmostEqual(hll.getvalue(buf) / float(110000), 1, delta=0.2)

    # buf2 only receives values that were already added to buf, so the
    # estimate read from buf after the merge should not move.
    buf2 = hll.buffer()
    for i in xrange(10000):
        hll(buf2, str(i))
    hll.merge(buf, buf2)
    self.assertAlmostEqual(hll.getvalue(buf) / float(110000), 1, delta=0.2)
def test_intersection(self):
    """Check that intersecting two Bloom filters keeps only shared items.

    ``bloom_one`` receives all 26 lowercase ASCII letters and
    ``bloom_two`` only the first half of them. The intersection must
    report the first half as present and the second half as absent.
    """
    bloom_one = BloomFilter(100, 0.001)
    bloom_two = BloomFilter(100, 0.001)

    # Code points 97..122 are the letters 'a'..'z'.
    chars = [chr(i) for i in xrange(97, 123)]
    # Computed once; floor division gives the same split point as the
    # previous int(len(chars) / 2) on both Python 2 and 3.
    half = len(chars) // 2

    for char in chars:
        bloom_one.add(char)
    for char in chars[:half]:
        bloom_two.add(char)

    new_bloom = bloom_one.intersection(bloom_two)

    # assertIn/assertNotIn name the offending member and container on
    # failure, which assertTrue(x in y) does not.
    for char in chars[:half]:
        self.assertIn(char, new_bloom)
    for char in chars[half:]:
        self.assertNotIn(char, new_bloom)
def testHllc(self):
    """Compare the engine's ``hll_count`` on ``name`` with the exact count.

    100,000 random integers in [0, 100000] are generated, loaded through
    ``self._gen_data`` as the first field of each row (the other five
    fields are ``None``), and the approximate distinct count returned by
    the engine is required to be within 10% of itself of the exact count.
    """
    values = []
    for _ in xrange(100000):
        values.append(randint(0, 100000))
    exact_distinct = len(set(values))

    # Every row is the generated value followed by five empty fields.
    padding = [None] * 5
    rows = [[value] + padding for value in values]
    self._gen_data(data=rows)

    hll_expr = self.expr.name.hll_count()
    result = self._get_result(self.engine.execute(hll_expr))

    # The tolerance is relative to the returned estimate, not the exact count.
    self.assertAlmostEqual(exact_distinct, result, delta=result*0.1)