def test_calc_cardinality_sliding1(self):
    """Check how the estimate changes as keys age relative to a window of 100.

    One key is added at t=1; the estimate is asserted to be 1 at t=1 and
    t=101, and 0 at t=102. Two more keys are then added at t=2 and t=3, and
    the estimate is asserted to step down 3, 3, 2, 1, 0 at t=3, 101, 102,
    103 and 104 respectively.
    """
    counter = SlidingHyperLogLog(0.05, 100)

    def check(expectations):
        # Each entry is (query timestamp, expected truncated estimate).
        for timestamp, expected in expectations:
            self.assertEqual(int(counter.card(timestamp)), expected)

    counter.add(1, 'k1')
    check([(1, 1), (101, 1), (102, 0)])

    counter.add(2, 'k2')
    counter.add(3, 'k3')
    check([(3, 3), (101, 3), (102, 2), (103, 1), (104, 0)])
def test_from_list(self):
    """Check that a counter rebuilt via ``from_list`` matches its source.

    A counter is filled with ten entries (timestamp ``i``, value ``str(i)``),
    then a second counter is built from the first one's ``LPFM`` attribute.
    The two are asserted equal, their ``card(9)`` results are asserted equal,
    and ``card_wlist`` on the source is compared with individual ``card``
    calls on the rebuilt counter for the second-argument values 100, 3 and 5
    (presumably window sizes).
    """
    window = 100
    source = SlidingHyperLogLog(0.05, window)
    for stamp in range(10):
        source.add(stamp, str(stamp))

    rebuilt = SlidingHyperLogLog.from_list(source.LPFM, window)

    self.assertEqual(source, rebuilt)
    self.assertEqual(source.card(9), rebuilt.card(9))

    sizes = (100, 3, 5)
    batched = source.card_wlist(9, list(sizes))
    one_by_one = [rebuilt.card(9, size) for size in sizes]
    self.assertEqual(batched, one_by_one)
def test_from_list(self):
    """Check that a counter rebuilt via ``from_list`` matches its source.

    A counter is filled with ten entries (timestamp ``i``, value ``str(i)``),
    then a second counter is built from the first one's ``LPFM`` attribute.
    The two are asserted equal, their ``card(9)`` results are asserted equal,
    and ``card_wlist`` on the source is compared with individual ``card``
    calls on the rebuilt counter for the second-argument values 100, 3 and 5
    (presumably window sizes).
    """
    window = 100
    source = SlidingHyperLogLog(0.05, window)
    for stamp in range(10):
        source.add(stamp, str(stamp))

    rebuilt = SlidingHyperLogLog.from_list(source.LPFM, window)

    self.assertEqual(source, rebuilt)
    self.assertEqual(source.card(9), rebuilt.card(9))

    sizes = (100, 3, 5)
    batched = source.card_wlist(9, list(sizes))
    one_by_one = [rebuilt.card(9, size) for size in sizes]
    self.assertEqual(batched, one_by_one)
def test_calc_cardinality(self):
    """Statistically check the estimate over a range of true cardinalities.

    For each cardinality in ``clist``, ``n`` fresh counters are each fed that
    many random 20-byte values stamped with the current wall-clock second,
    and their estimates are summed. The mean estimate is converted to a
    z-score using ``rel_err * card / sqrt(n)`` as the scale, and the z-score
    is asserted to lie strictly inside (-1.96, 1.96).
    """
    clist = [1, 5, 10, 30, 60, 200, 1000, 10000, 60000]
    n = 30
    rel_err = 0.05

    for card in clist:
        s = 0.0
        # ``range`` instead of ``xrange`` so the test runs on Python 2 and 3;
        # the loop indices themselves are not needed.
        for _ in range(n):
            a = SlidingHyperLogLog(rel_err, 100)
            for _ in range(card):
                a.add(int(time.time()), os.urandom(20))
            s += a.card(int(time.time()))

        z = (float(s) / n - card) / (rel_err * card / math.sqrt(n))
        self.assertLess(-1.96, z)
        self.assertGreater(1.96, z)
def test_calc_cardinality_sliding3(self):
    """Check that ``card_wlist`` agrees with one ``card`` call per window.

    For each size in ``clist`` a counter with that window is filled with
    ``card`` random 20-byte values at timestamps ``0 .. card - 1``. Both
    query styles are then evaluated at timestamp ``1.5 * card`` over the
    same list of windows, and the two result lists are asserted equal.
    """
    clist = [30, 60, 200, 1000, 10000, 60000]
    rel_err = 0.05

    for card in clist:
        a = SlidingHyperLogLog(rel_err, card)
        # ``range`` instead of ``xrange`` so the test runs on Python 2 and 3.
        for i in range(card):
            a.add(i, os.urandom(20))

        timestamp = 1.5 * card
        # Floor division keeps the step an int: on Python 3 ``card / 10`` is
        # a float, which ``range`` rejects. On Python 2 the value is the same.
        windows = [w / 10.0 for w in range(1, card + 1, card // 10)]
        # NOTE(review): the largest window is roughly card / 10 while every
        # entry is older than 0.5 * card at query time, so both lists may be
        # all zeros -- confirm these are the intended windows.

        l1 = [a.card(timestamp, w) for w in windows]
        l2 = a.card_wlist(timestamp, windows)
        self.assertEqual(l1, l2)
def test_calc_cardinality_sliding3(self):
    """Check that ``card_wlist`` agrees with one ``card`` call per window.

    For each size in ``clist`` a counter with that window is filled with
    ``card`` random 20-byte values at timestamps ``0 .. card - 1``. Both
    query styles are then evaluated at timestamp ``1.5 * card`` over the
    same list of windows, and the two result lists are asserted equal.
    """
    clist = [30, 60, 200, 1000, 10000, 60000]
    rel_err = 0.05

    for card in clist:
        a = SlidingHyperLogLog(rel_err, card)
        # ``range`` instead of ``xrange`` so the test runs on Python 2 and 3.
        for i in range(card):
            a.add(i, os.urandom(20))

        timestamp = 1.5 * card
        # Floor division keeps the step an int: on Python 3 ``card / 10`` is
        # a float, which ``range`` rejects. On Python 2 the value is the same.
        windows = [w / 10.0 for w in range(1, card + 1, card // 10)]
        # NOTE(review): the largest window is roughly card / 10 while every
        # entry is older than 0.5 * card at query time, so both lists may be
        # all zeros -- confirm these are the intended windows.

        l1 = [a.card(timestamp, w) for w in windows]
        l2 = a.card_wlist(timestamp, windows)
        self.assertEqual(l1, l2)