def test_rho(self):
    # Table of (word, expected rho) pairs for a 32-bit wide word.
    # The expected values walk from rho == 33 for an all-zero word down
    # to rho == 1 when only the top bit (1 << 31) is set.
    width = 32
    cases = (
        (0, 33),
        (1, 32),
        (2, 31),
        (3, 31),
        (4, 30),
        (5, 30),
        (6, 30),
        (7, 30),
        (1 << 31, 1),
    )
    for word, expected in cases:
        self.assertEqual(get_rho(word, width), expected)

    # A word that needs more than `width` bits must be rejected.
    self.assertRaises(ValueError, get_rho, 1 << 32, width)
def add(self, timestamp, value):
    """
    Adds the item to the HyperLogLog

    The value is hashed, the hash selects one register ``self.LPFM[j]``,
    and the pair ``(timestamp, R)`` is merged into that register.  After
    the merge only the entries that can still matter are kept:

    * entries older than ``newest timestamp - self.window`` are dropped;
    * an entry is dropped when a newer entry has an equal or larger R.

    The register is stored as a tuple of ``(timestamp, R)`` pairs in
    ascending timestamp order, or ``None`` when nothing is left.

    :param timestamp: time of the observation; must support ``<`` and
        subtraction of ``self.window``.
    :param value: item to add.  It is handed to ``sha1()`` unchanged
        (presumably ``hashlib.sha1``, which needs a byte string -- confirm
        against the file's imports).
    """
    # h: D -> {0,1} ** 64
    # x = h(v)
    # j = <x_0x_1..x_{p-1})>
    # w = <x_{p}x_{p+1}..>
    # <t_i, rho(w)>

    # First 16 hex digits == 64 bits of the digest.  int() is used instead
    # of long(): it yields the same value on Python 2 (auto-promoted to
    # long) and also exists on Python 3.
    x = int(sha1(value).hexdigest()[:16], 16)
    j = x & (self.m - 1)  # register index (assumes self.m is a power of two)
    w = x >> self.p
    R = get_rho(w, 64 - self.p)

    current = self.LPFM[j] if self.LPFM[j] is not None else []
    # heapq.merge needs both inputs sorted; the stored register already is.
    merged = list(heapq.merge(current, [(timestamp, R)]))

    # merged always contains at least the new pair, so [-1] is safe.
    cutoff = merged[-1][0] - self.window

    kept = []
    rho_max = None
    # Walk from newest to oldest, keeping only strictly increasing R.
    for t, rho in reversed(merged):
        if t < cutoff:
            break
        # Explicit None check: ordering an int against None only works on
        # Python 2 and raises TypeError on Python 3.
        if rho_max is None or rho > rho_max:
            kept.append((t, rho))
            rho_max = rho

    kept.reverse()
    self.LPFM[j] = tuple(kept) if kept else None
def add(self, timestamp, value):
    """
    Adds the item to the HyperLogLog

    The value is hashed, the hash selects one register ``self.LPFM[j]``,
    and the pair ``(timestamp, R)`` is merged into that register.  After
    the merge only the entries that can still matter are kept:

    * entries older than ``newest timestamp - self.window`` are dropped;
    * an entry is dropped when a newer entry has an equal or larger R.

    The register is stored as a tuple of ``(timestamp, R)`` pairs in
    ascending timestamp order, or ``None`` when nothing is left.

    :param timestamp: time of the observation; must support ``<`` and
        subtraction of ``self.window``.
    :param value: item to add.  It is handed to ``sha1()`` unchanged
        (presumably ``hashlib.sha1``, which needs a byte string -- confirm
        against the file's imports).
    """
    # h: D -> {0,1} ** 64
    # x = h(v)
    # j = <x_0x_1..x_{p-1})>
    # w = <x_{p}x_{p+1}..>
    # <t_i, rho(w)>

    # First 16 hex digits == 64 bits of the digest.  int() is used instead
    # of long(): it yields the same value on Python 2 (auto-promoted to
    # long) and also exists on Python 3.
    x = int(sha1(value).hexdigest()[:16], 16)
    j = x & (self.m - 1)  # register index (assumes self.m is a power of two)
    w = x >> self.p
    R = get_rho(w, 64 - self.p)

    current = self.LPFM[j] if self.LPFM[j] is not None else []
    # heapq.merge needs both inputs sorted; the stored register already is.
    merged = list(heapq.merge(current, [(timestamp, R)]))

    # merged always contains at least the new pair, so [-1] is safe.
    cutoff = merged[-1][0] - self.window

    kept = []
    rho_max = None
    # Walk from newest to oldest, keeping only strictly increasing R.
    for t, rho in reversed(merged):
        if t < cutoff:
            break
        # Explicit None check: ordering an int against None only works on
        # Python 2 and raises TypeError on Python 3.
        if rho_max is None or rho > rho_max:
            kept.append((t, rho))
            rho_max = rho

    kept.reverse()
    self.LPFM[j] = tuple(kept) if kept else None
def test_rho_emu(self):
    # Same expectations as the plain rho test, but with hll.bit_length
    # temporarily replaced by hll.bit_length_emu.
    import hll

    width = 32
    cases = (
        (0, 33),
        (1, 32),
        (2, 31),
        (3, 31),
        (4, 30),
        (5, 30),
        (6, 30),
        (7, 30),
        (1 << 31, 1),
    )

    saved_bit_length = hll.bit_length
    hll.bit_length = hll.bit_length_emu
    try:
        for word, expected in cases:
            self.assertEqual(get_rho(word, width), expected)

        # A word that needs more than `width` bits must be rejected.
        self.assertRaises(ValueError, get_rho, 1 << 32, width)
    finally:
        # Always put the original implementation back, even on failure.
        hll.bit_length = saved_bit_length