def _hll_merge(v: pd.DataFrame) -> bytes:
    """Union every serialized register buffer found in ``v`` into one sketch.

    Each item produced by iterating ``v`` is copied into a ``bytearray``,
    loaded into a scratch ``HyperLogLog`` and merged into the accumulator.
    ``k`` is read from the enclosing scope and passed to both constructors.

    Returns:
        The value of ``registers()`` on the accumulated sketch.
    """
    # NOTE(review): iterating a pandas DataFrame yields column labels, not
    # cell values. The loop treats every item as a register buffer, so the
    # runtime argument is presumably a Series -- confirm against the caller.
    merged, scratch = HyperLogLog(k), HyperLogLog(k)
    for raw_registers in v:
        # One scratch sketch is reused for every item instead of building a
        # new HyperLogLog per element.
        scratch.set_registers(bytearray(raw_registers))
        merged.merge(scratch)
    return merged.registers()
def _hll_merge(v):
    """Merge all serialized register buffers yielded by ``v`` into one sketch.

    Every item obtained by iterating ``v`` is converted to a ``bytearray``,
    installed into a staging ``HyperLogLog`` via ``set_registers`` and then
    merged into the accumulator. ``k`` comes from the enclosing scope and is
    passed to both constructors.

    Returns:
        The value of ``registers()`` on the accumulated sketch.
    """
    accumulator, staging = HyperLogLog(k), HyperLogLog(k)
    for serialized in v:
        # The staging sketch is reused across iterations; only its registers
        # are replaced before each merge.
        staging.set_registers(bytearray(serialized))
        accumulator.merge(staging)
    return accumulator.registers()
def _hll_init_agg(v: pd.DataFrame) -> bytes:
    """Build one sketch from a mix of raw values and serialized sketches.

    For each item obtained by iterating ``v``:

    * ``None`` is skipped;
    * ``bytes`` / ``bytearray`` items are treated as serialized registers,
      loaded into a scratch ``HyperLogLog`` and merged into the result;
    * anything else is converted with ``str()`` and added to the result.

    ``k`` is read from the enclosing scope and passed to both constructors.

    Returns:
        The value of ``registers()`` on the accumulated sketch.
    """
    # NOTE(review): only ``None`` counts as missing here. A float NaN is
    # stringified and added like any other value -- confirm that is intended.
    sketch, scratch = HyperLogLog(k), HyperLogLog(k)
    for item in v:
        if item is None:
            continue
        if isinstance(item, (bytes, bytearray)):
            # Already-serialized registers: merge them rather than hashing
            # the raw bytes as a new element.
            scratch.set_registers(bytearray(item))
            sketch.merge(scratch)
        else:
            sketch.add(str(item))
    return sketch.registers()
def test_merge(self):
    """Merging keeps the registers that were set on either operand."""
    target = HyperLogLog(2)
    other = HyperLogLog(2)
    target.set_register(0, 1)
    other.set_register(3, 1)

    target.merge(other)

    # Register 0 came from ``target`` and register 3 from ``other``; the
    # remaining two registers are expected to stay zero.
    expected = bytearray([1, 0, 0, 1])
    self.assertEqual(target.registers(), expected)
def test_only_same_size_HyperLogLogs_can_be_merged(self):
    """Merging sketches built with different sizes must raise ValueError."""
    smaller = HyperLogLog(4)
    larger = HyperLogLog(5)
    # Callable form of assertRaises: invokes smaller.merge(larger) and checks
    # that it raises ValueError.
    self.assertRaises(ValueError, smaller.merge, larger)