Beispiel #1
0
 def _hll_merge(v: pd.DataFrame) -> bytes:
     """Merge every register dump found in ``v`` into a single sketch.

     Each item produced by iterating ``v`` is converted with ``bytearray(...)``,
     loaded into a scratch ``HyperLogLog`` and merged into an accumulator.
     ``k`` is not defined here; it is read from the surrounding scope.

     NOTE(review): the parameter is annotated as ``pd.DataFrame``, but the loop
     treats each item as a register dump -- presumably this is called with a
     Series of serialized registers; confirm against the caller.

     Returns:
         The result of ``registers()`` on the merged sketch.
     """
     merged = HyperLogLog(k)
     # One scratch sketch is reused; set_registers() is called on it before
     # every merge, so each merge sees the registers of the current item.
     scratch = HyperLogLog(k)
     for raw_registers in v:
         scratch.set_registers(bytearray(raw_registers))
         merged.merge(scratch)
     return merged.registers()
Beispiel #2
0
 def _hll_merge(v):
     """Combine the serialized sketches in ``v`` and return the merged registers.

     Every item yielded by ``v`` is passed through ``bytearray(...)``, loaded
     into a temporary ``HyperLogLog`` and merged into the running result.
     ``k`` comes from the enclosing scope, not from this function.
     """
     combined = HyperLogLog(k)
     # Reused for every item: its registers are overwritten before each merge.
     staging = HyperLogLog(k)
     for serialized in v:
         staging.set_registers(bytearray(serialized))
         combined.merge(staging)
     return combined.registers()
Beispiel #3
0
 def _hll_init_agg(v: pd.DataFrame) -> bytes:
     """Aggregate a mix of raw values and serialized sketches into one sketch.

     For each item yielded by iterating ``v``:

     * ``None`` is skipped;
     * ``bytes`` / ``bytearray`` items are loaded into a scratch sketch via
       ``set_registers`` and merged into the accumulator;
     * anything else is added to the accumulator as ``str(item)``.

     ``k`` is read from the surrounding scope.

     NOTE(review): annotated as ``pd.DataFrame`` although items are handled as
     individual values -- presumably a Series is passed in; confirm.

     Returns:
         The result of ``registers()`` on the accumulated sketch.
     """
     accumulator = HyperLogLog(k)
     scratch = HyperLogLog(k)
     for item in v:
         if item is None:
             continue
         if not isinstance(item, (bytes, bytearray)):
             # Plain value: stringify before adding it to the sketch.
             accumulator.add(str(item))
             continue
         # Binary item: handled as a register dump and merged in.
         scratch.set_registers(bytearray(item))
         accumulator.merge(scratch)
     return accumulator.registers()
Beispiel #4
0
def _stats_from_json(json: Dict[str, Any]) -> Dict[str, Any]:
    stats = {}
    if 'messages_sent' in json:
        stats['messages_sent'] = json['messages_sent']
    if 'messages_received' in json:
        stats['messages_received'] = json['messages_received']
    if 'users_active' in json:
        hll = HyperLogLog(12)
        hll.set_registers(bytearray(json['users_active']))
        stats['users_active'] = hll
    return stats
Beispiel #5
0
class TestRegisterFunctions(unittest.TestCase):
    """Tests for the HyperLogLog register accessors.

    Covers ``set_register``, ``set_registers`` and ``registers`` on a sketch
    built with ``k = 5``; the register count used throughout is ``2 ** k``,
    which ``test_bytesarray_has_correct_length`` asserts directly.
    """

    def setUp(self):
        self.k = 5
        # Derived once so no test hard-codes the register count.
        self.size = 2 ** self.k
        self.hll = HyperLogLog(self.k)

    def _random_registers(self):
        """Return ``self.size`` random register values in the range 0..16."""
        return bytearray(randint(0, 16) for _ in range(self.size))

    def test_set_last_register(self):
        # The last register sits at index 2**k - 1, not k - 1.
        last = self.size - 1
        self.hll.set_register(last, 1)
        self.assertEqual(self.hll.registers()[last], 1)

    def test_set_first_register(self):
        self.hll.set_register(0, 1)
        self.assertEqual(self.hll.registers()[0], 1)

    def test_set_register_with_negative_value_fails(self):
        with self.assertRaises(ValueError):
            self.hll.set_register(0, -1)

    def test_set_register_with_greater_than_max_rank_fails(self):
        with self.assertRaises(ValueError):
            self.hll.set_register(0, 33)

    def test_set_register_with_index_out_of_bounds(self):
        # Valid indices are 0 .. size - 1, so ``size`` is the first invalid one.
        with self.assertRaises(IndexError):
            self.hll.set_register(self.size, 1)

    def test_set_register_with_negative_index_fails(self):
        # NOTE(review): despite its name this passes a negative *value* at
        # index 0, duplicating test_set_register_with_negative_value_fails.
        # A real negative-index case would be set_register(-1, 1), but the
        # exception type raised for that is not visible here -- confirm
        # against the HyperLogLog implementation before changing the call.
        with self.assertRaises(ValueError):
            self.hll.set_register(0, -1)

    def test_bytesarray_has_correct_values(self):
        expected = self._random_registers()
        for index, rank in enumerate(expected):
            self.hll.set_register(index, rank)

        self.assertEqual(expected, self.hll.registers())

    def test_registers_returns_bytesarray(self):
        self.assertIsInstance(self.hll.registers(), bytearray)

    def test_bytesarray_has_correct_length(self):
        self.assertEqual(len(self.hll.registers()), self.size)

    def test_set_registers(self):
        expected = self._random_registers()
        self.hll.set_registers(expected)

        self.assertEqual(expected, self.hll.registers())
Beispiel #6
0
class TestRegisterFunctions(unittest.TestCase):
    """Tests for reading and writing HyperLogLog registers.

    A sketch is created with ``k = 5``. The expected number of registers is
    ``2 ** k`` (checked by ``test_bytesarray_has_correct_length``), and every
    index used below is derived from that count rather than hard-coded.
    """

    def setUp(self):
        self.k = 5
        self.size = 2 ** self.k  # number of registers the tests expect
        self.hll = HyperLogLog(self.k)

    def _random_registers(self):
        """Return ``self.size`` random register values in the range 0..16."""
        return bytearray(randint(0, 16) for _ in range(self.size))

    def test_set_last_register(self):
        # Use the real last index (2**k - 1); k - 1 is only the fifth register.
        last = self.size - 1
        self.hll.set_register(last, 1)
        self.assertEqual(self.hll.registers()[last], 1)

    def test_set_first_register(self):
        self.hll.set_register(0, 1)
        self.assertEqual(self.hll.registers()[0], 1)

    def test_set_register_with_negative_value_fails(self):
        with self.assertRaises(ValueError):
            self.hll.set_register(0, -1)

    def test_set_register_with_greater_than_max_rank_fails(self):
        with self.assertRaises(ValueError):
            self.hll.set_register(0, 33)

    def test_set_register_with_index_out_of_bounds(self):
        # ``size`` is one past the last valid index.
        with self.assertRaises(IndexError):
            self.hll.set_register(self.size, 1)

    def test_set_register_with_negative_index_fails(self):
        # NOTE(review): the call below uses a negative *value*, not a negative
        # index, so it repeats test_set_register_with_negative_value_fails.
        # Which exception set_register(-1, 1) raises cannot be determined from
        # this file -- verify against the library before rewriting this test.
        with self.assertRaises(ValueError):
            self.hll.set_register(0, -1)

    def test_bytesarray_has_correct_values(self):
        expected = self._random_registers()
        for index, rank in enumerate(expected):
            self.hll.set_register(index, rank)

        self.assertEqual(expected, self.hll.registers())

    def test_registers_returns_bytesarray(self):
        self.assertIsInstance(self.hll.registers(), bytearray)

    def test_bytesarray_has_correct_length(self):
        self.assertEqual(len(self.hll.registers()), self.size)

    def test_set_registers(self):
        expected = self._random_registers()
        self.hll.set_registers(expected)

        self.assertEqual(expected, self.hll.registers())