Exemple #1
0
 def _hll_merge(v: pd.DataFrame) -> bytes:
     """Union every serialized sketch yielded by ``v`` into one register dump.

     Each item produced by iterating ``v`` is copied into a ``bytearray`` and
     loaded into a scratch sketch, which is then merged into the accumulator.
     ``k`` is a free variable resolved from the enclosing scope.

     Returns the value of ``registers()`` on the accumulator.
     """
     merged = HyperLogLog(k)
     scratch = HyperLogLog(k)
     for raw in v:
         # Rehydrate the item into the reusable scratch sketch, then fold it in.
         scratch.set_registers(bytearray(raw))
         merged.merge(scratch)
     return merged.registers()
Exemple #2
0
 def _hll_merge(v):
     """Merge all register dumps found in ``v`` and return the combined registers.

     ``v`` is iterated; every item is converted with ``bytearray(...)`` and
     loaded into a temporary sketch that is merged into the result sketch.
     ``k`` comes from the surrounding scope.
     """
     combined = HyperLogLog(k)
     loader = HyperLogLog(k)
     for payload in v:
         # One loader sketch is reused for every item to avoid re-allocating.
         loader.set_registers(bytearray(payload))
         combined.merge(loader)
     return combined.registers()
Exemple #3
0
class TestRegisterFunctions(unittest.TestCase):
    """Exercise ``set_register`` and ``registers`` on a ``HyperLogLog`` sketch."""

    def setUp(self):
        self.k = 5
        # The length test below pins the register count at 2**k.
        self.size = pow(2, self.k)
        self.hll = HyperLogLog(self.k)

    def test_set_last_register(self):
        # The last register is index size - 1 (31), not k - 1 (4).
        last = self.size - 1
        self.hll.set_register(last, 1)
        self.assertEqual(self.hll.registers()[last], 1)

    def test_set_first_register(self):
        self.hll.set_register(0, 1)
        self.assertEqual(self.hll.registers()[0], 1)

    def test_set_register_with_negative_value_fails(self):
        with self.assertRaises(ValueError):
            self.hll.set_register(0, -1)

    def test_set_register_with_greater_than_max_rank_fails(self):
        with self.assertRaises(ValueError):
            self.hll.set_register(0, 33)

    def test_set_register_with_index_out_of_bounds(self):
        # The first index past the end of the register array.
        with self.assertRaises(IndexError):
            self.hll.set_register(self.size, 1)

    def test_set_register_with_negative_index_fails(self):
        # Pass a negative *index* with a valid rank; the previous body repeated
        # the negative-rank case and never exercised a negative index.
        # NOTE(review): ValueError is kept from the original expectation --
        # confirm against the library's behaviour for negative indices.
        with self.assertRaises(ValueError):
            self.hll.set_register(-1, 1)

    def test_bytesarray_has_correct_values(self):
        # Cover every register, including the last one.
        expected = bytearray(randint(0, 16) for _ in range(self.size))
        for index, rank in enumerate(expected):
            self.hll.set_register(index, rank)

        self.assertEqual(self.hll.registers(), expected)

    def test_registers_returns_bytesarray(self):
        self.assertIsInstance(self.hll.registers(), bytearray)

    def test_bytesarray_has_correct_length(self):
        self.assertEqual(len(self.hll.registers()), self.size)
Exemple #4
0
 def _hll_init(v):
     """Map each element of ``v`` to the registers of a single-element sketch.

     One sketch is reused for all elements: before each element it is reset to
     the register state captured right after construction, then the element's
     ``str()`` form is added unless the element is ``None``.

     Returns the result of ``v.apply`` over that per-element function.
     """
     sketch = HyperLogLog(k)
     # Snapshot of the untouched registers, used to reset the shared sketch.
     # NOTE(review): assumes registers() hands back an independent copy -- confirm.
     blank = sketch.registers()

     def _registers_for(value):
         sketch.set_registers(blank)
         if value is not None:
             sketch.add(str(value))
         return sketch.registers()

     return v.apply(_registers_for)
Exemple #5
0
 def _hll_init_agg(v: pd.DataFrame) -> bytes:
     """Aggregate a mix of raw values and serialized sketches into one sketch.

     Items that are ``bytes``/``bytearray`` are treated as register dumps and
     merged; ``None`` items are skipped; anything else is added via ``str()``.
     ``k`` is resolved from the enclosing scope.

     Returns the value of ``registers()`` on the accumulated sketch.
     """
     total = HyperLogLog(k)
     scratch = HyperLogLog(k)
     for item in v:
         if item is None:
             continue
         if isinstance(item, (bytes, bytearray)):
             # Already a register dump: load it and merge it in.
             scratch.set_registers(bytearray(item))
             total.merge(scratch)
             continue
         total.add(str(item))
     return total.registers()
Exemple #6
0
    def test_merge(self):
        """Merging keeps the registers that were set on either sketch."""
        left = HyperLogLog(2)
        right = HyperLogLog(2)
        left.set_register(0, 1)
        right.set_register(3, 1)

        left.merge(right)

        # Registers 0 and 3 set, 1 and 2 untouched.
        want = bytearray([1, 0, 0, 1])
        self.assertEqual(left.registers(), want)
Exemple #7
0
    def test_merge(self):
        """After ``a.merge(b)``, ``a`` holds the registers set on ``a`` or ``b``."""
        target = HyperLogLog(2)
        other = HyperLogLog(2)
        target.set_register(0, 1)
        other.set_register(3, 1)

        target.merge(other)

        # Four registers for k=2: first and last set, the middle two zero.
        merged_expected = bytearray((1, 0, 0, 1))
        self.assertEqual(target.registers(), merged_expected)
Exemple #8
0
class TestRegisterFunctions(unittest.TestCase):
    """Check the register accessors and mutators of a ``HyperLogLog`` sketch."""

    def setUp(self):
        self.k = 5
        # The length test below pins the register count at 2**k.
        self.size = pow(2, self.k)
        self.hll = HyperLogLog(self.k)

    def test_set_last_register(self):
        # The last register is index size - 1 (31), not k - 1 (4).
        last = self.size - 1
        self.hll.set_register(last, 1)
        self.assertEqual(self.hll.registers()[last], 1)

    def test_set_first_register(self):
        self.hll.set_register(0, 1)
        self.assertEqual(self.hll.registers()[0], 1)

    def test_set_register_with_negative_value_fails(self):
        with self.assertRaises(ValueError):
            self.hll.set_register(0, -1)

    def test_set_register_with_greater_than_max_rank_fails(self):
        with self.assertRaises(ValueError):
            self.hll.set_register(0, 33)

    def test_set_register_with_index_out_of_bounds(self):
        # The first index past the end of the register array.
        with self.assertRaises(IndexError):
            self.hll.set_register(self.size, 1)

    def test_set_register_with_negative_index_fails(self):
        # Pass a negative *index* with a valid rank; the previous body repeated
        # the negative-rank case and never exercised a negative index.
        # NOTE(review): ValueError is kept from the original expectation --
        # confirm against the library's behaviour for negative indices.
        with self.assertRaises(ValueError):
            self.hll.set_register(-1, 1)

    def test_bytesarray_has_correct_values(self):
        expected = bytearray(randint(0, 16) for _ in range(self.size))
        for index, rank in enumerate(expected):
            self.hll.set_register(index, rank)

        self.assertEqual(expected, self.hll.registers())

    def test_registers_returns_bytesarray(self):
        self.assertIsInstance(self.hll.registers(), bytearray)

    def test_bytesarray_has_correct_length(self):
        self.assertEqual(len(self.hll.registers()), self.size)

    def test_set_registers(self):
        # Bulk-load a full register array and read it back unchanged.
        expected = bytearray(randint(0, 16) for _ in range(self.size))
        self.hll.set_registers(expected)

        self.assertEqual(expected, self.hll.registers())
Exemple #9
0
 def test_k_param_determines_the_number_of_registers(self):
     """A sketch created with k=5 reports 32 registers via both accessors."""
     expected_count = 32
     sketch = HyperLogLog(5)
     self.assertEqual(len(sketch.registers()), expected_count)
     self.assertEqual(sketch.size(), expected_count)
Exemple #10
0
 def test_all_registers_initialized_to_zero(self):
     """Every register of a freshly built k=5 sketch reads as zero."""
     fresh = HyperLogLog(5)
     for value in fresh.registers():
         self.assertEqual(value, 0)
Exemple #11
0
 def test_k_param_determines_the_number_of_registers(self):
     """With k=5, ``len(registers())`` and ``size()`` must both equal 32."""
     register_count = 32
     sketch = HyperLogLog(5)
     dump = sketch.registers()
     self.assertEqual(len(dump), register_count)
     self.assertEqual(sketch.size(), register_count)
Exemple #12
0
 def test_all_registers_initialized_to_zero(self):
     """A new k=5 sketch must not have any non-zero register."""
     untouched = HyperLogLog(5)
     dump = untouched.registers()
     for slot in dump:
         self.assertEqual(slot, 0)