Beispiel #1
0
class TestCardinalityEstimation(unittest.TestCase):
    """Check ``HyperLogLog.cardinality()`` for hand-set register states.

    Every test writes registers directly via ``set_register`` and compares
    the estimate with a precomputed value or a narrow interval around it.
    """

    def setUp(self):
        # Each test starts from a fresh estimator on which no register has
        # been written yet.
        self.hll = HyperLogLog(5)

    def _assert_between(self, value, low, high):
        """Assert ``low <= value <= high``.

        Two dedicated assertions are used instead of ``assertTrue`` on a
        precomputed boolean so that a failure reports the offending value
        and the bound it violated.
        """
        self.assertGreaterEqual(value, low)
        self.assertLessEqual(value, high)

    def test_small_range_correction_all_registers_set_to_zero(self):
        """An estimator with no register written reports exactly 0.0."""
        self.assertEqual(self.hll.cardinality(), 0.0)

    def test_small_range_correction_not_all_registers_set_to_zero(self):
        """With only register 0 set to 1 the estimate is about 1.4657."""
        self.hll.set_register(0, 1)
        self._assert_between(
            self.hll.cardinality(), 1.46571806761, 1.46571806762
        )

    def test_medium_range_no_correction(self):
        """With registers 0..31 all set to 2 the estimate is about 89.216."""
        for i in range(32):
            self.hll.set_register(i, 2)

        self._assert_between(self.hll.cardinality(), 89.216, 89.217)

    @unittest.skip("correction value needs to be re-computed")
    def test_large_range_correction(self):
        """Estimate for a 16-parameter estimator with registers set to 16.

        Currently skipped: the expected interval is known to be stale.
        """
        hll = HyperLogLog(16)
        # NOTE(review): this stops one short of hll.size(), so the final
        # register is never written. Confirm whether that is intentional
        # when the expected interval is re-computed.
        for i in range(hll.size() - 1):
            hll.set_register(i, 16)

        self._assert_between(hll.cardinality(), 7916284520, 7916284521)
Beispiel #2
0
class TestCardinalityEstimation(unittest.TestCase):
    """Cardinality estimates of ``HyperLogLog`` for fixed register contents.

    Registers are written by hand through ``set_register``; each test then
    checks that ``cardinality()`` lands inside a precomputed interval.
    Bounds are checked with ``assertGreaterEqual`` / ``assertLessEqual`` so
    that a failing run shows the actual estimate.
    """

    def setUp(self):
        # A new estimator per test; nothing has been written to it.
        self.hll = HyperLogLog(5)

    def test_small_range_correction_all_registers_set_to_zero(self):
        """Without any register written, the estimate is exactly 0.0."""
        self.assertEqual(self.hll.cardinality(), 0.0)

    def test_small_range_correction_not_all_registers_set_to_zero(self):
        """Setting only register 0 to 1 yields an estimate near 1.4657."""
        self.hll.set_register(0, 1)
        c = self.hll.cardinality()
        self.assertGreaterEqual(c, 1.46571806761)
        self.assertLessEqual(c, 1.46571806762)

    def test_medium_range_no_correction(self):
        """Setting registers 0..31 to 2 yields an estimate near 89.216."""
        for i in range(32):
            self.hll.set_register(i, 2)

        c = self.hll.cardinality()
        self.assertGreaterEqual(c, 89.216)
        self.assertLessEqual(c, 89.217)

    @unittest.skip("correction value needs to be re-computed")
    def test_large_range_correction(self):
        """Skipped until the expected interval below has been re-computed."""
        hll = HyperLogLog(16)
        # NOTE(review): the loop ends at size() - 2, leaving the last
        # register unwritten. Verify this when re-computing the bounds.
        for i in range(hll.size() - 1):
            hll.set_register(i, 16)

        c = hll.cardinality()
        self.assertGreaterEqual(c, 7916284520)
        self.assertLessEqual(c, 7916284521)
Beispiel #3
0
    def test_large_range_correction(self):
        """Check the estimate of a 16-parameter estimator filled with 16s.

        The bounds are asserted separately (rather than through
        ``assertTrue`` on a boolean) so a failure reports the estimate.
        """
        hll = HyperLogLog(16)
        # NOTE(review): iterating to size() - 1 leaves the final register
        # unwritten. Confirm this matches how the expected interval was
        # derived.
        for i in range(hll.size() - 1):
            hll.set_register(i, 16)

        c = hll.cardinality()
        self.assertGreaterEqual(c, 7916284520)
        self.assertLessEqual(c, 7916284521)
Beispiel #4
0
    def test_large_range_correction(self):
        """Estimate must fall in [7916284520, 7916284521] for this state.

        Every register except the last one is set to 16 on an estimator
        built with parameter 16.
        """
        hll = HyperLogLog(16)
        # NOTE(review): the last register (index size() - 1) is skipped by
        # this range. Presumably deliberate, but verify against the
        # expected values.
        for i in range(hll.size() - 1):
            hll.set_register(i, 16)

        c = hll.cardinality()
        # Separate bound checks give a readable message when one fails.
        self.assertGreaterEqual(c, 7916284520)
        self.assertLessEqual(c, 7916284521)
Beispiel #5
0
    def test_merge(self):
        """After ``merge`` the receiver holds the registers of both sides."""
        left = HyperLogLog(2)
        right = HyperLogLog(2)

        left.set_register(0, 1)
        right.set_register(3, 1)
        left.merge(right)

        # Register 0 was set on the receiver, register 3 on the argument;
        # the remaining two registers were never written.
        self.assertEqual(left.registers(), bytearray([1, 0, 0, 1]))
Beispiel #6
0
    def test_merge(self):
        """Merge one estimator into another and compare the register bytes."""
        # Four registers, with the first and the last equal to 1.
        wanted = bytearray(b"\x01\x00\x00\x01")

        target = HyperLogLog(2)
        source = HyperLogLog(2)
        target.set_register(0, 1)
        source.set_register(3, 1)

        target.merge(source)

        merged = target.registers()
        self.assertEqual(merged, wanted)
Beispiel #7
0
class TestRegisterFunctions(unittest.TestCase):
    """Tests for ``set_register``, ``set_registers`` and ``registers``."""

    def setUp(self):
        self.k = 5
        # Register count implied by k; test_bytesarray_has_correct_length
        # checks that registers() really has this many entries.
        self.size = 2 ** self.k
        self.hll = HyperLogLog(self.k)

    def test_set_last_register(self):
        """Writing the highest valid index is reflected in registers()."""
        # The last register is at index 2**k - 1, not k - 1.
        last = self.size - 1
        self.hll.set_register(last, 1)
        self.assertEqual(self.hll.registers()[last], 1)

    def test_set_first_register(self):
        """Writing index 0 is reflected in registers()."""
        self.hll.set_register(0, 1)
        self.assertEqual(self.hll.registers()[0], 1)

    def test_set_register_with_negative_value_fails(self):
        """A negative register value is rejected with ValueError."""
        with self.assertRaises(ValueError):
            self.hll.set_register(0, -1)

    def test_set_register_with_greater_than_max_rank_fails(self):
        """A register value of 33 is rejected with ValueError."""
        with self.assertRaises(ValueError):
            self.hll.set_register(0, 33)

    def test_set_register_with_index_out_of_bounds(self):
        """An index one past the last register raises IndexError."""
        with self.assertRaises(IndexError):
            self.hll.set_register(self.size, 1)

    def test_set_register_with_negative_index_fails(self):
        """A negative register index is rejected."""
        # Previously this passed (0, -1), duplicating the negative-value
        # test and never exercising a negative index.
        # NOTE(review): the exception type raised for a negative index is
        # not visible from this file, so either of the two types used by
        # the sibling tests is accepted. Tighten once confirmed.
        with self.assertRaises((ValueError, IndexError)):
            self.hll.set_register(-1, 1)

    def test_bytesarray_has_correct_values(self):
        """Values written one by one come back unchanged from registers()."""
        expected = bytearray(randint(0, 16) for _ in range(self.size))
        for index, rank in enumerate(expected):
            self.hll.set_register(index, rank)

        registers = self.hll.registers()
        self.assertEqual(expected, registers)

    def test_registers_returns_bytesarray(self):
        """registers() returns exactly a ``bytearray``."""
        self.assertIs(type(self.hll.registers()), bytearray)

    def test_bytesarray_has_correct_length(self):
        """registers() has 2**k entries."""
        self.assertEqual(len(self.hll.registers()), self.size)

    def test_set_registers(self):
        """Values written in bulk come back unchanged from registers()."""
        expected = bytearray(randint(0, 16) for _ in range(self.size))
        self.hll.set_registers(expected)

        registers = self.hll.registers()
        self.assertEqual(expected, registers)
Beispiel #8
0
class TestRegisterFunctions(unittest.TestCase):
    """Tests for reading and writing individual ``HyperLogLog`` registers."""

    def setUp(self):
        self.k = 5
        # Number of registers for this k; the length test below asserts
        # that registers() matches it.
        self.size = 2 ** self.k
        self.hll = HyperLogLog(self.k)

    def test_set_last_register(self):
        """The highest valid index can be written and read back."""
        # Index k - 1 is not the last register; 2**k - 1 is.
        last = self.size - 1
        self.hll.set_register(last, 1)
        self.assertEqual(self.hll.registers()[last], 1)

    def test_set_first_register(self):
        """Index 0 can be written and read back."""
        self.hll.set_register(0, 1)
        self.assertEqual(self.hll.registers()[0], 1)

    def test_set_register_with_negative_value_fails(self):
        """Negative register values raise ValueError."""
        with self.assertRaises(ValueError):
            self.hll.set_register(0, -1)

    def test_set_register_with_greater_than_max_rank_fails(self):
        """The register value 33 raises ValueError."""
        with self.assertRaises(ValueError):
            self.hll.set_register(0, 33)

    def test_set_register_with_index_out_of_bounds(self):
        """The first index past the end raises IndexError."""
        with self.assertRaises(IndexError):
            self.hll.set_register(self.size, 1)

    def test_set_register_with_negative_index_fails(self):
        """Negative register indices are rejected."""
        # The earlier body called set_register(0, -1), which is the
        # negative-value case again rather than a negative index.
        # NOTE(review): which exception a negative index raises cannot be
        # determined from this file; both types used by the neighbouring
        # tests are accepted until that is confirmed.
        with self.assertRaises((ValueError, IndexError)):
            self.hll.set_register(-1, 1)

    def test_bytesarray_has_correct_values(self):
        """Every register, including the last, returns the value written."""
        # Cover all 2**k registers; stopping at 31 left the final one
        # untested.
        expected = bytearray(randint(0, 16) for _ in range(self.size))
        for index, rank in enumerate(expected):
            self.hll.set_register(index, rank)

        registers = self.hll.registers()
        self.assertEqual(expected, registers)

    def test_registers_returns_bytesarray(self):
        """registers() returns exactly a ``bytearray``."""
        self.assertIs(type(self.hll.registers()), bytearray)

    def test_bytesarray_has_correct_length(self):
        """registers() has 2**k entries."""
        self.assertEqual(len(self.hll.registers()), self.size)