def test_simple_estimate_smaller(self):
    """Check alpha and the cardinality estimate when every bucket holds 1.

    The expected value is ``alpha * m**2 * 2 / m`` with
    ``m = self.vector_length``. This corresponds to the raw HyperLogLog
    formula ``alpha * m**2 / sum(2**-bucket)`` for buckets that are all 1
    (presumably what ``estimate_cardinality`` evaluates -- confirm against
    the sketch implementation).

    NOTE(review): the constant 0.673 is the alpha value for 16 buckets, so
    this test assumes ``self.vector_length == 16`` -- confirm in the fixture.
    """
    m = self.vector_length
    hll = HyperLogLogPlusPlus(
        length=m,
        random_seed=42,
        num_integer_bits=self.num_integer_bits,
    )
    # Overwrite the sketch state directly so the estimate is deterministic.
    hll.buckets = np.ones(m)

    alpha_16 = 0.673
    expected = alpha_16 * m**2 * 2 / m

    # Floats are compared with a tolerance rather than exact equality so the
    # test does not depend on the implementation's order of operations.
    self.assertAlmostEqual(hll.alpha, alpha_16)
    self.assertAlmostEqual(
        hll.estimate_cardinality(),
        expected,
        delta=abs(expected) * 1e-9,
    )
def test_simple_estimate_larger(self):
    """Check alpha and the cardinality estimate for 2**14 buckets of 30.

    The expected alpha is ``0.7213 / (1 + 1.079 / m)`` and the expected
    estimate is ``alpha * m**2 * 2**30 / m``. This corresponds to the raw
    HyperLogLog formula ``alpha * m**2 / sum(2**-bucket)`` for buckets that
    are all 30 (presumably what ``estimate_cardinality`` evaluates --
    confirm against the sketch implementation).
    """
    m = 2**14
    hll = HyperLogLogPlusPlus(
        length=m,
        random_seed=42,
        num_integer_bits=self.num_integer_bits,
    )
    # Overwrite the sketch state directly so the estimate is deterministic.
    hll.buckets = 30 * np.ones(m)

    alpha_m = 0.7213 / (1 + 1.079 / m)
    expected = alpha_m * m**2 * 2**30 / m

    # alpha is a computed quotient and the estimate is on the order of 1e13,
    # so exact equality is brittle. The estimate uses a relative tolerance
    # because an absolute one would be meaningless at this magnitude.
    self.assertAlmostEqual(hll.alpha, alpha_m)
    self.assertAlmostEqual(
        hll.estimate_cardinality(),
        expected,
        delta=abs(expected) * 1e-9,
    )