def test_insert_same(self):
    """Adding an element that was already added must not change the estimate."""
    sketch = HyperLogLogPlusPlus(random_seed=42)
    sketch.add(1)
    estimate_before = sketch.estimate_cardinality()

    # Insert the very same element a second time.
    sketch.add(1)
    estimate_after = sketch.estimate_cardinality()

    self.assertEqual(estimate_before, estimate_after)
def insertion_test_helper(self, number_to_insert, acceptable_error=.05):
    """Insert ``range(number_to_insert)`` and check the estimate's accuracy.

    Args:
        number_to_insert: How many consecutive integers, starting at 0, are
            added to a fresh sketch. It is also the divisor of the ratio
            below, so it must be non-zero.
        acceptable_error: Maximum allowed absolute deviation of the ratio
            ``estimate / number_to_insert`` from 1.0.
    """
    sketch = HyperLogLogPlusPlus(random_seed=137)
    for item in range(number_to_insert):
        sketch.add(item)

    # A ratio of exactly 1.0 means the estimate equals the number inserted.
    estimate_ratio = sketch.estimate_cardinality() / number_to_insert
    self.assertAlmostEqual(estimate_ratio, 1.0, delta=acceptable_error)
def test_simple_estimate_smaller(self):
    """Check ``alpha`` and the estimate when every bucket holds the value 1.

    A sketch with ``self.vector_length`` buckets has its buckets overwritten
    with a vector of ones; ``alpha`` and ``estimate_cardinality()`` are then
    compared for exact equality against hand-computed values.
    """
    m = self.vector_length
    sketch = HyperLogLogPlusPlus(
        length=m,
        random_seed=42,
        num_integer_bits=self.num_integer_bits,
    )
    sketch.buckets = np.ones(m)

    # presumably the alpha constant for a 16-bucket sketch — confirm that
    # self.vector_length is 16 in the fixture.
    expected_alpha = 0.673
    # Keep this exact left-to-right arithmetic: the result is compared with
    # assertEqual, so the float must be bit-identical.
    expected_estimate = expected_alpha * m**2 * 2 / m

    self.assertEqual(expected_alpha, sketch.alpha)
    self.assertEqual(sketch.estimate_cardinality(), expected_estimate)
def test_simple_estimate_larger(self):
    """Check ``alpha`` and the estimate for 2**14 buckets all holding 30.

    The sketch's buckets are overwritten with a constant vector of 30s;
    ``alpha`` and ``estimate_cardinality()`` are then compared for exact
    equality against hand-computed values.
    """
    num_buckets = 2**14
    sketch = HyperLogLogPlusPlus(
        length=num_buckets,
        random_seed=42,
        num_integer_bits=self.num_integer_bits,
    )
    sketch.buckets = 30 * np.ones(num_buckets)

    expected_alpha = 0.7213 / (1 + 1.079 / num_buckets)
    # Keep this exact left-to-right arithmetic: the result is compared with
    # assertEqual, so the float must be bit-identical.
    expected_estimate = expected_alpha * num_buckets**2 * 2**30 / num_buckets

    self.assertEqual(expected_alpha, sketch.alpha)
    self.assertEqual(sketch.estimate_cardinality(), expected_estimate)
def test_merge_sparse_with_dense(self):
    """Merge a one-element sketch with a 97-element sketch and check the result.

    Both sketches use 16 buckets and the same random seed. The first receives
    a single element (presumably leaving it in sparse mode); the second
    receives ``range(16 * 6 + 1)`` (presumably enough to make it dense).
    """
    num_buckets = 16
    single_item = HyperLogLogPlusPlus(length=num_buckets, random_seed=234)
    single_item.add(100)

    many_items = HyperLogLogPlusPlus(length=num_buckets, random_seed=234)
    for item in range(num_buckets * 6 + 1):
        many_items.add(item)

    merged = single_item.merge(many_items)

    self.assertFalse(
        merged.sparse_mode,
        'Merged sketch should not be in sparse mode.')
    # Should change one bucket value given this random seed.
    unchanged_buckets = sum(many_items.buckets == merged.buckets)
    self.assertEqual(
        unchanged_buckets,
        num_buckets - 1,
        'Merged sketch is not correct.')
    self.assertSameElements(
        merged.temp_set, set(), 'Temp set is not correct.')
    self.assertGreater(
        merged.estimate_cardinality(),
        many_items.estimate_cardinality())