def test_singleton_in_a_crowd():
    """Check quantiles when one distinct value follows many identical ones.

    The digest receives the value 10 ten thousand times and then the value
    20 once. After compression, every probed quantile below 1 must be
    exactly 10.0 and the quantile at 1 must be exactly 20.0.
    """
    digest = TDigest(compression=100)
    for _ in range(10000):
        digest.add(10)
    digest.add(20)
    digest.compress()

    # Every probe short of the maximum must land on the repeated value.
    for q in (0, 0.5, 0.8, 0.9, 0.99):
        assert digest.quantile(q) == 10.0
    # Only the very top of the distribution may report the lone 20.
    assert digest.quantile(1) == 20.0
def test_merge():
    """Check that summing partial digests still estimates quantile and cdf.

    For each partition count, 10000 values drawn from ``random.random()``
    are dealt round-robin into that many sub-digests. The test verifies
    that each sub-digest's length and summed centroid counts match the
    number of values it received, adds the sub-digests together with ``+``,
    and compares the combined digest's ``quantile`` and ``cdf`` against the
    module-level ``quantile`` and ``cdf`` helpers applied to the sorted
    data.
    """
    probes = [0.001, 0.01, 0.1, 0.2, 0.3, 0.5]

    for parts in [2, 5, 10, 20, 50, 100]:
        samples = []
        # NOTE(review): this reference digest is filled and compressed but
        # no assertion below reads it.
        digest = TDigest()
        subs = [TDigest() for _ in range(parts)]
        expected_sizes = [0] * parts

        # NOTE(review): the random stream is not seeded here, so the data
        # differs between runs unless a seed is set elsewhere.
        for i in range(10000):
            value = random.random()
            samples.append(value)
            digest.add(value)
            slot = i % parts
            subs[slot].add(value)
            expected_sizes[slot] += 1
        digest.compress()
        samples.sort()

        # Each partial digest must account for exactly what it was fed.
        total = 0
        for expected, sub in zip(expected_sizes, subs):
            assert expected == len(sub)
            centroid_total = sum(c.count for c in sub.centroids)
            assert expected == centroid_total
            total += centroid_total
        assert total == len(samples)

        # Left fold with ``+`` over the partial digests, in list order.
        merged = subs[0]
        for sub in subs[1:]:
            merged = merged + sub

        # Quantile probes run first, then cdf probes, with one shared
        # pair of tolerances.
        checks = (
            (quantile, merged.quantile),
            (cdf, merged.cdf),
        )
        for exact, estimate in checks:
            for q in probes:
                reference = exact(samples, q)
                error = estimate(q) - reference
                assert abs(error) / q < 0.3
                assert abs(error) < 0.015