def test_monocity():
    """Check that ``quantile`` and ``cdf`` are non-decreasing in their argument.

    A digest is filled with 10000 samples from ``random.random()``. The test
    then walks consecutive pairs of points spaced 1e-4 apart, starting at 0,
    and asserts that neither function yields a smaller result at the larger
    point than at the smaller one.
    """
    digest = TDigest()
    for _ in range(10000):
        digest.add(random.random())

    step = 1e-4
    for idx in range(int(1e4) - 1):
        lower = idx * step
        upper = (idx + 1) * step
        # The same grid points are used both as quantile arguments and as
        # cdf arguments.
        assert digest.quantile(lower) <= digest.quantile(upper)
        assert digest.cdf(lower) <= digest.cdf(upper)
def test_serialization():
    """Check that a digest is unchanged by a pickle round trip.

    After adding 100 samples from ``random.random()``, the digest is dumped
    and reloaded with ``pickle``. The copy must report the same ``len``, the
    same number of centroids with pairwise equal ``mean`` and ``count``, and
    exactly equal ``quantile`` / ``cdf`` results at 10000 probe points
    ``q / 10000.`` for ``q`` in ``range(10000)``.
    """
    original = TDigest()
    for _ in range(100):
        original.add(random.random())

    payload = pickle.dumps(original)
    restored = pickle.loads(payload)

    # Structural equality: overall size and per-centroid contents.
    assert len(original) == len(restored)
    assert len(original.centroids) == len(restored.centroids)
    for before, after in zip(original.centroids, restored.centroids):
        assert before.mean == after.mean
        assert before.count == after.count

    # Behavioural equality: both digests must answer queries identically.
    for step in range(10000):
        point = step / 10000.
        assert original.quantile(point) == restored.quantile(point)
        assert original.cdf(point) == restored.cdf(point)
def test_repeated_values():
    """Check digest behaviour on data with heavily repeated values.

    10000 samples are drawn with ``random.uniform(0, 1)``, scaled by 10,
    passed through ``rint`` and divided by 10 before being added. The test
    then asserts that:

    * the digest holds fewer than ``10 * 1000.`` centroids;
    * for each ``z = i / 10.`` (``i`` in ``range(10)``) and each offset
      ``delta``, ``cdf(z + delta)`` is within 0.02 of ``z + 0.05``;
    * ``quantile(z + delta)`` is within 0.001 of
      ``rint((z + delta) * 10) / 10.``.
    """
    digest = TDigest()

    # NOTE(review): ``rint`` is not defined in this block; presumably it is
    # numpy's round-to-nearest-integer -- confirm against the file's imports.
    # All samples are generated before any is added, so the order of the
    # random draws does not depend on what ``add`` does.
    data = []
    for _ in range(10000):
        data.append(rint(random.uniform(0, 1) * 10) / 10.)
    for value in data:
        digest.add(value)

    centroid_limit = 10 * 1000.
    assert len(digest.centroids) < centroid_limit

    offsets = (0.01, 0.02, 0.03, 0.07, 0.08, 0.09)
    for tenth in range(10):
        z = tenth / 10.
        expected_cdf = z + 0.05
        for delta in offsets:
            q = z + delta

            cdf = digest.cdf(q)
            assert abs(expected_cdf - cdf) < 0.02

            estimate = digest.quantile(q)
            expected_quantile = rint(q * 10) / 10.
            assert abs(expected_quantile - estimate) < 0.001