예제 #1
0
def test_monocity():
    """Check that ``quantile`` and ``cdf`` never decrease on a fine grid.

    A ``TDigest`` is fed 10000 draws of ``random.random()``. Every pair of
    neighbouring grid points ``k * 1e-4`` and ``(k + 1) * 1e-4`` is then
    probed, and both ``quantile`` and ``cdf`` must return a value at the
    upper point that is at least the value at the lower point.
    """
    sketch = TDigest()
    for _ in range(10000):
        sketch.add(random.random())

    step = 1e-4
    pair_count = int(1e4) - 1  # last pair is (0.9998, 0.9999)
    for k in range(pair_count):
        lower = k * step
        upper = (k + 1) * step
        # Both functions are probed with the same pair of points.
        assert sketch.quantile(lower) <= sketch.quantile(upper)
        assert sketch.cdf(lower) <= sketch.cdf(upper)
예제 #2
0
def test_serialization():
    """Check that a ``TDigest`` survives a pickle round trip unchanged.

    A digest built from 100 draws of ``random.random()`` is pickled and
    unpickled. The copy must report the same ``len``, hold the same number
    of centroids with pairwise-equal ``mean`` and ``count``, and return
    exactly the same ``quantile`` and ``cdf`` values on the grid
    ``0/10000, 1/10000, ..., 9999/10000``.
    """
    original = TDigest()
    for _ in range(100):
        original.add(random.random())

    payload = pickle.dumps(original)
    restored = pickle.loads(payload)

    assert len(original) == len(restored)
    # Equal centroid counts are asserted first so the zip below cannot
    # silently drop trailing centroids from the longer sequence.
    assert len(original.centroids) == len(restored.centroids)
    for before, after in zip(original.centroids, restored.centroids):
        assert before.mean == after.mean
        assert before.count == after.count

    for tick in range(10000):
        point = tick / 10000.
        assert original.quantile(point) == restored.quantile(point)
        assert original.cdf(point) == restored.cdf(point)
예제 #3
0
def test_repeated_values():
    """Check ``cdf`` and ``quantile`` on data with heavily repeated values.

    10000 samples are produced as ``rint(random.uniform(0, 1) * 10) / 10.``
    and added to a ``TDigest``. For each ``z`` in ``0.0, 0.1, ..., 0.9``
    and each offset in a fixed list, the probe point ``q = z + offset``
    must satisfy:

    * ``cdf(q)`` is within 0.02 of ``z + 0.05``;
    * ``quantile(q)`` is within 0.001 of ``rint(q * 10) / 10.``.

    NOTE(review): ``rint`` is defined outside this block; presumably it
    rounds to the nearest integer (e.g. ``numpy.rint``) -- confirm.
    """
    sketch = TDigest()

    # Materialise every sample before the first add() call.
    samples = []
    for _ in range(10000):
        samples.append(rint(random.uniform(0, 1) * 10) / 10.)

    for sample in samples:
        sketch.add(sample)

    # Upper bound on the number of centroids the digest may keep.
    assert len(sketch.centroids) < 10 * 1000.

    offsets = (0.01, 0.02, 0.03, 0.07, 0.08, 0.09)
    for tenth in range(10):
        z = tenth / 10.
        expected_cdf = z + 0.05
        for offset in offsets:
            q = z + offset

            observed_cdf = sketch.cdf(q)
            assert abs(expected_cdf - observed_cdf) < 0.02

            estimate = sketch.quantile(q)
            nearest_tenth = rint(q * 10) / 10.
            assert abs(nearest_tenth - estimate) < 0.001