Esempio n. 1
0
def test_singleton_in_a_crowd():
    """Check that one outlier among many equal values only surfaces at q=1.

    Feeds 10000 copies of the value 10 and a single 20 into a ``TDigest``
    built with ``compression=100``, compresses it, and asserts that the
    quantiles at 0, 0.5, 0.8, 0.9 and 0.99 equal 10.0 while the quantile at
    1 equals 20.0.
    """
    digest = TDigest(compression=100)

    # The crowd: the same value added over and over.
    for _ in range(10000):
        digest.add(10)

    # The singleton, added last, followed by an explicit compress.
    digest.add(20)
    digest.compress()

    for q in (0, 0.5, 0.8, 0.9, 0.99):
        assert digest.quantile(q) == 10.0
    assert digest.quantile(1) == 20.0
Esempio n. 2
0
def test_singleton_in_a_crowd():
    """Verify quantiles of a digest holding 10000 tens and a single twenty.

    The digest is created with ``compression=100`` and compressed after the
    last value is added. Every probed quantile up to 0.99 must be exactly
    10.0; the quantile at 1 must be exactly 20.0.
    """
    crowd_value, outlier = 10, 20
    sketch = TDigest(compression=100)

    remaining = 10000
    while remaining > 0:
        sketch.add(crowd_value)
        remaining -= 1

    sketch.add(outlier)
    sketch.compress()

    # (quantile, expected value) pairs, checked in ascending quantile order.
    expectations = [
        (0, 10.0),
        (0.5, 10.0),
        (0.8, 10.0),
        (0.9, 10.0),
        (0.99, 10.0),
        (1, 20.0),
    ]
    for q, expected in expectations:
        assert sketch.quantile(q) == expected
Esempio n. 3
0
def test_merge():
    """Check that summing partial digests gives close quantile/cdf estimates.

    For each partition count, 10000 values from ``random.random()`` are dealt
    round-robin to that many ``TDigest`` instances. Each partial digest must
    report, through both ``len()`` and the sum of its centroid counts, exactly
    the number of values it received. The partial digests are then combined
    with ``+`` and the result's ``quantile`` and ``cdf`` are compared against
    ``quantile(data, q)`` and ``cdf(data, q)`` evaluated on the sorted data.
    """
    probes = [0.001, 0.01, 0.1, 0.2, 0.3, 0.5]

    for parts in [2, 5, 10, 20, 50, 100]:
        samples = []
        # Receives every value and is compressed, but no assertion reads it.
        whole = TDigest()
        partials = [TDigest() for _ in range(parts)]
        received = [0] * parts

        for i in range(10000):
            value = random.random()
            slot = i % parts
            samples.append(value)
            whole.add(value)
            partials[slot].add(value)
            received[slot] += 1

        whole.compress()
        samples.sort()

        # Every partial digest must account for exactly what it was given.
        total = 0
        for expected, partial in zip(received, partials):
            assert expected == len(partial)
            centroid_total = sum(c.count for c in partial.centroids)
            assert expected == centroid_total
            total += centroid_total

        assert total == len(samples)

        merged = reduce(lambda left, right: left + right, partials)

        for q in probes:
            exact = quantile(samples, q)
            error = abs(merged.quantile(q) - exact)

            assert error / q < 0.3
            assert error < 0.015

        for q in probes:
            exact = cdf(samples, q)
            error = abs(merged.cdf(q) - exact)

            assert error / q < 0.3
            assert error < 0.015
Esempio n. 4
0
def test_merge():
    """Merge round-robin-filled digests and bound the estimation error.

    Runs once per partition count in ``[2, 5, 10, 20, 50, 100]``. Each run
    adds 10000 ``random.random()`` values to one of ``parts`` sub-digests
    (chosen by ``i % parts``), checks that every sub-digest's ``len()`` and
    summed centroid counts equal the number of values it was given, folds the
    sub-digests together with ``+``, and asserts that the merged ``quantile``
    and ``cdf`` results stay within the relative (0.3) and absolute (0.015)
    tolerances of ``quantile(data, q)`` / ``cdf(data, q)`` on the sorted data.
    """
    partition_counts = [2, 5, 10, 20, 50, 100]
    check_points = [0.001, 0.01, 0.1, 0.2, 0.3, 0.5]

    for parts in partition_counts:
        observed = []
        # Fed and compressed alongside the sub-digests; not asserted on below.
        reference = TDigest()
        shards = [TDigest() for _ in range(parts)]
        per_shard = [0] * parts

        for index in range(10000):
            sample = random.random()
            observed.append(sample)
            reference.add(sample)
            shard_id = index % parts
            shards[shard_id].add(sample)
            per_shard[shard_id] += 1

        reference.compress()
        observed.sort()

        grand_total = 0
        for shard_id, shard in enumerate(shards):
            weight = sum(centroid.count for centroid in shard.centroids)
            assert per_shard[shard_id] == len(shard)
            assert per_shard[shard_id] == weight
            grand_total += weight

        assert grand_total == len(observed)

        combined = reduce(lambda acc, nxt: acc + nxt, shards)

        # Quantile checks run first, then cdf checks, each over all points.
        for exact_fn, estimate_fn in (
            (quantile, combined.quantile),
            (cdf, combined.cdf),
        ):
            for q in check_points:
                truth = exact_fn(observed, q)
                deviation = abs(estimate_fn(q) - truth)

                assert deviation / q < 0.3
                assert deviation < 0.015