コード例 #1
0
def test_single():
    """A digest holding a single value reports that value everywhere.

    With only ``10`` added, min, max and every queried quantile equal 10,
    and the CDF is 0 below the value, 0.5 at it and 1 above it.
    """
    digest = TDigest()
    digest.add(10)

    # Summary statistics of a one-element digest.
    assert digest.min() == 10
    assert digest.max() == 10
    assert digest.size() == 1

    # Every probed quantile maps onto the only observation.
    for prob in (0, 0.5, 1):
        assert digest.quantile(prob) == 10

    # (query point, expected CDF value) on either side of, and at, the value.
    for point, expected in ((9, 0), (10, 0.5), (11, 1)):
        assert digest.cdf(point) == expected
コード例 #2
0
def test_quantile_and_cdf_non_numeric():
    """String input to ``quantile``, ``update`` and ``cdf`` raises ``TypeError``.

    Both a bare string and a list containing a string are exercised.
    """
    digest = TDigest()
    digest.update(np.arange(5))

    # (bound method, offending argument) pairs, run in the listed order.
    rejected_calls = (
        (digest.quantile, 'foo'),
        (digest.update, ['foo']),
        (digest.cdf, 'foo'),
        (digest.cdf, ['foo']),
    )
    for method, bad_arg in rejected_calls:
        with pytest.raises(TypeError):
            method(bad_arg)
コード例 #3
0
def test_merge():
    """Merging two digests into an empty one summarises the combined data.

    Two digests are built from disjoint uniform samples (``[0, 1)`` and
    ``[2, 3)``), merged into a third, and the result is checked for
    min/max/size, for quantile and CDF accuracy against the concatenated
    raw data, and for rejecting a non-digest argument with ``TypeError``.
    """
    # Draw the samples in this order so the global RNG stream is consumed
    # exactly as before.
    lower = np.random.uniform(0, 1, N)
    upper = np.random.uniform(2, 3, N)
    combined = np.concatenate([lower, upper])

    merged, left, right = TDigest(), TDigest(), TDigest()
    left.update(lower)
    right.update(upper)

    # Snapshot taken before merging, used below to detect mutation.
    left_before = left.centroids()

    merged.merge(left, right)
    assert merged.min() == min(left.min(), right.min())
    assert merged.max() == max(left.max(), right.max())
    assert merged.size() == left.size() + right.size()
    # The merge must leave its argument untouched.
    # NOTE(review): only the first argument is verified here; the second
    # one is not compared against a pre-merge snapshot.
    assert (left.centroids() == left_before).all()

    probs = np.array([0.001, 0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 0.99, 0.999])

    # Quantile accuracy, compared in probability space.
    recovered = quantiles_to_q(combined, merged.quantile(probs))
    np.testing.assert_allclose(probs, recovered, atol=0.012, rtol=0)

    # CDF accuracy at the points that q_to_x derives from the raw data.
    points = q_to_x(combined, probs)
    np.testing.assert_allclose(probs, merged.cdf(points), atol=0.005)

    with pytest.raises(TypeError):
        merged.merge(left, 'not a tdigest')
コード例 #4
0
def test_histogram():
    """Check ``TDigest.histogram`` with a bin count and with explicit edges.

    Three things are verified:

    * An integer argument of 100 yields 100 counts and 101 edges, and each
      count matches the CDF mass between its edges scaled by the digest size.
    * Explicit edges that extend beyond the data range are returned
      unchanged, the bins lying outside ``[min, max]`` are empty, and the
      counts sum to the digest size.
    * The ``range`` keyword has no effect when explicit edges are given.
    """
    t = TDigest()
    data = np.random.normal(size=10000)
    t.update(data)

    hist, bins = t.histogram(100)
    assert len(hist) == 100
    assert len(bins) == 101
    # Counts must agree with the CDF differences across consecutive edges.
    c = t.cdf(bins)
    np.testing.assert_allclose(np.diff(c) * t.size(), hist)

    # Named lo/hi (not min/max) so the builtins are not shadowed.
    lo = t.min()
    hi = t.max()
    eps = np.finfo('f8').eps
    # Edges straddling both extremes: two bins below the minimum and two
    # at/above the maximum, with the data range split at its midpoint.
    bins = np.array([lo - 1, lo - eps,
                     lo, lo + (hi - lo)/2, hi,
                     hi + eps, hi + 1])
    hist, bins2 = t.histogram(bins)
    np.testing.assert_allclose(bins, bins2)
    assert hist[0] == 0
    assert hist[1] == 0
    assert hist[-2] == 0
    assert hist[-1] == 0
    assert hist.sum() == t.size()

    # range ignored when bins provided
    hist2, bins2 = t.histogram(bins, range=(-5, -3))
    np.testing.assert_allclose(hist, hist2)
    np.testing.assert_allclose(bins, bins2)
コード例 #5
0
def test_empty():
    """A freshly constructed digest is empty and its statistics are NaN."""
    digest = TDigest()

    assert digest.size() == 0
    assert len(digest.centroids()) == 0

    # Evaluated lazily, one at a time, in the listed order.
    nan_valued = (
        digest.min,
        digest.max,
        lambda: digest.quantile(0.5),
        lambda: digest.cdf(0.5),
    )
    for compute in nan_valued:
        assert np.isnan(compute())
コード例 #6
0
def test_quantile_and_cdf_shape():
    """``quantile`` and ``cdf`` return results shaped like their input.

    Scalars give ``np.float64``, an empty tuple gives a ``(0,)`` array, and
    array inputs (1-D, 2-D and a strided slice) give arrays of equal shape.
    """
    digest = TDigest()
    digest.update(np.arange(5))

    # Scalar in, numpy float64 scalar out.
    assert isinstance(digest.quantile(0.5), np.float64)
    assert isinstance(digest.cdf(2), np.float64)

    # Empty sequence in, empty one-dimensional array out.
    for method in (digest.quantile, digest.cdf):
        out = method(())
        assert out.shape == (0,)

    probes = [
        np.array([0.5, 0.9]),
        np.array([[0.5, 0.9], [0, 1]]),
        # Stepped slice of a larger array.
        np.linspace(0, 1, 100)[10:-10:2],
    ]
    for probe in probes:
        for method in (digest.quantile, digest.cdf):
            out = method(probe)
            assert out.shape == probe.shape
コード例 #7
0
def test_distributions(data):
    """A digest built from ``data`` matches it on size, extremes and accuracy.

    ``data`` is presumably supplied by a pytest fixture or parametrization
    defined elsewhere; it must support ``len``, ``.min()`` and ``.max()``.
    """
    digest = TDigest()
    digest.update(data)

    # Exact bookkeeping: count and extremes are preserved.
    assert digest.size() == len(data)
    assert digest.min() == data.min()
    assert digest.max() == data.max()

    check_valid_quantile_and_cdf(digest)

    probs = np.array([0.001, 0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 0.99, 0.999])

    # Quantile accuracy, compared in probability space.
    recovered = quantiles_to_q(data, digest.quantile(probs))
    np.testing.assert_allclose(probs, recovered, atol=0.012, rtol=0)

    # CDF accuracy at the points that q_to_x derives from the raw data.
    points = q_to_x(data, probs)
    np.testing.assert_allclose(probs, digest.cdf(points), atol=0.005)