def test_single():
    """A digest holding a single point reports that point everywhere."""
    digest = TDigest()
    digest.add(10)

    assert digest.min() == 10
    assert digest.max() == 10
    assert digest.size() == 1

    # Every quantile of a one-point digest is the point itself.
    for q in (0, 0.5, 1):
        assert digest.quantile(q) == 10

    # The CDF is 0 below the point, 0.5 at it and 1 above it.
    for x, expected in ((9, 0), (10, 0.5), (11, 1)):
        assert digest.cdf(x) == expected
def test_quantile_and_cdf_non_numeric():
    """String inputs to quantile, update and cdf must raise TypeError."""
    digest = TDigest()
    digest.update(np.arange(5))

    # (method, invalid argument) pairs, exercised in the same order as the
    # original hand-written checks.
    invalid_calls = [
        (digest.quantile, 'foo'),
        (digest.update, ['foo']),
        (digest.cdf, 'foo'),
        (digest.cdf, ['foo']),
    ]
    for method, bad_argument in invalid_calls:
        with pytest.raises(TypeError):
            method(bad_argument)
def test_merge():
    """Merge two digests into an empty one and check the combined estimates."""
    merged = TDigest()
    left = TDigest()
    right = TDigest()

    low_samples = np.random.uniform(0, 1, N)
    high_samples = np.random.uniform(2, 3, N)
    combined = np.concatenate([low_samples, high_samples])

    left.update(low_samples)
    right.update(high_samples)
    left_centroids_before = left.centroids()

    merged.merge(left, right)

    assert merged.min() == min(left.min(), right.min())
    assert merged.max() == max(left.max(), right.max())
    assert merged.size() == left.size() + right.size()

    # Merging must leave its arguments untouched.
    unchanged = left.centroids() == left_centroids_before
    assert unchanged.all()

    # Quantile estimates, mapped back to q through the raw data.
    probs = np.array([0.001, 0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 0.99, 0.999])
    estimates = merged.quantile(probs)
    recovered = quantiles_to_q(combined, estimates)
    np.testing.assert_allclose(probs, recovered, atol=0.012, rtol=0)

    # CDF estimates at the data values corresponding to each q.
    points = q_to_x(combined, probs)
    recovered = merged.cdf(points)
    np.testing.assert_allclose(probs, recovered, atol=0.005)

    # Anything that is not a TDigest is rejected.
    with pytest.raises(TypeError):
        merged.merge(left, 'not a tdigest')
def test_histogram():
    """Check histogram output against the CDF and with explicit bin edges."""
    t = TDigest()
    data = np.random.normal(size=10000)
    t.update(data)

    # An integer bin count yields that many counts and one more edge.
    hist, bins = t.histogram(100)
    assert len(hist) == 100
    assert len(bins) == 101

    # Each bin count should equal the CDF mass between its edges, scaled
    # by the digest size.
    c = t.cdf(bins)
    np.testing.assert_allclose((c[1:] - c[:-1]) * t.size(), hist)

    # Named lo/hi rather than min/max so the builtins are not shadowed.
    lo = t.min()
    hi = t.max()
    eps = np.finfo('f8').eps
    bins = np.array([lo - 1, lo - eps, lo, lo + (hi - lo) / 2,
                     hi, hi + eps, hi + 1])

    # Explicit edges are returned as given; the bins lying outside
    # [lo, hi] are empty and the total matches the digest size.
    hist, bins2 = t.histogram(bins)
    np.testing.assert_allclose(bins, bins2)
    assert hist[0] == 0
    assert hist[1] == 0
    assert hist[-2] == 0
    assert hist[-1] == 0
    assert hist.sum() == t.size()

    # range ignored when bins provided
    hist2, bins2 = t.histogram(bins, range=(-5, -3))
    np.testing.assert_allclose(hist, hist2)
    np.testing.assert_allclose(bins, bins2)
def test_empty():
    """An empty digest has no centroids and returns NaN for every statistic."""
    digest = TDigest()

    assert digest.size() == 0
    assert len(digest.centroids()) == 0

    # With no data, each query is expected to come back as NaN.
    for value in (digest.min(), digest.max(),
                  digest.quantile(0.5), digest.cdf(0.5)):
        assert np.isnan(value)
def test_quantile_and_cdf_shape():
    """quantile and cdf return results shaped like their input."""
    digest = TDigest()
    digest.update(np.arange(5))

    # Scalar input gives a NumPy float64 scalar.
    assert isinstance(digest.quantile(0.5), np.float64)
    assert isinstance(digest.cdf(2), np.float64)

    # Empty input gives an empty 1-d result.
    for method in (digest.quantile, digest.cdf):
        empty_result = method(())
        assert empty_result.shape == (0,)

    # 1-d, 2-d and strided-slice inputs all keep their shape.
    inputs = [
        np.array([0.5, 0.9]),
        np.array([[0.5, 0.9], [0, 1]]),
        np.linspace(0, 1, 100)[10:-10:2],
    ]
    for values in inputs:
        for method in (digest.quantile, digest.cdf):
            assert method(values).shape == values.shape
def test_distributions(data):
    """Check size, extremes, quantile and CDF accuracy for one dataset.

    ``data`` is expected to be a NumPy array of samples (it must support
    ``len``, ``.min()`` and ``.max()``); how it is supplied is not visible
    in this function.
    """
    digest = TDigest()
    digest.update(data)

    assert digest.size() == len(data)
    assert digest.min() == data.min()
    assert digest.max() == data.max()

    check_valid_quantile_and_cdf(digest)

    # Quantile estimates, mapped back to q through the raw data.
    probs = np.array([0.001, 0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 0.99, 0.999])
    estimates = digest.quantile(probs)
    recovered = quantiles_to_q(data, estimates)
    np.testing.assert_allclose(probs, recovered, atol=0.012, rtol=0)

    # CDF estimates at the data values corresponding to each q.
    points = q_to_x(data, probs)
    recovered = digest.cdf(points)
    np.testing.assert_allclose(probs, recovered, atol=0.005)