def test_histogram():
    """Check ``TDigest.histogram`` on a large random sample.

    Covers three things: an integer bin count yields counts consistent
    with the digest's own CDF, explicit bin edges are returned unchanged
    with zero counts in the bins placed beyond the observed extremes, and
    the ``range`` argument has no effect when explicit edges are given.
    """
    digest = TDigest()
    digest.update(np.random.normal(size=10000))

    # Integer bin count: n bins -> n counts and n + 1 edges.
    counts, edges = digest.histogram(100)
    assert len(counts) == 100
    assert len(edges) == 101

    # Each count should equal the CDF mass between its two edges, scaled
    # by the total size of the digest.
    cdf_at_edges = digest.cdf(edges)
    expected_counts = (cdf_at_edges[1:] - cdf_at_edges[:-1]) * digest.size()
    np.testing.assert_allclose(expected_counts, counts)

    # Explicit edges offset by float64 machine epsilon and by 1 on either
    # side of the observed minimum and maximum.
    lo = digest.min()
    hi = digest.max()
    eps = np.finfo('f8').eps
    explicit_edges = np.array([
        lo - 1,
        lo - eps,
        lo,
        lo + (hi - lo)/2,
        hi,
        hi + eps,
        hi + 1,
    ])
    counts, returned_edges = digest.histogram(explicit_edges)
    np.testing.assert_allclose(explicit_edges, returned_edges)

    # The two outermost bins on each side must be empty, and the bins
    # together must account for every observation.
    for idx in (0, 1, -2, -1):
        assert counts[idx] == 0
    assert counts.sum() == digest.size()

    # range ignored when bins provided
    counts_with_range, returned_edges = digest.histogram(
        explicit_edges, range=(-5, -3)
    )
    np.testing.assert_allclose(counts, counts_with_range)
    np.testing.assert_allclose(explicit_edges, returned_edges)
def test_histogram_small_n():
    """Check ``TDigest.histogram`` when the digest holds only 1 or 2 points."""
    digest = TDigest()

    # One observation: the expected edges span 0.5 on either side of it.
    digest.add(1)
    counts, edges = digest.histogram(10)
    assert len(counts) == 10
    assert len(edges) == 11
    assert edges[0] == 0.5
    assert edges[-1] == 1.5
    assert counts.sum() == 1

    # Two observations: the expected edges run from the smaller value to
    # the larger one.
    digest.add(2)
    counts, edges = digest.histogram(10)
    assert counts.sum() == 2
    assert edges[0] == 1
    assert edges[-1] == 2

    # A range that contains none of the data yields an all-zero histogram.
    counts, edges = digest.histogram(range=(-5, -3))
    assert counts.sum() == 0
def test_histogram_empty():
    """Check ``TDigest.histogram`` on a digest that has received no data.

    Every combination of ``bins``/``range`` below describes five bins, so
    the result must have five zero counts and six strictly increasing
    edges; when a range is given, the outer edges must match it.
    """
    digest = TDigest()

    # (bins, range) pairs: integer count alone, integer count with an
    # explicit range, and explicit edges.
    cases = [
        (5, None),
        (5, (-1, 1)),
        (np.arange(6), None),
    ]

    for bins_arg, range_arg in cases:
        counts, edges = digest.histogram(bins=bins_arg, range=range_arg)

        assert len(counts) == 5
        assert len(edges) == 6

        if range_arg is not None:
            lower, upper = range_arg
            assert edges[0] == lower
            assert edges[-1] == upper

        assert (counts == 0).all()
        assert (np.diff(edges) > 0).all()
def test_histogram_errors():
    """Check that ``TDigest.histogram`` rejects invalid ``range``/``bins``.

    Expected failures:

    * ``TypeError`` for a ``range`` that is not a pair of numbers.
    * ``ValueError`` for a ``range`` containing NaN/inf or with
      ``low > high``.
    * ``ValueError`` for ``bins`` that is a string, negative, not 1-D,
      empty, or contains NaN.
    """
    t = TDigest()
    # Populate the digest with 1000 samples.  The sample count has to be
    # passed as ``size=``: a lone positional argument is interpreted as
    # ``low``, which would draw one scalar from an inverted interval.
    t.update(np.random.uniform(size=1000))

    for r in [('a', 'b'), 1]:
        with pytest.raises(TypeError):
            t.histogram(range=r)

    # NOTE: overlaps with the ``r = 1`` case in the loop above, which
    # already requires the stricter TypeError; kept as-is.
    with pytest.raises(Exception):
        t.histogram(range=1)

    non_finite_ranges = [(np.nan, 1), (np.inf, 1), (1, np.nan), (1, np.inf)]
    for r in non_finite_ranges:
        with pytest.raises(ValueError):
            t.histogram(range=r)

    # Lower bound greater than upper bound.
    with pytest.raises(ValueError):
        t.histogram(range=(1, 0))

    invalid_bins = [
        'a',                              # not numeric
        -1,                               # negative bin count
        np.arange(4).reshape((2, 2)),     # edges not 1-D
        np.arange(0, 10, -1),             # empty edge array
        np.array([np.nan, 0, 1]),         # edges contain NaN
    ]
    for b in invalid_bins:
        with pytest.raises(ValueError):
            t.histogram(bins=b)