def test_iterable():
    """Check that ``update`` accepts a flat list and a list mixing scalars with sub-lists."""
    flat_values = list(range(4))
    hist = StreamHist().update(flat_values)
    assert hist.total == 4

    # Scalars and nested lists that together cover the integers 1..15.
    mixed_values = [[1, 2, 3], 4, [5, 6], 7, 8, [9], [10, 11, 12], 13, 14, 15]
    hist = StreamHist().update(mixed_values)
    assert hist.total == 15
    assert hist.mean() == 8
def test_freeze():
    """Stream normal samples into a ``StreamHist(freeze=500)`` and check its statistics.

    Each statistic is compared with its target through the ``about`` helper
    (presumably an absolute-tolerance comparison -- confirm against its
    definition).
    """
    # NOTE(review): this source contains a second ``test_freeze`` definition;
    # if both live in one module the later one replaces the earlier -- confirm
    # which should remain.
    sample_count = 100000
    hist = StreamHist(freeze=500)
    for sample in make_normal(sample_count):
        hist.update(sample)

    # h.sum(0) is expected to land near half of the sample count.
    half_count = sample_count / 2.0
    count_slack = sample_count / 50.0
    assert about(hist.sum(0), half_count, count_slack)

    assert about(hist.median(), 0, 0.05)
    assert about(hist.mean(), 0, 0.05)
    assert about(hist.var(), 1, 0.05)
def test_freeze():
    """Verify summary statistics of a histogram created with ``freeze=500``.

    100000 values from ``make_normal`` are inserted one at a time; the
    results are then checked with the ``about`` helper (presumably a
    tolerance-based comparison -- confirm against its definition).
    """
    # NOTE(review): another ``test_freeze`` definition appears in this source;
    # within a single module only the last definition is kept -- confirm the
    # duplication is intended.
    total_points = 100000
    frozen_hist = StreamHist(freeze=500)
    for value in make_normal(total_points):
        frozen_hist.update(value)

    below_zero = frozen_hist.sum(0)
    assert about(below_zero, total_points / 2.0, total_points / 50.0)

    median_value = frozen_hist.median()
    assert about(median_value, 0, 0.05)

    mean_value = frozen_hist.mean()
    assert about(mean_value, 0, 0.05)

    variance = frozen_hist.var()
    assert about(variance, 1, 0.05)
def test_median_mean():
    """Check the median on uniform samples and the median and mean on normal samples.

    Comparisons go through the ``about`` helper with 0.05 as its third
    argument (presumably a tolerance -- confirm against its definition).
    """
    sample_count = 10000

    # Samples from make_uniform: the median is compared with 0.5.
    uniform_hist = StreamHist()
    for sample in make_uniform(sample_count):
        uniform_hist.update(sample)
    assert about(uniform_hist.median(), 0.5, 0.05)

    # Samples from make_normal: median and mean are both compared with 0.
    normal_hist = StreamHist()
    for sample in make_normal(sample_count):
        normal_hist.update(sample)
    assert about(normal_hist.median(), 0, 0.05)
    assert about(normal_hist.mean(), 0, 0.05)
def test_histogram_exact():
    """Compare a StreamHist holding exactly ``max_bins`` points against numpy.

    Quantiles at every whole percentile, the mean and the variance are
    compared through ``approx``; min, max and count are compared exactly.
    """
    capacity = 50
    rate = 1 / 5
    samples = [random.expovariate(rate) for _ in range(capacity)]

    hist = StreamHist(capacity)
    hist.update(samples)

    # Fractions 0.00, 0.01, ..., 1.00.
    fractions = [step / 100 for step in range(101)]

    import numpy as np

    expected_quantiles = np.quantile(samples, fractions)
    assert hist.quantiles(*fractions) == approx(expected_quantiles)
    assert hist.mean() == approx(np.mean(samples))
    assert hist.var() == approx(np.var(samples))
    assert hist.min() == min(samples)
    assert hist.max() == max(samples)
    assert hist.count() == capacity
def test_histogram_approx(max_bins, num_points, expected_error):
    """Check StreamHist accuracy, quantiles in particular, against numpy.

    ``num_points`` exponential samples are inserted into a histogram built
    with ``max_bins``. The summed, normalised quantile error must not exceed
    ``expected_error``. The arguments are presumably supplied by a
    parametrisation that is not visible in this source -- confirm.
    """
    samples = [random.expovariate(1 / 5) for _ in range(num_points)]
    hist = StreamHist(max_bins)
    hist.update(samples)

    import numpy as np

    fractions = [step / 100 for step in range(101)]
    window = 7  # number of percentile steps on each side used for normalising

    estimated = hist.quantiles(*fractions)
    exact = np.quantile(samples, fractions)
    # Exact quantiles shifted ``window`` steps down / up, padded with the
    # 0 and 1 quantiles at the ends; zip() below stops at the shortest input.
    lower_bounds = np.quantile(samples, [0] * window + fractions)
    upper_bounds = np.quantile(samples, fractions[window:] + [1] * window)

    # Sum of per-quantile errors, each divided by the spread between the
    # exact quantiles ``window`` steps below and above it.
    total_error = 0
    for est, ref, low, high in zip(estimated, exact, lower_bounds, upper_bounds):
        total_error += abs(est - ref) / (high - low)
    assert total_error <= expected_error

    assert hist.mean() == approx(np.mean(samples))
    assert hist.var() == approx(np.var(samples), rel=.05)
    assert hist.min() == min(samples)
    assert hist.max() == max(samples)
    assert hist.count() == num_points
def test_mean():
    """Insert the integers 0..1000 one at a time and check the mean is exactly 500.0."""
    # NOTE(review): this source contains a second ``test_mean`` definition;
    # if both live in one module the later one replaces the earlier -- confirm
    # which should remain.
    value_count = 1001
    hist = StreamHist()
    for value in range(value_count):
        hist.update(value)

    expected_mean = (value_count - 1) / 2.0
    assert hist.mean() == expected_mean
def test_mean():
    """Check that the mean of the streamed values 0, 1, ..., 1000 equals (1001 - 1) / 2."""
    # NOTE(review): another ``test_mean`` definition appears in this source;
    # within a single module only the last definition is kept -- confirm the
    # duplication is intended.
    n_values = 1001
    running_hist = StreamHist()
    for item in range(n_values):
        running_hist.update(item)

    midpoint = (n_values - 1) / 2.0
    assert running_hist.mean() == midpoint