def test_var():
    """Check ``StreamHist.var()`` for one point, a small data set, and a large sample."""
    # With a single point inserted, var() is expected to be None.
    single = StreamHist().update(1)
    assert single.var() is None

    # Eight hand-picked points; the expected value is compared exactly.
    hist = StreamHist()
    for value in (1, 1, 2, 3, 4, 5, 6, 6):
        hist.update(value)
    assert hist.var() == 3.75

    # 10000 points from make_normal; var() is checked against 1 via
    # about(..., 1, 0.05).
    hist = StreamHist()
    for value in make_normal(10000):
        hist.update(value)
    assert about(hist.var(), 1, 0.05)
def test_freeze():
    """Feed 100000 ``make_normal`` points into ``StreamHist(freeze=500)`` and check its statistics."""
    # NOTE(review): a second `test_freeze` with the same body is visible in this
    # source; if both live in one module, the later definition shadows this one.
    total = 100000
    hist = StreamHist(freeze=500)
    for sample in make_normal(total):
        hist.update(sample)

    # sum(0) is compared with half of the point count.
    assert about(hist.sum(0), total / 2.0, total / 50.0)
    assert about(hist.median(), 0, 0.05)
    assert about(hist.mean(), 0, 0.05)
    assert about(hist.var(), 1, 0.05)
def test_freeze():
    """Check sum, median, mean and variance of a ``StreamHist`` built with ``freeze=500``."""
    # NOTE(review): an earlier `test_freeze` with the same body is visible in
    # this source; if both live in one module, this definition shadows it.
    n_points = 100000
    frozen = StreamHist(freeze=500)
    for item in make_normal(n_points):
        frozen.update(item)

    # sum(0) is compared with half of the point count.
    assert about(frozen.sum(0), n_points / 2.0, n_points / 50.0)
    # The remaining statistics are checked with about(..., 0.05).
    assert about(frozen.median(), 0, 0.05)
    assert about(frozen.mean(), 0, 0.05)
    assert about(frozen.var(), 1, 0.05)
def test_histogram_exact():
    """Compare a StreamHist holding exactly ``max_bins`` points with numpy statistics."""
    import numpy as np

    capacity = 50
    # Exactly `capacity` points drawn from random.expovariate.
    samples = [random.expovariate(1 / 5) for _ in range(capacity)]

    hist = StreamHist(capacity)
    hist.update(samples)

    # Quantile levels 0.00, 0.01, ..., 1.00.
    levels = [step / 100 for step in range(101)]

    assert hist.quantiles(*levels) == approx(np.quantile(samples, levels))
    assert hist.mean() == approx(np.mean(samples))
    assert hist.var() == approx(np.var(samples))
    assert hist.min() == min(samples)
    assert hist.max() == max(samples)
    assert hist.count() == capacity
def test_histogram_approx(max_bins, num_points, expected_error):
    """Check StreamHist accuracy against numpy, with emphasis on quantiles.

    Args:
        max_bins: Value passed to the ``StreamHist`` constructor.
        num_points: Number of ``random.expovariate`` samples to insert.
        expected_error: Upper bound on the summed, scaled quantile error.
    """
    import numpy as np

    samples = [random.expovariate(1 / 5) for _ in range(num_points)]
    hist = StreamHist(max_bins)
    hist.update(samples)

    # Quantile levels 0.00, 0.01, ..., 1.00.
    levels = [step / 100 for step in range(101)]
    estimated = hist.quantiles(*levels)
    reference = np.quantile(samples, levels)
    # Reference quantiles 7 levels below / above each level, clamped to 0 and 1;
    # their difference scales each error term.
    lower = np.quantile(samples, [0] * 7 + levels)
    upper = np.quantile(samples, levels[7:] + [1] * 7)

    # Sum of absolute quantile errors, each divided by the local spread.
    total_error = 0
    for est, ref, low, high in zip(estimated, reference, lower, upper):
        spread = high - low
        total_error += abs(est - ref) / spread
    assert total_error <= expected_error

    assert hist.mean() == approx(np.mean(samples))
    assert hist.var() == approx(np.var(samples), rel=.05)
    assert hist.min() == min(samples)
    assert hist.max() == max(samples)
    assert hist.count() == num_points