def test_negative_densities():
    """The estimated density is non-negative across the observed range.

    Feeds 10000 samples from ``make_normal`` into a ``StreamHist`` and
    evaluates ``pdf`` at 100 evenly spaced points between the histogram's
    minimum and maximum.
    """
    from numpy import linspace

    sample_count = 10000
    hist = StreamHist()
    hist.update(make_normal(sample_count))

    # Evaluate every grid point up front, then check the sign of each density.
    grid = linspace(hist.min(), hist.max(), 100)
    densities = [hist.pdf(point) for point in grid]
    assert all(density >= 0. for density in densities)
def test_min_max():
    """Check ``min``/``max`` on empty, populated, and merged histograms."""
    # A histogram that has seen no data reports no extremes.
    hist = StreamHist()
    assert hist.min() is None
    assert hist.max() is None

    # After 1000 draws of rand_int(10) both ends of the range are expected
    # to have been observed (presumably rand_int(10) yields 0..10 inclusive;
    # confirm against its definition).
    for _ in range(1000):
        hist.update(rand_int(10))
    assert hist.min() == 0
    assert hist.max() == 10

    # Two histograms over shifted ranges: 0..3 and 2..5.
    lower = StreamHist()
    upper = StreamHist()
    shift = 2
    for value in range(4):
        lower.update(value)
        upper.update(value + shift)

    # The merged histogram spans the union of both ranges.
    combined = lower.merge(upper)
    assert combined.min() == 0
    assert combined.max() == 5
def test_min_max():
    """Verify that extremes are tracked through updates and merges.

    Covers three situations: an empty histogram (both extremes are
    ``None``), a histogram fed 1000 values from ``rand_int(10)``, and the
    merge of two histograms built from overlapping integer ranges.
    """
    # NOTE(review): a test with this same name appears to be defined
    # immediately before this one; if both live in the same module, only
    # the later definition is collected. Confirm and remove the duplicate.
    empty_then_filled = StreamHist()
    assert empty_then_filled.min() is None
    assert empty_then_filled.max() is None

    draws = 1000
    for _ in range(draws):
        empty_then_filled.update(rand_int(10))

    assert empty_then_filled.min() == 0
    assert empty_then_filled.max() == 10

    first = StreamHist()
    second = StreamHist()
    for n in range(4):
        first.update(n)       # first sees 0, 1, 2, 3
        second.update(n + 2)  # second sees 2, 3, 4, 5

    union = first.merge(second)
    assert union.min() == 0
    assert union.max() == 5
def test_histogram_exact():
    """A StreamHist which is not at capacity matches numpy statistics.

    Inserts exactly ``max_bins`` exponentially distributed samples, so the
    number of points does not exceed the bin limit, then compares the
    histogram's quantiles, mean, variance, extremes, and count against
    values computed directly from the samples.
    """
    import numpy as np

    capacity = 50
    samples = [random.expovariate(1 / 5) for _ in range(capacity)]

    hist = StreamHist(capacity)
    hist.update(samples)

    # Percentile levels 0.00, 0.01, ..., 1.00.
    levels = [step / 100 for step in range(101)]
    expected_quantiles = np.quantile(samples, levels)

    assert hist.quantiles(*levels) == approx(expected_quantiles)
    assert hist.mean() == approx(np.mean(samples))
    assert hist.var() == approx(np.var(samples))
    assert hist.min() == min(samples)
    assert hist.max() == max(samples)
    assert hist.count() == capacity
def test_histogram_approx(max_bins, num_points, expected_error):
    """Test accuracy of StreamHist over capacity, especially quantiles.

    Args:
        max_bins: Bin limit passed to ``StreamHist``.
        num_points: Number of exponentially distributed samples to insert.
        expected_error: Upper bound on the accumulated quantile error.

    The accumulated error is the sum, over the 101 percentile levels, of
    ``|estimate - exact|`` divided by the spread between the exact
    quantiles seven percentile steps below and above that level (clamped
    to levels 0 and 1 at the ends).

    NOTE(review): the arguments are presumably supplied by a parametrize
    decorator that is not visible here -- confirm.
    """
    import numpy as np

    samples = [random.expovariate(1 / 5) for _ in range(num_points)]
    hist = StreamHist(max_bins)
    hist.update(samples)

    levels = [step / 100 for step in range(101)]

    estimated = hist.quantiles(*levels)
    exact = np.quantile(samples, levels)
    # Shifting the level list by seven entries pairs each level with the
    # exact quantile 7 steps below (padded with level 0) and 7 steps above
    # (padded with level 1); zip() truncates the longer padded list.
    window_low = np.quantile(samples, [0] * 7 + levels)
    window_high = np.quantile(samples, levels[7:] + [1] * 7)

    total_error = 0
    for _, est, ref, low, high in zip(
            levels, estimated, exact, window_low, window_high):
        spread = high - low
        total_error += abs(est - ref) / spread
    assert total_error <= expected_error

    assert hist.mean() == approx(np.mean(samples))
    assert hist.var() == approx(np.var(samples), rel=.05)
    assert hist.min() == min(samples)
    assert hist.max() == max(samples)
    assert hist.count() == num_points