def test_sum_first_half_of_first_bin():
    """Check sum() at a point between the minimum and the first bin value.

    Regression test for
    https://github.com/carsonfarmer/streamhist/issues/13
    """
    hist = StreamHist(maxbins=5)
    hist.update((1, 2, 3, 4, 5, .5))
    assert hist.min() == 0.5

    # With six points and five bins, the first bin is expected to hold the
    # two smallest points (0.5 and 1) centred at 0.75.
    first_bin = hist.bins[0]
    assert first_bin.value == 0.75
    assert first_bin.count == 2

    # Nothing lies below the minimum itself.
    assert hist.sum(hist.min()) == 0

    # Halfway between the minimum and the first bin's value.
    midpoint = (hist.min() + first_bin.value) / 2
    observed = hist.sum(midpoint)
    expected = (.5**2) * first_bin.count / 2
    assert observed == expected
def test_negative_densities():
    """Check that pdf() is never negative between the histogram's min and max.

    A histogram is filled with 10000 values from ``make_normal`` and its
    density is evaluated at 100 evenly spaced points spanning
    ``[h.min(), h.max()]``.
    """
    from numpy import linspace

    points = 10000
    h = StreamHist()
    data = make_normal(points)
    h.update(data)

    x = linspace(h.min(), h.max(), 100)
    # Generator instead of a list: no throwaway list is built and the check
    # stops at the first negative density.
    assert all(h.pdf(t) >= 0. for t in x)
def test_min_max():
    """Check min()/max() on an empty, a filled, and a merged histogram."""
    # NOTE(review): a second function named test_min_max follows this one.
    # If both live in the same module, the later definition replaces this
    # one and only that one is collected -- confirm and de-duplicate.
    hist = StreamHist()

    # An empty histogram reports no extremes.
    assert hist.min() is None
    assert hist.max() is None

    # After 1000 draws of rand_int(10) the assertions expect both 0 and 10
    # to have been observed.
    for _ in range(1000):
        hist.update(rand_int(10))
    assert hist.min() == 0
    assert hist.max() == 10

    # Feed 0..3 into one histogram and 2..5 into another, then merge.
    left = StreamHist()
    right = StreamHist()
    for low, high in zip(range(4), range(2, 6)):
        left.update(low)
        right.update(high)

    combined = left.merge(right)
    assert combined.min() == 0
    assert combined.max() == 5
def test_min_max():
    """Verify the extremes reported by empty, populated and merged histograms."""
    # NOTE(review): an earlier function with this same name precedes this
    # one. If both are in the same module, this definition shadows the
    # earlier one -- confirm and de-duplicate.
    sample_count = 1000
    offset = 2

    populated = StreamHist()
    # Before any update there is no minimum or maximum to report.
    assert populated.min() is None
    assert populated.max() is None

    for _ in range(sample_count):
        populated.update(rand_int(10))
    # The test expects both ends of the drawn range to have appeared.
    assert populated.min() == 0
    assert populated.max() == 10

    first = StreamHist()
    second = StreamHist()
    for value in range(4):
        first.update(value)
        # The second histogram sees the same values shifted up by two.
        second.update(value + offset)

    merged = first.merge(second)
    # The merged histogram should span both inputs: 0 from the first, 5 from
    # the second.
    assert merged.min() == 0
    assert merged.max() == 5
def test_histogram_exact():
    """Compare a StreamHist that is not over capacity with numpy statistics.

    The histogram is given exactly as many samples as it has bins; its
    quantiles, mean, variance, extremes and count are then compared with
    values computed directly from the samples.
    """
    import numpy as np

    capacity = 50
    samples = [random.expovariate(1 / 5) for _ in range(capacity)]

    hist = StreamHist(capacity)
    hist.update(samples)

    # Quantile levels 0.00, 0.01, ..., 1.00.
    levels = [pct / 100 for pct in range(101)]
    assert hist.quantiles(*levels) == approx(np.quantile(samples, levels))

    assert hist.mean() == approx(np.mean(samples))
    assert hist.var() == approx(np.var(samples))
    assert hist.min() == min(samples)
    assert hist.max() == max(samples)
    assert hist.count() == capacity
def test_histogram_approx(max_bins, num_points, expected_error):
    """Check StreamHist accuracy against numpy, with emphasis on quantiles.

    Intended for histograms filled beyond their bin capacity; the argument
    values are supplied from outside this function.

    Args:
        max_bins: Bin limit passed to ``StreamHist``.
        num_points: Number of exponential samples fed to the histogram.
        expected_error: Upper bound on the summed, scaled quantile error.
    """
    import numpy as np

    samples = [random.expovariate(1 / 5) for _ in range(num_points)]
    hist = StreamHist(max_bins)
    hist.update(samples)

    # Quantile levels 0.00, 0.01, ..., 1.00.
    levels = [pct / 100 for pct in range(101)]
    shift = 7

    estimated = hist.quantiles(*levels)
    exact = np.quantile(samples, levels)
    # Exact quantiles taken `shift` levels below and above each level
    # (clamped to 0 and 1 at the ends). Their difference scales each error.
    lower = np.quantile(samples, [0] * shift + levels)
    upper = np.quantile(samples, levels[shift:] + [1] * shift)

    # Sum over all levels of |estimate - exact| / local spread. zip() stops
    # at the shortest operand, so the surplus tail of `lower` is ignored.
    total_error = 0
    for est, ref, low, high in zip(estimated, exact, lower, upper):
        spread = high - low
        total_error += abs(est - ref) / spread
    assert total_error <= expected_error

    assert hist.mean() == approx(np.mean(samples))
    assert hist.var() == approx(np.var(samples), rel=.05)
    assert hist.min() == min(samples)
    assert hist.max() == max(samples)
    assert hist.count() == num_points