예제 #1
0
def test_var():
    assert StreamHist().update(1).var() is None
    h = StreamHist()
    for p in [1, 1, 2, 3, 4, 5, 6, 6]:
        h.update(p)
    assert h.var() == 3.75
    h = StreamHist()
    for p in make_normal(10000):
        h.update(p)
    assert about(h.var(), 1, 0.05)
예제 #2
0
def test_var():
    assert StreamHist().update(1).var() is None
    h = StreamHist()
    for p in [1, 1, 2, 3, 4, 5, 6, 6]:
        h.update(p)
    assert h.var() == 3.75
    h = StreamHist()
    for p in make_normal(10000):
        h.update(p)
    assert about(h.var(), 1, 0.05)
예제 #3
0
def test_freeze():
    points = 100000
    h = StreamHist(freeze=500)
    for p in make_normal(points):
        h.update(p)
    assert about(h.sum(0), points / 2.0, points / 50.0)
    assert about(h.median(), 0, 0.05)
    assert about(h.mean(), 0, 0.05)
    assert about(h.var(), 1, 0.05)
예제 #4
0
def test_freeze():
    points = 100000
    h = StreamHist(freeze=500)
    for p in make_normal(points):
        h.update(p)
    assert about(h.sum(0), points/2.0, points/50.0)
    assert about(h.median(), 0, 0.05)
    assert about(h.mean(), 0, 0.05)
    assert about(h.var(), 1, 0.05)
예제 #5
0
def test_histogram_exact():
    """A StreamHist which is not at capacity matches numpy statistics"""
    max_bins = 50
    points = [random.expovariate(1 / 5) for _ in range(max_bins)]
    h = StreamHist(max_bins)
    h.update(points)

    q = [i / 100 for i in range(101)]
    import numpy as np
    assert h.quantiles(*q) == approx(np.quantile(points, q))
    assert h.mean() == approx(np.mean(points))
    assert h.var() == approx(np.var(points))
    assert h.min() == min(points)
    assert h.max() == max(points)
    assert h.count() == max_bins
예제 #6
0
def test_histogram_approx(max_bins, num_points, expected_error):
    """Test accuracy of StreamHist over capacity, especially quantiles."""
    points = [random.expovariate(1 / 5) for _ in range(num_points)]
    h = StreamHist(max_bins)
    h.update(points)

    import numpy as np
    q = [i / 100 for i in range(101)]
    err_sum = 0  # avg percent error across samples
    for p, b, b_np, b_np_min, b_np_max in zip(
            q, h.quantiles(*q), np.quantile(points, q),
            np.quantile(points, [0] * 7 + q),
            np.quantile(points, q[7:] + [1] * 7)):
        err_denom = b_np_max - b_np_min
        err_sum += abs(b - b_np) / err_denom
    assert err_sum <= expected_error
    assert h.mean() == approx(np.mean(points))
    assert h.var() == approx(np.var(points), rel=.05)
    assert h.min() == min(points)
    assert h.max() == max(points)
    assert h.count() == num_points