예제 #1
0
def test_regression():
    """Pin bins and quantiles of four 5-bin histograms to recorded values.

    Three histograms are derived from the same seeded ``make_normal`` sample
    (a default one, one created with ``weighted=True``, and the sum of the
    two); the fourth is fed ``range(10000)``. Each is compared exactly against
    previously recorded bin dictionaries and quantile values.
    """
    random.seed(1700)
    samples = make_normal(10000)
    probes = (0.1, 0.25, 0.5, 0.75, 0.9)

    plain = StreamHist(maxbins=5)
    weighted = StreamHist(maxbins=5, weighted=True)
    ramp = StreamHist(maxbins=5)

    plain.update(samples)
    weighted.update(samples)
    # The combined histogram is produced by addition (weighted operand on the
    # left) instead of being constructed and updated on its own.
    merged = weighted + plain
    ramp.update(range(10000))

    # One row per histogram: (histogram, recorded bins, recorded quantiles).
    cases = [
        (
            plain,
            [
                {'count': 1176.0, 'mean': -1.622498097884402},
                {'count': 5290.0, 'mean': -0.3390892100898127},
                {'count': 3497.0, 'mean': 1.0310297400593385},
                {'count': 35.0, 'mean': 2.2157182954841126},
                {'count': 2.0, 'mean': 3.563619987633774},
            ],
            [
                -1.022649473089556,
                -0.5279748744244142,
                0.1476067074922296,
                0.9815338358189885,
                1.6627248917927795,
            ],
        ),
        (
            weighted,
            [
                {'count': 579.0, 'mean': -2.017257931684027},
                {'count': 1902.0, 'mean': -1.0677091300958608},
                {'count': 3061.0, 'mean': -0.24660751313691653},
                {'count': 2986.0, 'mean': 0.5523120572161528},
                {'count': 1472.0, 'mean': 1.557598912751095},
            ],
            [
                -1.1941285587341846,
                -0.6041467139342105,
                0.08840996549170466,
                0.8247014091807423,
                1.557598912751095,
            ],
        ),
        (
            merged,
            [
                {'count': 1755.0, 'mean': -1.7527351028815432},
                {'count': 1902.0, 'mean': -1.0677091300958608},
                {'count': 8351.0, 'mean': -0.3051906980106826},
                {'count': 6483.0, 'mean': 0.8105375295133331},
                {'count': 1509.0, 'mean': 1.5755221868037264},
            ],
            [
                -1.0074328972882012,
                -0.5037558708214145,
                0.11958766584785563,
                0.8874923692642509,
                1.432517386448461,
            ],
        ),
        (
            ramp,
            [
                {'count': 1339.0, 'mean': 669.0},
                {'count': 2673.0, 'mean': 2675.0},
                {'count': 1338.0, 'mean': 4680.5},
                {'count': 2672.0, 'mean': 6685.5},
                {'count': 1978.0, 'mean': 9010.5},
            ],
            [
                1830.581598358843,
                3063.70150218845,
                5831.110283907479,
                8084.851093080222,
                9010.5,
            ],
        ),
    ]

    for hist, expected_bins, expected_quantiles in cases:
        assert hist.to_dict()["bins"] == expected_bins
        assert hist.quantiles(*probes) == expected_quantiles
예제 #2
0
def test_update_vs_insert():
    """Check that a bulk ``update`` matches per-point ``insert`` plus ``trim``.

    One histogram receives the whole sample through ``update``; the other
    receives the same values one at a time via ``insert(value, 1)``, calling
    ``trim`` after every insertion and once more at the end. Their ``to_dict``
    outputs must be equal.
    """
    points = 1000
    data = make_normal(points)

    h1 = StreamHist(maxbins=50)
    h1.update(data)

    h2 = StreamHist(maxbins=50)
    # Only the values matter here, so iterate them directly instead of
    # enumerating and discarding the index.
    for p in data:
        h2.insert(p, 1)
        h2.trim()
    h2.trim()

    assert h1.to_dict() == h2.to_dict()
예제 #3
0
def test_update_vs_insert():
    """Check that a bulk ``update`` matches per-point ``insert`` plus ``trim``.

    One histogram receives the whole sample through ``update``; the other
    receives the same values one at a time via ``insert(value, 1)``, calling
    ``trim`` after every insertion and once more at the end. Their ``to_dict``
    outputs must be equal.
    """
    points = 1000
    data = make_normal(points)

    h1 = StreamHist(maxbins=50)
    h1.update(data)

    h2 = StreamHist(maxbins=50)
    # Only the values matter here, so iterate them directly instead of
    # enumerating and discarding the index.
    for p in data:
        h2.insert(p, 1)
        h2.trim()
    h2.trim()

    assert h1.to_dict() == h2.to_dict()
예제 #4
0
def test_warm_start_with_history():
    """A histogram rebuilt via ``from_dict`` has the same ``str`` as its source.

    Ten values drawn with ``np.random.normal(0, 10, 10)`` are loaded into an
    8-bin histogram, which is serialized with ``to_dict`` and reconstructed
    with ``StreamHist.from_dict``.
    """
    # Draw the sample before building the histogram, as the original did.
    samples = np.random.normal(0, 10, 10)

    source = StreamHist(maxbins=8)
    source.update(samples)

    restored = StreamHist.from_dict(source.to_dict())
    assert str(restored) == str(source)
예제 #5
0
def test_copy():
    """Exercise ``StreamHist.copy`` on empty, updated and freshly built histograms.

    Verifies that a copy starts with equal ``bins``, that updating the
    original afterwards makes the two differ, and that a new copy is equal
    again (by ``bins`` and by ``to_dict``).
    """
    h1 = StreamHist()
    h2 = h1.copy()
    assert h1.bins == h2.bins

    # Updating the original must leave the earlier copy behind.
    h1.update(make_normal(1000))
    assert h1.bins != h2.bins

    h2 = h1.copy()
    assert h1.bins == h2.bins

    # list(range(4)) builds the same list as the former identity
    # comprehension; the result of update() is used as the histogram.
    h1 = StreamHist().update(list(range(4)))
    h2 = h1.copy()
    assert h1.to_dict() == h2.to_dict()
예제 #6
0
def test_copy():
    """Exercise ``StreamHist.copy`` on empty, updated and freshly built histograms.

    Verifies that a copy starts with equal ``bins``, that updating the
    original afterwards makes the two differ, and that a new copy is equal
    again (by ``bins`` and by ``to_dict``).
    """
    h1 = StreamHist()
    h2 = h1.copy()
    assert h1.bins == h2.bins

    # Updating the original must leave the earlier copy behind.
    h1.update(make_normal(1000))
    assert h1.bins != h2.bins

    h2 = h1.copy()
    assert h1.bins == h2.bins

    # list(range(4)) builds the same list as the former identity
    # comprehension; the result of update() is used as the histogram.
    h1 = StreamHist().update(list(range(4)))
    h2 = h1.copy()
    assert h1.to_dict() == h2.to_dict()
예제 #7
0
def test_iris_regression():
    """Pin the bins of a 32-bin histogram over 150 fixed values.

    The ``sepal_lengths`` list is loaded with a single ``update`` call and the
    ``"bins"`` entry of ``to_dict`` is compared exactly against recorded
    ``count``/``mean`` pairs.
    """
    sepal_lengths = [
        5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9, 5.4, 4.8,
        4.8, 4.3, 5.8, 5.7, 5.4, 5.1, 5.7, 5.1, 5.4, 5.1, 4.6, 5.1,
        4.8, 5.0, 5.0, 5.2, 5.2, 4.7, 4.8, 5.4, 5.2, 5.5, 4.9, 5.0,
        5.5, 4.9, 4.4, 5.1, 5.0, 4.5, 4.4, 5.0, 5.1, 4.8, 5.1, 4.6,
        5.3, 5.0, 7.0, 6.4, 6.9, 5.5, 6.5, 5.7, 6.3, 4.9, 6.6, 5.2,
        5.0, 5.9, 6.0, 6.1, 5.6, 6.7, 5.6, 5.8, 6.2, 5.6, 5.9, 6.1,
        6.3, 6.1, 6.4, 6.6, 6.8, 6.7, 6.0, 5.7, 5.5, 5.5, 5.8, 6.0,
        5.4, 6.0, 6.7, 6.3, 5.6, 5.5, 5.5, 6.1, 5.8, 5.0, 5.6, 5.7,
        5.7, 6.2, 5.1, 5.7, 6.3, 5.8, 7.1, 6.3, 6.5, 7.6, 4.9, 7.3,
        6.7, 7.2, 6.5, 6.4, 6.8, 5.7, 5.8, 6.4, 6.5, 7.7, 7.7, 6.0,
        6.9, 5.6, 7.7, 6.3, 6.7, 7.2, 6.2, 6.1, 6.4, 7.2, 7.4, 7.9,
        6.4, 6.3, 6.1, 7.7, 6.3, 6.4, 6.0, 6.9, 6.7, 6.9, 5.8, 6.8,
        6.7, 6.7, 6.3, 6.5, 6.2, 5.9,
    ]

    hist = StreamHist(maxbins=32)
    hist.update(sepal_lengths)

    # Recorded (count, mean) for each of the 32 bins, in bin order.
    recorded = [
        (1, 4.3),
        (4, 4.425000000000001),
        (4, 4.6),
        (7, 4.771428571428571),
        (6, 4.8999999999999995),
        (10, 5.0),
        (9, 5.1),
        (4, 5.2),
        (1, 5.3),
        (6, 5.3999999999999995),
        (7, 5.5),
        (6, 5.6000000000000005),
        (15, 5.746666666666667),
        (3, 5.900000000000001),
        (6, 6.0),
        (6, 6.1000000000000005),
        (4, 6.2),
        (9, 6.299999999999999),
        (7, 6.3999999999999995),
        (5, 6.5),
        (2, 6.6),
        (8, 6.700000000000001),
        (3, 6.8),
        (4, 6.9),
        (1, 7.0),
        (1, 7.1),
        (3, 7.2),
        (1, 7.3),
        (1, 7.4),
        (1, 7.6),
        (4, 7.7),
        (1, 7.9),
    ]
    expected_bins = [{'count': count, 'mean': mean} for count, mean in recorded]

    assert hist.to_dict()["bins"] == expected_bins
예제 #8
0
def test_round_trip():
    """``to_dict`` output passed through ``from_dict`` serializes identically."""
    hist = StreamHist().update([1, 1, 4])

    # Serialize twice, as the original expression did: once for the expected
    # value and once as the input to from_dict.
    expected = hist.to_dict()
    rebuilt = hist.from_dict(hist.to_dict())

    assert expected == rebuilt.to_dict()
예제 #9
0
def test_round_trip():
    """``to_dict`` output passed through ``from_dict`` serializes identically."""
    hist = StreamHist().update([1, 1, 4])

    # Serialize twice, as the original expression did: once for the expected
    # value and once as the input to from_dict.
    expected = hist.to_dict()
    rebuilt = hist.from_dict(hist.to_dict())

    assert expected == rebuilt.to_dict()