def test_ints(self):
    """Check the 50th percentile and the total centroid count on integer input."""
    # Three distinct integers: the 50th percentile should be within 1e-4 of 2.
    digest = TDigest()
    digest.batch_update([1, 2, 3])
    median_error = abs(digest.percentile(50) - 2)
    assert median_error < 0.0001

    # One value repeated many times: the 50th percentile must be exactly 2,
    # and the per-centroid counts must add up to the number of inserted points.
    digest = TDigest()
    values = [1, 2, 2, 2, 2, 2, 2, 2, 3]
    digest.batch_update(values)
    assert digest.percentile(50) == 2
    total_count = sum(centroid.count for centroid in digest.C.values())
    assert total_count == len(values)
def test_trimmed_mean(self, percentile_range, data_size):
    """Compare ``TDigest.trimmed_mean`` with the same quantity computed on the raw sample.

    ``percentile_range`` is indexed at positions 0 and 1 for the lower and
    upper percentile; ``data_size`` is the number of random points drawn.
    """
    lower_pct = percentile_range[0]
    upper_pct = percentile_range[1]

    digest = TDigest()
    sample = random.random(size=data_size)
    digest.batch_update(sample)
    actual = digest.trimmed_mean(lower_pct, upper_pct)

    # Reference value: mean of the raw points lying between the two sample
    # percentiles, both bounds inclusive.
    at_or_above_lower = sample >= percentile(sample, lower_pct)
    at_or_below_upper = sample <= percentile(sample, upper_pct)
    inside = bitwise_and(at_or_above_lower, at_or_below_upper)
    expected = sample[inside].mean()

    testing.assert_allclose(actual, expected, rtol=0.01, atol=0.01)
def test_trimmed_mean(self, percentile_range, data_size):
    """Check the digest's trimmed mean against a direct computation on the data.

    The first two entries of ``percentile_range`` give the lower and upper
    percentile bounds; ``data_size`` controls how many random points are drawn.
    """
    low, high = percentile_range[0], percentile_range[1]

    digest = TDigest()
    points = random.random(size=data_size)
    digest.batch_update(points)
    estimated = digest.trimmed_mean(low, high)

    # Keep only the raw points between the two sample percentiles
    # (inclusive) and average them to obtain the reference value.
    keep = bitwise_and(
        points >= percentile(points, low),
        points <= percentile(points, high),
    )
    reference = points[keep].mean()

    testing.assert_allclose(estimated, reference, rtol=0.01, atol=0.01)
def test_uniform(self):
    """Check percentile estimates for 10000 values drawn with ``random.random``."""
    digest = TDigest()
    sample = random.random(size=10000)
    digest.batch_update(sample)

    # Each row: (percentile on the 0-100 scale, expected value, max abs error).
    checks = (
        (50, 0.5, 0.01),
        (10, .1, 0.01),
        (90, 0.9, 0.01),
        (1, 0.01, 0.005),
        (99, 0.99, 0.005),
        (0.1, 0.001, 0.001),
        (99.9, 0.999, 0.001),
    )
    for pct, expected, tolerance in checks:
        assert abs(digest.percentile(pct) - expected) < tolerance
def test_extreme_percentiles_return_min_and_max(self, empty_tdigest):
    """Check that percentiles 100 and 0 return the sample maximum and minimum.

    NOTE(review): the ``empty_tdigest`` argument is not used in the body; a
    new ``TDigest`` is constructed instead.
    """
    digest = TDigest()
    sample = random.randn(10000)
    digest.batch_update(sample)

    largest = sample.max()
    smallest = sample.min()

    # The extreme percentiles must reproduce the observed extremes exactly.
    assert digest.percentile(100.) == largest
    assert digest.percentile(0) == smallest

    # Percentiles just inside the range must fall strictly inside the extremes.
    assert digest.percentile(0.1) > smallest
    # NOTE(review): 0.999 looks like a leftover from a 0-1 percentile scale;
    # 99.9 may have been intended as the mirror of 0.1 above - confirm.
    assert digest.percentile(0.999) < largest
def test_uniform(self):
    """Check percentile estimates for 10000 values drawn with ``random.random``."""
    digest = TDigest()
    sample = random.random(size=10000)
    digest.batch_update(sample)

    def abs_error(pct, target):
        # Absolute distance between the digest's estimate and the target value.
        return abs(digest.percentile(pct) - target)

    # Middle of the distribution.
    assert abs_error(50, 0.5) < 0.02
    assert abs_error(10, .1) < 0.01
    assert abs_error(90, 0.9) < 0.01
    # Tails get tighter tolerances.
    assert abs_error(1, 0.01) < 0.005
    assert abs_error(99, 0.99) < 0.005
    assert abs_error(0.1, 0.001) < 0.001
    assert abs_error(99.9, 0.999) < 0.001
def test_trimmed_mean_corner_cases(self):
    """Check ``trimmed_mean(0, 100)`` with zero, one and two points in the digest."""
    digest = TDigest()

    # Nothing inserted yet: the full-range trimmed mean is expected to be 0.
    assert digest.trimmed_mean(0, 100) == 0

    # A single point is its own mean.
    digest.update(1)
    assert digest.trimmed_mean(0, 100) == 1

    # Two points: (1 + 1000) / 2.
    digest.update(1000)
    assert digest.trimmed_mean(0, 100) == 500.5
def test_data_comes_in_sorted_does_not_blow_up(self, empty_tdigest):
    """Check that sorted input keeps ``len()`` of the digest below fixed bounds.

    NOTE(review): the ``empty_tdigest`` argument is not used in the body.
    """
    # Point-by-point updates, each with weight 1.
    incremental = TDigest()
    for value in range(10000):
        incremental.update(value, 1)
    assert len(incremental) < 5000

    # The same values supplied through a single batch update.
    batched = TDigest()
    batched.batch_update(range(10000))
    assert len(batched) < 1000
def test_ints(self):
    """Check ``percentile(0.5)`` and the total centroid count on integer input."""
    # Three distinct integers: percentile(0.5) must be exactly 2.
    digest = TDigest()
    digest.batch_update([1, 2, 3])
    assert digest.percentile(0.5) == 2

    # One value repeated many times: percentile(0.5) is still 2, and the
    # per-centroid counts must add up to the number of inserted points.
    digest = TDigest()
    values = [1, 2, 2, 2, 2, 2, 2, 2, 3]
    digest.batch_update(values)
    assert digest.percentile(0.5) == 2
    total_count = sum(centroid.count for centroid in digest.C.values())
    assert total_count == len(values)
def test_uniform(self):
    """Check percentile estimates for 10000 values drawn with ``random.random``.

    Percentiles are passed to ``percentile`` as fractions (e.g. ``.5``).
    """
    digest = TDigest()
    sample = random.random(size=10000)
    digest.batch_update(sample)

    # Each row: (percentile argument, expected value, max abs error).
    checks = (
        (.5, 0.5, 0.02),
        (.1, .1, 0.01),
        (.9, 0.9, 0.01),
        (.01, 0.01, 0.005),
        (.99, 0.99, 0.005),
        (.001, 0.001, 0.001),
        (.999, 0.999, 0.001),
    )
    for fraction, expected, tolerance in checks:
        assert abs(digest.percentile(fraction) - expected) < tolerance
def test_trimmed_mean_negative(self):
    """Check that ``trimmed_mean(1, 99)`` is not negative for non-negative input."""
    digest = TDigest()

    # 100 values from ``random.random()`` ...
    for _ in range(100):
        digest.update(random.random())

    # ... followed by ten multiples of 100 (0, 100, ..., 900).
    for step in range(10):
        digest.update(step * 100)

    assert digest.trimmed_mean(1, 99) >= 0
def test_negative_extreme_percentile_is_still_positive(
        self, empty_tdigest):
    """Check that ``percentile(26)`` is positive for four positive inputs.

    NOTE(review): the ``empty_tdigest`` argument is not used in the body.
    """
    # Regression test for https://github.com/CamDavidsonPilon/tdigest/issues/16
    values = [62.0, 202.0, 1415.0, 1433.0]
    digest = TDigest()
    digest.batch_update(values)

    # The estimate is printed before the assertion.
    print(digest.percentile(26))
    assert digest.percentile(26) > 0
def test_data_comes_in_sorted_does_not_blow_up(self, empty_tdigest):
    """Check that feeding sorted data keeps ``len()`` of the digest bounded.

    NOTE(review): the ``empty_tdigest`` argument is not used in the body.
    """
    sorted_values = range(10000)

    # One ``update`` call per value, each with weight 1.
    one_by_one = TDigest()
    for value in sorted_values:
        one_by_one.update(value, 1)
    assert len(one_by_one) < 5000

    # All values passed in a single ``batch_update`` call.
    all_at_once = TDigest()
    all_at_once.batch_update(range(10000))
    assert len(all_at_once) < 1000
def tdigest_from_centroids(seq):
    """Build a TDigest from (mean, weight) centroid pairs.

    Parameters
    ----------
    seq : iterable
        Two-element tuples, each holding a centroid mean and its weight.

    Returns
    -------
    TDigest
        A new digest with one ``Centroid`` inserted into ``C`` per pair
        (keyed by the mean) and ``n`` increased by each weight.
    """
    digest = TDigest()
    for centroid_mean, centroid_weight in seq:
        centroid = Centroid(centroid_mean, centroid_weight)
        digest.C.insert(centroid_mean, centroid)
        # Add this centroid's weight to the digest's running total.
        digest.n += centroid_weight
    return digest
def test_negative_extreme_percentile_is_still_positive(self, empty_tdigest):
    """Check that ``percentile(0.25)`` is positive for four positive inputs.

    NOTE(review): the ``empty_tdigest`` argument is not used in the body.
    """
    # Regression test for https://github.com/CamDavidsonPilon/tdigest/issues/16
    values = [62.0, 202.0, 1415.0, 1433.0]
    digest = TDigest()
    digest.batch_update(values)

    estimate = digest.percentile(0.25)
    assert estimate > 0
def test_percentile_at_border_returns_an_intermediate_value(self, empty_tdigest):
    """Check that ``percentile(25)`` of four points is 132.0.

    132.0 is the midpoint of the two smallest inputs, 62.0 and 202.0.
    NOTE(review): the ``empty_tdigest`` argument is not used in the body.
    """
    digest = TDigest()
    digest.batch_update([62.0, 202.0, 1415.0, 1433.0])

    quartile = digest.percentile(25)
    assert quartile == 132.0
def test_extreme_percentiles_return_min_and_max(self, empty_tdigest):
    """Check that ``percentile(0)`` and ``percentile(1.)`` return the sample extremes.

    NOTE(review): the ``empty_tdigest`` argument is not used in the body.
    """
    digest = TDigest()
    sample = random.randn(100000)
    digest.batch_update(sample)

    lowest_estimate = digest.percentile(0)
    assert lowest_estimate == sample.min()

    highest_estimate = digest.percentile(1.)
    assert highest_estimate == sample.max()
def empty_tdigest():
    """Return a new ``TDigest`` built with its default constructor arguments."""
    return TDigest()