def example_centroids():
    """Return an RBTree of four unit-weight centroids keyed by their mean.

    The means are -1.1, -0.5, 0.1 and 1.5, so the tree holds both negative
    and positive keys.  Each key maps to ``Centroid(key, 1)``.

    NOTE(review): presumably registered as a pytest fixture (tests request it
    by parameter name); the decorator is not visible here -- confirm.
    """
    means = (-1.1, -0.5, 0.1, 1.5)
    return RBTree([(mean, Centroid(mean, 1)) for mean in means])
def test_add_centroid_if_key_already_present(self, empty_tdigest, example_positive_centroids):
    """Adding a centroid whose mean is already a key must not add a new key.

    The digest is seeded with the positive example centroids, a centroid
    with mean 1.1 (already present) and weight 5 is added, and the tree
    left after subtracting the expected tree from the digest's tree must
    be empty.
    """
    digest = empty_tdigest
    digest.C = example_positive_centroids

    duplicate = Centroid(1.1, 5)
    digest._add_centroid(duplicate)

    # NOTE(review): the RBTree difference presumably compares keys only, in
    # which case the merged weight (1 + 5) is documented here but not
    # actually verified -- confirm.
    expected = RBTree([
        (0.5, Centroid(0.5, 1)),
        (1.1, Centroid(1.1, 1 + 5)),
        (1.5, Centroid(1.5, 1)),
    ])
    leftover = digest.C - expected
    assert leftover.is_empty()
def test_adding_centroid_with_exisiting_key_does_not_break_synchronicity(
        self, empty_tdigest, example_centroids):
    """Every tree key must still equal its centroid's mean after a re-add.

    The digest is seeded with the example centroids (which contain the key
    -1.1), then another centroid with mean -1.1 and weight 10 is added.
    Afterwards each ``(key, centroid)`` pair in the tree is checked for
    ``key == centroid.mean``.
    """
    digest = empty_tdigest
    digest.C = example_centroids

    # Precondition: the key we are about to re-add is really present.
    assert -1.1 in digest.C

    digest._add_centroid(Centroid(-1.1, 10))

    # Evaluate every pair up front, then require all of them to hold.
    in_sync = [key == entry.mean for key, entry in digest.C.items()]
    assert all(in_sync)
def test_update(self):
    """Check count and mean of a Centroid after successive weighted updates.

    Starting from ``Centroid(0, 0)``, three ``(value, weight)`` updates are
    applied in order; after each one the accumulated ``count`` and the
    ``mean`` are compared against the expected values.
    """
    centroid = Centroid(0, 0)

    # Each row: (value, weight, expected count, expected mean).
    steps = [
        (1, 1, 1, 1),
        (2, 1, 2, (2 + 1.) / 2.),
        (1, 2, 4, 1 * 1 / 4. + 2 * 1 / 4. + 1 * 2 / 4.),
    ]
    for value, weight, expected_count, expected_mean in steps:
        centroid.update(value, weight)
        assert centroid.count == expected_count
        assert centroid.mean == expected_mean
def tdigest_from_centroids(seq):
    """Build a TDigest directly from (mean, weight) pairs.

    Each pair is inserted into the digest's centroid tree ``C`` under its
    mean, and the digest's running total ``n`` is increased by the weight.

    Parameters
    ----------
    seq : iterable
        Iterable of 2-tuples ``(mean, weight)``, one per centroid.

    Returns
    -------
    TDigest
        A new digest holding one centroid per pair.

    Notes
    -----
    NOTE(review): pairs are inserted straight into the tree; how a repeated
    mean is handled depends on ``C.insert`` -- confirm if duplicates matter.
    """
    digest = TDigest()
    for centroid_mean, centroid_weight in seq:
        centroid = Centroid(centroid_mean, centroid_weight)
        digest.C.insert(centroid_mean, centroid)
        digest.n += centroid_weight
    return digest
def test_add_centroid(self, empty_tdigest, example_positive_centroids):
    """Adding centroids with new means must yield only the expected keys.

    The digest is seeded with the positive example centroids.  A centroid
    with mean 0.9 is added and the tree is compared against the expected
    tree; then a centroid with mean 10.0 is added and the comparison is
    repeated.  Each comparison requires the tree left after subtracting
    the expected tree from the digest's tree to be empty.
    """
    digest = empty_tdigest
    digest.C = example_positive_centroids

    # First insertion: a mean that lies between the existing keys.
    middle = Centroid(0.9, 1)
    digest._add_centroid(middle)
    expected_after_first = RBTree([
        (0.5, Centroid(0.5, 1)),
        (middle.mean, middle),
        (1.1, Centroid(1.1, 1)),
        (1.5, Centroid(1.5, 1)),
    ])
    leftover = digest.C - expected_after_first
    assert leftover.is_empty()

    # Second insertion: a mean larger than every existing key.
    tail = Centroid(10., 1)
    digest._add_centroid(tail)
    expected_after_second = RBTree([
        (0.5, Centroid(0.5, 1)),
        (middle.mean, middle),
        (1.1, Centroid(1.1, 1)),
        (1.5, Centroid(1.5, 1)),
        (tail.mean, tail),
    ])
    leftover = digest.C - expected_after_second
    assert leftover.is_empty()
def example_positive_centroids():
    """Return an RBTree of three unit-weight centroids with positive means.

    The means are 0.5, 1.1 and 1.5; each key maps to ``Centroid(key, 1)``.

    NOTE(review): presumably registered as a pytest fixture (tests request it
    by parameter name); the decorator is not visible here -- confirm.
    """
    means = (0.5, 1.1, 1.5)
    return RBTree([(mean, Centroid(mean, 1)) for mean in means])