def example_centroids():
    """Build a tree of four centroids, each stored under its own mean.

    The means are -1.1, -0.5, 0.1 and 1.5, and every centroid is created
    with a second constructor argument of 1.
    """
    means = (-1.1, -0.5, 0.1, 1.5)
    return RBTree([(mean, Centroid(mean, 1)) for mean in means])
 def test_add_centroid_if_key_already_present(self, empty_tdigest, example_positive_centroids):
     """Adding a centroid whose mean is already a key must merge the counts.

     The digest starts with centroids at 0.5, 1.1 and 1.5 (count 1 each);
     a centroid with mean 1.1 and count 5 is then added, so the entry at
     1.1 is expected to end up with count 1 + 5 and no new key to appear.
     """
     empty_tdigest.C = example_positive_centroids
     new_centroid = Centroid(1.1, 5)
     empty_tdigest._add_centroid(new_centroid)
     assert (empty_tdigest.C - RBTree([
         (0.5, Centroid(0.5, 1)),
         (1.1, Centroid(1.1, 1 + 5)),
         (1.5, Centroid(1.5, 1)),
     ])).is_empty()
     # NOTE(review): the tree difference above appears to compare keys only,
     # so on its own it would still pass if the added count were dropped.
     # Pin the exact key -> count mapping as well.
     assert {key: centroid.count for key, centroid in empty_tdigest.C.items()} == {
         0.5: 1,
         1.1: 1 + 5,
         1.5: 1,
     }
# Example #3
0
 def test_adding_centroid_with_exisiting_key_does_not_break_synchronicity(
         self, empty_tdigest, example_centroids):
     """Every tree key must still equal its centroid's mean after a re-add.

     A centroid with mean -1.1 is added while -1.1 is already a key of the
     digest's centroid tree.
     """
     empty_tdigest.C = example_centroids
     assert -1.1 in empty_tdigest.C
     empty_tdigest._add_centroid(Centroid(-1.1, 10))
     # Check each entry separately so a failure names the offending key.
     for key, centroid in empty_tdigest.C.items():
         assert key == centroid.mean
    def test_update(self):
        """Check count and mean of a centroid after each of three updates.

        The centroid starts as ``Centroid(0, 0)``; each step feeds one
        ``(value, weight)`` pair to ``update`` and compares the resulting
        ``count`` and ``mean`` against the expected running totals.
        """
        centroid = Centroid(0, 0)
        # ((value, weight), expected count, expected mean) per update step.
        steps = [
            ((1, 1), 1, 1),
            ((2, 1), 2, (2 + 1.) / 2.),
            ((1, 2), 4, 1 * 1 / 4. + 2 * 1 / 4. + 1 * 2 / 4.),
        ]
        for (value, weight), expected_count, expected_mean in steps:
            centroid.update(value, weight)
            assert centroid.count == expected_count
            assert centroid.mean == expected_mean
# Example #5
0
    def test_update(self):
        """Apply three weighted updates to a centroid and verify its state.

        Starting from ``Centroid(0, 0)``, every ``update(value, weight)``
        call is followed by a check of the accumulated ``count`` and of the
        weighted ``mean``.
        """
        centroid = Centroid(0, 0)
        updates = [(1, 1), (2, 1), (1, 2)]
        expected_counts = [1, 2, 4]
        expected_means = [
            1,
            (2 + 1.) / 2.,
            1 * 1 / 4. + 2 * 1 / 4. + 1 * 2 / 4.,
        ]
        for (value, weight), count, mean in zip(updates, expected_counts, expected_means):
            centroid.update(value, weight)
            assert centroid.count == count
            assert centroid.mean == mean
# Example #6
0
def tdigest_from_centroids(seq):
    """Create a TDigest from a list of centroid means and weights tuples

    Each ``(mean, weight)`` pair is stored in the digest's centroid tree
    ``C`` under its mean, and the digest's total ``n`` grows by ``weight``.
    A mean that occurs more than once in ``seq`` is merged into the centroid
    already stored under that key instead of replacing it, so the weights
    held in ``C`` keep adding up to ``n``.

    Parameters
    ----------

    seq : iterable
        List of tuples of length 2 that contain the centroid mean and weight
        from a TDigest.

    Returns
    -------

    TDigest
        A new digest holding the given centroids.
    """

    tdigest = TDigest()

    for mean, weight in seq:
        if mean not in tdigest.C:
            tdigest.C.insert(mean, Centroid(mean, weight))
        elif weight:
            # Re-inserting under an existing key would discard the earlier
            # centroid while ``n`` still counted its weight; fold the new
            # weight into the stored centroid instead.
            # NOTE(review): a zero-weight repeat is skipped because it adds
            # nothing; presumably ``update`` divides by the running count,
            # which could be zero here. Confirm against Centroid.update.
            tdigest.C[mean].update(mean, weight)
        tdigest.n += weight

    return tdigest
    def test_add_centroid(self, empty_tdigest, example_positive_centroids):
        """Adding centroids with new means must insert them under those means.

        The digest starts with centroids at 0.5, 1.1 and 1.5 (count 1 each).
        One centroid is added between existing keys (0.9) and one beyond the
        largest key (10.0); after each addition the tree contents are
        checked.
        """
        empty_tdigest.C = example_positive_centroids
        new_centroid = Centroid(0.9, 1)
        empty_tdigest._add_centroid(new_centroid)
        assert (empty_tdigest.C - RBTree([
            (0.5, Centroid(0.5, 1)),
            (new_centroid.mean, new_centroid),
            (1.1, Centroid(1.1, 1)),
            (1.5, Centroid(1.5, 1)),
        ])).is_empty()
        # NOTE(review): the tree difference above appears to compare keys
        # only and in one direction, so it would also pass if nothing had
        # been inserted. Pin the exact key -> count mapping as well.
        assert {key: centroid.count for key, centroid in empty_tdigest.C.items()} == {
            0.5: 1,
            new_centroid.mean: 1,
            1.1: 1,
            1.5: 1,
        }

        last_centroid = Centroid(10., 1)
        empty_tdigest._add_centroid(last_centroid)
        assert (empty_tdigest.C - RBTree([
            (0.5, Centroid(0.5, 1)),
            (new_centroid.mean, new_centroid),
            (1.1, Centroid(1.1, 1)),
            (1.5, Centroid(1.5, 1)),
            (last_centroid.mean, last_centroid),
        ])).is_empty()
        assert {key: centroid.count for key, centroid in empty_tdigest.C.items()} == {
            0.5: 1,
            new_centroid.mean: 1,
            1.1: 1,
            1.5: 1,
            last_centroid.mean: 1,
        }
def example_positive_centroids():
    """Build a tree of three centroids with positive means.

    The means are 0.5, 1.1 and 1.5; each centroid is stored under its own
    mean and created with a second constructor argument of 1.
    """
    means = (0.5, 1.1, 1.5)
    return RBTree([(mean, Centroid(mean, 1)) for mean in means])