コード例 #1
0
def test_update():
    """Stream three values into a VarianceTracker and check its statistics."""
    vals = [1.0, 2.0, 3]
    tracker = VarianceTracker()
    for v in vals:
        tracker.update(v)

    assert tracker.mean == pytest.approx(2.0, rel=1e-6)
    assert tracker.count == len(vals)
    # Note: this is the *sample* variance (normalized by N-1), not the
    # population variance. For [1, 2, 3] the squared deviations sum to 2,
    # so dividing by N-1 gives 1.0, whereas dividing by N would give 2/3.
    assert tracker.variance() == pytest.approx(1.0, rel=1e-6)
コード例 #2
0
def test_merge_simple_trackers():
    """Merge a one-value tracker with a two-value tracker and check the result."""
    left = VarianceTracker()
    left.update(1.0)

    right = VarianceTracker()
    for value in (2.0, 3.0):
        right.update(value)

    combined = left.merge(right)

    # The merged tracker is expected to describe the values 1.0, 2.0, 3.0.
    assert combined.mean == pytest.approx(2.0, rel=1e-4)
    assert combined.variance() == pytest.approx(1.0, rel=1e-4)
    assert combined.count == 3
コード例 #3
0
 def __init__(
     self,
     variance: VarianceTracker = None,
     floats: FloatTracker = None,
     ints: IntTracker = None,
     theta_sketch: ThetaSketch = None,
     histogram: datasketches.kll_floats_sketch = None,
     frequent_numbers: dsketch.FrequentNumbersSketch = None,
 ):
     """
     Store the given trackers and sketches on the instance.

     Every argument left as ``None`` is replaced by a default-constructed
     object of the matching type; the histogram default is a
     ``kll_floats_sketch`` built with ``DEFAULT_HIST_K``.
     """
     # Substitute defaults in declaration order, so the default
     # constructors run in the same sequence as the parameters.
     variance = VarianceTracker() if variance is None else variance
     floats = FloatTracker() if floats is None else floats
     ints = IntTracker() if ints is None else ints
     theta_sketch = ThetaSketch() if theta_sketch is None else theta_sketch
     histogram = (
         datasketches.kll_floats_sketch(DEFAULT_HIST_K)
         if histogram is None
         else histogram
     )
     frequent_numbers = (
         dsketch.FrequentNumbersSketch()
         if frequent_numbers is None
         else frequent_numbers
     )

     # Attributes are only set once every default has been resolved.
     self.variance = variance
     self.floats = floats
     self.ints = ints
     self.theta_sketch = theta_sketch
     self.histogram = histogram
     self.frequent_numbers = frequent_numbers
コード例 #4
0
    def from_protobuf(message: NumbersMessage):
        """
        Build a NumberTracker from a protobuf message.

        The theta sketch is deserialized from ``message.theta`` when that
        field is non-empty, otherwise from ``message.compact_theta``. When
        both are empty, ``None`` is passed as the theta sketch. The float
        and int trackers are only restored when the message has its
        ``doubles`` / ``longs`` fields set.

        Parameters
        ----------
        message : NumbersMessage
            The protobuf message to load from.

        Returns
        -------
        number_tracker : NumberTracker
        """
        theta = None
        # The first non-empty payload wins; ``theta`` is tried first.
        for field_name in ("theta", "compact_theta"):
            payload = getattr(message, field_name)
            if payload is not None and len(payload) > 0:
                theta = ThetaSketch.deserialize(payload)
                break

        tracker_kwargs = {
            "theta_sketch": theta,
            "variance": VarianceTracker.from_protobuf(message.variance),
            "histogram": dsketch.deserialize_kll_floats_sketch(message.histogram),
            "frequent_numbers": dsketch.FrequentNumbersSketch.from_protobuf(
                message.frequent_numbers
            ),
        }
        # Optional sub-messages are forwarded only when present on the message.
        if message.HasField("doubles"):
            tracker_kwargs["floats"] = FloatTracker.from_protobuf(message.doubles)
        if message.HasField("longs"):
            tracker_kwargs["ints"] = IntTracker.from_protobuf(message.longs)
        return NumberTracker(**tracker_kwargs)
コード例 #5
0
    def from_protobuf(message: NumbersMessage):
        """
        Build a NumberTracker from a protobuf message.

        Only ``message.compact_theta`` is deserialized into a theta sketch.
        If it is empty but ``message.theta`` holds data, a warning is logged
        and ``None`` is passed as the theta sketch. The float and int
        trackers are only restored when the message has its ``doubles`` /
        ``longs`` fields set.

        Parameters
        ----------
        message : NumbersMessage
            The protobuf message to load from.

        Returns
        -------
        number_tracker : NumberTracker
        """
        theta = None
        compact_payload = message.compact_theta
        if compact_payload is not None and len(compact_payload) > 0:
            theta = ThetaSketch.deserialize(compact_payload)
        else:
            # Only look at the non-compact field when the compact one is empty.
            legacy_payload = message.theta
            if legacy_payload is not None and len(legacy_payload) > 0:
                logger.warning(
                    "Possible missing data. Non-compact theta sketches are no longer supported"
                )

        tracker_kwargs = {
            "theta_sketch": theta,
            "variance": VarianceTracker.from_protobuf(message.variance),
            "histogram": dsketch.deserialize_kll_floats_sketch(message.histogram),
        }
        # Optional sub-messages are forwarded only when present on the message.
        if message.HasField("doubles"):
            tracker_kwargs["floats"] = FloatTracker.from_protobuf(message.doubles)
        if message.HasField("longs"):
            tracker_kwargs["ints"] = IntTracker.from_protobuf(message.longs)
        return NumberTracker(**tracker_kwargs)
コード例 #6
0
def test_empty_merge_full():
    """Merge a populated tracker into a fresh one and compare statistics."""
    populated = VarianceTracker()
    for value in range(10):
        populated.update(value)

    # Capture the statistics before the merge takes place.
    expected_variance = populated.variance()
    expected_mean = populated.mean
    expected_count = populated.count

    merged = VarianceTracker().merge(populated)

    # The merged tracker must match the values captured beforehand ...
    assert expected_variance == merged.variance()
    assert expected_mean == merged.mean
    assert expected_count == merged.count

    # ... and must also match the source tracker as it is after the merge.
    assert populated.variance() == merged.variance()
    assert populated.mean == merged.mean
    assert populated.count == merged.count
コード例 #7
0
def test_protobuf_roundtrip_matches():
    """Serialize a tracker to protobuf and back, then compare statistics."""
    original = VarianceTracker()
    for value in range(10):
        original.update(value)

    restored = VarianceTracker.from_protobuf(original.to_protobuf())

    # Exact equality is asserted: the round trip is expected to be lossless.
    assert original.count == restored.count
    assert original.variance() == restored.variance()
    assert original.mean == restored.mean
コード例 #8
0
def test_merge_bigger():
    """Merge a ten-value tracker with a copy of itself and check the result."""
    tracker = VarianceTracker()
    for value in range(10):
        tracker.update(value)

    # Statistics for the values 0..9.
    assert tracker.variance() == pytest.approx(9.1667, rel=1e-4)
    assert tracker.count == 10
    assert tracker.mean == pytest.approx(4.5, rel=1e-7)

    duplicate = tracker.copy()
    doubled = tracker.merge(duplicate)

    # Every value now appears twice: same mean, double the count.
    assert doubled.variance() == pytest.approx(8.684, rel=1e-4)
    assert doubled.count == 20
    assert doubled.mean == pytest.approx(4.5, rel=1e-7)
コード例 #9
0
def test_copy():
    """Check that a copy matches its source and ignores later source updates."""
    source = VarianceTracker()
    for value in range(10):
        source.update(value)

    # Snapshot of the source statistics at copy time.
    var, count, mean = source.variance(), source.count, source.mean

    clone = source.copy()
    assert clone.variance() == var
    assert clone.count == count
    assert clone.mean == mean

    # Keep updating the source only.
    for extra in range(3):
        source.update(extra)

    assert source.variance() != var
    assert source.count == count + 3
    assert source.mean != mean

    # The copy must still report the snapshot values.
    assert clone.variance() == var
    assert clone.mean == mean
    assert clone.count == count
コード例 #10
0
def test_single_value():
    """Check count and variance after a single update."""
    single = VarianceTracker()
    single.update(1.0)

    assert single.count == 1
    # With only one observation the tracker is expected to report 0.
    assert single.variance() == 0