Exemple #1
0
    def __init__(self, on: Optional[KeyFn] = None, ignore_nulls: bool = True):
        """Configure a running-sum aggregation named ``sum(<on>)``.

        Args:
            on: Key selector. It is registered through ``_set_key_fn`` and
                converted by ``_to_on_fn`` into the callable handed to the
                accumulate wrapper.
            ignore_nulls: Forwarded unchanged to the null-wrapping helpers
                used for accumulate and merge (presumably decides whether
                null values are skipped -- confirm against those helpers).
        """
        self._set_key_fn(on)
        value_of = _to_on_fn(on)

        def start(k):
            # Every key begins from the additive identity.
            return 0

        def add(total, value):
            # The same addition folds in one incoming value and combines
            # two partial sums.
            return total + value

        def passthrough(total):
            # The accumulated sum is already the final answer.
            return total

        wrapped_init = _null_wrap_init(start)
        wrapped_accumulate = _null_wrap_accumulate(ignore_nulls, value_of, add)
        wrapped_merge = _null_wrap_merge(ignore_nulls, add)
        wrapped_finalize = _null_wrap_finalize(passthrough)

        super().__init__(
            init=wrapped_init,
            accumulate=wrapped_accumulate,
            merge=wrapped_merge,
            finalize=wrapped_finalize,
            name=f"sum({str(on)})",
        )
Exemple #2
0
    def __init__(self, on: Optional[KeyFn] = None, ignore_nulls: bool = True):
        """Configure a running-maximum aggregation named ``max(<on>)``.

        Args:
            on: Key selector. It is registered through ``_set_key_fn`` and
                converted by ``_to_on_fn`` into the callable handed to the
                accumulate wrapper.
            ignore_nulls: Forwarded unchanged to the null-wrapping helpers
                used for accumulate and merge (presumably decides whether
                null values are skipped -- confirm against those helpers).
        """
        self._set_key_fn(on)
        value_of = _to_on_fn(on)

        def start(k):
            # Negative infinity sits below every real value, so the first
            # value compared against it becomes the maximum.
            return float("-inf")

        def passthrough(best):
            # The running maximum is already the final answer.
            return best

        wrapped_init = _null_wrap_init(start)
        # The builtin ``max`` both folds in one value and merges two partial
        # maxima.
        wrapped_accumulate = _null_wrap_accumulate(ignore_nulls, value_of, max)
        wrapped_merge = _null_wrap_merge(ignore_nulls, max)
        wrapped_finalize = _null_wrap_finalize(passthrough)

        super().__init__(
            init=wrapped_init,
            accumulate=wrapped_accumulate,
            merge=wrapped_merge,
            finalize=wrapped_finalize,
            name=f"max({str(on)})",
        )
Exemple #3
0
    def __init__(
        self,
        on: Optional[KeyFn] = None,
        ddof: int = 1,
        ignore_nulls: bool = True,
    ):
        """Configure a standard-deviation aggregation named ``std(<on>)``.

        The accumulator is the list ``[M2, mean, count]``, where ``M2`` is
        the running sum of squared differences from the current mean. Values
        are folded in one at a time with Welford's online update, and partial
        accumulators are combined with the parallel variance formula.

        Args:
            on: Key selector. It is registered through ``_set_key_fn`` and
                converted by ``_to_on_fn`` into the callable handed to the
                accumulate wrapper.
            ddof: Delta degrees of freedom; the final divisor is
                ``count - ddof``.
            ignore_nulls: Forwarded unchanged to the null-wrapping helpers
                used for accumulate and merge (presumably decides whether
                null values are skipped -- confirm against those helpers).
        """
        self._set_key_fn(on)
        on_fn = _to_on_fn(on)

        def accumulate(a: List[float], r: float):
            # Accumulates the current count, the current mean, and the sum of
            # squared differences from the current mean (M2).
            M2, mean, count = a

            count += 1
            delta = r - mean
            mean += delta / count
            # delta2 uses the *updated* mean; the product delta * delta2 is
            # what keeps the single-pass update numerically stable.
            delta2 = r - mean
            M2 += delta * delta2
            return [M2, mean, count]

        def merge(a: List[float], b: List[float]):
            # Merges two accumulations into one.
            # See
            # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm
            M2_a, mean_a, count_a = a
            M2_b, mean_b, count_b = b
            delta = mean_b - mean_a
            count = count_a + count_b
            # NOTE: We use this mean calculation since it's more numerically
            # stable than mean_a + delta * count_b / count, which actually
            # deviates from Pandas in the ~15th decimal place and causes our
            # exact comparison tests to fail.
            mean = (mean_a * count_a + mean_b * count_b) / count
            # Update the sum of squared differences.
            M2 = M2_a + M2_b + (delta ** 2) * count_a * count_b / count
            return [M2, mean, count]

        def finalize(a: List[float]):
            # Compute the final standard deviation from the accumulated
            # sum of squared differences from current mean and the count.
            M2, mean, count = a
            denominator = count - ddof
            # Fewer than two samples, or a ddof that leaves no degrees of
            # freedom, has no well-defined deviation. Return 0.0 (the
            # existing convention for the too-few-samples case) instead of
            # dividing by zero or taking the square root of a negative.
            if count < 2 or denominator <= 0:
                return 0.0
            return math.sqrt(M2 / denominator)

        super().__init__(
            init=_null_wrap_init(lambda k: [0, 0, 0]),
            accumulate=_null_wrap_accumulate(ignore_nulls, on_fn, accumulate),
            merge=_null_wrap_merge(ignore_nulls, merge),
            finalize=_null_wrap_finalize(finalize),
            name=(f"std({str(on)})"),
        )
Exemple #4
0
    def __init__(self, on: Optional[KeyFn] = None, ignore_nulls: bool = True):
        """Configure an arithmetic-mean aggregation named ``mean(<on>)``.

        The accumulator is a two-element list ``[running_sum, count]``; the
        final value is the sum divided by the count.

        Args:
            on: Key selector. It is registered through ``_set_key_fn`` and
                converted by ``_to_on_fn`` into the callable handed to the
                accumulate wrapper.
            ignore_nulls: Forwarded unchanged to the null-wrapping helpers
                used for accumulate and merge (presumably decides whether
                null values are skipped -- confirm against those helpers).
        """
        self._set_key_fn(on)
        value_of = _to_on_fn(on)

        def start(k):
            # Slot 0 holds the running sum, slot 1 the number of values seen.
            return [0, 0]

        def add_value(acc, value):
            # Fold one incoming value into the sum and bump the count.
            return [acc[0] + value, acc[1] + 1]

        def combine(left, right):
            # Partial results merge by adding sums and counts pairwise.
            return [left[0] + right[0], left[1] + right[1]]

        def average(acc):
            return acc[0] / acc[1]

        wrapped_init = _null_wrap_init(start)
        wrapped_accumulate = _null_wrap_accumulate(
            ignore_nulls, value_of, add_value
        )
        wrapped_merge = _null_wrap_merge(ignore_nulls, combine)
        wrapped_finalize = _null_wrap_finalize(average)

        super().__init__(
            init=wrapped_init,
            accumulate=wrapped_accumulate,
            merge=wrapped_merge,
            finalize=wrapped_finalize,
            name=f"mean({str(on)})",
        )