Example #1
0
def quantiles_38(data, *, n=4, method="exclusive"):
    """
    Copy of quantiles from standard lib, available for Python 3.8+
    https://github.com/python/cpython/blob/3.8/Lib/statistics.py#L620
    """
    if n < 1:
        raise StatisticsError("n must be at least 1")
    points = sorted(data)
    size = len(points)
    if size < 2:
        raise StatisticsError("must have at least two data points")
    if method == "inclusive":
        span = size - 1
        cuts = []
        for k in range(1, n):
            # idx/rem split k*span into a whole index and a fractional part
            idx, rem = divmod(k * span, n)
            cuts.append((points[idx] * (n - rem) + points[idx + 1] * rem) / n)
        return cuts
    if method == "exclusive":
        span = size + 1
        cuts = []
        for k in range(1, n):
            idx = k * span // n  # rescale k to span/n
            idx = min(max(idx, 1), size - 1)  # clamp to 1 .. size-1
            rem = k * span - idx * n  # exact integer math
            cuts.append((points[idx - 1] * (n - rem) + points[idx] * rem) / n)
        return cuts
    raise ValueError(f"Unknown method: {method!r}")
def stdev_for(a):
    """Sample standard deviation accumulated with an explicit for-loop."""
    if len(a) < 2:
        raise StatisticsError("variance requires at least two data points")
    center = mean(a)
    acc = 0
    for sample in a:
        acc += (sample - center) ** 2
    # Bessel's correction: divide by n - 1 for the sample statistic
    return sqrt(acc / (len(a) - 1))
def stdev_range_for(a):
    """Sample standard deviation using index-based (range) iteration."""
    count = len(a)
    if count < 2:
        raise StatisticsError("variance requires at least two data points")
    center = mean(a)
    # Sum squared deviations by index to mirror the range-based access pattern
    total = sum((a[idx] - center) ** 2 for idx in range(count))
    return sqrt(total / (count - 1))
 def _presorted_median(self, xs):
     n = len(xs)
     if n == 0:
         raise StatisticsError('no median for empty data')
     if n % 2 == 1:
         return xs[n // 2]
     else:
         i = n // 2
         return (xs[i - 1] + xs[i]) / 2
Example #5
0
def norm_cdf_inv(x, precision=4):
    """Inverse cumulative distribution function of the standard normal,
    rounded to *precision* decimal places."""
    if not 0 <= x <= 1:
        raise StatisticsError(('Invalid argument \'{}\' for '
                               'function \'norm_cdf_inv\'').format(x))

    # The extreme probabilities map to the infinities.
    if x == 0:
        return -float('infinity')
    if x == 1:
        return float('infinity')
    # Use the symmetry of the normal distribution about zero for the lower half.
    if x < .5:
        return -round(norm_cdf_inv(1 - x), precision)
    return round(sqrt(2) * erf_inv(2 * x - 1), precision)
def stdev_numpy(a):
    """Sample standard deviation delegated to numpy."""
    if len(a) >= 2:
        # ddof=1 selects the sample (n - 1) denominator
        return std(a, ddof=1)
    raise StatisticsError("variance requires at least two data points")
def stdev_generator(a):
    """Sample standard deviation summed from a generator expression."""
    if len(a) < 2:
        raise StatisticsError("variance requires at least two data points")
    center = mean(a)
    squared_devs = ((item - center) ** 2 for item in a)
    return sqrt(sum(squared_devs) / (len(a) - 1))
def stdev_map(a):
    """Sample standard deviation computed by mapping a squared-deviation
    function over the data."""
    if len(a) < 2:
        raise StatisticsError("variance requires at least two data points")
    center = mean(a)
    square_dev = lambda item: (item - center) ** 2
    return sqrt(sum(map(square_dev, a)) / (len(a) - 1))
def stdev_reduce(a):
    """Sample standard deviation folding squared deviations with reduce()."""
    if len(a) < 2:
        raise StatisticsError("variance requires at least two data points")
    center = mean(a)

    def add_square_dev(acc, item):
        # Fold step: accumulate the next squared deviation.
        return acc + (item - center) ** 2

    return sqrt(reduce(add_square_dev, a, 0) / (len(a) - 1))
Example #10
0
    def compute(
        self,
        time: float,
        window_size: float,
        min_sample_size: int,
        messages: List[Any],
    ) -> Record:
        """Compute summary statistics for a list of messages.

        Parameters
        ----------
        time: `float`
            The timestamp of the aggregated message, typically the midpoint
            of the aggregation window.
        window_size: `float`
            Size of the aggregation window.
        min_sample_size: `int`
            Minimum number of values required to compute a statistic; with
            fewer values the first value is passed through unchanged.
        messages: `list`
            List of messages from which to compute the summary statistics

        Returns
        -------
        aggregated_message: `Record`
            Aggregated message.

        Raises
        ------
        RuntimeError
            If the Faust record has not been created yet.
        StatisticsError
            If a statistic cannot be computed from the collected values.
        """
        if not self._record:
            msg = (
                "Use Aggregator.record() to create the Faust record for the "
                "aggregation topic first."
            )
            raise RuntimeError(msg)

        count = len(messages)

        aggregated_values = {
            "count": count,
            "time": time,
            "window_size": window_size,
        }

        for aggregation_field in self._aggregation_fields:

            # Bind these outside the try so the error message below can
            # always reference them.
            operation = aggregation_field.operation
            if not operation:
                continue

            source_field_name = aggregation_field.source_field_name
            values = [message[source_field_name] for message in messages]

            try:
                # Make sure there are enough values to compute statistics;
                # otherwise pass the first value through unchanged.
                if len(values) >= min_sample_size:
                    # NOTE(review): eval() resolves the operation name to a
                    # callable in the current scope. If `operation` can come
                    # from untrusted configuration this is a code-injection
                    # risk; prefer an explicit dispatch table of allowed
                    # operations.
                    aggregated_value = eval(operation)(values)
                else:
                    aggregated_value = values[0]
            except Exception as exc:
                msg = f"Error computing {operation} of {values}."
                # Chain the original exception so the root cause is visible.
                raise StatisticsError(msg) from exc

            aggregated_values.update(
                {aggregation_field.name: aggregated_value}
            )

        aggregated_message = self._record(**aggregated_values)

        return aggregated_message