def quantiles_38(data, *, n=4, method="exclusive"):
    """Divide *data* into n continuous intervals with equal probability.

    Backport of quantiles from the standard library, available for
    Python 3.8+:
    https://github.com/python/cpython/blob/3.8/Lib/statistics.py#L620

    Returns a list of (n - 1) cut points separating the intervals.
    Raises StatisticsError if n < 1 or if fewer than two data points
    are supplied; raises ValueError for an unknown *method*.
    """
    if n < 1:
        raise StatisticsError("n must be at least 1")
    points = sorted(data)
    size = len(points)
    if size < 2:
        raise StatisticsError("must have at least two data points")
    if method == "inclusive":
        span = size - 1
        cuts = []
        for k in range(1, n):
            # divmod gives both the base index and the exact remainder
            # used for linear interpolation between neighbours.
            idx, rem = divmod(k * span, n)
            cuts.append((points[idx] * (n - rem) + points[idx + 1] * rem) / n)
        return cuts
    if method == "exclusive":
        span = size + 1
        cuts = []
        for k in range(1, n):
            idx = k * span // n               # rescale k to span/n
            idx = max(1, min(idx, size - 1))  # clamp to 1 .. size-1
            rem = k * span - idx * n          # exact integer math
            cuts.append((points[idx - 1] * (n - rem) + points[idx] * rem) / n)
        return cuts
    raise ValueError(f"Unknown method: {method!r}")
def stdev_for(a):
    """Sample standard deviation of *a*, accumulated with a plain for-loop.

    Raises StatisticsError when fewer than two data points are given.
    """
    if len(a) < 2:
        raise StatisticsError("variance requires at least two data points")
    avg = mean(a)
    total = 0
    for value in a:
        total += (value - avg) ** 2
    # Bessel's correction: divide by (n - 1) for the sample estimator.
    return sqrt(total / (len(a) - 1))
def stdev_range_for(a):
    """Sample standard deviation of *a*, accumulated by explicit indexing.

    Raises StatisticsError when fewer than two data points are given.
    """
    count = len(a)
    if count < 2:
        raise StatisticsError("variance requires at least two data points")
    avg = mean(a)
    acc = 0
    for idx in range(count):
        acc += (a[idx] - avg) ** 2
    # Bessel's correction: divide by (n - 1) for the sample estimator.
    return sqrt(acc / (count - 1))
def _presorted_median(self, xs): n = len(xs) if n == 0: raise StatisticsError('no median for empty data') if n % 2 == 1: return xs[n // 2] else: i = n // 2 return (xs[i - 1] + xs[i]) / 2
def norm_cdf_inv(x, precision=4):
    """Inverse cumulative distribution function for the standard normal
    distribution.

    Parameters
    ----------
    x: `float`
        Probability in the closed interval [0, 1].
    precision: `int`
        Number of decimal places the result is rounded to.

    Returns
    -------
    `float`
        The z value such that P(Z <= z) == x; -inf for x == 0 and
        +inf for x == 1.

    Raises
    ------
    StatisticsError
        If x lies outside [0, 1].
    """
    if not 0 <= x <= 1:
        raise StatisticsError(('Invalid argument \'{}\' for '
                               'function \'norm_cdf_inv\'').format(x))
    if x == 0:
        return -float('infinity')
    elif x == 1:
        return float('infinity')
    elif x < .5:
        # Use the symmetry of the normal distribution.  BUG FIX: the
        # recursive call previously ignored the caller's *precision* and
        # always rounded to the default 4 decimals before negating.
        return -norm_cdf_inv(1 - x, precision)
    return round(sqrt(2) * erf_inv(2 * x - 1), precision)
def stdev_numpy(a):
    """Sample standard deviation of *a*, delegated to numpy's std.

    Raises StatisticsError when fewer than two data points are given.
    """
    if len(a) >= 2:
        # ddof=1 selects the Bessel-corrected (sample) estimator.
        return std(a, ddof=1)
    raise StatisticsError("variance requires at least two data points")
def stdev_generator(a):
    """Sample standard deviation of *a*, summed over a generator expression.

    Raises StatisticsError when fewer than two data points are given.
    """
    if len(a) < 2:
        raise StatisticsError("variance requires at least two data points")
    center = mean(a)
    ss = sum((item - center) ** 2 for item in a)
    # Bessel's correction: divide by (n - 1) for the sample estimator.
    return sqrt(ss / (len(a) - 1))
def stdev_map(a):
    """Sample standard deviation of *a*, using map() over a lambda.

    Raises StatisticsError when fewer than two data points are given.
    """
    if len(a) < 2:
        raise StatisticsError("variance requires at least two data points")
    center = mean(a)
    squared_devs = map(lambda item: (item - center) ** 2, a)
    # Bessel's correction: divide by (n - 1) for the sample estimator.
    return sqrt(sum(squared_devs) / (len(a) - 1))
def stdev_reduce(a):
    """Sample standard deviation of *a*, folded with functools.reduce.

    Raises StatisticsError when fewer than two data points are given.
    """
    if len(a) < 2:
        raise StatisticsError("variance requires at least two data points")
    center = mean(a)
    ss = reduce(lambda acc, item: acc + (item - center) ** 2, a, 0)
    # Bessel's correction: divide by (n - 1) for the sample estimator.
    return sqrt(ss / (len(a) - 1))
def compute(
    self,
    time: float,
    window_size: float,
    min_sample_size: int,
    messages: List[Any],
) -> "Record":
    """Compute summary statistics for a list of messages.

    Parameters
    ----------
    time: `float`
        The timestamp of the aggregated message, typically the midpoint
        of the aggregation window.
    window_size: `float`
        Size of the aggregation window.
    min_sample_size: `int`
        Minimum number of values required to compute a statistic; with
        fewer values, the first value is passed through unchanged.
    messages: `list`
        List of messages from which to compute the summary statistics

    Returns
    -------
    aggregated_message: `Record`
        Aggregated message.

    Raises
    ------
    RuntimeError
        If the Faust record for the aggregation topic has not been
        created yet.
    StatisticsError
        If an aggregation operation fails on the collected values.
    """
    if not self._record:
        msg = (
            "Use Aggregator.record() to create the Faust record for the "
            "aggregation topic first."
        )
        raise RuntimeError(msg)
    count = len(messages)
    aggregated_values = {
        "count": count,
        "time": time,
        "window_size": window_size,
    }
    for aggregation_field in self._aggregation_fields:
        if aggregation_field.operation:
            source_field_name = aggregation_field.source_field_name
            values = [message[source_field_name] for message in messages]
            try:
                # NOTE(review): eval() resolves the operation name against
                # module globals.  Operations come from the aggregator's
                # own configuration, but this is unsafe if that
                # configuration can ever carry untrusted input -- consider
                # a whitelist dict of callables instead.
                operation = aggregation_field.operation
                # Make sure there are enough values to compute statistics;
                # otherwise pass the first value through unchanged.
                if len(values) >= min_sample_size:
                    aggregated_value = eval(operation)(values)
                else:
                    aggregated_value = values[0]
            except Exception as e:
                msg = f"Error computing {operation} of {values}."
                # Chain the original exception so the root cause is kept.
                raise StatisticsError(msg) from e
            aggregated_values.update(
                {aggregation_field.name: aggregated_value}
            )
    aggregated_message = self._record(**aggregated_values)
    return aggregated_message