def test_statistics_1():
    """Compare weighted mean, std and quantile with naive loop references.

    The data/weights pair from ``get_data()`` is reduced over axes ``(1, 2)``.
    Every reference value is accumulated one element at a time in plain
    Python and then checked against ``mean``, ``std`` and ``quantile`` with a
    relative tolerance of 1e-07.
    """
    a, w = get_data()

    # NOTE(review): the hard-coded shapes below assume get_data() returns
    # arrays of shape (3, 4, 5, 6) - confirm against get_data().
    reduced_shape = (3, 1, 1, 6)
    flat_shape = (3, 6, 4 * 5)

    def all_indices():
        # Fresh iterator over every index tuple of the first four axes.
        return itertools.product(*(range(a.shape[axis]) for axis in range(4)))

    # Weighted mean: sum(w * a) / sum(w) over axes 1 and 2.
    ref_mean = np.zeros(reduced_shape)
    weight_sum = np.zeros(reduced_shape)
    for i, j, k, m in all_indices():
        ref_mean[i, 0, 0, m] += w[i, j, k, m] * a[i, j, k, m]
        weight_sum[i, 0, 0, m] += w[i, j, k, m]
    ref_mean /= weight_sum
    np.testing.assert_allclose(
        ref_mean, mean(a, w, axis=(1, 2), keepdims=True), rtol=1e-07
    )

    # Weighted standard deviation: sqrt(sum(w * (a - mean)^2) / sum(w)).
    ref_std = np.zeros(reduced_shape)
    for i, j, k, m in all_indices():
        deviation = a[i, j, k, m] - ref_mean[i, 0, 0, m]
        ref_std[i, 0, 0, m] += w[i, j, k, m] * deviation**2
    ref_std = np.sqrt(ref_std / weight_sum)
    np.testing.assert_allclose(
        ref_std, std(a, w, axis=(1, 2), keepdims=True), rtol=1e-07
    )

    # Flatten axes 1 and 2 into one trailing axis so that each (i, m) pair
    # owns a 1-D sample of values with matching weights.
    flat_values = np.zeros(flat_shape)
    flat_weights = np.zeros(flat_shape)
    for i, j, k, m in all_indices():
        flat_pos = j * a.shape[2] + k
        flat_values[i, m, flat_pos] = a[i, j, k, m]
        flat_weights[i, m, flat_pos] = w[i, j, k, m]

    def reference_quantiles(prob):
        """Return the (3, 6) array of weighted ``prob``-quantiles."""
        result = np.zeros((3, 6))
        for i, m in itertools.product(range(a.shape[0]), range(a.shape[3])):
            order = np.argsort(flat_values[i, m])
            sorted_values = flat_values[i, m][order]
            sorted_weights = flat_weights[i, m][order]
            cumulative = sorted_weights.cumsum()
            # Rank of each sample: cumulative weight up to the middle of
            # the sample's own weight, normalised by the total weight.
            ranks = (cumulative - 0.5 * sorted_weights) / cumulative[-1]
            for pos in range(1, len(sorted_weights)):
                rank_lo, rank_hi = ranks[pos - 1], ranks[pos]
                if rank_lo <= prob < rank_hi:
                    value_lo = sorted_values[pos - 1]
                    value_hi = sorted_values[pos]
                    # Linear interpolation between the bracketing samples.
                    result[i, m] = value_lo + (prob - rank_lo) / (
                        rank_hi - rank_lo) * (value_hi - value_lo)
                    break
        return result

    for prob in (0.1, 0.5, 0.9):
        np.testing.assert_allclose(
            reference_quantiles(prob),
            quantile(a, prob, w, axis=(1, 2), keepdims=False),
            rtol=1e-07,
        )
def test_quantiles_shapes():
    """Check the output rank/shape of ``quantile`` for many call variants.

    Each case runs twice, first without and then with the weights from
    ``get_data()``. It covers a scalar probability and a pair of
    probabilities, a full reduction, a reduction over axis 1 and over axes
    (1, 2), with and without ``keepdims``. Whenever ``keepdims=True`` is
    used, the flattened result is also compared with the matching call
    without ``keepdims``.
    """
    a, w = get_data()

    single = 0.5
    pair = (0.4, 0.6)
    # Extra positional arguments: unweighted call first, then weighted.
    weight_variants = ((), (w,))

    # Full reduction: only the rank of the result is checked.
    for q, expected_ndim in ((single, 0), (pair, 1)):
        for extra in weight_variants:
            assert quantile(a, q, *extra).ndim == expected_ndim

    # Full reduction with keepdims: every reduced axis is kept with length 1.
    for q, expected_shape in ((single, (1, 1, 1, 1)), (pair, (2, 1, 1, 1, 1))):
        for extra in weight_variants:
            kept = quantile(a, q, *extra, keepdims=True)
            assert kept.shape == expected_shape
            np.testing.assert_allclose(
                kept.ravel(), quantile(a, q, *extra), rtol=1e-07
            )

    # Partial reductions without keepdims.
    partial_cases = (
        (1, single, (3, 5, 6)),
        (1, pair, (2, 3, 5, 6)),
        ((1, 2), single, (3, 6)),
        ((1, 2), pair, (2, 3, 6)),
    )
    for axis, q, expected_shape in partial_cases:
        for extra in weight_variants:
            assert quantile(a, q, *extra, axis=axis).shape == expected_shape

    # Reduction over axes (1, 2) with keepdims.
    for q, expected_shape in ((single, (3, 1, 1, 6)), (pair, (2, 3, 1, 1, 6))):
        for extra in weight_variants:
            kept = quantile(a, q, *extra, axis=(1, 2), keepdims=True)
            assert kept.shape == expected_shape
            np.testing.assert_allclose(
                kept.ravel(),
                quantile(a, q, *extra, axis=(1, 2)).ravel(),
                rtol=1e-07,
            )
import pandas as pd from phik import phik import popmon.stats.numpy as pm_np from ...analysis.hist_numpy import get_2dgrid from ...base import Module from ...hist.histogram import sum_entries DEFAULT_STATS = { "mean": pm_np.mean, "std": pm_np.std, "min,max,p01,p05,p16,p50,p84,p95,p99": lambda x, w: pm_np.quantile( x, q=[0.0, 1.0, 0.01, 0.05, 0.16, 0.50, 0.84, 0.95, 0.99], weights=w), } NUM_NS_DAY = 24 * 3600 * int(1e9) class HistProfiler(Module): """Generate profiles of histograms using default statistical functions. Profiles are: - 1 dim histograms, all: 'count', 'filled', 'distinct', 'nan', 'most_probable_value', 'overflow', 'underflow'. - 1 dim histograms, numeric: mean, std, min, max, p01, p05, p16, p50, p84, p95, p99. - 1 dim histograms, boolean: fraction of true entries. - 2 dim histograms: count, phi_k correlation constant, p-value and Z-score of contingency test. :param str read_key: key of the input test data to read from the datastore