Beispiel #1
0
def test_statistics_1():
    """Compare weighted mean/std/quantile against explicit-loop references.

    The data ``a`` and weights ``w`` come from ``get_data()`` and are indexed
    as 4-d arrays.  Axes 1 and 2 are reduced by hand and the results compared
    with ``mean``, ``std`` and ``quantile`` called with ``axis=(1, 2)``.

    The reference buffers use hard-coded shapes ``(3, 1, 1, 6)``, ``(3, 6)``
    and ``(3, 6, 4 * 5)``, so the input is presumably ``(3, 4, 5, 6)``
    -- TODO confirm against ``get_data``.
    """
    a, w = get_data()

    def cells():
        # All (i, j, k, l) index tuples over the first four axes, last axis
        # varying fastest.
        return itertools.product(*(range(a.shape[d]) for d in range(4)))

    # --- weighted mean over axes 1 and 2 -----------------------------------
    weighted_sum = np.zeros((3, 1, 1, 6))
    weight_sum = np.zeros((3, 1, 1, 6))
    for i, j, k, l in cells():
        weight = w[i, j, k, l]
        weighted_sum[i, 0, 0, l] += weight * a[i, j, k, l]
        weight_sum[i, 0, 0, l] += weight
    ref_mean = weighted_sum / weight_sum
    np.testing.assert_allclose(
        ref_mean, mean(a, w, axis=(1, 2), keepdims=True), rtol=1e-07
    )

    # --- weighted standard deviation around the reference mean -------------
    sq_dev = np.zeros((3, 1, 1, 6))
    for i, j, k, l in cells():
        deviation = a[i, j, k, l] - ref_mean[i, 0, 0, l]
        sq_dev[i, 0, 0, l] += w[i, j, k, l] * deviation**2
    ref_std = np.sqrt(sq_dev / weight_sum)
    np.testing.assert_allclose(
        ref_std, std(a, w, axis=(1, 2), keepdims=True), rtol=1e-07
    )

    # --- flatten axes 1 and 2 into one trailing axis per (i, l) pair -------
    flat_values = np.zeros((3, 6, 4 * 5))
    flat_weights = np.zeros((3, 6, 4 * 5))
    for i, j, k, l in cells():
        pos = j * a.shape[2] + k
        flat_values[i, l, pos] = a[i, j, k, l]
        flat_weights[i, l, pos] = w[i, j, k, l]

    def reference_quantiles(q):
        """Return the weighted ``q``-quantile for every (i, l) pair.

        Samples are sorted by value; each one gets the rank
        ``(cumulative weight - half its own weight) / total weight``.  The
        quantile is linearly interpolated between the first pair of
        neighbouring samples whose ranks bracket ``q``.  A cell stays at
        zero when no pair brackets ``q``.
        """
        result = np.zeros((3, 6))
        for i, l in itertools.product(range(a.shape[0]), range(a.shape[3])):
            order = np.argsort(flat_values[i, l])
            vals = flat_values[i, l][order]
            wts = flat_weights[i, l][order]
            cum = np.cumsum(wts)
            ranks = (cum - 0.5 * wts) / cum[-1]
            for m in range(1, len(wts)):
                lo, hi = ranks[m - 1], ranks[m]
                if lo <= q < hi:
                    frac = (q - lo) / (hi - lo)
                    result[i, l] = vals[m - 1] + frac * (vals[m] - vals[m - 1])
                    break
        return result

    for q in (0.1, 0.5, 0.9):
        np.testing.assert_allclose(
            reference_quantiles(q),
            quantile(a, q, w, axis=(1, 2), keepdims=False),
            rtol=1e-07,
        )
Beispiel #2
0
def test_quantiles_shapes():
    """Check the output shape of ``quantile`` for each argument combination.

    Every scenario is run for a scalar ``q`` and a two-element ``q``, each
    without and with the weights ``w``.  The assertions expect a sequence
    ``q`` to add one leading axis of length 2, and ``keepdims=True`` to keep
    reduced axes with length 1.  The asserted shapes imply that the data
    from ``get_data()`` is 4-d with sizes 3, 5 and 6 along axes 0, 2 and 3.
    """
    a, w = get_data()

    # (q argument, leading output dims contributed by q)
    q_cases = ((0.5, ()), ((0.4, 0.6), (2,)))
    # Extra positional arguments: unweighted call, then weighted call.
    weight_cases = ((), (w,))
    cases = [
        (q, lead, extra) for q, lead in q_cases for extra in weight_cases
    ]

    # Full reduction: only q contributes output dimensions.
    for q, lead, extra in cases:
        out = quantile(a, q, *extra)
        assert out.ndim == len(lead)

    # Full reduction with keepdims: four singleton axes, same values.
    for q, lead, extra in cases:
        out = quantile(a, q, *extra, keepdims=True)
        assert out.shape == lead + (1, 1, 1, 1)
        np.testing.assert_allclose(
            out.ravel(), quantile(a, q, *extra), rtol=1e-07
        )

    # Reduction over one axis, then over two axes.
    for axis, reduced_shape in ((1, (3, 5, 6)), ((1, 2), (3, 6))):
        for q, lead, extra in cases:
            out = quantile(a, q, *extra, axis=axis)
            assert out.shape == lead + reduced_shape

    # Two-axis reduction with keepdims: same values as without keepdims.
    for q, lead, extra in cases:
        out = quantile(a, q, *extra, axis=(1, 2), keepdims=True)
        assert out.shape == lead + (3, 1, 1, 6)
        np.testing.assert_allclose(
            out.ravel(),
            quantile(a, q, *extra, axis=(1, 2)).ravel(),
            rtol=1e-07,
        )
Beispiel #3
0
import pandas as pd
from phik import phik

import popmon.stats.numpy as pm_np

from ...analysis.hist_numpy import get_2dgrid
from ...base import Module
from ...hist.histogram import sum_entries

# Default profile functions, keyed by the name(s) of the statistic(s) produced.
# Each value is called with the data and its weights.
DEFAULT_STATS = {
    "mean": pm_np.mean,
    "std": pm_np.std,
    # One call yields nine quantiles; the comma-separated key lists their
    # names in the same order as the requested q values (0.0 -> min,
    # 1.0 -> max, 0.01 -> p01, ...).
    # NOTE(review): presumably the consumer splits this key on "," -- confirm.
    "min,max,p01,p05,p16,p50,p84,p95,p99": lambda x, w: pm_np.quantile(
        x,
        q=[0.0, 1.0, 0.01, 0.05, 0.16, 0.50, 0.84, 0.95, 0.99],
        weights=w,
    ),
}

# Nanoseconds per day: 24 hours * 3600 seconds * 1e9 nanoseconds.
NUM_NS_DAY = 24 * 3600 * 10**9


class HistProfiler(Module):
    """Generate profiles of histograms using default statistical functions.

    Profiles are:

    - 1 dim histograms, all: 'count', 'filled', 'distinct', 'nan', 'most_probable_value', 'overflow', 'underflow'.
    - 1 dim histograms, numeric: mean, std, min, max, p01, p05, p16, p50, p84, p95, p99.
    - 1 dim histograms, boolean: fraction of true entries.
    - 2 dim histograms: count, phi_k correlation constant, p-value and Z-score of contingency test.

    :param str read_key: key of the input test data to read from the datastore