Example #1
def test_calibrators():
    import numpy as np
    from pysad.transform.probability_calibration import ConformalProbabilityCalibrator
    from pysad.transform.probability_calibration import GaussianTailProbabilityCalibrator
    from pysad.utils import fix_seed
    fix_seed(61)

    scores = np.random.rand(100)

    calibrators = {
        GaussianTailProbabilityCalibrator: {},
        ConformalProbabilityCalibrator: {}
    }

    for calibrator_cls, args in calibrators.items():
        calibrator = calibrator_cls(**args)
        calibrated_scores = calibrator.fit_transform(scores)

        assert calibrated_scores.shape == scores.shape
        assert not np.isnan(calibrated_scores).any()

        calibrator = calibrator_cls(**args).fit(scores)
        assert type(calibrator) is calibrator_cls
        calibrated_scores = calibrator.transform(scores)

        assert calibrated_scores.shape == scores.shape
        assert not np.isnan(calibrated_scores).any()
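For streaming use, pysad's transforms also have per-instance variants. A minimal sketch of calibrating scores one at a time, assuming the calibrators expose the usual fit_transform_partial API; the window_size value here is illustrative:

def example_streaming_calibration():
    import numpy as np
    from pysad.transform.probability_calibration import ConformalProbabilityCalibrator

    calibrator = ConformalProbabilityCalibrator(window_size=50)  # illustrative window size
    for score in np.random.rand(100):
        # Calibrate each raw anomaly score into a probability as it arrives.
        prob = calibrator.fit_transform_partial(score)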
Example #2
def test_stats_with_batch_numpy():
    from pysad.statistics import AbsStatistic
    from pysad.statistics import RunningStatistic
    from pysad.statistics import AverageMeter
    from pysad.statistics import CountMeter
    from pysad.statistics import MaxMeter
    from pysad.statistics import MedianMeter
    from pysad.statistics import MinMeter
    from pysad.statistics import SumMeter
    from pysad.statistics import SumSquaresMeter
    from pysad.statistics import VarianceMeter
    import numpy as np
    from pysad.utils import fix_seed
    fix_seed(61)

    num_items = 100
    stat_classes = {
        AverageMeter: np.mean,
        CountMeter: len,
        MaxMeter: np.max,
        MedianMeter: np.median,
        MinMeter: np.min,
        SumMeter: np.sum,
        SumSquaresMeter: lambda x: np.sum(x**2),
        VarianceMeter: np.var
    }

    for stat_cls, val in stat_classes.items():
        stat = stat_cls()
        abs_stat = AbsStatistic(stat_cls)
        window_size = 25
        running_stat = RunningStatistic(stat_cls, window_size=window_size)

        arr = np.random.rand(num_items)
        prev_value = 0.0
        for i in range(arr.shape[0]):
            num = arr[i]
            stat.update(num)
            abs_stat.update(num)
            running_stat.update(num)

            if i > 1:  # for variance meter.
                assert np.isclose(stat.get(), val(arr[:i + 1]))
                assert np.isclose(running_stat.get(),
                                  val(arr[max(0, i - window_size + 1):i + 1]))
                assert np.isclose(abs(stat.get()), abs_stat.get())

            stat.remove(num)
            abs_stat.remove(num)

            if i > 1:
                assert np.isclose(stat.get(), prev_value)
                assert np.isclose(abs_stat.get(), abs(prev_value))

            stat.update(num)
            abs_stat.update(num)

            prev_value = stat.get()
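All meters share the same small protocol (update, get, remove), and RunningStatistic confines any meter to a sliding window. A minimal standalone sketch of a windowed mean over a stream, using only the API exercised above:

def example_running_average():
    import numpy as np
    from pysad.statistics import AverageMeter
    from pysad.statistics import RunningStatistic

    running_mean = RunningStatistic(AverageMeter, window_size=10)
    for x in np.random.rand(50):
        running_mean.update(x)  # newest value enters; values beyond the window drop out
    print(running_mean.get())   # mean of the last 10 values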
Example #3
def test_all_zero_stats():
    import numpy as np
    from pysad.statistics import AbsStatistic
    from pysad.statistics import RunningStatistic
    from pysad.statistics import AverageMeter
    from pysad.statistics import CountMeter
    from pysad.statistics import MaxMeter
    from pysad.statistics import MedianMeter
    from pysad.statistics import MinMeter
    from pysad.statistics import SumMeter
    from pysad.statistics import SumSquaresMeter
    from pysad.statistics import VarianceMeter
    from pysad.utils import fix_seed
    fix_seed(61)

    num_items = 100
    stat_classes = {
        AverageMeter: 0.0,
        CountMeter: "count",
        MaxMeter: 0.0,
        MedianMeter: 0.0,
        MinMeter: 0.0,
        SumMeter: 0.0,
        SumSquaresMeter: 0.0,
        VarianceMeter: 0.0
    }

    for stat_cls, val in stat_classes.items():
        stat = stat_cls()
        abs_stat = AbsStatistic(stat_cls)
        window_size = 25
        running_stat = RunningStatistic(stat_cls, window_size=window_size)
        arr = np.zeros(num_items, dtype=float)
        prev_value = 0.0
        for i in range(arr.shape[0]):
            num = arr[i]
            stat.update(num)
            abs_stat.update(num)
            running_stat.update(num)
            if i > 1:  # for variance meter.
                assert np.isclose(stat.get(), val if val != "count" else i + 1)
                assert np.isclose(abs_stat.get(),
                                  val if val != "count" else i + 1)
                assert np.isclose(
                    running_stat.get(),
                    val if val != "count" else min(i + 1, window_size))

                stat.remove(num)
                abs_stat.remove(num)
                assert np.isclose(stat.get(), prev_value)
                assert np.isclose(abs_stat.get(), abs(prev_value))
                stat.update(num)
                abs_stat.update(num)

            prev_value = stat.get()
Example #4
def test_fit_and_score_separately():
    from pysad.models import xStream
    import numpy as np
    from pysad.utils import fix_seed
    fix_seed(61)

    X = np.random.rand(150, 1)

    model = xStream()

    model = model.fit(X)
    y_pred = model.score(X)
    assert y_pred.shape == (X.shape[0], )
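fit and score consume the whole batch at once; the streaming path handles one instance at a time instead. A minimal sketch, assuming the usual per-instance pysad API (fit_partial/score_partial) and the ArrayStreamer utility:

def example_streaming_scores():
    import numpy as np
    from pysad.models import xStream
    from pysad.utils import ArrayStreamer

    X = np.random.rand(150, 1)
    model = xStream()
    for x in ArrayStreamer(shuffle=False).iter(X):
        model.fit_partial(x)            # learn from the incoming instance
        score = model.score_partial(x)  # anomaly score for that instance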
Example #5
def test_all_correct():
    from pysad.evaluation import PrecisionMetric, AUPRMetric, AUROCMetric, RecallMetric
    import numpy as np
    from pysad.utils import fix_seed
    fix_seed(61)

    metric_classes = [PrecisionMetric, RecallMetric, AUPRMetric, AUROCMetric]
    metric_classes = {metric_cls: 1.0 for metric_cls in metric_classes}
    y_true = np.random.randint(0, 2, size=(25, ), dtype=int)
    y_true[0] = 1
    y_true[1] = 0
    y_pred = y_true.copy()

    helper_test_all_metrics(metric_classes, y_true, y_pred)
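helper_test_all_metrics is defined elsewhere in the test suite. A plausible reconstruction, assuming pysad's per-sample metric protocol (update each ground-truth/prediction pair, then read the result with get); treat the body as a hypothetical sketch:

def helper_test_all_metrics(metric_classes, y_true, y_pred):  # hypothetical sketch
    import numpy as np

    for metric_cls, expected in metric_classes.items():
        metric = metric_cls()
        for yt, yp in zip(y_true, y_pred):
            metric.update(yt, yp)  # feed one (ground truth, prediction) pair at a time
        assert np.isclose(metric.get(), expected)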
Example #6
def test_perfect_model():
    import numpy as np
    from pysad.models import PerfectModel
    from pysad.utils import fix_seed
    fix_seed(61)

    model = PerfectModel()
    y1 = np.random.randint(0, 2, 100)
    y = np.random.randint(0, 2, 100)
    X = np.random.rand(100)
    y_pred = model.fit_score(X, y)
    assert np.all(np.isclose(y, y_pred))
    assert not np.all(np.isclose(y1, y_pred))
Example #7
def test_standard_absolute_deviation():
    from pysad.models import StandardAbsoluteDeviation
    import numpy as np
    from numpy.testing import assert_raises
    from pysad.utils import fix_seed

    fix_seed(61)
    X = np.random.rand(150, 1)

    model = StandardAbsoluteDeviation(substracted_statistic="mean")
    model = model.fit(X)
    y_pred = model.score(X)
    assert y_pred.shape == (X.shape[0],)

    model = StandardAbsoluteDeviation(substracted_statistic="median")
    model = model.fit(X)
    y_pred = model.score(X)
    assert y_pred.shape == (X.shape[0],)

    with assert_raises(ValueError):
        StandardAbsoluteDeviation(substracted_statistic="asd")

    with assert_raises(ValueError):
        StandardAbsoluteDeviation(substracted_statistic=None)
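Conceptually, StandardAbsoluteDeviation scores an instance by its deviation from a running statistic (mean or median), scaled by the running standard deviation. A back-of-envelope batch analogue of that score, purely illustrative rather than the model's incremental computation:

def example_deviation_scores():
    import numpy as np

    X = np.random.rand(150)
    # Batch analogue of the deviation score: |x - mean| / std.
    scores = np.abs(X - X.mean()) / X.std()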
Example #8
def test_unsupervised_models():
    from pysad.models import RobustRandomCutForest
    from pysad.models import ExactStorm
    from pysad.models import HalfSpaceTrees
    from pysad.models import IForestASD
    from pysad.models import KitNet
    from pysad.models import KNNCAD
    from pysad.models import LODA
    from pysad.models import LocalOutlierProbability
    from pysad.models import MedianAbsoluteDeviation
    from pysad.models import NullModel
    from pysad.models import RandomModel
    from pysad.models import RelativeEntropy
    from pysad.models import RSHash
    from pysad.models import StandardAbsoluteDeviation
    from pysad.models import xStream
    import numpy as np
    from pysad.utils import fix_seed
    fix_seed(61)

    X = np.random.rand(150, 1)

    model_classes = {
        ExactStorm: {},
        HalfSpaceTrees: {
            "feature_mins": [0.0],
            "feature_maxes": [1.0]
        },
        IForestASD: {},
        KitNet: {},
        KNNCAD: {
            "probationary_period": 50
        },
        LODA: {},
        LocalOutlierProbability: {
            "initial_X": True
        },
        MedianAbsoluteDeviation: [{}, {
            "absolute": False
        }],
        NullModel: {},
        RandomModel: {},
        RelativeEntropy: {
            "min_val": 0.0,
            "max_val": 1.0
        },
        RSHash: {
            "feature_mins": [0.0],
            "feature_maxes": [1.0]
        },
        StandardAbsoluteDeviation: [{}, {
            "absolute": False
        }],
        xStream: {},
        RobustRandomCutForest: {}
    }

    for model_cls, params_dict in model_classes.items():
        if type(params_dict) is dict:
            helper_test_model(X, model_cls, params_dict)
        elif type(params_dict) is list:
            for params in params_dict:
                helper_test_model(X, model_cls, params)
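helper_test_model also lives elsewhere in the suite. A minimal hypothetical sketch of what such a helper would assert, assuming the standard streaming API; in the real suite, special parameters (e.g. initial_X above) are presumably substituted with actual data before construction:

def helper_test_model(X, model_cls, params):  # hypothetical sketch
    import numpy as np

    model = model_cls(**params)
    for x in X:
        model.fit_partial(x)            # update the model with one instance
        score = model.score_partial(x)  # score that same instance
        assert np.asarray(score).size == 1  # each instance yields a single score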