예제 #1
0
def test_est_tuning():
    """Verify that est_tuning selects the candidate with the smallest auc.

    For every entry of ``algorithms`` the parameter grid registered under
    its name in ``algo_param`` is tuned with ``est_tuning``.  The auc of the
    returned estimator (computed from the volumes given by
    ``_compute_volumes``) must equal the smallest auc obtained by fitting
    each grid point by hand, and the returned offsets must leave a fraction
    of test scores at or above them that matches ``alphas`` to two decimals.
    """
    for algo in algorithms:
        param_grid = ParameterGrid(algo_param[algo.name])

        # five random levels in {0.01, ..., 0.99}, in increasing order
        alphas = np.sort(rng.randint(1, 100, size=5) / 100)

        clf_est, offsets_est = est_tuning(X_train, X_test, algo, param_grid,
                                          alphas, U, vol_tot_cube)

        # auc reached by the estimator that est_tuning returned
        vol_est, _ = _compute_volumes(clf_est.score_samples, alphas, X_test,
                                      U, vol_tot_cube)
        auc_est = auc(alphas, vol_est)

        # auc reached by each point of the grid, refitted independently
        auc_algo = np.zeros(len(param_grid))
        for idx, candidate_params in enumerate(param_grid):
            candidate = algo(**candidate_params).fit(X_train)
            vol_candidate, _ = _compute_volumes(candidate.score_samples,
                                                alphas, X_test, U,
                                                vol_tot_cube)
            auc_algo[idx] = auc(alphas, vol_candidate)

        assert_equal(np.min(auc_algo), auc_est)

        # one row per offset: which test scores are at or above that offset
        scores_test = clf_est.score_samples(X_test)
        above_offset = (scores_test >= offsets_est[:, np.newaxis])
        empirical_masses = np.mean(above_offset, axis=1)
        # np.percentile is an empirical quantile that interpolates, so a
        # large number of samples is needed and the comparison is only
        # made up to the second decimal.
        assert_array_almost_equal(empirical_masses, alphas, decimal=2)
예제 #2
0
def test_compute_volumes_toy():
    """Exercise _compute_volumes with a scoring function known in advance.

    The score of a point is its first coordinate and the same regular grid
    is passed for both data arguments of ``_compute_volumes`` with a total
    volume of 1.  The returned volumes must equal ``alphas`` exactly, and
    the fraction of grid points scoring at or above each returned offset
    must equal ``alphas`` exactly as well.
    """
    def score_function_toy(X):
        """Return the first column of X, i.e. score(x_0, x_1) = x_0."""
        return X[:, 0]

    # regular 2-D grid over [0, 1) with a step of 0.01 along each axis
    axis_ticks = np.arange(0, 1, 0.01)
    xx, yy = np.meshgrid(axis_ticks, axis_ticks)
    grid = np.c_[xx.ravel(), yy.ravel()]

    # five random levels in {0.01, ..., 0.99} (deliberately left unsorted)
    alphas = rng.randint(1, 100, size=5) / 100
    vols, offsets = _compute_volumes(score_function_toy, alphas, grid, grid,
                                     1.)

    # volumes must coincide with the requested levels
    assert_array_equal(alphas, vols)

    # each offset must keep exactly the requested fraction of grid points
    grid_scores = score_function_toy(grid)
    kept = (grid_scores >= offsets[:, np.newaxis])
    assert_array_equal(np.mean(kept, axis=1), alphas)
예제 #3
0
def test_compute_volumes():
    """Sanity-check _compute_volumes on several fitted estimators.

    For each estimator, fitted on ``X_train``, the volumes returned for a
    sorted array of levels ``alphas`` must be non-decreasing and lie in
    ``[0, vol_tot_cube]``; the offsets must be non-increasing and lie
    between the smallest and largest score on ``X_test``; and the fraction
    of test scores at or above each offset must match ``alphas`` to two
    decimals.
    """
    estimators = [
        AverageKLPE(k=3, novelty=True),
        MaxKLPE(k=3, novelty=True),
        OCSVM(sigma=1.),
        IsolationForest(n_estimators=5, random_state=2),
        KernelSmoothing()
    ]
    # five random levels in {0.01, ..., 0.99}, in increasing order
    alphas = np.sort(rng.randint(1, 100, size=5) / 100)

    for clf in estimators:
        clf = clf.fit(X_train)
        scores_test = clf.score_samples(X_test)
        lowest_score = np.min(scores_test)
        highest_score = np.max(scores_test)

        vols, offsets = _compute_volumes(clf.score_samples, alphas, X_test,
                                         U, vol_tot_cube)

        # volumes grow with the level while offsets shrink
        assert_array_equal(vols, np.sort(vols))
        assert_array_equal(offsets, -np.sort(-offsets))

        # volumes stay within [0, vol_tot_cube]
        vols_in_range = np.all(0 <= vols) and np.all(vols <= vol_tot_cube)
        assert_true(vols_in_range)

        # offsets stay within the range of the observed test scores
        offsets_in_range = (np.all(lowest_score <= offsets)
                            and np.all(offsets <= highest_score))
        assert_true(offsets_in_range)

        # one row per offset: which test scores are at or above that offset
        above_offset = (scores_test >= offsets[:, np.newaxis])
        empirical_masses = np.mean(above_offset, axis=1)
        # np.percentile is an empirical quantile that interpolates, so a
        # large number of samples is needed and the comparison is only
        # made up to the second decimal.
        assert_array_almost_equal(empirical_masses, alphas, decimal=2)