Esempio n. 1
0
def test_anomaly_tuning():
    """Check anomaly_tuning matches est_tuning applied on each CV split.

    The models and offsets returned by a single ``anomaly_tuning`` call are
    compared with the ones obtained by calling ``est_tuning`` manually on
    every (train, test) split produced by ``cv``.
    """

    parameters = {'k': np.arange(1, 10), 'novelty': [True]}
    alphas = np.array([0.2, 0.5, 0.9])
    n_samples = len(X)

    # Reference: let anomaly_tuning handle the cross-validation itself.
    models, offsets = anomaly_tuning(
        X, AverageKLPE, alphas=alphas, parameters=parameters, cv=cv,
        n_sim=100)
    score_estimators = np.empty((n_estimator, n_samples))
    for idx in range(n_estimator):
        score_estimators[idx] = models[idx].score_samples(X)

    # Sequential version: one est_tuning call per split of cv.
    score_estimators_seq = np.empty((n_estimator, n_samples))
    offsets_seq = np.empty((n_estimator, len(alphas)))
    param_grid = ParameterGrid(parameters)
    for fold, (train, test) in enumerate(cv.split(X)):
        model, offset = est_tuning(X[train], X[test], AverageKLPE,
                                   param_grid, alphas, U, vol_tot_cube)
        score_estimators_seq[fold] = model.score_samples(X)
        offsets_seq[fold] = offset

    # Both routes must give the same scores on X and the same offsets.
    assert_array_almost_equal(score_estimators, score_estimators_seq)
    assert_array_almost_equal(offsets, offsets_seq)
Esempio n. 2
0
def test_est_tuning():
    """Check that est_tuning returns the estimator with minimum auc.

    For every algorithm, the auc of the estimator selected by ``est_tuning``
    is compared with the smallest auc found by fitting each parameter
    combination of the grid by hand. The returned offsets are then checked
    against the requested ``alphas`` on the test set.
    """

    def volume_auc(score_function, levels):
        # auc of the (levels, volumes) curve for a given scoring function,
        # with volumes computed on X_test.
        volumes, _ = _compute_volumes(score_function, levels, X_test, U,
                                      vol_tot_cube)
        return auc(levels, volumes)

    for algo in algorithms:
        param_grid = ParameterGrid(algo_param[algo.name])
        # Five random levels drawn in {0.01, ..., 0.99}, sorted increasingly.
        alphas = np.sort(rng.randint(1, 100, size=5) / 100)

        clf_est, offsets_est = est_tuning(X_train, X_test, algo, param_grid,
                                          alphas, U, vol_tot_cube)

        # auc reached by the estimator selected by est_tuning
        auc_est = volume_auc(clf_est.score_samples, alphas)

        # auc of every candidate of the grid, fitted independently
        auc_algo = np.zeros(len(param_grid))
        for p, param in enumerate(param_grid):
            candidate = algo(**param).fit(X_train)
            auc_algo[p] = volume_auc(candidate.score_samples, alphas)

        assert_equal(np.min(auc_algo), auc_est)

        # Fraction of test samples scoring at least each offset, one row
        # per alpha.
        test_scores = clf_est.score_samples(X_test)
        above_offset = test_scores >= offsets_est[:, np.newaxis]
        # A large number of samples is needed here because np.percentile is
        # an empirical quantile relying on interpolation; for the same
        # reason equality is only required up to the second decimal.
        assert_array_almost_equal(np.mean(above_offset, axis=1),
                                  alphas,
                                  decimal=2)