def test_est_tuning():
    """Check that est_tuning returns the estimator with minimum auc.

    For each algorithm, est_tuning is expected to pick, among the
    parameter grid, the fitted estimator whose mass-volume curve has the
    smallest area under the curve (AUC), and to return offsets that are
    empirical alpha-quantiles of the scores on X_test.
    """
    for algo in algorithms:
        name_algo = algo.name
        parameters = algo_param[name_algo]
        param_grid = ParameterGrid(parameters)
        # 5 random mass levels in (0, 1), sorted increasingly
        alphas = rng.randint(1, 100, size=5) / 100
        alphas = np.sort(alphas)
        clf_est, offsets_est = est_tuning(X_train, X_test, algo, param_grid,
                                          alphas, U, vol_tot_cube)
        # check that clf_est gives the minimum auc
        score_function = clf_est.score_samples
        vol_est, _ = _compute_volumes(score_function, alphas, X_test, U,
                                      vol_tot_cube)
        auc_est = auc(alphas, vol_est)
        # recompute the auc for every parameter combination by hand
        auc_algo = np.zeros(len(param_grid))
        for p, param in enumerate(param_grid):
            clf = algo(**param)
            clf = clf.fit(X_train)
            score_function_p = clf.score_samples
            vol_p, _ = _compute_volumes(score_function_p, alphas, X_test, U,
                                        vol_tot_cube)
            auc_algo[p] = auc(alphas, vol_p)
        assert_equal(np.min(auc_algo), auc_est)
        # check that the returned offsets are empirical alpha-quantiles:
        # the fraction of test scores above each offset should match alphas
        clf_test = clf_est.score_samples(X_test)
        proba_offsets_est = (clf_test >= offsets_est[:, np.newaxis])
        # this test requires to have a large number of samples because
        # np.percentile is an empirical quantile which uses interpolation.
        # this is also why we ask the values to be equal only up to the
        # second decimal.
        assert_array_almost_equal(np.mean(proba_offsets_est, axis=1),
                                  alphas, decimal=2)
def test_compute_volumes_toy():
    """Check _compute_volumes on a toy scoring function.

    With score(x_0, x_1) = x_0 on the uniform unit-square grid, the
    volume of the level set at mass alpha is exactly alpha, and the
    offsets are exact quantiles of the scores.
    """
    def score_function_toy(X):
        """score(x_0, x_1) = x_0"""
        return X[:, 0]

    # regular grid with step size equal to 0.01
    xx, yy = np.meshgrid(np.arange(0, 1, 0.01), np.arange(0, 1, 0.01))
    grid = np.c_[xx.ravel(), yy.ravel()]
    # NOTE(review): unlike the sibling tests, alphas is not sorted here —
    # confirm that _compute_volumes does not require sorted mass levels.
    alphas = rng.randint(1, 100, size=5) / 100
    vols, offsets = _compute_volumes(score_function_toy, alphas,
                                     grid, grid, 1.)
    # check values of volume
    assert_array_equal(alphas, vols)
    # check values of offsets: the fraction of grid points scoring above
    # each offset must equal the corresponding mass level
    pred_offsets = (score_function_toy(grid) >= offsets[:, np.newaxis])
    assert_array_equal(np.mean(pred_offsets, axis=1), alphas)
def test_compute_volumes():
    """Check _compute_volumes for several masses.

    For each anomaly-detection estimator, the volumes returned for an
    increasing sequence of mass levels must be increasing and bounded by
    the total cube volume, while the offsets must be decreasing and lie
    within the range of the test scores.
    """
    estimators = [
        AverageKLPE(k=3, novelty=True),
        MaxKLPE(k=3, novelty=True),
        OCSVM(sigma=1.),
        IsolationForest(n_estimators=5, random_state=2),
        KernelSmoothing(),
    ]
    # 5 random mass levels in (0, 1), sorted increasingly
    alphas = rng.randint(1, 100, size=5) / 100
    alphas = np.sort(alphas)
    for clf in estimators:
        clf = clf.fit(X_train)
        clf_test = clf.score_samples(X_test)
        min_test = np.min(clf_test)
        max_test = np.max(clf_test)
        score_function = clf.score_samples
        vols, offsets = _compute_volumes(score_function, alphas, X_test,
                                         U, vol_tot_cube)
        # check increasing order of volumes and decreasing order of offsets
        assert_array_equal(vols, np.sort(vols))
        assert_array_equal(offsets, -np.sort(-offsets))
        # check volumes in [0, vol_tot_cube]
        assert_true(np.all(0 <= vols) and np.all(vols <= vol_tot_cube))
        # check offset values lie within the observed score range
        assert_true(
            np.all(min_test <= offsets) and np.all(offsets <= max_test))
        proba_offsets_pos = (clf_test >= offsets[:, np.newaxis])
        # this test requires to have a large number of samples because
        # np.percentile is an empirical quantile which uses interpolation.
        # this is also why we ask the values to be equal only up to the
        # second decimal.
        assert_array_almost_equal(np.mean(proba_offsets_pos, axis=1),
                                  alphas, decimal=2)