Exemple #1
0
class TestOCSVM(unittest.TestCase):
    """Unit tests for an ``OCSVM`` detector fitted on generated data."""

    def setUp(self):
        """Generate a train/test split and fit a default ``OCSVM`` on it."""
        self.n_train = 100
        self.n_test = 50
        self.contamination = 0.1
        # Lower bound on the ROC AUC asserted in test_prediction_scores.
        self.roc_floor = 0.6
        data = generate_data(
            n_train=self.n_train, n_test=self.n_test,
            contamination=self.contamination, random_state=42)
        self.X_train, self.y_train, self.X_test, self.y_test = data

        self.clf = OCSVM()
        self.clf.fit(self.X_train)

    def test_sklearn_estimator(self):
        """Run ``check_estimator`` against the fitted detector."""
        check_estimator(self.clf)

    def test_parameters(self):
        """Every listed fitted attribute exists and is not ``None``."""
        fitted_attrs = (
            'decision_scores_',
            'labels_',
            'threshold_',
            '_mu',
            '_sigma',
            'support_',
            'support_vectors_',
            'dual_coef_',
            'intercept_',
        )
        for attr in fitted_attrs:
            assert_true(hasattr(self.clf, attr) and
                        getattr(self.clf, attr) is not None)

        # coef_ is not checked here: it is only available for the linear
        # kernel.

    def test_train_scores(self):
        """There is one training decision score per training sample."""
        n_scores = len(self.clf.decision_scores_)
        n_samples = self.X_train.shape[0]
        assert_equal(n_scores, n_samples)

    def test_prediction_scores(self):
        """Test scores have the right length and beat the ROC floor."""
        scores = self.clf.decision_function(self.X_test)

        # one score per test sample
        assert_equal(scores.shape[0], self.X_test.shape[0])

        # detection quality must exceed the configured floor
        auc = roc_auc_score(self.y_test, scores)
        assert_greater(auc, self.roc_floor)

    def test_prediction_labels(self):
        """Predicted labels have the same shape as the true test labels."""
        labels = self.clf.predict(self.X_test)
        assert_equal(labels.shape, self.y_test.shape)

    def test_prediction_proba_linear(self):
        """``method='linear'`` probabilities lie within [0, 1]."""
        proba = self.clf.predict_proba(self.X_test, method='linear')
        assert_greater_equal(proba.min(), 0)
        assert_less_equal(proba.max(), 1)

    def test_prediction_proba_unify(self):
        """``method='unify'`` probabilities lie within [0, 1]."""
        proba = self.clf.predict_proba(self.X_test, method='unify')
        assert_greater_equal(proba.min(), 0)
        assert_less_equal(proba.max(), 1)

    def test_prediction_proba_parameter(self):
        """An unknown ``method`` makes ``predict_proba`` raise ValueError."""
        with assert_raises(ValueError):
            self.clf.predict_proba(self.X_test, method='something')

    def test_fit_predict(self):
        """``fit_predict`` returns labels shaped like the training labels."""
        labels = self.clf.fit_predict(self.X_train)
        assert_equal(labels.shape, self.y_train.shape)

    def test_fit_predict_score(self):
        """``fit_predict_score`` accepts known scorings, rejects others."""
        # default scoring first, then each explicitly named one
        self.clf.fit_predict_score(self.X_test, self.y_test)
        for scoring in ('roc_auc_score', 'prc_n_score'):
            self.clf.fit_predict_score(self.X_test, self.y_test,
                                       scoring=scoring)

        with assert_raises(NotImplementedError):
            self.clf.fit_predict_score(self.X_test, self.y_test,
                                       scoring='something')

    def test_predict_rank(self):
        """Ranks follow the score ordering and stay within bounds."""
        scores = self.clf.decision_function(self.X_test)
        ranks = self.clf._predict_rank(self.X_test)

        # the ordering of the scores is preserved, up to the tolerance
        assert_allclose(rankdata(ranks), rankdata(scores), atol=2)
        upper_bound = self.X_train.shape[0] + 1
        assert_array_less(ranks, upper_bound)
        assert_array_less(-0.1, ranks)

    def test_predict_rank_normalized(self):
        """Normalized ranks follow the score ordering and stay below 1.01."""
        scores = self.clf.decision_function(self.X_test)
        ranks = self.clf._predict_rank(self.X_test, normalized=True)

        # the ordering of the scores is preserved, up to the tolerance
        assert_allclose(rankdata(ranks), rankdata(scores), atol=2)
        assert_array_less(ranks, 1.01)
        assert_array_less(-0.1, ranks)

    def tearDown(self):
        """Nothing to clean up."""
Exemple #2
0
class TestOCSVM(unittest.TestCase):
    """Checks a default ``OCSVM`` detector trained on generated data."""

    def setUp(self):
        """Create the data set and fit the detector used by every test."""
        self.n_train = 200
        self.n_test = 100
        self.contamination = 0.1
        # Minimum ROC AUC required by test_prediction_scores.
        self.roc_floor = 0.8

        generated = generate_data(n_train=self.n_train,
                                  n_test=self.n_test,
                                  contamination=self.contamination,
                                  random_state=42)
        self.X_train, self.y_train, self.X_test, self.y_test = generated

        self.clf = OCSVM()
        self.clf.fit(self.X_train)

    def test_parameters(self):
        """Each expected fitted attribute is present and not ``None``."""
        expected = [
            'decision_scores_', 'labels_', 'threshold_',
            '_mu', '_sigma',
            'support_', 'support_vectors_', 'dual_coef_', 'intercept_',
        ]
        for name in expected:
            assert (hasattr(self.clf, name)
                    and getattr(self.clf, name) is not None)

        # coef_ is left out on purpose: it is only available for the
        # linear kernel.

    def test_train_scores(self):
        """The number of training scores equals the number of samples."""
        expected_len = self.X_train.shape[0]
        assert_equal(len(self.clf.decision_scores_), expected_len)

    def test_prediction_scores(self):
        """Decision scores match the test size and exceed the ROC floor."""
        test_scores = self.clf.decision_function(self.X_test)

        # shape check
        assert_equal(test_scores.shape[0], self.X_test.shape[0])

        # performance check
        roc = roc_auc_score(self.y_test, test_scores)
        assert_greater(roc, self.roc_floor)

    def test_prediction_labels(self):
        """``predict`` output is shaped like ``y_test``."""
        predicted = self.clf.predict(self.X_test)
        assert_equal(predicted.shape, self.y_test.shape)

    def test_prediction_proba_linear(self):
        """Linear-method probabilities are bounded by 0 and 1."""
        probabilities = self.clf.predict_proba(self.X_test, method='linear')
        assert_greater_equal(probabilities.min(), 0)
        assert_less_equal(probabilities.max(), 1)

    def test_prediction_proba_unify(self):
        """Unify-method probabilities are bounded by 0 and 1."""
        probabilities = self.clf.predict_proba(self.X_test, method='unify')
        assert_greater_equal(probabilities.min(), 0)
        assert_less_equal(probabilities.max(), 1)

    def test_prediction_proba_parameter(self):
        """``predict_proba`` raises ValueError for an unknown method."""
        with assert_raises(ValueError):
            self.clf.predict_proba(self.X_test, method='something')

    def test_fit_predict(self):
        """``fit_predict`` output is shaped like ``y_train``."""
        predicted = self.clf.fit_predict(self.X_train)
        assert_equal(predicted.shape, self.y_train.shape)

    def test_fit_predict_score(self):
        """Known scorings run; an unknown one raises NotImplementedError."""
        X, y = self.X_test, self.y_test

        self.clf.fit_predict_score(X, y)
        for metric in ('roc_auc_score', 'prc_n_score'):
            self.clf.fit_predict_score(X, y, scoring=metric)

        with assert_raises(NotImplementedError):
            self.clf.fit_predict_score(X, y, scoring='something')

    def test_predict_rank(self):
        """Predicted ranks keep the score order and respect the bounds."""
        test_scores = self.clf.decision_function(self.X_test)
        test_ranks = self.clf._predict_rank(self.X_test)

        # rank order must agree with score order within the tolerance
        assert_allclose(rankdata(test_ranks), rankdata(test_scores),
                        atol=3.5)
        rank_ceiling = self.X_train.shape[0] + 1
        assert_array_less(test_ranks, rank_ceiling)
        assert_array_less(-0.1, test_ranks)

    def test_predict_rank_normalized(self):
        """Normalized ranks keep the score order and stay below 1.01."""
        test_scores = self.clf.decision_function(self.X_test)
        test_ranks = self.clf._predict_rank(self.X_test, normalized=True)

        # rank order must agree with score order within the tolerance
        assert_allclose(rankdata(test_ranks), rankdata(test_scores),
                        atol=3.5)
        assert_array_less(test_ranks, 1.01)
        assert_array_less(-0.1, test_ranks)

    def tearDown(self):
        """No per-test cleanup is required."""
Exemple #3
0
    def getDetectors(self, parameterdict):
        """Build and fit one detector per name listed in ``parameterdict``.

        Parameter values are located by a case-insensitive *substring* match
        on the key (for example, any key containing ``"windowsize_"``). When
        several keys match, the first one in dict iteration order wins.

        Args:
            parameterdict: mapping that must provide keys containing
                ``windowsize_``, ``featureset_`` and ``type_``. The
                ``type_`` value is a whitespace-separated string of detector
                names chosen from ``OCSVM``, ``IForest``, ``PCA``, ``LOF``
                and ``KNN``. Depending on the detectors requested, keys
                containing ``kernel_`` and ``nu_`` (OCSVM),
                ``num_estimators_`` and ``max_samples_`` (IForest),
                ``lof_n_neighbors_`` (LOF) or ``knn_n_neighbors_`` (KNN)
                are required as well.

        Returns:
            The tuple ``(detectors, detectorlist, y, X)``: the fitted
            detector objects, the detector names in the same order, and
            elements 1 and 0 of the value returned by
            ``self.featureGenerator.getSlidingWindowFeaturesEvents``.

        Raises:
            IndexError: a required parameter key is missing.
            ValueError: the ``type_`` value names an unsupported detector.
        """

        def lookup(fragment):
            # Return the first value whose lower-cased key contains fragment.
            for key, value in parameterdict.items():
                if fragment in key.lower():
                    return value
            # IndexError is what the previous ``[...][0]`` lookups raised, so
            # callers that catch it keep working.
            raise IndexError(
                "no parameter key contains {!r}".format(fragment))

        windowSize = lookup("windowsize_")
        featureset = lookup("featureset_")
        # split() without arguments already strips surrounding whitespace,
        # so the individual names need no further rstrip().
        detectorlist = lookup("type_").split()

        features = self.featureGenerator.getSlidingWindowFeaturesEvents(
            int(windowSize), int(featureset))
        X = features[0]
        y = features[1]

        detectors = []
        for detector in detectorlist:
            if detector == "OCSVM":
                kernel = lookup("kernel_")
                # NOTE(review): nu is forwarded unconverted, unlike the int()
                # casts used for the other detectors - confirm the caller
                # supplies a numeric value.
                nu = lookup("nu_")
                clf = OCSVM(kernel=kernel, nu=nu, max_iter=100)
            elif detector == "IForest":
                num_estimators = lookup("num_estimators_")
                max_samples = lookup("max_samples_")
                clf = IForest(n_estimators=int(num_estimators),
                              max_samples=int(max_samples))
            elif detector == "PCA":
                clf = PCA()
            elif detector == "LOF":
                clf = LOF(n_neighbors=int(lookup("lof_n_neighbors_")))
            elif detector == "KNN":
                clf = KNN(n_neighbors=int(lookup("knn_n_neighbors_")))
            else:
                # Without this branch an unknown name would either hit an
                # unbound ``clf`` or silently re-fit and re-append the
                # detector built in the previous iteration.
                raise ValueError(
                    "unsupported detector type: {!r}".format(detector))

            # presumably HiddenPrints silences output while fitting - verify
            with HiddenPrints():
                clf.fit_predict_score(X, y, scoring='roc_auc_score')

            detectors.append(clf)
        return detectors, detectorlist, y, X