class TestOCSVM(unittest.TestCase):
    """Unit tests for an ``OCSVM`` detector fitted on generated data."""

    def setUp(self):
        """Generate a train/test split and fit a default ``OCSVM`` on it."""
        self.n_train = 100
        self.n_test = 50
        self.contamination = 0.1
        # ROC AUC on the test split must exceed this value.
        self.roc_floor = 0.6

        data = generate_data(
            n_train=self.n_train,
            n_test=self.n_test,
            contamination=self.contamination,
            random_state=42,
        )
        self.X_train, self.y_train, self.X_test, self.y_test = data

        self.clf = OCSVM()
        self.clf.fit(self.X_train)

    def _check_proba_range(self, method):
        """Assert ``predict_proba`` output for ``method`` lies in [0, 1]."""
        proba = self.clf.predict_proba(self.X_test, method=method)
        assert_greater_equal(proba.min(), 0)
        assert_less_equal(proba.max(), 1)

    def _check_rank_order(self, upper_bound, **rank_kwargs):
        """Assert predicted ranks follow the score order and stay in bounds.

        ``rank_kwargs`` are forwarded unchanged to ``_predict_rank``.
        """
        scores = self.clf.decision_function(self.X_test)
        ranks = self.clf._predict_rank(self.X_test, **rank_kwargs)

        # The ordering of the ranks must match the ordering of the scores,
        # up to the absolute tolerance below.
        assert_allclose(rankdata(ranks), rankdata(scores), atol=2)
        assert_array_less(ranks, upper_bound)
        assert_array_less(-0.1, ranks)

    def test_sklearn_estimator(self):
        """Run ``check_estimator`` on the fitted detector."""
        check_estimator(self.clf)

    def test_parameters(self):
        """Every fitted attribute must exist and must not be ``None``."""
        fitted_attributes = (
            'decision_scores_',
            'labels_',
            'threshold_',
            '_mu',
            '_sigma',
            'support_',
            'support_vectors_',
            'dual_coef_',
            'intercept_',
        )
        for attribute in fitted_attributes:
            assert_true(hasattr(self.clf, attribute) and
                        getattr(self.clf, attribute) is not None)
        # ``coef_`` is not checked here: it is only available for the
        # linear kernel.

    def test_train_scores(self):
        """There is one training score per training sample."""
        n_samples = self.X_train.shape[0]
        assert_equal(len(self.clf.decision_scores_), n_samples)

    def test_prediction_scores(self):
        """Test scores have the right length and beat the ROC floor."""
        scores = self.clf.decision_function(self.X_test)

        # One score per test sample.
        assert_equal(scores.shape[0], self.X_test.shape[0])

        # Detection quality must exceed the configured floor.
        assert_greater(roc_auc_score(self.y_test, scores), self.roc_floor)

    def test_prediction_labels(self):
        """Predicted labels have the same shape as the true test labels."""
        labels = self.clf.predict(self.X_test)
        assert_equal(labels.shape, self.y_test.shape)

    def test_prediction_proba_linear(self):
        """``method='linear'`` probabilities lie in [0, 1]."""
        self._check_proba_range('linear')

    def test_prediction_proba_unify(self):
        """``method='unify'`` probabilities lie in [0, 1]."""
        self._check_proba_range('unify')

    def test_prediction_proba_parameter(self):
        """An unknown probability method raises ``ValueError``."""
        with assert_raises(ValueError):
            self.clf.predict_proba(self.X_test, method='something')

    def test_fit_predict(self):
        """``fit_predict`` returns labels shaped like the training labels."""
        labels = self.clf.fit_predict(self.X_train)
        assert_equal(labels.shape, self.y_train.shape)

    def test_fit_predict_score(self):
        """Known scoring names are accepted; unknown ones are rejected."""
        self.clf.fit_predict_score(self.X_test, self.y_test)
        for scoring in ('roc_auc_score', 'prc_n_score'):
            self.clf.fit_predict_score(self.X_test, self.y_test,
                                       scoring=scoring)

        with assert_raises(NotImplementedError):
            self.clf.fit_predict_score(self.X_test, self.y_test,
                                       scoring='something')

    def test_predict_rank(self):
        """Raw ranks follow the score order and stay below n_train + 1."""
        self._check_rank_order(self.X_train.shape[0] + 1)

    def test_predict_rank_normalized(self):
        """Normalized ranks follow the score order and stay below 1.01."""
        self._check_rank_order(1.01, normalized=True)

    def tearDown(self):
        """Nothing to clean up."""
class TestOCSVM(unittest.TestCase):
    """Unit tests for an ``OCSVM`` detector fitted on generated data."""

    def setUp(self):
        """Generate a train/test split and fit a default ``OCSVM`` on it."""
        self.n_train = 200
        self.n_test = 100
        self.contamination = 0.1
        # ROC AUC on the test split must exceed this value.
        self.roc_floor = 0.8

        data = generate_data(
            n_train=self.n_train,
            n_test=self.n_test,
            contamination=self.contamination,
            random_state=42,
        )
        self.X_train, self.y_train, self.X_test, self.y_test = data

        self.clf = OCSVM()
        self.clf.fit(self.X_train)

    def _check_proba_range(self, method):
        """Assert ``predict_proba`` output for ``method`` lies in [0, 1]."""
        proba = self.clf.predict_proba(self.X_test, method=method)
        assert_greater_equal(proba.min(), 0)
        assert_less_equal(proba.max(), 1)

    def _check_rank_order(self, upper_bound, **rank_kwargs):
        """Assert predicted ranks follow the score order and stay in bounds.

        ``rank_kwargs`` are forwarded unchanged to ``_predict_rank``.
        """
        scores = self.clf.decision_function(self.X_test)
        ranks = self.clf._predict_rank(self.X_test, **rank_kwargs)

        # The ordering of the ranks must match the ordering of the scores,
        # up to the absolute tolerance below.
        assert_allclose(rankdata(ranks), rankdata(scores), atol=3.5)
        assert_array_less(ranks, upper_bound)
        assert_array_less(-0.1, ranks)

    def test_parameters(self):
        """Every fitted attribute must exist and must not be ``None``."""
        fitted_attributes = (
            'decision_scores_',
            'labels_',
            'threshold_',
            '_mu',
            '_sigma',
            'support_',
            'support_vectors_',
            'dual_coef_',
            'intercept_',
        )
        for attribute in fitted_attributes:
            assert (hasattr(self.clf, attribute) and
                    getattr(self.clf, attribute) is not None)
        # ``coef_`` is not checked here: it is only available for the
        # linear kernel.

    def test_train_scores(self):
        """There is one training score per training sample."""
        n_samples = self.X_train.shape[0]
        assert_equal(len(self.clf.decision_scores_), n_samples)

    def test_prediction_scores(self):
        """Test scores have the right length and beat the ROC floor."""
        scores = self.clf.decision_function(self.X_test)

        # One score per test sample.
        assert_equal(scores.shape[0], self.X_test.shape[0])

        # Detection quality must exceed the configured floor.
        assert_greater(roc_auc_score(self.y_test, scores), self.roc_floor)

    def test_prediction_labels(self):
        """Predicted labels have the same shape as the true test labels."""
        labels = self.clf.predict(self.X_test)
        assert_equal(labels.shape, self.y_test.shape)

    def test_prediction_proba_linear(self):
        """``method='linear'`` probabilities lie in [0, 1]."""
        self._check_proba_range('linear')

    def test_prediction_proba_unify(self):
        """``method='unify'`` probabilities lie in [0, 1]."""
        self._check_proba_range('unify')

    def test_prediction_proba_parameter(self):
        """An unknown probability method raises ``ValueError``."""
        with assert_raises(ValueError):
            self.clf.predict_proba(self.X_test, method='something')

    def test_fit_predict(self):
        """``fit_predict`` returns labels shaped like the training labels."""
        labels = self.clf.fit_predict(self.X_train)
        assert_equal(labels.shape, self.y_train.shape)

    def test_fit_predict_score(self):
        """Known scoring names are accepted; unknown ones are rejected."""
        self.clf.fit_predict_score(self.X_test, self.y_test)
        for scoring in ('roc_auc_score', 'prc_n_score'):
            self.clf.fit_predict_score(self.X_test, self.y_test,
                                       scoring=scoring)

        with assert_raises(NotImplementedError):
            self.clf.fit_predict_score(self.X_test, self.y_test,
                                       scoring='something')

    def test_predict_rank(self):
        """Raw ranks follow the score order and stay below n_train + 1."""
        self._check_rank_order(self.X_train.shape[0] + 1)

    def test_predict_rank_normalized(self):
        """Normalized ranks follow the score order and stay below 1.01."""
        self._check_rank_order(1.01, normalized=True)

    def tearDown(self):
        """Nothing to clean up."""
def getDetectors(self, parameterdict):
    """Build and fit the detectors requested in ``parameterdict``.

    Parameters are located by case-insensitive substring match on the
    dictionary keys (e.g. any key containing ``"windowsize_"``). When several
    keys match, the first one in dictionary iteration order is used.

    Args:
        parameterdict: Mapping of parameter names to values. Keys matching
            ``windowsize_``, ``featureset_`` and ``type_`` are always
            required. The ``type_`` value is a whitespace-separated list of
            detector names out of ``OCSVM``, ``IForest``, ``PCA``, ``LOF``
            and ``KNN``. Depending on the requested detectors, keys matching
            ``kernel_``, ``nu_``, ``num_estimators_``, ``max_samples_``,
            ``lof_n_neighbors_`` and ``knn_n_neighbors_`` are read as well.
            NOTE(review): values are presumably strings, since the numeric
            ones are converted with int()/float() -- confirm with the caller.

    Returns:
        A tuple ``(detectors, detectorlist, y, X)``: the fitted detector
        objects, the requested detector names in the same order, and the
        second and first elements returned by
        ``self.featureGenerator.getSlidingWindowFeaturesEvents``.

    Raises:
        IndexError: If no key matches a required parameter.
        ValueError: If the ``type_`` value names an unsupported detector.
    """

    def lookup(fragment):
        """Return the value of the first key containing ``fragment``."""
        for key, value in parameterdict.items():
            if fragment in key.lower():
                return value
        # IndexError (rather than KeyError) is raised on purpose: a missing
        # key has always surfaced as IndexError from this method, and callers
        # may depend on that.
        raise IndexError(
            "no parameter key contains {!r}".format(fragment))

    windowSize = lookup("windowsize_")
    featureset = lookup("featureset_")
    # str.split() with no argument already strips surrounding whitespace
    # from every name.
    detectorlist = lookup("type_").split()

    # Factories are lambdas so that detector-specific parameters are only
    # looked up for detectors that were actually requested.
    builders = {
        "OCSVM": lambda: OCSVM(kernel=lookup("kernel_"),
                               # nu must be numeric, like the int() parameters
                               nu=float(lookup("nu_")),
                               max_iter=100),
        "IForest": lambda: IForest(
            n_estimators=int(lookup("num_estimators_")),
            max_samples=int(lookup("max_samples_"))),
        "PCA": lambda: PCA(),
        "LOF": lambda: LOF(n_neighbors=int(lookup("lof_n_neighbors_"))),
        "KNN": lambda: KNN(n_neighbors=int(lookup("knn_n_neighbors_"))),
    }

    # Reject unknown names before the feature-generation call. Otherwise an
    # unknown name would either crash on an unbound variable or silently
    # re-fit and re-append the previously built detector.
    unknown = [name for name in detectorlist if name not in builders]
    if unknown:
        raise ValueError(
            "unsupported detector type(s): {}".format(", ".join(unknown)))

    features = self.featureGenerator.getSlidingWindowFeaturesEvents(
        int(windowSize), int(featureset))
    X = features[0]
    y = features[1]

    detectors = []
    for name in detectorlist:
        clf = builders[name]()
        # HiddenPrints presumably silences console output while fitting --
        # TODO confirm against its definition.
        with HiddenPrints():
            clf.fit_predict_score(X, y, scoring='roc_auc_score')
        detectors.append(clf)

    return detectors, detectorlist, y, X