def test_transform(self):
    """Check that outlier predictions can be rebuilt via a domain transform.

    Fits a ``OneClassSVMLearner`` (``nu=0.1``) on ``self.iris``, applies the
    resulting model to the same data, and verifies that transforming
    ``self.iris`` into the prediction's domain yields an equal table.
    """
    learner = OneClassSVMLearner(nu=0.1)
    model = learner(self.iris)
    predicted = model(self.iris)
    self.assert_table_appended_outlier(self.iris, predicted)

    # Re-deriving the prediction through its domain must give the same table.
    transformed = self.iris.transform(predicted.domain)
    self.assert_table_equal(predicted, transformed)
def test_OneClassSVM(self):
    """Train an RBF one-class SVM on a slice of two-cluster data.

    The data is one random sample shifted by +2 and by -2 and stacked; the
    model is fitted on the first 100 rows and applied to the remaining ones.
    The count of rows predicted as ``1`` must lie strictly between 0.1 and
    half the number of predicted rows.
    """
    # TODO: improve the test - what does it check?
    n_rows, n_cols = 200, 5
    base = 0.3 * np.random.randn(n_rows, n_cols)
    stacked = np.concatenate((base + 2, base - 2))
    table = Orange.data.Table(stacked, None)

    learner = OneClassSVMLearner(kernel="rbf")
    model = learner(table[:100])
    z = model(table[100:])

    n_predicted_one = np.sum(z == 1)
    self.assertTrue(0.1 < n_predicted_one < 0.5 * len(z))
def test_OneClassSVM(self):
    """Fit a one-class SVM on two clusters and predict on a fresh sample.

    Training and evaluation data are drawn independently, each as a random
    sample shifted by +2 and by -2. More than 70% of the evaluation rows
    must be predicted as ``1``.
    """
    n_rows, n_cols = 100, 5

    train_base = 0.3 * np.random.randn(n_rows, n_cols)
    train_table = Orange.data.Table(
        np.concatenate((train_base + 2, train_base - 2)), None)

    eval_base = 0.3 * np.random.randn(n_rows, n_cols)
    eval_x = np.concatenate((eval_base + 2, eval_base - 2))

    learner = OneClassSVMLearner(kernel="rbf", nu=0.1, gamma=0.1)
    model = learner(train_table)
    z = model(eval_x)

    n_predicted_one = np.sum(z == 1)
    self.assertTrue(n_predicted_one > 0.7 * len(z))
def detect_outliers(self):
    """Fit the selected outlier detector on ``self.data`` and predict on it.

    Builds a ``OneClassSVMLearner`` when ``self.outlier_method`` equals
    ``self.OneClassSVM``, otherwise an ``EllipticEnvelopeLearner``. The
    fitted model is also handed to ``self.add_metas``.

    Returns:
        The model's predictions for ``self.data`` wrapped in ``np.array``.
    """
    if self.outlier_method != self.OneClassSVM:
        # The support fraction is only forwarded when empirical covariance
        # is enabled; otherwise the learner receives None.
        fraction = None
        if self.empirical_covariance:
            fraction = self.support_fraction
        learner = EllipticEnvelopeLearner(
            support_fraction=fraction,
            contamination=self.cont / 100.)
    else:
        # NOTE(review): nu (like cont above) is divided by 100, presumably
        # because it is stored as a percentage - confirm against the settings.
        learner = OneClassSVMLearner(
            gamma=self.gamma,
            nu=self.nu / 100,
            preprocessors=SklLearner.preprocessors)

    model = learner(self.data)
    predictions = model(self.data)
    self.add_metas(model)
    return np.array(predictions)
def detect_outliers(self):
    """Fit the selected outlier detector on the attributes of ``self.data``.

    Builds a ``OneClassSVMLearner`` when ``self.outlier_method`` equals
    ``self.OneClassSVM``, otherwise an ``EllipticEnvelopeLearner``. The data
    is first transformed to a domain containing only its attributes, and
    that table is used for both fitting and prediction.

    Returns:
        A tuple ``(predictions, amended_data)``: the predictions wrapped in
        ``np.array`` and the result of ``self.amended_data(model)``.
    """
    if self.outlier_method != self.OneClassSVM:
        # The support fraction is only forwarded when empirical covariance
        # is enabled; otherwise the learner receives None.
        fraction = None
        if self.empirical_covariance:
            fraction = self.support_fraction
        learner = EllipticEnvelopeLearner(
            support_fraction=fraction,
            contamination=self.cont / 100.)
    else:
        # NOTE(review): nu (like cont above) is divided by 100, presumably
        # because it is stored as a percentage - confirm against the settings.
        learner = OneClassSVMLearner(
            gamma=self.gamma,
            nu=self.nu / 100,
            preprocessors=SklLearner.preprocessors)

    # Keep only the attribute columns for fitting and prediction.
    attributes_only = Domain(self.data.domain.attributes)
    data = self.data.transform(attributes_only)

    model = learner(data)
    predictions = model(data)
    amended = self.amended_data(model)
    return np.array(predictions), amended
def test_OneClassSVM_ignores_y(self):
    """Predictions must not depend on the presence of class variables.

    Two models are fitted - one on a table with two class variables and one
    on the same table reduced to its attributes - and each is applied to
    both tables. All four prediction arrays must be equal.
    """
    attributes = (ContinuousVariable("x1"), ContinuousVariable("x2"))
    targets = (ContinuousVariable("y1"), ContinuousVariable("y2"))
    domain = Domain(attributes, class_vars=targets)

    X = np.random.random((40, 2))
    Y = np.random.random((40, 2))
    with_y = Table(domain, X, Y)
    without_y = with_y.transform(Domain(with_y.domain.attributes))

    learner = OneClassSVMLearner()
    model_without_y = learner(without_y)
    model_with_y = learner(with_y)

    predictions = [
        model_without_y(without_y),
        model_without_y(with_y),
        model_with_y(without_y),
        model_with_y(with_y),
    ]
    # Compare neighbouring results; equality of each pair implies all agree.
    for current, following in zip(predictions, predictions[1:]):
        np.testing.assert_array_equal(current, following)
def test_OneClassSVM(self):
    """Check outlier counts on seeded data with known inliers and outliers.

    The data holds two clusters (one sample of 40 points shifted by +2 and
    by -2) followed by 20 points drawn uniformly between -4 and 4. With
    ``nu=0.2`` the test bounds the number of predicted outliers and how
    closely the predictions match the true inlier and outlier groups.
    """
    np.random.seed(42)
    domain = Domain((ContinuousVariable("c1"), ContinuousVariable("c2")))

    inliers = 0.3 * np.random.randn(40, 2)
    outliers = np.random.uniform(low=-4, high=4, size=(20, 2))
    data = Table(
        domain, np.concatenate((inliers + 2, inliers - 2, outliers)))
    n_true_in = len(inliers) * 2
    n_true_out = len(outliers)

    nu = 0.2
    model = OneClassSVMLearner(nu=nu)(data)
    flags = model(data).metas

    # In the metas, 0 marks a predicted outlier and 1 a predicted inlier.
    n_pred_out_all = np.sum(flags == 0)
    n_pred_in_true_in = np.sum(flags[:n_true_in] == 1)
    n_pred_out_true_out = np.sum(flags[-n_true_out:] == 0)

    self.assertLessEqual(n_pred_out_all, len(data) * nu)
    self.assertLess(abs(n_pred_out_all - n_true_out), 2)
    self.assertLess(abs(n_pred_in_true_in - n_true_in), 4)
    self.assertLess(abs(n_pred_out_true_out - n_true_out), 3)
def test_reprs(self):
    """Check that each learner's ``repr`` round-trips through ``eval``.

    For every learner, evaluating its ``repr`` string must produce an
    object whose ``repr`` is the same string.
    """
    learners = [
        LogisticRegressionLearner(tol=0.0002),
        MajorityLearner(),
        NaiveBayesLearner(),
        RandomForestLearner(bootstrap=False, n_jobs=3),
        SimpleTreeLearner(seed=1, bootstrap=True),
        SoftmaxRegressionLearner(),
        SVMLearner(shrinking=False),
        LinearSVMLearner(tol=0.022, dual=False),
        NuSVMLearner(tol=0.003, cache_size=190),
        OneClassSVMLearner(degree=2),
        TreeLearner(max_depth=3, min_samples_split=1),
        KNNLearner(n_neighbors=4),
        EllipticEnvelopeLearner(store_precision=False),
        SimpleRandomForestLearner(n_estimators=20),
    ]

    for learner in learners:
        text = repr(learner)
        # eval is applied only to repr strings produced by the test itself.
        rebuilt = eval(text)
        self.assertEqual(repr(rebuilt), text)