def test_transform(self):
    """Outlier annotations survive a round trip through Table.transform."""
    model = EllipticEnvelopeLearner()(self.iris)
    annotated = model(self.iris)
    # The model appends outlier columns as metas; offset=2 accounts for
    # both the outlier label and the Mahalanobis distance.
    self.assert_table_appended_outlier(self.iris, annotated, offset=2)
    distance_var = annotated.domain.metas[-1]
    self.assertEqual(distance_var.name, "Mahalanobis")
    self.assertIsNotNone(distance_var.compute_value)
    # Projecting the raw data into the annotated domain must reproduce
    # the model's own output exactly.
    reprojected = self.iris.transform(annotated.domain)
    self.assert_table_equal(annotated, reprojected)
def setUpClass(cls):
    """Build a reproducible inlier/outlier dataset and fit the detector.

    NOTE(review): unittest requires setUpClass to be a classmethod; the
    decorator is presumably applied outside this view — confirm.
    """
    np.random.seed(42)
    cls.n_true_in, cls.n_true_out = 80, 20
    # Tight Gaussian blob = inliers; broad uniform noise = outliers.
    # Draw order (randn before uniform) matters for seed reproducibility.
    cls.X_in = 0.3 * np.random.randn(cls.n_true_in, 2)
    cls.X_out = np.random.uniform(low=-4, high=4, size=(cls.n_true_out, 2))
    attrs = (ContinuousVariable("c1"), ContinuousVariable("c2"))
    cls.X_all = Table(Domain(attrs), np.r_[cls.X_in, cls.X_out])
    # Contamination set to the true outlier fraction.
    cls.cont = cls.n_true_out / (cls.n_true_in + cls.n_true_out)
    cls.learner = EllipticEnvelopeLearner(contamination=cls.cont)
    cls.model = cls.learner(cls.X_all)
def setUp(self):
    """Build a reproducible inlier/outlier dataset and fit the detector."""
    np.random.seed(42)
    self.n_true_in, self.n_true_out = 80, 20
    # Tight Gaussian blob = inliers; broad uniform noise = outliers.
    # Draw order (randn before uniform) matters for seed reproducibility.
    self.X_in = 0.3 * np.random.randn(self.n_true_in, 2)
    self.X_out = np.random.uniform(
        low=-4, high=4, size=(self.n_true_out, 2))
    attrs = (ContinuousVariable("c1"), ContinuousVariable("c2"))
    self.X_all = Table(Domain(attrs), np.r_[self.X_in, self.X_out])
    # Contamination set to the true outlier fraction.
    self.cont = self.n_true_out / (self.n_true_in + self.n_true_out)
    self.learner = EllipticEnvelopeLearner(contamination=self.cont)
    self.model = self.learner(self.X_all)
def detect_outliers(self):
    """Fit the selected outlier detector on self.data and return labels.

    Returns a numpy array of per-instance predictions; also appends the
    model's meta columns to the widget output via add_metas.
    """
    if self.outlier_method == self.OneClassSVM:
        learner = OneClassSVMLearner(
            gamma=self.gamma, nu=self.nu / 100,
            preprocessors=SklLearner.preprocessors)
    else:
        # support_fraction only applies to the empirical covariance fit;
        # otherwise let the estimator pick its own.
        fraction = (self.support_fraction
                    if self.empirical_covariance else None)
        learner = EllipticEnvelopeLearner(
            support_fraction=fraction,
            contamination=self.cont / 100.)
    model = learner(self.data)
    predictions = model(self.data)
    self.add_metas(model)
    return np.array(predictions)
def detect_outliers(self):
    """Fit the selected outlier detector and return (labels, amended data).

    The detector is trained on a class-less view of self.data so that
    target columns cannot influence the fit.
    """
    if self.outlier_method == self.OneClassSVM:
        learner = OneClassSVMLearner(
            gamma=self.gamma, nu=self.nu / 100,
            preprocessors=SklLearner.preprocessors)
    else:
        # support_fraction only applies to the empirical covariance fit;
        # otherwise let the estimator pick its own.
        fraction = (self.support_fraction
                    if self.empirical_covariance else None)
        learner = EllipticEnvelopeLearner(
            support_fraction=fraction,
            contamination=self.cont / 100.)
    # Strip class variables: fit and predict on attributes only.
    attributes_only = self.data.transform(Domain(self.data.domain.attributes))
    model = learner(attributes_only)
    predictions = model(attributes_only)
    amended_data = self.amended_data(model)
    return np.array(predictions), amended_data
def test_EllipticEnvelope_ignores_y(self):
    """Class variables must not influence the fitted envelope."""
    attrs = (ContinuousVariable("x1"), ContinuousVariable("x2"))
    class_vars = (ContinuousVariable("y1"), ContinuousVariable("y2"))
    domain = Domain(attrs, class_vars)
    # X is drawn before Y, matching the original call order.
    X = np.random.random((40, 2))
    Y = np.random.random((40, 2))
    table = Table(domain, X, Y)
    classless_table = table.transform(Domain(table.domain.attributes))
    learner = EllipticEnvelopeLearner()
    classless_model = learner(classless_table)
    model = learner(table)
    # Every combination of (model trained with/without y) applied to
    # (data with/without y) must yield identical meta annotations.
    predictions = [classless_model(classless_table),
                   classless_model(table),
                   model(classless_table),
                   model(table)]
    for left, right in zip(predictions, predictions[1:]):
        np.testing.assert_array_equal(left.metas, right.metas)
def test_reprs(self):
    """repr() of every learner must eval back to an equivalent learner.

    eval on repr output is deliberate here: the test's contract is
    precisely that repr emits a reconstructing expression.
    """
    learners = [
        LogisticRegressionLearner(tol=0.0002),
        MajorityLearner(),
        NaiveBayesLearner(),
        RandomForestLearner(bootstrap=False, n_jobs=3),
        SimpleTreeLearner(seed=1, bootstrap=True),
        SoftmaxRegressionLearner(),
        SVMLearner(shrinking=False),
        LinearSVMLearner(tol=0.022, dual=False),
        NuSVMLearner(tol=0.003, cache_size=190),
        OneClassSVMLearner(degree=2),
        TreeLearner(max_depth=3, min_samples_split=1),
        KNNLearner(n_neighbors=4),
        EllipticEnvelopeLearner(store_precision=False),
        SimpleRandomForestLearner(n_estimators=20),
    ]
    for learner in learners:
        text = repr(learner)
        rebuilt = eval(text)
        self.assertEqual(repr(rebuilt), text)