def test_augmented_data_classification(self):
    """Augmented CV results on a classification dataset keep every
    original row and column and append, as metas, one predicted-class
    column plus one probability column per class value for each
    learner, and a single shared fold column."""
    data = Table("iris")
    n_classes = len(data.domain.class_var.values)
    res = CrossValidation(data, [NaiveBayesLearner()], store_data=True)
    table = res.get_augmented_data(['Naive Bayes'])
    self.assertEqual(len(table), len(data))
    self.assertEqual(len(table.domain.attributes),
                     len(data.domain.attributes))
    self.assertEqual(len(table.domain.class_vars),
                     len(data.domain.class_vars))
    # +1 for class, +n_classes for probabilities, +1 for fold
    self.assertEqual(
        len(table.domain.metas),
        len(data.domain.metas) + 1 + n_classes + 1)
    # the first appended meta is the predicted class, sharing the
    # original class variable's values
    self.assertEqual(
        table.domain.metas[len(data.domain.metas)].values,
        data.domain.class_var.values)
    res = CrossValidation(data, [NaiveBayesLearner(), MajorityLearner()],
                          store_data=True)
    table = res.get_augmented_data(['Naive Bayes', 'Majority'])
    self.assertEqual(len(table), len(data))
    self.assertEqual(len(table.domain.attributes),
                     len(data.domain.attributes))
    self.assertEqual(len(table.domain.class_vars),
                     len(data.domain.class_vars))
    # two learners: 2 * (class + per-class probabilities) + 1 fold column
    self.assertEqual(
        len(table.domain.metas),
        len(data.domain.metas) + 2*(n_classes+1) + 1)
    self.assertEqual(
        table.domain.metas[len(data.domain.metas)].values,
        data.domain.class_var.values)
    self.assertEqual(
        table.domain.metas[len(data.domain.metas)+1].values,
        data.domain.class_var.values)
def test_split_by_model(self):
    """Each per-model Results must mirror the matching slice of the
    combined cross-validation results and hold one model per fold."""
    fitters = [NaiveBayesLearner(), MajorityLearner()]
    combined = CrossValidation(self.random_table, fitters, k=5,
                               store_models=True)
    for idx, single in enumerate(combined.split_by_model()):
        self.assertIsInstance(single, Results)
        self.assertTrue((single.predicted == combined.predicted[idx]).all())
        self.assertTrue(
            (single.probabilities == combined.probabilities[idx]).all())
        # one fitted model per fold
        self.assertEqual(len(single.models), 5)
        expected_cls = fitters[idx].__returns__
        for fitted in single.models:
            self.assertIsInstance(fitted, expected_cls)
def results_for_preview(data_name=""):
    """Build stored cross-validation results over four classifiers,
    suitable for previewing evaluation widgets.

    Uses *data_name* if given, otherwise the bundled "ionosphere" data.
    """
    from Orange.data import Table
    from Orange.evaluation import CrossValidation
    from Orange.classification import \
        LogisticRegressionLearner, SVMLearner, NuSVMLearner

    dataset = Table(data_name or "ionosphere")
    classifiers = [
        LogisticRegressionLearner(penalty="l2"),
        LogisticRegressionLearner(penalty="l1"),
        SVMLearner(probability=True),
        NuSVMLearner(probability=True),
    ]
    results = CrossValidation(dataset, classifiers, store_data=True)
    results.learner_names = ["LR l2", "LR l1", "SVM", "Nu SVM"]
    return results
def test_SoftmaxRegressionPreprocessors(self):
    """Default preprocessing should beat no preprocessing when two
    attributes are rescaled to a tiny range."""
    table = self.iris.copy()
    # shrink two columns so the unnormalized fit is handicapped
    table.X[:, 2] *= 0.001
    table.X[:, 3] *= 0.001
    without_pp = SoftmaxRegressionLearner(preprocessors=[])
    with_pp = SoftmaxRegressionLearner()
    results = CrossValidation(table, [without_pp, with_pp], k=10)
    ca = CA(results)
    self.assertLess(ca[0], ca[1])
def test_report_widgets_evaluate(self):
    """Reports for the evaluation widgets can be built from stored
    cross-validation results fed through OWTestLearners."""
    report = OWReport.get_instance()
    data = Table("zoo")
    results = CrossValidation(data, [LogisticRegressionLearner()],
                              store_data=True)
    results.learner_names = ["LR l2"]
    widget = self.create_widget(OWTestLearners)
    # resolve the widget's input handlers by signal order:
    # 0 = learner, 1 = training data, 2 = test data
    handlers = [getattr(widget, signal.handler)
                for signal in widget.inputs[:3]]
    handlers[0](LogisticRegressionLearner(), 0)
    handlers[1](data)
    handlers[2](data)
    widget.create_report_html()
    report.make_report(widget)
    self._create_report(self.eval_widgets, report, results)
def test_report_widgets_evaluate(self):
    """Evaluation widgets produce report entries from stored CV
    results run through a directly-instantiated OWTestLearners."""
    report = OWReport.get_instance()
    data = Table("zoo")
    widgets = self.eval_widgets
    results = CrossValidation(data, [LogisticRegressionLearner()],
                              store_data=True)
    results.learner_names = ["LR l2"]
    widget = OWTestLearners()
    # input handlers in signal order: learner, train data, test data
    handlers = [getattr(widget, sig.handler) for sig in widget.inputs[:3]]
    handlers[0](LogisticRegressionLearner(), 0)
    handlers[1](data)
    handlers[2](data)
    widget.create_report_html()
    report.make_report(widget)
    # sanity check on the number of widgets under test
    self.assertEqual(len(widgets) + 1, 4)
    self._create_report(widgets, report, results)
def test_10_fold_probs(self):
    """On this iris slice the majority models must predict class 1
    everywhere and order class probabilities as 0 < 2 < 1 (matching
    the asserted frequency ranking)."""
    subset = self.iris[30:130]
    results = CrossValidation(
        subset, [MajorityLearner(), MajorityLearner()], k=10)
    self.assertEqual(results.predicted.shape, (2, len(subset)))
    np.testing.assert_equal(results.predicted, np.ones((2, 100)))
    probs = results.probabilities
    self.assertTrue(np.all(probs[:, :, 0] < probs[:, :, 2]))
    self.assertTrue(np.all(probs[:, :, 2] < probs[:, :, 1]))
def test_augmented_data_regression(self):
    """Augmented CV results on a regression dataset: original columns
    are preserved; one prediction column per learner plus a fold column
    are appended as metas (no probability columns for regression)."""
    data = Table("housing")
    res = CrossValidation(data, [LinearRegressionLearner(), ],
                          store_data=True)
    table = res.get_augmented_data(['Linear Regression'])
    self.assertEqual(len(table), len(data))
    self.assertEqual(len(table.domain.attributes),
                     len(data.domain.attributes))
    self.assertEqual(len(table.domain.class_vars),
                     len(data.domain.class_vars))
    # +1 for class, +1 for fold
    self.assertEqual(len(table.domain.metas), len(data.domain.metas) + 1 + 1)
    res = CrossValidation(data, [LinearRegressionLearner(), MeanLearner()],
                          store_data=True)
    table = res.get_augmented_data(['Linear Regression', 'Mean Learner'])
    self.assertEqual(len(table), len(data))
    self.assertEqual(len(table.domain.attributes),
                     len(data.domain.attributes))
    self.assertEqual(len(table.domain.class_vars),
                     len(data.domain.class_vars))
    # +2 for class, +1 for fold
    self.assertEqual(len(table.domain.metas), len(data.domain.metas) + 2 + 1)
def test_preprocessors(self):
    """A learner with its default preprocessors should outperform the
    same learner stripped of preprocessing on the housing data."""
    housing = Table('housing')
    bare = LinearRegressionLearner(preprocessors=[])
    default = LinearRegressionLearner()
    results = CrossValidation(k=3)(housing, [bare, default])
    errors = RMSE(results)
    self.assertLess(errors[0], errors[1])
def tune_penalty(self, data):
    """Pick the penalty with the lowest 5-fold cross-validated log-loss.

    A fixed grid of candidate penalties is scored with the configured
    intercept settings; the best-scoring penalty value is returned.
    """
    learner = LRRulesLearner(fit_intercept=self.fit_intercept,
                             intercept_scaling=self.intercept_scaling)
    penalties = [0.001, 0.01, 0.1, 0.5, 1.0, 2.0, 5.0, 10., 100.]
    scores = []
    for pen in penalties:
        learner.penalty = pen
        res = CrossValidation(data, [learner], k=5, random_state=1111)
        # LogLoss returns a one-element array (one learner); store the
        # scalar so min()/index() below compare plain floats instead of
        # relying on one-element numpy array truthiness.
        scores.append(LogLoss(res)[0])
    return penalties[scores.index(min(scores))]
def test_call_5(self):
    """Five-fold unstratified CV: actual values, predictions, and the
    argmax of probabilities must all match the reordered ground truth."""
    nrows = self.random_table.X.shape[0]
    cv = CrossValidation(k=5, stratified=False)
    res = cv(self.random_table, [NaiveBayesLearner()])
    expected = self.random_table.Y[res.row_indices].reshape(nrows)
    np.testing.assert_equal(res.actual, expected)
    np.testing.assert_equal(res.predicted[0], expected)
    np.testing.assert_equal(
        np.argmax(res.probabilities[0], axis=1), expected)
    self.check_folds(res, 5, nrows)
def test_adaboost_base_estimator(self):
    """AdaBoost over full trees should do at least as well as AdaBoost
    over depth-1 stumps on iris."""
    np.random.seed(0)
    boosted_stumps = SklAdaBoostClassificationLearner(
        base_estimator=SklTreeLearner(max_depth=1), n_estimators=5)
    boosted_trees = SklAdaBoostClassificationLearner(
        base_estimator=SklTreeLearner(), n_estimators=5)
    results = CrossValidation(self.iris, [boosted_stumps, boosted_trees],
                              k=4)
    ca = CA(results)
    self.assertLessEqual(ca[0], ca[1])
def test_Regression(self):
    """Every regularized linear model must beat the mean predictor."""
    learners = [RidgeRegressionLearner(), LassoRegressionLearner(),
                ElasticNetLearner(), ElasticNetCVLearner(),
                MeanLearner()]
    res = CrossValidation(self.housing, learners, k=2)
    rmse = RMSE(res)
    baseline = rmse[-1]  # the mean learner's error
    for score in rmse[:-1]:
        self.assertLess(score, baseline)
def test_miss_majority():
    """Majority predicts the dominant class 0 regardless of a handful
    of differing rows, before and after they are flipped back."""
    features = np.zeros((50, 3))
    targets = features[:, -1]  # a view: writes to features update targets
    features[-4:] = np.ones((4, 3))
    table = Table(features, targets)
    cv = CrossValidation(k=3)
    result = cv(table, [MajorityLearner()])
    np.testing.assert_equal(result.predicted[0][:49], 0)
    # revert the tail rows in place and validate again
    features[-4:] = np.zeros((4, 3))
    result = cv(table, [MajorityLearner()])
    np.testing.assert_equal(result.predicted[0][:49], 0)
def test_report_widgets_evaluate(self):
    """Evaluation widget reports can be built from stored CV results
    fed to OWTestLearners through its named Inputs signals."""
    report = OWReport.get_instance()
    data = Table("zoo")
    results = CrossValidation(data, [LogisticRegressionLearner()],
                              store_data=True, k=3)
    results.learner_names = ["LR l2"]
    widget = self.create_widget(OWTestLearners)
    getattr(widget, widget.Inputs.learner.handler)(
        LogisticRegressionLearner(), 0)
    getattr(widget, widget.Inputs.train_data.handler)(data)
    getattr(widget, widget.Inputs.test_data.handler)(data)
    widget.create_report_html()
    report.make_report(widget)
    self._create_report(self.eval_widgets, report, results)
def test_augmented_data_classification(self):
    """get_augmented_data on classification results keeps all original
    columns and appends, per learner, a predicted-class meta plus one
    probability meta per class value, and one shared fold meta."""
    data = Table("iris")
    n_classes = len(data.domain.class_var.values)
    res = CrossValidation(store_data=True)(data, [NaiveBayesLearner()])
    table = res.get_augmented_data(['Naive Bayes'])
    self.assertEqual(len(table), len(data))
    self.assertEqual(len(table.domain.attributes),
                     len(data.domain.attributes))
    self.assertEqual(len(table.domain.class_vars),
                     len(data.domain.class_vars))
    # +1 for class, +n_classes for probabilities, +1 for fold
    self.assertEqual(len(table.domain.metas),
                     len(data.domain.metas) + 1 + n_classes + 1)
    # the first appended meta is the predicted class, sharing the
    # original class variable's values
    self.assertEqual(table.domain.metas[len(data.domain.metas)].values,
                     data.domain.class_var.values)
    res = CrossValidation(store_data=True)(
        data, [NaiveBayesLearner(), MajorityLearner()])
    table = res.get_augmented_data(['Naive Bayes', 'Majority'])
    self.assertEqual(len(table), len(data))
    self.assertEqual(len(table.domain.attributes),
                     len(data.domain.attributes))
    self.assertEqual(len(table.domain.class_vars),
                     len(data.domain.class_vars))
    # two learners: 2 * (class + probabilities) metas + 1 fold meta
    self.assertEqual(len(table.domain.metas),
                     len(data.domain.metas) + 2 * (n_classes + 1) + 1)
    self.assertEqual(table.domain.metas[len(data.domain.metas)].values,
                     data.domain.class_var.values)
    self.assertEqual(table.domain.metas[len(data.domain.metas) + 1].values,
                     data.domain.class_var.values)
def test_augmented_data_regression(self):
    """get_augmented_data on regression results appends one prediction
    meta per learner and one fold meta; no probability columns."""
    data = Table("housing")
    res = CrossValidation(store_data=True)(data, [LinearRegressionLearner()])
    table = res.get_augmented_data(['Linear Regression'])
    self.assertEqual(len(table), len(data))
    self.assertEqual(len(table.domain.attributes),
                     len(data.domain.attributes))
    self.assertEqual(len(table.domain.class_vars),
                     len(data.domain.class_vars))
    # +1 for class, +1 for fold
    self.assertEqual(len(table.domain.metas), len(data.domain.metas) + 1 + 1)
    res = CrossValidation(store_data=True)(
        data, [LinearRegressionLearner(), MeanLearner()])
    table = res.get_augmented_data(['Linear Regression', 'Mean Learner'])
    self.assertEqual(len(table), len(data))
    self.assertEqual(len(table.domain.attributes),
                     len(data.domain.attributes))
    self.assertEqual(len(table.domain.class_vars),
                     len(data.domain.class_vars))
    # +2 for class, +1 for fold
    self.assertEqual(len(table.domain.metas), len(data.domain.metas) + 2 + 1)
def test_miss_majority():
    """Majority keeps predicting class 0 with a few rows flipped and
    after flipping them back; the table must be unlocked for the
    in-place edit."""
    features = np.zeros((50, 3))
    targets = features[:, -1]  # a view into the feature array
    features[-4:] = np.ones((4, 3))
    table = Table.from_numpy(None, features, targets)
    cv = CrossValidation(k=3)
    result = cv(table, [MajorityLearner()])
    np.testing.assert_equal(result.predicted[0][:49], 0)
    with table.unlocked(table.X):
        features[-4:] = np.zeros((4, 3))
    result = cv(table, [MajorityLearner()])
    np.testing.assert_equal(result.predicted[0][:49], 0)
def test_cv_preprocess(self):
    """Imputation applied by CV, by the learner, or not at all must
    yield identical RMSE on this data."""

    def fun(x, a):
        return x[:, 0] + a

    imputer = Impute()

    def cv_rmse(learner, preprocessor=None):
        # run a fresh 2-fold CV and return the single learner's RMSE
        cv = CrossValidation(k=2)
        if preprocessor is None:
            results = cv(self.data, [learner])
        else:
            results = cv(self.data, [learner], preprocessor=preprocessor)
        return RMSE(results)[0]

    rmse1 = cv_rmse(CurveFitLearner(fun, ["a"], ["CRIM"]))
    rmse2 = cv_rmse(CurveFitLearner(fun, ["a"], ["CRIM"]),
                    preprocessor=imputer)
    rmse3 = cv_rmse(CurveFitLearner(fun, ["a"], ["CRIM"],
                                    preprocessors=imputer))
    self.assertEqual(rmse1, rmse2)
    self.assertEqual(rmse2, rmse3)
def test_results(self):
    """Ten-fold CV over 1000 random rows: predictions match ground
    truth and each fold covers roughly 100 consecutive rows."""
    nrows, ncols = 1000, 10
    table = random_data(nrows, ncols)
    res = CrossValidation(table, [NaiveBayesLearner()])
    truth = table.Y[res.row_indices].reshape(nrows)
    np.testing.assert_equal(res.actual, truth)
    np.testing.assert_equal(res.predicted[0], truth)
    np.testing.assert_equal(np.argmax(res.probabilities[0], axis=1), truth)
    self.assertEqual(len(res.folds), 10)
    for i, fold in enumerate(res.folds):
        # fold boundaries may drift a few rows — hence delta=3
        self.assertAlmostEqual(fold.start, i * 100, delta=3)
        self.assertAlmostEqual(fold.stop, (i + 1) * 100, delta=3)
def test_SoftmaxRegressionPreprocessors(self):
    """Default preprocessing must outperform no preprocessing when a
    huge-scale random column is prepended to iris."""
    np.random.seed(42)
    table = Table('iris')
    new_attrs = (ContinuousVariable('c0'), ) + table.domain.attributes
    new_domain = Domain(new_attrs,
                        table.domain.class_vars,
                        table.domain.metas)
    # prepend a random column scaled by 1e6 to break unnormalized fits
    new_table = np.hstack((1000000 * np.random.random(
        (table.X.shape[0], 1)), table))
    table = table.from_numpy(new_domain, new_table)
    learners = [SoftmaxRegressionLearner(preprocessors=[]),
                SoftmaxRegressionLearner()]
    results = CrossValidation(table, learners, k=3)
    ca = CA(results)
    # assertLess reports both values on failure, unlike
    # assertTrue(ca[0] < ca[1])
    self.assertLess(ca[0], ca[1])
def test_report_widgets_evaluate(self):
    """OWTestAndScore report entries can be built from stored CV
    results."""
    report = OWReport.get_instance()
    data = Table("zoo")
    results = CrossValidation(k=3, store_data=True)(
        data, [LogisticRegressionLearner()])
    results.learner_names = ["LR l2"]
    widget = self.create_widget(OWTestAndScore)
    widget.insert_learner(0, LogisticRegressionLearner())
    widget.set_train_data(data)
    widget.set_test_data(data)
    widget.create_report_html()
    report.make_report(widget)
    self._create_report(self.eval_widgets, report, results)
def predict_wine_quality(table, n):
    """Discretize *table*, use its first column as the class variable,
    score a small neural network with 10-fold CV, and print CA and AUC.

    Args:
        table: input data table; column 0 becomes the class variable.
        n: number of equal-width bins used for discretization.
    """
    # Make the continuous variables discrete
    disc = Discretize()
    disc.method = discretize.EqualWidth(n=n)
    table = disc(table)
    # Define the domain: first column as class, the rest as features
    feature_vars = list(table.domain[1:])
    class_label_var = table.domain[0]
    wine_domain = Domain(feature_vars, class_label_var)
    table = Table.from_table(domain=wine_domain, source=table)
    # Construct the learner and print results
    # (renamed from `tree_learner`: this is a neural network, not a tree)
    nn_learner = NNClassificationLearner(hidden_layer_sizes=(10, ),
                                         max_iter=4000)
    eval_results = CrossValidation(table, [nn_learner], k=10)
    print("Accuracy of cross validation: {:.3f}".format(
        scoring.CA(eval_results)[0]))
    print("AUC: {:.3f}".format(scoring.AUC(eval_results)[0]))
def fit_storage(self, data):
    """Fit a stacked model: cross-validate the base learners, train the
    aggregator on their out-of-fold outputs, then refit every base
    learner on the full data."""
    cv_results = CrossValidation(data, self.learners, k=self.k)
    use_prob = bool(data.domain.class_var.is_discrete)
    if use_prob:
        # classification: stack per-class probabilities side by side
        meta_features = np.hstack(cv_results.probabilities)
    else:
        # regression: one predicted-value column per base learner
        meta_features = cv_results.predicted.T
    feature_vars = [ContinuousVariable('f{}'.format(col + 1))
                    for col in range(meta_features.shape[1])]
    meta_domain = Domain(feature_vars, data.domain.class_var)
    stacked_data = data.transform(meta_domain)
    stacked_data.X = meta_features
    stacked_data.Y = cv_results.actual
    base_models = [learner(data) for learner in self.learners]
    aggregate_model = self.aggregate(stacked_data)
    return StackedModel(base_models, aggregate_model, use_prob=use_prob)
def test_LogisticRegressionNormalization(self):
    """Normalization should help when a huge-scale random column is
    prepended, and coefficients must be mapped back to the original
    (unnormalized) space."""
    np.random.seed(42)
    new_attrs = (ContinuousVariable('c0'), ) + self.iris.domain.attributes
    new_domain = Domain(new_attrs,
                        self.iris.domain.class_vars,
                        self.iris.domain.metas)
    noise = 1000000 * np.random.random((self.iris.X.shape[0], 1))
    table = self.iris.from_numpy(new_domain, np.hstack((noise, self.iris)))
    lr = LogisticRegressionLearner(normalize=False)
    lr_norm = LogisticRegressionLearner(normalize=True)

    # check that normalization produces better results
    ca = CA(CrossValidation(table, [lr_norm, lr], k=3))
    self.assertGreater(ca[0], ca[1])

    # check that coefficients are properly scaled back to unnormalized data
    model = lr_norm(table)
    scores = np.dot(table.X, model.coefficients.T) + model.intercept
    np.testing.assert_array_equal(model(table), np.argmax(scores, axis=1))
def test_tree(self):
    """A tree's 2-fold AUC on iris is good but not perfect."""
    res = CrossValidation(self.iris, [SklTreeLearner()], k=2)
    auc = AUC(res)[0]  # computed once, checked against both bounds
    self.assertGreater(auc, 0.8)
    self.assertLess(auc, 1.)
def test_LogisticRegression(self):
    """Logistic regression reaches a good but imperfect CA on voting."""
    learn = LogisticRegressionLearner()
    results = CrossValidation(self.voting, [learn], k=2)
    ca = CA(results)
    # CA returns an array of scores (one per learner); index it so the
    # assertion compares a plain scalar instead of relying on the
    # truthiness of a one-element numpy array.
    self.assertGreater(ca[0], 0.8)
    self.assertLess(ca[0], 1.0)
def test_SGDClassification(self):
    """The SGD classifier achieves a reasonable AUC on iris."""
    res = CrossValidation(k=3)(self.iris, [SGDClassificationLearner()])
    self.assertGreater(AUC(res)[0], 0.8)
def test_NuSVM(self):
    """Nu-SVM with a small nu scores high classification accuracy."""
    res = CrossValidation(k=2)(self.data, [NuSVMLearner(nu=0.01)])
    self.assertGreater(CA(res)[0], 0.9)
def test_n_jobs_fitting(self):
    """Parallel CV (n_jobs=5) runs cleanly with a fitter; the size
    threshold is patched to 1, presumably so the multi-job code path is
    exercised even on small data — verify against CrossValidation."""
    with patch(
            'Orange.evaluation.testing.CrossValidation._MIN_NJOBS_X_SIZE',
            1):
        CrossValidation(self.heart_disease, [DummyFitter()], k=5, n_jobs=5)
def test_LinearSVM(self):
    """Linear SVM lands in a known accuracy band on this dataset."""
    res = CrossValidation(self.data, [LinearSVMLearner()], k=2)
    accuracy = CA(res)[0]  # computed once, checked against both bounds
    self.assertGreater(accuracy, 0.8)
    self.assertLess(accuracy, 0.9)
    return grad


# --- ad-hoc script: sweep ridge penalties on housing via manual CV ---
# NOTE(review): `return grad` above is the tail of a function whose
# definition is outside this view.
d = Orange.data.Table("housing")
# append a constant column (explicit bias term in the design matrix)
d.X = np.hstack((d.X, np.ones((d.X.shape[0], 1))))
d.shuffle()
# m = LinearRegressionLearner(lambda_=1.0)
# print(m(d)(d))
#
# # gradient check
# m = LinearRegressionLearner(lambda_=1.0)
# theta = np.random.randn(d.X.shape[1])
#
# ga = m.cost_grad(theta, d.X, d.Y.ravel())[1]
# gm = numerical_grad(lambda t: m.cost_grad(t, d.X, d.Y.ravel())[0], theta)
#
# print(np.sum((ga - gm)**2))

for lambda_ in (0.01, 0.03, 0.1, 0.3, 1, 3):
    m = LinearRegressionLearner(lambda_=lambda_)
    scores = []
    # NOTE(review): legacy CrossValidation API — positional arguments
    # after the learners, and fold indices read from `res.indices`;
    # confirm against the Orange version in use.
    res = CrossValidation(d, [m], 3, False)
    for tr_ind, te_ind in res.indices:
        # mean squared error of a model fit on the training fold
        s = np.mean((m(d[tr_ind])(d[te_ind]) - d[te_ind].Y.ravel()) ** 2)
        scores.append(s)
    print("{:5.2f} {}".format(lambda_, np.mean(scores)))

# reference numbers: training error of an unregularized fit, and the
# variance of the target (error of predicting the mean)
m = LinearRegressionLearner(lambda_=0)
print("test data", np.mean((m(d)(d) - d.Y.ravel()) ** 2))
print("majority", np.mean((np.mean(d.Y.ravel()) - d.Y.ravel()) ** 2))
def test_NN_classification(self):
    """The NN classifier's CA on iris lies in a known band."""
    results = CrossValidation(self.iris, [NNClassificationLearner()], k=3)
    ca = CA(results)
    # index the scores array so the assertion compares a scalar rather
    # than relying on one-element numpy array truthiness
    self.assertGreater(ca[0], 0.8)
    self.assertLess(ca[0], 0.99)
def test_RandomForest(self):
    """Random forest CA on iris lies in a known band."""
    forest = RandomForestLearner()
    results = CrossValidation(self.iris, [forest], k=10)
    ca = CA(results)
    # index the scores array so the assertion compares a scalar rather
    # than relying on one-element numpy array truthiness
    self.assertGreater(ca[0], 0.9)
    self.assertLess(ca[0], 0.99)
def test_RandomForestRegression(self):
    """Random forest regression runs through 10-fold CV and RMSE
    scoring without raising; the score value itself is not checked."""
    results = CrossValidation(self.housing,
                              [RandomForestRegressionLearner()], k=10)
    _ = RMSE(results)
def test_allnan_cv(self):
    """Cross-validation must not fail on the lenses data
    (regression test for GH 2740)."""
    table = Table(test_filename('datasets/lenses.tab'))
    results = CrossValidation()(table, [self.learner])
    self.assertFalse(any(results.failed))