def test_augmented_data_classification(self):
        """Augmented CV output keeps rows/attributes and appends metas."""
        iris = Table("iris")
        class_count = len(iris.domain.class_var.values)
        base_metas = len(iris.domain.metas)

        results = CrossValidation(iris, [NaiveBayesLearner()], store_data=True)
        augmented = results.get_augmented_data(['Naive Bayes'])

        self.assertEqual(len(augmented), len(iris))
        self.assertEqual(len(augmented.domain.attributes),
                         len(iris.domain.attributes))
        self.assertEqual(len(augmented.domain.class_vars),
                         len(iris.domain.class_vars))
        # one meta for the predicted class, one per class probability,
        # and one for the fold index
        self.assertEqual(len(augmented.domain.metas),
                         base_metas + 1 + class_count + 1)
        self.assertEqual(augmented.domain.metas[base_metas].values,
                         iris.domain.class_var.values)

        results = CrossValidation(
            iris, [NaiveBayesLearner(), MajorityLearner()], store_data=True)
        augmented = results.get_augmented_data(['Naive Bayes', 'Majority'])

        self.assertEqual(len(augmented), len(iris))
        self.assertEqual(len(augmented.domain.attributes),
                         len(iris.domain.attributes))
        self.assertEqual(len(augmented.domain.class_vars),
                         len(iris.domain.class_vars))
        # two learners: (class + probabilities) each, plus one shared fold meta
        self.assertEqual(len(augmented.domain.metas),
                         base_metas + 2 * (class_count + 1) + 1)
        self.assertEqual(augmented.domain.metas[base_metas].values,
                         iris.domain.class_var.values)
        self.assertEqual(augmented.domain.metas[base_metas + 1].values,
                         iris.domain.class_var.values)
예제 #2
0
    def test_split_by_model(self):
        """Each per-model Results slice mirrors the pooled predictions."""
        learners = [NaiveBayesLearner(), MajorityLearner()]
        res = CrossValidation(self.random_table, learners, k=5,
                              store_models=True)

        for idx, single in enumerate(res.split_by_model()):
            self.assertIsInstance(single, Results)
            # the slice must reproduce exactly this learner's rows
            self.assertTrue((single.predicted == res.predicted[idx]).all())
            self.assertTrue(
                (single.probabilities == res.probabilities[idx]).all())
            # one model per fold
            self.assertEqual(len(single.models), 5)
            for model in single.models:
                self.assertIsInstance(model, learners[idx].__returns__)
예제 #3
0
def results_for_preview(data_name=""):
    """Build cross-validation results over four classifiers for previews.

    Defaults to the "ionosphere" data set when *data_name* is empty.
    """
    from Orange.data import Table
    from Orange.evaluation import CrossValidation
    from Orange.classification import \
        LogisticRegressionLearner, SVMLearner, NuSVMLearner

    data = Table(data_name or "ionosphere")
    learners = [
        LogisticRegressionLearner(penalty="l2"),
        LogisticRegressionLearner(penalty="l1"),
        SVMLearner(probability=True),
        NuSVMLearner(probability=True),
    ]
    results = CrossValidation(data, learners, store_data=True)
    results.learner_names = ["LR l2", "LR l1", "SVM", "Nu SVM"]
    return results
예제 #4
0
    def test_SoftmaxRegressionPreprocessors(self):
        """Default preprocessing must help once two columns are rescaled."""
        table = self.iris.copy()
        # shrink two attributes by 1000x so feature scaling matters
        table.X[:, 2] *= 0.001
        table.X[:, 3] *= 0.001
        learners = [SoftmaxRegressionLearner(preprocessors=[]),
                    SoftmaxRegressionLearner()]
        ca = CA(CrossValidation(table, learners, k=10))

        # the learner with default preprocessing should score higher
        self.assertLess(ca[0], ca[1])
예제 #5
0
    def test_report_widgets_evaluate(self):
        """Evaluation widgets can be rendered into a report."""
        rep = OWReport.get_instance()
        data = Table("zoo")
        widgets = self.eval_widgets
        results = CrossValidation(data, [LogisticRegressionLearner()],
                                  store_data=True)
        results.learner_names = ["LR l2"]

        widget = self.create_widget(OWTestLearners)
        # resolve positional input handlers: learner, train data, test data
        set_learner, set_train, set_test = (
            getattr(widget, widget.inputs[i].handler) for i in range(3))
        set_learner(LogisticRegressionLearner(), 0)
        set_train(data)
        set_test(data)
        widget.create_report_html()
        rep.make_report(widget)

        self._create_report(widgets, rep, results)
예제 #6
0
    def test_report_widgets_evaluate(self):
        """Evaluation widgets (direct instantiation) render into a report."""
        rep = OWReport.get_instance()
        data = Table("zoo")
        widgets = self.eval_widgets
        results = CrossValidation(data, [LogisticRegressionLearner()],
                                  store_data=True)
        results.learner_names = ["LR l2"]

        widget = OWTestLearners()
        # resolve positional input handlers: learner, train data, test data
        set_learner, set_train, set_test = (
            getattr(widget, widget.inputs[i].handler) for i in range(3))
        set_learner(LogisticRegressionLearner(), 0)
        set_train(data)
        set_test(data)
        widget.create_report_html()
        rep.make_report(widget)

        # sanity check on the number of evaluation widgets under test
        self.assertEqual(len(widgets) + 1, 4)
        self._create_report(widgets, rep, results)
    def test_10_fold_probs(self):
        """Majority on iris[30:130] always predicts the dominant class 1."""
        learners = [MajorityLearner(), MajorityLearner()]
        subset = self.iris[30:130]

        results = CrossValidation(subset, learners, k=10)

        self.assertEqual(results.predicted.shape, (2, len(subset)))
        np.testing.assert_equal(results.predicted, np.ones((2, 100)))
        probs = results.probabilities
        # class 1 dominates this 100-row slice; class 0 is absent from it,
        # so the probability ordering must be p0 < p2 < p1 everywhere
        self.assertTrue((probs[:, :, 0] < probs[:, :, 2]).all())
        self.assertTrue((probs[:, :, 2] < probs[:, :, 1]).all())
예제 #8
0
    def test_augmented_data_regression(self):
        """Augmented regression CV appends prediction and fold metas."""
        housing = Table("housing")
        base_metas = len(housing.domain.metas)

        res = CrossValidation(housing, [LinearRegressionLearner(), ],
                              store_data=True)
        augmented = res.get_augmented_data(['Linear Regression'])

        self.assertEqual(len(augmented), len(housing))
        self.assertEqual(len(augmented.domain.attributes),
                         len(housing.domain.attributes))
        self.assertEqual(len(augmented.domain.class_vars),
                         len(housing.domain.class_vars))
        # one meta for the predicted value, one for the fold index
        self.assertEqual(len(augmented.domain.metas), base_metas + 1 + 1)

        res = CrossValidation(housing,
                              [LinearRegressionLearner(), MeanLearner()],
                              store_data=True)
        augmented = res.get_augmented_data(['Linear Regression',
                                            'Mean Learner'])

        self.assertEqual(len(augmented), len(housing))
        self.assertEqual(len(augmented.domain.attributes),
                         len(housing.domain.attributes))
        self.assertEqual(len(augmented.domain.class_vars),
                         len(housing.domain.class_vars))
        # one prediction meta per learner, plus the shared fold meta
        self.assertEqual(len(augmented.domain.metas), base_metas + 2 + 1)
예제 #9
0
 def test_preprocessors(self):
     """Default preprocessing should beat no preprocessing on housing."""
     housing = Table('housing')
     plain = LinearRegressionLearner(preprocessors=[])
     with_defaults = LinearRegressionLearner()
     results = CrossValidation(k=3)(housing, [plain, with_defaults])
     rmse = RMSE(results)
     # preprocessed learner (index 1) must have the lower error
     self.assertLess(rmse[0], rmse[1])
예제 #10
0
 def tune_penalty(self, data):
     """Pick the penalty with the lowest cross-validated log loss.

     Runs 5-fold CV over a fixed penalty grid on *data* and returns
     the penalty value whose LogLoss score is smallest.
     """
     learner = LRRulesLearner(fit_intercept=self.fit_intercept,
                              intercept_scaling=self.intercept_scaling)
     penalties = [0.001, 0.01, 0.1, 0.5, 1.0, 2.0, 5.0, 10., 100.]
     scores = []
     for pen in penalties:
         learner.penalty = pen
         res = CrossValidation(data, [learner], k=5, random_state=1111)
         # LogLoss returns an array with one entry per learner; store the
         # scalar so min()/list.index() below compare plain floats instead
         # of relying on truth-testing 1-element numpy arrays.
         scores.append(float(LogLoss(res)[0]))
     return penalties[scores.index(min(scores))]
예제 #11
0
 def test_call_5(self):
     """5-fold unstratified CV reproduces the labels perfectly for NB here."""
     nrows, _ = self.random_table.X.shape
     cv = CrossValidation(k=5, stratified=False)
     res = cv(self.random_table, [NaiveBayesLearner()])
     # ground truth reordered to match the CV row order
     expected = self.random_table.Y[res.row_indices].reshape(nrows)
     np.testing.assert_equal(res.actual, expected)
     np.testing.assert_equal(res.predicted[0], expected)
     np.testing.assert_equal(np.argmax(res.probabilities[0], axis=1),
                             expected)
     self.check_folds(res, 5, nrows)
 def test_adaboost_base_estimator(self):
     """AdaBoost on stumps should not outscore AdaBoost on full trees."""
     np.random.seed(0)
     stump = SklAdaBoostClassificationLearner(
         base_estimator=SklTreeLearner(max_depth=1), n_estimators=5)
     tree = SklAdaBoostClassificationLearner(
         base_estimator=SklTreeLearner(), n_estimators=5)
     ca = CA(CrossValidation(self.iris, [stump, tree], k=4))
     self.assertLessEqual(ca[0], ca[1])
 def test_Regression(self):
     """Every linear-model learner should beat the mean baseline on RMSE."""
     learners = [RidgeRegressionLearner(), LassoRegressionLearner(),
                 ElasticNetLearner(), ElasticNetCVLearner(), MeanLearner()]
     rmse = RMSE(CrossValidation(self.housing, learners, k=2))
     # the baseline (last entry) must have the largest error
     for score in rmse[:-1]:
         self.assertLess(score, rmse[-1])
    @staticmethod
    def test_miss_majority():
        """Majority keeps predicting the majority class after label flips.

        NOTE(review): the original method took no ``self`` and had no
        decorator, so unittest would raise TypeError when invoking it;
        ``@staticmethod`` keeps the zero-argument signature while making
        the test runnable.
        """
        x = np.zeros((50, 3))
        y = x[:, -1]
        x[-4:] = np.ones((4, 3))
        data = Table(x, y)
        cv = CrossValidation(k=3)
        res = cv(data, [MajorityLearner()])
        np.testing.assert_equal(res.predicted[0][:49], 0)

        x[-4:] = np.zeros((4, 3))
        res = cv(data, [MajorityLearner()])
        np.testing.assert_equal(res.predicted[0][:49], 0)
예제 #15
0
    def test_report_widgets_evaluate(self):
        """Evaluation widgets render into a report (typed Inputs API)."""
        rep = OWReport.get_instance()
        data = Table("zoo")
        widgets = self.eval_widgets
        results = CrossValidation(data, [LogisticRegressionLearner()],
                                  store_data=True, k=3)
        results.learner_names = ["LR l2"]

        widget = self.create_widget(OWTestLearners)
        # feed the widget through its declared input handlers
        getattr(widget, widget.Inputs.learner.handler)(
            LogisticRegressionLearner(), 0)
        getattr(widget, widget.Inputs.train_data.handler)(data)
        getattr(widget, widget.Inputs.test_data.handler)(data)
        widget.create_report_html()
        rep.make_report(widget)

        self._create_report(widgets, rep, results)
예제 #16
0
    def test_augmented_data_classification(self):
        """Augmented CV output (callable API) appends the expected metas."""
        iris = Table("iris")
        class_count = len(iris.domain.class_var.values)
        base_metas = len(iris.domain.metas)

        res = CrossValidation(store_data=True)(iris, [NaiveBayesLearner()])
        augmented = res.get_augmented_data(['Naive Bayes'])

        self.assertEqual(len(augmented), len(iris))
        self.assertEqual(len(augmented.domain.attributes),
                         len(iris.domain.attributes))
        self.assertEqual(len(augmented.domain.class_vars),
                         len(iris.domain.class_vars))
        # predicted class + per-class probabilities + fold index
        self.assertEqual(len(augmented.domain.metas),
                         base_metas + 1 + class_count + 1)
        self.assertEqual(augmented.domain.metas[base_metas].values,
                         iris.domain.class_var.values)

        res = CrossValidation(store_data=True)(
            iris, [NaiveBayesLearner(), MajorityLearner()])
        augmented = res.get_augmented_data(['Naive Bayes', 'Majority'])

        self.assertEqual(len(augmented), len(iris))
        self.assertEqual(len(augmented.domain.attributes),
                         len(iris.domain.attributes))
        self.assertEqual(len(augmented.domain.class_vars),
                         len(iris.domain.class_vars))
        # two learners' (class + probabilities) plus one shared fold meta
        self.assertEqual(len(augmented.domain.metas),
                         base_metas + 2 * (class_count + 1) + 1)
        self.assertEqual(augmented.domain.metas[base_metas].values,
                         iris.domain.class_var.values)
        self.assertEqual(augmented.domain.metas[base_metas + 1].values,
                         iris.domain.class_var.values)
예제 #17
0
    def test_augmented_data_regression(self):
        """Augmented regression CV (callable API) appends the right metas."""
        housing = Table("housing")
        base_metas = len(housing.domain.metas)

        res = CrossValidation(store_data=True)(housing,
                                               [LinearRegressionLearner()])
        augmented = res.get_augmented_data(['Linear Regression'])

        self.assertEqual(len(augmented), len(housing))
        self.assertEqual(len(augmented.domain.attributes),
                         len(housing.domain.attributes))
        self.assertEqual(len(augmented.domain.class_vars),
                         len(housing.domain.class_vars))
        # one meta for the predicted value, one for the fold index
        self.assertEqual(len(augmented.domain.metas), base_metas + 1 + 1)

        res = CrossValidation(store_data=True)(
            housing, [LinearRegressionLearner(),
                      MeanLearner()])
        augmented = res.get_augmented_data(['Linear Regression',
                                            'Mean Learner'])

        self.assertEqual(len(augmented), len(housing))
        self.assertEqual(len(augmented.domain.attributes),
                         len(housing.domain.attributes))
        self.assertEqual(len(augmented.domain.class_vars),
                         len(housing.domain.class_vars))
        # one prediction meta per learner, plus the shared fold meta
        self.assertEqual(len(augmented.domain.metas), base_metas + 2 + 1)
예제 #18
0
    @staticmethod
    def test_miss_majority():
        """Majority keeps predicting the majority class after label flips.

        NOTE(review): the original method took no ``self`` and had no
        decorator, so unittest would raise TypeError when invoking it;
        ``@staticmethod`` keeps the zero-argument signature while making
        the test runnable.
        """
        x = np.zeros((50, 3))
        y = x[:, -1]
        x[-4:] = np.ones((4, 3))
        data = Table.from_numpy(None, x, y)
        cv = CrossValidation(k=3)
        res = cv(data, [MajorityLearner()])
        np.testing.assert_equal(res.predicted[0][:49], 0)

        # tables are locked by default; unlock X before mutating in place
        with data.unlocked(data.X):
            x[-4:] = np.zeros((4, 3))
        res = cv(data, [MajorityLearner()])
        np.testing.assert_equal(res.predicted[0][:49], 0)
예제 #19
0
    def test_cv_preprocess(self):
        """Imputation via the CV call and via the learner give the same RMSE."""
        def fun(x, a):
            return x[:, 0] + a

        imputer = Impute()

        # 1) no explicit preprocessing
        results = CrossValidation(k=2)(
            self.data, [CurveFitLearner(fun, ["a"], ["CRIM"])])
        rmse_plain = RMSE(results)[0]

        # 2) imputer supplied to the cross-validation call
        results = CrossValidation(k=2)(
            self.data, [CurveFitLearner(fun, ["a"], ["CRIM"])],
            preprocessor=imputer)
        rmse_cv = RMSE(results)[0]

        # 3) imputer attached to the learner itself
        results = CrossValidation(k=2)(
            self.data,
            [CurveFitLearner(fun, ["a"], ["CRIM"], preprocessors=imputer)])
        rmse_learner = RMSE(results)[0]

        self.assertEqual(rmse_plain, rmse_cv)
        self.assertEqual(rmse_cv, rmse_learner)
예제 #20
0
 def test_results(self):
     """NB on random data predicts perfectly; folds split ~evenly."""
     nrows, ncols = 1000, 10
     data = random_data(nrows, ncols)
     res = CrossValidation(data, [NaiveBayesLearner()])
     # ground truth reordered to match the CV row order
     truth = data.Y[res.row_indices].reshape(nrows)
     np.testing.assert_equal(res.actual, truth)
     np.testing.assert_equal(res.predicted[0], truth)
     np.testing.assert_equal(np.argmax(res.probabilities[0], axis=1), truth)
     self.assertEqual(len(res.folds), 10)
     for i, fold in enumerate(res.folds):
         # ~100 rows per fold; stratification may shift bounds slightly
         self.assertAlmostEqual(fold.start, i * 100, delta=3)
         self.assertAlmostEqual(fold.stop, (i + 1) * 100, delta=3)
예제 #21
0
 def test_SoftmaxRegressionPreprocessors(self):
     """Default preprocessing must help when a huge-scale nuisance
     attribute is prepended to iris."""
     np.random.seed(42)
     table = Table('iris')
     new_attrs = (ContinuousVariable('c0'), ) + table.domain.attributes
     new_domain = Domain(new_attrs, table.domain.class_vars,
                         table.domain.metas)
     # prepend a random column with values on the order of 1e6
     new_table = np.hstack((1000000 * np.random.random(
         (table.X.shape[0], 1)), table))
     table = table.from_numpy(new_domain, new_table)
     learners = [
         SoftmaxRegressionLearner(preprocessors=[]),
         SoftmaxRegressionLearner()
     ]
     results = CrossValidation(table, learners, k=3)
     ca = CA(results)
     # assertLess reports both values on failure, unlike assertTrue(a < b)
     self.assertLess(ca[0], ca[1])
예제 #22
0
    def test_report_widgets_evaluate(self):
        """OWTestAndScore renders its report from CV results."""
        rep = OWReport.get_instance()
        data = Table("zoo")
        widgets = self.eval_widgets
        results = CrossValidation(k=3, store_data=True)(
            data, [LogisticRegressionLearner()])
        results.learner_names = ["LR l2"]

        widget = self.create_widget(OWTestAndScore)
        widget.insert_learner(0, LogisticRegressionLearner())
        widget.set_train_data(data)
        widget.set_test_data(data)
        widget.create_report_html()
        rep.make_report(widget)

        self._create_report(widgets, rep, results)
예제 #23
0
def predict_wine_quality(table, n):
    """Discretize *table* into *n* equal-width bins, train a neural-network
    classifier on it and print 10-fold CV accuracy and AUC."""
    # Make the continuous variables discrete
    discretizer = Discretize()
    discretizer.method = discretize.EqualWidth(n=n)
    table = discretizer(table)
    # Use the first column as the class and the remaining ones as features
    wine_domain = Domain(list(table.domain[1:]), table.domain[0])
    table = Table.from_table(domain=wine_domain, source=table)
    # Construct the learner and print the results
    nn_learner = NNClassificationLearner(hidden_layer_sizes=(10, ),
                                         max_iter=4000)
    eval_results = CrossValidation(table, [nn_learner], k=10)
    print("Accuracy of cross validation: {:.3f}".format(
        scoring.CA(eval_results)[0]))
    print("AUC: {:.3f}".format(scoring.AUC(eval_results)[0]))
예제 #24
0
 def fit_storage(self, data):
     """Train the stacked ensemble.

     Builds meta-features from k-fold CV predictions of the base learners,
     fits the aggregator on them, and returns a StackedModel wrapping base
     models retrained on the full data.
     """
     cv_res = CrossValidation(data, self.learners, k=self.k)
     use_prob = data.domain.class_var.is_discrete
     if use_prob:
         # meta-features: concatenated class probabilities of all learners
         meta_X = np.hstack(cv_res.probabilities)
     else:
         # meta-features: raw predictions, one column per learner
         meta_X = cv_res.predicted.T
     features = [ContinuousVariable('f{}'.format(i + 1))
                 for i in range(meta_X.shape[1])]
     meta_domain = Domain(features, data.domain.class_var)
     stacked_data = data.transform(meta_domain)
     stacked_data.X = meta_X
     stacked_data.Y = cv_res.actual
     base_models = [learner(data) for learner in self.learners]
     aggregate_model = self.aggregate(stacked_data)
     return StackedModel(base_models, aggregate_model, use_prob=use_prob)
예제 #25
0
    def test_LogisticRegressionNormalization(self):
        """Normalization should help with a huge-scale extra column, and
        coefficients must be scaled back to the original data space."""
        np.random.seed(42)
        new_attrs = (ContinuousVariable('c0'), ) + self.iris.domain.attributes
        new_domain = Domain(new_attrs, self.iris.domain.class_vars,
                            self.iris.domain.metas)
        # prepend a random column with values on the order of 1e6
        noise = 1000000 * np.random.random((self.iris.X.shape[0], 1))
        table = self.iris.from_numpy(new_domain,
                                     np.hstack((noise, self.iris)))
        lr = LogisticRegressionLearner(normalize=False)
        lr_norm = LogisticRegressionLearner(normalize=True)

        # normalization produces better results
        ca = CA(CrossValidation(table, [lr_norm, lr], k=3))
        self.assertGreater(ca[0], ca[1])

        # coefficients are properly scaled back to unnormalized data
        model = lr_norm(table)
        scores = np.dot(table.X, model.coefficients.T) + model.intercept
        np.testing.assert_array_equal(model(table),
                                      np.argmax(scores, axis=1))
예제 #26
0
 def test_tree(self):
     """Tree AUC on iris under 2-fold CV lies strictly in (0.8, 1)."""
     res = CrossValidation(self.iris, [SklTreeLearner()], k=2)
     auc = AUC(res)[0]
     self.assertGreater(auc, 0.8)
     self.assertLess(auc, 1.)
예제 #27
0
 def test_LogisticRegression(self):
     """LR accuracy on voting under 2-fold CV lies in (0.8, 1.0)."""
     results = CrossValidation(self.voting, [LogisticRegressionLearner()],
                               k=2)
     ca = CA(results)
     self.assertGreater(ca, 0.8)
     self.assertLess(ca, 1.0)
예제 #28
0
File: test_sgd.py  Project: qeryq/SFECOMLA
 def test_SGDClassification(self):
     """SGD classifier reaches AUC above 0.8 on iris with 3-fold CV."""
     res = CrossValidation(k=3)(self.iris, [SGDClassificationLearner()])
     self.assertGreater(AUC(res)[0], 0.8)
예제 #29
0
 def test_NuSVM(self):
     """Nu-SVM with a tiny nu reaches CA above 0.9 under 2-fold CV."""
     res = CrossValidation(k=2)(self.data, [NuSVMLearner(nu=0.01)])
     self.assertGreater(CA(res)[0], 0.9)
예제 #30
0
 def test_n_jobs_fitting(self):
     """Parallel fitting runs once the data-size threshold is lowered."""
     # drop the minimum data size so the n_jobs code path is exercised
     threshold_patch = patch(
         'Orange.evaluation.testing.CrossValidation._MIN_NJOBS_X_SIZE', 1)
     with threshold_patch:
         CrossValidation(self.heart_disease, [DummyFitter()], k=5, n_jobs=5)
예제 #31
0
 def test_LinearSVM(self):
     """Linear SVM accuracy under 2-fold CV lies in (0.8, 0.9)."""
     res = CrossValidation(self.data, [LinearSVMLearner()], k=2)
     accuracy = CA(res)[0]
     self.assertGreater(accuracy, 0.8)
     self.assertLess(accuracy, 0.9)
        return grad

    d = Orange.data.Table("housing")
    d.X = np.hstack((d.X, np.ones((d.X.shape[0], 1))))
    d.shuffle()

    #    m = LinearRegressionLearner(lambda_=1.0)
    #    print(m(d)(d))

    #    # gradient check
    #    m = LinearRegressionLearner(lambda_=1.0)
    #    theta = np.random.randn(d.X.shape[1])
    #
    #    ga = m.cost_grad(theta, d.X, d.Y.ravel())[1]
    #    gm = numerical_grad(lambda t: m.cost_grad(t, d.X, d.Y.ravel())[0], theta)
    #
    #    print(np.sum((ga - gm)**2))

    for lambda_ in (0.01, 0.03, 0.1, 0.3, 1, 3):
        m = LinearRegressionLearner(lambda_=lambda_)
        scores = []
        res = CrossValidation(d, [m], 3, False)
        for tr_ind, te_ind in res.indices:
            s = np.mean((m(d[tr_ind])(d[te_ind]) - d[te_ind].Y.ravel()) ** 2)
            scores.append(s)
        print("{:5.2f} {}".format(lambda_, np.mean(scores)))

    m = LinearRegressionLearner(lambda_=0)
    print("test data", np.mean((m(d)(d) - d.Y.ravel()) ** 2))
    print("majority", np.mean((np.mean(d.Y.ravel()) - d.Y.ravel()) ** 2))
예제 #33
0
 def test_NN_classification(self):
     """NN classifier accuracy on iris lies in (0.8, 0.99) with k=3."""
     results = CrossValidation(self.iris, [NNClassificationLearner()], k=3)
     score = CA(results)
     self.assertGreater(score, 0.8)
     self.assertLess(score, 0.99)
예제 #34
0
 def test_RandomForest(self):
     """Forest accuracy on iris lies in (0.9, 0.99) with 10-fold CV."""
     results = CrossValidation(self.iris, [RandomForestLearner()], k=10)
     accuracy = CA(results)
     self.assertGreater(accuracy, 0.9)
     self.assertLess(accuracy, 0.99)
예제 #35
0
 def test_RandomForestRegression(self):
     """Smoke test: RMSE is computable for the regression forest."""
     results = CrossValidation(self.housing,
                               [RandomForestRegressionLearner()], k=10)
     _ = RMSE(results)
예제 #36
0
 def test_allnan_cv(self):
     """Regression test for GH 2740: CV must not fail on the lenses data."""
     data = Table(test_filename('datasets/lenses.tab'))
     results = CrossValidation()(data, [self.learner])
     # no fold may have raised during fitting or prediction
     self.assertFalse(any(results.failed))