Ejemplo n.º 1
0
 def test_adaboost_base_estimator(self):
     """AdaBoost over a full tree must score higher than over a stump.

     Two boosted learners are built, one with a depth-1 tree and one with
     an unrestricted tree as base estimator, cross-validated with 4 folds
     on ``self.iris`` and compared by classification accuracy.
     """
     # Seed NumPy's global generator (presumably so the comparison below
     # is repeatable between runs).
     np.random.seed(0)
     base_estimators = (TreeLearner(max_depth=1), TreeLearner())
     boosted = [SklAdaBoostLearner(base_estimator=base)
                for base in base_estimators]
     scores = CA(CrossValidation(self.iris, boosted, k=4))
     # index 0 is the stump-based learner, index 1 the tree-based one
     self.assertLess(scores[0], scores[1])
    def test_set_learner(self):
        """
        Test if learner is set correctly

        With no learner on the input the widget must use its default
        ``LEARNER`` (``LogisticRegressionLearner``); with a learner on the
        input it must keep it in ``learner_other`` and both ``learner`` and
        the learner output must have the input learner's type.
        """
        w = self.widget

        def assert_default_learner():
            # no input learner: widget falls back to its own LEARNER
            self.assertIsNone(w.learner_other)
            self.assertIsInstance(w.learner, LogisticRegressionLearner)
            self.assertIsInstance(w.learner, w.LEARNER)
            self.assertEqual(type(self.get_output(w.Outputs.learner)),
                             type(LogisticRegressionLearner()))

        def assert_input_learner(expected):
            # input learner is remembered and determines the learner type
            self.assertEqual(w.learner_other, expected)
            self.assertEqual(type(w.learner), type(expected))
            self.assertEqual(type(self.get_output(w.Outputs.learner)),
                             type(expected))

        # check if empty
        assert_default_learner()

        # check if learners set correctly
        learner = TreeLearner()
        self.send_signal(w.Inputs.learner, learner)
        assert_input_learner(learner)

        # after learner is removed there should be LEARNER used
        self.send_signal(w.Inputs.learner, None)
        assert_default_learner()

        # set it again just in case something goes wrong
        learner = RandomForestLearner()
        self.send_signal(w.Inputs.learner, learner)
        assert_input_learner(learner)

        # change learner this time not from None
        learner = TreeLearner()
        self.send_signal(w.Inputs.learner, learner)
        assert_input_learner(learner)
Ejemplo n.º 3
0
    def add_main_layout(self):
        """Populate the control area with the "Parameters" box.

        Sets ``self.base_estimator`` to a fresh ``TreeLearner`` and shows its
        name in ``self.base_label``, then adds spin boxes bound to the
        ``n_estimators`` and ``learning_rate`` attributes and finally lets
        ``add_specific_parameters`` append further controls to the same box.
        """
        params_box = gui.widgetBox(self.controlArea, "Parameters")
        self.base_estimator = TreeLearner()
        self.base_label = gui.label(
            params_box, self, "Base estimator: " + self.base_estimator.name)

        # options shared by both numeric controls
        shared = dict(alignment=Qt.AlignRight, callback=self.settings_changed)

        gui.spin(params_box, self, "n_estimators", 1, 100,
                 label="Number of estimators:", **shared)
        gui.doubleSpin(params_box, self, "learning_rate", 1e-5, 1.0, 1e-5,
                       label="Learning rate:", decimals=5, controlWidth=90,
                       **shared)
        self.add_specific_parameters(params_box)
Ejemplo n.º 4
0
    def test_uses_preprocessors(self):
        """A learner must call the preprocessors it was constructed with.

        A mock preprocessor that returns the iris table is passed to
        ``TreeLearner``; fitting on iris must call the mock with that table.
        """
        data = Table('iris')
        preprocessor = Mock(return_value=data)

        learner = TreeLearner(preprocessors=[preprocessor])
        learner(data)

        preprocessor.assert_called_with(data)
    def test_raise_no_classifier_error(self):
        """
        Regression learner must raise error

        The ``no_classifier`` error has to be shown for a regression learner
        on the input and hidden again for no learner or any classifier.
        """
        w = self.widget

        def error_shown():
            # looked up afresh on every call, exactly when it is asserted
            return w.Error.no_classifier.is_shown()

        # linear regression learner is regression - should raise
        self.send_signal(w.Inputs.learner, LinearRegressionLearner())
        self.assertTrue(error_shown())

        # make it empty to test if error disappear
        self.send_signal(w.Inputs.learner, None)
        self.assertFalse(error_shown())

        # test with some other learners
        classifiers = (LogisticRegressionLearner, TreeLearner,
                       RandomForestLearner, SVMLearner)
        for classifier in classifiers:
            self.send_signal(w.Inputs.learner, classifier())
            self.assertFalse(error_shown())
Ejemplo n.º 6
0
 def test_missing_values(self):
     """SHAP values of a tree model on heart_disease match the data shape.

     A ``TreeLearner`` model is fitted on ``heart_disease.tab`` (presumably
     chosen because it contains missing values - confirm) and explained on
     the same data; two SHAP arrays are expected, each shaped like ``X``.
     """
     data = Table("heart_disease.tab")
     model = TreeLearner()(data)
     # only the first of the four returned items is inspected
     shap_values, _, _, _ = compute_shap_values(model, data, data)
     self.assertEqual(len(shap_values), 2)
     for index in (0, 1):
         self.assertTupleEqual(shap_values[index].shape, data.X.shape)
Ejemplo n.º 7
0
    @classmethod
    def setUpClass(cls):
        """Create the tree model used as the default input signal.

        Declared as a classmethod because ``unittest`` invokes
        ``setUpClass`` on the class without passing any argument.
        """
        super().setUpClass()
        WidgetOutputsTestMixin.init(cls)

        # fit a tree on the class data and attach that data to the model
        tree = TreeLearner()
        cls.model = tree(cls.data)
        cls.model.instances = cls.data

        # name and value of the signal the tests send to the widget
        cls.signal_name = "Tree"
        cls.signal_data = cls.model
Ejemplo n.º 8
0
 @classmethod
 def setUpClass(cls):
     """Precompute the cross-validation results shared by the tests.

     Three result sets are stored on the class: naive Bayes on iris, naive
     Bayes plus tree on iris, and naive Bayes plus tree on titanic, all
     with 3 folds and ``store_data=True``.

     Declared as a classmethod because ``unittest`` invokes ``setUpClass``
     on the class without passing any argument.
     """
     super().setUpClass()
     bayes = NaiveBayesLearner()
     tree = TreeLearner()
     iris = Table("iris")
     titanic = Table("titanic")
     # settings common to every cross-validation run below
     common = dict(k=3, store_data=True)
     cls.results_1_iris = CrossValidation(iris, [bayes], **common)
     cls.results_2_iris = CrossValidation(iris, [bayes, tree], **common)
     cls.results_2_titanic = CrossValidation(titanic, [bayes, tree],
                                             **common)
 def test_input_learner(self):
     """Check if base learner properly changes with learner on the input"""
     depth = 2

     # without an input the base estimator is a tree with no depth limit set
     initial = self.widget.base_estimator
     self.assertIsInstance(initial, TreeLearner)
     self.assertIsNone(initial.params.get("max_depth"))

     # a learner on the input replaces the base estimator
     self.send_signal("Learner", TreeLearner(max_depth=depth))
     current = self.widget.base_estimator
     self.assertEqual(current.params.get("max_depth"), depth)

     # after applying, the output learner carries the new base estimator
     self.widget.apply_button.button.click()
     learner_out = self.get_output("Learner")
     base_out = learner_out.params.get("base_estimator")
     self.assertEqual(base_out.max_depth, depth)
Ejemplo n.º 10
0
    @classmethod
    def setUpClass(cls):
        """Build the class-level fixtures shared by the tests.

        Stores on the class:

        - ``model``: a tree fitted on ``cls.data`` (also the default signal),
        - ``data_same_entropy``: a tree fitted on ``same_entropy.tab``,
        - ``tree``: a ``TreeModel`` wrapping a small hand-built node
          structure over a three-variable domain.

        Declared as a classmethod because ``unittest`` invokes
        ``setUpClass`` on the class without passing any argument.
        """
        super().setUpClass()
        WidgetOutputsTestMixin.init(cls)

        # fit a tree on the class data and attach that data to the model
        tree = TreeLearner()
        cls.model = tree(cls.data)
        cls.model.instances = cls.data

        # name and value of the signal the tests send to the widget
        cls.signal_name = "Tree"
        cls.signal_data = cls.model

        # Load a dataset that contains two variables with the same entropy
        # (located in tests/datasets three directory levels above this file)
        data_same_entropy = Table(
            path.join(path.dirname(path.dirname(path.dirname(__file__))),
                      "tests", "datasets", "same_entropy.tab"))
        cls.data_same_entropy = tree(data_same_entropy)
        cls.data_same_entropy.instances = data_same_entropy

        # Hand-built tree: a root that splits on "aaa" into three children.
        vara = DiscreteVariable("aaa", values=("e", "f", "g"))
        root = DiscreteNode(vara, 0, np.array([42, 8]))
        root.subset = np.arange(50)

        # The children's subsets cover rows 0-15, 16-29 and 30-49.
        varb = DiscreteVariable("bbb", values=tuple("ijkl"))
        child0 = MappedDiscreteNode(varb, 1, np.array([0, 1, 0, 0]), (38, 5))
        child0.subset = np.arange(16)
        child1 = Node(None, 0, (13, 3))
        child1.subset = np.arange(16, 30)
        varc = ContinuousVariable("ccc")
        child2 = NumericNode(varc, 2, 42, (78, 12))
        child2.subset = np.arange(30, 50)
        root.children = (child0, child1, child2)

        # children of the first child, splitting its rows into 0-9 and 10-15
        child00 = Node(None, 0, (15, 4))
        child00.subset = np.arange(10)
        child01 = Node(None, 0, (10, 5))
        child01.subset = np.arange(10, 16)
        child0.children = (child00, child01)

        # children of the third child, splitting its rows into 30-34 and 35-49
        child20 = Node(None, 0, (90, 4))
        child20.subset = np.arange(30, 35)
        child21 = Node(None, 0, (70, 9))
        child21.subset = np.arange(35, 50)
        child2.children = (child20, child21)

        # 50 rows obtained by repeating all 24 value combinations and cutting
        domain = Domain([vara, varb, varc], ContinuousVariable("y"))
        t = [[i, j, k] for i in range(3) for j in range(4) for k in (40, 44)]
        x = np.array((t * 3)[:50])
        data = Table.from_numpy(domain, x, np.arange(len(x)))
        cls.tree = TreeModel(data, root)
Ejemplo n.º 11
0
    @classmethod
    def setUpClass(cls):
        """Create the tree models shared by the tests.

        Stores a tree fitted on ``cls.data`` (also the default signal) and a
        second tree fitted on the ``same_entropy.tab`` data set.

        Declared as a classmethod because ``unittest`` invokes
        ``setUpClass`` on the class without passing any argument.
        """
        super().setUpClass()
        WidgetOutputsTestMixin.init(cls)

        # fit a tree on the class data and attach that data to the model
        tree = TreeLearner()
        cls.model = tree(cls.data)
        cls.model.instances = cls.data

        # name and value of the signal the tests send to the widget
        cls.signal_name = "Tree"
        cls.signal_data = cls.model

        # Load a dataset that contains two variables with the same entropy
        # (located in tests/datasets three directory levels above this file)
        data_same_entropy = Table(
            path.join(path.dirname(path.dirname(path.dirname(__file__))),
                      "tests", "datasets", "same_entropy.tab"))
        cls.data_same_entropy = tree(data_same_entropy)
        cls.data_same_entropy.instances = data_same_entropy
    @classmethod
    def setUpClass(cls):
        """Precompute the evaluation results shared by the tests.

        Runs 3-fold cross-validation (``store_data=True``) for naive Bayes
        on iris, and for naive Bayes plus tree on iris and on titanic. The
        single-learner iris results double as the default input signal.

        Declared as a classmethod because ``unittest`` invokes
        ``setUpClass`` on the class without passing any argument.
        """
        super().setUpClass()
        WidgetOutputsTestMixin.init(cls)

        bayes = NaiveBayesLearner()
        tree = TreeLearner()
        # `data` is defined in WidgetOutputsTestMixin, pylint: disable=no-member
        cls.iris = cls.data
        titanic = Table("titanic")
        # one configured validator reused for all three runs
        cv = CrossValidation(k=3, store_data=True)
        cls.results_1_iris = cv(cls.iris, [bayes])
        cls.results_2_iris = cv(cls.iris, [bayes, tree])
        cls.results_2_titanic = cv(titanic, [bayes, tree])

        # name and value of the signal the tests send to the widget
        cls.signal_name = "Evaluation Results"
        cls.signal_data = cls.results_1_iris
        cls.same_input_output_domain = False
Ejemplo n.º 13
0
    @classmethod
    def setUpClass(cls):
        """Precompute the evaluation results shared by the tests.

        Runs 3-fold cross-validation (``store_data=True``) for naive Bayes
        on ``cls.data``, and for naive Bayes plus tree on ``cls.data`` and on
        titanic. The single-learner results double as the default input
        signal.

        Declared as a classmethod because ``unittest`` invokes
        ``setUpClass`` on the class without passing any argument.
        """
        super().setUpClass()
        WidgetOutputsTestMixin.init(cls)

        bayes = NaiveBayesLearner()
        tree = TreeLearner()
        iris = cls.data
        titanic = Table("titanic")
        # settings common to every cross-validation run below
        common = dict(k=3, store_data=True)
        cls.results_1_iris = CrossValidation(iris, [bayes], **common)
        cls.results_2_iris = CrossValidation(iris, [bayes, tree], **common)
        cls.results_2_titanic = CrossValidation(titanic, [bayes, tree],
                                                **common)

        # name and value of the signal the tests send to the widget
        cls.signal_name = "Evaluation Results"
        cls.signal_data = cls.results_1_iris
        cls.same_input_output_domain = False
Ejemplo n.º 14
0
    def test_orange_models(self):
        """Permutation importance works for Orange models and scorers.

        For each (data, learner, scorer) combination the first returned
        item must have shape ``(number of attributes, n_repeats)`` and the
        second must list the attribute names in domain order.
        """
        n_repeats = self.n_repeats
        cases = (
            (self.heart, NaiveBayesLearner, CA),
            (self.iris, TreeLearner, AUC),
            (self.housing, TreeRegressionLearner, MSE),
        )
        for data, learner_cls, scorer_cls in cases:
            model = learner_cls()(data)
            res = permutation_feature_importance(
                model, data, scorer_cls(), n_repeats)
            attributes = data.domain.attributes
            self.assertEqual(res[0].shape, (len(attributes), n_repeats))
            self.assertEqual(res[1], [attr.name for attr in attributes])
Ejemplo n.º 15
0
    def test_set_learner(self):
        """
        Test if learner is set correctly

        A learner on the input must be kept in ``learner_other`` and
        determine the type of ``learner`` and of the learner output; once it
        is removed the widget must return to its default ``LEARNER``
        (``LogisticRegressionLearner``).
        """
        w = self.widget

        learner = TreeLearner()
        self.send_signal(w.Inputs.learner, learner)

        # check if learners set correctly
        self.assertEqual(w.learner_other, learner)
        self.assertEqual(type(w.learner), type(learner))
        self.assertEqual(type(self.get_output(w.Outputs.learner)),
                         type(learner))

        # after learner is removed there should be LEARNER used
        self.send_signal(w.Inputs.learner, None)
        self.assertIsNone(w.learner_other)
        self.assertIsInstance(w.learner, LogisticRegressionLearner)
        self.assertIsInstance(w.learner, w.LEARNER)
        self.assertEqual(type(self.get_output(w.Outputs.learner)),
                         type(LogisticRegressionLearner()))
Ejemplo n.º 16
0
    def test_reprs(self):
        """``repr`` of a learner must evaluate back to an equal-repr learner.

        Each learner is built with some arguments, its ``repr`` is
        evaluated and the ``repr`` of the result must equal the original.
        """
        learners = [
            LogisticRegressionLearner(tol=0.0002),
            MajorityLearner(),
            NaiveBayesLearner(),
            RandomForestLearner(bootstrap=False, n_jobs=3),
            SimpleTreeLearner(seed=1, bootstrap=True),
            SoftmaxRegressionLearner(),
            SVMLearner(shrinking=False),
            LinearSVMLearner(tol=0.022, dual=False),
            NuSVMLearner(tol=0.003, cache_size=190),
            OneClassSVMLearner(degree=2),
            TreeLearner(max_depth=3, min_samples_split=1),
            KNNLearner(n_neighbors=4),
            EllipticEnvelopeLearner(store_precision=False),
            SimpleRandomForestLearner(n_estimators=20),
        ]

        for learner in learners:
            expected = repr(learner)
            # eval only ever sees reprs produced by the learners built above
            rebuilt = eval(expected)
            self.assertEqual(repr(rebuilt), expected)
Ejemplo n.º 17
0
 def test_tree(self):
     """AUC of a 2-fold cross-validated tree on iris lies in (0.8, 1)."""
     results = CrossValidation(self.iris, [TreeLearner()], k=2)
     auc = AUC(results)[0]
     self.assertGreater(auc, 0.8)
     self.assertLess(auc, 1.)
    def test_init_learner(self):
        """
        Test init

        Follows the widget's ``learner`` while learners and preprocessors
        are connected to and removed from the inputs: its type must follow
        the input learner (or the default ``LEARNER`` when there is none)
        and its preprocessors must contain the widget's default
        preprocessor, plus the input preprocessor while one is connected.
        """
        w = self.widget

        def has_preprocessor(kind):
            # True if any preprocessor of the current learner is a `kind`
            return any(isinstance(pp, kind) for pp in w.learner.preprocessors)

        def assert_default_learner():
            self.assertIsInstance(w.learner, LogisticRegressionLearner)
            self.assertIsInstance(w.learner, w.LEARNER)
            self.assertTrue(has_preprocessor(w.default_preprocessor))

        def assert_input_learner(expected):
            self.assertEqual(type(w.learner), type(expected))
            self.assertTrue(has_preprocessor(w.default_preprocessor))

        # check if empty
        assert_default_learner()

        learner = TreeLearner()
        self.send_signal(w.Inputs.learner, learner)

        # check if learners set correctly
        self.assertEqual(type(w.learner), type(learner))

        # after learner is removed there should be LEARNER used
        self.send_signal(w.Inputs.learner, None)
        assert_default_learner()

        # set it again just in case something goes wrong
        learner = RandomForestLearner()
        self.send_signal(w.Inputs.learner, learner)
        assert_input_learner(learner)

        # change learner this time not from None
        learner = TreeLearner()
        self.send_signal(w.Inputs.learner, learner)
        assert_input_learner(learner)

        # set other preprocessor
        preprocessor = Discretize
        # selected this preprocessor because know that not exist in LogReg
        self.send_signal(w.Inputs.preprocessor, preprocessor())
        assert_input_learner(learner)
        self.assertTrue(has_preprocessor(preprocessor))

        # remove preprocessor
        self.send_signal(w.Inputs.preprocessor, None)
        assert_input_learner(learner)
        self.assertFalse(has_preprocessor(preprocessor))
Ejemplo n.º 19
0
 def test_get_tree_classification(self):
     """A model fitted by ``TreeLearner`` on iris exposes a ``Tree``."""
     model = TreeLearner()(Table('iris'))
     self.assertIsInstance(model.tree, Tree)
Ejemplo n.º 20
0
from Orange.data import Domain, Table
from Orange.classification import LogisticRegressionLearner
from Orange.classification import NaiveBayesLearner
from Orange.classification import TreeLearner
from Orange.classification import RandomForestLearner
from Orange.classification import KNNLearner
from Orange.classification import SVMLearner

### create models ###

# one default-configured instance of each classifier, in a fixed order
models = [
    make_learner()
    for make_learner in (
        LogisticRegressionLearner,
        NaiveBayesLearner,
        TreeLearner,
        RandomForestLearner,
        KNNLearner,
        SVMLearner,
    )
]

### read train data ###

train = Table.from_file('train.csv')
# move `sex` from X to Y (from attributes/features to class_var/target):
# the first attribute becomes the class variable, the rest stay attributes
domain = Domain(
    train.domain.attributes[1:],
    train.domain.attributes[0],
)
train = train.transform(domain)

# show the resulting feature matrix and target, each under its own heading
print('\n=== train.X ===', train.X, sep='\n')
print('\n=== train.Y ===', train.Y, sep='\n')
Ejemplo n.º 21
0
class OWAdaBoostClassification(OWBaseLearner):
    """Widget that configures an AdaBoost classification learner.

    The base estimator comes from the "Learner" input and falls back to
    ``DEFAULT_BASE_ESTIMATOR`` when nothing is connected; the number of
    estimators, the learning rate and the algorithm are widget settings.
    """
    name = "AdaBoost"
    description = ("An ensemble meta-algorithm that combines weak learners "
                   "and adapts to the 'hardness' of each training sample. ")
    icon = "icons/AdaBoost.svg"
    priority = 80

    LEARNER = SklAdaBoostLearner

    inputs = [("Learner", LearnerClassification, "set_base_learner")]

    # choices offered in the "Algorithm" combo box; `algorithm` indexes them
    losses = ["SAMME", "SAMME.R"]

    n_estimators = Setting(50)
    learning_rate = Setting(1.)
    algorithm = Setting(0)

    # one instance shared by the whole class, used when no learner is given
    DEFAULT_BASE_ESTIMATOR = TreeLearner()

    def add_main_layout(self):
        """Populate the control area with the "Parameters" box."""
        params_box = gui.widgetBox(self.controlArea, "Parameters")
        self.base_estimator = self.DEFAULT_BASE_ESTIMATOR
        self.base_label = gui.label(
            params_box, self, "Base estimator: " + self.base_estimator.name)

        # options shared by both numeric controls
        shared = dict(alignment=Qt.AlignRight, callback=self.settings_changed)

        self.n_estimators_spin = gui.spin(
            params_box, self, "n_estimators", 1, 100,
            label="Number of estimators:", **shared)
        self.learning_rate_spin = gui.doubleSpin(
            params_box, self, "learning_rate", 1e-5, 1.0, 1e-5,
            label="Learning rate:", decimals=5, controlWidth=90, **shared)
        self.add_specific_parameters(params_box)

    def add_specific_parameters(self, box):
        """Add the "Algorithm" combo box, bound to ``algorithm``, to *box*."""
        self.algorithm_combo = gui.comboBox(
            box, self, "algorithm",
            label="Algorithm:",
            items=self.losses,
            orientation=Qt.Horizontal,
            callback=self.settings_changed)

    def create_learner(self):
        """Return a ``LEARNER`` instance built from the current settings."""
        arguments = dict(
            base_estimator=self.base_estimator,
            n_estimators=self.n_estimators,
            learning_rate=self.learning_rate,
            preprocessors=self.preprocessors,
            algorithm=self.losses[self.algorithm],
        )
        return self.LEARNER(**arguments)

    def set_base_learner(self, learner):
        """Handle the "Learner" input.

        A falsy *learner* (e.g. ``None``) restores the default base
        estimator. The label is refreshed and, if ``auto_apply`` is set,
        ``apply`` is called.
        """
        self.base_estimator = learner or self.DEFAULT_BASE_ESTIMATOR
        self.base_label.setText("Base estimator: " + self.base_estimator.name)
        if self.auto_apply:
            self.apply()

    def get_learner_parameters(self):
        """Return (label, value) pairs describing the current parameters."""
        algorithm_name = self.losses[self.algorithm].capitalize()
        return (
            ("Base estimator", self.base_estimator),
            ("Number of estimators", self.n_estimators),
            ("Algorithm", algorithm_name),
        )
Ejemplo n.º 22
0
 def test_classification(self):
     """A tree fitted on iris must reproduce the labels of that same data."""
     data = Table('iris')
     model = TreeLearner()(data)
     predictions = model(data)
     matches = data.Y.flatten() == predictions
     self.assertTrue(np.all(matches))