Ejemplo n.º 1
0
    def test_saved_selection(self):
        """Check that a plot selection is restored from stored settings.

        A selection is made by a left-button press/move/release over the
        plot, the widget settings are packed, and a second widget created
        from those settings must output the same selected rows after it
        receives the same data and an identically configured model.
        """
        forest = RandomForestLearner(random_state=0)(self.heart)

        self.send_signal(self.widget.Inputs.data, self.heart)
        self.send_signal(self.widget.Inputs.model, forest)
        self.wait_until_finished()

        # Mouse positions are offsets from the view position of the layout
        # item found at (0, ITEM_COLUMN).
        plot = self.widget.plot
        anchor_item = plot.layout().itemAt(0, plot.ITEM_COLUMN)
        origin = self.widget.view.mapFromScene(anchor_item.scenePos())
        viewport = self.widget.view.viewport()
        QTest.mousePress(viewport, Qt.LeftButton, pos=origin + QPoint(0, 10))
        mouseMove(viewport, Qt.LeftButton, pos=origin + QPoint(300, 20))
        QTest.mouseRelease(viewport, Qt.LeftButton,
                           pos=origin + QPoint(300, 30))
        expected = self.get_output(self.widget.Outputs.selected_data)
        self.assertIsNotNone(expected)

        # Recreate the widget from the packed settings and replay the inputs.
        packed = self.widget.settingsHandler.pack_data(self.widget)
        restored = self.create_widget(OWPermutationImportance,
                                      stored_settings=packed)
        self.send_signal(restored.Inputs.data, self.heart, widget=restored)
        forest = RandomForestLearner(random_state=0)(self.heart)
        self.send_signal(restored.Inputs.model, forest, widget=restored)
        self.wait_until_finished(widget=restored)
        actual = self.get_output(restored.Outputs.selected_data,
                                 widget=restored)
        np.testing.assert_array_equal(actual.X, expected.X)
Ejemplo n.º 2
0
    def test_sparse_data(self):
        """Check that sparse/dense data and model pairings show no error.

        Three pairings are sent to the widget in turn: sparse data with a
        model trained on sparse data, sparse data with a model trained on
        dense data, and dense data with a model trained on sparse data.
        """
        dense_data = self.heart
        sparse_data = dense_data.to_sparse()
        with sparse_data.unlocked():
            sparse_data.X = sp.csr_matrix(sparse_data.X)

        def assert_no_error(table, model):
            # Feed the pair to the widget and verify that both error
            # messages stay hidden once processing has finished.
            self.send_signal(self.widget.Inputs.data, table)
            self.send_signal(self.widget.Inputs.model, model)
            self.wait_until_finished()
            self.assertFalse(
                self.widget.Error.domain_transform_err.is_shown())
            self.assertFalse(self.widget.Error.unknown_err.is_shown())

        sparse_model = RandomForestLearner(random_state=0)(sparse_data)
        assert_no_error(sparse_data, sparse_model)

        dense_model = RandomForestLearner(random_state=0)(dense_data)
        assert_no_error(sparse_data, dense_model)

        assert_no_error(dense_data, sparse_model)
Ejemplo n.º 3
0
    def test_saved_selection(self):
        """Check that a violin plot selection is restored from settings.

        After a left-button press/move/release over the plot, the widget
        settings are packed and a second widget built from them must output
        the same selected rows for the same data and an identically
        configured model.
        """
        self.send_signal(self.widget.Inputs.data, self.heart)
        forest = RandomForestLearner(random_state=42)(self.heart)
        self.send_signal(self.widget.Inputs.model, forest)
        self.wait_until_finished()

        # Mouse positions are offsets from the view position of the layout
        # item found at (0, VIOLIN_COLUMN).
        violin_plot = self.widget._violin_plot
        anchor_item = violin_plot.layout().itemAt(
            0, violin_plot.VIOLIN_COLUMN)
        origin = self.widget.view.mapFromScene(anchor_item.scenePos())
        viewport = self.widget.view.viewport()
        QTest.mousePress(viewport, Qt.LeftButton,
                         pos=origin + QPoint(250, 10))
        mouseMove(viewport, Qt.LeftButton, pos=origin + QPoint(300, 20))
        QTest.mouseRelease(viewport, Qt.LeftButton,
                           pos=origin + QPoint(300, 30))
        expected = self.get_output(self.widget.Outputs.selected_data)

        # Recreate the widget from the packed settings and replay the inputs.
        packed = self.widget.settingsHandler.pack_data(self.widget)
        restored = self.create_widget(OWExplainModel, stored_settings=packed)
        self.send_signal(restored.Inputs.data, self.heart, widget=restored)
        forest = RandomForestLearner(random_state=42)(self.heart)
        self.send_signal(restored.Inputs.model, forest, widget=restored)
        self.wait_until_finished(widget=restored)
        actual = self.get_output(restored.Outputs.selected_data,
                                 widget=restored)
        np.testing.assert_array_equal(actual.X, expected.X)
Ejemplo n.º 4
0
 def test_classification_scorer(self):
     """Check shape and ranking of random forest feature scores on iris.

     The score array must have one column per attribute, its first row
     must not sum to zero, and the two highest-scored attributes of that
     row must be 'petal length' and 'petal width'.
     """
     attributes = self.iris.domain.attributes
     scores = RandomForestLearner().score_data(self.iris)

     self.assertEqual(scores.shape[1], len(attributes))
     self.assertNotEqual(sum(scores[0]), 0)

     # argsort is ascending, so the last two indices are the top scorers.
     best_two = np.argsort(scores[0])[-2:]
     best_names = sorted(attributes[i].name for i in best_two)
     self.assertEqual(['petal length', 'petal width'], best_names)
Ejemplo n.º 5
0
 def test_classification_scorer(self):
     """Verify random forest scoring of the iris attributes.

     Expects one score column per attribute, a non-zero sum for the first
     score row, and 'petal length' / 'petal width' as its two top-ranked
     attributes.
     """
     forest = RandomForestLearner()
     score_rows = forest.score_data(self.iris)
     iris_attrs = self.iris.domain.attributes

     self.assertEqual(score_rows.shape[1], len(iris_attrs))
     self.assertNotEqual(sum(score_rows[0]), 0)

     # The two largest scores sit at the end of the ascending argsort.
     top_names = [iris_attrs[idx].name
                  for idx in np.argsort(score_rows[0])[-2:]]
     top_names.sort()
     self.assertEqual(['petal length', 'petal width'], top_names)
Ejemplo n.º 6
0
 def test_classification_scorer(self):
     """Check length and ranking of random forest feature scores on iris.

     There must be one score per attribute, the scores must not sum to
     zero, and the two highest-scored attributes must be 'petal length'
     and 'petal width'.
     """
     iris = Table('iris')
     scores = RandomForestLearner().score_data(iris)
     attributes = iris.domain.attributes

     self.assertEqual(len(scores), len(attributes))
     self.assertNotEqual(sum(scores), 0)

     # argsort is ascending, so the last two indices are the top scorers.
     best_two = np.argsort(scores)[-2:]
     best_names = sorted(attributes[i].name for i in best_two)
     self.assertEqual(['petal length', 'petal width'], best_names)
Ejemplo n.º 7
0
 def test_scorer_feature(self):
     """Check that scoring a single attribute equals its all-attribute score.

     NumPy's global seed is reset to 42 before every scoring call,
     presumably so that the individual runs are comparable.
     """
     np.random.seed(42)
     table = Table('test4.tab')
     forest = RandomForestLearner()
     all_scores = forest.score_data(table)
     for index, attribute in enumerate(table.domain.attributes):
         np.random.seed(42)
         single_score = forest.score_data(table, attribute)
         expected = all_scores[index]
         self.assertEqual(single_score, expected)
Ejemplo n.º 8
0
 def test_scorer_feature(self):
     """Check that scoring one attribute matches its all-attribute column.

     NumPy's global seed is reset to 42 before every scoring call,
     presumably so that the individual runs are comparable.
     """
     np.random.seed(42)
     table = Table('test4.tab')
     forest = RandomForestLearner()
     all_scores = forest.score_data(table)
     for column, attribute in enumerate(table.domain.attributes):
         np.random.seed(42)
         single_score = forest.score_data(table, attribute)
         expected = all_scores[:, column]
         np.testing.assert_array_almost_equal(single_score, expected)
Ejemplo n.º 9
0
 def test_scorer_feature(self):
     """Compare per-attribute scores with the full score array.

     Each attribute of the test4 data set is scored on its own and the
     result must almost equal the matching column of the scores computed
     for all attributes. The global NumPy seed is set to 42 ahead of each
     scoring call.
     """
     np.random.seed(42)
     dataset = Table(test_filename('datasets/test4.tab'))
     scorer = RandomForestLearner()
     full_scores = scorer.score_data(dataset)
     for col, feature in enumerate(dataset.domain.attributes):
         np.random.seed(42)
         feature_score = scorer.score_data(dataset, feature)
         np.testing.assert_array_almost_equal(feature_score,
                                              full_scores[:, col])
Ejemplo n.º 10
0
 def test_classification_scorer(self):
     """Verify random forest scoring of the iris attributes.

     Expects one score per attribute, a non-zero score sum, and
     'petal length' / 'petal width' as the two top-ranked attributes.
     """
     iris = Table('iris')
     forest = RandomForestLearner()
     scores = forest.score_data(iris)
     iris_attrs = iris.domain.attributes

     self.assertEqual(len(scores), len(iris_attrs))
     self.assertNotEqual(sum(scores), 0)

     # The two largest scores sit at the end of the ascending argsort.
     top_names = [iris_attrs[idx].name for idx in np.argsort(scores)[-2:]]
     top_names.sort()
     self.assertEqual(['petal length', 'petal width'], top_names)
Ejemplo n.º 11
0
 def test_RandomForest(self):
     """Check that 10-fold CV accuracy of a random forest on iris is sane.

     The classification accuracy must lie strictly between 0.9 and 0.99.
     """
     iris = Table('iris')
     learners = [RandomForestLearner()]
     accuracy = CA(CrossValidation(iris, learners, k=10))
     self.assertGreater(accuracy, 0.9)
     self.assertLess(accuracy, 0.99)
Ejemplo n.º 12
0
 def test_predict_single_instance(self):
     """Check that a random forest model can be called on single instances.

     Every iris instance is passed to the model both with the default
     return type and with ``ValueProbs``; the latter result must unpack
     into exactly two items. No values are asserted.
     """
     iris = Table('iris')
     model = RandomForestLearner()(iris)
     for instance in iris:
         model(instance)
         # The unpack itself is the check: a value and its probabilities.
         _value, _probs = model(instance, model.ValueProbs)
Ejemplo n.º 13
0
 def test_missing_values(self):
     """Check permutation importance output on the heart data.

     The first result item must have shape (number of attributes,
     n_repeats) and the second must list the attribute names in domain
     order.
     """
     heart = self.heart
     attributes = heart.domain.attributes
     forest = RandomForestLearner(random_state=0)(heart)

     result = permutation_feature_importance(
         forest, heart, CA(), self.n_repeats
     )

     expected_shape = (len(attributes), self.n_repeats)
     self.assertEqual(result[0].shape, expected_shape)
     expected_names = [attribute.name for attribute in attributes]
     self.assertEqual(result[1], expected_names)
Ejemplo n.º 14
0
    def test_retain_data(self):
        """Check that permutation importance leaves the input X unchanged."""
        heart = self.heart
        # Snapshot the feature matrix before the computation runs.
        x_before = heart.X.copy()

        forest = RandomForestLearner(random_state=0)(heart)
        permutation_feature_importance(forest, heart, CA(), self.n_repeats)

        np.testing.assert_array_equal(heart.X, x_before)
Ejemplo n.º 15
0
 def setUpClass(cls):
     """Load the shared data sets and train the shared forest models.

     Stores the iris, heart_disease and housing tables on the class,
     together with a classification forest trained on iris and a
     regression forest trained on housing, both with ``random_state=0``.
     """
     super().setUpClass()

     # Data sets shared by all tests of the class.
     cls.iris = Table("iris")
     cls.heart = Table("heart_disease")
     cls.housing = Table("housing")

     # Models use a fixed seed so that test results are reproducible.
     seeded = {"random_state": 0}
     cls.rf_cls = RandomForestLearner(**seeded)(cls.iris)
     cls.rf_reg = RandomForestRegressionLearner(**seeded)(cls.housing)
Ejemplo n.º 16
0
 def test_RandomForest(self):
     """Check that 10-fold CV accuracy of a random forest on iris is sane.

     The classification accuracy must lie strictly between 0.9 and 0.99.
     """
     ten_fold = CrossValidation(k=10)
     accuracy = CA(ten_fold(self.iris, [RandomForestLearner()]))
     self.assertGreater(accuracy, 0.9)
     self.assertLess(accuracy, 0.99)
Ejemplo n.º 17
0
 def setUpClass(cls):
     """Load the shared data sets and train the shared forest models.

     Stores the heart_disease and housing tables on the class, together
     with a classification forest trained on heart_disease and a
     regression forest trained on housing, both with ``random_state=0``.
     """
     super().setUpClass()

     # Data sets shared by all tests of the class.
     cls.heart = Table("heart_disease")
     cls.housing = Table("housing")

     # Both learners are seeded identically for reproducible models.
     cls.rf_cls = RandomForestLearner(random_state=0)(cls.heart)
     cls.rf_reg = RandomForestRegressionLearner(random_state=0)(cls.housing)
Ejemplo n.º 18
0
 def test_RandomForest(self):
     """Check that 10-fold CV accuracy of a random forest on iris is sane.

     The classification accuracy must lie strictly between 0.9 and 0.99.
     """
     iris = Orange.data.Table('iris')
     learners = [RandomForestLearner()]
     cv_results = Orange.evaluation.CrossValidation(iris, learners, k=10)
     accuracy = Orange.evaluation.CA(cv_results)
     self.assertGreater(accuracy, 0.9)
     self.assertLess(accuracy, 0.99)
Ejemplo n.º 19
0
 def test_get_classification_trees(self):
     """Check that a forest model exposes its individual trees.

     A forest built with five estimators must report five trees, and its
     first tree must predict 0 for the first iris instance.
     """
     n_trees = 5
     model = RandomForestLearner(n_estimators=n_trees)(self.iris)
     self.assertEqual(len(model.trees), n_trees)

     first_tree = model.trees[0]
     prediction = first_tree(self.iris[0])
     self.assertEqual(prediction, 0)
Ejemplo n.º 20
0
 def test_input_learner_disconnect(self):
     """Check the base learner after disconnecting the learner input.

     While a random forest learner is connected it becomes the widget's
     base estimator; after the input is removed the widget must fall back
     to ``DEFAULT_BASE_ESTIMATOR``.
     """
     connected = RandomForestLearner()
     self.send_signal("Learner", connected)
     self.assertIsInstance(self.widget.base_estimator, RandomForestLearner)

     # Disconnect the input and expect the default estimator again.
     self.send_signal("Learner", None)
     expected_default = self.widget.DEFAULT_BASE_ESTIMATOR
     self.assertEqual(self.widget.base_estimator, expected_default)
 def test_inputs(self):
     """Check that the stripe plot is filled once all inputs are present.

     The heart data is sent as background data, its first row as the data
     to explain, and a random forest trained on the heart data as model.
     """
     inputs = self.widget.Inputs
     self.send_signal(inputs.background_data, self.heart)
     self.send_signal(inputs.data, self.heart[:1])
     forest = RandomForestLearner(random_state=42)(self.heart)
     self.send_signal(inputs.model, forest)
     self.wait_until_finished()
     self.assertPlotNotEmpty(self.widget._stripe_plot)
    def test_raise_no_classifier_error(self):
        """
        A regression learner on the input must raise the no_classifier error.

        The error must disappear when the input is removed and must not be
        shown for any of the classification learners sent afterwards.
        """
        widget = self.widget

        # Linear regression is a regression learner - the error is expected.
        self.send_signal(widget.Inputs.learner, LinearRegressionLearner())
        self.assertTrue(widget.Error.no_classifier.is_shown())

        # Remove the input to test that the error disappears.
        self.send_signal(widget.Inputs.learner, None)
        self.assertFalse(widget.Error.no_classifier.is_shown())

        # None of the classification learners may trigger the error.
        classifiers = (
            LogisticRegressionLearner,
            TreeLearner,
            RandomForestLearner,
            SVMLearner,
        )
        for learner_class in classifiers:
            self.send_signal(widget.Inputs.learner, learner_class())
            self.assertFalse(widget.Error.no_classifier.is_shown())
Ejemplo n.º 23
0
    def test_tree_explainer(self):
        """Check the structure of SHAP values computed for a random forest.

        For iris the result must be a list of three arrays shaped like the
        data matrix, the sample mask must select as many rows as the arrays
        contain, and there must be one base value per class value. A second
        run explains a single row with five background rows.
        """
        model = RandomForestLearner()(self.iris)

        shap_values, _, sample_mask, base_value = compute_shap_values(
            model, self.iris, self.iris
        )

        self.assertEqual(len(shap_values), 3)
        for class_index in range(3):
            self.assertTupleEqual(
                shap_values[class_index].shape, self.iris.X.shape
            )
        self.assertIsInstance(shap_values, list)
        self.assertIsInstance(shap_values[0], np.ndarray)
        # the number of cases is too small to be subsampled
        self.assertEqual(len(shap_values[0]), sample_mask.sum())
        n_classes = len(self.iris.domain.class_var.values)
        self.assertTupleEqual((n_classes,), base_value.shape)

        # test with a small dataset
        shap_values, _, sample_mask, base_value = compute_shap_values(
            model, self.iris[:1], self.iris[:5]
        )
        self.assertEqual(len(shap_values), 3)
        for class_index in range(3):
            self.assertTupleEqual(shap_values[class_index].shape, (1, 4))
Ejemplo n.º 24
0
 def test_inadequate_data(self):
     """Check that data without attributes is rejected with ValueError.

     The iris attributes are moved to metas, leaving an empty attribute
     list, while the model itself is trained on the original iris data.
     """
     iris_domain = self.iris.domain
     no_attributes = Domain([],
                            class_vars=iris_domain.class_vars,
                            metas=iris_domain.attributes)
     stripped = self.iris.transform(no_attributes)
     forest = RandomForestLearner()(self.iris)

     with self.assertRaises(ValueError):
         permutation_feature_importance(forest, stripped, self.n_repeats)
Ejemplo n.º 25
0
    def test_class_not_predicted(self):
        """
        Cover models trained on data that lacks some of the class values.

        skl learners then output probabilities only for the classes present
        in the data, whereas Orange models add a zero probability for a
        missing class. For all tree-like learners, where skl learners are
        used directly, the missing class is added manually; that handling
        is tested here.
        """
        forest = RandomForestLearner()(self.iris[:100])
        shap_values, _, _, base_value = compute_shap_values(
            forest, self.iris[:100], self.iris[:100]
        )

        self.assertEqual(len(shap_values), 3)
        self.assertTupleEqual((3,), base_value.shape)
        for present_class in (0, 1):
            self.assertTrue(np.any(shap_values[present_class]))
        # all SHAP values of the missing class are 0
        self.assertFalse(np.any(shap_values[2]))

        # with a single class SHAP returns just an array (not a list of
        # arrays) - this must be handled
        forest = RandomForestLearner()(self.iris[:50])
        shap_values, _, _, base_value = compute_shap_values(
            forest, self.iris[:100], self.iris[:100]
        )
        self.assertEqual(len(shap_values), 3)
        self.assertTupleEqual((3,), base_value.shape)

        # Orange handles this for logistic regression - test it anyway
        logistic = LogisticRegressionLearner()(self.iris[:100])
        shap_values, _, _, base_value = compute_shap_values(
            logistic, self.iris[:100], self.iris[:100]
        )
        self.assertEqual(len(shap_values), 3)
        self.assertTupleEqual((3,), base_value.shape)
        for present_class in (0, 1):
            self.assertNotEqual(shap_values[present_class].sum(), 0)
        # the SHAP values of the missing class sum to 0
        self.assertFalse(np.any(shap_values[2].sum()))
Ejemplo n.º 26
0
    def test_wrap_score_cls(self):
        """Check the wrapped CA scorer on a forest trained on heart data.

        The scorer must call the model object exactly once and return a
        score of about 0.987 (compared to three decimal places).
        """
        heart = self.heart
        forest = RandomForestLearner(random_state=0)(heart)
        metric = CA()
        checked = _check_model(forest, heart)
        scorer = _wrap_score(metric, checked)

        # Wrap the model so that calls made by the scorer are recorded.
        spy = Mock(wraps=forest)
        score = scorer(spy, heart)

        spy.assert_called_once()
        self.assertAlmostEqual(score, 0.987, 3)
Ejemplo n.º 27
0
    def test_discrete_class(self):
        """Check permutation importance for a discrete class (iris).

        Verifies the shape of the importance array, the returned attribute
        names and the per-attribute mean importance against stored values.
        """
        iris = self.iris
        attributes = iris.domain.attributes
        forest = RandomForestLearner(random_state=0)(iris)

        result = permutation_feature_importance(
            forest, iris, CA(), self.n_repeats
        )

        expected_shape = (len(attributes), self.n_repeats)
        self.assertEqual(result[0].shape, expected_shape)
        expected_names = [attribute.name for attribute in attributes]
        self.assertEqual(result[1], expected_names)

        # Mean over the repeats, one value per attribute.
        expected_mean = np.array([0.013333, 0, 0.322667, 0.474667])
        actual_mean = result[0].mean(axis=1)
        np.testing.assert_array_almost_equal(actual_mean, expected_mean)
Ejemplo n.º 28
0
    def test_wrap_score_skl_predict_cls(self):
        """Check the wrapped CA scorer on a forest trained on iris data.

        The scorer must neither call the model object nor its ``predict``
        method, and must return a score of about 0.993 (compared to three
        decimal places).
        """
        iris = self.iris
        forest = RandomForestLearner(random_state=0)(iris)
        metric = CA()
        checked = _check_model(forest, iris)
        scorer = _wrap_score(metric, checked)

        # Wrap the model so that calls made by the scorer are recorded.
        spy = Mock(wraps=forest)
        score = scorer(spy, iris)

        spy.assert_not_called()
        spy.predict.assert_not_called()
        self.assertAlmostEqual(score, 0.993, 3)
Ejemplo n.º 29
0
    def test_sparse_data(self):
        """Check permutation importance on sparse heart and iris data.

        For each data set the model is trained on the sparse table; the
        importance array must have shape (number of attributes, n_repeats)
        and the returned names must match the attribute names.
        """
        for dense in (self.heart, self.iris):
            sparse_data = dense.to_sparse()
            attributes = sparse_data.domain.attributes
            forest = RandomForestLearner(random_state=0)(sparse_data)

            result = permutation_feature_importance(
                forest, sparse_data, CA(), self.n_repeats
            )

            expected_shape = (len(attributes), self.n_repeats)
            self.assertEqual(result[0].shape, expected_shape)
            expected_names = [attribute.name for attribute in attributes]
            self.assertEqual(result[1], expected_names)
    def _fit(self):
        """Score every attribute of the data and store the scores by name.

        The data is converted with ``self.data.to_table()`` and scored with
        the method selected by ``self.params["eval_method"]``: ``"relief"``
        uses ``ReliefF`` with 100 iterations, ``"fcbf"`` uses ``FCBF`` and
        any other value uses element 0 of the result of
        ``RandomForestLearner().score_data``. Each score is written to
        ``self.feature_importances`` under the name of its attribute.

        Warnings are silenced only while this method runs: the warning
        filters are restored on exit instead of being replaced for the
        whole process.
        """
        # catch_warnings() restores the previous filters on exit, so the
        # blanket 'ignore' does not leak into unrelated code.
        with warnings.catch_warnings():
            warnings.simplefilter(action='ignore')
            table = self.data.to_table()
            eval_method = self.params["eval_method"]
            if eval_method == "relief":
                scores = ReliefF(table, n_iterations=100)
            elif eval_method == "fcbf":
                scores = FCBF(table)
            else:
                scores = RandomForestLearner().score_data(table)[0]

            for attr, score in zip(table.domain.attributes, scores):
                self.feature_importances[attr.name] = score
Ejemplo n.º 31
0
 def test_error_message_cleared_when_valid_learner_on_input(self):
     """Check that the no_weight_support error is hidden again.

     The error must not be shown after a KNN learner is disconnected, nor
     after a KNN learner is replaced by a random forest learner.
     """
     # Disconnecting an invalid learner should fall back to the default
     # one and hide the error.
     self.send_signal("Learner", KNNLearner())
     self.send_signal('Learner', None)
     not_hidden_on_disconnect = (
         'Error message was not hidden on input disconnect')
     self.assertFalse(self.widget.Error.no_weight_support.is_shown(),
                      not_hidden_on_disconnect)

     # Connecting a valid learner should also reset the error message.
     self.send_signal("Learner", KNNLearner())
     self.send_signal('Learner', RandomForestLearner())
     not_hidden_on_valid = (
         'Error message was not hidden when a valid learner appeared on '
         'input')
     self.assertFalse(self.widget.Error.no_weight_support.is_shown(),
                      not_hidden_on_valid)
Ejemplo n.º 32
0
    def test_input_too_many_instances(self):
        """Check that large input data is sampled and the user is informed.

        With the titanic data on both data inputs, the data_sampled
        information must be shown and the scores output must contain 1000
        rows. Removing the data input must hide the information again.
        """
        widget = self.widget
        titanic = Table("titanic")
        forest = RandomForestLearner(random_state=0)(titanic)

        self.send_signal(widget.Inputs.background_data, titanic)
        self.send_signal(widget.Inputs.data, titanic)
        self.send_signal(widget.Inputs.model, forest)
        self.wait_until_finished()
        self.assertTrue(widget.Information.data_sampled.is_shown())

        scores = self.get_output(widget.Outputs.scores)
        self.assertEqual(len(scores), 1000)

        # The information must be cleared together with the data.
        self.send_signal(widget.Inputs.data, None)
        self.assertFalse(widget.Information.data_sampled.is_shown())
Ejemplo n.º 33
0
    def test_shap_random_seed(self):
        """Check that repeated SHAP computations give identical values.

        For a logistic regression model and then a random forest model,
        SHAP values are computed twice on iris and must be exactly equal.
        """
        for learner_class in (LogisticRegressionLearner, RandomForestLearner):
            model = learner_class()(self.iris)

            first, _, _, _ = compute_shap_values(model, self.iris, self.iris)
            second, _, _, _ = compute_shap_values(
                model, self.iris, self.iris
            )
            np.testing.assert_array_equal(first, second)