def test_saved_selection(self):
    """A drag-selection on the plot survives a settings pack/restore cycle."""
    model = RandomForestLearner(random_state=0)(self.heart)
    self.send_signal(self.widget.Inputs.data, self.heart)
    self.send_signal(self.widget.Inputs.model, model)
    self.wait_until_finished()

    plot = self.widget.plot
    item = plot.layout().itemAt(0, plot.ITEM_COLUMN)
    origin = self.widget.view.mapFromScene(item.scenePos())
    # Simulate a press-drag-release selection gesture on the viewport.
    QTest.mousePress(self.widget.view.viewport(), Qt.LeftButton,
                     pos=origin + QPoint(0, 10))
    mouseMove(self.widget.view.viewport(), Qt.LeftButton,
              pos=origin + QPoint(300, 20))
    QTest.mouseRelease(self.widget.view.viewport(), Qt.LeftButton,
                       pos=origin + QPoint(300, 30))
    saved_selection = self.get_output(self.widget.Outputs.selected_data)
    self.assertIsNotNone(saved_selection)

    # Recreate the widget from packed settings and feed the same inputs;
    # the restored widget must reproduce the same selection.
    settings = self.widget.settingsHandler.pack_data(self.widget)
    restored = self.create_widget(OWPermutationImportance,
                                  stored_settings=settings)
    self.send_signal(restored.Inputs.data, self.heart, widget=restored)
    model = RandomForestLearner(random_state=0)(self.heart)
    self.send_signal(restored.Inputs.model, model, widget=restored)
    self.wait_until_finished(widget=restored)
    selection = self.get_output(restored.Outputs.selected_data,
                                widget=restored)
    np.testing.assert_array_equal(selection.X, saved_selection.X)
def test_sparse_data(self):
    """No transform/unknown errors for any sparse/dense data-model mix."""
    dense = self.heart
    sparse = dense.to_sparse()
    with sparse.unlocked():
        sparse.X = sp.csr_matrix(sparse.X)
    sparse_model = RandomForestLearner(random_state=0)(sparse)

    def assert_no_errors():
        # Neither widget error state may be shown after the run.
        self.assertFalse(self.widget.Error.domain_transform_err.is_shown())
        self.assertFalse(self.widget.Error.unknown_err.is_shown())

    # sparse data + sparse model
    self.send_signal(self.widget.Inputs.data, sparse)
    self.send_signal(self.widget.Inputs.model, sparse_model)
    self.wait_until_finished()
    assert_no_errors()

    # sparse data + dense model
    dense_model = RandomForestLearner(random_state=0)(dense)
    self.send_signal(self.widget.Inputs.data, sparse)
    self.send_signal(self.widget.Inputs.model, dense_model)
    self.wait_until_finished()
    assert_no_errors()

    # dense data + sparse model
    self.send_signal(self.widget.Inputs.data, dense)
    self.send_signal(self.widget.Inputs.model, sparse_model)
    self.wait_until_finished()
    assert_no_errors()
def test_saved_selection(self):
    """Violin-plot selection is restored when the widget is rebuilt from settings."""
    self.send_signal(self.widget.Inputs.data, self.heart)
    classifier = RandomForestLearner(random_state=42)(self.heart)
    self.send_signal(self.widget.Inputs.model, classifier)
    self.wait_until_finished()

    plot = self.widget._violin_plot
    item = plot.layout().itemAt(0, plot.VIOLIN_COLUMN)
    origin = self.widget.view.mapFromScene(item.scenePos())
    # Press-drag-release across the first violin to make a selection.
    QTest.mousePress(self.widget.view.viewport(), Qt.LeftButton,
                     pos=origin + QPoint(250, 10))
    mouseMove(self.widget.view.viewport(), Qt.LeftButton,
              pos=origin + QPoint(300, 20))
    QTest.mouseRelease(self.widget.view.viewport(), Qt.LeftButton,
                       pos=origin + QPoint(300, 30))
    saved_selection = self.get_output(self.widget.Outputs.selected_data)

    # Rebuild the widget from packed settings with identical inputs.
    settings = self.widget.settingsHandler.pack_data(self.widget)
    restored = self.create_widget(OWExplainModel, stored_settings=settings)
    self.send_signal(restored.Inputs.data, self.heart, widget=restored)
    classifier = RandomForestLearner(random_state=42)(self.heart)
    self.send_signal(restored.Inputs.model, classifier, widget=restored)
    self.wait_until_finished(widget=restored)
    selection = self.get_output(restored.Outputs.selected_data,
                                widget=restored)
    np.testing.assert_array_equal(selection.X, saved_selection.X)
def test_classification_scorer(self):
    """score_data yields one column per attribute; petal features rank highest."""
    learner = RandomForestLearner()
    scores = learner.score_data(self.iris)
    attributes = self.iris.domain.attributes
    self.assertEqual(scores.shape[1], len(attributes))
    self.assertNotEqual(sum(scores[0]), 0)
    # The two best-scoring attributes should be the petal measurements.
    top_two = np.argsort(scores[0])[-2:]
    self.assertEqual(['petal length', 'petal width'],
                     sorted(attributes[i].name for i in top_two))
def test_classification_scorer(self):
    """score_data returns one score per attribute; petal features rank highest."""
    data = Table('iris')
    learner = RandomForestLearner()
    scores = learner.score_data(data)
    self.assertEqual(len(scores), len(data.domain.attributes))
    self.assertNotEqual(sum(scores), 0)
    # Top-two indices by score must name the petal attributes.
    best = np.argsort(scores)[-2:]
    self.assertEqual(['petal length', 'petal width'],
                     sorted(data.domain.attributes[i].name for i in best))
def test_scorer_feature(self):
    """Scoring one feature matches its entry in the all-features score vector."""
    np.random.seed(42)
    data = Table('test4.tab')
    learner = RandomForestLearner()
    all_scores = learner.score_data(data)
    for index, attribute in enumerate(data.domain.attributes):
        # Reseed so the single-feature run sees identical randomness.
        np.random.seed(42)
        single_score = learner.score_data(data, attribute)
        self.assertEqual(single_score, all_scores[index])
def test_scorer_feature(self):
    """Per-feature scoring agrees with the matching column of the full scores."""
    np.random.seed(42)
    data = Table('test4.tab')
    learner = RandomForestLearner()
    all_scores = learner.score_data(data)
    for index, attribute in enumerate(data.domain.attributes):
        # Reseed so the single-feature run sees identical randomness.
        np.random.seed(42)
        single_score = learner.score_data(data, attribute)
        np.testing.assert_array_almost_equal(single_score,
                                             all_scores[:, index])
def test_scorer_feature(self):
    """Per-feature scoring agrees with the matching column of the full scores."""
    np.random.seed(42)
    data = Table(test_filename('datasets/test4.tab'))
    learner = RandomForestLearner()
    all_scores = learner.score_data(data)
    for index, attribute in enumerate(data.domain.attributes):
        # Reseed so the single-feature run sees identical randomness.
        np.random.seed(42)
        single_score = learner.score_data(data, attribute)
        np.testing.assert_array_almost_equal(single_score,
                                             all_scores[:, index])
def test_classification_scorer(self):
    """One score per attribute; the petal attributes must score highest."""
    data = Table('iris')
    learner = RandomForestLearner()
    scores = learner.score_data(data)
    self.assertEqual(len(scores), len(data.domain.attributes))
    self.assertNotEqual(sum(scores), 0)
    top_two = np.argsort(scores)[-2:]
    self.assertEqual(['petal length', 'petal width'],
                     sorted(data.domain.attributes[i].name
                            for i in top_two))
def test_RandomForest(self):
    """10-fold CV accuracy of a random forest on iris lies in (0.9, 0.99)."""
    data = Table('iris')
    learner = RandomForestLearner()
    results = CrossValidation(data, [learner], k=10)
    accuracy = CA(results)
    self.assertGreater(accuracy, 0.9)
    self.assertLess(accuracy, 0.99)
def test_predict_single_instance(self):
    """A fitted forest predicts single instances, plain and with probabilities."""
    data = Table('iris')
    model = RandomForestLearner()(data)
    for instance in data:
        # Plain prediction, then the value-with-probabilities variant.
        model(instance)
        value, probabilities = model(instance, model.ValueProbs)
def test_missing_values(self):
    """Importances are computed for data that contains missing values."""
    data = self.heart
    model = RandomForestLearner(random_state=0)(data)
    result = permutation_feature_importance(model, data, CA(),
                                            self.n_repeats)
    expected_shape = (len(data.domain.attributes), self.n_repeats)
    self.assertEqual(result[0].shape, expected_shape)
    self.assertEqual(result[1],
                     [attr.name for attr in data.domain.attributes])
def test_retain_data(self):
    """permutation_feature_importance must not mutate the input data matrix."""
    data = self.heart
    original_x = data.X.copy()
    model = RandomForestLearner(random_state=0)(data)
    permutation_feature_importance(model, data, CA(), self.n_repeats)
    np.testing.assert_array_equal(data.X, original_x)
def setUpClass(cls):
    """Load the shared datasets and fit reference models once per class."""
    super().setUpClass()
    cls.iris = Table("iris")
    cls.heart = Table("heart_disease")
    cls.housing = Table("housing")
    # Fixed seed keeps model-dependent assertions deterministic.
    cls.rf_cls = RandomForestLearner(random_state=0)(cls.iris)
    cls.rf_reg = RandomForestRegressionLearner(random_state=0)(cls.housing)
def test_RandomForest(self):
    """10-fold CV accuracy of a random forest on iris lies in (0.9, 0.99)."""
    learner = RandomForestLearner()
    cross_validation = CrossValidation(k=10)
    results = cross_validation(self.iris, [learner])
    accuracy = CA(results)
    self.assertGreater(accuracy, 0.9)
    self.assertLess(accuracy, 0.99)
def setUpClass(cls):
    """Load shared datasets and fit one classifier and one regressor."""
    super().setUpClass()
    cls.heart = Table("heart_disease")
    cls.housing = Table("housing")
    # Same fixed seed for both models keeps the tests deterministic.
    seed = {"random_state": 0}
    cls.rf_cls = RandomForestLearner(**seed)(cls.heart)
    cls.rf_reg = RandomForestRegressionLearner(**seed)(cls.housing)
def test_RandomForest(self):
    """10-fold CV accuracy of a random forest on iris lies in (0.9, 0.99)."""
    data = Orange.data.Table('iris')
    learner = RandomForestLearner()
    results = Orange.evaluation.CrossValidation(data, [learner], k=10)
    accuracy = Orange.evaluation.CA(results)
    self.assertGreater(accuracy, 0.9)
    self.assertLess(accuracy, 0.99)
def test_get_classification_trees(self):
    """The fitted model exposes exactly n_estimators individual trees."""
    n_trees = 5
    model = RandomForestLearner(n_estimators=n_trees)(self.iris)
    self.assertEqual(len(model.trees), n_trees)
    # A single tree should classify the first iris instance as class 0.
    first_tree = model.trees[0]
    self.assertEqual(first_tree(self.iris[0]), 0)
def test_input_learner_disconnect(self):
    """Check base learner after disconnecting learner on the input"""
    # A connected learner becomes the base estimator ...
    self.send_signal("Learner", RandomForestLearner())
    self.assertIsInstance(self.widget.base_estimator, RandomForestLearner)
    # ... and disconnecting falls back to the widget default.
    self.send_signal("Learner", None)
    self.assertEqual(self.widget.base_estimator,
                     self.widget.DEFAULT_BASE_ESTIMATOR)
def test_inputs(self):
    """With background data, data and a model set, the stripe plot is drawn."""
    self.send_signal(self.widget.Inputs.background_data, self.heart)
    # Explain a single instance against the full background data.
    self.send_signal(self.widget.Inputs.data, self.heart[:1])
    classifier = RandomForestLearner(random_state=42)(self.heart)
    self.send_signal(self.widget.Inputs.model, classifier)
    self.wait_until_finished()
    self.assertPlotNotEmpty(self.widget._stripe_plot)
def test_raise_no_classifier_error(self):
    """
    Regression learner must raise error
    """
    w = self.widget
    # A regression learner must trigger the error.
    self.send_signal(w.Inputs.learner, LinearRegressionLearner())
    self.assertTrue(w.Error.no_classifier.is_shown())
    # Removing the learner must clear it again.
    self.send_signal(w.Inputs.learner, None)
    self.assertFalse(w.Error.no_classifier.is_shown())
    # Classification learners never trigger it.
    for learner in (LogisticRegressionLearner(), TreeLearner(),
                    RandomForestLearner(), SVMLearner()):
        self.send_signal(w.Inputs.learner, learner)
        self.assertFalse(w.Error.no_classifier.is_shown())
def test_tree_explainer(self):
    """SHAP output shapes for a tree model on a three-class problem."""
    model = RandomForestLearner()(self.iris)
    shap_values, _, sample_mask, base_value = compute_shap_values(
        model, self.iris, self.iris
    )
    # One SHAP matrix per class, each matching the data matrix shape.
    self.assertEqual(len(shap_values), 3)
    for per_class in shap_values:
        self.assertTupleEqual(per_class.shape, self.iris.X.shape)
    self.assertIsInstance(shap_values, list)
    self.assertIsInstance(shap_values[0], np.ndarray)
    # number of cases to short to be subsampled
    self.assertEqual(len(shap_values[0]), sample_mask.sum())
    self.assertTupleEqual(
        (len(self.iris.domain.class_var.values),), base_value.shape
    )
    # test with small dataset
    shap_values, _, sample_mask, base_value = compute_shap_values(
        model, self.iris[:1], self.iris[:5]
    )
    self.assertEqual(len(shap_values), 3)
    for per_class in shap_values:
        self.assertTupleEqual(per_class.shape, (1, 4))
def test_inadequate_data(self):
    """Data whose attributes were moved to metas makes the call raise."""
    domain = Domain([], class_vars=self.iris.domain.class_vars,
                    metas=self.iris.domain.attributes)
    no_attrs = self.iris.transform(domain)
    model = RandomForestLearner()(self.iris)
    with self.assertRaises(ValueError):
        permutation_feature_importance(model, no_attrs, self.n_repeats)
def test_class_not_predicted(self):
    """
    This is a case where one class is missing in the data.
    In this case skl learners output probabilities with only two classes.
    Orange models adds a zero probability for a missing class.
    In case where we work directly with skl learners - all tree-like
    learners it is added manually and tested here.
    """
    # Two of three classes present: SHAP values for the missing class
    # must exist but be all zero.
    model = RandomForestLearner()(self.iris[:100])
    shap_values, _, _, base_value = compute_shap_values(
        model, self.iris[:100], self.iris[:100]
    )
    self.assertEqual(len(shap_values), 3)
    self.assertTupleEqual((3,), base_value.shape)
    self.assertTrue(np.any(shap_values[0]))
    self.assertTrue(np.any(shap_values[1]))
    self.assertFalse(np.any(shap_values[2]))

    # Single class present: SHAP returns a bare array instead of a list
    # of arrays - that case must be handled as well.
    model = RandomForestLearner()(self.iris[:50])
    shap_values, _, _, base_value = compute_shap_values(
        model, self.iris[:100], self.iris[:100]
    )
    self.assertEqual(len(shap_values), 3)
    self.assertTupleEqual((3,), base_value.shape)

    # Logistic regression: Orange already pads the missing class - test anyway.
    model = LogisticRegressionLearner()(self.iris[:100])
    shap_values, _, _, base_value = compute_shap_values(
        model, self.iris[:100], self.iris[:100]
    )
    self.assertEqual(len(shap_values), 3)
    self.assertTupleEqual((3,), base_value.shape)
    self.assertNotEqual(shap_values[0].sum(), 0)
    self.assertNotEqual(shap_values[1].sum(), 0)
    # missing class has all shap values 0
    self.assertTrue(not np.any(shap_values[2].sum()))
def test_wrap_score_cls(self):
    """Wrapped CA scorer calls an Orange classifier once and scores it."""
    data = self.heart
    model = RandomForestLearner(random_state=0)(data)
    scorer = _wrap_score(CA(), _check_model(model, data))
    spy = Mock(wraps=model)
    baseline = scorer(spy, data)
    # The model itself is invoked exactly once for the baseline score.
    spy.assert_called_once()
    self.assertAlmostEqual(baseline, 0.987, 3)
def test_discrete_class(self):
    """Importance shape, names and mean values for a discrete target."""
    data = self.iris
    model = RandomForestLearner(random_state=0)(data)
    result = permutation_feature_importance(model, data, CA(),
                                            self.n_repeats)
    expected_shape = (len(data.domain.attributes), self.n_repeats)
    self.assertEqual(result[0].shape, expected_shape)
    self.assertEqual(result[1],
                     [attr.name for attr in data.domain.attributes])
    # Reference means for the fixed random seed.
    expected_mean = np.array([0.013333, 0, 0.322667, 0.474667])
    np.testing.assert_array_almost_equal(result[0].mean(axis=1),
                                         expected_mean)
def test_wrap_score_skl_predict_cls(self):
    """Wrapped scorer bypasses the Orange model call for skl prediction."""
    data = self.iris
    model = RandomForestLearner(random_state=0)(data)
    scorer = _wrap_score(CA(), _check_model(model, data))
    spy = Mock(wraps=model)
    baseline = scorer(spy, data)
    # Neither the Orange __call__ nor .predict goes through the wrapper.
    spy.assert_not_called()
    spy.predict.assert_not_called()
    self.assertAlmostEqual(baseline, 0.993, 3)
def test_sparse_data(self):
    """Permutation importance handles sparse classification data."""
    # Same checks on both datasets, converted to sparse form.
    for source in (self.heart, self.iris):
        sparse = source.to_sparse()
        model = RandomForestLearner(random_state=0)(sparse)
        result = permutation_feature_importance(model, sparse, CA(),
                                                self.n_repeats)
        expected_shape = (len(sparse.domain.attributes), self.n_repeats)
        self.assertEqual(result[0].shape, expected_shape)
        self.assertEqual(
            result[1], [attr.name for attr in sparse.domain.attributes]
        )
def _fit(self):
    """Fill ``self.feature_importances`` using the configured eval method."""
    # Scoring backends may emit noisy warnings; silence them for this run.
    warnings.simplefilter(action='ignore')
    table = self.data.to_table()
    method = self.params["eval_method"]
    if method == "relief":
        scores = ReliefF(table, n_iterations=100)
    elif method == "fcbf":
        scores = FCBF(table)
    else:
        # Fall back to random-forest feature scores (first score row).
        scores = RandomForestLearner().score_data(table)[0]
    for attribute, score in zip(table.domain.attributes, scores):
        self.feature_importances[attribute.name] = score
def test_error_message_cleared_when_valid_learner_on_input(self):
    """The no-weight-support error clears on disconnect and on a valid learner."""
    # Disconnecting an invalid learner should use the default one and hide
    # the error
    self.send_signal("Learner", KNNLearner())
    self.send_signal('Learner', None)
    self.assertFalse(self.widget.Error.no_weight_support.is_shown(),
                     'Error message was not hidden on input disconnect')
    # Connecting a valid learner should also reset the error message
    self.send_signal("Learner", KNNLearner())
    self.send_signal('Learner', RandomForestLearner())
    self.assertFalse(
        self.widget.Error.no_weight_support.is_shown(),
        'Error message was not hidden when a valid learner appeared on '
        'input')
def test_input_too_many_instances(self):
    """Oversized input is subsampled to 1000 rows and flagged to the user."""
    titanic = Table("titanic")
    model = RandomForestLearner(random_state=0)(titanic)
    self.send_signal(self.widget.Inputs.background_data, titanic)
    self.send_signal(self.widget.Inputs.data, titanic)
    self.send_signal(self.widget.Inputs.model, model)
    self.wait_until_finished()
    # The sampling notice is shown and the output is capped at 1000 rows.
    self.assertTrue(self.widget.Information.data_sampled.is_shown())
    scores = self.get_output(self.widget.Outputs.scores)
    self.assertEqual(len(scores), 1000)
    # Clearing the data input also clears the notice.
    self.send_signal(self.widget.Inputs.data, None)
    self.assertFalse(self.widget.Information.data_sampled.is_shown())
def test_shap_random_seed(self):
    """Two SHAP computations on the same model yield identical values."""
    # Check determinism for both a linear and a tree-based model.
    for learner in (LogisticRegressionLearner(), RandomForestLearner()):
        model = learner(self.iris)
        first, _, _, _ = compute_shap_values(model, self.iris, self.iris)
        second, _, _, _ = compute_shap_values(model, self.iris, self.iris)
        np.testing.assert_array_equal(first, second)