def test_retain_data(self):
    """Check that computing the importance leaves ``data.X`` unchanged."""
    table = self.heart
    # Snapshot the feature matrix up front so it can be compared afterwards.
    snapshot = table.X.copy()
    forest = RandomForestLearner(random_state=0)(table)
    permutation_feature_importance(forest, table, CA(), self.n_repeats)
    np.testing.assert_array_equal(table.X, snapshot)
def test_missing_values(self):
    """Check the result shape and feature names on the ``heart`` data.

    NOTE(review): the test name suggests ``heart`` contains missing
    values -- confirm against the fixture.
    """
    table = self.heart
    forest = RandomForestLearner(random_state=0)(table)
    scores, names = permutation_feature_importance(
        forest, table, CA(), self.n_repeats
    )

    attributes = table.domain.attributes
    # One row per attribute, one column per repetition.
    expected_shape = (len(attributes), self.n_repeats)
    expected_names = [var.name for var in attributes]

    self.assertEqual(scores.shape, expected_shape)
    self.assertEqual(names, expected_names)
def test_compare_to_skl(self):
    """Check that scores equal those of ``_permutation_feature_importance_skl``."""
    table = self.iris
    classifier = LogisticRegressionLearner()(table)
    # The reference is computed first, then the implementation under test.
    reference = _permutation_feature_importance_skl(
        classifier, table, self.n_repeats
    )
    scores, _ = permutation_feature_importance(
        classifier, table, CA(), self.n_repeats
    )
    np.testing.assert_array_equal(reference, scores)
def test_continuous_class(self):
    """Check the result shape and feature names for a regressor scored with MSE."""
    table = self.housing
    regressor = RandomForestRegressionLearner(random_state=0)(table)
    scores, names = permutation_feature_importance(
        regressor, table, MSE(), self.n_repeats
    )

    attributes = table.domain.attributes
    # One row per attribute, one column per repetition.
    expected_shape = (len(attributes), self.n_repeats)
    expected_names = [var.name for var in attributes]

    self.assertEqual(scores.shape, expected_shape)
    self.assertEqual(names, expected_names)
def test_discrete_class(self):
    """Check shape, feature names and per-feature mean scores on ``iris``."""
    table = self.iris
    forest = RandomForestLearner(random_state=0)(table)
    scores, names = permutation_feature_importance(
        forest, table, CA(), self.n_repeats
    )

    attributes = table.domain.attributes
    expected_shape = (len(attributes), self.n_repeats)
    expected_names = [var.name for var in attributes]
    self.assertEqual(scores.shape, expected_shape)
    self.assertEqual(names, expected_names)

    # Expected mean over the repetitions axis, one value per attribute.
    expected_mean = np.array([0.013333, 0, 0.322667, 0.474667])
    per_feature_mean = scores.mean(axis=1)
    np.testing.assert_array_almost_equal(per_feature_mean, expected_mean)
def test_sparse_data(self):
    """Check shape and feature names on sparse versions of ``heart`` and ``iris``."""
    # Fixtures are looked up by name inside the loop so that each data set
    # is fetched and converted only when its turn comes.
    for fixture_name in ("heart", "iris"):
        sparse_data = getattr(self, fixture_name).to_sparse()
        forest = RandomForestLearner(random_state=0)(sparse_data)
        scores, names = permutation_feature_importance(
            forest, sparse_data, CA(), self.n_repeats
        )

        attributes = sparse_data.domain.attributes
        expected_shape = (len(attributes), self.n_repeats)
        self.assertEqual(scores.shape, expected_shape)
        self.assertEqual(names, [var.name for var in attributes])
def test_orange_models(self):
    """Check shape and feature names for several learner/scorer combinations.

    Covers NaiveBayesLearner with CA on ``heart``, TreeLearner with AUC on
    ``iris`` and TreeRegressionLearner with MSE on ``housing``.
    """
    repeats = self.n_repeats

    def check(table, learner_cls, scorer_cls):
        # Fit first, then create the scorer, then compute the importance.
        fitted = learner_cls()(table)
        scores, names = permutation_feature_importance(
            fitted, table, scorer_cls(), repeats
        )
        attributes = table.domain.attributes
        self.assertEqual(scores.shape, (len(attributes), repeats))
        self.assertEqual(names, [var.name for var in attributes])

    check(self.heart, NaiveBayesLearner, CA)
    check(self.iris, TreeLearner, AUC)
    check(self.housing, TreeRegressionLearner, MSE)
def run(data: Table, model: Model, score_class: Type[Score],
        n_repeats: int, state: TaskState) -> Optional[Results]:
    """Compute permutation feature importance and package it as ``Results``.

    Returns ``None`` when ``data``, ``model`` or ``score_class`` is falsy.
    Otherwise calls ``permutation_feature_importance`` with a fresh
    ``score_class()`` instance and returns ``Results`` holding the
    importance array, the feature names and an all-``True`` boolean mask
    with one entry per row of the importance array.

    Progress and status are forwarded to ``state``; the progress callback
    raises ``Exception`` once ``state`` reports an interruption request.
    """
    if not (data and model and score_class):
        return None

    def report_progress(i: float, status=""):
        # ``i`` is scaled by 100 before being handed to the task state.
        state.set_progress_value(i * 100)
        if status:
            state.set_status(status)
        if state.is_interruption_requested():
            # Abort the computation from inside the callback.
            raise Exception

    scorer = score_class()
    importance, names = permutation_feature_importance(
        model, data, scorer, n_repeats, report_progress
    )
    n_rows = importance.shape[0]
    all_selected = np.ones(n_rows, dtype=bool)
    return Results(x=importance, names=names, mask=all_selected)
def test_auc_orange_model(self):
    """Check the overall mean AUC-based importance of naive Bayes on ``titanic``."""
    table = self.titanic
    bayes = NaiveBayesLearner()(table)
    scores, _ = permutation_feature_importance(
        bayes, table, AUC(), self.n_repeats
    )
    # Compared to three decimal places.
    self.assertAlmostEqual(scores.mean(), 0.044, places=3)
def test_auc_missing_values(self):
    """Check the overall mean AUC-based importance of a random forest on ``heart``.

    NOTE(review): the test name suggests ``heart`` contains missing
    values -- confirm against the fixture.
    """
    table = self.heart
    forest = RandomForestLearner(random_state=0)(table)
    scores, _ = permutation_feature_importance(
        forest, table, AUC(), self.n_repeats
    )
    # Compared to three decimal places.
    self.assertAlmostEqual(scores.mean(), 0.013, places=3)