def test_kernel_explainer(self):
    """Kernel explainer returns a list of per-class SHAP matrices on iris."""
    model = LogisticRegressionLearner()(self.iris)
    shap_values, _, sample_mask, base_value = compute_shap_values(
        model, self.iris, self.iris
    )
    self.assertEqual(len(shap_values), 3)
    # one (n_instances, n_features) matrix per class
    for class_shap in shap_values:
        self.assertTupleEqual(class_shap.shape, self.iris.X.shape)
    self.assertIsInstance(shap_values, list)
    self.assertIsInstance(shap_values[0], np.ndarray)
    # dataset is too short to be subsampled, so every row is kept
    self.assertEqual(len(shap_values[0]), sample_mask.sum())
    self.assertTupleEqual(
        (len(self.iris.domain.class_var.values),), base_value.shape
    )

    # a very small dataset must still yield one row per explained instance
    shap_values, _, sample_mask, base_value = compute_shap_values(
        model, self.iris[:1], self.iris[:5]
    )
    self.assertEqual(len(shap_values), 3)
    for class_shap in shap_values:
        self.assertTupleEqual(class_shap.shape, (1, 4))
def test_subsample(self):
    """Data above the sample limit is subsampled, reproducibly."""
    titanic = Table("titanic")
    model = LogisticRegressionLearner()(titanic)
    shap_values, _, sample_mask, _ = compute_shap_values(
        model, titanic, titanic
    )
    self.assertTupleEqual((1000, 8), shap_values[0].shape)
    self.assertTupleEqual((2201,), sample_mask.shape)

    # the same random seed must reproduce the same subsample mask
    _, _, repeated_mask, _ = compute_shap_values(model, titanic, titanic)
    np.testing.assert_array_equal(sample_mask, repeated_mask)
def test_shap_random_seed(self):
    """Repeated explanation of the same model gives identical SHAP values."""
    # exercise both the kernel (logistic regression) and tree explainers
    for learner in (LogisticRegressionLearner(), RandomForestLearner()):
        model = learner(self.iris)
        first_run, _, _, _ = compute_shap_values(
            model, self.iris, self.iris
        )
        second_run, _, _, _ = compute_shap_values(
            model, self.iris, self.iris
        )
        np.testing.assert_array_equal(first_run, second_run)
def test_kernel_explainer_sgd(self):
    """Kernel explainer works for an SGD classifier on shuffled data.

    The original test computed SHAP values but asserted nothing, so a
    broken explainer would still pass; shape and mask assertions are
    added to pin the expected output.
    """
    model = SGDClassificationLearner()(self.titanic)
    # NOTE(review): this shuffles the shared self.titanic fixture in
    # place, which can leak into other tests that rely on row order —
    # kept for backward compatibility, but consider shuffling a copy.
    np.random.shuffle(self.titanic.X)
    data = self.titanic[:200]
    shap_values, _, sample_mask, _ = compute_shap_values(model, data, data)
    # one (n_instances, n_features) matrix per class
    self.assertGreaterEqual(len(shap_values), 1)
    for class_shap in shap_values:
        self.assertTupleEqual(class_shap.shape, data.X.shape)
    # 200 rows are below the subsampling limit, so all rows are kept
    self.assertEqual(sample_mask.sum(), len(data))
def test_class_not_predicted(self):
    """
    When one class is missing from the training data, skl learners output
    probabilities for only two classes while Orange models add a zero
    probability for the missing class. For models used directly through
    skl - all tree-like learners - the missing class is added manually;
    that handling is verified here.
    """
    two_class_iris = self.iris[:100]

    model = RandomForestLearner()(two_class_iris)
    shap_values, _, _, base_value = compute_shap_values(
        model, two_class_iris, two_class_iris
    )
    self.assertEqual(len(shap_values), 3)
    self.assertTupleEqual((3,), base_value.shape)
    self.assertTrue(np.any(shap_values[0]))
    self.assertTrue(np.any(shap_values[1]))
    # the class never seen during training gets all-zero SHAP values
    self.assertFalse(np.any(shap_values[2]))

    # for a single class SHAP returns a bare array (not a list of
    # arrays) - this must be handled as well
    model = RandomForestLearner()(self.iris[:50])
    shap_values, _, _, base_value = compute_shap_values(
        model, two_class_iris, two_class_iris
    )
    self.assertEqual(len(shap_values), 3)
    self.assertTupleEqual((3,), base_value.shape)

    # Orange already handles this for logistic regression - test anyway
    model = LogisticRegressionLearner()(two_class_iris)
    shap_values, _, _, base_value = compute_shap_values(
        model, two_class_iris, two_class_iris
    )
    self.assertEqual(len(shap_values), 3)
    self.assertTupleEqual((3,), base_value.shape)
    self.assertNotEqual(shap_values[0].sum(), 0)
    self.assertNotEqual(shap_values[1].sum(), 0)
    # the missing class has all SHAP values equal to zero
    self.assertTrue(not np.any(shap_values[2].sum()))
def test_missing_values(self):
    """SHAP values can be computed for data with missing values."""
    heart = Table("heart_disease.tab")
    model = TreeLearner()(heart)
    shap_values, _, _, _ = compute_shap_values(model, heart, heart)
    self.assertEqual(len(shap_values), 2)
    for class_shap in shap_values:
        self.assertTupleEqual(class_shap.shape, heart.X.shape)
def test_no_class(self):
    """Explanations also work for data without a class column."""
    no_class = Table.from_table(
        Domain(self.iris.domain.attributes), self.iris
    )
    # both the tree and the kernel explainer must accept class-less data
    for model in (
        RandomForestLearner()(self.iris),
        LogisticRegressionLearner()(self.iris),
    ):
        shap_values, _, sample_mask, _ = compute_shap_values(
            model, no_class, no_class
        )
        self.assertTupleEqual(self.iris.X.shape, shap_values[0].shape)
        self.assertTupleEqual((len(self.iris),), sample_mask.shape)
def test_sparse(self):
    """Both explainer types handle sparse input data."""
    sparse_iris = self.iris.to_sparse()
    # kernel explainer (logistic regression) and tree explainer (forest)
    for learner in (LogisticRegressionLearner(), RandomForestLearner()):
        model = learner(sparse_iris)
        shap_values, _, _, _ = compute_shap_values(
            model, sparse_iris, sparse_iris
        )
        for i in range(3):
            self.assertTupleEqual(shap_values[i].shape, sparse_iris.X.shape)
def test_all_regressors(self):
    """Every regression learner yields a single SHAP matrix."""
    for learner_cls in test_regression.all_learners():
        with self.subTest(learner_cls.name):
            model = learner_cls()(self.housing)
            shap_values, _, _, _ = compute_shap_values(
                model, self.housing, self.housing
            )
            # regression has exactly one "class", hence one matrix
            self.assertEqual(len(shap_values), 1)
            self.assertTupleEqual(self.housing.X.shape, shap_values[0].shape)
def test_all_classifiers(self):
    """Every classification learner yields one SHAP matrix per class."""
    for learner in LearnerAccessibility.all_learners(None):
        with self.subTest(learner.name):
            model = learner(self.iris)
            shap_values, _, _, _ = compute_shap_values(
                model, self.iris, self.iris
            )
            self.assertEqual(len(shap_values), 3)
            for class_shap in shap_values:
                self.assertTupleEqual(self.iris.X.shape, class_shap.shape)
def test_explain_regression(self):
    """Regression explanation: one SHAP matrix and a scalar base value."""
    model = LinearRegressionLearner()(self.housing)
    shap_values, _, sample_mask, base_value = compute_shap_values(
        model, self.housing, self.housing
    )
    self.assertEqual(len(shap_values), 1)
    self.assertTupleEqual(shap_values[0].shape, self.housing.X.shape)
    self.assertIsInstance(shap_values, list)
    self.assertIsInstance(shap_values[0], np.ndarray)
    # dataset is too short to be subsampled, so every row is kept
    self.assertEqual(len(shap_values[0]), sample_mask.sum())
    self.assertTupleEqual((1,), base_value.shape)
def test_get_shap_values_and_colors(self):
    """get_shap_values_and_colors matches separate compute/color calls."""
    model = LogisticRegressionLearner()(self.iris)
    expected_shap, transformed, expected_mask, _ = compute_shap_values(
        model, self.iris, self.iris
    )
    expected_colors = compute_colors(transformed)
    shap_values, attr_names, mask, colors = get_shap_values_and_colors(
        model, self.iris
    )
    np.testing.assert_array_equal(expected_shap, shap_values)
    np.testing.assert_array_equal(expected_colors, colors)
    self.assertListEqual(
        [a.name for a in transformed.domain.attributes], attr_names
    )
    np.testing.assert_array_equal(expected_mask, mask)
def test_all_regressors(self):
    """Test explanation for all regressors.

    CurveFitLearner needs an explicit model function, so it is built
    specially; every other learner is instantiated with defaults.
    """
    for learner in test_regression.all_learners():
        with self.subTest(learner):
            if learner == CurveFitLearner:
                attrs = self.housing.domain.attributes
                # BUGFIX: np.sum over a generator is deprecated in NumPy
                # (it silently falls back to builtin-sum semantics and
                # warns). The builtin sum over the column iterator
                # computes exactly the same row-wise sum.
                learner = CurveFitLearner(
                    lambda x, a: sum(x[:, i] for i in range(len(attrs))),
                    [],
                    [a.name for a in attrs],
                )
            else:
                learner = learner()
            model = learner(self.housing)
            shap_values, _, _, _ = compute_shap_values(
                model, self.housing, self.housing
            )
            # regression has exactly one "class", hence one matrix
            self.assertEqual(len(shap_values), 1)
            self.assertTupleEqual(self.housing.X.shape, shap_values[0].shape)
def test_all_classifiers(self):
    """Test explanation for all classifiers."""
    for learner in test_classification.all_learners():
        with self.subTest(learner):
            if learner == ThresholdLearner:
                # ThresholdLearner requires a binary class variable
                continue
            params = inspect.signature(learner).parameters
            kwargs = (
                {"base_learner": LogisticRegressionLearner()}
                if "base_learner" in params
                else {}
            )
            model = learner(**kwargs)(self.iris)
            shap_values, _, _, _ = compute_shap_values(
                model, self.iris, self.iris
            )
            self.assertEqual(len(shap_values), 3)
            for class_shap in shap_values:
                self.assertTupleEqual(self.iris.X.shape, class_shap.shape)