def test_kernel_explainer(self):
        learner = LogisticRegressionLearner()
        model = learner(self.iris)

        shap_values, _, sample_mask, base_value = compute_shap_values(
            model, self.iris, self.iris)

        self.assertEqual(len(shap_values), 3)
        self.assertTupleEqual(shap_values[0].shape, self.iris.X.shape)
        self.assertTupleEqual(shap_values[1].shape, self.iris.X.shape)
        self.assertTupleEqual(shap_values[2].shape, self.iris.X.shape)
        self.assertIsInstance(shap_values, list)
        self.assertIsInstance(shap_values[0], np.ndarray)
        # number of cases to short to be subsampled
        self.assertEqual(len(shap_values[0]), sample_mask.sum())
        self.assertTupleEqual((len(self.iris.domain.class_var.values), ),
                              base_value.shape)

        # test with small dataset
        shap_values, _, sample_mask, base_value = compute_shap_values(
            model, self.iris[:1], self.iris[:5])
        self.assertEqual(len(shap_values), 3)
        self.assertTupleEqual(shap_values[0].shape, (1, 4))
        self.assertTupleEqual(shap_values[1].shape, (1, 4))
        self.assertTupleEqual(shap_values[2].shape, (1, 4))
    def test_subsample(self):
        """Large data is subsampled to 1000 rows with a reproducible mask."""
        titanic = Table("titanic")
        model = LogisticRegressionLearner()(titanic)

        shap_values, _, sample_mask, _ = compute_shap_values(
            model, titanic, titanic)
        self.assertTupleEqual((1000, 8), shap_values[0].shape)
        self.assertTupleEqual((2201, ), sample_mask.shape)

        # a repeated run uses the same random seed, so masks must match
        _, _, repeated_mask, _ = compute_shap_values(model, titanic, titanic)
        np.testing.assert_array_equal(sample_mask, repeated_mask)
    def test_shap_random_seed(self):
        """Repeated explanations of the same model give identical values."""
        # exercise both the kernel (logistic regression) and the tree
        # (random forest) code paths
        for learner in (LogisticRegressionLearner(), RandomForestLearner()):
            model = learner(self.iris)

            first, _, _, _ = compute_shap_values(
                model, self.iris, self.iris)
            second, _, _, _ = compute_shap_values(
                model, self.iris, self.iris)
            np.testing.assert_array_equal(first, second)
    def test_kernel_explainer_sgd(self):
        """Smoke test: explaining an SGD model must not crash."""
        model = SGDClassificationLearner()(self.titanic)
        np.random.shuffle(self.titanic.X)

        # no assertions - the call itself succeeding is the test
        compute_shap_values(
            model, self.titanic[:200], self.titanic[:200])
 def test_missing_values(self):
     heart_disease = Table("heart_disease.tab")
     learner = TreeLearner()
     model = learner(heart_disease)
     shap_values, _, _, _ = compute_shap_values(model, heart_disease,
                                                heart_disease)
     self.assertEqual(len(shap_values), 2)
     self.assertTupleEqual(shap_values[0].shape, heart_disease.X.shape)
     self.assertTupleEqual(shap_values[1].shape, heart_disease.X.shape)
    def test_no_class(self):
        """Explanation works on data with no class variable in the domain."""
        iris_no_class = Table.from_table(
            Domain(self.iris.domain.attributes), self.iris)

        # cover both the tree (random forest) and the kernel (logistic
        # regression) explainer code paths
        for learner in (RandomForestLearner(), LogisticRegressionLearner()):
            model = learner(self.iris)
            shap_values, _, sample_mask, _ = compute_shap_values(
                model, iris_no_class, iris_no_class)

            self.assertTupleEqual(self.iris.X.shape, shap_values[0].shape)
            self.assertTupleEqual((len(self.iris), ), sample_mask.shape)
    def test_sparse(self):
        """Both kernel and tree explainers accept sparse input data."""
        sparse_iris = self.iris.to_sparse()

        # logistic regression -> kernel explainer; random forest -> tree
        for learner in (LogisticRegressionLearner(), RandomForestLearner()):
            model = learner(sparse_iris)

            shap_values, _, _, _ = compute_shap_values(
                model, sparse_iris, sparse_iris)
            for i in range(3):
                self.assertTupleEqual(
                    shap_values[i].shape, sparse_iris.X.shape)
 def test_all_regressors(self):
     """ Test explanation for all regressors """
     for learner in test_regression.all_learners():
         with self.subTest(learner.name):
             model = learner()(self.housing)
             shap_values, _, _, _ = compute_shap_values(
                 model, self.housing, self.housing)
             self.assertEqual(len(shap_values), 1)
             self.assertTupleEqual(self.housing.X.shape,
                                   shap_values[0].shape)
    def test_class_not_predicted(self):
        """
        This is a case where one class is missing in the data. In this case
        skl learners output probabilities with only two classes. Orange models
        add a zero probability for the missing class. In cases where we work
        directly with skl learners - all tree-like learners - it is added
        manually and tested here.
        """
        learner = RandomForestLearner()
        model = learner(self.iris[:100])

        shap_values, _, _, base_value = compute_shap_values(
            model, self.iris[:100], self.iris[:100])

        self.assertEqual(len(shap_values), 3)
        self.assertTupleEqual((3, ), base_value.shape)
        self.assertTrue(np.any(shap_values[0]))
        self.assertTrue(np.any(shap_values[1]))
        # missing class has all shap values 0
        self.assertFalse(np.any(shap_values[2]))

        # for one class SHAP returns only array (not list of arrays) -
        # must be handled
        learner = RandomForestLearner()
        model = learner(self.iris[:50])

        shap_values, _, _, base_value = compute_shap_values(
            model, self.iris[:100], self.iris[:100])
        self.assertEqual(len(shap_values), 3)
        self.assertTupleEqual((3, ), base_value.shape)

        # for Logistic regression Orange handles that - test anyway
        learner = LogisticRegressionLearner()
        model = learner(self.iris[:100])

        shap_values, _, _, base_value = compute_shap_values(
            model, self.iris[:100], self.iris[:100])
        self.assertEqual(len(shap_values), 3)
        self.assertTupleEqual((3, ), base_value.shape)
        self.assertNotEqual(shap_values[0].sum(), 0)
        self.assertNotEqual(shap_values[1].sum(), 0)
        # missing class has all shap values 0; checking np.any on the raw
        # values (not on .sum(), which could cancel to 0 for non-zero values)
        self.assertFalse(np.any(shap_values[2]))
 def test_all_classifiers(self):
     """ Test explanation for all classifiers """
     for learner in LearnerAccessibility.all_learners(None):
         with self.subTest(learner.name):
             model = learner(self.iris)
             shap_values, _, _, _ = compute_shap_values(
                 model, self.iris, self.iris)
             self.assertEqual(len(shap_values), 3)
             for i in range(3):
                 self.assertTupleEqual(self.iris.X.shape,
                                       shap_values[i].shape)
    def test_explain_regression(self):
        """A regression model yields a single SHAP array and base value."""
        model = LinearRegressionLearner()(self.housing)

        shap_values, _, sample_mask, base_value = compute_shap_values(
            model, self.housing, self.housing)

        self.assertIsInstance(shap_values, list)
        self.assertIsInstance(shap_values[0], np.ndarray)
        self.assertEqual(1, len(shap_values))
        self.assertTupleEqual(self.housing.X.shape, shap_values[0].shape)
        # number of cases too short to be subsampled - every row kept
        self.assertEqual(sample_mask.sum(), len(shap_values[0]))
        self.assertTupleEqual((1, ), base_value.shape)
    def test_get_shap_values_and_colors(self):
        """The convenience wrapper matches the separate computation steps."""
        model = LogisticRegressionLearner()(self.iris)

        expected_values, transformed, expected_mask, _ = compute_shap_values(
            model, self.iris, self.iris)
        expected_colors = compute_colors(transformed)

        values, attr_names, mask, colors = get_shap_values_and_colors(
            model, self.iris)

        np.testing.assert_array_equal(expected_values, values)
        np.testing.assert_array_equal(expected_colors, colors)
        self.assertListEqual(
            [attr.name for attr in transformed.domain.attributes],
            attr_names,
        )
        np.testing.assert_array_equal(expected_mask, mask)