# Imports assumed for the snippets below; the helper factories
# (create_sklearn_logistic_regressor, create_sklearn_random_forest_regressor,
# retrieve_dataset, validate_correlation) come from the interpret-community
# test suite's common_utils, and the exact import paths here are assumptions.
import logging

import numpy as np
import shap
from scipy.sparse import csr_matrix
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from interpret_community import TabularExplainer
from interpret_community.common.constants import ExplainParams, ModelTask
from interpret_community.common.policy import SamplingPolicy

test_logger = logging.getLogger(__name__)  # stand-in for the suite's logger


    def test_verify_linear_model_coefficient_explanation(self):
        # Validate our explainer against an explainable linear model
        X, y = shap.datasets.adult()
        x_train, x_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=0.2,
                                                            random_state=7)
        # Fit a logistic regression classifier
        model = create_sklearn_logistic_regressor(x_train, y_train)

        # Create tabular explainer
        exp = TabularExplainer(model,
                               x_train,
                               features=list(range(x_train.shape[1])))
        test_logger.info(
            "Running explain model for test_verify_linear_model_coefficient_explanation"
        )
        # Validate evaluation sampling
        policy = {
            ExplainParams.SAMPLING_POLICY:
            SamplingPolicy(allow_eval_sampling=True)
        }
        explanation = exp.explain_global(x_test, **policy)
        mean_train = np.mean(x_train.values, axis=0)
        # Retrieve the model coefficients
        coefficients = model.coef_[0]
        # Scale the coefficients by the feature means for a rough
        # ground truth of global importance
        norm_coeff = mean_train * coefficients
        # Rank features by absolute scaled coefficient, descending
        norm_coeff_imp = np.abs(norm_coeff).argsort()[..., ::-1]
        # Calculate the correlation
        validate_correlation(explanation.global_importance_rank,
                             norm_coeff_imp, 0.76)
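

# A minimal sketch of what validate_correlation may check; the real helper
# lives in the test suite's common_utils, so the name, signature and exact
# statistic here are assumptions. The idea: the explainer's global importance
# order should agree with the coefficient-based order under Spearman rank
# correlation.
def validate_correlation_sketch(computed_rank, expected_rank, min_corr):
    from scipy.stats import spearmanr
    # Both inputs are feature indices ordered from most to least important;
    # identical orderings give a correlation of 1.0
    corr, _ = spearmanr(computed_rank, expected_rank)
    assert corr > min_corr, 'rank correlation %.3f below %.3f' % (corr, min_corr)
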
def tabular_explainer_imp(model, x_train, x_test, allow_eval_sampling=True):
    # Create tabular explainer
    exp = TabularExplainer(model,
                           x_train,
                           features=list(range(x_train.shape[1])))
    # Validate evaluation sampling
    policy = {
        ExplainParams.SAMPLING_POLICY:
        SamplingPolicy(allow_eval_sampling=allow_eval_sampling)
    }
    explanation = exp.explain_global(x_test, **policy)
    return explanation.global_importance_rank
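

# A short usage sketch for tabular_explainer_imp above; LogisticRegression is
# a stand-in for the suite's model factory, so this setup is an assumption.
def example_tabular_explainer_imp_usage():
    from sklearn.linear_model import LogisticRegression
    X, y = shap.datasets.adult()
    x_train, x_test, y_train, _ = train_test_split(X, y, test_size=0.2,
                                                   random_state=7)
    model = LogisticRegression(max_iter=1000).fit(x_train, y_train)
    # Returns feature indices ordered from most to least important
    return tabular_explainer_imp(model, x_train, x_test)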
    def test_explain_model_sparse_tree(self, tabular_explainer):
        X, y = retrieve_dataset('a1a.svmlight')
        x_train, x_test, y_train, _ = train_test_split(X, y, test_size=0.002, random_state=7)
        # Fit a random forest regression model
        model = create_sklearn_random_forest_regressor(x_train, y_train)
        _, cols = x_train.shape
        # Use a single all-zeros sparse row as the background dataset
        background = csr_matrix((1, cols), dtype=x_train.dtype)

        # Create tabular explainer
        exp = tabular_explainer(model, background)
        test_logger.info('Running explain global for test_explain_model_sparse_tree')
        policy = SamplingPolicy(allow_eval_sampling=True)
        exp.explain_global(x_test, sampling_policy=policy)
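

# A standalone sketch of why the sparse background above works: passing a
# shape tuple to csr_matrix allocates an empty (all-zeros) matrix, so the
# explainer computes contributions against an all-zeros baseline row instead
# of the full training matrix (function name is illustrative only).
def sparse_zero_background_demo(n_cols=4):
    background = csr_matrix((1, n_cols), dtype=np.float64)
    # No stored entries: every feature's baseline value is zero
    assert background.nnz == 0 and background.shape == (1, n_cols)
    return background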
    def test_verify_pipeline_model_coefficient_explanation(self):
        # Validate our explainer against an explainable linear model
        X, y = shap.datasets.adult()
        x_train, x_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=0.2,
                                                            random_state=7)
        # Note: in pipeline case, we use KernelExplainer;
        # in linear case we use LinearExplainer which is much faster
        pipeline = [True, False]
        threshold = [0.85, 0.76]
        for idx, is_pipeline in enumerate(pipeline):
            # Fit a logistic regression classifier
            model = create_sklearn_logistic_regressor(x_train,
                                                      y_train,
                                                      pipeline=is_pipeline)

            # Create tabular explainer
            exp = TabularExplainer(model,
                                   x_train,
                                   features=list(range(x_train.shape[1])))
            test_logger.info(
                "Running explain model for test_verify_pipeline_model_coefficient_explanation"
            )
            # Validate evaluation sampling
            policy = {
                ExplainParams.SAMPLING_POLICY:
                SamplingPolicy(allow_eval_sampling=True)
            }
            explanation = exp.explain_global(x_test, **policy)
            mean_train = np.mean(x_train.values, axis=0)
            # Retrieve the model coefficients
            if isinstance(model, Pipeline):
                model = model.steps[0][1]
            coefficients = model.coef_[0]
            # Scale the coefficients by the feature means for a rough
            # ground truth of global importance
            norm_coeff = mean_train * coefficients
            # Rank features by absolute scaled coefficient, descending
            norm_coeff_imp = np.abs(norm_coeff).argsort()[..., ::-1]
            # Calculate the correlation
            validate_correlation(explanation.global_importance_rank,
                                 norm_coeff_imp, threshold[idx])
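

# A minimal sketch of the create_sklearn_logistic_regressor factory used by
# both tests above; the real one lives in the suite's common_utils, so the
# solver and defaults here are assumptions.
def create_sklearn_logistic_regressor_sketch(x_train, y_train, pipeline=False):
    from sklearn.linear_model import LogisticRegression
    model = LogisticRegression(max_iter=1000)
    if pipeline:
        # A Pipeline wrapper makes the model opaque to the explainer, which
        # then uses the model-agnostic kernel path instead of the faster
        # LinearExplainer (per the comment in the pipeline test above)
        model = Pipeline([('logistic', model)])
    return model.fit(x_train, y_train)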
    def _explain_model_dnn_common(self, tabular_explainer, model, x_train,
                                  x_test, y_train, features):
        # Create local tabular explainer without run history
        exp = tabular_explainer(model, x_train.values, features=features)
        policy = SamplingPolicy(allow_eval_sampling=True)
        exp.explain_global(x_test.values, sampling_policy=policy)
    # Variant of the helper above that pins the model task to classification
    def _explain_model_dnn_common(self, tabular_explainer, model, x_train,
                                  x_test, y_train, features):
        # Create tabular explainer, forcing the classification code path
        exp = tabular_explainer(model, x_train.values, features=features,
                                model_task=ModelTask.Classification)
        policy = SamplingPolicy(allow_eval_sampling=True)
        exp.explain_global(x_test.values, sampling_policy=policy)
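

# A usage sketch mirroring the _explain_model_dnn_common helpers above, with
# sklearn's MLPClassifier standing in for a DNN; the dataset, model and this
# exact setup are assumptions, not part of the original tests.
def example_explain_dnn_usage():
    from sklearn.neural_network import MLPClassifier
    X, y = shap.datasets.adult()
    x_train, x_test, y_train, _ = train_test_split(X, y, test_size=0.2,
                                                   random_state=7)
    model = MLPClassifier(hidden_layer_sizes=(16,), max_iter=300)
    model.fit(x_train.values, y_train)
    exp = TabularExplainer(model, x_train.values, features=list(X.columns),
                           model_task=ModelTask.Classification)
    policy = SamplingPolicy(allow_eval_sampling=True)
    return exp.explain_global(x_test.values, sampling_policy=policy)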