    def test_explain_raw_feats_regression(self, boston, tabular_explainer):
        # verify that no errors get thrown when calling get_raw_feature_importances
        x_train = boston[DatasetConstants.X_TRAIN][DATA_SLICE]
        x_test = boston[DatasetConstants.X_TEST][DATA_SLICE]
        y_train = boston[DatasetConstants.Y_TRAIN][DATA_SLICE]

        model = create_sklearn_linear_regressor(x_train, y_train)

        explainer = tabular_explainer(model, x_train)

        global_explanation = explainer.explain_global(x_test)
        local_explanation = explainer.explain_local(x_test)
        # the 0th raw feature maps to generated features 1 and 3, the 1st raw feature maps to generated features 0 and 2
        raw_feat_indices = [[1, 3], [0, 2]]
        num_generated_cols = x_train.shape[1]
        feature_map = _get_feature_map_from_indices_list(
            raw_feat_indices,
            num_raw_cols=2,
            num_generated_cols=num_generated_cols)
        global_raw_importances = global_explanation.get_raw_feature_importances(
            [feature_map])
        assert len(global_raw_importances) == len(raw_feat_indices), 'length of global importances ' \
                                                                     'does not match number of features'
        local_raw_importances = local_explanation.get_raw_feature_importances(
            [feature_map])
        assert len(local_raw_importances) == x_test.shape[0], 'length of local importances does not match number ' \
                                                              'of samples'
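For reference, a minimal sketch of the feature map built above, assuming _get_feature_map_from_indices_list produces a num_raw x num_generated 0/1 matrix (which is how the indices list reads):

    import numpy as np

    num_generated_cols = 4  # illustrative; the test uses x_train.shape[1]
    # assumed equivalent of _get_feature_map_from_indices_list([[1, 3], [0, 2]], ...)
    feature_map = np.zeros((2, num_generated_cols))
    feature_map[0, [1, 3]] = 1  # raw feature 0 -> generated features 1 and 3
    feature_map[1, [0, 2]] = 1  # raw feature 1 -> generated features 0 and 2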
Example No. 2
    def test_get_local_raw_explanations_sparse_regression(
            self, mimic_explainer):
        X, y = retrieve_dataset('a1a.svmlight')
        x_train, x_test, y_train, _ = train_test_split(X,
                                                       y,
                                                       test_size=0.2,
                                                       random_state=7)
        # Fit a linear regression model
        model = create_sklearn_linear_regressor(x_train, y_train)

        explainer = mimic_explainer(
            model,
            x_train,
            LinearExplainableModel,
            explainable_model_args={'sparse_data': True})
        global_explanation = explainer.explain_global(x_test)
        assert global_explanation.method == LINEAR_METHOD

        num_engineered_feats = x_train.shape[1]
        feature_map = np.eye(5, num_engineered_feats)

        global_raw_explanation = global_explanation.get_raw_explanation(
            [feature_map])
        self.validate_global_raw_explanation_regression(
            global_explanation, global_raw_explanation, feature_map)
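Here np.eye(5, num_engineered_feats) is an identity-style feature map: row i has a single 1 in column i, so the first 5 engineered features are treated as 5 raw features mapped one-to-one. A small sketch (the value of num_engineered_feats is illustrative):

    import numpy as np

    num_engineered_feats = 8
    feature_map = np.eye(5, num_engineered_feats)
    # feature_map[i, i] == 1 for i in 0..4; all other entries are 0,
    # i.e. raw feature i corresponds to engineered feature i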
Example No. 3
    def test_save_and_load_sparse_explanation(self, mimic_explainer):
        x_train, x_test, y_train, y_test = create_msx_data(0.05)
        # Fit a linear regression model
        model = create_sklearn_linear_regressor(x_train, y_train.toarray().flatten())
        explainable_model = LGBMExplainableModel
        explainer = mimic_explainer(model, x_train, explainable_model, augment_data=False)
        explanation = explainer.explain_global(x_test)
        verify_serialization(explanation)
Example No. 4
    def test_explain_model_linear_regression(self, boston, tabular_explainer):
        # Fit a linear regression model
        model = create_sklearn_linear_regressor(boston[DatasetConstants.X_TRAIN],
                                                boston[DatasetConstants.Y_TRAIN],
                                                pipeline=True)

        # Create tabular explainer
        exp = tabular_explainer(model, boston[DatasetConstants.X_TRAIN], features=boston[DatasetConstants.FEATURES])
        test_logger.info('Running explain global for test_explain_model_linear_regression')
        explanation = exp.explain_global(boston[DatasetConstants.X_TEST])
        self.verify_boston_overall_features_lr(explanation.get_ranked_global_names(),
                                               explanation.get_ranked_global_values())
Example No. 5
    def test_explain_model_local_kernel_regression(self, boston, tabular_explainer):
        # Fit a linear regression model
        model = create_sklearn_linear_regressor(boston[DatasetConstants.X_TRAIN], boston[DatasetConstants.Y_TRAIN])

        # Create tabular explainer
        exp = tabular_explainer(model, boston[DatasetConstants.X_TRAIN], features=boston[DatasetConstants.FEATURES])
        test_logger.info('Running explain local for test_explain_model_local_kernel_regression')
        explanation = exp.explain_local(boston[DatasetConstants.X_TEST])
        assert explanation.local_importance_values is not None
        assert len(explanation.local_importance_values) == len(boston[DatasetConstants.X_TEST])
        assert explanation.num_examples == len(boston[DatasetConstants.X_TEST])
        assert len(explanation.local_importance_values[0]) == len(boston[DatasetConstants.FEATURES])
        assert explanation.num_features == len(boston[DatasetConstants.FEATURES])
        self.verify_top_rows_local_features_with_and_without_top_k(explanation,
                                                                   self.boston_local_features_first_five_lr)
Example No. 6
    def test_validate_against_shap(self):
        # Validate our explainer against shap library directly
        X, y = shap.datasets.adult()
        x_train, x_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=0.02,
                                                            random_state=7)
        # Fit several classifiers
        tree_classifiers = [
            create_sklearn_random_forest_classifier(x_train, y_train)
        ]
        non_tree_classifiers = [
            create_sklearn_logistic_regressor(x_train, y_train)
        ]
        tree_regressors = [
            create_sklearn_random_forest_regressor(x_train, y_train)
        ]
        non_tree_regressors = [
            create_sklearn_linear_regressor(x_train, y_train)
        ]
        # For each model, validate we get the same results as calling shap directly
        test_logger.info(
            "Running tree classifiers in test_validate_against_shap")
        for model in tree_classifiers:
            # Run shap directly for comparison
            exp = shap.TreeExplainer(model)
            explanation = exp.shap_values(x_test)
            shap_overall_imp = get_shap_imp_classification(explanation)
            overall_imp = tabular_explainer_imp(model, x_train, x_test)
            validate_correlation(overall_imp, shap_overall_imp, 0.95)

        test_logger.info(
            "Running non tree classifiers in test_validate_against_shap")
        for model in non_tree_classifiers:
            # Run shap directly for comparison
            clustered = shap.kmeans(x_train, 10)
            exp = shap.KernelExplainer(model.predict_proba, clustered)
            explanation = exp.shap_values(x_test)
            shap_overall_imp = get_shap_imp_classification(explanation)
            overall_imp = tabular_explainer_imp(model, x_train, x_test)
            validate_correlation(overall_imp, shap_overall_imp, 0.95)

        test_logger.info(
            "Running tree regressors in test_validate_against_shap")
        for model in tree_regressors:
            # Run shap directly for comparison
            exp = shap.TreeExplainer(model)
            explanation = exp.shap_values(x_test)
            shap_overall_imp = get_shap_imp_regression(explanation)
            overall_imp = tabular_explainer_imp(model, x_train, x_test)
            validate_correlation(overall_imp, shap_overall_imp, 0.95)

        test_logger.info(
            "Running non tree regressors in test_validate_against_shap")
        for model in non_tree_regressors:
            # Run shap directly for comparison
            clustered = shap.kmeans(x_train, 10)
            exp = shap.KernelExplainer(model.predict, clustered)
            explanation = exp.shap_values(x_test)
            shap_overall_imp = get_shap_imp_regression(explanation)
            overall_imp = tabular_explainer_imp(model, x_train, x_test)
            validate_correlation(overall_imp, shap_overall_imp, 0.95)
Example No. 7
    def test_explain_model_serialization_regression(self, mimic_explainer):
        x_train, x_test, y_train, _, feature_names = create_energy_data()
        # Fit a linear model
        model = create_sklearn_linear_regressor(x_train, y_train)
        self._validate_model_serialization(model, x_train, x_test,
                                           mimic_explainer)
Example No. 8
    def test_validate_against_shap(self):
        # Validate our explainer against shap library directly
        X, y = shap.datasets.adult()
        x_train, x_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=0.02,
                                                            random_state=7)
        # Fit several classifiers
        tree_classifiers = [
            create_sklearn_random_forest_classifier(x_train, y_train)
        ]
        non_tree_classifiers = [
            create_sklearn_logistic_regressor(x_train, y_train)
        ]
        tree_regressors = [
            create_sklearn_random_forest_regressor(x_train, y_train)
        ]
        non_tree_regressors = [
            create_sklearn_linear_regressor(x_train, y_train)
        ]
        # For each model, validate we get the same results as calling shap directly
        test_logger.info(
            "Running tree classifiers in test_validate_against_shap")
        for model in tree_classifiers:
            # Run shap directly for comparison
            exp = shap.TreeExplainer(model)
            explanation = exp.shap_values(x_test)
            shap_overall_imp = get_shap_imp_classification(explanation)
            overall_imp = tabular_explainer_imp(model, x_train, x_test)
            validate_correlation(overall_imp, shap_overall_imp, 0.95)

        test_logger.info(
            "Running non tree classifiers in test_validate_against_shap")
        for model in non_tree_classifiers:
            # Run shap directly for comparison
            clustered = shap.kmeans(x_train, 10)
            exp = shap.KernelExplainer(model.predict_proba, clustered)
            explanation = exp.shap_values(x_test)
            shap_overall_imp = get_shap_imp_classification(explanation)
            overall_imp = tabular_explainer_imp(model, x_train, x_test)
            validate_correlation(overall_imp, shap_overall_imp, 0.95)

        test_logger.info(
            "Running tree regressors in test_validate_against_shap")
        for model in tree_regressors:
            # Run shap directly for comparison
            exp = shap.TreeExplainer(model)
            explanation = exp.shap_values(x_test)
            shap_overall_imp = get_shap_imp_regression(explanation)
            overall_imp = tabular_explainer_imp(model, x_train, x_test)
            validate_correlation(overall_imp, shap_overall_imp, 0.95)

        test_logger.info(
            "Running non tree regressors in test_validate_against_shap")
        for model in non_tree_regressors:
            # Run shap directly for comparison
            clustered = shap.kmeans(x_train, 10)
            exp = shap.KernelExplainer(model.predict, clustered)
            explanation = exp.shap_values(x_test)
            shap_overall_imp = get_shap_imp_regression(explanation)
            overall_imp = tabular_explainer_imp(model, x_train, x_test)
            validate_correlation(overall_imp, shap_overall_imp, 0.95)

        if not rapids_installed:
            pytest.skip("cuML not installed; will skip testing GPU Explainer")
        else:
            test_logger.info(
                "Running GPU non tree classifiers in test_validate_against_shap"
            )
            x_train, x_test, y_train, y_validation, _, _ = create_cancer_data()
            gpu_non_tree_classifiers = [
                create_cuml_svm_classifier(x_train.astype(np.float32),
                                           y_train.astype(np.float32))
            ]
            for model in gpu_non_tree_classifiers:
                exp = KernelExplainer(model=model.predict_proba,
                                      data=x_train.astype(np.float32))
                explanation = exp.shap_values(x_test.astype(np.float32))
                shap_overall_imp = get_shap_imp_classification(explanation)
                overall_imp = tabular_explainer_imp(model,
                                                    x_train.astype(np.float32),
                                                    x_test.astype(np.float32),
                                                    use_gpu=True)
                validate_correlation(overall_imp, shap_overall_imp, 0.95)
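rapids_installed and create_cuml_svm_classifier come from the surrounding test setup; a plausible sketch of the import guard, assuming it simply probes for cuML (an assumption, not the repo's actual code):

    try:
        # cuML provides the GPU KernelExplainer used above
        from cuml.explainer import KernelExplainer  # noqa: F401
        rapids_installed = True
    except ImportError:
        rapids_installed = False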