Example #1
    def test_get_global_raw_explanations_classification_complex_mapping(
            self, mimic_explainer):
        x_train, y_train, x_test, y_test, classes = create_multiclass_classification_dataset(
            num_features=21, num_informative=10)
        model = create_sklearn_svm_classifier(x_train, y_train)

        exp = mimic_explainer(model,
                              x_train,
                              LGBMExplainableModel,
                              classes=classes)

        global_explanation = exp.explain_global(x_test)
        assert not global_explanation.is_raw
        assert not global_explanation.is_engineered

        feature_map = [
            [1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1]
        ]
        feature_map = np.array(feature_map)
        feature_names = [str(i) for i in range(feature_map.shape[0])]

        global_raw_explanation = global_explanation.get_raw_explanation(
            [feature_map],
            raw_feature_names=feature_names[:feature_map.shape[0]])

        self.validate_global_explanation_classification(
            global_explanation, global_raw_explanation, feature_map, classes,
            feature_names)
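The block feature map in Example #1 groups several engineered columns under each raw feature. A minimal sketch of one common roll-up convention, summing the importances of a raw feature's engineered children via a matrix product (toy numbers; the library's own aggregation may differ):

import numpy as np

# Two raw features, each expanded into two engineered columns.
feature_map = np.array([[1, 1, 0, 0],
                        [0, 0, 1, 1]])
engineered_importances = np.array([0.1, 0.2, 0.3, 0.4])
# Roll engineered importances up to raw features by summing each block.
raw_importances = feature_map @ engineered_importances
assert raw_importances.shape == (2,)
assert np.allclose(raw_importances, [0.3, 0.7])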
Example #2
    def test_get_local_raw_explanations_classification(self, iris, tabular_explainer):
        model = create_sklearn_svm_classifier(iris[DatasetConstants.X_TRAIN], iris[DatasetConstants.Y_TRAIN])

        exp = tabular_explainer(model, iris[DatasetConstants.X_TRAIN], features=iris[DatasetConstants.FEATURES],
                                classes=iris[DatasetConstants.CLASSES])

        local_explanation = exp.explain_local(iris[DatasetConstants.X_TEST][0])

        num_engineered_feats = len(iris[DatasetConstants.FEATURES])
        feature_map = np.eye(num_engineered_feats - 1, num_engineered_feats)

        local_raw_explanation = local_explanation.get_raw_explanation([feature_map])

        assert len(local_raw_explanation.local_importance_values) == len(iris[DatasetConstants.CLASSES])
        assert len(local_raw_explanation.local_importance_values[0]) == feature_map.shape[0]

        local_rank = local_raw_explanation.get_local_importance_rank()
        assert len(local_rank) == len(iris[DatasetConstants.CLASSES])
        assert len(local_rank[0]) == feature_map.shape[0]

        ranked_names = local_raw_explanation.get_ranked_local_names()
        assert len(ranked_names) == len(iris[DatasetConstants.CLASSES])
        assert len(ranked_names[0]) == feature_map.shape[0]

        ranked_values = local_raw_explanation.get_ranked_local_values()
        assert len(ranked_values) == len(iris[DatasetConstants.CLASSES])
        assert len(ranked_values[0]) == feature_map.shape[0]
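Here feature_map = np.eye(num_engineered_feats - 1, num_engineered_feats) declares that each of the n - 1 raw features maps one-to-one onto one of the first n - 1 engineered columns, leaving one engineered column with no raw parent, which is why the raw explanation has one fewer feature than the engineered one. A small sketch of the shape:

import numpy as np

n = 4                            # engineered feature count
feature_map = np.eye(n - 1, n)   # 3 raw features x 4 engineered features
assert feature_map.shape == (3, 4)
assert feature_map[0].tolist() == [1.0, 0.0, 0.0, 0.0]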
Example #3
    def test_get_global_raw_explanations_classification(
            self, iris, tabular_explainer):
        model = create_sklearn_svm_classifier(iris[DatasetConstants.X_TRAIN],
                                              iris[DatasetConstants.Y_TRAIN])

        exp = tabular_explainer(model,
                                iris[DatasetConstants.X_TRAIN],
                                features=iris[DatasetConstants.FEATURES],
                                classes=iris[DatasetConstants.CLASSES])

        global_explanation = exp.explain_global(iris[DatasetConstants.X_TEST])
        assert not global_explanation.is_raw
        assert not global_explanation.is_engineered
        num_engineered_feats = len(iris[DatasetConstants.FEATURES])

        feature_map = np.eye(num_engineered_feats - 1, num_engineered_feats)
        feature_names = [str(i) for i in range(feature_map.shape[0])]

        global_raw_explanation = global_explanation.get_raw_explanation(
            [feature_map],
            raw_feature_names=feature_names[:feature_map.shape[0]])

        self.validate_global_raw_explanation_classification(
            global_explanation, global_raw_explanation, feature_map,
            iris[DatasetConstants.CLASSES], feature_names)
Example #4
    def test_cohort_filter_multiclass_classification_outcome(
            self, arg, correct_prediction):
        X_train, X_test, y_train, y_test, feature_names = create_iris_pandas()
        model = create_sklearn_svm_classifier(X_train, y_train)
        model_task = ModelTask.CLASSIFICATION
        categorical_features = []

        # arg index 1 corresponds to incorrect predictions and
        # arg index 0 corresponds to correct predictions
        filters = [{
            'arg': arg,
            'column': CLASSIFICATION_OUTCOME,
            'method': 'includes'
        }]
        pred_y = model.predict(X_test)
        validation_data = create_validation_data(X_test, y_test, pred_y)
        if correct_prediction:
            validation_filter = validation_data[PRED_Y] == validation_data[
                TRUE_Y]
        else:
            validation_filter = validation_data[PRED_Y] != validation_data[
                TRUE_Y]
        validation_data = validation_data.loc[validation_filter]
        validation_data = validation_data.drop(columns=PRED_Y)
        run_error_analyzer(validation_data,
                           model,
                           X_test,
                           y_test,
                           feature_names,
                           categorical_features,
                           model_task,
                           filters=filters,
                           is_empty_validation_data=(not correct_prediction))
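The correct/incorrect split above is just a pandas boolean mask over the prediction and label columns. A self-contained sketch with hypothetical column names (true_y and pred_y stand in for TRUE_Y and PRED_Y):

import pandas as pd

df = pd.DataFrame({'true_y': [0, 1, 2, 1],
                   'pred_y': [0, 2, 2, 1]})
correct = df.loc[df['pred_y'] == df['true_y']]    # rows the model got right
incorrect = df.loc[df['pred_y'] != df['true_y']]  # rows the model got wrong
assert len(correct) == 3 and len(incorrect) == 1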
Example #5
    def test_explain_multi_local_instance_classification(
            self, iris, tabular_explainer):
        # Fit an SVM model
        model = create_sklearn_svm_classifier(iris[DatasetConstants.X_TRAIN],
                                              iris[DatasetConstants.Y_TRAIN])

        exp = tabular_explainer(model,
                                iris[DatasetConstants.X_TRAIN],
                                features=iris[DatasetConstants.FEATURES],
                                classes=iris[DatasetConstants.CLASSES])

        local_explanation = exp.explain_local(iris[DatasetConstants.X_TEST])

        assert len(local_explanation.local_importance_values) == len(
            iris[DatasetConstants.CLASSES])
        assert len(local_explanation.local_importance_values[0]) == len(
            iris[DatasetConstants.X_TEST])
        assert len(local_explanation.local_importance_values[0][0]) == len(
            iris[DatasetConstants.FEATURES])

        local_rank = local_explanation.get_local_importance_rank()
        assert len(local_rank) == len(iris[DatasetConstants.CLASSES])
        assert len(local_rank[0]) == len(iris[DatasetConstants.X_TEST])
        assert len(local_rank[0][0]) == len(iris[DatasetConstants.FEATURES])

        ranked_names = local_explanation.get_ranked_local_names()
        assert len(ranked_names) == len(iris[DatasetConstants.CLASSES])
        assert len(ranked_names[0]) == len(iris[DatasetConstants.X_TEST])
        assert len(ranked_names[0][0]) == len(iris[DatasetConstants.FEATURES])

        ranked_values = local_explanation.get_ranked_local_values()
        assert len(ranked_values) == len(iris[DatasetConstants.CLASSES])
        assert len(ranked_values[0]) == len(iris[DatasetConstants.X_TEST])
        assert len(ranked_values[0][0]) == len(iris[DatasetConstants.FEATURES])
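The assertions above rely on the nesting convention [class][sample][feature] for a classifier's local importance values. A toy illustration of that convention:

import numpy as np

# 3 classes, 2 evaluated samples, 4 features
toy_local_values = np.zeros((3, 2, 4))
assert len(toy_local_values) == 3        # one block per class
assert len(toy_local_values[0]) == 2     # one row per evaluated sample
assert len(toy_local_values[0][0]) == 4  # one value per feature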
Example #6
    def test_pandas_no_feature_names(self, iris, tabular_explainer,
                                     verify_tabular):
        # create pandas dataframes
        x_train = pd.DataFrame(data=iris[DatasetConstants.X_TRAIN],
                               columns=iris[DatasetConstants.FEATURES])
        x_test = pd.DataFrame(data=iris[DatasetConstants.X_TEST],
                              columns=iris[DatasetConstants.FEATURES])
        # Fit an SVM model
        model = create_sklearn_svm_classifier(x_train,
                                              iris[DatasetConstants.Y_TRAIN])

        exp = tabular_explainer(model,
                                x_train,
                                classes=iris[DatasetConstants.CLASSES])
        test_logger.info(
            "Running explain global for test_pandas_no_feature_names")
        explanation = exp.explain_global(x_test)
        ranked_global_values = explanation.get_ranked_global_values()
        ranked_per_class_values = explanation.get_ranked_per_class_values()
        ranked_global_names = explanation.get_ranked_global_names()
        ranked_per_class_names = explanation.get_ranked_per_class_names()

        self.verify_iris_overall_features(ranked_global_names,
                                          ranked_global_values, verify_tabular)
        self.verify_iris_per_class_features(ranked_per_class_names,
                                            ranked_per_class_values)
Example #7
    def test_serialize_mimic_lightgbm(self):
        test_logger.info("Running test_serialize_mimic_lightgbm to validate serializing explainer with lightgbm model")
        x_train, x_test, y_train, _, feature_names, target_names = create_scikit_cancer_data()
        model = create_sklearn_svm_classifier(x_train, y_train)
        model_task = ModelTask.Unknown
        kwargs = {'explainable_model_args': {'n_jobs': 1}, 'augment_data': False, 'reset_index': True}
        explainer = MimicExplainer(model, x_train, LGBMExplainableModel, features=feature_names,
                                   model_task=model_task, classes=target_names, **kwargs)
        explanation = explainer.explain_global(x_test)
        assert explanation.method == LIGHTGBM_METHOD

        tree_explainer = shap.TreeExplainer(explainer.surrogate_model.model)

        # Validate wrapped model, surrogate, and tree explainer with surrogate can be serialized
        model_name = 'wrapped_model.joblib'
        surrogate_name = 'surrogate_model.joblib'
        tree_explainer_name = 'tree_explainer_model.joblib'
        with open(model_name, 'wb') as stream:
            dump(explainer.model, stream)
        with open(surrogate_name, 'wb') as stream:
            dump(explainer.surrogate_model.model, stream)
        with open(tree_explainer_name, 'wb') as stream:
            dump(tree_explainer, stream)
        assert path.exists(model_name)
        assert path.exists(surrogate_name)
        assert path.exists(tree_explainer_name)
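A natural follow-up, not part of the original test, is to load the artifacts back with joblib and confirm they are usable; a sketch under the assumption that the files written above are still in the working directory:

from joblib import load

reloaded_surrogate = load('surrogate_model.joblib')
reloaded_tree_explainer = load('tree_explainer_model.joblib')
# The reloaded tree explainer can be queried like the original, e.g.:
# shap_values = reloaded_tree_explainer.shap_values(x_test)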
Example #8
    def test_local_explanation(self, mimic_explainer):
        # Validate visualizing ExplanationDashboard with a local explanation
        x_train, x_test, y_train, y_test, feature_names, target_names = create_cancer_data()
        # Fit an SVM model
        model = create_sklearn_svm_classifier(x_train, y_train)
        explainer = mimic_explainer(model, x_train, LGBMExplainableModel,
                                    features=feature_names, classes=target_names)
        explanation = explainer.explain_local(x_test)
        ExplanationDashboard(explanation, model, dataset=x_test, true_y=y_test)
Example #9
    def test_pickle_unpickle_mimic_explainer_classification(self, surrogate_model):
        x_train, x_test, y_train, _, feature_names, target_names = create_scikit_cancer_data()
        model = create_sklearn_svm_classifier(x_train, y_train)
        model_task = ModelTask.Unknown
        explainer = MimicExplainer(model, x_train, surrogate_model, features=feature_names,
                                   model_task=model_task, classes=target_names)

        self._verify_explanations(explainer, x_test, get_mimic_method(surrogate_model))
        recovered_explainer = self.pickle_unpickle_explainer(explainer)
        self._verify_explanations(recovered_explainer, x_test, get_mimic_method(surrogate_model))
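The pickle_unpickle_explainer helper is not shown in this listing; a minimal sketch of what such a round-trip could look like, written as a standalone function and using in-memory pickling as an assumption (the real helper may write to disk):

import pickle

def pickle_unpickle_explainer(explainer):
    # Serialize the explainer to bytes and deserialize it again.
    payload = pickle.dumps(explainer)
    return pickle.loads(payload)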
Example #10
    def test_get_global_raw_explanations_classification(
            self, iris, tabular_explainer):
        model = create_sklearn_svm_classifier(iris[DatasetConstants.X_TRAIN],
                                              iris[DatasetConstants.Y_TRAIN])

        exp = tabular_explainer(model,
                                iris[DatasetConstants.X_TRAIN],
                                features=iris[DatasetConstants.FEATURES],
                                classes=iris[DatasetConstants.CLASSES])

        global_explanation = exp.explain_global(iris[DatasetConstants.X_TEST])
        assert not global_explanation.is_raw
        assert not global_explanation.is_engineered
        num_engineered_feats = len(iris[DatasetConstants.FEATURES])

        feature_map = np.eye(num_engineered_feats - 1, num_engineered_feats)
        feature_names = [str(i) for i in range(feature_map.shape[0])]

        global_raw_explanation = global_explanation.get_raw_explanation(
            [feature_map],
            raw_feature_names=feature_names[:feature_map.shape[0]])
        assert not global_explanation.is_raw
        assert global_explanation.is_engineered

        assert global_raw_explanation.expected_values == global_explanation.expected_values
        assert global_raw_explanation.init_data == global_explanation.init_data
        assert np.all(
            global_raw_explanation.eval_data == global_explanation.eval_data)

        per_class_values = global_raw_explanation.get_ranked_per_class_values()
        assert len(per_class_values) == len(iris[DatasetConstants.CLASSES])
        assert len(per_class_values[0]) == feature_map.shape[0]
        assert len(global_raw_explanation.get_ranked_per_class_names()
                   [0]) == feature_map.shape[0]
        feat_imps_global_local = np.array(
            global_raw_explanation.local_importance_values)
        assert feat_imps_global_local.shape[-1] == feature_map.shape[0]

        assert global_raw_explanation.is_raw
        assert not global_raw_explanation.is_engineered
        assert len(global_raw_explanation.get_ranked_global_values()
                   ) == feature_map.shape[0]
        assert len(global_raw_explanation.get_ranked_global_names()
                   ) == feature_map.shape[0]
        assert (global_raw_explanation.classes == iris[
            DatasetConstants.CLASSES]).all()

        assert global_raw_explanation.features == feature_names

        feat_imps_global = np.array(
            global_raw_explanation.global_importance_values)

        assert feat_imps_global.shape[-1] == feature_map.shape[0]
Example #11
    def test_interpret_dashboard(self, mimic_explainer):
        # Validate our explanation works with the interpret dashboard
        x_train, x_test, y_train, y_test, feature_names, target_names = create_cancer_data()
        # Fit an SVM model
        model = create_sklearn_svm_classifier(x_train, y_train)
        explainer = mimic_explainer(model,
                                    x_train,
                                    LGBMExplainableModel,
                                    features=feature_names,
                                    classes=target_names)
        explanation = explainer.explain_global(x_test)
        show(explanation)
Example #12
    def test_missing_true_labels(self):
        x_train, x_test, y_train, y_test, _, _ = create_cancer_data()
        # Fit an SVM model
        model = create_sklearn_svm_classifier(x_train, y_train)
        pfi_explainer = PFIExplainer(model, is_function=False)
        # Validate a helpful error is raised when the true_labels parameter is missing
        with pytest.raises(TypeError):
            pfi_explainer.explain_global(x_test)  # pylint: disable=no-value-for-parameter
        # Validate passing the labels as a positional argument works
        explanation1 = pfi_explainer.explain_global(x_test, y_test)
        # Validate passing the labels as a keyword argument works
        explanation2 = pfi_explainer.explain_global(x_test, true_labels=y_test)
        assert explanation1.global_importance_values == explanation2.global_importance_values
Example #13
    def test_explain_model_local_no_feature_names(self, iris, tabular_explainer, verify_tabular):
        # Fit an SVM model
        model = create_sklearn_svm_classifier(iris[DatasetConstants.X_TRAIN], iris[DatasetConstants.Y_TRAIN])

        # Create tabular explainer
        exp = tabular_explainer(model, iris[DatasetConstants.X_TRAIN], classes=iris[DatasetConstants.CLASSES])
        test_logger.info("Running explain global for test_explain_model_local")
        explanation = exp.explain_global(iris[DatasetConstants.X_TEST])
        ranked_global_values = explanation.get_ranked_global_values()
        ranked_global_names = explanation.get_ranked_global_names()
        ranked_per_class_values = explanation.get_ranked_per_class_values()
        ranked_per_class_names = explanation.get_ranked_per_class_names()
        self.verify_iris_overall_features_no_names(ranked_global_names, ranked_global_values)
        self.verify_iris_per_class_features_no_names(ranked_per_class_names, ranked_per_class_values)
Example #14
    def test_explanation_get_feature_importance_dict(self, iris, tabular_explainer):
        x_train = pd.DataFrame(data=iris[DatasetConstants.X_TRAIN], columns=iris[DatasetConstants.FEATURES])
        x_test = pd.DataFrame(data=iris[DatasetConstants.X_TEST], columns=iris[DatasetConstants.FEATURES])
        # Fit an SVM model
        model = create_sklearn_svm_classifier(x_train, iris[DatasetConstants.Y_TRAIN])

        exp = tabular_explainer(model, x_train, classes=iris[DatasetConstants.CLASSES])
        test_logger.info("Running explain global for test_pandas_no_feature_names")
        explanation = exp.explain_global(x_test)
        ranked_names = explanation.get_ranked_global_names()
        ranked_values = explanation.get_ranked_global_values()
        ranked_dict = explanation.get_feature_importance_dict()
        assert len(ranked_dict) == len(ranked_values)
        # Python 3.7+ dicts preserve insertion order, so the keys should match the ranked names
        assert ranked_names == list(ranked_dict.keys())
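Because Python 3.7+ dicts preserve insertion order, the returned mapping behaves like a dict built directly from the ranked names and values. A sketch of that equivalence with toy data:

ranked_names = ['petal length', 'petal width', 'sepal length', 'sepal width']
ranked_values = [0.52, 0.31, 0.11, 0.06]
importance_dict = dict(zip(ranked_names, ranked_values))
assert list(importance_dict.keys()) == ranked_names
assert list(importance_dict.values()) == ranked_values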
Example #15
    def test_cohort_filter_greater(self):
        X_train, X_test, y_train, y_test, feature_names = create_iris_pandas()
        filters = [{'arg': [2.8], 'column': SEPAL_WIDTH, 'method': 'greater'}]
        validation_data = create_validation_data(X_test, y_test)
        validation_data = validation_data.loc[X_test[SEPAL_WIDTH] > 2.8]
        model_task = ModelTask.CLASSIFICATION
        model = create_sklearn_svm_classifier(X_train, y_train)
        categorical_features = []
        run_error_analyzer(validation_data,
                           model,
                           X_test,
                           y_test,
                           feature_names,
                           categorical_features,
                           model_task,
                           filters=filters)
Example #16
    def test_serialize_kernel(self):
        test_logger.info("Running test_serialize_kernel to validate inner explainer and wrapped model serialization")
        x_train, _, y_train, _, feature_names, target_names = create_scikit_cancer_data()
        model = create_sklearn_svm_classifier(x_train, y_train)
        explainer = TabularExplainer(model,
                                     x_train,
                                     features=feature_names,
                                     classes=target_names)
        # Validate wrapped model and inner explainer can be serialized
        model_name = 'wrapped_model.joblib'
        explainer_name = 'inner_explainer.joblib'
        with open(explainer_name, 'wb') as stream:
            dump(explainer.explainer.explainer, stream)
        with open(model_name, 'wb') as stream:
            dump(explainer.model, stream)
        assert path.exists(model_name)
        assert path.exists(explainer_name)
Example #17
    def test_get_global_raw_explanations_classification_pandas(
            self, iris, mimic_explainer):
        x_train = pd.DataFrame(iris[DatasetConstants.X_TRAIN])
        x_test = pd.DataFrame(iris[DatasetConstants.X_TEST])
        model = create_sklearn_svm_classifier(x_train,
                                              iris[DatasetConstants.Y_TRAIN])

        exp = mimic_explainer(model,
                              x_train,
                              LinearExplainableModel,
                              features=iris[DatasetConstants.FEATURES],
                              classes=iris[DatasetConstants.CLASSES])

        global_explanation = exp.explain_global(x_test)
        assert not global_explanation.is_raw
        assert not global_explanation.is_engineered
        num_engineered_feats = len(iris[DatasetConstants.FEATURES])

        # Note: in this case an extra engineered feature is added relative to the raw
        # features, so the raw explanation will have one fewer column than the engineered one
        feature_map = np.eye(num_engineered_feats - 1, num_engineered_feats)
        feature_names = [str(i) for i in range(feature_map.shape[0])]

        has_raw_eval_data_options = [True, False]
        for has_raw_eval_data_option in has_raw_eval_data_options:
            if has_raw_eval_data_option:
                global_raw_explanation = global_explanation.get_raw_explanation(
                    [feature_map],
                    raw_feature_names=feature_names[:feature_map.shape[0]],
                    eval_data=x_test)
            else:
                global_raw_explanation = global_explanation.get_raw_explanation(
                    [feature_map],
                    raw_feature_names=feature_names[:feature_map.shape[0]])

            self.validate_global_raw_explanation_classification(
                global_raw_explanation,
                feature_map,
                iris[DatasetConstants.CLASSES],
                feature_names,
                has_raw_eval_data=has_raw_eval_data_option)
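The explicit loop over has_raw_eval_data_options could equally be written with pytest parametrization, which reports each case as its own test. A sketch with a hypothetical test name and the body elided:

import pytest

@pytest.mark.parametrize('has_raw_eval_data', [True, False])
def test_raw_explanation_eval_data(has_raw_eval_data, iris, mimic_explainer):
    # Build global_explanation, feature_map and feature_names as above,
    # then pass eval_data=x_test only when has_raw_eval_data is True.
    ...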
Example #18
    def test_cohort_filter_index(self):
        X_train, X_test, y_train, y_test, feature_names = create_iris_pandas()
        # filter on index, which can be done from the RAI dashboard
        filters = [{
            'arg': [40],
            'column': ROW_INDEX,
            'method': 'less and equal'
        }]
        validation_data = create_validation_data(X_test, y_test)
        validation_data = validation_data.loc[validation_data[ROW_INDEX] <= 40]
        model_task = ModelTask.CLASSIFICATION
        model = create_sklearn_svm_classifier(X_train, y_train)
        categorical_features = []
        run_error_analyzer(validation_data,
                           model,
                           X_test,
                           y_test,
                           feature_names,
                           categorical_features,
                           model_task,
                           filters=filters)
Example #19
    def test_old_explanation_dashboard(self, mimic_explainer):
        # Validate old explanation dashboard namespace works but only prints a warning
        x_train, x_test, y_train, y_test, feature_names, target_names = create_cancer_data()
        # Fit an SVM model
        model = create_sklearn_svm_classifier(x_train, y_train)
        explainer = mimic_explainer(model,
                                    x_train,
                                    LGBMExplainableModel,
                                    features=feature_names,
                                    classes=target_names)
        explanation = explainer.explain_local(x_test)
        err = (
            "ExplanationDashboard in interpret-community package is deprecated and removed."
            "Please use the ExplanationDashboard from raiwidgets package instead."
        )
        with pytest.warns(UserWarning, match=err):
            OldExplanationDashboard(explanation,
                                    model,
                                    dataset=x_test,
                                    true_y=y_test)
Example #20
    def test_get_global_raw_explanations_classification_pandas_transformations(
            self, iris, mimic_explainer):
        feature_names = iris[DatasetConstants.FEATURES]
        x_train = pd.DataFrame(iris[DatasetConstants.X_TRAIN],
                               columns=feature_names)
        x_test = pd.DataFrame(iris[DatasetConstants.X_TEST],
                              columns=feature_names)
        # Note: in this case the transformations drop a feature, so the raw explanation
        # will have one more column than the engineered explanation
        col_transformer = get_transformations_one_to_many_smaller(
            feature_names)
        x_train_transformed = col_transformer.fit_transform(x_train)
        transformations = get_transformations_from_col_transformer(
            col_transformer)

        model = create_sklearn_svm_classifier(x_train_transformed,
                                              iris[DatasetConstants.Y_TRAIN])

        exp = mimic_explainer(model,
                              x_train,
                              LinearExplainableModel,
                              features=feature_names,
                              classes=iris[DatasetConstants.CLASSES],
                              transformations=transformations)

        # Create global explanation without local importance values
        global_raw_explanation = exp.explain_global(x_test,
                                                    include_local=False)
        num_raw_feats = len(iris[DatasetConstants.FEATURES])
        num_engineered_feats = num_raw_feats - 1
        feature_map = np.eye(num_raw_feats, num_engineered_feats)
        self.validate_global_raw_explanation_classification(
            global_raw_explanation,
            feature_map,
            iris[DatasetConstants.CLASSES],
            feature_names,
            has_raw_eval_data=True)
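get_transformations_one_to_many_smaller is a test utility that is not shown in this listing. As a rough illustration only, a column transformer that drops one raw column (so the engineered space has one column fewer than the raw space, as the comment above describes) could look like this hypothetical helper:

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler

def make_dropping_transformer(feature_names):
    # Keep (and scale) all but the last raw feature; drop the remainder.
    kept = list(feature_names[:-1])
    return ColumnTransformer([('scale', StandardScaler(), kept)],
                             remainder='drop')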
Example #21
def iris_svm_model(iris):
    # uses iris DatasetConstants
    model = create_sklearn_svm_classifier(iris[DatasetConstants.X_TRAIN],
                                          iris[DatasetConstants.Y_TRAIN])
    yield model
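The yield suggests this function is used as a pytest fixture; a sketch of how it would typically be registered (the decorator is an assumption, as is the iris fixture it depends on):

import pytest

@pytest.fixture
def iris_svm_model(iris):
    # 'iris' is assumed to be another fixture exposing the DatasetConstants keys
    yield create_sklearn_svm_classifier(iris[DatasetConstants.X_TRAIN],
                                        iris[DatasetConstants.Y_TRAIN])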
Example #22
    def test_explain_model_serialization_binary(self, mimic_explainer):
        x_train, x_test, y_train, _, _, _ = create_cancer_data()
        # Fit an SVM model
        model = create_sklearn_svm_classifier(x_train, y_train)
        self._validate_model_serialization(model, x_train, x_test,
                                           mimic_explainer)