Code Example #1
    def test_explain_raw_feats_classification(self, iris, tabular_explainer):
        # verify that no errors are thrown when calling get_raw_feature_importances
        x_train = iris[DatasetConstants.X_TRAIN]
        x_test = iris[DatasetConstants.X_TEST]
        y_train = iris[DatasetConstants.Y_TRAIN]

        model = create_sklearn_random_forest_classifier(x_train, y_train)

        explainer = tabular_explainer(model, x_train)
        global_explanation = explainer.explain_global(x_test)
        local_explanation = explainer.explain_local(x_test)
        raw_feat_indices = [[1, 3], [0, 2]]
        num_generated_cols = x_train.shape[1]
        # Create a feature map for only two features
        feature_map = _get_feature_map_from_indices_list(
            raw_feat_indices,
            num_raw_cols=2,
            num_generated_cols=num_generated_cols)
        global_raw_importances = global_explanation.get_raw_feature_importances(
            [feature_map])
        assert len(global_raw_importances) == len(raw_feat_indices), \
            'length of global importances does not match number of features'
        local_raw_importances = local_explanation.get_raw_feature_importances(
            [feature_map])
        assert len(local_raw_importances) == len(iris[DatasetConstants.CLASSES]), \
            'length of local importances does not match number of classes'
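For reference, the feature map used above is just a raw-by-generated indicator matrix. A minimal sketch of the structure `_get_feature_map_from_indices_list` is expected to produce here (an illustrative reconstruction, not the library's implementation):

    import numpy as np

    # Illustrative reconstruction: raw feature 0 maps to generated columns
    # 1 and 3; raw feature 1 maps to generated columns 0 and 2.
    raw_feat_indices = [[1, 3], [0, 2]]
    num_generated_cols = 4  # iris has four generated columns
    feature_map = np.zeros((len(raw_feat_indices), num_generated_cols))
    for raw_idx, gen_idxs in enumerate(raw_feat_indices):
        feature_map[raw_idx, gen_idxs] = 1
    # feature_map:
    # [[0. 1. 0. 1.]
    #  [1. 0. 1. 0.]]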
Code Example #2
    def test_explain_model_random_forest_classification(
            self, tabular_explainer):
        X, y = shap.datasets.adult()
        x_train, x_test, y_train, _ = train_test_split(X,
                                                       y,
                                                       test_size=0.2,
                                                       random_state=7)
        # Fit a tree model
        model = create_sklearn_random_forest_classifier(x_train, y_train)

        # Create local tabular explainer without run history
        exp = tabular_explainer(model, x_train, features=X.columns.values)
        test_logger.info(
            'Running explain global for test_explain_model_random_forest_classification'
        )
        explanation = exp.explain_global(x_test)
        self.verify_adult_overall_features(
            explanation.get_ranked_global_names(),
            explanation.get_ranked_global_values())
        self.verify_adult_per_class_features(
            explanation.get_ranked_per_class_names(),
            explanation.get_ranked_per_class_values())
        self.verify_top_rows_local_features_with_and_without_top_k(
            explanation,
            self.adult_local_features_first_three_rf,
            is_classification=True,
            top_rows=3)
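As a quick sanity check of the ranked API used above: the ranked global values should come back sorted from most to least important, with one name per value. A minimal sketch against the explanation object (the descending-order behavior is an assumption about the ranked accessors):

    ranked_values = explanation.get_ranked_global_values()
    ranked_names = explanation.get_ranked_global_names()
    # Assumed contract: values sorted descending, names aligned to values.
    assert all(a >= b for a, b in zip(ranked_values, ranked_values[1:]))
    assert len(ranked_names) == len(ranked_values)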
Code Example #3
    def test_explain_model_classification_with_predict_only(self, tabular_explainer):
        X, y = shap.datasets.adult()
        x_train, x_test, y_train, _ = train_test_split(X, y, test_size=0.003, random_state=7)
        # Fit a tree model
        model = create_sklearn_random_forest_classifier(x_train, y_train)

        # Wrap the model in a predict-only API
        wrapped_model = wrap_classifier_without_proba(model)

        # Create tabular explainer
        exp = tabular_explainer(wrapped_model, x_train, features=X.columns.values, model_task=ModelTask.Classification)
        test_logger.info('Running explain global for test_explain_model_classification_with_predict_only')
        explanation = exp.explain_global(x_test)
        # Validate predicted y values are binary (0/1)
        assert np.all(np.isin(explanation.eval_y_predicted, [0, 1]))
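The wrapped model exposes only hard-label predictions, which forces the explainer down the predict-only code path. A minimal sketch of what a wrapper like wrap_classifier_without_proba could look like (hypothetical, for illustration):

    class PredictOnlyClassifier:
        """Hypothetical wrapper that hides predict_proba."""

        def __init__(self, model):
            self._model = model

        def predict(self, X):
            # Delegate to the underlying model's hard predictions only
            return self._model.predict(X)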
Code Example #4
    def test_basic_upload(self, iris, tabular_explainer):
        x_train = iris[DatasetConstants.X_TRAIN]
        x_test = iris[DatasetConstants.X_TEST]
        y_train = iris[DatasetConstants.Y_TRAIN]

        model = create_sklearn_random_forest_classifier(x_train, y_train)

        explainer = tabular_explainer(model, x_train)
        global_explanation = explainer.explain_global(x_test)
        mlflow.set_experiment(TEST_EXPERIMENT)
        client = mlflow.tracking.MlflowClient()
        with mlflow.start_run() as run:
            _log_explanation(TEST_EXPLANATION, global_explanation)
            os.mkdir(TEST_EXPLANATION)
            download_path = client.download_artifacts(
                run.info.run_id, '', dst_path=TEST_EXPLANATION)
        downloaded_explanation = load_explanation(download_path)
        _assert_explanation_equivalence(global_explanation,
                                        downloaded_explanation)
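The round trip above goes through MLflow's artifact store: the explanation is serialized under the active run, then pulled back to a local directory and deserialized. A minimal sketch of the same pattern done by hand (the directory names are placeholders, and save_explanation is assumed to be the serialization counterpart of the load_explanation call used above):

    import os

    import mlflow

    # Sketch: serialize locally, log the directory as run artifacts,
    # then download and deserialize it again.
    with mlflow.start_run() as run:
        save_explanation(global_explanation, 'explanation_dir')
        mlflow.log_artifacts('explanation_dir', artifact_path='explanation')
    os.makedirs('downloaded', exist_ok=True)
    local_path = mlflow.tracking.MlflowClient().download_artifacts(
        run.info.run_id, 'explanation', dst_path='downloaded')
    roundtripped = load_explanation(local_path)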
Code Example #5
    def test_upload_as_model(self, iris, tabular_explainer, tracking_uri):
        mlflow.set_tracking_uri(tracking_uri)
        x_train = iris[DatasetConstants.X_TRAIN]
        x_test = iris[DatasetConstants.X_TEST]
        y_train = iris[DatasetConstants.Y_TRAIN]

        model = create_sklearn_random_forest_classifier(x_train, y_train)

        explainer = tabular_explainer(model, x_train)
        global_explanation = explainer.explain_global(x_test)
        mlflow.set_experiment(TEST_EXPERIMENT)
        with mlflow.start_run() as run:
            log_explanation(TEST_EXPLANATION, global_explanation)
            os.makedirs(TEST_DOWNLOAD, exist_ok=True)
            run_id = run.info.run_id
        downloaded_explanation_mlflow = get_explanation(
            run_id, TEST_EXPLANATION)
        _assert_explanation_equivalence(global_explanation,
                                        downloaded_explanation_mlflow)
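Unlike the raw-artifact round trip in the previous example, the log_explanation/get_explanation pair treats the explanation as an MLflow model, so retrieval needs only the run id. A minimal sketch of restoring it in a fresh session (assuming the same tracking URI is configured):

    # Sketch: retrieve a previously logged explanation by run id.
    mlflow.set_tracking_uri(tracking_uri)
    restored = get_explanation(run_id, TEST_EXPLANATION)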
Code Example #6
    def test_validate_against_shap(self):
        # Validate our explainer against shap library directly
        X, y = shap.datasets.adult()
        x_train, x_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=0.02,
                                                            random_state=7)
        # Fit several classifiers
        tree_classifiers = [
            create_sklearn_random_forest_classifier(x_train, y_train)
        ]
        non_tree_classifiers = [
            create_sklearn_logistic_regressor(x_train, y_train)
        ]
        tree_regressors = [
            create_sklearn_random_forest_regressor(x_train, y_train)
        ]
        non_tree_regressors = [
            create_sklearn_linear_regressor(x_train, y_train)
        ]
        # For each model, validate we get the same results as calling shap directly
        test_logger.info(
            "Running tree classifiers in test_validate_against_shap")
        for model in tree_classifiers:
            # Run shap directly for comparison
            exp = shap.TreeExplainer(model)
            explanation = exp.shap_values(x_test)
            shap_overall_imp = get_shap_imp_classification(explanation)
            overall_imp = tabular_explainer_imp(model, x_train, x_test)
            validate_correlation(overall_imp, shap_overall_imp, 0.95)

        test_logger.info(
            "Running non tree classifiers in test_validate_against_shap")
        for model in non_tree_classifiers:
            # Run shap directly for comparison
            clustered = shap.kmeans(x_train, 10)
            exp = shap.KernelExplainer(model.predict_proba, clustered)
            explanation = exp.shap_values(x_test)
            shap_overall_imp = get_shap_imp_classification(explanation)
            overall_imp = tabular_explainer_imp(model, x_train, x_test)
            validate_correlation(overall_imp, shap_overall_imp, 0.95)

        test_logger.info(
            "Running tree regressors in test_validate_against_shap")
        for model in tree_regressors:
            # Run shap directly for comparison
            exp = shap.TreeExplainer(model)
            explanation = exp.shap_values(x_test)
            shap_overall_imp = get_shap_imp_regression(explanation)
            overall_imp = tabular_explainer_imp(model, x_train, x_test)
            validate_correlation(overall_imp, shap_overall_imp, 0.95)

        test_logger.info(
            "Running non tree regressors in test_validate_against_shap")
        for model in non_tree_regressors:
            # Run shap directly for comparison
            clustered = shap.kmeans(x_train, 10)
            exp = shap.KernelExplainer(model.predict, clustered)
            explanation = exp.shap_values(x_test)
            shap_overall_imp = get_shap_imp_regression(explanation)
            overall_imp = tabular_explainer_imp(model, x_train, x_test)
            validate_correlation(overall_imp, shap_overall_imp, 0.95)

        if not rapids_installed:
            pytest.skip("cuML is not installed; skipping GPU explainer test")
        test_logger.info(
            "Running GPU non tree classifiers in test_validate_against_shap")
        x_train, x_test, y_train, y_validation, _, _ = create_cancer_data()
        gpu_non_tree_classifiers = [
            create_cuml_svm_classifier(x_train.astype(np.float32),
                                       y_train.astype(np.float32))
        ]
        for model in gpu_non_tree_classifiers:
            # Run the GPU KernelExplainer directly for comparison
            exp = KernelExplainer(model=model.predict_proba,
                                  data=x_train.astype(np.float32))
            explanation = exp.shap_values(x_test.astype(np.float32))
            shap_overall_imp = get_shap_imp_classification(explanation)
            overall_imp = tabular_explainer_imp(model,
                                                x_train.astype(np.float32),
                                                x_test.astype(np.float32),
                                                use_gpu=True)
            validate_correlation(overall_imp, shap_overall_imp, 0.95)
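validate_correlation is the acceptance check tying each explainer's importances to shap's. One way such a helper could be implemented is with a Pearson correlation threshold (an assumption about the helper, shown for illustration only):

    import numpy as np

    def validate_correlation(overall_imp, shap_overall_imp, threshold):
        # Hypothetical implementation: require the two global importance
        # vectors to be strongly linearly correlated.
        a = np.asarray(overall_imp, dtype=float)
        b = np.asarray(shap_overall_imp, dtype=float)
        corr = np.corrcoef(a, b)[0, 1]
        assert corr > threshold, \
            'correlation {:.3f} below threshold {}'.format(corr, threshold)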