Code example #1
    def test_empty_cohort_cancer_classification(self, analyzer_type):
        X_train, X_test, y_train, y_test, feature_names, _ = \
            create_cancer_data()

        model = create_kneighbors_classifier(X_train, y_train)

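        # Build a nested cohort filter: two range filters, on 'mean radius'
        # and 'mean texture', are AND'd in an inner group, and the outer
        # group wraps that result with OR.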
        composite_filters = [{
            COMPOSITE_FILTERS: [{
                COMPOSITE_FILTERS: [{
                    ARG: [20.45, 22.27],
                    COLUMN: 'mean radius',
                    METHOD: CohortFilterMethods.METHOD_RANGE
                }, {
                    ARG: [10.88, 14.46],
                    COLUMN: 'mean texture',
                    METHOD: CohortFilterMethods.METHOD_RANGE
                }],
                OPERATION:
                CohortFilterOps.AND
            }],
            OPERATION:
            CohortFilterOps.OR
        }]
        run_error_analyzer(model,
                           X_test,
                           y_test,
                           feature_names,
                           analyzer_type,
                           composite_filters=composite_filters)
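The uppercase keys in this example are constants rather than string literals. Judging from the literal keys used in code example #7 below, they presumably resolve to values like the following sketch (assumed here for readability; the real definitions live in the error-analysis package's constants module):

COMPOSITE_FILTERS = 'compositeFilters'
ARG = 'arg'
COLUMN = 'column'
METHOD = 'method'
OPERATION = 'operation'

class CohortFilterMethods:
    METHOD_RANGE = 'in the range of'

class CohortFilterOps:
    AND = 'and'
    OR = 'or'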
Code example #2
    def test_matrix_filter_cancer(self):
        (X_train, X_test, y_train, y_test, feature_names,
         _) = create_cancer_data()

        model_task = ModelTask.CLASSIFICATION
        run_error_analyzer_on_models(X_train, y_train, X_test, y_test,
                                     feature_names, model_task)
Code example #3
    def test_local_explanation(self, mimic_explainer):
        # Validate visualizing ExplanationDashboard with a local explanation
        x_train, x_test, y_train, y_test, feature_names, target_names = \
            create_cancer_data()
        # Fit an SVM model
        model = create_sklearn_svm_classifier(x_train, y_train)
        explainer = mimic_explainer(model, x_train, LGBMExplainableModel,
                                    features=feature_names,
                                    classes=target_names)
        explanation = explainer.explain_local(x_test)
        ExplanationDashboard(explanation, model, dataset=x_test, true_y=y_test)
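All of these examples depend on a create_cancer_data() helper that is not shown. A minimal sketch of what it is assumed to do, based on how its six return values are used:

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

def create_cancer_data():
    # Load sklearn's breast cancer dataset and split it into train/test sets.
    data = load_breast_cancer()
    x_train, x_test, y_train, y_test = train_test_split(
        data.data, data.target, test_size=0.2, random_state=7)
    return (x_train, x_test, y_train, y_test,
            list(data.feature_names), list(data.target_names))

The variants are evidently not identical across test files: code example #11 calls x_train.values, implying pandas DataFrames, while code examples #6 and #10 wrap the returned arrays in DataFrames themselves.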
Code example #4
    def test_error_report_cancer(self):
        X_train, X_test, y_train, y_test, feature_names, _ = \
            create_cancer_data()

        models = create_models_classification(X_train, y_train)

        for model in models:
            categorical_features = []
            run_error_analyzer(model, X_test, y_test, feature_names,
                               categorical_features)
Code example #5
    def test_matrix_filter_cancer(self):
        x_train, x_test, y_train, y_test, feature_names, _ = \
            create_cancer_data()

        models = create_models(x_train, y_train)

        for model in models:
            categorical_features = []
            run_error_analyzer(model, x_test, y_test, feature_names,
                               categorical_features)
Code example #6
    def test_raianalyzer_cancer(self):
        x_train, x_test, y_train, y_test, feature_names, classes = \
            create_cancer_data()
        x_train = pd.DataFrame(x_train, columns=feature_names)
        x_test = pd.DataFrame(x_test, columns=feature_names)
        models = create_models(x_train, y_train)
        x_train[LABELS] = y_train
        x_test[LABELS] = y_test

        for model in models:
            run_raianalyzer(model, x_train, x_test, LABELS, classes)
Code example #7
    def test_matrix_filter_cancer_filters(self):
        # Validate the shift-cohort functionality where the base
        # cohort was chosen in the matrix view
        (X_train, X_test, y_train, y_test, feature_names,
         _) = create_cancer_data()

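        # Four single-filter groups, one per 'mean radius' bin, are OR'd
        # together; the bins are contiguous and jointly cover the range
        # from 11.364 to 18.635.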
        composite_filters = [{
            'compositeFilters': [{
                'compositeFilters': [{
                    'arg': [11.364, 13.182],
                    'column': 'mean radius',
                    'method': 'in the range of'
                }],
                'operation':
                'and'
            }, {
                'compositeFilters': [{
                    'arg': [13.182, 15],
                    'column': 'mean radius',
                    'method': 'in the range of'
                }],
                'operation':
                'and'
            }, {
                'compositeFilters': [{
                    'arg': [15, 16.817],
                    'column': 'mean radius',
                    'method': 'in the range of'
                }],
                'operation':
                'and'
            }, {
                'compositeFilters': [{
                    'arg': [16.817, 18.635],
                    'column': 'mean radius',
                    'method': 'in the range of'
                }],
                'operation':
                'and'
            }],
            'operation':
            'or'
        }]

        model_task = ModelTask.CLASSIFICATION
        run_error_analyzer_on_models(X_train,
                                     y_train,
                                     X_test,
                                     y_test,
                                     feature_names,
                                     model_task,
                                     composite_filters=composite_filters)
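For intuition, because the four OR'd bins above are contiguous, the cohort should select roughly the same rows as this plain pandas filter (a sketch; whether the range endpoints are inclusive is an assumption):

import pandas as pd

df = pd.DataFrame(X_test, columns=feature_names)
cohort = df[(df['mean radius'] >= 11.364) & (df['mean radius'] <= 18.635)]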
Code example #8
    def test_missing_true_labels(self):
        x_train, x_test, y_train, y_test, _, _ = create_cancer_data()
        # Fit an SVM model
        model = create_sklearn_svm_classifier(x_train, y_train)
        pfi_explainer = PFIExplainer(model, is_function=False)
        # Validate we raise a helpful error when the true_labels
        # parameter is missing
        with pytest.raises(TypeError):
            pfi_explainer.explain_global(x_test)  # pylint: disable=no-value-for-parameter
        # Validate passing the labels positionally works
        explanation1 = pfi_explainer.explain_global(x_test, y_test)
        # Validate passing the labels as a kwarg works
        explanation2 = pfi_explainer.explain_global(x_test, true_labels=y_test)
        assert explanation1.global_importance_values == explanation2.global_importance_values
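The create_sklearn_svm_classifier() helper is likewise not shown. A plausible sketch, with assumed hyperparameters; probability=True matters because explainers generally need predict_proba on classifiers:

from sklearn import svm

def create_sklearn_svm_classifier(x_train, y_train, probability=True):
    # probability=True exposes predict_proba, which explainers rely on.
    clf = svm.SVC(gamma='scale', probability=probability, random_state=777)
    return clf.fit(x_train, y_train)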
Code example #9
    def test_interpret_dashboard(self, mimic_explainer):
        # Validate our explanation works with the interpret dashboard
        x_train, x_test, y_train, y_test, feature_names, target_names = \
            create_cancer_data()
        # Fit an SVM model
        model = create_sklearn_svm_classifier(x_train, y_train)
        explainer = mimic_explainer(model,
                                    x_train,
                                    LGBMExplainableModel,
                                    features=feature_names,
                                    classes=target_names)
        explanation = explainer.explain_global(x_test)
        show(explanation)
Code example #10
    def test_model_analysis_cancer(self, manager_type):
        x_train, x_test, y_train, y_test, feature_names, classes = \
            create_cancer_data()
        x_train = pd.DataFrame(x_train, columns=feature_names)
        x_test = pd.DataFrame(x_test, columns=feature_names)
        models = create_models_classification(x_train, y_train)
        x_train[LABELS] = y_train
        x_test[LABELS] = y_test
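        # Arguments forwarded to the manager under test; for a counterfactual
        # manager, 'opposite' asks for examples flipped to the other class.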
        manager_args = {DESIRED_CLASS: 'opposite'}

        for model in models:
            run_model_analysis(model, x_train, x_test, LABELS, [],
                               manager_type, manager_args, classes)
Code example #11
    def test_mimic_pytorch_binary_single_output(self, mimic_explainer):
        x_train, x_test, y_train, _, feature_names, _ = create_cancer_data()
        # Fit a pytorch DNN model
        model = create_pytorch_single_output_classifier(
            x_train.values, y_train)
        test_logger.info(
            'Running explain global for test_mimic_pytorch_binary_single_output')
        model_task = ModelTask.Classification
        explainer = mimic_explainer(model,
                                    x_train,
                                    LGBMExplainableModel,
                                    features=feature_names,
                                    model_task=model_task)
        global_explanation = explainer.explain_global(x_train)
        assert global_explanation is not None
        predicted_y = explainer.model.predict(x_train)
        # assert not all predictions are zero
        assert np.any(predicted_y)
        # also assert a substantial fraction of predictions are nonzero
        assert np.count_nonzero(predicted_y) > predicted_y.shape[0] / 4
Code example #12
    def test_old_explanation_dashboard(self, mimic_explainer):
        # Validate the old explanation dashboard namespace works but only
        # prints a warning
        x_train, x_test, y_train, y_test, feature_names, target_names = \
            create_cancer_data()
        # Fit an SVM model
        model = create_sklearn_svm_classifier(x_train, y_train)
        explainer = mimic_explainer(model,
                                    x_train,
                                    LGBMExplainableModel,
                                    features=feature_names,
                                    classes=target_names)
        explanation = explainer.explain_local(x_test)
        err = ("ExplanationDashboard in interpret-community package is "
               "deprecated and removed. Please use the ExplanationDashboard "
               "from raiwidgets package instead.")
        with pytest.warns(UserWarning, match=err):
            OldExplanationDashboard(explanation,
                                    model,
                                    dataset=x_test,
                                    true_y=y_test)
Code example #13
    def test_explain_model_serialization_binary(self, mimic_explainer):
        x_train, x_test, y_train, _, _, _ = create_cancer_data()
        # Fit an SVM model
        model = create_sklearn_svm_classifier(x_train, y_train)
        self._validate_model_serialization(model, x_train, x_test,
                                           mimic_explainer)
Code example #14
    def test_validate_against_shap(self):
        # Validate our explainer against shap library directly
        X, y = shap.datasets.adult()
        x_train, x_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=0.02,
                                                            random_state=7)
        # Fit several classifiers
        tree_classifiers = [
            create_sklearn_random_forest_classifier(x_train, y_train)
        ]
        non_tree_classifiers = [
            create_sklearn_logistic_regressor(x_train, y_train)
        ]
        tree_regressors = [
            create_sklearn_random_forest_regressor(x_train, y_train)
        ]
        non_tree_regressors = [
            create_sklearn_linear_regressor(x_train, y_train)
        ]
        # For each model, validate we get the same results as calling shap directly
        test_logger.info(
            "Running tree classifiers in test_validate_against_shap")
        for model in tree_classifiers:
            # Run shap directly for comparison
            exp = shap.TreeExplainer(model)
            explanation = exp.shap_values(x_test)
            shap_overall_imp = get_shap_imp_classification(explanation)
            overall_imp = tabular_explainer_imp(model, x_train, x_test)
            validate_correlation(overall_imp, shap_overall_imp, 0.95)

        test_logger.info(
            "Running non tree classifiers in test_validate_against_shap")
        for model in non_tree_classifiers:
            # Run shap directly for comparison
            clustered = shap.kmeans(x_train, 10)
            exp = shap.KernelExplainer(model.predict_proba, clustered)
            explanation = exp.shap_values(x_test)
            shap_overall_imp = get_shap_imp_classification(explanation)
            overall_imp = tabular_explainer_imp(model, x_train, x_test)
            validate_correlation(overall_imp, shap_overall_imp, 0.95)

        test_logger.info(
            "Running tree regressors in test_validate_against_shap")
        for model in tree_regressors:
            # Run shap directly for comparison
            exp = shap.TreeExplainer(model)
            explanation = exp.shap_values(x_test)
            shap_overall_imp = get_shap_imp_regression(explanation)
            overall_imp = tabular_explainer_imp(model, x_train, x_test)
            validate_correlation(overall_imp, shap_overall_imp, 0.95)

        test_logger.info(
            "Running non tree regressors in test_validate_against_shap")
        for model in non_tree_regressors:
            # Run shap directly for comparison
            clustered = shap.kmeans(x_train, 10)
            exp = shap.KernelExplainer(model.predict, clustered)
            explanation = exp.shap_values(x_test)
            shap_overall_imp = get_shap_imp_regression(explanation)
            overall_imp = tabular_explainer_imp(model, x_train, x_test)
            validate_correlation(overall_imp, shap_overall_imp, 0.95)

        if not rapids_installed:
            pytest.skip("cuML not installed; will skip testing GPU Explainer")
        else:
            test_logger.info(
                "Running GPU non tree classifiers in test_validate_against_shap"
            )
            x_train, x_test, y_train, y_validation, _, _ = create_cancer_data()
            gpu_non_tree_classifiers = [
                create_cuml_svm_classifier(x_train.astype(np.float32),
                                           y_train.astype(np.float32))
            ]
            for model in gpu_non_tree_classifiers:
                exp = KernelExplainer(model=model.predict_proba,
                                      data=x_train.astype(np.float32))
                explanation = exp.shap_values(x_test.astype(np.float32))
                shap_overall_imp = get_shap_imp_classification(explanation)
                overall_imp = tabular_explainer_imp(model,
                                                    x_train.astype(np.float32),
                                                    x_test.astype(np.float32),
                                                    use_gpu=True)
                validate_correlation(overall_imp, shap_overall_imp, 0.95)
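validate_correlation() is also not shown. One plausible implementation, assuming both arguments are numeric global-importance vectors over the same features (hypothetical; the real helper may well compare rank order instead):

import numpy as np

def validate_correlation(overall_imp, shap_overall_imp, threshold):
    # Require the Pearson correlation between the two importance vectors
    # to exceed the threshold before judging the explainers consistent.
    corr = np.corrcoef(np.asarray(overall_imp, dtype=float),
                       np.asarray(shap_overall_imp, dtype=float))[0, 1]
    assert corr > threshold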