def test_empty_cohort_cancer_classification(self, analyzer_type):
    X_train, X_test, y_train, y_test, feature_names, _ = \
        create_cancer_data()
    model = create_kneighbors_classifier(X_train, y_train)
    composite_filters = [{
        COMPOSITE_FILTERS: [{
            COMPOSITE_FILTERS: [{
                ARG: [20.45, 22.27],
                COLUMN: 'mean radius',
                METHOD: CohortFilterMethods.METHOD_RANGE
            }, {
                ARG: [10.88, 14.46],
                COLUMN: 'mean texture',
                METHOD: CohortFilterMethods.METHOD_RANGE
            }],
            OPERATION: CohortFilterOps.AND
        }],
        OPERATION: CohortFilterOps.OR
    }]
    run_error_analyzer(model, X_test, y_test, feature_names,
                       analyzer_type,
                       composite_filters=composite_filters)

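# Nearly every test in this section unpacks the six-tuple returned by the
# shared create_cancer_data helper, whose real implementation lives in the
# common test utilities. The sketch below only illustrates the assumed
# contract (train/test splits of sklearn's breast cancer dataset plus
# feature names and class names); the name and split parameters here are
# hypothetical, not the canonical values.
def create_cancer_data_sketch():
    from sklearn.datasets import load_breast_cancer
    from sklearn.model_selection import train_test_split
    cancer = load_breast_cancer()
    x_train, x_test, y_train, y_test = train_test_split(
        cancer.data, cancer.target, test_size=0.2, random_state=7)
    # Returned order matches how the tests unpack the tuple
    return (x_train, x_test, y_train, y_test,
            list(cancer.feature_names), list(cancer.target_names))
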
def test_matrix_filter_cancer(self):
    X_train, X_test, y_train, y_test, feature_names, _ = \
        create_cancer_data()
    model_task = ModelTask.CLASSIFICATION
    run_error_analyzer_on_models(X_train, y_train, X_test, y_test,
                                 feature_names, model_task)

def test_local_explanation(self, mimic_explainer):
    # Validate visualizing ExplanationDashboard with a local explanation
    x_train, x_test, y_train, y_test, feature_names, target_names = \
        create_cancer_data()
    # Fit an SVM model
    model = create_sklearn_svm_classifier(x_train, y_train)
    explainer = mimic_explainer(model, x_train, LGBMExplainableModel,
                                features=feature_names,
                                classes=target_names)
    explanation = explainer.explain_local(x_test)
    ExplanationDashboard(explanation, model, dataset=x_test,
                         true_y=y_test)

def test_error_report_cancer(self):
    X_train, X_test, y_train, y_test, feature_names, _ = \
        create_cancer_data()
    models = create_models_classification(X_train, y_train)
    categorical_features = []
    for model in models:
        run_error_analyzer(model, X_test, y_test, feature_names,
                           categorical_features)

def test_matrix_filter_cancer(self):
    x_train, x_test, y_train, y_test, feature_names, _ = \
        create_cancer_data()
    models = create_models(x_train, y_train)
    categorical_features = []
    for model in models:
        run_error_analyzer(model, x_test, y_test, feature_names,
                           categorical_features)

def test_raianalyzer_cancer(self):
    x_train, x_test, y_train, y_test, feature_names, classes = \
        create_cancer_data()
    x_train = pd.DataFrame(x_train, columns=feature_names)
    x_test = pd.DataFrame(x_test, columns=feature_names)
    models = create_models(x_train, y_train)
    x_train[LABELS] = y_train
    x_test[LABELS] = y_test
    for model in models:
        run_raianalyzer(model, x_train, x_test, LABELS, classes)

def test_matrix_filter_cancer_filters(self):
    # Validate the shift cohort functionality where the base
    # cohort was chosen in the matrix view
    X_train, X_test, y_train, y_test, feature_names, _ = \
        create_cancer_data()
    composite_filters = [{
        'compositeFilters': [{
            'compositeFilters': [{
                'arg': [11.364, 13.182],
                'column': 'mean radius',
                'method': 'in the range of'
            }],
            'operation': 'and'
        }, {
            'compositeFilters': [{
                'arg': [13.182, 15],
                'column': 'mean radius',
                'method': 'in the range of'
            }],
            'operation': 'and'
        }, {
            'compositeFilters': [{
                'arg': [15, 16.817],
                'column': 'mean radius',
                'method': 'in the range of'
            }],
            'operation': 'and'
        }, {
            'compositeFilters': [{
                'arg': [16.817, 18.635],
                'column': 'mean radius',
                'method': 'in the range of'
            }],
            'operation': 'and'
        }],
        'operation': 'or'
    }]
    model_task = ModelTask.CLASSIFICATION
    run_error_analyzer_on_models(X_train, y_train, X_test, y_test,
                                 feature_names, model_task,
                                 composite_filters=composite_filters)

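# Note: the raw keys and values above ('compositeFilters', 'arg', 'column',
# 'method', 'operation', 'in the range of', 'and', 'or') are assumed to be
# the literal values behind the COMPOSITE_FILTERS / ARG / COLUMN / METHOD /
# OPERATION constants and the CohortFilterMethods / CohortFilterOps enums
# used in test_empty_cohort_cancer_classification above, so either spelling
# should build the same cohort filter payload. For example, the first inner
# filter could equivalently be written as:
#
#     {ARG: [11.364, 13.182],
#      COLUMN: 'mean radius',
#      METHOD: CohortFilterMethods.METHOD_RANGE}
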
def test_missing_true_labels(self):
    x_train, x_test, y_train, y_test, _, _ = create_cancer_data()
    # Fit an SVM model
    model = create_sklearn_svm_classifier(x_train, y_train)
    pfi_explainer = PFIExplainer(model, is_function=False)
    # Validate we raise a helpful error when the true_labels
    # parameter is missing
    with pytest.raises(TypeError):
        pfi_explainer.explain_global(x_test)  # pylint: disable=no-value-for-parameter
    # Validate passing true_labels positionally works
    explanation1 = pfi_explainer.explain_global(x_test, y_test)
    # Validate passing true_labels as a keyword argument works
    explanation2 = pfi_explainer.explain_global(x_test, true_labels=y_test)
    assert (explanation1.global_importance_values ==
            explanation2.global_importance_values)

def test_interpret_dashboard(self, mimic_explainer):
    # Validate our explanation works with the interpret dashboard
    x_train, x_test, y_train, y_test, feature_names, target_names = \
        create_cancer_data()
    # Fit an SVM model
    model = create_sklearn_svm_classifier(x_train, y_train)
    explainer = mimic_explainer(model, x_train, LGBMExplainableModel,
                                features=feature_names,
                                classes=target_names)
    explanation = explainer.explain_global(x_test)
    show(explanation)

def test_model_analysis_cancer(self, manager_type):
    x_train, x_test, y_train, y_test, feature_names, classes = \
        create_cancer_data()
    x_train = pd.DataFrame(x_train, columns=feature_names)
    x_test = pd.DataFrame(x_test, columns=feature_names)
    models = create_models_classification(x_train, y_train)
    x_train[LABELS] = y_train
    x_test[LABELS] = y_test
    manager_args = {DESIRED_CLASS: 'opposite'}
    for model in models:
        run_model_analysis(model, x_train, x_test, LABELS, [],
                           manager_type, manager_args, classes)

def test_mimic_pytorch_binary_single_output(self, mimic_explainer):
    x_train, x_test, y_train, _, feature_names, _ = create_cancer_data()
    # Fit a pytorch DNN model
    model = create_pytorch_single_output_classifier(x_train.values,
                                                    y_train)
    test_logger.info('Running explain global for '
                     'test_mimic_pytorch_binary_single_output')
    model_task = ModelTask.Classification
    explainer = mimic_explainer(model, x_train, LGBMExplainableModel,
                                features=feature_names,
                                model_task=model_task)
    global_explanation = explainer.explain_global(x_train)
    assert global_explanation is not None
    predicted_y = explainer.model.predict(x_train)
    # Assert not all predictions are zeros
    assert np.any(predicted_y)
    # Also assert a substantial fraction of the predictions are nonzero
    assert np.count_nonzero(predicted_y) > predicted_y.shape[0] / 4

def test_old_explanation_dashboard(self, mimic_explainer):
    # Validate the old explanation dashboard namespace still works
    # but only prints a warning
    x_train, x_test, y_train, y_test, feature_names, target_names = \
        create_cancer_data()
    # Fit an SVM model
    model = create_sklearn_svm_classifier(x_train, y_train)
    explainer = mimic_explainer(model, x_train, LGBMExplainableModel,
                                features=feature_names,
                                classes=target_names)
    explanation = explainer.explain_local(x_test)
    err = ("ExplanationDashboard in interpret-community package is "
           "deprecated and removed."
           "Please use the ExplanationDashboard from raiwidgets "
           "package instead.")
    with pytest.warns(UserWarning, match=err):
        OldExplanationDashboard(explanation, model, dataset=x_test,
                                true_y=y_test)

def test_explain_model_serialization_binary(self, mimic_explainer):
    x_train, x_test, y_train, _, _, _ = create_cancer_data()
    # Fit an SVM model
    model = create_sklearn_svm_classifier(x_train, y_train)
    self._validate_model_serialization(model, x_train, x_test,
                                       mimic_explainer)

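# _validate_model_serialization is defined elsewhere in this test class.
# The sketch below is a minimal illustration of the assumed round-trip
# check (pickle the fitted explainer, reload it, and verify the global
# explanation is unchanged); the name and exact comparison here are
# assumptions, not the helper's actual implementation.
def _validate_model_serialization_sketch(self, model, x_train, x_test,
                                         mimic_explainer):
    import pickle
    explainer = mimic_explainer(model, x_train, LGBMExplainableModel)
    original = explainer.explain_global(x_test)
    # Round-trip the explainer through pickle and explain again
    restored = pickle.loads(pickle.dumps(explainer))
    reloaded = restored.explain_global(x_test)
    np.testing.assert_allclose(original.global_importance_values,
                               reloaded.global_importance_values)
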
def test_validate_against_shap(self):
    # Validate our explainer against the shap library directly
    X, y = shap.datasets.adult()
    x_train, x_test, y_train, y_test = train_test_split(
        X, y, test_size=0.02, random_state=7)
    # Fit several classifiers and regressors
    tree_classifiers = [
        create_sklearn_random_forest_classifier(x_train, y_train)]
    non_tree_classifiers = [
        create_sklearn_logistic_regressor(x_train, y_train)]
    tree_regressors = [
        create_sklearn_random_forest_regressor(x_train, y_train)]
    non_tree_regressors = [
        create_sklearn_linear_regressor(x_train, y_train)]
    # For each model, validate we get the same results as calling
    # shap directly
    test_logger.info(
        "Running tree classifiers in test_validate_against_shap")
    for model in tree_classifiers:
        # Run shap directly for comparison
        exp = shap.TreeExplainer(model)
        explanation = exp.shap_values(x_test)
        shap_overall_imp = get_shap_imp_classification(explanation)
        overall_imp = tabular_explainer_imp(model, x_train, x_test)
        validate_correlation(overall_imp, shap_overall_imp, 0.95)
    test_logger.info(
        "Running non tree classifiers in test_validate_against_shap")
    for model in non_tree_classifiers:
        # Run shap directly for comparison
        clustered = shap.kmeans(x_train, 10)
        exp = shap.KernelExplainer(model.predict_proba, clustered)
        explanation = exp.shap_values(x_test)
        shap_overall_imp = get_shap_imp_classification(explanation)
        overall_imp = tabular_explainer_imp(model, x_train, x_test)
        validate_correlation(overall_imp, shap_overall_imp, 0.95)
    test_logger.info(
        "Running tree regressors in test_validate_against_shap")
    for model in tree_regressors:
        # Run shap directly for comparison
        exp = shap.TreeExplainer(model)
        explanation = exp.shap_values(x_test)
        shap_overall_imp = get_shap_imp_regression(explanation)
        overall_imp = tabular_explainer_imp(model, x_train, x_test)
        validate_correlation(overall_imp, shap_overall_imp, 0.95)
    test_logger.info(
        "Running non tree regressors in test_validate_against_shap")
    for model in non_tree_regressors:
        # Run shap directly for comparison
        clustered = shap.kmeans(x_train, 10)
        exp = shap.KernelExplainer(model.predict, clustered)
        explanation = exp.shap_values(x_test)
        shap_overall_imp = get_shap_imp_regression(explanation)
        overall_imp = tabular_explainer_imp(model, x_train, x_test)
        validate_correlation(overall_imp, shap_overall_imp, 0.95)
    # pytest.skip raises, so no else branch is needed below
    if not rapids_installed:
        pytest.skip("cuML not installed; will skip testing GPU Explainer")
    test_logger.info(
        "Running GPU non tree classifiers in test_validate_against_shap")
    x_train, x_test, y_train, y_validation, _, _ = create_cancer_data()
    gpu_non_tree_classifiers = [
        create_cuml_svm_classifier(x_train.astype(np.float32),
                                   y_train.astype(np.float32))]
    for model in gpu_non_tree_classifiers:
        exp = KernelExplainer(model=model.predict_proba,
                              data=x_train.astype(np.float32))
        explanation = exp.shap_values(x_test.astype(np.float32))
        shap_overall_imp = get_shap_imp_classification(explanation)
        overall_imp = tabular_explainer_imp(model,
                                            x_train.astype(np.float32),
                                            x_test.astype(np.float32),
                                            use_gpu=True)
        validate_correlation(overall_imp, shap_overall_imp, 0.95)
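
# validate_correlation compares the feature importances from our explainer
# against those computed by shap directly. The sketch below is a plausible
# stand-in, assuming the helper checks the Spearman rank correlation of the
# two importance vectors against the given threshold; the name and the use
# of rank correlation are assumptions, not the helper's actual code.
def validate_correlation_sketch(overall_imp, shap_overall_imp, threshold):
    from scipy.stats import spearmanr
    # Rank correlation is appropriate because absolute importance scales
    # differ between explainers; only the ordering needs to agree.
    correlation, _ = spearmanr(overall_imp, shap_overall_imp)
    assert correlation > threshold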