def test_output_format_checked():
    input_features, y_true = pd.DataFrame(data=[range(15)]), pd.Series(range(15))
    with pytest.raises(ValueError,
                       match="Parameter output_format must be either text, dict, or dataframe. Received bar"):
        explain_predictions(pipeline=MagicMock(), input_features=input_features, y=None,
                            indices_to_explain=0, output_format="bar")
    with pytest.raises(ValueError,
                       match="Parameter output_format must be either text, dict, or dataframe. Received xml"):
        explain_prediction(pipeline=MagicMock(), input_features=input_features, y=None,
                           index_to_explain=0, output_format="xml")

    input_features, y_true = pd.DataFrame(data=range(15)), pd.Series(range(15))
    with pytest.raises(ValueError,
                       match="Parameter output_format must be either text, dict, or dataframe. Received foo"):
        explain_predictions_best_worst(pipeline=MagicMock(), input_features=input_features,
                                       y_true=y_true, output_format="foo")

def test_explain_predictions_stacked_ensemble(problem_type,
                                              dummy_stacked_ensemble_binary_estimator,
                                              dummy_stacked_ensemble_multiclass_estimator,
                                              dummy_stacked_ensemble_regressor_estimator,
                                              X_y_binary, X_y_multi, X_y_regression):
    if is_binary(problem_type):
        X, y = X_y_binary
        pipeline = dummy_stacked_ensemble_binary_estimator
    elif is_multiclass(problem_type):
        X, y = X_y_multi
        pipeline = dummy_stacked_ensemble_multiclass_estimator
    else:
        X, y = X_y_regression
        pipeline = dummy_stacked_ensemble_regressor_estimator

    with pytest.raises(ValueError, match="Cannot explain predictions for a stacked ensemble pipeline"):
        explain_predictions(pipeline, X, y, indices_to_explain=[0])

    with pytest.raises(ValueError, match="Cannot explain predictions for a stacked ensemble pipeline"):
        explain_predictions_best_worst(pipeline, X, y)

def test_explain_predictions_raises_pipeline_score_error():
    with pytest.raises(PipelineScoreError, match="Division by zero!"):

        def raise_zero_division(input_features):
            raise ZeroDivisionError("Division by zero!")

        # Force the mocked pipeline's predict_proba to raise, which should surface as a PipelineScoreError.
        pipeline = MagicMock()
        pipeline.problem_type = ProblemTypes.BINARY
        pipeline.predict_proba.side_effect = raise_zero_division
        explain_predictions_best_worst(pipeline, pd.DataFrame({"a": range(15)}), pd.Series(range(15)))

def test_explain_predictions_best_worst_time_series(output_format, pipeline_class, estimator, ts_data):
    X, y = ts_data

    if is_binary(pipeline_class.problem_type):
        y = y % 2

    class TSPipeline(pipeline_class):
        component_graph = ["Delayed Feature Transformer", estimator]
        name = "time series pipeline"

    tspipeline = TSPipeline({"pipeline": {"gap": 1, "max_delay": 2}})
    tspipeline.fit(X, y)

    exp = explain_predictions_best_worst(pipeline=tspipeline, input_features=X,
                                         y_true=y, output_format=output_format)

    if output_format == "dict":
        # Check that the computed features to be explained aren't NaN.
        for exp_idx in range(len(exp["explanations"])):
            assert not np.isnan(
                np.array(exp["explanations"][exp_idx]["explanations"][0]["feature_values"])).any()

def test_explain_predictions_best_worst_custom_metric(mock_make_table, output_format, answer):

    mock_make_table.return_value = ("table goes here" if output_format == "text"
                                    else {"explanations": ["explanation_dictionary_goes_here"]})
    pipeline = MagicMock()
    pipeline.parameters = "Parameters go here"
    input_features = pd.DataFrame({"a": [5, 6]})
    pipeline.problem_type = ProblemTypes.REGRESSION
    pipeline.name = "Test Pipeline Name"
    pipeline.compute_estimator_features.return_value = ww.DataTable(input_features)
    pipeline.predict.return_value = ww.DataColumn(pd.Series([2, 1]))
    y_true = pd.Series([3, 2])

    # Custom per-row metric used to rank the best and worst predictions.
    def sum(y_true, y_pred):
        return y_pred + y_true

    best_worst_report = explain_predictions_best_worst(pipeline, input_features, y_true=y_true,
                                                       num_to_explain=1, metric=sum,
                                                       output_format=output_format)
    if output_format == "text":
        compare_two_tables(best_worst_report.splitlines(), regression_custom_metric_answer.splitlines())
    else:
        assert best_worst_report == answer

def test_json_serialization(problem_type, X_y_regression, linear_regression_pipeline_class,
                            X_y_binary, logistic_regression_binary_pipeline_class,
                            X_y_multi, logistic_regression_multiclass_pipeline_class):

    if problem_type == ProblemTypes.REGRESSION:
        X, y = X_y_regression
        y = pd.Series(y)
        pipeline = linear_regression_pipeline_class(parameters={"Linear Regressor": {"n_jobs": 1}})
    elif problem_type == ProblemTypes.BINARY:
        X, y = X_y_binary
        y = pd.Series(y).astype("str")
        pipeline = logistic_regression_binary_pipeline_class(
            parameters={"Logistic Regression Classifier": {"n_jobs": 1}})
    else:
        X, y = X_y_multi
        y = pd.Series(y).astype("str")
        pipeline = logistic_regression_multiclass_pipeline_class(
            parameters={"Logistic Regression Classifier": {"n_jobs": 1}})

    pipeline.fit(X, y)

    # Both report formats should round-trip through JSON unchanged.
    best_worst = explain_predictions_best_worst(pipeline, pd.DataFrame(X), y,
                                                num_to_explain=1, output_format="dict")
    assert json.loads(json.dumps(best_worst)) == best_worst

    report = explain_predictions(pipeline, pd.DataFrame(X[:1]), output_format="dict")
    assert json.loads(json.dumps(report)) == report

def test_explain_predictions_best_worst_and_explain_predictions(mock_make_table, mock_default_metrics,
                                                                problem_type, output_format, answer,
                                                                explain_predictions_answer, custom_index):
    if output_format == "text":
        mock_make_table.return_value = "table goes here"
    elif output_format == "dataframe":
        shap_table = pd.DataFrame({
            "feature_names": [0],
            "feature_values": [0],
            "qualitative_explanation": [0],
            "quantitative_explanation": [0],
        })
        # Use a side effect so that we always get a new copy of the dataframe.
        mock_make_table.side_effect = lambda *args, **kwargs: shap_table.copy()
    else:
        mock_make_table.return_value = {"explanations": ["explanation_dictionary_goes_here"]}

    pipeline = MagicMock()
    pipeline.parameters = "Parameters go here"
    input_features = pd.DataFrame({"a": [3, 4]}, index=custom_index)
    pipeline.problem_type = problem_type
    pipeline.name = "Test Pipeline Name"
    pipeline.compute_estimator_features.return_value = ww.DataTable(input_features)

    def _add_custom_index(answer, index_best, index_worst, output_format):
        if output_format == "text":
            answer = answer.format(index_0=index_best, index_1=index_worst)
        elif output_format == "dataframe":
            col_name = "prefix" if "prefix" in answer.columns else "rank"
            n_repeats = answer[col_name].value_counts().tolist()[0]
            answer["index_id"] = [index_best] * n_repeats + [index_worst] * n_repeats
        else:
            answer["explanations"][0]["predicted_values"]["index_id"] = index_best
            answer["explanations"][1]["predicted_values"]["index_id"] = index_worst
        return answer

    if is_regression(problem_type):
        abs_error_mock = MagicMock(__name__="abs_error")
        abs_error_mock.return_value = pd.Series([4., 1.], dtype="float64")
        mock_default_metrics.__getitem__.return_value = abs_error_mock
        pipeline.predict.return_value = ww.DataColumn(pd.Series([2, 1]))
        y_true = pd.Series([3, 2], index=custom_index)
        answer = _add_custom_index(answer, index_best=custom_index[1],
                                   index_worst=custom_index[0], output_format=output_format)
    elif is_binary(problem_type):
        pipeline.classes_.return_value = ["benign", "malignant"]
        cross_entropy_mock = MagicMock(__name__="cross_entropy")
        mock_default_metrics.__getitem__.return_value = cross_entropy_mock
        cross_entropy_mock.return_value = pd.Series([0.2, 0.78])
        pipeline.predict_proba.return_value = ww.DataTable(
            pd.DataFrame({"benign": [0.05, 0.1], "malignant": [0.95, 0.9]}))
        pipeline.predict.return_value = ww.DataColumn(pd.Series(["malignant"] * 2))
        y_true = pd.Series(["malignant", "benign"], index=custom_index)
        answer = _add_custom_index(answer, index_best=custom_index[0],
                                   index_worst=custom_index[1], output_format=output_format)
    else:
        # Multiclass text output is formatted slightly differently, so account for that.
        if output_format == "text":
            mock_make_table.return_value = multiclass_table
        pipeline.classes_.return_value = ["setosa", "versicolor", "virginica"]
        cross_entropy_mock = MagicMock(__name__="cross_entropy")
        mock_default_metrics.__getitem__.return_value = cross_entropy_mock
        cross_entropy_mock.return_value = pd.Series([0.15, 0.34])
        pipeline.predict_proba.return_value = ww.DataTable(
            pd.DataFrame({"setosa": [0.8, 0.2], "versicolor": [0.1, 0.75], "virginica": [0.1, 0.05]}))
        pipeline.predict.return_value = ww.DataColumn(pd.Series(["setosa", "versicolor"]))
        y_true = pd.Series(["setosa", "versicolor"], index=custom_index)
        answer = _add_custom_index(answer, index_best=custom_index[0],
                                   index_worst=custom_index[1], output_format=output_format)

    report = explain_predictions(pipeline, input_features, y=y_true, indices_to_explain=[0, 1],
                                 output_format=output_format)
    if output_format == "text":
        compare_two_tables(report.splitlines(), explain_predictions_answer.splitlines())
    elif output_format == "dataframe":
        assert report.columns.tolist() == explain_predictions_answer.columns.tolist()
        pd.testing.assert_frame_equal(report, explain_predictions_answer[report.columns])
    else:
        assert report == explain_predictions_answer

    best_worst_report = explain_predictions_best_worst(pipeline, input_features, y_true=y_true,
                                                       num_to_explain=1, output_format=output_format)
    if output_format == "text":
        compare_two_tables(best_worst_report.splitlines(), answer.splitlines())
    elif output_format == "dataframe":
        # Check dataframes are equal without caring about column order.
        assert sorted(best_worst_report.columns.tolist()) == sorted(answer.columns.tolist())
        pd.testing.assert_frame_equal(best_worst_report, answer[best_worst_report.columns])
    else:
        assert best_worst_report == answer

def test_explain_predictions_best_worst_value_errors(input_features, y_true, error_message):
    with pytest.raises(ValueError, match=error_message):
        explain_predictions_best_worst(None, input_features, y_true)