def test_explain_predictions_value_errors():
    """Verify that explain_predictions raises ValueError on bad arguments.

    Two failure modes are exercised: an empty input dataframe, and
    ``indices_to_explain`` containing a position outside the five-row frame.

    NOTE(review): this module defines another function with the same name
    further down; the later definition is the one bound at import time.
    Confirm which of the two is meant to run.
    """
    empty_message = "Parameter input_features must be a non-empty dataframe."
    with pytest.raises(ValueError, match=empty_message):
        explain_predictions(MagicMock(), pd.DataFrame(), y=None,
                            indices_to_explain=[0])

    # Each entry contains at least one index outside the range 0..4.
    range_message = "Explained indices should be between"
    for bad_indices in ([5], [1, 5], [-1]):
        with pytest.raises(ValueError, match=range_message):
            explain_predictions(MagicMock(),
                                pd.DataFrame({"a": [0, 1, 2, 3, 4]}),
                                y=None,
                                indices_to_explain=bad_indices)
def test_categories_aggregated_pca_dag(pipeline_class, estimator, fraud_100):
    """Check aggregated explanations for a graph pipeline with a PCA branch.

    The pipeline routes numeric columns through PCA, categorical columns
    through a one-hot encoder and the datetime column through the datetime
    featurizer. Row 0 is explained in ``dict`` format and every explanation
    must report the parent features, with the engineered column names listed
    under ``drill_down``.
    """
    features, target = fraud_100
    target = target.to_series()

    class PcaDagPipeline(pipeline_class):
        component_graph = {
            'SelectNumeric': ["Select Columns Transformer"],
            'SelectCategorical': ["Select Columns Transformer"],
            'SelectDate': ["Select Columns Transformer"],
            'OHE': ['One Hot Encoder', 'SelectCategorical'],
            'DT': ['DateTime Featurization Component', "SelectDate"],
            'PCA': ['PCA Transformer', 'SelectNumeric'],
            'Estimator': [estimator, 'PCA', 'DT', 'OHE'],
        }

    parameters = {
        'SelectNumeric': {
            'columns': ['card_id', 'store_id', 'amount', 'lat', 'lng'],
        },
        'SelectCategorical': {'columns': ['currency', 'provider']},
        'SelectDate': {'columns': ['datetime']},
        'PCA': {"n_components": 2},
        'Estimator': {"n_jobs": 1},
    }
    pipeline = PcaDagPipeline(parameters)
    target = transform_y_for_problem_type(pipeline.problem_type, target)
    pipeline.fit(features, target)

    report = explain_predictions(pipeline, features, target,
                                 indices_to_explain=[0],
                                 output_format="dict",
                                 top_k_features=7)

    expected_values = [
        pd.Timestamp('2019-01-01 00:12:26'), 'Mastercard', 'CUC'
    ]
    # Parent feature -> engineered feature names expected in its drill-down.
    expected_children = (
        ("currency", EXPECTED_CURRENCY_FEATURES),
        ("provider", EXPECTED_PROVIDER_FEATURES_OHE),
        ("datetime", EXPECTED_DATETIME_FEATURES),
    )

    for explanation in report["explanations"][0]["explanations"]:
        assert set(explanation['feature_names']) == {
            "component_0", "component_1", "provider", "currency", "datetime"
        }
        assert all(value in explanation['feature_values']
                   for value in expected_values)

        drill_down = explanation['drill_down']
        assert drill_down.keys() == {"currency", "provider", "datetime"}
        for parent, children in expected_children:
            assert set(drill_down[parent]['feature_names']) == children
def test_json_serialization(problem_type, X_y_regression,
                            linear_regression_pipeline_class, X_y_binary,
                            logistic_regression_binary_pipeline_class,
                            X_y_multi,
                            logistic_regression_multiclass_pipeline_class):
    """Check that ``dict``-format reports survive a JSON round trip.

    A pipeline matching ``problem_type`` is fitted, then both the best/worst
    report and the plain explanation report are dumped to JSON and loaded
    back; the result must equal the original dictionary.
    """
    # Pick dataset, pipeline class and estimator name for the problem type.
    if problem_type == problem_type.REGRESSION:
        dataset, pipeline_cls = X_y_regression, linear_regression_pipeline_class
        estimator_name, targets_as_str = "Linear Regressor", False
    elif problem_type == problem_type.BINARY:
        dataset = X_y_binary
        pipeline_cls = logistic_regression_binary_pipeline_class
        estimator_name, targets_as_str = "Logistic Regression Classifier", True
    else:
        dataset = X_y_multi
        pipeline_cls = logistic_regression_multiclass_pipeline_class
        estimator_name, targets_as_str = "Logistic Regression Classifier", True

    X, raw_target = dataset
    y = pd.Series(raw_target)
    if targets_as_str:
        # Classification targets are cast to strings before fitting.
        y = y.astype("str")
    pipeline = pipeline_cls(parameters={estimator_name: {"n_jobs": 1}})
    pipeline.fit(X, y)

    def _assert_json_round_trip(payload):
        assert json.loads(json.dumps(payload)) == payload

    best_worst = explain_predictions_best_worst(pipeline, pd.DataFrame(X), y,
                                                num_to_explain=1,
                                                output_format="dict")
    _assert_json_round_trip(best_worst)

    report = explain_predictions(pipeline, pd.DataFrame(X[:1]),
                                 output_format="dict")
    _assert_json_round_trip(report)
def test_categories_aggregated_when_some_are_dropped(pipeline_class, estimator,
                                                     fraud_100):
    """Check aggregation when only some engineered date columns are dropped.

    ``datetime_month`` and ``datetime_hour`` are removed by the drop-columns
    step, so the ``datetime`` drill-down must list only the remaining
    engineered columns, while the parent ``datetime`` feature is still
    reported.
    """
    features, target = fraud_100
    target = target.to_series()

    class LinearPipelineDropDates(pipeline_class):
        component_graph = [
            "Select Columns Transformer",
            "One Hot Encoder",
            "DateTime Featurization Component",
            'Drop Columns Transformer',
            estimator,
        ]

    parameters = {
        "Select Columns Transformer": {
            'columns': ['amount', 'provider', "currency", "datetime"],
        },
        "Drop Columns Transformer": {
            "columns": ["datetime_month", "datetime_hour"],
        },
        estimator: {"n_jobs": 1},
    }
    pipeline = LinearPipelineDropDates(parameters)
    target = transform_y_for_problem_type(pipeline.problem_type, target)
    pipeline.fit(features, target)

    report = explain_predictions(pipeline, features, target,
                                 indices_to_explain=[0],
                                 output_format="dict",
                                 top_k_features=4)

    # Parent feature -> engineered feature names expected in its drill-down.
    expected_children = (
        ("currency", EXPECTED_CURRENCY_FEATURES),
        ("provider", EXPECTED_PROVIDER_FEATURES_OHE),
        ("datetime", {"datetime_year", "datetime_day_of_week"}),
    )

    for explanation in report["explanations"][0]['explanations']:
        assert set(explanation['feature_names']) == {
            "amount", "provider", "currency", "datetime"
        }
        assert set(explanation['feature_values']) == {
            "CUC", "Mastercard", 24900, pd.Timestamp('2019-01-01 00:12:26')
        }

        drill_down = explanation['drill_down']
        assert drill_down.keys() == {"currency", "provider", "datetime"}
        for parent, children in expected_children:
            assert set(drill_down[parent]['feature_names']) == children
def test_explain_predictions_time_series(ts_data):
    """Check explain_predictions on a time series regression pipeline.

    A delayed-feature + random-forest pipeline is fitted on ``ts_data``.
    Explaining rows 5 and 11 in ``dict`` format must yield feature values
    with no NaNs, and asking for row 1 must raise ``ValueError``.
    """
    X, y = ts_data

    class TSPipeline(TimeSeriesRegressionPipeline):
        component_graph = [
            "Delayed Feature Transformer", "Random Forest Regressor"
        ]
        name = "time series pipeline"

    tspipeline = TSPipeline({
        "pipeline": {
            "gap": 1,
            "max_delay": 2
        },
        "Random Forest Regressor": {
            "n_jobs": 1
        }
    })
    tspipeline.fit(X, y)

    exp = explain_predictions(pipeline=tspipeline,
                              input_features=X,
                              y=y,
                              indices_to_explain=[5, 11],
                              output_format="dict")

    # Check that the computed features to be explained aren't NaN.
    # Iterate the per-row explanations directly instead of indexing by
    # position.
    for row_explanation in exp["explanations"]:
        feature_values = row_explanation["explanations"][0]["feature_values"]
        assert not np.isnan(np.array(feature_values)).any()

    # NOTE(review): index 1 is presumably rejected because it falls inside the
    # window the delayed features cannot fill (max_delay/gap) - confirm.
    with pytest.raises(ValueError, match="Requested index"):
        explain_predictions(pipeline=tspipeline,
                            input_features=X,
                            y=y,
                            indices_to_explain=[1, 11],
                            output_format="text")
def test_categories_aggregated_text(pipeline_class, estimator, fraud_100):
    """Check aggregation when ``provider`` is treated as natural language.

    ``provider`` is retyped to ``NaturalLanguage`` so it goes through the text
    featurizer. Its engineered columns must still be grouped under the
    ``provider`` parent feature in the explanation for row 0.
    """
    features, target = fraud_100
    target = target.to_series()
    features = features.set_types(
        logical_types={'provider': 'NaturalLanguage'})

    class LinearPipelineText(pipeline_class):
        component_graph = [
            "Select Columns Transformer",
            "One Hot Encoder",
            "Text Featurization Component",
            "DateTime Featurization Component",
            estimator,
        ]

    parameters = {
        "Select Columns Transformer": {
            'columns': ['amount', 'provider', "currency", 'datetime'],
        },
        estimator: {"n_jobs": 1},
    }
    pipeline = LinearPipelineText(parameters)
    target = transform_y_for_problem_type(pipeline.problem_type, target)
    pipeline.fit(features, target)

    report = explain_predictions(pipeline, features, target,
                                 indices_to_explain=[0],
                                 top_k_features=4,
                                 output_format="dict")

    # Parent feature -> engineered feature names expected in its drill-down.
    expected_children = (
        ("currency", EXPECTED_CURRENCY_FEATURES),
        ("provider", EXPECTED_PROVIDER_FEATURES_TEXT),
        ("datetime", EXPECTED_DATETIME_FEATURES),
    )

    for explanation in report["explanations"][0]['explanations']:
        assert set(explanation['feature_names']) == {
            "amount", "provider", "currency", "datetime"
        }
        assert set(explanation['feature_values']) == {
            "CUC", "Mastercard", 24900, pd.Timestamp('2019-01-01 00:12:26')
        }

        drill_down = explanation['drill_down']
        assert drill_down.keys() == {"currency", "provider", "datetime"}
        for parent, children in expected_children:
            assert set(drill_down[parent]['feature_names']) == children
def test_categories_aggregated_date_ohe(pipeline_class, estimator, fraud_100):
    """Check aggregation when date features are themselves one-hot encoded.

    The datetime featurizer runs with ``encode_as_categories`` enabled ahead
    of the one-hot encoder. The resulting columns must still be grouped under
    the ``datetime`` parent feature in the explanation for row 0.
    """
    features, target = fraud_100
    target = target.to_series()

    class LinearPipelineEncodeDatesAsCategory(pipeline_class):
        component_graph = [
            "Select Columns Transformer",
            "DateTime Featurization Component",
            "One Hot Encoder",
            estimator,
        ]

    parameters = {
        "Select Columns Transformer": {
            'columns': ['datetime', 'amount', 'provider', "currency"],
        },
        'DateTime Featurization Component': {"encode_as_categories": True},
        estimator: {"n_jobs": 1},
    }
    pipeline = LinearPipelineEncodeDatesAsCategory(parameters)
    target = transform_y_for_problem_type(pipeline.problem_type, target)
    pipeline.fit(features, target)

    report = explain_predictions(pipeline, features, target,
                                 indices_to_explain=[0],
                                 output_format="dict",
                                 top_k_features=7)

    # Parent feature -> engineered feature names expected in its drill-down.
    expected_children = (
        ("datetime", EXPECTED_DATETIME_FEATURES_OHE),
        ("currency", EXPECTED_CURRENCY_FEATURES),
        ("provider", EXPECTED_PROVIDER_FEATURES_OHE),
    )

    for explanation in report["explanations"][0]['explanations']:
        assert set(explanation['feature_names']) == {
            "amount", "provider", "currency", "datetime"
        }
        assert set(explanation['feature_values']) == {
            pd.Timestamp('2019-01-01 00:12:26'), 'Mastercard', 'CUC', 24900
        }

        drill_down = explanation['drill_down']
        assert drill_down.keys() == {"currency", "provider", "datetime"}
        for parent, children in expected_children:
            assert set(drill_down[parent]['feature_names']) == children
def test_output_format_checked():
    """Verify that an unsupported ``output_format`` raises ValueError.

    Both ``explain_predictions`` and ``explain_predictions_best_worst`` are
    called with an unknown format; each must raise with a message naming the
    accepted formats and the rejected value.
    """
    input_features = pd.DataFrame(data=[range(15)])
    with pytest.raises(
            ValueError,
            match="Parameter output_format must be either text, dict, or "
                  "dataframe. Received bar"):
        # indices_to_explain is a list, as in every other call in this
        # module, so the test does not depend on output_format being
        # validated before the indices are iterated. Index 0 is valid for
        # the one-row frame.
        explain_predictions(pipeline=MagicMock(),
                            input_features=input_features,
                            y=None,
                            indices_to_explain=[0],
                            output_format="bar")

    input_features, y_true = pd.DataFrame(data=range(15)), pd.Series(range(15))
    with pytest.raises(
            ValueError,
            match="Parameter output_format must be either text, dict, or "
                  "dataframe. Received foo"):
        explain_predictions_best_worst(pipeline=MagicMock(),
                                       input_features=input_features,
                                       y_true=y_true,
                                       output_format="foo")
def test_categories_aggregated_but_not_those_that_are_dropped(
        pipeline_class, estimator, fraud_100):
    """Check that fully dropped engineered features are not reported.

    Every column named in ``EXPECTED_DATETIME_FEATURES`` is dropped before
    the estimator, so ``datetime`` must be absent from the feature names,
    feature values and drill-down of the explanation for row 0.
    """
    features, target = fraud_100
    target = target.to_series()

    class LinearPipelineDropDates(pipeline_class):
        component_graph = [
            "Select Columns Transformer",
            "One Hot Encoder",
            "DateTime Featurization Component",
            'Drop Columns Transformer',
            estimator,
        ]

    parameters = {
        "Select Columns Transformer": {
            'columns': ['amount', 'provider', "currency", "datetime"],
        },
        "Drop Columns Transformer": {
            "columns": list(EXPECTED_DATETIME_FEATURES),
        },
        estimator: {"n_jobs": 1},
    }
    pipeline = LinearPipelineDropDates(parameters)
    target = transform_y_for_problem_type(pipeline.problem_type, target)
    pipeline.fit(features, target)

    report = explain_predictions(pipeline, features, target,
                                 indices_to_explain=[0],
                                 output_format="dict")

    # Parent feature -> engineered feature names expected in its drill-down.
    expected_children = (
        ("currency", EXPECTED_CURRENCY_FEATURES),
        ("provider", EXPECTED_PROVIDER_FEATURES_OHE),
    )

    for explanation in report["explanations"][0]['explanations']:
        assert set(explanation['feature_names']) == {
            "amount", "provider", "currency"
        }
        assert set(explanation['feature_values']) == {
            "CUC", "Mastercard", 24900
        }

        drill_down = explanation['drill_down']
        assert drill_down.keys() == {"currency", "provider"}
        for parent, children in expected_children:
            assert set(drill_down[parent]['feature_names']) == children
def test_explain_predictions_custom_index(mock_make_table, problem_type,
                                          output_format, answer):
    """Check explain_predictions output for features with a string index.

    The pipeline is a ``MagicMock`` whose predictions depend on
    ``problem_type``, and the table builder is mocked. The report must match
    ``answer``: compared as tables for ``text`` output, by equality otherwise.

    NOTE(review): the arguments are presumably supplied by patch/parametrize
    decorators that are not visible in this view - confirm.
    """
    wants_text = output_format == "text"

    if wants_text:
        mock_make_table.return_value = "table goes here"
    else:
        mock_make_table.return_value = {
            "explanations": ["explanation_dictionary_goes_here"]
        }

    input_features = pd.DataFrame({"a": [3, 4]}, index=["first", "second"])

    pipeline = MagicMock()
    pipeline.name = "Test Pipeline Name"
    pipeline.parameters = "Parameters go here"
    pipeline.problem_type = problem_type

    if problem_type == ProblemTypes.REGRESSION:
        pipeline.predict.return_value = pd.Series([2, 1])
    elif problem_type == ProblemTypes.BINARY:
        pipeline.classes_.return_value = ["benign", "malignant"]
        pipeline.predict.return_value = pd.Series(["malignant", "malignant"])
        pipeline.predict_proba.return_value = pd.DataFrame({
            "benign": [0.05, 0.1],
            "malignant": [0.95, 0.9],
        })
    else:
        # Multiclass text output uses its own table layout.
        if wants_text:
            mock_make_table.return_value = multiclass_table
        pipeline.classes_.return_value = ["setosa", "versicolor", "virginica"]
        pipeline.predict.return_value = pd.Series(["setosa", "versicolor"])
        pipeline.predict_proba.return_value = pd.DataFrame({
            "setosa": [0.8, 0.2],
            "versicolor": [0.1, 0.75],
            "virginica": [0.1, 0.05],
        })

    report = explain_predictions(pipeline,
                                 input_features,
                                 training_data=input_features,
                                 output_format=output_format)

    if wants_text:
        compare_two_tables(report.splitlines(), answer.splitlines())
    else:
        assert report == answer
def test_explain_predictions_best_worst_and_explain_predictions(
        mock_make_table, mock_default_metrics, problem_type, output_format,
        answer, explain_predictions_answer, custom_index):
    """Check both report functions against expected output, per format.

    The pipeline, the table builder and the default metric are mocked.
    ``explain_predictions`` is compared with ``explain_predictions_answer``
    and ``explain_predictions_best_worst`` with ``answer``, after the expected
    best/worst index labels from ``custom_index`` are written into ``answer``.

    NOTE(review): this module defines another function with the same name
    further down; the later definition is the one bound at import time.
    Confirm which of the two is meant to run.
    """
    if output_format == "text":
        mock_make_table.return_value = "table goes here"
    elif output_format == "dataframe":
        shap_table = pd.DataFrame({
            "feature_names": [0],
            "feature_values": [0],
            "qualitative_explanation": [0],
            "quantitative_explanation": [0],
        })
        # Use side effect so that we always get a new copy of the dataframe
        mock_make_table.side_effect = lambda *args, **kwargs: shap_table.copy()
    else:
        mock_make_table.return_value = {
            "explanations": ["explanation_dictionary_goes_here"]
        }

    input_features = pd.DataFrame({"a": [3, 4]}, index=custom_index)

    pipeline = MagicMock()
    pipeline.name = "Test Pipeline Name"
    pipeline.parameters = "Parameters go here"
    pipeline.problem_type = problem_type
    pipeline.compute_estimator_features.return_value = ww.DataTable(
        input_features)

    def _add_custom_index(answer, index_best, index_worst, output_format):
        # Write the expected best/worst index labels into the expected answer.
        if output_format == "text":
            return answer.format(index_0=index_best, index_1=index_worst)
        if output_format == "dataframe":
            group_column = "prefix" if "prefix" in answer.columns else "rank"
            rows_per_group = answer[group_column].value_counts().tolist()[0]
            answer['index_id'] = ([index_best] * rows_per_group +
                                  [index_worst] * rows_per_group)
            return answer
        entries = answer["explanations"]
        entries[0]["predicted_values"]["index_id"] = index_best
        entries[1]["predicted_values"]["index_id"] = index_worst
        return answer

    # Describe each scenario as data; the mocks are wired once below.
    if is_regression(problem_type):
        metric_name = "abs_error"
        row_errors = pd.Series([4., 1.], dtype="float64")
        class_labels, probabilities = None, None
        predictions = pd.Series([2, 1])
        true_values = [3, 2]
        best_position, worst_position = 1, 0
    elif is_binary(problem_type):
        metric_name = "cross_entropy"
        row_errors = pd.Series([0.2, 0.78])
        class_labels = ["benign", "malignant"]
        probabilities = pd.DataFrame({
            "benign": [0.05, 0.1],
            "malignant": [0.95, 0.9],
        })
        predictions = pd.Series(["malignant"] * 2)
        true_values = ["malignant", "benign"]
        best_position, worst_position = 0, 1
    else:
        # Multiclass text output is formatted slightly different so need to
        # account for that
        if output_format == "text":
            mock_make_table.return_value = multiclass_table
        metric_name = "cross_entropy"
        row_errors = pd.Series([0.15, 0.34])
        class_labels = ["setosa", "versicolor", "virginica"]
        probabilities = pd.DataFrame({
            "setosa": [0.8, 0.2],
            "versicolor": [0.1, 0.75],
            "virginica": [0.1, 0.05],
        })
        predictions = pd.Series(["setosa", "versicolor"])
        true_values = ["setosa", "versicolor"]
        best_position, worst_position = 0, 1

    metric_mock = MagicMock(__name__=metric_name)
    metric_mock.return_value = row_errors
    mock_default_metrics.__getitem__.return_value = metric_mock

    # Only the classification scenarios configure classes and probabilities.
    if class_labels is not None:
        pipeline.classes_.return_value = class_labels
        pipeline.predict_proba.return_value = ww.DataTable(probabilities)
    pipeline.predict.return_value = ww.DataColumn(predictions)

    y_true = pd.Series(true_values, index=custom_index)
    answer = _add_custom_index(answer,
                               index_best=custom_index[best_position],
                               index_worst=custom_index[worst_position],
                               output_format=output_format)

    def _assert_report_matches(actual, expected, ordered_columns):
        # Compare a report with its expected value for the current format.
        if output_format == "text":
            compare_two_tables(actual.splitlines(), expected.splitlines())
        elif output_format == "dataframe":
            actual_columns = actual.columns.tolist()
            expected_columns = expected.columns.tolist()
            if ordered_columns:
                assert actual_columns == expected_columns
            else:
                # Check dataframes equal without caring about column order
                assert sorted(actual_columns) == sorted(expected_columns)
            pd.testing.assert_frame_equal(actual, expected[actual.columns])
        else:
            assert actual == expected

    report = explain_predictions(pipeline,
                                 input_features,
                                 y=y_true,
                                 indices_to_explain=[0, 1],
                                 output_format=output_format)
    _assert_report_matches(report, explain_predictions_answer,
                           ordered_columns=True)

    best_worst_report = explain_predictions_best_worst(
        pipeline,
        input_features,
        y_true=y_true,
        num_to_explain=1,
        output_format=output_format)
    _assert_report_matches(best_worst_report, answer, ordered_columns=False)
def test_explain_predictions_best_worst_and_explain_predictions(
        mock_make_table, mock_default_metrics, problem_type, output_format,
        answer, explain_predictions_answer, custom_index):
    """Check both report functions against expected output, per format.

    The pipeline, the table builder and the default metric are mocked.
    ``explain_predictions_best_worst`` is compared with ``answer`` (after the
    expected best/worst index labels from ``custom_index`` are written into
    it) and ``explain_predictions`` with ``explain_predictions_answer``.

    NOTE(review): an earlier function in this module has the same name; this
    later definition is the one bound at import time. Confirm which of the
    two is meant to run.
    """
    wants_text = output_format == "text"

    if wants_text:
        mock_make_table.return_value = "table goes here"
    else:
        mock_make_table.return_value = {
            "explanations": ["explanation_dictionary_goes_here"]
        }

    input_features = pd.DataFrame({"a": [3, 4]}, index=custom_index)

    pipeline = MagicMock()
    pipeline.name = "Test Pipeline Name"
    pipeline.parameters = "Parameters go here"
    pipeline.problem_type = problem_type

    def _add_custom_index(answer, index_best, index_worst, output_format):
        # Write the expected best/worst index labels into the expected answer.
        if output_format == "text":
            return answer.format(index_0=index_best, index_1=index_worst)
        entries = answer["explanations"]
        entries[0]["predicted_values"]["index_id"] = index_best
        entries[1]["predicted_values"]["index_id"] = index_worst
        return answer

    # Describe each scenario as data; the mocks are wired once below.
    if problem_type == ProblemTypes.REGRESSION:
        metric_name = "abs_error"
        row_errors = pd.Series([4, 1], dtype="int")
        class_labels, probabilities = None, None
        predictions = pd.Series([2, 1])
        true_values = [3, 2]
        best_position, worst_position = 1, 0
    elif problem_type == ProblemTypes.BINARY:
        metric_name = "cross_entropy"
        row_errors = pd.Series([0.2, 0.78])
        class_labels = ["benign", "malignant"]
        probabilities = pd.DataFrame({
            "benign": [0.05, 0.1],
            "malignant": [0.95, 0.9],
        })
        predictions = pd.Series(["malignant"] * 2)
        true_values = ["malignant", "benign"]
        best_position, worst_position = 0, 1
    else:
        # Multiclass text output is formatted slightly different so need to
        # account for that
        if wants_text:
            mock_make_table.return_value = multiclass_table
        metric_name = "cross_entropy"
        row_errors = pd.Series([0.15, 0.34])
        class_labels = ["setosa", "versicolor", "virginica"]
        probabilities = pd.DataFrame({
            "setosa": [0.8, 0.2],
            "versicolor": [0.1, 0.75],
            "virginica": [0.1, 0.05],
        })
        # NOTE(review): predict returns a plain list here while the other
        # branches return a Series - confirm this is intentional.
        predictions = ["setosa", "versicolor"]
        true_values = ["setosa", "versicolor"]
        best_position, worst_position = 0, 1

    metric_mock = MagicMock(__name__=metric_name)
    metric_mock.return_value = row_errors
    mock_default_metrics.__getitem__.return_value = metric_mock

    # Only the classification scenarios configure classes and probabilities.
    if class_labels is not None:
        pipeline.classes_.return_value = class_labels
        pipeline.predict_proba.return_value = probabilities
    pipeline.predict.return_value = predictions

    y_true = pd.Series(true_values, index=custom_index)
    answer = _add_custom_index(answer,
                               index_best=custom_index[best_position],
                               index_worst=custom_index[worst_position],
                               output_format=output_format)

    def _assert_report_matches(actual, expected):
        # Text reports are compared as tables, dict reports by equality.
        if wants_text:
            compare_two_tables(actual.splitlines(), expected.splitlines())
        else:
            assert actual == expected

    best_worst_report = explain_predictions_best_worst(
        pipeline,
        input_features,
        y_true=y_true,
        num_to_explain=1,
        output_format=output_format)
    _assert_report_matches(best_worst_report, answer)

    report = explain_predictions(pipeline,
                                 input_features,
                                 output_format=output_format)
    _assert_report_matches(report, explain_predictions_answer)
def test_explain_predictions_value_errors():
    """Verify that explain_predictions rejects an empty input dataframe.

    NOTE(review): an earlier function in this module has the same name; this
    later definition is the one bound at import time. Confirm which of the
    two is meant to run.
    """
    empty_features = pd.DataFrame()
    expected_message = "Parameter input_features must be a non-empty dataframe."
    with pytest.raises(ValueError, match=expected_message):
        explain_predictions(None, empty_features)