Example #1
def test_output_format_checked():
    input_features, y_true = pd.DataFrame(data=[range(15)]), pd.Series(
        range(15))
    with pytest.raises(
            ValueError,
            match="Parameter output_format must be either text or dict. Received bar"
    ):
        explain_predictions(None, input_features, output_format="bar")
    with pytest.raises(
            ValueError,
            match="Parameter output_format must be either text or dict. Received xml"
    ):
        explain_prediction(None,
                           input_features=input_features,
                           training_data=None,
                           output_format="xml")

    input_features, y_true = pd.DataFrame(data=range(15)), pd.Series(range(15))
    with pytest.raises(
            ValueError,
            match="Parameter output_format must be either text or dict. Received foo"
    ):
        explain_predictions_best_worst(None,
                                       input_features,
                                       y_true=y_true,
                                       output_format="foo")
Example #2
def test_output_format_checked():
    input_features, y_true = pd.DataFrame(data=[range(15)]), pd.Series(
        range(15))
    with pytest.raises(
            ValueError,
            match="Parameter output_format must be either text, dict, or dataframe. Received bar"
    ):
        explain_predictions(pipeline=MagicMock(),
                            input_features=input_features,
                            y=None,
                            indices_to_explain=0,
                            output_format="bar")
    with pytest.raises(
            ValueError,
            match="Parameter output_format must be either text, dict, or dataframe. Received xml"
    ):
        explain_prediction(pipeline=MagicMock(),
                           input_features=input_features,
                           y=None,
                           index_to_explain=0,
                           output_format="xml")

    input_features, y_true = pd.DataFrame(data=range(15)), pd.Series(range(15))
    with pytest.raises(
            ValueError,
            match="Parameter output_format must be either text, dict, or dataframe. Received foo"
    ):
        explain_predictions_best_worst(pipeline=MagicMock(),
                                       input_features=input_features,
                                       y_true=y_true,
                                       output_format="foo")
Example #3
def test_explain_predictions_stacked_ensemble(
        problem_type, dummy_stacked_ensemble_binary_estimator,
        dummy_stacked_ensemble_multiclass_estimator,
        dummy_stacked_ensemble_regressor_estimator, X_y_binary, X_y_multi,
        X_y_regression):
    if is_binary(problem_type):
        X, y = X_y_binary
        pipeline = dummy_stacked_ensemble_binary_estimator
    elif is_multiclass(problem_type):
        X, y = X_y_multi
        pipeline = dummy_stacked_ensemble_multiclass_estimator
    else:
        X, y = X_y_regression
        pipeline = dummy_stacked_ensemble_regressor_estimator

    with pytest.raises(
            ValueError,
            match="Cannot explain predictions for a stacked ensemble pipeline"
    ):
        explain_predictions(pipeline, X, y, indices_to_explain=[0])

    with pytest.raises(
            ValueError,
            match="Cannot explain predictions for a stacked ensemble pipeline"
    ):
        explain_predictions_best_worst(pipeline, X, y)
Example #4
def test_explain_predictions_raises_pipeline_score_error():
    with pytest.raises(PipelineScoreError, match="Division by zero!"):

        def raise_zero_division(input_features):
            raise ZeroDivisionError("Division by zero!")

        pipeline = MagicMock()
        pipeline.problem_type = ProblemTypes.BINARY
        pipeline.predict_proba.side_effect = raise_zero_division
        explain_predictions_best_worst(pipeline, pd.DataFrame({"a": range(15)}), pd.Series(range(15)))
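
Example #8 further down mocks pipeline.predict for regression problems, which suggests the same failure-propagation pattern can be exercised without predict_proba. The sketch below builds on that observation and is not an existing test: whether the regression path wraps the error in PipelineScoreError in exactly the same way is inferred, not stated in this excerpt.

import pandas as pd
import pytest
from unittest.mock import MagicMock

from evalml.exceptions import PipelineScoreError  # assumed import paths
from evalml.problem_types import ProblemTypes
from evalml.model_understanding.prediction_explanations import explain_predictions_best_worst


def test_explain_predictions_raises_pipeline_score_error_regression():
    with pytest.raises(PipelineScoreError, match="Division by zero!"):
        pipeline = MagicMock()
        pipeline.problem_type = ProblemTypes.REGRESSION
        # Regression pipelines have no predict_proba, so break predict instead.
        pipeline.predict.side_effect = ZeroDivisionError("Division by zero!")
        explain_predictions_best_worst(pipeline, pd.DataFrame({"a": range(15)}),
                                       pd.Series(range(15)))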
Example #5
def test_explain_predictions_best_worst_time_series(output_format,
                                                    pipeline_class, estimator,
                                                    ts_data):
    X, y = ts_data

    if is_binary(pipeline_class.problem_type):
        y = y % 2

    class TSPipeline(pipeline_class):
        component_graph = ["Delayed Feature Transformer", estimator]
        name = "time series pipeline"

    tspipeline = TSPipeline({"pipeline": {"gap": 1, "max_delay": 2}})

    tspipeline.fit(X, y)

    exp = explain_predictions_best_worst(pipeline=tspipeline,
                                         input_features=X,
                                         y_true=y,
                                         output_format=output_format)

    if output_format == "dict":
        # Check that the computed features to be explained aren't NaN.
        for exp_idx in range(len(exp["explanations"])):
            feature_values = exp["explanations"][exp_idx]["explanations"][0]["feature_values"]
            assert not np.isnan(np.array(feature_values)).any()
Example #6
def test_explain_predictions_best_worst_custom_metric(mock_make_table,
                                                      output_format, answer):

    mock_make_table.return_value = "table goes here" if output_format == "text" else {
        "explanations": ["explanation_dictionary_goes_here"]
    }
    pipeline = MagicMock()
    pipeline.parameters = "Parameters go here"
    input_features = pd.DataFrame({"a": [5, 6]})
    pipeline.problem_type = ProblemTypes.REGRESSION
    pipeline.name = "Test Pipeline Name"
    pipeline.compute_estimator_features.return_value = ww.DataTable(
        input_features)

    pipeline.predict.return_value = ww.DataColumn(pd.Series([2, 1]))
    y_true = pd.Series([3, 2])

    def sum(y_true, y_pred):
        return y_pred + y_true

    best_worst_report = explain_predictions_best_worst(
        pipeline,
        input_features,
        y_true=y_true,
        num_to_explain=1,
        metric=sum,
        output_format=output_format)

    if output_format == "text":
        compare_two_tables(best_worst_report.splitlines(),
                           regression_custom_metric_answer.splitlines())
    else:
        assert best_worst_report == answer
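
As this test shows, the metric keyword accepts any callable of (y_true, predictions) that returns per-row scores; sum here is simply the smallest such callable, and the __name__ mocks in Example #8 suggest the metric's name is surfaced in the report. A hypothetical absolute-error metric with the same shape (in these tests, lower metric values rank as "best"):

import pandas as pd


def absolute_error(y_true, y_pred):
    # Per-row score, assuming pandas Series inputs; in Example #8 the rows
    # with the smallest values are ranked best.
    return (y_true - y_pred).abs()

# Usage mirroring the test above (a fitted or mocked pipeline is assumed):
# report = explain_predictions_best_worst(pipeline, input_features, y_true=y_true,
#                                         num_to_explain=1, metric=absolute_error)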
Example #7
def test_json_serialization(problem_type, X_y_regression, linear_regression_pipeline_class,
                            X_y_binary, logistic_regression_binary_pipeline_class,
                            X_y_multi, logistic_regression_multiclass_pipeline_class):

    if problem_type == problem_type.REGRESSION:
        X, y = X_y_regression
        y = pd.Series(y)
        pipeline = linear_regression_pipeline_class(parameters={"Linear Regressor": {"n_jobs": 1}})
    elif problem_type == problem_type.BINARY:
        X, y = X_y_binary
        y = pd.Series(y).astype("str")
        pipeline = logistic_regression_binary_pipeline_class(parameters={"Logistic Regression Classifier": {"n_jobs": 1}})
    else:
        X, y = X_y_multi
        y = pd.Series(y).astype("str")
        pipeline = logistic_regression_multiclass_pipeline_class(parameters={"Logistic Regression Classifier": {"n_jobs": 1}})

    pipeline.fit(X, y)

    best_worst = explain_predictions_best_worst(pipeline, pd.DataFrame(X), y,
                                                num_to_explain=1, output_format="dict")
    assert json.loads(json.dumps(best_worst)) == best_worst

    report = explain_predictions(pipeline, pd.DataFrame(X[:1]), output_format="dict")
    assert json.loads(json.dumps(report)) == report
Example #8
def test_explain_predictions_best_worst_and_explain_predictions(
        mock_make_table, mock_default_metrics, problem_type, output_format,
        answer, explain_predictions_answer, custom_index):
    if output_format == "text":
        mock_make_table.return_value = "table goes here"
    elif output_format == "dataframe":
        shap_table = pd.DataFrame({
            "feature_names": [0],
            "feature_values": [0],
            "qualitative_explanation": [0],
            "quantitative_explanation": [0],
        })
        # Use side effect so that we always get a new copy of the dataframe
        mock_make_table.side_effect = lambda *args, **kwargs: shap_table.copy()
    else:
        mock_make_table.return_value = {
            "explanations": ["explanation_dictionary_goes_here"]
        }

    pipeline = MagicMock()
    pipeline.parameters = "Parameters go here"
    input_features = pd.DataFrame({"a": [3, 4]}, index=custom_index)
    pipeline.problem_type = problem_type
    pipeline.name = "Test Pipeline Name"
    pipeline.compute_estimator_features.return_value = ww.DataTable(
        input_features)

    def _add_custom_index(answer, index_best, index_worst, output_format):

        if output_format == "text":
            answer = answer.format(index_0=index_best, index_1=index_worst)
        elif output_format == "dataframe":
            col_name = "prefix" if "prefix" in answer.columns else "rank"
            n_repeats = answer[col_name].value_counts().tolist()[0]
            answer["index_id"] = [index_best] * n_repeats + [index_worst] * n_repeats
        else:
            answer["explanations"][0]["predicted_values"][
                "index_id"] = index_best
            answer["explanations"][1]["predicted_values"][
                "index_id"] = index_worst
        return answer

    if is_regression(problem_type):
        abs_error_mock = MagicMock(__name__="abs_error")
        abs_error_mock.return_value = pd.Series([4., 1.], dtype="float64")
        mock_default_metrics.__getitem__.return_value = abs_error_mock
        pipeline.predict.return_value = ww.DataColumn(pd.Series([2, 1]))
        y_true = pd.Series([3, 2], index=custom_index)
        answer = _add_custom_index(answer,
                                   index_best=custom_index[1],
                                   index_worst=custom_index[0],
                                   output_format=output_format)
    elif is_binary(problem_type):
        pipeline.classes_.return_value = ["benign", "malignant"]
        cross_entropy_mock = MagicMock(__name__="cross_entropy")
        mock_default_metrics.__getitem__.return_value = cross_entropy_mock
        cross_entropy_mock.return_value = pd.Series([0.2, 0.78])
        pipeline.predict_proba.return_value = ww.DataTable(
            pd.DataFrame({
                "benign": [0.05, 0.1],
                "malignant": [0.95, 0.9]
            }))
        pipeline.predict.return_value = ww.DataColumn(
            pd.Series(["malignant"] * 2))
        y_true = pd.Series(["malignant", "benign"], index=custom_index)
        answer = _add_custom_index(answer,
                                   index_best=custom_index[0],
                                   index_worst=custom_index[1],
                                   output_format=output_format)
    else:
        # Multiclass text output is formatted slightly different so need to account for that
        if output_format == "text":
            mock_make_table.return_value = multiclass_table
        pipeline.classes_.return_value = ["setosa", "versicolor", "virginica"]
        cross_entropy_mock = MagicMock(__name__="cross_entropy")
        mock_default_metrics.__getitem__.return_value = cross_entropy_mock
        cross_entropy_mock.return_value = pd.Series([0.15, 0.34])
        pipeline.predict_proba.return_value = ww.DataTable(
            pd.DataFrame({
                "setosa": [0.8, 0.2],
                "versicolor": [0.1, 0.75],
                "virginica": [0.1, 0.05]
            }))
        pipeline.predict.return_value = ww.DataColumn(
            pd.Series(["setosa", "versicolor"]))
        y_true = pd.Series(["setosa", "versicolor"], index=custom_index)
        answer = _add_custom_index(answer,
                                   index_best=custom_index[0],
                                   index_worst=custom_index[1],
                                   output_format=output_format)

    report = explain_predictions(pipeline,
                                 input_features,
                                 y=y_true,
                                 indices_to_explain=[0, 1],
                                 output_format=output_format)
    if output_format == "text":
        compare_two_tables(report.splitlines(),
                           explain_predictions_answer.splitlines())
    elif output_format == "dataframe":
        assert report.columns.tolist() == explain_predictions_answer.columns.tolist()
        pd.testing.assert_frame_equal(
            report, explain_predictions_answer[report.columns])
    else:
        assert report == explain_predictions_answer

    best_worst_report = explain_predictions_best_worst(
        pipeline,
        input_features,
        y_true=y_true,
        num_to_explain=1,
        output_format=output_format)
    if output_format == "text":
        compare_two_tables(best_worst_report.splitlines(), answer.splitlines())
    elif output_format == "dataframe":
        # Check dataframes equal without caring about column order
        assert sorted(best_worst_report.columns.tolist()) == sorted(
            answer.columns.tolist())
        pd.testing.assert_frame_equal(best_worst_report,
                                      answer[best_worst_report.columns])
    else:
        assert best_worst_report == answer
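
The dataframe branches above avoid depending on column order by checking the column sets and then reindexing the expected answer to the report's columns before assert_frame_equal. That pattern can be pulled into a small helper; a minimal sketch (the helper name is made up here, not part of the suite):

import pandas as pd


def assert_frame_equal_ignore_column_order(actual, expected):
    # Same columns required, but in any order; values compared after reordering.
    assert sorted(actual.columns.tolist()) == sorted(expected.columns.tolist())
    pd.testing.assert_frame_equal(actual, expected[actual.columns])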
Example #9
def test_explain_predictions_best_worst_value_errors(input_features, y_true,
                                                     error_message):
    with pytest.raises(ValueError, match=error_message):
        explain_predictions_best_worst(None, input_features, y_true)
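
The input_features, y_true, and error_message arguments come from a pytest parametrization that is not shown in this excerpt. A hypothetical version of that decorator (the cases and messages below are illustrative, not the library's exact wording):

import pandas as pd
import pytest


@pytest.mark.parametrize(
    "input_features,y_true,error_message",
    [
        # Hypothetical cases; the real test's inputs and messages may differ.
        (pd.DataFrame(), pd.Series(range(15)), "empty"),
        (pd.DataFrame({"a": range(15)}), pd.Series(range(10)), "same number of rows"),
    ],
)
def test_explain_predictions_best_worst_value_errors(input_features, y_true, error_message):
    with pytest.raises(ValueError, match=error_message):
        explain_predictions_best_worst(None, input_features, y_true)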
Example #10
def test_explain_predictions_best_worst_and_explain_predictions(
        mock_make_table, mock_default_metrics, problem_type, output_format,
        answer, explain_predictions_answer, custom_index):

    mock_make_table.return_value = "table goes here" if output_format == "text" else {
        "explanations": ["explanation_dictionary_goes_here"]
    }
    pipeline = MagicMock()
    pipeline.parameters = "Parameters go here"
    input_features = pd.DataFrame({"a": [3, 4]}, index=custom_index)
    pipeline.problem_type = problem_type
    pipeline.name = "Test Pipeline Name"

    def _add_custom_index(answer, index_best, index_worst, output_format):

        if output_format == "text":
            answer = answer.format(index_0=index_best, index_1=index_worst)
        else:
            answer["explanations"][0]["predicted_values"][
                "index_id"] = index_best
            answer["explanations"][1]["predicted_values"][
                "index_id"] = index_worst
        return answer

    if problem_type == ProblemTypes.REGRESSION:
        abs_error_mock = MagicMock(__name__="abs_error")
        abs_error_mock.return_value = pd.Series([4, 1], dtype="int")
        mock_default_metrics.__getitem__.return_value = abs_error_mock
        pipeline.predict.return_value = pd.Series([2, 1])
        y_true = pd.Series([3, 2], index=custom_index)
        answer = _add_custom_index(answer,
                                   index_best=custom_index[1],
                                   index_worst=custom_index[0],
                                   output_format=output_format)
    elif problem_type == ProblemTypes.BINARY:
        pipeline.classes_.return_value = ["benign", "malignant"]
        cross_entropy_mock = MagicMock(__name__="cross_entropy")
        mock_default_metrics.__getitem__.return_value = cross_entropy_mock
        cross_entropy_mock.return_value = pd.Series([0.2, 0.78])
        pipeline.predict_proba.return_value = pd.DataFrame({
            "benign": [0.05, 0.1],
            "malignant": [0.95, 0.9]
        })
        pipeline.predict.return_value = pd.Series(["malignant"] * 2)
        y_true = pd.Series(["malignant", "benign"], index=custom_index)
        answer = _add_custom_index(answer,
                                   index_best=custom_index[0],
                                   index_worst=custom_index[1],
                                   output_format=output_format)
    else:
        # Multiclass text output is formatted slightly different so need to account for that
        if output_format == "text":
            mock_make_table.return_value = multiclass_table
        pipeline.classes_.return_value = ["setosa", "versicolor", "virginica"]
        cross_entropy_mock = MagicMock(__name__="cross_entropy")
        mock_default_metrics.__getitem__.return_value = cross_entropy_mock
        cross_entropy_mock.return_value = pd.Series([0.15, 0.34])
        pipeline.predict_proba.return_value = pd.DataFrame({
            "setosa": [0.8, 0.2],
            "versicolor": [0.1, 0.75],
            "virginica": [0.1, 0.05]
        })
        pipeline.predict.return_value = ["setosa", "versicolor"]
        y_true = pd.Series(["setosa", "versicolor"], index=custom_index)
        answer = _add_custom_index(answer,
                                   index_best=custom_index[0],
                                   index_worst=custom_index[1],
                                   output_format=output_format)

    best_worst_report = explain_predictions_best_worst(
        pipeline,
        input_features,
        y_true=y_true,
        num_to_explain=1,
        output_format=output_format)
    if output_format == "text":
        compare_two_tables(best_worst_report.splitlines(), answer.splitlines())
    else:
        assert best_worst_report == answer

    report = explain_predictions(pipeline,
                                 input_features,
                                 output_format=output_format)
    if output_format == "text":
        compare_two_tables(report.splitlines(),
                           explain_predictions_answer.splitlines())
    else:
        assert report == explain_predictions_answer