def test_fast_permutation_importance_matches_sklearn_output(
        mock_supports_fast_importance, pipeline_class, parameters,
        has_minimal_dependencies):
    """Compare permutation importance with the fast-path check mocked on and off.

    The same fitted pipeline is scored twice with the 'Log Loss Binary'
    objective: once while ``mock_supports_fast_importance`` returns True and
    once while it returns False. Both resulting frames must be equal.
    """
    needs_target_encoder = pipeline_class == LinearPipelineWithTargetEncoderAndOHE
    if has_minimal_dependencies and needs_target_encoder:
        pytest.skip(
            "Skipping test_fast_permutation_importance_matches_sklearn_output for target encoder cause "
            "dependency not installed.")

    X, y = load_fraud(100)
    if pipeline_class == LinearPipelineWithTextFeatures:
        X = X.set_types(logical_types={'provider': 'NaturalLanguage'})

    # Draw the seed from RandomState(0) so that we end up using the same int
    # as sklearn does under the hood when it is handed a seed of 0.
    int32_upper_bound = np.iinfo(np.int32).max + 1
    derived_seed = np.random.RandomState(0).randint(int32_upper_bound)

    mock_supports_fast_importance.return_value = True
    parameters['Random Forest Classifier'] = {'n_jobs': 1}
    pipeline = pipeline_class(parameters=parameters)
    pipeline.fit(X, y)

    def _log_loss_importance(seed):
        # Both runs share everything except the seed and the mock's setting.
        return calculate_permutation_importance(pipeline,
                                                X,
                                                y,
                                                objective='Log Loss Binary',
                                                random_seed=seed)

    fast_scores = _log_loss_importance(derived_seed)

    mock_supports_fast_importance.return_value = False
    slow_scores = _log_loss_importance(0)

    pd.testing.assert_frame_equal(fast_scores, slow_scores)
def test_get_permutation_importance_invalid_objective(
        X_y_regression, linear_regression_pipeline_class):
    """Requesting the "mcc multiclass" objective on this pipeline raises ValueError.

    The error message is expected to name the objective as 'MCC Multiclass'
    and to include the pipeline's ``name``.
    """
    import re  # local import: only needed to escape the expected message

    X, y = X_y_regression
    pipeline = linear_regression_pipeline_class(parameters={}, random_seed=42)
    expected_message = (
        f"Given objective 'MCC Multiclass' cannot be used with '{pipeline.name}'"
    )
    # pytest.raises treats ``match`` as a regular expression, so escape the
    # message to compare it literally even if the pipeline name happens to
    # contain regex metacharacters such as '+' or '('.
    with pytest.raises(ValueError, match=re.escape(expected_message)):
        calculate_permutation_importance(pipeline, X, y, "mcc multiclass")
def test_permutation_importance_raises_deprecated_random_state_warning(
        mock_fast_permutation_importance, X_y_binary,
        logistic_regression_binary_pipeline_class):
    """Passing ``random_state`` warns and forwards the value as ``random_seed``.

    Checks that the first call recorded on ``mock_fast_permutation_importance``
    received ``random_seed=15`` and that the first recorded warning starts
    with the deprecation message.
    """
    X, y = X_y_binary
    pipeline = logistic_regression_binary_pipeline_class(parameters={},
                                                         random_seed=2)

    with warnings.catch_warnings(record=True) as recorded:
        # Record every warning, including ones that would normally be
        # suppressed as duplicates.
        warnings.simplefilter("always")
        calculate_permutation_importance(pipeline,
                                         X,
                                         y,
                                         objective="Log Loss Binary",
                                         random_state=15)

    first_call = mock_fast_permutation_importance.call_args_list[0]
    call_kwargs = first_call[1]
    assert call_kwargs['random_seed'] == 15

    first_message = str(recorded[0].message)
    assert first_message.startswith(
        "Argument 'random_state' has been deprecated in favor of 'random_seed'"
    )
Esempio n. 4
0
def test_get_permutation_importance_multiclass(X_y_multi, logistic_regression_multiclass_pipeline_class,
                                               multiclass_core_objectives):
    """Permutation importance has the expected columns for each multiclass objective.

    For every objective in ``multiclass_core_objectives`` the result must have
    exactly the columns "feature" and "importance" and must not be entirely null.
    """
    X, y = X_y_multi
    estimator_parameters = {"Logistic Regression Classifier": {"n_jobs": 1}}
    pipeline = logistic_regression_multiclass_pipeline_class(parameters=estimator_parameters,
                                                             random_state=42)
    pipeline.fit(X, y)

    expected_columns = ["feature", "importance"]
    for objective in multiclass_core_objectives:
        importance_frame = calculate_permutation_importance(pipeline, X, y, objective)
        assert list(importance_frame.columns) == expected_columns
        entirely_null = importance_frame.isnull().all().all()
        assert not entirely_null
Esempio n. 5
0
def test_get_permutation_importance_regression(linear_regression_pipeline_class, regression_core_objectives):
    """Permutation importance has the expected columns for each regression objective.

    The pipeline is fit on a single-column frame whose values equal the
    target (alternating 1 and 2, ten rows). For every objective in
    ``regression_core_objectives`` the result must have exactly the columns
    "feature" and "importance" and must not be entirely null.
    """
    alternating_values = [1, 2] * 5
    X = pd.DataFrame(alternating_values)
    y = pd.Series(alternating_values)

    estimator_parameters = {"Linear Regressor": {"n_jobs": 1}}
    pipeline = linear_regression_pipeline_class(parameters=estimator_parameters,
                                                random_state=42)
    pipeline.fit(X, y)

    expected_columns = ["feature", "importance"]
    for objective in regression_core_objectives:
        importance_frame = calculate_permutation_importance(pipeline, X, y, objective)
        assert list(importance_frame.columns) == expected_columns
        entirely_null = importance_frame.isnull().all().all()
        assert not entirely_null
Esempio n. 6
0
def test_get_permutation_importance_binary(X_y_binary, data_type, logistic_regression_binary_pipeline_class,
                                           binary_core_objectives, make_data_type):
    """Permutation importance has the expected columns for each binary objective.

    ``X`` and ``y`` are first converted with ``make_data_type(data_type, ...)``.
    For every objective in ``binary_core_objectives`` the result must have
    exactly the columns "feature" and "importance" and must not be entirely null.
    """
    raw_X, raw_y = X_y_binary
    X = make_data_type(data_type, raw_X)
    y = make_data_type(data_type, raw_y)

    estimator_parameters = {"Logistic Regression Classifier": {"n_jobs": 1}}
    pipeline = logistic_regression_binary_pipeline_class(parameters=estimator_parameters,
                                                         random_state=42)
    pipeline.fit(X, y)

    expected_columns = ["feature", "importance"]
    for objective in binary_core_objectives:
        importance_frame = calculate_permutation_importance(pipeline, X, y, objective)
        assert list(importance_frame.columns) == expected_columns
        entirely_null = importance_frame.isnull().all().all()
        assert not entirely_null
Esempio n. 7
0
def test_get_permutation_importance_correlated_features(logistic_regression_binary_pipeline_class):
    """A feature derived from the target outranks one that is not.

    "correlated" is built as twice the target, while "not correlated" is a
    fixed list unrelated to it. The importance reported for "correlated" must
    be strictly greater than the one reported for "not correlated".
    """
    y = pd.Series([1, 0, 1, 1])
    X = pd.DataFrame()
    X["correlated"] = y * 2
    X["not correlated"] = [-1, -1, -1, 0]
    y = y.astype(bool)

    pipeline = logistic_regression_binary_pipeline_class(parameters={}, random_state=42)
    pipeline.fit(X, y)
    importance = calculate_permutation_importance(pipeline, X, y, objective="Log Loss Binary", random_state=0)

    assert list(importance.columns) == ["feature", "importance"]
    assert not importance.isnull().all().all()

    def _importance_of(feature_name):
        # Find the index label of the first row for this feature, then read
        # its value from the "importance" column.
        matching_labels = importance.index[importance["feature"] == feature_name]
        return importance["importance"][matching_labels[0]]

    correlated_importance_val = _importance_of("correlated")
    not_correlated_importance_val = _importance_of("not correlated")
    assert correlated_importance_val > not_correlated_importance_val