コード例 #1
0
def test_target_imputer_woodwork_custom_overrides_returned_by_components(
        y_pd, has_nan, impute_strategy):
    """Check the logical type of the imputed target for each type override.

    The target is wrapped in a ``ww.DataColumn`` with each of the Integer,
    Double, Categorical and Boolean logical types in turn, then passed
    through ``TargetImputer``. The result must be a ``ww.DataColumn``. Its
    logical type must equal the override when the strategy is
    "most_frequent" or no NaN was injected, and ``Double`` otherwise.
    """
    base_target = y_pd.copy()
    if has_nan:
        # Overwrite the entry labelled ``len(y_pd) - 1`` with a missing value.
        base_target[len(y_pd) - 1] = np.nan

    for logical_type in (Integer, Double, Categorical, Boolean):
        try:
            y = ww.DataColumn(base_target.copy(), logical_type=logical_type)
        except TypeError:
            # Building the DataColumn raised TypeError for this logical type;
            # move on to the next override.
            continue

        # Categorical / NaturalLanguage targets always use "most_frequent",
        # regardless of the strategy requested by the caller.
        strategy = ("most_frequent"
                    if logical_type in [Categorical, NaturalLanguage]
                    else impute_strategy)

        imputer = TargetImputer(impute_strategy=strategy)
        imputer.fit(None, y)
        _, y_imputed = imputer.transform(None, y)
        assert isinstance(y_imputed, ww.DataColumn)

        keeps_override = strategy == "most_frequent" or not has_nan
        expected_type = logical_type if keeps_override else Double
        assert y_imputed.logical_type == expected_type
コード例 #2
0
def test_target_imputer_all_bool_return_original(data_type, make_data_type):
    """A bool target without missing values keeps its values after imputing.

    The expected output holds the same values with the nullable 'boolean'
    dtype.
    """
    target = make_data_type(
        data_type,
        pd.Series([True, True, False, True, True], dtype=bool),
    )
    expected = pd.Series([True, True, False, True, True], dtype='boolean')

    imputer = TargetImputer()
    imputer.fit(None, target)
    _, imputed = imputer.transform(None, target)

    assert_series_equal(expected, imputed.to_series())
コード例 #3
0
def test_target_imputer_no_y(X_y_binary):
    """With no target supplied, the imputer hands back ``(None, None)``.

    Both the one-shot ``fit_transform`` call and the separate ``fit`` /
    ``transform`` sequence are exercised. The ``X_y_binary`` fixture is kept
    in the signature, but its data is not needed here because every call
    passes ``None`` for both X and y.
    """
    imputer = TargetImputer()
    assert imputer.fit_transform(None, None) == (None, None)

    # Use a fresh instance so the fit/transform path is checked independently
    # of the fit_transform call above.
    imputer = TargetImputer()
    imputer.fit(None, None)
    assert imputer.transform(None, None) == (None, None)
コード例 #4
0
def test_target_imputer_boolean_dtype(data_type, make_data_type):
    """Missing entries in a nullable 'boolean' target are filled with True.

    The input is ``[True, NaN, False, NaN, True]``; the expected output
    replaces both missing entries with ``True`` and keeps the 'boolean' dtype.
    """
    raw_target = pd.Series([True, np.nan, False, np.nan, True],
                           dtype='boolean')
    expected = pd.Series([True, True, False, True, True], dtype='boolean')
    target = make_data_type(data_type, raw_target)

    imputer = TargetImputer()
    imputer.fit(None, target)
    _, imputed = imputer.transform(None, target)

    assert_series_equal(expected, imputed.to_series())
コード例 #5
0
def test_target_imputer_fit_transform_all_nan_empty():
    """An all-NaN target makes the imputer raise ``RuntimeError``.

    The error is expected from ``transform`` (after a successful ``fit``) and
    from ``fit_transform`` on a fresh instance.
    """
    all_nan_target = pd.Series([np.nan, np.nan])
    empty_message = "Transformed data is empty"

    fitted_imputer = TargetImputer()
    fitted_imputer.fit(None, all_nan_target)
    with pytest.raises(RuntimeError, match=empty_message):
        fitted_imputer.transform(None, all_nan_target)

    fresh_imputer = TargetImputer()
    with pytest.raises(RuntimeError, match=empty_message):
        fresh_imputer.fit_transform(None, all_nan_target)
コード例 #6
0
def test_target_imputer_col_with_non_numeric_with_numeric_strategy():
    """Numeric strategies reject a target that contains strings.

    For both the 'mean' and 'median' strategies, ``fit_transform`` and ``fit``
    are each expected to raise ``ValueError`` with a strategy-specific
    message.
    """
    string_target = pd.Series([np.nan, "a", "b"])
    expected_errors = (
        ('mean', "Cannot use mean strategy with non-numeric data"),
        ('median', "Cannot use median strategy with non-numeric data"),
    )

    for strategy, message in expected_errors:
        # One imputer per strategy is reused for both calls.
        imputer = TargetImputer(impute_strategy=strategy)
        with pytest.raises(ValueError, match=message):
            imputer.fit_transform(None, string_target)
        with pytest.raises(ValueError, match=message):
            imputer.fit(None, string_target)
コード例 #7
0
def test_target_imputer_does_not_reset_index():
    """The imputed target keeps the index of the input target.

    A ten-element target gets a NaN at label 5 and then loses its first row,
    leaving index labels 1..9. After mean imputation the output must carry
    those same labels, with the missing entry replaced by 5.0, the mean of
    the remaining values.
    """
    # Build the Series as float up front: writing NaN into an integer Series
    # relies on implicit upcasting, which newer pandas releases deprecate.
    y = pd.Series(np.arange(10), dtype=float)
    y[5] = np.nan
    assert y.index.tolist() == list(range(10))

    # Dropping label 0 leaves an index that no longer starts at zero.
    y.drop(0, inplace=True)
    shifted_index = list(range(1, 10))
    pd.testing.assert_series_equal(
        pd.Series([1, 2, 3, 4, np.nan, 6, 7, 8, 9],
                  dtype=float,
                  index=shifted_index), y)

    imputer = TargetImputer(impute_strategy="mean")
    imputer.fit(None, y=y)
    _, y_t = imputer.transform(None, y)
    pd.testing.assert_series_equal(
        pd.Series([1.0, 2, 3, 4, 5, 6, 7, 8, 9],
                  dtype=float,
                  index=shifted_index), y_t.to_series())