コード例 #1
0
def test_automatically_find_variables_and_gaussian_imputation_on_right_tail(
        df_na):
    """Check gaussian right-tail imputation with automatic variable selection.

    Fits an ``EndTailImputer`` with ``variables=None`` on ``df_na``
    (presumably a pytest fixture providing a frame with missing values) and
    verifies the init parameters, the fitted attributes and the transformed
    frame against a reference built with ``fillna``.
    """
    # set up transformer
    imputer = EndTailImputer(imputation_method="gaussian",
                             tail="right",
                             fold=3,
                             variables=None)
    X_transformed = imputer.fit_transform(df_na)

    # set up expected output
    X_reference = df_na.copy()
    X_reference["Age"] = X_reference["Age"].fillna(58.94908118478389)
    X_reference["Marks"] = X_reference["Marks"].fillna(1.3244261503263175)

    # test init params
    assert imputer.imputation_method == "gaussian"
    assert imputer.tail == "right"
    assert imputer.fold == 3
    assert imputer.variables is None
    # test fit attr
    assert imputer.variables_ == ["Age", "Marks"]
    assert imputer.n_features_in_ == 6
    # Round a copy of the learned values rather than overwriting
    # ``imputer_dict_``: the fitted estimator must not be altered by the test.
    rounded_fill_values = {
        key: round(value, 3)
        for key, value in imputer.imputer_dict_.items()
    }
    assert rounded_fill_values == {
        "Age": 58.949,
        "Marks": 1.324,
    }
    # transform output: indicated vars ==> no NA, not indicated vars with NA
    assert X_transformed[["Age", "Marks"]].isnull().sum().sum() == 0
    assert X_transformed[["City", "Name"]].isnull().sum().sum() > 0
    pd.testing.assert_frame_equal(X_transformed, X_reference)
コード例 #2
0
def test_user_enters_variables_and_max_value_imputation(df_na):
    """Check the values learned by ``fit`` for the ``"max"`` method.

    The imputer is restricted to the user-supplied variables ``Age`` and
    ``Marks`` and fitted on ``df_na`` with ``fold=2`` on the right tail.
    """
    expected_fill_values = {"Age": 82.0, "Marks": 1.8}

    imputer = EndTailImputer(
        imputation_method="max",
        tail="right",
        fold=2,
        variables=["Age", "Marks"],
    )
    imputer.fit(df_na)

    assert imputer.imputer_dict_ == expected_fill_values
コード例 #3
0
def test_user_enters_variables_and_iqr_imputation_on_left_tail(df_na):
    """Check the values learned by ``fit`` for IQR imputation, left tail.

    The imputer is restricted to the user-supplied variables ``Age`` and
    ``Marks`` and fitted on ``df_na`` with ``fold=1.5``.
    """
    # test case 5: IQR + left tail
    imputer = EndTailImputer(imputation_method="iqr",
                             tail="left",
                             fold=1.5,
                             variables=["Age", "Marks"])
    imputer.fit(df_na)
    # The learned values are the result of floating-point arithmetic (the
    # previous expectation for "Marks" was 0.36249999999999993), so compare
    # with a tolerance instead of bit-exact equality.
    assert imputer.imputer_dict_ == pytest.approx(
        {"Age": -6.5, "Marks": 0.3625}
    )
コード例 #4
0
def test_automatically_select_variables_and_gaussian_imputation_on_left_tail(
        df_na):
    """Check the values learned by ``fit`` for gaussian imputation, left tail.

    No ``variables`` argument is passed, so the imputer picks the variables
    itself; the test expects entries for ``Age`` and ``Marks``.
    """
    imputer = EndTailImputer(imputation_method="gaussian", tail="left", fold=3)
    imputer.fit(df_na)
    # Bit-exact equality on computed floats is brittle across platforms and
    # library versions; compare against the reference values with a tolerance.
    assert imputer.imputer_dict_ == pytest.approx({
        "Age": -1.520509756212462,
        "Marks": 0.04224051634034898,
    })
コード例 #5
0
def test_automatically_select_variables_and_gaussian_imputation_on_left_tail(
        df_na):
    """Check the values learned by ``fit`` for gaussian imputation, left tail.

    No ``variables`` argument is passed, so the imputer picks the variables
    itself. The learned values are compared after rounding to 3 decimals.
    """
    imputer = EndTailImputer(imputation_method="gaussian", tail="left", fold=3)
    imputer.fit(df_na)
    # Round a copy of the learned values rather than overwriting
    # ``imputer_dict_``: the fitted estimator must not be altered by the test.
    rounded_fill_values = {
        key: round(value, 3)
        for key, value in imputer.imputer_dict_.items()
    }
    assert rounded_fill_values == {
        "Age": -1.521,
        "Marks": 0.042,
    }
コード例 #6
0
def test_user_enters_variables_and_iqr_imputation_on_right_tail(df_na):
    """Check IQR right-tail imputation on user-supplied variables.

    Fits and transforms ``df_na`` with ``fold=1.5`` on ``Age`` and ``Marks``,
    then compares the learned values and the transformed frame with a
    reference obtained by filling the same columns via ``fillna``.
    """
    expected_fill_values = {"Age": 65.5, "Marks": 1.0625}
    imputed_columns = list(expected_fill_values)

    # fit and transform in a single step
    imputer = EndTailImputer(
        imputation_method="iqr",
        tail="right",
        fold=1.5,
        variables=["Age", "Marks"],
    )
    X_transformed = imputer.fit_transform(df_na)

    # build the expected frame column by column
    X_reference = df_na.copy()
    for column, fill_value in expected_fill_values.items():
        X_reference[column] = X_reference[column].fillna(fill_value)

    # learned values, absence of NA in the imputed columns, full frame
    assert imputer.imputer_dict_ == expected_fill_values
    remaining_na = X_transformed[imputed_columns].isnull().sum().sum()
    assert remaining_na == 0
    pd.testing.assert_frame_equal(X_transformed, X_reference)
コード例 #7
0
def test_error_when_imputation_method_is_not_permitted():
    """Constructing the imputer with an unknown method raises ``ValueError``."""
    unsupported_method = "arbitrary"
    with pytest.raises(ValueError):
        EndTailImputer(imputation_method=unsupported_method)
コード例 #8
0
def test_non_fitted_error(df_na):
    """Calling ``transform`` before ``fit`` must raise ``NotFittedError``."""
    imputer = EndTailImputer()
    # Keep only the call under test inside ``pytest.raises`` so that an
    # unexpected error while constructing the imputer cannot satisfy the
    # assertion by accident.
    with pytest.raises(NotFittedError):
        imputer.transform(df_na)
コード例 #9
0
def test_error_when_fold_is_1():
    """Constructing the imputer with ``fold=-1`` raises ``ValueError``.

    NOTE(review): the test name mentions 1 but the value exercised is -1;
    confirm which one is intended.
    """
    invalid_fold = -1
    with pytest.raises(ValueError):
        EndTailImputer(fold=invalid_fold)
コード例 #10
0
def test_error_when_tail_is_string():
    """Constructing the imputer with ``tail="arbitrary"`` raises ``ValueError``.

    NOTE(review): other tests pass the strings "left" and "right" as ``tail``,
    so the name presumably means "an unsupported string" - confirm.
    """
    unsupported_tail = "arbitrary"
    with pytest.raises(ValueError):
        EndTailImputer(tail=unsupported_tail)
from feature_engine.transformation import (
    BoxCoxTransformer,
    LogTransformer,
    PowerTransformer,
    ReciprocalTransformer,
    YeoJohnsonTransformer,
)
from feature_engine.wrappers import SklearnTransformerWrapper


# imputation
# Imputer instances handed to ``parametrize_with_checks``; the order is kept
# as originally listed.
_IMPUTATION_ESTIMATORS = [
    MeanMedianImputer(),
    ArbitraryNumberImputer(),
    CategoricalImputer(fill_value=0, ignore_format=True),
    EndTailImputer(),
    AddMissingIndicator(),
    RandomSampleImputer(),
    DropMissingData(),
]


@parametrize_with_checks(_IMPUTATION_ESTIMATORS)
def test_sklearn_compatible_imputer(estimator, check):
    """Apply the supplied ``check`` callable to the supplied ``estimator``.

    Both arguments are provided by the ``parametrize_with_checks`` decorator.
    """
    check(estimator)


# encoding
@parametrize_with_checks([
    CountFrequencyEncoder(ignore_format=True),
    DecisionTreeEncoder(regression=False, ignore_format=True),
    MeanEncoder(ignore_format=True),
    OneHotEncoder(ignore_format=True),
    OrdinalEncoder(ignore_format=True),