def test_error_if_capping_method_quantiles_and_fold_value_not_permitted():
    """Constructing with capping_method="quantiles" and fold=0.3 must fail.

    The test expects the constructor itself to raise ``ValueError`` for this
    parameter combination.
    """
    rejected_params = {"capping_method": "quantiles", "fold": 0.3}
    with pytest.raises(ValueError):
        Winsorizer(**rejected_params)
def test_error_if_missing_values_not_permited():
    """An unrecognised ``missing_values`` option must raise ``ValueError``."""
    expect_value_error = pytest.raises(ValueError)
    with expect_value_error:
        Winsorizer(missing_values="other")
def test_error_if_fold_value_not_permitted():
    """A negative ``fold`` must be rejected with ``ValueError``."""
    rejected_params = {"fold": -1}
    with pytest.raises(ValueError):
        Winsorizer(**rejected_params)
def test_error_if_capping_method_not_permitted():
    """An unrecognised ``capping_method`` must raise ``ValueError``."""
    # The constructor is expected to validate the option and raise.
    expect_value_error = pytest.raises(ValueError)
    with expect_value_error:
        Winsorizer(capping_method="other")
def test_error_if_tail_value_not_permitted():
    """An unrecognised ``tail`` option must raise ``ValueError``."""
    rejected_params = {"tail": "other"}
    with pytest.raises(ValueError):
        Winsorizer(**rejected_params)
def test_get_feature_names_out_input_features_is_list(df_na):
    """Check feature names when an explicit feature list is passed.

    For each ``tail`` setting with ``add_indicators=True``, the names returned
    by ``get_feature_names_out(input_features)`` must be the requested
    features followed by the expected indicator column names.
    """
    input_features = ["Age", "Marks"]

    # With add_indicators=False the generic check from estimator_checks
    # already covers this, so only the add_indicators=True case is tested.
    indicators_by_tail = {
        "left": ["Age_left", "Marks_left"],
        "right": ["Age_right", "Marks_right"],
        "both": ["Age_left", "Age_right", "Marks_left", "Marks_right"],
    }

    for tail, indicators in indicators_by_tail.items():
        capper = Winsorizer(
            tail=tail,
            add_indicators=True,
            missing_values="ignore",
        )
        capper.fit(df_na)

        expected = input_features + indicators
        assert capper.get_feature_names_out(input_features) == expected
        n_categories=100000000000,
        replace_with=10,
        ignore_format=True,
    ),
    WoEEncoder(ignore_format=True),
    PRatioEncoder(ignore_format=True),
])
def test_sklearn_compatible_encoder(estimator, check):
    """Run the supplied ``check`` callable against ``estimator``.

    NOTE(review): the decorator that parametrizes this test starts above the
    visible excerpt; presumably it is ``parametrize_with_checks`` as on the
    sibling tests in this file — confirm.
    """
    check(estimator)


# outliers
@parametrize_with_checks(
    [
        ArbitraryOutlierCapper(max_capping_dict={"0": 10}),
        OutlierTrimmer(),
        Winsorizer(),
    ]
)
def test_sklearn_compatible_outliers(estimator, check):
    """Run each parametrized ``check`` against each outlier estimator."""
    check(estimator)


# transformers
@parametrize_with_checks(
    [
        BoxCoxTransformer(),
        LogTransformer(),
        PowerTransformer(),
        ReciprocalTransformer(),
        YeoJohnsonTransformer(),
    ]
)
def test_sklearn_compatible_transformer(estimator, check):
    """Run each parametrized ``check`` against each variable transformer."""
    check(estimator)
def test_get_feature_names_out_input_features_is_none(df_na):
    """Check feature names when no feature list is passed.

    For each ``tail`` setting with ``add_indicators=True``, the names returned
    by ``get_feature_names_out()`` must be all columns of the fitted frame
    followed by the expected indicator column names.
    """
    all_columns = df_na.columns.to_list()

    # With indicators disabled the generic check already covers this, so only
    # the add_indicators=True case is tested here.
    indicators_by_tail = {
        "left": ["Age_left", "Marks_left"],
        "right": ["Age_right", "Marks_right"],
        "both": ["Age_left", "Age_right", "Marks_left", "Marks_right"],
    }

    for tail, indicators in indicators_by_tail.items():
        capper = Winsorizer(
            tail=tail,
            add_indicators=True,
            missing_values="ignore",
        )
        capper.fit(df_na)

        assert capper.get_feature_names_out() == all_columns + indicators
def test_transform_raises_error_if_na_in_input_df(df_vartypes, df_na):
    """``transform`` must raise ``ValueError`` when the input contains NA.

    The transformer is constructed and fitted outside the ``pytest.raises``
    block so that only the ``transform`` call can satisfy the expectation. A
    ``ValueError`` raised while constructing or fitting now surfaces as a
    failure instead of letting the test pass for the wrong reason.
    """
    # test case 9: when dataset contains na, transform method
    transformer = Winsorizer()
    transformer.fit(df_vartypes)

    with pytest.raises(ValueError):
        transformer.transform(df_na[["Name", "City", "Age", "Marks", "dob"]])
def test_fit_raises_error_if_na_in_inut_df(df_na):
    """``fit`` must raise ``ValueError`` when the training data contains NA.

    The transformer is constructed outside the ``pytest.raises`` block: the
    constructor can itself raise ``ValueError`` on invalid parameters, and
    such an error must not be mistaken for the NA check performed by ``fit``.
    """
    # test case 8: when dataset contains na, fit method
    transformer = Winsorizer()

    with pytest.raises(ValueError):
        transformer.fit(df_na)
Exemple #11
0
                 color="frequency")

# Set the figure's title and pixel dimensions, then display it.
fig.update_layout(
    title="Segmentation",
    width=700,
    height=500,
)

fig.show()

# Remove the "Customer" and "Effective To Date" columns from df in place.
df.drop("Customer", axis=1, inplace=True)
df.drop("Effective To Date", axis=1, inplace=True)

# Configure a Winsorizer for three columns: IQR capping method, both tails,
# fold of 1.5.
wind = Winsorizer(
    capping_method='iqr',
    tail='both',
    fold=1.5,
    variables=['Customer Lifetime Value', 'Income', 'Total Claim Amount'])

# Fit on df, then replace df with the transformed result.
wind.fit(df)
df = wind.transform(df)

# Accumulator filled by the loop over dummy_variables that follows.
dummylist = []

# Column names iterated by the loop below; presumably the categorical columns
# to be dummy-encoded — the loop body is truncated here, so confirm.
dummy_variables = [
    "State", "Response", "Coverage", "Education", "EmploymentStatus", "Gender",
    "Location Code", "Policy Type", "Policy", "Renew Offer Type",
    "Sales Channel", "Vehicle Class", "Vehicle Size", "Marital Status"
]
for var in dummy_variables:
    dummylist.append(
Exemple #12
0
def test_non_fitted_error(df_vartypes):
    """Calling ``transform`` before ``fit`` must raise ``NotFittedError``."""
    expect_not_fitted = pytest.raises(NotFittedError)
    with expect_not_fitted:
        Winsorizer().transform(df_vartypes)