def test_error_if_capping_method_quantiles_and_fold_value_not_permitted():
    """Expect ValueError for capping_method="quantiles" combined with fold=0.3."""
    rejected_params = {"capping_method": "quantiles", "fold": 0.3}
    with pytest.raises(ValueError):
        Winsorizer(**rejected_params)
def test_error_if_missing_values_not_permited():
    """Expect ValueError when missing_values is set to "other"."""
    rejected_params = {"missing_values": "other"}
    with pytest.raises(ValueError):
        Winsorizer(**rejected_params)
def test_error_if_fold_value_not_permitted():
    """Expect ValueError when fold is set to -1."""
    rejected_params = {"fold": -1}
    with pytest.raises(ValueError):
        Winsorizer(**rejected_params)
def test_error_if_capping_method_not_permitted():
    """Expect ValueError when capping_method is set to "other"."""
    rejected_params = {"capping_method": "other"}
    # construction itself must raise
    with pytest.raises(ValueError):
        Winsorizer(**rejected_params)
def test_error_if_tail_value_not_permitted():
    """Expect ValueError when tail is set to "other"."""
    rejected_params = {"tail": "other"}
    with pytest.raises(ValueError):
        Winsorizer(**rejected_params)
def test_get_feature_names_out_input_features_is_list(df_na):
    """Check get_feature_names_out with an explicit feature list and indicators.

    For each tail setting, a Winsorizer with add_indicators=True is fitted on
    df_na and must return the input features followed by the indicator
    column names for that tail.
    """
    input_features = ["Age", "Marks"]

    # With add_indicators=False the generic check from estimator_checks
    # already covers this method, so only the True case is tested here.
    expected_indicators = (
        ("left", ["Age_left", "Marks_left"]),
        ("right", ["Age_right", "Marks_right"]),
        ("both", ["Age_left", "Age_right", "Marks_left", "Marks_right"]),
    )

    for tail, indicators in expected_indicators:
        tr = Winsorizer(tail=tail, add_indicators=True, missing_values="ignore")
        tr.fit(df_na)
        expected = input_features + indicators
        assert tr.get_feature_names_out(input_features) == expected
n_categories=100000000000,
replace_with=10,
ignore_format=True,
),
WoEEncoder(ignore_format=True),
PRatioEncoder(ignore_format=True),
])
def test_sklearn_compatible_encoder(estimator, check):
    """Apply the parametrized ``check`` callable to one encoder instance."""
    check(estimator)


# outliers
@parametrize_with_checks([
    ArbitraryOutlierCapper(max_capping_dict={"0": 10}),
    OutlierTrimmer(),
    Winsorizer(),
])
def test_sklearn_compatible_outliers(estimator, check):
    """Apply the parametrized ``check`` callable to one outlier handler.

    The decorator supplies each (estimator, check) pair built from the
    ArbitraryOutlierCapper, OutlierTrimmer and Winsorizer instances above.
    """
    check(estimator)


# transformers
@parametrize_with_checks([
    BoxCoxTransformer(),
    LogTransformer(),
    PowerTransformer(),
    ReciprocalTransformer(),
    YeoJohnsonTransformer(),
])
def test_sklearn_compatible_transformer(estimator, check):
    """Apply the parametrized ``check`` callable to one variable transformer.

    The decorator supplies each (estimator, check) pair built from the five
    default-constructed transformer instances listed above.
    """
    check(estimator)
def test_get_feature_names_out_input_features_is_none(df_na):
    """Check get_feature_names_out without arguments when indicators are added.

    For each tail setting, a Winsorizer with add_indicators=True is fitted on
    df_na and must return all columns of df_na followed by the indicator
    column names for "Age" and "Marks".
    """
    original_features = df_na.columns.to_list()

    # With indicators disabled the generic check already covers this method,
    # so only the add_indicators=True case is tested here.
    expected_indicators = (
        ("left", ["Age_left", "Marks_left"]),
        ("right", ["Age_right", "Marks_right"]),
        ("both", ["Age_left", "Age_right", "Marks_left", "Marks_right"]),
    )

    for tail, indicators in expected_indicators:
        tr = Winsorizer(tail=tail, add_indicators=True, missing_values="ignore")
        tr.fit(df_na)
        expected = original_features + indicators
        assert tr.get_feature_names_out() == expected
def test_transform_raises_error_if_na_in_input_df(df_vartypes, df_na):
    """Expect ValueError from transform when the input frame contains NA.

    The transformer is fitted on df_vartypes first. Only the transform call
    sits inside the ``pytest.raises`` block, so a ValueError raised while
    constructing or fitting the transformer fails the test instead of
    being mistaken for the expected error.
    """
    # test case 9: when dataset contains na, transform method
    transformer = Winsorizer()
    transformer.fit(df_vartypes)
    df_with_na = df_na[["Name", "City", "Age", "Marks", "dob"]]

    with pytest.raises(ValueError):
        transformer.transform(df_with_na)
def test_fit_raises_error_if_na_in_inut_df(df_na):
    """Expect ValueError from fit when the training frame contains NA.

    The transformer is built outside the ``pytest.raises`` block so that
    only an error raised by ``fit`` itself satisfies the expectation.
    """
    # test case 8: when dataset contains na, fit method
    transformer = Winsorizer()

    with pytest.raises(ValueError):
        transformer.fit(df_na)
color="frequency") fig.update_layout( title="Segmentation", width=700, height=500, ) fig.show() df.drop("Customer", axis=1, inplace=True) df.drop("Effective To Date", axis=1, inplace=True) wind = Winsorizer( capping_method='iqr', tail='both', fold=1.5, variables=['Customer Lifetime Value', 'Income', 'Total Claim Amount']) wind.fit(df) df = wind.transform(df) dummylist = [] dummy_variables = [ "State", "Response", "Coverage", "Education", "EmploymentStatus", "Gender", "Location Code", "Policy Type", "Policy", "Renew Offer Type", "Sales Channel", "Vehicle Class", "Vehicle Size", "Marital Status" ] for var in dummy_variables: dummylist.append(
def test_non_fitted_error(df_vartypes):
    """Expect NotFittedError when transform is called before fit.

    The transformer is built outside the ``pytest.raises`` block so that
    only the ``transform`` call is checked for the expected exception.
    """
    transformer = Winsorizer()

    with pytest.raises(NotFittedError):
        transformer.transform(df_vartypes)