def test_impute_with_99_and_automatically_select_variables(df_na):
    """Impute with 99 while letting the imputer choose the variables itself.

    With ``variables=None`` the fitted imputer is expected to report
    ``["Age", "Marks"]`` as its variables, fill their missing values with 99,
    and leave ``Name`` and ``City`` untouched.
    """
    fill_value = 99
    imputed_columns = ["Age", "Marks"]
    untouched_columns = ["Name", "City"]

    # fit and transform in one go
    transformer = ArbitraryNumberImputer(arbitrary_number=fill_value, variables=None)
    result = transformer.fit_transform(df_na)

    # expected frame: same data, with only the imputed columns filled
    expected = df_na.copy()
    for column in imputed_columns:
        expected[column] = expected[column].fillna(fill_value)

    # init params are stored as given
    assert transformer.arbitrary_number == fill_value
    assert transformer.variables is None

    # attributes learned during fit
    assert transformer.variables_ == imputed_columns
    assert transformer.n_features_in_ == 6
    assert transformer.imputer_dict_ == {"Age": 99, "Marks": 99}

    # imputed columns hold no NA; the other columns still do
    assert not result[imputed_columns].isnull().any().any()
    assert result[untouched_columns].isnull().any().any()
    pd.testing.assert_frame_equal(result, expected)
def test_dictionary_of_imputation_values(df_na):
    """Impute each column with its own value supplied through ``imputer_dict``.

    ``Age`` is filled with -42 and ``Marks`` with -999; ``Name`` and ``City``
    are expected to keep their missing values.
    """
    fill_values = {"Age": -42, "Marks": -999}
    imputed_columns = list(fill_values)

    # fit and transform in one go (the imputer receives its own copy of the dict)
    transformer = ArbitraryNumberImputer(imputer_dict=dict(fill_values))
    result = transformer.fit_transform(df_na)

    # expected frame: every mapped column filled with its own value
    expected = df_na.copy()
    for column, value in fill_values.items():
        expected[column] = expected[column].fillna(value)

    # attributes learned during fit
    assert transformer.input_shape_ == (8, 6)
    assert transformer.imputer_dict_ == {"Age": -42, "Marks": -999}

    # mapped columns hold no NA; the other columns still do
    assert not result[imputed_columns].isnull().any().any()
    assert result[["Name", "City"]].isnull().any().any()
    pd.testing.assert_frame_equal(result, expected)
def test_impute_with_1_and_single_variable_entered_by_user(df_na):
    """Impute only the user-specified ``Age`` column, using -1 as the fill."""
    fill_value = -1
    column = "Age"

    # fit and transform in one go
    transformer = ArbitraryNumberImputer(
        arbitrary_number=fill_value, variables=[column]
    )
    result = transformer.fit_transform(df_na)

    # expected frame: same data, with only the requested column filled
    expected = df_na.copy()
    expected[column] = expected[column].fillna(fill_value)

    # init params are stored as given
    assert transformer.arbitrary_number == fill_value
    assert transformer.variables == [column]

    # attributes learned during fit
    assert transformer.input_shape_ == (8, 6)
    assert transformer.imputer_dict_ == {"Age": -1}

    # the requested column holds no NA and the frame matches the reference
    assert not result[column].isnull().any()
    pd.testing.assert_frame_equal(result, expected)
def test_error_when_dictionary_value_is_string():
    """A string value inside ``imputer_dict`` must raise ``ValueError``.

    The function carries the ``test_`` prefix so that pytest's default
    discovery rules collect and run it; without the prefix the check was
    silently skipped.
    """
    with pytest.raises(ValueError):
        ArbitraryNumberImputer(imputer_dict={"Age": "arbitrary_number"})


# Keep the earlier, un-prefixed name importable. pytest does not collect it
# under this name, so the test still runs exactly once.
imputer_error_when_dictionary_value_is_string = (
    test_error_when_dictionary_value_is_string
)
def test_non_fitted_error(df_na):
    """Calling ``transform`` on an unfitted imputer must raise ``NotFittedError``."""
    # Construct outside the ``raises`` block: only ``transform`` is expected to
    # raise, so an error coming from the constructor must fail this test
    # instead of satisfying it.
    imputer = ArbitraryNumberImputer()

    with pytest.raises(NotFittedError):
        imputer.transform(df_na)
def test_error_when_arbitrary_number_is_string():
    """Passing a string as ``arbitrary_number`` must raise ``ValueError``."""
    invalid_number = "arbitrary"

    with pytest.raises(ValueError):
        ArbitraryNumberImputer(arbitrary_number=invalid_number)
)
from feature_engine.timeseries.forecasting import LagFeatures
from feature_engine.transformation import (
    BoxCoxTransformer,
    LogTransformer,
    PowerTransformer,
    ReciprocalTransformer,
    YeoJohnsonTransformer,
)
from feature_engine.wrappers import SklearnTransformerWrapper


# imputation
@parametrize_with_checks([
    MeanMedianImputer(),
    ArbitraryNumberImputer(),
    CategoricalImputer(fill_value=0, ignore_format=True),
    EndTailImputer(),
    AddMissingIndicator(),
    RandomSampleImputer(),
    DropMissingData(),
])
def test_sklearn_compatible_imputer(estimator, check):
    """Apply the supplied ``check`` callable to the supplied ``estimator``.

    Both arguments are provided by the ``parametrize_with_checks`` decorator,
    which is given the imputer instances listed above.
    NOTE(review): presumably this is scikit-learn's ``parametrize_with_checks``;
    its import is not visible in this part of the file -- confirm.
    """
    check(estimator)


# encoding
@parametrize_with_checks([
    CountFrequencyEncoder(ignore_format=True),
    DecisionTreeEncoder(regression=False, ignore_format=True),
    MeanEncoder(ignore_format=True),
# Drop the rows in which every feature is missing.
# Assigning ``df_full[features].dropna(how="all")`` back into
# ``df_full[features]`` re-aligns on the index, which restores the dropped rows
# as all-NaN and leaves the frame unchanged; rebinding ``df_full`` removes them.
# NOTE(review): assumes ``features`` is a list of column labels -- confirm.
df_full = df_full.dropna(subset=features, how="all")
print("ok.")

# %%
# Split into training and test sets
print("Separando em base de treino e teste...")
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    df_full[features],
    df_full[target],
    random_state=42,
    test_size=0.1,
)
print("ok.")

# %%
print("Ajustando modelo em nosso pipeline...")

# Remaining missing feature values are replaced with the sentinel -999.
arbitrary_imputer = ArbitraryNumberImputer(arbitrary_number=-999, variables=features)

disc = DecisionTreeDiscretiser(
    cv=3,
    scoring='roc_auc',
    variables=features,
    regression=False,
    random_state=42,
)

pca = decomposition.PCA(n_components=120, random_state=42)

best_pars = {
    'subsample': 0.7,
    'n_estimators': 100,
    'max_depth': 5,
    'learning_rate': 0.2,
}