def test_error_if_df_contains_negative_values(df_vartypes):
    """Check that fit and transform raise ValueError on a negative value.

    A copy of the ``df_vartypes`` fixture is modified so that ``Age`` holds
    ``-1`` in the row labelled ``1``; the fixture itself is left untouched.
    """
    # test error when data contains negative values
    df_neg = df_vartypes.copy()
    df_neg.loc[1, "Age"] = -1

    # test case 5: when variable contains negative value, fit
    transformer = LogTransformer()
    with pytest.raises(ValueError):
        transformer.fit(df_neg)

    # test case 6: when variable contains negative value, transform
    # Fit on the unmodified fixture *outside* the raises block: if fit itself
    # raised ValueError, the test would otherwise pass without transform ever
    # being exercised.
    transformer = LogTransformer()
    transformer.fit(df_vartypes)
    with pytest.raises(ValueError):
        transformer.transform(df_neg)
def test_non_fitted_error(df_vartypes):
    """Check that transform raises NotFittedError when fit was never called."""
    # Build the transformer outside the raises block so that only the
    # transform call is allowed to produce the expected exception.
    transformer = LogTransformer()
    with pytest.raises(NotFittedError):
        transformer.transform(df_vartypes)
def test_transform_raises_error_if_na_in_df(df_vartypes, df_na):
    """Check that transform raises ValueError for the ``df_na`` fixture.

    The transformer is fitted on ``df_vartypes`` and then asked to transform
    the ``Name``, ``City``, ``Age``, ``Marks`` and ``dob`` columns of ``df_na``.
    """
    # test case 4: when dataset contains na, transform method
    # Fitting happens outside the raises block so that a ValueError from fit
    # cannot be mistaken for the expected failure in transform.
    transformer = LogTransformer()
    transformer.fit(df_vartypes)

    with pytest.raises(ValueError):
        transformer.transform(df_na[["Name", "City", "Age", "Marks", "dob"]])
print(mean_imputer.imputer_dict_)

X_train = mean_imputer.transform(X_train)
X_test = mean_imputer.transform(X_test)

# %% Verify whether there are missing values.
# NOTE(review): this inspects `cat_vars_with_na` right after applying
# `mean_imputer`; confirm this is the intended list of variables to check.
X_train[cat_vars_with_na].isnull().sum()

[var for var in cat_vars_with_na if X_test[var].isnull().sum() > 0]

# %% Temporal variables.


def elapsed_years(df, var):
    """Replace column ``var`` with ``df['YrSold'] - df[var]``.

    The column is overwritten on the frame that was passed in, and that same
    frame is returned.
    """
    df[var] = df['YrSold'] - df[var]
    return df


for var in ['YearBuilt', 'YearRemodAdd', 'GarageYrBlt']:
    X_train = elapsed_years(X_train, var)
    X_test = elapsed_years(X_test, var)

# now we drop YrSold.
# The transformer created here is the one that must be applied; previously the
# mean imputer was re-fitted at this point and YrSold was never removed.
drop_features = DropFeatures(features_to_drop=['YrSold'])
X_train = drop_features.fit_transform(X_train)
X_test = drop_features.transform(X_test)

# %% Numerical variable -- transformation.
log_transformer = LogTransformer(
    variables=["LotFrontage", "1stFlrSF", "GrLivArea"],
)
X_train = log_transformer.fit_transform(X_train)
X_test = log_transformer.transform(X_test)

# check that test set does not contain null values in the engineered variables
[
    var
    for var in ["LotFrontage", "1stFlrSF", "GrLivArea"]
    if X_test[var].isnull().sum() > 0
]