예제 #1
0
def test_error_if_df_contains_negative_values(df_vartypes):
    """Check that LogTransformer raises ValueError on negative values.

    Two scenarios are covered: fitting on data that contains a negative
    value, and transforming such data with a transformer that was fitted
    on the unmodified fixture.

    ``df_vartypes`` is a fixture supplied from outside this block.
    """
    # Work on a copy so the fixture itself is left unmodified; the row
    # labelled 1 gets a negative value in the "Age" column.
    df_neg = df_vartypes.copy()
    df_neg.loc[1, "Age"] = -1

    # test case 5: when variable contains negative value, fit
    transformer = LogTransformer()
    with pytest.raises(ValueError):
        transformer.fit(df_neg)

    # test case 6: when variable contains negative value, transform
    # The fit on the unmodified data happens outside the ``raises`` block
    # so that only the transform call can satisfy the expectation; an
    # unexpected ValueError during fit now fails the test instead of
    # silently passing it.
    transformer = LogTransformer()
    transformer.fit(df_vartypes)
    with pytest.raises(ValueError):
        transformer.transform(df_neg)
예제 #2
0
def test_non_fitted_error(df_vartypes):
    """Expect NotFittedError when transform is called without a prior fit."""
    with pytest.raises(NotFittedError):
        # A fresh transformer is used directly, with no call to fit.
        LogTransformer().transform(df_vartypes)
예제 #3
0
def test_transform_raises_error_if_na_in_df(df_vartypes, df_na):
    """Check that transform raises ValueError for the ``df_na`` fixture.

    The transformer is fitted on ``df_vartypes`` and then asked to
    transform a five-column selection of ``df_na``. Both fixtures are
    supplied from outside this block.
    """
    # test case 4: when dataset contains na, transform method
    # Setup (fit and column selection) is kept outside the ``raises``
    # block so that only the transform call can satisfy the expectation.
    transformer = LogTransformer()
    transformer.fit(df_vartypes)
    df_with_na = df_na[["Name", "City", "Age", "Marks", "dob"]]

    with pytest.raises(ValueError):
        transformer.transform(df_with_na)
예제 #4
0
# Show the `imputer_dict_` attribute of `mean_imputer` (the imputer is
# created earlier in the script, outside this excerpt).
print(mean_imputer.imputer_dict_)

# Apply the imputer to both X_train and X_test.
X_train = mean_imputer.transform(X_train)
X_test = mean_imputer.transform(X_test)

# %% Verify whether there are missing values.
# NOTE(review): this check uses `cat_vars_with_na` right after the mean
# imputer; presumably a list of numerical variables was intended -- confirm.
# Number of nulls per listed column in X_train.
X_train[cat_vars_with_na].isnull().sum()
# Listed columns of X_test that still contain at least one null.
[var for var in cat_vars_with_na if X_test[var].isnull().sum() > 0]

#%% Temporal variables.
def elapsed_years(df, var):
    """Overwrite ``df[var]`` with ``df['YrSold'] - df[var]`` and return ``df``.

    The update is made in place on the object passed in, and that same
    object is returned.
    """
    sale_year = df['YrSold']
    reference_year = df[var]
    df[var] = sale_year - reference_year
    return df
# Replace each year column with its difference from YrSold, in both sets.
for var in ['YearBuilt', 'YearRemodAdd', 'GarageYrBlt']:
    X_train = elapsed_years(X_train, var)
    X_test = elapsed_years(X_test, var)

# now we drop YrSold.
# The DropFeatures instance itself must be applied here; the previous code
# built it but then re-ran the mean imputer, so YrSold was never removed.
drop_features = DropFeatures(features_to_drop=['YrSold'])
X_train = drop_features.fit_transform(X_train)
X_test = drop_features.transform(X_test)

# %% Numerical variable -- transformation.
# Single list shared by the transformer and the check below so the two
# cannot drift apart.
log_vars = ["LotFrontage", "1stFlrSF", "GrLivArea"]
log_transformer = LogTransformer(
    variables=log_vars,
)
X_train = log_transformer.fit_transform(X_train)
X_test = log_transformer.transform(X_test)

# check that test set does not contain null values in the engineered variables
[var for var in log_vars if X_test[var].isnull().sum() > 0]