def test_raises_error_if_df_contains_na(df_enc_big, df_enc_big_na):
    """Check that OneHotEncoder raises ValueError on data containing NA.

    Two scenarios are covered: NA present in the data passed to ``fit``,
    and NA present in the data passed to ``transform`` after fitting on a
    different dataset.

    Args:
        df_enc_big: Fixture used to fit the encoder in the transform
            scenario (presumably free of NA -- confirm against the
            fixture definition).
        df_enc_big_na: Fixture containing NA values.
    """
    # Case 1: the dataset contains NA when calling fit.
    encoder = OneHotEncoder()
    with pytest.raises(ValueError):
        encoder.fit(df_enc_big_na)

    # Case 2: the dataset contains NA when calling transform.
    # The fit call is kept outside the ``raises`` block on purpose: only
    # ``transform`` is expected to fail, so a ValueError raised while
    # fitting must make the test fail instead of being silently accepted.
    encoder = OneHotEncoder()
    encoder.fit(df_enc_big)
    with pytest.raises(ValueError):
        encoder.transform(df_enc_big_na)
security_doors = word_convert( st.selectbox('Do you want security doors ?', ('Yes', 'No'))) cctv = word_convert( st.selectbox('Do you want CCTV surveillance ?', ('Yes', 'No'))) bq = word_convert(st.selectbox('Do you want Boys Quarters ?', ('Yes', 'No'))) gym = word_convert(st.selectbox('Do you need gym facilities ?', ('Yes', 'No'))) pool = word_convert(st.selectbox('Do you need swimming pool ?', ('Yes', 'No'))) # Modeling step # Encoding Step encode = OneHotEncoder() target = data['Price'] features = data.drop('Price', 1) encode.fit(features) features = encode.transform(features) # Getting the target and features variables # print(data.head()) X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=0) # Creating the algorithm class model = RandomForestRegressor() # Creating algorithm object model.fit(X_train, y_train) # Predicted values estimate = pd.DataFrame(
# Integer/float columns, leaving out the 'Survived' target.
num_features = (
    df.select_dtypes(include=['int64', 'float64'])
    .drop('Survived', axis=1)
    .columns
)
num_features

# %%
# Columns stored as category or object dtype.
cat_features = df.select_dtypes(include=['category', 'object']).columns
cat_features

# %%
# Every column except the 'Survived' target, as a plain list.
features = df.drop('Survived', axis=1).columns.to_list()
features

# %%
# Encoder restricted to three named columns, configured with drop_last=False.
onehot = OneHotEncoder(
    variables=['Pclass', 'Sex', 'Embarked'],
    drop_last=False,
)

# %%
# Fit on the feature columns and preview the encoded frame.
onehot.fit(df[features])
onehot.transform(df[features]).head()

# %%
# Design matrix and target used for modelling.
X = onehot.transform(df[features])
y = df['Survived']
print(X.shape)

# %%
# Separate into train and test sets (20% held out, fixed seed).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0)

# L2-penalised logistic regression, fitted on the training split.
logistic_model = LogisticRegression(
    penalty='l2',
    C=1.0,
    solver='liblinear',
).fit(X_train, y_train)
print(logistic_model)

# %%
def encorder(self, y):
    """One-hot encode ``y`` with a freshly created OneHotEncoder.

    A new encoder is built on every call, fitted on ``y`` and then used
    to transform that same ``y``; the fitted encoder is not kept.

    Args:
        y: Data to encode (a dataframe, per the original docstring).

    Returns:
        Whatever ``OneHotEncoder.transform`` returns for ``y``.
    """
    one_hot = OneHotEncoder()
    one_hot.fit(y)
    encoded = one_hot.transform(y)
    return encoded