def AB_random_search(X_train, y_train):
    """Select AdaBoost hyper-parameters with a randomized search.

    Samples 100 candidate settings for ``n_estimators`` and
    ``learning_rate``, scores each with 3-fold shuffled cross-validation
    on negative mean absolute error, prints the winning settings and
    applies them to the regressor that is returned.

    Args:
        X_train: Training features passed to the search.
        y_train: Training targets passed to the search.

    Returns:
        An ``AdaBoostRegressor`` configured with the best parameters found.
        This function does not call ``fit`` on it directly, and the search
        runs with ``refit=False``.
    """
    regressor = AdaBoostRegressor(random_state=0)
    search_space = {
        'n_estimators': np.arange(10, 500, 10),
        'learning_rate': np.arange(0.005, 0.5, 0.001),
    }
    inner_folds = KFold(n_splits=3, shuffle=True, random_state=1)

    tuner = RandomizedSearchCV(
        regressor,
        search_space,
        cv=inner_folds,
        scoring='neg_mean_absolute_error',
        verbose=0,
        n_jobs=-1,
        n_iter=100,
        refit=False,
        random_state=999,
    )
    tuner.fit(X_train, y_train)

    best_settings = tuner.best_params_
    regressor.set_params(**best_settings)
    print(best_settings)
    return regressor
def ab_cv(cv_outer, data):
    """Run nested cross-validation for an AdaBoost regressor.

    For every ``(train_index, test_index)`` pair in ``cv_outer`` a
    randomized hyper-parameter search (100 candidates, 3-fold shuffled
    inner CV, scored by negative mean absolute error) is run on the
    training rows. A regressor with the best parameters is then fitted on
    those rows and evaluated on the held-out rows.

    Args:
        cv_outer: Iterable of ``(train_index, test_index)`` positional
            index pairs.
        data: Object supporting ``.iloc`` indexing. The column at position
            3 is used as the target and the columns from position 6
            onward as the features.

    Returns:
        A tuple ``(MAE_results, RMSE_results, MedAE_results, r2_results,
        model_params)`` of lists, each holding one entry per outer fold.
    """
    MAE_results = []
    RMSE_results = []
    MedAE_results = []
    r2_results = []
    model_params = []

    # The search space and inner splitter do not depend on the fold, so
    # they are built once instead of once per iteration.
    params = {
        'n_estimators': np.arange(10, 500, 10),
        'learning_rate': np.arange(0.005, 0.5, 0.001),
    }
    cv_inner = KFold(n_splits=3, shuffle=True, random_state=1)

    for train_index, test_index in cv_outer:
        X_train = data.iloc[train_index, 6:].values
        y_train = data.iloc[train_index, 3].values
        X_test = data.iloc[test_index, 6:].values
        y_test = data.iloc[test_index, 3].values

        model = AdaBoostRegressor(random_state=0)
        search = RandomizedSearchCV(
            model,
            params,
            cv=cv_inner,
            scoring='neg_mean_absolute_error',
            verbose=0,
            n_jobs=-1,
            n_iter=100,
            refit=False,
            random_state=999,
        )
        search.fit(X_train, y_train)

        model_params.append(search.best_params_)
        model.set_params(**search.best_params_)
        model.fit(X_train, y_train)
        print(search.best_params_)

        y_pred = model.predict(X_test)

        MAE_results.append(MAE(y_test, y_pred))
        # Take the square root explicitly rather than passing
        # `squared=False`: that keyword was deprecated in scikit-learn 1.4
        # and later removed. For the 1-D target used here the value is the
        # same.
        RMSE_results.append(np.sqrt(mean_squared_error(y_test, y_pred)))
        MedAE_results.append(median_absolute_error(y_test, y_pred))
        r2_results.append(r2_score(y_test, y_pred))

    return MAE_results, RMSE_results, MedAE_results, r2_results, model_params
def train_and_save_final_model(X, y, X_train, y_train, params, save_model_file_path, test_data):
    """Fit the final AdaBoost regressor and pickle it to disk.

    Args:
        X: Features used for fitting when ``test_data`` is provided.
        y: Targets used for fitting when ``test_data`` is provided.
        X_train: Features used for fitting when ``test_data`` is ``None``.
        y_train: Targets used for fitting when ``test_data`` is ``None``.
        params: Keyword parameters applied to the regressor through
            ``set_params``.
        save_model_file_path: String prefix to which ``'adbr.sav'`` is
            appended to form the output path. It is concatenated as-is,
            so it needs its own trailing separator if it names a
            directory.
        test_data: ``None`` selects ``X_train``/``y_train`` for fitting;
            any other value selects ``X``/``y``. Only its identity with
            ``None`` is inspected.

    Returns:
        None. The fitted model is written to
        ``save_model_file_path + 'adbr.sav'``.
    """
    adbr = AdaBoostRegressor(random_state=0)
    adbr.set_params(**params)

    # Identity check on purpose: `== None` is evaluated elementwise by
    # DataFrames and arrays, which makes the `if` raise ValueError.
    if test_data is None:
        adbr.fit(X_train, y_train)
    else:
        adbr.fit(X, y)

    # Save the model; the context manager guarantees the handle is closed
    # (and the data flushed) even if pickling fails.
    model_file_path = save_model_file_path + 'adbr.sav'
    with open(model_file_path, 'wb') as model_file:
        pickle.dump(adbr, model_file)
def predict_validation(model_params, X_train, y_train, X_valid, y_valid):
    """Average the validation predictions of several AdaBoost models.

    One regressor is trained on ``X_train``/``y_train`` for each parameter
    set in ``model_params``; the predictions for ``X_valid`` are then
    averaged across the models.

    Args:
        model_params: Iterable of parameter dicts, each applied to a fresh
            ``AdaBoostRegressor`` through ``set_params``.
        X_train: Training features.
        y_train: Training targets.
        X_valid: Features to predict on.
        y_valid: Accepted but not used by this function.

    Returns:
        A list with the mean prediction across the models for each entry
        of the transposed prediction array.
    """
    per_model_predictions = []
    for settings in model_params:
        regressor = AdaBoostRegressor(random_state=0)
        regressor.set_params(**settings)
        regressor.fit(X_train, y_train)
        per_model_predictions.append(regressor.predict(X_valid))

    # Transpose so that each row gathers every model's prediction for the
    # same position, then take the plain arithmetic mean of that row.
    by_sample = np.array(per_model_predictions).T
    return [sum(votes) / len(votes) for votes in by_sample]