from xgboost import XGBClassifier, DMatrix, cv
from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.metrics import roc_auc_score

def grid_search(parameters, X_train_res, y_train_res, X_test, y_test, useTrainCV=False):
    # ROC-AUC scoring, stratified folds and the 'auc'/'error' eval metrics all
    # imply a classification task, so XGBClassifier is used (XGBRegressor would
    # make GridSearchCV's 'roc_auc' scorer fail, since it needs predict_proba).
    xgbmodel = XGBClassifier()
    kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=10)
    grid_search_xg = GridSearchCV(xgbmodel, parameters, scoring='roc_auc',
                                  n_jobs=1, cv=kfold, verbose=1)
    result_gcv_xgb = grid_search_xg.fit(X_train_res, y_train_res)
    best_params = result_gcv_xgb.best_params_
    print("Best params: %s" % (best_params))

    # Rebuild the model using the best parameters found by the grid search.
    xg_reg = XGBClassifier(objective=best_params['objective'],
                           learning_rate=best_params['learning_rate'],
                           max_depth=best_params['max_depth'],
                           n_estimators=best_params['n_estimators'],
                           min_child_weight=best_params['min_child_weight'],
                           gamma=best_params['gamma'],
                           colsample_bytree=best_params['colsample_bytree'],
                           subsample=best_params['subsample'],
                           reg_alpha=best_params['reg_alpha'])

    if useTrainCV:
        # Let xgb.cv pick the number of boosting rounds via early stopping.
        xgb_param = xg_reg.get_xgb_params()
        xgtrain = DMatrix(X_train_res, label=y_train_res)
        cvresult = cv(xgb_param, xgtrain,
                      num_boost_round=xg_reg.get_params()['n_estimators'],
                      folds=kfold, metrics='auc', early_stopping_rounds=20)
        xg_reg.set_params(n_estimators=cvresult.shape[0])
        print("Best number of estimators: %i" % (cvresult.shape[0]))

    eval_set = [(X_test, y_test)]
    xg_reg.fit(X_train_res, y_train_res, eval_metric="error",
               eval_set=eval_set, verbose=False)

    # Evaluate on the (resampled) training set.
    y_pred_train = xg_reg.predict(X_train_res)
    #print("Accuracy train: %f" % (accuracy_score(y_train_res, y_pred_train)))
    #print("Recall train: %f" % (recall_score(y_train_res, y_pred_train)))
    #print("Precision train: %f" % (precision_score(y_train_res, y_pred_train)))
    print("AUC train: %f" % (roc_auc_score(y_train_res, y_pred_train)))

    # Evaluate on the held-out test set.
    y_pred = xg_reg.predict(X_test)
    #print("Accuracy test: %f" % (accuracy_score(y_test, y_pred)))
    #print("Recall test: %f" % (recall_score(y_test, y_pred)))
    #print("Precision test: %f" % (precision_score(y_test, y_pred)))
    print("AUC test: %f" % (roc_auc_score(y_test, y_pred)))
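# A minimal usage sketch, not part of the original code: the grid below is
# hypothetical, but it illustrates that `parameters` must contain every key
# that grid_search later reads from best_params_ ('objective' through
# 'reg_alpha'), each mapped to a list of candidate values.
example_parameters = {
    'objective': ['binary:logistic'],
    'learning_rate': [0.05, 0.1],
    'max_depth': [3, 5],
    'n_estimators': [100, 300],
    'min_child_weight': [1],
    'gamma': [0],
    'colsample_bytree': [0.8],
    'subsample': [0.8],
    'reg_alpha': [0],
}
# Assuming X_train_res/y_train_res and X_test/y_test are prepared splits:
# grid_search(example_parameters, X_train_res, y_train_res, X_test, y_test,
#             useTrainCV=True)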
import warnings

import xgboost as xgb
from xgboost import XGBRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer, mean_squared_error

warnings.filterwarnings('ignore')

xgtrain = xgb.DMatrix(train_df, label=target.values)

# Step 1: fix the learning rate and find the number of boosting rounds
# with cross-validated early stopping.
alg = XGBRegressor(learning_rate=0.1, n_estimators=1000, max_depth=5,
                   min_child_weight=1, gamma=0, subsample=0.8,
                   colsample_bytree=0.8, objective='reg:squarederror',
                   seed=27)
xgb_param = alg.get_xgb_params()
cvresult = xgb.cv(xgb_param, xgtrain,
                  num_boost_round=alg.get_params()['n_estimators'],
                  nfold=5, metrics='rmse', early_stopping_rounds=50)
n_estimators = cvresult.shape[0]

# Step 2: grid-search the tree-structure parameters with the tuned round count.
param_test1 = {
    'max_depth': range(3, 10, 2),
    'min_child_weight': range(1, 6, 2),
}
gsearch1 = GridSearchCV(
    estimator=XGBRegressor(learning_rate=0.1, n_estimators=n_estimators,
                           max_depth=5, min_child_weight=1, gamma=0,
                           subsample=0.8, colsample_bytree=0.8,
                           objective='reg:squarederror', nthread=4, seed=27),
    param_grid=param_test1,
    # mean_squared_error is a loss, so mark it "lower is better";
    # a bare make_scorer(mean_squared_error) would maximise it.
    scoring=make_scorer(mean_squared_error, greater_is_better=False),
    n_jobs=4, cv=5)
gsearch1.fit(train_df, target)
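# A possible next stage in the same staged-tuning pattern (a sketch, not part
# of the original code): read the winning tree parameters off gsearch1 and
# hold them fixed while searching the next parameter, here gamma.
best_depth = gsearch1.best_params_['max_depth']
best_mcw = gsearch1.best_params_['min_child_weight']
param_test2 = {'gamma': [i / 10.0 for i in range(0, 5)]}
gsearch2 = GridSearchCV(
    estimator=XGBRegressor(learning_rate=0.1, n_estimators=n_estimators,
                           max_depth=best_depth, min_child_weight=best_mcw,
                           gamma=0, subsample=0.8, colsample_bytree=0.8,
                           objective='reg:squarederror', nthread=4, seed=27),
    param_grid=param_test2,
    scoring=make_scorer(mean_squared_error, greater_is_better=False),
    n_jobs=4, cv=5)
gsearch2.fit(train_df, target)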
import numpy as np
import xgboost as xgb
from xgboost import XGBRegressor

x_train = df[list(features)].values
y_train = df["SPEED_AVG"].values

gb = XGBRegressor(learning_rate=0.1, n_estimators=750, max_depth=5,
                  min_child_weight=1, gamma=0, subsample=0.8,
                  colsample_bytree=0.8, objective='reg:gamma',
                  nthread=4, scale_pos_weight=1, seed=27)

# Use xgb.cv with early stopping to pick the number of boosting rounds,
# then refit the sklearn wrapper with that count.
xgb_param = gb.get_xgb_params()
xgtrain = xgb.DMatrix(df[features].values, label=df['SPEED_AVG'].values)
cvresult = xgb.cv(xgb_param, xgtrain,
                  num_boost_round=gb.get_params()['n_estimators'],
                  nfold=10, metrics='mae', early_stopping_rounds=50)
gb.set_params(n_estimators=cvresult.shape[0])
gb.fit(x_train, y_train, eval_metric='mae')

def mean_absolute_percentage_error(y_true, y_pred):
    # MAPE in percent; assumes y_true contains no zeros.
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
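# Evaluation sketch, not in the original snippet: score the fitted model with
# the MAPE helper. Training data is used here only because no hold-out split
# appears in the snippet; in practice a separate test split would be scored.
y_pred = gb.predict(x_train)
print("Train MAPE: %.2f%%" % mean_absolute_percentage_error(y_train, y_pred))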