import numpy as np
from hyperopt import hp, fmin, tpe, Trials, space_eval
from hyperopt.pyll import scope
from lightgbm import LGBMRegressor
from sklearn.metrics import r2_score
from sklearn.model_selection import cross_val_score


def evaluate(params, X, y):
    # Instantiate the estimator
    est = LGBMRegressor(boosting='gbdt', n_jobs=-1, random_state=2018)
    # Set the sampled hyperparameters
    est.set_params(**params)
    # Mean 4-fold cross-validated R2 score
    scores = cross_val_score(estimator=est, X=X, y=y, scoring='r2', cv=4)
    score = np.mean(scores)
    # fmin minimizes its objective, so return the negated R2; the reporting
    # code below recovers the score as -loss
    return -score
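# -- Added sketch (not in the original): `hyper_space` is referenced below
# -- but never defined in this fragment. A representative search space,
# -- with illustrative ranges, might look like:
hyper_space = {
    'n_estimators': scope.int(hp.quniform('n_estimators', 100, 1000, 50)),
    'learning_rate': hp.loguniform('learning_rate', np.log(0.01), np.log(0.2)),
    'num_leaves': scope.int(hp.quniform('num_leaves', 30, 250, 1)),
    'subsample': hp.uniform('subsample', 0.6, 1.0),
    'colsample_bytree': hp.uniform('colsample_bytree', 0.6, 1.0),
}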
# Run the TPE search. The head of this fmin call was truncated in the
# original fragment; it is reconstructed here around the surviving rstate
# argument (MAX_EVALS is a placeholder for the elided search budget).
from functools import partial

trials = Trials()
best_vals = fmin(fn=partial(evaluate, X=X_train, y=y_train),
                 space=hyper_space, algo=tpe.suggest, trials=trials,
                 max_evals=MAX_EVALS,
                 rstate=np.random.RandomState(seed=2018))

# Print best parameters
best_params = space_eval(hyper_space, best_vals)
print("BEST PARAMETERS: " + str(best_params))

# Print best CV score (each trial's loss is the negated R2)
scores = [-trial['result']['loss'] for trial in trials.trials]
print("BEST CV SCORE: " + str(np.max(scores)))

# Print execution time in minutes
tdiff = trials.trials[-1]['book_time'] - trials.trials[0]['book_time']
print("ELAPSED TIME (MIN): " + str(tdiff.total_seconds() / 60))

# Refit on the full training data with the best parameters
est = LGBMRegressor(boosting='gbdt', n_jobs=-1, random_state=2018)
est.set_params(**best_params)
est.fit(X_train, y_train)

# Predict and score on the held-out test data
y_pred = est.predict(X_test)
score = r2_score(y_test, y_pred)
print("R2 SCORE ON TEST DATA: {}".format(score))

#==============================================================================
# Tree structure of hyperparameter space (Optional)
#==============================================================================
# To use a tree-structured space, you must change the evaluate function so
# that it extracts learning_rate and n_estimators from the nested choices.
# Add the following code to the start of the evaluate function.
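# -- Added sketch (not in the original; the snippet promised above was
# -- missing from the fragment). Assuming the tree-structured space wraps
# -- learning_rate and n_estimators together in an hp.choice (the key name
# -- 'lr_and_trees' is hypothetical), the start of evaluate() would unpack
# -- the nested dict into plain keyword arguments like this:
#
#     lr_and_trees = params.pop('lr_and_trees')
#     params['learning_rate'] = lr_and_trees['learning_rate']
#     params['n_estimators'] = int(lr_and_trees['n_estimators'])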
import numpy as np
from hyperopt import hp, fmin, tpe, Trials, space_eval
from hyperopt.mongoexp import MongoTrials
from hyperopt.pyll import scope
from lightgbm import LGBMRegressor


class LightGBM(BaseModel):
    """LightGBM regression model."""

    def __init__(self, tuning_metric='mse', trials='trials',
                 bottom_coding=None, transform=None, **kwargs):
        """Initialize hyperparameters."""
        super(LightGBM, self).__init__(bottom_coding=bottom_coding,
                                       transform=transform)
        self.model = LGBMRegressor
        self.tuning_metric = tuning_metric
        # Search budget read by tune(); assumed default, since no value is
        # set anywhere else in this fragment
        self.max_evals = kwargs.get('max_evals', 100)
        self.trials = Trials() \
            if trials == 'trials' \
            else MongoTrials('mongo://localhost:1234/foo_db/jobs',
                             exp_key='exp1')
        self.set_parameters()

    def set_parameters(self):
        """Set the model hyperparameter sweep."""
        self.space = {
            'objective': self.tuning_metric,
            'device': 'gpu',
            'min_data_in_leaf': hp.choice('min_data_in_leaf',
                                          [100, 300, 1000]),
            'boosting_type': hp.choice('boosting_type', ['gbdt']),
            'num_leaves': scope.int(hp.quniform('num_leaves', 30, 250, 1)),
            'learning_rate': hp.loguniform('learning_rate',
                                           np.log(0.01), np.log(0.2)),
            'subsample_for_bin': scope.int(
                hp.quniform('subsample_for_bin', 20000, 300000, 20000)),
            'reg_alpha': hp.uniform('reg_alpha', 0.0, 1.0),
            'reg_lambda': hp.uniform('reg_lambda', 0.0, 1.0),
            'colsample_bytree': hp.uniform('colsample_bytree', 0.6, 1.0),
        }

    def tune(self, training_set, logger=None, saver=None):
        """Run a TPE search, then refit the best model on the training set."""
        self.training_set = training_set
        objective = generate_objective(self.training_set, self.tuning_metric)
        best = space_eval(
            self.space,
            fmin(fn=objective, space=self.space, trials=self.trials,
                 algo=tpe.suggest, max_evals=self.max_evals))
        print(f'Search space: {self.space}')
        print(f'Best hyperparams: {best}')
        self.model = LGBMRegressor()
        self.model.set_params(**best)
        self.model.fit(training_set.X, training_set.y)

    def instantiate_model(self, params):
        """Return a fresh LGBMRegressor configured with `params`."""
        model = LGBMRegressor()
        model.set_params(**params)
        return model
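# -- Added sketch (not in the original): generate_objective is called by
# -- tune() above but is not defined in this fragment. A minimal version
# -- consistent with that call site, assuming an mse tuning metric and
# -- 4-fold CV, could be:
from sklearn.model_selection import cross_val_score

def generate_objective(training_set, tuning_metric):
    def objective(params):
        model = LGBMRegressor()
        model.set_params(**params)
        scores = cross_val_score(model, training_set.X, training_set.y,
                                 scoring='neg_mean_squared_error', cv=4)
        # fmin minimizes, so return the (positive) mean squared error
        return -np.mean(scores)
    return objective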
import lightgbm as lgb
import numpy as np
import pandas as pd
from lightgbm import LGBMClassifier

# The opening arguments of this constructor were truncated in the original;
# only the surviving ones are kept (n_estimators must be among the missing
# ones, since it is read back from get_params() below)
lgbm_model = LGBMClassifier(
    min_child_weight=0.001, min_child_samples=20, min_split_gain=0.1,
    subsample=0.8, colsample_bytree=0.8, objective='binary', random_state=7)
lgbm_param = lgbm_model.get_params()
lgbm_train = lgb.Dataset(X, Y)

# Use cross-validation to determine the optimal number of trees
cvresult = lgb.cv(lgbm_param, lgbm_train,
                  num_boost_round=lgbm_param['n_estimators'],
                  nfold=5, metrics='auc', early_stopping_rounds=100)
best_n_estimators = len(cvresult['auc-mean'])
lgbm_model.set_params(n_estimators=best_n_estimators)
lgbm_model.fit(X, Y, eval_metric='auc')

feat_imp = pd.Series(lgbm_model.feature_importances_, index=X.columns)
feat_imp = feat_imp.sort_values(ascending=False)
# Valid features are those with a nonzero feature_importance, i.e. the
# features that actually contribute splits to the LightGBM trees; the
# remaining features are never used by the model
valid_feature_num = len(np.where(feat_imp > 0)[0])
print(valid_feature_num)

# In[74]:
#==============================================================================
# LightGBM tuning: feature_num
#==============================================================================
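# -- Added sketch (not in the original): the banner above announces the
# -- feature_num tuning step, but its code was not included. One plausible
# -- version cross-validates the top-N features by importance (the step
# -- size and scoring choice here are assumptions):
from sklearn.model_selection import cross_val_score

best_num, best_auc = valid_feature_num, 0.0
for num in range(10, valid_feature_num + 1, 10):
    top_cols = feat_imp.index[:num]
    auc = cross_val_score(lgbm_model, X[top_cols], Y,
                          scoring='roc_auc', cv=5).mean()
    if auc > best_auc:
        best_num, best_auc = num, auc
print(best_num, best_auc)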