from typing import Any, Dict, List, Tuple

import lightgbm as lgb
import pandas as pd
from lightgbm import Dataset as lgbDataset  # assumed alias used in the optimize() snippets below
from optuna.integration.lightgbm import LightGBMTunerCV


def lgb_cv_tuning(
    dataset_x: pd.DataFrame,
    dataset_y: pd.DataFrame,
    cv_index: List[Tuple[pd.RangeIndex, pd.RangeIndex]],
) -> Tuple[List[lgb.Booster], dict]:
    train_dataset = lgb.Dataset(data=dataset_x, label=dataset_y)
    lgbtuner = LightGBMTunerCV(
        params={"objective": "regression", "metric": "rmse"},
        train_set=train_dataset,
        folds=cv_index,  # explicit folds take precedence over nfold
        nfold=5,
        verbose_eval=False,
        num_boost_round=10000,
        early_stopping_rounds=100,
        return_cvbooster=True,
    )
    lgbtuner.run()

    # return_cvbooster=True keeps the CVBooster of the best trial,
    # i.e. one booster per fold
    models = lgbtuner.get_best_booster().boosters
    best_params = lgbtuner.best_params

    print("Params:")
    for key, value in best_params.items():
        print("    {}: {}".format(key, value))

    return models, best_params
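# Usage sketch (illustrative only: the DataFrame/Series below and the KFold
# split are assumptions, not part of the original code). `cv_index` is simply
# a list of (train_idx, valid_idx) pairs, e.g. produced by scikit-learn's KFold.
from sklearn.model_selection import KFold

X = pd.DataFrame({"f0": range(100), "f1": range(100, 200)})
y = pd.Series(range(100), name="target", dtype="float")

kf = KFold(n_splits=5, shuffle=True, random_state=0)
cv_index = list(kf.split(X))  # list of (train_idx, valid_idx) array pairs

models, best_params = lgb_cv_tuning(X, y, cv_index)
# `models` holds one booster per fold; averaging their predictions is one
# common way to obtain a final prediction
preds = sum(m.predict(X) for m in models) / len(models)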
def optimize(self, dtrain: lgbDataset, deval: lgbDataset):
    # Define the base parameters
    if self.objective == "binary":
        params: Dict = {"objective": self.objective}
    elif self.objective == "multiclass":
        params: Dict = {"objective": self.objective, "metric": "multi_logloss"}
    elif self.objective == "regression":
        params: Dict = {"objective": self.objective, "metric": "rmse"}

    if self.verbose:
        params["verbosity"] = 1
    else:
        params["verbosity"] = -1

    if self.objective != "regression":
        params["is_unbalance"] = self.is_unbalance
    if self.objective == "multiclass":
        params["num_class"] = self.num_class

    # Reformat the data for the LightGBM cross-validation method: the train and
    # eval sets are concatenated and a single (train, valid) fold is passed, so
    # the "CV" is effectively a fixed train/validation split
    train_set = lgb.Dataset(
        data=pd.concat([dtrain.data, deval.data]).reset_index(drop=True),
        label=pd.concat([dtrain.label, deval.label]).reset_index(drop=True),
        categorical_feature=dtrain.categorical_feature,
        free_raw_data=False,
    )
    train_index = range(len(dtrain.data))
    valid_index = range(len(dtrain.data), len(train_set.data))

    # Run the hyper-parameter tuning
    self.tuner = LightGBMTunerCV(
        params=params,
        train_set=train_set,
        folds=[(train_index, valid_index)],
        verbose_eval=False,
        num_boost_round=1000,
        early_stopping_rounds=50,
    )
    self.tuner.run()

    self.best = self.tuner.best_params
    # Since n_estimators is not among the params that Optuna optimizes, we
    # need to add it manually. A high value is fine because the final fit
    # will use early stopping
    self.best["n_estimators"] = 1000  # type: ignore
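# The tuned parameters are typically re-used to fit a final model. A minimal
# sketch, assuming `opt` is an instance of the surrounding class after calling
# optimize(), and X_tr/y_tr, X_val/y_val are hypothetical splits. It also
# assumes a LightGBM version where lgb.train still accepts
# early_stopping_rounds (newer releases use the lgb.early_stopping callback).
final_params = dict(opt.best)
num_boost_round = final_params.pop("n_estimators")  # used only as an upper bound
booster = lgb.train(
    final_params,
    lgb.Dataset(X_tr, label=y_tr),
    num_boost_round=num_boost_round,
    valid_sets=[lgb.Dataset(X_val, label=y_val)],
    early_stopping_rounds=50,
)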
class LGBOptimizerOptuna(object):
    def __init__(
        self,
        objective: str = "binary",
        verbose: bool = False,
    ):
        self.objective = objective
        self.verbose = verbose
        self.best: Dict[str, Any] = {}  # Best hyper-parameters

    def optimize(self, dtrain: lgbDataset, deval: lgbDataset):
        # Define the base parameters
        params: Dict = {"objective": self.objective}  # , "metric": "rmse"}
        if self.verbose:
            params["verbosity"] = 1
        else:
            params["verbosity"] = -1

        # Reformat the data for the LightGBM cross-validation method: a single
        # (train, valid) fold over the concatenated train and eval data
        train_set = lgb.Dataset(
            data=pd.concat([dtrain.data, deval.data]).reset_index(drop=True),
            label=pd.concat([dtrain.label, deval.label]).reset_index(drop=True),
            categorical_feature=dtrain.categorical_feature,
            free_raw_data=False,
        )
        train_index = range(len(dtrain.data))
        valid_index = range(len(dtrain.data), len(train_set.data))

        # Run the hyper-parameter tuning
        self.tuner = LightGBMTunerCV(
            params=params,
            train_set=train_set,
            folds=[(train_index, valid_index)],
            verbose_eval=False,
            num_boost_round=1000,
            early_stopping_rounds=50,
        )
        self.tuner.run()

        self.best = self.tuner.best_params
        # Since n_estimators is not among the params that Optuna optimizes, we
        # need to add it manually. A high value is fine because the final fit
        # will use early stopping
        self.best["n_estimators"] = 1000  # type: ignore
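# Minimal usage sketch for LGBOptimizerOptuna (assumes `lgbDataset` is an alias
# for lightgbm.Dataset, and X_train/y_train, X_valid/y_valid are hypothetical
# pandas splits; none of these names come from the original code).
dtrain = lgbDataset(X_train, label=y_train, free_raw_data=False)
deval = lgbDataset(X_valid, label=y_valid, free_raw_data=False)

optimizer = LGBOptimizerOptuna(objective="binary")
optimizer.optimize(dtrain, deval)
print(optimizer.best)  # tuned hyper-parameters plus the manually added n_estimators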