def test_tune_best_score_reproducibility(self) -> None:
    """Verify that two identically seeded tuning runs yield the same best score.

    Both tuners receive the same datasets, the same ``params`` and the same
    ``optuna_seed``; the test passes only when their ``best_score`` values
    compare exactly equal.
    """
    # ``sklearn.datasets.load_boston`` was removed in scikit-learn 1.2, so the
    # California housing regression data is used instead.
    california = sklearn.datasets.fetch_california_housing()
    X_trainval, X_test, y_trainval, y_test = train_test_split(
        california.data, california.target, random_state=0
    )

    train = lgb.Dataset(X_trainval, y_trainval)
    valid = lgb.Dataset(X_test, y_test)
    params = {
        "objective": "regression",
        "metric": "rmse",
        "random_seed": 0,
        # Request reproducible LightGBM training; the exact-equality
        # assertion at the end depends on it.
        "deterministic": True,
        "force_col_wise": True,
    }

    tuner_first_try = lgb.LightGBMTuner(
        params,
        train,
        valid_sets=valid,
        early_stopping_rounds=3,
        optuna_seed=10,
    )
    tuner_first_try.run()
    best_score_first_try = tuner_first_try.best_score

    tuner_second_try = lgb.LightGBMTuner(
        params,
        train,
        valid_sets=valid,
        early_stopping_rounds=3,
        optuna_seed=10,
    )
    tuner_second_try.run()
    best_score_second_try = tuner_second_try.best_score

    assert best_score_second_try == best_score_first_try
def test_tune_num_leaves_negative_max_depth(self):  # type: () -> None
    """Run ``tune_num_leaves`` with ``max_depth=-1`` and count the results.

    After tuning, both the ``tuning_history`` list handed to the tuner and
    ``runner.study.trials`` are expected to hold 20 entries.
    """
    lgbm_params = {
        "metric": "binary_logloss",
        "max_depth": -1,
    }  # type: Dict[str, Any]

    # Small random binary-classification data; the same arrays back both the
    # training and the validation dataset.
    features = np.random.uniform(10, size=(10, 5))
    labels = np.random.randint(2, size=10)
    train_dataset = lgb.Dataset(features, label=labels)
    valid_dataset = lgb.Dataset(features, label=labels)

    tuning_history = []  # type: List[Dict[str, float]]
    tuner_kwargs = dict(
        num_boost_round=3,
        early_stopping_rounds=2,
        valid_sets=valid_dataset,
        tuning_history=tuning_history,
    )

    # DeprecationWarnings raised while constructing the tuner are silenced.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=DeprecationWarning)
        runner = lgb.LightGBMTuner(lgbm_params, train_dataset, **tuner_kwargs)

    runner.tune_num_leaves()

    assert len(tuning_history) == 20
    assert len(runner.study.trials) == 20
def test_no_eval_set_args(self) -> None:
    """Constructing a tuner without ``valid_sets`` must raise ``ValueError``.

    The exception message is compared verbatim against
    "`valid_sets` is required.".
    """
    empty_params: Dict[str, Any] = {}
    dataset = lgb.Dataset(None)
    # Deliberately no ``valid_sets`` entry here.
    tuner_kwargs = dict(num_boost_round=5, early_stopping_rounds=2)

    with pytest.raises(ValueError) as excinfo:
        lgb.LightGBMTuner(empty_params, dataset, **tuner_kwargs)

    assert excinfo.type == ValueError
    assert str(excinfo.value) == "`valid_sets` is required."
def _get_tuner_object(self, params=None, train_set=None, kwargs_options=None):
    # type: (Dict[str, Any], lgb.Dataset, Dict[str, Any]) -> lgb.LightGBMTuner
    """Build a ``LightGBMTuner`` with the required keyword arguments filled in.

    Args:
        params: Parameters passed to the tuner. A fresh empty dict is used
            when omitted.
        train_set: Training dataset, forwarded to the tuner unchanged.
        kwargs_options: Extra keyword arguments for the tuner; entries here
            override the defaults set below. Treated as empty when omitted.

    Returns:
        The constructed ``lgb.LightGBMTuner``.
    """
    # Create new dicts on every call instead of sharing mutable default
    # arguments between invocations.
    if params is None:
        params = {}
    if kwargs_options is None:
        kwargs_options = {}

    # Required keyword arguments.
    dummy_dataset = lgb.Dataset(None)
    kwargs = dict(
        num_boost_round=5,
        early_stopping_rounds=2,
        valid_sets=dummy_dataset,
    )
    kwargs.update(kwargs_options)

    runner = lgb.LightGBMTuner(params, train_set, **kwargs)
    return runner
def test_tune_best_score_reproducibility(self) -> None:
    """Verify that two identically configured tuning runs agree on the best score.

    The tuner is built and run twice with the same datasets, ``params`` and
    ``optuna_seed``; the second run's ``best_score`` must equal the first's.
    """
    housing = sklearn.datasets.fetch_california_housing()
    X_trainval, X_test, y_trainval, y_test = train_test_split(
        housing.data, housing.target, random_state=0
    )
    train = lgb.Dataset(X_trainval, y_trainval)
    valid = lgb.Dataset(X_test, y_test)

    params = {
        "objective": "regression",
        "metric": "rmse",
        "random_seed": 0,
        "deterministic": True,
        "force_col_wise": True,
        "verbosity": -1,
    }

    # Run the same tuning procedure twice, one after the other, and collect
    # the best score of each attempt.
    best_scores = []
    for _ in range(2):
        tuner = lgb.LightGBMTuner(
            params,
            train,
            valid_sets=valid,
            early_stopping_rounds=3,
            optuna_seed=10,
            callbacks=[log_evaluation(-1)],
        )
        tuner.run()
        best_scores.append(tuner.best_score)

    best_score_first_try, best_score_second_try = best_scores
    assert best_score_second_try == best_score_first_try
def test_deprecated_args(
    self,
    best_params: Optional[Dict[str, Any]],
    tuning_history: Optional[List[Dict[str, Any]]],
) -> None:
    """Expect a ``DeprecationWarning`` when the tuner is given these arguments.

    Args:
        best_params: Value forwarded as the tuner's ``best_params`` argument.
        tuning_history: Value forwarded as the tuner's ``tuning_history``
            argument.
    """
    dataset = lgb.Dataset(None)
    empty_params = {}  # type: Dict[str, Any]
    deprecated_kwargs = dict(best_params=best_params, tuning_history=tuning_history)

    # Only the tuner construction is expected to emit the warning.
    with pytest.warns(DeprecationWarning):
        lgb.LightGBMTuner(
            empty_params,
            dataset,
            valid_sets=[dataset],
            **deprecated_kwargs,
        )
def test__parse_args_wrapper_args(self) -> None:
    """Check how the tuner sorts its own options away from LightGBM keyword arguments.

    ``time_budget`` and ``sample_size`` are passed to the constructor; each
    must be absent from ``runner.lgbm_kwargs`` and present in
    ``runner.auto_options``.
    """
    params = {}  # type: Dict[str, Any]
    train_set = lgb.Dataset(None)
    val_set = lgb.Dataset(None)
    kwargs = dict(
        num_boost_round=12,
        early_stopping_rounds=10,
        valid_sets=val_set,
        time_budget=600,
        sample_size=1000,
    )
    runner = lgb.LightGBMTuner(params, train_set, **kwargs)

    # Each tuner-only option is listed exactly once; a repeated entry would
    # only re-run the same two assertions.
    new_args = ["time_budget", "sample_size"]
    for new_arg in new_args:
        assert new_arg not in runner.lgbm_kwargs
        assert new_arg in runner.auto_options
def _get_tuner_object(
    self,
    params: Optional[Dict[str, Any]] = None,
    train_set: Optional[lgb.Dataset] = None,
    kwargs_options: Optional[Dict[str, Any]] = None,
    study: Optional[Study] = None,
) -> lgb.LightGBMTuner:
    """Build a ``LightGBMTuner`` with the required keyword arguments filled in.

    Args:
        params: Parameters passed to the tuner. A fresh empty dict is used
            when omitted.
        train_set: Training dataset, forwarded to the tuner unchanged.
        kwargs_options: Extra keyword arguments for the tuner; entries here
            override the defaults set below (including ``study``). Treated
            as empty when omitted.
        study: Study forwarded as the tuner's ``study`` keyword argument.

    Returns:
        The constructed ``lgb.LightGBMTuner``.
    """
    # Create new dicts on every call instead of sharing mutable default
    # arguments between invocations.
    if params is None:
        params = {}
    if kwargs_options is None:
        kwargs_options = {}

    # Required keyword arguments.
    dummy_dataset = lgb.Dataset(None)
    kwargs = dict(
        num_boost_round=5,
        early_stopping_rounds=2,
        valid_sets=dummy_dataset,
        study=study,
    )
    kwargs.update(kwargs_options)

    runner = lgb.LightGBMTuner(params, train_set, **kwargs)
    return runner
def test_inconsistent_study_direction(self, metric: str, study_direction: str) -> None:
    """Expect ``ValueError`` when the study direction conflicts with the metric.

    Args:
        metric: Metric name stored under ``params["metric"]``; when it is
            ``None`` the key is left out of ``params`` entirely.
        study_direction: Direction passed to ``optuna.create_study``.
    """
    params = {} if metric is None else {"metric": metric}  # type: Dict[str, Any]

    train_set = lgb.Dataset(None)
    valid_set = lgb.Dataset(None)
    study = optuna.create_study(direction=study_direction)
    tuner_kwargs = dict(
        valid_sets=[train_set, valid_set],
        num_boost_round=5,
        early_stopping_rounds=2,
        study=study,
    )

    with pytest.raises(ValueError) as excinfo:
        lgb.LightGBMTuner(params, train_set, **tuner_kwargs)

    assert excinfo.type == ValueError
    assert str(excinfo.value).startswith("Study direction is inconsistent with the metric")
def test_tune_num_leaves_negative_max_depth(self) -> None:
    """Run ``tune_num_leaves`` with ``max_depth=-1`` and count the trials.

    After tuning, ``runner.study.trials`` is expected to hold 20 entries.
    """
    lgbm_params = {
        "metric": "binary_logloss",
        "max_depth": -1,
    }  # type: Dict[str, Any]

    # Small random binary-classification data; the same arrays back both the
    # training and the validation dataset.
    features = np.random.uniform(10, size=(10, 5))
    labels = np.random.randint(2, size=10)
    train_dataset = lgb.Dataset(features, label=labels)
    valid_dataset = lgb.Dataset(features, label=labels)

    tuner_kwargs = dict(
        num_boost_round=3,
        early_stopping_rounds=2,
        valid_sets=valid_dataset,
    )
    runner = lgb.LightGBMTuner(lgbm_params, train_dataset, **tuner_kwargs)
    runner.tune_num_leaves()

    assert len(runner.study.trials) == 20
def test__parse_args_wrapper_args(self):  # type: () -> None
    """Check how the tuner sorts its own options away from LightGBM keyword arguments.

    ``time_budget``, ``best_params`` and ``sample_size`` are passed to the
    constructor; each must be absent from ``runner.lgbm_kwargs`` and present
    in ``runner.auto_options``.
    """
    params = {}  # type: Dict[str, Any]
    train_set = lgb.Dataset(None)
    val_set = lgb.Dataset(None)
    kwargs = dict(
        num_boost_round=12,
        early_stopping_rounds=10,
        valid_sets=val_set,
        time_budget=600,
        best_params={},
        sample_size=1000,
    )

    # DeprecationWarnings raised while constructing the tuner are silenced
    # (presumably triggered by ``best_params`` -- confirm against the tuner).
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=DeprecationWarning)
        runner = lgb.LightGBMTuner(params, train_set, **kwargs)

    # Each tuner-only option is listed exactly once; a repeated entry would
    # only re-run the same two assertions.
    new_args = ["time_budget", "best_params", "sample_size"]
    for new_arg in new_args:
        assert new_arg not in runner.lgbm_kwargs
        assert new_arg in runner.auto_options
def test_tune_num_leaves_negative_max_depth(self):  # type: () -> None
    """Run ``tune_num_leaves`` with ``max_depth=-1`` and count the history entries.

    After tuning, the ``tuning_history`` list handed to the tuner is expected
    to hold 20 entries.
    """
    lgbm_params = {
        'metric': 'binary_logloss',
        'max_depth': -1,
    }  # type: Dict[str, Any]

    # Small random binary-classification data; the same arrays back both the
    # training and the validation dataset.
    features = np.random.uniform(10, size=(10, 5))
    labels = np.random.randint(2, size=10)
    train_dataset = lgb.Dataset(features, label=labels)
    valid_dataset = lgb.Dataset(features, label=labels)

    tuning_history = []  # type: List[Dict[str, float]]
    tuner_kwargs = dict(
        num_boost_round=3,
        early_stopping_rounds=2,
        valid_sets=valid_dataset,
        tuning_history=tuning_history,
    )
    runner = lgb.LightGBMTuner(lgbm_params, train_dataset, **tuner_kwargs)
    runner.tune_num_leaves()

    assert len(tuning_history) == 20
# Tune LightGBM parameters with Optuna's LightGBMTuner and obtain a model,
# falling back to a plain ``lgb.train`` run when no best booster is available.

# Build the feature matrix / target from the input frame, then split into
# training and validation parts using the ``date_block_num`` column.
# NOTE(review): this ``train_test_split`` takes ``date_vec`` / ``train_start``,
# so it is presumably a project-local helper rather than scikit-learn's --
# confirm.
X, y = df_to_X_y(train_set)
X_train, y_train, X_val, y_val = train_test_split(
    X, y, date_vec=train_set['date_block_num'].values, train_start=16)

dtrain = lgb.Dataset(X_train, label=y_train)
dval = lgb.Dataset(X_val, label=y_val)

# Shallow copy, so DEFAULT_PARAMS itself is not the dict handed to the tuner.
params = {**DEFAULT_PARAMS}

# The study is named after ``output_path`` and stored in ``trials_db``;
# ``load_if_exists=True`` is passed so an existing study of that name can be
# reused.
study = optuna.create_study(
    load_if_exists=True, study_name=output_path, storage=trials_db)

tuner = optuna_lgb.LightGBMTuner(params, dtrain, valid_sets=[dtrain, dval],
                                 early_stopping_rounds=100,
                                 verbose_eval=10, study=study)
tuner.run()

try:
    model = tuner.get_best_booster()
except ValueError:
    # The tuner could not provide a best booster; train one directly with the
    # untuned ``params`` instead.
    model = lgb.train(params, dtrain, valid_sets=[dval],
                      early_stopping_rounds=100, num_boost_round=1000,
                      verbose_eval=10)

# Drop the local references to the split arrays (presumably to release memory
# before later steps).
del X_train
del y_train
del X_val
del y_val