Example #1
    def test_optuna_callback(self) -> None:
        params = {"verbose": -1}  # type: Dict[str, Any]
        dataset = lgb.Dataset(np.zeros((10, 10)))

        callback_mock = mock.MagicMock()

        study = optuna.create_study()
        tuner = LightGBMTunerCV(params, dataset, study=study, optuna_callbacks=[callback_mock],)

        with mock.patch.object(_OptunaObjectiveCV, "_get_cv_scores", return_value=[1.0]):
            tuner._tune_params(["num_leaves"], 10, optuna.samplers.TPESampler(), "num_leaves")

        assert callback_mock.call_count == 10
Example #2
    def test_inconsistent_study_direction(self, metric: str,
                                          study_direction: str) -> None:

        params = {}  # type: Dict[str, Any]
        if metric is not None:
            params["metric"] = metric
        train_set = lgb.Dataset(None)
        valid_set = lgb.Dataset(None)
        study = optuna.create_study(direction=study_direction)
        with pytest.raises(ValueError) as excinfo:
            lgb.LightGBMTuner(
                params,
                train_set,
                valid_sets=[train_set, valid_set],
                num_boost_round=5,
                early_stopping_rounds=2,
                study=study,
            )

        assert excinfo.type == ValueError
        assert str(excinfo.value).startswith(
            "Study direction is inconsistent with the metric")
Example #3
    def _learning_race_lgb(self, this_model_name, target):
# Split the test data into evaluation and validation sets
        X_eval, X_valid, y_eval, y_valid = train_test_split(self.X_test,
                                                            self.y_test,
                                                            random_state=42)
        X_eval_weight = X_eval["weight"]
        X_eval = X_eval.drop("weight", axis=1)

# Create the datasets
        lgb_train = lgb.Dataset(self.X_train.drop("weight", axis=1),
                                self.y_train,
                                weight=self.X_train["weight"]
                                )  #, categorical_feature=self.categ_columns)
        lgb_eval = lgb.Dataset(
            X_eval, y_eval, reference=lgb_train,
            weight=X_eval_weight)  #, categorical_feature=self.categ_columns)

        if self.test_flag:
            num_boost_round = 5
            early_stopping_rounds = 3
        else:
            num_boost_round = 1000
            early_stopping_rounds = 50

        # Train the model with the above parameters
        best_params, history = {}, []
        this_param = self.lgbm_params[target]
        model = lgb.train(this_param,
                          lgb_train,
                          valid_sets=lgb_eval,
                          verbose_eval=False,
                          num_boost_round=num_boost_round,
                          early_stopping_rounds=early_stopping_rounds,
                          best_params=best_params,
                          tuning_history=history)
        print("Bset Paramss:", best_params)
        print('Tuning history:', history)

        self._save_learning_model(model, this_model_name)
Example #4
    def _get_tuner_object(self, params={}, train_set=None, kwargs_options={}, study=None):
        # type: (Dict[str, Any], lgb.Dataset, Dict[str, Any], Optional[Study]) -> lgb.LightGBMTuner

        # Required keyword arguments.
        dummy_dataset = lgb.Dataset(None)

        kwargs = dict(
            num_boost_round=5, early_stopping_rounds=2, valid_sets=dummy_dataset, study=study
        )
        kwargs.update(kwargs_options)

        runner = lgb.LightGBMTuner(params, train_set, **kwargs)
        return runner
Example #5
    def test_tune_best_score_reproducibility(self) -> None:
        california = sklearn.datasets.fetch_california_housing()
        X_trainval, X_test, y_trainval, y_test = train_test_split(
            california.data, california.target, random_state=0)

        train = lgb.Dataset(X_trainval, y_trainval)
        valid = lgb.Dataset(X_test, y_test)
        params = {
            "objective": "regression",
            "metric": "rmse",
            "random_seed": 0,
            "deterministic": True,
            "force_col_wise": True,
            "verbosity": -1,
        }

        tuner_first_try = lgb.LightGBMTuner(
            params,
            train,
            valid_sets=valid,
            early_stopping_rounds=3,
            optuna_seed=10,
            callbacks=[log_evaluation(-1)],
        )
        tuner_first_try.run()
        best_score_first_try = tuner_first_try.best_score

        tuner_second_try = lgb.LightGBMTuner(
            params,
            train,
            valid_sets=valid,
            early_stopping_rounds=3,
            optuna_seed=10,
            callbacks=[log_evaluation(-1)],
        )
        tuner_second_try.run()
        best_score_second_try = tuner_second_try.best_score

        assert best_score_second_try == best_score_first_try
Example #6
    def test_tune_num_leaves_negative_max_depth(self) -> None:

        params: Dict[str, Any] = {
            "metric": "binary_logloss",
            "max_depth": -1,
            "verbose": -1
        }
        X_trn = np.random.uniform(10, size=(10, 5))
        y_trn = np.random.randint(2, size=10)
        train_dataset = lgb.Dataset(X_trn, label=y_trn)
        valid_dataset = lgb.Dataset(X_trn, label=y_trn)

        runner = lgb.LightGBMTuner(
            params,
            train_dataset,
            num_boost_round=3,
            early_stopping_rounds=2,
            valid_sets=valid_dataset,
            callbacks=[log_evaluation(-1)],
        )
        runner.tune_num_leaves()
        assert len(runner.study.trials) == 20
Example #7
    def test_no_eval_set_args(self):
        # type: () -> None

        params = {}  # type: Dict[str, Any]
        train_set = lgb.Dataset(None)
        with pytest.raises(ValueError) as excinfo:
            lgb.LightGBMTuner(params,
                              train_set,
                              num_boost_round=5,
                              early_stopping_rounds=2)

        assert excinfo.type == ValueError
        assert str(excinfo.value) == "`valid_sets` is required."
Example #8
    def test_tune_num_leaves_negative_max_depth(self):
        # type: () -> None

        params = {
            "metric": "binary_logloss",
            "max_depth": -1,
        }  # type: Dict[str, Any]
        X_trn = np.random.uniform(10, size=(10, 5))
        y_trn = np.random.randint(2, size=10)
        train_dataset = lgb.Dataset(X_trn, label=y_trn)
        valid_dataset = lgb.Dataset(X_trn, label=y_trn)

        tuning_history = []  # type: List[Dict[str, float]]
        runner = lgb.LightGBMTuner(
            params,
            train_dataset,
            num_boost_round=3,
            early_stopping_rounds=2,
            valid_sets=valid_dataset,
            tuning_history=tuning_history,
        )
        runner.tune_num_leaves()
        assert len(tuning_history) == 20
Example #9
    def test_deprecated_args(
        self, best_params: Optional[Dict[str, Any]], tuning_history: Optional[List[Dict[str, Any]]]
    ) -> None:
        # Required keyword arguments.
        params = {}  # type: Dict[str, Any]
        train_set = lgb.Dataset(None)
        with pytest.warns(DeprecationWarning):
            lgb.LightGBMTuner(
                params,
                train_set,
                valid_sets=[train_set],
                best_params=best_params,
                tuning_history=tuning_history,
            )
Example #10
    def test_call(self):
        # type: () -> None

        target_param_names = ['lambda_l1']
        lgbm_params = {}  # type: Dict[str, Any]
        train_set = lgb.Dataset(None)
        val_set = lgb.Dataset(None)

        lgbm_kwargs = {'valid_sets': val_set}
        best_score = -np.inf

        with turnoff_train():
            objective = OptunaObjective(
                target_param_names,
                lgbm_params,
                train_set,
                lgbm_kwargs,
                best_score,
            )
            study = optuna.create_study(direction='minimize')
            study.optimize(objective, n_trials=10)

            assert study.best_value == 0.5
Example #11
    def test_tune_best_score_reproducibility(self) -> None:
        boston = sklearn.datasets.load_boston()
        X_trainval, X_test, y_trainval, y_test = train_test_split(
            boston.data, boston.target, random_state=0
        )

        train = lgb.Dataset(X_trainval, y_trainval)
        valid = lgb.Dataset(X_test, y_test)
        params = {
            "objective": "regression",
            "metric": "rmse",
            "random_seed": 0,
            "deterministic": True,
            "verbosity": -1,
        }

        tuner_first_try = lgb.LightGBMTuner(
            params,
            train,
            valid_sets=valid,
            early_stopping_rounds=3,
            optuna_seed=10,
        )
        tuner_first_try.run()
        best_score_first_try = tuner_first_try.best_score

        tuner_second_try = lgb.LightGBMTuner(
            params,
            train,
            valid_sets=valid,
            early_stopping_rounds=3,
            optuna_seed=10,
        )
        tuner_second_try.run()
        best_score_second_try = tuner_second_try.best_score

        assert best_score_second_try == best_score_first_try
Example #12
def objective(trial):
    X, y = ember.read_vectorized_features('./sample/merge', 20000, 3154)

    train_x, val_x, train_y, val_y = train_test_split(X, y, test_size=0.4, random_state=777)
    valid_x, test_x, valid_y, test_y = train_test_split(val_x, val_y, test_size=0.5, random_state=777)
    sc = StandardScaler()
    train_x = sc.fit_transform(train_x)
    valid_x = sc.transform(valid_x)
    test_x = sc.transform(test_x)

    train_data_set = lgb.Dataset(train_x, train_y)
    valid_data_sets = lgb.Dataset(valid_x, valid_y)

    param = {
        'objective': 'binary',
        'metric': 'binary_logloss',
        "verbosity": -1,
        "boosting_type": "gbdt",
        'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
        'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
        "num_leaves": trial.suggest_int("num_leaves", 2, 256),
        # 'num_leaves': 2048,  # total number of leaves in the tree; default is 31
        # 'max_depth': 16,  # maximum tree depth
        # 'min_data_in_leaf': 1000,  # minimum number of records in a leaf; the default of 20 is the optimal value
        # 'num_iterations': 1000,  # 1000 -> 1500
        'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
        'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
        'bagging_freq': trial.suggest_int('bagging_freq', 1, 7),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 100)
    }

    gbm = lgb.train(param, train_data_set, valid_sets=[valid_data_sets], verbose_eval=False)
    pred_y = gbm.predict(test_x)
    y_pred = np.where(np.array(pred_y) > 0.7, 1, 0)
    accuracy = sklearn.metrics.accuracy_score(test_y, y_pred)
    return accuracy
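
Since the objective above returns an accuracy, it would typically be maximized. Below is a minimal driver sketch, assuming `optuna` is imported and the hard-coded ember feature path from the example exists; the trial count is illustrative, not from the original.

if __name__ == "__main__":
    # Maximize the accuracy returned by `objective`; n_trials is arbitrary.
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=20)

    print("Best accuracy:", study.best_value)
    print("Best params:", study.best_trial.params)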
Example #13
    def test_call(self) -> None:

        target_param_names = ["lambda_l1"]
        lgbm_params: Dict[str, Any] = {}
        train_set = lgb.Dataset(None)
        val_set = lgb.Dataset(None)

        lgbm_kwargs = {"valid_sets": val_set}
        best_score = -np.inf

        with turnoff_train():
            objective = _OptunaObjective(
                target_param_names,
                lgbm_params,
                train_set,
                lgbm_kwargs,
                best_score,
                "tune_lambda_l1",
                None,
            )
            study = optuna.create_study(direction="minimize")
            study.optimize(objective, n_trials=10)

            assert study.best_value == 0.5
Example #14
    def test_get_booster_best_score__using_valid_names_as_str(self) -> None:

        expected_value = 1.0

        class DummyBooster(object):
            def __init__(self) -> None:

                self.best_score = {"dev": {"binary_logloss": expected_value}}

        booster = DummyBooster()
        dummy_dataset = lgb.Dataset(None)

        tuner = _BaseTuner(lgbm_kwargs={"valid_names": "dev", "valid_sets": dummy_dataset})
        val_score = tuner._get_booster_best_score(booster)
        assert val_score == expected_value
Example #15
    def test_get_booster_best_score(self) -> None:

        expected_value = 1.0

        class DummyBooster(object):
            def __init__(self) -> None:

                self.best_score = {"valid_0": {"binary_logloss": expected_value}}

        booster = DummyBooster()
        dummy_dataset = lgb.Dataset(None)

        tuner = _BaseTuner(lgbm_kwargs=dict(valid_sets=dummy_dataset))
        val_score = tuner._get_booster_best_score(booster)
        assert val_score == expected_value
Example #16
    def test_run_show_progress_bar(self, show_progress_bar: bool, expected: int) -> None:
        params: Dict = {"verbose": -1}
        dataset = lgb.Dataset(np.zeros((10, 10)))

        study = optuna.create_study()
        tuner = LightGBMTunerCV(
            params, dataset, study=study, time_budget=1, show_progress_bar=show_progress_bar
        )

        with mock.patch.object(
            _OptunaObjectiveCV, "_get_cv_scores", return_value=[1.0]
        ), mock.patch("tqdm.tqdm") as mock_tqdm:
            tuner.run()

        assert mock_tqdm.call_count == expected
Example #17
    def train(self, tr_x, tr_y, va_x=None, va_y=None, te_x=None):

        # Set up the data
        validation = va_x is not None
        lgb_train = optuna_lgb.Dataset(tr_x, tr_y, categorical_feature=self.categorical_features, free_raw_data=False)
        if validation:
            lgb_eval = optuna_lgb.Dataset(va_x, va_y, reference=lgb_train, categorical_feature=self.categorical_features,
                                          free_raw_data=False)

        # Hyperparameter settings
        params = dict(self.params)
        num_round = params.pop('num_boost_round')
        best_params, tuning_history = dict(), list()

        # Training
        if validation:
            early_stopping_rounds = params.pop('early_stopping_rounds')
            self.model = optuna_lgb.train(
                params, lgb_train, num_round,
                valid_sets=[lgb_train, lgb_eval],
                verbose_eval=1000,
                early_stopping_rounds=early_stopping_rounds,
                best_params=best_params,
                tuning_history=tuning_history
            )
        else:
            self.model = optuna_lgb.train(
                params, lgb_train, num_round,
                valid_sets=[lgb_train],
                verbose_eval=1000,
                best_params=best_params,
                tuning_history=tuning_history
            )
        print('Best Params:', best_params)
        with open(f'../output/model/{self.run_fold_name}_best_params.json', 'w') as f:
            json.dump(best_params, f, indent=4, separators=(',', ': '))
Example #18
    def test_optuna_callback(self) -> None:
        params: Dict[str, Any] = {"verbose": -1}
        dataset = lgb.Dataset(np.zeros((10, 10)))

        callback_mock = mock.MagicMock()

        study = optuna.create_study()
        tuner = LightGBMTuner(
            params, dataset, valid_sets=dataset, study=study, optuna_callbacks=[callback_mock]
        )

        with mock.patch.object(_BaseTuner, "_get_booster_best_score", return_value=1.0):
            tuner._tune_params(["num_leaves"], 10, optuna.samplers.TPESampler(), "num_leaves")

        assert callback_mock.call_count == 10
Example #19
def _single_train(features, targets, params):
    '''
    Train a single target column.
    '''

    trainval = lgb.Dataset(features, targets)
    tuner = lgb.LightGBMTunerCV(
        params,
        trainval,
        verbose_eval=100,
        early_stopping_rounds=100,
        folds=KFold(n_splits=3),
    )
    tuner.run()
    return tuner.best_params, tuner.best_score
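
A rough call-site sketch for `_single_train` above, assuming `numpy` is available as `np` and `lgb` refers to `optuna.integration.lightgbm`; the data and parameter values are purely illustrative and not part of the original.

# Illustrative only: random binary-classification data and a minimal parameter dict.
features = np.random.rand(100, 10)
targets = np.random.randint(2, size=100)
params = {
    "objective": "binary",
    "metric": "binary_logloss",
    "verbosity": -1,
}
best_params, best_score = _single_train(features, targets, params)
print("best params:", best_params, "best score:", best_score)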
Example #20
    def test_sample_train_set(self) -> None:

        sample_size = 3

        X_trn = np.random.uniform(10, size=50).reshape((10, 5))
        y_trn = np.random.randint(2, size=10)
        train_dataset = lgb.Dataset(X_trn, label=y_trn)
        runner = self._get_tuner_object(
            train_set=train_dataset, kwargs_options=dict(sample_size=sample_size)
        )
        runner.sample_train_set()

        # Workaround for mypy.
        if not TYPE_CHECKING:
            runner.train_subset.construct()  # Cannot get label before construct `lgb.Dataset`.
            assert runner.train_subset.get_label().shape[0] == sample_size
Example #21
    def test_resume_run(self) -> None:
        params = {"verbose": -1}  # type: Dict
        dataset = lgb.Dataset(np.zeros((10, 10)))

        study = optuna.create_study()
        tuner = LightGBMTunerCV(params, dataset, study=study)

        with mock.patch.object(OptunaObjectiveCV, "_get_cv_scores", return_value=[1.0]):
            tuner.tune_regularization_factors()

        n_trials = len(study.trials)
        assert n_trials == len(study.trials)

        tuner2 = LightGBMTunerCV(params, dataset, study=study)
        with mock.patch.object(OptunaObjectiveCV, "_get_cv_scores", return_value=[1.0]):
            tuner2.tune_regularization_factors()
        assert n_trials == len(study.trials)
Example #22
    def test_resume_run(self) -> None:
        params: Dict = {"verbose": -1}
        dataset = lgb.Dataset(np.zeros((10, 10)))

        study = optuna.create_study()
        tuner = LightGBMTuner(params, dataset, valid_sets=dataset, study=study)

        with mock.patch.object(_BaseTuner, "_get_booster_best_score", return_value=1.0):
            tuner.tune_regularization_factors()

        n_trials = len(study.trials)
        assert n_trials == len(study.trials)

        tuner2 = LightGBMTuner(params, dataset, valid_sets=dataset, study=study)
        with mock.patch.object(_BaseTuner, "_get_booster_best_score", return_value=1.0):
            tuner2.tune_regularization_factors()
        assert n_trials == len(study.trials)
Example #23
    def test_when_a_step_does_not_improve_best_score(self) -> None:

        params = {}  # type: Dict
        valid_data = np.zeros((10, 10))
        valid_sets = lgb.Dataset(valid_data)

        tuner = LightGBMTuner(params, None, valid_sets=valid_sets)
        assert not tuner.higher_is_better()

        with mock.patch("lightgbm.train"), mock.patch.object(
                _BaseTuner, "_get_booster_best_score", return_value=0.9):
            tuner.tune_feature_fraction()

        assert "feature_fraction" in tuner.best_params
        assert tuner.best_score == 0.9

        # Assume that tuning `num_leaves` doesn't improve the `best_score`.
        with mock.patch("lightgbm.train"), mock.patch.object(
                _BaseTuner, "_get_booster_best_score", return_value=1.1):
            tuner.tune_num_leaves()
Example #24
    def test_get_booster_best_score__using_valid_names_as_str(self):
        # type: () -> None

        expected_value = 1.0

        class DummyBooster(object):
            def __init__(self):
                # type: () -> None

                self.best_score = {'dev': {'binary_logloss': expected_value}}

        booster = DummyBooster()
        dummy_dataset = lgb.Dataset(None)

        tuner = BaseTuner(lgbm_kwargs={
            'valid_names': 'dev',
            'valid_sets': dummy_dataset,
        })
        val_score = tuner._get_booster_best_score(booster)
        assert val_score == expected_value
Example #25
    def test_run_show_progress_bar(self, show_progress_bar: bool, expected: int) -> None:
        params: Dict = {"verbose": -1}
        dataset = lgb.Dataset(np.zeros((10, 10)))

        study = optuna.create_study()
        tuner = LightGBMTuner(
            params,
            dataset,
            valid_sets=dataset,
            study=study,
            time_budget=1,
            show_progress_bar=show_progress_bar,
        )

        with mock.patch.object(
            _BaseTuner, "_get_booster_best_score", return_value=1.0
        ), mock.patch("tqdm.tqdm") as mock_tqdm:
            tuner.run()

        assert mock_tqdm.call_count == expected
Example #26
    def test_run_verbosity(self, verbosity: int, level: int) -> None:
        # We need to reconstruct our default handler to properly capture stderr.
        optuna.logging._reset_library_root_logger()
        optuna.logging.set_verbosity(optuna.logging.INFO)

        params: Dict = {"verbose": -1}
        dataset = lgb.Dataset(np.zeros((10, 10)))

        study = optuna.create_study()
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=FutureWarning)
            tuner = LightGBMTunerCV(
                params, dataset, study=study, verbosity=verbosity, time_budget=1
            )

        with mock.patch.object(_OptunaObjectiveCV, "_get_cv_scores", return_value=[1.0]):
            tuner.run()

        assert optuna.logging.get_verbosity() == level
        assert tuner.lgbm_params["verbose"] == -1
Example #27
    def test_best_booster_with_model_dir(self) -> None:
        params = {"verbose": -1}  # type: Dict
        dataset = lgb.Dataset(np.zeros((10, 10)))

        study = optuna.create_study()
        with TemporaryDirectory() as tmpdir:
            tuner = LightGBMTuner(
                params, dataset, valid_sets=dataset, study=study, model_dir=tmpdir
            )

            with mock.patch.object(BaseTuner, "_get_booster_best_score", return_value=0.0):
                tuner.tune_regularization_factors()

            best_booster = tuner.get_best_booster()

            tuner2 = LightGBMTuner(
                params, dataset, valid_sets=dataset, study=study, model_dir=tmpdir
            )
            best_booster2 = tuner2.get_best_booster()

            assert best_booster.params == best_booster2.params
Example #28
    def test_create_stepwise_study(self, direction: str,
                                   overall_best: int) -> None:

        tuner = LightGBMTuner({},
                              None,
                              valid_sets=lgb.Dataset(np.zeros((10, 10))))

        def objective(trial: optuna.trial.Trial, value: float) -> float:

            trial.set_system_attr(
                optuna.integration._lightgbm_tuner.optimize._STEP_NAME_KEY,
                "step{:.0f}".format(value),
            )
            return trial.suggest_uniform("x", value, value)

        study = optuna.create_study(direction=direction)
        study_step1 = tuner._create_stepwise_study(study, "step1")

        with pytest.raises(ValueError):
            study_step1.best_trial

        study_step1.optimize(lambda t: objective(t, 1), n_trials=1)

        study_step2 = tuner._create_stepwise_study(study, "step2")

        # `study` has a trial, but `study_step2` has no trials.
        with pytest.raises(ValueError):
            study_step2.best_trial

        study_step2.optimize(lambda t: objective(t, 2), n_trials=2)

        assert len(study_step1.trials) == 1
        assert len(study_step2.trials) == 2
        assert len(study.trials) == 3

        assert study_step1.best_trial.value == 1
        assert study_step2.best_trial.value == 2
        assert study.best_trial.value == overall_best
Example #29
    def test_get_best_booster(self) -> None:
        unexpected_value = 20  # out of scope.

        params: Dict = {"verbose": -1, "lambda_l1": unexpected_value}
        dataset = lgb.Dataset(np.zeros((10, 10)))

        study = optuna.create_study()
        tuner = LightGBMTuner(params, dataset, valid_sets=dataset, study=study)

        with pytest.raises(ValueError):
            tuner.get_best_booster()

        with mock.patch.object(_BaseTuner, "_get_booster_best_score", return_value=0.0):
            tuner.tune_regularization_factors()

        best_booster = tuner.get_best_booster()
        assert best_booster.params["lambda_l1"] != unexpected_value

        tuner2 = LightGBMTuner(params, dataset, valid_sets=dataset, study=study)

        # Resumed study does not have the best booster.
        with pytest.raises(ValueError):
            tuner2.get_best_booster()
Example #30
    def test_get_best_booster_with_error(self) -> None:
        params = {"verbose": -1}  # type: Dict
        dataset = lgb.Dataset(np.zeros((10, 10)))
        study = optuna.create_study()

        tuner = LightGBMTunerCV(params,
                                dataset,
                                study=study,
                                model_dir=None,
                                return_cvbooster=True)
        # No trial is completed yet.
        with pytest.raises(ValueError):
            tuner.get_best_booster()

        with mock.patch.object(_OptunaObjectiveCV,
                               "_get_cv_scores",
                               return_value=[1.0]):
            tuner.tune_regularization_factors()

        tuner2 = LightGBMTunerCV(params,
                                 dataset,
                                 study=study,
                                 model_dir=None,
                                 return_cvbooster=True)
        # The resumed study does not have the best booster.
        with pytest.raises(ValueError):
            tuner2.get_best_booster()

        with TemporaryDirectory() as tmpdir:
            tuner3 = LightGBMTunerCV(params,
                                     dataset,
                                     study=study,
                                     model_dir=tmpdir,
                                     return_cvbooster=True)
            # The booster was not saved hence not found in the `model_dir`.
            with pytest.raises(ValueError):
                tuner3.get_best_booster()