Example #1
def test__handling_alias_parameters():
    # type: () -> None

    params = {"reg_alpha": 0.1}
    _handling_alias_parameters(params)
    assert "reg_alpha" not in params
    assert "lambda_l1" in params
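For context, here is a minimal sketch of what an alias-normalization helper like _handling_alias_parameters does, inferred from the assertions in the tests on this page. The alias table is an illustrative subset, not optuna's actual mapping:

def _handling_alias_parameters(params):
    # type: (dict) -> None
    # Illustrative subset of LightGBM aliases, inferred from the tests on
    # this page; optuna's real table covers many more parameters.
    aliases = {
        "reg_alpha": "lambda_l1",
        "min_data": "min_data_in_leaf",
        "eta": "learning_rate",
    }
    # Mutate params in place: replace each alias key with its canonical name.
    for alias, canonical in aliases.items():
        if alias in params:
            params[canonical] = params.pop(alias)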
Example #2
    def run(self) -> None:
        """Perform the hyperparameter tuning with the given parameters."""
        verbosity = self.auto_options["verbosity"]
        if verbosity is not None:
            if verbosity > 1:
                optuna.logging.set_verbosity(optuna.logging.DEBUG)
            elif verbosity == 1:
                optuna.logging.set_verbosity(optuna.logging.INFO)
            elif verbosity == 0:
                optuna.logging.set_verbosity(optuna.logging.WARNING)
            else:
                optuna.logging.set_verbosity(optuna.logging.CRITICAL)

        # Handling aliases.
        _handling_alias_parameters(self.lgbm_params)

        # Sampling.
        self.sample_train_set()

        self.tune_feature_fraction()
        self.tune_num_leaves()
        self.tune_bagging()
        self.tune_feature_fraction_stage2()
        self.tune_regularization_factors()
        self.tune_min_data_in_leaf()
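A hedged usage sketch of how a run() method like the one above is typically driven through optuna's LightGBMTuner; the constructor mirrors lightgbm.train(), but the exact signature may differ across optuna versions:

import lightgbm as lgb
import numpy as np
import optuna.integration.lightgbm as lgb_tuner

# Small synthetic dataset so the sketch is self-contained.
rng = np.random.RandomState(0)
X = rng.rand(100, 5)
y = (X[:, 0] > 0.5).astype(int)
dtrain = lgb.Dataset(X[:80], label=y[:80])
dval = lgb.Dataset(X[80:], label=y[80:], reference=dtrain)

params = {"objective": "binary", "metric": "binary_logloss", "verbosity": 1}
tuner = lgb_tuner.LightGBMTuner(params, dtrain, valid_sets=[dval])
tuner.run()  # resolves aliases, samples the train set, then tunes step-wise
print(tuner.best_params)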
Example #3
def test_handling_alias_parameter() -> None:

    params = {
        "num_boost_round": 5,
        "early_stopping_rounds": 2,
        "min_data": 0.2,
    }
    _handling_alias_parameters(params)
    assert "min_data" not in params
    assert "min_data_in_leaf" in params
    assert params["min_data_in_leaf"] == 0.2
Example #4
def test_handling_alias_parameter_with_user_supplied_param() -> None:

    params = {
        "num_boost_round": 5,
        "early_stopping_rounds": 2,
        "eta": 0.5,
    }
    _handling_alias_parameters(params)

    assert "eta" not in params
    assert "learning_rate" in params
    assert params["learning_rate"] == 0.5
Example #5
    def run(self) -> None:
        """Perform the hyperparameter tuning with the given parameters."""
        # Suppress log messages.
        if self.auto_options["verbosity"] == 0:
            optuna.logging.disable_default_handler()
            self.lgbm_params["verbose"] = -1
            self.lgbm_kwargs["verbose_eval"] = False

        # Handling aliases.
        _handling_alias_parameters(self.lgbm_params)

        # Sampling.
        self.sample_train_set()

        self.tune_feature_fraction()
        self.tune_num_leaves()
        self.tune_bagging()
        self.tune_feature_fraction_stage2()
        self.tune_regularization_factors()
        self.tune_min_data_in_leaf()
Example #6
    def run(self) -> None:
        verbosity = self.auto_options["verbosity"]
        if verbosity is not None:
            if verbosity > 1:
                optuna.logging.set_verbosity(optuna.logging.DEBUG)
            elif verbosity == 1:
                optuna.logging.set_verbosity(optuna.logging.INFO)
            elif verbosity == 0:
                optuna.logging.set_verbosity(optuna.logging.WARNING)
            else:
                optuna.logging.set_verbosity(optuna.logging.CRITICAL)

        # Handling aliases.
        _handling_alias_parameters(self.lgbm_params)

        # Sampling.
        self.sample_train_set()

        self.tune_feature_fraction(self.n_trials_config[0])
        self.tune_num_leaves(self.n_trials_config[1])
        self.tune_bagging(self.n_trials_config[2])
        self.tune_feature_fraction_stage2(self.n_trials_config[3])
        self.tune_regularization_factors(self.n_trials_config[4])
        self.tune_min_data_in_leaf()
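The n_trials_config consumed here is presumably a per-step trial budget matching the call order above; a sketch with purely illustrative counts:

# Hypothetical per-step budget; the counts are illustrative, not defaults.
n_trials_config = [
    7,   # tune_feature_fraction
    20,  # tune_num_leaves
    10,  # tune_bagging
    6,   # tune_feature_fraction_stage2
    20,  # tune_regularization_factors
]
# tune_min_data_in_leaf() takes no budget argument in this variant.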
Example #7
    def fit(
        self,
        X: TwoDimArrayLikeType,
        y: OneDimArrayLikeType,
        sample_weight: Optional[OneDimArrayLikeType] = None,
        group: Optional[OneDimArrayLikeType] = None,
        eval_metric: Optional[Union[Callable, List[str], str]] = None,
        early_stopping_rounds: Optional[int] = 10,
        feature_name: Union[List[str], str] = "auto",
        categorical_feature: Union[List[int], List[str], str] = "auto",
        callbacks: Optional[List[Callable]] = None,
        init_model: Optional[Union[lgb.Booster, lgb.LGBMModel, str]] = None,
        groups: Optional[OneDimArrayLikeType] = None,
        optuna_callbacks: Optional[List[Callable]] = None,
        **fit_params: Any
    ) -> "LGBMModel":
        """Fit the model according to the given training data.

        Parameters
        ----------
        X
            Training data.

        y
            Target.

        sample_weight
            Weights of training data.

        group
            Group data of training data.

        eval_metric
            Evaluation metric. See
            https://lightgbm.readthedocs.io/en/latest/Parameters.html#metric.

        early_stopping_rounds
            Used to activate early stopping. The model will train until the
            validation score stops improving.

        feature_name
            Feature names. If 'auto' and data is a pandas DataFrame, data
            column names are used.

        categorical_feature
            Categorical features. If a list of int, interpreted as indices.
            If a list of strings, interpreted as feature names. If 'auto' and
            data is a pandas DataFrame, pandas categorical columns are used.
            All values in categorical features should be less than the int32
            max value (2147483647). Large values could be memory-consuming;
            consider using consecutive integers starting from zero. All
            negative values in categorical features are treated as missing
            values.

        callbacks
            List of callback functions that are applied at each iteration.

        init_model
            Filename of a LightGBM model, a Booster instance, or an LGBMModel
            instance used to continue training.

        groups
            Group labels for the samples used while splitting the dataset into
            train/test set. If `group` is not None, this parameter is ignored.

        optuna_callbacks
            List of Optuna callback functions that are invoked at the end of
            each trial.

        **fit_params
            Always ignored. This parameter exists for compatibility.

        Returns
        -------
        self
            Return self.
        """
        logger = logging.getLogger(__name__)

        X, y, sample_weight = check_fit_params(
            X,
            y,
            sample_weight=sample_weight,
            accept_sparse=True,
            ensure_min_samples=2,
            estimator=self,
            force_all_finite=False,
        )

        # See https://github.com/microsoft/LightGBM/issues/2319
        if group is None and groups is not None:
            groups, _ = pd.factorize(groups)
            indices = np.argsort(groups)
            X = _safe_indexing(X, indices)
            y = _safe_indexing(y, indices)
            sample_weight = _safe_indexing(sample_weight, indices)
            groups = _safe_indexing(groups, indices)
            _, group = np.unique(groups, return_counts=True)

        n_samples, self._n_features = X.shape  # type: Tuple[int, int]

        self._n_features_in = self._n_features

        is_classifier = self._estimator_type == "classifier"
        cv = check_cv(self.cv, y, classifier=is_classifier)

        seed = self._get_random_state()

        for key, value in fit_params.items():
            logger.warning("{}={} will be ignored.".format(key, value))

        params = self.get_params()

        alias._handling_alias_parameters(params)

        if (
            not any(
                verbose_alias in params
                for verbose_alias in ("verbose", "verbosity")
            )
            and self.silent
        ):
            params["verbose"] = -1

        for attr in (
            "class_weight",
            "cv",
            "enable_pruning",
            "importance_type",
            "n_estimators",
            "n_trials",
            "param_distributions",
            "refit",
            "silent",
            "study",
            "timeout",
            "model_dir",
        ):
            params.pop(attr, None)

        params["objective"] = self._get_objective()
        params["random_state"] = seed

        if self._n_classes is not None and self._n_classes > 2:
            params["num_classes"] = self._n_classes

        if callable(eval_metric):
            params["metric"] = "None"
            feval = _EvalFunctionWrapper(eval_metric)

            args = [p.name for p in signature(eval_metric).parameters.values()]

            if len(args) > 3:
                eval_name, _, is_higher_better = eval_metric(
                    y, y, sample_weight, group
                )
            elif len(args) > 2:
                eval_name, _, is_higher_better = eval_metric(
                    y, y, sample_weight
                )
            else:
                eval_name, _, is_higher_better = eval_metric(y, y)

        elif isinstance(eval_metric, list):
            raise ValueError("eval_metric is not allowed to be a list.")

        else:
            if eval_metric is None:
                params["metric"] = OBJECTIVE2METRIC[params["objective"]]
            else:
                params["metric"] = eval_metric

            feval = None
            eval_name = params["metric"]
            is_higher_better = _is_higher_better(params["metric"])

        fobj = (
            _ObjectiveFunctionWrapper(self.objective)
            if callable(self.objective)
            else None
        )

        init_model = (
            init_model.booster_
            if isinstance(init_model, lgb.LGBMModel)
            else init_model
        )

        self.study_ = self._make_study(is_higher_better)

        dataset = lgb.Dataset(
            X,
            label=y,
            group=group,
            weight=sample_weight,
            feature_name=feature_name,
            categorical_feature=categorical_feature,
        )

        model_dir = self._get_model_dir()
        weights = np.array(
            [
                np.sum(sample_weight[train])
                for train, _ in cv.split(X, y, groups=groups)
            ]
        )

        objective = _Objective(
            params,
            dataset,
            eval_name,
            is_higher_better,
            n_samples,
            model_dir,
            callbacks=callbacks,
            cv=cv,
            early_stopping_rounds=early_stopping_rounds,
            enable_pruning=self.enable_pruning,
            feval=feval,
            fobj=fobj,
            init_model=init_model,
            n_estimators=self.n_estimators,
            param_distributions=self.param_distributions,
        )

        logger.info("Searching the best hyperparameters...")

        start_time = time.perf_counter()

        self.study_.optimize(
            objective,
            callbacks=optuna_callbacks,
            catch=(),
            n_trials=self.n_trials,
            timeout=self.timeout,
        )

        elapsed_time = time.perf_counter() - start_time

        best_iteration = self.study_.best_trial.user_attrs["best_iteration"]

        self._best_iteration = (
            None if early_stopping_rounds is None else best_iteration
        )
        self._best_score = self.study_.best_value
        self._objective = params["objective"]
        self.best_params_ = {**params, **self.study_.best_params}
        self.n_splits_ = cv.get_n_splits(X, y, groups=groups)

        logger.info(
            "Finished hyperparameter search! "
            "(elapsed time: {:.3f} sec.) "
            "The best_iteration is {}.".format(elapsed_time, best_iteration)
        )

        logger.info("Making booster(s)...")

        start_time = time.perf_counter()

        self._Booster = self._make_booster(
            self.best_params_,
            dataset,
            best_iteration,
            self.best_index_,
            weights,
            fobj=fobj,
            feature_name=feature_name,
            categorical_feature=categorical_feature,
            callbacks=callbacks,
            init_model=init_model,
        )

        elapsed_time = time.perf_counter() - start_time

        logger.info(
            "Finished making booster(s)! "
            "(elapsed time: {:.3f} sec.)".format(elapsed_time)
        )

        if self.refit:
            self.refit_time_ = elapsed_time

        return self
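Finally, a hedged usage sketch of a scikit-learn style estimator exposing the fit() above. The import path and class name are assumed from the OptGBM project, which this snippet appears to come from; adjust to the actual estimator:

# Assumed import path; OGBMRegressor is not part of optuna itself.
from optgbm.sklearn import OGBMRegressor
from sklearn.datasets import make_regression

X, y = make_regression(n_samples=200, n_features=10, random_state=0)
model = OGBMRegressor(n_trials=20, random_state=0)
model.fit(X, y)  # runs the Optuna search, then builds the final booster(s)
print(model.best_params_)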