Beispiel #1
0
    def _ray_fit_preprocess(self, y) -> Callable:
        """This has been separated out so that it can be easily overwritten
        should a future xgboost version remove label encoding.

        Side effects: sets ``self.classes_`` and ``self.n_classes_`` (and
        ``self._le`` when label encoding is used).

        Returns:
            A callable that maps raw labels to encoded labels (identity
            when label encoding is disabled).
        """
        # pylint: disable = attribute-defined-outside-init,too-many-statements
        can_use_label_encoder = True
        # Older xgboost versions have no `use_label_encoder` attribute;
        # default to True to mirror their implicit label-encoding behavior.
        use_label_encoder = getattr(self, "use_label_encoder", True)
        label_encoding_check_error = (
            "The label must consist of integer "
            "labels of form 0, 1, 2, ..., [num_class - 1].")
        label_encoder_deprecation_msg = (
            "The use of label encoder in XGBClassifier is deprecated and will "
            "be removed in a future release. To remove this warning, do the "
            "following: 1) Pass option use_label_encoder=False when "
            "constructing XGBClassifier object; and 2) Encode your labels (y) "
            "as integers starting with 0, i.e. 0, 1, 2, ..., [num_class - 1].")

        # ray: modified this to allow for compatibility with legacy xgboost
        # GPU inputs (cuDF / cuPy) cannot go through the sklearn label
        # encoder, so labels must already be 0..num_class-1 integers.
        if (_is_cudf_df and _is_cudf_df(y)) or (_is_cudf_ser
                                                and _is_cudf_ser(y)):
            import cupy as cp  # pylint: disable=E0401

            self.classes_ = cp.unique(y.values)
            self.n_classes_ = len(self.classes_)
            can_use_label_encoder = False
            expected_classes = cp.arange(self.n_classes_)
            if (self.classes_.shape != expected_classes.shape
                    or not (self.classes_ == expected_classes).all()):
                raise ValueError(label_encoding_check_error)
        elif (_is_cupy_array and _is_cupy_array(y)):
            import cupy as cp  # pylint: disable=E0401

            self.classes_ = cp.unique(y)
            self.n_classes_ = len(self.classes_)
            can_use_label_encoder = False
            expected_classes = cp.arange(self.n_classes_)
            if (self.classes_.shape != expected_classes.shape
                    or not (self.classes_ == expected_classes).all()):
                raise ValueError(label_encoding_check_error)
        else:
            # Plain CPU input (numpy / pandas / lists).
            self.classes_ = np.unique(y)
            self.n_classes_ = len(self.classes_)
            # Without the encoder, labels must already be 0..n_classes-1.
            if not use_label_encoder and (not np.array_equal(
                    self.classes_, np.arange(self.n_classes_))):
                raise ValueError(label_encoding_check_error)

        if use_label_encoder:
            if not can_use_label_encoder:
                raise ValueError(
                    "The option use_label_encoder=True is incompatible with "
                    "inputs of type cuDF or cuPy. Please set "
                    "use_label_encoder=False when  constructing XGBClassifier "
                    "object. NOTE:" + label_encoder_deprecation_msg)
            # Only warn when the attribute actually exists (newer xgboost);
            # legacy xgboost has no such option to deprecate.
            if hasattr(self, "use_label_encoder"):
                warnings.warn(label_encoder_deprecation_msg, UserWarning)
            self._le = XGBoostLabelEncoder().fit(y)
            label_transform = self._le.transform
        else:
            label_transform = lambda x: x  # noqa: E731

        return label_transform
Beispiel #2
0
def save_model_to_local_file(booster, model_params, meta, filename):
    """Save a trained ``booster`` to ``filename`` and export a PMML copy.

    A throwaway sklearn-style estimator matching ``model_params["objective"]``
    is faked around the booster so it can be wrapped in a ``PMMLPipeline``;
    the PMML file is written next to the model as ``"<filename>.pmml"``.

    Args:
        booster: trained ``xgb.Booster`` instance.
        model_params: dict of XGBoost parameters; must contain ``objective``
            and, for ``multi:*`` objectives, a positive ``num_class``.
        meta: model metadata forwarded to ``save_model_metadata``.
        filename: output path for the booster model file.

    Raises:
        ValueError: if ``objective`` is missing or unsupported, or if
            ``num_class`` is missing/invalid for a multi-class objective.
    """
    from sklearn2pmml import PMMLPipeline, sklearn2pmml
    try:
        from xgboost.compat import XGBoostLabelEncoder
    except ImportError:
        # xgboost==0.82.0 does not have XGBoostLabelEncoder
        # in xgboost.compat.py
        from xgboost.sklearn import XGBLabelEncoder as XGBoostLabelEncoder

    objective = model_params.get("objective")
    if not objective:
        # Fail fast with a clear message instead of an AttributeError
        # when "objective" is absent from model_params.
        raise ValueError("model_params must specify an objective")
    bst_meta = dict()

    if objective.startswith("binary:") or objective.startswith("multi:"):
        if objective.startswith("binary:"):
            num_class = 2
        else:
            num_class = model_params.get("num_class")
            # Explicit raise instead of `assert`: asserts are stripped
            # when Python runs with -O, silently skipping validation.
            if num_class is None or num_class <= 0:
                raise ValueError("num_class should not be None")

        # To fake a trained XGBClassifier, there must be "_le", "classes_",
        # inside XGBClassifier. See here:
        # https://github.com/dmlc/xgboost/blob/d19cec70f1b40ea1e1a35101ca22e46dd4e4eecd/python-package/xgboost/sklearn.py#L356
        model = xgb.XGBClassifier()
        label_encoder = XGBoostLabelEncoder()
        label_encoder.fit(list(range(num_class)))
        model._le = label_encoder
        model.classes_ = model._le.classes_

        bst_meta["_le"] = {"classes_": model.classes_.tolist()}
        bst_meta["classes_"] = model.classes_.tolist()
    elif objective.startswith("reg:"):
        model = xgb.XGBRegressor()
    elif objective.startswith("rank:"):
        model = xgb.XGBRanker()
    else:
        raise ValueError(
            "Not supported objective {} for saving PMML".format(objective))

    model_type = type(model).__name__
    bst_meta["type"] = model_type

    # Meta data is needed for saving sklearn pipeline. See here:
    # https://github.com/dmlc/xgboost/blob/d19cec70f1b40ea1e1a35101ca22e46dd4e4eecd/python-package/xgboost/sklearn.py#L356
    booster.set_attr(scikit_learn=json.dumps(bst_meta))
    booster.save_model(filename)
    save_model_metadata("model_meta.json", meta)
    # Clear the attribute again so the saved file is the only place
    # carrying the faked sklearn metadata.
    booster.set_attr(scikit_learn=None)
    model.load_model(filename)

    pipeline = PMMLPipeline([(model_type, model)])
    sklearn2pmml(pipeline, "{}.pmml".format(filename))
Beispiel #3
0
class RayXGBClassifier(XGBClassifier, RayXGBMixin):
    """Distributed drop-in replacement for ``xgboost.XGBClassifier``.

    Training and prediction are delegated to xgboost-ray (``train`` /
    ``_ray_predict``); the scikit-learn interface is preserved apart from
    the additional ``ray_params`` / ``_remote`` / ``ray_dmatrix_params``
    keyword arguments.
    """

    # Wrap the parent constructor so it can warn about the installed
    # xgboost version at instantiation time.
    __init__ = _xgboost_version_warn(XGBClassifier.__init__)

    @_deprecate_positional_args
    def fit(
        self,
        X,
        y,
        *,
        sample_weight=None,
        base_margin=None,
        eval_set=None,
        eval_metric=None,
        early_stopping_rounds=None,
        verbose=True,
        xgb_model=None,
        sample_weight_eval_set=None,
        base_margin_eval_set=None,
        feature_weights=None,
        callbacks=None,
        ray_params: Union[None, RayParams, Dict] = None,
        _remote: Optional[bool] = None,
        ray_dmatrix_params: Optional[Dict] = None,
    ):
        # NOTE: the public docstring is attached below via ``fit.__doc__``.

        evals_result = {}
        ray_dmatrix_params = ray_dmatrix_params or {}

        params = self.get_xgb_params()

        # X may already be a RayDMatrix; in that case no label encoding or
        # matrix wrapping is performed here.
        train_dmatrix, evals = _check_if_params_are_ray_dmatrix(
            X, sample_weight, base_margin, eval_set, sample_weight_eval_set,
            base_margin_eval_set)

        if train_dmatrix is not None:
            if not hasattr(self, "use_label_encoder"):
                warnings.warn("If X is a RayDMatrix, no label encoding"
                              " will be performed. Ensure the labels are"
                              " encoded.")
            elif self.use_label_encoder:
                raise ValueError(
                    "X cannot be a RayDMatrix if `use_label_encoder` "
                    "is set to True")
            # Labels are not inspectable inside a RayDMatrix, so the class
            # count must come from the `num_class` parameter.
            if "num_class" not in params:
                raise ValueError(
                    "`num_class` must be set during initalization if X"
                    " is a RayDMatrix")
            self.classes_ = list(range(0, params["num_class"]))
            self.n_classes_ = params["num_class"]
            # Binary objectives must not carry `num_class`.
            if self.n_classes_ <= 2:
                params.pop("num_class")
            label_transform = lambda x: x  # noqa: E731
        else:
            if len(X.shape) != 2:
                # Simply raise an error here since there might be many
                # different ways of reshaping
                raise ValueError(
                    "Please reshape the input data X into 2-dimensional "
                    "matrix.")

            # Sets classes_/n_classes_ and returns the label transform.
            label_transform = self._ray_fit_preprocess(y)

        if callable(self.objective):
            obj = _objective_decorator(self.objective)
            # Use default value. Is it really not used ?
            params["objective"] = "binary:logistic"
        else:
            obj = None

        if self.n_classes_ > 2:
            # Switch to using a multiclass objective in the underlying
            # XGB instance
            params["objective"] = "multi:softprob"
            params["num_class"] = self.n_classes_

        # _configure_fit changed signature in xgboost 1.6; probe the old
        # call first and fall back to the newer 5-tuple form.
        try:
            model, feval, params = self._configure_fit(xgb_model, eval_metric,
                                                       params)
        except TypeError:
            # XGBoost >= 1.6.0
            (model, feval, params, early_stopping_rounds,
             callbacks) = self._configure_fit(xgb_model, eval_metric, params,
                                              early_stopping_rounds, callbacks)

        if train_dmatrix is None:
            # Wrap X/y and eval sets into RayDMatrix objects.
            train_dmatrix, evals = _wrap_evaluation_matrices(
                missing=self.missing,
                X=X,
                y=y,
                group=None,
                qid=None,
                sample_weight=sample_weight,
                base_margin=base_margin,
                feature_weights=feature_weights,
                eval_set=eval_set,
                sample_weight_eval_set=sample_weight_eval_set,
                base_margin_eval_set=base_margin_eval_set,
                eval_group=None,
                eval_qid=None,
                # changed in xgboost-ray:
                create_dmatrix=lambda **kwargs: RayDMatrix(**{
                    **kwargs,
                    **ray_dmatrix_params
                }),
                **self._ray_get_wrap_evaluation_matrices_compat_kwargs(
                    label_transform=label_transform))

        # remove those as they will be set in RayXGBoostActor
        params.pop("n_jobs", None)
        params.pop("nthread", None)

        ray_params = self._ray_set_ray_params_n_jobs(ray_params, self.n_jobs)

        additional_results = {}

        # Distributed training via xgboost-ray.
        self._Booster = train(
            params,
            train_dmatrix,
            self.get_num_boosting_rounds(),
            evals=evals,
            early_stopping_rounds=early_stopping_rounds,
            evals_result=evals_result,
            obj=obj,
            feval=feval,
            verbose_eval=verbose,
            xgb_model=model,
            callbacks=callbacks,
            # changed in xgboost-ray:
            additional_results=additional_results,
            ray_params=ray_params,
            _remote=_remote,
        )

        if not callable(self.objective):
            # Persist the possibly-rewritten objective (e.g. multi:softprob).
            self.objective = params["objective"]

        self.additional_results_ = additional_results

        self._set_evaluation_result(evals_result)
        return self

    fit.__doc__ = _treat_X_doc(XGBClassifier.fit.__doc__) + _RAY_PARAMS_DOC

    def _ray_fit_preprocess(self, y) -> Callable:
        """This has been separated out so that it can be easily overwritten
        should a future xgboost version remove label encoding"""
        # pylint: disable = attribute-defined-outside-init,too-many-statements
        can_use_label_encoder = True
        # Older xgboost versions have no `use_label_encoder` attribute;
        # default to True to mirror their implicit label-encoding behavior.
        use_label_encoder = getattr(self, "use_label_encoder", True)
        label_encoding_check_error = (
            "The label must consist of integer "
            "labels of form 0, 1, 2, ..., [num_class - 1].")
        label_encoder_deprecation_msg = (
            "The use of label encoder in XGBClassifier is deprecated and will "
            "be removed in a future release. To remove this warning, do the "
            "following: 1) Pass option use_label_encoder=False when "
            "constructing XGBClassifier object; and 2) Encode your labels (y) "
            "as integers starting with 0, i.e. 0, 1, 2, ..., [num_class - 1].")

        # ray: modified this to allow for compatibility with legacy xgboost
        # GPU inputs (cuDF / cuPy) cannot go through the sklearn label
        # encoder, so labels must already be 0..num_class-1 integers.
        if (_is_cudf_df and _is_cudf_df(y)) or (_is_cudf_ser
                                                and _is_cudf_ser(y)):
            import cupy as cp  # pylint: disable=E0401

            self.classes_ = cp.unique(y.values)
            self.n_classes_ = len(self.classes_)
            can_use_label_encoder = False
            expected_classes = cp.arange(self.n_classes_)
            if (self.classes_.shape != expected_classes.shape
                    or not (self.classes_ == expected_classes).all()):
                raise ValueError(label_encoding_check_error)
        elif (_is_cupy_array and _is_cupy_array(y)):
            import cupy as cp  # pylint: disable=E0401

            self.classes_ = cp.unique(y)
            self.n_classes_ = len(self.classes_)
            can_use_label_encoder = False
            expected_classes = cp.arange(self.n_classes_)
            if (self.classes_.shape != expected_classes.shape
                    or not (self.classes_ == expected_classes).all()):
                raise ValueError(label_encoding_check_error)
        else:
            # Plain CPU input (numpy / pandas / lists).
            self.classes_ = np.unique(y)
            self.n_classes_ = len(self.classes_)
            # Without the encoder, labels must already be 0..n_classes-1.
            if not use_label_encoder and (not np.array_equal(
                    self.classes_, np.arange(self.n_classes_))):
                raise ValueError(label_encoding_check_error)

        if use_label_encoder:
            if not can_use_label_encoder:
                raise ValueError(
                    "The option use_label_encoder=True is incompatible with "
                    "inputs of type cuDF or cuPy. Please set "
                    "use_label_encoder=False when  constructing XGBClassifier "
                    "object. NOTE:" + label_encoder_deprecation_msg)
            # Only warn when the attribute actually exists (newer xgboost);
            # legacy xgboost has no such option to deprecate.
            if hasattr(self, "use_label_encoder"):
                warnings.warn(label_encoder_deprecation_msg, UserWarning)
            self._le = XGBoostLabelEncoder().fit(y)
            label_transform = self._le.transform
        else:
            label_transform = lambda x: x  # noqa: E731

        return label_transform

    def _can_use_inplace_predict(self) -> bool:
        # In-place prediction is not supported through the Ray backend.
        return False

    def predict(
        self,
        X,
        output_margin=False,
        ntree_limit=None,
        validate_features=True,
        base_margin=None,
        iteration_range: Optional[Tuple[int, int]] = None,
        ray_params: Union[None, RayParams, Dict] = None,
        _remote: Optional[bool] = None,
        ray_dmatrix_params: Optional[Dict] = None,
    ):
        # NOTE: the public docstring is attached below via ``predict.__doc__``.
        class_probs = self._ray_predict(X=X,
                                        output_margin=output_margin,
                                        ntree_limit=ntree_limit,
                                        validate_features=validate_features,
                                        base_margin=base_margin,
                                        iteration_range=iteration_range,
                                        ray_params=ray_params,
                                        _remote=_remote,
                                        ray_dmatrix_params=ray_dmatrix_params)
        if output_margin:
            # If output_margin is active, simply return the scores
            return class_probs

        if len(class_probs.shape) > 1:
            # turns softprob into softmax
            column_indexes = np.argmax(class_probs, axis=1)
        else:
            # turns soft logit into class label
            column_indexes = np.repeat(0, class_probs.shape[0])
            column_indexes[class_probs > 0.5] = 1

        # Map encoded labels back to the original classes when a label
        # encoder was fitted during fit().
        if hasattr(self, "_le"):
            return self._le.inverse_transform(column_indexes)
        return column_indexes

    predict.__doc__ = _treat_X_doc(XGBModel.predict.__doc__) + _RAY_PARAMS_DOC

    def predict_proba(
        self,
        X,
        ntree_limit=None,
        validate_features=False,
        base_margin=None,
        iteration_range: Optional[Tuple[int, int]] = None,
        ray_params: Union[None, RayParams, Dict] = None,
        _remote: Optional[bool] = None,
        ray_dmatrix_params: Optional[Dict] = None,
    ) -> np.ndarray:
        # NOTE: the public docstring is attached below via
        # ``predict_proba.__doc__``.

        # For multi:softmax the raw margins are requested so probabilities
        # can be reconstructed by _cls_predict_proba.
        class_probs = self._ray_predict(
            X=X,
            output_margin=self.objective == "multi:softmax",
            ntree_limit=ntree_limit,
            validate_features=validate_features,
            base_margin=base_margin,
            iteration_range=iteration_range,
            ray_params=ray_params,
            _remote=_remote,
            ray_dmatrix_params=ray_dmatrix_params)
        # If model is loaded from a raw booster there's no `n_classes_`
        return _cls_predict_proba(getattr(self, "n_classes_", None),
                                  class_probs, np.vstack)

    def load_model(self, fname):
        # Ensure a Booster exists before delegating, since the parent
        # implementation assumes `_Booster` is already set.
        if not hasattr(self, "_Booster"):
            self._Booster = Booster()
        return super().load_model(fname)

    predict_proba.__doc__ = (
        _treat_X_doc(XGBClassifier.predict_proba.__doc__) + _RAY_PARAMS_DOC)
Beispiel #4
0
    def cv(
            self,
            # Dataset
            data,
            cat_features=None,
            groups=None,
            folds=None,
            # Model
            params=None,
            fit_params=None,
            convert_to_sklearn=True,
            # Optuna
            tune_model=False,
            optuna_params=None,
            maximize=None,
            eval_metric=None,
            n_trials=None,
            timeout=None,
            lgbm_n_trials=None,
            # Misc
            logger=None,
            n_jobs=-1):
        """Run cross-validated training for the wrapped LightGBM/XGBoost model.

        Sets ``self.model`` (list of per-fold boosters), ``self.best_score``,
        ``self.best_iteration`` and ``self.is_trained`` as side effects.

        Args:
            data: tuple of (X, y) or (X, y, sample_weight).
            folds: CV splitter; defaults to a fresh ``KFold(n_splits=5)``.
            params / fit_params: model and fit keyword dicts (default empty).
            lgbm_n_trials: Optuna LightGBM tuner step budget
                (default ``[7, 20, 10, 6, 20]``).
            maximize: metric direction; inferred from params when None.

        Raises:
            NotImplementedError: for catboost or unknown model names, or when
                a custom ``eval_metric`` is combined with XGBoost tuning.
            ValueError: when ``eval_metric`` is given without a direction.
        """
        # Replace mutable/stateful defaults: dict/list defaults and a
        # def-time KFold instance would be shared across all calls.
        folds = KFold(n_splits=5) if folds is None else folds
        params = {} if params is None else params
        fit_params = {} if fit_params is None else fit_params
        if lgbm_n_trials is None:
            lgbm_n_trials = [7, 20, 10, 6, 20]

        self._data_check(data)
        self.n_features = data[0].shape[1]
        self.n_classes = len(np.unique(data[1]))
        if isinstance(data[0], pd.DataFrame):
            self.feature_names = data[0].columns.tolist()
        else:
            self.feature_names = [f'f{i}' for i in range(self.n_features)]

        main_metric, maximize = self._parse_eval_metric(params, maximize)
        if eval_metric is not None and maximize is None:
            raise ValueError('metric direction must be specified.')

        # Normalize logger: path-like -> file logger, None -> stdout only.
        if isinstance(logger, (str, Path)):
            logger = LGBMLogger(logger, stdout=True, file=True)
        elif logger is None:
            logger = LGBMLogger(logger, stdout=True, file=False)
        assert isinstance(logger, LGBMLogger)

        if self.model_name in MODEL_ZOO['cat']:
            raise NotImplementedError('catboost is incompatible with .cv().')

        elif self.model_name in MODEL_ZOO['lgb']:
            ''' LightGBM '''
            dtrain = lgb.Dataset(data=data[0],
                                 label=data[1],
                                 weight=data[2] if len(data) == 3 else None,
                                 categorical_feature=cat_features)

            # Optuna intergration
            if tune_model:
                _fit_params = fit_params.copy()
                _params = params.copy()
                _params.update({'metric': main_metric})
                # Silence per-iteration output during tuning.
                if 'verbose_eval' in _fit_params.keys():
                    _fit_params.update({'verbose_eval': False})
                tuner = lgb_tune.LightGBMTunerCV(
                    _params,
                    dtrain,
                    folds=folds,
                    n_trials_config=lgbm_n_trials,
                    return_cvbooster=True,
                    optuna_callbacks=[logger.optuna],
                    show_progress_bar=False,
                    **_fit_params)
                tuner.run()
                self.model = tuner.get_best_booster().boosters
                self.best_iteration = tuner.get_best_booster().best_iteration
                self.best_score = tuner.best_score
                del tuner
            else:
                _params = params.copy()
                model_extractor = ModelExtractor()
                res = lgb.cv(_params,
                             train_set=dtrain,
                             folds=folds,
                             callbacks=[logger.lgbm, model_extractor],
                             **fit_params)
                self.model = model_extractor.get_model().boosters
                self.best_iteration = model_extractor.get_best_iteration()
                self.evals_result = res
                if maximize:
                    self.best_score = np.max(res[f'{main_metric}-mean'])
                else:
                    self.best_score = np.min(res[f'{main_metric}-mean'])

            # Propagate the CV-wide best iteration to every fold booster.
            for i in range(len(self.model)):
                self.model[i].best_iteration = self.best_iteration

            logger(
                f'[{self.best_iteration}]\tbest score is {self.best_score:.6f}'
            )

            if convert_to_sklearn:
                for i in range(len(self.model)):
                    self.model[i] = booster2sklearn(self.model[i],
                                                    self.model_type,
                                                    self.n_features,
                                                    self.n_classes)

                    if self.model_name == 'LGBMClassifier':
                        # sklearn wrapper needs the internal label encoder.
                        self.model[i]._le = _LGBMLabelEncoder().fit(data[1])

        elif self.model_name in MODEL_ZOO['xgb']:
            ''' XGBoost '''
            dtrain = xgb.DMatrix(data=data[0],
                                 label=data[1],
                                 weight=data[2] if len(data) == 3 else None,
                                 nthread=n_jobs)

            # Optuna integration
            if tune_model:
                _fit_params = fit_params.copy()
                if 'verbose_eval' in _fit_params.keys():
                    _fit_params.update({'verbose_eval': False})
                self.best_score = None
                self.best_model = None
                self.best_iteration = 0

                def xgb_objective(trial):
                    _params = params.copy()
                    if optuna_params is None:
                        _params = PARAMS_ZOO[self.model_name](trial, params)
                    else:
                        _params = optuna_params(trial, _params)

                    model_extractor = ModelExtractor()
                    res = xgb.cv(_params,
                                 dtrain=dtrain,
                                 folds=folds,
                                 maximize=maximize,
                                 callbacks=[model_extractor],
                                 **_fit_params)
                    self.model = model_extractor.get_model()

                    if eval_metric is None:
                        if maximize:
                            score = np.max(res[f'test-{main_metric}-mean'])
                            best_iteration = np.argmax(
                                res[f'test-{main_metric}-mean'])
                        else:
                            score = np.min(res[f'test-{main_metric}-mean'])
                            best_iteration = np.argmin(
                                res[f'test-{main_metric}-mean'])
                    else:
                        raise NotImplementedError(
                            'Do not use custom eval_metric for .cv() :(')

                    if self.best_score is None:
                        # First trial: record everything, including the best
                        # iteration (previously left at 0 even when the first
                        # trial ended up being the best one).
                        self.best_score = score
                        self.best_model = self.model.copy()
                        self.best_iteration = best_iteration
                        self.evals_result = res
                    elif maximize == True and self.best_score < score:
                        self.best_score = score
                        self.best_model = self.model.copy()
                        self.best_iteration = best_iteration
                        self.evals_result = res
                    elif maximize == False and self.best_score > score:
                        self.best_score = score
                        self.best_model = self.model.copy()
                        self.best_iteration = best_iteration
                        self.evals_result = res

                    return score

                study = optuna.create_study(
                    direction='maximize' if maximize else 'minimize')
                study.optimize(xgb_objective,
                               n_trials=n_trials,
                               timeout=timeout,
                               callbacks=[logger.optuna],
                               n_jobs=1)
                self.model = self.best_model.copy()
                del self.best_model
            else:
                _params = params.copy()

                model_extractor = ModelExtractor()
                res = xgb.cv(_params,
                             dtrain=dtrain,
                             folds=folds,
                             maximize=maximize,
                             callbacks=[logger.lgbm, model_extractor],
                             **fit_params)
                self.model = model_extractor.get_model()

                if maximize:
                    self.best_score = np.max(res[f'test-{main_metric}-mean'])
                    self.best_iteration = np.argmax(
                        res[f'test-{main_metric}-mean'])
                else:
                    self.best_score = np.min(res[f'test-{main_metric}-mean'])
                    self.best_iteration = np.argmin(
                        res[f'test-{main_metric}-mean'])

                for i in range(len(self.model)):
                    self.model[i].best_ntree_limit = self.best_iteration

            logger(
                f'[{self.best_iteration}]\tbest score is {self.best_score:.6f}'
            )

            if convert_to_sklearn:
                for i in range(len(self.model)):
                    self.model[i] = booster2sklearn(self.model[i],
                                                    self.model_type,
                                                    self.n_features,
                                                    self.n_classes)
                    if self.model_name == 'XGBClassifier':
                        # sklearn wrapper needs the internal label encoder.
                        self.model[i]._le = XGBoostLabelEncoder().fit(data[1])

        else:
            raise NotImplementedError(
                f'{self.model_name} is incompatible with .cv().')

        self.is_trained = True
0
    def train(
            self,
            # Dataset
            train_data,
            valid_data=(),
            cat_features=None,
            # Model
            params={},
            fit_params={},
            convert_to_sklearn=True,
            # Probability calibration
            calibration=False,
            calibration_method='isotonic',
            calibration_cv=5,
            # Optuna
            tune_model=False,
            optuna_params=None,
            maximize=None,
            eval_metric=None,
            n_trials=None,
            timeout=None,
            lgbm_n_trials=[7, 20, 10, 6, 20],
            # Misc
            logger=None,
            n_jobs=-1):

        self._data_check(train_data)
        self._data_check(valid_data)
        self.n_features = train_data[0].shape[1]
        self.n_classes = len(np.unique(train_data[1]))
        if isinstance(train_data[0], pd.DataFrame):
            self.feature_names = train_data[0].columns.tolist()
        else:
            self.feature_names = [f'f{i}' for i in range(self.n_features)]

        main_metric, maximize = self._parse_eval_metric(params, maximize)
        if eval_metric is not None and maximize is None:
            raise ValueError('metric direction must be specified.')

        if isinstance(logger, (str, Path)):
            logger = LGBMLogger(logger, stdout=True, file=True)
        elif logger is None:
            logger = LGBMLogger(logger, stdout=True, file=False)
        assert isinstance(logger, LGBMLogger)

        # logger(f'params: \n{params}')
        # logger(f'fit_params: \n{fit_params}')

        if self.model_name in MODEL_ZOO['cat']:
            ''' Catboost '''
            dtrain = CatPool(
                data=train_data[0],
                label=train_data[1],
                weight=train_data[2] if len(train_data) == 3 else None,
                cat_features=cat_features,
                thread_count=n_jobs)
            if len(valid_data) > 0:
                dvalid = CatPool(
                    data=valid_data[0],
                    label=valid_data[1],
                    weight=valid_data[2] if len(train_data) == 3 else None,
                    cat_features=cat_features,
                    thread_count=n_jobs)
            else:
                dvalid = dtrain

            # Optuna integration
            if tune_model:
                _fit_params = fit_params.copy()
                if 'verbose_eval' in _fit_params.keys():
                    _fit_params.update({'verbose_eval': False})
                self.best_score = None
                self.best_model = None

                def cat_objective(trial):
                    _params = params.copy()
                    if optuna_params is None:
                        _params = PARAMS_ZOO[self.model_name](trial, _params)
                    else:
                        _params = optuna_params(trial, _params)

                    self.model = self.model_type(**_params)
                    self.model.fit(X=dtrain, eval_set=dvalid, **_fit_params)

                    if eval_metric is None:
                        score = self.model.get_best_score(
                        )['validation'][main_metric]
                    else:
                        score = eval_metric(self.model, valid_data)

                    if self.best_score is None:
                        self.best_score = score
                        self.best_model = self.model.copy()
                        self.best_iteration = self.model.get_best_iteration()
                        self.evals_result = self.model.get_evals_result()

                    if maximize == True and self.best_score < score:
                        self.best_model = self.model.copy()
                        self.best_score = score
                        self.best_iteration = self.model.get_best_iteration()
                        self.evals_result = self.model.get_evals_result()
                    elif maximize == False and self.best_score > score:
                        self.best_model = self.model.copy()
                        self.best_score = score
                        self.best_iteration = self.model.get_best_iteration()
                        self.evals_result = self.model.get_evals_result()

                    return score

                study = optuna.create_study(
                    direction='maximize' if maximize else 'minimize')
                study.optimize(cat_objective,
                               n_trials=n_trials,
                               timeout=timeout,
                               callbacks=[logger.optuna],
                               n_jobs=1)
                self.model = self.best_model.copy()
                del self.best_model

            else:
                _params = params.copy()
                self.model = self.model_type(**_params)
                self.model.fit(X=dtrain, eval_set=dvalid, **fit_params)
                self.best_score = self.model.get_best_score(
                )['validation'][main_metric]
                self.evals_result = self.model.get_evals_result()
                self.best_iteration = self.model.get_best_iteration()

        elif self.model_name in MODEL_ZOO['lgb']:
            ''' LightGBM '''
            dtrain = lgb.Dataset(
                data=train_data[0],
                label=train_data[1],
                weight=train_data[2] if len(train_data) == 3 else None,
                categorical_feature=cat_features)
            if len(valid_data) > 0:
                dvalid = lgb.Dataset(
                    data=valid_data[0],
                    label=valid_data[1],
                    weight=valid_data[2] if len(train_data) == 3 else None,
                    categorical_feature=cat_features)
            else:
                dvalid = dtrain

            # Optuna intergration
            if tune_model:
                _fit_params = fit_params.copy()
                _params = params.copy()
                if 'verbose_eval' in _fit_params.keys():
                    _fit_params.update({'verbose_eval': False})
                _params.update({'metric': main_metric})
                tuner = lgb_tune.LightGBMTuner(
                    _params,
                    train_set=dtrain,
                    valid_sets=[dtrain, dvalid],
                    n_trials_config=lgbm_n_trials,
                    show_progress_bar=False,
                    optuna_callbacks=[logger.optuna],
                    **_fit_params)
                tuner.run()
                self.model = tuner.get_best_booster()
                self.best_score = tuner.best_score
                del tuner
            else:
                _params = params.copy()

                res = {}
                self.model = lgb.train(_params,
                                       train_set=dtrain,
                                       valid_sets=[dtrain, dvalid],
                                       callbacks=[logger.lgbm],
                                       evals_result=res,
                                       **fit_params)
                if maximize:
                    self.best_score = np.max(res['valid_1'][main_metric])
                else:
                    self.best_score = np.min(res['valid_1'][main_metric])
                self.evals_result = res

            self.best_iteration = self.model.best_iteration

            if convert_to_sklearn:
                self.model = booster2sklearn(self.model, self.model_type,
                                             self.n_features, self.n_classes)
                if self.model_name == 'LGBMClassifier':
                    self.model._le = _LGBMLabelEncoder().fit(
                        train_data[1])  # internal label encoder

        elif self.model_name in MODEL_ZOO['xgb']:
            ''' XGBoost '''
            dtrain = xgb.DMatrix(
                data=train_data[0],
                label=train_data[1],
                weight=train_data[2] if len(train_data) == 3 else None,
                nthread=n_jobs)
            if len(valid_data) > 0:
                dvalid = xgb.DMatrix(
                    data=valid_data[0],
                    label=valid_data[1],
                    weight=valid_data[2] if len(train_data) == 3 else None,
                    nthread=n_jobs)
            else:
                dvalid = dtrain

            # Optuna integration
            if tune_model:
                _fit_params = fit_params.copy()
                if 'verbose_eval' in _fit_params.keys():
                    _fit_params.update({'verbose_eval': False})
                self.best_score = None
                self.best_model = None

                def xgb_objective(trial):
                    """Optuna objective: train one booster with sampled
                    hyperparameters, score it on the validation split, and
                    track the best model seen so far on self.best_model /
                    self.best_score / self.evals_result."""
                    _params = params.copy()
                    # Sample hyperparameters from the built-in search space
                    # for this model, or from a user-supplied function.
                    if optuna_params is None:
                        _params = PARAMS_ZOO[self.model_name](trial, _params)
                    else:
                        _params = optuna_params(trial, _params)

                    res = {}
                    # Prune unpromising trials on the validation metric.
                    pruning_callback = optuna.integration.XGBoostPruningCallback(
                        trial, f'valid-{main_metric}')
                    self.model = xgb.train(_params,
                                           dtrain=dtrain,
                                           evals=[(dtrain, 'train'),
                                                  (dvalid, 'valid')],
                                           evals_result=res,
                                           callbacks=[pruning_callback],
                                           **_fit_params)

                    # Trial score: best value of the tracked metric across
                    # rounds, or a user-supplied metric on the valid split.
                    if eval_metric is None:
                        if maximize:
                            score = np.max(res['valid'][main_metric])
                        else:
                            score = np.min(res['valid'][main_metric])
                    else:
                        score = eval_metric(self.model, dvalid)

                    # Keep a copy of the best booster across trials.
                    # (Replaces three near-identical branches and the
                    # `maximize == True` anti-pattern.)
                    improved = (
                        self.best_score is None
                        or (maximize and score > self.best_score)
                        or (not maximize and score < self.best_score))
                    if improved:
                        self.best_score = score
                        self.best_model = self.model.copy()
                        self.evals_result = res

                    return score

                study = optuna.create_study(
                    direction='maximize' if maximize else 'minimize')
                # n_jobs=1: the objective mutates shared state on self
                # (best_model / best_score / evals_result).
                study.optimize(xgb_objective,
                               n_trials=n_trials,
                               timeout=timeout,
                               callbacks=[logger.optuna],
                               n_jobs=1)
                # Promote the best booster found during tuning.
                self.model = self.best_model.copy()
                del self.best_model
            else:
                _params = params.copy()

                res = {}
                # NOTE(review): callbacks uses logger.lgbm inside the XGBoost
                # branch — possibly intentional reuse of the same callback,
                # but confirm an xgb-specific logger was not intended.
                self.model = xgb.train(_params,
                                       dtrain=dtrain,
                                       evals=[(dtrain, 'train'),
                                              (dvalid, 'valid')],
                                       callbacks=[logger.lgbm],
                                       evals_result=res,
                                       **fit_params)
                # Best score = best (max/min) value of the tracked metric on
                # the validation set across boosting rounds.
                if maximize:
                    self.best_score = np.max(res['valid'][main_metric])
                else:
                    self.best_score = np.min(res['valid'][main_metric])
                self.evals_result = res

            # NOTE(review): best_ntree_limit is deprecated in recent xgboost
            # releases (best_iteration is the replacement) — confirm the
            # pinned xgboost version before upgrading.
            self.best_iteration = self.model.best_ntree_limit

            if convert_to_sklearn:
                # Wrap the raw Booster in the sklearn-compatible estimator.
                self.model = booster2sklearn(self.model, self.model_type,
                                             self.n_features, self.n_classes)
                if self.model_name == 'XGBClassifier':
                    # Fit the internal label encoder the sklearn wrapper
                    # expects for mapping class indices back to labels.
                    self.model._le = XGBoostLabelEncoder().fit(train_data[1])
        else:
            ''' Other skelarn models '''
            if eval_metric is None:
                if self.n_classes == 2:
                    eval_metric = auc_metric
                    maximize = True
                else:
                    eval_metric = mse_metric
                    maximize = False
                print('eval_metric automatically selected.')

            # Optuna integration
            if tune_model:
                self.best_score = None
                self.best_model = None

                def sklearn_objective(trial):
                    """Optuna objective for generic sklearn estimators: fit
                    with sampled hyperparameters, score on the validation
                    split, and track the best model on self."""
                    _params = params.copy()
                    if optuna_params is None:
                        # BUG FIX: pass the copy (_params), not the shared
                        # params dict, so the search-space builder cannot
                        # mutate the caller's parameters (matches the
                        # xgb objective, which already passes _params).
                        _params = PARAMS_ZOO[self.model_name](trial, _params)
                    else:
                        _params = optuna_params(trial, _params)

                    self.model = self.model_type(**_params)
                    self.model.fit(train_data[0], train_data[1], **fit_params)
                    score = eval_metric(self.model, valid_data)

                    # Keep a deep copy of the best estimator across trials.
                    # (Replaces three near-identical branches and the
                    # `maximize == True` anti-pattern.)
                    improved = (
                        self.best_score is None
                        or (maximize and score > self.best_score)
                        or (not maximize and score < self.best_score))
                    if improved:
                        self.best_model = deepcopy(self.model)
                        self.best_score = score

                    return score

                study = optuna.create_study(
                    direction='maximize' if maximize else 'minimize')
                # n_jobs=1: the objective mutates shared state on self
                # (best_model / best_score).
                study.optimize(sklearn_objective,
                               n_trials=n_trials,
                               timeout=timeout,
                               callbacks=[logger.optuna],
                               n_jobs=1)
                # Promote the best estimator found during tuning.
                self.model = deepcopy(self.best_model)
                del self.best_model
            else:
                # No tuning: single fit with the supplied parameters.
                self.model = self.model_type(**params)
                self.model.fit(train_data[0], train_data[1], **fit_params)
                self.best_score = eval_metric(self.model, valid_data)
                logger(f'[None]\tbest score is {self.best_score:.6f}')

        self.is_trained = True

        if calibration:
            # NOTE(review): calibration branch is a stub (pass) — confirm
            # whether probability calibration is implemented further down
            # or was never finished.
            pass