def save_model_to_local_file(booster, model_params, meta, filename):
    from sklearn2pmml import PMMLPipeline, sklearn2pmml
    try:
        from xgboost.compat import XGBoostLabelEncoder
    except ImportError:
        # xgboost==0.82.0 does not have XGBoostLabelEncoder
        # in xgboost/compat.py
        from xgboost.sklearn import XGBLabelEncoder as XGBoostLabelEncoder

    objective = model_params.get("objective")
    bst_meta = dict()

    if objective.startswith("binary:") or objective.startswith("multi:"):
        if objective.startswith("binary:"):
            num_class = 2
        else:
            num_class = model_params.get("num_class")
        assert num_class is not None and num_class > 0, \
            "num_class should not be None"

        # To fake a trained XGBClassifier, there must be "_le", "classes_",
        # inside XGBClassifier. See here:
        # https://github.com/dmlc/xgboost/blob/d19cec70f1b40ea1e1a35101ca22e46dd4e4eecd/python-package/xgboost/sklearn.py#L356
        model = xgb.XGBClassifier()
        label_encoder = XGBoostLabelEncoder()
        label_encoder.fit(list(range(num_class)))
        model._le = label_encoder
        model.classes_ = model._le.classes_

        bst_meta["_le"] = {"classes_": model.classes_.tolist()}
        bst_meta["classes_"] = model.classes_.tolist()
    elif objective.startswith("reg:"):
        model = xgb.XGBRegressor()
    elif objective.startswith("rank:"):
        model = xgb.XGBRanker()
    else:
        raise ValueError(
            "Unsupported objective {} for saving PMML".format(objective))

    model_type = type(model).__name__
    bst_meta["type"] = model_type

    # Metadata is needed for saving the sklearn pipeline. See here:
    # https://github.com/dmlc/xgboost/blob/d19cec70f1b40ea1e1a35101ca22e46dd4e4eecd/python-package/xgboost/sklearn.py#L356
    booster.set_attr(scikit_learn=json.dumps(bst_meta))
    booster.save_model(filename)
    save_model_metadata("model_meta.json", meta)
    # Clear the attribute again so the saved model file stays clean.
    booster.set_attr(scikit_learn=None)

    model.load_model(filename)

    pipeline = PMMLPipeline([(model_type, model)])
    sklearn2pmml(pipeline, "{}.pmml".format(filename))
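
# --- Usage sketch (illustrative; not part of the original source). ---
# A minimal example of how save_model_to_local_file might be called after
# training a raw Booster. Assumes xgboost and sklearn2pmml (plus a Java
# runtime for the PMML conversion) are installed, and that the module-level
# xgb/json imports and save_model_metadata helper exist as in the snippet
# above. The data, params, and meta values are made up for the example.
#
#   import numpy as np
#   import xgboost as xgb
#
#   X = np.random.rand(100, 4)
#   y = np.random.randint(0, 3, size=100)
#   model_params = {"objective": "multi:softprob", "num_class": 3}
#   booster = xgb.train(model_params, xgb.DMatrix(X, label=y),
#                       num_boost_round=10)
#
#   # meta is whatever save_model_metadata expects; an empty dict here.
#   save_model_to_local_file(booster, model_params, meta={},
#                            filename="my_model")
#   # -> writes "my_model" (XGBoost format) and "my_model.pmml"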
class RayXGBClassifier(XGBClassifier, RayXGBMixin):
    __init__ = _xgboost_version_warn(XGBClassifier.__init__)

    @_deprecate_positional_args
    def fit(
        self,
        X,
        y,
        *,
        sample_weight=None,
        base_margin=None,
        eval_set=None,
        eval_metric=None,
        early_stopping_rounds=None,
        verbose=True,
        xgb_model=None,
        sample_weight_eval_set=None,
        base_margin_eval_set=None,
        feature_weights=None,
        callbacks=None,
        ray_params: Union[None, RayParams, Dict] = None,
        _remote: Optional[bool] = None,
        ray_dmatrix_params: Optional[Dict] = None,
    ):
        evals_result = {}
        ray_dmatrix_params = ray_dmatrix_params or {}

        params = self.get_xgb_params()

        train_dmatrix, evals = _check_if_params_are_ray_dmatrix(
            X, sample_weight, base_margin, eval_set, sample_weight_eval_set,
            base_margin_eval_set)

        if train_dmatrix is not None:
            if not hasattr(self, "use_label_encoder"):
                warnings.warn("If X is a RayDMatrix, no label encoding"
                              " will be performed. Ensure the labels are"
                              " encoded.")
            elif self.use_label_encoder:
                raise ValueError(
                    "X cannot be a RayDMatrix if `use_label_encoder` "
                    "is set to True")
            if "num_class" not in params:
                raise ValueError(
                    "`num_class` must be set during initialization if X"
                    " is a RayDMatrix")
            self.classes_ = list(range(0, params["num_class"]))
            self.n_classes_ = params["num_class"]
            if self.n_classes_ <= 2:
                params.pop("num_class")
            label_transform = lambda x: x  # noqa: E731
        else:
            if len(X.shape) != 2:
                # Simply raise an error here since there might be many
                # different ways of reshaping
                raise ValueError(
                    "Please reshape the input data X into a 2-dimensional "
                    "matrix.")

            label_transform = self._ray_fit_preprocess(y)

        if callable(self.objective):
            obj = _objective_decorator(self.objective)
            # Use default value. Is it really not used?
            params["objective"] = "binary:logistic"
        else:
            obj = None

        if self.n_classes_ > 2:
            # Switch to using a multiclass objective in the underlying
            # XGB instance
            params["objective"] = "multi:softprob"
            params["num_class"] = self.n_classes_

        try:
            model, feval, params = self._configure_fit(
                xgb_model, eval_metric, params)
        except TypeError:
            # XGBoost >= 1.6.0
            (model, feval, params, early_stopping_rounds,
             callbacks) = self._configure_fit(xgb_model, eval_metric, params,
                                              early_stopping_rounds,
                                              callbacks)

        if train_dmatrix is None:
            train_dmatrix, evals = _wrap_evaluation_matrices(
                missing=self.missing,
                X=X,
                y=y,
                group=None,
                qid=None,
                sample_weight=sample_weight,
                base_margin=base_margin,
                feature_weights=feature_weights,
                eval_set=eval_set,
                sample_weight_eval_set=sample_weight_eval_set,
                base_margin_eval_set=base_margin_eval_set,
                eval_group=None,
                eval_qid=None,
                # changed in xgboost-ray:
                create_dmatrix=lambda **kwargs: RayDMatrix(**{
                    **kwargs,
                    **ray_dmatrix_params
                }),
                **self._ray_get_wrap_evaluation_matrices_compat_kwargs(
                    label_transform=label_transform))

        # remove those as they will be set in RayXGBoostActor
        params.pop("n_jobs", None)
        params.pop("nthread", None)

        ray_params = self._ray_set_ray_params_n_jobs(ray_params, self.n_jobs)

        additional_results = {}

        self._Booster = train(
            params,
            train_dmatrix,
            self.get_num_boosting_rounds(),
            evals=evals,
            early_stopping_rounds=early_stopping_rounds,
            evals_result=evals_result,
            obj=obj,
            feval=feval,
            verbose_eval=verbose,
            xgb_model=model,
            callbacks=callbacks,
            # changed in xgboost-ray:
            additional_results=additional_results,
            ray_params=ray_params,
            _remote=_remote,
        )

        if not callable(self.objective):
            self.objective = params["objective"]

        self.additional_results_ = additional_results

        self._set_evaluation_result(evals_result)
        return self

    fit.__doc__ = _treat_X_doc(XGBClassifier.fit.__doc__) + _RAY_PARAMS_DOC
    def _ray_fit_preprocess(self, y) -> Callable:
        """This has been separated out so that it can be easily overwritten
        should a future xgboost version remove label encoding"""
        # pylint: disable = attribute-defined-outside-init,too-many-statements
        can_use_label_encoder = True
        use_label_encoder = getattr(self, "use_label_encoder", True)
        label_encoding_check_error = (
            "The label must consist of integer "
            "labels of form 0, 1, 2, ..., [num_class - 1].")
        label_encoder_deprecation_msg = (
            "The use of label encoder in XGBClassifier is deprecated and will "
            "be removed in a future release. To remove this warning, do the "
            "following: 1) Pass option use_label_encoder=False when "
            "constructing XGBClassifier object; and 2) Encode your labels (y) "
            "as integers starting with 0, i.e. 0, 1, 2, ..., [num_class - 1].")

        # ray: modified this to allow for compatibility with legacy xgboost
        if (_is_cudf_df and _is_cudf_df(y)) or (_is_cudf_ser
                                                and _is_cudf_ser(y)):
            import cupy as cp  # pylint: disable=E0401

            self.classes_ = cp.unique(y.values)
            self.n_classes_ = len(self.classes_)
            can_use_label_encoder = False
            expected_classes = cp.arange(self.n_classes_)
            if (self.classes_.shape != expected_classes.shape
                    or not (self.classes_ == expected_classes).all()):
                raise ValueError(label_encoding_check_error)
        elif _is_cupy_array and _is_cupy_array(y):
            import cupy as cp  # pylint: disable=E0401

            self.classes_ = cp.unique(y)
            self.n_classes_ = len(self.classes_)
            can_use_label_encoder = False
            expected_classes = cp.arange(self.n_classes_)
            if (self.classes_.shape != expected_classes.shape
                    or not (self.classes_ == expected_classes).all()):
                raise ValueError(label_encoding_check_error)
        else:
            self.classes_ = np.unique(y)
            self.n_classes_ = len(self.classes_)
            if not use_label_encoder and (not np.array_equal(
                    self.classes_, np.arange(self.n_classes_))):
                raise ValueError(label_encoding_check_error)

        if use_label_encoder:
            if not can_use_label_encoder:
                raise ValueError(
                    "The option use_label_encoder=True is incompatible with "
                    "inputs of type cuDF or cuPy. Please set "
                    "use_label_encoder=False when constructing XGBClassifier "
                    "object. NOTE:" + label_encoder_deprecation_msg)
            if hasattr(self, "use_label_encoder"):
                warnings.warn(label_encoder_deprecation_msg, UserWarning)
            self._le = XGBoostLabelEncoder().fit(y)
            label_transform = self._le.transform
        else:
            label_transform = lambda x: x  # noqa: E731

        return label_transform

    def _can_use_inplace_predict(self) -> bool:
        return False

    def predict(
        self,
        X,
        output_margin=False,
        ntree_limit=None,
        validate_features=True,
        base_margin=None,
        iteration_range: Optional[Tuple[int, int]] = None,
        ray_params: Union[None, RayParams, Dict] = None,
        _remote: Optional[bool] = None,
        ray_dmatrix_params: Optional[Dict] = None,
    ):
        class_probs = self._ray_predict(
            X=X,
            output_margin=output_margin,
            ntree_limit=ntree_limit,
            validate_features=validate_features,
            base_margin=base_margin,
            iteration_range=iteration_range,
            ray_params=ray_params,
            _remote=_remote,
            ray_dmatrix_params=ray_dmatrix_params)
        if output_margin:
            # If output_margin is active, simply return the scores
            return class_probs

        if len(class_probs.shape) > 1:
            # turns softprob into softmax
            column_indexes = np.argmax(class_probs, axis=1)
        else:
            # turns soft logit into class label
            column_indexes = np.repeat(0, class_probs.shape[0])
            column_indexes[class_probs > 0.5] = 1

        if hasattr(self, "_le"):
            return self._le.inverse_transform(column_indexes)
        return column_indexes

    predict.__doc__ = _treat_X_doc(XGBModel.predict.__doc__) + _RAY_PARAMS_DOC

    def predict_proba(
        self,
        X,
        ntree_limit=None,
        validate_features=False,
        base_margin=None,
        iteration_range: Optional[Tuple[int, int]] = None,
        ray_params: Union[None, RayParams, Dict] = None,
        _remote: Optional[bool] = None,
        ray_dmatrix_params: Optional[Dict] = None,
    ) -> np.ndarray:
        class_probs = self._ray_predict(
            X=X,
            output_margin=self.objective == "multi:softmax",
            ntree_limit=ntree_limit,
            validate_features=validate_features,
            base_margin=base_margin,
            iteration_range=iteration_range,
            ray_params=ray_params,
            _remote=_remote,
            ray_dmatrix_params=ray_dmatrix_params)
        # If model is loaded from a raw booster there's no `n_classes_`
        return _cls_predict_proba(
            getattr(self, "n_classes_", None), class_probs, np.vstack)

    def load_model(self, fname):
        if not hasattr(self, "_Booster"):
            self._Booster = Booster()
        return super().load_model(fname)

    predict_proba.__doc__ = (
        _treat_X_doc(XGBClassifier.predict_proba.__doc__) + _RAY_PARAMS_DOC)
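
# --- Usage sketch (illustrative; not part of the original source). ---
# A minimal end-to-end example of RayXGBClassifier, assuming the xgboost_ray
# package is installed and Ray can start locally. The dataset and the
# num_actors/cpus_per_actor values are arbitrary; use_label_encoder applies
# only to xgboost versions that still accept it.
#
#   from sklearn.datasets import load_breast_cancer
#   from xgboost_ray import RayParams
#
#   X, y = load_breast_cancer(return_X_y=True)
#
#   clf = RayXGBClassifier(n_estimators=20, use_label_encoder=False)
#   clf.fit(X, y, ray_params=RayParams(num_actors=2, cpus_per_actor=1))
#
#   preds = clf.predict(X)          # distributed predict via _ray_predict
#   probas = clf.predict_proba(X)   # shape (n_samples, n_classes)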
def cv(
        self,
        # Dataset
        data,
        cat_features=None,
        groups=None,
        folds=KFold(n_splits=5),
        # Model
        params={},
        fit_params={},
        convert_to_sklearn=True,
        # Optuna
        tune_model=False,
        optuna_params=None,
        maximize=None,
        eval_metric=None,
        n_trials=None,
        timeout=None,
        lgbm_n_trials=[7, 20, 10, 6, 20],
        # Misc
        logger=None,
        n_jobs=-1):

    self._data_check(data)
    self.n_features = data[0].shape[1]
    self.n_classes = len(np.unique(data[1]))

    if isinstance(data[0], pd.DataFrame):
        self.feature_names = data[0].columns.tolist()
    else:
        self.feature_names = [f'f{i}' for i in range(self.n_features)]

    main_metric, maximize = self._parse_eval_metric(params, maximize)
    if eval_metric is not None and maximize is None:
        raise ValueError('metric direction must be specified.')

    if isinstance(logger, (str, Path)):
        logger = LGBMLogger(logger, stdout=True, file=True)
    elif logger is None:
        logger = LGBMLogger(logger, stdout=True, file=False)
    assert isinstance(logger, LGBMLogger)

    # logger(f'params: \n{params}')
    # logger(f'fit_params: \n{fit_params}')

    if self.model_name in MODEL_ZOO['cat']:
        raise NotImplementedError('catboost is incompatible with .cv().')

    elif self.model_name in MODEL_ZOO['lgb']:
        ''' LightGBM '''
        dtrain = lgb.Dataset(data=data[0],
                             label=data[1],
                             weight=data[2] if len(data) == 3 else None,
                             categorical_feature=cat_features)

        # Optuna integration
        if tune_model:
            _fit_params = fit_params.copy()
            _params = params.copy()
            _params.update({'metric': main_metric})
            if 'verbose_eval' in _fit_params.keys():
                _fit_params.update({'verbose_eval': False})
            tuner = lgb_tune.LightGBMTunerCV(
                _params,
                dtrain,
                folds=folds,
                n_trials_config=lgbm_n_trials,
                return_cvbooster=True,
                optuna_callbacks=[logger.optuna],
                show_progress_bar=False,
                **_fit_params)
            tuner.run()
            self.model = tuner.get_best_booster().boosters
            self.best_iteration = tuner.get_best_booster().best_iteration
            self.best_score = tuner.best_score
            del tuner
        else:
            _params = params.copy()
            model_extractor = ModelExtractor()
            res = lgb.cv(_params,
                         train_set=dtrain,
                         folds=folds,
                         callbacks=[logger.lgbm, model_extractor],
                         **fit_params)
            self.model = model_extractor.get_model().boosters
            self.best_iteration = model_extractor.get_best_iteration()
            self.evals_result = res
            if maximize:
                self.best_score = np.max(res[f'{main_metric}-mean'])
            else:
                self.best_score = np.min(res[f'{main_metric}-mean'])
            for i in range(len(self.model)):
                self.model[i].best_iteration = self.best_iteration

        logger(
            f'[{self.best_iteration}]\tbest score is {self.best_score:.6f}')

        if convert_to_sklearn:
            for i in range(len(self.model)):
                self.model[i] = booster2sklearn(self.model[i],
                                                self.model_type,
                                                self.n_features,
                                                self.n_classes)
                if self.model_name == 'LGBMClassifier':
                    self.model[i]._le = _LGBMLabelEncoder().fit(data[1])

    elif self.model_name in MODEL_ZOO['xgb']:
        ''' XGBoost '''
        dtrain = xgb.DMatrix(data=data[0],
                             label=data[1],
                             weight=data[2] if len(data) == 3 else None,
                             nthread=n_jobs)

        # Optuna integration
        if tune_model:
            _fit_params = fit_params.copy()
            if 'verbose_eval' in _fit_params.keys():
                _fit_params.update({'verbose_eval': False})
            self.best_score = None
            self.best_model = None
            self.best_iteration = 0

            def xgb_objective(trial):
                _params = params.copy()
                if optuna_params is None:
                    _params = PARAMS_ZOO[self.model_name](trial, _params)
                else:
                    _params = optuna_params(trial, _params)
                model_extractor = ModelExtractor()
                res = xgb.cv(_params,
                             dtrain=dtrain,
                             folds=folds,
                             maximize=maximize,
                             callbacks=[model_extractor],
                             **_fit_params)
                self.model = model_extractor.get_model()
                if eval_metric is None:
                    if maximize:
                        score = np.max(res[f'test-{main_metric}-mean'])
                        best_iteration = np.argmax(
                            res[f'test-{main_metric}-mean'])
                    else:
                        score = np.min(res[f'test-{main_metric}-mean'])
                        best_iteration = np.argmin(
                            res[f'test-{main_metric}-mean'])
                else:
                    raise NotImplementedError(
                        'Do not use custom eval_metric for .cv() :(')
                if self.best_score is None:
                    self.best_score = score
                    self.best_model = self.model.copy()
                    self.best_iteration = best_iteration
                    self.evals_result = res
                if maximize is True and self.best_score < score:
                    self.best_score = score
                    self.best_model = self.model.copy()
                    self.best_iteration = best_iteration
                    self.evals_result = res
                elif maximize is False and self.best_score > score:
                    self.best_score = score
                    self.best_model = self.model.copy()
                    self.best_iteration = best_iteration
                    self.evals_result = res
                return score

            study = optuna.create_study(
                direction='maximize' if maximize else 'minimize')
            study.optimize(xgb_objective,
                           n_trials=n_trials,
                           timeout=timeout,
                           callbacks=[logger.optuna],
                           n_jobs=1)
            self.model = self.best_model.copy()
            del self.best_model
        else:
            _params = params.copy()
            model_extractor = ModelExtractor()
            res = xgb.cv(_params,
                         dtrain=dtrain,
                         folds=folds,
                         maximize=maximize,
                         callbacks=[logger.lgbm, model_extractor],
                         **fit_params)
            self.model = model_extractor.get_model()
            if maximize:
                self.best_score = np.max(res[f'test-{main_metric}-mean'])
                self.best_iteration = np.argmax(
                    res[f'test-{main_metric}-mean'])
            else:
                self.best_score = np.min(res[f'test-{main_metric}-mean'])
                self.best_iteration = np.argmin(
                    res[f'test-{main_metric}-mean'])
            for i in range(len(self.model)):
                self.model[i].best_ntree_limit = self.best_iteration

        logger(
            f'[{self.best_iteration}]\tbest score is {self.best_score:.6f}')

        if convert_to_sklearn:
            for i in range(len(self.model)):
                self.model[i] = booster2sklearn(self.model[i],
                                                self.model_type,
                                                self.n_features,
                                                self.n_classes)
                if self.model_name == 'XGBClassifier':
                    self.model[i]._le = XGBoostLabelEncoder().fit(data[1])

    else:
        raise NotImplementedError(
            f'{self.model_name} is incompatible with .cv().')

    self.is_trained = True
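
# --- Usage sketch (illustrative; not part of the original source). ---
# .cv() appears to be a method of a Trainer-style wrapper built around a
# model class; the `Trainer` name below is a placeholder for however that
# wrapper is actually constructed in this codebase. The data, folds, and
# params are arbitrary example values.
#
#   from lightgbm import LGBMClassifier
#   from sklearn.datasets import load_breast_cancer
#   from sklearn.model_selection import StratifiedKFold
#
#   X, y = load_breast_cancer(return_X_y=True)
#
#   trainer = Trainer(LGBMClassifier)  # hypothetical wrapper construction
#   trainer.cv(
#       data=(X, y),
#       folds=StratifiedKFold(n_splits=5, shuffle=True, random_state=0),
#       params={'objective': 'binary', 'metric': 'auc'},
#       fit_params={'num_boost_round': 100},
#       convert_to_sklearn=True)
#
#   # After cv(), trainer.model holds one sklearn-wrapped booster per fold
#   # and trainer.best_score holds the best mean CV metric.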
def train(
        self,
        # Dataset
        train_data,
        valid_data=(),
        cat_features=None,
        # Model
        params={},
        fit_params={},
        convert_to_sklearn=True,
        # Probability calibration
        calibration=False,
        calibration_method='isotonic',
        calibration_cv=5,
        # Optuna
        tune_model=False,
        optuna_params=None,
        maximize=None,
        eval_metric=None,
        n_trials=None,
        timeout=None,
        lgbm_n_trials=[7, 20, 10, 6, 20],
        # Misc
        logger=None,
        n_jobs=-1):

    self._data_check(train_data)
    self._data_check(valid_data)
    self.n_features = train_data[0].shape[1]
    self.n_classes = len(np.unique(train_data[1]))

    if isinstance(train_data[0], pd.DataFrame):
        self.feature_names = train_data[0].columns.tolist()
    else:
        self.feature_names = [f'f{i}' for i in range(self.n_features)]

    main_metric, maximize = self._parse_eval_metric(params, maximize)
    if eval_metric is not None and maximize is None:
        raise ValueError('metric direction must be specified.')

    if isinstance(logger, (str, Path)):
        logger = LGBMLogger(logger, stdout=True, file=True)
    elif logger is None:
        logger = LGBMLogger(logger, stdout=True, file=False)
    assert isinstance(logger, LGBMLogger)

    # logger(f'params: \n{params}')
    # logger(f'fit_params: \n{fit_params}')

    if self.model_name in MODEL_ZOO['cat']:
        ''' CatBoost '''
        dtrain = CatPool(
            data=train_data[0],
            label=train_data[1],
            weight=train_data[2] if len(train_data) == 3 else None,
            cat_features=cat_features,
            thread_count=n_jobs)
        if len(valid_data) > 0:
            dvalid = CatPool(
                data=valid_data[0],
                label=valid_data[1],
                weight=valid_data[2] if len(valid_data) == 3 else None,
                cat_features=cat_features,
                thread_count=n_jobs)
        else:
            dvalid = dtrain

        # Optuna integration
        if tune_model:
            _fit_params = fit_params.copy()
            if 'verbose_eval' in _fit_params.keys():
                _fit_params.update({'verbose_eval': False})
            self.best_score = None
            self.best_model = None

            def cat_objective(trial):
                _params = params.copy()
                if optuna_params is None:
                    _params = PARAMS_ZOO[self.model_name](trial, _params)
                else:
                    _params = optuna_params(trial, _params)
                self.model = self.model_type(**_params)
                self.model.fit(X=dtrain, eval_set=dvalid, **_fit_params)
                if eval_metric is None:
                    score = self.model.get_best_score(
                    )['validation'][main_metric]
                else:
                    score = eval_metric(self.model, valid_data)
                if self.best_score is None:
                    self.best_score = score
                    self.best_model = self.model.copy()
                    self.best_iteration = self.model.get_best_iteration()
                    self.evals_result = self.model.get_evals_result()
                if maximize is True and self.best_score < score:
                    self.best_model = self.model.copy()
                    self.best_score = score
                    self.best_iteration = self.model.get_best_iteration()
                    self.evals_result = self.model.get_evals_result()
                elif maximize is False and self.best_score > score:
                    self.best_model = self.model.copy()
                    self.best_score = score
                    self.best_iteration = self.model.get_best_iteration()
                    self.evals_result = self.model.get_evals_result()
                return score

            study = optuna.create_study(
                direction='maximize' if maximize else 'minimize')
            study.optimize(cat_objective,
                           n_trials=n_trials,
                           timeout=timeout,
                           callbacks=[logger.optuna],
                           n_jobs=1)
            self.model = self.best_model.copy()
            del self.best_model
        else:
            _params = params.copy()
            self.model = self.model_type(**_params)
            self.model.fit(X=dtrain, eval_set=dvalid, **fit_params)
            self.best_score = self.model.get_best_score(
            )['validation'][main_metric]
            self.evals_result = self.model.get_evals_result()
            self.best_iteration = self.model.get_best_iteration()

    elif self.model_name in MODEL_ZOO['lgb']:
        ''' LightGBM '''
        dtrain = lgb.Dataset(
            data=train_data[0],
            label=train_data[1],
            weight=train_data[2] if len(train_data) == 3 else None,
            categorical_feature=cat_features)
        if len(valid_data) > 0:
            dvalid = lgb.Dataset(
                data=valid_data[0],
                label=valid_data[1],
                weight=valid_data[2] if len(valid_data) == 3 else None,
                categorical_feature=cat_features)
        else:
            dvalid = dtrain

        # Optuna integration
        if tune_model:
            _fit_params = fit_params.copy()
            _params = params.copy()
            if 'verbose_eval' in _fit_params.keys():
                _fit_params.update({'verbose_eval': False})
            _params.update({'metric': main_metric})
            tuner = lgb_tune.LightGBMTuner(
                _params,
                train_set=dtrain,
                valid_sets=[dtrain, dvalid],
                n_trials_config=lgbm_n_trials,
                show_progress_bar=False,
                optuna_callbacks=[logger.optuna],
                **_fit_params)
            tuner.run()
            self.model = tuner.get_best_booster()
            self.best_score = tuner.best_score
            del tuner
        else:
            _params = params.copy()
            res = {}
            self.model = lgb.train(_params,
                                   train_set=dtrain,
                                   valid_sets=[dtrain, dvalid],
                                   callbacks=[logger.lgbm],
                                   evals_result=res,
                                   **fit_params)
            if maximize:
                self.best_score = np.max(res['valid_1'][main_metric])
            else:
                self.best_score = np.min(res['valid_1'][main_metric])
            self.evals_result = res
        self.best_iteration = self.model.best_iteration

        if convert_to_sklearn:
            self.model = booster2sklearn(self.model, self.model_type,
                                         self.n_features, self.n_classes)
            if self.model_name == 'LGBMClassifier':
                # internal label encoder
                self.model._le = _LGBMLabelEncoder().fit(train_data[1])

    elif self.model_name in MODEL_ZOO['xgb']:
        ''' XGBoost '''
        dtrain = xgb.DMatrix(
            data=train_data[0],
            label=train_data[1],
            weight=train_data[2] if len(train_data) == 3 else None,
            nthread=n_jobs)
        if len(valid_data) > 0:
            dvalid = xgb.DMatrix(
                data=valid_data[0],
                label=valid_data[1],
                weight=valid_data[2] if len(valid_data) == 3 else None,
                nthread=n_jobs)
        else:
            dvalid = dtrain

        # Optuna integration
        if tune_model:
            _fit_params = fit_params.copy()
            if 'verbose_eval' in _fit_params.keys():
                _fit_params.update({'verbose_eval': False})
            self.best_score = None
            self.best_model = None

            def xgb_objective(trial):
                _params = params.copy()
                if optuna_params is None:
                    _params = PARAMS_ZOO[self.model_name](trial, _params)
                else:
                    _params = optuna_params(trial, _params)
                res = {}
                pruning_callback = optuna.integration.XGBoostPruningCallback(
                    trial, f'valid-{main_metric}')
                self.model = xgb.train(_params,
                                       dtrain=dtrain,
                                       evals=[(dtrain, 'train'),
                                              (dvalid, 'valid')],
                                       evals_result=res,
                                       callbacks=[pruning_callback],
                                       **_fit_params)
                if eval_metric is None:
                    if maximize:
                        score = np.max(res['valid'][main_metric])
                    else:
                        score = np.min(res['valid'][main_metric])
                else:
                    score = eval_metric(self.model, dvalid)
                if self.best_score is None:
                    self.best_score = score
                    self.best_model = self.model.copy()
                    self.evals_result = res
                if maximize is True and self.best_score < score:
                    self.best_score = score
                    self.best_model = self.model.copy()
                    self.evals_result = res
                elif maximize is False and self.best_score > score:
                    self.best_score = score
                    self.best_model = self.model.copy()
                    self.evals_result = res
                return score

            study = optuna.create_study(
                direction='maximize' if maximize else 'minimize')
            study.optimize(xgb_objective,
                           n_trials=n_trials,
                           timeout=timeout,
                           callbacks=[logger.optuna],
                           n_jobs=1)
            self.model = self.best_model.copy()
            del self.best_model
        else:
            _params = params.copy()
            res = {}
            self.model = xgb.train(_params,
                                   dtrain=dtrain,
                                   evals=[(dtrain, 'train'),
                                          (dvalid, 'valid')],
                                   callbacks=[logger.lgbm],
                                   evals_result=res,
                                   **fit_params)
            if maximize:
                self.best_score = np.max(res['valid'][main_metric])
            else:
                self.best_score = np.min(res['valid'][main_metric])
            self.evals_result = res
        self.best_iteration = self.model.best_ntree_limit

        if convert_to_sklearn:
            self.model = booster2sklearn(self.model, self.model_type,
                                         self.n_features, self.n_classes)
            if self.model_name == 'XGBClassifier':
                self.model._le = XGBoostLabelEncoder().fit(train_data[1])

    else:
        ''' Other sklearn models '''
        if eval_metric is None:
            if self.n_classes == 2:
                eval_metric = auc_metric
                maximize = True
            else:
                eval_metric = mse_metric
                maximize = False
            print('eval_metric automatically selected.')

        # Optuna integration
        if tune_model:
            self.best_score = None
            self.best_model = None

            def sklearn_objective(trial):
                _params = params.copy()
                if optuna_params is None:
                    _params = PARAMS_ZOO[self.model_name](trial, _params)
                else:
                    _params = optuna_params(trial, _params)
                self.model = self.model_type(**_params)
                self.model.fit(train_data[0], train_data[1], **fit_params)
                score = eval_metric(self.model, valid_data)
                if self.best_score is None:
                    self.best_score = score
                    self.best_model = deepcopy(self.model)
                if maximize is True and self.best_score < score:
                    self.best_model = deepcopy(self.model)
                    self.best_score = score
                elif maximize is False and self.best_score > score:
                    self.best_model = deepcopy(self.model)
                    self.best_score = score
                return score

            study = optuna.create_study(
                direction='maximize' if maximize else 'minimize')
            study.optimize(sklearn_objective,
                           n_trials=n_trials,
                           timeout=timeout,
                           callbacks=[logger.optuna],
                           n_jobs=1)
            self.model = deepcopy(self.best_model)
            del self.best_model
        else:
            self.model = self.model_type(**params)
            self.model.fit(train_data[0], train_data[1], **fit_params)
            self.best_score = eval_metric(self.model, valid_data)

        logger(f'[None]\tbest score is {self.best_score:.6f}')

    self.is_trained = True

    if calibration:
        # placeholder: probability calibration is not implemented yet
        pass
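
# --- Usage sketch (illustrative; not part of the original source). ---
# .train() fits a single model against an optional validation split. As
# above, `Trainer` stands in for whatever wrapper class owns this method;
# the data, params, and fit_params are arbitrary example values that are
# forwarded to xgb.train via **fit_params.
#
#   import xgboost as xgb
#   from sklearn.datasets import load_breast_cancer
#   from sklearn.model_selection import train_test_split
#
#   X, y = load_breast_cancer(return_X_y=True)
#   X_tr, X_va, y_tr, y_va = train_test_split(X, y, random_state=0)
#
#   trainer = Trainer(xgb.XGBClassifier)  # hypothetical wrapper construction
#   trainer.train(
#       train_data=(X_tr, y_tr),
#       valid_data=(X_va, y_va),
#       params={'objective': 'binary:logistic', 'eval_metric': 'auc'},
#       fit_params={'num_boost_round': 200, 'early_stopping_rounds': 20})
#   print(trainer.best_score, trainer.best_iteration)
#
#   # Pass tune_model=True with n_trials/timeout to run the Optuna search
#   # branch instead of a single fit.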