def testWarmStart(self):
    """Smoke test: warm-start early stopping runs end to end.

    Introspection of the trained estimators is hard here, so we only
    verify that repeated train steps complete without error.
    """
    config = self.base_params([LogisticRegression(), LogisticRegression()])
    config["early_stopping"] = True
    config["early_stop_type"] = get_early_stop_type(
        config["estimator_list"][0], True)
    trainable = _Trainable(config)
    for _ in range(2):
        trainable.train()
    trainable.stop()
def testPartialFit(self):
    """Check that partial_fit training advances the SGD iteration counter."""
    config = self.base_params([SGDClassifier(), SGDClassifier()])
    config["early_stopping"] = True
    config["early_stop_type"] = get_early_stop_type(
        config["estimator_list"][0], True)
    trainable = _Trainable(config)

    trainable.train()
    # `t_` is sklearn's cumulative iteration counter; it must have moved.
    t_after_first = trainable.estimator_list[0].t_
    assert t_after_first > 0

    trainable.train()
    # A second train step must continue, not restart, the fit.
    assert trainable.estimator_list[0].t_ > t_after_first
    trainable.stop()
def testXGBoostEarlyStop(self):
    """Incremental xgboost training must persist a saved model each step."""
    estimators = [create_xgboost(), create_xgboost()]
    config = self.base_params(estimator_list=estimators)
    config["early_stopping"] = True
    config["early_stop_type"] = get_early_stop_type(estimators[0], True)
    trainable = _Trainable(config)
    for _ in range(2):
        trainable.train()
        # Every CV fold must have checkpointed a booster after each step.
        assert all(trainable.saved_models)
    trainable.stop()
def testLGBMEarlyStop(self):
    """Incremental lightgbm training must persist a saved model each step."""
    config = self.base_params(
        estimator_list=[create_lightgbm(), create_lightgbm()])
    config["early_stopping"] = True
    config["early_stop_type"] = get_early_stop_type(
        config["estimator_list"][0], True)
    trainable = _Trainable(config)
    for _ in range(2):
        trainable.train()
        # Every CV fold must have checkpointed a model after each step.
        assert all(trainable.saved_models)
    trainable.stop()
def base_params(self, estimator_list):
    """Build the baseline ``_Trainable`` config for *estimator_list*.

    Early stopping is disabled by default; individual tests flip
    ``early_stopping`` / ``early_stop_type`` afterwards as needed.
    """
    # One CV fold per estimator so each estimator owns a fold.
    cv = check_cv(
        cv=len(estimator_list), y=self.y, classifier=estimator_list[0])
    scoring, _ = _check_multimetric_scoring(estimator_list[0], scoring=None)
    config = {
        "estimator_list": estimator_list,
        "X_id": self.X_id,
        "y_id": self.y_id,
        "early_stopping": False,
        "early_stop_type": get_early_stop_type(estimator_list[0], False),
        "max_iters": 1,
        "groups": None,
        "cv": cv,
        "fit_params": None,
        "scoring": scoring,
        "return_train_score": False,
        "n_jobs": 1,
    }
    return config
def __init__(self,
             estimator,
             early_stopping=None,
             scoring=None,
             n_jobs=None,
             sk_n_jobs=-1,
             cv=5,
             refit=True,
             verbose=0,
             error_score="raise",
             return_train_score=False,
             local_dir="~/ray_results",
             max_iters=1,
             use_gpu=False,
             loggers=None,
             pipeline_auto_early_stop=True):
    """Validate arguments and store tuning configuration.

    Resolves the early-stopping scheduler, the scoring metric name used
    for Tune reporting, and CPU/GPU/job-count settings.

    Raises:
        ValueError: if ``max_iters < 1``, or if ``early_stopping`` is
            requested for an estimator that cannot be stopped early.
    """
    if max_iters < 1:
        raise ValueError("max_iters must be greater than or equal to 1.")
    self.estimator = estimator
    self.base_estimator = estimator
    self.pipeline_auto_early_stop = pipeline_auto_early_stop

    if self.pipeline_auto_early_stop and check_is_pipeline(estimator):
        # Early-stop on the pipeline's final estimator, not the pipeline.
        _, self.base_estimator = self.base_estimator.steps[-1]

    self.early_stop_type = get_early_stop_type(self.base_estimator,
                                               bool(early_stopping))

    if not self._can_early_stop():
        if early_stopping:
            raise ValueError("Early stopping is not supported because "
                             "the estimator does not have `partial_fit`, "
                             "does not support warm_start, or is a "
                             "tree classifier. Set "
                             "`early_stopping=False`.")
        if not early_stopping and max_iters > 1:
            warnings.warn(
                "max_iters is set > 1 but incremental/partial training "
                "is not enabled. To enable partial training, "
                "ensure the estimator has `partial_fit` or "
                "`warm_start` and set `early_stopping=True`. "
                "Automatically setting max_iters=1.",
                category=UserWarning)
            max_iters = 1

    # Get metric scoring name
    self.scoring = scoring
    self.refit = refit
    if not hasattr(self, "is_multi"):
        self.scoring, self.is_multi = _check_multimetric_scoring(
            self.estimator, self.scoring)

    if self.is_multi:
        # With multiple metrics, `refit` names the metric to optimize.
        self._base_metric_name = self.refit
    else:
        self._base_metric_name = "score"

    self._metric_name = "average_test_%s" % self._base_metric_name

    if early_stopping:
        # NOTE(review): unreachable in practice — if `_can_early_stop()`
        # were False with early_stopping set, the ValueError above would
        # already have been raised. Kept for defense in depth.
        if not self._can_early_stop() and is_lightgbm_model(
                self.base_estimator):
            warnings.warn("lightgbm>=3.0.0 required for early_stopping "
                          "functionality.")
        assert self._can_early_stop()
        if max_iters == 1:
            # BUGFIX: this warning message was a corrupted (unterminated)
            # string literal; reconstructed as a single message.
            warnings.warn(
                "early_stopping is enabled but max_iters = 1. "
                "To enable partial training, set max_iters > 1.",
                category=UserWarning)
        if self.early_stop_type == EarlyStopping.XGB:
            warnings.warn(
                "tune-sklearn implements incremental learning "
                "for xgboost models following this: "
                "https://github.com/dmlc/xgboost/issues/1686. "
                "This may negatively impact performance. To "
                "disable, set `early_stopping=False`.",
                category=UserWarning)
        elif self.early_stop_type == EarlyStopping.LGBM:
            warnings.warn(
                "tune-sklearn implements incremental learning "
                "for lightgbm models following this: "
                "https://lightgbm.readthedocs.io/en/latest/pythonapi/"
                "lightgbm.LGBMModel.html#lightgbm.LGBMModel.fit "
                "This may negatively impact performance. To "
                "disable, set `early_stopping=False`.",
                category=UserWarning)
        elif self.early_stop_type == EarlyStopping.CATBOOST:
            warnings.warn(
                "tune-sklearn implements incremental learning "
                "for Catboost models following this: "
                "https://catboost.ai/docs/concepts/python-usages-"
                "examples.html#training-continuation "
                "This may negatively impact performance. To "
                "disable, set `early_stopping=False`.",
                category=UserWarning)
        if early_stopping is True:
            # Override the early_stopping variable so
            # that it is resolved appropriately in
            # the next block
            early_stopping = "AsyncHyperBandScheduler"
        # Resolve the early stopping object
        early_stopping = resolve_early_stopping(early_stopping, max_iters,
                                                self._metric_name)

    self.early_stopping = early_stopping
    self.max_iters = max_iters
    self.cv = cv
    # n_jobs=None (or 0) falls back to -1, i.e. "use all cores".
    self.n_jobs = int(n_jobs or -1)
    if os.environ.get("SKLEARN_N_JOBS") is not None:
        # Environment override takes precedence over the argument.
        self.sk_n_jobs = int(os.environ.get("SKLEARN_N_JOBS"))
    else:
        self.sk_n_jobs = sk_n_jobs
    self.verbose = verbose
    self.error_score = error_score
    self.return_train_score = return_train_score
    self.local_dir = local_dir
    self.use_gpu = use_gpu
    self.loggers = resolve_loggers(loggers)
    assert isinstance(self.n_jobs, int)
def __init__(self,
             estimator,
             early_stopping=None,
             scoring=None,
             n_jobs=None,
             sk_n_jobs=-1,
             cv=5,
             refit=True,
             verbose=0,
             error_score="raise",
             return_train_score=False,
             local_dir="~/ray_results",
             max_iters=1,
             use_gpu=False,
             loggers=None,
             pipeline_auto_early_stop=True):
    """Validate arguments and store tuning configuration.

    Resolves the early-stopping scheduler and CPU/GPU/job-count
    settings; scoring is stored as-is.

    Raises:
        ValueError: if ``max_iters < 1``, or if ``early_stopping`` is
            requested for an estimator that cannot be stopped early.
    """
    if max_iters < 1:
        raise ValueError("max_iters must be greater than or equal to 1.")
    self.estimator = estimator
    self.base_estimator = estimator
    self.pipeline_auto_early_stop = pipeline_auto_early_stop

    if self.pipeline_auto_early_stop and check_is_pipeline(estimator):
        # Early-stop on the pipeline's final estimator, not the pipeline.
        _, self.base_estimator = self.base_estimator.steps[-1]

    self.early_stop_type = get_early_stop_type(self.base_estimator,
                                               bool(early_stopping))

    if not self._can_early_stop():
        if early_stopping:
            raise ValueError("Early stopping is not supported because "
                             "the estimator does not have `partial_fit`, "
                             "does not support warm_start, or is a "
                             "tree classifier. Set "
                             "`early_stopping=False`.")
        if not early_stopping and max_iters > 1:
            warnings.warn(
                "max_iters is set > 1 but incremental/partial training "
                "is not enabled. To enable partial training, "
                "ensure the estimator has `partial_fit` or "
                "`warm_start` and set `early_stopping=True`. "
                "Automatically setting max_iters=1.",
                category=UserWarning)
            max_iters = 1

    if early_stopping:
        assert self._can_early_stop()
        if max_iters == 1:
            warnings.warn(
                "early_stopping is enabled but max_iters = 1. "
                "To enable partial training, set max_iters > 1.",
                category=UserWarning)
        if self.early_stop_type == EarlyStopping.XGB:
            # BUGFIX: this warning message was a corrupted (unterminated)
            # string literal; reconstructed as a single message.
            warnings.warn(
                "tune-sklearn implements incremental learning "
                "for xgboost models following this: "
                "https://github.com/dmlc/xgboost/issues/1686. "
                "This may negatively impact performance. To "
                "disable, set `early_stopping=False`.",
                category=UserWarning)
        if early_stopping is True:
            # Override the early_stopping variable so
            # that it is resolved appropriately in
            # the next block
            early_stopping = "AsyncHyperBandScheduler"
        # Resolve the early stopping object
        early_stopping = resolve_early_stopping(early_stopping, max_iters)

    self.early_stopping = early_stopping
    self.max_iters = max_iters
    self.cv = cv
    self.scoring = scoring
    # n_jobs=None (or 0) falls back to -1, i.e. "use all cores".
    self.n_jobs = int(n_jobs or -1)
    if os.environ.get("SKLEARN_N_JOBS") is not None:
        # Environment override takes precedence over the argument.
        self.sk_n_jobs = int(os.environ.get("SKLEARN_N_JOBS"))
    else:
        self.sk_n_jobs = sk_n_jobs
    self.refit = refit
    self.verbose = verbose
    self.error_score = error_score
    self.return_train_score = return_train_score
    self.local_dir = local_dir
    self.use_gpu = use_gpu
    self.loggers = resolve_loggers(loggers)
    assert isinstance(self.n_jobs, int)