Example #1
0
 def testWarmStart(self):
     """Smoke test: warm-start training runs end to end without error."""
     # Hard to get introspection so we just test that it runs.
     estimators = [LogisticRegression(), LogisticRegression()]
     config = self.base_params(estimators)
     config.update(
         early_stopping=True,
         early_stop_type=get_early_stop_type(estimators[0], True))
     trainable = _Trainable(config)
     for _ in range(2):
         trainable.train()
     trainable.stop()
Example #2
0
 def testPartialFit(self):
     """Each train() call must advance the SGD estimator's `t_` counter."""
     estimators = [SGDClassifier(), SGDClassifier()]
     config = self.base_params(estimators)
     config.update(
         early_stopping=True,
         early_stop_type=get_early_stop_type(estimators[0], True))
     trainable = _Trainable(config)

     trainable.train()
     t_after_first = trainable.estimator_list[0].t_
     assert t_after_first > 0

     trainable.train()
     assert trainable.estimator_list[0].t_ > t_after_first
     trainable.stop()
Example #3
0
 def testXGBoostEarlyStop(self):
     """Every train() with early stopping must leave all models saved."""
     estimator_list = [create_xgboost(), create_xgboost()]
     config = self.base_params(estimator_list=estimator_list)
     config.update(
         early_stopping=True,
         early_stop_type=get_early_stop_type(estimator_list[0], True))
     trainable = _Trainable(config)
     for _ in range(2):
         trainable.train()
         assert all(trainable.saved_models)
     trainable.stop()
Example #4
0
 def testLGBMEarlyStop(self):
     """Every train() with early stopping must leave all models saved."""
     estimators = [create_lightgbm(), create_lightgbm()]
     config = self.base_params(estimator_list=estimators)
     config.update(
         early_stopping=True,
         early_stop_type=get_early_stop_type(estimators[0], True))
     trainable = _Trainable(config)
     for _ in range(2):
         trainable.train()
         assert all(trainable.saved_models)
     trainable.stop()
Example #5
0
 def base_params(self, estimator_list):
     """Build the default `_Trainable` config dict for *estimator_list*.

     Early stopping is disabled by default; tests flip the
     `early_stopping` / `early_stop_type` keys as needed.
     """
     first = estimator_list[0]
     scoring, _ = _check_multimetric_scoring(first, scoring=None)
     return {
         "estimator_list": estimator_list,
         "X_id": self.X_id,
         "y_id": self.y_id,
         "early_stopping": False,
         "early_stop_type": get_early_stop_type(first, False),
         "max_iters": 1,
         "groups": None,
         # One CV fold per estimator in the list.
         "cv": check_cv(
             cv=len(estimator_list), y=self.y, classifier=first),
         "fit_params": None,
         "scoring": scoring,
         "return_train_score": False,
         "n_jobs": 1,
     }
Example #6
0
    def __init__(self,
                 estimator,
                 early_stopping=None,
                 scoring=None,
                 n_jobs=None,
                 sk_n_jobs=-1,
                 cv=5,
                 refit=True,
                 verbose=0,
                 error_score="raise",
                 return_train_score=False,
                 local_dir="~/ray_results",
                 max_iters=1,
                 use_gpu=False,
                 loggers=None,
                 pipeline_auto_early_stop=True):
        """Validate arguments and store the tuning configuration.

        Args:
            estimator: Estimator (or sklearn Pipeline) to tune. For a
                Pipeline with ``pipeline_auto_early_stop=True``, early
                stopping support is determined from the final step.
            early_stopping: Falsy disables early stopping. ``True``
                selects "AsyncHyperBandScheduler"; any other truthy value
                is resolved via ``resolve_early_stopping``.
            scoring: Metric spec passed to ``_check_multimetric_scoring``.
            n_jobs: Degree of parallelism; ``None`` maps to ``-1``.
            sk_n_jobs: ``n_jobs`` for underlying sklearn calls; overridden
                by the ``SKLEARN_N_JOBS`` environment variable when set.
            cv: Cross-validation strategy (int folds or a CV splitter).
            refit: With multimetric scoring, names the metric used as the
                base metric for reporting.
            verbose: Verbosity level.
            error_score: Value (or ``"raise"``) used when fitting fails.
            return_train_score: Whether to also report train scores.
            local_dir: Directory for Ray results.
            max_iters: Maximum training iterations; forced to 1 when
                early stopping is disabled.
            use_gpu: Whether trials should use a GPU.
            loggers: Logger spec resolved via ``resolve_loggers``.
            pipeline_auto_early_stop: Allow early stopping to be judged
                from a Pipeline's final step.

        Raises:
            ValueError: If ``max_iters < 1`` or early stopping is
                requested for an estimator that cannot support it.
        """
        if max_iters < 1:
            raise ValueError("max_iters must be greater than or equal to 1.")
        self.estimator = estimator
        self.base_estimator = estimator
        self.pipeline_auto_early_stop = pipeline_auto_early_stop

        # For pipelines, early-stopping capability is judged by the final
        # step rather than by the Pipeline wrapper itself.
        if self.pipeline_auto_early_stop and check_is_pipeline(estimator):
            _, self.base_estimator = self.base_estimator.steps[-1]

        self.early_stop_type = get_early_stop_type(self.base_estimator,
                                                   bool(early_stopping))

        if not self._can_early_stop():
            if early_stopping:
                raise ValueError("Early stopping is not supported because "
                                 "the estimator does not have `partial_fit`, "
                                 "does not support warm_start, or is a "
                                 "tree classifier. Set "
                                 "`early_stopping=False`.")
        if not early_stopping and max_iters > 1:
            warnings.warn(
                "max_iters is set > 1 but incremental/partial training "
                "is not enabled. To enable partial training, "
                "ensure the estimator has `partial_fit` or "
                "`warm_start` and set `early_stopping=True`. "
                "Automatically setting max_iters=1.",
                category=UserWarning)
            max_iters = 1

        # Get metric scoring name
        self.scoring = scoring
        self.refit = refit
        # A subclass may have resolved multimetric scoring already; only
        # compute it here when it hasn't been.
        if not hasattr(self, "is_multi"):
            self.scoring, self.is_multi = _check_multimetric_scoring(
                self.estimator, self.scoring)

        if self.is_multi:
            self._base_metric_name = self.refit
        else:
            self._base_metric_name = "score"

        self._metric_name = "average_test_%s" % self._base_metric_name

        if early_stopping:
            # NOTE(review): with a truthy `early_stopping`, a failing
            # `_can_early_stop()` already raised ValueError above, so this
            # warning branch should be unreachable; kept as a safety net.
            if not self._can_early_stop() and is_lightgbm_model(
                    self.base_estimator):
                warnings.warn("lightgbm>=3.0.0 required for early_stopping "
                              "functionality.")
            assert self._can_early_stop()
            if max_iters == 1:
                warnings.warn(
                    "early_stopping is enabled but max_iters = 1. "
                    "To enable partial training, set max_iters > 1.",
                    category=UserWarning)
            # Incremental learning for gradient-boosting libraries is
            # emulated; warn about the possible performance impact.
            if self.early_stop_type == EarlyStopping.XGB:
                warnings.warn(
                    "tune-sklearn implements incremental learning "
                    "for xgboost models following this: "
                    "https://github.com/dmlc/xgboost/issues/1686. "
                    "This may negatively impact performance. To "
                    "disable, set `early_stopping=False`.",
                    category=UserWarning)
            elif self.early_stop_type == EarlyStopping.LGBM:
                warnings.warn(
                    "tune-sklearn implements incremental learning "
                    "for lightgbm models following this: "
                    "https://lightgbm.readthedocs.io/en/latest/pythonapi/"
                    "lightgbm.LGBMModel.html#lightgbm.LGBMModel.fit "
                    "This may negatively impact performance. To "
                    "disable, set `early_stopping=False`.",
                    category=UserWarning)
            elif self.early_stop_type == EarlyStopping.CATBOOST:
                warnings.warn(
                    "tune-sklearn implements incremental learning "
                    "for Catboost models following this: "
                    "https://catboost.ai/docs/concepts/python-usages-"
                    "examples.html#training-continuation "
                    "This may negatively impact performance. To "
                    "disable, set `early_stopping=False`.",
                    category=UserWarning)
            if early_stopping is True:
                # Override the early_stopping variable so
                # that it is resolved appropriately in
                # the next block
                early_stopping = "AsyncHyperBandScheduler"
            # Resolve the early stopping object
            early_stopping = resolve_early_stopping(early_stopping, max_iters,
                                                    self._metric_name)

        self.early_stopping = early_stopping
        self.max_iters = max_iters

        self.cv = cv
        self.n_jobs = int(n_jobs or -1)
        # Read the env override once: the original looked it up twice,
        # which is redundant and racy if the environment changes between
        # the two reads.
        env_sk_n_jobs = os.environ.get("SKLEARN_N_JOBS")
        if env_sk_n_jobs is not None:
            self.sk_n_jobs = int(env_sk_n_jobs)
        else:
            self.sk_n_jobs = sk_n_jobs

        self.verbose = verbose
        self.error_score = error_score
        self.return_train_score = return_train_score
        self.local_dir = local_dir
        self.use_gpu = use_gpu
        self.loggers = resolve_loggers(loggers)
        assert isinstance(self.n_jobs, int)
    def __init__(self,
                 estimator,
                 early_stopping=None,
                 scoring=None,
                 n_jobs=None,
                 sk_n_jobs=-1,
                 cv=5,
                 refit=True,
                 verbose=0,
                 error_score="raise",
                 return_train_score=False,
                 local_dir="~/ray_results",
                 max_iters=1,
                 use_gpu=False,
                 loggers=None,
                 pipeline_auto_early_stop=True):
        """Validate arguments and store the tuning configuration.

        Args:
            estimator: Estimator (or sklearn Pipeline) to tune. For a
                Pipeline with ``pipeline_auto_early_stop=True``, early
                stopping support is determined from the final step.
            early_stopping: Falsy disables early stopping. ``True``
                selects "AsyncHyperBandScheduler"; any other truthy value
                is resolved via ``resolve_early_stopping``.
            scoring: Metric spec, stored as-is on ``self.scoring``.
            n_jobs: Degree of parallelism; ``None`` maps to ``-1``.
            sk_n_jobs: ``n_jobs`` for underlying sklearn calls; overridden
                by the ``SKLEARN_N_JOBS`` environment variable when set.
            cv: Cross-validation strategy (int folds or a CV splitter).
            refit: Stored as-is on ``self.refit``.
            verbose: Verbosity level.
            error_score: Value (or ``"raise"``) used when fitting fails.
            return_train_score: Whether to also report train scores.
            local_dir: Directory for Ray results.
            max_iters: Maximum training iterations; forced to 1 when
                early stopping is disabled.
            use_gpu: Whether trials should use a GPU.
            loggers: Logger spec resolved via ``resolve_loggers``.
            pipeline_auto_early_stop: Allow early stopping to be judged
                from a Pipeline's final step.

        Raises:
            ValueError: If ``max_iters < 1`` or early stopping is
                requested for an estimator that cannot support it.
        """
        if max_iters < 1:
            raise ValueError("max_iters must be greater than or equal to 1.")
        self.estimator = estimator
        self.base_estimator = estimator
        self.pipeline_auto_early_stop = pipeline_auto_early_stop

        # For pipelines, early-stopping capability is judged by the final
        # step rather than by the Pipeline wrapper itself.
        if self.pipeline_auto_early_stop and check_is_pipeline(estimator):
            _, self.base_estimator = self.base_estimator.steps[-1]

        self.early_stop_type = get_early_stop_type(self.base_estimator,
                                                   bool(early_stopping))

        if not self._can_early_stop():
            if early_stopping:
                raise ValueError("Early stopping is not supported because "
                                 "the estimator does not have `partial_fit`, "
                                 "does not support warm_start, or is a "
                                 "tree classifier. Set "
                                 "`early_stopping=False`.")
        # max_iters > 1 only makes sense with incremental training enabled.
        if not early_stopping and max_iters > 1:
            warnings.warn(
                "max_iters is set > 1 but incremental/partial training "
                "is not enabled. To enable partial training, "
                "ensure the estimator has `partial_fit` or "
                "`warm_start` and set `early_stopping=True`. "
                "Automatically setting max_iters=1.",
                category=UserWarning)
            max_iters = 1

        if early_stopping:
            # Should hold here: a truthy early_stopping with a failing
            # _can_early_stop() already raised ValueError above.
            assert self._can_early_stop()
            if max_iters == 1:
                warnings.warn(
                    "early_stopping is enabled but max_iters = 1. "
                    "To enable partial training, set max_iters > 1.",
                    category=UserWarning)
            # xgboost incremental learning is emulated; warn about the
            # possible performance impact.
            if self.early_stop_type == EarlyStopping.XGB:
                warnings.warn(
                    "tune-sklearn implements incremental learning "
                    "for xgboost models following this: "
                    "https://github.com/dmlc/xgboost/issues/1686. "
                    "This may negatively impact performance. To "
                    "disable, set `early_stopping=False`.",
                    category=UserWarning)
            if early_stopping is True:
                # Override the early_stopping variable so
                # that it is resolved appropriately in
                # the next block
                early_stopping = "AsyncHyperBandScheduler"
            # Resolve the early stopping object
            early_stopping = resolve_early_stopping(early_stopping, max_iters)

        self.early_stopping = early_stopping
        self.max_iters = max_iters

        self.cv = cv
        self.scoring = scoring
        self.n_jobs = int(n_jobs or -1)
        # Environment variable takes precedence over the sk_n_jobs argument.
        if os.environ.get("SKLEARN_N_JOBS") is not None:
            self.sk_n_jobs = int(os.environ.get("SKLEARN_N_JOBS"))
        else:
            self.sk_n_jobs = sk_n_jobs
        self.refit = refit
        self.verbose = verbose
        self.error_score = error_score
        self.return_train_score = return_train_score
        self.local_dir = local_dir
        self.use_gpu = use_gpu
        self.loggers = resolve_loggers(loggers)
        assert isinstance(self.n_jobs, int)