Code Example #1
    def test_multi_best_regression(self):
        x, y = make_regression(n_samples=100, n_features=10, n_informative=5)
        model = SGDRegressor()
        parameter_grid = {"alpha": [1e-4, 1e-1, 1], "epsilon": [0.01, 0.1]}

        scoring = ("neg_mean_absolute_error", "neg_mean_squared_error")

        search_methods = ["random", "bayesian", "hyperopt", "bohb"]
        for search_method in search_methods:

            tune_search = TuneSearchCV(
                model,
                parameter_grid,
                scoring=scoring,
                search_optimization=search_method,
                cv=2,
                n_trials=3,
                n_jobs=1,
                refit="neg_mean_absolute_error")
            tune_search.fit(x, y)
            self.assertAlmostEqual(
                tune_search.best_score_,
                max(tune_search.cv_results_[
                    "mean_test_neg_mean_absolute_error"]),
                places=10)

            p = tune_search.cv_results_["params"]
            scores = tune_search.cv_results_[
                "mean_test_neg_mean_absolute_error"]
            cv_best_param = max(
                list(zip(scores, p)), key=lambda pair: pair[0])[1]
            self.assertEqual(tune_search.best_params_, cv_best_param)
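These snippets are test methods excerpted from a larger module, so their imports are not shown. As a point of reference, here is a minimal sketch of the module-level imports the example above appears to assume (an inference from the names used, not the original file header):

    import unittest

    from sklearn.datasets import make_regression
    from sklearn.linear_model import SGDRegressor
    from tune_sklearn import TuneSearchCV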
Code Example #2
    def test_multi_best_classification_scoring_dict(self):
        digits = datasets.load_digits()
        x = digits.data
        y = digits.target
        model = SGDClassifier()

        parameter_grid = {"alpha": [1e-4, 1e-1, 1], "epsilon": [0.01, 0.1]}
        scoring = {"acc": "accuracy", "f1": "f1_micro"}
        search_methods = ["random", "bayesian", "hyperopt", "bohb"]
        for search_method in search_methods:

            tune_search = TuneSearchCV(
                model,
                parameter_grid,
                scoring=scoring,
                search_optimization=search_method,
                cv=2,
                n_trials=3,
                n_jobs=1,
                refit="acc")
            tune_search.fit(x, y)
            self.assertAlmostEqual(
                tune_search.best_score_,
                max(tune_search.cv_results_["mean_test_acc"]),
                places=10)

            p = tune_search.cv_results_["params"]
            scores = tune_search.cv_results_["mean_test_acc"]
            cv_best_param = max(
                list(zip(scores, p)), key=lambda pair: pair[0])[1]
            self.assertEqual(tune_search.best_params_, cv_best_param)
Code Example #3
    def test_multi_best(self):
        digits = datasets.load_digits()
        x = digits.data
        y = digits.target

        parameter_grid = {"alpha": [1e-4, 1e-1, 1], "epsilon": [0.01, 0.1]}

        scoring = ("accuracy", "f1_micro")

        tune_search = TuneSearchCV(
            SGDClassifier(),
            parameter_grid,
            scoring=scoring,
            max_iters=20,
            refit="accuracy")
        tune_search.fit(x, y)
        self.assertAlmostEqual(
            tune_search.best_score_,
            max(tune_search.cv_results_["mean_test_accuracy"]),
            places=10)

        p = tune_search.cv_results_["params"]
        scores = tune_search.cv_results_["mean_test_accuracy"]
        cv_best_param = max(list(zip(scores, p)), key=lambda pair: pair[0])[1]
        self.assertEqual(tune_search.best_params_, cv_best_param)
Code Example #4
    def test_multi_refit_false(self):
        digits = datasets.load_digits()
        x = digits.data
        y = digits.target
        model = SGDClassifier()

        parameter_grid = {"alpha": [1e-4, 1e-1, 1], "epsilon": [0.01, 0.1]}
        scoring = ("accuracy", "f1_micro")

        tune_search = TuneSearchCV(
            model,
            parameter_grid,
            scoring=scoring,
            search_optimization="random",
            cv=2,
            n_trials=3,
            n_jobs=1,
            refit=False)
        tune_search.fit(x, y)
        with self.assertRaises(ValueError) as exc:
            tune_search.best_score_
        self.assertTrue(("instance was initialized with refit=False. "
                         "For multi-metric evaluation,") in str(exc.exception))
        with self.assertRaises(ValueError) as exc:
            tune_search.best_index_
        self.assertTrue(("instance was initialized with refit=False. "
                         "For multi-metric evaluation,") in str(exc.exception))
        with self.assertRaises(ValueError) as exc:
            tune_search.best_params_
        self.assertTrue(("instance was initialized with refit=False. "
                         "For multi-metric evaluation,") in str(exc.exception))
Code Example #5
    def test_warn_early_stop(self):
        with self.assertWarnsRegex(UserWarning, "max_iters = 1"):
            TuneSearchCV(
                LogisticRegression(), {"C": [1, 2]}, early_stopping=True)
        with self.assertWarnsRegex(UserWarning, "max_iters = 1"):
            TuneSearchCV(
                SGDClassifier(), {"epsilon": [0.1, 0.2]}, early_stopping=True)
Code Example #6
    def test_pipeline_early_stop(self):
        digits = datasets.load_digits()
        x = digits.data
        y = digits.target

        pipe = Pipeline([("reduce_dim", PCA()), ("classify", SGDClassifier())])
        parameter_grid = [
            {
                "classify__alpha": [1e-4, 1e-1, 1],
                "classify__epsilon": [0.01, 0.1]
            },
        ]

        with self.assertRaises(ValueError) as exc:
            TuneSearchCV(
                pipe,
                parameter_grid,
                early_stopping=True,
                pipeline_auto_early_stop=False,
                max_iters=10)
        self.assertTrue((
            "Early stopping is not supported because the estimator does "
            "not have `partial_fit`, does not support warm_start, or "
            "is a tree classifier. Set `early_stopping=False`."
        ) in str(exc.exception))

        tune_search = TuneSearchCV(
            pipe, parameter_grid, early_stopping=True, max_iters=10)
        tune_search.fit(x, y)
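The second instantiation above succeeds because, with the default pipeline_auto_early_stop=True, tune-sklearn early-stops the pipeline through its final step. A minimal sketch of the property this relies on (an inference from the error message in the test, not code from the suite): the final step must support incremental fitting, which SGDClassifier does via partial_fit.

    from sklearn.decomposition import PCA
    from sklearn.linear_model import SGDClassifier

    # The classifier at the end of the pipeline can be trained incrementally...
    assert hasattr(SGDClassifier(), "partial_fit")
    # ...while the PCA step on its own could not be early-stopped this way.
    assert not hasattr(PCA(), "partial_fit")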
Code Example #7
    def test_plateau(self):
        try:
            from ray.tune.stopper import TrialPlateauStopper
        except ImportError:
            self.skipTest("`TrialPlateauStopper` not available in "
                          "current Ray version.")
            return

        X, y = make_classification(n_samples=50,
                                   n_features=50,
                                   n_informative=3,
                                   random_state=0)

        clf = PlateauClassifier(converge_after=4)

        stopper = TrialPlateauStopper(metric="objective")

        search = TuneSearchCV(clf, {"foo_param": [2.0, 3.0, 4.0]},
                              cv=2,
                              max_iters=20,
                              stopper=stopper,
                              early_stopping=True)

        search.fit(X, y)

        print(search.cv_results_)

        for iters in search.cv_results_["training_iteration"]:
            # Converges after 4 iterations, but the stopper needs another
            # 4 to detect it converged.
            self.assertLessEqual(iters, 8)
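PlateauClassifier is a test helper whose definition is not part of these excerpts. Below is a hypothetical minimal stand-in, consistent with how the plateau tests use it (a foo_param hyperparameter and a score that stops improving after converge_after training iterations); the real helper may differ:

    import numpy as np
    from sklearn.base import BaseEstimator, ClassifierMixin


    class PlateauClassifier(BaseEstimator, ClassifierMixin):
        """Hypothetical stand-in: the score plateaus after `converge_after` steps."""

        def __init__(self, foo_param=2.0, converge_after=4):
            self.foo_param = foo_param
            self.converge_after = converge_after
            self.iters_ = 0

        def partial_fit(self, X, y, classes=None):
            self.iters_ += 1  # one call per tune-sklearn training iteration
            return self

        def fit(self, X, y):
            return self.partial_fit(X, y)

        def predict(self, X):
            return np.zeros(len(X), dtype=int)

        def score(self, X, y):
            # Improves for `converge_after` iterations, then goes flat --
            # the plateau that TrialPlateauStopper is meant to detect.
            return min(self.iters_, self.converge_after) / self.converge_after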
Code Example #8
    def sweep(
        self,
        params: Dict,
        X,
        y,
        search_algorithm: str = "bayesian",
        num_trials: int = 3,
        scoring_func: str = "r2",
    ):

        from tune_sklearn import TuneSearchCV

        X, y = (
            torch.tensor(X).float().to(device=self.device),
            torch.tensor(y).float().to(device=self.device),
        )
        tune_search = TuneSearchCV(
            self.model,
            params,
            search_optimization=search_algorithm,
            n_trials=num_trials,
            early_stopping=True,
            scoring=scoring_func,
        )
        tune_search.fit(X, y)

        return tune_search
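A hypothetical call site for the sweep method above. model_wrapper, its self.model (an estimator that accepts torch tensors, e.g. a skorch regressor), and self.device are all assumptions; none of them are defined in the excerpt.

    import numpy as np

    X = np.random.rand(128, 8)
    y = np.random.rand(128)
    # With search_algorithm="bayesian", tune-sklearn accepts (low, high)
    # tuples as parameter ranges. "lr" is a placeholder hyperparameter of
    # the hypothetical wrapped model.
    search = model_wrapper.sweep({"lr": (1e-4, 1e-1)}, X, y, num_trials=5)
    print(search.best_params_)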
Code Example #9
    def test_warm_start_detection(self):
        parameter_grid = {"alpha": Real(1e-4, 1e-1, 1)}
        from sklearn.ensemble import VotingClassifier, RandomForestClassifier
        clf = VotingClassifier(estimators=[(
            "rf", RandomForestClassifier(n_estimators=50, random_state=0))])
        tune_search = TuneSearchCV(
            clf,
            parameter_grid,
            n_jobs=1,
            max_iters=10,
            local_dir="./test-result")
        self.assertEqual(tune_search.early_stop_type,
                         EarlyStopping.NO_EARLY_STOP)

        from sklearn.tree import DecisionTreeClassifier
        clf = DecisionTreeClassifier(random_state=0)
        tune_search2 = TuneSearchCV(
            clf,
            parameter_grid,
            n_jobs=1,
            max_iters=10,
            local_dir="./test-result")
        self.assertEqual(tune_search2.early_stop_type,
                         EarlyStopping.NO_EARLY_STOP)

        from sklearn.linear_model import LogisticRegression
        clf = LogisticRegression()
        tune_search3 = TuneSearchCV(
            clf,
            parameter_grid,
            n_jobs=1,
            max_iters=10,
            local_dir="./test-result")

        self.assertEqual(tune_search3.early_stop_type,
                         EarlyStopping.NO_EARLY_STOP)

        tune_search4 = TuneSearchCV(
            clf,
            parameter_grid,
            early_stopping=True,
            n_jobs=1,
            max_iters=10,
            local_dir="./test-result")
        self.assertEqual(tune_search4.early_stop_type,
                         EarlyStopping.WARM_START_ITER)

        clf = RandomForestClassifier()
        tune_search5 = TuneSearchCV(
            clf,
            parameter_grid,
            early_stopping=True,
            n_jobs=1,
            max_iters=10,
            local_dir="./test-result")
        self.assertEqual(tune_search5.early_stop_type,
                         EarlyStopping.WARM_START_ENSEMBLE)
Code Example #10
    def test_early_stop_catboost_warn(self):
        from catboost import CatBoostClassifier
        with self.assertWarnsRegex(UserWarning, "Catboost"):
            TuneSearchCV(CatBoostClassifier(), {"learning_rate": [0.1, 0.5]},
                         early_stopping=True,
                         max_iters=10)
        with self.assertWarnsRegex(UserWarning, "max_iters"):
            TuneSearchCV(CatBoostClassifier(), {"learning_rate": [0.1, 0.5]},
                         early_stopping=True,
                         max_iters=1)
Code Example #11
    def test_early_stop_lightgbm_warn(self):
        from lightgbm import LGBMClassifier
        with self.assertWarnsRegex(UserWarning, "lightgbm"):
            TuneSearchCV(LGBMClassifier(), {"learning_rate": [0.1, 0.5]},
                         early_stopping=True,
                         max_iters=10)
        with self.assertWarnsRegex(UserWarning, "max_iters"):
            TuneSearchCV(LGBMClassifier(), {"learning_rate": [0.1, 0.5]},
                         early_stopping=True,
                         max_iters=1)
Code Example #12
    def test_early_stop_xgboost_warn(self):
        from xgboost.sklearn import XGBClassifier
        with self.assertWarnsRegex(UserWarning, "github.com"):
            TuneSearchCV(XGBClassifier(), {"C": [1, 2]},
                         early_stopping=True,
                         max_iters=10)
        with self.assertWarnsRegex(UserWarning, "max_iters"):
            TuneSearchCV(XGBClassifier(), {"C": [1, 2]},
                         early_stopping=True,
                         max_iters=1)
Code Example #13
    def test_warm_start_detection(self):
        parameter_grid = {"alpha": Real(1e-4, 1e-1, 1)}
        from sklearn.ensemble import RandomForestClassifier
        clf = RandomForestClassifier(max_depth=2, random_state=0)
        tune_search = TuneSearchCV(clf,
                                   parameter_grid,
                                   n_jobs=1,
                                   max_iters=10,
                                   local_dir="./test-result")
        self.assertFalse(tune_search._can_early_stop())

        from sklearn.tree import DecisionTreeClassifier
        clf = DecisionTreeClassifier(random_state=0)
        tune_search2 = TuneSearchCV(clf,
                                    parameter_grid,
                                    n_jobs=1,
                                    max_iters=10,
                                    local_dir="./test-result")
        self.assertFalse(tune_search2._can_early_stop())

        from sklearn.linear_model import LogisticRegression
        clf = LogisticRegression()
        tune_search3 = TuneSearchCV(clf,
                                    parameter_grid,
                                    n_jobs=1,
                                    max_iters=10,
                                    local_dir="./test-result")

        self.assertTrue(tune_search3._can_early_stop())
Code Example #14
from scipy.stats import randint
from sklearn.ensemble import RandomForestClassifier
from tune_sklearn import TuneSearchCV


def tune_remote(x_train, y_train):
    clf = RandomForestClassifier()
    param_distributions = {
        "n_estimators": randint(20, 80),
        "max_depth": randint(2, 10)
    }

    tune_search = TuneSearchCV(clf, param_distributions, n_trials=3)

    tune_search.fit(x_train, y_train)
    return tune_search
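A minimal sketch of driving tune_remote above. The name suggests the function is wrapped with @ray.remote in its original context (an assumption); called directly, it behaves the same.

    from sklearn.datasets import make_classification

    x_train, y_train = make_classification(n_samples=200, n_features=10,
                                           random_state=0)
    search = tune_remote(x_train, y_train)
    print(search.best_params_, search.best_score_)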
Code Example #15
    def test_trivial_cv_results_attr(self):
        # Test search over a "grid" with only one point.
        # Non-regression test: grid_scores_ wouldn't be set by
        # dcv.GridSearchCV.
        clf = MockClassifier()
        grid_search = TuneGridSearchCV(clf, {"foo_param": [1]}, cv=3)
        grid_search.fit(X, y)
        self.assertTrue(hasattr(grid_search, "cv_results_"))

        random_search = TuneSearchCV(clf, {"foo_param": [0]}, n_iter=1, cv=3)
        random_search.fit(X, y)
        self.assertTrue(hasattr(random_search, "cv_results_"))
Code Example #16
    def test_warn_reduce_maxiters(self):
        parameter_grid = {"alpha": Real(1e-4, 1e-1, 1)}
        from sklearn.ensemble import RandomForestClassifier
        clf = RandomForestClassifier(max_depth=2, random_state=0)
        with self.assertWarnsRegex(UserWarning, "max_iters is set"):
            TuneSearchCV(
                clf, parameter_grid, max_iters=10, local_dir="./test-result")
        with self.assertWarnsRegex(UserWarning, "max_iters is set"):
            TuneSearchCV(
                SGDClassifier(),
                parameter_grid,
                max_iters=10,
                local_dir="./test-result")
Code Example #17
    def sweep(self, params: Dict, X, y):

        tune_search = TuneSearchCV(
            self.model,
            param_distributions=params,
            n_trials=3,
            # early_stopping=True,
            # use_gpu=True
        )

        tune_search.fit(X, y)

        return tune_search
Code Example #18
    def sweep(self, X, y, params: Dict = None):
        if not params:
            raise NotImplementedError

        tune_search = TuneSearchCV(
            self.model,
            param_distributions=params,
            n_trials=3,
            # early_stopping=True,
            # use_gpu=True
        )

        tune_search.fit(X, y)

        return tune_search
Code Example #19
    def test_warm_start_error(self):
        parameter_grid = {"alpha": Real(1e-4, 1e-1, 1)}
        from sklearn.ensemble import RandomForestClassifier
        clf = RandomForestClassifier(max_depth=2, random_state=0)
        tune_search = TuneSearchCV(clf,
                                   parameter_grid,
                                   n_jobs=1,
                                   early_stopping=False,
                                   max_iters=10,
                                   local_dir="./test-result")
        self.assertFalse(tune_search._can_early_stop())
        with self.assertRaises(ValueError):
            tune_search = TuneSearchCV(clf,
                                       parameter_grid,
                                       n_jobs=1,
                                       early_stopping=True,
                                       max_iters=10,
                                       local_dir="./test-result")
Code Example #20
    def test_early_stop_xgboost_pipeline(self):
        from xgboost.sklearn import XGBClassifier
        from sklearn.pipeline import Pipeline
        TuneSearchCV(Pipeline([("model", XGBClassifier())]),
                     {"model__C": [1, 2]},
                     early_stopping=True,
                     pipeline_auto_early_stop=True,
                     cv=2,
                     n_trials=2,
                     max_iters=10)
Code Example #21
    def test_early_stop_lightgbm_pipeline(self):
        from lightgbm import LGBMClassifier
        from sklearn.pipeline import Pipeline
        TuneSearchCV(Pipeline([("model", LGBMClassifier())]),
                     {"model__learning_rate": [0.1, 0.5]},
                     early_stopping=True,
                     pipeline_auto_early_stop=True,
                     cv=2,
                     n_trials=2,
                     max_iters=10)
Code Example #22
    def test_local_mode(self):
        digits = datasets.load_digits()
        x = digits.data
        y = digits.target

        clf = SGDClassifier()
        parameter_grid = {
            "alpha": Real(1e-4, 1e-1, 1),
            "epsilon": Real(0.01, 0.1)
        }
        tune_search = TuneSearchCV(clf,
                                   parameter_grid,
                                   n_jobs=1,
                                   max_iters=10,
                                   local_dir="./test-result")
        import ray
        with patch.object(ray, "init", wraps=ray.init) as wrapped_init:
            tune_search.fit(x, y)
        self.assertTrue(wrapped_init.call_args[1]["local_mode"])
Code Example #23
    def test_early_stop_catboost_pipeline(self):
        from catboost import CatBoostClassifier
        from sklearn.pipeline import Pipeline
        TuneSearchCV(Pipeline([("model", CatBoostClassifier())]),
                     {"model__learning_rate": [0.1, 0.5]},
                     early_stopping=True,
                     pipeline_auto_early_stop=True,
                     cv=2,
                     n_trials=2,
                     max_iters=10)
Code Example #24
    def test_max_iters(self):
        X, y = make_classification(n_samples=50,
                                   n_features=50,
                                   n_informative=3,
                                   random_state=0)

        clf = PlateauClassifier(converge_after=20)

        search = TuneSearchCV(clf, {"foo_param": [2.0, 3.0, 4.0]},
                              cv=2,
                              max_iters=6,
                              early_stopping=True)

        search.fit(X, y)

        print(search.cv_results_)

        for iters in search.cv_results_["training_iteration"]:
            # Stop after 6 iterations.
            self.assertLessEqual(iters, 6)
Code Example #25
    def test_local_dir(self):
        digits = datasets.load_digits()
        x = digits.data
        y = digits.target

        clf = SGDClassifier()
        parameter_grid = {
            "alpha": Real(1e-4, 1e-1, 1),
            "epsilon": Real(0.01, 0.1)
        }

        scheduler = MedianStoppingRule(grace_period=10.0)

        tune_search = TuneSearchCV(clf,
                                   parameter_grid,
                                   early_stopping=scheduler,
                                   max_iters=10,
                                   local_dir="./test-result")
        tune_search.fit(x, y)

        self.assertTrue(len(os.listdir("./test-result")) != 0)
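As elsewhere, the module-level imports are omitted; the scheduler passed to early_stopping here comes from Ray Tune. A sketch of the imports this snippet appears to assume (an inference, not the original header):

    import os

    from ray.tune.schedulers import MedianStoppingRule
    from skopt.space import Real
    from sklearn.linear_model import SGDClassifier
    from tune_sklearn import TuneSearchCV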
Code Example #26
    def _test_method(self, search_method):
        digits = datasets.load_digits()
        x = digits.data
        y = digits.target

        tune_search = TuneSearchCV(self.clf,
                                   self.parameter_grid,
                                   search_optimization=search_method,
                                   cv=2,
                                   n_trials=3,
                                   n_jobs=1,
                                   refit=True)
        tune_search.fit(x, y)
        params = tune_search.best_estimator_.get_params()
        print({
            k: v
            for k, v in params.items() if k in ("alpha", "epsilon", "penalty")
        })
        self.assertTrue(1e-4 <= params["alpha"] <= 0.5)
        self.assertTrue(0.01 <= params["epsilon"] <= 0.05)
        self.assertTrue(params["penalty"] in ("elasticnet", "l1"))
Code Example #27
from time import time

from sklearn.base import clone
from sklearn.model_selection import ShuffleSplit
from tune_sklearn import TuneSearchCV


def tune_ray(clf,
             params,
             X_train,
             y_train,
             X_test,
             y_test,
             n_params=-1,
             max_epochs=-1,
             n_jobs=4):
    split = ShuffleSplit(test_size=0.20, n_splits=1, random_state=42)
    clf = clone(clf).set_params(prefix="ray")

    search = TuneSearchCV(
        clf,
        params,
        cv=split,
        early_stopping=True,
        max_iters=max_epochs,
        n_iter=n_params,
        random_state=42,
        refit=False,
    )

    start = time()
    search.fit(X_train, y_train)
    fit_time = time() - start

    data = {
        "library": "ray",
        "fit_time": fit_time,
        "start_time": start,
        "n_params": n_params,
        "n_jobs": n_jobs,
        "max_epochs": max_epochs,
    }
    return search, data
Code Example #28
    def test_timeout(self):
        X, y = make_classification(n_samples=50,
                                   n_features=50,
                                   n_informative=3,
                                   random_state=0)

        clf = SleepClassifier()
        # SleepClassifier sleeps for `foo_param` seconds, `cv` times.
        # Thus, the time budget is exhausted after testing the first two
        # `foo_param`s.
        search = TuneSearchCV(clf, {"foo_param": [1.1, 1.2, 2.5]},
                              time_budget_s=5.0,
                              cv=2,
                              max_iters=5,
                              early_stopping=True)

        start = time.time()
        search.fit(X, y)
        taken = time.time() - start

        print(search)
        # Without timeout we would need over 50 seconds for this to
        # finish. Allow for some initialization overhead
        self.assertLess(taken, 25.0)
Code Example #29
    def _test_seed_run(self, search_optimization, seed):
        digits = datasets.load_digits()

        x = digits.data
        y = digits.target

        parameters = {
            "classify__alpha": [1e-4, 1e-3, 1e-2, 1e-1, 1],
            "classify__epsilon": [0.01, 0.02, 0.03, 0.04, 0.05, 0.06]
        }

        pipe = Pipeline([("reduce_dim", PCA()), ("classify", SGDClassifier())])

        if isinstance(seed, str):
            _seed = np.random.RandomState(seed=int(seed))
        else:
            _seed = seed
        tune_search_1 = TuneSearchCV(pipe,
                                     parameters.copy(),
                                     early_stopping=True,
                                     max_iters=1,
                                     search_optimization=search_optimization,
                                     random_state=_seed)
        tune_search_1.fit(x, y)

        if isinstance(seed, str):
            _seed = np.random.RandomState(seed=int(seed))
        else:
            _seed = seed
        tune_search_2 = TuneSearchCV(pipe,
                                     parameters.copy(),
                                     early_stopping=True,
                                     max_iters=1,
                                     search_optimization=search_optimization,
                                     random_state=_seed)
        tune_search_2.fit(x, y)

        try:
            self.assertSequenceEqual(tune_search_1.cv_results_["params"],
                                     tune_search_2.cv_results_["params"])
        except AssertionError:
            print(f"Seeds: {tune_search_1.seed} == {tune_search_2.seed}?")
            raise
Code Example #30
    def test_warm_start_error(self):
        parameter_grid = {"alpha": Real(1e-4, 1e-1, 1)}
        from sklearn.ensemble import VotingClassifier, RandomForestClassifier
        clf = VotingClassifier(estimators=[(
            "rf", RandomForestClassifier(n_estimators=50, random_state=0))])
        tune_search = TuneSearchCV(
            clf,
            parameter_grid,
            n_jobs=1,
            early_stopping=False,
            max_iters=10,
            local_dir="./test-result")
        self.assertFalse(tune_search._can_early_stop())
        with self.assertRaises(ValueError):
            tune_search = TuneSearchCV(
                clf,
                parameter_grid,
                n_jobs=1,
                early_stopping=True,
                max_iters=10,
                local_dir="./test-result")

        from sklearn.linear_model import LogisticRegression
        clf = LogisticRegression()
        with self.assertRaises(ValueError):
            parameter_grid = {"max_iter": [1, 2]}
            TuneSearchCV(
                clf,
                parameter_grid,
                early_stopping=True,
                n_jobs=1,
                max_iters=10,
                local_dir="./test-result")

        from sklearn.ensemble import RandomForestClassifier
        clf = RandomForestClassifier()
        with self.assertRaises(ValueError):
            parameter_grid = {"n_estimators": [1, 2]}
            TuneSearchCV(
                clf,
                parameter_grid,
                early_stopping=True,
                n_jobs=1,
                max_iters=10,
                local_dir="./test-result")