def test_plateau(self):
        try:
            from ray.tune.stopper import TrialPlateauStopper
        except ImportError:
            self.skipTest("`TrialPlateauStopper` not available in "
                          "current Ray version.")
            return

        X, y = make_classification(n_samples=50,
                                   n_features=50,
                                   n_informative=3,
                                   random_state=0)

        clf = PlateauClassifier(converge_after=4)

        stopper = TrialPlateauStopper(metric="objective")

        search = TuneGridSearchCV(clf, {"foo_param": [2.0, 3.0, 4.0]},
                                  cv=2,
                                  max_iters=20,
                                  stopper=stopper,
                                  early_stopping=True)

        search.fit(X, y)

        print(search.cv_results_)

        for iters in search.cv_results_["training_iteration"]:
            # Converges after 4 iterations, but the stopper needs another
            # 4 to detect it converged.
            self.assertLessEqual(iters, 8)
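`PlateauClassifier` is a fixture from tune-sklearn's test helpers and is not shown in these snippets. A minimal sketch consistent with how the tests here use it (the score improves on every training iteration until `converge_after`, then stays flat so a plateau stopper can trigger) might look like the following; the class body is an assumption, not the real helper:

import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin


class PlateauClassifier(BaseEstimator, ClassifierMixin):
    """Hypothetical stand-in: the score rises with each partial_fit
    until `converge_after` iterations, then plateaus."""

    def __init__(self, foo_param=0, converge_after=4):
        self.foo_param = foo_param
        self.converge_after = converge_after

    def partial_fit(self, X, y, classes=None):
        # Count training iterations across early-stopping steps.
        self.iters_ = getattr(self, "iters_", 0) + 1
        return self

    def fit(self, X, y):
        return self.partial_fit(X, y)

    def predict(self, X):
        return np.zeros(len(X), dtype=int)

    def score(self, X, y):
        # Improves each iteration, then stays constant after convergence.
        return min(self.iters_, self.converge_after) / self.converge_after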
Example #2
    def test_grid_search_groups(self):
        # Check that the ValueError (raised when groups is None) propagates
        # to dcv.GridSearchCV, and that groups is correctly passed to the
        # cv object
        rng = np.random.RandomState(0)

        X, y = make_classification(n_samples=15, n_classes=2, random_state=0)
        groups = rng.randint(0, 3, 15)

        clf = LinearSVC(random_state=0)
        grid = {"C": [1]}

        group_cvs = [
            LeaveOneGroupOut(),
            LeavePGroupsOut(2),
            GroupKFold(n_splits=3),
            GroupShuffleSplit(n_splits=3),
        ]
        for cv in group_cvs:
            gs = TuneGridSearchCV(clf, grid, cv=cv)
            with self.assertRaises(ValueError) as exc:
                gs.fit(X, y)
            self.assertTrue(
                "parameter should not be None" in str(exc.exception))

            gs.fit(X, y, groups=groups)

        non_group_cvs = [
            StratifiedKFold(n_splits=3),
            StratifiedShuffleSplit(n_splits=3)
        ]
        for cv in non_group_cvs:
            gs = TuneGridSearchCV(clf, grid, cv=cv)
            # Should not raise an error
            gs.fit(X, y)
Example #3
    def test_no_refit(self):
        # Test that GSCV can be used for model selection alone without
        # refitting
        clf = MockClassifier()
        grid_search = TuneGridSearchCV(clf, {"foo_param": [1, 2, 3]},
                                       refit=False,
                                       cv=3)
        grid_search.fit(X, y)
        self.assertFalse(hasattr(grid_search, "best_estimator_"))
        self.assertFalse(hasattr(grid_search, "best_index_"))
        self.assertFalse(hasattr(grid_search, "best_score_"))
        self.assertFalse(hasattr(grid_search, "best_params_"))

        # Make sure the predict/transform etc fns raise meaningful error msg
        for fn_name in (
                "predict",
                "predict_proba",
                "predict_log_proba",
                "transform",
                "inverse_transform",
        ):
            with self.assertRaises(NotFittedError) as exc:
                getattr(grid_search, fn_name)(X)
            self.assertTrue(
                ("refit=False. %s is available only after refitting on the "
                 "best parameters" % fn_name) in str(exc.exception))
Example #4
    def test_grid_search_bad_param_grid(self):
        param_dict = {"C": 1.0}
        clf = SVC()

        with self.assertRaises(ValueError) as exc:
            TuneGridSearchCV(clf, param_dict)
        self.assertTrue(("Parameter grid for parameter (C) needs to"
                         " be a list or numpy array") in str(exc.exception))

        param_dict = {"C": []}
        clf = SVC()

        with self.assertRaises(ValueError) as exc:
            TuneGridSearchCV(clf, param_dict)
        self.assertTrue(
            ("Parameter values for parameter (C) need to be a non-empty "
             "sequence.") in str(exc.exception))

        param_dict = {"C": "1,2,3"}
        clf = SVC()

        with self.assertRaises(ValueError) as exc:
            TuneGridSearchCV(clf, param_dict)
        self.assertTrue(("Parameter grid for parameter (C) needs to"
                         " be a list or numpy array") in str(exc.exception))

        param_dict = {"C": np.ones(6).reshape(3, 2)}
        clf = SVC()
        with self.assertRaises(ValueError):
            TuneGridSearchCV(clf, param_dict)
Example #5
    def test_digits(self):
        # Loading the Digits dataset
        digits = datasets.load_digits()

        # To apply a classifier to this data, we need to flatten the images,
        # turning the data into a (samples, features) matrix:
        n_samples = len(digits.images)
        X = digits.images.reshape((n_samples, -1))
        y = digits.target

        # Split the dataset in two equal parts
        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=0.5,
                                                            random_state=0)

        # Set the parameters by cross-validation
        tuned_parameters = {
            "kernel": ["rbf"],
            "gamma": [1e-3, 1e-4],
            "C": [1, 10, 100, 1000]
        }

        tune_search = TuneGridSearchCV(SVC(), tuned_parameters, max_iters=20)
        tune_search.fit(X_train, y_train)

        pred = tune_search.predict(X_test)
        print(pred)
        accuracy = np.count_nonzero(
            np.array(pred) == np.array(y_test)) / len(pred)
        print(accuracy)
Example #6
    def test_diabetes(self):
        # load the diabetes datasets
        dataset = datasets.load_diabetes()
        X = dataset.data
        y = dataset.target
        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=0.5,
                                                            random_state=0)
        # prepare a range of alpha values to test
        alphas = np.array([1, 0.1, 0.01, 0.001, 0.0001, 0])
        param_grid = dict(alpha=alphas)
        # create and fit a ridge regression model, testing each alpha
        model = linear_model.Ridge()

        tune_search = TuneGridSearchCV(
            model,
            param_grid,
        )
        tune_search.fit(X_train, y_train)

        pred = tune_search.predict(X_test)
        print(pred)
        # Mean signed error of the predictions
        error = sum(np.array(pred) - np.array(y_test)) / len(pred)
        print(error)
Example #7
    def test_gridsearch_multi_cv_results(self):
        parameter_grid = {"alpha": [1e-4, 1e-1, 1], "epsilon": [0.01, 0.1]}

        scoring = ("accuracy", "f1_micro")
        cv = 2

        tune_search = TuneGridSearchCV(
            SGDClassifier(),
            parameter_grid,
            scoring=scoring,
            max_iters=20,
            refit=False,
            cv=cv)
        tune_search.fit(X, y)
        result = tune_search.cv_results_

        keys_to_check = []

        for s in scoring:
            keys_to_check.append("mean_test_%s" % s)
            for i in range(cv):
                keys_to_check.append("split%d_test_%s" % (i, s))

        for key in keys_to_check:
            self.assertIn(key, result)
Example #8
    def test_grid_search_precomputed_kernel_error_nonsquare(self):
        # Test that grid search raises an error with a non-square precomputed
        # training kernel matrix
        K_train = np.zeros((10, 20))
        y_train = np.ones((10, ))
        clf = SVC(kernel="precomputed")
        cv = TuneGridSearchCV(clf, {"C": [0.1, 1.0]})
        with self.assertRaises(TuneError):
            cv.fit(K_train, y_train)
Example #9
    def test_y_as_list(self):
        # Pass y as list in dcv.GridSearchCV
        X = np.arange(100).reshape(10, 10)
        y = np.array([0] * 5 + [1] * 5)

        clf = CheckingClassifier(check_y=lambda x: isinstance(x, list))
        cv = KFold(n_splits=3)
        grid_search = TuneGridSearchCV(clf, {"foo_param": [1, 2, 3]}, cv=cv)
        grid_search.fit(X, y.tolist()).score(X, y)
        self.assertTrue(hasattr(grid_search, "cv_results_"))
Example #10
    def test_gridsearch_nd(self):
        # Pass n-dimensional X and y to dcv.GridSearchCV
        X_4d = np.arange(10 * 5 * 3 * 2).reshape(10, 5, 3, 2)
        y_3d = np.arange(10 * 7 * 11).reshape(10, 7, 11)
        clf = CheckingClassifier(
            check_X=lambda x: x.shape[1:] == (5, 3, 2),
            check_y=lambda x: x.shape[1:] == (7, 11),
        )
        grid_search = TuneGridSearchCV(clf, {"foo_param": [1, 2, 3]}, cv=3)
        grid_search.fit(X_4d, y_3d).score(X, y)
        self.assertTrue(hasattr(grid_search, "cv_results_"))
Example #11
    def test_grid_search_error(self):
        # Test that grid search will capture errors on data with different
        # length
        X_, y_ = make_classification(n_samples=200,
                                     n_features=100,
                                     random_state=0)

        clf = LinearSVC()
        cv = TuneGridSearchCV(clf, {"C": [0.1, 1.0]})
        with self.assertRaises(TuneError):
            cv.fit(X_[:180], y_)
Example #12
    def test_refit(self):
        # Regression test for bug in refitting
        # Simulates re-fitting a broken estimator; this used to break with
        # sparse SVMs.
        X = np.arange(100).reshape(10, 10)
        y = np.array([0] * 5 + [1] * 5)

        clf = TuneGridSearchCV(BrokenClassifier(), {"parameter": [0, 1]},
                               scoring="accuracy",
                               refit=True)
        clf.fit(X, y)
Example #13
    def test_grid_search_precomputed_kernel_error_nonsquare(self):
        # Test that grid search raises an error with a non-square precomputed
        # training kernel matrix
        K_train = np.zeros((10, 20))
        y_train = np.ones((10, ))
        clf = SVC(kernel="precomputed")
        cv = TuneGridSearchCV(clf, {"C": [0.1, 1.0]})
        with self.assertRaises(ValueError) as exc:
            cv.fit(K_train, y_train)
        self.assertTrue(
            ("X should be a square kernel matrix") in str(exc.exception))
Example #14
    def test_grid_search_error(self):
        # Test that grid search will capture errors on data with different
        # length
        X_, y_ = make_classification(
            n_samples=200, n_features=100, random_state=0)

        clf = LinearSVC()
        cv = TuneGridSearchCV(clf, {"C": [0.1, 1.0]})
        with self.assertRaises(ValueError) as exc:
            cv.fit(X_[:180], y_)
        self.assertTrue(("Found input variables with inconsistent numbers of "
                         "samples: [180, 200]") in str(exc.exception))
Example #15
    def test_trivial_cv_results_attr(self):
        # Test search over a "grid" with only one point.
        # Non-regression test: grid_scores_ wouldn't be set by
        # dcv.GridSearchCV.
        clf = MockClassifier()
        grid_search = TuneGridSearchCV(clf, {"foo_param": [1]}, cv=3)
        grid_search.fit(X, y)
        self.assertTrue(hasattr(grid_search, "cv_results_"))

        random_search = TuneSearchCV(clf, {"foo_param": [0]}, n_iter=1, cv=3)
        random_search.fit(X, y)
        self.assertTrue(hasattr(random_search, "cv_results_"))
Example #16
    def test_grid_search_one_grid_point(self):
        X_, y_ = make_classification(
            n_samples=200, n_features=100, random_state=0)
        param_dict = {"C": [1.0], "kernel": ["rbf"], "gamma": [0.1]}

        clf = SVC()
        cv = TuneGridSearchCV(clf, param_dict)
        cv.fit(X_, y_)

        clf = SVC(C=1.0, kernel="rbf", gamma=0.1)
        clf.fit(X_, y_)

        assert_array_equal(clf.dual_coef_, cv.best_estimator_.dual_coef_)
Example #17
    def test_grid_search_error(self):
        # Test that grid search will capture errors on data with different
        # length
        X_, y_ = make_classification(n_samples=200,
                                     n_features=100,
                                     random_state=0)

        clf = LinearSVC()
        cv = TuneGridSearchCV(clf, {"C": [0.1, 1.0]})
        with self.assertLogs("ray.tune") as cm:
            cv.fit(X_[:180], y_)
        self.assertTrue(("ValueError: Found input variables with inconsistent "
                         "numbers of samples: [180, 200]") in str(cm.output))
Example #18
    def test_local_mode(self):
        # Pass X as list; with n_jobs=1, Ray should run in local mode
        X = np.arange(100).reshape(10, 10)
        y = np.array([0] * 5 + [1] * 5)

        clf = CheckingClassifier(check_X=lambda x: isinstance(x, list))
        cv = KFold(n_splits=3)
        with patch.object(ray, "init", wraps=ray.init) as wrapped_init:
            grid_search = TuneGridSearchCV(
                clf, {"foo_param": [1, 2, 3]}, n_jobs=1, cv=cv)
            grid_search.fit(X.tolist(), y).score(X, y)

        self.assertTrue(hasattr(grid_search, "cv_results_"))
        self.assertTrue(wrapped_init.call_args[1]["local_mode"])
Example #19
    def test_gridsearch_no_predict(self):
        # test grid-search with an estimator without predict.
        # slight duplication of a test from KDE
        def custom_scoring(estimator, X):
            return 42 if estimator.bandwidth == 0.1 else 0

        X, _ = make_blobs(
            cluster_std=0.1, random_state=1, centers=[[0, 1], [1, 0], [0, 0]])
        search = TuneGridSearchCV(
            KernelDensity(),
            param_grid=dict(bandwidth=[0.01, 0.1, 1]),
            scoring=custom_scoring,
        )
        search.fit(X)
        self.assertEqual(search.best_params_["bandwidth"], 0.1)
        self.assertEqual(search.best_score_, 42)
Example #20
    def test_tune_search_spaces(self):
        # Test mixed search spaces
        clf = MockClassifier()
        foo = [1, 2, 3]
        bar = [1, 2]
        grid_search = TuneGridSearchCV(clf, {
            "foo_param": tune.grid_search(foo),
            "bar_param": bar
        },
                                       refit=False,
                                       cv=3)
        grid_search.fit(X, y)
        params = grid_search.cv_results_["params"]
        # Collect the set of distinct values tried for each parameter.
        results_grid = {k: {dic[k] for dic in params} for k in params[0]}
        self.assertTrue(len(results_grid["foo_param"]) == len(foo))
        self.assertTrue(len(results_grid["bar_param"]) == len(bar))
Example #21
    def sweep(self, X, y):
        # Grid-search over optimizer, kernel initializer, and epoch count
        # for the wrapped Keras model.
        optimizers = ["rmsprop", "adam"]
        kernel_initializer = ["glorot_uniform", "normal"]
        epochs = [5, 10]
        param_grid = dict(optimizer=optimizers,
                          nb_epoch=epochs,
                          kernel_initializer=kernel_initializer)
        grid = TuneGridSearchCV(
            estimator=self.model,
            param_grid=param_grid,
            scoring="neg_mean_squared_error",
        )
        grid_result = grid.fit(X, y)

        return grid_result
Example #22
    def test_gridsearch_no_multi_cv_results(self):
        parameter_grid = {"alpha": [1e-4, 1e-1, 1], "epsilon": [0.01, 0.1]}

        cv = 2

        tune_search = TuneGridSearchCV(
            SGDClassifier(), parameter_grid, max_iters=20, refit=False, cv=cv)
        tune_search.fit(X, y)
        result = tune_search.cv_results_

        keys_to_check = ["mean_test_score"]

        for i in range(cv):
            keys_to_check.append("split%d_test_score" % i)

        for key in keys_to_check:
            self.assertIn(key, result)
Example #23
    def test_grid_search_precomputed_kernel(self):
        # Test that grid search works when the input features are given in the
        # form of a precomputed kernel matrix
        X_, y_ = make_classification(n_samples=200,
                                     n_features=100,
                                     random_state=0)

        # compute the training kernel matrix corresponding to the linear kernel
        K_train = np.dot(X_[:180], X_[:180].T)
        y_train = y_[:180]

        clf = SVC(kernel="precomputed")
        cv = TuneGridSearchCV(clf, {"C": [0.1, 1.0]})
        cv.fit(K_train, y_train)

        self.assertTrue(cv.best_score_ >= 0)

        # compute the test kernel matrix
        K_test = np.dot(X_[180:], X_[:180].T)
        y_test = y_[180:]

        y_pred = cv.predict(K_test)

        self.assertTrue(np.mean(y_pred == y_test) >= 0)

        # test error is raised when the precomputed kernel is not array-like
        # or sparse
        with self.assertRaises(TuneError):
            cv.fit(K_train.tolist(), y_train)
Example #24
    def test_timeout(self):
        clf = SleepClassifier()
        # SleepClassifier sleeps for `foo_param` seconds, `cv` times.
        # Thus, the time budget is exhausted after testing the first two
        # `foo_param`s.
        grid_search = TuneGridSearchCV(clf, {"foo_param": [1.1, 1.2, 2.5]},
                                       time_budget_s=5.0,
                                       cv=2,
                                       max_iters=5,
                                       early_stopping=True)

        start = time.time()
        grid_search.fit(X, y)
        taken = time.time() - start

        print(grid_search)
        # Without the timeout this would need over 50 seconds to finish.
        # Allow for some initialization overhead.
        self.assertLess(taken, 18.0)
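`SleepClassifier` is likewise a test helper that is not shown here. Going by the comment in the test above (it sleeps for `foo_param` seconds per training iteration), a rough sketch, again an assumption rather than the real helper, could be:

import time
import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin


class SleepClassifier(BaseEstimator, ClassifierMixin):
    """Hypothetical stand-in: every training iteration sleeps for
    `foo_param` seconds, so fit time scales with the parameter."""

    def __init__(self, foo_param=1.0):
        self.foo_param = foo_param

    def partial_fit(self, X, y, classes=None):
        time.sleep(self.foo_param)
        return self

    def fit(self, X, y):
        return self.partial_fit(X, y)

    def predict(self, X):
        return np.zeros(len(X), dtype=int)

    def score(self, X, y):
        return 0.5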
Example #25
    def test_grid_search_sparse(self):
        # Test that grid search works with both dense and sparse matrices
        X_, y_ = make_classification(
            n_samples=200, n_features=100, random_state=0)

        clf = LinearSVC()
        cv = TuneGridSearchCV(clf, {"C": [0.1, 1.0]})
        cv.fit(X_[:180], y_[:180])
        y_pred = cv.predict(X_[180:])
        C = cv.best_estimator_.C

        X_ = sp.csr_matrix(X_)
        clf = LinearSVC()
        cv = TuneGridSearchCV(clf, {"C": [0.1, 1.0]})
        cv.fit(X_[:180].tocoo(), y_[:180])
        y_pred2 = cv.predict(X_[180:])
        C2 = cv.best_estimator_.C

        self.assertTrue(np.mean(y_pred == y_pred2) >= 0.9)
        self.assertEqual(C, C2)
Example #26
    def test_max_iters(self):
        X, y = make_classification(n_samples=50,
                                   n_features=50,
                                   n_informative=3,
                                   random_state=0)

        clf = PlateauClassifier(converge_after=20)

        search = TuneGridSearchCV(clf, {"foo_param": [2.0, 3.0, 4.0]},
                                  cv=2,
                                  max_iters=6,
                                  early_stopping=True)

        search.fit(X, y)

        print(search.cv_results_)

        for iters in search.cv_results_["training_iteration"]:
            # Stop after 6 iterations.
            self.assertLessEqual(iters, 6)
Example #27
    def test_pandas_input(self):
        # check cross_val_score doesn't destroy pandas dataframe
        types = [(MockDataFrame, MockDataFrame)]
        try:
            from pandas import Series, DataFrame

            types.append((DataFrame, Series))
        except ImportError:
            pass

        X = np.arange(100).reshape(10, 10)
        y = np.array([0] * 5 + [1] * 5)

        for InputFeatureType, TargetType in types:
            # X dataframe, y series
            X_df, y_ser = InputFeatureType(X), TargetType(y)
            clf = CheckingClassifier(
                check_X=lambda x: isinstance(x, InputFeatureType),
                check_y=lambda x: isinstance(x, TargetType),
            )

            grid_search = TuneGridSearchCV(clf, {"foo_param": [1, 2, 3]})
            grid_search.fit(X_df, y_ser).score(X_df, y_ser)
            grid_search.predict(X_df)
            self.assertTrue(hasattr(grid_search, "cv_results_"))
Example #28
    def test_unsupervised_grid_search(self):
        # test grid-search with unsupervised estimator
        X, y = make_blobs(random_state=0)
        km = KMeans(random_state=0)
        grid_search = TuneGridSearchCV(km,
                                       param_grid=dict(n_clusters=[2, 3, 4]),
                                       scoring="adjusted_rand_score")
        grid_search.fit(X, y)
        # ARI can find the right number :)
        self.assertEqual(grid_search.best_params_["n_clusters"], 3)

        # Now without a score, and without y
        grid_search = TuneGridSearchCV(km,
                                       param_grid=dict(n_clusters=[2, 3, 4]))
        grid_search.fit(X)
        self.assertEqual(grid_search.best_params_["n_clusters"], 4)
Example #29
# This snippet was truncated at the top; the imports, toy data, and the
# `MyModule` header below are a minimal sketch restored from how the rest
# of the code uses them (input dim 20, `num_units`/`nonlin` hyperparameters).
import numpy as np
from sklearn.datasets import make_classification
import torch.nn as nn
import torch.nn.functional as F
from skorch import NeuralNetClassifier
from tune_sklearn import TuneGridSearchCV

X, y = make_classification(1000, 20, n_informative=10, random_state=0)
X, y = X.astype(np.float32), y.astype(np.int64)


class MyModule(nn.Module):
    def __init__(self, num_units=10, nonlin=F.relu):
        super().__init__()
        self.dense0 = nn.Linear(20, num_units)
        self.nonlin = nonlin
        self.dropout = nn.Dropout(0.5)
        self.dense1 = nn.Linear(num_units, 10)
        self.output = nn.Linear(10, 2)

    def forward(self, X, **kwargs):
        X = self.nonlin(self.dense0(X))
        X = self.dropout(X)
        X = F.relu(self.dense1(X))
        X = F.softmax(self.output(X), dim=-1)
        return X


net = NeuralNetClassifier(
    MyModule,
    max_epochs=10,
    lr=0.1,
    # Shuffle training data on each epoch
    iterator_train__shuffle=True,
)

params = {
    "lr": [0.01, 0.02],
    "module__num_units": [10, 20],
}

gs = TuneGridSearchCV(net, params, scoring="accuracy")
gs.fit(X, y)
print(gs.best_score_, gs.best_params_)
Example #30
# This snippet was truncated at the top; the imports and MNIST-style data
# preparation below are a minimal sketch restored from the (784,) input
# shape and the 10-class softmax output used further down.
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.utils import np_utils
from keras.wrappers.scikit_learn import KerasClassifier
from tune_sklearn import TuneGridSearchCV

nb_classes = 10
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = X_train.reshape(60000, 784).astype("float32") / 255
X_test = X_test.reshape(10000, 784).astype("float32") / 255
Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)


def create_model(optimizer="rmsprop", kernel_initializer="glorot_uniform"):
    model = Sequential()
    model.add(Dense(512, input_shape=(784, )))
    model.add(Activation("relu"))
    model.add(Dropout(0.2))
    model.add(Dense(512, kernel_initializer=kernel_initializer))
    model.add(Activation("relu"))
    model.add(Dropout(0.2))
    model.add(Dense(10, kernel_initializer=kernel_initializer))
    model.add(Activation("softmax"))  # This special "softmax" a
    model.compile(loss="binary_crossentropy",
                  optimizer=optimizer,
                  metrics=["accuracy"])
    return model


model = KerasClassifier(build_fn=create_model)
optimizers = ["rmsprop", "adam"]
kernel_initializer = ["glorot_uniform", "normal"]
epochs = [5, 10]
param_grid = dict(optimizer=optimizers,
                  nb_epoch=epochs,
                  kernel_initializer=kernel_initializer)
grid = TuneGridSearchCV(estimator=model, param_grid=param_grid)
grid_result = grid.fit(X_train, Y_train)
print(grid_result.best_params_)
print(grid_result.cv_results_)