def test_fit(self):
    """Check what GridSearchCV exposes after fit() on the iris data.

    A RandomForestClassifier is searched over a 2x2 grid with the
    searcher's default ``cv``. The test verifies the set of keys in
    ``cv_results_``, that every parameter combination is reported exactly
    once, and that the ``best_*``, ``scorer_`` and ``n_splits_``
    attributes are present after fitting.
    """
    features, labels = datasets.load_iris(return_X_y=True)
    x = ds.array(features, (30, 4))
    y = ds.array(labels[:, np.newaxis], (30, 1))

    param_grid = {'n_estimators': (2, 4), 'max_depth': range(3, 5)}
    rf = RandomForestClassifier()
    searcher = GridSearchCV(rf, param_grid)
    searcher.fit(x, y)

    # One per-split score column is expected for each of the five splits.
    expected_keys = {'split%d_test_score' % i for i in range(5)}
    expected_keys |= {
        'param_max_depth',
        'param_n_estimators',
        'params',
        'mean_test_score',
        'std_test_score',
        'rank_test_score',
    }
    self.assertSetEqual(set(searcher.cv_results_.keys()), expected_keys)

    # Tick off each (max_depth, n_estimators) pair as it is seen; removing
    # it guarantees a duplicated combination fails the membership check.
    remaining = [(3, 2), (3, 4), (4, 2), (4, 4)]
    for candidate in searcher.cv_results_['params']:
        combo = (candidate['max_depth'], candidate['n_estimators'])
        self.assertIn(combo, remaining)
        remaining.remove(combo)
    self.assertEqual(len(remaining), 0)

    fitted_attrs = (
        'best_estimator_',
        'best_score_',
        'best_params_',
        'best_index_',
        'scorer_',
    )
    for attr in fitted_attrs:
        self.assertTrue(hasattr(searcher, attr))
    self.assertEqual(searcher.n_splits_, 5)
def main():
    """Run a grid search over a random forest on iris and print the results.

    Prints the evaluated parameter sets, their mean test scores, a
    DataFrame view of ``cv_results_`` (first two columns, then the full
    table with row/column truncation disabled) and finally the fitted
    searcher's summary attributes.
    """
    features, labels = datasets.load_iris(return_X_y=True)
    x = ds.array(features, (30, 4))
    y = ds.array(labels[:, np.newaxis], (30, 1))

    parameters = {
        'n_estimators': (1, 2, 4, 8, 16, 32),
        'max_depth': range(3, 5),
    }
    searcher = GridSearchCV(RandomForestClassifier(), parameters, cv=5)

    # Seed NumPy's global RNG right before fitting.
    np.random.seed(0)
    searcher.fit(x, y)

    results = searcher.cv_results_
    print(results['params'])
    print(results['mean_test_score'])

    pd_df = pd.DataFrame.from_dict(results)
    print(pd_df[['params', 'mean_test_score']])
    with pd.option_context('display.max_rows', None,
                           'display.max_columns', None):
        print(pd_df)

    summary_attrs = (
        'best_estimator_',
        'best_score_',
        'best_params_',
        'best_index_',
        'scorer_',
        'n_splits_',
    )
    for attr in summary_attrs:
        print(getattr(searcher, attr))
def test_cv_invalid(self):
    """Check that an unsupported ``cv`` value is rejected with ValueError.

    An empty dict is passed as ``cv``. Both constructing the searcher and
    fitting it happen inside the ``assertRaises`` block, so the test
    passes whichever of the two raises.
    """
    features, labels = datasets.load_iris(return_X_y=True)
    x = ds.array(features, (30, 4))
    y = ds.array(labels[:, np.newaxis], (30, 1))

    forest = RandomForestClassifier()
    grid = {'n_estimators': (2, 4)}

    with self.assertRaises(ValueError):
        GridSearchCV(forest, grid, cv={}).fit(x, y)
def test_scoring_callable(self):
    """Check GridSearchCV when ``scoring`` is a user-supplied callable.

    A callable that delegates to the estimator's own ``score`` must lead
    to a normally fitted searcher. A callable returning a string must make
    ``fit`` raise ValueError with the message
    'scoring must return a number'.
    """
    features, labels = datasets.load_iris(return_X_y=True)
    x = ds.array(features, (30, 4))
    y = ds.array(labels[:, np.newaxis], (30, 1))

    param_grid = {'n_estimators': (2, 4)}
    rf = RandomForestClassifier()

    def scoring(clf, x_score, y_real):
        return clf.score(x_score, y_real)

    def invalid_scoring(clf, x_score, y_score):
        # Deliberately returns a non-numeric value.
        return '2'

    searcher = GridSearchCV(rf, param_grid, cv=3, scoring=scoring)
    searcher.fit(x, y)

    fitted_attrs = (
        'cv_results_',
        'best_estimator_',
        'best_score_',
        'best_params_',
        'best_index_',
        'scorer_',
    )
    for attr in fitted_attrs:
        self.assertTrue(hasattr(searcher, attr))

    searcher = GridSearchCV(rf, param_grid, cv=3, scoring=invalid_scoring)
    with self.assertRaisesRegex(ValueError, 'scoring must return a number'):
        searcher.fit(x, y)
def test_cv_class(self):
    """Check GridSearchCV when ``cv`` is a splitter object (``KFold(4)``).

    After fitting, the searcher must expose ``cv_results_``, the
    ``best_*`` attributes and ``scorer_``.
    """
    features, labels = datasets.load_iris(return_X_y=True)
    x = ds.array(features, (30, 4))
    y = ds.array(labels[:, np.newaxis], (30, 1))

    rf = RandomForestClassifier()
    param_grid = {'n_estimators': (2, 4)}
    splitter = KFold(4)

    searcher = GridSearchCV(rf, param_grid, cv=splitter)
    searcher.fit(x, y)

    fitted_attrs = (
        'cv_results_',
        'best_estimator_',
        'best_score_',
        'best_params_',
        'best_index_',
        'scorer_',
    )
    for attr in fitted_attrs:
        self.assertTrue(hasattr(searcher, attr))
def test_scoring_invalid(self):
    """Check that ``fit`` raises ValueError for the scoring string 'roc_auc'.

    The searcher is built outside the ``assertRaises`` block, so the error
    is required to come from ``fit`` itself, not from the constructor.
    """
    features, labels = datasets.load_iris(return_X_y=True)
    x = ds.array(features, (30, 4))
    y = ds.array(labels[:, np.newaxis], (30, 1))

    param_grid = {'n_estimators': (2, 4)}
    rf = RandomForestClassifier()
    searcher = GridSearchCV(
        rf,
        param_grid,
        cv=3,
        scoring='roc_auc',
        refit=False,
    )

    with self.assertRaises(ValueError):
        searcher.fit(x, y)
def test_fit_2(self):
    """Check GridSearchCV fit() on a different dataset and estimator.

    The breast-cancer features are standardised with ``StandardScaler``
    and a ``CascadeSVM`` is searched over a single-point grid with
    ``cv=5``. The ``best_*`` and ``scorer_`` attributes must exist
    afterwards and ``n_splits_`` must equal 5.
    """
    features, labels = datasets.load_breast_cancer(return_X_y=True)

    x = StandardScaler().fit_transform(
        ds.array(features, block_size=(100, 10))
    )
    y = ds.array(labels.reshape(-1, 1), block_size=(100, 1))

    parameters = {'c': [0.1], 'gamma': [0.1]}
    searcher = GridSearchCV(CascadeSVM(), parameters, cv=5)
    searcher.fit(x, y)

    fitted_attrs = (
        'best_estimator_',
        'best_score_',
        'best_params_',
        'best_index_',
        'scorer_',
    )
    for attr in fitted_attrs:
        self.assertTrue(hasattr(searcher, attr))
    self.assertEqual(searcher.n_splits_, 5)
def test_refit_false(self):
    """Check GridSearchCV fit() when ``refit=False``.

    The iris data is shuffled with a fixed seed and a ``CascadeSVM`` is
    searched over ``max_iter``. With refitting disabled the searcher must
    not expose ``best_estimator_``, but must still expose the other
    ``best_*`` attributes, ``scorer_`` and ``n_splits_ == 3``.
    """
    features, labels = datasets.load_iris(return_X_y=True)
    x = ds.array(features, (30, 4))
    y = ds.array(labels[:, np.newaxis], (30, 1))

    shuffle_seed = 0
    x, y = shuffle(x, y, random_state=shuffle_seed)

    param_grid = {'max_iter': range(1, 5)}
    csvm = CascadeSVM(check_convergence=False)
    searcher = GridSearchCV(csvm, param_grid, cv=3, refit=False)
    searcher.fit(x, y)

    self.assertFalse(hasattr(searcher, 'best_estimator_'))
    for attr in ('best_score_', 'best_params_', 'best_index_', 'scorer_'):
        self.assertTrue(hasattr(searcher, attr))
    self.assertEqual(searcher.n_splits_, 3)
def test_scoring_dict(self):
    """Tests GridSearchCV with scoring parameter of type dict.

    The scoring dict maps 'default_score' to ``None`` and 'custom_score'
    to a callable that scores the forest with ``hard_vote`` enabled.

    With ``refit=False`` the fitted searcher must expose ``cv_results_``
    and ``scorer_`` but none of the ``best_*`` attributes. With
    ``refit=True`` and the same scoring dict, ``fit`` must raise
    ValueError.
    """
    x_np, y_np = datasets.load_iris(return_X_y=True)
    x = ds.array(x_np, (30, 4))
    y = ds.array(y_np[:, np.newaxis], (30, 1))

    param_grid = {'n_estimators': (2, 4)}
    rf = RandomForestClassifier()

    def hard_vote_score(rand_forest, x, y):
        # Temporarily switch the forest to hard voting for this score.
        # The flag is reset in ``finally`` so a failing ``score`` call
        # cannot leave the estimator in the modified state.
        rand_forest.hard_vote = True
        try:
            return rand_forest.score(x, y)
        finally:
            rand_forest.hard_vote = False

    scoring = {'default_score': None, 'custom_score': hard_vote_score}

    searcher = GridSearchCV(rf, param_grid, cv=3, scoring=scoring,
                            refit=False)
    searcher.fit(x, y)

    self.assertTrue(hasattr(searcher, 'cv_results_'))
    self.assertFalse(hasattr(searcher, 'best_estimator_'))
    self.assertFalse(hasattr(searcher, 'best_score_'))
    self.assertFalse(hasattr(searcher, 'best_params_'))
    self.assertFalse(hasattr(searcher, 'best_index_'))
    self.assertTrue(hasattr(searcher, 'scorer_'))

    searcher = GridSearchCV(rf, param_grid, cv=3, scoring=scoring,
                            refit=True)
    with self.assertRaises(ValueError):
        searcher.fit(x, y)
def test_refit_callable(self):
    """Check GridSearchCV when ``refit`` is a callable returning an index.

    With a valid index (1) the fitted searcher must expose
    ``cv_results_``, ``best_estimator_``, ``best_params_``,
    ``best_index_`` and ``scorer_`` but not ``best_score_``. When the
    callable returns a string, ``fit`` must raise TypeError; when it
    returns -1, ``fit`` must raise IndexError.
    """
    features, labels = datasets.load_iris(return_X_y=True)
    x = ds.array(features, (30, 4))
    y = ds.array(labels[:, np.newaxis], (30, 1))

    param_grid = {'n_estimators': (2, 4)}
    rf = RandomForestClassifier()

    # ``refit`` looks this variable up when it is called, so rebinding it
    # below changes what later searchers receive.
    chosen_index = 1

    def refit(results):
        return chosen_index

    searcher = GridSearchCV(rf, param_grid, cv=3, refit=refit)
    searcher.fit(x, y)

    self.assertTrue(hasattr(searcher, 'cv_results_'))
    self.assertTrue(hasattr(searcher, 'best_estimator_'))
    self.assertFalse(hasattr(searcher, 'best_score_'))
    for attr in ('best_params_', 'best_index_', 'scorer_'):
        self.assertTrue(hasattr(searcher, attr))

    invalid_cases = (('str', TypeError), (-1, IndexError))
    for bad_index, expected_error in invalid_cases:
        chosen_index = bad_index
        searcher = GridSearchCV(rf, param_grid, cv=3, refit=refit)
        with self.assertRaises(expected_error):
            searcher.fit(x, y)