def test_multi_best_regression(self):
    x, y = make_regression(n_samples=100, n_features=10, n_informative=5)
    model = SGDRegressor()

    parameter_grid = {"alpha": [1e-4, 1e-1, 1], "epsilon": [0.01, 0.1]}
    scoring = ("neg_mean_absolute_error", "neg_mean_squared_error")
    search_methods = ["random", "bayesian", "hyperopt", "bohb"]
    for search_method in search_methods:
        tune_search = TuneSearchCV(
            model,
            parameter_grid,
            scoring=scoring,
            search_optimization=search_method,
            cv=2,
            n_trials=3,
            n_jobs=1,
            refit="neg_mean_absolute_error")
        tune_search.fit(x, y)
        self.assertAlmostEqual(
            tune_search.best_score_,
            max(tune_search.cv_results_[
                "mean_test_neg_mean_absolute_error"]),
            places=10)

        p = tune_search.cv_results_["params"]
        scores = tune_search.cv_results_[
            "mean_test_neg_mean_absolute_error"]
        cv_best_param = max(
            list(zip(scores, p)), key=lambda pair: pair[0])[1]
        self.assertEqual(tune_search.best_params_, cv_best_param)
def test_multi_best_classification_scoring_dict(self):
    digits = datasets.load_digits()
    x = digits.data
    y = digits.target
    model = SGDClassifier()

    parameter_grid = {"alpha": [1e-4, 1e-1, 1], "epsilon": [0.01, 0.1]}
    scoring = {"acc": "accuracy", "f1": "f1_micro"}
    search_methods = ["random", "bayesian", "hyperopt", "bohb"]
    for search_method in search_methods:
        tune_search = TuneSearchCV(
            model,
            parameter_grid,
            scoring=scoring,
            search_optimization=search_method,
            cv=2,
            n_trials=3,
            n_jobs=1,
            refit="acc")
        tune_search.fit(x, y)
        self.assertAlmostEqual(
            tune_search.best_score_,
            max(tune_search.cv_results_["mean_test_acc"]),
            places=10)

        p = tune_search.cv_results_["params"]
        scores = tune_search.cv_results_["mean_test_acc"]
        cv_best_param = max(
            list(zip(scores, p)), key=lambda pair: pair[0])[1]
        self.assertEqual(tune_search.best_params_, cv_best_param)
def test_multi_refit_false(self):
    digits = datasets.load_digits()
    x = digits.data
    y = digits.target
    model = SGDClassifier()

    parameter_grid = {"alpha": [1e-4, 1e-1, 1], "epsilon": [0.01, 0.1]}
    scoring = ("accuracy", "f1_micro")

    tune_search = TuneSearchCV(
        model,
        parameter_grid,
        scoring=scoring,
        search_optimization="random",
        cv=2,
        n_trials=3,
        n_jobs=1,
        refit=False)
    tune_search.fit(x, y)

    with self.assertRaises(ValueError) as exc:
        tune_search.best_score_
    self.assertTrue(("instance was initialized with refit=False. "
                     "For multi-metric evaluation,") in str(exc.exception))
    with self.assertRaises(ValueError) as exc:
        tune_search.best_index_
    self.assertTrue(("instance was initialized with refit=False. "
                     "For multi-metric evaluation,") in str(exc.exception))
    with self.assertRaises(ValueError) as exc:
        tune_search.best_params_
    self.assertTrue(("instance was initialized with refit=False. "
                     "For multi-metric evaluation,") in str(exc.exception))
def test_multi_best(self):
    digits = datasets.load_digits()
    x = digits.data
    y = digits.target

    parameter_grid = {"alpha": [1e-4, 1e-1, 1], "epsilon": [0.01, 0.1]}
    scoring = ("accuracy", "f1_micro")

    tune_search = TuneSearchCV(
        SGDClassifier(),
        parameter_grid,
        scoring=scoring,
        max_iters=20,
        refit="accuracy")
    tune_search.fit(x, y)
    self.assertAlmostEqual(
        tune_search.best_score_,
        max(tune_search.cv_results_["mean_test_accuracy"]),
        places=10)

    p = tune_search.cv_results_["params"]
    scores = tune_search.cv_results_["mean_test_accuracy"]
    cv_best_param = max(list(zip(scores, p)), key=lambda pair: pair[0])[1]
    self.assertEqual(tune_search.best_params_, cv_best_param)
def test_pipeline_early_stop(self):
    digits = datasets.load_digits()
    x = digits.data
    y = digits.target

    pipe = Pipeline([("reduce_dim", PCA()), ("classify", SGDClassifier())])
    parameter_grid = [
        {
            "classify__alpha": [1e-4, 1e-1, 1],
            "classify__epsilon": [0.01, 0.1]
        },
    ]
    with self.assertRaises(ValueError) as exc:
        TuneSearchCV(
            pipe,
            parameter_grid,
            early_stopping=True,
            pipeline_auto_early_stop=False,
            max_iters=10)
    self.assertTrue((
        "Early stopping is not supported because the estimator does "
        "not have `partial_fit`, does not support warm_start, or "
        "is a tree classifier. Set `early_stopping=False`."
    ) in str(exc.exception))

    tune_search = TuneSearchCV(
        pipe, parameter_grid, early_stopping=True, max_iters=10)
    tune_search.fit(x, y)
def test_plateau(self):
    try:
        from ray.tune.stopper import TrialPlateauStopper
    except ImportError:
        self.skipTest("`TrialPlateauStopper` not available in "
                      "current Ray version.")
        return

    X, y = make_classification(
        n_samples=50, n_features=50, n_informative=3, random_state=0)

    clf = PlateauClassifier(converge_after=4)
    stopper = TrialPlateauStopper(metric="objective")

    search = TuneSearchCV(
        clf, {"foo_param": [2.0, 3.0, 4.0]},
        cv=2,
        max_iters=20,
        stopper=stopper,
        early_stopping=True)
    search.fit(X, y)
    print(search.cv_results_)

    for iters in search.cv_results_["training_iteration"]:
        # Converges after 4 iterations, but the stopper needs another
        # 4 to detect it converged.
        self.assertLessEqual(iters, 8)
def sweep(
    self,
    params: Dict,
    X,
    y,
    search_algorithm: str = "bayesian",
    num_trials: int = 3,
    scoring_func: str = "r2",
):
    from tune_sklearn import TuneGridSearchCV, TuneSearchCV

    X, y = (
        torch.tensor(X).float().to(device=self.device),
        torch.tensor(y).float().to(device=self.device),
    )
    tune_search = TuneSearchCV(
        self.model,
        params,
        search_optimization=search_algorithm,
        n_trials=num_trials,
        early_stopping=True,
        scoring=scoring_func,
    )
    tune_search.fit(X, y)
    return tune_search
def tune_remote(x_train, y_train):
    clf = RandomForestClassifier()
    param_distributions = {
        "n_estimators": randint(20, 80),
        "max_depth": randint(2, 10)
    }
    tune_search = TuneSearchCV(clf, param_distributions, n_trials=3)
    tune_search.fit(x_train, y_train)
    return tune_search
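# A possible driver for `tune_remote` above -- a sketch only; the dataset
# and the train/test split are illustrative, not part of the snippet.
# TuneSearchCV refits the best configuration by default, so the returned
# search object can predict and score directly.
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

data = load_breast_cancer()
x_train, x_test, y_train, y_test = train_test_split(
    data.data, data.target, test_size=0.2)
search = tune_remote(x_train, y_train)
print(search.best_params_)
print(search.score(x_test, y_test))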
def test_trivial_cv_results_attr(self):
    # Test search over a "grid" with only one point.
    # Non-regression test: grid_scores_ wouldn't be set by
    # dcv.GridSearchCV.
    clf = MockClassifier()
    grid_search = TuneGridSearchCV(clf, {"foo_param": [1]}, cv=3)
    grid_search.fit(X, y)
    self.assertTrue(hasattr(grid_search, "cv_results_"))

    random_search = TuneSearchCV(clf, {"foo_param": [0]}, n_iter=1, cv=3)
    random_search.fit(X, y)
    self.assertTrue(hasattr(random_search, "cv_results_"))
def sweep(self, params: Dict, X, y):
    tune_search = TuneSearchCV(
        self.model,
        param_distributions=params,
        n_trials=3,
        # early_stopping=True,
        # use_gpu=True
    )
    tune_search.fit(X, y)
    return tune_search
def sweep(self, X, y, params: Dict = None):
    if not params:
        raise NotImplementedError
    tune_search = TuneSearchCV(
        self.model,
        param_distributions=params,
        n_trials=3,
        # early_stopping=True,
        # use_gpu=True
    )
    tune_search.fit(X, y)
    return tune_search
def test_local_mode(self):
    digits = datasets.load_digits()
    x = digits.data
    y = digits.target
    clf = SGDClassifier()
    parameter_grid = {
        "alpha": Real(1e-4, 1e-1, 1),
        "epsilon": Real(0.01, 0.1)
    }
    tune_search = TuneSearchCV(
        clf,
        parameter_grid,
        n_jobs=1,
        max_iters=10,
        local_dir="./test-result")

    import ray
    with patch.object(ray, "init", wraps=ray.init) as wrapped_init:
        tune_search.fit(x, y)
    self.assertTrue(wrapped_init.call_args[1]["local_mode"])
def _test_seed_run(self, search_optimization, seed):
    digits = datasets.load_digits()
    x = digits.data
    y = digits.target

    parameters = {
        "classify__alpha": [1e-4, 1e-3, 1e-2, 1e-1, 1],
        "classify__epsilon": [0.01, 0.02, 0.03, 0.04, 0.05, 0.06]
    }
    pipe = Pipeline([("reduce_dim", PCA()), ("classify", SGDClassifier())])

    if isinstance(seed, str):
        _seed = np.random.RandomState(seed=int(seed))
    else:
        _seed = seed
    tune_search_1 = TuneSearchCV(
        pipe,
        parameters.copy(),
        early_stopping=True,
        max_iters=1,
        search_optimization=search_optimization,
        random_state=_seed)
    tune_search_1.fit(x, y)

    if isinstance(seed, str):
        _seed = np.random.RandomState(seed=int(seed))
    else:
        _seed = seed
    tune_search_2 = TuneSearchCV(
        pipe,
        parameters.copy(),
        early_stopping=True,
        max_iters=1,
        search_optimization=search_optimization,
        random_state=_seed)
    tune_search_2.fit(x, y)

    try:
        self.assertSequenceEqual(tune_search_1.cv_results_["params"],
                                 tune_search_2.cv_results_["params"])
    except AssertionError:
        print(f"Seeds: {tune_search_1.seed} == {tune_search_2.seed}?")
        raise
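# Hypothetical drivers for the `_test_seed_run` helper above, exercising
# both an int seed and a string seed per backend. The method names and the
# choice of backends are illustrative, not taken from the original suite.
def test_seed_random(self):
    self._test_seed_run("random", seed=1234)
    self._test_seed_run("random", seed="1234")

def test_seed_bayesian(self):
    self._test_seed_run("bayesian", seed=1234)
    self._test_seed_run("bayesian", seed="1234")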
def test_max_iters(self):
    X, y = make_classification(
        n_samples=50, n_features=50, n_informative=3, random_state=0)

    clf = PlateauClassifier(converge_after=20)

    search = TuneSearchCV(
        clf, {"foo_param": [2.0, 3.0, 4.0]},
        cv=2,
        max_iters=6,
        early_stopping=True)
    search.fit(X, y)
    print(search.cv_results_)

    for iters in search.cv_results_["training_iteration"]:
        # Stop after 6 iterations.
        self.assertLessEqual(iters, 6)
def _test_method(self, search_method):
    digits = datasets.load_digits()
    x = digits.data
    y = digits.target

    tune_search = TuneSearchCV(
        self.clf,
        self.parameter_grid,
        search_optimization=search_method,
        cv=2,
        n_trials=3,
        n_jobs=1,
        refit=True)
    tune_search.fit(x, y)
    params = tune_search.best_estimator_.get_params()
    print({
        k: v
        for k, v in params.items() if k in ("alpha", "epsilon", "penalty")
    })
    self.assertTrue(1e-4 <= params["alpha"] <= 0.5)
    self.assertTrue(0.01 <= params["epsilon"] <= 0.05)
    self.assertTrue(params["penalty"] in ("elasticnet", "l1"))
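# Hypothetical per-backend drivers for the `_test_method` helper above;
# a suite like this presumably dispatches one test per search backend
# (the method names here are illustrative):
def test_random_search(self):
    self._test_method("random")

def test_bayesian_search(self):
    self._test_method("bayesian")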
def test_local_dir(self):
    digits = datasets.load_digits()
    x = digits.data
    y = digits.target
    clf = SGDClassifier()
    parameter_grid = {
        "alpha": Real(1e-4, 1e-1, 1),
        "epsilon": Real(0.01, 0.1)
    }

    scheduler = MedianStoppingRule(grace_period=10.0)
    tune_search = TuneSearchCV(
        clf,
        parameter_grid,
        early_stopping=scheduler,
        max_iters=10,
        local_dir="./test-result")
    tune_search.fit(x, y)

    self.assertTrue(len(os.listdir("./test-result")) != 0)
def tune_ray(clf,
             params,
             X_train,
             y_train,
             X_test,
             y_test,
             n_params=-1,
             max_epochs=-1,
             n_jobs=4):
    common = dict(random_state=42)
    split = ShuffleSplit(test_size=0.20, n_splits=1, random_state=42)
    clf = clone(clf).set_params(prefix="ray")

    from tune_sklearn import TuneSearchCV

    search = TuneSearchCV(
        clf,
        params,
        cv=split,
        early_stopping=True,
        max_iters=max_epochs,
        n_iter=n_params,
        random_state=42,
        refit=False,
    )
    start = time()
    search.fit(X_train, y_train)
    fit_time = time() - start
    data = {
        "library": "ray",
        "fit_time": fit_time,
        "start_time": start,
        "n_params": n_params,
        "n_jobs": n_jobs,
        "max_epochs": max_epochs,
    }
    return search, data
def test_timeout(self):
    X, y = make_classification(
        n_samples=50, n_features=50, n_informative=3, random_state=0)

    clf = SleepClassifier()
    # SleepClassifier sleeps for `foo_param` seconds, `cv` times.
    # Thus, the time budget is exhausted after testing the first two
    # `foo_param`s.
    search = TuneSearchCV(
        clf, {"foo_param": [1.1, 1.2, 2.5]},
        time_budget_s=5.0,
        cv=2,
        max_iters=5,
        early_stopping=True)

    start = time.time()
    search.fit(X, y)
    taken = time.time() - start

    print(search)
    # Without a timeout we would need over 50 seconds for this to
    # finish. Allow for some initialization overhead.
    self.assertLess(taken, 25.0)
y = cancer.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

model = lgb.LGBMClassifier()
param_dists = {
    "n_estimators": [400, 700, 1000],
    "colsample_bytree": [0.7, 0.8],
    "max_depth": [15, 20, 25],
    "num_leaves": [50, 100, 200],
    "reg_alpha": [1.1, 1.2, 1.3],
    "reg_lambda": [1.1, 1.2, 1.3],
    "min_split_gain": [0.3, 0.4],
    "subsample": [0.7, 0.8, 0.9],
    "subsample_freq": [20]
}

gs = TuneSearchCV(model, param_dists, n_trials=5, scoring="accuracy")
gs.fit(X_train, y_train)
print(gs.cv_results_)

pred = gs.predict(X_test)
correct = 0
for i in range(len(y_test)):
    if pred[i] == y_test[i]:
        correct += 1
print("Accuracy:", correct / len(pred))
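# The counting loop above can be replaced with sklearn's built-in metric;
# this one-liner computes the same quantity:
from sklearn.metrics import accuracy_score
print("Accuracy:", accuracy_score(y_test, pred))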
from tune_sklearn import TuneSearchCV

# Other imports
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import SGDClassifier

# Set training and validation sets
X, y = make_classification(
    n_samples=11000,
    n_features=1000,
    n_informative=50,
    n_redundant=0,
    n_classes=10,
    class_sep=2.5)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1000)

# Example parameter distributions to tune for SGDClassifier.
# Note the use of (low, high) tuples instead of lists when non-random
# optimization such as BOHB is desired.
param_dists = {"alpha": (1e-4, 1e-1), "epsilon": (1e-2, 1e-1)}

bohb_tune_search = TuneSearchCV(
    SGDClassifier(),
    param_distributions=param_dists,
    n_trials=20,
    max_iters=100,
    search_optimization="bohb",
)

bohb_tune_search.fit(X_train, y_train)
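# A minimal follow-up sketch: TuneSearchCV refits the best configuration
# by default, so the fitted search object can score the held-out split
# directly (mean accuracy for SGDClassifier):
print(bohb_tune_search.best_params_)
print(bohb_tune_search.score(X_test, y_test))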
pipe = Pipeline([("reduce_dim", PCA()), ("classify", SGDClassifier())])
param_grid = [
    {
        "classify__alpha": [1e-4, 1e-1, 1],
        "classify__epsilon": [0.01, 0.1]
    },
]

random = TuneSearchCV(
    pipe,
    param_grid,
    search_optimization="random",
    early_stopping=True,
    max_iters=10,
    pipeline_auto_early_stop=True)
random.fit(X, y)
print(random.cv_results_)

grid = TuneGridSearchCV(
    pipe,
    param_grid=param_grid,
    early_stopping=True,
    max_iters=10,
    pipeline_auto_early_stop=True)
grid.fit(X, y)
print(grid.cv_results_)

# warm start iter
pipe = Pipeline([("reduce_dim", PCA()),
                 ("classify", LogisticRegression(max_iter=1000))])
# A parameter grid for XGBoost
params = {
    "min_child_weight": [1, 5, 10],
    "gamma": [0.5, 1, 1.5, 2, 5],
    "subsample": [0.6, 0.8, 1.0],
    "colsample_bytree": [0.6, 0.8, 1.0],
    "max_depth": [3, 4, 5],
}

xgb = XGBClassifier(
    learning_rate=0.02,
    n_estimators=50,
    objective="binary:logistic",
    nthread=4,
    # tree_method="gpu_hist"  # this enables GPU.
    # See https://github.com/dmlc/xgboost/issues/2819
)

digit_search = TuneSearchCV(
    xgb,
    param_distributions=params,
    n_trials=3,
    early_stopping=True,
    # use_gpu=True  # Commented out for testing on GitHub Actions,
    # but this is how you would use a GPU
)
digit_search.fit(x_train, y_train)
print(digit_search.best_params_)
print(digit_search.cv_results_)
def sweep(
    self,
    params: Dict,
    X,
    y,
    search_algorithm: str = "bayesian",
    num_trials: int = 3,
    scoring_func: str = "r2",
    early_stopping: bool = False,
    results_csv_path: str = "outputs/results.csv",
    splitting_criteria: str = "CV",
    test_indices: Union[None, List[int]] = None,
    num_splits: int = 5,
) -> pd.DataFrame:
    if self.scale_data:
        X, y = self.scalar(X, y)

    if splitting_criteria.lower() == "cv":
        cv = None
    elif splitting_criteria.lower() == "timeseries":
        cv = TimeSeriesSplit(n_splits=num_splits)
    elif splitting_criteria.lower() == "grouped":
        cv = GroupShuffleSplit(n_splits=num_splits)
    elif splitting_criteria.lower() == "fixed":
        if not isinstance(test_indices, list):
            raise ValueError(
                "fixed split used but no test indices provided")
        cv = PredefinedSplit(test_fold=test_indices)
    else:
        raise ValueError(
            f"Unknown splitting criteria provided: {splitting_criteria}, "
            "should be one of [cv, timeseries, grouped, fixed]")

    # early stopping is only supported for learners that have a
    # `partial_fit` method
    from tune_sklearn import TuneSearchCV
    import mlflow
    import time

    mlflow.set_tracking_uri(os.path.join("file:/", os.getcwd(), "outputs"))
    # start mlflow auto-logging
    # mlflow.sklearn.autolog()

    if search_algorithm.lower() == "bohb":
        early_stopping = True

    if search_algorithm.lower() in ["bohb", "bayesian", "hyperopt",
                                    "optuna"]:
        search = TuneSearchCV(
            self.model,
            params,
            search_optimization=search_algorithm,
            cv=cv,
            n_trials=num_trials,
            early_stopping=early_stopping,
            scoring=scoring_func,
            loggers=["csv", "tensorboard"],
            verbose=1,
        )
    elif search_algorithm == "grid":
        search = GridSearchCV(
            self.model,
            param_grid=params,
            refit=True,
            cv=cv,
            scoring=scoring_func,
            verbose=1,
        )
    elif search_algorithm == "random":
        search = RandomizedSearchCV(
            self.model,
            param_distributions=params,
            refit=True,
            cv=cv,
            scoring=scoring_func,
            verbose=1,
        )
    else:
        raise NotImplementedError(
            "Search algorithm should be one of grid, hyperopt, bohb, "
            "optuna, bayesian, or random")

    # with mlflow.start_run() as run:
    search.fit(X, y)
    self.model = search.best_estimator_
    results_df = pd.DataFrame(search.cv_results_)
    if not pathlib.Path(results_csv_path).parent.exists():
        pathlib.Path(results_csv_path).parent.mkdir(
            exist_ok=True, parents=True)
    final_path = (results_csv_path[:-4] + "_" +
                  time.strftime("%Y%m%d-%H%M%S") + ".csv")
    logger.info(f"Saving sweeping results to {final_path}")
    results_df.to_csv(final_path)
    logger.info(f"Best hyperparams: {search.best_params_}")
    logger.info(f"Best score: {search.best_score_}")
    return results_df
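# A minimal, hypothetical call sketch for the `sweep` method above. The
# wrapper instance `estimator`, the search space, and the data names are
# illustrative, not from the original code:
results = estimator.sweep(
    params={"alpha": (1e-4, 1e-1), "epsilon": (0.01, 0.1)},
    X=X_train,
    y=y_train,
    search_algorithm="hyperopt",
    splitting_criteria="timeseries",
    num_splits=3,
)
print(results[["mean_test_score", "rank_test_score"]].head())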
from tune_sklearn import TuneSearchCV
from sklearn.linear_model import SGDClassifier
from sklearn import datasets
from sklearn.model_selection import train_test_split
from ray.tune.schedulers import MedianStoppingRule
import numpy as np

digits = datasets.load_digits()
x = digits.data
y = digits.target
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.2)

clf = SGDClassifier()
parameter_grid = {"alpha": (1e-4, 1), "epsilon": (0.01, 0.1)}

scheduler = MedianStoppingRule(grace_period=10.0)

tune_search = TuneSearchCV(
    clf,
    parameter_grid,
    search_optimization="bayesian",
    n_iter=3,
    early_stopping=scheduler,
    max_iters=10)
tune_search.fit(x_train, y_train)

pred = tune_search.predict(x_test)
accuracy = np.count_nonzero(np.array(pred) == np.array(y_test)) / len(pred)
print(accuracy)
    'reg_lambda': [1e-5, 1e-2, 0.45],
    'subsample': [0.6, 0.95]
}

t_search = TuneSearchCV(
    xgb_model,
    param_distributions=parameters_for_testing,
    n_trials=3,
    early_stopping=True,
    # use_gpu=True  # Commented out for testing on GitHub Actions,
    # but this is how you would use a GPU
)
# gsearch1 = GridSearchCV(estimator=xgb_model,
#                         param_grid=parameters_for_testing,
#                         n_jobs=6, iid=False, verbose=10,
#                         scoring='neg_mean_squared_error')
t_search.fit(X_train, y_train)
print(t_search.scorer_)
print('best params')
print(t_search.best_params_)
print('best score')
print(t_search.best_score_)

final_xgb = xgboost.XGBRegressor(
    colsample_bytree=0.6,
    gamma=0.1,
    min_child_weight=1.5,
    learning_rate=0.07,
    max_depth=5,
    n_estimators=1000,
    reg_alpha=0.01,
    reg_lambda=1e-05,
    subsample=0.95)
trained = final_xgb.fit(X_train, y_train)
y_pred = trained.predict(X_test)

def mean_absolute_percentage_error(y_test, y_pred):
    y_test, y_pred = np.array(y_test), np.array(y_pred)
    return np.mean(np.abs((y_test - y_pred) / y_test)) * 100
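# Usage of the helper just defined, applied to the held-out predictions
# computed above:
print("MAPE:", mean_absolute_percentage_error(y_test, y_pred))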
def sweep(
    self,
    params: Dict,
    X,
    y,
    search_algorithm: str = "bayesian",
    num_trials: int = 3,
    scoring_func: str = "r2",
    early_stopping: bool = False,
    results_csv_path: str = "outputs/results.csv",
    splitting_criteria: str = "timeseries",
    test_indices: Union[None, List[int]] = None,
    num_splits: int = 5,
):
    start_dir = str(pathlib.Path(os.getcwd()).parent)
    module_dir = str(pathlib.Path(__file__).parent)
    # temporarily change directory to file directory and then reset
    os.chdir(module_dir)

    if self.scale_data:
        X, y = self.scalar(X, y)
    X, y = (
        torch.tensor(X).float().to(device=self.device),
        torch.tensor(y).float().to(device=self.device),
    )

    if splitting_criteria.lower() == "cv":
        cv = None
    elif splitting_criteria.lower() == "timeseries":
        cv = TimeSeriesSplit(n_splits=num_splits)
    elif splitting_criteria.lower() == "grouped":
        cv = GroupShuffleSplit(n_splits=num_splits)
    elif splitting_criteria.lower() == "fixed":
        if not isinstance(test_indices, list):
            raise ValueError(
                "fixed split used but no test indices provided")
        cv = PredefinedSplit(test_fold=test_indices)
    else:
        raise ValueError(
            f"Unknown splitting criteria provided: {splitting_criteria}, "
            "should be one of [cv, timeseries, grouped, fixed]")

    if search_algorithm.lower() == "bohb":
        early_stopping = True

    if search_algorithm.lower() in ["bohb", "bayesian", "hyperopt",
                                    "optuna"]:
        search = TuneSearchCV(
            self.model,
            params,
            search_optimization=search_algorithm,
            cv=cv,
            n_trials=num_trials,
            early_stopping=early_stopping,
            scoring=scoring_func,
        )
    elif search_algorithm == "grid":
        search = GridSearchCV(
            self.model,
            param_grid=params,
            refit=True,
            cv=cv,
            scoring=scoring_func,
        )
    elif search_algorithm == "random":
        search = RandomizedSearchCV(
            self.model,
            param_distributions=params,
            refit=True,
            cv=cv,
            scoring=scoring_func,
        )
    else:
        raise NotImplementedError(
            "Search algorithm should be one of grid, hyperopt, bohb, "
            "optuna, bayesian, or random")

    with mlflow.start_run() as run:
        search.fit(X, y)
        self.model = search.best_estimator_

    # set path back to initial
    os.chdir(start_dir)
    results_df = pd.DataFrame(search.cv_results_)
    logger.info(f"Best hyperparams: {search.best_params_}")
    if not pathlib.Path(results_csv_path).parent.exists():
        pathlib.Path(results_csv_path).parent.mkdir(
            exist_ok=True, parents=True)
    logger.info(f"Saving sweeping results to {results_csv_path}")
    logger.info(f"Best score: {search.best_score_}")
    results_df.to_csv(results_csv_path)

    cols_keep = [col for col in results_df if "param_" in col]
    cols_keep += ["mean_test_score"]
    results_df = results_df[cols_keep]
    return results_df
def test_random_search_cv_results(self):
    # Make a dataset with a lot of noise to get various kinds of
    # prediction errors across CV folds and parameter settings
    X, y = make_classification(
        n_samples=200, n_features=100, n_informative=3, random_state=0)

    # scipy.stats dists now support `seed`, but we still support scipy
    # 0.12, which doesn't. Hence the assertions in the test for
    # random_search alone should not depend on randomization.
    n_splits = 3
    n_search_iter = 30
    params = dict(C=expon(scale=10), gamma=expon(scale=0.1))
    random_search = TuneSearchCV(
        SVC(),
        n_iter=n_search_iter,
        cv=n_splits,
        param_distributions=params,
        return_train_score=True,
    )
    random_search.fit(X, y)

    param_keys = ("param_C", "param_gamma")
    score_keys = (
        "mean_test_score",
        "mean_train_score",
        "rank_test_score",
        "rank_train_score",
        "split0_test_score",
        "split1_test_score",
        "split2_test_score",
        "split0_train_score",
        "split1_train_score",
        "split2_train_score",
        "std_test_score",
        "std_train_score",
        "time_total_s",
    )
    n_cand = n_search_iter

    def test_check_cv_results_array_types(cv_results, param_keys,
                                          score_keys):
        # Check if the search `cv_results`'s arrays are of correct types
        self.assertTrue(
            all(
                isinstance(cv_results[param], np.ma.MaskedArray)
                for param in param_keys))
        self.assertTrue(
            all(cv_results[key].dtype == object for key in param_keys))
        self.assertFalse(
            any(
                isinstance(cv_results[key], np.ma.MaskedArray)
                for key in score_keys))
        self.assertTrue(
            all(cv_results[key].dtype == np.float64 for key in score_keys
                if not key.startswith("rank")))
        self.assertEqual(cv_results["rank_test_score"].dtype, np.int32)

    def test_check_cv_results_keys(cv_results, param_keys, score_keys,
                                   n_cand):
        # Test that search.cv_results_ contains all the required results
        assert_array_equal(
            sorted(cv_results.keys()),
            sorted(param_keys + score_keys + ("params", )))
        self.assertTrue(
            all(cv_results[key].shape == (n_cand, )
                for key in param_keys + score_keys))

    cv_results = random_search.cv_results_

    # Check results structure
    test_check_cv_results_array_types(cv_results, param_keys, score_keys)
    test_check_cv_results_keys(cv_results, param_keys, score_keys, n_cand)

    # For random_search, all the param array vals should be unmasked
    self.assertFalse(
        any(cv_results["param_C"].mask)
        or any(cv_results["param_gamma"].mask))
"n_estimators": randint(50, 1000), "max_depth": randint(2, 7), 'max_features': randint(5, 25), 'min_weight_fraction_leaf': [0.0, 0.03, 0.05, 0.07, 0.10, 0.15], 'min_impurity_decrease': [0.0, 0.00001, 0.0001, 0.001, 0.01, 0.1] } tune_search = TuneSearchCV(estimator=rf, param_distributions=param_random, search_optimization="random", early_stopping=False, n_iter=30, scoring='accuracy', n_jobs=12, cv=cv, verbose=1) tune_search.fit(X_train, y_train, sample_weight=sample_weights) # bayesian search tune_search = TuneSearchCV(rf, param_random, search_optimization='bayesian', max_iters=100, scoring='accuracy', n_jobs=12, cv=cv, verbose=1) tune_search.fit(X_train, y_train, sample_weight=sample_weights) # scores clf_predictions = tune_search.predict(X_test) tune_search.best_params_