def test_input_errors_randomized(params, expected_error_message):
    """Check that bad ``HalvingRandomSearchCV`` settings fail at ``fit`` time.

    Covers input errors that are specific to the randomized halving search.

    Parameters
    ----------
    params : dict
        Extra keyword arguments forwarded to the ``HalvingRandomSearchCV``
        constructor.
    expected_error_message : str
        Pattern the ``ValueError`` message has to match (handed to
        ``pytest.raises(match=...)``).
    """
    # NOTE(review): both arguments are presumably supplied by a
    # ``pytest.mark.parametrize`` decorator that is not visible here.
    estimator = FastClassifier()
    X, y = make_classification(100)
    search = HalvingRandomSearchCV(estimator, {'a': [1]}, **params)

    with pytest.raises(ValueError, match=expected_error_message):
        search.fit(X, y)
def test_random_search_discrete_distributions(param_distributions,
                                              expected_n_candidates):
    """Check the candidate count when more candidates are asked for than exist.

    The search is built with ``n_candidates=10``; how many parameter settings
    are actually sampled depends on whether the distributions are "all lists"
    or not (see ``ParameterSampler`` for details). This partly duplicates the
    ``ParameterSampler`` checks, but interaction bugs were discovered during
    the development of successive halving, so the behaviour is pinned here
    as well.

    Parameters
    ----------
    param_distributions : dict
        Parameter distributions handed to ``HalvingRandomSearchCV``.
    expected_n_candidates : int
        Expected value of ``n_candidates_[0]`` after fitting.
    """
    X, y = make_classification(n_samples=1024, random_state=0)
    search = HalvingRandomSearchCV(
        FastClassifier(),
        param_distributions,
        n_candidates=10,
    )
    search.fit(X, y)

    # Only the first iteration's candidate count is under test.
    first_iteration_candidates = search.n_candidates_[0]
    assert first_iteration_candidates == expected_n_candidates
def test_random_search(max_resources, n_candidates, expected_n_candidates):
    """Check how many candidates the random halving search generates.

    Fits a ``HalvingRandomSearchCV`` (``cv=2``, ``factor=2``,
    ``min_resources=4``) on 1024 samples and compares ``n_candidates_[0]``
    with the expected number. When ``n_candidates`` is ``'exhaust'`` it also
    checks that the last iteration uses ``max_resources`` resources.

    Parameters
    ----------
    max_resources
        Forwarded as ``max_resources`` to the search.
    n_candidates
        Forwarded as ``n_candidates`` to the search; ``'exhaust'`` enables the
        extra check on the final iteration.
    expected_n_candidates : int
        Expected value of ``n_candidates_[0]`` after fitting.
    """
    X, y = make_classification(n_samples=1024, random_state=0)
    distributions = {'a': norm, 'b': norm}
    search = HalvingRandomSearchCV(
        FastClassifier(),
        distributions,
        n_candidates=n_candidates,
        cv=2,
        max_resources=max_resources,
        factor=2,
        min_resources=4,
    )
    search.fit(X, y)

    assert search.n_candidates_[0] == expected_n_candidates

    if n_candidates != 'exhaust':
        return

    # 'exhaust' must make the last iteration use as many resources as
    # possible.
    assert search.n_resources_[-1] == max_resources
clf = RandomForestClassifier(n_estimators=20, random_state=rng) param_dist = { "max_depth": [3, None], "max_features": randint(1, 11), "min_samples_split": randint(2, 11), "bootstrap": [True, False], "criterion": ["gini", "entropy"] } rsh = HalvingRandomSearchCV(estimator=clf, param_distributions=param_dist, factor=2, random_state=rng) rsh.fit(X, y) # %% # We can now use the `cv_results_` attribute of the search estimator to inspect # and plot the evolution of the search. results = pd.DataFrame(rsh.cv_results_) results['params_str'] = results.params.apply(str) results.drop_duplicates(subset=('params_str', 'iter'), inplace=True) mean_scores = results.pivot(index='iter', columns='params_str', values='mean_test_score') ax = mean_scores.plot(legend=False, alpha=.6) labels = [ f'iter={i}\nn_samples={rsh.n_resources_[i]}\n'
n_iter=5, scoring='roc_auc', n_jobs=-1) clf = clf.fit(train_features, train_labels) # Score model score_randomized = roc_auc_score(test_labels, clf.predict_proba(test_features)[:, 1]) print(f'ROC AUC Score for RandomizedSearchCV model: {score_randomized}') print(clf.best_params_) # Fit rf model with HalvingRandomSearchCV clf_halving = HalvingRandomSearchCV(pipe, param_grid, cv=cv, verbose=1, scoring='roc_auc', n_jobs=-1, aggressive_elimination=True, factor=2, min_resources=20) clf_halving = clf_halving.fit(train_features, train_labels) # Score model score_halving = roc_auc_score(test_labels, clf_halving.predict_proba(test_features)[:, 1]) print(f'ROC AUC Score for HalvingRandomSearchCV model: {score_halving}') print(clf_halving.best_params_) print(f'ROC AUC Score for out of the box model: {score_rf}') print(f'ROC AUC Score for RandomizedSearchCV model: {score_randomized}') print(f'ROC AUC Score for HalvingRandomSearchCV model: {score_halving}')
_ = halving_cv.fit(X, y)

# Deal with class imbalance: weight the positive class by the ratio of
# "No" to "Yes" label counts.
counts = pd.Series(y.flatten()).value_counts()
scale_pos_weight = counts["No"] / counts["Yes"]

# Search space for the randomized halving search. Every entry is a list of
# discrete values; scale_pos_weight is pinned to the single value computed
# above.
param_grid_2 = dict(
    max_depth=[3, 4, 5],
    gamma=[5, 30, 50],
    learning_rate=[0.01, 0.1, 0.3, 0.5],
    min_child_weight=[1, 3, 5],
    reg_lambda=[50, 100, 300],
    scale_pos_weight=[scale_pos_weight],  # Fix scale_pos_weight
    subsample=[0.7, 0.8, 0.9],
    colsample_bytree=[0.7, 0.8, 0.9],
)

# NOTE(review): on scikit-learn releases where successive halving is still
# experimental this import presumably relies on
# `from sklearn.experimental import enable_halving_search_cv` having run
# earlier in the file -- confirm.
from sklearn.model_selection import HalvingRandomSearchCV

halving_random_cv = HalvingRandomSearchCV(
    xgb_cl,
    param_grid_2,
    scoring="roc_auc",
    n_jobs=-1,
    n_candidates="exhaust",
    factor=4,
)
_ = halving_random_cv.fit(X, y)
sampler=["mala", "langevin", "tempered mala", "tempered langevin"], weight_decay=expon(1e-3), # max_iter=poisson(30), replay_prob=beta(a=9, b=1), adversary_weight=beta(a=1, b=1), num_units=poisson(32), num_layers=poisson(3), max_replay=poisson(10), ) clf_cv = HalvingRandomSearchCV(clf, distributions, random_state=0, n_jobs=5, resource="max_iter", max_resources=max_resources) search = clf_cv.fit(X.values) clf = clf_cv.best_estimator_ elif do_search == "bohb": distributions = CS.ConfigurationSpace(seed=42) distributions.add_hyperparameter( CSH.UniformFloatHyperparameter("lr", 1e-4, 3e-1, log=True, default_value=6e-3)) distributions.add_hyperparameter( CSH.UniformFloatHyperparameter("sampler_lr", 1e-4, 3e-1, log=True, default_value=1e-3))