def test_skip_last_raise():
    """Fit a Hyperband search configured with ``skip_last=10``.

    NOTE(review): the test name suggests the fit is expected to raise, but no
    expectation is expressed inside this block -- presumably it is enforced by
    a decorator or harness outside this view; confirm.
    """
    estimator, distributions, features, targets, seed = setup()
    searcher = HyperbandSearchCV(
        estimator,
        distributions,
        skip_last=10,
        random_state=seed,
    )
    searcher.fit(features, targets)
def test_min_resource_param():
    """Check that ``min_iter=3`` is the smallest ``n_estimators`` value tried."""
    estimator, distributions, features, targets, seed = setup()
    searcher = HyperbandSearchCV(
        estimator,
        distributions,
        min_iter=3,
        random_state=seed,
        verbose=1,
    )
    searcher.fit(features, targets)

    # The recorded resource column must bottom out at the requested minimum.
    tried_resources = searcher.cv_results_['param_n_estimators'].data
    assert tried_resources.min() == 3
def test_skip_last():
    """Check the number of recorded evaluations when ``skip_last=1``."""
    estimator, distributions, features, targets, seed = setup()
    searcher = HyperbandSearchCV(
        estimator,
        distributions,
        skip_last=1,
        random_state=seed,
    )
    searcher.fit(features, targets)

    # 177 because in every round the last search is dropped:
    # 187 - (1 + 1 + 1 + 2 + 5)
    bracket_column = searcher.cv_results_['hyperband_bracket']
    assert len(bracket_column) == 177
def train_hyperband(X_train, X_test, y_train, y_test, mtype, common_name_model,
                    problemtype, classes, default_featurenames, transform_model,
                    settings, model_session):
    """Tune a random forest with Hyperband and pickle the fitted search object.

    The bundled ``hyperband`` helper package is installed from
    ``<parent of cwd>/training/helpers/hyperband`` before it is imported.

    Parameters
    ----------
    X_train, y_train
        Training data passed to ``HyperbandSearchCV.fit``.
    X_test, y_test
        Held-out data passed to ``HyperbandSearchCV.score``.
    mtype
        ``'classification'``/``'c'`` trains a model; ``'regression'``/``'r'``
        only prints a notice because it is unsupported.
    common_name_model
        Base name of the output file; ``'.pickle'`` is appended.
    problemtype, classes, default_featurenames, transform_model, settings, model_session
        Accepted for interface compatibility; not used in this function.

    Returns
    -------
    tuple
        ``(model_name, model_dir, files)`` where ``model_name`` is the pickle
        file name (``''`` for regression), ``model_dir`` is the current working
        directory and ``files`` is a one-element list holding ``model_name``.
    """
    # install
    curdir = os.getcwd()
    try:
        os.chdir(prev_dir(curdir) + '/training/helpers/hyperband')
        os.system('python3 setup.py install')
        # Imported while still inside the helper directory, as the original did.
        from hyperband import HyperbandSearchCV
    finally:
        # Always return to the caller's directory, even if install/import fails,
        # so later relative paths (including the pickle below) stay correct.
        os.chdir(curdir)

    # training and testing sets
    files = []
    model_name = common_name_model + '.pickle'

    if mtype in ['classification', 'c']:
        model = RandomForestClassifier()
        param_dist = {
            'max_depth': [3, None],
            'max_features': sp_randint(1, 11),
            'min_samples_split': sp_randint(2, 11),
            'min_samples_leaf': sp_randint(1, 11),
            'bootstrap': [True, False],
            'criterion': ['gini', 'entropy'],
        }

        search = HyperbandSearchCV(model, param_dist,
                                   resource_param='n_estimators',
                                   scoring='roc_auc')
        search.fit(X_train, y_train)

        params = search.best_params_
        print('-----')
        print('best params: ')
        print(params)
        print('------')

        # NOTE(review): the held-out score is computed but never reported or
        # returned; kept so the call (and any error it raises) is unchanged.
        accuracy = search.score(X_test, y_test)

        # SAVE ML MODEL -- the context manager closes the handle even if
        # pickling fails part-way through.
        with open(model_name, 'wb') as modelfile:
            pickle.dump(search, modelfile)

    elif mtype in ['regression', 'r']:
        print('hyperband currently does not support regression modeling.')
        model_name = ''

    model_dir = os.getcwd()
    files.append(model_name)

    return model_name, model_dir, files
def test_multimetric_hyperband():
    """Check that a two-metric search reports both scores and 187 evaluations."""
    estimator, distributions, features, y, seed = setup()

    # multimetric scoring is only supported for 1-D classification, so the
    # labels are collapsed in place to "equals 1" versus "everything else".
    is_one = (y == 1)
    y[is_one] = 1
    y[~is_one] = 0

    metrics = ['roc_auc', 'accuracy']
    searcher = HyperbandSearchCV(
        estimator,
        distributions,
        refit='roc_auc',
        scoring=metrics,
        random_state=seed,
    )
    searcher.fit(features, y)

    for column in ('mean_test_roc_auc', 'mean_test_accuracy'):
        assert column in searcher.cv_results_.keys()
    assert len(searcher.cv_results_['hyperband_bracket']) == 187
def test_check_resource_param():
    """Run input validation with ``resource_param='wrong_name'``.

    NOTE(review): no expectation is asserted inside this block; presumably a
    decorator outside this view expects validation to raise -- confirm.
    """
    estimator, distributions = setup()
    searcher = HyperbandSearchCV(
        estimator,
        distributions,
        resource_param='wrong_name',
    )
    searcher._validate_input()
def test_check_eta():
    """Run input validation with ``eta=0``.

    NOTE(review): no expectation is asserted inside this block; presumably a
    decorator outside this view expects validation to raise -- confirm.
    """
    estimator, distributions = setup()
    searcher = HyperbandSearchCV(estimator, distributions, eta=0)
    searcher._validate_input()
def test_check_skip_last():
    """Run input validation with ``skip_last=-1``.

    NOTE(review): no expectation is asserted inside this block; presumably a
    decorator outside this view expects validation to raise -- confirm.
    """
    estimator, distributions = setup()
    searcher = HyperbandSearchCV(estimator, distributions, skip_last=-1)
    searcher._validate_input()
def test_check_min_iter_smaller_max_iter():
    """Run input validation with ``min_iter=30`` and ``max_iter=15``.

    NOTE(review): no expectation is asserted inside this block; presumably a
    decorator outside this view expects validation to raise -- confirm.
    """
    estimator, distributions = setup()
    searcher = HyperbandSearchCV(
        estimator,
        distributions,
        min_iter=30,
        max_iter=15,
    )
    searcher._validate_input()
def test_check_max_iter():
    """Run input validation with ``max_iter=-1``.

    NOTE(review): no expectation is asserted inside this block; presumably a
    decorator outside this view expects validation to raise -- confirm.
    """
    estimator, distributions = setup()
    searcher = HyperbandSearchCV(estimator, distributions, max_iter=-1)
    searcher._validate_input()
#'min_child_weight' : [], #'min_child_samples' : [], #'subsample' : [], #'subsample_freq' : [], #'colsample_bytree' : [], #'reg_alpha' : [], #'reg_lambda' : [], 'n_jobs': [-1] } # In[42]: search = HyperbandSearchCV(hb_lgb_model, lgb_hb_param_dict, cv=3, verbose=1, max_iter=200, min_iter=50, scoring='neg_log_loss') # In[43]: search.fit(x_train, y_train) # In[44]: search.best_params_ # # Fit new # In[56]:
from scipy.stats import randint as sp_randint
from sklearn.datasets import load_digits
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelBinarizer


# Example: tune a random forest on the digits data set with Hyperband, using
# the number of trees as the resource that Hyperband allocates.
if __name__ == '__main__':
    # Search space: lists are sampled as discrete choices, scipy
    # distributions are sampled from directly.
    param_dist = {
        'max_depth': [3, None],
        'max_features': sp_randint(1, 11),
        'min_samples_split': sp_randint(2, 11),
        'min_samples_leaf': sp_randint(1, 11),
        'bootstrap': [True, False],
        'criterion': ['gini', 'entropy'],
    }
    model = RandomForestClassifier()

    digits = load_digits()
    X = digits.data
    # Binarise the digit labels into an indicator matrix before scoring
    # with 'roc_auc'.
    y = LabelBinarizer().fit_transform(digits.target)

    search = HyperbandSearchCV(
        model,
        param_dist,
        resource_param='n_estimators',
        scoring='roc_auc',
        n_jobs=1,
        verbose=1,
    )
    search.fit(X, y)

    print(search.best_params_)
    print(search.best_score_)