def test_skip_last_raise():
    """Fit a Hyperband search configured with ``skip_last=10``.

    NOTE(review): the test name suggests the fit is expected to raise, but
    no expected-exception decorator or assertion is visible in this block --
    confirm that one is applied where the test is collected.
    """
    estimator, distributions, features, targets, seed = setup()
    hyperband = HyperbandSearchCV(
        estimator,
        distributions,
        random_state=seed,
        skip_last=10,
    )
    hyperband.fit(features, targets)
def test_min_resource_param():
    """Check that ``min_iter=3`` is the smallest ``n_estimators`` recorded."""
    estimator, distributions, features, targets, seed = setup()
    hyperband = HyperbandSearchCV(
        estimator,
        distributions,
        min_iter=3,
        verbose=1,
        random_state=seed,
    )
    hyperband.fit(features, targets)

    # The smallest value recorded for the n_estimators parameter must equal
    # the requested min_iter.
    tried_estimators = hyperband.cv_results_['param_n_estimators']
    assert tried_estimators.data.min() == 3
def test_skip_last():
    """Fit with ``skip_last=1`` and check how many results are recorded."""
    estimator, distributions, features, targets, seed = setup()
    hyperband = HyperbandSearchCV(
        estimator,
        distributions,
        random_state=seed,
        skip_last=1,
    )
    hyperband.fit(features, targets)

    # In every round the last search is dropped, so the expected count is
    # 187 - (1 + 1 + 1 + 2 + 5) = 177.
    brackets = hyperband.cv_results_['hyperband_bracket']
    assert len(brackets) == 177
Example #4
0
def train_hyperband(X_train, X_test, y_train, y_test, mtype, common_name_model,
                    problemtype, classes, default_featurenames,
                    transform_model, settings, model_session):
    """Tune a random forest classifier with Hyperband and pickle the search.

    The bundled ``hyperband`` package is installed from
    ``<prev_dir(cwd)>/training/helpers/hyperband`` before it is imported.
    For classification a ``HyperbandSearchCV`` is fitted on the training
    data, the best parameters are printed, and the fitted search object is
    pickled to ``common_name_model + '.pickle'`` in the current directory.

    Args:
        X_train: Training features passed to ``search.fit``.
        X_test: Unused by this function; kept for interface compatibility.
        y_train: Training targets passed to ``search.fit``.
        y_test: Unused by this function; kept for interface compatibility.
        mtype: ``'classification'``/``'c'`` to train; ``'regression'``/``'r'``
            and any other value are reported as unsupported.
        common_name_model: Base name of the pickle file to write.
        problemtype, classes, default_featurenames, transform_model,
        settings, model_session: Unused by this function; kept for
            interface compatibility.

    Returns:
        A tuple ``(model_name, model_dir, files)``: the pickle file name
        (``''`` when nothing was trained), the current working directory,
        and a one-element list holding ``model_name``.
    """
    # install
    curdir = os.getcwd()
    os.chdir(prev_dir(curdir) + '/training/helpers/hyperband')
    try:
        os.system('python3 setup.py install')
        # Imported while still inside the package directory, as before.
        from hyperband import HyperbandSearchCV
    finally:
        # Always return to the caller's directory, even when the install or
        # the import fails; otherwise later relative paths would break.
        os.chdir(curdir)

    # training and testing sets
    files = list()
    model_name = common_name_model + '.pickle'

    if mtype in ['classification', 'c']:

        model = RandomForestClassifier()
        param_dist = {
            'max_depth': [3, None],
            'max_features': sp_randint(1, 11),
            'min_samples_split': sp_randint(2, 11),
            'min_samples_leaf': sp_randint(1, 11),
            'bootstrap': [True, False],
            'criterion': ['gini', 'entropy']
        }

        search = HyperbandSearchCV(model,
                                   param_dist,
                                   resource_param='n_estimators',
                                   scoring='roc_auc')
        search.fit(X_train, y_train)
        params = search.best_params_
        print('-----')
        print('best params: ')
        print(params)
        print('------')

        # SAVE ML MODEL
        # The context manager closes the file even if pickling fails.
        with open(model_name, 'wb') as modelfile:
            pickle.dump(search, modelfile)

    elif mtype in ['regression', 'r']:

        print('hyperband currently does not support regression modeling.')
        model_name = ''

    else:

        # No model was trained, so do not report a pickle file that was
        # never written.
        print('hyperband does not support model type: ' + str(mtype))
        model_name = ''

    model_dir = os.getcwd()
    files.append(model_name)

    return model_name, model_dir, files
def test_multimetric_hyperband():
    """Fit with two scoring metrics and check both appear in the results."""
    estimator, distributions, features, targets, seed = setup()

    # multimetric scoring is only supported for 1-D classification, so the
    # targets are collapsed in place to "label 1" versus "everything else".
    is_first_label = targets == 1
    targets[is_first_label] = 1
    targets[~is_first_label] = 0

    hyperband = HyperbandSearchCV(
        estimator,
        distributions,
        scoring=['roc_auc', 'accuracy'],
        refit='roc_auc',
        random_state=seed,
    )
    hyperband.fit(features, targets)

    results = hyperband.cv_results_
    assert 'mean_test_roc_auc' in results
    assert 'mean_test_accuracy' in results
    assert len(results['hyperband_bracket']) == 187
Example #6
0
def test_check_resource_param():
    """Run input validation with ``resource_param='wrong_name'``.

    NOTE(review): presumably validation is expected to reject this value;
    no expected-exception assertion is visible in this block -- confirm.
    """
    estimator, distributions = setup()
    hyperband = HyperbandSearchCV(
        estimator,
        distributions,
        resource_param='wrong_name',
    )
    hyperband._validate_input()
Example #7
0
def test_check_eta():
    """Run input validation with ``eta=0``.

    NOTE(review): presumably validation is expected to reject this value;
    no expected-exception assertion is visible in this block -- confirm.
    """
    estimator, distributions = setup()
    hyperband = HyperbandSearchCV(estimator, distributions, eta=0)
    hyperband._validate_input()
Example #8
0
def test_check_skip_last():
    """Run input validation with ``skip_last=-1``.

    NOTE(review): presumably validation is expected to reject this value;
    no expected-exception assertion is visible in this block -- confirm.
    """
    estimator, distributions = setup()
    hyperband = HyperbandSearchCV(estimator, distributions, skip_last=-1)
    hyperband._validate_input()
Example #9
0
def test_check_min_iter_smaller_max_iter():
    """Run input validation with ``min_iter=30`` above ``max_iter=15``.

    NOTE(review): presumably validation is expected to reject this
    combination; no expected-exception assertion is visible in this block
    -- confirm.
    """
    estimator, distributions = setup()
    hyperband = HyperbandSearchCV(
        estimator,
        distributions,
        max_iter=15,
        min_iter=30,
    )
    hyperband._validate_input()
Example #10
0
def test_check_max_iter():
    """Run input validation with ``max_iter=-1``.

    NOTE(review): presumably validation is expected to reject this value;
    no expected-exception assertion is visible in this block -- confirm.
    """
    estimator, distributions = setup()
    hyperband = HyperbandSearchCV(estimator, distributions, max_iter=-1)
    hyperband._validate_input()
Example #11
0
    #'min_child_weight' : [],
    #'min_child_samples' : [],
    #'subsample' : [],
    #'subsample_freq' : [],
    #'colsample_bytree' : [],
    #'reg_alpha' : [],
    #'reg_lambda' : [],
    'n_jobs': [-1]
}

# In[42]:

# Configure a Hyperband search for hb_lgb_model over lgb_hb_param_dict,
# scored with 'neg_log_loss'.
# NOTE(review): min_iter/max_iter presumably bound the resource parameter
# between 50 and 200, and cv=3 presumably means 3-fold cross-validation --
# confirm against the HyperbandSearchCV documentation.
search = HyperbandSearchCV(hb_lgb_model,
                           lgb_hb_param_dict,
                           cv=3,
                           verbose=1,
                           max_iter=200,
                           min_iter=50,
                           scoring='neg_log_loss')

# In[43]:

# Run the search on the training data.
search.fit(x_train, y_train)

# In[44]:

# Bare expression: a notebook shows its value as the cell output; it has no
# visible effect when this file is run as a plain script.
search.best_params_

# # Fit new

# In[56]:
Example #12
0
from scipy.stats import randint as sp_randint
from sklearn.datasets import load_digits
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelBinarizer

if __name__ == '__main__':
    # Demo: tune a random forest on the digits dataset with Hyperband,
    # using n_estimators as the resource parameter.
    digits = load_digits()
    X = digits.data
    # Binarize the digit labels before fitting.
    y = LabelBinarizer().fit_transform(digits.target)

    # Search space: two-element lists for the categorical choices and
    # scipy randint distributions for the integer-valued parameters.
    param_dist = {
        'max_depth': [3, None],
        'max_features': sp_randint(1, 11),
        'min_samples_split': sp_randint(2, 11),
        'min_samples_leaf': sp_randint(1, 11),
        'bootstrap': [True, False],
        'criterion': ['gini', 'entropy'],
    }

    search = HyperbandSearchCV(
        RandomForestClassifier(),
        param_dist,
        resource_param='n_estimators',
        scoring='roc_auc',
        n_jobs=1,
        verbose=1,
    )
    search.fit(X, y)

    # Report the best configuration, then its score.
    for outcome in (search.best_params_, search.best_score_):
        print(outcome)