Example #1
def main(args):

    # ensure directories are setup
    dirs = [args.data_dir, args.ckpt_dir]
    prepare_dirs(dirs)

    # create base model
    model = get_base_model()

    # define params
    params = {
        # '0_dropout': ['uniform', 0.1, 0.5],
        # '0_act': ['choice', ['relu', 'selu', 'elu', 'tanh', 'sigmoid']],
        # '0_l2': ['log_uniform', 1e-1, 2],
        # '2_act': ['choice', ['selu', 'elu', 'tanh', 'sigmoid']],
        # '2_l1': ['log_uniform', 1e-1, 2],
        '2_hidden': ['quniform', 512, 1000, 1],
        '4_hidden': ['quniform', 128, 512, 1],
        'all_act': ['choice', [[0], ['choice', ['selu', 'elu', 'tanh']]]],
        'all_dropout': ['choice', [[0], ['uniform', 0.1, 0.5]]],
        'all_batchnorm': ['choice', [0, 1]],
        'all_l2': ['uniform', 1e-8, 1e-5],
        'optim': ['choice', ["adam", "sgd"]],
        'lr': ['uniform', 1e-3, 8e-3],
        # 'batch_size': ['quniform', 32, 128, 1]
    }

    # instantiate hyperband object
    hyperband = Hyperband(args, model, params)

    # tune
    results = hyperband.tune()

    # dump results
    save_results(results)
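
The params dict above encodes each hyperparameter as a list: a distribution name followed by its arguments. The Hyperband class in this example presumably resolves such specs when it samples configurations; a minimal sketch of what that sampling could look like, with all names here illustrative rather than taken from the repo:

import math
import random

def sample_spec(spec):
    # spec follows the convention above, e.g. ['quniform', 512, 1000, 1]
    # or ['choice', ['adam', 'sgd']]
    kind, args = spec[0], spec[1:]
    if kind == 'uniform':
        low, high = args
        return random.uniform(low, high)
    if kind == 'quniform':
        low, high, q = args
        return round(random.uniform(low, high) / q) * q
    if kind == 'log_uniform':
        low, high = args
        return math.exp(random.uniform(math.log(low), math.log(high)))
    if kind == 'choice':
        return random.choice(args[0])
    raise ValueError('unknown distribution: {}'.format(kind))

print(sample_spec(['quniform', 512, 1000, 1]))   # e.g. 774
print(sample_spec(['choice', ['adam', 'sgd']]))  # e.g. 'sgd'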
Example #2
from functools import partial
from pprint import pprint
import pickle

from hyperband import Hyperband

# get_params_r / try_params_r are assumed to come from the project's
# definition module for the regression task


def regression_meta_model(data, output_file='results.pkl', max_iter=81, eta=3):
    if not output_file.endswith('.pkl'):
        output_file += '.pkl'
    print("Will save results to", output_file)

    # bind the dataset so Hyperband can call the function as
    # try_params(n_iterations, params)
    try_params_data = partial(try_params_r, data=data)

    hb = Hyperband(get_params_r, try_params_data, max_iter=max_iter, eta=eta)
    results = hb.run(skip_last=1)

    print("{} total, best:\n".format(len(results)))

    for r in sorted(results, key=lambda x: x['loss'])[:5]:
        print("loss: {:.2%} | {} seconds | {:.1f} iterations | run {} ".format(
            r['loss'], r['seconds'], r['iterations'], r['counter']))
        pprint(r['params'])
        print()

    print("saving...")

    with open(output_file, 'wb') as f:
        pickle.dump(results, f)

    return results
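
Note the functools.partial trick above: this Hyperband interface calls its evaluation callback as try_params(n_iterations, params), so extra arguments such as the dataset must be bound in advance. A self-contained sketch of the pattern (the trainer and its loss formula are made up for illustration):

from functools import partial

def trainer(n_iterations, params, data=None):
    # stand-in for a real training routine; Hyperband expects a dict
    # with at least a 'loss' key
    loss = sum(data) / (n_iterations * params['lr'])
    return {'loss': loss}

try_params_bound = partial(trainer, data=[1.0, 2.0, 3.0])
print(try_params_bound(3, {'lr': 0.1}))  # {'loss': 20.0}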
Example #3
def main():
    parser = argparse.ArgumentParser(description='Hyperband main script')
    parser.add_argument('bench',
                        type=str,
                        choices=['MLPWithMNIST'],
                        help='the benchmark function you want to run')
    parser.add_argument(
        '--max_iter',
        type=int,
        default=27,
        help='maximum amount of resource that can be allocated '
             'to a single configuration')
    parser.add_argument(
        '--eta',
        type=int,
        default=3,
        help='proportion of the configurations discarded in each round of SH')
    parser.add_argument('--patience',
                        type=int,
                        default=5,
                        help='threshold for original early-stopping')
    parser.add_argument('--gcp', action='store_true')

    args = parser.parse_args()
    params = get_param_with_bench(args.bench)
    params['max_iter'] = args.max_iter
    params['eta'] = args.eta
    params['patience'] = args.patience
    params['homedir'] = '/hyperband_sandbox/' if args.gcp else './'

    # run optimization
    hb = Hyperband(**params)
    best = hb.run()
    print("best:{}".format(best))

    separate_history = hb.separate_history
    print("separate_history:{}".format(separate_history))
    for i, v in enumerate(separate_history.values()):
        df = pd.DataFrame(v)
        df.to_csv("./log_{}.csv".format(i))

    plot_util.plot_separately(separate_history, homedir=params['homedir'])
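
For intuition on the two knobs exposed above: max_iter is the largest budget any single configuration may receive and eta is the culling factor of successive halving (SH). A purely illustrative computation of the brackets Hyperband derives from them, following Li et al. (2018):

import math

max_iter, eta = 27, 3
s_max = int(math.log(max_iter) / math.log(eta) + 1e-9)  # guard against float error
B = (s_max + 1) * max_iter                              # budget per bracket

for s in reversed(range(s_max + 1)):
    n = int(math.ceil(B / max_iter / (s + 1) * eta ** s))  # initial configs
    r = max_iter * eta ** (-s)                             # initial iterations each
    print('bracket s={}: {} configs at {:g} iterations'.format(s, n, r))
# bracket s=3: 27 configs at 1 iteration ... bracket s=0: 4 configs at 27 iterations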
Example #4
def _xgboost_hyperband_model(task, numeric_features, categoric_features,
                             learning_rate):
    param_space = {
        'max_depth': randint(2, 11),
        'min_child_weight': randint(1, 11),
        'subsample': uniform(0.5, 0.5),
        'colsample_bytree': uniform(0.5, 0.5),
        'colsample_bylevel': uniform(0.5, 0.5),
        'gamma': uniform(0, 1),
        'reg_alpha': uniform(0, 1),
        'reg_lambda': uniform(0, 10),
        'base_score': uniform(0.1, 0.9),
        'scale_pos_weight': uniform(0.1, 9.9)
    }

    model = XGBClassifier(learning_rate=learning_rate) \
        if task == 'classification' else XGBRegressor(learning_rate=learning_rate)

    return make_pipeline(
        make_union(
            make_pipeline(ColumnsSelector(categoric_features), FillNaN('nan'),
                          ColumnApplier(TolerantLabelEncoder())),
            make_pipeline(ColumnsSelector(numeric_features),
                          Imputer(strategy='mean'), StandardScaler())),
        Hyperband(model, feat_space=param_space, task=task))
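
One easy-to-misread detail in the search space above: scipy.stats.uniform(loc, scale) is parameterized by location and width rather than by low/high bounds, and scipy.stats.randint(low, high) excludes high. So the space actually covers the following ranges:

from scipy.stats import randint, uniform

subsample = uniform(0.5, 0.5)   # draws from [0.5, 1.0], not a point at 0.5
max_depth = randint(2, 11)      # draws integers 2..10 inclusive

print(subsample.rvs(3))
print(max_depth.rvs(3))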
Example #5
from hyperopt import hp
from hyperopt.pyll.stochastic import sample
from hyperband import Hyperband

space = {
    'do1' : hp.uniform('do1', 0.2, 0.3),
    'do2' : hp.uniform('do2', 0.2, 0.3),
    'do3' : hp.uniform('do3', 0.4, 0.5),
    'extra_first_layers' : hp.choice('extra_first_layers', [1, 2, 3]),
    'extra_second_layers' : hp.choice('extra_second_layers', [1, 2]),
    }

dummy_space = {
    'x' : hp.uniform('x', 0.2, 0.9),
    }

def dummy_get_params():
    return sample(dummy_space)

def dummy_try_params(n, p):
    acc = p['x'] * n
    return {'acc' : acc}

def get_params():
    params = sample(space)
    return params

def try_params(n, p):
    # assumed: km.train_model returns the metrics dict ({'loss': ...})
    # that Hyperband expects from try_params
    return km.train_model(n, p)
    
hb = Hyperband(dummy_get_params, dummy_try_params)
results = hb.run()
hb.print_best_results(5)
 
print(hb.get_best_config())
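
For reference, the entries of the results list returned by hb.run() in this interface carry at least the keys used by the other examples on this page; the values below are illustrative only:

example_entry = {
    'counter': 7,            # sequential id of the evaluation
    'params': {'x': 0.42},   # the sampled configuration
    'iterations': 9,         # resource n passed to try_params
    'loss': 3.78,            # metric reported by try_params
    'seconds': 0.1,          # wall-clock time of the evaluation
}
best = min([example_entry], key=lambda e: e['loss'])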
Example #6
#!/usr/bin/env python

"bare-bones demonstration of using hyperband to tune sklearn GBT"

from hyperband import Hyperband
from defs.gb import get_params, try_params

hb = Hyperband(get_params, try_params)

# no actual tuning, doesn't call try_params()
# results = hb.run( dry_run = True )

results = hb.run(skip_last=1)  # shorter run
# results = hb.run()  # full run (would overwrite the results above)
Example #7
def main(directory):

    dir_list = os.listdir(directory)
    for e in dir_list:
        file_name = os.path.join(directory, os.path.basename(e))

        metalearning = ml.main(os.path.basename(e))

        load = Load_Data(file_name)

        train, valid, test = load.split_train_test_valid()

        for i in range(1):

            try:
                output_file1 = sys.argv[1]
                output_file2 = sys.argv[2]
                if not output_file1.endswith('.pkl'):
                    output_file1 += '.pkl'
                if not output_file2.endswith('.pkl'):
                    output_file2 += '.pkl'
            except IndexError:
                output_file1 = 'results_ab_' + os.path.basename(e) + '_' + str(
                    i) + '.pkl'
                output_file2 = 'results_ab_test_' + os.path.basename(
                    e) + '_' + str(i) + '.pkl'

                print("Will save results to", output_file1, output_file2)

            # data = load(file_name)

            hb = Hyperband(get_params, try_params, train, valid, test,
                           metalearning)
            results = hb.run(skip_last=1)
            # print(results)
            test_results = hb.tests(results)

            print("{} total, best in validation:\n".format(len(results)))

            for r in sorted(results, key=lambda x: x['loss']):
                print("loss: {:.2} | {} seconds | {:.1f} instances | run {} ".
                      format(r['loss'], r['seconds'], r['instances'],
                             r['counter']))
                pprint(r['params'])
                print()

            print("test results")
            for r in range(len(test_results)):
                t = test_results[r]
                print(
                    "loss: {:.2%} | auc: {:.2%} | {} seconds | {} run ".format(
                        t['loss'], t['auc'], t['seconds'], t['counter']))
                pprint(t['params'])
                print()

            print("results: ", results)
            print("test results:    ", test_results)
            print("saving...")

            with open(output_file1, 'wb') as f:
                pickle.dump(results, f)

            with open(output_file2, 'wb') as f:
                pickle.dump(test_results, f)

    return 'finished'
Example #8
def main():
    data = load_mnist()
    hb = Hyperband(data, get_params, try_params)
    results = hb.run()
    print(results)
Example #9
    architecture_file = join(outdir, model_arch + '_best_archit.json')
    optimizer_file = join(outdir, model_arch + '_best_optimer.pkl')
    weight_file = join(outdir, model_arch + '_bestmodel_weights.h5'
                       ) if args.weightfile is None else args.weightfile
    last_weight_file = join(outdir, model_arch + '_lastmodel_weights.h5'
                            ) if args.lweightfile is None else args.lweightfile
    evalout = join(outdir, model_arch + '_eval.txt')

    tmpdir = mkdtemp()
    system(' '.join(['cp', args.model, join(tmpdir, 'mymodel.py')]))
    sys.path.append(tmpdir)
    import mymodel
    hb = Hyperband(mymodel.get_params,
                   mymodel.try_params,
                   args.topdir,
                   max_iter=args.hyperiter,
                   datamode=args.datamode)

    if args.hyper:
        ## Hyper-parameter tuning
        results = hb.run(skip_last=1)

        best_result = sorted(results, key=lambda x: x['loss'])[0]
        pprint(best_result['params'])

        best_archit, best_optim, best_optim_config, best_lossfunc = best_result[
            'model']
        with open(architecture_file, 'w') as f:
            f.write(best_archit)
        with open(optimizer_file, 'wb') as f:
            cPickle.dump((best_optim, best_optim_config, best_lossfunc), f)
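
The snippet above persists the best architecture as JSON and the optimizer settings with cPickle (the weight files are presumably written during training elsewhere in the script). A hedged sketch of the matching reload step, assuming the Keras-style artifacts the file names suggest:

from keras.models import model_from_json

with open(architecture_file) as f:
    model = model_from_json(f.read())  # rebuild the tuned architecture
model.load_weights(weight_file)        # restore the best weights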
Example #10
import argparse
import json
import os
import time

import methods  # project-specific module providing the xgb helpers used below
from hyperband import Hyperband
from pprint import pprint

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('data_file', help = 'Path to training dataset')
    parser.add_argument('mode', help = 'Background type')
    parser.add_argument('channel', help = 'Decay channel')
    args = parser.parse_args()
    
    print('Loading dataset from: %s with test size 0.05' %(args.data_file))
    dataDMatrix = methods.xgb.load_data(args.data_file, args.mode, args.channel)

    start = time.time()
    print('Running HyperBand')
    hb = Hyperband(dataDMatrix, methods.xgb.get_hyp_config, methods.xgb.run_hyp_config)
    results = hb.run(skip_random_search = True)

    delta_t = time.time() - start
    output = os.path.join('models', args.mode + args.channel + '.json')
    
    print("{} Total, Leaderboard:\n".format(len(results)))

    for r in sorted(results, key = lambda x: x['auc'], reverse = True)[:10]:
        print("auc: {:.2%} | {} s | {:.1f} iterations | run {} ".format( 
                        r['auc'], r['seconds'], r['iterations'], r['counter']))
        pprint(r['params'])
        print()

    print('Hyperparameter search complete. Results saved to %s\n' %(output))
    with open(output, 'w') as f:
        # the source snippet is truncated here; dumping the results is the
        # natural completion
        json.dump(results, f, default=str)
Example #11
def main():
    """Input for the parameter optimization process

    Given the parameter space, running function and training/valid dataset, 
    using hyperband/hyperopt strategy to find the top parameter set with the best metrics 
    :input:
    ------                                              
    run_func: running function used in optimization 
    log_file_name: string, name of log file
    top_num: int, number of best configurations you want to choose
    train_train: data frame, the last column should be the target value, training data set in optimization process
    train_valid: data frame, the last column should be the target value, validation data set in optimization process
    :return: 
    --------
    top parameters and their score on log file

    """

    # define the log file name for hyperopt method
    log_file_hyperopt = 'hyperopt_xgboost.txt'
    top_num = 2
    # set the input file name
    train_train = pd.read_csv('fargo_train_train.csv')
    train_valid = pd.read_csv('fargo_train_valid.csv')

    # shuffle the input file
    train_train_shuffle = train_train.reindex(
        np.random.permutation(train_train.index)).reset_index(drop=True)
    train_valid_shuffle = train_valid.reindex(
        np.random.permutation(train_valid.index)).reset_index(drop=True)

    # split the dataset into data and target
    train_data, train_target = train_train_shuffle.values[:, 0:-1].astype(
        np.float32), train_train_shuffle.values[:, -1]
    valid_data, valid_target = train_valid_shuffle.values[:, 0:-1].astype(
        np.float32), train_valid_shuffle.values[:, -1]

    # define search space
    space_hyperopt = {
        'max_depth': hp.uniform('max_depth', 3, 10),
        'min_child_weight': hp.uniform('min_child_weight', 0.5, 5),
        'subsample': hp.uniform('subsample', 0.5, 1),
        'colsample_bytree': hp.uniform('colsample_bytree', 0.5, 1),
        'reg_alpha': hp.uniform('reg_alpha', 0, 0.01),
        'epochs': hp.choice('epochs', [100, 200]),
        'learning_rate': hp.choice('learning_rate', [0.10, 0.12])
    }

    # using HyperOPT to optimize the parameter set
    para_hyperopt = HyperOPT(space=space_hyperopt,
                             run_func=run_func,
                             log_file=log_file_hyperopt)

    # using fit api to fit the parameter set
    para_hyperopt.fit(train_data=train_data,
                      train_target=train_target,
                      valid_data=valid_data,
                      valid_target=valid_target,
                      n_iter=2)
    # using get best to take best parameter set
    para_hyperopt.get_best(top_num)

    # set the log file name for hyperband method
    log_file_hyperband = 'hyperband_xgboost.txt'

    # define the parameter set; it must contain 'epochs'
    space_hyperband = {
        'max_depth': hp.uniform('max_depth', 3, 10),
        'min_child_weight': hp.uniform('min_child_weight', 0.5, 5),
        'subsample': hp.uniform('subsample', 0.5, 1),
        'colsample_bytree': hp.uniform('colsample_bytree', 0.5, 1),
        'reg_alpha': hp.uniform('reg_alpha', 0, 0.01),
        'epochs': hp.choice('epochs', [100, 200]),
        'learning_rate': hp.choice('learning_rate', [0.10, 0.12])
    }

    # using Hyperband to optimize the parameter set
    para_hyperband = Hyperband(space=space_hyperband,
                               run_func=run_func,
                               log_file=log_file_hyperband,
                               max_iter=4,
                               eta=2)

    # using fit api to fit the parameter set
    para_hyperband.fit(train_data=train_data,
                       train_target=train_target,
                       valid_data=valid_data,
                       valid_target=valid_target)

    # using get best to take best parameter set
    para_hyperband.get_best(top_num)
Example #12
def main():
    """Input for the parameter optimization process
    
    Given the parameter space, running function and training/valid dataset, 
    using hyperband/hyperopt strategy to find the top parameter set with the best metrics 
    :input:
    ------                                              
    run_func: running function used in optimization 
    log_file_name: string, name of log file
    top_num: int, number of best configurations you want to choose
    train_train: data frame, the last column should be the target value, training data set in optimization process
    train_valid: data frame, the last column should be the target value, validation data set in optimization process
    :return: 
    --------
    top parameters and their score on log file
    
    """
    # define the log file name for hyperopt method
    log_file_hyperopt = 'hyperopt.txt'
    # set the number of parameter sets to be recorded
    top_num = 2
    # set the input file name
    train_train = pd.read_csv('fargo_train_train.csv')
    train_valid = pd.read_csv('fargo_train_valid.csv')

    # shuffle the input file
    train_train_shuffle = train_train.reindex(
        np.random.permutation(train_train.index)).reset_index(drop=True)
    train_valid_shuffle = train_valid.reindex(
        np.random.permutation(train_valid.index)).reset_index(drop=True)

    # split the dataset into data and target
    train_data, train_target = train_train_shuffle.values[:, 0:-1].astype(
        np.float32), train_train_shuffle.values[:, -1]
    valid_data, valid_target = train_valid_shuffle.values[:, 0:-1].astype(
        np.float32), train_valid_shuffle.values[:, -1]

    # define the parameter space; it must contain 'epochs'
    max_layers = 6

    space_hyperopt = {
        'n_layers': hp.quniform('n_layers', 2, max_layers, 1),
        'init': hp.choice('init',
                          ('uniform', 'normal', 'glorot_uniform', 'glorot_normal')),
        'batch_size': hp.choice('batch_size', (16, 32, 64, 128)),
        'epochs': hp.choice('epochs', [1, 2]),
        'optimizer': 'adam',
        'residual': hp.choice('residual', [True, False]),
        'highway': hp.choice('highway', [True, False]),
    }

    # for each hidden layer, we choose size, activation and extras individually
    for i in range(1, max_layers + 1):
        space_hyperopt['layer_{}_size'.format(i)] = hp.quniform(
            'ls{}'.format(i), 20, 200, 20)
        space_hyperopt['layer_{}_activation'.format(i)] = 'relu'
        space_hyperopt['layer_{}_dropout'.format(i)] = hp.quniform(
            'dropout{}'.format(i), 0.0, 0.5, 0.05)

    # using HyperOPT to optimize the parameter set
    para_hyperopt = HyperOPT(space=space_hyperopt,
                             run_func=run_func,
                             log_file=log_file_hyperopt)

    # using fit api to fit the parameter set
    para_hyperopt.fit(train_data=train_data,
                      train_target=train_target,
                      valid_data=valid_data,
                      valid_target=valid_target,
                      n_iter=2)
    # using get best to take best parameter set
    para_hyperopt.get_best(top_num)

    # set the log file name for hyperband method
    log_file_hyperband = 'hyperband.txt'

    # define the parameter set for hyperband
    space_hyperband = {
        'n_layers': hp.quniform('n_layers', 2, max_layers, 1),
        'init': hp.choice('init',
                          ('uniform', 'normal', 'glorot_uniform', 'glorot_normal')),
        'batch_size': hp.choice('batch_size', (16, 32, 64, 128)),
        'optimizer': 'adam',
        'residual': hp.choice('residual', [True, False]),
        'highway': hp.choice('highway', [True, False]),
    }

    # for each hidden layer, we choose size, activation and extras individually
    for i in range(1, max_layers + 1):
        space_hyperband['layer_{}_size'.format(i)] = hp.quniform(
            'ls{}'.format(i), 20, 200, 20)
        space_hyperband['layer_{}_activation'.format(i)] = 'relu'
        space_hyperband['layer_{}_dropout'.format(i)] = hp.quniform(
            'dropout{}'.format(i), 0.0, 0.5, 0.05)

    # using Hyperband to optimize the parameter set
    para_hyperband = Hyperband(space=space_hyperband,
                               run_func=run_func,
                               log_file=log_file_hyperband,
                               max_iter=4,
                               eta=2)

    # using fit api to fit the parameter set
    para_hyperband.fit(train_data=train_data,
                       train_target=train_target,
                       valid_data=valid_data,
                       valid_target=valid_target)
    # using get best to take best parameter set
    para_hyperband.get_best(top_num)
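
A hyperopt gotcha relevant to the layer spaces above: hp.quniform returns floats even with q=1, so values such as 'n_layers' and the layer sizes typically need an explicit int() cast inside run_func. Illustratively:

from hyperopt import hp
from hyperopt.pyll.stochastic import sample

space = {'n_layers': hp.quniform('n_layers', 2, 6, 1)}
cfg = sample(space)
n_layers = int(cfg['n_layers'])  # e.g. 4.0 -> 4
print(type(cfg['n_layers']), n_layers)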
Example #13
        })
        rmse = np.sum(np.square(out_v_test[-1][0]/c.lambda_max - x_values_sm_b))  # note: actually a sum of squared errors, not RMSE

        # mse = MSE(np.squeeze(x_values_sm_b), np.squeeze(out_v_test[-1][0]/c.lambda_max))
        # rmse = np.sqrt(mse)
        
        mae = MAE(np.squeeze(x_values_sm_b), np.squeeze(out_v_test[-1][0]/c.lambda_max))
    sess.close()
    r = { 'loss': rmse, 'rmse': rmse, 'mae': mae}
    if return_data:
        r["data"] = (out_v, out_v_test)
    print(rmse)
    return r

from hyperband import Hyperband

hb = Hyperband(get_params, try_params)
results = hb.run(skip_last = False)


# p = {'weight_init': 2.246314076891554, 'tau_b': 24.007448833081085, 'tau_c': 1.3402075787402872, 'tau_a': 10.881136694144896, 'lrate': 0.5851586265640217, 'theta': 24.0809893295545, 'tau_syn': 0.19291385527484867, 'per_epoch_shift': 22.910523230821795}
# r = try_params(10, p)

# p = {'weight_init': 0.09642636619530962, 'tau_b': 1.9493176363660059, 'tau_c': 1.7340754569936037, 'tau_a': 0.462867947572219, 'lrate': 0.6594933794300799, 'theta': 14.677925945506452, 'tau_syn': 20.646859326121326, 'per_epoch_shift': 22.329439955821854}
# r = try_params(10, p)

top_five = sorted(results, key=lambda k: k["loss"])[0:5]

r = try_params(81, top_five[0]["params"], return_data=True)

out_v, out_v_test = r["data"]