Example #1
import os
import subprocess
import time
import numpy as np
from importlib import import_module
from HPOlib.format_converter.tpe_to_smac import convert_tpe_to_smac_from_object

# generate .pcs from space.py
module = import_module('space')
search_space = module.space
smac_space = convert_tpe_to_smac_from_object(search_space)
smac_space_file = 'smac_2_06_01-dev/params.pcs'
fh = open(smac_space_file, 'w')
fh.write(smac_space)
fh.close()
print('Space file for SMAC generated: %s' % smac_space_file)

rand_stamp = np.random.randint(10000, 99999)
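# launch HPOlib's bundled SMAC optimizer via HPOlib-run; -s passes the random seed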
call_smac = 'HPOlib-run -o ../../optimizers/smac/smac_2_06_01-dev -s %d' % rand_stamp
print('Command: %s' % call_smac)
subprocess.call(call_smac, shell=True)
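subprocess.call returns the exit status of the child process, which the snippet above discards. A slightly more defensive variant (a sketch, not part of the original) would check it:

ret = subprocess.call(call_smac, shell=True)
if ret != 0:
    print('HPOlib-run exited with status %d' % ret)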
Example #2
# Imports assumed for this snippet (Python 2, matching the cPickle usage below).
# Project-specific helpers such as cv (the objective module cv.py),
# get_num_of_trials, construct_subspace, sample, get_next_by_EI,
# get_covered_units_by_ei, get_last_run, get_random_picks_by_optimal_design and
# get_pure_random_picks are provided by the surrounding FLASH code base.
import cPickle
import logging
import os
import sys
from argparse import ArgumentParser
from functools import partial
from importlib import import_module

import hyperopt
import hyperopt.tpe
import numpy as np
from sklearn import linear_model

from HPOlib.format_converter.tpe_to_smac import convert_tpe_to_smac_from_object

logger = logging.getLogger(__name__)  # assumed module-level logger


def main():
    parser = ArgumentParser()

    parser.add_argument('-p',
                        '--space',
                        dest='spaceFile',
                        help='Where is the space.py located?')
    parser.add_argument(
        '--use_optimal_design',
        dest='use_optimal_design',
        help='Use optimal design or pure random initialization?')
    parser.add_argument('--init_budget',
                        dest='init_budget',
                        help='How many evaluations for random burning period?')
    parser.add_argument(
        '--ei_budget',
        dest='ei_budget',
        help='How many evaluations for EI controlled online period?')
    parser.add_argument(
        '--bopt_budget',
        dest='bopt_budget',
        help=
        'How many evaluations for Bayesian optimization after get subspace?')
    parser.add_argument(
        '--ei_xi',
        dest='ei_xi',
        help='What is the exploration parameter for computing EI?')
    parser.add_argument(
        '--top_k_pipelines',
        dest='top_k_pipelines',
        help='How many top (LR predicted) pipelines to cover in subspace?')
    parser.add_argument('-s',
                        '--seed',
                        default='1',
                        dest='seed',
                        type=int,
                        help='Seed for the algorithm')

    parser.add_argument(
        '-a',
        '--algo',
        default='SMAC',
        dest='algo',
        type=str,
        help='Specify the algorithm after LR, can be SMAC or TPE')

    parser.add_argument(
        '-r',
        '--restore',
        action='store_true',
        dest='restore',
        help='When this flag is set state.pkl is restored in ' +
        'the current working directory')
    parser.add_argument('--random',
                        default=False,
                        action='store_true',
                        dest='random',
                        help='Use a random search')
    parser.add_argument('--cwd',
                        help='Change the working directory before '
                        'optimizing.')

    args, unknown = parser.parse_known_args()
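    # unrecognized command-line flags are collected in unknown and ignored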

    if args.cwd:
        os.chdir(args.cwd)

    if not os.path.exists(args.spaceFile):
        logger.critical('Search space not found: %s' % args.spaceFile)
        sys.exit(1)

    # First remove '.py'
    space, ext = os.path.splitext(os.path.basename(args.spaceFile))

    # Then load the search space dict and our objective function from cv.py
    sys.path.append('./')
    sys.path.append('')

    module = import_module(space)
    search_space = module.space
    ni = [len(d)
          for d in module.layer_dict_list]  # number of units in each layer
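    # cumulative layer sizes, used below to slice the flat coefficient vectors per layer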
    cum_ni = np.cumsum(ni)

    log_filename = 'lr.pkl'

    # Random burning period as initialization
    init_budget = int(args.init_budget)
    if args.use_optimal_design == '1':
        picks = get_random_picks_by_optimal_design(ni, init_budget)
    else:
        picks = get_pure_random_picks(ni, init_budget)
    for i in range(init_budget):
        times = get_num_of_trials(log_filename, filter_valid=False)
        valid_times = get_num_of_trials(log_filename, filter_valid=True)
        logger.info('IMPORTANT! YOU ARE RUNNING FLASH WITH: %s' % args.algo)
        logger.info('Total evaluation times: %d, valid times: %d' %
                    (times, valid_times))
        logger.info('Random burning period times: %d, valid times: %d' %
                    (times, valid_times))
        subspace = construct_subspace(module, picks[i])
        params = sample(subspace)
        cv.main(params)
    valid_times_in_random_period = get_num_of_trials(log_filename,
                                                     filter_valid=True)

    # Train the first LR model before entering into EI controlled period
    fh = open(log_filename)
    log = cPickle.load(fh)
    trials = log['trials']
    fh.close()
    X = []
    y = []
    y_time = []
    for trial in trials:
        result = trial['result']
        time = trial['duration']
        # make sure the logged result is a valid number (evaluations that return 100.0 are still accepted)
        if result <= 100:
            params = trial['params']
            rescaling = params['-rescaling']
            balancing = params['-balancing']
            feat_pre = params['-feat_pre']
            clf = params['-classifier']
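            # one-hot encode the unit chosen in each of the four pipeline layers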
            x = [[0] * n for n in ni]
            x[0][module.d_rescaling[rescaling]] = 1
            x[1][module.d_balancing[balancing]] = 1
            x[2][module.d_feat_pre[feat_pre]] = 1
            x[3][module.d_clf[clf]] = 1
            x_flat = np.array(x[0] + x[1] + x[2] + x[3])
            X.append(x_flat)
            y.append(result)
            y_time.append(np.log(time))
    X = np.array(X)
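    # fit two ridge models: one predicting the logged result, one predicting log-runtime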
    alpha = 1.0
    lr = linear_model.Ridge(alpha=alpha)
    lr.fit(X, y)
    lr_time = linear_model.Ridge(alpha=alpha)
    lr_time.fit(X, y_time)

    # Online period controlled by EI
    ei_budget = int(args.ei_budget)
    for i in range(ei_budget):
        times = get_num_of_trials(log_filename, filter_valid=False)
        valid_times = get_num_of_trials(log_filename, filter_valid=True)
        logger.info('Total evaluation times: %d, valid times: %d' %
                    (times, valid_times))
        logger.info(
            'EI controlled period times: %d, valid times: %d' %
            (times - init_budget, valid_times - valid_times_in_random_period))
        ebeta = lr.coef_[:cum_ni[0]], \
                lr.coef_[cum_ni[0]:cum_ni[1]], \
                lr.coef_[cum_ni[1]:cum_ni[2]], \
                lr.coef_[cum_ni[2]:]
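        # per-layer blocks of the ridge coefficients give an estimated ranking of the units in each layer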
        logger.info('LR model estimated unit ranking: %s %s %s %s' %
                    (str(ebeta[0].argsort()), str(ebeta[1].argsort()),
                     str(ebeta[2].argsort()), str(ebeta[3].argsort())))
        ebeta_time = lr_time.coef_[:cum_ni[0]], \
                     lr_time.coef_[cum_ni[0]:cum_ni[1]], \
                     lr_time.coef_[cum_ni[1]:cum_ni[2]], \
                     lr_time.coef_[cum_ni[2]:]
        logger.info(
            'LR Time model estimated unit ranking: %s %s %s %s' %
            (str(ebeta_time[0].argsort()), str(ebeta_time[1].argsort()),
             str(ebeta_time[2].argsort()), str(ebeta_time[3].argsort())))
        # pick the best pipeline by EI
        x_next = get_next_by_EI(ni, alpha, lr, lr_time, X, y,
                                float(args.ei_xi))
        pick = [[np.argmax(x_next_i)] for x_next_i in x_next]
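        # keep only the single most promising unit per layer for this evaluation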
        subspace = construct_subspace(module, pick)
        params = sample(subspace)
        cv.main(params)

        result, time = get_last_run(log_filename)
        if result <= 100:
            x_next_flat = np.array(x_next[0] + x_next[1] + x_next[2] +
                                   x_next[3])
            X = np.vstack([X, x_next_flat])
            y.append(result)
            y_time.append(np.log(time))
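            # refit both ridge models with the newly observed point included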
            lr = linear_model.Ridge(alpha=alpha)
            lr.fit(X, y)
            lr_time = linear_model.Ridge(alpha=alpha)
            lr_time.fit(X, y_time)
    valid_times_in_ei_period = get_num_of_trials(
        log_filename, filter_valid=True) - valid_times_in_random_period

    # Construct subspace based on LR prediction
    final_ebeta = lr.coef_[:cum_ni[0]], \
                  lr.coef_[cum_ni[0]:cum_ni[1]], \
                  lr.coef_[cum_ni[1]:cum_ni[2]], \
                  lr.coef_[cum_ni[2]:]
    final_ebeta_time = lr_time.coef_[:cum_ni[0]], \
                       lr_time.coef_[cum_ni[0]:cum_ni[1]], \
                       lr_time.coef_[cum_ni[1]:cum_ni[2]], \
                       lr_time.coef_[cum_ni[2]:]
    final_pick = get_covered_units_by_ei(ni, alpha, lr, lr_time, X, y, 0,
                                         int(args.top_k_pipelines))
    final_subspace = construct_subspace(module, final_pick)
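    # final_subspace covers the top-k pipelines predicted by the two LR models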

    logger.info('LR model estimated unit ranking: %s %s %s %s' %
                (str(final_ebeta[0].argsort()), str(final_ebeta[1].argsort()),
                 str(final_ebeta[2].argsort()), str(final_ebeta[3].argsort())))
    logger.info(
        'LR Time model estimated unit ranking: %s %s %s %s' %
        (str(final_ebeta_time[0].argsort()), str(
            final_ebeta_time[1].argsort()), str(final_ebeta_time[2].argsort()),
         str(final_ebeta_time[3].argsort())))
    logger.info('Selected pipelines: %s %s %s %s' %
                (final_pick[0], final_pick[1], final_pick[2], final_pick[3]))

    # Phase 3 with SMAC
    if args.algo == 'SMAC':
        fh = open('pickup.txt', 'w')
        for layer_pick in final_pick:
            for i in layer_pick:
                fh.write('%d ' % i)
            fh.write('\n')
        fh.close()
        subspace = construct_subspace(module, final_pick)
        new_space = convert_tpe_to_smac_from_object(subspace)
        fh = open('params.pcs', 'w')
        fh.write(new_space)
        fh.close()
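        # the SMAC run itself is presumably launched externally on the params.pcs written here (cf. Example #1)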

    # Phase 3 with TPE
    elif args.algo == 'TPE':
        fn = cv.main
        domain = hyperopt.Domain(fn, final_subspace, rseed=int(args.seed))
        trials = hyperopt.Trials()
        bopt_budget = int(args.bopt_budget)
        for i in range(bopt_budget):
            times = get_num_of_trials(log_filename, filter_valid=False)
            valid_times = get_num_of_trials(log_filename, filter_valid=True)
            logger.info('Total evaluation times: %d, valid times: %d' %
                        (times, valid_times))
            logger.info(
                'TPE period times: %d, valid times: %d' %
                (times - init_budget - ei_budget, valid_times -
                 valid_times_in_random_period - valid_times_in_ei_period))
            logger.info(
                'LR model estimated unit ranking: %s %s %s %s' %
                (str(final_ebeta[0].argsort()), str(final_ebeta[1].argsort()),
                 str(final_ebeta[2].argsort()), str(final_ebeta[3].argsort())))
            logger.info('LR Time model estimated unit ranking: %s %s %s %s' %
                        (str(final_ebeta_time[0].argsort()),
                         str(final_ebeta_time[1].argsort()),
                         str(final_ebeta_time[2].argsort()),
                         str(final_ebeta_time[3].argsort())))
            logger.info(
                'Selected pipelines: %s %s %s %s' %
                (final_pick[0], final_pick[1], final_pick[2], final_pick[3]))
            # in exhaust, the number of evaluations is max_evals - num_done
            tpe_with_seed = partial(hyperopt.tpe.suggest, seed=int(args.seed))
            rval = hyperopt.FMinIter(tpe_with_seed,
                                     domain,
                                     trials,
                                     max_evals=i)
            rval.exhaust()
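The --ei_xi argument above is the exploration offset xi in the expected-improvement criterion. For reference only (FLASH's get_next_by_EI may differ in detail), a textbook EI for minimization, with mu and sigma a model's predictive mean and standard deviation and y_best the best result observed so far, looks like this sketch:

import numpy as np
from scipy.stats import norm

def expected_improvement(mu, sigma, y_best, xi=0.0):
    # generic EI for minimization; shown purely as an illustration
    sigma = np.maximum(sigma, 1e-12)   # guard against zero predictive variance
    z = (y_best - mu - xi) / sigma
    return (y_best - mu - xi) * norm.cdf(z) + sigma * norm.pdf(z)

Larger xi values favour exploration by discounting the predicted improvement before it is weighted by the probability of improvement.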