import os
import subprocess
import time

import numpy as np
from importlib import import_module
from HPOlib.format_converter.tpe_to_smac import convert_tpe_to_smac_from_object

# Generate a SMAC .pcs parameter file from the TPE-style search space
# defined in the local space.py module, then launch HPOlib with SMAC.
module = import_module('space')
search_space = module.space
smac_space = convert_tpe_to_smac_from_object(search_space)

smac_space_file = 'smac_2_06_01-dev/params.pcs'
# Use a context manager so the handle is closed even if the write fails.
with open(smac_space_file, 'w') as fh:
    fh.write(smac_space)
# Fixed typo: 'Sapce' -> 'Space'.
print('Space file for SMAC generated: %s' % smac_space_file)

# Random stamp used as the HPOlib seed so repeated launches do not collide.
rand_stamp = np.random.randint(10000, 99999)
call_smac = 'HPOlib-run -o ../../optimizers/smac/smac_2_06_01-dev -s %d' % rand_stamp
# Use function-call print form for consistency with the message above
# (the original mixed a Python-2-only print statement into this script).
print('Command: %s' % call_smac)
# NOTE(review): shell=True with a string command; the command is built from
# constants plus an int, so injection risk is low, but a list + shell=False
# would be safer if this is ever parameterized.
subprocess.call(call_smac, shell=True)
def main():
    """Run the FLASH pipeline-optimization driver.

    Three phases:
      1. Random "burning" period: evaluate `init_budget` randomly (or
         optimal-design) picked pipelines to seed the linear model.
      2. EI-controlled online period: for `ei_budget` iterations, fit Ridge
         models for loss and log-runtime, pick the next pipeline by Expected
         Improvement, evaluate it, and refit.
      3. Hand the LR-selected subspace to SMAC (writes pickup.txt/params.pcs)
         or run TPE directly for `bopt_budget` evaluations.

    Command-line arguments are parsed with parse_known_args; unknown
    arguments are ignored. Exits via sys.exit(1) if the space file is
    missing. Relies on module-level helpers (logger, cv, sample,
    construct_subspace, get_num_of_trials, ...) defined elsewhere.
    """
    parser = ArgumentParser()
    parser.add_argument('-p', '--space', dest='spaceFile',
                        help='Where is the space.py located?')
    parser.add_argument('--use_optimal_design', dest='use_optimal_design',
                        help='Use optimal design or pure random initialization?')
    parser.add_argument('--init_budget', dest='init_budget',
                        help='How many evaluations for random burning period?')
    parser.add_argument('--ei_budget', dest='ei_budget',
                        help='How many evaluations for EI controlled online period?')
    parser.add_argument('--bopt_budget', dest='bopt_budget',
                        help='How many evaluations for Bayesian optimization after get subspace?')
    parser.add_argument('--ei_xi', dest='ei_xi',
                        help='What is the exploration parameter for computing EI?')
    parser.add_argument('--top_k_pipelines', dest='top_k_pipelines',
                        help='How many top (LR predicted) pipelines to cover in subspace?')
    parser.add_argument('-s', '--seed', default='1', dest='seed', type=int,
                        help='Seed for the algorithm')
    parser.add_argument('-a', '--algo', default='SMAC', dest='algo', type=str,
                        help='Specify the algorithm after LR, can be SMAC or TPE')
    parser.add_argument('-r', '--restore', action='store_true', dest='restore',
                        help='When this flag is set state.pkl is restored in ' +
                             'the current working directory')
    parser.add_argument('--random', default=False, action='store_true',
                        dest='random', help='Use a random search')
    parser.add_argument('--cwd', help='Change the working directory before '
                        'optimizing.')
    args, unknown = parser.parse_known_args()

    if args.cwd:
        os.chdir(args.cwd)
    if not os.path.exists(args.spaceFile):
        logger.critical('Search space not found: %s' % args.spaceFile)
        sys.exit(1)

    # First remove '.py'
    space, ext = os.path.splitext(os.path.basename(args.spaceFile))

    # Then load dict searchSpace and out function cv.py
    sys.path.append('./')
    sys.path.append('')
    module = import_module(space)
    search_space = module.space
    ni = [len(d) for d in module.layer_dict_list]  # number of units in each layer
    cum_ni = np.cumsum(ni)  # cumulative boundaries for slicing LR coefficients
    log_filename = 'lr.pkl'

    # --- Phase 1: random burning period as initialization ---
    init_budget = int(args.init_budget)
    if args.use_optimal_design == '1':
        picks = get_random_picks_by_optimal_design(ni, init_budget)
    else:
        picks = get_pure_random_picks(ni, init_budget)
    for i in range(init_budget):
        times = get_num_of_trials(log_filename, filter_valid=False)
        valid_times = get_num_of_trials(log_filename, filter_valid=True)
        logger.info('IMPORTANT! YOU ARE RUNNING FLASH WITH: %s' % args.algo)
        logger.info('Total evaluation times: %d, valid times: %d'
                    % (times, valid_times))
        logger.info('Random burning period times: %d, valid times: %d'
                    % (times, valid_times))
        subspace = construct_subspace(module, picks[i])
        params = sample(subspace)
        cv.main(params)
    valid_times_in_random_period = get_num_of_trials(log_filename,
                                                     filter_valid=True)

    # Train the first LR model before entering into EI controlled period.
    # Use a context manager so the log file is closed even if unpickling fails.
    with open(log_filename) as fh:
        log = cPickle.load(fh)
    trials = log['trials']
    X = []       # one-hot encodings of evaluated pipelines
    y = []       # observed losses
    y_time = []  # log of observed runtimes
    for trial in trials:
        result = trial['result']
        # Renamed from `time` to avoid shadowing the time module.
        duration = trial['duration']
        # make sure the logged result is a number (accept evaluations return 100.0)
        if result <= 100:
            params = trial['params']
            rescaling = params['-rescaling']
            balancing = params['-balancing']
            feat_pre = params['-feat_pre']
            clf = params['-classifier']
            # One-hot encode the chosen unit in each of the 4 pipeline layers.
            x = [[0] * n for n in ni]
            x[0][module.d_rescaling[rescaling]] = 1
            x[1][module.d_balancing[balancing]] = 1
            x[2][module.d_feat_pre[feat_pre]] = 1
            x[3][module.d_clf[clf]] = 1
            x_flat = np.array(x[0] + x[1] + x[2] + x[3])
            X.append(x_flat)
            y.append(result)
            y_time.append(np.log(duration))
    X = np.array(X)
    alpha = 1.0  # Ridge regularization strength
    lr = linear_model.Ridge(alpha=alpha)
    lr.fit(X, y)
    lr_time = linear_model.Ridge(alpha=alpha)
    lr_time.fit(X, y_time)

    # --- Phase 2: online period controlled by EI ---
    ei_budget = int(args.ei_budget)
    for i in range(ei_budget):
        times = get_num_of_trials(log_filename, filter_valid=False)
        valid_times = get_num_of_trials(log_filename, filter_valid=True)
        logger.info('Total evaluation times: %d, valid times: %d'
                    % (times, valid_times))
        logger.info('EI controlled period times: %d, valid times: %d'
                    % (times - init_budget,
                       valid_times - valid_times_in_random_period))
        # Per-layer coefficient slices of the loss model.
        ebeta = lr.coef_[:cum_ni[0]], \
            lr.coef_[cum_ni[0]:cum_ni[1]], \
            lr.coef_[cum_ni[1]:cum_ni[2]], \
            lr.coef_[cum_ni[2]:]
        logger.info('LR model estimated unit ranking: %s %s %s %s'
                    % (str(ebeta[0].argsort()), str(ebeta[1].argsort()),
                       str(ebeta[2].argsort()), str(ebeta[3].argsort())))
        ebeta_time = lr_time.coef_[:cum_ni[0]], \
            lr_time.coef_[cum_ni[0]:cum_ni[1]], \
            lr_time.coef_[cum_ni[1]:cum_ni[2]], \
            lr_time.coef_[cum_ni[2]:]
        logger.info('LR Time model estimated unit ranking: %s %s %s %s'
                    % (str(ebeta_time[0].argsort()), str(ebeta_time[1].argsort()),
                       str(ebeta_time[2].argsort()), str(ebeta_time[3].argsort())))
        # pick the best pipeline by EI
        x_next = get_next_by_EI(ni, alpha, lr, lr_time, X, y,
                                float(args.ei_xi))
        pick = [[np.argmax(x_next_i)] for x_next_i in x_next]
        subspace = construct_subspace(module, pick)
        params = sample(subspace)
        cv.main(params)
        result, duration = get_last_run(log_filename)
        if result <= 100:
            # Append the new observation and refit both Ridge models.
            x_next_flat = np.array(x_next[0] + x_next[1] + x_next[2] + x_next[3])
            X = np.vstack([X, x_next_flat])
            y.append(result)
            y_time.append(np.log(duration))
            lr = linear_model.Ridge(alpha=alpha)
            lr.fit(X, y)
            lr_time = linear_model.Ridge(alpha=alpha)
            lr_time.fit(X, y_time)
    valid_times_in_ei_period = get_num_of_trials(
        log_filename, filter_valid=True) - valid_times_in_random_period

    # --- Construct subspace based on LR prediction ---
    final_ebeta = lr.coef_[:cum_ni[0]], \
        lr.coef_[cum_ni[0]:cum_ni[1]], \
        lr.coef_[cum_ni[1]:cum_ni[2]], \
        lr.coef_[cum_ni[2]:]
    final_ebeta_time = lr_time.coef_[:cum_ni[0]], \
        lr_time.coef_[cum_ni[0]:cum_ni[1]], \
        lr_time.coef_[cum_ni[1]:cum_ni[2]], \
        lr_time.coef_[cum_ni[2]:]
    final_pick = get_covered_units_by_ei(ni, alpha, lr, lr_time, X, y, 0,
                                         int(args.top_k_pipelines))
    final_subspace = construct_subspace(module, final_pick)
    logger.info('LR model estimated unit ranking: %s %s %s %s'
                % (str(final_ebeta[0].argsort()), str(final_ebeta[1].argsort()),
                   str(final_ebeta[2].argsort()), str(final_ebeta[3].argsort())))
    logger.info('LR Time model estimated unit ranking: %s %s %s %s'
                % (str(final_ebeta_time[0].argsort()),
                   str(final_ebeta_time[1].argsort()),
                   str(final_ebeta_time[2].argsort()),
                   str(final_ebeta_time[3].argsort())))
    logger.info('Selected pipelines: %s %s %s %s'
                % (final_pick[0], final_pick[1], final_pick[2], final_pick[3]))

    # --- Phase 3 with SMAC ---
    if args.algo == 'SMAC':
        # Was the Python-2-only `file(...)` builtin; `open` is equivalent
        # here and is what the params.pcs branch below already used.
        with open('pickup.txt', 'w') as fh:
            # One whitespace-separated line of unit indices per layer.
            for layer_pick in final_pick:
                for idx in layer_pick:
                    fh.write('%d ' % idx)
                fh.write('\n')
        subspace = construct_subspace(module, final_pick)
        new_space = convert_tpe_to_smac_from_object(subspace)
        with open('params.pcs', 'w') as fh:
            fh.write(new_space)
    # --- Phase 3 with TPE ---
    elif args.algo == 'TPE':
        fn = cv.main
        domain = hyperopt.Domain(fn, final_subspace, rseed=int(args.seed))
        trials = hyperopt.Trials()
        bopt_budget = int(args.bopt_budget)
        for i in range(bopt_budget):
            times = get_num_of_trials(log_filename, filter_valid=False)
            valid_times = get_num_of_trials(log_filename, filter_valid=True)
            logger.info('Total evaluation times: %d, valid times: %d'
                        % (times, valid_times))
            logger.info('TPE period times: %d, valid times: %d'
                        % (times - init_budget - ei_budget,
                           valid_times - valid_times_in_random_period
                           - valid_times_in_ei_period))
            logger.info('LR model estimated unit ranking: %s %s %s %s'
                        % (str(final_ebeta[0].argsort()),
                           str(final_ebeta[1].argsort()),
                           str(final_ebeta[2].argsort()),
                           str(final_ebeta[3].argsort())))
            logger.info('LR Time model estimated unit ranking: %s %s %s %s'
                        % (str(final_ebeta_time[0].argsort()),
                           str(final_ebeta_time[1].argsort()),
                           str(final_ebeta_time[2].argsort()),
                           str(final_ebeta_time[3].argsort())))
            logger.info('Selected pipelines: %s %s %s %s'
                        % (final_pick[0], final_pick[1],
                           final_pick[2], final_pick[3]))
            # in exhaust, the number of evaluations is max_evals - num_done
            # NOTE(review): max_evals=i means one new evaluation per loop
            # iteration (and zero on the first) — confirm this is intended.
            tpe_with_seed = partial(hyperopt.tpe.suggest, seed=int(args.seed))
            rval = hyperopt.FMinIter(tpe_with_seed, domain, trials,
                                     max_evals=i)
            rval.exhaust()
def main():
    """Run the FLASH pipeline-optimization driver.

    Three phases:
      1. Random "burning" period: evaluate `init_budget` randomly (or
         optimal-design) picked pipelines to seed the linear model.
      2. EI-controlled online period: for `ei_budget` iterations, fit Ridge
         models for loss and log-runtime, pick the next pipeline by Expected
         Improvement, evaluate it, and refit.
      3. Hand the LR-selected subspace to SMAC (writes pickup.txt/params.pcs)
         or run TPE directly for `bopt_budget` evaluations.

    Command-line arguments are parsed with parse_known_args; unknown
    arguments are ignored. Exits via sys.exit(1) if the space file is
    missing. Relies on module-level helpers (logger, cv, sample,
    construct_subspace, get_num_of_trials, ...) defined elsewhere.
    """
    parser = ArgumentParser()
    parser.add_argument('-p', '--space', dest='spaceFile',
                        help='Where is the space.py located?')
    parser.add_argument('--use_optimal_design', dest='use_optimal_design',
                        help='Use optimal design or pure random initialization?')
    parser.add_argument('--init_budget', dest='init_budget',
                        help='How many evaluations for random burning period?')
    parser.add_argument('--ei_budget', dest='ei_budget',
                        help='How many evaluations for EI controlled online period?')
    parser.add_argument('--bopt_budget', dest='bopt_budget',
                        help='How many evaluations for Bayesian optimization after get subspace?')
    parser.add_argument('--ei_xi', dest='ei_xi',
                        help='What is the exploration parameter for computing EI?')
    parser.add_argument('--top_k_pipelines', dest='top_k_pipelines',
                        help='How many top (LR predicted) pipelines to cover in subspace?')
    parser.add_argument('-s', '--seed', default='1', dest='seed', type=int,
                        help='Seed for the algorithm')
    parser.add_argument('-a', '--algo', default='SMAC', dest='algo', type=str,
                        help='Specify the algorithm after LR, can be SMAC or TPE')
    parser.add_argument('-r', '--restore', action='store_true', dest='restore',
                        help='When this flag is set state.pkl is restored in ' +
                             'the current working directory')
    parser.add_argument('--random', default=False, action='store_true',
                        dest='random', help='Use a random search')
    parser.add_argument('--cwd', help='Change the working directory before '
                        'optimizing.')
    args, unknown = parser.parse_known_args()

    if args.cwd:
        os.chdir(args.cwd)
    if not os.path.exists(args.spaceFile):
        logger.critical('Search space not found: %s' % args.spaceFile)
        sys.exit(1)

    # First remove '.py'
    space, ext = os.path.splitext(os.path.basename(args.spaceFile))

    # Then load dict searchSpace and out function cv.py
    sys.path.append('./')
    sys.path.append('')
    module = import_module(space)
    search_space = module.space
    ni = [len(d) for d in module.layer_dict_list]  # number of units in each layer
    cum_ni = np.cumsum(ni)  # cumulative boundaries for slicing LR coefficients
    log_filename = 'lr.pkl'

    # --- Phase 1: random burning period as initialization ---
    init_budget = int(args.init_budget)
    if args.use_optimal_design == '1':
        picks = get_random_picks_by_optimal_design(ni, init_budget)
    else:
        picks = get_pure_random_picks(ni, init_budget)
    for i in range(init_budget):
        times = get_num_of_trials(log_filename, filter_valid=False)
        valid_times = get_num_of_trials(log_filename, filter_valid=True)
        logger.info('IMPORTANT! YOU ARE RUNNING FLASH WITH: %s' % args.algo)
        logger.info('Total evaluation times: %d, valid times: %d'
                    % (times, valid_times))
        logger.info('Random burning period times: %d, valid times: %d'
                    % (times, valid_times))
        subspace = construct_subspace(module, picks[i])
        params = sample(subspace)
        cv.main(params)
    valid_times_in_random_period = get_num_of_trials(log_filename,
                                                     filter_valid=True)

    # Train the first LR model before entering into EI controlled period.
    # Use a context manager so the log file is closed even if unpickling fails.
    with open(log_filename) as fh:
        log = cPickle.load(fh)
    trials = log['trials']
    X = []       # one-hot encodings of evaluated pipelines
    y = []       # observed losses
    y_time = []  # log of observed runtimes
    for trial in trials:
        result = trial['result']
        # Renamed from `time` to avoid shadowing the time module.
        duration = trial['duration']
        # make sure the logged result is a number (accept evaluations return 100.0)
        if result <= 100:
            params = trial['params']
            rescaling = params['-rescaling']
            balancing = params['-balancing']
            feat_pre = params['-feat_pre']
            clf = params['-classifier']
            # One-hot encode the chosen unit in each of the 4 pipeline layers.
            x = [[0] * n for n in ni]
            x[0][module.d_rescaling[rescaling]] = 1
            x[1][module.d_balancing[balancing]] = 1
            x[2][module.d_feat_pre[feat_pre]] = 1
            x[3][module.d_clf[clf]] = 1
            x_flat = np.array(x[0] + x[1] + x[2] + x[3])
            X.append(x_flat)
            y.append(result)
            y_time.append(np.log(duration))
    X = np.array(X)
    alpha = 1.0  # Ridge regularization strength
    lr = linear_model.Ridge(alpha=alpha)
    lr.fit(X, y)
    lr_time = linear_model.Ridge(alpha=alpha)
    lr_time.fit(X, y_time)

    # --- Phase 2: online period controlled by EI ---
    ei_budget = int(args.ei_budget)
    for i in range(ei_budget):
        times = get_num_of_trials(log_filename, filter_valid=False)
        valid_times = get_num_of_trials(log_filename, filter_valid=True)
        logger.info('Total evaluation times: %d, valid times: %d'
                    % (times, valid_times))
        logger.info('EI controlled period times: %d, valid times: %d'
                    % (times - init_budget,
                       valid_times - valid_times_in_random_period))
        # Per-layer coefficient slices of the loss model.
        ebeta = lr.coef_[:cum_ni[0]], \
            lr.coef_[cum_ni[0]:cum_ni[1]], \
            lr.coef_[cum_ni[1]:cum_ni[2]], \
            lr.coef_[cum_ni[2]:]
        logger.info('LR model estimated unit ranking: %s %s %s %s'
                    % (str(ebeta[0].argsort()), str(ebeta[1].argsort()),
                       str(ebeta[2].argsort()), str(ebeta[3].argsort())))
        ebeta_time = lr_time.coef_[:cum_ni[0]], \
            lr_time.coef_[cum_ni[0]:cum_ni[1]], \
            lr_time.coef_[cum_ni[1]:cum_ni[2]], \
            lr_time.coef_[cum_ni[2]:]
        logger.info('LR Time model estimated unit ranking: %s %s %s %s'
                    % (str(ebeta_time[0].argsort()), str(ebeta_time[1].argsort()),
                       str(ebeta_time[2].argsort()), str(ebeta_time[3].argsort())))
        # pick the best pipeline by EI
        x_next = get_next_by_EI(ni, alpha, lr, lr_time, X, y,
                                float(args.ei_xi))
        pick = [[np.argmax(x_next_i)] for x_next_i in x_next]
        subspace = construct_subspace(module, pick)
        params = sample(subspace)
        cv.main(params)
        result, duration = get_last_run(log_filename)
        if result <= 100:
            # Append the new observation and refit both Ridge models.
            x_next_flat = np.array(x_next[0] + x_next[1] + x_next[2] + x_next[3])
            X = np.vstack([X, x_next_flat])
            y.append(result)
            y_time.append(np.log(duration))
            lr = linear_model.Ridge(alpha=alpha)
            lr.fit(X, y)
            lr_time = linear_model.Ridge(alpha=alpha)
            lr_time.fit(X, y_time)
    valid_times_in_ei_period = get_num_of_trials(
        log_filename, filter_valid=True) - valid_times_in_random_period

    # --- Construct subspace based on LR prediction ---
    final_ebeta = lr.coef_[:cum_ni[0]], \
        lr.coef_[cum_ni[0]:cum_ni[1]], \
        lr.coef_[cum_ni[1]:cum_ni[2]], \
        lr.coef_[cum_ni[2]:]
    final_ebeta_time = lr_time.coef_[:cum_ni[0]], \
        lr_time.coef_[cum_ni[0]:cum_ni[1]], \
        lr_time.coef_[cum_ni[1]:cum_ni[2]], \
        lr_time.coef_[cum_ni[2]:]
    final_pick = get_covered_units_by_ei(ni, alpha, lr, lr_time, X, y, 0,
                                         int(args.top_k_pipelines))
    final_subspace = construct_subspace(module, final_pick)
    logger.info('LR model estimated unit ranking: %s %s %s %s'
                % (str(final_ebeta[0].argsort()), str(final_ebeta[1].argsort()),
                   str(final_ebeta[2].argsort()), str(final_ebeta[3].argsort())))
    logger.info('LR Time model estimated unit ranking: %s %s %s %s'
                % (str(final_ebeta_time[0].argsort()),
                   str(final_ebeta_time[1].argsort()),
                   str(final_ebeta_time[2].argsort()),
                   str(final_ebeta_time[3].argsort())))
    logger.info('Selected pipelines: %s %s %s %s'
                % (final_pick[0], final_pick[1], final_pick[2], final_pick[3]))

    # --- Phase 3 with SMAC ---
    if args.algo == 'SMAC':
        # Was the Python-2-only `file(...)` builtin; `open` is equivalent
        # here and is what the params.pcs branch below already used.
        with open('pickup.txt', 'w') as fh:
            # One whitespace-separated line of unit indices per layer.
            for layer_pick in final_pick:
                for idx in layer_pick:
                    fh.write('%d ' % idx)
                fh.write('\n')
        subspace = construct_subspace(module, final_pick)
        new_space = convert_tpe_to_smac_from_object(subspace)
        with open('params.pcs', 'w') as fh:
            fh.write(new_space)
    # --- Phase 3 with TPE ---
    elif args.algo == 'TPE':
        fn = cv.main
        domain = hyperopt.Domain(fn, final_subspace, rseed=int(args.seed))
        trials = hyperopt.Trials()
        bopt_budget = int(args.bopt_budget)
        for i in range(bopt_budget):
            times = get_num_of_trials(log_filename, filter_valid=False)
            valid_times = get_num_of_trials(log_filename, filter_valid=True)
            logger.info('Total evaluation times: %d, valid times: %d'
                        % (times, valid_times))
            logger.info('TPE period times: %d, valid times: %d'
                        % (times - init_budget - ei_budget,
                           valid_times - valid_times_in_random_period
                           - valid_times_in_ei_period))
            logger.info('LR model estimated unit ranking: %s %s %s %s'
                        % (str(final_ebeta[0].argsort()),
                           str(final_ebeta[1].argsort()),
                           str(final_ebeta[2].argsort()),
                           str(final_ebeta[3].argsort())))
            logger.info('LR Time model estimated unit ranking: %s %s %s %s'
                        % (str(final_ebeta_time[0].argsort()),
                           str(final_ebeta_time[1].argsort()),
                           str(final_ebeta_time[2].argsort()),
                           str(final_ebeta_time[3].argsort())))
            logger.info('Selected pipelines: %s %s %s %s'
                        % (final_pick[0], final_pick[1],
                           final_pick[2], final_pick[3]))
            # in exhaust, the number of evaluations is max_evals - num_done
            # NOTE(review): max_evals=i means one new evaluation per loop
            # iteration (and zero on the first) — confirm this is intended.
            tpe_with_seed = partial(hyperopt.tpe.suggest, seed=int(args.seed))
            rval = hyperopt.FMinIter(tpe_with_seed, domain, trials,
                                     max_evals=i)
            rval.exhaust()
import os
import subprocess
import time

import numpy as np
from importlib import import_module
from HPOlib.format_converter.tpe_to_smac import convert_tpe_to_smac_from_object

# Generate a SMAC .pcs parameter file from the TPE-style search space
# defined in the local space.py module, then launch HPOlib with SMAC.
module = import_module('space')
search_space = module.space
smac_space = convert_tpe_to_smac_from_object(search_space)

smac_space_file = 'smac_2_06_01-dev/params.pcs'
# Use a context manager so the handle is closed even if the write fails.
with open(smac_space_file, 'w') as fh:
    fh.write(smac_space)
# Fixed typo: 'Sapce' -> 'Space'.
print('Space file for SMAC generated: %s' % smac_space_file)

# Random stamp used as the HPOlib seed so repeated launches do not collide.
rand_stamp = np.random.randint(10000, 99999)
call_smac = 'HPOlib-run -o ../../optimizers/smac/smac_2_06_01-dev -s %d' % rand_stamp
# Use function-call print form for consistency with the message above
# (the original mixed a Python-2-only print statement into this script).
print('Command: %s' % call_smac)
# NOTE(review): shell=True with a string command; the command is built from
# constants plus an int, so injection risk is low, but a list + shell=False
# would be safer if this is ever parameterized.
subprocess.call(call_smac, shell=True)