def __iter__(self):
    """Search parameter distribution for unique states.

    As each state is defined using hp.choice, we don't explicitly know
    each of the unique states that our estimator can be set to. We sample
    the distribution of states up to max_tries times to collect these
    unique states and return an iterable of them. If max_tries is None
    (set in the constructor), we sample the search space and add every
    sampled value, duplicates included.

    Returns:
        iterable of unique states.
    """
    # Sample repeatedly, retrying duplicate draws up to max_tries times.
    rng = check_random_state(self.random_state)
    prev_samples = []
    max_tries = self.max_tries if self.max_tries is not None else 1
    for _ in range(self.n_iter):
        sample = stoch.sample(self.param_distributions(), rng=rng)
        n_tries = 0
        while sample not in prev_samples or n_tries < max_tries:
            if sample not in prev_samples or self.max_tries is None:
                prev_samples.append(sample)
                break
            sample = stoch.sample(self.param_distributions(), rng=rng)
            n_tries += 1
    return iter(prev_samples)
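# A minimal standalone sketch of the same dedup-sampling idea, assuming a
# simple hp.choice-only space; `sample_unique` and its n_iter/max_tries
# defaults are illustrative, not part of any library API.
from hyperopt import hp
from hyperopt.pyll import stochastic as stoch
from sklearn.utils import check_random_state

def sample_unique(space, n_iter=10, max_tries=5, random_state=0):
    rng = check_random_state(random_state)
    seen = []
    for _ in range(n_iter):
        for _ in range(max_tries):
            s = stoch.sample(space, rng=rng)
            if s not in seen:
                seen.append(s)
                break
    return seen

# e.g. sample_unique({'C': hp.choice('C', [0.1, 1.0, 10.0])}) yields at
# most three unique settings, however many draws that takes.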
def add_trials(points):
    test_trials = Trials()
    for tid, row in enumerate(points):
        vals = {}
        for key in sample(space).keys():
            vals[key] = [row['params'][key]]
        hyperopt_trial = Trials().new_trial_docs(
            tids=[tid],
            specs=[None],
            results=[row],
            miscs=[{
                'tid': tid,
                'cmd': ('domain_attachment', 'FMinIter_Domain'),
                'workdir': None,
                'idxs': {key: [tid] for key in sample(space).keys()},
                'vals': vals,
            }])
        hyperopt_trial[0]['state'] = hyperopt.JOB_STATE_DONE
        test_trials.insert_trial_docs(hyperopt_trial)
        test_trials.refresh()
    return test_trials
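# Hedged usage sketch: trials built this way can warm-start fmin through
# its standard `trials` argument. The toy space, points, and objective
# below are illustrative stand-ins, not part of the original code; note
# that add_trials reads the module-level `space`, so one is defined here.
from hyperopt import fmin, tpe, hp, STATUS_OK

space = {'x': hp.uniform('x', -5, 5)}
points = [{'params': {'x': 1.0}, 'loss': 1.0, 'status': STATUS_OK}]
warm_trials = add_trials(points)
best = fmin(fn=lambda p: {'loss': p['x'] ** 2, 'status': STATUS_OK},
            space=space, algo=tpe.suggest,
            trials=warm_trials, max_evals=10)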
def test_repeatable():
    u = scope.uniform(0, 1)
    aa = as_apply(
        dict(u=u, n=scope.normal(5, 0.1), l=[0, 1, scope.one_of(2, 3), u]))
    dd1 = sample(aa, np.random.RandomState(3))
    dd2 = sample(aa, np.random.RandomState(3))
    dd3 = sample(aa, np.random.RandomState(4))
    assert dd1 == dd2
    assert dd1 != dd3
def xgb_cv(self, space):
    """Perform XGBoost cross-validation with stochastically sampled
    parameters for hyperparameter optimization.

    Parameters
    ----------
    space: dict
        The set of possible arguments to `fn` is the set of objects that
        could be created with non-zero probability by drawing randomly
        from this stochastic program involving hp.
    """
    cvr = xgb.cv(
        params=sample(self.space),
        dtrain=self.dtrain_,
        num_boost_round=self.num_boost_round,
        nfold=self.n_splits,
        stratified=self.stratified,
        metrics=self.metrics,
        early_stopping_rounds=self.early_stopping_rounds,
        seed=self.random_state,
        shuffle=self.shuffle,
        verbose_eval=self.verbose,
    )
    # hyperopt expects a scalar loss; take the final round's first metric
    loss = float(cvr.iloc[-1, 0])
    return {"loss": loss, "status": STATUS_OK}
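# A hedged sketch of the kind of space `self.space` is assumed to hold;
# the parameter names follow standard XGBoost options, the ranges are
# illustrative only.
import numpy as np
from hyperopt import hp

xgb_space = {
    'max_depth': hp.choice('max_depth', [3, 5, 7, 9]),
    'eta': hp.loguniform('eta', np.log(0.01), np.log(0.3)),
    'subsample': hp.uniform('subsample', 0.5, 1.0),
}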
def test():
    from hyperopt.pyll.stochastic import sample
    space = get_space(overfit=True)
    for _ in range(10):
        print(fix_args(**sample(space)))
def PosteriorPlot(space_search, trials, Nsamples=1000):
    # create dict of experiments: (sampled value, loss) per hyperparameter
    d = {}
    for k in space_search.keys():
        d[k] = np.array([
            (t['misc']['vals'][k][0], t['result']['loss'])
            for t in trials.trials])
    # create dict of samples drawn from the prior
    samples = {}
    for k in d.keys():
        samples[k] = [sample(space_search)[k] for _ in range(Nsamples)]
    # plot prior and posterior
    # inspired by https://github.com/MBKraus/Hyperopt/blob/master/Hyperopt.ipynb
    # https://www.codementor.io/mikekraus/using-bayesian-optimisation-to-reduce-the-time-spent-on-hyperparameter-tuning-tgc3ikmp2
    for k in d.keys():
        f, ax = plt.subplots(figsize=(10, 6))
        sns.set_palette("husl")
        sns.despine()
        ax = sns.kdeplot(np.array(samples[k]), label='Prior', linewidth=3)
        ax = sns.kdeplot(d[k][:, 0], label='Posterior (as complete path)',
                         linewidth=3)
        ax.set_ylabel('Density', fontsize=12, fontweight='bold')
        ax2 = ax.twinx()
        ax2.scatter(d[k][:, 0], d[k][:, 1], c='blue', label='Loss ind. value')
        ax2.set_ylabel('loss', fontsize=12, fontweight='bold', color='blue')
        plt.title(k, fontsize=18, fontweight='bold')
        plt.xlabel(k, fontsize=12, fontweight='bold')
        plt.legend()
        # plt.setp(ax.get_legend().get_texts(), fontsize='12', fontweight='bold')
        plt.show()
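# Hedged usage sketch: PosteriorPlot consumes a Trials object populated by
# an ordinary fmin run; the space and objective below are illustrative
# stand-ins. The keys of `space_search` must match the hp labels.
from hyperopt import fmin, tpe, hp, Trials, STATUS_OK

space_search = {'lr': hp.loguniform('lr', -5, 0)}
trials = Trials()
fmin(fn=lambda p: {'loss': (p['lr'] - 0.1) ** 2, 'status': STATUS_OK},
     space=space_search, algo=tpe.suggest, trials=trials, max_evals=50)
PosteriorPlot(space_search, trials)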
def RandomSample():
    space = {
        'DROPOUT': hp.choice('drop', (0.2, 0.5)),
        'DELTA': hp.choice('delta', (1e-04, 1e-06, 1e-08)),
        'MOMENT': hp.choice('moment', (0.9, 0.99, 0.999)),
    }
    params = sample(space)
    return params
def sample_space(self, space):
    value = stochastic.sample(space)
    if isinstance(value, str):
        value = value.strip()
        if " " in value:
            # surround with quotes so it is treated as a single entity
            value = '"' + value + '"'
    return value
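# Hedged illustration of the quoting behaviour above: a sampled string
# containing spaces gets wrapped in quotes so downstream command-line
# assembly treats it as one token. The space below is illustrative.
from hyperopt import hp
from hyperopt.pyll import stochastic

opt_space = hp.choice('opt', ['adam', 'sgd momentum'])
value = stochastic.sample(opt_space)
if isinstance(value, str) and " " in value.strip():
    value = '"' + value.strip() + '"'
print(value)  # 'adam' or '"sgd momentum"'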
def get_params():
    """Randomly select one concrete configuration.

    :returns: A configuration for the corresponding model.
    """
    params = sample(space)
    params = {k: v for k, v in params.items() if v != "default"}
    return handle_integers(params)
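# `handle_integers` is referenced here and in several snippets below but
# never defined; a plausible minimal sketch (an assumption, not the
# original helper) casts whole-valued floats back to int, since
# hp.quniform returns floats:
def handle_integers(params):
    return {k: int(v) if isinstance(v, float) and v.is_integer() else v
            for k, v in params.items()}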
def next(self):
    value = stochastic.sample(self.dist_func)
    if isinstance(value, str):
        value = value.strip()
        if " " in value:
            # surround with quotes so it is treated as a single entity
            value = '"' + value + '"'
    return value
def test_sample():
    u = scope.uniform(0, 1)
    aa = as_apply(
        dict(u=u, n=scope.normal(5, 0.1), l=[0, 1, scope.one_of(2, 3), u]))
    print(aa)
    dd = sample(aa, np.random.RandomState(3))
    assert 0 < dd["u"] < 1
    assert 4 < dd["n"] < 6
    assert dd["u"] == dd["l"][3]
    assert dd["l"][:2] == (0, 1)
    assert dd["l"][2] in (2, 3)
def get_params():
    params = sample(space)
    new_params = {}
    for k, v in params.items():
        if type(v) == float and int(v) == v:
            new_params[k] = int(v)
        else:
            new_params[k] = v
    return new_params
def run(self):
    space = {k: hp.choice(k, v) for k, v in self.choices.items()}
    while True:
        values = sample(space)
        yield PredictMLPv2ThresholdVariable(
            mode='evaluation',
            num_orders_per_user=values['num_orders_per_user'],
            product_history=values['product_history'],
            product_embedding=values['product_embedding'],
            hidden_layers=values['hidden_layers'],
            dropout=values['dropout'],
            global_orders_ratio=0.25)
def test_uniformint_arguments(arguments):
    """
    Test whether uniformint can accept both positional and keyword arguments.
    Related to PR #704.
    """
    if isinstance(arguments, list):
        space = hp.uniformint(*arguments)
    if isinstance(arguments, dict):
        space = hp.uniformint(**arguments)
    rng = np.random.default_rng(np.random.PCG64(123))
    values = [sample(space, rng=rng) for _ in range(10)]
    assert values == [7, 1, 2, 2, 2, 8, 9, 3, 8, 9]
def test_uniformint_arguments(arguments):
    """
    Test whether uniformint can accept both positional and keyword arguments.
    Related to PR #704.
    """
    if isinstance(arguments, list):
        space = hp.uniformint(*arguments)
    if isinstance(arguments, dict):
        space = hp.uniformint(**arguments)
    rng = np.random.RandomState(123)
    values = [sample(space, rng=rng) for _ in range(10)]
    assert values == [7, 3, 2, 6, 7, 4, 10, 7, 5, 4]
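# Hedged illustration of the two call forms the tests above exercise; the
# label and bounds are arbitrary examples. The two tests expect different
# value sequences because NumPy's Generator and the legacy RandomState
# draw differently even when seeded identically.
from hyperopt import hp

space_positional = hp.uniformint('x', 1, 10)
space_keyword = hp.uniformint('x', low=1, high=10)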
def get_params():
    space = {
        'learning_rate': hyperopt.hp.choice(
            'lr', [hyperopt.hp.loguniform('lr_', -5., 0.)]),
        'subsample': hyperopt.hp.choice(
            'ss', [hyperopt.hp.uniform('ss_', 0., 1.)]),
        'l2_leaf_reg': hyperopt.hp.choice(
            'l2lr', [hyperopt.hp.loguniform('l2lr_', 0., np.log(10.))]),
        'random_strength': hyperopt.hp.choice(
            'rs', [hyperopt.hp.choice('rs_', np.arange(1, 21))]),
        'leaf_estimation_iterations': hyperopt.hp.choice(
            'lei', [hyperopt.hp.choice('lei_', np.arange(1, 11))]),
    }
    params = sample(space)
    params = {k: v for k, v in params.items() if v != 'default'}
    return handle_integers(params)
def get_params():
    params = sample(space)
    # handle floats which should be integers
    new_params = {}
    for k, v in params.items():
        if type(v) == float and int(v) == v:
            new_params[k] = int(v)
        else:
            new_params[k] = v
    return new_params
def sample_plot(self, sample_space, n_iter, chart_scale=15):
    """
    Documentation:

    ---
    Definition:
        Visualizes a single hyperopt theoretical distribution. Useful for
        determining which distribution to use when setting up hyperopt
        distribution objects for actual parameter tuning.

    ---
    Parameters:
        sample_space : dictionary
            Dictionary of 'param name: hyperopt distribution object'
            key/value pairs. The name can be arbitrarily chosen, and the
            value is a defined hyperopt distribution.
        n_iter : int
            Number of draws from the theoretical distribution used to
            visualize it. A higher number gives a more robust picture of
            the distribution but can take considerably longer to create.
        chart_scale : float, default=15
            Controls proportions of visualizations. Larger values scale
            the visual up in size, smaller values scale it down.
    """
    # iterate through each parameter
    for param in sample_space.keys():
        # draw n_iter samples from the theoretical distribution
        theoretical_dist = []
        for _ in range(n_iter):
            theoretical_dist.append(sample(sample_space)[param])
        theoretical_dist = np.array(theoretical_dist)

        # create prettierplot object
        p = PrettierPlot(chart_scale=chart_scale)

        # add canvas to prettierplot object
        ax = p.make_canvas(
            title="actual vs. theoretical plot\n* {}".format(param),
            y_shift=0.8,
            position=111,
        )

        # add kernel density plot to canvas
        p.kde_plot(
            theoretical_dist,
            color=style.style_grey,
            y_units="p",
            x_units="fff" if np.nanmax(theoretical_dist) <= 5.0 else "ff",
            ax=ax,
        )
def sample_space(self) -> Any:
    """Sample from hyperparameter distributions.

    Parameters
    ----------
    None

    Returns
    -------
    dict
        Key/value pairs, where key is the hyperparameter name and value
        is a sample from the hyperparameter's statistical distribution
    """
    hypers: Dict[str, Any] = {}
    for param, dist in self.space.items():
        hypers[param] = int(sample(dist)) \
            if param in ['n_estimators', 'max_delta_step',
                         'max_depth', 'min_child_weight'] \
            else sample(dist)
    # Add seed
    hypers['random_state'] = self.seed
    return hypers
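# Hedged sketch of the kind of `self.space` this method is assumed to
# iterate over; the names follow XGBoost conventions and the ranges are
# illustrative only. The quantized draws come back as floats, which is
# why the integer-valued parameters above are cast with int().
import numpy as np
from hyperopt import hp

space = {
    'n_estimators': hp.quniform('n_estimators', 50, 500, 25),
    'max_depth': hp.quniform('max_depth', 2, 10, 1),
    'learning_rate': hp.loguniform('learning_rate',
                                   np.log(0.01), np.log(0.3)),
}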
def get_params_VAE(args):
    params = sample(space_VAE)
    params = handle_integers(params)
    params['train_folder'] = args.train_folder
    params['batchsize'] = args.batchsize
    params['seed'] = args.seed
    params['cuda'] = args.cuda
    params['load_model'] = args.load_model
    params['model_type'] = args.model_type
    params['time_gap'] = 1
    params['num_images'] = 1
    params['stat_data_file'] = args.stat_data_file
    return params
def run(self):
    space = {k: hp.choice(k, v) for k, v in self.choices.items()}
    while True:
        values = sample(space)
        yield PredictRNNv5ReorderSizeKnown(
            mode='evaluation',
            product_history=values['product_history'],
            embedding_dim=values['embedding_dim'],
            lstm_size=values['lstm_size'],
            lstm_layers=values['lstm_layers'],
            hidden_layers=values['hidden_layers'],
            hidden_nonlinearily=values['hidden_nonlinearily'],
            dropout=values['dropout'],
            global_orders_ratio=0.1)
def RandomSample():
    space = {
        'DROPOUT': hp.choice('drop', (0.2, 0.5)),
        'DELTA': hp.choice('delta', (1e-04, 1e-06, 1e-08)),
        'MOMENT': hp.choice('moment', (0.9, 0.99, 0.999)),
        # 'DELTA1': hp.choice('delta1', (0.0001, 0.005)),
        # 'MOMENT1': hp.choice('moment1', (0.9, 0.99, 0.999)),
        # 'MOMENT2': hp.choice('moment11', (0.9, 0.99, 0.999)),
        # 'DELTA2': hp.choice('delta1', (0.01, 0.001, 0.1)),
        # 'MOMENT2': hp.choice('moment2', (0.99, 0.9)),
    }
    params = sample(space)
    return params
def build_hp_sample(num_trials: int) -> pd.DataFrame:
    from addict import Dict
    all_hps = [Dict(deepcopy(template)) for _ in range(num_trials)]
    for hp_trial in all_hps:
        hp_trial.update(flatten(dict(sample(SEARCH_SPACE))))
    df = pd.concat([
        pd.DataFrame.from_dict({k: [v] for k, v in hp_trial.items()})
        for hp_trial in all_hps
    ])
    for col in df.columns:
        if 'float' in str(df[col].dtype) and 'distance' not in col:
            df[col] = df[col].astype(int)
    return df
def dry_run(
    B=None,
    nonfusibles_kvs=None,
    epochs=None,
    iters_per_epoch=None,
    env_vars=None,
):
    params = [{
        **handle_integers(sample(fusibles, rng=rng_state)),
        **nonfusibles_kvs
    } for _ in range(max(B, 1))]
    if B > 0:
        params = fuse_dicts(params)
    else:
        params = params[0]
    return _run(None, epochs, iters_per_epoch, params, env_vars=env_vars)
def parameter_optimization(space):
    # Sample from the full space
    x = sample(space)
    x['num_leaves'] = int(x['num_leaves'])

    # Create the parameter optimization algorithm
    tpe_algorithm = tpe.suggest

    # Record results
    trials = Trials()

    # Run optimization. Note: passing a single sampled point as `space`
    # means fmin evaluates that fixed configuration; normally the
    # distribution itself would be passed here.
    fmin(fn=boosting_cv, space=x, algo=tpe_algorithm, trials=trials,
         max_evals=MAX_EVALS)

    # Sort by loss and return the best trial (lowest loss, i.e. highest
    # auc when the objective reports loss as negated auc)
    trials = sorted(trials.results, key=lambda x: x['loss'])
    return trials[:1]
def run(self):
    space = {k: hp.choice(k, v) for k, v in self.choices.items()}
    while True:
        values = sample(space)
        yield PredictRNNv3ReorderSizeKnown(
            mode='evaluation',
            max_days=values['max_days'],
            max_products_per_day=values['max_products_per_day'],
            max_prior_orders=values['max_prior_orders'],
            embedding_dim=values['embedding_dim'],
            lstm_layers=values['lstm_layers'],
            lstm_units=values['lstm_units'],
            hidden_layers=values['hidden_layers'],
            dropout=values['dropout'],
            global_orders_ratio=0.25,
            validation_orders_ratio=0.1,
            batch_size=1024,
            epochs=10)
def get_sampled_params_for_lm(space, index=1):
    sample = stoc.sample(space)
    sample['learning_rate'] = 10**(sample['learning_rate'])
    sample['embedding_size'] = int(sample['embedding_size'])
    sample['hidden_size'] = int(sample['hidden_size'])
    sample['num_layers'] = int(sample['num_layers'])
    print("Sweep ", index, sample)
    output = 'lr_%.5f_do_%.1f_nl_%d_hs_%d_es_%d.out' % (
        sample['learning_rate'], sample['dropout'], sample['num_layers'],
        sample['hidden_size'], sample['embedding_size'])
    params = '-lr %.5f -do %.1f -nl %d -hs %d -es %d' % (
        sample['learning_rate'], sample['dropout'], sample['num_layers'],
        sample['hidden_size'], sample['embedding_size'])
    return params, output
def run(self):
    rng = RandomState(self.random_seed)
    space = {k: hp.choice(k, v) for k, v in self.choices.items()}
    while True:
        values = sample(space, rng)
        yield PredictRNNv1(
            stage=2,
            imputation=values['imputation'],
            sample_ratio=0.1,
            deploy_date=date(2017, 6, 18),
            from_date=date(2017, 7, 1),
            to_date=date(2017, 8, 31),
            num_days_before=values['num_days_before'],
            lstm_size_factor=values['lstm_size_factor'],
            hidden_layers=values['hidden_layers'],
            hidden_nonlinearily=values['hidden_nonlinearily'],
            hidden_dropout=values['hidden_dropout'],
            loss=values['loss'],
            learning_rate=values['learning_rate'],
            max_grad_norm=values['max_grad_norm'])
def load_param(name, space, scope=None):
    """
    Loads a parameter specified by its `name` from the given `scope`.
    When no such parameter can be found in `scope`, a random sample will
    be drawn from the given parameter `space`. Finally, the parameter
    value is decoded using `load_model(value)`.
    """
    if scope and name in scope:
        # Use the given value:
        value = scope[name]
    else:
        # Draw a random sample from the parameter space using pyll:
        from hyperopt.pyll import stochastic
        hp_space = label_vars(space, name)
        value = stochastic.sample(hp_space)
    # Decode the value into an object tree:
    return load_model(value)
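# Hedged usage sketch: `label_vars` and `load_model` are project-specific
# helpers not shown in this snippet, so these calls are illustrative only.
# An explicit scope takes precedence over random sampling:
from hyperopt import hp

dropout = load_param('dropout', hp.uniform('dropout', 0.0, 0.5),
                     scope={'dropout': 0.3})
# falls back to a random draw when the scope lacks the name:
dropout = load_param('dropout', hp.uniform('dropout', 0.0, 0.5), scope={})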
def run(self):
    from hyperopt import hp
    from hyperopt.pyll.stochastic import sample
    space = {k: hp.choice(k, v) for k, v in self.choices.items()}
    while True:
        values = sample(space)
        yield PredictRNNv2ReorderSizeKnown(
            mode='evaluation',
            max_days=values['max_days'],
            max_products_per_day=values['max_products_per_day'],
            product_embedding_dim=values['product_embedding_dim'],
            days_attention_layers=values['days_attention_layers'],
            days_attention_activation=values['days_attention_activation'],
            lstm_units=values['lstm_units'],
            hidden_layers=values['hidden_layers'],
            hidden_layers_activation=values['hidden_layers_activation'],
            optimizer=values['optimizer'],
            global_orders_ratio=0.25,
            validation_orders_ratio=0.1,
            users_per_batch=8,
            epochs=10)
def get_sampled_params_for_classifier(args, space, index=1,
                                      has_pretrained_encoder=False):
    sample = stoc.sample(space)
    sample['learning_rate'] = 10**(sample['learning_rate'])
    sample['hidden_size'] = int(sample['hidden_size'])
    sample['embedding_size'] = int(sample['embedding_size'])
    sample['num_layers'] = int(sample['num_layers'])
    sample['encoding_size'] = int(sample['encoding_size'])
    sample['encoder_num_layers'] = int(sample['encoder_num_layers'])
    output = 'lr_%.5f_nl_%d_hs_%d_do_%.1f' % (
        sample['learning_rate'], sample['num_layers'],
        sample['hidden_size'], sample['dropout'])
    params = '-lr %.5f -nl %d -hs %d -do %.1f' % (
        sample['learning_rate'], sample['num_layers'],
        sample['hidden_size'], sample['dropout'])
    embedding_size = (sample['embedding_size']
                      if args.embedding_size is None
                      else args.embedding_size)
    if has_pretrained_encoder:
        sample.pop('encoder_num_layers')
        sample.pop('encoding_size')
        sample.pop('embedding_size')
        output += '_ed_%d_es_%d_enl_%d.out' % (
            embedding_size, args.encoder_num_layers, args.encoding_size)
    else:
        output += '_ed_%d_es_%d_enl_%d.out' % (
            embedding_size, sample['encoding_size'],
            sample['encoder_num_layers'])
        params += ' -es %d --encoding_size %d --encoder_num_layers %d' % (
            embedding_size, sample['encoding_size'],
            sample['encoder_num_layers'])
    print("Sweep ", index, sample)
    return params, output
def get_sample_experiment():
    from hyperopt.pyll.stochastic import sample
    from pylearn2.config import yaml_parse
    from os.path import join
    import sys
    sys.path.append('..')
    from hyperopt_api.parser import build
    from yaml_parser import yaml_parser as yp
    from hyperopt_api.search_space import get_search_space
    import configuration.model as config
    from utils.common import get_timestamp

    # prepare all variables that don't need to be updated with each iteration
    spa = get_search_space()  # define search space over possible models
    path = config.data_path

    # obtain the yaml skeleton
    with open(config.yaml_skelton_path) as f:
        default_string = f.read()

    samp = sample(spa)  # generate a sample (a description of a model)
    mod = build(samp)   # build an object that will fit into yaml_parser

    # define weight decay parameters; they depend on the number of layers
    # (there is one parameter for each layer)
    weight_decay_coeffs = yp.parse_weight_decay(mod)

    # generate a filename to store the best model
    pkl_filename = join(config.path_for_storing, get_timestamp() + "_best.pkl")

    # create dictionary with hyperparameters
    hyper_params = {'model': yp.parse_to_yaml(mod),
                    'path': yp.parse_to_yaml(path),
                    'weight_decay_coeffs': weight_decay_coeffs,
                    'pkl_filename': pkl_filename}

    # fill the yaml skeleton with hyperparameters
    yaml_string = default_string % hyper_params
    network = yaml_parse.load(yaml_string)
    return network
from hyperopt import hp, fmin, rand, tpe, space_eval
from hyperopt.pyll.stochastic import sample


def q(args):
    x, y = args
    return x ** 2 + y ** 2


if __name__ == '__main__':
    space = [hp.uniform('x', 0, 1), hp.normal('y', 0, 1)]
    for i in range(100):
        print(sample(space))
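# fmin is imported above but unused; a hedged sketch of putting it to work
# on the same space (standard hyperopt API, illustrative max_evals):
best = fmin(fn=q, space=space, algo=tpe.suggest, max_evals=100)
print(best)                      # e.g. {'x': ..., 'y': ...}
print(space_eval(space, best))   # the corresponding (x, y) point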
# (fragment: the opening of this snippet, defining `classifier` and the
#  start of the `pre_processing` choice list, is missing)
        scope.PCA(
            n_components=1 + hp.qlognormal(
                'pca_n_comp', np.log(10), np.log(10), 1),
            whiten=hp.choice(
                'pca_whiten', [False, True])),
        scope.GMM(
            n_components=1 + hp.qlognormal(
                'gmm_n_comp', np.log(100), np.log(10), 1),
            covariance_type=hp.choice(
                'gmm_covtype', ['spherical', 'tied', 'diag', 'full'])),
    ])

sklearn_space = {'pre_processing': pre_processing, 'classifier': classifier}

from hyperopt.pyll.stochastic import sample
print(sample(sklearn_space))
print(sample(sklearn_space))


# -- (2) DEFINE AN OBJECTIVE FUNCTION
def objective(args):
    preprocessing = args['pre_processing']
    classifier = args['classifier']
    X, y = load_data()
    Xpp = preprocessing.transform(X)
    classifier.fit(Xpp, y)
    return {
        'loss': classifier.score(Xpp, y),
        'status': 'ok',
        'foo': 123,  # -- can save more diagnostics
        'other-stuff': None,
    }
spa = get_search_space()  # define search space over possible models

# define data paths
path = config.data_path

# obtain the yaml skeleton
with open(config.yaml_skelton_path) as f:
    default_string = f.read()

# for each sample that will be generated from the search space
for i in range(20):
    timestamp = get_timestamp()
    print(t.bold_red('ITERATION:'), t.bold_red(str(i)),
          "started at: ", timestamp)
    samp = sample(spa)  # generate a sample (a description of a model)
    print(t.bold_cyan('SAMP'), samp)
    mod = build(samp)  # build an object that will fit into yaml_parser
    print(t.bold_blue('MODEL'), mod)

    # define weight decay parameters; they depend on the number of layers
    # (there is one parameter for each layer)
    weight_decay_coeffs = yp.parse_weight_decay(mod)

    # generate a filename to store the best model
    pkl_filename = join(config.path_for_storing,
                        timestamp + "best_" + str(i) + '_' + ".pkl")

    # create dictionary with hyperparameters
    hyper_params = {'model': yp.parse_to_yaml(mod),
                    'path': yp.parse_to_yaml(path),
                    'weight_decay_coeffs': weight_decay_coeffs,
                    'pkl_filename': pkl_filename}
def main():
    parser = ArgumentParser()
    parser.add_argument('-p', '--space', dest='spaceFile',
                        help='Where is the space.py located?')
    parser.add_argument('--use_optimal_design', dest='use_optimal_design',
                        help='Use optimal design or pure random initialization?')
    parser.add_argument('--init_budget', dest='init_budget',
                        help='How many evaluations for the random burning period?')
    parser.add_argument('--ei_budget', dest='ei_budget',
                        help='How many evaluations for the EI-controlled online period?')
    parser.add_argument('--bopt_budget', dest='bopt_budget',
                        help='How many evaluations for Bayesian optimization '
                             'after the subspace is found?')
    parser.add_argument('--ei_xi', dest='ei_xi',
                        help='What is the exploration parameter for computing EI?')
    parser.add_argument('--top_k_pipelines', dest='top_k_pipelines',
                        help='How many top (LR-predicted) pipelines to cover '
                             'in the subspace?')
    parser.add_argument('-s', '--seed', default='1', dest='seed', type=int,
                        help='Seed for the algorithm')
    parser.add_argument('-a', '--algo', default='SMAC', dest='algo', type=str,
                        help='Specify the algorithm after LR; can be SMAC or TPE')
    parser.add_argument('-r', '--restore', action='store_true', dest='restore',
                        help='When this flag is set, state.pkl is restored in '
                             'the current working directory')
    parser.add_argument('--random', default=False, action='store_true',
                        dest='random', help='Use a random search')
    parser.add_argument('--cwd', help='Change the working directory before '
                                      'optimizing.')
    args, unknown = parser.parse_known_args()

    if args.cwd:
        os.chdir(args.cwd)

    if not os.path.exists(args.spaceFile):
        logger.critical('Search space not found: %s' % args.spaceFile)
        sys.exit(1)

    # First remove '.py'
    space, ext = os.path.splitext(os.path.basename(args.spaceFile))

    # Then load the dict searchSpace and the objective function cv.py
    sys.path.append('./')
    sys.path.append('')
    module = import_module(space)
    search_space = module.space
    ni = [len(d) for d in module.layer_dict_list]  # number of units in each layer
    cum_ni = np.cumsum(ni)
    log_filename = 'lr.pkl'

    # Random burning period as initialization
    init_budget = int(args.init_budget)
    if args.use_optimal_design == '1':
        picks = get_random_picks_by_optimal_design(ni, init_budget)
    else:
        picks = get_pure_random_picks(ni, init_budget)
    for i in range(init_budget):
        times = get_num_of_trials(log_filename, filter_valid=False)
        valid_times = get_num_of_trials(log_filename, filter_valid=True)
        logger.info('IMPORTANT! YOU ARE RUNNING FLASH WITH: %s' % args.algo)
        logger.info('Total evaluation times: %d, valid times: %d'
                    % (times, valid_times))
        logger.info('Random burning period times: %d, valid times: %d'
                    % (times, valid_times))
        subspace = construct_subspace(module, picks[i])
        params = sample(subspace)
        cv.main(params)

    valid_times_in_random_period = get_num_of_trials(log_filename,
                                                     filter_valid=True)

    # Train the first LR model before entering the EI-controlled period
    fh = open(log_filename)
    log = cPickle.load(fh)
    trials = log['trials']
    fh.close()
    X = []
    y = []
    y_time = []
    for trial in trials:
        result = trial['result']
        time = trial['duration']
        # make sure the logged result is a number
        # (accept evaluations that return 100.0)
        if result <= 100:
            params = trial['params']
            rescaling = params['-rescaling']
            balancing = params['-balancing']
            feat_pre = params['-feat_pre']
            clf = params['-classifier']
            x = [[0] * n for n in ni]
            x[0][module.d_rescaling[rescaling]] = 1
            x[1][module.d_balancing[balancing]] = 1
            x[2][module.d_feat_pre[feat_pre]] = 1
            x[3][module.d_clf[clf]] = 1
            x_flat = np.array(x[0] + x[1] + x[2] + x[3])
            X.append(x_flat)
            y.append(result)
            y_time.append(np.log(time))
    X = np.array(X)
    alpha = 1.0
    lr = linear_model.Ridge(alpha=alpha)
    lr.fit(X, y)
    lr_time = linear_model.Ridge(alpha=alpha)
    lr_time.fit(X, y_time)

    # Online period controlled by EI
    ei_budget = int(args.ei_budget)
    for i in range(ei_budget):
        times = get_num_of_trials(log_filename, filter_valid=False)
        valid_times = get_num_of_trials(log_filename, filter_valid=True)
        logger.info('Total evaluation times: %d, valid times: %d'
                    % (times, valid_times))
        logger.info('EI controlled period times: %d, valid times: %d'
                    % (times - init_budget,
                       valid_times - valid_times_in_random_period))
        ebeta = (lr.coef_[:cum_ni[0]],
                 lr.coef_[cum_ni[0]:cum_ni[1]],
                 lr.coef_[cum_ni[1]:cum_ni[2]],
                 lr.coef_[cum_ni[2]:])
        logger.info('LR model estimated unit ranking: %s %s %s %s'
                    % (str(ebeta[0].argsort()), str(ebeta[1].argsort()),
                       str(ebeta[2].argsort()), str(ebeta[3].argsort())))
        ebeta_time = (lr_time.coef_[:cum_ni[0]],
                      lr_time.coef_[cum_ni[0]:cum_ni[1]],
                      lr_time.coef_[cum_ni[1]:cum_ni[2]],
                      lr_time.coef_[cum_ni[2]:])
        logger.info('LR Time model estimated unit ranking: %s %s %s %s'
                    % (str(ebeta_time[0].argsort()),
                       str(ebeta_time[1].argsort()),
                       str(ebeta_time[2].argsort()),
                       str(ebeta_time[3].argsort())))
        # pick the best pipeline by EI
        x_next = get_next_by_EI(ni, alpha, lr, lr_time, X, y,
                                float(args.ei_xi))
        pick = [[np.argmax(x_next_i)] for x_next_i in x_next]
        subspace = construct_subspace(module, pick)
        params = sample(subspace)
        cv.main(params)
        result, time = get_last_run(log_filename)
        if result <= 100:
            x_next_flat = np.array(x_next[0] + x_next[1] + x_next[2] + x_next[3])
            X = np.vstack([X, x_next_flat])
            y.append(result)
            y_time.append(np.log(time))
            lr = linear_model.Ridge(alpha=alpha)
            lr.fit(X, y)
            lr_time = linear_model.Ridge(alpha=alpha)
            lr_time.fit(X, y_time)

    valid_times_in_ei_period = (get_num_of_trials(log_filename,
                                                  filter_valid=True)
                                - valid_times_in_random_period)

    # Construct the subspace based on the LR prediction
    final_ebeta = (lr.coef_[:cum_ni[0]],
                   lr.coef_[cum_ni[0]:cum_ni[1]],
                   lr.coef_[cum_ni[1]:cum_ni[2]],
                   lr.coef_[cum_ni[2]:])
    final_ebeta_time = (lr_time.coef_[:cum_ni[0]],
                        lr_time.coef_[cum_ni[0]:cum_ni[1]],
                        lr_time.coef_[cum_ni[1]:cum_ni[2]],
                        lr_time.coef_[cum_ni[2]:])
    final_pick = get_covered_units_by_ei(ni, alpha, lr, lr_time, X, y, 0,
                                         int(args.top_k_pipelines))
    final_subspace = construct_subspace(module, final_pick)
    logger.info('LR model estimated unit ranking: %s %s %s %s'
                % (str(final_ebeta[0].argsort()),
                   str(final_ebeta[1].argsort()),
                   str(final_ebeta[2].argsort()),
                   str(final_ebeta[3].argsort())))
    logger.info('LR Time model estimated unit ranking: %s %s %s %s'
                % (str(final_ebeta_time[0].argsort()),
                   str(final_ebeta_time[1].argsort()),
                   str(final_ebeta_time[2].argsort()),
                   str(final_ebeta_time[3].argsort())))
    logger.info('Selected pipelines: %s %s %s %s'
                % (final_pick[0], final_pick[1],
                   final_pick[2], final_pick[3]))

    # Phase 3 with SMAC
    if args.algo == 'SMAC':
        fh = open('pickup.txt', 'w')
        for layer_pick in final_pick:
            for i in layer_pick:
                fh.write('%d ' % i)
            fh.write('\n')
        fh.close()
        subspace = construct_subspace(module, final_pick)
        new_space = convert_tpe_to_smac_from_object(subspace)
        fh = open('params.pcs', 'w')
        fh.write(new_space)
        fh.close()
    # Phase 3 with TPE
    elif args.algo == 'TPE':
        fn = cv.main
        domain = hyperopt.Domain(fn, final_subspace, rseed=int(args.seed))
        trials = hyperopt.Trials()
        bopt_budget = int(args.bopt_budget)
        for i in range(bopt_budget):
            times = get_num_of_trials(log_filename, filter_valid=False)
            valid_times = get_num_of_trials(log_filename, filter_valid=True)
            logger.info('Total evaluation times: %d, valid times: %d'
                        % (times, valid_times))
            logger.info('TPE period times: %d, valid times: %d'
                        % (times - init_budget - ei_budget,
                           valid_times - valid_times_in_random_period
                           - valid_times_in_ei_period))
            logger.info('LR model estimated unit ranking: %s %s %s %s'
                        % (str(final_ebeta[0].argsort()),
                           str(final_ebeta[1].argsort()),
                           str(final_ebeta[2].argsort()),
                           str(final_ebeta[3].argsort())))
            logger.info('LR Time model estimated unit ranking: %s %s %s %s'
                        % (str(final_ebeta_time[0].argsort()),
                           str(final_ebeta_time[1].argsort()),
                           str(final_ebeta_time[2].argsort()),
                           str(final_ebeta_time[3].argsort())))
            logger.info('Selected pipelines: %s %s %s %s'
                        % (final_pick[0], final_pick[1],
                           final_pick[2], final_pick[3]))
            # in exhaust, the number of evaluations is max_evals - num_done
            tpe_with_seed = partial(hyperopt.tpe.suggest, seed=int(args.seed))
            rval = hyperopt.FMinIter(tpe_with_seed, domain, trials,
                                     max_evals=i)
            rval.exhaust()
def test_sample_deterministic():
    aa = as_apply([0, 1])
    print(aa)
    dd = sample(aa, np.random.RandomState(3))
    assert dd == (0, 1)