def work(self): bandit = self.bandit random_algo = Random(bandit) # build an experiment of 10 trials trials = Trials() exp = Experiment(trials, random_algo) #print random_algo.s_specs_idxs_vals exp.run(10) ids = trials.tids assert len(ids) == 10 tpe_algo = TreeParzenEstimator(bandit) #print pyll.as_apply(tpe_algo.post_idxs) #print pyll.as_apply(tpe_algo.post_vals) argmemo = {} print trials.miscs idxs, vals = miscs_to_idxs_vals(trials.miscs) argmemo[tpe_algo.observed['idxs']] = idxs argmemo[tpe_algo.observed['vals']] = vals argmemo[tpe_algo.observed_loss['idxs']] = trials.tids argmemo[tpe_algo.observed_loss['vals']] = trials.losses() stuff = pyll.rec_eval( [tpe_algo.post_below['idxs'], tpe_algo.post_below['vals']], memo=argmemo) print stuff
def test_opt_qn_normal(f=hp_normal): bandit = Bandit( {'loss': -scope.sum([f('v%i' % ii, 0, 1) for ii in range(25)]) ** 2}, loss_target=0) algo = TreeParzenEstimator(bandit, prior_weight=.5, n_startup_jobs=0, n_EI_candidates=1, gamma=0.15) trials = Trials() experiment = Experiment(trials, algo, async=False) experiment.max_queue_len = 1 experiment.run(40) print list(sorted(trials.losses())) idxs, vals = miscs_to_idxs_vals(trials.miscs) if 1: import hyperopt.plotting hyperopt.plotting.main_plot_vars(trials, bandit, do_show=1) else: import matplotlib.pyplot as plt begin = [v[:10] for k, v in vals.items()] end = [v[-10:] for k, v in vals.items()] plt.subplot(2, 1, 1) plt.title('before') plt.hist(np.asarray(begin).flatten()) plt.subplot(2, 1, 2) plt.title('after') plt.hist(np.asarray(end).flatten()) plt.show()
def test_injector(trials): # -- test is disabled because CoinFlipInjector is gone # The point of the test would be to ensure that there is no problem # submitting jobs from worker processes. CoinFlipInjector = None # XXX find old def with `git grep` bandit_algo = hyperopt.Random(CoinFlipInjector(), cmd=("bandit_json evaluate", "hyperopt.base.CoinFlipInjector")) # -- also test that injections from a particular experiment (exp_key) # are visible only within that experiment. view2 = trials.view(exp_key="fff") view3 = trials.view(exp_key="asdf") assert len(trials) == 0 exp = Experiment(view2, bandit_algo, max_queue_len=1, async=True) exp.run(1, block_until_done=True) ##even though we ran 1 trial, there are 2 results because one was injected trials.refresh() view2.refresh() view3.refresh() assert len(trials) == 2 assert len(view2) == 2 assert len(view3) == 0 exp.run(1, block_until_done=True) trials.refresh() view2.refresh() view3.refresh() assert len(trials) == 4 assert len(view2) == 4 assert len(view3) == 0 tids = [d["tid"] for d in trials] for doc in trials: if "from_tid" in doc["misc"]: assert doc["misc"]["from_tid"] in tids assert doc["exp_key"] == view2._exp_key
def work(self):
    """Run ten TPE trials on `self.bandit` with `n_EI_candidates` set to 3.

    Nothing is asserted; the method only has to run without raising.
    """
    history = Trials()
    suggester = TreeParzenEstimator(self.bandit)
    suggester.n_EI_candidates = 3
    Experiment(history, suggester).run(10)
def work(self): bandit = self.bandit random_algo = Random(bandit) # build an experiment of 10 trials trials = Trials() exp = Experiment(trials, random_algo) #print random_algo.s_specs_idxs_vals exp.run(10) ids = trials.tids assert len(ids) == 10 tpe_algo = TreeParzenEstimator(bandit) #print pyll.as_apply(tpe_algo.post_idxs) #print pyll.as_apply(tpe_algo.post_vals) argmemo = {} print trials.miscs idxs, vals = miscs_to_idxs_vals(trials.miscs) argmemo[tpe_algo.observed['idxs']] = idxs argmemo[tpe_algo.observed['vals']] = vals argmemo[tpe_algo.observed_loss['idxs']] = trials.tids argmemo[tpe_algo.observed_loss['vals']] = trials.losses() stuff = pyll.rec_eval([tpe_algo.post_below['idxs'], tpe_algo.post_below['vals']], memo=argmemo) print stuff
def work(self): bandit = self.bandit assert bandit.name is not None print 'Bandit', bandit.name algo = TreeParzenEstimator( bandit, gamma=self.gammas.get(bandit.name, TreeParzenEstimator.gamma), prior_weight=self.prior_weights.get( bandit.name, TreeParzenEstimator.prior_weight), n_EI_candidates=self.n_EIs.get( bandit.name, TreeParzenEstimator.n_EI_candidates), ) LEN = self.LEN.get(bandit.name, 50) trials = Trials() exp = Experiment(trials, algo) exp.catch_bandit_exceptions = False exp.run(LEN) assert len(trials) == LEN if 1: rtrials = Trials() exp = Experiment(rtrials, Random(bandit)) exp.run(LEN) print 'RANDOM MINS', list(sorted(rtrials.losses()))[:6] #logx = np.log([s['x'] for s in rtrials.specs]) #print 'RND MEAN', np.mean(logx) #print 'RND STD ', np.std(logx) print algo.n_EI_candidates print algo.gamma print algo.prior_weight if 0: plt.subplot(2, 2, 1) plt.scatter(range(LEN), trials.losses()) plt.title('TPE losses') plt.subplot(2, 2, 2) plt.scatter(range(LEN), ([s['x'] for s in trials.specs])) plt.title('TPE x') plt.subplot(2, 2, 3) plt.title('RND losses') plt.scatter(range(LEN), rtrials.losses()) plt.subplot(2, 2, 4) plt.title('RND x') plt.scatter(range(LEN), ([s['x'] for s in rtrials.specs])) plt.show() if 0: plt.hist([t['x'] for t in self.experiment.trials], bins=20) #print trials.losses() print 'TPE MINS', list(sorted(trials.losses()))[:6] #logx = np.log([s['x'] for s in trials.specs]) #print 'TPE MEAN', np.mean(logx) #print 'TPE STD ', np.std(logx) thresh = self.thresholds[bandit.name] print 'Thresh', thresh assert min(trials.losses()) < thresh
def test_basic(self): bandit = self._bandit_cls() algo = Random(bandit) trials = Trials() experiment = Experiment(trials, algo, async=False) experiment.max_queue_len = 50 experiment.run(self._n_steps) print print self._bandit_cls print bandit.loss_target print trials.average_best_error(bandit) assert trials.average_best_error(bandit) - bandit.loss_target < .2 print
def test_basic(self): bandit = self._bandit_cls() print 'bandit params', bandit.params algo = Random(bandit) print 'algo params', algo.vh.params trials = Trials() experiment = Experiment(trials, algo, async=False) experiment.catch_bandit_exceptions = False experiment.max_queue_len = 50 experiment.run(self._n_steps) print print self._bandit_cls print bandit.loss_target print trials.average_best_error(bandit) assert trials.average_best_error(bandit) - bandit.loss_target < .2 print
def setUp(self): bandit = self.bandit = many_dists() algo = TreeParzenEstimator(bandit) trials = Trials() experiment = Experiment(trials, algo, async=False) experiment.max_queue_len = 1 N=200 if 0: import cProfile stats = cProfile.runctx('experiment.run(N)', globals={}, locals=locals(), filename='fooprof') import pstats p = pstats.Stats('fooprof') p.sort_stats('cumulative').print_stats(10) p.sort_stats('time').print_stats(10) else: experiment.run(N) self.trials = trials
def test_injector(trials): bandit_algo = hyperopt.Random(CoinFlipInjector(), cmd=('bandit_json evaluate','hyperopt.base.CoinFlipInjector')) # -- also test that injections from a particular experiment (exp_key) # are visible only within that experiment. view2 = trials.view(exp_key='fff') view3 = trials.view(exp_key='asdf') assert len(trials) == 0 exp = Experiment(view2, bandit_algo, max_queue_len=1, async=True) exp.run(1, block_until_done=True) ##even though we ran 1 trial, there are 2 results because one was injected trials.refresh() view2.refresh() view3.refresh() assert len(trials) == 2 assert len(view2) == 2 assert len(view3) == 0 exp.run(1, block_until_done=True) trials.refresh() view2.refresh() view3.refresh() assert len(trials) == 4 assert len(view2) == 4 assert len(view3) == 0 tids = [d['tid'] for d in trials] for doc in trials: if 'from_tid' in doc['misc']: assert doc['misc']['from_tid'] in tids assert doc['exp_key'] == view2._exp_key
def test_injector(trials): # -- test is disabled because CoinFlipInjector is gone # The point of the test would be to ensure that there is no problem # submitting jobs from worker processes. CoinFlipInjector = None # XXX find old def with `git grep` bandit_algo = hyperopt.Random(CoinFlipInjector(), cmd=('bandit_json evaluate', 'hyperopt.base.CoinFlipInjector')) # -- also test that injections from a particular experiment (exp_key) # are visible only within that experiment. view2 = trials.view(exp_key='fff') view3 = trials.view(exp_key='asdf') assert len(trials) == 0 exp = Experiment(view2, bandit_algo, max_queue_len=1, async=True) exp.run(1, block_until_done=True) ##even though we ran 1 trial, there are 2 results because one was injected trials.refresh() view2.refresh() view3.refresh() assert len(trials) == 2 assert len(view2) == 2 assert len(view3) == 0 exp.run(1, block_until_done=True) trials.refresh() view2.refresh() view3.refresh() assert len(trials) == 4 assert len(view2) == 4 assert len(view3) == 0 tids = [d['tid'] for d in trials] for doc in trials: if 'from_tid' in doc['misc']: assert doc['misc']['from_tid'] in tids assert doc['exp_key'] == view2._exp_key
def work(self): """ Run a small experiment with several workers running in parallel using Python threads. """ n_threads = self.n_threads jobs_per_thread = self.jobs_per_thread n_trials_per_exp = n_threads * jobs_per_thread n_trials_total = n_trials_per_exp * len(self.exp_keys) with TempMongo() as tm: mj = tm.mongo_jobs('foodb') def newth(ii): n_jobs = jobs_per_thread * len(self.exp_keys) return threading.Thread( target=self.worker_thread_fn, args=(('hostname', ii), n_jobs, 30.0)) threads = map(newth, range(n_threads)) [th.start() for th in threads] exp_list = [] trials_list = [] try: for key in self.exp_keys: print 'running experiment' trials = MongoTrials(tm.connection_string('foodb'), key) assert len(trials) == 0 if hasattr(self, 'prep_trials'): self.prep_trials(trials) bandit = self.bandit if self.use_stop: bandit_algo = RandomStop(n_threads * jobs_per_thread, self.bandit, cmd=self.cmd) print bandit_algo exp = Experiment(trials, bandit_algo, max_queue_len=1) exp.run(sys.maxint, block_until_done=False) else: bandit_algo = Random(self.bandit, cmd=self.cmd) exp = Experiment(trials, bandit_algo, max_queue_len=10000) exp.run(n_threads * jobs_per_thread, block_until_done=(len(self.exp_keys) == 1)) exp_list.append(exp) trials_list.append(trials) finally: print 'joining worker thread...' [th.join() for th in threads] for exp in exp_list: exp.block_until_done() for trials in trials_list: assert trials.count_by_state_synced(JOB_STATE_DONE)\ == n_trials_per_exp, (trials.count_by_state_synced(JOB_STATE_DONE), n_trials_per_exp) assert trials.count_by_state_unsynced(JOB_STATE_DONE)\ == n_trials_per_exp assert len(trials) == n_trials_per_exp, ( 'trials failure %d %d ' % (len(trials) , n_trials_per_exp)) assert len(trials.results) == n_trials_per_exp, ( 'results failure %d %d ' % (len(trials.results), n_trials_per_exp)) all_trials = MongoTrials(tm.connection_string('foodb')) assert len(all_trials) == n_trials_total
def work(self): bandit = self.bandit assert bandit.name is not None print 'Bandit', bandit.name algo = TreeParzenEstimator(bandit, gamma=self.gammas.get(bandit.name, TreeParzenEstimator.gamma), prior_weight=self.prior_weights.get(bandit.name, TreeParzenEstimator.prior_weight), n_EI_candidates=self.n_EIs.get(bandit.name, TreeParzenEstimator.n_EI_candidates), ) LEN = self.LEN.get(bandit.name, 50) trials = Trials() exp = Experiment(trials, algo) exp.catch_bandit_exceptions = False exp.run(LEN) assert len(trials) == LEN if 1: rtrials = Trials() exp = Experiment(rtrials, Random(bandit)) exp.run(LEN) print 'RANDOM MINS', list(sorted(rtrials.losses()))[:6] #logx = np.log([s['x'] for s in rtrials.specs]) #print 'RND MEAN', np.mean(logx) #print 'RND STD ', np.std(logx) print algo.n_EI_candidates print algo.gamma print algo.prior_weight if 0: plt.subplot(2, 2, 1) plt.scatter(range(LEN), trials.losses()) plt.title('TPE losses') plt.subplot(2, 2, 2) plt.scatter(range(LEN), ([s['x'] for s in trials.specs])) plt.title('TPE x') plt.subplot(2, 2, 3) plt.title('RND losses') plt.scatter(range(LEN), rtrials.losses()) plt.subplot(2, 2, 4) plt.title('RND x') plt.scatter(range(LEN), ([s['x'] for s in rtrials.specs])) plt.show() if 0: plt.hist( [t['x'] for t in self.experiment.trials], bins=20) #print trials.losses() print 'TPE MINS', list(sorted(trials.losses()))[:6] #logx = np.log([s['x'] for s in trials.specs]) #print 'TPE MEAN', np.mean(logx) #print 'TPE STD ', np.std(logx) thresh = self.thresholds[bandit.name] print 'Thresh', thresh assert min(trials.losses()) < thresh
def work(self, **kwargs): self.__dict__.update(kwargs) bandit = opt_q_uniform(self.target) prior_weight = 2.5 gamma = 0.20 algo = TreeParzenEstimator(bandit, prior_weight=prior_weight, n_startup_jobs=2, n_EI_candidates=128, gamma=gamma) print algo.opt_idxs['x'] print algo.opt_vals['x'] trials = Trials() experiment = Experiment(trials, algo) experiment.run(self.LEN) if self.show_vars: import hyperopt.plotting hyperopt.plotting.main_plot_vars(trials, bandit, do_show=1) idxs, vals = miscs_to_idxs_vals(trials.miscs) idxs = idxs['x'] vals = vals['x'] print "VALS", vals losses = trials.losses() from hyperopt.tpe import ap_filter_trials from hyperopt.tpe import adaptive_parzen_samplers qu = scope.quniform(1.01, 10, 1) fn = adaptive_parzen_samplers['quniform'] fn_kwargs = dict(size=(4,), rng=np.random) s_below = pyll.Literal() s_above = pyll.Literal() b_args = [s_below, prior_weight] + qu.pos_args b_post = fn(*b_args, **fn_kwargs) a_args = [s_above, prior_weight] + qu.pos_args a_post = fn(*a_args, **fn_kwargs) #print b_post #print a_post fn_lpdf = getattr(scope, a_post.name + '_lpdf') print fn_lpdf # calculate the llik of b_post under both distributions a_kwargs = dict([(n, a) for n, a in a_post.named_args if n not in ('rng', 'size')]) b_kwargs = dict([(n, a) for n, a in b_post.named_args if n not in ('rng', 'size')]) below_llik = fn_lpdf(*([b_post] + b_post.pos_args), **b_kwargs) above_llik = fn_lpdf(*([b_post] + a_post.pos_args), **a_kwargs) new_node = scope.broadcast_best(b_post, below_llik, above_llik) print '=' * 80 do_show = self.show_steps import matplotlib.pyplot as plt for ii in range(2, 9): if ii > len(idxs): break print '-' * 80 print 'ROUND', ii print '-' * 80 all_vals = [2, 3, 4, 5, 6, 7, 8, 9, 10] below, above = ap_filter_trials(idxs[:ii], vals[:ii], idxs[:ii], losses[:ii], gamma) below = below.astype('int') above = above.astype('int') print 'BB0', below print 'BB1', above #print 'BELOW', zip(range(100), np.bincount(below, minlength=11)) #print 'ABOVE', 
zip(range(100), np.bincount(above, minlength=11)) memo = {b_post: all_vals, s_below: below, s_above: above} bl, al, nv = pyll.rec_eval([below_llik, above_llik, new_node], memo=memo) #print bl - al print 'BB2', dict(zip(all_vals, bl - al)) print 'BB3', dict(zip(all_vals, bl)) print 'BB4', dict(zip(all_vals, al)) print 'ORIG PICKED', vals[ii] print 'PROPER OPT PICKS:', nv #assert np.allclose(below, [3, 3, 9]) #assert len(below) + len(above) == len(vals) if do_show: plt.subplot(8, 1, ii) #plt.scatter(all_vals, # np.bincount(below, minlength=11)[2:], c='b') #plt.scatter(all_vals, # np.bincount(above, minlength=11)[2:], c='c') plt.scatter(all_vals, bl, c='g') plt.scatter(all_vals, al, c='r') if do_show: plt.show()
# ...(other options not yet implemented)... "search_strategy": "grid", # `grid_search_space` specifies values to be searched for each hyperparameter # each entry needs to follow the format {"param_name" : List(Any)} "grid_search_space": { "layer1_nodes": [16, 32, 64], "layer2_nodes": [16, 32, 64], "optimizer": ["adam", "sgd"], }, # `grid_search_settings` contain other settings for grid search strategy # `save_every_n_outputs`: how often should the trial results be saved. # The more often we save the results, the less likely we lose data in # the event of a crash, but it takes more time. # `num_samples`: how many repeated trials to run for each point in search space # [Not yet implemented] "grid_search_settings": { "save_every_n_outputs": 1, "num_samples": 1 } } # trainer must be a function that takes an hpset as input and returns (hpset, # metric, logs) as output # `hpset`: hyperparameter values, see hyperopt.Experiment._generate_hpsets # `metric`: objective for maximization, evaluated at the point specified by `hpset` # `logs`: any other useful information that should be saved exper = Experiment(trainer, config) exper.search() exper.summary()
def work(self, **kwargs): self.__dict__.update(kwargs) bandit = opt_q_uniform(self.target) prior_weight = 2.5 gamma = 0.20 algo = TreeParzenEstimator(bandit, prior_weight=prior_weight, n_startup_jobs=2, n_EI_candidates=128, gamma=gamma) print algo.opt_idxs['x'] print algo.opt_vals['x'] trials = Trials() experiment = Experiment(trials, algo) experiment.run(self.LEN) if self.show_vars: import hyperopt.plotting hyperopt.plotting.main_plot_vars(trials, bandit, do_show=1) idxs, vals = miscs_to_idxs_vals(trials.miscs) idxs = idxs['x'] vals = vals['x'] print "VALS", vals losses = trials.losses() from hyperopt.tpe import ap_filter_trials from hyperopt.tpe import adaptive_parzen_samplers qu = scope.quniform(1.01, 10, 1) fn = adaptive_parzen_samplers['quniform'] fn_kwargs = dict(size=(4, ), rng=np.random) s_below = pyll.Literal() s_above = pyll.Literal() b_args = [s_below, prior_weight] + qu.pos_args b_post = fn(*b_args, **fn_kwargs) a_args = [s_above, prior_weight] + qu.pos_args a_post = fn(*a_args, **fn_kwargs) #print b_post #print a_post fn_lpdf = getattr(scope, a_post.name + '_lpdf') print fn_lpdf # calculate the llik of b_post under both distributions a_kwargs = dict([(n, a) for n, a in a_post.named_args if n not in ('rng', 'size')]) b_kwargs = dict([(n, a) for n, a in b_post.named_args if n not in ('rng', 'size')]) below_llik = fn_lpdf(*([b_post] + b_post.pos_args), **b_kwargs) above_llik = fn_lpdf(*([b_post] + a_post.pos_args), **a_kwargs) new_node = scope.broadcast_best(b_post, below_llik, above_llik) print '=' * 80 do_show = self.show_steps import matplotlib.pyplot as plt for ii in range(2, 9): if ii > len(idxs): break print '-' * 80 print 'ROUND', ii print '-' * 80 all_vals = [2, 3, 4, 5, 6, 7, 8, 9, 10] below, above = ap_filter_trials(idxs[:ii], vals[:ii], idxs[:ii], losses[:ii], gamma) below = below.astype('int') above = above.astype('int') print 'BB0', below print 'BB1', above #print 'BELOW', zip(range(100), np.bincount(below, minlength=11)) #print 'ABOVE', 
zip(range(100), np.bincount(above, minlength=11)) memo = {b_post: all_vals, s_below: below, s_above: above} bl, al, nv = pyll.rec_eval([below_llik, above_llik, new_node], memo=memo) #print bl - al print 'BB2', dict(zip(all_vals, bl - al)) print 'BB3', dict(zip(all_vals, bl)) print 'BB4', dict(zip(all_vals, al)) print 'ORIG PICKED', vals[ii] print 'PROPER OPT PICKS:', nv #assert np.allclose(below, [3, 3, 9]) #assert len(below) + len(above) == len(vals) if do_show: plt.subplot(8, 1, ii) #plt.scatter(all_vals, # np.bincount(below, minlength=11)[2:], c='b') #plt.scatter(all_vals, # np.bincount(above, minlength=11)[2:], c='c') plt.scatter(all_vals, bl, c='g') plt.scatter(all_vals, al, c='r') if do_show: plt.show()