def setUp(self):
    """Seed numpy and build two identically configured GP experiments.

    Each algorithm wraps its own GaussWave2 bandit and gets the same
    ``n_startup_jobs`` and ``EI_ambition`` settings; each is then wrapped
    in its own SerialExperiment.
    """
    numpy.random.seed(555)

    def configured_algo():
        # One fresh algorithm with the settings shared by both runs.
        algo = GPAlgo(GaussWave2())
        algo.n_startup_jobs = 10
        algo.EI_ambition = 0.75
        return algo

    # Construction order (a, then b, then the experiments) is kept as-is.
    self.algo_a = configured_algo()
    self.algo_b = configured_algo()
    self.exp_a = SerialExperiment(self.algo_a)
    self.exp_b = SerialExperiment(self.algo_b)
def test_2var_unequal(self):
    # Runs 50 trials (25 of them startup jobs) on TestGaussian2D.Bandit(1, 0)
    # and checks that kernel 1 ends up with a length scale more than three
    # times that of kernel 0, and that the best loss is below .005.
    gp_algo = GPAlgo(TestGaussian2D.Bandit(1, 0))
    gp_algo.n_startup_jobs = 25
    experiment = SerialExperiment(gp_algo)
    experiment.run(50)

    scale0, scale1 = [
        gp_algo.kernels[idx].lenscale() for idx in (0, 1)]

    # N.B. a ratio in log-length scales is a big difference!
    assert scale1 / scale0 > 3
    best_loss = min(experiment.losses())
    assert best_loss < .005
def test_SerialExperiment_calls_suggest():
    # Written after the suggest API in base.Experiment changed: running a
    # SerialExperiment for three steps must leave exactly three trials and
    # three results behind.
    # NOTE(review): the original comment said this verifies
    # MongoExperiment.run, but the code below only exercises
    # SerialExperiment.run.
    n_steps = 3
    experiment = SerialExperiment(bandit_algos.Random(TwoArms()))
    experiment.run(n_steps)
    assert len(experiment.trials) == n_steps
    assert len(experiment.results) == n_steps
def test_fit3(self):
    # Steps a GP experiment on the dbn_template3 bandit one trial at a time.
    # Once the step index exceeds n_startup_jobs, the diagonal of the GP
    # training kernel matrix must be within 1e-4 of 1 and have dtype float64.
    algo = GPAlgo(self.bandit(self.dbn_template3()))
    algo.n_startup_jobs = 20
    experiment = SerialExperiment(algo)

    for step in range(50):
        experiment.run(1)
        if step <= algo.n_startup_jobs:
            continue
        k_diag = numpy.diag(algo.GP_train_K())
        worst_deviation = numpy.max(abs(k_diag - 1))
        assert worst_deviation < .0001
        assert 'float64' == str(k_diag.dtype)
def test_fit_categorical():
    # Runs 100 trials of GPAlgo on TwoArms (7 startup jobs) and counts how
    # often each arm value was tried.
    numpy.random.seed(555)
    experiment = SerialExperiment(GPAlgo(TwoArms()))
    experiment.bandit_algo.n_startup_jobs = 7
    experiment.run(100)

    pulls = [
        sum(1 for trial in experiment.trials if trial['x'] == arm)
        for arm in (0, 1)]

    # Single-argument print calls emit the same text as the py2 statement.
    print('%s %s' % ('arm 0 count', pulls[0]))
    print('%s %s' % ('arm 1 count', pulls[1]))

    # this is just a test of the gm_algo candidate proposal mechanism
    # since the GP doesn't apply to discrete variables.
    assert pulls[0] > 60
class TestGM_DummyDBN(unittest.TestCase):
    """Run GM_BanditAlgo on Dummy_DBN_Base while watching for non-finites.

    setUp switches on theano's ``print_thunk_trace`` flag and tearDown
    restores the previous value.
    """

    def setUp(self):
        self.experiment = SerialExperiment(
            bandit_algo=GM_BanditAlgo(
                bandit=Dummy_DBN_Base(),
                good_estimator=IndependentAdaptiveParzenEstimator(),
                bad_estimator=IndependentAdaptiveParzenEstimator()))
        # Remember the global flag so tearDown can put it back.
        self._old = theano.gof.link.raise_with_op.print_thunk_trace
        theano.gof.link.raise_with_op.print_thunk_trace = True

    def tearDown(self):
        theano.gof.link.raise_with_op.print_thunk_trace = self._old

    def test_optimize_20(self):
        # Compiles the helper with a VM linker whose callback raises
        # ValueError when a node yields a non-finite tensor output, then
        # runs 50 single-trial steps.

        def tensor_values(variables, storage_map):
            # Current values of the variables that are theano tensors.
            return [storage_map[v][0] for v in variables
                    if isinstance(v.type, theano.tensor.TensorType)]

        def callback(node, thunk, storage_map, compute_map):
            numeric_outputs = tensor_values(node.outputs, storage_map)
            if all(numpy.all(numpy.isfinite(n)) for n in numeric_outputs):
                return
            # The inputs are only needed for the failure report.
            numeric_inputs = tensor_values(node.inputs, storage_map)
            theano.printing.debugprint(node, depth=8)
            print('inputs')
            print(numeric_inputs)
            print('outputs')
            print(numeric_outputs)
            raise ValueError('non-finite created in', node)

        mode = theano.Mode(
            optimizer='fast_compile',
            linker=theano.gof.vm.VM_Linker(callback=callback))
        self.experiment.bandit_algo.build_helpers(mode=mode)
        _helper = self.experiment.bandit_algo._helper
        theano.printing.debugprint(_helper)

        for i in range(50):
            print('ITER %s' % i)
            # Exceptions propagate unchanged; the former
            # `try: ... except: raise` wrapper added nothing.
            self.experiment.run(1)

        if 0:
            # Manual debugging aid: flip to 1 to plot losses and samples.
            import matplotlib.pyplot as plt
            plt.subplot(1, 2, 1)
            plt.plot(self.experiment.losses())
            plt.subplot(1, 2, 2)
            plt.scatter(
                [t['x'] for t in self.experiment.trials],
                range(len(self.experiment.trials)))
            plt.show()
def test_2var_equal(self):
    # Runs 25 trials (5 startup jobs) on TestGaussian2D.Bandit(1, 1) and
    # checks that the two kernel length scales agree to within 15%.
    gp_algo = GPAlgo(TestGaussian2D.Bandit(1, 1))
    gp_algo.n_startup_jobs = 5
    experiment = SerialExperiment(gp_algo)
    experiment.run(25)

    scale0 = gp_algo.kernels[0].lenscale()
    scale1 = gp_algo.kernels[1].lenscale()
    ratio = scale0 / scale1
    assert .85 < ratio < 1.15

    # XXX: consider using the tighter bound (.005) when the mean and std
    # are estimated from the startup jobs.
    best_loss = min(experiment.losses())
    assert best_loss < .05, best_loss
def test_fit0(self):
    # Steps a GP experiment on the dbn_template0 bandit one trial at a time.
    # Once the step index exceeds n_startup_jobs, the diagonal of the GP
    # training kernel matrix must be within 1e-4 of 1 and have dtype float64.
    template = self.dbn_template0()
    algo = GPAlgo(self.bandit(template))
    algo.n_startup_jobs = 20
    experiment = SerialExperiment(algo)

    step = 0
    while step < 50:
        experiment.run(1)
        if step > algo.n_startup_jobs:
            diagonal = numpy.diag(algo.GP_train_K())
            assert numpy.max(abs(diagonal - 1)) < .0001
            assert 'float64' == str(diagonal.dtype)
        step += 1
def test_fit_quantized_lognormal(self):
    # Runs the startup jobs plus 25 more trials on self.qln_bandit and
    # checks the kernel bookkeeping, the best loss, and that every sampled
    # x is a positive whole number.
    algo = GPAlgo(self.qln_bandit)
    algo.n_startup_jobs = 5
    experiment = SerialExperiment(algo)
    experiment.run(algo.n_startup_jobs)

    # check that the Lognormal kernel has been identified as refinable
    kernel = algo.kernels[0]
    assert algo.is_refinable[kernel]
    assert algo.bounds[kernel][0] > 0

    experiment.run(25)
    xvec = numpy.asarray([trial['x'] for trial in experiment.trials])

    if 0:
        # Manual debugging aid.
        show_bandit_algo(
            algo,
            experiment.trials,
            experiment.results,
            xlim_low=1,
            xlim_high=xvec.max() + 1)

    losses = experiment.losses()
    assert min(losses) == 0, (losses, min(losses))

    # check that all points were positive
    assert xvec.min() > 0
    # assert that the step size was respected
    remainders = numpy.fmod(xvec, 1)
    assert numpy.all(remainders == 0)

    # the lenscale is about 1.8 Is that about right? What's right?
    print(algo.kernels[0].lenscale())
class TestGaussWave3(unittest.TestCase):
    """
    GP_BanditAlgo has different code paths for mulsets of one choice vs
    mulsets with multiple choices.  This tests both kinds.
    """

    def setUp(self):
        class Bandit(GensonBandit):
            loss_target = -3
            test_str = """ { "x": uniform(-20, 20), "hf": choice([ {"kind": "raw"}, {"kind": "negcos", "amp": uniform(0, 1)}]), "y": choice([0, uniform(3, 4), uniform(2, 5), uniform(1, 6), choice([uniform(5, 6), uniform(4, 6.5)])]) } """

            def __init__(self):
                GensonBandit.__init__(self, source_string=self.test_str)

            def evaluate(self, config, ctrl):
                # Noise term, minus a Gaussian bump in x, minus an optional
                # sine term, minus y.
                x = config['x']
                hf = config['hf']
                loss = numpy.random.randn() * .1
                loss -= 2 * numpy.exp(-(x / 5.0) ** 2)  # up to 2
                if hf['kind'] == 'negcos':
                    loss -= numpy.sin(x) * hf['amp']
                loss -= config['y']
                return {'loss': loss, 'status': 'ok'}

            def loss_variance(self, result, config=None):
                return 0.01

        gp_algo = GPAlgo(Bandit())
        gp_algo.n_startup_jobs = 5
        self.algo = gp_algo
        self.serial_exp = SerialExperiment(gp_algo)

    def test_fit(self):
        # After the startup phase, the diagonal of the GP training kernel
        # matrix must stay within 1e-3 of 1 and be float64.
        for step in range(50):
            self.serial_exp.run(1)
            if step <= self.algo.n_startup_jobs:
                continue
            print([kern.lenscale() for kern in self.algo.kernels])
            k_diag = numpy.diag(self.algo.GP_train_K())
            assert numpy.max(abs(k_diag - 1)) < .001
            assert 'float64' == str(k_diag.dtype)
def test_4var_some_irrelevant(self):
    # Currently disabled: the early return below means nothing after it
    # runs and the test reports success unconditionally.
    return  # XXX enable when compilation is faster

    algo = GPAlgo(TestGaussian4D.Bandit(1, 0, 0, 1))
    experiment = SerialExperiment(algo)
    algo.n_startup_jobs = 10
    experiment.run(50)

    l0, l1, l2, l3, l4 = [
        algo.kernels[idx].lenscale() for idx in range(5)]
    for kern in algo.kernels:
        print('%s %s %s' % ('last kernel fit', kern, kern.lenscale()))

    assert min(experiment.losses()) < .05
    # Kernels 1 and 4 must have much shorter length scales than 2 and 3.
    assert max(l1, l4) * 3 < min(l2, l3)
class TestGaussWave(unittest.TestCase):
    # Fits GPAlgo to the GaussWave bandit and checks that the GP training
    # kernel matrix keeps a unit diagonal after the startup phase.

    def setUp(self):
        numpy.random.seed(555)
        gp_algo = GPAlgo(GaussWave())
        gp_algo.n_startup_jobs = 20
        self.algo = gp_algo
        self.serial_exp = SerialExperiment(gp_algo)

    def test_fit(self):
        for step in range(100):
            self.serial_exp.run(1)
            if step <= self.algo.n_startup_jobs:
                continue
            print([kern.lenscale() for kern in self.algo.kernels])
            k_diag = numpy.diag(self.algo.GP_train_K())
            assert numpy.allclose(k_diag, 1.0)
def test_4var_all_relevant(self):
    # Runs 50 trials (10 startup jobs) on TestGaussian4D.Bandit(1, .5, 2, 1)
    # and checks that the length scales of kernels 1..4 are all within a
    # factor of three of each other.
    algo = GPAlgo(TestGaussian4D.Bandit(1, .5, 2, 1))
    experiment = SerialExperiment(algo)
    algo.n_startup_jobs = 10
    experiment.run(50)

    # Kernel 0's length scale is queried, as before, but not asserted on.
    lenscales = [algo.kernels[idx].lenscale() for idx in range(5)]
    for kern in algo.kernels:
        print('%s %s %s' % ('last kernel fit', kern, kern.lenscale()))

    assert min(experiment.losses()) < .05
    gauss_scales = numpy.asarray(lenscales[1:])
    assert gauss_scales.min() * 3 > gauss_scales.max()
def setUp(self):
    """Define a local GensonBandit subclass and wrap it in a GP experiment.

    The bandit's search space has a uniform ``x``, a two-way choice ``hf``
    and a nested choice ``y``; the resulting algorithm uses 5 startup jobs.
    """

    class Bandit(GensonBandit):
        loss_target = -3
        test_str = """ { "x": uniform(-20, 20), "hf": choice([ {"kind": "raw"}, {"kind": "negcos", "amp": uniform(0, 1)}]), "y": choice([0, uniform(3, 4), uniform(2, 5), uniform(1, 6), choice([uniform(5, 6), uniform(4, 6.5)])]) } """

        def __init__(self):
            GensonBandit.__init__(self, source_string=self.test_str)

        def evaluate(self, config, ctrl):
            x_val = config['x']
            high_freq = config['hf']
            total = numpy.random.randn() * .1
            total -= 2 * numpy.exp(-(x_val / 5.0) ** 2)  # up to 2
            if high_freq['kind'] == 'negcos':
                total -= numpy.sin(x_val) * high_freq['amp']
            total -= config['y']
            return dict(loss=total, status='ok')

        def loss_variance(self, result, config=None):
            return 0.01

    self.algo = GPAlgo(Bandit())
    self.algo.n_startup_jobs = 5
    self.serial_exp = SerialExperiment(self.algo)
def setUp(self):
    """Build the Dummy_DBN_Base experiment and enable thunk traces.

    The previous value of theano's ``print_thunk_trace`` flag is stored in
    ``self._old`` before the flag is forced to True.
    """
    gm_algo = GM_BanditAlgo(
        bandit=Dummy_DBN_Base(),
        good_estimator=IndependentAdaptiveParzenEstimator(),
        bad_estimator=IndependentAdaptiveParzenEstimator())
    self.experiment = SerialExperiment(bandit_algo=gm_algo)

    raise_with_op = theano.gof.link.raise_with_op
    self._old = raise_with_op.print_thunk_trace
    raise_with_op.print_thunk_trace = True
class TestGM_Q1Lognormal(unittest.TestCase):
    # Tests lognormal

    def setUp(self):
        gm_algo = GM_BanditAlgo(
            bandit=hyperopt.bandits.Q1Lognormal(),
            good_estimator=IndependentAdaptiveParzenEstimator(),
            bad_estimator=IndependentAdaptiveParzenEstimator())
        self.experiment = SerialExperiment(bandit_algo=gm_algo)

    def test_optimize_20(self):
        # Runs 50 trials, draws (but by default does not show) two
        # diagnostic plots, and requires a best loss below .01.
        self.experiment.run(50)

        import matplotlib.pyplot as plt
        plt.subplot(1, 2, 1)
        plt.plot(self.experiment.losses())
        plt.subplot(1, 2, 2)

        xs = [trial['x'] for trial in self.experiment.trials]
        use_histogram = 0  # flip to 1 for a histogram instead of a scatter
        if use_histogram:
            plt.hist(xs, bins=20)
        else:
            plt.scatter(xs, range(len(self.experiment.trials)))

        print(self.experiment.losses())
        best_loss = min(self.experiment.losses())
        print('%s %s' % ('MIN', best_loss))
        assert best_loss < .01

        if 0:
            plt.show()
def test_basic(self):
    # Runs the 7 startup jobs followed by 40 further trials and requires
    # the best observed loss to be below 1e-2.
    algo = self.algo
    algo.n_startup_jobs = 7
    experiment = SerialExperiment(algo)

    experiment.run(algo.n_startup_jobs)
    experiment.run(40)

    best_loss = min(experiment.losses())
    assert best_loss < 1e-2
def test_fit_uniform(self):
    # Checks that the first kernel is refinable with bounds
    # (xlim_low, xlim_high), that 5 startup jobs plus 20 trials reach a
    # loss below 5e-3, and that no sampled x left the search interval.
    bandit_algo = GPAlgo(self.bandit)
    bandit_algo.n_startup_jobs = 5
    serial_exp = SerialExperiment(bandit_algo)

    k = bandit_algo.kernels[0]
    assert bandit_algo.is_refinable[k]
    assert bandit_algo.bounds[k] == (self.xlim_low, self.xlim_high)

    serial_exp.run(bandit_algo.n_startup_jobs)
    serial_exp.run(20)

    # A grid search would have used 25 points to cover 5 units of distance,
    # so its error would be no more than 1/5**2 == .04.  Here we test that
    # the GP gets the error below .005.
    assert min(serial_exp.losses()) < 5e-3, serial_exp.results

    # Assert that the sampler has not exceeded the boundaries.  The upper
    # bound must be checked against the largest sample; the previous code
    # compared the smallest sample against xlim_high, which could never
    # detect an overshoot.
    xs = [t['x'] for t in serial_exp.trials]
    assert min(xs) >= self.xlim_low
    assert max(xs) <= self.xlim_high
def test_fit_lognormal(self):
    # Runs the startup jobs plus 25 more trials on self.ln_bandit and
    # checks the kernel bookkeeping, the best loss, and that every sampled
    # x is positive.
    algo = GPAlgo(self.ln_bandit)
    algo.n_startup_jobs = 5
    experiment = SerialExperiment(algo)
    experiment.run(algo.n_startup_jobs)

    # check that the Lognormal kernel has been identified as refinable
    kernel = algo.kernels[0]
    assert algo.is_refinable[kernel]
    assert algo.bounds[kernel][0] > 0

    experiment.run(25)
    best_loss = min(experiment.losses())
    assert best_loss < .005

    # check that all points were positive
    smallest_x = min([trial['x'] for trial in experiment.trials])
    assert smallest_x > 0

    # the lenscale is about 1.8 Is that about right? What's right?
    print(algo.kernels[0].lenscale())
class TestGM_TwoArms(unittest.TestCase):
    # Tests one_of

    def setUp(self):
        gm_algo = GM_BanditAlgo(
            bandit=hyperopt.bandits.TwoArms(),
            good_estimator=IndependentAdaptiveParzenEstimator(),
            bad_estimator=IndependentAdaptiveParzenEstimator())
        self.experiment = SerialExperiment(bandit_algo=gm_algo)

    def test_optimize_20(self):
        # Wraps the algorithm's compiled helper so every call also
        # evaluates a debug function on the same arguments, runs 200
        # trials, prints/plots diagnostics, and requires a best loss
        # below -3.00.
        algo = self.experiment.bandit_algo
        algo.build_helpers()
        HL = algo.helper_locals
        assert len(HL['Gsamples']) == 1

        Gpseudocounts = HL['Gsamples'][0].vals.owner.inputs[1]
        Bpseudocounts = HL['Bsamples'][0].vals.owner.inputs[1]
        compiled_helper = algo._helper

        debug_inputs = (
            [HL['n_to_draw'], HL['n_to_keep'], HL['y_thresh'], HL['yvals']]
            + HL['s_obs'].flatten())
        debug_outputs = (
            HL['Gobs'].flatten()
            + [Gpseudocounts]
            + [Bpseudocounts]
            + [HL['yvals'][where(HL['yvals'] < HL['y_thresh'])]]
            + [HL['yvals'][where(HL['yvals'] >= HL['y_thresh'])]])
        debug = theano.function(
            debug_inputs,
            debug_outputs,
            allow_input_downcast=True)

        # One-element list so the closure below can rebind the latest
        # debug result.
        last_debug = [None]

        def _helper(*args):
            result = compiled_helper(*args)
            last_debug[0] = debug(*args)
            return result

        algo._helper = _helper
        self.experiment.run(200)

        (gobs_idxs, gobs_vals,
         Gpseudo, Bpseudo,
         Gyvals, Byvals) = last_debug[0]
        print(gobs_idxs)
        print('%s %s' % ('Gpseudo', Gpseudo))
        print('%s %s' % ('Bpseudo', Bpseudo))

        import matplotlib.pyplot as plt
        plt.subplot(1, 4, 1)
        Xs = [trial['x'] for trial in self.experiment.trials]
        Ys = self.experiment.losses()
        plt.plot(Ys)
        plt.xlabel('time')
        plt.ylabel('loss')

        plt.subplot(1, 4, 2)
        plt.scatter(Xs, Ys)
        plt.xlabel('X')
        plt.ylabel('loss')

        plt.subplot(1, 4, 3)
        plt.hist(Xs)
        plt.xlabel('X')
        plt.ylabel('freq')

        plt.subplot(1, 4, 4)
        plt.hist(Gyvals, bins=20)
        plt.hist(Byvals, bins=20)

        print(self.experiment.losses())
        print('%s %s' % ('MIN', min(self.experiment.losses())))
        assert min(self.experiment.losses()) < -3.00

        if 0:
            plt.show()
class TestGM_Distractor(unittest.TestCase):
    # Tests normal

    def setUp(self):
        gm_algo = GM_BanditAlgo(
            bandit=hyperopt.bandits.Distractor(),
            good_estimator=IndependentAdaptiveParzenEstimator(),
            bad_estimator=IndependentAdaptiveParzenEstimator())
        self.experiment = SerialExperiment(bandit_algo=gm_algo)

    def test_op_counts(self):
        # If everything is done right, there should be
        # 2 adaptive parzen estimators in the algorithm
        #  - one for fitting the good examples
        #  - one for fitting the rest of the examples
        # 1 GMM1 Op for drawing from the fit of good examples

        def nodes_with_op(fn, op_class):
            # Apply nodes of the compiled function whose op is op_class.
            return [node for node in fn.maker.env.toposort()
                    if isinstance(node.op, op_class)]

        def gmms(fn):
            return nodes_with_op(fn, montetheano.distributions.GMM1)

        def adaptive_parzens(fn):
            return nodes_with_op(fn, idxs_vals_rnd.AdaptiveParzen)

        self.experiment.bandit_algo.build_helpers(do_compile=True)
        HL = self.experiment.bandit_algo.helper_locals

        def compile_output(expression):
            # Same input list for every function compiled in this test.
            return theano.function(
                [HL['n_to_draw'], HL['n_to_keep'], HL['y_thresh'],
                 HL['yvals']] + HL['s_obs'].flatten(),
                expression,
                allow_input_downcast=True)

        # Likelihood under the good model only.
        good_ll_fn = compile_output(HL['G_ll'])
        assert len(gmms(good_ll_fn)) == 1
        assert len(adaptive_parzens(good_ll_fn)) == 1

        # Difference of good and bad likelihoods.
        ll_diff_fn = compile_output(HL['G_ll'] - HL['B_ll'])
        assert len(gmms(ll_diff_fn)) == 1
        assert len(adaptive_parzens(ll_diff_fn)) == 2

        # The helper compiled by the algorithm itself.
        self.experiment.bandit_algo.build_helpers(do_compile=True)
        algo_helper = self.experiment.bandit_algo._helper
        assert len(gmms(algo_helper)) == 1
        assert len(adaptive_parzens(algo_helper)) == 2

    def test_optimize_20(self):
        # Runs 50 trials, draws (but by default does not show) two
        # diagnostic plots, and requires a best loss below -1.85.
        self.experiment.run(50)

        import matplotlib.pyplot as plt
        plt.subplot(1, 2, 1)
        plt.plot(self.experiment.losses())
        plt.subplot(1, 2, 2)
        sampled_x = [trial['x'] for trial in self.experiment.trials]
        plt.hist(sampled_x, bins=20)

        print(self.experiment.losses())
        print('%s %s' % ('MIN', min(self.experiment.losses())))
        assert min(self.experiment.losses()) < -1.85

        if 0:
            plt.show()
class TestGaussWave2(unittest.TestCase):
    # Tests nested search

    def setUp(self):
        self.experiment = SerialExperiment(
            bandit_algo=GM_BanditAlgo(
                bandit=hyperopt.bandits.GaussWave2(),
                good_estimator=IndependentAdaptiveParzenEstimator(),
                bad_estimator=IndependentAdaptiveParzenEstimator()))

    def test_op_counts_in_llik(self):
        # Compiles log_EI from the helper locals and counts the ops that
        # survive optimization.
        self.experiment.bandit_algo.build_helpers(do_compile=True,
                mode='FAST_RUN')
        HL = self.experiment.bandit_algo.helper_locals
        f = theano.function(
            [HL['n_to_draw'], HL['n_to_keep'], HL['y_thresh'], HL['yvals']]
            + HL['s_obs'].flatten(),
            HL['log_EI'],
            no_default_updates=True,
            mode='FAST_RUN')  # required for shape inference
        try:
            assert len(gmms(f)) == 0
            assert len(bgmms(f)) == 2  # sampling from good
            assert len(categoricals(f)) == 1  # sampling from good
            # fitting both good and bad
            assert len(adaptive_parzens(f)) == 4
        except Exception:
            # Dump the compiled graph to help diagnose the failure, then
            # let the original exception propagate.
            theano.printing.debugprint(f)
            raise

    def test_op_counts_in_Gsamples(self):
        # Compiles the flattened Gsamples from the helper locals and
        # counts the ops that survive optimization.
        self.experiment.bandit_algo.build_helpers(do_compile=True,
                mode='FAST_RUN')
        HL = self.experiment.bandit_algo.helper_locals
        f = theano.function(
            [HL['n_to_draw'], HL['n_to_keep'], HL['y_thresh'], HL['yvals']]
            + HL['s_obs'].flatten(),
            HL['Gsamples'].flatten(),
            no_default_updates=True,  # allow prune priors
            mode='FAST_RUN')  # required for shape inference
        try:
            assert len(gmms(f)) == 0
            assert len(bgmms(f)) == 2  # sampling from good
            assert len(categoricals(f)) == 1  # sampling from good
            # NOTE(review): half the count of the log_EI test; presumably
            # only the "good" fit is needed to draw Gsamples -- confirm.
            assert len(adaptive_parzens(f)) == 2
        except Exception:
            theano.printing.debugprint(f)
            raise

    def test_optimize_20(self):
        # Runs 50 trials, draws (but by default does not show) two
        # diagnostic plots, and requires a best loss below -1.75.
        self.experiment.run(50)

        import matplotlib.pyplot as plt
        plt.subplot(1, 2, 1)
        plt.plot(self.experiment.losses())
        plt.subplot(1, 2, 2)
        plt.scatter(
            [t['x'] for t in self.experiment.trials],
            range(len(self.experiment.trials)))
        print(self.experiment.losses())
        print('MIN %s' % min(self.experiment.losses()))
        assert min(self.experiment.losses()) < -1.75
        if 0:
            plt.show()

    def test_fit(self):
        # Runs 150 trials and plots the losses and the per-variable view.
        # NOTE: main_plot_vars is called with end_with_show=True.
        self.experiment.run(150)

        # Imported locally like the other plotting tests in this class;
        # previously `plt` was used here without being bound in this scope.
        import matplotlib.pyplot as plt
        plt.plot(
            range(len(self.experiment.losses())),
            self.experiment.losses())
        plt.figure()
        hyperopt.plotting.main_plot_vars(self.experiment,
                end_with_show=True)
class TestPickle(unittest.TestCase):
    # Two identically seeded/configured GP experiments ("a" and "b") must
    # produce the same losses, even when "b" has its compiled theano
    # functions deleted or is round-tripped through cPickle along the way.

    def setUp(self):
        numpy.random.seed(555)
        algos = []
        for _ in range(2):
            algo = GPAlgo(GaussWave2())
            algo.n_startup_jobs = 10
            algo.EI_ambition = 0.75
            algos.append(algo)
        self.algo_a, self.algo_b = algos
        self.exp_a = SerialExperiment(self.algo_a)
        self.exp_b = SerialExperiment(self.algo_b)

    def _print_side_by_side(self):
        # Print paired trials of both experiments, then both loss lists.
        paired_trials = zip(self.exp_a.trials, self.exp_b.trials)
        for i, (ta, tb) in enumerate(paired_trials):
            print('%s %s %s' % (i, ta, tb))
        print(self.exp_a.losses())
        print(self.exp_b.losses())

    def test_reproducible(self):
        self.exp_a.run(21)
        self.exp_b.run(21)
        self._print_side_by_side()
        # N.B. exact comparison, not approximate
        assert numpy.all(self.exp_a.losses() == self.exp_b.losses())

    def test_reproducible_w_recompiling(self):
        for step in range(21):
            self.exp_b.run(1)
            if step % 5 == 0:
                # Drop every compiled theano function stored on algo_b.
                compiled_names = [
                    name for name, value in self.algo_b.__dict__.items()
                    if isinstance(value, theano.compile.Function)]
                for name in compiled_names:
                    delattr(self.algo_b, name)
        self.exp_a.run(21)
        self._print_side_by_side()
        # N.B. exact comparison, not approximate
        assert numpy.all(self.exp_a.losses() == self.exp_b.losses())

    def test_reproducible_w_pickling(self):
        self.exp_a.bandit_algo.trace_on = True
        self.exp_b.bandit_algo.trace_on = True
        ITERS = 12

        for step in range(ITERS):
            print('%s %s' % ('running experiment b', step))
            self.exp_b.run(1)
            if step % 5 == 0:
                # This knocks out the theano functions
                # (see test_reproducible_w_recompiling)
                # but also deep-copies the rest of the experiment
                pickled = cPickle.dumps(self.exp_b)
                self.exp_b = cPickle.loads(pickled)
        self.exp_a.run(ITERS)

        trace_a = self.exp_a.bandit_algo._trace
        trace_b = self.exp_b.bandit_algo._trace
        for entry_a, entry_b in zip(trace_a, trace_b):
            assert entry_a[0] == entry_b[0], (entry_a[0], entry_b[0])
            print('%s %s' % ('matching', entry_a[0]))
            na = numpy.asarray(entry_a[1])
            nb = numpy.asarray(entry_b[1])
            if numpy.all(na == nb):
                continue
            # Traced values diverged: dump both sides and fail.
            print(entry_a[0])
            print('')
            print(na.shape)
            print(na)
            print('')
            print(nb.shape)
            print(nb)
            print('')
            print(na - nb)
            assert 0

        # Hook for per-trial debugging output; intentionally does nothing.
        for i, (ta, tb) in enumerate(zip(
                self.exp_a.trials, self.exp_b.trials)):
            pass

        print(self.exp_a.losses())
        print(self.exp_b.losses())
        assert numpy.allclose(self.exp_a.losses(), self.exp_b.losses())
def setUp(self):
    """Create a SerialExperiment running GM_BanditAlgo on Distractor.

    Separate IndependentAdaptiveParzenEstimator instances are used for the
    good and the bad estimator.
    """
    gm_algo = GM_BanditAlgo(
        bandit=hyperopt.bandits.Distractor(),
        good_estimator=IndependentAdaptiveParzenEstimator(),
        bad_estimator=IndependentAdaptiveParzenEstimator())
    self.experiment = SerialExperiment(bandit_algo=gm_algo)
def setUp(self):
    """Seed numpy and build one GP experiment on the GaussWave2 bandit.

    The algorithm is configured with 20 startup jobs and an EI_ambition
    of 0.75 before being wrapped in a SerialExperiment.
    """
    numpy.random.seed(555)
    gp_algo = GPAlgo(GaussWave2())
    gp_algo.n_startup_jobs = 20
    gp_algo.EI_ambition = 0.75
    self.algo = gp_algo
    self.serial_exp = SerialExperiment(gp_algo)