def testShekelGPPrior(self):
    """Check that an RBFN mean prior lowers GP error on Shekel5.

    A prior is trained on 100 samples; two GPs (with and without that
    prior) are then fit to the same 10 observations and compared by mean
    squared error over 1000 further samples.
    """
    S5 = Shekel5()

    # train the prior on its own sample of the function
    pX = lhcSample(S5.bounds, 100, seed=8)
    pY = [S5.f(x) for x in pX]
    prior = RBFNMeanPrior()
    prior.train(pX, pY, S5.bounds, k=10, seed=103)

    # observations shared by both GPs
    X = lhcSample(S5.bounds, 10, seed=9)
    Y = [S5.f(x) for x in X]

    hv = .1
    gkernel = GaussianKernel_ard([hv, hv, hv, hv])
    priorGP = GaussianProcess(gkernel, X, Y, prior=prior)
    nopriorGP = GaussianProcess(gkernel, X, Y)

    S = lhcSample(S5.bounds, 1000, seed=10)
    nopriorErr = mean([(S5.f(x) - nopriorGP.mu(x))**2 for x in S])
    priorErr = mean([(S5.f(x) - priorGP.mu(x))**2 for x in S])

    # the prior has to cut the error by at least 20%
    # (assertTrue replaces failUnless, which Python 3.12 removed)
    self.assertTrue(priorErr < nopriorErr * .8)
def testGPPrior(self):
    """Check that a data-trained RBFN prior halves GP error on a 1-D sine.

    Both GPs see the same two observations; only one gets the prior.
    """
    def foo(x):
        return sum(sin(x * 20))

    bounds = [[0., 1.]]

    # train prior
    pX = lhcSample([[0., 1.]], 100, seed=6)
    pY = [foo(x) for x in pX]
    prior = RBFNMeanPrior()
    prior.train(pX, pY, bounds, k=10, seed=102)

    X = lhcSample([[0., 1.]], 2, seed=7)
    Y = [foo(x) for x in X]

    kernel = GaussianKernel_ard(array([.1]))
    GP = GaussianProcess(kernel, X, Y, prior=prior)
    GPnoprior = GaussianProcess(kernel, X, Y)

    S = arange(0, 1, .01)
    nopriorErr = mean([(foo(x) - GPnoprior.mu(x))**2 for x in S])
    priorErr = mean([(foo(x) - GP.mu(x))**2 for x in S])

    # assertTrue replaces failUnless, which Python 3.12 removed
    self.assertTrue(priorErr < nopriorErr * .5)

    # flip to True to plot the prior and both posterior means by hand
    if False:
        figure(1)
        clf()
        plot(S, [prior.mu(x) for x in S], 'g-', alpha=0.3)
        plot(S, [GPnoprior.mu(x) for x in S], 'b-', alpha=0.3)
        plot(S, [GP.mu(x) for x in S], 'k-', lw=2)
        plot(X, Y, 'ko')
        show()
def _testKernelMaxEI(self):
    """Compare three EI optimisers for several kernels, with and without a prior.

    For each kernel, `direct`, `cdirect` and `maximizeEI` must agree on
    the optimum value (4 places) and location (L1 distance < .01). The
    same is then required of a GP with an RBFN prior, whose optimum must
    also differ from the prior-free one.

    The name does not start with ``test``, so the default unittest
    method prefix will not collect it.
    """
    S5 = Shekel5()
    hv = 0.1
    testkernels = [GaussianKernel_iso([hv]),
                   GaussianKernel_ard([hv, hv, hv, hv]),
                   MaternKernel3([hv, 1.0])]

    def optimise_all(gp):
        # run the three optimisers on the same GP, in the original order
        eif = EI(gp)
        d = direct(eif.negf, S5.bounds, maxiter=10)
        c = cdirect(eif.negf, S5.bounds, maxiter=10)
        m = maximizeEI(gp, S5.bounds, maxiter=10)
        return d, c, m

    def assert_agree(d, c, m):
        # direct/cdirect minimise -EI, maximizeEI returns +EI
        (dopt, doptx), (copt, coptx), (mopt, moptx) = d, c, m
        self.assertAlmostEqual(dopt, copt, 4)
        self.assertAlmostEqual(-dopt, mopt, 4)
        self.assertAlmostEqual(-copt, mopt, 4)
        self.assertTrue(sum(abs(doptx - coptx)) < .01)
        self.assertTrue(sum(abs(moptx - coptx)) < .01)
        self.assertTrue(sum(abs(moptx - doptx)) < .01)

    for kernel in testkernels:
        # train GPs
        X = lhcSample(S5.bounds, 10, seed=0)
        Y = [S5.f(x) for x in X]

        d, c, m = optimise_all(GaussianProcess(kernel, X, Y))
        assert_agree(d, c, m)

        # train GP w/prior
        pX = lhcSample(S5.bounds, 100, seed=101)
        pY = [S5.f(x) for x in pX]
        prior = RBFNMeanPrior()
        prior.train(pX, pY, bounds=S5.bounds, k=10, seed=102)

        pd, pc, pm = optimise_all(GaussianProcess(kernel, X, Y, prior=prior))
        self.assertNotAlmostEqual(pd[0], d[0], 3)
        assert_agree(pd, pc, pm)
def testFunctionValues(self):
    """Check that no sampled value beats each test function's stated optimum.

    With ``maximize=False`` no sample may fall below ``tf.minimum``; with
    ``maximize=True`` no sample may exceed ``-tf.minimum``.
    """
    for maximize in (False, True):
        for TestFunction in TestFunctions:
            tf = TestFunction(maximize=maximize)
            for i in range(100):
                x = lhcSample(tf.bounds, 1, seed=i)[0]
                # assertFalse (not assertGreaterEqual) keeps the original
                # pass/fail outcome for NaN values
                if maximize:
                    self.assertFalse(tf.f(x) > -tf.minimum)
                else:
                    self.assertFalse(tf.f(x) < tf.minimum)
def testFunctionValues(self):
    """Check that no sampled value beats each test function's stated optimum."""
    # minimisation form: nothing may fall below the stated minimum
    for TestFunction in TestFunctions:
        tf = TestFunction(maximize=False)
        for i in range(100):  # range works on Python 2 and 3; xrange is 2-only
            x = lhcSample(tf.bounds, 1, seed=i)[0]
            self.assertFalse(tf.f(x) < tf.minimum)

    # maximisation form: nothing may exceed the negated minimum
    for TestFunction in TestFunctions:
        tf = TestFunction(maximize=True)
        for i in range(100):
            x = lhcSample(tf.bounds, 1, seed=i)[0]
            self.assertFalse(tf.f(x) > -tf.minimum)
def test2DpyEI(self):
    """Smoke-test maximizeEI on a GP fit to five samples of a 2-D sine sum.

    There are no assertions; the test passes if nothing raises.
    """
    f = lambda x: sum(sin(x))
    bounds = [[0., 5.], [0., 5.]]
    X = lhcSample(bounds, 5, seed=24)
    Y = [f(x) for x in X]

    kernel = GaussianKernel_ard(array([1.0, 1.0]))
    GP = GaussianProcess(kernel, X, Y)
    maxei = maximizeEI(GP, bounds)

    # flip to True to draw the EI contours, the data and the optimum
    if False:
        figure(1)
        # range instead of xrange so the block also runs on Python 3
        c0 = [(i / 50.) * (bounds[0][1] - bounds[0][0]) + bounds[0][0] for i in range(51)]
        c1 = [(i / 50.) * (bounds[1][1] - bounds[1][0]) + bounds[1][0] for i in range(51)]
        z = array([[GP.ei(array([i, j])) for i in c0] for j in c1])

        ax = plt.subplot(111)
        cs = ax.contour(c0, c1, z, 10, alpha=0.5, cmap=cm.Blues_r)
        plot([x[0] for x in X], [x[1] for x in X], 'ro')
        for i in range(len(X)):
            annotate('%2f' % Y[i], X[i])
        plot(maxei[1][0], maxei[1][1], 'ko')
        show()
def testGDelta(self):
    """Check Gdelta against the plain G ratio in three regimes."""
    # usually, Gdelta==G
    GP = GaussianProcess(GaussianKernel_iso([0.05]))
    X = lhcSample([[0., 1.]], 5, seed=10)
    Y = [x**2 for x in X]
    GP.train(X, Y)

    G = (Y[0] - max(Y)) / (Y[0] - 1)
    self.assertEqual(G, Gdelta(GP, [[0., 1.]], Y[0], 1.0, 0.01))

    # sometimes, though, Gdelta > G -- this GP has a very high confidence
    # prediction of a very good point at x ~ .65
    GP = GaussianProcess(GaussianKernel_iso([0.1]))
    X = array([[.5], [.51], [.59], [.6]])
    Y = array([1., 2., 2., 1.])
    GP.train(X, Y)

    G = (Y[0] - max(Y)) / (Y[0] - 4.0)
    Gd = Gdelta(GP, [[0., 1.]], Y[0], 4.0, 0.01)
    self.assertTrue(G < Gd)

    # however, if there is more variance, we will collapse back to G
    GP = GaussianProcess(GaussianKernel_iso([.001]))
    GP.train(X, Y)
    G = (Y[0] - max(Y)) / (Y[0] - 4.0)
    self.assertEqual(G, Gdelta(GP, [[0., 1.]], Y[0], 4.0, 0.01))
def testFastGallery(self):
    """Build galleries from a preference GP and check they respect the bounds."""
    tf = Hartman3()
    kernel = tf.createKernel(GaussianKernel_ard)
    X = lhcSample(tf.bounds, 10, seed=23)
    prefs = query2prefs(X, tf.f)

    GP = PrefGaussianProcess(kernel)
    GP.addPreferences(prefs)

    gallery = fastUCBGallery(GP, tf.bounds, 4)
    print('gallery returned:')
    for x in gallery:
        print('\t', x)

    GP.addPreferences(query2prefs(gallery, tf.f))

    # make sure we don't return anything out of bounds
    bounds = copy(tf.bounds)
    bounds[0] = [0., 0.]  # pin the first coordinate to a single value
    gallery = fastUCBGallery(GP, bounds, 4)
    print('gallery returned:')
    for x in gallery:
        print('\t', x)
        for v, b in zip(x, bounds):
            self.assertTrue(v >= b[0] and v <= b[1])
def testFastGallery(self):
    """Build galleries from a preference GP and check they respect the bounds."""
    tf = Hartman3()
    kernel = tf.createKernel(GaussianKernel_ard)
    X = lhcSample(tf.bounds, 10, seed=23)
    prefs = query2prefs(X, tf.f)

    GP = PrefGaussianProcess(kernel)
    GP.addPreferences(prefs)

    # single-argument print(...) behaves the same on Python 2 and 3
    gallery = fastUCBGallery(GP, tf.bounds, 4)
    print('gallery returned:')
    for x in gallery:
        print('\t%s' % (x,))

    GP.addPreferences(query2prefs(gallery, tf.f))

    # make sure we don't return anything out of bounds
    bounds = copy(tf.bounds)
    bounds[0] = [0., 0.]  # pin the first coordinate to a single value
    gallery = fastUCBGallery(GP, bounds, 4)
    print('gallery returned:')
    for x in gallery:
        print('\t%s' % (x,))
        for v, b in zip(x, bounds):
            self.assertTrue(v >= b[0] and v <= b[1])
def testPriorAndPrefs(self):
    """Smoke-test maximizeEI on a preference GP that also carries a prior.

    Nothing is asserted; the test passes if no call raises.
    """
    shekel = Shekel5()

    # fit the mean prior
    prior_inputs = lhcSample(shekel.bounds, 100, seed=13)
    prior_targets = [shekel.f(p) for p in prior_inputs]
    prior = RBFNMeanPrior()
    prior.train(prior_inputs, prior_targets, shekel.bounds, k=10)

    hv = .1
    GP = PrefGaussianProcess(GaussianKernel_ard([hv, hv, hv, hv]), prior=prior)

    # five points along the diagonal, at .5, 1.5, ..., 4.5
    X = [array([i + .5] * 4) for i in range(5)]
    valX = [pt.copy() for pt in X]

    # one preference per unordered pair, preferred point first
    pairs = [(X[i], X[j]) for i in range(len(X)) for j in range(i)]
    prefs = []
    for a, b in pairs:
        if shekel.f(a) > shekel.f(b):
            prefs.append((a, b, 0))
        else:
            prefs.append((b, a, 0))

    GP.addPreferences(prefs)
    opt, optx = maximizeEI(GP, shekel.bounds)
def testRBFN_1D(self):
    """Check that an RBFN prior fits a 1-D sine with MSE below 0.05."""
    # sample from a synthetic function and see how much we improve the
    # error by using the prior function
    def foo(x):
        return sum(sin(x * 20))

    X = lhcSample([[0., 1.]], 50, seed=3)
    Y = [foo(x) for x in X]

    prior = RBFNMeanPrior()
    prior.train(X, Y, [[0., 1.]], k=10, seed=100)

    # See how well we fit the function by getting the average squared error
    # over 100 samples of the function.  Baseline foo(x)=0 MSE is 0.48.
    # We will aim for MSE < 0.05.
    S = arange(0, 1, .01)
    # square the residuals: averaging signed errors lets positive and
    # negative misses cancel, so a bad fit could still pass
    error = mean([(foo(x) - prior.mu(x))**2 for x in S])
    self.assertTrue(error < 0.05)

    # for debugging
    if False:
        figure(1)
        plot(S, [foo(x) for x in S], 'b-')
        plot(S, [prior.mu(x) for x in S], 'k-')
        show()
def testGDelta(self):
    """Check Gdelta against the plain G ratio in three regimes."""
    box = [[0., 1.]]

    # usually, Gdelta==G
    GP = GaussianProcess(GaussianKernel_iso([0.05]))
    X = lhcSample(box, 5, seed=10)
    Y = [x**2 for x in X]
    GP.train(X, Y)
    G = (Y[0] - max(Y)) / (Y[0] - 1)
    self.assertEqual(G, Gdelta(GP, [[0., 1.]], Y[0], 1.0, 0.01))

    # sometimes, though, Gdelta > G -- this GP has a very high confidence
    # prediction of a very good point at x ~ .65
    GP = GaussianProcess(GaussianKernel_iso([0.1]))
    X = array([[.5], [.51], [.59], [.6]])
    Y = array([1., 2., 2., 1.])
    GP.train(X, Y)
    G = (Y[0] - max(Y)) / (Y[0] - 4.0)
    Gd = Gdelta(GP, [[0., 1.]], Y[0], 4.0, 0.01)
    self.assertTrue(G < Gd)

    # however, if there is more variance, we will collapse back to G
    GP = GaussianProcess(GaussianKernel_iso([.001]))
    GP.train(X, Y)
    G = (Y[0] - max(Y)) / (Y[0] - 4.0)
    self.assertEqual(G, Gdelta(GP, [[0., 1.]], Y[0], 4.0, 0.01))
def testRBFN_1D(self):
    """Check that an RBFN prior fits a 1-D sine with MSE below 0.05."""
    # sample from a synthetic function and see how much we improve the
    # error by using the prior function
    def foo(x):
        return sum(sin(x * 20))

    X = lhcSample([[0., 1.]], 50, seed=3)
    Y = [foo(x) for x in X]

    prior = RBFNMeanPrior()
    prior.train(X, Y, [[0., 1.]], k=10, seed=100)

    # Average squared error over 100 evenly spaced points.  Baseline
    # foo(x)=0 MSE is 0.48; we aim for MSE < 0.05.
    S = arange(0, 1, .01)
    residuals = [foo(x) - prior.mu(x) for x in S]
    # the residuals must be squared before averaging, otherwise errors of
    # opposite sign cancel and the check is not an MSE at all
    error = mean([r**2 for r in residuals])
    self.assertTrue(error < 0.05)

    # for debugging
    if False:
        figure(1)
        plot(S, [foo(x) for x in S], 'b-')
        plot(S, [prior.mu(x) for x in S], 'k-')
        show()
def testPriorAndPrefs(self):
    """Smoke-test maximizeEI on a preference GP that also carries a prior.

    Nothing is asserted; the test passes if no call raises.
    """
    S5 = Shekel5()

    pX = lhcSample(S5.bounds, 100, seed=13)
    pY = [S5.f(x) for x in pX]
    prior = RBFNMeanPrior()
    prior.train(pX, pY, S5.bounds, k=10)

    hv = .1
    hyper = [hv, hv, hv, hv]
    gkernel = GaussianKernel_ard(hyper)
    GP = PrefGaussianProcess(gkernel, prior=prior)

    # range instead of xrange so the test also runs on Python 3
    X = [array([i + .5] * 4) for i in range(5)]

    # one preference per unordered pair, preferred point first
    prefs = []
    for i in range(len(X)):
        for j in range(i):
            if S5.f(X[i]) > S5.f(X[j]):
                prefs.append((X[i], X[j], 0))
            else:
                prefs.append((X[j], X[i], 0))

    GP.addPreferences(prefs)
    opt, optx = maximizeEI(GP, S5.bounds)
def test2DpyEI(self):
    """Smoke-test maximizeEI on a GP fit to five samples of a 2-D sine sum.

    Nothing is asserted; the test passes if no call raises.
    """
    def f(x):
        return sum(sin(x))

    bounds = [[0., 5.], [0., 5.]]
    X = lhcSample(bounds, 5, seed=24)
    Y = [f(pt) for pt in X]

    GP = GaussianProcess(GaussianKernel_ard(array([1.0, 1.0])), X, Y)
    maxei = maximizeEI(GP, bounds)

    # flip to True to draw the EI contours, the data and the optimum
    if False:
        figure(1)
        (lo0, hi0), (lo1, hi1) = bounds
        c0 = [(step / 50.) * (hi0 - lo0) + lo0 for step in range(51)]
        c1 = [(step / 50.) * (hi1 - lo1) + lo1 for step in range(51)]
        z = array([[GP.ei(array([u, v])) for u in c0] for v in c1])

        ax = plt.subplot(111)
        cs = ax.contour(c0, c1, z, 10, alpha=0.5, cmap=cm.Blues_r)
        plot([pt[0] for pt in X], [pt[1] for pt in X], 'ro')
        for label, pt in zip(Y, X):
            annotate('%2f' % label, pt)
        best = maxei[1]
        plot(best[0], best[1], 'ko')
        show()
def testMaxEIPrior(self):
    """Check EI maximisers agree on a GP with a prior and leave its data alone.

    `cdirect` on the EI objective and `maximizeEI` must agree to 2
    places; afterwards the GP's stored X and Y must still equal copies
    taken before optimisation, including after calling the prior's mean.
    """
    S5 = Shekel5()
    pX = lhcSample(S5.bounds, 100, seed=511)
    pY = [S5.f(x) for x in pX]
    prior = RBFNMeanPrior()
    prior.train(pX, pY, bounds=S5.bounds, k=10, seed=504)

    hv = .1
    hyper = [hv, hv, hv, hv]
    kernel = GaussianKernel_ard(hyper)

    # train GPs
    X = lhcSample(S5.bounds, 10, seed=512)
    Y = [S5.f(x) for x in X]

    # validation copies, taken before anything can mutate the inputs
    valX = [x.copy() for x in X]
    valY = copy(Y)

    GP = GaussianProcess(kernel, X, Y, prior=prior)

    eif = EI(GP)
    copt, _ = cdirect(eif.negf, S5.bounds, maxiter=20)
    mopt, _ = maximizeEI(GP, S5.bounds, maxiter=20)

    self.assertAlmostEqual(-copt, mopt, 2)

    def assert_data_untouched():
        for i in range(len(GP.X)):
            self.assertTrue(all(valX[i] == GP.X[i]))
            self.assertTrue(valY[i] == GP.Y[i])

    # the same check runs twice, as in the original: once after the
    # optimisers, once after the prior mean has been evaluated
    for _ in range(2):
        assert_data_untouched()
        GP.prior.mu(GP.X[0])
        self.assertTrue(all(valX[0] == GP.X[0]))
def learnHyper(tf, Kernel):
    """
    for a given kernel and test functions, learn some hyperparameters

    Draws ``40 * len(tf.bounds)`` samples of ``tf``, minimises ``nlml``
    (gradient ``dnlml``) over log-hyperparameters with BFGS, and returns
    the exponentiated result.
    """
    D = len(tf.bounds)
    X = lhcSample(tf.bounds, D * 40)
    Y = array([tf.f(x) for x in X])

    # NOTE(review): the start point has a single entry, so only one
    # hyperparameter is learned whatever `Kernel` is -- confirm intended.
    x0 = log(ones(1) * .5)

    # fmin_bfgs documents `args` as a tuple; older SciPy concatenates it
    # to a tuple internally, which fails for a list
    loghyper = fmin_bfgs(nlml, x0, dnlml, args=(Kernel, X, Y))
    return exp(loghyper)
def testRNFN_10D(self):
    """Check that a 10-D RBFN prior beats the predict-zero baseline."""
    # as above, but with a 10D test function and more data
    def foo(x):
        return sum(sin(x * 2))

    bounds = [[0., 1.]] * 10
    X = lhcSample(bounds, 100, seed=4)
    Y = [foo(x) for x in X]

    prior = RBFNMeanPrior()
    prior.train(X, Y, bounds, k=20, seed=5)

    S = lhcSample(bounds, 100, seed=6)
    RBNError = mean([(foo(x) - prior.mu(x))**2 for x in S])
    baselineError = mean([foo(x)**2 for x in S])

    # assertTrue replaces failUnless, which Python 3.12 removed
    self.assertTrue(RBNError < baselineError)
def testLatinHypercubeSampling(self):
    """Check lhcSample stratification in 1-D and bounds in 100-D."""
    # test 1D case
    samples = lhcSample([[0., 1.]], 100, seed=20)
    self.assertEqual(len(samples), 100)
    self.assertTrue(min(samples) >= 0.0)
    self.assertTrue(max(samples) <= 1.0)

    # each of the 100 equal-width bins must hold exactly one sample, and
    # the samples must not come back already sorted
    expected_bins = list(range(100))  # a list never equals a range on Python 3
    isamp = [int(s * 100) for s in samples]
    self.assertNotEqual(isamp, expected_bins)
    isamp.sort()
    self.assertEqual(isamp, expected_bins)

    # test 100D case
    samples = lhcSample([(i * 10., (i + 1.) * 10.) for i in range(100)], 10, seed=21)
    for s in samples:
        self.assertEqual(len(s), 100)
        for i, x in enumerate(s):
            self.assertTrue(x > i * 10.)
            self.assertTrue(x < (i + 1) * 10.)
def testRNFN_10D(self):
    """Check that a 10-D RBFN prior beats the predict-zero baseline."""
    # as above, but with a 10D test function and more data
    def foo(x):
        return sum(sin(x * 2))

    bounds = [[0., 1.]] * 10
    X = lhcSample(bounds, 100, seed=4)
    Y = [foo(x) for x in X]

    prior = RBFNMeanPrior()
    prior.train(X, Y, bounds, k=20, seed=5)

    # held-out samples, drawn with a different seed than the training set
    S = lhcSample(bounds, 100, seed=6)
    RBNError = mean([(foo(x) - prior.mu(x))**2 for x in S])
    baselineError = mean([foo(x)**2 for x in S])

    self.assertTrue(RBNError < baselineError)
def checkMinimum(testfuncs):
    """
    try minimizing the function and see if we find a better minimum than we
    currently have

    For each test function this prints the stated argmin/minimum, the
    value actually obtained at the stated argmin, and the result of a
    BFGS run started there. It then reports any of 100 Latin-hypercube
    samples whose value is below the stated minimum.
    """
    for tf in testfuncs:
        # named `found` so the local does not hide any module-level `argmin`
        found = fmin_bfgs(tf.f, tf.argmin)

        # single-argument print(...) behaves the same on Python 2 and 3
        print('[%s] was told argmin = %s, min = %.2f' % (tf.name, tf.argmin, tf.minimum))
        print('[%s] check argmin = %s, min = %.2f' % (tf.name, tf.argmin, tf.f(tf.argmin)))
        print('[%s] found argmin = %s, min = %.2f' % (tf.name, found, tf.f(found)))

        for x in lhcSample(tf.bounds, 100):
            # evaluate once so the printed value is the one that was compared
            y = tf.f(x)
            if y < tf.minimum:
                print('sample x = %s, y = %.4f is lower than minimum %.4f' % (x, y, tf.minimum))
def testGPPrior(self):
    """Check that a data-trained RBFN prior halves GP error on a 1-D sine."""
    # see how GP works with the dataprior...
    def foo(x):
        return sum(sin(x * 20))

    bounds = [[0., 1.]]

    # train prior
    pX = lhcSample([[0., 1.]], 100, seed=6)
    pY = [foo(x) for x in pX]
    prior = RBFNMeanPrior()
    prior.train(pX, pY, bounds, k=10, seed=102)

    # only two observations, so the prior carries most of the information
    X = lhcSample([[0., 1.]], 2, seed=7)
    Y = [foo(x) for x in X]

    kernel = GaussianKernel_ard(array([.1]))
    GP = GaussianProcess(kernel, X, Y, prior=prior)
    GPnoprior = GaussianProcess(kernel, X, Y)

    S = arange(0, 1, .01)
    nopriorErr = mean([(foo(x) - GPnoprior.mu(x))**2 for x in S])
    priorErr = mean([(foo(x) - GP.mu(x))**2 for x in S])

    self.assertTrue(priorErr < nopriorErr * .5)

    # flip to True to plot the prior and both posterior means by hand
    if False:
        figure(1)
        clf()
        plot(S, [prior.mu(x) for x in S], 'g-', alpha=0.3)
        plot(S, [GPnoprior.mu(x) for x in S], 'b-', alpha=0.3)
        plot(S, [GP.mu(x) for x in S], 'k-', lw=2)
        plot(X, Y, 'ko')
        show()
def testNoise(self):
    """Check the EI optimisers agree per noise level and differ across levels.

    For each noise value the GP must store it, and `direct`, `cdirect`
    and `maximizeEI` must agree to 4 places. Optima at different noise
    levels must not be equal to 4 places.
    """
    tf = Branin()
    X = lhcSample(tf.bounds, 10, seed=0)
    Y = [tf.f(x) for x in X]

    earlier = []  # (dopt, copt, mopt) of every noise level already run
    for noise in (1e-4, 0.01, 0.1):
        GP = GaussianProcess(MaternKernel3([1.0, 1.0]), X, Y, noise=noise)
        self.assertEqual(GP.noise, noise)

        eif = EI(GP)
        dopt, _ = direct(eif.negf, tf.bounds, maxiter=10)
        copt, _ = cdirect(eif.negf, tf.bounds, maxiter=10)
        mopt, _ = maximizeEI(GP, tf.bounds, maxiter=10)

        # direct/cdirect minimise -EI, maximizeEI returns +EI
        self.assertAlmostEqual(dopt, copt, 4)
        self.assertAlmostEqual(-dopt, mopt, 4)
        self.assertAlmostEqual(-copt, mopt, 4)

        for prev_d, prev_c, prev_m in earlier:
            self.assertNotAlmostEqual(prev_d, dopt, 4)
            self.assertNotAlmostEqual(prev_c, copt, 4)
            self.assertNotAlmostEqual(prev_m, mopt, 4)
        earlier.append((dopt, copt, mopt))
def testXi(self):
    """Check the EI optimisers agree per xi and differ across xi values."""
    S5 = Shekel5()
    X = lhcSample(S5.bounds, 10, seed=0)
    Y = [S5.f(x) for x in X]

    def optimise(GP, eif, xi):
        # run all three optimisers and require them to agree to 4 places
        dopt, _ = direct(eif.negf, S5.bounds, maxiter=10)
        copt, _ = cdirect(eif.negf, S5.bounds, maxiter=10)
        mopt, _ = maximizeEI(GP, S5.bounds, xi=xi, maxiter=10)
        self.assertAlmostEqual(dopt, copt, 4)
        self.assertAlmostEqual(-dopt, mopt, 4)
        self.assertAlmostEqual(-copt, mopt, 4)
        return dopt, copt, mopt

    def assert_differ(a, b):
        for left, right in zip(a, b):
            self.assertNotAlmostEqual(left, right, 4)

    # first GP gets its data through addData rather than the constructor
    GP1 = GaussianProcess(GaussianKernel_iso([.2]))
    GP1.addData(X, Y)
    r1 = optimise(GP1, EI(GP1, xi=0.0), 0.0)

    GP2 = GaussianProcess(GaussianKernel_iso([.3]), X, Y)
    eif2 = EI(GP2, xi=0.01)
    self.assertEqual(eif2.xi, 0.01)
    r2 = optimise(GP2, eif2, 0.01)
    assert_differ(r1, r2)

    GP3 = GaussianProcess(GaussianKernel_iso([.3]), X, Y)
    r3 = optimise(GP3, EI(GP3, xi=0.1), 0.1)
    assert_differ(r1, r3)
    assert_differ(r2, r3)
def testXi(self):
    """Check the UCB optimisers agree per scale and differ across scales."""
    S5 = Shekel5()
    X = lhcSample(S5.bounds, 10, seed=0)
    Y = [S5.f(x) for x in X]

    def optimise(GP, scale):
        # run all three optimisers and require them to agree to 4 places
        ucbf = UCB(GP, len(S5.bounds), scale=scale)
        dopt, _ = direct(ucbf.negf, S5.bounds, maxiter=10)
        copt, _ = cdirect(ucbf.negf, S5.bounds, maxiter=10)
        mopt, _ = maximizeUCB(GP, S5.bounds, scale=scale, maxiter=10)
        self.assertAlmostEqual(dopt, copt, 4)
        self.assertAlmostEqual(-dopt, mopt, 4)
        self.assertAlmostEqual(-copt, mopt, 4)
        return dopt, copt, mopt

    def assert_differ(a, b):
        for left, right in zip(a, b):
            self.assertNotAlmostEqual(left, right, 4)

    # first GP gets its data through addData rather than the constructor
    GP1 = GaussianProcess(GaussianKernel_iso([.2]))
    GP1.addData(X, Y)
    r1 = optimise(GP1, 0.5)

    GP2 = GaussianProcess(GaussianKernel_iso([.3]), X, Y)
    r2 = optimise(GP2, 0.01)
    assert_differ(r1, r2)

    GP3 = GaussianProcess(GaussianKernel_iso([.3]), X, Y)
    r3 = optimise(GP3, 0.9)
    assert_differ(r1, r3)
    assert_differ(r2, r3)
def test1DcEI(self):
    """Smoke-test maximizeEI on a 1-D GP; nothing is asserted."""
    f = lambda x: float(sin(x * 5.))
    X = lhcSample([[0., 1.]], 5, seed=22)
    Y = [f(x) for x in X]

    kernel = GaussianKernel_ard(array([1.0]))
    GP = GaussianProcess(kernel)
    GP.addData(X, Y)

    # should use optimizeGP.cpp
    maxei = maximizeEI(GP, [[0., 1.]])

    # flip to True to plot the data, the EI curve and the optimum
    if False:
        figure(1)
        plot(X, Y, 'ro')
        # divide by a float: on Python 2, x/100 is integer division and
        # would put every grid point at 0
        grid = [x / 100. for x in range(100)]
        plot(grid, [GP.ei(g) for g in grid])
        plot(maxei[1][0], maxei[0], 'ko')
        show()
def __init__(self, kernel, bounds, NX, noise=0.05, xstar=None, **kwargs):
    """Build a synthetic test function as the mean of a randomly grown GP.

    `NX` Latin-hypercube points are added one at a time, each with a
    value drawn from the GP posterior at that point plus Gaussian noise.
    If `xstar` is given, a low value is planted there first, later draws
    are kept at least 0.5 above it, and after a BFGS search the GP inputs
    are shifted so the located minimum sits at `xstar`.

    Sets ``self.GP``, ``self.minimum`` and ``self.xstar``, and prints
    progress to stdout.
    """
    super(Synthetic, self).__init__("Synthetic", 0, None, bounds, **kwargs)
    self.name += ' %d' % len(bounds)

    self.GP = GaussianProcess(kernel)
    X = lhcSample(bounds, NX)
    self.GP.addData([X[0]], [normal(0, 1)])

    if xstar is not None:
        # plant a value that is at most -2 and below the first observation
        ystar = min(self.GP.Y[0] - 1.0, -2.0)
        self.GP.addData(xstar, ystar)

    for x in X[1:]:
        mu, sig2 = self.GP.posterior(x)
        y = normal(mu, sqrt(sig2)) + normal(0, noise)
        # preserve min if necessary
        if xstar is not None and y < ystar + .5:
            y = ystar + .5
        self.GP.addData(x, y)

    # now, try minimizing with BFGS
    start = self.GP.X[argmin(self.GP.Y)]
    xopt = fmin_bfgs(self.GP.mu, start, disp=False)

    # single-argument print(...) behaves the same on Python 2 and 3
    print("\t[synthetic] optimization started at %s, ended at %s" % (start, xopt))

    if xstar is not None:
        print('\t[synthetic] realigning minimum')
        # now, align minimum with what we specified
        for i, (target, origin) in enumerate(zip(xstar, xopt)):
            self.GP.X[:, i] += target - origin
        xopt = xstar

    self.minimum = self.GP.mu(xopt)
    self.xstar = xopt

    print('\t[synthetic] x+ = %s, f(x+) = %.3f' % (self.xstar, self.f(self.xstar)))
def test1DcUCB(self):
    """Check `direct`, `cdirect` and `maximizeUCB` agree on a 1-D GP."""
    f = lambda x: float(sin(x * 5.))
    X = lhcSample([[0., 1.]], 5, seed=22)
    Y = [f(x) for x in X]

    kernel = GaussianKernel_ard(array([1.0]))
    GP = GaussianProcess(kernel)
    GP.addData(X, Y)

    # should use optimizeGP.cpp
    ucbf = UCB(GP, 1)
    dopt, doptx = direct(ucbf.negf, [[0., 1.]], maxiter=10)
    copt, coptx = cdirect(ucbf.negf, [[0., 1.]], maxiter=10)
    mopt, moptx = maximizeUCB(GP, [[0., 1.]], maxiter=10)

    # direct/cdirect minimise -UCB, maximizeUCB returns +UCB
    self.assertAlmostEqual(dopt, copt, 4)
    self.assertAlmostEqual(-dopt, mopt, 4)
    self.assertAlmostEqual(-copt, mopt, 4)

    self.assertTrue(sum(abs(doptx - coptx)) < .01)
    self.assertTrue(sum(abs(moptx - coptx)) < .01)
    self.assertTrue(sum(abs(moptx - doptx)) < .01)
def test1DcUCB(self):
    """Check `direct`, `cdirect` and `maximizeUCB` agree on a 1-D GP."""
    box = [[0., 1.]]
    f = lambda x: float(sin(x * 5.))
    X = lhcSample(box, 5, seed=22)
    Y = [f(x) for x in X]

    GP = GaussianProcess(GaussianKernel_ard(array([1.0])))
    GP.addData(X, Y)

    # should use optimizeGP.cpp
    ucbf = UCB(GP, 1)
    dopt, doptx = direct(ucbf.negf, [[0., 1.]], maxiter=10)
    copt, coptx = cdirect(ucbf.negf, [[0., 1.]], maxiter=10)
    mopt, moptx = maximizeUCB(GP, [[0., 1.]], maxiter=10)

    # optimum values: the two DIRECT variants return the negated utility
    self.assertAlmostEqual(dopt, copt, 4)
    self.assertAlmostEqual(-dopt, mopt, 4)
    self.assertAlmostEqual(-copt, mopt, 4)

    # optimum locations, pairwise L1 distance
    self.assertTrue(sum(abs(doptx - coptx)) < .01)
    self.assertTrue(sum(abs(moptx - coptx)) < .01)
    self.assertTrue(sum(abs(moptx - doptx)) < .01)
def fastUCBGallery(GP, bounds, N, useBest=True, samples=300, useCDIRECT=True):
    """
    Use UCB to generate a gallery of N instances using Monte Carlo to
    approximate the optimization of the utility function.

    Despite the name, candidates are currently scored with EI (the UCB
    line is disabled). The gallery is seeded from the best in-bounds
    observation, the centre of the bounds, or the best prior optimum,
    depending on what `GP` holds. It is then grown one point at a time on
    a copy of the GP that is fed its own mean at each chosen point, so
    later picks move elsewhere. A candidate is only accepted if it is
    more than 0.5 (by `norm`) from every point already in the gallery.

    Returns the list of gallery points.
    """
    def in_bounds(x):
        for v, b in zip(x, bounds):
            if v < b[0] or v > b[1]:
                return False
        return True

    def far_from_gallery(x):
        # an empty gallery excludes nothing; min() of an empty sequence
        # would raise ValueError
        return len(gallery) == 0 or min(norm(x - gx) for gx in gallery) > .5

    gallery = []

    if len(GP.X) > 0:
        if useBest:
            # find best sample already seen, that lies within the bounds
            bestY = -inf
            bestX = None
            for x, y in zip(GP.X, GP.Y):
                if y > bestY and in_bounds(x):
                    bestY = y
                    bestX = x
            if bestX is not None:
                gallery.append(bestX)

        # create a "fake" GP from the GP that was passed in (can't just copy
        # b/c original could have been PrefGP)
        hallucGP = GaussianProcess(deepcopy(GP.kernel), deepcopy(GP.X), deepcopy(GP.Y), prior=GP.prior)
    elif GP.prior is None:
        # if we have no data and no prior, start in the center
        x = array([(b[0] + b[1]) / 2. for b in bounds])
        gallery.append(x)
        hallucGP = GaussianProcess(deepcopy(GP.kernel), [x], [0.0], prior=GP.prior)
    else:
        # optimize from prior
        bestmu = -inf
        bestX = None
        for m in GP.prior.means:
            candidate = fmin_bfgs(GP.negmu, m, disp=False)
            mu = GP.mu(candidate)
            if mu > bestmu:
                bestX = candidate
                bestmu = mu
        gallery.append(bestX)
        hallucGP = GaussianProcess(deepcopy(GP.kernel), bestX, bestmu, prior=GP.prior)

    while len(gallery) < N:
        bestUCB = -inf
        bestX = None

        # NOTE(review): candidates are scored with xi=.4 but maximizeEI
        # is run with xi=.3 -- confirm the mismatch is intended.
        ut = EI(hallucGP, xi=.4)

        opt, optx = maximizeEI(hallucGP, bounds, xi=.3, useCDIRECT=useCDIRECT)
        if far_from_gallery(optx):
            bestUCB = opt
            bestX = optx

        # try some random samples
        for x in lhcSample(bounds, samples):
            u = -ut.negf(x)
            if u > bestUCB and far_from_gallery(x):
                bestUCB = u
                bestX = x

        # now try the prior means
        if hallucGP.prior is not None:
            for x in hallucGP.prior.means:
                x = array([clip(v, b[0], b[1]) for v, b in zip(x, bounds)])
                # NOTE(review): the mean is clipped to `bounds` before
                # being rescaled by width/lowerb -- confirm the order.
                x = x * hallucGP.prior.width + hallucGP.prior.lowerb

                u = -ut.negf(x)
                if u > bestUCB and far_from_gallery(x):
                    bestUCB = u
                    bestX = x

        # NOTE(review): bestX is still None here if every candidate was
        # too close to the gallery; that case is not handled.
        gallery.append(bestX)
        hallucGP.addData(bestX, hallucGP.mu(bestX))

    return gallery
def cdirectGP(model, bounds, maxiter, maxtime, maxsample, acqfunc=None, xi=-1, beta=-1, scale=-1, delta=-1, **kwargs):
    """Maximise an acquisition function over `bounds` via the C++ `ego` library.

    `acqfunc` must be "ei", "pi" or "ucb". The GP's data, kernel
    hyperparameters, inverse of `model.R` and optional prior are
    flattened into C double arrays and passed to `acqmaxGP`. If anything
    on that path fails, the function falls back to `cdirect`, and then to
    the pure-Python `direct`, on a Python EI objective.

    Returns ``(opt, optx)``: the acquisition value and its location.

    Raises NotImplementedError for an unknown `acqfunc`.
    """
    acquisition_codes = {"ei": 0, "pi": 1, "ucb": 2}
    # validated up front so a bad name is reported, not swallowed by the
    # fallback below
    if acqfunc not in acquisition_codes:
        raise NotImplementedError("unknown acquisition function %s" % acqfunc)

    def ptr(arr):
        return arr.ctypes.data_as(POINTER(c_double))

    try:
        acquisition = acquisition_codes[acqfunc]
        if acqfunc == "ucb":
            t = len(model.Y) + 1
            NA = len(bounds)
            # NOTE(review): NA / 2 is integer division on Python 2 and
            # true division on Python 3 -- confirm which is wanted.
            parm = sqrt(scale * 2.0 * log(t ** (NA / 2 + 2) * pi ** 2 / (3.0 * delta)))
        else:
            parm = xi

        if isinstance(model.kernel, GaussianKernel_ard):
            kerneltype = 0
        elif isinstance(model.kernel, GaussianKernel_iso):
            kerneltype = 1
        elif isinstance(model.kernel, MaternKernel3):
            kerneltype = 2
        elif isinstance(model.kernel, MaternKernel5):
            print("Matern 5")
            kerneltype = 3
        else:
            raise NotImplementedError("kernel not implemented in C++: %s" % model.kernel.__class__)

        lpath = ctypes.util.find_library("ego")
        if lpath is None:
            import ASAW_config
            lpath = ASAW_config.get_ego_lib_dir() + "libego.so"

        lib = cdll[lpath]
        lib.acqmaxGP.restype = POINTER(c_double)
        lib.acqmaxGP.argtypes = [
            c_int,                                   # NA
            POINTER(c_double), POINTER(c_double),    # lower, upper
            POINTER(c_double),                       # invR
            POINTER(c_double), POINTER(c_double),    # X, Y
            c_int, c_int, c_int,                     # NX, acquisition, kerneltype
            POINTER(c_double),                       # hyper
            c_int,                                   # npbases
            POINTER(c_double), POINTER(c_double),    # pmeans, pbeta
            c_double,                                # pbtheta
            POINTER(c_double), POINTER(c_double),    # pblowerb, pbwidth
            c_double, c_double,                      # parm, noise
            c_int, c_int, c_int,                     # maxiter, maxtime, maxsample
        ]

        NX = len(model.Y)
        NA = len(bounds)
        npbases = 0 if model.prior is None else len(model.prior.means)
        pbtheta = 0 if model.prior is None else model.prior.theta
        if model.prior is None:
            # one-element placeholders so the pointers are valid
            c_pmeans = array([0], dtype=c_double)
            c_pbeta = array([0], dtype=c_double)
            c_pblowerb = array([0], dtype=c_double)
            c_pbwidth = array([0], dtype=c_double)
        else:
            c_pmeans = array(array(model.prior.means).reshape(-1), dtype=c_double)
            c_pbeta = array(model.prior.beta, dtype=c_double)
            c_pblowerb = array(model.prior.lowerb, dtype=c_double)
            c_pbwidth = array(model.prior.width, dtype=c_double)

        c_lower = array([b[0] for b in bounds], dtype=c_double)
        c_upper = array([b[1] for b in bounds], dtype=c_double)
        c_hyper = array(model.kernel.hyperparams, dtype=c_double)

        # TODO: use cholesky on the C++ side
        if isinstance(model, PrefGaussianProcess) and model.C is not None:
            c_invR = array(linalg.inv(model.R + linalg.inv(model.C)).reshape(-1), dtype=c_double)
        else:
            c_invR = array(linalg.inv(model.R).reshape(-1), dtype=c_double)

        c_X = array(array(model.X).reshape(-1), dtype=c_double)
        c_Y = array(model.Y, dtype=c_double)

        result = lib.acqmaxGP(
            c_int(NA),
            ptr(c_lower), ptr(c_upper),
            ptr(c_invR),
            ptr(c_X), ptr(c_Y),
            c_int(NX), c_int(acquisition), c_int(kerneltype),
            ptr(c_hyper),
            c_int(npbases),
            ptr(c_pmeans), ptr(c_pbeta),
            c_double(pbtheta),
            ptr(c_pblowerb), ptr(c_pbwidth),
            c_double(parm), c_double(model.noise),
            c_int(maxiter), c_int(maxtime), c_int(maxsample),
        )

        # result[0] is the negated optimum, result[1:NA+1] its location
        opt = -result[0]
        optx = array([x for x in result[1:NA + 1]])

        # free the pointer; find_library takes the name without the "lib"
        # prefix, so "c" (not "libc") locates the C runtime
        libc = CDLL(ctypes.util.find_library("c"))
        libc.free.argtypes = [c_void_p]
        libc.free.restype = None
        libc.free(result)

    except Exception:
        # Build the Python objective here. The original used `ei` before
        # it was ever assigned, so the fallback always raised
        # UnboundLocalError.
        # NOTE(review): the fallback optimises EI even when "pi" or "ucb"
        # was requested -- confirm this is acceptable.
        ei = EI(model, xi=xi) if acqfunc == "ei" else EI(model)
        try:
            print("[python] C++ MaxEI implementation unavailable, attempting C++ DIRECT on Python objective function.")
            opt, optx = cdirect(ei.negf, bounds, maxiter=maxiter, maxtime=maxtime, maxsample=maxsample, **kwargs)
        except Exception:
            # couldn't access cDIRECT, use Python DIRECT
            print("[python] C++ DIRECT unavailable, attempting Python DIRECT")
            opt, optx = direct(ei.negf, bounds, maxiter=maxiter, maxtime=maxtime, maxsample=maxsample, **kwargs)
        # both DIRECT variants minimise negf
        opt = -opt

    if False:
        # do a few random searches to see if we can get a better result.
        # mostly necessary for 1D or 2D optimizations which terminate too
        # soon.
        ei = EI(model)
        for s in lhcSample(bounds, 500):
            if -ei.negf(s) > opt:
                opt = -ei.negf(s)
                optx = s

    return opt, optx
def cdirectGP(model, bounds, maxiter, maxtime, maxsample, acqfunc=None, xi=-1, beta=-1, scale=-1, delta=-1, **kwargs):
    """Maximise an acquisition function over `bounds` via the C++ `ego` library.

    `acqfunc` must be 'ei', 'pi' or 'ucb'. The GP's data, kernel
    hyperparameters, inverse of `model.R` and optional prior are
    flattened into C double arrays and passed to `acqmaxGP`. If anything
    on that path fails (including the library not being found), the
    function falls back to `cdirect`, and then to the pure-Python
    `direct`, on a Python EI objective.

    Returns ``(opt, optx)``: the acquisition value and its location.

    Raises NotImplementedError for an unknown `acqfunc`.
    """
    acquisition_codes = {'ei': 0, 'pi': 1, 'ucb': 2}
    # validated up front so a bad name is reported, not swallowed by the
    # fallback below
    if acqfunc not in acquisition_codes:
        raise NotImplementedError('unknown acquisition function %s' % acqfunc)

    def ptr(arr):
        return arr.ctypes.data_as(POINTER(c_double))

    try:
        acquisition = acquisition_codes[acqfunc]
        if acqfunc == 'ucb':
            t = len(model.Y) + 1
            NA = len(bounds)
            # NOTE(review): NA / 2 is integer division on Python 2 and
            # true division on Python 3 -- confirm which is wanted.
            parm = sqrt(scale * 2.0 * log(t**(NA / 2 + 2) * pi**2 / (3.0 * delta)))
        else:
            parm = xi

        if isinstance(model.kernel, GaussianKernel_ard):
            kerneltype = 0
        elif isinstance(model.kernel, GaussianKernel_iso):
            kerneltype = 1
        elif isinstance(model.kernel, MaternKernel3):
            kerneltype = 2
        elif isinstance(model.kernel, MaternKernel5):
            print('Matern 5')
            kerneltype = 3
        else:
            raise NotImplementedError('kernel not implemented in C++: %s' % model.kernel.__class__)

        lpath = ctypes.util.find_library('ego')
        if lpath is None:
            # Try the known locations once. The original looped with
            # `while lpath is None`, which never ends if none exists.
            for lp in [
                    'cpp/libs/libego.dylib',
                    '/Users/eric/Dropbox/EGOcode/ego/libs/libego.so'
            ]:
                if os.path.exists(lp):
                    lpath = lp
        if lpath is None:
            print(
                '\n[python] could not find ego library! Did you forget to export DYLD_LIBRARY_PATH?'
            )
            # drop to the Python fallback below
            raise OSError('ego library not found')

        lib = cdll[lpath]
        lib.acqmaxGP.restype = POINTER(c_double)
        lib.acqmaxGP.argtypes = [
            c_int,                                   # NA
            POINTER(c_double), POINTER(c_double),    # lower, upper
            POINTER(c_double),                       # invR
            POINTER(c_double), POINTER(c_double),    # X, Y
            c_int, c_int, c_int,                     # NX, acquisition, kerneltype
            POINTER(c_double),                       # hyper
            c_int,                                   # npbases
            POINTER(c_double), POINTER(c_double),    # pmeans, pbeta
            c_double,                                # pbtheta
            POINTER(c_double), POINTER(c_double),    # pblowerb, pbwidth
            c_double, c_double,                      # parm, noise
            c_int, c_int, c_int                      # maxiter, maxtime, maxsample
        ]

        NX = len(model.Y)
        NA = len(bounds)
        npbases = 0 if model.prior is None else len(model.prior.means)
        pbtheta = 0 if model.prior is None else model.prior.theta
        if model.prior is None:
            # one-element placeholders so the pointers are valid
            c_pmeans = array([0], dtype=c_double)
            c_pbeta = array([0], dtype=c_double)
            c_pblowerb = array([0], dtype=c_double)
            c_pbwidth = array([0], dtype=c_double)
        else:
            c_pmeans = array(array(model.prior.means).reshape(-1), dtype=c_double)
            c_pbeta = array(model.prior.beta, dtype=c_double)
            c_pblowerb = array(model.prior.lowerb, dtype=c_double)
            c_pbwidth = array(model.prior.width, dtype=c_double)

        c_lower = array([b[0] for b in bounds], dtype=c_double)
        c_upper = array([b[1] for b in bounds], dtype=c_double)
        c_hyper = array(model.kernel.hyperparams, dtype=c_double)

        # TODO: use cholesky on the C++ side
        if isinstance(model, PrefGaussianProcess) and model.C is not None:
            c_invR = array(linalg.inv(model.R + linalg.inv(model.C)).reshape(-1), dtype=c_double)
        else:
            c_invR = array(linalg.inv(model.R).reshape(-1), dtype=c_double)

        c_X = array(array(model.X).reshape(-1), dtype=c_double)
        c_Y = array(model.Y, dtype=c_double)

        result = lib.acqmaxGP(c_int(NA),
                              ptr(c_lower), ptr(c_upper),
                              ptr(c_invR),
                              ptr(c_X), ptr(c_Y),
                              c_int(NX), c_int(acquisition), c_int(kerneltype),
                              ptr(c_hyper),
                              c_int(npbases),
                              ptr(c_pmeans), ptr(c_pbeta),
                              c_double(pbtheta),
                              ptr(c_pblowerb), ptr(c_pbwidth),
                              c_double(parm), c_double(model.noise),
                              c_int(maxiter), c_int(maxtime), c_int(maxsample))

        # result[0] is the negated optimum, result[1:NA+1] its location
        opt = -result[0]
        optx = array([x for x in result[1:NA + 1]])

        # free the pointer; find_library takes the name without the "lib"
        # prefix, so 'c' (not 'libc') locates the C runtime
        libc = CDLL(ctypes.util.find_library('c'))
        libc.free.argtypes = [c_void_p]
        libc.free.restype = None
        libc.free(result)

    except Exception:
        # Build the Python objective here. The original used `ei` before
        # it was ever assigned, so the fallback always raised
        # UnboundLocalError.
        # NOTE(review): the fallback optimises EI even when 'pi' or 'ucb'
        # was requested -- confirm this is acceptable.
        ei = EI(model, xi=xi) if acqfunc == 'ei' else EI(model)
        try:
            print(
                '[python] C++ MaxEI implementation unavailable, attempting C++ DIRECT on Python objective function.'
            )
            opt, optx = cdirect(ei.negf, bounds, maxiter=maxiter, maxtime=maxtime, maxsample=maxsample, **kwargs)
        except Exception:
            # couldn't access cDIRECT, use Python DIRECT
            print('[python] C++ DIRECT unavailable, attempting Python DIRECT')
            opt, optx = direct(ei.negf, bounds, maxiter=maxiter, maxtime=maxtime, maxsample=maxsample, **kwargs)
        # both DIRECT variants minimise negf
        opt = -opt

    if False:
        # do a few random searches to see if we can get a better result.
        # mostly necessary for 1D or 2D optimizations which terminate too
        # soon.
        ei = EI(model)
        for s in lhcSample(bounds, 500):
            if -ei.negf(s) > opt:
                opt = -ei.negf(s)
                optx = s

    return opt, optx
def fastUCBGallery(GP, bounds, N, useBest=True, samples=300, useCDIRECT=True):
    """
    Generate a gallery of N mutually distant candidate points.

    Despite the name, the utility that is optimized here is expected
    improvement (EI); the UCB utility is left commented out below.  Each
    gallery slot is filled by taking the best of three candidate sources:
    the maximizeEI optimum, `samples` Latin hypercube samples, and the
    means of the model prior (when there is one).  A candidate is only
    accepted if it lies more than 0.5 (Euclidean distance) away from every
    point already in the gallery.  After a point is chosen it is added to a
    scratch ("hallucinated") GP together with that GP's own predicted mean,
    so later picks see it as if it had been observed.

    Parameters
    ----------
    GP : model exposing X, Y, kernel, prior, mu() and negmu().  It is never
        modified; a separate GaussianProcess is built from copies of its
        kernel and data.
    bounds : sequence of (lower, upper) pairs, one per input dimension.
    N : the gallery is extended until it holds N points.
    useBest : when True and GP has data, seed the gallery with the observed
        point that has the highest Y among those lying inside `bounds`.
    samples : number of Latin hypercube samples tried per gallery slot.
    useCDIRECT : forwarded unchanged to maximizeEI.

    Returns
    -------
    list of points (the gallery).
    """
    # minimum Euclidean distance required between any two gallery entries
    minDist = .5
    gallery = []

    def isNovel(x):
        # An empty gallery imposes no distance constraint.  Checking for it
        # here also keeps min() from being handed an empty sequence, which
        # raises ValueError.
        return len(gallery) == 0 or min(norm(x - gx) for gx in gallery) > minDist

    if len(GP.X) > 0:
        if useBest:
            # find best sample already seen, that lies within the bounds
            bestY = -inf
            bestX = None
            for x, y in zip(GP.X, GP.Y):
                if y > bestY:
                    for v, b in zip(x, bounds):
                        if v < b[0] or v > b[1]:
                            break
                    else:
                        # no coordinate fell outside its bounds
                        bestY = y
                        bestX = x
            if bestX is not None:
                gallery.append(bestX)

        # create a "fake" GP from the GP that was passed in (can't just copy
        # b/c original could have been PrefGP)
        hallucGP = GaussianProcess(deepcopy(GP.kernel), deepcopy(GP.X),
                                   deepcopy(GP.Y), prior=GP.prior)
    elif GP.prior is None:
        # if we have no data and no prior, start in the center
        x = array([(b[0] + b[1]) / 2. for b in bounds])
        gallery.append(x)
        hallucGP = GaussianProcess(deepcopy(GP.kernel), [x], [0.0], prior=GP.prior)
    else:
        # no data but a prior: optimize the model mean starting from each
        # prior mean and keep the best clipped optimum
        if DEBUG: print('GET DATA FROM PRIOR')
        bestmu = -inf
        bestX = None
        for m in GP.prior.means:
            argmin = fmin_bfgs(GP.negmu, m, disp=False)
            if DEBUG: print(argmin)
            # BFGS is unconstrained, so pull the optimum back into bounds
            for i in range(len(argmin)):
                argmin[i] = clip(argmin[i], bounds[i][0], bounds[i][1])
            mu = GP.mu(argmin)
            if mu > bestmu:
                bestX = argmin
                bestmu = mu
            if DEBUG: print('***** bestmu = %s' % (bestmu,))
            if DEBUG: print('***** bestX = %s' % (bestX,))
        gallery.append(bestX)
        hallucGP = GaussianProcess(deepcopy(GP.kernel), bestX, bestmu, prior=GP.prior)

    while len(gallery) < N:
        if DEBUG: print('\n\n\thave %d data for gallery' % len(gallery))
        bestUCB = -inf
        bestX = None
        # ut = UCB(hallucGP, len(bounds), N)
        ut = EI(hallucGP, xi=.4)

        if DEBUG: print('\tget with max EI')
        opt, optx = maximizeEI(hallucGP, bounds, xi=.3, useCDIRECT=useCDIRECT)
        if isNovel(optx):
            if DEBUG: print('\tgot one')
            bestUCB = opt
            bestX = optx
        else:
            if DEBUG: print('\ttoo close to existing')

        # try some random samples
        if DEBUG: print('\ttry random samples')
        for x in lhcSample(bounds, samples):
            u = -ut.negf(x)
            if u > bestUCB and isNovel(x):
                if DEBUG: print('\they, this one is even better!')
                bestUCB = u
                bestX = x

        # now try the prior means
        if hallucGP.prior is not None:
            if DEBUG: print('\ttry prior means (bestUCB = %f)' % bestUCB)
            for pm in hallucGP.prior.means:
                # NOTE(review): the mean is clipped to `bounds` before it is
                # rescaled by the prior's width/lowerb, while the no-data
                # branch above uses prior.means unscaled -- confirm which
                # coordinate system prior.means is stored in.
                x = array([clip(pm[i], bounds[i][0], bounds[i][1])
                           for i in range(len(pm))])
                x = x * hallucGP.prior.width + hallucGP.prior.lowerb
                u = -ut.negf(x)
                # if DEBUG: print 'u = %f', u
                if u > bestUCB and isNovel(x):
                    if DEBUG: print('\tthis one is even better! prior mean %s has u = %f' % (x, u))
                    bestUCB = u
                    bestX = x

        # NOTE(review): if every candidate was rejected as too close, bestX
        # is still None at this point -- confirm callers cannot reach that.
        gallery.append(bestX)

        # "hallucinate" an observation at the chosen point using the model's
        # own prediction, so the next pick accounts for it
        hallucGP.addData(bestX, hallucGP.mu(bestX))

    return gallery