def _testKernelMaxEI(self):
    # test that the different EI-maximization methods agree for a variety of kernels
    S5 = Shekel5()

    hv = 0.1
    testkernels = [GaussianKernel_iso([hv]),
                   GaussianKernel_ard([hv, hv, hv, hv]),
                   MaternKernel3([hv, 1.0])]
                   # MaternKernel5([hv, 1.0])]

    for kernel in testkernels:
        # print
        # print kernel.__class__

        # train GPs
        X = lhcSample(S5.bounds, 10, seed=0)
        Y = [S5.f(x) for x in X]

        GP = GaussianProcess(kernel, X, Y)

        eif = EI(GP)
        dopt, doptx = direct(eif.negf, S5.bounds, maxiter=10)
        copt, coptx = cdirect(eif.negf, S5.bounds, maxiter=10)
        mopt, moptx = maximizeEI(GP, S5.bounds, maxiter=10)
        # print dopt, doptx
        # print copt, coptx
        # print mopt, moptx

        self.failUnlessAlmostEqual(dopt, copt, 4)
        self.failUnlessAlmostEqual(-dopt, mopt, 4)
        self.failUnlessAlmostEqual(-copt, mopt, 4)

        self.failUnless(sum(abs(doptx - coptx)) < .01)
        self.failUnless(sum(abs(moptx - coptx)) < .01)
        self.failUnless(sum(abs(moptx - doptx)) < .01)

        # train GP w/ prior
        pX = lhcSample(S5.bounds, 100, seed=101)
        pY = [S5.f(x) for x in pX]
        prior = RBFNMeanPrior()
        prior.train(pX, pY, bounds=S5.bounds, k=10, seed=102)

        GP = GaussianProcess(kernel, X, Y, prior=prior)

        eif = EI(GP)
        pdopt, pdoptx = direct(eif.negf, S5.bounds, maxiter=10)
        pcopt, pcoptx = cdirect(eif.negf, S5.bounds, maxiter=10)
        pmopt, pmoptx = maximizeEI(GP, S5.bounds, maxiter=10)

        self.failIfAlmostEqual(pdopt, dopt, 3)
        self.failUnlessAlmostEqual(pdopt, pcopt, 4)
        self.failUnlessAlmostEqual(-pdopt, pmopt, 4)
        self.failUnlessAlmostEqual(-pcopt, pmopt, 4)

        self.failUnless(sum(abs(pdoptx - pcoptx)) < .01)
        self.failUnless(sum(abs(pmoptx - pcoptx)) < .01)
        self.failUnless(sum(abs(pmoptx - pdoptx)) < .01)
def testNoise(self):
    # the EI optima found by direct, cdirect and maximizeEI should agree with
    # each other, but should change as the GP noise level changes
    tf = Branin()

    X = lhcSample(tf.bounds, 10, seed=0)
    Y = [tf.f(x) for x in X]

    GP1 = GaussianProcess(MaternKernel3([1.0, 1.0]), X, Y, noise=1e-4)
    self.failUnlessEqual(GP1.noise, 1e-4)

    eif1 = EI(GP1)
    dopt1, _ = direct(eif1.negf, tf.bounds, maxiter=10)
    copt1, _ = cdirect(eif1.negf, tf.bounds, maxiter=10)
    mopt1, _ = maximizeEI(GP1, tf.bounds, maxiter=10)

    self.failUnlessAlmostEqual(dopt1, copt1, 4)
    self.failUnlessAlmostEqual(-dopt1, mopt1, 4)
    self.failUnlessAlmostEqual(-copt1, mopt1, 4)

    GP2 = GaussianProcess(MaternKernel3([1.0, 1.0]), X, Y, noise=0.01)
    self.failUnlessEqual(GP2.noise, 0.01)

    eif2 = EI(GP2)
    dopt2, _ = direct(eif2.negf, tf.bounds, maxiter=10)
    copt2, _ = cdirect(eif2.negf, tf.bounds, maxiter=10)
    mopt2, _ = maximizeEI(GP2, tf.bounds, maxiter=10)

    self.failUnlessAlmostEqual(dopt2, copt2, 4)
    self.failUnlessAlmostEqual(-dopt2, mopt2, 4)
    self.failUnlessAlmostEqual(-copt2, mopt2, 4)

    self.failIfAlmostEqual(dopt1, dopt2, 4)
    self.failIfAlmostEqual(copt1, copt2, 4)
    self.failIfAlmostEqual(mopt1, mopt2, 4)

    GP3 = GaussianProcess(MaternKernel3([1.0, 1.0]), X, Y, noise=0.1)
    self.failUnlessEqual(GP3.noise, 0.1)

    eif3 = EI(GP3)
    dopt3, _ = direct(eif3.negf, tf.bounds, maxiter=10)
    copt3, _ = cdirect(eif3.negf, tf.bounds, maxiter=10)
    mopt3, _ = maximizeEI(GP3, tf.bounds, maxiter=10)

    self.failUnlessAlmostEqual(dopt3, copt3, 4)
    self.failUnlessAlmostEqual(-dopt3, mopt3, 4)
    self.failUnlessAlmostEqual(-copt3, mopt3, 4)

    self.failIfAlmostEqual(dopt1, dopt3, 4)
    self.failIfAlmostEqual(copt1, copt3, 4)
    self.failIfAlmostEqual(mopt1, mopt3, 4)
    self.failIfAlmostEqual(dopt2, dopt3, 4)
    self.failIfAlmostEqual(copt2, copt3, 4)
    self.failIfAlmostEqual(mopt2, mopt3, 4)
def testXi(self):
    S5 = Shekel5()

    GP1 = GaussianProcess(GaussianKernel_iso([.2]))
    # self.failUnlessEqual(GP1.xi, 0.0)
    X = lhcSample(S5.bounds, 10, seed=0)
    Y = [S5.f(x) for x in X]
    GP1.addData(X, Y)

    eif1 = EI(GP1, xi=0.0)
    dopt1, _ = direct(eif1.negf, S5.bounds, maxiter=10)
    copt1, _ = cdirect(eif1.negf, S5.bounds, maxiter=10)
    mopt1, _ = maximizeEI(GP1, S5.bounds, xi=0.0, maxiter=10)
    self.failUnlessAlmostEqual(dopt1, copt1, 4)
    self.failUnlessAlmostEqual(-dopt1, mopt1, 4)
    self.failUnlessAlmostEqual(-copt1, mopt1, 4)

    GP2 = GaussianProcess(GaussianKernel_iso([.3]), X, Y)
    eif2 = EI(GP2, xi=0.01)
    self.failUnlessEqual(eif2.xi, 0.01)
    dopt2, _ = direct(eif2.negf, S5.bounds, maxiter=10)
    copt2, _ = cdirect(eif2.negf, S5.bounds, maxiter=10)
    mopt2, _ = maximizeEI(GP2, S5.bounds, xi=0.01, maxiter=10)
    self.failUnlessAlmostEqual(dopt2, copt2, 4)
    self.failUnlessAlmostEqual(-dopt2, mopt2, 4)
    self.failUnlessAlmostEqual(-copt2, mopt2, 4)

    self.failIfAlmostEqual(dopt1, dopt2, 4)
    self.failIfAlmostEqual(copt1, copt2, 4)
    self.failIfAlmostEqual(mopt1, mopt2, 4)

    GP3 = GaussianProcess(GaussianKernel_iso([.3]), X, Y)
    eif3 = EI(GP3, xi=0.1)
    dopt3, _ = direct(eif3.negf, S5.bounds, maxiter=10)
    copt3, _ = cdirect(eif3.negf, S5.bounds, maxiter=10)
    mopt3, _ = maximizeEI(GP3, S5.bounds, xi=0.1, maxiter=10)
    self.failUnlessAlmostEqual(dopt3, copt3, 4)
    self.failUnlessAlmostEqual(-dopt3, mopt3, 4)
    self.failUnlessAlmostEqual(-copt3, mopt3, 4)

    self.failIfAlmostEqual(dopt1, dopt3, 4)
    self.failIfAlmostEqual(copt1, copt3, 4)
    self.failIfAlmostEqual(mopt1, mopt3, 4)
    self.failIfAlmostEqual(dopt2, dopt3, 4)
    self.failIfAlmostEqual(copt2, copt3, 4)
    self.failIfAlmostEqual(mopt2, mopt3, 4)
def testMaxEIPrior(self):
    # make sure that the prior works with the different methods of EI
    # maximization
    S5 = Shekel5()
    pX = lhcSample(S5.bounds, 100, seed=511)
    pY = [S5.f(x) for x in pX]
    prior = RBFNMeanPrior()
    prior.train(pX, pY, bounds=S5.bounds, k=10, seed=504)

    hv = .1
    hyper = [hv, hv, hv, hv]
    kernel = GaussianKernel_ard(hyper)

    # train GPs
    X = lhcSample(S5.bounds, 10, seed=512)
    Y = [S5.f(x) for x in X]

    # validation copies of the training data
    valX = list(x.copy() for x in X)
    valY = copy(Y)

    GP = GaussianProcess(kernel, X, Y, prior=prior)

    eif = EI(GP)
    copt, _ = cdirect(eif.negf, S5.bounds, maxiter=20)
    mopt, _ = maximizeEI(GP, S5.bounds, maxiter=20)

    self.failUnlessAlmostEqual(-copt, mopt, 2)

    # EI maximization should not have modified the training data
    for i in xrange(len(GP.X)):
        self.failUnless(all(valX[i] == GP.X[i]))
        self.failUnless(valY[i] == GP.Y[i])

    # evaluating the prior mean should not modify the data either
    GP.prior.mu(GP.X[0])
    self.failUnless(all(valX[0] == GP.X[0]))
    # print GP.X
def demoObservations():
    """
    Simple demo for a scenario where we have direct observations (i.e.,
    ratings or responses) with noise.  The model has three parameters, but
    after initial training, we fix one to be 1.0 and optimize the other two.

    At each step, we visualize the posterior mean, variance and expected
    improvement.  We then find the point of maximum expected improvement and
    ask the user for the scalar response value.

    To see how the model adapts to inputs, try rating the first few values
    higher or lower than predicted and see what happens to the
    visualizations.
    """
    # the kernel parameters control the impact of different values on the
    # parameters.  we are defining a model with three parameters
    kernel = GaussianKernel_ard(array([.5, .5, .3]))

    # we want to allow some noise in the observations -- the noise parameter
    # is the variance of the additive Gaussian noise Y + N(0, noise)
    noise = 0.1

    # create the Gaussian Process using the kernel we've just defined
    GP = GaussianProcess(kernel, noise=noise)

    # add some data to the model.  the data must have the same
    # dimensionality as the kernel
    X = [array([1, 1.5, 0.9]),
         array([.8, -.2, -0.1]),
         array([2, .8, -.2]),
         array([0, 0, .5])]
    Y = [1, .7, .6, -.1]

    print 'adding data to model'
    for x, y in zip(X, Y):
        print '\tx = %s, y = %.1f' % (x, y)

    GP.addData(X, Y)

    # the GP.posterior(x) function returns, for x, the posterior distribution
    # at x, characterized as a normal distribution with mean mu, variance
    # sigma^2
    testX = [array([1, 1.45, 1.0]),
             array([-10, .5, -10])]

    for tx in testX:
        mu, sig2 = GP.posterior(tx)
        print 'the posterior of %s is a normal distribution N(%.3f, %.3f)' % (tx, mu, sig2)

    # now, let's find the best points to evaluate next.  we fix the first
    # dimension to be 1 and for the others, we search (approximately) the
    # range [-2, 2]
    bound = [[1, 1], [-1.99, 1.98], [-1.99, 1.98]]

    figure(1, figsize=(5, 10))
    while True:
        _, optx = maximizeEI(GP, bound, xi=.1)

        # visualize the mean, variance and expected improvement functions on
        # the free parameters
        x1 = arange(bound[1][0], bound[1][1], 0.1)
        x2 = arange(bound[2][0], bound[2][1], 0.1)
        X1, X2 = meshgrid(x1, x2)
        ei = zeros_like(X1)
        m = zeros_like(X1)
        v = zeros_like(X1)
        for i in xrange(X1.shape[0]):
            for j in xrange(X1.shape[1]):
                z = array([1.0, X1[i, j], X2[i, j]])
                ei[i, j] = -EI(GP).negf(z)
                m[i, j], v[i, j] = GP.posterior(z)

        clf()
        for i, (func, title) in enumerate(([m, 'prediction (posterior mean)'],
                                           [v, 'uncertainty (posterior variance)'],
                                           [ei, 'utility (expected improvement)'])):
            ax = subplot(3, 1, i + 1)
            cs = ax.contourf(X1, X2, func, 20)
            ax.plot(optx[1], optx[2], 'wo')
            colorbar(cs)
            ax.set_title(title)
            ax.set_xlabel('x[1]')
            ax.set_ylabel('x[2]')
            ax.set_xticks([-2, 0, 2])
            ax.set_yticks([-2, 0, 2])
        show()

        m, v = GP.posterior(optx)
        try:
            response = input('\nmaximum expected improvement is at parameters x = [%.3f, %.3f, %.3f], where mean is %.3f, variance is %.3f. \nwhat is the value there (non-numeric to quit)? ' % (optx[0], optx[1], optx[2], m, v))
        except:
            break
        GP.addData(optx, response)
        print 'updating model.'
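# A minimal, non-interactive sketch of the posterior/EI queries that
# demoObservations() performs above.  It reuses the GaussianProcess,
# GaussianKernel_ard and EI classes used elsewhere in this module (and its
# module-level imports); the data points and query location are illustrative
# only and are not taken from the original demo.
def _demoPosteriorEISketch():
    GP = GaussianProcess(GaussianKernel_ard(array([.5, .5, .3])), noise=0.1)
    GP.addData([array([1.0, 1.5, 0.9]), array([0.0, 0.0, 0.5])], [1.0, -0.1])

    z = array([1.0, 0.5, 0.0])
    mu, sig2 = GP.posterior(z)      # posterior mean and variance at z
    ei = -EI(GP).negf(z)            # EI exposes a negated utility via negf
    print 'posterior at %s: N(%.3f, %.3f), EI = %.4f' % (z, mu, sig2, ei)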
def fastUCBGallery(GP, bounds, N, useBest=True, samples=300, useCDIRECT=True):
    """
    Use a UCB-style utility (currently EI with a large xi -- the UCB utility
    is left commented out below) to generate a gallery of N instances, using
    Monte Carlo sampling to approximate the optimization of the utility
    function.
    """
    gallery = []

    if len(GP.X) > 0:
        if useBest:
            # find best sample already seen, that lies within the bounds
            bestY = -inf
            bestX = None
            for x, y in zip(GP.X, GP.Y):
                if y > bestY:
                    for v, b in zip(x, bounds):
                        if v < b[0] or v > b[1]:
                            break
                    else:
                        bestY = y
                        bestX = x
            if bestX is not None:
                gallery.append(bestX)

        # create a "fake" GP from the GP that was passed in (can't just copy
        # b/c original could have been PrefGP)
        hallucGP = GaussianProcess(deepcopy(GP.kernel), deepcopy(GP.X),
                                   deepcopy(GP.Y), prior=GP.prior)
    elif GP.prior is None:
        # if we have no data and no prior, start in the center
        x = array([(b[0] + b[1]) / 2. for b in bounds])
        gallery.append(x)
        hallucGP = GaussianProcess(deepcopy(GP.kernel), [x], [0.0],
                                   prior=GP.prior)
    else:
        # optimize from prior
        bestmu = -inf
        bestX = None
        for m in GP.prior.means:
            argmin = fmin_bfgs(GP.negmu, m, disp=False)
            if GP.mu(argmin) > bestmu:
                bestX = argmin
                bestmu = GP.mu(argmin)
        gallery.append(bestX)
        # wrap the single point/value in lists, as in the other branches
        hallucGP = GaussianProcess(deepcopy(GP.kernel), [bestX], [bestmu],
                                   prior=GP.prior)

    while len(gallery) < N:
        bestUCB = -inf
        bestX = None

        # ut = UCB(hallucGP, len(bounds), N)
        ut = EI(hallucGP, xi=.4)

        # start from the EI maximizer, if it is far enough from the gallery
        opt, optx = maximizeEI(hallucGP, bounds, xi=.3, useCDIRECT=useCDIRECT)
        if len(gallery) == 0 or min(norm(optx - gx) for gx in gallery) > .5:
            bestUCB = opt
            bestX = optx

        # try some random samples
        for x in lhcSample(bounds, samples):
            u = -ut.negf(x)
            if u > bestUCB and (len(gallery) == 0 or
                                min(norm(x - gx) for gx in gallery) > .5):
                # print '\they, this one is even better!'
                bestUCB = u
                bestX = x

        # now try the prior means
        if hallucGP.prior is not None:
            for x in hallucGP.prior.means:
                x = array([clip(x[i], bounds[i][0], bounds[i][1])
                           for i in xrange(len(x))])
                x = x * hallucGP.prior.width + hallucGP.prior.lowerb
                u = -ut.negf(x)
                if u > bestUCB:
                    if len(gallery) == 0 or min(norm(x - gx) for gx in gallery) > .5:
                        bestUCB = u
                        bestX = x

        gallery.append(bestX)
        hallucGP.addData(bestX, hallucGP.mu(bestX))

    return gallery
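# A minimal usage sketch for fastUCBGallery(), assuming the GaussianProcess,
# GaussianKernel_ard and lhcSample names used elsewhere in this module.  The
# bounds, sample counts and toy objective are illustrative only; they are not
# part of the original code.
def _demoGallerySketch():
    bounds = [[0., 5.], [0., 5.]]
    GP = GaussianProcess(GaussianKernel_ard(array([1., 1.])), noise=0.01)

    # seed the model with a handful of observations of a toy objective
    X = lhcSample(bounds, 5, seed=1)
    Y = [-sum((x - 2.5) ** 2) for x in X]    # toy objective, purely illustrative
    GP.addData(X, Y)

    # ask for a gallery of 3 well-separated, high-utility candidates
    gallery = fastUCBGallery(GP, bounds, 3)
    for g in gallery:
        print '\tcandidate:', g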