def testShekelClass(self):
    """Fit a GP to Shekel-5 samples and sanity-check its posterior.

    Verifies that (a) posterior variance at the training inputs is
    bounded by the observation noise, (b) the posterior mean reproduces
    the training targets, and (c) held-out points are predicted within
    one posterior standard deviation of the true function value.
    """
    S = Shekel5()

    # 50 Latin-hypercube training samples of the Shekel-5 function.
    X = lhcSample(S.bounds, 50, seed=2)
    Y = [S.f(x) for x in X]

    hyper = [.2, .2, .2, .2]
    noise = 0.1
    gkernel = GaussianKernel_ard(hyper)
    GP = GaussianProcess(gkernel, X, Y, noise=noise)

    # At the training samples, variance should be determined by the
    # noise level and the mean should be close to the observed targets.
    mu, sig2 = GP.posteriors(X)
    for m, s, y in zip(mu, sig2, Y):
        self.failUnless(s < 1 / (1 + noise))
        self.failUnless(abs(m - y) < 2 * noise)

    # Held-out samples: each prediction should fall within one stdev
    # of the true function value.
    testX = lhcSample(S.bounds, 50, seed=3)
    # BUG FIX: targets must be computed from the *test* inputs -- the
    # original evaluated S.f over the training X, so the zip below
    # paired each test point with an unrelated target.
    testY = [S.f(x) for x in testX]
    for tx, ty in zip(testX, testY):
        m, s = GP.posterior(tx)
        self.failUnless(abs(ty - m) / sqrt(s) < 1)
def testShekelGPPrior(self):
    """A trained RBFN mean prior should cut GP error on Shekel-5 by >= 20%."""
    shekel = Shekel5()

    # Fit the RBFN mean prior on a largish sample of the function.
    priorX = lhcSample(shekel.bounds, 100, seed=8)
    priorY = [shekel.f(x) for x in priorX]
    prior = RBFNMeanPrior()
    prior.train(priorX, priorY, shekel.bounds, k=10, seed=103)

    # Small training set shared by both GPs; identical ARD kernel.
    hv = .1
    kernel = GaussianKernel_ard([hv, hv, hv, hv])
    trainX = lhcSample(shekel.bounds, 10, seed=9)
    trainY = [shekel.f(x) for x in trainX]
    withPrior = GaussianProcess(kernel, trainX, trainY, prior=prior)
    withoutPrior = GaussianProcess(kernel, trainX, trainY, prior=None)

    # Compare mean squared error over a large held-out sample.
    testX = lhcSample(shekel.bounds, 1000, seed=10)
    errNoPrior = mean([(shekel.f(x) - withoutPrior.mu(x)) ** 2 for x in testX])
    errPrior = mean([(shekel.f(x) - withPrior.mu(x)) ** 2 for x in testX])
    self.failUnless(errPrior < errNoPrior * .8)
def test1DGP(self):
    """Smoke test: build a GP over 1-D Latin-hypercube samples of sin(5x)."""
    # NOTE(review): this test only checks that construction succeeds --
    # it makes no assertions about the fitted GP. Consider adding checks
    # on GP.mu / GP.posterior at the training points.
    def objective(x):
        return float(sin(x * 5.))

    X = lhcSample([[0., 1.]], 5, seed=25)
    Y = [objective(x) for x in X]
    kernel = GaussianKernel_ard(array([1.0, 1.0]))
    GP = GaussianProcess(kernel, X=X, Y=Y)
def test1DPreferences(self):
    """Exercise PrefGaussianProcess: preferred points must get higher means,
    and a strong preference (degree 1) must produce the largest gap."""
    showit = False  # flip on to plot the fitted GP while debugging

    p1 = array([.2])
    p2 = array([.7])
    p3 = array([.4])
    p4 = array([.35])
    p5 = array([.9])
    p6 = array([.1])

    GP = PrefGaussianProcess(GaussianKernel_ard(array([.1])))

    # p1 is preferred to p2.
    GP.addPreferences([(p1, p2, 0)])
    self.failUnless(GP.mu(p1) > GP.mu(p2))
    if showit:
        figure(1)
        clf()
        S = arange(0, 1, .01)
        ax = subplot(1, 3, 1)
        ax.plot(S, [GP.mu(x) for x in S], 'k-')
        ax.plot(GP.X, GP.Y, 'ro')

    # p3 preferred to p4; the earlier preference must still hold.
    GP.addPreferences([(p3, p4, 0)])
    self.failUnless(GP.mu(p1) > GP.mu(p2))
    self.failUnless(GP.mu(p3) > GP.mu(p4))
    if showit:
        ax = subplot(1, 3, 2)
        ax.plot(S, [GP.mu(x) for x in S], 'k-')
        ax.plot(GP.X, GP.Y, 'ro')

    # p5 is greatly preferred to p6 (degree 1) -- f(p5)-f(p6) should be
    # the most pronounced difference of the three pairs.
    GP.addPreferences([(p5, p6, 1)])
    self.failUnless(GP.mu(p1) > GP.mu(p2))
    self.failUnless(GP.mu(p3) > GP.mu(p4))
    self.failUnless(GP.mu(p5) > GP.mu(p6))
    self.failUnless(GP.mu(p5) - GP.mu(p6) > GP.mu(p1) - GP.mu(p2))
    self.failUnless(GP.mu(p5) - GP.mu(p6) > GP.mu(p3) - GP.mu(p4))
    if showit:
        ax = subplot(1, 3, 3)
        ax.plot(S, [GP.mu(x) for x in S], 'k-')
        ax.plot(GP.X, GP.Y, 'ro')
        show()
def testGPPrior(self):
    """A trained data prior should at least halve GP error on sum(sin(20x))."""
    def target(x):
        return sum(sin(x * 20))

    bounds = [[0., 1.]]

    # Fit the RBFN mean prior on a dense sample of the target function.
    priorX = lhcSample([[0., 1.]], 100, seed=6)
    priorY = [target(x) for x in priorX]
    prior = RBFNMeanPrior()
    prior.train(priorX, priorY, bounds, k=10, seed=102)

    # Only two training points for the GPs themselves, so the prior
    # has plenty of room to help.
    X = lhcSample([[0., 1.]], 2, seed=7)
    Y = [target(x) for x in X]
    kernel = GaussianKernel_ard(array([.1]))
    withPrior = GaussianProcess(kernel, X, Y, prior=prior)
    withoutPrior = GaussianProcess(kernel, X, Y)

    # Compare mean squared error over a fine grid of the domain.
    grid = arange(0, 1, .01)
    errNoPrior = mean([(target(x) - withoutPrior.mu(x)) ** 2 for x in grid])
    errPrior = mean([(target(x) - withPrior.mu(x)) ** 2 for x in grid])
    self.failUnless(errPrior < errNoPrior * .5)

    if False:  # debugging visualization
        figure(1)
        clf()
        plot(grid, [prior.mu(x) for x in grid], 'g-', alpha=0.3)
        plot(grid, [withoutPrior.mu(x) for x in grid], 'b-', alpha=0.3)
        plot(grid, [withPrior.mu(x) for x in grid], 'k-', lw=2)
        plot(X, Y, 'ko')
        show()
def testARDGaussianKernelHyperparameterLearning(self):
    """Check ARD Gaussian kernel derivatives, the marginal likelihood and
    its gradient (Cholesky vs. direct inversion), and BFGS optimization
    of the log hyperparameters."""
    hyper = array([2., 2., .1])

    # Expected partial derivatives of the kernel matrix w.r.t. each
    # hyperparameter (reference values computed offline).
    target0 = matrix(
        '[0 .0046 .0001 0; .0046 0 .0268 0; .0001 .0268 0 0; 0 0 0 0]')
    target1 = matrix(
        '[0 .0345 .0006 0; .0345 0 .2044 0; .0006 .2044 0 0; 0 0 0 0]')
    target2 = matrix(
        '[0 .4561 .54 .012; .4561 0 0 0; .54 .0 0 0; .012 0 0 0]')
    # target3 pairs with the disabled derivative(self.X, 3) check below.
    target3 = matrix(
        '[2 .2281 .27 .0017; .2281 2 1.7528 0; .27 1.7528 2 0; .0017 0 0 2]'
    )

    pder0 = GaussianKernel_ard(hyper).derivative(self.X, 0)
    pder1 = GaussianKernel_ard(hyper).derivative(self.X, 1)
    pder2 = GaussianKernel_ard(hyper).derivative(self.X, 2)
    # pder3 = GaussianKernel_ard(hyper).derivative(self.X, 3)

    epsilon = .0001
    for i, (target, pder) in enumerate([(target0, pder0), (target1, pder1),
                                        (target2, pder2)]):
        for j in xrange(4):
            for k in xrange(4):
                if abs(target[j, k] - pder[j, k]) > epsilon:
                    # FIX: use self.fail instead of print + bare
                    # "assert False" -- asserts are stripped under -O,
                    # and self.fail puts the diagnostics in the report.
                    self.fail(
                        'element [%d, %d] of pder%d differs from expected '
                        'by > %f\ntarget:\n%s\npder:\n%s' %
                        (j, k, i, epsilon, target, pder))

    # Marginal likelihood and its gradient via the Cholesky path.
    gkernel = GaussianKernel_ard(hyper)
    margl, marglderiv = marginalLikelihood(gkernel, self.X, self.Y,
                                           len(hyper), useCholesky=True)
    self.assertAlmostEqual(margl, 5.8404, 2)
    for d, t in zip(marglderiv, [0.0039, 0.0302, -0.1733, -1.8089]):
        self.assertAlmostEqual(d, t, 2)

    # Direct inversion must agree with the Cholesky computation.
    imargl, imarglderiv = marginalLikelihood(gkernel, self.X, self.Y,
                                             len(hyper), useCholesky=False)
    self.assertAlmostEqual(margl, imargl, 2)
    for c, i in zip(marglderiv, imarglderiv):
        self.assertAlmostEqual(c, i, 2)

    # Optimize the marginal likelihood over log hyperparameters via BFGS.
    hyper = array([2., 2., .1, 1.])
    argmin = optimize.fmin_bfgs(
        nlml, log(hyper), dnlml,
        args=[SVGaussianKernel_ard, self.X, self.Y], disp=False)
    for v, t in zip(argmin, [6.9714, 0.95405, -0.9769, 0.36469]):
        self.assertAlmostEqual(v, t, 2)