Example #1
0
    def _testKernelMaxEI(self):
        """
        Compare the EI optimum found by direct, cdirect and maximizeEI for
        several kernels on Shekel5, first without and then with a trained
        RBFN mean prior.

        The method name starts with an underscore, so it does not match
        unittest's default ``test`` method prefix.
        """
        # test different methods of optimizing kernel
        S5 = Shekel5()

        hv = 0.1
        # MaternKernel5([hv, 1.0]) is currently left out of the list
        testkernels = [GaussianKernel_iso([hv]),
                       GaussianKernel_ard([hv, hv, hv, hv]),
                       MaternKernel3([hv, 1.0])]

        for kernel in testkernels:
            # train GPs
            X = lhcSample(S5.bounds, 10, seed=0)
            Y = [S5.f(x) for x in X]

            GP = GaussianProcess(kernel, X, Y)

            eif = EI(GP)
            dopt, doptx = direct(eif.negf, S5.bounds, maxiter=10)
            copt, coptx = cdirect(eif.negf, S5.bounds, maxiter=10)
            mopt, moptx = maximizeEI(GP, S5.bounds, maxiter=10)

            # direct/cdirect are handed negf, so their optimum is expected
            # to be the negation of the value maximizeEI reports
            self.assertAlmostEqual(dopt, copt, 4)
            self.assertAlmostEqual(-dopt, mopt, 4)
            self.assertAlmostEqual(-copt, mopt, 4)

            # all three optimizers must also land on (nearly) the same point
            self.assertTrue(sum(abs(doptx-coptx)) < .01)
            self.assertTrue(sum(abs(moptx-coptx)) < .01)
            self.assertTrue(sum(abs(moptx-doptx)) < .01)

            # train GP w/prior
            pX = lhcSample(S5.bounds, 100, seed=101)
            pY = [S5.f(x) for x in pX]
            prior = RBFNMeanPrior()
            prior.train(pX, pY, bounds=S5.bounds, k=10, seed=102)

            GP = GaussianProcess(kernel, X, Y, prior=prior)

            eif = EI(GP)
            pdopt, pdoptx = direct(eif.negf, S5.bounds, maxiter=10)
            pcopt, pcoptx = cdirect(eif.negf, S5.bounds, maxiter=10)
            pmopt, pmoptx = maximizeEI(GP, S5.bounds, maxiter=10)

            # adding the prior is expected to change the optimum ...
            self.assertNotAlmostEqual(pdopt, dopt, 3)
            # ... while the three optimizers still agree with each other
            self.assertAlmostEqual(pdopt, pcopt, 4)
            self.assertAlmostEqual(-pdopt, pmopt, 4)
            self.assertAlmostEqual(-pcopt, pmopt, 4)

            self.assertTrue(sum(abs(pdoptx-pcoptx)) < .01)
            self.assertTrue(sum(abs(pmoptx-pcoptx)) < .01)
            self.assertTrue(sum(abs(pmoptx-pdoptx)) < .01)
Example #2
0
    def testNoise(self):
        """
        Check EI optimization on Branin for three GP noise levels
        (1e-4, 0.01, 0.1).

        For each level the GP must store the noise it was given, and direct,
        cdirect and maximizeEI must agree on the optimum.  Across levels the
        optima are required to differ.
        """
        tf = Branin()

        X = lhcSample(tf.bounds, 10, seed=0)
        Y = [tf.f(x) for x in X]

        # --- noise = 1e-4 ---
        GP1 = GaussianProcess(MaternKernel3([1.0, 1.0]), X, Y, noise=1e-4)
        self.assertEqual(GP1.noise, 1e-4)

        eif1 = EI(GP1)
        dopt1, _ = direct(eif1.negf, tf.bounds, maxiter=10)
        copt1, _ = cdirect(eif1.negf, tf.bounds, maxiter=10)
        mopt1, _ = maximizeEI(GP1, tf.bounds, maxiter=10)

        # direct/cdirect are handed negf, hence the sign flip against
        # the value reported by maximizeEI
        self.assertAlmostEqual(dopt1, copt1, 4)
        self.assertAlmostEqual(-dopt1, mopt1, 4)
        self.assertAlmostEqual(-copt1, mopt1, 4)

        # --- noise = 0.01 ---
        GP2 = GaussianProcess(MaternKernel3([1.0, 1.0]), X, Y, noise=0.01)
        self.assertEqual(GP2.noise, 0.01)

        eif2 = EI(GP2)
        dopt2, _ = direct(eif2.negf, tf.bounds, maxiter=10)
        copt2, _ = cdirect(eif2.negf, tf.bounds, maxiter=10)
        mopt2, _ = maximizeEI(GP2, tf.bounds, maxiter=10)
        self.assertAlmostEqual(dopt2, copt2, 4)
        self.assertAlmostEqual(-dopt2, mopt2, 4)
        self.assertAlmostEqual(-copt2, mopt2, 4)

        # a different noise level must give a different optimum
        self.assertNotAlmostEqual(dopt1, dopt2, 4)
        self.assertNotAlmostEqual(copt1, copt2, 4)
        self.assertNotAlmostEqual(mopt1, mopt2, 4)

        # --- noise = 0.1 ---
        GP3 = GaussianProcess(MaternKernel3([1.0, 1.0]), X, Y, noise=0.1)
        self.assertEqual(GP3.noise, 0.1)
        eif3 = EI(GP3)
        dopt3, _ = direct(eif3.negf, tf.bounds, maxiter=10)
        copt3, _ = cdirect(eif3.negf, tf.bounds, maxiter=10)
        mopt3, _ = maximizeEI(GP3, tf.bounds, maxiter=10)
        self.assertAlmostEqual(dopt3, copt3, 4)
        self.assertAlmostEqual(-dopt3, mopt3, 4)
        self.assertAlmostEqual(-copt3, mopt3, 4)

        self.assertNotAlmostEqual(dopt1, dopt3, 4)
        self.assertNotAlmostEqual(copt1, copt3, 4)
        self.assertNotAlmostEqual(mopt1, mopt3, 4)
        self.assertNotAlmostEqual(dopt2, dopt3, 4)
        self.assertNotAlmostEqual(copt2, copt3, 4)
        self.assertNotAlmostEqual(mopt2, mopt3, 4)
Example #3
0
    def testXi(self):
        """
        Check EI optimization on Shekel5 for three values of the EI ``xi``
        parameter (0.0, 0.01, 0.1).

        For each value direct, cdirect and maximizeEI must agree on the
        optimum; across values the optima are required to differ.  Note that
        the first GP also uses a different kernel width (.2) than the other
        two (.3).
        """
        S5 = Shekel5()

        # --- xi = 0.0; the GP is created empty and filled via addData ---
        GP1 = GaussianProcess(GaussianKernel_iso([.2]))
        X = lhcSample(S5.bounds, 10, seed=0)
        Y = [S5.f(x) for x in X]
        GP1.addData(X, Y)

        eif1 = EI(GP1, xi=0.0)
        dopt1, _ = direct(eif1.negf, S5.bounds, maxiter=10)
        copt1, _ = cdirect(eif1.negf, S5.bounds, maxiter=10)
        mopt1, _ = maximizeEI(GP1, S5.bounds, xi=0.0, maxiter=10)

        # direct/cdirect are handed negf, hence the sign flip against
        # the value reported by maximizeEI
        self.assertAlmostEqual(dopt1, copt1, 4)
        self.assertAlmostEqual(-dopt1, mopt1, 4)
        self.assertAlmostEqual(-copt1, mopt1, 4)

        # --- xi = 0.01 ---
        GP2 = GaussianProcess(GaussianKernel_iso([.3]), X, Y)
        eif2 = EI(GP2, xi=0.01)
        self.assertEqual(eif2.xi, 0.01)
        dopt2, _ = direct(eif2.negf, S5.bounds, maxiter=10)
        copt2, _ = cdirect(eif2.negf, S5.bounds, maxiter=10)
        mopt2, _ = maximizeEI(GP2, S5.bounds, xi=0.01, maxiter=10)
        self.assertAlmostEqual(dopt2, copt2, 4)
        self.assertAlmostEqual(-dopt2, mopt2, 4)
        self.assertAlmostEqual(-copt2, mopt2, 4)

        self.assertNotAlmostEqual(dopt1, dopt2, 4)
        self.assertNotAlmostEqual(copt1, copt2, 4)
        self.assertNotAlmostEqual(mopt1, mopt2, 4)

        # --- xi = 0.1 ---
        GP3 = GaussianProcess(GaussianKernel_iso([.3]), X, Y)
        eif3 = EI(GP3, xi=0.1)
        dopt3, _ = direct(eif3.negf, S5.bounds, maxiter=10)
        copt3, _ = cdirect(eif3.negf, S5.bounds, maxiter=10)
        mopt3, _ = maximizeEI(GP3, S5.bounds, xi=0.1, maxiter=10)
        self.assertAlmostEqual(dopt3, copt3, 4)
        self.assertAlmostEqual(-dopt3, mopt3, 4)
        self.assertAlmostEqual(-copt3, mopt3, 4)

        self.assertNotAlmostEqual(dopt1, dopt3, 4)
        self.assertNotAlmostEqual(copt1, copt3, 4)
        self.assertNotAlmostEqual(mopt1, mopt3, 4)
        self.assertNotAlmostEqual(dopt2, dopt3, 4)
        self.assertNotAlmostEqual(copt2, copt3, 4)
        self.assertNotAlmostEqual(mopt2, mopt3, 4)
Example #4
0
    def testMaxEIPrior(self):
        """
        Make sure that a mean prior works with the different methods of EI
        maximization, and that neither the optimization nor evaluating the
        prior mean alters the training data held by the GP.
        """
        S5 = Shekel5()
        pX = lhcSample(S5.bounds, 100, seed=511)
        pY = [S5.f(x) for x in pX]
        prior = RBFNMeanPrior()
        prior.train(pX, pY, bounds=S5.bounds, k=10, seed=504)

        hv = .1
        hyper = [hv, hv, hv, hv]
        kernel = GaussianKernel_ard(hyper)

        # train GPs
        X = lhcSample(S5.bounds, 10, seed=512)
        Y = [S5.f(x) for x in X]

        # validation: private copies to compare the GP's data against later
        valX = [x.copy() for x in X]
        valY = copy(Y)

        GP = GaussianProcess(kernel, X, Y, prior=prior)

        eif = EI(GP)
        copt, _ = cdirect(eif.negf, S5.bounds, maxiter=20)
        mopt, _ = maximizeEI(GP, S5.bounds, maxiter=20)

        # cdirect is handed negf, hence the sign flip
        self.assertAlmostEqual(-copt, mopt, 2)

        def checkDataUntouched():
            # every stored sample must still equal its validation copy
            for i in range(len(GP.X)):
                self.assertTrue(all(valX[i] == GP.X[i]))
                self.assertTrue(valY[i] == GP.Y[i])

        # two identical rounds: verify the data, evaluate the prior mean on
        # the first sample, then verify that sample was not modified
        for _ in range(2):
            checkDataUntouched()
            GP.prior.mu(GP.X[0])
            self.assertTrue(all(valX[0] == GP.X[0]))
Example #5
0
def demoObservations():
    """
    Simple demo for a scenario where we have direct observations (ie ratings
    or responses) with noise.  The model has three parameters, but after
    initial training, we fix one to be 1.0 and optimize the other two.  At
    each step, we visualize the posterior mean, variance and expected
    improvement.  We then find the point of maximum expected improvement and
    ask the user for the scalar response value.

    To see how the model adapts to inputs, try rating the first few values
    higher or lower than predicted and see what happens to the visualizations.

    The loop ends as soon as the user enters something that is not a number
    (or sends EOF / Ctrl-C).
    """

    # the kernel parameters control the impact of different values on the
    # parameters.  we are defining a model with three parameters
    kernel = GaussianKernel_ard(array([.5, .5, .3]))

    # we want to allow some noise in the observations -- the noise parameter
    # is the variance of the additive Gaussian noise   Y + N(0, noise)
    noise = 0.1

    # create the Gaussian Process using the kernel we've just defined
    GP = GaussianProcess(kernel, noise=noise)

    # add some data to the model.  the data must have the same dimensionality
    # as the kernel
    X = [
        array([1, 1.5, 0.9]),
        array([.8, -.2, -0.1]),
        array([2, .8, -.2]),
        array([0, 0, .5]),
    ]
    Y = [1, .7, .6, -.1]

    print('adding data to model')
    for x, y in zip(X, Y):
        print('\tx = %s, y = %.1f' % (x, y))

    GP.addData(X, Y)

    # the GP.posterior(x) function returns, for x, the posterior distribution
    # at x, characterized as a normal distribution with mean mu, variance
    # sigma^2
    testX = [array([1, 1.45, 1.0]), array([-10, .5, -10])]

    for tx in testX:
        mu, sig2 = GP.posterior(tx)
        print('the posterior of %s is a normal distribution N(%.3f, %.3f)' % (
            tx, mu, sig2))

    # now, let's find the best points to evaluate next.  we fix the first
    # dimension to be 1 and for the others, we search roughly [-2, 2]
    bound = [[1, 1], [-1.99, 1.98], [-1.99, 1.98]]

    # Read the reply as plain text and convert it with float() instead of
    # letting Python 2's input() evaluate whatever the user typed.
    # raw_input only exists on Python 2; on Python 3 input() already
    # returns the raw line.
    try:
        readLine = raw_input
    except NameError:
        readLine = input

    figure(1, figsize=(5, 10))
    while True:
        _, optx = maximizeEI(GP, bound, xi=.1)

        # visualize the mean, variance and expected improvement functions on
        # the free parameters
        x1 = arange(bound[1][0], bound[1][1], 0.1)
        x2 = arange(bound[2][0], bound[2][1], 0.1)
        X1, X2 = meshgrid(x1, x2)
        eiGrid = zeros_like(X1)
        meanGrid = zeros_like(X1)
        varGrid = zeros_like(X1)

        # the GP does not change while the grid is filled, so one EI object
        # serves every cell
        eif = EI(GP)
        for i in range(X1.shape[0]):
            for j in range(X1.shape[1]):
                z = array([1.0, X1[i, j], X2[i, j]])
                eiGrid[i, j] = -eif.negf(z)
                meanGrid[i, j], varGrid[i, j] = GP.posterior(z)

        clf()
        panels = (
            (meanGrid, 'prediction (posterior mean)'),
            (varGrid, 'uncertainty (posterior variance)'),
            (eiGrid, 'utility (expected improvement)'),
        )
        for i, (func, title) in enumerate(panels):
            ax = subplot(3, 1, i + 1)
            cs = ax.contourf(X1, X2, func, 20)
            # mark the point of maximum expected improvement
            ax.plot(optx[1], optx[2], 'wo')
            colorbar(cs)
            ax.set_title(title)
            ax.set_xlabel('x[1]')
            ax.set_ylabel('x[2]')
            ax.set_xticks([-2, 0, 2])
            ax.set_yticks([-2, 0, 2])

        show()

        optMean, optVar = GP.posterior(optx)
        prompt = (
            '\nmaximum expected improvement is at parameters '
            'x = [%.3f, %.3f, %.3f], where mean is %.3f, variance is %.3f.  '
            '\nwhat is the value there (non-numeric to quit)? '
            % (optx[0], optx[1], optx[2], optMean, optVar))
        try:
            response = float(readLine(prompt))
        except (ValueError, EOFError, KeyboardInterrupt):
            # non-numeric reply, end of input or Ctrl-C: leave the demo
            break
        GP.addData(optx, response)
        print('updating model.')
Example #6
0
def fastUCBGallery(GP, bounds, N, useBest=True, samples=300, useCDIRECT=True):
    """
    Use UCB to generate a gallery of N instances using Monte Carlo to
    approximate the optimization of the utility function.

    Despite the name, the utility evaluated below is EI; the UCB line is
    commented out.

    GP          model to build the gallery for; its X, Y, kernel and prior
                are read (and negmu/mu when there is a prior but no data)
    bounds      sequence of (lower, upper) pairs, one per dimension
    N           number of gallery entries to produce
    useBest     if True and GP has data, seed the gallery with the best
                observation that lies within the bounds
    samples     number of points requested from lhcSample in each round
    useCDIRECT  forwarded to maximizeEI

    Returns the gallery as a list of points.  Each new entry is also added
    to a private "hallucinated" GP with its predicted mean as the value.
    """
    # minimum distance a candidate must keep from every gallery member
    minDist = .5
    gallery = []

    def inBounds(x):
        # True if every coordinate of x lies inside its (lower, upper) pair
        for v, b in zip(x, bounds):
            if v < b[0] or v > b[1]:
                return False
        return True

    def isNovel(x):
        # An empty gallery accepts any point.  The explicit check matters:
        # min() over an empty gallery raises ValueError.
        return not gallery or min(norm(x - gx) for gx in gallery) > minDist

    if len(GP.X) > 0:
        if useBest:
            # find best sample already seen, that lies within the bounds
            bestY = -inf
            bestX = None
            for x, y in zip(GP.X, GP.Y):
                if y > bestY and inBounds(x):
                    bestY = y
                    bestX = x
            if bestX is not None:
                gallery.append(bestX)

        # create a "fake" GP from the GP that was passed in (can't just copy
        # b/c original could have been PrefGP)
        hallucGP = GaussianProcess(deepcopy(GP.kernel), deepcopy(GP.X),
                                   deepcopy(GP.Y), prior=GP.prior)
    elif GP.prior is None:
        # if we have no data and no prior, start in the center
        x = array([(b[0]+b[1])/2. for b in bounds])
        gallery.append(x)
        hallucGP = GaussianProcess(deepcopy(GP.kernel), [x], [0.0],
                                   prior=GP.prior)
    else:
        # optimize from prior
        if DEBUG:
            print('GET DATA FROM PRIOR')
        bestmu = -inf
        bestX = None
        for m in GP.prior.means:
            argmin = fmin_bfgs(GP.negmu, m, disp=False)
            if DEBUG:
                print(argmin)
            # pull the optimizer's result back inside the bounds
            for i in range(len(argmin)):
                argmin[i] = clip(argmin[i], bounds[i][0], bounds[i][1])
            mu = GP.mu(argmin)
            if mu > bestmu:
                bestX = argmin
                bestmu = mu
                if DEBUG:
                    print('***** bestmu = %s' % (bestmu,))
                    print('***** bestX = %s' % (bestX,))
        gallery.append(bestX)
        # NOTE(review): unlike the branch above, a single point and a scalar
        # are passed here rather than lists -- confirm GaussianProcess
        # accepts both forms
        hallucGP = GaussianProcess(deepcopy(GP.kernel), bestX, bestmu,
                                   prior=GP.prior)

    while len(gallery) < N:
        if DEBUG:
            print('\n\n\thave %d data for gallery' % len(gallery))
        bestUCB = -inf
        bestX = None
        # ut = UCB(hallucGP, len(bounds), N)
        ut = EI(hallucGP, xi=.4)

        if DEBUG:
            print('\tget with max EI')
        opt, optx = maximizeEI(hallucGP, bounds, xi=.3, useCDIRECT=useCDIRECT)
        if isNovel(optx):
            if DEBUG:
                print('\tgot one')
            bestUCB = opt
            bestX = optx
        else:
            if DEBUG:
                print('\ttoo close to existing')

        # try some random samples
        if DEBUG:
            print('\ttry random samples')
        for x in lhcSample(bounds, samples):
            u = -ut.negf(x)
            if u > bestUCB and isNovel(x):
                if DEBUG:
                    print('\they, this one is even better!')
                bestUCB = u
                bestX = x

        # now try the prior means
        if hallucGP.prior is not None:
            if DEBUG:
                print('\ttry prior means (bestUCB = %f)' % bestUCB)
            for x in hallucGP.prior.means:
                x = array([clip(x[i], bounds[i][0], bounds[i][1])
                           for i in range(len(x))])
                x = x * hallucGP.prior.width + hallucGP.prior.lowerb
                u = -ut.negf(x)
                if u > bestUCB and isNovel(x):
                    if DEBUG:
                        print('\tthis one is even better!  prior mean %s has u = %f' % (x, u))
                    bestUCB = u
                    bestX = x

        # NOTE(review): bestX is still None here if every candidate was too
        # close to the gallery; that case is not handled
        gallery.append(bestX)

        # hallucinate an observation at the chosen point so the next round
        # sees it as data
        hallucGP.addData(bestX, hallucGP.mu(bestX))

    return gallery
Example #7
0
def fastUCBGallery(GP, bounds, N, useBest=True, samples=300, useCDIRECT=True):
    """
    Use UCB to generate a gallery of N instances using Monte Carlo to
    approximate the optimization of the utility function.

    Despite the name, the utility evaluated below is EI; the UCB line is
    commented out.

    GP          model to build the gallery for; its X, Y, kernel and prior
                are read (and negmu/mu when there is a prior but no data)
    bounds      sequence of (lower, upper) pairs, one per dimension
    N           number of gallery entries to produce
    useBest     if True and GP has data, seed the gallery with the best
                observation that lies within the bounds
    samples     number of points requested from lhcSample in each round
    useCDIRECT  forwarded to maximizeEI

    Returns the gallery as a list of points.  Each new entry is also added
    to a private "hallucinated" GP with its predicted mean as the value.
    """
    # minimum distance a candidate must keep from every gallery member
    minDist = .5
    gallery = []

    def inBounds(x):
        # True if every coordinate of x lies inside its (lower, upper) pair
        for v, b in zip(x, bounds):
            if v < b[0] or v > b[1]:
                return False
        return True

    def isNovel(x):
        # An empty gallery accepts any point.  The explicit check matters:
        # min() over an empty gallery raises ValueError.
        return not gallery or min(norm(x - gx) for gx in gallery) > minDist

    if len(GP.X) > 0:
        if useBest:
            # find best sample already seen, that lies within the bounds
            bestY = -inf
            bestX = None
            for x, y in zip(GP.X, GP.Y):
                if y > bestY and inBounds(x):
                    bestY = y
                    bestX = x
            if bestX is not None:
                gallery.append(bestX)

        # create a "fake" GP from the GP that was passed in (can't just copy
        # b/c original could have been PrefGP)
        hallucGP = GaussianProcess(deepcopy(GP.kernel),
                                   deepcopy(GP.X),
                                   deepcopy(GP.Y),
                                   prior=GP.prior)
    elif GP.prior is None:
        # if we have no data and no prior, start in the center
        x = array([(b[0] + b[1]) / 2. for b in bounds])
        gallery.append(x)
        hallucGP = GaussianProcess(deepcopy(GP.kernel), [x], [0.0],
                                   prior=GP.prior)
    else:
        # optimize from prior
        bestmu = -inf
        bestX = None
        for m in GP.prior.means:
            argmin = fmin_bfgs(GP.negmu, m, disp=False)
            mu = GP.mu(argmin)
            if mu > bestmu:
                bestX = argmin
                bestmu = mu
        gallery.append(bestX)
        # NOTE(review): unlike the branch above, a single point and a scalar
        # are passed here rather than lists -- confirm GaussianProcess
        # accepts both forms
        hallucGP = GaussianProcess(deepcopy(GP.kernel),
                                   bestX,
                                   bestmu,
                                   prior=GP.prior)

    while len(gallery) < N:
        bestUCB = -inf
        bestX = None
        # ut = UCB(hallucGP, len(bounds), N)
        ut = EI(hallucGP, xi=.4)

        opt, optx = maximizeEI(hallucGP, bounds, xi=.3, useCDIRECT=useCDIRECT)
        if isNovel(optx):
            bestUCB = opt
            bestX = optx

        # try some random samples
        for x in lhcSample(bounds, samples):
            u = -ut.negf(x)
            if u > bestUCB and isNovel(x):
                bestUCB = u
                bestX = x

        # now try the prior means
        if hallucGP.prior is not None:
            for x in hallucGP.prior.means:
                x = array([
                    clip(x[i], bounds[i][0], bounds[i][1])
                    for i in range(len(x))
                ])
                x = x * hallucGP.prior.width + hallucGP.prior.lowerb
                u = -ut.negf(x)
                if u > bestUCB and isNovel(x):
                    bestUCB = u
                    bestX = x

        # NOTE(review): bestX is still None here if every candidate was too
        # close to the gallery; that case is not handled
        gallery.append(bestX)

        # hallucinate an observation at the chosen point so the next round
        # sees it as data
        hallucGP.addData(bestX, hallucGP.mu(bestX))

    return gallery