Example 1
    def testGDelta(self):

        # usually, Gdelta==G
        GP = GaussianProcess(GaussianKernel_iso([0.05]))
        X = lhcSample([[0., 1.]], 5, seed=10)
        Y = [x**2 for x in X]
        GP.train(X, Y)

        G = (Y[0] - max(Y)) / (Y[0] - 1)
        self.failUnlessEqual(G, Gdelta(GP, [[0., 1.]], Y[0], 1.0, 0.01))

        # sometimes, though, Gdelta > G -- this GP has a very high confidence
        # prediction of a very good point at x ~ .65
        GP = GaussianProcess(GaussianKernel_iso([0.1]))
        X = array([[.5], [.51], [.59], [.6]])
        Y = array([1., 2., 2., 1.])
        GP.train(X, Y)
        # figure(1)
        # A = arange(0, 1, 0.01)
        # post = [GP.posterior(x) for x in A]
        # plot(A, [p[0] for p in post], 'k-')
        # plot(A, [p[0]+p[1] for p in post], 'k:')
        # show()
        G = (Y[0] - max(Y)) / (Y[0] - 4.0)
        Gd = Gdelta(GP, [[0., 1.]], Y[0], 4.0, 0.01)
        self.failUnless(G < Gd)

        # however, if there is more variance, we will collapse back to G
        GP = GaussianProcess(GaussianKernel_iso([.001]))
        GP.train(X, Y)
        G = (Y[0] - max(Y)) / (Y[0] - 4.0)
        self.failUnlessEqual(G, Gdelta(GP, [[0., 1.]], Y[0], 4.0, 0.01))
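
The inline expression for G above is the usual gap measure: the fraction of the distance from the first observed value to the known optimum (the 1.0 and 4.0 passed to Gdelta here) that the best observation so far has closed, with Gdelta being the GP-aware variant under test. A restatement of that inline formula as a small helper, purely for illustration:

def gap(Y, y_opt):
    # fraction of the gap between the first observation and the optimum
    # covered by the best observation so far (0 = no progress, 1 = solved)
    return (Y[0] - max(Y)) / (Y[0] - y_opt)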
Example 2
    def testXi(self):

        S5 = Shekel5()

        GP1 = GaussianProcess(GaussianKernel_iso([.2]))
        # self.failUnlessEqual(GP1.xi, 0.0)
        X = lhcSample(S5.bounds, 10, seed=0)
        Y = [S5.f(x) for x in X]
        GP1.addData(X, Y)

        eif1 = EI(GP1, xi=0.0)
        dopt1, _ = direct(eif1.negf, S5.bounds, maxiter=10)
        copt1, _ = cdirect(eif1.negf, S5.bounds, maxiter=10)
        mopt1, _ = maximizeEI(GP1, S5.bounds, xi=0.0, maxiter=10)

        self.failUnlessAlmostEqual(dopt1, copt1, 4)
        self.failUnlessAlmostEqual(-dopt1, mopt1, 4)
        self.failUnlessAlmostEqual(-copt1, mopt1, 4)

        GP2 = GaussianProcess(GaussianKernel_iso([.3]), X, Y)
        eif2 = EI(GP2, xi=0.01)
        self.failUnlessEqual(eif2.xi, 0.01)
        dopt2, _ = direct(eif2.negf, S5.bounds, maxiter=10)
        copt2, _ = cdirect(eif2.negf, S5.bounds, maxiter=10)
        mopt2, _ = maximizeEI(GP2, S5.bounds, xi=0.01, maxiter=10)
        self.failUnlessAlmostEqual(dopt2, copt2, 4)
        self.failUnlessAlmostEqual(-dopt2, mopt2, 4)
        self.failUnlessAlmostEqual(-copt2, mopt2, 4)

        self.failIfAlmostEqual(dopt1, dopt2, 4)
        self.failIfAlmostEqual(copt1, copt2, 4)
        self.failIfAlmostEqual(mopt1, mopt2, 4)

        GP3 = GaussianProcess(GaussianKernel_iso([.3]), X, Y)
        eif3 = EI(GP3, xi=0.1)
        dopt3, _ = direct(eif3.negf, S5.bounds, maxiter=10)
        copt3, _ = cdirect(eif3.negf, S5.bounds, maxiter=10)
        mopt3, _ = maximizeEI(GP3, S5.bounds, xi=0.1, maxiter=10)
        self.failUnlessAlmostEqual(dopt3, copt3, 4)
        self.failUnlessAlmostEqual(-dopt3, mopt3, 4)
        self.failUnlessAlmostEqual(-copt3, mopt3, 4)

        self.failIfAlmostEqual(dopt1, dopt3, 4)
        self.failIfAlmostEqual(copt1, copt3, 4)
        self.failIfAlmostEqual(mopt1, mopt3, 4)
        self.failIfAlmostEqual(dopt2, dopt3, 4)
        self.failIfAlmostEqual(copt2, copt3, 4)
        self.failIfAlmostEqual(mopt2, mopt3, 4)
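
The xi parameter raises the improvement target above the incumbent, which is why the three settings above (0.0, 0.01, 0.1) locate different optima. A minimal standalone sketch of the standard expected-improvement formula with an exploration parameter; the library's EI class may differ in detail, and the helper below is purely illustrative:

from numpy import sqrt
from scipy.stats import norm

def expected_improvement(mu, sig2, ybest, xi=0.0):
    # mu, sig2: GP posterior mean and variance at a candidate point
    # ybest:    best observation so far; xi: extra improvement demanded
    sigma = sqrt(sig2)
    if sigma == 0.0:
        return 0.0
    z = (mu - ybest - xi) / sigma
    return (mu - ybest - xi) * norm.cdf(z) + sigma * norm.pdf(z)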
Example 3
    def test2DcEI(self):

        f = lambda x: sum(sin(x))
        bounds = [[0., 5.], [0., 5.]]
        X = lhcSample(bounds, 5, seed=23)
        Y = [f(x) for x in X]

        kernel = GaussianKernel_iso(array([1.0]))
        GP = GaussianProcess(kernel, X, Y)

        maxei = maximizeEI(GP, bounds)

        if False:
            figure(1)
            c0 = [(i / 100.) * (bounds[0][1] - bounds[0][0]) + bounds[0][0]
                  for i in range(101)]
            c1 = [(i / 100.) * (bounds[1][1] - bounds[1][0]) + bounds[1][0]
                  for i in range(101)]
            z = array([[GP.ei(array([i, j])) for i in c0] for j in c1])

            ax = plt.subplot(111)
            ax.contour(c0, c1, z, 10, alpha=0.5, cmap=cm.Blues_r)
            plot([x[0] for x in X], [x[1] for x in X], 'ro')
            for i in range(len(X)):
                annotate('%.2f' % Y[i], X[i])
            plot(maxei[1][0], maxei[1][1], 'ko')
            show()
Example 4
def test():
    GP = GaussianProcess(GaussianKernel_iso([.2, 1.0]))
    X = array([[.2], [.3], [.5], [1.5]])
    Y = [1, 0, 1, .75]
    GP.addData(X, Y)

    figure(1)
    A = arange(0, 2, 0.01)
    mu = array([GP.mu(x) for x in A])
    sig2 = array([GP.posterior(x)[1] for x in A])

    Ei = EI(GP)
    ei = [-Ei.negf(x) for x in A]

    Pi = PI(GP)
    pi = [-Pi.negf(x) for x in A]

    Ucb = UCB(GP, 1, T=2)
    ucb = [-Ucb.negf(x) for x in A]

    ax = subplot(1, 1, 1)
    ax.plot(A, mu, 'k-', lw=2)
    xv, yv = poly_between(A, mu - sig2, mu + sig2)
    ax.fill(xv, yv, color="#CCCCCC")

    ax.plot(A, ei, 'g-', lw=2, label='EI')
    ax.plot(A, ucb, 'g--', lw=2, label='UCB')
    ax.plot(A, pi, 'g:', lw=2, label='PI')
    ax.plot(X, Y, 'ro')
    ax.legend()
    draw()
    show()
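
The acquisition functions plotted here (EI, PI, UCB) are the same ones maximized by the tests above. A sketch of how maximizeEI could drive a sequential optimization loop, assuming (as the tests do) that it returns the optimum value and its location, and relying on the same names the examples import; the objective f and the loop itself are illustrative, not part of the library:

def optimize(f, bounds, budget=20):
    # initial Latin hypercube design, as in the tests
    X = lhcSample(bounds, 5, seed=1)
    Y = [f(x) for x in X]
    GP = GaussianProcess(GaussianKernel_iso([.2]), X, Y)
    for _ in range(budget):
        # pick the point with maximal expected improvement and evaluate it;
        # addData is assumed to accept incremental updates, as in the tests
        _, xnext = maximizeEI(GP, bounds, xi=0.01, maxiter=10)
        GP.addData([xnext], [f(xnext)])
    return GP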
Example 5
    def testXi(self):

        S5 = Shekel5()

        GP1 = GaussianProcess(GaussianKernel_iso([.2]))
        # self.failUnlessEqual(GP1.xi, 0.0)
        X = lhcSample(S5.bounds, 10, seed=0)
        Y = [S5.f(x) for x in X]
        GP1.addData(X, Y)

        ucbf1 = UCB(GP1, len(S5.bounds), scale=0.5)
        dopt1, _ = direct(ucbf1.negf, S5.bounds, maxiter=10)
        copt1, _ = cdirect(ucbf1.negf, S5.bounds, maxiter=10)
        mopt1, _ = maximizeUCB(GP1, S5.bounds, scale=0.5, maxiter=10)

        self.failUnlessAlmostEqual(dopt1, copt1, 4)
        self.failUnlessAlmostEqual(-dopt1, mopt1, 4)
        self.failUnlessAlmostEqual(-copt1, mopt1, 4)

        GP2 = GaussianProcess(GaussianKernel_iso([.3]), X, Y)
        ucbf2 = UCB(GP2, len(S5.bounds), scale=0.01)
        dopt2, _ = direct(ucbf2.negf, S5.bounds, maxiter=10)
        copt2, _ = cdirect(ucbf2.negf, S5.bounds, maxiter=10)
        mopt2, _ = maximizeUCB(GP2, S5.bounds, scale=.01, maxiter=10)
        self.failUnlessAlmostEqual(dopt2, copt2, 4)
        self.failUnlessAlmostEqual(-dopt2, mopt2, 4)
        self.failUnlessAlmostEqual(-copt2, mopt2, 4)

        self.failIfAlmostEqual(dopt1, dopt2, 4)
        self.failIfAlmostEqual(copt1, copt2, 4)
        self.failIfAlmostEqual(mopt1, mopt2, 4)

        GP3 = GaussianProcess(GaussianKernel_iso([.3]), X, Y)
        ucbf3 = UCB(GP3, len(S5.bounds), scale=.9)
        dopt3, _ = direct(ucbf3.negf, S5.bounds, maxiter=10)
        copt3, _ = cdirect(ucbf3.negf, S5.bounds, maxiter=10)
        mopt3, _ = maximizeUCB(GP3, S5.bounds, scale=0.9, maxiter=10)
        self.failUnlessAlmostEqual(dopt3, copt3, 4)
        self.failUnlessAlmostEqual(-dopt3, mopt3, 4)
        self.failUnlessAlmostEqual(-copt3, mopt3, 4)

        self.failIfAlmostEqual(dopt1, dopt3, 4)
        self.failIfAlmostEqual(copt1, copt3, 4)
        self.failIfAlmostEqual(mopt1, mopt3, 4)
        self.failIfAlmostEqual(dopt2, dopt3, 4)
        self.failIfAlmostEqual(copt2, copt3, 4)
        self.failIfAlmostEqual(mopt2, mopt3, 4)
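
The scale argument controls how strongly the upper confidence bound rewards posterior variance, which is why the three settings above land on different optima. A generic UCB sketch for comparison; the library's UCB class also takes the problem dimensionality (and a T horizon in Example 4), so its actual confidence multiplier is presumably scheduled rather than a fixed kappa:

from numpy import sqrt

def ucb(GP, x, kappa=1.0):
    # posterior mean and variance, as returned by GP.posterior in these examples
    mu, sig2 = GP.posterior(x)
    # kappa plays the role that scale / dimensionality play in the library's UCB
    return mu + kappa * sqrt(sig2)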
Example 6
    def _testKernelMaxEI(self):
        
        # test different methods of optimizing kernel
        S5 = Shekel5()
        
        hv = 0.1
        testkernels = [GaussianKernel_iso([hv]),
                       GaussianKernel_ard([hv, hv, hv, hv]),
                       MaternKernel3([hv, 1.0])]
                       # MaternKernel5([hv, 1.0])]

        for kernel in testkernels:
            # print
            # print kernel.__class__
            
        
            # train GPs
            X = lhcSample(S5.bounds, 10, seed=0)
            Y = [S5.f(x) for x in X]
        
            GP = GaussianProcess(kernel, X, Y)
        
            eif = EI(GP)
            dopt, doptx = direct(eif.negf, S5.bounds, maxiter=10)
            copt, coptx = cdirect(eif.negf, S5.bounds, maxiter=10)
            mopt, moptx = maximizeEI(GP, S5.bounds, maxiter=10)
            # print dopt, doptx
            # print copt, coptx
            # print mopt, moptx
        
            self.failUnlessAlmostEqual(dopt, copt, 4)
            self.failUnlessAlmostEqual(-dopt, mopt, 4)
            self.failUnlessAlmostEqual(-copt, mopt, 4)
        
            self.failUnless(sum(abs(doptx-coptx)) < .01)
            self.failUnless(sum(abs(moptx-coptx)) < .01)
            self.failUnless(sum(abs(moptx-doptx)) < .01)
        
            # train GP w/prior
            pX = lhcSample(S5.bounds, 100, seed=101)
            pY = [S5.f(x) for x in pX]
            prior = RBFNMeanPrior()
            prior.train(pX, pY, bounds=S5.bounds, k=10, seed=102)
        
            GP = GaussianProcess(kernel, X, Y, prior=prior)        
        
            eif = EI(GP)
            pdopt, pdoptx = direct(eif.negf, S5.bounds, maxiter=10)
            pcopt, pcoptx = cdirect(eif.negf, S5.bounds, maxiter=10)
            pmopt, pmoptx = maximizeEI(GP, S5.bounds, maxiter=10)
        
            self.failIfAlmostEqual(pdopt, dopt, 3)
            self.failUnlessAlmostEqual(pdopt, pcopt, 4)
            self.failUnlessAlmostEqual(-pdopt, pmopt, 4)
            self.failUnlessAlmostEqual(-pcopt, pmopt, 4)
        
            self.failUnless(sum(abs(pdoptx-pcoptx)) < .01)
            self.failUnless(sum(abs(pmoptx-pcoptx)) < .01)
            self.failUnless(sum(abs(pmoptx-pdoptx)) < .01)
Example 7
    def train(self, X, Y, bounds=None, k=10, delta=100, kernel=None, seed=None):
        """
        using the X, Y data, train a Radial Basis Function network as follows:
    
            1.  cluster the data into k clusters using k-means
            2.  using the centroids of each cluster as the origins of the RBFs,
                learn the RBF weights beta
    
        'RBF' is a radial basis function of the form y = rbf(r, args=rbfargs), where
        r = ||c-x|| and 'args' is any necessary arguments for the RBF.
    
        Returns the means and weights of the RNF network.
        """
        def RBFN(means, x):
            norms = [linalg.norm(m-x) for m in means]
            rbf = array([self.RBF(n) for n in norms])
            return rbf
    
        rs = numpy.random.RandomState(seed)
        
        # let's cluster the data using k-means.  First, project X to the unit hypercube
        if bounds is not None:
            self.lowerb = array([b[0] for b in bounds])
            self.width = array([b[1]-b[0] for b in bounds])
        X = array([(x-self.lowerb)/self.width for x in X])
        
        Y = array(Y)
    
        NX, _ = X.shape
    
        r = range(NX)
        rs.shuffle(r)
        means = X[r[:k]]
    
        for _ in xrange(10):
            # assign each point to its closest mean
            C = [argmin([linalg.norm(m-x) for m in means]) for x in X]
        
            # means become centroids of their clusters
            means = []
            for j in xrange(k):
                clust = [x for x, c in zip(X, C) if c==j]
                if len(clust)==0:
                    # for empty clusters, restart centered on a random datum
                    means.append(X[rs.randint(NX)])
                else:
                    means.append(array(clust).mean(0))
    
        # okay, now we can analytically compute RBF weights
        if kernel is None:
            kernel = GaussianKernel_iso(array([.2]))
        
        reg = .1
        # L = cholesky(kernel.covMatrix(X))
        while True:
            try:
                invK = linalg.inv(matrix(kernel.covMatrix(X)) + eye(NX)*reg) # add regularizer
            except LinAlgError:
                reg *= 2
                print 'LinAlgError: increasing regularizer to %f' % reg
            else:
                break
                
    
        # compute unweighted basis values for the data
        H = matrix([RBFN(means, x) for x in X])
        Y = matrix(Y)
    
        try:
            beta = linalg.inv(H.T*invK*H + delta**-2) * (H.T*invK*Y.T)
        except linalg.linalg.LinAlgError:
            # add a powerful regularizer...
            beta = linalg.inv(H.T*invK*H + eye(H.shape[1]) + delta**-2) * (H.T*invK*Y.T)

        ## this is from Ruben's paper.  I can't get it to make sense...
        # a = b = 10.
        # sighat = diag((b + Y*K.I*Y.T - (H.T*K.I*H + delta**-2) * (muhat*muhat.T)) / (NX+a+2))

        # beta should be a 1D array
        self.beta = array(beta).reshape(-1)
        self.means = means
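
As used in Example 6, the trained network then serves as a mean prior for a GaussianProcess. A usage sketch assembled directly from the calls in that example:

S5 = Shekel5()
# fit the RBF-network mean prior on a separate, larger sample
pX = lhcSample(S5.bounds, 100, seed=101)
pY = [S5.f(x) for x in pX]
prior = RBFNMeanPrior()
prior.train(pX, pY, bounds=S5.bounds, k=10, seed=102)

# hand the prior to the GP alongside the usual training data
X = lhcSample(S5.bounds, 10, seed=0)
Y = [S5.f(x) for x in X]
GP = GaussianProcess(GaussianKernel_iso([0.1]), X, Y, prior=prior)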
        