Example #1
 def proceed(self, meanfunc, covfunc, likfunc, x, y, nargout=1):
     if not isinstance(likfunc, lik.likGauss):
         raise Exception('Exact inference only possible with Gaussian likelihood')
     n, D = x.shape
     K = covfunc.proceed(x)                                 # evaluate covariance matrix
     m = meanfunc.proceed(x)                                # evaluate mean vector
     sn2   = np.exp(2.*likfunc.hyp[0])                      # noise variance of likGauss
     L     = np.linalg.cholesky(K/sn2+np.eye(n)).T          # Cholesky factor of covariance with noise
     alpha = solve_chol(L,y-m)/sn2
     post = postStruct()
     post.alpha = alpha                                     # return the posterior parameters
     post.sW    = np.ones((n,1))/np.sqrt(sn2)               # sqrt of noise precision vector
     post.L     = L                                         # L = chol(eye(n)+sW*sW'.*K)
     if nargout>1:                                                # do we want the marginal likelihood?
         nlZ = np.dot((y-m).T,alpha)/2. + np.log(np.diag(L)).sum() + n*np.log(2*np.pi*sn2)/2. # -log marg lik
         if nargout>2:                                            # do we want derivatives?
             dnlZ = dnlZStruct(meanfunc, covfunc, likfunc)        # allocate space for derivatives
             Q = solve_chol(L,np.eye(n))/sn2 - np.dot(alpha,alpha.T) # precompute for convenience
             dnlZ.lik = [sn2*np.trace(Q)]
             if covfunc.hyp:
                 for ii in range(len(covfunc.hyp)):
                     dnlZ.cov[ii] = (Q*covfunc.proceed(x, None, ii)).sum()/2.
             if meanfunc.hyp:
                 for ii in range(len(meanfunc.hyp)): 
                     dnlZ.mean[ii] = np.dot(-meanfunc.proceed(x, ii).T,alpha)
             return [post, nlZ[0][0], dnlZ]
         return [post, nlZ[0][0]]
     return [post]
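
Note: these snippets all rely on a few helpers that are not shown on this page (solve_chol, postStruct, dnlZStruct from pyGPs). As a minimal sketch of what they do, assuming the GPML convention that solve_chol receives the upper-triangular factor L with A = L'*L (pyGPs ships its own, more careful versions):

import numpy as np

def solve_chol(L, B):
    # Solve A*X = B given A = L'*L, with L upper triangular
    # (as produced by np.linalg.cholesky(...).T in the code above).
    return np.linalg.solve(L, np.linalg.solve(L.T, B))

class postStruct(object):
    # Approximate posterior container: weights alpha, sqrt noise
    # precisions sW, and a Cholesky factor (or related matrix) L.
    def __init__(self):
        self.alpha = None
        self.sW = None
        self.L = None

class dnlZStruct(object):
    # One derivative slot per hyperparameter of each model component.
    def __init__(self, meanfunc, covfunc, likfunc):
        self.mean = [0.] * len(meanfunc.hyp)
        self.cov = [0.] * len(covfunc.hyp)
        self.lik = [0.] * len(likfunc.hyp)
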
Example #2
    def evaluate(self, meanfunc, covfunc, likfunc, x, y, nargout=1):
        if not isinstance(likfunc, lik.Gauss):                  # NOTE: no explicit call to likGauss
            raise Exception('Exact inference only possible with Gaussian likelihood')
        if not isinstance(covfunc, cov.FITCOfKernel):
            raise Exception('Only covFITC supported.')          # check cov

        diagK,Kuu,Ku = covfunc.getCovMatrix(x=x, mode='train')  # evaluate covariance matrix
        m  = meanfunc.getMean(x)                                # evaluate mean vector
        n, D = x.shape
        nu = Kuu.shape[0]

        sn2   = np.exp(2*likfunc.hyp[0])                         # noise variance of likGauss
        snu2  = 1.e-6*sn2                                        # hard coded inducing inputs noise
        Luu   = np.linalg.cholesky(Kuu+snu2*np.eye(nu)).T        # Kuu + snu2*I = Luu'*Luu
        V     = np.linalg.solve(Luu.T,Ku)                        # V = inv(Luu')*Ku => V'*V = Q
        g_sn2 = diagK + sn2 - np.array([(V*V).sum(axis=0)]).T    # g + sn2 = diag(K) + sn2 - diag(Q)
        Lu    = np.linalg.cholesky(np.eye(nu) + np.dot(V/np.tile(g_sn2.T,(nu,1)),V.T)).T  # Lu'*Lu=I+V*diag(1/g_sn2)*V'
        r     = (y-m)/np.sqrt(g_sn2)
        be    = np.linalg.solve(Lu.T,np.dot(V,r/np.sqrt(g_sn2)))
        iKuu  = solve_chol(Luu,np.eye(nu))                       # inv(Kuu + snu2*I) = iKuu

        post = postStruct()
        post.alpha = np.linalg.solve(Luu,np.linalg.solve(Lu,be)) # return the posterior parameters
        post.L  = solve_chol(np.dot(Lu,Luu),np.eye(nu)) - iKuu   # Sigma-inv(Kuu)
        post.sW = np.ones((n,1))/np.sqrt(sn2)                    # unused for FITC prediction with gp.m

        if nargout>1:                                            # do we want the marginal likelihood
            nlZ = np.log(np.diag(Lu)).sum() + (np.log(g_sn2).sum() + n*np.log(2*np.pi) + np.dot(r.T,r) - np.dot(be.T,be))/2.
            if nargout>2:                                        # do we want derivatives?
                dnlZ = dnlZStruct(meanfunc, covfunc, likfunc)    # allocate space for derivatives
                al = r/np.sqrt(g_sn2) - np.dot(V.T,np.linalg.solve(Lu,be))/g_sn2 # al = (Kt+sn2*eye(n))\y
                B = np.dot(iKuu,Ku)
                w = np.dot(B,al)
                W = np.linalg.solve(Lu.T,V/np.tile(g_sn2.T,(nu,1)))
                for ii in range(len(covfunc.hyp)):
                    [ddiagKi,dKuui,dKui] = covfunc.getDerMatrix(x=x, mode='train', der=ii)    # eval cov deriv
                    R = 2.*dKui-np.dot(dKuui,B)
                    v = ddiagKi - np.array([(R*B).sum(axis=0)]).T          # diag part of cov deriv
                    dnlZ.cov[ii] = ( np.dot(ddiagKi.T,1./g_sn2) + np.dot(w.T,(np.dot(dKuui,w)-2.*np.dot(dKui,al))) \
                                   - np.dot(al.T,(v*al)) - np.dot(np.array([(W*W).sum(axis=0)]),v) - (np.dot(R,W.T)*np.dot(B,W.T)).sum() )/2.
                    dnlZ.cov[ii] = dnlZ.cov[ii][0,0]
                dnlZ.lik = sn2*((1./g_sn2).sum() - (np.array([(W*W).sum(axis=0)])).sum() - np.dot(al.T,al))
                dKuui = 2*snu2
                R = -dKuui*B
                v = -np.array([(R*B).sum(axis=0)]).T     # diag part of cov deriv
                dnlZ.lik += (np.dot(w.T,np.dot(dKuui,w)) -np.dot(al.T,(v*al)) \
                                 - np.dot(np.array([(W*W).sum(axis=0)]),v) - (np.dot(R,W.T)*np.dot(B,W.T)).sum() )/2.
                dnlZ.lik = list(dnlZ.lik[0])
                for ii in range(len(meanfunc.hyp)):
                    dnlZ.mean[ii] = np.dot(-meanfunc.getDerMatrix(x, ii).T, al)
                    dnlZ.mean[ii] = dnlZ.mean[ii][0,0]

                return post, nlZ[0,0], dnlZ
            return post, nlZ[0,0]
        return post
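
For orientation: the quantities above implement the FITC approximation, which replaces the full covariance K by a low-rank-plus-diagonal matrix built from the inducing-point covariances. A toy NumPy sketch of that approximation (illustration only, not the pyGPs API):

import numpy as np

def fitc_cov(diagK, Kuu, Ku, snu2=0.0):
    # Q = Ku' * inv(Kuu + snu2*I) * Ku is the low-rank (Nystrom) part;
    # the exact marginal variances are restored on the diagonal, matching
    # g_sn2 = diag(K) + sn2 - diag(Q) in the code above.
    nu = Kuu.shape[0]
    Q = Ku.T.dot(np.linalg.solve(Kuu + snu2 * np.eye(nu), Ku))
    g = np.asarray(diagK).ravel() - np.diag(Q)
    return Q + np.diag(g)
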
Example #3
    def evaluate(self, meanfunc, covfunc, likfunc, x, y, scaleprior = None, nargout=1):
        if not isinstance(likfunc, lik.Gauss):
            raise Exception('Exact inference only possible with Gaussian likelihood')
        n, D = x.shape
        K = covfunc.getCovMatrix(x=x, mode='train')            # evaluate covariance matrix
        m = meanfunc.getMean(x)                                # evaluate mean vector

        sn2   = np.exp(2*likfunc.hyp[0])                       # noise variance of likGauss
        L     = np.linalg.cholesky(K/sn2+np.eye(n)).T          # Cholesky factor of covariance with noise
        alpha = solve_chol(L,y-m)/sn2
        post       = postStruct()
        post.alpha = alpha                                     # return the posterior parameters
        post.sW    = np.ones((n,1))/np.sqrt(sn2)               # sqrt of noise precision vector
        post.L     = L                                         # L = chol(eye(n)+sW*sW'.*K)

        if nargout>1:                                          # do we want the marginal likelihood?
            if scaleprior:
                alpha0, beta0 = scaleprior
                df = 2*alpha0
                Z = -scspec.gammaln(0.5*(df+n)) + scspec.gammaln(0.5*df) + 0.5*n*np.log(2*np.pi*beta0)
                dscale = np.log(1.0+np.dot((y-m).T,alpha)/(2.0*beta0))
                nlZ = 0.5*(df + n)*dscale + np.log(np.diag(L)).sum() + Z
            else:
                nlZ = np.dot((y-m).T,alpha)/2. + np.log(np.diag(L)).sum() + n*np.log(2*np.pi*sn2)/2. # -log marg lik
            if nargout>2:                                      # do we want derivatives?
                dnlZ = dnlZStruct(meanfunc, covfunc, likfunc)  # allocate space for derivatives
                if scaleprior:
                    corrFactor = (df+n)/(2*beta0 + np.dot((y-m).T,alpha))
                    Q = solve_chol(L,np.eye(n))/sn2 - corrFactor*np.dot(alpha,alpha.T) # precompute for convenience
                    dscale += scspec.digamma(alpha0) - scspec.digamma(alpha0 + 0.5*n)
                else:
                    Q = solve_chol(L,np.eye(n))/sn2 - np.dot(alpha,alpha.T) # precompute for convenience
                dnlZ.lik = [sn2*np.trace(Q)]
                if covfunc.hyp:
                    for ii in range(len(covfunc.hyp)):
                        dnlZ.cov[ii] = (Q*covfunc.getDerMatrix(x=x, mode='train', der=ii)).sum()/2.
                if meanfunc.hyp:
                    for ii in range(len(meanfunc.hyp)):
                        dnlZ.mean[ii] = np.dot(-meanfunc.getDerMatrix(x, ii).T,alpha)
                        dnlZ.mean[ii] = dnlZ.mean[ii][0,0]
                if scaleprior:
                    return post, nlZ[0,0], dnlZ, dscale
                else:
                    return post, nlZ[0,0], dnlZ
            return post, nlZ[0,0]
        return post
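
The scaleprior branch integrates the noise scale out under an inverse-gamma prior with parameters (alpha0, beta0), turning the Gaussian marginal likelihood into a Student-t one with df = 2*alpha0 degrees of freedom. Reconstructed from the expressions above, the quantity returned as nlZ is

-\log Z = \frac{df+n}{2}\log\!\left(1 + \frac{(y-m)^\top\alpha}{2\beta_0}\right) + \sum_i \log L_{ii} - \log\Gamma\!\left(\tfrac{df+n}{2}\right) + \log\Gamma\!\left(\tfrac{df}{2}\right) + \frac{n}{2}\log(2\pi\beta_0).
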
Example #4
    def proceed(self, meanfunc, covfunc, likfunc, x, y, nargout=1):
        if not isinstance(likfunc, lik.Gauss):
            raise Exception(
                'Exact inference only possible with Gaussian likelihood')
        n, D = x.shape
        K = covfunc.proceed(x)  # evaluate covariance matrix
        m = meanfunc.proceed(x)  # evaluate mean vector

        sn2 = np.exp(2 * likfunc.hyp[0])  # noise variance of likGauss
        L = np.linalg.cholesky(
            K / sn2 + np.eye(n)).T  # Cholesky factor of covariance with noise
        alpha = solve_chol(L, y - m) / sn2
        post = postStruct()
        post.alpha = alpha  # return the posterior parameters
        post.sW = np.ones(
            (n, 1)) / np.sqrt(sn2)  # sqrt of noise precision vector
        post.L = L  # L = chol(eye(n)+sW*sW'.*K)

        if nargout > 1:  # do we want the marginal likelihood?
            nlZ = np.dot(
                (y - m).T, alpha) / 2. + np.log(np.diag(L)).sum() + n * np.log(
                    2 * np.pi * sn2) / 2.  # -log marg lik
            if nargout > 2:  # do we want derivatives?
                dnlZ = dnlZStruct(meanfunc, covfunc,
                                  likfunc)  # allocate space for derivatives
                Q = solve_chol(L, np.eye(n)) / sn2 - np.dot(
                    alpha, alpha.T)  # precompute for convenience
                dnlZ.lik = [sn2 * np.trace(Q)]
                if covfunc.hyp:
                    for ii in range(len(covfunc.hyp)):
                        dnlZ.cov[ii] = (
                            Q * covfunc.proceed(x, None, ii)).sum() / 2.
                if meanfunc.hyp:
                    for ii in range(len(meanfunc.hyp)):
                        dnlZ.mean[ii] = np.dot(-meanfunc.proceed(x, ii).T,
                                               alpha)
                        dnlZ.mean[ii] = dnlZ.mean[ii][0, 0]
                return post, nlZ[0, 0], dnlZ
            return post, nlZ[0, 0]
        return post
Example #5
 def proceed(self, meanfunc, covfunc, likfunc, x, y, nargout=1):
     tol = 1e-4; max_sweep = 10; min_sweep = 2 # tolerance to stop EP iterations
     n = x.shape[0]
     inffunc = self
     K = covfunc.proceed(x)       # evaluate the covariance matrix
     m = meanfunc.proceed(x)      # evaluate the mean vector
     nlZ0 = -likfunc.proceed(y, m, np.reshape(np.diag(K),(np.diag(K).shape[0],1)), inffunc).sum()
     if self.last_ttau is None:                  # find starting point for tilde parameters
         ttau  = np.zeros((n,1))             # initialize to zero if we have no better guess
         tnu   = np.zeros((n,1))
         Sigma = K                           # initialize Sigma and mu, the parameters of ..
         mu    = np.zeros((n,1))             # .. the Gaussian posterior approximation
         nlZ   = nlZ0
     else:
         ttau = self.last_ttau              # try the tilde values from previous call
         tnu  = self.last_tnu
         [Sigma, mu, nlZ, L] = self.epComputeParams(K, y, ttau, tnu, likfunc, m, inffunc)
         if nlZ > nlZ0:                                # if zero is better ..
             ttau = np.zeros((n,1))                    # .. then initialize with zero instead
             tnu  = np.zeros((n,1)) 
             Sigma = K                              # initialize Sigma and mu, the parameters of ..
             mu = np.zeros((n,1))                   # .. the Gaussian posterior approximation
             nlZ = nlZ0
     nlZ_old = np.inf; sweep = 0               # converged, max. sweeps or min. sweeps?
     while (np.abs(nlZ-nlZ_old) > tol and sweep < max_sweep) or (sweep < min_sweep):
         nlZ_old = nlZ; sweep += 1
         rperm = range(n)                  #randperm(n)
         for ii in rperm:       # iterate EP updates (in random order) over examples
             tau_ni = 1/Sigma[ii,ii] - ttau[ii]      #  first find the cavity distribution ..
             nu_ni  = mu[ii]/Sigma[ii,ii] + m[ii]*tau_ni - tnu[ii]    # .. params tau_ni and nu_ni
             # compute the desired derivatives of the individual log partition function
             vargout = likfunc.proceed(y[ii], nu_ni/tau_ni, 1/tau_ni, inffunc, None, 3)
             lZ = vargout[0]; dlZ = vargout[1]; d2lZ = vargout[2] 
             ttau_old = copy(ttau[ii])   # then find the new tilde parameters, keep copy of old
             ttau[ii] = -d2lZ  /(1.+d2lZ/tau_ni)
             ttau[ii] = max(ttau[ii],0) # enforce positivity i.e. lower bound ttau by zero
             tnu[ii]  = ( dlZ + (m[ii]-nu_ni/tau_ni)*d2lZ )/(1.+d2lZ/tau_ni)
             ds2 = ttau[ii] - ttau_old                   # finally rank-1 update Sigma ..
             si  = np.reshape(Sigma[:,ii],(Sigma.shape[0],1))
             Sigma = Sigma - ds2/(1.+ds2*si[ii])*np.dot(si,si.T)   # takes 70% of total time
             mu = np.dot(Sigma,tnu)                                # .. and recompute mu
         # recompute since repeated rank-one updates can destroy numerical precision
         [Sigma, mu, nlZ, L] = self.epComputeParams(K, y, ttau, tnu, likfunc, m, inffunc)
     if sweep == max_sweep:
         raise Exception('maximum number of sweeps reached in function infEP')
     self.last_ttau = ttau; self.last_tnu = tnu      # remember for next call
     sW = np.sqrt(ttau); alpha = tnu-sW*solve_chol(L,sW*np.dot(K,tnu))
     post = postStruct()
     post.alpha = alpha                                # return the posterior params
     post.sW    = sW
     post.L     = L
     if nargout>2:                                           # do we want derivatives?
         dnlZ = dnlZStruct(meanfunc, covfunc, likfunc)       # allocate space for derivatives
         ssi  = np.sqrt(ttau)
         V = np.linalg.solve(L.T,np.tile(ssi,(1,n))*K)
         Sigma = K - np.dot(V.T,V)
         mu = np.dot(Sigma,tnu)
         Dsigma = np.reshape(np.diag(Sigma),(np.diag(Sigma).shape[0],1))
         tau_n = 1/Dsigma-ttau                    # compute the log marginal likelihood
         nu_n  = mu/Dsigma-tnu                    # vectors of cavity parameters
         F = np.dot(alpha,alpha.T) - np.tile(sW,(1,n))* \
             solve_chol(L,np.diag(np.reshape(sW,(sW.shape[0],))))   # covariance hypers
         for jj in range(len(covfunc.hyp)):
             dK = covfunc.proceed(x, None, jj)
             dnlZ.cov[jj] = -(F*dK).sum()/2.
         for ii in range(len(likfunc.hyp)):
             dlik = likfunc.proceed(y, nu_n/tau_n, 1/tau_n, inffunc, ii)
             dnlZ.lik[ii] = -dlik.sum()
         [junk,dlZ] = likfunc.proceed(y, nu_n/tau_n, 1/tau_n, inffunc, None, 2) # mean hyps
         for ii in range(len(meanfunc.hyp)):
             dm = meanfunc.proceed(x, ii)
             dnlZ.mean[ii] = -np.dot(dlZ.T,dm)
             dnlZ.mean[ii] = dnlZ.mean[ii][0][0]
         vargout = [post, nlZ[0], dnlZ]
     else:
         vargout = [post, nlZ[0]]
     return vargout
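
For reference, the inner loop above is the standard EP scheme: each site i is removed to form the cavity distribution

\tau_{-i} = \Sigma_{ii}^{-1} - \tilde\tau_i, \qquad \nu_{-i} = \mu_i/\Sigma_{ii} + m_i\,\tau_{-i} - \tilde\nu_i,

the site parameters are refit from the derivatives of the log partition function, and the posterior is refreshed by the rank-one update

\Sigma \leftarrow \Sigma - \frac{\Delta\tilde\tau}{1+\Delta\tilde\tau\,\Sigma_{ii}}\, s_i s_i^\top, \qquad \mu = \Sigma\,\tilde\nu,

where s_i is the i-th column of \Sigma and \Delta\tilde\tau is the change in \tilde\tau_i.
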
Example #6
    def proceed(self, meanfunc, covfunc, likfunc, x, y, nargout=1):
        tol = 1e-6                           # tolerance for when to stop the Newton iterations
        smax = 2; Nline = 20; thr = 1e-4     # line search parameters
        maxit = 20                           # max number of Newton steps in f
        inffunc = self
        K = covfunc.proceed(x)       # evaluate the covariance matrix
        m = meanfunc.proceed(x)      # evaluate the mean vector
        n, D = x.shape
        Psi_old = np.inf    # make sure while loop starts by the largest old objective val
        if self.last_alpha is None:          # find a good starting point for alpha and f
            alpha = np.zeros((n,1))
            f = np.dot(K,alpha) + m       # start at mean if sizes do not match
            vargout = likfunc.proceed(y, f, None, inffunc, None, 3)
            lp = vargout[0]; dlp = vargout[1]; d2lp = vargout[2]
            W= -d2lp
            Psi_new = -lp.sum()
        else:
            alpha = self.last_alpha
            f = np.dot(K,alpha) + m                      # try last one
            vargout = likfunc.proceed(y, f, None, inffunc, None, 3)
            lp = vargout[0]; dlp = vargout[1]; d2lp = vargout[2]
            W= -d2lp
            Psi_new = np.dot(alpha.T,(f-m))/2. - lp.sum() # objective for last alpha
            vargout = - likfunc.proceed(y, m, None, inffunc, None, 1)
            Psi_def =  vargout[0]                         # objective for default init f==m
            if Psi_def < Psi_new:                         # if default is better, we use it
                alpha = np.zeros((n,1))
                f = np.dot(K,alpha) + m 
                vargout = likfunc.proceed(y, f, None, inffunc, None, 3)
                lp = vargout[0]; dlp = vargout[1]; d2lp = vargout[2]
                W=-d2lp; Psi_new = -lp.sum()
        isWneg = np.any(W<0)       # flag indicating whether we found negative values of W
        it = 0                     # this happens for the Student's t likelihood

        while (Psi_old - Psi_new > tol) and it<maxit:          # begin Newton
            Psi_old = Psi_new; it += 1
            if isWneg:       # stabilise the Newton direction in case W has negative values
                W = np.maximum(W,0)      # stabilise the Hessian to guarantee positive definiteness
                tol = 1e-10             # increase accuracy to also get the derivatives right
            sW = np.sqrt(W); L = np.linalg.cholesky(np.eye(n) + np.dot(sW,sW.T)*K).T
            b = W*(f-m) + dlp
            dalpha = b - sW*solve_chol(L,sW*np.dot(K,b)) - alpha
            vargout = brentmin(0,smax,Nline,thr,self.Psi_line,4,dalpha,alpha,K,m,likfunc,y,inffunc)
            s = vargout[0]
            Psi_new = vargout[1]
            Nfun = vargout[2]
            alpha = vargout[3]
            f = vargout[4]
            dlp = vargout[5]
            W = vargout[6]
            isWneg = np.any(W<0)
        self.last_alpha = alpha                                     # remember for next call
        vargout = likfunc.proceed(y,f,None,inffunc,None,4) 
        lp = vargout[0]; dlp = vargout[1]; d2lp = vargout[2]; d3lp = vargout[3] 
        W = -d2lp; isWneg = np.any(W<0)
        post = postStruct()
        post.alpha = alpha    # return the posterior parameters
        post.sW = np.sqrt(np.abs(W))*np.sign(W)             # preserve sign in case of negative
        if isWneg:
            [ldA,iA,post.L] = self.logdetA(K,W,3)
            nlZ = np.dot(alpha.T,(f-m))/2. - lp.sum() + ldA/2.
            nlZ = nlZ[0] 
        else:
            sW = post.sW
            post.L = np.linalg.cholesky(np.eye(n)+np.dot(sW,sW.T)*K).T 
            nlZ = np.dot(alpha.T,(f-m))/2. + (np.log(np.diag(post.L))-np.reshape(lp,(lp.shape[0],))).sum()
            nlZ = nlZ[0]
        if nargout>2:                                           # do we want derivatives?
            dnlZ = dnlZStruct(meanfunc, covfunc, likfunc)       # allocate space for derivatives
            if isWneg:                  # switch between Cholesky and LU decomposition mode
                Z = -post.L                                                 # inv(K+inv(W))
                g = np.atleast_2d((iA*K).sum(axis=1)).T /2       # deriv. of ln|B| wrt W; g = diag(inv(inv(K)+diag(W)))/2
            else:
                Z = np.tile(sW,(1,n))*solve_chol(post.L,np.diag(np.reshape(sW,(sW.shape[0],)))) #sW*inv(B)*sW=inv(K+inv(W))
                C = np.linalg.solve(post.L.T,np.tile(sW,(1,n))*K)              # deriv. of ln|B| wrt W
                g = np.atleast_2d((np.diag(K)-(C**2).sum(axis=0).T)).T /2.      # g = diag(inv(inv(K)+W))/2
            dfhat = g* d3lp  # deriv. of nlZ wrt. fhat: dfhat=diag(inv(inv(K)+W)).*d3lp/2
            for ii in range(len(covfunc.hyp)):                                  # covariance hypers
                dK = covfunc.proceed(x, None, ii)
                dnlZ.cov[ii] = (Z*dK).sum()/2. - np.dot(alpha.T,np.dot(dK,alpha))/2.    # explicit part
                b = np.dot(dK,dlp)                            # b-K*(Z*b) = inv(eye(n)+K*diag(W))*b
                dnlZ.cov[ii] -= np.dot(dfhat.T,b-np.dot(K,np.dot(Z,b)))            # implicit part
                dnlZ.cov[ii] = dnlZ.cov[ii][0][0]
            for ii in range(len(likfunc.hyp)):                  # likelihood hypers
                [lp_dhyp,dlp_dhyp,d2lp_dhyp] = likfunc.proceed(y,f,None,inffunc,ii,3)
                dnlZ.lik[ii] = -np.dot(g.T,d2lp_dhyp) - lp_dhyp.sum()      # explicit part
                b = np.dot(K,dlp_dhyp)                        # b-K*(Z*b) = inv(eye(n)+K*diag(W))*b
                dnlZ.lik[ii] -= np.dot(dfhat.T,b-np.dot(K,np.dot(Z,b)))   # implicit part
                
                dnlZ.lik[ii] = dnlZ.lik[ii][0][0]
            for ii in range(len(meanfunc.hyp)):                  # mean hypers
                dm = meanfunc.proceed(x, ii)
                dnlZ.mean[ii] = -np.dot(alpha.T,dm)                # explicit part
                dnlZ.mean[ii] -= np.dot(dfhat.T,dm-np.dot(K,np.dot(Z,dm))) # implicit part
                dnlZ.mean[ii] = dnlZ.mean[ii][0][0]
            vargout = [post,nlZ[0],dnlZ]
        else:
            vargout = [post, nlZ[0]]
        return vargout
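
For reference, the Newton loop above minimizes the usual Laplace objective; with f = K\alpha + m and W = -\nabla\nabla\log p(y\mid f),

\Psi(\alpha) = \tfrac{1}{2}\,\alpha^\top(f-m) - \sum_i \log p(y_i\mid f_i),

and each iteration forms b = W(f-m) + \nabla\log p(y\mid f) and the direction

\Delta\alpha = b - \sqrt{W}\,B^{-1}\sqrt{W}\,K\,b - \alpha, \qquad B = I + \sqrt{W}\,K\,\sqrt{W},

whose step length is then chosen by the brentmin line search.
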
Example #7
 def proceed(self, meanfunc, covfunc, likfunc, x, y, nargout=1):
     tol = 1e-4  # tolerance to stop EP iterations
     max_sweep = 10
     min_sweep = 2
     n = x.shape[0]
     inffunc = self
     K = covfunc.proceed(x)  # evaluate the covariance matrix
     m = meanfunc.proceed(x)  # evaluate the mean vector
     nlZ0 = -likfunc.proceed(
         y, m, np.reshape(np.diag(K),
                          (np.diag(K).shape[0], 1)), inffunc).sum()
     if self.last_ttau is None:  # find starting point for tilde parameters
         ttau = np.zeros(
             (n, 1))  # initialize to zero if we have no better guess
         tnu = np.zeros((n, 1))
         Sigma = K  # initialize Sigma and mu, the parameters of ..
         mu = np.zeros((n, 1))  # .. the Gaussian posterior approximation
         nlZ = nlZ0
     else:
         ttau = self.last_ttau  # try the tilde values from previous call
         tnu = self.last_tnu
         Sigma, mu, nlZ, L = self.epComputeParams(K, y, ttau, tnu, likfunc,
                                                  m, inffunc)
         if nlZ > nlZ0:  # if zero is better ..
             ttau = np.zeros((n, 1))  # .. then initialize with zero instead
             tnu = np.zeros((n, 1))
             Sigma = K  # initialize Sigma and mu, the parameters of ..
             mu = np.zeros(
                 (n, 1))  # .. the Gaussian posterior approximation
             nlZ = nlZ0
     nlZ_old = np.inf
     sweep = 0  # converged, max. sweeps or min. sweeps?
     while (np.abs(nlZ - nlZ_old) > tol
            and sweep < max_sweep) or (sweep < min_sweep):
         nlZ_old = nlZ
         sweep += 1
         rperm = range(n)  # randperm(n)
         for ii in rperm:  # iterate EP updates (in random order) over examples
             tau_ni = 1 / Sigma[ii, ii] - ttau[
                 ii]  #  first find the cavity distribution ..
             nu_ni = mu[ii] / Sigma[ii, ii] + m[ii] * tau_ni - tnu[
                 ii]  # .. params tau_ni and nu_ni
             # compute the desired derivatives of the individual log partition function
             lZ, dlZ, d2lZ = likfunc.proceed(y[ii], nu_ni / tau_ni,
                                             1 / tau_ni, inffunc, None, 3)
             ttau_old = copy(
                 ttau[ii]
             )  # then find the new tilde parameters, keep copy of old
             ttau[ii] = -d2lZ / (1. + d2lZ / tau_ni)
             ttau[ii] = max(
                 ttau[ii],
                 0)  # enforce positivity i.e. lower bound ttau by zero
             tnu[ii] = (dlZ +
                        (m[ii] - nu_ni / tau_ni) * d2lZ) / (1. +
                                                            d2lZ / tau_ni)
             ds2 = ttau[ii] - ttau_old  # finally rank-1 update Sigma ..
             si = np.reshape(Sigma[:, ii], (Sigma.shape[0], 1))
             Sigma = Sigma - ds2 / (1. + ds2 * si[ii]) * np.dot(
                 si, si.T)  # takes 70% of total time
             mu = np.dot(Sigma, tnu)  # .. and recompute mu
         # recompute since repeated rank-one updates can destroy numerical precision
         Sigma, mu, nlZ, L = self.epComputeParams(K, y, ttau, tnu, likfunc,
                                                  m, inffunc)
     if sweep == max_sweep:
         print('maximum number of sweeps reached in function infEP')
     self.last_ttau = ttau
     self.last_tnu = tnu  # remember for next call
     sW = np.sqrt(ttau)
     alpha = tnu - sW * solve_chol(L, sW * np.dot(K, tnu))
     post = postStruct()
     post.alpha = alpha  # return the posterior params
     post.sW = sW
     post.L = L
     if nargout > 2:  # do we want derivatives?
         dnlZ = dnlZStruct(meanfunc, covfunc,
                           likfunc)  # allocate space for derivatives
         ssi = np.sqrt(ttau)
         V = np.linalg.solve(L.T, np.tile(ssi, (1, n)) * K)
         Sigma = K - np.dot(V.T, V)
         mu = np.dot(Sigma, tnu)
         Dsigma = np.reshape(np.diag(Sigma), (np.diag(Sigma).shape[0], 1))
         tau_n = 1 / Dsigma - ttau  # compute the log marginal likelihood
         nu_n = mu / Dsigma - tnu  # vectors of cavity parameters
         F = np.dot(alpha,alpha.T) - np.tile(sW,(1,n))* \
             solve_chol(L,np.diag(np.reshape(sW,(sW.shape[0],))))   # covariance hypers
         for jj in range(len(covfunc.hyp)):
             dK = covfunc.proceed(x, None, jj)
             dnlZ.cov[jj] = -(F * dK).sum() / 2.
         for ii in range(len(likfunc.hyp)):
             dlik = likfunc.proceed(y, nu_n / tau_n, 1 / tau_n, inffunc, ii)
             dnlZ.lik[ii] = -dlik.sum()
         junk, dlZ = likfunc.proceed(y, nu_n / tau_n, 1 / tau_n, inffunc,
                                     None, 2)  # mean hyps
         for ii in range(len(meanfunc.hyp)):
             dm = meanfunc.proceed(x, ii)
             dnlZ.mean[ii] = -np.dot(dlZ.T, dm)
             dnlZ.mean[ii] = dnlZ.mean[ii][0, 0]
         return post, nlZ[0], dnlZ
     else:
         return post, nlZ[0]
Example #8
    def proceed(self, meanfunc, covfunc, likfunc, x, y, nargout=1):
        tol = 1e-6  # tolerance for when to stop the Newton iterations
        smax = 2
        Nline = 20
        thr = 1e-4  # line search parameters
        maxit = 20  # max number of Newton steps in f
        inffunc = self
        K = covfunc.proceed(x)  # evaluate the covariance matrix
        m = meanfunc.proceed(x)  # evaluate the mean vector
        n, D = x.shape
        Psi_old = np.inf  # make sure while loop starts by the largest old objective val
        if self.last_alpha is None:  # find a good starting point for alpha and f
            alpha = np.zeros((n, 1))
            f = np.dot(K, alpha) + m  # start at mean if sizes do not match
            vargout = likfunc.proceed(y, f, None, inffunc, None, 3)
            lp = vargout[0]
            dlp = vargout[1]
            d2lp = vargout[2]
            W = -d2lp
            Psi_new = -lp.sum()
        else:
            alpha = self.last_alpha
            f = np.dot(K, alpha) + m  # try last one
            vargout = likfunc.proceed(y, f, None, inffunc, None, 3)
            lp = vargout[0]
            dlp = vargout[1]
            d2lp = vargout[2]
            W = -d2lp
            Psi_new = np.dot(
                alpha.T, (f - m)) / 2. - lp.sum()  # objective for last alpha
            vargout = -likfunc.proceed(y, m, None, inffunc, None, 1)
            Psi_def = vargout[0]  # objective for default init f==m
            if Psi_def < Psi_new:  # if default is better, we use it
                alpha = np.zeros((n, 1))
                f = np.dot(K, alpha) + m
                vargout = likfunc.proceed(y, f, None, inffunc, None, 3)
                lp = vargout[0]
                dlp = vargout[1]
                d2lp = vargout[2]
                W = -d2lp
                Psi_new = -lp.sum()
        isWneg = np.any(
            W < 0)  # flag indicating whether we found negative values of W
        it = 0  # this happens for the Student's t likelihood

        while (Psi_old - Psi_new > tol) and it < maxit:  # begin Newton
            Psi_old = Psi_new
            it += 1
            if isWneg:  # stabilise the Newton direction in case W has negative values
                W = np.maximum(
                    W, 0
                )  # stabilise the Hessian to guarantee positive definiteness
                tol = 1e-10  # increase accuracy to also get the derivatives right
            sW = np.sqrt(W)
            L = np.linalg.cholesky(np.eye(n) + np.dot(sW, sW.T) * K).T
            b = W * (f - m) + dlp
            dalpha = b - sW * solve_chol(L, sW * np.dot(K, b)) - alpha
            vargout = brentmin(0, smax, Nline, thr, self.Psi_line, 4, dalpha,
                               alpha, K, m, likfunc, y, inffunc)
            s = vargout[0]
            Psi_new = vargout[1]
            Nfun = vargout[2]
            alpha = vargout[3]
            f = vargout[4]
            dlp = vargout[5]
            W = vargout[6]
            isWneg = np.any(W < 0)
        self.last_alpha = alpha  # remember for next call
        vargout = likfunc.proceed(y, f, None, inffunc, None, 4)
        lp = vargout[0]
        dlp = vargout[1]
        d2lp = vargout[2]
        d3lp = vargout[3]
        W = -d2lp
        isWneg = np.any(W < 0)
        post = postStruct()
        post.alpha = alpha  # return the posterior parameters
        post.sW = np.sqrt(np.abs(W)) * np.sign(
            W)  # preserve sign in case of negative
        if isWneg:
            [ldA, iA, post.L] = self.logdetA(K, W, 3)
            nlZ = np.dot(alpha.T, (f - m)) / 2. - lp.sum() + ldA / 2.
            nlZ = nlZ[0]
        else:
            sW = post.sW
            post.L = np.linalg.cholesky(np.eye(n) + np.dot(sW, sW.T) * K).T
            nlZ = np.dot(alpha.T,
                         (f - m)) / 2. + (np.log(np.diag(post.L)) -
                                          np.reshape(lp,
                                                     (lp.shape[0], ))).sum()
            nlZ = nlZ[0]
        if nargout > 2:  # do we want derivatives?
            dnlZ = dnlZStruct(meanfunc, covfunc,
                              likfunc)  # allocate space for derivatives
            if isWneg:  # switch between Cholesky and LU decomposition mode
                Z = -post.L  # inv(K+inv(W))
                g = np.atleast_2d(
                    (iA * K).sum(axis=1)
                ).T / 2  # deriv. of ln|B| wrt W; g = diag(inv(inv(K)+diag(W)))/2
            else:
                Z = np.tile(sW, (1, n)) * solve_chol(
                    post.L, np.diag(np.reshape(
                        sW, (sW.shape[0], ))))  #sW*inv(B)*sW=inv(K+inv(W))
                C = np.linalg.solve(post.L.T,
                                    np.tile(sW, (1, n)) *
                                    K)  # deriv. of ln|B| wrt W
                g = np.atleast_2d(
                    (np.diag(K) -
                     (C**2).sum(axis=0).T)).T / 2.  # g = diag(inv(inv(K)+W))/2
            dfhat = g * d3lp  # deriv. of nlZ wrt. fhat: dfhat=diag(inv(inv(K)+W)).*d3lp/2
            for ii in range(len(covfunc.hyp)):  # covariance hypers
                dK = covfunc.proceed(x, None, ii)
                dnlZ.cov[ii] = (Z * dK).sum() / 2. - np.dot(
                    alpha.T, np.dot(dK, alpha)) / 2.  # explicit part
                b = np.dot(dK, dlp)  # b-K*(Z*b) = inv(eye(n)+K*diag(W))*b
                dnlZ.cov[ii] -= np.dot(
                    dfhat.T, b - np.dot(K, np.dot(Z, b)))  # implicit part
                dnlZ.cov[ii] = dnlZ.cov[ii][0, 0]
            for ii in range(len(likfunc.hyp)):  # likelihood hypers
                [lp_dhyp, dlp_dhyp,
                 d2lp_dhyp] = likfunc.proceed(y, f, None, inffunc, ii, 3)
                dnlZ.lik[ii] = -np.dot(
                    g.T, d2lp_dhyp) - lp_dhyp.sum()  # explicit part
                b = np.dot(K, dlp_dhyp)  # b-K*(Z*b) = inv(eye(n)+K*diag(W))*b
                dnlZ.lik[ii] -= np.dot(
                    dfhat.T, b - np.dot(K, np.dot(Z, b)))  # implicit part
                dnlZ.lik[ii] = dnlZ.lik[ii][0, 0]
            for ii in range(len(meanfunc.hyp)):  # mean hypers
                dm = meanfunc.proceed(x, ii)
                dnlZ.mean[ii] = -np.dot(alpha.T, dm)  # explicit part
                dnlZ.mean[ii] -= np.dot(
                    dfhat.T, dm - np.dot(K, np.dot(Z, dm)))  # implicit part
                dnlZ.mean[ii] = dnlZ.mean[ii][0, 0]
            return post, nlZ[0], dnlZ
        else:
            return post, nlZ[0]
Example #9
    def proceed(self, meanfunc, covfunc, likfunc, x, y, nargout=1):
        if not isinstance(likfunc,
                          lik.Gauss):  # NOTE: no explicit call to likGauss
            raise Exception(
                'Exact inference only possible with Gaussian likelihood')
        if not isinstance(covfunc, cov.FITCOfKernel):
            raise Exception('Only covFITC supported.')  # check cov

        diagK, Kuu, Ku = covfunc.proceed(x)  # evaluate covariance matrix
        m = meanfunc.proceed(x)  # evaluate mean vector
        n, D = x.shape
        nu = Kuu.shape[0]

        sn2 = np.exp(2 * likfunc.hyp[0])  # noise variance of likGauss
        snu2 = 1.e-6 * sn2  # hard coded inducing inputs noise
        Luu = np.linalg.cholesky(Kuu + snu2 *
                                 np.eye(nu)).T  # Kuu + snu2*I = Luu'*Luu
        V = np.linalg.solve(Luu.T, Ku)  # V = inv(Luu')*Ku => V'*V = Q
        g_sn2 = diagK + sn2 - np.array(
            [(V * V).sum(axis=0)]).T  # g + sn2 = diag(K) + sn2 - diag(Q)
        Lu = np.linalg.cholesky(
            np.eye(nu) +
            np.dot(V / np.tile(g_sn2.T,
                               (nu, 1)), V.T)).T  # Lu'*Lu=I+V*diag(1/g_sn2)*V'
        r = (y - m) / np.sqrt(g_sn2)
        be = np.linalg.solve(Lu.T, np.dot(V, r / np.sqrt(g_sn2)))
        iKuu = solve_chol(Luu, np.eye(nu))  # inv(Kuu + snu2*I) = iKuu

        post = postStruct()
        post.alpha = np.linalg.solve(Luu, np.linalg.solve(
            Lu, be))  # return the posterior parameters
        post.L = solve_chol(np.dot(Lu, Luu),
                            np.eye(nu)) - iKuu  # Sigma-inv(Kuu)
        post.sW = np.ones(
            (n, 1)) / np.sqrt(sn2)  # unused for FITC prediction  with gp.m

        if nargout > 1:  # do we want the marginal likelihood
            nlZ = np.log(np.diag(
                Lu)).sum() + (np.log(g_sn2).sum() + n * np.log(2 * np.pi) +
                              np.dot(r.T, r) - np.dot(be.T, be)) / 2.
            if nargout > 2:  # do we want derivatives?
                dnlZ = dnlZStruct(meanfunc, covfunc,
                                  likfunc)  # allocate space for derivatives
                al = r / np.sqrt(g_sn2) - np.dot(V.T, np.linalg.solve(
                    Lu, be)) / g_sn2  # al = (Kt+sn2*eye(n))\y
                B = np.dot(iKuu, Ku)
                w = np.dot(B, al)
                W = np.linalg.solve(Lu.T, V / np.tile(g_sn2.T, (nu, 1)))
                for ii in range(len(covfunc.hyp)):
                    [ddiagKi, dKuui,
                     dKui] = covfunc.proceed(x, None, ii)  # eval cov deriv
                    R = 2. * dKui - np.dot(dKuui, B)
                    v = ddiagKi - np.array([(R * B).sum(axis=0)
                                            ]).T  # diag part of cov deriv
                    dnlZ.cov[ii] = ( np.dot(ddiagKi.T,1./g_sn2) + np.dot(w.T,(np.dot(dKuui,w)-2.*np.dot(dKui,al))) \
                                   - np.dot(al.T,(v*al)) - np.dot(np.array([(W*W).sum(axis=0)]),v) - (np.dot(R,W.T)*np.dot(B,W.T)).sum() )/2.
                    dnlZ.cov[ii] = dnlZ.cov[ii][0, 0]
                dnlZ.lik = sn2 * (
                    (1. / g_sn2).sum() -
                    (np.array([(W * W).sum(axis=0)])).sum() - np.dot(al.T, al))
                dKuui = 2 * snu2
                R = -dKuui * B
                v = -np.array([(R * B).sum(axis=0)
                               ]).T  # diag part of cov deriv
                dnlZ.lik += (np.dot(w.T,np.dot(dKuui,w)) -np.dot(al.T,(v*al)) \
                                 - np.dot(np.array([(W*W).sum(axis=0)]),v) - (np.dot(R,W.T)*np.dot(B,W.T)).sum() )/2.
                dnlZ.lik = list(dnlZ.lik[0])
                for ii in range(len(meanfunc.hyp)):
                    dnlZ.mean[ii] = np.dot(-meanfunc.proceed(x, ii).T, al)
                    dnlZ.mean[ii] = dnlZ.mean[ii][0, 0]

                return post, nlZ[0, 0], dnlZ
            return post, nlZ[0, 0]
        return post