def proceed(self, meanfunc, covfunc, likfunc, x, y, nargout=1):
    if not isinstance(likfunc, lik.likGauss):
        raise Exception('Exact inference only possible with Gaussian likelihood')
    n, D = x.shape
    K = covfunc.proceed(x)                        # evaluate covariance matrix
    m = meanfunc.proceed(x)                       # evaluate mean vector
    sn2 = np.exp(2.*likfunc.hyp[0])               # noise variance of likGauss
    L = np.linalg.cholesky(K/sn2 + np.eye(n)).T   # Cholesky factor of covariance with noise
    alpha = solve_chol(L, y-m)/sn2
    post = postStruct()
    post.alpha = alpha                            # return the posterior parameters
    post.sW = np.ones((n,1))/np.sqrt(sn2)         # sqrt of noise precision vector
    post.L = L                                    # L = chol(eye(n)+sW*sW'.*K)
    if nargout > 1:                               # do we want the marginal likelihood?
        nlZ = np.dot((y-m).T, alpha)/2. + np.log(np.diag(L)).sum() \
            + n*np.log(2*np.pi*sn2)/2.            # -log marg lik
        if nargout > 2:                           # do we want derivatives?
            dnlZ = dnlZStruct(meanfunc, covfunc, likfunc)  # allocate space for derivatives
            Q = solve_chol(L, np.eye(n))/sn2 - np.dot(alpha, alpha.T)  # precompute for convenience
            dnlZ.lik = [sn2*np.trace(Q)]
            if covfunc.hyp:
                for ii in range(len(covfunc.hyp)):
                    dnlZ.cov[ii] = (Q*covfunc.proceed(x, None, ii)).sum()/2.
            if meanfunc.hyp:
                for ii in range(len(meanfunc.hyp)):
                    dnlZ.mean[ii] = np.dot(-meanfunc.proceed(x, ii).T, alpha)
            return [post, nlZ[0][0], dnlZ]
        return [post, nlZ[0][0]]
    return [post]
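# solve_chol is called above but not defined in this snippet. A minimal sketch,
# assuming the GPML convention used throughout this file: L is the *upper*
# triangular factor (np.linalg.cholesky(A).T), so A = L'*L and A\B amounts to
# two triangular solves. The scipy dependency is illustrative.
import numpy as np
from scipy.linalg import solve_triangular

def solve_chol(L, B):
    # Solve A*X = B with A = L'*L: first L'*Z = B (lower), then L*X = Z (upper).
    Z = solve_triangular(L.T, B, lower=True)
    return solve_triangular(L, Z, lower=False)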
def evaluate(self, meanfunc, covfunc, likfunc, x, y, nargout=1):
    if not isinstance(likfunc, lik.Gauss):        # NOTE: no explicit call to likGauss
        raise Exception('Exact inference only possible with Gaussian likelihood')
    if not isinstance(covfunc, cov.FITCOfKernel):
        raise Exception('Only covFITC supported.')  # check cov
    diagK, Kuu, Ku = covfunc.getCovMatrix(x=x, mode='train')  # evaluate covariance matrix
    m = meanfunc.getMean(x)                       # evaluate mean vector
    n, D = x.shape
    nu = Kuu.shape[0]
    sn2 = np.exp(2*likfunc.hyp[0])                # noise variance of likGauss
    snu2 = 1.e-6*sn2                              # hard coded inducing inputs noise
    Luu = np.linalg.cholesky(Kuu + snu2*np.eye(nu)).T  # Kuu + snu2*I = Luu'*Luu
    V = np.linalg.solve(Luu.T, Ku)                # V = inv(Luu')*Ku => V'*V = Q
    g_sn2 = diagK + sn2 - np.array([(V*V).sum(axis=0)]).T  # g + sn2 = diag(K) + sn2 - diag(Q)
    Lu = np.linalg.cholesky(np.eye(nu) + np.dot(V/np.tile(g_sn2.T, (nu,1)), V.T)).T  # Lu'*Lu = I + V*diag(1/g_sn2)*V'
    r = (y-m)/np.sqrt(g_sn2)
    be = np.linalg.solve(Lu.T, np.dot(V, r/np.sqrt(g_sn2)))
    iKuu = solve_chol(Luu, np.eye(nu))            # inv(Kuu + snu2*I) = iKuu
    post = postStruct()
    post.alpha = np.linalg.solve(Luu, np.linalg.solve(Lu, be))  # return the posterior parameters
    post.L = solve_chol(np.dot(Lu, Luu), np.eye(nu)) - iKuu     # Sigma - inv(Kuu)
    post.sW = np.ones((n,1))/np.sqrt(sn2)         # unused for FITC prediction with gp.m
    if nargout > 1:                               # do we want the marginal likelihood?
        nlZ = np.log(np.diag(Lu)).sum() + (np.log(g_sn2).sum() + n*np.log(2*np.pi)
                                           + np.dot(r.T, r) - np.dot(be.T, be))/2.
        if nargout > 2:                           # do we want derivatives?
            dnlZ = dnlZStruct(meanfunc, covfunc, likfunc)  # allocate space for derivatives
            al = r/np.sqrt(g_sn2) - np.dot(V.T, np.linalg.solve(Lu, be))/g_sn2  # al = (Kt+sn2*eye(n))\y
            B = np.dot(iKuu, Ku)
            w = np.dot(B, al)
            W = np.linalg.solve(Lu.T, V/np.tile(g_sn2.T, (nu,1)))
            for ii in range(len(covfunc.hyp)):
                ddiagKi, dKuui, dKui = covfunc.getDerMatrix(x=x, mode='train', der=ii)  # eval cov deriv
                R = 2.*dKui - np.dot(dKuui, B)
                v = ddiagKi - np.array([(R*B).sum(axis=0)]).T  # diag part of cov deriv
                dnlZ.cov[ii] = (np.dot(ddiagKi.T, 1./g_sn2)
                                + np.dot(w.T, np.dot(dKuui, w) - 2.*np.dot(dKui, al))
                                - np.dot(al.T, v*al)
                                - np.dot(np.array([(W*W).sum(axis=0)]), v)
                                - (np.dot(R, W.T)*np.dot(B, W.T)).sum())/2.
                dnlZ.cov[ii] = dnlZ.cov[ii][0,0]
            dnlZ.lik = sn2*((1./g_sn2).sum() - (np.array([(W*W).sum(axis=0)])).sum() - np.dot(al.T, al))
            dKuui = 2*snu2                        # derivative of the inducing noise snu2*I wrt the noise hyper
            R = -dKuui*B
            v = -np.array([(R*B).sum(axis=0)]).T  # diag part of cov deriv
            dnlZ.lik += (np.dot(w.T, np.dot(dKuui, w)) - np.dot(al.T, v*al)
                         - np.dot(np.array([(W*W).sum(axis=0)]), v)
                         - (np.dot(R, W.T)*np.dot(B, W.T)).sum())/2.
            dnlZ.lik = list(dnlZ.lik[0])
            for ii in range(len(meanfunc.hyp)):
                dnlZ.mean[ii] = np.dot(-meanfunc.getDerMatrix(x, ii).T, al)
                dnlZ.mean[ii] = dnlZ.mean[ii][0,0]
            return post, nlZ[0,0], dnlZ
        return post, nlZ[0,0]
    return post
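# A standalone numerical illustration (toy data, hypothetical kernel; not
# library code) of the identity the comments above rely on: with
# Kuu + snu2*I = Luu'*Luu and V = inv(Luu')*Ku, the low-rank FITC term
# Q = Ku'*inv(Kuu + snu2*I)*Ku equals V'*V.
import numpy as np

rng = np.random.RandomState(0)
Xu, X = rng.randn(3, 1), rng.randn(5, 1)      # 3 inducing inputs, 5 training inputs
k = lambda a, b: np.exp(-0.5*(a - b.T)**2)    # unit RBF kernel, demo only
Kuu = k(Xu, Xu) + 1e-6*np.eye(3)              # jittered, as with snu2 above
Ku = k(Xu, X)
Luu = np.linalg.cholesky(Kuu).T               # Kuu = Luu'*Luu
V = np.linalg.solve(Luu.T, Ku)                # V = inv(Luu')*Ku
Q = np.dot(Ku.T, np.linalg.solve(Kuu, Ku))
assert np.allclose(np.dot(V.T, V), Q)         # V'*V reproduces Q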
def evaluate(self, meanfunc, covfunc, likfunc, x, y, scaleprior=None, nargout=1):
    if not isinstance(likfunc, lik.Gauss):
        raise Exception('Exact inference only possible with Gaussian likelihood')
    n, D = x.shape
    K = covfunc.getCovMatrix(x=x, mode='train')   # evaluate covariance matrix
    m = meanfunc.getMean(x)                       # evaluate mean vector
    sn2 = np.exp(2*likfunc.hyp[0])                # noise variance of likGauss
    L = np.linalg.cholesky(K/sn2 + np.eye(n)).T   # Cholesky factor of covariance with noise
    alpha = solve_chol(L, y-m)/sn2
    post = postStruct()
    post.alpha = alpha                            # return the posterior parameters
    post.sW = np.ones((n,1))/np.sqrt(sn2)         # sqrt of noise precision vector
    post.L = L                                    # L = chol(eye(n)+sW*sW'.*K)
    if nargout > 1:                               # do we want the marginal likelihood?
        if scaleprior:
            alpha0, beta0 = scaleprior
            df = 2*alpha0
            Z = -scspec.gammaln(0.5*(df+n)) + scspec.gammaln(0.5*df) + 0.5*n*np.log(2*np.pi*beta0)
            dscale = np.log(1.0 + np.dot((y-m).T, alpha)/(2.0*beta0))
            nlZ = 0.5*(df+n)*dscale + np.log(np.diag(L)).sum() + Z
        else:
            nlZ = np.dot((y-m).T, alpha)/2. + np.log(np.diag(L)).sum() \
                + n*np.log(2*np.pi*sn2)/2.        # -log marg lik
        if nargout > 2:                           # do we want derivatives?
            dnlZ = dnlZStruct(meanfunc, covfunc, likfunc)  # allocate space for derivatives
            if scaleprior:
                corrFactor = (df+n)/(2*beta0 + np.dot((y-m).T, alpha))
                Q = solve_chol(L, np.eye(n))/sn2 - corrFactor*np.dot(alpha, alpha.T)  # precompute for convenience
                dscale += scspec.digamma(alpha0) - scspec.digamma(alpha0 + 0.5*n)
            else:
                Q = solve_chol(L, np.eye(n))/sn2 - np.dot(alpha, alpha.T)  # precompute for convenience
            dnlZ.lik = [sn2*np.trace(Q)]
            if covfunc.hyp:
                for ii in range(len(covfunc.hyp)):
                    dnlZ.cov[ii] = (Q*covfunc.getDerMatrix(x=x, mode='train', der=ii)).sum()/2.
            if meanfunc.hyp:
                for ii in range(len(meanfunc.hyp)):
                    dnlZ.mean[ii] = np.dot(-meanfunc.getDerMatrix(x, ii).T, alpha)
                    dnlZ.mean[ii] = dnlZ.mean[ii][0,0]
            if scaleprior:
                return post, nlZ[0,0], dnlZ, dscale
            else:
                return post, nlZ[0,0], dnlZ
        return post, nlZ[0,0]
    return post
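# Sanity check of the scaleprior branch above (standalone, toy numbers; not
# library code). With an inverse-gamma prior on an overall scale, the Gaussian
# evidence becomes a Student-t one with df = 2*alpha0. As alpha0 -> inf with
# beta0 = alpha0 (prior mass concentrating at scale 1), the Student-t nlZ must
# converge to the plain Gaussian nlZ; the common sum(log(diag(L))) term is
# omitted from both sides, and q stands in for the quadratic form (y-m)'*alpha.
import numpy as np
from scipy import special as scspec

n, q = 4, 1.7
for alpha0 in [1e2, 1e4, 1e6]:
    beta0, df = alpha0, 2*alpha0
    Z = -scspec.gammaln(0.5*(df+n)) + scspec.gammaln(0.5*df) + 0.5*n*np.log(2*np.pi*beta0)
    nlZ_t = 0.5*(df+n)*np.log(1.0 + q/(2.0*beta0)) + Z
    nlZ_gauss = q/2. + n*np.log(2*np.pi)/2.
    print(alpha0, nlZ_t - nlZ_gauss)          # difference -> 0 as alpha0 grows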
def proceed(self, meanfunc, covfunc, likfunc, x, y, nargout=1):
    if not isinstance(likfunc, lik.Gauss):
        raise Exception('Exact inference only possible with Gaussian likelihood')
    n, D = x.shape
    K = covfunc.proceed(x)                        # evaluate covariance matrix
    m = meanfunc.proceed(x)                       # evaluate mean vector
    sn2 = np.exp(2*likfunc.hyp[0])                # noise variance of likGauss
    L = np.linalg.cholesky(K/sn2 + np.eye(n)).T   # Cholesky factor of covariance with noise
    alpha = solve_chol(L, y-m)/sn2
    post = postStruct()
    post.alpha = alpha                            # return the posterior parameters
    post.sW = np.ones((n,1))/np.sqrt(sn2)         # sqrt of noise precision vector
    post.L = L                                    # L = chol(eye(n)+sW*sW'.*K)
    if nargout > 1:                               # do we want the marginal likelihood?
        nlZ = np.dot((y-m).T, alpha)/2. + np.log(np.diag(L)).sum() \
            + n*np.log(2*np.pi*sn2)/2.            # -log marg lik
        if nargout > 2:                           # do we want derivatives?
            dnlZ = dnlZStruct(meanfunc, covfunc, likfunc)  # allocate space for derivatives
            Q = solve_chol(L, np.eye(n))/sn2 - np.dot(alpha, alpha.T)  # precompute for convenience
            dnlZ.lik = [sn2*np.trace(Q)]
            if covfunc.hyp:
                for ii in range(len(covfunc.hyp)):
                    dnlZ.cov[ii] = (Q*covfunc.proceed(x, None, ii)).sum()/2.
            if meanfunc.hyp:
                for ii in range(len(meanfunc.hyp)):
                    dnlZ.mean[ii] = np.dot(-meanfunc.proceed(x, ii).T, alpha)
                    dnlZ.mean[ii] = dnlZ.mean[ii][0, 0]
            return post, nlZ[0, 0], dnlZ
        return post, nlZ[0, 0]
    return post
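# The nlZ expression above encodes the identity
#   log|K + sn2*I| = 2*sum(log(diag(L))) + n*log(sn2),  L = chol(K/sn2 + I).T,
# which is why only sum(log(diag(L))) and n*log(2*pi*sn2)/2 appear explicitly.
# A standalone check on a random SPD matrix:
import numpy as np

rng = np.random.RandomState(1)
A = rng.randn(5, 5)
K, sn2, n = np.dot(A, A.T), 0.3, 5
L = np.linalg.cholesky(K/sn2 + np.eye(n)).T
lhs = np.linalg.slogdet(K + sn2*np.eye(n))[1]
rhs = 2*np.log(np.diag(L)).sum() + n*np.log(sn2)
assert np.allclose(lhs, rhs)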
def proceed(self, meanfunc, covfunc, likfunc, x, y, nargout=1):
    tol = 1e-4; max_sweep = 10; min_sweep = 2     # tolerance to stop EP iterations
    n = x.shape[0]
    inffunc = self
    K = covfunc.proceed(x)                        # evaluate the covariance matrix
    m = meanfunc.proceed(x)                       # evaluate the mean vector
    nlZ0 = -likfunc.proceed(y, m, np.reshape(np.diag(K), (np.diag(K).shape[0],1)), inffunc).sum()
    if self.last_ttau is None:                    # find starting point for tilde parameters
        ttau = np.zeros((n,1))                    # initialize to zero if we have no better guess
        tnu = np.zeros((n,1))
        Sigma = K                                 # initialize Sigma and mu, the parameters of ..
        mu = np.zeros((n,1))                      # .. the Gaussian posterior approximation
        nlZ = nlZ0
    else:
        ttau = self.last_ttau                     # try the tilde values from previous call
        tnu = self.last_tnu
        Sigma, mu, nlZ, L = self.epComputeParams(K, y, ttau, tnu, likfunc, m, inffunc)
        if nlZ > nlZ0:                            # if zero is better ..
            ttau = np.zeros((n,1))                # .. then initialize with zero instead
            tnu = np.zeros((n,1))
            Sigma = K                             # initialize Sigma and mu, the parameters of ..
            mu = np.zeros((n,1))                  # .. the Gaussian posterior approximation
            nlZ = nlZ0
    nlZ_old = np.inf; sweep = 0                   # converged, max. sweeps or min. sweeps?
    while (np.abs(nlZ-nlZ_old) > tol and sweep < max_sweep) or (sweep < min_sweep):
        nlZ_old = nlZ; sweep += 1
        rperm = range(n)                          # fixed order; the MATLAB code uses randperm(n)
        for ii in rperm:                          # iterate EP updates (in random order) over examples
            tau_ni = 1/Sigma[ii,ii] - ttau[ii]    # first find the cavity distribution ..
            nu_ni = mu[ii]/Sigma[ii,ii] + m[ii]*tau_ni - tnu[ii]  # .. params tau_ni and nu_ni
            # compute the desired derivatives of the individual log partition function
            vargout = likfunc.proceed(y[ii], nu_ni/tau_ni, 1/tau_ni, inffunc, None, 3)
            lZ = vargout[0]; dlZ = vargout[1]; d2lZ = vargout[2]
            ttau_old = copy(ttau[ii])             # then find the new tilde parameters, keep copy of old
            ttau[ii] = -d2lZ/(1. + d2lZ/tau_ni)
            ttau[ii] = max(ttau[ii], 0)           # enforce positivity, i.e. lower bound ttau by zero
            tnu[ii] = (dlZ + (m[ii] - nu_ni/tau_ni)*d2lZ)/(1. + d2lZ/tau_ni)
            ds2 = ttau[ii] - ttau_old             # finally rank-1 update Sigma ..
            si = np.reshape(Sigma[:,ii], (Sigma.shape[0],1))
            Sigma = Sigma - ds2/(1. + ds2*si[ii])*np.dot(si, si.T)  # takes 70% of total time
            mu = np.dot(Sigma, tnu)               # .. and recompute mu
        # recompute since repeated rank-one updates can destroy numerical precision
        Sigma, mu, nlZ, L = self.epComputeParams(K, y, ttau, tnu, likfunc, m, inffunc)
    if sweep == max_sweep:
        raise Exception('maximum number of sweeps reached in function infEP')
    self.last_ttau = ttau; self.last_tnu = tnu    # remember for next call
    sW = np.sqrt(ttau)
    alpha = tnu - sW*solve_chol(L, sW*np.dot(K, tnu))
    post = postStruct()
    post.alpha = alpha                            # return the posterior params
    post.sW = sW
    post.L = L
    if nargout > 2:                               # do we want derivatives?
        dnlZ = dnlZStruct(meanfunc, covfunc, likfunc)  # allocate space for derivatives
        ssi = np.sqrt(ttau)
        V = np.linalg.solve(L.T, np.tile(ssi, (1,n))*K)
        Sigma = K - np.dot(V.T, V)
        mu = np.dot(Sigma, tnu)
        Dsigma = np.reshape(np.diag(Sigma), (np.diag(Sigma).shape[0],1))
        tau_n = 1/Dsigma - ttau                   # compute the log marginal likelihood
        nu_n = mu/Dsigma - tnu                    # vectors of cavity parameters
        F = np.dot(alpha, alpha.T) - np.tile(sW, (1,n)) * \
            solve_chol(L, np.diag(np.reshape(sW, (sW.shape[0],))))
        for jj in range(len(covfunc.hyp)):        # covariance hypers
            dK = covfunc.proceed(x, None, jj)
            dnlZ.cov[jj] = -(F*dK).sum()/2.
        for ii in range(len(likfunc.hyp)):        # likelihood hypers
            dlik = likfunc.proceed(y, nu_n/tau_n, 1/tau_n, inffunc, ii)
            dnlZ.lik[ii] = -dlik.sum()
        junk, dlZ = likfunc.proceed(y, nu_n/tau_n, 1/tau_n, inffunc, None, 2)
        for ii in range(len(meanfunc.hyp)):       # mean hypers
            dm = meanfunc.proceed(x, ii)
            dnlZ.mean[ii] = -np.dot(dlZ.T, dm)
            dnlZ.mean[ii] = dnlZ.mean[ii][0][0]
        vargout = [post, nlZ[0], dnlZ]
    else:
        vargout = [post, nlZ[0]]
    return vargout
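# epComputeParams is called above but not shown. A minimal sketch of its
# posterior-parameter part, following the GPML reference implementation (the
# real routine additionally assembles nlZ from the cavity moments; the function
# name here is illustrative):
import numpy as np

def ep_compute_params_sketch(K, ttau, tnu):
    # From the site parameters (ttau, tnu), recompute the Gaussian posterior
    # Sigma = (inv(K) + diag(ttau))^-1 and mu = Sigma*tnu in one stable pass
    # via L = chol(I + sW*K*sW)', rather than by repeated rank-1 updates.
    n = K.shape[0]
    sW = np.sqrt(ttau)                            # valid since ttau >= 0 is enforced above
    L = np.linalg.cholesky(np.eye(n) + np.dot(sW, sW.T)*K).T
    V = np.linalg.solve(L.T, np.tile(sW, (1, n))*K)
    Sigma = K - np.dot(V.T, V)                    # Woodbury form of the posterior covariance
    mu = np.dot(Sigma, tnu)                       # posterior mean (zero prior mean)
    return Sigma, mu, L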
def proceed(self, meanfunc, covfunc, likfunc, x, y, nargout=1):
    tol = 1e-6                                    # tolerance for when to stop the Newton iterations
    smax = 2; Nline = 20; thr = 1e-4              # line search parameters
    maxit = 20                                    # max number of Newton steps in f
    inffunc = self
    K = covfunc.proceed(x)                        # evaluate the covariance matrix
    m = meanfunc.proceed(x)                       # evaluate the mean vector
    n, D = x.shape
    Psi_old = np.inf                              # make sure while loop starts by the largest old objective val
    if self.last_alpha is None:                   # find a good starting point for alpha and f
        alpha = np.zeros((n,1))
        f = np.dot(K, alpha) + m                  # start at the mean if the sizes do not match
        vargout = likfunc.proceed(y, f, None, inffunc, None, 3)
        lp = vargout[0]; dlp = vargout[1]; d2lp = vargout[2]
        W = -d2lp
        Psi_new = -lp.sum()
    else:
        alpha = self.last_alpha
        f = np.dot(K, alpha) + m                  # try the last one
        vargout = likfunc.proceed(y, f, None, inffunc, None, 3)
        lp = vargout[0]; dlp = vargout[1]; d2lp = vargout[2]
        W = -d2lp
        Psi_new = np.dot(alpha.T, f-m)/2. - lp.sum()  # objective for last alpha
        vargout = -likfunc.proceed(y, m, None, inffunc, None, 1)
        Psi_def = vargout[0]                      # objective for default init f == m
        if Psi_def < Psi_new:                     # if default is better, we use it
            alpha = np.zeros((n,1))
            f = np.dot(K, alpha) + m
            vargout = likfunc.proceed(y, f, None, inffunc, None, 3)
            lp = vargout[0]; dlp = vargout[1]; d2lp = vargout[2]
            W = -d2lp
            Psi_new = -lp.sum()
    isWneg = np.any(W < 0)                        # flag indicating whether we found negative values of W
    it = 0                                        # this happens for the Student's t likelihood
    while (Psi_old - Psi_new > tol) and it < maxit:  # begin Newton
        Psi_old = Psi_new; it += 1
        if isWneg:                                # stabilise the Newton direction in case W has negative values
            W = np.maximum(W, 0)                  # stabilise the Hessian to guarantee positive definiteness
            tol = 1e-10                           # increase accuracy to also get the derivatives right
        sW = np.sqrt(W)
        L = np.linalg.cholesky(np.eye(n) + np.dot(sW, sW.T)*K).T
        b = W*(f-m) + dlp
        dalpha = b - sW*solve_chol(L, sW*np.dot(K, b)) - alpha
        vargout = brentmin(0, smax, Nline, thr, self.Psi_line, 4, dalpha, alpha, K, m, likfunc, y, inffunc)
        s = vargout[0]; Psi_new = vargout[1]; Nfun = vargout[2]
        alpha = vargout[3]; f = vargout[4]; dlp = vargout[5]; W = vargout[6]
        isWneg = np.any(W < 0)
    self.last_alpha = alpha                       # remember for next call
    vargout = likfunc.proceed(y, f, None, inffunc, None, 4)
    lp = vargout[0]; dlp = vargout[1]; d2lp = vargout[2]; d3lp = vargout[3]
    W = -d2lp
    isWneg = np.any(W < 0)
    post = postStruct()
    post.alpha = alpha                            # return the posterior parameters
    post.sW = np.sqrt(np.abs(W))*np.sign(W)       # preserve sign in case of negative
    if isWneg:
        ldA, iA, post.L = self.logdetA(K, W, 3)
        nlZ = np.dot(alpha.T, f-m)/2. - lp.sum() + ldA/2.
        nlZ = nlZ[0]
    else:
        sW = post.sW
        post.L = np.linalg.cholesky(np.eye(n) + np.dot(sW, sW.T)*K).T
        nlZ = np.dot(alpha.T, f-m)/2. + (np.log(np.diag(post.L)) - np.reshape(lp, (lp.shape[0],))).sum()
        nlZ = nlZ[0]
    if nargout > 2:                               # do we want derivatives?
        dnlZ = dnlZStruct(meanfunc, covfunc, likfunc)  # allocate space for derivatives
        if isWneg:                                # switch between Cholesky and LU decomposition mode
            Z = -post.L                           # inv(K + inv(W))
            g = np.atleast_2d((iA*K).sum(axis=1)).T/2  # deriv. of ln|B| wrt W; g = diag(inv(inv(K)+diag(W)))/2
        else:
            Z = np.tile(sW, (1,n))*solve_chol(post.L, np.diag(np.reshape(sW, (sW.shape[0],))))  # sW*inv(B)*sW = inv(K+inv(W))
            C = np.linalg.solve(post.L.T, np.tile(sW, (1,n))*K)  # deriv. of ln|B| wrt W
            g = np.atleast_2d(np.diag(K) - (C**2).sum(axis=0).T).T/2.  # g = diag(inv(inv(K)+W))/2
        dfhat = g*d3lp                            # deriv. of nlZ wrt fhat: dfhat = diag(inv(inv(K)+W)).*d3lp/2
        for ii in range(len(covfunc.hyp)):        # covariance hypers
            dK = covfunc.proceed(x, None, ii)
            dnlZ.cov[ii] = (Z*dK).sum()/2. - np.dot(alpha.T, np.dot(dK, alpha))/2.  # explicit part
            b = np.dot(dK, dlp)                   # b - K*(Z*b) = inv(eye(n)+K*diag(W))*b
            dnlZ.cov[ii] -= np.dot(dfhat.T, b - np.dot(K, np.dot(Z, b)))  # implicit part
            dnlZ.cov[ii] = dnlZ.cov[ii][0][0]
        for ii in range(len(likfunc.hyp)):        # likelihood hypers
            lp_dhyp, dlp_dhyp, d2lp_dhyp = likfunc.proceed(y, f, None, inffunc, ii, 3)
            dnlZ.lik[ii] = -np.dot(g.T, d2lp_dhyp) - lp_dhyp.sum()  # explicit part
            b = np.dot(K, dlp_dhyp)               # b - K*(Z*b) = inv(eye(n)+K*diag(W))*b
            dnlZ.lik[ii] -= np.dot(dfhat.T, b - np.dot(K, np.dot(Z, b)))  # implicit part
            dnlZ.lik[ii] = dnlZ.lik[ii][0][0]
        for ii in range(len(meanfunc.hyp)):       # mean hypers
            dm = meanfunc.proceed(x, ii)
            dnlZ.mean[ii] = -np.dot(alpha.T, dm)  # explicit part
            dnlZ.mean[ii] -= np.dot(dfhat.T, dm - np.dot(K, np.dot(Z, dm)))  # implicit part
            dnlZ.mean[ii] = dnlZ.mean[ii][0][0]
        vargout = [post, nlZ[0], dnlZ]
    else:
        vargout = [post, nlZ[0]]
    return vargout
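# Psi_line (passed to brentmin above) is not shown in this snippet. A minimal
# sketch consistent with how its outputs are unpacked above (the name and exact
# return order are assumptions): it evaluates the penalized negative log
# likelihood along the Newton direction and returns the quantities the caller
# reuses, avoiding a second likelihood evaluation.
def Psi_line_sketch(s, dalpha, alpha, K, m, likfunc, y, inffunc):
    # Psi(alpha) = alpha'*(f - m)/2 - sum(log p(y|f)),  f = K*alpha + m,
    # evaluated at alpha + s*dalpha.
    alpha = alpha + s*dalpha
    f = np.dot(K, alpha) + m
    vargout = likfunc.proceed(y, f, None, inffunc, None, 3)
    lp, dlp, d2lp = vargout[0], vargout[1], vargout[2]
    W = -d2lp
    Psi = np.dot(alpha.T, f - m)/2. - lp.sum()
    return Psi, alpha, f, dlp, W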
def proceed(self, meanfunc, covfunc, likfunc, x, y, nargout=1):
    tol = 1e-4
    max_sweep = 10
    min_sweep = 2                                 # tolerance to stop EP iterations
    n = x.shape[0]
    inffunc = self
    K = covfunc.proceed(x)                        # evaluate the covariance matrix
    m = meanfunc.proceed(x)                       # evaluate the mean vector
    nlZ0 = -likfunc.proceed(y, m, np.reshape(np.diag(K), (np.diag(K).shape[0], 1)), inffunc).sum()
    if self.last_ttau is None:                    # find starting point for tilde parameters
        ttau = np.zeros((n, 1))                   # initialize to zero if we have no better guess
        tnu = np.zeros((n, 1))
        Sigma = K                                 # initialize Sigma and mu, the parameters of ..
        mu = np.zeros((n, 1))                     # .. the Gaussian posterior approximation
        nlZ = nlZ0
    else:
        ttau = self.last_ttau                     # try the tilde values from previous call
        tnu = self.last_tnu
        Sigma, mu, nlZ, L = self.epComputeParams(K, y, ttau, tnu, likfunc, m, inffunc)
        if nlZ > nlZ0:                            # if zero is better ..
            ttau = np.zeros((n, 1))               # .. then initialize with zero instead
            tnu = np.zeros((n, 1))
            Sigma = K                             # initialize Sigma and mu, the parameters of ..
            mu = np.zeros((n, 1))                 # .. the Gaussian posterior approximation
            nlZ = nlZ0
    nlZ_old = np.inf
    sweep = 0                                     # converged, max. sweeps or min. sweeps?
    while (np.abs(nlZ - nlZ_old) > tol and sweep < max_sweep) or (sweep < min_sweep):
        nlZ_old = nlZ
        sweep += 1
        rperm = range(n)                          # fixed order; the MATLAB code uses randperm(n)
        for ii in rperm:                          # iterate EP updates (in random order) over examples
            tau_ni = 1/Sigma[ii, ii] - ttau[ii]   # first find the cavity distribution ..
            nu_ni = mu[ii]/Sigma[ii, ii] + m[ii]*tau_ni - tnu[ii]  # .. params tau_ni and nu_ni
            # compute the desired derivatives of the individual log partition function
            lZ, dlZ, d2lZ = likfunc.proceed(y[ii], nu_ni/tau_ni, 1/tau_ni, inffunc, None, 3)
            ttau_old = copy(ttau[ii])             # then find the new tilde parameters, keep copy of old
            ttau[ii] = -d2lZ/(1. + d2lZ/tau_ni)
            ttau[ii] = max(ttau[ii], 0)           # enforce positivity, i.e. lower bound ttau by zero
            tnu[ii] = (dlZ + (m[ii] - nu_ni/tau_ni)*d2lZ)/(1. + d2lZ/tau_ni)
            ds2 = ttau[ii] - ttau_old             # finally rank-1 update Sigma ..
            si = np.reshape(Sigma[:, ii], (Sigma.shape[0], 1))
            Sigma = Sigma - ds2/(1. + ds2*si[ii])*np.dot(si, si.T)  # takes 70% of total time
            mu = np.dot(Sigma, tnu)               # .. and recompute mu
        # recompute since repeated rank-one updates can destroy numerical precision
        Sigma, mu, nlZ, L = self.epComputeParams(K, y, ttau, tnu, likfunc, m, inffunc)
    if sweep == max_sweep:
        print('maximum number of sweeps reached in function infEP')
    self.last_ttau = ttau                         # remember for next call
    self.last_tnu = tnu
    sW = np.sqrt(ttau)
    alpha = tnu - sW*solve_chol(L, sW*np.dot(K, tnu))
    post = postStruct()
    post.alpha = alpha                            # return the posterior params
    post.sW = sW
    post.L = L
    if nargout > 2:                               # do we want derivatives?
        dnlZ = dnlZStruct(meanfunc, covfunc, likfunc)  # allocate space for derivatives
        ssi = np.sqrt(ttau)
        V = np.linalg.solve(L.T, np.tile(ssi, (1, n))*K)
        Sigma = K - np.dot(V.T, V)
        mu = np.dot(Sigma, tnu)
        Dsigma = np.reshape(np.diag(Sigma), (np.diag(Sigma).shape[0], 1))
        tau_n = 1/Dsigma - ttau                   # compute the log marginal likelihood
        nu_n = mu/Dsigma - tnu                    # vectors of cavity parameters
        F = np.dot(alpha, alpha.T) - np.tile(sW, (1, n)) * \
            solve_chol(L, np.diag(np.reshape(sW, (sW.shape[0],))))
        for jj in range(len(covfunc.hyp)):        # covariance hypers
            dK = covfunc.proceed(x, None, jj)
            dnlZ.cov[jj] = -(F*dK).sum()/2.
        for ii in range(len(likfunc.hyp)):        # likelihood hypers
            dlik = likfunc.proceed(y, nu_n/tau_n, 1/tau_n, inffunc, ii)
            dnlZ.lik[ii] = -dlik.sum()
        junk, dlZ = likfunc.proceed(y, nu_n/tau_n, 1/tau_n, inffunc, None, 2)
        for ii in range(len(meanfunc.hyp)):       # mean hypers
            dm = meanfunc.proceed(x, ii)
            dnlZ.mean[ii] = -np.dot(dlZ.T, dm)
            dnlZ.mean[ii] = dnlZ.mean[ii][0, 0]
        return post, nlZ[0], dnlZ
    else:
        return post, nlZ[0]
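# Standalone check (toy numbers) of the rank-1 update inside the EP sweep
# above: raising site precision ii by ds2 is the Sherman-Morrison update
#   Sigma <- Sigma - ds2/(1 + ds2*Sigma[ii,ii]) * si*si',  si = Sigma[:,ii],
# of (inv(Sigma) + ds2*e_ii*e_ii')^-1.
import numpy as np

rng = np.random.RandomState(2)
A = rng.randn(4, 4)
Sigma = np.dot(A, A.T) + np.eye(4)            # an SPD stand-in for the posterior
ii, ds2 = 1, 0.7
si = Sigma[:, ii:ii+1]
updated = Sigma - ds2/(1. + ds2*Sigma[ii, ii])*np.dot(si, si.T)
e = np.eye(4)[:, ii:ii+1]
direct = np.linalg.inv(np.linalg.inv(Sigma) + ds2*np.dot(e, e.T))
assert np.allclose(updated, direct)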
def proceed(self, meanfunc, covfunc, likfunc, x, y, nargout=1):
    tol = 1e-6                                    # tolerance for when to stop the Newton iterations
    smax = 2
    Nline = 20
    thr = 1e-4                                    # line search parameters
    maxit = 20                                    # max number of Newton steps in f
    inffunc = self
    K = covfunc.proceed(x)                        # evaluate the covariance matrix
    m = meanfunc.proceed(x)                       # evaluate the mean vector
    n, D = x.shape
    Psi_old = np.inf                              # make sure while loop starts by the largest old objective val
    if self.last_alpha is None:                   # find a good starting point for alpha and f
        alpha = np.zeros((n, 1))
        f = np.dot(K, alpha) + m                  # start at the mean if the sizes do not match
        vargout = likfunc.proceed(y, f, None, inffunc, None, 3)
        lp = vargout[0]
        dlp = vargout[1]
        d2lp = vargout[2]
        W = -d2lp
        Psi_new = -lp.sum()
    else:
        alpha = self.last_alpha
        f = np.dot(K, alpha) + m                  # try the last one
        vargout = likfunc.proceed(y, f, None, inffunc, None, 3)
        lp = vargout[0]
        dlp = vargout[1]
        d2lp = vargout[2]
        W = -d2lp
        Psi_new = np.dot(alpha.T, f - m)/2. - lp.sum()  # objective for last alpha
        vargout = -likfunc.proceed(y, m, None, inffunc, None, 1)
        Psi_def = vargout[0]                      # objective for default init f == m
        if Psi_def < Psi_new:                     # if default is better, we use it
            alpha = np.zeros((n, 1))
            f = np.dot(K, alpha) + m
            vargout = likfunc.proceed(y, f, None, inffunc, None, 3)
            lp = vargout[0]
            dlp = vargout[1]
            d2lp = vargout[2]
            W = -d2lp
            Psi_new = -lp.sum()
    isWneg = np.any(W < 0)                        # flag indicating whether we found negative values of W
    it = 0                                        # this happens for the Student's t likelihood
    while (Psi_old - Psi_new > tol) and it < maxit:  # begin Newton
        Psi_old = Psi_new
        it += 1
        if isWneg:                                # stabilise the Newton direction in case W has negative values
            W = np.maximum(W, 0)                  # stabilise the Hessian to guarantee positive definiteness
            tol = 1e-10                           # increase accuracy to also get the derivatives right
        sW = np.sqrt(W)
        L = np.linalg.cholesky(np.eye(n) + np.dot(sW, sW.T)*K).T
        b = W*(f - m) + dlp
        dalpha = b - sW*solve_chol(L, sW*np.dot(K, b)) - alpha
        vargout = brentmin(0, smax, Nline, thr, self.Psi_line, 4, dalpha, alpha, K, m, likfunc, y, inffunc)
        s = vargout[0]
        Psi_new = vargout[1]
        Nfun = vargout[2]
        alpha = vargout[3]
        f = vargout[4]
        dlp = vargout[5]
        W = vargout[6]
        isWneg = np.any(W < 0)
    self.last_alpha = alpha                       # remember for next call
    vargout = likfunc.proceed(y, f, None, inffunc, None, 4)
    lp = vargout[0]
    dlp = vargout[1]
    d2lp = vargout[2]
    d3lp = vargout[3]
    W = -d2lp
    isWneg = np.any(W < 0)
    post = postStruct()
    post.alpha = alpha                            # return the posterior parameters
    post.sW = np.sqrt(np.abs(W))*np.sign(W)       # preserve sign in case of negative
    if isWneg:
        ldA, iA, post.L = self.logdetA(K, W, 3)
        nlZ = np.dot(alpha.T, f - m)/2. - lp.sum() + ldA/2.
        nlZ = nlZ[0]
    else:
        sW = post.sW
        post.L = np.linalg.cholesky(np.eye(n) + np.dot(sW, sW.T)*K).T
        nlZ = np.dot(alpha.T, f - m)/2. + (np.log(np.diag(post.L)) - np.reshape(lp, (lp.shape[0],))).sum()
        nlZ = nlZ[0]
    if nargout > 2:                               # do we want derivatives?
        dnlZ = dnlZStruct(meanfunc, covfunc, likfunc)  # allocate space for derivatives
        if isWneg:                                # switch between Cholesky and LU decomposition mode
            Z = -post.L                           # inv(K + inv(W))
            g = np.atleast_2d((iA*K).sum(axis=1)).T/2  # deriv. of ln|B| wrt W; g = diag(inv(inv(K)+diag(W)))/2
        else:
            Z = np.tile(sW, (1, n))*solve_chol(post.L, np.diag(np.reshape(sW, (sW.shape[0],))))  # sW*inv(B)*sW = inv(K+inv(W))
            C = np.linalg.solve(post.L.T, np.tile(sW, (1, n))*K)  # deriv. of ln|B| wrt W
            g = np.atleast_2d(np.diag(K) - (C**2).sum(axis=0).T).T/2.  # g = diag(inv(inv(K)+W))/2
        dfhat = g*d3lp                            # deriv. of nlZ wrt fhat: dfhat = diag(inv(inv(K)+W)).*d3lp/2
        for ii in range(len(covfunc.hyp)):        # covariance hypers
            dK = covfunc.proceed(x, None, ii)
            dnlZ.cov[ii] = (Z*dK).sum()/2. - np.dot(alpha.T, np.dot(dK, alpha))/2.  # explicit part
            b = np.dot(dK, dlp)                   # b - K*(Z*b) = inv(eye(n)+K*diag(W))*b
            dnlZ.cov[ii] -= np.dot(dfhat.T, b - np.dot(K, np.dot(Z, b)))  # implicit part
            dnlZ.cov[ii] = dnlZ.cov[ii][0, 0]
        for ii in range(len(likfunc.hyp)):        # likelihood hypers
            lp_dhyp, dlp_dhyp, d2lp_dhyp = likfunc.proceed(y, f, None, inffunc, ii, 3)
            dnlZ.lik[ii] = -np.dot(g.T, d2lp_dhyp) - lp_dhyp.sum()  # explicit part
            b = np.dot(K, dlp_dhyp)               # b - K*(Z*b) = inv(eye(n)+K*diag(W))*b
            dnlZ.lik[ii] -= np.dot(dfhat.T, b - np.dot(K, np.dot(Z, b)))  # implicit part
            dnlZ.lik[ii] = dnlZ.lik[ii][0, 0]
        for ii in range(len(meanfunc.hyp)):       # mean hypers
            dm = meanfunc.proceed(x, ii)
            dnlZ.mean[ii] = -np.dot(alpha.T, dm)  # explicit part
            dnlZ.mean[ii] -= np.dot(dfhat.T, dm - np.dot(K, np.dot(Z, dm)))  # implicit part
            dnlZ.mean[ii] = dnlZ.mean[ii][0, 0]
        return post, nlZ[0], dnlZ
    else:
        return post, nlZ[0]
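# logdetA (used in the negative-W branch above) is not defined in this snippet.
# A minimal sketch following the GPML reference: when W has negative entries,
# B = I + sW*K*sW need not be positive definite, so log|A| with
# A = I + K*diag(W) is obtained from an LU factorization instead of Cholesky.
# The determinant-sign bookkeeping of the real routine is omitted here.
import numpy as np
from scipy.linalg import lu

def logdetA_sketch(K, W):
    # Returns log|A|, inv(A) and -diag(W)*inv(A), matching the three outputs
    # unpacked above (ldA, iA, post.L).
    n = K.shape[0]
    A = np.eye(n) + K*np.tile(W.T, (n, 1))    # broadcasted K*diag(W)
    P, L, U = lu(A)                           # A = P*L*U
    ldA = np.log(np.abs(np.diag(U))).sum()    # log|det(A)|, assuming det(A) > 0
    iA = np.linalg.solve(A, np.eye(n))        # explicit inverse; fine for a sketch
    return ldA, iA, -np.tile(W, (1, n))*iA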
def proceed(self, meanfunc, covfunc, likfunc, x, y, nargout=1):
    if not isinstance(likfunc, lik.Gauss):        # NOTE: no explicit call to likGauss
        raise Exception('Exact inference only possible with Gaussian likelihood')
    if not isinstance(covfunc, cov.FITCOfKernel):
        raise Exception('Only covFITC supported.')  # check cov
    diagK, Kuu, Ku = covfunc.proceed(x)           # evaluate covariance matrix
    m = meanfunc.proceed(x)                       # evaluate mean vector
    n, D = x.shape
    nu = Kuu.shape[0]
    sn2 = np.exp(2*likfunc.hyp[0])                # noise variance of likGauss
    snu2 = 1.e-6*sn2                              # hard coded inducing inputs noise
    Luu = np.linalg.cholesky(Kuu + snu2*np.eye(nu)).T  # Kuu + snu2*I = Luu'*Luu
    V = np.linalg.solve(Luu.T, Ku)                # V = inv(Luu')*Ku => V'*V = Q
    g_sn2 = diagK + sn2 - np.array([(V*V).sum(axis=0)]).T  # g + sn2 = diag(K) + sn2 - diag(Q)
    Lu = np.linalg.cholesky(np.eye(nu) + np.dot(V/np.tile(g_sn2.T, (nu, 1)), V.T)).T  # Lu'*Lu = I + V*diag(1/g_sn2)*V'
    r = (y - m)/np.sqrt(g_sn2)
    be = np.linalg.solve(Lu.T, np.dot(V, r/np.sqrt(g_sn2)))
    iKuu = solve_chol(Luu, np.eye(nu))            # inv(Kuu + snu2*I) = iKuu
    post = postStruct()
    post.alpha = np.linalg.solve(Luu, np.linalg.solve(Lu, be))  # return the posterior parameters
    post.L = solve_chol(np.dot(Lu, Luu), np.eye(nu)) - iKuu     # Sigma - inv(Kuu)
    post.sW = np.ones((n, 1))/np.sqrt(sn2)        # unused for FITC prediction with gp.m
    if nargout > 1:                               # do we want the marginal likelihood?
        nlZ = np.log(np.diag(Lu)).sum() + (np.log(g_sn2).sum() + n*np.log(2*np.pi)
                                           + np.dot(r.T, r) - np.dot(be.T, be))/2.
        if nargout > 2:                           # do we want derivatives?
            dnlZ = dnlZStruct(meanfunc, covfunc, likfunc)  # allocate space for derivatives
            al = r/np.sqrt(g_sn2) - np.dot(V.T, np.linalg.solve(Lu, be))/g_sn2  # al = (Kt+sn2*eye(n))\y
            B = np.dot(iKuu, Ku)
            w = np.dot(B, al)
            W = np.linalg.solve(Lu.T, V/np.tile(g_sn2.T, (nu, 1)))
            for ii in range(len(covfunc.hyp)):
                ddiagKi, dKuui, dKui = covfunc.proceed(x, None, ii)  # eval cov deriv
                R = 2.*dKui - np.dot(dKuui, B)
                v = ddiagKi - np.array([(R*B).sum(axis=0)]).T  # diag part of cov deriv
                dnlZ.cov[ii] = (np.dot(ddiagKi.T, 1./g_sn2)
                                + np.dot(w.T, np.dot(dKuui, w) - 2.*np.dot(dKui, al))
                                - np.dot(al.T, v*al)
                                - np.dot(np.array([(W*W).sum(axis=0)]), v)
                                - (np.dot(R, W.T)*np.dot(B, W.T)).sum())/2.
                dnlZ.cov[ii] = dnlZ.cov[ii][0, 0]
            dnlZ.lik = sn2*((1./g_sn2).sum() - (np.array([(W*W).sum(axis=0)])).sum() - np.dot(al.T, al))
            dKuui = 2*snu2                        # derivative of the inducing noise snu2*I wrt the noise hyper
            R = -dKuui*B
            v = -np.array([(R*B).sum(axis=0)]).T  # diag part of cov deriv
            dnlZ.lik += (np.dot(w.T, np.dot(dKuui, w)) - np.dot(al.T, v*al)
                         - np.dot(np.array([(W*W).sum(axis=0)]), v)
                         - (np.dot(R, W.T)*np.dot(B, W.T)).sum())/2.
            dnlZ.lik = list(dnlZ.lik[0])
            for ii in range(len(meanfunc.hyp)):
                dnlZ.mean[ii] = np.dot(-meanfunc.proceed(x, ii).T, al)
                dnlZ.mean[ii] = dnlZ.mean[ii][0, 0]
            return post, nlZ[0, 0], dnlZ
        return post, nlZ[0, 0]
    return post
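# Hedged usage sketch (the constructor arguments below are assumptions based on
# the isinstance checks above; only the cov.FITCOfKernel and lik.Gauss names
# appear in the code itself):
#
#   covfunc = cov.FITCOfKernel(...)   # base kernel plus inducing inputs
#   post, nlZ, dnlZ = inffunc.proceed(meanfunc, covfunc, lik.Gauss(), x, y, nargout=3)
#
# Relative to exact inference, every solve involves only the nu x nu matrix Kuu
# (plus diagonal corrections), so the cost falls from O(n^3) to O(n*nu^2).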