def infExact(hyp, meanfunc, covfunc, likfunc, x, y, nargout=1):
    """Exact GP inference for a Gaussian likelihood.

    Builds a parametrization of the posterior and, on request, the negative
    log marginal likelihood and its derivatives w.r.t. the hyperparameters.

    Copyright (c) by Carl Edward Rasmussen and Hannes Nickisch, 2013-01-21
    See also INFMETHODS.M.

    Parameters
    ----------
    hyp : object exposing ``cov``, ``mean`` and ``lik`` hyperparameters.
    meanfunc, covfunc : function specifications handed to
        ``Tools.general.feval``.
    likfunc : sequence whose first entry must equal ``'lik.likGauss'``.
    x : 2-D array of inputs; its first dimension is the number of cases n.
    y : targets (the mean vector is subtracted from it).
    nargout : number of requested outputs (1, 2 or 3).

    Returns
    -------
    ``post`` when ``nargout <= 1``; ``(post, nlZ)`` when ``nargout == 2``;
    ``(post, nlZ, dnlZ)`` when ``nargout > 2``.  ``dnlZ`` is a deep copy of
    ``hyp`` whose ``cov``, ``lik`` and ``mean`` entries hold the derivatives.

    Raises
    ------
    Exception
        If the likelihood is not ``'lik.likGauss'``.
    """
    if likfunc[0] != 'lik.likGauss':     # NOTE: no explicit call to likGauss
        raise Exception('Exact inference only possible with Gaussian likelihood')

    n, _ = x.shape
    K = Tools.general.feval(covfunc, hyp.cov, x)       # covariance matrix
    m = Tools.general.feval(meanfunc, hyp.mean, x)     # mean vector
    sn2 = np.exp(2.*hyp.lik)                           # noise variance of likGauss

    # Transposed Cholesky factor of the noise-scaled covariance K/sn2 + I.
    L = np.linalg.cholesky(K/sn2 + np.eye(n)).T
    resid = y - m
    alpha = solve_chol(L, resid)/sn2

    post = postStruct()                     # posterior parameters
    post.alpha = alpha
    post.sW = np.ones((n, 1))/np.sqrt(sn2)  # sqrt of noise precision vector
    post.L = L                              # L = chol(eye(n)+sW*sW'.*K)

    if nargout <= 1:                        # posterior only
        return post

    # Negative log marginal likelihood.
    half_log_det = np.log(np.diag(L)).sum()
    nlZ = np.dot(resid.T, alpha/2) + half_log_det + n*np.log(2*np.pi*sn2)/2.
    if nargout <= 2:
        return post, nlZ

    # Derivatives w.r.t. the hyperparameters.
    dnlZ = deepcopy(hyp)                    # allocate space for derivatives
    Q = solve_chol(L, np.eye(n))/sn2 - np.dot(alpha, alpha.T)
    for idx in range(len(hyp.cov)):
        dK = Tools.general.feval(covfunc, hyp.cov, x, None, idx)
        dnlZ.cov[idx] = (Q*dK).sum()/2.
    dnlZ.lik = sn2*np.trace(Q)
    for idx in range(len(hyp.mean)):
        dm = Tools.general.feval(meanfunc, hyp.mean, x, idx)
        dnlZ.mean[idx] = np.dot(-dm.T, alpha)
    return post, nlZ, dnlZ
def infFITC(hyp, meanfunc, covfunc, likfunc, x, y, nargout=1):
    """FITC approximation to the posterior Gaussian process.

    The function is equivalent to infExact with the covariance function
        Kt = Q + G;  G = diag(g);  g = diag(K-Q);  Q = Ku'*inv(Quu)*Ku
    where Ku and Kuu are covariances w.r.t. the inducing inputs xu,
    snu2 = sn2/1e6 is the noise of the inducing inputs and
    Quu = Kuu + snu2*eye(nu).  The standard deviation of the inducing inputs
    snu is fixed to one per mil of the measurement noise's standard
    deviation sn.

    The implementation exploits the Woodbury matrix identity
        inv(Kt) = inv(G) - inv(G)*V'*inv(eye(nu)+V*inv(G)*V')*V*inv(G)
    in order to be applicable to large datasets.  The computational
    complexity is O(n nu^2), where n is the number of data points x and nu
    the number of inducing inputs in xu.

    Designed to be used in conjunction with covFITC and likGauss.

    Copyright (c) by Ed Snelson, Carl Edward Rasmussen
    and Hannes Nickisch, 2012-11-20.
    See also INFMETHODS.M, COVFITC.M.

    Parameters
    ----------
    hyp : object exposing ``cov``, ``mean`` and ``lik`` hyperparameters.
    meanfunc, covfunc : function specifications handed to
        ``Tools.general.feval``; ``covfunc[0]`` must equal
        ``['kernels.covFITC']``.
    likfunc : sequence whose first entry must equal ``'lik.likGauss'``.
    x : array of inputs; its first dimension is the number of cases n.
    y : targets.
    nargout : number of requested outputs (1, 2 or 3).

    Returns
    -------
    ``post`` when ``nargout <= 1``; ``(post, nlZ)`` when ``nargout == 2``;
    ``(post, nlZ, dnlZ)`` when ``nargout > 2``.

    Raises
    ------
    Exception
        If the likelihood is not likGauss or the covariance is not covFITC.
    """
    if not (likfunc[0] == 'lik.likGauss'):  # NOTE: no explicit call to likGauss
        raise Exception('Inference with infFITC only possible with Gaussian likelihood')
    cov1 = covfunc[0]
    if not cov1 == ['kernels.covFITC']:                          # check cov
        raise Exception('Only covFITC supported.')

    diagK, Kuu, Ku = Tools.general.feval(covfunc, hyp.cov, x)  # covariances
    m = Tools.general.feval(meanfunc, hyp.mean, x)             # mean vector
    n = x.shape[0]
    nu = Kuu.shape[0]

    sn2 = np.exp(2*hyp.lik)              # noise variance of likGauss
    snu2 = 1.e-6*sn2                     # hard coded inducing inputs noise
    Luu = np.linalg.cholesky(Kuu + snu2*np.eye(nu)).T  # Kuu + snu2*I = Luu'*Luu
    V = np.linalg.solve(Luu.T, Ku)       # V = inv(Luu')*Ku => V'*V = Q
    # g + sn2 = diag(K) + sn2 - diag(Q), kept as a column vector
    g_sn2 = diagK + sn2 - (V*V).sum(axis=0).reshape(-1, 1)
    V_g = V/g_sn2.T                      # V*diag(1/g_sn2) via broadcasting
    Lu = np.linalg.cholesky(np.eye(nu) + np.dot(V_g, V.T)).T  # Lu'*Lu = I+V*diag(1/g_sn2)*V'
    r = (y - m)/np.sqrt(g_sn2)
    be = np.linalg.solve(Lu.T, np.dot(V, r/np.sqrt(g_sn2)))
    iKuu = solve_chol(Luu, np.eye(nu))   # inv(Kuu + snu2*I) = iKuu

    post = postStruct()                  # posterior parameters
    post.alpha = np.linalg.solve(Luu, np.linalg.solve(Lu, be))
    post.L = solve_chol(np.dot(Lu, Luu), np.eye(nu)) - iKuu  # Sigma-inv(Kuu)
    post.sW = np.ones((n, 1))/np.sqrt(sn2)  # unused for FITC prediction with gp.m

    if nargout <= 1:
        return post

    # Negative log marginal likelihood.  The whole second group is halved:
    #   nlZ = sum(log(diag(Lu))) + (sum(log(g_sn2)) + n*log(2*pi) + r'*r - be'*be)/2
    nlZ = np.log(np.diag(Lu)).sum() + (np.log(g_sn2).sum() + n*np.log(2*np.pi)
                                       + np.dot(r.T, r) - np.dot(be.T, be))/2.
    if nargout <= 2:
        return post, nlZ

    dnlZ = deepcopy(hyp)                 # allocate space for derivatives
    # al = (Kt+sn2*eye(n))\(y-m)
    al = r/np.sqrt(g_sn2) - np.dot(V.T, np.linalg.solve(Lu, be))/g_sn2
    B = np.dot(iKuu, Ku)
    w = np.dot(B, al)
    W = np.linalg.solve(Lu.T, V_g)
    sumW2 = (W*W).sum(axis=0).reshape(1, -1)   # sum(W.*W,1) as a row vector
    BWt = np.dot(B, W.T)                       # loop invariant

    for ii in range(len(hyp.cov)):
        # eval cov deriv
        ddiagKi, dKuui, dKui = Tools.general.feval(covfunc, hyp.cov, x, None, ii)
        R = 2.*dKui - np.dot(dKuui, B)
        v = ddiagKi - (R*B).sum(axis=0).reshape(-1, 1)  # diag part of cov deriv
        # Each term below is a separate summand; sum(W.*W,1)*v is an inner
        # product (row vector times column vector), not an element-wise one.
        dnlZ.cov[ii] = (np.dot(ddiagKi.T, 1./g_sn2)
                        + np.dot(w.T, np.dot(dKuui, w) - 2.*np.dot(dKui, al))
                        - np.dot(al.T, v*al)
                        - np.dot(sumW2, v)
                        - (np.dot(R, W.T)*BWt).sum())/2.

    dnlZ.lik = sn2*((1./g_sn2).sum() - sumW2.sum() - np.dot(al.T, al))
    # since snu2 is a fixed fraction of sn2, there is a covariance-like term
    # in the derivative as well
    dKuui = 2*snu2                       # scalar (or length-1 array) factor
    R = -dKuui*B                         # plain scaling, hence '*' not np.dot
    v = -(R*B).sum(axis=0).reshape(-1, 1)  # diag part of cov deriv
    dnlZ.lik = dnlZ.lik + (np.dot(w.T, dKuui*w)
                           - np.dot(al.T, v*al)
                           - np.dot(sumW2, v)
                           - (np.dot(R, W.T)*BWt).sum())/2.

    for ii in range(len(hyp.mean)):
        dm = Tools.general.feval(meanfunc, hyp.mean, x, ii)
        dnlZ.mean[ii] = np.dot(-dm.T, al)
    return post, nlZ, dnlZ
def infEP(hyp, meanfunc, covfunc, likfunc, x, y, nargout=1):
    """Expectation Propagation approximation to the posterior Gaussian process.

    Python counterpart of ``[post nlZ dnlZ] = infEP(hyp, mean, cov, lik, x, y)``.
    The function takes a specified covariance function and likelihood
    function.  The sites are visited in index order (the random permutation
    of the reference implementation is not used here).

    Copyright (c) by Carl Edward Rasmussen and Hannes Nickisch 2010-02-25.
    See also INFMETHODS.M.

    A note on naming: variables are given short but descriptive names in
    accordance with Rasmussen & Williams "GPs for Machine Learning" (2006):
    mu and s2 are mean and variance, nu and tau are natural parameters.  A
    leading t means tilde, a subscript _ni means "not i" (for cavity
    parameters), or _n for a vector of cavity parameters.

    The converged tilde parameters are stored on the function object
    (``infEP.last_ttau`` / ``infEP.last_tnu``) and used as the starting point
    of the next call when their shape matches the current problem size.

    Parameters
    ----------
    hyp : object exposing ``cov``, ``mean`` and ``lik`` hyperparameters.
    meanfunc, covfunc, likfunc : function specifications handed to
        ``Tools.general.feval``.
    x : array of inputs; its first dimension is the number of cases n.
    y : targets.
    nargout : derivatives are computed only when ``nargout > 2``.

    Returns
    -------
    list
        ``[post, nlZ, dnlZ]`` when ``nargout > 2``, otherwise ``[post, nlZ]``.

    Raises
    ------
    Exception
        If the sweep limit is reached without convergence.
    """
    tol = 1e-4; max_sweep = 10; min_sweep = 2  # tolerance to stop EP iterations
    inffunc = 'inf.infEP'
    n = x.shape[0]
    K = Tools.general.feval(covfunc, hyp.cov, x)     # covariance matrix
    m = Tools.general.feval(meanfunc, hyp.mean, x)   # mean vector

    # marginal likelihood for ttau = tnu = zeros(n,1); equals n*log(2) for likCum*
    diagK = np.reshape(np.diag(K), (-1, 1))
    nlZ0 = -Tools.general.feval(likfunc, hyp.lik, y, m, diagK, inffunc).sum()

    # Find a starting point for the tilde parameters.  Cached values are only
    # usable when they match the current number of cases.
    last_ttau = getattr(infEP, 'last_ttau', None)
    last_tnu = getattr(infEP, 'last_tnu', None)
    have_cache = (last_ttau is not None and last_tnu is not None
                  and np.shape(last_ttau) == (n, 1)
                  and np.shape(last_tnu) == (n, 1))

    start_from_zero = True
    if have_cache:
        # Work on copies: the site updates below write in place and must not
        # alter the cached arrays if this call fails.
        ttau = np.array(last_ttau)
        tnu = np.array(last_tnu)
        [Sigma, mu, nlZ, L] = epComputeParams(K, y, ttau, tnu, likfunc, hyp, m, inffunc)
        start_from_zero = nlZ > nlZ0         # if zero is better, use zero instead
    if start_from_zero:
        ttau = np.zeros((n, 1))   # initialize to zero if we have no better guess
        tnu = np.zeros((n, 1))
        Sigma = K                 # initialize Sigma and mu, the parameters of ..
        mu = np.zeros((n, 1))     # .. the Gaussian posterior approximation
        nlZ = nlZ0

    nlZ_old = np.inf; sweep = 0   # converged, max. sweeps or min. sweeps?
    while (np.abs(nlZ - nlZ_old) > tol and sweep < max_sweep) or (sweep < min_sweep):
        nlZ_old = nlZ; sweep += 1
        for ii in range(n):       # iterate EP updates over examples (in index order)
            tau_ni = 1/Sigma[ii, ii] - ttau[ii]   # first find the cavity distribution ..
            nu_ni = mu[ii]/Sigma[ii, ii] + m[ii]*tau_ni - tnu[ii]  # .. params tau_ni and nu_ni

            # compute the desired derivatives of the individual log partition function
            vargout = Tools.general.feval(likfunc, hyp.lik, y[ii], nu_ni/tau_ni,
                                          1/tau_ni, inffunc, None, 3)
            lZ = vargout[0]; dlZ = vargout[1]; d2lZ = vargout[2]
            ttau_old = copy(ttau[ii])   # keep a copy of the old tilde parameter
            ttau[ii] = -d2lZ/(1. + d2lZ/tau_ni)
            ttau[ii] = max(ttau[ii], 0)  # enforce positivity i.e. lower bound ttau by zero
            tnu[ii] = (dlZ + (m[ii] - nu_ni/tau_ni)*d2lZ)/(1. + d2lZ/tau_ni)

            ds2 = ttau[ii] - ttau_old    # finally rank-1 update Sigma ..
            si = np.reshape(Sigma[:, ii], (Sigma.shape[0], 1))
            Sigma = Sigma - ds2/(1. + ds2*si[ii])*np.dot(si, si.T)
            mu = np.dot(Sigma, tnu)      # .. and recompute mu
        # recompute since repeated rank-one updates can destroy numerical precision
        [Sigma, mu, nlZ, L] = epComputeParams(K, y, ttau, tnu, likfunc, hyp, m, inffunc)

    # Only fail when the sweep budget ran out AND the last sweep still moved
    # nlZ by more than tol; a run that converges on the final sweep is valid.
    if sweep == max_sweep and np.abs(nlZ - nlZ_old) > tol:
        raise Exception('maximum number of sweeps reached in function infEP')

    infEP.last_ttau = ttau; infEP.last_tnu = tnu   # remember for next call

    sW = np.sqrt(ttau)
    alpha = tnu - sW*solve_chol(L, sW*np.dot(K, tnu))
    post = postStruct()              # posterior parameters
    post.alpha = alpha
    post.sW = sW
    post.L = L

    if nargout > 2:                  # do we want derivatives?
        dnlZ = deepcopy(hyp)         # allocate space for derivatives
        V = np.linalg.solve(L.T, sW*K)   # sW (n,1) scales the rows of K
        Sigma = K - np.dot(V.T, V)
        mu = np.dot(Sigma, tnu)
        Dsigma = np.reshape(np.diag(Sigma), (-1, 1))
        tau_n = 1/Dsigma - ttau      # vectors of cavity parameters
        nu_n = mu/Dsigma - tnu

        # covariance hypers; diagflat builds the n-by-n matrix diag(sW) from
        # the (n,1) column (np.diag would instead extract a 1-element diagonal)
        F = np.dot(alpha, alpha.T) - sW*solve_chol(L, np.diagflat(sW))
        for jj in range(len(hyp.cov)):
            dK = Tools.general.feval(covfunc, hyp.cov, x, None, jj)
            dnlZ.cov[jj] = -(F*dK).sum()/2.
        for ii in range(len(hyp.lik)):                 # likelihood hypers
            dlik = Tools.general.feval(likfunc, hyp.lik, y, nu_n/tau_n + m,
                                       1/tau_n, inffunc, ii)
            dnlZ.lik[ii] = -dlik.sum()
        # mean hypers
        # NOTE(review): this call relies on the likelihood function's default
        # output count yielding exactly (lZ, dlZ) -- confirm against the
        # likelihood implementations (the in-loop call passes nargout=3).
        [junk, dlZ] = Tools.general.feval(likfunc, hyp.lik, y, nu_n/tau_n + m,
                                          1/tau_n, inffunc)
        for ii in range(len(hyp.mean)):
            dm = Tools.general.feval(meanfunc, hyp.mean, x, ii)
            dnlZ.mean[ii] = -np.dot(dlZ.T, dm)
        vargout = [post, nlZ, dnlZ]
    else:
        vargout = [post, nlZ]
    return vargout