def proceed(self, meanfunc, covfunc, likfunc, x, y, nargout=1):
    '''
    Laplace approximation for a FITC (sparse) Gaussian process. Finds the
    posterior mode by Newton iterations with a Brent line search and returns
    the posterior approximation (post), the negative log marginal likelihood
    (nlZ) and, if nargout > 2, its derivatives w.r.t. the hyperparameters (dnlZ).
    '''
    if not isinstance(covfunc, cov.FITCOfKernel):
        raise Exception('Only covFITC supported.')
    tol = 1e-6                          # tolerance for when to stop the Newton iterations
    smax = 2; Nline = 100; thr = 1e-4   # line search parameters
    maxit = 20                          # max number of Newton steps in f
    inffunc = Laplace()
    diagK,Kuu,Ku = covfunc.proceed(x)   # evaluate the covariance matrix
    m = meanfunc.proceed(x)             # evaluate the mean vector
    if likfunc.hyp:                     # hard coded inducing inputs noise
        sn2  = np.exp(2.*likfunc.hyp[-1])
        snu2 = 1.e-6*sn2                # similar to infFITC
    else:
        snu2 = 1.e-6
    n, D = x.shape
    nu = Kuu.shape[0]
    rot180   = lambda A: np.rot90(np.rot90(A))    # little helper functions
    chol_inv = lambda A: np.linalg.solve(rot180(np.linalg.cholesky(rot180(A))), np.eye(nu))  # chol(inv(A))
    R0 = chol_inv(Kuu + snu2*np.eye(nu))          # initial R, used for refresh O(nu^3)
    V  = np.dot(R0,Ku)
    d0 = diagK - np.array([(V*V).sum(axis=0)]).T  # initial d, needed
    Psi_old = np.inf    # ensure the while loop starts with the largest possible old objective value
    if self.last_alpha is None:                   # find a good starting point for alpha and f
        alpha = np.zeros((n,1))
        f = self.mvmK(alpha,V,d0) + m             # start at mean if sizes do not match
        vargout = likfunc.proceed(y, f, None, inffunc, None, 3)
        lp = vargout[0]; dlp = vargout[1]; d2lp = vargout[2]
        W = -d2lp
        Psi_new = -lp.sum()
    else:
        alpha = self.last_alpha
        f = self.mvmK(alpha,V,d0) + m             # try the last alpha
        vargout = likfunc.proceed(y, f, None, inffunc, None, 3)
        lp = vargout[0]; dlp = vargout[1]; d2lp = vargout[2]
        W = -d2lp
        Psi_new = np.dot(alpha.T,(f-m))/2. - lp.sum()   # objective for last alpha
        vargout = - likfunc.proceed(y, m, None, inffunc, None, 1)
        Psi_def = vargout[0]                      # objective for default init f==m
        if Psi_def < Psi_new:                     # if default is better, we use it
            alpha = np.zeros((n,1))
            f = self.mvmK(alpha,V,d0) + m
            vargout = likfunc.proceed(y, f, None, inffunc, None, 3)
            lp = vargout[0]; dlp = vargout[1]; d2lp = vargout[2]
            W = -d2lp
            Psi_new = -lp.sum()
    isWneg = np.any(W<0)    # flag indicating whether we found negative values of W;
    it = 0                  # this happens for the Student's t likelihood
    while (Psi_old - Psi_new > tol) and it < maxit:   # begin Newton
        Psi_old = Psi_new
        it += 1
        if isWneg:          # stabilise the Newton direction in case W has negative values
            W = np.maximum(W,0)   # stabilise the Hessian to guarantee positive definiteness
            tol = 1e-8            # increase accuracy to also get the derivatives right
        b = W*(f-m) + dlp
        dd = 1/(1+W*d0)
        RV = np.dot(chol_inv(np.eye(nu) + np.dot(V*np.tile((W*dd).T,(nu,1)),V.T)), V)
        dalpha = dd*b - (W*dd)*np.dot(RV.T,np.dot(RV,(dd*b))) - alpha  # Newton direction + line search
        vargout = brentmin(0,smax,Nline,thr,self.Psi_lineFITC,4,dalpha,alpha,V,d0,m,likfunc,y,inffunc)
        s = vargout[0]; Psi_new = vargout[1]; Nfun = vargout[2]; alpha = vargout[3]
        f = vargout[4]; dlp = vargout[5]; W = vargout[6]
        isWneg = np.any(W<0)
    self.last_alpha = alpha                       # remember for next call
    vargout = likfunc.proceed(y,f,None,inffunc,None,4)
    lp = vargout[0]; dlp = vargout[1]; d2lp = vargout[2]; d3lp = vargout[3]
    W = -d2lp
    isWneg = np.any(W<0)
    post = postStruct()
    post.alpha = np.dot(R0.T,np.dot(V,alpha))     # return the posterior parameters
    post.sW = np.sqrt(np.abs(W))*np.sign(W)       # preserve sign in case of negative W
    dd = 1/(1+d0*W)                                              # temporary variable O(n)
    A = np.eye(nu) + np.dot(V*np.tile((W*dd).T,(nu,1)),V.T)      # temporary variable O(n*nu^2)
    R0tV = np.dot(R0.T,V)
    B = R0tV*np.tile((W*dd).T,(nu,1))                            # temporary variables O(n*nu^2)
    post.L = -np.dot(B,R0tV.T)    # L = -R0'*V*inv(Kt+diag(1./ttau))*V'*R0, first part
    if np.any(1+d0*W < 0):
        raise Exception('W is too negative; nlZ and dnlZ cannot be computed.')
    nlZ = np.dot(alpha.T,(f-m))/2. - lp.sum() - np.log(dd).sum()/2. + \
          np.log(np.diag(np.linalg.cholesky(A).T)).sum()
    RV = np.dot(chol_inv(A),V)
    RVdd = RV*np.tile((W*dd).T,(nu,1))                           # RVdd is needed for dnlZ
    B = np.dot(B,RV.T)
    post.L += np.dot(B,B.T)
    if nargout > 2:                                              # do we want derivatives?
        dnlZ = dnlZStruct(meanfunc, covfunc, likfunc)            # allocate space for derivatives
        [d,P,R] = self.fitcRefresh(d0,Ku,R0,V,W)
        g = d/2 + 0.5*np.atleast_2d((np.dot(np.dot(R,R0),P)**2).sum(axis=0)).T  # g = diag(inv(inv(K)+W))/2
        t = W/(1+W*d0)
        dfhat = g*d3lp    # deriv. of nlZ wrt. fhat: dfhat = diag(inv(inv(K)+W)).*d3lp/2
        for ii in range(len(covfunc.hyp)):                       # covariance hypers
            ddiagK,dKuu,dKu = covfunc.proceed(x, None, ii)       # eval cov derivatives
            dA = 2.*dKu.T - np.dot(R0tV.T,dKuu)                  # dQ = dA*R0tV
            w = np.atleast_2d((dA*R0tV.T).sum(axis=1)).T         # w = diag(dQ)
            v = ddiagK - w                                       # v = diag(dK)-diag(dQ)
            dnlZ.cov[ii] = np.dot(ddiagK.T,t) - np.dot((RVdd*RVdd).sum(axis=0),v)   # explicit part
            dnlZ.cov[ii] -= (np.dot(RVdd,dA)*np.dot(RVdd,R0tV.T)).sum()             # explicit part
            dnlZ.cov[ii] = 0.5*dnlZ.cov[ii] - np.dot(alpha.T,np.dot(dA,np.dot(R0tV,alpha))+v*alpha)/2.  # explicit part
            b = np.dot(dA,np.dot(R0tV,dlp)) + v*dlp              # b-K*(Z*b) = inv(eye(n)+K*diag(W))*b
            KZb = self.mvmK(self.mvmZ(b,RVdd,t),V,d0)
            dnlZ.cov[ii] -= np.dot(dfhat.T,(b-KZb))              # implicit part
            dnlZ.cov[ii] = dnlZ.cov[ii][0,0]
        for ii in range(len(likfunc.hyp)):                       # likelihood hypers
            vargout = likfunc.proceed(y,f,None,inffunc,ii,3)
            lp_dhyp = vargout[0]; dlp_dhyp = vargout[1]; d2lp_dhyp = vargout[2]
            dnlZ.lik[ii] = -np.dot(g.T,d2lp_dhyp) - lp_dhyp.sum()   # explicit part
            b = self.mvmK(dlp_dhyp,V,d0)                            # implicit part
            dnlZ.lik[ii] -= np.dot(dfhat.T,b-self.mvmK(self.mvmZ(b,RVdd,t),V,d0))
            if ii == len(likfunc.hyp)-1:
                # since snu2 is a fixed fraction of sn2, there is a covariance-like
                # term in the derivative as well (st2 keeps t intact for mvmZ below)
                snu = np.sqrt(snu2)
                T = chol_inv(Kuu + snu2*np.eye(nu))
                T = np.dot(T.T,np.dot(T,snu*Ku))
                st2 = np.array([(T*T).sum(axis=0)]).T
                z = np.dot(alpha.T,np.dot(T.T,np.dot(T,alpha))-st2*alpha) - np.dot(np.array([(RVdd*RVdd).sum(axis=0)]),st2)
                z += (np.dot(RVdd,T.T)**2).sum()
                b = (st2*dlp - np.dot(T.T,np.dot(T,dlp)))/2.
                KZb = self.mvmK(self.mvmZ(b,RVdd,t),V,d0)
                z -= np.dot(dfhat.T,b-KZb)
                dnlZ.lik[ii] += z
            dnlZ.lik[ii] = dnlZ.lik[ii][0,0]
        for ii in range(len(meanfunc.hyp)):                      # mean hypers
            dm = meanfunc.proceed(x, ii)
            dnlZ.mean[ii] = -np.dot(alpha.T,dm)                  # explicit part
            Zdm = self.mvmZ(dm,RVdd,t)
            dnlZ.mean[ii] -= np.dot(dfhat.T,(dm-self.mvmK(Zdm,V,d0)))  # implicit part
            dnlZ.mean[ii] = dnlZ.mean[ii][0,0]
        return post, nlZ[0,0], dnlZ
    else:
        return post, nlZ[0,0]
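# ---------------------------------------------------------------------------
# Minimal illustrative sketch (not part of the class above), assuming mvmK
# implements multiplication by the FITC covariance K ~ V'*V + diag(d0) with
# V = R0*Ku, which is what the O(n*nu) algebra above relies on. The toy SE
# kernel, the data and the local mvmK closure are hypothetical stand-ins.
# ---------------------------------------------------------------------------
def _demo_fitc_mvm():
    import numpy as np
    rng = np.random.RandomState(0)
    n, nu = 50, 8                                    # training / inducing sizes
    X, Xu = rng.randn(n, 1), rng.randn(nu, 1)
    se = lambda A, B: np.exp(-0.5*(A - B.T)**2)      # toy 1-D SE kernel, unit scale
    Kuu = se(Xu, Xu) + 1e-6*np.eye(nu)               # inducing covariance (jittered)
    Ku = se(Xu, X)                                   # cross covariance, nu x n
    diagK = np.ones((n, 1))                          # k(x,x) = 1 for this kernel
    rot180 = lambda A: np.rot90(np.rot90(A))
    chol_inv = lambda A: np.linalg.solve(rot180(np.linalg.cholesky(rot180(A))), np.eye(nu))
    R0 = chol_inv(Kuu)                               # R0'*R0 = inv(Kuu); jitter folded into Kuu here
    V = np.dot(R0, Ku)
    d0 = diagK - np.atleast_2d((V*V).sum(axis=0)).T  # FITC diagonal correction
    mvmK = lambda al: np.dot(V.T, np.dot(V, al)) + d0*al   # assumed mvmK behaviour
    al = rng.randn(n, 1)
    K_fitc = np.dot(V.T, V) + np.diag(d0.ravel())    # dense O(n^2) reference
    assert np.allclose(mvmK(al), np.dot(K_fitc, al))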
def proceed(self, meanfunc, covfunc, likfunc, x, y, nargout=1):
    '''
    Laplace approximation with an exact (dense) covariance. Finds the
    posterior mode by Newton iterations with a Brent line search and returns
    the posterior approximation (post), the negative log marginal likelihood
    (nlZ) and, if nargout > 2, its derivatives w.r.t. the hyperparameters (dnlZ).
    '''
    tol = 1e-6                         # tolerance for when to stop the Newton iterations
    smax = 2; Nline = 20; thr = 1e-4   # line search parameters
    maxit = 20                         # max number of Newton steps in f
    inffunc = self
    K = covfunc.proceed(x)             # evaluate the covariance matrix
    m = meanfunc.proceed(x)            # evaluate the mean vector
    n, D = x.shape
    Psi_old = np.inf    # ensure the while loop starts with the largest possible old objective value
    if self.last_alpha is None:        # find a good starting point for alpha and f
        alpha = np.zeros((n,1))
        f = np.dot(K,alpha) + m        # start at mean if sizes do not match
        vargout = likfunc.proceed(y, f, None, inffunc, None, 3)
        lp = vargout[0]; dlp = vargout[1]; d2lp = vargout[2]
        W = -d2lp
        Psi_new = -lp.sum()
    else:
        alpha = self.last_alpha
        f = np.dot(K,alpha) + m        # try the last alpha
        vargout = likfunc.proceed(y, f, None, inffunc, None, 3)
        lp = vargout[0]; dlp = vargout[1]; d2lp = vargout[2]
        W = -d2lp
        Psi_new = np.dot(alpha.T,(f-m))/2. - lp.sum()   # objective for last alpha
        vargout = - likfunc.proceed(y, m, None, inffunc, None, 1)
        Psi_def = vargout[0]           # objective for default init f==m
        if Psi_def < Psi_new:          # if default is better, we use it
            alpha = np.zeros((n,1))
            f = np.dot(K,alpha) + m
            vargout = likfunc.proceed(y, f, None, inffunc, None, 3)
            lp = vargout[0]; dlp = vargout[1]; d2lp = vargout[2]
            W = -d2lp
            Psi_new = -lp.sum()
    isWneg = np.any(W<0)    # flag indicating whether we found negative values of W;
    it = 0                  # this happens for the Student's t likelihood
    while (Psi_old - Psi_new > tol) and it < maxit:   # begin Newton
        Psi_old = Psi_new
        it += 1
        if isWneg:          # stabilise the Newton direction in case W has negative values
            W = np.maximum(W,0)   # stabilise the Hessian to guarantee positive definiteness
            tol = 1e-10           # increase accuracy to also get the derivatives right
        sW = np.sqrt(W)
        L = np.linalg.cholesky(np.eye(n) + np.dot(sW,sW.T)*K).T
        b = W*(f-m) + dlp
        dalpha = b - sW*solve_chol(L,sW*np.dot(K,b)) - alpha   # Newton direction + line search
        vargout = brentmin(0,smax,Nline,thr,self.Psi_line,4,dalpha,alpha,K,m,likfunc,y,inffunc)
        s = vargout[0]; Psi_new = vargout[1]; Nfun = vargout[2]; alpha = vargout[3]
        f = vargout[4]; dlp = vargout[5]; W = vargout[6]
        isWneg = np.any(W<0)
    self.last_alpha = alpha            # remember for next call
    vargout = likfunc.proceed(y,f,None,inffunc,None,4)
    lp = vargout[0]; dlp = vargout[1]; d2lp = vargout[2]; d3lp = vargout[3]
    W = -d2lp
    isWneg = np.any(W<0)
    post = postStruct()
    post.alpha = alpha                 # return the posterior parameters
    post.sW = np.sqrt(np.abs(W))*np.sign(W)   # preserve sign in case of negative W
    if isWneg:
        [ldA,iA,post.L] = self.logdetA(K,W,3)
        nlZ = np.dot(alpha.T,(f-m))/2. - lp.sum() + ldA/2.
        nlZ = nlZ[0]
    else:
        sW = post.sW
        post.L = np.linalg.cholesky(np.eye(n) + np.dot(sW,sW.T)*K).T
        nlZ = np.dot(alpha.T,(f-m))/2. + (np.log(np.diag(post.L)) - np.reshape(lp,(lp.shape[0],))).sum()
        nlZ = nlZ[0]
    if nargout > 2:                    # do we want derivatives?
        dnlZ = dnlZStruct(meanfunc, covfunc, likfunc)   # allocate space for derivatives
        if isWneg:                     # switch between Cholesky and LU decomposition mode
            Z = -post.L                # inv(K+inv(W))
            g = np.atleast_2d((iA*K).sum(axis=1)).T/2.  # deriv. of ln|B| wrt W; g = diag(inv(inv(K)+diag(W)))/2
        else:
            Z = np.tile(sW,(1,n))*solve_chol(post.L,np.diag(np.reshape(sW,(sW.shape[0],))))  # sW*inv(B)*sW = inv(K+inv(W))
            C = np.linalg.solve(post.L.T,np.tile(sW,(1,n))*K)              # deriv. of ln|B| wrt W
            g = np.atleast_2d(np.diag(K)-(C**2).sum(axis=0).T).T/2.        # g = diag(inv(inv(K)+W))/2
        dfhat = g*d3lp    # deriv. of nlZ wrt. fhat: dfhat = diag(inv(inv(K)+W)).*d3lp/2
        for ii in range(len(covfunc.hyp)):              # covariance hypers
            dK = covfunc.proceed(x, None, ii)
            dnlZ.cov[ii] = (Z*dK).sum()/2. - np.dot(alpha.T,np.dot(dK,alpha))/2.   # explicit part
            b = np.dot(dK,dlp)                          # b-K*(Z*b) = inv(eye(n)+K*diag(W))*b
            dnlZ.cov[ii] -= np.dot(dfhat.T,b-np.dot(K,np.dot(Z,b)))                # implicit part
            dnlZ.cov[ii] = dnlZ.cov[ii][0,0]
        for ii in range(len(likfunc.hyp)):              # likelihood hypers
            [lp_dhyp,dlp_dhyp,d2lp_dhyp] = likfunc.proceed(y,f,None,inffunc,ii,3)
            dnlZ.lik[ii] = -np.dot(g.T,d2lp_dhyp) - lp_dhyp.sum()                  # explicit part
            b = np.dot(K,dlp_dhyp)                      # b-K*(Z*b) = inv(eye(n)+K*diag(W))*b
            dnlZ.lik[ii] -= np.dot(dfhat.T,b-np.dot(K,np.dot(Z,b)))                # implicit part
            dnlZ.lik[ii] = dnlZ.lik[ii][0,0]
        for ii in range(len(meanfunc.hyp)):             # mean hypers
            dm = meanfunc.proceed(x, ii)
            dnlZ.mean[ii] = -np.dot(alpha.T,dm)                                    # explicit part
            dnlZ.mean[ii] -= np.dot(dfhat.T,dm-np.dot(K,np.dot(Z,dm)))             # implicit part
            dnlZ.mean[ii] = dnlZ.mean[ii][0,0]
        return post, nlZ[0], dnlZ
    else:
        return post, nlZ[0]
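# ---------------------------------------------------------------------------
# Minimal illustrative sketch (not part of the class above) of the stable
# Newton update in the while loop: with B = I + sW*K*sW, the updated alpha
# is b - sW*inv(B)*(sW*K*b) for b = W*(f-m) + dlp (GPML Algorithm 3.1). The
# logistic likelihood, toy kernel and data are hypothetical, and full Newton
# steps (s = 1) stand in for the brentmin line search used above.
# ---------------------------------------------------------------------------
def _demo_laplace_newton():
    import numpy as np
    from scipy.linalg import cho_factor, cho_solve
    rng = np.random.RandomState(1)
    n = 40
    x = np.sort(rng.randn(n, 1), axis=0)
    y = np.sign(np.sin(2.*x) + 0.3*rng.randn(n, 1))    # labels in {-1,+1}
    K = np.exp(-0.5*(x - x.T)**2) + 1e-8*np.eye(n)     # toy SE kernel + jitter
    m = np.zeros((n, 1))                               # zero mean function
    alpha = np.zeros((n, 1))
    for it in range(20):
        f = np.dot(K, alpha) + m
        pi = 1./(1. + np.exp(-y*f))                    # sigmoid(y*f)
        dlp = y*(1. - pi)                              # d log p(y|f) / df
        W = pi*(1. - pi)                               # -d2 log p(y|f) / df^2 >= 0
        sW = np.sqrt(W)
        L = cho_factor(np.eye(n) + sW*sW.T*K)          # chol of B = I + sW*K*sW
        b = W*(f - m) + dlp
        dalpha = b - sW*cho_solve(L, sW*np.dot(K, b)) - alpha
        alpha = alpha + dalpha                         # full Newton step
        if np.max(np.abs(dalpha)) < 1e-10:
            break
    f = np.dot(K, alpha) + m                           # at the mode alpha equals dlp,
    pi = 1./(1. + np.exp(-y*f))                        # i.e. the gradient of the
    assert np.allclose(alpha, y*(1. - pi), atol=1e-6)  # Laplace objective vanishes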