Example 1
    def proceed(self, meanfunc, covfunc, likfunc, x, y, nargout=1):
        if not isinstance(covfunc, cov.FITCOfKernel):
            raise Exception('Only covFITC supported.')            
        tol = 1e-6                             # tolerance for when to stop the Newton iterations
        smax = 2; Nline = 100; thr = 1e-4      # line search parameters
        maxit = 20                             # max number of Newton steps in f
        inffunc = infLaplace()
        diagK,Kuu,Ku = covfunc.proceed(x)      # evaluate the covariance matrix
        m = meanfunc.proceed(x)                # evaluate the mean vector
        if likfunc.hyp:                        # hard coded inducing inputs noise
            sn2  = np.exp(2.*likfunc.hyp[-1]) 
            snu2 = 1.e-6*sn2                   # similar to infFITC
        else:
            snu2 = 1.e-6        
        
        n, D = x.shape
        nu = Kuu.shape[0]
        rot180   = lambda A: np.rot90(np.rot90(A))      # little helper functions
        chol_inv = lambda A: np.linalg.solve( rot180( np.linalg.cholesky(rot180(A)) ),np.eye(nu)) # chol(inv(A))
        R0 = chol_inv(Kuu+snu2*np.eye(nu))              # initial R, used for refresh O(nu^3)
        V  = np.dot(R0,Ku); d0 = diagK - np.array([(V*V).sum(axis=0)]).T     # initial d, needed for refresh O(n*nu^2)
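        # Note: V'V = Ku' * inv(Kuu + snu2*I) * Ku = Q, the low-rank part of the
        # FITC approximation K ~ Q + diag(diagK - diag(Q)); d0 holds that diagonal
        # correction, so products K*x can be applied cheaply as V'*(V*x) + d0*x.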
    
        Psi_old = np.inf    # ensure the while loop starts from the largest possible old objective value
        if self.last_alpha is None:         # find a good starting point for alpha and f
            alpha = np.zeros((n,1))
            f = self.mvmK(alpha,V,d0) + m        # start at the mean
            vargout = likfunc.proceed(y, f, None, inffunc, None, 3)
            lp = vargout[0]; dlp = vargout[1]; d2lp = vargout[2]
            W=-d2lp; Psi_new = -lp.sum()
        else:
            alpha = self.last_alpha
            f = self.mvmK(alpha,V,d0) + m            # try last one
            vargout = likfunc.proceed(y, f, None, inffunc, None, 3)
            lp = vargout[0]; dlp = vargout[1]; d2lp = vargout[2]
            W=-d2lp
            Psi_new = np.dot(alpha.T,(f-m))/2. - lp.sum()           # objective for last alpha
            vargout = likfunc.proceed(y, m, None, inffunc, None, 1)
            Psi_def = -vargout[0].sum()                             # objective for default init f==m
            if Psi_def < Psi_new:                                   # if default is better, we use it
                alpha = np.zeros((n,1))
                f = self.mvmK(alpha,V,d0) + m
                vargout = likfunc.proceed(y, f, None, inffunc, None, 3)
                lp = vargout[0]; dlp = vargout[1]; d2lp = vargout[2]
                W=-d2lp; Psi_new = -lp.sum()

        isWneg = np.any(W<0)        # flag indicating whether we found negative values of W
        it = 0                      # (this can happen e.g. for the Student's t likelihood)

        while (Psi_old - Psi_new > tol) and it<maxit:          # begin Newton
            Psi_old = Psi_new
            it += 1
            if isWneg:                      # stabilise the Newton direction in case W has negative values
                W = np.maximum(W,0)         # stabilise the Hessian to guarantee positive definiteness
                tol = 1e-8                  # increase accuracy to also get the derivatives right
            b = W*(f-m) + dlp; dd = 1/(1+W*d0)
            RV = np.dot( chol_inv( np.eye(nu) + np.dot(V*np.tile((W*dd).T,(nu,1)),V.T)),V ) 
            dalpha = dd*b - (W*dd)*np.dot(RV.T,np.dot(RV,(dd*b))) - alpha # Newt dir + line search
            vargout = brentmin(0,smax,Nline,thr,self.Psi_lineFITC,4,dalpha,alpha,V,d0,m,likfunc,y,inffunc)
            s = vargout[0]; Psi_new = vargout[1]; Nfun = vargout[2]; alpha = vargout[3]
            f = vargout[4]; dlp = vargout[5]; W = vargout[6]
            isWneg = np.any(W<0)

        self.last_alpha = alpha                                     # remember for next call
        vargout = likfunc.proceed(y,f,None,inffunc,None,4) 
        lp = vargout[0]; dlp = vargout[1]; d2lp = vargout[2]; d3lp = vargout[3]  

        W=-d2lp; isWneg = np.any(W<0)
        post = postStruct()
        post.alpha = np.dot(R0.T,np.dot(V,alpha))                   # return the posterior parameters
        post.sW = np.sqrt(np.abs(W))*np.sign(W)                     # preserve sign in case of negative
        dd = 1/(1+d0*W)                                             # temporary variable O(n)
        A = np.eye(nu) + np.dot(V*np.tile((W*dd).T,(nu,1)),V.T)     # temporary variable O(n*nu^2)
        R0tV = np.dot(R0.T,V); B = R0tV*np.tile((W*dd).T,(nu,1))    # temporary variables O(n*nu^2)
        post.L = -np.dot(B,R0tV.T)          # L = -R0'*V*inv(Kt+diag(1./ttau))*V'*R0, first part
        if np.any(1+d0*W<0):
            raise Exception('W is too negative; nlZ and dnlZ cannot be computed.')
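        # Approximate negative log marginal likelihood: quadratic penalty plus
        # likelihood term plus half the log determinant, the latter split between
        # the diagonal factor dd and the nu x nu Cholesky factor of A.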
        nlZ = np.dot(alpha.T,(f-m))/2. - lp.sum() - np.log(dd).sum()/2. + \
            np.log(np.diag(np.linalg.cholesky(A).T)).sum()
        RV = np.dot(chol_inv(A),V)
        RVdd = RV * np.tile((W*dd).T,(nu,1))  # RVdd needed for dnlZ
        B = np.dot(B,RV.T)
        post.L += np.dot(B,B.T)

        if nargout>2:                                                   # do we want derivatives?
            dnlZ = dnlZStruct(meanfunc, covfunc, likfunc)               # allocate space for derivatives
            [d,P,R] = self.fitcRefresh(d0,Ku,R0,V,W)                    # g = diag(inv(inv(K)+W))/2
            g = d/2 + 0.5*np.atleast_2d((np.dot(np.dot(R,R0),P)**2).sum(axis=0)).T
            t = W/(1+W*d0)
            
            dfhat = g*d3lp  # deriv. of nlZ wrt. fhat: dfhat=diag(inv(inv(K)+W)).*d3lp/2
            for ii in range(len(covfunc.hyp)):                          # covariance hypers
                ddiagK,dKuu,dKu = covfunc.proceed(x, None, ii)          # eval cov derivatives
                dA = 2.*dKu.T-np.dot(R0tV.T,dKuu)                       # dQ = dA*R0tV
                w = np.atleast_2d((dA*R0tV.T).sum(axis=1)).T
                v = ddiagK-w                              # w = diag(dQ); v = diag(dK)-diag(dQ);
                dnlZ.cov[ii] = np.dot(ddiagK.T,t) - np.dot((RVdd*RVdd).sum(axis=0),v)   # explicit part
                dnlZ.cov[ii] -= (np.dot(RVdd,dA)*np.dot(RVdd,R0tV.T)).sum()             # explicit part
                dnlZ.cov[ii] = 0.5*dnlZ.cov[ii] - np.dot(alpha.T,np.dot(dA,np.dot(R0tV,alpha))+v*alpha)/2.  # explicit
                b = np.dot(dA,np.dot(R0tV,dlp)) + v*dlp                                 # b-K*(Z*b) = inv(eye(n)+K*diag(W))*b
                KZb = self.mvmK(self.mvmZ(b,RVdd,t),V,d0)
                dnlZ.cov[ii] -= np.dot(dfhat.T,(b-KZb))                                 # implicit part
                dnlZ.cov[ii] = dnlZ.cov[ii][0,0]
                
            for ii in range(len(likfunc.hyp)):                                          # likelihood hypers
                vargout = likfunc.proceed(y,f,None,inffunc,ii,3)
                lp_dhyp = vargout[0]; dlp_dhyp = vargout[1]; d2lp_dhyp = vargout[2] 
                dnlZ.lik[ii] = -np.dot(g.T,d2lp_dhyp) - lp_dhyp.sum()                   # explicit part
                b = self.mvmK(dlp_dhyp,V,d0)                                            # implicit part
                dnlZ.lik[ii] -= np.dot(dfhat.T,b-self.mvmK(self.mvmZ(b,RVdd,t),V,d0))
                if ii == len(likfunc.hyp)-1:
                    # since snu2 is a fixed fraction of sn2, there is a covariance-like term
                    # in the derivative as well
                    snu = np.sqrt(snu2)
                    T = chol_inv(Kuu + snu2*np.eye(nu))
                    T = np.dot(T.T,np.dot(T,snu*Ku))
                    t = np.array([(T*T).sum(axis=0)]).T 
                    z = np.dot(alpha.T,np.dot(T.T,np.dot(T,alpha))-t*alpha) - np.dot(np.array([(RVdd*RVdd).sum(axis=0)]),t)
                    z += (np.dot(RVdd,T.T)**2).sum()
                    b = (t*dlp-np.dot(T.T,np.dot(T,dlp)))/2.
                    KZb = self.mvmK(self.mvmZ(b,RVdd,t),V,d0)
                    z -= np.dot(dfhat.T,b-KZb)
                    dnlZ.lik[ii] += z
                    dnlZ.lik[ii] = dnlZ.lik[ii][0,0]
        
            for ii in range(len(meanfunc.hyp)):                                     # mean hypers
                dm = meanfunc.proceed(x, ii)
                dnlZ.mean[ii] = -np.dot(alpha.T,dm)                             # explicit part
                Zdm = self.mvmZ(dm,RVdd,t)
                dnlZ.mean[ii] -= np.dot(dfhat.T,(dm-self.mvmK(Zdm,V,d0)))            # implicit part
                dnlZ.mean[ii] = dnlZ.mean[ii][0,0]

            vargout = [post,nlZ[0,0],dnlZ]
        else:
            vargout = [post, nlZ[0,0]]
        return vargout
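
The helpers self.mvmK and self.mvmZ used above are not shown in this listing.
A minimal sketch of what they are assumed to compute, mirroring the
corresponding GPML infFITC_Laplace helpers (hypothetical stand-ins, not the
library's own code):

    import numpy as np

    def mvmK(al, V, d0):
        # Matrix-vector product with the approximate FITC covariance:
        # K @ al = V.T @ (V @ al) + d0 * al, without forming K explicitly.
        return np.dot(V.T, np.dot(V, al)) + d0 * al

    def mvmZ(x, RVdd, t):
        # Matrix-vector product with Z = inv(K + inv(W)) in its low-rank
        # representation: Z @ x = t * x - RVdd.T @ (RVdd @ x).
        return t * x - np.dot(RVdd.T, np.dot(RVdd, x))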
Example 2
    def proceed(self, meanfunc, covfunc, likfunc, x, y, nargout=1):
        if not isinstance(covfunc, cov.FITCOfKernel):
            raise Exception('Only covFITC supported.')
        tol = 1e-6  # tolerance for when to stop the Newton iterations
        smax = 2
        Nline = 100
        thr = 1e-4  # line search parameters
        maxit = 20  # max number of Newton steps in f
        inffunc = Laplace()
        diagK, Kuu, Ku = covfunc.proceed(x)  # evaluate the covariance matrix
        m = meanfunc.proceed(x)  # evaluate the mean vector
        if likfunc.hyp:  # hard coded inducing inputs noise
            sn2 = np.exp(2. * likfunc.hyp[-1])
            snu2 = 1.e-6 * sn2  # similar to infFITC
        else:
            snu2 = 1.e-6

        n, D = x.shape
        nu = Kuu.shape[0]
        rot180 = lambda A: np.rot90(np.rot90(A))  # little helper functions
        chol_inv = lambda A: np.linalg.solve(
            rot180(np.linalg.cholesky(rot180(A))), np.eye(nu))  # chol(inv(A))
        R0 = chol_inv(Kuu +
                      snu2 * np.eye(nu))  # initial R, used for refresh O(nu^3)
        V = np.dot(R0, Ku)
        d0 = diagK - np.array([(V * V).sum(axis=0)]).T  # initial d, needed for refresh O(n*nu^2)

        Psi_old = np.inf  # ensure the while loop starts from the largest possible old objective value
        if self.last_alpha is None:  # find a good starting point for alpha and f
            alpha = np.zeros((n, 1))
            f = self.mvmK(alpha, V, d0) + m  # start at the mean
            vargout = likfunc.proceed(y, f, None, inffunc, None, 3)
            lp = vargout[0]
            dlp = vargout[1]
            d2lp = vargout[2]
            W = -d2lp
            Psi_new = -lp.sum()
        else:
            alpha = self.last_alpha
            f = self.mvmK(alpha, V, d0) + m  # try last one
            vargout = likfunc.proceed(y, f, None, inffunc, None, 3)
            lp = vargout[0]
            dlp = vargout[1]
            d2lp = vargout[2]
            W = -d2lp
            Psi_new = np.dot(
                alpha.T, (f - m)) / 2. - lp.sum()  # objective for last alpha
            vargout = likfunc.proceed(y, m, None, inffunc, None, 1)
            Psi_def = -vargout[0].sum()  # objective for default init f==m
            if Psi_def < Psi_new:  # if default is better, we use it
                alpha = np.zeros((n, 1))
                f = self.mvmK(alpha, V, d0) + m
                vargout = likfunc.proceed(y, f, None, inffunc, None, 3)
                lp = vargout[0]
                dlp = vargout[1]
                d2lp = vargout[2]
                W = -d2lp
                Psi_new = -lp.sum()

        isWneg = np.any(W < 0)  # flag indicating whether we found negative values of W
        it = 0  # (this can happen e.g. for the Student's t likelihood)

        while (Psi_old - Psi_new > tol) and it < maxit:  # begin Newton
            Psi_old = Psi_new
            it += 1
            if isWneg:  # stabilise the Newton direction in case W has negative values
                W = np.maximum(W, 0)  # stabilise the Hessian to guarantee positive definiteness
                tol = 1e-8  # increase accuracy to also get the derivatives right
            b = W * (f - m) + dlp
            dd = 1 / (1 + W * d0)
            RV = np.dot(
                chol_inv(
                    np.eye(nu) +
                    np.dot(V * np.tile((W * dd).T, (nu, 1)), V.T)), V)
            dalpha = dd * b - (W * dd) * np.dot(RV.T, np.dot(
                RV, (dd * b))) - alpha  # Newt dir + line search
            vargout = brentmin(0, smax, Nline, thr, self.Psi_lineFITC, 4,
                               dalpha, alpha, V, d0, m, likfunc, y, inffunc)
            s = vargout[0]
            Psi_new = vargout[1]
            Nfun = vargout[2]
            alpha = vargout[3]
            f = vargout[4]
            dlp = vargout[5]
            W = vargout[6]
            isWneg = np.any(W < 0)

        self.last_alpha = alpha  # remember for next call
        vargout = likfunc.proceed(y, f, None, inffunc, None, 4)
        lp = vargout[0]
        dlp = vargout[1]
        d2lp = vargout[2]
        d3lp = vargout[3]

        W = -d2lp
        isWneg = np.any(W < 0)
        post = postStruct()
        post.alpha = np.dot(R0.T,
                            np.dot(V,
                                   alpha))  # return the posterior parameters
        post.sW = np.sqrt(np.abs(W)) * np.sign(
            W)  # preserve sign in case of negative
        dd = 1 / (1 + d0 * W)  # temporary variable O(n)
        A = np.eye(nu) + np.dot(V * np.tile(
            (W * dd).T, (nu, 1)), V.T)  # temporary variable O(n*nu^2)
        R0tV = np.dot(R0.T, V)
        B = R0tV * np.tile((W * dd).T,
                           (nu, 1))  # temporary variables O(n*nu^2)
        post.L = -np.dot(
            B, R0tV.T)  # L = -R0'*V*inv(Kt+diag(1./ttau))*V'*R0, first part
        if np.any(1 + d0 * W < 0):
            raise Exception(
                'W is too negative; nlZ and dnlZ cannot be computed.')
        nlZ = np.dot(alpha.T,(f-m))/2. - lp.sum() - np.log(dd).sum()/2. + \
            np.log(np.diag(np.linalg.cholesky(A).T)).sum()
        RV = np.dot(chol_inv(A), V)
        RVdd = RV * np.tile((W * dd).T, (nu, 1))  # RVdd needed for dnlZ
        B = np.dot(B, RV.T)
        post.L += np.dot(B, B.T)

        if nargout > 2:  # do we want derivatives?
            dnlZ = dnlZStruct(meanfunc, covfunc,
                              likfunc)  # allocate space for derivatives
            [d, P, R] = self.fitcRefresh(d0, Ku, R0, V,
                                         W)  # g = diag(inv(inv(K)+W))/2
            g = d / 2 + 0.5 * np.atleast_2d(
                (np.dot(np.dot(R, R0), P)**2).sum(axis=0)).T
            t = W / (1 + W * d0)

            dfhat = g * d3lp  # deriv. of nlZ wrt. fhat: dfhat=diag(inv(inv(K)+W)).*d3lp/2
            for ii in range(len(covfunc.hyp)):  # covariance hypers
                ddiagK, dKuu, dKu = covfunc.proceed(x, None,
                                                    ii)  # eval cov derivatives
                dA = 2. * dKu.T - np.dot(R0tV.T, dKuu)  # dQ = dA*R0tV
                w = np.atleast_2d((dA * R0tV.T).sum(axis=1)).T  # w = diag(dQ)
                v = ddiagK - w  # v = diag(dK)-diag(dQ);
                dnlZ.cov[ii] = np.dot(ddiagK.T, t) - np.dot(
                    (RVdd * RVdd).sum(axis=0), v)  # explicit part
                dnlZ.cov[ii] -= (np.dot(RVdd, dA) *
                                 np.dot(RVdd, R0tV.T)).sum()  # explicit part
                dnlZ.cov[ii] = 0.5 * dnlZ.cov[ii] - np.dot(
                    alpha.T,
                    np.dot(dA, np.dot(R0tV, alpha)) +
                    v * alpha) / 2.  # explicit
                b = np.dot(dA, np.dot(
                    R0tV,
                    dlp)) + v * dlp  # b-K*(Z*b) = inv(eye(n)+K*diag(W))*b
                KZb = self.mvmK(self.mvmZ(b, RVdd, t), V, d0)
                dnlZ.cov[ii] -= np.dot(dfhat.T, (b - KZb))  # implicit part
                dnlZ.cov[ii] = dnlZ.cov[ii][0, 0]

            for ii in range(len(likfunc.hyp)):  # likelihood hypers
                vargout = likfunc.proceed(y, f, None, inffunc, ii, 3)
                lp_dhyp = vargout[0]
                dlp_dhyp = vargout[1]
                d2lp_dhyp = vargout[2]
                dnlZ.lik[ii] = -np.dot(
                    g.T, d2lp_dhyp) - lp_dhyp.sum()  # explicit part
                b = self.mvmK(dlp_dhyp, V, d0)  # implicit part
                dnlZ.lik[ii] -= np.dot(
                    dfhat.T, b - self.mvmK(self.mvmZ(b, RVdd, t), V, d0))
                if ii == len(likfunc.hyp) - 1:
                    # since snu2 is a fixed fraction of sn2, there is a covariance-like term
                    # in the derivative as well
                    snu = np.sqrt(snu2)
                    T = chol_inv(Kuu + snu2 * np.eye(nu))
                    T = np.dot(T.T, np.dot(T, snu * Ku))
                    t = np.array([(T * T).sum(axis=0)]).T
                    z = np.dot(
                        alpha.T,
                        np.dot(T.T, np.dot(T, alpha)) - t * alpha) - np.dot(
                            np.array([(RVdd * RVdd).sum(axis=0)]), t)
                    z += (np.dot(RVdd, T.T)**2).sum()
                    b = (t * dlp - np.dot(T.T, np.dot(T, dlp))) / 2.
                    KZb = self.mvmK(self.mvmZ(b, RVdd, t), V, d0)
                    z -= np.dot(dfhat.T, b - KZb)
                    dnlZ.lik[ii] += z
                    dnlZ.lik[ii] = dnlZ.lik[ii][0, 0]

            for ii in range(len(meanfunc.hyp)):  # mean hypers
                dm = meanfunc.proceed(x, ii)
                dnlZ.mean[ii] = -np.dot(alpha.T, dm)  # explicit part
                Zdm = self.mvmZ(dm, RVdd, t)
                dnlZ.mean[ii] -= np.dot(
                    dfhat.T, (dm - self.mvmK(Zdm, V, d0)))  # implicit part
                dnlZ.mean[ii] = dnlZ.mean[ii][0, 0]

            return post, nlZ[0, 0], dnlZ
        else:
            return post, nlZ[0, 0]
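
The line-search objective self.Psi_lineFITC handed to brentmin is also not
shown. Assuming it mirrors GPML's Psi_line specialised to the FITC case, a
sketch looks like this (hypothetical; the return convention must match the
nout=4 extra outputs brentmin forwards):

    import numpy as np

    def Psi_lineFITC(s, dalpha, alpha, V, d0, m, likfunc, y, inffunc):
        # Evaluate Psi(alpha) = alpha'(f-m)/2 - sum(log p(y|f)) at
        # alpha + s*dalpha, applying f = K @ alpha + m via the FITC identity.
        alpha = alpha + s * dalpha
        f = np.dot(V.T, np.dot(V, alpha)) + d0 * alpha + m
        lp, dlp, d2lp = likfunc.proceed(y, f, None, inffunc, None, 3)
        W = -d2lp
        Psi = np.dot(alpha.T, (f - m)) / 2. - lp.sum()
        return Psi, alpha, f, dlp, W

Under this reading, brentmin returns [s, Psi, Nfun] followed by the four extra
outputs evaluated at the minimiser, which matches the seven values unpacked in
the Newton loop above.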
Example 3
    def proceed(self, meanfunc, covfunc, likfunc, x, y, nargout=1):
        tol = 1e-6                           # tolerance for when to stop the Newton iterations
        smax = 2; Nline = 20; thr = 1e-4     # line search parameters
        maxit = 20                           # max number of Newton steps in f
        inffunc = self
        K = covfunc.proceed(x)       # evaluate the covariance matrix
        m = meanfunc.proceed(x)      # evaluate the mean vector
        n, D = x.shape
        Psi_old = np.inf    # ensure the while loop starts from the largest possible old objective value
        if self.last_alpha is None:          # find a good starting point for alpha and f
            alpha = np.zeros((n,1))
            f = np.dot(K,alpha) + m       # start at the mean
            vargout = likfunc.proceed(y, f, None, inffunc, None, 3)
            lp = vargout[0]; dlp = vargout[1]; d2lp = vargout[2]
            W= -d2lp
            Psi_new = -lp.sum()
        else:
            alpha = self.last_alpha
            f = np.dot(K,alpha) + m                      # try last one
            vargout = likfunc.proceed(y, f, None, inffunc, None, 3)
            lp = vargout[0]; dlp = vargout[1]; d2lp = vargout[2]
            W= -d2lp
            Psi_new = np.dot(alpha.T,(f-m))/2. - lp.sum() # objective for last alpha
            vargout = likfunc.proceed(y, m, None, inffunc, None, 1)
            Psi_def = -vargout[0].sum()                   # objective for default init f==m
            if Psi_def < Psi_new:                         # if default is better, we use it
                alpha = np.zeros((n,1))
                f = np.dot(K,alpha) + m 
                vargout = likfunc.proceed(y, f, None, inffunc, None, 3)
                lp = vargout[0]; dlp = vargout[1]; d2lp = vargout[2]
                W=-d2lp; Psi_new = -lp.sum()
        isWneg = np.any(W<0)       # flag indicating whether we found negative values of W
        it = 0                     # (this can happen e.g. for the Student's t likelihood)

        while (Psi_old - Psi_new > tol) and it<maxit:          # begin Newton
            Psi_old = Psi_new; it += 1
            if isWneg:       # stabilise the Newton direction in case W has negative values
                W = np.maximum(W,0)      # stabilise the Hessian to guarantee positive definiteness
                tol = 1e-10             # increase accuracy to also get the derivatives right
            sW = np.sqrt(W); L = np.linalg.cholesky(np.eye(n) + np.dot(sW,sW.T)*K).T
            b = W*(f-m) + dlp
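            # The Newton target is alpha* = inv(I + diag(W)*K) * b; dalpha below
            # reaches it stably via the Cholesky factor of B = I + sW*K*sW'.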
            dalpha = b - sW*solve_chol(L,sW*np.dot(K,b)) - alpha
            vargout = brentmin(0,smax,Nline,thr,self.Psi_line,4,dalpha,alpha,K,m,likfunc,y,inffunc)
            s = vargout[0]
            Psi_new = vargout[1]
            Nfun = vargout[2]
            alpha = vargout[3]
            f = vargout[4]
            dlp = vargout[5]
            W = vargout[6]
            isWneg = np.any(W<0)
        self.last_alpha = alpha                                     # remember for next call
        vargout = likfunc.proceed(y,f,None,inffunc,None,4) 
        lp = vargout[0]; dlp = vargout[1]; d2lp = vargout[2]; d3lp = vargout[3] 
        W = -d2lp; isWneg = np.any(W<0)
        post = postStruct()
        post.alpha = alpha    # return the posterior parameters
        post.sW = np.sqrt(np.abs(W))*np.sign(W)             # preserve sign in case of negative
        if isWneg:
            [ldA,iA,post.L] = self.logdetA(K,W,3)
            nlZ = np.dot(alpha.T,(f-m))/2. - lp.sum() + ldA/2.
            nlZ = nlZ[0] 
        else:
            sW = post.sW
            post.L = np.linalg.cholesky(np.eye(n)+np.dot(sW,sW.T)*K).T 
            nlZ = np.dot(alpha.T,(f-m))/2. + (np.log(np.diag(post.L))-np.reshape(lp,(lp.shape[0],))).sum()
            nlZ = nlZ[0]
        if nargout>2:                                           # do we want derivatives?
            dnlZ = dnlZStruct(meanfunc, covfunc, likfunc)       # allocate space for derivatives
            if isWneg:                  # switch between Cholesky and LU decomposition mode
                Z = -post.L                                                 # inv(K+inv(W))
                g = np.atleast_2d((iA*K).sum(axis=1)).T /2       # deriv. of ln|B| wrt W; g = diag(inv(inv(K)+diag(W)))/2
            else:
                Z = np.tile(sW,(1,n))*solve_chol(post.L,np.diag(np.reshape(sW,(sW.shape[0],)))) #sW*inv(B)*sW=inv(K+inv(W))
                C = np.linalg.solve(post.L.T,np.tile(sW,(1,n))*K)              # deriv. of ln|B| wrt W
                g = np.atleast_2d((np.diag(K)-(C**2).sum(axis=0).T)).T /2.      # g = diag(inv(inv(K)+W))/2
            dfhat = g* d3lp  # deriv. of nlZ wrt. fhat: dfhat=diag(inv(inv(K)+W)).*d3lp/2
            for ii in range(len(covfunc.hyp)):                                  # covariance hypers
                dK = covfunc.proceed(x, None, ii)
                dnlZ.cov[ii] = (Z*dK).sum()/2. - np.dot(alpha.T,np.dot(dK,alpha))/2.    # explicit part
                b = np.dot(dK,dlp)                            # b-K*(Z*b) = inv(eye(n)+K*diag(W))*b
                dnlZ.cov[ii] -= np.dot(dfhat.T,b-np.dot(K,np.dot(Z,b)))            # implicit part
                dnlZ.cov[ii] = dnlZ.cov[ii][0][0]
            for ii in range(len(likfunc.hyp)):                  # likelihood hypers
                [lp_dhyp,dlp_dhyp,d2lp_dhyp] = likfunc.proceed(y,f,None,inffunc,ii,3)
                dnlZ.lik[ii] = -np.dot(g.T,d2lp_dhyp) - lp_dhyp.sum()      # explicit part
                b = np.dot(K,dlp_dhyp)                        # b-K*(Z*b) = inv(eye(n)+K*diag(W))*b
                dnlZ.lik[ii] -= np.dot(dfhat.T,b-np.dot(K,np.dot(Z,b)))   # implicit part
                
                dnlZ.lik[ii] = dnlZ.lik[ii][0][0]
            for ii in range(len(meanfunc.hyp)):                  # mean hypers
                dm = meanfunc.proceed(x, ii)
                dnlZ.mean[ii] = -np.dot(alpha.T,dm)                # explicit part
                dnlZ.mean[ii] -= np.dot(dfhat.T,dm-np.dot(K,np.dot(Z,dm))) # implicit part
                dnlZ.mean[ii] = dnlZ.mean[ii][0][0]
            vargout = [post,nlZ[0],dnlZ]
        else:
            vargout = [post, nlZ[0]]
        return vargout
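
solve_chol above is assumed to follow the GPML convention: given an
upper-triangular factor L with L.T @ L = A (as produced by
np.linalg.cholesky(A).T), it returns inv(A) @ B. A minimal sketch:

    import numpy as np

    def solve_chol(L, B):
        # Solve (L.T @ L) @ X = B by two triangular solves, i.e. X = inv(A) @ B
        # for A = L.T @ L, without ever forming the inverse.
        return np.linalg.solve(L, np.linalg.solve(L.T, B))

np.linalg.solve does not exploit the triangular structure;
scipy.linalg.solve_triangular would, but the dependency-free form keeps the
sketch self-contained.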
Example 4
    def proceed(self, meanfunc, covfunc, likfunc, x, y, nargout=1):
        tol = 1e-6  # tolerance for when to stop the Newton iterations
        smax = 2
        Nline = 20
        thr = 1e-4  # line search parameters
        maxit = 20  # max number of Newton steps in f
        inffunc = self
        K = covfunc.proceed(x)  # evaluate the covariance matrix
        m = meanfunc.proceed(x)  # evaluate the mean vector
        n, D = x.shape
        Psi_old = np.inf  # ensure the while loop starts from the largest possible old objective value
        if self.last_alpha is None:  # find a good starting point for alpha and f
            alpha = np.zeros((n, 1))
            f = np.dot(K, alpha) + m  # start at the mean
            vargout = likfunc.proceed(y, f, None, inffunc, None, 3)
            lp = vargout[0]
            dlp = vargout[1]
            d2lp = vargout[2]
            W = -d2lp
            Psi_new = -lp.sum()
        else:
            alpha = self.last_alpha
            f = np.dot(K, alpha) + m  # try last one
            vargout = likfunc.proceed(y, f, None, inffunc, None, 3)
            lp = vargout[0]
            dlp = vargout[1]
            d2lp = vargout[2]
            W = -d2lp
            Psi_new = np.dot(
                alpha.T, (f - m)) / 2. - lp.sum()  # objective for last alpha
            vargout = likfunc.proceed(y, m, None, inffunc, None, 1)
            Psi_def = -vargout[0].sum()  # objective for default init f==m
            if Psi_def < Psi_new:  # if default is better, we use it
                alpha = np.zeros((n, 1))
                f = np.dot(K, alpha) + m
                vargout = likfunc.proceed(y, f, None, inffunc, None, 3)
                lp = vargout[0]
                dlp = vargout[1]
                d2lp = vargout[2]
                W = -d2lp
                Psi_new = -lp.sum()
        isWneg = np.any(W < 0)  # flag indicating whether we found negative values of W
        it = 0  # (this can happen e.g. for the Student's t likelihood)

        while (Psi_old - Psi_new > tol) and it < maxit:  # begin Newton
            Psi_old = Psi_new
            it += 1
            if isWneg:  # stabilise the Newton direction in case W has negative values
                W = np.maximum(W, 0)  # stabilise the Hessian to guarantee positive definiteness
                tol = 1e-10  # increase accuracy to also get the derivatives right
            sW = np.sqrt(W)
            L = np.linalg.cholesky(np.eye(n) + np.dot(sW, sW.T) * K).T
            b = W * (f - m) + dlp
            dalpha = b - sW * solve_chol(L, sW * np.dot(K, b)) - alpha
            vargout = brentmin(0, smax, Nline, thr, self.Psi_line, 4, dalpha,
                               alpha, K, m, likfunc, y, inffunc)
            s = vargout[0]
            Psi_new = vargout[1]
            Nfun = vargout[2]
            alpha = vargout[3]
            f = vargout[4]
            dlp = vargout[5]
            W = vargout[6]
            isWneg = np.any(W < 0)
        self.last_alpha = alpha  # remember for next call
        vargout = likfunc.proceed(y, f, None, inffunc, None, 4)
        lp = vargout[0]
        dlp = vargout[1]
        d2lp = vargout[2]
        d3lp = vargout[3]
        W = -d2lp
        isWneg = np.any(W < 0)
        post = postStruct()
        post.alpha = alpha  # return the posterior parameters
        post.sW = np.sqrt(np.abs(W)) * np.sign(
            W)  # preserve sign in case of negative
        if isWneg:
            [ldA, iA, post.L] = self.logdetA(K, W, 3)
            nlZ = np.dot(alpha.T, (f - m)) / 2. - lp.sum() + ldA / 2.
            nlZ = nlZ[0]
        else:
            sW = post.sW
            post.L = np.linalg.cholesky(np.eye(n) + np.dot(sW, sW.T) * K).T
            nlZ = np.dot(alpha.T,
                         (f - m)) / 2. + (np.log(np.diag(post.L)) -
                                          np.reshape(lp,
                                                     (lp.shape[0], ))).sum()
            nlZ = nlZ[0]
        if nargout > 2:  # do we want derivatives?
            dnlZ = dnlZStruct(meanfunc, covfunc,
                              likfunc)  # allocate space for derivatives
            if isWneg:  # switch between Cholesky and LU decomposition mode
                Z = -post.L  # inv(K+inv(W))
                g = np.atleast_2d(
                    (iA * K).sum(axis=1)
                ).T / 2  # deriv. of ln|B| wrt W; g = diag(inv(inv(K)+diag(W)))/2
            else:
                Z = np.tile(sW, (1, n)) * solve_chol(
                    post.L, np.diag(np.reshape(
                        sW, (sW.shape[0], ))))  #sW*inv(B)*sW=inv(K+inv(W))
                C = np.linalg.solve(post.L.T,
                                    np.tile(sW, (1, n)) *
                                    K)  # deriv. of ln|B| wrt W
                g = np.atleast_2d(
                    (np.diag(K) -
                     (C**2).sum(axis=0).T)).T / 2.  # g = diag(inv(inv(K)+W))/2
            dfhat = g * d3lp  # deriv. of nlZ wrt. fhat: dfhat=diag(inv(inv(K)+W)).*d3lp/2
            for ii in range(len(covfunc.hyp)):  # covariance hypers
                dK = covfunc.proceed(x, None, ii)
                dnlZ.cov[ii] = (Z * dK).sum() / 2. - np.dot(
                    alpha.T, np.dot(dK, alpha)) / 2.  # explicit part
                b = np.dot(dK, dlp)  # b-K*(Z*b) = inv(eye(n)+K*diag(W))*b
                dnlZ.cov[ii] -= np.dot(
                    dfhat.T, b - np.dot(K, np.dot(Z, b)))  # implicit part
                dnlZ.cov[ii] = dnlZ.cov[ii][0, 0]
            for ii in range(len(likfunc.hyp)):  # likelihood hypers
                [lp_dhyp, dlp_dhyp,
                 d2lp_dhyp] = likfunc.proceed(y, f, None, inffunc, ii, 3)
                dnlZ.lik[ii] = -np.dot(
                    g.T, d2lp_dhyp) - lp_dhyp.sum()  # explicit part
                b = np.dot(K, dlp_dhyp)  # b-K*(Z*b) = inv(eye(n)+K*diag(W))*b
                dnlZ.lik[ii] -= np.dot(
                    dfhat.T, b - np.dot(K, np.dot(Z, b)))  # implicit part
                dnlZ.lik[ii] = dnlZ.lik[ii][0, 0]
            for ii in range(len(meanfunc.hyp)):  # mean hypers
                dm = meanfunc.proceed(x, ii)
                dnlZ.mean[ii] = -np.dot(alpha.T, dm)  # explicit part
                dnlZ.mean[ii] -= np.dot(
                    dfhat.T, dm - np.dot(K, np.dot(Z, dm)))  # implicit part
                dnlZ.mean[ii] = dnlZ.mean[ii][0, 0]
            return post, nlZ[0], dnlZ
        else:
            return post, nlZ[0]
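
As a sanity check on the stabilised Newton step, the quantity
b - sW*solve_chol(L, sW*(K @ b)) with L = np.linalg.cholesky(I + sW*sW'*K).T
should equal inv(I + diag(W) @ K) @ b without forming any inverse. A small
self-contained verification (a hypothetical test, not part of the library):

    import numpy as np

    def solve_chol(L, B):
        # Solve (L.T @ L) @ X = B via two triangular solves.
        return np.linalg.solve(L, np.linalg.solve(L.T, B))

    rng = np.random.default_rng(0)
    n = 5
    A = rng.standard_normal((n, n))
    K = np.dot(A, A.T) + n * np.eye(n)   # an SPD stand-in for the GP covariance
    W = rng.random((n, 1))               # a positive "Hessian" diagonal
    b = rng.standard_normal((n, 1))

    sW = np.sqrt(W)
    L = np.linalg.cholesky(np.eye(n) + np.dot(sW, sW.T) * K).T
    lhs = b - sW * solve_chol(L, sW * np.dot(K, b))
    rhs = np.linalg.solve(np.eye(n) + W * K, b)   # inv(I + diag(W) K) @ b
    assert np.allclose(lhs, rhs)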