Example #1
    def proceed(self, meanfunc, covfunc, likfunc, x, y, nargout=1):
        if not isinstance(covfunc, cov.FITCOfKernel):
            raise Exception('Only covFITC supported.')            
        tol = 1e-6                             # tolerance for when to stop the Newton iterations
        smax = 2; Nline = 100; thr = 1e-4      # line search parameters
        maxit = 20                             # max number of Newton steps in f
        inffunc = infLaplace()
        diagK,Kuu,Ku = covfunc.proceed(x)      # evaluate the covariance matrix
        m = meanfunc.proceed(x)                # evaluate the mean vector
        if likfunc.hyp:                        # hard-coded inducing-input noise
            sn2  = np.exp(2.*likfunc.hyp[-1]) 
            snu2 = 1.e-6*sn2                   # similar to infFITC
        else:
            snu2 = 1.e-6        
        
        n, D = x.shape
        nu = Kuu.shape[0]
        rot180   = lambda A: np.rot90(np.rot90(A))      # little helper functions
        chol_inv = lambda A: np.linalg.solve( rot180( np.linalg.cholesky(rot180(A)) ),np.eye(nu)) # chol(inv(A))
        R0 = chol_inv(Kuu+snu2*np.eye(nu))              # initial R, used for refresh O(nu^3)
        V  = np.dot(R0,Ku); d0 = diagK - np.array([(V*V).sum(axis=0)]).T     # initial d, needed for refresh O(n*nu^2)
    
        Psi_old = np.inf    # ensure the while loop starts with the largest old objective value
        if self.last_alpha is None:         # find a good starting point for alpha and f
            alpha = np.zeros((n,1))
            f = self.mvmK(alpha,V,d0) + m        # start at mean if sizes do not match
            vargout = likfunc.proceed(y, f, None, inffunc, None, 3)
            lp = vargout[0]; dlp = vargout[1]; d2lp = vargout[2]
            W=-d2lp; Psi_new = -lp.sum()
        else:
            alpha = self.last_alpha
            f = self.mvmK(alpha,V,d0) + m            # try last one
            vargout = likfunc.proceed(y, f, None, inffunc, None, 3)
            lp = vargout[0]; dlp = vargout[1]; d2lp = vargout[2]
            W=-d2lp
            Psi_new = np.dot(alpha.T,(f-m))/2. - lp.sum()           # objective for last alpha
            vargout = - likfunc.proceed(y, m, None, inffunc, None, 1)
            Psi_def =  vargout[0]                                   # objective for default init f==m
            if Psi_def < Psi_new:                                   # if default is better, we use it
                alpha = np.zeros((n,1))
                f = self.mvmK(alpha,V,d0) + m
                vargout = likfunc.proceed(y, f, None, inffunc, None, 3)
                lp = vargout[0]; dlp = vargout[1]; d2lp = vargout[2]
                W=-d2lp; Psi_new = -lp.sum()

        isWneg = np.any(W<0)        # flag indicating whether we found negative values of W
        it = 0                      # this happens for the Student's t likelihood

        while (Psi_old - Psi_new > tol) and it<maxit:          # begin Newton
            Psi_old = Psi_new
            it += 1
            if isWneg:                      # stabilise the Newton direction in case W has negative values
                W = np.maximum(W,0)         # stabilise the Hessian to guarantee positive definiteness
                tol = 1e-8                  # increase accuracy to also get the derivatives right
            b = W*(f-m) + dlp; dd = 1/(1+W*d0)
            RV = np.dot( chol_inv( np.eye(nu) + np.dot(V*np.tile((W*dd).T,(nu,1)),V.T)),V ) 
            dalpha = dd*b - (W*dd)*np.dot(RV.T,np.dot(RV,(dd*b))) - alpha # Newt dir + line search
            vargout = brentmin(0,smax,Nline,thr,self.Psi_lineFITC,4,dalpha,alpha,V,d0,m,likfunc,y,inffunc)
            s = vargout[0]; Psi_new = vargout[1]; Nfun = vargout[2]; alpha = vargout[3]
            f = vargout[4]; dlp = vargout[5]; W = vargout[6]
            isWneg = np.any(W<0)

        self.last_alpha = alpha                                     # remember for next call
        vargout = likfunc.proceed(y,f,None,inffunc,None,4) 
        lp = vargout[0]; dlp = vargout[1]; d2lp = vargout[2]; d3lp = vargout[3]  

        W=-d2lp; isWneg = np.any(W<0)
        post = postStruct()
        post.alpha = np.dot(R0.T,np.dot(V,alpha))                   # return the posterior parameters
        post.sW = np.sqrt(np.abs(W))*np.sign(W)                     # preserve sign in case of negative
        dd = 1/(1+d0*W)                                             # temporary variable O(n)
        A = np.eye(nu) + np.dot(V*np.tile((W*dd).T,(nu,1)),V.T)     # temporary variable O(n*nu^2)
        R0tV = np.dot(R0.T,V); B = R0tV*np.tile((W*dd).T,(nu,1))    # temporary variables O(n*nu^2)
        post.L = -np.dot(B,R0tV.T)          # L = -R0'*V*inv(Kt+diag(1./ttau))*V'*R0, first part
        if np.any(1+d0*W<0):
            raise Exception('W is too negative; nlZ and dnlZ cannot be computed.')
        nlZ = np.dot(alpha.T,(f-m))/2. - lp.sum() - np.log(dd).sum()/2. + \
            np.log(np.diag(np.linalg.cholesky(A).T)).sum()
        RV = np.dot(chol_inv(A),V)
        RVdd = RV * np.tile((W*dd).T,(nu,1))  # RVdd needed for dnlZ
        B = np.dot(B,RV.T)
        post.L += np.dot(B,B.T)

        if nargout>2:                                                   # do we want derivatives?
            dnlZ = dnlZStruct(meanfunc, covfunc, likfunc)               # allocate space for derivatives
            [d,P,R] = self.fitcRefresh(d0,Ku,R0,V,W)                    # g = diag(inv(inv(K)+W))/2
            g = d/2 + 0.5*np.atleast_2d((np.dot(np.dot(R,R0),P)**2).sum(axis=0)).T
            t = W/(1+W*d0)
            
            dfhat = g*d3lp  # deriv. of nlZ wrt. fhat: dfhat=diag(inv(inv(K)+W)).*d3lp/2
            for ii in range(len(covfunc.hyp)):                          # covariance hypers
                ddiagK,dKuu,dKu = covfunc.proceed(x, None, ii)          # eval cov derivatives
                dA = 2.*dKu.T-np.dot(R0tV.T,dKuu)                       # dQ = dA*R0tV
                w = np.atleast_2d((dA*R0tV.T).sum(axis=1)).T
                v = ddiagK-w                              # w = diag(dQ); v = diag(dK)-diag(dQ);
                dnlZ.cov[ii] = np.dot(ddiagK.T,t) - np.dot((RVdd*RVdd).sum(axis=0),v)   # explicit part
                dnlZ.cov[ii] -= (np.dot(RVdd,dA)*np.dot(RVdd,R0tV.T)).sum()             # explicit part
                dnlZ.cov[ii] = 0.5*dnlZ.cov[ii] - np.dot(alpha.T,np.dot(dA,np.dot(R0tV,alpha))+v*alpha)/2.  # explicit
                b = np.dot(dA,np.dot(R0tV,dlp)) + v*dlp                                 # b-K*(Z*b) = inv(eye(n)+K*diag(W))*b
                KZb = self.mvmK(self.mvmZ(b,RVdd,t),V,d0)
                dnlZ.cov[ii] -= np.dot(dfhat.T,(b-KZb))                                 # implicit part
                dnlZ.cov[ii] = dnlZ.cov[ii][0,0]
                
            for ii in range(len(likfunc.hyp)):                                          # likelihood hypers
                vargout = likfunc.proceed(y,f,None,inffunc,ii,3)
                lp_dhyp = vargout[0]; dlp_dhyp = vargout[1]; d2lp_dhyp = vargout[2] 
                dnlZ.lik[ii] = -np.dot(g.T,d2lp_dhyp) - lp_dhyp.sum()                   # explicit part
                b = self.mvmK(dlp_dhyp,V,d0)                                            # implicit part
                dnlZ.lik[ii] -= np.dot(dfhat.T,b-self.mvmK(self.mvmZ(b,RVdd,t),V,d0))
                if ii == len(likfunc.hyp)-1:
                    # since snu2 is a fixed fraction of sn2, there is a covariance-like term
                    # in the derivative as well
                    snu = np.sqrt(snu2)
                    T = chol_inv(Kuu + snu2*np.eye(nu))
                    T = np.dot(T.T,np.dot(T,snu*Ku))
                    st2 = np.array([(T*T).sum(axis=0)]).T    # st2 = diag(T'*T); do not overwrite t = W/(1+W*d0)
                    z = np.dot(alpha.T,np.dot(T.T,np.dot(T,alpha))-st2*alpha) - np.dot(np.array([(RVdd*RVdd).sum(axis=0)]),st2)
                    z += (np.dot(RVdd,T.T)**2).sum()
                    b = (st2*dlp-np.dot(T.T,np.dot(T,dlp)))/2.
                    KZb = self.mvmK(self.mvmZ(b,RVdd,t),V,d0)
                    z -= np.dot(dfhat.T,b-KZb)
                    dnlZ.lik[ii] += z
                    dnlZ.lik[ii] = dnlZ.lik[ii][0,0]
        
            for ii in range(len(meanfunc.hyp)):                                     # mean hypers
                dm = meanfunc.proceed(x, ii)
                dnlZ.mean[ii] = -np.dot(alpha.T,dm)                             # explicit part
                Zdm = self.mvmZ(dm,RVdd,t)
                dnlZ.mean[ii] -= np.dot(dfhat.T,(dm-self.mvmK(Zdm,V,d0)))            # implicit part
                dnlZ.mean[ii] = dnlZ.mean[ii][0,0]

            vargout = [post,nlZ[0,0],dnlZ]
        else:
            vargout = [post, nlZ[0,0]]
        return vargout
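
Note: the FITC code above calls two helpers, self.mvmK and self.mvmZ, that are not shown. A minimal sketch of what the call sites assume (shapes and semantics inferred from the code, not confirmed by it): mvmK applies the approximate FITC covariance V'*V + diag(d0), and mvmZ applies Z = diag(t) - RVdd'*RVdd.

    def mvmK(self, al, V, d0):
        # (V'*V + diag(d0)) * al: matrix-vector product with the approximate
        # covariance, where V = R0*Ku and d0 = diag(K) - diag(Ku'*inv(Kuu)*Ku)
        return np.dot(V.T, np.dot(V, al)) + d0*al

    def mvmZ(self, x, RVdd, t):
        # (diag(t) - RVdd'*RVdd) * x: matrix-vector product with Z, where
        # t = W/(1+W*d0); used for the implicit parts of the derivatives
        return t*x - np.dot(RVdd.T, np.dot(RVdd, x))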
Example #2
    def proceed(self, meanfunc, covfunc, likfunc, x, y, nargout=1):
        if not isinstance(covfunc, cov.FITCOfKernel):
            raise Exception('Only covFITC supported.')
        tol = 1e-6  # tolerance for when to stop the Newton iterations
        smax = 2
        Nline = 100
        thr = 1e-4  # line search parameters
        maxit = 20  # max number of Newton steps in f
        inffunc = Laplace()
        diagK, Kuu, Ku = covfunc.proceed(x)  # evaluate the covariance matrix
        m = meanfunc.proceed(x)  # evaluate the mean vector
        if likfunc.hyp:  # hard-coded inducing-input noise
            sn2 = np.exp(2. * likfunc.hyp[-1])
            snu2 = 1.e-6 * sn2  # similar to infFITC
        else:
            snu2 = 1.e-6

        n, D = x.shape
        nu = Kuu.shape[0]
        rot180 = lambda A: np.rot90(np.rot90(A))  # little helper functions
        chol_inv = lambda A: np.linalg.solve(
            rot180(np.linalg.cholesky(rot180(A))), np.eye(nu))  # chol(inv(A))
        R0 = chol_inv(Kuu +
                      snu2 * np.eye(nu))  # initial R, used for refresh O(nu^3)
        V = np.dot(R0, Ku)
        d0 = diagK - np.array([(V * V).sum(axis=0)]).T  # initial d, needed for refresh O(n*nu^2)

        Psi_old = np.inf  # ensure the while loop starts with the largest old objective value
        if self.last_alpha is None:  # find a good starting point for alpha and f
            alpha = np.zeros((n, 1))
            f = self.mvmK(alpha, V, d0) + m  # start at mean if sizes do not match
            vargout = likfunc.proceed(y, f, None, inffunc, None, 3)
            lp = vargout[0]
            dlp = vargout[1]
            d2lp = vargout[2]
            W = -d2lp
            Psi_new = -lp.sum()
        else:
            alpha = self.last_alpha
            f = self.mvmK(alpha, V, d0) + m  # try last one
            vargout = likfunc.proceed(y, f, None, inffunc, None, 3)
            lp = vargout[0]
            dlp = vargout[1]
            d2lp = vargout[2]
            W = -d2lp
            Psi_new = np.dot(
                alpha.T, (f - m)) / 2. - lp.sum()  # objective for last alpha
            vargout = -likfunc.proceed(y, m, None, inffunc, None, 1)
            Psi_def = vargout[0]  # objective for default init f==m
            if Psi_def < Psi_new:  # if default is better, we use it
                alpha = np.zeros((n, 1))
                f = self.mvmK(alpha, V, d0) + m
                vargout = likfunc.proceed(y, f, None, inffunc, None, 3)
                lp = vargout[0]
                dlp = vargout[1]
                d2lp = vargout[2]
                W = -d2lp
                Psi_new = -lp.sum()

        isWneg = np.any(
            W < 0)  # flag indicating whether we found negative values of W
        it = 0  # this happens for the Student's t likelihood

        while (Psi_old - Psi_new > tol) and it < maxit:  # begin Newton
            Psi_old = Psi_new
            it += 1
            if isWneg:  # stabilise the Newton direction in case W has negative values
                W = np.maximum(
                    W, 0
                )  # stabilise the Hessian to guarantee positive definiteness
                tol = 1e-8  # increase accuracy to also get the derivatives right
            b = W * (f - m) + dlp
            dd = 1 / (1 + W * d0)
            RV = np.dot(
                chol_inv(
                    np.eye(nu) +
                    np.dot(V * np.tile((W * dd).T, (nu, 1)), V.T)), V)
            dalpha = dd * b - (W * dd) * np.dot(RV.T, np.dot(
                RV, (dd * b))) - alpha  # Newt dir + line search
            vargout = brentmin(0, smax, Nline, thr, self.Psi_lineFITC, 4,
                               dalpha, alpha, V, d0, m, likfunc, y, inffunc)
            s = vargout[0]
            Psi_new = vargout[1]
            Nfun = vargout[2]
            alpha = vargout[3]
            f = vargout[4]
            dlp = vargout[5]
            W = vargout[6]
            isWneg = np.any(W < 0)

        self.last_alpha = alpha  # remember for next call
        vargout = likfunc.proceed(y, f, None, inffunc, None, 4)
        lp = vargout[0]
        dlp = vargout[1]
        d2lp = vargout[2]
        d3lp = vargout[3]

        W = -d2lp
        isWneg = np.any(W < 0)
        post = postStruct()
        post.alpha = np.dot(R0.T,
                            np.dot(V,
                                   alpha))  # return the posterior parameters
        post.sW = np.sqrt(np.abs(W)) * np.sign(
            W)  # preserve sign in case of negative
        dd = 1 / (1 + d0 * W)  # temporary variable O(n)
        A = np.eye(nu) + np.dot(V * np.tile(
            (W * dd).T, (nu, 1)), V.T)  # temporary variable O(n*nu^2)
        R0tV = np.dot(R0.T, V)
        B = R0tV * np.tile((W * dd).T,
                           (nu, 1))  # temporary variables O(n*nu^2)
        post.L = -np.dot(
            B, R0tV.T)  # L = -R0'*V*inv(Kt+diag(1./ttau))*V'*R0, first part
        if np.any(1 + d0 * W < 0):
            raise Exception(
                'W is too negative; nlZ and dnlZ cannot be computed.')
        nlZ = np.dot(alpha.T,(f-m))/2. - lp.sum() - np.log(dd).sum()/2. + \
            np.log(np.diag(np.linalg.cholesky(A).T)).sum()
        RV = np.dot(chol_inv(A), V)
        RVdd = RV * np.tile((W * dd).T, (nu, 1))  # RVdd needed for dnlZ
        B = np.dot(B, RV.T)
        post.L += np.dot(B, B.T)

        if nargout > 2:  # do we want derivatives?
            dnlZ = dnlZStruct(meanfunc, covfunc,
                              likfunc)  # allocate space for derivatives
            [d, P, R] = self.fitcRefresh(d0, Ku, R0, V,
                                         W)  # g = diag(inv(inv(K)+W))/2
            g = d / 2 + 0.5 * np.atleast_2d(
                (np.dot(np.dot(R, R0), P)**2).sum(axis=0)).T
            t = W / (1 + W * d0)

            dfhat = g * d3lp  # deriv. of nlZ wrt. fhat: dfhat=diag(inv(inv(K)+W)).*d3lp/2
            for ii in range(len(covfunc.hyp)):  # covariance hypers
                ddiagK, dKuu, dKu = covfunc.proceed(x, None,
                                                    ii)  # eval cov derivatives
                dA = 2. * dKu.T - np.dot(R0tV.T, dKuu)  # dQ = dA*R0tV
                w = np.atleast_2d((dA * R0tV.T).sum(axis=1)).T  # w = diag(dQ)
                v = ddiagK - w  # v = diag(dK)-diag(dQ);
                dnlZ.cov[ii] = np.dot(ddiagK.T, t) - np.dot(
                    (RVdd * RVdd).sum(axis=0), v)  # explicit part
                dnlZ.cov[ii] -= (np.dot(RVdd, dA) *
                                 np.dot(RVdd, R0tV.T)).sum()  # explicit part
                dnlZ.cov[ii] = 0.5 * dnlZ.cov[ii] - np.dot(
                    alpha.T,
                    np.dot(dA, np.dot(R0tV, alpha)) +
                    v * alpha) / 2.  # explicit
                b = np.dot(dA, np.dot(
                    R0tV,
                    dlp)) + v * dlp  # b-K*(Z*b) = inv(eye(n)+K*diag(W))*b
                KZb = self.mvmK(self.mvmZ(b, RVdd, t), V, d0)
                dnlZ.cov[ii] -= np.dot(dfhat.T, (b - KZb))  # implicit part
                dnlZ.cov[ii] = dnlZ.cov[ii][0, 0]

            for ii in range(len(likfunc.hyp)):  # likelihood hypers
                vargout = likfunc.proceed(y, f, None, inffunc, ii, 3)
                lp_dhyp = vargout[0]
                dlp_dhyp = vargout[1]
                d2lp_dhyp = vargout[2]
                dnlZ.lik[ii] = -np.dot(
                    g.T, d2lp_dhyp) - lp_dhyp.sum()  # explicit part
                b = self.mvmK(dlp_dhyp, V, d0)  # implicit part
                dnlZ.lik[ii] -= np.dot(
                    dfhat.T, b - self.mvmK(self.mvmZ(b, RVdd, t), V, d0))
                if ii == len(likfunc.hyp) - 1:
                    # since snu2 is a fixed fraction of sn2, there is a covariance-like term
                    # in the derivative as well
                    snu = np.sqrt(snu2)
                    T = chol_inv(Kuu + snu2 * np.eye(nu))
                    T = np.dot(T.T, np.dot(T, snu * Ku))
                    st2 = np.array([(T * T).sum(axis=0)
                                    ]).T  # st2 = diag(T'*T); keep t intact
                    z = np.dot(
                        alpha.T,
                        np.dot(T.T, np.dot(T, alpha)) - st2 * alpha) - np.dot(
                            np.array([(RVdd * RVdd).sum(axis=0)]), st2)
                    z += (np.dot(RVdd, T.T)**2).sum()
                    b = (st2 * dlp - np.dot(T.T, np.dot(T, dlp))) / 2.
                    KZb = self.mvmK(self.mvmZ(b, RVdd, t), V, d0)
                    z -= np.dot(dfhat.T, b - KZb)
                    dnlZ.lik[ii] += z
                    dnlZ.lik[ii] = dnlZ.lik[ii][0, 0]

            for ii in range(len(meanfunc.hyp)):  # mean hypers
                dm = meanfunc.proceed(x, ii)
                dnlZ.mean[ii] = -np.dot(alpha.T, dm)  # explicit part
                Zdm = self.mvmZ(dm, RVdd, t)
                dnlZ.mean[ii] -= np.dot(
                    dfhat.T, (dm - self.mvmK(Zdm, V, d0)))  # implicit part
                dnlZ.mean[ii] = dnlZ.mean[ii][0, 0]

            return post, nlZ[0, 0], dnlZ
        else:
            return post, nlZ[0, 0]
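
Note: brentmin is called with self.Psi_lineFITC as the line-search objective and 4 extra return values. A minimal sketch of an objective with a matching interface (argument order and return values inferred from the call site; the actual implementation may differ): it evaluates Psi(alpha + s*dalpha) along the Newton direction and returns the quantities the caller unpacks.

    def Psi_lineFITC(self, s, dalpha, alpha, V, d0, m, likfunc, y, inffunc):
        # objective Psi(alpha) = alpha'*(f-m)/2 - sum(log p(y|f)) evaluated
        # at step size s along the Newton direction dalpha
        alpha = alpha + s*dalpha
        f = self.mvmK(alpha, V, d0) + m
        vargout = likfunc.proceed(y, f, None, inffunc, None, 3)
        lp = vargout[0]; dlp = vargout[1]; d2lp = vargout[2]
        W = -d2lp
        Psi = np.dot(alpha.T, (f-m))/2. - lp.sum()
        return Psi, alpha, f, dlp, W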
Example #3
    def proceed(self, meanfunc, covfunc, likfunc, x, y, nargout=1):
        tol = 1e-6                           # tolerance for when to stop the Newton iterations
        smax = 2; Nline = 20; thr = 1e-4     # line search parameters
        maxit = 20                           # max number of Newton steps in f
        inffunc = self
        K = covfunc.proceed(x)       # evaluate the covariance matrix
        m = meanfunc.proceed(x)      # evaluate the mean vector
        n, D = x.shape
        Psi_old = np.inf    # ensure the while loop starts with the largest old objective value
        if self.last_alpha is None:          # find a good starting point for alpha and f
            alpha = np.zeros((n,1))
            f = np.dot(K,alpha) + m       # start at mean if sizes do not match
            vargout = likfunc.proceed(y, f, None, inffunc, None, 3)
            lp = vargout[0]; dlp = vargout[1]; d2lp = vargout[2]
            W= -d2lp
            Psi_new = -lp.sum()
        else:
            alpha = self.last_alpha
            f = np.dot(K,alpha) + m                      # try last one
            vargout = likfunc.proceed(y, f, None, inffunc, None, 3)
            lp = vargout[0]; dlp = vargout[1]; d2lp = vargout[2]
            W= -d2lp
            Psi_new = np.dot(alpha.T,(f-m))/2. - lp.sum() # objective for last alpha
            vargout = - likfunc.proceed(y, m, None, inffunc, None, 1)
            Psi_def =  vargout[0]                         # objective for default init f==m
            if Psi_def < Psi_new:                         # if default is better, we use it
                alpha = np.zeros((n,1))
                f = np.dot(K,alpha) + m 
                vargout = likfunc.proceed(y, f, None, inffunc, None, 3)
                lp = vargout[0]; dlp = vargout[1]; d2lp = vargout[2]
                W=-d2lp; Psi_new = -lp.sum()
        isWneg = np.any(W<0)       # flag indicating whether we found negative values of W
        it = 0                     # this happens for the Student's t likelihood

        while (Psi_old - Psi_new > tol) and it<maxit:          # begin Newton
            Psi_old = Psi_new; it += 1
            if isWneg:       # stabilise the Newton direction in case W has negative values
                W = np.maximum(W,0)      # stabilise the Hessian to guarantee positive definiteness
                tol = 1e-10             # increase accuracy to also get the derivatives right
            sW = np.sqrt(W); L = np.linalg.cholesky(np.eye(n) + np.dot(sW,sW.T)*K).T
            b = W*(f-m) + dlp
            dalpha = b - sW*solve_chol(L,sW*np.dot(K,b)) - alpha
            vargout = brentmin(0,smax,Nline,thr,self.Psi_line,4,dalpha,alpha,K,m,likfunc,y,inffunc)
            s = vargout[0]
            Psi_new = vargout[1]
            Nfun = vargout[2]
            alpha = vargout[3]
            f = vargout[4]
            dlp = vargout[5]
            W = vargout[6]
            isWneg = np.any(W<0)
        self.last_alpha = alpha                                     # remember for next call
        vargout = likfunc.proceed(y,f,None,inffunc,None,4) 
        lp = vargout[0]; dlp = vargout[1]; d2lp = vargout[2]; d3lp = vargout[3] 
        W = -d2lp; isWneg = np.any(W<0)
        post = postStruct()
        post.alpha = alpha    # return the posterior parameters
        post.sW = np.sqrt(np.abs(W))*np.sign(W)             # preserve sign in case of negative
        if isWneg:
            [ldA,iA,post.L] = self.logdetA(K,W,3)
            nlZ = np.dot(alpha.T,(f-m))/2. - lp.sum() + ldA/2.
            nlZ = nlZ[0] 
        else:
            sW = post.sW
            post.L = np.linalg.cholesky(np.eye(n)+np.dot(sW,sW.T)*K).T 
            nlZ = np.dot(alpha.T,(f-m))/2. + (np.log(np.diag(post.L))-np.reshape(lp,(lp.shape[0],))).sum()
            nlZ = nlZ[0]
        if nargout>2:                                           # do we want derivatives?
            dnlZ = dnlZStruct(meanfunc, covfunc, likfunc)       # allocate space for derivatives
            if isWneg:                  # switch between Cholesky and LU decomposition mode
                Z = -post.L                                                 # inv(K+inv(W))
                g = np.atleast_2d((iA*K).sum(axis=1)).T /2       # deriv. of ln|B| wrt W; g = diag(inv(inv(K)+diag(W)))/2
            else:
                Z = np.tile(sW,(1,n))*solve_chol(post.L,np.diag(np.reshape(sW,(sW.shape[0],)))) #sW*inv(B)*sW=inv(K+inv(W))
                C = np.linalg.solve(post.L.T,np.tile(sW,(1,n))*K)              # deriv. of ln|B| wrt W
                g = np.atleast_2d((np.diag(K)-(C**2).sum(axis=0).T)).T /2.      # g = diag(inv(inv(K)+W))/2
            dfhat = g* d3lp  # deriv. of nlZ wrt. fhat: dfhat=diag(inv(inv(K)+W)).*d3lp/2
            for ii in range(len(covfunc.hyp)):                                  # covariance hypers
                dK = covfunc.proceed(x, None, ii)
                dnlZ.cov[ii] = (Z*dK).sum()/2. - np.dot(alpha.T,np.dot(dK,alpha))/2.    # explicit part
                b = np.dot(dK,dlp)                            # b-K*(Z*b) = inv(eye(n)+K*diag(W))*b
                dnlZ.cov[ii] -= np.dot(dfhat.T,b-np.dot(K,np.dot(Z,b)))            # implicit part
                dnlZ.cov[ii] = dnlZ.cov[ii][0][0]
            for ii in range(len(likfunc.hyp)):                  # likelihood hypers
                [lp_dhyp,dlp_dhyp,d2lp_dhyp] = likfunc.proceed(y,f,None,inffunc,ii,3)
                dnlZ.lik[ii] = -np.dot(g.T,d2lp_dhyp) - lp_dhyp.sum()      # explicit part
                b = np.dot(K,dlp_dhyp)                        # b-K*(Z*b) = inv(eye(n)+K*diag(W))*b
                dnlZ.lik[ii] -= np.dot(dfhat.T,b-np.dot(K,np.dot(Z,b)))   # implicit part
                dnlZ.lik[ii] = dnlZ.lik[ii][0][0]
            for ii in range(len(meanfunc.hyp)):                  # mean hypers
                dm = meanfunc.proceed(x, ii)
                dnlZ.mean[ii] = -np.dot(alpha.T,dm)                # explicit part
                dnlZ.mean[ii] -= np.dot(dfhat.T,dm-np.dot(K,np.dot(Z,dm))) # implicit part
                dnlZ.mean[ii] = dnlZ.mean[ii][0][0]
            vargout = [post,nlZ[0],dnlZ]
        else:
            vargout = [post, nlZ[0]]
        return vargout
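
Note: Examples #3 and #4 rely on a free function solve_chol that is not shown. A minimal sketch consistent with its use here, where L is the upper-triangular factor produced by np.linalg.cholesky(...).T so that A = L'*L: two triangular solves give inv(A)*B without ever forming the inverse.

def solve_chol(L, B):
    # solve (L'*L) * X = B for X, given the upper-triangular Cholesky
    # factor L of A = L'*L; equivalent to inv(A)*B
    return np.linalg.solve(L, np.linalg.solve(L.T, B))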
Example #4
    def proceed(self, meanfunc, covfunc, likfunc, x, y, nargout=1):
        tol = 1e-6  # tolerance for when to stop the Newton iterations
        smax = 2
        Nline = 20
        thr = 1e-4  # line search parameters
        maxit = 20  # max number of Newton steps in f
        inffunc = self
        K = covfunc.proceed(x)  # evaluate the covariance matrix
        m = meanfunc.proceed(x)  # evaluate the mean vector
        n, D = x.shape
        Psi_old = np.inf  # ensure the while loop starts with the largest old objective value
        if self.last_alpha is None:  # find a good starting point for alpha and f
            alpha = np.zeros((n, 1))
            f = np.dot(K, alpha) + m  # start at mean if sizes do not match
            vargout = likfunc.proceed(y, f, None, inffunc, None, 3)
            lp = vargout[0]
            dlp = vargout[1]
            d2lp = vargout[2]
            W = -d2lp
            Psi_new = -lp.sum()
        else:
            alpha = self.last_alpha
            f = np.dot(K, alpha) + m  # try last one
            vargout = likfunc.proceed(y, f, None, inffunc, None, 3)
            lp = vargout[0]
            dlp = vargout[1]
            d2lp = vargout[2]
            W = -d2lp
            Psi_new = np.dot(
                alpha.T, (f - m)) / 2. - lp.sum()  # objective for last alpha
            vargout = -likfunc.proceed(y, m, None, inffunc, None, 1)
            Psi_def = vargout[0]  # objective for default init f==m
            if Psi_def < Psi_new:  # if default is better, we use it
                alpha = np.zeros((n, 1))
                f = np.dot(K, alpha) + m
                vargout = likfunc.proceed(y, f, None, inffunc, None, 3)
                lp = vargout[0]
                dlp = vargout[1]
                d2lp = vargout[2]
                W = -d2lp
                Psi_new = -lp.sum()
        isWneg = np.any(
            W < 0)  # flag indicating whether we found negative values of W
        it = 0  # this happens for the Student's t likelihood

        while (Psi_old - Psi_new > tol) and it < maxit:  # begin Newton
            Psi_old = Psi_new
            it += 1
            if isWneg:  # stabilise the Newton direction in case W has negative values
                W = np.maximum(
                    W, 0
                )  # stabilise the Hessian to guarantee positive definiteness
                tol = 1e-10  # increase accuracy to also get the derivatives right
            sW = np.sqrt(W)
            L = np.linalg.cholesky(np.eye(n) + np.dot(sW, sW.T) * K).T
            b = W * (f - m) + dlp
            dalpha = b - sW * solve_chol(L, sW * np.dot(K, b)) - alpha
            vargout = brentmin(0, smax, Nline, thr, self.Psi_line, 4, dalpha,
                               alpha, K, m, likfunc, y, inffunc)
            s = vargout[0]
            Psi_new = vargout[1]
            Nfun = vargout[2]
            alpha = vargout[3]
            f = vargout[4]
            dlp = vargout[5]
            W = vargout[6]
            isWneg = np.any(W < 0)
        self.last_alpha = alpha  # remember for next call
        vargout = likfunc.proceed(y, f, None, inffunc, None, 4)
        lp = vargout[0]
        dlp = vargout[1]
        d2lp = vargout[2]
        d3lp = vargout[3]
        W = -d2lp
        isWneg = np.any(W < 0)
        post = postStruct()
        post.alpha = alpha  # return the posterior parameters
        post.sW = np.sqrt(np.abs(W)) * np.sign(
            W)  # preserve sign in case of negative
        if isWneg:
            [ldA, iA, post.L] = self.logdetA(K, W, 3)
            nlZ = np.dot(alpha.T, (f - m)) / 2. - lp.sum() + ldA / 2.
            nlZ = nlZ[0]
        else:
            sW = post.sW
            post.L = np.linalg.cholesky(np.eye(n) + np.dot(sW, sW.T) * K).T
            nlZ = np.dot(alpha.T,
                         (f - m)) / 2. + (np.log(np.diag(post.L)) -
                                          np.reshape(lp,
                                                     (lp.shape[0], ))).sum()
            nlZ = nlZ[0]
        if nargout > 2:  # do we want derivatives?
            dnlZ = dnlZStruct(meanfunc, covfunc,
                              likfunc)  # allocate space for derivatives
            if isWneg:  # switch between Cholesky and LU decomposition mode
                Z = -post.L  # inv(K+inv(W))
                g = np.atleast_2d(
                    (iA * K).sum(axis=1)
                ).T / 2  # deriv. of ln|B| wrt W; g = diag(inv(inv(K)+diag(W)))/2
            else:
                Z = np.tile(sW, (1, n)) * solve_chol(
                    post.L, np.diag(np.reshape(
                        sW, (sW.shape[0], ))))  #sW*inv(B)*sW=inv(K+inv(W))
                C = np.linalg.solve(post.L.T,
                                    np.tile(sW, (1, n)) *
                                    K)  # deriv. of ln|B| wrt W
                g = np.atleast_2d(
                    (np.diag(K) -
                     (C**2).sum(axis=0).T)).T / 2.  # g = diag(inv(inv(K)+W))/2
            dfhat = g * d3lp  # deriv. of nlZ wrt. fhat: dfhat=diag(inv(inv(K)+W)).*d3lp/2
            for ii in range(len(covfunc.hyp)):  # covariance hypers
                dK = covfunc.proceed(x, None, ii)
                dnlZ.cov[ii] = (Z * dK).sum() / 2. - np.dot(
                    alpha.T, np.dot(dK, alpha)) / 2.  # explicit part
                b = np.dot(dK, dlp)  # b-K*(Z*b) = inv(eye(n)+K*diag(W))*b
                dnlZ.cov[ii] -= np.dot(
                    dfhat.T, b - np.dot(K, np.dot(Z, b)))  # implicit part
                dnlZ.cov[ii] = dnlZ.cov[ii][0, 0]
            for ii in range(len(likfunc.hyp)):  # likelihood hypers
                [lp_dhyp, dlp_dhyp,
                 d2lp_dhyp] = likfunc.proceed(y, f, None, inffunc, ii, 3)
                dnlZ.lik[ii] = -np.dot(
                    g.T, d2lp_dhyp) - lp_dhyp.sum()  # explicit part
                b = np.dot(K, dlp_dhyp)  # b-K*(Z*b) = inv(eye(n)+K*diag(W))*b
                dnlZ.lik[ii] -= np.dot(
                    dfhat.T, b - np.dot(K, np.dot(Z, b)))  # implicit part
                dnlZ.lik[ii] = dnlZ.lik[ii][0, 0]
            for ii in range(len(meanfunc.hyp)):  # mean hypers
                dm = meanfunc.proceed(x, ii)
                dnlZ.mean[ii] = -np.dot(alpha.T, dm)  # explicit part
                dnlZ.mean[ii] -= np.dot(
                    dfhat.T, dm - np.dot(K, np.dot(Z, dm)))  # implicit part
                dnlZ.mean[ii] = dnlZ.mean[ii][0, 0]
            return post, nlZ[0], dnlZ
        else:
            return post, nlZ[0]
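
Note: all four examples return a postStruct and fill a dnlZStruct. Minimal sketches of containers with the attributes the code reads and writes (field names inferred from usage; the actual classes may carry more):

class postStruct(object):
    # approximate posterior: alpha (dual parameters), sW (sign-preserving
    # sqrt of W), L (Cholesky factor, or the explicit negative inverse)
    def __init__(self):
        self.alpha = None
        self.sW = None
        self.L = None

class dnlZStruct(object):
    # derivatives of the negative log marginal likelihood, one slot per
    # hyperparameter of the mean, covariance and likelihood functions
    def __init__(self, meanfunc, covfunc, likfunc):
        self.mean = [0. for _ in meanfunc.hyp]
        self.cov = [0. for _ in covfunc.hyp]
        self.lik = [0. for _ in likfunc.hyp]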