def infLaplace(hyp, meanfunc, covfunc, likfunc, x, y, nargout=1):
    """
    Laplace approximation to the posterior Gaussian process. The function takes
    a specified covariance function (see kernels.py) and likelihood function
    (see likelihoods.py).
    """
    tol = 1e-6                          # tolerance for when to stop the Newton iterations
    smax = 2; Nline = 20; thr = 1e-4    # line search parameters
    maxit = 20                          # max number of Newton steps in f
    inffunc = "inferences.infLaplace"

    K = src.Tools.general.feval(covfunc, hyp.cov, x)    # evaluate the covariance matrix
    m = src.Tools.general.feval(meanfunc, hyp.mean, x)  # evaluate the mean vector
    n, D = x.shape

    Psi_old = np.inf    # make sure the while loop starts with the largest old objective value
    if "last_alpha" not in infLaplace.__dict__:     # find a good starting point for alpha and f
        alpha = np.zeros((n, 1))
        f = np.dot(K, alpha) + m                    # start at the mean if sizes do not match
        vargout = src.Tools.general.feval(likfunc, hyp.lik, y, f, None, inffunc, None, 3)
        lp, dlp, d2lp = vargout[0], vargout[1], vargout[2]
        W = -d2lp
        Psi_new = -lp.sum()
    else:
        alpha = infLaplace.last_alpha               # try the alpha remembered from the last call
        f = np.dot(K, alpha) + m
        vargout = src.Tools.general.feval(likfunc, hyp.lik, y, f, None, inffunc, None, 3)
        lp, dlp, d2lp = vargout[0], vargout[1], vargout[2]
        W = -d2lp
        Psi_new = np.dot(alpha.T, (f - m)) / 2.0 - lp.sum()     # objective for last alpha
        vargout = src.Tools.general.feval(likfunc, hyp.lik, y, m, None, inffunc, None, 1)
        Psi_def = -vargout[0].sum()                 # objective for the default init f == m
        if Psi_def < Psi_new:                       # if default is better, we use it
            alpha = np.zeros((n, 1))
            f = np.dot(K, alpha) + m
            vargout = src.Tools.general.feval(likfunc, hyp.lik, y, f, None, inffunc, None, 3)
            lp, dlp, d2lp = vargout[0], vargout[1], vargout[2]
            W = -d2lp
            Psi_new = -lp.sum()

    isWneg = np.any(W < 0)      # flag indicating whether we found negative values of W;
    it = 0                      # this happens for the Student's t likelihood

    while (Psi_old - Psi_new > tol) and it < maxit:     # begin Newton iterations
        Psi_old = Psi_new
        it += 1
        if isWneg:      # stabilise the Newton direction in case W has negative values
            W = np.maximum(W, 0)    # stabilise the Hessian to guarantee positive definiteness
            tol = 1e-10             # increase accuracy to also get the derivatives right
            # In Vanhatalo et al., GPR with Student's t likelihood, NIPS 2009, a more
            # conservative strategy is used, equivalent to the two lines below:
            # nu = exp(hyp.lik(1));            % degree-of-freedom hyperparameter
            # W = W + 2/(nu+1)*dlp.^2;         % add ridge according to Vanhatalo
        sW = np.sqrt(W)
        L = np.linalg.cholesky(np.eye(n) + np.dot(sW, sW.T) * K).T
        b = W * (f - m) + dlp
        dalpha = b - sW * solve_chol(L, sW * np.dot(K, b)) - alpha      # Newton direction
        # line search along the Newton direction
        vargout = brentmin(0, smax, Nline, thr, _Psi_line, 4, dalpha, alpha, hyp, K, m, likfunc, y, inffunc)
        s = vargout[0]
        Psi_new = vargout[1]
        Nfun = vargout[2]
        alpha = vargout[3]
        f = vargout[4]
        dlp = vargout[5]
        W = vargout[6]
        isWneg = np.any(W < 0)

    infLaplace.last_alpha = alpha       # remember alpha for the next call
    vargout = src.Tools.general.feval(likfunc, hyp.lik, y, f, None, inffunc, None, 4)
    lp, dlp, d2lp, d3lp = vargout[0], vargout[1], vargout[2], vargout[3]
    W = -d2lp
    isWneg = np.any(W < 0)

    post = postStruct()
    post.alpha = alpha                              # return the posterior parameters
    post.sW = np.sqrt(np.abs(W)) * np.sign(W)       # preserve the sign in case of negative W
    if isWneg:
        # B = I + sW*K*sW is not guaranteed to be positive definite here, so switch
        # to an LU-based computation of log|B| and inv(K+inv(W))
        [ldA, iA, post.L] = _logdetA(K, W, 3)
        nlZ = np.dot(alpha.T, (f - m)) / 2.0 - lp.sum() + ldA / 2.0
        nlZ = nlZ[0]
    else:
        sW = post.sW
        post.L = np.linalg.cholesky(np.eye(n) + np.dot(sW, sW.T) * K).T
        nlZ = np.dot(alpha.T, (f - m)) / 2.0 + (np.log(np.diag(post.L)) - np.reshape(lp, (lp.shape[0],))).sum()
        nlZ = nlZ[0]

    if nargout > 2:                                 # do we want derivatives?
        dnlZ = dnlzStruct(hyp)                      # allocate space for derivatives
        if isWneg:                                  # switch between Cholesky and LU decomposition mode
            Z = -post.L                             # inv(K+inv(W))
            g = np.atleast_2d((iA * K).sum(axis=1)).T / 2   # deriv. of ln|B| wrt W; g = diag(inv(inv(K)+diag(W)))/2
        else:
            Z = np.tile(sW, (1, n)) * solve_chol(post.L, np.diag(np.reshape(sW, (sW.shape[0],))))   # sW*inv(B)*sW = inv(K+inv(W))
            C = np.linalg.solve(post.L.T, np.tile(sW, (1, n)) * K)              # deriv. of ln|B| wrt W
            g = np.atleast_2d((np.diag(K) - (C ** 2).sum(axis=0).T)).T / 2.0    # g = diag(inv(inv(K)+W))/2
        dfhat = g * d3lp                            # deriv. of nlZ wrt fhat
        for ii in range(len(hyp.cov)):              # covariance hypers
            dK = src.Tools.general.feval(covfunc, hyp.cov, x, None, ii)
            dnlZ.cov[ii] = (Z * dK).sum() / 2.0 - np.dot(alpha.T, np.dot(dK, alpha)) / 2.0  # explicit part
            b = np.dot(dK, dlp)
            dnlZ.cov[ii] -= np.dot(dfhat.T, b - np.dot(K, np.dot(Z, b)))[0, 0]              # implicit part
        for ii in range(len(hyp.lik)):              # likelihood hypers
            [lp_dhyp, dlp_dhyp, d2lp_dhyp] = src.Tools.general.feval(likfunc, hyp.lik, y, f, None, inffunc, ii, 3)
            dnlZ.lik[ii] = -np.dot(g.T, d2lp_dhyp) - lp_dhyp.sum()                          # explicit part
            b = np.dot(K, dlp_dhyp)
            dnlZ.lik[ii] -= np.dot(dfhat.T, b - np.dot(K, np.dot(Z, b)))[0, 0]              # implicit part
        for ii in range(len(hyp.mean)):             # mean hypers
            dm = src.Tools.general.feval(meanfunc, hyp.mean, x, ii)
            dnlZ.mean[ii] = -np.dot(alpha.T, dm)                                            # explicit part
            dnlZ.mean[ii] -= np.dot(dfhat.T, dm - np.dot(K, np.dot(Z, dm)))[0, 0]           # implicit part
        vargout = [post, nlZ, dnlZ]
    else:
        vargout = [post, nlZ]
    return vargout
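
# ----------------------------------------------------------------------------
# Helper sketches. Neither _Psi_line nor _logdetA is defined in this section;
# the versions below are minimal sketches reconstructed from the call sites in
# infLaplace, not the library's own implementations. _Psi_line is assumed to
# return the line-search objective first, followed by the 4 extra outputs
# (alpha, f, dlp, W) that brentmin is asked to pass through via its argument 4;
# _logdetA assumes scipy.linalg is available for the pivoted LU decomposition.

def _Psi_line(s, dalpha, alpha, hyp, K, m, likfunc, y, inffunc):
    """Line-search objective Psi(alpha + s*dalpha) used by brentmin (sketch)."""
    alpha = alpha + s * dalpha                          # step along the Newton direction
    f = np.dot(K, alpha) + m                            # implied latent function values
    vargout = src.Tools.general.feval(likfunc, hyp.lik, y, f, None, inffunc, None, 3)
    lp, dlp, d2lp = vargout[0], vargout[1], vargout[2]
    W = -d2lp
    Psi = np.dot(alpha.T, (f - m)) / 2.0 - lp.sum()     # penalised negative log likelihood
    return Psi, alpha, f, dlp, W


def _logdetA(K, W, nargout=3):
    """log|A|, inv(A) and -diag(W)*inv(A) for A = I + K*diag(W) (sketch).

    An LU decomposition is used because A is not symmetric positive definite
    when W has negative entries; log|A| is only real if det(A) > 0.
    """
    import scipy.linalg                                 # assumed available
    n = K.shape[0]
    A = np.eye(n) + K * np.tile(W.T, (n, 1))            # A = I + K*diag(W)
    P, L, U = scipy.linalg.lu(A)                        # pivoted LU: A = P*L*U
    u = np.diag(U)                                      # pivots of U
    if np.prod(np.sign(u)) * np.sign(np.linalg.det(P)) < 0:
        raise Exception("W is too negative; log|A| is not real.")
    ldA = np.log(np.abs(u)).sum()                       # log|det(A)|
    iA = np.linalg.inv(A)                               # inv(I + K*diag(W))
    mwiA = -np.tile(W, (1, n)) * iA                     # -diag(W)*inv(A), stored as post.L
    return ldA, iA, mwiA
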
def infFITC_Laplace(hyp, meanfunc, covfunc, likfunc, x, y, nargout=1):
    """
    FITC-Laplace approximation to the posterior Gaussian process. The function
    is equivalent to infLaplace with the covariance function
        Kt = Q + G;  G = diag(g);  g = diag(K-Q);  Q = Ku'*inv(Kuu+snu2*eye(nu))*Ku,
    where Ku and Kuu are covariances w.r.t. the inducing inputs xu, and
    snu2 = sn2/1e6 is the noise of the inducing inputs. We fix the standard
    deviation of the inducing inputs snu to be one per mil of the measurement
    noise standard deviation sn. In case of a likelihood without a noise
    parameter sn2, we simply use snu2 = 1e-6.

    The implementation exploits the Woodbury matrix identity
        inv(Kt) = inv(G) - inv(G)*Ku'*inv(Kuu+Ku*inv(G)*Ku')*Ku*inv(G)
    in order to be applicable to large datasets. The computational complexity
    is O(n nu^2), where n is the number of data points x and nu is the number
    of inducing inputs in xu. The posterior N(f|h,Sigma) is given by h = m+mu
    with mu = nn+P'*gg and Sigma = inv(inv(K)+diag(W)) = diag(d)+P'*R0'*R'*R*R0*P.

    The function takes a specified covariance function (see kernels.py) and
    likelihood function (see likelihoods.py), and is designed to be used with
    gp.py and in conjunction with covFITC.
    """
    cov1 = covfunc[0]
    if not cov1 == ["kernels.covFITC"]:
        raise Exception("Only covFITC supported.")      # check cov

    tol = 1e-6                          # tolerance for when to stop the Newton iterations
    smax = 2; Nline = 100; thr = 1e-4   # line search parameters
    maxit = 20                          # max number of Newton steps in f
    inffunc = "inferences.infLaplace"

    diagK, Kuu, Ku = src.Tools.general.feval(covfunc, hyp.cov, x)   # evaluate the covariance
    m = src.Tools.general.feval(meanfunc, hyp.mean, x)              # evaluate the mean vector
    if hyp.lik:                         # hard coded inducing inputs noise
        sn2 = np.exp(2.0 * hyp.lik[-1])
        snu2 = 1.0e-6 * sn2             # similar to infFITC
    else:
        snu2 = 1.0e-6
    n, D = x.shape
    nu = Kuu.shape[0]

    rot180 = lambda A: np.rot90(np.rot90(A))    # little helper functions
    chol_inv = lambda A: np.linalg.solve(rot180(np.linalg.cholesky(rot180(A))), np.eye(nu))     # chol(inv(A))
    R0 = chol_inv(Kuu + snu2 * np.eye(nu))      # initial R, used for refresh O(nu^3)
    V = np.dot(R0, Ku)
    d0 = diagK - np.array([(V * V).sum(axis=0)]).T      # initial d, needed for refresh

    Psi_old = np.inf    # make sure the while loop starts with the largest old objective value
    if "last_alpha" not in infFITC_Laplace.__dict__:    # find a good starting point for alpha and f
        alpha = np.zeros((n, 1))
        f = _mvmK(alpha, V, d0) + m                     # start at the mean if sizes do not match
        vargout = src.Tools.general.feval(likfunc, hyp.lik, y, f, None, inffunc, None, 3)
        lp, dlp, d2lp = vargout[0], vargout[1], vargout[2]
        W = -d2lp
        Psi_new = -lp.sum()
    else:
        alpha = infFITC_Laplace.last_alpha              # try the alpha remembered from the last call
        f = _mvmK(alpha, V, d0) + m
        vargout = src.Tools.general.feval(likfunc, hyp.lik, y, f, None, inffunc, None, 3)
        lp, dlp, d2lp = vargout[0], vargout[1], vargout[2]
        W = -d2lp
        Psi_new = np.dot(alpha.T, (f - m)) / 2.0 - lp.sum()     # objective for last alpha
        vargout = src.Tools.general.feval(likfunc, hyp.lik, y, m, None, inffunc, None, 1)
        Psi_def = -vargout[0].sum()                     # objective for the default init f == m
        if Psi_def < Psi_new:                           # if default is better, we use it
            alpha = np.zeros((n, 1))
            f = _mvmK(alpha, V, d0) + m
            vargout = src.Tools.general.feval(likfunc, hyp.lik, y, f, None, inffunc, None, 3)
            lp, dlp, d2lp = vargout[0], vargout[1], vargout[2]
            W = -d2lp
            Psi_new = -lp.sum()

    isWneg = np.any(W < 0)      # flag indicating whether we found negative values of W;
    it = 0                      # this happens for the Student's t likelihood

    while (Psi_old - Psi_new > tol) and it < maxit:     # begin Newton iterations
        Psi_old = Psi_new
        it += 1
        if isWneg:      # stabilise the Newton direction in case W has negative values
            W = np.maximum(W, 0)    # stabilise the Hessian to guarantee positive definiteness
            tol = 1e-8              # increase accuracy to also get the derivatives right
            # In Vanhatalo et al., GPR with Student's t likelihood, NIPS 2009, a more
            # conservative strategy is used, equivalent to the two lines below:
            # nu = exp(hyp.lik(1));            % degree-of-freedom hyperparameter
            # W = W + 2/(nu+1)*dlp.^2;         % add ridge according to Vanhatalo
        b = W * (f - m) + dlp
        dd = 1 / (1 + W * d0)
        RV = np.dot(chol_inv(np.eye(nu) + np.dot(V * np.tile((W * dd).T, (nu, 1)), V.T)), V)
        dalpha = dd * b - (W * dd) * np.dot(RV.T, np.dot(RV, (dd * b))) - alpha     # Newton direction
        # line search along the Newton direction
        vargout = brentmin(0, smax, Nline, thr, _Psi_lineFITC, 4, dalpha, alpha, hyp, V, d0, m, likfunc, y, inffunc)
        s = vargout[0]
        Psi_new = vargout[1]
        Nfun = vargout[2]
        alpha = vargout[3]
        f = vargout[4]
        dlp = vargout[5]
        W = vargout[6]
        isWneg = np.any(W < 0)

    infFITC_Laplace.last_alpha = alpha      # remember alpha for the next call
    vargout = src.Tools.general.feval(likfunc, hyp.lik, y, f, None, inffunc, None, 4)
    lp, dlp, d2lp, d3lp = vargout[0], vargout[1], vargout[2], vargout[3]
    W = -d2lp
    isWneg = np.any(W < 0)

    post = postStruct()
    post.alpha = np.dot(R0.T, np.dot(V, alpha))     # return the posterior parameters
    post.sW = np.sqrt(np.abs(W)) * np.sign(W)       # preserve the sign in case of negative W
    dd = 1 / (1 + d0 * W)                           # temporary variable O(n)
    A = np.eye(nu) + np.dot(V * np.tile((W * dd).T, (nu, 1)), V.T)  # temporary variable O(n*nu^2)
    R0tV = np.dot(R0.T, V)
    B = R0tV * np.tile((W * dd).T, (nu, 1))         # temporary variable O(n*nu^2)
    post.L = -np.dot(B, R0tV.T)     # L = -R0'*V*inv(Kt+diag(1./ttau))*V'*R0, first part
    if np.any(1 + d0 * W < 0):
        # B = np.dot(B, V.T); post.L += np.dot(np.dot(B, np.linalg.inv(A)), B.T)
        # nlZ = np.nan; dnlZ = struct('cov',0*hyp.cov, 'mean',0*hyp.mean, 'lik',0*hyp.lik)
        raise Exception("W is too negative; nlZ and dnlZ cannot be computed.")
    nlZ = (
        np.dot(alpha.T, (f - m)) / 2.0
        - lp.sum()
        - np.log(dd).sum() / 2.0
        + np.log(np.diag(np.linalg.cholesky(A).T)).sum()
    )
    RV = np.dot(chol_inv(A), V)
    RVdd = RV * np.tile((W * dd).T, (nu, 1))        # RVdd is needed for dnlZ
    B = np.dot(B, RV.T)
    post.L += np.dot(B, B.T)

    if nargout > 2:             # do we want derivatives?
        dnlZ = dnlzStruct(hyp)  # allocate space for derivatives
        [d, P, R] = _fitcRefresh(d0, Ku, R0, V, W)
        # g = diag(inv(inv(K)+W))/2
        g = d / 2 + 0.5 * np.atleast_2d((np.dot(np.dot(R, R0), P) ** 2).sum(axis=0)).T
        t = W / (1 + W * d0)
        dfhat = g * d3lp        # deriv. of nlZ wrt fhat: dfhat = diag(inv(inv(K)+W)).*d3lp/2
        for ii in range(len(hyp.cov)):      # covariance hypers
            ddiagK, dKuu, dKu = src.Tools.general.feval(covfunc, hyp.cov, x, None, ii)  # eval cov derivatives
            dA = 2.0 * dKu.T - np.dot(R0tV.T, dKuu)             # dQ = dA*R0tV
            w = np.atleast_2d((dA * R0tV.T).sum(axis=1)).T      # w = diag(dQ)
            v = ddiagK - w                                      # v = diag(dK) - diag(dQ)
            dnlZ.cov[ii] = np.dot(ddiagK.T, t) - np.dot((RVdd * RVdd).sum(axis=0), v)   # explicit part
            dnlZ.cov[ii] -= (np.dot(RVdd, dA) * np.dot(RVdd, R0tV.T)).sum()             # explicit part
            dnlZ.cov[ii] = 0.5 * dnlZ.cov[ii] - np.dot(alpha.T, np.dot(dA, np.dot(R0tV, alpha)) + v * alpha) / 2.0  # explicit part
            b = np.dot(dA, np.dot(R0tV, dlp)) + v * dlp         # b-K*(Z*b) = inv(eye(n)+K*diag(W))*b
            KZb = _mvmK(_mvmZ(b, RVdd, t), V, d0)
            dnlZ.cov[ii] -= np.dot(dfhat.T, (b - KZb))          # implicit part
        for ii in range(len(hyp.lik)):      # likelihood hypers
            vargout = src.Tools.general.feval(likfunc, hyp.lik, y, f, None, inffunc, ii, 3)
            lp_dhyp, dlp_dhyp, d2lp_dhyp = vargout[0], vargout[1], vargout[2]
            dnlZ.lik[ii] = -np.dot(g.T, d2lp_dhyp) - lp_dhyp.sum()      # explicit part
            b = _mvmK(dlp_dhyp, V, d0)                                  # implicit part
            dnlZ.lik[ii] -= np.dot(dfhat.T, b - _mvmK(_mvmZ(b, RVdd, t), V, d0))
            if ii == len(hyp.lik) - 1:
                # since snu2 is a fixed fraction of sn2, there is a covariance-like
                # term in the derivative as well (st2 is used here so that the
                # variable t needed by _mvmZ is not overwritten)
                snu = np.sqrt(snu2)
                T = chol_inv(Kuu + snu2 * np.eye(nu))
                T = np.dot(T.T, np.dot(T, snu * Ku))
                st2 = np.array([(T * T).sum(axis=0)]).T
                z = np.dot(alpha.T, np.dot(T.T, np.dot(T, alpha)) - st2 * alpha) - np.dot(np.array([(RVdd * RVdd).sum(axis=0)]), st2)
                z += (np.dot(RVdd, T.T) ** 2).sum()
                b = (st2 * dlp - np.dot(T.T, np.dot(T, dlp))) / 2.0
                KZb = _mvmK(_mvmZ(b, RVdd, t), V, d0)
                z -= np.dot(dfhat.T, b - KZb)
                dnlZ.lik[ii] += z
        for ii in range(len(hyp.mean)):     # mean hypers
            dm = src.Tools.general.feval(meanfunc, hyp.mean, x, ii)
            dnlZ.mean[ii] = -np.dot(alpha.T, dm)    # explicit part
            Zdm = _mvmZ(dm, RVdd, t)
            dnlZ.mean[ii] -= np.dot(dfhat.T, (dm - _mvmK(Zdm, V, d0)))  # implicit part
        vargout = [post, nlZ[0, 0], dnlZ]
    else:
        vargout = [post, nlZ[0, 0]]
    return vargout
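
# ----------------------------------------------------------------------------
# FITC helper sketches. _mvmK, _mvmZ, _Psi_lineFITC and _fitcRefresh are
# referenced above but not defined in this section. The versions below are
# minimal sketches consistent with the identities used by infFITC_Laplace
# (Kt = V'*V + diag(d0) and Z = inv(Kt + inv(W))), not the library's own
# implementations; _fitcRefresh follows the GPML fitcRefresh routine.

def _mvmK(al, V, d0):
    """Matrix-vector product Kt*al with Kt = V'*V + diag(d0), in O(n*nu) (sketch)."""
    return np.dot(V.T, np.dot(V, al)) + d0 * al


def _mvmZ(x, RVdd, t):
    """Matrix-vector product Z*x with Z = inv(Kt + inv(W)) via Woodbury (sketch);
    t = W/(1+W*d0) and RVdd = (W*dd)'.*(chol_inv(A)*V) as computed above."""
    return t * x - np.dot(RVdd.T, np.dot(RVdd, x))


def _Psi_lineFITC(s, dalpha, alpha, hyp, V, d0, m, likfunc, y, inffunc):
    """Line-search objective Psi(alpha + s*dalpha) for the FITC case (sketch)."""
    alpha = alpha + s * dalpha                          # step along the Newton direction
    f = _mvmK(alpha, V, d0) + m                         # implied latent function values
    vargout = src.Tools.general.feval(likfunc, hyp.lik, y, f, None, inffunc, None, 3)
    lp, dlp, d2lp = vargout[0], vargout[1], vargout[2]
    W = -d2lp
    Psi = np.dot(alpha.T, (f - m)) / 2.0 - lp.sum()     # penalised negative log likelihood
    return Psi, alpha, f, dlp, W


def _fitcRefresh(d0, P0, R0, R0P0, w):
    """Refresh the O(n*nu^2) representation (d, P, R) of the posterior from the
    initial parameters and the site precisions w (sketch)."""
    nu = R0.shape[0]                                    # number of inducing points
    rot180 = lambda A: np.rot90(np.rot90(A))
    chol_inv = lambda A: np.linalg.solve(rot180(np.linalg.cholesky(rot180(A))), np.eye(nu))
    t = 1.0 / (1.0 + d0 * w)                            # temporary variable O(n)
    d = d0 * t                                          # O(n)
    P = np.tile(t.T, (nu, 1)) * P0                      # O(n*nu)
    T = np.tile((w * t).T, (nu, 1)) * R0P0              # temporary variable O(n*nu^2)
    R = chol_inv(np.eye(nu) + np.dot(R0P0, T.T))        # O(n*nu^2)
    return d, P, R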