Ejemplo n.º 1
0
def infExact(hyp, meanfunc, covfunc, likfunc, x, y, nargout=1):
    """Exact GP inference for a Gaussian likelihood.

    Builds a parametrisation of the posterior and, on request, the negative
    log marginal likelihood and its derivatives w.r.t. the hyperparameters.
    Ported from GPML's infExact.m (Rasmussen & Nickisch, 2013-01-21).

    Parameters
    ----------
    hyp : object exposing ``cov``, ``mean`` and ``lik`` hyperparameters
    meanfunc, covfunc : specifications forwarded to ``Tools.general.feval``
    likfunc : sequence whose first entry must equal ``'lik.likGauss'``
    x : 2-D array of training inputs (one row per case)
    y : training targets
    nargout : number of requested outputs (1, 2 or 3)

    Returns
    -------
    ``post`` when ``nargout <= 1``; ``(post, nlZ)`` when ``nargout == 2``;
    ``(post, nlZ, dnlZ)`` when ``nargout > 2``.  ``post`` carries ``alpha``,
    ``sW`` and ``L``; ``dnlZ`` is a deep copy of ``hyp`` whose entries have
    been overwritten with the derivatives.

    Raises
    ------
    Exception
        If the likelihood is not ``'lik.likGauss'``.
    """
    # The Gaussian likelihood is handled analytically; it is never evaluated.
    if likfunc[0] != 'lik.likGauss':
        raise Exception ('Exact inference only possible with Gaussian likelihood')

    num_cases, _ = x.shape
    cov_mat = Tools.general.feval(covfunc, hyp.cov, x)      # prior covariance
    prior_mean = Tools.general.feval(meanfunc, hyp.mean, x)  # prior mean vector

    noise_var = np.exp(2.*hyp.lik)                           # sn^2 of likGauss
    # Upper-triangular Cholesky factor of the noise-scaled matrix K/sn2 + I.
    chol_up = np.linalg.cholesky(cov_mat/noise_var + np.eye(num_cases)).T
    alpha = solve_chol(chol_up, y - prior_mean)/noise_var

    post = postStruct()
    post.alpha = alpha
    post.sW = np.ones((num_cases, 1))/np.sqrt(noise_var)     # sqrt of noise precision
    post.L = chol_up                                         # chol(eye(n)+sW*sW'.*K)

    if nargout <= 1:                                         # posterior only
        return post

    # Negative log marginal likelihood: data fit + log-determinant + constant.
    data_fit = np.dot((y - prior_mean).T, alpha/2)
    log_det = np.log(np.diag(chol_up)).sum()
    norm_const = num_cases*np.log(2*np.pi*noise_var)/2.
    nlZ = data_fit + log_det + norm_const

    if nargout <= 2:                                         # no derivatives wanted
        return post, nlZ

    dnlZ = deepcopy(hyp)                                     # same layout as hyp
    # Q = inv(K + sn2*I) - alpha*alpha', shared by the cov and lik derivatives.
    Q = solve_chol(chol_up, np.eye(num_cases))/noise_var - np.dot(alpha, alpha.T)
    for idx, _ in enumerate(hyp.cov):
        dK = Tools.general.feval(covfunc, hyp.cov, x, None, idx)
        dnlZ.cov[idx] = (Q*dK).sum()/2.
    dnlZ.lik = noise_var*np.trace(Q)
    for idx, _ in enumerate(hyp.mean):
        dmean = Tools.general.feval(meanfunc, hyp.mean, x, idx)
        dnlZ.mean[idx] = np.dot(-dmean.T, alpha)
    return post, nlZ, dnlZ
Ejemplo n.º 2
0
def infFITC(hyp, meanfunc, covfunc, likfunc, x, y, nargout=1):
    """FITC approximation to the posterior Gaussian process.

    Equivalent to exact inference with the covariance
        Kt = Q + G;  G = diag(g);  g = diag(K-Q);  Q = Ku'*inv(Quu)*Ku
    where Ku and Kuu are covariances w.r.t. the inducing inputs,
    snu2 = sn2/1e6 is the (hard-coded) noise on the inducing inputs and
    Quu = Kuu + snu2*eye(nu).  The Woodbury identity
        inv(Kt) = inv(G) - inv(G)*V'*inv(eye(nu)+V*inv(G)*V')*V*inv(G)
    is used so that only nu x nu systems are factorised.
    Ported from GPML's infFITC.m (Snelson, Rasmussen & Nickisch, 2012-11-20).

    Parameters
    ----------
    hyp : object exposing ``cov``, ``mean`` and ``lik`` hyperparameters
    meanfunc : mean specification forwarded to ``Tools.general.feval``
    covfunc : covariance specification; ``covfunc[0]`` must equal
        ``['kernels.covFITC']``
    likfunc : sequence whose first entry must equal ``'lik.likGauss'``
    x : 2-D array of training inputs (one row per case)
    y : training targets
    nargout : number of requested outputs (1, 2 or 3)

    Returns
    -------
    ``post`` when ``nargout <= 1``; ``(post, nlZ)`` when ``nargout == 2``;
    ``(post, nlZ, dnlZ)`` when ``nargout > 2``.

    Raises
    ------
    Exception
        If the likelihood is not Gaussian or the covariance is not covFITC.
    """
    if not (likfunc[0] == 'lik.likGauss'):      # likGauss itself is never called
        raise Exception('FITC inference only possible with Gaussian likelihood')

    cov1 = covfunc[0]
    if not cov1 == ['kernels.covFITC']:
        raise Exception('Only covFITC supported.')

    diagK, Kuu, Ku = Tools.general.feval(covfunc, hyp.cov, x)  # covariance pieces
    m = Tools.general.feval(meanfunc, hyp.mean, x)             # mean vector
    n, D = x.shape
    nu = Kuu.shape[0]

    sn2 = np.exp(2*hyp.lik)                         # noise variance of likGauss
    snu2 = 1.e-6*sn2                                # hard-coded inducing-input noise
    Luu = np.linalg.cholesky(Kuu + snu2*np.eye(nu)).T   # Kuu + snu2*I = Luu'*Luu
    V = np.linalg.solve(Luu.T, Ku)                  # V = inv(Luu')*Ku => V'*V = Q
    # g + sn2 = diag(K) + sn2 - diag(Q), kept as a column vector
    g_sn2 = diagK + sn2 - np.array([(V*V).sum(axis=0)]).T
    V_scaled = V/np.tile(g_sn2.T, (nu, 1))          # V*diag(1/g_sn2)
    Lu = np.linalg.cholesky(np.eye(nu) + np.dot(V_scaled, V.T)).T  # Lu'*Lu = I+V*diag(1/g_sn2)*V'
    r = (y - m)/np.sqrt(g_sn2)
    be = np.linalg.solve(Lu.T, np.dot(V, r/np.sqrt(g_sn2)))
    iKuu = solve_chol(Luu, np.eye(nu))              # inv(Kuu + snu2*I)

    post = postStruct()
    post.alpha = np.linalg.solve(Luu, np.linalg.solve(Lu, be))
    post.L = solve_chol(np.dot(Lu, Luu), np.eye(nu)) - iKuu   # Sigma - inv(Kuu)
    post.sW = np.ones((n, 1))/np.sqrt(sn2)          # unused for FITC prediction

    if nargout <= 1:
        return post

    # The 1/2 factor applies to the whole data-fit/normaliser group, not just
    # to be'*be (cf. the GPML reference implementation).
    nlZ = np.log(np.diag(Lu)).sum() + (np.log(g_sn2).sum() + n*np.log(2*np.pi)
                                       + np.dot(r.T, r) - np.dot(be.T, be))/2.
    if nargout <= 2:
        return post, nlZ

    dnlZ = deepcopy(hyp)                            # same layout as hyp
    # al = (Kt + sn2*eye(n)) \ (y-m)
    al = r/np.sqrt(g_sn2) - np.dot(V.T, np.linalg.solve(Lu, be))/g_sn2
    B = np.dot(iKuu, Ku)
    w = np.dot(B, al)
    W = np.linalg.solve(Lu.T, V_scaled)
    WW = np.array([(W*W).sum(axis=0)])              # 1 x n row: sum(W.*W,1)
    BWt = np.dot(B, W.T)                            # loop-invariant

    for ii in range(len(hyp.cov)):
        ddiagKi, dKuui, dKui = Tools.general.feval(covfunc, hyp.cov, x, None, ii)
        R = 2.*dKui - np.dot(dKuui, B)
        v = ddiagKi - np.array([(R*B).sum(axis=0)]).T   # diag part of cov deriv
        # WW*v is a matrix product (1 x n times n x 1), hence np.dot.
        dnlZ.cov[ii] = (np.dot(ddiagKi.T, 1./g_sn2)
                        + np.dot(w.T, np.dot(dKuui, w) - 2.*np.dot(dKui, al))
                        - np.dot(al.T, v*al)
                        - np.dot(WW, v)
                        - (np.dot(R, W.T)*BWt).sum())/2.

    dnlZ.lik = sn2*((1./g_sn2).sum() - WW.sum() - np.dot(al.T, al))
    # Since snu2 is a fixed fraction of sn2, the noise derivative also has a
    # covariance-like term with dKuu = 2*snu2*I, dKu = 0 and ddiagK = 0.
    # dKuui is a scalar multiple of the identity, so plain multiplication is
    # used; np.dot would fail when hyp.lik is a length-1 array and nu > 1.
    dKuui = 2*snu2
    R = -dKuui*B
    v = -np.array([(R*B).sum(axis=0)]).T
    dnlZ.lik = dnlZ.lik + (np.dot(w.T, dKuui*w)
                           - np.dot(al.T, v*al)
                           - np.dot(WW, v)
                           - (np.dot(R, W.T)*BWt).sum())/2.

    for ii in range(len(hyp.mean)):
        dm = Tools.general.feval(meanfunc, hyp.mean, x, ii)
        dnlZ.mean[ii] = np.dot(-dm.T, al)
    return post, nlZ, dnlZ
Ejemplo n.º 3
0
def infEP(hyp, meanfunc, covfunc, likfunc, x, y, nargout=1):
    """Expectation Propagation approximation to the posterior Gaussian process.

    Ported from GPML's infEP.m (Rasmussen & Nickisch, 2010-02-25).

    Naming follows Rasmussen & Williams, "GPs for Machine Learning" (2006):
    mu and s2 are mean and variance, nu and tau are natural parameters, a
    leading t means tilde, a suffix _ni means "not i" (cavity parameters of
    one site) and _n denotes a vector of cavity parameters.

    The tilde parameters of the last successful call are cached as attributes
    on the function (``infEP.last_ttau`` / ``infEP.last_tnu``) and used as a
    warm start when their shape matches the current data set.

    NOTE: sites are visited in sequential order; the random permutation used
    by the reference implementation is disabled here.

    Parameters
    ----------
    hyp : object exposing ``cov``, ``mean`` and ``lik`` hyperparameters
    meanfunc, covfunc, likfunc : specifications forwarded to
        ``Tools.general.feval``
    x : array of training inputs (one row per case)
    y : training targets
    nargout : number of requested outputs

    Returns
    -------
    list
        ``[post, nlZ, dnlZ]`` when ``nargout > 2``, otherwise ``[post, nlZ]``
        (also for ``nargout == 1``).  ``post`` carries ``alpha``, ``sW``, ``L``.

    Raises
    ------
    Exception
        If the sweep limit is reached without the marginal likelihood
        having converged.
    """
    tol = 1e-4; max_sweep = 10; min_sweep = 2   # tolerance to stop EP iterations

    inffunc = 'inf.infEP'
    n = x.shape[0]
    K = Tools.general.feval(covfunc, hyp.cov, x)     # covariance matrix
    m = Tools.general.feval(meanfunc, hyp.mean, x)   # mean vector

    # Marginal likelihood for ttau = tnu = zeros(n,1).
    diagK = np.diag(K)
    nlZ0 = -Tools.general.feval(likfunc, hyp.lik, y, m,
                                np.reshape(diagK, (diagK.shape[0], 1)), inffunc).sum()

    # Warm start only when the cached parameters fit the current problem size;
    # a cache left over from a different data set would have the wrong shape.
    cached_ttau = getattr(infEP, 'last_ttau', None)
    cached_tnu = getattr(infEP, 'last_tnu', None)
    warm_start = (cached_ttau is not None and cached_tnu is not None
                  and np.shape(cached_ttau) == (n, 1)
                  and np.shape(cached_tnu) == (n, 1))

    start_from_zero = True
    if warm_start:
        # Work on copies so a failed run does not corrupt the cache.
        ttau = np.array(cached_ttau, copy=True)
        tnu = np.array(cached_tnu, copy=True)
        [Sigma, mu, nlZ, L] = epComputeParams(K, y, ttau, tnu, likfunc, hyp, m, inffunc)
        start_from_zero = nlZ > nlZ0                 # zero start is better
    if start_from_zero:
        ttau = np.zeros((n, 1))
        tnu = np.zeros((n, 1))
        Sigma = K                                    # parameters of the Gaussian ..
        mu = np.zeros((n, 1))                        # .. posterior approximation
        nlZ = nlZ0

    nlZ_old = np.inf; sweep = 0       # converged, max. sweeps or min. sweeps?
    while (np.abs(nlZ-nlZ_old) > tol and sweep < max_sweep) or (sweep < min_sweep):
        nlZ_old = nlZ; sweep += 1
        for ii in range(n):                          # iterate EP updates over sites
            tau_ni = 1/Sigma[ii,ii] - ttau[ii]       # cavity distribution ..
            nu_ni = mu[ii]/Sigma[ii,ii] + m[ii]*tau_ni - tnu[ii]   # .. parameters
            # derivatives of the individual log partition function
            vargout = Tools.general.feval(likfunc, hyp.lik, y[ii], nu_ni/tau_ni,
                                          1/tau_ni, inffunc, None, 3)
            lZ = vargout[0]; dlZ = vargout[1]; d2lZ = vargout[2]
            ttau_old = copy(ttau[ii])                # keep the old tilde parameter

            ttau[ii] = -d2lZ/(1.+d2lZ/tau_ni)
            ttau[ii] = max(ttau[ii], 0)              # lower-bound ttau by zero
            tnu[ii] = (dlZ + (m[ii]-nu_ni/tau_ni)*d2lZ)/(1.+d2lZ/tau_ni)

            ds2 = ttau[ii] - ttau_old                # rank-1 update of Sigma ..
            si = np.reshape(Sigma[:,ii], (Sigma.shape[0], 1))
            Sigma = Sigma - ds2/(1.+ds2*si[ii])*np.dot(si, si.T)
            mu = np.dot(Sigma, tnu)                  # .. and recompute mu
        # recompute since repeated rank-one updates can destroy numerical precision
        [Sigma, mu, nlZ, L] = epComputeParams(K, y, ttau, tnu, likfunc, hyp, m, inffunc)

    # Only fail when the sweep budget ran out *without* convergence; a run that
    # converges exactly on the last permitted sweep is a success.
    if sweep == max_sweep and np.abs(nlZ-nlZ_old) > tol:
        raise Exception('maximum number of sweeps reached in function infEP')

    infEP.last_ttau = ttau; infEP.last_tnu = tnu     # remember for next call

    sW = np.sqrt(ttau)
    alpha = tnu - sW*solve_chol(L, sW*np.dot(K, tnu))

    post = postStruct()
    post.alpha = alpha                               # posterior parameters
    post.sW = sW
    post.L = L

    if nargout > 2:                                  # derivatives requested
        dnlZ = deepcopy(hyp)                         # same layout as hyp
        V = np.linalg.solve(L.T, np.tile(sW, (1, n))*K)
        Sigma = K - np.dot(V.T, V)
        mu = np.dot(Sigma, tnu)
        diagSigma = np.diag(Sigma)
        Dsigma = np.reshape(diagSigma, (diagSigma.shape[0], 1))
        tau_n = 1/Dsigma - ttau                      # vectors of cavity parameters
        nu_n = mu/Dsigma - tnu
        cav_mean = nu_n/tau_n + m
        cav_var = 1/tau_n

        # MATLAB's diag(sW) builds an n x n diagonal matrix from a vector;
        # np.diag needs a 1-D input for that (a 2-D (n,1) input would instead
        # extract a length-1 diagonal).
        F = np.dot(alpha, alpha.T) - np.tile(sW, (1, n)) * \
            solve_chol(L, np.diag(np.ravel(sW)))     # covariance hypers
        for jj in range(len(hyp.cov)):
            dK = Tools.general.feval(covfunc, hyp.cov, x, None, jj)
            dnlZ.cov[jj] = -(F*dK).sum()/2.
        for ii in range(len(hyp.lik)):               # likelihood hypers
            dlik = Tools.general.feval(likfunc, hyp.lik, y, cav_mean, cav_var, inffunc, ii)
            dnlZ.lik[ii] = -dlik.sum()
        # Request two outputs explicitly, mirroring the three-output call in
        # the sweep loop; without the count the call form returns lZ only.
        lik_out = Tools.general.feval(likfunc, hyp.lik, y, cav_mean, cav_var,
                                      inffunc, None, 2)
        dlZ = lik_out[1]                             # mean hypers
        for ii in range(len(hyp.mean)):
            dm = Tools.general.feval(meanfunc, hyp.mean, x, ii)
            dnlZ.mean[ii] = -np.dot(dlZ.T, dm)
        vargout = [post, nlZ, dnlZ]
    else:
        vargout = [post, nlZ]
    return vargout