Beispiel #1
0
    def give_covariance(self, theta):
        """
        Computes the posterior covariance at the target points, i.e.
        Kpp - Kp^T Ky^{-1} Kp, using the strategy selected by self.mode.

        :param theta: hypers
        :return: the covariance matrix at the target points
        :raises ValueError: if self.mode is not "Full", "RR" or "kron"
        """
        if self.mode == "Full":
            Kp = self.kernel.cov_func(theta, self.XXp, noise=False)
            Kpp = self.kernel.cov_func(theta, self.XXpp, noise=False)
            Ky = self.kernel.cov_func(theta, self.XX, noise=True)
            L = np.linalg.cholesky(Ky)
            # Solve L X = Kp instead of forming inv(L) explicitly
            LinvKp = np.linalg.solve(L, Kp)
            return Kpp - np.dot(LinvKp.T, LinvKp)
        elif self.mode == "RR":
            coeffs = self.give_RR_covcoeffs(theta)
            return np.dot(self.Phip, np.dot(coeffs, self.Phip.T))
        elif self.mode == "kron":
            # get the Kronecker matrices
            D = self.XX.shape[0]
            Kp = []
            Kpp = []
            K = []
            # NOTE(review): every dimension is evaluated with self.kernel[0]
            # and theta[i] here, whereas give_mean/logL use self.kernel[i]
            # with a broadcast theta -- confirm which is intended.
            for i in range(D):
                Kp.append(self.kernel[0].cov_func(theta[i],
                                                  self.XXp[i],
                                                  noise=False))
                Kpp.append(self.kernel[0].cov_func(theta[i],
                                                   self.XXpp[i],
                                                   noise=False))
                K.append(self.kernel[0].cov_func(theta[i],
                                                 self.XX[i],
                                                 noise=False))
            Kp = np.asarray(Kp)
            Kpp = np.asarray(Kpp)
            K = np.asarray(K)
            # kron_eig is unpacked as (eigenvalues, eigenvectors), the same
            # order the other methods of this class use
            Lambdas, Qs = kt.kron_eig(K)
            QsT = kt.kron_transpose(Qs)
            KpT = kt.kron_transpose(Kp)
            A = kt.kron_matmat(QsT[::-1], KpT)
            Lambda = kt.kron_diag(Lambdas)
            # NOTE(review): no noise term is added when self.Sigmay is None,
            # and Sigmay is added unscaled -- confirm against give_mean/logL.
            if self.Sigmay is not None:
                Lambda += self.Sigmay
            A = A / Lambda[:, None]  # dot with diagonal inverse
            A = kt.kron_matmat(Qs[::-1], A)
            return kt.kron_kron(Kpp) - kt.kron_kron(Kp).dot(A)

        else:
            raise ValueError('Mode %s not supported yet' % self.mode)
Beispiel #2
0
 def __init__(self, x, theta, kernels=("sqexp",), precond_mode="Full",
              wisdom_file='/home/landman/Projects/GP/fft_wisdom/test.wisdom.npy'):
     """
     Set up one covariance operator per entry of kernels, together with
     the eigen-quantities used for preconditioning.

     :param x: per-dimension coordinates; x.shape[0] is the number of
         dimensions D and x[i].shape[0] the number of points in dimension i
     :param theta: hypers laid out as [sigmaf, l_1, ..., l_D, sigman]
     :param kernels: kernel name for each dimension (only "sqexp" is
         supported)
     :param precond_mode: "Full" or "RR"
     :param wisdom_file: path passed to every expsq.sqexp_op as its
         wisdom_file argument
     :raises ValueError: on an unsupported kernel name or precond_mode
     """
     self.D = x.shape[0]
     self.N = kt.kron_N(x)
     self.x = x
     self.precond_mode = precond_mode
     # get individual shapes; int64 because an int8 array cannot hold a
     # dimension with more than 127 points
     self.Ds = np.array([self.x[i].shape[0] for i in range(self.D)],
                        dtype=np.int64)
     # broadcast theta into one [sigmaf, l_i, sigman] row per dimension
     thetan = np.zeros([self.D, 3])
     thetan[:, 0] = theta[0]
     thetan[:, -1] = theta[-1]
     for i in range(self.D):  # one length scale per dimension
         thetan[i, 1] = theta[i + 1]
     self.theta = theta
     self.eps = self.theta[-1]**2  # default nugget
     # set up kernels for each dimension
     # NOTE(review): only the first len(kernels) entries of Phis/Lambdas get
     # filled; presumably kernels should have one entry per dimension.
     self.kernels = []
     self.Phis = np.empty(self.D, dtype=object)
     self.Lambdas = np.empty(self.D, dtype=object)
     for i, k in enumerate(kernels):
         if k != "sqexp":
             raise ValueError("Unsupported kernel %s" % k)
         op = expsq.sqexp_op(
             x[i],
             thetan[i],
             np.prod(np.delete(self.Ds, i)),  # product of the other dims' sizes
             wisdom_file=wisdom_file,
             reset_wisdom=True)
         self.kernels.append(op)
         if self.precond_mode == "Full":
             op.set_eigs()  # default nugget, reset if it changes
             self.Phis[i] = op.Q
             self.Lambdas[i] = op.Lambda_full
         elif self.precond_mode == "RR":
             op.set_RR_eigs(nugget=theta[-1]**2)
             self.Phis[i] = op.Phi
             self.Lambdas[i] = op.S  # approximate power spectrum
         else:
             raise ValueError("Unsupported precond mode %s" %
                              self.precond_mode)
     self.PhisT = kt.kron_transpose(self.Phis)
     self.shape = (self.N, self.N)
     self.dtype = np.float64
Beispiel #3
0
 def give_mean(self, theta):
     """
     Computes the posterior mean function, i.e. Kp^T Ky^{-1} yDat (plus
     self.fp in the "Full" and "RR" modes), using the strategy selected by
     self.mode.

     :param theta: hypers; in "kron" mode laid out as
         [sigmaf, l_1, ..., l_D, sigman]
     :return: the posterior mean at the target points
     :raises ValueError: if self.mode is not "Full", "RR" or "kron"
     """
     if self.mode == "Full":
         Ky = self.kernel.cov_func(theta, self.XX, noise=True)
         Kp = self.kernel.cov_func(theta, self.XXp, noise=False)
         L = np.linalg.cholesky(Ky)
         # Ky^{-1} yDat through the two Cholesky-factor systems rather
         # than by forming inv(L) explicitly
         alpha = np.linalg.solve(L.T, np.linalg.solve(L, self.yDat))
         return self.fp + np.dot(Kp.T, alpha)
     elif self.mode == "RR":
         fcoeffs = self.give_RR_coeffs(theta)
         return self.fp + np.dot(self.Phip, fcoeffs)
     elif self.mode == "kron":
         D = self.XX.shape[0]
         # broadcast theta (sigmaf and sigman is a shared hyperparameter but easiest to deal with this way)
         # NOTE(review): sigmaf is given to every dimension here, whereas
         # logL sets it to 1.0 for all but the first -- confirm intent.
         thetan = np.zeros([D, 3])
         thetan[:, 0] = theta[0]
         thetan[:, -1] = theta[-1]
         for i in range(D):  # one length scale per dimension
             thetan[i, 1] = theta[i + 1]
         # get the Kronecker matrices
         Kp = []
         K = []
         for i in range(D):
             Kp.append((self.kernel[i].cov_func(thetan[i], self.XXp[i], noise=False)).T)
             K.append(self.kernel[i].cov_func(thetan[i], self.XX[i], noise=False))
         Kp = np.asarray(Kp)
         K = np.asarray(K)
         Lambdas, Qs = kt.kron_eig(K)
         QsT = kt.kron_transpose(Qs)
         alpha = kt.kron_matvec(QsT[::-1], self.yDat)
         Lambda = kt.kron_diag(Lambdas)
         if self.Sigmay is not None:
             Lambda += theta[-1]**2 * kt.kron_diag(self.Sigmay)  # absorb weights into Lambdas
         else:
             Lambda += theta[-1]**2 * np.ones(self.N)
         alpha = alpha / Lambda  # dot with diagonal inverse
         alpha = kt.kron_matvec(Qs[::-1], alpha)
         return kt.kron_tensorvec(Kp[::-1], alpha)
     else:
         raise ValueError('Mode %s not supported yet' % self.mode)
Beispiel #4
0
    def logL(self, theta):
        """
        Computes the negative log marginal posterior and its gradient, using
        the strategy selected by self.mode ("Full", "RR" or "kron").

        When the required Cholesky factorisation cannot be obtained, the
        penalty value 1.0e8 is returned for logp and for every entry of
        dlogp instead of raising.

        :param theta: hypers (numpy array)
        :return logp, dlogp: the negative log marginal posterior and its derivative w.r.t. hypers
        :raises ValueError: if self.mode is not "Full", "RR" or "kron"
        """
        if self.mode == "Full":
            Ky = self.kernel.cov_func(theta, self.XX, noise=True)
            try:
                L = np.linalg.cholesky(Ky)
            except np.linalg.LinAlgError:
                # Ky is not numerically positive definite: hand back a large
                # penalty so that a minimiser moves away from this theta
                logp = 1.0e8
                dlogp = np.ones(theta.size) * 1.0e8
                return logp, dlogp
            logdetK = 2.0 * np.sum(np.log(np.diag(L)))
            Linv = np.linalg.inv(L)
            Linvy = np.dot(Linv, self.yDat)
            logp = np.dot(Linvy.conj().T,
                          Linvy).real / 2.0 + logdetK / 2.0 + self.N * np.log(
                              2 * np.pi) / 2.0
            dlogp = np.zeros(theta.size)
            Kinv = np.dot(Linv.T, Linv)
            Kinvy = np.reshape(np.dot(Kinv, self.yDat), (self.N, 1))
            # Kinv - a a^T with a = Kinv y
            KinvmaaT = Kinv - (np.dot(Kinvy, Kinvy.conj().T)).real
            K = self.kernel.cov_func(theta, self.XX, noise=False)
            for mode in range(3):
                dKdtheta = self.kernel.dcov_func(theta, self.XX, K, mode=mode)
                # the einsum computes only the diagonal of the matrix
                # product, so the sum is its trace
                dlogp[mode] = np.sum(np.einsum('ij,ji->i', KinvmaaT,
                                               dKdtheta)) / 2.0
            return logp, dlogp
        elif self.mode == "RR":
            S = self.kernel.spectral_density(theta, self.s)
            # bump tiny spectral densities to keep 1/S and log(S) finite
            S[S < 1e-13] += 1.0e-13
            Lambdainv = np.diag(1.0 / S)
            Z = self.PhiTPhi + theta[2]**2 * Lambdainv
            try:
                L = np.linalg.cholesky(Z)
            except np.linalg.LinAlgError:
                print("Had to add jitter, theta = %s" % (theta,))
                L = None
                jit = 1e-6
                # grow the jitter tenfold per attempt; stop once it
                # overflows so that a Z containing NaNs cannot loop forever
                while L is None and np.isfinite(jit):
                    try:
                        L = np.linalg.cholesky(Z + jit * np.eye(self.M))
                    except np.linalg.LinAlgError:
                        jit *= 10.0
                if L is None:
                    return 1.0e8, np.ones(theta.size) * 1.0e8
            Linv = np.linalg.inv(L)
            Zinv = np.dot(Linv.T, Linv)
            logdetZ = 2.0 * np.sum(np.log(np.diag(L)))
            # Get the log term
            logQ = (self.N - self.M) * np.log(theta[2]**2) + logdetZ + np.sum(
                np.log(S))
            # Get the quadratic term
            PhiTy = np.dot(self.Phi.T, self.yDat)
            ZinvPhiTy = np.dot(Zinv, PhiTy)
            yTQinvy = (self.yTy - np.dot(PhiTy.T, ZinvPhiTy)) / theta[2]**2
            # Get their derivatives
            dlogQdtheta = np.zeros(theta.size)
            dyTQinvydtheta = np.zeros(theta.size)
            for i in range(theta.size - 1):
                dSdtheta = self.kernel.dspectral_density(theta,
                                                         S,
                                                         self.s,
                                                         mode=i)
                dlogQdtheta[i] = np.sum(dSdtheta / S) - theta[2]**2 * np.sum(
                    dSdtheta / S * np.diag(Zinv) / S)
                dyTQinvydtheta[i] = -np.dot(
                    ZinvPhiTy.T, dSdtheta / S * ZinvPhiTy.squeeze() / S)
            # Get derivatives w.r.t. sigma_n
            dlogQdtheta[2] = 2 * theta[2] * (
                (self.N - self.M) / theta[2]**2 + np.sum(np.diag(Zinv) / S))
            dyTQinvydtheta[2] = 2 * (np.dot(ZinvPhiTy.T,
                                            ZinvPhiTy.squeeze() / S) -
                                     yTQinvy) / theta[2]

            logp = (yTQinvy + logQ + self.N * np.log(2 * np.pi)) / 2.0
            dlogp = (dlogQdtheta + dyTQinvydtheta) / 2
            return logp, dlogp
        elif self.mode == "kron":
            # get dims
            D = self.XX.shape[0]
            # broadcast theta (sigmaf and sigman is a shared hyperparameter but easiest to deal with this way)
            thetan = np.zeros([D, 3])
            # sigmaf enters through the first dimension only
            thetan[0, 0] = theta[0]
            thetan[1::, 0] = 1.0
            thetan[:, -1] = theta[-1]
            for i in range(D):  # one length scale per dimension
                thetan[i, 1] = theta[i + 1]
            # get the Kronecker matrices
            K = []
            for i in range(D):
                K.append(self.kernel[i].cov_func(thetan[i],
                                                 self.XX[i],
                                                 noise=False))
            K = np.asarray(K)
            # do eigen-decomposition
            Lambdas, Qs = kt.kron_eig(K)
            QsT = kt.kron_transpose(Qs)
            # get alpha vector
            alpha = kt.kron_matvec(QsT, self.yDat)
            Lambda = kt.kron_diag(Lambdas)
            if self.Sigmay is not None:
                Lambda += theta[-1]**2 * kt.kron_diag(
                    self.Sigmay)  # absorb weights into Lambdas
            else:
                Lambda += theta[-1]**2 * np.ones(self.N)
            alpha = alpha / Lambda  # same as matrix product with inverse of diagonal
            alpha = kt.kron_matvec(Qs, alpha)
            # get negative log marginal likelihood
            logp = 0.5 * (self.yDat.T.dot(alpha) + np.sum(np.log(Lambda)) +
                          self.N * np.log(2.0 * np.pi))
            # get derivatives
            dKdtheta = []
            # first w.r.t. sigmaf which only needs to be done once and will have same shape as K
            dKdtheta.append(
                self.kernel[0].dcov_func(thetan[0], self.XX, K, mode='sigmaf')
            )  # need to pass theta for kernel 0 since that has sigmaf
            # now get w.r.t. length scales
            for i in range(D):  # one length scale for each dimension
                dKdtheta.append(self.kernel[i].dcov_func(
                    thetan[i], self.XX[i], K[i],
                    mode='l'))  # here it does matter, will be of shape K[i]
            # finally get deriv w.r.t sigman (also only need to do this once)
            dKdtheta.append(
                self.kernel[0].dcov_func(thetan[0], self.XX, K, mode='sigman')
            )  # ditto remark for theta, will be of shape Sigmay
            dKdtheta = np.asarray(dKdtheta)  # should contain Ntheta arrays
            # compute dZdtheta
            Ntheta = theta.size
            dlogp = np.zeros(Ntheta)
            # first get it for sigmaf
            gamma = []
            for i in range(D):
                tmp = dKdtheta[0][i].dot(Qs[i])
                gamma.append(np.einsum('ij,ji->i', Qs[i].T, tmp))
            gamma = np.asarray(gamma)
            gamma = kt.kron_diag(gamma)
            kappa = kt.kron_matvec(dKdtheta[0], alpha)
            dlogp[0] = -self.get_dZdthetai(alpha, kappa, Lambda, gamma)
            # now get it for the length scales
            for i in range(1, D + 1):  # i labels length scales
                # compute the gammas = diag(Qd.T dKddthetai Qd)
                gamma = []
                for j in range(D):  # j labels dimensions
                    if j == i - 1:  # dimension corresponding to l_i is always one less than the index of the length scale
                        tmp = dKdtheta[i].dot(
                            Qs[j])  # this is the dKdtheta corresponding to l_i
                    else:
                        tmp = K[j].dot(Qs[j])
                    gamma.append(np.einsum(
                        'ij,ji->i', Qs[j].T,
                        tmp))  # computes only the diagonal of the product
                gamma = np.asarray(gamma)
                gamma = kt.kron_diag(
                    gamma)  # exploiting diagonal property of Kronecker product
                dKdtheta_tmp = K.copy()
                dKdtheta_tmp[i - 1] = dKdtheta[
                    i]  # can be made more efficient, just set for clarity
                kappa = kt.kron_matvec(dKdtheta_tmp, alpha)
                dlogp[i] = -self.get_dZdthetai(alpha, kappa, Lambda, gamma)

            # finally get it for sigman
            gamma = []
            for i in range(D):
                tmp = dKdtheta[-1][i][:, None] * Qs[i]
                gamma.append(np.einsum('ij,ji->i', Qs[i].T, tmp))
            gamma = np.asarray(gamma)
            gamma = kt.kron_diag(gamma)
            kappa = kt.kron_diag(dKdtheta[-1]) * alpha
            dlogp[-1] = -self.get_dZdthetai(alpha, kappa, Lambda, gamma)

            return logp, dlogp
        else:
            raise ValueError('Mode %s not supported yet' % self.mode)
Beispiel #5
0
def test_transpose(A, K):
    """
    Check kt.kron_transpose: the result of kt.kron_kron applied to the
    transposed factors of A is compared against K.T via compare().

    :param A: Kronecker factors
    :param K: reference matrix whose transpose is the expected result
    """
    expected = K.T
    actual = kt.kron_kron(kt.kron_transpose(A))
    compare(actual, expected, "Transpose")