Example 1
    def bound(self):
        """
        Compute the lower bound on the marginal likelihood (conditioned on the
        GP hyper parameters).
        """
        GP_bound = 0.0

        for i, kern in enumerate(self.kern):
            K = kern.K(self.X)
            B_inv = np.diag(1. / ((self.phi[:, i] + 1e-6) / self.variance))

            # Make more stable using cholesky factorization:
            Bi, LB, LBi, Blogdet = pdinv(K+B_inv)

            # Data fit
            # alpha = linalg.cho_solve(linalg.cho_factor(K + B_inv), self.Y)
            # GP_bound += -0.5 * np.dot(self.Y.T, alpha).trace()
            GP_bound -= .5 * dpotrs(LB, self.YYT)[0].trace()

            # Penalty
            # GP_bound += -0.5 * np.linalg.slogdet(K + B_inv)[1]
            GP_bound -= 0.5 * Blogdet

            # Constant, weighted by  model assignment per point
            #GP_bound += -0.5 * (self.phi[:, i] * np.log(2 * np.pi * self.variance)).sum()
            GP_bound -= .5 * self.D * np.einsum('j,j->', self.phi[:, i], np.log(2 * np.pi * self.variance))

        return GP_bound + self.mixing_prop_bound() + self.H
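
A sketch of the data-fit line above in plain NumPy/SciPy (GPy's pdinv/dpotrs wrappers replaced by cho_factor/cho_solve, and a toy RBF kernel standing in for kern.K): the Cholesky-based trace equals trace(Y^T (K + B^{-1})^{-1} Y) computed with a dense solve.

# Sketch, assuming only standard NumPy/SciPy: check the data-fit identity
# used above, -0.5 * trace((K + B^{-1})^{-1} Y Y^T) via a Cholesky solve.
import numpy as np
from scipy.linalg import cho_factor, cho_solve

rng = np.random.default_rng(0)
N, D = 30, 2
X = rng.normal(size=(N, 1))
K = np.exp(-0.5 * (X - X.T) ** 2)                 # toy RBF Gram matrix
B_inv = np.diag(1.0 / rng.uniform(0.5, 1.5, size=N))
Y = rng.normal(size=(N, D))
YYT = Y @ Y.T

LB = cho_factor(K + B_inv, lower=True)
fit_chol = -0.5 * np.trace(cho_solve(LB, YYT))    # role of dpotrs(LB, YYT)[0].trace()
fit_direct = -0.5 * np.trace(Y.T @ np.linalg.solve(K + B_inv, Y))
assert np.allclose(fit_chol, fit_direct)
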
Example 2
    def do_computations(self):
        """
        Here we do all the computations that are required whenever the kernels
        or the variational parameters are changed.
        """
        # sufficient stats.
        self.ybark = np.dot(self.phi.T, self.Y).T

        # compute posterior variances of each cluster (lambda_inv)
        tmp = backsub_both_sides(self.Sy_chol, self.Sf, transpose="right")
        self.Cs = [np.eye(self.D) + tmp * phi_hat_i for phi_hat_i in self.phi_hat]

        self._C_chols = [jitchol(C) for C in self.Cs]
        self.log_det_diff = np.array([2.0 * np.sum(np.log(np.diag(L))) for L in self._C_chols])
        tmp = [dtrtrs(L, self.Sy_chol.T, lower=1)[0] for L in self._C_chols]
        self.Lambda_inv = np.array(
            [
                (self.Sy - np.dot(tmp_i.T, tmp_i)) / phi_hat_i if (phi_hat_i > 1e-6) else self.Sf
                for phi_hat_i, tmp_i in zip(self.phi_hat, tmp)
            ]
        )

        # posterior mean and other useful quantities
        self.Syi_ybark, _ = dpotrs(self.Sy_chol, self.ybark, lower=1)
        self.Syi_ybarkybarkT_Syi = self.Syi_ybark.T[:, None, :] * self.Syi_ybark.T[:, :, None]
        self.muk = (self.Lambda_inv * self.Syi_ybark.T[:, :, None]).sum(1).T
Example 3
    def add_new_data_point(self, x, y):
        """
        Add a new function observation to the GP.

        Parameters
        ----------
        x: 2d-array
            A single new input location, shape (1, input_dim).
        y: 2d-array
            The corresponding observation, shape (1, output_dim).
        """
        x = np.atleast_2d(x)
        y = np.atleast_2d(y)
        if self.gp is None:
            # Initialize GP
            # inference_method = GPy.inference.latent_function_inference.\
            #     exact_gaussian_inference.ExactGaussianInference()
            self.gp = GPy.core.GP(X=x, Y=y, kernel=self.kernel,
                                  # inference_method=inference_method,
                                  likelihood=self.likelihood)
        else:
            # Add data to GP
            # self.gp.set_XY(np.vstack([self.gp.X, x]),
            #                np.vstack([self.gp.Y, y]))

            # Add data row/col to kernel (a, b)
            # [ K    a ]
            # [ a.T  b ]
            #
            # Now K = L.dot(L.T)
            # The new Cholesky decomposition is then
            # L_new = [ L    0 ]
            #         [ c.T  d ]
            a = self.gp.kern.K(self.gp.X, x)
            b = self.gp.kern.K(x, x)

            b += 1e-8 + self.gp.likelihood.gaussian_variance(
                    self.gp.Y_metadata)

            L = self.gp.posterior.woodbury_chol
            c = sp.linalg.solve_triangular(self.gp.posterior.woodbury_chol, a,
                                           lower=True)

            d = np.sqrt(b - c.T.dot(c))

            L_new = np.asfortranarray(
                    np.bmat([[L, np.zeros_like(c)],
                             [c.T, d]]))

            K_new = np.bmat([[self.gp.posterior._K, a],
                             [a.T, b]])

            self.gp.X = np.vstack((self.gp.X, x))
            self.gp.Y = np.vstack((self.gp.Y, y))

            alpha, _ = dpotrs(L_new, self.gp.Y, lower=1)
            self.gp.posterior = Posterior(woodbury_chol=L_new,
                                          woodbury_vector=alpha,
                                          K=K_new)
        # Increment time step
        self.t += 1
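
The comment block above describes a bordered (block) Cholesky update. A small sketch, with a hypothetical toy RBF kernel standing in for self.gp.kern, verifying that the updated factor matches a full re-factorisation.

# Sketch: verify the block Cholesky update used above. With K = L L^T, the
# bordered matrix [[K, a], [a^T, b]] factors as [[L, 0], [c^T, d]] where
# c = L^{-1} a and d = sqrt(b - c^T c).
import numpy as np
from scipy.linalg import solve_triangular

rng = np.random.default_rng(1)
X = rng.normal(size=(10, 2))
x_new = rng.normal(size=(1, 2))

def rbf(A, B):
    d2 = ((A[:, None, :] - B[None, :, :]) ** 2).sum(-1)
    return np.exp(-0.5 * d2)

K = rbf(X, X) + 1e-8 * np.eye(len(X))
a = rbf(X, x_new)                       # (10, 1)
b = rbf(x_new, x_new) + 1e-8            # (1, 1)

L = np.linalg.cholesky(K)
c = solve_triangular(L, a, lower=True)
d = np.sqrt(b - c.T @ c)
L_new = np.block([[L, np.zeros_like(c)], [c.T, d]])

K_new = np.block([[K, a], [a.T, b]])
assert np.allclose(L_new, np.linalg.cholesky(K_new))
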
Example 4
 def woodbury_inv(self):
     """
     The inverse of the woodbury matrix, in the gaussian likelihood case it is defined as
     $$
     (K_{xx} + \Sigma_{xx})^{-1}
     \Sigma_{xx} := \texttt{Likelihood.variance / Approximate likelihood covariance}
     $$
     """
     if self._woodbury_inv is None:
         if self._woodbury_chol is not None:
             self._woodbury_inv, _ = dpotri(self._woodbury_chol, lower=1)
             symmetrify(self._woodbury_inv)
         elif self._covariance is not None:
             B = np.atleast_3d(self._K) - np.atleast_3d(self._covariance)
             self._woodbury_inv = np.empty_like(B)
             for i in range(B.shape[-1]):
                 tmp, _ = dpotrs(self.K_chol, B[:, :, i])
                 self._woodbury_inv[:, :, i], _ = dpotrs(self.K_chol, tmp.T)
     return self._woodbury_inv
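
A sketch of what the dpotri/symmetrify pair above is assumed to do, using SciPy's LAPACK binding directly rather than GPy's wrapper: dpotri turns a Cholesky factor of A into A^{-1}, but only fills the triangle it was given, hence the symmetrisation step.

# Sketch (assumption: GPy's dpotri wraps LAPACK dpotri with the same semantics).
import numpy as np
from scipy.linalg import lapack

rng = np.random.default_rng(2)
M = rng.normal(size=(5, 5))
A = M @ M.T + 5 * np.eye(5)             # a positive-definite stand-in matrix

L = np.linalg.cholesky(A)
A_inv_tri, info = lapack.dpotri(L, lower=1)
assert info == 0

# only the lower triangle is valid; mirror it before comparing
A_inv = np.tril(A_inv_tri) + np.tril(A_inv_tri, -1).T
assert np.allclose(A_inv, np.linalg.inv(A))
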
Example 5
 def _bias_loss(self, c):
     # calculate mean and norm for new bias via a new woodbury_vector
     new_woodbury_vector, _ = dpotrs(self._woodbury_chol,
                                     self._Y - c,
                                     lower=1)
     K = self.gp.kern.K(self.gp.X)
     mean = np.dot(K, new_woodbury_vector)
     norm = new_woodbury_vector.T.dot(mean)
     # loss is least_squares_error + norm
      # np.asscalar was removed from NumPy; .item() returns the same scalar
      return (np.sum(np.square(mean + c - self._Y)) + norm).item()
Example 6
 def woodbury_vector(self):
     """
     Woodbury vector in the gaussian likelihood case only is defined as
     $$
     (K_{xx} + \Sigma)^{-1}Y
     \Sigma := \texttt{Likelihood.variance / Approximate likelihood covariance}
     $$
     """
     if self._woodbury_vector is None:
         self._woodbury_vector, _ = dpotrs(self.K_chol, self.mean - self._prior_mean)
     return self._woodbury_vector
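
A sketch of how a vector of this form is typically used, written as plain-NumPy GP regression rather than through GPy's Posterior object: with wv = (K_xx + noise * I)^{-1} y cached, the predictive mean at new inputs is just K_{*x} @ wv.

# Sketch, standard GP regression with a toy RBF kernel (an assumption, not
# GPy's Posterior API).
import numpy as np
from scipy.linalg import cho_factor, cho_solve

rng = np.random.default_rng(3)
X = rng.uniform(-3, 3, size=(25, 1))
y = np.sin(X) + 0.1 * rng.normal(size=X.shape)
X_star = np.linspace(-3, 3, 50)[:, None]
noise = 0.01

def rbf(A, B):
    return np.exp(-0.5 * (A - B.T) ** 2)

K = rbf(X, X) + noise * np.eye(len(X))
wv = cho_solve(cho_factor(K, lower=True), y)   # the "Woodbury vector"
mean_star = rbf(X_star, X) @ wv                # predictive mean, shape (50, 1)

assert np.allclose(mean_star, rbf(X_star, X) @ np.linalg.solve(K, y))
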
Example 7
    def update_kern_grads(self):
        """
        Set the derivative of the lower bound wrt the (kernel) parameters
        """
        grad_Lm_variance = 0.0

        for i, kern in enumerate(self.kern):
            K = kern.K(self.X)
            B_inv = np.diag(1. / (self.phi[:, i] / self.variance))

            # Numerically more stable version using cholesky decomposition
            #alpha = linalg.cho_solve(linalg.cho_factor(K + B_inv), self.Y)
            #K_B_inv = pdinv(K + B_inv)[0]
            #dL_dK = .5*(tdot(alpha) - K_B_inv)

            # Make more stable using cholesky factorization:
            Bi, LB, LBi, Blogdet = pdinv(K + B_inv)

            tmp = dpotrs(LB, self.YYT)[0]
            GPy.util.diag.subtract(tmp, 1)
            dL_dB = dpotrs(LB, tmp.T)[0]

            kern.update_gradients_full(dL_dK=.5 * dL_dB, X=self.X)

            # variance gradient

            #for i, kern in enumerate(self.kern):
            K = kern.K(self.X)
            #I = np.eye(self.N)

            B_inv = np.diag(1. / ((self.phi[:, i] + 1e-6) / self.variance))
            #alpha = np.linalg.solve(K + B_inv, self.Y)
            #K_B_inv = pdinv(K + B_inv)[0]
            #dL_dB = tdot(alpha) - K_B_inv
            grad_B_inv = np.diag(1. / (self.phi[:, i] + 1e-6))

            grad_Lm_variance += 0.5 * np.trace(np.dot(dL_dB, grad_B_inv))
            grad_Lm_variance -= .5 * self.D * np.einsum(
                'j,j->', self.phi[:, i], 1. / self.variance)

        self.variance.gradient = grad_Lm_variance
Example 8
    def update_kern_grads(self):
        """
        Set the derivative of the lower bound wrt the (kernel) parameters
        """
        grad_Lm_variance = 0.0

        for i, kern in enumerate(self.kern):
            K = kern.K(self.X)
            B_inv = np.diag(1. / (self.phi[:, i] / self.variance))

            # Numerically more stable version using cholesky decomposition
            #alpha = linalg.cho_solve(linalg.cho_factor(K + B_inv), self.Y)
            #K_B_inv = pdinv(K + B_inv)[0]
            #dL_dK = .5*(tdot(alpha) - K_B_inv)

            # Make more stable using cholesky factorization:
            Bi, LB, LBi, Blogdet = pdinv(K+B_inv)

            tmp = dpotrs(LB, self.YYT)[0]
            GPy.util.diag.subtract(tmp, 1)
            dL_dB = dpotrs(LB, tmp.T)[0]

            kern.update_gradients_full(dL_dK=.5*dL_dB, X=self.X)

            # variance gradient

            #for i, kern in enumerate(self.kern):
            K = kern.K(self.X)
            #I = np.eye(self.N)

            B_inv = np.diag(1. / ((self.phi[:, i] + 1e-6) / self.variance))
            #alpha = np.linalg.solve(K + B_inv, self.Y)
            #K_B_inv = pdinv(K + B_inv)[0]
            #dL_dB = tdot(alpha) - K_B_inv
            grad_B_inv = np.diag(1. / (self.phi[:, i] + 1e-6))

            grad_Lm_variance += 0.5 * np.trace(np.dot(dL_dB, grad_B_inv))
            grad_Lm_variance -= .5*self.D * np.einsum('j,j->',self.phi[:, i], 1./self.variance)

        self.variance.gradient = grad_Lm_variance
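
The three dpotrs lines above build, without forming explicit inverses, exactly the matrix described in the commented-out lines: with A = K + B^{-1} and alpha = A^{-1} Y, the sequence tmp = A^{-1} YY^T; tmp -= I; dL_dB = A^{-1} tmp^T gives A^{-1} YY^T A^{-1} - A^{-1}, i.e. tdot(alpha) - K_B_inv. A dense-solve check of that identity (plain NumPy/SciPy, toy data).

# Sketch: verify dL_dB = alpha alpha^T - A^{-1} against the Cholesky-based path.
import numpy as np
from scipy.linalg import cho_factor, cho_solve

rng = np.random.default_rng(4)
N = 20
X = rng.normal(size=(N, 1))
K = np.exp(-0.5 * (X - X.T) ** 2)
B_inv = np.diag(rng.uniform(0.5, 2.0, size=N))
Y = rng.normal(size=(N, 1))
A = K + B_inv

LB = cho_factor(A, lower=True)
tmp = cho_solve(LB, Y @ Y.T)
tmp -= np.eye(N)                       # mirrors GPy.util.diag.subtract(tmp, 1)
dL_dB = cho_solve(LB, tmp.T)

alpha = np.linalg.solve(A, Y)
assert np.allclose(dL_dB, alpha @ alpha.T - np.linalg.inv(A))
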
Example 9
    def calculate_mu_var(self, X, Y, Z, q_u_mean, q_u_chol, kern, mean_function, num_inducing, num_data, num_outputs):
        """
        Calculate posterior mean and variance for the latent function values for use in the
        expectation over the likelihood
        """
        #expand cholesky representation
        L = choleskies.flat_to_triang(q_u_chol)
        #S = linalg.ijk_ljk_to_ilk(L, L) #L.dot(L.T)
        S = np.empty((num_outputs, num_inducing, num_inducing))
        [np.dot(L[i,:,:], L[i,:,:].T, S[i,:,:]) for i in range(num_outputs)]
        #logdetS = np.array([2.*np.sum(np.log(np.abs(np.diag(L[:,:,i])))) for i in range(L.shape[-1])])
        logdetS = np.array([2.*np.sum(np.log(np.abs(np.diag(L[i,:,:])))) for i in range(L.shape[0])])
        #compute mean function stuff
        if mean_function is not None:
            prior_mean_u = mean_function.f(Z)
            prior_mean_f = mean_function.f(X)
        else:
            prior_mean_u = np.zeros((num_inducing, num_outputs))
            prior_mean_f = np.zeros((num_data, num_outputs))

        #compute kernel related stuff
        Kmm = kern.K(Z)
        #Knm = kern.K(X, Z)
        Kmn = kern.K(Z, X)
        Knn_diag = kern.Kdiag(X)
        #Kmmi, Lm, Lmi, logdetKmm = linalg.pdinv(Kmm)
        Lm = linalg.jitchol(Kmm)
        logdetKmm = 2.*np.sum(np.log(np.diag(Lm)))
        Kmmi, _ = linalg.dpotri(Lm)

        #compute the marginal means and variances of q(f)
        #A = np.dot(Knm, Kmmi)
        A, _ = linalg.dpotrs(Lm, Kmn)
        #mu = prior_mean_f + np.dot(A, q_u_mean - prior_mean_u)
        mu = prior_mean_f + np.dot(A.T, q_u_mean - prior_mean_u)
        #v = Knn_diag[:,None] - np.sum(A*Knm,1)[:,None] + np.sum(A[:,:,None] * linalg.ij_jlk_to_ilk(A, S), 1)
        v = np.empty((num_data, num_outputs))
        for i in range(num_outputs):
            tmp = dtrmm(1.0,L[i].T, A, lower=0, trans_a=0)
            v[:,i] = np.sum(np.square(tmp),0)
        v += (Knn_diag - np.sum(A*Kmn,0))[:,None]

        #compute the KL term
        Kmmim = np.dot(Kmmi, q_u_mean)
        #KLs = -0.5*logdetS -0.5*num_inducing + 0.5*logdetKmm + 0.5*np.einsum('ij,ijk->k', Kmmi, S) + 0.5*np.sum(q_u_mean*Kmmim,0)
        KLs = -0.5*logdetS -0.5*num_inducing + 0.5*logdetKmm + 0.5*np.sum(Kmmi[None,:,:]*S,1).sum(1) + 0.5*np.sum(q_u_mean*Kmmim,0)
        KL = KLs.sum()

        latent_detail = LatentFunctionDetails(q_u_mean=q_u_mean, q_u_chol=q_u_chol, mean_function=mean_function,
                                              mu=mu, v=v, prior_mean_u=prior_mean_u, L=L, A=A,
                                              S=S, Kmm=Kmm, Kmmi=Kmmi, Kmmim=Kmmim, KL=KL)
        return latent_detail
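
The per-output variance loop above never forms A^T S A explicitly: since S = L L^T, diag(A^T S A) equals the column-wise sum of squares of L^T A, which is what the dtrmm call provides. A short check of that identity (plain NumPy, toy sizes).

# Sketch: diag(A^T S A) == sum over rows of (L^T A)**2 when S = L L^T.
import numpy as np

rng = np.random.default_rng(5)
M, N = 6, 40                       # num_inducing, num_data
L = np.tril(rng.normal(size=(M, M))) + 3 * np.eye(M)
S = L @ L.T
A = rng.normal(size=(M, N))        # role of Kmm^{-1} Kmn in the code above

tmp = L.T @ A                      # role of dtrmm(1.0, L.T, A, ...)
v_fast = np.sum(np.square(tmp), axis=0)
v_slow = np.diag(A.T @ S @ A)
assert np.allclose(v_fast, v_slow)
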
Example 10
    def grad_log_like(self, theta):
        '''
        Function to calculate the gradient of the cost
        (negative log-marginal likelihood) with respect to
        the kernel hyperparameters

        Args:
            (array) theta: the kernel hyperparameters in
                            the correct order

        Returns:
            (array) gradient: vector of the gradient
        '''

        # the kernel hyperparameters
        theta = theta.flatten()

        # amplitude
        self.width = theta[0]

        # characteristic lengthscales
        self.scale = theta[1:]

        # Number of parameters
        n_params = len(theta)

        # empty array to record the gradient
        gradient = np.zeros(n_params)

        # compute alpha
        alpha_ = self.alpha()

        # compute k^-1 via triangular method
        kinv = gpl.dpotrs(self.chol_fact, np.eye(self.ntrain), lower=True)[0]

        # see expression for gradient
        dummy = np.einsum('i,j', alpha_.flatten(), alpha_.flatten()) - kinv

        # Gradient calculation with respect
        # to hyperparameters (hard-coded)
        grad = {}
        k_rbf = self.rbf('trainSet', self.theta_, self.theta_)

        grad['0'] = 2.0 * k_rbf
        for i in range(self.ndim):
            dist_ = distanceperdim(self.theta_[:, i], self.theta_[:, i])
            grad[str(i + 1)] = k_rbf * dist_ / np.exp(2.0 * self.scale[i])

        for i in range(n_params):
            gradient[i] = 0.5 * gpl.trace_dot(dummy, grad[str(i)])

        return -gradient
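
The hard-coded gradient above is the standard expression d(-log L)/d(theta) = 0.5 * tr((K^{-1} - alpha alpha^T) dK/dtheta). A finite-difference sanity check for a single log-amplitude parameter of an RBF kernel (plain NumPy; the class's rbf/distanceperdim helpers are not used here).

# Sketch: compare the analytic gradient of the negative log marginal
# likelihood with a central finite difference.
import numpy as np

rng = np.random.default_rng(6)
X = rng.normal(size=(15, 1))
y = rng.normal(size=(15, 1))
D2 = (X - X.T) ** 2

def K_of(width):                    # K = exp(2*width) * RBF + jitter
    return np.exp(2.0 * width) * np.exp(-0.5 * D2) + 1e-6 * np.eye(len(X))

def nll(width):                     # negative log marginal likelihood (up to const.)
    K = K_of(width)
    _, logdet = np.linalg.slogdet(K)
    return 0.5 * (y.T @ np.linalg.solve(K, y)).item() + 0.5 * logdet

w = 0.3
K = K_of(w)
Kinv = np.linalg.inv(K)
alpha = np.linalg.solve(K, y)
dK_dw = 2.0 * np.exp(2.0 * w) * np.exp(-0.5 * D2)   # jitter does not depend on w
grad_analytic = 0.5 * np.trace((Kinv - alpha @ alpha.T) @ dK_dw)

eps = 1e-6
grad_numeric = (nll(w + eps) - nll(w - eps)) / (2 * eps)
assert np.allclose(grad_analytic, grad_numeric, rtol=1e-4, atol=1e-6)
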
Example 11
def compute_dl_dK(posterior, K, eta, theta, prior_mean = 0):
    tau, v = theta, eta

    tau_tilde_root = np.sqrt(tau)
    Sroot_tilde_K = tau_tilde_root[:,None] * K
    aux_alpha , _ = dpotrs(posterior.L, np.dot(Sroot_tilde_K, v), lower=1)
    alpha = (v - tau_tilde_root * aux_alpha)[:,None]  # (K + Sigma_tilde)^(-1) mu_tilde
    LWi, _ = dtrtrs(posterior.L, np.diag(tau_tilde_root), lower=1)
    Wi = np.dot(LWi.T, LWi)
    symmetrify(Wi)  # (K + Sigma_tilde)^(-1)

    dL_dK = 0.5 * (tdot(alpha) - Wi)
    
    return dL_dK
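
The dtrtrs and np.dot(LWi.T, LWi) lines recover (K + Sigma_tilde)^{-1} from posterior.L, which is used here as if it were the Cholesky factor of B = I + sqrt(tau) K sqrt(tau) (as in GPy's EP posterior parameters; an assumption of this sketch). The underlying identity, checked numerically:

# Sketch: with D = diag(sqrt(tau)), Sigma_tilde = diag(1/tau) and
# B = I + D K D, we have (K + Sigma_tilde)^{-1} = D B^{-1} D, so
# Wi = LWi^T LWi with LWi = L^{-1} D recovers (K + Sigma_tilde)^{-1}.
import numpy as np
from scipy.linalg import solve_triangular

rng = np.random.default_rng(7)
N = 12
X = rng.normal(size=(N, 1))
K = np.exp(-0.5 * (X - X.T) ** 2) + 1e-8 * np.eye(N)
tau = rng.uniform(0.5, 3.0, size=N)
D = np.diag(np.sqrt(tau))

B = np.eye(N) + D @ K @ D
L = np.linalg.cholesky(B)
LWi = solve_triangular(L, D, lower=True)       # role of dtrtrs(L, diag(sqrt(tau)))
Wi = LWi.T @ LWi

assert np.allclose(Wi, np.linalg.inv(K + np.diag(1.0 / tau)))
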
Example 12
    def variational_q_fd(self, X, Z, q_U, p_U, kern_list, B, N, dims, d):
        """
        Description:  Returns the posterior approximation q(f) for the latent output functions (LOFs)
        Equation:     q(f) = \int p(f|u)q(u)du
        Paper:        In Section 2.2.2 / Variational Bounds
        """
        Q = dims['Q']
        M = dims['M']

        #-----------------------------------------#      POSTERIOR ALGEBRA       #-------------------------------------#
        #######  Algebra for q(u)  #######
        m_u = q_U.mu_u.copy()
        L_u = choleskies.flat_to_triang(q_U.chols_u.copy())
        S_u = np.empty((Q, M, M))
        [np.dot(L_u[q, :, :], L_u[q, :, :].T, S_u[q, :, :]) for q in range(Q)]

        #######  Algebra for p(f_d|u)  #######
        Kfdu = multi_output.cross_covariance(X, Z, B, kern_list, d)
        Luu = p_U.Luu.copy()
        Kff = multi_output.function_covariance(X, B, kern_list, d)
        Kff_diag = np.diag(Kff)

        ####### Algebra for q(f_d) = E_{q(u)}[p(f_d|u)] #######
        Afdu = np.empty((Q, N, M))  # Afdu = K_{fduq}Ki_{uquq}
        m_fd = np.zeros((N, 1))
        v_fd = np.zeros((N, 1))
        S_fd = np.zeros((N, N))
        v_fd += Kff_diag[:, None]
        S_fd += Kff
        for q in range(Q):
            ####### Expectation w.r.t. u_q part  #######
            R, _ = linalg.dpotrs(np.asfortranarray(Luu[q, :, :]),
                                 Kfdu[:, q * M:(q * M) + M].T)
            Afdu[q, :, :] = R.T
            m_fd += np.dot(Afdu[q, :, :], m_u[:, q, None])  # exp
            tmp = dtrmm(alpha=1.0, a=L_u[q, :, :].T, b=R, lower=0, trans_a=0)
            v_fd += np.sum(np.square(tmp), 0)[:, None] - np.sum(
                R * Kfdu[:, q * M:(q * M) + M].T, 0)[:, None]  # exp
            S_fd += np.dot(np.dot(R.T, S_u[q, :, :]), R) - np.dot(
                Kfdu[:, q * M:(q * M) + M], R)

        if (v_fd < 0).any():
            print('v negative!')

        #--------------------------------------#     VARIATIONAL POSTERIOR (LOFs)  #-----------------------------------#
        ####### Variational output distribution q_fd() #######
        q_fd = qfd(m_fd=m_fd, v_fd=v_fd, Kfdu=Kfdu, Afdu=Afdu, S_fd=S_fd)

        return q_fd
Example 13
 def woodbury_chol(self):
     """
     return $L_{W}$ where L is the lower triangular Cholesky decomposition of the Woodbury matrix
     $$
     L_{W}L_{W}^{\top} = W^{-1}
     W^{-1} := \texttt{Woodbury inv}
     $$
     """
     if self._woodbury_chol is None:
         #compute woodbury chol from
         if self._woodbury_inv is not None:
             winv = np.atleast_3d(self._woodbury_inv)
             self._woodbury_chol = np.zeros(winv.shape)
             for p in range(winv.shape[-1]):
                 self._woodbury_chol[:,:,p] = pdinv(winv[:,:,p])[2]
         elif self._covariance is not None:
             raise NotImplementedError("TODO: check code here")
             B = self._K - self._covariance
             tmp, _ = dpotrs(self.K_chol, B)
             self._woodbury_inv, _ = dpotrs(self.K_chol, tmp.T)
             _, _, self._woodbury_chol, _ = pdinv(self._woodbury_inv)
         else:
             raise ValueError("insufficient information to compute posterior")
     return self._woodbury_chol
Example 14
def _inference(
    K: np.ndarray,
    ga_approx: GaussianApproximation,
    cav_params: CavityParams,
    Z_tilde: float,
    y: List[Tuple[int, float]],
    yc: List[List[Tuple[int, int]]],
) -> Tuple[Posterior, int, Dict]:
    """
    Compute the posterior approximation
    :param K: prior covariance matrix
    :param ga_approx: Gaussian approximation of the batches
    :param cav_params: Cavity parameters of the posterior
    :param Z_tilde: Log marginal likelihood
    :param y: Direct observations as a list of tuples telling location index (row in X) and observation value.
    :param yc: Batch comparisons in a list of lists of tuples. Each batch is a list and tuples tell the comparisons (winner index, loser index)
    :return: A tuple consisting of the posterior approximation, log marginal likelihood and gradient dictionary
    """

    log_marginal, post_params = _ep_marginal(K, ga_approx, Z_tilde, y, yc)
    tau_tilde_root = sqrtm_block(ga_approx.tau, y, yc)
    Sroot_tilde_K = np.dot(tau_tilde_root, K)
    aux_alpha, _ = dpotrs(post_params.L,
                          np.dot(Sroot_tilde_K, ga_approx.v),
                          lower=1)
    # (K + Sigma_tilde)^(-1) mu_tilde
    alpha = (ga_approx.v - np.dot(tau_tilde_root, aux_alpha))[:, None]
    LWi, _ = dtrtrs(post_params.L, tau_tilde_root, lower=1)

    Wi = np.dot(LWi.T, LWi)
    symmetrify(Wi)  # (K + Sigma_tilde)^(-1)
    dL_dK = 0.5 * (tdot(alpha) - Wi)
    dL_dthetaL = 0
    return (
        Posterior(woodbury_inv=np.asfortranarray(Wi),
                  woodbury_vector=alpha,
                  K=K),
        log_marginal,
        {
            "dL_dK": dL_dK,
            "dL_dthetaL": dL_dthetaL,
            "dL_dm": alpha
        },
    )
Example 15
    def prediction(self, testpoint, returnvar=True):
        '''
        Function to make predictions given a test point

        Args:
            (array) testpoint: a test point of length ndim

            (bool) returnvar: If True, the GP variance will
                    be computed

        Returns:
            (array) mean, var: if returnvar=True
            (array) mean : if returnvar=False
        '''

        # use numpy array instead of list (if any)
        testpoint = np.array(testpoint).flatten()

        assert len(testpoint) == self.ndim, 'different dimension'

        # transform point first
        testpoint_trans = np.dot(self.mu_matrix, testpoint)
        testpoint_trans = testpoint_trans.reshape(1, self.ndim)

        # compute the k_star vector
        k_s = self.kernel('trainTest', self.theta_, testpoint_trans)

        # compute mean GP - super quick
        mean_gp = np.array([(k_s.flatten() * self.alpha_.flatten()).sum(0)])

        # rescale back
        mean_scaled = self.mean_y + self.std_y * mean_gp

        # do extra computations if we want GP variance
        if returnvar:
            variance = gpl.dpotrs(self.chol_fact, k_s, lower=True)[0].flatten()
            k_ss = self.kernel('testSet', testpoint_trans, testpoint_trans)
            var_gp = k_ss - (k_s.flatten() * variance).sum(0)
            var_gp = var_gp.flatten()

            # rescale back
            var = self.std_y**2 * var_gp
            return mean_scaled, var

        return mean_scaled
Example 16
    def prediction(self, testPoint, returnVar=True):
        '''
        Function to make predictions given a test point

        Args:
            (array) testPoint: a test point of length ndim

            (bool) returnVar: If True, the GP variance will
                    be computed

        Returns:
            (array) mean, var: if returnVar=True
            (array) mean : if returnVar=False
        '''

        # use numpy array instead of list (if any)
        testPoint = np.array(testPoint).flatten()

        assert len(testPoint) == self.ndim, 'Different dimension'

        # transform point first
        testPoint_trans = np.dot(self.MU, testPoint)
        testPoint_trans = testPoint_trans.reshape(1, self.d)

        # compute the k_star vector
        ks = self.kernel('trainTest', self.theta_, testPoint_trans)

        # compute mean GP - super quick
        meanGP = np.array([(ks.flatten() * self.alpha_.flatten()).sum(0)])

        # rescale back
        mu = self.mean_y + self.std_y * meanGP

        # do extra computations if we want GP variance
        if returnVar:
            v = gpl.dpotrs(self.L, ks, lower=True)[0].flatten()
            kss = self.kernel('testSet', testPoint_trans, testPoint_trans)
            varGP = kss - (ks.flatten() * v).sum(0)
            varGP = varGP.flatten()

            # rescale back
            var = self.std_y**2 * varGP
            return mu, var
        else:
            return mu
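
Both prediction methods above implement the usual GP predictive equations, mean_* = k_*^T K^{-1} y and var_* = k_** - k_*^T K^{-1} k_*, with the solves routed through the cached Cholesky factor. A compact plain-NumPy/SciPy check with a toy RBF kernel (not the class's kernel helper).

# Sketch: Cholesky-based predictive mean/variance versus dense solves.
import numpy as np
from scipy.linalg import cho_factor, cho_solve

rng = np.random.default_rng(8)
X = rng.uniform(-2, 2, size=(20, 1))
y = np.cos(X) + 0.05 * rng.normal(size=X.shape)
x_star = np.array([[0.3]])

def rbf(A, B):
    return np.exp(-0.5 * (A - B.T) ** 2)

K = rbf(X, X) + 1e-4 * np.eye(len(X))
k_s = rbf(X, x_star)                       # (20, 1), the k_star vector
k_ss = rbf(x_star, x_star).item()          # scalar k_**

chol = cho_factor(K, lower=True)
mean_star = (k_s.T @ cho_solve(chol, y)).item()
var_star = k_ss - (k_s.T @ cho_solve(chol, k_s)).item()

assert np.isclose(mean_star, (k_s.T @ np.linalg.solve(K, y)).item())
assert np.isclose(var_star, k_ss - (k_s.T @ np.linalg.solve(K, k_s)).item())
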
Example 17
def _inference(K, ga_approx, cav_params, likelihood, Z_tilde, Y_metadata=None):
    log_marginal, post_params = _ep_marginal(K, ga_approx, Z_tilde)

    tau_tilde_root = np.sqrt(ga_approx.tau)
    Sroot_tilde_K = tau_tilde_root[:,None] * K

    aux_alpha , _ = dpotrs(post_params.L, np.dot(Sroot_tilde_K, ga_approx.v), lower=1)
    alpha = (ga_approx.v - tau_tilde_root * aux_alpha)[:, None]  # (K + Sigma_tilde)^(-1) mu_tilde
    LWi, _ = dtrtrs(post_params.L, np.diag(tau_tilde_root), lower=1)
    Wi = np.dot(LWi.T, LWi)
    symmetrify(Wi)  # (K + Sigma_tilde)^(-1)

    dL_dK = 0.5 * (tdot(alpha) - Wi)
    dL_dthetaL = 0 #likelihood.ep_gradients(Y, cav_params.tau, cav_params.v, np.diag(dL_dK), Y_metadata=Y_metadata, quad_mode='gh')
    #temp2 = likelihood.ep_gradients(Y, cav_params.tau, cav_params.v, np.diag(dL_dK), Y_metadata=Y_metadata, quad_mode='naive')
    #temp = likelihood.exact_inference_gradients(np.diag(dL_dK), Y_metadata = Y_metadata)
    #print("exact: {}, approx: {}, Ztilde: {}, naive: {}".format(temp, dL_dthetaL, Z_tilde, temp2))
    return Posterior(woodbury_inv=Wi, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL, 'dL_dm':alpha}
Example 18
    def _log_likelihood(self, log_params):
        # Returns log likelihood, p(D|hyperparams)
        params = np.exp(log_params)
        l_scales = params[0:self.X_dim]
        output_var = params[self.X_dim]  # QUESTION: difference between output and noise variance
        noise_var = params[self.X_dim + 1]
        # compute eta
        eta = np.min(self.Y) - params[self.X_dim + 2]  # QUESTION: what is this?
        # compute the observed value for g instead of y
        g_ob = np.sqrt(2.0 * (self.Y - eta))

        kernel = GPy.kern.RBF(input_dim=self.X_dim,
                              ARD=True,
                              variance=output_var,
                              lengthscale=l_scales)
        Kng = kernel.K(self.X)
        # QUESTION: does not seem to follow conditional variance form in eqn 6

        # compute posterior mean distribution for g TODO update this
        # GPg = GPy.models.GPRegression(self.X, g_ob, kernel, noise_var=1e-8)
        # mg,_ = GPg.predict(self.X)
        mg = g_ob

        # approximate covariance matrix of y using linearisation technique
        Kny = mg * Kng * mg.T + (noise_var + 1e-8) * np.eye(Kng.shape[0])

        # compute likelihood terms
        Wi, LW, LWi, W_logdet = pdinv(Kny)  # from GPy module
        # Wi = inverse of Kny (ndarray)
        # LW = lower Cholesky factor of Kny (ndarray)
        # LWi = inverse of that Cholesky factor (ndarray)
        # W_logdet = log determinant of Kny (float)

        alpha, _ = dpotrs(LW, self.Y, lower=1)
        loglikelihood = 0.5 * (-self.Y.size * np.log(2 * np.pi) -
                               self.Y.shape[1] * W_logdet -
                               np.sum(alpha * self.Y))
        # Log marginal likelihood for GP, based on Rasmussen eqn 2.30

        return loglikelihood
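
The closing expression is the Gaussian log marginal likelihood of Rasmussen & Williams eq. 2.30, log p(y) = -0.5 y^T Kny^{-1} y - 0.5 log|Kny| - 0.5 n log(2 pi). A sketch checking the Cholesky-based evaluation against scipy.stats.multivariate_normal for a single output column (toy kernel, plain NumPy/SciPy).

# Sketch: Cholesky evaluation of the log marginal likelihood versus scipy's logpdf.
import numpy as np
from scipy.linalg import cho_factor, cho_solve
from scipy.stats import multivariate_normal

rng = np.random.default_rng(9)
n = 15
X = rng.normal(size=(n, 1))
Kny = np.exp(-0.5 * (X - X.T) ** 2) + 0.1 * np.eye(n)
y = rng.normal(size=(n, 1))

LW = cho_factor(Kny, lower=True)
alpha = cho_solve(LW, y)
W_logdet = 2.0 * np.sum(np.log(np.diag(LW[0])))
loglik = 0.5 * (-y.size * np.log(2 * np.pi) - W_logdet - np.sum(alpha * y))

assert np.isclose(loglik,
                  multivariate_normal.logpdf(y.ravel(), mean=np.zeros(n), cov=Kny))
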
Example 19
    def calculate_q_f(self, X, Z, q_U, p_U, kern_list, B, M, N, Q, D, d):
        """
        Calculates the mean and variance of q(f_d) as
        Equation: E_q(U)\{p(f_d|U)\}
        """
        # Algebra for q(u):
        m_u = q_U.mu_u.copy()
        L_u = choleskies.flat_to_triang(q_U.chols_u.copy())
        S_u = np.empty((Q, M, M))
        [np.dot(L_u[q, :, :], L_u[q, :, :].T, S_u[q, :, :]) for q in range(Q)]

        # Algebra for p(f_d|u):
        Kfdu = util.cross_covariance(X, Z, B, kern_list, d)
        Kuu = p_U.Kuu.copy()
        Luu = p_U.Luu.copy()
        Kuui = p_U.Kuui.copy()
        Kff = util.function_covariance(X, B, kern_list, d)
        Kff_diag = np.diag(Kff)

        # Algebra for q(f_d) = E_{q(u)}[p(f_d|u)]
        Afdu = np.empty((Q, N, M))  #Afdu = K_{fduq}Ki_{uquq}
        m_fd = np.zeros((N, 1))
        v_fd = np.zeros((N, 1))
        S_fd = np.zeros((N, N))
        v_fd += Kff_diag[:, None]
        S_fd += Kff
        for q in range(Q):
            # Expectation part
            R, _ = linalg.dpotrs(np.asfortranarray(Luu[q, :, :]),
                                 Kfdu[:, q * M:(q * M) + M].T)
            Afdu[q, :, :] = R.T
            m_fd += np.dot(Afdu[q, :, :], m_u[:, q, None])  #exp
            tmp = dtrmm(alpha=1.0, a=L_u[q, :, :].T, b=R, lower=0, trans_a=0)
            v_fd += np.sum(np.square(tmp), 0)[:, None] - np.sum(
                R * Kfdu[:, q * M:(q * M) + M].T, 0)[:, None]  #exp
            S_fd += np.dot(np.dot(R.T, S_u[q, :, :]), R) - np.dot(
                Kfdu[:, q * M:(q * M) + M], R)

        if (v_fd < 0).any():
            print('v negative!')

        q_fd = qfd(m_fd=m_fd, v_fd=v_fd, Kfdu=Kfdu, Afdu=Afdu, S_fd=S_fd)
        return q_fd
Example 20
    def alpha(self):
        '''
        Function to compute alpha = k^-1 y

        Args:
            None

        Returns:
            (array) alpha of size N x 1
        '''

        # compute the kernel matrix of size N x N
        k = self.kernel('trainSet', self.theta_, self.theta_)

        # compute the Cholesky factor
        self.chol_fact = gpl.jitchol(k)

        # Use triangular method to solve for alpha
        alp = gpl.dpotrs(self.chol_fact, self.output, lower=True)[0]

        return alp
Example 21
    def vb_grad_natgrad(self):
        """
        Natural Gradients of the bound with respect to phi, the variational
        parameters controlling assignment of the data to GPs
        """
        grad_Lm = np.zeros_like(self.phi)
        for i, kern in enumerate(self.kern):
            K = kern.K(self.X)
            I = np.eye(self.N)

            B_inv = np.diag(1. / ((self.phi[:, i] + 1e-6) / self.variance))
            K_B_inv, L_B, _, _ = pdinv(K + B_inv)
            alpha, _ = dpotrs(L_B, self.Y)
            dL_dB_diag = np.sum(np.square(alpha), 1) - np.diag(K_B_inv)

            grad_Lm[:,i] = -0.5 * self.variance * dL_dB_diag / (self.phi[:,i]**2 + 1e-6) 
            
        grad_phi = grad_Lm + self.mixing_prop_bound_grad() + self.Hgrad

        natgrad = grad_phi - np.sum(self.phi * grad_phi, 1)[:, None]
        grad = natgrad * self.phi

        return grad.flatten(), natgrad.flatten()
Example 22
    def vb_grad_natgrad(self):
        """
        Natural Gradients of the bound with respect to phi, the variational
        parameters controlling assignment of the data to GPs
        """
        grad_Lm = np.zeros_like(self.phi)
        for i, kern in enumerate(self.kern):
            K = kern.K(self.X)
            I = np.eye(self.N)

            B_inv = np.diag(1. / ((self.phi[:, i] + 1e-6) / self.variance))
            K_B_inv, L_B, _, _ = pdinv(K + B_inv)
            alpha, _ = dpotrs(L_B, self.Y)
            dL_dB_diag = np.sum(np.square(alpha), 1) - np.diag(K_B_inv)

            grad_Lm[:, i] = -0.5 * self.variance * dL_dB_diag / (
                self.phi[:, i]**2 + 1e-6)

        grad_phi = grad_Lm + self.mixing_prop_bound_grad() + self.Hgrad

        natgrad = grad_phi - np.sum(self.phi * grad_phi, 1)[:, None]
        grad = natgrad * self.phi

        return grad.flatten(), natgrad.flatten()
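
The natgrad/grad lines above convert the gradient with respect to phi into the gradient with respect to unconstrained (row-wise softmax) logits: for phi = softmax(r) and g = dL/dphi, the chain rule gives dL/dr = phi * (g - sum(phi * g)). A finite-difference check on a toy linear objective (plain NumPy; the model itself is not needed).

# Sketch: softmax chain rule behind `natgrad * phi`, checked numerically.
import numpy as np

rng = np.random.default_rng(10)
N, K = 5, 3
r = rng.normal(size=(N, K))
c = rng.normal(size=(N, K))                 # coefficients of a toy objective

def softmax_rows(r):
    e = np.exp(r - r.max(axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)

def L(r):
    return np.sum(softmax_rows(r) * c)

phi = softmax_rows(r)
g = c                                       # dL/dphi for the toy objective
natgrad = g - np.sum(phi * g, axis=1, keepdims=True)
grad = natgrad * phi                        # claimed dL/dr

eps = 1e-6
num = np.zeros_like(r)
for idx in np.ndindex(r.shape):
    rp, rm = r.copy(), r.copy()
    rp[idx] += eps
    rm[idx] -= eps
    num[idx] = (L(rp) - L(rm)) / (2 * eps)

assert np.allclose(grad, num, atol=1e-6)
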
Example 23
    def loglikelihood(self, parameters):

        self.assign_parameters(parameters)

        cosmo_ = self.cosmo_params.values()
        a_ia = self.systematics['A_IA']
        a_bary = self.systematics['A_bary']
        # careful here - we have to supply sum of neutrinos, that is,
        # parameters[-1], not 'm_ncdm'
        neut = parameters[-1]
        testpoint = np.concatenate([
            list(cosmo_),
            np.ones(1) * a_bary,
            np.ones(1) * neut,
            np.ones(1) * a_ia
        ])

        index_ee = self.all_bands_ee_to_use == 1
        index_bb = self.all_bands_bb_to_use == 1

        if self.set_random:
            if self.n_realisation == 1:
                cl_ee_total = self.random_sample(testpoint).flatten()
            else:
                cl_ee_total = self.random_sample(testpoint)
        else:
            cl_ee_total = self.mean_prediction(testpoint)

        param_name = 'm_corr'

        if param_name in self.settings.use_nuisance:
            m_m, m_c = self.calc_m_correction()
            # apply the m-correction, then restrict to the bands in use
            covariance = self.covariance / np.asarray(m_c)
            covariance = covariance[np.ix_(self.indices_for_bands_to_use,
                                           self.indices_for_bands_to_use)]

            band_powers = self.band_powers / np.asarray(m_m)
            band_powers = band_powers[self.indices_for_bands_to_use]
        else:
            band_powers = self.band_powers
            covariance = self.covariance

        cl_sys_bb, cl_sys_ee_noise, cl_sys_bb_noise = self.systematics_calc()

        theory_ee = cl_ee_total + cl_sys_ee_noise[index_ee]
        theory_bb = cl_sys_bb[index_bb] + cl_sys_bb_noise[index_bb]

        if (self.set_random and self.n_realisation > 1):
            theory_bb_nr = np.repeat(theory_bb.reshape(1, len(theory_bb)),
                                     self.n_realisation,
                                     axis=0)
            band_powers_theory = np.concatenate((theory_ee, theory_bb_nr),
                                                axis=1)
            difference_vector = band_powers_theory - band_powers

        else:

            band_powers_theory = np.concatenate((theory_ee, theory_bb))
            difference_vector = band_powers_theory - band_powers

        if np.isinf(band_powers_theory).any() or np.isnan(
                band_powers_theory).any():
            return -1E32

        elif param_name in self.settings.use_nuisance:
            # use a Cholesky decomposition instead:
            chol_fact = cholesky(covariance, lower=True)

            if (self.set_random and self.n_realisation > 1):

                cinv = gpl.dpotrs(chol_fact,
                                  np.eye(chol_fact.shape[0]),
                                  lower=True)[0]
                cinv_diff = np.dot(cinv, difference_vector.T)
                chi2 = np.einsum('ij,ij->j', difference_vector.T, cinv_diff)

                return logsumexp(-0.5 * chi2) - np.log(self.n_realisation)

            else:
                yt = solve_triangular(chol_fact,
                                      difference_vector.T,
                                      lower=True)
                chi2 = yt.dot(yt)
                return -0.5 * chi2
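
The else-branch above evaluates chi2 = d^T C^{-1} d through a single triangular solve: with C = L L^T (lower Cholesky) and t = L^{-1} d, chi2 = t^T t. A short check of that identity (plain NumPy/SciPy, random stand-ins for the covariance and the difference vector).

# Sketch: chi-square via a triangular solve equals the dense-solve quadratic form.
import numpy as np
from scipy.linalg import cholesky, solve_triangular

rng = np.random.default_rng(11)
n = 8
M = rng.normal(size=(n, n))
C = M @ M.T + n * np.eye(n)                  # stand-in for the band-power covariance
d = rng.normal(size=n)                       # stand-in for the difference vector

L = cholesky(C, lower=True)
t = solve_triangular(L, d, lower=True)
assert np.isclose(t @ t, d @ np.linalg.solve(C, d))
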
Example 24
    def incremental_inference(self,
                              kern,
                              X,
                              likelihood,
                              Y,
                              mean_function=None,
                              Y_metadata=None,
                              K=None,
                              variance=None,
                              Z_tilde=None):

        # do incremental update
        if mean_function is None:
            m = 0
        else:
            m = mean_function.f(X)

        if variance is None:
            variance = likelihood.gaussian_variance(Y_metadata)

        YYT_factor = Y - m

        # K_tmp = kern.K(X, X[-1:])
        K_inc = kern._K[:-1, -1]
        K_inc2 = kern._K[-1:, -1]
        # self._K = np.block([[self._K, K_inc], [K_inc.T, K_inc2]])

        # Ky = K.copy()
        # variance can be given per point, in which case we just take the last one
        jitter = variance[-1] + 1e-8
        # diag.add(Ky, jitter)

        # LW_old = self._old_posterior.woodbury_chol

        Wi, LW, LWi, W_logdet = pdinv_inc(self._old_LW, K_inc, K_inc2 + jitter,
                                          self._old_Wi)

        alpha, _ = dpotrs(LW, YYT_factor, lower=1)

        log_marginal = 0.5 * (-Y.size * log_2_pi - Y.shape[1] * W_logdet -
                              np.sum(alpha * YYT_factor))

        if Z_tilde is not None:
            # This is a correction term for the log marginal likelihood
            # In EP this is log Z_tilde, which is the difference between the
            # Gaussian marginal and Z_EP
            log_marginal += Z_tilde

        dL_dK = 0.5 * (tdot(alpha) - Y.shape[1] * Wi)

        dL_dthetaL = likelihood.exact_inference_gradients(
            np.diag(dL_dK), Y_metadata)

        self._old_LW = LW
        self._old_Wi = Wi
        posterior = Posterior(woodbury_chol=LW, woodbury_vector=alpha, K=K)

        # TODO add logdet to posterior ?
        return posterior, log_marginal, {
            'dL_dK': dL_dK,
            'dL_dthetaL': dL_dthetaL,
            'dL_dm': alpha
        }
Example 25
def bifurcation_statistics(omgp_gene, expression_matrix):
    ''' Given an OMGP model and an expression matrix, evaluate how well
    every gene fits the model.
    '''
    bif_stats = pd.DataFrame(index=expression_matrix.index)
    bif_stats['bif_ll'] = np.nan
    bif_stats['amb_ll'] = np.nan
    bif_stats['shuff_bif_ll'] = np.nan
    bif_stats['shuff_amb_ll'] = np.nan

    # Make a "copy" of provided OMGP but assign ambiguous mixture parameters
    omgp_gene_a = OMGP(omgp_gene.X,
                       omgp_gene.Y,
                       K=omgp_gene.K,
                       kernels=[k.copy() for k in omgp_gene.kern],
                       prior_Z=omgp_gene.prior_Z,
                       variance=float(omgp_gene.variance))

    omgp_gene_a.phi = np.ones_like(omgp_gene.phi) * 1. / omgp_gene.K

    # To control FDR, perform the same likelihood calculation, but with permuted X values

    shuff_X = np.array(omgp_gene.X).copy()
    np.random.shuffle(shuff_X)

    omgp_gene_shuff = OMGP(shuff_X,
                           omgp_gene.Y,
                           K=omgp_gene.K,
                           kernels=[k.copy() for k in omgp_gene.kern],
                           prior_Z=omgp_gene.prior_Z,
                           variance=float(omgp_gene.variance))

    omgp_gene_shuff.phi = omgp_gene.phi

    omgp_gene_shuff_a = OMGP(shuff_X,
                             omgp_gene.Y,
                             K=omgp_gene.K,
                             kernels=[k.copy() for k in omgp_gene.kern],
                             prior_Z=omgp_gene.prior_Z,
                             variance=float(omgp_gene.variance))

    omgp_gene_shuff_a.phi = np.ones_like(omgp_gene.phi) * 1. / omgp_gene.K

    # Precalculate response-variable independent parts
    omgps = [omgp_gene, omgp_gene_a, omgp_gene_shuff, omgp_gene_shuff_a]
    column_list = ['bif_ll', 'amb_ll', 'shuff_bif_ll', 'shuff_amb_ll']
    precalcs = [omgp_model_bound(omgp) for omgp in omgps]

    # Calculate the likelihoods of the models for every gene
    for gene in tqdm(expression_matrix.index):
        Y = expression_matrix.loc[gene]  # .loc replaces the removed DataFrame.ix accessor
        YYT = np.outer(Y, Y)

        for precalc, column in zip(precalcs, column_list):
            model_bound, LBs = precalc
            GP_data_fit = 0.
            for LB in LBs:
                GP_data_fit -= .5 * dpotrs(LB, YYT)[0].trace()

            bif_stats.loc[gene, column] = model_bound + GP_data_fit

    bif_stats['phi0_corr'] = expression_matrix.corrwith(
        pd.Series(omgp_gene.phi[:, 0], index=expression_matrix.columns), 1)
    bif_stats['D'] = bif_stats['bif_ll'] - bif_stats['amb_ll']
    bif_stats['shuff_D'] = bif_stats['shuff_bif_ll'] - bif_stats['shuff_amb_ll']

    return bif_stats
Example 26
    def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None, Lm=None, dL_dKmm=None, fixed_covs_kerns=None, **kw):

        _, output_dim = Y.shape
        uncertain_inputs = isinstance(X, VariationalPosterior)

        #see whether we've got a different noise variance for each datum
        beta = 1./np.fmax(likelihood.gaussian_variance(Y_metadata), 1e-6)
        # VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency!
        #self.YYTfactor = self.get_YYTfactor(Y)
        #VVT_factor = self.get_VVTfactor(self.YYTfactor, beta)
        het_noise = beta.size > 1

        if het_noise:
            raise(NotImplementedError("Heteroscedastic noise not implemented, should be possible though, feel free to try implementing it :)"))

        if beta.ndim == 1:
            beta = beta[:, None]


        # do the inference:
        num_inducing = Z.shape[0]
        num_data = Y.shape[0]
        # kernel computations, using BGPLVM notation

        Kmm = kern.K(Z).copy()
        diag.add(Kmm, self.const_jitter)
        if Lm is None:
            Lm = jitchol(Kmm)

        # The rather complex computations of A, and the psi stats
        if uncertain_inputs:
            psi0 = kern.psi0(Z, X)
            psi1 = kern.psi1(Z, X)
            if het_noise:
                psi2_beta = np.sum([kern.psi2(Z,X[i:i+1,:]) * beta_i for i,beta_i in enumerate(beta)],0)
            else:
                psi2_beta = kern.psi2(Z,X) * beta
            LmInv = dtrtri(Lm)
            A = LmInv.dot(psi2_beta.dot(LmInv.T))
        else:
            psi0 = kern.Kdiag(X)
            psi1 = kern.K(X, Z)
            if het_noise:
                tmp = psi1 * (np.sqrt(beta))
            else:
                tmp = psi1 * (np.sqrt(beta))
            tmp, _ = dtrtrs(Lm, tmp.T, lower=1)
            A = tdot(tmp)

        # factor B
        B = np.eye(num_inducing) + A
        LB = jitchol(B)
        # back substitute C into psi1Vf
        #tmp, _ = dtrtrs(Lm, psi1.T.dot(VVT_factor), lower=1, trans=0)
        #_LBi_Lmi_psi1Vf, _ = dtrtrs(LB, tmp, lower=1, trans=0)
        #tmp, _ = dtrtrs(LB, _LBi_Lmi_psi1Vf, lower=1, trans=1)
        #Cpsi1Vf, _ = dtrtrs(Lm, tmp, lower=1, trans=1)

        # data fit and derivative of L w.r.t. Kmm
        #delit = tdot(_LBi_Lmi_psi1Vf)

        # Expose YYT to get additional covariates in (YYT + Kgg):
        tmp, _ = dtrtrs(Lm, psi1.T, lower=1, trans=0)
        _LBi_Lmi_psi1, _ = dtrtrs(LB, tmp, lower=1, trans=0)
        tmp, _ = dtrtrs(LB, _LBi_Lmi_psi1, lower=1, trans=1)
        Cpsi1, _ = dtrtrs(Lm, tmp, lower=1, trans=1)

        # TODO: cache this:
        # Compute fixed covariates covariance:
        if fixed_covs_kerns is not None:
            K_fixed = 0
            for name, [cov, k] in fixed_covs_kerns.items():  # .items() for Python 3
                K_fixed += k.K(cov)

            #trYYT = self.get_trYYT(Y)
            YYT_covs = (tdot(Y) + K_fixed)
            data_term = beta**2 * YYT_covs
            trYYT_covs = np.trace(YYT_covs)
        else:
            data_term = beta**2 * tdot(Y)
            trYYT_covs = self.get_trYYT(Y)

        #trYYT = self.get_trYYT(Y)
        delit = mdot(_LBi_Lmi_psi1, data_term, _LBi_Lmi_psi1.T)
        data_fit = np.trace(delit)

        DBi_plus_BiPBi = backsub_both_sides(LB, output_dim * np.eye(num_inducing) + delit)
        if dL_dKmm is None:
            delit = -0.5 * DBi_plus_BiPBi
            delit += -0.5 * B * output_dim
            delit += output_dim * np.eye(num_inducing)
            # Compute dL_dKmm
            dL_dKmm = backsub_both_sides(Lm, delit)

        # derivatives of L w.r.t. psi
        dL_dpsi0, dL_dpsi1, dL_dpsi2 = _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm,
            data_term, Cpsi1, DBi_plus_BiPBi,
            psi1, het_noise, uncertain_inputs)

        # log marginal likelihood
        log_marginal = _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise,
            psi0, A, LB, trYYT_covs, data_fit, Y)

        if self.save_per_dim:
            self.saved_vals = [psi0, A, LB, _LBi_Lmi_psi1, beta]

        # No heteroscedastics, so no _LBi_Lmi_psi1Vf:
        # For the interested reader, try implementing the heteroscedastic version, it should be possible
        _LBi_Lmi_psi1Vf = None # Is just here for documentation, so you can see, what it was.

        #noise derivatives
        dL_dR = _compute_dL_dR(likelihood,
            het_noise, uncertain_inputs, LB,
            _LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A,
            psi0, psi1, beta,
            data_fit, num_data, output_dim, trYYT_covs, Y, None)

        dL_dthetaL = likelihood.exact_inference_gradients(dL_dR,Y_metadata)

        #put the gradients in the right places
        if uncertain_inputs:
            grad_dict = {'dL_dKmm': dL_dKmm,
                         'dL_dpsi0':dL_dpsi0,
                         'dL_dpsi1':dL_dpsi1,
                         'dL_dpsi2':dL_dpsi2,
                         'dL_dthetaL':dL_dthetaL}
        else:
            grad_dict = {'dL_dKmm': dL_dKmm,
                         'dL_dKdiag':dL_dpsi0,
                         'dL_dKnm':dL_dpsi1,
                         'dL_dthetaL':dL_dthetaL}

        if fixed_covs_kerns is not None:
            # For now, we do not take the gradients, we can compute them,
            # but the maximum likelihood solution is to switch off the additional covariates....
            dL_dcovs = beta * np.eye(K_fixed.shape[0]) - beta**2*tdot(_LBi_Lmi_psi1.T)
            grad_dict['dL_dcovs'] = -.5 * dL_dcovs

        #get sufficient things for posterior prediction
        #TODO: do we really want to do this in  the loop?
        if 1:
            woodbury_vector = (beta*Cpsi1).dot(Y)
        else:
            import ipdb; ipdb.set_trace()
            psi1V = np.dot(Y.T*beta, psi1).T
            tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0)
            tmp, _ = dpotrs(LB, tmp, lower=1)
            woodbury_vector, _ = dtrtrs(Lm, tmp, lower=1, trans=1)
        Bi, _ = dpotri(LB, lower=1)
        symmetrify(Bi)
        Bi = -dpotri(LB, lower=1)[0]
        diag.add(Bi, 1)

        woodbury_inv = backsub_both_sides(Lm, Bi)

        #construct a posterior object
        post = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm)
        return post, log_marginal, grad_dict
Example 27
def bifurcation_statistics(omgp_gene, expression_matrix):
    ''' Given an OMGP model and an expression matrix, evaluate how well
    every gene fits the model.
    '''
    bif_stats = pd.DataFrame(index=expression_matrix.index)
    bif_stats['bif_ll'] = np.nan
    bif_stats['amb_ll'] = np.nan
    bif_stats['shuff_bif_ll'] = np.nan
    bif_stats['shuff_amb_ll'] = np.nan

    # Make a "copy" of provided OMGP but assign ambiguous mixture parameters
    omgp_gene_a = OMGP(omgp_gene.X, omgp_gene.Y,
                       K=omgp_gene.K,
                       kernels=[k.copy() for k in omgp_gene.kern],
                       prior_Z=omgp_gene.prior_Z,
                       variance=float(omgp_gene.variance))

    omgp_gene_a.phi = np.ones_like(omgp_gene.phi) * 1. / omgp_gene.K

    # To control FDR, perform the same likelihood calculation, but with permuted X values

    shuff_X = np.array(omgp_gene.X).copy()
    np.random.shuffle(shuff_X)

    omgp_gene_shuff = OMGP(shuff_X, omgp_gene.Y,
                           K=omgp_gene.K,
                           kernels=[k.copy() for k in omgp_gene.kern],
                           prior_Z=omgp_gene.prior_Z,
                           variance=float(omgp_gene.variance))

    omgp_gene_shuff.phi = omgp_gene.phi

    omgp_gene_shuff_a = OMGP(shuff_X, omgp_gene.Y,
                             K=omgp_gene.K,
                             kernels=[k.copy() for k in omgp_gene.kern],
                             prior_Z=omgp_gene.prior_Z,
                             variance=float(omgp_gene.variance))

    omgp_gene_shuff_a.phi = np.ones_like(omgp_gene.phi) * 1. / omgp_gene.K

    # Precalculate response-variable independent parts
    omgps = [omgp_gene, omgp_gene_a, omgp_gene_shuff, omgp_gene_shuff_a]
    column_list = ['bif_ll', 'amb_ll', 'shuff_bif_ll', 'shuff_amb_ll']
    precalcs = [omgp_model_bound(omgp) for omgp in omgps]

    # Calculate the likelihoods of the models for every gene
    for gene in tqdm(expression_matrix.index):
        Y = expression_matrix.loc[gene]  # .loc replaces the removed DataFrame.ix accessor
        YYT = np.outer(Y, Y)

        for precalc, column in zip(precalcs, column_list):
            model_bound, LBs = precalc
            GP_data_fit = 0.
            for LB in LBs:
                GP_data_fit -= .5 * dpotrs(LB, YYT)[0].trace()

            bif_stats.loc[gene, column] = model_bound + GP_data_fit

    bif_stats['phi0_corr'] = expression_matrix.corrwith(pd.Series(omgp_gene.phi[:, 0], index=expression_matrix.columns), 1)
    bif_stats['D'] = bif_stats['bif_ll'] - bif_stats['amb_ll']
    bif_stats['shuff_D'] = bif_stats['shuff_bif_ll'] - bif_stats['shuff_amb_ll']

    return bif_stats