# Example 1
    def _updateParameters(self, Y, W, tau, Alpha, Qmean, Qvar, p_cov_inv,
                          p_cov_inv_diag, mask):
        """ Hidden method to compute parameter updates

        Coordinate-ascent update of q(Z) when each factor carries a
        structured (non-diagonal) prior precision ``p_cov_inv[k]``.

        Y:      list of M view matrices (N x D_m)
        W:      list of M dicts of weight moments, keys "E" and "E2"
        tau:    list of M noise precisions (zeroed in place at masked entries)
        Alpha:  (N, K) per-sample scaling of the prior precision
        Qmean, Qvar: (N, K) variational parameters, updated in place
        p_cov_inv: (K, N, N) prior precisions; p_cov_inv_diag: (K, N) diagonals
        mask:   list of M boolean masks of missing entries

        Returns a dict with the updated 'Qmean' and 'Qvar'.
        """

        N = Y[0].shape[0]  # this is different from self.N for minibatch
        M = len(Y)
        K = self.dim[1]

        # Masking: missing observations contribute zero precision
        for m in range(M):
            tau[m][mask[m]] = 0.

        # Optionally re-weight views so each contributes equally regardless
        # of its number of features (weights are normalised to sum to M)
        weights = [1] * M
        if self.weight_views and M > 1:
            total_w = np.asarray([Y[m].shape[1] for m in range(M)]).sum()
            weights = np.asarray(
                [total_w / (M * Y[m].shape[1]) for m in range(M)])
            weights = weights / weights.sum() * M

        # Precompute terms to speed up GPU computation:
        # foo[n,k]             = sum_m w_m * tau[m][n,:] @ E[W_k^2]  (precision)
        # precomputed_bar[n,k] = sum_m w_m * (tau[m]*Y[m])[n,:] @ E[W_k]  (data)
        foo = gpu_utils.array(s.zeros((N, K)))
        precomputed_bar = gpu_utils.array(s.zeros((N, K)))
        for m in range(M):
            tau_gpu = gpu_utils.array(tau[m])
            foo += weights[m] * gpu_utils.dot(tau_gpu,
                                              gpu_utils.array(W[m]["E2"]))
            bar_tmp1 = gpu_utils.array(W[m]["E"])
            bar_tmp2 = tau_gpu * gpu_utils.array(Y[m])
            precomputed_bar += weights[m] * gpu_utils.dot(bar_tmp2, bar_tmp1)
        foo = gpu_utils.asnumpy(foo)

        # Calculate variational updates, one factor at a time; factor k reads
        # the freshly updated Qmean of the other factors
        for k in range(K):
            bar = gpu_utils.array(s.zeros((N, )))
            tmp_cp1 = gpu_utils.array(Qmean[:, s.arange(K) != k])
            for m in range(M):
                tmp_cp2 = gpu_utils.array(W[m]["E"][:, s.arange(K) != k].T)

                bar_tmp1 = gpu_utils.array(W[m]["E"][:, k])
                # residual after removing the reconstruction by other factors
                bar_tmp2 = gpu_utils.array(
                    tau[m]) * (-gpu_utils.dot(tmp_cp1, tmp_cp2))

                bar += weights[m] * gpu_utils.dot(bar_tmp2, bar_tmp1)
            bar += precomputed_bar[:, k]
            bar = gpu_utils.asnumpy(bar)

            # Prior precision with its diagonal removed, scaled on both sides
            # by sqrt(Alpha); the zero diagonal lets the mean update below
            # multiply against the full Qmean[:, k] vector at once
            p_cov_inv_k_with_zerodiag = p_cov_inv[
                k, :, :] - p_cov_inv_diag[k, :] * s.eye(N)
            scaled_inv_with_zerodiag = gpu_utils.dot(
                gpu_utils.dot(np.diag(np.sqrt(Alpha[:, k])),
                              p_cov_inv_k_with_zerodiag),
                np.diag(np.sqrt(Alpha[:, k])))

            Qvar[:, k] = 1. / (Alpha[:, k] * p_cov_inv_diag[k, :].transpose() +
                               foo[:, k])
            Qmean[:, k] = Qvar[:, k] * (bar - scaled_inv_with_zerodiag.dot(
                Qmean[:, k]))  # can take all samples here as zeros on diagonal

        # Save updated parameters of the Q distribution
        return {'Qmean': Qmean, 'Qvar': Qvar}
# Example 2
    def _updateParameters(self, Y, W, Z, tau, Qmean, Qcov, SigmaUZ, p_cov_inv,
                          mask):
        """ Hidden method to compute parameter updates

        Variational update of the inducing variables q(U) for GP-structured
        factors. The per-sample evidence is accumulated over the M views and
        projected onto the inducing points through p_cov_inv[k] @ SigmaUZ[k].
        Qmean and Qcov are updated in place and returned.
        """

        M = len(Y)
        N = Y[0].shape[0]
        K = self.dim[1]

        # Masking: missing observations contribute zero precision
        for m in range(M):
            tau[m][mask[m]] = 0.

        # Optionally re-weight views so each contributes equally regardless
        # of its number of features (weights are normalised to sum to M)
        weights = [1] * M
        if self.weight_views and M > 1:
            total_w = np.asarray([Y[m].shape[1] for m in range(M)]).sum()
            weights = np.asarray(
                [total_w / (M * Y[m].shape[1]) for m in range(M)])
            weights = weights / weights.sum() * M

        # Precompute terms to speed up GPU computation:
        # foo[n,k]             = sum_m w_m * tau[m][n,:] @ E[W_k^2]
        # precomputed_bar[n,k] = sum_m w_m * (tau[m]*Y[m])[n,:] @ E[W_k]
        foo = gpu_utils.array(s.zeros((N, K)))
        precomputed_bar = gpu_utils.array(s.zeros((N, K)))
        for m in range(M):
            tau_gpu = gpu_utils.array(tau[m])
            foo += weights[m] * gpu_utils.dot(tau_gpu,
                                              gpu_utils.array(W[m]["E2"]))
            bar_tmp1 = gpu_utils.array(W[m]["E"])
            bar_tmp2 = tau_gpu * gpu_utils.array(Y[m])
            precomputed_bar += weights[m] * gpu_utils.dot(bar_tmp2, bar_tmp1)
        foo = gpu_utils.asnumpy(foo)

        # Calculate variational updates - term for mean
        for k in range(K):
            bar = gpu_utils.array(s.zeros((N, )))
            tmp_cp1 = gpu_utils.array(Z['E'][:, s.arange(K) != k])
            for m in range(M):
                tmp_cp2 = gpu_utils.array(W[m]["E"][:, s.arange(K) != k].T)

                bar_tmp1 = gpu_utils.array(W[m]["E"][:, k])
                # residual after removing the reconstruction by other factors
                bar_tmp2 = gpu_utils.array(
                    tau[m]) * (-gpu_utils.dot(tmp_cp1, tmp_cp2))

                bar += weights[m] * gpu_utils.dot(bar_tmp2, bar_tmp1)
            bar += precomputed_bar[:, k]
            bar = gpu_utils.asnumpy(bar)

            # note: no Alpha scaling required here compared to Z nodes as done in the updateParameters function
            # Mcross projects sample space onto the inducing points; compute
            # it once and reuse it for both Qcov and Qmean (the original code
            # recomputed p_cov_inv @ SigmaUZ for the mean update).
            Mcross = gpu_utils.dot(p_cov_inv[k, :, :], SigmaUZ[k, :, :])
            Mtmp = gpu_utils.dot(
                Mcross, gpu_utils.dot(np.diag(foo[:, k]), Mcross.transpose()))
            Qcov[k, :, :] = np.linalg.inv(Mtmp + p_cov_inv[k, :, :])
            Qmean[:, k] = gpu_utils.dot(Qcov[k, :, :],
                                        gpu_utils.dot(Mcross, bar))

        return {'Qmean': Qmean, 'Qcov': Qcov}
# Example 3
    def _updateParameters(self, Y, W, tau, Mu, Alpha, Qmean, Qvar, mask):
        """Compute the variational updates for Q(Z) under a Gaussian prior.

        Zeroes tau at masked entries, optionally re-weights the views,
        precomputes the shared precision and data terms on the GPU, and then
        refreshes Qmean / Qvar factor by factor (in place). Returns the
        updated parameters as a dict.
        """
        # Note: the pre-computation below does not benefit much from the GPU,
        # but the per-factor updates do.

        n_samples = Y[0].shape[0]  # differs from self.N when using minibatches
        n_views = len(Y)
        n_factors = self.dim[1]

        # Missing observations carry zero precision
        for m in range(n_views):
            tau[m][mask[m]] = 0.

        # Optional per-view weighting so each view contributes equally
        # regardless of its number of features (weights sum to n_views)
        view_weights = [1] * n_views
        if self.weight_views and n_views > 1:
            total_features = np.asarray(
                [Y[m].shape[1] for m in range(n_views)]).sum()
            view_weights = np.asarray([
                total_features / (n_views * Y[m].shape[1])
                for m in range(n_views)
            ])
            view_weights = view_weights / view_weights.sum() * n_views

        # Shared terms, accumulated over views on the GPU:
        # prec_term[n,k] = sum_m w_m * tau[m][n,:] @ E[W_k^2]
        # data_term[n,k] = sum_m w_m * (tau[m]*Y[m])[n,:] @ E[W_k]
        prec_term = gpu_utils.array(s.zeros((n_samples, n_factors)))
        data_term = gpu_utils.array(s.zeros((n_samples, n_factors)))
        for m in range(n_views):
            tau_dev = gpu_utils.array(tau[m])
            prec_term += view_weights[m] * gpu_utils.dot(
                tau_dev, gpu_utils.array(W[m]["E2"]))
            w_mean = gpu_utils.array(W[m]["E"])
            tau_y = tau_dev * gpu_utils.array(Y[m])
            data_term += view_weights[m] * gpu_utils.dot(tau_y, w_mean)
        prec_term = gpu_utils.asnumpy(prec_term)

        # Coordinate updates, one factor at a time; each factor sees the
        # latest (already updated) values of the other factors
        for k in range(n_factors):
            evidence_k = gpu_utils.array(s.zeros((n_samples, )))
            other_means = gpu_utils.array(Qmean[:, s.arange(n_factors) != k])
            for m in range(n_views):
                other_w = gpu_utils.array(
                    W[m]["E"][:, s.arange(n_factors) != k].T)

                w_k = gpu_utils.array(W[m]["E"][:, k])
                # residual after removing the reconstruction by other factors
                residual = gpu_utils.array(
                    tau[m]) * (-gpu_utils.dot(other_means, other_w))

                evidence_k += view_weights[m] * gpu_utils.dot(residual, w_k)
            evidence_k += data_term[:, k]
            evidence_k = gpu_utils.asnumpy(evidence_k)

            Qvar[:, k] = 1. / (Alpha[:, k] + prec_term[:, k])
            Qmean[:, k] = Qvar[:, k] * (evidence_k + Alpha[:, k] * Mu[:, k])

        # Hand back the updated parameters of the Q distribution
        return {'Qmean': Qmean, 'Qvar': Qvar}
# Example 4
    def _updateParameters(self, Y, Z, tau, Mu, Alpha, Qmean, Qvar, coeff, ro):
        """Stochastic variational update of q(W) for a single view.

        Qmean / Qvar are updated in place as a convex combination of the old
        value and the new update, weighted by the step size ``ro``.
        ``coeff`` rescales mini-batch sums to the full dataset.
        """

        for k in range(self.dim[1]):
            # Data precision for factor k: coeff * sum_n E[Z_nk^2] * tau_nd
            foo = coeff * np.dot(Z["E2"][:, k], tau)

            bar_tmp1 = gpu_utils.array(Z["E"][:, k])

            # Residual of Y after removing all other factors, times tau
            # (note: Qmean here already reflects the updates of earlier k)
            bar_tmp2 = -gpu_utils.dot(
                gpu_utils.array(Z["E"][:, s.arange(self.dim[1]) != k]),
                gpu_utils.array(Qmean[:, s.arange(self.dim[1]) != k].T))
            bar_tmp2 += gpu_utils.array(Y)
            bar_tmp2 *= gpu_utils.array(tau)

            bar = coeff * gpu_utils.asnumpy(gpu_utils.dot(bar_tmp1, bar_tmp2))

            # stochastic update of W
            Qvar[:, k] *= (1 - ro)
            Qvar[:, k] += ro / (Alpha[:, k] + foo)

            # NOTE Do not use "Qvar" in the update like we used to because this
            # does not hold for stochastic because of the ro weighting
            Qmean[:, k] *= (1 - ro)
            Qmean[:,
                  k] += ro * (1 /
                              (Alpha[:, k] + foo)) * (bar +
                                                      Alpha[:, k] * Mu[:, k])
# Example 5
    def _updateParameters(self, Y, W, tau, Qmean, Qcov, p_cov_inv, mask):
        """ Hidden method to compute parameter updates

        Coordinate-ascent update of q(Z) when each factor has a full (N x N)
        posterior covariance Qcov[k] and a structured prior precision
        p_cov_inv[k]. Qmean and Qcov are updated in place and returned.
        """

        N = Y[0].shape[0]  # this is different from self.N for minibatch
        M = len(Y)
        K = self.dim[1]

        # Masking: missing observations contribute zero precision
        for m in range(M):
            tau[m][mask[m]] = 0.

        # Optionally re-weight views so each contributes equally regardless
        # of its number of features (weights are normalised to sum to M)
        weights = [1] * M
        if self.weight_views and M > 1:
            total_w = np.asarray([Y[m].shape[1] for m in range(M)]).sum()
            weights = np.asarray([total_w / (M * Y[m].shape[1]) for m in range(M)])
            weights = weights / weights.sum() * M

        # Precompute terms to speed up GPU computation:
        # foo[n,k]             = sum_m w_m * tau[m][n,:] @ E[W_k^2]
        # precomputed_bar[n,k] = sum_m w_m * (tau[m]*Y[m])[n,:] @ E[W_k]
        foo = gpu_utils.array(s.zeros((N,K)))
        precomputed_bar = gpu_utils.array(s.zeros((N,K)))
        for m in range(M):
            tau_gpu = gpu_utils.array(tau[m])
            foo += weights[m] * gpu_utils.dot(tau_gpu, gpu_utils.array(W[m]["E2"]))
            bar_tmp1 = gpu_utils.array(W[m]["E"])
            bar_tmp2 = tau_gpu * gpu_utils.array(Y[m])
            precomputed_bar += weights[m] * gpu_utils.dot(bar_tmp2, bar_tmp1)
        foo = gpu_utils.asnumpy(foo)

        # Calculate variational updates, one factor at a time; factor k reads
        # the freshly updated Qmean of the other factors
        for k in range(K):
            bar = gpu_utils.array(s.zeros((N,)))
            tmp_cp1 = gpu_utils.array(Qmean[:, s.arange(K) != k])
            for m in range(M):
                tmp_cp2 = gpu_utils.array(W[m]["E"][:, s.arange(K) != k].T)

                bar_tmp1 = gpu_utils.array(W[m]["E"][:,k])
                # residual after removing the reconstruction by other factors
                bar_tmp2 = gpu_utils.array(tau[m])*(-gpu_utils.dot(tmp_cp1, tmp_cp2))

                bar += weights[m] * gpu_utils.dot(bar_tmp2, bar_tmp1)
            bar += precomputed_bar[:,k]
            bar = gpu_utils.asnumpy(bar)

            # np.eye(N) * foo[:,k] broadcasts to diag(foo[:,k]):
            # posterior covariance = (diag(data precision) + prior precision)^-1
            Qcov[k,:,:] = np.linalg.inv(np.eye(N) * foo[:,k] + p_cov_inv[k,:,:])
            Qmean[:, k] = gpu_utils.dot(Qcov[k,:,:], bar)

        # Save updated parameters of the Q distribution
        return {'Qmean': Qmean, 'Qcov':Qcov}
# Example 6
    def calculateELBO(self):
        """Evaluate the ELBO data term under a Bernoulli likelihood.

        Masked (unobserved) entries are excluded from the sum.
        """
        Z = self.markov_blanket["Z"].getExpectation()
        W = self.markov_blanket["W"].getExpectation()
        mask = self.getMask()

        # Linear predictor Z @ W.T, computed on the GPU when available
        eta = gpu_utils.asnumpy(
            gpu_utils.dot(gpu_utils.array(Z), gpu_utils.array(W).T))

        # Bernoulli log-likelihood: y*eta - log(1 + exp(eta))
        # NOTE(review): s.exp(eta) can overflow for large eta; a logaddexp /
        # softplus formulation would be numerically safer — confirm intent.
        lb = self.obs * eta - s.log(1. + s.exp(eta))
        lb[mask] = 0.

        return lb.sum()
# Example 7
    def _updateParameters(self, Y, W, WW, Z, ZZ, Pa, Pb, mask, ro, groups):
        """ Hidden method to compute parameter updates

        Stochastic update of the Gamma parameters (Qa, Qb) of q(tau), the
        per-group, per-feature noise precision. ``ro`` is the stochastic
        step size; mini-batch sums are rescaled per group by ``coeff``.
        Returns the updated (Qa, Qb).
        """
        Q = self.Q.getParameters()
        Qa, Qb = Q['a'], Q['b']

        # Move matrices to the GPU
        Y_gpu = gpu_utils.array(Y)
        Z_gpu = gpu_utils.array(Z)
        W_gpu = gpu_utils.array(W).T

        # Calculate terms for the update (SPEED EFFICIENT, MEMORY INEFFICIENT FOR GPU)
        # ZW = Z_gpu.dot(W_gpu)
        # tmp = gpu_utils.asnumpy( gpu_utils.square(Y_gpu) \
        #     + gpu_utils.array(ZZ).dot(gpu_utils.array(WW.T)) \
        #     - gpu_utils.dot(gpu_utils.square(Z_gpu),gpu_utils.square(W_gpu)) + gpu_utils.square(ZW) \
        #     - 2*ZW*Y_gpu )
        # tmp[mask] = 0.

        # Calculate terms for the update (SPEED INEFFICIENT, MEMORY EFFICIENT FOR GPU)
        # tmp[n,d] = E[(y_nd - z_n . w_d)^2], the expected squared residual
        # (expands the second moment using E[z], E[z^2], E[w], E[w^2])
        tmp = gpu_utils.asnumpy( gpu_utils.square(Y_gpu) \
            + gpu_utils.array(ZZ).dot(gpu_utils.array(WW.T)) \
            - gpu_utils.dot(gpu_utils.square(Z_gpu),gpu_utils.square(W_gpu)) + gpu_utils.square(Z_gpu.dot(W_gpu)) \
            - 2*Z_gpu.dot(W_gpu)*Y_gpu )
        tmp[mask] = 0.  # masked entries do not contribute

        # Compute updates: decay the old parameters, then add the rescaled
        # mini-batch contribution per sample group
        Qa *= (1 - ro)
        Qb *= (1 - ro)
        for g in range(self.n_groups):
            g_mask = (groups == g)

            n_batch = g_mask.sum()
            if n_batch == 0: continue

            # Calculate scaling coefficient for mini-batch
            coeff = self.n_per_group[g] / n_batch

            # a: prior + half the number of *observed* entries per feature
            Qa[g, :] += ro * (
                Pa[g, :] + 0.5 * coeff *
                (mask[g_mask, :].shape[0] - mask[g_mask, :].sum(axis=0)))
            # b: prior + half the expected squared residuals
            Qb[g, :] += ro * (Pb[g, :] +
                              0.5 * coeff * tmp[g_mask, :].sum(axis=0))

        return Qa, Qb
# Example 8
    def _updateParameters(self, Y, Z, tau, mask, Alpha, Qmean_S1, Qvar_S1,
                          Qvar_S0, Qtheta, SW, theta_lnE, theta_lnEInv, coeff,
                          ro):
        """Stochastic spike-and-slab update of q(W) for a single view.

        Updates, in place and with step size ``ro``: the slab mean/variance
        (Qmean_S1, Qvar_S1), the spike variance (Qvar_S0), the inclusion
        probabilities (Qtheta) and the running expectation SW = theta * mean.
        ``coeff`` rescales mini-batch sums to the full dataset. Results are
        written back through self.Q.setParameters.
        """

        # Mask matrices: missing observations contribute zero precision
        tau[mask] = 0.

        # Copy matrices to GPU
        # Y_gpu = gpu_utils.array(Y)
        tau_gpu = gpu_utils.array(tau)
        Z_gpu = gpu_utils.array(Z["E"])
        ZZ_gpu = gpu_utils.array(Z["E2"])
        # precompute terms
        # tauY_gpu = gpu_utils.array(tau*Y).T
        tauY_gpu = (tau_gpu * gpu_utils.array(Y)).T

        # foo[d,k]        = sum_n E[Z_nk^2] * tau_nd   (data precision)
        # term4_tmp1[d,k] = sum_n tau_nd * y_nd * E[Z_nk]   (data term)
        foo = gpu_utils.asnumpy(gpu_utils.dot(ZZ_gpu.T, tau_gpu).T)
        term4_tmp1 = gpu_utils.asnumpy(gpu_utils.dot(tauY_gpu, Z_gpu))

        del tauY_gpu, ZZ_gpu

        # Update each latent variable in turn (SW is refreshed at the end of
        # every iteration, so later factors see the updated expectations)
        for k in range(self.dim[1]):

            # Compute terms of the log-odds for the inclusion indicator S
            term1 = (theta_lnE - theta_lnEInv)[:, k]

            term2 = 0.5 * s.log(Alpha[:, k])
            term3 = 0.5 * coeff * s.log(foo[:, k] + Alpha[:, k])

            # term4_tmp1 = gpu_utils.dot(tauYT, Zk_cp)

            # term4_tmp2_1 = gpu_utils.array(SW[:,s.arange(self.dim[1])!=k].T)
            # term4_tmp2_2 = (Z_gpu[:,k] * gpu_utils.array(Z['E'][:,s.arange(self.dim[1])!=k]).T).T
            # term4_tmp2 = (tau_gpu*gpu_utils.dot(term4_tmp2_2, term4_tmp2_1)).sum(axis=0)
            # cross-talk of factor k with all other factors (uses current SW)
            term4_tmp2 = gpu_utils.asnumpy((tau_gpu * gpu_utils.dot(
                (Z_gpu[:, k] *
                 gpu_utils.array(Z['E'][:, s.arange(self.dim[1]) != k]).T).T,
                gpu_utils.array(SW[:, s.arange(self.dim[1]) != k].T))).sum(
                    axis=0))

            # posterior precision of the slab component
            term4_tmp3 = foo[:, k] + Alpha[:, k]

            term4 = coeff * 0.5 * s.divide(
                s.square(term4_tmp1[:, k] - term4_tmp2), term4_tmp3)

            # Update S (inclusion probability): stochastic convex combination
            Qtheta[:, k] *= (1 - ro)
            Qtheta[:,
                   k] += ro * (1. /
                               (1. + s.exp(-(term1 + term2 - term3 + term4))))

            # Update W (slab mean and variance)
            tmp_var = 1. / term4_tmp3
            Qvar_S1[:, k] *= (1 - ro)
            Qvar_S1[:, k] += ro * tmp_var

            Qmean_S1[:, k] *= (1 - ro)
            Qmean_S1[:, k] += ro * tmp_var * (term4_tmp1[:, k] - term4_tmp2)

            # Update Expectations for the next iteration
            SW[:, k] = Qtheta[:, k] * Qmean_S1[:, k]

            del term1, term2, term3, term4_tmp2, term4_tmp3

        # update of Qvar_S0 (spike component: variance reverts to the prior)
        Qvar_S0 *= (1 - ro)
        Qvar_S0 += ro / Alpha

        # Save updated parameters of the Q distribution
        self.Q.setParameters(mean_B0=s.zeros((self.dim[0], self.dim[1])),
                             var_B0=Qvar_S0,
                             mean_B1=Qmean_S1,
                             var_B1=Qvar_S1,
                             theta=Qtheta)
# Example 9
    def _updateParameters(self, Y, W, tau, mask, Alpha, Qmean_T1, Qvar_T1,
                          Qtheta, SZ, theta_lnE, theta_lnEInv):
        """ Hidden method to compute parameter updates

        Spike-and-slab update of the factor matrix Z across M views:
        refreshes the slab mean/variance (Qmean_T1, Qvar_T1), the inclusion
        probabilities (Qtheta) and the running expectation SZ = theta * mean,
        all in place, and returns them.
        """

        # Mask matrices: missing observations contribute zero precision
        for m in range(len(Y)):
            tau[m][mask[m]] = 0.

        # Precompute terms to speed up GPU computation
        N = Qmean_T1.shape[0]
        M = len(Y)
        K = self.dim[1]

        # Optionally re-weight views so each contributes equally regardless
        # of its number of features (weights are normalised to sum to M)
        weights = [1] * M
        if self.weight_views and M > 1:
            total_w = np.asarray([Y[m].shape[1] for m in range(M)]).sum()
            # weights = [(total_w-Y[m].shape[1])/total_w * M / (M-1) for m in range(M)]
            weights = np.asarray(
                [total_w / (M * Y[m].shape[1]) for m in range(M)])
            weights = weights / weights.sum() * M

        # term4_tmp1 = [ gpu_utils.array(Alpha[:,k]) for k in range(self.dim[1]) ]
        # term4_tmp2 = [ gpu_utils.array(Alpha[:,k]) for k in range(self.dim[1]) ]
        # term4_tmp3 = [ gpu_utils.array(Alpha[:,k]) for k in range(self.dim[1]) ]
        # for m in range(M):
        #     tau_gpu = gpu_utils.array(tau[m])
        #     Y_gpu = gpu_utils.array(Y[m])
        #     for k in range(K):
        #         Wk_gpu = gpu_utils.array(W[m]["E"][:,k])
        #         WWk_gpu = gpu_utils.array(W[m]["E2"][:,k])
        #         term4_tmp1[k] += gpu_utils.dot(tau_gpu*Y_gpu, Wk_gpu)
        #         term4_tmp3[k] += gpu_utils.dot(tau_gpu, WWk_gpu)
        # del tau_gpu, Y_gpu, Wk_gpu, WWk_gpu

        # All three accumulators start at Alpha. For term4_tmp3 (posterior
        # precision) the Alpha offset is required; for term4_tmp1/term4_tmp2
        # it cancels, since only their difference (tmp1 - tmp2) is used below.
        term4_tmp1 = gpu_utils.array(s.zeros((N, K)) + Alpha)
        term4_tmp2 = gpu_utils.array(s.zeros((N, K)) + Alpha)
        term4_tmp3 = gpu_utils.array(s.zeros((N, K)) + Alpha)

        for m in range(M):
            tau_gpu = gpu_utils.array(tau[m])
            Y_gpu = gpu_utils.array(Y[m])
            W_gpu = gpu_utils.array(W[m]["E"])
            WW_gpu = gpu_utils.array(W[m]["E2"])
            term4_tmp1 += weights[m] * gpu_utils.dot(tau_gpu * Y_gpu, W_gpu)
            term4_tmp3 += weights[m] * gpu_utils.dot(tau_gpu, WW_gpu)
        del tau_gpu, Y_gpu, W_gpu, WW_gpu  # NOTE: would raise NameError if M == 0

        # Update each latent variable in turn (notice that the update of Z[,k] depends on the other values of Z!)
        for k in range(K):
            # log-odds terms for the inclusion indicator
            term1 = (theta_lnE - theta_lnEInv)[:, k]
            term2 = 0.5 * s.log(Alpha[:, k])

            for m in range(M):
                tau_gpu = gpu_utils.array(tau[m])
                Wk_gpu = gpu_utils.array(W[m]["E"][:, k])
                # cross-talk of factor k with the other factors (uses the
                # current SZ, refreshed at the end of each k-iteration)
                term4_tmp2_tmp = (tau_gpu * gpu_utils.dot(
                    gpu_utils.array(SZ[:, s.arange(K) != k]),
                    (Wk_gpu *
                     gpu_utils.array(W[m]["E"][:, s.arange(K) != k].T)))).sum(
                         axis=1)
                term4_tmp2[:, k] += weights[m] * term4_tmp2_tmp
                del tau_gpu, Wk_gpu, term4_tmp2_tmp

            # term4_tmp3[k] += Alpha[:,k]
            term3 = gpu_utils.asnumpy(0.5 * gpu_utils.log(term4_tmp3[:, k]))
            term4 = gpu_utils.asnumpy(0.5 * gpu_utils.divide(
                gpu_utils.square(term4_tmp1[:, k] - term4_tmp2[:, k]),
                term4_tmp3[:, k]))

            # Update S
            # NOTE there could be some precision issues in T --> loads of 1s in result
            Qtheta[:, k] = 1. / (1. + s.exp(-(term1 + term2 - term3 + term4)))
            Qtheta[:, k] = np.nan_to_num(Qtheta[:, k])

            # Update Z (slab mean and variance)
            Qvar_T1[:, k] = gpu_utils.asnumpy(1. / term4_tmp3[:, k])
            Qmean_T1[:,
                     k] = Qvar_T1[:, k] * gpu_utils.asnumpy(term4_tmp1[:, k] -
                                                            term4_tmp2[:, k])

            # Update Expectations for the next iteration
            SZ[:, k] = Qtheta[:, k] * Qmean_T1[:, k]

        return {'mean_B1': Qmean_T1, 'var_B1': Qvar_T1, 'theta': Qtheta}
# Example 10
    def _updateParameters(self, U, Sigma, GPparam, Qmean, Qvar, Y, W, tau,
                          mask):
        """ Hidden method to compute parameter updates

        Update of q(Z) in the sparse-GP setting: factors whose prior
        covariance is the identity ("unstructured") get the standard
        factor-analysis update; structured factors are predicted from the
        inducing variables U via the GP conditional p(z|u).
        (GPparam is not used in this method.)
        """

        K = self.dim[1]
        N = Sigma['cov'].shape[1]
        M = len(Y)

        # Masking: missing observations contribute zero precision
        for m in range(M):
            tau[m][mask[m]] = 0.

        # Optionally re-weight views so each contributes equally regardless
        # of its number of features (weights are normalised to sum to M)
        weights = [1] * M
        if self.weight_views and M > 1:
            total_w = np.asarray([Y[m].shape[1] for m in range(M)]).sum()
            weights = np.asarray(
                [total_w / (M * Y[m].shape[1]) for m in range(M)])
            weights = weights / weights.sum() * M

        # for non-structured factors take the standard updates for Z, ignoring U

        # Precompute terms to speed up GPU computation (only required for non-structured updates)
        # foo[n,k]             = sum_m w_m * tau[m][n,:] @ E[W_k^2]
        # precomputed_bar[n,k] = sum_m w_m * (tau[m]*Y[m])[n,:] @ E[W_k]
        foo = gpu_utils.array(s.zeros((N, K)))
        precomputed_bar = gpu_utils.array(s.zeros((N, K)))
        for m in range(M):
            tau_gpu = gpu_utils.array(tau[m])
            foo += weights[m] * gpu_utils.dot(tau_gpu,
                                              gpu_utils.array(W[m]["E2"]))
            bar_tmp1 = gpu_utils.array(W[m]["E"])
            bar_tmp2 = tau_gpu * gpu_utils.array(Y[m])
            precomputed_bar += weights[m] * gpu_utils.dot(bar_tmp2, bar_tmp1)
        foo = gpu_utils.asnumpy(foo)

        # Calculate updates
        for k in range(K):
            unstructured = (Sigma['cov'][k] == np.eye(N)).all(
            )  # TODO: Are there better ways to choose between sparse and non-sparse inference depending on factor smoothness?
            if unstructured:  # updates according to q(z) without sparse inference
                bar = gpu_utils.array(s.zeros((N, )))
                tmp_cp1 = gpu_utils.array(Qmean[:, s.arange(K) != k])
                for m in range(M):
                    tmp_cp2 = gpu_utils.array(W[m]["E"][:, s.arange(K) != k].T)

                    bar_tmp1 = gpu_utils.array(W[m]["E"][:, k])
                    # residual after removing reconstruction by other factors
                    bar_tmp2 = gpu_utils.array(
                        tau[m]) * (-gpu_utils.dot(tmp_cp1, tmp_cp2))

                    bar += weights[m] * gpu_utils.dot(bar_tmp2, bar_tmp1)
                bar += precomputed_bar[:, k]
                bar = gpu_utils.asnumpy(bar)

                # unit prior variance: posterior precision = 1 + data precision
                Qvar[:, k] = 1. / (1 + foo[:, k])
                Qmean[:, k] = Qvar[:, k] * bar
            else:  # updates according to p(z|u)
                # GP conditional on the inducing points: mean projected from
                # E[u]; variance = conditional variance + projected Cov[u]
                # (Sigma['inv'][k] presumably inverts the covariance at the
                # inducing points — TODO confirm against caller)
                SigmaZZ = Sigma['cov'][k]
                SigmaZU = SigmaZZ[:, self.idx_inducing]
                p_cov_inv = Sigma['inv'][k, :, :]
                mat = gpu_utils.dot(SigmaZU, p_cov_inv)

                Qmean[:, k] = gpu_utils.dot(mat, U['E'][:, k])
                for n in range(N):
                    exp_var = SigmaZZ[n, n] - gpu_utils.dot(
                        gpu_utils.dot(SigmaZZ[n, self.idx_inducing],
                                      p_cov_inv), SigmaZZ[self.idx_inducing,
                                                          n])
                    var_exp = gpu_utils.dot(
                        gpu_utils.dot(mat[n, :], U['cov'][k, :, :]),
                        mat[n, :].transpose())
                    Qvar[n, k] = exp_var + var_exp

        # Save updated parameters of the Q distribution
        return {'Qmean': Qmean, 'Qvar': Qvar}