Example #1
0
    def calculateELBO_k(self, k):
        """
        Method to calculate ELBO per factor - required for optimization in Sigma node

        Returns the ELBO contribution of factor k. Constant terms are omitted:
        the -N*log(2*pi) term cancels between the p and q parts, and -N/2 is
        added elsewhere (see comment on lb_q below).
        """
        # Collect parameters and expectations of current node
        Qpar, Qexp = self.Q.getParameters(), self.Q.getExpectations()
        Qmean, Qcov = Qpar['mean'], Qpar['cov']

        QE  = Qexp['E']

        # Prior precision terms: taken from the Sigma node when present in the
        # Markov blanket, otherwise from this node's own fixed prior covariance.
        if 'Sigma' in self.markov_blanket:
            Sigma = self.markov_blanket['Sigma'].getInverseTerms()
            p_cov_inv = Sigma['inv']
            p_cov_inv_logdet = Sigma['inv_logdet']
        else:
            p_cov = self.P.params['cov']
            p_cov_inv = self.p_cov_inv
            p_cov_inv_logdet = np.linalg.slogdet(self.p_cov_inv)[1]

        # compute term from the precision factor in front of the Gaussian
        term1 = 0.5 * p_cov_inv_logdet[k]
        # compute term from the exponential in the Gaussian
        term2 = -0.5 * np.trace(gpu_utils.dot(p_cov_inv[k,:,:], Qcov[k,:,:]))
        term3 = -0.5 *  gpu_utils.dot(QE[:,k].transpose(), gpu_utils.dot(p_cov_inv[k,:,:], QE[:,k]))

        # tmp1 = -0.5 * (np.trace(gpu_utils.dot(p_cov_inv[k,:,:], Qcov[k,:,:])) +  gpu_utils.dot(QE[:,k].transpose(), gpu_utils.dot(p_cov_inv[k,:,:], QE[:,k])))# expectation of quadratic form
        # tmp2 = 0.5 * p_cov_inv_logdet[k]
        # lb_p = tmp1 + tmp2

        lb_p = term1 + term2 + term3

        lb_q = -0.5 * np.linalg.slogdet(Qcov[k,:,:])[1] # term -N*(log(2* np.pi)) cancels out between p and q term; -N/2 is added below

        return lb_p - lb_q
Example #2
0
    def calculateELBO_k(self, k):
        """Compute the ELBO contribution of factor k (used by the Sigma node optimization)."""
        # Fetch variational parameters and moments of this node
        q_params = self.Q.getParameters()
        q_exp = self.Q.getExpectations()
        q_mean, q_cov = q_params['mean'], q_params['cov']
        q_E = q_exp['E']

        assert "Sigma" in self.markov_blanket, "Sigma not found in Markov blanket of U node"

        # Prior covariance terms provided by the Sigma node
        sigma_terms = self.markov_blanket['Sigma'].getExpectations()
        prior_cov = sigma_terms['cov']
        prior_inv = sigma_terms['inv']
        prior_inv_logdet = sigma_terms['inv_logdet']

        # Expectation of the quadratic form under q: trace part plus mean part
        trace_part = np.trace(gpu_utils.dot(prior_inv[k, :, :], q_cov[k, :, :]))
        mean_part = gpu_utils.dot(
            q_E[:, k].transpose(),
            gpu_utils.dot(prior_inv[k, :, :], q_E[:, k]))
        quad_term = -0.5 * (trace_part + mean_part)

        # Log-normalizer contribution of the prior precision
        logdet_term = 0.5 * prior_inv_logdet[k]

        # Entropy-like contribution of q; the -N*log(2*pi) constants cancel
        # between the p and q terms (-N/2 is added elsewhere)
        entropy_term = -0.5 * np.linalg.slogdet(q_cov[k, :, :])[1]

        return (quad_term + logdet_term) - entropy_term
Example #3
0
    def _updateParameters(self, Y, Z, tau, Mu, Alpha, Qmean, Qvar, coeff, ro):
        """
        Hidden method to compute stochastic variational updates of Q per factor.

        Qmean and Qvar are modified in place using the interpolation
        new = (1-ro)*old + ro*batch_update, where ro is the stochastic step
        size and coeff rescales mini-batch statistics to the full data set.
        """
        for k in range(self.dim[1]):
            # Likelihood precision contribution for factor k (scaled to full data)
            foo = coeff * np.dot(Z["E2"][:, k], tau)

            bar_tmp1 = gpu_utils.array(Z["E"][:, k])

            # Residual of Y after removing the contribution of all other factors,
            # weighted by tau
            bar_tmp2 = -gpu_utils.dot(
                gpu_utils.array(Z["E"][:, s.arange(self.dim[1]) != k]),
                gpu_utils.array(Qmean[:, s.arange(self.dim[1]) != k].T))
            bar_tmp2 += gpu_utils.array(Y)
            bar_tmp2 *= gpu_utils.array(tau)

            bar = coeff * gpu_utils.asnumpy(gpu_utils.dot(bar_tmp1, bar_tmp2))

            # stochastic update of W
            Qvar[:, k] *= (1 - ro)
            Qvar[:, k] += ro / (Alpha[:, k] + foo)

            # NOTE Do not use "Qvar" in the update like we used to because this
            # does not hold for stochastic because of the ro weighting
            Qmean[:, k] *= (1 - ro)
            Qmean[:,
                  k] += ro * (1 /
                              (Alpha[:, k] + foo)) * (bar +
                                                      Alpha[:, k] * Mu[:, k])
Example #4
0
    def calc_Sigma_element(self, par, lidx, k, id1, id2):
        """
        Method to calculated elements of Sigma matrix in hyperparameters
        Only used for debugging purposes

        par: hyperparameter vector; par[0] is the noise mixing weight zeta,
             and (if model_groups) par[1] is sigma, par[2:] the flattened
             low-rank group-kernel factors x
        lidx: index into the lengthscale grid of the covariate kernel
        k: factor index
        id1, id2: row/column of the Sigma element to return
        """
        self.zeta[k] = par[0]
        # set lengthscale parameter
        self.Kc.set_gridix(lidx, k)

        # if required set group parameters
        if self.model_groups:
            sigma = par[1]
            x = par[2:]

            assert len(x) == self.Kg.rank * self.G, \
                "Length of x incorrect: Is %s, should be  %s * %s" % (len(x), self.Kg.rank, self.G)
            x = x.reshape(self.Kg.rank, self.G)
            self.Kg.set_parameters(x=x, sigma=sigma, k=k,
                                   spectral_decomp=self.kronecker)

        if self.kronecker:
            # Rebuild the full kernel matrices from their spectral decompositions
            Vc, Dc = self.Kc.get_kernel_components_k(k)
            Kc = gpu_utils.dot(gpu_utils.dot(Vc, np.diag(Dc)), Vc.transpose())
            Vg, Dg = self.Kg.get_kernel_components_k(k)
            Kg = gpu_utils.dot(gpu_utils.dot(Vg, np.diag(Dg)), Vg.transpose())
            # Sigma = (1-zeta) * (Kg kron Kc) + zeta * I
            val = (1-self.zeta[k]) * np.kron(Kg, Kc) + self.zeta[k] * np.eye(self.Nu)
        else:
            Kc = self.Kc.Kmat[self.Kc.get_best_lidx(k),:,:]
            Kg = self.Kg.Kmat[k,:,:]
            if self.model_groups:
                # Expand kernels to sample level via covidx/groupsidx before mixing in noise
                val = (1-self.zeta[k]) * Kc[ self.covidx, :][:,self.covidx] * Kg[self.groupsidx, :][:, self.groupsidx] + self.zeta[k] *np.eye(self.Nu)
            else:
                val = (1-self.zeta[k]) * Kc + self.zeta[k] *np.eye(self.Nu)

        return val[id1,id2]
Example #5
0
    def _updateParameters(self, Y, W, Z, tau, Qmean, Qcov, SigmaUZ, p_cov_inv,
                          mask):
        """ Hidden method to compute parameter updates

        Updates Qmean/Qcov in place for each factor k, combining the
        likelihood terms across the M views with the structured prior given
        by p_cov_inv and the cross-covariance SigmaUZ.
        """

        M = len(Y)
        N = Y[0].shape[0]
        K = self.dim[1]

        # Masking: missing entries contribute zero precision
        for m in range(M):
            tau[m][mask[m]] = 0.

        # Optionally reweight views by their number of features so that no
        # single view dominates the updates
        weights = [1] * M
        if self.weight_views and M > 1:
            total_w = np.asarray([Y[m].shape[1] for m in range(M)]).sum()
            weights = np.asarray(
                [total_w / (M * Y[m].shape[1]) for m in range(M)])
            weights = weights / weights.sum() * M

        # Precompute terms to speed up GPU computation
        foo = gpu_utils.array(s.zeros((N, K)))
        precomputed_bar = gpu_utils.array(s.zeros((N, K)))
        for m in range(M):
            tau_gpu = gpu_utils.array(tau[m])
            foo += weights[m] * gpu_utils.dot(tau_gpu,
                                              gpu_utils.array(W[m]["E2"]))
            bar_tmp1 = gpu_utils.array(W[m]["E"])
            bar_tmp2 = tau_gpu * gpu_utils.array(Y[m])
            precomputed_bar += weights[m] * gpu_utils.dot(bar_tmp2, bar_tmp1)
        foo = gpu_utils.asnumpy(foo)

        # Calculate variational updates - term for mean
        for k in range(K):
            # Residual-based term: remove the contribution of all other factors
            bar = gpu_utils.array(s.zeros((N, )))
            tmp_cp1 = gpu_utils.array(Z['E'][:, s.arange(K) != k])
            for m in range(M):
                tmp_cp2 = gpu_utils.array(W[m]["E"][:, s.arange(K) != k].T)

                bar_tmp1 = gpu_utils.array(W[m]["E"][:, k])
                bar_tmp2 = gpu_utils.array(
                    tau[m]) * (-gpu_utils.dot(tmp_cp1, tmp_cp2))

                bar += weights[m] * gpu_utils.dot(bar_tmp2, bar_tmp1)
            bar += precomputed_bar[:, k]
            bar = gpu_utils.asnumpy(bar)

            # note: no Alpha scaling required here compared to Z nodes as done in the updateParameters function
            Mcross = gpu_utils.dot(p_cov_inv[k, :, :], SigmaUZ[k, :, :])
            Mtmp = gpu_utils.dot(
                Mcross, gpu_utils.dot(np.diag(foo[:, k]), Mcross.transpose()))
            Qcov[k, :, :] = np.linalg.inv(Mtmp + p_cov_inv[k, :, :])
            Qmean[:, k] = gpu_utils.dot(
                Qcov[k, :, :],
                gpu_utils.dot(
                    gpu_utils.dot(p_cov_inv[k, :, :], SigmaUZ[k, :, :]), bar))

        return {'Qmean': Qmean, 'Qcov': Qcov}
Example #6
0
    def _updateParameters(self, Y, W, tau, Mu, Alpha, Qmean, Qvar, mask):
        """ Hidden method to compute parameter updates

        Standard (non-stochastic) coordinate updates of Qmean/Qvar per factor,
        aggregating likelihood terms across the M views. Qmean/Qvar are
        updated in place and returned.
        """
        # Speed analysis: the pre-computation part does not benefit from GPU, but the next updates doe

        N = Y[0].shape[0]  # this is different from self.N for minibatch
        M = len(Y)
        K = self.dim[1]

        # Masking: missing entries contribute zero precision
        for m in range(M):
            tau[m][mask[m]] = 0.

        # Optionally reweight views by their number of features
        weights = [1] * M
        if self.weight_views and M > 1:
            total_w = np.asarray([Y[m].shape[1] for m in range(M)]).sum()
            weights = np.asarray(
                [total_w / (M * Y[m].shape[1]) for m in range(M)])
            weights = weights / weights.sum() * M
            # weights = [(total_w-Y[m].shape[1])/total_w * M / (M-1) for m in range(M)]

        # Precompute terms to speed up GPU computation
        foo = gpu_utils.array(s.zeros((N, K)))
        precomputed_bar = gpu_utils.array(s.zeros((N, K)))
        for m in range(M):
            tau_gpu = gpu_utils.array(tau[m])
            foo += weights[m] * gpu_utils.dot(tau_gpu,
                                              gpu_utils.array(W[m]["E2"]))
            bar_tmp1 = gpu_utils.array(W[m]["E"])
            bar_tmp2 = tau_gpu * gpu_utils.array(Y[m])
            precomputed_bar += weights[m] * gpu_utils.dot(bar_tmp2, bar_tmp1)
        foo = gpu_utils.asnumpy(foo)

        # Calculate variational updates
        for k in range(K):
            # Residual term: remove the contribution of all factors except k
            bar = gpu_utils.array(s.zeros((N, )))
            tmp_cp1 = gpu_utils.array(Qmean[:, s.arange(K) != k])
            for m in range(M):
                tmp_cp2 = gpu_utils.array(W[m]["E"][:, s.arange(K) != k].T)

                bar_tmp1 = gpu_utils.array(W[m]["E"][:, k])
                bar_tmp2 = gpu_utils.array(
                    tau[m]) * (-gpu_utils.dot(tmp_cp1, tmp_cp2))

                bar += weights[m] * gpu_utils.dot(bar_tmp2, bar_tmp1)
            bar += precomputed_bar[:, k]
            bar = gpu_utils.asnumpy(bar)

            # Gaussian updates: posterior precision = prior precision + likelihood precision
            Qvar[:, k] = 1. / (Alpha[:, k] + foo[:, k])
            Qmean[:, k] = Qvar[:, k] * (bar + Alpha[:, k] * Mu[:, k])

        # Save updated parameters of the Q distribution
        return {'Qmean': Qmean, 'Qvar': Qvar}
Example #7
0
    def calcELBOgrad_k(self, k, gradSigma):
        """
        Method to calculate ELBO gradients per factor - required for optimization in Sigma node
        """
        q_params, q_exp = self.Q.getParameters(), self.Q.getExpectations()
        q_mean, q_cov = q_params['mean'], q_params['cov']
        q_E = q_exp['E']

        assert "Sigma" in self.markov_blanket, "Sigma not found in Markov blanket of U node"
        sigma_terms = self.markov_blanket['Sigma'].getExpectations()
        prior_cov = sigma_terms['cov']
        prior_inv = sigma_terms['inv']
        prior_inv_logdet = sigma_terms['inv_logdet']

        # Per-factor prior precision
        Pk = prior_inv[k, :, :]

        # Gradient of the log-determinant term
        grad_logdet = -0.5 * np.trace(gpu_utils.dot(gradSigma, Pk))
        # Gradient of the trace part of the expected quadratic form
        grad_trace = 0.5 * np.trace(
            gpu_utils.dot(
                Pk,
                gpu_utils.dot(gradSigma,
                              gpu_utils.dot(Pk, q_cov[k, :, :]))))
        # Gradient of the mean part of the expected quadratic form
        grad_mean = 0.5 * gpu_utils.dot(
            q_E[:, k].transpose(),
            gpu_utils.dot(
                Pk,
                gpu_utils.dot(gradSigma,
                              gpu_utils.dot(Pk, q_E[:, k]))))

        return grad_logdet + grad_trace + grad_mean
Example #8
0
    def calculateELBO_k(self, k):
        """ Method to calulcate the ELBO term for the k-th factor (required for the grid search on the optimal lengthscale in sigma per factor) """

        # Collect parameters and expectations of current node
        Qpar, Qexp = self.Q.getParameters(), self.Q.getExpectations()
        Qmean, Qvar = Qpar['mean'], Qpar['var']
        QE, QE2 = Qexp['E'], Qexp['E2']

        # Prior covariance terms: from the Sigma node if present, otherwise
        # from this node's own fixed prior covariance
        if 'Sigma' in self.markov_blanket:
            Sigma = self.markov_blanket['Sigma'].getExpectations()
            p_cov = Sigma['cov']
            p_cov_inv = Sigma['inv']
            p_cov_inv_diag = Sigma['inv_diag']
            p_cov_inv_logdet = Sigma['inv_logdet']
        else:
            p_cov = self.P.params['cov']
            p_cov_inv = self.p_cov_inv
            p_cov_inv_diag = self.p_cov_inv_diag
            p_cov_inv_logdet = np.linalg.slogdet(self.p_cov_inv)[1]

        # Per-sample precisions: from the AlphaZ node if present, else all ones
        if 'AlphaZ' in self.markov_blanket:
            Alpha = self.markov_blanket['AlphaZ'].getExpectations(expand=True)
        else:
            Alpha = dict()
            Alpha['E'] = s.ones((self.N, self.K)) * 1.
            Alpha['lnE'] = s.zeros((self.N, self.K))

        # compute term from the exponential in the Gaussian
        # Off-diagonal part of the precision, scaled by sqrt(Alpha) on both sides;
        # the diagonal part is handled separately via QE2 below
        p_cov_inv_k_with_zerodiag = p_cov_inv[
            k, :, :] - p_cov_inv_diag[k, :] * s.eye(self.N)
        scaled_inv_with_zerodiag = gpu_utils.dot(
            gpu_utils.dot(np.diag(np.sqrt(Alpha['E'][:, k])),
                          p_cov_inv_k_with_zerodiag[:, :]),
            np.diag(np.sqrt(Alpha['E'][:, k])))

        tmp1 = -0.5 * QE[:, k].transpose().dot(
            (scaled_inv_with_zerodiag.dot(QE[:, k]))) - 0.5 * (
                (Alpha['E'][:, k] * p_cov_inv_diag[k, :]).dot(QE2[:, k]))

        # compute term from the precision factor in front of the Gaussian
        tmp2 = 0.5 * p_cov_inv_logdet[k] + 0.5 * Alpha["lnE"][:, k].sum()
        lb_p = tmp1 + tmp2

        lb_q = -0.5 * s.log(Qvar[:, k]).sum(
        )  # term -N*K*(log(2* np.pi)) cancels out between p and q term; -N/2 is added below

        return lb_p - lb_q
Example #9
0
    def _updateParameters(self, Y, W, tau, Qmean, Qcov, p_cov_inv, mask):
        """ Hidden method to compute parameter updates

        Full-covariance Gaussian updates of Qmean/Qcov per factor, combining
        the likelihood terms across the M views with the structured prior
        precision p_cov_inv. Qmean/Qcov are updated in place and returned.
        """

        N = Y[0].shape[0]  # this is different from self.N for minibatch
        M = len(Y)
        K = self.dim[1]

        # Masking: missing entries contribute zero precision
        for m in range(M):
            tau[m][mask[m]] = 0.

        # Optionally reweight views by their number of features
        weights = [1] * M
        if self.weight_views and M > 1:
            total_w = np.asarray([Y[m].shape[1] for m in range(M)]).sum()
            weights = np.asarray([total_w / (M * Y[m].shape[1]) for m in range(M)])
            weights = weights / weights.sum() * M

        # Precompute terms to speed up GPU computation
        foo = gpu_utils.array(s.zeros((N,K)))
        precomputed_bar = gpu_utils.array(s.zeros((N,K)))
        for m in range(M):
            tau_gpu = gpu_utils.array(tau[m])
            foo += weights[m] * gpu_utils.dot(tau_gpu, gpu_utils.array(W[m]["E2"]))
            bar_tmp1 = gpu_utils.array(W[m]["E"])
            bar_tmp2 = tau_gpu * gpu_utils.array(Y[m])
            precomputed_bar += weights[m] * gpu_utils.dot(bar_tmp2, bar_tmp1)
        foo = gpu_utils.asnumpy(foo)

        # Calculate variational updates
        for k in range(K):
            # Residual term: remove the contribution of all factors except k
            bar = gpu_utils.array(s.zeros((N,)))
            tmp_cp1 = gpu_utils.array(Qmean[:, s.arange(K) != k])
            for m in range(M):
                tmp_cp2 = gpu_utils.array(W[m]["E"][:, s.arange(K) != k].T)

                bar_tmp1 = gpu_utils.array(W[m]["E"][:,k])
                bar_tmp2 = gpu_utils.array(tau[m])*(-gpu_utils.dot(tmp_cp1, tmp_cp2))

                bar += weights[m] * gpu_utils.dot(bar_tmp2, bar_tmp1)
            bar += precomputed_bar[:,k]
            bar = gpu_utils.asnumpy(bar)

            # Posterior covariance: invert (diagonal likelihood precision + prior precision)
            Qcov[k,:,:] = np.linalg.inv(np.eye(N) * foo[:,k] + p_cov_inv[k,:,:])
            Qmean[:, k] = gpu_utils.dot(Qcov[k,:,:], bar)

        # Save updated parameters of the Q distribution
        return {'Qmean': Qmean, 'Qcov':Qcov}
Example #10
0
    def calcELBOgrad_k(self, k, gradSigma):
        """
        Method to calculate ELBO gradients per factor - required for optimization in Sigma node
        """
        q_params, q_exp = self.Q.getParameters(), self.Q.getExpectations()
        q_mean, q_cov = q_params['mean'], q_params['cov']
        q_E = q_exp['E']

        # Prior precision terms: from the Sigma node if present, otherwise
        # from this node's own fixed prior covariance
        if 'Sigma' in self.markov_blanket:
            sigma_terms = self.markov_blanket['Sigma'].getInverseTerms()
            prior_inv = sigma_terms['inv']
            prior_inv_logdet = sigma_terms['inv_logdet']
        else:
            prior_cov = self.P.params['cov']
            prior_inv = self.p_cov_inv
            prior_inv_logdet = np.linalg.slogdet(self.p_cov_inv)[1]

        # Per-factor prior precision
        Pk = prior_inv[k, :, :]

        # Gradient of the log-determinant term
        grad_logdet = -0.5 * np.trace(gpu_utils.dot(gradSigma, Pk))
        # Gradient of the trace part of the expected quadratic form
        grad_trace = 0.5 * np.trace(
            gpu_utils.dot(Pk,
                          gpu_utils.dot(gradSigma,
                                        gpu_utils.dot(Pk, q_cov[k, :, :]))))
        # Gradient of the mean part of the expected quadratic form
        grad_mean = 0.5 * gpu_utils.dot(
            q_E[:, k].transpose(),
            gpu_utils.dot(Pk,
                          gpu_utils.dot(gradSigma,
                                        gpu_utils.dot(Pk, q_E[:, k]))))

        return grad_logdet + grad_trace + grad_mean
Example #11
0
    def _updateParameters(self, Y, W, tau, Alpha, Qmean, Qvar, p_cov_inv,
                          p_cov_inv_diag, mask):
        """ Hidden method to compute parameter updates

        Mean-field (diagonal) Gaussian updates of Qmean/Qvar per factor under
        a structured prior: the diagonal of the prior precision enters Qvar,
        while the off-diagonal part enters the mean update via the current
        Qmean (zeros on the diagonal make the full product safe to use).
        """

        N = Y[0].shape[0]  # this is different from self.N for minibatch
        M = len(Y)
        K = self.dim[1]

        # Masking: missing entries contribute zero precision
        for m in range(M):
            tau[m][mask[m]] = 0.

        # Optionally reweight views by their number of features
        weights = [1] * M
        if self.weight_views and M > 1:
            total_w = np.asarray([Y[m].shape[1] for m in range(M)]).sum()
            weights = np.asarray(
                [total_w / (M * Y[m].shape[1]) for m in range(M)])
            weights = weights / weights.sum() * M

        # Precompute terms to speed up GPU computation
        foo = gpu_utils.array(s.zeros((N, K)))
        precomputed_bar = gpu_utils.array(s.zeros((N, K)))
        for m in range(M):
            tau_gpu = gpu_utils.array(tau[m])
            foo += weights[m] * gpu_utils.dot(tau_gpu,
                                              gpu_utils.array(W[m]["E2"]))
            bar_tmp1 = gpu_utils.array(W[m]["E"])
            bar_tmp2 = tau_gpu * gpu_utils.array(Y[m])
            precomputed_bar += weights[m] * gpu_utils.dot(bar_tmp2, bar_tmp1)
        foo = gpu_utils.asnumpy(foo)

        # Calculate variational updates
        for k in range(K):
            # Residual term: remove the contribution of all factors except k
            bar = gpu_utils.array(s.zeros((N, )))
            tmp_cp1 = gpu_utils.array(Qmean[:, s.arange(K) != k])
            for m in range(M):
                tmp_cp2 = gpu_utils.array(W[m]["E"][:, s.arange(K) != k].T)

                bar_tmp1 = gpu_utils.array(W[m]["E"][:, k])
                bar_tmp2 = gpu_utils.array(
                    tau[m]) * (-gpu_utils.dot(tmp_cp1, tmp_cp2))

                bar += weights[m] * gpu_utils.dot(bar_tmp2, bar_tmp1)
            bar += precomputed_bar[:, k]
            bar = gpu_utils.asnumpy(bar)

            # Off-diagonal prior precision scaled by sqrt(Alpha) on both sides
            p_cov_inv_k_with_zerodiag = p_cov_inv[
                k, :, :] - p_cov_inv_diag[k, :] * s.eye(N)
            scaled_inv_with_zerodiag = gpu_utils.dot(
                gpu_utils.dot(np.diag(np.sqrt(Alpha[:, k])),
                              p_cov_inv_k_with_zerodiag),
                np.diag(np.sqrt(Alpha[:, k])))

            Qvar[:, k] = 1. / (Alpha[:, k] * p_cov_inv_diag[k, :].transpose() +
                               foo[:, k])
            Qmean[:, k] = Qvar[:, k] * (bar - scaled_inv_with_zerodiag.dot(
                Qmean[:, k]))  # can take all samples here as zeros on diagonal

        # Save updated parameters of the Q distribution
        return {'Qmean': Qmean, 'Qvar': Qvar}
    def calculateELBO(self):
        """
        Compute the ELBO likelihood term using the Bernoulli (logistic)
        likelihood on the observed data; masked entries contribute zero.
        """
        # Expectations of the latent factors and weights
        Z = self.markov_blanket["Z"].getExpectation()
        W = self.markov_blanket["W"].getExpectation()
        mask = self.getMask()

        # Linear predictor E[Z] E[W]^T (computed on GPU when available)
        # tmp = s.dot(Z,W.T)
        tmp = gpu_utils.asnumpy( gpu_utils.dot( gpu_utils.array(Z),gpu_utils.array(W).T ) )

        # Bernoulli log-likelihood: y*eta - log(1+exp(eta)).
        # np.logaddexp(0, eta) equals log(1+exp(eta)) but avoids overflow in
        # exp() for large eta, which the previous s.log(1+s.exp(tmp)) did not.
        lb = self.obs*tmp - np.logaddexp(0., tmp)
        lb[mask] = 0.

        return lb.sum()
Example #13
0
    def calc_sigma_terms_k(self, k, only_inverse = False):
        """
        Method to compute the inverse of sigma and its log determinant based on the spectral decomposition
         of the kernel matrices for a given factor k

        If only_inverse is True, only Sigma_inv and its log-determinant are
        refreshed and the stored Sigma matrix is left untouched.
        """
        if self.zeta[k] == 1:
            # Pure-noise factor: Sigma is the identity, hence Sigma_inv is the
            # identity and log|Sigma_inv| = 0 (previously set to 1, which is
            # wrong since the log-determinant of an identity matrix is 0).
            self.Sigma_inv[k, :, :] = np.eye(self.Nu)
            self.Sigma_inv_logdet[k] = 0
            # NOTE(review): Sigma uses self.N while Sigma_inv uses self.Nu —
            # presumably N == Nu in the non-sparse case; confirm against callers.
            self.Sigma[k, :, :] = np.eye(self.N)
        else:
            if self.kronecker:
                # Use the Kronecker structure: Sigma = (1-zeta) Kg x Kc + zeta I,
                # inverted via the spectral decompositions of Kg and Kc
                components = self.get_components(k)
                term1 = np.kron(components['Vg'], components['Vc'])
                term2diag = 1/ (np.repeat(components['Dg'], self.C) * np.tile(components['Dc'], self.G) + self.zeta[k] / (1-self.zeta[k]))
                term3 = np.kron(components['Vg'].transpose(), components['Vc'].transpose())
                self.Sigma_inv[k, :, :] = 1 / (1 - self.zeta[k]) * gpu_utils.dot(gpu_utils.dot(term1, np.diag(term2diag)), term3)
                self.Sigma_inv_logdet[k] = - self.Nu * s.log(1 - self.zeta[k]) + s.log(term2diag).sum()

                if not only_inverse:
                    components = self.get_components(k)
                    term1 = np.kron(components['Vg'], components['Vc'])
                    term2diag = np.repeat(components['Dg'], self.C) * np.tile(components['Dc'], self.G) + \
                                self.zeta[k] / (1 - self.zeta[k])
                    term3 = np.kron(components['Vg'].transpose(), components['Vc'].transpose())
                    self.Sigma[k, :, :] = (1 - self.zeta[k]) * gpu_utils.dot(term1,
                                                                        gpu_utils.dot(np.diag(term2diag),
                                                                                      term3))
            else:
                # No Kronecker structure: build Sigma explicitly and invert numerically
                if self.model_groups:
                    Sigma = (1 - self.zeta[k]) * self.Kc.Kmat[self.Kc.get_best_lidx(k), self.covidx,:][:, self.covidx] * self.Kg.Kmat[k,self.groupsidx,:][:,self.groupsidx] + self.zeta[k] * np.eye(self.N)
                else:
                    Sigma = (1 - self.zeta[k]) * self.Kc.Kmat[self.Kc.get_best_lidx(k), :, :] + self.zeta[k] * np.eye(self.N)
                self.Sigma_inv[k, :, :] = np.linalg.inv(Sigma)
                self.Sigma_inv_logdet[k] = np.linalg.slogdet(self.Sigma_inv[k, :, :])[1]
                if not only_inverse:
                    self.Sigma[k, :, :] = Sigma
Example #14
0
    def _updateParameters(self, Y, W, WW, Z, ZZ, Pa, Pb, mask, ro, groups):
        """ Hidden method to compute parameter updates

        Stochastic update of the Gamma parameters (a, b) of Q(tau), per group:
        new = (1-ro)*old + ro*(prior + mini-batch statistic scaled by coeff to
        the full group size). Qa/Qb are updated in place and returned.
        """
        Q = self.Q.getParameters()
        Qa, Qb = Q['a'], Q['b']

        # Move matrices to the GPU
        Y_gpu = gpu_utils.array(Y)
        Z_gpu = gpu_utils.array(Z)
        W_gpu = gpu_utils.array(W).T

        # Calculate terms for the update (SPEED EFFICIENT, MEMORY INEFFICIENT FOR GPU)
        # ZW = Z_gpu.dot(W_gpu)
        # tmp = gpu_utils.asnumpy( gpu_utils.square(Y_gpu) \
        #     + gpu_utils.array(ZZ).dot(gpu_utils.array(WW.T)) \
        #     - gpu_utils.dot(gpu_utils.square(Z_gpu),gpu_utils.square(W_gpu)) + gpu_utils.square(ZW) \
        #     - 2*ZW*Y_gpu )
        # tmp[mask] = 0.

        # Calculate terms for the update (SPEED INEFFICIENT, MEMORY EFFICIENT FOR GPU)
        # tmp holds E[(Y - ZW)^2] per entry; masked entries are zeroed out
        tmp = gpu_utils.asnumpy( gpu_utils.square(Y_gpu) \
            + gpu_utils.array(ZZ).dot(gpu_utils.array(WW.T)) \
            - gpu_utils.dot(gpu_utils.square(Z_gpu),gpu_utils.square(W_gpu)) + gpu_utils.square(Z_gpu.dot(W_gpu)) \
            - 2*Z_gpu.dot(W_gpu)*Y_gpu )
        tmp[mask] = 0.

        # Compute updates
        Qa *= (1 - ro)
        Qb *= (1 - ro)
        for g in range(self.n_groups):
            g_mask = (groups == g)

            n_batch = g_mask.sum()
            if n_batch == 0: continue

            # Calculate scaling coefficient for mini-batch
            coeff = self.n_per_group[g] / n_batch

            # Shape update: half the number of observed entries per feature
            Qa[g, :] += ro * (
                Pa[g, :] + 0.5 * coeff *
                (mask[g_mask, :].shape[0] - mask[g_mask, :].sum(axis=0)))
            # Rate update: half the expected squared residuals per feature
            Qb[g, :] += ro * (Pb[g, :] +
                              0.5 * coeff * tmp[g_mask, :].sum(axis=0))

        return Qa, Qb
Example #15
0
 def updateParameters(self, ix=None, ro=None):
     """Set zeta to the expected linear predictor E[Z] E[W]^T.

     ix and ro are accepted for interface compatibility with other nodes'
     updateParameters methods and are not used here.
     """
     z_exp = self.markov_blanket["Z"].getExpectation()
     w_exp = self.markov_blanket["W"].getExpectation()
     # Compute on the GPU when available: zeta = E[Z] . E[W]^T
     self.params["zeta"] = gpu_utils.dot(gpu_utils.array(z_exp),
                                         gpu_utils.array(w_exp).T)
Example #16
0
    def _updateParameters(self, Y, Z, tau, mask, Alpha, Qmean_S1, Qvar_S1,
                          Qvar_S0, Qtheta, SW, theta_lnE, theta_lnEInv, coeff,
                          ro):
        """
        Hidden method to compute stochastic spike-and-slab updates of Q(W).

        Updates Qtheta (spike probability), Qmean_S1/Qvar_S1 (slab mean and
        variance) and Qvar_S0 in place per factor, using the interpolation
        new = (1-ro)*old + ro*batch_update; coeff rescales mini-batch
        statistics to the full data set. Results are stored back into self.Q.
        """

        # Mask matrices: missing entries contribute zero precision
        tau[mask] = 0.

        # Copy matrices to GPU
        # Y_gpu = gpu_utils.array(Y)
        tau_gpu = gpu_utils.array(tau)
        Z_gpu = gpu_utils.array(Z["E"])
        ZZ_gpu = gpu_utils.array(Z["E2"])
        # precompute terms
        # tauY_gpu = gpu_utils.array(tau*Y).T
        tauY_gpu = (tau_gpu * gpu_utils.array(Y)).T

        # foo: per-feature likelihood precision contribution for each factor
        foo = gpu_utils.asnumpy(gpu_utils.dot(ZZ_gpu.T, tau_gpu).T)
        term4_tmp1 = gpu_utils.asnumpy(gpu_utils.dot(tauY_gpu, Z_gpu))

        # Free GPU memory for the large intermediates
        del tauY_gpu, ZZ_gpu

        # Update each latent variable in turn
        for k in range(self.dim[1]):

            # Compute terms
            # term1: prior log-odds of the spike indicator
            term1 = (theta_lnE - theta_lnEInv)[:, k]

            term2 = 0.5 * s.log(Alpha[:, k])
            term3 = 0.5 * coeff * s.log(foo[:, k] + Alpha[:, k])

            # term4_tmp1 = gpu_utils.dot(tauYT, Zk_cp)

            # term4_tmp2_1 = gpu_utils.array(SW[:,s.arange(self.dim[1])!=k].T)
            # term4_tmp2_2 = (Z_gpu[:,k] * gpu_utils.array(Z['E'][:,s.arange(self.dim[1])!=k]).T).T
            # term4_tmp2 = (tau_gpu*gpu_utils.dot(term4_tmp2_2, term4_tmp2_1)).sum(axis=0)
            # Cross-term with all other factors (removes their contribution)
            term4_tmp2 = gpu_utils.asnumpy((tau_gpu * gpu_utils.dot(
                (Z_gpu[:, k] *
                 gpu_utils.array(Z['E'][:, s.arange(self.dim[1]) != k]).T).T,
                gpu_utils.array(SW[:, s.arange(self.dim[1]) != k].T))).sum(
                    axis=0))

            term4_tmp3 = foo[:, k] + Alpha[:, k]

            term4 = coeff * 0.5 * s.divide(
                s.square(term4_tmp1[:, k] - term4_tmp2), term4_tmp3)

            # Update S
            Qtheta[:, k] *= (1 - ro)
            Qtheta[:,
                   k] += ro * (1. /
                               (1. + s.exp(-(term1 + term2 - term3 + term4))))

            # Update W
            tmp_var = 1. / term4_tmp3
            Qvar_S1[:, k] *= (1 - ro)
            Qvar_S1[:, k] += ro * tmp_var

            Qmean_S1[:, k] *= (1 - ro)
            Qmean_S1[:, k] += ro * tmp_var * (term4_tmp1[:, k] - term4_tmp2)

            # Update Expectations for the next iteration
            SW[:, k] = Qtheta[:, k] * Qmean_S1[:, k]

            del term1, term2, term3, term4_tmp2, term4_tmp3

        # update of Qvar_S0
        Qvar_S0 *= (1 - ro)
        Qvar_S0 += ro / Alpha

        # Save updated parameters of the Q distribution
        self.Q.setParameters(mean_B0=s.zeros((self.dim[0], self.dim[1])),
                             var_B0=Qvar_S0,
                             mean_B1=Qmean_S1,
                             var_B1=Qvar_S1,
                             theta=Qtheta)
Example #17
0
    def _updateParameters(self, U, Sigma, GPparam, Qmean, Qvar, Y, W, tau,
                          mask):
        """ Hidden method to compute parameter updates

        Updates Qmean/Qvar in place for each factor: factors whose prior
        covariance is the identity get the standard (unstructured) q(z)
        updates; structured factors are updated from p(z|u) via the inducing
        points. GPparam is accepted but not used here.
        """

        K = self.dim[1]
        N = Sigma['cov'].shape[1]
        M = len(Y)

        # Masking: missing entries contribute zero precision
        for m in range(M):
            tau[m][mask[m]] = 0.

        # Optionally reweight views by their number of features
        weights = [1] * M
        if self.weight_views and M > 1:
            total_w = np.asarray([Y[m].shape[1] for m in range(M)]).sum()
            weights = np.asarray(
                [total_w / (M * Y[m].shape[1]) for m in range(M)])
            weights = weights / weights.sum() * M

        # for non-structured factors take the standard updates for Z, ignoring U

        # Precompute terms to speed up GPU computation (only required for non-structured updates)
        foo = gpu_utils.array(s.zeros((N, K)))
        precomputed_bar = gpu_utils.array(s.zeros((N, K)))
        for m in range(M):
            tau_gpu = gpu_utils.array(tau[m])
            foo += weights[m] * gpu_utils.dot(tau_gpu,
                                              gpu_utils.array(W[m]["E2"]))
            bar_tmp1 = gpu_utils.array(W[m]["E"])
            bar_tmp2 = tau_gpu * gpu_utils.array(Y[m])
            precomputed_bar += weights[m] * gpu_utils.dot(bar_tmp2, bar_tmp1)
        foo = gpu_utils.asnumpy(foo)

        # Calculate updates
        for k in range(K):
            unstructured = (Sigma['cov'][k] == np.eye(N)).all(
            )  # TODO: Are there better ways to choose between sparse and non-sparse inference depending on factor smoothness?
            if unstructured:  # updates according to q(z) without sparse inference
                # Residual term: remove the contribution of all factors except k
                bar = gpu_utils.array(s.zeros((N, )))
                tmp_cp1 = gpu_utils.array(Qmean[:, s.arange(K) != k])
                for m in range(M):
                    tmp_cp2 = gpu_utils.array(W[m]["E"][:, s.arange(K) != k].T)

                    bar_tmp1 = gpu_utils.array(W[m]["E"][:, k])
                    bar_tmp2 = gpu_utils.array(
                        tau[m]) * (-gpu_utils.dot(tmp_cp1, tmp_cp2))

                    bar += weights[m] * gpu_utils.dot(bar_tmp2, bar_tmp1)
                bar += precomputed_bar[:, k]
                bar = gpu_utils.asnumpy(bar)

                Qvar[:, k] = 1. / (1 + foo[:, k])
                Qmean[:, k] = Qvar[:, k] * bar
            else:  # updates according to p(z|u)
                # Project the inducing-point posterior U onto all samples
                SigmaZZ = Sigma['cov'][k]
                SigmaZU = SigmaZZ[:, self.idx_inducing]
                p_cov_inv = Sigma['inv'][k, :, :]
                mat = gpu_utils.dot(SigmaZU, p_cov_inv)

                Qmean[:, k] = gpu_utils.dot(mat, U['E'][:, k])
                for n in range(N):
                    # Conditional prior variance of z_n given u
                    exp_var = SigmaZZ[n, n] - gpu_utils.dot(
                        gpu_utils.dot(SigmaZZ[n, self.idx_inducing],
                                      p_cov_inv), SigmaZZ[self.idx_inducing,
                                                          n])
                    # Variance propagated from the posterior uncertainty of u
                    var_exp = gpu_utils.dot(
                        gpu_utils.dot(mat[n, :], U['cov'][k, :, :]),
                        mat[n, :].transpose())
                    Qvar[n, k] = exp_var + var_exp

        # Save updated parameters of the Q distribution
        return {'Qmean': Qmean, 'Qvar': Qvar}
Example #18
0
    def calc_gradient_Sigma(self, par, lidx, k):
        """
        Calculate gradients of the covariance matrix Sigma_k wrt its hyperparameters.

        Parameters
        ----------
        par : array-like
            Hyperparameter vector for factor k: par[0] is zeta (noise/residual
            mixing weight). If self.model_groups, par[1] is sigma (diagonal
            offset of the unscaled group kernel) and par[2:] is the flattened
            low-rank factor x of shape (self.Kg.rank, self.G).
        lidx : int
            Index into the lengthscale grid of the covariate kernel Kc.
        k : int
            Factor index.

        Returns
        -------
        tuple
            (gradient_Sigma_zeta, gradient_Sigma_sigma, gradient_Sigma_x).
            The last two are None unless self.model_groups is True;
            gradient_Sigma_x is a list with one matrix per (rank, group) pair.

        Side effects: updates self.zeta[k], the grid index of self.Kc and
        (if model_groups) the parameters of self.Kg.
        """
        self.zeta[k] = par[0]
        self.Kc.set_gridix(lidx, k)

        # if required set group parameters
        if self.model_groups:
            sigma = par[1]
            x = par[2:]

            assert len(x) == self.Kg.rank * self.G, \
                "Length of x incorrect: Is %s, should be  %s * %s" % (len(x), self.Kg.rank, self.G)
            x = x.reshape(self.Kg.rank, self.G)
            self.Kg.set_parameters(x=x, sigma=sigma, k=k,
                                   spectral_decomp=self.kronecker)  # set and recalculate group kernel (matrix and spectral decomposition if Kronecker)

        # get kernel matrices
        if self.kronecker: # TODO avoid building the full matrix use V and D below
            # Rebuild full kernel matrices from their eigendecompositions
            Vc, Dc = self.Kc.get_kernel_components_k(k)
            Kc = gpu_utils.dot(gpu_utils.dot(Vc, np.diag(Dc)), Vc.transpose())
            Vg, Dg = self.Kg.get_kernel_components_k(k)
            Kg = gpu_utils.dot(gpu_utils.dot(Vg, np.diag(Dg)), Vg.transpose())

            # gradient wrt zeta: d/dzeta [(1-zeta) * (Kg x Kc) + zeta * I]
            gradient_Sigma_zeta = - np.kron(Kg, Kc) + np.eye(self.Nu)

        else:
            Kc = self.Kc.Kmat[self.Kc.get_best_lidx(k),:,:]
            Kg = self.Kg.Kmat[k,:,:]

            # gradient wrt zeta
            if self.model_groups:
                # expand kernels to per-sample size via covariate/group indices
                gradient_Sigma_zeta = - Kc[self.covidx, :][:,self.covidx] *\
                                      Kg[self.groupsidx, :][:, self.groupsidx] +\
                                      np.eye(self.Nu)
            else:
                gradient_Sigma_zeta = - Kc + np.eye(self.Nu)

        if self.model_groups:
            # gradient wrt sigma
            # The group kernel is the unscaled matrix Z + sigma*I normalised to a
            # correlation matrix by N; differentiate that normalisation wrt sigma.
            Z = np.dot(x.transpose(), x) # diagonal can be neglected as set to 1, gradient 0
            Gmat_unscaled = Z + sigma * np.eye(self.G) # this is Kg before scaled to correlation
            Gmat_unscaled_sqrt = np.sqrt(Gmat_unscaled)
            # N[g,h] = sqrt(Gmat_unscaled[g,g] * Gmat_unscaled[h,h]) (normaliser)
            N = np.outer(np.diag(Gmat_unscaled_sqrt), np.diag(Gmat_unscaled_sqrt))
            tmp = -0.5 * np.outer(np.diag(Gmat_unscaled_sqrt), 1/np.diag(Gmat_unscaled_sqrt))
            AN_sigma = tmp + tmp.transpose()
            N2 = N**2
            # quotient rule for Z/N; off-diagonal only (diagonal fixed at 1, gradient 0)
            diffGmat_sigma = (1-np.eye(self.G)) * Z * AN_sigma / N2
            if self.kronecker:
                gradient_Sigma_sigma = (1 - self.zeta[k]) * np.kron(diffGmat_sigma, Kc)
            else:
                gradient_Sigma_sigma = (1 - self.zeta[k]) *\
                                       diffGmat_sigma[self.groupsidx, :][:,self.groupsidx] \
                                       * Kc[self.covidx, :][:, self.covidx]

            # gradient wrt x: one gradient matrix per (rank r, group g) entry
            gradient_Sigma_x = []
            for r in range(self.Kg.rank):
                # derivative of 1/sqrt(Gmat_unscaled[g,g]) wrt x[r,g]
                drg = - 1/np.diag(Gmat_unscaled_sqrt) * x[r,:]
                for g in range(self.G):
                    tmp = np.outer(np.diag(Gmat_unscaled_sqrt), drg[g] * np.eye(self.G)[g, :])
                    AN_x = tmp + tmp.transpose()
                    tmp = np.outer(x[r, :], np.eye(self.G)[g, :])
                    AZ_x = tmp + tmp.transpose()
                    # quotient rule: d(Z/N) = (N*dZ - Z*dN)/N^2, off-diagonal only
                    diffGmat_x = (1-np.eye(self.G)) * (Z * AN_x + AZ_x * N) / N2
                    if self.kronecker:
                        grad = (1 - self.zeta[k]) * np.kron(diffGmat_x, Kc)
                    else:
                        grad = (1 - self.zeta[k]) * diffGmat_x[self.groupsidx, :][:,self.groupsidx] *  Kc[self.covidx, :][:,self.covidx]
                    gradient_Sigma_x.append(grad)

        else:
            gradient_Sigma_sigma = None
            gradient_Sigma_x = None

        return gradient_Sigma_zeta, gradient_Sigma_sigma, gradient_Sigma_x
Example #19
0
    def _updateParameters(self, Y, W, tau, mask, Alpha, Qmean_T1, Qvar_T1,
                          Qtheta, SZ, theta_lnE, theta_lnEInv):
        """
        Hidden method to compute parameter updates for the spike-and-slab
        factor node (Q distribution of S and Z).

        Parameters
        ----------
        Y : list of arrays, one observation matrix per view (length M).
        W : list of dicts with expectations 'E' and 'E2' of the weights per view.
        tau : list of precision matrices per view; masked entries are zeroed in place.
        mask : list of boolean masks of missing entries per view.
        Alpha : array (N, K) of prior precisions.
        Qmean_T1, Qvar_T1 : arrays (N, K), mean/variance of Z given S=1 (updated in place).
        Qtheta : array (N, K), posterior probability of S=1 (updated in place).
        SZ : array (N, K), expectation of S*Z (updated in place).
        theta_lnE, theta_lnEInv : arrays (N, K), E[ln theta] and E[ln(1-theta)].

        Returns
        -------
        dict with keys 'mean_B1', 'var_B1', 'theta' holding the updated
        Q parameters.
        """

        # Mask matrices: missing observations contribute zero precision
        for m in range(len(Y)):
            tau[m][mask[m]] = 0.

        # Precompute terms to speed up GPU computation
        N = Qmean_T1.shape[0]
        M = len(Y)
        K = self.dim[1]

        # Per-view weights to balance views of different dimensionality
        weights = [1] * M
        if self.weight_views and M > 1:
            total_w = np.asarray([Y[m].shape[1] for m in range(M)]).sum()
            weights = np.asarray(
                [total_w / (M * Y[m].shape[1]) for m in range(M)])
            weights = weights / weights.sum() * M

        # Accumulators initialised with the prior precision Alpha (N, K)
        term4_tmp1 = gpu_utils.array(s.zeros((N, K)) + Alpha)
        term4_tmp2 = gpu_utils.array(s.zeros((N, K)) + Alpha)
        term4_tmp3 = gpu_utils.array(s.zeros((N, K)) + Alpha)

        for m in range(M):
            tau_gpu = gpu_utils.array(tau[m])
            Y_gpu = gpu_utils.array(Y[m])
            W_gpu = gpu_utils.array(W[m]["E"])
            WW_gpu = gpu_utils.array(W[m]["E2"])
            term4_tmp1 += weights[m] * gpu_utils.dot(tau_gpu * Y_gpu, W_gpu)
            term4_tmp3 += weights[m] * gpu_utils.dot(tau_gpu, WW_gpu)
        del tau_gpu, Y_gpu, W_gpu, WW_gpu  # free GPU memory before the per-factor loop

        # Update each latent variable in turn (notice that the update of Z[,k] depends on the other values of Z!)
        for k in range(K):
            term1 = (theta_lnE - theta_lnEInv)[:, k]
            term2 = 0.5 * s.log(Alpha[:, k])

            # Cross-term with all other factors j != k, accumulated over views
            for m in range(M):
                tau_gpu = gpu_utils.array(tau[m])
                Wk_gpu = gpu_utils.array(W[m]["E"][:, k])
                term4_tmp2_tmp = (tau_gpu * gpu_utils.dot(
                    gpu_utils.array(SZ[:, s.arange(K) != k]),
                    (Wk_gpu *
                     gpu_utils.array(W[m]["E"][:, s.arange(K) != k].T)))).sum(
                         axis=1)
                term4_tmp2[:, k] += weights[m] * term4_tmp2_tmp
                del tau_gpu, Wk_gpu, term4_tmp2_tmp

            term3 = gpu_utils.asnumpy(0.5 * gpu_utils.log(term4_tmp3[:, k]))
            term4 = gpu_utils.asnumpy(0.5 * gpu_utils.divide(
                gpu_utils.square(term4_tmp1[:, k] - term4_tmp2[:, k]),
                term4_tmp3[:, k]))

            # Update S
            # NOTE there could be some precision issues in T --> loads of 1s in result
            Qtheta[:, k] = 1. / (1. + s.exp(-(term1 + term2 - term3 + term4)))
            Qtheta[:, k] = np.nan_to_num(Qtheta[:, k])

            # Update Z
            Qvar_T1[:, k] = gpu_utils.asnumpy(1. / term4_tmp3[:, k])
            Qmean_T1[:,
                     k] = Qvar_T1[:, k] * gpu_utils.asnumpy(term4_tmp1[:, k] -
                                                            term4_tmp2[:, k])

            # Update Expectations for the next iteration
            SZ[:, k] = Qtheta[:, k] * Qmean_T1[:, k]

        return {'mean_B1': Qmean_T1, 'var_B1': Qvar_T1, 'theta': Qtheta}