Example #1
 def update_qX_gradients(self):
     # Contribution of the variational term for q(X) to the bound.
     delta = -self.variationalterm.comp_value(self.X)
     if self.mpi_comm is not None:
         # Sum the per-rank contributions onto the root; every other rank
         # adds zero so the term enters the objective exactly once.
         delta = reduceArrays([np.float64(delta)], self.mpi_comm, self.mpi_root)[0]
         if self.mpi_comm.rank != self.mpi_root: delta = 0.
     self._log_marginal_likelihood += delta
     self.variationalterm.update_gradients(self.X)
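The reduce-then-zero idiom above prevents a globally summed term from being counted once per rank. Below is a minimal standalone sketch of the same pattern written directly against mpi4py; reduceArrays is a GPy-specific helper and is replaced here by the plain comm.reduce, and the value of delta is illustrative only.

    # Sketch only: run under mpiexec with mpi4py installed.
    import numpy as np
    from mpi4py import MPI

    comm, root = MPI.COMM_WORLD, 0
    delta = np.float64(comm.rank + 1.)                   # stand-in for a per-rank term
    total = comm.reduce(delta, op=MPI.SUM, root=root)    # summed on root, None elsewhere
    delta = total if comm.rank == root else 0.
    # Adding `delta` into a shared objective on every rank now counts the
    # global sum exactly once (on the root).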
Example #2
 def update_qX_gradients(self):
     # Gradients of the bound w.r.t. the q(X) mean and variance, dispatched
     # on whether the kernel exposes psi-covariance or psi2 statistics.
     if self.psicov:
         self.X.mean.gradient, self.X.variance.gradient = self.kern.gradients_qX_expectations_psicov(
                                             variational_posterior=self.X,
                                             Z=self.Z,
                                             dL_dpsi0=self.grad_dict['dL_dpsi0'],
                                             dL_dpsi1=self.grad_dict['dL_dpsi1'],
                                             dL_dpsicov=self.grad_dict['dL_dpsicov'])
     else:
         self.X.mean.gradient, self.X.variance.gradient = self.kern.gradients_qX_expectations(
                                             variational_posterior=self.X,
                                             Z=self.Z,
                                             dL_dpsi0=self.grad_dict['dL_dpsi0'],
                                             dL_dpsi1=self.grad_dict['dL_dpsi1'],
                                             dL_dpsi2=self.grad_dict['dL_dpsi2'])
     delta = -self.variationalterm.comp_value(self.X)
     if self.mpi_comm is not None:
         delta = reduceArrays([np.float64(delta)],self.mpi_comm, self.mpi_root)[0]
         if self.mpi_comm.rank != self.mpi_root: delta = 0.
     self._log_marginal_likelihood += delta
     self.variationalterm.update_gradients(self.X)
Example #3
    def _inference_vardtc(self):
        if self.svi:
            from GPy.util.linalg import tdot
            self.qU_var = tdot(self.qU_W)+np.eye(self.Z.shape[0])*self.qU_a
            self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.Z, self.likelihood, self.Y, self.qU_mean, self.qU_var, Kuu_sigma=self.Kuu_sigma)

            if self.mpi_comm is None or self.mpi_comm.rank == self.mpi_root:
                KL, dKL_dqU_mean, dKL_dqU_var, dKL_dKuu = self.inference_method.comp_KL_qU(self.qU_mean, self.qU_var)
                self._log_marginal_likelihood += -KL*self.qU_ratio
                self.grad_dict['dL_dqU_mean'] += -dKL_dqU_mean*self.qU_ratio
                self.grad_dict['dL_dqU_var'] += -dKL_dqU_var*self.qU_ratio
                self.grad_dict['dL_dKmm'] += -dKL_dKuu*self.qU_ratio
        else:
            self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.Z, self.likelihood, self.Y, self.Y_metadata, Kuu_sigma=self.Kuu_sigma)

        self.likelihood.update_gradients(self.grad_dict['dL_dthetaL'])
        dL_dKmm = self.grad_dict['dL_dKmm']
        if self.mpi_comm is None or self.mpi_comm.rank == self.mpi_root:
            self.Kuu_sigma.gradient = np.diag(dL_dKmm)

        if isinstance(self.X, VariationalPosterior):
            #gradients wrt kernel
            
            if self.psicov:
                self.kern.update_gradients_expectations_psicov(variational_posterior=self.X,
                                                        Z=self.Z,
                                                        dL_dpsi0=self.grad_dict['dL_dpsi0'],
                                                        dL_dpsi1=self.grad_dict['dL_dpsi1'],
                                                        dL_dpsicov=self.grad_dict['dL_dpsicov'])
            else:
                self.kern.update_gradients_expectations(variational_posterior=self.X,
                                                        Z=self.Z,
                                                        dL_dpsi0=self.grad_dict['dL_dpsi0'],
                                                        dL_dpsi1=self.grad_dict['dL_dpsi1'],
                                                        dL_dpsi2=self.grad_dict['dL_dpsi2'])
            kerngrad = self.kern.gradient.copy()
            if self.mpi_comm is None:
                self.kern.update_gradients_full(dL_dKmm, self.Z, None)
                kerngrad += self.kern.gradient.copy()
                self.kern.gradient = kerngrad
            else:
                kerngrad = reduceArrays([kerngrad], self.mpi_comm, self.mpi_root)[0]
                if self.mpi_comm.rank==self.mpi_root:
                    self.kern.update_gradients_full(dL_dKmm, self.Z, None)
                    kerngrad += self.kern.gradient.copy()
                    self.kern.gradient = kerngrad

            #gradients wrt Z
            if self.psicov:
                self.Z.gradient = self.kern.gradients_Z_expectations_psicov(
                                   self.grad_dict['dL_dpsi0'],
                                   self.grad_dict['dL_dpsi1'],
                                   self.grad_dict['dL_dpsicov'],
                                   Z=self.Z,
                                   variational_posterior=self.X)
            else:
                self.Z.gradient = self.kern.gradients_Z_expectations(
                                   self.grad_dict['dL_dpsi0'],
                                   self.grad_dict['dL_dpsi1'],
                                   self.grad_dict['dL_dpsi2'],
                                   Z=self.Z,
                                   variational_posterior=self.X)
            if self.mpi_comm is None:
                self.Z.gradient += self.kern.gradients_X(dL_dKmm, self.Z)
            else:
                self.Z.gradient =  reduceArrays([self.Z.gradient], self.mpi_comm, self.mpi_root)[0]
                if self.mpi_comm.rank == self.mpi_root:
                    self.Z.gradient += self.kern.gradients_X(dL_dKmm, self.Z)
        else:
            #gradients wrt kernel
            self.kern.update_gradients_diag(self.grad_dict['dL_dKdiag'], self.X)
            kerngrad = self.kern.gradient.copy()
            self.kern.update_gradients_full(self.grad_dict['dL_dKnm'], self.X, self.Z)
            kerngrad += self.kern.gradient
            if self.mpi_comm is None:
                self.kern.update_gradients_full(dL_dKmm, self.Z, None)
                self.kern.gradient += kerngrad
            else:
                kerngrad = reduceArrays([kerngrad], self.mpi_comm, self.mpi_root)[0]
                if self.mpi_comm.rank==self.mpi_root:
                    self.kern.update_gradients_full(dL_dKmm, self.Z, None)
                    kerngrad += self.kern.gradient.copy()
                    self.kern.gradient = kerngrad
            #gradients wrt Z
            self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X)
            if self.mpi_comm is None:
                self.Z.gradient += self.kern.gradients_X(dL_dKmm, self.Z)
            else:
                self.Z.gradient =  reduceArrays([self.Z.gradient], self.mpi_comm, self.mpi_root)[0]
                if self.mpi_comm.rank == self.mpi_root:
                    self.Z.gradient += self.kern.gradients_X(dL_dKmm, self.Z)

        if self.svi:
            self.qU_mean.gradient = self.grad_dict['dL_dqU_mean']
            self.qU_W.gradient = (self.grad_dict['dL_dqU_var']+self.grad_dict['dL_dqU_var'].T).dot(self.qU_W)
            self.qU_a.gradient = np.diag(self.grad_dict['dL_dqU_var']).sum()
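The three qU gradients at the end apply the chain rule for the low-rank-plus-diagonal parameterization qU_var = W W^T + a I: dL/dW = (dL/dV + dL/dV^T) W and dL/da = tr(dL/dV). A small numpy sketch with a finite-difference check; the toy loss and the sizes are assumptions for illustration, not part of the model:

    import numpy as np

    rng = np.random.default_rng(0)
    M, R = 5, 2
    W = rng.standard_normal((M, R))
    a = 0.3
    dL_dV = rng.standard_normal((M, M))      # stand-in for grad_dict['dL_dqU_var']

    # Chain rule through V = W.dot(W.T) + a*np.eye(M), as in qU_W/qU_a above.
    dL_dW = (dL_dV + dL_dV.T).dot(W)
    dL_da = np.trace(dL_dV)                  # same as np.diag(dL_dV).sum()

    def loss(W_, a_):                        # toy loss, linear in V
        V = W_.dot(W_.T) + a_*np.eye(M)
        return (dL_dV*V).sum()

    eps = 1e-6
    Wp = W.copy(); Wp[0, 0] += eps
    assert np.isclose((loss(Wp, a) - loss(W, a))/eps, dL_dW[0, 0], atol=1e-4)
    assert np.isclose((loss(W, a + eps) - loss(W, a))/eps, dL_da, atol=1e-4)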
Example #4
    def _inference_vardtc(self):
        if self.svi:
            from GPy.util.linalg import tdot
            self.qU_var = tdot(self.qU_W)+np.eye(self.Z.shape[0])*self.qU_a
            self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.Z, self.likelihood, self.Y, self.qU_mean, self.qU_var, Kuu_sigma=self.Kuu_sigma)

            if self.mpi_comm is None or self.mpi_comm.rank == self.mpi_root:
                KL, dKL_dqU_mean, dKL_dqU_var, dKL_dKuu = self.inference_method.comp_KL_qU(self.qU_mean, self.qU_var)
                self._log_marginal_likelihood += -KL*self.qU_ratio
                self.grad_dict['dL_dqU_mean'] += -dKL_dqU_mean*self.qU_ratio
                self.grad_dict['dL_dqU_var'] += -dKL_dqU_var*self.qU_ratio
                self.grad_dict['dL_dKmm'] += -dKL_dKuu*self.qU_ratio
        else:
            self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.Z, self.likelihood, self.Y, self.Y_metadata, Kuu_sigma=self.Kuu_sigma if hasattr(self, 'Kuu_sigma') else None)

        self.likelihood.update_gradients(self.grad_dict['dL_dthetaL'])
        dL_dKmm = self.grad_dict['dL_dKmm']
        if (self.mpi_comm is None or self.mpi_comm.rank == self.mpi_root) and getattr(self, 'Kuu_sigma', None) is not None:
            self.Kuu_sigma.gradient = np.diag(dL_dKmm)

        if isinstance(self.X, VariationalPosterior):
            #gradients wrt kernel
            
            if self.psicov:
                self.kern.update_gradients_expectations_psicov(variational_posterior=self.X,
                                                        Z=self.Z,
                                                        dL_dpsi0=self.grad_dict['dL_dpsi0'],
                                                        dL_dpsi1=self.grad_dict['dL_dpsi1'],
                                                        dL_dpsicov=self.grad_dict['dL_dpsicov'])
            else:
                self.kern.update_gradients_expectations(variational_posterior=self.X,
                                                        Z=self.Z,
                                                        dL_dpsi0=self.grad_dict['dL_dpsi0'],
                                                        dL_dpsi1=self.grad_dict['dL_dpsi1'],
                                                        dL_dpsi2=self.grad_dict['dL_dpsi2'])
            kerngrad = self.kern.gradient.copy()
            if self.mpi_comm is None:
                self.kern.update_gradients_full(dL_dKmm, self.Z, None)
                kerngrad += self.kern.gradient.copy()
                self.kern.gradient = kerngrad
            else:
                kerngrad = reduceArrays([kerngrad], self.mpi_comm, self.mpi_root)[0]
                if self.mpi_comm.rank==self.mpi_root:
                    self.kern.update_gradients_full(dL_dKmm, self.Z, None)
                    kerngrad += self.kern.gradient.copy()
                    self.kern.gradient = kerngrad

            #gradients wrt Z
            if self.psicov:
                self.Z.gradient = self.kern.gradients_Z_expectations_psicov(
                                   self.grad_dict['dL_dpsi0'],
                                   self.grad_dict['dL_dpsi1'],
                                   self.grad_dict['dL_dpsicov'],
                                   Z=self.Z,
                                   variational_posterior=self.X)
            else:
                self.Z.gradient = self.kern.gradients_Z_expectations(
                                   self.grad_dict['dL_dpsi0'],
                                   self.grad_dict['dL_dpsi1'],
                                   self.grad_dict['dL_dpsi2'],
                                   Z=self.Z,
                                   variational_posterior=self.X)
            if self.mpi_comm is None:
                self.Z.gradient += self.kern.gradients_X(dL_dKmm, self.Z)
            else:
                self.Z.gradient =  reduceArrays([self.Z.gradient], self.mpi_comm, self.mpi_root)[0]
                if self.mpi_comm.rank == self.mpi_root:
                    self.Z.gradient += self.kern.gradients_X(dL_dKmm, self.Z)
        else:
            #gradients wrt kernel
            self.kern.update_gradients_diag(self.grad_dict['dL_dKdiag'], self.X)
            kerngrad = self.kern.gradient.copy()
            self.kern.update_gradients_full(self.grad_dict['dL_dKnm'], self.X, self.Z)
            kerngrad += self.kern.gradient
            if self.mpi_comm is None:
                self.kern.update_gradients_full(dL_dKmm, self.Z, None)
                self.kern.gradient += kerngrad
            else:
                kerngrad = reduceArrays([kerngrad], self.mpi_comm, self.mpi_root)[0]
                if self.mpi_comm.rank==self.mpi_root:
                    self.kern.update_gradients_full(dL_dKmm, self.Z, None)
                    kerngrad += self.kern.gradient.copy()
                    self.kern.gradient = kerngrad
            #gradients wrt Z
            self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X)
            if self.mpi_comm is None:
                self.Z.gradient += self.kern.gradients_X(dL_dKmm, self.Z)
            else:
                self.Z.gradient =  reduceArrays([self.Z.gradient], self.mpi_comm, self.mpi_root)[0]
                if self.mpi_comm.rank == self.mpi_root:
                    self.Z.gradient += self.kern.gradients_X(dL_dKmm, self.Z)

        if self.svi:
            self.qU_mean.gradient = self.grad_dict['dL_dqU_mean']
            self.qU_W.gradient = (self.grad_dict['dL_dqU_var']+self.grad_dict['dL_dqU_var'].T).dot(self.qU_W)
            self.qU_a.gradient = np.diag(self.grad_dict['dL_dqU_var']).sum()
Example #5
    def inference_nonroot(self,
                          kern,
                          X,
                          Z,
                          likelihood,
                          Y,
                          Y_metadata=None,
                          Lm=None,
                          dL_dKmm=None):

        num_data, output_dim = Y.shape
        num_data_total = allReduceArrays([np.int32(num_data)],
                                         self.mpi_comm)[0]

        input_dim = Z.shape[0]
        uncertain_inputs = isinstance(X, VariationalPosterior)
        uncertain_outputs = isinstance(Y, VariationalPosterior)
        beta = 1. / np.fmax(likelihood.variance, 1e-6)

        psi0, psi2, YRY, psi1, psi1Y, Shalf, psi1S = self.gatherPsiStat(
            kern, X, Z, Y, beta, uncertain_inputs)

        # Receive the status flag broadcast by the root; if the root's
        # factorization failed, raise here too so all ranks abort together.
        flag = np.zeros((1, ), dtype=np.int32)
        self.mpi_comm.Bcast(flag, root=self.root)
        if flag[0] == 1: raise LinAlgError('Linalg error!')

        # Allocate receive buffers matching the root's array shapes; the
        # broadcast fills them with the root's triangular factors.
        LmInv, LLInv = np.empty((input_dim, input_dim)).T, np.empty(
            (input_dim, input_dim)).T
        broadcastArrays([LmInv, LLInv], self.mpi_comm, self.root)
        LmLLInv = LLInv.dot(LmInv)
        b = psi1Y.dot(LmLLInv.T)
        v = b.dot(LmLLInv)

        if psi1S is not None:
            psi1SLLinv = psi1S.dot(LmLLInv.T)
            bbt_sum = np.square(psi1SLLinv).sum()
            LLinvPsi1TYYTPsi1LLinvT_sum = tdot(psi1SLLinv.T)
            reduceArrays([bbt_sum, LLinvPsi1TYYTPsi1LLinvT_sum], self.mpi_comm,
                         self.root)
            psi1SP = psi1SLLinv.dot(LmLLInv)

        dL_dpsi2R = np.empty((input_dim, input_dim))
        broadcastArrays([dL_dpsi2R], self.mpi_comm, self.root)

        dL_dpsi0 = -output_dim * (beta * np.ones((num_data, ))) / 2.

        if uncertain_outputs:
            m, s = Y.mean, Y.variance
            dL_dpsi1 = beta * (np.dot(m, v) + Shalf[:, None] * psi1SP)
        else:
            dL_dpsi1 = beta * np.dot(Y, v)

        if uncertain_inputs:
            dL_dpsi2 = beta * dL_dpsi2R
        else:
            dL_dpsi1 += np.dot(psi1, dL_dpsi2R) * 2.
            dL_dpsi2 = None

        if uncertain_inputs:
            grad_dict = {
                'dL_dKmm': None,
                'dL_dpsi0': dL_dpsi0,
                'dL_dpsi1': dL_dpsi1,
                'dL_dpsi2': dL_dpsi2,
                'dL_dthetaL': None
            }
        else:
            grad_dict = {
                'dL_dKmm': None,
                'dL_dKdiag': dL_dpsi0,
                'dL_dKnm': dL_dpsi1,
                'dL_dthetaL': None
            }
        if uncertain_outputs:
            m, s = Y.mean, Y.variance
            psi1LmiLLi = psi1.dot(LmLLInv.T)
            LLiLmipsi1Y = b.T
            grad_dict['dL_dYmean'] = -m * beta + psi1LmiLLi.dot(LLiLmipsi1Y)
            grad_dict['dL_dYvar'] = beta / -2. + np.square(psi1LmiLLi).sum(
                axis=1) / 2

        return None, 0, grad_dict
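Non-root ranks never compute LmInv, LLInv, or dL_dpsi2R themselves: they allocate uninitialised buffers of the right shape and let the broadcast overwrite them. A minimal mpi4py analogue of that buffer-broadcast pattern (broadcastArrays is assumed to wrap comm.Bcast; the sizes and data are illustrative):

    import numpy as np
    from mpi4py import MPI

    comm, root = MPI.COMM_WORLD, 0
    M = 4
    if comm.rank == root:
        buf = np.arange(M*M, dtype=np.float64).reshape(M, M)   # root's data
    else:
        buf = np.empty((M, M))        # contents irrelevant; shape/dtype must match
    comm.Bcast(buf, root=root)        # fills `buf` in place on every rank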
Example #6
    def inference_root(self,
                       kern,
                       X,
                       Z,
                       likelihood,
                       Y,
                       Kuu_sigma=None,
                       Y_metadata=None,
                       Lm=None,
                       dL_dKmm=None):
        """
        The first phase of inference:
        Compute: log-likelihood, dL_dKmm

        Cached intermediate results: Kmm, KmmInv.
        """

        num_data, output_dim = Y.shape
        input_dim = Z.shape[0]
        num_data_total = allReduceArrays([np.int32(num_data)],
                                         self.mpi_comm)[0]

        uncertain_inputs = isinstance(X, VariationalPosterior)
        uncertain_outputs = isinstance(Y, VariationalPosterior)

        beta = 1. / np.fmax(likelihood.variance, 1e-6)

        psi0, psi2, YRY, psi1, psi1Y, Shalf, psi1S = self.gatherPsiStat(
            kern, X, Z, Y, beta, uncertain_inputs)

        #======================================================================
        # Compute Common Components
        #======================================================================

        try:
            Kmm = kern.K(Z).copy()
            if Kuu_sigma is not None:
                diag.add(Kmm, Kuu_sigma)
            else:
                diag.add(Kmm, self.const_jitter)
            Lm = jitchol(Kmm)

            LmInv = dtrtri(Lm)
            LmInvPsi2LmInvT = LmInv.dot(psi2.dot(LmInv.T))

            Lambda = np.eye(Kmm.shape[0]) + LmInvPsi2LmInvT
            LL = jitchol(Lambda)
            LLInv = dtrtri(LL)
            flag = np.zeros((1, ), dtype=np.int32)
            self.mpi_comm.Bcast(flag, root=self.root)
        except LinAlgError as e:
            flag = np.ones((1, ), dtype=np.int32)
            self.mpi_comm.Bcast(flag, root=self.root)
            raise e

        broadcastArrays([LmInv, LLInv], self.mpi_comm, self.root)
        LmLLInv = LLInv.dot(LmInv)

        logdet_L = 2. * np.sum(np.log(np.diag(LL)))
        b = psi1Y.dot(LmLLInv.T)
        bbt = np.square(b).sum()
        v = b.dot(LmLLInv)
        LLinvPsi1TYYTPsi1LLinvT = tdot(b.T)

        if psi1S is not None:
            psi1SLLinv = psi1S.dot(LmLLInv.T)
            bbt_sum = np.square(psi1SLLinv).sum()
            LLinvPsi1TYYTPsi1LLinvT_sum = tdot(psi1SLLinv.T)
            bbt_sum, LLinvPsi1TYYTPsi1LLinvT_sum = reduceArrays(
                [bbt_sum, LLinvPsi1TYYTPsi1LLinvT_sum], self.mpi_comm,
                self.root)
            bbt += bbt_sum
            LLinvPsi1TYYTPsi1LLinvT += LLinvPsi1TYYTPsi1LLinvT_sum
            psi1SP = psi1SLLinv.dot(LmLLInv)
        tmp = -LLInv.T.dot(LLinvPsi1TYYTPsi1LLinvT +
                           output_dim * np.eye(input_dim)).dot(LLInv)
        dL_dpsi2R = LmInv.T.dot(tmp +
                                output_dim * np.eye(input_dim)).dot(LmInv) / 2.
        broadcastArrays([dL_dpsi2R], self.mpi_comm, self.root)

        #======================================================================
        # Compute log-likelihood
        #======================================================================
        logL_R = -num_data_total * np.log(beta)
        logL = -(output_dim * (num_data_total * log_2_pi + logL_R + psi0 -
                               np.trace(LmInvPsi2LmInvT)) + YRY -
                 bbt) / 2. - output_dim * logdet_L / 2.

        #======================================================================
        # Compute dL_dKmm
        #======================================================================

        dL_dKmm = dL_dpsi2R - output_dim * LmInv.T.dot(LmInvPsi2LmInvT).dot(
            LmInv) / 2.

        #======================================================================
        # Compute the Posterior distribution of inducing points p(u|Y)
        #======================================================================

        wd_inv = backsub_both_sides(
            Lm,
            np.eye(input_dim) -
            backsub_both_sides(LL, np.identity(input_dim), transpose='left'),
            transpose='left')
        post = Posterior(woodbury_inv=wd_inv,
                         woodbury_vector=v.T,
                         K=Kmm,
                         mean=None,
                         cov=None,
                         K_chol=Lm)

        #======================================================================
        # Compute dL_dthetaL for uncertain inputs and non-heteroscedastic noise
        #======================================================================

        dL_dthetaL = (YRY * beta + beta * output_dim * psi0 - num_data_total *
                      output_dim * beta) / 2. - beta * (dL_dpsi2R * psi2).sum(
                      ) - beta * np.trace(LLinvPsi1TYYTPsi1LLinvT)

        #======================================================================
        # Compute dL_dpsi
        #======================================================================

        dL_dpsi0 = -output_dim * (beta * np.ones((num_data, ))) / 2.

        if uncertain_outputs:
            m, s = Y.mean, Y.variance
            dL_dpsi1 = beta * (np.dot(m, v) + Shalf[:, None] * psi1SP)
        else:
            dL_dpsi1 = beta * np.dot(Y, v)

        if uncertain_inputs:
            dL_dpsi2 = beta * dL_dpsi2R
        else:
            dL_dpsi1 += np.dot(psi1, dL_dpsi2R) * 2.
            dL_dpsi2 = None

        if uncertain_inputs:
            grad_dict = {
                'dL_dKmm': dL_dKmm,
                'dL_dpsi0': dL_dpsi0,
                'dL_dpsi1': dL_dpsi1,
                'dL_dpsi2': dL_dpsi2,
                'dL_dthetaL': dL_dthetaL
            }
        else:
            grad_dict = {
                'dL_dKmm': dL_dKmm,
                'dL_dKdiag': dL_dpsi0,
                'dL_dKnm': dL_dpsi1,
                'dL_dthetaL': dL_dthetaL
            }

        if uncertain_outputs:
            m, s = Y.mean, Y.variance
            psi1LmiLLi = psi1.dot(LmLLInv.T)
            LLiLmipsi1Y = b.T
            grad_dict['dL_dYmean'] = -m * beta + psi1LmiLLi.dot(LLiLmipsi1Y)
            grad_dict['dL_dYvar'] = beta / -2. + np.square(psi1LmiLLi).sum(
                axis=1) / 2

        return post, logL, grad_dict
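The try/except around the factorizations pairs with the flag check in inference_nonroot: the root broadcasts a status flag before any array broadcast, so a LinAlgError raises on every rank instead of leaving the non-root ranks blocked in broadcastArrays. A standalone sketch of that handshake, assuming mpi4py:

    import numpy as np
    from numpy.linalg import LinAlgError, cholesky
    from mpi4py import MPI

    comm, root = MPI.COMM_WORLD, 0
    flag = np.zeros(1, dtype=np.int32)
    if comm.rank == root:
        try:
            L = cholesky(np.eye(3))          # stand-in for jitchol(Kmm)
            comm.Bcast(flag, root=root)      # 0 = success
        except LinAlgError:
            flag[0] = 1
            comm.Bcast(flag, root=root)      # 1 = failure, unblocks the others
            raise
    else:
        comm.Bcast(flag, root=root)
        if flag[0] == 1:
            raise LinAlgError('Cholesky failed on the root rank')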
Example #7
    def inference_root(self, kern, X, Z, likelihood, Y, Kuu_sigma=None, Y_metadata=None, Lm=None, dL_dKmm=None):
        """
        The first phase of inference:
        Compute: log-likelihood, dL_dKmm

        Cached intermediate results: Kmm, KmmInv.
        """

        num_data, output_dim = Y.shape
        input_dim = Z.shape[0]
        num_data_total = allReduceArrays([np.int32(num_data)], self.mpi_comm)[0]

        uncertain_inputs = isinstance(X, VariationalPosterior)
        uncertain_outputs = isinstance(Y, VariationalPosterior)

        beta = 1./np.fmax(likelihood.variance, 1e-6)

        psi0, psi2, YRY, psi1, psi1Y, Shalf, psi1S = self.gatherPsiStat(kern, X, Z, Y, beta, uncertain_inputs)

        #======================================================================
        # Compute Common Components
        #======================================================================

        try:
            Kmm = kern.K(Z).copy()
            if Kuu_sigma is not None:
                diag.add(Kmm, Kuu_sigma)
            else:
                diag.add(Kmm, self.const_jitter)
            Lm = jitchol(Kmm)
    
            LmInv = dtrtri(Lm)
            LmInvPsi2LmInvT = LmInv.dot(psi2.dot(LmInv.T))
                
            Lambda = np.eye(Kmm.shape[0])+LmInvPsi2LmInvT
            LL = jitchol(Lambda)        
            LLInv = dtrtri(LL)
            flag = np.zeros((1,),dtype=np.int32)
            self.mpi_comm.Bcast(flag,root=self.root)
        except LinAlgError as e:
            flag = np.ones((1,),dtype=np.int32)
            self.mpi_comm.Bcast(flag,root=self.root)
            raise e
            
        broadcastArrays([LmInv, LLInv], self.mpi_comm, self.root)
        LmLLInv = LLInv.dot(LmInv)

        logdet_L = 2.*np.sum(np.log(np.diag(LL)))
        b = psi1Y.dot(LmLLInv.T)
        bbt = np.square(b).sum()
        v = b.dot(LmLLInv)
        LLinvPsi1TYYTPsi1LLinvT = tdot(b.T)
        
        if psi1S is not None:
            psi1SLLinv = psi1S.dot(LmLLInv.T)
            bbt_sum = np.square(psi1SLLinv).sum()
            LLinvPsi1TYYTPsi1LLinvT_sum = tdot(psi1SLLinv.T)
            bbt_sum, LLinvPsi1TYYTPsi1LLinvT_sum = reduceArrays([bbt_sum,  LLinvPsi1TYYTPsi1LLinvT_sum], self.mpi_comm, self.root)
            bbt += bbt_sum
            LLinvPsi1TYYTPsi1LLinvT += LLinvPsi1TYYTPsi1LLinvT_sum
            psi1SP = psi1SLLinv.dot(LmLLInv)
        tmp = -LLInv.T.dot(LLinvPsi1TYYTPsi1LLinvT+output_dim*np.eye(input_dim)).dot(LLInv)
        dL_dpsi2R = LmInv.T.dot(tmp+output_dim*np.eye(input_dim)).dot(LmInv)/2.
        broadcastArrays([dL_dpsi2R], self.mpi_comm, self.root)

        #======================================================================
        # Compute log-likelihood
        #======================================================================
        logL_R = -num_data_total*np.log(beta)
        logL = -(output_dim*(num_data_total*log_2_pi+logL_R+psi0-np.trace(LmInvPsi2LmInvT))+YRY- bbt)/2.-output_dim*logdet_L/2.

        #======================================================================
        # Compute dL_dKmm
        #======================================================================

        dL_dKmm = dL_dpsi2R - output_dim*LmInv.T.dot(LmInvPsi2LmInvT).dot(LmInv)/2.

        #======================================================================
        # Compute the Posterior distribution of inducing points p(u|Y)
        #======================================================================

        wd_inv = backsub_both_sides(Lm, np.eye(input_dim)- backsub_both_sides(LL, np.identity(input_dim), transpose='left'), transpose='left')
        post = Posterior(woodbury_inv=wd_inv, woodbury_vector=v.T, K=Kmm, mean=None, cov=None, K_chol=Lm)

        #======================================================================
        # Compute dL_dthetaL for uncertain inputs and non-heteroscedastic noise
        #======================================================================

        dL_dthetaL = (YRY*beta + beta*output_dim*psi0 - num_data_total*output_dim*beta)/2. - beta*(dL_dpsi2R*psi2).sum() - beta*np.trace(LLinvPsi1TYYTPsi1LLinvT)
        
        #======================================================================
        # Compute dL_dpsi
        #======================================================================

        dL_dpsi0 = -output_dim * (beta * np.ones((num_data,)))/2.

        if uncertain_outputs:
            m,s = Y.mean, Y.variance
            dL_dpsi1 = beta*(np.dot(m,v)+Shalf[:,None]*psi1SP)
        else:
            dL_dpsi1 = beta*np.dot(Y,v)

        if uncertain_inputs:
            dL_dpsi2 = beta* dL_dpsi2R
        else:
            dL_dpsi1 += np.dot(psi1,dL_dpsi2R)*2.
            dL_dpsi2 = None
        
        if uncertain_inputs:
            grad_dict = {'dL_dKmm': dL_dKmm,
                         'dL_dpsi0':dL_dpsi0,
                         'dL_dpsi1':dL_dpsi1,
                         'dL_dpsi2':dL_dpsi2,
                         'dL_dthetaL':dL_dthetaL}
        else:
            grad_dict = {'dL_dKmm': dL_dKmm,
                         'dL_dKdiag':dL_dpsi0,
                         'dL_dKnm':dL_dpsi1,
                         'dL_dthetaL':dL_dthetaL}
            
        if uncertain_outputs:
            m,s = Y.mean, Y.variance
            psi1LmiLLi = psi1.dot(LmLLInv.T)
            LLiLmipsi1Y = b.T
            grad_dict['dL_dYmean'] = -m*beta+ psi1LmiLLi.dot(LLiLmipsi1Y)
            grad_dict['dL_dYvar'] = beta/-2.+ np.square(psi1LmiLLi).sum(axis=1)/2

        return post, logL, grad_dict
Example #8
    def inference_nonroot(self, kern, X, Z, likelihood, Y, Y_metadata=None, Lm=None, dL_dKmm=None):
        
        num_data, output_dim = Y.shape
        num_data_total = allReduceArrays([np.int32(num_data)], self.mpi_comm)[0]
        
        input_dim = Z.shape[0]
        uncertain_inputs = isinstance(X, VariationalPosterior)
        uncertain_outputs = isinstance(Y, VariationalPosterior)
        beta = 1./np.fmax(likelihood.variance, 1e-6)
        
        psi0, psi2, YRY, psi1, psi1Y, Shalf, psi1S = self.gatherPsiStat(kern, X, Z, Y, beta, uncertain_inputs)

        flag = np.zeros((1,),dtype=np.int32)
        self.mpi_comm.Bcast(flag,root=self.root)
        if flag[0] == 1: raise LinAlgError('Linalg error!')

        LmInv, LLInv = np.empty((input_dim, input_dim)).T, np.empty((input_dim, input_dim)).T
        broadcastArrays([LmInv, LLInv], self.mpi_comm, self.root)
        LmLLInv = LLInv.dot(LmInv)
        b = psi1Y.dot(LmLLInv.T)
        v = b.dot(LmLLInv)
        
        if psi1S is not None:
            psi1SLLinv = psi1S.dot(LmLLInv.T)
            bbt_sum = np.square(psi1SLLinv).sum()
            LLinvPsi1TYYTPsi1LLinvT_sum = tdot(psi1SLLinv.T)
            reduceArrays([bbt_sum,  LLinvPsi1TYYTPsi1LLinvT_sum], self.mpi_comm, self.root)
            psi1SP = psi1SLLinv.dot(LmLLInv)
        
        dL_dpsi2R = np.empty((input_dim, input_dim))
        broadcastArrays([dL_dpsi2R], self.mpi_comm, self.root)
            
        dL_dpsi0 = -output_dim * (beta * np.ones((num_data,)))/2.

        if uncertain_outputs:
            m,s = Y.mean, Y.variance
            dL_dpsi1 = beta*(np.dot(m,v)+Shalf[:,None]*psi1SP)
        else:
            dL_dpsi1 = beta*np.dot(Y,v)

        if uncertain_inputs:
            dL_dpsi2 = beta* dL_dpsi2R
        else:
            dL_dpsi1 += np.dot(psi1,dL_dpsi2R)*2.
            dL_dpsi2 = None
        
        if uncertain_inputs:
            grad_dict = {'dL_dKmm': None,
                         'dL_dpsi0':dL_dpsi0,
                         'dL_dpsi1':dL_dpsi1,
                         'dL_dpsi2':dL_dpsi2,
                         'dL_dthetaL':None}
        else:
            grad_dict = {'dL_dKmm': None,
                         'dL_dKdiag':dL_dpsi0,
                         'dL_dKnm':dL_dpsi1,
                         'dL_dthetaL':None}
        if uncertain_outputs:
            m,s = Y.mean, Y.variance
            psi1LmiLLi = psi1.dot(LmLLInv.T)
            LLiLmipsi1Y = b.T
            grad_dict['dL_dYmean'] = -m*beta+ psi1LmiLLi.dot(LLiLmipsi1Y)
            grad_dict['dL_dYvar'] = beta/-2.+ np.square(psi1LmiLLi).sum(axis=1)/2
        
        return None, 0, grad_dict