def shared_computations(self):
    """Pre-compute quantities shared by the objective and gradient computations.

    Caches on ``self``: the inverse/log-det of Kmm, products of the psi
    statistics with Kmm^{-1}, and (when not using natural gradients) the
    covariance, precision and log-det of q(U) in either its Cholesky or
    diagonal parameterization. Also resets ``self.woodbury_inv``, which is
    only used at prediction time.
    """
    # Kmm^{-1}, its Cholesky factors and log|Kmm| in one call.
    self.Kmmi, Lm, Lmi, self.log_det_Kmm = GPy.util.linalg.pdinv(self.Kmm)
    self.psi1Kmmi = np.dot(self.psi1, self.Kmmi)
    if not self.natgrads:
        # fixed: string comparison must use '==', not 'is' (identity on
        # string literals relies on CPython interning and is not guaranteed).
        if self.S_param == 'chol':
            L = choleskies.flat_to_triang(self.q_of_U_choleskies)
            # S_d = L_d L_d^T per output dimension d (stacked on the last axis).
            self.q_of_U_covariance = np.einsum('ijk,ljk->ilk', L, L)
            self.q_of_U_precision = choleskies.multiple_dpotri(L)
            # log|S_d| = 2 * sum(log|diag(L_d)|).
            self.q_of_U_cov_logdet = 2.*np.array([np.sum(np.log(np.abs(np.diag(L[:, :, i])))) for i in range(self.output_dim)])
            # E[U U^T] = m m^T + sum_d S_d
            uuT = np.dot(self.q_of_U_mean, self.q_of_U_mean.T) + self.q_of_U_covariance.sum(-1)
            self.psi1KmmiS = np.einsum('ij,jkl->ikl', self.psi1Kmmi, self.q_of_U_covariance)  # intermediate computation
        elif self.S_param == 'diag':
            self.q_of_U_cov_logdet = np.sum(np.log(self.q_of_U_diags), 0)
            uuT = np.dot(self.q_of_U_mean, self.q_of_U_mean.T) + np.diag(self.q_of_U_diags.sum(-1))
            # fixed: dropped the redundant '1*' copy of the diagonals.
            self.q_of_U_precision = np.dstack([np.diag(1./x) for x in self.q_of_U_diags.T])
            self.psi1KmmiS = self.psi1Kmmi[:, :, None]*self.q_of_U_diags[None, :, :]
        # These depend on uuT, so they only exist on the non-natgrad path.
        self.KiuuT = np.dot(self.Kmmi, uuT)
        self.KiuuTKi = self.KiuuT.dot(self.Kmmi)
    # NOTE(review): original indentation was lost; these look parameterization-
    # independent so they are computed unconditionally — confirm against history.
    self.KmmiPsi2 = np.dot(self.Kmmi, self.psi2)
    self.KmmiPsi2Kmmi = self.KmmiPsi2.dot(self.Kmmi)
    # this thing is only used for prediction
    self.woodbury_inv = None
def gradient_updates(self):
    """Set the derivatives in the kernel, in Z, and in the parameters of q(U).

    Assumes the ``dL_d*`` intermediates (dL_dKmm, dL_dpsi0/1/2, dL_dEu,
    dL_duuT) have already been computed and stored on ``self``.
    """
    # Kernel-parameter gradients: the Kmm contribution first...
    self.kern.update_gradients_full(self.dL_dKmm, self.Z)
    g = self.kern._gradient_array_.copy()
    # ...then the psi-statistic contribution, accumulated by hand because
    # each update_gradients_* call overwrites the kernel's gradient array.
    self.kern.update_gradients_expectations(Z=self.Z, variational_posterior=self.q_of_X_in,
                                            dL_dpsi0=self.dL_dpsi0,
                                            dL_dpsi1=self.dL_dpsi1,
                                            dL_dpsi2=self.dL_dpsi2)
    self.kern._gradient_array_ += g
    # Inducing-input gradients: Kmm term plus psi-statistic term.
    self.Z.gradient = self.kern.gradients_X(self.dL_dKmm, self.Z)
    self.Z.gradient += self.kern.gradients_Z_expectations(Z=self.Z, variational_posterior=self.q_of_X_in,
                                                          dL_dpsi1=self.dL_dpsi1,
                                                          dL_dpsi2=self.dL_dpsi2,
                                                          dL_dpsi0=self.dL_dpsi0)
    if not self.natgrads:
        # dL/dm = dL/dEu + 2 * dL_duuT m (symmetric quadratic-form derivative).
        self.q_of_U_mean.gradient = self.dL_dEu + 2.*np.einsum('ijk,jk->ik', self.dL_duuT, self.q_of_U_mean)
        # fixed: '==' instead of 'is' — identity comparison of string
        # literals is an interning accident, not a contract.
        if self.S_param == 'chol':
            L = choleskies.flat_to_triang(self.q_of_U_choleskies)
            # Chain rule through S = L L^T: dL/dL = 2 * dL_duuT L.
            dL_dchol = 2.*np.einsum('ijk,jlk->ilk', self.dL_duuT, L)
            self.q_of_U_choleskies.gradient = choleskies.triang_to_flat(dL_dchol)
        else:
            # Diagonal parameterization: only the diagonal of dL_duuT matters.
            # fixed: xrange -> range (Python 3; matches range usage elsewhere).
            self.q_of_U_diags.gradient = np.vstack([np.diag(self.dL_duuT[:, :, i]) for i in range(self.output_dim)]).T
def gradient_updates(self):
    """Set the derivatives in the kernel, in Z, and in the parameters of q(U).

    Note: the kernel gradients differ slightly from the uncertain-input
    variant because there is no q(X) here — just a fixed X, so the psi
    statistics reduce to plain kernel matrices (psi1 = Knm, etc.).
    """
    # Kernel-parameter gradients: Kmm contribution first...
    self.kern.update_gradients_full(self.dL_dKmm, self.Z)
    g = self.kern._gradient_array_.copy()
    # ...then the cross-covariance Knm term (dL_dpsi1 plus the psi2 part,
    # which for fixed X folds into Knm via 2 * psi1 dL_dpsi2)...
    dL_dKnm = self.dL_dpsi1 + 2.*self.psi1.dot(self.dL_dpsi2)
    self.kern.update_gradients_full(dL_dKnm, self.X, self.Z)
    g += self.kern._gradient_array_.copy()
    # ...and finally the diagonal Knn term; accumulate by hand because each
    # update_gradients_* call overwrites the kernel's gradient array.
    self.kern.update_gradients_diag(self.dL_dpsi0, self.X)
    self.kern._gradient_array_ += g
    # Inducing-input gradients from both the Kmm and Knm terms.
    self.Z.gradient = self.kern.gradients_X(self.dL_dKmm, self.Z)
    self.Z.gradient += self.kern.gradients_X(dL_dKnm.T, self.Z, self.X)
    # dL/dm = dL/dEu + 2 * dL_duuT m (symmetric quadratic-form derivative).
    self.q_of_U_mean.gradient = self.dL_dEu + 2.*np.einsum('ijk,jk->ik', self.dL_duuT, self.q_of_U_mean)
    # fixed: '==' instead of 'is' — string identity is an interning accident.
    if self.S_param == 'chol':
        L = choleskies.flat_to_triang(self.q_of_U_choleskies)
        # Chain rule through S = L L^T: dL/dL = 2 * dL_duuT L.
        dL_dchol = 2.*np.einsum('ijk,jlk->ilk', self.dL_duuT, L)
        self.q_of_U_choleskies.gradient = choleskies.triang_to_flat(dL_dchol)
    else:
        # Diagonal parameterization: only the diagonal of dL_duuT matters.
        # fixed: xrange -> range (Python 3; matches range usage elsewhere).
        self.q_of_U_diags.gradient = np.vstack([np.diag(self.dL_duuT[:, :, i]) for i in range(self.output_dim)]).T