def parameters_changed(self):
    f_index = self.Y_metadata['function_index'].flatten()
    d_index = self.Y_metadata['d_index'].flatten()
    T = len(self.likelihood.likelihoods_list)
    self.batch_scale = [float(self.Xmulti_all[t].shape[0]) / float(self.Xmulti[t].shape[0])
                        for t in range(T)]
    self._log_marginal_likelihood, self.gradients, self.posteriors, _ = self.inference_method.inference(
        q_u_means=self.q_u_means, q_u_chols=self.q_u_chols, X=self.Xmulti, Y=self.Ymulti,
        Z=self.Z, Zold=self.Zold, kern_list_old=self.kern_list_old, kern_list=self.kern_list,
        likelihood=self.likelihood, B_list=self.B_list, B_list_old=self.B_list_old,
        phi_means=self.phi_means, phi_chols=self.phi_chols,
        Y_metadata=self.Y_metadata, batch_scale=self.batch_scale)
    D = self.likelihood.num_output_functions(self.Y_metadata)
    N = self.X.shape[0]
    M = self.num_inducing
    Z_grad = np.zeros_like(self.Z.values)
    for q, kern_q in enumerate(self.kern_list):
        # Update the variational parameter gradients:
        self.q_u_means[:, q:q + 1].gradient = self.gradients['dL_dmu_u'][q]
        self.q_u_chols[:, q:q + 1].gradient = self.gradients['dL_dL_u'][q]

        # Update kernel hyperparameters: lengthscale and variance
        kern_q.update_gradients_full(
            self.gradients['dL_dKmm'][q],
            self.Z[:, q * self.Xdim:q * self.Xdim + self.Xdim])
        grad = kern_q.gradient.copy()

        # Update kernel hyperparameters: W + kappa.
        # Main correction (Juanjo's): Kffdiag must be built by also
        # multiplying by kern_q.Kdiag, mirroring the kern_q.K factor in KuqF.
        Kffdiag = []
        KuqF = []
        for d in range(D):
            Kffdiag.append(kern_q.Kdiag(self.Xmulti[f_index[d]]) *
                           self.gradients['dL_dKdiag'][q][d])
            KuqF.append(self.gradients['dL_dKmn'][q][d] *
                        kern_q.K(self.Z[:, q * self.Xdim:q * self.Xdim + self.Xdim],
                                 self.Xmulti[f_index[d]]))
        util.update_gradients_diag(self.B_list[q], Kffdiag)
        Bgrad = self.B_list[q].gradient.copy()
        util.update_gradients_Kmn(self.B_list[q], KuqF, D)
        Bgrad += self.B_list[q].gradient.copy()
        self.B_list[q].gradient = Bgrad

        for d in range(D):
            kern_q.update_gradients_full(
                self.B_list[q].W[d] * self.gradients['dL_dKmn'][q][d],
                self.Z[:, q * self.Xdim:q * self.Xdim + self.Xdim],
                self.Xmulti[f_index[d]])
            grad += kern_q.gradient.copy()
            kern_q.update_gradients_diag(
                self.B_list[q].B[d, d] * self.gradients['dL_dKdiag'][q][d],
                self.Xmulti[f_index[d]])
            grad += kern_q.gradient.copy()
        kern_q.gradient = grad

        if not self.Z.is_fixed:
            Z_grad[:, q * self.Xdim:q * self.Xdim + self.Xdim] += kern_q.gradients_X(
                self.gradients['dL_dKmm'][q],
                self.Z[:, q * self.Xdim:q * self.Xdim + self.Xdim]).copy()
            for d in range(D):
                Z_grad[:, q * self.Xdim:q * self.Xdim + self.Xdim] += self.B_list[q].W[d] * kern_q.gradients_X(
                    self.gradients['dL_dKmn'][q][d],
                    self.Z[:, q * self.Xdim:q * self.Xdim + self.Xdim],
                    self.Xmulti[f_index[d]]).copy()
    self.Z.gradient[:] = Z_grad
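# A minimal standalone sketch (hypothetical helper, not part of the model code)
# of the chain rule behind the Kffdiag/KuqF terms above. It assumes the LCM
# construction B_q = W_q W_q^T + diag(kappa_q), so that the q-th contribution
# to the covariances is diag(Kff_d) = B_q[d, d] * kq_diag(X_d) and
# K_{u f_d} = W_q[d] * k_q(Z_q, X_d); the util functions then propagate these
# per-output terms into the W and kappa gradients.
import numpy as np

def coreg_grad_terms(dL_dKdiag_d, kq_diag_d, dL_dKmn_d, kq_Zx_d):
    """Per-output contributions to dL/dB_q[d, d] and dL/dW_q[d].

    dL_dKdiag_d : dL/d diag(Kff_d), shape (N_d,)
    kq_diag_d   : k_q diagonal at X_d, shape (N_d,)
    dL_dKmn_d   : dL/dK_{u f_d}, shape (M, N_d)
    kq_Zx_d     : k_q(Z_q, X_d), shape (M, N_d)
    """
    dL_dBdd = np.sum(dL_dKdiag_d * kq_diag_d)  # product rule on the diagonal
    dL_dWd = np.sum(dL_dKmn_d * kq_Zx_d)       # scalar weight per output
    return dL_dBdd, dL_dWd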
def parameters_changed(self):
    f_index = self.Y_metadata['function_index'].flatten()
    d_index = self.Y_metadata['d_index'].flatten()
    T = len(self.likelihood.likelihoods_list)
    self.batch_scale = [float(self.Xmulti_all[t].shape[0]) / float(self.Xmulti[t].shape[0])
                        for t in range(T)]
    self._log_marginal_likelihood, gradients, self.posteriors, _ = self.inference_method.inference(
        q_u_means=self.q_u_means, q_u_chols=self.q_u_chols, X=self.Xmulti, Y=self.Ymulti,
        Z=self.Z, kern_list=self.kern_list, likelihood=self.likelihood, B_list=self.B_list,
        Y_metadata=self.Y_metadata, batch_scale=self.batch_scale)
    D = self.likelihood.num_output_functions(self.Y_metadata)
    N = self.X.shape[0]
    M = self.num_inducing
    _, B_list = util.LCM(input_dim=self.Xdim, output_dim=D, rank=1,
                         kernels_list=self.kern_list, W_list=self.W_list,
                         kappa_list=self.kappa_list)
    Z_grad = np.zeros_like(self.Z.values)
    for q, kern_q in enumerate(self.kern_list):
        # Update the variational parameter gradients.
        # SVI + VEM: in a VM step (vem_step False) only the hyperparameters
        # move, so the gradients of q(u) are zeroed; otherwise q(u) is updated.
        if self.stochastic and not self.vem_step:
            self.q_u_means[:, q:q + 1].gradient = np.zeros(gradients['dL_dmu_u'][q].shape)
            self.q_u_chols[:, q:q + 1].gradient = np.zeros(gradients['dL_dL_u'][q].shape)
        else:
            self.q_u_means[:, q:q + 1].gradient = gradients['dL_dmu_u'][q]
            self.q_u_chols[:, q:q + 1].gradient = gradients['dL_dL_u'][q]

        # Update kernel hyperparameters: lengthscale and variance
        kern_q.update_gradients_full(
            gradients['dL_dKmm'][q],
            self.Z[:, q * self.Xdim:q * self.Xdim + self.Xdim])
        grad = kern_q.gradient.copy()

        # Update kernel hyperparameters: W + kappa
        Kffdiag = []
        KuqF = []
        for d in range(D):
            Kffdiag.append(gradients['dL_dKdiag'][q][d])
            KuqF.append(gradients['dL_dKmn'][q][d] *
                        kern_q.K(self.Z[:, q * self.Xdim:q * self.Xdim + self.Xdim],
                                 self.Xmulti[f_index[d]]))
        util.update_gradients_diag(self.B_list[q], Kffdiag)
        Bgrad = self.B_list[q].gradient.copy()
        util.update_gradients_Kmn(self.B_list[q], KuqF, D)
        Bgrad += self.B_list[q].gradient.copy()
        # SVI + VEM: hyperparameter gradients are zeroed during a VE step.
        if self.stochastic and self.vem_step:
            self.B_list[q].gradient = np.zeros(Bgrad.shape)
        else:
            self.B_list[q].gradient = Bgrad

        for d in range(D):
            kern_q.update_gradients_full(
                gradients['dL_dKmn'][q][d],
                self.Z[:, q * self.Xdim:q * self.Xdim + self.Xdim],
                self.Xmulti[f_index[d]])
            grad += B_list[q].W[d] * kern_q.gradient.copy()
            kern_q.update_gradients_diag(gradients['dL_dKdiag'][q][d],
                                         self.Xmulti[f_index[d]])
            grad += B_list[q].B[d, d] * kern_q.gradient.copy()
        # SVI + VEM: hyperparameter gradients are zeroed during a VE step.
        if self.stochastic and self.vem_step:
            kern_q.gradient = np.zeros(grad.shape)
        else:
            kern_q.gradient = grad

        if not self.Z.is_fixed:
            Z_grad[:, q * self.Xdim:q * self.Xdim + self.Xdim] += kern_q.gradients_X(
                gradients['dL_dKmm'][q],
                self.Z[:, q * self.Xdim:q * self.Xdim + self.Xdim])
            for d in range(D):
                Z_grad[:, q * self.Xdim:q * self.Xdim + self.Xdim] += B_list[q].W[d] * kern_q.gradients_X(
                    gradients['dL_dKmn'][q][d],
                    self.Z[:, q * self.Xdim:q * self.Xdim + self.Xdim],
                    self.Xmulti[f_index[d]])

    if not self.Z.is_fixed:
        # SVI + VEM: inducing-input gradients are zeroed during a VE step.
        if self.stochastic and self.vem_step:
            self.Z.gradient[:] = np.zeros(Z_grad.shape)
        else:
            self.Z.gradient[:] = Z_grad
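# A minimal sketch (hypothetical driver, not part of the source) of the
# alternating VEM schedule the stochastic/vem_step flags above implement:
# a VE step moves only the variational parameters q(u), while a VM step
# moves only the kernel hyperparameters, coregionalization weights and
# inducing inputs. The batch_scale factors computed above keep the
# minibatch ELBO an unbiased estimate of the full-data ELBO in either step.
def vem_optimize(model, n_outer=50, iters_per_step=20):
    # Assumes model.stochastic is True so the VE/VM gating is active.
    for _ in range(n_outer):
        model.vem_step = True                  # VE: hyperparameter gradients zeroed
        model.optimize(max_iters=iters_per_step)
        model.vem_step = False                 # VM: q(u) gradients zeroed
        model.optimize(max_iters=iters_per_step)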
def parameters_changed(self):
    f_index = self.Y_metadata['function_index'].flatten()
    d_index = self.Y_metadata['d_index'].flatten()
    T = len(self.likelihood.likelihoods_list)
    self.batch_scale = [float(self.Xmulti_all[t].shape[0]) / float(self.Xmulti[t].shape[0])
                        for t in range(T)]
    self._log_marginal_likelihood, self.gradients, self.posteriors, _ = self.inference_method.inference(
        q_u_means=self.q_u_means, q_u_chols=self.q_u_chols, X=self.Xmulti, Y=self.Ymulti,
        Z=self.Z, kern_list=self.kern_list, likelihood=self.likelihood, B_list=self.B_list,
        Y_metadata=self.Y_metadata, batch_scale=self.batch_scale, Gauss_Newton=self.Gauss_Newton)
    D = self.likelihood.num_output_functions(self.Y_metadata)
    N = self.X.shape[0]
    M = self.num_inducing
    Z_grad = np.zeros_like(self.Z.values)
    if self.FNG is True:
        # Fully natural gradients: q(u) is updated outside this method, so the
        # Euclidean gradients of the variational parameters are zeroed here.
        for q, kern_q in enumerate(self.kern_list):
            self.q_u_means[:, q:q + 1].gradient = self.q_u_means[:, q:q + 1].gradient * 0.0
            self.q_u_chols[:, q:q + 1].gradient = self.q_u_chols[:, q:q + 1].gradient * 0.0
    else:
        for q, kern_q in enumerate(self.kern_list):
            self.q_u_means[:, q:q + 1].gradient = self.gradients['dL_dmu_u'][q]
            self.q_u_chols[:, q:q + 1].gradient = self.gradients['dL_dL_u'][q]

            # Update kernel hyperparameters: lengthscale and variance
            kern_q.update_gradients_full(
                self.gradients['dL_dKmm'][q],
                self.Z[:, q * self.Xdim:q * self.Xdim + self.Xdim])
            grad = kern_q.gradient.copy()

            # Update kernel hyperparameters: W + kappa.
            # Correction: Kffdiag is built by also multiplying by kern_q.Kdiag,
            # mirroring the kern_q.K factor in KuqF.
            Kffdiag = []
            KuqF = []
            for d in range(D):
                Kffdiag.append(kern_q.Kdiag(self.Xmulti[f_index[d]]) *
                               self.gradients['dL_dKdiag'][q][d])
                KuqF.append(kern_q.K(self.Z[:, q * self.Xdim:q * self.Xdim + self.Xdim],
                                     self.Xmulti[f_index[d]]) *
                            self.gradients['dL_dKmn'][q][d])
            util.update_gradients_diag(self.B_list[q], Kffdiag)
            Bgrad = self.B_list[q].gradient.copy()
            util.update_gradients_Kmn(self.B_list[q], KuqF, D)
            Bgrad += self.B_list[q].gradient.copy()
            self.B_list[q].gradient = Bgrad

            for d in range(D):
                # The W and B weights are folded into dL_dKmn / dL_dKdiag here,
                # so the resulting kernel gradients are accumulated unweighted.
                kern_q.update_gradients_full(
                    self.B_list[q].W[d] * self.gradients['dL_dKmn'][q][d],
                    self.Z[:, q * self.Xdim:q * self.Xdim + self.Xdim],
                    self.Xmulti[f_index[d]])
                grad += kern_q.gradient.copy()
                kern_q.update_gradients_diag(
                    self.B_list[q].B[d, d] * self.gradients['dL_dKdiag'][q][d],
                    self.Xmulti[f_index[d]])
                grad += kern_q.gradient.copy()
            kern_q.gradient = grad

            # Update gradients of the inducing inputs:
            if not self.Z.is_fixed:
                Z_grad[:, q * self.Xdim:q * self.Xdim + self.Xdim] += kern_q.gradients_X(
                    self.gradients['dL_dKmm'][q],
                    self.Z[:, q * self.Xdim:q * self.Xdim + self.Xdim]).copy()
                for d in range(D):
                    Z_grad[:, q * self.Xdim:q * self.Xdim + self.Xdim] += self.B_list[q].W[d] * kern_q.gradients_X(
                        self.gradients['dL_dKmn'][q][d],
                        self.Z[:, q * self.Xdim:q * self.Xdim + self.Xdim],
                        self.Xmulti[f_index[d]]).copy()
    self.Z.gradient[:] = Z_grad
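# A minimal sketch of why the FNG branch above zeroes the Euclidean gradients
# of q(u): with (fully) natural gradients the variational update happens
# outside parameters_changed(). The helper below is the standard
# natural-gradient ascent step for a Gaussian q(u) = N(m, S) in natural
# parameters; it is illustrative only and not this project's exact FNG scheme.
import numpy as np

def natural_gradient_step(m, S, dL_dm, dL_dS, lr=1e-2):
    # Natural parameters: theta1 = S^{-1} m, theta2 = -0.5 S^{-1}.
    # The natural gradient w.r.t. (theta1, theta2) equals the Euclidean
    # gradient w.r.t. the mean parameters (m, S + m m^T).
    Sinv = np.linalg.inv(S)
    Sinv_new = Sinv - 2.0 * lr * dL_dS                           # theta2 step
    S_new = np.linalg.inv(Sinv_new)
    m_new = S_new @ (Sinv @ m + lr * (dL_dm - 2.0 * dL_dS @ m))  # theta1 step
    return m_new, S_new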
def parameters_changed(self):
    """
    Description: Updates the "object.gradient" attribute of parameter variables
    so they can be used by the optimizer. In other words, it loads the
    derivatives of the ELBO w.r.t. the variational, hyper- and linear
    combination parameters into the model, taking them from the inference
    class [see inference.py -> gradients()].
    """
    ####### Dimensions #######
    D = self.likelihood.num_output_functions(self.Y_metadata)
    N = self.X.shape[0]
    M = self.num_inducing
    T = len(self.likelihood.likelihoods_list)
    f_index = self.Y_metadata['function_index'].flatten()
    d_index = self.Y_metadata['d_index'].flatten()

    ####### Batch Scaling (Stochastic VI) #######
    self.batch_scale = [float(self.Xmulti_all[t].shape[0]) / float(self.Xmulti[t].shape[0])
                        for t in range(T)]

    # --------------------- ELBO + BASIC GRADIENTS (Chain Rule) --------------------- #
    self._log_marginal_likelihood, self.gradients = self.inference_method.variational_inference(
        q_u_means=self.q_u_means, q_u_chols=self.q_u_chols, X=self.Xmulti, Y=self.Ymulti,
        Z=self.Z, kern_list=self.kern_list, likelihood=self.likelihood, B_list=self.B_list,
        Y_metadata=self.Y_metadata, batch_scale=self.batch_scale)

    # ------------------------------ ALL GRADIENTS UPDATE --------------------------- #
    Z_grad = np.zeros_like(self.Z.values)
    for q, kern_q in enumerate(self.kern_list):
        # --------------------- GRADIENTS OF VARIATIONAL PARAMETERS ----------------- #
        ####### Update gradients of variational parameters #######
        self.q_u_means[:, q:q + 1].gradient = self.gradients['dL_dmu_u'][q]
        self.q_u_chols[:, q:q + 1].gradient = self.gradients['dL_dL_u'][q]

        # ------------------------ GRADIENTS OF HYPERPARAMETERS --------------------- #
        ####### Update gradients of kernel hyperparameters: lengthscale and variance #######
        kern_q.update_gradients_full(
            self.gradients['dL_dKmm'][q],
            self.Z[:, q * self.Xdim:q * self.Xdim + self.Xdim])
        grad = kern_q.gradient.copy()

        ####### Update gradients of (multi-output) kernel hyperparameters: W + kappa #######
        # Main correction: Kffdiag is built by also multiplying by kern_q.Kdiag.
        Kffdiag = []
        KuqF = []
        for d in range(D):
            Kffdiag.append(kern_q.Kdiag(self.Xmulti[f_index[d]]) *
                           self.gradients['dL_dKdiag'][q][d])
            KuqF.append(kern_q.K(self.Z[:, q * self.Xdim:q * self.Xdim + self.Xdim],
                                 self.Xmulti[f_index[d]]) *
                        self.gradients['dL_dKmn'][q][d])
        util.update_gradients_diag(self.B_list[q], Kffdiag)
        Bgrad = self.B_list[q].gradient.copy()
        util.update_gradients_Kmn(self.B_list[q], KuqF, D)
        Bgrad += self.B_list[q].gradient.copy()
        self.B_list[q].gradient = Bgrad

        ####### Re-update gradients of kernel hyperparameters: lengthscale and variance (second term) #######
        for d in range(D):
            kern_q.update_gradients_full(
                self.B_list[q].W[d] * self.gradients['dL_dKmn'][q][d],
                self.Z[:, q * self.Xdim:q * self.Xdim + self.Xdim],
                self.Xmulti[f_index[d]])
            grad += kern_q.gradient.copy()
            kern_q.update_gradients_diag(
                self.B_list[q].B[d, d] * self.gradients['dL_dKdiag'][q][d],
                self.Xmulti[f_index[d]])
            grad += kern_q.gradient.copy()
        kern_q.gradient = grad

        ####### Update gradients of inducing points #######
        if not self.Z.is_fixed:
            Z_grad[:, q * self.Xdim:q * self.Xdim + self.Xdim] += kern_q.gradients_X(
                self.gradients['dL_dKmm'][q],
                self.Z[:, q * self.Xdim:q * self.Xdim + self.Xdim]).copy()
            for d in range(D):
                Z_grad[:, q * self.Xdim:q * self.Xdim + self.Xdim] += self.B_list[q].W[d] * kern_q.gradients_X(
                    self.gradients['dL_dKmn'][q][d],
                    self.Z[:, q * self.Xdim:q * self.Xdim + self.Xdim],
                    self.Xmulti[f_index[d]]).copy()
    self.Z.gradient[:] = Z_grad
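# Usage note: assuming this model subclasses GPy's Model (via the paramz
# framework), parameters_changed() is invoked automatically whenever any
# parameter value changes, and the optimizer consumes the per-parameter
# .gradient attributes filled in here (q_u_means, q_u_chols, kernel
# hyperparameters, B_list, Z). A typical run therefore needs no manual
# gradient wiring:
#
#   model.optimize(messages=True, max_iters=1000)   # hypothetical model instance
#
# after which self._log_marginal_likelihood holds the latest ELBO estimate.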