Example #1
    def parameters_changed(self):
        f_index = self.Y_metadata['function_index'].flatten()
        d_index = self.Y_metadata['d_index'].flatten()
        T = len(self.likelihood.likelihoods_list)
        self.batch_scale = [float(self.Xmulti_all[t].shape[0])/float(self.Xmulti[t].shape[0]) for t in range(T)]
        self._log_marginal_likelihood, self.gradients, self.posteriors, _ = self.inference_method.inference(q_u_means=self.q_u_means,
                                                                        q_u_chols=self.q_u_chols, X=self.Xmulti, Y=self.Ymulti, Z=self.Z,
                                                                        Zold = self.Zold, kern_list_old = self.kern_list_old,
                                                                        kern_list=self.kern_list, likelihood=self.likelihood,
                                                                        B_list=self.B_list, B_list_old=self.B_list_old,
                                                                        phi_means=self.phi_means, phi_chols=self.phi_chols,
                                                                        Y_metadata=self.Y_metadata, batch_scale=self.batch_scale)
        D = self.likelihood.num_output_functions(self.Y_metadata)
        N = self.X.shape[0]
        M = self.num_inducing

        Z_grad = np.zeros_like(self.Z.values)
        for q, kern_q in enumerate(self.kern_list):
            # Update the variational parameter gradients:

            self.q_u_means[:, q:q + 1].gradient = self.gradients['dL_dmu_u'][q]
            self.q_u_chols[:, q:q + 1].gradient = self.gradients['dL_dL_u'][q]

            # Update kernel hyperparameters: lengthscale and variance
            kern_q.update_gradients_full(self.gradients['dL_dKmm'][q], self.Z[:,q*self.Xdim:q*self.Xdim+self.Xdim])
            grad = kern_q.gradient.copy()

            # Update kernel hyperparameters: W + kappa
            Kffdiag = []
            KuqF = []
            for d in range(D):
                Kffdiag.append(self.gradients['dL_dKdiag'][q][d])
                KuqF.append(self.gradients['dL_dKmn'][q][d] * kern_q.K(self.Z[:, q * self.Xdim:q * self.Xdim + self.Xdim],self.Xmulti[f_index[d]]))
                # Alternative (corrected) construction: Kffdiag should also be
                # multiplied by kern_q.Kdiag, as in:
                #Kffdiag.append(kern_q.Kdiag(self.Xmulti[f_index[d]]) * self.gradients['dL_dKdiag'][q][d])

            util.update_gradients_diag(self.B_list[q], Kffdiag)
            Bgrad = self.B_list[q].gradient.copy()
            util.update_gradients_Kmn(self.B_list[q], KuqF, D)
            Bgrad += self.B_list[q].gradient.copy()

            self.B_list[q].gradient = Bgrad

            for d in range(self.likelihood.num_output_functions(self.Y_metadata)):
                kern_q.update_gradients_full(self.B_list[q].W[d] * self.gradients['dL_dKmn'][q][d],self.Z[:, q * self.Xdim:q * self.Xdim + self.Xdim],self.Xmulti[f_index[d]])
                grad += kern_q.gradient.copy()
                kern_q.update_gradients_diag(self.B_list[q].B[d,d] *self.gradients['dL_dKdiag'][q][d], self.Xmulti[f_index[d]])
                grad += kern_q.gradient.copy()  # accumulate the diagonal-term contribution

            kern_q.gradient = grad

            if not self.Z.is_fixed:
                Z_grad[:,q*self.Xdim:q*self.Xdim+self.Xdim] += kern_q.gradients_X(self.gradients['dL_dKmm'][q], self.Z[:,q*self.Xdim:q*self.Xdim+self.Xdim]).copy()
                for d in range(self.likelihood.num_output_functions(self.Y_metadata)):
                    Z_grad[:,q*self.Xdim:q*self.Xdim+self.Xdim]+= self.B_list[q].W[d]*kern_q.gradients_X(self.gradients['dL_dKmn'][q][d], self.Z[:, q * self.Xdim:q * self.Xdim + self.Xdim],self.Xmulti[f_index[d]]).copy()

        self.Z.gradient[:] = Z_grad
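
Note on batch_scale: each entry rescales the data-fit terms of one output so that a minibatch yields an unbiased estimate of the full-data ELBO (the inference method is assumed to multiply the per-point likelihood terms by this factor). A minimal, self-contained sketch of the idea with illustrative names:

import numpy as np

# Hypothetical data for one output t: all N_t training points and a minibatch.
X_all_t = np.random.randn(1000, 1)                                 # plays the role of self.Xmulti_all[t]
X_batch_t = X_all_t[np.random.choice(1000, 50, replace=False)]     # plays the role of self.Xmulti[t]

# batch_scale[t] = N_t / n_t, as computed in parameters_changed above.
batch_scale_t = float(X_all_t.shape[0]) / float(X_batch_t.shape[0])

# Rescaling the minibatch sum gives an unbiased estimate of the full-data sum.
per_point_terms = np.random.randn(X_batch_t.shape[0])              # stand-in for per-point ELL terms
full_sum_estimate = batch_scale_t * per_point_terms.sum()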
Example #2
    def parameters_changed(self):
        f_index = self.Y_metadata['function_index'].flatten()
        d_index = self.Y_metadata['d_index'].flatten()
        T = len(self.likelihood.likelihoods_list)
        self.batch_scale = [
            float(self.Xmulti_all[t].shape[0]) /
            float(self.Xmulti[t].shape[0]) for t in range(T)
        ]
        self._log_marginal_likelihood, gradients, self.posteriors, _ = self.inference_method.inference(
            q_u_means=self.q_u_means,
            q_u_chols=self.q_u_chols,
            X=self.Xmulti,
            Y=self.Ymulti,
            Z=self.Z,
            kern_list=self.kern_list,
            likelihood=self.likelihood,
            B_list=self.B_list,
            Y_metadata=self.Y_metadata,
            batch_scale=self.batch_scale)
        D = self.likelihood.num_output_functions(self.Y_metadata)
        N = self.X.shape[0]
        M = self.num_inducing
        _, B_list = util.LCM(input_dim=self.Xdim,
                             output_dim=D,
                             rank=1,
                             kernels_list=self.kern_list,
                             W_list=self.W_list,
                             kappa_list=self.kappa_list)
        Z_grad = np.zeros_like(self.Z.values)
        for q, kern_q in enumerate(self.kern_list):
            # Update the variational parameter gradients:
            # SVI + VEM
            # Variational parameters only receive gradients in the VE step.
            if self.stochastic and not self.vem_step:
                self.q_u_means[:, q:q + 1].gradient = np.zeros(
                    gradients['dL_dmu_u'][q].shape)
                self.q_u_chols[:, q:q + 1].gradient = np.zeros(
                    gradients['dL_dL_u'][q].shape)
            else:
                self.q_u_means[:, q:q + 1].gradient = gradients['dL_dmu_u'][q]
                self.q_u_chols[:, q:q + 1].gradient = gradients['dL_dL_u'][q]

            # Update kernel hyperparameters: lengthscale and variance
            kern_q.update_gradients_full(
                gradients['dL_dKmm'][q],
                self.Z[:, q * self.Xdim:q * self.Xdim + self.Xdim])
            grad = kern_q.gradient.copy()

            # Update kernel hyperparameters: W + kappa
            Kffdiag = []
            KuqF = []
            for d in range(D):
                Kffdiag.append(gradients['dL_dKdiag'][q][d])
                KuqF.append(gradients['dL_dKmn'][q][d] * kern_q.K(
                    self.Z[:, q * self.Xdim:q * self.Xdim + self.Xdim],
                    self.Xmulti[f_index[d]]))

            util.update_gradients_diag(self.B_list[q], Kffdiag)
            Bgrad = self.B_list[q].gradient.copy()
            util.update_gradients_Kmn(self.B_list[q], KuqF, D)
            Bgrad += self.B_list[q].gradient.copy()
            # SVI + VEM
            # Coregionalization parameters are frozen during the VE step.
            if self.stochastic and self.vem_step:
                self.B_list[q].gradient = np.zeros(Bgrad.shape)
            else:
                self.B_list[q].gradient = Bgrad

            for d in range(
                    self.likelihood.num_output_functions(self.Y_metadata)):
                kern_q.update_gradients_full(
                    gradients['dL_dKmn'][q][d],
                    self.Z[:, q * self.Xdim:q * self.Xdim + self.Xdim],
                    self.Xmulti[f_index[d]])
                grad += B_list[q].W[d] * kern_q.gradient.copy()
                kern_q.update_gradients_diag(gradients['dL_dKdiag'][q][d],
                                             self.Xmulti[f_index[d]])
                grad += B_list[q].B[d, d] * kern_q.gradient.copy()
                # SVI + VEM
                # Kernel hyperparameters are frozen during the VE step.
                if self.stochastic and self.vem_step:
                    kern_q.gradient = np.zeros(grad.shape)
                else:
                    kern_q.gradient = grad

            if not self.Z.is_fixed:
                Z_grad[:, q * self.Xdim:q * self.Xdim +
                       self.Xdim] += kern_q.gradients_X(
                           gradients['dL_dKmm'][q],
                           self.Z[:, q * self.Xdim:q * self.Xdim + self.Xdim])
                for d in range(
                        self.likelihood.num_output_functions(self.Y_metadata)):
                    Z_grad[:, q * self.Xdim:q * self.Xdim +
                           self.Xdim] += B_list[q].W[d] * kern_q.gradients_X(
                               gradients['dL_dKmn'][q][d],
                               self.Z[:, q * self.Xdim:q * self.Xdim +
                                      self.Xdim], self.Xmulti[f_index[d]])

        if not self.Z.is_fixed:
            # SVI + VEM
            # Inducing inputs are frozen during the VE step.
            if self.stochastic and self.vem_step:
                self.Z.gradient[:] = np.zeros(Z_grad.shape)
            else:
                self.Z.gradient[:] = Z_grad
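
The self.stochastic / self.vem_step flags in Example #2 implement a variational-EM alternation on top of SVI: when vem_step is True only the variational parameters q(u) keep their gradients, otherwise only the kernel, coregionalization and inducing-point parameters do. A small, self-contained sketch of that gating rule (the helper and its arguments are illustrative, not part of the model's API):

import numpy as np

def gated_gradient(grad, stochastic, vem_step, updated_in_ve_step):
    """Mirror the SVI + VEM gating above: a parameter group only keeps its
    gradient during the step that is meant to update it."""
    if not stochastic:
        return grad
    keep = vem_step if updated_in_ve_step else (not vem_step)
    return grad if keep else np.zeros_like(grad)

# Variational parameters (q_u_means, q_u_chols) move in the VE step ...
print(gated_gradient(np.ones(3), stochastic=True, vem_step=True, updated_in_ve_step=True))
# ... while kernel/B/Z hyperparameters move in the complementary step.
print(gated_gradient(np.ones(3), stochastic=True, vem_step=True, updated_in_ve_step=False))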
Example #3
    def parameters_changed(self):
        f_index = self.Y_metadata['function_index'].flatten()
        d_index = self.Y_metadata['d_index'].flatten()
        T = len(self.likelihood.likelihoods_list)
        self.batch_scale = [
            float(self.Xmulti_all[t].shape[0]) /
            float(self.Xmulti[t].shape[0]) for t in range(T)
        ]
        self._log_marginal_likelihood, self.gradients, self.posteriors, _ = self.inference_method.inference(
            q_u_means=self.q_u_means,
            q_u_chols=self.q_u_chols,
            X=self.Xmulti,
            Y=self.Ymulti,
            Z=self.Z,
            kern_list=self.kern_list,
            likelihood=self.likelihood,
            B_list=self.B_list,
            Y_metadata=self.Y_metadata,
            batch_scale=self.batch_scale,
            Gauss_Newton=self.Gauss_Newton)
        D = self.likelihood.num_output_functions(self.Y_metadata)
        N = self.X.shape[0]
        M = self.num_inducing
        # _, B_list = util.LCM(input_dim=self.Xdim, output_dim=D, rank=1, kernels_list=self.kern_list, W_list=self.W_list,
        #                      kappa_list=self.kappa_list)
        Z_grad = np.zeros_like(self.Z.values)

        if self.FNG is True:
            # FNG: zero out the variational-parameter gradients
            for q, kern_q in enumerate(self.kern_list):
                self.q_u_means[:, q:q + 1].gradient = self.q_u_means[:, q:q + 1].gradient * 0.0
                self.q_u_chols[:, q:q + 1].gradient = self.q_u_chols[:, q:q + 1].gradient * 0.0
        else:
            for q, kern_q in enumerate(self.kern_list):

                self.q_u_means[:, q:q +
                               1].gradient = self.gradients['dL_dmu_u'][q]
                self.q_u_chols[:,
                               q:q + 1].gradient = self.gradients['dL_dL_u'][q]

                # Update kernel hyperparameters: lengthscale and variance
                kern_q.update_gradients_full(
                    self.gradients['dL_dKmm'][q],
                    self.Z[:, q * self.Xdim:q * self.Xdim + self.Xdim])
                grad = kern_q.gradient.copy()

                # Update kernel hyperparameters: W + kappa
                Kffdiag = []
                KuqF = []
                for d in range(D):
                    Kffdiag.append(
                        kern_q.Kdiag(self.Xmulti[f_index[d]]) *
                        self.gradients['dL_dKdiag'][q][d])
                    #Kffdiag.append(self.gradients['dL_dKdiag'][q][d])   #old line
                    #KuqF.append(self.gradients['dL_dKmn'][q][d] * kern_q.K(self.Z[:,q*self.Xdim:q*self.Xdim+self.Xdim], self.Xmulti[f_index[d]]))   #old line
                    KuqF.append(
                        kern_q.K(
                            self.Z[:, q * self.Xdim:q * self.Xdim + self.Xdim],
                            self.Xmulti[f_index[d]]) *
                        self.gradients['dL_dKmn'][q][d])

                util.update_gradients_diag(self.B_list[q], Kffdiag)
                Bgrad = self.B_list[q].gradient.copy()
                util.update_gradients_Kmn(self.B_list[q], KuqF, D)
                Bgrad += self.B_list[q].gradient.copy()
                # SVI + VEM gating removed in this version; the coregionalization
                # gradient is always assigned.
                self.B_list[q].gradient = Bgrad

                for d in range(
                        self.likelihood.num_output_functions(self.Y_metadata)):
                    kern_q.update_gradients_full(
                        self.B_list[q].W[d] * self.gradients['dL_dKmn'][q][d],
                        self.Z[:, q * self.Xdim:q * self.Xdim + self.Xdim],
                        self.Xmulti[f_index[d]])
                    # W[d] is already folded into dL_dKmn above, so the kernel
                    # gradient is accumulated directly.
                    grad += kern_q.gradient.copy()

                    kern_q.update_gradients_diag(
                        self.B_list[q].B[d, d] *
                        self.gradients['dL_dKdiag'][q][d],
                        self.Xmulti[f_index[d]])
                    # B[d, d] is already folded into dL_dKdiag above.
                    grad += kern_q.gradient.copy()

                # SVI + VEM gating removed in this version; the kernel gradient is
                # always assigned.
                kern_q.gradient = grad

                if not self.Z.is_fixed:
                    Z_grad[:, q * self.Xdim:q * self.Xdim +
                           self.Xdim] += kern_q.gradients_X(
                               self.gradients['dL_dKmm'][q],
                               self.Z[:, q * self.Xdim:q * self.Xdim +
                                      self.Xdim]).copy()
                    for d in range(
                            self.likelihood.num_output_functions(
                                self.Y_metadata)):
                        Z_grad[:, q * self.Xdim:q * self.Xdim +
                               self.Xdim] += self.B_list[q].W[
                                   d] * kern_q.gradients_X(
                                       self.gradients['dL_dKmn'][q][d],
                                       self.Z[:, q * self.Xdim:q * self.Xdim +
                                              self.Xdim],
                                       self.Xmulti[f_index[d]]).copy()
            self.Z.gradient[:] = Z_grad
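
The correction highlighted in Examples #3 and #4 follows from the chain rule: with Kff_d = B_q[d, d] * Kq(X_d, X_d), the derivative of the ELBO w.r.t. the coregionalization entries involves dL_dKdiag multiplied elementwise by kern_q.Kdiag, which is the kind of product util.update_gradients_diag is assumed to accumulate. A toy numerical check of that term, with illustrative array names:

import numpy as np

rng = np.random.default_rng(0)
Kq_diag = rng.random(5)                # stand-in for kern_q.Kdiag(self.Xmulti[f_index[d]])
dL_dKffdiag = rng.standard_normal(5)   # stand-in for self.gradients['dL_dKdiag'][q][d]
B_dd = 0.7                             # stand-in for self.B_list[q].B[d, d]

# L depends on B_dd only through Kff_diag = B_dd * Kq_diag, so by the chain rule:
dL_dB_dd = np.sum(dL_dKffdiag * Kq_diag)

# Finite-difference check on the (locally linear) dependence of L on B_dd.
L = lambda b: np.sum(dL_dKffdiag * (b * Kq_diag))
eps = 1e-6
assert np.isclose((L(B_dd + eps) - L(B_dd - eps)) / (2 * eps), dL_dB_dd)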
Example #4
    def parameters_changed(self):
        """
        Description: Updates the "object.gradient" attribute of parameter variables for being used by the optimizer. In
        other words, loads derivatives of the ELBO wrt. variational, hyper- and linear combination parameters into the
        model, taken these ones from the inference class [see inference.py -> gradients()].
        """
        ####### Dimensions #######
        D = self.likelihood.num_output_functions(self.Y_metadata)
        N = self.X.shape[0]
        M = self.num_inducing
        T = len(self.likelihood.likelihoods_list)
        f_index = self.Y_metadata['function_index'].flatten()
        d_index = self.Y_metadata['d_index'].flatten()

        ####### Batch Scaling (Stochastic VI) #######
        self.batch_scale = [
            float(self.Xmulti_all[t].shape[0]) /
            float(self.Xmulti[t].shape[0]) for t in range(T)
        ]

        # -------------------------------#   ELBO + BASIC GRADIENTS (Chain Rule)    #----------------------------------#
        self._log_marginal_likelihood, self.gradients = self.inference_method.variational_inference(
            q_u_means=self.q_u_means,
            q_u_chols=self.q_u_chols,
            X=self.Xmulti,
            Y=self.Ymulti,
            Z=self.Z,
            kern_list=self.kern_list,
            likelihood=self.likelihood,
            B_list=self.B_list,
            Y_metadata=self.Y_metadata,
            batch_scale=self.batch_scale)

        #------------------------------------#   ALL GRADIENTS UPDATE     #--------------------------------------------#
        Z_grad = np.zeros_like(self.Z.values)
        for q, kern_q in enumerate(self.kern_list):

            #-----------------------------#   GRADIENTS OF VARIATIONAL PARAMETERS   #----------------------------------#
            #######  Update gradients of variational parameter  #######
            self.q_u_means[:, q:q + 1].gradient = self.gradients['dL_dmu_u'][q]
            self.q_u_chols[:, q:q + 1].gradient = self.gradients['dL_dL_u'][q]

            # ------------------------------#   GRADIENTS OF HYPERPARAMETERS    #--------------------------------------#
            #######   Update gradients of kernel hyperparameters: lengthscale and variance   #######
            kern_q.update_gradients_full(
                self.gradients['dL_dKmm'][q],
                self.Z[:, q * self.Xdim:q * self.Xdim + self.Xdim])
            grad = kern_q.gradient.copy()

            #######  Update gradients of (multi-output) kernel hyperparameters: W + kappa   #######
            Kffdiag = []
            KuqF = []
            for d in range(D):
                #######  Key correction: Kffdiag is also multiplied by kern_q.Kdiag (chain rule through B[d, d] * Kq)  #######
                Kffdiag.append(
                    kern_q.Kdiag(self.Xmulti[f_index[d]]) *
                    self.gradients['dL_dKdiag'][q][d])
                KuqF.append(
                    kern_q.K(
                        self.Z[:, q * self.Xdim:q * self.Xdim + self.Xdim],
                        self.Xmulti[f_index[d]]) *
                    self.gradients['dL_dKmn'][q][d])

            util.update_gradients_diag(self.B_list[q], Kffdiag)
            Bgrad = self.B_list[q].gradient.copy()
            util.update_gradients_Kmn(self.B_list[q], KuqF, D)
            Bgrad += self.B_list[q].gradient.copy()
            self.B_list[q].gradient = Bgrad

            #######   Re-update gradients of kernel hyperparameters: lengthscale and variance (second term) #######
            for d in range(
                    self.likelihood.num_output_functions(self.Y_metadata)):
                kern_q.update_gradients_full(
                    self.B_list[q].W[d] * self.gradients['dL_dKmn'][q][d],
                    self.Z[:, q * self.Xdim:q * self.Xdim + self.Xdim],
                    self.Xmulti[f_index[d]])
                grad += kern_q.gradient.copy()
                kern_q.update_gradients_diag(
                    self.B_list[q].B[d, d] * self.gradients['dL_dKdiag'][q][d],
                    self.Xmulti[f_index[d]])
                grad += kern_q.gradient.copy()  # accumulate the diagonal-term contribution

            kern_q.gradient = grad

            #######  Update gradients of inducing points #######
            if not self.Z.is_fixed:
                Z_grad[:, q * self.Xdim:q * self.Xdim +
                       self.Xdim] += kern_q.gradients_X(
                           self.gradients['dL_dKmm'][q],
                           self.Z[:, q * self.Xdim:q * self.Xdim +
                                  self.Xdim]).copy()
                for d in range(
                        self.likelihood.num_output_functions(self.Y_metadata)):
                    Z_grad[:, q * self.Xdim:q * self.Xdim + self.
                           Xdim] += self.B_list[q].W[d] * kern_q.gradients_X(
                               self.gradients['dL_dKmn'][q][d],
                               self.Z[:,
                                      q * self.Xdim:q * self.Xdim + self.Xdim],
                               self.Xmulti[f_index[d]]).copy()

        self.Z.gradient[:] = Z_grad
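
As the docstring in Example #4 notes, parameters_changed is a hook: it is not called directly but is invoked by the parameter framework whenever parameter values change (for example on every step of model.optimize()). A minimal sketch of that mechanism, assuming the paramz package that GPy-style models build on; the toy class and its gradient rule are illustrative only:

import numpy as np
from paramz import Parameterized, Param

class ToyModel(Parameterized):
    """Tiny stand-in model: the framework calls parameters_changed() whenever
    a linked parameter is modified, which is where gradients get loaded."""
    def __init__(self):
        super(ToyModel, self).__init__(name='toy')
        self.theta = Param('theta', np.ones(2))
        self.link_parameter(self.theta)

    def parameters_changed(self):
        # The real models above recompute the ELBO and load all gradients here;
        # this toy version just fills in a made-up gradient.
        self.theta.gradient = 2.0 * self.theta.values

m = ToyModel()
m.theta[:] = 3.0           # setting values triggers parameters_changed()
print(m.theta.gradient)    # expected: [6. 6.]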