Example #1
    def parameters_changed(self):
        f_index = self.Y_metadata['function_index'].flatten()
        d_index = self.Y_metadata['d_index'].flatten()
        T = len(self.likelihood.likelihoods_list)
        # Scale factors that unbias the stochastic ELBO: ratio of full
        # dataset size to minibatch size for each task.
        self.batch_scale = [
            float(self.Xmulti_all[t].shape[0]) / self.Xmulti[t].shape[0]
            for t in range(T)
        ]
        self._log_marginal_likelihood, gradients, self.posteriors, _ = self.inference_method.inference(
            q_u_means=self.q_u_means,
            q_u_chols=self.q_u_chols,
            X=self.Xmulti,
            Y=self.Ymulti,
            Z=self.Z,
            kern_list=self.kern_list,
            likelihood=self.likelihood,
            B_list=self.B_list,
            Y_metadata=self.Y_metadata,
            batch_scale=self.batch_scale)
        D = self.likelihood.num_output_functions(self.Y_metadata)
        N = self.X.shape[0]
        M = self.num_inducing
        # Rebuild the coregionalization matrices B_q from the current W/kappa
        _, B_list = util.LCM(input_dim=self.Xdim,
                             output_dim=D,
                             rank=1,
                             kernels_list=self.kern_list,
                             W_list=self.W_list,
                             kappa_list=self.kappa_list)
        Z_grad = np.zeros_like(self.Z.values)
        for q, kern_q in enumerate(self.kern_list):
            # Update the variational parameter gradients:
            # SVI + VEM: in stochastic mode, the q(u) parameters are only
            # updated during the VE-step; their gradients are zeroed in the
            # VM-step.
            if self.stochastic and not self.vem_step:
                self.q_u_means[:, q:q + 1].gradient = np.zeros(
                    gradients['dL_dmu_u'][q].shape)
                self.q_u_chols[:, q:q + 1].gradient = np.zeros(
                    gradients['dL_dL_u'][q].shape)
            else:
                self.q_u_means[:, q:q + 1].gradient = gradients['dL_dmu_u'][q]
                self.q_u_chols[:, q:q + 1].gradient = gradients['dL_dL_u'][q]

            # Update kernel hyperparameters: lengthscale and variance
            kern_q.update_gradients_full(
                gradients['dL_dKmm'][q],
                self.Z[:, q * self.Xdim:q * self.Xdim + self.Xdim])
            grad = kern_q.gradient.copy()

            # Update kernel hyperparameters: W + kappa
            Kffdiag = []
            KuqF = []
            for d in range(D):
                Kffdiag.append(gradients['dL_dKdiag'][q][d])
                KuqF.append(gradients['dL_dKmn'][q][d] * kern_q.K(
                    self.Z[:, q * self.Xdim:q * self.Xdim + self.Xdim],
                    self.Xmulti[f_index[d]]))

            util.update_gradients_diag(self.B_list[q], Kffdiag)
            Bgrad = self.B_list[q].gradient.copy()
            util.update_gradients_Kmn(self.B_list[q], KuqF, D)
            Bgrad += self.B_list[q].gradient.copy()
            # SVI + VEM: coregionalization parameters (W, kappa) are frozen
            # during the VE-step of stochastic training.
            if self.stochastic and self.vem_step:
                self.B_list[q].gradient = np.zeros(Bgrad.shape)
            else:
                self.B_list[q].gradient = Bgrad

            for d in range(D):
                kern_q.update_gradients_full(
                    gradients['dL_dKmn'][q][d],
                    self.Z[:, q * self.Xdim:q * self.Xdim + self.Xdim],
                    self.Xmulti[f_index[d]])
                grad += B_list[q].W[d] * kern_q.gradient.copy()
                kern_q.update_gradients_diag(gradients['dL_dKdiag'][q][d],
                                             self.Xmulti[f_index[d]])
                grad += B_list[q].B[d, d] * kern_q.gradient.copy()
                # SVI + VEM: kernel hyperparameters are frozen during the
                # VE-step of stochastic training.
                if self.stochastic and self.vem_step:
                    kern_q.gradient = np.zeros(grad.shape)
                else:
                    kern_q.gradient = grad

            if not self.Z.is_fixed:
                Z_grad[:, q * self.Xdim:q * self.Xdim +
                       self.Xdim] += kern_q.gradients_X(
                           gradients['dL_dKmm'][q],
                           self.Z[:, q * self.Xdim:q * self.Xdim + self.Xdim])
                for d in range(D):
                    Z_grad[:, q * self.Xdim:q * self.Xdim +
                           self.Xdim] += B_list[q].W[d] * kern_q.gradients_X(
                               gradients['dL_dKmn'][q][d],
                               self.Z[:, q * self.Xdim:q * self.Xdim +
                                      self.Xdim], self.Xmulti[f_index[d]])

        if not self.Z.is_fixed:
            # SVI + VEM: inducing inputs are frozen during the VE-step of
            # stochastic training.
            if self.stochastic and self.vem_step:
                self.Z.gradient[:] = np.zeros(Z_grad.shape)
            else:
                self.Z.gradient[:] = Z_grad
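
The gating above is what a variational EM (VEM) driver exploits: flipping `vem_step` between optimizer calls selects which parameter block receives non-zero gradients. Below is a minimal, hypothetical driver sketch, not part of the library; `optimizer_step` is a placeholder name for one stochastic gradient update on the model's linked parameters (e.g. a climin optimizer step).

def run_vem(model, optimizer_step, num_iters=10, ve_steps=100, vm_steps=100):
    # Hypothetical VE/VM alternation for an SVMOGP-like model exposing the
    # `vem_step` flag read by parameters_changed().
    for _ in range(num_iters):
        model.vem_step = True   # VE-step: only q(u) means/Cholesky factors get gradients
        for _ in range(ve_steps):
            optimizer_step(model)
        model.vem_step = False  # VM-step: only kernel/coregionalization/Z gradients flow
        for _ in range(vm_steps):
            optimizer_step(model)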
Example #2
    def __init__(self,
                 X,
                 Y,
                 Z,
                 kern_list,
                 likelihood,
                 Y_metadata,
                 name='SVMOGP',
                 batch_size=None):

        self.batch_size = batch_size
        self.kern_list = kern_list
        self.likelihood = likelihood
        self.Y_metadata = Y_metadata

        self.num_inducing = Z.shape[0]  # M
        self.num_latent_funcs = len(kern_list)  # Q
        self.num_output_funcs = likelihood.num_output_functions(
            self.Y_metadata)
        self.W_list, self.kappa_list = util.random_W_kappas(
            self.num_latent_funcs, self.num_output_funcs, rank=1)

        self.Xmulti = X
        self.Ymulti = Y

        # Batch the data
        self.Xmulti_all, self.Ymulti_all = X, Y
        if batch_size is None:
            self.stochastic = False
            Xmulti_batch, Ymulti_batch = X, Y
        else:
            # Make climin slicers so that drawing minibatches is much quicker
            self.stochastic = True
            self.slicer_list = [
                draw_mini_slices(Xmulti_task.shape[0], self.batch_size)
                for Xmulti_task in self.Xmulti
            ]
            Xmulti_batch, Ymulti_batch = self.new_batch()
            self.Xmulti, self.Ymulti = Xmulti_batch, Ymulti_batch

        # Initialize inducing points Z
        #Z = kmm_init(self.X_all, self.num_inducing)
        self.Xdim = Z.shape[1]
        Z = np.tile(Z, (1, self.num_latent_funcs))

        inference_method = SVMOGPInf()

        # Only a small placeholder slice is passed to the SparseGP parent;
        # the full multi-output data are kept in self.Xmulti/self.Ymulti.
        super(SVMOGP, self).__init__(X=Xmulti_batch[0][1:10],
                                     Y=Ymulti_batch[0][1:10],
                                     Z=Z,
                                     kernel=kern_list[0],
                                     likelihood=likelihood,
                                     mean_function=None,
                                     X_variance=None,
                                     inference_method=inference_method,
                                     Y_metadata=Y_metadata,
                                     name=name,
                                     normalizer=False)

        self.unlink_parameter(
            self.kern)  # Unlink SparseGP default param kernel

        _, self.B_list = util.LCM(input_dim=self.Xdim,
                                  output_dim=self.num_output_funcs,
                                  rank=1,
                                  kernels_list=self.kern_list,
                                  W_list=self.W_list,
                                  kappa_list=self.kappa_list)

        # Set-up optimization parameters: [Z, m_u, L_u]
        self.q_u_means = Param(
            'm_u',
            5 * np.random.randn(self.num_inducing, self.num_latent_funcs) +
            np.tile(np.random.randn(1, self.num_latent_funcs),
                    (self.num_inducing, 1)))
        chols = choleskies.triang_to_flat(
            np.tile(
                np.eye(self.num_inducing)[None, :, :],
                (self.num_latent_funcs, 1, 1)))
        self.q_u_chols = Param('L_u', chols)

        self.link_parameter(self.Z, index=0)
        self.link_parameter(self.q_u_means)
        self.link_parameter(self.q_u_chols)
        for kern_q in kern_list:  # link all kernels
            self.link_parameter(kern_q)
        for B_q in self.B_list:  # link all coregionalization matrices
            self.link_parameter(B_q)

        self.vem_step = True  # [True=VE-step, False=VM-step]
        self.ve_count = 0
        self.elbo = np.zeros((1, 1))
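
To make the constructor's expected inputs concrete, here is a minimal setup sketch. It assumes GPy-style kernels and lists of per-task arrays for X and Y; the `likelihood` and `Y_metadata` objects are built elsewhere in the surrounding library, so the model call is left commented out.

import numpy as np
import GPy

Q, M = 2, 20                                            # latent functions, inducing points
X = [np.random.rand(100, 1), np.random.rand(80, 1)]     # one input array per task
Y = [np.random.randn(100, 1), np.random.randn(80, 1)]   # one output array per task
kern_list = [GPy.kern.RBF(1) for _ in range(Q)]
Z = np.linspace(0.0, 1.0, M)[:, None]                   # tiled across latent functions in __init__
# model = SVMOGP(X=X, Y=Y, Z=Z, kern_list=kern_list,
#                likelihood=likelihood, Y_metadata=Y_metadata,
#                batch_size=None)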
Example #3
    def __init__(self,
                 X,
                 Y,
                 Z,
                 kern_list,
                 likelihood,
                 Y_metadata,
                 name='SVMOGP',
                 batch_size=None,
                 non_chained=True):

        self.batch_size = batch_size
        self.kern_list = kern_list
        self.likelihood = likelihood
        self.Y_metadata = Y_metadata

        self.num_inducing = Z.shape[0]  # M
        self.num_latent_funcs = len(kern_list)  # Q
        self.num_output_funcs = likelihood.num_output_functions(Y_metadata)

        if non_chained:
            self.W_list, self.kappa_list = util.random_W_kappas(
                self.num_latent_funcs, self.num_output_funcs, rank=1)
        else:
            # Chained construction requires one latent function per
            # likelihood parameter.
            assert self.num_output_funcs == self.num_latent_funcs, \
                "we need a latent function per likelihood parameter"
            self.W_list, self.kappa_list = util.Chained_W_kappas(
                self.num_latent_funcs, self.num_output_funcs, rank=1)

        self.Xmulti = X
        self.Ymulti = Y
        self.iAnnMulti = Y_metadata['iAnn']

        # Batch the data
        self.Xmulti_all, self.Ymulti_all, self.iAnn_all = X, Y, Y_metadata[
            'iAnn']
        if batch_size is None:
            self.stochastic = False
            Xmulti_batch, Ymulti_batch, iAnnmulti_batch = X, Y, Y_metadata['iAnn']
        else:
            # Make climin slicers so that drawing minibatches is much quicker
            self.stochastic = True
            self.slicer_list = [
                draw_mini_slices(Xmulti_task.shape[0], self.batch_size)
                for Xmulti_task in self.Xmulti
            ]
            Xmulti_batch, Ymulti_batch, iAnnmulti_batch = self.new_batch()
            self.Xmulti, self.Ymulti, self.iAnnMulti = Xmulti_batch, Ymulti_batch, iAnnmulti_batch
            self.Y_metadata.update(iAnn=iAnnmulti_batch)

        # Initialize inducing points Z
        #Z = kmm_init(self.X_all, self.num_inducing)
        self.Xdim = Z.shape[1]
        Z = np.tile(Z, (1, self.num_latent_funcs))

        inference_method = SVMOGPInf()

        # Only a small placeholder slice is passed to the SparseGP parent;
        # the full multi-output data are kept in self.Xmulti/self.Ymulti.
        super(SVMOGP, self).__init__(X=Xmulti_batch[0][1:10],
                                     Y=Ymulti_batch[0][1:10],
                                     Z=Z,
                                     kernel=kern_list[0],
                                     likelihood=likelihood,
                                     mean_function=None,
                                     X_variance=None,
                                     inference_method=inference_method,
                                     Y_metadata=Y_metadata,
                                     name=name,
                                     normalizer=False)

        self.unlink_parameter(
            self.kern)  # Unlink SparseGP default param kernel

        _, self.B_list = util.LCM(input_dim=self.Xdim,
                                  output_dim=self.num_output_funcs,
                                  rank=1,
                                  kernels_list=self.kern_list,
                                  W_list=self.W_list,
                                  kappa_list=self.kappa_list)

        # Set-up optimization parameters: [Z, m_u, L_u]
        self.q_u_means = Param(
            'm_u',
            np.zeros((self.num_inducing,
                      self.num_latent_funcs)))  # zero-initialized q(u) means
        chols = choleskies.triang_to_flat(
            np.tile(
                np.eye(self.num_inducing)[None, :, :],
                (self.num_latent_funcs, 1, 1)))
        self.q_u_chols = Param('L_u', chols)

        self.link_parameter(self.Z, index=0)
        self.link_parameter(self.q_u_means)
        self.link_parameter(self.q_u_chols)
        for kern_q in kern_list:  # link all kernels
            self.link_parameter(kern_q)
        for B_q in self.B_list:  # link all coregionalization matrices
            self.link_parameter(B_q)

        self.vem_step = True  # [True=VE-step, False=VM-step]
        self.ve_count = 0
        self.elbo = np.zeros((1, 1))
        self.index_VEM = 0  # indexes self.elbo correctly when using VEM
        self.Gauss_Newton = False  # flag for the Gauss-Newton approximation when dL_dV is needed
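
Both constructors defer the actual minibatch draw to a new_batch() method built on the climin slicers. Its implementation is not shown in these examples; the following is a hypothetical sketch of the likely mechanics for the annotated (iAnn) variant, assuming each slicer yields the next array of row indices for its task.

    def new_batch(self):
        # Hypothetical sketch: draw the next index set per task and slice
        # the full data accordingly.
        index_tasks = [next(slicer) for slicer in self.slicer_list]
        X_batch = [X_all[idx] for X_all, idx in zip(self.Xmulti_all, index_tasks)]
        Y_batch = [Y_all[idx] for Y_all, idx in zip(self.Ymulti_all, index_tasks)]
        iAnn_batch = [A_all[idx] for A_all, idx in zip(self.iAnn_all, index_tasks)]
        return X_batch, Y_batch, iAnn_batch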