def __init__(self, X, Y, Z, kern_list, likelihood, Y_metadata, name='HetMOGP', batch_size=None):
    """
    Initialization method for the heterogeneous multi-output GP model class.

    :param X: Input data
    :param Y: (Heterogeneous) Output data
    :param Z: Inducing inputs
    :param kern_list: Kernel functions of the GP priors
    :param likelihood: (Heterogeneous) Likelihoods
    :param Y_metadata: Linking info between latent functions F and likelihoods
    :param name: Model name
    :param batch_size: Size of the minibatch for stochastic optimization
    """
    # ----------------------------------- INITIALIZATIONS ----------------------------------- #
    # Class variables
    self.batch_size = batch_size
    self.kern_list = kern_list
    self.likelihood = likelihood
    self.Y_metadata = Y_metadata

    # Heterogeneous data: one input/output set per task
    self.Xmulti = X
    self.Ymulti = Y

    # Batches of data for stochastic mode
    self.Xmulti_all, self.Ymulti_all = X, Y
    if batch_size is None:
        self.stochastic = False
        Xmulti_batch, Ymulti_batch = X, Y
    else:
        # Build a climin-style slicer per task to make drawing minibatches much quicker
        self.stochastic = True
        self.slicer_list = [
            draw_mini_slices(Xmulti_task.shape[0], self.batch_size)
            for Xmulti_task in self.Xmulti
        ]
        Xmulti_batch, Ymulti_batch = self.new_batch()
        self.Xmulti, self.Ymulti = Xmulti_batch, Ymulti_batch

    # Model dimensions {M, Q, D}
    self.num_inducing = Z.shape[0]          # M
    self.num_latent_funcs = len(kern_list)  # Q
    self.num_output_funcs = likelihood.num_output_functions(self.Y_metadata)

    # Inducing inputs Z, replicated once per latent function
    self.Xdim = Z.shape[1]
    Z = np.tile(Z, (1, self.num_latent_funcs))

    # Inference
    inference_method = Inference()

    # Super-initialization of the model class (and inherited classes). Only a
    # small dummy slice of the first task is passed as X/Y; the real
    # multi-task data live in self.Xmulti / self.Ymulti.
    super(HetMOGP, self).__init__(X=Xmulti_batch[0][1:10],
                                  Y=Ymulti_batch[0][1:10],
                                  Z=Z,
                                  kernel=kern_list[0],
                                  likelihood=likelihood,
                                  mean_function=None,
                                  X_variance=None,
                                  inference_method=inference_method,
                                  Y_metadata=Y_metadata,
                                  name=name,
                                  normalizer=False)

    # Initialization of the multi-output GP mixing (LCM coregionalization),
    # with B_q = W_q W_q^T + diag(kappa_q)
    self.W_list, self.kappa_list = multi_output.random_W_kappas(
        self.num_latent_funcs, self.num_output_funcs, rank=1)
    _, self.B_list = multi_output.LCM(input_dim=self.Xdim,
                                      output_dim=self.num_output_funcs,
                                      rank=1,
                                      kernels_list=self.kern_list,
                                      W_list=self.W_list,
                                      kappa_list=self.kappa_list)

    # Variational parameters: q_u_means = mu (initialized to zero) and
    # q_u_chols = lower_triang(S) (initialized to the identity)
    self.q_u_means = Param(
        'm_u', np.zeros((self.num_inducing, self.num_latent_funcs)))
    chols = choleskies.triang_to_flat(
        np.tile(np.eye(self.num_inducing)[None, :, :],
                (self.num_latent_funcs, 1, 1)))
    self.q_u_chols = Param('L_u', chols)

    # ----------------------------- LINKS FOR OPTIMIZABLE PARAMETERS ----------------------------- #
    # Linking and unlinking of parameters and hyperparameters (for the paramz optimizer)
    self.unlink_parameter(self.kern)  # unlink SparseGP's default kernel parameter
    self.link_parameter(self.Z, index=0)
    self.link_parameter(self.q_u_means)
    self.link_parameter(self.q_u_chols)
    for kern_q in kern_list:          # link all kernels
        self.link_parameter(kern_q)
    for B_q in self.B_list:
        self.link_parameter(B_q)

    # Extra auxiliary variables
    self.vem_step = True              # [True = VE-step, False = VM-step]
    self.ve_count = 0
    self.elbo = np.zeros((1, 1))
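# --------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the model code): how this
# constructor is typically called. The GPy kernels are real; the
# `HetLikelihood` container and `generate_metadata()` helper are assumptions
# about the heterogeneous-likelihood API this class expects, so adapt those
# names to your own setup.

import numpy as np
import GPy

# Two heterogeneous outputs, each observed on its own inputs
X = [np.random.rand(100, 1), np.random.rand(80, 1)]
Y = [np.random.randn(100, 1), np.random.randn(80, 1)]
Z = np.linspace(0, 1, 10)[:, None]               # M = 10 inducing inputs
kern_list = [GPy.kern.RBF(1) for _ in range(2)]  # Q = 2 latent functions

# Hypothetical heterogeneous likelihood container (one likelihood per task):
# likelihood = HetLikelihood([Gaussian(), Bernoulli()])
# Y_metadata = likelihood.generate_metadata()
# model = HetMOGP(X, Y, Z, kern_list, likelihood, Y_metadata,
#                 batch_size=None)  # set batch_size=int for stochastic mode
# --------------------------------------------------------------------------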
def __init__(self, X, Y, Z, kern_list, likelihood, Y_metadata, name='SVMOGP', batch_size=None):
    """
    Initialization method for the stochastic variational multi-output GP
    model class. Parameters are as in HetMOGP.__init__ above.
    """
    self.batch_size = batch_size
    self.kern_list = kern_list
    self.likelihood = likelihood
    self.Y_metadata = Y_metadata

    # Model dimensions {M, Q}
    self.num_inducing = Z.shape[0]          # M
    self.num_latent_funcs = len(kern_list)  # Q
    self.num_output_funcs = likelihood.num_output_functions(self.Y_metadata)

    # Random initialization of the coregionalization weights
    self.W_list, self.kappa_list = util.random_W_kappas(
        self.num_latent_funcs, self.num_output_funcs, rank=1)

    self.Xmulti = X
    self.Ymulti = Y

    # Batch the data
    self.Xmulti_all, self.Ymulti_all = X, Y
    if batch_size is None:
        self.stochastic = False
        Xmulti_batch, Ymulti_batch = X, Y
    else:
        # Build a climin-style slicer per task to make drawing minibatches much quicker
        self.stochastic = True
        self.slicer_list = [
            draw_mini_slices(Xmulti_task.shape[0], self.batch_size)
            for Xmulti_task in self.Xmulti
        ]
        Xmulti_batch, Ymulti_batch = self.new_batch()
        self.Xmulti, self.Ymulti = Xmulti_batch, Ymulti_batch

    # Initialize inducing inputs Z, replicated once per latent function
    # Z = kmm_init(self.X_all, self.num_inducing)
    self.Xdim = Z.shape[1]
    Z = np.tile(Z, (1, self.num_latent_funcs))

    inference_method = SVMOGPInf()

    # Super-initialization; only a small dummy slice of the first task is
    # passed as X/Y, the real data live in self.Xmulti / self.Ymulti
    super(SVMOGP, self).__init__(X=Xmulti_batch[0][1:10],
                                 Y=Ymulti_batch[0][1:10],
                                 Z=Z,
                                 kernel=kern_list[0],
                                 likelihood=likelihood,
                                 mean_function=None,
                                 X_variance=None,
                                 inference_method=inference_method,
                                 Y_metadata=Y_metadata,
                                 name=name,
                                 normalizer=False)
    self.unlink_parameter(self.kern)  # unlink SparseGP's default kernel parameter

    # LCM coregionalization matrices B_q = W_q W_q^T + diag(kappa_q)
    _, self.B_list = util.LCM(input_dim=self.Xdim,
                              output_dim=self.num_output_funcs,
                              rank=1,
                              kernels_list=self.kern_list,
                              W_list=self.W_list,
                              kappa_list=self.kappa_list)

    # Set up optimization parameters: [Z, m_u, L_u]. The means get a random
    # initialization; the Cholesky factors start at the identity.
    self.q_u_means = Param(
        'm_u',
        5 * np.random.randn(self.num_inducing, self.num_latent_funcs) +
        np.tile(np.random.randn(1, self.num_latent_funcs),
                (self.num_inducing, 1)))
    chols = choleskies.triang_to_flat(
        np.tile(np.eye(self.num_inducing)[None, :, :],
                (self.num_latent_funcs, 1, 1)))
    self.q_u_chols = Param('L_u', chols)

    self.link_parameter(self.Z, index=0)
    self.link_parameter(self.q_u_means)
    self.link_parameter(self.q_u_chols)
    for kern_q in kern_list:          # link all kernels
        self.link_parameter(kern_q)
    for B_q in self.B_list:
        self.link_parameter(B_q)

    self.vem_step = True              # [True = VE-step, False = VM-step]
    self.ve_count = 0
    self.elbo = np.zeros((1, 1))
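# --------------------------------------------------------------------------
# Minibatch slicing sketch. `draw_mini_slices` above builds one slicer per
# task; its exact implementation lives elsewhere, so this is a minimal,
# self-contained sketch of the assumed behaviour (a climin-style infinite
# generator of index batches), not the library's actual code.

import numpy as np

def draw_mini_slices_sketch(n_samples, batch_size):
    """Yield arrays of `batch_size` row indices forever, reshuffling each epoch."""
    indices = np.arange(n_samples)
    while True:
        np.random.shuffle(indices)
        for start in range(0, n_samples - batch_size + 1, batch_size):
            yield indices[start:start + batch_size]

# What `new_batch()` presumably does with it, per task:
# slicer = draw_mini_slices_sketch(X_task.shape[0], batch_size)
# idx = next(slicer)
# X_batch, Y_batch = X_task[idx], Y_task[idx]
# --------------------------------------------------------------------------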
def __init__(self, X, Y, Z, kern_list, likelihood, Y_metadata, name='SVMOGP', batch_size=None, non_chained=True):
    """
    Variant of SVMOGP.__init__ that supports annotation indicators
    (Y_metadata['iAnn']) and an optional chained construction of the
    coregionalization weights (non_chained=False).
    """
    self.batch_size = batch_size
    self.kern_list = kern_list
    self.likelihood = likelihood
    self.Y_metadata = Y_metadata

    # Model dimensions {M, Q}
    self.num_inducing = Z.shape[0]          # M
    self.num_latent_funcs = len(kern_list)  # Q
    self.num_output_funcs = likelihood.num_output_functions(Y_metadata)

    if not non_chained:
        assert self.num_output_funcs == self.num_latent_funcs, \
            "we need a latent function per likelihood parameter"

    # Coregionalization weights: random for the standard model, chained
    # (one latent function per likelihood parameter) otherwise
    if non_chained:
        self.W_list, self.kappa_list = util.random_W_kappas(
            self.num_latent_funcs, self.num_output_funcs, rank=1)
    else:
        self.W_list, self.kappa_list = util.Chained_W_kappas(
            self.num_latent_funcs, self.num_output_funcs, rank=1)

    self.Xmulti = X
    self.Ymulti = Y
    self.iAnnMulti = Y_metadata['iAnn']  # annotation indicators per task

    # Batch the data
    self.Xmulti_all, self.Ymulti_all, self.iAnn_all = X, Y, Y_metadata['iAnn']
    if batch_size is None:
        # self.stochastic = False
        Xmulti_batch, Ymulti_batch, iAnnmulti_batch = X, Y, Y_metadata['iAnn']
    else:
        # Build a climin-style slicer per task to make drawing minibatches much quicker
        # self.stochastic = False  # (this was True as Pablo had it)
        self.slicer_list = [
            draw_mini_slices(Xmulti_task.shape[0], self.batch_size)
            for Xmulti_task in self.Xmulti
        ]
        Xmulti_batch, Ymulti_batch, iAnnmulti_batch = self.new_batch()
        self.Xmulti, self.Ymulti, self.iAnnMulti = Xmulti_batch, Ymulti_batch, iAnnmulti_batch
        self.Y_metadata.update(iAnn=iAnnmulti_batch)

    # Initialize inducing inputs Z, replicated once per latent function
    # Z = kmm_init(self.X_all, self.num_inducing)
    self.Xdim = Z.shape[1]
    Z = np.tile(Z, (1, self.num_latent_funcs))

    inference_method = SVMOGPInf()

    # Super-initialization; only a small dummy slice of the first task is
    # passed as X/Y, the real data live in self.Xmulti / self.Ymulti
    super(SVMOGP, self).__init__(X=Xmulti_batch[0][1:10],
                                 Y=Ymulti_batch[0][1:10],
                                 Z=Z,
                                 kernel=kern_list[0],
                                 likelihood=likelihood,
                                 mean_function=None,
                                 X_variance=None,
                                 inference_method=inference_method,
                                 Y_metadata=Y_metadata,
                                 name=name,
                                 normalizer=False)
    self.unlink_parameter(self.kern)  # unlink SparseGP's default kernel parameter

    # LCM coregionalization matrices B_q = W_q W_q^T + diag(kappa_q)
    _, self.B_list = util.LCM(input_dim=self.Xdim,
                              output_dim=self.num_output_funcs,
                              rank=1,
                              kernels_list=self.kern_list,
                              W_list=self.W_list,
                              kappa_list=self.kappa_list)

    # Set up optimization parameters: [Z, m_u, L_u]. Means are initialized
    # to zero, Cholesky factors to the identity.
    self.q_u_means = Param(
        'm_u', np.zeros((self.num_inducing, self.num_latent_funcs)))
    chols = choleskies.triang_to_flat(
        np.tile(np.eye(self.num_inducing)[None, :, :],
                (self.num_latent_funcs, 1, 1)))
    self.q_u_chols = Param('L_u', chols)

    self.link_parameter(self.Z, index=0)
    self.link_parameter(self.q_u_means)
    self.link_parameter(self.q_u_chols)
    for kern_q in kern_list:          # link all kernels
        self.link_parameter(kern_q)
    for B_q in self.B_list:
        self.link_parameter(B_q)

    self.vem_step = True              # [True = VE-step, False = VM-step]
    self.ve_count = 0
    self.elbo = np.zeros((1, 1))
    self.index_VEM = 0         # indexes self.elbo correctly when using VEM
    self.Gauss_Newton = False  # flag for the Gauss-Newton approximation when dL_dV is needed
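# --------------------------------------------------------------------------
# Variational-covariance sketch. All three constructors store q(u | m_u, S_u)
# through flattened Cholesky factors L_q with S_q = L_q L_q^T, initialized to
# the identity. A minimal sketch of that round trip with GPy's `choleskies`
# utilities (M and Q are example values; not part of the model code):

import numpy as np
from GPy.util import choleskies

M, Q = 10, 3  # num_inducing, num_latent_funcs

# One (M x M) identity lower-triangular factor per latent function q,
# flattened into the unconstrained vector the optimizer actually sees
chols = choleskies.triang_to_flat(
    np.tile(np.eye(M)[None, :, :], (Q, 1, 1)))

# Recovering the covariances S_q = L_q L_q^T from the flat representation
L = choleskies.flat_to_triang(chols)  # shape (Q, M, M)
S = np.einsum('qij,qkj->qik', L, L)   # each S_q equals the identity here
# --------------------------------------------------------------------------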