def __init__(self, X, Y, kern, mu_old, Su_old, Kaa_old, Z_old, Z,
             likelihood=likelihoods.Gaussian(), mean_function=None):
    """
    X is a data matrix, size N x D
    Y is a data matrix, size N x R
    Z is a matrix of pseudo inputs, size M x D
    kern, mean_function are appropriate GPy objects
    mu_old, Su_old are the mean and covariance of the old q(u)
    Z_old is the old inducing inputs

    This method only works with a Gaussian likelihood.
    """
    # X = X
    # Y = Y
    self.X = Param('input', X)
    self.Y = Param('output', Y)
    # likelihood = likelihoods.Gaussian()
    # GPModel.__init__(self, X, Y, kern, likelihood, mean_function)
    GP.__init__(self, X, Y, kern, likelihood, mean_function, inference_method=None)
    # GP.__init__(self, X, Y, kern, likelihood, mean_function)
    # SparseGP.__init__(self, X, Y, Z, kern, likelihood, mean_function, inference_method=GPy.inference.latent_function_inference.VarDTC())
    # SparseGP.__init__(self, X, Y, Z, kern, likelihood, mean_function, inference_method=None)
    self.Z = Param('inducing inputs', Z)
    self.link_parameter(self.Z)
    self.mean_function = mean_function
    self.num_data = X.shape[0]
    self.num_latent = Y.shape[1]

    # Quantities carried over from the previous (streaming) posterior
    self.mu_old = mu_old
    self.M_old = Z_old.shape[0]
    self.Su_old = Su_old
    self.Kaa_old = Kaa_old
    self.Z_old = Z_old

    self.ARD = True
    self.grad_fun = grad(self.objective)
def __init__(self, input_dim, variance1=1., variance2=1., lengthscale1=1., lengthscale2=1.,
             xc=1, active_dims=None):
    super(ChangepointRBF, self).__init__(input_dim, active_dims, 'chngpt')
    assert input_dim == 1, "For this kernel we assume input_dim = 1"
    self.variance1 = Param('variance1', variance1)
    self.variance2 = Param('variance2', variance2)
    self.lengthscale1 = Param('lengthscale1', lengthscale1)
    self.lengthscale2 = Param('lengthscale2', lengthscale2)
    self.rbf = RBF(input_dim=input_dim, lengthscale=1., variance=1.)
    self.xc = Param('xc', xc)
    self.add_parameters(self.variance1, self.variance2, self.lengthscale1, self.lengthscale2, self.xc)
def __init__(self, which, X, X_variance, Z, num_inducing, kernel):
    super(PsiStatModel, self).__init__(name='psi stat test')
    self.which = which
    self.X = Param("X", X)
    self.X_variance = Param('X_variance', X_variance, Logexp())
    self.q = NormalPosterior(self.X, self.X_variance)
    self.Z = Param("Z", Z)
    self.N, self.input_dim = X.shape
    self.num_inducing, input_dim = Z.shape
    assert self.input_dim == input_dim, "shape mismatch: Z:{!s} X:{!s}".format(Z.shape, X.shape)
    self.kern = kernel
    self.psi_ = self.kern.__getattribute__(self.which)(self.Z, self.q)
    self.add_parameters(self.q, self.Z, self.kern)
def test_param(self):
    param = Param('test', np.arange(4 * 2).reshape(4, 2))
    param[0].constrain_positive()
    param[1].fix()
    param[2].set_prior(Gaussian(0, 1))
    pcopy = param.copy()
    self.assertListEqual(param.tolist(), pcopy.tolist())
    self.assertListEqual(str(param).split('\n'), str(pcopy).split('\n'))
    self.assertIsNot(param, pcopy)
    with tempfile.TemporaryFile('w+b') as f:
        pickle.dump(param, f)
        f.seek(0)
        pcopy = pickle.load(f)
    self.assertListEqual(param.tolist(), pcopy.tolist())
    self.assertSequenceEqual(str(param), str(pcopy))
def __init__(self, param1=2., param2=3.):
    super(TestLikelihood, self).__init__("TestLike")
    self.p1 = Param('param1', param1)
    self.p2 = Param('param2', param2)
    self.link_parameter(self.p1)
    self.link_parameter(self.p2)
    self.p1.fix()
    self.p1.unfix()
    self.p2.constrain_negative()
    self.p1.fix()
    self.p2.constrain_positive()
    self.p2.fix()
    self.p2.constrain_positive()
def setUp(self):
    self.rbf = GPy.kern.RBF(20)
    self.white = GPy.kern.White(1)
    from GPy.core.parameterization import Param
    from GPy.core.parameterization.transformations import Logistic
    self.param = Param('param', np.random.uniform(0, 1, (10, 5)), Logistic(0, 1))
    self.test1 = GPy.core.Parameterized("test model")
    self.test1.param = self.param
    self.test1.kern = self.rbf + self.white
    self.test1.link_parameter(self.test1.kern)
    self.test1.link_parameter(self.param, 0)
    # print self.test1:
    # =============================================================================
    # test_model.          |    Value    |  Constraint   |  Prior  |  Tied to
    # param                |  (25L, 2L)  |   {0.0,1.0}   |         |
    # add.rbf.variance     |     1.0     |  0.0,1.0 +ve  |         |
    # add.rbf.lengthscale  |     1.0     |  0.0,1.0 +ve  |         |
    # add.white.variance   |     1.0     |  0.0,1.0 +ve  |         |
    # =============================================================================
    x = np.linspace(-2, 6, 4)[:, None]
    y = np.sin(x)
    self.testmodel = GPy.models.GPRegression(x, y)
def test_add_parameter_in_hierarchy(self):
    self.test1.kern.rbf.link_parameter(Param("NEW", np.random.rand(2), NegativeLogexp()), 1)
    self.assertListEqual(self.test1.constraints[NegativeLogexp()].tolist(),
                         list(range(self.param.size + 1, self.param.size + 1 + 2)))
    self.assertListEqual(self.test1.constraints[GPy.transformations.Logistic(0, 1)].tolist(),
                         list(range(self.param.size)))
    self.assertListEqual(self.test1.constraints[GPy.transformations.Logexp(0, 1)].tolist(),
                         np.r_[50, 53:55].tolist())
def __init__(self, X, Y, Z, kern_list, likelihood, mean_functions=None,
             name='SVGPMulti', Y_metadata=None, batchsize=None):
    """
    Extension to the SVGP to allow multiple latent functions, where the latent
    functions are assumed independent (one kernel per latent function).
    """
    # super(SVGPMulti, self).__init__(name)
    # Parameterized.__init__(self)
    assert X.ndim == 2
    self.Y_metadata = Y_metadata
    _, self.output_dim = Y.shape
    # self.Z = Param('inducing inputs', Z)
    # self.num_inducing = Z.shape[0]
    # self.likelihood = likelihood
    self.kern_list = kern_list
    self.batchsize = batchsize

    # Batch the data
    self.X_all, self.Y_all = X, Y
    if batchsize is None:
        X_batch, Y_batch = X, Y
    else:
        import climin.util
        # Make a climin slicer to make drawing minibatches much quicker
        self.slicer = climin.util.draw_mini_slices(self.X_all.shape[0], self.batchsize)
        X_batch, Y_batch = self.new_batch()

    # if isinstance(X_batch, (ObsAr, VariationalPosterior)):
    #     self.X = X_batch.copy()
    # else:
    #     self.X = ObsAr(X_batch)
    # self.Y = Y_batch

    # Create the SVI inference method
    # self.inference_method = svgp_inf()
    inference_method = svgp_inf()

    # Initialize base model
    super(SVGPMulti, self).__init__(X=X_batch, Y=Y_batch, Z=Z, kernel=kern_list[0],
                                    likelihood=likelihood, mean_function=None,
                                    X_variance=None, inference_method=inference_method,
                                    name=name, Y_metadata=Y_metadata, normalizer=False)
    self.unlink_parameter(self.kern)  # We don't want a single kern

    # self.num_data, self.input_dim = self.X.shape
    self.num_outputs = self.Y.shape[1]
    self.num_latent_funcs = self.likelihood.request_num_latent_functions(self.Y_all)

    # Make a latent function per dimension
    self.q_u_means = Param('q_u_means', np.zeros((self.num_inducing, self.num_latent_funcs)))
    chols = choleskies.triang_to_flat(
        np.tile(np.eye(self.num_inducing)[None, :, :], (self.num_latent_funcs, 1, 1)))
    self.q_u_chols = Param('qf_u_chols', chols)

    self.link_parameter(self.Z, index=0)
    self.link_parameter(self.q_u_means)
    self.link_parameter(self.q_u_chols)
    # self.link_parameter(self.likelihood)

    # Must pass a list of kernels that work on each latent function for now
    assert len(kern_list) == self.num_latent_funcs
    # Add the rest of the kernels, one kernel per latent function
    [self.link_parameter(kern) for kern in kern_list]
    # self.latent_f_list = [self.mf, self.mg]
    # self.latent_fchol_list = [self.cholf, self.cholg]

    if mean_functions is None:
        self.mean_functions = [None] * self.num_latent_funcs
    elif len(mean_functions) != len(kern_list):
        raise ValueError("Must provide a mean function for all latent functions as a list; "
                         "provide None if no mean function is needed for a specific latent function")
    else:
        self.mean_functions = []
        for m_f in mean_functions:
            if m_f is not None:
                self.link_parameter(m_f)
            self.mean_functions.append(m_f)
def __init__(self, X, Y, Z, kern_list, likelihood, Y_metadata, name='SVMOGP',
             batch_size=None, non_chained=True):
    self.batch_size = batch_size
    self.kern_list = kern_list
    self.likelihood = likelihood
    self.Y_metadata = Y_metadata

    self.num_inducing = Z.shape[0]  # M
    self.num_latent_funcs = len(kern_list)  # Q
    self.num_output_funcs = likelihood.num_output_functions(Y_metadata)
    if not non_chained:
        assert self.num_output_funcs == self.num_latent_funcs, "we need a latent function per likelihood parameter"

    if non_chained:
        self.W_list, self.kappa_list = util.random_W_kappas(self.num_latent_funcs,
                                                            self.num_output_funcs, rank=1)
    else:
        self.W_list, self.kappa_list = util.Chained_W_kappas(self.num_latent_funcs,
                                                             self.num_output_funcs, rank=1)

    self.Xmulti = X
    self.Ymulti = Y
    self.iAnnMulti = Y_metadata['iAnn']

    # Batch the data
    self.Xmulti_all, self.Ymulti_all, self.iAnn_all = X, Y, Y_metadata['iAnn']
    if batch_size is None:
        # self.stochastic = False
        Xmulti_batch, Ymulti_batch, iAnnmulti_batch = X, Y, Y_metadata['iAnn']
    else:
        # Makes a climin slicer to make drawing minibatches much quicker
        # self.stochastic = False  # "This was True as Pablo had it"
        self.slicer_list = []
        [self.slicer_list.append(draw_mini_slices(Xmulti_task.shape[0], self.batch_size))
         for Xmulti_task in self.Xmulti]
        Xmulti_batch, Ymulti_batch, iAnnmulti_batch = self.new_batch()
        self.Xmulti, self.Ymulti, self.iAnnMulti = Xmulti_batch, Ymulti_batch, iAnnmulti_batch
        self.Y_metadata.update(iAnn=iAnnmulti_batch)

    # Initialize inducing points Z
    # Z = kmm_init(self.X_all, self.num_inducing)
    self.Xdim = Z.shape[1]
    Z = np.tile(Z, (1, self.num_latent_funcs))

    inference_method = SVMOGPInf()

    super(SVMOGP, self).__init__(X=Xmulti_batch[0][1:10], Y=Ymulti_batch[0][1:10], Z=Z,
                                 kernel=kern_list[0], likelihood=likelihood,
                                 mean_function=None, X_variance=None,
                                 inference_method=inference_method,
                                 Y_metadata=Y_metadata, name=name, normalizer=False)
    self.unlink_parameter(self.kern)  # Unlink SparseGP default param kernel

    _, self.B_list = util.LCM(input_dim=self.Xdim, output_dim=self.num_output_funcs, rank=1,
                              kernels_list=self.kern_list, W_list=self.W_list,
                              kappa_list=self.kappa_list)

    # Set up optimization parameters: [Z, m_u, L_u]
    self.q_u_means = Param('m_u',
                           0.0 * np.random.randn(self.num_inducing, self.num_latent_funcs)
                           + 0.0 * np.tile(np.random.randn(1, self.num_latent_funcs),
                                           (self.num_inducing, 1)))
    chols = choleskies.triang_to_flat(
        np.tile(np.eye(self.num_inducing)[None, :, :], (self.num_latent_funcs, 1, 1)))
    self.q_u_chols = Param('L_u', chols)

    self.link_parameter(self.Z, index=0)
    self.link_parameter(self.q_u_means)
    self.link_parameters(self.q_u_chols)
    [self.link_parameter(kern_q) for kern_q in kern_list]  # link all kernels
    [self.link_parameter(B_q) for B_q in self.B_list]

    self.vem_step = True  # [True=VE-step, False=VM-step]
    self.ve_count = 0
    self.elbo = np.zeros((1, 1))
    self.index_VEM = 0  # this is a variable to index correctly the self.elbo when using VEM
    self.Gauss_Newton = False  # This is a flag for using the Gauss-Newton approximation when dL_dV is needed
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
    super(Kern_check_dK_dX, self).__init__(kernel=kernel, dL_dK=dL_dK, X=X, X2=X2)
    self.X = Param('X', X)
    self.link_parameter(self.X)
def __init__(self, kernel=None, dL_dK=None, X=None):
    super(Kern_check_d2Kdiag_dXdX, self).__init__(kernel=kernel, dL_dK=dL_dK, X=X)
    self.X = Param('X', X)
    self.link_parameter(self.X)
    self.Xc = X.copy()
def __init__(self, X, Y, Z, kern_list, likelihood, Y_metadata, name='HetMOGP', batch_size=None):
    """
    :param X: Input data
    :param Y: (Heterogeneous) Output data
    :param Z: Inducing inputs
    :param kern_list: Kernel functions of GP priors
    :param likelihood: (Heterogeneous) Likelihoods
    :param Y_metadata: Linking info between F->likelihoods
    :param name: Model name
    :param batch_size: Size of batch for stochastic optimization

    Description: Initialization method for the model class
    """
    # ---------------------------------------# INITIALIZATIONS #--------------------------------------------#
    ####### Initialization of class variables #######
    self.batch_size = batch_size
    self.kern_list = kern_list
    self.likelihood = likelihood
    self.Y_metadata = Y_metadata

    ####### Heterogeneous Data #######
    self.Xmulti = X
    self.Ymulti = Y

    ####### Batches of Data for Stochastic Mode #######
    self.Xmulti_all, self.Ymulti_all = X, Y
    if batch_size is None:
        self.stochastic = False
        Xmulti_batch, Ymulti_batch = X, Y
    else:
        ####### Makes a climin slicer to make drawing minibatches much quicker #######
        self.stochastic = True
        self.slicer_list = []
        [self.slicer_list.append(draw_mini_slices(Xmulti_task.shape[0], self.batch_size))
         for Xmulti_task in self.Xmulti]
        Xmulti_batch, Ymulti_batch = self.new_batch()
        self.Xmulti, self.Ymulti = Xmulti_batch, Ymulti_batch

    ####### Model dimensions {M, Q, D} #######
    self.num_inducing = Z.shape[0]  # M
    self.num_latent_funcs = len(kern_list)  # Q
    self.num_output_funcs = likelihood.num_output_functions(self.Y_metadata)

    ####### Inducing points Z #######
    self.Xdim = Z.shape[1]
    Z = np.tile(Z, (1, self.num_latent_funcs))

    ####### Inference #######
    inference_method = Inference()

    ####### Model class (and inherited classes) super-initialization #######
    super(HetMOGP, self).__init__(X=Xmulti_batch[0][1:10], Y=Ymulti_batch[0][1:10], Z=Z,
                                  kernel=kern_list[0], likelihood=likelihood,
                                  mean_function=None, X_variance=None,
                                  inference_method=inference_method,
                                  Y_metadata=Y_metadata, name=name, normalizer=False)

    ####### Initialization of the Multi-output GP mixing #######
    self.W_list, self.kappa_list = multi_output.random_W_kappas(self.num_latent_funcs,
                                                                self.num_output_funcs, rank=1)
    _, self.B_list = multi_output.LCM(input_dim=self.Xdim, output_dim=self.num_output_funcs,
                                      rank=1, kernels_list=self.kern_list,
                                      W_list=self.W_list, kappa_list=self.kappa_list)

    ####### Initialization of Variational Parameters (q_u_means = \mu, q_u_chols = lower_triang(S)) #######
    self.q_u_means = Param('m_u',
                           0 * np.random.randn(self.num_inducing, self.num_latent_funcs)
                           + 0 * np.tile(np.random.randn(1, self.num_latent_funcs),
                                         (self.num_inducing, 1)))
    chols = choleskies.triang_to_flat(
        np.tile(np.eye(self.num_inducing)[None, :, :], (self.num_latent_funcs, 1, 1)))
    self.q_u_chols = Param('L_u', chols)

    # -----------------------------# LINKS FOR OPTIMIZABLE PARAMETERS #---------------------------------------#
    ####### Linking and un-linking of parameters and hyperparameters (for the paramz optimizer) #######
    self.unlink_parameter(self.kern)  # Unlink SparseGP default param kernel
    self.link_parameter(self.Z, index=0)
    self.link_parameter(self.q_u_means)
    self.link_parameters(self.q_u_chols)
    [self.link_parameter(kern_q) for kern_q in kern_list]  # link all kernels
    [self.link_parameter(B_q) for B_q in self.B_list]

    ####### EXTRA. Auxiliary variables #######
    self.vem_step = True  # [True=VE-step, False=VM-step]
    self.ve_count = 0
    self.elbo = np.zeros((1, 1))
class StateSpace(Model):
    def __init__(self, X, Y, kernel=None, sigma2=1.0, name='StateSpace'):
        super(StateSpace, self).__init__(name=name)
        self.num_data, input_dim = X.shape
        assert input_dim == 1, "State space methods for time only"
        num_data_Y, self.output_dim = Y.shape
        assert num_data_Y == self.num_data, "X and Y data don't match"
        assert self.output_dim == 1, "State space methods for single outputs only"

        # Make sure the observations are ordered in time
        sort_index = np.argsort(X[:, 0])
        self.X = X[sort_index]
        self.Y = Y[sort_index]

        # Noise variance
        self.sigma2 = Param('Gaussian_noise', sigma2)
        self.link_parameter(self.sigma2)

        # Default kernel
        if kernel is None:
            self.kern = kern.Matern32(1)
        else:
            self.kern = kernel
        self.link_parameter(self.kern)
        self.sigma2.constrain_positive()

        # Assert that the kernel is supported
        if not hasattr(self.kern, 'sde'):
            raise NotImplementedError('SDE must be implemented for the kernel being used')
        # assert self.kern.sde() not False, "This kernel is not supported for state space estimation"

    def parameters_changed(self):
        """
        Parameters have now changed
        """
        # Get the model matrices from the kernel
        (F, L, Qc, H, Pinf, dF, dQc, dPinf) = self.kern.sde()

        # Use the Kalman filter to evaluate the likelihood
        self._log_marginal_likelihood = self.kf_likelihood(F, L, Qc, H, self.sigma2, Pinf,
                                                           self.X.T, self.Y.T)
        gradients = self.compute_gradients()
        self.sigma2.gradient_full[:] = gradients[-1]
        self.kern.gradient_full[:] = gradients[:-1]

    def log_likelihood(self):
        return self._log_marginal_likelihood

    def compute_gradients(self):
        # Get the model matrices from the kernel
        (F, L, Qc, H, Pinf, dFt, dQct, dPinft) = self.kern.sde()

        # Allocate space for the full partial derivative matrices
        dF = np.zeros([dFt.shape[0], dFt.shape[1], dFt.shape[2] + 1])
        dQc = np.zeros([dQct.shape[0], dQct.shape[1], dQct.shape[2] + 1])
        dPinf = np.zeros([dPinft.shape[0], dPinft.shape[1], dPinft.shape[2] + 1])

        # Assign the values for the kernel function
        dF[:, :, :-1] = dFt
        dQc[:, :, :-1] = dQct
        dPinf[:, :, :-1] = dPinft

        # The sigma2 derivative
        dR = np.zeros([1, 1, dF.shape[2]])
        dR[:, :, -1] = 1

        # Calculate the likelihood gradients
        gradients = self.kf_likelihood_g(F, L, Qc, H, self.sigma2, Pinf,
                                         dF, dQc, dPinf, dR, self.X.T, self.Y.T)
        return gradients

    def predict_raw(self, Xnew, Ynew=None, filteronly=False):
        # Set defaults
        if Ynew is None:
            Ynew = self.Y

        # Make a single matrix containing training and testing points
        X = np.vstack((self.X, Xnew))
        Y = np.vstack((Ynew, np.nan * np.zeros(Xnew.shape)))

        # Sort the matrix (save the order)
        _, return_index, return_inverse = np.unique(X, True, True)
        X = X[return_index]
        Y = Y[return_index]

        # Get the model matrices from the kernel
        (F, L, Qc, H, Pinf, dF, dQc, dPinf) = self.kern.sde()

        # Run the Kalman filter
        (M, P) = self.kalman_filter(F, L, Qc, H, self.sigma2, Pinf, X.T, Y.T)

        # Run the Rauch-Tung-Striebel smoother
        if not filteronly:
            (M, P) = self.rts_smoother(F, L, Qc, X.T, M, P)

        # Put the data back in the original order
        M = M[:, return_inverse]
        P = P[:, :, return_inverse]

        # Only return the values for Xnew
        M = M[:, self.num_data:]
        P = P[:, :, self.num_data:]

        # Calculate the mean and variance
        m = H.dot(M).T
        V = np.tensordot(H[0], P, (0, 0))
        V = np.tensordot(V, H[0], (0, 0))
        V = V[:, None]

        # Return the posterior of the state
        return (m, V)

    def predict(self, Xnew, filteronly=False):
        # Run the Kalman filter to get the state
        (m, V) = self.predict_raw(Xnew, filteronly=filteronly)

        # Add the noise variance to the state variance
        V += self.sigma2

        # Lower and upper bounds
        lower = m - 2 * np.sqrt(V)
        upper = m + 2 * np.sqrt(V)

        # Return mean and variance
        return (m, V, lower, upper)

    def plot(self, plot_limits=None, levels=20, samples=0, fignum=None, ax=None,
             resolution=None, plot_raw=False, plot_filter=False,
             linecol=Tango.colorsHex['darkBlue'], fillcol=Tango.colorsHex['lightBlue']):
        # Deal with optional parameters
        if ax is None:
            fig = pb.figure(num=fignum)
            ax = fig.add_subplot(111)

        # Define the frame on which to plot
        resolution = resolution or 200
        Xgrid, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)

        # Make a prediction on the frame and plot it
        if plot_raw:
            m, v = self.predict_raw(Xgrid, filteronly=plot_filter)
            lower = m - 2 * np.sqrt(v)
            upper = m + 2 * np.sqrt(v)
            Y = self.Y
        else:
            m, v, lower, upper = self.predict(Xgrid, filteronly=plot_filter)
            Y = self.Y

        # Plot the values
        gpplot(Xgrid, m, lower, upper, axes=ax, edgecol=linecol, fillcol=fillcol)
        ax.plot(self.X, self.Y, 'kx', mew=1.5)

        # Optionally plot some samples
        if samples:
            if plot_raw:
                Ysim = self.posterior_samples_f(Xgrid, samples)
            else:
                Ysim = self.posterior_samples(Xgrid, samples)
            for yi in Ysim.T:
                ax.plot(Xgrid, yi, Tango.colorsHex['darkBlue'], linewidth=0.25)

        # Set the limits of the plot to some sensible values
        ymin, ymax = min(np.append(Y.flatten(), lower.flatten())), max(np.append(Y.flatten(), upper.flatten()))
        ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
        ax.set_xlim(xmin, xmax)
        ax.set_ylim(ymin, ymax)

    def prior_samples_f(self, X, size=10):
        # Sort the matrix (save the order)
        (_, return_index, return_inverse) = np.unique(X, True, True)
        X = X[return_index]

        # Get the model matrices from the kernel
        (F, L, Qc, H, Pinf, dF, dQc, dPinf) = self.kern.sde()

        # Allocate space for results
        Y = np.empty((size, X.shape[0]))

        # Simulate random draws
        # for j in range(0, size):
        #     Y[j, :] = H.dot(self.simulate(F, L, Qc, Pinf, X.T))
        Y = self.simulate(F, L, Qc, Pinf, X.T, size)

        # Only observations
        Y = np.tensordot(H[0], Y, (0, 0))

        # Reorder simulated values
        Y = Y[:, return_inverse]

        # Return trajectory
        return Y.T

    def posterior_samples_f(self, X, size=10):
        # Sort the matrix (save the order)
        (_, return_index, return_inverse) = np.unique(X, True, True)
        X = X[return_index]

        # Get the model matrices from the kernel
        (F, L, Qc, H, Pinf, dF, dQc, dPinf) = self.kern.sde()

        # Run smoother on original data
        (m, V) = self.predict_raw(X)

        # Simulate random draws from the GP prior
        y = self.prior_samples_f(np.vstack((self.X, X)), size)

        # Allocate space for sample trajectories
        Y = np.empty((size, X.shape[0]))

        # Run the RTS smoother on each of these values
        for j in range(0, size):
            yobs = y[0:self.num_data, j:j + 1] + np.sqrt(self.sigma2) * np.random.randn(self.num_data, 1)
            (m2, V2) = self.predict_raw(X, Ynew=yobs)
            Y[j, :] = m.T + y[self.num_data:, j].T - m2.T

        # Reorder simulated values
        Y = Y[:, return_inverse]

        # Return posterior sample trajectories
        return Y.T

    def posterior_samples(self, X, size=10):
        # Make samples of f
        Y = self.posterior_samples_f(X, size)

        # Add noise
        Y += np.sqrt(self.sigma2) * np.random.randn(Y.shape[0], Y.shape[1])

        # Return trajectory
        return Y

    def kalman_filter(self, F, L, Qc, H, R, Pinf, X, Y):
        # KALMAN_FILTER - Run the Kalman filter for a given model and data

        # Allocate space for results
        MF = np.empty((F.shape[0], Y.shape[1]))
        PF = np.empty((F.shape[0], F.shape[0], Y.shape[1]))

        # Initialize
        MF[:, -1] = np.zeros(F.shape[0])
        PF[:, :, -1] = Pinf.copy()

        # Time step lengths
        dt = np.empty(X.shape)
        dt[:, 0] = X[:, 1] - X[:, 0]
        dt[:, 1:] = np.diff(X)

        # Solve the LTI SDE for these time steps
        As, Qs, index = self.lti_disc(F, L, Qc, dt)

        # Kalman filter
        for k in range(0, Y.shape[1]):
            # Form discrete-time model
            # (A, Q) = self.lti_disc(F, L, Qc, dt[:, k])
            A = As[:, :, index[k]]
            Q = Qs[:, :, index[k]]

            # Prediction step
            MF[:, k] = A.dot(MF[:, k - 1])
            PF[:, :, k] = A.dot(PF[:, :, k - 1]).dot(A.T) + Q

            # Update step (only if there is data)
            if not np.isnan(Y[:, k]):
                if Y.shape[0] == 1:
                    K = PF[:, :, k].dot(H.T) / (H.dot(PF[:, :, k]).dot(H.T) + R)
                else:
                    LL = linalg.cho_factor(H.dot(PF[:, :, k]).dot(H.T) + R)
                    K = linalg.cho_solve(LL, H.dot(PF[:, :, k].T)).T
                MF[:, k] += K.dot(Y[:, k] - H.dot(MF[:, k]))
                PF[:, :, k] -= K.dot(H).dot(PF[:, :, k])

        # Return values
        return (MF, PF)

    def rts_smoother(self, F, L, Qc, X, MS, PS):
        # RTS_SMOOTHER - Run the RTS smoother for a given model and data

        # Time step lengths
        dt = np.empty(X.shape)
        dt[:, 0] = X[:, 1] - X[:, 0]
        dt[:, 1:] = np.diff(X)

        # Solve the LTI SDE for these time steps
        As, Qs, index = self.lti_disc(F, L, Qc, dt)

        # Sequentially smooth states starting from the end
        for k in range(2, X.shape[1] + 1):
            # Form discrete-time model
            # (A, Q) = self.lti_disc(F, L, Qc, dt[:, 1 - k])
            A = As[:, :, index[1 - k]]
            Q = Qs[:, :, index[1 - k]]

            # Smoothing step
            LL = linalg.cho_factor(A.dot(PS[:, :, -k]).dot(A.T) + Q)
            G = linalg.cho_solve(LL, A.dot(PS[:, :, -k])).T
            MS[:, -k] += G.dot(MS[:, 1 - k] - A.dot(MS[:, -k]))
            PS[:, :, -k] += G.dot(PS[:, :, 1 - k] - A.dot(PS[:, :, -k]).dot(A.T) - Q).dot(G.T)

        # Return
        return (MS, PS)

    def kf_likelihood(self, F, L, Qc, H, R, Pinf, X, Y):
        # Evaluate marginal likelihood

        # Initialize
        lik = 0
        m = np.zeros((F.shape[0], 1))
        P = Pinf.copy()

        # Time step lengths
        dt = np.empty(X.shape)
        dt[:, 0] = X[:, 1] - X[:, 0]
        dt[:, 1:] = np.diff(X)

        # Solve the LTI SDE for these time steps
        As, Qs, index = self.lti_disc(F, L, Qc, dt)

        # Kalman filter for likelihood evaluation
        for k in range(0, Y.shape[1]):
            # Form discrete-time model
            # (A, Q) = self.lti_disc(F, L, Qc, dt[:, k])
            A = As[:, :, index[k]]
            Q = Qs[:, :, index[k]]

            # Prediction step
            m = A.dot(m)
            P = A.dot(P).dot(A.T) + Q

            # Update step only if there is data
            if not np.isnan(Y[:, k]):
                v = Y[:, k] - H.dot(m)
                if Y.shape[0] == 1:
                    S = H.dot(P).dot(H.T) + R
                    K = P.dot(H.T) / S
                    lik -= 0.5 * np.log(S)
                    lik -= 0.5 * v.shape[0] * np.log(2 * np.pi)
                    lik -= 0.5 * v * v / S
                else:
                    LL, isupper = linalg.cho_factor(H.dot(P).dot(H.T) + R)
                    lik -= np.sum(np.log(np.diag(LL)))
                    lik -= 0.5 * v.shape[0] * np.log(2 * np.pi)
                    lik -= 0.5 * linalg.cho_solve((LL, isupper), v).dot(v)
                    K = linalg.cho_solve((LL, isupper), H.dot(P.T)).T
                m += K.dot(v)
                P -= K.dot(H).dot(P)

        # Return likelihood
        return lik[0, 0]

    def kf_likelihood_g(self, F, L, Qc, H, R, Pinf, dF, dQc, dPinf, dR, X, Y):
        # Evaluate marginal likelihood gradient

        # State dimension, number of data points and number of parameters
        n = F.shape[0]
        steps = Y.shape[1]
        nparam = dF.shape[2]

        # Time steps
        t = X.squeeze()

        # Allocate space
        e = 0
        eg = np.zeros(nparam)

        # Set up
        m = np.zeros([n, 1])
        P = Pinf.copy()
        dm = np.zeros([n, nparam])
        dP = dPinf.copy()
        mm = m.copy()
        PP = P.copy()

        # Initial dt
        dt = -np.Inf

        # Allocate space for expm results
        AA = np.zeros([2 * n, 2 * n, nparam])
        FF = np.zeros([2 * n, 2 * n])

        # Loop over all observations
        for k in range(0, steps):
            # The previous time step
            dt_old = dt

            # The time discretization step length
            if k > 0:
                dt = t[k] - t[k - 1]
            else:
                dt = 0

            # Loop through all parameters (Kalman filter prediction step)
            for j in range(0, nparam):
                # Should we recalculate the matrix exponential?
                if abs(dt - dt_old) > 1e-9:
                    # The first matrix for the matrix factor decomposition
                    FF[:n, :n] = F
                    FF[n:, :n] = dF[:, :, j]
                    FF[n:, n:] = F

                    # Solve the matrix exponential
                    AA[:, :, j] = linalg.expm3(FF * dt)

                # Solve the differential equation
                foo = AA[:, :, j].dot(np.vstack([m, dm[:, j:j + 1]]))
                mm = foo[:n, :]
                dm[:, j:j + 1] = foo[n:, :]

                # The discrete-time dynamical model
                if j == 0:
                    A = AA[:n, :n, j]
                    Q = Pinf - A.dot(Pinf).dot(A.T)
                    PP = A.dot(P).dot(A.T) + Q

                # The derivatives of A and Q
                dA = AA[n:, :n, j]
                dQ = dPinf[:, :, j] - dA.dot(Pinf).dot(A.T) \
                    - A.dot(dPinf[:, :, j]).dot(A.T) - A.dot(Pinf).dot(dA.T)

                # The derivatives of P
                dP[:, :, j] = dA.dot(P).dot(A.T) + A.dot(dP[:, :, j]).dot(A.T) \
                    + A.dot(P).dot(dA.T) + dQ

            # Set predicted m and P
            m = mm
            P = PP

            # Start the Kalman filter update step and precalculate variables
            S = H.dot(P).dot(H.T) + R

            # We should calculate the Cholesky factor if S is a matrix
            # [LS, notposdef] = chol(S, 'lower');

            # The Kalman filter update (S is scalar)
            HtiS = H.T / S
            iS = 1 / S
            K = P.dot(HtiS)
            v = Y[:, k] - H.dot(m)
            vtiS = v.T / S

            # Loop through all parameters (Kalman filter update step derivative)
            for j in range(0, nparam):
                # Innovation covariance derivative
                dS = H.dot(dP[:, :, j]).dot(H.T) + dR[:, :, j]

                # Evaluate the energy derivative for j
                eg[j] = eg[j] \
                    - .5 * np.sum(iS * dS) \
                    + .5 * H.dot(dm[:, j:j + 1]).dot(vtiS.T) \
                    + .5 * vtiS.dot(dS).dot(vtiS.T) \
                    + .5 * vtiS.dot(H.dot(dm[:, j:j + 1]))

                # Kalman filter update step derivatives
                dK = dP[:, :, j].dot(HtiS) - P.dot(HtiS).dot(dS) / S
                dm[:, j:j + 1] = dm[:, j:j + 1] + dK.dot(v) - K.dot(H).dot(dm[:, j:j + 1])
                dKSKt = dK.dot(S).dot(K.T)
                dP[:, :, j] = dP[:, :, j] - dKSKt - K.dot(dS).dot(K.T) - dKSKt.T

            # Evaluate the energy
            # e = e - .5*S.shape[0]*np.log(2*np.pi) - np.sum(np.log(np.diag(LS))) - .5*vtiS.dot(v);
            e = e - .5 * S.shape[0] * np.log(2 * np.pi) - np.sum(np.log(np.sqrt(S))) - .5 * vtiS.dot(v)

            # Finish Kalman filter update step
            m = m + K.dot(v)
            P = P - K.dot(S).dot(K.T)

            # Make sure the covariances stay symmetric
            P = (P + P.T) / 2
            dP = (dP + dP.transpose([1, 0, 2])) / 2

            # raise NameError('Debug me')

        # Return the gradient
        return eg

    def kf_likelihood_g_notstable(self, F, L, Qc, H, R, Pinf, dF, dQc, dPinf, dR, X, Y):
        # Evaluate marginal likelihood gradient

        # State dimension, number of data points and number of parameters
        steps = Y.shape[1]
        nparam = dF.shape[2]
        n = F.shape[0]

        # Time steps
        t = X.squeeze()

        # Allocate space
        e = 0
        eg = np.zeros(nparam)

        # Set up
        Z = np.zeros(F.shape)
        QC = L.dot(Qc).dot(L.T)
        m = np.zeros([n, 1])
        P = Pinf.copy()
        dm = np.zeros([n, nparam])
        dP = dPinf.copy()
        mm = m.copy()
        PP = P.copy()

        # Initial dt
        dt = -np.Inf

        # Allocate space for expm results
        AA = np.zeros([2 * F.shape[0], 2 * F.shape[0], nparam])
        AAA = np.zeros([4 * F.shape[0], 4 * F.shape[0], nparam])
        FF = np.zeros([2 * F.shape[0], 2 * F.shape[0]])
        FFF = np.zeros([4 * F.shape[0], 4 * F.shape[0]])

        # Loop over all observations
        for k in range(0, steps):
            # The previous time step
            dt_old = dt

            # The time discretization step length
            if k > 0:
                dt = t[k] - t[k - 1]
            else:
                dt = t[1] - t[0]

            # Loop through all parameters (Kalman filter prediction step)
            for j in range(0, nparam):
                # Should we recalculate the matrix exponential?
                if abs(dt - dt_old) > 1e-9:
                    # The first matrix for the matrix factor decomposition
                    FF[:n, :n] = F
                    FF[n:, :n] = dF[:, :, j]
                    FF[n:, n:] = F

                    # Solve the matrix exponential
                    AA[:, :, j] = linalg.expm3(FF * dt)

                # Solve using matrix fraction decomposition
                foo = AA[:, :, j].dot(np.vstack([m, dm[:, j:j + 1]]))

                # Pick the parts
                mm = foo[:n, :]
                dm[:, j:j + 1] = foo[n:, :]

                # Should we recalculate the matrix exponential?
                if abs(dt - dt_old) > 1e-9:
                    # Define W and G
                    W = L.dot(dQc[:, :, j]).dot(L.T)
                    G = dF[:, :, j]

                    # The second matrix for the matrix factor decomposition
                    FFF[:n, :n] = F
                    FFF[2 * n:-n, :n] = G
                    FFF[:n, n:2 * n] = QC
                    FFF[n:2 * n, n:2 * n] = -F.T
                    FFF[2 * n:-n, n:2 * n] = W
                    FFF[-n:, n:2 * n] = -G.T
                    FFF[2 * n:-n, 2 * n:-n] = F
                    FFF[2 * n:-n, -n:] = QC
                    FFF[-n:, -n:] = -F.T

                    # Solve the matrix exponential
                    AAA[:, :, j] = linalg.expm3(FFF * dt)

                # Solve using matrix fraction decomposition
                foo = AAA[:, :, j].dot(np.vstack([P, np.eye(n), dP[:, :, j], np.zeros([n, n])]))

                # Pick the parts
                C = foo[:n, :]
                D = foo[n:2 * n, :]
                dC = foo[2 * n:-n, :]
                dD = foo[-n:, :]

                # The prediction step covariance (PP = C/D)
                if j == 0:
                    PP = linalg.solve(D.T, C.T).T
                    PP = (PP + PP.T) / 2

                # Solve dP for j (C/D == P_{k|k-1})
                dP[:, :, j] = linalg.solve(D.T, (dC - PP.dot(dD)).T).T

            # Set predicted m and P
            m = mm
            P = PP

            # Start the Kalman filter update step and precalculate variables
            S = H.dot(P).dot(H.T) + R

            # We should calculate the Cholesky factor if S is a matrix
            # [LS, notposdef] = chol(S, 'lower');

            # The Kalman filter update (S is scalar)
            HtiS = H.T / S
            iS = 1 / S
            K = P.dot(HtiS)
            v = Y[:, k] - H.dot(m)
            vtiS = v.T / S

            # Loop through all parameters (Kalman filter update step derivative)
            for j in range(0, nparam):
                # Innovation covariance derivative
                dS = H.dot(dP[:, :, j]).dot(H.T) + dR[:, :, j]

                # Evaluate the energy derivative for j
                eg[j] = eg[j] \
                    - .5 * np.sum(iS * dS) \
                    + .5 * H.dot(dm[:, j:j + 1]).dot(vtiS.T) \
                    + .5 * vtiS.dot(dS).dot(vtiS.T) \
                    + .5 * vtiS.dot(H.dot(dm[:, j:j + 1]))

                # Kalman filter update step derivatives
                dK = dP[:, :, j].dot(HtiS) - P.dot(HtiS).dot(dS) / S
                dm[:, j:j + 1] = dm[:, j:j + 1] + dK.dot(v) - K.dot(H).dot(dm[:, j:j + 1])
                dKSKt = dK.dot(S).dot(K.T)
                dP[:, :, j] = dP[:, :, j] - dKSKt - K.dot(dS).dot(K.T) - dKSKt.T

            # Evaluate the energy
            # e = e - .5*S.shape[0]*np.log(2*np.pi) - np.sum(np.log(np.diag(LS))) - .5*vtiS.dot(v);
            e = e - .5 * S.shape[0] * np.log(2 * np.pi) - np.sum(np.log(np.sqrt(S))) - .5 * vtiS.dot(v)

            # Finish Kalman filter update step
            m = m + K.dot(v)
            P = P - K.dot(S).dot(K.T)

            # Make sure the covariances stay symmetric
            P = (P + P.T) / 2
            dP = (dP + dP.transpose([1, 0, 2])) / 2

            # raise NameError('Debug me')

        # Report
        # print e
        # print eg

        # Return the gradient
        return eg

    def simulate(self, F, L, Qc, Pinf, X, size=1):
        # Simulate a trajectory using the state space model

        # Allocate space for results
        f = np.zeros((F.shape[0], size, X.shape[1]))

        # Initial state
        f[:, :, 1] = np.linalg.cholesky(Pinf).dot(np.random.randn(F.shape[0], size))

        # Time step lengths
        dt = np.empty(X.shape)
        dt[:, 0] = X[:, 1] - X[:, 0]
        dt[:, 1:] = np.diff(X)

        # Solve the LTI SDE for these time steps
        As, Qs, index = self.lti_disc(F, L, Qc, dt)

        # Sweep through remaining time points
        for k in range(1, X.shape[1]):
            # Form discrete-time model
            A = As[:, :, index[1 - k]]
            Q = Qs[:, :, index[1 - k]]

            # Draw the state
            f[:, :, k] = A.dot(f[:, :, k - 1]) + np.dot(np.linalg.cholesky(Q),
                                                        np.random.randn(A.shape[0], size))

        # Return values
        return f

    def lti_disc(self, F, L, Qc, dt):
        # Discrete-time solution to the LTI SDE

        # Dimensionality
        n = F.shape[0]
        index = 0

        # Check for numbers of time steps
        if dt.flatten().shape[0] == 1:
            # The covariance matrix by matrix fraction decomposition
            Phi = np.zeros((2 * n, 2 * n))
            Phi[:n, :n] = F
            Phi[:n, n:] = L.dot(Qc).dot(L.T)
            Phi[n:, n:] = -F.T
            AB = linalg.expm(Phi * dt).dot(np.vstack((np.zeros((n, n)), np.eye(n))))
            Q = linalg.solve(AB[n:, :].T, AB[:n, :].T)

            # The dynamical model
            A = linalg.expm(F * dt)

            # Return
            return A, Q

        # Optimize for cases where time steps occur repeatedly
        else:
            # Time discretizations (round to 14 decimals to avoid problems)
            dt, _, index = np.unique(np.round(dt, 14), True, True)

            # Allocate space for A and Q
            A = np.empty((n, n, dt.shape[0]))
            Q = np.empty((n, n, dt.shape[0]))

            # Call this function for each dt
            for j in range(0, dt.shape[0]):
                A[:, :, j], Q[:, :, j] = self.lti_disc(F, L, Qc, dt[j])

            # Return
            return A, Q, index
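# --- Hedged usage sketch (not part of the original source) ---
# Minimal example of driving the StateSpace model defined above. It assumes the
# class is available in this module together with its imports (numpy as np, a
# `kern` module whose Matern32 implements sde(), etc.); names below are
# illustrative only, not the author's own example.
if __name__ == '__main__':
    import numpy as np

    X = np.linspace(0, 10, 50)[:, None]            # time points, shape (N, 1)
    Y = np.sin(X) + 0.1 * np.random.randn(50, 1)   # noisy observations, shape (N, 1)

    m = StateSpace(X, Y, sigma2=0.1)               # falls back to the module's Matern32 kernel
    m.optimize()                                   # maximizes the Kalman-filter log likelihood
    mu, V, lower, upper = m.predict(np.linspace(0, 12, 100)[:, None])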
def __init__(self, X, Y, Z, kern_list, likelihood, Y_metadata, name='SVMOGP', batch_size=None):
    self.batch_size = batch_size
    self.kern_list = kern_list
    self.likelihood = likelihood
    self.Y_metadata = Y_metadata

    self.num_inducing = Z.shape[0]  # M
    self.num_latent_funcs = len(kern_list)  # Q
    self.num_output_funcs = likelihood.num_output_functions(self.Y_metadata)
    self.W_list, self.kappa_list = util.random_W_kappas(self.num_latent_funcs,
                                                        self.num_output_funcs, rank=1)

    self.Xmulti = X
    self.Ymulti = Y

    # Batch the data
    self.Xmulti_all, self.Ymulti_all = X, Y
    if batch_size is None:
        self.stochastic = False
        Xmulti_batch, Ymulti_batch = X, Y
    else:
        # Makes a climin slicer to make drawing minibatches much quicker
        self.stochastic = True
        self.slicer_list = []
        [self.slicer_list.append(draw_mini_slices(Xmulti_task.shape[0], self.batch_size))
         for Xmulti_task in self.Xmulti]
        Xmulti_batch, Ymulti_batch = self.new_batch()
        self.Xmulti, self.Ymulti = Xmulti_batch, Ymulti_batch

    # Initialize inducing points Z
    # Z = kmm_init(self.X_all, self.num_inducing)
    self.Xdim = Z.shape[1]
    Z = np.tile(Z, (1, self.num_latent_funcs))

    inference_method = SVMOGPInf()

    super(SVMOGP, self).__init__(X=Xmulti_batch[0][1:10], Y=Ymulti_batch[0][1:10], Z=Z,
                                 kernel=kern_list[0], likelihood=likelihood,
                                 mean_function=None, X_variance=None,
                                 inference_method=inference_method,
                                 Y_metadata=Y_metadata, name=name, normalizer=False)
    self.unlink_parameter(self.kern)  # Unlink SparseGP default param kernel

    _, self.B_list = util.LCM(input_dim=self.Xdim, output_dim=self.num_output_funcs, rank=1,
                              kernels_list=self.kern_list, W_list=self.W_list,
                              kappa_list=self.kappa_list)

    # Set up optimization parameters: [Z, m_u, L_u]
    self.q_u_means = Param('m_u',
                           5 * np.random.randn(self.num_inducing, self.num_latent_funcs)
                           + np.tile(np.random.randn(1, self.num_latent_funcs),
                                     (self.num_inducing, 1)))
    chols = choleskies.triang_to_flat(
        np.tile(np.eye(self.num_inducing)[None, :, :], (self.num_latent_funcs, 1, 1)))
    self.q_u_chols = Param('L_u', chols)

    self.link_parameter(self.Z, index=0)
    self.link_parameter(self.q_u_means)
    self.link_parameters(self.q_u_chols)
    [self.link_parameter(kern_q) for kern_q in kern_list]  # link all kernels
    [self.link_parameter(B_q) for B_q in self.B_list]

    self.vem_step = True  # [True=VE-step, False=VM-step]
    self.ve_count = 0
    self.elbo = np.zeros((1, 1))
def __init__(self, variance, degree=2, name='parabola'):
    super(Parabola, self).__init__(1, 1, name)
    self.variance = Param('variance', np.ones(degree + 1) * variance)
    self.degree = degree
    self.link_parameter(self.variance)
def __init__(self, name=None, parameters=[], *a, **kw):
    super(Test, self).__init__(name=name)
    self.x = Param('x', np.random.uniform(0, 1, (3, 4)))
    self.x[0].constrain_bounded(0, 1)
    self.link_parameter(self.x)
    self.x[1].fix()
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
    Kern_check_model.__init__(self, kernel=kernel, dL_dK=dL_dK, X=X, X2=X2)
    self.X = Param('X', X)
    self.link_parameter(self.X)
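# --- Hedged usage sketch (not part of the original source) ---
# The Kern_check_* wrappers above follow GPy's usual pattern for numerically
# verifying kernel gradients with respect to X: wrap a kernel and a fixed dL_dK
# in a small model, then call checkgrad(). This assumes Kern_check_model derives
# from GPy.core.Model; the names and shapes below are illustrative only.
def _example_check_dK_dX():
    import numpy as np
    import GPy

    X = np.random.randn(10, 2)
    m = Kern_check_dK_dX(kernel=GPy.kern.RBF(2), dL_dK=np.ones((10, 10)), X=X)
    return m.checkgrad(verbose=False)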
def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA',
             num_inducing=10, Z=None, kernel=None, inference_method=None,
             likelihood=None, name='bayesian gplvm', normalizer=None,
             missing_data=False, stochastic=False, batchsize=1):
    self.logger = logging.getLogger(self.__class__.__name__)
    if X is None:
        from ..util.initialization import initialize_latent
        self.logger.info("initializing latent space X with method {}".format(init))
        X, fracs = initialize_latent(init, input_dim, Y)
    else:
        fracs = np.ones(input_dim)

    self.init = init

    if Z is None:
        self.logger.info("initializing inducing inputs")
        Z = np.random.permutation(X.copy())[:num_inducing]
    assert Z.shape[1] == X.shape[1]

    if X_variance is False:
        self.logger.info('no variance on X, activating sparse GPLVM')
        X = Param("latent space", X)
    elif X_variance is None:
        self.logger.info("initializing latent space variance ~ uniform(0,.1)")
        X_variance = np.random.uniform(0, .1, X.shape)
        self.variational_prior = NormalPrior()
        X = NormalPosterior(X, X_variance)

    if kernel is None:
        self.logger.info("initializing kernel RBF")
        kernel = kern.RBF(input_dim, lengthscale=1. / fracs, ARD=True)  # + kern.Bias(input_dim) + kern.White(input_dim)

    if likelihood is None:
        likelihood = Gaussian()

    self.kl_factr = 1.

    if inference_method is None:
        from ..inference.latent_function_inference.var_dtc import VarDTC
        self.logger.debug("creating inference_method var_dtc")
        inference_method = VarDTC(limit=1 if not missing_data else Y.shape[1])

    if kernel.useGPU and isinstance(inference_method, VarDTC_GPU):
        kernel.psicomp.GPU_direct = True

    super(BayesianGPLVMMiniBatch, self).__init__(X, Y, Z, kernel,
                                                 likelihood=likelihood,
                                                 name=name,
                                                 inference_method=inference_method,
                                                 normalizer=normalizer,
                                                 missing_data=missing_data,
                                                 stochastic=stochastic,
                                                 batchsize=batchsize)
    self.X = X
    self.link_parameter(self.X, 0)
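# --- Hedged usage sketch (not part of the original source) ---
# Illustrative call into the constructor above: only the observed data and a
# latent dimensionality are required; everything else falls back to the defaults
# set there (PCA initialisation, ARD RBF kernel, Gaussian likelihood, VarDTC
# inference). Assumes the class is exposed as GPy.models.BayesianGPLVMMiniBatch.
def _example_bgplvm_minibatch():
    import numpy as np
    import GPy

    Y = np.random.randn(100, 12)                   # 100 observations, 12 output dimensions
    m = GPy.models.BayesianGPLVMMiniBatch(Y, input_dim=2, num_inducing=10)
    m.optimize(max_iters=10)
    return m.X.mean                                # posterior means of the latent coordinates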
def __init__(self, X, Y, Z, kern_list_uq, kern_list_Gx, kern_list_Tq, likelihood,
             Y_metadata, name='ConvHetMOGP_VIK', batch_size=None):
    self.batch_size = batch_size
    self.kern_list = kern_list_uq
    self.likelihood = likelihood
    self.Y_metadata = Y_metadata
    self.kern_list_Gdj = kern_list_Gx
    self.kern_list_Tq = kern_list_Tq

    self.num_inducing = Z.shape[0]  # M
    self.num_latent_funcs = len(kern_list_uq)  # Q
    self.num_output_funcs = likelihood.num_output_functions(self.Y_metadata)  # This is the number J in the paper
    self.W_list, self.kappa_list = util.random_W_kappas(self.num_latent_funcs,
                                                        self.num_output_funcs, rank=1)

    check_ARD_uq = [kern.lengthscale.shape[0] > 1 for kern in kern_list_uq]
    check_ARD_Gx = [kern.lengthscale.shape[0] > 1 for kern in kern_list_Gx]  # This is just to verify Automatic Relevance Determination
    check_ARD_Tq = [kern.lengthscale.shape[0] > 1 for kern in kern_list_Tq]
    if (sum(check_ARD_uq) == 0) and (sum(check_ARD_Gx) == 0) and (sum(check_ARD_Tq) == 0):
        isARD = False
    elif (sum(check_ARD_uq) == len(check_ARD_uq)) and (sum(check_ARD_Gx) == len(check_ARD_Gx)) \
            and (sum(check_ARD_Tq) == len(check_ARD_Tq)):
        isARD = True
    else:
        print('\nAll kernel lists for Uq, Gx and Tq have to coincide in Automatic Relevance Determination,')
        print('All kernel lists have to coincide: ARD=True or ARD=False\n')
        assert (sum(check_ARD_uq) == len(check_ARD_uq)) and (sum(check_ARD_Gx) == len(check_ARD_Gx)) \
            and (sum(check_ARD_Tq) == len(check_ARD_Tq))

    self.kern_aux = GPy.kern.RBF(input_dim=Z.shape[1], lengthscale=1.0, variance=1.0,
                                 name='rbf_aux', ARD=isARD) + GPy.kern.White(input_dim=Z.shape[1])
    self.kern_aux.white.variance = 1e-6

    self.Xmulti = X
    self.Ymulti = Y

    # Batch the data
    self.Xmulti_all, self.Ymulti_all = X, Y
    if batch_size is None:
        # self.stochastic = False
        Xmulti_batch, Ymulti_batch = X, Y
    else:
        # Makes a climin slicer to make drawing minibatches much quicker
        # self.stochastic = False  # "This was True as Pablo had it"
        self.slicer_list = []
        [self.slicer_list.append(draw_mini_slices(Xmulti_task.shape[0], self.batch_size))
         for Xmulti_task in self.Xmulti]
        Xmulti_batch, Ymulti_batch = self.new_batch()
        self.Xmulti, self.Ymulti = Xmulti_batch, Ymulti_batch

    # Initialize inducing points Z
    self.Xdim = Z.shape[1]
    Z = np.tile(Z, (1, self.num_latent_funcs))

    inference_method = SVMOGPInf()

    super(ConvHetMOGP_VIK, self).__init__(X=Xmulti_batch[0][1:10], Y=Ymulti_batch[0][1:10], Z=Z,
                                          kernel=kern_list_uq[0], likelihood=likelihood,
                                          mean_function=None, X_variance=None,
                                          inference_method=inference_method,
                                          Y_metadata=Y_metadata, name=name, normalizer=False)
    self.unlink_parameter(self.kern)  # Unlink SparseGP default param kernel

    _, self.B_list = util.LCM(input_dim=self.Xdim, output_dim=self.num_output_funcs, rank=1,
                              kernels_list=self.kern_list, W_list=self.W_list,
                              kappa_list=self.kappa_list)

    # Set up optimization parameters: [Z, m_u, L_u]
    self.q_u_means = [Param('m_u' + str(dj),
                            10.0 * np.random.randn(self.num_inducing, self.num_latent_funcs)
                            + 10.0 * np.tile(np.random.randn(1, self.num_latent_funcs),
                                             (self.num_inducing, 1)))
                      for dj in range(self.num_output_funcs)]
    chols = choleskies.triang_to_flat(
        np.tile(3 * np.eye(self.num_inducing)[None, :, :], (self.num_latent_funcs, 1, 1)))
    self.q_u_chols = Param('L_u', chols)

    self.link_parameter(self.Z, index=0)
    [self.link_parameter(q_u_means) for q_u_means in self.q_u_means]
    self.link_parameters(self.q_u_chols)
    [self.link_parameter(kern_q) for kern_q in kern_list_uq]  # link all kernels
    [self.link_parameter(B_q) for B_q in self.B_list]
    [self.link_parameter(kern_list_Gjd) for kern_list_Gjd in kern_list_Gx]
    [self.link_parameter(kern_list_Tq) for kern_list_Tq in kern_list_Tq]
    # self.link_parameter(self.kern_aux.white.variance)

    self.vem_step = True  # [True=VE-step, False=VM-step]
    self.ve_count = 0
    self.elbo = np.zeros((1, 1))
    self.index_VEM = 0  # this is a variable to index correctly the self.elbo when using VEM
    self.Gauss_Newton = False  # This is a flag for using the Gauss-Newton approximation when dL_dV is needed

    # Fix the kernel variances to 1.0
    for kern_q in self.kern_list:
        kern_q.variance = 1.0
        kern_q.variance.fix()
    for kern_Gjd in self.kern_list_Gdj:
        kern_Gjd.variance = 1.0
        kern_Gjd.variance.fix()
        # print('IN fix Gdj')
    for kern_Tq in self.kern_list_Tq:
        kern_Tq.variance = 1.0
        kern_Tq.variance.fix()
def __init__(self, input_dim, alp=1.0, bet=1.0, active_dims=None):
    super(ExpKernel, self).__init__(input_dim, active_dims, 'exp kernel')
    assert input_dim == 1, "For this kernel we assume input_dim=1"
    self.alp = Param("alp", alp)
    self.bet = Param("bet", bet)
    self.link_parameters(self.alp, self.bet)
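# --- Hedged sketch (not part of the original source) ---
# The __init__ above only registers the alp/bet parameters; for ExpKernel to be
# usable inside a GPy model it also needs at least K and Kdiag (and gradient
# updates). The covariance below, k(x, x') = alp * exp(-bet * |x - x'|), is an
# assumed form chosen only to illustrate the pattern; the original source does
# not define it.
#
#     def K(self, X, X2=None):
#         if X2 is None:
#             X2 = X
#         return self.alp * np.exp(-self.bet * np.abs(X - X2.T))
#
#     def Kdiag(self, X):
#         return self.alp * np.ones(X.shape[0])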