Example #1
    def __init__(self, X, Y, kernel=None, sigma2=1.0, name='StateSpace'):
        super(StateSpace, self).__init__(name=name)
        self.num_data, input_dim = X.shape
        assert input_dim == 1, "State space methods for time only"
        num_data_Y, self.output_dim = Y.shape
        assert num_data_Y == self.num_data, "X and Y data don't match"
        assert self.output_dim == 1, "State space methods for single outputs only"

        # Make sure the observations are ordered in time
        sort_index = np.argsort(X[:, 0])
        self.X = X[sort_index]
        self.Y = Y[sort_index]

        # Noise variance
        self.sigma2 = Param('Gaussian_noise', sigma2)
        self.link_parameter(self.sigma2)

        # Default kernel
        if kernel is None:
            self.kern = kern.Matern32(1)
        else:
            self.kern = kernel
        self.link_parameter(self.kern)

        self.sigma2.constrain_positive()

        # Assert that the kernel is supported
        if not hasattr(self.kern, 'sde'):
            raise NotImplementedError(
                'SDE must be implemented for the kernel being used')
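The pattern shown above — wrap a raw value in Param, register it with link_parameter, and then constrain it — is the core paramz/GPy idiom that the rest of these examples repeat. Below is a minimal, self-contained sketch of that idiom; it only assumes that GPy is installed, and the NoiseOnly class name is illustrative rather than taken from any of the sources here.

import GPy
from GPy.core.parameterization import Param


class NoiseOnly(GPy.core.Parameterized):
    """Toy parameterized object holding a single positive noise variance."""

    def __init__(self, sigma2=1.0, name='noise_only'):
        super(NoiseOnly, self).__init__(name=name)
        # Wrap the raw value in a Param so it becomes an optimizable parameter
        self.sigma2 = Param('Gaussian_noise', sigma2)
        self.link_parameter(self.sigma2)
        # Keep the variance positive during optimization
        self.sigma2.constrain_positive()


m = NoiseOnly(0.5)
print(m)        # prints the parameter table, including the +ve constraint
m.sigma2 = 2.0  # Param supports direct assignment of new values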
Example #2
    def __init__(self,
                 X,
                 Y,
                 kern,
                 mu_old,
                 Su_old,
                 Kaa_old,
                 Z_old,
                 Z,
                 likelihood=likelihoods.Gaussian(),
                 mean_function=None):
        """
        X is a data matrix, size N x D
        Y is a data matrix, size N x R
        Z is a matrix of pseudo inputs, size M x D
        kern, mean_function are appropriate gpflow objects
        mu_old, Su_old are the mean and covariance of the old q(u)
        Z_old holds the old inducing inputs
        This method only works with a Gaussian likelihood.
        """

        #        X = X
        #        Y=Y

        self.X = Param('input', X)
        self.Y = Param('output', Y)

        # likelihood = likelihoods.Gaussian()
        #        GPModel.__init__(self, X, Y, kern, likelihood, mean_function)
        GP.__init__(self,
                    X,
                    Y,
                    kern,
                    likelihood,
                    mean_function,
                    inference_method=None)
        #        GP.__init__(self, X, Y, kern, likelihood, mean_function)

        #        SparseGP.__init__(self, X, Y, Z, kern, likelihood, mean_function, inference_method = GPy.inference.latent_function_inference.VarDTC())
        #        SparseGP.__init__(self, X, Y, Z, kern, likelihood, mean_function, inference_method = None)

        self.Z = Param('inducing inputs', Z)
        self.link_parameter(self.Z)
        self.mean_function = mean_function
        self.num_data = X.shape[0]
        self.num_latent = Y.shape[1]

        self.mu_old = mu_old
        self.M_old = Z_old.shape[0]
        self.Su_old = Su_old
        self.Kaa_old = Kaa_old
        self.Z_old = Z_old
        self.ARD = True
        self.grad_fun = grad(self.objective)
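The constructor documented above is shown without its enclosing class, so the call below is only a hedged illustration of how it might be used: the class name OnlineSparseGP is assumed, and the old-posterior quantities (mu_old, Su_old, Kaa_old, Z_old) would normally come from a previously fitted sparse GP rather than the placeholders used here.

import numpy as np
import GPy

# Toy data: N x 1 inputs, N x 1 outputs, M x 1 pseudo inputs
X = np.random.rand(50, 1)
Y = np.sin(6 * X) + 0.1 * np.random.randn(50, 1)
Z = np.linspace(0, 1, 10)[:, None]

kern = GPy.kern.RBF(1)

# Placeholders standing in for the posterior of the previously fitted model
Z_old = np.linspace(0, 1, 10)[:, None]
mu_old = np.zeros((10, 1))
Su_old = np.eye(10)
Kaa_old = kern.K(Z_old)

# model = OnlineSparseGP(X, Y, kern, mu_old, Su_old, Kaa_old, Z_old, Z)  # hypothetical class name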
Example #3
 def __init__(self, input_dim, 
              variance1=1., variance2=1., lengthscale1=1., lengthscale2=1., xc=1, 
              active_dims=None):
     super(ChangepointRBF, self).__init__(input_dim, active_dims, 'chngpt')
     assert input_dim == 1, "For this kernel we assume input_dim = 1"
     self.variance1 = Param('variance1', variance1)
     self.variance2 = Param('variance2', variance2)
     self.lengthscale1 = Param('lengthscale1', lengthscale1)
     self.lengthscale2 = Param('lengthscale2', lengthscale2)
     self.rbf = RBF(input_dim=input_dim, lengthscale=1., variance=1.)
     self.xc = Param('xc', xc)
     self.add_parameters(self.variance1, self.variance2, self.lengthscale1, self.lengthscale2, self.xc)
Example #4
 def __init__(self, which, X, X_variance, Z, num_inducing, kernel):
     super(PsiStatModel, self).__init__(name='psi stat test')
     self.which = which
     self.X = Param("X", X)
     self.X_variance = Param('X_variance', X_variance, Logexp())
     self.q = NormalPosterior(self.X, self.X_variance)
     self.Z = Param("Z", Z)
     self.N, self.input_dim = X.shape
     self.num_inducing, input_dim = Z.shape
     assert self.input_dim == input_dim, "shape mismatch: Z:{!s} X:{!s}".format(
         Z.shape, X.shape)
     self.kern = kernel
     self.psi_ = self.kern.__getattribute__(self.which)(self.Z, self.q)
     self.add_parameters(self.q, self.Z, self.kern)
Example #5
 def test_param(self):
     param = Param('test', np.arange(4 * 2).reshape(4, 2))
     param[0].constrain_positive()
     param[1].fix()
     param[2].set_prior(Gaussian(0, 1))
     pcopy = param.copy()
     self.assertListEqual(param.tolist(), pcopy.tolist())
     self.assertListEqual(str(param).split('\n'), str(pcopy).split('\n'))
     self.assertIsNot(param, pcopy)
     with tempfile.TemporaryFile('w+b') as f:
         pickle.dump(param, f)
         f.seek(0)
         pcopy = pickle.load(f)
     self.assertListEqual(param.tolist(), pcopy.tolist())
     self.assertSequenceEqual(str(param), str(pcopy))
Example #6
            def __init__(self, param1=2., param2=3.):
                super(TestLikelihood, self).__init__("TestLike")
                self.p1 = Param('param1', param1)
                self.p2 = Param('param2', param2)

                self.link_parameter(self.p1)
                self.link_parameter(self.p2)

                self.p1.fix()
                self.p1.unfix()
                self.p2.constrain_negative()
                self.p1.fix()
                self.p2.constrain_positive()
                self.p2.fix()
                self.p2.constrain_positive()
Example #7
 def test_param(self):
     param = Param('test', np.arange(4*2).reshape(4,2))
     param[0].constrain_positive()
     param[1].fix()
     param[2].set_prior(Gaussian(0,1))
     pcopy = param.copy()
     self.assertListEqual(param.tolist(), pcopy.tolist())
     self.assertListEqual(str(param).split('\n'), str(pcopy).split('\n'))
     self.assertIsNot(param, pcopy)
     with tempfile.TemporaryFile('w+b') as f:
         pickle.dump(param, f)
         f.seek(0)
         pcopy = pickle.load(f)
     self.assertListEqual(param.tolist(), pcopy.tolist())
     self.assertSequenceEqual(str(param), str(pcopy))
Example #8
            def __init__(self, param1=2., param2=3.):
                super(TestLikelihood, self).__init__("TestLike")
                self.p1 = Param('param1', param1)
                self.p2 = Param('param2', param2)

                self.link_parameter(self.p1)
                self.link_parameter(self.p2)

                self.p1.fix()
                self.p1.unfix()
                self.p2.constrain_negative()
                self.p1.fix()
                self.p2.constrain_positive()
                self.p2.fix()
                self.p2.constrain_positive()
Example #9
    def setUp(self):
        self.rbf = GPy.kern.RBF(20)
        self.white = GPy.kern.White(1)
        from GPy.core.parameterization import Param
        from GPy.core.parameterization.transformations import Logistic
        self.param = Param('param', np.random.uniform(0, 1, (10, 5)),
                           Logistic(0, 1))

        self.test1 = GPy.core.Parameterized("test model")
        self.test1.param = self.param
        self.test1.kern = self.rbf + self.white
        self.test1.link_parameter(self.test1.kern)
        self.test1.link_parameter(self.param, 0)

        # print self.test1:
        #=============================================================================
        # test_model.          |    Value    |  Constraint   |  Prior  |  Tied to
        # param                |  (25L, 2L)  |   {0.0,1.0}   |         |
        # add.rbf.variance     |        1.0  |  0.0,1.0 +ve  |         |
        # add.rbf.lengthscale  |        1.0  |  0.0,1.0 +ve  |         |
        # add.white.variance   |        1.0  |  0.0,1.0 +ve  |         |
        #=============================================================================

        x = np.linspace(-2, 6, 4)[:, None]
        y = np.sin(x)
        self.testmodel = GPy.models.GPRegression(x, y)
Example #10
 def test_add_parameter_in_hierarchy(self):
     self.test1.kern.rbf.link_parameter(
         Param("NEW", np.random.rand(2), NegativeLogexp()), 1)
     self.assertListEqual(
         self.test1.constraints[NegativeLogexp()].tolist(),
         range(self.param.size + 1, self.param.size + 1 + 2))
     self.assertListEqual(
         self.test1.constraints[GPy.transformations.Logistic(0,
                                                             1)].tolist(),
         range(self.param.size))
     self.assertListEqual(
         self.test1.constraints[GPy.transformations.Logexp(0, 1)].tolist(),
         np.r_[50, 53:55].tolist())
Example #11
    def __init__(self,
                 X,
                 Y,
                 Z,
                 kern_list,
                 likelihood,
                 mean_functions=None,
                 name='SVGPMulti',
                 Y_metadata=None,
                 batchsize=None):
        """
        Extension to the SVGP to allow multiple latent functions,
        where the latent functions are assumed independent (one kernel per latent function).
        """
        # super(SVGPMulti, self).__init__(name)  # Parameterized.__init__(self)

        assert X.ndim == 2
        self.Y_metadata = Y_metadata
        _, self.output_dim = Y.shape

        # self.Z = Param('inducing inputs', Z)
        # self.num_inducing = Z.shape[0]
        # self.likelihood = likelihood

        self.kern_list = kern_list
        self.batchsize = batchsize

        #Batch the data
        self.X_all, self.Y_all = X, Y
        if batchsize is None:
            X_batch, Y_batch = X, Y
        else:
            import climin.util
            #Make a climin slicer to make drawing minibatches much quicker
            self.slicer = climin.util.draw_mini_slices(self.X_all.shape[0],
                                                       self.batchsize)
            X_batch, Y_batch = self.new_batch()

        # if isinstance(X_batch, (ObsAr, VariationalPosterior)):
        # self.X = X_batch.copy()
        # else:
        # self.X = ObsAr(X_batch)
        # self.Y = Y_batch

        #create the SVI inference method
        # self.inference_method = svgp_inf()
        inference_method = svgp_inf()

        #Initialize base model
        super(SVGPMulti, self).__init__(X=X_batch,
                                        Y=Y_batch,
                                        Z=Z,
                                        kernel=kern_list[0],
                                        likelihood=likelihood,
                                        mean_function=None,
                                        X_variance=None,
                                        inference_method=inference_method,
                                        name=name,
                                        Y_metadata=Y_metadata,
                                        normalizer=False)
        self.unlink_parameter(self.kern)  # We don't want a single kern

        # self.num_data, self.input_dim = self.X.shape
        self.num_outputs = self.Y.shape[1]

        self.num_latent_funcs = self.likelihood.request_num_latent_functions(
            self.Y_all)

        #Make a latent function per dimension
        self.q_u_means = Param(
            'q_u_means', np.zeros((self.num_inducing, self.num_latent_funcs)))
        chols = choleskies.triang_to_flat(
            np.tile(
                np.eye(self.num_inducing)[None, :, :],
                (self.num_latent_funcs, 1, 1)))
        self.q_u_chols = Param('qf_u_chols', chols)

        self.link_parameter(self.Z, index=0)
        self.link_parameter(self.q_u_means)
        self.link_parameter(self.q_u_chols)
        # self.link_parameter(self.likelihood)

        #Must pass a list of kernels that work on each latent function for now
        assert len(kern_list) == self.num_latent_funcs
        #Add the rest of the kernels, one kernel per latent function
        [self.link_parameter(kern) for kern in kern_list]
        #self.latent_f_list = [self.mf, self.mg]
        #self.latent_fchol_list = [self.cholf, self.cholg]

        if mean_functions is None:
            self.mean_functions = [None] * self.num_latent_funcs
        elif len(mean_functions) != len(kern_list):
            raise ValueError("Must provide a mean function for all latent\n\
                             functions as a list, provide None if no latent\n\
                             function is needed for a specific latent function"
                             )
        else:
            self.mean_functions = []
            for m_f in mean_functions:
                if m_f is not None:
                    self.link_parameter(m_f)
                self.mean_functions.append(m_f)
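As the docstring notes, the model expects one kernel per latent function. The sketch below shows how the pieces fit together under that assumption; the concrete likelihood (whose request_num_latent_functions call decides how many latent functions are created) is not part of the snippet, so it is left as a commented placeholder.

import numpy as np
import GPy

N, M, Q = 200, 15, 2          # data points, inducing points, latent functions
X = np.random.rand(N, 1)
Y = np.random.randn(N, 1)
Z = X[np.random.choice(N, M, replace=False)].copy()
kern_list = [GPy.kern.RBF(1) for _ in range(Q)]   # one kernel per latent function

# likelihood = SomeMultiLatentLikelihood(...)              # placeholder, not from the source
# m = SVGPMulti(X, Y, Z, kern_list, likelihood, batchsize=50)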
Example #12
    def __init__(self,
                 X,
                 Y,
                 Z,
                 kern_list,
                 likelihood,
                 Y_metadata,
                 name='SVMOGP',
                 batch_size=None,
                 non_chained=True):

        self.batch_size = batch_size
        self.kern_list = kern_list
        self.likelihood = likelihood
        self.Y_metadata = Y_metadata

        self.num_inducing = Z.shape[0]  # M
        self.num_latent_funcs = len(kern_list)  # Q
        self.num_output_funcs = likelihood.num_output_functions(Y_metadata)

        if (not non_chained):
            assert self.num_output_funcs == self.num_latent_funcs, "we need a latent function per likelihood parameter"

        if non_chained:
            self.W_list, self.kappa_list = util.random_W_kappas(
                self.num_latent_funcs, self.num_output_funcs, rank=1)
        else:
            self.W_list, self.kappa_list = util.Chained_W_kappas(
                self.num_latent_funcs, self.num_output_funcs, rank=1)

        self.Xmulti = X
        self.Ymulti = Y
        self.iAnnMulti = Y_metadata['iAnn']

        # Batch the data
        self.Xmulti_all, self.Ymulti_all, self.iAnn_all = X, Y, Y_metadata[
            'iAnn']
        if batch_size is None:
            #self.stochastic = False
            Xmulti_batch, Ymulti_batch, iAnnmulti_batch = X, Y, Y_metadata[
                'iAnn']
        else:
            # Makes a climin slicer to make drawing minibatches much quicker
            #self.stochastic = False   #"This was True as Pablo had it"
            self.slicer_list = []
            [
                self.slicer_list.append(
                    draw_mini_slices(Xmulti_task.shape[0], self.batch_size))
                for Xmulti_task in self.Xmulti
            ]
            Xmulti_batch, Ymulti_batch, iAnnmulti_batch = self.new_batch()
            self.Xmulti, self.Ymulti, self.iAnnMulti = Xmulti_batch, Ymulti_batch, iAnnmulti_batch
            self.Y_metadata.update(iAnn=iAnnmulti_batch)

        # Initialize inducing points Z
        #Z = kmm_init(self.X_all, self.num_inducing)
        self.Xdim = Z.shape[1]
        Z = np.tile(Z, (1, self.num_latent_funcs))

        inference_method = SVMOGPInf()

        super(SVMOGP, self).__init__(X=Xmulti_batch[0][1:10],
                                     Y=Ymulti_batch[0][1:10],
                                     Z=Z,
                                     kernel=kern_list[0],
                                     likelihood=likelihood,
                                     mean_function=None,
                                     X_variance=None,
                                     inference_method=inference_method,
                                     Y_metadata=Y_metadata,
                                     name=name,
                                     normalizer=False)

        self.unlink_parameter(
            self.kern)  # Unlink SparseGP default param kernel

        _, self.B_list = util.LCM(input_dim=self.Xdim,
                                  output_dim=self.num_output_funcs,
                                  rank=1,
                                  kernels_list=self.kern_list,
                                  W_list=self.W_list,
                                  kappa_list=self.kappa_list)

        # Set-up optimization parameters: [Z, m_u, L_u]
        self.q_u_means = Param(
            'm_u',
            0.0 * np.random.randn(self.num_inducing, self.num_latent_funcs) +
            0.0 * np.tile(np.random.randn(1, self.num_latent_funcs),
                          (self.num_inducing, 1)))
        chols = choleskies.triang_to_flat(
            np.tile(
                np.eye(self.num_inducing)[None, :, :],
                (self.num_latent_funcs, 1, 1)))
        self.q_u_chols = Param('L_u', chols)

        self.link_parameter(self.Z, index=0)
        self.link_parameter(self.q_u_means)
        self.link_parameters(self.q_u_chols)
        [self.link_parameter(kern_q)
         for kern_q in kern_list]  # link all kernels
        [self.link_parameter(B_q) for B_q in self.B_list]

        self.vem_step = True  # [True=VE-step, False=VM-step]
        self.ve_count = 0
        self.elbo = np.zeros((1, 1))
        self.index_VEM = 0  #this is a variable to index correctly the self.elbo when using VEM
        self.Gauss_Newton = False  #This is a flag for using the Gauss-Newton approximation when dL_dV is needed
Example #13
 def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
     super(Kern_check_dK_dX, self).__init__(kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
     self.X = Param('X',X)
     self.link_parameter(self.X)
Example #14
 def __init__(self, kernel=None, dL_dK=None, X=None):
     super(Kern_check_d2Kdiag_dXdX, self).__init__(kernel=kernel,dL_dK=dL_dK, X=X)
     self.X = Param('X',X)
     self.link_parameter(self.X)
     self.Xc = X.copy()
Example #15
    def __init__(self,
                 X,
                 Y,
                 Z,
                 kern_list,
                 likelihood,
                 Y_metadata,
                 name='HetMOGP',
                 batch_size=None):
        """
        :param X:           Input data
        :param Y:           (Heterogeneous) Output data
        :param Z:           Inducing inputs
        :param kern_list:   Kernel functions of GP priors
        :param likelihood:  (Heterogeneous) Likelihoods
        :param Y_metadata:  Linking info between F->likelihoods
        :param name:        Model name
        :param batch_size:  Size of batch for stochastic optimization

        Description: Initialization method for the model class
        """

        #---------------------------------------#     INITIALIZATIONS     #--------------------------------------------#
        #######   Initialization of class variables  #######
        self.batch_size = batch_size
        self.kern_list = kern_list
        self.likelihood = likelihood
        self.Y_metadata = Y_metadata

        #######   Heterogeneous Data  #######
        self.Xmulti = X
        self.Ymulti = Y

        #######  Batches of Data for Stochastic Mode   #######
        self.Xmulti_all, self.Ymulti_all = X, Y
        if batch_size is None:
            self.stochastic = False
            Xmulti_batch, Ymulti_batch = X, Y
        else:
            #######   Makes a climin slicer to make drawing minibatches much quicker   #######
            self.stochastic = True
            self.slicer_list = []
            [
                self.slicer_list.append(
                    draw_mini_slices(Xmulti_task.shape[0], self.batch_size))
                for Xmulti_task in self.Xmulti
            ]
            Xmulti_batch, Ymulti_batch = self.new_batch()
            self.Xmulti, self.Ymulti = Xmulti_batch, Ymulti_batch

        #######   Model dimensions {M, Q, D}  #######
        self.num_inducing = Z.shape[0]  # M
        self.num_latent_funcs = len(kern_list)  # Q
        self.num_output_funcs = likelihood.num_output_functions(
            self.Y_metadata)

        ####### Inducing points Z #######
        self.Xdim = Z.shape[1]
        Z = np.tile(Z, (1, self.num_latent_funcs))

        #######   Inference   #######
        inference_method = Inference()

        #######  Model class (and inherited classes) super-initialization  #######
        super(HetMOGP, self).__init__(X=Xmulti_batch[0][1:10],
                                      Y=Ymulti_batch[0][1:10],
                                      Z=Z,
                                      kernel=kern_list[0],
                                      likelihood=likelihood,
                                      mean_function=None,
                                      X_variance=None,
                                      inference_method=inference_method,
                                      Y_metadata=Y_metadata,
                                      name=name,
                                      normalizer=False)

        #######  Initialization of the Multi-output GP mixing  #######
        self.W_list, self.kappa_list = multi_output.random_W_kappas(
            self.num_latent_funcs, self.num_output_funcs, rank=1)
        _, self.B_list = multi_output.LCM(input_dim=self.Xdim,
                                          output_dim=self.num_output_funcs,
                                          rank=1,
                                          kernels_list=self.kern_list,
                                          W_list=self.W_list,
                                          kappa_list=self.kappa_list)

        ####### Initialization of Variational Parameters (q_u_means = \mu, q_u_chols = lower_triang(S))  #######
        self.q_u_means = Param(
            'm_u',
            0 * np.random.randn(self.num_inducing, self.num_latent_funcs) +
            0 * np.tile(np.random.randn(1, self.num_latent_funcs),
                        (self.num_inducing, 1)))
        chols = choleskies.triang_to_flat(
            np.tile(
                np.eye(self.num_inducing)[None, :, :],
                (self.num_latent_funcs, 1, 1)))
        self.q_u_chols = Param('L_u', chols)

        #-----------------------------#   LINKS FOR OPTIMIZABLE PARAMETERS     #---------------------------------------#

        ####### Linking and Un-linking of parameters and hyperaparameters (for ParamZ optimizer)  #######
        self.unlink_parameter(
            self.kern)  # Unlink SparseGP default param kernel
        self.link_parameter(self.Z, index=0)
        self.link_parameter(self.q_u_means)
        self.link_parameters(self.q_u_chols)
        [self.link_parameter(kern_q)
         for kern_q in kern_list]  # link all kernels
        [self.link_parameter(B_q) for B_q in self.B_list]

        ####### EXTRA. Auxiliary variables  #######
        self.vem_step = True  # [True=VE-step, False=VM-step]
        self.ve_count = 0
        self.elbo = np.zeros((1, 1))
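In this heterogeneous multi-output setting, X and Y are lists with one array per output and Y_metadata is produced by the (heterogeneous) likelihood. The construction sketch below is a rough guide only; the HetLikelihood helper and its generate_metadata method are assumptions about the surrounding hetmogp code, not part of the snippet above.

import numpy as np
import GPy

# Two outputs with different sample sizes: one real-valued, one binary
X = [np.random.rand(100, 1), np.random.rand(80, 1)]
Y = [np.random.randn(100, 1), (np.random.rand(80, 1) > 0.5) * 1.0]

Q = 3                                             # number of latent functions u_q
Z = np.linspace(0, 1, 20)[:, None]                # shared inducing inputs, M x D
kern_list = [GPy.kern.RBF(1) for _ in range(Q)]   # one kernel per latent function

# likelihood = HetLikelihood([Gaussian(), Bernoulli()])    # assumed helper classes
# Y_metadata = likelihood.generate_metadata()              # assumed helper method
# m = HetMOGP(X, Y, Z, kern_list, likelihood, Y_metadata, batch_size=None)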
Example #16
class StateSpace(Model):
    def __init__(self, X, Y, kernel=None, sigma2=1.0, name='StateSpace'):
        super(StateSpace, self).__init__(name=name)
        self.num_data, input_dim = X.shape
        assert input_dim == 1, "State space methods for time only"
        num_data_Y, self.output_dim = Y.shape
        assert num_data_Y == self.num_data, "X and Y data don't match"
        assert self.output_dim == 1, "State space methods for single outputs only"

        # Make sure the observations are ordered in time
        sort_index = np.argsort(X[:, 0])
        self.X = X[sort_index]
        self.Y = Y[sort_index]

        # Noise variance
        self.sigma2 = Param('Gaussian_noise', sigma2)
        self.link_parameter(self.sigma2)

        # Default kernel
        if kernel is None:
            self.kern = kern.Matern32(1)
        else:
            self.kern = kernel
        self.link_parameter(self.kern)

        self.sigma2.constrain_positive()

        # Assert that the kernel is supported
        if not hasattr(self.kern, 'sde'):
            raise NotImplementedError(
                'SDE must be implemented for the kernel being used')
        #assert self.kern.sde() not False, "This kernel is not supported for state space estimation"

    def parameters_changed(self):
        """
        Parameters have now changed
        """
        # Get the model matrices from the kernel
        (F, L, Qc, H, Pinf, dF, dQc, dPinf) = self.kern.sde()

        # Use the Kalman filter to evaluate the likelihood
        self._log_marginal_likelihood = self.kf_likelihood(
            F, L, Qc, H, self.sigma2, Pinf, self.X.T, self.Y.T)
        gradients = self.compute_gradients()
        self.sigma2.gradient_full[:] = gradients[-1]
        self.kern.gradient_full[:] = gradients[:-1]

    def log_likelihood(self):
        return self._log_marginal_likelihood

    def compute_gradients(self):
        # Get the model matrices from the kernel
        (F, L, Qc, H, Pinf, dFt, dQct, dPinft) = self.kern.sde()

        # Allocate space for the full partial derivative matrices
        dF = np.zeros([dFt.shape[0], dFt.shape[1], dFt.shape[2] + 1])
        dQc = np.zeros([dQct.shape[0], dQct.shape[1], dQct.shape[2] + 1])
        dPinf = np.zeros(
            [dPinft.shape[0], dPinft.shape[1], dPinft.shape[2] + 1])

        # Assign the values for the kernel function
        dF[:, :, :-1] = dFt
        dQc[:, :, :-1] = dQct
        dPinf[:, :, :-1] = dPinft

        # The sigma2 derivative
        dR = np.zeros([1, 1, dF.shape[2]])
        dR[:, :, -1] = 1

        # Calculate the likelihood gradients
        gradients = self.kf_likelihood_g(F, L, Qc, H, self.sigma2, Pinf, dF,
                                         dQc, dPinf, dR, self.X.T, self.Y.T)
        return gradients

    def predict_raw(self, Xnew, Ynew=None, filteronly=False):

        # Set defaults
        if Ynew is None:
            Ynew = self.Y

        # Make a single matrix containing training and testing points
        X = np.vstack((self.X, Xnew))
        Y = np.vstack((Ynew, np.nan * np.zeros(Xnew.shape)))

        # Sort the matrix (save the order)
        _, return_index, return_inverse = np.unique(X, True, True)
        X = X[return_index]
        Y = Y[return_index]

        # Get the model matrices from the kernel
        (F, L, Qc, H, Pinf, dF, dQc, dPinf) = self.kern.sde()

        # Run the Kalman filter
        (M, P) = self.kalman_filter(F, L, Qc, H, self.sigma2, Pinf, X.T, Y.T)

        # Run the Rauch-Tung-Striebel smoother
        if not filteronly:
            (M, P) = self.rts_smoother(F, L, Qc, X.T, M, P)

        # Put the data back in the original order
        M = M[:, return_inverse]
        P = P[:, :, return_inverse]

        # Only return the values for Xnew
        M = M[:, self.num_data:]
        P = P[:, :, self.num_data:]

        # Calculate the mean and variance
        m = H.dot(M).T
        V = np.tensordot(H[0], P, (0, 0))
        V = np.tensordot(V, H[0], (0, 0))
        V = V[:, None]

        # Return the posterior of the state
        return (m, V)

    def predict(self, Xnew, filteronly=False):

        # Run the Kalman filter to get the state
        (m, V) = self.predict_raw(Xnew, filteronly=filteronly)

        # Add the noise variance to the state variance
        V += self.sigma2

        # Lower and upper bounds
        lower = m - 2 * np.sqrt(V)
        upper = m + 2 * np.sqrt(V)

        # Return mean and variance
        return (m, V, lower, upper)

    def plot(self,
             plot_limits=None,
             levels=20,
             samples=0,
             fignum=None,
             ax=None,
             resolution=None,
             plot_raw=False,
             plot_filter=False,
             linecol=Tango.colorsHex['darkBlue'],
             fillcol=Tango.colorsHex['lightBlue']):

        # Deal with optional parameters
        if ax is None:
            fig = pb.figure(num=fignum)
            ax = fig.add_subplot(111)

        # Define the frame on which to plot
        resolution = resolution or 200
        Xgrid, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)

        # Make a prediction on the frame and plot it
        if plot_raw:
            m, v = self.predict_raw(Xgrid, filteronly=plot_filter)
            lower = m - 2 * np.sqrt(v)
            upper = m + 2 * np.sqrt(v)
            Y = self.Y
        else:
            m, v, lower, upper = self.predict(Xgrid, filteronly=plot_filter)
            Y = self.Y

        # Plot the values
        gpplot(Xgrid,
               m,
               lower,
               upper,
               axes=ax,
               edgecol=linecol,
               fillcol=fillcol)
        ax.plot(self.X, self.Y, 'kx', mew=1.5)

        # Optionally plot some samples
        if samples:
            if plot_raw:
                Ysim = self.posterior_samples_f(Xgrid, samples)
            else:
                Ysim = self.posterior_samples(Xgrid, samples)
            for yi in Ysim.T:
                ax.plot(Xgrid, yi, Tango.colorsHex['darkBlue'], linewidth=0.25)

        # Set the limits of the plot to some sensible values
        ymin, ymax = min(np.append(Y.flatten(), lower.flatten())), max(
            np.append(Y.flatten(), upper.flatten()))
        ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
        ax.set_xlim(xmin, xmax)
        ax.set_ylim(ymin, ymax)

    def prior_samples_f(self, X, size=10):

        # Sort the matrix (save the order)
        (_, return_index, return_inverse) = np.unique(X, True, True)
        X = X[return_index]

        # Get the model matrices from the kernel
        (F, L, Qc, H, Pinf, dF, dQc, dPinf) = self.kern.sde()

        # Allocate space for results
        Y = np.empty((size, X.shape[0]))

        # Simulate random draws
        #for j in range(0,size):
        #    Y[j,:] = H.dot(self.simulate(F,L,Qc,Pinf,X.T))
        Y = self.simulate(F, L, Qc, Pinf, X.T, size)

        # Only observations
        Y = np.tensordot(H[0], Y, (0, 0))

        # Reorder simulated values
        Y = Y[:, return_inverse]

        # Return trajectory
        return Y.T

    def posterior_samples_f(self, X, size=10):

        # Sort the matrix (save the order)
        (_, return_index, return_inverse) = np.unique(X, True, True)
        X = X[return_index]

        # Get the model matrices from the kernel
        (F, L, Qc, H, Pinf, dF, dQc, dPinf) = self.kern.sde()

        # Run smoother on original data
        (m, V) = self.predict_raw(X)

        # Simulate random draws from the GP prior
        y = self.prior_samples_f(np.vstack((self.X, X)), size)

        # Allocate space for sample trajectories
        Y = np.empty((size, X.shape[0]))

        # Run the RTS smoother on each of these values
        for j in range(0, size):
            yobs = y[0:self.num_data, j:j + 1] + np.sqrt(
                self.sigma2) * np.random.randn(self.num_data, 1)
            (m2, V2) = self.predict_raw(X, Ynew=yobs)
            Y[j, :] = m.T + y[self.num_data:, j].T - m2.T

        # Reorder simulated values
        Y = Y[:, return_inverse]

        # Return posterior sample trajectories
        return Y.T

    def posterior_samples(self, X, size=10):

        # Make samples of f
        Y = self.posterior_samples_f(X, size)

        # Add noise
        Y += np.sqrt(self.sigma2) * np.random.randn(Y.shape[0], Y.shape[1])

        # Return trajectory
        return Y

    def kalman_filter(self, F, L, Qc, H, R, Pinf, X, Y):
        # KALMAN_FILTER - Run the Kalman filter for a given model and data

        # Allocate space for results
        MF = np.empty((F.shape[0], Y.shape[1]))
        PF = np.empty((F.shape[0], F.shape[0], Y.shape[1]))

        # Initialize
        MF[:, -1] = np.zeros(F.shape[0])
        PF[:, :, -1] = Pinf.copy()

        # Time step lengths
        dt = np.empty(X.shape)
        dt[:, 0] = X[:, 1] - X[:, 0]
        dt[:, 1:] = np.diff(X)

        # Solve the LTI SDE for these time steps
        As, Qs, index = self.lti_disc(F, L, Qc, dt)

        # Kalman filter
        for k in range(0, Y.shape[1]):

            # Form discrete-time model
            #(A, Q) = self.lti_disc(F,L,Qc,dt[:,k])
            A = As[:, :, index[k]]
            Q = Qs[:, :, index[k]]

            # Prediction step
            MF[:, k] = A.dot(MF[:, k - 1])
            PF[:, :, k] = A.dot(PF[:, :, k - 1]).dot(A.T) + Q

            # Update step (only if there is data)
            if not np.isnan(Y[:, k]):
                if Y.shape[0] == 1:
                    K = PF[:, :, k].dot(
                        H.T) / (H.dot(PF[:, :, k]).dot(H.T) + R)
                else:
                    LL = linalg.cho_factor(H.dot(PF[:, :, k]).dot(H.T) + R)
                    K = linalg.cho_solve(LL, H.dot(PF[:, :, k].T)).T
                MF[:, k] += K.dot(Y[:, k] - H.dot(MF[:, k]))
                PF[:, :, k] -= K.dot(H).dot(PF[:, :, k])

        # Return values
        return (MF, PF)

    def rts_smoother(self, F, L, Qc, X, MS, PS):
        # RTS_SMOOTHER - Run the RTS smoother for a given model and data

        # Time step lengths
        dt = np.empty(X.shape)
        dt[:, 0] = X[:, 1] - X[:, 0]
        dt[:, 1:] = np.diff(X)

        # Solve the LTI SDE for these time steps
        As, Qs, index = self.lti_disc(F, L, Qc, dt)

        # Sequentially smooth states starting from the end
        for k in range(2, X.shape[1] + 1):

            # Form discrete-time model
            #(A, Q) = self.lti_disc(F,L,Qc,dt[:,1-k])
            A = As[:, :, index[1 - k]]
            Q = Qs[:, :, index[1 - k]]

            # Smoothing step
            LL = linalg.cho_factor(A.dot(PS[:, :, -k]).dot(A.T) + Q)
            G = linalg.cho_solve(LL, A.dot(PS[:, :, -k])).T
            MS[:, -k] += G.dot(MS[:, 1 - k] - A.dot(MS[:, -k]))
            PS[:, :, -k] += G.dot(PS[:, :, 1 - k] -
                                  A.dot(PS[:, :, -k]).dot(A.T) - Q).dot(G.T)

        # Return
        return (MS, PS)

    def kf_likelihood(self, F, L, Qc, H, R, Pinf, X, Y):
        # Evaluate marginal likelihood

        # Initialize
        lik = 0
        m = np.zeros((F.shape[0], 1))
        P = Pinf.copy()

        # Time step lengths
        dt = np.empty(X.shape)
        dt[:, 0] = X[:, 1] - X[:, 0]
        dt[:, 1:] = np.diff(X)

        # Solve the LTI SDE for these time steps
        As, Qs, index = self.lti_disc(F, L, Qc, dt)

        # Kalman filter for likelihood evaluation
        for k in range(0, Y.shape[1]):

            # Form discrete-time model
            #(A,Q) = self.lti_disc(F,L,Qc,dt[:,k])
            A = As[:, :, index[k]]
            Q = Qs[:, :, index[k]]

            # Prediction step
            m = A.dot(m)
            P = A.dot(P).dot(A.T) + Q

            # Update step only if there is data
            if not np.isnan(Y[:, k]):
                v = Y[:, k] - H.dot(m)
                if Y.shape[0] == 1:
                    S = H.dot(P).dot(H.T) + R
                    K = P.dot(H.T) / S
                    lik -= 0.5 * np.log(S)
                    lik -= 0.5 * v.shape[0] * np.log(2 * np.pi)
                    lik -= 0.5 * v * v / S
                else:
                    LL, isupper = linalg.cho_factor(H.dot(P).dot(H.T) + R)
                    lik -= np.sum(np.log(np.diag(LL)))
                    lik -= 0.5 * v.shape[0] * np.log(2 * np.pi)
                    lik -= 0.5 * linalg.cho_solve((LL, isupper), v).dot(v)
                    K = linalg.cho_solve((LL, isupper), H.dot(P.T)).T
                m += K.dot(v)
                P -= K.dot(H).dot(P)

        # Return likelihood
        return lik[0, 0]

    def kf_likelihood_g(self, F, L, Qc, H, R, Pinf, dF, dQc, dPinf, dR, X, Y):
        # Evaluate marginal likelihood gradient

        # State dimension, number of data points and number of parameters
        n = F.shape[0]
        steps = Y.shape[1]
        nparam = dF.shape[2]

        # Time steps
        t = X.squeeze()

        # Allocate space
        e = 0
        eg = np.zeros(nparam)

        # Set up
        m = np.zeros([n, 1])
        P = Pinf.copy()
        dm = np.zeros([n, nparam])
        dP = dPinf.copy()
        mm = m.copy()
        PP = P.copy()

        # Initial dt
        dt = -np.Inf

        # Allocate space for expm results
        AA = np.zeros([2 * n, 2 * n, nparam])
        FF = np.zeros([2 * n, 2 * n])

        # Loop over all observations
        for k in range(0, steps):

            # The previous time step
            dt_old = dt

            # The time discretization step length
            if k > 0:
                dt = t[k] - t[k - 1]
            else:
                dt = 0

            # Loop through all parameters (Kalman filter prediction step)
            for j in range(0, nparam):

                # Should we recalculate the matrix exponential?
                if abs(dt - dt_old) > 1e-9:

                    # The first matrix for the matrix factor decomposition
                    FF[:n, :n] = F
                    FF[n:, :n] = dF[:, :, j]
                    FF[n:, n:] = F

                    # Solve the matrix exponential
                    # scipy.linalg.expm3 has been removed from SciPy; use expm instead
                    AA[:, :, j] = linalg.expm(FF * dt)

                # Solve the differential equation
                foo = AA[:, :, j].dot(np.vstack([m, dm[:, j:j + 1]]))
                mm = foo[:n, :]
                dm[:, j:j + 1] = foo[n:, :]

                # The discrete-time dynamical model
                if j == 0:
                    A = AA[:n, :n, j]
                    Q = Pinf - A.dot(Pinf).dot(A.T)
                    PP = A.dot(P).dot(A.T) + Q

                # The derivatives of A and Q
                dA = AA[n:, :n, j]
                dQ = dPinf[:,:,j] - dA.dot(Pinf).dot(A.T) \
                   - A.dot(dPinf[:,:,j]).dot(A.T) - A.dot(Pinf).dot(dA.T)

                # The derivatives of P
                dP[:,:,j] = dA.dot(P).dot(A.T) + A.dot(dP[:,:,j]).dot(A.T) \
                   + A.dot(P).dot(dA.T) + dQ

            # Set predicted m and P
            m = mm
            P = PP

            # Start the Kalman filter update step and precalculate variables
            S = H.dot(P).dot(H.T) + R

            # We should calculate the Cholesky factor if S is a matrix
            # [LS,notposdef] = chol(S,'lower');

            # The Kalman filter update (S is scalar)
            HtiS = H.T / S
            iS = 1 / S
            K = P.dot(HtiS)
            v = Y[:, k] - H.dot(m)
            vtiS = v.T / S

            # Loop through all parameters (Kalman filter update step derivative)
            for j in range(0, nparam):

                # Innovation covariance derivative
                dS = H.dot(dP[:, :, j]).dot(H.T) + dR[:, :, j]

                # Evaluate the energy derivative for j
                eg[j] = eg[j]                           \
                    - .5*np.sum(iS*dS)                  \
                    + .5*H.dot(dm[:,j:j+1]).dot(vtiS.T) \
                    + .5*vtiS.dot(dS).dot(vtiS.T)       \
                    + .5*vtiS.dot(H.dot(dm[:,j:j+1]))

                # Kalman filter update step derivatives
                dK = dP[:, :, j].dot(HtiS) - P.dot(HtiS).dot(dS) / S
                dm[:, j:j + 1] = dm[:, j:j + 1] + dK.dot(v) - K.dot(H).dot(
                    dm[:, j:j + 1])
                dKSKt = dK.dot(S).dot(K.T)
                dP[:, :,
                   j] = dP[:, :, j] - dKSKt - K.dot(dS).dot(K.T) - dKSKt.T

            # Evaluate the energy
            # e = e - .5*S.shape[0]*np.log(2*np.pi) - np.sum(np.log(np.diag(LS))) - .5*vtiS.dot(v);
            e = e - .5 * S.shape[0] * np.log(2 * np.pi) - np.sum(
                np.log(np.sqrt(S))) - .5 * vtiS.dot(v)

            # Finish Kalman filter update step
            m = m + K.dot(v)
            P = P - K.dot(S).dot(K.T)

            # Make sure the covariances stay symmetric
            P = (P + P.T) / 2
            dP = (dP + dP.transpose([1, 0, 2])) / 2

            # raise NameError('Debug me')

        # Return the gradient
        return eg

    def kf_likelihood_g_notstable(self, F, L, Qc, H, R, Pinf, dF, dQc, dPinf,
                                  dR, X, Y):
        # Evaluate marginal likelihood gradient

        # State dimension, number of data points and number of parameters
        steps = Y.shape[1]
        nparam = dF.shape[2]
        n = F.shape[0]

        # Time steps
        t = X.squeeze()

        # Allocate space
        e = 0
        eg = np.zeros(nparam)

        # Set up
        Z = np.zeros(F.shape)
        QC = L.dot(Qc).dot(L.T)
        m = np.zeros([n, 1])
        P = Pinf.copy()
        dm = np.zeros([n, nparam])
        dP = dPinf.copy()
        mm = m.copy()
        PP = P.copy()

        # % Initial dt
        dt = -np.Inf

        # Allocate space for expm results
        AA = np.zeros([2 * F.shape[0], 2 * F.shape[0], nparam])
        AAA = np.zeros([4 * F.shape[0], 4 * F.shape[0], nparam])
        FF = np.zeros([2 * F.shape[0], 2 * F.shape[0]])
        FFF = np.zeros([4 * F.shape[0], 4 * F.shape[0]])

        # Loop over all observations
        for k in range(0, steps):

            # The previous time step
            dt_old = dt

            # The time discretization step length
            if k > 0:
                dt = t[k] - t[k - 1]
            else:
                dt = t[1] - t[0]

            # Loop through all parameters (Kalman filter prediction step)
            for j in range(0, nparam):

                # Should we recalculate the matrix exponential?
                if abs(dt - dt_old) > 1e-9:

                    # The first matrix for the matrix factor decomposition
                    FF[:n, :n] = F
                    FF[n:, :n] = dF[:, :, j]
                    FF[n:, n:] = F

                    # Solve the matrix exponential
                    # scipy.linalg.expm3 has been removed from SciPy; use expm instead
                    AA[:, :, j] = linalg.expm(FF * dt)

                # Solve using matrix fraction decomposition
                foo = AA[:, :, j].dot(np.vstack([m, dm[:, j:j + 1]]))

                # Pick the parts
                mm = foo[:n, :]
                dm[:, j:j + 1] = foo[n:, :]

                # Should we recalculate the matrix exponential?
                if abs(dt - dt_old) > 1e-9:

                    # Define W and G
                    W = L.dot(dQc[:, :, j]).dot(L.T)
                    G = dF[:, :, j]

                    # The second matrix for the matrix factor decomposition
                    FFF[:n, :n] = F
                    FFF[2 * n:-n, :n] = G
                    FFF[:n, n:2 * n] = QC
                    FFF[n:2 * n, n:2 * n] = -F.T
                    FFF[2 * n:-n, n:2 * n] = W
                    FFF[-n:, n:2 * n] = -G.T
                    FFF[2 * n:-n, 2 * n:-n] = F
                    FFF[2 * n:-n, -n:] = QC
                    FFF[-n:, -n:] = -F.T

                    # Solve the matrix exponential
                    # scipy.linalg.expm3 has been removed from SciPy; use expm instead
                    AAA[:, :, j] = linalg.expm(FFF * dt)

                # Solve using matrix fraction decomposition
                foo = AAA[:, :, j].dot(
                    np.vstack([P, np.eye(n), dP[:, :, j],
                               np.zeros([n, n])]))

                # Pick the parts
                C = foo[:n, :]
                D = foo[n:2 * n, :]
                dC = foo[2 * n:-n, :]
                dD = foo[-n:, :]

                # The prediction step covariance (PP = C/D)
                if j == 0:
                    PP = linalg.solve(D.T, C.T).T
                    PP = (PP + PP.T) / 2

                # Solve dP for j (C/D == P_{k|k-1})
                dP[:, :, j] = linalg.solve(D.T, (dC - PP.dot(dD)).T).T

            # Set predicted m and P
            m = mm
            P = PP

            # Start the Kalman filter update step and precalculate variables
            S = H.dot(P).dot(H.T) + R

            # We should calculate the Cholesky factor if S is a matrix
            # [LS,notposdef] = chol(S,'lower');

            # The Kalman filter update (S is scalar)
            HtiS = H.T / S
            iS = 1 / S
            K = P.dot(HtiS)
            v = Y[:, k] - H.dot(m)
            vtiS = v.T / S

            # Loop through all parameters (Kalman filter update step derivative)
            for j in range(0, nparam):

                # Innovation covariance derivative
                dS = H.dot(dP[:, :, j]).dot(H.T) + dR[:, :, j]

                # Evaluate the energy derivative for j
                eg[j] = eg[j]                           \
                    - .5*np.sum(iS*dS)                  \
                    + .5*H.dot(dm[:,j:j+1]).dot(vtiS.T) \
                    + .5*vtiS.dot(dS).dot(vtiS.T)       \
                    + .5*vtiS.dot(H.dot(dm[:,j:j+1]))

                # Kalman filter update step derivatives
                dK = dP[:, :, j].dot(HtiS) - P.dot(HtiS).dot(dS) / S
                dm[:, j:j + 1] = dm[:, j:j + 1] + dK.dot(v) - K.dot(H).dot(
                    dm[:, j:j + 1])
                dKSKt = dK.dot(S).dot(K.T)
                dP[:, :,
                   j] = dP[:, :, j] - dKSKt - K.dot(dS).dot(K.T) - dKSKt.T

            # Evaluate the energy
            # e = e - .5*S.shape[0]*np.log(2*np.pi) - np.sum(np.log(np.diag(LS))) - .5*vtiS.dot(v);
            e = e - .5 * S.shape[0] * np.log(2 * np.pi) - np.sum(
                np.log(np.sqrt(S))) - .5 * vtiS.dot(v)

            # Finish Kalman filter update step
            m = m + K.dot(v)
            P = P - K.dot(S).dot(K.T)

            # Make sure the covariances stay symmetric
            P = (P + P.T) / 2
            dP = (dP + dP.transpose([1, 0, 2])) / 2

            # raise NameError('Debug me')

        # Report
        #print e
        #print eg

        # Return the gradient
        return eg

    def simulate(self, F, L, Qc, Pinf, X, size=1):
        # Simulate a trajectory using the state space model

        # Allocate space for results
        f = np.zeros((F.shape[0], size, X.shape[1]))

        # Initial state
        # The initial state belongs at the first time index (index 0)
        f[:, :, 0] = np.linalg.cholesky(Pinf).dot(np.random.randn(F.shape[0], size))

        # Time step lengths
        dt = np.empty(X.shape)
        dt[:, 0] = X[:, 1] - X[:, 0]
        dt[:, 1:] = np.diff(X)

        # Solve the LTI SDE for these time steps
        As, Qs, index = self.lti_disc(F, L, Qc, dt)

        # Sweep through remaining time points
        for k in range(1, X.shape[1]):

            # Form discrete-time model
            # dt[:, k] is the step from X[:, k-1] to X[:, k] in this forward sweep
            A = As[:, :, index[k]]
            Q = Qs[:, :, index[k]]

            # Draw the state
            f[:, :, k] = A.dot(f[:, :, k - 1]) + np.dot(
                np.linalg.cholesky(Q), np.random.randn(A.shape[0], size))

        # Return values
        return f

    def lti_disc(self, F, L, Qc, dt):
        # Discrete-time solution to the LTI SDE

        # Dimensionality
        n = F.shape[0]
        index = 0

        # Check for numbers of time steps
        if dt.flatten().shape[0] == 1:

            # The covariance matrix by matrix fraction decomposition
            Phi = np.zeros((2 * n, 2 * n))
            Phi[:n, :n] = F
            Phi[:n, n:] = L.dot(Qc).dot(L.T)
            Phi[n:, n:] = -F.T
            AB = linalg.expm(Phi * dt).dot(
                np.vstack((np.zeros((n, n)), np.eye(n))))
            Q = linalg.solve(AB[n:, :].T, AB[:n, :].T)

            # The dynamical model
            A = linalg.expm(F * dt)

            # Return
            return A, Q

        # Optimize for cases where time steps occur repeatedly
        else:

            # Time discretizations (round to 14 decimals to avoid problems)
            dt, _, index = np.unique(np.round(dt, 14), True, True)

            # Allocate space for A and Q
            A = np.empty((n, n, dt.shape[0]))
            Q = np.empty((n, n, dt.shape[0]))

            # Call this function for each dt
            for j in range(0, dt.shape[0]):
                A[:, :, j], Q[:, :, j] = self.lti_disc(F, L, Qc, dt[j])

            # Return
            return A, Q, index
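A minimal usage sketch for the StateSpace model above on a 1-D time series. It assumes the GPy-style surroundings the class relies on but which this listing does not show: a kern.Matern32 default kernel exposing .sde(), and the optimize() method inherited from Model.

import numpy as np

# Noisy observations of a smooth function of time
X = np.linspace(0, 10, 200)[:, None]
Y = np.sin(X) + 0.1 * np.random.randn(*X.shape)

m = StateSpace(X, Y, sigma2=0.1)   # defaults to a Matern-3/2 SDE kernel
m.optimize()                       # maximizes the Kalman-filter marginal likelihood

Xtest = np.linspace(0, 12, 50)[:, None]
mean, var, lower, upper = m.predict(Xtest)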
Example #17
    def __init__(self,
                 X,
                 Y,
                 Z,
                 kern_list,
                 likelihood,
                 Y_metadata,
                 name='SVMOGP',
                 batch_size=None):

        self.batch_size = batch_size
        self.kern_list = kern_list
        self.likelihood = likelihood
        self.Y_metadata = Y_metadata

        self.num_inducing = Z.shape[0]  # M
        self.num_latent_funcs = len(kern_list)  # Q
        self.num_output_funcs = likelihood.num_output_functions(
            self.Y_metadata)
        self.W_list, self.kappa_list = util.random_W_kappas(
            self.num_latent_funcs, self.num_output_funcs, rank=1)

        self.Xmulti = X
        self.Ymulti = Y

        # Batch the data
        self.Xmulti_all, self.Ymulti_all = X, Y
        if batch_size is None:
            self.stochastic = False
            Xmulti_batch, Ymulti_batch = X, Y
        else:
            # Makes a climin slicer to make drawing minibatches much quicker
            self.stochastic = True
            self.slicer_list = []
            [
                self.slicer_list.append(
                    draw_mini_slices(Xmulti_task.shape[0], self.batch_size))
                for Xmulti_task in self.Xmulti
            ]
            Xmulti_batch, Ymulti_batch = self.new_batch()
            self.Xmulti, self.Ymulti = Xmulti_batch, Ymulti_batch

        # Initialize inducing points Z
        #Z = kmm_init(self.X_all, self.num_inducing)
        self.Xdim = Z.shape[1]
        Z = np.tile(Z, (1, self.num_latent_funcs))

        inference_method = SVMOGPInf()

        super(SVMOGP, self).__init__(X=Xmulti_batch[0][1:10],
                                     Y=Ymulti_batch[0][1:10],
                                     Z=Z,
                                     kernel=kern_list[0],
                                     likelihood=likelihood,
                                     mean_function=None,
                                     X_variance=None,
                                     inference_method=inference_method,
                                     Y_metadata=Y_metadata,
                                     name=name,
                                     normalizer=False)

        self.unlink_parameter(
            self.kern)  # Unlink SparseGP default param kernel

        _, self.B_list = util.LCM(input_dim=self.Xdim,
                                  output_dim=self.num_output_funcs,
                                  rank=1,
                                  kernels_list=self.kern_list,
                                  W_list=self.W_list,
                                  kappa_list=self.kappa_list)

        # Set-up optimization parameters: [Z, m_u, L_u]
        self.q_u_means = Param(
            'm_u',
            5 * np.random.randn(self.num_inducing, self.num_latent_funcs) +
            np.tile(np.random.randn(1, self.num_latent_funcs),
                    (self.num_inducing, 1)))
        chols = choleskies.triang_to_flat(
            np.tile(
                np.eye(self.num_inducing)[None, :, :],
                (self.num_latent_funcs, 1, 1)))
        self.q_u_chols = Param('L_u', chols)

        self.link_parameter(self.Z, index=0)
        self.link_parameter(self.q_u_means)
        self.link_parameters(self.q_u_chols)
        [self.link_parameter(kern_q)
         for kern_q in kern_list]  # link all kernels
        [self.link_parameter(B_q) for B_q in self.B_list]

        self.vem_step = True  # [True=VE-step, False=VM-step]
        self.ve_count = 0
        self.elbo = np.zeros((1, 1))
Example #18
 def __init__(self, variance, degree=2, name='parabola'):
     super(Parabola, self).__init__(1, 1, name)
     self.variance = Param('variance',
                           np.ones(degree + 1) * variance)
     self.degree = degree
     self.link_parameter(self.variance)
Example #19
 def __init__(self, name=None, parameters=[], *a, **kw):
     super(Test, self).__init__(name=name)
     self.x = Param('x', np.random.uniform(0, 1, (3, 4)))
     self.x[0].constrain_bounded(0, 1)
     self.link_parameter(self.x)
     self.x[1].fix()
Example #20
 def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
     Kern_check_model.__init__(self, kernel=kernel, dL_dK=dL_dK, X=X, X2=X2)
     self.X = Param('X', X)
     self.link_parameter(self.X)
Example #21
    def __init__(self,
                 Y,
                 input_dim,
                 X=None,
                 X_variance=None,
                 init='PCA',
                 num_inducing=10,
                 Z=None,
                 kernel=None,
                 inference_method=None,
                 likelihood=None,
                 name='bayesian gplvm',
                 normalizer=None,
                 missing_data=False,
                 stochastic=False,
                 batchsize=1):
        self.logger = logging.getLogger(self.__class__.__name__)
        if X is None:
            from ..util.initialization import initialize_latent
            self.logger.info(
                "initializing latent space X with method {}".format(init))
            X, fracs = initialize_latent(init, input_dim, Y)
        else:
            fracs = np.ones(input_dim)

        self.init = init

        if Z is None:
            self.logger.info("initializing inducing inputs")
            Z = np.random.permutation(X.copy())[:num_inducing]
        assert Z.shape[1] == X.shape[1]

        if X_variance is False:
            self.logger.info('no variance on X, activating sparse GPLVM')
            X = Param("latent space", X)
        elif X_variance is None:
            self.logger.info(
                "initializing latent space variance ~ uniform(0,.1)")
            X_variance = np.random.uniform(0, .1, X.shape)
            self.variational_prior = NormalPrior()
            X = NormalPosterior(X, X_variance)

        if kernel is None:
            self.logger.info("initializing kernel RBF")
            kernel = kern.RBF(
                input_dim, lengthscale=1. / fracs,
                ARD=True)  #+ kern.Bias(input_dim) + kern.White(input_dim)

        if likelihood is None:
            likelihood = Gaussian()

        self.kl_factr = 1.

        if inference_method is None:
            from ..inference.latent_function_inference.var_dtc import VarDTC
            self.logger.debug("creating inference_method var_dtc")
            inference_method = VarDTC(
                limit=1 if not missing_data else Y.shape[1])

        if kernel.useGPU and isinstance(inference_method, VarDTC_GPU):
            kernel.psicomp.GPU_direct = True

        super(BayesianGPLVMMiniBatch,
              self).__init__(X,
                             Y,
                             Z,
                             kernel,
                             likelihood=likelihood,
                             name=name,
                             inference_method=inference_method,
                             normalizer=normalizer,
                             missing_data=missing_data,
                             stochastic=stochastic,
                             batchsize=batchsize)
        self.X = X
        self.link_parameter(self.X, 0)
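The constructor above is normally reached through GPy.models rather than called directly. A brief sketch on toy data, assuming a reasonably recent GPy that exports BayesianGPLVMMiniBatch:

import numpy as np
import GPy

Y = np.random.randn(100, 12)   # 100 observations of a 12-dimensional signal
m = GPy.models.BayesianGPLVMMiniBatch(Y, input_dim=3, num_inducing=20,
                                      missing_data=False, stochastic=False)
m.optimize(messages=True, max_iters=200)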
Example #22
    def __init__(self,
                 X,
                 Y,
                 Z,
                 kern_list_uq,
                 kern_list_Gx,
                 kern_list_Tq,
                 likelihood,
                 Y_metadata,
                 name='ConvHetMOGP_VIK',
                 batch_size=None):

        self.batch_size = batch_size
        self.kern_list = kern_list_uq
        self.likelihood = likelihood
        self.Y_metadata = Y_metadata
        self.kern_list_Gdj = kern_list_Gx
        self.kern_list_Tq = kern_list_Tq

        self.num_inducing = Z.shape[0]  # M
        self.num_latent_funcs = len(kern_list_uq)  # Q
        self.num_output_funcs = likelihood.num_output_functions(
            self.Y_metadata)  #This is the number J in the paper
        self.W_list, self.kappa_list = util.random_W_kappas(
            self.num_latent_funcs, self.num_output_funcs, rank=1)

        check_ARD_uq = [kern.lengthscale.shape[0] > 1 for kern in kern_list_uq]
        check_ARD_Gx = [
            kern.lengthscale.shape[0] > 1 for kern in kern_list_Gx
        ]  # This is just to verify Automatic Relevance Determination
        check_ARD_Tq = [kern.lengthscale.shape[0] > 1 for kern in kern_list_Tq]
        if (sum(check_ARD_uq) == 0) and (sum(check_ARD_Gx)
                                         == 0) and (sum(check_ARD_Tq) == 0):
            isARD = False
        elif (sum(check_ARD_uq) == check_ARD_uq.__len__()) and (
                sum(check_ARD_Gx)
                == check_ARD_Gx.__len__()) and (sum(check_ARD_Tq)
                                                == check_ARD_Tq.__len__()):
            isARD = True
        else:
            print(
                '\nAll kernel_lists for Uq, Gx and Tq have to coincide in Automatic Relevance Determination,'
            )
            print('All kernel_lists have to coincide: ARD=True or ARD=False\n')
            assert (sum(check_ARD_uq) == check_ARD_uq.__len__()) and (
                sum(check_ARD_Gx)
                == check_ARD_Gx.__len__()) and (sum(check_ARD_Tq)
                                                == check_ARD_Tq.__len__())

        self.kern_aux = GPy.kern.RBF(
            input_dim=Z.shape[1],
            lengthscale=1.0,
            variance=1.0,
            name='rbf_aux',
            ARD=isARD) + GPy.kern.White(input_dim=Z.shape[1])
        self.kern_aux.white.variance = 1e-6

        self.Xmulti = X
        self.Ymulti = Y

        # Batch the data
        self.Xmulti_all, self.Ymulti_all = X, Y
        if batch_size is None:
            #self.stochastic = False
            Xmulti_batch, Ymulti_batch = X, Y
        else:
            # Makes a climin slicer to make drawing minibatches much quicker
            #self.stochastic = False   #"This was True as Pablo had it"
            self.slicer_list = []
            [
                self.slicer_list.append(
                    draw_mini_slices(Xmulti_task.shape[0], self.batch_size))
                for Xmulti_task in self.Xmulti
            ]
            Xmulti_batch, Ymulti_batch = self.new_batch()
            self.Xmulti, self.Ymulti = Xmulti_batch, Ymulti_batch

        # Initialize inducing points Z
        self.Xdim = Z.shape[1]
        Z = np.tile(Z, (1, self.num_latent_funcs))
        inference_method = SVMOGPInf()

        super(ConvHetMOGP_VIK,
              self).__init__(X=Xmulti_batch[0][1:10],
                             Y=Ymulti_batch[0][1:10],
                             Z=Z,
                             kernel=kern_list_uq[0],
                             likelihood=likelihood,
                             mean_function=None,
                             X_variance=None,
                             inference_method=inference_method,
                             Y_metadata=Y_metadata,
                             name=name,
                             normalizer=False)

        self.unlink_parameter(
            self.kern)  # Unlink SparseGP default param kernel

        _, self.B_list = util.LCM(input_dim=self.Xdim,
                                  output_dim=self.num_output_funcs,
                                  rank=1,
                                  kernels_list=self.kern_list,
                                  W_list=self.W_list,
                                  kappa_list=self.kappa_list)

        # Set-up optimization parameters: [Z, m_u, L_u]
        self.q_u_means = [
            Param(
                'm_u' + str(dj), 10.0 *
                np.random.randn(self.num_inducing, self.num_latent_funcs) +
                10.0 * np.tile(np.random.randn(1, self.num_latent_funcs),
                               (self.num_inducing, 1)))
            for dj in range(self.num_output_funcs)
        ]
        chols = choleskies.triang_to_flat(
            np.tile(3 * np.eye(self.num_inducing)[None, :, :],
                    (self.num_latent_funcs, 1, 1)))
        self.q_u_chols = Param('L_u', chols)

        self.link_parameter(self.Z, index=0)
        [self.link_parameter(q_u_means) for q_u_means in self.q_u_means]
        self.link_parameters(self.q_u_chols)
        [self.link_parameter(kern_q)
         for kern_q in kern_list_uq]  # link all kernels
        [self.link_parameter(B_q) for B_q in self.B_list]
        [self.link_parameter(kern_list_Gjd) for kern_list_Gjd in kern_list_Gx]
        [self.link_parameter(kern_list_Tq) for kern_list_Tq in kern_list_Tq]
        #self.link_parameter(self.kern_aux.white.variance)

        self.vem_step = True  # [True=VE-step, False=VM-step]
        self.ve_count = 0
        self.elbo = np.zeros((1, 1))
        self.index_VEM = 0  #this is a variable to index correctly the self.elbo when using VEM
        self.Gauss_Newton = False  #This is a flag for using the Gauss-Newton approximation when dL_dV is needed

        for kern_q in self.kern_list:
            kern_q.variance = 1.0
            kern_q.variance.fix()
        for kern_Gjd in self.kern_list_Gdj:
            kern_Gjd.variance = 1.0
            kern_Gjd.variance.fix()
            #print('IN fix Gdj')
        for kern_Tq in self.kern_list_Tq:
            kern_Tq.variance = 1.0
            kern_Tq.variance.fix()
Example #23
 def __init__(self, input_dim, alp=1.0, bet=1.0, active_dims=None):
     super(ExpKernel, self).__init__(input_dim, active_dims, 'exp kernel')
     assert input_dim == 1, "For this kernel we assume input_dim=1"
     self.alp = Param("alp", alp)
     self.bet = Param("bet", bet)
     self.link_parameters(self.alp, self.bet)
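The snippet above only registers the kernel parameters; before the kernel can be used, GPy also needs at least K and Kdiag (plus gradient updates). One possible completion is sketched below, where the covariance form k(x, x') = alp * exp(-bet * |x - x'|) is purely an assumption made for illustration (the original source does not show it), and numpy is assumed to be imported as np in the same module.

 def K(self, X, X2=None):
     # Assumed covariance: k(x, x') = alp * exp(-bet * |x - x'|)
     if X2 is None:
         X2 = X
     r = np.abs(X - X2.T)
     return self.alp * np.exp(-self.bet * r)

 def Kdiag(self, X):
     # k(x, x) = alp under the assumed form above
     return self.alp * np.ones(X.shape[0])

 def update_gradients_full(self, dL_dK, X, X2=None):
     # Chain rule for the two scalar parameters under the assumed form
     if X2 is None:
         X2 = X
     r = np.abs(X - X2.T)
     E = np.exp(-self.bet * r)
     self.alp.gradient = np.sum(dL_dK * E)
     self.bet.gradient = np.sum(dL_dK * (-self.alp * r * E))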