class GPMultioutRegression(SparseGP):
    """
    Gaussian Process model for multi-output regression without missing data

    This is an implementation of Latent Variable Multiple Output Gaussian Processes (LVMOGP) in [Dai_et_al_2017]_.

    .. rubric:: References

    .. [Dai_et_al_2017] Dai, Z.; Alvarez, M.A.; Lawrence, N.D: Efficient Modeling of Latent Information in Supervised Learning using Gaussian Processes. In NIPS, 2017.

    :param X: input observations.
    :type X: numpy.ndarray
    :param Y: output observations, each column corresponding to an output dimension.
    :type Y: numpy.ndarray
    :param int Xr_dim: the dimensionality of the latent space in which the output dimensions are embedded
    :param kernel: a GPy kernel for the GP of individual output dimensions ** defaults to RBF **
    :type kernel: GPy.kern.Kern or None
    :param kernel_row: a GPy kernel for the GP of the latent space ** defaults to RBF **
    :type kernel_row: GPy.kern.Kern or None
    :param Z: inducing inputs
    :type Z: numpy.ndarray or None
    :param Z_row: inducing inputs for the latent space
    :type Z_row: numpy.ndarray or None
    :param X_row: the initial value of the mean of the variational posterior distribution of points in the latent space
    :type X_row: numpy.ndarray or None
    :param Xvariance_row: the initial value of the variance of the variational posterior distribution of points in the latent space
    :type Xvariance_row: numpy.ndarray or None
    :param num_inducing: a tuple (M, Mr). M is the number of inducing points for GP of individual output dimensions. Mr is the number of inducing points for the latent space.
    :type num_inducing: (int, int)
    :param int qU_var_r_W_dim: the dimensionality of the covariance of q(U) for the latent space. If it is smaller than the number of inducing points, it represents a low-rank parameterization of the covariance matrix.
    :param int qU_var_c_W_dim: the dimensionality of the covariance of q(U) for the GP regression. If it is smaller than the number of inducing points, it represents a low-rank parameterization of the covariance matrix.
    :param str init: the choice of initialization: 'GP' or 'rand'. With 'rand', the model is initialized randomly. With 'GP', the model is initialized through a protocol: (1) fit a sparse GP to the data, (2) fit a BGPLVM to the posterior mean of the sparse GP, (3) initialize the model from the outcomes of the two fits.
    :param str name: the name of the model

    """
    def __init__(self,
                 X,
                 Y,
                 Xr_dim,
                 kernel=None,
                 kernel_row=None,
                 Z=None,
                 Z_row=None,
                 X_row=None,
                 Xvariance_row=None,
                 num_inducing=(10, 10),
                 qU_var_r_W_dim=None,
                 qU_var_c_W_dim=None,
                 init='GP',
                 name='GPMR'):

        #Kernel
        if kernel is None:
            kernel = kern.RBF(X.shape[1])

        if kernel_row is None:
            kernel_row = kern.RBF(Xr_dim, name='kern_row')

        if init == 'GP':
            from . import SparseGPRegression, BayesianGPLVM
            from ..util.linalg import jitchol
            Mc, Mr = num_inducing
            print('Initializing with GP...')
            print('Fit Sparse GP...')
            m_sgp = SparseGPRegression(X,
                                       Y,
                                       kernel=kernel.copy(),
                                       num_inducing=Mc)
            m_sgp.likelihood.variance[:] = Y.var() * 0.01
            m_sgp.optimize(max_iters=1000)
            print('Fit BGPLVM...')
            m_lvm = BayesianGPLVM(m_sgp.posterior.mean.copy().T,
                                  Xr_dim,
                                  kernel=kernel_row.copy(),
                                  num_inducing=Mr)
            m_lvm.likelihood.variance[:] = m_lvm.Y.var() * 0.01
            m_lvm.optimize(max_iters=10000)

            kernel[:] = m_sgp.kern.param_array.copy()
            kernel.variance[:] = np.sqrt(kernel.variance)
            Z = m_sgp.Z.values.copy()
            kernel_row[:] = m_lvm.kern.param_array.copy()
            kernel_row.variance[:] = np.sqrt(kernel_row.variance)
            Z_row = m_lvm.Z.values.copy()
            X_row = m_lvm.X.mean.values.copy()
            Xvariance_row = m_lvm.X.variance.values

            qU_mean = m_lvm.posterior.mean.T.copy()
            qU_var_col_W = jitchol(m_sgp.posterior.covariance)
            qU_var_col_diag = np.full(Mc, 1e-5)
            qU_var_row_W = jitchol(m_lvm.posterior.covariance)
            qU_var_row_diag = np.full(Mr, 1e-5)
            print('Done.')
        else:
            qU_mean = np.zeros(num_inducing)
            qU_var_col_W = np.random.randn(
                num_inducing[0], num_inducing[0]
                if qU_var_c_W_dim is None else qU_var_c_W_dim) * 0.01
            qU_var_col_diag = np.full(num_inducing[0], 1e-5)
            qU_var_row_W = np.random.randn(
                num_inducing[1], num_inducing[1]
                if qU_var_r_W_dim is None else qU_var_r_W_dim) * 0.01
            qU_var_row_diag = np.full(num_inducing[1], 1e-5)

        if X_row is None:
            u, s, v = np.linalg.svd(Y)
            X_row = Y.T.dot(u[:, :Xr_dim])  #*np.sqrt(s)[:Xr_dim])
            X_row = X_row / X_row.std(0)
        if Xvariance_row is None:
            Xvariance_row = np.ones((Y.shape[1], Xr_dim)) * 0.0001
        if Z is None:
            Z = X[np.random.permutation(X.shape[0])[:num_inducing[0]]].copy()
        if Z_row is None:
            Z_row = X_row[np.random.permutation(
                X_row.shape[0])[:num_inducing[1]]].copy()

        self.kern_row = kernel_row
        self.X_row = NormalPosterior(X_row, Xvariance_row, name='Xr')
        self.Z_row = Param('Zr', Z_row)
        self.variational_prior_row = NormalPrior()

        self.qU_mean = Param('qU_mean', qU_mean)
        self.qU_var_c_W = Param('qU_var_col_W', qU_var_col_W)
        self.qU_var_c_diag = Param('qU_var_col_diag', qU_var_col_diag,
                                   Logexp())
        self.qU_var_r_W = Param('qU_var_row_W', qU_var_row_W)
        self.qU_var_r_diag = Param('qU_var_row_diag', qU_var_row_diag,
                                   Logexp())

        #Likelihood
        likelihood = likelihoods.Gaussian(variance=np.var(Y) * 0.01)
        from ..inference.latent_function_inference import VarDTC_SVI_Multiout
        inference_method = VarDTC_SVI_Multiout()

        super(GPMultioutRegression,
              self).__init__(X,
                             Y,
                             Z,
                             kernel,
                             likelihood=likelihood,
                             name=name,
                             inference_method=inference_method)

        self.link_parameters(self.kern_row, self.X_row, self.Z_row,
                             self.qU_mean, self.qU_var_c_W, self.qU_var_c_diag,
                             self.qU_var_r_W, self.qU_var_r_diag)

        self._log_marginal_likelihood = np.nan

    def parameters_changed(self):
        # q(U) covariances use a low-rank-plus-diagonal parameterization: W W^T + diag(d)
        qU_var_c = tdot(self.qU_var_c_W) + np.diag(self.qU_var_c_diag)
        qU_var_r = tdot(self.qU_var_r_W) + np.diag(self.qU_var_r_diag)
        self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(
            self.kern_row, self.kern, self.X_row, self.X, self.Z_row, self.Z,
            self.likelihood, self.Y, self.qU_mean, qU_var_r, qU_var_c)

        self.likelihood.update_gradients(self.grad_dict['dL_dthetaL'])
        self.qU_mean.gradient[:] = self.grad_dict['dL_dqU_mean']
        self.qU_var_c_diag.gradient[:] = np.diag(
            self.grad_dict['dL_dqU_var_c'])
        self.qU_var_c_W.gradient[:] = (self.grad_dict['dL_dqU_var_c'] +
                                       self.grad_dict['dL_dqU_var_c'].T).dot(
                                           self.qU_var_c_W)
        self.qU_var_r_diag.gradient[:] = np.diag(
            self.grad_dict['dL_dqU_var_r'])
        self.qU_var_r_W.gradient[:] = (self.grad_dict['dL_dqU_var_r'] +
                                       self.grad_dict['dL_dqU_var_r'].T).dot(
                                           self.qU_var_r_W)

        self.kern.update_gradients_diag(self.grad_dict['dL_dKdiag_c'], self.X)
        kerngrad = self.kern.gradient.copy()
        self.kern.update_gradients_full(self.grad_dict['dL_dKfu_c'], self.X,
                                        self.Z)
        kerngrad += self.kern.gradient
        self.kern.update_gradients_full(self.grad_dict['dL_dKuu_c'], self.Z,
                                        None)
        self.kern.gradient += kerngrad
        #gradients wrt Z
        self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKuu_c'],
                                                self.Z)
        self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKfu_c'].T,
                                                 self.Z, self.X)

        #gradients wrt kernel
        self.kern_row.update_gradients_full(self.grad_dict['dL_dKuu_r'],
                                            self.Z_row, None)
        kerngrad = self.kern_row.gradient.copy()
        self.kern_row.update_gradients_expectations(
            variational_posterior=self.X_row,
            Z=self.Z_row,
            dL_dpsi0=self.grad_dict['dL_dpsi0_r'],
            dL_dpsi1=self.grad_dict['dL_dpsi1_r'],
            dL_dpsi2=self.grad_dict['dL_dpsi2_r'])
        self.kern_row.gradient += kerngrad

        #gradients wrt Z
        self.Z_row.gradient = self.kern_row.gradients_X(
            self.grad_dict['dL_dKuu_r'], self.Z_row)
        self.Z_row.gradient += self.kern_row.gradients_Z_expectations(
            self.grad_dict['dL_dpsi0_r'],
            self.grad_dict['dL_dpsi1_r'],
            self.grad_dict['dL_dpsi2_r'],
            Z=self.Z_row,
            variational_posterior=self.X_row)

        self._log_marginal_likelihood -= self.variational_prior_row.KL_divergence(
            self.X_row)

        self.X_row.mean.gradient, self.X_row.variance.gradient = self.kern_row.gradients_qX_expectations(
            variational_posterior=self.X_row,
            Z=self.Z_row,
            dL_dpsi0=self.grad_dict['dL_dpsi0_r'],
            dL_dpsi1=self.grad_dict['dL_dpsi1_r'],
            dL_dpsi2=self.grad_dict['dL_dpsi2_r'])

        self.variational_prior_row.update_gradients_KL(self.X_row)

    def optimize_auto(self, max_iters=10000, verbose=True):
        """
        Optimize the model parameters through a pre-defined protocol: first optimize the variational parameters of q(U) and the likelihood for 10% of max_iters with the kernels, inducing inputs and latent space fixed, then unfix everything and optimize jointly.

        :param int max_iters: the maximum number of iterations.
        :param boolean verbose: print the progress of optimization or not.
        """
        self.Z.fix(warning=False)
        self.kern.fix(warning=False)
        self.kern_row.fix(warning=False)
        self.Zr.fix(warning=False)
        self.Xr.fix(warning=False)
        self.optimize(max_iters=int(0.1 * max_iters), messages=verbose)
        self.unfix()
        self.optimize(max_iters=max_iters, messages=verbose)
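
# --- Usage sketch (illustrative, not part of the class above). It assumes GPy
# --- is importable as `GPy`; data shapes, inducing-point counts and iteration
# --- numbers are placeholders. init='rand' is used here to skip the slower
# --- two-stage 'GP' initialization described in the docstring.
import numpy as np
import GPy

np.random.seed(0)
N, input_dim, D = 50, 1, 8                     # N inputs, D output dimensions (columns of Y)
X = np.random.rand(N, input_dim)
Y = np.hstack([np.sin(3.0 * X + d) + 0.05 * np.random.randn(N, 1) for d in range(D)])

# Xr_dim is the dimensionality of the latent space in which the D outputs are embedded.
m = GPy.models.GPMultioutRegression(X, Y, Xr_dim=2,
                                    num_inducing=(10, 5), init='rand')
m.optimize_auto(max_iters=200, verbose=False)  # staged optimization defined above
print(m.log_likelihood())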
    def __init__(self,
                 X,
                 Y,
                 indexD,
                 Xr_dim,
                 kernel=None,
                 kernel_row=None,
                 Z=None,
                 Z_row=None,
                 X_row=None,
                 Xvariance_row=None,
                 num_inducing=(10, 10),
                 qU_var_r_W_dim=None,
                 qU_var_c_W_dim=None,
                 init='GP',
                 heter_noise=False,
                 name='GPMRMD'):

        assert len(Y.shape) == 1 or Y.shape[1] == 1

        self.output_dim = int(np.max(indexD)) + 1
        self.heter_noise = heter_noise
        self.indexD = indexD

        #Kernel
        if kernel is None:
            kernel = kern.RBF(X.shape[1])
        if kernel_row is None:
            kernel_row = kern.RBF(Xr_dim, name='kern_row')

        if init == 'GP':
            from . import SparseGPRegression, BayesianGPLVM
            from ..util.linalg import jitchol
            Mc, Mr = num_inducing
            print('Initializing with GP...')
            print('Fit Sparse GP...')
            m_sgp = SparseGPRegressionMD(X,
                                         Y,
                                         indexD,
                                         kernel=kernel.copy(),
                                         num_inducing=Mc)
            m_sgp.likelihood.variance[:] = Y.var() * 0.01
            m_sgp.optimize(max_iters=1000)
            print('Fit BGPLVM...')
            m_lvm = BayesianGPLVM(m_sgp.posterior.mean.copy().T,
                                  Xr_dim,
                                  kernel=kernel_row.copy(),
                                  num_inducing=Mr)
            m_lvm.likelihood.variance[:] = m_lvm.Y.var() * 0.01
            m_lvm.optimize(max_iters=10000)

            kernel[:] = m_sgp.kern.param_array.copy()
            kernel.variance[:] = np.sqrt(kernel.variance)
            Z = m_sgp.Z.values.copy()
            kernel_row[:] = m_lvm.kern.param_array.copy()
            kernel_row.variance[:] = np.sqrt(kernel_row.variance)
            Z_row = m_lvm.Z.values.copy()
            X_row = m_lvm.X.mean.values.copy()
            Xvariance_row = m_lvm.X.variance.values

            qU_mean = m_lvm.posterior.mean.T.copy()
            qU_var_col_W = jitchol(m_sgp.posterior.covariance)
            qU_var_col_diag = np.full(Mc, 1e-5)
            qU_var_row_W = jitchol(m_lvm.posterior.covariance)
            qU_var_row_diag = np.full(Mr, 1e-5)
            print('Done.')
        else:
            qU_mean = np.zeros(num_inducing)
            qU_var_col_W = np.random.randn(
                num_inducing[0], num_inducing[0]
                if qU_var_c_W_dim is None else qU_var_c_W_dim) * 0.01
            qU_var_col_diag = np.full(num_inducing[0], 1e-5)
            qU_var_row_W = np.random.randn(
                num_inducing[1], num_inducing[1]
                if qU_var_r_W_dim is None else qU_var_r_W_dim) * 0.01
            qU_var_row_diag = np.full(num_inducing[1], 1e-5)

        if Z is None:
            Z = X[np.random.permutation(X.shape[0])[:num_inducing[0]]].copy()
        if X_row is None:
            X_row = np.random.randn(self.output_dim, Xr_dim)
        if Xvariance_row is None:
            Xvariance_row = np.ones((self.output_dim, Xr_dim)) * 0.0001
        if Z_row is None:
            Z_row = X_row[np.random.permutation(
                X_row.shape[0])[:num_inducing[1]]].copy()

        self.kern_row = kernel_row
        self.X_row = NormalPosterior(X_row, Xvariance_row, name='Xr')
        self.Z_row = Param('Zr', Z_row)
        self.variational_prior_row = NormalPrior()

        self.qU_mean = Param('qU_mean', qU_mean)
        self.qU_var_c_W = Param('qU_var_col_W', qU_var_col_W)
        self.qU_var_c_diag = Param('qU_var_col_diag', qU_var_col_diag,
                                   Logexp())
        self.qU_var_r_W = Param('qU_var_row_W', qU_var_row_W)
        self.qU_var_r_diag = Param('qU_var_row_diag', qU_var_row_diag,
                                   Logexp())

        #Likelihood
        if heter_noise:
            likelihood = likelihoods.Gaussian(variance=np.array(
                [np.var(Y[indexD == d])
                 for d in range(self.output_dim)]) * 0.01)
        else:
            likelihood = likelihoods.Gaussian(variance=np.var(Y) * 0.01)
        from ..inference.latent_function_inference.vardtc_svi_multiout_miss import VarDTC_SVI_Multiout_Miss
        inference_method = VarDTC_SVI_Multiout_Miss()

        super(GPMultioutRegressionMD,
              self).__init__(X,
                             Y,
                             Z,
                             kernel,
                             likelihood=likelihood,
                             name=name,
                             inference_method=inference_method)
        self.output_dim = int(np.max(indexD)) + 1

        self.link_parameters(self.kern_row, self.X_row, self.Z_row,
                             self.qU_mean, self.qU_var_c_W, self.qU_var_c_diag,
                             self.qU_var_r_W, self.qU_var_r_diag)

        self._log_marginal_likelihood = np.nan
class GPMultioutRegression(SparseGP):
    """
    Gaussian Process model for scalable multioutput regression

    This is a thin wrapper around the models.GP class, with a set of sensible defaults

    :param X_list: list of input observations corresponding to each output
    :type X_list: list of numpy arrays
    :param Y_list: list of observed values related to the different noise models
    :type Y_list: list of numpy arrays
    :param kernel: a GPy kernel ** Coregionalized, defaults to RBF ** Coregionalized
    :type kernel: None | GPy.kernel defaults
    :likelihoods_list: a list of likelihoods, defaults to list of Gaussian likelihoods
    :type likelihoods_list: None | a list GPy.likelihoods
    :param name: model name
    :type name: string
    :param W_rank: number tuples of the corregionalization parameters 'W' (see coregionalize kernel documentation)
    :type W_rank: integer
    :param kernel_name: name of the kernel
    :type kernel_name: string
    """
    def __init__(self, X, Y, Xr_dim, kernel=None, kernel_row=None, likelihood=None, Z=None, Z_row=None, X_row=None, Xvariance_row=None, num_inducing=(10,10), qU_var_r_W_dim=None, qU_var_c_W_dim=None, init='GP', name='GPMR'):

        #Kernel
        if kernel is None:
            kernel = kern.RBF(X.shape[1])

        if kernel_row is None:
            kernel_row = kern.RBF(Xr_dim,name='kern_row')

        if init=='GP':
            from . import SparseGPRegression, BayesianGPLVM
            from ..util.linalg import jitchol
            Mc, Mr = num_inducing
            print('Initializing with GP...')
            print('Fit Sparse GP...')
            m_sgp = SparseGPRegression(X,Y,kernel=kernel.copy(),num_inducing=Mc)
            m_sgp.likelihood.variance[:] = Y.var()*0.01
            m_sgp.optimize(max_iters=1000)
            print('Fit BGPLVM...')
            m_lvm = BayesianGPLVM(m_sgp.posterior.mean.copy().T,Xr_dim,kernel=kernel_row.copy(), num_inducing=Mr)
            m_lvm.likelihood.variance[:] = m_lvm.Y.var()*0.01
            m_lvm.optimize(max_iters=10000)

            kernel[:] = m_sgp.kern.param_array.copy()
            kernel.variance[:] = np.sqrt(kernel.variance)
            Z = m_sgp.Z.values.copy()
            kernel_row[:] = m_lvm.kern.param_array.copy()
            kernel_row.variance[:] = np.sqrt(kernel_row.variance)
            Z_row = m_lvm.Z.values.copy()
            X_row = m_lvm.X.mean.values.copy()
            Xvariance_row = m_lvm.X.variance.values

            qU_mean = m_lvm.posterior.mean.T.copy()
            qU_var_col_W = jitchol(m_sgp.posterior.covariance)
            qU_var_col_diag = np.full(Mc,1e-5)
            qU_var_row_W = jitchol(m_lvm.posterior.covariance)
            qU_var_row_diag = np.full(Mr,1e-5)
            print('Done.')
        else:
            qU_mean = np.zeros(num_inducing)
            qU_var_col_W = np.random.randn(num_inducing[0],num_inducing[0] if qU_var_c_W_dim is None else qU_var_c_W_dim)*0.01
            qU_var_col_diag = np.full(num_inducing[0],1e-5)
            qU_var_row_W = np.random.randn(num_inducing[1],num_inducing[1] if qU_var_r_W_dim is None else qU_var_r_W_dim)*0.01
            qU_var_row_diag = np.full(num_inducing[1],1e-5)

        if X_row is None:
            u,s,v = np.linalg.svd(Y)
            X_row = Y.T.dot(u[:,:Xr_dim])#*np.sqrt(s)[:Xr_dim])
            X_row = X_row/X_row.std(0)
        if Xvariance_row is None:
            Xvariance_row = np.ones((Y.shape[1],Xr_dim))*0.0001
        if Z is None:
            Z = X[np.random.permutation(X.shape[0])[:num_inducing[0]]].copy()
        if Z_row is None:
            Z_row = X_row[np.random.permutation(X_row.shape[0])[:num_inducing[1]]].copy()

        self.kern_row = kernel_row
        self.X_row = NormalPosterior(X_row, Xvariance_row,name='Xr')
        self.Z_row = Param('Zr', Z_row)
        self.variational_prior_row = NormalPrior()

        self.qU_mean = Param('qU_mean', qU_mean)
        self.qU_var_c_W = Param('qU_var_col_W', qU_var_col_W)
        self.qU_var_c_diag = Param('qU_var_col_diag', qU_var_col_diag, Logexp())
        self.qU_var_r_W = Param('qU_var_row_W',qU_var_row_W)
        self.qU_var_r_diag = Param('qU_var_row_diag', qU_var_row_diag, Logexp())

        #Likelihood
        likelihood = likelihoods.Gaussian(variance=np.var(Y)*0.01)
        from ..inference.latent_function_inference import VarDTC_SVI_Multiout
        inference_method = VarDTC_SVI_Multiout()

        super(GPMultioutRegression,self).__init__(X, Y, Z, kernel, likelihood=likelihood,
                                           name=name, inference_method=inference_method)

        self.link_parameters(self.kern_row, self.X_row, self.Z_row,self.qU_mean, self.qU_var_c_W, self.qU_var_c_diag, self.qU_var_r_W, self.qU_var_r_diag)

        self._log_marginal_likelihood = np.nan

    def parameters_changed(self):
        qU_var_c = tdot(self.qU_var_c_W) + np.diag(self.qU_var_c_diag)
        qU_var_r = tdot(self.qU_var_r_W) + np.diag(self.qU_var_r_diag)
        self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern_row, self.kern, self.X_row, self.X, self.Z_row, self.Z, self.likelihood, self.Y, self.qU_mean ,qU_var_r, qU_var_c)

        self.likelihood.update_gradients(self.grad_dict['dL_dthetaL'])
        self.qU_mean.gradient[:] = self.grad_dict['dL_dqU_mean']
        self.qU_var_c_diag.gradient[:] = np.diag(self.grad_dict['dL_dqU_var_c'])
        self.qU_var_c_W.gradient[:] = (self.grad_dict['dL_dqU_var_c']+self.grad_dict['dL_dqU_var_c'].T).dot(self.qU_var_c_W)
        self.qU_var_r_diag.gradient[:] = np.diag(self.grad_dict['dL_dqU_var_r'])
        self.qU_var_r_W.gradient[:] = (self.grad_dict['dL_dqU_var_r']+self.grad_dict['dL_dqU_var_r'].T).dot(self.qU_var_r_W)

        self.kern.update_gradients_diag(self.grad_dict['dL_dKdiag_c'], self.X)
        kerngrad = self.kern.gradient.copy()
        self.kern.update_gradients_full(self.grad_dict['dL_dKfu_c'], self.X, self.Z)
        kerngrad += self.kern.gradient
        self.kern.update_gradients_full(self.grad_dict['dL_dKuu_c'], self.Z, None)
        self.kern.gradient += kerngrad
        #gradients wrt Z
        self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKuu_c'], self.Z)
        self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKfu_c'].T, self.Z, self.X)


        #gradients wrt kernel
        self.kern_row.update_gradients_full(self.grad_dict['dL_dKuu_r'], self.Z_row, None)
        kerngrad = self.kern_row.gradient.copy()
        self.kern_row.update_gradients_expectations(variational_posterior=self.X_row,
                                                Z=self.Z_row,
                                                dL_dpsi0=self.grad_dict['dL_dpsi0_r'],
                                                dL_dpsi1=self.grad_dict['dL_dpsi1_r'],
                                                dL_dpsi2=self.grad_dict['dL_dpsi2_r'])
        self.kern_row.gradient += kerngrad

        #gradients wrt Z
        self.Z_row.gradient = self.kern_row.gradients_X(self.grad_dict['dL_dKuu_r'], self.Z_row)
        self.Z_row.gradient += self.kern_row.gradients_Z_expectations(
                           self.grad_dict['dL_dpsi0_r'],
                           self.grad_dict['dL_dpsi1_r'],
                           self.grad_dict['dL_dpsi2_r'],
                           Z=self.Z_row,
                           variational_posterior=self.X_row)

        self._log_marginal_likelihood -= self.variational_prior_row.KL_divergence(self.X_row)

        self.X_row.mean.gradient, self.X_row.variance.gradient = self.kern_row.gradients_qX_expectations(
                                            variational_posterior=self.X_row,
                                            Z=self.Z_row,
                                            dL_dpsi0=self.grad_dict['dL_dpsi0_r'],
                                            dL_dpsi1=self.grad_dict['dL_dpsi1_r'],
                                            dL_dpsi2=self.grad_dict['dL_dpsi2_r'])

        self.variational_prior_row.update_gradients_KL(self.X_row)

    def optimize_auto(self,max_iters=10000,verbose=True):
        """
        Optimize the model through a pre-defined protocol: first optimize the variational parameters of q(U) and the likelihood for 10% of max_iters with the kernels, inducing inputs and latent space fixed, then unfix everything and optimize jointly.

        :param int max_iters: the maximum number of iterations.
        :param boolean verbose: print the progress of optimization or not.
        """
        self.Z.fix(warning=False)
        self.kern.fix(warning=False)
        self.kern_row.fix(warning=False)
        self.Zr.fix(warning=False)
        self.Xr.fix(warning=False)
        self.optimize(max_iters=int(0.1*max_iters),messages=verbose)
        self.unfix()
        self.optimize(max_iters=max_iters,messages=verbose)
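
# --- Sketch (illustrative): parameters_changed above builds the q(U)
# --- covariances in a low-rank-plus-diagonal form, cov = W W^T + diag(d),
# --- whose rank is controlled by qU_var_c_W_dim / qU_var_r_W_dim. A
# --- pure-numpy illustration of why this stays positive definite:
import numpy as np

M, rank = 10, 3                       # number of inducing points, rank of W
W = np.random.randn(M, rank) * 0.01
d = np.full(M, 1e-5)                  # positive diagonal (kept positive via Logexp in the model)
cov = W.dot(W.T) + np.diag(d)         # symmetric and positive definite by construction
print(np.all(np.linalg.eigvalsh(cov) > 0))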
    def __init__(self,
                 Y,
                 input_dim,
                 X=None,
                 X_variance=None,
                 init='PCA',
                 num_inducing=10,
                 Z=None,
                 kernel=None,
                 inference_method=None,
                 likelihood=None,
                 name='bayesian gplvm',
                 mpi_comm=None,
                 normalizer=None,
                 missing_data=False,
                 stochastic=False,
                 batchsize=1,
                 Y_metadata=None,
                 variational_prior=None):

        self.logger = logging.getLogger(self.__class__.__name__)
        if X is None:
            from ..util.initialization import initialize_latent
            self.logger.info(
                "initializing latent space X with method {}".format(init))
            X, fracs = initialize_latent(init, input_dim, Y)
        else:
            fracs = np.ones(input_dim)

        self.init = init

        if X_variance is None:
            self.logger.info(
                "initializing latent space variance ~ uniform(0,.1)")
            X_variance = np.random.uniform(0, .1, X.shape)

        if Z is None:
            self.logger.info("initializing inducing inputs")
            Z = np.random.permutation(X.copy())[:num_inducing]
        assert Z.shape[1] == X.shape[1]

        if kernel is None:
            self.logger.info("initializing kernel RBF")
            kernel = kern.RBF(
                input_dim, lengthscale=1. / fracs,
                ARD=True)  #+ kern.Bias(input_dim) + kern.White(input_dim)

        if likelihood is None:
            likelihood = Gaussian()

        if variational_prior is None:
            variational_prior = NormalPrior()
        self.variational_prior = variational_prior

        X = NormalPosterior(X, X_variance)

        if inference_method is None:
            if mpi_comm is not None:
                inference_method = VarDTC_minibatch(mpi_comm=mpi_comm)
            else:
                from ..inference.latent_function_inference.var_dtc import VarDTC
                self.logger.debug("creating inference_method var_dtc")
                inference_method = VarDTC(
                    limit=3 if not missing_data else Y.shape[1])
        if isinstance(inference_method, VarDTC_minibatch):
            inference_method.mpi_comm = mpi_comm

        super(BayesianGPLVM,
              self).__init__(X,
                             Y,
                             Z,
                             kernel,
                             likelihood=likelihood,
                             name=name,
                             inference_method=inference_method,
                             normalizer=normalizer,
                             mpi_comm=mpi_comm,
                             variational_prior=variational_prior,
                             Y_metadata=Y_metadata)
        self.link_parameter(self.X, index=0)
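
# --- Usage sketch (illustrative, not part of the constructor above): fitting a
# --- BayesianGPLVM to toy high-dimensional data. Assumes GPy is importable;
# --- sizes and iteration counts are kept deliberately small.
import numpy as np
import GPy

np.random.seed(1)
N, D, Q = 60, 12, 3                           # N points, D observed dims, Q latent dims
latent = np.random.randn(N, Q)
Y = latent.dot(np.random.randn(Q, D)) + 0.1 * np.random.randn(N, D)

m = GPy.models.BayesianGPLVM(Y, Q, init='PCA', num_inducing=15,
                             kernel=GPy.kern.RBF(Q, ARD=True))
m.optimize(messages=False, max_iters=200)
print(m.X.mean.values.shape)                  # (N, Q) variational means of the latent points
print(m.kern.lengthscale)                     # ARD lengthscales indicate relevant latent dims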
class GPMultioutRegressionMD(SparseGP):
    """
    Gaussian Process model for scalable multioutput regression

    This is a thin wrapper around the models.GP class, with a set of sensible defaults
    """
    def __init__(self,
                 X,
                 Y,
                 indexD,
                 Xr_dim,
                 kernel=None,
                 kernel_row=None,
                 likelihood=None,
                 Z=None,
                 Z_row=None,
                 X_row=None,
                 Xvariance_row=None,
                 num_inducing=(10, 10),
                 qU_var_r_W_dim=None,
                 qU_var_c_W_dim=None,
                 init='GP',
                 heter_noise=False,
                 name='GPMR'):

        assert len(Y.shape) == 1 or Y.shape[1] == 1

        self.output_dim = int(np.max(indexD)) + 1
        self.heter_noise = heter_noise
        self.indexD = indexD

        #Kernel
        if kernel is None:
            kernel = kern.RBF(X.shape[1])
        if kernel_row is None:
            kernel_row = kern.RBF(Xr_dim, name='kern_row')

        if init == 'GP':
            from . import SparseGPRegression, BayesianGPLVM
            from ..util.linalg import jitchol
            Mc, Mr = num_inducing
            print('Initializing with GP...')
            print('Fit Sparse GP...')
            m_sgp = SparseGPRegressionMD(X,
                                         Y,
                                         indexD,
                                         kernel=kernel.copy(),
                                         num_inducing=Mc)
            m_sgp.likelihood.variance[:] = Y.var() * 0.01
            m_sgp.optimize(max_iters=1000)
            print('Fit BGPLVM...')
            m_lvm = BayesianGPLVM(m_sgp.posterior.mean.copy().T,
                                  Xr_dim,
                                  kernel=kernel_row.copy(),
                                  num_inducing=Mr)
            m_lvm.likelihood.variance[:] = m_lvm.Y.var() * 0.01
            m_lvm.optimize(max_iters=10000)

            kernel[:] = m_sgp.kern.param_array.copy()
            kernel.variance[:] = np.sqrt(kernel.variance)
            Z = m_sgp.Z.values.copy()
            kernel_row[:] = m_lvm.kern.param_array.copy()
            kernel_row.variance[:] = np.sqrt(kernel_row.variance)
            Z_row = m_lvm.Z.values.copy()
            X_row = m_lvm.X.mean.values.copy()
            Xvariance_row = m_lvm.X.variance.values

            qU_mean = m_lvm.posterior.mean.T.copy()
            qU_var_col_W = jitchol(m_sgp.posterior.covariance)
            qU_var_col_diag = np.full(Mc, 1e-5)
            qU_var_row_W = jitchol(m_lvm.posterior.covariance)
            qU_var_row_diag = np.full(Mr, 1e-5)
            print('Done.')
        else:
            qU_mean = np.zeros(num_inducing)
            qU_var_col_W = np.random.randn(
                num_inducing[0], num_inducing[0]
                if qU_var_c_W_dim is None else qU_var_c_W_dim) * 0.01
            qU_var_col_diag = np.full(num_inducing[0], 1e-5)
            qU_var_row_W = np.random.randn(
                num_inducing[1], num_inducing[1]
                if qU_var_r_W_dim is None else qU_var_r_W_dim) * 0.01
            qU_var_row_diag = np.full(num_inducing[1], 1e-5)

        if Z is None:
            Z = X[np.random.permutation(X.shape[0])[:num_inducing[0]]].copy()
        if X_row is None:
            X_row = np.random.randn(self.output_dim, Xr_dim)
        if Xvariance_row is None:
            Xvariance_row = np.ones((self.output_dim, Xr_dim)) * 0.0001
        if Z_row is None:
            Z_row = X_row[np.random.permutation(
                X_row.shape[0])[:num_inducing[1]]].copy()

        self.kern_row = kernel_row
        self.X_row = NormalPosterior(X_row, Xvariance_row, name='Xr')
        self.Z_row = Param('Zr', Z_row)
        self.variational_prior_row = NormalPrior()

        self.qU_mean = Param('qU_mean', qU_mean)
        self.qU_var_c_W = Param('qU_var_col_W', qU_var_col_W)
        self.qU_var_c_diag = Param('qU_var_col_diag', qU_var_col_diag,
                                   Logexp())
        self.qU_var_r_W = Param('qU_var_row_W', qU_var_row_W)
        self.qU_var_r_diag = Param('qU_var_row_diag', qU_var_row_diag,
                                   Logexp())

        #Likelihood
        if heter_noise:
            likelihood = likelihoods.Gaussian(variance=np.array(
                [np.var(Y[indexD == d])
                 for d in range(self.output_dim)]) * 0.01)
        else:
            likelihood = likelihoods.Gaussian(variance=np.var(Y) * 0.01)
        from ..inference.latent_function_inference.vardtc_svi_multiout_miss import VarDTC_SVI_Multiout_Miss
        inference_method = VarDTC_SVI_Multiout_Miss()

        super(GPMultioutRegressionMD,
              self).__init__(X,
                             Y,
                             Z,
                             kernel,
                             likelihood=likelihood,
                             name=name,
                             inference_method=inference_method)
        self.output_dim = int(np.max(indexD)) + 1  # re-set: the super().__init__ call above works with the single-column Y

        self.link_parameters(self.kern_row, self.X_row, self.Z_row,
                             self.qU_mean, self.qU_var_c_W, self.qU_var_c_diag,
                             self.qU_var_r_W, self.qU_var_r_diag)

        self._log_marginal_likelihood = np.nan

    def parameters_changed(self):
        qU_var_c = tdot(self.qU_var_c_W) + np.diag(self.qU_var_c_diag)
        qU_var_r = tdot(self.qU_var_r_W) + np.diag(self.qU_var_r_diag)
        self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(
            self.kern_row, self.kern, self.X_row, self.X, self.Z_row, self.Z,
            self.likelihood, self.Y, self.qU_mean, qU_var_r, qU_var_c,
            self.indexD, self.output_dim)

        if self.heter_noise:
            self.likelihood.update_gradients(self.grad_dict['dL_dthetaL'])
        else:
            self.likelihood.update_gradients(
                self.grad_dict['dL_dthetaL'].sum())
        self.qU_mean.gradient[:] = self.grad_dict['dL_dqU_mean']
        self.qU_var_c_diag.gradient[:] = np.diag(
            self.grad_dict['dL_dqU_var_c'])
        self.qU_var_c_W.gradient[:] = (self.grad_dict['dL_dqU_var_c'] +
                                       self.grad_dict['dL_dqU_var_c'].T).dot(
                                           self.qU_var_c_W)
        self.qU_var_r_diag.gradient[:] = np.diag(
            self.grad_dict['dL_dqU_var_r'])
        self.qU_var_r_W.gradient[:] = (self.grad_dict['dL_dqU_var_r'] +
                                       self.grad_dict['dL_dqU_var_r'].T).dot(
                                           self.qU_var_r_W)

        self.kern.update_gradients_diag(self.grad_dict['dL_dKdiag_c'], self.X)
        kerngrad = self.kern.gradient.copy()
        self.kern.update_gradients_full(self.grad_dict['dL_dKfu_c'], self.X,
                                        self.Z)
        kerngrad += self.kern.gradient
        self.kern.update_gradients_full(self.grad_dict['dL_dKuu_c'], self.Z,
                                        None)
        self.kern.gradient += kerngrad
        #gradients wrt Z
        self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKuu_c'],
                                                self.Z)
        self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKfu_c'].T,
                                                 self.Z, self.X)

        #gradients wrt kernel
        self.kern_row.update_gradients_full(self.grad_dict['dL_dKuu_r'],
                                            self.Z_row, None)
        kerngrad = self.kern_row.gradient.copy()
        self.kern_row.update_gradients_expectations(
            variational_posterior=self.X_row,
            Z=self.Z_row,
            dL_dpsi0=self.grad_dict['dL_dpsi0_r'],
            dL_dpsi1=self.grad_dict['dL_dpsi1_r'],
            dL_dpsi2=self.grad_dict['dL_dpsi2_r'])
        self.kern_row.gradient += kerngrad

        #gradients wrt Z
        self.Z_row.gradient = self.kern_row.gradients_X(
            self.grad_dict['dL_dKuu_r'], self.Z_row)
        self.Z_row.gradient += self.kern_row.gradients_Z_expectations(
            self.grad_dict['dL_dpsi0_r'],
            self.grad_dict['dL_dpsi1_r'],
            self.grad_dict['dL_dpsi2_r'],
            Z=self.Z_row,
            variational_posterior=self.X_row)

        self._log_marginal_likelihood -= self.variational_prior_row.KL_divergence(
            self.X_row)

        self.X_row.mean.gradient, self.X_row.variance.gradient = self.kern_row.gradients_qX_expectations(
            variational_posterior=self.X_row,
            Z=self.Z_row,
            dL_dpsi0=self.grad_dict['dL_dpsi0_r'],
            dL_dpsi1=self.grad_dict['dL_dpsi1_r'],
            dL_dpsi2=self.grad_dict['dL_dpsi2_r'])

        self.variational_prior_row.update_gradients_KL(self.X_row)

    def optimize_auto(self, max_iters=10000, verbose=True):
        """
        Optimize the model parameters through a pre-defined protocol: first optimize the variational parameters of q(U) and the likelihood for 10% of max_iters with the kernels, inducing inputs and latent space fixed, then unfix everything and optimize jointly.

        :param int max_iters: the maximum number of iterations.
        :param boolean verbose: print the progress of optimization or not.
        """
        self.Z.fix(warning=False)
        self.kern.fix(warning=False)
        self.kern_row.fix(warning=False)
        self.Zr.fix(warning=False)
        self.Xr.fix(warning=False)
        self.optimize(max_iters=int(0.1 * max_iters), messages=verbose)
        self.unfix()
        self.optimize(max_iters=max_iters, messages=verbose)
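
# --- Usage sketch (illustrative, not part of the class above): the missing-data
# --- variant takes all observations stacked into a single column, with indexD
# --- giving the output dimension of each row, so different outputs may be
# --- observed at different inputs. Assumes GPy is importable; data are synthetic.
import numpy as np
import GPy

np.random.seed(2)
D = 4                                          # number of outputs
X_list = [np.random.rand(30, 1) for _ in range(D)]
Y_list = [np.sin(3.0 * x + d) + 0.05 * np.random.randn(*x.shape)
          for d, x in enumerate(X_list)]

X = np.vstack(X_list)
Y = np.vstack(Y_list)                          # single column of stacked observations
indexD = np.hstack([np.full(len(x), d) for d, x in enumerate(X_list)])

m = GPy.models.GPMultioutRegressionMD(X, Y, indexD, Xr_dim=2,
                                      num_inducing=(10, 3), init='rand')
m.optimize_auto(max_iters=200, verbose=False)
print(m.log_likelihood())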
class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
    """
    Bayesian Gaussian Process Latent Variable Model

    :param Y: observed data (np.ndarray) or GPy.likelihood
    :type Y: np.ndarray| GPy.likelihood instance
    :param input_dim: latent dimensionality
    :type input_dim: int
    :param init: initialisation method for the latent space
    :type init: 'PCA'|'random'

    """
    def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10,
                 Z=None, kernel=None, inference_method=None, likelihood=None,
                 name='bayesian gplvm', normalizer=None,
                 missing_data=False, stochastic=False, batchsize=1):
        self.logger = logging.getLogger(self.__class__.__name__)
        if X is None:
            from ..util.initialization import initialize_latent
            self.logger.info("initializing latent space X with method {}".format(init))
            X, fracs = initialize_latent(init, input_dim, Y)
        else:
            fracs = np.ones(input_dim)

        self.init = init

        if Z is None:
            self.logger.info("initializing inducing inputs")
            Z = np.random.permutation(X.copy())[:num_inducing]
        assert Z.shape[1] == X.shape[1]

        if X_variance is False:
            self.logger.info('no variance on X, activating sparse GPLVM')
            X = Param("latent space", X)
        else:
            if X_variance is None:
                self.logger.info("initializing latent space variance ~ uniform(0,.1)")
                X_variance = np.random.uniform(0,.1,X.shape)
            self.variational_prior = NormalPrior()
            X = NormalPosterior(X, X_variance)

        if kernel is None:
            self.logger.info("initializing kernel RBF")
            kernel = kern.RBF(input_dim, lengthscale=1./fracs, ARD=True) #+ kern.Bias(input_dim) + kern.White(input_dim)

        if likelihood is None:
            likelihood = Gaussian()

        self.kl_factr = 1.

        if inference_method is None:
            from ..inference.latent_function_inference.var_dtc import VarDTC
            self.logger.debug("creating inference_method var_dtc")
            inference_method = VarDTC(limit=3 if not missing_data else Y.shape[1])

        super(BayesianGPLVMMiniBatch,self).__init__(X, Y, Z, kernel, likelihood=likelihood,
                                           name=name, inference_method=inference_method,
                                           normalizer=normalizer,
                                           missing_data=missing_data, stochastic=stochastic,
                                           batchsize=batchsize)
        self.X = X
        self.link_parameter(self.X, 0)

    #def set_X_gradients(self, X, X_grad):
    #    """Set the gradients of the posterior distribution of X in its specific form."""
    #    X.mean.gradient, X.variance.gradient = X_grad

    #def get_X_gradients(self, X):
    #    """Get the gradients of the posterior distribution of X in its specific form."""
    #    return X.mean.gradient, X.variance.gradient

    def _outer_values_update(self, full_values):
        """
        Here you put the values, which were collected before in the right places.
        E.g. set the gradients of parameters, etc.
        """
        super(BayesianGPLVMMiniBatch, self)._outer_values_update(full_values)
        if self.has_uncertain_inputs():
            meangrad_tmp, vargrad_tmp = self.kern.gradients_qX_expectations(
                                            variational_posterior=self.X,
                                            Z=self.Z, dL_dpsi0=full_values['dL_dpsi0'],
                                            dL_dpsi1=full_values['dL_dpsi1'],
                                            dL_dpsi2=full_values['dL_dpsi2'],
                                            psi0=self.psi0, psi1=self.psi1, psi2=self.psi2)

            self.X.mean.gradient = meangrad_tmp
            self.X.variance.gradient = vargrad_tmp
        else:
            self.X.gradient = self.kern.gradients_X(full_values['dL_dKnm'], self.X, self.Z)
            self.X.gradient += self.kern.gradients_X_diag(full_values['dL_dKdiag'], self.X)

    def _outer_init_full_values(self):
        return super(BayesianGPLVMMiniBatch, self)._outer_init_full_values()

    def parameters_changed(self):
        super(BayesianGPLVMMiniBatch,self).parameters_changed()

        kl_fctr = self.kl_factr
        if kl_fctr > 0 and self.has_uncertain_inputs():
            Xgrad = self.X.gradient.copy()
            self.X.gradient[:] = 0
            self.variational_prior.update_gradients_KL(self.X)

            if self.missing_data or not self.stochastics:
                self.X.mean.gradient = kl_fctr*self.X.mean.gradient
                self.X.variance.gradient = kl_fctr*self.X.variance.gradient
            else:
                d = self.output_dim
                self.X.mean.gradient = kl_fctr*self.X.mean.gradient*self.stochastics.batchsize/d
                self.X.variance.gradient = kl_fctr*self.X.variance.gradient*self.stochastics.batchsize/d
            self.X.gradient += Xgrad

            if self.missing_data or not self.stochastics:
                self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X)
            else: #self.stochastics is given:
                d = self.output_dim
                self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X)*self.stochastics.batchsize/d

        self._Xgrad = self.X.gradient.copy()
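
# --- Usage sketch (illustrative, not part of the class above): in its default
# --- configuration BayesianGPLVMMiniBatch behaves like BayesianGPLVM; the
# --- missing_data, stochastic and batchsize arguments switch on the minibatch
# --- machinery. Assumes GPy is importable; sizes are placeholders.
import numpy as np
import GPy

np.random.seed(3)
N, D, Q = 80, 10, 2
Y = np.random.randn(N, Q).dot(np.random.randn(Q, D)) + 0.1 * np.random.randn(N, D)

m = GPy.models.BayesianGPLVMMiniBatch(Y, Q, num_inducing=10)
m.optimize(messages=False, max_iters=100)
print(m.X.mean.values.shape)                   # (N, Q) variational means of the latent space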
class Layer(SparseGP):
    def __init__(self,
                 layer_upper,
                 Xs,
                 X_win=0,
                 Us=None,
                 U_win=1,
                 Z=None,
                 num_inducing=10,
                 kernel=None,
                 inference_method=None,
                 likelihood=None,
                 noise_var=1.,
                 inducing_init='kmeans',
                 back_cstr=False,
                 MLP_dims=None,
                 name='layer'):

        self.layer_upper = layer_upper
        self.nSeq = len(Xs)

        self.X_win = X_win  # if X_win==0, it is not autoregressive.
        self.X_dim = Xs[0].shape[1]
        self.Xs_flat = Xs
        self.X_observed = False if isinstance(Xs[0],
                                              VariationalPosterior) else True

        self.withControl = Us is not None
        self.U_win = U_win
        self.U_dim = Us[0].shape[1] if self.withControl else None
        self.Us_flat = Us
        if self.withControl:
            assert len(Xs) == len(
                Us
            ), "The number of signals should be equal to the number controls!"

        if not self.X_observed and back_cstr:
            self._init_encoder(MLP_dims)
            self.back_cstr = True
        else:
            self.back_cstr = False
        self._init_XY()

        if Z is None:
            if not back_cstr and inducing_init == 'kmeans':
                from sklearn.cluster import KMeans
                m = KMeans(n_clusters=num_inducing, n_init=1000, max_iter=100)
                m.fit(self.X.mean.values.copy())
                Z = m.cluster_centers_.copy()
            else:
                Z = np.random.randn(num_inducing, self.X.shape[1])
        assert Z.shape[1] == self.X.shape[1]

        if kernel is None: kernel = kern.RBF(self.X.shape[1], ARD=True)

        if inference_method is None: inference_method = VarDTC()
        if likelihood is None:
            likelihood = likelihoods.Gaussian(variance=noise_var)
        self.normalPrior, self.normalEntropy = NormalPrior(), NormalEntropy()
        super(Layer, self).__init__(self.X,
                                    self.Y,
                                    Z,
                                    kernel,
                                    likelihood,
                                    inference_method=inference_method,
                                    name=name)
        if not self.X_observed:
            if back_cstr:
                assert self.X_win > 0
                self.link_parameters(*(self.init_Xs + self.Xs_var +
                                       [self.encoder]))
            else:
                self.link_parameters(*self.Xs_flat)

    def _init_encoder(self, MLP_dims):
        from .mlp import MLP
        from copy import deepcopy
        from GPy.core.parameterization.transformations import Logexp
        X_win, X_dim, U_win, U_dim = self.X_win, self.X_dim, self.U_win, self.U_dim
        assert X_win > 0, "Neural network constraints only apply to autoregressive structures!"
        Q = X_win * X_dim + U_win * U_dim if self.withControl else X_win * X_dim
        self.init_Xs = [
            NormalPosterior(self.Xs_flat[i].mean.values[:X_win],
                            self.Xs_flat[i].variance.values[:X_win],
                            name='init_Xs_' + str(i)) for i in range(self.nSeq)
        ]
        for init_X in self.init_Xs:
            init_X.mean[:] = np.random.randn(*init_X.shape) * 1e-2
        # layer sizes must be integers, hence the floor division on X_dim
        self.encoder = MLP([Q, Q * 2, Q + X_dim // 2, X_dim] if MLP_dims is None
                           else [Q] + deepcopy(MLP_dims) + [X_dim])
        self.Xs_var = [
            Param('X_var_' + str(i),
                  self.Xs_flat[i].variance.values[X_win:].copy(), Logexp())
            for i in range(self.nSeq)
        ]

    def _init_XY(self):
        X_win, X_dim, U_win, U_dim = self.X_win, self.X_dim, self.U_win, self.U_dim
        self._update_conv()
        if X_win > 0:
            X_mean_conv, X_var_conv = np.vstack(self.X_mean_conv), np.vstack(
                self.X_var_conv)
        if self.withControl:
            U_mean_conv, U_var_conv = np.vstack(self.U_mean_conv), np.vstack(
                self.U_var_conv)

        if not self.withControl:
            self.X = NormalPosterior(X_mean_conv, X_var_conv)
        elif X_win == 0:
            self.X = NormalPosterior(U_mean_conv, U_var_conv)
        else:
            self.X = NormalPosterior(np.hstack([X_mean_conv, U_mean_conv]),
                                     np.hstack([X_var_conv, U_var_conv]))

        if self.X_observed:
            self.Y = np.vstack([x[X_win:] for x in self.Xs_flat])
        else:
            self.Y = NormalPosterior(
                np.vstack([x.mean.values[X_win:] for x in self.Xs_flat]),
                np.vstack([x.variance.values[X_win:] for x in self.Xs_flat]))

    def plot_latent(self,
                    labels=None,
                    which_indices=None,
                    resolution=50,
                    ax=None,
                    marker='o',
                    s=40,
                    fignum=None,
                    plot_inducing=True,
                    legend=True,
                    plot_limits=None,
                    aspect='auto',
                    updates=False,
                    predict_kwargs={},
                    imshow_kwargs={}):
        import sys
        assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
        from GPy.plotting.matplot_dep import dim_reduction_plots

        return dim_reduction_plots.plot_latent(self, labels, which_indices,
                                               resolution, ax, marker, s,
                                               fignum, plot_inducing, legend,
                                               plot_limits, aspect, updates,
                                               predict_kwargs, imshow_kwargs)

    def _update_conv(self):
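        """
        Recompute the windowed views: for each sequence, slide a window of
        length X_win over the latent means/variances (and of length U_win over
        the controls) and flatten every window into a single row.
        """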
        X_win, X_dim, U_win, U_dim = self.X_win, self.X_dim, self.U_win, self.U_dim
        if self.back_cstr: self._encoder_freerun()
        self.X_mean_conv, self.X_var_conv, self.U_mean_conv, self.U_var_conv = [], [], [], []
        for i_seq in range(self.nSeq):
            N = self.Xs_flat[i_seq].shape[0] - X_win
            if X_win > 0:
                self.X_mean_conv.append(
                    get_conv_1D(self.Xs_flat[i_seq].mean.values[:-1],
                                X_win).reshape(N, -1))
                self.X_var_conv.append(
                    get_conv_1D(self.Xs_flat[i_seq].variance.values[:-1],
                                X_win).reshape(N, -1))
            if self.withControl:
                self.U_mean_conv.append(
                    get_conv_1D(
                        self.Us_flat[i_seq].mean.values[-N - U_win + 1:],
                        U_win).reshape(N, -1))
                self.U_var_conv.append(
                    get_conv_1D(
                        self.Us_flat[i_seq].variance.values[-N - U_win + 1:],
                        U_win).reshape(N, -1))

    def _update_X(self):
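        """
        Copy the freshly computed per-sequence windows back into the stacked
        variational input X (and, for hidden layers, into the stacked output Y).
        """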
        self._update_conv()
        X_offset, Y_offset = 0, 0
        for i_seq in range(self.nSeq):
            if self.X_win > 0:
                N, Q = self.X_mean_conv[i_seq].shape
                self.X.mean[X_offset:X_offset +
                            N, :Q] = self.X_mean_conv[i_seq]
                self.X.variance[X_offset:X_offset +
                                N, :Q] = self.X_var_conv[i_seq]
            else:
                Q = 0
            if self.withControl:
                N = self.U_mean_conv[i_seq].shape[0]
                self.X.mean[X_offset:X_offset + N,
                            Q:] = self.U_mean_conv[i_seq]
                self.X.variance[X_offset:X_offset + N,
                                Q:] = self.U_var_conv[i_seq]
            X_offset += N

            if not self.X_observed:
                N = self.Xs_flat[i_seq].shape[0] - self.X_win
                self.Y.mean[Y_offset:Y_offset +
                            N] = self.Xs_flat[i_seq].mean[self.X_win:]
                self.Y.variance[Y_offset:Y_offset +
                                N] = self.Xs_flat[i_seq].variance[self.X_win:]
                Y_offset += N

    def update_latent_gradients(self):
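        """
        Scatter the gradients of the stacked input X back onto the per-sequence
        latent variables Xs_flat (and controls Us_flat), undoing the windowing
        performed in _update_conv.
        """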
        X_offset = 0
        X_win, X_dim, U_win, U_dim = self.X_win, self.X_dim, self.U_win, self.U_dim
        for i_seq in range(self.nSeq):
            N = self.Xs_flat[i_seq].shape[0] - X_win
            if self.withControl:
                U_offset = -N - U_win + 1 + self.Us_flat[i_seq].shape[0]
            for n in range(N):
                if X_win > 0:
                    Q = self.X_mean_conv[i_seq].shape[1]
                    self.Xs_flat[i_seq].mean.gradient[n:n + X_win] += \
                        self.X.mean.gradient[X_offset + n, :Q].reshape(-1, X_dim)
                    self.Xs_flat[i_seq].variance.gradient[n:n + X_win] += \
                        self.X.variance.gradient[X_offset + n, :Q].reshape(-1, X_dim)
                else:
                    Q = 0
                if self.withControl:
                    self.Us_flat[i_seq].mean.gradient[U_offset + n:U_offset + n + U_win] += \
                        self.X.mean.gradient[X_offset + n, Q:].reshape(-1, U_dim)
                    self.Us_flat[i_seq].variance.gradient[U_offset + n:U_offset + n + U_win] += \
                        self.X.variance.gradient[X_offset + n, Q:].reshape(-1, U_dim)
            X_offset += N
        if self.back_cstr: self._encoder_update_gradient()

    def _update_qX_gradients(self):
        self.X.mean.gradient, self.X.variance.gradient = self.kern.gradients_qX_expectations(
            variational_posterior=self.X,
            Z=self.Z,
            dL_dpsi0=self.grad_dict['dL_dpsi0'],
            dL_dpsi1=self.grad_dict['dL_dpsi1'],
            dL_dpsi2=self.grad_dict['dL_dpsi2'])

    def _prepare_gradients(self):
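        """
        Reset the control gradients at the top layer, add the data-fit
        gradients (dL_dYmean, dL_dYvar) to the latent sequences of hidden
        layers, and correct the log marginal likelihood with the prior and
        entropy terms of the variational distribution.
        """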
        X_win = self.X_win
        if self.withControl and self.layer_upper is None:
            for U in self.Us_flat:
                U.mean.gradient[:] = 0
                U.variance.gradient[:] = 0
        if not self.X_observed:
            Y_offset = 0
            delta = 0
            for X in self.Xs_flat:
                N = X.shape[0] - X_win
                X.mean.gradient[:] = 0
                X.variance.gradient[:] = 0
                X.mean.gradient[X_win:] += self.grad_dict['dL_dYmean'][
                    Y_offset:Y_offset + N]
                X.variance.gradient[X_win:] += self.grad_dict['dL_dYvar'][
                    Y_offset:Y_offset + N, None]
                if X_win > 0:
                    delta += -self.normalPrior.comp_value(X[:X_win])
                    self.normalPrior.update_gradients(X[:X_win])
                delta += -self.normalEntropy.comp_value(X[X_win:])
                self.normalEntropy.update_gradients(X[X_win:])
                Y_offset += N
            self._log_marginal_likelihood += delta

    def parameters_changed(self):
        self._update_X()
        super(Layer, self).parameters_changed()
        self._update_qX_gradients()
        self._prepare_gradients()

    def _encoder_freerun(self):
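        """
        Run the MLP encoder forward through each sequence: starting from the
        learned initial window, predict every latent mean from the preceding
        latent window (and control window), keeping the variances from Xs_var.
        """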
        X_win, X_dim, U_win, U_dim = self.X_win, self.X_dim, self.U_win, self.U_dim
        Q = X_win * X_dim + U_win * U_dim if self.withControl else X_win * X_dim

        X_in = np.zeros((Q, ))
        for i_seq in range(self.nSeq):
            X_flat, init_X, X_var = self.Xs_flat[i_seq], self.init_Xs[
                i_seq], self.Xs_var[i_seq]
            if self.withControl: U_flat = self.Us_flat[i_seq]
            X_flat.mean[:X_win] = init_X.mean.values
            X_flat.variance[:X_win] = init_X.variance.values
            X_flat.variance[X_win:] = X_var.values

            N = X_flat.shape[0] - X_win
            if self.withControl: U_offset = U_flat.shape[0] - N - U_win + 1
            for n in range(N):
                X_in[:X_win * X_dim] = X_flat.mean[n:n + X_win].flat
                if self.withControl:
                    X_in[X_win * X_dim:] = U_flat.mean[U_offset + n:U_offset +
                                                       n + U_win].flat
                X_out = self.encoder.predict(X_in[None, :])
                X_flat.mean[X_win + n] = X_out[0]

    def _encoder_update_gradient(self):
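        """
        Back-propagate the latent mean gradients through the MLP encoder in
        reverse time order, accumulating gradients for the encoder weights, the
        initial windows and the control signals.
        """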
        self.encoder.prepare_grad()
        X_win, X_dim, U_win, U_dim = self.X_win, self.X_dim, self.U_win, self.U_dim
        Q = X_win * X_dim + U_win * U_dim if self.withControl else X_win * X_dim

        X_in = np.zeros((Q, ))
        dL = np.zeros((X_dim, ))
        for i_seq in range(self.nSeq):
            X_flat, init_X, X_var = self.Xs_flat[i_seq], self.init_Xs[
                i_seq], self.Xs_var[i_seq]
            if self.withControl: U_flat = self.Us_flat[i_seq]
            N = X_flat.shape[0] - X_win
            if self.withControl: U_offset = U_flat.shape[0] - N - U_win + 1

            for n in range(N - 1, -1, -1):
                X_in[:X_win * X_dim] = X_flat.mean[n:n + X_win].flat
                if self.withControl:
                    X_in[X_win * X_dim:] = U_flat.mean[U_offset + n:U_offset +
                                                       n + U_win].flat
                dL[:] = X_flat.mean.gradient[X_win + n].flat
                dX = self.encoder.update_gradient(X_in[None, :], dL[None, :])
                X_flat.mean.gradient[n:n + X_win] += \
                    dX[0, :X_win * X_dim].reshape(-1, X_dim)
                if self.withControl:
                    U_flat.mean.gradient[U_offset + n:U_offset + n + U_win] += \
                        dX[0, X_win * X_dim:].reshape(-1, U_dim)
            init_X.mean.gradient[:] = X_flat.mean.gradient[:X_win]
            init_X.variance.gradient[:] = X_flat.variance.gradient[:X_win]
            X_var.gradient[:] = X_flat.variance.gradient[X_win:]

    def freerun(self,
                init_Xs=None,
                step=None,
                U=None,
                m_match=True,
                encoder=False):
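        """
        Generate a free-running (autoregressive) prediction of length `step`.
        With m_match=True the GP prediction is propagated by moment matching
        and a NormalPosterior is returned; with encoder=True (on a
        back-constrained model) the MLP encoder is rolled out deterministically;
        otherwise only the GP predictive means are fed back.
        """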
        X_win, X_dim, U_win, U_dim = self.X_win, self.X_dim, self.U_win, self.U_dim
        Q = X_win * X_dim + U_win * U_dim if self.withControl else X_win * X_dim
        if U is None and self.withControl:
            raise "The model needs control signals!"
        if U is not None and step is None: step = U.shape[0] - U_win
        elif step is None: step = 100
        if init_Xs is None and X_win > 0:
            if m_match:
                init_Xs = NormalPosterior(np.zeros((X_win, X_dim)),
                                          np.ones((X_win, X_dim)))
            else:
                init_Xs = np.zeros((X_win, X_dim))
        if U is not None:
            assert U.shape[1] == U_dim, \
                "The dimensionality of the control signal has to be " + str(U_dim) + "!"
        encoder = encoder and self.back_cstr

        if m_match:  # free run with moment matching
            X = NormalPosterior(np.empty((X_win + step, X_dim)),
                                np.ones((X_win + step, X_dim)))
            if X_win > 0:
                X.mean[:X_win] = init_Xs.mean[-X_win:]
                X.variance[:X_win] = init_Xs.variance[-X_win:]
            X_in = NormalPosterior(np.empty((1, Q)), np.ones((1, Q)))
            X_in.variance[:] = 1e-10
            for n in range(step):
                if X_win > 0:
                    X_in.mean[0, :X_win * X_dim] = X.mean[n:n + X_win].flat
                    X_in.variance[0, :X_win * X_dim] = X.variance[n:n +
                                                                  X_win].flat
                if self.withControl:
                    if isinstance(U, NormalPosterior):
                        X_in.mean[0, X_win * X_dim:] = U.mean[n:n + U_win].flat
                        X_in.variance[0, X_win *
                                      X_dim:] = U.variance[n:n + U_win].flat
                    else:
                        X_in.mean[0, X_win * X_dim:] = U[n:n + U_win].flat
                X_out = self._raw_predict(X_in)
                X.mean[X_win + n] = X_out[0]
                if np.any(X_out[1] <= 0.): X_out[1][X_out[1] <= 0.] = 1e-10
                X.variance[X_win + n] = X_out[1]
        elif encoder:
            X = np.empty((X_win + step, X_dim))
            X_in = np.empty((1, Q))
            if X_win > 0: X[:X_win] = init_Xs[-X_win:]
            for n in range(step):
                if X_win > 0: X_in[0, :X_win * X_dim] = X[n:n + X_win].flat
                if self.withControl:
                    X_in[0, X_win * X_dim:] = U[n:n + U_win].flat
                X[X_win + n] = self.encoder.predict(X_in)[0]
        else:
            X = np.empty((X_win + step, X_dim))
            X_in = np.empty((1, Q))
            if X_win > 0: X[:X_win] = init_Xs[-X_win:]
            for n in range(step):
                if X_win > 0: X_in[0, :X_win * X_dim] = X[n:n + X_win].flat
                if self.withControl:
                    X_in[0, X_win * X_dim:] = U[n:n + U_win].flat
                X[X_win + n] = self._raw_predict(X_in)[0]
        return X
class GPMultioutRegressionMD(SparseGP):
    """
    Gaussian Process model for multi-output regression with missing data

    This is an implementation of Latent Variable Multiple Output Gaussian Processes (LVMOGP) in [Dai et al. 2017]. This model targets the use case in which each output dimension is observed at a different set of inputs. The model therefore takes a different data format: the input and output observations of all the output dimensions are stacked into two matrices, and an extra array indicates the output dimension index of each data point. The output dimensions are indexed with integers from 0 to D-1, assuming there are D output dimensions.

    Zhenwen Dai, Mauricio A. Alvarez and Neil D. Lawrence. Efficient Modeling of Latent Information in Supervised Learning using Gaussian Processes. In NIPS, 2017.

    :param X: input observations.
    :type X: numpy.ndarray
    :param Y: output observations, each column corresponding to an output dimension.
    :type Y: numpy.ndarray
    :param indexD: the array containing the index of output dimension for each data point
    :type indexD: numpy.ndarray
    :param int Xr_dim: the dimensionality of the latent space in which the output dimensions are embedded
    :param kernel: a GPy kernel for the GP over individual output dimensions ** defaults to RBF **
    :type kernel: GPy.kern.Kern or None
    :param kernel_row: a GPy kernel for the GP of the latent space ** defaults to RBF **
    :type kernel_row: GPy.kern.Kern or None
    :param Z: inducing inputs
    :type Z: numpy.ndarray or None
    :param Z_row: inducing inputs for the latent space
    :type Z_row: numpy.ndarray or None
    :param X_row: the initial value of the mean of the variational posterior distribution of points in the latent space
    :type X_row: numpy.ndarray or None
    :param Xvariance_row: the initial value of the variance of the variational posterior distribution of points in the latent space
    :type Xvariance_row: numpy.ndarray or None
    :param num_inducing: a tuple (M, Mr). M is the number of inducing points for the GP over individual output dimensions. Mr is the number of inducing points for the latent space.
    :type num_inducing: (int, int)
    :param int qU_var_r_W_dim: the dimensionality of the covariance of q(U) for the latent space. If it is smaller than the number of inducing points, it represents a low-rank parameterization of the covariance matrix.
    :param int qU_var_c_W_dim: the dimensionality of the covariance of q(U) for the GP regression. If it is smaller than the number of inducing points, it represents a low-rank parameterization of the covariance matrix.
    :param str init: the choice of initialization: 'GP' or 'rand'. With 'rand', the model is initialized randomly. With 'GP', the model is initialized through the following protocol: (1) fit a sparse GP, (2) fit a BGPLVM on the outcome of the sparse GP, (3) initialize the model from the outcome of the BGPLVM.
    :param boolean heter_noise: whether to assume heteroscedastic (per output dimension) noise in the model
    :param str name: the name of the model
    """
    def __init__(self, X, Y, indexD, Xr_dim, kernel=None, kernel_row=None,  Z=None, Z_row=None, X_row=None, Xvariance_row=None, num_inducing=(10,10), qU_var_r_W_dim=None, qU_var_c_W_dim=None, init='GP', heter_noise=False, name='GPMRMD'):

        assert len(Y.shape)==1 or Y.shape[1]==1

        self.output_dim = int(np.max(indexD))+1
        self.heter_noise = heter_noise
        self.indexD = indexD

        #Kernel
        if kernel is None:
            kernel = kern.RBF(X.shape[1])
        if kernel_row is None:
            kernel_row = kern.RBF(Xr_dim,name='kern_row')

        if init=='GP':
            from . import SparseGPRegressionMD, BayesianGPLVM
            from ..util.linalg import jitchol
            Mc, Mr = num_inducing
            print('Initializing with GP...')
            print('Fit Sparse GP...')
            m_sgp = SparseGPRegressionMD(X,Y,indexD,kernel=kernel.copy(),num_inducing=Mc)
            m_sgp.likelihood.variance[:] = Y.var()*0.01
            m_sgp.optimize(max_iters=1000)
            print('Fit BGPLVM...')
            m_lvm = BayesianGPLVM(m_sgp.posterior.mean.copy().T,Xr_dim,kernel=kernel_row.copy(), num_inducing=Mr)
            m_lvm.likelihood.variance[:] = m_lvm.Y.var()*0.01
            m_lvm.optimize(max_iters=10000)

            kernel[:] = m_sgp.kern.param_array.copy()
            kernel.variance[:] = np.sqrt(kernel.variance)
            Z = m_sgp.Z.values.copy()
            kernel_row[:] = m_lvm.kern.param_array.copy()
            kernel_row.variance[:] = np.sqrt(kernel_row.variance)
            Z_row = m_lvm.Z.values.copy()
            X_row = m_lvm.X.mean.values.copy()
            Xvariance_row = m_lvm.X.variance.values

            qU_mean = m_lvm.posterior.mean.T.copy()
            qU_var_col_W = jitchol(m_sgp.posterior.covariance)
            qU_var_col_diag = np.full(Mc,1e-5)
            qU_var_row_W = jitchol(m_lvm.posterior.covariance)
            qU_var_row_diag = np.full(Mr,1e-5)
            print('Done.')
        else:
            qU_mean = np.zeros(num_inducing)
            qU_var_col_W = np.random.randn(num_inducing[0],num_inducing[0] if qU_var_c_W_dim is None else qU_var_c_W_dim)*0.01
            qU_var_col_diag = np.full(num_inducing[0],1e-5)
            qU_var_row_W = np.random.randn(num_inducing[1],num_inducing[1] if qU_var_r_W_dim is None else qU_var_r_W_dim)*0.01
            qU_var_row_diag = np.full(num_inducing[1],1e-5)


        if Z is None:
            Z = X[np.random.permutation(X.shape[0])[:num_inducing[0]]].copy()
        if X_row is None:
            X_row = np.random.randn(self.output_dim,Xr_dim)
        if Xvariance_row is None:
            Xvariance_row = np.ones((self.output_dim,Xr_dim))*0.0001
        if Z_row is None:
            Z_row = X_row[np.random.permutation(X_row.shape[0])[:num_inducing[1]]].copy()

        self.kern_row = kernel_row
        self.X_row = NormalPosterior(X_row, Xvariance_row,name='Xr')
        self.Z_row = Param('Zr', Z_row)
        self.variational_prior_row = NormalPrior()

        self.qU_mean = Param('qU_mean', qU_mean)
        self.qU_var_c_W = Param('qU_var_col_W', qU_var_col_W)
        self.qU_var_c_diag = Param('qU_var_col_diag', qU_var_col_diag, Logexp())
        self.qU_var_r_W = Param('qU_var_row_W',qU_var_row_W)
        self.qU_var_r_diag = Param('qU_var_row_diag', qU_var_row_diag, Logexp())

        #Likelihood
        if heter_noise:
            likelihood = likelihoods.Gaussian(variance=np.array([np.var(Y[indexD==d]) for d in range(self.output_dim)])*0.01)
        else:
            likelihood = likelihoods.Gaussian(variance=np.var(Y)*0.01)
        from ..inference.latent_function_inference.vardtc_svi_multiout_miss import VarDTC_SVI_Multiout_Miss
        inference_method = VarDTC_SVI_Multiout_Miss()

        super(GPMultioutRegressionMD,self).__init__(X, Y, Z, kernel, likelihood=likelihood,
                                           name=name, inference_method=inference_method)
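        # Restore the number of output dimensions: the SparseGP constructor
        # infers output_dim from Y's shape, which is a single stacked column here.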
        self.output_dim = int(np.max(indexD))+1

        self.link_parameters(self.kern_row, self.X_row, self.Z_row,self.qU_mean, self.qU_var_c_W, self.qU_var_c_diag, self.qU_var_r_W, self.qU_var_r_diag)

        self._log_marginal_likelihood = np.nan

    def parameters_changed(self):
        qU_var_c = tdot(self.qU_var_c_W) + np.diag(self.qU_var_c_diag)
        qU_var_r = tdot(self.qU_var_r_W) + np.diag(self.qU_var_r_diag)
        self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern_row, self.kern, self.X_row, self.X, self.Z_row, self.Z, self.likelihood, self.Y, self.qU_mean ,qU_var_r, qU_var_c, self.indexD, self.output_dim)

        if self.heter_noise:
            self.likelihood.update_gradients(self.grad_dict['dL_dthetaL'])
        else:
            self.likelihood.update_gradients(self.grad_dict['dL_dthetaL'].sum())
        self.qU_mean.gradient[:] = self.grad_dict['dL_dqU_mean']
        self.qU_var_c_diag.gradient[:] = np.diag(self.grad_dict['dL_dqU_var_c'])
        self.qU_var_c_W.gradient[:] = (self.grad_dict['dL_dqU_var_c']+self.grad_dict['dL_dqU_var_c'].T).dot(self.qU_var_c_W)
        self.qU_var_r_diag.gradient[:] = np.diag(self.grad_dict['dL_dqU_var_r'])
        self.qU_var_r_W.gradient[:] = (self.grad_dict['dL_dqU_var_r']+self.grad_dict['dL_dqU_var_r'].T).dot(self.qU_var_r_W)

        self.kern.update_gradients_diag(self.grad_dict['dL_dKdiag_c'], self.X)
        kerngrad = self.kern.gradient.copy()
        self.kern.update_gradients_full(self.grad_dict['dL_dKfu_c'], self.X, self.Z)
        kerngrad += self.kern.gradient
        self.kern.update_gradients_full(self.grad_dict['dL_dKuu_c'], self.Z, None)
        self.kern.gradient += kerngrad
        #gradients wrt Z
        self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKuu_c'], self.Z)
        self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKfu_c'].T, self.Z, self.X)


        #gradients wrt kernel
        self.kern_row.update_gradients_full(self.grad_dict['dL_dKuu_r'], self.Z_row, None)
        kerngrad = self.kern_row.gradient.copy()
        self.kern_row.update_gradients_expectations(variational_posterior=self.X_row,
                                                Z=self.Z_row,
                                                dL_dpsi0=self.grad_dict['dL_dpsi0_r'],
                                                dL_dpsi1=self.grad_dict['dL_dpsi1_r'],
                                                dL_dpsi2=self.grad_dict['dL_dpsi2_r'])
        self.kern_row.gradient += kerngrad

        #gradients wrt Z
        self.Z_row.gradient = self.kern_row.gradients_X(self.grad_dict['dL_dKuu_r'], self.Z_row)
        self.Z_row.gradient += self.kern_row.gradients_Z_expectations(
                           self.grad_dict['dL_dpsi0_r'],
                           self.grad_dict['dL_dpsi1_r'],
                           self.grad_dict['dL_dpsi2_r'],
                           Z=self.Z_row,
                           variational_posterior=self.X_row)

        self._log_marginal_likelihood -= self.variational_prior_row.KL_divergence(self.X_row)

        self.X_row.mean.gradient, self.X_row.variance.gradient = self.kern_row.gradients_qX_expectations(
                                            variational_posterior=self.X_row,
                                            Z=self.Z_row,
                                            dL_dpsi0=self.grad_dict['dL_dpsi0_r'],
                                            dL_dpsi1=self.grad_dict['dL_dpsi1_r'],
                                            dL_dpsi2=self.grad_dict['dL_dpsi2_r'])

        self.variational_prior_row.update_gradients_KL(self.X_row)

    def optimize_auto(self,max_iters=10000,verbose=True):
        """
        Optimize the model parameters through a pre-defined protocol.

        :param int max_iters: the maximum number of iterations.
        :param boolean verbose: whether to print the progress of the optimization.
        """
        self.Z.fix(warning=False)
        self.kern.fix(warning=False)
        self.kern_row.fix(warning=False)
        self.Zr.fix(warning=False)
        self.Xr.fix(warning=False)
        self.optimize(max_iters=int(0.1*max_iters),messages=verbose)
        self.unfix()
        self.optimize(max_iters=max_iters,messages=verbose)