Example #1
    def _init_variational_parameters(self, q_mu, q_sqrt):
        q_mu = np.zeros(
            (self.num_inducing, self.num_latent)) if q_mu is None else q_mu
        self.q_mu = Parameter(q_mu, dtype=settings.float_type)  # M x K

        if q_sqrt is None:
            if self.q_diag:
                self.q_sqrt = Parameter(np.ones(
                    (self.num_inducing, self.num_latent),
                    dtype=settings.float_type),
                                        transform=transforms.positive)  # M x K
            else:
                q_sqrt = np.array([
                    np.eye(self.num_inducing, dtype=settings.float_type)
                    for _ in range(self.num_latent)
                ])
                self.q_sqrt = Parameter(q_sqrt,
                                        transform=transforms.LowerTriangular(
                                            self.num_inducing,
                                            self.num_latent))  # K x M x M
        else:
            if self.q_diag:
                assert q_sqrt.ndim == 2
                self.q_sqrt = Parameter(q_sqrt,
                                        transform=transforms.positive)  # M x K
            else:
                assert q_sqrt.ndim == 3
                self.q_sqrt = Parameter(q_sqrt,
                                        transform=transforms.LowerTriangular(
                                            self.num_inducing,
                                            self.num_latent))  # K x M x M
Example #2
    def _init_variational_parameters(self, num_inducing, q_mu, q_sqrt, q_diag):
        """
        Constructs the mean and cholesky of the covariance of the variational Gaussian posterior.
        If a user passes values for `q_mu` and `q_sqrt` the routine checks if they have consistent
        and correct shapes. If a user does not specify any values for `q_mu` and `q_sqrt`, the routine
        initializes them, their shape depends on `num_inducing` and `q_diag`.
        Note: most often the comments refer to the number of observations (=output dimensions) with P,
        number of latent GPs with L, and number of inducing points M. Typically P equals L,
        but when certain multi-output kernels are used, this can change.
        Parameters
        ----------
        :param num_inducing: int
            Number of inducing variables, typically referred to as M.
        :param q_mu: np.array or None
            Mean of the variational Gaussian posterior. If None the function will initialise
            the mean with zeros. If not None, the shape of `q_mu` is checked.
        :param q_sqrt: np.array or None
            Cholesky of the covariance of the variational Gaussian posterior.
            If None the function will initialise `q_sqrt` with identity matrix.
            If not None, the shape of `q_sqrt` is checked, depending on `q_diag`.
        :param q_diag: bool
            Used to check if `q_mu` and `q_sqrt` have the correct shape or to
            construct them with the correct shape. If `q_diag` is true,
            `q_sqrt` is two dimensional and only holds the square root of the
            covariance diagonal elements. If False, `q_sqrt` is three dimensional.
        """
        q_mu = np.zeros(
            (num_inducing, self.num_latent)) if q_mu is None else q_mu
        self.q_mu = Parameter(q_mu, dtype=settings.float_type)  # M x P

        if q_sqrt is None:
            if self.q_diag:
                self.q_sqrt = Parameter(np.ones(
                    (num_inducing, self.num_latent),
                    dtype=settings.float_type),
                                        transform=transforms.positive)  # M x P
            else:
                q_sqrt = np.array([
                    np.eye(num_inducing, dtype=settings.float_type)
                    for _ in range(self.num_latent)
                ])
                self.q_sqrt = Parameter(q_sqrt,
                                        transform=transforms.LowerTriangular(
                                            num_inducing,
                                            self.num_latent))  # P x M x M
        else:
            if q_diag:
                assert q_sqrt.ndim == 2
                self.num_latent = q_sqrt.shape[1]
                self.q_sqrt = Parameter(
                    q_sqrt, transform=transforms.positive)  # M x L/P
            else:
                assert q_sqrt.ndim == 3
                self.num_latent = q_sqrt.shape[0]
                num_inducing = q_sqrt.shape[1]
                self.q_sqrt = Parameter(q_sqrt,
                                        transform=transforms.LowerTriangular(
                                            num_inducing,
                                            self.num_latent))  # L/P x M x M
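The shape conventions above are easy to get wrong, so here is a minimal numpy sketch (editorial, not part of the scraped source) of what the routine expects for M = 5 inducing points and P = 3 latent GPs:

import numpy as np

M, P = 5, 3  # num_inducing, num_latent

# q_diag=False: one full M x M Cholesky factor per latent GP
q_mu = np.zeros((M, P))                           # M x P
q_sqrt = np.array([np.eye(M) for _ in range(P)])  # P x M x M
assert q_mu.shape == (M, P) and q_sqrt.shape == (P, M, M)

# q_diag=True: only the square roots of the diagonal are stored
q_sqrt_diag = np.ones((M, P))                     # M x P
assert q_sqrt_diag.ndim == 2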
Example #3
    def __init__(self,
                 kern,
                 Z,
                 num_inducing,
                 num_outputs,
                 mean_function=None,
                 white=True):
        self.white = white
        self.kern = kern
        self.num_inputs = kern.input_dim
        self.num_outputs = num_outputs
        self.num_inducing = num_inducing
        self.q_diag = False
        Um = np.zeros((self.num_inducing, self.num_outputs))
        Us_sqrt = np.ones(
            (self.num_inducing,
             self.num_outputs)) if self.q_diag else np.array(
                 [np.eye(self.num_inducing) for _ in range(self.num_outputs)])
        with tf.name_scope("inducing"):
            self.Z = Param(Z, name="z")()
            self.Um = Param(Um, name="u")()
            if self.q_diag:
                self.Us_sqrt = Param(Us_sqrt,
                                     transforms.positive,
                                     name="u_variance")()
            else:
                self.Us_sqrt = Param(Us_sqrt,
                                     transforms.LowerTriangular(
                                         self.num_inducing, self.num_outputs),
                                     name="u_variance")()

        self.Ku = self.kern.Ksymm(self.Z) + tf.eye(
            tf.shape(self.Z)[0], dtype=self.Z.dtype) * settings.jitter
        self.Lu = tf.cholesky(self.Ku)
        self.mean_function = mean_function
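The `+ tf.eye(...) * settings.jitter` term before the Cholesky appears in almost every example on this page. Kernel matrices are positive semi-definite in exact arithmetic but can be numerically singular, and a small diagonal "jitter" makes the factorization succeed. A quick numpy illustration (editorial sketch):

import numpy as np

# Duplicate inputs make the kernel matrix exactly singular.
X = np.array([[0.0], [0.0], [1.0]])
K = np.exp(-0.5 * (X - X.T) ** 2)  # RBF kernel evaluated at X

# np.linalg.cholesky(K) raises LinAlgError here; the jittered version works.
L = np.linalg.cholesky(K + 1e-6 * np.eye(3))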
Example #4
    def __init__(self, kern, Um, Us_sqrt, Z, num_outputs, white=True):
        self.white = white
        self.kern = kern
        self.num_outputs = num_outputs
        self.num_inducing = Z.shape[0]
        self.q_diag = Us_sqrt.ndim == 2
        with tf.name_scope("inducing"):
            self.Z = Param(
                Z,  # M x D_in (inducing inputs)
                name="z")()
            self.Um = Param(
                Um,  #DxM
                name="u")()
            if self.q_diag:
                self.Us_sqrt = Param(
                    Us_sqrt,  # DxM
                    transforms.positive,
                    name="u_variance")()
            else:
                self.Us_sqrt = Param(
                    Us_sqrt,  # DxMxM
                    transforms.LowerTriangular(Us_sqrt.shape[1],
                                               Us_sqrt.shape[0]),
                    name="u_variance")()

        self.Ku = self.kern.Ksymm(self.Z) + tf.eye(
            tf.shape(self.Z)[0], dtype=self.Z.dtype) * settings.jitter
        self.Lu = tf.cholesky(self.Ku)
        self.Ku_tiled = tf.tile(self.Ku[None, :, :],
                                [self.num_outputs, 1, 1])  # DxMxM
        self.Lu_tiled = tf.tile(self.Lu[None, :, :], [self.num_outputs, 1, 1])
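Example #4 precomputes Kuu and its Cholesky once and then tiles them across the D independent output GPs. The tiling itself is just a broadcast copy, as this numpy sketch (editorial) shows:

import numpy as np

M, D = 4, 3
Ku = np.eye(M)  # stand-in for kern.Ksymm(Z) + jitter * I
Lu = np.linalg.cholesky(Ku)

Ku_tiled = np.tile(Ku[None, :, :], [D, 1, 1])  # D x M x M
Lu_tiled = np.tile(Lu[None, :, :], [D, 1, 1])  # D x M x M
assert Ku_tiled.shape == (D, M, M) and Lu_tiled.shape == (D, M, M)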
Example #5
 def __init__(self,
              kern,
              num_inducing,
              num_outputs,
              mean_function=None,
              white=True):
     self.white = white
     self.kern = kern
     self.num_inputs = kern.input_dim
     self.num_outputs = num_outputs
     self.num_inducing = num_inducing
     self.q_diag = False
     Um = np.zeros((self.num_inducing, self.num_outputs))
     Us_sqrt = np.ones(
         (self.num_inducing,
          self.num_outputs)) if self.q_diag else np.array(
              [np.eye(self.num_inducing) for _ in range(self.num_outputs)])
     with tf.name_scope("inducing"):
         self.Um = Param(Um, name="u")()
         if self.q_diag:
             self.Us_sqrt = Param(Us_sqrt,
                                  transforms.positive,
                                  name="u_variance")()
         else:
             self.Us_sqrt = Param(Us_sqrt,
                                  transforms.LowerTriangular(
                                      self.num_inducing, self.num_outputs),
                                  name="u_variance")()
     self.mean_function = mean_function
Example #6
    def __init__(self,
                 latent_dim,
                 Y,
                 transitions,
                 T_latent=None,
                 inputs=None,
                 emissions=None,
                 px1_mu=None,
                 px1_cov=None,
                 Xmu=None,
                 Xchol=None,
                 name=None):

        _Xmu = np.zeros(
            (T_latent or Y.shape[0], latent_dim)) if Xmu is None else Xmu
        super().__init__(_Xmu,
                         Y,
                         transitions,
                         inputs,
                         emissions,
                         px1_mu,
                         px1_cov,
                         name=name)

        _Xchol = np.eye(self.T_latent *
                        self.latent_dim) if Xchol is None else Xchol
        if _Xchol.ndim == 1:
            self.Xchol = gp.Param(_Xchol)
        else:
            chol_transform = gtf.LowerTriangular(
                self.T_latent *
                self.latent_dim if _Xchol.ndim == 2 else self.latent_dim,
                num_matrices=1 if _Xchol.ndim == 2 else self.T_latent,
                squeeze=_Xchol.ndim == 2)
            self.Xchol = gp.Param(_Xchol, transform=chol_transform)
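The constructor accepts three layouts for `Xchol`, distinguished purely by `ndim`. A shape-only numpy sketch (editorial; T and d stand for `T_latent` and `latent_dim`):

import numpy as np

T, d = 4, 2  # T_latent, latent_dim

Xchol_diag = np.ones(T * d)    # ndim == 1: diagonal, stored as a plain vector Param
Xchol_full = np.eye(T * d)     # ndim == 2: one (T*d) x (T*d) factor, squeeze=True
Xchol_steps = np.tile(np.eye(d)[None, :, :], [T, 1, 1])  # ndim == 3: T factors, d x d each
assert Xchol_steps.shape == (T, d, d)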
Example #7
    def __init__(self,
                 X_init,
                 Y,
                 transitions,
                 inputs=None,
                 emissions=None,
                 px1_mu=None,
                 px1_cov=None,
                 name=None):
        super().__init__(name=name)
        self.T_latent, self.latent_dim = X_init.shape
        self.T, self.obs_dim = Y.shape

        self.transitions = transitions
        self.emissions = emissions or GaussianEmissions(
            self.latent_dim, self.obs_dim)

        self.X = gp.Param(X_init)
        self.Y = gp.Param(Y, trainable=False)
        self.inputs = None if inputs is None else gp.Param(inputs,
                                                           trainable=False)

        self.px1_mu = gp.Param(
            np.zeros(self.latent_dim) if px1_mu is None else px1_mu,
            trainable=False)
        self.px1_cov_chol = gp.Param(
            np.eye(self.latent_dim)
            if px1_cov is None else np.linalg.cholesky(px1_cov),
            trainable=False,
            transform=gtf.LowerTriangular(self.latent_dim, squeeze=True))
Example #8
    def __init__(self, kern, Z, num_outputs, mean_function):
        """
        A sparse variational GP layer in whitened representation. This layer holds the kernel,
        variational parameters, inducing points and mean function.

        The underlying model at inputs X is
        f = Lv + mean_function(X), where v ~ N(0, I) and LL^T = kern.K(X)

        The variational distribution over the inducing points is
        q(v) = N(q_mu, q_sqrt q_sqrt^T)

        The layer holds D_out independent GPs with the same kernel and inducing points.

        :param kern: The kernel for the layer (input_dim = D_in)
        :param Z: Inducing points (M, D_in)
        :param num_outputs: number of independent GPs D_out; q_mu is initialized
            to zeros of shape (M, D_out) and q_sqrt to identities of shape (D_out, M, M)
        :param mean_function: The mean function
        :return:
        """
        Parameterized.__init__(self)
        M = Z.shape[0]

        q_mu = np.zeros((M, num_outputs))
        self.q_mu = Parameter(q_mu)

        q_sqrt = np.tile(np.eye(M)[None, :, :], [num_outputs, 1, 1])
        transform = transforms.LowerTriangular(M, num_matrices=num_outputs)
        self.q_sqrt = Parameter(q_sqrt, transform=transform)

        self.feature = InducingPoints(Z)
        self.kern = kern
        self.mean_function = mean_function
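The whitened construction in the docstring, f = Lv + mean_function(X) with v ~ N(0, I) and LL^T = kern.K(X), yields f ~ N(mean_function(X), K(X)). A self-contained numpy sketch (editorial; zero mean function and an RBF kernel assumed):

import numpy as np

rng = np.random.default_rng(0)
X = np.linspace(0, 1, 6)[:, None]
K = np.exp(-0.5 * (X - X.T) ** 2) + 1e-8 * np.eye(6)  # RBF kernel + jitter
L = np.linalg.cholesky(K)

v = rng.standard_normal((6, 1))  # white noise, v ~ N(0, I)
f = L @ v                        # Cov[f] = L I L^T = K, a draw from the GP prior at X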
Example #9
    def __init__(self, dim, input_dim=0, kern=None, Z=None, n_ind_pts=100,
                 mean_fn=None, Q_diag=None, Umu=None, Ucov_chol=None,
                 jitter=gps.numerics.jitter_level, name=None):
        super().__init__(name=name)
        self.OBSERVATIONS_AS_INPUT = False
        self.dim = dim
        self.input_dim = input_dim
        self.jitter = jitter

        self.Q_sqrt = Param(np.ones(self.dim) if Q_diag is None else Q_diag ** 0.5, transform=gtf.positive)

        self.n_ind_pts = n_ind_pts if Z is None else (Z[0].shape[-2] if isinstance(Z, list) else Z.shape[-2])

        if isinstance(Z, np.ndarray) and Z.ndim == 2:
            self.Z = mf.SharedIndependentMof(gp.features.InducingPoints(Z))
        else:
            Z_list = [np.random.randn(self.n_ind_pts, self.dim + self.input_dim)
                      for _ in range(self.dim)] if Z is None else [z for z in Z]
            self.Z = mf.SeparateIndependentMof([gp.features.InducingPoints(z) for z in Z_list])

        if isinstance(kern, gp.kernels.Kernel):
            self.kern = mk.SharedIndependentMok(kern, self.dim)
        else:
            kern_list = kern or [gp.kernels.Matern32(self.dim + self.input_dim, ARD=True) for _ in range(self.dim)]
            self.kern = mk.SeparateIndependentMok(kern_list)

        self.mean_fn = mean_fn or mean_fns.Identity(self.dim)
        self.Umu = Param(np.zeros((self.dim, self.n_ind_pts)) if Umu is None else Umu)  # Lm^-1(Umu - m(Z))
        transform = gtf.LowerTriangular(self.n_ind_pts, num_matrices=self.dim, squeeze=False)
        self.Ucov_chol = Param(np.tile(np.eye(self.n_ind_pts)[None, ...], [self.dim, 1, 1])
                               if Ucov_chol is None else Ucov_chol, transform=transform)  # Lm^-1(Ucov_chol)
        self._Kzz = None
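The Z handling above covers three cases: a single 2-D array (shared inducing points), None (randomly initialized separate sets), or a list (user-supplied separate sets). A plain-numpy mirror of that branching (editorial sketch, without the GPflow feature wrappers):

import numpy as np

dim, input_dim, n_ind_pts = 3, 1, 10

def make_Z_list(Z):
    # Mirrors the branching in the constructor above.
    if isinstance(Z, np.ndarray) and Z.ndim == 2:
        return [Z] * dim  # one shared set of inducing points
    if Z is None:
        return [np.random.randn(n_ind_pts, dim + input_dim) for _ in range(dim)]
    return [z for z in Z]  # separate, user-supplied per latent dimension

assert len(make_Z_list(None)) == dim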
Example #10
    def setup_variational_parameters(self):
        self.Z = Parameter(self.inducing_locations) # M x D

        self.q_mu = Parameter(np.zeros((self.num_inducing, 1))) # M x 1

        q_sqrt = np.tile(np.eye(self.num_inducing)[None, :, :], [1, 1, 1])
        transform = transforms.LowerTriangular(self.num_inducing, num_matrices=1)
        self.q_sqrt = Parameter(q_sqrt, transform=transform) # 1 x M x M
Example #11
File: impl.py Project: naiqili/GPGene-TCBB
    def __init__(self,
                 kern,
                 Z,
                 mean_function,
                 num_nodes,
                 dim_per_in,
                 dim_per_out,
                 gmat,
                 share_Z=False,
                 nb_init=True,
                 **kwargs):

        Layer.__init__(self, input_prop_dim=False, **kwargs)

        self.kern = kern
        self.num_nodes = num_nodes
        self.dim_per_in, self.dim_per_out = dim_per_in, dim_per_out
        self.gmat = gmat
        self.share_Z = share_Z
        self.nb_init = nb_init
        self.num_outputs = num_nodes * dim_per_out
        self.num_inducing = Z.shape[0]

        self.q_mu = Parameter(
            np.zeros((self.num_inducing, num_nodes * dim_per_out)))
        self.mean_function = ParamList([], trainable=False)
        self.q_sqrt_lst = ParamList([])
        transform = transforms.LowerTriangular(self.num_inducing,
                                               num_matrices=self.dim_per_out)

        if share_Z:
            self.feature = InducingPoints(Z)
        else:
            self.feature = ParamList([])  # InducingPoints(Z)

        for nd in range(num_nodes):
            if mean_function:
                self.mean_function.append(mean_function[nd])
            else:
                self.mean_function.append(Zero())
            if share_Z:
                pa_nd = self.pa_idx(nd)
                Ku_nd = self.kern[nd].compute_K_symm(Z)
                Lu_nd = np.linalg.cholesky(Ku_nd + np.eye(Z.shape[0]) *
                                           settings.jitter)
                q_sqrt = np.tile(Lu_nd[None, :, :], [dim_per_out, 1, 1])
                self.q_sqrt_lst.append(Parameter(q_sqrt, transform=transform))
            else:
                pa_nd = self.pa_idx(nd)
                Z_tmp = Z[:, pa_nd].copy()
                self.feature.append(InducingPoints(Z_tmp))
                Ku_nd = self.kern[nd].compute_K_symm(Z_tmp)
                Lu_nd = np.linalg.cholesky(Ku_nd + np.eye(Z_tmp.shape[0]) *
                                           settings.jitter)
                q_sqrt = np.tile(Lu_nd[None, :, :], [dim_per_out, 1, 1])
                self.q_sqrt_lst.append(Parameter(q_sqrt, transform=transform))

        self.needs_build_cholesky = True
Example #12
    def __init__(self,
                 layer_id,
                 kern,
                 U,
                 Z,
                 num_outputs,
                 mean_function,
                 white=False,
                 **kwargs):
        """
        A sparse variational GP layer in whitened representation. This layer holds the kernel,
        variational parameters, inducing points and mean function.
        The underlying model at inputs X is
        f = Lv + mean_function(X), where v ~ N(0, I) and LL^T = kern.K(X)
        The variational distribution over the inducing points is
        q(v) = N(q_mu, q_sqrt q_sqrt^T)
        The layer holds D_out independent GPs with the same kernel and inducing points.
        :param kern: The kernel for the layer (input_dim = D_in)
        :param Z: Inducing points (M, D_in)
        :param num_outputs: The number of GP outputs (q_mu is shape (M, num_outputs))
        :param mean_function: The mean function
        :return:
        """
        Layer.__init__(self, layer_id, U, num_outputs, **kwargs)

        # Initialize inducing inputs randomly if none are provided

        self.dim_in = U[0].shape[1] if layer_id == 0 else num_outputs
        self.Z = Z if Z is not None else np.random.normal(
            0, 0.01, (100, self.dim_in))

        self.num_inducing = self.Z.shape[0]

        q_mu = np.zeros((self.num_inducing, num_outputs))
        self.q_mu = Parameter(q_mu)

        q_sqrt = np.tile(
            np.eye(self.num_inducing)[None, :, :], [num_outputs, 1, 1])
        transform = transforms.LowerTriangular(self.num_inducing,
                                               num_matrices=num_outputs)
        self.q_sqrt = Parameter(q_sqrt, transform=transform)

        self.feature = InducingPoints(self.Z)
        self.kern = kern
        self.mean_function = mean_function

        self.num_outputs = num_outputs
        self.white = white

        if not self.white:  # initialize to prior
            Ku = self.kern.compute_K_symm(self.Z)
            Lu = np.linalg.cholesky(Ku +
                                    np.eye(self.Z.shape[0]) * settings.jitter)
            self.q_sqrt = np.tile(Lu[None, :, :], [num_outputs, 1, 1])

        self.needs_build_cholesky = True
Example #13
 def __init__(self, dim, input_dim=0, Q=None, name=None):
     super().__init__(name=name)
     self.OBSERVATIONS_AS_INPUT = False
     self.dim = dim
     self.input_dim = input_dim
     if Q is None or Q.ndim == 2:
         self.Qchol = Param(np.eye(self.dim) if Q is None else np.linalg.cholesky(Q),
                            gtf.LowerTriangular(self.dim, squeeze=True))
     elif Q.ndim == 1:
         self.Qchol = Param(Q ** 0.5)
Example #14
    def __init__(self, kern, num_outputs, mean_function,
                 Z=None,
                 feature=None,
                 white=False, input_prop_dim=None,
                 q_mu=None,
                 q_sqrt=None, **kwargs):
        """
        A sparse variational GP layer in whitened representation. This layer holds the kernel,
        variational parameters, inducing points and mean function.

        The underlying model at inputs X is
        f = Lv + mean_function(X), where v ~ N(0, I) and LL^T = kern.K(X)

        The variational distribution over the inducing points is
        q(v) = N(q_mu, q_sqrt q_sqrt^T)

        The layer holds D_out independent GPs with the same kernel and inducing points.

        :param kern: The kernel for the layer (input_dim = D_in)
        :param Z: Inducing points (M, D_in)
        :param num_outputs: The number of GP outputs (q_mu is shape (M, num_outputs))
        :param mean_function: The mean function
        :return:
        """
        Layer.__init__(self, input_prop_dim, **kwargs)
        if feature is None:
            feature = InducingPoints(Z)

        self.num_inducing = len(feature)

        self.feature = feature
        self.kern = kern
        self.mean_function = mean_function

        self.num_outputs = num_outputs
        self.white = white

        if q_mu is None:
            q_mu = np.zeros((self.num_inducing, num_outputs), dtype=settings.float_type)
        self.q_mu = Parameter(q_mu)

        if q_sqrt is None:
            if not self.white:  # initialize to prior
                with gpflow.params_as_tensors_for(feature):
                    Ku = conditionals.Kuu(feature, self.kern, jitter=settings.jitter)
                    Lu = tf.linalg.cholesky(Ku)
                    Lu = self.enquire_session().run(Lu)
                    q_sqrt = np.tile(Lu[None, :, :], [num_outputs, 1, 1])
            else:
                q_sqrt = np.tile(np.eye(self.num_inducing, dtype=settings.float_type)[None, :, :], [num_outputs, 1, 1])

        transform = transforms.LowerTriangular(self.num_inducing, num_matrices=num_outputs)
        self.q_sqrt = Parameter(q_sqrt, transform=transform)

        self.needs_build_cholesky = True
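Initializing `q_sqrt` to the prior Cholesky Lu (the `not self.white` branch) makes q(u) = N(0, Kuu) coincide with the prior, so the KL term of the ELBO starts at exactly zero. A numerical check (editorial sketch) using the closed-form Gaussian KL:

import numpy as np

M = 4
K = np.eye(M) + 0.3 * np.ones((M, M))  # stand-in for Kuu (symmetric positive definite)
L = np.linalg.cholesky(K)
S = L @ L.T                            # q covariance when q_sqrt is initialized to L

# KL( N(0, S) || N(0, K) ) = 0.5 * (tr(K^-1 S) - M + log|K| - log|S|)
kl = 0.5 * (np.trace(np.linalg.solve(K, S)) - M
            + np.linalg.slogdet(K)[1] - np.linalg.slogdet(S)[1])
assert abs(kl) < 1e-9  # starts exactly at the prior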
Example #15
File: latent.py Project: lmao14/heterogp
 def __init__(self, Z, mean_function, kern, num_latent=1, whiten=True, name=None):
     super(Latent, self).__init__(name=name)
     self.mean_function = mean_function
     self.kern = kern
     self.num_latent = num_latent
     M = Z.shape[0]
     
     self.feature = InducingPoints(Z)
     num_inducing = len(self.feature)
     self.whiten = whiten
     
     self.q_mu = Parameter(np.zeros((num_inducing, self.num_latent), dtype=settings.float_type))
     
     q_sqrt = np.tile(np.eye(M)[None, :, :], [self.num_latent, 1, 1])
     transform = transforms.LowerTriangular(M, num_matrices=self.num_latent)
     self.q_sqrt = Parameter(q_sqrt, transform=transform)
Example #16
    def _init_variational_parameters(self, Z):
        
        q_mu = np.zeros((self.num_inducing, self.num_outputs))
        q_mu = gpflow.Param(q_mu)

        # initialize q_sqrt: identity if whitened or diagonal, prior Cholesky of Kuu otherwise

        if self.white or self.q_diag:
            q_sqrt = np.tile(np.eye(self.num_inducing)[None, :, :], [self.num_outputs, 1, 1])
        else:
            if self.gc_kernel:
                Ku = self.kernel.compute_Ku_symmetric(Z, jitter=settings.jitter)
            else:
                Ku = self.kernel.compute_K_symm(Z) + np.eye(Z.shape[0], dtype=settings.float_type) * settings.jitter

            Lu = np.linalg.cholesky(Ku)
            q_sqrt = np.tile(Lu[None, :, :], [self.num_outputs, 1, 1])

        if self.q_diag:
            transform = transforms.DiagMatrix(self.num_inducing)
        else:
            transform = transforms.LowerTriangular(self.num_inducing, num_matrices=self.num_outputs)

        q_sqrt = gpflow.Param(q_sqrt, transform=transform)

        return q_mu, q_sqrt
Example #17
    def __init__(self,
                 kern,
                 kern_g,
                 Z,
                 Z_g,
                 mu0_g,
                 num_inducing,
                 num_inducing_g,
                 num_outputs,
                 mean_function=None,
                 white=True):
        SVGP_Layer.__init__(self, kern, Z, num_inducing, num_outputs,
                            mean_function, white)
        self.kern_g = kern_g
        self.num_inducing_g = num_inducing_g
        self.q_diag_g = False
        self.mu0_g = Param(mu0_g, name="mu0_g")()
        Um_g = np.zeros((self.num_inducing_g, self.num_outputs))
        Us_sqrt_g = np.ones(
            (self.num_inducing_g,
             self.num_outputs)) if self.q_diag_g else np.array([
                 np.eye(self.num_inducing_g) for _ in range(self.num_outputs)
             ])
        with tf.name_scope("inducing"):
            self.Z_g = Param(Z_g, name="z_g")()
            self.Um_g = Param(Um_g, name="u_g")()
            if self.q_diag_g:
                self.Us_sqrt_g = Param(Us_sqrt_g,
                                       transforms.positive,
                                       name="u_variance_g")()
            else:
                self.Us_sqrt_g = Param(Us_sqrt_g,
                                       transforms.LowerTriangular(
                                           self.num_inducing_g,
                                           self.num_outputs),
                                       name="u_variance_g")()

        self.Ku_g = self.kern_g.Ksymm(self.Z_g) + tf.eye(
            tf.shape(self.Z_g)[0],
            dtype=self.Z_g.dtype) * settings.numerics.jitter_level
        self.Lu_g = tf.cholesky(self.Ku_g)
Example #18
File: gpzip.py Project: Zhu-Justin/GPZIP
def onoff(Xtrain,Ytrain,Xtest,Ytest,dir):
    tf.reset_default_graph()
    parentDir = "/l/hegdep1/onoffgp/uai/experiments/pptr"
    sys.path.append(parentDir)

    from onofftf.main import Param, DataSet, GaussKL, KernSE, GPConditional, GaussKLkron
    from onofftf.utils import modelmanager
    from gpflow import transforms

    modelPath = dir
    tbPath    = dir
    logPath   = dir + 'modelsumm.log'

    logger = logging.getLogger('log')
    logger.setLevel(logging.DEBUG)
    logger.addHandler(logging.FileHandler(logPath))
    logger.info("traning size   = " + str(Xtrain.shape[0]))
    logger.info("test size   = " + str(Xtest.shape[0]))

    traindf = pd.DataFrame({'ndatehour':Xtrain[:,2].flatten()*1000,'pptr':Ytrain.flatten()})
    train_data = DataSet(Xtrain, Ytrain)

    logger.info("number of training examples:" + str(Xtrain.shape))

    # ****************************************************************
    # parameter initializations
    # ****************************************************************
    list_to_np = lambda _list : [np.array(e) for e in _list]

    num_iter = 50000
    num_inducing_f = np.array([10,100])
    num_inducing_g = np.array([10,100])
    num_data = Xtrain.shape[0]
    num_minibatch = 1000

    init_fkell = list_to_np([[8., 8.],[5./1000]])
    init_fkvar = list_to_np([[20.],[20.]])

    init_gkell = list_to_np([[8.,8.],[5./1000]])
    init_gkvar = list_to_np([[10.],[10.]])

    init_noisevar = 0.01

    q_diag = True

    init_Zf_s = kmeans(Xtrain[:,0:2],num_inducing_f[0])[0]
    init_Zf_t = np.expand_dims(np.linspace(Xtrain[:,2].min(),Xtrain[:,2].max(),num_inducing_f[1]),axis=1)

    init_Zf = [init_Zf_s,init_Zf_t]
    init_u_fm = np.random.randn(np.prod(num_inducing_f),1)*0.1
    init_u_fs_sqrt = np.ones(np.prod(num_inducing_f)).reshape(1,-1).T

    init_Zg = init_Zf.copy()
    init_u_gm = np.random.randn(np.prod(num_inducing_g),1)*0.1
    init_u_gs_sqrt = np.ones(np.prod(num_inducing_g)).reshape(1,-1).T

    kern_param_learning_rate = 1e-3
    indp_param_learning_rate = 1e-3

    # ****************************************************************
    # define tensorflow variables and placeholders
    # ****************************************************************
    X = tf.placeholder(dtype = float_type)
    Y = tf.placeholder(dtype = float_type)

    with tf.name_scope("f_kern"):
        fkell = [Param(init_fkell[i],transform=transforms.Log1pe(),
                       name="lengthscale",learning_rate = kern_param_learning_rate,summ=True)
                 for i in range(len(num_inducing_f))]

        fkvar = [Param(init_fkvar[i],transform=transforms.Log1pe(),
                       name="variance",learning_rate = kern_param_learning_rate,summ=True)
                 for i in range(len(num_inducing_f))]

    fkern_list = [KernSE(fkell[i],fkvar[i]) for i in range(len(num_inducing_f))]

    with tf.name_scope("g_kern"):
        gkell = [Param(init_gkell[i],transform=transforms.Log1pe(),
                       name="lengthscale",learning_rate = kern_param_learning_rate,summ=True)
                 for i in range(len(num_inducing_g))]

        gkvar = [Param(init_gkvar[i],transform=transforms.Log1pe(),
                       name="variance",learning_rate = kern_param_learning_rate,summ=True)
                 for i in range(len(num_inducing_g))]

    gkern_list = [KernSE(gkell[i],gkvar[i]) for i in range(len(num_inducing_g))]

    with tf.name_scope("likelihood"):
        noisevar = Param(init_noisevar,transform=transforms.Log1pe(),
                         name="variance",learning_rate = kern_param_learning_rate,summ=True)


    with tf.name_scope("f_ind"):
        Zf_list = [Param(init_Zf[i],name="z",learning_rate = indp_param_learning_rate,summ=True)
                   for i in range(len(num_inducing_f))]

        u_fm = Param(init_u_fm,name="value",learning_rate = indp_param_learning_rate,summ=True)
        if q_diag:
            u_fs_sqrt = Param(init_u_fs_sqrt,transforms.positive,
                              name="variance",learning_rate = indp_param_learning_rate,summ=True)
        else:
            u_fs_sqrt = Param(init_u_fs_sqrt,transforms.LowerTriangular(init_u_fs_sqrt.shape[0]),
                              name="variance",learning_rate = indp_param_learning_rate,summ=True)


    with tf.name_scope("g_ind"):
        Zg_list = [Param(init_Zg[i],name="z",learning_rate = indp_param_learning_rate,summ=True)
                   for i in range(len(num_inducing_g))]

        u_gm = Param(init_u_gm,name="value",learning_rate = indp_param_learning_rate,summ=True)
        if q_diag:
            u_gs_sqrt = Param(init_u_gs_sqrt,transforms.positive,
                              name="variance",learning_rate = indp_param_learning_rate,summ=True)
        else:
            u_gs_sqrt = Param(init_u_gs_sqrt,transforms.LowerTriangular(init_u_gs_sqrt.shape[0]),
                              name="variance",learning_rate = indp_param_learning_rate,summ=True)


    # ****************************************************************
    # define model support functions
    # ****************************************************************
    def build_prior_kl(u_fm, u_fs_sqrt, fkern_list, Zf_list,
                       u_gm, u_gs_sqrt, gkern_list, Zg_list, whiten=False):
        if whiten:
            raise NotImplementedError()
        else:
            Kfmm = [fkern_list[i].K(Zf_list[i].get_tfv()) + \
                    tf.eye(num_inducing_f[i], dtype=float_type) * jitter_level
                    for i in range(len(num_inducing_f))]

            Kgmm = [gkern_list[i].K(Zg_list[i].get_tfv()) + \
                    tf.eye(num_inducing_g[i], dtype=float_type) * jitter_level
                    for i in range(len(num_inducing_g))]

            KL = GaussKLkron(u_fm.get_tfv(), u_fs_sqrt.get_tfv(), Kfmm) + \
                 GaussKLkron(u_gm.get_tfv(), u_gs_sqrt.get_tfv(), Kgmm)

        return KL

    def build_predict(Xnew,u_fm,u_fs_sqrt,fkern_list,Zf_list,u_gm,u_gs_sqrt,gkern_list,Zg_list,f_mu=None):

        input_mask_f = _gen_inp_mask(Zf_list)
        input_mask_g = _gen_inp_mask(Zg_list)

        # compute fmean and fvar from the kronecker inference
        fmean,fvar = kron_inf(Xnew,fkern_list,Zf_list,u_fm,u_fs_sqrt,num_inducing_f,input_mask_f)
        if f_mu is not None:
            fmean = fmean + f_mu.get_tfv()

        # compute gmean and gvar from the kronecker inference
        gmean,gvar = kron_inf(Xnew,gkern_list,Zg_list,u_gm,u_gs_sqrt,num_inducing_g,input_mask_g)

        # compute augmented distributions
        ephi_g, ephi2_g, evar_phi_g = probit_expectations(gmean, gvar)

        # compute augmented f
        # p(f|g) = N(f| diag(ephi_g)* A*u_fm, diag(evar_phi_g)) * (Kfnn + A(u_fs - Kfmm)t(A)))
        gfmean = tf.multiply(ephi_g, fmean)
        gfvar = tf.multiply(ephi2_g, fvar)
        gfmeanu = tf.multiply(evar_phi_g, tf.square(fmean))

        # return mean and variance vectors in order
        return gfmean, gfvar, gfmeanu, fmean, fvar, gmean, gvar, ephi_g, evar_phi_g

    def kron_inf(Xnew,kern_list,Z_list,q_mu,q_sqrt,num_inducing,input_mask):
        # Compute alpha = K_mm^-1 * f_m
        Kmm = [kern_list[p].K(Z_list[p].get_tfv()) + \
               tf.eye(num_inducing[p], dtype=float_type) * jitter_level
               for p in range(len(num_inducing))]

        Kmm_inv = [tf.matrix_inverse(Kmm[p]) for p in range(len(num_inducing))]
        alpha = __kron_mv(Kmm_inv,q_mu.get_tfv())

        n_batch = tf.stack([tf.shape(Xnew)[0],np.int32(1)])
        Knn = tf.ones(n_batch, dtype=float_type)
        Kmn_kron = []

        for p in range(len(num_inducing)):
            xnew = tf.gather(Xnew, input_mask[p], axis=1)
            Knn *= tf.reshape(kern_list[p].Kdiag(xnew), n_batch)
            Kmn_kron.append(kern_list[p].K(Z_list[p].get_tfv(), xnew))

        S = tf.diag(tf.squeeze(tf.square(q_sqrt.get_tfv())))

        Kmn = tf.reshape(tf.multiply(tf.expand_dims(Kmn_kron[0],1),Kmn_kron[1]),[np.prod(num_inducing),-1])
        A  = tf.matmul(tf_kron(*Kmm_inv),Kmn)

        mu = tf.matmul(Kmn, alpha, transpose_a=True)
        var = Knn - tf.reshape(tf.matrix_diag_part(tf.matmul(Kmn, A,transpose_a=True) - \
                               tf.matmul(tf.matmul(A,S,transpose_a=True),A)),[-1,1])

        return mu , var

    def __kron_mv(As, x):
        num_inducing = [int(As[p].get_shape()[0]) for p in range(len(As))]
        N = np.prod(num_inducing)
        b = tf.reshape(x, [N,1])
        for p in range(len(As)):
            Ap = As[p]
            X = tf.reshape(b, (num_inducing[p],
                               np.round(N / num_inducing[p]).astype(int)))
            b = tf.matmul(X, Ap, transpose_a=True, transpose_b=True)
            b = tf.reshape(b, [N,1])
        return b

    def tf_kron(*args):
        def __tf_kron(a,b):

            a_shape = [tf.shape(a)[0],tf.shape(a)[1]]
            b_shape = [tf.shape(b)[0],tf.shape(b)[1]]

            return tf.reshape(tf.reshape(a,[a_shape[0],1,a_shape[1],1])* \
                              tf.reshape(b,[1,b_shape[0],1,b_shape[1]]),
                              [a_shape[0]*b_shape[0],a_shape[1]*b_shape[1]])

        kron_prod = tf.constant(1., shape=[1, 1], dtype=float_type)
        for Ap in args:
            kron_prod = __tf_kron(kron_prod, Ap)

        return kron_prod

    def _gen_inp_mask(Z_list):
        input_mask = []
        tmp = 0
        for p in range(len(Z_list)):
            p_dim = Z_list[p].shape[1]
            input_mask.append(np.arange(tmp, tmp + p_dim, dtype=np.int32))
            tmp += p_dim
        return input_mask

    def variational_expectations(Y, fmu, fvar, fmuvar, noisevar):
        return -0.5 * np.log(2 * np.pi) - 0.5 * tf.log(noisevar) \
               - 0.5 * (tf.square(Y - fmu) + fvar + fmuvar) / noisevar

    def probit_expectations(gmean, gvar):
        def normcdf(x):
            return 0.5 * (1.0 + tf.erf(x / np.sqrt(2.0))) * (1. - 2.e-3) + 1.e-3

        def owent(h, a):
            h = tf.abs(h)
            term1 = tf.atan(a) / (2 * np.pi)
            term2 = tf.exp((-1 / 2) * (tf.multiply(tf.square(h), (tf.square(a) + 1))))
            return tf.multiply(term1, term2)

        z = gmean / tf.sqrt(1. + gvar)
        a = 1 / tf.sqrt(1. + (2 * gvar))

        cdfz = normcdf(z)
        tz = owent(z, a)

        ephig = cdfz
        ephisqg = (cdfz - 2. * tz)
        evarphig = (cdfz - 2. * tz - tf.square(cdfz))

        # clip negative values from variance terms to zero
        ephisqg = (ephisqg + tf.abs(ephisqg)) / 2.
        evarphig = (evarphig + tf.abs(evarphig)) / 2.

        return ephig, ephisqg, evarphig

    # ****************************************************************
    # build model and define lower bound
    # ****************************************************************
    # get kl term
    with tf.name_scope("kl"):
        kl = build_prior_kl(u_fm,u_fs_sqrt,fkern_list,Zf_list,
                            u_gm,u_gs_sqrt,gkern_list,Zg_list)
        tf.summary.scalar('kl', kl)

    # get augmented functions
    with tf.name_scope("model_build"):
        gfmean, gfvar, gfmeanu, fmean, fvar, gmean, gvar, pgmean, pgvar = build_predict(X,u_fm,u_fs_sqrt,fkern_list,Zf_list,
                                                                                               u_gm,u_gs_sqrt,gkern_list,Zg_list)
        tf.summary.histogram('gfmean',gfmean)
        tf.summary.histogram('gfvar',gfvar)
        tf.summary.histogram('gfmeanu',gfmeanu)
        tf.summary.histogram('fmean',fmean)
        tf.summary.histogram('fvar',fvar)
        tf.summary.histogram('gmean',gmean)
        tf.summary.histogram('gvar',gvar)
        tf.summary.histogram('pgmean',pgmean)
        tf.summary.histogram('pgvar',pgvar)

    # compute likelihood
    with tf.name_scope("var_exp"):
        var_exp = tf.reduce_sum(variational_expectations(Y,gfmean,gfvar,gfmeanu,noisevar.get_tfv()))
        tf.summary.scalar('var_exp', var_exp)

        # mini-batch scaling
        scale =  tf.cast(num_data, float_type) / tf.cast(num_minibatch, float_type)
        var_exp_scaled = var_exp * scale
        tf.summary.scalar('var_exp_scaled', var_exp_scaled)


    # final lower bound
    with tf.name_scope("cost"):
        cost =  -(var_exp_scaled - kl)
        tf.summary.scalar('cost',cost)


    # ****************************************************************
    # define optimizer op
    # ****************************************************************
    all_var_list = tf.trainable_variables()
    all_lr_list = [var._learning_rate for var in all_var_list]

    train_opt_group = []

    for group_learning_rate in set(all_lr_list):
        _ind_bool = np.where(np.isin(np.array(all_lr_list),group_learning_rate))[0]
        group_var_list = [all_var_list[ind] for ind in _ind_bool]
        group_tf_optimizer = tf.train.AdamOptimizer(learning_rate = group_learning_rate)
        group_grad_list = tf.gradients(cost,group_var_list)
        group_grads_and_vars = list(zip(group_grad_list,group_var_list))


        group_train_op = group_tf_optimizer.apply_gradients(group_grads_and_vars)

        # Summarize all gradients
        for grad, var in group_grads_and_vars:
            tf.summary.histogram(var.name + '/gradient', grad)

        train_opt_group.append({'names':[var.name for var in group_var_list],
                                'vars':group_var_list,
                                'learning_rate':group_learning_rate,
                                'grads':group_grad_list,
                                'train_op':group_train_op})

    train_op = tf.group(*[group['train_op'] for group in train_opt_group])



    # ****************************************************************
    # define graph and run optimization
    # ****************************************************************
    sess = tf.InteractiveSession()

    # model saver
    saver = tf.train.Saver()

    # tensorboard summary
    summ_merged = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter(tbPath,
                                            graph=sess.graph)

    sess.run(tf.global_variables_initializer())


    logger.info('*******  started optimization at ' + time.strftime('%Y%m%d-%H%M') + " *******")
    optstime = time.time()
    logger.info(
        '{:>16s}'.format("iteration") + '{:>6s}'.format("time"))

    for i in range(num_iter):
        optstime = time.time()
        batch = train_data.next_batch(num_minibatch)
        try:
            summary, _ = sess.run([summ_merged,train_op],
                                 feed_dict={X : batch[0],
                                            Y : batch[1]
                                 })

            if i % 200 == 0:
                logger.info(
                    '{:>16d}'.format(i) + '{:>6.3f}'.format((time.time() - optstime)/60))
                summary_writer.add_summary(summary,i)
                summary_writer.flush()

            if i % 10000 == 0:
                modelmngr = modelmanager(saver, sess, modelPath)
                modelmngr.save()

                # ****************************************************************
                # plot inducing monitoring plots
                # ****************************************************************
                lp_u_fm = u_fm.get_tfv().eval().flatten()
                lp_u_gm = u_gm.get_tfv().eval().flatten()

                lp_zf_t = Zf_list[1].get_tfv().eval().flatten()
                lp_zg_t = Zg_list[1].get_tfv().eval().flatten()

                lp_zf_sort_ind = np.argsort(lp_zf_t)
                lp_zg_sort_ind = np.argsort(lp_zg_t)

                scale_z = 1000
                mpl.rcParams['figure.figsize'] = (16,8)
                fig, (ax1,ax2,ax3) = plt.subplots(3, 1, sharex=True)

                mean_pptr = traindf.groupby('ndatehour')['pptr'].mean()
                ax1.bar(mean_pptr.index, mean_pptr.values, align='center')

                for m in np.arange(num_inducing_f[0]):
                    u_fm_temporal = lp_u_fm[m*num_inducing_f[1]:(m+1)*num_inducing_f[1]]
                    ax2.plot(np.round(lp_zf_t[lp_zf_sort_ind] * scale_z,4),u_fm_temporal[lp_zf_sort_ind],alpha=0.7)
                ax2.scatter(np.round(lp_zf_t[lp_zf_sort_ind] * scale_z,4),np.ones([num_inducing_f[1],1])*lp_u_fm.min(),color="#514A30")

                for m in np.arange(num_inducing_g[0]):
                    u_gm_temporal = lp_u_gm[m*num_inducing_g[1]:(m+1)*num_inducing_g[1]]
                    ax3.plot(np.round(lp_zg_t[lp_zg_sort_ind] * scale_z,4),u_gm_temporal[lp_zg_sort_ind],alpha=0.7)
                ax3.scatter(np.round(lp_zg_t[lp_zg_sort_ind] * scale_z,4),np.ones([num_inducing_g[1],1])*lp_u_gm.min(),color="#514A30")

                fig.savefig(dir +"inducing_"+str(i)+".png")

        except KeyboardInterrupt as e:
            print("Stopping training")
            break

    modelmngr = modelmanager(saver, sess, modelPath)
    modelmngr.save()
    summary_writer.close()


    # ****************************************************************
    # param summary
    # ****************************************************************
    logger.info("Noise variance          = " + str(noisevar.get_tfv().eval()))
    logger.info("Kf spatial lengthscale  = " + str(fkell[0].get_tfv().eval()))
    logger.info("Kf spatial variance     = " + str(fkvar[0].get_tfv().eval()))
    logger.info("Kf temporal lengthscale = " + str(fkell[1].get_tfv().eval()))
    logger.info("Kf temporal variance    = " + str(fkvar[1].get_tfv().eval()))

    logger.info("Kg spatial lengthscale  = " + str(gkell[0].get_tfv().eval()))
    logger.info("Kg spatial variance     = " + str(gkvar[0].get_tfv().eval()))
    logger.info("Kg temporal lengthscale = " + str(gkell[1].get_tfv().eval()))
    logger.info("Kg temporal variance    = " + str(gkvar[1].get_tfv().eval()))

    # ****************************************************************
    # model predictions
    # ****************************************************************
    # get test and training predictions
    # def predict_onoff(Xtrain,Xtest):
    #     pred_train = np.maximum(gfmean.eval(feed_dict = {X:Xtrain}),0)
    #     pred_test = np.maximum(gfmean.eval(feed_dict = {X:Xtest}),0)
    #     return pred_train, pred_test
    #
    # pred_train, pred_test = predict_onoff(Xtrain,Xtest)
    #
    # train_rmse = np.sqrt(np.mean((pred_train - Ytrain)**2))
    # train_mae  = np.mean(np.abs(pred_train - Ytrain))
    # test_rmse  = np.sqrt(np.mean((pred_test - Ytest)**2))
    # test_mae   = np.mean(np.abs(pred_test - Ytest))
    #
    # logger.info("train rmse:"+str(train_rmse))
    # logger.info("train mae:"+str(train_mae))
    #
    # logger.info("test rmse:"+str(test_rmse))
    # logger.info("test mae:"+str(test_mae))
    # logger.removeHandler(logger.handlers)

    def predict_onoff(Xtest):
        pred_test = np.maximum(gfmean.eval(feed_dict = {X:Xtest}),0)
        return pred_test

    pred_test = predict_onoff(Xtest)

    test_rmse  = np.sqrt(np.mean((pred_test - Ytest)**2))
    test_mae   = np.mean(np.abs(pred_test - Ytest))

    logger.info("test rmse:"+str(test_rmse))
    logger.info("test mae:"+str(test_mae))
    for h in list(logger.handlers):
        logger.removeHandler(h)

    # ****************************************************************
    # return values
    # ****************************************************************
    retdict = {'Xtrain':Xtrain,'Ytrain':Ytrain,
               'Xtest':Xtest,'Ytest':Ytest,
            #    'rawpred_train':gfmean.eval(feed_dict = {X:Xtrain}),
            #    'rawpred_test':gfmean.eval(feed_dict = {X:Xtest}),
            #    'pred_train':pred_train,
            #    'pred_test':pred_test,
            #    'train_rmse':train_rmse,
            #    'train_mae':train_mae,
               'test_rmse':test_rmse,
               'test_mae':test_mae
            #    ,'train_log_evidence': -cost.eval({X : Xtrain,Y : Ytrain})
               }

    return retdict
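The core trick in `__kron_mv` above is applying (A_1 ⊗ ... ⊗ A_P) x one reshaped factor at a time, so the full Kronecker product is never materialized. A numpy check of that identity (editorial sketch):

import numpy as np

rng = np.random.default_rng(0)
A1, A2 = rng.standard_normal((3, 3)), rng.standard_normal((4, 4))
x = rng.standard_normal((12, 1))

direct = np.kron(A1, A2) @ x  # forms the full 12 x 12 matrix

# Reshape trick, one factor at a time (same scheme as __kron_mv).
b = x
for A in (A1, A2):
    m = A.shape[0]
    b = (b.reshape(m, -1).T @ A.T).reshape(-1, 1)  # peel off this factor's axis
assert np.allclose(b, direct)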
Example #19
    def __init__(self,
                 latent_dim,
                 Y,
                 inputs=None,
                 emissions=None,
                 px1_mu=None,
                 px1_cov=None,
                 kern=None,
                 Z=None,
                 n_ind_pts=100,
                 mean_fn=None,
                 Q_diag=None,
                 Umu=None,
                 Ucov_chol=None,
                 qx1_mu=None,
                 qx1_cov=None,
                 As=None,
                 bs=None,
                 Ss=None,
                 n_samples=100,
                 batch_size=None,
                 chunking=False,
                 seed=None,
                 parallel_iterations=10,
                 jitter=gp.settings.numerics.jitter_level,
                 name=None):

        super().__init__(latent_dim,
                         Y[0],
                         inputs=None if inputs is None else inputs[0],
                         emissions=emissions,
                         px1_mu=px1_mu,
                         px1_cov=None,
                         kern=kern,
                         Z=Z,
                         n_ind_pts=n_ind_pts,
                         mean_fn=mean_fn,
                         Q_diag=Q_diag,
                         Umu=Umu,
                         Ucov_chol=Ucov_chol,
                         qx1_mu=qx1_mu,
                         qx1_cov=None,
                         As=None,
                         bs=None,
                         Ss=False if Ss is False else None,
                         n_samples=n_samples,
                         seed=seed,
                         parallel_iterations=parallel_iterations,
                         jitter=jitter,
                         name=name)

        self.T = [Y_s.shape[0] for Y_s in Y]
        self.T_tf = tf.constant(self.T, dtype=gp.settings.int_type)
        self.max_T = max(self.T)
        self.sum_T = float(sum(self.T))
        self.n_seq = len(self.T)
        self.batch_size = batch_size
        self.chunking = chunking

        if self.batch_size is None:
            self.Y = ParamList(Y, trainable=False)
        else:
            _Y = np.stack([
                np.concatenate(
                    [Ys, np.zeros((self.max_T - len(Ys), self.obs_dim))])
                for Ys in Y
            ])
            self.Y = Param(_Y, trainable=False)

        if inputs is not None:
            if self.batch_size is None:
                self.inputs = ParamList(inputs, trainable=False)
            else:
                desired_length = self.max_T if self.chunking else self.max_T - 1
                _inputs = [
                    np.concatenate([
                        inputs[s],
                        np.zeros(
                            (desired_length - len(inputs[s]), self.input_dim))
                    ]) for s in range(self.n_seq)
                ]  # pad the inputs
                self.inputs = Param(_inputs, trainable=False)

        if qx1_mu is None:
            self.qx1_mu = Param(np.zeros((self.n_seq, self.latent_dim)))

        self.qx1_cov_chol = Param(
            np.tile(np.eye(self.latent_dim)[None, ...], [self.n_seq, 1, 1])
            if qx1_cov is None else np.linalg.cholesky(qx1_cov),
            transform=gtf.LowerTriangular(self.latent_dim,
                                          num_matrices=self.n_seq))

        _As = [np.ones((T_s - 1, self.latent_dim))
               for T_s in self.T] if As is None else As
        _bs = [np.zeros((T_s - 1, self.latent_dim))
               for T_s in self.T] if bs is None else bs
        if Ss is not False:
            _S_chols = [np.tile(self.Q_sqrt.value.copy()[None, ...], [T_s - 1, 1]) for T_s in self.T] if Ss is None \
                else [np.sqrt(S) if S.ndim == 2 else np.linalg.cholesky(S) for S in Ss]

        if self.batch_size is None:
            self.As = ParamList(_As)
            self.bs = ParamList(_bs)
            if Ss is not False:
                self.S_chols = ParamList([
                    Param(Sc,
                          transform=gtf.positive if Sc.ndim == 2 else
                          gtf.LowerTriangular(self.latent_dim,
                                              num_matrices=Sc.shape[0]))
                    for Sc in _S_chols
                ])
        else:
            _As = np.stack([
                np.concatenate(
                    [_A,
                     np.zeros((self.max_T - len(_A) - 1, *_A.shape[1:]))])
                for _A in _As
            ])
            _bs = np.stack([
                np.concatenate([
                    _b,
                    np.zeros((self.max_T - len(_b) - 1, self.latent_dim))
                ]) for _b in _bs
            ])
            self.As = Param(_As)
            self.bs = Param(_bs)
            if Ss is not False:
                _S_chols = [
                    np.concatenate([
                        _S,
                        np.zeros((self.max_T - len(_S) - 1, *_S.shape[1:]))
                    ]) for _S in _S_chols
                ]
                _S_chols = np.stack(_S_chols)
                self.S_chols = Param(_S_chols, transform=gtf.positive if _S_chols.ndim == 3 else \
                    gtf.LowerTriangular(self.latent_dim, num_matrices=(self.n_seq, self.max_T - 1)))

        self.multi_diag_px1_cov = False
        if isinstance(px1_cov, list):  # different prior for each sequence
            _x1_cov = np.stack(px1_cov)
            _x1_cov = np.sqrt(
                _x1_cov) if _x1_cov.ndim == 2 else np.linalg.cholesky(_x1_cov)
            _transform = None if _x1_cov.ndim == 2 else gtf.LowerTriangular(
                self.latent_dim, num_matrices=self.n_seq)
            self.multi_diag_px1_cov = _x1_cov.ndim == 2
        elif isinstance(px1_cov, np.ndarray):  # same prior for each sequence
            assert px1_cov.ndim < 3
            _x1_cov = np.sqrt(
                px1_cov) if px1_cov.ndim == 1 else np.linalg.cholesky(px1_cov)
            _transform = None if px1_cov.ndim == 1 else gtf.LowerTriangular(
                self.latent_dim, squeeze=True)

        self.px1_cov_chol = None if px1_cov is None else Param(
            _x1_cov, trainable=False, transform=_transform)

        if self.chunking:
            px1_mu_check = len(self.px1_mu.shape) == 1
            px1_cov_check_1 = not self.multi_diag_px1_cov
            px1_cov_check_2 = self.px1_cov_chol is None or len(
                self.px1_cov_chol.shape) < 3
            assert px1_mu_check and px1_cov_check_1 and px1_cov_check_2, \
                'Only one prior over x1 allowed for chunking'
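When `batch_size` is set, the variable-length sequences are zero-padded to `max_T` so they can be stacked into a single dense tensor, exactly as in this numpy sketch (editorial):

import numpy as np

obs_dim = 2
Y = [np.ones((3, obs_dim)), np.ones((5, obs_dim))]  # two sequences, T = [3, 5]
max_T = max(len(Ys) for Ys in Y)

padded = np.stack([
    np.concatenate([Ys, np.zeros((max_T - len(Ys), obs_dim))]) for Ys in Y
])
assert padded.shape == (2, max_T, obs_dim)  # n_seq x max_T x obs_dim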
Example #20
    def __init__(self,
                 X,
                 Y,
                 kernf,
                 kerng,
                 likelihood,
                 Zf,
                 Zg,
                 mean_function=None,
                 minibatch_size=None,
                 name='model'):
        Model.__init__(self, name)
        self.mean_function = mean_function or Zero()
        self.kernf = kernf
        self.kerng = kerng
        self.likelihood = likelihood
        self.whiten = False
        self.q_diag = True

        # save initial attributes for future plotting purpose
        Xtrain = DataHolder(X)
        Ytrain = DataHolder(Y)
        self.Xtrain, self.Ytrain = Xtrain, Ytrain

        # sort out the X, Y into MiniBatch objects.
        if minibatch_size is None:
            minibatch_size = X.shape[0]
        self.num_data = X.shape[0]
        self.num_latent = Y.shape[1]  # num_latent will be 1
        self.X = MinibatchData(X, minibatch_size, np.random.RandomState(0))
        self.Y = MinibatchData(Y, minibatch_size, np.random.RandomState(0))

        # Add variational parameters
        self.Zf = Param(Zf)
        self.Zg = Param(Zg)
        self.num_inducing_f = Zf.shape[0]
        self.num_inducing_g = Zg.shape[0]

        # init variational parameters
        self.u_fm = Param(
            np.random.randn(self.num_inducing_f, self.num_latent) * 0.01)
        self.u_gm = Param(
            np.random.randn(self.num_inducing_g, self.num_latent) * 0.01)

        if self.q_diag:
            self.u_fs_sqrt = Param(
                np.ones((self.num_inducing_f, self.num_latent)),
                transforms.positive)
            self.u_gs_sqrt = Param(
                np.ones((self.num_inducing_g, self.num_latent)),
                transforms.positive)
        else:
            u_fs_sqrt = np.array([
                np.eye(self.num_inducing_f) for _ in range(self.num_latent)
            ]).swapaxes(0, 2)
            self.u_fs_sqrt = Param(
                u_fs_sqrt, transforms.LowerTriangular(u_fs_sqrt.shape[2]))

            u_gs_sqrt = np.array([
                np.eye(self.num_inducing_g) for _ in range(self.num_latent)
            ]).swapaxes(0, 2)
            self.u_gs_sqrt = Param(
                u_gs_sqrt, transforms.LowerTriangular(u_gs_sqrt.shape[2]))
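Note the layout: unlike the K x M x M arrays used in the newer examples on this page, this snippet builds its Cholesky factors in the older GPflow M x M x K layout via `swapaxes`, and the trailing axis `shape[2]` is what gets passed to `transforms.LowerTriangular`. A shape-only sketch (editorial):

import numpy as np

M, K = 4, 1  # num_inducing, num_latent (num_latent is 1 in this model)
u_fs_sqrt = np.array([np.eye(M) for _ in range(K)]).swapaxes(0, 2)
assert u_fs_sqrt.shape == (M, M, K)  # older M x M x K convention
# u_fs_sqrt.shape[2] (== K) is the value this snippet hands to LowerTriangular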
Example #21
def main(scriptPath):
    tf.reset_default_graph()
    parentDir = '/'.join(os.path.dirname(os.path.realpath(scriptPath)).split('/')[:-1]) 
    subDir = "/" + scriptPath.split("/")[-2].split(".py")[0] + "/"
    sys.path.append(parentDir)

    from onofftf.main import Param, DataSet, GaussKL, KernSE, GPConditional, GaussKLkron
    from onofftf.utils import modelmanager
    from gpflow import transforms

    cmodelPath = parentDir + subDir + 'results_scgp.pickle'
    modelPath = parentDir + subDir + 'model_hurdle.ckpt'
    logPath   = parentDir + subDir + 'modelsumm_hurdle.log'

    logger = logging.getLogger('log')
    logger.setLevel(logging.DEBUG)
    logger.addHandler(logging.FileHandler(logPath))

    data = pickle.load(open(parentDir + subDir +"data.pickle","rb"))
    Xtrain = data['Xtrain']
    Ytrain = data['Ytrain']
    Ytrain_c = (data['Ytrain'] > 0) * 1
    Xtest = data['Xtest']
    Ytest = data['Ytest']
    Ytest_c = (data['Ytest'] > 0) * 1

    # load results from the classifier model
    cresults = pickle.load(open(cmodelPath,"rb"))
    train_pred_on_idx,_ = np.where(cresults['pred_train']['pfmean'] > 0.5)
    test_pred_on_idx,_  = np.where(cresults['pred_test']['pfmean'] > 0.5)
    Xtrain_reg_hurdle = Xtrain[train_pred_on_idx,:]
    Ytrain_reg_hurdle = Ytrain[train_pred_on_idx]
    Xtest_reg_hurdle = Xtest[test_pred_on_idx,:]
    Ytest_reg_hurdle = Ytest[test_pred_on_idx]

    traindf = pd.DataFrame({'ndatehour':Xtrain[train_pred_on_idx,2].flatten()*1000,'pptr':Ytrain[train_pred_on_idx].flatten()})
    train_data = DataSet(Xtrain_reg_hurdle,Ytrain_reg_hurdle)

    logger.info("traning size   = " + str(Xtrain.shape[0]))
    logger.info("test size   = " + str(Xtest.shape[0]))


    # ****************************************************************
    # parameter initializations
    # ****************************************************************
    list_to_np = lambda _list : [np.array(e) for e in _list]

    num_iter = 50000
    num_inducing_f = np.array([10,100])
    num_data = Xtrain.shape[0]
    num_minibatch = 500

    init_fkell = list_to_np([[5.,5.],[5./1000]])
    init_fkvar = list_to_np([[20.],[20.]])
    init_noisevar = 0.01

    q_diag = True

    init_Zf_s = kmeans(Xtrain[:,0:2],num_inducing_f[0])[0]
    init_Zf_t = np.expand_dims(np.linspace(Xtrain[:,2].min(),Xtrain[:,2].max(),num_inducing_f[1]),axis=1)
    init_Zf = [init_Zf_s,init_Zf_t]

    init_u_fm = np.random.randn(np.prod(num_inducing_f),1)*0.01
    init_u_fs_sqrt = np.ones(np.prod(num_inducing_f)).reshape(1,-1).T

    kern_param_learning_rate = 1e-3
    indp_param_learning_rate = 1e-3

    # ****************************************************************
    # define tensorflow variables and placeholders
    # ****************************************************************
    X = tf.placeholder(dtype = float_type)
    Y = tf.placeholder(dtype = float_type)

    with tf.name_scope("f_kern"):
        fkell = [Param(init_fkell[i],transform=transforms.Log1pe(),
                       name="lengthscale",learning_rate = kern_param_learning_rate,summ=True)
                 for i in range(len(num_inducing_f))]

        fkvar = [Param(init_fkvar[i],transform=transforms.Log1pe(),
                       name="variance",learning_rate = kern_param_learning_rate,summ=True)
                 for i in range(len(num_inducing_f))]

    fkern_list = [KernSE(fkell[i],fkvar[i]) for i in range(len(num_inducing_f))]


    with tf.name_scope("likelihood"):
        noisevar = Param(init_noisevar,transform=transforms.Log1pe(),
                         name="variance",learning_rate = kern_param_learning_rate,summ=True)


    with tf.name_scope("f_ind"):
        Zf_list = [Param(init_Zf[i],name="z",learning_rate = indp_param_learning_rate,summ=True)
                   for i in range(len(num_inducing_f))]

        u_fm = Param(init_u_fm,name="value",learning_rate = indp_param_learning_rate,summ=True)
        if q_diag:
            u_fs_sqrt = Param(init_u_fs_sqrt,transforms.positive,
                              name="variance",learning_rate = indp_param_learning_rate,summ=True)
        else:
            u_fs_sqrt = Param(init_u_fs_sqrt,transforms.LowerTriangular(init_u_fs_sqrt.shape[0]),
                              name="variance",learning_rate = indp_param_learning_rate,summ=True)

    # ****************************************************************
    # define model support functions
    # ****************************************************************
    def build_prior_kl(u_fm, u_fs_sqrt, fkern_list, Zf_list,whiten=False):
        if whiten:
            raise NotImplementedError()
        else:
            Kfmm = [fkern_list[i].K(Zf_list[i].get_tfv()) + \
                    tf.eye(num_inducing_f[i], dtype=float_type) * jitter_level
                    for i in range(len(num_inducing_f))]

            KL = GaussKLkron(u_fm.get_tfv(), u_fs_sqrt.get_tfv(), Kfmm)

        return KL

    def build_predict(Xnew,u_fm,u_fs_sqrt,fkern_list,Zf_list,f_mu=None):

        input_mask_f = _gen_inp_mask(Zf_list)

        # compute fmean and fvar from the kronecker inference
        fmean,fvar = kron_inf(Xnew,fkern_list,Zf_list,u_fm,u_fs_sqrt,num_inducing_f,input_mask_f)
        if f_mu is not None:
            fmean = fmean + f_mu.get_tfv()

        # return mean and variance vectors in order
        return fmean, fvar

    def kron_inf(Xnew,kern_list,Z_list,q_mu,q_sqrt,num_inducing,input_mask):
        # Compute alpha = K_mm^-1 * f_m
        Kmm = [kern_list[p].K(Z_list[p].get_tfv()) + \
               tf.eye(num_inducing[p], dtype=float_type) * jitter_level
               for p in range(len(num_inducing))]

        Kmm_inv = [tf.matrix_inverse(Kmm[p]) for p in range(len(num_inducing))]
        alpha = __kron_mv(Kmm_inv,q_mu.get_tfv())

        n_batch = tf.stack([tf.shape(Xnew)[0],np.int32(1)])
        Knn = tf.ones(n_batch, dtype=float_type)
        Kmn_kron = []

        for p in range(len(num_inducing)):
            xnew = tf.gather(Xnew, input_mask[p], axis=1)
            Knn *= tf.reshape(kern_list[p].Kdiag(xnew), n_batch)
            Kmn_kron.append(kern_list[p].K(Z_list[p].get_tfv(), xnew))

        S = tf.diag(tf.squeeze(tf.square(q_sqrt.get_tfv())))

        Kmn = tf.reshape(tf.multiply(tf.expand_dims(Kmn_kron[0],1),Kmn_kron[1]),[np.prod(num_inducing),-1])
        A  = tf.matmul(tf_kron(*Kmm_inv),Kmn)

        mu = tf.matmul(Kmn, alpha, transpose_a=True)
        var = Knn - tf.reshape(tf.matrix_diag_part(tf.matmul(Kmn, A,transpose_a=True) - \
                               tf.matmul(tf.matmul(A,S,transpose_a=True),A)),[-1,1])

        return mu , var

    def __kron_mv( As, x):
        num_inducing = [int(As[p].get_shape()[0]) for p in range(len(As))]
        N = np.prod(num_inducing)
        b = tf.reshape(x, [N,1])
        for p in range(len(As)):
            Ap = As[p]
            X = tf.reshape(b, (num_inducing[p],
                               np.round(N/num_inducing[p]).astype(np.int)))
            b = tf.matmul(X, Ap, transpose_a=True, transpose_b=True)
            b = tf.reshape(b, [N,1])
        return b

    def tf_kron(*args):
        def __tf_kron(a,b):

            a_shape = [tf.shape(a)[0],tf.shape(a)[1]]
            b_shape = [tf.shape(b)[0],tf.shape(b)[1]]

            return tf.reshape(tf.reshape(a,[a_shape[0],1,a_shape[1],1])* \
                              tf.reshape(b,[1,b_shape[0],1,b_shape[1]]),
                              [a_shape[0]*b_shape[0],a_shape[1]*b_shape[1]])

        kron_prod = tf.constant(1.,shape=[1,1],dtype=float_type)
        for Ap in args:
            kron_prod = __tf_kron(kron_prod,Ap)

        return kron_prod

    def _gen_inp_mask(Z_list):
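        # Build per-factor column-index masks for the Kronecker-structured input:
        # e.g. column counts [2, 1] across Z_list give [array([0, 1]), array([2])],
        # so each kernel factor sees only its own slice of the input columns.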
        input_mask = []
        tmp = 0
        for p in range(len(Z_list)):
            p_dim = Z_list[p].shape[1]
            input_mask.append(np.arange(tmp, tmp + p_dim, dtype=np.int32))
            tmp += p_dim
        return input_mask

    def variational_expectations(Y, fmu, fvar, noisevar):
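        # Closed-form E_q[log N(Y | f, noisevar)] for a Gaussian likelihood,
        # with q(f) = N(fmu, fvar) evaluated pointwise.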
        return -0.5 * np.log(2 * np.pi) - 0.5 * tf.log(noisevar) \
               - 0.5 * (tf.square(Y - fmu) + fvar) / noisevar

    # ****************************************************************
    # build model and define lower bound
    # ****************************************************************
    # get kl term
    with tf.name_scope("kl"):
        kl = build_prior_kl(u_fm,u_fs_sqrt,fkern_list,Zf_list)

    # get augmented functions
    with tf.name_scope("model_build"):
        fmean, fvar = build_predict(X,u_fm,u_fs_sqrt,fkern_list,Zf_list)

    # compute likelihood
    with tf.name_scope("var_exp"):
        var_exp = tf.reduce_sum(variational_expectations(Y,fmean,fvar,noisevar.get_tfv()))
        scale =  tf.cast(num_data, float_type) / tf.cast(num_minibatch, float_type)
        var_exp_scaled = var_exp * scale

    # final lower bound
    with tf.name_scope("cost"):
        cost =  -(var_exp_scaled - kl)
    # ****************************************************************
    # define optimizer op
    # ****************************************************************
    all_var_list = tf.trainable_variables()
    all_lr_list = [var._learning_rate for var in all_var_list]

    train_opt_group = []

    for group_learning_rate in set(all_lr_list):
        _ind_bool = np.where(np.isin(np.array(all_lr_list),group_learning_rate))[0]
        group_var_list = [all_var_list[ind] for ind in _ind_bool]
        group_tf_optimizer = tf.train.AdamOptimizer(learning_rate = group_learning_rate)
        group_grad_list = tf.gradients(cost,group_var_list)
        group_grads_and_vars = list(zip(group_grad_list,group_var_list))


        group_train_op = group_tf_optimizer.apply_gradients(group_grads_and_vars)
        train_opt_group.append({'names':[var.name for var in group_var_list],
                                'vars':group_var_list,
                                'learning_rate':group_learning_rate,
                                'grads':group_grad_list,
                                'train_op':group_train_op})

    train_op = tf.group(*[group['train_op'] for group in train_opt_group])

    # ****************************************************************
    # define graph and run optimization
    # ****************************************************************
    sess = tf.InteractiveSession()
    saver = tf.train.Saver()
    sess.run(tf.global_variables_initializer())


    logger.info('*******  started optimization at ' + time.strftime('%Y%m%d-%H%M') + " *******")
    optstime = time.time()
    logger.info(
        '{:>16s}'.format("iteration") + '{:>6s}'.format("time"))

    for i in range(num_iter):
        optstime = time.time()
        batch = train_data.next_batch(num_minibatch)
        try:
            sess.run([train_op],feed_dict={X : batch[0],Y : batch[1]})
            if i % 100 == 0:
                logger.info(
                    '{:>16d}'.format(i) + '{:>6.3f}'.format((time.time() - optstime)/60))

            if i % 10000 == 0:
                modelmngr = modelmanager(saver, sess, modelPath)
                modelmngr.save()

                # ****************************************************************
                # plot inducing monitoring plots
                # ****************************************************************
                lp_u_fm = u_fm.get_tfv().eval().flatten()

                lp_zf_t = Zf_list[1].get_tfv().eval().flatten()

                lp_zf_sort_ind = np.argsort(lp_zf_t)

                scale_z = 1000
                mpl.rcParams['figure.figsize'] = (16,8)
                fig, (ax1,ax2) = plt.subplots(2, 1, sharex=True)

                mean_pptr = traindf.groupby('ndatehour')['pptr'].mean()
                ax1.bar(mean_pptr.index, mean_pptr.values, align='center')

                for m in np.arange(num_inducing_f[0]):
                    u_fm_temporal = lp_u_fm[m*num_inducing_f[1]:(m+1)*num_inducing_f[1]]
                    ax2.plot(np.round(lp_zf_t[lp_zf_sort_ind] * scale_z,4),u_fm_temporal[lp_zf_sort_ind],alpha=0.7)
                ax2.scatter(np.round(lp_zf_t[lp_zf_sort_ind] * scale_z,4),np.ones([num_inducing_f[1],1])*lp_u_fm.min(),color="#514A30")

                fig.savefig(parentDir+ subDir + "svgp_inducing_"+str(i)+".png")

        except KeyboardInterrupt as e:
            print("Stopping training")
            break

    modelmngr = modelmanager(saver, sess, modelPath)
    modelmngr.save()
    tf.reset_default_graph()

    # ****************************************************************
    # param summary
    # ****************************************************************
    logger.info("Noise variance          = " + str(noisevar.get_tfv().eval()))
    logger.info("Kf spatial lengthscale  = " + str(fkell[0].get_tfv().eval()))
    logger.info("Kf spatial variance     = " + str(fkvar[0].get_tfv().eval()))
    logger.info("Kf temporal lengthscale = " + str(fkell[1].get_tfv().eval()))
    logger.info("Kf temporal variance    = " + str(fkvar[1].get_tfv().eval()))

    # ****************************************************************
    # model predictions
    # ****************************************************************

    # get regression summary
    from onofftf.svgppred import predict_svgp
    def rmse(predict,actual):
        predict = np.maximum(predict,0)
        return np.sqrt(np.mean((actual-predict)**2))

    def mad(predict,actual):
        predict = np.maximum(predict,0)
        return np.mean(np.abs(actual-predict))

    pred_train_hurdle_svgp, pred_test_hurdle_svgp = predict_svgp(Xtrain = Xtrain_reg_hurdle,
                                                   Xtest = Xtest_reg_hurdle,
                                                   checkpointPath = modelPath)

    train_hurdle_reg_rmse = rmse(pred_train_hurdle_svgp["fmean"],Ytrain_reg_hurdle)
    logger.info("rmse on train set for hurdle svgp : "+str(train_hurdle_reg_rmse))
    train_hurdle_reg_mae = mad(pred_train_hurdle_svgp["fmean"],Ytrain_reg_hurdle)
    logger.info("mad on train set for hurdle svgp : "+str(train_hurdle_reg_mae))

    test_hurdle_reg_rmse = rmse(pred_test_hurdle_svgp["fmean"],Ytest_reg_hurdle)
    logger.info("rmse on test set for hurdle svgp  : "+str(test_hurdle_reg_rmse))
    test_hurdle_reg_mae = mad(pred_test_hurdle_svgp["fmean"],Ytest_reg_hurdle)
    logger.info("mad on test set for hurdle svgp  : "+str(test_hurdle_reg_mae))

    # combine the results from regression and classification
    train_pred_hurdle_clf = (cresults['pred_train']['pfmean'] > 0.5)*1.0
    test_pred_hurdle_clf  = (cresults['pred_test']['pfmean'] > 0.5)*1.0
    train_pred_hurdle_comb = train_pred_hurdle_clf.copy()
    train_pred_hurdle_comb[train_pred_on_idx] = pred_train_hurdle_svgp["fmean"]
    test_pred_hurdle_comb = test_pred_hurdle_clf.copy()
    test_pred_hurdle_comb[test_pred_on_idx] = pred_test_hurdle_svgp["fmean"]

    # final results
    train_hurdle_comb_rmse = rmse(train_pred_hurdle_comb,Ytrain)
    logger.info("rmse on train set for hurdle combined : "+str(train_hurdle_comb_rmse))
    train_hurdle_comb_mae = mad(train_pred_hurdle_comb,Ytrain)
    logger.info("mad on train set for hurdle combined : "+str(train_hurdle_comb_mae))

    test_hurdle_comb_rmse = rmse(test_pred_hurdle_comb,Ytest)
    logger.info("rmse on test set for hurdle combined  : "+str(test_hurdle_comb_rmse))
    test_hurdle_comb_mae = mad(test_pred_hurdle_comb,Ytest)
    logger.info("mad on test set for hurdle combined  : "+str(test_hurdle_comb_mae))

    for handler in logger.handlers:
        handler.close()
        logger.removeHandler(handler)

    # ****************************************************************
    # return values
    # ****************************************************************
    results = {
               'pred_train_hurdle_svgp':pred_train_hurdle_svgp,
               'pred_test_hurdle_svgp':pred_test_hurdle_svgp,
               'train_hurdle_reg_rmse':train_hurdle_reg_rmse,
               'train_hurdle_reg_mae':train_hurdle_reg_mae,
               'test_hurdle_reg_rmse':test_hurdle_reg_rmse,
               'test_hurdle_reg_mae':test_hurdle_reg_mae,
               'train_pred_hurdle_comb':train_pred_hurdle_comb,
               'test_pred_hurdle_comb':test_pred_hurdle_comb,
               'train_hurdle_comb_rmse':train_hurdle_comb_rmse,
               'train_hurdle_comb_mae':train_hurdle_comb_mae,
               'test_hurdle_comb_rmse':test_hurdle_comb_rmse,
               'test_hurdle_comb_mae':test_hurdle_comb_mae,
               'train_pred_on_idx':train_pred_on_idx,
               'test_pred_on_idx':test_pred_on_idx
               }
    pickle.dump(results,open(parentDir+ subDir +"results_hurdle.pickle","wb"))
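
A hedged NumPy sketch of the reshape trick that __kron_mv above relies on (the helper name kron_mv and the sizes are illustrative): the product of a Kronecker-structured matrix (A1 ⊗ A2) with a vector is computed factor by factor, never materialising the full N x N matrix.

import numpy as np

def kron_mv(As, x):
    dims = [A.shape[0] for A in As]
    N = int(np.prod(dims))
    b = x.reshape(N, 1)
    for A, d in zip(As, dims):
        Xmat = b.reshape(d, N // d)       # fold out the current factor
        b = (Xmat.T @ A.T).reshape(N, 1)  # apply it, then re-flatten
    return b

rng = np.random.default_rng(0)
A1, A2 = rng.standard_normal((3, 3)), rng.standard_normal((4, 4))
x = rng.standard_normal(12)
assert np.allclose(kron_mv([A1, A2], x), np.kron(A1, A2) @ x.reshape(-1, 1))
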
Example #22
    def __init__(self,
                 latent_dim,
                 Y,
                 transitions,
                 T_latent=None,
                 inputs=None,
                 emissions=None,
                 px1_mu=None,
                 px1_cov=None,
                 Xmu=None,
                 Xchol=None,
                 n_samples=100,
                 batch_size=None,
                 seed=None,
                 name=None):

        super().__init__(latent_dim,
                         Y[0],
                         transitions,
                         T_latent=None,
                         inputs=None,
                         emissions=emissions,
                         px1_mu=px1_mu,
                         px1_cov=None,
                         Xmu=None,
                         Xchol=None,
                         n_samples=n_samples,
                         seed=seed,
                         name=name)

        self.T = [Y_s.shape[0] for Y_s in Y]
        self.T_latent = T_latent or self.T
        self.n_seq = len(self.T)
        self.T_tf = tf.constant(self.T, dtype=gp.settings.int_type)
        self.T_latent_tf = tf.constant(self.T_latent,
                                       dtype=gp.settings.int_type)
        self.sum_T = float(sum(self.T))
        self.sum_T_latent = float(sum(self.T_latent))
        self.batch_size = batch_size

        self.Y = gp.ParamList(Y, trainable=False)

        self.inputs = None if inputs is None else gp.ParamList(inputs,
                                                               trainable=False)

        _Xmu = [np.zeros((T_s, self.latent_dim))
                for T_s in self.T_latent] if Xmu is None else Xmu
        self.X = gp.ParamList(_Xmu)

        _Xchol = [np.eye(T_s * self.latent_dim)
                  for T_s in self.T_latent] if Xchol is None else Xchol
        xc_tr = lambda xc: None if xc.ndim == 1 else gtf.LowerTriangular(
            xc.shape[-1],
            num_matrices=1 if xc.ndim == 2 else xc.shape[0],
            squeeze=xc.ndim == 2)
        self.Xchol = gp.ParamList(
            [gp.Param(xc, transform=xc_tr(xc)) for xc in _Xchol])

        self.multi_diag_px1_cov = False
        if isinstance(px1_cov, list):  # different prior for each sequence
            _x1_cov = np.stack(px1_cov)
            _x1_cov = np.sqrt(
                _x1_cov) if _x1_cov.ndim == 2 else np.linalg.cholesky(_x1_cov)
            _transform = None if _x1_cov.ndim == 2 else gtf.LowerTriangular(
                self.latent_dim, num_matrices=self.n_seq)
            self.multi_diag_px1_cov = _x1_cov.ndim == 2
        elif isinstance(px1_cov, np.ndarray):  # same prior for each sequence
            assert px1_cov.ndim < 3
            _x1_cov = np.sqrt(
                px1_cov) if px1_cov.ndim == 1 else np.linalg.cholesky(px1_cov)
            _transform = None if px1_cov.ndim == 1 else gtf.LowerTriangular(
                self.latent_dim, squeeze=True)
        else:
            _x1_cov = np.eye(self.latent_dim)
            _transform = gtf.LowerTriangular(self.latent_dim, squeeze=True)

        self.px1_cov_chol = gp.Param(_x1_cov,
                                     trainable=False,
                                     transform=_transform)
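
A small NumPy-only illustration (the values are made up) of the square-root convention used for px1_cov above: a 1-D array is read as the diagonal of the covariance, so its elementwise square root stands in for the Cholesky factor, while a 2-D array gets a proper lower-triangular factorisation.

import numpy as np

diag_cov = np.array([4.0, 9.0])                # diagonal covariance, ndim == 1
full_cov = np.array([[4.0, 1.0], [1.0, 9.0]])  # full covariance, ndim == 2
diag_chol = np.sqrt(diag_cov)                  # per-dimension standard deviations
full_chol = np.linalg.cholesky(full_cov)       # lower-triangular L with L @ L.T == full_cov
assert np.allclose(full_chol @ full_chol.T, full_cov)
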
Example #23
    def __init__(self,
                 latent_dim,
                 Y,
                 inputs=None,
                 emissions=None,
                 px1_mu=None,
                 px1_cov=None,
                 kern=None,
                 Z=None,
                 n_ind_pts=100,
                 mean_fn=None,
                 Q_diag=None,
                 Umu=None,
                 Ucov_chol=None,
                 qx1_mu=None,
                 qx1_cov=None,
                 As=None,
                 bs=None,
                 Ss=None,
                 n_samples=100,
                 seed=None,
                 parallel_iterations=10,
                 jitter=gps.numerics.jitter_level,
                 name=None):

        super().__init__(name=name)

        self.latent_dim = latent_dim
        self.T, self.obs_dim = Y.shape
        self.Y = Param(Y, trainable=False)

        self.inputs = None if inputs is None else Param(inputs,
                                                        trainable=False)
        self.input_dim = 0 if self.inputs is None else self.inputs.shape[1]

        self.qx1_mu = Param(
            np.zeros(self.latent_dim) if qx1_mu is None else qx1_mu)
        self.qx1_cov_chol = Param(
            np.eye(self.latent_dim)
            if qx1_cov is None else np.linalg.cholesky(qx1_cov),
            transform=gtf.LowerTriangular(self.latent_dim, squeeze=True))

        self.As = Param(
            np.ones((self.T - 1, self.latent_dim)) if As is None else As)
        self.bs = Param(
            np.zeros((self.T - 1, self.latent_dim)) if bs is None else bs)

        self.Q_sqrt = Param(
            np.ones(self.latent_dim) if Q_diag is None else Q_diag**0.5,
            transform=gtf.positive)
        if Ss is False:
            self._S_chols = None
        else:
            self.S_chols = Param(
                np.tile(self.Q_sqrt.value.copy()[None, ...], [self.T - 1, 1])
                if Ss is None else
                (np.sqrt(Ss) if Ss.ndim == 2 else np.linalg.cholesky(Ss)),
                transform=gtf.positive if
                (Ss is None or Ss.ndim == 2) else gtf.LowerTriangular(
                    self.latent_dim, num_matrices=self.T - 1, squeeze=False))

        self.emissions = emissions or GaussianEmissions(
            latent_dim=self.latent_dim, obs_dim=self.obs_dim)

        self.px1_mu = Param(
            np.zeros(self.latent_dim) if px1_mu is None else px1_mu,
            trainable=False)
        self.px1_cov_chol = None if px1_cov is None else \
            Param(np.sqrt(px1_cov) if px1_cov.ndim == 1 else np.linalg.cholesky(px1_cov), trainable=False,
                  transform=gtf.positive if px1_cov.ndim == 1 else gtf.LowerTriangular(self.latent_dim, squeeze=True))

        self.n_samples = n_samples
        self.seed = seed
        self.parallel_iterations = parallel_iterations
        self.jitter = jitter

        # Inference-specific attributes (see gpssm_models.py for appropriate choices):
        nans = tf.constant(np.zeros(
            (self.T, self.n_samples, self.latent_dim)) * np.nan,
                           dtype=gps.float_type)
        self.sample_fn = lambda **kwargs: (nans, None)
        self.sample_kwargs = {}
        self.KL_fn = lambda *fs: tf.constant(np.nan, dtype=gps.float_type)

        # GP Transitions:
        self.n_ind_pts = n_ind_pts if Z is None else (
            Z[0].shape[-2] if isinstance(Z, list) else Z.shape[-2])

        if isinstance(Z, np.ndarray) and Z.ndim == 2:
            self.Z = mf.SharedIndependentMof(gp.features.InducingPoints(Z))
        else:
            Z_list = [
                np.random.randn(self.n_ind_pts, self.latent_dim +
                                self.input_dim) for _ in range(self.latent_dim)
            ] if Z is None else [z for z in Z]
            self.Z = mf.SeparateIndependentMof(
                [gp.features.InducingPoints(z) for z in Z_list])

        if isinstance(kern, gp.kernels.Kernel):
            self.kern = mk.SharedIndependentMok(kern, self.latent_dim)
        else:
            kern_list = kern or [
                gp.kernels.Matern32(self.latent_dim + self.input_dim, ARD=True)
                for _ in range(self.latent_dim)
            ]
            self.kern = mk.SeparateIndependentMok(kern_list)

        self.mean_fn = mean_fn or mean_fns.Identity(self.latent_dim)
        self.Umu = Param(
            np.zeros((self.latent_dim, self.n_ind_pts))
            if Umu is None else Umu)  # (Lm^-1)(Umu - m(Z))
        LT_transform = gtf.LowerTriangular(self.n_ind_pts,
                                           num_matrices=self.latent_dim,
                                           squeeze=False)
        self.Ucov_chol = Param(np.tile(
            np.eye(self.n_ind_pts)[None, ...], [self.latent_dim, 1, 1])
                               if Ucov_chol is None else Ucov_chol,
                               transform=LT_transform)  # (Lm^-1)Lu
        self._Kzz = None
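
A quick NumPy shape check (illustrative sizes only) for the Ucov_chol default above: one identity Cholesky factor per latent dimension, stacked along a leading axis so the LowerTriangular transform sees latent_dim matrices.

import numpy as np

n_ind_pts, latent_dim = 4, 3
Ucov0 = np.tile(np.eye(n_ind_pts)[None, ...], [latent_dim, 1, 1])
print(Ucov0.shape)  # (3, 4, 4): latent_dim x n_ind_pts x n_ind_pts
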
Example #24
def predict_svgp(Xtrain,
                 Xtest,
                 checkpointPath,
                 num_inducing_f=np.array([10, 100]),
                 include_fmu=False):
    tf.reset_default_graph()

    # param initializations
    list_to_np = lambda _list: [np.array(e) for e in _list]

    init_fkell = list_to_np([[8., 8.], [5. / 1000]])
    init_fkvar = list_to_np([[20.], [20.]])

    init_noisevar = 0.001

    q_diag = True
    if include_fmu:
        init_f_mu = 0.

    init_Zf_s = kmeans(Xtrain[:, 0:2], num_inducing_f[0])[0]
    init_Zf_t = np.expand_dims(np.linspace(Xtrain[:, 2].min(),
                                           Xtrain[:, 2].max(),
                                           num_inducing_f[1]),
                               axis=1)

    init_Zf = [init_Zf_s, init_Zf_t]

    init_u_fm = np.random.randn(np.prod(num_inducing_f), 1) * 0.1
    init_u_fs_sqrt = np.ones(np.prod(num_inducing_f)).reshape(1, -1).T

    kern_param_learning_rate = 1e-4
    indp_param_learning_rate = 1e-4

    # ****************************************************************
    # define tensorflow variables and placeholders
    # ****************************************************************
    X = tf.placeholder(dtype=float_type)
    Y = tf.placeholder(dtype=float_type)

    with tf.name_scope("f_kern"):
        fkell = [
            Param(init_fkell[i],
                  transform=transforms.Log1pe(),
                  name="lengthscale",
                  learning_rate=kern_param_learning_rate,
                  summ=True) for i in range(len(num_inducing_f))
        ]

        fkvar = [
            Param(init_fkvar[i],
                  transform=transforms.Log1pe(),
                  name="variance",
                  learning_rate=kern_param_learning_rate,
                  summ=True) for i in range(len(num_inducing_f))
        ]

    fkern_list = [
        KernSE(fkell[i], fkvar[i]) for i in range(len(num_inducing_f))
    ]

    with tf.name_scope("likelihood"):
        noisevar = Param(init_noisevar,
                         transform=transforms.Log1pe(),
                         name="variance",
                         learning_rate=kern_param_learning_rate,
                         summ=True)

    with tf.name_scope("f_ind"):
        Zf_list = [
            Param(init_Zf[i],
                  name="z",
                  learning_rate=indp_param_learning_rate,
                  summ=True) for i in range(len(num_inducing_f))
        ]

        u_fm = Param(init_u_fm,
                     name="value",
                     learning_rate=indp_param_learning_rate,
                     summ=True)
        if q_diag:
            u_fs_sqrt = Param(init_u_fs_sqrt,
                              transforms.positive,
                              name="variance",
                              learning_rate=indp_param_learning_rate,
                              summ=True)
        else:
            u_fs_sqrt = Param(init_u_fs_sqrt,
                              transforms.LowerTriangular(
                                  init_u_fs_sqrt.shape[0]),
                              name="variance",
                              learning_rate=indp_param_learning_rate,
                              summ=True)

    # ****************************************************************
    # define model support functions
    # ****************************************************************
    def build_predict(Xnew, u_fm, u_fs_sqrt, fkern_list, Zf_list, f_mu=None):

        input_mask_f = _gen_inp_mask(Zf_list)

        # compute fmean and fvar from the kronecker inference
        fmean, fvar = kron_inf(Xnew, fkern_list, Zf_list, u_fm, u_fs_sqrt,
                               num_inducing_f, input_mask_f)
        if f_mu is not None:
            fmean = fmean + f_mu.get_tfv()

        # return mean and variance vectors in order
        return fmean, fvar

    def kron_inf(Xnew, kern_list, Z_list, q_mu, q_sqrt, num_inducing,
                 input_mask):
        # Compute alpha = K_mm^-1 * f_m
        Kmm = [kern_list[p].K(Z_list[p].get_tfv()) + \
               tf.eye(num_inducing[p], dtype=float_type) * jitter_level
               for p in range(len(num_inducing))]

        Kmm_inv = [tf.matrix_inverse(Kmm[p]) for p in range(len(num_inducing))]
        alpha = __kron_mv(Kmm_inv, q_mu.get_tfv())

        n_batch = tf.stack([tf.shape(Xnew)[0], np.int32(1)])
        Knn = tf.ones(n_batch, dtype=float_type)
        KMN = []

        for p in range(len(num_inducing)):
            xnew = tf.gather(Xnew, input_mask[p], axis=1)
            Knn *= tf.reshape(kern_list[p].Kdiag(xnew), n_batch)
            KMN.append(kern_list[p].K(Z_list[p].get_tfv(), xnew))

        S = tf.diag(tf.squeeze(tf.square(q_sqrt.get_tfv())))

        def loop_rows(n, mu, var):
            Kmn = tf.reshape(KMN[0][:, n], [num_inducing[0], 1])
            for p in range(1, len(num_inducing)):
                Kmn = tf_kron(Kmn,
                              tf.reshape(KMN[p][:, n], [num_inducing[p], 1]))

            mu_n = tf.matmul(Kmn, alpha, transpose_a=True)
            mu = mu.write(n, mu_n)
            A = __kron_mv(Kmm_inv, Kmn)
            tmp = Knn[n] - tf.matmul(Kmn, A,transpose_a=True) + \
                           tf.matmul(tf.matmul(A,S,transpose_a=True),A)

            var = var.write(n, tmp)
            return tf.add(n, 1), mu, var

        def loop_cond(n, mu, var):
            return tf.less(n, n_batch[0])

        mu = tf.TensorArray(float_type, size=n_batch[0])
        var = tf.TensorArray(float_type, size=n_batch[0])
        _, mu, var = tf.while_loop(loop_cond, loop_rows, [0, mu, var])

        mu = tf.reshape(mu.stack(), n_batch)
        var = tf.reshape(var.stack(), n_batch)

        return mu, var

    def __kron_mv(As, x):
        num_inducing = [int(As[p].get_shape()[0]) for p in range(len(As))]
        N = np.prod(num_inducing)
        b = tf.reshape(x, [N, 1])
        for p in range(len(As)):
            Ap = As[p]
            X = tf.reshape(b, (num_inducing[p], np.round(
                N / num_inducing[p]).astype(np.int)))
            b = tf.matmul(X, Ap, transpose_a=True, transpose_b=True)
            b = tf.reshape(b, [N, 1])
        return b

    def tf_kron(*args):
        def __tf_kron(a, b):

            a_shape = [tf.shape(a)[0], tf.shape(a)[1]]
            b_shape = [tf.shape(b)[0], tf.shape(b)[1]]

            return tf.reshape(tf.reshape(a,[a_shape[0],1,a_shape[1],1])* \
                              tf.reshape(b,[1,b_shape[0],1,b_shape[1]]),
                              [a_shape[0]*b_shape[0],a_shape[1]*b_shape[1]])

        kron_prod = tf.constant(1., shape=[1, 1], dtype=float_type)
        for Ap in args:
            kron_prod = __tf_kron(kron_prod, Ap)

        return kron_prod

    def _gen_inp_mask(Z_list):
        input_mask = []
        tmp = 0
        for p in range(len(Z_list)):
            p_dim = Z_list[p].shape[1]
            input_mask.append(np.arange(tmp, tmp + p_dim, dtype=np.int32))
            tmp += p_dim
        return input_mask

    # ****************************************************************
    # build model and define lower bound
    # ****************************************************************

    # get augmented functions
    with tf.name_scope("model_build"):
        fmean, fvar = build_predict(X, u_fm, u_fs_sqrt, fkern_list, Zf_list)

    # load model
    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())

    saver = tf.train.Saver()
    modelmngr = modelmanager(saver, sess, checkpointPath)
    modelmngr.load()

    # return inside a dictionary
    pred_train = {
        'fmean': fmean.eval(feed_dict={X: Xtrain}),
        'fvar': fvar.eval(feed_dict={X: Xtrain})
    }

    if Xtest is not None:
        pred_test = {
            'fmean': fmean.eval(feed_dict={X: Xtest}),
            'fvar': fvar.eval(feed_dict={X: Xtest})
        }

    sess.close()

    if Xtest is not None:
        return pred_train, pred_test
    else:
        return pred_train
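
For orientation, a dense (non-Kronecker) NumPy sketch of the sparse-GP predictive equations that kron_inf evaluates row by row above, assuming a diagonal q(u) covariance; all names are illustrative. With A = Kmm^{-1} Kmn it computes mu = Kmn^T Kmm^{-1} m and var = Knn - diag(Kmn^T Kmm^{-1} Kmn) + diag(A^T S A).

import numpy as np

def svgp_predict_diag(Kmn, Kmm, Knn_diag, q_mu, q_sqrt_diag):
    Kmm_inv = np.linalg.inv(Kmm)
    A = Kmm_inv @ Kmn                   # M x N
    S_diag = q_sqrt_diag.ravel()**2     # diagonal covariance of q(u)
    mu = Kmn.T @ (Kmm_inv @ q_mu)
    var = Knn_diag \
          - np.einsum('mn,mn->n', Kmn, A) \
          + np.einsum('mn,mn->n', A, S_diag[:, None] * A)
    return mu, var[:, None]
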
Example #25
def predict_onoff(Xtrain,
                  Xtest,
                  checkpointPath,
                  num_inducing_f=np.array([10, 100]),
                  num_inducing_g=np.array([10, 100]),
                  include_fmu=False):
    tf.reset_default_graph()

    # param initializations
    list_to_np = lambda _list : [np.array(e) for e in _list]

    init_fkell = list_to_np([[8.,8.],[5./1000]])
    init_fkvar = list_to_np([[20.],[20.]])

    init_gkell = list_to_np([[8.,8.],[5./1000]])
    init_gkvar = list_to_np([[10.],[10.]])

    init_noisevar = 0.001

    q_diag = True
    if include_fmu:
        init_f_mu = 0.

    init_Zf_s = kmeans(Xtrain[:,0:2],num_inducing_f[0])[0]
    init_Zf_t = np.expand_dims(np.linspace(Xtrain[:,2].min(),Xtrain[:,2].max(),num_inducing_f[1]),axis=1)

    init_Zf = [init_Zf_s,init_Zf_t]
    init_Zg = init_Zf.copy()


    init_u_fm = np.random.randn(np.prod(num_inducing_f),1)*0.1
    init_u_gm = np.random.randn(np.prod(num_inducing_g),1)*0.1

    init_u_fs_sqrt = np.ones(np.prod(num_inducing_f)).reshape(1,-1).T
    init_u_gs_sqrt = np.ones(np.prod(num_inducing_g)).reshape(1,-1).T

    kern_param_learning_rate = 1e-4
    indp_param_learning_rate = 1e-4


    # tf variable declarations
    X = tf.placeholder(dtype = float_type)
    Y = tf.placeholder(dtype = float_type)

    with tf.name_scope("f_kern"):
        fkell = [Param(init_fkell[i],transform=transforms.Log1pe(),
                       name="lengthscale",learning_rate = kern_param_learning_rate,summ=True)
                 for i in range(len(num_inducing_f))]

        fkvar = [Param(init_fkvar[i],transform=transforms.Log1pe(),
                       name="variance",learning_rate = kern_param_learning_rate,summ=True)
                 for i in range(len(num_inducing_f))]

    fkern_list = [KernSE(fkell[i],fkvar[i]) for i in range(len(num_inducing_f))]

    with tf.name_scope("g_kern"):
        gkell = [Param(init_gkell[i],transform=transforms.Log1pe(),
                       name="lengthscale",learning_rate = kern_param_learning_rate,summ=True)
                 for i in range(len(num_inducing_g))]

        gkvar = [Param(init_gkvar[i],transform=transforms.Log1pe(),
                       name="variance",learning_rate = kern_param_learning_rate,summ=True)
                 for i in range(len(num_inducing_g))]

    gkern_list = [KernSE(gkell[i],gkvar[i]) for i in range(len(num_inducing_g))]

    with tf.name_scope("likelihood"):
        noisevar = Param(init_noisevar,transform=transforms.Log1pe(),
                         name="variance",learning_rate = kern_param_learning_rate,summ=True)


    with tf.name_scope("f_ind"):
        Zf_list = [Param(init_Zf[i],name="z",learning_rate = indp_param_learning_rate,summ=True)
                   for i in range(len(num_inducing_f))]

        u_fm = Param(init_u_fm,name="value",learning_rate = indp_param_learning_rate,summ=True)
        if q_diag:
            u_fs_sqrt = Param(init_u_fs_sqrt,transforms.positive,
                              name="variance",learning_rate = indp_param_learning_rate,summ=True)
        else:
            u_fs_sqrt = Param(init_u_fs_sqrt,transforms.LowerTriangular(init_u_fs_sqrt.shape[0]),
                              name="variance",learning_rate = indp_param_learning_rate,summ=True)

    #     f_mu = Param(init_f_mu,name="fmu",learning_rate = indp_param_learning_rate,summ=True)

    with tf.name_scope("g_ind"):
        Zg_list = [Param(init_Zg[i],name="z",learning_rate = indp_param_learning_rate,summ=True)
                   for i in range(len(num_inducing_g))]

        u_gm = Param(init_u_gm,name="value",learning_rate = indp_param_learning_rate,summ=True)
        if q_diag:
            u_gs_sqrt = Param(init_u_gs_sqrt,transforms.positive,
                              name="variance",learning_rate = indp_param_learning_rate,summ=True)
        else:
            u_gs_sqrt = Param(init_u_gs_sqrt,transforms.LowerTriangular(init_u_gs_sqrt.shape[0]),
                              name="variance",learning_rate = indp_param_learning_rate,summ=True)



    def build_prior_kl(u_fm, u_fs_sqrt, fkern_list, Zf_list,
                       u_gm, u_gs_sqrt, gkern_list, Zg_list, whiten=False):
        if whiten:
            raise NotImplementedError()
        else:
            Kfmm = [fkern_list[i].K(Zf_list[i].get_tfv()) + \
                    tf.eye(num_inducing_f[i], dtype=float_type) * jitter_level
                    for i in range(len(num_inducing_f))]

            Kgmm = [gkern_list[i].K(Zg_list[i].get_tfv()) + \
                    tf.eye(num_inducing_g[i], dtype=float_type) * jitter_level
                    for i in range(len(num_inducing_g))]

            KL = GaussKLkron(u_fm.get_tfv(), u_fs_sqrt.get_tfv(), Kfmm) + \
                 GaussKLkron(u_gm.get_tfv(), u_gs_sqrt.get_tfv(), Kgmm)

        return KL

    def build_predict(Xnew,u_fm,u_fs_sqrt,fkern_list,Zf_list,u_gm,u_gs_sqrt,gkern_list,Zg_list,f_mu=None):

        input_mask_f = _gen_inp_mask(Zf_list)
        input_mask_g = _gen_inp_mask(Zg_list)

        # compute fmean and fvar from the kronecker inference
        fmean,fvar = kron_inf(Xnew,fkern_list,Zf_list,u_fm,u_fs_sqrt,num_inducing_f,input_mask_f)
        # fmean = fmean + mean_function(Xnew)
        if f_mu is not None:
            fmean = fmean + f_mu.get_tfv()


        # compute gmean and gvar from the kronecker inference
        gmean,gvar = kron_inf(Xnew,gkern_list,Zg_list,u_gm,u_gs_sqrt,num_inducing_g,input_mask_g)
        gmean = gmean + tf.cast(tf.constant(-1.0),float_type)

        # compute augmented distributions
        ephi_g, ephi2_g, evar_phi_g = probit_expectations(gmean, gvar)

        # compute augmented f
        # p(f|g) = N(f | diag(ephi_g) * A * u_fm, diag(evar_phi_g) * (Kfnn + A (u_fs - Kfmm) A^T))
        gfmean = tf.multiply(ephi_g, fmean)
        gfvar = tf.multiply(ephi2_g, fvar)
        gfmeanu = tf.multiply(evar_phi_g, tf.square(fmean))

        # return mean and variance vectors in order
        return gfmean, gfvar, gfmeanu, fmean, fvar, gmean, gvar, ephi_g, evar_phi_g


    def kron_inf(Xnew,kern_list,Z_list,q_mu,q_sqrt,num_inducing,input_mask):
        # Compute alpha = K_mm^-1 * f_m
        Kmm = [kern_list[p].K(Z_list[p].get_tfv()) + \
               tf.eye(num_inducing[p], dtype=float_type) * jitter_level
               for p in range(len(num_inducing))]

        Kmm_inv = [tf.matrix_inverse(Kmm[p]) for p in range(len(num_inducing))]
        alpha = __kron_mv(Kmm_inv,q_mu.get_tfv(),num_inducing)

        n_batch = tf.stack([tf.shape(Xnew)[0],np.int32(1)])
        Knn = tf.ones(n_batch, dtype=float_type)
        KMN = []

        for p in range(len(num_inducing)):
            xnew = tf.gather(Xnew, input_mask[p], axis=1)
            Knn *= tf.reshape(kern_list[p].Kdiag(xnew), n_batch)
            KMN.append(kern_list[p].K(Z_list[p].get_tfv(), xnew))

        S = tf.diag(tf.squeeze(tf.square(q_sqrt.get_tfv())))

        def loop_rows(n,mu,var):
            Kmn = tf.reshape(KMN[0][:,n], [num_inducing[0],1])
            for p in range(1,len(num_inducing)):
                Kmn = tf_kron(Kmn,tf.reshape(KMN[p][:,n],[num_inducing[p],1]))

            mu_n = tf.matmul(Kmn, alpha, transpose_a=True)
            mu = mu.write(n, mu_n)
            A = __kron_mv(Kmm_inv,Kmn,num_inducing)
            tmp = Knn[n] - tf.matmul(Kmn, A,transpose_a=True) + \
                           tf.matmul(tf.matmul(A,S,transpose_a=True),A)

            var = var.write(n, tmp)
            return tf.add(n,1), mu, var

        def loop_cond(n,mu,var):
            return tf.less(n, n_batch[0])

        mu = tf.TensorArray(float_type, size=n_batch[0])
        var = tf.TensorArray(float_type, size=n_batch[0])
        _, mu, var = tf.while_loop(loop_cond, loop_rows, [0, mu, var])

        mu = tf.reshape(mu.stack(), n_batch)
        var = tf.reshape(var.stack(), n_batch)

        return mu , var

    def __kron_mv( As, x,num_inducing):
        N = np.prod(num_inducing)
        b = tf.reshape(x, [N,1])
        for p in range(len(As)):
            Ap = As[p]
            X = tf.reshape(b, (num_inducing[p],
                               np.round(N/num_inducing[p]).astype(np.int)))
            b = tf.matmul(X, Ap, transpose_a=True, transpose_b=True)
            b = tf.reshape(b, [N,1])
        return b

    def tf_kron(a,b):
        a_shape = [a.shape[0].value,a.shape[1].value]
        b_shape = [b.shape[0].value,b.shape[1].value]
        return tf.reshape(tf.reshape(a,[a_shape[0],1,a_shape[1],1])* \
                          tf.reshape(b,[1,b_shape[0],1,b_shape[1]]),
                          [a_shape[0]*b_shape[0],a_shape[1]*b_shape[1]])

    def _gen_inp_mask(Z_list):
        input_mask = []
        tmp = 0
        for p in range(len(Z_list)):
            p_dim = Z_list[p].shape[1]
            input_mask.append(np.arange(tmp, tmp + p_dim, dtype=np.int32))
            tmp += p_dim
        return input_mask


    def variational_expectations(Y,fmu,fvar,fmuvar,noisevar):
        return -0.5 * np.log(2 * np.pi) - 0.5 * tf.log(noisevar) \
                - 0.5 * (tf.square(Y - fmu) + fvar + fmuvar) / noisevar

    def probit_expectations(gmean, gvar):
        def normcdf(x):
            return 0.5 * (1.0 + tf.erf(x / np.sqrt(2.0))) * (1. - 2.e-3) + 1.e-3

        def owent(h, a):
            h = tf.abs(h)
            term1 = tf.atan(a) / (2 * np.pi)
            term2 = tf.exp((-1 / 2) * (tf.multiply(tf.square(h), (tf.square(a) + 1))))
            return tf.multiply(term1, term2)

        z = gmean / tf.sqrt(1. + gvar)
        a = 1 / tf.sqrt(1. + (2 * gvar))

        cdfz = normcdf(z)
        tz = owent(z, a)

        ephig = cdfz
        ephisqg = (cdfz - 2. * tz)
        evarphig = (cdfz - 2. * tz - tf.square(cdfz))

        # clip negative values from variance terms to zero
        ephisqg = (ephisqg + tf.abs(ephisqg)) / 2.
        evarphig = (evarphig + tf.abs(evarphig)) / 2.

        return ephig, ephisqg, evarphig


    kl = build_prior_kl(u_fm, u_fs_sqrt, fkern_list, Zf_list,
                        u_gm, u_gs_sqrt, gkern_list, Zg_list)
    gfmean, gfvar, gfmeanu, fmean, fvar, gmean, gvar, pgmean, pgvar = \
        build_predict(X, u_fm, u_fs_sqrt, fkern_list, Zf_list,
                      u_gm, u_gs_sqrt, gkern_list, Zg_list)

    # load model
    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())

    saver = tf.train.Saver()
    modelmngr = modelmanager(saver, sess, checkpointPath)
    modelmngr.load()

    pred_train = {'gfmean' : gfmean.eval(feed_dict = {X:Xtrain}),
                  'fmean' : fmean.eval(feed_dict = {X:Xtrain}),
                  'pgmean' : pgmean.eval(feed_dict = {X:Xtrain})}

    if Xtest is not None:
        pred_test = {'gfmean' : gfmean.eval(feed_dict = {X:Xtest}),
                     'fmean' : fmean.eval(feed_dict = {X:Xtest}),
                     'pgmean' : pgmean.eval(feed_dict = {X:Xtest})}
    sess.close()

    if Xtest is not None:
        return pred_train, pred_test
    else:
        return pred_train
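
Finally, a hedged SciPy/NumPy check (not part of the original module) of the closed forms used in probit_expectations above. For g ~ N(m, v), E[Phi(g)] = Phi(z) with z = m / sqrt(1 + v), and E[Phi(g)^2] = Phi(z) - 2 T(z, a) with a = 1 / sqrt(1 + 2 v), where T is Owen's T function.

import numpy as np
from scipy.stats import norm
from scipy.special import owens_t

m, v = 0.3, 0.8
z = m / np.sqrt(1.0 + v)
a = 1.0 / np.sqrt(1.0 + 2.0 * v)
g = np.random.default_rng(0).normal(m, np.sqrt(v), 1_000_000)
print(norm.cdf(z), np.mean(norm.cdf(g)))                           # E[Phi(g)]
print(norm.cdf(z) - 2.0 * owens_t(z, a), np.mean(norm.cdf(g)**2))  # E[Phi(g)^2]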