Example #1
0
def test_multioutput_with_diag_q_sqrt(session_tf):
    """Compare a diagonal ``q_sqrt`` against the equivalent dense one.

    Two SVGP models are built on ``DataMixedKernel`` with the same kernel
    set-up, feature wrapper and ``q_mu``.  The first receives ``q_sqrt`` as an
    M x L array with ``q_diag=True``; the second receives L x M x M matrices
    carrying the same value on their diagonals with ``q_diag=False``.  The
    two models are then handed to ``check_equality_predictions``.
    """
    cfg = DataMixedKernel

    def make_model(q_sqrt_init, diagonal):
        # Fresh kernels and inducing points per model so nothing is shared.
        latent_kernels = [RBF(cfg.D) for _ in range(cfg.L)]
        mixed_kernel = mk.SeparateMixedMok(latent_kernels, W=cfg.W)
        inducing = InducingPoints(cfg.X[:cfg.M, ...].copy())
        shared_feature = mf.SharedIndependentMof(inducing)
        return SVGP(
            cfg.X,
            cfg.Y,
            mixed_kernel,
            Gaussian(),
            feat=shared_feature,
            q_mu=cfg.mu_data,
            q_sqrt=q_sqrt_init,
            q_diag=diagonal,
        )

    diag_values = np.full((cfg.M, cfg.L), 2.0)  # M x L
    dense_values = np.tile(2.0 * np.eye(cfg.M), (cfg.L, 1, 1))  # L x M x M

    diag_model = make_model(diag_values, True)
    dense_model = make_model(dense_values, False)

    check_equality_predictions(session_tf, [diag_model, dense_model])
Example #2
0
def test_compare_mixed_kernel(session_tf):
    """Check that both feature wrappers lead to the same predictions.

    The same ``SeparateMixedMok`` kernel set-up is paired once with a
    ``SharedIndependentMof`` and once with a ``MixedKernelSharedMof``.  The
    two resulting SVGP models are passed to ``check_equality_predictions``.
    """
    cfg = DataMixedKernel
    models = []
    for feature_cls in (mf.SharedIndependentMof, mf.MixedKernelSharedMof):
        # Each model gets its own kernels and its own copy of the inducing inputs.
        latent_kernels = [RBF(cfg.D) for _ in range(cfg.L)]
        mixed_kernel = mk.SeparateMixedMok(latent_kernels, W=cfg.W)
        feature = feature_cls(InducingPoints(cfg.X[:cfg.M, ...].copy()))
        model = SVGP(
            cfg.X,
            cfg.Y,
            mixed_kernel,
            Gaussian(),
            feat=feature,
            q_mu=cfg.mu_data,
            q_sqrt=cfg.sqrt_data,
        )
        models.append(model)

    check_equality_predictions(session_tf, models)
Example #3
0
    def __init__(self, dim, input_dim=0, kern=None, Z=None, n_ind_pts=100,
                 mean_fn=None, Q_diag=None, Umu=None, Ucov_chol=None,
                 jitter=gps.numerics.jitter_level, name=None):
        """Create the kernel, inducing features and parameters of the object.

        Args:
            dim: Number of output dimensions; one GP per dimension.
            input_dim: Extra input dimensions appended to ``dim`` when sizing
                the default inducing inputs and default kernels.
            kern: A single ``gp.kernels.Kernel`` (wrapped in a
                ``SharedIndependentMok``) or a list of kernels (wrapped in a
                ``SeparateIndependentMok``).  A falsy value selects one ARD
                ``Matern32`` kernel per output dimension.
            Z: Inducing inputs.  A 2-D ndarray is shared across outputs; any
                other value is iterated and gives one feature per element.
                ``None`` draws standard-normal inducing inputs.
            n_ind_pts: Number of inducing points, used only when ``Z`` is
                ``None``; otherwise it is read from the second-to-last axis
                of ``Z`` (or of ``Z[0]`` when ``Z`` is a list).
            mean_fn: Mean function; a falsy value selects
                ``mean_fns.Identity(dim)``.
            Q_diag: Its element-wise square root initialises the
                positive-constrained ``Q_sqrt``; ones when ``None``.
            Umu: Initial value of ``Umu``; zeros of shape
                ``(dim, n_ind_pts)`` when ``None``.
            Ucov_chol: Initial value of ``Ucov_chol``; ``dim`` stacked
                identity matrices when ``None``.
            jitter: Stored on the instance as ``jitter``.
            name: Forwarded to the parent constructor.
        """
        super().__init__(name=name)
        self.OBSERVATIONS_AS_INPUT = False
        self.dim = dim
        self.input_dim = input_dim
        self.jitter = jitter

        if Q_diag is None:
            q_sqrt_init = np.ones(self.dim)
        else:
            q_sqrt_init = Q_diag ** 0.5
        self.Q_sqrt = Param(q_sqrt_init, transform=gtf.positive)

        # Number of inducing points: explicit when Z is absent, else read from Z.
        if Z is None:
            self.n_ind_pts = n_ind_pts
        elif isinstance(Z, list):
            self.n_ind_pts = Z[0].shape[-2]
        else:
            self.n_ind_pts = Z.shape[-2]

        if isinstance(Z, np.ndarray) and Z.ndim == 2:
            self.Z = mf.SharedIndependentMof(gp.features.InducingPoints(Z))
        else:
            if Z is None:
                full_input_dim = self.dim + self.input_dim
                inducing_inputs = [
                    np.random.randn(self.n_ind_pts, full_input_dim)
                    for _ in range(self.dim)
                ]
            else:
                inducing_inputs = list(Z)
            per_output_features = [
                gp.features.InducingPoints(z) for z in inducing_inputs
            ]
            self.Z = mf.SeparateIndependentMof(per_output_features)

        if isinstance(kern, gp.kernels.Kernel):
            self.kern = mk.SharedIndependentMok(kern, self.dim)
        else:
            if kern:
                kernels = kern
            else:
                kernels = [
                    gp.kernels.Matern32(self.dim + self.input_dim, ARD=True)
                    for _ in range(self.dim)
                ]
            self.kern = mk.SeparateIndependentMok(kernels)

        self.mean_fn = mean_fn if mean_fn else mean_fns.Identity(self.dim)

        # Lm^-1(Umu - m(Z))
        if Umu is None:
            umu_init = np.zeros((self.dim, self.n_ind_pts))
        else:
            umu_init = Umu
        self.Umu = Param(umu_init)

        # Lm^-1(Ucov_chol)
        lower_triangular = gtf.LowerTriangular(
            self.n_ind_pts, num_matrices=self.dim, squeeze=False)
        if Ucov_chol is None:
            ucov_chol_init = np.tile(
                np.eye(self.n_ind_pts)[None, ...], [self.dim, 1, 1])
        else:
            ucov_chol_init = Ucov_chol
        self.Ucov_chol = Param(ucov_chol_init, transform=lower_triangular)

        self._Kzz = None
Example #4
0
def test_separate_independent_mok(session_tf):
    """
    Every output dimension gets its own independent kernel.
    Two set-ups are built, in this order:
        1) inefficient: SeparateIndependentMok with plain InducingPoints
        2) efficient: SeparateIndependentMok with SharedIndependentMof
    In both models only q_mu and q_sqrt are optimised, after which the
    models are compared with check_equality_predictions.
    """

    def new_kernels():
        return [
            RBF(Data.D, variance=0.5, lengthscales=1.2)
            for _ in range(Data.P)
        ]

    def new_inducing_points():
        return InducingPoints(Data.X[:Data.M, ...].copy())

    def fit_variational_params(model):
        # Freeze everything, then release only the variational parameters.
        model.set_trainable(False)
        model.q_sqrt.set_trainable(True)
        model.q_mu.set_trainable(True)
        gpflow.training.ScipyOptimizer().minimize(model, maxiter=Data.MAXITER)

    # Model 1 (inefficient)
    stacked_size = Data.M * Data.P
    mu_stacked = np.random.randn(stacked_size, 1)
    sqrt_stacked = np.tril(
        np.random.randn(stacked_size, stacked_size))[np.newaxis]  # 1 x MP x MP
    slow_kernel = mk.SeparateIndependentMok(new_kernels())
    slow_feature = new_inducing_points()
    slow_model = SVGP(
        Data.X,
        Data.Y,
        slow_kernel,
        Gaussian(),
        slow_feature,
        q_mu=mu_stacked,
        q_sqrt=sqrt_stacked,
    )
    fit_variational_params(slow_model)

    # Model 2 (efficient)
    mu_per_output = np.random.randn(Data.M, Data.P)
    sqrt_per_output = np.stack([
        np.tril(np.random.randn(Data.M, Data.M)) for _ in range(Data.P)
    ])  # P x M x M
    fast_kernel = mk.SeparateIndependentMok(new_kernels())
    fast_feature = mf.SharedIndependentMof(new_inducing_points())
    fast_model = SVGP(
        Data.X,
        Data.Y,
        fast_kernel,
        Gaussian(),
        fast_feature,
        q_mu=mu_per_output,
        q_sqrt=sqrt_per_output,
    )
    fit_variational_params(fast_model)

    check_equality_predictions(session_tf, [slow_model, fast_model])
def init():
    """Build and return the SVGP model used by this script.

    The kernel is a ``SeparateMixedMok`` over one ``POI`` kernel per entry
    of the local POI list plus a single ``Matern32`` spatial kernel, mixed
    with an all-ones 1 x L weight matrix.  Inducing points come from the
    module-level ``Z``, the mean function is ``SlicedNN`` and the
    likelihood is Gaussian.  After construction the likelihood variance is
    set to 0.01 and both the feature and the mixing weights are frozen.

    Returns:
        The configured ``gpflow.models.SVGP`` instance.
    """
    inducing = mf.SharedIndependentMof(gpflow.features.InducingPoints(Z.copy()))

    # One POI kernel per POI type.
    poi_types = [0, 1, 2, 3, 4]
    kernels = [
        POI(effects=0.5,
            lengthscale=5,
            input_dim=D,
            locs_poi=locs_poi,
            typeIndicator=typeIndicator,
            typIdx=poi_type,
            locs=Z[:, 0:2],
            mindist=typeMinDist[position],
            name=str(poi_type),
            kernel_type="Linear")
        for position, poi_type in enumerate(poi_types)
    ]

    # The spatial kernel goes last in the list.
    kernels.append(gpflow.kernels.Matern32(input_dim=D, lengthscales=100))

    num_latent = len(kernels)
    mixing = np.ones((1, num_latent))
    mixed_kernel = mk.SeparateMixedMok(kernels, W=mixing)

    mean_function = SlicedNN(p=p)

    mu_init = np.random.normal(0.0, 1, (M, num_latent))
    sqrt_init = np.tile(np.eye(M), (num_latent, 1, 1))

    model = gpflow.models.SVGP(
        X=X_train,
        Y=y_train,
        kern=mixed_kernel,
        likelihood=gpflow.likelihoods.Gaussian(),
        feat=inducing,
        whiten=True,
        mean_function=mean_function,
        q_mu=mu_init,
        q_sqrt=sqrt_init,
        name='svgp')

    # Initial parameter values and frozen components.
    model.likelihood.variance = 0.01
    model.feature.trainable = False
    model.kern.W.trainable = False
    return model
Example #6
0
def test_sample_conditional_mixedkernel(session_tf):
    """Compare samples from the mixed-kernel path with manually mixed ones.

    Path 1 samples through ``SeparateMixedMok`` with ``MixedKernelSharedMof``.
    Path 2 samples through ``SeparateIndependentMok`` with
    ``SharedIndependentMof`` and multiplies the result by ``W.T``
    afterwards.  The sample mean and sample covariance of both paths must
    agree to one decimal place.
    """
    mu = np.random.randn(Data.M, Data.L)  # M x L
    sqrt = np.stack([
        np.tril(np.random.randn(Data.M, Data.M)) for _ in range(Data.L)
    ])  # L x M x M
    inducing_inputs = Data.X[:Data.M, ...]  # M x D
    num_points = 10 ** 6
    test_inputs = np.ones((num_points, Data.D), dtype=float_type)

    values = {"Xnew": test_inputs, "q_mu": mu, "q_sqrt": sqrt}
    placeholders = _create_placeholder_dict(values)
    feed_dict = _create_feed_dict(placeholders, values)

    # Path 1: mixed kernel, the most efficient route.
    mixing = np.random.randn(Data.P, Data.L)
    mixed_kernel = mk.SeparateMixedMok(
        [RBF(Data.D) for _ in range(Data.L)], mixing)
    mixed_feature = mf.MixedKernelSharedMof(
        InducingPoints(inducing_inputs.copy()))
    mixed_op = sample_conditional(placeholders["Xnew"],
                                  mixed_feature,
                                  mixed_kernel,
                                  placeholders["q_mu"],
                                  q_sqrt=placeholders["q_sqrt"],
                                  white=True)
    mixed_samples = session_tf.run(mixed_op, feed_dict=feed_dict)

    # Path 2: independent kernels, with the mixing applied afterwards.
    independent_kernel = mk.SeparateIndependentMok(
        [RBF(Data.D) for _ in range(Data.L)])
    independent_feature = mf.SharedIndependentMof(
        InducingPoints(inducing_inputs.copy()))
    independent_op = sample_conditional(placeholders["Xnew"],
                                        independent_feature,
                                        independent_kernel,
                                        placeholders["q_mu"],
                                        q_sqrt=placeholders["q_sqrt"],
                                        white=True)
    latent_samples = session_tf.run(independent_op, feed_dict=feed_dict)
    remixed_samples = latent_samples @ mixing.T

    # The empirical moments of the two sample sets should be close.
    mean_mixed = np.mean(mixed_samples, axis=0)
    mean_remixed = np.mean(remixed_samples, axis=0)
    np.testing.assert_array_almost_equal(mean_mixed, mean_remixed, decimal=1)

    cov_mixed = np.cov(mixed_samples, rowvar=False)
    cov_remixed = np.cov(remixed_samples, rowvar=False)
    np.testing.assert_array_almost_equal(cov_mixed, cov_remixed, decimal=1)
Example #7
0
def main():
    """Load the data, build a ``SplitGPM`` model, train it and save it.

    ``X`` and ``Y`` are read from text files under ``../data``.  The model
    uses one shared RBF kernel replicated over ``K1 * K2`` outputs and
    ``T`` inducing points evenly spaced on [0, 1].  The two weight matrices
    start from the log of normalised random values.  After compilation all
    listed components are made trainable, Adam runs for 10000 iterations
    and the model is passed to ``save_model``.
    """
    X = np.loadtxt("../data/neur.X.txt")
    Y = np.loadtxt("../data/neur.Y.txt")

    gpflow.reset_default_graph_and_session()
    name = 'test'
    minibatch_size = 500

    W1_init = normalize(np.random.random(size=(C, K1)))
    W2_init = normalize(np.random.random(size=(G, K2)))

    with gpflow.defer_build():
        shared_rbf = gpflow.kernels.RBF(1, active_dims=[0])
        kernel = mk.SharedIndependentMok(shared_rbf, K1 * K2)

        inducing_inputs = np.linspace(0, 1, T).astype(np.float64).reshape(-1, 1)
        feature = mf.SharedIndependentMof(
            gpflow.features.InducingPoints(inducing_inputs))

        # The small offset keeps the logarithm finite for zero entries.
        log_W1 = np.log(W1_init + 1e-5)
        log_W2 = np.log(W2_init + 1e-5)

        model = SplitGPM(X,
                         Y,
                         log_W1,
                         log_W2,
                         kernel,
                         gpflow.likelihoods.Gaussian(),
                         feat=feature,
                         minibatch_size=minibatch_size,
                         name=name)
    model.compile()

    # Learn, in order: cell assignments (W1), gene assignments (W2),
    # inducing point locations, kernel parameters, likelihood parameters.
    trainable_parts = (
        model.W1,
        model.W2,
        model.feature,
        model.kern,
        model.likelihood,
    )
    for part in trainable_parts:
        part.set_trainable(True)

    optimizer = gpflow.train.AdamOptimizer(0.005)
    optimizer.minimize(model, maxiter=10000)

    save_model(model)
Example #8
0
    def train(self, verbose=True, maxiter=1000):
        """Build the SVGP model from the instance's settings and fit it with Adam.

        Components left as ``None`` on the instance are filled with defaults
        first: a squared-exponential kernel, a Gaussian likelihood and
        inducing points at ``self.Z``.  With ``self.multi_output`` set, the
        kernel and feature are wrapped in their shared-independent
        multi-output counterparts.  The fitted model is stored in ``self.m``.

        Calling this method more than once is safe: the kernel and feature
        are wrapped only if they are not already wrapped.

        Args:
            verbose: When true, print the model's parameter table and its
                log likelihood after optimisation.
            maxiter: Number of optimiser iterations.
        """
        with gpflow.settings.temp_settings(self.gpflow_config):
            # Default parameters
            if self.kern is None:
                self.kern = gpflow.kernels.SquaredExponential(
                    input_dim=self.n_in_dims,
                    variance=self.dtype(0.2),
                    lengthscales=self.dtype(1.0))
            if self.lh is None:
                self.lh = gpflow.likelihoods.Gaussian(
                    variance=self.dtype(0.02))

            if self.feature is None:
                self.feature = gpf.features.InducingPoints(self.Z)

            if self.multi_output:
                # The wrapped objects are stored back on the instance, so an
                # unconditional wrap would nest wrappers on a repeated call.
                if not isinstance(self.kern, mok.SharedIndependentMok):
                    self.kern = mok.SharedIndependentMok(
                        self.kern, output_dimensionality=self.n_out_dims)
                if not isinstance(self.feature, mof.SharedIndependentMof):
                    self.feature = mof.SharedIndependentMof(self.feature)

            self.m = gpflow.models.SVGP(self.X,
                                        self.Y,
                                        self.kern,
                                        likelihood=self.lh,
                                        feat=self.feature,
                                        mean_function=self.mf,
                                        minibatch_size=self.batch_size)

            opt = gpflow.train.tensorflow_optimizer.AdamOptimizer(
                learning_rate=0.1, beta1=0.9, beta2=0.999, epsilon=1e-8)

            opt.minimize(self.m, maxiter=maxiter)

            if verbose:
                # Limit pandas output so the parameter table stays readable.
                pd.set_option('display.max_rows', 20)
                pd.set_option('display.max_columns', 10)
                print(self.m.as_pandas_table())
                print('Log likelihood: ', self.m.compute_log_likelihood())
Example #9
0
def test_shared_independent_mok(session_tf):
    """
    The same kernel and the same inducing inputs are used for every output.
    Three models are built, in this order:
        1) inefficient: SharedIndependentMok with plain InducingPoints,
           using a stacked MP x 1 q_mu and a single MP x MP q_sqrt.
        2) the old way: a plain Kernel with plain InducingPoints.
        3) efficient: SharedIndependentMok with SharedIndependentMof.
    Models 2) and 3) reuse the q_mu of model 1) reshaped to M x P.
    In each model only q_sqrt is optimised, after which the three models
    are compared with check_equality_predictions.
    """

    def new_rbf():
        return RBF(Data.D, variance=0.5, lengthscales=1.2)

    def new_inducing_points():
        return InducingPoints(Data.X[:Data.M, ...].copy())

    def random_sqrt_per_output():
        return np.stack([
            np.tril(np.random.randn(Data.M, Data.M)) for _ in range(Data.P)
        ])  # P x M x M

    def fit_q_sqrt(model):
        # Freeze everything, then release only q_sqrt.
        model.set_trainable(False)
        model.q_sqrt.set_trainable(True)
        gpflow.training.ScipyOptimizer().minimize(model, maxiter=Data.MAXITER)

    # Model 1
    stacked_size = Data.M * Data.P
    mu_stacked = np.random.randn(stacked_size, 1)  # MP x 1
    sqrt_stacked = np.tril(
        np.random.randn(stacked_size, stacked_size))[np.newaxis]  # 1 x MP x MP
    kernel_a = mk.SharedIndependentMok(new_rbf(), Data.P)
    feature_a = new_inducing_points()
    model_a = SVGP(Data.X,
                   Data.Y,
                   kernel_a,
                   Gaussian(),
                   feature_a,
                   q_mu=mu_stacked,
                   q_sqrt=sqrt_stacked)
    fit_q_sqrt(model_a)

    # Model 2
    mu_b = np.reshape(mu_stacked, [Data.M, Data.P])  # M x P
    sqrt_b = random_sqrt_per_output()
    kernel_b = new_rbf()
    feature_b = new_inducing_points()
    model_b = SVGP(Data.X,
                   Data.Y,
                   kernel_b,
                   Gaussian(),
                   feature_b,
                   q_mu=mu_b,
                   q_sqrt=sqrt_b)
    fit_q_sqrt(model_b)

    # Model 3
    mu_c = np.reshape(mu_stacked, [Data.M, Data.P])  # M x P
    sqrt_c = random_sqrt_per_output()
    kernel_c = mk.SharedIndependentMok(new_rbf(), Data.P)
    feature_c = mf.SharedIndependentMof(new_inducing_points())
    model_c = SVGP(Data.X,
                   Data.Y,
                   kernel_c,
                   Gaussian(),
                   feature_c,
                   q_mu=mu_c,
                   q_sqrt=sqrt_c)
    fit_q_sqrt(model_c)

    check_equality_predictions(session_tf, [model_a, model_b, model_c])
Example #10
0
# Keep the first G entries along the last axis and mark the non-NaN values.
y = y[:, :, :G]
mask = np.logical_not(np.isnan(y))

# Select the observed entries and add a trailing axis.
Y = np.expand_dims(y[mask], axis=1)
X = np.expand_dims(x[mask], axis=1)
# Index grid of shape (N, G) repeated T times, filtered like the data.
weight_idx = np.tile(
    np.arange(N * G).reshape(N, G)[np.newaxis],
    (T, 1, 1),
)[mask]

# gp objects: L extra clusters are added when global trajectories are enabled
num_clusters = K * L + L if global_trajectories else K * L

kernel = mk.SharedIndependentMok(gpflow.kernels.RBF(1), num_clusters)
feature = mf.SharedIndependentMof(
    gpflow.features.InducingPoints(
        np.arange(T, dtype=np.float64)[:, np.newaxis]))
likelihood = gpflow.likelihoods.Gaussian()

# model -- for hyperparameter learning
with gpflow.defer_build():
    m = MixtureSVGP(
        X,
        Y,
        weight_idx,
        kern=kernel,
        num_latent=num_clusters,
        num_data=X.shape[0],
        likelihood=likelihood,
        feat=feature,
        minibatch_size=minibatch_size,
    )
Example #11
0
    def __init__(self,
                 latent_dim,
                 Y,
                 inputs=None,
                 emissions=None,
                 px1_mu=None,
                 px1_cov=None,
                 kern=None,
                 Z=None,
                 n_ind_pts=100,
                 mean_fn=None,
                 Q_diag=None,
                 Umu=None,
                 Ucov_chol=None,
                 qx1_mu=None,
                 qx1_cov=None,
                 As=None,
                 bs=None,
                 Ss=None,
                 n_samples=100,
                 seed=None,
                 parallel_iterations=10,
                 jitter=gps.numerics.jitter_level,
                 name=None):
        """Create the model's parameters, emission model and GP transition.

        Args:
            latent_dim: Dimension of the latent state.
            Y: Observations; ``Y.shape`` is unpacked as ``(T, obs_dim)`` and
                ``Y`` is stored as a non-trainable ``Param``.
            inputs: Optional inputs, stored as a non-trainable ``Param``.
                ``input_dim`` is ``inputs.shape[1]``, or 0 when omitted.
            emissions: Emission model; a falsy value selects
                ``GaussianEmissions(latent_dim, obs_dim)``.
            px1_mu: Value of the non-trainable ``px1_mu``; zeros of length
                ``latent_dim`` when ``None``.
            px1_cov: When given, a 1-D value is stored as its element-wise
                square root and anything else as its Cholesky factor, in the
                non-trainable ``px1_cov_chol``.  ``None`` leaves
                ``px1_cov_chol`` as ``None``.
            kern: A single ``gp.kernels.Kernel`` (wrapped in a
                ``SharedIndependentMok``) or a list of kernels (wrapped in a
                ``SeparateIndependentMok``).  A falsy value selects one ARD
                ``Matern32`` kernel per latent dimension.
            Z: Inducing inputs.  A 2-D ndarray is shared across latent
                dimensions; any other value is iterated and gives one feature
                per element.  ``None`` draws standard-normal inducing inputs.
            n_ind_pts: Number of inducing points, used only when ``Z`` is
                ``None``; otherwise read from the second-to-last axis of
                ``Z`` (or of ``Z[0]`` when ``Z`` is a list).
            mean_fn: Mean function; a falsy value selects
                ``mean_fns.Identity(latent_dim)``.
            Q_diag: Its element-wise square root initialises the
                positive-constrained ``Q_sqrt``; ones when ``None``.
            Umu: Initial value of ``Umu``; zeros of shape
                ``(latent_dim, n_ind_pts)`` when ``None``.
            Ucov_chol: Initial value of ``Ucov_chol``; ``latent_dim`` stacked
                identity matrices when ``None``.
            qx1_mu: Initial value of ``qx1_mu``; zeros when ``None``.
            qx1_cov: Its Cholesky factor initialises ``qx1_cov_chol``; the
                identity is used when ``None``.
            As: Initial value of ``As``; ones of shape
                ``(T - 1, latent_dim)`` when ``None``.
            bs: Initial value of ``bs``; zeros of shape
                ``(T - 1, latent_dim)`` when ``None``.
            Ss: ``False`` creates no ``S_chols`` parameter and sets
                ``_S_chols`` to ``None``.  ``None`` initialises ``S_chols``
                by tiling ``Q_sqrt`` over ``T - 1`` steps.  A 2-D array is
                stored as its element-wise square root; any other array as
                its Cholesky factor.
            n_samples: Stored as ``n_samples``; also sizes the NaN
                placeholder returned by the default ``sample_fn``.
            seed: Stored as ``seed``.
            parallel_iterations: Stored as ``parallel_iterations``.
            jitter: Stored as ``jitter``.
            name: Forwarded to the parent constructor.
        """

        super().__init__(name=name)

        self.latent_dim = latent_dim
        # Rows of Y give the number of time steps, columns the observation size.
        self.T, self.obs_dim = Y.shape
        self.Y = Param(Y, trainable=False)

        self.inputs = None if inputs is None else Param(inputs,
                                                        trainable=False)
        self.input_dim = 0 if self.inputs is None else self.inputs.shape[1]

        # qx1: mean and lower-triangular Cholesky factor (both trainable).
        self.qx1_mu = Param(
            np.zeros(self.latent_dim) if qx1_mu is None else qx1_mu)
        self.qx1_cov_chol = Param(
            np.eye(self.latent_dim)
            if qx1_cov is None else np.linalg.cholesky(qx1_cov),
            transform=gtf.LowerTriangular(self.latent_dim, squeeze=True))

        # Per-step parameters, one row for each of the T - 1 steps.
        self.As = Param(
            np.ones((self.T - 1, self.latent_dim)) if As is None else As)
        self.bs = Param(
            np.zeros((self.T - 1, self.latent_dim)) if bs is None else bs)

        self.Q_sqrt = Param(
            np.ones(self.latent_dim) if Q_diag is None else Q_diag**0.5,
            transform=gtf.positive)
        # NOTE(review): the Ss-is-False branch sets `_S_chols` while the other
        # branch sets `S_chols` -- presumably a property defined elsewhere in
        # the class bridges the two names; confirm.
        if Ss is False:
            self._S_chols = None
        else:
            # Diagonal form (positive transform) when Ss is None or 2-D,
            # full lower-triangular form otherwise.
            self.S_chols = Param(
                np.tile(self.Q_sqrt.value.copy()[None, ...], [self.T - 1, 1])
                if Ss is None else
                (np.sqrt(Ss) if Ss.ndim == 2 else np.linalg.cholesky(Ss)),
                transform=gtf.positive if
                (Ss is None or Ss.ndim == 2) else gtf.LowerTriangular(
                    self.latent_dim, num_matrices=self.T - 1, squeeze=False))

        self.emissions = emissions or GaussianEmissions(
            latent_dim=self.latent_dim, obs_dim=self.obs_dim)

        # px1: fixed (non-trainable) mean and optional covariance factor.
        self.px1_mu = Param(
            np.zeros(self.latent_dim) if px1_mu is None else px1_mu,
            trainable=False)
        self.px1_cov_chol = None if px1_cov is None else \
            Param(np.sqrt(px1_cov) if px1_cov.ndim == 1 else np.linalg.cholesky(px1_cov), trainable=False,
                  transform=gtf.positive if px1_cov.ndim == 1 else gtf.LowerTriangular(self.latent_dim, squeeze=True))

        self.n_samples = n_samples
        self.seed = seed
        self.parallel_iterations = parallel_iterations
        self.jitter = jitter

        # Inference-specific attributes (see gpssm_models.py for appropriate choices):
        # The defaults below only produce NaNs: a (T, n_samples, latent_dim)
        # NaN tensor for sampling and a NaN scalar for the KL term.
        nans = tf.constant(np.zeros(
            (self.T, self.n_samples, self.latent_dim)) * np.nan,
                           dtype=gps.float_type)
        self.sample_fn = lambda **kwargs: (nans, None)
        self.sample_kwargs = {}
        self.KL_fn = lambda *fs: tf.constant(np.nan, dtype=gps.float_type)

        # GP Transitions:
        # Number of inducing points: explicit when Z is absent, else read from Z.
        self.n_ind_pts = n_ind_pts if Z is None else (
            Z[0].shape[-2] if isinstance(Z, list) else Z.shape[-2])

        # A single 2-D array of inducing inputs is shared by all latent
        # dimensions; otherwise one feature is created per element.
        if isinstance(Z, np.ndarray) and Z.ndim == 2:
            self.Z = mf.SharedIndependentMof(gp.features.InducingPoints(Z))
        else:
            Z_list = [
                np.random.randn(self.n_ind_pts, self.latent_dim +
                                self.input_dim) for _ in range(self.latent_dim)
            ] if Z is None else [z for z in Z]
            self.Z = mf.SeparateIndependentMof(
                [gp.features.InducingPoints(z) for z in Z_list])

        # A single kernel is shared by all latent dimensions; otherwise a
        # list of kernels is used, defaulting to one ARD Matern32 each.
        if isinstance(kern, gp.kernels.Kernel):
            self.kern = mk.SharedIndependentMok(kern, self.latent_dim)
        else:
            kern_list = kern or [
                gp.kernels.Matern32(self.latent_dim + self.input_dim, ARD=True)
                for _ in range(self.latent_dim)
            ]
            self.kern = mk.SeparateIndependentMok(kern_list)

        self.mean_fn = mean_fn or mean_fns.Identity(self.latent_dim)
        self.Umu = Param(
            np.zeros((self.latent_dim, self.n_ind_pts))
            if Umu is None else Umu)  # (Lm^-1)(Umu - m(Z))
        LT_transform = gtf.LowerTriangular(self.n_ind_pts,
                                           num_matrices=self.latent_dim,
                                           squeeze=False)
        self.Ucov_chol = Param(np.tile(
            np.eye(self.n_ind_pts)[None, ...], [self.latent_dim, 1, 1])
                               if Ucov_chol is None else Ucov_chol,
                               transform=LT_transform)  # (Lm^-1)Lu
        # Starts unset; presumably a cache for the Kzz matrix -- confirm against its users.
        self._Kzz = None
 def shared_independent(self):
     """Return a ``SharedIndependentMof`` wrapping the result of ``make_ip()``."""
     inducing_points = make_ip()
     return mf.SharedIndependentMof(inducing_points)