def test_compare_mixed_kernel(session_tf):
    data = DataMixedKernel

    kern_list = [RBF(data.D) for _ in range(data.L)]
    k1 = mk.SeparateMixedMok(kern_list, W=data.W)
    f1 = mf.SharedIndependentMof(InducingPoints(data.X[:data.M, ...].copy()))
    m1 = SVGP(data.X, data.Y, k1, Gaussian(), feat=f1, q_mu=data.mu_data, q_sqrt=data.sqrt_data)

    kern_list = [RBF(data.D) for _ in range(data.L)]
    k2 = mk.SeparateMixedMok(kern_list, W=data.W)
    f2 = mf.MixedKernelSharedMof(InducingPoints(data.X[:data.M, ...].copy()))
    m2 = SVGP(data.X, data.Y, k2, Gaussian(), feat=f2, q_mu=data.mu_data, q_sqrt=data.sqrt_data)

    check_equality_predictions(session_tf, [m1, m2])
def build_deep_gp(input_dim, num_data):
    layers = [input_dim, 2, 2, 1]
    # Below are different ways to build layers

    # 1. Pass in lists:
    kernel_list = [RBF(), Matern12()]
    num_inducing = [25, 25]
    l1_kernel = construct_basic_kernel(kernels=kernel_list)
    l1_inducing = construct_basic_inducing_variables(num_inducing=num_inducing, input_dim=layers[0])

    # 2. Pass in kernels, specify output dims (shared hyperparams/variables)
    l2_kernel = construct_basic_kernel(kernels=RBF(), output_dim=layers[2], share_hyperparams=True)
    l2_inducing = construct_basic_inducing_variables(
        num_inducing=25, input_dim=layers[1], share_variables=True
    )

    # 3. Pass in kernels, specify output dims (independent hyperparams/vars)
    # By default the constructor will make independent copies
    l3_kernel = construct_basic_kernel(kernels=RBF(), output_dim=layers[3])
    l3_inducing = construct_basic_inducing_variables(
        num_inducing=25, input_dim=layers[2], output_dim=layers[3]
    )

    # Assemble at the end
    gp_layers = [
        GPLayer(l1_kernel, l1_inducing, num_data),
        GPLayer(l2_kernel, l2_inducing, num_data),
        GPLayer(l3_kernel, l3_inducing, num_data, mean_function=Zero()),
    ]
    return DeepGP(gp_layers, Gaussian(0.1))
def make_single_layer_models(X, Y, Z):
    D = X.shape[1]
    Y_mean, Y_std = np.average(Y), np.std(Y)

    m_sgpr = SGPR(X, Y, RBF(D, variance=Y_std**2), Z.copy(), mean_function=Constant(Y_mean))
    m_svgp = SVGP(X, Y, RBF(D, variance=Y_std**2), Gaussian(), Z.copy(), mean_function=Constant(Y_mean))
    m_fitc = GPRFITC(X, Y, RBF(D, variance=Y_std**2), Z.copy(), mean_function=Constant(Y_mean))

    for m in [m_sgpr, m_svgp, m_fitc]:
        m.mean_function.fixed = True
        m.likelihood.variance = 0.1 * Y_std

    return m_sgpr, m_svgp, m_fitc
def test_mixed_mok_with_Id_vs_independent_mok(session_tf):
    data = DataMixedKernelWithEye

    # Independent model
    k1 = mk.SharedIndependentMok(RBF(data.D, variance=0.5, lengthscales=1.2), data.L)
    f1 = InducingPoints(data.X[:data.M, ...].copy())
    m1 = SVGP(data.X, data.Y, k1, Gaussian(), f1,
              q_mu=data.mu_data_full, q_sqrt=data.sqrt_data_full)
    m1.set_trainable(False)
    m1.q_sqrt.set_trainable(True)
    gpflow.training.ScipyOptimizer().minimize(m1, maxiter=data.MAXITER)

    # Mixed model
    kern_list = [RBF(data.D, variance=0.5, lengthscales=1.2) for _ in range(data.L)]
    k2 = mk.SeparateMixedMok(kern_list, data.W)
    f2 = InducingPoints(data.X[:data.M, ...].copy())
    m2 = SVGP(data.X, data.Y, k2, Gaussian(), f2,
              q_mu=data.mu_data_full, q_sqrt=data.sqrt_data_full)
    m2.set_trainable(False)
    m2.q_sqrt.set_trainable(True)
    gpflow.training.ScipyOptimizer().minimize(m2, maxiter=data.MAXITER)

    check_equality_predictions(session_tf, [m1, m2])
def make_dgp(X, Y, Z, L):
    D = X.shape[1]

    # the layer shapes are defined by the kernel dims; the hidden layer dims are hardcoded below
    kernels = []
    # for l in range(L):
    kernels.append(RBF(5))
    kernels.append(RBF(2))
    kernels.append(RBF(9))

    # between layer noise (doesn't actually make much difference but we include it anyway)
    # for kernel in kernels[:-1]:
    #     kernel += White(D, variance=1e-5)

    mb = 1000 if X.shape[0] > 1000 else None
    model = DGP(X, Y, Z, kernels, Gaussian(), num_samples=10, minibatch_size=mb)

    # start the inner layers almost deterministically
    for layer in model.layers[:-1]:
        layer.q_sqrt = layer.q_sqrt.value * 1e-5

    return model
def test_optimize(self):
    with defer_build():
        input_layer = InputLayer(input_dim=1, output_dim=1,
                                 num_inducing=self.M,
                                 kernel=RBF(1) + White(1),
                                 multitask=True)
        output_layer = OutputLayer(input_dim=1, output_dim=1,
                                   num_inducing=self.M,
                                   kernel=RBF(1) + White(1),
                                   multitask=True)
        seq = MultitaskSequential([input_layer, output_layer])
        model = MultitaskDSDGP(X=self.X, Y=self.Y, Z=self.Z, layers=seq,
                               likelihood=SwitchedLikelihood([Gaussian(), Gaussian()]),
                               num_latent=1)
    model.compile()

    before = model.compute_log_likelihood()
    opt = gpflow.train.AdamOptimizer(0.01)
    opt.minimize(model, maxiter=100)
    after = model.compute_log_likelihood()
    self.assertGreaterEqual(after, before)
def test_add_to_full(self):
    input_layer = InputLayer(2, 2, 10, RBF(2))
    hidden_layer_1 = HiddenLayer(2, 2, 10, RBF(2))
    hidden_layer_2 = HiddenLayer(2, 2, 10, RBF(2))
    hidden_layer_3 = HiddenLayer(3, 2, 10, RBF(3))
    output_layer = OutputLayer(2, 2, 10, RBF(2))

    # Add hidden layer with correct dimensions
    with self.subTest():
        layer_list = [input_layer, hidden_layer_1]
        seq = Sequential(layer_list)
        seq.add(hidden_layer_2)
        self.assertIs(seq.layers[-1], hidden_layer_2)

    # Add hidden layer with incorrect dimensions
    with self.subTest():
        layer_list = [input_layer, hidden_layer_1]
        seq = Sequential(layer_list)
        with self.assertRaises(AssertionError):
            seq.add(hidden_layer_3)

    # Add output layer with correct dimensions
    with self.subTest():
        layer_list = [input_layer, hidden_layer_1]
        seq = Sequential(layer_list)
        seq.add(output_layer)
        self.assertIs(seq.layers[-1], output_layer)

    # Add hidden layer after output layer
    with self.subTest():
        layer_list = [input_layer, output_layer]
        seq = Sequential(layer_list)
        with self.assertRaises(ValueError):
            seq.add(hidden_layer_1)
def test_multioutput_with_diag_q_sqrt(session_tf):
    data = DataMixedKernel

    q_sqrt_diag = np.ones((data.M, data.L)) * 2
    q_sqrt = np.repeat(np.eye(data.M)[None, ...], data.L, axis=0) * 2  # L x M x M

    kern_list = [RBF(data.D) for _ in range(data.L)]
    k1 = mk.SeparateMixedMok(kern_list, W=data.W)
    f1 = mf.SharedIndependentMof(InducingPoints(data.X[:data.M, ...].copy()))
    m1 = SVGP(data.X, data.Y, k1, Gaussian(), feat=f1,
              q_mu=data.mu_data, q_sqrt=q_sqrt_diag, q_diag=True)

    kern_list = [RBF(data.D) for _ in range(data.L)]
    k2 = mk.SeparateMixedMok(kern_list, W=data.W)
    f2 = mf.SharedIndependentMof(InducingPoints(data.X[:data.M, ...].copy()))
    m2 = SVGP(data.X, data.Y, k2, Gaussian(), feat=f2,
              q_mu=data.mu_data, q_sqrt=q_sqrt, q_diag=False)

    check_equality_predictions(session_tf, [m1, m2])
def test(self):
    kern1 = RBF(1)
    kern2 = RBF(2)
    lik = Gaussian()
    X = np.zeros((1, 1))
    model = DGP(X, X, X, [kern1, kern2], lik)
    model.compute_log_likelihood()
def setup_dataset(input_dim: int, num_data: int):
    lim = [0, 100]
    kernel = RBF(lengthscales=20)
    sigma = 0.01
    X = np.random.random(size=(num_data, input_dim)) * lim[1]
    cov = kernel.K(X) + np.eye(num_data) * sigma ** 2
    Y = np.random.multivariate_normal(np.zeros(num_data), cov)[:, None]
    Y = np.clip(Y, -0.5, 0.5)
    return X, Y
def __init__(self, X, Y, inducing_points, final_inducing_points,
             hidden_units, units, share_inducing_inputs=True):
    Model.__init__(self)
    assert X.shape[0] == Y.shape[0]
    self.num_data, D_X = X.shape
    self.D_Y = 1
    self.num_samples = 100

    kernels = []
    for l in range(hidden_units + 1):
        ks = []
        if l > 0:
            D = units
        else:
            D = D_X
        if l < hidden_units:
            for w in range(units):
                ks.append(RBF(D, lengthscales=1., variance=1.) + White(D, variance=1e-5))
        else:
            ks.append(RBF(D, lengthscales=1., variance=1.))
        kernels.append(ks)

    self.dims_in = [D_X] + [units] * hidden_units
    self.dims_out = [units] * hidden_units + [1]
    q_mus, q_sqrts, Zs, mean_functions = init_layers(
        X, self.dims_in, self.dims_out, inducing_points,
        final_inducing_points, share_inducing_inputs)

    layers = []
    for q_mu, q_sqrt, Z, mean_function, kernel in zip(q_mus, q_sqrts, Zs, mean_functions, kernels):
        layers.append(Layer(kernel, q_mu, q_sqrt, Z, mean_function))
    self.layers = ParamList(layers)

    for layer in self.layers[:-1]:  # fix the inner layer mean functions
        layer.mean_function.fixed = True

    self.likelihood = Gaussian()

    minibatch_size = 10000 if X.shape[0] > 10000 else None
    if minibatch_size is not None:
        self.X = MinibatchData(X, minibatch_size)
        self.Y = MinibatchData(Y, minibatch_size)
    else:
        self.X = DataHolder(X)
        self.Y = DataHolder(Y)
def test_separate_independent_mok(session_tf):
    """
    We use different independent kernels for each of the output dimensions.
    We can achieve this in two ways:
        1) efficient: SeparateIndependentMok with Shared/SeparateIndependentMof
        2) inefficient: SeparateIndependentMok with InducingPoints
    However, both methods should return the same conditional,
    and after optimization return the same log likelihood.
    """
    # Model 1 (inefficient)
    q_mu_1 = np.random.randn(Data.M * Data.P, 1)
    q_sqrt_1 = np.tril(np.random.randn(Data.M * Data.P, Data.M * Data.P))[None, ...]  # 1 x MP x MP
    kern_list_1 = [RBF(Data.D, variance=0.5, lengthscales=1.2) for _ in range(Data.P)]
    kernel_1 = mk.SeparateIndependentMok(kern_list_1)
    feature_1 = InducingPoints(Data.X[:Data.M, ...].copy())
    m1 = SVGP(Data.X, Data.Y, kernel_1, Gaussian(), feature_1, q_mu=q_mu_1, q_sqrt=q_sqrt_1)
    m1.set_trainable(False)
    m1.q_sqrt.set_trainable(True)
    m1.q_mu.set_trainable(True)
    gpflow.training.ScipyOptimizer().minimize(m1, maxiter=Data.MAXITER)

    # Model 2 (efficient)
    q_mu_2 = np.random.randn(Data.M, Data.P)
    q_sqrt_2 = np.array([np.tril(np.random.randn(Data.M, Data.M)) for _ in range(Data.P)])  # P x M x M
    kern_list_2 = [RBF(Data.D, variance=0.5, lengthscales=1.2) for _ in range(Data.P)]
    kernel_2 = mk.SeparateIndependentMok(kern_list_2)
    feature_2 = mf.SharedIndependentMof(InducingPoints(Data.X[:Data.M, ...].copy()))
    m2 = SVGP(Data.X, Data.Y, kernel_2, Gaussian(), feature_2, q_mu=q_mu_2, q_sqrt=q_sqrt_2)
    m2.set_trainable(False)
    m2.q_sqrt.set_trainable(True)
    m2.q_mu.set_trainable(True)
    gpflow.training.ScipyOptimizer().minimize(m2, maxiter=Data.MAXITER)

    check_equality_predictions(session_tf, [m1, m2])
def make_mf_dgp(cls, X, Y, Z, add_linear=True, minibatch_size=None):
    """
    Convenience constructor. Constructs an MF-DGP model from training data and inducing point locations.

    :param X: list of input arrays, one per fidelity
    :param Y: list of target arrays, one per fidelity
    :param Z: list of inducing point locations, one per fidelity
    :param add_linear: whether to include a Linear kernel term between fidelities
    :return: a DGP_Base model
    """
    n_fidelities = len(X)

    Din = X[0].shape[1]
    Dout = Y[0].shape[1]

    kernels = [RBF(Din, active_dims=list(range(Din)), variance=1., lengthscales=1, ARD=True)]
    for l in range(1, n_fidelities):
        D = Din + Dout
        D_range = list(range(D))
        k_corr = RBF(Din, active_dims=D_range[:Din], lengthscales=1, variance=1.0, ARD=True)
        k_prev = RBF(Dout, active_dims=D_range[Din:], variance=1., lengthscales=1.0)
        k_in = RBF(Din, active_dims=D_range[:Din], variance=1., lengthscales=1, ARD=True)
        if add_linear:
            k_l = k_corr * (k_prev + Linear(Dout, active_dims=D_range[Din:], variance=1.)) + k_in
        else:
            k_l = k_corr * k_prev + k_in
        kernels.append(k_l)

    """
    A White noise kernel is currently expected by Mf-DGP at all layers except the last.
    In cases where no noise is desired, this should be set to 0 and fixed, as follows:

        white = White(1, variance=0.)
        white.variance.trainable = False
        kernels[i] += white
    """
    for i, kernel in enumerate(kernels[:-1]):
        kernels[i] += White(1, variance=1e-6)

    num_data = 0
    for i in range(len(X)):
        _log.info('\nData at Fidelity {}'.format(i + 1))
        _log.info('X - {}'.format(X[i].shape))
        _log.info('Y - {}'.format(Y[i].shape))
        _log.info('Z - {}'.format(Z[i].shape))
        num_data += X[i].shape[0]

    layers = init_layers_mf(Y, Z, kernels, num_outputs=Dout)

    model = DGP_Base(X, Y, Gaussian(), layers, num_samples=10, minibatch_size=minibatch_size)

    return model
def test_initialize_params(self):
    input_layer = InputLayer(2, 2, 10, RBF(2))
    hidden_layer_1 = HiddenLayer(2, 2, 10, RBF(2))
    output_layer = OutputLayer(2, 1, 10, RBF(2))

    Z = np.ones((10, 2))
    X = np.ones((100, 2))

    seq = Sequential([input_layer, hidden_layer_1, output_layer])
    seq.initialize_params(X, Z)
    self.assertTrue(np.allclose(Z, seq.layers[0].feature.Z.value))
def make_DGP(L, D_problem, D_hidden, X, Y, Z):
    kernels = []

    # First layer
    kernels.append(RBF(D_problem, lengthscales=0.2, variance=1.) + White(D_problem, variance=1e-5))

    for l in range(L - 1):
        k = RBF(D_hidden, lengthscales=0.2, variance=1.) + White(D_hidden, variance=1e-5)
        kernels.append(k)

    m_dgp = DGP(X, Y, Z, kernels, Gaussian(), num_samples=10)

    # init the layers to near deterministic
    for layer in m_dgp.layers[:-1]:
        layer.q_sqrt = layer.q_sqrt.value * 1e-5

    return m_dgp
def make_dgp(L):
    # kernels = [ckern.WeightedColourPatchConv(RBF(25 * 1, lengthscales=10., variance=10.),
    #                                          [28, 28], [5, 5], colour_channels=1)]
    kernels = [RBF(784, lengthscales=10., variance=10.)]
    for l in range(L - 1):
        kernels.append(RBF(50, lengthscales=10., variance=10.))
    model = DGP(X, Y, Z, kernels,
                gpflow.likelihoods.MultiClass(num_classes),
                minibatch_size=minibatch_size,
                num_outputs=num_classes,
                dropout=0.0)

    # start things deterministic
    for layer in model.layers[:-1]:
        layer.q_sqrt = layer.q_sqrt.value * 1e-5

    return model
def test_dims(self):
    input_layer = InputLayer(2, 3, 10, RBF(2))
    hidden_layer_1 = HiddenLayer(3, 2, 10, RBF(2))
    hidden_layer_2 = HiddenLayer(2, 1, 10, RBF(2))
    hidden_layer_3 = HiddenLayer(1, 2, 10, RBF(3))
    output_layer = OutputLayer(2, 1, 10, RBF(2))

    layer_list = [
        input_layer,
        hidden_layer_1,
        hidden_layer_2,
        hidden_layer_3,
        output_layer
    ]

    seq = Sequential(layer_list)
    dims = seq.get_dims()
    reference = [(2, 3), (3, 2), (2, 1), (1, 2), (2, 1)]
    self.assertEqual(dims, reference)
def make_kern():
    k = RBF(D_in, lengthscales=float(D_in) ** 0.5, variance=1.0, ARD=True)
    k.variance.set_trainable(False)
    return k
def make_dgp(X, Y, Z, L):
    D = X.shape[1]
    Y_mean, Y_std = np.average(Y), np.std(Y)

    # the layer shapes are defined by the kernel dims, so here all hidden layers are D dimensional
    kernels = []
    for l in range(L):
        kernels.append(RBF(D, lengthscales=1., variance=1.))

    # between layer noise (doesn't actually make much difference but we include it anyway);
    # assign back into the list -- `kernel += White(...)` would only rebind the loop variable
    for i in range(len(kernels) - 1):
        kernels[i] += White(D, variance=1e-5)

    mb = 10000 if X.shape[0] > 10000 else None
    model = DGP(X, Y, Z, kernels, Gaussian(), num_samples=1, minibatch_size=mb)

    # same final layer inits we used for the single layer model
    model.layers[-1].kern.variance = Y_std**2
    model.likelihood.variance = Y_std * 0.1
    model.layers[-1].mean_function = Constant(Y_mean)
    model.layers[-1].mean_function.fixed = True

    # start the inner layers almost deterministically
    for layer in model.layers[:-1]:
        layer.q_sqrt = layer.q_sqrt.value * 1e-5

    return model
def setUp(self):
    self.rng = np.random.RandomState(42)
    input_dim = 2
    output_dim = 2
    kern_list = [RBF(2) for _ in range(output_dim)]
    mean_function = None
    self.Z = self.rng.randn(5, 2)
    num_inducing = 5

    self.layer = MultikernelOutputLayer(input_dim=input_dim,
                                        output_dim=output_dim,
                                        num_inducing=num_inducing,
                                        kernel_list=kern_list,
                                        share_Z=False,
                                        mean_function=mean_function)
    self.layer_shared_Z = MultikernelOutputLayer(input_dim=input_dim,
                                                 output_dim=output_dim,
                                                 num_inducing=num_inducing,
                                                 kernel_list=kern_list,
                                                 share_Z=True,
                                                 mean_function=mean_function)
    self.X = self.rng.randn(10, 2)
def get_non_linear_input_dependent_kernel(original_X, current_X):
    X_dim = original_X.shape[1]
    Y_dim = current_X.shape[1] - X_dim

    k1 = RBF(input_dim=X_dim, active_dims=list(range(X_dim)))
    k2 = RationalQuadratic(input_dim=X_dim + Y_dim, active_dims=list(range(0, X_dim + Y_dim)))

    return k1 + k2
def map_base_kernel(s, dim, init_hyper_fixed):
    if init_hyper_fixed:
        # TODO: ARD
        # if dim == 1:
        if s == 's':
            k = RBF(dim, lengthscales=1, variance=0.5)
        elif s == 'r':
            k = RQ(dim, lengthscales=1, variance=0.5, alpha=0.5)
        elif s == 'p':
            k = Per(dim, period=1, lengthscales=0.1, variance=0.5)
        else:
            k = Lin(dim, variance=0.5)
        # else:
        #     if s == 's':
        #         k = RBF(dim, lengthscales=1 * np.ones(dim), variance=0.5)
        #     elif s == 'r':
        #         k = RQ(dim, lengthscales=1 * np.ones(dim), variance=0.5, alpha=0.5)
        #     elif s == 'p':
        #         k = Per(dim, period=1, lengthscales=0.1, variance=0.5)
        #     else:
        #         k = Lin(dim, variance=0.5 * np.ones(dim))
    else:
        if dim == 1:
            # this is for reusing hypers of trained models
            if s == 's':
                k = RBF(dim, lengthscales=rnd.ranf() * 5)
            elif s == 'r':
                k = RQ(dim, lengthscales=rnd.ranf() * 5)
            elif s == 'p':
                k = Per(dim, period=rnd.ranf() * 5, lengthscales=rnd.ranf() * 5)
            else:
                k = Lin(dim, variance=rnd.ranf() * 10)
        else:
            if s == 's':
                k = RBF(dim, lengthscales=rnd.ranf(dim) * 5)
            elif s == 'r':
                k = RQ(dim, lengthscales=rnd.ranf(dim) * 5)
            elif s == 'p':
                k = Per(dim, period=rnd.ranf() * 5, lengthscales=rnd.ranf() * 5)
            else:
                k = Lin(dim, variance=rnd.ranf(dim) * 10)
    return k
def test_sample_conditional_mixedkernel(session_tf):
    q_mu = np.random.randn(Data.M, Data.L)  # M x L
    q_sqrt = np.array([np.tril(np.random.randn(Data.M, Data.M)) for _ in range(Data.L)])  # L x M x M
    Z = Data.X[:Data.M, ...]  # M x D
    N = int(10e5)
    Xs = np.ones((N, Data.D), dtype=float_type)

    values = {"Xnew": Xs, "q_mu": q_mu, "q_sqrt": q_sqrt}
    placeholders = _create_placeholder_dict(values)
    feed_dict = _create_feed_dict(placeholders, values)

    # Path 1: mixed kernel: most efficient route
    W = np.random.randn(Data.P, Data.L)
    mixed_kernel = mk.SeparateMixedMok([RBF(Data.D) for _ in range(Data.L)], W)
    mixed_feature = mf.MixedKernelSharedMof(InducingPoints(Z.copy()))

    sample = sample_conditional(placeholders["Xnew"], mixed_feature, mixed_kernel,
                                placeholders["q_mu"], q_sqrt=placeholders["q_sqrt"], white=True)
    value = session_tf.run(sample, feed_dict=feed_dict)

    # Path 2: independent kernels, mixed later
    separate_kernel = mk.SeparateIndependentMok([RBF(Data.D) for _ in range(Data.L)])
    shared_feature = mf.SharedIndependentMof(InducingPoints(Z.copy()))

    sample2 = sample_conditional(placeholders["Xnew"], shared_feature, separate_kernel,
                                 placeholders["q_mu"], q_sqrt=placeholders["q_sqrt"], white=True)
    value2 = session_tf.run(sample2, feed_dict=feed_dict)
    value2 = np.matmul(value2, W.T)

    # check if mean and covariance of samples are similar
    np.testing.assert_array_almost_equal(np.mean(value, axis=0),
                                         np.mean(value2, axis=0), decimal=1)
    np.testing.assert_array_almost_equal(np.cov(value, rowvar=False),
                                         np.cov(value2, rowvar=False), decimal=1)
def setup_dataset(input_dim: int, num_data: int):
    lim = [0, 100]
    kernel = RBF(lengthscales=20)
    sigma = 0.01
    X = np.random.uniform(low=lim[0], high=lim[1], size=(num_data, input_dim))
    cov = kernel(X) + np.eye(num_data) * sigma ** 2
    Y = np.random.multivariate_normal(np.zeros(num_data), cov)[:, None]
    Y = np.clip(Y, -0.5, 0.5).astype(np.float64)
    return X, Y
def make_data(input_dim: int, output_dim: int, num_data: int):
    lim = [0, 20]
    sigma = 0.1
    X = np.random.random(size=(num_data, input_dim)) * lim[1]
    cov = RBF().K(X) + np.eye(num_data) * sigma ** 2
    Y = [np.random.multivariate_normal(np.zeros(num_data), cov)[:, None] for _ in range(output_dim)]
    Y = np.hstack(Y)
    return X, Y
def make_dgp(num_layers, X, Y, Z):
    kernels = [RBF(variance=2.0, lengthscales=2.0)]
    layer_sizes = [784]
    for l in range(num_layers - 1):
        kernels.append(RBF(variance=2.0, lengthscales=2.0))
        layer_sizes.append(30)
    model = DeepGP(X, Y, Z, kernels, layer_sizes, MultiClass(10), num_outputs=10)

    # init hidden layers to be near deterministic
    for layer in model.layers[:-1]:
        layer.q_sqrt.assign(layer.q_sqrt * 1e-5)

    return model
def get_linear_kernel(original_X, current_X):
    X_dim = original_X.shape[1]
    Y_dim = current_X.shape[1] - X_dim

    k1 = RBF(input_dim=X_dim, active_dims=list(range(X_dim)))
    if Y_dim > 0:
        k_linear = Linear(input_dim=Y_dim, active_dims=list(range(X_dim, X_dim + Y_dim)))
        return k1 + k_linear
    return k1
def generate_data(num_functions=10, N=1000):
    jitter = 1e-6
    Xs = np.linspace(-5.0, 5.0, N)[:, None]
    kernel = RBF(lengthscales=1.0)
    cov = kernel(Xs)
    L = np.linalg.cholesky(cov + np.eye(N) * jitter)
    epsilon = np.random.randn(N, num_functions)
    F = np.sin(Xs) + np.matmul(L, epsilon)
    return Xs, F
def prepare(self):
    N = 100
    M = 10
    rng = np.random.RandomState(42)
    X = rng.randn(N, 2)
    Y = rng.randn(N, 1)
    Z = rng.randn(M, 2)
    X_ind = rng.randint(0, 2, (N, 1))
    Z_ind = rng.randint(0, 2, (M, 1))
    X = np.hstack([X, X_ind])
    Y = np.hstack([Y, X_ind])
    Z = np.hstack([Z, Z_ind])

    Xs = rng.randn(M, 2)
    Xs_ind = rng.randint(0, 2, (M, 1))
    Xs = np.hstack([Xs, Xs_ind])

    with defer_build():
        lik = SwitchedLikelihood([Gaussian(), Gaussian()])
        input_layer = InputLayer(input_dim=2, output_dim=1, num_inducing=M,
                                 kernel=RBF(2) + White(2),
                                 mean_function=Linear(A=np.ones((3, 1))),
                                 multitask=True)
        output_layer = OutputLayer(input_dim=1, output_dim=1, num_inducing=M,
                                   kernel=RBF(1) + White(1),
                                   multitask=True)
        seq = MultitaskSequential([input_layer, output_layer])
        model = MultitaskDSDGP(X=X, Y=Y, Z=Z, layers=seq, likelihood=lik, num_latent=1)
    model.compile()

    return model, Xs
def test_sample_conditional(session_tf, whiten, full_cov, full_output_cov):
    q_mu = np.random.randn(Data.M, Data.P)  # M x P
    q_sqrt = np.array([np.tril(np.random.randn(Data.M, Data.M)) for _ in range(Data.P)])  # P x M x M
    Z = Data.X[:Data.M, ...]  # M x D
    Xs = np.ones((Data.N, Data.D), dtype=float_type)

    feature = InducingPoints(Z.copy())
    kernel = RBF(Data.D)

    values = {"Z": Z, "Xnew": Xs, "q_mu": q_mu, "q_sqrt": q_sqrt}
    placeholders = _create_placeholder_dict(values)
    feed_dict = _create_feed_dict(placeholders, values)

    # Path 1
    sample_f = sample_conditional(placeholders["Xnew"], feature, kernel, placeholders["q_mu"],
                                  q_sqrt=placeholders["q_sqrt"], white=whiten,
                                  full_cov=full_cov, full_output_cov=full_output_cov,
                                  num_samples=int(1e5))
    value_f, mean_f, var_f = session_tf.run(sample_f, feed_dict=feed_dict)
    value_f = value_f.reshape((-1,) + value_f.shape[2:])

    # Path 2
    if full_output_cov:
        pytest.skip("sample_conditional with X instead of feature does not support full_output_cov")

    sample_x = sample_conditional(placeholders["Xnew"], placeholders["Z"], kernel, placeholders["q_mu"],
                                  q_sqrt=placeholders["q_sqrt"], white=whiten,
                                  full_cov=full_cov, full_output_cov=full_output_cov,
                                  num_samples=int(1e5))
    value_x, mean_x, var_x = session_tf.run(sample_x, feed_dict=feed_dict)
    value_x = value_x.reshape((-1,) + value_x.shape[2:])

    # check if mean and covariance of samples are similar
    np.testing.assert_array_almost_equal(np.mean(value_x, axis=0),
                                         np.mean(value_f, axis=0), decimal=1)
    np.testing.assert_array_almost_equal(np.cov(value_x, rowvar=False),
                                         np.cov(value_f, rowvar=False), decimal=1)
    np.testing.assert_allclose(mean_x, mean_f)
    np.testing.assert_allclose(var_x, var_f)