def test_multioutput_with_diag_q_sqrt(session_tf):
    """Predictions must agree whether q_sqrt is given as a diagonal (M x L)
    or as full L x M x M matrices with the same diagonal."""
    data = DataMixedKernel

    q_sqrt_diag = np.ones((data.M, data.L)) * 2
    q_sqrt = np.repeat(np.eye(data.M)[None, ...], data.L, axis=0) * 2  # L x M x M

    kern_list = [RBF(data.D) for _ in range(data.L)]
    k1 = mk.SeparateMixedMok(kern_list, W=data.W)
    f1 = mf.SharedIndependentMof(InducingPoints(data.X[:data.M, ...].copy()))
    m1 = SVGP(data.X, data.Y, k1, Gaussian(), feat=f1,
              q_mu=data.mu_data, q_sqrt=q_sqrt_diag, q_diag=True)

    kern_list = [RBF(data.D) for _ in range(data.L)]
    k2 = mk.SeparateMixedMok(kern_list, W=data.W)
    f2 = mf.SharedIndependentMof(InducingPoints(data.X[:data.M, ...].copy()))
    m2 = SVGP(data.X, data.Y, k2, Gaussian(), feat=f2,
              q_mu=data.mu_data, q_sqrt=q_sqrt, q_diag=False)

    check_equality_predictions(session_tf, [m1, m2])
def test_compare_mixed_kernel(session_tf):
    """The same SeparateMixedMok should give identical predictions with a
    SharedIndependentMof and a MixedKernelSharedMof feature."""
    data = DataMixedKernel

    kern_list = [RBF(data.D) for _ in range(data.L)]
    k1 = mk.SeparateMixedMok(kern_list, W=data.W)
    f1 = mf.SharedIndependentMof(InducingPoints(data.X[:data.M, ...].copy()))
    m1 = SVGP(data.X, data.Y, k1, Gaussian(), feat=f1,
              q_mu=data.mu_data, q_sqrt=data.sqrt_data)

    kern_list = [RBF(data.D) for _ in range(data.L)]
    k2 = mk.SeparateMixedMok(kern_list, W=data.W)
    f2 = mf.MixedKernelSharedMof(InducingPoints(data.X[:data.M, ...].copy()))
    m2 = SVGP(data.X, data.Y, k2, Gaussian(), feat=f2,
              q_mu=data.mu_data, q_sqrt=data.sqrt_data)

    check_equality_predictions(session_tf, [m1, m2])
def test_mixed_mok_with_Id_vs_independent_mok(session_tf):
    """A SeparateMixedMok with identity W must match a SharedIndependentMok;
    both models are optimised over q_sqrt only before comparing."""
    data = DataMixedKernelWithEye

    # Independent model
    k1 = mk.SharedIndependentMok(RBF(data.D, variance=0.5, lengthscales=1.2), data.L)
    f1 = InducingPoints(data.X[:data.M, ...].copy())
    m1 = SVGP(data.X, data.Y, k1, Gaussian(), f1,
              q_mu=data.mu_data_full, q_sqrt=data.sqrt_data_full)
    m1.set_trainable(False)
    m1.q_sqrt.set_trainable(True)
    gpflow.training.ScipyOptimizer().minimize(m1, maxiter=data.MAXITER)

    # Mixed model (W = I)
    kern_list = [RBF(data.D, variance=0.5, lengthscales=1.2) for _ in range(data.L)]
    k2 = mk.SeparateMixedMok(kern_list, data.W)
    f2 = InducingPoints(data.X[:data.M, ...].copy())
    m2 = SVGP(data.X, data.Y, k2, Gaussian(), f2,
              q_mu=data.mu_data_full, q_sqrt=data.sqrt_data_full)
    m2.set_trainable(False)
    m2.q_sqrt.set_trainable(True)
    gpflow.training.ScipyOptimizer().minimize(m2, maxiter=data.MAXITER)

    check_equality_predictions(session_tf, [m1, m2])
def _build_model(self, Y_var, freqs, X, Y, kern_params=None, Z=None,
                 q_mu=None, q_sqrt=None, M=None, P=None, L=None, W=None,
                 num_data=None, jitter=1e-6, tec_scale=None,
                 W_trainable=False, use_mc=False, **kwargs):
    """
    Build the model from the data.

    X, Y: tensors, the inputs and outputs of the data

    Returns:
        gpflow.models.Model
    """
    settings.numerics.jitter = jitter

    with gp.defer_build():
        # Define the likelihood
        likelihood = ComplexHarmonicPhaseOnlyGaussianEncodedHetero(tec_scale=tec_scale)
        # likelihood.variance = 0.3**2  # (5.*np.pi/180.)**2
        # likelihood_var = log_normal_solve((5.*np.pi/180.)**2, 0.5*(5.*np.pi/180.)**2)
        # likelihood.variance.prior = LogNormal(likelihood_var[0], likelihood_var[1]**2)
        # likelihood.variance.transform = gp.transforms.positiveRescale(np.exp(likelihood_var[0]))
        likelihood.variance.trainable = False

        # Rescale to the internal tec scale. NB: these are only used below to
        # initialise the kernel variances; the model itself is constructed with
        # fresh diagonal variational parameters (q_mu=None, q_sqrt=None, q_diag=True).
        q_mu = q_mu / tec_scale      # M, L
        q_sqrt = q_sqrt / tec_scale  # L, M, M

        kern = mk.SeparateMixedMok(
            [self._build_kernel(None, None, None,
                                # kern_params[l].w, kern_params[l].mu, kern_params[l].v,
                                kern_var=np.var(q_mu[:, l]),
                                **kwargs.get("priors", {}))
             for l in range(L)], W)
        kern.W.trainable = W_trainable
        kern.W.prior = gp.priors.Gaussian(W, 0.01**2)

        feature = mf.MixedKernelSeparateMof([InducingPoints(Z) for _ in range(L)])
        mean = Zero()
        model = HeteroscedasticPhaseOnlySVGP(Y_var, freqs, X, Y, kern, likelihood,
                                             feat=feature, mean_function=mean,
                                             minibatch_size=None, num_latent=P,
                                             num_data=num_data, whiten=False,
                                             q_mu=None, q_sqrt=None, q_diag=True)
        for feat in feature.feat_list:
            feat.Z.trainable = True
        model.q_mu.trainable = True
        model.q_mu.prior = gp.priors.Gaussian(0., 0.05**2)
        model.q_sqrt.trainable = True
        # model.q_sqrt.prior = gp.priors.Gaussian(0., (0.005/tec_scale)**2)

    model.compile()
    tf.summary.image('W', kern.W.constrained_tensor[None, :, :, None])
    tf.summary.image('q_mu', model.q_mu.constrained_tensor[None, :, :, None])
    # tf.summary.image('q_sqrt', model.q_sqrt.constrained_tensor[:, :, :, None])
    return model
def test_MixedMok_Kgg(session_tf):
    data = DataMixedKernel
    kern_list = [RBF(data.D) for _ in range(data.L)]
    kern = mk.SeparateMixedMok(kern_list, W=data.W)

    Kgg = kern.compute_Kgg(Data.X, Data.X)  # L x N x N
    Kff = kern.compute_K(Data.X, Data.X)    # N x P x N x P

    # Kff = W @ Kgg @ W^T
    Kff_inferred = np.einsum("lnm,pl,ql->npmq", Kgg, data.W, data.W)

    np.testing.assert_array_almost_equal(Kff, Kff_inferred, decimal=5)
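
# Standalone numpy sanity check of the mixing identity tested above
# (a minimal sketch with made-up shapes, independent of the fixtures):
# Kff[n, p, m, q] = sum_l W[p, l] * Kgg[l, n, m] * W[q, l]
def _check_mixing_identity(L=2, N=4, P=3):
    Kgg = np.random.randn(L, N, N)
    W = np.random.randn(P, L)
    Kff = np.einsum("lnm,pl,ql->npmq", Kgg, W, W)
    # element-wise reference computation for one entry
    ref = sum(W[2, l] * Kgg[l, 1, 3] * W[0, l] for l in range(L))
    np.testing.assert_allclose(Kff[1, 2, 3, 0], ref)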
def init():
    feature = mf.SharedIndependentMof(gpflow.features.InducingPoints(Z.copy()))

    # Define POI kernels
    poi_list = [0, 1, 2, 3, 4]
    kern_list = []
    for i in range(len(poi_list)):
        kern = POI(effects=0.5, lengthscale=5, input_dim=D, locs_poi=locs_poi,
                   typeIndicator=typeIndicator, typIdx=poi_list[i],
                   locs=Z[:, 0:2], mindist=typeMinDist[i],
                   name=str(poi_list[i]), kernel_type="Linear")
        kern_list.append(kern)

    # Add spatial kernel to the kernel list
    kern_spatial = gpflow.kernels.Matern32(input_dim=D, lengthscales=100)
    kern_list.append(kern_spatial)

    L = len(kern_list)
    W = np.ones((L, 1))
    W_t = np.transpose(W)  # (P=1, L) mixing matrix
    kernel = mk.SeparateMixedMok(kern_list, W=W_t)

    # Define mean function
    # mean_fct = SlicedLinear(A=theta, p=p)
    mean_fct = SlicedNN(p=p)

    q_mu = np.random.normal(0.0, 1, (M, L))
    q_sqrt = np.repeat(np.eye(M)[None, ...], L, axis=0) * 1.0

    m = gpflow.models.SVGP(X=X_train, Y=y_train, kern=kernel,
                           likelihood=gpflow.likelihoods.Gaussian(),
                           feat=feature, whiten=True,
                           # minibatch_size=len(X_train),
                           mean_function=mean_fct,  # + mean_poi
                           q_mu=q_mu, q_sqrt=q_sqrt, name='svgp')
    m.likelihood.variance = 0.01

    # Initialize params
    m.feature.trainable = False
    m.kern.W.trainable = False
    return m
def _build_model(self, Y_var, X, Y, Z=None, q_mu=None, q_sqrt=None,
                 M=None, P=None, L=None, W=None, num_data=None,
                 jitter=1e-6, tec_scale=None, W_diag=False, **kwargs):
    """
    Build the model from the data.

    X, Y: tensors, the inputs and outputs of the data

    Returns:
        gpflow.models.Model
    """
    settings.numerics.jitter = jitter

    with gp.defer_build():
        # Define the likelihood
        likelihood = GaussianTecHetero(tec_scale=tec_scale)

        q_mu = q_mu / tec_scale      # M, L
        q_sqrt = q_sqrt / tec_scale  # L, M, M

        kern = mk.SeparateMixedMok(
            [self._build_kernel(kern_var=np.var(q_mu[:, l]),
                                **kwargs.get("priors", {}))
             for l in range(L)], W)
        if W_diag:
            # kern.W.transform = Reshape(W.shape, (P, L, L))(gp.transforms.DiagMatrix(L)(gp.transforms.positive))
            kern.W.trainable = False
        else:
            kern.W.transform = Reshape(W.shape, (P // L, L, L))(
                MatrixSquare()(gp.transforms.LowerTriangular(L, P // L)))
            kern.W.trainable = True

        feature = mf.MixedKernelSeparateMof([InducingPoints(Z) for _ in range(L)])
        mean = Zero()
        model = HeteroscedasticTecSVGP(Y_var, X, Y, kern, likelihood,
                                       feat=feature, mean_function=mean,
                                       minibatch_size=None, num_latent=P,
                                       num_data=num_data, whiten=False,
                                       q_mu=q_mu, q_sqrt=q_sqrt)
        for feat in feature.feat_list:
            feat.Z.trainable = True
        model.q_mu.trainable = True
        model.q_sqrt.trainable = True
        # model.q_sqrt.prior = gp.priors.Gaussian(q_sqrt, 0.005**2)

    model.compile()
    tf.summary.image('W', kern.W.constrained_tensor[None, :, :, None])
    tf.summary.image('q_mu', model.q_mu.constrained_tensor[None, :, :, None])
    tf.summary.image('q_sqrt', model.q_sqrt.constrained_tensor[:, :, :, None])
    return model
def test_sample_conditional_mixedkernel(session_tf):
    """Sample a mixed-kernel conditional two ways (mixed directly vs.
    independent then mixed with W) and check the sample moments agree."""
    q_mu = np.random.randn(Data.M, Data.L)  # M x L
    q_sqrt = np.array([np.tril(np.random.randn(Data.M, Data.M))
                       for _ in range(Data.L)])  # L x M x M
    Z = Data.X[:Data.M, ...]  # M x D
    N = int(10e5)
    Xs = np.ones((N, Data.D), dtype=float_type)

    values = {"Xnew": Xs, "q_mu": q_mu, "q_sqrt": q_sqrt}
    placeholders = _create_placeholder_dict(values)
    feed_dict = _create_feed_dict(placeholders, values)

    # Path 1: mixed kernel: most efficient route
    W = np.random.randn(Data.P, Data.L)
    mixed_kernel = mk.SeparateMixedMok([RBF(Data.D) for _ in range(Data.L)], W)
    mixed_feature = mf.MixedKernelSharedMof(InducingPoints(Z.copy()))
    sample = sample_conditional(placeholders["Xnew"], mixed_feature, mixed_kernel,
                                placeholders["q_mu"],
                                q_sqrt=placeholders["q_sqrt"], white=True)
    value = session_tf.run(sample, feed_dict=feed_dict)

    # Path 2: independent kernels, mixed later
    separate_kernel = mk.SeparateIndependentMok([RBF(Data.D) for _ in range(Data.L)])
    shared_feature = mf.SharedIndependentMof(InducingPoints(Z.copy()))
    sample2 = sample_conditional(placeholders["Xnew"], shared_feature, separate_kernel,
                                 placeholders["q_mu"],
                                 q_sqrt=placeholders["q_sqrt"], white=True)
    value2 = session_tf.run(sample2, feed_dict=feed_dict)
    value2 = np.matmul(value2, W.T)

    # check if mean and covariance of samples are similar
    np.testing.assert_array_almost_equal(np.mean(value, axis=0),
                                         np.mean(value2, axis=0), decimal=1)
    np.testing.assert_array_almost_equal(np.cov(value, rowvar=False),
                                         np.cov(value2, rowvar=False), decimal=1)
def build_default_kernel(n_dims, n_kernels, n_outputs, add_bias=False,
                         w_prior=0.1, kern_var_trainable=False,
                         rbf_var=0.1, bias_var=0.1):
    L = n_kernels
    D = n_dims
    P = n_outputs

    with gpf.defer_build():
        # Use some sensible defaults
        kern_list = [gpf.kernels.RBF(D, ARD=True, variance=1.0) for _ in range(L)]
        for cur_kern in kern_list:
            cur_kern.lengthscales.prior = gpf.priors.Gamma(3, 3)
            cur_kern.variance = rbf_var
            cur_kern.variance.set_trainable(kern_var_trainable)

        if add_bias:
            kern_list[-1] = gpf.kernels.Bias(D)
            kern_list[-1].variance = bias_var
            kern_list[-1].variance.set_trainable(kern_var_trainable)

        W_init = np.random.randn(P, L)
        kernel = mk.SeparateMixedMok(kern_list, W_init)
        kernel.W.prior = gpf.priors.Gaussian(0, w_prior)

    return kernel
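
# Hypothetical usage sketch for build_default_kernel (aliases gpf = gpflow and
# mk = gpflow.multioutput.kernels as in the surrounding snippets; sizes made up):
kernel = build_default_kernel(n_dims=3, n_kernels=2, n_outputs=5, add_bias=True)
kernel.compile()  # parameters were deferred above; build them now
print(kernel.W.read_value().shape)  # mixing matrix, expected (5, 2)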
def test_conditional_broadcasting(session_tf, full_cov, white, conditional_type):
    """
    Test that `conditional` and `sample_conditional` broadcast correctly
    over leading dimensions of Xnew. Xnew can be shape [..., N, D],
    and conditional should broadcast over the [...].
    """
    X_ = tf.placeholder(tf.float64, [None, None])
    q_mu = np.random.randn(Data.M, Data.Dy)
    q_sqrt = np.tril(np.random.randn(Data.Dy, Data.M, Data.M), -1)

    if conditional_type == "Z":
        feat = Data.Z
        kern = gpflow.kernels.Matern52(Data.Dx, lengthscales=0.5)
    elif conditional_type == "inducing_points":
        feat = gpflow.features.InducingPoints(Data.Z)
        kern = gpflow.kernels.Matern52(Data.Dx, lengthscales=0.5)
    elif conditional_type == "mixing":
        # variational params have different output dim in this case
        q_mu = np.random.randn(Data.M, Data.L)
        q_sqrt = np.tril(np.random.randn(Data.L, Data.M, Data.M), -1)
        feat = mf.MixedKernelSharedMof(gpflow.features.InducingPoints(Data.Z))
        kern = mk.SeparateMixedMok(
            kernels=[gpflow.kernels.Matern52(Data.Dx, lengthscales=0.5)
                     for _ in range(Data.L)],
            W=Data.W)

    if conditional_type == "mixing" and full_cov:
        pytest.skip("combination is not implemented")

    num_samples = 5
    sample_tf, mean_tf, cov_tf = sample_conditional(
        X_, feat, kern,
        tf.convert_to_tensor(q_mu),
        q_sqrt=tf.convert_to_tensor(q_sqrt),
        white=white, full_cov=full_cov, num_samples=num_samples)

    ss, ms, vs = [], [], []
    for X in Data.SX:
        s, m, v = session_tf.run([sample_tf, mean_tf, cov_tf], {X_: X})
        ms.append(m)
        vs.append(v)
        ss.append(s)
    ms = np.array(ms)
    vs = np.array(vs)
    ss = np.array(ss)

    ss_S12, ms_S12, vs_S12 = session_tf.run(
        sample_conditional(Data.SX, feat, kern,
                           tf.convert_to_tensor(q_mu),
                           q_sqrt=tf.convert_to_tensor(q_sqrt),
                           white=white, full_cov=full_cov,
                           num_samples=num_samples))

    ss_S1_S2, ms_S1_S2, vs_S1_S2 = session_tf.run(
        sample_conditional(Data.S1_S2_X, feat, kern,
                           tf.convert_to_tensor(q_mu),
                           q_sqrt=tf.convert_to_tensor(q_sqrt),
                           white=white, full_cov=full_cov,
                           num_samples=num_samples))

    assert_allclose(ss_S12.shape, ss.shape)
    assert_allclose(ms_S12, ms)
    assert_allclose(vs_S12, vs)

    assert_allclose(ms_S1_S2.reshape(Data.S1 * Data.S2, Data.N, Data.Dy), ms)
    assert_allclose(ss_S1_S2.shape,
                    [Data.S1, Data.S2, num_samples, Data.N, Data.Dy])
    if full_cov:
        assert_allclose(
            vs_S1_S2.reshape(Data.S1 * Data.S2, Data.Dy, Data.N, Data.N), vs)
    else:
        assert_allclose(
            vs_S1_S2.reshape(Data.S1 * Data.S2, Data.N, Data.Dy), vs)
def _make_part_model(self, X, Y, weights, Z, q_mu, q_sqrt, W, freqs,
                     minibatch_size=None, priors=None):
    """
    Create a gpflow model for a selection of data.

    X: array (N, Din)
    Y: array (N, P, Nf)
    weights: array like Y, the statistical weights of each datapoint
    minibatch_size: int
    Z: list of array (M, Din), the inducing point mean locations
    q_mu: list of array (M, L)
    q_sqrt: list of array (L, M, M)
    W: array (P, L)
    freqs: array (Nf,), the frequencies
    priors: dict of priors for the global model

    Returns:
        model: gpflow.models.Model
    """
    N, P, Nf = Y.shape
    _, Din = X.shape

    assert priors is not None
    likelihood_var = priors['likelihood_var']
    tec_kern_time_ls = priors['tec_kern_time_ls']
    tec_kern_dir_ls = priors['tec_kern_dir_ls']
    tec_kern_var = priors['tec_kern_var']
    tec_mean = priors['tec_mean']
    Z_var = priors['Z_var']

    P, L = W.shape

    with defer_build():
        # Define the likelihood
        likelihood = WrappedPhaseGaussianMulti(tec_scale=priors['tec_scale'],
                                               freqs=freqs)
        likelihood.variance = np.exp(likelihood_var[0])  # median as initial
        likelihood.variance.prior = LogNormal(likelihood_var[0],
                                              likelihood_var[1]**2)
        likelihood.variance.set_trainable(True)

        def _kern():
            kern_thin_layer = ThinLayer(np.array([0., 0., 0.]),
                                        priors['tec_scale'],
                                        active_dims=slice(2, 6, 1))
            kern_time = Matern32(1, active_dims=slice(6, 7, 1))
            kern_dir = Matern32(2, active_dims=slice(0, 2, 1))

            # time kernel
            kern_time.lengthscales = np.exp(tec_kern_time_ls[0])
            kern_time.lengthscales.prior = LogNormal(tec_kern_time_ls[0],
                                                     tec_kern_time_ls[1]**2)
            kern_time.lengthscales.set_trainable(True)
            kern_time.variance = 1.  # np.exp(tec_kern_var[0])
            # kern_time.variance.prior = LogNormal(tec_kern_var[0], tec_kern_var[1]**2)
            kern_time.variance.set_trainable(False)

            # directional kernel
            kern_dir.variance = np.exp(tec_kern_var[0])
            kern_dir.variance.prior = LogNormal(tec_kern_var[0],
                                                tec_kern_var[1]**2)
            kern_dir.variance.set_trainable(True)
            kern_dir.lengthscales = np.exp(tec_kern_dir_ls[0])
            kern_dir.lengthscales.prior = LogNormal(tec_kern_dir_ls[0],
                                                    tec_kern_dir_ls[1]**2)
            kern_dir.lengthscales.set_trainable(True)

            kern = kern_dir * kern_time  # (kern_thin_layer + kern_dir) * kern_time
            return kern

        kern = mk.SeparateMixedMok([_kern() for _ in range(L)], W)

        feature_list = []
        for _ in range(L):
            feat = InducingPoints(Z)
            # feat.Z.prior = Gaussian(Z, Z_var)
            feature_list.append(feat)
        feature = mf.MixedKernelSeparateMof(feature_list)

        mean = Zero()
        model = HomoscedasticPhaseOnlySVGP(weights, X, Y, kern, likelihood,
                                           feat=feature, mean_function=mean,
                                           minibatch_size=minibatch_size,
                                           num_latent=P, num_data=N,
                                           whiten=False, q_mu=q_mu, q_sqrt=q_sqrt)
        model.compile()
    return model
np.random.seed(23)
X = np.random.uniform(low=0., high=1., size=[300, 100])
Y = objective.f(X, fulldim=False, noisy=True)


def _kern():
    return gpflow.kernels.Matern32(input_dim=D, ARD=True,
                                   lengthscales=np.ones(shape=[D]) * 0.2)


np.random.seed(123)
with gpflow.defer_build():
    W = np.random.normal(loc=0., scale=1., size=[output, rank])
    kernels = mk.SeparateMixedMok([_kern() for _ in range(rank)], W)
    # kernels = mk.SharedIndependentMok(gpflow.kernels.Matern32(input_dim=D, ARD=True, lengthscales=np.ones(shape=[D]) * 0.2), output)

    feature_list = [gpflow.features.InducingPoints(Xnn[:M, :]) for _ in range(rank)]
    feature = mf.MixedKernelSeparateMof(feature_list)
    # feature = mf.MixedKernelSharedMof(gpflow.features.InducingPoints(Xnn[:M, ...].copy()))
    # feature = mf.SharedIndependentMof(gpflow.features.InducingPoints(Xnn[:M, ...].copy()))

    q_mu = np.zeros((M, rank))
    q_sqrt = np.repeat(np.eye(M)[None, ...], rank, axis=0) * 1.0

    likelihood = gpflow.likelihoods.Gaussian()
    likelihood.variance = 0.01
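
    # Hypothetical completion (not part of the original fragment): assemble the
    # SVGP from the pieces above, then compile once the defer_build block exits.
    model = gpflow.models.SVGP(X, Y, kernels, likelihood, feat=feature,
                               q_mu=q_mu, q_sqrt=q_sqrt, whiten=True)

model.compile()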
# start = start[:, 0:2]
# predict = predict[:, 0:2]
D = start.shape[1]    # number of input dimensions
M = 20                # number of inducing points
L = 1                 # number of latent GPs
P = predict.shape[1]  # number of observations = output dimensions
MAXITER = gpflow.test_util.notebook_niter(int(1e100))

q_mu = np.zeros((M, L))
q_sqrt = np.repeat(np.eye(M)[None, ...], L, axis=0) * 1.0

kern_list = [gpflow.kernels.RBF(D) + gpflow.kernels.Linear(D) for _ in range(L)]
kernel = mk.SeparateMixedMok(kern_list, W=np.random.randn(P, L))
feature = mf.MixedKernelSharedMof(gpflow.features.InducingPoints(start[:M, ...].copy()))

m = gpflow.models.SVGP(start, predict, kernel, gpflow.likelihoods.Gaussian(),
                       feat=feature, q_mu=q_mu, q_sqrt=q_sqrt)

opt = gpflow.train.ScipyOptimizer()
opt.minimize(m, disp=True, maxiter=MAXITER)

saver = gpflow.Saver()
saver.save('./model/multioutput.mdl', m)
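
# Restoring the model later (a minimal sketch; gpflow.Saver().load is the
# GPflow 1.x counterpart of the save call above, and typically wants a fresh
# TensorFlow graph/session):
m_loaded = gpflow.Saver().load('./model/multioutput.mdl')
mean, var = m_loaded.predict_y(start[:M, ...])  # quick predictive check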
def separate_mixed(self, num=Datum.L):
    return mk.SeparateMixedMok(make_kernels(num), Datum.W)