def __init__(self, kernel, inducing_variables, mean_function, white=False, **kwargs):
    super().__init__(**kwargs)
    self.inducing_points = inducing_variables
    self.num_inducing = inducing_variables.shape[0]
    m = inducing_variables.shape[1]

    # Initialise q_mu to zeros.
    q_mu = np.zeros((self.num_inducing, 1))
    self.q_mu = Parameter(q_mu, dtype=default_float())

    # Initialise q_sqrt to near deterministic. Store as lower triangular matrix L.
    q_sqrt = 1e-4 * np.eye(self.num_inducing, dtype=default_float())
    self.q_sqrt = Parameter(q_sqrt, transform=triangular())

    self.kernel = kernel
    self.mean_function = mean_function
    self.white = white

    # Initialise to prior (Ku) + jitter.
    if not self.white:
        Ku = self.kernel(self.inducing_points)
        Ku += default_jitter() * tf.eye(self.num_inducing, dtype=Ku.dtype)
        Lu = tf.linalg.cholesky(Ku)
        q_sqrt = Lu
        self.q_sqrt = Parameter(q_sqrt, transform=triangular())
def __init__(self, data: RegressionData, kernel, noise_variance: float = 1.0,
             parallel=False, max_parallel=10000):
    self.noise_variance = Parameter(noise_variance, transform=positive())
    ts, ys = data_input_to_tensor(data)
    super().__init__(kernel, None, None, num_latent_gps=ys.shape[-1])
    self.data = ts, ys

    filter_spec = kernel.get_spec(ts.shape[0])
    filter_ys_spec = tf.TensorSpec((ts.shape[0], 1), config.default_float())
    smoother_spec = kernel.get_spec(None)
    smoother_ys_spec = tf.TensorSpec((None, 1), config.default_float())

    if not parallel:
        self._kf = tf.function(
            partial(kf, return_loglikelihood=True, return_predicted=False),
            input_signature=[filter_spec, filter_ys_spec])
        self._kfs = tf.function(kfs, input_signature=[smoother_spec, smoother_ys_spec])
    else:
        self._kf = tf.function(
            partial(pkf, return_loglikelihood=True, max_parallel=ts.shape[0]),
            input_signature=[filter_spec, filter_ys_spec])
        self._kfs = tf.function(
            partial(pkfs, max_parallel=max_parallel),
            input_signature=[smoother_spec, smoother_ys_spec])
def test_robust_max_multiclass_symmetric(num_classes, num_points, tol, epsilon):
    """
    This test is based on the observation that for symmetric inputs the class
    predictions must have equal probability.
    """
    rng = np.random.RandomState(1)
    p = 1.0 / num_classes
    F = tf.ones((num_points, num_classes), dtype=default_float())
    Y = tf.convert_to_tensor(rng.randint(num_classes, size=(num_points, 1)), dtype=default_float())

    likelihood = MultiClass(num_classes)
    likelihood.invlink.epsilon = tf.convert_to_tensor(epsilon, dtype=default_float())

    mu, _ = likelihood.predict_mean_and_var(F, F)
    pred = likelihood.predict_density(F, F, Y)
    variational_expectations = likelihood.variational_expectations(F, F, Y)

    expected_mu = (p * (1.0 - epsilon) + (1.0 - p) * epsilon / (num_classes - 1)) * np.ones((num_points, 1))
    expected_log_density = np.log(expected_mu)

    # assert_allclose() would complain about shape mismatch
    assert np.allclose(mu, expected_mu, tol, tol)
    assert np.allclose(pred, expected_log_density, 1e-3, 1e-3)

    validation_variational_expectation = (p * np.log(1.0 - epsilon)
                                          + (1.0 - p) * np.log(epsilon / (num_classes - 1)))
    assert_allclose(
        variational_expectations,
        np.ones((num_points, 1)) * validation_variational_expectation,
        tol, tol)
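# --- Illustrative arithmetic check (not part of the original test) ----------
# With symmetric inputs, p = 1/K, so the RobustMax mean p*(1 - eps) + (1 - p)*eps/(K - 1)
# simplifies to exactly 1/K for any epsilon, which is what the test relies on.
K, eps = 3, 1e-3
p_sym = 1.0 / K
expected = p_sym * (1.0 - eps) + (1.0 - p_sym) * eps / (K - 1)
assert abs(expected - 1.0 / K) < 1e-12  # (1/K)*(1 - eps) + eps/K == 1/K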
def test_scipy__disconnected_variable(compile: bool, allow_unused_variables: bool) -> None:
    target1 = [0.2, 0.8]
    v1 = tf.Variable([0.5, 0.5], dtype=default_float(), name="v1")
    v2 = tf.Variable([0.5], dtype=default_float(), name="v2")

    def f() -> tf.Tensor:
        # v2 not used.
        return tf.reduce_sum((target1 - v1) ** 2)

    opt = gpflow.optimizers.Scipy()
    if allow_unused_variables:
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("always")
            result = opt.minimize(
                f, [v1, v2], compile=compile, allow_unused_variables=allow_unused_variables
            )

        (warning,) = w
        msg = warning.message.args[0]
        assert v2.name in msg

        assert result.success
        np.testing.assert_allclose(target1 + [0.5], result.x)
        np.testing.assert_allclose(target1, v1)
        np.testing.assert_allclose([0.5], v2)
    else:
        with pytest.raises(ValueError, match=v2.name):
            opt.minimize(f, [v1, v2], allow_unused_variables=allow_unused_variables)
def sample_from_conditional(self, X, z=None, full_cov=False):
    """Computes self.conditional and draws a sample using the reparameterisation
    trick, adding input propagation if necessary.

    :param X: A tensor, input points [S, N, D_in].
    :param z: A tensor or None, noise used in the reparameterisation trick.
    :param full_cov: A boolean, whether to calculate the full covariance or not.
    """
    mean, var = self.conditional_SND(X, full_cov=full_cov)
    S, N, D = tf.shape(X)[0], tf.shape(X)[1], self.num_outputs

    if z is None:
        z = tf.random.normal(tf.shape(mean), dtype=default_float())
    samples = reparameterise(mean, var, z, full_cov=full_cov)

    if self.input_prop_dim:
        shape = [S, N, self.input_prop_dim]
        # Get the first self.input_prop_dim dimensions of X to propagate.
        X_prop = tf.reshape(X[:, :, :self.input_prop_dim], shape)

        samples = tf.concat([X_prop, samples], axis=2)
        mean = tf.concat([X_prop, mean], axis=2)

        if full_cov:
            shape = [S, N, N, self.num_outputs]
            # Zero variance for the retained dimensions of X.
            zeros = tf.zeros(shape, dtype=default_float())
            var = tf.concat([zeros, var], axis=3)
        else:
            var = tf.concat([tf.zeros_like(X_prop), var], axis=2)

    return samples, mean, var
def init_test_gplvm(Y, latent_dim, kernel, num_inducing=None, inducing_variable=None,
                    X_mean_init=None, X_var_init=None):
    num_data = Y.shape[0]  # number of data points

    if X_mean_init is None:
        X_mean_init = tf.constant(PCA(n_components=latent_dim).fit_transform(Y), dtype=default_float())
    else:
        X_mean_init = tf.constant(X_mean_init, dtype=default_float())

    if X_var_init is None:
        X_var_init = tf.ones((num_data, latent_dim), dtype=default_float())
    else:
        X_var_init = tf.constant(X_var_init, dtype=default_float())

    if (inducing_variable is None) == (num_inducing is None):
        raise ValueError(
            "GPLVM needs exactly one of `inducing_variable` and `num_inducing`"
        )

    if inducing_variable is None:
        inducing_variable = tf.convert_to_tensor(
            np.random.permutation(X_mean_init.numpy())[:num_inducing], dtype=default_float())
        # inducing_variable = tf.convert_to_tensor(
        #     np.linspace(np.min(X_mean_init, axis=0), np.max(X_mean_init, axis=0), num_inducing),
        #     dtype=default_float())

    model = TestGPLVM(
        Y,
        X_data_mean=X_mean_init,
        X_data_var=X_var_init,
        kernel=kernel,
        inducing_variable=inducing_variable,
    )
    return model
def __init__(self, kernel, inducing_variables, q_mu_initial, q_sqrt_initial, mean_function,
             white=False, **kwargs):
    super().__init__(**kwargs)
    self.inducing_points = inducing_variables
    self.num_inducing = inducing_variables.shape[0]

    # Initialise q_mu to y^2_pi(i)
    q_mu = q_mu_initial[:, None]
    self.q_mu = Parameter(q_mu, dtype=default_float())

    # Initialise q_sqrt to near deterministic. Store as lower triangular matrix L.
    q_sqrt = 1e-4 * np.eye(self.num_inducing, dtype=default_float())
    # q_sqrt = np.diag(q_sqrt_initial)
    self.q_sqrt = Parameter(q_sqrt, transform=triangular())

    self.kernel = kernel
    self.mean_function = mean_function
    self.white = white
def test_sparse_mcmc_likelihoods_and_gradients():
    """
    This test makes sure that when the inducing points are the same as the data
    points, the sparse mcmc is the same as full mcmc
    """
    rng = np.random.RandomState(0)
    X, Y = rng.randn(10, 1), rng.randn(10, 1)
    v_vals = rng.randn(10, 1)

    likelihood = gpflow.likelihoods.StudentT()
    model_1 = gpflow.models.GPMC(
        data=(X, Y), kernel=gpflow.kernels.Exponential(), likelihood=likelihood
    )
    model_2 = gpflow.models.SGPMC(
        data=(X, Y),
        kernel=gpflow.kernels.Exponential(),
        inducing_variable=X.copy(),
        likelihood=likelihood,
    )
    model_1.V = tf.convert_to_tensor(v_vals, dtype=default_float())
    model_2.V = tf.convert_to_tensor(v_vals, dtype=default_float())
    model_1.kernel.lengthscales.assign(0.8)
    model_2.kernel.lengthscales.assign(0.8)
    model_1.kernel.variance.assign(4.2)
    model_2.kernel.variance.assign(4.2)

    assert_allclose(
        model_1.log_posterior_density(), model_2.log_posterior_density(), rtol=1e-5, atol=1e-5
    )
def test_sgpr_qu():
    rng = Datum().rng
    X, Z = tf.cast(rng.randn(100, 2), default_float()), tf.cast(rng.randn(20, 2), default_float())
    Y = tf.cast(
        np.sin(X @ np.array([[-1.4], [0.5]])) + 0.5 * np.random.randn(len(X), 1), default_float())
    model = gpflow.models.SGPR((X, Y), kernel=gpflow.kernels.SquaredExponential(),
                               inducing_variable=Z)

    @tf.function
    def closure():
        return -model.log_marginal_likelihood()

    gpflow.optimizers.Scipy().minimize(closure, variables=model.trainable_variables)

    qu_mean, qu_cov = model.compute_qu()
    f_at_Z_mean, f_at_Z_cov = model.predict_f(model.inducing_variable.Z, full_cov=True)

    np.testing.assert_allclose(qu_mean, f_at_Z_mean, rtol=1e-5, atol=1e-5)
    np.testing.assert_allclose(tf.reshape(qu_cov, (1, 20, 20)), f_at_Z_cov, rtol=1e-5, atol=1e-5)
def _init_variational_parameters(self, num_inducing, q_mu, q_sqrt, q_diag):
    """
    Constructs the mean and cholesky of the covariance of the variational Gaussian posterior.
    If a user passes values for `q_mu` and `q_sqrt` the routine checks if they have consistent
    and correct shapes. If a user does not specify any values for `q_mu` and `q_sqrt`, the routine
    initializes them; their shape depends on `num_inducing` and `q_diag`.

    Note: most often the comments refer to the number of observations (=output dimensions) with P,
    the number of latent GPs with L, and the number of inducing points with M. Typically P equals L,
    but when certain multioutput kernels are used, this can change.

    Parameters
    ----------
    :param num_inducing: int
        Number of inducing variables, typically referred to as M.
    :param q_mu: np.array or None
        Mean of the variational Gaussian posterior. If None the function will initialise
        the mean with zeros. If not None, the shape of `q_mu` is checked.
    :param q_sqrt: np.array or None
        Cholesky of the covariance of the variational Gaussian posterior.
        If None the function will initialise `q_sqrt` with identity matrix.
        If not None, the shape of `q_sqrt` is checked, depending on `q_diag`.
    :param q_diag: bool
        Used to check if `q_mu` and `q_sqrt` have the correct shape or to
        construct them with the correct shape. If `q_diag` is true,
        `q_sqrt` is two dimensional and only holds the square root of the
        covariance diagonal elements. If False, `q_sqrt` is three dimensional.
    """
    q_mu = np.zeros((num_inducing, self.num_latent_gps)) if q_mu is None else q_mu
    self.q_mu = Parameter(q_mu, dtype=default_float())  # [M, P]

    if q_sqrt is None:
        if self.q_diag:
            ones = np.ones((num_inducing, self.num_latent_gps), dtype=default_float())
            self.q_sqrt = Parameter(ones, transform=positive())  # [M, P]
        else:
            q_sqrt = [
                np.eye(num_inducing, dtype=default_float()) for _ in range(self.num_latent_gps)
            ]
            q_sqrt = np.array(q_sqrt)
            self.q_sqrt = Parameter(q_sqrt, transform=triangular())  # [P, M, M]
    else:
        if q_diag:
            assert q_sqrt.ndim == 2
            self.num_latent_gps = q_sqrt.shape[1]
            self.q_sqrt = Parameter(q_sqrt, transform=positive())  # [M, L|P]
        else:
            assert q_sqrt.ndim == 3
            self.num_latent_gps = q_sqrt.shape[0]
            num_inducing = q_sqrt.shape[1]
            self.q_sqrt = Parameter(q_sqrt, transform=triangular())  # [L|P, M, M]
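# --- Illustrative shape check (assumption: standard GPflow 2 SVGP; not from the source) ---
# The routine above yields q_mu with shape [M, P] and q_sqrt with shape [P, M, M]
# when q_diag is False, e.g.:
import numpy as np
import gpflow

Z = np.linspace(0.0, 1.0, 5)[:, None]  # M = 5 inducing points
svgp = gpflow.models.SVGP(
    gpflow.kernels.SquaredExponential(),
    gpflow.likelihoods.Gaussian(),
    inducing_variable=Z,
    num_latent_gps=2,  # P = 2
    q_diag=False,
)
assert svgp.q_mu.shape == (5, 2)       # [M, P]
assert svgp.q_sqrt.shape == (2, 5, 5)  # [P, M, M]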
class DataQuad:
    num_data = 10
    num_ind = 10
    D_in = 2
    D_out = 3
    H = 150
    Xmu = tf.convert_to_tensor(rng.randn(num_data, D_in), dtype=default_float())
    L = gen_L(num_data, D_in, D_in)
    Xvar = tf.convert_to_tensor(np.array([l @ l.T for l in L]), dtype=default_float())
    Z = rng.randn(num_ind, D_in)
    q_mu = tf.convert_to_tensor(rng.randn(num_ind, D_out), dtype=default_float())
    q_sqrt = gen_q_sqrt(D_out, num_ind, num_ind)
def __init__(self, kernel, inducing_variables, num_outputs, mean_function,
             input_prop_dim=None, white=False, **kwargs):
    super().__init__(input_prop_dim, **kwargs)
    self.num_inducing = inducing_variables.shape[0]
    self.mean_function = mean_function
    self.num_outputs = num_outputs
    self.white = white

    self.kernels = []
    for i in range(self.num_outputs):
        self.kernels.append(copy.deepcopy(kernel))

    # Initialise q_mu to all zeros
    q_mu = np.zeros((self.num_inducing, num_outputs))
    self.q_mu = Parameter(q_mu, dtype=default_float())

    # Initialise q_sqrt to identity function
    # q_sqrt = tf.tile(tf.expand_dims(tf.eye(self.num_inducing,
    #                                        dtype=default_float()), 0), (num_outputs, 1, 1))
    q_sqrt = [
        np.eye(self.num_inducing, dtype=default_float()) for _ in range(num_outputs)
    ]
    q_sqrt = np.array(q_sqrt)
    # Store as lower triangular matrix L.
    self.q_sqrt = Parameter(q_sqrt, transform=triangular())

    # Initialise to prior (Ku) + jitter.
    if not self.white:
        Kus = [
            self.kernels[i].K(inducing_variables) for i in range(self.num_outputs)
        ]
        Lus = [
            np.linalg.cholesky(Kus[i] + np.eye(self.num_inducing) * default_jitter())
            for i in range(self.num_outputs)
        ]
        q_sqrt = Lus
        q_sqrt = np.array(q_sqrt)
        self.q_sqrt = Parameter(q_sqrt, transform=triangular())

    self.inducing_points = []
    for i in range(self.num_outputs):
        self.inducing_points.append(inducingpoint_wrapper(inducing_variables))
def log10_Wasserstein_distance(mean, covariance, approximate_mean, approximate_covariance,
                               jitter=1e-12):
    """
    Identify the decadic logarithm of the Wasserstein distance based on the means and
    covariance matrices.

    :param mean: The analytic mean, with a shape of [N*].
    :param covariance: The analytic covariance, with a shape of [N* x N*].
    :param approximate_mean: The approximate mean, with a shape of [N*].
    :param approximate_covariance: The approximate covariance, with a shape of [N* x N*].
    :param jitter: The jitter value for numerical robustness.
    :return: A scalar log distance value.
    """
    squared_mean_distance = tf.norm(mean - approximate_mean) ** 2
    square_root_covariance = tf.linalg.sqrtm(
        covariance + tf.eye(tf.shape(covariance)[0], dtype=covariance.dtype) * jitter)
    matrix_product = square_root_covariance @ approximate_covariance @ square_root_covariance
    square_root_matrix_product = tf.linalg.sqrtm(
        matrix_product + tf.eye(tf.shape(matrix_product)[0], dtype=matrix_product.dtype) * jitter)
    term = covariance + approximate_covariance - 2 * square_root_matrix_product
    trace = tf.linalg.trace(term)
    ws_distance = (squared_mean_distance + trace) ** 0.5
    log10_ws_distance = tf.math.log(ws_distance) / tf.math.log(
        tf.constant(10.0, dtype=default_float()))
    return log10_ws_distance
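# --- Illustrative sanity check (not from the source) ------------------------
# The function above implements the closed-form 2-Wasserstein distance between
# Gaussians: W2^2 = ||m1 - m2||^2 + Tr(C1 + C2 - 2*(C1^{1/2} C2 C1^{1/2})^{1/2}).
# Assumes the function is in scope and the configured default float is float64.
import tensorflow as tf

mean = tf.constant([0.0, 1.0], dtype=tf.float64)
cov = tf.constant([[1.0, 0.3], [0.3, 2.0]], dtype=tf.float64)
shifted_mean = mean + tf.constant([1.0, 0.0], dtype=tf.float64)

# Identical Gaussians: distance ~ 0, so the log10 value is very negative.
print(log10_Wasserstein_distance(mean, cov, mean, cov))
# Same covariance, mean shifted by 1: W2 distance ~ 1, so the log10 value is ~ 0.
print(log10_Wasserstein_distance(mean, cov, shifted_mean, cov))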
class SampleConditional(Sample):
    # N_old is 0 at first; we then start keeping track of past evaluation points.
    X = None  # [N_old, D]
    P = tf.shape(q_mu)[-1]  # num latent GPs
    f = tf.zeros((0, P), dtype=default_float())  # [N_old, P]

    def __call__(self, X_new: TensorType) -> tf.Tensor:
        N_old = tf.shape(self.f)[0]
        N_new = tf.shape(X_new)[0]

        if self.X is None:
            self.X = X_new
        else:
            self.X = tf.concat([self.X, X_new], axis=0)

        mean, cov = conditional(
            self.X,
            inducing_variable,
            kernel,
            q_mu,
            q_sqrt=q_sqrt,
            white=whiten,
            full_cov=True,
        )  # mean: [N_old+N_new, P], cov: [P, N_old+N_new, N_old+N_new]
        mean = tf.linalg.matrix_transpose(mean)  # [P, N_old+N_new]
        f_old = tf.linalg.matrix_transpose(self.f)  # [P, N_old]
        f_new = draw_conditional_sample(mean, cov, f_old)  # [P, N_new]
        f_new = tf.linalg.matrix_transpose(f_new)  # [N_new, P]
        self.f = tf.concat([self.f, f_new], axis=0)  # [N_old + N_new, P]

        tf.debugging.assert_equal(tf.shape(self.f), [N_old + N_new, self.P])
        tf.debugging.assert_equal(tf.shape(f_new), [N_new, self.P])

        return f_new
def test_multiclass():
    num_classes = 3
    model = gpflow.models.SVGP(
        gpflow.kernels.SquaredExponential(),
        gpflow.likelihoods.MultiClass(num_classes=num_classes),
        inducing_variable=Datum.X.copy(),
        num_latent_gps=num_classes,
    )
    gpflow.set_trainable(model.inducing_variable, False)

    # test with explicitly unknown shapes:
    tensor_spec = tf.TensorSpec(shape=None, dtype=default_float())
    elbo = tf.function(
        model.elbo,
        input_signature=[(tensor_spec, tensor_spec)],
    )

    @tf.function
    def model_closure():
        return -elbo(Datum.cdata)

    opt = gpflow.optimizers.Scipy()

    # simply test whether it runs without erroring...:
    opt.minimize(
        model_closure,
        variables=model.trainable_variables,
        options=dict(maxiter=3),
        compile=True,
    )
def K_diag(self, X, presliced=False):
    if not presliced:
        X, _ = self.slice(X, None)
    X_product = self._weighted_product(X)
    const = tf.cast((1.0 / np.pi) * self._J(0.0), default_float())
    return self.variance * const * X_product ** self.order
def run_one_mcmc(n_training, gp_model):
    num_burnin_steps = FLAGS.n_burnin
    num_samples = FLAGS.n_samples

    mcmc_helper, run_chain_fn = get_run_chain_fn(gp_model, num_samples, num_burnin_steps)
    try:
        tic = time.time()
        result, is_accepted = run_chain_fn()
        print(np.mean(is_accepted))
        run_time = time.time() - tic
        parameter_samples = mcmc_helper.convert_to_constrained_values(result)
    except Exception as e:  # noqa: It's not clear what the error returned by TF could be, so well...
        run_time = float("nan")
        parameter_samples = [
            np.nan * np.ones((num_samples,), dtype=config.default_float())
            for _ in gp_model.trainable_parameters
        ]
        print(
            f"{FLAGS.model}-{FLAGS.cov} failed with n_training={n_training} and error: \n {e}"
        )
    return run_time, dict(zip(gpf.utilities.parameter_dict(gp_model), parameter_samples))
def init_fake_svgp(X, Y):
    from mogpe.models.utils.model import init_inducing_variables

    output_dim = Y.shape[1]
    input_dim = X.shape[1]

    num_inducing = 30
    inducing_variable = init_inducing_variables(X, num_inducing)
    inducing_variable = gpf.inducing_variables.SharedIndependentInducingVariables(
        gpf.inducing_variables.InducingPoints(inducing_variable))

    noise_var = 0.1
    lengthscale = 1.0
    mean_function = gpf.mean_functions.Constant()
    likelihood = gpf.likelihoods.Gaussian(noise_var)

    kern_list = []
    for _ in range(output_dim):
        # Create multioutput kernel from kernel list.
        # Use a separate variable here so the scalar `lengthscale` is not
        # overwritten by a tensor on later iterations.
        lengthscales = tf.convert_to_tensor([lengthscale] * input_dim, dtype=default_float())
        kern_list.append(gpf.kernels.RBF(lengthscales=lengthscales))
    kernel = gpf.kernels.SeparateIndependent(kern_list)

    return SVGPModel(kernel,
                     likelihood,
                     mean_function=mean_function,
                     inducing_variable=inducing_variable)
def gen_sim(num_data, dim):
    np.random.seed(1)
    xmin = -5
    xmax = 5
    break_pt = xmin + (xmax - xmin) * 0.5
    k1 = 0.0
    k21 = 0.5
    k22 = -0.5
    c1 = 0
    sigma = 0

    x = np.linspace(xmin, xmax, int(num_data / 2), dtype=default_float())
    X = branch_simulation(x, break_pt, k1, k21, k22, c1, sigma)
    labels = np.repeat([0, 1], int(num_data / 2))

    fx, gx = branch_kernel(X, break_pt, dim)
    break_num = x[x < break_pt].size
    halfpt = int(num_data / 2)
    tmp1 = fx[:break_num]
    tmp2 = fx[break_num:halfpt]
    tmp3 = gx[-(halfpt - break_num):]
    Y = np.concatenate([tmp1, tmp2, tmp1, tmp3], axis=0)

    return (X, Y, labels)
def __post_init__(self):
    num_grid_pts = self.data.shape[0]
    self.epsilon = tf.random.normal((num_grid_pts, self.num_samples), dtype=default_float())

    samples_pipe = SamplesPipe(self.data, self.epsilon)
    self.kernels_controller = KernelsController([samples_pipe])
    samples_map = hv.DynamicMap(self.update_samples, streams=[samples_pipe])

    control_vline_stream = hv.streams.Draw(rename=dict(x="x1", y="x2"))
    control_vline_map = hv.DynamicMap(
        self.update_control_vlines, streams=[control_vline_stream, samples_pipe])
    vlines_map = hv.DynamicMap(
        self.update_vlines, streams=[control_vline_stream, samples_pipe])
    scatter_map = hv.DynamicMap(
        self.update_scatter, streams=[control_vline_stream, samples_pipe])

    self.streams = {
        "samples": samples_pipe,
        "vlines_control": control_vline_stream,
        "vlines": control_vline_stream,
    }
    self.maps = {
        "samples": samples_map,
        "vlines_control": control_vline_map,
        "vlines": vlines_map,
        "scatter": scatter_map,
    }
def test_conjugate_gradient_convergence():
    """
    Check that the method of conjugate gradients implemented can solve a linear system of equations
    """
    rng: np.random.RandomState = np.random.RandomState(999)
    noise = 1e-3
    train, z, _ = data(rng)
    x, y = train
    n = x.shape[0]
    b = tf.transpose(y)
    k = SquaredExponential()
    K = k(x) + noise * tf.eye(n, dtype=default_float())
    Kinv_y = tf.linalg.solve(K, y)  # We could solve by cholesky instead

    model = CGLB((x, y), kernel=k, inducing_variable=z, noise_variance=noise)
    common = model._common_calculation()

    initial = tf.zeros_like(b)
    A = common.A
    LB = common.LB
    max_error = 0.01
    max_steps = 200
    restart_cg_step = 200
    preconditioner = NystromPreconditioner(A, LB, noise)

    v = cglb_conjugate_gradient(
        K, b, initial, preconditioner, max_error, max_steps, restart_cg_step
    )

    # NOTE: with smaller `max_error` we can reduce the `rtol`
    np.testing.assert_allclose(Kinv_y, tf.transpose(v), rtol=0.1)
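# --- Background sketch (illustrative only; not the library's preconditioned CGLB solver) ---
# Plain conjugate gradients solving K v = b for a symmetric positive-definite K,
# which is the operation the test above verifies on the kernel matrix.
import numpy as np

def conjugate_gradient(K, b, tol=1e-10, max_steps=200):
    v = np.zeros_like(b)
    r = b - K @ v            # residual
    p = r.copy()             # search direction
    rs_old = r @ r
    for _ in range(max_steps):
        Kp = K @ p
        alpha = rs_old / (p @ Kp)
        v += alpha * p
        r -= alpha * Kp
        rs_new = r @ r
        if np.sqrt(rs_new) < tol:
            break
        p = r + (rs_new / rs_old) * p
        rs_old = rs_new
    return v

A = np.array([[4.0, 1.0], [1.0, 3.0]])
b = np.array([1.0, 2.0])
np.testing.assert_allclose(conjugate_gradient(A, b), np.linalg.solve(A, b))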
def __init__(self, image_shape: Tuple, output_dim: int, base_kernel: gpflow.kernels.Kernel,
             batch_size: Optional[int] = None):
    super().__init__()
    with self.name_scope:
        self.base_kernel = base_kernel
        input_size = int(tf.reduce_prod(image_shape))
        input_shape = (input_size,)

        self.cnn = tf.keras.Sequential([
            tf.keras.layers.InputLayer(input_shape=input_shape, batch_size=batch_size),
            tf.keras.layers.Reshape(image_shape),
            tf.keras.layers.Conv2D(filters=32, kernel_size=image_shape[:-1], padding="same",
                                   activation="relu"),
            tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=2),
            tf.keras.layers.Conv2D(filters=64, kernel_size=(5, 5), padding="same",
                                   activation="relu"),
            tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=2),
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(output_dim, activation="relu"),
            tf.keras.layers.Lambda(lambda x: tf.cast(x, default_float())),
        ])

        self.cnn.build()
def testFeatureequivalence(tol):
    VFFlength = 10
    RVFFlength = 2 * VFFlength - 1
    kernel = gpflow.kernels.Matern12(variance=1.0, lengthscales=0.7)

    VFFinducingVar = VFF_IV.FourierFeatures1D(0, 6, VFFlength)
    VFFmodel = gpflow.models.SGPR((X, Y), kernel, VFFinducingVar)

    RVFFinducingVar = RVFF_1D(0, 6, RVFFlength)
    RVFFfrequencies = tf.concat(
        (
            VFFinducingVar.omegas,
            tf.reshape(tf.gather(VFFinducingVar.omegas, tf.where(VFFinducingVar.omegas != 0.0)), [-1]),
        ),
        axis=0,
    )
    RVFFphases = tf.concat(
        (
            np.pi / (2 * VFFinducingVar.omegas[1:VFFlength]),
            tf.zeros(RVFFlength - VFFlength, dtype=default_float()),
        ),
        axis=0,
    )
    RVFFphases = tf.concat(
        (tf.constant([np.pi / 2], dtype=default_float()), RVFFphases),
        axis=0,
    )
    RVFFinducingVar.omegas.assign(RVFFfrequencies)
    RVFFinducingVar.phis.assign(RVFFphases)
    RVFFmodel = gpflow.models.SGPR((X, Y), kernel, RVFFinducingVar)

    meanVFF, covVFF = VFFmodel.predict_f(Xtest, True, False)
    meanRVFF, covRVFF = RVFFmodel.predict_f(Xtest, True, False)

    plot(Xtest, [(meanVFF, tf.transpose(tf.linalg.diag_part(covVFF)), 'r', 'c'),
                 (meanRVFF, tf.transpose(tf.linalg.diag_part(covRVFF)), 'b', 'g')])
    print("If you can only see one mean function and one error bar, VFF and RVFF perfectly coincide.")
    print("RVFF model ELBO is %2.2f" % RVFFmodel.elbo())
    print("VFF model ELBO is %2.2f" % VFFmodel.elbo())

    return (tf.math.reduce_sum(tf.math.abs(covRVFF - covVFF)) < tol
            and tf.math.reduce_sum(tf.math.abs(meanRVFF - meanVFF)) < tol)
def solve_lyap_vec(F: tf.Tensor, L: tf.Tensor, Q: tf.Tensor) -> tf.Tensor:
    """Vectorized Lyapunov equation solver

    F P + P F' + L Q L' = 0

    Parameters
    ----------
    F : tf.Tensor
        ...
    L : tf.Tensor
        ...
    Q : tf.Tensor
        ...

    Returns
    -------
    Pinf : tf.Tensor
        Steady state covariance
    """
    dtype = config.default_float()
    dim = tf.shape(F)[0]

    op1 = tf.linalg.LinearOperatorFullMatrix(F)
    op2 = tf.linalg.LinearOperatorIdentity(dim, dtype=dtype)

    F1 = tf.linalg.LinearOperatorKronecker([op2, op1]).to_dense()
    F2 = tf.linalg.LinearOperatorKronecker([op1, op2]).to_dense()

    F = F1 + F2
    Q = tf.matmul(L, tf.matmul(Q, L, transpose_b=True))

    Pinf = tf.reshape(tf.linalg.solve(F, tf.reshape(Q, (-1, 1))), (dim, dim))
    Pinf = -0.5 * (Pinf + tf.transpose(Pinf))

    return Pinf
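# --- Illustrative scalar check (not from the source) -------------------------
# For F = -lam and L = 1, the Lyapunov equation -lam*P - P*lam + q = 0 gives
# Pinf = q / (2*lam), which the solver above reproduces. Assumes the configured
# default float is float64 so the dtypes match.
import numpy as np
import tensorflow as tf

lam, q = 1.5, 2.0
F_scalar = tf.constant([[-lam]], dtype=tf.float64)
L_scalar = tf.constant([[1.0]], dtype=tf.float64)
Q_scalar = tf.constant([[q]], dtype=tf.float64)

Pinf = solve_lyap_vec(F_scalar, L_scalar, Q_scalar)
np.testing.assert_allclose(Pinf.numpy(), [[q / (2 * lam)]])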
def get_matern_sde(variance, lengthscales, d: int) -> Tuple[tf.Tensor, ...]:
    """
    TODO: write description

    Parameters
    ----------
    variance: float
        kernel (signal) variance
    lengthscales: tf.Tensor
        tensor with kernel lengthscale
    d: int
        the Matern smoothness parameter plus one half, for instance Matern32 -> 2;
        this will be used as the dimension of the latent SSM

    Returns
    -------
    F, L, H, Q: tuple of tf.Tensor
        Parameters for the LTI SDE
    """
    dtype = config.default_float()
    lamda = math.sqrt(2 * d - 1) / lengthscales
    F = _get_transition_matrix(lamda, d, dtype)
    one = tf.ones((1,), dtype)
    L = tf.linalg.diag(one, k=-d + 1, num_rows=d, num_cols=1)  # type: tf.Tensor
    H = tf.linalg.diag(one, num_rows=1, num_cols=d)  # type: tf.Tensor
    Q = _get_brownian_cov(variance, lamda, d, dtype)
    return F, L, H, Q
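# --- Illustrative reference (not from the source) ----------------------------
# Standard state-space form of a Matern-3/2 kernel (d = 2), following
# Hartikainen & Sarkka; the private helpers above may use a different but
# equivalent convention for F and Q.
import math
import numpy as np

sigma2, ell = 1.0, 0.5                # kernel variance and lengthscale
lam = math.sqrt(3.0) / ell            # sqrt(2*nu) / ell with nu = 3/2
F_ref = np.array([[0.0, 1.0], [-lam ** 2, -2.0 * lam]])  # companion transition matrix
L_ref = np.array([[0.0], [1.0]])
H_ref = np.array([[1.0, 0.0]])
Q_ref = np.array([[4.0 * lam ** 3 * sigma2]])             # white-noise spectral density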
def test_svgp(whiten, q_diag):
    model = gpflow.models.SVGP(
        gpflow.kernels.SquaredExponential(),
        gpflow.likelihoods.Gaussian(),
        inducing_variable=Datum.X.copy(),
        q_diag=q_diag,
        whiten=whiten,
        mean_function=gpflow.mean_functions.Constant(),
        num_latent_gps=Datum.Y.shape[1],
    )
    gpflow.set_trainable(model.inducing_variable, False)

    # test with explicitly unknown shapes:
    tensor_spec = tf.TensorSpec(shape=None, dtype=default_float())
    elbo = tf.function(model.elbo, input_signature=[(tensor_spec, tensor_spec)])

    @tf.function
    def model_closure():
        return -elbo(Datum.data)

    opt = gpflow.optimizers.Scipy()

    # simply test whether it runs without erroring...:
    opt.minimize(
        model_closure,
        variables=model.trainable_variables,
        options=dict(maxiter=3),
        compile=True,
    )
def upper_bound(self) -> tf.Tensor:
    """
    Upper bound for the sparse GP regression marginal likelihood. Note that
    the same inducing points are used for calculating the upper bound as are
    used for computing the likelihood approximation. This may not lead to the
    best upper bound. The upper bound can be tightened by optimising Z, just
    like the lower bound. This is especially important in FITC, as FITC is
    known to produce poor inducing point locations. An optimisable upper bound
    can be found in https://github.com/markvdw/gp_upper.

    The key reference is ::

      @misc{titsias_2014,
        title={Variational Inference for Gaussian and Determinantal Point Processes},
        url={http://www2.aueb.gr/users/mtitsias/papers/titsiasNipsVar14.pdf},
        publisher={Workshop on Advances in Variational Inference (NIPS 2014)},
        author={Titsias, Michalis K.},
        year={2014},
        month={Dec}
      }

    The key quantity, the trace term, can be computed via

    >>> _, v = conditionals.conditional(X, model.inducing_variable.Z, model.kernel,
    ...                                 np.zeros((len(model.inducing_variable), 1)))

    which computes each individual element of the trace term.
    """
    X_data, Y_data = self.data
    num_data = to_default_float(tf.shape(Y_data)[0])

    Kdiag = self.kernel(X_data, full_cov=False)
    kuu = Kuu(self.inducing_variable, self.kernel, jitter=self.jitter_variance)
    kuf = Kuf(self.inducing_variable, self.kernel, X_data)

    I = tf.eye(tf.shape(kuu)[0], dtype=default_float())

    L = tf.linalg.cholesky(kuu)
    A = tf.linalg.triangular_solve(L, kuf, lower=True)
    AAT = tf.linalg.matmul(A, A, transpose_b=True)
    B = I + AAT / self.likelihood.variance
    LB = tf.linalg.cholesky(B)

    # Using the Trace bound, from Titsias' presentation
    c = tf.maximum(tf.reduce_sum(Kdiag) - tf.reduce_sum(tf.square(A)), 0)

    # Alternative bound on max eigenval:
    corrected_noise = self.likelihood.variance + c

    const = -0.5 * num_data * tf.math.log(2 * np.pi * self.likelihood.variance)
    logdet = -tf.reduce_sum(tf.math.log(tf.linalg.diag_part(LB)))

    LC = tf.linalg.cholesky(I + AAT / corrected_noise)
    v = tf.linalg.triangular_solve(LC, tf.linalg.matmul(A, Y_data) / corrected_noise, lower=True)
    quad = (-0.5 * tf.reduce_sum(tf.square(Y_data)) / corrected_noise
            + 0.5 * tf.reduce_sum(tf.square(v)))

    return const + logdet + quad
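# --- Illustrative usage sketch (assumption: the surrounding class behaves like
# GPflow's standard SGPR, exposing elbo() and upper_bound(); not from the source) ---
# With matched hyperparameters, elbo() lower-bounds and upper_bound() upper-bounds
# the exact log marginal likelihood of an equivalent GPR model.
import numpy as np
import gpflow

rng = np.random.default_rng(0)
X = rng.uniform(size=(50, 1))
Y = np.sin(3 * X) + 0.1 * rng.standard_normal((50, 1))
Z = X[::10].copy()

sparse = gpflow.models.SGPR((X, Y), kernel=gpflow.kernels.SquaredExponential(),
                            inducing_variable=Z)
exact = gpflow.models.GPR((X, Y), kernel=gpflow.kernels.SquaredExponential())

assert sparse.elbo() <= exact.log_marginal_likelihood()
assert exact.log_marginal_likelihood() <= sparse.upper_bound()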
def predict_f(self, Xnew: InputData, full_cov: bool = False,
              full_output_cov: bool = False) -> MeanAndVariance:
    """
    Compute the mean and variance of the latent function at some new points.
    Note that this is very similar to the SGPR prediction, for which there are
    notes in the SGPR notebook.

    Note: This model does not allow full output covariances.

    :param Xnew: points at which to predict
    """
    if full_output_cov:
        raise NotImplementedError

    pX = DiagonalGaussian(self.X_data_mean, self.X_data_var)

    Y_data = self.data
    num_inducing = self.inducing_variable.num_inducing
    psi1 = expectation(pX, (self.kernel, self.inducing_variable))
    psi2 = tf.reduce_sum(
        expectation(pX, (self.kernel, self.inducing_variable),
                    (self.kernel, self.inducing_variable)),
        axis=0,
    )
    jitter = default_jitter()
    Kus = covariances.Kuf(self.inducing_variable, self.kernel, Xnew)
    sigma2 = self.likelihood.variance
    sigma = tf.sqrt(sigma2)
    L = tf.linalg.cholesky(
        covariances.Kuu(self.inducing_variable, self.kernel, jitter=jitter))

    A = tf.linalg.triangular_solve(L, tf.transpose(psi1), lower=True) / sigma
    tmp = tf.linalg.triangular_solve(L, psi2, lower=True)
    AAT = tf.linalg.triangular_solve(L, tf.transpose(tmp), lower=True) / sigma2
    B = AAT + tf.eye(num_inducing, dtype=default_float())
    LB = tf.linalg.cholesky(B)
    c = tf.linalg.triangular_solve(LB, tf.linalg.matmul(A, Y_data), lower=True) / sigma
    tmp1 = tf.linalg.triangular_solve(L, Kus, lower=True)
    tmp2 = tf.linalg.triangular_solve(LB, tmp1, lower=True)
    mean = tf.linalg.matmul(tmp2, c, transpose_a=True)

    if full_cov:
        var = (self.kernel(Xnew)
               + tf.linalg.matmul(tmp2, tmp2, transpose_a=True)
               - tf.linalg.matmul(tmp1, tmp1, transpose_a=True))
        shape = tf.stack([1, 1, tf.shape(Y_data)[1]])
        var = tf.tile(tf.expand_dims(var, 2), shape)
    else:
        var = (self.kernel(Xnew, full_cov=False)
               + tf.reduce_sum(tf.square(tmp2), axis=0)
               - tf.reduce_sum(tf.square(tmp1), axis=0))
        shape = tf.stack([1, tf.shape(Y_data)[1]])
        var = tf.tile(tf.expand_dims(var, 1), shape)

    return mean + self.mean_function(Xnew), var
def test_RBF_eKzxKxz_gradient_notNaN():
    """
    Ensure that <K_{Z, x} K_{x, Z}>_p(x) is not NaN and correct, when
    K_{Z, Z} is zero with finite precision. See pull request #595.
    """
    kernel = gpflow.kernels.SquaredExponential(1, lengthscale=0.1)
    kernel.variance.assign(2.0)

    p = gpflow.probability_distributions.Gaussian(
        tf.constant([[10]], dtype=default_float()),
        tf.constant([[[0.1]]], dtype=default_float()))
    z = gpflow.inducing_variables.InducingPoints([[-10.0], [10.0]])

    with tf.GradientTape() as tape:
        ekz = expectation(p, (kernel, z), (kernel, z))

    grad = tape.gradient(ekz, kernel.lengthscale)
    assert grad is not None and not np.isnan(grad)
def default_bijector(cls, dtype: Any = None, **kwargs) -> tfb.Bijector:
    """
    Linear bijection between $[0, 1]^{2} <--> [0, 4]^{2}$
    """
    if dtype is None:
        dtype = default_float()
    return tfb.Scale(tf.cast(4.0, dtype=dtype))
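# --- Illustrative usage (not from the source) --------------------------------
# The returned bijector linearly maps the unit interval onto [0, 4] in each dimension.
import tensorflow_probability as tfp

bijector = tfp.bijectors.Scale(4.0)
print(bijector.forward([0.0, 0.25, 1.0]))  # -> [0., 1., 4.]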