import numpy as np
import scipy.linalg as la


def transformation_matrix(y, y_tilde, ls_y, lamb):
    r"""
    Compute the transformation matrix for DME/TTGP, defined as
    $A := (L + n \lambda I)^{-1} \tilde{L}$.

    This code uses Gaussian kernels only.

    Parameters
    ----------
    y : np.ndarray [Size: (n, d_y)]
        Samples of the mediating variable from the simulation process
    y_tilde : np.ndarray [Size: (m, d_y)]
        Samples of the mediating variable from the observation process
    ls_y : float or np.ndarray [Size: () or (1,) for isotropic; (d_y,) for anisotropic]
        The length scale(s) of the mediating variable(s)
    lamb : float
        Regularization parameter

    Returns
    -------
    np.ndarray [Size: (n, m)]
        The transformation matrix
    """
    # Size: (n, n)
    l = gaussian_kernel_gramix(y, y, ls_y)
    # Size: (n, m)
    tilde_l = gaussian_kernel_gramix(y, y_tilde, ls_y)
    # Size: (n, n)
    n = y.shape[0]
    lower = True
    l_chol = la.cholesky(l + n * lamb * np.eye(n), lower=lower)
    # Size: (n, m)
    a = la.cho_solve((l_chol, lower), tilde_l)
    return a
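# `gaussian_kernel_gramix` is assumed to be defined elsewhere in the module.
# The following is a minimal sketch of what it plausibly looks like, matching
# the shape and length-scale conventions in the size comments above; it is
# named with an underscore so it does not shadow the module's actual helper.
def _gaussian_kernel_gramix_sketch(x_1, x_2, ls):
    """Gaussian kernel Gram matrix between x_1 (n_1, d) and x_2 (n_2, d)."""
    x_1 = np.atleast_2d(x_1)
    x_2 = np.atleast_2d(x_2)
    # Scale each dimension by its length scale (scalar or per-dimension)
    z_1 = x_1 / ls
    z_2 = x_2 / ls
    # Pairwise squared Euclidean distances [Size: (n_1, n_2)]
    sq_dist = (np.sum(z_1 ** 2, axis=1)[:, np.newaxis]
               + np.sum(z_2 ** 2, axis=1)[np.newaxis, :]
               - 2 * np.dot(z_1, np.transpose(z_2)))
    return np.exp(-0.5 * np.maximum(sq_dist, 0.0))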
def ttgp_pred(x_q, x, y, y_tilde, z_tilde, ls_x, ls_y, sigma, full=True):
    """
    Compute the posterior predictive mean and covariance of a task
    transformed Gaussian process.

    This code uses Gaussian kernels only.

    Parameters
    ----------
    x_q : np.ndarray [Size: (n_q, d_x)]
        The query features
    x : np.ndarray [Size: (n, d_x)]
        The features from the transformation set
    y : np.ndarray [Size: (n, d_y)]
        The mediators from the transformation set
    y_tilde : np.ndarray [Size: (m, d_y)]
        The mediators from the task set
    z_tilde : np.ndarray [Size: (m,)]
        The targets from the task set
    ls_x : float or np.ndarray [Size: () or (1,) for isotropic; (d_x,) for anisotropic]
        The length scale(s) of the input variable(s)
    ls_y : float or np.ndarray [Size: () or (1,) for isotropic; (d_y,) for anisotropic]
        The length scale(s) of the mediating variable(s)
    sigma : float
        The noise standard deviation
    full : bool, optional
        Whether to do full Bayesian inference on g or use a maximum a
        posteriori approximation on g

    Returns
    -------
    np.ndarray [Size: (n_q,)]
        The predictive mean on the query points
    np.ndarray [Size: (n_q, n_q)]
        The predictive covariance between the query points
    """
    # Sizes of the transformation and task datasets
    n = y.shape[0]
    m = y_tilde.shape[0]
    # The regularization parameter equivalent to the noise standard
    # deviation under the DME-TTGP equivalence
    lamb = sigma**2 / n
    # Size: (n, m)
    trans_mat = transformation_matrix(y, y_tilde, ls_y, lamb)
    # Compute the noise covariance depending on whether we are performing
    # full Bayesian inference on g
    if full:
        # Size: (m, m)
        l_tt = gaussian_kernel_gramix(y_tilde, y_tilde, ls_y)
        # Size: (n, m)
        l_t = gaussian_kernel_gramix(y, y_tilde, ls_y)
        # Size: (m, m)
        noise_cov = (l_tt + sigma**2 * np.eye(m)
                     - np.dot(np.transpose(l_t), trans_mat))
    else:
        # Size: (m, m)
        noise_cov = sigma**2 * np.eye(m)
    # With the transformation and noise covariance in hand, apply the
    # transformed Gaussian process equations for prediction
    return tgp_pred(x_q, x, z_tilde, ls_x, trans_mat, noise_cov)
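# Illustrative usage of `ttgp_pred` on synthetic data; the dataset, length
# scales, and noise level below are arbitrary assumptions for demonstration.
def _demo_ttgp_pred():
    rng = np.random.default_rng(0)
    n, m, n_q = 50, 30, 10
    # Transformation set: features and mediators
    x = rng.uniform(-1, 1, size=(n, 1))
    y = np.sin(3 * x) + 0.05 * rng.standard_normal((n, 1))
    # Task set: mediators and targets
    y_tilde = rng.uniform(-1, 1, size=(m, 1))
    z_tilde = y_tilde.ravel() ** 2
    # Query points
    x_q = np.linspace(-1, 1, n_q)[:, np.newaxis]
    f_mean, f_cov = ttgp_pred(x_q, x, y, y_tilde, z_tilde,
                              ls_x=0.3, ls_y=0.3, sigma=0.1)
    print(f_mean.shape, f_cov.shape)  # (10,), (10, 10)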
def tgp_pred(x_q, x, z_tilde, ls_x, trans_mat, noise_cov):
    """
    Compute the posterior predictive mean and covariance of a transformed
    Gaussian process with a given transformation and noise covariance.

    This code uses Gaussian kernels only.

    Parameters
    ----------
    x_q : np.ndarray [Size: (n_q, d)]
        The query features
    x : np.ndarray [Size: (n, d)]
        The features
    z_tilde : np.ndarray [Size: (m,)]
        The transformed targets
    ls_x : float or np.ndarray [Size: () or (1,) for isotropic; (d,) for anisotropic]
        The length scale(s) of the input variable(s)
    trans_mat : np.ndarray [Size: (n, m)]
        The transformation matrix
    noise_cov : np.ndarray [Size: (m, m)]
        The noise covariance

    Returns
    -------
    np.ndarray [Size: (n_q,)]
        The predictive mean on the query points
    np.ndarray [Size: (n_q, n_q)]
        The predictive covariance between the query points
    """
    # Size: (n, n)
    k = gaussian_kernel_gramix(x, x, ls_x)
    # Size: (m, m)
    s = np.dot(np.transpose(trans_mat), np.dot(k, trans_mat)) + noise_cov
    # Size: (m, m)
    lower = True
    s_chol = la.cholesky(s, lower=lower)
    # Size: (m, n)
    smt = la.cho_solve((s_chol, lower), np.transpose(trans_mat))
    # Size: (n, n_q)
    k_q = gaussian_kernel_gramix(x, x_q, ls_x)
    # Size: (n_q, n_q)
    k_qq = gaussian_kernel_gramix(x_q, x_q, ls_x)
    # Size: (m, n_q)
    smt_k_q = np.dot(smt, k_q)
    # Size: (n_q,)
    f_mean = np.dot(np.transpose(smt_k_q), z_tilde)
    # Size: (n_q, n_q)
    f_cov = k_qq - np.dot(np.transpose(smt_k_q),
                          np.dot(np.transpose(trans_mat), k_q))
    # The posterior predictive mean and covariance of the latent function
    return f_mean, f_cov
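# Sanity-check sketch: with an identity transformation and iid Gaussian
# noise, `tgp_pred` reduces to standard GP regression, since then
# S = K + sigma^2 I and the equations above become the usual GP predictive
# equations. Data and hyperparameters below are arbitrary.
def _demo_tgp_pred_identity_reduction():
    rng = np.random.default_rng(1)
    n, n_q = 40, 5
    x = rng.uniform(-1, 1, size=(n, 1))
    z = np.sin(4 * x).ravel() + 0.1 * rng.standard_normal(n)
    x_q = np.linspace(-1, 1, n_q)[:, np.newaxis]
    sigma = 0.1
    f_mean, f_cov = tgp_pred(x_q, x, z, 0.3,
                             np.eye(n), sigma**2 * np.eye(n))
    print(f_mean.shape, f_cov.shape)  # (5,), (5, 5)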
def tgp_nlml(x, z_tilde, ls_x, trans_mat, noise_cov):
    """
    Compute the negative log marginal likelihood of a transformed Gaussian
    process.

    This code uses Gaussian kernels only.

    Parameters
    ----------
    x : np.ndarray [Size: (n, d)]
        The features
    z_tilde : np.ndarray [Size: (m,)]
        The transformed targets
    ls_x : float or np.ndarray [Size: () or (1,) for isotropic; (d,) for anisotropic]
        The length scale(s) of the input variable(s)
    trans_mat : np.ndarray [Size: (n, m)]
        The transformation matrix
    noise_cov : np.ndarray [Size: (m, m)]
        The noise covariance

    Returns
    -------
    float
        The negative log marginal likelihood
    """
    # Size: (n, n)
    k = gaussian_kernel_gramix(x, x, ls_x)
    # Size: (m, m)
    s = np.dot(np.transpose(trans_mat), np.dot(k, trans_mat)) + noise_cov
    # The marginal distribution of the transformed targets is a zero mean
    # Gaussian with covariance S
    return negative_log_gaussian(z_tilde, 0, s)
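# `negative_log_gaussian` is assumed to be defined elsewhere in the module.
# A minimal sketch under the assumption that it evaluates the negative log
# density of a multivariate Gaussian N(mean, cov) at z; the module's actual
# helper may differ in interface or numerics.
def _negative_log_gaussian_sketch(z, mean, cov):
    m = z.shape[0]
    lower = True
    cov_chol = la.cholesky(cov, lower=lower)
    dev = z - mean
    # Quadratic form dev^T cov^{-1} dev via the Cholesky factor
    quad = np.dot(dev, la.cho_solve((cov_chol, lower), dev))
    # log |cov| from the Cholesky diagonal
    log_det = 2.0 * np.sum(np.log(np.diag(cov_chol)))
    return 0.5 * (quad + log_det + m * np.log(2.0 * np.pi))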
def kernel_means_likelihood(theta_query, theta_sim, weights, beta):
    """
    Query the kernel means likelihood.

    Parameters
    ----------
    theta_query : np.ndarray [size: (n_query, p)]
        The parameters to query the likelihood at
    theta_sim : np.ndarray [size: (m, p)]
        Parameter values corresponding to the simulations
    weights : np.ndarray [size: (m, 1)]
        The weights of the kernel means likelihood
    beta : float or np.ndarray [size: () or (1,) for isotropic; (p,) for anisotropic]
        The length scale(s) for the parameter kernel

    Returns
    -------
    np.ndarray [size: (n_query,)]
        The kernel means likelihood values at the query points
    """
    # size: (m, n_query)
    theta_evaluation_gramix = gaussian_kernel_gramix(theta_sim, theta_query,
                                                     beta)
    # size: (n_query,)
    return np.dot(theta_evaluation_gramix.transpose(), weights).ravel()
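# Quick shape-check sketch for `kernel_means_likelihood`; the weights here
# are random placeholders, whereas in practice they come from
# `kernel_means_weights` below.
def _demo_kernel_means_likelihood():
    rng = np.random.default_rng(2)
    m, p, n_query = 100, 2, 7
    theta_sim = rng.normal(size=(m, p))
    weights = rng.random((m, 1)) / m
    theta_query = rng.normal(size=(n_query, p))
    values = kernel_means_likelihood(theta_query, theta_sim, weights, 0.5)
    print(values.shape)  # (7,)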
def dme_query_fast(t_query, t_tilde, t, x, y, ls_t, ls_x, lamb, eps):
    """
    Compute the deconditional mean embedding using the alternative form,
    which is cubic in the number of simulation samples but linear in the
    number of prior samples.

    This code uses Gaussian kernels only.

    Parameters
    ----------
    t_query : np.ndarray [Size: (n_q, d_t)]
        The parameters to query the deconditional mean embedding at
    t_tilde : np.ndarray [Size: (m, d_t)]
        The prior parameter samples
    t : np.ndarray [Size: (n, d_t)]
        The likelihood parameter samples
    x : np.ndarray [Size: (n, d_x)]
        The likelihood statistic samples
    y : np.ndarray [Size: (d_x,)]
        The observed statistic
    ls_t : float or np.ndarray [Size: () or (1,) for isotropic; (d_t,) for anisotropic]
        The length scale(s) of the parameters
    ls_x : float or np.ndarray [Size: () or (1,) for isotropic; (d_x,) for anisotropic]
        The length scale(s) of the statistics
    lamb : float
        The regularization hyperparameter for the prior operator inversion
    eps : float
        The regularization hyperparameter for the evidence operator inversion

    Returns
    -------
    np.ndarray [Size: (n_q, 1)]
        The deconditional mean embedding evaluated at the query parameters
    """
    m = t_tilde.shape[0]
    n = t.shape[0]
    # Size: (n, m)
    a = transformation_matrix(t, t_tilde, ls_t, lamb)
    # Size: (n, n)
    k = gaussian_kernel_gramix(x, x, ls_x)
    # Size: (n, 1)
    k_y = gaussian_kernel_gramix(x, y, ls_x)
    # Size: (m, 1)
    query_weights = np.dot(
        np.transpose(a),
        la.solve(np.dot(k, np.dot(a, np.transpose(a))) + m * eps * np.eye(n),
                 k_y))
    # Size: (m, n_q)
    l_query = gaussian_kernel_gramix(t_tilde, t_query, ls_t)
    # Size: (n_q, 1)
    q_query = np.dot(np.transpose(l_query), query_weights)
    return q_query
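# Illustrative usage of `dme_query_fast`; the simulator, observed statistic,
# and hyperparameters below are arbitrary assumptions for demonstration.
def _demo_dme_query_fast():
    rng = np.random.default_rng(3)
    n, m, n_q = 100, 200, 25
    # Likelihood parameter samples and their simulated statistics
    t = rng.normal(size=(n, 1))
    x = np.hstack([t, t ** 2]) + 0.1 * rng.standard_normal((n, 2))
    # Prior parameter samples and the observed statistic
    t_tilde = rng.normal(size=(m, 1))
    y = np.array([0.5, 0.25])
    t_query = np.linspace(-2, 2, n_q)[:, np.newaxis]
    q = dme_query_fast(t_query, t_tilde, t, x, y,
                       ls_t=0.5, ls_x=0.5, lamb=1e-3, eps=1e-3)
    print(q.shape)  # (25, 1)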
def kernel_means_weights(y, x_sim, theta_sim, eps, beta, reg=None):
    """
    Compute the weights of the kernel means likelihood.

    Parameters
    ----------
    y : np.ndarray [size: (1, d)]
        Observed data or summary statistics
    x_sim : np.ndarray [size: (m, s, d)]
        Simulated data or summary statistics
    theta_sim : np.ndarray [size: (m, p)]
        Parameter values corresponding to the simulations
    eps : float or np.ndarray [size: () or (1,) for isotropic; (d,) for anisotropic]
        The simulator noise level(s) for the epsilon-kernel or
        epsilon-likelihood
    beta : float or np.ndarray [size: () or (1,) for isotropic; (p,) for anisotropic]
        The length scale(s) for the parameter kernel
    reg : float, optional
        The regularization parameter for the conditional kernel mean

    Returns
    -------
    np.ndarray [size: (m, 1)]
        The weights of the kernel means likelihood
    """
    # size: (m, 1)
    if x_sim.ndim == 3:
        data_epsilon_likelihood = gaussian_density_gramix_multiple(
            y, x_sim, eps).transpose()
    elif x_sim.ndim == 2:
        data_epsilon_likelihood = gaussian_density_gramix(
            y, x_sim, eps).transpose()
    else:
        raise ValueError('Simulated dataset is neither 2D nor 3D.')
    # The number of simulations
    m = theta_sim.shape[0]
    # Set the regularization hyperparameter to a default value if it is not
    # specified
    if reg is None:
        reg = 1e-3 * np.min(beta)
    # Compute the weights at O(m^3) cost
    theta_sim_gramix = gaussian_kernel_gramix(theta_sim, theta_sim, beta)
    lower = True
    theta_sim_gramix_cholesky = la.cholesky(
        theta_sim_gramix + m * reg * np.eye(m), lower=lower)
    weights = la.cho_solve((theta_sim_gramix_cholesky, lower),
                           data_epsilon_likelihood)
    # size: (m, 1)
    return weights
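# `gaussian_density_gramix` and `gaussian_density_gramix_multiple` are
# assumed to be defined elsewhere in the module. Minimal sketches follow,
# under the assumptions that the former evaluates normalized Gaussian
# densities N(y; x_i, diag(eps^2)) and the latter averages them over the s
# replicate simulations per parameter; the actual helpers may differ.
def _gaussian_density_gramix_sketch(y, x, eps):
    d = np.atleast_2d(y).shape[1]
    eps_vec = np.broadcast_to(np.asarray(eps, dtype=float), (d,))
    normaliser = np.prod(eps_vec) * (2.0 * np.pi) ** (d / 2.0)
    return _gaussian_kernel_gramix_sketch(y, x, eps) / normaliser


def _gaussian_density_gramix_multiple_sketch(y, x_sim, eps):
    # Average the epsilon-density over the s replicates [size: (1, m)]
    return np.mean([_gaussian_density_gramix_sketch(y, x_sim[:, j, :], eps)
                    for j in range(x_sim.shape[1])], axis=0)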
def approximate_kernel_means_posterior_embedding(theta_query, theta_sim,
                                                 weights, beta, theta_samples,
                                                 marginal_likelihood=None,
                                                 beta_query=None):
    """
    Compute the approximate kernel means posterior embedding.

    Parameters
    ----------
    theta_query : np.ndarray [size: (n_query, p)]
        The parameters to query the posterior embedding at
    theta_sim : np.ndarray [size: (m, p)]
        Parameter values corresponding to the simulations
    weights : np.ndarray [size: (m, 1)]
        The weights of the kernel means likelihood
    beta : float or np.ndarray [size: () or (1,) for isotropic; (p,) for anisotropic]
        The length scale(s) for the parameter kernel
    theta_samples : np.ndarray [size: (n_samples, p)]
        The parameter samples to marginalize over
    marginal_likelihood : float, optional
        The marginal likelihood value if it was precomputed
    beta_query : float or np.ndarray, optional
        The length scale(s) for the query kernel; defaults to beta

    Returns
    -------
    np.ndarray [size: (n_query,)]
        The approximate kernel means posterior embedding at the query points
    """
    if beta_query is None:
        beta_query = beta
    # Approximate the integral empirically [size: (n_query, m)]
    h = np.dot(
        gaussian_kernel_gramix(theta_query, theta_samples, beta_query),
        gaussian_kernel_gramix(theta_samples, theta_sim, beta)
    ) / theta_samples.shape[0]
    # Compute the marginal likelihood if it has not been computed already
    if marginal_likelihood is None:
        marginal_likelihood = approximate_marginal_kernel_means_likelihood(
            theta_samples, theta_sim, weights, beta)
    # size: (n_query,)
    return np.dot(h, weights).ravel() / marginal_likelihood
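# `approximate_marginal_kernel_means_likelihood` is assumed to be defined
# elsewhere in the module. A minimal sketch under the assumption that it
# Monte Carlo averages the kernel means likelihood over the prior samples,
# i.e. p(y) = E_prior[L(theta)] ~= mean over theta_samples; the actual
# helper may differ.
def _approximate_marginal_kernel_means_likelihood_sketch(
        theta_samples, theta_sim, weights, beta):
    return np.mean(kernel_means_likelihood(theta_samples, theta_sim,
                                           weights, beta))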
def marginal_kernel_means_likelihood(theta_sim, weights, beta,
                                     prior_mean=None, prior_std=None):
    """
    Compute the marginal kernel means likelihood under a diagonal Gaussian
    prior.

    Parameters
    ----------
    theta_sim : np.ndarray [size: (m, p)]
        Parameter values corresponding to the simulations
    weights : np.ndarray [size: (m, 1)]
        The weights of the kernel means likelihood
    beta : float or np.ndarray [size: () or (1,) for isotropic; (p,) for anisotropic]
        The length scale(s) for the parameter kernel
    prior_mean : np.ndarray [size: () or (1,) for isotropic; (p,) for anisotropic]
        The mean(s) of the diagonal Gaussian prior
    prior_std : np.ndarray [size: () or (1,) for isotropic; (p,) for anisotropic]
        The standard deviation(s) of the diagonal Gaussian prior

    Returns
    -------
    float
        The marginal kernel means likelihood
    """
    # By default, the prior has zero mean
    if prior_mean is None:
        prior_mean = np.zeros((1, theta_sim.shape[-1]))
    # By default, the prior standard deviation is set to the length scale of
    # the parameter kernel
    if prior_std is None:
        prior_std = beta
    # Compute the length scale and the scalar ratio coefficient of the
    # resulting prior mean embedding
    prior_embedding_length_scale = np.sqrt(beta**2 + prior_std**2)
    ratio = np.prod(
        convert_anisotropic(beta / prior_embedding_length_scale,
                            theta_sim.shape[1]))
    # Compute the prior mean embedding [size: (m, 1)]
    prior_mean_embedding = ratio * gaussian_kernel_gramix(
        theta_sim, np.atleast_2d(prior_mean), prior_embedding_length_scale)
    # Compute the kernel means marginal likelihood
    return np.dot(prior_mean_embedding.ravel(), weights.ravel())
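# `convert_anisotropic` is assumed to broadcast a scalar or isotropic length
# scale to a (d,)-shaped anisotropic vector; a minimal sketch follows. After
# it is an end-to-end illustrative run of the kernel means pipeline, with
# arbitrary synthetic data and hyperparameters.
def _convert_anisotropic_sketch(value, d):
    return np.broadcast_to(np.asarray(value, dtype=float), (d,)).copy()


def _demo_kernel_means_pipeline():
    rng = np.random.default_rng(4)
    m, d, p = 200, 2, 1
    # Draw parameters from the prior and simulate summary statistics
    theta_sim = rng.normal(size=(m, p))
    x_sim = (np.hstack([theta_sim, theta_sim ** 2])
             + 0.1 * rng.standard_normal((m, d)))
    # Observed summary statistic
    y = np.array([[0.5, 0.25]])
    beta, eps = 0.5, 0.2
    weights = kernel_means_weights(y, x_sim, theta_sim, eps, beta)
    marginal = marginal_kernel_means_likelihood(theta_sim, weights, beta)
    theta_query = np.linspace(-2, 2, 50)[:, np.newaxis]
    likelihood = kernel_means_likelihood(theta_query, theta_sim, weights,
                                         beta)
    print(marginal, likelihood.shape)  # scalar, (50,)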