def dgp_dSigma(self, x: np.ndarray, X: np.ndarray, kern: GPy.kern.Kern, w_inv: np.ndarray) -> np.ndarray:
    """
    Partial derivatives of the GP posterior covariance matrix with respect to the sample locations
    (used for differentiating the GP posterior samples)

    :param x: The locations the samples are taken at
    :param X: The locations used to train the GP model
    :param kern: Prior covariance kernel
    :param w_inv: Inverses of the Woodbury matrices of the model
    :return: The derivative of the posterior covariance with respect to x (shape N x b x b x b x d)
    """
    N, b, d, n = w_inv.shape[0], x.shape[0], x.shape[1], X.shape[0]
    dkxX_dx = np.empty((b, n, d))
    dkxx_dx = np.empty((b, b, d))
    for i in range(d):
        dkxX_dx[:, :, i] = kern.dK_dX(x, X, i)
        dkxx_dx[:, :, i] = kern.dK_dX(x, x, i)
    K = kern.K(x, X)

    dsigma = np.zeros((N, b, b, b, d))
    for i in range(b):
        for j in range(d):
            # Derivative of K(x, X): only row i changes when x_i moves along dimension j
            Ks = np.zeros((b, n))
            Ks[i, :] = dkxX_dx[i, :, j]
            # Derivative of K(x, x): only row i and column i change
            dKss_dxi = np.zeros((b, b))
            dKss_dxi[i, :] = dkxx_dx[i, :, j]
            dKss_dxi[:, i] = dkxx_dx[i, :, j].T
            dKss_dxi[i, i] = 0
            # dSigma/dx_ij = dK(x,x) - dK(x,X) W^-1 K(X,x) - K(x,X) W^-1 dK(X,x)
            dsigma[:, :, :, i, j] = dKss_dxi[None, :, :] \
                - np.matmul(np.matmul(Ks[None, :, :], w_inv), (K.T)[None, :, :]) \
                - np.matmul(np.matmul(K[None, :, :], w_inv), (Ks.T)[None, :, :])
    return dsigma  # N x b x b x b x d

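# Illustrative sketch (not part of the original module): a finite-difference check of the
# posterior covariance derivative assembled above, Sigma(x) = K(x,x) - K(x,X) W^-1 K(X,x).
# The model and locations below are made up for illustration; only standard GPy/numpy calls are used.
def _demo_dsigma_finite_difference():
    import numpy as np
    import GPy

    rng = np.random.RandomState(0)
    X_train = rng.rand(6, 1)
    model = GPy.models.GPRegression(X_train, np.sin(6 * X_train), GPy.kern.RBF(1))
    kern, w_inv = model.kern, model.posterior.woodbury_inv

    def post_cov(x):
        # Posterior covariance at the sample locations x
        KxX = kern.K(x, X_train)
        return kern.K(x, x) - KxX @ w_inv @ KxX.T

    x = np.array([[0.3], [0.7]])
    eps = 1e-6
    x_pert = x.copy()
    x_pert[0, 0] += eps
    fd = (post_cov(x_pert) - post_cov(x)) / eps  # numerical dSigma w.r.t. the first coordinate of x_0
    return fd
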
def get_dk_dtheta(self, k: GPy.kern.Kern, X, X2=None):
    """
    Derivatives of the kernel matrix with respect to the kernel hyperparameters

    :param k: the kernel (must be an instance of self.acceptable_kernels)
    :param X: first set of input locations
    :param X2: second set of input locations (defaults to X)
    :return: (dK/dvariance, dK/dlengthscale) if the inner variances are free, otherwise dK/dlengthscale only
    """
    assert isinstance(k, self.acceptable_kernels)

    if X2 is None:
        X2 = X
    X_sliced, X2_sliced = X[:, k.active_dims], X2[:, k.active_dims]

    if isinstance(k, (GPy.kern.RBF, GPy.kern.Matern52)):
        dk_dr = k.dK_dr_via_X(X_sliced, X2_sliced)

        # dr/dl
        if k.ARD:
            tmp = k._inv_dist(X_sliced, X2_sliced)
            dr_dl = -np.dstack([
                tmp * np.square(X_sliced[:, q:q + 1] - X2_sliced[:, q:q + 1].T)
                / k.lengthscale[q] ** 3
                for q in range(k.input_dim)
            ])
            dk_dl = dk_dr[..., None] * dr_dl
        else:
            r = k._scaled_dist(X_sliced, X2_sliced)
            dr_dl = -r / k.lengthscale
            dk_dl = dk_dr * dr_dl

    elif isinstance(k, CategoryOverlapKernel):
        dk_dl = None
    else:
        raise NotImplementedError

    # Return the variance gradient as well, if the variance is not fixed
    if not self.fix_inner_variances:
        return k.K(X, X2) / k.variance, dk_dl
    else:
        return dk_dl

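# Illustrative check (not part of the original module): for a non-ARD RBF kernel, the
# lengthscale gradient dK/dl = dK/dr * (-r/l) used above can be verified against finite
# differences. The kernel settings below are arbitrary.
def _demo_dk_dlengthscale_fd():
    import numpy as np
    import GPy

    rng = np.random.RandomState(0)
    X = rng.rand(5, 2)
    k = GPy.kern.RBF(input_dim=2, lengthscale=0.7, ARD=False)

    # analytic gradient at the current lengthscale
    r = k._scaled_dist(X, X)
    dk_dl = k.dK_dr_via_X(X, X) * (-r / k.lengthscale)

    # forward finite difference
    eps = 1e-6
    K0 = k.K(X, X).copy()
    k.lengthscale = 0.7 + eps
    fd = (k.K(X, X) - K0) / eps
    assert np.allclose(dk_dl, fd, atol=1e-4)
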
def vi_comparison(X: np.ndarray, y: List[Tuple[int, float]], yc: List[List[Tuple[int, int]]],
                  kern: GPy.kern.Kern, sigma2s: np.ndarray, alpha: np.ndarray, beta: np.ndarray,
                  max_iters: int = 200, lr: float = 1e-3, method: str = 'fr', optimize: str = "adam",
                  get_logger: Callable = None) -> Tuple[Posterior, float, Dict, np.ndarray, np.ndarray]:
    """
    :param X: All locations of both direct observations and batch comparisons
    :param y: Direct observations as a list of tuples telling location index (row in X) and observation value.
    :param yc: Batch comparisons in a list of lists of tuples. Each batch is a list and tuples tell the comparisons (winner index, loser index)
    :param kern: Prior covariance kernel
    :param sigma2s: Noise variance of the observations
    :param alpha: Initial values for alpha
    :param beta: Initial values for beta
    :param max_iters: maximum number of optimization iterations
    :param lr: learning rate of the optimizer
    :param method: full rank 'fr' or mean field 'mf' method
    :param optimize: optimization algorithm, 'adam' or 'L-BFGS-B'
    :param get_logger: Function for receiving the logger where the prints are forwarded.
    :return: A tuple containing the posterior, the log marginal likelihood, its gradients with respect to
        the hyperparameters (not supported at the moment), and the alpha and beta values
    """
    if method == 'fr':
        recompute_posterior = recompute_posterior_fr
        s_to_l = dL_fr
    else:
        recompute_posterior = recompute_posterior_mf
        s_to_l = dL_mf
    K = kern.K(X)
    K = K + 1e-6 * np.identity(len(K))
    N = X.shape[0]
    X0 = np.r_[alpha, beta]
    args = [K, sigma2s, y, yc, recompute_posterior, s_to_l]
    if optimize == "adam":
        X, log_marginal, _ = adam(log_lik, X0.flatten(), args, bounds=None,
                                  max_it=max_iters, get_logger=get_logger)
    else:
        res = sp.optimize.minimize(fun=log_lik, x0=X0.flatten(), args=args,
                                   method='L-BFGS-B', jac=True, bounds=None)
        X = res.x.reshape(-1)
        log_marginal = res.fun
    alpha = X[:K.shape[0]].reshape(-1, 1)
    beta = X[K.shape[0]:].reshape(-1, 1)
    # Create posterior instance
    m, L, L_inv, KL, dKL_db_, dKL_da_ = recompute_posterior(alpha, beta, K)
    posterior = Posterior(mean=m, cov=L @ L.T, K=K)
    grad_dict = {}  # hyperparameter gradients (e.g. 'dL_dK', 'dL_dthetaL') are not supported at the moment
    return posterior, log_marginal, grad_dict, alpha, beta

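# Hypothetical usage sketch (not part of the original module): fitting the variational
# approximation to two direct observations and one comparison batch. All inputs are made up,
# and the shapes of alpha and beta (one entry per location, mean-field method) are an assumption.
def _demo_vi_comparison():
    import numpy as np
    import GPy

    X = np.array([[0.1], [0.5], [0.9]])
    y = [(0, 1.2), (1, 0.3)]   # (row in X, observed value)
    yc = [[(2, 1)]]            # one batch: X[2] preferred over X[1]
    kern = GPy.kern.RBF(1)
    sigma2s = 1e-2 * np.ones(X.shape[0])
    alpha = np.zeros((X.shape[0], 1))  # assumed initialization
    beta = np.ones((X.shape[0], 1))    # assumed initialization

    posterior, log_marginal, _, alpha, beta = vi_comparison(
        X, y, yc, kern, sigma2s, alpha, beta, max_iters=100, method='mf', optimize='adam')
    return posterior, log_marginal
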
def make_cov_cholesky_waveshaping(kernel: GPy.kern.Kern) -> np.ndarray:
    """Compute the Cholesky decomposition of the kernel covariance for waveshaping synthesis.

    :param kernel: The GP kernel
    :return: The Cholesky factor of the covariance matrix
    """
    # Remark: Since we are doing waveshaping, it is not necessary to consider
    # periodic / non-periodic kernels separately.
    samples = 44100 // 20  # one period at 20 Hz with a 44.1 kHz sample rate
    xs = np.arange(samples) * 2. * np.pi / samples
    xs = np.sin(xs)  # the waveshaper is driven by a sine wave
    cov = kernel.K(xs[:, None], xs[:, None])
    chol = GPy.util.linalg.jitchol(cov)
    return chol

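# Illustrative follow-up (not part of the original module): drawing one wavetable from the
# factor returned above. chol @ z has covariance kernel.K over the sine-warped inputs, so the
# resulting single-period table loops seamlessly. The kernel choice is arbitrary.
def _demo_waveshaping_sample():
    import numpy as np
    import GPy

    kernel = GPy.kern.Matern32(1, lengthscale=0.5)
    chol = make_cov_cholesky_waveshaping(kernel)
    z = np.random.default_rng(0).standard_normal(chol.shape[0])
    wavetable = chol @ z  # one period of the synthesised waveform
    return wavetable
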
def dgp_dmean(self, kern: GPy.kern.Kern, w_vec: np.ndarray, x: np.ndarray, X: np.ndarray) -> np.ndarray:
    """
    Partial derivatives of the GP posterior mean with respect to the sample locations
    (used for differentiating the GP posterior samples)

    :param kern: Prior covariance kernel
    :param w_vec: Woodbury vectors of the posterior of the model
    :param x: The locations the samples are taken at
    :param X: The locations used to train the GP model
    :return: The derivative of the posterior mean with respect to x (shape N x b x b x d)
    """
    N, b, d, n = w_vec.shape[0], x.shape[0], x.shape[1], X.shape[0]
    dkxX_dx = np.empty((b, n, d))
    dmu = np.zeros((N, b, b, d))
    for i in range(d):
        dkxX_dx[:, :, i] = kern.dK_dX(x, X, i)
        for j in range(b):
            # Only the mean at x_j depends on x_j: dmu_j/dx_j = dK(x_j, X)/dx_j w
            dmu[:, j, j, i] = np.matmul(dkxX_dx[j, :, i][None, :], w_vec[:, :, None]).flatten()
    return dmu  # N x b x b x d

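# Illustrative sketch (not part of the original module): a finite-difference check of the
# posterior mean derivative used above, mu(x) = K(x, X) w. The model below is made up.
def _demo_dmean_finite_difference():
    import numpy as np
    import GPy

    rng = np.random.RandomState(1)
    X_train = rng.rand(6, 1)
    model = GPy.models.GPRegression(X_train, np.cos(4 * X_train), GPy.kern.RBF(1))
    kern, w_vec = model.kern, model.posterior.woodbury_vector

    def post_mean(x):
        return kern.K(x, X_train) @ w_vec

    x = np.array([[0.4]])
    eps = 1e-6
    fd = (post_mean(x + eps) - post_mean(x)) / eps  # numerical dmu/dx at x
    return fd
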
def plot_samples(k: GPy.kern.Kern, directory: str) -> None:
    """Plot samples drawn from a GP prior with the given kernel.

    :param k: The kernel from which to draw samples
    :param directory: Directory where the figure is saved
    :return: None
    """
    X = np.linspace(0., 10., 500)
    X = X[:, None]
    mu = np.zeros(500)
    C = k.K(X, X)
    Z = np.random.multivariate_normal(mu, C, 20)

    fig, ax = plt.subplots(1, 1)
    for i in range(3):
        c = colors[i]
        ls = linestyles[i]
        ax.plot(X[:], Z[i, :], color=c, linestyle=ls)

    ls = f'l{k.lengthscale[0]}'.replace('.', '_')
    path = os.path.join(directory, f"samples_{k.name}_{ls}.pdf")
    plt.savefig(path, bbox_inches='tight')

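# Hypothetical usage (not part of the original module): plot_samples assumes module-level
# `colors` and `linestyles` sequences with at least three entries, and a writable directory.
def _demo_plot_samples():
    import GPy
    plot_samples(GPy.kern.RBF(1, lengthscale=1.5), directory=".")
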
def ep_comparison(X: np.ndarray, y: List[Tuple[int, float]], yc: List[List[Tuple[int, int]]],
                  kern: GPy.kern.Kern, sigma2s: np.ndarray, max_itt: int = 50, delta: float = 0.9,
                  eta: float = 1.0, tol: float = 1e-6, ga_approx_old: GaussianApproximation = None,
                  get_logger: Callable = None) -> Tuple[Posterior, float, Dict, GaussianApproximation]:
    """
    :param X: All locations of both direct observations and batch comparisons
    :param y: Direct observations as a list of tuples telling location index (row in X) and observation value.
    :param yc: Batch comparisons in a list of lists of tuples. Each batch is a list and tuples tell the comparisons (winner index, loser index)
    :param kern: GP kernel
    :param sigma2s: noise variance of the observations
    :param max_itt: maximum number of iterations
    :param delta: damping factor for the updates
    :param eta: parameter for fractional updates
    :param tol: convergence tolerance; EP stops once the relative change in the posterior parameters
        falls below this, unless max_itt is reached first
    :param ga_approx_old: previous Gaussian approximation to be reused, if one exists
    :param get_logger: Function for receiving the logger where the prints are forwarded.
    :return: A tuple consisting of the posterior approximation, the log marginal likelihood, the
        gradient dictionary and the Gaussian site approximation
    """
    t0 = time.time()

    N = X.shape[0]
    Ndir = len(y)
    Ncomp = len(yc)

    ###########################################################################
    # Construct observations and kernels
    ###########################################################################
    K = kern.K(X)

    ###########################################################################
    # Prepare marginal moments, site approximation and cavity containers
    ###########################################################################
    f_marg_moments = MarginalMoments(N)
    f_ga_approx = GaussianApproximation(np.zeros(N, dtype=np.float64), np.zeros((N, N), dtype=np.float64))
    f_cavity = CavityParams(N)

    # insert the likelihood information of each direct observation into the site approximation
    for i in range(Ndir):
        (ii, yi) = y[i]  # index in kernel, y value
        f_ga_approx.v[ii] = yi / sigma2s[i]
        f_ga_approx.tau[ii, ii] = 1. / sigma2s[i]

    if ga_approx_old is not None:  # if there exists an old Gaussian approximation, reuse it
        N_old = ga_approx_old.tau.shape[0]
        if N >= N_old:
            f_ga_approx.v[:N_old] = ga_approx_old.v
            f_ga_approx.tau[np.ix_(np.arange(N_old), np.arange(N_old))] = ga_approx_old.tau

    ###########################################################################
    # Prepare global approximation
    ###########################################################################
    f_post_params = update_posterior(K, f_ga_approx.v, f_ga_approx.tau, y, yc)
    if np.any(np.isnan(f_post_params.mu)):
        if get_logger is not None:
            get_logger().error('Posterior mean contains nan in the EP approximation')

    ###########################################################################
    # Iterate
    ###########################################################################
    for itt in range(max_itt):
        old_params = np.hstack((f_post_params.mu.copy(), f_post_params.Sigma_diag.copy()))
        if get_logger is not None:
            get_logger().info('Iteration %d' % (itt + 1))

        d_list = []
        if len(yc) > 0:
            d_list = np.random.choice(range(len(yc)), size=len(yc), replace=False)
        for d in d_list:  # iterate through the batches in random order
            loc_inds_winners = [yc[d][k][0] for k in range(len(yc[d]))]
            loc_inds_losers = [yc[d][k][1] for k in range(len(yc[d]))]
            loc_inds_batch = np.sort(np.unique(loc_inds_winners + loc_inds_losers))

            # get relevant EP parameters for the comparison points
            f_cavity._update_batch(eta=eta, ga_approx=f_ga_approx, post_params=f_post_params,
                                   batch=loc_inds_batch, get_logger=get_logger)
            try:  # get cavity parameters of the batch
                # indices within the batch
                ind_winners = [np.where(loc_inds_batch == it)[0][0] for it in loc_inds_winners]
                ind_losers = [np.where(loc_inds_batch == it)[0][0] for it in loc_inds_losers]
                f_marg_moments.logZ_hat[loc_inds_batch], \
                    f_marg_moments.mu_hat[loc_inds_batch], \
                    f_marg_moments.sigma2_hat[np.ix_(loc_inds_batch, loc_inds_batch)], \
                    sigma2s[loc_inds_batch] = \
                    _match_moments_batch(f_cavity.v[loc_inds_batch],
                                         f_cavity.tau[np.ix_(loc_inds_batch, loc_inds_batch)],
                                         ind_winners, ind_losers, sigma2s[loc_inds_batch],
                                         N=100000, get_logger=get_logger)
            except AssertionError:
                if get_logger is not None:
                    get_logger().error('Numerical problem with feedback %d in iteration %d. Skipping update' % (d, itt))

            _, sigma2s[loc_inds_batch] = f_ga_approx._update_batch(eta=eta, delta=delta,
                                                                   post_params=f_post_params,
                                                                   marg_moments=f_marg_moments,
                                                                   batch=loc_inds_batch,
                                                                   get_logger=get_logger,
                                                                   sigma2s=sigma2s[loc_inds_batch])
            f_post_params = update_posterior(K, f_ga_approx.v, f_ga_approx.tau, y, yc, get_logger=get_logger)
            if np.any(np.isnan(f_post_params.mu)):
                if get_logger is not None:
                    get_logger().error('Posterior mean contains nan in the EP approximation')

        # check for convergence
        new_params = np.hstack((f_post_params.mu.copy(), f_post_params.Sigma_diag.copy()))
        converged = np.mean((new_params - old_params) ** 2) / np.mean(old_params ** 2) < tol
        if converged:
            run_time = time.time() - t0
            if get_logger is not None:
                get_logger().info('Converged in %d iterations in %4.3fs' % (itt + 1, run_time))
            break

    ###########################################################################
    # Marginal likelihood & gradients
    ###########################################################################
    # compute the likelihood normalization constant for the direct observations
    for i in range(Ndir):
        (ii, yi) = y[i]  # index in kernel, y value
        f_cavity._update_i(eta=eta, ga_approx=f_ga_approx, post_params=f_post_params, i=ii)
        f_marg_moments.logZ_hat[ii] = log_npdf(yi, f_cavity.v[ii] / f_cavity.tau[ii, ii],
                                               1. / f_cavity.tau[ii, ii] + sigma2s[i])

    # marginal likelihood and gradient contribution from each f
    Z_tilde = _log_Z_tilde(f_marg_moments, f_ga_approx, f_cavity, y, yc)
    f_posterior, f_logZ, f_grad = _inference(K, f_ga_approx, f_cavity, Z_tilde, y, yc)

    return f_posterior, f_logZ, f_grad, f_ga_approx

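# Hypothetical usage sketch (not part of the original module): running EP on two direct
# observations and one comparison batch. All inputs below are made up for illustration.
def _demo_ep_comparison():
    import numpy as np
    import GPy

    X = np.array([[0.1], [0.5], [0.9]])
    y = [(0, 1.2), (1, 0.3)]   # (row in X, observed value)
    yc = [[(2, 1)]]            # one batch: X[2] preferred over X[1]
    kern = GPy.kern.RBF(1)
    sigma2s = 1e-2 * np.ones(X.shape[0])

    posterior, logZ, grad, ga_approx = ep_comparison(X, y, yc, kern, sigma2s, max_itt=30)
    # A later call with more data can warm-start from this run via ga_approx_old=ga_approx.
    return posterior, logZ
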