def compute_objective(elts_serve, elts_return, elts_prior_serve,
                      elts_prior_return, intercept, n, p, n_surfaces,
                      server_ids, returner_ids, surf_ids,
                      mean_surface_skills_serve, mean_surface_skills_return):
    """Return the expected likelihood minus the serve and return KL terms.

    Args:
        elts_serve: Elements turned into a batch of lower-triangular factors
            by ``create_ls``; ``elts_serve.shape[0]`` is passed as the batch
            count (presumably one factor per server -- confirm).
        elts_return: Same as ``elts_serve`` for the return skills.
        elts_prior_serve: Elements of the single lower-triangular factor of
            the serve prior covariance.
        elts_prior_return: Elements of the single lower-triangular factor of
            the return prior covariance.
        intercept: Added to the serve-minus-return mean difference.
        n: Bound as the ``n`` keyword of ``log_binomial_lik``.
        p: Forwarded as the first argument of ``expectation`` (presumably the
            observed outcomes -- confirm against ``expectation``).
        n_surfaces: Size of every per-surface covariance matrix.
        server_ids: Row index into the serve means/covariances, one entry per
            observation.
        returner_ids: Row index into the return means/covariances, one entry
            per observation.
        surf_ids: Surface index, one entry per observation.
        mean_surface_skills_serve: Serve means, indexed ``[server, surface]``.
        mean_surface_skills_return: Return means, indexed
            ``[returner, surface]``.

    Returns:
        ``expected_lik - (kl_serve + kl_return)``.
    """
    # Small diagonal term added to every covariance built below.
    jitter = tf.eye(n_surfaces) * 1e-6

    # Prior covariances: a single L @ L^T per skill type.
    prior_L_serve = lo_tri_from_elements(elts_prior_serve, n_surfaces)
    prior_L_return = lo_tri_from_elements(elts_prior_return, n_surfaces)
    prior_cov_serve = tf.einsum(
        'ik,kl->il', prior_L_serve, tf.transpose(prior_L_serve)) + jitter
    prior_cov_return = tf.einsum(
        'ik,kl->il', prior_L_return, tf.transpose(prior_L_return)) + jitter

    # Batched covariances: one L @ L^T per leading index.
    L_serve = create_ls(elts_serve, n_surfaces, elts_serve.shape[0])
    L_return = create_ls(elts_return, n_surfaces, elts_return.shape[0])
    cov_serve = tf.einsum(
        'ijk,ikl->ijl', L_serve, tf.transpose(L_serve, (0, 2, 1))) + jitter
    cov_return = tf.einsum(
        'ijk,ikl->ijl', L_return, tf.transpose(L_return, (0, 2, 1))) + jitter

    # Per-observation mean: entry [player, surface] of each mean table.
    mean_serve_surface_skills = tf.gather_nd(
        mean_surface_skills_serve,
        tf.stack([server_ids, surf_ids], axis=1))
    mean_return_surface_skills = tf.gather_nd(
        mean_surface_skills_return,
        tf.stack([returner_ids, surf_ids], axis=1))

    # Per-observation variance: diagonal entry [player, surface, surface].
    var_serve_surface_skills = tf.gather_nd(
        cov_serve, tf.stack([server_ids, surf_ids, surf_ids], axis=1))
    var_return_surface_skills = tf.gather_nd(
        cov_return, tf.stack([returner_ids, surf_ids, surf_ids], axis=1))

    # KL of each batch against a zero vector and the matching prior
    # covariance. The zero vector for the serve term is sized from the serve
    # means (it was previously sized from the return means).
    kl_serve = tf.reduce_sum(
        mvn_kl(mean_surface_skills_serve, cov_serve,
               tf.zeros(mean_surface_skills_serve.shape[1]),
               prior_cov_serve, is_batch=True))
    kl_return = tf.reduce_sum(
        mvn_kl(mean_surface_skills_return, cov_return,
               tf.zeros(mean_surface_skills_return.shape[1]),
               prior_cov_return, is_batch=True))

    lik = partial(log_binomial_lik, n=n)

    pred_mean = (mean_serve_surface_skills
                 - mean_return_surface_skills
                 + intercept)
    pred_var = var_serve_surface_skills + var_return_surface_skills

    expected_lik = expectation(p, pred_var, pred_mean, lik)

    return expected_lik - (kl_serve + kl_return)
def create_pos_def_mat_from_elts(elements, mat_size, jitter=JITTER):
    """Build ``L @ L^T + jitter * I`` from lower-triangular elements.

    Args:
        elements: Passed to ``lo_tri_from_elements`` to build the factor ``L``.
        mat_size: Side length of the factor and of the identity that is added.
        jitter: Scale of the identity added to the product.

    Returns:
        The ``mat_size`` x ``mat_size`` matrix ``L @ L^T`` plus
        ``tf.eye(mat_size) * jitter``.
    """
    lower = lo_tri_from_elements(elements, mat_size)
    diagonal_shift = tf.eye(mat_size) * jitter
    return lower @ tf.transpose(lower) + diagonal_shift
def predict(fit_result: SOGPResult, X_new: np.ndarray):
    """Evaluate a fitted result at new inputs.

    Every array taken from ``fit_result`` and ``X_new`` is cast to float32
    before being handed to the TensorFlow helpers.

    Args:
        fit_result: Supplies ``mu``, ``L_elts``, ``kernel``, ``alpha``,
            ``lengthscales``, ``bias_sd`` and ``Z``.
        X_new: Inputs at which to predict.

    Returns:
        A pair ``(mean, spread)``: the first output of ``compute_qf_mean_cov``
        converted with ``.numpy()``, and ``np.sqrt`` of its second output
        (presumably a standard deviation, assuming the second output holds
        variances -- confirm).
    """
    # TODO: Is there something I can do about the casts here?
    def as_f32(array):
        return array.astype(np.float32)

    n_inducing = fit_result.mu.shape[0]
    chol = lo_tri_from_elements(as_f32(fit_result.L_elts), n_inducing)

    kernel = get_kernel_fun(
        kern_lookup[fit_result.kernel],
        tf.constant(as_f32(fit_result.alpha)),
        tf.constant(as_f32(fit_result.lengthscales)),
        tf.constant(as_f32(fit_result.bias_sd)),
    )

    mean, var = compute_qf_mean_cov(
        chol,
        as_f32(fit_result.mu),
        as_f32(X_new),
        as_f32(fit_result.Z),
        kernel,
    )

    return mean.numpy(), np.sqrt(var)
def project_to_x(gp: InducingPointGPSpecification, X: tf.Tensor,
                 diag_only=True) -> Tuple[tf.Tensor, tf.Tensor]:
    """Return the result of ``compute_qf_mean_cov`` for ``gp`` at ``X``.

    Args:
        gp: Supplies ``L_elts``, ``mu``, ``Z`` and ``kernel_fun``.
        X: Inputs forwarded to ``compute_qf_mean_cov``.
        diag_only: Forwarded unchanged to ``compute_qf_mean_cov``.

    Returns:
        Whatever ``compute_qf_mean_cov`` returns (annotated as a pair of
        tensors).
    """
    n_inducing = gp.mu.shape[0]
    chol = lo_tri_from_elements(gp.L_elts, n_inducing)
    return compute_qf_mean_cov(
        chol, gp.mu, X, gp.Z, gp.kernel_fun, diag_only=diag_only)
def to_minimize_with_grad(x):
    """Return the value and gradient of the quantity to minimise at ``x``.

    The quantity is the negated ``compute_objective`` minus the summed prior
    log-probabilities. ``summary``, ``n_inducing``, ``kernel_fun``, ``X``,
    ``y``, ``bernoulli_probit_lik``, the three priors and ``verbose`` are not
    parameters; they are read from the surrounding scope.

    Args:
        x: Flat parameter vector; converted to a float32 tensor and unpacked
            by ``reconstruct_tf``.

    Returns:
        ``(value, gradient)``, both converted to NumPy and cast to float64.
    """
    with tf.GradientTape() as tape:
        flat_params = tf.cast(tf.constant(x), tf.float32)
        # A plain tensor is not tracked by the tape unless watched explicitly.
        tape.watch(flat_params)

        theta = reconstruct_tf(flat_params, summary)

        # The raw entries are squared before use.
        alpha = theta["alpha"]**2
        lscales = theta["lscales"]**2
        bias_sd = theta["bias_sd"]**2

        chol = lo_tri_from_elements(theta["L_elts"], n_inducing)
        kern = get_kernel_fun(kernel_fun, alpha, lscales, bias_sd)

        neg_objective = -compute_objective(
            X, y, theta["mu"], chol, theta["Z"], bernoulli_probit_lik, kern)

        # The kernel and bias priors are evaluated on the squares of
        # ``alpha`` and ``bias_sd``.
        log_prior = (tf.reduce_sum(lscale_prior.log_prob(lscales))
                     + kernel_var_prior.log_prob(alpha**2)
                     + bias_var_prior.log_prob(bias_sd**2))

        objective = neg_objective - log_prior

    grad_np = tape.gradient(objective, flat_params).numpy()

    if verbose:
        print(objective, np.linalg.norm(grad_np))

    value = objective.numpy().astype(np.float64)
    return (value, grad_np.astype(np.float64))
def calculate_kl(gp: InducingPointGPSpecification) -> float:
    """Return ``compute_kl_term`` evaluated for ``gp``.

    Args:
        gp: Supplies ``L_elts``, ``mu``, ``Z`` and ``kernel_fun``.

    Returns:
        The value produced by ``compute_kl_term``.
        NOTE(review): annotated as ``float``, but the value is returned
        without conversion -- confirm what ``compute_kl_term`` yields.
    """
    n_inducing = gp.mu.shape[0]
    chol = lo_tri_from_elements(gp.L_elts, n_inducing)
    return compute_kl_term(gp.mu, chol, gp.Z, gp.kernel_fun)