def compute_objective(elts_serve, elts_return, elts_prior_serve,
                      elts_prior_return, intercept, n, p, n_surfaces,
                      server_ids, returner_ids, surf_ids,
                      mean_surface_skills_serve, mean_surface_skills_return):
    """Return the expected log-likelihood minus the serve and return KL terms.

    Args:
        elts_serve: Elements handed to ``create_ls`` to build a batch of
            ``elts_serve.shape[0]`` factors for the serve covariances.
        elts_return: Same as ``elts_serve`` but for the return covariances.
        elts_prior_serve: Elements handed to ``lo_tri_from_elements`` to
            build the single serve prior factor.
        elts_prior_return: Same as ``elts_prior_serve`` for the return prior.
        intercept: Added to the serve-minus-return mean difference.
        n: Bound as the ``n`` keyword of ``log_binomial_lik``.
        p: Forwarded as the first argument of ``expectation``
            (presumably the observed outcomes -- TODO confirm).
        n_surfaces: Size of every covariance matrix built here.
        server_ids: Index of the server for each observation (assumed to be
            a 1-D integer tensor -- TODO confirm).
        returner_ids: Index of the returner for each observation.
        surf_ids: Index of the surface for each observation.
        mean_surface_skills_serve: Serve means, looked up as
            ``[server_id, surf_id]``.
        mean_surface_skills_return: Return means, looked up as
            ``[returner_id, surf_id]``.

    Returns:
        ``expected_lik - (kl_serve + kl_return)``.
    """
    # Small diagonal term added to every covariance built below.
    jitter = tf.eye(n_surfaces) * 1e-6

    # Prior covariances: L @ L^T + jitter (index order does the transpose).
    prior_L_serve = lo_tri_from_elements(elts_prior_serve, n_surfaces)
    prior_L_return = lo_tri_from_elements(elts_prior_return, n_surfaces)

    prior_cov_serve = tf.einsum(
        'ik,lk->il', prior_L_serve, prior_L_serve) + jitter
    prior_cov_return = tf.einsum(
        'ik,lk->il', prior_L_return, prior_L_return) + jitter

    # Batched covariances, one matrix per leading index: L_i @ L_i^T + jitter.
    L_serve = create_ls(elts_serve, n_surfaces, elts_serve.shape[0])
    L_return = create_ls(elts_return, n_surfaces, elts_return.shape[0])

    cov_serve = tf.einsum('ijk,ilk->ijl', L_serve, L_serve) + jitter
    cov_return = tf.einsum('ijk,ilk->ijl', L_return, L_return) + jitter

    # Per-observation means, picked at [id, surface].
    serve_idx = tf.stack([server_ids, surf_ids], axis=1)
    return_idx = tf.stack([returner_ids, surf_ids], axis=1)

    mean_serve_surface_skills = tf.gather_nd(mean_surface_skills_serve,
                                             serve_idx)
    mean_return_surface_skills = tf.gather_nd(mean_surface_skills_return,
                                              return_idx)

    # Per-observation variances: the [surface, surface] diagonal entry of
    # the covariance belonging to the relevant id.
    var_serve_surface_skills = tf.gather_nd(
        cov_serve, tf.stack([server_ids, surf_ids, surf_ids], axis=1))
    var_return_surface_skills = tf.gather_nd(
        cov_return, tf.stack([returner_ids, surf_ids, surf_ids], axis=1))

    # Each KL term uses a zero prior mean sized from its *own* mean tensor,
    # so the serve term does not depend on the return tensor's width.
    kl_serve = tf.reduce_sum(
        mvn_kl(mean_surface_skills_serve, cov_serve,
               tf.zeros(mean_surface_skills_serve.shape[1]), prior_cov_serve,
               is_batch=True))

    kl_return = tf.reduce_sum(
        mvn_kl(mean_surface_skills_return, cov_return,
               tf.zeros(mean_surface_skills_return.shape[1]),
               prior_cov_return, is_batch=True))

    lik = partial(log_binomial_lik, n=n)

    # Serve and return terms enter the mean with opposite signs, while
    # their variances are summed.
    pred_mean = (mean_serve_surface_skills - mean_return_surface_skills +
                 intercept)
    pred_var = var_serve_surface_skills + var_return_surface_skills

    expected_lik = expectation(p, pred_var, pred_mean, lik)

    return expected_lik - (kl_serve + kl_return)
def create_pos_def_mat_from_elts(elements, mat_size, jitter=JITTER):
    """Build ``L @ L^T + jitter * I`` from a vector of factor elements.

    Args:
        elements: Values handed to ``lo_tri_from_elements`` to build the
            ``mat_size``-sized factor ``L`` (presumably lower-triangular,
            going by the helper's name).
        mat_size: Size passed to the helper and to ``tf.eye``.
        jitter: Scale of the identity matrix added to the product.

    Returns:
        The product of the factor with its transpose, plus the scaled
        identity.
    """
    factor = lo_tri_from_elements(elements, mat_size)
    diag_shift = tf.eye(mat_size) * jitter

    return factor @ tf.transpose(factor) + diag_shift
Beispiel #3
0
def predict(fit_result: SOGPResult, X_new: np.ndarray):
    """Evaluate a fitted model at ``X_new``.

    All arrays taken from ``fit_result`` and ``X_new`` are cast to float32
    before being handed to the TensorFlow helpers.

    Args:
        fit_result: Fit output providing ``mu``, ``L_elts``, ``kernel``,
            ``alpha``, ``lengthscales``, ``bias_sd`` and ``Z``.
        X_new: Inputs at which to predict.

    Returns:
        A pair ``(mean, sd)``: ``pred_mean`` converted with ``.numpy()`` and
        the elementwise square root of ``pred_var``.
        NOTE(review): whether ``pred_var`` is a diagonal or a full
        covariance depends on the default of ``compute_qf_mean_cov`` --
        confirm.
    """
    # TODO: Is there something I can do about the casts here?

    def as_f32(values):
        return values.astype(np.float32)

    # The factor is sized by the length of the leading axis of ``mu``.
    n_inducing = fit_result.mu.shape[0]
    chol = lo_tri_from_elements(as_f32(fit_result.L_elts), n_inducing)

    base_kern = kern_lookup[fit_result.kernel]
    alpha = tf.constant(as_f32(fit_result.alpha))
    lengthscales = tf.constant(as_f32(fit_result.lengthscales))
    bias_sd = tf.constant(as_f32(fit_result.bias_sd))
    k_fun = get_kernel_fun(base_kern, alpha, lengthscales, bias_sd)

    pred_mean, pred_var = compute_qf_mean_cov(
        chol,
        as_f32(fit_result.mu),
        as_f32(X_new),
        as_f32(fit_result.Z),
        k_fun,
    )

    mean_np = pred_mean.numpy()
    sd = np.sqrt(pred_var)
    return mean_np, sd
Beispiel #4
0
def project_to_x(gp: InducingPointGPSpecification,
                 X: tf.Tensor,
                 diag_only=True) -> Tuple[tf.Tensor, tf.Tensor]:
    """Return ``compute_qf_mean_cov`` evaluated at ``X`` for ``gp``.

    Args:
        gp: Specification providing ``L_elts``, ``mu``, ``Z`` and
            ``kernel_fun``.
        X: Inputs forwarded to ``compute_qf_mean_cov``.
        diag_only: Forwarded unchanged as the ``diag_only`` keyword.

    Returns:
        The two-tensor result of ``compute_qf_mean_cov``.
    """
    # The factor is sized by the length of the leading axis of ``gp.mu``.
    n_inducing = gp.mu.shape[0]
    chol = lo_tri_from_elements(gp.L_elts, n_inducing)

    projected = compute_qf_mean_cov(
        chol, gp.mu, X, gp.Z, gp.kernel_fun, diag_only=diag_only)
    return projected
Beispiel #5
0
    def to_minimize_with_grad(x):
        """Evaluate the quantity to minimise and its gradient at ``x``.

        The value is the negated ``compute_objective`` result minus the
        summed log-probabilities of the lengthscale, kernel-variance and
        bias-variance priors.

        Args:
            x: Flat parameter vector; converted to a float32 tensor and
                unpacked with ``reconstruct_tf``.

        Returns:
            A pair ``(value, gradient)``, both converted to float64 NumPy
            values.
        """
        x_tf = tf.cast(tf.constant(x), tf.float32)

        with tf.GradientTape() as tape:
            # ``x_tf`` is a plain tensor, so it must be watched explicitly.
            tape.watch(x_tf)

            theta = reconstruct_tf(x_tf, summary)

            # The raw parameters are squared before use.
            alpha = theta["alpha"]**2
            lscales = theta["lscales"]**2
            bias_sd = theta["bias_sd"]**2

            L_cov = lo_tri_from_elements(theta["L_elts"], n_inducing)
            kern_fun = get_kernel_fun(kernel_fun, alpha, lscales, bias_sd)

            neg_objective = -compute_objective(
                X, y, theta["mu"], L_cov, theta["Z"], bernoulli_probit_lik,
                kern_fun)

            # The kernel and bias priors are evaluated on the squares of
            # ``alpha`` and ``bias_sd``.
            log_prior = (tf.reduce_sum(lscale_prior.log_prob(lscales)) +
                         kernel_var_prior.log_prob(alpha**2) +
                         bias_var_prior.log_prob(bias_sd**2))

            objective = neg_objective - log_prior

        grad = tape.gradient(objective, x_tf)

        if verbose:
            print(objective, np.linalg.norm(grad.numpy()))

        objective_np = objective.numpy().astype(np.float64)
        grad_np = grad.numpy().astype(np.float64)
        return (objective_np, grad_np)
Beispiel #6
0
def calculate_kl(gp: InducingPointGPSpecification) -> float:
    """Return ``compute_kl_term`` for the given specification.

    Args:
        gp: Specification providing ``L_elts``, ``mu``, ``Z`` and
            ``kernel_fun``.

    Returns:
        Whatever ``compute_kl_term`` returns for ``gp.mu``, the factor
        rebuilt from ``gp.L_elts``, ``gp.Z`` and ``gp.kernel_fun``.
    """
    # The factor is sized by the length of the leading axis of ``gp.mu``.
    n_inducing = gp.mu.shape[0]
    chol = lo_tri_from_elements(gp.L_elts, n_inducing)

    kl_term = compute_kl_term(gp.mu, chol, gp.Z, gp.kernel_fun)
    return kl_term