Example #1
def save_theta_and_grad_callback(step: int, loss: float, theta: np.ndarray,
                                 grad: np.ndarray, opt_state: Any,
                                 target_dir: str, summary: Any,
                                 save_every: int,
                                 additional_vars: Optional[Dict[str, np.ndarray]] = None):

    # Avoid a mutable default argument; fall back to an empty dict.
    if additional_vars is None:
        additional_vars = {}

    os.makedirs(target_dir, exist_ok=True)

    if step % save_every == 0:

        # Reconstruct the named parameter and gradient arrays from the flat vectors
        theta_dict = reconstruct_np(theta, summary)
        grad_dict = reconstruct_np(grad, summary)

        theta_dict['loss'] = loss
        theta_dict['step'] = step
        grad_dict['step'] = step

        # Add any additional variables passed in
        theta_dict.update(additional_vars)

        # Save
        theta_target = os.path.join(target_dir, f'theta_{step}.npz')
        grad_target = os.path.join(target_dir, f'grad_{step}.npz')

        np.savez(theta_target, **theta_dict)
        np.savez(grad_target, **grad_dict)
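
This callback is designed to be partially applied so that only the (step, loss, theta, grad, opt_state) arguments remain, which is how Example #4 below wraps it. A minimal sketch of that wiring, assuming a summary object from one of the flatten_and_summarise helpers and a placeholder target directory:

from functools import partial

# Bind the saving configuration up front; 'logs/run_1', summary and the
# save interval are placeholders supplied by the surrounding fitting code.
callback = partial(save_theta_and_grad_callback,
                   target_dir='logs/run_1',
                   summary=summary,
                   save_every=1000)

# The optimiser then invokes callback(step, loss, theta, grad, opt_state).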
Example #2
def to_optimise(
    flat_theta,
    X,
    z,
    weights,
    use_berman_turner,
    summary,
    init_kernel_spec,
    log_theta_dir=None,
    verbose=True,
    likelihood_scale_factor=1.0,
):

    global STEP

    flat_theta = tf.cast(tf.constant(flat_theta), tf.float32)

    with tf.GradientTape() as tape:

        tape.watch(flat_theta)

        theta = reconstruct_tf(flat_theta, summary)

        kernel_spec, gp_spec = update_specs(theta, init_kernel_spec)

        cur_objective = -calculate_objective(
            X, z, weights, gp_spec, use_berman_turner=use_berman_turner)

        kernel_prior_prob = calculate_prior_prob(kernel_spec)
        cur_objective = cur_objective - kernel_prior_prob

    # Compute the gradient outside the tape context; the logging and
    # bookkeeping below do not need to be recorded on the tape.
    cur_grad = tape.gradient(cur_objective, flat_theta)

    if log_theta_dir is not None:
        makedirs(log_theta_dir, exist_ok=True)
        grads = reconstruct_np(cur_grad.numpy(), summary)
        theta_np = reconstruct_np(flat_theta.numpy(), summary)
        np.savez(
            join(log_theta_dir, f"grads_{STEP}"),
            **grads,
            objective=cur_objective.numpy(),
            step=STEP,
        )
        np.savez(
            join(log_theta_dir, f"theta_{STEP}"),
            **theta_np,
            objective=cur_objective.numpy(),
            step=STEP,
        )

    STEP += 1

    if verbose:
        print(cur_objective, np.linalg.norm(cur_grad.numpy()))

    return (
        cur_objective.numpy().astype(np.float64),
        cur_grad.numpy().astype(np.float64),
    )
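
Because this function returns the objective and gradient as float64, it can be handed to scipy.optimize.minimize with jac=True, the same pattern used in the last snippet of this listing. A sketch under the assumption that X, z, weights, summary and init_kernel_spec come from the surrounding fitting code:

from functools import partial
from scipy.optimize import minimize

STEP = 0  # module-level counter incremented inside to_optimise

# Bind everything except flat_theta, which the optimiser supplies.
objective = partial(to_optimise, X=X, z=z, weights=weights,
                    use_berman_turner=True, summary=summary,
                    init_kernel_spec=init_kernel_spec,
                    log_theta_dir='./theta_log')

result = minimize(objective, flat_theta.numpy().astype(np.float64),
                  method='L-BFGS-B', jac=True)

final_theta = reconstruct_np(result.x, summary)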
Example #3
def test_end_to_end():

    input_arrays = generate_arrays()

    flat_array, summaries = flatten_and_summarise(**input_arrays)

    reconstructed = reconstruct_np(flat_array, summaries)

    assert all([
        np.array_equal(reconstructed[x], input_arrays[x]) for x in input_arrays
    ])
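
generate_arrays is not shown in this excerpt; a minimal sketch of what it might return, assuming the round-trip should cover named float arrays of different shapes (the names and shapes here are purely illustrative):

def generate_arrays():
    # Purely illustrative contents; any mix of named float arrays
    # should survive the flatten / reconstruct round-trip.
    rng = np.random.default_rng(0)
    return {
        'weights': rng.standard_normal((3, 4)),
        'bias': rng.standard_normal(5),
        'scale': rng.standard_normal((2, 6)),
    }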
Example #4
def fit(X: np.ndarray,
        z: np.ndarray,
        weights: np.ndarray,
        sp_num: np.ndarray,
        n_inducing: int,
        n_latent: int,
        log_folder: str,
        use_berman_turner: bool = True,
        X_thin: Optional[np.ndarray] = None,
        n_thin_inducing: Optional[int] = None,
        learning_rate: float = 0.01,
        steps: int = 100000,
        batch_size: int = 50000,
        save_opt_state: bool = False,
        save_every: Optional[int] = 1000,
        fix_thin_inducing: bool = False,
        cov_alpha: Optional[float] = None,
        thin_alpha: Optional[float] = 1.,
        fix_zero_w_prior_mean: bool = True,
        separate_w_prior_vars: bool = True):

    n_cov = X.shape[1]
    n_data = X.shape[0]
    n_out = len(np.unique(sp_num))

    Z = find_starting_z(X[(z == 0) & (sp_num == np.unique(sp_num)[0])],
                        n_inducing)

    if X_thin is not None:
        # Make sure we were given the number of thinning inducing points to use
        assert n_thin_inducing is not None
        Z_thin = find_starting_z(
            X_thin[(z == 0) & (sp_num == np.unique(sp_num)[0])],
            n_thin_inducing)
    else:
        Z_thin = None

    log_cov_alpha = np.log(cov_alpha) if cov_alpha is not None else tf.cast(
        tf.constant(np.log(np.sqrt(2. / n_latent))), tf.float32)
    log_thin_alpha = np.log(thin_alpha)

    start_theta = initialise_theta(Z,
                                   n_latent,
                                   n_cov,
                                   n_out,
                                   Z_thin=Z_thin,
                                   log_cov_alpha=log_cov_alpha,
                                   log_thin_alpha=log_thin_alpha,
                                   separate_w_prior_vars=separate_w_prior_vars)

    if fix_thin_inducing:
        # Remove them from the theta dict of parameters to optimise
        start_theta = {x: y for x, y in start_theta.items() if x != 'thin_Zs'}

    if fix_zero_w_prior_mean:
        # Remove them from the theta dict of parameters to optimise
        start_theta = {
            x: y
            for x, y in start_theta.items() if x != 'w_prior_mean'
        }

    flat_theta, summary = flatten_and_summarise_tf(**start_theta)

    log_folder = os.path.join(
        log_folder,
        create_path_with_variables(lr=learning_rate,
                                   batch_size=batch_size,
                                   steps=steps))

    os.makedirs(log_folder, exist_ok=True)

    opt_step_fun = partial(adam_step, step_size_fun=lambda t: learning_rate)
    opt_state = initialise_state(flat_theta.shape[0])

    flat_theta = flat_theta.numpy()

    to_optimise = partial(objective_and_grad,
                          n_data=n_data,
                          n_latent=n_latent,
                          summary=summary,
                          use_berman_turner=use_berman_turner,
                          log_cov_alpha=log_cov_alpha)

    if fix_thin_inducing:

        to_optimise = partial(to_optimise,
                              thin_Zs=tf.constant(
                                  np.expand_dims(Z_thin.astype(np.float32),
                                                 axis=0)))

    n_w_means = n_out if separate_w_prior_vars else 1

    if fix_zero_w_prior_mean:
        to_optimise = partial(to_optimise,
                              w_prior_mean=tf.zeros((n_w_means, n_latent)))

    full_data = {'X': X, 'sp_num': sp_num, 'z': z, 'weights': weights}

    log_file = os.path.join(log_folder, 'losses.txt')

    if X_thin is not None:
        full_data['X_thin'] = X_thin
    else:
        to_optimise = partial(to_optimise, X_thin=None)

    loss_log_file = open(log_file, 'w')

    additional_vars = {}

    if fix_thin_inducing:
        # Store thin Zs for callback to save
        additional_vars['thin_Zs'] = np.expand_dims(Z_thin, axis=0)

    if fix_zero_w_prior_mean:
        additional_vars['w_prior_mean'] = np.zeros((n_w_means, n_latent))

    additional_vars['log_cov_alpha'] = log_cov_alpha
    additional_vars['log_thin_alpha'] = log_thin_alpha

    def opt_callback(step: int, loss: float, theta: np.ndarray,
                     grad: np.ndarray, opt_state: Any):

        # Save theta and the gradients
        save_theta_and_grad_callback(step,
                                     loss,
                                     theta,
                                     grad,
                                     opt_state,
                                     log_folder,
                                     summary,
                                     save_every,
                                     additional_vars=additional_vars)

        # Log the loss
        loss_log_callback(step, loss, theta, grad, opt_state, loss_log_file)

    flat_theta, loss_log, _ = optimise_minibatching(full_data,
                                                    to_optimise,
                                                    opt_step_fun,
                                                    opt_state,
                                                    flat_theta,
                                                    batch_size,
                                                    steps,
                                                    X.shape[0],
                                                    callback=opt_callback)

    loss_log_file.close()

    # Cast to float32
    flat_theta = flat_theta.astype(np.float32)

    final_theta = reconstruct_np(flat_theta, summary)

    if fix_thin_inducing:
        final_theta['thin_Zs'] = np.expand_dims(Z_thin, axis=0)

    if fix_zero_w_prior_mean:
        # Keep the shape consistent with the value used during optimisation
        final_theta['w_prior_mean'] = np.zeros((n_w_means, n_latent))

    final_theta['log_cov_alpha'] = log_cov_alpha
    final_theta['log_thin_alpha'] = log_thin_alpha

    return final_theta
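
A hedged usage sketch for fit; the inducing-point count, latent dimension, step and batch sizes and the log folder below are placeholders rather than settings from the original project:

# X is (n_data, n_cov); z, weights and sp_num are length n_data, with sp_num
# holding integer species indices (n_out is inferred via np.unique above).
final_theta = fit(X, z, weights, sp_num,
                  n_inducing=100,
                  n_latent=8,
                  log_folder='./logs',
                  steps=10000,
                  batch_size=10000,
                  save_every=1000)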
Example #5
def to_optimise(flat_theta):

    flat_theta = tf.cast(tf.constant(flat_theta), tf.float32)

    with tf.GradientTape() as tape:

        tape.watch(flat_theta)

        theta = reconstruct_tf(flat_theta, summary)

        obj = -compute_objective(n=n, p=p, n_surfaces=n_surfaces,
                                 server_ids=server_ids,
                                 returner_ids=returner_ids, surf_ids=surf_ids,
                                 **theta)

    # Compute the gradient outside the tape context
    grad = tape.gradient(obj, flat_theta)

    print(obj, np.linalg.norm(grad.numpy()))

    print(np.round(covar_to_corr(pos_def_mat_from_vector(
        theta['elts_prior_serve'], n_surfaces)), 2))

    return obj.numpy().astype(np.float64), grad.numpy().astype(np.float64)


result = minimize(to_optimise, flat_theta.numpy().astype(np.float64),
                  method='L-BFGS-B', jac=True)

np.savez('surface_model_1990', players=encoder.classes_,
         surfaces=surf_enc.classes_, **reconstruct_np(result.x, summary))
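
For completeness, np.savez appends the .npz extension, so the fitted arrays can be reloaded later:

saved = np.load('surface_model_1990.npz')
players = saved['players']
surfaces = saved['surfaces']
# The remaining keys are the parameter arrays reconstructed from result.x.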