Example #1
    def dgp_dSigma(self, x: np.ndarray, X: np.ndarray, kern: GPy.kern.Kern,
                   w_inv: np.ndarray) -> np.ndarray:
        """
        Partial derivatives of the GP posterior samples with respect to the posterior covariance matrix

        :param x: The locations the samples are taken at
        :param X: The locations used to train the GP model
        :param kern: Prior covariance kernel
        :param w_inv: Inverses of the Woodbury matrix of the model
        :return: The derivative of the GP samples with respect to the posterior covariance matrix
        """
        N, b, d, n = w_inv.shape[0], x.shape[0], x.shape[1], X.shape[0]
        dkxX_dx = np.empty((b, n, d))
        dkxx_dx = np.empty((b, b, d))
        for i in range(d):
            dkxX_dx[:, :, i] = kern.dK_dX(x, X, i)
            dkxx_dx[:, :, i] = kern.dK_dX(x, x, i)
        K = kern.K(x, X)

        grad = np.empty((N, b, d))
        dsigma = np.zeros((N, b, b, b, d))
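        # In GPy the posterior covariance at the sample locations is
        # Sigma(x) = k(x, x) - k(x, X) W^-1 k(X, x), with W^-1 the Woodbury inverse.
        # W^-1 does not depend on x, so the product rule gives the three terms
        # assembled inside the loop below.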
        for i in range(b):
            for j in range(d):
                Ks = np.zeros((b, n))
                Ks[i, :] = dkxX_dx[i, :, j]
                dKss_dxi = np.zeros((b, b))
                dKss_dxi[i, :] = dkxx_dx[i, :, j]
                dKss_dxi[:, i] = dkxx_dx[i, :, j].T
                dKss_dxi[i, i] = 0
                dsigma[:, :, :, i, j] = dKss_dxi[None, :, :] - np.matmul(
                    np.matmul(Ks[None, :, :], w_inv),
                    (K.T)[None, :, :]) - np.matmul(
                        np.matmul(K[None, :, :], w_inv), (Ks.T)[None, :, :])
        return dsigma

    def get_dk_dtheta(self, k: GPy.kern.Kern, X, X2=None):
        """Derivatives of the kernel matrix with respect to the kernel hyperparameters (variance and lengthscale)."""
        assert isinstance(k, self.acceptable_kernels)

        if X2 is None:
            X2 = X
        X_sliced, X2_sliced = X[:, k.active_dims], X2[:, k.active_dims]

        if isinstance(k, (GPy.kern.RBF, GPy.kern.Matern52)):
            dk_dr = k.dK_dr_via_X(X_sliced, X2_sliced)

            # dr/dl
            if k.ARD:
                tmp = k._inv_dist(X_sliced, X2_sliced)
                dr_dl = -np.dstack([
                    tmp *
                    np.square(X_sliced[:, q:q + 1] - X2_sliced[:, q:q + 1].T) /
                    k.lengthscale[q]**3 for q in range(k.input_dim)
                ])
                dk_dl = dk_dr[..., None] * dr_dl
            else:
                r = k._scaled_dist(X_sliced, X2_sliced)
                dr_dl = -r / k.lengthscale
                dk_dl = dk_dr * dr_dl

            # # For testing the broadcast multiplication
            # dk_dl_slow = []
            # for ii in range(dr_dl.shape[-1]):
            #     dr_dlj = dr_dl[...,ii]
            #     dk_dlj = dk_dr * dr_dlj
            #     dk_dl_slow.append(dk_dlj)
            #
            # dk_dl_slow = np.dstack(dk_dl_slow)

        elif isinstance(k, CategoryOverlapKernel):
            dk_dl = None

        else:
            raise NotImplementedError

        # Return variance grad as well, if not fixed
        if not self.fix_inner_variances:
            return k.K(X, X2) / k.variance, dk_dl
        else:
            return dk_dl
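
A quick way to sanity-check the non-ARD lengthscale gradient used above (dk_dl = dk_dr * dr_dl with dr_dl = -r / lengthscale) is a finite-difference comparison on a plain GPy RBF kernel. This is a minimal sketch, not part of the original code:

import numpy as np
import GPy

X = np.random.rand(5, 2)
k = GPy.kern.RBF(input_dim=2, lengthscale=1.5)

# Analytic gradient: for the isotropic RBF, dk/dr = -r * k(r) and dr/dl = -r / l,
# so dK/dl = K * r**2 / l.
r = k._scaled_dist(X, X)
dK_dl = k.K(X) * r**2 / 1.5

# Forward finite difference in the lengthscale.
eps = 1e-6
K0 = k.K(X)
k.lengthscale = 1.5 + eps
K1 = k.K(X)
print(np.allclose((K1 - K0) / eps, dK_dl, atol=1e-5))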
Example #3
def vi_comparison(X: np.ndarray, y: List[Tuple[int, float]], yc: List[List[Tuple[int, int]]],
                  kern: GPy.kern.Kern, sigma2s: np.ndarray, alpha: np.ndarray, beta: np.ndarray,
                  max_iters: int=200, lr: float=1e-3, method: str='fr', optimize: str="adam",
                  get_logger: Callable=None) -> Tuple[Posterior, float, Dict, np.ndarray, np.ndarray]:
    """
    :param X: All locations of both direct observations and batch comparisons
    :param y: Direct observations as a list of tuples giving the location index (row in X) and the observation value
    :param yc: Batch comparisons as a list of lists of tuples. Each batch is a list and each tuple gives a comparison (winner index, loser index)
    :param kern: Prior covariance kernel
    :param sigma2s: Noise variance of the observations
    :param alpha: Initial values for alpha
    :param beta: Initial values for beta
    :param max_iters: Maximum number of optimization iterations
    :param lr: Learning rate for the optimizer
    :param method: Full rank 'fr' or mean field 'mf' method
    :param optimize: Optimization algorithm, 'adam' or 'l-bfgs-B'
    :param get_logger: Function that returns the logger to which prints are forwarded
    :return: A tuple containing the posterior, the log marginal likelihood, its gradients with respect to the hyperparameters (not supported at the moment), and the alpha and beta values
    """
    if method == 'fr':
        recompute_posterior = recompute_posterior_fr
        s_to_l = dL_fr
    else:
        recompute_posterior = recompute_posterior_mf
        s_to_l = dL_mf

    K = kern.K(X)
    K = K + 1e-6*np.identity(len(K))
    N = X.shape[0]

    X0 = np.r_[alpha, beta]
    args = [K, sigma2s, y, yc, recompute_posterior, s_to_l]
    if optimize == "adam":
        X, log_marginal, _ = adam(log_lik, X0.flatten(), args, bounds=None, max_it=max_iters, get_logger=get_logger)
    else:
        res = sp.optimize.minimize(fun=log_lik,
                                   x0=X0.flatten(),
                                   args=args,
                                   method='L-BFGS-B',
                                   jac=True,
                                   bounds=None
                                   )
        X = res.x.reshape(-1)
        log_marginal = res.fun
    alpha = X[:K.shape[0]].reshape(-1,1)
    beta = X[K.shape[0]:].reshape(-1,1)

    # Create posterior instance
    m, L, L_inv, KL, dKL_db_, dKL_da_ = recompute_posterior(alpha, beta, K)
    posterior = Posterior(mean=m, cov=L @ L.T, K=K)
    grad_dict = {}  # {'dL_dK': dF_dK - dKL_dK, 'dL_dthetaL': dL_dthetaL}
    # return posterior, log_marginal, grad_dict
    return posterior, log_marginal, grad_dict, alpha, beta
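
The y and yc containers follow the format described in the docstring; a minimal illustration (constructed here for clarity, not taken from the original repository):

import numpy as np

X = np.array([[0.0], [0.4], [0.9]])   # rows are observation locations
y = [(0, 1.3)]                        # direct observation: the value at X[0] is 1.3
yc = [[(2, 1), (2, 0)]]               # one comparison batch: X[2] beat X[1] and X[0]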
Example #4
def make_cov_cholesky_waveshaping(kernel: GPy.kern.Kern) -> np.ndarray:
    """Compute the Cholesky decomposition for waveshaping synthesis.

    :param kernel: The GP kernel
    :return: The Cholesky decomposition
    """
    #  Remark: Since we are doing waveshaping, it is not necessary to consider
    #  periodic / non-periodic kernels separately.
    samples = 44100 // 20  # 44100 Hz / 20 Hz = 2205 points over one cycle
    xs = np.arange(samples) * 2. * np.pi / samples
    xs = np.sin(xs)
    cov = kernel.K(xs[:, None], xs[:, None])
    chol = GPy.util.linalg.jitchol(cov)
    return chol
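
A minimal usage sketch (the kernel choice is illustrative): multiplying the returned Cholesky factor by standard normal noise draws one wavetable period from the GP prior.

import numpy as np
import GPy

k = GPy.kern.RBF(input_dim=1, lengthscale=0.5)
L = make_cov_cholesky_waveshaping(k)
wave = L @ np.random.randn(L.shape[0])   # one sampled waveform period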
Example #5
    def dgp_dmean(self, kern: GPy.kern.Kern, w_vec: np.ndarray, x: np.ndarray,
                  X: np.ndarray) -> np.ndarray:
        """
        Partial derivatives of the GP posterior samples with respect to the posterior mean

        :param kern: Prior covariance kernel
        :param w_vec: Woodbury vectors of the posterior of the model
        :param x: The locations the samples are taken at
        :param X: The locations used to train the GP model
        :return: The derivative of the GP samples with respect to the posterior mean
        """
        N, b, d, n = w_vec.shape[0], x.shape[0], x.shape[1], X.shape[0]
        dkxX_dx = np.empty((b, n, d))
        dmu = np.zeros((N, b, b, d))
        for i in range(d):
            dkxX_dx[:, :, i] = kern.dK_dX(x, X, i)
            for j in range(b):
                dmu[:, j, j, i] = np.matmul(dkxX_dx[j, :, i][None, :],
                                            w_vec[:, :, None]).flatten()  # d
        return dmu  # N x b x b x d
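
A note on the formula above: in GPy's parametrisation the posterior mean is mu(x) = k(x, X) w_vec, where w_vec is the Woodbury vector, so d mu(x_j) / d x_{j,i} = d k(x_j, X) / d x_{j,i} . w_vec, which is exactly what the inner loop writes onto the diagonal of dmu.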
Example #6
def plot_samples(k: GPy.kern.Kern, directory: str) -> None:
    """Plots samples from a kernel.

    :param k: The kernel from which to draw samples
    :param directory: Directory in which the figure is saved
    :return: None
    """

    X = np.linspace(0., 10., 500)
    X = X[:, None]
    mu = np.zeros(500)
    C = k.K(X, X)
    Z = np.random.multivariate_normal(mu, C, 20)
    fig, ax = plt.subplots(1, 1)

    # `colors` and `linestyles` are module-level lists defined elsewhere in the source file
    for i in range(3):
        c = colors[i]
        ls = linestyles[i]
        ax.plot(X[:, 0], Z[i, :], color=c, linestyle=ls)

    ls = f'l{k.lengthscale[0]}'.replace('.', '_')
    path = os.path.join(directory, f"samples_{k.name}_{ls}.pdf")
    plt.savefig(path, bbox_inches='tight')
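
A hypothetical call of plot_samples; colors and linestyles appear to be module-level lists in the original file, so placeholders are defined here to make the sketch self-contained:

import GPy

colors = ['C0', 'C1', 'C2']      # placeholder for the module-level colour list
linestyles = ['-', '--', ':']    # placeholder for the module-level linestyle list
k = GPy.kern.Matern32(input_dim=1, lengthscale=1.0)
plot_samples(k, directory='.')
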
def ep_comparison(X: np.ndarray, y: List[Tuple[int, float]], yc: List[List[Tuple[int, int]]],
                  kern: GPy.kern.Kern, sigma2s: np.ndarray, max_itt: int=50, delta: float=0.9,
                  eta: float = 1.0, tol: float=1e-6, ga_approx_old: GaussianApproximation=None,
                  get_logger: Callable=None) -> Tuple[Posterior, int, Dict, GaussianApproximation]:
    """
    :param X: All locations of both direct observations and batch comparisons
    :param y: Direct observations as a list of tuples giving the location index (row in X) and the observation value
    :param yc: Batch comparisons as a list of lists of tuples. Each batch is a list and each tuple gives a comparison (winner index, loser index)
    :param kern: GP kernel
    :param sigma2s: Noise variance of the observations
    :param max_itt: Maximum number of iterations
    :param delta: Damping factor for the updates
    :param eta: Parameter for fractional updates
    :param tol: Convergence tolerance; EP stops once the relative change in the posterior parameters falls below it, unless the maximum number of iterations is reached first
    :param ga_approx_old: If a previous Gaussian approximation exists, it should be passed here
    :param get_logger: Function that returns the logger to which prints are forwarded
    :return: A tuple consisting of the posterior approximation, the log marginal likelihood, the gradient dictionary and the Gaussian approximations of the batches
    """

    t0 = time.time()    

    N = X.shape[0]
    Ndir = len(y)
    Ncomp = len(yc)
    ###################################################################################
    # Construct observations and kernels
    ###################################################################################
    K = kern.K(X)

    ###################################################################################
    # Prepare marginal moments, site approximation and cavity containers
    ###################################################################################


    f_marg_moments = MarginalMoments(N)
    f_ga_approx = GaussianApproximation(np.zeros(N,dtype=np.float64), np.zeros((N,N),dtype=np.float64))
    f_cavity = CavityParams(N)

    # insert likelihood information to each gaussian approximation
    for i in range(Ndir):
        (ii,yi) = y[i] #index in kernel, y value
        f_ga_approx.v[ii] = yi / sigma2s[i]
        f_ga_approx.tau[ii,ii] = 1./sigma2s[i]

    if ga_approx_old is not None: #If there exists old gaussian approximation, we reuse it
        N_old = ga_approx_old.tau.shape[0]
        if N >= N_old:
            f_ga_approx.v[:N_old] = ga_approx_old.v
            f_ga_approx.tau[np.ix_(np.arange(N_old), np.arange(N_old))] = ga_approx_old.tau


    ###################################################################################
    # Prepare global approximations
    ###################################################################################
    f_post_params = update_posterior(K, f_ga_approx.v, f_ga_approx.tau, y, yc)
    if np.any(np.isnan(f_post_params.mu)):
        if get_logger is not None:
            get_logger().error('Posterior mean contains nan in the EP approximation')


    ###################################################################################
    # Iterate
    ###################################################################################
    for itt in range(max_itt):
        old_params = np.hstack((f_post_params.mu.copy(), f_post_params.Sigma_diag.copy()))

        if get_logger is not None:
            get_logger().info('Iteration %d' % (itt + 1))
        d_list = []
        if len(yc) > 0:
            d_list = np.random.choice(range(len(yc)), size=len(yc), replace=False)
        for d in d_list: #iterate through batches
            loc_inds_winners, loc_inds_loosers = [yc[d][k][0] for k in range(len(yc[d]))], [yc[d][k][1] for k in range(len(yc[d]))]
            loc_inds_batch = np.sort(np.unique(loc_inds_winners + loc_inds_loosers))
            # get relevant EP parameters for comparison points
            f_cavity._update_batch(eta=eta, ga_approx=f_ga_approx, post_params=f_post_params, batch=loc_inds_batch, get_logger=get_logger)

            try:
                #get cavity parameters of the batch
                ind_winners, ind_loosers = [np.where(loc_inds_batch == it)[0][0] for it in loc_inds_winners], [np.where(loc_inds_batch == it)[0][0] for it in loc_inds_loosers] # indices within a batch
                f_marg_moments.logZ_hat[loc_inds_batch], f_marg_moments.mu_hat[loc_inds_batch], f_marg_moments.sigma2_hat[np.ix_(loc_inds_batch, loc_inds_batch)], sigma2s[loc_inds_batch] = \
                     _match_moments_batch(f_cavity.v[loc_inds_batch], f_cavity.tau[np.ix_(loc_inds_batch, loc_inds_batch)], ind_winners, ind_loosers, sigma2s[loc_inds_batch],  N=100000, get_logger=get_logger)
            except AssertionError as e:
                if get_logger is not None:
                    get_logger().error('Numerical problem with feedback %d in iteration %d. Skipping update' % (d, itt))
            _, sigma2s[loc_inds_batch] = f_ga_approx._update_batch(eta=eta, delta=delta, post_params=f_post_params, marg_moments=f_marg_moments, batch=loc_inds_batch, get_logger=get_logger, sigma2s = sigma2s[loc_inds_batch])
        f_post_params = update_posterior(K, f_ga_approx.v, f_ga_approx.tau, y, yc, get_logger=get_logger)

        if np.any(np.isnan(f_post_params.mu)):
            if get_logger is not None:
                get_logger().error('Posterior mean contains nan in the EP approximation')

        # check for convergence
        new_params = np.hstack((f_post_params.mu.copy(), f_post_params.Sigma_diag.copy()))
        converged = np.mean((new_params - old_params)**2) / np.mean(old_params**2) < tol
        if converged:
            run_time = time.time() - t0
            if get_logger is not None:
                get_logger().info('Converged in %d iterations in %4.3fs' % (itt + 1, run_time))
            break
    ###################################################################################
    # Marginal likelihood & gradients
    ###################################################################################

    # compute normalization constant for likelihoods
    for i in range(Ndir):
        (ii,yi) = y[i] #index in kernel, y value
        f_cavity._update_i(eta=eta, ga_approx=f_ga_approx, post_params=f_post_params, i=ii)
        f_marg_moments.logZ_hat[ii] = log_npdf(yi, f_cavity.v[ii]/f_cavity.tau[ii,ii], 1./f_cavity.tau[ii,ii] + sigma2s[i])

    # marginal likelihood and gradient contribution from each f

    Z_tilde = _log_Z_tilde(f_marg_moments, f_ga_approx, f_cavity, y, yc)
    f_posterior, f_logZ, f_grad = _inference(K, f_ga_approx, f_cavity, Z_tilde, y, yc)
    return f_posterior, f_logZ, f_grad, f_ga_approx
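
A hypothetical call of ep_comparison, reusing the X, y, yc from the illustration after vi_comparison; treating sigma2s as one noise variance per location is an assumption about the expected shape:

import numpy as np
import GPy

kern = GPy.kern.RBF(input_dim=1)
sigma2s = 1e-2 * np.ones(X.shape[0])     # assumed: one noise variance per row of X
posterior, logZ, grad_dict, ga_approx = ep_comparison(X, y, yc, kern, sigma2s, max_itt=30)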