Example #1
import numpy as np
from GPy.kern import RBF
from GPy.util.linalg import jitchol


def compute_covariance(x: np.ndarray, kernel: RBF) -> tuple:
    assert x.ndim <= 2
    if x.ndim == 1:
        x = x.reshape(-1, 1)
    K_xx = kernel.K(x)
    # Invert K_xx via its (jittered) Cholesky factor: K^-1 = L^-T L^-1.
    K_xx_cho = jitchol(K_xx)
    cholesky_inv = np.linalg.inv(K_xx_cho)
    K_xx_inv = cholesky_inv.T @ cholesky_inv
    return K_xx, K_xx_inv
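
A quick usage sketch for the helper above; the input grid and kernel settings are illustrative assumptions, not part of the original snippet.

x = np.linspace(0.0, 1.0, 25)
kernel = RBF(input_dim=1, variance=1.0, lengthscale=0.3)
K_xx, K_xx_inv = compute_covariance(x, kernel)
# The product should be close to the identity, up to the jitter added by jitchol.
print(np.max(np.abs(K_xx @ K_xx_inv - np.eye(K_xx.shape[0]))))
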
Example #2
import numpy as np
from GPy.kern import Kern, RBF
from GPy.core.parameterization import Param


class ChangepointRBF(Kern):
    def __init__(self, input_dim, 
                 variance1=1., variance2=1., lengthscale1=1., lengthscale2=1., xc=1, 
                 active_dims=None):
        super(ChangepointRBF, self).__init__(input_dim, active_dims, 'chngpt')
        assert input_dim == 1, "For this kernel we assume input_dim = 1"
        self.variance1 = Param('variance1', variance1)
        self.variance2 = Param('variance2', variance2)
        self.lengthscale1 = Param('lengthscale1', lengthscale1)
        self.lengthscale2 = Param('lengthscale2', lengthscale2)
        self.rbf = RBF(input_dim=input_dim, lengthscale=1., variance=1.)
        self.xc = Param('xc', xc)
        # Register the parameters with the kernel (link_parameters in current GPy).
        self.link_parameters(self.variance1, self.variance2, self.lengthscale1, self.lengthscale2, self.xc)

    def parameters_changed(self):
        pass

    def K(self, X, X2=None):
        """Covariance matrix"""
        u1 = self.u(X)
        a1 = self.a(X)
        if X2 is None:
            u2 = u1
            a2 = a1
        else:
            u2 = self.u(X2)
            a2 = self.a(X2)
        # Outer product over the two input sets: K[i, j] = a(x_i) * a(x_j) * k_rbf(u_i, u_j).
        return a1 * a2.T * self.rbf.K(u1, u2)

    def Kdiag(self, X):
        """Diagonal of covariance matrix"""
        u = self.u(X)
        a = self.a(X)
        # The diagonal of a1 * a2.T * k_rbf is a(x)^2 * k_rbf(u, u).
        return a.flatten() ** 2 * self.rbf.Kdiag(u)

    def u(self, X: np.ndarray):
        """u operation in the paper"""
        u = np.empty(X.shape)
        for i in range(X.shape[0]):
            if X[i] < self.xc: u[i] = X[i] / self.variance1
            else: u[i] = self.xc/self.variance1 + (X[i] - self.xc)/self.variance2
        return u

    def a(self, X: np.ndarray):
        """a operation in the paper"""
        a = np.empty(X.shape)
        for i in range(X.shape[0]):
            if X[i] < self.xc: a[i] = self.lengthscale1
            else: a[i] = self.lengthscale2
        return a
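
A small sanity check for the kernel above, using the imports added at the top of this example; the parameter values and toy inputs are illustrative assumptions.

X = np.linspace(-2.0, 2.0, 8)[:, None]   # toy inputs straddling the changepoint
k = ChangepointRBF(input_dim=1, variance1=1.0, variance2=2.0,
                   lengthscale1=0.5, lengthscale2=1.5, xc=0.0)
K = k.K(X)        # full covariance matrix, shape (8, 8)
Kd = k.Kdiag(X)   # its diagonal
print(np.allclose(np.diag(K), Kd))
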
Example #3
import numpy as np
from GPy.kern import (RBF, Brownian, Matern32, Cosine, Exponential, Linear,
                      GridRBF, MLP, PeriodicMatern32, Spline, White, StdPeriodic)

# GaussianProcessAnimation is assumed to come from a project-local module.


def get_data(kernel_name, variance_value=1.0, n_traces=3, lengthscale=1.0):
    n_dims = 100
    n_frames = 20
    #n_traces = 3

    x = np.linspace(0, 10, n_dims)[:, np.newaxis]

    if kernel_name == "RBF":
        kernel = RBF(input_dim=1,
                     variance=variance_value,
                     lengthscale=lengthscale)
    elif kernel_name == "Brownian":
        kernel = Brownian(input_dim=1, variance=variance_value)
    elif kernel_name == "Matern32":
        kernel = Matern32(input_dim=1, variance=variance_value)
    elif kernel_name == "Cosine":
        kernel = Cosine(input_dim=1, variance=variance_value)
    elif kernel_name == "Exponential":
        kernel = Exponential(input_dim=1, variance=variance_value)
    elif kernel_name == "Linear":
        kernel = Linear(input_dim=1)
    elif kernel_name == "GridRBF":
        kernel = GridRBF(input_dim=1, variance=variance_value)
    elif kernel_name == "MLP":
        kernel = MLP(input_dim=1, variance=variance_value)
    elif kernel_name == "PeriodicMatern32":
        kernel = PeriodicMatern32(input_dim=1, variance=variance_value)
    elif kernel_name == "Spline":
        kernel = Spline(input_dim=1, variance=variance_value)
    elif kernel_name == "White":
        kernel = White(input_dim=1, variance=variance_value)
    elif kernel_name == "StdPeriodic":
        kernel = StdPeriodic(input_dim=1, variance=variance_value)
    else:
        raise ValueError("Unknown Kernel name")

    kernel_matrix = kernel.K(x, x)

    gaussian_process_animation = GaussianProcessAnimation(kernel_matrix,
                                                          n_dims=n_dims,
                                                          n_frames=n_frames)
    frames = gaussian_process_animation.get_traces(n_traces)
    data = np.stack(frames).transpose((2, 0, 1))
    return data
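
The elif chain above could also be driven by a name-to-class mapping. A minimal sketch, restricted to kernels that accept (input_dim, variance); make_kernel and _KERNELS are hypothetical names, not part of the original.

_KERNELS = {"RBF": RBF, "Brownian": Brownian, "Matern32": Matern32,
            "Cosine": Cosine, "Exponential": Exponential, "White": White}


def make_kernel(kernel_name, variance_value=1.0, lengthscale=1.0):
    try:
        cls = _KERNELS[kernel_name]
    except KeyError:
        raise ValueError("Unknown Kernel name")
    if kernel_name == "RBF":
        return cls(input_dim=1, variance=variance_value, lengthscale=lengthscale)
    return cls(input_dim=1, variance=variance_value)
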
Example #4
from GPy.kern import RBF, Brownian, Cosine
import numpy as np
from numpy.linalg import eig
import matplotlib.pyplot as plt
from sklearn.decomposition import KernelPCA

kernel = RBF(input_dim=1, variance=2.0)
#kernel = Brownian(input_dim=1, variance=2.0)
#kernel = Cosine(input_dim=1, variance=2.0)

values = []
r = range(10, 300)
for n_dims in r:
    x = np.linspace(0, 10, n_dims)[:, np.newaxis]
    k = kernel.K(x, x)
    # Track how the largest kernel-matrix eigenvalue scales with the grid size.
    eigenvalues, _ = eig(k)
    largest_eigenvalue = np.max(np.abs(eigenvalues))
    values.append(largest_eigenvalue / n_dims)

plt.plot(list(r), values)
plt.show()
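
The two commented-out kernels can be compared against the RBF on one figure; a short sketch reusing the same grid and range (kern.name is GPy's kernel name attribute, and eigvalsh is used since each kernel matrix is symmetric).

for kern in (RBF(input_dim=1, variance=2.0),
             Brownian(input_dim=1, variance=2.0),
             Cosine(input_dim=1, variance=2.0)):
    vals = []
    for n_dims in r:
        x = np.linspace(0, 10, n_dims)[:, np.newaxis]
        eigenvalues = np.linalg.eigvalsh(kern.K(x, x))  # real, ascending
        vals.append(eigenvalues[-1] / n_dims)
    plt.plot(list(r), vals, label=kern.name)
plt.legend()
plt.show()
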
Example #5
import numpy as np
import matplotlib.pyplot as plt
from GPy.kern import RBF
from GPy.models import GPRegression
from visualization import plot_gp, model_output

N = 50
noise_var = 0.01
X = np.zeros((N, 1))
x_half = int(N / 2)
X[:x_half, :] = np.linspace(0, 2,
                            x_half)[:,
                                    None]  # First cluster of inputs/covariates
X[x_half:, :] = np.linspace(
    8, 10, x_half)[:, None]  # Second cluster of inputs/covariates

rbf = RBF(input_dim=1)
mu = np.zeros(N)
cov = rbf.K(X) + np.eye(N) * np.sqrt(noise_var)
y = np.random.multivariate_normal(mu, cov).reshape(-1, 1)

# plt.scatter(X, y)
# plt.show()

gp_regression = GPRegression(X, y)
gp_regression.optimize(messages=True)
log_likelihood1 = gp_regression.log_likelihood()

model_output(gp_regression,
             title="GP Regression with loglikelihood: " + str(log_likelihood1))

#################################
# inducing variables, u. Each inducing variable has its own associated input index, Z, which lives in the same space as X.
Z = np.hstack((np.linspace(2.5, 4., 3), np.linspace(7, 8.5, 3)))[:, None]
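
A plausible continuation not shown in the excerpt: feeding the inducing inputs Z into GPy's SparseGPRegression and reporting it the same way as the full GP above; a minimal sketch.

from GPy.models import SparseGPRegression

sparse_gp = SparseGPRegression(X, y, kernel=RBF(input_dim=1), Z=Z)
sparse_gp.optimize(messages=True)
log_likelihood2 = sparse_gp.log_likelihood()
model_output(sparse_gp,
             title="Sparse GP Regression with loglikelihood: " + str(log_likelihood2))
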
Example #6
import numpy as np
import matplotlib.pyplot as plt
from numpy import newaxis as na
from numpy.linalg import det, inv, solve
from scipy.linalg import cho_factor, cho_solve
from scipy.optimize import minimize
from scipy.stats import multivariate_normal
from GPy.kern import RBF

# `maha` (a pairwise squared-Mahalanobis-distance helper) and the base class
# BayesianQuadratureTransform are assumed to come from the surrounding project.


class GPQuad(BayesianQuadratureTransform):
    def __init__(self, dim, unit_sp=None, hypers=None):
        super(GPQuad, self).__init__(dim, unit_sp, hypers)
        # GPy RBF kernel with given hypers
        self.kern = RBF(self.d,
                        variance=self.hypers['sig_var'],
                        lengthscale=self.hypers['lengthscale'],
                        ARD=True)

    def weights_rbf(self, unit_sp, hypers):
        # BQ weights for RBF kernel with given hypers, computations adapted from the GP-ADF code [Deisenroth] with
        # the following assumptions:
        #   (A1) the uncertain input is zero-mean with unit covariance
        #   (A2) one set of hyper-parameters is used for all output dimensions (one GP models all outputs)
        d, n = unit_sp.shape
        # GP kernel hyper-parameters
        alpha, el, jitter = hypers['sig_var'], hypers['lengthscale'], hypers[
            'noise_var']
        assert len(el) == d
        # pre-allocation for convenience
        eye_d, eye_n = np.eye(d), np.eye(n)
        iLam1 = np.atleast_2d(np.diag(el**-1))  # sqrt(Lambda^-1)
        iLam2 = np.atleast_2d(np.diag(el**-2))

        inp = unit_sp.T.dot(
            iLam1
        )  # sigmas / el[:, na] (x - m)^T*sqrt(Lambda^-1) # (numSP, xdim)
        K = np.exp(2 * np.log(alpha) - 0.5 * maha(inp, inp))
        iK = cho_solve(cho_factor(K + jitter * eye_n), eye_n)
        B = iLam2 + eye_d  # (D, D)
        c = alpha**2 / np.sqrt(det(B))
        t = inp.dot(inv(B))  # inn*(P + Lambda)^-1
        l = np.exp(-0.5 * np.sum(inp * t, 1))  # (N, 1)
        zet = 2 * np.log(alpha) - 0.5 * np.sum(inp * inp, 1)
        inp = inp.dot(iLam1)
        R = 2 * iLam2 + eye_d
        t = 1 / np.sqrt(det(R))
        L = np.exp((zet[:, na] + zet[:, na].T) +
                   maha(inp, -inp, V=0.5 * inv(R)))
        q = c * l  # evaluations of the kernel mean map (from the viewpoint of RKHS methods)
        # mean weights
        wm_f = q.dot(iK)
        iKQ = iK.dot(t * L)
        # covariance weights
        wc_f = iKQ.dot(iK)
        # cross-covariance "weights"
        wc_fx = np.diag(q).dot(iK)
        # used for self.D.dot(x - mean).dot(wc_fx).dot(fx)
        self.D = inv(eye_d +
                     np.diag(el**2))  # S(S+Lam)^-1; for S=I, (I+Lam)^-1
        # model variance; to be added to the covariance
        # this diagonal form assumes independent GP outputs (cov(f^a, f^b) = 0 for all a, b: a neq b)
        self.model_var = np.diag((alpha**2 - np.trace(iKQ)) * np.ones((d, 1)))
        return wm_f, wc_f, wc_fx

    def plot_gp_model(self,
                      f,
                      unit_sp,
                      args,
                      test_range=(-5, 5, 50),
                      plot_dims=(0, 0)):
        # plot out_dim vs. in_dim
        in_dim, out_dim = plot_dims
        # test input must have the same dimension as specified in kernel
        test = np.linspace(*test_range)
        test_pts = np.zeros((self.d, len(test)))
        test_pts[in_dim, :] = test
        # function value observations at training points (unit sigma-points)
        y = np.apply_along_axis(f, 0, unit_sp, args)
        fx = np.apply_along_axis(f, 0, test_pts,
                                 args)  # function values at test points
        K = self.kern.K(unit_sp.T)  # covariances between sigma-points
        k = self.kern.K(
            test_pts.T,
            unit_sp.T)  # covariance between test inputs and sigma-points
        kxx = self.kern.Kdiag(test_pts.T)  # prior predictive variance
        k_iK = cho_solve(cho_factor(K), k.T).T
        gp_mean = k_iK.dot(y[out_dim, :])  # GP mean
        gp_var = np.diag(np.diag(kxx) -
                         k_iK.dot(k.T))  # GP predictive variance
        # plot the GP mean, predictive variance and the true function
        plt.figure()
        plt.plot(test, fx[out_dim, :], color='r', ls='--', lw=2, label='true')
        plt.plot(test, gp_mean, color='b', ls='-', lw=2, label='GP mean')
        plt.fill_between(test,
                         gp_mean + 2 * np.sqrt(gp_var),
                         gp_mean - 2 * np.sqrt(gp_var),
                         color='b',
                         alpha=0.25,
                         label='GP variance')
        plt.plot(unit_sp[in_dim, :],
                 y[out_dim, :],
                 color='k',
                 ls='',
                 marker='o',
                 ms=8,
                 label='data')
        plt.legend()
        plt.show()

    def _weights(self, sigma_points, hypers):
        return self.weights_rbf(sigma_points, hypers)

    def _fcn_eval(self, fcn, x, fcn_pars):
        return np.apply_along_axis(fcn, 0, x, fcn_pars)

    def _mean(self, weights, fcn_evals):
        return fcn_evals.dot(weights)

    def _covariance(self, weights, fcn_evals, mean_out):
        return fcn_evals.dot(weights).dot(fcn_evals.T) - np.outer(
            mean_out, mean_out.T) + self.model_var

    def _cross_covariance(self, weights, fcn_evals, x, mean_out, mean_in):
        return fcn_evals.dot(weights.T).dot((x - mean_in).T).dot(self.D)

    def _int_var_rbf(self, X, hyp, jitter=1e-8):
        """
        Posterior integral variance of the Gaussian Process quadrature.
        X - vector (1, 2*xdim**2+xdim)
        hyp - kernel hyperparameters [s2, el_1, ... el_d]
        """
        # reshape X to SP matrix
        X = np.reshape(X, (self.n, self.d))
        # set kernel hyper-parameters
        s2, el = hyp[0], hyp[1:]
        self.kern.param_array[0] = s2  # variance
        self.kern.param_array[1:] = el  # lengthscale
        K = self.kern.K(X)
        L = np.diag(el**2)
        # posterior variance of the integral
        ks = s2 * np.sqrt(det(L + np.eye(self.d))) * multivariate_normal(
            mean=np.zeros(self.d), cov=L).pdf(X)
        postvar = -ks.dot(solve(K + jitter * np.eye(self.n), ks.T))
        return postvar

    def _int_var_rbf_hyp(self, hyp, X, jitter=1e-8):
        """
        Posterior integral variance as a function of hyper-parameters
        :param hyp: RBF kernel hyper-parameters [s2, el_1, ..., el_d]
        :param X: sigma-points
        :param jitter: numerical jitter (for stabilizing computations)
        :return: posterior integral variance
        """
        # reshape X to SP matrix
        X = np.reshape(X, (self.n, self.d))
        # set kernel hyper-parameters
        s2, el = 1, hyp  # sig_var hyper always set to 1
        self.kern.param_array[0] = s2  # variance
        self.kern.param_array[1:] = el  # lengthscale
        K = self.kern.K(X)
        L = np.diag(el**2)
        # posterior variance of the integral
        ks = s2 * np.sqrt(det(L + np.eye(self.d))) * multivariate_normal(
            mean=np.zeros(self.d), cov=L).pdf(X)
        postvar = s2 * np.sqrt(det(2 * inv(L) + np.eye(self.d)))**-1 - ks.dot(
            solve(K + jitter * np.eye(self.n), ks.T))
        return postvar

    def _min_var_sigmas(self):
        # solver options
        op = {'disp': True}
        # bounds based on input unit Gaussian (-2*std, +2std)
        bnds = tuple((-2, 2) for i in range(self.n * self.d))
        hyp = np.hstack((self.hypers['sig_var'], self.hypers['lengthscale']))
        # unconstrained
        #        res = minimize(self._gpq_postvar, self.X0, method='Nelder-Mead', options=op)
        #        res = minimize(self._gpq_postvar, self.X0, method='SLSQP', bounds=bnds, options=op)
        res = minimize(self._int_var_rbf,
                       self.unit_sp,
                       args=(hyp, ),
                       method='L-BFGS-B',
                       bounds=bnds,
                       options=op)
        return res.x

    def _min_var_hypers(self):
        """
        Finds kernel hyper-parameters minimizing the posterior integral variance.
        :return: optimized kernel hyper-parameters
        """
        # solver options
        op = {'disp': True}
        hyp = self.hypers[
            'lengthscale']  # np.hstack((self.hypers['sig_var'], self.hypers['lengthscale']))
        # bounds based on input unit Gaussian (-2*std, +2std)
        bnds = tuple((1e-3, 1000) for i in range(len(hyp)))
        # unconstrained
        #        res = minimize(self._gpq_postvar, self.X0, method='Nelder-Mead', options=op)
        #        res = minimize(self._gpq_postvar, self.X0, method='SLSQP', bounds=bnds, options=op)
        res = minimize(self._int_var_rbf_hyp,
                       hyp,
                       args=(self.unit_sp, ),
                       method='L-BFGS-B',
                       bounds=bnds,
                       options=op)
        return res.x

    def _min_logmarglik_hypers(self):
        # finds hypers by maximizing the marginal likelihood (empirical bayes)
        # the multiple output dimensions should be reflected in the log marglik
        pass

    def _min_intvar_logmarglik_hypers(self):
        # finds hypers by minimizing the sum of log-marginal likelihood and the integral variance objectives
        pass
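
A rough construction sketch for the class above. BayesianQuadratureTransform is project-specific, so this assumes its constructor simply stores dim (as self.d), the sigma-points, and the hyper-parameter dict; the point set and hyper-parameter values are illustrative.

d = 2
# Unscented-style unit sigma-points, shape (d, 2*d + 1).
unit_sp = np.hstack((np.zeros((d, 1)), np.sqrt(d) * np.eye(d), -np.sqrt(d) * np.eye(d)))
hypers = {'sig_var': 1.0, 'lengthscale': 3.0 * np.ones(d), 'noise_var': 1e-8}

tf = GPQuad(d, unit_sp=unit_sp, hypers=hypers)
wm, wc, wcc = tf.weights_rbf(unit_sp, hypers)   # BQ mean, covariance and cross-covariance weights
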
Example #7
    def _compute_mean(
        prior: Union[Gaussian, Gaussian1D],
        gp: GP,
        kernel: RBF,
        X_D: np.ndarray = None,
        Y_D: Union[np.ndarray, int] = None,
    ):
        """
        Compute the mean (i.e. expectation) of the integral
        :param prior: Prior
        :param gp: GP
        :param kernel: type of kernel - for now only the RBF kernel is supported
        :param X_D: Query points - if this argument is not supplied the evaluated points of the Gaussian process will
        be used
        :param Y_D: The function values at X_D. Note that -1 is a special value: if Y_D = -1 is supplied, we are not
        interested in the integral expectation itself, only in the inverse of the covariance matrix and the vector n_s
        :return: mean: mean value of the integral, K_xx_inv: inverse of the full covariance matrix, n_s: the vector
        defined in Equation 7.1.7 of Mike's DPhil dissertation
        """
        from GPy.util.linalg import jitchol
        # w, h are the lengthscale and variance of the RBF kernel - see Equation 7.1.4 in Mike's DPhil Dissertation

        w = kernel.lengthscale.values
        h = kernel.variance.values[0]

        #print("kerLengthScale: ", kernel.lengthscale.values[0], 'kerVar: ', kernel.variance.values[0])
        # print("w: ", w, "h: ", h)
        if X_D is None:
            X_D = gp._gpy_gp.X
        if Y_D is None:
            Y_D = gp._gpy_gp.Y
        n, d = X_D.shape
        # n: number of samples, d: dimensionality of each sample
        # print("X_D: ", X_D, "Y_D: ", Y_D)

        if isinstance(prior, Gaussian1D):
            mu = prior.matrix_mean
            sigma = prior.matrix_variance
        else:
            mu = prior.mean
            sigma = prior.covariance

        # Defined in Equations 7.1.7
        n_s = np.zeros((n, ))

        if d == 1:
            W = float(sigma) + w**2
            mu = np.asarray(mu).item()  # np.asscalar has been removed from NumPy
            for i in range(n):
                n_s[i] = h * norm._pdf_point_est(
                    X_D[i, :], loc=mu, scale=np.sqrt(W))
        else:
            if len(w) > 1:
                assert len(w) == d
                w = np.diag(w)
            else:
                w = np.diag(np.array([w] * d))
            W = sigma + w
            for i in range(n):
                n_s[i] = h * multivariate_normal._pdf_point_est(
                    X_D[i, :], mean=mu, cov=W)
        K_xx = kernel.K(X_D)
        # Find the inverse of the K_xx matrix via Cholesky decomposition (with jitter)
        K_xx_cho = jitchol(K_xx)
        cholesky_inverse = np.linalg.inv(K_xx_cho)
        K_xx_inv = cholesky_inverse.T @ cholesky_inverse

        if isinstance(Y_D, int) and Y_D == -1:
            return np.nan, K_xx_inv, n_s
        else:
            mean = n_s.T @ K_xx_inv @ Y_D
            return mean, K_xx_inv, n_s
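
For orientation, the n_s vector and the integral mean above can be reproduced outside the class for a 1-D Gaussian prior with plain SciPy, using scipy.stats.norm.pdf in place of the custom _pdf_point_est helper; all data and hyper-parameter values below are illustrative.

import numpy as np
from scipy.stats import norm
from GPy.kern import RBF
from GPy.util.linalg import jitchol

X_D = np.linspace(-1.0, 1.0, 6)[:, None]        # query points
Y_D = np.sin(3.0 * X_D)                         # function values at X_D
kernel = RBF(input_dim=1, variance=1.0, lengthscale=0.5)
mu, sigma = 0.0, 1.0                            # 1-D Gaussian prior: mean and variance

w = kernel.lengthscale.values[0]
h = kernel.variance.values[0]
W = sigma + w ** 2
n_s = h * norm.pdf(X_D[:, 0], loc=mu, scale=np.sqrt(W))   # Equation 7.1.7

K_xx_cho = jitchol(kernel.K(X_D))
cholesky_inverse = np.linalg.inv(K_xx_cho)
K_xx_inv = cholesky_inverse.T @ cholesky_inverse

mean = n_s.T @ K_xx_inv @ Y_D                   # integral expectation estimate
print(mean.item())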