def __call__(self, X, Y=None, eval_gradient=False):
        """Return the kernel k(X, Y) and optionally its gradient.
        Parameters
        ----------
        X : array, shape (n_samples_X, n_features)
            Left argument of the returned kernel k(X, Y)
        Y : array, shape (n_samples_Y, n_features), (optional, default=None)
            Right argument of the returned kernel k(X, Y). If None, k(X, X)
            if evaluated instead.
        eval_gradient : bool (optional, default=False)
            Determines whether the gradient with respect to the kernel
            hyperparameter is determined. Only supported when Y is None.
        Returns
        -------
        K : array, shape (n_samples_X, n_samples_Y)
            Kernel k(X, Y)
        K_gradient : array (opt.), shape (n_samples_X, n_samples_X, n_dims)
            The gradient of the kernel k(X, X) with respect to the
            hyperparameter of the kernel. Only returned when eval_gradient
            is True.
        """
        prototypes_std = self.prototypes.std(0)
        n_prototypes = self.prototypes.shape[0]
        n_gradient_dim = n_prototypes + (0 if self.hyperparameter_gamma.fixed
                                         else 1)

        X = np.atleast_2d(X)
        if Y is not None and eval_gradient:
            raise ValueError("Gradient can only be evaluated when Y is None.")

        if Y is None:
            K = np.eye(X.shape[0]) * self.diag(X)
            if eval_gradient:
                K_gradient = np.zeros((K.shape[0], K.shape[0], n_gradient_dim))
                K_pairwise = pairwise_kernels(
                    self.prototypes / prototypes_std,
                    X / prototypes_std,
                    metric="rbf",
                    gamma=self.gamma,
                )
                for i in range(n_prototypes):
                    for j in range(K.shape[0]):
                        K_gradient[j, j, i] = (
                            self.sigma_2[i] * K_pairwise[i, j]
                            / K_pairwise[:, j].sum())
                if not self.hyperparameter_gamma.fixed:
                    # XXX: Analytic expression for gradient?
                    def f(gamma):  # helper function
                        theta = self.theta.copy()
                        theta[-1] = gamma[0]
                        return self.clone_with_theta(theta)(X, Y)

                    K_gradient[:, :, -1] = _approx_fprime([self.theta[-1]], f,
                                                          1e-5)[:, :, 0]
                return K, K_gradient
            else:
                return K
        else:
            K = np.zeros((X.shape[0], Y.shape[0]))
            return K  # XXX: similar entries?
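# The snippets above and below all call a module-level helper `_approx_fprime`
# that is never shown here. The sketch below is an assumption about its shape,
# modeled on the forward-difference helper used in scikit-learn's
# Gaussian-process kernel tests; the helper actually defined in the source
# projects may differ in signature or step handling.
import numpy as np


def _approx_fprime(xk, f, epsilon):
    """Forward-difference approximation of the gradient of a matrix-valued
    function ``f`` with respect to the parameter vector ``xk``."""
    f0 = f(xk)
    grad = np.zeros((f0.shape[0], f0.shape[1], len(xk)), dtype=float)
    ei = np.zeros(len(xk), dtype=float)
    for k in range(len(xk)):
        ei[k] = 1.0
        d = epsilon * ei
        # Perturb one coordinate at a time and difference against f(xk).
        grad[:, :, k] = (f(xk + d) - f0) / d[k]
        ei[k] = 0.0
    return grad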
    def __call__(self, XX1, XX2=None, eval_gradient=False):
        """Return the kernel k(XX1, XX2) and optionally its gradient.

        Parameters
        ----------
        XX1 : array, shape (n_samples_XX1, n_features)
            Left argument of the returned kernel k(XX1, XX2)

        XX2 : array, shape (n_samples_XX2, n_features), (optional, default=None)
            Right argument of the returned kernel k(XX1, XX2). If None, 
            k(XX1, XX1) is evaluated instead.

        eval_gradient : bool (optional, default=False)
            Determines whether the gradient with respect to the kernel
            hyperparameter is computed. Only supported when XX2 is None.

        Returns
        -------
        K : array, shape (n_samples_XX1, n_samples_XX2)
            Kernel k(XX1, XX2)

        K_gradient : array (opt.), shape (n_samples_XX1, n_samples_XX1, n_dims)
            The gradient of the kernel k(XX1, XX1) with respect to the
            hyperparameter of the kernel. Only returned when eval_gradient
            is True.
        """
        XX1 = np.atleast_2d(XX1)
        length_scale = _check_length_scale(XX1, self.length_scale)
        if XX2 is None:
            K = full_kernel(XX1,
                            length_scale,
                            self.n_XX_func,
                            return_code=self.return_code)
        else:
            if eval_gradient:
                raise ValueError(
                    "Gradient can only be evaluated when XX2 is None.")
            K = full_kernel(XX1, length_scale, self.n_XX_func, XX2,
                            self.return_code)
        if not eval_gradient:
            return K

        if self.hyperparameter_length_scale.fixed:
            # Hyperparameter l kept fixed
            length_scale_gradient = np.empty((K.shape[0], K.shape[1], 0))
        else:
            # approximate gradient numerically
            def f(gamma):  # helper function
                return full_kernel(XX1,
                                   gamma,
                                   self.n_XX_func,
                                   return_code=self.return_code)

            length_scale = np.atleast_1d(length_scale)
            length_scale_gradient = _approx_fprime(length_scale, f, 1e-8)
        return K, length_scale_gradient
    def __call__(self, X, Y=None, eval_gradient=False):
        l_train = 10**self.gp_l.predict(X)

        # Prepare distances and length-scale information for every pair of
        # data points whose correlation is to be computed
        if Y is not None:
            # Get pairwise componentwise L1-differences to the input training
            # set
            d = Y[:, np.newaxis, :] - X[np.newaxis, :, :]
            d = d.reshape((-1, Y.shape[1]))
            # Predict length scales for query datapoints
            l_query = 10**self.gp_l.predict(Y)
            l = np.transpose([
                np.tile(l_train, len(l_query)),
                np.repeat(l_query, len(l_train))
            ])
        else:
            # No external datapoints given; auto-correlation of training set
            # is used instead
            d = X[:, np.newaxis, :] - X[np.newaxis, :, :]
            d = d.reshape((-1, X.shape[1]))
            l = np.transpose([
                np.tile(l_train, len(l_train)),
                np.repeat(l_train, len(l_train))
            ])  # XXX: check

        # Compute general Matern kernel
        if d.ndim > 1 and self.theta_gp.size == d.ndim:
            activation = \
                np.sum(self.theta_gp.reshape(1, d.ndim) * d ** 2, axis=1)
        else:
            activation = self.theta_gp[0] * np.sum(d**2, axis=1)
        tmp = 0.5 * (l**2).sum(1)
        tmp2 = np.maximum(2 * np.sqrt(self.nu * activation / tmp), 1e-5)
        k = np.sqrt(l[:, 0]) * np.sqrt(l[:, 1]) \
            / (gamma(self.nu) * 2**(self.nu - 1))
        k /= np.sqrt(tmp)
        k *= tmp2**self.nu * kv(self.nu, tmp2)

        # Convert correlations to 2d matrix
        if Y is not None:
            return k.reshape(-1, X.shape[0]).T
        else:  # exploit symmetry of auto-correlation
            K = k.reshape(X.shape[0], X.shape[0])
            if not eval_gradient:
                return K
            else:
                # approximate gradient numerically
                # XXX: compute gradient analytically?
                def f(theta):  # helper function
                    return self.clone_with_theta(theta)(X, Y)

                return K, _approx_fprime(self.weights, f, 1e-7)
    def __call__(self, X, Y=None, eval_gradient=False):
        l_train = 10 ** self.gp_l.predict(X)

        # Prepare distances and length-scale information for every pair of
        # data points whose correlation is to be computed
        if Y is not None:
            # Get pairwise componentwise L1-differences to the input training
            # set
            d = Y[:, np.newaxis, :] - X[np.newaxis, :, :]
            d = d.reshape((-1, Y.shape[1]))
            # Predict length scales for query datapoints
            l_query = 10 ** self.gp_l.predict(Y)
            l = np.transpose([np.tile(l_train, len(l_query)),
                              np.repeat(l_query, len(l_train))])
        else:
            # No external datapoints given; auto-correlation of training set
            # is used instead
            d = X[:, np.newaxis, :] - X[np.newaxis, :, :]
            d = d.reshape((-1, X.shape[1]))
            l = np.transpose([np.tile(l_train, len(l_train)),
                              np.repeat(l_train, len(l_train))])  # XXX: check

        # Compute general Matern kernel
        if d.ndim > 1 and self.theta_gp.size == d.ndim:
            activation = \
                np.sum(self.theta_gp.reshape(1, d.ndim) * d ** 2, axis=1)
        else:
            activation = self.theta_gp[0] * np.sum(d ** 2, axis=1)
        tmp = 0.5 * (l**2).sum(1)
        tmp2 = np.maximum(2*np.sqrt(self.nu * activation / tmp), 1e-5)
        k = np.sqrt(l[:, 0]) * np.sqrt(l[:, 1]) \
            / (gamma(self.nu) * 2**(self.nu - 1))
        k /= np.sqrt(tmp)
        k *= tmp2**self.nu * kv(self.nu, tmp2)

        # Convert correlations to 2d matrix
        if Y is not None:
            return k.reshape(-1, X.shape[0]).T
        else:  # exploit symmetry of auto-correlation
            K = k.reshape(X.shape[0], X.shape[0])
            if not eval_gradient:
                return K
            else:
                # approximate gradient numerically
                def f(theta):  # helper function
                    import copy  # XXX: Avoid deepcopy
                    kernel = copy.deepcopy(self)
                    kernel.theta = theta
                    return kernel(X)
                return K, _approx_fprime(self.params, f, 1e-5)
    def __call__(self, XX1, XX2=None, eval_gradient=False):
        """Return the kernel k(XX1, XX2) and optionally its gradient.

        Parameters
        ----------
        XX1 : array, shape (n_samples_XX1, n_features)
            Left argument of the returned kernel k(XX1, XX2)

        XX2 : array, shape (n_samples_XX2, n_features), (optional, default=None)
            Right argument of the returned kernel k(XX1, XX2). If None, 
            k(XX1, XX1) is evaluated instead.

        eval_gradient : bool (optional, default=False)
            Determines whether the gradient with respect to the kernel
            hyperparameter is computed. Only supported when XX2 is None.

        Returns
        -------
        K : array, shape (n_samples_XX1, n_samples_XX2)
            Kernel k(XX1, XX2)

        K_gradient : array (opt.), shape (n_samples_XX1, n_samples_XX1, n_dims)
            The gradient of the kernel k(XX1, XX1) with respect to the
            hyperparameter of the kernel. Only returned when eval_gradient
            is True.
        """
        XX1 = np.atleast_2d(XX1)
        hyperparams = np.squeeze(self.length_scale).astype(float)
        if XX2 is None:
            K = full_multilevel_kernel(XX1, hyperparams,
                                       self.nsamples_per_model,
                                       self.return_code != 'full')
        else:
            if eval_gradient:
                raise ValueError(
                    "Gradient can only be evaluated when XX2 is None.")
            K = full_multilevel_kernel_for_prediction(XX1, XX2, hyperparams,
                                                      self.nsamples_per_model)
        if not eval_gradient:
            return K

        if self.hyperparameter_length_scale.fixed:
            # Hyperparameter l kept fixed
            length_scale_gradient = np.empty((K.shape[0], K.shape[1], 0))
        else:
            # approximate gradient numerically
            def f(gamma):  # helper function
                return full_multilevel_kernel(XX1, gamma,
                                              self.nsamples_per_model)

            length_scale = np.atleast_1d(self.length_scale)
            length_scale_gradient = _approx_fprime(length_scale, f, 1e-8)

        return K, length_scale_gradient
def test_kernel_gradient(kernel):
    # Compare analytic and numeric gradient of kernels.
    K, K_gradient = kernel(X, eval_gradient=True)

    assert_equal(K_gradient.shape[0], X.shape[0])
    assert_equal(K_gradient.shape[1], X.shape[0])
    assert_equal(K_gradient.shape[2], kernel.theta.shape[0])

    def eval_kernel_for_theta(theta):
        kernel_clone = kernel.clone_with_theta(theta)
        K = kernel_clone(X, eval_gradient=False)
        return K

    K_gradient_approx = \
        _approx_fprime(kernel.theta, eval_kernel_for_theta, 1e-10)

    assert_almost_equal(K_gradient, K_gradient_approx, 4)
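# How such a gradient check is usually driven is not shown above; the pytest
# parameterization below is only an illustrative assumption (the kernels and
# the test data X_demo are placeholders, not the source project's fixtures).
import numpy as np
import pytest
from sklearn.gaussian_process.kernels import RBF, Matern, RationalQuadratic

X_demo = np.random.RandomState(0).normal(size=(10, 2))


@pytest.mark.parametrize(
    "kernel", [RBF(1.0), Matern(1.0, nu=1.5), RationalQuadratic(1.0)])
def test_kernel_gradient_demo(kernel):
    # Same idea as test_kernel_gradient above, but self-contained.
    K, K_gradient = kernel(X_demo, eval_gradient=True)
    assert K_gradient.shape == (X_demo.shape[0], X_demo.shape[0],
                                kernel.theta.shape[0])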
    def __call__(self, X, Y=None, eval_gradient=False):
        X_nn = self._project_manifold(X)
        if Y is None:
            K = self.base_kernel(X_nn)
            if not eval_gradient:
                return K
            else:
                # approximate gradient numerically
                # XXX: Analytic expression for gradient based on chain rule
                #      and backpropagation?
                def f(theta):  # helper function
                    return self.clone_with_theta(theta)(X, Y)

                return K, _approx_fprime(self.theta, f, 1e-5)
        else:
            if eval_gradient:
                raise ValueError(
                    "Gradient can only be evaluated when Y is None.")
            Y_nn = self._project_manifold(Y)
            return self.base_kernel(X_nn, Y_nn)
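# Conceptual sketch only: the class that owns the __call__ above is not shown,
# so `_project_manifold` and `base_kernel` are unknown here. The toy wrapper
# below merely illustrates the pattern the comments describe (map inputs
# through a learned or random feature map, then apply a standard kernel); it
# is not the original project's kernel class.
import numpy as np
from sklearn.gaussian_process.kernels import RBF


class ToyProjectedKernel:
    def __init__(self, n_features_in, n_features_out, seed=0):
        rng = np.random.RandomState(seed)
        # Stand-in for a neural-network feature map: a fixed random projection.
        self.W = rng.normal(size=(n_features_in, n_features_out))
        self.base_kernel = RBF(length_scale=1.0)

    def _project_manifold(self, X):
        return np.tanh(X @ self.W)

    def __call__(self, X, Y=None):
        X_nn = self._project_manifold(X)
        if Y is None:
            return self.base_kernel(X_nn)
        return self.base_kernel(X_nn, self._project_manifold(Y))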
    def test_fd_kernel_1d(self):
        num_pts = 4
        length_scale = 1
        Xf = np.linspace(0., 1., num_pts)
        Y = Xf.copy()
        K_ff = kernel_ff(Xf[:, np.newaxis], Xf[:, np.newaxis], length_scale)
        K_fd_fd = np.zeros_like(K_ff)
        for ii in range(num_pts):

            def f(x):
                Xf[ii] = x[0]
                return kernel_ff(Xf[:, np.newaxis], Y[:, np.newaxis],
                                 length_scale)

            length_scale_gradient = _approx_fprime([Xf[ii]], f, 1e-8)
            K_fd_fd += length_scale_gradient.reshape(K_ff.shape)
        K_fd = kernel_fd(Xf[:, np.newaxis], Xf[:, np.newaxis], length_scale, 0)
        assert np.allclose(K_fd, K_fd_fd)
def test_weighted_white_kernel_gradient():
    # Compare analytic and numeric gradient of the kernel:
    N = 3
    X = np.random.RandomState(0).normal(0, 1, (N, 1))
    weight = np.exp(np.random.RandomState(0).normal(0, 1, N))
    kernel = WeightedWhiteKernel(noise_weight=1. / weight, noise_level=0.1)
    K, K_gradient = kernel(X, eval_gradient=True)

    assert_equal(K_gradient.shape[0], X.shape[0])
    assert_equal(K_gradient.shape[1], X.shape[0])
    assert_equal(K_gradient.shape[2], kernel.theta.shape[0])

    def eval_kernel_for_theta(theta):
        kernel_clone = kernel.clone_with_theta(theta)
        K = kernel_clone(X, eval_gradient=False)
        return K

    K_gradient_approx = \
        _approx_fprime(kernel.theta, eval_kernel_for_theta, 1e-10)

    assert_almost_equal(K_gradient, K_gradient_approx, 4)
    def __call__(self, X, Y=None, eval_gradient=False):
        """Return the kernel k(X, Y) and optionally its gradient.

        Parameters
        ----------
        X : array, shape (n_samples_X, n_features)
            Left argument of the returned kernel k(X, Y)

        Y : array, shape (n_samples_Y, n_features), (optional, default=None)
            Right argument of the returned kernel k(X, Y). If None, k(X, X)
            is evaluated instead.

        eval_gradient : bool (optional, default=False)
            Determines whether the gradient with respect to the kernel
            hyperparameter is computed. Only supported when Y is None.

        Returns
        -------
        K : array, shape (n_samples_X, n_samples_Y)
            Kernel k(X, Y)

        K_gradient : array (opt.), shape (n_samples_X, n_samples_X, n_dims)
            The gradient of the kernel k(X, X) with respect to the
            hyperparameter of the kernel. Only returned when eval_gradient
            is True.
        """
        prototypes_std = self.prototypes.std(0)
        n_prototypes = self.prototypes.shape[0]
        n_gradient_dim = \
            n_prototypes + (0 if self.hyperparameter_gamma.fixed else 1)

        X = np.atleast_2d(X)
        if Y is not None and eval_gradient:
            raise ValueError("Gradient can only be evaluated when Y is None.")

        if Y is None:
            K = np.eye(X.shape[0]) * self.diag(X)
            if eval_gradient:
                K_gradient = \
                    np.zeros((K.shape[0], K.shape[0], n_gradient_dim))
                K_pairwise = \
                    pairwise_kernels(self.prototypes / prototypes_std,
                                     X / prototypes_std,
                                     metric="rbf", gamma=self.gamma)
                for i in range(n_prototypes):
                    for j in range(K.shape[0]):
                        K_gradient[j, j, i] = \
                            self.sigma_2[i] * K_pairwise[i, j] \
                            / K_pairwise[:, j].sum()
                if not self.hyperparameter_gamma.fixed:
                    # XXX: Analytic expression for gradient?
                    def f(gamma):  # helper function
                        theta = self.theta.copy()
                        theta[-1] = gamma[0]
                        return self.clone_with_theta(theta)(X, Y)
                    K_gradient[:, :, -1] = \
                        _approx_fprime([self.theta[-1]], f, 1e-5)[:, :, 0]
                return K, K_gradient
            else:
                return K
        else:
            K = np.zeros((X.shape[0], Y.shape[0]))
            return K   # XXX: similar entries?
    def __call__(self, X, Y=None, eval_gradient=False):
        """Return the kernel k(X, Y) and optionally its gradient.

        Parameters
        ----------
        X : array, shape (n_samples_X, n_features)
            Left argument of the returned kernel k(X, Y)

        Y : array, shape (n_samples_Y, n_features), (optional, default=None)
            Right argument of the returned kernel k(X, Y). If None, k(X, X)
            is evaluated instead.

        eval_gradient : bool (optional, default=False)
            Determines whether the gradient with respect to the kernel
            hyperparameter is computed. Only supported when Y is None.

        Returns
        -------
        K : array, shape (n_samples_X, n_samples_Y)
            Kernel k(X, Y)

        K_gradient : array (opt.), shape (n_samples_X, n_samples_X, n_dims)
            The gradient of the kernel k(X, X) with respect to the
            hyperparameter of the kernel. Only returned when eval_gradient
            is True.
        """
        X_values = X[:, 0].reshape(-1, 1)

        length_scale = _check_length_scale(X_values, self.length_scale)
        if Y is None:
            dists = pdist(X_values / length_scale, metric='euclidean')
        else:
            Y_values = Y[:, 0].reshape(-1, 1)
            if eval_gradient:
                raise ValueError(
                    "Gradient can only be evaluated when Y is None.")
            dists = cdist(X_values / length_scale,
                          Y_values / length_scale,
                          metric='euclidean')

        if self.nu == 0.5:
            K = np.exp(-dists)
        elif self.nu == 1.5:
            K = dists * math.sqrt(3)
            K = (1. + K) * np.exp(-K)
        elif self.nu == 2.5:
            K = dists * math.sqrt(5)
            K = (1. + K + K**2 / 3.0) * np.exp(-K)
        else:  # general case; expensive to evaluate
            K = dists
            K[K == 0.0] += np.finfo(float).eps  # strict zeros result in nan
            tmp = (math.sqrt(2 * self.nu) * K)
            K.fill((2**(1. - self.nu)) / gamma(self.nu))
            K *= tmp**self.nu
            K *= kv(self.nu, tmp)

        if Y is None:
            # convert from upper-triangular matrix to square matrix
            K = squareform(K)
            np.fill_diagonal(K, 1)

        if eval_gradient:
            if self.hyperparameter_length_scale.fixed:
                # Hyperparameter l kept fixed
                K_gradient = np.empty(
                    (X_values.shape[0], X_values.shape[0], 0))
                return K, K_gradient

            # We need to recompute the pairwise dimension-wise distances
            if self.anisotropic:
                D = (X_values[:, np.newaxis, :] - X_values[np.newaxis, :, :]) ** 2 \
                    / (length_scale ** 2)
            else:
                D = squareform(dists**2)[:, :, np.newaxis]

            if self.nu == 0.5:
                K_gradient = K[..., np.newaxis] * D \
                             / np.sqrt(D.sum(2))[:, :, np.newaxis]
                K_gradient[~np.isfinite(K_gradient)] = 0
            elif self.nu == 1.5:
                K_gradient = \
                    3 * D * np.exp(-np.sqrt(3 * D.sum(-1)))[..., np.newaxis]
            elif self.nu == 2.5:
                tmp = np.sqrt(5 * D.sum(-1))[..., np.newaxis]
                K_gradient = 5.0 / 3.0 * D * (tmp + 1) * np.exp(-tmp)
            else:
                # approximate gradient numerically; Y is None on this path
                # (eval_gradient is only supported when Y is None), so the
                # cloned kernel is evaluated as an auto-kernel
                def f(theta):  # helper function
                    return self.clone_with_theta(theta)(X_values)

                return K, _approx_fprime(self.theta, f, 1e-10)

            if not self.anisotropic:
                return K, K_gradient[:, :].sum(-1)[:, :, np.newaxis]
            else:
                return K, K_gradient
        else:
            return K
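# Cross-check of the analytic nu=1.5 gradient branch above. Assuming, as in
# scikit-learn's kernels, that theta is log(length_scale), the closed form
# used there is dK/dlog(l_i) = 3 * D_i * exp(-sqrt(3) * |d|), with
# D_i = (x_i - y_i)**2 / l_i**2 and |d|**2 = sum_i D_i. The sketch below
# verifies that formula numerically against scikit-learn's own Matern kernel;
# it is an independent sanity check, not part of the original code.
import numpy as np
from sklearn.gaussian_process.kernels import Matern

X_check = np.random.RandomState(1).normal(size=(6, 2))
matern = Matern(length_scale=[1.0, 2.0], nu=1.5)
K_check, K_grad = matern(X_check, eval_gradient=True)
eps = 1e-8
for i in range(len(matern.theta)):
    theta = matern.theta.copy()
    theta[i] += eps
    # Forward difference in the i-th log-length-scale.
    K_num = (matern.clone_with_theta(theta)(X_check) - K_check) / eps
    assert np.allclose(K_grad[:, :, i], K_num, atol=1e-5)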