Beispiel #1
0
def test_rbf_kernel_gram_1d():
    """Compare `gram` + `rbf_kernel` with sklearn's RBF kernel on 1-D samples.

    Three pairings are checked: (X, X), (Y, Y) and (X, Y), where Y is a
    noisy, scaled copy of X.
    """
    n_samples = 100
    rng = onp.random.RandomState(123)

    def _assert_matches_sklearn(a, b):
        # the sklearn reference is fed column vectors, `gram` the raw 1-D arrays
        expected = rbf_sklearn(a[:, np.newaxis], b[:, np.newaxis], gamma=1.0)
        actual = gram(rbf_kernel, {"gamma": 1.0}, a, b)
        onp.testing.assert_array_almost_equal(expected, onp.array(actual))

    # X against itself
    X = rng.rand(n_samples)
    _assert_matches_sklearn(X, X)

    # Y against itself
    Y = 10 * X + 0.1 * rng.randn(n_samples)
    _assert_matches_sklearn(Y, Y)

    # cross kernel between X and Y
    _assert_matches_sklearn(X, Y)
Beispiel #2
0
def mmd(
    X: np.ndarray,
    Y: np.ndarray,
    kernel: Callable,
    params_x: Dict[str, float],
    params_y: Dict[str, float],
    params_xy: Dict[str, float],
    bias: bool = False,
    center: bool = False,
) -> float:
    """Maximum Mean Discrepancy (MMD) between two samples.

    Parameters
    ----------
    X : jax.numpy.ndarray
        first sample; ``X.shape[0]`` is taken as its number of samples
    Y : jax.numpy.ndarray
        second sample; ``Y.shape[0]`` is taken as its number of samples
    kernel : Callable
        kernel function forwarded to ``gram``
    params_x : Dict[str, float]
        kernel parameters used for the X-X kernel matrix
    params_y : Dict[str, float]
        kernel parameters used for the Y-Y kernel matrix
    params_xy : Dict[str, float]
        kernel parameters used for the X-Y cross kernel matrix
    bias : bool, default=False
        if True, use the biased (V-statistic) estimator; otherwise use
        the unbiased (U-statistic) estimator
    center : bool, default=False
        currently unused; kept so existing callers keep working

    Returns
    -------
    mmd_value : float
        ``bias=True``: the square root of the biased squared-MMD estimate,
        clipped to 0 when that estimate is negative.
        ``bias=False``: the unbiased estimate of the *squared* MMD (no
        square root is taken; the value may be slightly negative).

    Notes
    -----
    The unbiased estimator needs at least two samples in both X and Y,
    otherwise its normalisation divides by zero.
    """
    # kernel matrices
    Kx = gram(kernel, params_x, X, X)
    Ky = gram(kernel, params_y, Y, Y)
    Kxy = gram(kernel, params_xy, X, Y)

    if bias:
        mmd_sq = np.mean(Kx) + np.mean(Ky) - 2 * np.mean(Kxy)
        return np.where(mmd_sq >= 0.0, np.sqrt(mmd_sq), 0.0)

    n_samples, m_samples = X.shape[0], Y.shape[0]

    # normalisation constants of the U-statistic; only needed (and only
    # well defined) on the unbiased path
    a00 = 1.0 / (n_samples * (n_samples - 1.0))
    a11 = 1.0 / (m_samples * (m_samples - 1.0))
    a01 = -1.0 / (n_samples * m_samples)

    return (
        # a01 already carries the 1/(n*m) factor, so the cross term must be
        # a plain sum -- taking the mean here would normalise twice
        2 * a01 * np.sum(Kxy)
        # drop the i == j terms; the trace is exact for any kernel, not only
        # for those with k(x, x) == 1
        + a00 * (np.sum(Kx) - np.trace(Kx))
        + a11 * (np.sum(Ky) - np.trace(Ky))
    )
Beispiel #3
0
def _hsic_uncentered(
    X: np.ndarray,
    Y: np.ndarray,
    kernel: Callable,
    params_x: Dict[str, float],
    params_y: Dict[str, float],
) -> float:
    """Uncentered HSIC variant.

    Builds the kernel matrices of X and Y with ``gram`` and returns the
    mean entry of ``np.dot(Kx, Ky.T)``. No centering is applied.
    """
    gram_x = gram(kernel, params_x, X, X)
    gram_y = gram(kernel, params_y, Y, Y)

    # average over every entry of the product of the two kernel matrices
    return np.mean(np.dot(gram_x, gram_y.T))
Beispiel #4
0
def test_rbf_kernel_cov_2d():
    """`gram` with `rbf_kernel` should match sklearn's RBF kernel on (100, 2) data."""
    samples = onp.random.rand(100, 2)

    # reference values from sklearn
    expected = rbf_sklearn(samples, samples, gamma=1.0)

    # values under test
    actual = onp.array(gram(rbf_kernel, {"gamma": 1.0}, samples, samples))

    onp.testing.assert_array_almost_equal(expected, actual)
Beispiel #5
0
def mmd_mi(
    X: np.ndarray,
    Y: np.ndarray,
    kernel: Callable,
    params_x: Dict[str, float],
    params_y: Dict[str, float],
) -> float:
    """Dependence score between X and Y built from centered kernel matrices.

    Parameters
    ----------
    X : jax.numpy.ndarray
        first dataset, passed to ``gram`` with ``params_x``
    Y : jax.numpy.ndarray
        second dataset, passed to ``gram`` with ``params_y``
    kernel : Callable
        kernel function forwarded to ``gram``
    params_x : Dict[str, float]
        kernel parameters for the X kernel matrix
    params_y : Dict[str, float]
        kernel parameters for the Y kernel matrix

    Returns
    -------
    mmd_value : float
        ``A - 2 * B + C`` where A is the mean of the element-wise product
        of the two centered kernel matrices, B is the mean of the product
        of their column means, and C is the product of their overall means.

    Notes
    -----
    The original author describes this quantity as equivalent to HSIC.
    """
    # kernel matrices, then centering of each
    gram_x = gram(kernel, params_x, X, X)
    gram_y = gram(kernel, params_y, Y, Y)
    centered_x = centering(gram_x)
    centered_y = centering(gram_y)

    # the three expectation terms
    joint_term = np.mean(centered_x * centered_y)
    column_means_x = np.mean(centered_x, axis=0)
    column_means_y = np.mean(centered_y, axis=0)
    cross_term = np.mean(column_means_x * column_means_y)
    marginal_term = np.mean(centered_x) * np.mean(centered_y)

    return joint_term - 2 * cross_term + marginal_term
Beispiel #6
0
def test_centering():
    """`centering` should agree with sklearn's `KernelCenterer` on an RBF kernel matrix."""
    n_samples = 100
    X = onp.random.rand(n_samples)

    # reference: sklearn RBF kernel on column vectors, centered by sklearn
    column = X[:, np.newaxis]
    expected = KernelCenterer().fit_transform(rbf_sklearn(column, column, gamma=1.0))

    # under test: project kernel matrix run through the project's centering helper
    actual = centering(gram(rbf_kernel, {"gamma": 1.0}, X, X))

    onp.testing.assert_array_almost_equal(expected, onp.array(actual))
Beispiel #7
0
def test_rbf_kernel_gram_2d():
    """Compare the project's RBF kernel matrices with sklearn's on 2-D data.

    All random draws come from one seeded ``RandomState`` so the test is
    reproducible, matching the 1-D variant of this test.
    """
    rng = onp.random.RandomState(123)
    n_samples, n_features = 100, 2

    # draw from the seeded generator, not the global `onp.random` state
    X = rng.rand(n_samples, n_features)

    # sklearn rbf_kernel
    K_sk = rbf_sklearn(X, X, gamma=1.0)

    # NOTE(review): this check goes through `covariance_matrix` while the
    # second one uses `gram` -- confirm that mixing the two is intended
    K = covariance_matrix(rbf_kernel, {"gamma": 1.0}, X, X)

    onp.testing.assert_array_almost_equal(K_sk, onp.array(K))

    Y = 10 * X + 0.1 * rng.randn(n_samples, n_features)

    # sklearn rbf_kernel
    K_sk = rbf_sklearn(Y, Y, gamma=1.0)

    K = gram(rbf_kernel, {"gamma": 1.0}, Y, Y)

    onp.testing.assert_array_almost_equal(K_sk, onp.array(K))
Beispiel #8
0
def nhsic_cca(
    X: np.ndarray,
    Y: np.ndarray,
    kernel: Callable,
    params_x: Dict[str, float],
    params_y: Dict[str, float],
    epsilon: float = 1e-5,
    bias: bool = False,
) -> float:
    """Normalized HSIC using regularized inverses of the centered kernels.

    Each centered kernel matrix K is turned into ``R = K (K + eps * n * I)^-1``
    and the score is the scaled sum of the element-wise product ``Rx * Ry``.

    Parameters
    ----------
    X : jax.numpy.ndarray
        the input value for one dataset

    Y : jax.numpy.ndarray
        the input value for the second dataset

    kernel : Callable
        the kernel function used for both kernel matrices

    params_x : Dict[str, float]
        kernel parameters used when building the kernel matrix of X

    params_y : Dict[str, float]
        kernel parameters used when building the kernel matrix of Y

    epsilon : float, default=1e-5
        regularization strength; ``epsilon * X.shape[0]`` is added to the
        diagonal of each centered kernel matrix before inversion

    bias : bool, default=False
        if True the sum is scaled by ``1 / n**2``, otherwise by
        ``1 / (n - 1)**2``, with ``n`` the size of the kernel matrix

    Returns
    -------
    cka_value : float
        the normalized hsic value.

    Notes
    -----
    Described by the original author as a correlation-like metric with
    values in [0, 1]; that range is not checked here.
    """
    n_samples = X.shape[0]

    # centered kernel matrices
    Kx = centering(gram(kernel, params_x, X, X))
    Ky = centering(gram(kernel, params_y, Y, Y))

    n_kernel = Kx.shape[0]

    # diagonal loading shared by both inversions
    ridge = epsilon * n_samples * np.eye(n_kernel)

    Rx = np.dot(Kx, np.linalg.inv(Kx + ridge))
    Ry = np.dot(Ky, np.linalg.inv(Ky + ridge))

    hsic_value = np.sum(Rx * Ry)

    # pick the normalisation without overwriting the `bias` flag
    scale = 1 / (n_kernel ** 2) if bias else 1 / (n_kernel - 1) ** 2
    return scale * hsic_value