Exemple #1
0
def python_kcit_K2(Kx: np.ndarray,
                   Ky: np.ndarray,
                   Z: np.ndarray,
                   alpha=0.05,
                   with_gp=True,
                   sigma_squared=1e-3,
                   num_bootstrap_for_null=5000,
                   seed=None):
    """Test X _||_ Y | Z with KCIT, given Gram matrices for X and Y and tabular data Z.

    The kernel for Z is built here from ``Z`` via ``rbf_kernel_median``.
    When ``seed`` is not None, NumPy's global random state is re-seeded
    with it before anything else happens.

    With ``with_gp`` truthy, the residual kernels come from
    ``residual_kernel_matrix_kernel_real``; otherwise they are computed in
    closed form using ``sigma_squared`` as the regularisation term.

    The return value is whatever ``kcit_null`` returns; see `kcit_null`.
    """
    if seed is not None:
        np.random.seed(seed)

    n_samples = len(Kx)

    # Kx is multiplied element-wise by the *uncentered* Kz before centering,
    # so Kz must only be centered after that product has been taken.
    Kz = rbf_kernel_median(Z)
    Kx = centering(Kx * Kz)
    Ky = centering(Ky)
    Kz = centering(Kz)

    if not with_gp:
        # Closed-form residualisation: P = I - Kz (Kz + sigma^2 I)^-1
        P = eye(n_samples) - Kz @ pdinv(Kz + sigma_squared * eye(n_samples))
        Kxz = P @ Kx @ P.T
        Kyz = P @ Ky @ P.T
    else:
        # Third argument shared by both residual-kernel calls.
        # originally, min(400, T // 4)
        gp_size = min(200, n_samples // 5)
        Kxz = residual_kernel_matrix_kernel_real(Kx, Z, gp_size)
        Kyz = residual_kernel_matrix_kernel_real(Ky, Z, gp_size)

    # Sum of the element-wise product, i.e. trace(Kxz @ Kyz)
    test_statistic = (Kxz * Kyz).sum()

    return kcit_null(Kxz, Kyz, n_samples, alpha, num_bootstrap_for_null,
                     test_statistic)
def test_hsics():
    """Check that c_HSIC and HSIC give the recorded p-values on seeded data."""
    np.random.seed(0)

    shape = (600, 3)
    X = np.random.randn(*shape)
    # Y is fresh noise plus a very small contribution from X.
    Y = np.random.randn(*shape) + 0.01 * X

    KX, KY = rbf_kernel_median(X, Y)

    _, p_c_hsic = c_HSIC(KX, KY, n_jobs=1, size_of_null_sample=5000)
    p_hsic = HSIC(KX, KY, num_boot=5000)

    observed = [p_c_hsic, p_hsic]
    expected = [0.0338, 0.0316]
    assert np.allclose(observed, expected, atol=0.005)
def test_reproducible():
    """Check that the seeded SDCIT / KCIT variants reproduce recorded p-values."""
    np.random.seed(0)

    X, Y, Z = henon(49, 200, 0.25, True)
    KX, KY, KZ = rbf_kernel_median(X, Y, Z)

    _, p_sdcit = SDCIT(KX, KY, KZ, seed=55)
    # macOS and Linux may have different result.
    _, p_c_sdcit = c_SDCIT(KX, KY, KZ, seed=55)
    _, _, p_kcit, *_ = python_kcit(X, Y, Z, seed=99)
    _, _, p_kcit_k, *_ = python_kcit_K(KX, KY, KZ, seed=99)

    observed = [p_sdcit, p_c_sdcit, p_kcit, p_kcit_k]
    # Previously recorded: [0.345, 0.347, 0.095, 0.0606]
    expected = [0.345, 0.348, 0.095, 0.0606]
    assert np.allclose(observed, expected, atol=0.005, rtol=0)
Exemple #4
0
def read_postnonlinear_noise(independent,
                             noise,
                             trial,
                             N,
                             dir_at=SDCIT_DATA_DIR + '/'):
    """Load one post-nonlinear-noise data set and its stored ``D`` matrix.

    The raw data comes from ``read_postnonlinear_noise_data``; kernels are
    built with ``rbf_kernel_median``. The ``D`` entry is read from the file
    ``dist_<noise>_<trial>_<independent>_<N>_postnonlinear.mat`` under
    ``dir_at`` (``~`` is expanded).

    Returns the result of ``cythonize(kx, ky, kz, Dz)``.
    """
    X, Y, Z = read_postnonlinear_noise_data(independent, noise, trial, N,
                                            dir_at)
    kx, ky, kz = rbf_kernel_median(X, Y, Z)

    file_name = 'dist_{}_{}_{}_{}_postnonlinear.mat'.format(
        noise, trial, independent, N)
    dist_mat_path = os.path.expanduser(dir_at + file_name)

    loaded = scipy.io.loadmat(dist_mat_path,
                              squeeze_me=True,
                              struct_as_record=False)
    Dz = np.array(loaded['D'])

    return cythonize(kx, ky, kz, Dz)
Exemple #5
0
def testSDCIT(ps, x, y, z=None):
    """Return the p-value of an SDCIT test of x _||_ y | z on the data in ``ps.ds``.

    Args:
        ps: object whose ``ds`` attribute is indexed by variable name to
            obtain that variable's data.
        x: sequence of variable names; only ``x[0]`` is used.
        y: sequence of variable names; only ``y[0]`` is used.
        z: optional iterable of conditioning variable names. ``None`` or an
            empty iterable means "no conditioning set".

    Returns:
        The SDCIT p-value when a conditioning set is given; otherwise
        whatever ``testFCIT(ps, X, Y)`` returns.
    """
    X = [ps.ds[x[0]]]
    Y = [ps.ds[y[0]]]
    # ``None`` replaces the former mutable ``[]`` default.
    Z = [ps.ds[var] for var in (z if z is not None else ())]

    if not Z:
        # Without a conditioning set, delegate to the FCIT-based test.
        # NOTE(review): this forwards the data lists (X, Y), not the name
        # sequences (x, y), exactly as before -- confirm that is what
        # testFCIT expects.
        return testFCIT(ps, X, Y)

    # Imported lazily, and only on the path that needs them, so the
    # unconditional fallback above does not require sdcit.
    import numpy as np
    from sdcit.sdcit_mod import SDCIT
    from sdcit.utils import rbf_kernel_median

    # Each list holds one entry per variable; transpose so the variable
    # axis comes last.
    Xa = np.array(X).transpose()
    Ya = np.array(Y).transpose()
    Za = np.array(Z).transpose()

    Kx, Ky, Kz = rbf_kernel_median(Xa, Ya, Za)
    _, p_value = SDCIT(Kx, Ky, Kz)
    return p_value
Exemple #6
0
def residual_kernel_skeleton(skeleton: RelationalSkeleton, VK, index_of):
    """Build a residual kernel, plus an item lookup, for every attribute class.

    For each item class in ``skeleton.schema`` that has attributes, and for
    each of its attribute classes, an RBF kernel over the attribute values
    is passed to ``residual_kernel`` together with the sub-matrix of ``VK``
    selected by the items' positions in ``index_of``.

    Returns a ``SkResidualKernel`` wrapping a dict that maps each attribute
    class to ``(item -> position, residual kernel)``.
    """
    per_attr = {}

    for item_class in skeleton.schema.item_classes:
        class_items = list(skeleton.items(item_class))
        if not item_class.attrs:
            # Nothing to compute for item classes without attributes.
            continue

        vk_positions = [index_of[entity] for entity in class_items]
        # Position of every item within this class's kernel matrices; the
        # same dict object is shared by all attribute classes of the class.
        position_of = dict(zip(class_items, range(len(class_items))))

        for attr_class in item_class.attrs:
            attr_values = np.array(
                [skeleton[(entity, attr_class)] for entity in class_items])
            # Fresh sub-matrix of VK restricted to this item class.
            vk_block = VK[np.ix_(vk_positions, vk_positions)]

            attr_kernel = rbf_kernel_median(attr_values[:, np.newaxis])
            per_attr[attr_class] = (position_of,
                                    residual_kernel(attr_kernel, vk_block))

    return SkResidualKernel(per_attr)
Exemple #7
0
            # Time c_SDCIT on the chaotic data files for every combination of
            # N, null-sample size b, and trial index.
            # NOTE(review): `independent` and the output file `f` come from
            # the enclosing scope, which is not visible here.
            for N in [200, 400]:
                for b in [500, 1000]:
                    for trial in range(300):
                        # File name pattern: <'0.0'>_<trial>_<independent>_<N>_chaotic.mat
                        # under SDCIT_DATA_DIR; the first field is hard-coded
                        # to '0.0' (presumably the gamma value -- confirm).
                        mat_load = scipy.io.loadmat(os.path.expanduser(
                            SDCIT_DATA_DIR + '/{}_{}_{}_{}_chaotic.mat'.format(
                                '0.0', trial, independent, N)),
                                                    squeeze_me=True,
                                                    struct_as_record=False)
                        data = mat_load['data']
                        # X, Y, Z are taken from the Yt1, Xt and the first
                        # two columns of Yt fields of the loaded struct.
                        X = data.Yt1
                        Y = data.Xt
                        Z = data.Yt[:, 0:2]

                        # Timed region: kernel construction, K2D on the last
                        # kernel, and the c_SDCIT call. File loading above is
                        # excluded from the measurement.
                        start = time.time()

                        kkk = rbf_kernel_median(X, Y, Z)
                        # The last kernel returned is the one for Z.
                        Dz = K2D(kkk[-1])
                        # The result of c_SDCIT is discarded; only the
                        # elapsed time is recorded.
                        c_SDCIT(*kkk,
                                Dz=Dz,
                                size_of_null_sample=b,
                                seed=trial,
                                to_shuffle=False)

                        endtime = time.time()
                        # One comma-separated record per run:
                        # elapsed seconds, trial, N, b -- flushed immediately.
                        print(endtime - start,
                              trial,
                              N,
                              b,
                              file=f,
                              sep=',',
                              flush=True)
Exemple #8
0
def read_chaotic(independent, gamma, trial, N, dir_at=SDCIT_DATA_DIR + '/'):
    """Load one chaotic data set and return its kernels plus ``K2D`` of the Z kernel.

    The raw data comes from ``read_chaotic_data``; the three kernels are
    built with ``rbf_kernel_median``.

    Returns:
        A tuple ``(kx, ky, kz, Dz)`` where ``Dz = K2D(kz)``.
    """
    X, Y, Z = read_chaotic_data(independent, gamma, trial, N, dir_at)
    kernels = rbf_kernel_median(X, Y, Z)
    kx, ky, kz = kernels
    return kx, ky, kz, K2D(kz)