def python_kcit_K2(Kx: np.ndarray, Ky: np.ndarray, Z: np.ndarray, alpha=0.05, with_gp=True, sigma_squared=1e-3, num_bootstrap_for_null=5000, seed=None):
    """Run KCIT for X _||_ Y | Z given Gram matrices for X and Y and raw data for Z.

    Parameters
    ----------
    Kx, Ky : Gram matrices for X and Y.
    Z : tabular conditioning data; its kernel is built here with
        `rbf_kernel_median`.
    alpha, num_bootstrap_for_null : forwarded unchanged to `kcit_null`.
    with_gp : if true, the residual kernels come from
        `residual_kernel_matrix_kernel_real`; otherwise from a regularised
        projection that uses `sigma_squared`.
    sigma_squared : regulariser added to the diagonal in the non-GP branch.
    seed : when not None, seeds NumPy's global random state first.

    Returns
    -------
    Whatever `kcit_null` returns; see `kcit_null` for the output.
    """
    if seed is not None:
        np.random.seed(seed)

    T = len(Kx)
    Kz = rbf_kernel_median(Z)

    # The X kernel is combined element-wise with the (uncentered) Z kernel
    # before centering; Y and Z are centered on their own.
    centered_xz = centering(Kx * Kz)
    centered_y = centering(Ky)
    centered_z = centering(Kz)

    if with_gp:
        # Third argument of residual_kernel_matrix_kernel_real; its exact
        # meaning is not visible from this function.
        capped_size = min(200, T // 5)  # originally, min(400, T // 4)
        Kxz = residual_kernel_matrix_kernel_real(centered_xz, Z, capped_size)
        Kyz = residual_kernel_matrix_kernel_real(centered_y, Z, capped_size)
    else:
        identity = eye(T)
        P = identity - centered_z @ pdinv(centered_z + sigma_squared * identity)
        Kxz = P @ centered_xz @ P.T
        Kyz = P @ centered_y @ P.T

    # Element-wise product summed over all entries, i.e. trace(Kxz @ Kyz).
    test_statistic = (Kxz * Kyz).sum()
    return kcit_null(Kxz, Kyz, T, alpha, num_bootstrap_for_null, test_statistic)
def test_hsics():
    """Check that `c_HSIC` and `HSIC` reproduce their recorded p-values on seeded data."""
    np.random.seed(0)
    sample_shape = (600, 3)
    X = np.random.randn(*sample_shape)
    # Y is fresh noise plus a very weak linear dependence on X.
    Y = np.random.randn(*sample_shape) + 0.01 * X

    KX, KY = rbf_kernel_median(X, Y)

    # c_HSIC runs first so any shared random state is consumed in the same
    # order as the recorded reference values assume.
    _, p_c_hsic = c_HSIC(KX, KY, n_jobs=1, size_of_null_sample=5000)
    p_hsic = HSIC(KX, KY, num_boot=5000)

    observed = [p_c_hsic, p_hsic]
    expected = [0.0338, 0.0316]
    assert np.allclose(observed, expected, atol=0.005)
def test_reproducible():
    """Check that seeded SDCIT and KCIT runs reproduce their recorded p-values."""
    np.random.seed(0)
    X, Y, Z = henon(49, 200, 0.25, True)
    KX, KY, KZ = rbf_kernel_median(X, Y, Z)

    sdcit_result = SDCIT(KX, KY, KZ, seed=55)
    # macOS and Linux may have different result.
    c_sdcit_result = c_SDCIT(KX, KY, KZ, seed=55)
    kcit_result = python_kcit(X, Y, Z, seed=99)
    kcit_k_result = python_kcit_K(KX, KY, KZ, seed=99)

    # The SDCIT variants return a pair with the p-value second; the KCIT
    # variants return a longer sequence with the p-value in third position.
    _, p1 = sdcit_result
    _, p2 = c_sdcit_result
    _, _, p3, *_ = kcit_result
    _, _, p4, *_ = kcit_k_result

    # [0.345, 0.347, 0.095, 0.0606]
    observed = [p1, p2, p3, p4]
    expected = [0.345, 0.348, 0.095, 0.0606]
    assert np.allclose(observed, expected, atol=0.005, rtol=0)
def read_postnonlinear_noise(independent, noise, trial, N, dir_at=SDCIT_DATA_DIR + '/'):
    """Load one post-nonlinear-noise trial and return it prepared by `cythonize`.

    The raw X, Y, Z data come from `read_postnonlinear_noise_data`; their
    kernels are built with `rbf_kernel_median`. The matrix stored under key
    'D' of the matching `dist_*_postnonlinear.mat` file in `dir_at` is passed
    along as the fourth argument.

    Returns whatever `cythonize(kx, ky, kz, Dz)` returns.
    """
    raw_x, raw_y, raw_z = read_postnonlinear_noise_data(independent, noise, trial, N, dir_at)
    gram_x, gram_y, gram_z = rbf_kernel_median(raw_x, raw_y, raw_z)

    # Note the field order in the file name: noise, trial, independent, N.
    file_name = 'dist_{}_{}_{}_{}_postnonlinear.mat'.format(noise, trial, independent, N)
    full_path = os.path.expanduser(dir_at + file_name)
    contents = scipy.io.loadmat(full_path, squeeze_me=True, struct_as_record=False)

    return cythonize(gram_x, gram_y, gram_z, np.array(contents['D']))
def testSDCIT(ps, x, y, z=None):
    """Return the SDCIT p-value for x _||_ y | z using data looked up in `ps.ds`.

    Parameters
    ----------
    ps : object exposing a `ds` mapping that is indexed by variable key.
    x, y : sequences of variable keys; only the first element of each is used.
    z : optional iterable of conditioning variable keys. When it is omitted or
        empty, the call is delegated to `testFCIT` instead.

    Returns
    -------
    The p-value from `SDCIT`, or the result of `testFCIT` when there is no
    conditioning set.
    """
    import numpy as np
    from sdcit.sdcit_mod import SDCIT
    from sdcit.utils import rbf_kernel_median

    # None replaces the former mutable default `[]`; behaviour for callers
    # that pass a list (including an empty one) is unchanged.
    if z is None:
        z = ()

    X = [ps.ds[x[0]]]
    Y = [ps.ds[y[0]]]
    Z = [ps.ds[var] for var in z]

    if not Z:
        # NOTE(review): this forwards the looked-up data lists (X, Y), not the
        # variable keys (x, y) that this function itself receives. Kept as in
        # the original; confirm against testFCIT's expected arguments.
        return testFCIT(ps, X, Y)

    # One row per variable in the lists above; transposing puts the variables
    # in columns. The arrays are only built once a conditioning set exists.
    Xa = np.array(X).transpose()
    Ya = np.array(Y).transpose()
    Za = np.array(Z).transpose()

    Kx, Ky, Kz = rbf_kernel_median(Xa, Ya, Za)
    _, p_value = SDCIT(Kx, Ky, Kz)
    return p_value
def residual_kernel_skeleton(skeleton: RelationalSkeleton, VK, index_of):
    """Build a residual kernel for every attribute class in the skeleton's schema.

    For each item class that has attributes, the items of that class are
    collected, and for each of its attribute classes a kernel over the
    attribute values (via `rbf_kernel_median`) is passed to `residual_kernel`
    together with the sub-matrix of `VK` selected by those items' positions
    in `index_of`.

    Returns a `SkResidualKernel` wrapping a dict that maps each attribute
    class to `(lookup, residual)`, where `lookup` maps an item to its
    position in the item list used for that kernel.
    """
    per_attribute = {}
    for item_class in skeleton.schema.item_classes:
        members = list(skeleton.items(item_class))
        if not item_class.attrs:
            continue

        vk_positions = [index_of[member] for member in members]
        # The same lookup dict is shared by all attribute classes of this
        # item class.
        lookup = dict(zip(members, range(len(members))))
        selector = np.ix_(vk_positions, vk_positions)

        for attr_class in item_class.attrs:
            values = np.array([skeleton[(member, attr_class)] for member in members])
            # Extracted per attribute so each residual_kernel call gets its
            # own copy of the sub-matrix.
            sub_vk = VK[selector]
            attr_kernel = rbf_kernel_median(values[:, np.newaxis])
            per_attribute[attr_class] = (lookup, residual_kernel(attr_kernel, sub_vk))

    return SkResidualKernel(per_attribute)
# Timing sweep over sample size N, null-sample size b, and 300 trials of the
# chaotic data set. `independent` and the output stream `f` come from the
# surrounding code.
for N in [200, 400]:
    for b in [500, 1000]:
        for trial in range(300):
            mat_path = os.path.expanduser(
                SDCIT_DATA_DIR + '/{}_{}_{}_{}_chaotic.mat'.format('0.0', trial, independent, N)
            )
            mat_load = scipy.io.loadmat(mat_path, squeeze_me=True, struct_as_record=False)
            data = mat_load['data']
            X, Y, Z = data.Yt1, data.Xt, data.Yt[:, 0:2]

            # Only kernel construction, the K2D conversion and the test
            # itself are timed; loading the .mat file is excluded.
            start = time.time()
            kx, ky, kz = rbf_kernel_median(X, Y, Z)
            Dz = K2D(kz)
            c_SDCIT(kx, ky, kz, Dz=Dz, size_of_null_sample=b, seed=trial, to_shuffle=False)
            endtime = time.time()

            # One CSV row per run: elapsed seconds, trial, N, b.
            print(endtime - start, trial, N, b, file=f, sep=',', flush=True)
def read_chaotic(independent, gamma, trial, N, dir_at=SDCIT_DATA_DIR + '/'):
    """Load one chaotic-series trial and return its kernels plus `K2D` of the Z kernel.

    The raw X, Y, Z data come from `read_chaotic_data`; kernels are built
    with `rbf_kernel_median`.

    Returns the tuple `(kx, ky, kz, Dz)` where `Dz = K2D(kz)`.
    """
    raw_x, raw_y, raw_z = read_chaotic_data(independent, gamma, trial, N, dir_at)
    gram_x, gram_y, gram_z = rbf_kernel_median(raw_x, raw_y, raw_z)
    return gram_x, gram_y, gram_z, K2D(gram_z)