def test_postnonlinear(independent, noise, trial, N):
    """Run SDCIT once on a post-nonlinear-noise data set.

    NumPy's global RNG is seeded with ``trial`` first. Whatever
    ``read_postnonlinear_noise(independent, noise, trial, N)`` returns is
    unpacked as the positional arguments of ``SDCIT``, which is called with
    ``seed=trial``, ``adjust=False`` and ``to_shuffle=False``.

    Returns:
        The tuple ``(independent, noise, trial, N, mmsd, pval)``, where the
        last two entries are the two values returned by ``SDCIT``.
    """
    np.random.seed(trial)
    sdcit_inputs = read_postnonlinear_noise(independent, noise, trial, N)
    statistic, p_value = SDCIT(
        *sdcit_inputs,
        seed=trial,
        adjust=False,
        to_shuffle=False,
    )
    return independent, noise, trial, N, statistic, p_value
def test_chaotic(independent, gamma, trial, N):
    """Run SDCIT once on a chaotic-series data set.

    NumPy's global RNG is seeded with ``trial`` first. Whatever
    ``read_chaotic(independent, gamma, trial, N)`` returns is unpacked as the
    positional arguments of ``SDCIT``, which is called with ``seed=trial``,
    ``adjust=False`` and ``to_shuffle=False``.

    Returns:
        The tuple ``(independent, gamma, trial, N, mmsd, pval)``, where the
        last two entries are the two values returned by ``SDCIT``.
    """
    np.random.seed(trial)
    sdcit_inputs = read_chaotic(independent, gamma, trial, N)
    statistic, p_value = SDCIT(
        *sdcit_inputs,
        seed=trial,
        adjust=False,
        to_shuffle=False,
    )
    return independent, gamma, trial, N, statistic, p_value
def para(N, independent, trial):
    """Compare SDCIT, KCIT and KCIT2 over a sweep of kernel multipliers.

    Loads ``<SDCIT_DATA_DIR>/0.3_<trial>_<independent>_<N>_chaotic.mat``,
    picks X, Y and Z from the ``data`` struct according to ``independent``,
    builds squared-distance matrices normalised by their maximum, and then,
    for each multiplier, builds exponential kernels and runs the three tests.

    Returns:
        A list of rows ``[name, N, trial, multiplier, independent, p]`` with
        ``name`` in ``'SDCIT'``, ``'KCIT'``, ``'KCIT2'`` (three rows per
        multiplier, in that order).
    """
    mat_path = os.path.expanduser(
        SDCIT_DATA_DIR + '/{}_{}_{}_{}_chaotic.mat'.format('0.3', trial, independent, N))
    data = scipy.io.loadmat(mat_path, squeeze_me=True, struct_as_record=False)['data']

    if independent:
        X, Y, Z = data.Xt1, data.Yt, data.Xt[:, 0:2]
    else:
        X, Y, Z = data.Yt1, data.Xt, data.Yt[:, 0:2]

    def _normalised_sq_dist(points):
        # Squared Euclidean distances, scaled in place so the maximum is 1.
        dist = euclidean_distances(points, squared=True)
        dist /= np.max(dist)
        return dist

    DX = _normalised_sq_dist(X)
    DY = _normalised_sq_dist(Y)
    DZ = _normalised_sq_dist(Z)

    # Base kernel scales derived from medd() of each distance matrix.
    mX = 0.5 / medd(DX)
    mY = 0.5 / medd(DY)
    mZ = 0.5 / medd(DZ)

    multipliers = (0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1, 2, 5, 10, 20, 50, 100)

    rows = []
    for multiplier in multipliers:
        KX = np.exp(-mX * DX * multiplier)
        KY = np.exp(-mY * DY * multiplier)
        KZ = np.exp(-mZ * DZ * multiplier)
        Dz = K2D(KZ)

        # The tests run in the same order as before: KCIT, KCIT2, then SDCIT.
        kcit_result = python_kcit_K(KX, KY, KZ, seed=trial)
        p_KCIT = kcit_result[2]
        # KCIT2 receives the raw Z rather than the kernel KZ.
        kcit2_result = python_kcit_K2(KX, KY, Z, seed=trial)
        p_KCIT2 = kcit2_result[2]
        sdcit_result = SDCIT(KX, KY, KZ, Dz=Dz, size_of_null_sample=500,
                             seed=trial, to_shuffle=False)
        p_SDCIT = sdcit_result[1]

        rows.extend([
            ['SDCIT', N, trial, multiplier, independent, p_SDCIT],
            ['KCIT', N, trial, multiplier, independent, p_KCIT],
            ['KCIT2', N, trial, multiplier, independent, p_KCIT2],
        ])
    return rows
def test_reproducible():
    """Check that seeded SDCIT / KCIT runs reproduce known p-values.

    Data comes from ``henon(49, 200, 0.25, True)`` after seeding NumPy's
    global RNG with 0; kernels come from ``rbf_kernel_median``. The four
    p-values must match the reference values within an absolute tolerance
    of 0.005.
    """
    np.random.seed(0)
    X, Y, Z = henon(49, 200, 0.25, True)
    KX, KY, KZ = rbf_kernel_median(X, Y, Z)

    _, p_sdcit = SDCIT(KX, KY, KZ, seed=55)
    # macOS and Linux may give different results for the C implementation.
    _, p_c_sdcit = c_SDCIT(KX, KY, KZ, seed=55)
    _, _, p_kcit, *_ = python_kcit(X, Y, Z, seed=99)
    _, _, p_kcit_k, *_ = python_kcit_K(KX, KY, KZ, seed=99)

    observed = [p_sdcit, p_c_sdcit, p_kcit, p_kcit_k]
    # Previously noted values: [0.345, 0.347, 0.095, 0.0606]
    expected = [0.345, 0.348, 0.095, 0.0606]
    assert np.allclose(observed, expected, atol=0.005, rtol=0)
def testSDCIT(ps, x, y, z=None):
    """Run SDCIT on columns of ``ps.ds`` and return its second result.

    Args:
        ps: Object whose ``ds`` attribute maps variable names to data.
        x: Sequence of variable names; only ``x[0]`` is used.
        y: Sequence of variable names; only ``y[0]`` is used.
        z: Optional iterable of conditioning variable names. ``None`` (the
            default) is treated as an empty conditioning set.

    Returns:
        The second value returned by ``SDCIT`` (bound to ``p_value`` here),
        or the result of ``testFCIT`` when the conditioning set is empty.
    """
    import numpy as np
    from sdcit.sdcit_mod import SDCIT
    from sdcit.utils import rbf_kernel_median

    # ``None`` replaces the former mutable ``[]`` default argument.
    conditioning_names = [] if z is None else z

    X = [ps.ds[x[0]]]
    Y = [ps.ds[y[0]]]
    Z = [ps.ds[var] for var in conditioning_names]

    if not Z:
        # No conditioning variables: fall back before building any arrays.
        # NOTE(review): this passes the data lists X/Y rather than the
        # variable-name lists x/y -- confirm that is what testFCIT expects.
        return testFCIT(ps, X, Y)

    # Transpose so each variable becomes a column.
    Xa = np.array(X).transpose()
    Ya = np.array(Y).transpose()
    Za = np.array(Z).transpose()

    Kx, Ky, Kz = rbf_kernel_median(Xa, Ya, Za)
    test_stat, p_value = SDCIT(Kx, Ky, Kz)
    return p_value
def experiment(obj_filename):
    """Run the SDCIT-vs-KCIPT comparison unless ``obj_filename`` exists.

    When the file is absent this:
      1. runs SDCIT (with its null sample) and KCIPT with B=100 on one
         chaotic-series data set;
      2. derives ``desired_B`` from the ratio of the two null standard
         deviations and reruns KCIPT with that B;
      3. collects 1000 KCIPT mean statistics with seeds 0..999;
      4. pickles the collected objects to ``obj_filename``;
      5. runs ``test_chaotic`` for 300 trials and appends the rows to
         ``<SDCIT_RESULT_DIR>/kcipt_chaotic_<desired_B>.csv``.

    When the file already exists nothing is done.
    """
    if os.path.exists(obj_filename):
        return

    trial = 0
    gamma_param = 0.0
    N = 400
    independent = 1
    initial_B = 100
    kx, ky, kz, Dz = read_chaotic(independent, gamma_param, trial, N)

    # Baselines: SDCIT and KCIPT with the initial B.
    print('SDCIT ... ')
    sdcit_mmd, sdcit_pval, sdcit_null = SDCIT(
        kx, ky, kz, with_null=True, seed=trial, to_shuffle=False)

    print('KCIPT {} ... '.format(initial_B))
    _, mmds100, _, outer_null100 = c_KCIPT(
        kx, ky, kz, K2D(kz), initial_B, 10000, 10000,
        n_jobs=PARALLEL_JOBS, seed=trial)

    # Scale B by the squared ratio of the null standard deviations.
    std_ratio = outer_null100.std() / sdcit_null.std()
    desired_B = int(initial_B * std_ratio ** 2)
    print('Desired B: {}'.format(desired_B))

    # Outer null distribution for the inferred B.
    print('KCIPT {} ... '.format(desired_B))
    _, mmds_B, _, outer_null_B = c_KCIPT(
        kx, ky, kz, K2D(kz), desired_B, 10000, 10000,
        n_jobs=PARALLEL_JOBS, seed=trial)

    print('TS distributions for KCIPT {} ... '.format(desired_B))
    time.sleep(1)
    distr_boot = np.zeros((1000,))
    for ii in trange(len(distr_boot)):
        # NOTE: mmds_B is rebound on every iteration, so the value pickled
        # below is the one from the last run of this loop.
        _, mmds_B, _, _ = c_KCIPT(
            kx, ky, kz, K2D(kz), desired_B, 0, 0,
            n_jobs=PARALLEL_JOBS, seed=ii)
        distr_boot[ii] = mmds_B.mean()

    collected = [sdcit_mmd, sdcit_null, mmds100, outer_null100,
                 desired_B, mmds_B, outer_null_B, distr_boot]
    with open(obj_filename, 'wb') as pickle_file:
        pickle.dump(collected, pickle_file)

    print(independent, gamma_param, N)
    outs = []
    for tt in trange(300):
        outs.append(test_chaotic(independent, gamma_param, tt, N,
                                 B=desired_B, n_jobs=PARALLEL_JOBS))

    csv_path = SDCIT_RESULT_DIR + '/kcipt_chaotic_{}.csv'.format(desired_B)
    with open(csv_path, 'a') as csv_file:
        for row in outs:
            print(*row, sep=',', file=csv_file, flush=True)