def test_context_deterministic(self):
    """Check that NumpySeedContext with the same seed reproduces the same draws."""

    def draw(seed):
        # One standard-normal draw followed by one uniform draw, both taken
        # inside a seed context opened with the given seed.
        with util.NumpySeedContext(seed=seed):
            normal = np.random.randn(5, 1)
            uniform = np.random.rand(6)
        return normal, uniform

    for seed in (2, 98, 10):
        first_normal, first_uniform = draw(seed)
        second_normal, second_uniform = draw(seed)
        np.testing.assert_array_almost_equal(first_normal, second_normal)
        np.testing.assert_array_almost_equal(first_uniform, second_uniform)
def job_nfsicJ10_med(paired_source, tr, te, r, n_permute=None):
    """
    NFSIC with J=10 randomly drawn test locations and kernels obtained from
    kl_kgauss_median (median heuristic for the Gaussian width).

    The full sample ``tr + te`` is used; there is no training/testing split.

    Parameters
    ----------
    paired_source : not used by this job.
    tr, te : the two data parts; they are combined with ``+``.
    r : repetition index. The test locations are seeded with ``r + 92`` and
        the NFSIC object with ``r + 3``.
    n_permute : forwarded unchanged to ``it.NFSIC``.

    Returns
    -------
    dict with keys 'indtest' (the NFSIC object), 'test_result' (the output of
    ``perform_test``) and 'time_secs' (``secs`` of the surrounding
    ContextTimer).

    Note: ``alpha`` is not a parameter; it is read from the enclosing scope.
    """
    n_locs = 10
    full_data = tr + te
    with util.ContextTimer() as timer:
        # Locations are plain standard-normal draws. Data-driven initialisers
        # (it.GaussNFSIC.init_locs_2randn / init_locs_joint_randn) were tried
        # earlier; the original note asks whether they may overfit and
        # increase type-I errors.
        with util.NumpySeedContext(seed=r + 92):
            dim_x = full_data.dx()
            dim_y = full_data.dy()
            V = np.random.randn(n_locs, dim_x)
            W = np.random.randn(n_locs, dim_y)
        k, l = kl_kgauss_median(full_data)
        test = it.NFSIC(
            k, l, V, W,
            alpha=alpha,
            reg='auto',
            n_permute=n_permute,
            seed=r + 3,
        )
        outcome = test.perform_test(full_data)
    return {
        'indtest': test,
        'test_result': outcome,
        'time_secs': timer.secs,
    }
def sample_d_variates(w, n, D, seed=81):
    """
    Return an n x D sample matrix.

    Rows are generated by rejection sampling inside a NumpySeedContext opened
    with ``seed``. Candidates are drawn uniformly on [-pi, pi] per coordinate,
    500 rows at a time, and a candidate row x is kept when an independent
    U(0, 1) draw falls below (1 + prod_i sin(w * x_i)) / 2. Accepted rows are
    copied into the output until n rows have been filled.
    """
    block_size = 500
    out = np.zeros((n, D))
    with util.NumpySeedContext(seed=seed):
        filled = 0
        while filled < n:
            # Draw one flat batch of block_size * D coordinates, then view it
            # as block_size candidate rows.
            flat = stats.uniform.rvs(
                loc=-math.pi, scale=2 * math.pi, size=D * block_size
            )
            candidates = flat.reshape((block_size, D))

            # Unnormalised density of each candidate row.
            unnorm_density = 1.0 + np.prod(np.sin(w * candidates), axis=1)

            # Accept/reject decision per row.
            keep = stats.uniform.rvs(size=block_size) < unnorm_density / 2.0
            accepted_rows = candidates[keep, :]

            # Never copy more rows than are still missing.
            n_take = min(n - filled, np.sum(keep))
            stop = filled + n_take
            out[filled:stop, :] = accepted_rows[:n_take, :]
            filled = stop
    return out
def sample(self, n, seed):
    """
    Draw n paired observations under the given seed.

    X is an (n, self.dimx) standard-normal matrix. Y is an (n, 1) column equal
    to the product of the signs of each row of X, multiplied by |Z|, where Z
    is an independent standard-normal column.

    Returns a PairedData labelled 'gauss_sign_dx<dimx>'.
    """
    dim = self.dimx
    with util.NumpySeedContext(seed=seed):
        # Z is drawn before X; the order determines the values for a seed.
        noise = np.random.randn(n, 1)
        X = np.random.randn(n, dim)
    row_sign = np.prod(np.sign(X), axis=1)[:, None]
    Y = row_sign * np.abs(noise)
    return PairedData(X, Y, label='gauss_sign_dx%d' % dim)
def sample(self, n, seed):
    """
    Draw n paired observations under the given seed.

    X is an (n, self.dimx) standard-normal matrix. With n_pairs = dimx // 2,
    Y is the (n, 1) column

        sqrt(2 / dimx) * sum_{j < n_pairs} sign(X[:, 2j] * X[:, 2j+1]) * |Z[:, j]|
        + Z[:, n_pairs]

    where Z is an independent (n, n_pairs + 1) standard-normal matrix.

    Returns a PairedData labelled 'pairwise_sign_dx<dimx>'.
    """
    d = self.dimx
    # Floor division: the pair count is used as an array dimension, a range
    # bound and a column index, all of which must be integers. Plain `/`
    # yields a float on Python 3 and makes every one of those uses fail.
    n_pairs = d // 2
    with util.NumpySeedContext(seed=seed):
        # Z is drawn before X; the order determines the values for a seed.
        Z = np.random.randn(n, n_pairs + 1)
        X = np.random.randn(n, d)
    Y = np.zeros((n, 1))
    for j in range(n_pairs):
        # List indices ([2*j]) keep each selected column two-dimensional.
        pair_sign = np.sign(X[:, [2 * j]] * X[:, [2 * j + 1]])
        Y = Y + pair_sign * np.abs(Z[:, [j]])
    # The last column of Z enters additively.
    Y = np.sqrt(2.0 / d) * Y + Z[:, [n_pairs]]
    return PairedData(X, Y, label='pairwise_sign_dx%d' % self.dimx)
def test_bounded(self):
    """Check that every value in the first output of util.cca lies in [-1, 1]."""
    n = 100
    for r in range(5):
        with util.NumpySeedContext(seed=r * 5 + 1):
            X = np.random.randn(n, 2) * 5 - 1
            Y = np.random.rand(n, 3) - 0.5
            evals, Vx, Vy = util.cca(X, Y, reg=1e-5)
            # The comparison must happen element-wise *inside* np.all.
            # `np.all(evals) <= 1` compares a single boolean with 1 and is
            # therefore always true, so it never checked the bound.
            self.assertTrue(np.all(evals <= 1))
            self.assertTrue(np.all(evals >= -1))
def sample(self, n, seed=44):
    """
    Sample n pairs from the wrapped source ``self.ps`` and append
    standard-normal noise columns: ``self.ndx`` extra columns to X and
    ``self.ndy`` extra columns to Y.

    The noise is drawn under seed ``seed + 100``; the wrapped source is
    sampled with ``seed``. If the wrapped sample has a label, the returned
    PairedData's label is that label followed by '_ndx<ndx>_ndy<ndy>';
    otherwise the label is None.
    """
    with util.NumpySeedContext(seed=seed + 100):
        noise_x = np.random.randn(n, self.ndx)
        noise_y = np.random.randn(n, self.ndy)
        base = self.ps.sample(n, seed=seed)
        X, Y = base.xy()
        # Noise columns go to the right of the original columns.
        padded_x = np.hstack((X, noise_x))
        padded_y = np.hstack((Y, noise_y))
        if base.label is None:
            label = None
        else:
            label = base.label + '_ndx%d' % self.ndx + '_ndy%d' % self.ndy
        return PairedData(padded_x, padded_y, label=label)
def test_list_permute(self):
    """
    Check that it.NFSIC.list_permute and it.NFSIC._list_permute_naive give
    simulated histograms whose relative frequencies are close (each bin
    differs by at most 0.2).
    """
    source = data.PS2DSinFreq(freq=2)
    n_permute = 1000
    n_locs = 4

    def relative_freq(values):
        # np.histogram with default settings; counts normalised to sum to 1.
        counts, _edges = np.histogram(values)
        return counts / float(np.sum(counts))

    for s in (284, 77):
        with util.NumpySeedContext(seed=s):
            pdata = source.sample(n=200, seed=s + 1)
            dim_x = pdata.dx()
            dim_y = pdata.dy()
            X, Y = pdata.xy()
            k = kernel.KGauss(2)
            l = kernel.KGauss(3)
            V = np.random.randn(n_locs, dim_x)
            W = np.random.randn(n_locs, dim_y)

            # The two implementations are given different seeds, so only
            # their distributions can be compared, not individual values.
            fast = it.NFSIC.list_permute(
                X, Y, k, l, V, W, n_permute=n_permute, seed=s + 34, reg=0
            )
            naive = it.NFSIC._list_permute_naive(
                X, Y, k, l, V, W, n_permute=n_permute, seed=s + 389, reg=0
            )

            gap = np.abs(relative_freq(fast) - relative_freq(naive))
            self.assertTrue(np.all(gap <= 0.2))
def test_bound_by_data(self):
    """
    Check util.bound_by_data(Z, Data): with Data drawn from rand (values in
    [0, 1)) and Z drawn well outside that range, the result must lie in
    [0, 1] and keep Z's shape.
    """
    n_rows, n_cols = 50, 7
    n_points = n_rows + 3
    for s in (82, 22):
        with util.NumpySeedContext(seed=s):
            Data = np.random.rand(n_rows, n_cols)
            Z = np.random.randn(n_points, n_cols) * 20
            P = util.bound_by_data(Z, Data)

            flat_p = P.flatten()
            flat_z = Z.flatten()
            # Output is bounded ...
            self.assertTrue(np.all(flat_p <= 1))
            self.assertTrue(np.all(flat_p >= 0))
            # ... while the input really did violate both bounds.
            self.assertTrue(np.any(flat_z > 1))
            self.assertTrue(np.any(flat_z < 0))
            # Shape is unchanged along both axes.
            for axis in (0, 1):
                self.assertEqual(P.shape[axis], Z.shape[axis])
def test_list_permute_spectral(self):
    """
    Check that simulating with FiniteFeatureHSIC.list_permute_spectral gives
    roughly the same histogram as FiniteFeatureHSIC.list_permute (relative
    frequencies differ by at most 0.2 in every bin).
    """
    source = data.PS2DSinFreq(freq=2)
    n_features = 5
    n_simulate = 3000
    n_permute = 3000
    sigmax2 = 1
    sigmay2 = 0.8

    def relative_freq(values):
        # np.histogram with default settings; counts normalised to sum to 1.
        counts, _edges = np.histogram(values)
        return counts / np.sum(counts)

    for s in (283, 2):
        with util.NumpySeedContext(seed=s):
            pdata = source.sample(n=200, seed=s + 1)
            X, Y = pdata.xy()

            fmx = feature.RFFKGauss(sigmax2, n_features=n_features, seed=s + 3)
            fmy = feature.RFFKGauss(sigmay2, n_features=n_features, seed=s + 23)
            Zx = fmx.gen_features(X)
            Zy = fmy.gen_features(Y)

            # Permutation approach works from the raw data and feature maps.
            from_perm = indtest.FiniteFeatureHSIC.list_permute(
                X, Y, fmx, fmy, n_permute=n_permute, seed=s + 82
            )
            # Spectral approach works from the generated features; only the
            # first of its three return values is needed here.
            spectral_out = indtest.FiniteFeatureHSIC.list_permute_spectral(
                Zx, Zy, n_simulate=n_simulate, seed=s + 119
            )
            from_spectral, _, _ = spectral_out

            gap = np.abs(relative_freq(from_perm) - relative_freq(from_spectral))
            self.assertTrue(np.all(gap <= 0.2))
def test_approximation(self):
    """
    Check feature.NystromFeatureMap on random data: Z Z^T must be close to
    the kernel matrix K (Frobenius error divided by n^2 at most 0.5), and Z
    must have n rows and as many columns as inducing points.
    """
    n_samples = 50
    dim = 3
    n_induce = n_samples // 3
    for s in (298, 67):
        with util.NumpySeedContext(seed=s):
            k = kernel.KGauss(1)
            X = np.random.randn(n_samples, dim) * 3 + 5
            inducing = util.subsample_rows(X, n_induce, seed=s + 1)
            nymap = feature.NystromFeatureMap(k, inducing)

            K = k.eval(X, X)
            Z = nymap.gen_features(X)

            # Approximation quality.
            residual = K - np.dot(Z, Z.T)
            frob_err = np.linalg.norm(residual, "fro")
            self.assertLessEqual(frob_err / n_samples ** 2, 0.5)

            # Feature matrix dimensions.
            self.assertEqual(Z.shape[1], n_induce)
            self.assertEqual(Z.shape[0], n_samples)