Esempio n. 1
0
    def test_context_deterministic(self):
        """
        Entering util.NumpySeedContext twice with the same seed must yield
        the same random draws both times.
        """

        def draw():
            # Tuple elements are evaluated left to right: randn, then rand.
            return np.random.randn(5, 1), np.random.rand(6)

        for seed in (2, 98, 10):
            with util.NumpySeedContext(seed=seed):
                normal_first, uniform_first = draw()

            with util.NumpySeedContext(seed=seed):
                normal_second, uniform_second = draw()

            np.testing.assert_array_almost_equal(normal_first, normal_second)
            np.testing.assert_array_almost_equal(uniform_first, uniform_second)
Esempio n. 2
0
def job_nfsicJ10_med(paired_source, tr, te, r, n_permute=None):
    """
    Run NFSIC with J=10 randomly drawn test locations and with the Gaussian
    kernels returned by kl_kgauss_median (median heuristic).

    The full sample (tr + te) is used; there is no training/testing split.
    paired_source is not used by this job. The significance level alpha is
    read from the enclosing module scope.

    Return a dictionary with the keys
        'indtest': the constructed it.NFSIC object,
        'test_result': the output of its perform_test() on the full sample,
        'time_secs': seconds reported by util.ContextTimer.
    """
    n_locs = 10
    full_sample = tr + te
    with util.ContextTimer() as timer:
        # Test locations are standard normal draws under a seed derived
        # from the trial number r.
        with util.NumpySeedContext(seed=r + 92):
            dim_x = full_sample.dx()
            dim_y = full_sample.dy()
            V = np.random.randn(n_locs, dim_x)
            W = np.random.randn(n_locs, dim_y)
        k, l = kl_kgauss_median(full_sample)

        test_options = dict(
            alpha=alpha, reg='auto', n_permute=n_permute, seed=r + 3
        )
        nfsic_med = it.NFSIC(k, l, V, W, **test_options)
        nfsic_med_result = nfsic_med.perform_test(full_sample)

    results = {}
    results['indtest'] = nfsic_med
    results['test_result'] = nfsic_med_result
    results['time_secs'] = timer.secs
    return results
Esempio n. 3
0
    def sample_d_variates(w, n, D, seed=81):
        """
        Draw n points in D dimensions by rejection sampling and return them
        as an n x D matrix.

        Proposals are uniform on (-pi, pi) in each coordinate. A proposal x
        is accepted when an independent U(0, 1) draw falls below
        (1 + prod_i sin(w * x_i)) / 2.
        """
        # Number of proposals generated per round.
        block_size = 500
        sam = np.zeros((n, D))
        with util.NumpySeedContext(seed=seed):
            n_filled = 0
            while n_filled < n:
                proposals = stats.uniform.rvs(
                    loc=-math.pi, scale=2 * math.pi, size=D * block_size
                ).reshape((block_size, D))
                unnorm_density = 1.0 + np.prod(np.sin(w * proposals), 1)
                thresholds = stats.uniform.rvs(size=block_size)
                keep = thresholds < unnorm_density / 2.0

                # Keep only as many accepted rows as are still needed.
                accepted = proposals[keep, :]
                n_new = min(n - n_filled, accepted.shape[0])
                sam[n_filled:n_filled + n_new, :] = accepted[:n_new, :]
                n_filled += n_new
        return sam
Esempio n. 4
0
 def sample(self, n, seed):
     """
     Draw n pairs (X, Y) under the given seed and wrap them in PairedData.

     X is n x dimx standard normal. Y is n x 1 and equals the product of
     the signs of the coordinates of X multiplied by |Z|, where Z is an
     independent standard normal column.
     """
     dim = self.dimx
     with util.NumpySeedContext(seed=seed):
         # Z is drawn before X; the draw order fixes the sample for a seed.
         Z = np.random.randn(n, 1)
         X = np.random.randn(n, dim)
     sign_product = np.prod(np.sign(X), axis=1, keepdims=True)
     Y = sign_product * np.abs(Z)
     return PairedData(X, Y, label='gauss_sign_dx%d' % dim)
Esempio n. 5
0
 def sample(self, n, seed):
     """
     Draw n pairs (X, Y) under the given seed and wrap them in PairedData.

     X is n x d standard normal, with d = self.dimx. With h = d // 2 and Z
     an independent n x (h + 1) standard normal matrix, Y (n x 1) is

         sqrt(2/d) * sum_{j < h} sign(X[:, 2j] * X[:, 2j+1]) * |Z[:, j]|
             + Z[:, h]

     so consecutive coordinates of X are used in pairs, and the last
     column of Z is added as noise.
     """
     d = self.dimx
     # Floor division: the result is used as an array dimension, a range
     # bound and a column index, all of which must be integers. Plain `/`
     # gives a float on Python 3 and makes every one of those uses fail.
     n_pairs = d // 2
     with util.NumpySeedContext(seed=seed):
         Z = np.random.randn(n, n_pairs + 1)
         X = np.random.randn(n, d)
         Y = np.zeros((n, 1))
         for j in range(n_pairs):
             Y = Y + np.sign(X[:, [2*j]]*X[:, [2*j+1]])*np.abs(Z[:, [j]])
         Y = np.sqrt(2.0/d)*Y + Z[:, [n_pairs]]
     return PairedData(X, Y, label='pairwise_sign_dx%d'%self.dimx)
Esempio n. 6
0
    def test_bounded(self):
        """
        Every entry of evals returned by util.cca must lie within [-1, 1],
        checked on five seeded random data sets.
        """
        n = 100
        for r in range(5):
            with util.NumpySeedContext(seed=r * 5 + 1):
                X = np.random.randn(n, 2) * 5 - 1
                Y = np.random.rand(n, 3) - 0.5
                evals, Vx, Vy = util.cca(X, Y, reg=1e-5)

                # Compare element-wise first, then reduce with np.all.
                # Writing np.all(evals) <= 1 compares a single boolean with
                # the bound, which is always true and checks nothing.
                # NOTE(review): the lower bound asserted is -1, although the
                # values are presumably expected in [0, 1] -- confirm before
                # tightening.
                self.assertTrue(np.all(evals <= 1))
                self.assertTrue(np.all(evals >= -1))
Esempio n. 7
0
    def sample(self, n, seed=44):
        """
        Sample n pairs from the wrapped source self.ps and append
        standard normal noise columns: self.ndx extra columns to X and
        self.ndy extra columns to Y.

        The noise is drawn under seed + 100, while the wrapped source is
        sampled with the given seed. If the wrapped sample has a label, the
        new label appends '_ndx<ndx>_ndy<ndy>' to it; otherwise it is None.
        """
        with util.NumpySeedContext(seed=seed + 100):
            # Noise is drawn first, before the wrapped source is sampled.
            noise_x = np.random.randn(n, self.ndx)
            noise_y = np.random.randn(n, self.ndy)

            base = self.ps.sample(n, seed=seed)
            X, Y = base.xy()

            if base.label is None:
                new_label = None
            else:
                suffix = '_ndx%d'%self.ndx + '_ndy%d'%self.ndy
                new_label = base.label + suffix

            augmented_x = np.hstack((X, noise_x))
            augmented_y = np.hstack((Y, noise_y))
            return PairedData(augmented_x, augmented_y, label=new_label)
Esempio n. 8
0
    def test_list_permute(self):
        """
        The permutation statistics from NFSIC.list_permute and from
        NFSIC._list_permute_naive must have similar histograms: each of the
        relative bin frequencies may differ by at most 0.2.
        """

        def relative_frequencies(values):
            # Normalised counts of np.histogram's default binning.
            counts, _ = np.histogram(values)
            return counts / float(np.sum(counts))

        ps = data.PS2DSinFreq(freq=2)
        n_permute = 1000
        J = 4
        for seed in (284, 77):
            with util.NumpySeedContext(seed=seed):
                pdata = ps.sample(n=200, seed=seed + 1)
                dx = pdata.dx()
                dy = pdata.dy()
                X, Y = pdata.xy()

                k = kernel.KGauss(2)
                l = kernel.KGauss(3)
                V = np.random.randn(J, dx)
                W = np.random.randn(J, dy)

                # Both implementations get the same data, kernels and
                # locations, but different seeds.
                common_args = (X, Y, k, l, V, W)
                arr = it.NFSIC.list_permute(
                    *common_args, n_permute=n_permute, seed=seed + 34, reg=0
                )
                arr_naive = it.NFSIC._list_permute_naive(
                    *common_args, n_permute=n_permute, seed=seed + 389, reg=0
                )

                freq_gap = np.abs(
                    relative_frequencies(arr)
                    - relative_frequencies(arr_naive)
                )
                self.assertTrue(np.all(freq_gap <= 0.2))
Esempio n. 9
0
    def test_bound_by_data(self):
        """
        util.bound_by_data(Z, Data) must return an array of the same shape
        as Z whose entries all lie in [0, 1] when Data is drawn from
        np.random.rand (i.e. from [0, 1)).
        """
        n, d = 50, 7
        m = n + 3
        for seed in (82, 22):
            with util.NumpySeedContext(seed=seed):
                Data = np.random.rand(n, d)
                # Scaled so that Z certainly leaves the range of Data.
                Z = np.random.randn(m, d) * 20
                P = util.bound_by_data(Z, Data)

                bounded = P.flatten()
                self.assertTrue(np.all(bounded <= 1))
                self.assertTrue(np.all(bounded >= 0))

                # Sanity check: the unbounded input really is out of range.
                unbounded = Z.flatten()
                self.assertTrue(np.any(unbounded > 1))
                self.assertTrue(np.any(unbounded < 0))

                for axis in (0, 1):
                    self.assertEqual(P.shape[axis], Z.shape[axis])
Esempio n. 10
0
    def test_list_permute_spectral(self):
        """
        Simulating the null distribution with the spectral approach should
        give roughly the same histogram as doing permutations: each of the
        relative bin frequencies may differ by at most 0.2.
        """

        def relative_frequencies(values):
            # Normalised counts of np.histogram's default binning.
            counts, _ = np.histogram(values)
            return counts / np.sum(counts)

        ps = data.PS2DSinFreq(freq=2)
        n_features = 5
        n_simulate = 3000
        n_permute = 3000
        sigmax2 = 1
        sigmay2 = 0.8
        for seed in (283, 2):
            with util.NumpySeedContext(seed=seed):
                pdata = ps.sample(n=200, seed=seed + 1)
                X, Y = pdata.xy()

                fmx = feature.RFFKGauss(
                    sigmax2, n_features=n_features, seed=seed + 3
                )
                fmy = feature.RFFKGauss(
                    sigmay2, n_features=n_features, seed=seed + 23
                )
                Zx = fmx.gen_features(X)
                Zy = fmy.gen_features(Y)

                list_perm = indtest.FiniteFeatureHSIC.list_permute(
                    X, Y, fmx, fmy, n_permute=n_permute, seed=seed + 82
                )
                # Only the first of the three returned values is compared.
                spectral_out = indtest.FiniteFeatureHSIC.list_permute_spectral(
                    Zx, Zy, n_simulate=n_simulate, seed=seed + 119
                )
                list_spectral, _, _ = spectral_out

                freq_gap = np.abs(
                    relative_frequencies(list_perm)
                    - relative_frequencies(list_spectral)
                )
                self.assertTrue(np.all(freq_gap <= 0.2))
Esempio n. 11
0
    def test_approximation(self):
        """
        Features from feature.NystromFeatureMap must approximate the kernel
        matrix: with Z the n x D feature matrix, the Frobenius norm of
        K - Z Z^T divided by n^2 may not exceed 0.5. Also checks that Z has
        shape (n, D), where D is the number of inducing points.
        """
        n, d = 50, 3
        # Number of inducing points subsampled from the data.
        D = n // 3
        for seed in (298, 67):
            with util.NumpySeedContext(seed=seed):
                k = kernel.KGauss(1)
                X = np.random.randn(n, d) * 3 + 5
                induce = util.subsample_rows(X, D, seed=seed + 1)
                nymap = feature.NystromFeatureMap(k, induce)

                K = k.eval(X, X)
                Z = nymap.gen_features(X)

                # Approximation quality.
                residual = K - Z.dot(Z.T)
                diff = np.linalg.norm(residual, "fro")
                self.assertLessEqual(diff / n ** 2, 0.5)

                # Feature matrix dimensions.
                self.assertEqual(Z.shape[1], D)
                self.assertEqual(Z.shape[0], n)