예제 #1
0
def job_rdcperm_med(paired_source, tr, te, r, n_features=10):
    """
    Run the permutation-based Randomized Dependence Coefficient (RDC) test.

    The test is performed on the combined data ``tr + te``. Both X and Y are
    first mapped through ``fea.MarginalCDFMap`` (copula transform); the
    squared ``util.meddistance`` of each transformed sample is then handed to
    ``fea.RFFKGauss`` as its first argument. The significance level is read
    from the module-level name ``alpha``.

    Parameters
    ----------
    paired_source : not used by this function.
    tr, te : data objects; they are added together and the sum is tested.
    r : integer-like value; offsets of it (r + 19, r + 220, r + 100) seed
        the two feature maps and the test itself.
    n_features : number of random features requested from each RFFKGauss.

    Returns
    -------
    dict with keys 'indtest' (the RDCPerm object), 'test_result' (the value
    returned by ``perform_test``) and 'time_secs' (``secs`` of the timer).
    """
    num_permutations = 500
    full_data = tr + te
    # n_features=10 follows Lopez-Paz et al., 2013.
    with util.ContextTimer() as timer:
        x_raw, y_raw = full_data.xy()

        # Copula-transform X and Y.
        copula = fea.MarginalCDFMap()
        x_cop = copula.gen_features(x_raw)
        y_cop = copula.gen_features(y_raw)

        # Squared median distance of each copula-transformed sample.
        x_width2 = util.meddistance(x_cop, subsample=1000)**2
        y_width2 = util.meddistance(y_cop, subsample=1000)**2

        x_feat_map = fea.RFFKGauss(
            x_width2, n_features=n_features, seed=r + 19)
        y_feat_map = fea.RFFKGauss(
            y_width2, n_features=n_features, seed=r + 220)

        tester = it.RDCPerm(
            x_feat_map,
            y_feat_map,
            n_permute=num_permutations,
            alpha=alpha,
            seed=r + 100,
        )
        outcome = tester.perform_test(full_data)

    results = {}
    results['indtest'] = tester
    results['test_result'] = outcome
    results['time_secs'] = timer.secs
    return results
예제 #2
0
def job_rdc_med(paired_source, tr, te, r, n_features=10):
    """
    Run the Randomized Dependence Coefficient (RDC) test.

    - X and Y are copula-transformed with ``fea.MarginalCDFMap``.
    - The squared ``util.meddistance`` of each transformed sample is passed
      to ``fea.RFFKGauss`` (median heuristic on the copula-transformed data).
    - ``n_features`` random features (10 by default) for each of X and Y.
    - The full dataset ``tr + te`` is used for testing.
    - The significance level is read from the module-level name ``alpha``.

    Parameters
    ----------
    paired_source : not used by this function.
    tr, te : data objects; they are added together and the sum is tested.
    r : integer-like value; r + 19 and r + 220 seed the two feature maps.
    n_features : number of random features requested from each RFFKGauss.

    Returns
    -------
    dict with keys 'indtest' (the RDC object), 'test_result' (the value
    returned by ``perform_test``) and 'time_secs' (``secs`` of the timer).
    """
    full_data = tr + te
    # n_features=10 follows Lopez-Paz et al., 2013.
    with util.ContextTimer() as timer:
        x_raw, y_raw = full_data.xy()

        # Copula-transform X and Y.
        copula = fea.MarginalCDFMap()
        x_cop = copula.gen_features(x_raw)
        y_cop = copula.gen_features(y_raw)

        # Squared median distance of each copula-transformed sample.
        x_width2 = util.meddistance(x_cop, subsample=1000)**2
        y_width2 = util.meddistance(y_cop, subsample=1000)**2

        x_feat_map = fea.RFFKGauss(
            x_width2, n_features=n_features, seed=r + 19)
        y_feat_map = fea.RFFKGauss(
            y_width2, n_features=n_features, seed=r + 220)

        tester = it.RDC(x_feat_map, y_feat_map, alpha=alpha)
        outcome = tester.perform_test(full_data)

    results = {}
    results['indtest'] = tester
    results['test_result'] = outcome
    results['time_secs'] = timer.secs
    return results
예제 #3
0
    def test_general(self):
        """Check the shape and the [0, 1] range of MarginalCDFMap features."""
        n_rows, n_cols = 30, 4
        # Random Gaussian input, scaled by 3 and shifted by 4.
        samples = 4 + 3 * np.random.randn(n_rows, n_cols)

        cdf_map = feature.MarginalCDFMap()
        transformed = cdf_map.gen_features(samples)

        # The output must have the same number of columns and rows as the
        # input, and num_features must report the input dimension.
        self.assertEqual(transformed.shape[1], n_cols)
        self.assertEqual(transformed.shape[0], n_rows)
        self.assertEqual(cdf_map.num_features(samples), n_cols)

        # Every transformed value must lie in the closed interval [0, 1].
        self.assertTrue(np.all(transformed >= 0))
        self.assertTrue(np.all(transformed <= 1))
예제 #4
0
def job_nfsicJ10_cperm_stoopt(paired_source, tr, te, r):
    """
    Copula-transform the data, then delegate to ``job_nfsicJ10_stoopt``.

    - X and Y of both ``tr`` and ``te`` are each mapped through
      ``fea.MarginalCDFMap`` and re-wrapped as ``data.PairedData``.
    - 500 is passed as the permutation count to ``job_nfsicJ10_stoopt``
      (permutations are used to simulate from the null distribution).

    Parameters
    ----------
    paired_source : forwarded unchanged to ``job_nfsicJ10_stoopt``.
    tr, te : data objects exposing ``xy()``.
    r : forwarded unchanged to ``job_nfsicJ10_stoopt``.

    Returns
    -------
    The object returned by ``job_nfsicJ10_stoopt``, with its 'time_secs'
    entry set to the time measured here, so it covers the copula transform
    as well as the delegated job.
    """
    num_permutations = 500

    with util.ContextTimer() as timer:
        copula = fea.MarginalCDFMap()
        x_train, y_train = tr.xy()
        x_test, y_test = te.xy()

        # Transform the four arrays in the order: train X, train Y,
        # test X, test Y.
        x_train_c, y_train_c, x_test_c, y_test_c = [
            copula.gen_features(arr)
            for arr in (x_train, y_train, x_test, y_test)
        ]

        train_cop = data.PairedData(x_train_c, y_train_c)
        test_cop = data.PairedData(x_test_c, y_test_c)

        job_result = job_nfsicJ10_stoopt(
            paired_source, train_cop, test_cop, r, num_permutations)

    # Set after the timer has exited so the total elapsed time is reported.
    job_result['time_secs'] = timer.secs
    return job_result