Exemple #1
0
def job_rdcperm_med(paired_source, tr, te, r, n_features=10):
    """
    Run the Randomized Dependence Coefficient (RDC) permutation test, with
    kernel widths chosen by the median heuristic on copula-transformed data.

    The two data splits are merged with ``tr + te`` and the test is performed
    on the merged data. X and Y are each passed through a
    ``fea.MarginalCDFMap``; the squared median distance of each transformed
    sample (``subsample=1000``) is used as the width argument of the
    corresponding ``fea.RFFKGauss`` feature map.

    Parameters:
    - paired_source: not used by this function.
    - tr, te: the two data splits; combined with ``+``.
    - r: integer offset added to the fixed seeds (19, 220 and 100) of the two
      feature maps and of the test.
    - n_features: number of random features for each feature map
      (default 10, following Lopez-Paz et al., 2013).

    NOTE(review): ``alpha`` is not defined in this function; it is presumably
    a module-level significance level -- confirm against the rest of the file.

    Return a dictionary with keys 'indtest' (the RDCPerm object),
    'test_result' (the output of perform_test) and 'time_secs' (the elapsed
    time reported by the ContextTimer).
    """
    full_data = tr + te
    num_permutations = 500
    # Everything below is inside the timer, so parameter selection is
    # included in the reported time.
    with util.ContextTimer() as timer:
        xs, ys = full_data.xy()

        # Copula transform: map X and Y through the marginal CDF map.
        copula = fea.MarginalCDFMap()
        xs_cdf = copula.gen_features(xs)
        ys_cdf = copula.gen_features(ys)

        # Median heuristic on the transformed samples.
        width2_x = util.meddistance(xs_cdf, subsample=1000)**2
        width2_y = util.meddistance(ys_cdf, subsample=1000)**2

        feature_map_x = fea.RFFKGauss(
            width2_x, n_features=n_features, seed=r + 19)
        feature_map_y = fea.RFFKGauss(
            width2_y, n_features=n_features, seed=r + 220)

        test = it.RDCPerm(
            feature_map_x,
            feature_map_y,
            n_permute=num_permutations,
            alpha=alpha,
            seed=r + 100,
        )
        outcome = test.perform_test(full_data)

    return dict(indtest=test, test_result=outcome, time_secs=timer.secs)
Exemple #2
0
def job_rdcperm_nc_med(paired_source, tr, te, r, n_features=10):
    """
    Run the Randomized Dependence Coefficient (RDC) permutation test without
    the copula transformation, using the median heuristic on the raw data.

    The two data splits are merged with ``tr + te`` and the test is performed
    on the merged data. The squared median distance of X and of Y
    (``subsample=1000``) is used as the width argument of the corresponding
    ``fea.RFFKGauss`` feature map, and the test is constructed with
    ``use_copula=False``.

    Parameters:
    - paired_source: not used by this function.
    - tr, te: the two data splits; combined with ``+``.
    - r: integer offset added to the fixed seeds (19, 220 and 100) of the two
      feature maps and of the test.
    - n_features: number of random features for each feature map
      (default 10, following Lopez-Paz et al., 2013).

    NOTE(review): ``alpha`` is not defined in this function; it is presumably
    a module-level significance level -- confirm against the rest of the file.

    Return a dictionary with keys 'indtest' (the RDCPerm object),
    'test_result' (the output of perform_test) and 'time_secs' (the elapsed
    time reported by the ContextTimer).
    """
    full_data = tr + te
    num_permutations = 500
    # Everything below is inside the timer, so parameter selection is
    # included in the reported time.
    with util.ContextTimer() as timer:
        xs, ys = full_data.xy()

        # Median heuristic directly on the untransformed samples.
        width2_x = util.meddistance(xs, subsample=1000)**2
        width2_y = util.meddistance(ys, subsample=1000)**2

        feature_map_x = fea.RFFKGauss(
            width2_x, n_features=n_features, seed=r + 19)
        feature_map_y = fea.RFFKGauss(
            width2_y, n_features=n_features, seed=r + 220)

        test = it.RDCPerm(
            feature_map_x,
            feature_map_y,
            n_permute=num_permutations,
            alpha=alpha,
            seed=r + 100,
            use_copula=False,
        )
        outcome = test.perform_test(full_data)

    return dict(indtest=test, test_result=outcome, time_secs=timer.secs)