def job_rdcperm_med(paired_source, tr, te, r, n_features=10):
    """
    Run the permutation-based Randomized Dependence Coefficient (RDC) test,
    with the two kernel width parameters taken from median distances of the
    copula-transformed (marginal CDF) data.

    - paired_source: not used by this job.
    - tr, te: two data objects, joined with `+`. The joined sample is used
        both for the median distances and for the test itself.
    - r: trial number. Only used to offset the seeds (r+19, r+220, r+100).
    - n_features: forwarded as n_features to each fea.RFFKGauss.

    The `alpha` given to it.RDCPerm is not a parameter of this function; it
    is looked up from the enclosing (module) scope.

    Return a dictionary with keys 'indtest' (the it.RDCPerm object),
    'test_result' (whatever perform_test returned) and 'time_secs'
    (the timer's `secs`, covering width selection and the test).
    """
    full_data = tr + te
    num_permutations = 500
    # n_features=10 is the value from the Lopez-Paz et al., 2013 paper.
    with util.ContextTimer() as timer:
        x_raw, y_raw = full_data.xy()

        # Copula transform of X and Y (one shared map object), done before
        # any median distance is computed.
        cdf_map = fea.MarginalCDFMap()
        x_cdf, y_cdf = [cdf_map.gen_features(m) for m in (x_raw, y_raw)]

        # Median distances on the transformed data; squared below to get
        # the width parameters.
        x_med, y_med = [
            util.meddistance(m, subsample=1000) for m in (x_cdf, y_cdf)
        ]
        x_width2 = x_med**2
        y_width2 = y_med**2

        x_feature_map = fea.RFFKGauss(
            x_width2, n_features=n_features, seed=r + 19)
        y_feature_map = fea.RFFKGauss(
            y_width2, n_features=n_features, seed=r + 220)
        tester = it.RDCPerm(
            x_feature_map,
            y_feature_map,
            n_permute=num_permutations,
            alpha=alpha,
            seed=r + 100,
        )
        outcome = tester.perform_test(full_data)

    job_result = {
        'indtest': tester,
        'test_result': outcome,
        'time_secs': timer.secs,
    }
    return job_result
def job_rdcperm_nc_med(paired_source, tr, te, r, n_features=10):
    """
    Run the permutation-based Randomized Dependence Coefficient (RDC) test
    without the copula transformation. The two kernel width parameters are
    taken from median distances of the raw data (median heuristic).

    - paired_source: not used by this job.
    - tr, te: two data objects, joined with `+`. The joined sample is used
        both for the median distances and for the test itself.
    - r: trial number. Only used to offset the seeds (r+19, r+220, r+100).
    - n_features: forwarded as n_features to each fea.RFFKGauss.

    The `alpha` given to it.RDCPerm is not a parameter of this function; it
    is looked up from the enclosing (module) scope.

    Return a dictionary with keys 'indtest' (the it.RDCPerm object),
    'test_result' (whatever perform_test returned) and 'time_secs'
    (the timer's `secs`, covering width selection and the test).
    """
    full_data = tr + te
    num_permutations = 500
    # n_features=10 is the value from the Lopez-Paz et al., 2013 paper.
    with util.ContextTimer() as timer:
        x_raw, y_raw = full_data.xy()

        # Median distances directly on the untransformed data; squared
        # below to get the width parameters.
        x_med, y_med = [
            util.meddistance(m, subsample=1000) for m in (x_raw, y_raw)
        ]
        x_width2 = x_med**2
        y_width2 = y_med**2

        x_feature_map = fea.RFFKGauss(
            x_width2, n_features=n_features, seed=r + 19)
        y_feature_map = fea.RFFKGauss(
            y_width2, n_features=n_features, seed=r + 220)
        # use_copula=False: the test is told not to apply the copula
        # transform either.
        tester = it.RDCPerm(
            x_feature_map,
            y_feature_map,
            n_permute=num_permutations,
            alpha=alpha,
            seed=r + 100,
            use_copula=False,
        )
        outcome = tester.perform_test(full_data)

    job_result = {
        'indtest': tester,
        'test_result': outcome,
        'time_secs': timer.secs,
    }
    return job_result