예제 #1
0
def job_fhsic_med(paired_source, tr, te, r):
    """
    Finite-feature HSIC test built on random Fourier features. The null
    distribution is simulated with the spectrums of the empirical cross
    covariance operators.
    - Gaussian kernels on both X and Y.
    - Widths come from the median heuristic (squared median distance);
      no parameter selection is performed.
    - tr and te are merged so the whole sample is used for testing.

    paired_source is accepted but not used in this function. r offsets all
    the seeds. alpha is not a parameter; it is resolved from the enclosing
    scope.

    Return a dictionary with keys 'indtest', 'test_result', 'time_secs'.
    """
    # Merge the two splits: the full sample is tested.
    full_sample = tr + te
    num_null_draws = 2000
    num_rff = 10

    with util.ContextTimer() as timer:
        xs, ys = full_sample.xy()
        # Median heuristic: squared median distance as the Gaussian width.
        width2_x = util.meddistance(xs, subsample=1000)**2
        width2_y = util.meddistance(ys, subsample=1000)**2

        # One random feature map per variable, each with its own seed.
        feature_map_x = fea.RFFKGauss(
            width2_x, n_features=num_rff, seed=r + 1)
        feature_map_y = fea.RFFKGauss(
            width2_y, n_features=num_rff, seed=r + 2)

        test = it.FiniteFeatureHSIC(
            feature_map_x,
            feature_map_y,
            n_simulate=num_null_draws,
            alpha=alpha,
            seed=r + 89,
        )
        outcome = test.perform_test(full_sample)

    return dict(indtest=test, test_result=outcome, time_secs=timer.secs)
예제 #2
0
def job_rdcperm_med(paired_source, tr, te, r, n_features=10):
    """
    Randomized Dependence Coefficient test with permutations.
    - tr and te are merged and the merged sample is tested.
    - Gaussian widths = squared median distance computed on the
      copula-transformed (marginal CDF) X and Y.
    - 500 permutations.
    - n_features defaults to 10, following Lopez-Paz et al., 2013.

    paired_source is accepted but not used in this function. r offsets all
    the seeds. alpha is not a parameter; it is resolved from the enclosing
    scope.

    Return a dictionary with keys 'indtest', 'test_result', 'time_secs'.
    """
    full_sample = tr + te
    num_permutations = 500

    with util.ContextTimer() as timer:
        xs, ys = full_sample.xy()

        # Copula-transform both variables before measuring distances.
        cdf_map = fea.MarginalCDFMap()
        xs_cdf = cdf_map.gen_features(xs)
        ys_cdf = cdf_map.gen_features(ys)

        # Median heuristic on the transformed data.
        width2_x = util.meddistance(xs_cdf, subsample=1000)**2
        width2_y = util.meddistance(ys_cdf, subsample=1000)**2

        feature_map_x = fea.RFFKGauss(
            width2_x, n_features=n_features, seed=r + 19)
        feature_map_y = fea.RFFKGauss(
            width2_y, n_features=n_features, seed=r + 220)

        test = it.RDCPerm(
            feature_map_x,
            feature_map_y,
            n_permute=num_permutations,
            alpha=alpha,
            seed=r + 100,
        )
        outcome = test.perform_test(full_sample)

    return dict(indtest=test, test_result=outcome, time_secs=timer.secs)
예제 #3
0
def job_rdcperm_nc_med(paired_source, tr, te, r, n_features=10):
    """
    Randomized Dependence Coefficient test with permutations, without the
    copula transformation (use_copula=False).
    - tr and te are merged and the merged sample is tested.
    - Gaussian widths = squared median distance computed directly on the
      raw X and Y.
    - 500 permutations.
    - n_features defaults to 10, following Lopez-Paz et al., 2013.

    paired_source is accepted but not used in this function. r offsets all
    the seeds. alpha is not a parameter; it is resolved from the enclosing
    scope.

    Return a dictionary with keys 'indtest', 'test_result', 'time_secs'.
    """
    full_sample = tr + te
    num_permutations = 500

    with util.ContextTimer() as timer:
        xs, ys = full_sample.xy()

        # Median heuristic on the untransformed data.
        width2_x = util.meddistance(xs, subsample=1000)**2
        width2_y = util.meddistance(ys, subsample=1000)**2

        feature_map_x = fea.RFFKGauss(
            width2_x, n_features=n_features, seed=r + 19)
        feature_map_y = fea.RFFKGauss(
            width2_y, n_features=n_features, seed=r + 220)

        test = it.RDCPerm(
            feature_map_x,
            feature_map_y,
            n_permute=num_permutations,
            alpha=alpha,
            seed=r + 100,
            use_copula=False,
        )
        outcome = test.perform_test(full_sample)

    return dict(indtest=test, test_result=outcome, time_secs=timer.secs)
예제 #4
0
def job_rdc_med(paired_source, tr, te, r, n_features=10):
    """
    Randomized Dependence Coefficient test.
    - Gaussian width = median heuristic (squared median distance) on the
      copula-transformed data.
    - n_features random features for each of X and Y (default 10, following
      Lopez-Paz et al., 2013).
    - tr and te are merged so the full dataset is used for testing.

    paired_source is accepted but not used in this function. r offsets the
    feature-map seeds. alpha is not a parameter; it is resolved from the
    enclosing scope.

    Return a dictionary with keys 'indtest', 'test_result', 'time_secs'.
    """
    full_sample = tr + te

    with util.ContextTimer() as timer:
        xs, ys = full_sample.xy()

        # Copula-transform both variables before measuring distances.
        cdf_map = fea.MarginalCDFMap()
        xs_cdf = cdf_map.gen_features(xs)
        ys_cdf = cdf_map.gen_features(ys)

        # Median heuristic on the transformed data.
        width2_x = util.meddistance(xs_cdf, subsample=1000)**2
        width2_y = util.meddistance(ys_cdf, subsample=1000)**2

        feature_map_x = fea.RFFKGauss(
            width2_x, n_features=n_features, seed=r + 19)
        feature_map_y = fea.RFFKGauss(
            width2_y, n_features=n_features, seed=r + 220)

        test = it.RDC(feature_map_x, feature_map_y, alpha=alpha)
        outcome = test.perform_test(full_sample)

    return dict(indtest=test, test_result=outcome, time_secs=timer.secs)
예제 #5
0
def job_nfsic_grid(paired_source, tr, te, r):
    """
    NFSIC where the test locations are randomized, and the Gaussian widths
    are optimized by a grid search.
    - Test locations V, W are drawn by it.GaussNFSIC.init_locs_2randn on
      the training set (seed r + 2).
    - Candidate squared widths for each variable are the squared median
      distance of the training data times 30 factors, log-spaced between
      2^-4 and 2^4. The same factors are used for X and Y.
    - The grid search runs on tr; the test is performed on te.

    paired_source is accepted but not used in this function. J and alpha are
    not parameters; they are resolved from the enclosing scope.

    Return a dictionary with keys 'indtest', 'test_result', 'time_secs'.
    """
    with util.ContextTimer() as t:
        # randomize the test locations by fitting Gaussians to the data
        V, W = it.GaussNFSIC.init_locs_2randn(tr, J, seed=r + 2)
        xtr, ytr = tr.xy()

        n_gwidth_cand = 30
        gwidth_factors = 2.0**np.linspace(-4, 4, n_gwidth_cand)
        medx = util.meddistance(xtr, 1000)
        medy = util.meddistance(ytr, 1000)
        # Scalar * 1-D array is already the flat candidate list; no
        # np.hstack is needed.
        list_gwidthx = (medx**2) * gwidth_factors
        list_gwidthy = (medy**2) * gwidth_factors

        # Only the index pair of the best candidates is used; the second
        # return value is discarded.
        bestij, _ = it.GaussNFSIC.grid_search_gwidth(
            tr, V, W, list_gwidthx, list_gwidthy)
        # These are width^2
        best_widthx = list_gwidthx[bestij[0]]
        best_widthy = list_gwidthy[bestij[1]]

        # perform test
        nfsic_grid = it.GaussNFSIC(best_widthx, best_widthy, V, W, alpha)
        nfsic_grid_result = nfsic_grid.perform_test(te)
    return {
        'indtest': nfsic_grid,
        'test_result': nfsic_grid_result,
        'time_secs': t.secs
    }
예제 #6
0
def kl_kgauss_median(pdata):
    """
    Get two Gaussian kernels constructed with the median heuristic.

    The width of each kernel is the squared median distance of the
    corresponding variable in pdata, computed by util.meddistance with
    subsample=1000.

    Return (k, l): kernel.KGauss objects for X and Y respectively.
    """
    xtr, ytr = pdata.xy()
    medx2 = util.meddistance(xtr, subsample=1000)**2
    medy2 = util.meddistance(ytr, subsample=1000)**2
    k = kernel.KGauss(medx2)
    l = kernel.KGauss(medy2)
    return k, l
def kl_median(pdata):
    """
    Get two Gaussian kernels constructed with the median heuristic.

    The width of each kernel is the squared median distance of the
    corresponding variable in pdata, computed by util.meddistance with its
    default arguments. No test locations are generated here.

    Return (k, l): kernel.KGauss objects for X and Y respectively.
    """
    xtr, ytr = pdata.xy()
    medx2 = util.meddistance(xtr)**2
    medy2 = util.meddistance(ytr)**2
    k = kernel.KGauss(medx2)
    l = kernel.KGauss(medy2)
    return k, l
    def setUp(self):
        """
        Build the fixture: a paired sample from get_pdata_mean(300, 2) and a
        GaussNFSIC test (alpha=0.01) with median-heuristic squared widths
        and 2 random test locations drawn from the standard normal.
        """
        sample_size, dim_x, n_locs = 300, 2, 2
        paired = get_pdata_mean(sample_size, dim_x)
        xs, ys = paired.xy()

        # Squared median distances serve as the Gaussian widths.
        width2_x = util.meddistance(xs)**2
        width2_y = util.meddistance(ys)**2

        # Draw the X locations before the Y locations (global RNG order).
        locs_x = np.random.randn(n_locs, dim_x)
        locs_y = np.random.randn(n_locs, 1)

        self.gnfsic = it.GaussNFSIC(
            width2_x, width2_y, locs_x, locs_y, alpha=0.01)
        self.pdata_mean = paired
예제 #9
0
def kl_median(pdata):
    """
    Get two Gaussian kernels constructed with the median heuristic.

    The width of each kernel is the squared median distance of the
    corresponding variable in pdata, computed by util.meddistance with its
    default arguments. No test locations are generated here.

    Return (k, l): kernel.KGauss objects for X and Y respectively.
    """
    xtr, ytr = pdata.xy()
    medx2 = util.meddistance(xtr)**2
    medy2 = util.meddistance(ytr)**2
    k = kernel.KGauss(medx2)
    l = kernel.KGauss(medy2)
    return k, l
예제 #10
0
    def setUp(self):
        """
        Build the fixture: a paired sample from get_pdata_mean(300, 2) and a
        GaussNFSIC test (alpha=0.01) with median-heuristic squared widths
        and 2 random test locations drawn from the standard normal.
        """
        sample_size, dim_x, n_locs = 300, 2, 2
        paired = get_pdata_mean(sample_size, dim_x)
        xs, ys = paired.xy()

        # Squared median distances serve as the Gaussian widths.
        width2_x = util.meddistance(xs)**2
        width2_y = util.meddistance(ys)**2

        # Draw the X locations before the Y locations (global RNG order).
        locs_x = np.random.randn(n_locs, dim_x)
        locs_y = np.random.randn(n_locs, 1)

        self.gnfsic = it.GaussNFSIC(
            width2_x, width2_y, locs_x, locs_y, alpha=0.01)
        self.pdata_mean = paired
    def setUp(self):
        """
        Build the fixture: a paired sample from get_pdata_mean(300, 2) and
        an NFSIC test (alpha=0.01) using Gaussian kernels with
        median-heuristic squared widths and 2 random test locations drawn
        from the standard normal.
        """
        sample_size, dim_x, n_locs = 300, 2, 2
        paired = get_pdata_mean(sample_size, dim_x)
        xs, ys = paired.xy()

        # Gaussian kernels whose widths are the squared median distances.
        kernel_x = kernel.KGauss(util.meddistance(xs)**2)
        kernel_y = kernel.KGauss(util.meddistance(ys)**2)

        # Draw the X locations before the Y locations (global RNG order).
        locs_x = np.random.randn(n_locs, dim_x)
        locs_y = np.random.randn(n_locs, 1)

        self.nfsic = it.NFSIC(
            kernel_x, kernel_y, locs_x, locs_y, alpha=0.01)
        self.pdata_mean = paired
예제 #12
0
    def setUp(self):
        """
        Build the fixture: a paired sample from get_pdata_mean(300, 2) and
        an NFSIC test (alpha=0.01) using Gaussian kernels with
        median-heuristic squared widths and 2 random test locations drawn
        from the standard normal.
        """
        sample_size, dim_x, n_locs = 300, 2, 2
        paired = get_pdata_mean(sample_size, dim_x)
        xs, ys = paired.xy()

        # Gaussian kernels whose widths are the squared median distances.
        kernel_x = kernel.KGauss(util.meddistance(xs)**2)
        kernel_y = kernel.KGauss(util.meddistance(ys)**2)

        # Draw the X locations before the Y locations (global RNG order).
        locs_x = np.random.randn(n_locs, dim_x)
        locs_y = np.random.randn(n_locs, 1)

        self.nfsic = it.NFSIC(
            kernel_x, kernel_y, locs_x, locs_y, alpha=0.01)
        self.pdata_mean = paired
    def test_nfsic(self):
        """
        it.nfsic with reg=0 on random Gaussian data (50 samples, X in 3
        dimensions, Y in 1 dimension shifted by 1, 3 test locations) must
        return a statistic with zero imaginary part that is greater than 0.
        """
        n_samples, dim_x, dim_y, n_locs = 50, 3, 1, 3

        # Keep the draw order X, Y, V, W: all use the global NumPy RNG.
        xs = np.random.randn(n_samples, dim_x)
        ys = 1 + np.random.randn(n_samples, dim_y)

        # Gaussian kernels whose widths are the squared median distances.
        kernel_x = kernel.KGauss(util.meddistance(xs)**2)
        kernel_y = kernel.KGauss(util.meddistance(ys)**2)

        locs_x = np.random.randn(n_locs, dim_x)
        locs_y = np.random.randn(n_locs, dim_y)

        # Only the statistic is checked; the other two outputs are unused.
        stat, _mean, _cov = it.nfsic(
            xs, ys, kernel_x, kernel_y, locs_x, locs_y, reg=0)

        self.assertAlmostEqual(np.imag(stat), 0)
        self.assertGreater(stat, 0)
예제 #14
0
    def test_nfsic(self):
        """
        it.nfsic with reg=0 on random Gaussian data (50 samples, X in 3
        dimensions, Y in 1 dimension shifted by 1, 3 test locations) must
        return a statistic with zero imaginary part that is greater than 0.
        """
        n_samples, dim_x, dim_y, n_locs = 50, 3, 1, 3

        # Keep the draw order X, Y, V, W: all use the global NumPy RNG.
        xs = np.random.randn(n_samples, dim_x)
        ys = 1 + np.random.randn(n_samples, dim_y)

        # Gaussian kernels whose widths are the squared median distances.
        kernel_x = kernel.KGauss(util.meddistance(xs)**2)
        kernel_y = kernel.KGauss(util.meddistance(ys)**2)

        locs_x = np.random.randn(n_locs, dim_x)
        locs_y = np.random.randn(n_locs, dim_y)

        # Only the statistic is checked; the other two outputs are unused.
        stat, _mean, _cov = it.nfsic(
            xs, ys, kernel_x, kernel_y, locs_x, locs_y, reg=0)

        self.assertAlmostEqual(np.imag(stat), 0)
        self.assertGreater(stat, 0)