Example #1
0
 def __init__(self, p, b, c, V, alpha=0.01, n_simulate=3000, seed=10):
     """
     Set up the test with an inverse multiquadric (IMQ) kernel built
     from b and c.

     p: forwarded unchanged to the parent constructor.
     b, c: passed to kernel.KIMQ as its b and c keyword arguments.
     V: forwarded unchanged to the parent constructor (presumably the
         test locations -- confirm against the parent class).
     alpha: forwarded unchanged to the parent constructor.
     n_simulate: number of times to draw from the null distribution.
     seed: passed, together with n_simulate, to FSSDH0SimCovObs.
     """
     imq_kernel = kernel.KIMQ(b=b, c=c)
     h0_simulator = FSSDH0SimCovObs(n_simulate=n_simulate, seed=seed)
     parent = super(IMQFSSD, self)
     parent.__init__(p, imq_kernel, V, h0_simulator, alpha)
Example #2
0
def job_fssdJ1q_imq_optv(p,
                         data_source,
                         tr,
                         te,
                         r,
                         J=1,
                         b=-0.5,
                         null_sim=None):
    """
    FSSD test with an inverse multiquadric (IMQ) kernel where only the test
    locations (V) are optimized on tr; the test itself is performed on te.

    The kernel parameter b is taken from the argument (default -0.5) and c is
    fixed to 1.0. b = -0.5, c = 1 are the values recommended in

        Measuring Sample Quality with Kernels
        Jackson Gorham, Lester Mackey

    p: passed to the location optimizer and to the FSSD test.
    data_source: not used in this function.
    tr: training data; tr.data() is used to initialize V and tr is given to
        the optimizer.
    te: data the final test is performed on.
    r: used as the seed of the default null simulator, and r + 1 as the seed
        for drawing the initial locations.
    J: passed to util.fit_gaussian_draw (presumably the number of test
        locations -- confirm against that helper).
    null_sim: null-distribution simulator. When None, a
        gof.FSSDH0SimCovObs with n_simulate=2000 and seed=r is created.

    NOTE: `alpha` is not a parameter of this function; it is looked up
    outside of it (expected to be defined at module level).

    Return a dictionary with keys 'test_result', 'time_secs', 'goftest' and
    'opt_info'.
    """
    null_sim = (gof.FSSDH0SimCovObs(n_simulate=2000, seed=r)
                if null_sim is None else null_sim)

    train_X = tr.data()
    with util.ContextTimer() as timer:
        # c of the IMQ kernel is held at this value; b comes from the caller
        imq_c = 1.0

        # initial locations: drawn from a Gaussian fitted to the training data
        init_locs = util.fit_gaussian_draw(train_X, J, seed=r + 1, reg=1e-6)

        # options forwarded as keyword arguments to the location optimizer
        opt_options = {
            'reg': 1e-5,
            'max_iter': 30,
            'tol_fun': 1e-6,
            'disp': True,
            'locs_bounds_frac': 20.0,
        }
        best_locs, opt_info = gof.IMQFSSD.optimize_locs(
            p, tr, b, imq_c, init_locs, **opt_options)

        # build the test from the optimized locations and run it on te
        goftest = gof.FSSD(p,
                           kernel.KIMQ(b=b, c=imq_c),
                           best_locs,
                           null_sim=null_sim,
                           alpha=alpha)
        test_result = goftest.perform_test(te)

    return {
        'test_result': test_result,
        'time_secs': timer.secs,
        'goftest': goftest,
        'opt_info': opt_info,
    }
Example #3
0
def job_fssdJ1q_imq_optbv(p, data_source, tr, te, r, J=1, null_sim=None):
    """
    FSSD test with an inverse multiquadric (IMQ) kernel where the test
    locations (V) and the kernel parameter b are optimized on tr; the test
    itself is performed on te. The kernel parameter c is kept fixed by giving
    the optimizer identical lower and upper bounds for it.

    p: passed to the optimizer and to the FSSD test.
    data_source: not used in this function.
    tr: training data; tr.data() is used to initialize V and tr is given to
        the optimizer.
    te: data the final test is performed on.
    r: used as the seed of the default null simulator, and r + 1 as the seed
        for drawing the initial locations.
    J: passed to util.fit_gaussian_draw (presumably the number of test
        locations -- confirm against that helper).
    null_sim: null-distribution simulator. When None, a
        gof.FSSDH0SimCovObs with n_simulate=2000 and seed=r is created.

    NOTE: `alpha` is not a parameter of this function; it is looked up
    outside of it (expected to be defined at module level).

    Return a dictionary with keys 'test_result', 'time_secs', 'goftest' and
    'opt_info'.
    """
    null_sim = (gof.FSSDH0SimCovObs(n_simulate=2000, seed=r)
                if null_sim is None else null_sim)

    train_X = tr.data()
    with util.ContextTimer() as timer:
        # starting value of b for the optimization
        init_b = -0.5
        # c is both the starting value and the lower/upper bound below
        fixed_c = 1.0

        # initial locations: drawn from a Gaussian fitted to the training data
        init_locs = util.fit_gaussian_draw(train_X, J, seed=r + 1, reg=1e-6)

        # options forwarded as keyword arguments to the optimizer
        opt_options = {
            'reg': 1e-5,
            'max_iter': 40,
            'tol_fun': 1e-6,
            'disp': True,
            'locs_bounds_frac': 20.0,
            # bounds on the IMQ kernel parameters
            'b_lb': -20,
            'c_lb': fixed_c,
            'c_ub': fixed_c,
        }
        best_locs, best_b, best_c, opt_info = (
            gof.IMQFSSD.optimize_locs_params(
                p, tr, init_b, fixed_c, init_locs, **opt_options))

        # build the test from the optimized parameters and run it on te
        goftest = gof.FSSD(p,
                           kernel.KIMQ(b=best_b, c=best_c),
                           best_locs,
                           null_sim=null_sim,
                           alpha=alpha)
        test_result = goftest.perform_test(te)

    return {
        'test_result': test_result,
        'time_secs': timer.secs,
        'goftest': goftest,
        'opt_info': opt_info,
    }
Example #4
0
def job_kstein_imq(p, data_source, tr, te, r):
    """
    Kernel Stein discrepancy test of Liu et al., 2016 and Chwialkowski et al.,
    2016, run on the full sample (tr + te) with the inverse multiquadric
    (IMQ) kernel studied in

    Measuring Sample Quality with Kernels
    Gorham and Mackey 2017.

    Kernel parameters are fixed to the recommended values:
    beta = b = -0.5, c = 1.

    p: passed to gof.KernelSteinTest.
    data_source: not used in this function.
    tr, te: the two parts of the data; they are combined with `+`.
    r: used as the seed of the test.

    NOTE: `alpha` is not a parameter of this function; it is looked up
    outside of it (expected to be defined at module level).

    Return a dictionary with keys 'test_result' and 'time_secs'.
    """
    # the test uses all of the data, not only the test split
    full_sample = tr + te
    # The return value is not used below; the call is kept as in the
    # original code.
    full_sample.data()
    with util.ContextTimer() as timer:
        imq_kernel = kernel.KIMQ(b=-0.5, c=1.0)
        stein_test = gof.KernelSteinTest(
            p, imq_kernel, alpha=alpha, n_simulate=1000, seed=r)
        outcome = stein_test.perform_test(full_sample)
    return {'test_result': outcome, 'time_secs': timer.secs}
Example #5
0
 def power_criterion(p, dat, b, c, test_locs, reg=1e-2):
     """
     Evaluate FSSD.power_criterion with an inverse multiquadric (IMQ)
     kernel constructed from b and c.

     p, dat, test_locs, reg: forwarded unchanged, in this order (with the
         kernel inserted after dat), to FSSD.power_criterion.
     b, c: passed to kernel.KIMQ as its b and c keyword arguments.

     Return whatever FSSD.power_criterion returns.
     """
     return FSSD.power_criterion(
         p, dat, kernel.KIMQ(b=b, c=c), test_locs, reg)