Example #1
    def ume_test(X, Y, Z, V, alpha=0.01, mode='mean'):
        """
        Perform a UME three-sample test.
        All the data are assumed to be preprocessed.

        Args:
            - X: n x d ndarray, a sample from P
            - Y: n x d ndarray, a sample from Q
            - Z: n x d ndarray, a sample from R
            - V: J x d ndarray, a set of J test locations
            - alpha: a user-specified significance level
            - mode: 'mean' to set the Gaussian width with
                SC_MMD.median_heuristic_bounliphone on (X, Y, Z); any other
                value uses the median heuristic on the pooled sample

        Returns:
            - a dictionary of the form
                {
                    alpha: 0.01,
                    pvalue: 0.0002,
                    test_stat: 2.3,
                    h0_rejected: True,
                    time_secs: ...
                }
        """
        if mode == 'mean':
            mean_medxyz2 = SC_MMD.median_heuristic_bounliphone(X,
                                                               Y,
                                                               Z,
                                                               subsample=1000)
            gwidth = mean_medxyz2
        else:
            XYZ = np.vstack((X, Y, Z))
            med2 = util.meddistance(XYZ, subsample=1000)**2
            gwidth = med2
        k = kernel.KGauss(gwidth)
        scume = SC_UME(data.Data(X), data.Data(Y), k, k, V, V, alpha)
        return scume.perform_test(data.Data(Z))
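A minimal usage sketch for ume_test, assuming it is available as a plain function and that numpy (as np) and the kmod-style modules used above (util, kernel, data, SC_MMD, SC_UME) are importable; the synthetic 1-D Gaussian setup mirrors Example #5:

    import numpy as np

    n = 500
    with util.NumpySeedContext(seed=7):
        X = np.random.randn(n, 1) + 4.0    # sample from P (model 1)
        Y = np.random.randn(n, 1) + 0.5    # sample from Q (model 2)
        Z = np.random.randn(n, 1)          # sample from R (the data)

    # 3 test locations drawn from a Gaussian fitted to the reference sample
    V = util.fit_gaussian_draw(Z, 3, seed=8)

    result = ume_test(X, Y, Z, V, alpha=0.01, mode='mean')
    print(result['pvalue'], result['h0_rejected'])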
Example #2
def met_gfssdJ1_3sopt_tr50(P, Q, data_source, n, r, J=1, tr_proportion=0.5):
    """
    FSSD-based model comparison test
        * Use J=1 test location by default (in the set V=W). 
        * 3sopt = optimize the test locations by maximizing the 3-model test's
        power criterion. There is only one set of test locations.
        * One Gaussian kernel for the two FSSD statistics. Optimize the
        Gaussian width
    """
    if not P.has_unnormalized_density() or not Q.has_unnormalized_density():
        # Not applicable. Return {}.
        return {}
    assert J >= 1

    p = P.get_unnormalized_density()
    q = Q.get_unnormalized_density()
    # sample some data
    datr = sample_pqr(None, None, data_source, n, r, only_from_r=True)

    # Start the timer here
    with util.ContextTimer() as t:
        # split the data into training/test sets
        datrtr, datrte = datr.split_tr_te(tr_proportion=tr_proportion, seed=r)
        Ztr = datrtr.data()

        # median heuristic to set the Gaussian widths
        medz = util.meddistance(Ztr, subsample=1000)
        gwidth0 = medz**2
        # pick a subset of points in the training set for V, W
        V0 = util.subsample_rows(Ztr, J, seed=r + 2)

        # optimization options
        opt_options = {
            'max_iter': 100,
            'reg': 1e-3,
            'tol_fun': 1e-6,
            'locs_bounds_frac': 100,
            'gwidth_lb': 0.1**2,
            'gwidth_ub': 10**2,
        }

        V_opt, gw_opt, opt_info = mct.DC_GaussFSSD.optimize_power_criterion(
            p, q, datrtr, V0, gwidth0, **opt_options)

        dcfssd_opt = mct.DC_GaussFSSD(p,
                                      q,
                                      gw_opt,
                                      gw_opt,
                                      V_opt,
                                      V_opt,
                                      alpha=alpha)  # alpha: a module-level constant in the source script
        dcfssd_opt_result = dcfssd_opt.perform_test(datrte)

    return {
        # This key "test" can be removed. Storing V, W can take quite a lot
        # of space, especially when the input dimension d is high.
        #'test':dcfssd_opt,
        'test_result': dcfssd_opt_result,
        'time_secs': t.secs
    }
Example #3
def met_gmmd_med(P, Q, data_source, n, r):
    """
    Use met_gmmd_med_bounliphone(). It uses the median heuristic following
    Bounliphone et al., 2016.

    Bounliphone et al., 2016's MMD-based 3-sample test.
    * Gaussian kernel. 
    * Gaussian width = mean of (median heuristic on (X, Z), median heuristic on
        (Y, Z))
    * Use full sample for testing (no
    holding out for optimization)
    """
    if not P.has_datasource() or not Q.has_datasource():
        # Not applicable. Return {}.
        return {}

    ds_p = P.get_datasource()
    ds_q = Q.get_datasource()
    # sample some data
    datp, datq, datr = sample_pqr(ds_p,
                                  ds_q,
                                  data_source,
                                  n,
                                  r,
                                  only_from_r=False)

    # Start the timer here
    with util.ContextTimer() as t:
        X, Y, Z = datp.data(), datq.data(), datr.data()

        # hyperparameters of the test
        medxz = util.meddistance(np.vstack((X, Z)), subsample=1000)
        medyz = util.meddistance(np.vstack((Y, Z)), subsample=1000)
        medxyz = np.mean([medxz, medyz])
        k = kernel.KGauss(sigma2=medxyz**2)

        scmmd = mct.SC_MMD(datp, datq, k, alpha=alpha)
        scmmd_result = scmmd.perform_test(datr)

    return {
        # This key "test" can be removed.
        #'test': scmmd,
        'test_result': scmmd_result,
        'time_secs': t.secs
    }
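As a quick contrast of the two bandwidth rules that recur in these examples (a sketch, assuming X, Y, Z are n x d arrays and util, kernel as above):

    import numpy as np

    # Bounliphone-style width: mean of the pairwise median heuristics,
    # squared (as in met_gmmd_med above)
    medxz = util.meddistance(np.vstack((X, Z)), subsample=1000)
    medyz = util.meddistance(np.vstack((Y, Z)), subsample=1000)
    width_mean_of_meds = np.mean([medxz, medyz])**2

    # pooled-sample median heuristic (as in ume_test's non-'mean' branch)
    width_pooled = util.meddistance(np.vstack((X, Y, Z)), subsample=1000)**2

    k = kernel.KGauss(sigma2=width_mean_of_meds)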
Example #4
def run_optimization(args, gp, gq, img_data, model_name, J=10):
    """
    Wrapper for noise-space optimization.
    """

    model = load_pretrained_model(model_name)
    model.eval()
    if model_name == 'inceptionv3':
        feat_func = model.pool3
    else:
        feat_func = model.features

    sample_size = args.sample_size  # number of images we want to generate
    samples_p = sample_images(gp, sample_size)
    datap = go.extract_feats(samples_p, feat_func, upsample=True)

    samples_q = sample_images(gq, sample_size)
    dataq = go.extract_feats(samples_q, feat_func, upsample=True)

    ind = util.subsample_ind(img_data.shape[0], sample_size)
    datar = img_data[ind]
    datar = go.extract_feats(datar.transpose((0, 3, 1, 2)),
                             feat_func,
                             upsample=True)
    datap = data.Data(datap)
    dataq = data.Data(dataq)
    datar = data.Data(datar)

    Zp0 = np.random.uniform(-1, 1, (J, gp.z_size))
    Zq0 = np.random.uniform(-1, 1, (J, gq.z_size))
    XYZ = np.vstack((datap.data(), dataq.data(), datar.data()))
    med2 = util.meddistance(XYZ, subsample=1000)**2

    if args.exp == 2:
        gp = gq

    with util.ContextTimer() as t:
        Z_opt, gw_opt, opt_result = go.optimize_3sample_criterion(datap,
                                                                  dataq,
                                                                  datar,
                                                                  gp,
                                                                  gq,
                                                                  feat_func,
                                                                  Zp0,
                                                                  Zq0,
                                                                  gwidth0=med2)

    results = {}
    results['Z'] = Z_opt
    results['width'] = gw_opt
    results['opt'] = opt_result
    results['t'] = t  # ContextTimer object; elapsed seconds in t.secs
    results['ind'] = ind

    return results
Example #5
    def test_basic(self):
        """
        Test basic things. Make sure SC_UME runs under normal usage.
        """
        mp, varp = 4, 1
        # q must not be the true model; if it were, our assumption would be
        # violated and the asymptotic null distribution would not hold.
        mq, varq = 0.5, 1

        # draw some data
        n = 2999 # sample size
        seed = 89
        with util.NumpySeedContext(seed=seed):
            X = np.random.randn(n, 1)*varp**0.5 + mp
            Y = np.random.randn(n, 1)*varq**0.5 + mq
            Z = np.random.randn(n, 1)
            
            datap = data.Data(X)
            dataq = data.Data(Y)
            datar = data.Data(Z)

        # hyperparameters of the test
        medxz = util.meddistance(np.vstack((X, Z)), subsample=1000)
        medyz = util.meddistance(np.vstack((Y, Z)), subsample=1000)
        k = kernel.KGauss(sigma2=medxz**2)
        l = kernel.KGauss(sigma2=medyz**2)

        # 2 sets of test locations
        J = 3
        Jp = J
        Jq = J
        V = util.fit_gaussian_draw(X, Jp, seed=seed+2)
        W = util.fit_gaussian_draw(Y, Jq, seed=seed+3)

        # construct a UME test
        alpha = 0.01 # significance level 
        scume = mct.SC_UME(datap, dataq, k, l, V, W, alpha=alpha)
        test_result = scume.perform_test(datar)

        # make sure it rejects
        #print(test_result)
        assert test_result['h0_rejected']
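The dictionary returned by perform_test can be inspected directly; a short sketch, with keys following the format shown in Example #1 (the exact H0 direction is defined by the SC_UME class):

    # test_result is a plain dict, e.g.
    # {'alpha': 0.01, 'pvalue': 0.0002, 'test_stat': 2.3,
    #  'h0_rejected': True, 'time_secs': ...}
    print('stat = %.3g, p-value = %.3g' %
          (test_result['test_stat'], test_result['pvalue']))
    if test_result['h0_rejected']:
        print('H0 rejected at alpha = %g' % alpha)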
Example #6
    def test_basic(self):
        """
        Nothing special. Just test basic things.
        """
        seed = 13
        # sample
        n = 103
        alpha = 0.01
        for d in [1, 4]:
            mean = np.zeros(d)
            variance = 1
            p = density.IsotropicNormal(mean, variance)
            q = density.IsotropicNormal(mean, variance+3)

            # the mean is not shifted here (the commented line would shift
            # only the first dimension)
            #draw_mean = mean + np.hstack((1, np.zeros(d-1)))
            draw_mean = mean + 0
            draw_variance = variance + 1
            X = util.randn(n, d, seed=seed)*np.sqrt(draw_variance) + draw_mean
            dat = data.Data(X)

            # Test
            for J in [1, 3]:
                sig2 = util.meddistance(X, subsample=1000)**2
                k = kernel.KGauss(sig2)

                # random test locations
                V = util.fit_gaussian_draw(X, J, seed=seed+1)
                W = util.fit_gaussian_draw(X, J, seed=seed+8)

                mcfssd = mct.DC_FSSD(p, q, k, k, V, W, alpha=alpha)
                s = mcfssd.compute_stat(dat)
                s2, var = mcfssd.get_H1_mean_variance(dat)

                tresult = mcfssd.perform_test(dat)

                # assertions
                self.assertGreaterEqual(tresult['pvalue'], 0)
                self.assertLessEqual(tresult['pvalue'], 1)
                testing.assert_approx_equal(s, (n**0.5)*s2)
Example #7
def met_gumeJ1_2V_rand(P, Q, data_source, n, r, J=1, use_1set_locs=False):
    """
    UME-based three-sample test. 
        * Use J=1 test location by default. 
        * Use two sets (2V) of test locations by default: V and W, each having J
            locations.  Will constrain V=W if use_1set_locs=True.
        * The test locations are selected at random from the data. The
            selected points are removed from the sample used for testing.
        * Gaussian kernels for the two UME statistics. Median heuristic is used
            to select each width.
    """
    if not P.has_datasource() or not Q.has_datasource():
        # Not applicable. Return {}.
        return {}
    assert J >= 1

    ds_p = P.get_datasource()
    ds_q = Q.get_datasource()
    # sample some data
    datp, datq, datr = sample_pqr(ds_p,
                                  ds_q,
                                  data_source,
                                  n,
                                  r,
                                  only_from_r=False)

    # Start the timer here
    with util.ContextTimer() as t:

        # remove the first J points from each set
        X, Y, Z = datp.data(), datq.data(), datr.data()

        # containing 3*J points
        pool3J = np.vstack((X[:J, :], Y[:J, :], Z[:J, :]))
        X, Y, Z = (X[J:, :], Y[J:, :], Z[J:, :])

        datp, datq, datr = [data.Data(a) for a in [X, Y, Z]]
        assert X.shape[0] == Y.shape[0]
        assert Y.shape[0] == Z.shape[0]
        assert Z.shape[0] == n - J
        assert datp.sample_size() == n - J
        assert datq.sample_size() == n - J
        assert datr.sample_size() == n - J

        #XYZ = np.vstack((X, Y, Z))
        #stds = np.std(util.subsample_rows(XYZ, min(n-3*J, 500),
        #    seed=r+87), axis=0)
        d = X.shape[1]
        # draw random test locations (the commented line below would instead
        # jitter the pooled points)
        with util.NumpySeedContext(seed=r * 191):
            #pool3J = pool3J + np.random.randn(3*J, d)*np.max(stds)*3
            pool3J = np.random.randn(3 * J, d) * 2

        # median heuristic to set the Gaussian widths
        medxz = util.meddistance(np.vstack((X, Z)), subsample=1000)
        medyz = util.meddistance(np.vstack((Z, Y)), subsample=1000)
        if use_1set_locs:
            # randomly select J points from the pool3J for the J test locations
            #V = util.subsample_rows(pool3J, J, r)
            V = pool3J[:J, :]
            W = V
            k = kernel.KGauss(sigma2=np.mean([medxz, medyz])**2)
            l = k
        else:
            # use two sets of locations: V and W
            #VW = util.subsample_rows(pool3J, 2*J, r)
            VW = pool3J[:2 * J, :]
            V = VW[:J, :]
            W = VW[J:, :]

            # 2 Gaussian kernels
            k = kernel.KGauss(sigma2=medxz**2)
            l = kernel.KGauss(sigma2=medyz**2)

        # construct the test
        scume = mct.SC_UME(datp, datq, k, l, V, W, alpha=alpha)
        scume_rand_result = scume.perform_test(datr)

    return {
        # This key "test" can be removed. Storing V, W can take quite a lot
        # of space, especially when the input dimension d is high.
        #'test':scume,
        'test_result': scume_rand_result,
        'time_secs': t.secs
    }
Example #8
def met_gumeJ1_3sopt_tr50(P, Q, data_source, n, r, J=1, tr_proportion=0.5):
    """
    UME-based three-sample test
        * Use J=1 test location by default (in the set V=W). 
        * 3sopt = optimize the test locations by maximizing the 3-sample test's
        power criterion. There is only one set of test locations.
        * One Gaussian kernel for the two UME statistics. Optimize the
        Gaussian width.
    """
    if not P.has_datasource() or not Q.has_datasource():
        # Not applicable. Return {}.
        return {}
    assert J >= 1

    ds_p = P.get_datasource()
    ds_q = Q.get_datasource()
    # sample some data
    datp, datq, datr = sample_pqr(ds_p,
                                  ds_q,
                                  data_source,
                                  n,
                                  r,
                                  only_from_r=False)

    # Start the timer here
    with util.ContextTimer() as t:
        # split the data into training/test sets
        [(datptr, datpte), (datqtr, datqte), (datrtr, datrte)] = \
            [D.split_tr_te(tr_proportion=tr_proportion, seed=r) for D in [datp, datq, datr]]
        Xtr, Ytr, Ztr = [D.data() for D in [datptr, datqtr, datrtr]]
        Xyztr = np.vstack((Xtr, Ytr, Ztr))
        # initialize optimization parameters.
        # Initialize the Gaussian widths with the median heuristic
        medxz = util.meddistance(np.vstack((Xtr, Ztr)), subsample=1000)
        medyz = util.meddistance(np.vstack((Ztr, Ytr)), subsample=1000)
        gwidth0 = np.mean([medxz, medyz])**2

        # pick a subset of points in the training set for V, W
        V0 = util.subsample_rows(Xyztr, J, seed=r + 2)

        # optimization options
        opt_options = {
            'max_iter': 100,
            'reg': 1e-6,
            'tol_fun': 1e-7,
            'locs_bounds_frac': 50,
            'gwidth_lb': 0.1,
            'gwidth_ub': 6**2,
        }
        V_opt, gw2_opt, opt_result = mct.SC_GaussUME.optimize_3sample_criterion(
            datptr, datqtr, datrtr, V0, gwidth0, **opt_options)
        k_opt = kernel.KGauss(gw2_opt)

        # construct a UME test
        scume_opt3 = mct.SC_UME(datpte,
                                datqte,
                                k_opt,
                                k_opt,
                                V_opt,
                                V_opt,
                                alpha=alpha)
        scume_opt3_result = scume_opt3.perform_test(datrte)

    return {
        # This key "test" can be removed. Storing V, W can take quite a lot
        # of space, especially when the input dimension d is high.
        #'test':scume,
        'test_result': scume_opt3_result,
        'time_secs': t.secs
    }
Example #9
def met_gumeJ1_2sopt_tr50(P, Q, data_source, n, r, J=1, tr_proportion=0.5):
    """
    UME-based three-sample test
        * Use J=1 test location by default. 
        * 2sopt = optimize the two sets of test locations by maximizing the
            2-sample test's power criterion. Each set is optimized separately.
        * Gaussian kernels for the two UME statistics. The Gaussian widths are
        also optimized separately.
    """
    if not P.has_datasource() or not Q.has_datasource():
        # Not applicable. Return {}.
        return {}
    assert J >= 1

    ds_p = P.get_datasource()
    ds_q = Q.get_datasource()
    # sample some data
    datp, datq, datr = sample_pqr(ds_p,
                                  ds_q,
                                  data_source,
                                  n,
                                  r,
                                  only_from_r=False)

    # Start the timer here
    with util.ContextTimer() as t:
        # split the data into training/test sets
        [(datptr, datpte), (datqtr, datqte), (datrtr, datrte)] = \
            [D.split_tr_te(tr_proportion=tr_proportion, seed=r) for D in [datp, datq, datr]]
        Xtr, Ytr, Ztr = [D.data() for D in [datptr, datqtr, datrtr]]

        # initialize optimization parameters.
        # Initialize the Gaussian widths with the median heuristic
        medxz = util.meddistance(np.vstack((Xtr, Ztr)), subsample=1000)
        medyz = util.meddistance(np.vstack((Ytr, Ztr)), subsample=1000)
        gwidth0p = medxz**2
        gwidth0q = medyz**2

        # numbers of test locations in V, W
        Jp = J
        Jq = J

        # pick a subset of points in the training set for V, W
        Xyztr = np.vstack((Xtr, Ytr, Ztr))
        VW = util.subsample_rows(Xyztr, Jp + Jq, seed=r + 1)
        V0 = VW[:Jp, :]
        W0 = VW[Jp:, :]

        # optimization options
        opt_options = {
            'max_iter': 100,
            'reg': 1e-4,
            'tol_fun': 1e-6,
            'locs_bounds_frac': 50,
            'gwidth_lb': 0.1,
            'gwidth_ub': 10**2,
        }

        umep_params, umeq_params = mct.SC_GaussUME.optimize_2sets_locs_widths(
            datptr, datqtr, datrtr, V0, W0, gwidth0p, gwidth0q, **opt_options)
        (V_opt, gw2p_opt, opt_infop) = umep_params
        (W_opt, gw2q_opt, opt_infoq) = umeq_params
        k_opt = kernel.KGauss(gw2p_opt)
        l_opt = kernel.KGauss(gw2q_opt)

        # construct a UME test
        scume_opt2 = mct.SC_UME(datpte,
                                datqte,
                                k_opt,
                                l_opt,
                                V_opt,
                                W_opt,
                                alpha=alpha)
        scume_opt2_result = scume_opt2.perform_test(datrte)

    return {
        # This key "test" can be removed. Storing V, W can take quite a lot
        # of space, especially when the input dimension d is high.
        #'test':scume,
        'test_result': scume_opt2_result,
        'time_secs': t.secs
    }
Example #10
def optimize_3sample_criterion(datap,
                               dataq,
                               datar,
                               gen_p,
                               gen_q,
                               model,
                               Zp0,
                               Zq0,
                               gwidth0,
                               reg=1e-3,
                               max_iter=100,
                               tol_fun=1e-6,
                               disp=False,
                               locs_bounds_frac=100,
                               gwidth_lb=None,
                               gwidth_ub=None):
    """
    Similar to optimize_2sets_locs_widths() but constrain V=W and
    constrain the two kernels to be the same Gaussian kernel.
    Optimize one set of test locations and one Gaussian kernel width by
    maximizing the test power criterion of the UME *three*-sample test

    This optimization function is deterministic.

    Args:
        - datap: a kgof.data.Data from P (model 1)
        - dataq: a kgof.data.Data from Q (model 2)
        - datar: a kgof.data.Data from R (data generating distribution)
        - gen_p: pytorch model representing the generator p (model 1)
        - gen_q: pytorch model representing the generator q (model 2)
        - Zp0: J x d_n numpy array. Initial value for the noise vectors of the
           J locations. This is for model 1.
        - Zq0: J x d_n numpy array. Initial value for the noise vectors of the
           J locations. This is for model 2.
        - model: a feature extractor applied to generated images 
        - gwidth0: initial value of the Gaussian width^2 for both UME(P, R),
              and UME(Q, R)
        - reg: reg to add to the mean/sqrt(variance) criterion to become
            mean/sqrt(variance + reg)
        - max_iter: #gradient descent iterations
        - tol_fun: termination tolerance of the objective value
        - disp: True to print convergence messages
        - locs_bounds_frac: When making box bounds for the test_locs, extend
              the box defined by coordinate-wise min-max by std of each
              coordinate (of the aggregated data) multiplied by this number.
              (Unused here: the noise vectors are bounded to the unit cube
              instead; see x0_bounds below.)
        - gwidth_lb: absolute lower bound on both the Gaussian width^2
        - gwidth_ub: absolute upper bound on both the Gaussian width^2

        If the lb, ub bounds are None, use fractions of the median heuristic
            to automatically set the bounds.
    Returns:
        - Z_opt: optimized noise vectors Z
        - gw_opt: optimized Gaussian width^2
        - opt_result: info from the optimization
    """
    J, dn = Zp0.shape
    Z0 = np.vstack([Zp0, Zq0])

    X, Y, Z = datap.data(), dataq.data(), datar.data()
    n, dp = X.shape

    global image_size

    def flatten(gwidth, V):
        return np.hstack((gwidth, V.reshape(-1)))

    def unflatten(x):
        sqrt_gwidth = x[0]
        V = np.reshape(x[1:], (2 * J, -1))
        return sqrt_gwidth, V

    # Parameterize the Gaussian width with its square root (then square later)
    # to automatically enforce the positivity.
    def obj_feat_space(sqrt_gwidth, V):
        k = kernel.KGauss(sqrt_gwidth**2)
        return -SC_UME.power_criterion(
            datap, dataq, datar, k, k, V, V, reg=reg)

    def flat_obj_feat(x):
        sqrt_gwidth, V = unflatten(x)
        return obj_feat_space(sqrt_gwidth, V)

    def obj_noise_space(sqrt_gwidth, z):
        zp = z[:J]
        zq = z[J:]
        torch_zp = to_torch_variable(zp, shape=(-1, zp.shape[1], 1, 1))
        torch_zq = to_torch_variable(zq, shape=(-1, zq.shape[1], 1, 1))
        # upsample generated images to the feature extractor's input size
        global model_input_size
        s = model_input_size
        upsample = nn.Upsample(size=(s, s), mode='bilinear')
        fp = model(upsample(gen_p(torch_zp))).cpu().data.numpy()
        fp = fp.reshape((J, -1))
        fq = model(upsample(gen_q(torch_zq))).cpu().data.numpy()
        fq = fq.reshape((J, -1))
        F = np.vstack([fp, fq])
        return obj_feat_space(sqrt_gwidth, F)

    def flat_obj_noise(x):
        sqrt_gwidth, z = unflatten(x)
        return obj_noise_space(sqrt_gwidth, z)

    def grad_power_noise(x):
        """
        Compute the gradient of the power criterion with respect to the width of Gaussian
        RBF kernel and the noise vector.

        Args:
            x: 1 + 2J*d_n vector
        Returns:
            the gradient of the power criterion with respect to kernel width/latent vector
        """

        with util.ContextTimer() as t:
            width, z = unflatten(x)
            zp = z[:J]
            zq = z[J:]

            # Compute the Jacobian of the generators with respect to noise vector
            torch_zp = to_torch_variable(zp,
                                         shape=(-1, zp.shape[1], 1, 1),
                                         requires_grad=True)
            torch_zq = to_torch_variable(zq,
                                         shape=(-1, zq.shape[1], 1, 1),
                                         requires_grad=True)
            gp_grad = compute_jacobian(
                torch_zp,
                gen_p(torch_zp).view(J, -1))  # J x d_pix x d_noise x 1 x 1
            gq_grad = compute_jacobian(
                torch_zq,
                gen_q(torch_zq).view(J, -1))  # J x d_pix x d_noise x 1 x 1
            v_grad_z = np.vstack([gp_grad, gq_grad])
            v_grad_z = np.squeeze(v_grad_z, [3, 4])  # 2J x d_pix x d_noise

            # Compute the Jacobian of the feature extractor with respect to noise vector
            vp_flatten = to_torch_variable(
                gen_p(torch_zp).view(J, -1).cpu().data.numpy(),
                shape=(J, 3, image_size, image_size),
                requires_grad=True)
            vq_flatten = to_torch_variable(
                gen_q(torch_zq).view(J, -1).cpu().data.numpy(),
                shape=(J, 3, image_size, image_size),
                requires_grad=True)
            size = (model_input_size, model_input_size)
            upsample = nn.Upsample(size=size, mode='bilinear')
            fp = model(upsample(vp_flatten))
            fq = model(upsample(vq_flatten))
            fp_grad = compute_jacobian(vp_flatten,
                                       fp.view(J, -1))  # J x d_nn x C x H x W
            fq_grad = compute_jacobian(vq_flatten,
                                       fq.view(J, -1))  # J x d_nn x C x H x W
            f_grad_v = np.vstack([fp_grad, fq_grad])
            f_grad_v = f_grad_v.reshape(
                (2 * J, f_grad_v.shape[1], -1))  # 2J x d_nn x d_pix

            # Compute the gradient of the objective function with respect to
            # the gaussian width and test locations
            F = np.vstack([fp.cpu().data.numpy(), fq.cpu().data.numpy()])
            F = np.reshape(F, (2 * J, -1))
            grad_obj = autograd.elementwise_grad(
                flat_obj_feat)  # 1+(2J)*d_nn input
            obj_grad_f = grad_obj(flatten(width, F))
            obj_grad_width = obj_grad_f[0]
            obj_grad_f = np.reshape(obj_grad_f[1:],
                                    [(2 * J), -1])  # 2J x d_nn array

            obj_grad_v = inner1d(obj_grad_f,
                                 np.transpose(f_grad_v,
                                              (2, 0, 1)))  # 2J x d_pix
            obj_grad_z = inner1d(obj_grad_v.T,
                                 np.transpose(v_grad_z, (2, 0, 1))).flatten()

        return np.concatenate([obj_grad_width.reshape([1]), obj_grad_z])

    # Initial point
    x0 = flatten(np.sqrt(gwidth0), Z0)

    # make sure that the optimized gwidth is not too small or too large.
    XYZ = np.vstack((X, Y, Z))
    med2 = util.meddistance(XYZ, subsample=1000)**2
    fac_min = 1e-2
    fac_max = 1e2
    if gwidth_lb is None:
        gwidth_lb = max(fac_min * med2, 1e-3)
    if gwidth_ub is None:
        gwidth_ub = min(fac_max * med2, 1e5)

    # # Make a box to bound test locations
    # XYZ_std = np.std(XYZ, axis=0)
    # # XYZ_min: length-d array
    # XYZ_min = np.min(XYZ, axis=0)
    # XYZ_max = np.max(XYZ, axis=0)
    # # V_lb: 2J x dn
    # V_lb = np.tile(XYZ_min - locs_bounds_frac*XYZ_std, (2*J, 1))
    # V_ub = np.tile(XYZ_max + locs_bounds_frac*XYZ_std, (2*J, 1))
    # # (J*d+1) x 2. Take square root because we parameterize with the square
    # # root
    # x0_lb = np.hstack((np.sqrt(gwidth_lb), np.reshape(V_lb, -1)))
    # x0_ub = np.hstack((np.sqrt(gwidth_ub), np.reshape(V_ub, -1)))
    # #x0_bounds = list(zip(x0_lb, x0_ub))

    # Assume the noise comes from a uniform distribution over the unit cube,
    # so each noise coordinate is bounded to [-1, 1]. The first optimization
    # variable is sqrt(gwidth), so bound it by the square roots of the limits.
    x0_bounds = [(np.sqrt(gwidth_lb), np.sqrt(gwidth_ub))] + \
        [(-1, 1)] * (2 * J * dn)

    # optimize. Time the optimization as well.
    # https://docs.scipy.org/doc/scipy/reference/optimize.minimize-lbfgsb.html
    with util.ContextTimer() as timer:
        opt_result = scipy.optimize.minimize(flat_obj_noise,
                                             x0,
                                             method='L-BFGS-B',
                                             bounds=x0_bounds,
                                             tol=tol_fun,
                                             options={
                                                 'maxiter': max_iter,
                                                 'ftol': tol_fun,
                                                 'disp': disp,
                                                 'gtol': 1.0e-08,
                                             },
                                             jac=grad_power_noise)

    opt_result = dict(opt_result)
    opt_result['time_secs'] = timer.secs
    x_opt = opt_result['x']
    sq_gw_opt, Z_opt = unflatten(x_opt)
    gw_opt = sq_gw_opt**2

    assert util.is_real_num(gw_opt), 'gw_opt is not real. Was %s' % str(gw_opt)
    return Z_opt, gw_opt, opt_result
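The square-root reparameterization used above is easy to check in isolation; a minimal numpy sketch of the flatten/unflatten round trip (hypothetical J, d_n values):

    import numpy as np

    J, dn = 2, 4
    gwidth0 = 2.5
    Z0 = np.random.randn(2 * J, dn)

    def flatten(gwidth, V):
        return np.hstack((gwidth, V.reshape(-1)))

    def unflatten(x):
        return x[0], np.reshape(x[1:], (2 * J, -1))

    x0 = flatten(np.sqrt(gwidth0), Z0)      # optimize over sqrt(width)
    sqrt_gw, Z = unflatten(x0)
    assert np.isclose(sqrt_gw**2, gwidth0)  # squaring restores a positive width
    assert np.allclose(Z, Z0)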
Example #11
    def optimize_3sample_criterion(datap,
                                   dataq,
                                   datar,
                                   V0,
                                   gwidth0,
                                   reg=1e-3,
                                   max_iter=100,
                                   tol_fun=1e-6,
                                   disp=False,
                                   locs_bounds_frac=100,
                                   gwidth_lb=None,
                                   gwidth_ub=None):
        """
        Similar to optimize_2sets_locs_widths() but constrain V=W, and
        constrain the two Gaussian widths to be the same.
        Optimize one set of test locations and one Gaussian kernel width by
        maximizing the test power criterion of the UME *three*-sample test.

        This optimization function is deterministic.

        - datap: a kgof.data.Data from P (model 1)
        - dataq: a kgof.data.Data from Q (model 2)
        - datar: a kgof.data.Data from R (data generating distribution)
        - V0: Jxd numpy array. Initial V containing J locations. For both
              UME(P, R) and UME(Q, R)
        - gwidth0: initial value of the Gaussian width^2 for both UME(P, R),
              and UME(Q, R)
        - reg: reg to add to the mean/sqrt(variance) criterion to become
            mean/sqrt(variance + reg)
        - max_iter: #gradient descent iterations
        - tol_fun: termination tolerance of the objective value
        - disp: True to print convergence messages
        - locs_bounds_frac: When making box bounds for the test_locs, extend
              the box defined by coordinate-wise min-max by std of each
              coordinate (of the aggregated data) multiplied by this number.
        - gwidth_lb: absolute lower bound on both the Gaussian width^2
        - gwidth_ub: absolute upper bound on both the Gaussian width^2

        If the lb, ub bounds are None, use fractions of the median heuristic
            to automatically set the bounds.

        Return (optimized V, optimized Gaussian width^2, info from the
        optimization)
        """
        J = V0.shape[0]
        X, Y, Z = datap.data(), dataq.data(), datar.data()
        n, d = X.shape

        # Parameterize the Gaussian width with its square root (then square later)
        # to automatically enforce the positivity.
        def obj(sqrt_gwidth, V):
            k = kernel.KGauss(sqrt_gwidth**2)
            return -SC_UME.power_criterion(
                datap, dataq, datar, k, k, V, V, reg=reg)

        flatten = lambda gwidth, V: np.hstack((gwidth, V.reshape(-1)))

        def unflatten(x):
            sqrt_gwidth = x[0]
            V = np.reshape(x[1:], (J, d))
            return sqrt_gwidth, V

        def flat_obj(x):
            sqrt_gwidth, V = unflatten(x)
            return obj(sqrt_gwidth, V)

        # Initial point
        x0 = flatten(np.sqrt(gwidth0), V0)

        # make sure that the optimized gwidth is not too small or too large.
        XYZ = np.vstack((X, Y, Z))
        med2 = util.meddistance(XYZ, subsample=1000)**2
        fac_min = 1e-2
        fac_max = 1e2
        if gwidth_lb is None:
            gwidth_lb = max(fac_min * med2, 1e-2)
        if gwidth_ub is None:
            gwidth_ub = min(fac_max * med2, 1e5)

        # Make a box to bound test locations
        XYZ_std = np.std(XYZ, axis=0)
        # XYZ_min: length-d array
        XYZ_min = np.min(XYZ, axis=0)
        XYZ_max = np.max(XYZ, axis=0)
        # V_lb: J x d
        V_lb = np.tile(XYZ_min - locs_bounds_frac * XYZ_std, (J, 1))
        V_ub = np.tile(XYZ_max + locs_bounds_frac * XYZ_std, (J, 1))
        # (J*d+1) x 2. Take square root because we parameterize with the square
        # root
        x0_lb = np.hstack((np.sqrt(gwidth_lb), np.reshape(V_lb, -1)))
        x0_ub = np.hstack((np.sqrt(gwidth_ub), np.reshape(V_ub, -1)))
        x0_bounds = list(zip(x0_lb, x0_ub))

        # optimize. Time the optimization as well.
        # https://docs.scipy.org/doc/scipy/reference/optimize.minimize-lbfgsb.html
        grad_obj = autograd.elementwise_grad(flat_obj)
        with util.ContextTimer() as timer:
            opt_result = scipy.optimize.minimize(
                flat_obj,
                x0,
                method='L-BFGS-B',
                bounds=x0_bounds,
                tol=tol_fun,
                options={
                    'maxiter': max_iter,
                    'ftol': tol_fun,
                    'disp': disp,
                    'gtol': 1.0e-08,
                },
                jac=grad_obj,
            )

        opt_result = dict(opt_result)
        opt_result['time_secs'] = timer.secs
        x_opt = opt_result['x']
        sq_gw_opt, V_opt = unflatten(x_opt)
        gw_opt = sq_gw_opt**2

        assert util.is_real_num(
            gw_opt), 'gw_opt is not real. Was %s' % str(gw_opt)
        return V_opt, gw_opt, opt_result
Example #12
    def test_optimize_2sets_locs_widths(self):
        mp, varp = 2, 1
        # q must not be the true model; if it were, our assumption would be
        # violated and the asymptotic null distribution would not hold.
        mq, varq = 1, 1

        # draw some data
        n = 800 # sample size
        seed = 6
        with util.NumpySeedContext(seed=seed):
            X = np.random.randn(n, 1)*varp**0.5 + mp
            Y = np.random.randn(n, 1)*varq**0.5 + mq
            Z = np.random.randn(n, 1)
            
            datap = data.Data(X)
            dataq = data.Data(Y)
            datar = data.Data(Z)

        # split the data into training/test sets
        [(datptr, datpte), (datqtr, datqte), (datrtr, datrte)] = \
            [D.split_tr_te(tr_proportion=0.3, seed=85) for D in [datap, dataq, datar]]
        Xtr, Ytr, Ztr = [D.data() for D in [datptr, datqtr, datrtr]]

        # initialize optimization parameters.
        # Initialize the Gaussian widths with the median heuristic
        medxz = util.meddistance(np.vstack((Xtr, Ztr)), subsample=1000)
        medyz = util.meddistance(np.vstack((Ytr, Ztr)), subsample=1000)
        gwidth0p = medxz**2
        gwidth0q = medyz**2

        # numbers of test locations in V, W
        J = 2
        Jp = J
        Jq = J

        # pick a subset of points in the training set for V, W
        Xyztr = np.vstack((Xtr, Ytr, Ztr))
        VW = util.subsample_rows(Xyztr, Jp+Jq, seed=73)
        V0 = VW[:Jp, :]
        W0 = VW[Jp:, :]

        # optimization options
        opt_options = {
            'max_iter': 100,
            'reg': 1e-4,
            'tol_fun': 1e-6,
            'locs_bounds_frac': 100,
            'gwidth_lb': None,
            'gwidth_ub': None,
        }

        umep_params, umeq_params = mct.SC_GaussUME.optimize_2sets_locs_widths(
            datptr, datqtr, datrtr, V0, W0, gwidth0p, gwidth0q, 
            **opt_options)
        (V_opt, gw2p_opt, opt_infop) = umep_params
        (W_opt, gw2q_opt, opt_infoq) = umeq_params
        k_opt = kernel.KGauss(gw2p_opt)
        l_opt = kernel.KGauss(gw2q_opt)
        # construct a UME test
        alpha = 0.01 # significance level 
        scume_opt2 = mct.SC_UME(datpte, datqte, k_opt, l_opt, V_opt, W_opt, alpha=alpha)
        scume_opt2.perform_test(datrte)