def ume_test(X, Y, Z, V, alpha=0.01, mode='mean'):
    """
    Perform a UME three-sample test.
    All the data are assumed to be preprocessed.

    Args:
        - X: n x d ndarray, a sample from P
        - Y: n x d ndarray, a sample from Q
        - Z: n x d ndarray, a sample from R
        - V: J x d ndarray, a set of J test locations
        - alpha: a user-specified significance level

    Returns:
        - a dictionary of the form
            {
                alpha: 0.01,
                pvalue: 0.0002,
                test_stat: 2.3,
                h0_rejected: True,
                time_secs: ...
            }
    """
    if mode == 'mean':
        mean_medxyz2 = SC_MMD.median_heuristic_bounliphone(X, Y, Z,
                                                           subsample=1000)
        gwidth = mean_medxyz2
    else:
        XYZ = np.vstack((X, Y, Z))
        med2 = util.meddistance(XYZ, subsample=1000)**2
        gwidth = med2
    k = kernel.KGauss(gwidth)
    scume = SC_UME(data.Data(X), data.Data(Y), k, k, V, V, alpha)
    return scume.perform_test(data.Data(Z))
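# --------------------------------------------------------------------------
# Usage sketch (not part of the original source): a minimal, hypothetical
# call to ume_test() on toy Gaussian data. It assumes numpy (as np) and the
# module-level dependencies of ume_test() are importable as in this file.
def _demo_ume_test():
    rng = np.random.RandomState(0)
    n, d, J = 500, 2, 5
    X = rng.randn(n, d) + 1.0               # sample from P (a worse model)
    Y = rng.randn(n, d) + 0.2               # sample from Q (a better model)
    Z = rng.randn(n, d)                     # sample from R (the data)
    V = Z[rng.choice(n, J, replace=False)]  # J test locations from the data
    result = ume_test(X, Y, Z, V, alpha=0.01)
    # the returned dictionary has keys such as 'pvalue' and 'h0_rejected'
    print(result['pvalue'], result['h0_rejected'])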
def met_gfssdJ1_3sopt_tr50(P, Q, data_source, n, r, J=1, tr_proportion=0.5):
    """
    FSSD-based model comparison test
        * Use J=1 test location by default (in the set V=W).
        * 3sopt = optimize the test locations by maximizing the 3-model
            test's power criterion. There is only one set of test locations.
        * One Gaussian kernel for the two FSSD statistics. Optimize the
            Gaussian width.
    """
    if not P.has_unnormalized_density() or not Q.has_unnormalized_density():
        # Not applicable. Return {}.
        return {}
    assert J >= 1

    p = P.get_unnormalized_density()
    q = Q.get_unnormalized_density()
    # sample some data
    datr = sample_pqr(None, None, data_source, n, r, only_from_r=True)

    # Start the timer here
    with util.ContextTimer() as t:
        # split the data into training/test sets
        datrtr, datrte = datr.split_tr_te(tr_proportion=tr_proportion, seed=r)
        Ztr = datrtr.data()

        # median heuristic to set the Gaussian width
        medz = util.meddistance(Ztr, subsample=1000)
        gwidth0 = medz**2

        # pick a subset of points in the training set for V, W
        V0 = util.subsample_rows(Ztr, J, seed=r + 2)

        # optimization options
        opt_options = {
            'max_iter': 100,
            'reg': 1e-3,
            'tol_fun': 1e-6,
            'locs_bounds_frac': 100,
            'gwidth_lb': 0.1**2,
            'gwidth_ub': 10**2,
        }
        V_opt, gw_opt, opt_info = mct.DC_GaussFSSD.optimize_power_criterion(
            p, q, datrtr, V0, gwidth0, **opt_options)

        dcfssd_opt = mct.DC_GaussFSSD(p, q, gw_opt, gw_opt, V_opt, V_opt,
                                      alpha=alpha)
        dcfssd_opt_result = dcfssd_opt.perform_test(datrte)

    return {
        # This key "test" can be removed. Storing V, W can take quite a lot
        # of space, especially when the input dimension d is high.
        #'test': dcfssd_opt,
        'test_result': dcfssd_opt_result,
        'time_secs': t.secs,
    }
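# --------------------------------------------------------------------------
# Interface sketch (not part of the original source): a hypothetical minimal
# model wrapper illustrating the methods met_gfssdJ1_3sopt_tr50() expects
# from P and Q. The real package provides its own model class; this only
# documents the assumed interface.
class _DensityModel(object):
    def __init__(self, density):
        # e.g., an unnormalized density such as density.IsotropicNormal
        self._density = density

    def has_unnormalized_density(self):
        return self._density is not None

    def get_unnormalized_density(self):
        return self._density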
def met_gmmd_med(P, Q, data_source, n, r):
    """
    Use met_gmmd_med_bounliphone(). It uses the median heuristic following
    Bounliphone et al., 2016.

    Bounliphone et al., 2016's MMD-based 3-sample test.
        * Gaussian kernel.
        * Gaussian width = mean of (median heuristic on (X, Z), median
            heuristic on (Y, Z))
        * Use the full sample for testing (no holding out for optimization)
    """
    if not P.has_datasource() or not Q.has_datasource():
        # Not applicable. Return {}.
        return {}

    ds_p = P.get_datasource()
    ds_q = Q.get_datasource()
    # sample some data
    datp, datq, datr = sample_pqr(ds_p, ds_q, data_source, n, r,
                                  only_from_r=False)

    # Start the timer here
    with util.ContextTimer() as t:
        X, Y, Z = datp.data(), datq.data(), datr.data()

        # hyperparameters of the test
        medxz = util.meddistance(np.vstack((X, Z)), subsample=1000)
        medyz = util.meddistance(np.vstack((Y, Z)), subsample=1000)
        medxyz = np.mean([medxz, medyz])
        k = kernel.KGauss(sigma2=medxyz**2)

        scmmd = mct.SC_MMD(datp, datq, k, alpha=alpha)
        scmmd_result = scmmd.perform_test(datr)

    return {
        # This key "test" can be removed.
        #'test': scmmd,
        'test_result': scmmd_result,
        'time_secs': t.secs,
    }
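# --------------------------------------------------------------------------
# Illustrative sketch (not part of the original source): the bandwidth rule
# used above, written with plain numpy/scipy instead of util.meddistance
# (which additionally subsamples for speed). The Gaussian width is the
# square of the mean of two median heuristics.
from scipy.spatial.distance import pdist

def _mean_median_heuristic_sq(X, Y, Z):
    # median of pairwise Euclidean distances on each pooled sample
    medxz = np.median(pdist(np.vstack((X, Z))))
    medyz = np.median(pdist(np.vstack((Y, Z))))
    return np.mean([medxz, medyz])**2   # sigma^2 for kernel.KGauss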
def run_optimization(args, gp, gq, img_data, model_name, J=10):
    """
    Wrapper for noise space optimization.
    """
    model = load_pretrained_model(model_name)
    model.eval()
    if model_name == 'inceptionv3':
        feat_func = model.pool3
    else:
        feat_func = model.features

    sample_size = args.sample_size  # number of images we want to generate
    samples_p = sample_images(gp, sample_size)
    datap = go.extract_feats(samples_p, feat_func, upsample=True)
    samples_q = sample_images(gq, sample_size)
    dataq = go.extract_feats(samples_q, feat_func, upsample=True)
    ind = util.subsample_ind(img_data.shape[0], sample_size)
    datar = img_data[ind]
    # convert from NHWC to NCHW before extracting features
    datar = go.extract_feats(datar.transpose((0, 3, 1, 2)), feat_func,
                             upsample=True)

    datap = data.Data(datap)
    dataq = data.Data(dataq)
    datar = data.Data(datar)

    Zp0 = np.random.uniform(-1, 1, (J, gp.z_size))
    Zq0 = np.random.uniform(-1, 1, (J, gq.z_size))
    XYZ = np.vstack((datap.data(), dataq.data(), datar.data()))
    med2 = util.meddistance(XYZ, subsample=1000)**2

    if args.exp == 2:
        gp = gq

    with util.ContextTimer() as t:
        Z_opt, gw_opt, opt_result = go.optimize_3sample_criterion(
            datap, dataq, datar, gp, gq, feat_func, Zp0, Zq0, gwidth0=med2)

    results = {}
    results['Z'] = Z_opt
    results['width'] = gw_opt
    results['opt'] = opt_result
    results['t'] = t
    results['ind'] = ind
    return results
def test_basic(self):
    """
    Test basic things. Make sure SC_UME runs under normal usage.
    """
    mp, varp = 4, 1

    # q cannot be the true model.
    # That violates our assumption and the asymptotic null distribution
    # does not hold.
    mq, varq = 0.5, 1

    # draw some data
    n = 2999  # sample size
    seed = 89
    with util.NumpySeedContext(seed=seed):
        X = np.random.randn(n, 1)*varp**0.5 + mp
        Y = np.random.randn(n, 1)*varq**0.5 + mq
        Z = np.random.randn(n, 1)

        datap = data.Data(X)
        dataq = data.Data(Y)
        datar = data.Data(Z)

    # hyperparameters of the test
    medxz = util.meddistance(np.vstack((X, Z)), subsample=1000)
    medyz = util.meddistance(np.vstack((Y, Z)), subsample=1000)
    k = kernel.KGauss(sigma2=medxz**2)
    l = kernel.KGauss(sigma2=medyz**2)

    # 2 sets of test locations
    J = 3
    Jp = J
    Jq = J
    V = util.fit_gaussian_draw(X, Jp, seed=seed + 2)
    W = util.fit_gaussian_draw(Y, Jq, seed=seed + 3)

    # construct a UME test
    alpha = 0.01  # significance level
    scume = mct.SC_UME(datap, dataq, k, l, V, W, alpha=alpha)
    test_result = scume.perform_test(datar)

    # make sure it rejects
    #print(test_result)
    assert test_result['h0_rejected']
def test_basic(self):
    """
    Nothing special. Just test basic things.
    """
    seed = 13
    # sample
    n = 103
    alpha = 0.01
    for d in [1, 4]:
        mean = np.zeros(d)
        variance = 1
        p = density.IsotropicNormal(mean, variance)
        q = density.IsotropicNormal(mean, variance + 3)

        # only one dimension of the mean is shifted
        #draw_mean = mean + np.hstack((1, np.zeros(d-1)))
        draw_mean = mean + 0
        draw_variance = variance + 1
        X = util.randn(n, d, seed=seed)*np.sqrt(draw_variance) + draw_mean
        dat = data.Data(X)

        # Test
        for J in [1, 3]:
            sig2 = util.meddistance(X, subsample=1000)**2
            k = kernel.KGauss(sig2)

            # random test locations
            V = util.fit_gaussian_draw(X, J, seed=seed + 1)
            W = util.fit_gaussian_draw(X, J, seed=seed + 8)

            mcfssd = mct.DC_FSSD(p, q, k, k, V, W, alpha=alpha)
            s = mcfssd.compute_stat(dat)
            s2, var = mcfssd.get_H1_mean_variance(dat)
            tresult = mcfssd.perform_test(dat)

            # assertions
            self.assertGreaterEqual(tresult['pvalue'], 0)
            self.assertLessEqual(tresult['pvalue'], 1)
            testing.assert_approx_equal(s, (n**0.5)*s2)
def met_gumeJ1_2V_rand(P, Q, data_source, n, r, J=1, use_1set_locs=False):
    """
    UME-based three-sample test.
        * Use J=1 test location by default.
        * Use two sets (2V) of test locations by default: V and W, each
            having J locations. Will constrain V=W if use_1set_locs=True.
        * The test locations are selected at random from the data. Selected
            points are removed for testing.
        * Gaussian kernels for the two UME statistics. Median heuristic is
            used to select each width.
    """
    if not P.has_datasource() or not Q.has_datasource():
        # Not applicable. Return {}.
        return {}
    assert J >= 1

    ds_p = P.get_datasource()
    ds_q = Q.get_datasource()
    # sample some data
    datp, datq, datr = sample_pqr(ds_p, ds_q, data_source, n, r,
                                  only_from_r=False)

    # Start the timer here
    with util.ContextTimer() as t:
        # remove the first J points from each set
        X, Y, Z = datp.data(), datq.data(), datr.data()

        # pool containing 3*J points
        pool3J = np.vstack((X[:J, :], Y[:J, :], Z[:J, :]))
        X, Y, Z = (X[J:, :], Y[J:, :], Z[J:, :])

        datp, datq, datr = [data.Data(a) for a in [X, Y, Z]]
        assert X.shape[0] == Y.shape[0]
        assert Y.shape[0] == Z.shape[0]
        assert Z.shape[0] == n - J
        assert datp.sample_size() == n - J
        assert datq.sample_size() == n - J
        assert datr.sample_size() == n - J

        #XYZ = np.vstack((X, Y, Z))
        #stds = np.std(util.subsample_rows(XYZ, min(n-3*J, 500),
        #                                  seed=r+87), axis=0)
        d = X.shape[1]
        # draw random locations (the commented line above instead adds
        # noise to the pooled points)
        with util.NumpySeedContext(seed=r*191):
            #pool3J = pool3J + np.random.randn(3*J, d)*np.max(stds)*3
            pool3J = np.random.randn(3*J, d)*2

        # median heuristic to set the Gaussian widths
        medxz = util.meddistance(np.vstack((X, Z)), subsample=1000)
        medyz = util.meddistance(np.vstack((Z, Y)), subsample=1000)

        if use_1set_locs:
            # use the first J points in the pool as the J test locations
            #V = util.subsample_rows(pool3J, J, r)
            V = pool3J[:J, :]
            W = V
            k = kernel.KGauss(sigma2=np.mean([medxz, medyz])**2)
            l = k
        else:
            # use two sets of locations: V and W
            #VW = util.subsample_rows(pool3J, 2*J, r)
            VW = pool3J[:2*J, :]
            V = VW[:J, :]
            W = VW[J:, :]

            # 2 Gaussian kernels
            k = kernel.KGauss(sigma2=medxz**2)
            l = kernel.KGauss(sigma2=medyz**2)

        # construct the test
        scume = mct.SC_UME(datp, datq, k, l, V, W, alpha=alpha)
        scume_rand_result = scume.perform_test(datr)

    return {
        # This key "test" can be removed. Storing V, W can take quite a lot
        # of space, especially when the input dimension d is high.
        #'test': scume,
        'test_result': scume_rand_result,
        'time_secs': t.secs,
    }
def met_gumeJ1_3sopt_tr50(P, Q, data_source, n, r, J=1, tr_proportion=0.5):
    """
    UME-based three-sample test.
        * Use J=1 test location by default (in the set V=W).
        * 3sopt = optimize the test locations by maximizing the 3-sample
            test's power criterion. There is only one set of test locations.
        * One Gaussian kernel for the two UME statistics. Optimize the
            Gaussian width.
    """
    if not P.has_datasource() or not Q.has_datasource():
        # Not applicable. Return {}.
        return {}
    assert J >= 1

    ds_p = P.get_datasource()
    ds_q = Q.get_datasource()
    # sample some data
    datp, datq, datr = sample_pqr(ds_p, ds_q, data_source, n, r,
                                  only_from_r=False)

    # Start the timer here
    with util.ContextTimer() as t:
        # split the data into training/test sets
        [(datptr, datpte), (datqtr, datqte), (datrtr, datrte)] = \
            [D.split_tr_te(tr_proportion=tr_proportion, seed=r)
             for D in [datp, datq, datr]]
        Xtr, Ytr, Ztr = [D.data() for D in [datptr, datqtr, datrtr]]
        Xyztr = np.vstack((Xtr, Ytr, Ztr))

        # initialize optimization parameters.
        # Initialize the Gaussian widths with the median heuristic
        medxz = util.meddistance(np.vstack((Xtr, Ztr)), subsample=1000)
        medyz = util.meddistance(np.vstack((Ztr, Ytr)), subsample=1000)
        gwidth0 = np.mean([medxz, medyz])**2

        # pick a subset of points in the training set for V, W
        V0 = util.subsample_rows(Xyztr, J, seed=r + 2)

        # optimization options
        opt_options = {
            'max_iter': 100,
            'reg': 1e-6,
            'tol_fun': 1e-7,
            'locs_bounds_frac': 50,
            'gwidth_lb': 0.1,
            'gwidth_ub': 6**2,
        }
        V_opt, gw2_opt, opt_result = \
            mct.SC_GaussUME.optimize_3sample_criterion(
                datptr, datqtr, datrtr, V0, gwidth0, **opt_options)
        k_opt = kernel.KGauss(gw2_opt)

        # construct a UME test
        scume_opt3 = mct.SC_UME(datpte, datqte, k_opt, k_opt, V_opt, V_opt,
                                alpha=alpha)
        scume_opt3_result = scume_opt3.perform_test(datrte)

    return {
        # This key "test" can be removed. Storing V, W can take quite a lot
        # of space, especially when the input dimension d is high.
        #'test': scume_opt3,
        'test_result': scume_opt3_result,
        'time_secs': t.secs,
    }
def met_gumeJ1_2sopt_tr50(P, Q, data_source, n, r, J=1, tr_proportion=0.5):
    """
    UME-based three-sample test.
        * Use J=1 test location by default.
        * 2sopt = optimize the two sets of test locations by maximizing the
            2-sample test's power criterion. Each set is optimized
            separately.
        * Gaussian kernels for the two UME statistics. The Gaussian widths
            are also optimized separately.
    """
    if not P.has_datasource() or not Q.has_datasource():
        # Not applicable. Return {}.
        return {}
    assert J >= 1

    ds_p = P.get_datasource()
    ds_q = Q.get_datasource()
    # sample some data
    datp, datq, datr = sample_pqr(ds_p, ds_q, data_source, n, r,
                                  only_from_r=False)

    # Start the timer here
    with util.ContextTimer() as t:
        # split the data into training/test sets
        [(datptr, datpte), (datqtr, datqte), (datrtr, datrte)] = \
            [D.split_tr_te(tr_proportion=tr_proportion, seed=r)
             for D in [datp, datq, datr]]
        Xtr, Ytr, Ztr = [D.data() for D in [datptr, datqtr, datrtr]]

        # initialize optimization parameters.
        # Initialize the Gaussian widths with the median heuristic
        medxz = util.meddistance(np.vstack((Xtr, Ztr)), subsample=1000)
        medyz = util.meddistance(np.vstack((Ytr, Ztr)), subsample=1000)
        gwidth0p = medxz**2
        gwidth0q = medyz**2

        # numbers of test locations in V, W
        Jp = J
        Jq = J

        # pick a subset of points in the training set for V, W
        Xyztr = np.vstack((Xtr, Ytr, Ztr))
        VW = util.subsample_rows(Xyztr, Jp + Jq, seed=r + 1)
        V0 = VW[:Jp, :]
        W0 = VW[Jp:, :]

        # optimization options
        opt_options = {
            'max_iter': 100,
            'reg': 1e-4,
            'tol_fun': 1e-6,
            'locs_bounds_frac': 50,
            'gwidth_lb': 0.1,
            'gwidth_ub': 10**2,
        }

        umep_params, umeq_params = \
            mct.SC_GaussUME.optimize_2sets_locs_widths(
                datptr, datqtr, datrtr, V0, W0, gwidth0p, gwidth0q,
                **opt_options)
        (V_opt, gw2p_opt, opt_infop) = umep_params
        (W_opt, gw2q_opt, opt_infoq) = umeq_params
        k_opt = kernel.KGauss(gw2p_opt)
        l_opt = kernel.KGauss(gw2q_opt)

        # construct a UME test
        scume_opt2 = mct.SC_UME(datpte, datqte, k_opt, l_opt, V_opt, W_opt,
                                alpha=alpha)
        scume_opt2_result = scume_opt2.perform_test(datrte)

    return {
        # This key "test" can be removed. Storing V, W can take quite a lot
        # of space, especially when the input dimension d is high.
        #'test': scume_opt2,
        'test_result': scume_opt2_result,
        'time_secs': t.secs,
    }
def optimize_3sample_criterion(datap, dataq, datar, gen_p, gen_q, model,
                               Zp0, Zq0, gwidth0, reg=1e-3, max_iter=100,
                               tol_fun=1e-6, disp=False, locs_bounds_frac=100,
                               gwidth_lb=None, gwidth_ub=None):
    """
    Similar to optimize_2sets_locs_widths() but constrain V=W and constrain
    the two kernels to be the same Gaussian kernel. Optimize one set of test
    locations and one Gaussian kernel width by maximizing the test power
    criterion of the UME *three*-sample test.

    This optimization function is deterministic.

    Args:
        - datap: a kgof.data.Data from P (model 1)
        - dataq: a kgof.data.Data from Q (model 2)
        - datar: a kgof.data.Data from R (data generating distribution)
        - gen_p: pytorch model representing the generator p (model 1)
        - gen_q: pytorch model representing the generator q (model 2)
        - model: a feature extractor applied to generated images
        - Zp0: J x d_n numpy array. Initial value for the noise vectors of
            the J locations. This is for model 1.
        - Zq0: J x d_n numpy array. Initial value for the noise vectors of
            the J locations. This is for model 2.
        - gwidth0: initial value of the Gaussian width^2 for both UME(P, R)
            and UME(Q, R)
        - reg: reg to add to the mean/sqrt(variance) criterion to become
            mean/sqrt(variance + reg)
        - max_iter: #gradient descent iterations
        - tol_fun: termination tolerance of the objective value
        - disp: True to print convergence messages
        - locs_bounds_frac: When making box bounds for the test_locs, extend
            the box defined by coordinate-wise min-max by std of each
            coordinate (of the aggregated data) multiplied by this number.
        - gwidth_lb: absolute lower bound on the Gaussian width^2
        - gwidth_ub: absolute upper bound on the Gaussian width^2

    If the lb, ub bounds are None, use fractions of the median heuristic to
    automatically set the bounds.

    Returns:
        - Z_opt: optimized noise vectors Z
        - gw_opt: optimized Gaussian width^2
        - opt_result: info from the optimization
    """
    J, dn = Zp0.shape
    Z0 = np.vstack([Zp0, Zq0])
    X, Y, Z = datap.data(), dataq.data(), datar.data()
    n, dp = X.shape
    global image_size

    def flatten(gwidth, V):
        return np.hstack((gwidth, V.reshape(-1)))

    def unflatten(x):
        sqrt_gwidth = x[0]
        V = np.reshape(x[1:], (2*J, -1))
        return sqrt_gwidth, V

    # Parameterize the Gaussian width with its square root (then square
    # later) to automatically enforce the positivity.
    def obj_feat_space(sqrt_gwidth, V):
        k = kernel.KGauss(sqrt_gwidth**2)
        return -SC_UME.power_criterion(datap, dataq, datar, k, k, V, V,
                                       reg=reg)

    def flat_obj_feat(x):
        sqrt_gwidth, V = unflatten(x)
        return obj_feat_space(sqrt_gwidth, V)

    def obj_noise_space(sqrt_gwidth, z):
        zp = z[:J]
        zq = z[J:]
        torch_zp = to_torch_variable(zp, shape=(-1, zp.shape[1], 1, 1))
        torch_zq = to_torch_variable(zq, shape=(-1, zq.shape[1], 1, 1))
        # need preprocessing probably
        global model_input_size
        s = model_input_size
        upsample = nn.Upsample(size=(s, s), mode='bilinear')
        fp = model(upsample(gen_p(torch_zp))).cpu().data.numpy()
        fp = fp.reshape((J, -1))
        fq = model(upsample(gen_q(torch_zq))).cpu().data.numpy()
        fq = fq.reshape((J, -1))
        F = np.vstack([fp, fq])
        return obj_feat_space(sqrt_gwidth, F)

    def flat_obj_noise(x):
        sqrt_gwidth, z = unflatten(x)
        return obj_noise_space(sqrt_gwidth, z)

    def grad_power_noise(x):
        """
        Compute the gradient of the power criterion with respect to the
        width of the Gaussian RBF kernel and the noise vectors.

        Args:
            x: (1 + 2J*d_n)-dimensional vector

        Returns:
            the gradient of the power criterion with respect to the kernel
            width / latent vectors
        """
        with util.ContextTimer() as t:
            width, z = unflatten(x)
            zp = z[:J]
            zq = z[J:]

            # Compute the Jacobians of the generators with respect to the
            # noise vectors
            torch_zp = to_torch_variable(zp, shape=(-1, zp.shape[1], 1, 1),
                                         requires_grad=True)
            torch_zq = to_torch_variable(zq, shape=(-1, zq.shape[1], 1, 1),
                                         requires_grad=True)
            # J x d_pix x d_noise x 1 x 1
            gp_grad = compute_jacobian(torch_zp, gen_p(torch_zp).view(J, -1))
            gq_grad = compute_jacobian(torch_zq, gen_q(torch_zq).view(J, -1))
            v_grad_z = np.vstack([gp_grad, gq_grad])
            v_grad_z = np.squeeze(v_grad_z, [3, 4])  # 2J x d_pix x d_noise

            # Compute the Jacobian of the feature extractor with respect to
            # the generated images
            vp_flatten = to_torch_variable(
                gen_p(torch_zp).view(J, -1).cpu().data.numpy(),
                shape=(J, 3, image_size, image_size),
                requires_grad=True)
            vq_flatten = to_torch_variable(
                gen_q(torch_zq).view(J, -1).cpu().data.numpy(),
                shape=(J, 3, image_size, image_size),
                requires_grad=True)
            size = (model_input_size, model_input_size)
            upsample = nn.Upsample(size=size, mode='bilinear')
            fp = model(upsample(vp_flatten))
            fq = model(upsample(vq_flatten))
            # J x d_nn x C x H x W
            fp_grad = compute_jacobian(vp_flatten, fp.view(J, -1))
            fq_grad = compute_jacobian(vq_flatten, fq.view(J, -1))
            f_grad_v = np.vstack([fp_grad, fq_grad])
            f_grad_v = f_grad_v.reshape(
                (2*J, f_grad_v.shape[1], -1))  # 2J x d_nn x d_pix

            # Compute the gradient of the objective function with respect to
            # the Gaussian width and the test locations (features)
            F = np.vstack([fp.cpu().data.numpy(), fq.cpu().data.numpy()])
            F = np.reshape(F, (2*J, -1))
            # input of size 1 + (2J)*d_nn
            grad_obj = autograd.elementwise_grad(flat_obj_feat)
            obj_grad_f = grad_obj(flatten(width, F))
            obj_grad_width = obj_grad_f[0]
            obj_grad_f = np.reshape(obj_grad_f[1:],
                                    [(2*J), -1])  # 2J x d_nn array

            # chain rule: d obj / d pixels, then d obj / d noise
            obj_grad_v = inner1d(
                obj_grad_f, np.transpose(f_grad_v, (2, 0, 1)))  # 2J x d_pix
            obj_grad_z = inner1d(
                obj_grad_v.T, np.transpose(v_grad_z, (2, 0, 1))).flatten()
        return np.concatenate([obj_grad_width.reshape([1]), obj_grad_z])

    # Initial point
    x0 = flatten(np.sqrt(gwidth0), Z0)

    # make sure that the optimized gwidth is not too small or too large.
    XYZ = np.vstack((X, Y, Z))
    med2 = util.meddistance(XYZ, subsample=1000)**2
    fac_min = 1e-2
    fac_max = 1e2
    if gwidth_lb is None:
        gwidth_lb = max(fac_min*med2, 1e-3)
    if gwidth_ub is None:
        gwidth_ub = min(fac_max*med2, 1e5)

    # # Make a box to bound test locations
    # XYZ_std = np.std(XYZ, axis=0)
    # # XYZ_min: length-d array
    # XYZ_min = np.min(XYZ, axis=0)
    # XYZ_max = np.max(XYZ, axis=0)
    # # V_lb: 2J x dn
    # V_lb = np.tile(XYZ_min - locs_bounds_frac*XYZ_std, (2*J, 1))
    # V_ub = np.tile(XYZ_max + locs_bounds_frac*XYZ_std, (2*J, 1))
    # # (J*d+1) x 2. Take square root because we parameterize with the square
    # # root
    # x0_lb = np.hstack((np.sqrt(gwidth_lb), np.reshape(V_lb, -1)))
    # x0_ub = np.hstack((np.sqrt(gwidth_ub), np.reshape(V_ub, -1)))
    # x0_bounds = list(zip(x0_lb, x0_ub))

    # Assume the noise comes from a uniform distribution over the unit cube
    x0_bounds = [(gwidth_lb, gwidth_ub)] + [(-1, 1)]*(2*J*dn)

    # optimize. Time the optimization as well.
    # https://docs.scipy.org/doc/scipy/reference/optimize.minimize-lbfgsb.html
    with util.ContextTimer() as timer:
        opt_result = scipy.optimize.minimize(
            flat_obj_noise, x0, method='L-BFGS-B',
            bounds=x0_bounds,
            tol=tol_fun,
            options={
                'maxiter': max_iter,
                'ftol': tol_fun,
                'disp': disp,
                'gtol': 1.0e-08,
            },
            jac=grad_power_noise,
        )

    opt_result = dict(opt_result)
    opt_result['time_secs'] = timer.secs
    x_opt = opt_result['x']
    sq_gw_opt, Z_opt = unflatten(x_opt)
    gw_opt = sq_gw_opt**2

    assert util.is_real_num(gw_opt), 'gw_opt is not real. Was %s' % str(gw_opt)

    return Z_opt, gw_opt, opt_result
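# --------------------------------------------------------------------------
# Shape sketch (not part of the original source): the chain rule used in
# grad_power_noise() above, rewritten with einsum and hypothetical
# dimensions. The two inner1d contractions compose d(obj)/d(features),
# d(features)/d(pixels), and d(pixels)/d(noise) into d(obj)/d(noise),
# up to transposition of the intermediate array.
def _demo_grad_chain_rule():
    rng = np.random.RandomState(0)
    twoJ, d_nn, d_pix, d_noise = 4, 10, 12, 3  # 2J, feature/pixel/noise dims
    obj_grad_f = rng.randn(twoJ, d_nn)             # d obj / d features
    f_grad_v = rng.randn(twoJ, d_nn, d_pix)        # d features / d pixels
    v_grad_z = rng.randn(twoJ, d_pix, d_noise)     # d pixels / d noise
    # contract over the feature dimension, then over the pixel dimension
    obj_grad_v = np.einsum('jn,jnp->jp', obj_grad_f, f_grad_v)  # 2J x d_pix
    obj_grad_z = np.einsum('jp,jpd->jd', obj_grad_v, v_grad_z)  # 2J x d_noise
    return obj_grad_z.flatten()  # length 2J*d_noise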
def optimize_3sample_criterion(datap, dataq, datar, V0, gwidth0, reg=1e-3,
                               max_iter=100, tol_fun=1e-6, disp=False,
                               locs_bounds_frac=100, gwidth_lb=None,
                               gwidth_ub=None):
    """
    Similar to optimize_2sets_locs_widths() but constrain V=W, and constrain
    the two Gaussian widths to be the same. Optimize one set of test
    locations and one Gaussian kernel width by maximizing the test power
    criterion of the UME *three*-sample test.

    This optimization function is deterministic.

    - datap: a kgof.data.Data from P (model 1)
    - dataq: a kgof.data.Data from Q (model 2)
    - datar: a kgof.data.Data from R (data generating distribution)
    - V0: Jxd numpy array. Initial V containing J locations. For both
        UME(P, R) and UME(Q, R)
    - gwidth0: initial value of the Gaussian width^2 for both UME(P, R)
        and UME(Q, R)
    - reg: reg to add to the mean/sqrt(variance) criterion to become
        mean/sqrt(variance + reg)
    - max_iter: #gradient descent iterations
    - tol_fun: termination tolerance of the objective value
    - disp: True to print convergence messages
    - locs_bounds_frac: When making box bounds for the test_locs, extend
        the box defined by coordinate-wise min-max by std of each
        coordinate (of the aggregated data) multiplied by this number.
    - gwidth_lb: absolute lower bound on both the Gaussian width^2
    - gwidth_ub: absolute upper bound on both the Gaussian width^2

    If the lb, ub bounds are None, use fractions of the median heuristic to
    automatically set the bounds.

    Return (optimized V, optimized Gaussian width^2, info from the
    optimization)
    """
    J = V0.shape[0]
    X, Y, Z = datap.data(), dataq.data(), datar.data()
    n, d = X.shape

    # Parameterize the Gaussian width with its square root (then square
    # later) to automatically enforce the positivity.
    def obj(sqrt_gwidth, V):
        k = kernel.KGauss(sqrt_gwidth**2)
        return -SC_UME.power_criterion(datap, dataq, datar, k, k, V, V,
                                       reg=reg)

    flatten = lambda gwidth, V: np.hstack((gwidth, V.reshape(-1)))

    def unflatten(x):
        sqrt_gwidth = x[0]
        V = np.reshape(x[1:], (J, d))
        return sqrt_gwidth, V

    def flat_obj(x):
        sqrt_gwidth, V = unflatten(x)
        return obj(sqrt_gwidth, V)

    # Initial point
    x0 = flatten(np.sqrt(gwidth0), V0)

    # make sure that the optimized gwidth is not too small or too large.
    XYZ = np.vstack((X, Y, Z))
    med2 = util.meddistance(XYZ, subsample=1000)**2
    fac_min = 1e-2
    fac_max = 1e2
    if gwidth_lb is None:
        gwidth_lb = max(fac_min*med2, 1e-2)
    if gwidth_ub is None:
        gwidth_ub = min(fac_max*med2, 1e5)

    # Make a box to bound test locations
    XYZ_std = np.std(XYZ, axis=0)
    # XYZ_min: length-d array
    XYZ_min = np.min(XYZ, axis=0)
    XYZ_max = np.max(XYZ, axis=0)
    # V_lb: J x d
    V_lb = np.tile(XYZ_min - locs_bounds_frac*XYZ_std, (J, 1))
    V_ub = np.tile(XYZ_max + locs_bounds_frac*XYZ_std, (J, 1))
    # (J*d+1) x 2. Take square root because we parameterize with the square
    # root
    x0_lb = np.hstack((np.sqrt(gwidth_lb), np.reshape(V_lb, -1)))
    x0_ub = np.hstack((np.sqrt(gwidth_ub), np.reshape(V_ub, -1)))
    x0_bounds = list(zip(x0_lb, x0_ub))

    # optimize. Time the optimization as well.
    # https://docs.scipy.org/doc/scipy/reference/optimize.minimize-lbfgsb.html
    grad_obj = autograd.elementwise_grad(flat_obj)
    with util.ContextTimer() as timer:
        opt_result = scipy.optimize.minimize(
            flat_obj, x0, method='L-BFGS-B',
            bounds=x0_bounds,
            tol=tol_fun,
            options={
                'maxiter': max_iter,
                'ftol': tol_fun,
                'disp': disp,
                'gtol': 1.0e-08,
            },
            jac=grad_obj,
        )

    opt_result = dict(opt_result)
    opt_result['time_secs'] = timer.secs
    x_opt = opt_result['x']
    sq_gw_opt, V_opt = unflatten(x_opt)
    gw_opt = sq_gw_opt**2

    assert util.is_real_num(gw_opt), 'gw_opt is not real. Was %s' % str(gw_opt)

    return V_opt, gw_opt, opt_result
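# --------------------------------------------------------------------------
# Usage sketch (not part of the original source): a hypothetical call to
# optimize_3sample_criterion() above on toy data, assuming the data and
# util modules used in this file are importable.
def _demo_optimize_3sample_criterion():
    rng = np.random.RandomState(7)
    n, d, J = 300, 2, 3
    datap = data.Data(rng.randn(n, d) + 1.0)  # sample from P
    dataq = data.Data(rng.randn(n, d) + 0.2)  # sample from Q
    datar = data.Data(rng.randn(n, d))        # sample from R
    V0 = util.subsample_rows(datar.data(), J, seed=7)  # initial locations
    gwidth0 = util.meddistance(datar.data(), subsample=1000)**2
    V_opt, gw_opt, info = optimize_3sample_criterion(datap, dataq, datar,
                                                     V0, gwidth0)
    print(gw_opt, info['time_secs'])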
def test_optimize_2sets_locs_widths(self):
    mp, varp = 2, 1

    # q cannot be the true model.
    # That violates our assumption and the asymptotic null distribution
    # does not hold.
    mq, varq = 1, 1

    # draw some data
    n = 800  # sample size
    seed = 6
    with util.NumpySeedContext(seed=seed):
        X = np.random.randn(n, 1)*varp**0.5 + mp
        Y = np.random.randn(n, 1)*varq**0.5 + mq
        Z = np.random.randn(n, 1)

        datap = data.Data(X)
        dataq = data.Data(Y)
        datar = data.Data(Z)

    # split the data into training/test sets
    [(datptr, datpte), (datqtr, datqte), (datrtr, datrte)] = \
        [D.split_tr_te(tr_proportion=0.3, seed=85)
         for D in [datap, dataq, datar]]
    Xtr, Ytr, Ztr = [D.data() for D in [datptr, datqtr, datrtr]]

    # initialize optimization parameters.
    # Initialize the Gaussian widths with the median heuristic
    medxz = util.meddistance(np.vstack((Xtr, Ztr)), subsample=1000)
    medyz = util.meddistance(np.vstack((Ytr, Ztr)), subsample=1000)
    gwidth0p = medxz**2
    gwidth0q = medyz**2

    # numbers of test locations in V, W
    J = 2
    Jp = J
    Jq = J

    # pick a subset of points in the training set for V, W
    Xyztr = np.vstack((Xtr, Ytr, Ztr))
    VW = util.subsample_rows(Xyztr, Jp + Jq, seed=73)
    V0 = VW[:Jp, :]
    W0 = VW[Jp:, :]

    # optimization options
    opt_options = {
        'max_iter': 100,
        'reg': 1e-4,
        'tol_fun': 1e-6,
        'locs_bounds_frac': 100,
        'gwidth_lb': None,
        'gwidth_ub': None,
    }

    umep_params, umeq_params = mct.SC_GaussUME.optimize_2sets_locs_widths(
        datptr, datqtr, datrtr, V0, W0, gwidth0p, gwidth0q, **opt_options)
    (V_opt, gw2p_opt, opt_infop) = umep_params
    (W_opt, gw2q_opt, opt_infoq) = umeq_params
    k_opt = kernel.KGauss(gw2p_opt)
    l_opt = kernel.KGauss(gw2q_opt)

    # construct a UME test
    alpha = 0.01  # significance level
    scume_opt2 = mct.SC_UME(datpte, datqte, k_opt, l_opt, V_opt, W_opt,
                            alpha=alpha)
    scume_opt2.perform_test(datrte)