def test_optimized_fssd(self):
    """
    Test the FSSD test with parameter optimization.

    The model p is an isotropic normal with zero mean and unit variance in
    d dimensions, while the sample is drawn from an isotropic normal whose
    mean is shifted by 4 in every coordinate. The test locations and the
    Gaussian kernel width are optimized on the training split, and the
    test run on the held-out split is asserted to reject H0.
    """
    seed = 4
    n_samples = 179
    alpha = 0.01
    # Optimizer settings; the same for every (d, J) combination.
    optimizer_opts = {
        'reg': 1e-2,
        'max_iter': 10,
        'tol_fun': 1e-3,
        'disp': False,
    }

    for d in [1, 3]:
        zero_mean = np.zeros(d)
        unit_var = 1.0
        model = density.IsotropicNormal(zero_mean, unit_var)

        # Mean difference: an obvious reject.
        shifted_source = data.DSIsotropicNormal(zero_mean + 4, unit_var + 0)
        sample = shifted_source.sample(n_samples, seed=seed)

        for J in [1, 4]:
            train, held_out = sample.split_tr_te(
                tr_proportion=0.3, seed=seed + 1)
            train_X = train.X

            # Starting point of the optimization: a width derived from
            # util.meddistance (squared) and J randomly drawn locations.
            init_gwidth = util.meddistance(train_X, subsample=1000)**2
            init_locs = util.fit_gaussian_draw(train_X, J, seed=seed + 1)

            # The third return value (optimizer info) is not needed here.
            locs, gwidth, _ = gof.GaussFSSD.optimize_locs_widths(
                model, train, init_gwidth, init_locs, **optimizer_opts)

            # Construct the test from the optimized parameters.
            opt_kernel = kernel.KGauss(gwidth)
            null_sim = gof.FSSDH0SimCovObs(n_simulate=2000, seed=10)
            fssd = gof.FSSD(
                model, opt_kernel, locs, null_sim=null_sim, alpha=alpha)

            result = fssd.perform_test(
                held_out, return_simulated_stats=True)
            assert result['h0_rejected']
def test_auto_init_opt_fssd(self):
    """
    Test the FSSD-opt test with automatic parameter initialization.

    The model p is an isotropic normal with zero mean and unit variance in
    d dimensions, while the sample is drawn from an isotropic normal whose
    mean is shifted by 4 in every coordinate. The test locations and the
    Gaussian kernel width come from ``GaussFSSD.optimize_auto_init`` run on
    the training split, and the test on the held-out split is asserted to
    reject H0.
    """
    seed = 5
    n_samples = 191
    alpha = 0.01
    # Optimizer settings; the same for every (d, J) combination.
    optimizer_opts = {
        'reg': 1e-2,
        'max_iter': 10,
        'tol_fun': 1e-3,
        'disp': False,
    }

    for d in [1, 4]:
        zero_mean = np.zeros(d)
        unit_var = 1.0
        model = density.IsotropicNormal(zero_mean, unit_var)

        # Mean difference: an obvious reject.
        shifted_source = data.DSIsotropicNormal(zero_mean + 4, unit_var + 0)
        sample = shifted_source.sample(n_samples, seed=seed)

        for J in [1, 3]:
            train, held_out = sample.split_tr_te(
                tr_proportion=0.3, seed=seed + 1)

            # The third return value (optimizer info) is not needed here.
            locs, gwidth, _ = gof.GaussFSSD.optimize_auto_init(
                model, train, J, **optimizer_opts)

            # Construct the test from the optimized parameters.
            opt_kernel = kernel.KGauss(gwidth)
            null_sim = gof.FSSDH0SimCovObs(n_simulate=2000, seed=10)
            fssd = gof.FSSD(
                model, opt_kernel, locs, null_sim=null_sim, alpha=alpha)

            result = fssd.perform_test(
                held_out, return_simulated_stats=True)
            assert result['h0_rejected']
def get_datasource(self):
    """
    Return a ``data.DSIsotropicNormal`` built from this object's
    ``mean`` and ``variance`` attributes.
    """
    mean, variance = self.mean, self.variance
    return data.DSIsotropicNormal(mean, variance)
def get_pqsource_list(prob_label):
    """
    Return the list [(prob_param, p, ds), ...] for the problem `prob_label`.

    Each tuple contains
    - prob_param: the problem parameter being varied. It has to be a scalar
      (so that it can be plotted later), preferably a positive integer.
    - p: a Density representing the distribution p.
    - ds: a DataSource generating the sample from q for that parameter.

    All problems are constructed on every call, before the label is looked
    up. The RBM problems rely on `gaussbern_rbm_probs` and `sample_size`,
    which are expected to be defined at module level.

    Raises ValueError if `prob_label` is not a known problem label.
    """
    def std_normal(d):
        # The model shared by the isotropic problems: P = N(0, I) in d dims.
        return density.IsotropicNormal(np.zeros(d), 1)

    def first_coord_mean(value, d):
        # Mean vector (value, 0, ..., 0) of length d.
        return np.hstack((value, np.zeros(d - 1)))

    def variance_problems(d, variances):
        # P = N(0, I), Q = N(0, v*I) in d dimensions, one entry per v.
        return [(var, std_normal(d), data.DSIsotropicNormal(np.zeros(d), var))
                for var in variances]

    problems = {}

    # H0 is true. Vary d. P = Q = N(0, I).
    problems['sg'] = [
        (d, std_normal(d), data.DSIsotropicNormal(np.zeros(d), 1))
        for d in [1, 5, 10, 15]
    ]

    # Vary d. P = N(0, I), Q = N((1, 0, ..., 0), I).
    problems['gmd'] = [
        (d, std_normal(d), data.DSIsotropicNormal(first_coord_mean(1, d), 1))
        for d in [5, 20, 40, 60]
    ]

    # d=10. P = N(0, I), Q = N((m, 0, ..., 0), I). Vary the mean m.
    problems['gmd_d10_ms'] = [
        (m, std_normal(10), data.DSIsotropicNormal(first_coord_mean(m, 10), 1))
        for m in [0, 0.02, 0.04, 0.06]
    ]

    # d=1. Increase the variance. P = N(0, I), Q = N(0, v*I).
    problems['gvinc_d1'] = variance_problems(1, [1, 1.5, 2, 2.5])

    # d=5. Increase the variance. P = N(0, I), Q = N(0, v*I).
    problems['gvinc_d5'] = variance_problems(5, [1, 1.5, 2, 2.5])

    # d=1. P = N(0, 1), Q = N(0, v). Consider variances below 1.
    problems['gvsub1_d1'] = variance_problems(1, [0.1, 0.3, 0.5, 0.7])

    # Gaussian variance difference problem. Only the variance of the first
    # dimension differs (2 instead of 1). d varies.
    problems['gvd'] = [
        (d,
         density.Normal(np.zeros(d), np.eye(d)),
         data.DSNormal(np.zeros(d), np.diag(np.hstack((2, np.ones(d - 1))))))
        for d in [1, 5, 10, 15]
    ]

    # Gaussian Bernoulli RBM. dx=50, dh=10.
    problems['gbrbm_dx50_dh10'] = gaussbern_rbm_probs(
        [0, 0.02, 0.04, 0.06], dx=50, dh=10, n=sample_size)

    # Gaussian Bernoulli RBM. dx=50, dh=40.
    problems['gbrbm_dx50_dh40'] = gaussbern_rbm_probs(
        [0, 0.01, 0.02, 0.04, 0.06], dx=50, dh=40, n=sample_size)

    # p: N(0, I), q: standard Laplace. Vary d.
    # Scaling of 1/sqrt(2) will make the variance 1.
    problems['glaplace'] = [
        (d, std_normal(d),
         data.DSLaplace(d=d, loc=0, scale=1.0 / np.sqrt(2)))
        for d in [1, 5, 10, 15]
    ]

    if prob_label in problems:
        return problems[prob_label]
    raise ValueError('Unknown problem label. Need to be one of %s' %
                     str(problems.keys()))
def get_ns_pqsource(prob_label):
    """
    Return the tuple (ns, p, ds) for the problem `prob_label`.

    - ns: a list of sample sizes.
    - p: a Density representing the distribution p.
    - ds: a DataSource generating the sample from q.

    All problems are constructed on every call, before the label is looked
    up. The RBM problems rely on `gbrbm_perturb`, which is expected to be
    defined at module level and to return a tuple that is appended to
    (ns,).

    Raises ValueError if `prob_label` is not a known problem label.
    """
    def thousands(first, last):
        # Sample sizes first*1000, ..., last*1000 (both ends included).
        return [k * 1000 for k in range(first, last + 1)]

    def rbm_problem(ns, var_perturb_B, dx, dh):
        # Prepend the sample sizes to whatever gbrbm_perturb returns.
        return (ns,) + gbrbm_perturb(var_perturb_B=var_perturb_B,
                                     dx=dx, dh=dh)

    problems = {
        # d=10. P = N(0, I), Q = N((0.03, 0, ..., 0), I). Vary n.
        'gmd_p03_d10_ns': (
            [1000, 3000, 5000],
            density.IsotropicNormal(np.zeros(10), 1),
            data.DSIsotropicNormal(np.hstack((0.03, np.zeros(10 - 1))), 1),
        ),

        # Gaussian Bernoulli RBM. dx=50, dh=10.
        # Perturbation variance to B[0, 0] is 0.1.
        'gbrbm_dx50_dh10_vp1': rbm_problem(thousands(1, 4), 0.1, 50, 10),

        # Gaussian Bernoulli RBM. dx=50, dh=40.
        # Perturbation variance to B[0, 0] is 0.1.
        'gbrbm_dx50_dh40_vp1': rbm_problem(thousands(1, 4), 0.1, 50, 40),

        # Gaussian Bernoulli RBM. dx=50, dh=10. No perturbation.
        'gbrbm_dx50_dh10_h0': rbm_problem(thousands(1, 4), 0, 50, 10),

        # Gaussian Bernoulli RBM. dx=50, dh=40. No perturbation.
        'gbrbm_dx50_dh40_h0': rbm_problem(thousands(1, 4), 0, 50, 40),

        # Gaussian Bernoulli RBM. dx=20, dh=10.
        # Perturbation variance to B[0, 0] is 0.1.
        'gbrbm_dx20_dh10_vp1': rbm_problem(thousands(2, 5), 0.1, 20, 10),

        # Gaussian Bernoulli RBM. dx=20, dh=10. No perturbation.
        'gbrbm_dx20_dh10_h0': rbm_problem(thousands(2, 5), 0, 20, 10),
    }

    if prob_label in problems:
        return problems[prob_label]
    raise ValueError('Unknown problem label. Need to be one of %s' %
                     str(problems.keys()))
def get_pqsource(prob_label):
    """
    Return the tuple (p, ds) for the problem `prob_label`.

    - p: a Density representing the distribution p.
    - ds: a DataSource generating the sample from q.

    All problems are constructed on every call, before the label is looked
    up. The RBM problems rely on `gaussbern_rbm_tuple` and `sample_size`,
    which are expected to be defined at module level.

    Raises ValueError if `prob_label` is not a known problem label.
    """
    def std_normal(d):
        # The model shared by most problems: P = N(0, I) in d dimensions.
        return density.IsotropicNormal(np.zeros(d), 1)

    def first_coord_mean(value, d):
        # Mean vector (value, 0, ..., 0) of length d.
        return np.hstack((value, np.zeros(d - 1)))

    def first_variance_doubled(d):
        # P = N(0, I), Q = N(0, diag(2, 1, ..., 1)) in d dimensions.
        return (
            density.Normal(np.zeros(d), np.eye(d)),
            data.DSNormal(np.zeros(d),
                          np.diag(np.hstack((2, np.ones(d - 1))))),
        )

    def rbm(noise, dx, dh):
        # Gaussian Bernoulli RBM problem with the given perturbation noise.
        return gaussbern_rbm_tuple(noise, dx=dx, dh=dh, n=sample_size)

    problems = {
        # H0 is true. d=5. P = Q = N(0, I).
        'sg5': (std_normal(5), data.DSIsotropicNormal(np.zeros(5), 1)),

        # P = N(0, I), Q = N((0.2, 0, ..., 0), I).
        'gmd5': (std_normal(5),
                 data.DSIsotropicNormal(first_coord_mean(0.2, 5), 1)),
        'gmd1': (std_normal(1),
                 data.DSIsotropicNormal(np.ones(1) * 0.2, 1)),

        # P = N(0, I), Q = N((1, 0, ..., 0), I).
        'gmd100': (std_normal(100),
                   data.DSIsotropicNormal(first_coord_mean(1, 100), 1)),

        # Gaussian variance difference problem. Only the variance of the
        # first dimension differs.
        'gvd5': first_variance_doubled(5),
        'gvd10': first_variance_doubled(10),

        # Gaussian Bernoulli RBM. dx=50, dh=10. H0 is true.
        'gbrbm_dx50_dh10_v0': rbm(0, 50, 10),

        # Gaussian Bernoulli RBM. dx=5, dh=3. H0 is true.
        'gbrbm_dx5_dh3_v0': rbm(0, 5, 3),

        # Gaussian Bernoulli RBM. dx=50, dh=10. Perturb with noise = 1e-3.
        'gbrbm_dx50_dh10_v1em3': rbm(1e-3, 50, 10),

        # Gaussian Bernoulli RBM. dx=5, dh=3. Perturb with noise = 5e-3.
        'gbrbm_dx5_dh3_v5em3': rbm(5e-3, 5, 3),

        # Gaussian mixture of two components. Uniform mixture weights.
        # p = 0.5*N(0, 1) + 0.5*N(3, 0.01)
        # q = 0.5*N(-3, 0.01) + 0.5*N(0, 1)
        'gmm_d1': (
            density.IsoGaussianMixture(np.array([[0], [3.0]]),
                                       np.array([1, 0.01])),
            data.DSIsoGaussianMixture(np.array([[-3.0], [0]]),
                                      np.array([0.01, 1])),
        ),

        # p = N(0, I). q is a two-component mixture with weights 0.1 / 0.9.
        # Both component means are zero; the second array presumably holds
        # the component variances (as in 'gmm_d1' above), i.e.
        # q = 0.1*N(0, 0.0001*I) + 0.9*N(0, I).
        'g_vs_gmm_d5': (
            std_normal(5),
            data.DSIsoGaussianMixture(
                np.vstack((np.hstack((0.0, np.zeros(4))), np.zeros(5))),
                np.array([0.0001, 1]),
                pmix=[0.1, 0.9]),
        ),

        # Same construction in d=2, with 0.01 for the first component.
        'g_vs_gmm_d2': (
            std_normal(2),
            data.DSIsoGaussianMixture(
                np.vstack((np.hstack((0.0, np.zeros(1))), np.zeros(2))),
                np.array([0.01, 1]),
                pmix=[0.1, 0.9]),
        ),

        # Same construction in d=1, with 0.01 for the first component.
        'g_vs_gmm_d1': (
            std_normal(1),
            data.DSIsoGaussianMixture(
                np.array([[0.0], [0]]),
                np.array([0.01, 1]),
                pmix=[0.1, 0.9]),
        ),
    }

    if prob_label in problems:
        return problems[prob_label]
    raise ValueError('Unknown problem label. Need to be one of %s' %
                     str(problems.keys()))