def test_basic_H1(self):
    """
    Basic sanity check of tst.UMETest on a problem where H1 is true.

    Samples come from data.SSGaussMeanDiff with my=2.0, in dimensions 1
    and 4. For each dimension the test is run with 1 and with 6 test
    locations, and the returned p-value is required to be in [0, 0.1].
    """
    base_seed = 12
    sample_size = 271
    level = 0.01

    for dim in (1, 4):
        # H1 is true for this sample source.
        source = data.SSGaussMeanDiff(d=dim, my=2.0)
        sample = source.sample(sample_size, seed=base_seed)
        stacked = sample.stack_xy()

        # Gaussian kernel parameterised by the squared value returned by
        # util.meddistance on the stacked sample.
        med = util.meddistance(stacked, subsample=1000)
        gauss_kernel = kernel.KGauss(med**2)

        for n_locs in (1, 6):
            # Random test locations, drawn by util.fit_gaussian_draw from
            # the stacked data.
            locations = util.fit_gaussian_draw(
                stacked, n_locs, seed=base_seed + 1)
            ume_test = tst.UMETest(
                locations, gauss_kernel, n_simulate=2000, alpha=level)
            outcome = ume_test.perform_test(sample)
            pvalue = outcome['pvalue']

            # A p-value can never be negative ...
            self.assertGreaterEqual(pvalue, 0.0)
            # ... and since H1 is true, it should be small.
            self.assertLessEqual(pvalue, 0.1)
def get_sample_source(prob_label):
    """Return a SampleSource corresponding to the problem label.

    prob_label: one of the keys of the table below
        ('SSBlobs', 'gmd_d100', 'gmd_d2', 'gvd_d50', 'gvd_d100', 'sg_d50').

    Return the sample source object registered under prob_label.
    Raise ValueError if prob_label is not a known label.
    """
    # map: prob_label -> sample_source
    # NOTE: the whole table (all sources) is constructed on every call.
    prob2ss = {
        'SSBlobs': data.SSBlobs(),
        'gmd_d100': data.SSGaussMeanDiff(d=100, my=1.0),
        'gmd_d2': data.SSGaussMeanDiff(d=2, my=1.0),
        'gvd_d50': data.SSGaussVarDiff(d=50),
        'gvd_d100': data.SSGaussVarDiff(d=100),
        # The null is true
        'sg_d50': data.SSSameGauss(d=50),
    }
    if prob_label not in prob2ss:
        # Wrap the keys in list(..) so that the message shows a plain list
        # of labels instead of the "dict_keys([...])" view repr that
        # Python 3 produces for str(d.keys()).
        raise ValueError('Unknown problem label. Need to be one of %s'
                         % str(list(prob2ss.keys())))
    return prob2ss[prob_label]
def test_optimize_locs_width(self):
    """
    Exercise tst.GaussUMETest.optimize_locs_width(..) on a Gaussian
    mean-shift problem and check that its output is not degenerate:
    the optimized width is a positive real number, the optimized
    locations contain no NaN/inf, and the test built from them rejects
    H0 on the held-out half of the data.
    """
    # sample source
    n = 600
    seed = 17
    dim = 2
    ss = data.SSGaussMeanDiff(dim, my=1.0)
    dim = ss.dim()

    # Draw the data and split it in half: one part for optimization,
    # the other for the actual test.
    full_sample = ss.sample(n, seed=seed)
    tr, te = full_sample.split_tr_te(tr_proportion=0.5, seed=10)
    stacked_tr = tr.stack_xy()

    # Initial test locations: J points obtained from
    # util.fit_gaussian_draw on the training data.
    J = 3
    init_locs = util.fit_gaussian_draw(stacked_tr, J, seed=seed + 1)

    # Initial width: square of the value given by util.meddistance.
    init_gwidth = util.meddistance(stacked_tr, subsample=1000)**2
    assert init_gwidth > 0

    # optimize
    optimizer_options = dict(
        reg=1e-2,
        max_iter=100,
        tol_fun=1e-5,
        disp=False,
        locs_bounds_frac=100,
        gwidth_lb=None,
        gwidth_ub=None,
    )
    V_opt, gw2_opt, opt_info = tst.GaussUMETest.optimize_locs_width(
        tr, init_locs, init_gwidth, **optimizer_options)

    # Run the test with the optimized parameters on the test split.
    alpha = 0.01
    ume_opt = tst.GaussUMETest(V_opt, gw2_opt, n_simulate=2000, alpha=alpha)
    test_result = ume_opt.perform_test(te)

    assert test_result['h0_rejected']
    assert util.is_real_num(gw2_opt)
    assert gw2_opt > 0
    # No NaN and no infinity among the optimized locations.
    assert not np.any(np.isnan(V_opt))
    assert not np.any(np.isinf(V_opt))
def get_sample_source_list(prob_label):
    """Return a list of SampleSource's representing the problems, each
    corresponding to one dimension in the list.

    prob_label: one of 'gmd', 'gvd', 'sg', 'sg_low'.

    Return a list of sample sources, one per dimension tried for that
    problem. Raise ValueError if prob_label is not a known label.
    """
    # dimensions to try
    # [5, 100, 200, 300, 400, 500]
    dimensions = [5] + [100*i for i in range(1, 5+1)]
    # [5, 300, 600, 900, 1200, 1500]
    high_dims = [5] + [300*i for i in range(1, 5+1)]
    # [2, 4, ..., 14]
    low_dims = [2*d for d in range(1, 7+1)]

    # map: prob_label -> [sample_source]
    # NOTE: the lists for all labels are constructed on every call.
    prob2ss = {
        'gmd': [data.SSGaussMeanDiff(d=d, my=1.0) for d in high_dims],
        'gvd': [data.SSGaussVarDiff(d=d) for d in dimensions],
        # The null is true
        'sg': [data.SSSameGauss(d=d) for d in high_dims],
        'sg_low': [data.SSSameGauss(d=d) for d in low_dims],
    }
    if prob_label not in prob2ss:
        # Wrap the keys in list(..) so that the message shows a plain list
        # of labels instead of the "dict_keys([...])" view repr that
        # Python 3 produces for str(d.keys()).
        raise ValueError('Unknown problem label. Need to be one of %s'
                         % str(list(prob2ss.keys())))
    return prob2ss[prob_label]
def get_sample_source(prob_label):
    """Look up a problem by its label.

    Return a 2-tuple (sample_source, sample_sizes): the SampleSource
    registered under prob_label and the list of sample sizes to try.
    The same sample_sizes list is shared by all problems.

    Raise ValueError if prob_label is not a known label.
    """
    # 2000, 4000, ..., 10000
    sample_sizes = list(range(2000, 5 * 2000 + 1, 2000))

    # (label, sample source) pairs, in the order they are reported in the
    # error message below.
    labelled_sources = [
        ('SSBlobs', data.SSBlobs()),
        ('gmd_d100', data.SSGaussMeanDiff(d=100, my=1.0)),
        ('gvd_d50', data.SSGaussVarDiff(d=50)),
        # The null is true for the two problems below.
        ('sg_d50', data.SSSameGauss(d=50)),
        ('sg_d5', data.SSSameGauss(d=5)),
    ]
    # map: prob_label -> (sample_source, sample_sizes)
    prob2ss = {}
    for label, source in labelled_sources:
        prob2ss[label] = (source, sample_sizes)

    if prob_label in prob2ss:
        return prob2ss[prob_label]

    known_labels = list(prob2ss)
    raise ValueError(
        'Unknown problem label. Need to be one of %s' % str(known_labels))
def test_perform_test(self):
    """
    Run tst.METest on a Gaussian mean-shift problem for a few seeds and
    check that the reported p-value and test statistic are non-negative,
    both with a single location and with two locations.
    """
    # Full sample size
    n = 200
    # mean shift
    mean_shift = 0.1
    dim = 3
    ss = data.SSGaussMeanDiff(dim, my=mean_shift)

    for seed in (2, 8, 9):
        with util.NumpySeedContext(seed=seed):
            tst_data = ss.sample(n, seed=seed)
            # Two random rows of length dim, used as the locations.
            locs = np.random.randn(2, dim)
            k = kernel.KGauss(1)

            # First only the first row (kept 2-D by the list index),
            # then both rows.
            for test_locs in (locs[[0], :], locs):
                me = tst.METest(test_locs, k, alpha=0.01)
                result = me.perform_test(tst_data)
                self.assertGreaterEqual(result['pvalue'], 0)
                self.assertGreaterEqual(result['test_stat'], 0)