def interval(mu, ndraw=100000, keep_every=100):
    # Resume from any lengths already saved for this value of mu.
    if not os.path.exists('umau_lengths%0.2f.npz' % mu):
        lengths = []
    else:
        lengths = list(np.load('umau_lengths%0.2f.npz' % mu)['lengths'])

    if mu < 10:
        # Build a standardized reference family once from a large sample.
        big_sample = draw_sample(mu, cutoff, nsample=50000)[:50000]
        mean, scale = big_sample.mean(), big_sample.std()
        big_sample -= mean
        big_sample /= scale
        dbn = discrete_family(big_sample, np.ones_like(big_sample))
        dbn.theta = 0.

        new_sample = draw_sample(mu, cutoff, nsample=2500)[:2500]
        for i, s in enumerate(new_sample):
            try:
                _interval = dbn.interval((s - mean) / scale)
                lengths.append(np.fabs(_interval[1] - _interval[0]) / scale)
            except Exception:
                print 'exception raised'
            if i % 20 == 0 and i > 0:
                print np.median(lengths), np.mean(lengths)
                np.savez('umau_lengths%0.2f' % mu, lengths=lengths, mu=mu)
            if i % 1000 == 0 and i > 0:
                # Periodically refresh the reference sample; rebuild the
                # family so it stays consistent with the new standardization.
                big_sample = draw_sample(mu, cutoff, nsample=50000)[:50000]
                mean, scale = big_sample.mean(), big_sample.std()
                big_sample -= mean
                big_sample /= scale
                dbn = discrete_family(big_sample, np.ones_like(big_sample))
                dbn.theta = 0.
                print i
    else:
        # For large mu, redraw the reference sample for every new interval.
        for i in range(2500):
            big_sample = draw_sample(mu, cutoff, nsample=50000)[:50000]
            s = big_sample[-1]
            big_sample = big_sample[:-1]
            mean, scale = big_sample.mean(), big_sample.std()
            big_sample -= mean
            big_sample /= scale
            s = (s - mean) / scale
            dbn = discrete_family(big_sample, np.ones_like(big_sample))
            try:
                _interval = dbn.interval(s)
                lengths.append(np.fabs(_interval[1] - _interval[0]) / scale)
            except Exception:
                print 'exception raised'
            print i
            if i % 10 == 0 and i > 0:
                print np.median(lengths), np.mean(lengths)
                np.savez('umau_lengths%0.2f' % mu, lengths=lengths, mu=mu)
    print 'final', np.mean(lengths)
    return (np.mean(lengths), np.std(lengths), np.median(lengths))
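# Hypothetical driver (not in the original source): tabulate UMAU interval
# lengths over a small grid of means.  Assumes `draw_sample` and `cutoff`
# are in scope, as in the sampling module below.
if __name__ == '__main__':
    for _mu in [0., 1., 2., 3.]:
        print 'mu=%0.2f:' % _mu, interval(_mu)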
def power_onesided(n, snr, pos, rho=0.25, ndraw=10000,
                   muval=np.linspace(0, 5, 51), burnin=1000):
    # Null reference distribution of the statistic under selection.
    S0 = simulation(n, 0, pos, rho=rho, ndraw=ndraw, burnin=burnin)
    W0 = np.ones(S0.shape)
    dfam0 = discrete_family(S0, W0)
    cutoff = dfam0.one_sided_acceptance(0, alternative='greater')[1]

    def UMPU_power_onesided(mu):
        return dfam0.ccdf(mu, cutoff)

    def sample_split_onesided(mu, alpha=0.05):
        cutoff = ndist.ppf(1 - alpha)
        if np.any(mu < 0):
            raise ValueError('mu is negative: in null hypothesis')
        power = 1 - ndist.cdf(cutoff - mu / np.sqrt(2))
        return np.squeeze(power)

    power_fig = plt.figure(figsize=(8, 8))
    P_split = np.array(sample_split_onesided(muval))
    plt.plot(muval, P_split, label='Sample splitting', c='red',
             linewidth=5, alpha=0.5)
    power_ax = power_fig.gca()
    power_ax.set_ylabel('Power', fontsize=20)
    power_ax.set_xlabel(r'Effect size $\mu$', fontsize=20)
    P_UMPU = np.array([UMPU_power_onesided(m) for m in muval])
    power_ax.plot(muval, P_UMPU, label=r'Selected using $i^*(Z_S)$',
                  linewidth=5, alpha=0.5)
    P_full = power_full(n, snr, pos, rho=rho, muval=muval)[1]['full']
    power_ax.plot(muval, P_full, label=r'Selected using $i^*(Z)$',
                  color='blue', linewidth=5, alpha=0.5)
    print UMPU_power_onesided(snr)
    power_ax.legend(loc='lower right')
    power_ax.set_xlim([0, 5])
    power_ax.plot([snr, snr], [0, 1], 'k--')
    return power_fig, {'umpu': P_UMPU, 'split': P_split}
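# Hypothetical usage (assumed values, not from the original source):
# compare sample splitting against the two selective tests at snr=4.
if __name__ == '__main__':
    fig, curves = power_onesided(100, 4., 0)
    fig.savefig('power_onesided.pdf')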
def power(n, snr, pos, rho=0.25, muval=np.linspace(0, 5, 51)):
    X, mu, beta = parameters(n, rho, pos)

    # Form the correct constraints and sample from the selection event.
    con, initial = constraints(X, pos)
    Z_selection = sample_from_constraints(con, initial, ndraw=4000000,
                                          burnin=100000)
    S0 = np.dot(X.T, Z_selection.T).T
    W0 = np.ones(S0.shape[0])
    dfam0 = discrete_family(S0[:, pos], W0)
    one_sided_acceptance_region = dfam0.one_sided_acceptance(0)

    def one_sided_power(mu):
        L, U = one_sided_acceptance_region
        return 1 - (dfam0.cdf(mu, U) - dfam0.cdf(mu, L))

    power_fig = plt.figure(figsize=(8, 8))
    power_ax = power_fig.gca()
    power_ax.set_ylabel('Power', fontsize=20)
    power_ax.set_xlabel(r'Effect size $\mu$', fontsize=20)
    full_power = np.array([one_sided_power(m) for m in muval])
    print full_power
    power_ax.plot(muval, full_power, label='Reduced model UMPU',
                  linewidth=7, alpha=0.5)
    power_ax.legend(loc='lower right')
    power_ax.set_xlim([0, 5])
    power_ax.plot([snr, snr], [0, 1], 'k--')
    print one_sided_power(snr)
    return power_fig, {'full': full_power}
def interval(mu, ndraw=100000, keep_every=100):
    # Resume from any lengths already saved for this value of mu.
    if not os.path.exists('lengths%0.2f.npz' % mu):
        lengths = []
    else:
        lengths = list(np.load('lengths%0.2f.npz' % mu)['lengths'])

    # Build a standardized reference family from a large sample.
    big_sample = draw_sample(mu, cutoff, nsample=50000)[:50000]
    mean, scale = big_sample.mean(), big_sample.std()
    big_sample -= mean
    big_sample /= scale
    dbn = discrete_family(big_sample, np.ones_like(big_sample))
    dbn.theta = 0.

    new_sample = draw_sample(mu, cutoff, nsample=2500)[:2500]
    for i, s in enumerate(new_sample):
        try:
            _interval = dbn.equal_tailed_interval((s - mean) / scale)
            lengths.append(np.fabs(_interval[1] - _interval[0]) / scale)
        except Exception:
            print 'exception raised'
        if i % 20 == 0 and i > 0:
            print np.median(lengths), np.mean(lengths)
            np.savez('lengths%0.2f' % mu, lengths=lengths, mu=mu)
        if i % 1000 == 0 and i > 0:
            # Periodically refresh the reference sample; rebuild the
            # family so it stays consistent with the new standardization.
            big_sample = draw_sample(mu, cutoff, nsample=50000)[:50000]
            mean, scale = big_sample.mean(), big_sample.std()
            big_sample -= mean
            big_sample /= scale
            dbn = discrete_family(big_sample, np.ones_like(big_sample))
            dbn.theta = 0.
            print i
    return (np.mean(lengths), np.std(lengths), np.median(lengths))
def form_dbn(mu, samples):
    # Pool samples drawn at integer means within 2 of mu, tilting each
    # batch by exp((mu - l) * x) so the pooled sample targets mean mu.
    pts = np.arange(-6, 13)
    keep = np.fabs(pts - mu) <= 2
    pts = pts[keep]
    _samples = np.hstack([samples['mu%d' % l] for l in pts])
    _log_weights = np.hstack([(mu - l) * samples['mu%d' % l] for l in pts])
    _weights = np.exp(_log_weights)
    dbn = discrete_family(_samples, _weights)
    dbn.theta = 0.
    return dbn
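# A minimal, self-contained sketch (not from the original source) of the
# exponential-tilting identity form_dbn relies on: a sample drawn at
# natural parameter l, reweighted by exp((mu - l) * x), behaves like a
# sample drawn at parameter mu.
def _tilting_demo():
    rng = np.random.RandomState(0)
    l, mu = 0., 0.5
    x = rng.standard_normal(100000) + l   # N(l, 1) draws
    w = np.exp((mu - l) * x)              # tilt toward N(mu, 1)
    est = (w * x).sum() / w.sum()         # self-normalized importance estimate
    print 'tilted mean estimate:', est, '(target %0.1f)' % mu

if __name__ == '__main__':
    _tilting_demo()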
def test_discreteExFam():
    X = np.arange(100)
    pois = discrete_family(X, poisson.pmf(X, 1))
    tol = 1e-5

    print (pois._leftCutFromRight(theta=0.4618311, rightCut=(5, 0.5)),
           pois._test2RejectsLeft(theta=2.39, observed=5, auxVar=0.5))
    print pois.interval(observed=5, alpha=0.05, randomize=True, auxVar=0.5)

    print abs(1 - sum(pois.pdf(0)))
    pois.ccdf(0, 3, 0.4)

    print pois.Var(np.log(2), lambda x: x)
    print pois.Cov(np.log(2), lambda x: x, lambda x: x)

    lc = pois._rightCutFromLeft(0, (0, 0.01))
    print (0, 0.01), pois._leftCutFromRight(0, lc)

    pois._rightCutFromLeft(-10, (0, 0.01))
    # [pois.test2Cutoffs(t)[1] for t in range(-10,3)]
    pois._critCovFromLeft(-10, (0, 0.01))

    pois._critCovFromLeft(0, (0, 0.01))
    pois._critCovFromRight(0, lc)
    pois._critCovFromLeft(5, (5, 1))

    pois._test2RejectsLeft(np.log(5), 5)
    pois._test2RejectsRight(np.log(5), 5)

    pois._test2RejectsLeft(np.log(20), 5)
    pois._test2RejectsRight(np.log(0.1), 5)

    print pois._inter2Upper(5, auxVar=0.5)
    print pois.interval(5, auxVar=0.5)
import os

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm as ndist

from selection import affine
from selection.discrete_family import discrete_family

cutoff = ndist.ppf(0.95)

# Null distribution of Z_1 + Z_2 conditional on the selection Z_1 > cutoff.
null_constraint = affine.constraints(np.array([[-1, 0.]]),
                                     np.array([-cutoff]))
null_sample = affine.sample_from_constraints(null_constraint,
                                             np.array([4, 2.]),
                                             ndraw=100000).sum(1)
null_dbn = discrete_family(null_sample, np.ones_like(null_sample))

def power(mu, ndraw=100000, keep_every=100):
    constraint = affine.constraints(np.array([[-1, 0.]]),
                                    np.array([-cutoff]))
    constraint.mean = np.array([mu, mu])
    sample = affine.sample_from_constraints(constraint,
                                            np.array([4, 2.]),
                                            ndraw=ndraw)[::keep_every]
    print sample.mean(0)
    sample = sample.sum(1)
    decisions = []
    for s in sample:
        decisions.append(null_dbn.one_sided_test(0, s, alternative='greater'))
    print np.mean(decisions)
    return np.mean(decisions)
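# Hypothetical driver (not in the original source): trace out the selective
# power curve over a grid of means.
if __name__ == '__main__':
    muvals = np.linspace(0., 4., 9)
    powers = [power(m) for m in muvals]
    plt.plot(muvals, powers)
    plt.xlabel(r'$\mu$')
    plt.ylabel('Estimated power')
    plt.savefig('power_curve.pdf')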
import os
from glob import glob

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm as ndist
from sklearn.isotonic import IsotonicRegression

from selection import affine
from selection.discrete_family import discrete_family

cutoff = 3.

null_constraint = affine.constraints(np.array([[-1, 0.]]),
                                     np.array([-cutoff]))
null_sample = affine.sample_from_constraints(null_constraint,
                                             np.array([4, 2.]),
                                             ndraw=100000).sum(1)
null_dbn = discrete_family(null_sample, np.ones_like(null_sample))

def draw_sample(mu, cutoff, nsample=10000):
    if mu >= cutoff - 4:
        # Rejection sampling: keep N(mu, 1) draws above the cutoff, then
        # add an independent N(mu, 1) coordinate to form the sum.
        sample = []
        while True:
            candidate = np.random.standard_normal(1000000) + mu
            candidate = candidate[candidate > cutoff]
            sample.extend(candidate)
            if len(sample) > nsample:
                break
        sample = np.array(sample)
        sample += np.random.standard_normal(sample.shape) + mu
    else:
        # Deep in the tail, rejection sampling is hopeless; sample the
        # constrained bivariate Gaussian directly.  (The source is truncated
        # mid-call here; closing the call and summing the two coordinates,
        # as for null_sample above, is an assumption.)
        constraint = affine.constraints(np.array([[-1, 0.]]),
                                        np.array([-cutoff]))
        constraint.mean = np.array([mu, mu])
        sample = affine.sample_from_constraints(constraint,
                                                np.array([cutoff + 0.1, 0]),
                                                ndraw=2000000).sum(1)
    return sample
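# Quick sanity check (hypothetical, not in the original source): draw from
# the rejection-sampling branch and confirm the summed statistic clears
# the cutoff on average.
if __name__ == '__main__':
    s = draw_sample(cutoff - 1., cutoff, nsample=5000)
    print 'mean of sampled sums:', s.mean()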
def marginal(n, snr, pos, rho=0.25, ndraw=5000, burnin=1000,
             nsim=5000, sigma=1.):
    X, mu, beta = parameters(n, rho, pos)

    Psplit = []
    Pselect = []
    hypotheses = []
    for _ in range(nsim):
        Y_select = (snr * mu / np.sqrt(2) +
                    np.random.standard_normal(n)) * sigma
        con, _, select_pos, sign = covtest(X, Y_select, sigma=sigma,
                                           exact=True)
        cond_ncp = snr * np.dot(X.T[select_pos], mu) / np.sqrt(2) * sign
        correct = (sign == +1) and (pos == select_pos)
        hypotheses.append(correct)

        Y_null = sample_from_constraints(con, Y_select, ndraw=ndraw,
                                         burnin=burnin)
        Z_null = (np.dot(X.T[select_pos], Y_null.T) +
                  sigma * np.random.standard_normal(ndraw)) / np.sqrt(2)
        Z_inference = sigma * (cond_ncp + np.random.standard_normal())
        Z_observed = (np.dot(X.T[select_pos], Y_select) * sign +
                      Z_inference) / np.sqrt(2)

        dfam = discrete_family(Z_null, np.ones(Z_null.shape))
        Pselect.append(dfam.ccdf(0, Z_observed))
        if sign == +1:
            Psplit.append(ndist.sf(Z_inference / sigma))
        else:
            Psplit.append(ndist.cdf(Z_inference / sigma))

    Ugrid = np.linspace(0, 1, 101)
    Psplit = np.array(Psplit)
    Pselect = np.array(Pselect)
    hypotheses = np.array(hypotheses, bool)

    # plot of marginal distribution of p-values
    fig1 = plt.figure(figsize=(8, 8))
    ax1 = fig1.gca()
    ax1.plot(Ugrid, ECDF(Psplit)(Ugrid), label='Sample splitting',
             c='red', linewidth=5, alpha=0.5)
    ax1.plot(Ugrid, ECDF(Pselect)(Ugrid),
             label=r'Selected using $i^*(Z_S)$',
             c='blue', linewidth=5, alpha=0.5)
    ax1.set_xlabel('P-value, $p$', fontsize=20)
    ax1.set_ylabel('ECDF($p$)', fontsize=20)
    ax1.plot([0.05, 0.05], [0, 1], 'k--')
    ax1.legend(loc='lower right')

    # conditional distribution of p-values, conditioned on selection
    # choosing the correct position and sign
    fig2 = plt.figure(figsize=(8, 8))
    ax2 = fig2.gca()
    ax2.plot(Ugrid, ECDF(Psplit[hypotheses])(Ugrid),
             label='Sample splitting', c='red', linewidth=5, alpha=0.5)
    ax2.plot(Ugrid, ECDF(Pselect[hypotheses])(Ugrid),
             label=r'Selected using $i^*(Z_S)$',
             c='blue', linewidth=5, alpha=0.5)
    ax2.set_xlabel('P-value, $p$', fontsize=20)
    ax2.set_ylabel('ECDF($p$)', fontsize=20)
    ax2.plot([0.05, 0.05], [0, 1], 'k--')
    ax2.legend(loc='lower right')

    dbn1 = {'split': Psplit,
            'select': Pselect,
            'hypotheses': hypotheses}
    return fig1, fig2, dbn1
def simulation(n, snr, pos, rho=0.25, nsim=5000, sigma=1.5):
    # Design, mean vector and parameter vector
    X, mu, beta = parameters(n, rho, pos)

    Pcov = []
    Pexact = []
    Pu = []
    Pr = []
    Pfixed = []
    Pmax = []
    hypotheses = []

    # Set seed
    np.random.seed(0)

    # Max test: null reference distribution of max_j |X^T Z|.
    max_stat = np.fabs(np.dot(X.T, np.random.standard_normal(
        (n, 10000)))).max(0) * sigma
    max_fam = discrete_family(max_stat, np.ones(max_stat.shape))
    max_fam.theta = 0

    for i in range(nsim):
        Y = (snr * mu + np.random.standard_normal(n)) * sigma
        Z = np.dot(X.T, Y)

        # did this find the correct position and sign?
        correct = np.all(np.less_equal(np.fabs(Z), Z[pos]))
        hypotheses.append(correct)

        # Pcov holds the asymptotic covtest p-values, Pexact the exact ones.
        Pcov.append(covtest(X, Y, sigma=sigma, exact=False)[1])
        Pexact.append(covtest(X, Y, sigma=sigma, exact=True)[1])
        Pfixed.append(2 * ndist.sf(np.fabs(np.dot(X.T, Y))[pos] / sigma))
        Pu.append(reduced_covtest(X, Y, burnin=500, ndraw=5000)[1])
        Pr.append(reduced_covtest(X, Y, burnin=500, ndraw=5000,
                                  sigma=sigma)[1])
        p = max_fam.ccdf(0, np.fabs(np.dot(X.T, Y)).max())
        Pmax.append(p)

    Ugrid = np.linspace(0, 1, 101)
    Pcov = np.array(Pcov)
    Pexact = np.array(Pexact)
    Pu = np.array(Pu)
    Pr = np.array(Pr)
    Pfixed = np.array(Pfixed)
    Pmax = np.array(Pmax)

    # plot of marginal distribution of p-values
    fig1 = plt.figure(figsize=(8, 8))
    ax1 = fig1.gca()
    ax1.plot(Ugrid, ECDF(Pcov)(Ugrid), label='Full (asymptotic)', c='red',
             linewidth=5, alpha=0.5)
    ax1.plot(Ugrid, ECDF(Pexact)(Ugrid), label='Full (exact)', c='k',
             linewidth=5, alpha=0.5)
    ax1.plot(Ugrid, ECDF(Pmax)(Ugrid), label='Max test', c='cyan',
             linewidth=5, alpha=0.5)
    ax1.plot(Ugrid, ECDF(Pu)(Ugrid),
             label=r'Selected 1-sparse, $\sigma$ unknown',
             c='blue', linewidth=5, alpha=0.5)
    ax1.plot(Ugrid, ECDF(Pr)(Ugrid),
             label=r'Selected 1-sparse, $\sigma$ known',
             c='green', linewidth=5, alpha=0.5)
    ax1.plot(Ugrid, ECDF(Pfixed)(Ugrid),
             label=r'Fixed 1-sparse, $\sigma$ known',
             c='yellow', linewidth=5, alpha=0.5)
    ax1.set_xlabel('P-value, $p$', fontsize=20)
    ax1.set_ylabel('ECDF($p$)', fontsize=20)
    ax1.plot([0.05, 0.05], [0, 1], 'k--')
    ax1.legend(loc='lower right')

    # conditional distribution of p-values, conditioned on selection
    # choosing the correct position and sign
    fig2 = plt.figure(figsize=(8, 8))
    hypotheses = np.array(hypotheses, bool)
    ax2 = fig2.gca()
    ax2.plot(Ugrid, ECDF(Pcov[hypotheses])(Ugrid), label='Full (asymptotic)',
             c='red', linewidth=5, alpha=0.5)
    ax2.plot(Ugrid, ECDF(Pexact[hypotheses])(Ugrid), label='Full (exact)',
             c='k', linewidth=5, alpha=0.5)
    ax2.plot(Ugrid, ECDF(Pu[hypotheses])(Ugrid),
             label=r'Selected 1-sparse, $\sigma$ unknown',
             c='blue', linewidth=5, alpha=0.5)
    ax2.plot(Ugrid, ECDF(Pr[hypotheses])(Ugrid),
             label=r'Selected 1-sparse, $\sigma$ known',
             c='green', linewidth=5, alpha=0.5)
    ax2.set_xlabel('P-value, $p$', fontsize=20)
    ax2.set_ylabel('ECDF($p$)', fontsize=20)
    ax2.plot([0.05, 0.05], [0, 1], 'k--')
    ax2.legend(loc='lower right')

    dbn1 = {}
    dbn1['exact'] = Pexact
    dbn1['covtest'] = Pcov
    dbn1['unknown'] = Pu
    dbn1['known'] = Pr
    dbn1['fixed'] = Pfixed
    dbn1['max'] = Pmax
    dbn1['hypotheses'] = hypotheses
    return fig1, fig2, dbn1
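# Hypothetical driver (assumed values, not in the original source): run a
# null simulation and report the empirical size of each test at level 0.05.
if __name__ == '__main__':
    _, _, dbn = simulation(100, 0., 0, nsim=1000)
    for key in ['exact', 'covtest', 'unknown', 'known', 'fixed', 'max']:
        print key, np.mean(dbn[key] < 0.05)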