def constraints(X, pos):
    """
    Rejection-sample a response for which `covtest` picks a given variable.

    Standard normal vectors of length ``X.shape[0]`` are drawn repeatedly
    and passed to ``covtest(X, Y, sigma=1)`` until the returned index
    equals `pos` and the returned sign equals +1.

    Parameters
    ----------
    X : 2-d array
        Design matrix; only its first dimension is used to size the draws.
    pos : int
        Index that `covtest` must report for a draw to be accepted.

    Returns
    -------
    con : object
        First element returned by `covtest` for the accepted draw.
    initial : np.ndarray
        Copy of the accepted response vector.

    Notes
    -----
    The loop has no iteration cap; it only ends once a draw is accepted.
    """
    n_obs, _ = X.shape
    while True:
        draw = np.random.standard_normal(n_obs)
        con, _, idx, sign = covtest(X, draw, sigma=1)
        # Reject unless both the selected index and the sign match.
        if idx != pos or sign != +1:
            continue
        return con, draw.copy()
def test_covtest():
    """
    Smoke test: run `covtest` and `reduced_covtest` on a random problem.

    A 30 x 50 design is built from independent standard normals plus a
    per-row shift shared by all columns, then each column is scaled to unit
    standard deviation.  `covtest` is called for every combination of
    ``exact`` in (True, False) and ``covariance`` in (None, identity);
    `reduced_covtest` is called for both covariance choices.

    Returns
    -------
    The p-value from the last `reduced_covtest` call.
    """
    n, p = 30, 50
    noise_sd = 1.5

    # Draw order matters for reproducibility: design noise, row shift, response.
    X = np.random.standard_normal((n, p))
    X = X + np.random.standard_normal(n)[:, None]
    X /= X.std(0)[None, :]
    Y = noise_sd * np.random.standard_normal(n)

    # One identity matrix is shared by both `exact` settings.
    covtest_covariances = [None, np.identity(n)]
    for exact in (True, False):
        for covariance in covtest_covariances:
            con, p_value, idx, sign = covtest(X, Y,
                                              sigma=noise_sd,
                                              exact=exact,
                                              covariance=covariance)

    # A fresh identity matrix is used for the reduced test.
    for covariance in (None, np.identity(n)):
        con, p_value, idx, sign = reduced_covtest(X, Y,
                                                  sigma=noise_sd,
                                                  covariance=covariance)

    return p_value
def test_covtest():
    """
    Smoke test: run `covtest` and `reduced_covtest` on a random problem.

    A 30 x 50 design is built from independent standard normals plus a
    per-row shift shared by all columns, then each column is scaled to unit
    standard deviation.  `covtest` is called for every combination of
    ``exact`` in (True, False) and ``covariance`` in (None, identity);
    `reduced_covtest` is called for both covariance choices.

    Returns
    -------
    The p-value from the last `reduced_covtest` call.
    """
    n, p = 30, 50
    noise_sd = 1.5

    # Draw order matters for reproducibility: design noise, row shift, response.
    X = np.random.standard_normal((n, p))
    X = X + np.random.standard_normal(n)[:, None]
    X /= X.std(0)[None, :]
    Y = noise_sd * np.random.standard_normal(n)

    # One identity matrix is shared by both `exact` settings.
    covtest_covariances = [None, np.identity(n)]
    for exact in (True, False):
        for covariance in covtest_covariances:
            con, p_value, idx, sign = covtest(X, Y,
                                              sigma=noise_sd,
                                              exact=exact,
                                              covariance=covariance)

    # A fresh identity matrix is used for the reduced test.
    for covariance in (None, np.identity(n)):
        con, p_value, idx, sign = reduced_covtest(X, Y,
                                                  sigma=noise_sd,
                                                  covariance=covariance)

    return p_value
def sample_split(X, Y, sigma=None, nstep=10, burnin=1000, ndraw=5000, reduced=True):
    """
    Compare sample-splitting inference with full-data selective tests
    along a forward stepwise path.

    The first ``int(n / 2)`` rows are used to run `forward_stepwise`
    (the "split" model); a second `forward_stepwise` is run on copies of
    the full data.  For each of `nstep` steps several p-values are
    collected.

    Parameters
    ----------
    X : 2-d array, shape (n, p)
    Y : 1-d array, length n
    sigma : float
        Noise level.  Although the default is None, ``sigma**2`` and
        ``... * sigma`` are evaluated unconditionally, so None raises
        TypeError.
    nstep : int
        Number of forward stepwise steps taken on both models.
    burnin, ndraw : int
        Passed through to `gibbs_test`.
    reduced : bool
        If True, run `gibbs_test` twice per step (once with
        ``sigma_known=sigma is not None``, once with
        ``sigma_known=False``).

    Returns
    -------
    split_P : list
        ``2 * ndist.sf(|eta2 . Y2| / (||eta2|| * sigma))`` per step,
        computed on the held-out second half.
    reduced_Pknown, reduced_Punknown : list
        First element of each `gibbs_test` result (empty when
        `reduced` is False).
    spacings_P : list
        ``Acon.pivot(eta1, Y)`` on the full data, per step.
    covtest_P : list
        p-value from ``covtest(..., exact=False)`` on the full data.
    variables
        ``FS_half.variables`` after the last step.

    Notes
    -----
    NOTE(review): if X and Y are NumPy arrays, ``X[half_n:]`` and
    ``Y[half_n:]`` are views, so centering them in place modifies the
    caller's arrays.  The full-data branch also standardizes ``X`` in
    place when ``FS_full.P[i]`` is None.  Later statements read the
    modified ``X``/``Y``, so results depend on this -- confirm it is
    intended before changing.

    NOTE(review): `constraints` is called here with
    ``(linear_part, offset, covariance)``; this differs from the
    two-argument ``constraints(X, pos)`` function defined elsewhere in
    this file -- confirm which name is in scope.
    """
    n, p = X.shape
    half_n = int(n / 2)
    # "* 1." forces copies, so the first half is centered without touching X, Y.
    X1, Y1 = X[:half_n, :] * 1., Y[:half_n] * 1.
    X1 -= X1.mean(0)[None, :]
    Y1 -= Y1.mean()

    # No copy here: the in-place centering below writes through to X and Y
    # (see Notes in the docstring).
    X2, Y2 = X[half_n:], Y[half_n:]
    X2 -= X2.mean(0)[None, :]
    Y2 -= Y2.mean()

    FS_half = forward_stepwise(X1, Y1)  # sample splitting model
    FS_full = forward_stepwise(X.copy(), Y.copy())  # full data model

    spacings_P = []
    split_P = []
    reduced_Pknown = []
    reduced_Punknown = []
    covtest_P = []

    for i in range(nstep):

        FS_half.next()

        if FS_half.P[i] is not None:
            # Residualize design and response with the step-i projection.
            RX = FS_half.X - FS_half.P[i](FS_half.X)
            RY = FS_half.Y - FS_half.P[i](FS_half.Y)
            covariance = centering(FS_half.Y.shape[0]) - np.dot(
                FS_half.P[i].U, FS_half.P[i].U.T)
        else:
            # RX aliases FS_half.X, so the scaling below modifies it in place.
            RX = FS_half.X
            RY = FS_half.Y
            covariance = centering(FS_half.Y.shape[0])
        RX -= RX.mean(0)[None, :]
        RX /= (RX.std(0)[None, :] * np.sqrt(RX.shape[0]))

        # covtest on half -- not saved

        con, pval, idx, sign = covtest(RX, RY, sigma=sigma,
                                       covariance=covariance,
                                       exact=True)

        # spacings on half -- not saved

        eta1 = RX[:, idx] * sign
        Acon = constraints(FS_half.A, np.zeros(FS_half.A.shape[0]),
                           covariance=centering(FS_half.Y.shape[0]))
        Acon.covariance *= sigma**2
        Acon.pivot(eta1, FS_half.Y)

        # sample split

        # Last row of the pseudo-inverse, i.e. the row for the last entry of
        # FS_half.variables, evaluated on the held-out half.
        eta2 = np.linalg.pinv(X2[:, FS_half.variables])[-1]
        eta_sigma = np.linalg.norm(eta2) * sigma
        split_P.append(2 * ndist.sf(np.fabs((eta2 * Y2).sum() / eta_sigma)))

        # inference on full mu using split model, this \beta^+_s.

        # Pad the half-sample constraints with zero columns for the
        # remaining n - half_n observations.
        zero_block = np.zeros((Acon.linear_part.shape[0], (n - half_n)))
        linear_part = np.hstack([Acon.linear_part, zero_block])
        Fcon = constraints(linear_part, Acon.offset,
                           covariance=centering(n))
        Fcon.covariance *= sigma**2

        if i > 0:
            # Condition on the fit of the variables selected before this step.
            U = np.linalg.pinv(X[:, FS_half.variables[:-1]])
            Uy = np.dot(U, Y)
            Fcon = Fcon.conditional(U, Uy)
        else:
            # No-op: nothing to condition on at the first step.
            Fcon = Fcon

        eta_full = np.linalg.pinv(X[:, FS_half.variables])[-1]

        if reduced:
            reduced_pval = gibbs_test(Fcon, Y, eta_full,
                                      ndraw=ndraw,
                                      burnin=burnin,
                                      sigma_known=sigma is not None,
                                      alternative='twosided')[0]
            reduced_Pknown.append(reduced_pval)

            reduced_pval = gibbs_test(Fcon, Y, eta_full,
                                      ndraw=ndraw,
                                      burnin=burnin,
                                      sigma_known=False,
                                      alternative='twosided')[0]
            reduced_Punknown.append(reduced_pval)

        # now use all the data

        FS_full.next()
        if FS_full.P[i] is not None:
            RX = X - FS_full.P[i](X)
            RY = Y - FS_full.P[i](Y)
            covariance = centering(RY.shape[0]) - np.dot(
                FS_full.P[i].U, FS_full.P[i].U.T)
        else:
            # RX aliases X: the standardization below modifies X in place.
            RX = X
            RY = Y.copy()
            covariance = centering(RY.shape[0])
        RX -= RX.mean(0)[None, :]
        RX /= RX.std(0)[None, :]

        con, pval, idx, sign = covtest(RX, RY, sigma=sigma,
                                       covariance=covariance,
                                       exact=False)
        covtest_P.append(pval)

        # spacings on full data

        eta1 = RX[:, idx] * sign
        Acon = constraints(FS_full.A, np.zeros(FS_full.A.shape[0]),
                           centering(RY.shape[0]))
        Acon.covariance *= sigma**2
        spacings_P.append(Acon.pivot(eta1, Y))

    return split_P, reduced_Pknown, reduced_Punknown, spacings_P, covtest_P, FS_half.variables
def sample_split(X, Y, sigma=None, nstep=10, burnin=1000, ndraw=5000, reduced=True):
    """
    Compare sample-splitting inference with full-data selective tests
    along a forward stepwise path.

    The first ``int(n/2)`` rows are used to run `forward_stepwise`
    (the "split" model); a second `forward_stepwise` is run on copies of
    the full data.  For each of `nstep` steps several p-values are
    collected.

    Parameters
    ----------
    X : 2-d array, shape (n, p)
    Y : 1-d array, length n
    sigma : float
        Noise level.  Although the default is None, ``sigma**2`` and
        ``... * sigma`` are evaluated unconditionally, so None raises
        TypeError.
    nstep : int
        Number of forward stepwise steps taken on both models.
    burnin, ndraw : int
        Passed through to `gibbs_test`.
    reduced : bool
        If True, run `gibbs_test` twice per step (once with
        ``sigma_known=sigma is not None``, once with
        ``sigma_known=False``).

    Returns
    -------
    split_P : list
        ``2*ndist.sf(|eta2 . Y2| / (||eta2|| * sigma))`` per step,
        computed on the held-out second half.
    reduced_Pknown, reduced_Punknown : list
        First element of each `gibbs_test` result (empty when
        `reduced` is False).
    spacings_P : list
        ``Acon.pivot(eta1, Y)`` on the full data, per step.
    covtest_P : list
        p-value from ``covtest(..., exact=False)`` on the full data.
    variables
        ``FS_half.variables`` after the last step.

    Notes
    -----
    NOTE(review): if X and Y are NumPy arrays, ``X[half_n:]`` and
    ``Y[half_n:]`` are views, so centering them in place modifies the
    caller's arrays.  The full-data branch also standardizes ``X`` in
    place when ``FS_full.P[i]`` is None.  Later statements read the
    modified ``X``/``Y``, so results depend on this -- confirm it is
    intended before changing.

    NOTE(review): `constraints` is called here with
    ``(linear_part, offset, covariance)``; this differs from the
    two-argument ``constraints(X, pos)`` function defined elsewhere in
    this file -- confirm which name is in scope.
    """
    n, p = X.shape
    half_n = int(n/2)
    # "*1." forces copies, so the first half is centered without touching X, Y.
    X1, Y1 = X[:half_n,:]*1., Y[:half_n]*1.
    X1 -= X1.mean(0)[None,:]
    Y1 -= Y1.mean()

    # No copy here: the in-place centering below writes through to X and Y
    # (see Notes in the docstring).
    X2, Y2 = X[half_n:], Y[half_n:]
    X2 -= X2.mean(0)[None,:]
    Y2 -= Y2.mean()

    FS_half = forward_stepwise(X1, Y1) # sample splitting model
    FS_full = forward_stepwise(X.copy(), Y.copy()) # full data model

    spacings_P = []
    split_P = []
    reduced_Pknown = []
    reduced_Punknown = []
    covtest_P = []

    for i in range(nstep):

        FS_half.next()

        if FS_half.P[i] is not None:
            # Residualize design and response with the step-i projection.
            RX = FS_half.X - FS_half.P[i](FS_half.X)
            RY = FS_half.Y - FS_half.P[i](FS_half.Y)
            covariance = centering(FS_half.Y.shape[0]) - np.dot(FS_half.P[i].U, FS_half.P[i].U.T)
        else:
            # RX aliases FS_half.X, so the scaling below modifies it in place.
            RX = FS_half.X
            RY = FS_half.Y
            covariance = centering(FS_half.Y.shape[0])
        RX -= RX.mean(0)[None,:]
        RX /= (RX.std(0)[None,:] * np.sqrt(RX.shape[0]))

        # covtest on half -- not saved

        con, pval, idx, sign = covtest(RX, RY, sigma=sigma,
                                       covariance=covariance,
                                       exact=True)

        # spacings on half -- not saved

        eta1 = RX[:,idx] * sign
        Acon = constraints(FS_half.A, np.zeros(FS_half.A.shape[0]),
                           covariance=centering(FS_half.Y.shape[0]))
        Acon.covariance *= sigma**2
        Acon.pivot(eta1, FS_half.Y)

        # sample split

        # Last row of the pseudo-inverse, i.e. the row for the last entry of
        # FS_half.variables, evaluated on the held-out half.
        eta2 = np.linalg.pinv(X2[:,FS_half.variables])[-1]
        eta_sigma = np.linalg.norm(eta2) * sigma
        split_P.append(2*ndist.sf(np.fabs((eta2*Y2).sum() / eta_sigma)))

        # inference on full mu using split model, this \beta^+_s.

        # Pad the half-sample constraints with zero columns for the
        # remaining n - half_n observations.
        zero_block = np.zeros((Acon.linear_part.shape[0], (n-half_n)))
        linear_part = np.hstack([Acon.linear_part, zero_block])
        Fcon = constraints(linear_part, Acon.offset,
                           covariance=centering(n))
        Fcon.covariance *= sigma**2

        if i > 0:
            # Condition on the fit of the variables selected before this step.
            U = np.linalg.pinv(X[:,FS_half.variables[:-1]])
            Uy = np.dot(U, Y)
            Fcon = Fcon.conditional(U, Uy)
        else:
            # No-op: nothing to condition on at the first step.
            Fcon = Fcon

        eta_full = np.linalg.pinv(X[:,FS_half.variables])[-1]

        if reduced:
            reduced_pval = gibbs_test(Fcon, Y, eta_full,
                                      ndraw=ndraw,
                                      burnin=burnin,
                                      sigma_known=sigma is not None,
                                      alternative='twosided')[0]
            reduced_Pknown.append(reduced_pval)

            reduced_pval = gibbs_test(Fcon, Y, eta_full,
                                      ndraw=ndraw,
                                      burnin=burnin,
                                      sigma_known=False,
                                      alternative='twosided')[0]
            reduced_Punknown.append(reduced_pval)

        # now use all the data

        FS_full.next()
        if FS_full.P[i] is not None:
            RX = X - FS_full.P[i](X)
            RY = Y - FS_full.P[i](Y)
            covariance = centering(RY.shape[0]) - np.dot(FS_full.P[i].U, FS_full.P[i].U.T)
        else:
            # RX aliases X: the standardization below modifies X in place.
            RX = X
            RY = Y.copy()
            covariance = centering(RY.shape[0])
        RX -= RX.mean(0)[None,:]
        RX /= RX.std(0)[None,:]

        con, pval, idx, sign = covtest(RX, RY, sigma=sigma,
                                       covariance=covariance,
                                       exact=False)
        covtest_P.append(pval)

        # spacings on full data

        eta1 = RX[:,idx] * sign
        Acon = constraints(FS_full.A, np.zeros(FS_full.A.shape[0]),
                           centering(RY.shape[0]))
        Acon.covariance *= sigma**2
        spacings_P.append(Acon.pivot(eta1, Y))

    return split_P, reduced_Pknown, reduced_Punknown, spacings_P, covtest_P, FS_half.variables
def marginal(n, snr, pos, rho=0.25, ndraw=5000, burnin=1000, nsim=5000,
             sigma=1.):
    """
    Simulate p-values after selecting one variable with `covtest`,
    comparing sample splitting with a selective reference distribution.

    For each of `nsim` replications a "selection" response is drawn,
    `covtest` picks a variable and sign, and two p-values are computed:

    * ``Psplit`` -- from an independent "inference" statistic only,
      one-sided in the direction of the selected sign;
    * ``Pselect`` -- the selection and inference statistics are combined
      and compared with a null sample built from
      `sample_from_constraints` draws plus fresh normal noise.

    Parameters
    ----------
    n, rho, pos
        Passed to ``parameters(n, rho, pos)``, which supplies the design
        `X` and mean vector `mu`.  `pos` is also the index counted as the
        correct selection.
    snr : float
        Multiplier applied to `mu`.
    ndraw, burnin : int
        Passed to `sample_from_constraints`; `ndraw` is also the number
        of extra normal draws added to the null sample.
    nsim : int
        Number of simulated data sets.
    sigma : float
        Noise scale.

    Returns
    -------
    fig1 : matplotlib figure
        ECDFs of all p-values.
    fig2 : matplotlib figure
        ECDFs restricted to replications where the selected index equals
        `pos` and the sign is +1.
    dbn1 : dict
        Keys ``'split'``, ``'select'`` (arrays of p-values) and
        ``'hypotheses'`` (boolean array of correct selections).
    """
    X, mu, beta = parameters(n, rho, pos)

    Psplit = []
    Pselect = []
    hypotheses = []

    for _ in range(nsim):
        Y_select = (snr * mu / np.sqrt(2) + np.random.standard_normal(n)) * sigma
        con, _, select_pos, sign = covtest(X, Y_select, sigma=sigma, exact=True)

        cond_ncp = snr * np.dot(X.T[select_pos], mu) / np.sqrt(2) * sign

        correct = (sign == +1) and (pos == select_pos)
        hypotheses.append(correct)

        # Null reference sample.
        # NOTE(review): Z_observed multiplies the selection statistic by
        # `sign` but Z_null does not -- confirm `con` already fixes the sign.
        Y_null = sample_from_constraints(con, Y_select, ndraw=ndraw, burnin=burnin)
        Z_null = (np.dot(X.T[select_pos], Y_null.T)
                  + sigma * np.random.standard_normal(ndraw)) / np.sqrt(2)
        Z_inference = sigma * (cond_ncp + np.random.standard_normal())
        Z_observed = (np.dot(X.T[select_pos], Y_select) * sign
                      + Z_inference) / np.sqrt(2)
        dfam = discrete_family(Z_null, np.ones(Z_null.shape))
        Pselect.append(dfam.ccdf(0, Z_observed))

        # Sample-splitting p-value: one-sided in the selected direction.
        if sign == +1:
            Psplit.append(ndist.sf(Z_inference / sigma))
        else:
            Psplit.append(ndist.cdf(Z_inference / sigma))

    Ugrid = np.linspace(0, 1, 101)

    Psplit = np.array(Psplit)
    Pselect = np.array(Pselect)
    # `np.bool` was removed in NumPy 1.24; the builtin `bool` gives the
    # same dtype on every NumPy version.
    hypotheses = np.array(hypotheses, dtype=bool)

    # plot of marginal distribution of p-values

    fig1 = plt.figure(figsize=(8, 8))
    ax1 = fig1.gca()
    ax1.plot(Ugrid, ECDF(Psplit)(Ugrid), label='Sample splitting',
             c='red', linewidth=5, alpha=0.5)
    ax1.plot(Ugrid, ECDF(Pselect)(Ugrid), label='Selected using $i^*(Z_S)$',
             c='blue', linewidth=5, alpha=0.5)
    ax1.set_xlabel('P-value, $p$', fontsize=20)
    ax1.set_ylabel('ECDF($p$)', fontsize=20)
    ax1.plot([0.05, 0.05], [0, 1], 'k--')
    ax1.legend(loc='lower right')

    # conditional distribution of p-values
    # conditioned on selection choosing correct position and sign

    fig2 = plt.figure(figsize=(8, 8))
    ax2 = fig2.gca()
    ax2.plot(Ugrid, ECDF(Psplit[hypotheses])(Ugrid), label='Sample splitting',
             c='red', linewidth=5, alpha=0.5)
    ax2.plot(Ugrid, ECDF(Pselect[hypotheses])(Ugrid),
             label='Selected using $i^*(Z_S)$',
             c='blue', linewidth=5, alpha=0.5)
    ax2.set_xlabel('P-value, $p$', fontsize=20)
    ax2.set_ylabel('ECDF($p$)', fontsize=20)
    ax2.plot([0.05, 0.05], [0, 1], 'k--')
    ax2.legend(loc='lower right')

    dbn1 = {}
    dbn1['split'] = Psplit
    dbn1['select'] = Pselect
    dbn1['hypotheses'] = hypotheses

    return fig1, fig2, dbn1
def marginal(n, snr, pos, rho=0.25, ndraw=5000, burnin=1000, nsim=5000,
             sigma=1.):
    """
    Simulate p-values after selecting one variable with `covtest`,
    comparing sample splitting with a selective reference distribution.

    For each of `nsim` replications a "selection" response is drawn,
    `covtest` picks a variable and sign, and two p-values are computed:

    * ``Psplit`` -- from an independent "inference" statistic only,
      one-sided in the direction of the selected sign;
    * ``Pselect`` -- the selection and inference statistics are combined
      and compared with a null sample built from
      `sample_from_constraints` draws plus fresh normal noise.

    Parameters
    ----------
    n, rho, pos
        Passed to ``parameters(n, rho, pos)``, which supplies the design
        `X` and mean vector `mu`.  `pos` is also the index counted as the
        correct selection.
    snr : float
        Multiplier applied to `mu`.
    ndraw, burnin : int
        Passed to `sample_from_constraints`; `ndraw` is also the number
        of extra normal draws added to the null sample.
    nsim : int
        Number of simulated data sets.
    sigma : float
        Noise scale.

    Returns
    -------
    fig1 : matplotlib figure
        ECDFs of all p-values.
    fig2 : matplotlib figure
        ECDFs restricted to replications where the selected index equals
        `pos` and the sign is +1.
    dbn1 : dict
        Keys ``'split'``, ``'select'`` (arrays of p-values) and
        ``'hypotheses'`` (boolean array of correct selections).
    """
    X, mu, beta = parameters(n, rho, pos)

    Psplit = []
    Pselect = []
    hypotheses = []

    for _ in range(nsim):
        Y_select = (snr * mu / np.sqrt(2) + np.random.standard_normal(n)) * sigma
        con, _, select_pos, sign = covtest(X, Y_select, sigma=sigma, exact=True)

        cond_ncp = snr * np.dot(X.T[select_pos], mu) / np.sqrt(2) * sign

        correct = (sign == +1) and (pos == select_pos)
        hypotheses.append(correct)

        # Null reference sample.
        # NOTE(review): Z_observed multiplies the selection statistic by
        # `sign` but Z_null does not -- confirm `con` already fixes the sign.
        Y_null = sample_from_constraints(con, Y_select, ndraw=ndraw, burnin=burnin)
        Z_null = (np.dot(X.T[select_pos], Y_null.T)
                  + sigma * np.random.standard_normal(ndraw)) / np.sqrt(2)
        Z_inference = sigma * (cond_ncp + np.random.standard_normal())
        Z_observed = (np.dot(X.T[select_pos], Y_select) * sign
                      + Z_inference) / np.sqrt(2)
        dfam = discrete_family(Z_null, np.ones(Z_null.shape))
        Pselect.append(dfam.ccdf(0, Z_observed))

        # Sample-splitting p-value: one-sided in the selected direction.
        if sign == +1:
            Psplit.append(ndist.sf(Z_inference / sigma))
        else:
            Psplit.append(ndist.cdf(Z_inference / sigma))

    Ugrid = np.linspace(0,1,101)

    Psplit = np.array(Psplit)
    Pselect = np.array(Pselect)
    # `np.bool` was removed in NumPy 1.24; the builtin `bool` gives the
    # same dtype on every NumPy version.
    hypotheses = np.array(hypotheses, dtype=bool)

    # plot of marginal distribution of p-values

    fig1 = plt.figure(figsize=(8,8))
    ax1 = fig1.gca()
    ax1.plot(Ugrid, ECDF(Psplit)(Ugrid), label='Sample splitting',
             c='red', linewidth=5, alpha=0.5)
    ax1.plot(Ugrid, ECDF(Pselect)(Ugrid), label='Selected using $i^*(Z_S)$',
             c='blue', linewidth=5, alpha=0.5)
    ax1.set_xlabel('P-value, $p$', fontsize=20)
    ax1.set_ylabel('ECDF($p$)', fontsize=20)
    ax1.plot([0.05,0.05],[0,1], 'k--')
    ax1.legend(loc='lower right')

    # conditional distribution of p-values
    # conditioned on selection choosing correct position and sign

    fig2 = plt.figure(figsize=(8,8))
    ax2 = fig2.gca()
    ax2.plot(Ugrid, ECDF(Psplit[hypotheses])(Ugrid), label='Sample splitting',
             c='red', linewidth=5, alpha=0.5)
    ax2.plot(Ugrid, ECDF(Pselect[hypotheses])(Ugrid),
             label='Selected using $i^*(Z_S)$',
             c='blue', linewidth=5, alpha=0.5)
    ax2.set_xlabel('P-value, $p$', fontsize=20)
    ax2.set_ylabel('ECDF($p$)', fontsize=20)
    ax2.plot([0.05,0.05],[0,1], 'k--')
    ax2.legend(loc='lower right')

    dbn1 = {}
    dbn1['split'] = Psplit
    dbn1['select'] = Pselect
    dbn1['hypotheses'] = hypotheses

    return fig1, fig2, dbn1
def forward_step(X, Y, sigma=None,
                 nstep=5,
                 exact=False,
                 burnin=1000,
                 ndraw=5000):
    """
    A simple implementation of forward stepwise
    that uses the `reduced_covtest` iteratively
    after adjusting fully for the selected variable.

    This implementation is not efficient, in that it computes
    more SVDs than it really has to.

    Parameters
    ----------

    X : np.float((n,p))

    Y : np.float(n)

    sigma : float
        Noise level.  The default is None, but ``sigma**2`` is evaluated
        at every step, so None raises TypeError.

    nstep : int
        How many steps of forward stepwise?

    exact : bool
        Which version of covtest should we use?

    burnin : int
        How many iterations until we start
        recording samples?

    ndraw : int
        How many samples should we return?

    Returns
    -------

    covtest_P : list
        p-value returned by `covtest` at each step.

    reduced_Pknown : list
        First element of `gibbs_test` with
        ``sigma_known=sigma is not None`` at each step.

    reduced_Punknown : list
        First element of `gibbs_test` with ``sigma_known=False``
        at each step.

    spacings_P : list
        ``Acon.pivot(eta, Y)`` at each step.

    variables
        ``FS.variables`` after the last step.

    Notes
    -----

    NOTE(review): when ``FS.P[i]`` is None, ``RX`` aliases ``X`` and the
    centering / scaling below is done in place, so the caller's ``X`` is
    modified (assuming a NumPy array) and later steps see the
    standardized ``X`` -- confirm this is intended.
    """

    n, p = X.shape
    FS = forward_stepwise(X, Y)

    spacings_P = []
    covtest_P = []
    reduced_Pknown = []
    reduced_Punknown = []

    for i in range(nstep):
        FS.next()

        # covtest
        if FS.P[i] is not None:
            # Residualize design and response with the step-i projection.
            RX = X - FS.P[i](X)
            RY = Y - FS.P[i](Y)
            covariance = np.identity(n) - np.dot(FS.P[i].U, FS.P[i].U.T)
        else:
            # Aliases, not copies: the in-place ops below write to X.
            RX = X
            RY = Y
            covariance = None
        RX -= RX.mean(0)[None,:]
        RX /= RX.std(0)[None,:]

        con, pval, idx, sign = covtest(RX, RY, sigma=sigma,
                                       covariance=covariance,
                                       exact=exact)
        covtest_P.append(pval)

        # reduced

        eta = RX[:,idx] * sign
        Acon = constraints(FS.A, np.zeros(FS.A.shape[0]))
        Acon.covariance *= sigma**2
        if i > 0:
            # Condition on U.T of the second-to-last entry of FS.P.
            # NOTE(review): presumably the projection from the previous
            # step -- confirm against forward_stepwise.
            U = FS.P[-2].U.T
            Uy = np.dot(U, Y)
            Bcon = Acon.conditional(U, Uy)
        else:
            Bcon = Acon

        # The spacings pivot uses the unconditioned constraints (Acon),
        # while both gibbs tests use Bcon.
        spacings_P.append(Acon.pivot(eta, Y))

        reduced_pval, _, _ = gibbs_test(Bcon, Y, eta,
                                        ndraw=ndraw,
                                        burnin=burnin,
                                        sigma_known=sigma is not None,
                                        alternative='greater')
        reduced_Pknown.append(reduced_pval)

        reduced_pval, _, _ = gibbs_test(Bcon, Y, eta,
                                        ndraw=ndraw,
                                        burnin=burnin,
                                        sigma_known=False,
                                        alternative='greater')
        reduced_Punknown.append(reduced_pval)

    return covtest_P, reduced_Pknown, reduced_Punknown, spacings_P, FS.variables
def simulation(n, snr, pos, rho=0.25, nsim=5000, sigma=1.5):
    """
    Simulate p-values of several tests for the first selected variable
    and plot their empirical CDFs.

    For each of `nsim` responses the following p-values are recorded:

    * ``Pcov``   -- ``covtest(..., exact=False)``
    * ``Pexact`` -- ``covtest(..., exact=True)``
    * ``Pfixed`` -- ``2 * ndist.sf(|X.T Y|[pos] / sigma)``
    * ``Pu``     -- `reduced_covtest` without `sigma`
    * ``Pr``     -- `reduced_covtest` with `sigma`
    * ``Pmax``   -- ``max |X.T Y|`` compared with 10000 simulated maxima

    Parameters
    ----------
    n, rho, pos
        Passed to ``parameters(n, rho, pos)``, which supplies the design
        `X` and mean vector `mu`.  `pos` is also the index counted as the
        correct selection.
    snr : float
        Multiplier applied to `mu`.
    nsim : int
        Number of simulated responses.
    sigma : float
        Noise scale.

    Returns
    -------
    fig1 : matplotlib figure
        ECDFs of all p-values.
    fig2 : matplotlib figure
        ECDFs restricted to replications flagged in ``hypotheses``.
    dbn1 : dict
        Keys ``'exact'``, ``'covtest'``, ``'unknown'``, ``'known'``,
        ``'fixed'``, ``'max'`` and ``'hypotheses'``.

    Notes
    -----
    Calls ``np.random.seed(0)``, which resets NumPy's global random
    state for the caller as well.
    """
    # Design, mean vector and parameter vector

    X, mu, beta = parameters(n, rho, pos)

    Pcov = []
    Pexact = []
    Pu = []
    Pr = []
    Pfixed = []
    Pmax = []
    hypotheses = []

    # Set seed

    np.random.seed(0)

    # Max test

    max_stat = np.fabs(np.dot(X.T, np.random.standard_normal(
        (n, 10000)))).max(0) * sigma
    max_fam = discrete_family(max_stat, np.ones(max_stat.shape))
    max_fam.theta = 0

    for _ in range(nsim):
        Y = (snr * mu + np.random.standard_normal(n)) * sigma
        Z = np.dot(X.T, Y)

        # did this find the correct position and sign?
        correct = np.all(np.less_equal(np.fabs(Z), Z[pos]))
        hypotheses.append(correct)

        Pcov.append(covtest(X, Y, sigma=sigma, exact=False)[1])
        Pexact.append(covtest(X, Y, sigma=sigma, exact=True)[1])
        Pfixed.append(2 * ndist.sf(np.fabs(np.dot(X.T, Y))[pos] / sigma))
        Pu.append(reduced_covtest(X, Y, burnin=500, ndraw=5000)[1])
        Pr.append(
            reduced_covtest(X, Y, burnin=500, ndraw=5000, sigma=sigma)[1])
        p = max_fam.ccdf(0, np.fabs(np.dot(X.T, Y)).max())
        Pmax.append(p)

    Ugrid = np.linspace(0, 1, 101)

    Pcov = np.array(Pcov)
    Pexact = np.array(Pexact)
    Pu = np.array(Pu)
    Pr = np.array(Pr)
    Pfixed = np.array(Pfixed)
    Pmax = np.array(Pmax)

    # plot of marginal distribution of p-values

    # NOTE(review): Pcov comes from exact=False but is labelled
    # 'Full (exact)', and Pexact (exact=True) is labelled
    # 'Full (asymptotic)' -- the two labels look swapped; left unchanged
    # pending confirmation.
    fig1 = plt.figure(figsize=(8, 8))
    ax1 = fig1.gca()
    ax1.plot(Ugrid, ECDF(Pcov)(Ugrid), label='Full (exact)',
             c='red', linewidth=5, alpha=0.5)
    ax1.plot(Ugrid, ECDF(Pexact)(Ugrid), label='Full (asymptotic)',
             c='k', linewidth=5, alpha=0.5)
    ax1.plot(Ugrid, ECDF(Pmax)(Ugrid), label='Max test',
             c='cyan', linewidth=5, alpha=0.5)
    ax1.plot(Ugrid, ECDF(Pu)(Ugrid),
             label=r'Selected 1-sparse, $\sigma$ unknown',
             c='blue', linewidth=5, alpha=0.5)
    ax1.plot(Ugrid, ECDF(Pr)(Ugrid),
             label=r'Selected 1-sparse, $\sigma$ known',
             c='green', linewidth=5, alpha=0.5)
    ax1.plot(Ugrid, ECDF(Pfixed)(Ugrid),
             label=r'Fixed 1-sparse, $\sigma$ known',
             c='yellow', linewidth=5, alpha=0.5)
    ax1.set_xlabel('P-value, $p$', fontsize=20)
    ax1.set_ylabel('ECDF($p$)', fontsize=20)
    ax1.plot([0.05, 0.05], [0, 1], 'k--')
    ax1.legend(loc='lower right')

    # conditional distribution of p-values
    # conditioned on selection choosing correct position and sign

    fig2 = plt.figure(figsize=(8, 8))
    # `np.bool` was removed in NumPy 1.24; the builtin `bool` gives the
    # same dtype on every NumPy version.
    hypotheses = np.array(hypotheses, dtype=bool)
    ax2 = fig2.gca()
    ax2.plot(Ugrid, ECDF(Pcov[hypotheses])(Ugrid), label='Full (exact)',
             c='red', linewidth=5, alpha=0.5)
    ax2.plot(Ugrid, ECDF(Pexact[hypotheses])(Ugrid),
             label='Full (asymptotic)',
             c='k', linewidth=5, alpha=0.5)
    ax2.plot(Ugrid, ECDF(Pu[hypotheses])(Ugrid),
             label=r'Selected 1-sparse, $\sigma$ unknown',
             c='blue', linewidth=5, alpha=0.5)
    ax2.plot(Ugrid, ECDF(Pr[hypotheses])(Ugrid),
             label=r'Selected 1-sparse, $\sigma$ known',
             c='green', linewidth=5, alpha=0.5)
    ax2.set_xlabel('P-value, $p$', fontsize=20)
    ax2.set_ylabel('ECDF($p$)', fontsize=20)
    ax2.plot([0.05, 0.05], [0, 1], 'k--')
    ax2.legend(loc='lower right')

    dbn1 = {}
    dbn1['exact'] = Pexact
    dbn1['covtest'] = Pcov
    dbn1['unknown'] = Pu
    dbn1['known'] = Pr
    dbn1['fixed'] = Pfixed
    dbn1['max'] = Pmax
    dbn1['hypotheses'] = hypotheses

    return fig1, fig2, dbn1
def simulation(n, snr, pos, rho=0.25, nsim=5000, sigma=1.5):
    """
    Simulate p-values of several tests for the first selected variable
    and plot their empirical CDFs.

    For each of `nsim` responses the following p-values are recorded:

    * ``Pcov``   -- ``covtest(..., exact=False)``
    * ``Pexact`` -- ``covtest(..., exact=True)``
    * ``Pfixed`` -- ``2 * ndist.sf(|X.T Y|[pos] / sigma)``
    * ``Pu``     -- `reduced_covtest` without `sigma`
    * ``Pr``     -- `reduced_covtest` with `sigma`
    * ``Pmax``   -- ``max |X.T Y|`` compared with 10000 simulated maxima

    Parameters
    ----------
    n, rho, pos
        Passed to ``parameters(n, rho, pos)``, which supplies the design
        `X` and mean vector `mu`.  `pos` is also the index counted as the
        correct selection.
    snr : float
        Multiplier applied to `mu`.
    nsim : int
        Number of simulated responses.
    sigma : float
        Noise scale.

    Returns
    -------
    fig1 : matplotlib figure
        ECDFs of all p-values.
    fig2 : matplotlib figure
        ECDFs restricted to replications flagged in ``hypotheses``.
    dbn1 : dict
        Keys ``'exact'``, ``'covtest'``, ``'unknown'``, ``'known'``,
        ``'fixed'``, ``'max'`` and ``'hypotheses'``.

    Notes
    -----
    Calls ``np.random.seed(0)``, which resets NumPy's global random
    state for the caller as well.
    """
    # Design, mean vector and parameter vector

    X, mu, beta = parameters(n, rho, pos)

    Pcov = []
    Pexact = []
    Pu = []
    Pr = []
    Pfixed = []
    Pmax = []
    hypotheses = []

    # Set seed

    np.random.seed(0)

    # Max test

    max_stat = np.fabs(np.dot(X.T, np.random.standard_normal((n, 10000)))).max(0) * sigma
    max_fam = discrete_family(max_stat, np.ones(max_stat.shape))
    max_fam.theta = 0

    for _ in range(nsim):
        Y = (snr * mu + np.random.standard_normal(n)) * sigma
        Z = np.dot(X.T, Y)

        # did this find the correct position and sign?
        correct = np.all(np.less_equal(np.fabs(Z), Z[pos]))
        hypotheses.append(correct)

        Pcov.append(covtest(X, Y, sigma=sigma, exact=False)[1])
        Pexact.append(covtest(X, Y, sigma=sigma, exact=True)[1])
        Pfixed.append(2 * ndist.sf(np.fabs(np.dot(X.T, Y))[pos] / sigma))
        Pu.append(reduced_covtest(X, Y, burnin=500, ndraw=5000)[1])
        Pr.append(reduced_covtest(X, Y, burnin=500, ndraw=5000, sigma=sigma)[1])
        p = max_fam.ccdf(0, np.fabs(np.dot(X.T, Y)).max())
        Pmax.append(p)

    Ugrid = np.linspace(0,1,101)

    Pcov = np.array(Pcov)
    Pexact = np.array(Pexact)
    Pu = np.array(Pu)
    Pr = np.array(Pr)
    Pfixed = np.array(Pfixed)
    Pmax = np.array(Pmax)

    # plot of marginal distribution of p-values

    # NOTE(review): Pcov comes from exact=False but is labelled
    # 'Full (exact)', and Pexact (exact=True) is labelled
    # 'Full (asymptotic)' -- the two labels look swapped; left unchanged
    # pending confirmation.
    fig1 = plt.figure(figsize=(8,8))
    ax1 = fig1.gca()
    ax1.plot(Ugrid, ECDF(Pcov)(Ugrid), label='Full (exact)',
             c='red', linewidth=5, alpha=0.5)
    ax1.plot(Ugrid, ECDF(Pexact)(Ugrid), label='Full (asymptotic)',
             c='k', linewidth=5, alpha=0.5)
    ax1.plot(Ugrid, ECDF(Pmax)(Ugrid), label='Max test',
             c='cyan', linewidth=5, alpha=0.5)
    ax1.plot(Ugrid, ECDF(Pu)(Ugrid),
             label=r'Selected 1-sparse, $\sigma$ unknown',
             c='blue', linewidth=5, alpha=0.5)
    ax1.plot(Ugrid, ECDF(Pr)(Ugrid),
             label=r'Selected 1-sparse, $\sigma$ known',
             c='green', linewidth=5, alpha=0.5)
    ax1.plot(Ugrid, ECDF(Pfixed)(Ugrid),
             label=r'Fixed 1-sparse, $\sigma$ known',
             c='yellow', linewidth=5, alpha=0.5)
    ax1.set_xlabel('P-value, $p$', fontsize=20)
    ax1.set_ylabel('ECDF($p$)', fontsize=20)
    ax1.plot([0.05,0.05],[0,1], 'k--')
    ax1.legend(loc='lower right')

    # conditional distribution of p-values
    # conditioned on selection choosing correct position and sign

    fig2 = plt.figure(figsize=(8,8))
    # `np.bool` was removed in NumPy 1.24; the builtin `bool` gives the
    # same dtype on every NumPy version.
    hypotheses = np.array(hypotheses, dtype=bool)
    ax2 = fig2.gca()
    ax2.plot(Ugrid, ECDF(Pcov[hypotheses])(Ugrid), label='Full (exact)',
             c='red', linewidth=5, alpha=0.5)
    ax2.plot(Ugrid, ECDF(Pexact[hypotheses])(Ugrid),
             label='Full (asymptotic)',
             c='k', linewidth=5, alpha=0.5)
    ax2.plot(Ugrid, ECDF(Pu[hypotheses])(Ugrid),
             label=r'Selected 1-sparse, $\sigma$ unknown',
             c='blue', linewidth=5, alpha=0.5)
    ax2.plot(Ugrid, ECDF(Pr[hypotheses])(Ugrid),
             label=r'Selected 1-sparse, $\sigma$ known',
             c='green', linewidth=5, alpha=0.5)
    ax2.set_xlabel('P-value, $p$', fontsize=20)
    ax2.set_ylabel('ECDF($p$)', fontsize=20)
    ax2.plot([0.05,0.05],[0,1], 'k--')
    ax2.legend(loc='lower right')

    dbn1 = {}
    dbn1['exact'] = Pexact
    dbn1['covtest'] = Pcov
    dbn1['unknown'] = Pu
    dbn1['known'] = Pr
    dbn1['fixed'] = Pfixed
    dbn1['max'] = Pmax
    dbn1['hypotheses'] = hypotheses

    return fig1, fig2, dbn1
def forward_step(X, Y, sigma=None,
                 nstep=5,
                 exact=False,
                 burnin=1000,
                 ndraw=5000):
    """
    A simple implementation of forward stepwise
    that uses the `reduced_covtest` iteratively
    after adjusting fully for the selected variable.

    This implementation is not efficient, in that it computes
    more SVDs than it really has to.

    Parameters
    ----------

    X : np.float((n,p))

    Y : np.float(n)

    sigma : float
        Noise level.  The default is None, but ``sigma**2`` is evaluated
        at every step, so None raises TypeError.

    nstep : int
        How many steps of forward stepwise?

    exact : bool
        Which version of covtest should we use?

    burnin : int
        How many iterations until we start
        recording samples?

    ndraw : int
        How many samples should we return?

    Returns
    -------

    covtest_P : list
        p-value returned by `covtest` at each step.

    reduced_Pknown : list
        First element of `gibbs_test` with
        ``sigma_known=sigma is not None`` at each step.

    reduced_Punknown : list
        First element of `gibbs_test` with ``sigma_known=False``
        at each step.

    spacings_P : list
        ``Acon.pivot(eta, Y)`` at each step.

    variables
        ``FS.variables`` after the last step.

    Notes
    -----

    NOTE(review): when ``FS.P[i]`` is None, ``RX`` aliases ``X`` and the
    centering / scaling below is done in place, so the caller's ``X`` is
    modified (assuming a NumPy array) and later steps see the
    standardized ``X`` -- confirm this is intended.
    """

    n, p = X.shape
    FS = forward_stepwise(X, Y)

    spacings_P = []
    covtest_P = []
    reduced_Pknown = []
    reduced_Punknown = []

    for i in range(nstep):
        FS.next()

        # covtest
        if FS.P[i] is not None:
            # Residualize design and response with the step-i projection.
            RX = X - FS.P[i](X)
            RY = Y - FS.P[i](Y)
            covariance = np.identity(n) - np.dot(FS.P[i].U, FS.P[i].U.T)
        else:
            # Aliases, not copies: the in-place ops below write to X.
            RX = X
            RY = Y
            covariance = None
        RX -= RX.mean(0)[None, :]
        RX /= RX.std(0)[None, :]

        con, pval, idx, sign = covtest(RX, RY, sigma=sigma,
                                       covariance=covariance,
                                       exact=exact)
        covtest_P.append(pval)

        # reduced

        eta = RX[:, idx] * sign
        Acon = constraints(FS.A, np.zeros(FS.A.shape[0]))
        Acon.covariance *= sigma**2
        if i > 0:
            # Condition on U.T of the second-to-last entry of FS.P.
            # NOTE(review): presumably the projection from the previous
            # step -- confirm against forward_stepwise.
            U = FS.P[-2].U.T
            Uy = np.dot(U, Y)
            Bcon = Acon.conditional(U, Uy)
        else:
            Bcon = Acon

        # The spacings pivot uses the unconditioned constraints (Acon),
        # while both gibbs tests use Bcon.
        spacings_P.append(Acon.pivot(eta, Y))

        reduced_pval, _, _ = gibbs_test(Bcon, Y, eta,
                                        ndraw=ndraw,
                                        burnin=burnin,
                                        sigma_known=sigma is not None,
                                        alternative='greater')
        reduced_Pknown.append(reduced_pval)

        reduced_pval, _, _ = gibbs_test(Bcon, Y, eta,
                                        ndraw=ndraw,
                                        burnin=burnin,
                                        sigma_known=False,
                                        alternative='greater')
        reduced_Punknown.append(reduced_pval)

    return covtest_P, reduced_Pknown, reduced_Punknown, spacings_P, FS.variables