def test_subset(k=10):
    """Exercise `forward_stepwise` when a boolean case ``subset`` is supplied.

    Simulates a 100 x 200 Gaussian design in which every column shares a
    common per-row random component, masks out the last 10 cases, runs
    ``k`` steps of forward stepwise and prints pivots for the 3rd selected
    model. The run is then repeated without passing ``covariance``.

    Parameters
    ----------
    k : int
        Number of ``FS.next()`` steps to perform before computing pivots.
        NOTE(review): ``FS.variables[2]`` is read below, so presumably
        ``k`` must be at least 3 -- confirm against `forward_stepwise`.
    """
    n, p = 100, 200
    sigma = 0.5

    # Independent noise plus 0.4 times a per-row term broadcast to all columns.
    X = (np.random.standard_normal((n, p)) +
         0.4 * np.random.standard_normal(n)[:, None])
    # Divide each column by its standard deviation times sqrt(n).
    X /= (X.std(0)[None, :] * np.sqrt(n))
    Y = np.random.standard_normal(n) * sigma

    # The builtin `bool` replaces the `np.bool` alias, which was deprecated
    # in NumPy 1.20 and removed in 1.24.
    subset = np.ones(n, dtype=bool)
    subset[-10:] = False

    FS = forward_stepwise(X, Y, subset=subset,
                          covariance=sigma**2 * np.identity(n))
    for _ in range(k):
        FS.next()

    # Single-argument print(...) produces the same output under the
    # Python 2 print statement and the Python 3 print function.
    header = 'first %s variables selected' % k
    print('%s %s' % (header, FS.variables))

    message = ('pivots for last variable of 3rd selected model knowing that '
               'we performed %d steps of forward stepwise') % k
    print(message)

    print(FS.model_pivots(3, saturated=True))
    print(FS.model_pivots(3, saturated=False, which_var=[FS.variables[2]],
                          burnin=5000, ndraw=5000))

    # Same design and mask, but no covariance is passed this time
    # (presumably the unknown-variance code path -- confirm).
    FS = forward_stepwise(X, Y, subset=subset)
    for _ in range(k):
        FS.next()
    print(FS.model_pivots(3, saturated=False, which_var=[FS.variables[2]],
                          burnin=5000, ndraw=5000))
def test_full_pvals(n=100, p=40, rho=0.3, snr=4):
    """Run forward stepwise for ``min(n, p)`` steps, collecting three p-values per step.

    At every step the most recently selected variable is tested three ways:
    via ``FS.model_pivots(..., saturated=False)``, via
    ``FS.model_pivots(..., saturated=True)``, and with a nominal two-sided
    normal p-value computed from the least-squares fit on the currently
    selected columns.

    Parameters
    ----------
    n, p, rho, snr
        Forwarded unchanged to ``instance`` to generate the data.

    Returns
    -------
    X, y, beta, active, sigma
        Exactly what ``instance`` returned.
    pvals : ndarray
        One row per step: ``(variable, pval_select, pval_saturated,
        pval_nominal)``.
    completion_index : int or None
        1-based step at which every element of ``active`` had appeared in
        the first column of ``pvals``; ``None`` if that never happened.
    """
    from scipy.stats import norm as ndist

    X, y, beta, active, sigma = instance(n=n, p=p, snr=snr, rho=rho)
    FS = forward_stepwise(X, y, covariance=sigma**2 * np.identity(n))

    pval = []
    active_set = set(active)
    # Stays None when the active set is never fully selected; leaving the
    # name unassigned would make the return statement raise
    # UnboundLocalError.
    completion_index = None

    for i in range(min(n, p)):
        FS.next()

        var_select, pval_select = FS.model_pivots(
            i + 1,
            alternative='twosided',
            which_var=[FS.variables[-1]],
            saturated=False,
            burnin=2000,
            ndraw=8000)[0]
        pval_saturated = FS.model_pivots(
            i + 1,
            alternative='twosided',
            which_var=[FS.variables[-1]],
            saturated=True)[0][1]

        # Nominal p-value: the last row of the pseudo-inverse is the linear
        # functional giving the newest variable's least-squares coefficient.
        LSfunc = np.linalg.pinv(FS.X[:, FS.variables])
        Z = np.dot(LSfunc[-1], FS.Y) / (np.linalg.norm(LSfunc[-1]) * sigma)
        pval_nominal = 2 * ndist.sf(np.fabs(Z))

        pval.append((var_select, pval_select, pval_saturated, pval_nominal))

        # Check the cheap condition first so the array is no longer rebuilt
        # once completion has been recorded.
        if (completion_index is None and
                active_set.issubset(np.array(pval)[:, 0])):
            completion_index = i + 1

    return X, y, beta, active, sigma, np.array(pval), completion_index
def test_FS(k=10):
    """Exercise `forward_stepwise` with a known covariance and print its pivots.

    Simulates a 100 x 200 Gaussian design in which every column shares a
    common per-row random component, runs ``k`` steps of forward stepwise
    with covariance ``0.5**2 * I`` and prints the results of
    ``model_pivots`` and ``model_quadratic`` for the 3rd selected model.

    Parameters
    ----------
    k : int
        Number of ``FS.next()`` steps to perform before computing pivots.
        NOTE(review): ``FS.variables[2]`` is read below, so presumably
        ``k`` must be at least 3 -- confirm against `forward_stepwise`.
    """
    n, p = 100, 200
    sigma = 0.5

    # Independent noise plus 0.4 times a per-row term broadcast to all columns.
    X = (np.random.standard_normal((n, p)) +
         0.4 * np.random.standard_normal(n)[:, None])
    # Divide each column by its standard deviation times sqrt(n).
    X /= (X.std(0)[None, :] * np.sqrt(n))
    # Length tied to n rather than a second hard-coded 100.
    Y = np.random.standard_normal(n) * sigma

    FS = forward_stepwise(X, Y, covariance=sigma**2 * np.identity(n))
    for _ in range(k):
        FS.next()

    # Single-argument print(...) produces the same output under the
    # Python 2 print statement and the Python 3 print function.
    header = 'first %s variables selected' % k
    print('%s %s' % (header, FS.variables))

    message = ('pivots for 3rd selected model knowing that '
               'we performed %d steps of forward stepwise') % k
    print(message)

    print(FS.model_pivots(3))
    print(FS.model_pivots(3, saturated=False, which_var=[FS.variables[2]],
                          burnin=5000, ndraw=5000))
    print(FS.model_quadratic(3))
def test_full_pvals(n=100, p=40, rho=0.3, snr=4):
    """Collect selective, saturated and nominal p-values along a forward stepwise path.

    Data come from ``instance(n=n, p=p, snr=snr, rho=rho)``. Forward
    stepwise is run for ``min(n, p)`` steps; after each step the newest
    variable gets a p-value from ``model_pivots`` with ``saturated=False``,
    one with ``saturated=True``, and a nominal two-sided normal p-value
    from the least-squares fit on the selected columns.

    Parameters
    ----------
    n, p, rho, snr
        Passed straight through to ``instance``.

    Returns
    -------
    tuple
        ``(X, y, beta, active, sigma, pvals, completion_index)`` where the
        first five items are the outputs of ``instance``, ``pvals`` is an
        array with one ``(variable, pval_select, pval_saturated,
        pval_nominal)`` row per step, and ``completion_index`` is the
        1-based step at which all of ``active`` had been selected, or
        ``None`` if that never occurred.
    """
    from scipy.stats import norm as ndist

    X, y, beta, active, sigma = instance(n=n, p=p, snr=snr, rho=rho)
    FS = forward_stepwise(X, y, covariance=sigma**2 * np.identity(n))

    rows = []
    wanted = set(active)
    # Defined up front: without this, a path that never covers `active`
    # reached the return with the name unbound (UnboundLocalError).
    completion_index = None

    for step in range(1, min(n, p) + 1):
        FS.next()

        var_select, pval_select = FS.model_pivots(
            step,
            alternative='twosided',
            which_var=[FS.variables[-1]],
            saturated=False,
            burnin=2000,
            ndraw=8000)[0]
        pval_saturated = FS.model_pivots(
            step,
            alternative='twosided',
            which_var=[FS.variables[-1]],
            saturated=True)[0][1]

        # Nominal test: standardize the newest variable's least-squares
        # coefficient, read off the last row of the pseudo-inverse.
        LSfunc = np.linalg.pinv(FS.X[:, FS.variables])
        Z = np.dot(LSfunc[-1], FS.Y) / (np.linalg.norm(LSfunc[-1]) * sigma)
        pval_nominal = 2 * ndist.sf(np.fabs(Z))

        rows.append((var_select, pval_select, pval_saturated, pval_nominal))

        # Only the first covering step is recorded; testing the flag first
        # skips rebuilding the array afterwards.
        if (completion_index is None and
                wanted.issubset(np.array(rows)[:, 0])):
            completion_index = step

    return X, y, beta, active, sigma, np.array(rows), completion_index
def test_subset(k=10):
    """Exercise `forward_stepwise` when a boolean case ``subset`` is supplied.

    Builds a 100 x 200 Gaussian design whose columns all contain the same
    per-row random component, excludes the last 10 cases through a boolean
    mask, takes ``k`` forward stepwise steps and prints pivots for the 3rd
    selected model -- once with covariance ``0.5**2 * I`` and once with no
    covariance argument.

    Parameters
    ----------
    k : int
        Number of ``FS.next()`` steps to perform before computing pivots.
        NOTE(review): ``FS.variables[2]`` is read below, so presumably
        ``k`` must be at least 3 -- confirm against `forward_stepwise`.
    """
    n, p = 100, 200
    sigma = 0.5

    # Independent noise plus 0.4 times a per-row term broadcast to all columns.
    X = (np.random.standard_normal((n, p)) +
         0.4 * np.random.standard_normal(n)[:, None])
    # Divide each column by its standard deviation times sqrt(n).
    X /= (X.std(0)[None, :] * np.sqrt(n))
    Y = np.random.standard_normal(n) * sigma

    # `np.bool` was deprecated in NumPy 1.20 and removed in 1.24; the
    # builtin `bool` yields the same dtype on every NumPy version.
    subset = np.ones(n, dtype=bool)
    subset[-10:] = False

    FS = forward_stepwise(X, Y, subset=subset,
                          covariance=sigma**2 * np.identity(n))
    for _ in range(k):
        FS.next()

    # Each print receives one pre-formatted argument, so the output is the
    # same under Python 2's print statement and Python 3's print function.
    header = 'first %s variables selected' % k
    print('%s %s' % (header, FS.variables))

    message = ('pivots for last variable of 3rd selected model knowing that '
               'we performed %d steps of forward stepwise') % k
    print(message)

    print(FS.model_pivots(3, saturated=True))
    print(FS.model_pivots(3, saturated=False, which_var=[FS.variables[2]],
                          burnin=5000, ndraw=5000))

    # Second run on the same data without a covariance argument
    # (presumably the unknown-variance code path -- confirm).
    FS = forward_stepwise(X, Y, subset=subset)
    for _ in range(k):
        FS.next()
    print(FS.model_pivots(3, saturated=False, which_var=[FS.variables[2]],
                          burnin=5000, ndraw=5000))
def simulate_null(saturated=True):
    """Simulate one pure-noise data set and return pivots for the 3rd model.

    A 100 x 40 Gaussian design (independent noise plus a shared per-row
    component) is paired with a response that is independent of it. Five
    forward stepwise steps are taken with covariance ``0.5**2 * I``.

    Parameters
    ----------
    saturated : bool
        Forwarded to ``FS.model_pivots``.

    Returns
    -------
    list
        The last element of every item returned by
        ``FS.model_pivots(3, saturated=saturated, use_new=False)``.
    """
    n_obs, n_feat = 100, 40
    noise_sd = 0.5

    # Draw order (design noise, shared term, response) matches the original
    # so a seeded run consumes the random stream identically.
    idiosyncratic = np.random.standard_normal((n_obs, n_feat))
    shared = np.random.standard_normal(n_obs)
    X = idiosyncratic + 0.4 * shared[:, None]
    X /= (X.std(0)[None, :] * np.sqrt(n_obs))

    Y = np.random.standard_normal(n_obs) * noise_sd

    FS = forward_stepwise(X, Y, covariance=noise_sd**2 * np.identity(n_obs))
    for _ in range(5):
        FS.next()

    pivots = FS.model_pivots(3, saturated=saturated, use_new=False)
    return [entry[-1] for entry in pivots]
def simulate_null(saturated=True):
    """Return the trailing entry of each 3rd-model pivot for one null simulation.

    The response is drawn independently of the 100 x 40 design, forward
    stepwise is advanced 5 steps with covariance ``0.5**2 * I``, and
    ``model_pivots(3, saturated=saturated, use_new=False)`` is queried.

    Parameters
    ----------
    saturated : bool
        Passed through to ``FS.model_pivots``.

    Returns
    -------
    list
        ``item[-1]`` for every ``item`` in the ``model_pivots`` result.
    """
    rows, cols = 100, 40
    sd = 0.5

    # Keep the three random draws in the original order (design noise,
    # shared per-row term, response).
    design_noise = np.random.standard_normal((rows, cols))
    row_effect = np.random.standard_normal(rows)[:, None]
    X = design_noise + 0.4 * row_effect
    # Divide each column by its standard deviation times sqrt(rows).
    X /= (X.std(0)[None, :] * np.sqrt(rows))

    Y = np.random.standard_normal(rows) * sd

    FS = forward_stepwise(X, Y, covariance=sd**2 * np.identity(rows))
    steps_taken = 0
    while steps_taken < 5:
        FS.next()
        steps_taken += 1

    trailing = []
    for item in FS.model_pivots(3, saturated=saturated, use_new=False):
        trailing.append(item[-1])
    return trailing
def test_FS_unknown(k=10):
    """Exercise `forward_stepwise` when no covariance is supplied.

    Simulates a 100 x 200 Gaussian design in which every column shares a
    common per-row random component, runs ``k`` steps of forward stepwise
    without a ``covariance`` argument and prints the ``saturated=False``
    pivot for the 3rd selected variable.

    Parameters
    ----------
    k : int
        Number of ``FS.next()`` steps to perform before computing pivots.
        NOTE(review): ``FS.variables[2]`` is read below, so presumably
        ``k`` must be at least 3 -- confirm against `forward_stepwise`.
    """
    n, p = 100, 200

    # Independent noise plus 0.4 times a per-row term broadcast to all columns.
    X = (np.random.standard_normal((n, p)) +
         0.4 * np.random.standard_normal(n)[:, None])
    # Divide each column by its standard deviation times sqrt(n).
    X /= (X.std(0)[None, :] * np.sqrt(n))
    # Length tied to n rather than a second hard-coded 100.
    Y = np.random.standard_normal(n) * 0.5

    FS = forward_stepwise(X, Y)
    for _ in range(k):
        FS.next()

    # Single-argument print(...) produces the same output under the
    # Python 2 print statement and the Python 3 print function.
    header = 'first %s variables selected' % k
    print('%s %s' % (header, FS.variables))

    message = ('pivots for last variable of 3rd selected model knowing that '
               'we performed %d steps of forward stepwise') % k
    print(message)

    print(FS.model_pivots(3, saturated=False, which_var=[FS.variables[2]],
                          burnin=5000, ndraw=5000))