def test_skinny_fat():
    """Check that the 'fat' and 'skinny' sqrt-LASSO solvers agree.

    Solves the same weighted sqrt-LASSO problem with both
    implementations — once at the default instance size and once with
    p=50 — and asserts the two solutions match to rtol 1e-3.
    The original body duplicated the whole sequence verbatim; the loop
    below runs the identical statements for each instance configuration.
    """
    for instance_kwargs in ({}, {'p': 50}):
        X, Y = instance(**instance_kwargs)[:2]
        n, p = X.shape
        lam = choose_lambda(X)
        # construct both objectives to exercise each code path
        obj1 = sqlasso_objective(X, Y)
        obj2 = sqlasso_objective_skinny(X, Y)
        weights = np.ones(p) * lam
        soln1 = solve_sqrt_lasso_fat(X, Y,
                                     weights=weights,
                                     solve_args={'min_its': 500})[0]
        soln2 = solve_sqrt_lasso_skinny(X, Y,
                                        weights=weights,
                                        solve_args={'min_its': 500})[0]
        np.testing.assert_allclose(soln1, soln2, rtol=1.e-3)
def test_gaussian_approx(n=100, p=200, s=10):
    """
    using gaussian approximation for pvalues
    """
    sigma = 3
    y = np.random.standard_normal(n) * sigma
    beta = np.zeros(p)
    # strong fixed signal on the first s coordinates
    beta[:s] = 18

    X = np.random.standard_normal((n, p)) + 0.3 * np.random.standard_normal(n)[:, None]
    X /= (X.std(0)[None, :] * np.sqrt(n))
    y += np.dot(X, beta)

    lam_theor = choose_lambda(X, quantile=0.75)
    L = sqrt_lasso(y, X, lam_theor)
    L.fit(tol=1.e-10, min_its=80)

    P, P_gaussian, intervals = [], [], []
    if L.active.shape[0] > 0:
        # selected constraints must hold at the observed response
        np.testing.assert_array_less(np.dot(L.constraints.linear_part, L.y),
                                     L.constraints.offset)
        if set(range(s)).issubset(L.active):
            P = [pv[1] for pv in L.active_pvalues[s:]]
            P_gaussian = [pv[1] for pv in L.active_gaussian_pval[s:]]
            intervals = [u for u in L.active_gaussian_intervals
                         if u[0] in range(s)]
    return P, P_gaussian, intervals, beta
def test_goodness_of_fit(n=20, p=25, s=10, sigma=20., nsim=10,
                         burnin=2000, ndraw=8000):
    """Collect goodness-of-fit p-values under the global null.

    Repeatedly simulates null data, fits a sqrt-LASSO and computes a
    goodness-of-fit p-value until nsim non-NaN p-values are collected.

    Returns
    -------
    Pa : np.ndarray of collected (non-NaN) p-values
    mask : boolean array of False values, same shape as Pa
    """
    P = []
    while True:
        y = np.random.standard_normal(n) * sigma
        beta = np.zeros(p)
        X = np.random.standard_normal((n, p)) + 0.3 * np.random.standard_normal(n)[:, None]
        X /= (X.std(0)[None, :] * np.sqrt(n))
        y += np.dot(X, beta) * sigma
        lam_theor = .7 * choose_lambda(X, quantile=0.9)
        L = lasso.sqrt_lasso(X, y, lam_theor)
        L.fit()
        pval = goodness_of_fit(L,
                               lambda x: np.max(np.fabs(x)),
                               burnin=burnin,
                               ndraw=ndraw)
        P.append(pval)
        Pa = np.array(P)
        Pa = Pa[~np.isnan(Pa)]  # Pa is NaN-free from here on
        # FIX: original re-tested ~np.isnan(Pa) redundantly; after the
        # filter, len(Pa) is exactly the count of non-NaN p-values.
        if Pa.shape[0] >= nsim:
            break
    # FIX: np.bool was removed in NumPy 1.24 — use the builtin bool.
    return Pa, np.zeros_like(Pa, bool)
def test_sqrt_lasso_sandwich_pvals(n=200, p=50, s=10, sigma=10, rho=0.3,
                                   signal=6., use_lasso_sd=False):
    """Sandwich-covariance p-values for sqrt-LASSO under heteroscedastic noise."""
    X, y, beta, true_active, sigma, _ = instance(n=n,
                                                 p=p,
                                                 s=s,
                                                 sigma=sigma,
                                                 rho=rho,
                                                 signal=signal)

    # add heteroscedastic noise driven by the last three columns;
    # the loop draws noise in the same order as three explicit terms
    for col, shift in ((-1, 0.5), (-2, 0.2), (-3, 0.5)):
        y += sigma * np.random.standard_normal(n) * (np.fabs(X[:, col]) + shift)**2

    feature_weights = np.ones(p) * choose_lambda(X)
    feature_weights[10:12] = 0  # leave two variables unpenalized

    L_SQ = lasso.sqrt_lasso(X, y, feature_weights, covariance='sandwich')
    L_SQ.fit()
    if set(true_active).issubset(L_SQ.active):
        S = L_SQ.summary('twosided')
        return S['pval'], [v in true_active for v in S['variable']]
def test_class(n=20, p=40, s=2):
    """Smoke-test the sqrt_lasso class: fit, constraints, selected p-values."""
    y = np.random.standard_normal(n) * 1.2
    beta = np.zeros(p)
    beta[:s] = 5
    X = np.random.standard_normal((n, p)) + 0.3 * np.random.standard_normal(n)[:, None]
    y += np.dot(X, beta)

    lam_theor = 0.7 * choose_lambda(X, quantile=0.9)
    L = sqrt_lasso(y, X, lam_theor)
    L.fit(tol=1.e-10, min_its=80)

    P = []
    if L.active.shape[0] > 0:
        # the affine selection event must hold at the observed data
        np.testing.assert_array_less(np.dot(L.constraints.linear_part, L.y),
                                     L.constraints.offset)
        nt.assert_true(L.constraints(y))
        nt.assert_true(L.quasi_affine_constraints(y))
        if set(range(s)).issubset(L.active):
            # p-values for the selected null variables only
            P = [pv[1] for pv in L.active_pvalues[s:]]
        else:
            P = []
    return P
def test_goodness_of_fit(n=20, p=25, s=10, sigma=20., nsample=1000):
    """Collect goodness-of-fit p-values under the null and plot their ECDF.

    Simulates null data until nsample non-NaN p-values are gathered,
    then saves an ECDF-vs-uniform plot to "goodness_of_fit_uniform".
    """
    P = []
    while True:
        y = np.random.standard_normal(n) * sigma
        beta = np.zeros(p)
        X = np.random.standard_normal((n, p)) + 0.3 * np.random.standard_normal(n)[:, None]
        X /= (X.std(0)[None, :] * np.sqrt(n))
        y += np.dot(X, beta) * sigma
        lam_theor = .7 * choose_lambda(X, quantile=0.9)
        L = sqrt_lasso(y, X, lam_theor)
        L.fit(tol=1.e-12, min_its=150, max_its=200)
        pval = L.goodness_of_fit(lambda x: np.max(np.fabs(x)),
                                 burnin=10000,
                                 ndraw=10000)
        P.append(pval)
        Pa = np.array(P)
        Pa = Pa[~np.isnan(Pa)]  # Pa is NaN-free after this filter
        # FIX: removed redundant second isnan pass and dead
        # commented-out print statements; len(Pa) already counts the
        # valid p-values.
        if Pa.shape[0] >= nsample:
            break
    U = np.linspace(0, 1, nsample + 1)
    plt.plot(U, sm.distributions.ECDF(Pa)(U))
    plt.plot([0, 1], [0, 1])
    plt.savefig("goodness_of_fit_uniform", format="pdf")
def method_instance(self):
    """Return the randomized-LASSO method, constructing it on first access.

    The instance is cached on self._method_instance so repeated calls
    reuse the same object.
    """
    if not hasattr(self, "_method_instance"):
        n, p = self.X.shape
        # kappa scales the theoretical sqrt-lasso lambda
        lagrange = np.ones(p) * choose_lambda(self.X) * self.kappa
        scale = self.randomizer_scale * np.std(self.Y)
        self._method_instance = random_lasso_method.gaussian(self.X,
                                                             self.Y,
                                                             lagrange,
                                                             randomizer_scale=scale)
    return self._method_instance
def test_skinny_fat():
    """Check that the 'fat' and 'skinny' sqrt-LASSO solvers agree.

    Runs the identical comparison twice — default instance size and
    p=50 — asserting the two solvers match to rtol 1e-3. The original
    body duplicated the whole sequence verbatim; it is deduplicated
    into a single loop here.
    """
    for instance_kwargs in ({}, {'p': 50}):
        X, Y = instance(**instance_kwargs)[:2]
        n, p = X.shape
        lam = choose_lambda(X)
        # construct both objectives to exercise each code path
        obj1 = sqlasso_objective(X, Y)
        obj2 = sqlasso_objective_skinny(X, Y)
        weights = np.ones(p) * lam
        soln1 = solve_sqrt_lasso_fat(X, Y,
                                     weights=weights,
                                     solve_args={'min_its': 500})[0]
        soln2 = solve_sqrt_lasso_skinny(X, Y,
                                        weights=weights,
                                        solve_args={'min_its': 500})[0]
        np.testing.assert_allclose(soln1, soln2, rtol=1.e-3)
def test_skinny_fat():
    """Fat vs. skinny sqrt-LASSO solvers should return (almost) equal solutions."""
    for kwargs in ({}, {'p': 50}):
        X, Y = instance(**kwargs)[:2]
        n, p = X.shape
        lam = SQ.choose_lambda(X)
        # build both objectives so each implementation path is exercised
        obj1 = SQ.sqlasso_objective(X, Y)
        obj2 = SQ.sqlasso_objective_skinny(X, Y)
        w = np.ones(p) * lam
        soln1 = SQ.solve_sqrt_lasso_fat(X, Y, min_its=500, weights=w)
        soln2 = SQ.solve_sqrt_lasso_skinny(X, Y, min_its=500, weights=w)
        np.testing.assert_almost_equal(soln1, soln2)
def test_equivalence_sqrtlasso(n=200, p=400, s=10, sigma=3.):
    """
    Check equivalent LASSO and sqrtLASSO solutions.
    """
    Y = np.random.standard_normal(n) * sigma
    beta = np.zeros(p)
    beta[:s] = 8 * (2 * np.random.binomial(1, 0.5, size=(s,)) - 1)
    X = np.random.standard_normal((n, p)) + 0.3 * np.random.standard_normal(n)[:, None]
    X /= (X.std(0)[None, :] * np.sqrt(n))
    Y += np.dot(X, beta) * sigma

    lam_theor = choose_lambda(X, quantile=0.9)
    weights = lam_theor * np.ones(p)
    weights[:3] = 0.  # leave the first three variables unpenalized

    soln1, loss1 = solve_sqrt_lasso(X, Y,
                                    weights=weights,
                                    quadratic=None,
                                    solve_args={'min_its': 500,
                                                'tol': 1.e-10})
    G1 = loss1.smooth_objective(soln1, 'grad')

    # active set, then a residual-based estimate of sigma
    active = (soln1 != 0)
    nactive = active.sum()
    subgrad = np.sign(soln1[active]) * weights[active]
    X_E = X[:, active]
    X_Ei = np.linalg.pinv(X_E)
    resid = Y - X_E.dot(X_Ei.dot(Y))
    sigma_E = np.linalg.norm(resid) / np.sqrt(n - nactive)
    shrink = 1 - np.linalg.norm(X_Ei.T.dot(subgrad))**2
    multiplier = sigma_E * np.sqrt((n - nactive) / shrink)

    # XXX how should quadratic be changed?
    # multiply everything by sigma_E?
    loss2 = rr.glm.gaussian(X, Y)
    penalty = rr.weighted_l1norm(weights, lagrange=multiplier)
    problem = rr.simple_problem(loss2, penalty)
    soln2 = problem.solve(tol=1.e-12, min_its=200)
    G2 = loss2.smooth_objective(soln2, 'grad') / multiplier

    np.testing.assert_allclose(G1[3:], G2[3:])
    np.testing.assert_allclose(soln1, soln2)
def test_class_R(n=100, p=20):
    """Fit a sqrt_lasso on pure-noise data and expose quantities for an R comparison.

    Returns the active-constraint linear part, scaled offset, R_E and
    the first row of the pseudo-inverse of the active design — or four
    Nones when nothing is selected.
    """
    y = np.random.standard_normal(n)
    X = np.random.standard_normal((n, p))
    lam_theor = choose_lambda(X, quantile=0.25)

    L = sqrt_lasso(y, X, lam_theor)
    L.fit(tol=1.e-7)

    if L.active.shape[0] == 0:
        return None, None, None, None

    # selection event must hold at the observed response
    np.testing.assert_array_less(np.dot(L.constraints.linear_part, L.y),
                                 L.constraints.offset)
    return (L.active_constraints.linear_part,
            L.active_constraints.offset / L.sigma_E,
            L.R_E,
            L._XEinv[0])
def test_estimate_sigma(n=200, p=400, s=10, sigma=3.):
    """Compare sqrt-LASSO variance estimates to the true sigma.

    Returns
    -------
    (sigma_hat/sigma, sigma_E/sigma, df_E) when anything was selected,
    otherwise (None, None, None).
    """
    y = np.random.standard_normal(n) * sigma
    beta = np.zeros(p)
    beta[:s] = 8 * (2 * np.random.binomial(1, 0.5, size=(s,)) - 1)
    X = np.random.standard_normal((n, p)) + 0.3 * np.random.standard_normal(n)[:, None]
    X /= (X.std(0)[None, :] * np.sqrt(n))
    y += np.dot(X, beta) * sigma
    lam_theor = choose_lambda(X, quantile=0.9)
    L = sqrt_lasso(y, X, lam_theor)
    L.fit(tol=1.e-12, min_its=150)
    # FIX: removed unused local `P = []`
    if L.active.shape[0] > 0:
        return L.sigma_hat / sigma, L.sigma_E / sigma, L.df_E
    else:
        return (None,) * 3
def sqrt_lasso(X, Y, kappa, q=0.2):
    """Run sqrt-LASSO selection with BH correction and report results.

    Parameters
    ----------
    X, Y : design matrix and response
    kappa : multiplier applied to the theoretical lambda
    q : BH false discovery rate level

    Returns
    -------
    dict with method label, active set/signs, p-values, BH-selected
    variables and the fit's runtime in seconds.
    """
    toc = time.time()
    lam = choose_lambda(X)
    L = lasso.sqrt_lasso(X, Y, kappa * lam)
    L.fit()
    S = L.summary('onesided')
    tic = time.time()
    selected = sm.stats.multipletests(S['pval'], q, 'fdr_bh')[0]
    # FIX: label previously read r'$\kappa=%0.2f' — the math-text was
    # never closed, which breaks rendering of the method label.
    return {'method': [r'$\kappa=%0.2f$' % kappa],
            'active': [S['variable']],
            'active_signs': [L.active_signs],
            'pval': [S['pval']],
            'selected': [selected],
            'runtime': tic - toc}
def _generate_constraints(n=15, p=20, sigma=1):
    """Simulate until a sqrt_lasso fit selects >= 3 variables; return its constraints.

    The returned constraint object is whitened (identity covariance,
    zero mean) for downstream sampling tests.
    """
    while True:
        y = np.random.standard_normal(n) * sigma
        beta = np.zeros(p)
        X = np.random.standard_normal((n, p)) + 0.3 * np.random.standard_normal(n)[:, None]
        X /= (X.std(0)[None, :] * np.sqrt(n))
        y += np.dot(X, beta) * sigma
        lam_theor = 0.3 * choose_lambda(X, quantile=0.9)

        L = sqrt_lasso(y, X, lam_theor)
        L.fit(tol=1.e-12, min_its=150)
        con = L.active_constraints
        if con is not None and L.active.shape[0] >= 3:
            break

    # whiten: identity covariance, zero mean
    con.covariance = np.identity(con.covariance.shape[0])
    con.mean *= 0
    return con, y, L
def _generate_constraints(n=15, p=20, sigma=1):
    """Draw null instances until sqrt_lasso picks at least 3 variables.

    Returns the (whitened) active constraints, the response and the fit.
    """
    con = None
    while con is None:
        y = np.random.standard_normal(n) * sigma
        beta = np.zeros(p)
        X = np.random.standard_normal((n, p)) + 0.3 * np.random.standard_normal(n)[:, None]
        X /= (X.std(0)[None, :] * np.sqrt(n))
        y += np.dot(X, beta) * sigma

        lam_theor = 0.3 * choose_lambda(X, quantile=0.9)
        L = sqrt_lasso(y, X, lam_theor)
        L.fit(tol=1.e-12, min_its=150)

        candidate = L.active_constraints
        if candidate is not None and L.active.shape[0] >= 3:
            con = candidate

    # standardize the constraint for sampling: identity covariance, zero mean
    con.covariance = np.identity(con.covariance.shape[0])
    con.mean *= 0
    return con, y, L
def test_equivalence_sqrtlasso(n=200, p=400, s=10, sigma=3.):
    """
    Check equivalent LASSO and sqrtLASSO solutions.
    """
    Y = np.random.standard_normal(n) * sigma
    beta = np.zeros(p)
    beta[:s] = 8 * (2 * np.random.binomial(1, 0.5, size=(s,)) - 1)
    X = np.random.standard_normal((n,p)) + 0.3 * np.random.standard_normal(n)[:,None]
    X /= (X.std(0)[None,:] * np.sqrt(n))
    Y += np.dot(X, beta) * sigma
    lam_theor = choose_lambda(X, quantile=0.9)
    weights = lam_theor*np.ones(p)
    # first three coordinates are unpenalized
    weights[:3] = 0.
    soln1, loss1 = solve_sqrt_lasso(X, Y, weights=weights, quadratic=None, solve_args={'min_its':500, 'tol':1.e-10})

    G1 = loss1.smooth_objective(soln1, 'grad')

    # find active set, and estimate of sigma
    active = (soln1 != 0)
    nactive = active.sum()
    subgrad = np.sign(soln1[active]) * weights[active]
    X_E = X[:,active]
    X_Ei = np.linalg.pinv(X_E)
    # residual-based estimate of the noise scale
    sigma_E= np.linalg.norm(Y - X_E.dot(X_Ei.dot(Y))) / np.sqrt(n - nactive)
    # lagrange multiplier that makes the plain LASSO match the sqrt-LASSO
    multiplier = sigma_E * np.sqrt((n - nactive) / (1 - np.linalg.norm(X_Ei.T.dot(subgrad))**2))

    # XXX how should quadratic be changed?
    # multiply everything by sigma_E?
    loss2 = rr.glm.gaussian(X, Y)
    penalty = rr.weighted_l1norm(weights, lagrange=multiplier)
    problem = rr.simple_problem(loss2, penalty)

    soln2 = problem.solve(tol=1.e-12, min_its=200)
    G2 = loss2.smooth_objective(soln2, 'grad') / multiplier

    # gradients (past the unpenalized block) and solutions should agree
    np.testing.assert_allclose(G1[3:], G2[3:])
    np.testing.assert_allclose(soln1, soln2)
def test_goodness_of_fit(n=20, p=25, s=10, sigma=20., nsim=1000,
                         burnin=2000, ndraw=8000):
    """Collect null goodness-of-fit p-values and save an ECDF-vs-uniform plot."""
    pvals = []
    while True:
        y = np.random.standard_normal(n) * sigma
        beta = np.zeros(p)
        X = np.random.standard_normal((n, p)) + 0.3 * np.random.standard_normal(n)[:, None]
        X /= (X.std(0)[None, :] * np.sqrt(n))
        y += np.dot(X, beta) * sigma

        lam_theor = .7 * choose_lambda(X, quantile=0.9)
        L = lasso.sqrt_lasso(X, y, lam_theor)
        L.fit()
        pvals.append(goodness_of_fit(L,
                                     lambda x: np.max(np.fabs(x)),
                                     burnin=burnin,
                                     ndraw=ndraw))

        Pa = np.array(pvals)
        Pa = Pa[~np.isnan(Pa)]
        if (~np.isnan(np.array(Pa))).sum() >= nsim:
            break

    # make any plots not use display
    from matplotlib import use
    use('Agg')
    import matplotlib.pyplot as plt

    # used for ECDF
    import statsmodels.api as sm

    U = np.linspace(0, 1, 101)
    plt.plot(U, sm.distributions.ECDF(Pa)(U))
    plt.plot([0, 1], [0, 1])
    plt.savefig("goodness_of_fit_uniform", format="pdf")
def _generate_constraints(n=15, p=10, sigma=1):
    """Simulate until lasso.sqrt_lasso selects >= 3 variables.

    Rebuilds the selection event as an explicit affine constraint
    (whitened: identity covariance, zero mean) and returns it together
    with the response, the fit and the design.
    """
    while True:
        y = np.random.standard_normal(n) * sigma
        beta = np.zeros(p)
        X = np.random.standard_normal((n, p)) + 0.3 * np.random.standard_normal(n)[:, None]
        X /= (X.std(0)[None, :] * np.sqrt(n))
        y += np.dot(X, beta) * sigma

        lam_theor = 0.3 * choose_lambda(X, quantile=0.9)
        L = lasso.sqrt_lasso(X, y, lam_theor)
        L.fit(solve_args={'tol': 1.e-12, 'min_its': 150})
        con = L.constraints
        if con is not None and L.active.shape[0] >= 3:
            break

    # re-express the sign constraints on the active coordinates
    offset = con.offset
    linear_part = -L.active_signs[:, None] * np.linalg.pinv(X[:, L.active])
    con = AC.constraints(linear_part, offset)
    con.covariance = np.identity(con.covariance.shape[0])
    con.mean *= 0
    return con, y, L, X
def _generate_constraints(n=15, p=10, sigma=1):
    """Draw null data until the sqrt-LASSO active set has at least 3 variables.

    Returns a whitened affine constraint built from the active signs,
    plus the response, the fit and the design matrix.
    """
    con = None
    while con is None:
        y = np.random.standard_normal(n) * sigma
        beta = np.zeros(p)
        X = np.random.standard_normal((n, p)) + 0.3 * np.random.standard_normal(n)[:, None]
        X /= (X.std(0)[None, :] * np.sqrt(n))
        y += np.dot(X, beta) * sigma

        lam_theor = 0.3 * choose_lambda(X, quantile=0.9)
        L = lasso.sqrt_lasso(X, y, lam_theor)
        L.fit(solve_args={'tol': 1.e-12, 'min_its': 150})

        candidate = L.constraints
        if candidate is not None and L.active.shape[0] >= 3:
            con = candidate

    # rebuild the selection event from active signs and the pseudo-inverse
    offset = con.offset
    linear_part = -L.active_signs[:, None] * np.linalg.pinv(X[:, L.active])
    con = AC.constraints(linear_part, offset)
    con.covariance = np.identity(con.covariance.shape[0])
    con.mean *= 0
    return con, y, L, X
def test_goodness_of_fit(n=20, p=25, s=10, sigma=20., nsim=1000,
                         burnin=2000, ndraw=8000):
    """Gather goodness-of-fit p-values under the null, then plot their ECDF."""
    collected = []
    while True:
        y = np.random.standard_normal(n) * sigma
        beta = np.zeros(p)
        X = np.random.standard_normal((n, p)) + 0.3 * np.random.standard_normal(n)[:, None]
        X /= (X.std(0)[None, :] * np.sqrt(n))
        y += np.dot(X, beta) * sigma

        lam_theor = .7 * choose_lambda(X, quantile=0.9)
        fit = lasso.sqrt_lasso(X, y, lam_theor)
        fit.fit()
        collected.append(goodness_of_fit(fit,
                                         lambda x: np.max(np.fabs(x)),
                                         burnin=burnin,
                                         ndraw=ndraw))

        Pa = np.array(collected)
        Pa = Pa[~np.isnan(Pa)]
        if (~np.isnan(np.array(Pa))).sum() >= nsim:
            break

    # make any plots not use display
    from matplotlib import use
    use('Agg')
    import matplotlib.pyplot as plt

    # used for ECDF
    import statsmodels.api as sm

    grid = np.linspace(0, 1, 101)
    plt.plot(grid, sm.distributions.ECDF(Pa)(grid))
    plt.plot([0, 1], [0, 1])
    plt.savefig("goodness_of_fit_uniform", format="pdf")
def test_class(n=20, p=40, s=2):
    """Exercise the sqrt_lasso class: fit, verify constraints, return null p-values."""
    y = np.random.standard_normal(n) * 1.2
    beta = np.zeros(p)
    beta[:s] = 5
    X = np.random.standard_normal((n, p)) + 0.3 * np.random.standard_normal(n)[:, None]
    y += np.dot(X, beta)

    lam_theor = 0.7 * choose_lambda(X, quantile=0.9)
    L = sqrt_lasso(y, X, lam_theor)
    L.fit(tol=1.e-10, min_its=80)

    P = []
    if L.active.shape[0] > 0:
        # the affine selection event must be satisfied by the data
        np.testing.assert_array_less(np.dot(L.constraints.linear_part, L.y),
                                     L.constraints.offset)
        nt.assert_true(L.constraints(y))
        nt.assert_true(L.quasi_affine_constraints(y))
        if set(range(s)).issubset(L.active):
            # p-values for the selected null coordinates
            P = [pv[1] for pv in L.active_pvalues[s:]]
        else:
            P = []
    return P
def __init__(self, X, Y, l_theory, l_min, l_1se, sigma_reid):
    # Delegate common setup (data and lambda candidates) to the base class.
    parametric_method.__init__(self, X, Y, l_theory, l_min, l_1se, sigma_reid)
    # Lagrange parameter: theoretical sqrt-lasso lambda scaled by kappa.
    # NOTE(review): self.kappa is presumably a class attribute set on the
    # subclass — confirm; it is not assigned in this constructor.
    self.lagrange = self.kappa * choose_lambda(X)