def test_sqrt_lasso_pvals(n=100, p=200, s=7, sigma=5, rho=0.3, snr=7.):
    """Fit a sqrt-lasso (with three unpenalized coordinates) on a simulated
    instance; if the true support was recovered, return the p-values and a
    truth indicator per reported variable (otherwise returns None)."""
    X, y, beta, true_active, sigma = instance(n=n, p=p, s=s,
                                              sigma=sigma, rho=rho, snr=snr)

    # Monte-Carlo approximation of the theoretical lambda for sqrt-lasso.
    noise = np.random.standard_normal((n, 1000))
    lam_theor = np.mean(np.fabs(np.dot(X.T, noise)).max(0)) / np.sqrt(n)

    # NOTE(review): Q is constructed but never used below — presumably kept
    # for parity with sibling tests; confirm before removing.
    Q = rr.identity_quadratic(0.01, 0, np.ones(p), 0)

    weights = 0.7 * lam_theor * np.ones(p)
    weights[:3] = 0.  # leave the first three coordinates unpenalized

    # Exercise the parametric-covariance constructor as well (result unused).
    lasso.sqrt_lasso(X, y, weights, covariance='parametric')

    L = lasso.sqrt_lasso(X, y, weights)
    L.fit()
    if set(true_active).issubset(L.active):
        # Run both summaries; the returned values come from the two-sided one.
        S = L.summary('onesided')
        S = L.summary('twosided')
        return S['pval'], [v in true_active for v in S['variable']]
def test_sqrt_lasso_pvals(n=100, p=200, s=7, sigma=5, rho=0.3, snr=7.):
    """Resimulate until the true support is recovered by sqrt-lasso, then
    return the p-values of the *inactive* (null) selected variables.

    Alternates between two-sided and one-sided summaries on successive
    attempts so both code paths get exercised over repeated calls.
    """
    trial = 0
    while True:
        trial += 1
        X, y, beta, active, sigma = instance(n=n, p=p, s=s,
                                             sigma=sigma, rho=rho, snr=snr)

        # Monte-Carlo approximation of the theoretical lambda.
        draws = np.random.standard_normal((n, 1000))
        lam_theor = np.mean(np.fabs(np.dot(X.T, draws)).max(0)) / np.sqrt(n)

        # NOTE(review): Q is constructed but never used — confirm intent.
        Q = rr.identity_quadratic(0.01, 0, np.ones(p), 0)

        weights = 0.7 * lam_theor * np.ones(p)
        weights[:3] = 0.  # first three coordinates unpenalized

        L = lasso.sqrt_lasso(X, y, weights)
        L.fit()

        # odd trials -> 'twosided', even trials -> 'onesided'
        alternative = 'twosided' if trial % 2 else 'onesided'
        if set(active).issubset(L.active):
            S = L.summary(alternative)
            null_pvals = []
            for pv, var in zip(S['pval'], S['variable']):
                if var not in active:
                    null_pvals.append(pv)
            return null_pvals
def test_sqrt_lasso(n=100, p=20):
    """Generator test: fit sqrt-lasso under every combination of quadratic
    term and feature-weight scheme, and yield a feasibility assertion that
    the one-step estimator satisfies the selection constraints."""
    y = np.random.standard_normal(n)
    X = np.random.standard_normal((n, p))

    # Monte-Carlo approximation of the theoretical lambda.
    draws = np.random.standard_normal((n, 1000))
    lam_theor = np.mean(np.fabs(np.dot(X.T, draws)).max(0)) / np.sqrt(n)

    Q = rr.identity_quadratic(0.01, 0, np.random.standard_normal(p) / 5., 0)

    zeroed_weights = 0.5 * lam_theor * np.ones(p)
    zeroed_weights[:3] = 0.          # first three coordinates unpenalized
    huge_weights = zeroed_weights * 10000  # forces (near-)empty active set

    for quad, fw in product([None, Q],
                            [0.5 * lam_theor, zeroed_weights, huge_weights]):
        L = lasso.sqrt_lasso(X, y, fw, quadratic=quad,
                             solve_args={'min_its': 300, 'tol': 1.e-12})
        L.fit(solve_args={'min_its': 300, 'tol': 1.e-12})
        C = L.constraints
        # run both summary code paths
        S = L.summary('onesided', compute_intervals=True)
        S = L.summary('twosided')
        yield (np.testing.assert_array_less,
               np.dot(L.constraints.linear_part, L.onestep_estimator),
               L.constraints.offset)
def test_sqrt_lasso_pvals(n=100, p=200, s=7, sigma=5, rho=0.3, snr=7.):
    """Loop until a sqrt-lasso fit captures the true support, alternating
    the summary alternative each attempt; return the p-values attached to
    variables outside the true support."""
    attempts = 0
    alternatives = {1: 'twosided', 0: 'onesided'}
    while True:
        attempts += 1
        X, y, beta, active, sigma = instance(n=n, p=p, s=s,
                                             sigma=sigma, rho=rho, snr=snr)

        # empirical estimate of the theoretical lambda
        Z = np.random.standard_normal((n, 1000))
        lam_theor = np.mean(np.fabs(np.dot(X.T, Z)).max(0)) / np.sqrt(n)

        # NOTE(review): Q is never used afterwards — confirm intent.
        Q = rr.identity_quadratic(0.01, 0, np.ones(p), 0)

        fw = 0.7 * lam_theor * np.ones(p)
        fw[:3] = 0.  # unpenalized leading coordinates

        L = lasso.sqrt_lasso(X, y, fw)
        L.fit()

        which = alternatives[attempts % 2]
        if set(active).issubset(L.active):
            S = L.summary(which)
            return [pv for pv, var in zip(S['pval'], S['variable'])
                    if var not in active]
def test_sqrt_lasso(n=100, p=20):
    """Yield constraint-feasibility checks for sqrt-lasso fits across a grid
    of (quadratic, feature-weight) configurations."""
    y = np.random.standard_normal(n)
    X = np.random.standard_normal((n, p))

    # theoretical lambda estimated from simulated pure-noise responses
    lam_theor = np.mean(
        np.fabs(np.dot(X.T,
                       np.random.standard_normal((n, 1000)))).max(0)
    ) / np.sqrt(n)

    Q = rr.identity_quadratic(0.01, 0, np.random.standard_normal(p) / 5., 0)

    partially_zero = 0.5 * lam_theor * np.ones(p)
    partially_zero[:3] = 0.
    enormous = partially_zero * 10000

    weight_choices = [0.5 * lam_theor, partially_zero, enormous]
    for q, feature_weights in product([None, Q], weight_choices):
        L = lasso.sqrt_lasso(X, y, feature_weights, quadratic=q,
                             solve_args={'min_its': 300, 'tol': 1.e-12})
        L.fit(solve_args={'min_its': 300, 'tol': 1.e-12})
        C = L.constraints
        S = L.summary('onesided', compute_intervals=True)
        S = L.summary('twosided')
        yield (np.testing.assert_array_less,
               np.dot(L.constraints.linear_part, L.onestep_estimator),
               L.constraints.offset)
def test_sqrt_lasso_sandwich_pvals(n=200, p=50, s=10, sigma=10, rho=0.3,
                                   signal=6., use_lasso_sd=False):
    """Fit sqrt-lasso with a sandwich covariance estimate on data with
    injected heteroscedastic noise; if the true support is recovered,
    return p-values and per-variable truth indicators."""
    X, y, beta, true_active, sigma, _ = instance(n=n, p=p, s=s, sigma=sigma,
                                                 rho=rho, signal=signal)

    # Noise whose scale depends on the last three columns of X, making the
    # errors heteroscedastic (the sandwich estimator's use case).
    extra = sigma * np.random.standard_normal(n) * (np.fabs(X[:, -1]) + 0.5)**2
    extra += sigma * np.random.standard_normal(n) * (np.fabs(X[:, -2]) + 0.2)**2
    extra += sigma * np.random.standard_normal(n) * (np.fabs(X[:, -3]) + 0.5)**2
    y += extra

    feature_weights = np.ones(p) * choose_lambda(X)
    feature_weights[10:12] = 0  # leave coordinates 10 and 11 unpenalized

    L_SQ = lasso.sqrt_lasso(X, y, feature_weights, covariance='sandwich')
    L_SQ.fit()
    if set(true_active).issubset(L_SQ.active):
        S = L_SQ.summary('twosided')
        return S['pval'], [v in true_active for v in S['variable']]
def test_goodness_of_fit(n=20, p=25, s=10, sigma=20., nsim=10,
                         burnin=2000, ndraw=8000):
    """Accumulate goodness-of-fit p-values for sqrt-lasso fits simulated
    under the global null (beta == 0) until `nsim` non-NaN values are
    collected.

    Returns (pvals, expected_rejections) where expected_rejections is an
    all-False boolean array of the same length (no signal was injected).
    """
    P = []
    while True:
        # simulate a null model: y has no dependence on X (beta is zero)
        y = np.random.standard_normal(n) * sigma
        beta = np.zeros(p)
        X = np.random.standard_normal((n, p)) \
            + 0.3 * np.random.standard_normal(n)[:, None]
        X /= (X.std(0)[None, :] * np.sqrt(n))
        y += np.dot(X, beta) * sigma

        lam_theor = .7 * choose_lambda(X, quantile=0.9)
        L = lasso.sqrt_lasso(X, y, lam_theor)
        L.fit()

        pval = goodness_of_fit(L,
                               lambda x: np.max(np.fabs(x)),
                               burnin=burnin,
                               ndraw=ndraw)
        P.append(pval)

        # keep only the p-values that came back well-defined
        Pa = np.array(P)
        Pa = Pa[~np.isnan(Pa)]
        # Pa is already NaN-free, so its length is the count of valid sims
        # (the original re-ran np.isnan on the filtered array redundantly).
        if Pa.shape[0] >= nsim:
            break
    # BUGFIX: np.bool was removed from NumPy (deprecated 1.20, removed
    # 1.24); use the builtin bool as the dtype instead.
    return Pa, np.zeros_like(Pa, dtype=bool)
def sqrt_lasso(X, Y, kappa, q=0.2):
    """Fit sqrt-lasso at penalty kappa * choose_lambda(X), apply BH
    selection at level q to the one-sided p-values, and return a one-row
    summary dict (lists of values plus the fit/summary runtime)."""
    start = time.time()
    lam = choose_lambda(X)
    L = lasso.sqrt_lasso(X, Y, kappa * lam)
    L.fit()
    S = L.summary('onesided')
    stop = time.time()  # timing covers solve + summary, not BH selection

    selected = sm.stats.multipletests(S['pval'], q, 'fdr_bh')[0]
    return {'method': [r'$\kappa=%0.2f' % kappa],
            'active': [S['variable']],
            'active_signs': [L.active_signs],
            'pval': [S['pval']],
            'selected': [selected],
            'runtime': stop - start}
def test_goodness_of_fit(n=20, p=25, s=10, sigma=20., nsim=1000,
                         burnin=2000, ndraw=8000):
    """Collect goodness-of-fit p-values under the global null until nsim
    non-NaN values exist, then plot their ECDF against Uniform(0, 1) and
    save the figure to disk."""
    collected = []
    while True:
        # null simulation: beta is identically zero
        y = np.random.standard_normal(n) * sigma
        beta = np.zeros(p)
        X = np.random.standard_normal((n, p)) \
            + 0.3 * np.random.standard_normal(n)[:, None]
        X /= (X.std(0)[None, :] * np.sqrt(n))
        y += np.dot(X, beta) * sigma

        lam_theor = .7 * choose_lambda(X, quantile=0.9)
        L = lasso.sqrt_lasso(X, y, lam_theor)
        L.fit()

        pval = goodness_of_fit(L,
                               lambda x: np.max(np.fabs(x)),
                               burnin=burnin,
                               ndraw=ndraw)
        collected.append(pval)

        Pa = np.array(collected)
        Pa = Pa[~np.isnan(Pa)]
        if (~np.isnan(np.array(Pa))).sum() >= nsim:
            break

    # make any plots not use display
    from matplotlib import use
    use('Agg')
    import matplotlib.pyplot as plt

    # used for ECDF
    import statsmodels.api as sm

    grid = np.linspace(0, 1, 101)
    plt.plot(grid, sm.distributions.ECDF(Pa)(grid))
    plt.plot([0, 1], [0, 1])
    plt.savefig("goodness_of_fit_uniform", format="pdf")
def _generate_constraints(n=15, p=10, sigma=1):
    """Simulate under the global null until a sqrt-lasso fit selects at
    least three variables, then rebuild its selection event as a whitened
    affine constraint (identity covariance, zero mean).

    Returns (constraints, y, fitted_lasso, X).
    """
    while True:
        y = np.random.standard_normal(n) * sigma
        beta = np.zeros(p)  # global null
        X = np.random.standard_normal((n, p)) \
            + 0.3 * np.random.standard_normal(n)[:, None]
        X /= (X.std(0)[None, :] * np.sqrt(n))
        y += np.dot(X, beta) * sigma

        lam_theor = 0.3 * choose_lambda(X, quantile=0.9)
        L = lasso.sqrt_lasso(X, y, lam_theor)
        L.fit(solve_args={'tol': 1.e-12, 'min_its': 150})

        con = L.constraints
        if con is not None and L.active.shape[0] >= 3:
            break

    # rebuild the constraint from the active set's sign-adjusted pseudoinverse
    offset = con.offset
    linear_part = -L.active_signs[:, None] * np.linalg.pinv(X[:, L.active])
    con = AC.constraints(linear_part, offset)
    con.covariance = np.identity(con.covariance.shape[0])
    con.mean *= 0
    return con, y, L, X
def _generate_constraints(n=15, p=10, sigma=1):
    """Draw null datasets until sqrt-lasso selects >= 3 variables; convert
    the resulting selection event into affine constraints with identity
    covariance and zero mean, returning (constraints, y, fit, X)."""
    fit = None
    while fit is None or fit.constraints is None or fit.active.shape[0] < 3:
        y = np.random.standard_normal(n) * sigma
        beta = np.zeros(p)  # no signal
        X = np.random.standard_normal((n, p)) \
            + 0.3 * np.random.standard_normal(n)[:, None]
        X /= (X.std(0)[None, :] * np.sqrt(n))
        y += np.dot(X, beta) * sigma

        lam_theor = 0.3 * choose_lambda(X, quantile=0.9)
        fit = lasso.sqrt_lasso(X, y, lam_theor)
        fit.fit(solve_args={'tol': 1.e-12, 'min_its': 150})

    # whiten: keep the linear geometry, standardize covariance and mean
    offset = fit.constraints.offset
    linear_part = -fit.active_signs[:, None] * np.linalg.pinv(X[:, fit.active])
    con = AC.constraints(linear_part, offset)
    con.covariance = np.identity(con.covariance.shape[0])
    con.mean *= 0
    return con, y, fit, X
def test_goodness_of_fit(n=20, p=25, s=10, sigma=20., nsim=1000,
                         burnin=2000, ndraw=8000):
    """Gather goodness-of-fit p-values from repeated null-model sqrt-lasso
    fits until nsim valid values are in hand, then save an ECDF-vs-uniform
    plot as a PDF (headless Agg backend)."""
    P = []
    while True:
        # response independent of X: beta is the zero vector
        y = np.random.standard_normal(n) * sigma
        beta = np.zeros(p)
        shift = 0.3 * np.random.standard_normal(n)[:, None]
        X = np.random.standard_normal((n, p)) + shift
        X /= (X.std(0)[None, :] * np.sqrt(n))
        y += np.dot(X, beta) * sigma

        lam_theor = .7 * choose_lambda(X, quantile=0.9)
        L = lasso.sqrt_lasso(X, y, lam_theor)
        L.fit()

        P.append(goodness_of_fit(L,
                                 lambda x: np.max(np.fabs(x)),
                                 burnin=burnin,
                                 ndraw=ndraw))

        Pa = np.array(P)
        Pa = Pa[~np.isnan(Pa)]
        if (~np.isnan(np.array(Pa))).sum() >= nsim:
            break

    # make any plots not use display
    from matplotlib import use
    use('Agg')
    import matplotlib.pyplot as plt

    # used for ECDF
    import statsmodels.api as sm

    U = np.linspace(0, 1, 101)
    plt.plot(U, sm.distributions.ECDF(Pa)(U))
    plt.plot([0, 1], [0, 1])
    plt.savefig("goodness_of_fit_uniform", format="pdf")
def method_instance(self):
    """Lazily build and cache the underlying sqrt-lasso solver for this
    object's (X, Y, lagrange); subsequent calls return the cached fit."""
    try:
        return self._method_instance
    except AttributeError:
        # first access: construct once and memoize on the instance
        solver = lasso.sqrt_lasso(self.X, self.Y, self.lagrange)
        self._method_instance = solver
        return solver