def test_gaussian(n=100, p=20):
    """Exercise the Gaussian LASSO over several weights, with and without a quadratic.

    Generator-style test: for every combination of quadratic term
    (``Q`` or ``None``) and feature weights (a scalar, a vector with some
    zero entries, and that vector scaled by 10000) it fits the LASSO,
    refits it with a sandwich covariance estimator, requests summaries,
    and yields a check that the fitted constraints hold at the one-step
    estimator.

    Parameters
    ----------
    n : int
        Number of rows of the random design and length of the response.
    p : int
        Number of columns of the random design.

    Yields
    ------
    tuple
        ``(np.testing.assert_array_less, lhs, rhs)`` where ``lhs`` is
        ``L.constraints.linear_part`` applied to ``L.onestep_estimator``
        and ``rhs`` is ``L.constraints.offset``.
    """
    y = np.random.standard_normal(n)
    X = np.random.standard_normal((n, p))

    # Average, over 1000 pure-noise draws, of the largest absolute entry
    # of X^T noise; used below as the scale of the penalty weights.
    noise = np.random.standard_normal((n, 1000))
    lam_theor = np.mean(np.fabs(np.dot(X.T, noise)).max(0))

    Q = rr.identity_quadratic(0.01, 0, np.ones(p), 0)

    # First three features get zero weight.
    weights_with_zeros = 0.5 * lam_theor * np.ones(p)
    weights_with_zeros[:3] = 0.

    huge_weights = weights_with_zeros * 10000

    weight_choices = [0.5 * lam_theor, weights_with_zeros, huge_weights]
    for q, fw in product([Q, None], weight_choices):

        # Pass the loop's quadratic `q` (not the fixed `Q`) so that the
        # no-quadratic case is actually covered.
        # NOTE(review): the positional 1. is presumably sigma -- confirm
        # against lasso.gaussian's signature.
        L = lasso.gaussian(X, y, fw, 1., quadratic=q)
        L.fit()
        _ = L.constraints  # smoke check: the attribute must be readable

        # Refit using a sandwich covariance estimate built from the
        # first fit's log-likelihood.
        sandwich = glm_sandwich_estimator(L.loglike, B=5000)
        L = lasso.gaussian(X, y, fw, 1., quadratic=q,
                           covariance_estimator=sandwich)
        L.fit()

        # Both supported summary alternatives must run; results are not
        # inspected here.
        L.summary('onesided', compute_intervals=True)
        L.summary('twosided')

        # An unknown alternative is expected to be rejected.
        nt.assert_raises(ValueError, L.summary, 'none')
        print(L.active)

        yield (np.testing.assert_array_less,
               np.dot(L.constraints.linear_part, L.onestep_estimator),
               L.constraints.offset)
def test_gaussian_sandwich_pvals(n=200,
                                 p=50,
                                 s=10,
                                 sigma=10,
                                 rho=0.3,
                                 snr=6.,
                                 use_lasso_sd=False):
    """Collect null p-values from parametric and sandwich covariance estimators.

    Data come from ``instance(...)``; heteroscedastic noise whose scale
    depends on the last three columns of ``X`` is then added to ``y``.
    The LASSO is fit with a parametric covariance estimator and, if every
    truly active variable was selected, fit again with a sandwich
    estimator.

    Parameters
    ----------
    n, p, s, sigma, rho, snr
        Forwarded by keyword to ``instance``. Note that ``sigma`` is
        rebound to the value ``instance`` returns.
    use_lasso_sd : bool
        If true, estimate the dispersion from the residual sum of squares
        of a preliminary LASSO fit; otherwise pass ``dispersion=None`` to
        the parametric estimator.

    Returns
    -------
    tuple or None
        ``(P_P, P_S, is_true_active)``: the two-sided p-values of selected
        variables not in ``true_active`` under the parametric and sandwich
        fits, plus, for each variable in the sandwich summary, whether it
        is in ``true_active``. ``None`` when the parametric fit did not
        select every truly active variable.
    """
    X, y, beta, true_active, sigma = instance(n=n,
                                              p=p,
                                              s=s,
                                              sigma=sigma,
                                              rho=rho,
                                              snr=snr)

    # Add noise whose standard deviation varies with the last three columns.
    heteroscedastic_error = (sigma * np.random.standard_normal(n)
                             * (np.fabs(X[:, -1]) + 0.5) ** 2)
    heteroscedastic_error += (sigma * np.random.standard_normal(n)
                              * (np.fabs(X[:, -2]) + 0.2) ** 2)
    heteroscedastic_error += (sigma * np.random.standard_normal(n)
                              * (np.fabs(X[:, -3]) + 0.5) ** 2)
    y += heteroscedastic_error

    # two different estimators of variance
    loss = rr.glm.gaussian(X, y)
    sandwich = glm_sandwich_estimator(loss, B=5000)

    # make sure things work with some unpenalized columns
    feature_weights = np.ones(p) * 3 * sigma
    feature_weights[10:12] = 0

    # try using RSS from LASSO to estimate sigma
    if use_lasso_sd:
        L_prelim = lasso.gaussian(X, y, feature_weights)
        L_prelim.fit()
        beta_lasso = L_prelim.lasso_solution
        # RSS / (n - #active) is already on the variance (sigma^2) scale,
        # so it is passed as the dispersion without squaring again.
        # NOTE(review): assumes `dispersion` is the noise variance --
        # confirm against glm_parametric_estimator.
        rss = np.linalg.norm(y - X.dot(beta_lasso)) ** 2
        dispersion_hat = rss / (n - len(L_prelim.active))
        parametric = glm_parametric_estimator(loss, dispersion=dispersion_hat)
    else:
        parametric = glm_parametric_estimator(loss, dispersion=None)

    L_P = lasso.gaussian(X, y, feature_weights, covariance_estimator=parametric)
    L_P.fit()

    # Only report p-values when the selection screened all true signals.
    if not set(true_active).issubset(L_P.active):
        return None

    S = L_P.summary('twosided')
    P_P = [pval for pval, v in zip(S['pval'], S['variable'])
           if v not in true_active]

    L_S = lasso.gaussian(X, y, feature_weights, covariance_estimator=sandwich)
    L_S.fit()

    S = L_S.summary('twosided')
    P_S = [pval for pval, v in zip(S['pval'], S['variable'])
           if v not in true_active]

    return P_P, P_S, [v in true_active for v in S['variable']]