Ejemplo n.º 1
0
def test_gaussian(n=100, p=20):
    """Smoke-test ``lasso.gaussian`` across quadratic terms and feature weights.

    Generator-style test.  For every combination of quadratic term (``Q`` or
    ``None``) and feature weights (a scalar, a vector with some zero entries,
    and that vector scaled by 10000) the LASSO is fit once without an explicit
    covariance estimator and once with a sandwich covariance estimator built
    from the first fit's log-likelihood.  Both summary variants are exercised,
    an invalid alternative is checked to raise ``ValueError``, and a check is
    yielded that ``linear_part . onestep_estimator < offset`` holds for the
    fitted constraints.

    Parameters
    ----------
    n : int
        Number of rows of the simulated design matrix.
    p : int
        Number of columns of the simulated design matrix.

    Yields
    ------
    tuple
        ``(np.testing.assert_array_less, lhs, rhs)`` to be evaluated by the
        test runner.
    """
    y = np.random.standard_normal(n)
    X = np.random.standard_normal((n, p))

    # Monte Carlo average (1000 draws) of max_j |X_j^T eps| with standard
    # normal eps; used below as the scale of the feature weights.
    lam_theor = np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 1000)))).max(0))
    Q = rr.identity_quadratic(0.01, 0, np.ones(p), 0)

    weights_with_zeros = 0.5 * lam_theor * np.ones(p)
    weights_with_zeros[:3] = 0.  # first three features get zero weight

    huge_weights = weights_with_zeros * 10000

    for q, fw in product([Q, None],
                         [0.5 * lam_theor, weights_with_zeros, huge_weights]):

        # Use the loop's quadratic term `q` (not the fixed `Q`) so that the
        # no-quadratic case is actually exercised.
        L = lasso.gaussian(X, y, fw, 1., quadratic=q)
        L.fit()

        sandwich = glm_sandwich_estimator(L.loglike, B=5000)
        L = lasso.gaussian(X, y, fw, 1., quadratic=q, covariance_estimator=sandwich)
        L.fit()

        # Exercise both summary variants; only absence of errors matters here.
        L.summary('onesided', compute_intervals=True)
        L.summary('twosided')

        nt.assert_raises(ValueError, L.summary, 'none')
        print(L.active)
        yield (np.testing.assert_array_less,
               np.dot(L.constraints.linear_part, L.onestep_estimator),
               L.constraints.offset)
Ejemplo n.º 2
0
def test_gaussian_sandwich_pvals(n=200,
                                 p=50,
                                 s=10,
                                 sigma=10,
                                 rho=0.3,
                                 snr=6.,
                                 use_lasso_sd=False):
    """Compare null p-values under parametric and sandwich covariance estimates.

    Simulates data with ``instance``, adds heteroscedastic noise whose scale
    depends on the last three columns of ``X``, and fits ``lasso.gaussian``
    with a parametric covariance estimator.  If that fit selects every truly
    active variable, the LASSO is refit with a sandwich covariance estimator
    and the two-sided p-values of the selected variables outside
    ``true_active`` are collected for both fits.

    Parameters
    ----------
    n, p, s, sigma, rho, snr
        Forwarded unchanged to ``instance``.
    use_lasso_sd : bool
        If true, estimate the dispersion from the residual sum of squares of
        a preliminary LASSO fit; otherwise pass ``dispersion=None`` to the
        parametric estimator.

    Returns
    -------
    tuple or None
        ``(P_P, P_S, is_active)`` where ``P_P`` / ``P_S`` are the p-values of
        selected variables not in ``true_active`` under the parametric /
        sandwich estimators, and ``is_active`` flags, for each variable in the
        sandwich summary, whether it is in ``true_active``.  ``None`` when the
        parametric fit does not select all of ``true_active``.
    """
    X, y, beta, true_active, sigma = instance(n=n,
                                              p=p,
                                              s=s,
                                              sigma=sigma,
                                              rho=rho,
                                              snr=snr)

    heteroscedastic_error = sigma * np.random.standard_normal(n) * (np.fabs(X[:, -1]) + 0.5)**2
    heteroscedastic_error += sigma * np.random.standard_normal(n) * (np.fabs(X[:, -2]) + 0.2)**2
    heteroscedastic_error += sigma * np.random.standard_normal(n) * (np.fabs(X[:, -3]) + 0.5)**2
    y += heteroscedastic_error

    # two different estimators of variance
    loss = rr.glm.gaussian(X, y)
    sandwich = glm_sandwich_estimator(loss, B=5000)

    # make sure things work with some unpenalized columns
    feature_weights = np.ones(p) * 3 * sigma
    feature_weights[10:12] = 0

    # try using RSS from LASSO to estimate the dispersion
    if use_lasso_sd:
        L_prelim = lasso.gaussian(X, y, feature_weights)
        L_prelim.fit()
        beta_lasso = L_prelim.lasso_solution
        # RSS / residual degrees of freedom is already on the variance scale,
        # so it is passed as the dispersion without squaring again.
        # NOTE(review): assumes `dispersion` is a variance (sigma^2) -- confirm
        # against glm_parametric_estimator.
        dispersion_hat = np.linalg.norm(y - X.dot(beta_lasso))**2 / (n - len(L_prelim.active))
        parametric = glm_parametric_estimator(loss, dispersion=dispersion_hat)
    else:
        parametric = glm_parametric_estimator(loss, dispersion=None)

    L_P = lasso.gaussian(X, y, feature_weights, covariance_estimator=parametric)
    L_P.fit()

    # Only report p-values when the parametric fit screened successfully.
    if not set(true_active).issubset(L_P.active):
        return None

    S = L_P.summary('twosided')
    P_P = [pval for pval, v in zip(S['pval'], S['variable']) if v not in true_active]

    L_S = lasso.gaussian(X, y, feature_weights, covariance_estimator=sandwich)
    L_S.fit()

    S = L_S.summary('twosided')
    P_S = [pval for pval, v in zip(S['pval'], S['variable']) if v not in true_active]

    return P_P, P_S, [v in true_active for v in S['variable']]