Example #1
0
def test_sqrt_lasso_pvals(n=100,
                          p=200,
                          s=7,
                          sigma=5,
                          rho=0.3,
                          snr=7.):
    """Fit a sqrt-lasso on one simulated instance; if the truly active set
    is recovered, return the selective p-values together with booleans
    marking which reported variables are truly active."""

    X, y, beta, true_active, sigma = instance(n=n, p=p, s=s, sigma=sigma,
                                              rho=rho, snr=snr)

    # Monte-Carlo approximation of the theoretical tuning parameter.
    noise = np.random.standard_normal((n, 1000))
    lam_theor = np.mean(np.fabs(np.dot(X.T, noise)).max(0)) / np.sqrt(n)
    Q = rr.identity_quadratic(0.01, 0, np.ones(p), 0)

    weights_with_zeros = 0.7 * lam_theor * np.ones(p)
    weights_with_zeros[:3] = 0.  # leave the first three coordinates unpenalized

    # Construct (and discard) a parametric-covariance variant, then fit the default.
    lasso.sqrt_lasso(X, y, weights_with_zeros, covariance='parametric')
    L = lasso.sqrt_lasso(X, y, weights_with_zeros)
    L.fit()
    if set(true_active).issubset(L.active):
        S = L.summary('onesided')
        S = L.summary('twosided')
        return S['pval'], [v in true_active for v in S['variable']]
Example #2
0
def test_sqrt_lasso_pvals(n=100, p=200, s=7, sigma=5, rho=0.3, snr=7.):
    """Repeatedly simulate until the sqrt-lasso selection contains the
    true support, then return the p-values of the selected variables that
    are NOT truly active, alternating one-/two-sided summaries per try."""

    counter = 0

    while True:
        counter += 1
        X, y, beta, active, sigma = instance(n=n, p=p, s=s,
                                             sigma=sigma, rho=rho, snr=snr)

        # Monte-Carlo estimate of the theoretical lambda.
        Z = np.random.standard_normal((n, 1000))
        lam_theor = np.mean(np.fabs(np.dot(X.T, Z)).max(0)) / np.sqrt(n)
        Q = rr.identity_quadratic(0.01, 0, np.ones(p), 0)

        weights_with_zeros = 0.7 * lam_theor * np.ones(p)
        weights_with_zeros[:3] = 0.  # first three features unpenalized

        L = lasso.sqrt_lasso(X, y, weights_with_zeros)
        L.fit()
        # Odd attempts use 'twosided', even attempts 'onesided'.
        alternative = 'twosided' if counter % 2 == 1 else 'onesided'
        if set(active).issubset(L.active):
            S = L.summary(alternative)
            null_pvals = []
            for pv, var in zip(S['pval'], S['variable']):
                if var not in active:
                    null_pvals.append(pv)
            return null_pvals
Example #3
0
def test_sqrt_lasso(n=100, p=20):
    """Smoke-test sqrt-lasso over combinations of quadratic term and
    feature-weight choices, yielding nose-style assertion triples that the
    one-step estimator satisfies the selection constraints."""

    y = np.random.standard_normal(n)
    X = np.random.standard_normal((n, p))

    # Monte-Carlo estimate of the theoretical tuning parameter.
    Z = np.random.standard_normal((n, 1000))
    lam_theor = np.mean(np.fabs(np.dot(X.T, Z)).max(0)) / np.sqrt(n)
    Q = rr.identity_quadratic(0.01, 0, np.random.standard_normal(p) / 5., 0)

    weights_with_zeros = 0.5 * lam_theor * np.ones(p)
    weights_with_zeros[:3] = 0.  # unpenalized leading coordinates

    huge_weights = weights_with_zeros * 10000

    weight_choices = [0.5 * lam_theor, weights_with_zeros, huge_weights]
    for q, fw in product([None, Q], weight_choices):

        L = lasso.sqrt_lasso(X, y, fw, quadratic=q,
                             solve_args={'min_its': 300, 'tol': 1.e-12})
        L.fit(solve_args={'min_its': 300, 'tol': 1.e-12})
        C = L.constraints

        # Summaries computed for coverage; intervals only for the onesided call.
        S = L.summary('onesided', compute_intervals=True)
        S = L.summary('twosided')

        yield (np.testing.assert_array_less,
               np.dot(L.constraints.linear_part, L.onestep_estimator),
               L.constraints.offset)
Example #4
0
def test_sqrt_lasso_pvals(n=100,
                          p=200,
                          s=7,
                          sigma=5,
                          rho=0.3,
                          snr=7.):
    """Simulate until the sqrt-lasso active set contains the true support;
    report p-values for selected variables outside the true support."""

    counter = 0

    while True:
        counter += 1
        X, y, beta, active, sigma = instance(n=n, p=p, s=s, sigma=sigma,
                                             rho=rho, snr=snr)

        # Empirical approximation of the theoretical lambda.
        gauss = np.random.standard_normal((n, 1000))
        lam_theor = np.mean(np.fabs(np.dot(X.T, gauss)).max(0)) / np.sqrt(n)
        Q = rr.identity_quadratic(0.01, 0, np.ones(p), 0)

        weights_with_zeros = 0.7 * lam_theor * np.ones(p)
        weights_with_zeros[:3] = 0.  # first three coordinates unpenalized

        L = lasso.sqrt_lasso(X, y, weights_with_zeros)
        L.fit()
        # Alternate the summary alternative between attempts.
        if counter % 2:
            v = 'twosided'
        else:
            v = 'onesided'
        if set(active).issubset(L.active):
            S = L.summary(v)
            return [pv for pv, var in zip(S['pval'], S['variable'])
                    if var not in active]
Example #5
0
def test_sqrt_lasso(n=100, p=20):
    """Exercise sqrt-lasso fits across quadratic/weight combinations,
    yielding assertions that the affine selection constraints hold at the
    one-step estimator."""

    y = np.random.standard_normal(n)
    X = np.random.standard_normal((n, p))

    # Theoretical lambda estimated from 1000 Gaussian noise vectors.
    noise = np.random.standard_normal((n, 1000))
    lam_theor = np.mean(np.fabs(np.dot(X.T, noise)).max(0)) / np.sqrt(n)
    Q = rr.identity_quadratic(0.01, 0, np.random.standard_normal(p) / 5., 0)

    weights_with_zeros = 0.5 * lam_theor * np.ones(p)
    weights_with_zeros[:3] = 0.  # unpenalized leading coordinates

    huge_weights = weights_with_zeros * 10000

    # Same iteration order as itertools.product([None, Q], weights).
    for q in [None, Q]:
        for fw in [0.5 * lam_theor, weights_with_zeros, huge_weights]:

            L = lasso.sqrt_lasso(X, y, fw, quadratic=q,
                                 solve_args={'min_its': 300, 'tol': 1.e-12})
            L.fit(solve_args={'min_its': 300, 'tol': 1.e-12})
            C = L.constraints

            S = L.summary('onesided', compute_intervals=True)
            S = L.summary('twosided')

            yield (np.testing.assert_array_less,
                   np.dot(L.constraints.linear_part, L.onestep_estimator),
                   L.constraints.offset)
Example #6
0
def test_sqrt_lasso_sandwich_pvals(n=200,
                                   p=50,
                                   s=10,
                                   sigma=10,
                                   rho=0.3,
                                   signal=6.,
                                   use_lasso_sd=False):
    """Add heteroscedastic noise tied to the last three design columns,
    fit a sqrt-lasso with a sandwich covariance estimate and, when the
    true support is recovered, return p-values plus truth indicators."""

    X, y, beta, true_active, sigma, _ = instance(n=n, p=p, s=s, sigma=sigma,
                                                 rho=rho, signal=signal)

    # Heteroscedastic errors whose scale grows with the last three columns;
    # loop preserves the original order of the three random draws.
    extra_noise = np.zeros(n)
    for col, shift in [(-1, 0.5), (-2, 0.2), (-3, 0.5)]:
        extra_noise += (sigma * np.random.standard_normal(n)
                        * (np.fabs(X[:, col]) + shift) ** 2)
    y += extra_noise

    feature_weights = np.ones(p) * choose_lambda(X)
    feature_weights[10:12] = 0  # two unpenalized coordinates

    L_SQ = lasso.sqrt_lasso(X, y, feature_weights, covariance='sandwich')
    L_SQ.fit()

    if set(true_active).issubset(L_SQ.active):
        S = L_SQ.summary('twosided')
        return S['pval'], [v in true_active for v in S['variable']]
def test_goodness_of_fit(n=20,
                         p=25,
                         s=10,
                         sigma=20.,
                         nsim=10,
                         burnin=2000,
                         ndraw=8000):
    """Collect `nsim` goodness-of-fit p-values from sqrt-lasso fits on
    pure-noise responses (beta == 0).

    Returns
    -------
    (Pa, indicator) : (np.ndarray, np.ndarray)
        The non-NaN p-values and an all-False boolean array of the same
        shape (nothing is truly active under the null).
    """
    P = []
    while True:
        # Null model: the response is pure noise; beta is identically zero.
        y = np.random.standard_normal(n) * sigma
        beta = np.zeros(p)
        X = np.random.standard_normal(
            (n, p)) + 0.3 * np.random.standard_normal(n)[:, None]
        X /= (X.std(0)[None, :] * np.sqrt(n))  # column-standardize
        y += np.dot(X, beta) * sigma           # no-op since beta == 0
        lam_theor = .7 * choose_lambda(X, quantile=0.9)
        L = lasso.sqrt_lasso(X, y, lam_theor)
        L.fit()
        pval = goodness_of_fit(L,
                               lambda x: np.max(np.fabs(x)),
                               burnin=burnin,
                               ndraw=ndraw)
        P.append(pval)
        Pa = np.array(P)
        Pa = Pa[~np.isnan(Pa)]  # drop failed (NaN) draws
        # Pa is already NaN-free, so its length is the valid count
        # (the original re-checked isnan redundantly here).
        if Pa.shape[0] >= nsim:
            break

    # BUG FIX: `np.bool` was deprecated in NumPy 1.20 and removed in 1.24;
    # use the builtin `bool` as the dtype instead.
    return Pa, np.zeros_like(Pa, bool)
def sqrt_lasso(X, Y, kappa, q=0.2):
    """Fit a sqrt-lasso at `kappa` times the data-driven lambda, apply
    Benjamini-Hochberg at level `q` to the one-sided selective p-values,
    and return a results dict (including wall-clock runtime).

    Parameters
    ----------
    X, Y : design matrix and response.
    kappa : multiplier on the tuning parameter from `choose_lambda`.
    q : FDR level for the BH procedure.
    """
    # Renamed from the original's swapped tic/toc so the timing reads
    # naturally; runtime is still end - start.
    start = time.time()
    lam = choose_lambda(X)
    L = lasso.sqrt_lasso(X, Y, kappa * lam)
    L.fit()
    S = L.summary('onesided')
    end = time.time()

    selected = sm.stats.multipletests(S['pval'], q, 'fdr_bh')[0]

    # BUG FIX: the LaTeX method label opened math mode with '$' but never
    # closed it, which breaks rendering downstream.
    return {'method': [r'$\kappa=%0.2f$' % kappa],
            'active': [S['variable']],
            'active_signs': [L.active_signs],
            'pval': [S['pval']],
            'selected': [selected],
            'runtime': end - start}
def test_goodness_of_fit(n=20,
                         p=25,
                         s=10,
                         sigma=20.,
                         nsim=1000,
                         burnin=2000,
                         ndraw=8000):
    """Simulate null sqrt-lasso fits until `nsim` valid goodness-of-fit
    p-values have accumulated, then save an ECDF-vs-uniform diagnostic
    plot to 'goodness_of_fit_uniform' (PDF)."""
    P = []
    valid = np.array([])
    while valid.shape[0] < nsim:
        # Null model: pure-noise response, beta identically zero.
        y = np.random.standard_normal(n) * sigma
        beta = np.zeros(p)
        X = (np.random.standard_normal((n, p))
             + 0.3 * np.random.standard_normal(n)[:, None])
        X /= (X.std(0)[None, :] * np.sqrt(n))  # column-standardize
        y += np.dot(X, beta) * sigma           # no-op since beta == 0
        lam_theor = .7 * choose_lambda(X, quantile=0.9)
        L = lasso.sqrt_lasso(X, y, lam_theor)
        L.fit()
        pval = goodness_of_fit(L,
                               lambda x: np.max(np.fabs(x)),
                               burnin=burnin,
                               ndraw=ndraw)
        P.append(pval)
        valid = np.array(P)
        valid = valid[~np.isnan(valid)]  # keep only finite p-values

    # make any plots not use display

    from matplotlib import use
    use('Agg')
    import matplotlib.pyplot as plt

    # used for ECDF

    import statsmodels.api as sm

    grid = np.linspace(0, 1, 101)
    plt.plot(grid, sm.distributions.ECDF(valid)(grid))
    plt.plot([0, 1], [0, 1])  # reference diagonal
    plt.savefig("goodness_of_fit_uniform", format="pdf")
def _generate_constraints(n=15, p=10, sigma=1):
    """Simulate null data and fit sqrt-lasso until the selection event
    yields constraints with at least three active variables; return the
    rebuilt (identity-covariance, zero-mean) constraints with y, L, X."""
    while True:
        response = np.random.standard_normal(n) * sigma
        coefs = np.zeros(p)  # null model
        design = (np.random.standard_normal((n, p))
                  + 0.3 * np.random.standard_normal(n)[:, None])
        design /= design.std(0)[None, :] * np.sqrt(n)  # standardize columns
        response += np.dot(design, coefs) * sigma      # no-op: coefs == 0
        lam = 0.3 * choose_lambda(design, quantile=0.9)
        fitted = lasso.sqrt_lasso(design, response, lam)
        fitted.fit(solve_args={'tol': 1.e-12, 'min_its': 150})

        raw_con = fitted.constraints
        # Require a non-trivial selection event with >= 3 active variables.
        if raw_con is not None and fitted.active.shape[0] >= 3:
            break

    # Rebuild constraints from the active signs, then whiten them.
    lin = -fitted.active_signs[:, None] * np.linalg.pinv(design[:, fitted.active])
    con = AC.constraints(lin, raw_con.offset)
    con.covariance = np.identity(con.covariance.shape[0])
    con.mean *= 0
    return con, response, fitted, design
def _generate_constraints(n=15, p=10, sigma=1):
    """Generate a selection-event constraint set from a null sqrt-lasso fit.

    Repeatedly simulates pure-noise data (beta == 0), fits a sqrt-lasso,
    and stops once the fit produces constraints with at least 3 active
    variables.  The returned constraints are rebuilt with identity
    covariance and zero mean.

    Returns
    -------
    con, y, L, X : constraints object, response, fitted lasso, design.
    """
    while True:
        y = np.random.standard_normal(n) * sigma
        beta = np.zeros(p)  # null model: no true signal
        X = np.random.standard_normal(
            (n, p)) + 0.3 * np.random.standard_normal(n)[:, None]
        X /= (X.std(0)[None, :] * np.sqrt(n))  # standardize columns
        y += np.dot(X, beta) * sigma  # no-op since beta == 0
        lam_theor = 0.3 * choose_lambda(X, quantile=0.9)
        L = lasso.sqrt_lasso(X, y, lam_theor)
        L.fit(solve_args={'tol': 1.e-12, 'min_its': 150})

        con = L.constraints
        # Require a non-trivial selection event with >= 3 active variables.
        if con is not None and L.active.shape[0] >= 3:
            break

    offset = con.offset
    # Affine constraint rows determined by the signs of the active coefficients.
    linear_part = -L.active_signs[:, None] * np.linalg.pinv(X[:, L.active])
    con = AC.constraints(linear_part, offset)
    con.covariance = np.identity(con.covariance.shape[0])
    con.mean *= 0  # center the constraint distribution at zero
    return con, y, L, X
def test_goodness_of_fit(n=20, p=25, s=10, sigma=20.,
                         nsim=1000, burnin=2000, ndraw=8000):
    """Accumulate `nsim` goodness-of-fit p-values from null sqrt-lasso fits
    and save an ECDF plot comparing them to the uniform distribution."""
    P = []
    while True:
        y = np.random.standard_normal(n) * sigma
        beta = np.zeros(p)  # null model: no signal
        X = np.random.standard_normal((n,p)) + 0.3 * np.random.standard_normal(n)[:,None]
        X /= (X.std(0)[None,:] * np.sqrt(n))  # standardize columns
        y += np.dot(X, beta) * sigma  # no-op since beta == 0
        lam_theor = .7 * choose_lambda(X, quantile=0.9)
        L = lasso.sqrt_lasso(X, y, lam_theor)
        L.fit()
        # Goodness-of-fit p-value using the sup-norm test statistic.
        pval = goodness_of_fit(L, 
                               lambda x: np.max(np.fabs(x)),
                               burnin=burnin,
                               ndraw=ndraw)
        P.append(pval)
        Pa = np.array(P)
        Pa = Pa[~np.isnan(Pa)]  # discard failed (NaN) draws
        # NOTE(review): Pa is already NaN-free here, so this re-check is
        # equivalent to len(Pa) >= nsim.
        if (~np.isnan(np.array(Pa))).sum() >= nsim:
            break

    # make any plots not use display

    from matplotlib import use
    use('Agg')
    import matplotlib.pyplot as plt

    # used for ECDF

    import statsmodels.api as sm

    U = np.linspace(0,1,101)
    plt.plot(U, sm.distributions.ECDF(Pa)(U))
    plt.plot([0,1], [0,1])  # reference diagonal for uniform p-values
    plt.savefig("goodness_of_fit_uniform", format="pdf")
Example #13
0
 def method_instance(self):
     """Lazily build and cache the sqrt-lasso solver for (X, Y, lagrange)."""
     try:
         # EAFP: reuse the cached solver if it already exists.
         return self._method_instance
     except AttributeError:
         self._method_instance = lasso.sqrt_lasso(self.X, self.Y, self.lagrange)
         return self._method_instance