def test_skinny_fat():

    X, Y = instance()[:2]
    n, p = X.shape
    lam = choose_lambda(X)
    obj1 = sqlasso_objective(X, Y)
    obj2 = sqlasso_objective_skinny(X, Y)
    soln1 = solve_sqrt_lasso_fat(X,
                                 Y,
                                 weights=np.ones(p) * lam,
                                 solve_args={'min_its': 500})[0]
    soln2 = solve_sqrt_lasso_skinny(X,
                                    Y,
                                    weights=np.ones(p) * lam,
                                    solve_args={'min_its': 500})[0]

    np.testing.assert_allclose(soln1, soln2, rtol=1.e-3)

    X, Y = instance(p=50)[:2]
    n, p = X.shape
    lam = choose_lambda(X)
    obj1 = sqlasso_objective(X, Y)
    obj2 = sqlasso_objective_skinny(X, Y)
    soln1 = solve_sqrt_lasso_fat(X,
                                 Y,
                                 weights=np.ones(p) * lam,
                                 solve_args={'min_its': 500})[0]
    soln2 = solve_sqrt_lasso_skinny(X,
                                    Y,
                                    weights=np.ones(p) * lam,
                                    solve_args={'min_its': 500})[0]

    np.testing.assert_allclose(soln1, soln2, rtol=1.e-3)
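The snippets in this listing are pasted without their import headers and mix several releases of the package API (compare `sqrt_lasso(y, X, lam)` below with `lasso.sqrt_lasso(X, y, lam)` later). A plausible shared header, assuming the usual module layout of the `selection` package (the paths are an assumption, not shown in the snippets):

import time

import numpy as np
import regreg.api as rr
import statsmodels.api as sm

import selection.constraints.affine as AC
from selection.tests.instance import gaussian_instance as instance
from selection.algorithms import lasso
from selection.algorithms.sqrt_lasso import (choose_lambda,
                                             goodness_of_fit,
                                             solve_sqrt_lasso,
                                             solve_sqrt_lasso_fat,
                                             solve_sqrt_lasso_skinny,
                                             sqlasso_objective,
                                             sqlasso_objective_skinny)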
Example #2
def test_gaussian_approx(n=100,p=200,s=10):
    """
    using gaussian approximation for pvalues
    """
    sigma = 3
    y = np.random.standard_normal(n) * sigma
    beta = np.zeros(p)
    #beta[:s] = 8 * (2 * np.random.binomial(1, 0.5, size=(s,)) - 1)
    beta[:s] = 18 
    X = np.random.standard_normal((n,p)) + 0.3 * np.random.standard_normal(n)[:,None]
    X /= (X.std(0)[None,:] * np.sqrt(n))
    y += np.dot(X, beta)
    lam_theor = choose_lambda(X, quantile=0.75)
    L = sqrt_lasso(y, X, lam_theor)
    L.fit(tol=1.e-10, min_its=80)

    P = []
    P_gaussian = []
    intervals = []
    if L.active.shape[0] > 0:

        np.testing.assert_array_less( \
            np.dot(L.constraints.linear_part, L.y),
            L.constraints.offset)

        if set(range(s)).issubset(L.active):
            P = [p[1] for p in L.active_pvalues[s:]]
            P_gaussian = [p[1] for p in L.active_gaussian_pval[s:]]
            intervals = [u for u in L.active_gaussian_intervals if u[0] in range(s)]
    return P, P_gaussian, intervals, beta
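A hypothetical driver (not part of the original suite) that pools the Gaussian-approximation p-values over repeated runs and checks them for uniformity:

from scipy.stats import kstest

def check_gaussian_approx(nrep=50):
    pooled = []
    for _ in range(nrep):
        P, P_gaussian, _, _ = test_gaussian_approx()
        pooled.extend(P_gaussian)      # null p-values only
    # under the null the pooled p-values should look Uniform(0, 1)
    return kstest(pooled, 'uniform')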
def test_goodness_of_fit(n=20,
                         p=25,
                         s=10,
                         sigma=20.,
                         nsim=10,
                         burnin=2000,
                         ndraw=8000):
    P = []
    while True:
        y = np.random.standard_normal(n) * sigma
        beta = np.zeros(p)
        X = np.random.standard_normal(
            (n, p)) + 0.3 * np.random.standard_normal(n)[:, None]
        X /= (X.std(0)[None, :] * np.sqrt(n))
        y += np.dot(X, beta) * sigma
        lam_theor = .7 * choose_lambda(X, quantile=0.9)
        L = lasso.sqrt_lasso(X, y, lam_theor)
        L.fit()
        pval = goodness_of_fit(L,
                               lambda x: np.max(np.fabs(x)),
                               burnin=burnin,
                               ndraw=ndraw)
        P.append(pval)
        Pa = np.array(P)
        Pa = Pa[~np.isnan(Pa)]    # drop failed draws
        if Pa.shape[0] >= nsim:   # NaNs already removed, so a length check suffices
            break

    return Pa, np.zeros_like(Pa, bool)
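The returned p-values can be compared against the Uniform(0, 1) diagonal with the same ECDF recipe the later goodness-of-fit examples use; a minimal sketch:

import statsmodels.api as sm

Pa, _ = test_goodness_of_fit()
U = np.linspace(0, 1, 101)
ecdf = sm.distributions.ECDF(Pa)(U)    # should track U under the null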
Example #4
def test_sqrt_lasso_sandwich_pvals(n=200,
                                   p=50,
                                   s=10,
                                   sigma=10,
                                   rho=0.3,
                                   signal=6.,
                                   use_lasso_sd=False):

    X, y, beta, true_active, sigma, _ = instance(n=n,
                                                 p=p,
                                                 s=s,
                                                 sigma=sigma,
                                                 rho=rho,
                                                 signal=signal)

    heteroscedastic_error = sigma * np.random.standard_normal(n) * (
        np.fabs(X[:, -1]) + 0.5)**2
    heteroscedastic_error += sigma * np.random.standard_normal(n) * (
        np.fabs(X[:, -2]) + 0.2)**2
    heteroscedastic_error += sigma * np.random.standard_normal(n) * (
        np.fabs(X[:, -3]) + 0.5)**2
    y += heteroscedastic_error

    feature_weights = np.ones(p) * choose_lambda(X)
    feature_weights[10:12] = 0

    L_SQ = lasso.sqrt_lasso(X, y, feature_weights, covariance='sandwich')
    L_SQ.fit()

    if set(true_active).issubset(L_SQ.active):
        S = L_SQ.summary('twosided')
        return S['pval'], [v in true_active for v in S['variable']]
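A hypothetical aggregation loop (a made-up helper, not in the original listing) that pools sandwich p-values across simulations, splitting truly active variables from nulls:

def pool_sandwich_pvals(nrep=20):
    null_p, alt_p = [], []
    for _ in range(nrep):
        result = test_sqrt_lasso_sandwich_pvals()
        if result is None:             # screening failed; nothing returned
            continue
        pvals, is_true = result
        for p, t in zip(pvals, is_true):
            (alt_p if t else null_p).append(p)
    return np.array(null_p), np.array(alt_p)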
def test_class(n=20, p=40, s=2):
    y = np.random.standard_normal(n) * 1.2
    beta = np.zeros(p)
    beta[:s] = 5
    X = np.random.standard_normal(
        (n, p)) + 0.3 * np.random.standard_normal(n)[:, None]
    y += np.dot(X, beta)
    lam_theor = 0.7 * choose_lambda(X, quantile=0.9)
    L = sqrt_lasso(y, X, lam_theor)
    L.fit(tol=1.e-10, min_its=80)
    P = []
    if L.active.shape[0] > 0:

        np.testing.assert_array_less( \
            np.dot(L.constraints.linear_part, L.y),
            L.constraints.offset)

        # plain asserts in place of the deprecated nose.tools helpers
        assert L.constraints(y)
        assert L.quasi_affine_constraints(y)

        if set(range(s)).issubset(L.active):
            P = [p[1] for p in L.active_pvalues[s:]]
        else:
            P = []
    return P
Example #6
def test_goodness_of_fit(n=20, p=25, s=10, sigma=20.,
                         nsample=1000):
    P = []
    while True:
        y = np.random.standard_normal(n) * sigma
        beta = np.zeros(p)
        X = np.random.standard_normal((n,p)) + 0.3 * np.random.standard_normal(n)[:,None]
        X /= (X.std(0)[None,:] * np.sqrt(n))
        y += np.dot(X, beta) * sigma
        lam_theor = .7 * choose_lambda(X, quantile=0.9)
        L = sqrt_lasso(y, X, lam_theor)
        L.fit(tol=1.e-12, min_its=150, max_its=200)

        pval = L.goodness_of_fit(lambda x: np.max(np.fabs(x)),
                                 burnin=10000,
                                 ndraw=10000)
        P.append(pval)
        Pa = np.array(P)
        Pa = Pa[~np.isnan(Pa)]      # drop failed draws
        if Pa.shape[0] >= nsample:  # NaNs already removed
            break

    # plot without a display; ECDF from statsmodels (same imports the later
    # goodness-of-fit example pulls in explicitly)
    from matplotlib import use
    use('Agg')
    import matplotlib.pyplot as plt
    import statsmodels.api as sm

    U = np.linspace(0, 1, nsample + 1)
    plt.plot(U, sm.distributions.ECDF(Pa)(U))
    plt.plot([0, 1], [0, 1])
    plt.savefig("goodness_of_fit_uniform", format="pdf")
Example #7
    def method_instance(self):
        if not hasattr(self, "_method_instance"):
            n, p = self.X.shape
            lagrange = np.ones(p) * choose_lambda(self.X) * self.kappa
            self._method_instance = random_lasso_method.gaussian(self.X,
                                                                 self.Y,
                                                                 lagrange,
                                                                 randomizer_scale=self.randomizer_scale * np.std(self.Y))
        return self._method_instance
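The fragment above reads like a cached property on a comparison-harness class whose decorator and class body were lost in extraction. The same cache-on-first-access idiom, sketched with functools.cached_property and a hypothetical class name (`random_lasso_method` is referenced but never defined in this listing):

from functools import cached_property

class Harness:
    def __init__(self, X, Y, kappa=1.0, randomizer_scale=1.0):
        self.X, self.Y = X, Y
        self.kappa, self.randomizer_scale = kappa, randomizer_scale

    @cached_property
    def method_instance(self):
        # built on first access, reused afterwards
        n, p = self.X.shape
        lagrange = np.ones(p) * choose_lambda(self.X) * self.kappa
        return random_lasso_method.gaussian(
            self.X, self.Y, lagrange,
            randomizer_scale=self.randomizer_scale * np.std(self.Y))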
def test_skinny_fat():

    X, Y = instance()[:2]
    n, p = X.shape
    lam = SQ.choose_lambda(X)
    obj1 = SQ.sqlasso_objective(X, Y)
    obj2 = SQ.sqlasso_objective_skinny(X, Y)
    soln1 = SQ.solve_sqrt_lasso_fat(X, Y, min_its=500, weights=np.ones(p) * lam)
    soln2 = SQ.solve_sqrt_lasso_skinny(X, Y, min_its=500, weights=np.ones(p) * lam)

    np.testing.assert_almost_equal(soln1, soln2)

    X, Y = instance(p=50)[:2]
    n, p = X.shape
    lam = SQ.choose_lambda(X)
    obj1 = SQ.sqlasso_objective(X, Y)
    obj2 = SQ.sqlasso_objective_skinny(X, Y)
    soln1 = SQ.solve_sqrt_lasso_fat(X, Y, min_its=500, weights=np.ones(p) * lam)
    soln2 = SQ.solve_sqrt_lasso_skinny(X, Y, min_its=500, weights=np.ones(p) * lam)

    np.testing.assert_almost_equal(soln1, soln2)
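This variant presumably aliases the solver module, e.g. `import selection.algorithms.sqrt_lasso as SQ` (an assumption), and calls an older solver signature that takes `min_its` directly rather than through `solve_args`.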
Example #10
def test_equivalence_sqrtlasso(n=200, p=400, s=10, sigma=3.):
    """
    Check equivalent LASSO and sqrtLASSO solutions.
    """

    Y = np.random.standard_normal(n) * sigma
    beta = np.zeros(p)
    beta[:s] = 8 * (2 * np.random.binomial(1, 0.5, size=(s, )) - 1)
    X = np.random.standard_normal(
        (n, p)) + 0.3 * np.random.standard_normal(n)[:, None]
    X /= (X.std(0)[None, :] * np.sqrt(n))
    Y += np.dot(X, beta) * sigma
    lam_theor = choose_lambda(X, quantile=0.9)

    weights = lam_theor * np.ones(p)
    weights[:3] = 0.
    soln1, loss1 = solve_sqrt_lasso(X,
                                    Y,
                                    weights=weights,
                                    quadratic=None,
                                    solve_args={
                                        'min_its': 500,
                                        'tol': 1.e-10
                                    })

    G1 = loss1.smooth_objective(soln1, 'grad')

    # find active set, and estimate of sigma

    active = (soln1 != 0)
    nactive = active.sum()
    subgrad = np.sign(soln1[active]) * weights[active]
    X_E = X[:, active]
    X_Ei = np.linalg.pinv(X_E)
    sigma_E = np.linalg.norm(Y - X_E.dot(X_Ei.dot(Y))) / np.sqrt(n - nactive)

    multiplier = sigma_E * np.sqrt(
        (n - nactive) / (1 - np.linalg.norm(X_Ei.T.dot(subgrad))**2))

    # XXX how should quadratic be changed?
    # multiply everything by sigma_E?

    loss2 = rr.glm.gaussian(X, Y)
    penalty = rr.weighted_l1norm(weights, lagrange=multiplier)
    problem = rr.simple_problem(loss2, penalty)

    soln2 = problem.solve(tol=1.e-12, min_its=200)
    G2 = loss2.smooth_objective(soln2, 'grad') / multiplier

    np.testing.assert_allclose(G1[3:], G2[3:])
    np.testing.assert_allclose(soln1, soln2)
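The identity being exercised: a sqrt-LASSO solution with weights `w` also solves an ordinary LASSO whose weights are scaled by a data-dependent multiplier. Restated as a standalone helper (a refactor of the in-test computation above; the name is hypothetical):

def sqrt_lasso_multiplier(X, Y, soln, weights):
    # factor that maps sqrt-LASSO weights onto equivalent LASSO weights
    n = X.shape[0]
    active = soln != 0
    X_E = X[:, active]
    X_Ei = np.linalg.pinv(X_E)
    resid = Y - X_E.dot(X_Ei.dot(Y))
    sigma_E = np.linalg.norm(resid) / np.sqrt(n - active.sum())
    subgrad = np.sign(soln[active]) * weights[active]
    return sigma_E * np.sqrt((n - active.sum()) /
                             (1 - np.linalg.norm(X_Ei.T.dot(subgrad))**2))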
Example #11
def test_class_R(n=100, p=20):
    y = np.random.standard_normal(n)
    X = np.random.standard_normal((n,p))
    lam_theor = choose_lambda(X, quantile=0.25)
    L = sqrt_lasso(y,X,lam_theor)
    L.fit(tol=1.e-7)

    if L.active.shape[0] > 0:
        np.testing.assert_array_less( \
            np.dot(L.constraints.linear_part, L.y),
            L.constraints.offset)

        return (L.active_constraints.linear_part,
                L.active_constraints.offset / L.sigma_E,
                L.R_E,
                L._XEinv[0])
    else:
        return None, None, None, None
Example #12
def test_estimate_sigma(n=200, p=400, s=10, sigma=3.):
    y = np.random.standard_normal(n) * sigma
    beta = np.zeros(p)
    beta[:s] = 8 * (2 * np.random.binomial(1, 0.5, size=(s,)) - 1)
    X = np.random.standard_normal((n,p)) + 0.3 * np.random.standard_normal(n)[:,None]
    X /= (X.std(0)[None,:] * np.sqrt(n))
    y += np.dot(X, beta) * sigma
    lam_theor = choose_lambda(X, quantile=0.9)
    L = sqrt_lasso(y, X, lam_theor)
    L.fit(tol=1.e-12, min_its=150)
    P = []

    if L.active.shape[0] > 0:

        return L.sigma_hat / sigma, L.sigma_E / sigma, L.df_E
    else:
        return (None,) * 3
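A hypothetical driver: if the estimators are consistent, both ratios returned by test_estimate_sigma should concentrate near 1.

ratios = [test_estimate_sigma()[:2] for _ in range(20)]
ratios = np.array([r for r in ratios if r[0] is not None])
print(ratios.mean(axis=0))    # both entries should be close to 1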
def sqrt_lasso(X, Y, kappa, q=0.2):

    tic = time.time()                       # start timer
    lam = choose_lambda(X)
    L = lasso.sqrt_lasso(X, Y, kappa * lam)
    L.fit()
    S = L.summary('onesided')
    toc = time.time()                       # stop timer

    # BH selection at level q on the selective p-values
    selected = sm.stats.multipletests(S['pval'], q, 'fdr_bh')[0]

    return {'method': [r'$\kappa=%0.2f$' % kappa],
            'active': [S['variable']],
            'active_signs': [L.active_signs],
            'pval': [S['pval']],
            'selected': [selected],
            'runtime': toc - tic}
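Hypothetical usage of the benchmark wrapper above: sweep the multiplier kappa and compare runtimes and BH selection counts (`instance` as in the earlier examples):

X, Y = instance(n=200, p=50)[:2]
rows = [sqrt_lasso(X, Y, kappa) for kappa in (0.8, 1.0, 1.2)]
for row in rows:
    print(row['method'][0], row['runtime'], row['selected'][0].sum())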
def _generate_constraints(n=15, p=20, sigma=1):
    while True:
        y = np.random.standard_normal(n) * sigma
        beta = np.zeros(p)
        X = np.random.standard_normal((n,p)) + 0.3 * np.random.standard_normal(n)[:,None]
        X /= (X.std(0)[None,:] * np.sqrt(n))
        y += np.dot(X, beta) * sigma
        lam_theor = 0.3 * choose_lambda(X, quantile=0.9)
        L = sqrt_lasso(y, X, lam_theor)
        L.fit(tol=1.e-12, min_its=150)

        con = L.active_constraints
        if con is not None and L.active.shape[0] >= 3:
            break
    con.covariance = np.identity(con.covariance.shape[0])
    con.mean *= 0
    return con, y, L
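A hypothetical downstream use of the whitened constraints: sample from the Gaussian law restricted to the selection event. `sample_from_constraints` is assumed to live in selection.constraints.affine, consistent with the `AC` alias used in the later examples:

from selection.constraints.affine import sample_from_constraints

con, y, L = _generate_constraints()
Z = sample_from_constraints(con, y, ndraw=1000, burnin=1000)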
def test_goodness_of_fit(n=20,
                         p=25,
                         s=10,
                         sigma=20.,
                         nsim=1000,
                         burnin=2000,
                         ndraw=8000):
    P = []
    while True:
        y = np.random.standard_normal(n) * sigma
        beta = np.zeros(p)
        X = np.random.standard_normal(
            (n, p)) + 0.3 * np.random.standard_normal(n)[:, None]
        X /= (X.std(0)[None, :] * np.sqrt(n))
        y += np.dot(X, beta) * sigma
        lam_theor = .7 * choose_lambda(X, quantile=0.9)
        L = lasso.sqrt_lasso(X, y, lam_theor)
        L.fit()
        pval = goodness_of_fit(L,
                               lambda x: np.max(np.fabs(x)),
                               burnin=burnin,
                               ndraw=ndraw)
        P.append(pval)
        Pa = np.array(P)
        Pa = Pa[~np.isnan(Pa)]   # drop failed draws
        if Pa.shape[0] >= nsim:  # NaNs already removed
            break

    # make any plots not use display

    from matplotlib import use
    use('Agg')
    import matplotlib.pyplot as plt

    # used for ECDF

    import statsmodels.api as sm

    U = np.linspace(0, 1, 101)
    plt.plot(U, sm.distributions.ECDF(Pa)(U))
    plt.plot([0, 1], [0, 1])
    plt.savefig("goodness_of_fit_uniform", format="pdf")
def _generate_constraints(n=15, p=10, sigma=1):
    while True:
        y = np.random.standard_normal(n) * sigma
        beta = np.zeros(p)
        X = np.random.standard_normal((n,p)) + 0.3 * np.random.standard_normal(n)[:,None]
        X /= (X.std(0)[None,:] * np.sqrt(n))
        y += np.dot(X, beta) * sigma
        lam_theor = 0.3 * choose_lambda(X, quantile=0.9)
        L = lasso.sqrt_lasso(X, y, lam_theor)
        L.fit(solve_args={'tol':1.e-12, 'min_its':150})

        con = L.constraints
        if con is not None and L.active.shape[0] >= 3:
            break

    offset = con.offset
    linear_part = -L.active_signs[:,None] * np.linalg.pinv(X[:,L.active])
    con = AC.constraints(linear_part, offset)
    con.covariance = np.identity(con.covariance.shape[0])
    con.mean *= 0
    return con, y, L, X
Example #22
    def __init__(self, X, Y, l_theory, l_min, l_1se, sigma_reid):

        parametric_method.__init__(self, X, Y, l_theory, l_min, l_1se, sigma_reid)
        self.lagrange = self.kappa * choose_lambda(X)
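A sketch of how this fragment might sit in context, under a hypothetical subclass name; `parametric_method` is the base class referenced above but not shown in this listing:

class sqrt_lasso_theory(parametric_method):
    kappa = 1.0    # multiplier on the theoretical lambda

    def __init__(self, X, Y, l_theory, l_min, l_1se, sigma_reid):
        parametric_method.__init__(self, X, Y, l_theory, l_min, l_1se, sigma_reid)
        self.lagrange = self.kappa * choose_lambda(X)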