Example #1
def test_l1prox_bound():
    '''
    this test verifies that the l1 prox in bound form can be solved
    by a primal/dual specification 

    obviously, we don't need to solve the l1 prox this way,
    but it verifies that the specification is working correctly

    '''

    l1 = rr.l1norm(4, bound=2.)
    ww = np.random.standard_normal(4)*2
    ab = l1.proximal(rr.identity_quadratic(0.5, ww, 0, 0))

    l1c = copy(l1)
    l1c.quadratic = rr.identity_quadratic(0.5, ww, None, 0.)
    a = rr.simple_problem.nonsmooth(l1c)
    solver = rr.FISTA(a)
    solver.fit(min_its=100)

    l1c = copy(l1)
    l1c.quadratic = rr.identity_quadratic(0.5, ww, None, 0.)
    a = rr.dual_problem.fromprimal(l1c)
    solver = rr.FISTA(a)
    solver.fit(min_its=100)

    ac = a.primal

    np.testing.assert_allclose(ac + 0.1, ab + 0.1, rtol=1.e-4)
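
A note on what the bound-form prox above computes: with the quadratic centered at ww, it is just the Euclidean projection of ww onto the l1 ball of radius 2. A minimal numpy sketch of that projection (the sorting construction of Duchi et al., 2008), offered as an independent reference point rather than part of the library:

import numpy as np

def project_l1_ball(w, radius):
    # Euclidean projection of w onto {x : ||x||_1 <= radius}
    if np.abs(w).sum() <= radius:
        return w.copy()
    u = np.sort(np.abs(w))[::-1]          # sorted absolute values, descending
    css = np.cumsum(u)
    k = np.arange(1, w.shape[0] + 1)
    rho = np.nonzero(u * k > css - radius)[0][-1]
    theta = (css[rho] - radius) / (rho + 1.0)
    return np.sign(w) * np.maximum(np.abs(w) - theta, 0)

# project_l1_ball(ww, 2.) should agree with `ab` computed above.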
def test_group_lasso_atom():


    ps = np.array([0]*5 + [3]*3)
    weights = {3:2., 0:2.3}

    lagrange = 1.5
    lipschitz = 0.2
    p = gl.group_lasso(ps, weights=weights, lagrange=lagrange)
    z = 30 * np.random.standard_normal(8)
    q = rr.identity_quadratic(lipschitz, z, 0, 0)

    x = p.solve(q)
    a = ml.mixed_lasso_lagrange_prox(z, lagrange, lipschitz, 
                                     np.array([], int), 
                                     np.array([], int), 
                                     np.array([], int), 
                                     np.array([], int), 
                                     np.array([0,0,0,0,0,1,1,1]), np.array([np.sqrt(5), 2]))

    result = np.zeros_like(a)
    result[:5] = z[:5] / np.linalg.norm(z[:5]) * max(np.linalg.norm(z[:5]) - weights[0] * lagrange/lipschitz, 0)
    result[5:] = z[5:] / np.linalg.norm(z[5:]) * max(np.linalg.norm(z[5:]) - weights[3] * lagrange/lipschitz, 0)

    lipschitz = 1.
    q = rr.identity_quadratic(lipschitz, z, 0, 0)
    x2 = p.solve(q)
    pc = p.conjugate
    a2 = pc.solve(q)

    np.testing.assert_allclose(z-a2, x2)
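
The two `result` blocks above spell out block soft-thresholding per group. A small numpy helper stating the same formula once, as a sketch of the expected prox rather than library code:

import numpy as np

def group_soft_threshold(z, threshold):
    # shrink the whole block z toward zero by `threshold` in Euclidean norm
    norm_z = np.linalg.norm(z)
    if norm_z == 0:
        return np.zeros_like(z)
    return z / norm_z * max(norm_z - threshold, 0)

# e.g. group_soft_threshold(z[:5], weights[0] * lagrange / lipschitz) reproduces result[:5].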
Example #3
def test_l1prox():
    '''
    this test verifies that the l1 prox in lagrange form can be solved
    by a primal/dual specification 

    obviously, we don't need to solve the l1 prox this way,
    but it verifies that the specification is working correctly

    '''

    l1 = rr.l1norm(4, lagrange=0.3)
    ww = np.random.standard_normal(4)*3
    ab = l1.proximal(rr.identity_quadratic(0.5, ww, 0,0))

    l1c = copy(l1)
    l1c.quadratic = rr.identity_quadratic(0.5, ww, None, 0.)
    a = rr.simple_problem.nonsmooth(l1c)
    solver = rr.FISTA(a)
    solver.fit(tol=1.e-10)

    ad = a.coefs

    l1c = copy(l1)
    l1c.quadratic = rr.identity_quadratic(0.5, ww, None, 0.)
    a = rr.dual_problem.fromprimal(l1c)
    solver = rr.FISTA(a)
    solver.fit(tol=1.0e-14)

    ac = a.primal

    np.testing.assert_allclose(ac, ab, rtol=1.0e-4)
    np.testing.assert_allclose(ac, ad, rtol=1.0e-4)
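
In Lagrange form the prox solved above has the usual soft-thresholding closed form; assuming the quadratic coefficient 0.5 makes the effective threshold lagrange / 0.5, a minimal numpy sketch is:

import numpy as np

def soft_threshold(w, t):
    # closed-form prox of t * ||.||_1 evaluated at w
    return np.sign(w) * np.maximum(np.abs(w) - t, 0)

# soft_threshold(ww, 0.3 / 0.5) should agree with `ab`, `ad` and `ac` above.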
Example #4
    def test_simple_problem(self):
        tests = []
        atom, q, prox_center, L = self.atom, self.q, self.prox_center, self.L
        loss = self.loss

        problem = rr.simple_problem(loss, atom)
        solver = rr.FISTA(problem)
        solver.fit(tol=1.0e-12, FISTA=self.FISTA, coef_stop=self.coef_stop, min_its=100)

        tests.append((atom.proximal(q), solver.composite.coefs, 'solving prox with simple_problem with monotonicity\n %s' % str(self)))

        # write the loss in terms of a quadratic for the smooth loss and a smooth function...

        q = rr.identity_quadratic(L, prox_center, 0, 0)
        lossq = rr.quadratic.shift(prox_center.copy(), coef=0.6*L)
        lossq.quadratic = rr.identity_quadratic(0.4*L, prox_center.copy(), 0, 0)
        problem = rr.simple_problem(lossq, atom)

        tests.append((atom.proximal(q), 
              problem.solve(coef_stop=self.coef_stop, 
                            FISTA=self.FISTA, 
                            tol=1.0e-12), 
               'solving prox with simple_problem ' +
               'with monotonicity  but loss has identity_quadratic %s\n ' % str(self)))

        problem = rr.simple_problem(loss, atom)
        solver = rr.FISTA(problem)
        solver.fit(tol=1.0e-12, monotonicity_restart=False,
                   coef_stop=self.coef_stop, FISTA=self.FISTA, min_its=100)

        tests.append((atom.proximal(q), solver.composite.coefs, 'solving prox with simple_problem no monotonicity_restart\n %s' % str(self)))

        d = atom.conjugate
        problem = rr.simple_problem(loss, d)
        solver = rr.FISTA(problem)
        solver.fit(tol=1.0e-12, monotonicity_restart=False, 
                   coef_stop=self.coef_stop, FISTA=self.FISTA, min_its=100)
        tests.append((d.proximal(q), problem.solve(tol=1.e-12,
                                                FISTA=self.FISTA,
                                                coef_stop=self.coef_stop,
                                                monotonicity_restart=False), 
               'solving dual prox with simple_problem no monotonicity\n %s ' % str(self)))

        if not self.interactive:
            for test in tests:
                yield (all_close,) + test + (self,)
        else:
            for test in tests:
                yield all_close(*((test + (self,))))
def test_adding_quadratic_lasso():

    X, y, beta, active, sigma = instance(n=300, p=200)
    Q = rr.identity_quadratic(0.01, 0, np.random.standard_normal(X.shape[1]), 0)

    L1 = lasso.gaussian(X, y, 20, quadratic=Q)
    beta1 = L1.fit(solve_args={'min_its':500, 'tol':1.e-12})
    G1 = X[:,L1.active].T.dot(X.dot(beta1) - y) + Q.objective(beta1,'grad')[L1.active]
    np.testing.assert_allclose(G1 * np.sign(beta1[L1.active]), -20)

    lin = rr.identity_quadratic(0.0, 0, np.random.standard_normal(X.shape[1]), 0)
    L2 = lasso.gaussian(X, y, 20, quadratic=lin)
    beta2 = L2.fit(solve_args={'min_its':500, 'tol':1.e-12})
    G2 = X[:,L2.active].T.dot(X.dot(beta2) - y) + lin.objective(beta2,'grad')[L2.active]
    np.testing.assert_allclose(G2 * np.sign(beta2[L2.active]), -20)
Example #6
def test_conjugate_sqerror():
    """
    This verifies the conjugate class can compute the conjugate
    of a quadratic function.
    """

    ridge_coef = 0.4

    X = np.random.standard_normal((10,4))
    Y = np.random.standard_normal(10)
    l = rr.squared_error(X, Y)

    q = rr.identity_quadratic(ridge_coef,0,0,0)
    atom_conj = rr.conjugate(l, q, tol=1.e-12, min_its=100)
    w = np.random.standard_normal(4)
    u11, u12 = atom_conj.smooth_objective(w)

    # check that objective is half of squared error
    np.testing.assert_allclose(l.smooth_objective(w, mode='func'), 0.5 * np.linalg.norm(Y - np.dot(X, w))**2)
    np.testing.assert_allclose(atom_conj.atom.smooth_objective(w, mode='func'), 0.5 * np.linalg.norm(Y - np.dot(X, w))**2)

    XTX = np.dot(X.T, X) 
    XTXi = np.linalg.pinv(XTX)

    quadratic_term = XTX + ridge_coef * np.identity(4)
    linear_term = np.dot(X.T, Y) + w
    b = u22 = np.linalg.solve(quadratic_term, linear_term)
    u21 = (w*u12).sum() - l.smooth_objective(u12, mode='func') - q.objective(u12, mode='func')
    np.testing.assert_allclose(u12, u22, rtol=1.0e-05)
    np.testing.assert_approx_equal(u11, u21)
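
The reference values u21 and u22 above come from the closed form of the conjugate of a ridge-regularized squared error. A self-contained numpy version of that computation, as a sketch under the same ridge_coef convention:

import numpy as np

def sqerror_ridge_conjugate(X, Y, ridge, w):
    # conjugate of f(b) = 0.5*||Y - X b||^2 + (ridge/2)*||b||^2 at w:
    # the maximizer solves (X'X + ridge*I) b = X'Y + w; the value is <w, b> - f(b).
    H = X.T.dot(X) + ridge * np.identity(X.shape[1])
    b = np.linalg.solve(H, X.T.dot(Y) + w)
    value = w.dot(b) - 0.5 * np.sum((Y - X.dot(b)) ** 2) - 0.5 * ridge * b.dot(b)
    return value, b   # (conjugate value, its gradient)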
def test_gaussian(n=100, p=20):

    y = np.random.standard_normal(n)
    X = np.random.standard_normal((n,p))

    lam_theor = np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 1000)))).max(0))
    Q = rr.identity_quadratic(0.01, 0, np.ones(p), 0)

    weights_with_zeros = 0.5*lam_theor * np.ones(p)
    weights_with_zeros[:3] = 0.

    huge_weights = weights_with_zeros * 10000

    for q, fw in product([Q, None],
                         [0.5*lam_theor, weights_with_zeros, huge_weights]):

        L = lasso.gaussian(X, y, fw, 1., quadratic=Q)
        L.fit()
        C = L.constraints

        sandwich = gaussian_sandwich_estimator(X, y)
        L = lasso.gaussian(X, y, fw, 1., quadratic=Q, covariance_estimator=sandwich)
        L.fit()
        C = L.constraints

        S = L.summary('onesided', compute_intervals=True)
        S = L.summary('twosided')

        nt.assert_raises(ValueError, L.summary, 'none')
        print(L.active)
        yield (np.testing.assert_array_less,
               np.dot(L.constraints.linear_part, L.onestep_estimator),
               L.constraints.offset)
Example #8
def test_conjugate_l1norm():
    '''
    this test verifies that numerically computing the conjugate
    is essentially the same as using the smooth_conjugate
    of the atom
    '''

    q = rr.identity_quadratic(1.2,0,0,0)
    l1 = rr.l1norm(4, lagrange=0.3)
    pen2 = copy(l1)
    pen2.set_quadratic(q)

    v1 = rr.smooth_conjugate(l1, q)
    v2 = rr.conjugate(l1, q, tol=1.e-12, min_its=100)
    v3 = rr.conjugate(pen2, None, tol=1.e-12, min_its=100)
    w = np.random.standard_normal(4)

    u11, u12 = v1.smooth_objective(w)
    u21, u22 = v2.smooth_objective(w)
    u31, u32 = v3.smooth_objective(w)
    np.testing.assert_approx_equal(u11, u21)
    np.testing.assert_allclose(u12, u22, rtol=1.0e-05)
    np.testing.assert_approx_equal(u11, u31)
    np.testing.assert_allclose(u12, u32, rtol=1.0e-05)

    v2.smooth_objective(w, mode='func')
    v2.smooth_objective(w, mode='grad')
    nt.assert_raises(ValueError, v2.smooth_objective, w, 'blah')
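
The smooth conjugate compared above also has a closed form built from soft-thresholding; for the constants used here (lagrange 0.3, quadratic coefficient 1.2), a minimal numpy sketch is:

import numpy as np

def l1_smooth_conjugate(w, lam=0.3, eps=1.2):
    # sup_x <w, x> - lam*||x||_1 - (eps/2)*||x||^2  =  ||ST(w, lam)||^2 / (2*eps),
    # with gradient ST(w, lam)/eps, where ST is coordinatewise soft-thresholding.
    st = np.sign(w) * np.maximum(np.abs(w) - lam, 0)
    return (st ** 2).sum() / (2 * eps), st / eps

# l1_smooth_conjugate(w) should agree with (u11, u12) computed above.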
def test_sqrt_lasso(n=100, p=20):

    y = np.random.standard_normal(n)
    X = np.random.standard_normal((n,p))

    lam_theor = np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 1000)))).max(0)) / np.sqrt(n)
    Q = rr.identity_quadratic(0.01, 0, np.random.standard_normal(p) / 5., 0)

    weights_with_zeros = 0.5*lam_theor * np.ones(p)
    weights_with_zeros[:3] = 0.

    huge_weights = weights_with_zeros * 10000

    for q, fw in product([None, Q],
                         [0.5*lam_theor, weights_with_zeros, huge_weights]):

        L = lasso.sqrt_lasso(X, y, fw, quadratic=q, solve_args={'min_its':300, 'tol':1.e-12})
        L.fit(solve_args={'min_its':300, 'tol':1.e-12})
        C = L.constraints

        S = L.summary('onesided', compute_intervals=True)
        S = L.summary('twosided')

        yield (np.testing.assert_array_less,
               np.dot(L.constraints.linear_part, L.onestep_estimator),
               L.constraints.offset)
    def __iter__(self):
        for offset, FISTA, coef_stop, L, q, groups in itertools.product(self.offset_choices,
                                                                        self.FISTA_choices,
                                                                        self.coef_stop_choices,
                                                                        self.L_choices,
                                                                        self.quadratic_choices,
                                                                        self.group_choices):
            self.FISTA = FISTA
            self.coef_stop = coef_stop
            self.L = L

            if self.mode == 'lagrange':
                atom = self.klass(groups, lagrange=self.lagrange)
            else:
                atom = self.klass(groups, bound=self.bound)

            if q: 
                atom.quadratic = rr.identity_quadratic(0,0,np.random.standard_normal(atom.shape)*0.02)

            if offset:
                atom.offset = 0.02 * np.random.standard_normal(atom.shape)

            solver = Solver(atom, interactive=self.interactive, 
                            coef_stop=coef_stop,
                            FISTA=FISTA,
                            L=L)
            yield solver
Example #11
def test_proximal_maps():
    bound = 0.14
    lagrange = 0.13
    shape = 20

    Z = np.random.standard_normal(shape) * 4
    W = 0.02 * np.random.standard_normal(shape)
    U = 0.02 * np.random.standard_normal(shape)
    linq = rr.identity_quadratic(0, 0, W, 0)

    for L, atom, q, offset, FISTA, coef_stop in itertools.product(
        [0.5, 1, 0.1],
        [A.l1norm, A.supnorm, A.l2norm, A.positive_part, A.constrained_max],
        [None, linq],
        [None, U],
        [False, True],
        [True, False],
    ):

        p = atom(shape, lagrange=lagrange, quadratic=q, offset=offset)
        d = p.conjugate
        yield ac, p.lagrange_prox(Z, lipschitz=L), Z - d.bound_prox(
            Z * L, lipschitz=1.0 / L
        ) / L, "testing lagrange_prox and bound_prox starting from atom %s " % atom
        # some arguments of the constructor

        nt.assert_raises(AttributeError, setattr, p, "bound", 4.0)
        nt.assert_raises(AttributeError, setattr, d, "lagrange", 4.0)

        nt.assert_raises(AttributeError, setattr, p, "bound", 4.0)
        nt.assert_raises(AttributeError, setattr, d, "lagrange", 4.0)

        for t in solveit(p, Z, W, U, linq, L, FISTA, coef_stop):
            yield t

        b = atom(shape, bound=bound, quadratic=q, offset=offset)

        for t in solveit(b, Z, W, U, linq, L, FISTA, coef_stop):
            yield t

    lagrange = 0.1
    for L, atom, q, offset, FISTA, coef_stop in itertools.product(
        [0.5, 1, 0.1], sorted(A.nonpaired_atoms), [None, linq], [None, U], [False, True], [False, True]
    ):

        p = atom(shape, lagrange=lagrange, quadratic=q, offset=offset)
        d = p.conjugate
        yield ac, p.lagrange_prox(Z, lipschitz=L), Z - d.bound_prox(
            Z * L, lipschitz=1.0 / L
        ) / L, "testing lagrange_prox and bound_prox starting from atom %s " % atom
        # some arguments of the constructor

        nt.assert_raises(AttributeError, setattr, p, "bound", 4.0)
        nt.assert_raises(AttributeError, setattr, d, "lagrange", 4.0)

        nt.assert_raises(AttributeError, setattr, p, "bound", 4.0)
        nt.assert_raises(AttributeError, setattr, d, "lagrange", 4.0)

        for t in solveit(p, Z, W, U, linq, L, FISTA, coef_stop):
            yield t
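
The yielded triples above check a Moreau-decomposition identity between lagrange_prox and the dual bound_prox. For the l1/supnorm pair it collapses to a one-line numpy identity; a sketch with the same lagrange and lipschitz conventions:

import numpy as np

Z = np.random.standard_normal(20) * 4
lam, L = 0.13, 0.5
lhs = np.sign(Z) * np.maximum(np.abs(Z) - lam / L, 0)   # lagrange prox of lam*||.||_1 with lipschitz L
rhs = Z - np.clip(Z, -lam / L, lam / L)                 # Z minus projection onto the dual supnorm ball
assert np.allclose(lhs, rhs)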
    def step_valid(self,
                   max_trials=10):
        """
        Try to move Y_valid by accept/reject sampling,
        stopping after `max_trials`.
        """

        X, L, mults = self.X, self.L, self.mults
        n, p = X.shape

        count = 0
        Q_old = self.Q_valid

        while True:
            count += 1
            self.Q_valid = self.Q_inter + identity_quadratic(0, 0, self.randomization.rvs(size=self.X.shape[1]) * 
                                                             self.scale_valid, 0) 

            if len(self.mults) > 0:
                proposal_value = self.choose_lambda(self.Y,
                                                    shift_size=0)

                if proposal_value[0] in self.accept_values:
                    break
            else:
                break

            if count >= max_trials:
                self.Q_valid = Q_old
                break
Example #13
    def __init__(self, loss, 
                 linear_randomization,
                 quadratic_coef,
                 randomization, 
                 penalty,
                 solve_args={'tol':1.e-10, 'min_its':100, 'max_its':500}):

        (self.loss,
         self.linear_randomization,
         self.randomization,
         self.quadratic_coef) = (loss,
                                 linear_randomization,
                                 randomization,
                                 quadratic_coef)

        # initialize optimization problem

        self.penalty = penalty
        self.problem = rr.simple_problem(loss, penalty)

        random_term = rr.identity_quadratic(
                                quadratic_coef, 0, 
                                self.linear_randomization, 0)

        self.initial_soln = self.problem.solve(random_term,
                                               **solve_args)
        self.initial_grad = self.loss.smooth_objective(self.initial_soln, 
                                                       mode='grad')
        self.opt_vars = self.penalty.setup_sampling( \
            self.initial_grad,
            self.initial_soln,
            self.linear_randomization,
            self.quadratic_coef)
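
Throughout these examples rr.identity_quadratic(c, center, linear, constant) is used to bolt a quadratic-plus-linear term onto an objective. On my reading of the convention (an assumption, not the library's documented API) it evaluates (c/2)*||x - center||^2 + <linear, x> + constant, so random_term above contributes a small ridge term plus a random linear tilt. A hedged numpy rendering of that value:

import numpy as np

def identity_quadratic_value(c, center, linear, constant, x):
    # assumed convention: (c/2)*||x - center||^2 + <linear, x> + constant
    return 0.5 * c * np.sum((x - center) ** 2) + np.sum(linear * x) + constant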
Example #14
def test_gaussian(n=100, p=20):

    y = np.random.standard_normal(n)
    X = np.random.standard_normal((n,p))

    lam_theor = np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 1000)))).max(0))
    Q = identity_quadratic(0.01, 0, np.ones(p), 0)

    weights_with_zeros = 0.1 * np.ones(p)
    weights_with_zeros[:3] = 0.

    for q, fw in product([Q, None],
                         [0.5*lam_theor, weights_with_zeros]):

        L = lasso.gaussian(X, y, fw, 1., quadratic=Q)
        L.fit()
        C = L.constraints

        I = L.intervals
        S = L.summary('onesided')
        S = L.summary('twosided')

        yield (np.testing.assert_array_less,
               np.dot(L.constraints.linear_part, L._onestep),
               L.constraints.offset)
Example #15
def test_lasso():
    '''
    this test verifies that the l1 prox can be solved
    by a primal/dual specification 

    obviously, we don't need to solve the l1 prox this way,
    but it verifies that the specification is working correctly

    '''

    l1 = rr.l1norm(4, lagrange=2.)
    l1.quadratic = rr.identity_quadratic(0.5, 0, None, 0.)

    X = np.random.standard_normal((10,4))
    Y = np.random.standard_normal(10) + 3
    
    loss = rr.quadratic.affine(X, -Y, coef=0.5)

    p2 = rr.separable_problem.singleton(l1, loss)
    solver2 = rr.FISTA(p2)
    solver2.fit(tol=1.0e-14, min_its=100)


    f = p2.objective
    ans = scipy.optimize.fmin_powell(f, np.zeros(4), ftol=1.0e-12, xtol=1.e-10)

    print(f(solver2.composite.coefs), f(ans))
    np.testing.assert_allclose(ans + 0.1, solver2.composite.coefs + 0.1, rtol=1.e-3)
Example #16
    def __iter__(self):
        for offset, FISTA, coef_stop, L, q, w in itertools.product(self.offset_choices,
                                                                   self.FISTA_choices,
                                                                   self.coef_stop_choices,
                                                                   self.L_choices,
                                                                   self.quadratic_choices,
                                                                   self.weight_choices):
            self.FISTA = FISTA
            self.coef_stop = coef_stop
            self.L = L

            if self.mode == 'lagrange':
                atom = self.klass(w, lagrange=self.lagrange)
            else:
                atom = self.klass(w, bound=self.bound)
            atom.use_sklearn = self.use_sklearn and have_sklearn_iso # test out both prox maps if available

            if q: 
                atom.quadratic = rr.identity_quadratic(0, 0, np.random.standard_normal(atom.shape)*0.02)

            if offset:
                atom.offset = 0.02 * np.random.standard_normal(atom.shape)

            solver = Solver(atom, interactive=self.interactive, 
                            coef_stop=coef_stop,
                            FISTA=FISTA,
                            L=L)
            yield solver
Example #17
def test_proximal_method():

    X = np.random.standard_normal((100, 50))
    X[:,:7] *= 5

    qX = identity_quadratic(1,X,0,0)
    P = FM.nuclear_norm(X.shape, lagrange=1)
    RP = todense(P.proximal(qX))

    B = FM.nuclear_norm(X.shape, bound=1)
    RB = todense(B.proximal(qX))

    BO = FM.operator_norm(X.shape, bound=1)
    PO = FM.operator_norm(X.shape, lagrange=1)

    RPO = todense(PO.proximal(qX))
    RBO = todense(BO.proximal(qX))

    D = np.linalg.svd(X, full_matrices=0)[1]
    lD = np.linalg.svd(RP, full_matrices=0)[1]
    lagrange_rank = (lD > 1.e-10).sum()
    all_close(lD[:lagrange_rank] + P.lagrange, D[:lagrange_rank], 'proximal method lagrange', None)

    bD = np.linalg.svd(RB, full_matrices=0)[1]
    bound_rank = (bD > 1.e-10).sum()

    all_close(bD[:bound_rank], projl1(D, B.bound)[:bound_rank], 'proximal method bound', None)

    nt.assert_true(np.linalg.norm(RPO+RB-X) / np.linalg.norm(X) < 0.01)
    nt.assert_true(np.linalg.norm(RBO+RP-X) / np.linalg.norm(X) < 0.01)
def test_sqrt_lasso_pvals(n=100,
                          p=200,
                          s=7,
                          sigma=5,
                          rho=0.3,
                          snr=7.):

    counter = 0

    while True:
        counter += 1
        X, y, beta, active, sigma = instance(n=n, 
                                             p=p, 
                                             s=s, 
                                             sigma=sigma, 
                                             rho=rho, 
                                             snr=snr)

        lam_theor = np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 1000)))).max(0)) / np.sqrt(n)
        Q = rr.identity_quadratic(0.01, 0, np.ones(p), 0)

        weights_with_zeros = 0.7*lam_theor * np.ones(p)
        weights_with_zeros[:3] = 0.

        L = lasso.sqrt_lasso(X, y, weights_with_zeros)
        L.fit()
        v = {1:'twosided',
             0:'onesided'}[counter % 2]
        if set(active).issubset(L.active):
            S = L.summary(v)
            return [p for p, v in zip(S['pval'], S['variable']) if v not in active]
Example #19
    def __iter__(self):
        for offset, FISTA, coef_stop, L, q in itertools.product(self.offset_choices,
                                                                self.FISTA_choices,
                                                                self.coef_stop_choices,
                                                                self.L_choices,
                                                                self.quadratic_choices):
            self.FISTA = FISTA
            self.coef_stop = coef_stop
            self.L = L

            if self.mode == 'lagrange':
                atom = self.klass(self.shape, lagrange=self.lagrange)
            else:
                atom = self.klass(self.shape, bound=self.bound)

            if q: 
                atom.quadratic = rr.identity_quadratic(0,0,np.random.standard_normal(atom.shape)*0.02)

            if offset:
                atom.offset = 0.02 * np.random.standard_normal(atom.shape)

            solver = Solver(atom, interactive=self.interactive, 
                            coef_stop=coef_stop,
                            FISTA=FISTA,
                            L=L)

            # make sure certain lines of code are tested
            assert(atom == atom)
            atom.latexify(), atom.dual, atom.conjugate

            yield solver
Example #20
def test_proximal_maps():

    X = np.random.standard_normal((100, 50))
    X[:,:7] *= 5

    P = FM.nuclear_norm(X.shape, lagrange=1)
    RP = todense(P.lagrange_prox(X))

    B = FM.nuclear_norm(X.shape, bound=1)
    RB = todense(B.bound_prox(X))

    BO = FM.operator_norm(X.shape, bound=1)
    PO = FM.operator_norm(X.shape, lagrange=1)

    RPO = todense(PO.lagrange_prox(X))
    RBO = todense(BO.bound_prox(X))

    D = np.linalg.svd(X, full_matrices=0)[1]
    lD = np.linalg.svd(RP, full_matrices=0)[1]
    lagrange_rank = (lD > 1.e-10).sum()
    all_close(lD[:lagrange_rank] + P.lagrange, D[:lagrange_rank], 'proximal lagrange', None)

    bD = np.linalg.svd(RB, full_matrices=0)[1]
    bound_rank = (bD > 1.e-10).sum()

    all_close(bD[:bound_rank], projl1(D, B.bound)[:bound_rank], 'proximal bound', None)

    nt.assert_true(np.linalg.norm(RPO+RB-X) / np.linalg.norm(X) < 0.01)
    nt.assert_true(np.linalg.norm(RBO+RP-X) / np.linalg.norm(X) < 0.01)

    # running code to ensure it is tested

    P.conjugate
    P.quadratic = identity_quadratic(1, 0, 0, 0)
    P.conjugate

    BO.conjugate
    BO.quadratic = identity_quadratic(1, 0, 0, 0)
    BO.conjugate

    B.conjugate
    B.quadratic = identity_quadratic(1, 0, 0, 0)
    B.conjugate

    PO.conjugate
    PO.quadratic = identity_quadratic(1, 0, 0, 0)
    PO.conjugate
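
The rank checks above rest on the fact that the nuclear-norm prox soft-thresholds singular values (and the bound form instead projects them onto an l1 ball). A minimal numpy sketch of the Lagrange-form map:

import numpy as np

def nuclear_norm_prox(X, lam):
    # prox of lam * ||.||_* : keep the singular vectors, soft-threshold the singular values
    U, D, Vt = np.linalg.svd(X, full_matrices=False)
    return (U * np.maximum(D - lam, 0)).dot(Vt)

# the singular values of nuclear_norm_prox(X, 1.) equal max(D - 1, 0), matching `lD` above.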
Example #21
def test_simple():
    Z = np.random.standard_normal(100) * 4
    p = rr.l1norm(100, lagrange=0.13)
    L = 0.14

    loss = rr.quadratic.shift(-Z, coef=L)
    problem = rr.simple_problem(loss, p)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-10, debug=True)

    simple_coef = solver.composite.coefs
    prox_coef = p.proximal(rr.identity_quadratic(L, Z, 0, 0))

    p2 = rr.l1norm(100, lagrange=0.13)
    p2 = copy(p)
    p2.quadratic = rr.identity_quadratic(L, Z, 0, 0)
    problem = rr.simple_problem.nonsmooth(p2)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-14, debug=True)
    simple_nonsmooth_coef = solver.composite.coefs

    p = rr.l1norm(100, lagrange=0.13)
    p.quadratic = rr.identity_quadratic(L, Z, 0, 0)
    problem = rr.simple_problem.nonsmooth(p)
    simple_nonsmooth_gengrad = gengrad(problem, L, tol=1.0e-10)

    p = rr.l1norm(100, lagrange=0.13)
    problem = rr.separable_problem.singleton(p, loss)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-10)
    separable_coef = solver.composite.coefs

    loss2 = rr.quadratic.shift(-Z, coef=0.6*L)
    loss2.quadratic = rr.identity_quadratic(0.4*L, Z, 0, 0)
    p.coefs *= 0
    problem2 = rr.simple_problem(loss2, p)
    loss2_coefs = problem2.solve(coef_stop=True)
    solver2 = rr.FISTA(problem2)
    solver2.fit(tol=1.0e-10, debug=True, coef_stop=True)

    yield ac, prox_coef, simple_nonsmooth_gengrad, 'prox to nonsmooth gengrad'
    yield ac, prox_coef, separable_coef, 'prox to separable'
    yield ac, prox_coef, simple_nonsmooth_coef, 'prox to simple_nonsmooth'
    yield ac, prox_coef, simple_coef, 'prox to simple'
    yield ac, prox_coef, loss2_coefs, 'simple where loss has quadratic 1'
    yield ac, prox_coef, solver2.composite.coefs, 'simple where loss has quadratic 2'
Example #22
def test_conjugate_l1norm():
    '''
    this test verifies that numerically computing the conjugate
    is essentially the same as using the smooth_conjugate
    of the atom
    '''


    l1 = rr.l1norm(4, lagrange=0.3)
    v1 = rr.smooth_conjugate(l1, rr.identity_quadratic(0.3, None, None, 0))
    v2 = rr.conjugate(l1, rr.identity_quadratic(0.3, None, None, 0), tol=1.e-12)
    w = np.random.standard_normal(4)

    u11, u12 = v1.smooth_objective(w)
    u21, u22 = v2.smooth_objective(w)
    np.testing.assert_approx_equal(u11, u21)
    np.testing.assert_allclose(u12, u22, rtol=1.0e-05)
Example #23
def test_nonnegative_positive_part(debug=False):
    """
    This test verifies that using a nonnegative constraint
    with a linear term, leaving some terms unpenalized, yields the same result
    as using separable with constrained_positive_part and nonnegative
    """
    import numpy as np
    import regreg.api as rr
    import regreg.atoms as rra

    # N - number of data points
    # P - number of columns in design == number of betas
    N, P = 40, 30
    # an arbitrary positive offset for data and design
    offset = 2
    # data
    Y = np.random.normal(size=(N,)) + offset
    # design - with ones as last column
    X = np.ones((N,P))
    X[:,:-1] = np.random.normal(size=(N,P-1)) + offset
    # coef for loss
    coef = 0.5
    # lagrange for penalty
    lagrange = .1

    # Loss function (squared difference between fitted and actual data)
    loss = rr.quadratic.affine(X, -Y, coef=coef)

    # Penalty using nonnegative, leave the last 5 unpenalized but
    # nonnegative
    weights = np.ones(P) * lagrange
    weights[-5:] = 0
    linq = rr.identity_quadratic(0,0,weights,0)
    penalty = rr.nonnegative(P, quadratic=linq)

    # Solution

    composite_form = rr.separable_problem.singleton(penalty, loss)
    solver = rr.FISTA(composite_form)
    solver.debug = debug
    solver.fit(tol=1.0e-12, min_its=200)
    coefs = solver.composite.coefs

    # using the separable penalty, only penalize the first
    # 25 coefficients with constrained_positive_part

    penalties_s = [rr.constrained_positive_part(25, lagrange=lagrange),
                   rr.nonnegative(5)]
    groups_s = [slice(0,25), slice(25,30)]
    penalty_s = rr.separable((P,), penalties_s,
                             groups_s)
    composite_form_s = rr.separable_problem.singleton(penalty_s, loss)
    solver_s = rr.FISTA(composite_form_s)
    solver_s.debug = debug
    solver_s.fit(tol=1.0e-12, min_its=200)
    coefs_s = solver_s.composite.coefs

    nt.assert_true(np.linalg.norm(coefs - coefs_s) / np.linalg.norm(coefs) < 1.0e-02)
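
The equivalence tested above reduces to a coordinatewise identity: minimizing (L/2)*(x - z)**2 + w*x over x >= 0 gives max(z - w/L, 0), which is exactly the constrained_positive_part prox with lagrange w. A minimal sketch:

import numpy as np

def nonneg_linear_prox(z, w, L=1.0):
    # argmin over x >= 0 of (L/2)*(x - z)**2 + w*x
    return np.maximum(z - w / L, 0)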
    def randomize(self):
        """
        Carry out the randomization,
        finding the value of lambda
        as well as the selected variables and signs.

        Initializes the attributes: [Y_inter, Y_valid, Y_select].
        """

        n = self.Y.shape[0]

        # intermediate between 
        # CV and model selection 
        # and the actual data

        self.Q_inter = identity_quadratic(0, 0, self.randomization.rvs(size=self.X.shape[1]) * self.scale_inter, 0)
        self.Q_valid = self.Q_inter + identity_quadratic(0, 0, self.randomization.rvs(size=self.X.shape[1]) * self.scale_valid, 0) 
        self.Q_select = self.Q_inter + identity_quadratic(0, 0, self.randomization.rvs(size=self.X.shape[1]) * self.scale_select, 0)
Example #25
def test_lasso_separable():
    """
    This test verifies that the specification of a separable
    penalty yields the same results as having two linear_atoms
    with selector matrices. The penalty here is a lasso, i.e. l1
    penalty.
    """

    X = np.random.standard_normal((100,20))
    Y = np.random.standard_normal((100,)) + np.dot(X, np.random.standard_normal(20))

    penalty1 = rr.l1norm(10, lagrange=1.2)
    penalty2 = rr.l1norm(10, lagrange=1.2)
    penalty = rr.separable((20,), [penalty1, penalty2], [slice(0,10), slice(10,20)], test_for_overlap=True)

    # ensure code is tested

    print(penalty1.latexify())

    print(penalty.latexify())
    print(penalty.conjugate)
    print(penalty.dual)
    print(penalty.seminorm(np.ones(penalty.shape)))
    print(penalty.constraint(np.ones(penalty.shape), bound=2.))

    pencopy = copy(penalty)
    pencopy.set_quadratic(rr.identity_quadratic(1,0,0,0))
    pencopy.conjugate

    # solve using separable
    
    loss = rr.quadratic.affine(X, -Y, coef=0.5)
    problem = rr.separable_problem.fromatom(penalty, loss)
    solver = rr.FISTA(problem)
    solver.fit(min_its=200, tol=1.0e-12)
    coefs = solver.composite.coefs

    # solve using the usual composite

    penalty_all = rr.l1norm(20, lagrange=1.2)
    problem_all = rr.container(loss, penalty_all)
    solver_all = rr.FISTA(problem_all)
    solver_all.fit(min_its=100, tol=1.0e-12)

    coefs_all = solver_all.composite.coefs

    # solve using the selectors

    penalty_s = [rr.linear_atom(p, rr.selector(g, (20,))) for p, g in
                 zip(penalty.atoms, penalty.groups)]
    problem_s = rr.container(loss, *penalty_s)
    solver_s = rr.FISTA(problem_s)
    solver_s.fit(min_its=500, tol=1.0e-12)
    coefs_s = solver_s.composite.coefs

    np.testing.assert_almost_equal(coefs, coefs_all)
    np.testing.assert_almost_equal(coefs, coefs_s)
Example #26
    def test_duality_of_projections(self):
        if self.atom.quadratic == rr.identity_quadratic(0,0,0,0) or self.atom.quadratic is None:

            tests = []

            d = self.atom.conjugate
            q = rr.identity_quadratic(1, self.prox_center, 0, 0)
            tests.append((self.prox_center-self.atom.proximal(q), d.proximal(q), 'testing duality of projections starting from atom\n %s ' % str(self)))

            if hasattr(self.atom, 'check_subgradient') and self.atom.offset is None:
                # check subgradient condition
                v1, v2 = self.atom.check_subgradient(self.atom, self.prox_center)
                tests.append((v1, v2, 'checking subgradient condition\n %s' % str(self)))

            if not self.interactive:
                for test in tests:
                    yield (all_close,) + test + (self,)
            else:
                for test in tests:
                    yield all_close(*((test + (self,))))
Example #27
def test_quadratic():

    l = rr.quadratic(5, coef=3., offset=np.arange(5))
    l.quadratic = rr.identity_quadratic(1,np.ones(5), 2*np.ones(5), 3.)
    c1 = l.get_conjugate(as_quadratic=True)

    q1 = rr.identity_quadratic(3, -np.arange(5), 0, 0)
    q2 = q1 + l.quadratic
    c2 = rr.zero(5, quadratic=q2.collapsed()).conjugate

    ww = np.random.standard_normal(5)
    np.testing.assert_almost_equal(c2.smooth_objective(ww, 'grad'),
                                   c1.smooth_objective(ww, 'grad'))

    np.testing.assert_almost_equal(c2.objective(ww),
                                   c1.objective(ww))

    np.testing.assert_almost_equal(c2.smooth_objective(ww, 'func') + 
                                   c2.nonsmooth_objective(ww),
                                   c1.smooth_objective(ww, 'func') + 
                                   c1.nonsmooth_objective(ww))
Example #28
    def test_container(self):
        tests = []
        atom, q, prox_center, L = self.atom, self.q, self.prox_center, self.L
        loss = self.loss

        problem = rr.container(loss, atom)
        solver = rr.FISTA(problem)
        solver.fit(tol=1.0e-12, 
                   coef_stop=self.coef_stop, FISTA=self.FISTA)

        tests.append((atom.proximal(q), solver.composite.coefs, 'solving atom prox with container\n %s ' % str(self)))

        # write the loss in terms of a quadratic for the smooth loss and a smooth function...

        q = rr.identity_quadratic(L, prox_center, 0, 0)
        lossq = rr.quadratic.shift(prox_center.copy(), coef=0.6*L)
        lossq.quadratic = rr.identity_quadratic(0.4*L, prox_center.copy(), 0, 0)
        problem = rr.container(lossq, atom)
        solver = rr.FISTA(problem)
        solver.fit(tol=1.0e-12, FISTA=self.FISTA, coef_stop=self.coef_stop)

        tests.append((atom.proximal(q), 
                      problem.solve(tol=1.e-12,FISTA=self.FISTA,coef_stop=self.coef_stop), 
                      'solving prox with container with monotonicity ' + 
                      'but loss has identity_quadratic\n %s ' % str(self)))

        d = atom.conjugate
        problem = rr.container(d, loss)
        solver = rr.FISTA(problem)
        solver.fit(tol=1.0e-12, 
                   coef_stop=self.coef_stop, FISTA=self.FISTA)
        tests.append((d.proximal(q), solver.composite.coefs, 'solving dual prox with container\n %s ' % str(self)))

        if not self.interactive:
            for test in tests:
                yield (all_close,) + test + (self,)
        else:
            for test in tests:
                yield all_close(*((test + (self,))))
Example #29
def test_conjugate_sqerror():

    X = np.random.standard_normal((10,4))
    Y = np.random.standard_normal(10)
    l = rr.quadratic.affine(X,-Y, coef=0.5)
    v = rr.conjugate(l, rr.identity_quadratic(0.3,None,None,0), tol=1.e-12)
    w = np.random.standard_normal(4)
    u11, u12 = v.smooth_objective(w)

    XTX = np.dot(X.T, X) 
    b = u22 = np.linalg.solve(XTX + 0.3 * np.identity(4), np.dot(X.T, Y) + w)
    u21 = - np.dot(b.T, np.dot(XTX + 0.3 * np.identity(4), b)) / 2. + (w*b).sum()  + (np.dot(X.T, Y) * b).sum() - np.linalg.norm(Y)**2/2.
    np.testing.assert_approx_equal(u11, u21)
    np.testing.assert_allclose(u12, u22, rtol=1.0e-05)
Example #30
def test_gengrad_blocknorms():
    Z = np.random.standard_normal((10, 10)) * 4
    p = rr.l1_l2((10, 10), lagrange=0.13)
    dual = p.conjugate
    L = 0.23

    loss = rr.quadratic_loss.shift(Z, coef=L)
    problem = rr.simple_problem(loss, p)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-10, debug=True)
    simple_coef = solver.composite.coefs

    q = rr.identity_quadratic(L, Z, 0, 0)
    prox_coef = p.proximal(q)

    p2 = copy(p)
    p2.quadratic = rr.identity_quadratic(L, Z, 0, 0)
    problem = rr.simple_problem.nonsmooth(p2)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-14, debug=True)
    simple_nonsmooth_coef = solver.composite.coefs

    p = rr.l1_l2((10, 10), lagrange=0.13)
    p.quadratic = rr.identity_quadratic(L, Z, 0, 0)
    problem = rr.simple_problem.nonsmooth(p)
    simple_nonsmooth_gengrad = rr.gengrad(problem, L, tol=1.0e-10)

    p = rr.l1_l2((10, 10), lagrange=0.13)
    problem = rr.separable_problem.singleton(p, loss)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-10)
    separable_coef = solver.composite.coefs

    yield (all_close, prox_coef, simple_coef, "prox to simple", None)
    yield (all_close, prox_coef, simple_nonsmooth_gengrad, "prox to nonsmooth gengrad", None)
    yield (all_close, prox_coef, separable_coef, "prox to separable", None)
    yield (all_close, prox_coef, simple_nonsmooth_coef, "prox to simple_nonsmooth", None)
Example #31
    def _solve_randomized_problem(self,
                                  perturb=None,
                                  solve_args={
                                      'tol': 1.e-12,
                                      'min_its': 50
                                  }):
        p = self.nfeature

        # take a new perturbation if supplied
        if perturb is not None:
            self._initial_omega = perturb
        if self._initial_omega is None:
            self._initial_omega = self.randomizer.sample()

        quad = rr.identity_quadratic(self.ridge_term, 0, -self._initial_omega,
                                     0)
        problem = rr.simple_problem(self.loglike, self.penalty)
        initial_soln = problem.solve(quad, **solve_args)
        initial_subgrad = -(self.loglike.smooth_objective(
            initial_soln, 'grad') + quad.objective(initial_soln, 'grad'))

        return initial_soln, initial_subgrad
Example #32
def test_proximal_maps():
    shape = (5, 4)

    bound = 0.14
    lagrange = 0.13

    Z = np.random.standard_normal(shape) * 2
    W = 0.02 * np.random.standard_normal(shape)
    U = 0.02 * np.random.standard_normal(shape)
    linq = rr.identity_quadratic(0, 0, W, 0)

    basis = np.linalg.svd(np.random.standard_normal((4, 20)),
                          full_matrices=0)[2]

    for L, atom, q, offset, FISTA, coef_stop in itertools.product(
        [0.5, 1, 0.1], sorted(S.conjugate_svd_pairs.keys()), [None, linq],
        [None, U], [False, True], [False, True]):

        p = atom(shape, quadratic=q, lagrange=lagrange, offset=offset)
        d = p.conjugate
        yield ac, p.lagrange_prox(Z, lipschitz=L), Z - d.bound_prox(
            Z * L, lipschitz=1. / L
        ) / L, 'testing lagrange_prox and bound_prox starting from atom %s ' % atom

        # some arguments of the constructor

        nt.assert_raises(AttributeError, setattr, p, 'bound', 4.)
        nt.assert_raises(AttributeError, setattr, d, 'lagrange', 4.)

        nt.assert_raises(AttributeError, setattr, p, 'bound', 4.)
        nt.assert_raises(AttributeError, setattr, d, 'lagrange', 4.)

        for t in solveit(p, Z, W, U, linq, L, FISTA, coef_stop):
            yield t

        b = atom(shape, bound=bound, quadratic=q, offset=offset)

        for t in solveit(b, Z, W, U, linq, L, FISTA, coef_stop):
            yield t
Example #33
def test_class():

    n, p = (10, 5)
    D = np.random.standard_normal((n,p))
    v = np.random.standard_normal(n)
    pen = rr.l1norm.affine(D, v, lagrange=0.4)

    pen2 = rr.l1norm(n, lagrange=0.4, offset=np.random.standard_normal(n))
    pen2.quadratic = None
    cls = type(pen)
    pen_aff = cls(pen2, rr.affine_transform(D, v))

    for _pen in [pen, pen_aff]:
        # Run to ensure code gets executed in tests (smoke test)
        print(_pen.dual)
        print(_pen.latexify())
        print(str(_pen))
        print(repr(_pen))
        print(_pen._repr_latex_())
        _pen.nonsmooth_objective(np.random.standard_normal(p))
        q = rr.identity_quadratic(0.5,0,0,0)
        smoothed_pen = _pen.smoothed(q)
Example #34
def test_lasso_dual_from_primal(l1=.1, L=2.):
    """
    Check that the solution of the lasso signal approximator dual composite is soft-thresholding when called from the primal composite object
    """

    sparsity = R.l1norm(500, lagrange=l1)
    x = np.random.normal(0, 1, 500)
    y = np.random.normal(0, 1, 500)

    X = np.random.standard_normal((1000, 500))
    Y = np.random.standard_normal((1000, ))
    regloss = R.quadratic.affine(-X, Y)
    p = R.container(regloss, sparsity)

    z = x - y / L
    soln = p.proximal(R.identity_quadratic(L, z, 0, 0))
    st = np.maximum(np.fabs(z) - l1 / L, 0) * np.sign(z)

    print(x[:10])
    print(soln[:10])
    print(st[:10])
    np.testing.assert_almost_equal(soln, st, decimal=3)
Example #35
def test_coxph():

    Q = rr.identity_quadratic(0.01, 0, np.ones(5), 0)
    X = np.random.standard_normal((100, 5))
    T = np.random.standard_exponential(100)
    S = np.random.binomial(1, 0.5, size=(100, ))

    L = lasso.coxph(X, T, S, 0.1, quadratic=Q)
    L.fit()

    L = lasso.coxph(X, T, S, 0.1, quadratic=Q)
    L.fit()

    C = L.constraints

    np.testing.assert_array_less( \
        np.dot(L.constraints.linear_part, L.onestep_estimator),
        L.constraints.offset)

    P = L.summary()['pval']

    return L, C, P
Example #36
def test_gaussian(n=100, p=20):

    y = np.random.standard_normal(n)
    X = np.random.standard_normal((n, p))

    lam_theor = np.mean(
        np.fabs(np.dot(X.T, np.random.standard_normal((n, 1000)))).max(0))
    Q = rr.identity_quadratic(0.01, 0, np.ones(p), 0)

    weights_with_zeros = 0.5 * lam_theor * np.ones(p)
    weights_with_zeros[:3] = 0.

    huge_weights = weights_with_zeros * 10000

    for q, fw in product([Q, None],
                         [0.5 * lam_theor, weights_with_zeros, huge_weights]):

        L = lasso.gaussian(X, y, fw, 1., quadratic=Q)
        L.fit()
        C = L.constraints

        sandwich = glm_sandwich_estimator(L.loglike, B=5000)
        L = lasso.gaussian(X,
                           y,
                           fw,
                           1.,
                           quadratic=Q,
                           covariance_estimator=sandwich)
        L.fit()
        C = L.constraints

        S = L.summary('onesided', compute_intervals=True)
        S = L.summary('twosided')

        nt.assert_raises(ValueError, L.summary, 'none')
        print(L.active)
        yield (np.testing.assert_array_less,
               np.dot(L.constraints.linear_part,
                      L.onestep_estimator), L.constraints.offset)
    def __init__(self,
                 affine_con,
                 direction_of_interest,
                 offset=None,
                 quadratic=None,
                 initial=None):

        rr.smooth_atom.__init__(self,
                                affine_con.linear_part.shape[1] + 1,
                                offset=offset,
                                quadratic=quadratic,
                                initial=initial)

        self.affine_con = affine_con
        self.direction_of_interest = eta = direction_of_interest

        design = self.design = np.hstack(
            [np.identity(affine_con.dim),
             eta.reshape((-1, 1))])

        sqrt_inv = affine_con.covariance_factors()[1]
        Si = np.dot(sqrt_inv.T, sqrt_inv)
        self.Q = np.dot(design.T, np.dot(Si, design))

        gamma = affine_con.mean

        linear_part = np.dot(affine_con.linear_part, design)
        offset = affine_con.offset - np.dot(affine_con.linear_part,
                                            affine_con.mean)

        scaling = np.sqrt((linear_part**2).sum(1))
        linear_part /= scaling[:, None]
        offset /= scaling

        self.linear_objective = 0.

        smoothing_quadratic = rr.identity_quadratic(1.e-2, 0, 0, 0)
        self.smooth_constraint = rr.nonpositive.affine(
            linear_part, -offset).smoothed(smoothing_quadratic)
Example #38
def test_conjugate():
    z = np.random.standard_normal(10)
    w = np.random.standard_normal(10)
    y = np.random.standard_normal(10)

    for atom_c in [
            R.l1norm, R.l2norm, R.positive_part, R.supnorm,
            R.constrained_positive_part
    ]:
        linq = R.identity_quadratic(0, 0, w, 0)
        atom = atom_c(10, quadratic=linq, offset=y, lagrange=2.345)
        np.testing.assert_almost_equal(
            atom.conjugate.conjugate.nonsmooth_objective(z),
            atom.nonsmooth_objective(z),
            decimal=3)

    for atom_c in [R.nonnegative, R.nonpositive]:
        atom = atom_c(10, quadratic=linq, offset=y)
        np.testing.assert_almost_equal(
            atom.conjugate.conjugate.nonsmooth_objective(z),
            atom.nonsmooth_objective(z),
            decimal=3)
Example #39
def selection(X,
              y,
              random_Z,
              randomization_scale=1,
              sigma=None,
              method="theoretical"):
    n, p = X.shape
    loss = rr.glm.gaussian(X, y)
    epsilon = 1. / np.sqrt(n)
    lam_frac = 1.2
    if sigma is None:
        sigma = 1.
    if method == "theoretical":
        lam = 1. * sigma * lam_frac * np.mean(
            np.fabs(np.dot(X.T, np.random.standard_normal((n, 10000)))).max(0))

    W = np.ones(p) * lam
    penalty = rr.group_lasso(np.arange(p),
                             weights=dict(zip(np.arange(p), W)),
                             lagrange=1.)

    # initial solution

    problem = rr.simple_problem(loss, penalty)
    random_term = rr.identity_quadratic(epsilon, 0,
                                        -randomization_scale * random_Z, 0)

    solve_args = {'tol': 1.e-10, 'min_its': 100, 'max_its': 500}
    initial_soln = problem.solve(random_term, **solve_args)
    active = (initial_soln != 0)
    if np.sum(active) == 0:
        return None
    initial_grad = loss.smooth_objective(initial_soln, mode='grad')
    betaE = initial_soln[active]
    subgradient = -(initial_grad + epsilon * initial_soln -
                    randomization_scale * random_Z)
    cube = subgradient[~active] / lam
    return lam, epsilon, active, betaE, cube, initial_soln
def test_gaussian_unknown():

    n, p = 20, 5
    X = np.random.standard_normal((n, p))
    Y = np.random.standard_normal(n)

    T = X.T.dot(Y)
    N = -(Y**2).sum() / 2.

    sufficient_stat = np.hstack([T, N])

    cumulant = gaussian_cumulant(X)
    conj = gaussian_cumulant_conjugate(X)

    MLE = cumulant.regression_parameters(
        conj.smooth_objective(sufficient_stat, 'grad'))
    linear = rr.identity_quadratic(0, 0, -sufficient_stat, 0)
    cumulant.coefs[:] = 1.
    MLE2 = cumulant.solve(linear, tol=1.e-12, min_its=400)

    np.testing.assert_allclose(MLE2,
                               conj.smooth_objective(sufficient_stat, 'grad'),
                               rtol=1.e-4,
                               atol=1.e-4)

    beta_hat = np.linalg.pinv(X).dot(Y)
    sigmasq_hat = np.sum(((Y - X.dot(beta_hat))**2) / n)

    np.testing.assert_allclose(beta_hat, MLE[0])
    np.testing.assert_allclose(sigmasq_hat, MLE[1])

    G = conj.smooth_objective(sufficient_stat, 'grad')
    M = cumulant.smooth_objective(G, 'grad')
    np.testing.assert_allclose(sufficient_stat, M)

    G = cumulant.smooth_objective(MLE2, 'grad')
    M = conj.smooth_objective(G, 'grad')
    np.testing.assert_allclose(MLE2, M)
Example #41
def _find_row_approx_inverse(Sigma,
                             j,
                             delta,
                             solve_args={
                                 'min_its': 100,
                                 'tol': 1.e-6,
                                 'max_its': 500
                             }):
    """
    Find an approximation of j-th row of inverse of Sigma.
    Solves the problem
    .. math::
        \text{min}_{\theta} \frac{1}{2} \theta^TS\theta
    subject to $\|\Sigma \hat{\theta} - e_j\|_{\infty} \leq \delta$ with
    $e_j$ the $j$-th elementary basis vector and `S` as $\Sigma$,
    and `delta` as $\delta$.
    Described in Table 1, display (4) of https://arxiv.org/pdf/1306.3171.pdf
    """
    p = Sigma.shape[0]
    elem_basis = np.zeros(p, float)
    elem_basis[j] = 1.
    loss = quadratic_loss(p, Q=Sigma)
    penalty = l1norm(p, lagrange=delta)
    iq = identity_quadratic(0, 0, elem_basis, 0)
    problem = simple_problem(loss, penalty)
    dual_soln = problem.solve(iq, **solve_args)

    soln = -dual_soln

    # check feasibility -- if it fails miserably
    # presume delta was too small

    feasibility_gap = np.fabs(Sigma.dot(soln) - elem_basis).max()
    if feasibility_gap > (1.01) * delta:
        raise ValueError(
            'does not seem to be a feasible point -- try increasing delta')

    return soln
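
A hedged usage sketch of the routine above on a small, well-conditioned covariance; the names below are illustrative, the imports are assumed to be the same ones _find_row_approx_inverse relies on, and delta may need to grow for ill-conditioned Sigma:

import numpy as np

p = 10
A = np.random.standard_normal((50, p))
Sigma = A.T.dot(A) / 50 + 0.5 * np.identity(p)

row0 = _find_row_approx_inverse(Sigma, j=0, delta=0.2)
print(np.fabs(Sigma.dot(row0) - np.identity(p)[0]).max())   # roughly bounded by delta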
Example #42
def test_different_dim():
    """
    This test checks that the reshape argument of separable
    works properly.
    """

    X = np.random.standard_normal((100, 20))
    Y = (np.random.standard_normal(
        (100, )) + np.dot(X, np.random.standard_normal(20)))

    penalty1 = rr.nuclear_norm((5, 2), lagrange=1.2)
    penalty2 = rr.l1norm(10, lagrange=1.2)
    penalty = rr.separable((20, ), [penalty1, penalty2],
                           [slice(0, 10), slice(10, 20)],
                           test_for_overlap=True,
                           shapes=[(5, 2), None])

    # ensure code is tested

    print(penalty1.latexify())

    print(penalty.latexify())
    print(penalty.conjugate)
    print(penalty.dual)
    print(penalty.seminorm(np.ones(penalty.shape)))
    print(penalty.constraint(np.ones(penalty.shape), bound=2.))

    pencopy = copy(penalty)
    pencopy.set_quadratic(rr.identity_quadratic(1, 0, 0, 0))
    pencopy.conjugate

    # solve using separable

    loss = rr.quadratic_loss.affine(X, -Y, coef=0.5)
    problem = rr.separable_problem.fromatom(penalty, loss)
    solver = rr.FISTA(problem)
    solver.fit(min_its=200, tol=1.0e-12)
    coefs = solver.composite.coefs
Example #43
    def __iter__(self):
        for offset, FISTA, coef_stop, L, q, groups in itertools.product(self.offset_choices,
                                                                        self.FISTA_choices,
                                                                        self.coef_stop_choices,
                                                                        self.L_choices,
                                                                        self.quadratic_choices,
                                                                        self.group_choices):
            self.FISTA = FISTA
            self.coef_stop = coef_stop
            self.L = L

            atom = self.klass(groups)

            if q: 
                atom.quadratic = rr.identity_quadratic(0,0,np.random.standard_normal(atom.shape)*0.02)

            if offset:
                atom.offset = 0.02 * np.random.standard_normal(atom.shape)

            solver = Solver(atom, interactive=self.interactive, 
                            coef_stop=coef_stop,
                            FISTA=FISTA,
                            L=L)
            yield solver
Example #44
def test_sqrt_lasso_pvals(n=100, p=200, s=7, sigma=5, rho=0.3, signal=7.):

    X, y, beta, true_active, sigma, _ = instance(n=n,
                                                 p=p,
                                                 s=s,
                                                 sigma=sigma,
                                                 rho=rho,
                                                 signal=signal)

    lam_theor = np.mean(
        np.fabs(np.dot(X.T, np.random.standard_normal(
            (n, 1000)))).max(0)) / np.sqrt(n)
    Q = rr.identity_quadratic(0.01, 0, np.ones(p), 0)

    weights_with_zeros = 0.7 * lam_theor * np.ones(p)
    weights_with_zeros[:3] = 0.

    lasso.sqrt_lasso(X, y, weights_with_zeros, covariance='parametric')
    L = lasso.sqrt_lasso(X, y, weights_with_zeros)
    L.fit()
    if set(true_active).issubset(L.active):
        S = L.summary('onesided')
        S = L.summary('twosided')
        return S['pval'], [v in true_active for v in S['variable']]
    def _solve_conjugate_problem(self, natural_param, niter=500, tol=1.e-10):

        affine_con = self.affine_con

        loss = softmax(affine_con, sigma=self.sigma)

        L = rr.identity_quadratic(0, 0, -natural_param, 0)  # linear_term
        A = affine_con.linear_part
        b = affine_con.offset
        mean_param = self.feasible_point.copy()
        step = 1. / self.sigma
        f_cur = np.inf
        for i in range(niter):
            G = -natural_param + loss.smooth_objective(mean_param, 'grad')
            proposed = mean_param - step * G
            slack = b - A.dot(proposed)
            if i % 5 == 0:
                step *= 2.
            if np.any(slack < 0):
                step *= 0.5
            else:

                f_proposed = (-(natural_param * proposed).sum() +
                              loss.smooth_objective(proposed, 'func'))

                if f_proposed > f_cur * (1 + tol):
                    step *= 0.5
                else:
                    mean_param = proposed
                    if np.fabs(f_cur - f_proposed) < tol * max(
                        [1, np.fabs(f_cur),
                         np.fabs(f_proposed)]):
                        break
                    f_cur = f_proposed

        return -f_proposed, mean_param
Example #46
    def test_simple_problem(self):
        tests = []
        atom, q, prox_center, L = self.atom, self.q, self.prox_center, self.L
        loss = self.loss

        problem = rr.simple_problem(loss, atom)
        solver = rr.FISTA(problem)
        solver.fit(tol=1.0e-12,
                   FISTA=self.FISTA,
                   coef_stop=self.coef_stop,
                   min_its=100)

        tests.append(
            (atom.proximal(q), solver.composite.coefs,
             'solving prox with simple_problem with monotonicity\n %s' %
             str(self)))

        # write the loss in terms of a quadratic for the smooth loss and a smooth function...

        q = rr.identity_quadratic(L, prox_center, 0, 0)
        lossq = rr.quadratic.shift(prox_center.copy(), coef=0.6 * L)
        lossq.quadratic = rr.identity_quadratic(0.4 * L, prox_center.copy(), 0,
                                                0)
        problem = rr.simple_problem(lossq, atom)

        tests.append(
            (atom.proximal(q),
             problem.solve(coef_stop=self.coef_stop,
                           FISTA=self.FISTA,
                           tol=1.0e-12), 'solving prox with simple_problem ' +
             'with monotonicity  but loss has identity_quadratic %s\n ' %
             str(self)))

        problem = rr.simple_problem(loss, atom)
        solver = rr.FISTA(problem)
        solver.fit(tol=1.0e-12,
                   monotonicity_restart=False,
                   coef_stop=self.coef_stop,
                   FISTA=self.FISTA,
                   min_its=100)

        tests.append(
            (atom.proximal(q), solver.composite.coefs,
             'solving prox with simple_problem no monotonicity_restart\n %s' %
             str(self)))

        d = atom.conjugate
        problem = rr.simple_problem(loss, d)
        solver = rr.FISTA(problem)
        solver.fit(tol=1.0e-12,
                   monotonicity_restart=False,
                   coef_stop=self.coef_stop,
                   FISTA=self.FISTA,
                   min_its=100)
        tests.append(
            (d.proximal(q),
             problem.solve(tol=1.e-12,
                           FISTA=self.FISTA,
                           coef_stop=self.coef_stop,
                           monotonicity_restart=False),
             'solving dual prox with simple_problem no monotonicity\n %s ' %
             str(self)))

        if not self.interactive:
            for test in tests:
                yield (all_close, ) + test + (self, )
        else:
            for test in tests:
                yield all_close(*((test + (self, ))))
Example #47
def test_lasso(s=0, n=100, p=20, weights = "neutral",
               randomization_dist = "logistic", randomization_scale = 1,
               Langevin_steps = 10000, burning = 2000, X_scaled = True,
               covariance_estimate = "nonparametric", noise = "uniform"):

    """ weights: exponential, gamma, normal, gumbel
    randomization_dist: logistic, laplace """

    step_size = 1./p

    X, y, true_beta, nonzero, sigma = instance(n=n, p=p, random_signs=True, s=s, sigma=1.,rho=0, scale=X_scaled, noise=noise)
    print('true beta', true_beta)
    lam_frac = 1.

    if randomization_dist == "laplace":
        randomization = laplace(loc=0, scale=1.)
        random_Z = randomization.rvs(p)
    if randomization_dist == "logistic":
        random_Z = np.random.logistic(loc=0, scale = 1, size = p)
    if randomization_dist== "normal":
        random_Z = np.random.standard_normal(p)

    print('randomization', random_Z*randomization_scale)
    loss = lasso_randomX.lasso_randomX(X, y)

    epsilon = 1./np.sqrt(n)
    #epsilon = 1.
    lam = sigma * lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 10000)))+randomization_scale*np.random.logistic(size=(p,10000))).max(0))

    lam_scaled = lam.copy()
    random_Z_scaled = random_Z.copy()
    epsilon_scaled = epsilon

    if (X_scaled == False):
        random_Z_scaled *= np.sqrt(n)
        lam_scaled *= np.sqrt(n)
        epsilon_scaled *= np.sqrt(n)

    penalty = randomized.selective_l1norm_lan(p, lagrange=lam_scaled)

    # initial solution

    problem = rr.simple_problem(loss, penalty)

    random_term = rr.identity_quadratic(epsilon_scaled, 0, -randomization_scale*random_Z_scaled, 0)
    solve_args = {'tol': 1.e-10, 'min_its': 100, 'max_its': 500}
    initial_soln = problem.solve(random_term, **solve_args)
    print('initial solution', initial_soln)

    active = (initial_soln != 0)
    if np.sum(active)==0:
        return [-1], [-1]
    inactive = ~active
    betaE = initial_soln[active]
    signs = np.sign(betaE)

    initial_grad = -np.dot(X.T, y - np.dot(X, initial_soln))
    if (X_scaled==False):
        initial_grad /= np.sqrt(n)
    print('initial_gradient', initial_grad)
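    # by the KKT conditions of the randomized problem, (random_Z - grad -
    # epsilon * beta) / lam is a subgradient of the l1 norm at the solution, so
    # its inactive coordinates should lie in [-1, 1]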
    subgradient = random_Z - initial_grad - epsilon * initial_soln
    cube = np.divide(subgradient[inactive], lam)

    nactive = betaE.shape[0]
    ninactive = cube.shape[0]

    beta_unpenalized = np.linalg.lstsq(X[:, active], y)[0]
    print('beta_OLS onto E', beta_unpenalized)
    obs_residuals = y - np.dot(X[:, active], beta_unpenalized)  # y-X_E\bar{\beta}^E
    N = np.dot(X[:, inactive].T, obs_residuals)  # X_{-E}^T(y-X_E\bar{\beta}_E), null statistic
    full_null = np.zeros(p)
    full_null[nactive:] = N

    # parametric covariance estimate
    if covariance_estimate == "parametric":
        XE_pinv = np.linalg.pinv(X[:, active])
        mat = np.zeros((nactive+ninactive, n))
        mat[:nactive,:] = XE_pinv
        mat[nactive:,:] = X[:, inactive].T.dot(np.identity(n)-X[:, active].dot(XE_pinv))
        Sigma_full = mat.dot(mat.T)
    else:
        Sigma_full = bootstrap_covariance(X,y,active, beta_unpenalized)


    init_vec_state = np.zeros(n+nactive+ninactive)
    if weights =="exponential":
        init_vec_state[:n] = np.ones(n)
    else:
        init_vec_state[:n] = np.zeros(n)

    #init_vec_state[:n] = np.random.standard_normal(n)
    #init_vec_state[:n] = np.ones(n)
    init_vec_state[n:(n+nactive)] = betaE
    init_vec_state[(n+nactive):] = cube


    def full_projection(vec_state, signs = signs,
                        nactive=nactive, ninactive = ninactive):

        alpha = vec_state[:n].copy()
        betaE = vec_state[n:(n+nactive)].copy()
        cube = vec_state[(n+nactive):].copy()

        projected_alpha = alpha.copy()
        projected_betaE = betaE.copy()
        projected_cube = np.zeros_like(cube)
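        # project each block onto its constraint set: alpha onto the support of
        # the chosen weight distribution, each active coefficient onto the
        # half-line of its observed sign, and the subgradient onto [-1, 1]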

        if weights == "exponential":
            projected_alpha = np.clip(alpha, 0, np.inf)

        if weights == "gamma":
            projected_alpha = np.clip(alpha, -2+1./n, np.inf)
        for i in range(nactive):
            if (projected_betaE[i] * signs[i] < 0):
                projected_betaE[i] = 0

        projected_cube = np.clip(cube, -1, 1)

        return np.concatenate((projected_alpha, projected_betaE, projected_cube), 0)


    Sigma = np.linalg.inv(np.dot(X[:, active].T, X[:, active]))
    null, alt = pval(init_vec_state, full_projection, X, obs_residuals, beta_unpenalized, full_null,
                     signs, lam, epsilon,
                     nonzero, active, Sigma,
                     weights, randomization_dist, randomization_scale,
                     Langevin_steps, step_size, burning,
                     X_scaled)
                   #  Sigma_full[:nactive, :nactive])

    return null, alt
Beispiel #48
0
def solveit(atom, Z, W, U, linq, L, FISTA, coef_stop):
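    # Exercise several equivalent formulations of the proximal problem for
    # `atom` (simple_problem, dual_problem, separable_problem, container) and
    # yield (check, expected, computed, message) tuples; `ac` is assumed to be
    # an all_close-style comparison helper defined at module level.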

    p2 = copy(atom)
    p2.quadratic = rr.identity_quadratic(L, Z, 0, 0)

    d = atom.conjugate

    q = rr.identity_quadratic(1, Z, 0, 0)
    yield ac, Z - atom.proximal(q), d.proximal(
        q), 'testing duality of projections starting from atom %s ' % atom
    q = rr.identity_quadratic(L, Z, 0, 0)

    # use simple_problem.nonsmooth

    p2 = copy(atom)
    p2.quadratic = atom.quadratic + q
    problem = rr.simple_problem.nonsmooth(p2)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-14, FISTA=FISTA, coef_stop=coef_stop)

    yield ac, atom.proximal(
        q
    ), solver.composite.coefs, 'solving prox with simple_problem.nonsmooth with monotonicity %s ' % atom

    # use the solve method

    p2.coefs *= 0
    p2.quadratic = atom.quadratic + q
    soln = p2.solve()

    yield ac, atom.proximal(
        q), soln, 'solving prox with solve method %s ' % atom

    loss = rr.quadratic.shift(-Z, coef=L)
    problem = rr.simple_problem(loss, atom)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-12, FISTA=FISTA, coef_stop=coef_stop)

    yield ac, atom.proximal(
        q
    ), solver.composite.coefs, 'solving prox with simple_problem with monotonicity %s ' % atom

    dproblem2 = rr.dual_problem(loss.conjugate, rr.identity(loss.shape),
                                atom.conjugate)
    dcoef2 = dproblem2.solve(coef_stop=coef_stop, tol=1.e-14)
    yield ac, atom.proximal(
        q
    ), dcoef2, 'solving prox with dual_problem with monotonicity %s ' % atom

    dproblem = rr.dual_problem.fromprimal(loss, atom)
    dcoef = dproblem.solve(coef_stop=coef_stop, tol=1.0e-14)
    yield ac, atom.proximal(
        q
    ), dcoef, 'solving prox with dual_problem.fromprimal with monotonicity %s ' % atom

    # write the loss in terms of a quadratic for the smooth loss and a smooth function...

    lossq = rr.quadratic.shift(-Z, coef=0.6 * L)
    lossq.quadratic = rr.identity_quadratic(0.4 * L, Z, 0, 0)
    problem = rr.simple_problem(lossq, atom)

    yield ac, atom.proximal(q), problem.solve(
        coef_stop=coef_stop, FISTA=FISTA, tol=1.0e-12
    ), 'solving prox with simple_problem with monotonicity  but loss has identity_quadratic %s ' % atom

    problem = rr.simple_problem.nonsmooth(p2)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-14,
               monotonicity_restart=False,
               coef_stop=coef_stop,
               FISTA=FISTA)

    yield ac, atom.proximal(
        q
    ), solver.composite.coefs, 'solving prox with simple_problem.nonsmooth with no monotonicity %s ' % atom

    loss = rr.quadratic.shift(-Z, coef=L)
    problem = rr.simple_problem(loss, atom)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-12,
               monotonicity_restart=False,
               coef_stop=coef_stop,
               FISTA=FISTA)

    yield ac, atom.proximal(
        q
    ), solver.composite.coefs, 'solving prox with simple_problem %s no monotonicity_restart' % atom

    loss = rr.quadratic.shift(-Z, coef=L)
    problem = rr.separable_problem.singleton(atom, loss)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-12, coef_stop=coef_stop, FISTA=FISTA)

    yield ac, atom.proximal(
        q
    ), solver.composite.coefs, 'solving atom prox with separable_problem.singleton %s ' % atom

    loss = rr.quadratic.shift(-Z, coef=L)
    problem = rr.container(loss, atom)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-12, coef_stop=coef_stop, FISTA=FISTA)

    yield ac, atom.proximal(
        q
    ), solver.composite.coefs, 'solving atom prox with container %s ' % atom

    # write the loss in terms of a quadratic for the smooth loss and a smooth function...

    lossq = rr.quadratic.shift(-Z, coef=0.6 * L)
    lossq.quadratic = rr.identity_quadratic(0.4 * L, Z, 0, 0)
    problem = rr.container(lossq, atom)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-12, FISTA=FISTA, coef_stop=coef_stop)

    yield (
        ac, atom.proximal(q),
        problem.solve(tol=1.e-12, FISTA=FISTA, coef_stop=coef_stop),
        'solving prox with container with monotonicity  but loss has identity_quadratic %s '
        % atom)

    loss = rr.quadratic.shift(-Z, coef=L)
    problem = rr.simple_problem(loss, d)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-12,
               monotonicity_restart=False,
               coef_stop=coef_stop,
               FISTA=FISTA)
    # ac(d.proximal(q), solver.composite.coefs, 'solving dual prox with simple_problem no monotonicity %s ' % atom)
    yield (ac, d.proximal(q),
           problem.solve(tol=1.e-12,
                         FISTA=FISTA,
                         coef_stop=coef_stop,
                         monotonicity_restart=False),
           'solving dual prox with simple_problem no monotonicity %s ' % atom)

    problem = rr.container(d, loss)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-12, coef_stop=coef_stop, FISTA=FISTA)
    yield ac, d.proximal(
        q
    ), solver.composite.coefs, 'solving dual prox with container %s ' % atom

    loss = rr.quadratic.shift(-Z, coef=L)
    problem = rr.separable_problem.singleton(d, loss)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-12, coef_stop=coef_stop, FISTA=FISTA)

    yield ac, d.proximal(
        q
    ), solver.composite.coefs, 'solving dual prox with separable_problem.singleton %s ' % atom
    def __init__(
            self,
            X,
            feasible_point,
            active,  # the active set chosen by randomized lasso
            active_sign,  # the set of signs of active coordinates chosen by lasso
            lagrange,  # in R^p
            mean_parameter,  # in R^n
            noise_variance,  # noise_level in data
            randomizer,  # specified randomization
            epsilon,  # ridge penalty for randomized lasso
            coef=1.,
            offset=None,
            quadratic=None,
            nstep=10):

        n, p = X.shape
        E = active.sum()
        self._X = X
        self.active = active
        self.noise_variance = noise_variance
        self.randomization = randomizer

        self.CGF_randomization = randomizer.CGF

        if self.CGF_randomization is None:
            raise ValueError(
                'randomization must know its cgf -- currently only isotropic_gaussian and laplace are implemented and are assumed to be randomization with IID coordinates'
            )

        self.inactive_lagrange = lagrange[~active]

        initial = feasible_point

        self.feasible_point = feasible_point

        rr.smooth_atom.__init__(self, (p, ),
                                offset=offset,
                                quadratic=quadratic,
                                initial=initial,
                                coef=coef)

        self.coefs[:] = feasible_point

        mean_parameter = np.squeeze(mean_parameter)

        self.active = active

        X_E = self.X_E = X[:, active]
        self.X_permute = np.hstack([self.X_E, self._X[:, ~active]])
        B = X.T.dot(X_E)

        B_E = B[active]
        B_mE = B[~active]

        self.active_slice = np.zeros_like(active, bool)
        self.active_slice[:active.sum()] = True

        self.B_active = np.hstack([
            (B_E + epsilon * np.identity(E)) * active_sign[None, :],
            np.zeros((E, p - E))
        ])
        self.B_inactive = np.hstack(
            [B_mE * active_sign[None, :],
             np.identity((p - E))])
        self.B_p = np.vstack((self.B_active, self.B_inactive))

        self.B_p_inv = np.linalg.inv(self.B_p.T)

        self.offset_active = active_sign * lagrange[active]
        self.inactive_subgrad = np.zeros(p - E)

        self.cube_bool = np.zeros(p, bool)

        self.cube_bool[E:] = 1
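        # cube_bool marks the coordinates of the optimization variable that
        # correspond to the inactive subgradient, which is constrained to a
        # cube and handled by the barrier conjugate below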

        self.dual_arg = self.B_p_inv.dot(
            np.append(self.offset_active, self.inactive_subgrad))

        self._opt_selector = rr.selector(~self.cube_bool, (p, ))

        self.set_parameter(mean_parameter, noise_variance)

        _barrier_star = barrier_conjugate_softmax_scaled_rr(
            self.cube_bool, self.inactive_lagrange)

        self.conjugate_barrier = rr.affine_smooth(_barrier_star,
                                                  np.identity(p))

        self.CGF_randomizer = rr.affine_smooth(self.CGF_randomization,
                                               -self.B_p_inv)

        self.constant = np.true_divide(mean_parameter.dot(mean_parameter),
                                       2 * noise_variance)

        self.linear_term = rr.identity_quadratic(0, 0, self.dual_arg,
                                                 -self.constant)

        self.total_loss = rr.smooth_sum([
            self.conjugate_barrier, self.CGF_randomizer, self.likelihood_loss
        ])

        self.total_loss.quadratic = self.linear_term
def test_sqrt_highdim_lasso(n=500, 
                            p=200, 
                            signal_fac=1.5, 
                            s=5, 
                            sigma=3, 
                            full=True, 
                            rho=0.4, 
                            randomizer_scale=1., 
                            ndraw=5000, 
                            burnin=1000, 
                            ridge_term=None, compare_to_lasso=True):
    """
    Compare to R randomized lasso
    """

    inst, const = gaussian_instance, lasso.sqrt_lasso
    signal = np.sqrt(signal_fac * 2 * np.log(p))
    X, Y, beta = inst(n=n,
                      p=p, 
                      signal=signal, 
                      s=s, 
                      equicorrelated=False, 
                      rho=rho, 
                      sigma=sigma, 
                      random_signs=True)[:3]

    if ridge_term is None:
        mean_diag = np.mean((X**2).sum(0))
        ridge_term = (np.sqrt(mean_diag) / np.sqrt(n)) * np.sqrt(n / (n - 1.))

    W = np.ones(X.shape[1]) * choose_lambda(X) * 0.7

    perturb = np.random.standard_normal(p) * randomizer_scale / np.sqrt(n)

    conv = const(X, 
                 Y, 
                 W, 
                 randomizer_scale=randomizer_scale / np.sqrt(n),
                 perturb=perturb,
                 ridge_term=ridge_term)
    
    signs = conv.fit()
    nonzero = signs != 0

    # sanity check

    if compare_to_lasso:
        q_term = rr.identity_quadratic(ridge_term, 0, -perturb, 0)

        soln2, sqrt_loss = solve_sqrt_lasso(X, Y, W, solve_args={'min_its':1000}, quadratic=q_term, force_fat=True)
        soln = conv.initial_soln
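        # at the sqrt-LASSO solution, the same point should solve an ordinary
        # squared-error LASSO whose weights, ridge term and randomization are
        # all rescaled by the residual norm ||Y - X beta_hat||_2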

        denom = np.linalg.norm(Y - X.dot(soln))
        new_weights = W * denom
        loss = rr.glm.gaussian(X, Y)
        pen = rr.weighted_l1norm(new_weights, lagrange=1.)
        prob = rr.simple_problem(loss, pen)

        rescaledQ = rr.identity_quadratic(ridge_term * denom,
                                          0,
                                          -perturb * denom,
                                          0)

        soln3 = prob.solve(quadratic=rescaledQ, min_its=1000, tol=1.e-12)
        np.testing.assert_allclose(conv._initial_omega, perturb * denom)
        np.testing.assert_allclose(soln, soln2)
        np.testing.assert_allclose(soln, soln3)

    if full:
        (observed_target, 
         cov_target, 
         cov_target_score, 
         alternatives) = full_targets(conv.loglike, 
                                      conv._W, 
                                      nonzero)
    else:
        (observed_target, 
         cov_target, 
         cov_target_score, 
         alternatives) = selected_targets(conv.loglike, 
                                          conv._W, 
                                          nonzero)

    _, pval, intervals = conv.summary(observed_target, 
                                      cov_target, 
                                      cov_target_score, 
                                      alternatives,
                                      ndraw=ndraw,
                                      burnin=burnin, 
                                      compute_intervals=False)

    return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0]
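
A sketch of why the compare_to_lasso check above should hold, assuming the residual r = Y - X\hat\beta is nonzero at the solution: with u a subgradient of the weighted l1 norm at \hat\beta, the stationarity conditions of the two problems coincide,

$$
\frac{X^T r}{\|r\|_2} = \lambda \circ u
\quad\Longleftrightarrow\quad
X^T r = (\|r\|_2\, \lambda) \circ u,
$$

so the sqrt-LASSO solution also solves the squared-error LASSO whose penalty weights (and, in the randomized problem above, the ridge and linear randomization terms) are multiplied by $\|r\|_2$, which is exactly the `denom` rescaling checked by the assertions.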
Beispiel #51
0
def test_quadratic_for_smooth():
    '''
    this test is a check to ensure that the quadratic part 
    of the smooth functions are being used in the proximal step
    '''

    L = 0.45

    W = np.random.standard_normal(40)
    Z = np.random.standard_normal(40)
    U = np.random.standard_normal(40)

    atomq = rr.identity_quadratic(0.4, U, W, 0)
    atom = rr.l1norm(40, quadratic=atomq, lagrange=0.12)

    # specifying in this way should be the same as if we put 0.5*L below
    loss = rr.quadratic.shift(Z, coef=0.6 * L)
    lq = rr.identity_quadratic(0.4 * L, Z, 0, 0)
    loss.quadratic = lq

    ww = np.random.standard_normal(40)

    # specifying in this way should be the same as if we put 0.5*L below
    loss2 = rr.quadratic.shift(Z, coef=L)
    yield all_close, loss2.objective(ww), loss.objective(
        ww), 'checking objective', None

    yield all_close, lq.objective(ww, 'func'), loss.nonsmooth_objective(
        ww), 'checking nonsmooth objective', None
    yield all_close, loss2.smooth_objective(
        ww, 'func'), 0.5 / 0.3 * loss.smooth_objective(
            ww, 'func'), 'checking smooth objective func', None
    yield all_close, loss2.smooth_objective(
        ww, 'grad'), 0.5 / 0.3 * loss.smooth_objective(
            ww, 'grad'), 'checking smooth objective grad', None

    problem = rr.container(loss, atom)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-12)

    problem3 = rr.simple_problem(loss, atom)
    solver3 = rr.FISTA(problem3)
    solver3.fit(tol=1.0e-12, coef_stop=True)

    loss4 = rr.quadratic.shift(Z, coef=0.6 * L)
    problem4 = rr.simple_problem(loss4, atom)
    problem4.quadratic = lq
    solver4 = rr.FISTA(problem4)
    solver4.fit(tol=1.0e-12)

    gg_soln = rr.gengrad(problem, L)

    loss6 = rr.quadratic.shift(Z, coef=0.6 * L)
    loss6.quadratic = lq + atom.quadratic
    atomcp = copy(atom)
    atomcp.quadratic = rr.identity_quadratic(0, 0, 0, 0)
    problem6 = rr.dual_problem(loss6.conjugate, rr.identity(loss6.shape),
                               atomcp.conjugate)
    problem6.lipschitz = L + atom.quadratic.coef
    dsoln2 = problem6.solve(coef_stop=True, tol=1.e-10, max_its=100)

    problem2 = rr.container(loss2, atom)
    solver2 = rr.FISTA(problem2)
    solver2.fit(tol=1.0e-12, coef_stop=True)

    q = rr.identity_quadratic(L, Z, 0, 0)

    yield all_close, problem.objective(
        ww), atom.nonsmooth_objective(ww) + q.objective(ww, 'func'), '', None

    atom = rr.l1norm(40, quadratic=atomq, lagrange=0.12)
    aq = atom.solve(q)
    for p, msg in zip([
            solver3.composite.coefs, gg_soln, solver2.composite.coefs, dsoln2,
            solver.composite.coefs, solver4.composite.coefs
    ], [
            'simple_problem with loss having no quadratic', 'gen grad',
            'container with loss having no quadratic',
            'dual problem with loss having a quadratic',
            'container with loss having a quadratic',
            'simple_problem having a quadratic'
    ]):
        yield all_close, aq, p, msg, None
Beispiel #52
0
    def sqrt_lasso(X,
                   Y,
                   groups,
                   weights,
                   quadratic=None,
                   ridge_term=None,
                   randomizer_scale=None,
                   solve_args={'min_its': 200},
                   perturb=None):
        r"""
        Use sqrt-LASSO to choose variables.
        Objective function is (before randomization)

        .. math::

            \beta \mapsto \|Y-X\beta\|_2 + \sum_{i=1}^p \lambda_i |\beta_i|

        where $\lambda$ is `feature_weights`. After solving the problem
        treat as if `gaussian` with implied variance and choice of
        multiplier. See arxiv.org/abs/1504.08031 for details.

        Parameters
        ----------

        X : ndarray
            Shape (n,p) -- the design matrix.

        Y : ndarray
            Shape (n,) -- the response.

        feature_weights: [float, sequence]
            Penalty weights. An intercept, or other unpenalized
            features are handled by setting those entries of
            `feature_weights` to 0. If `feature_weights` is
            a float, then all parameters are penalized equally.

        quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
            An optional quadratic term to be added to the objective.
            Can also be a linear term by setting quadratic
            coefficient to 0.

        covariance : str
            One of 'parametric' or 'sandwich'. Method
            used to estimate covariance for inference
            in second stage.

        solve_args : dict
            Arguments passed to solver.

        ridge_term : float
            How big a ridge term to add?

        randomizer_scale : float
            Scale for IID components of randomizer.

        randomizer : str
            One of ['laplace', 'logistic', 'gaussian']

        Returns
        -------

        L : `selection.randomized.lasso.lasso`

        Notes
        -----

        Unlike other variants of LASSO, this
        solves the problem on construction as the active
        set is needed to find equivalent gaussian LASSO.
        Assumes parametric model is correct for inference,
        i.e. does not accept a covariance estimator.
        """

        n, p = X.shape

        if np.asarray(feature_weights).shape == ():
            feature_weights = np.ones(p) * feature_weights

        mean_diag = np.mean((X**2).sum(0))
        if ridge_term is None:
            ridge_term = np.sqrt(mean_diag) / (n - 1)

        if randomizer_scale is None:
            randomizer_scale = 0.5 * np.sqrt(mean_diag) / np.sqrt(n - 1)

        if perturb is None:
            perturb = np.random.standard_normal(p) * randomizer_scale

        randomQ = rr.identity_quadratic(ridge_term, 0, -perturb,
                                        0)  # a ridge + linear term

        if quadratic is not None:
            totalQ = randomQ + quadratic
        else:
            totalQ = randomQ

        soln, sqrt_loss = solve_sqrt_lasso(X,
                                           Y,
                                           weights=feature_weights,
                                           quadratic=totalQ,
                                           solve_args=solve_args,
                                           force_fat=True)

        denom = np.linalg.norm(Y - X.dot(soln))
        loglike = rr.glm.gaussian(X, Y)

        randomizer = randomization.isotropic_gaussian((p, ),
                                                      randomizer_scale * denom)

        weights = copy(weights)
        for k in weights.keys():
            weights[k] = weights[k] * denom

        obj = lasso(loglike,
                    groups,
                    weights,
                    ridge_term * denom,
                    randomizer,
                    perturb=perturb * denom)
        obj._sqrt_soln = soln

        return obj
Beispiel #53
0
    def form_penalty(self):
        penalty = weighted_l1norm(self.weights, lagrange=1.)
        penalty.quadratic = identity_quadratic(0, 0, self.random_linear_term, 0)
        return penalty
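
A brief note on what form_penalty builds, assuming regreg's identity_quadratic(coef, center, linear_term, constant) convention of evaluating to $(\mathrm{coef}/2)\|\beta - \mathrm{center}\|_2^2 + \langle \mathrm{linear\_term}, \beta\rangle + \mathrm{constant}$: with coef = 0 the attached quadratic is a pure linear tilt, so the returned penalty is

$$
\beta \mapsto \sum_i w_i |\beta_i| + \langle z, \beta \rangle,
$$

where $w$ = self.weights and $z$ = self.random_linear_term, i.e. the weighted l1 penalty tilted by the randomization.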
Beispiel #54
0
def test_lasso_separable():
    """
    This test verifies that the specification of a separable
    penalty yields the same results as having two linear_atoms
    with selector matrices. The penalty here is a lasso, i.e. l1
    penalty.
    """

    X = np.random.standard_normal((100, 20))
    Y = np.random.standard_normal(
        (100, )) + np.dot(X, np.random.standard_normal(20))

    penalty1 = rr.l1norm(10, lagrange=1.2)
    penalty2 = rr.l1norm(10, lagrange=1.2)
    penalty = rr.separable((20, ), [penalty1, penalty2],
                           [slice(0, 10), slice(10, 20)],
                           test_for_overlap=True)

    # ensure code is tested

    print(penalty1.latexify())

    print(penalty.latexify())
    print(penalty.conjugate)
    print(penalty.dual)
    print(penalty.seminorm(np.ones(penalty.shape)))
    print(penalty.constraint(np.ones(penalty.shape), bound=2.))

    pencopy = copy(penalty)
    pencopy.set_quadratic(rr.identity_quadratic(1, 0, 0, 0))
    pencopy.conjugate

    # solve using separable

    loss = rr.quadratic_loss.affine(X, -Y, coef=0.5)
    problem = rr.separable_problem.fromatom(penalty, loss)
    solver = rr.FISTA(problem)
    solver.fit(min_its=200, tol=1.0e-12)
    coefs = solver.composite.coefs

    # solve using the usual composite

    penalty_all = rr.l1norm(20, lagrange=1.2)
    problem_all = rr.container(loss, penalty_all)
    solver_all = rr.FISTA(problem_all)
    solver_all.fit(min_its=100, tol=1.0e-12)

    coefs_all = solver_all.composite.coefs

    # solve using the selectors

    penalty_s = [
        rr.linear_atom(p, rr.selector(g, (20, )))
        for p, g in zip(penalty.atoms, penalty.groups)
    ]
    problem_s = rr.container(loss, *penalty_s)
    solver_s = rr.FISTA(problem_s)
    solver_s.fit(min_its=500, tol=1.0e-12)
    coefs_s = solver_s.composite.coefs

    np.testing.assert_almost_equal(coefs, coefs_all)
    np.testing.assert_almost_equal(coefs, coefs_s)
Beispiel #55
0
def solve_sqrt_lasso_skinny(X,
                            Y,
                            weights=None,
                            initial=None,
                            quadratic=None,
                            solve_args={}):
    """

    Solve the square-root LASSO optimization problem:

    $$
    \text{minimize}_{\beta} \|y-X\beta\|_2 + \|D\beta\|_1,
    $$
    where $D$ is the diagonal matrix with weights on its diagonal.

    Parameters
    ----------

    y : np.float((n,))
        The target, in the model $y = X\beta$

    X : np.float((n, p))
        The data, in the model $y = X\beta$

    weights : np.float
        Coefficients of the L-1 penalty in
        optimization problem, note that different
        coordinates can have different coefficients.

    initial : np.float(p)
        Initial point for optimization.

    solve_args : dict
        Arguments passed to regreg solver.

    quadratic : `regreg.identity_quadratic`
        A quadratic term added to objective function.

    """
    X = rr.astransform(X)
    n, p = X.output_shape[0], X.input_shape[0]
    if weights is None:
        lam = choose_lambda(X)
        weights = lam * np.ones((p, ))
    weight_dict = dict(zip(np.arange(p), 2 * weights))
    penalty = rr.mixed_lasso(list(np.arange(p)) + [rr.NONNEGATIVE],
                             lagrange=1.,
                             weights=weight_dict)

    loss = sqlasso_objective_skinny(X, Y)
    problem = rr.simple_problem(loss, penalty)
    problem.coefs[-1] = np.linalg.norm(Y)
    if initial is not None:
        problem.coefs[:-1] = initial

    if quadratic is not None:
        collapsed = quadratic.collapsed()
        new_linear_term = np.zeros(p + 1)
        new_linear_term[:p] = collapsed.linear_term
        new_quadratic = rr.identity_quadratic(collapsed.coef, 0.,
                                              new_linear_term,
                                              collapsed.constant_term)
    else:
        new_quadratic = None

    soln = problem.solve(new_quadratic, **solve_args)
    _loss = sqlasso_objective(X, Y)
    return soln[:-1], _loss
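
One way to read the "skinny" formulation above (an inference from the extra nonnegative coordinate appended to the coefficients): the sqrt-LASSO loss admits the variational representation

$$
\|y - X\beta\|_2 = \min_{s > 0} \; \frac{\|y - X\beta\|_2^2}{2s} + \frac{s}{2},
$$

with the minimum attained at $s = \|y - X\beta\|_2$, so jointly minimizing over $(\beta, s)$ with $s \ge 0$ recovers the original objective; the last coordinate of problem.coefs appears to play the role of $s$ and is initialized at $\|Y\|_2$.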
Beispiel #56
0
    def fit(self, 
            solve_args={'tol':1.e-12, 'min_its':50}, 
            perturb=None):
        """
        Fit the randomized lasso using `regreg`.

        Parameters
        ----------

        solve_args : keyword args
             Passed to `regreg.problems.simple_problem.solve`.

        Returns
        -------

        signs : np.float
             Support and non-zero signs of randomized lasso solution.
             
        """

        p = self.nfeature

        # take a new perturbation if supplied
        if perturb is not None:
            self._initial_omega = perturb
        if self._initial_omega is None:
            self._initial_omega = self.randomizer.sample()

        quad = rr.identity_quadratic(self.ridge_term, 0, -self._initial_omega, 0)
        quad_data = rr.identity_quadratic(0, 0, -self.X.T.dot(self.y), 0)
        problem = rr.simple_problem(self.loss, self.penalty)
        self.initial_soln = problem.solve(quad + quad_data, **solve_args)

        active_signs = np.sign(self.initial_soln)
        active = self._active = active_signs != 0

        self._lagrange = self.penalty.weights
        unpenalized = self._lagrange == 0

        active *= ~unpenalized

        self._overall = overall = (active + unpenalized) > 0
        self._inactive = inactive = ~self._overall
        self._unpenalized = unpenalized

        _active_signs = active_signs.copy()
        _active_signs[unpenalized] = np.nan # don't release sign of unpenalized variables
        self.selection_variable = {'sign':_active_signs,
                                   'variables':self._overall}

        # initial state for opt variables
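        # by the KKT conditions, minus the total smooth gradient (loss + data
        # linear term + randomization quadratic) at the solution is a
        # subgradient of the penalty; it is stored as the initial subgradient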

        initial_subgrad = -(self.loss.smooth_objective(self.initial_soln, 'grad') + 
                            quad_data.objective(self.initial_soln, 'grad') +
                            quad.objective(self.initial_soln, 'grad')) 
        self.initial_subgrad = initial_subgrad

        initial_scalings = np.fabs(self.initial_soln[active])
        initial_unpenalized = self.initial_soln[self._unpenalized]

        self.observed_opt_state = np.concatenate([initial_scalings,
                                                  initial_unpenalized])

        E = overall
        Q_E = self.Q[E][:,E]
        _beta_unpenalized = np.linalg.inv(Q_E).dot(self.X[:,E].T.dot(self.y))
        beta_bar = np.zeros(p)
        beta_bar[overall] = _beta_unpenalized
        self._beta_full = beta_bar

        # observed state for score in internal coordinates

        self.observed_internal_state = np.hstack([_beta_unpenalized,
                                                  -self.loss.smooth_objective(beta_bar, 'grad')[inactive] + 
                                                  quad_data.objective(beta_bar, 'grad')[inactive]])

        # form linear part

        self.num_opt_var = self.observed_opt_state.shape[0]

        # (\bar{\beta}_{E \cup U}, N_{-E}, c_E, \beta_U, z_{-E})
        # E for active
        # U for unpenalized
        # -E for inactive

        _opt_linear_term = np.zeros((p, self.num_opt_var))
        _score_linear_term = np.zeros((p, self.num_opt_var))

        # \bar{\beta}_{E \cup U} piece -- the unpenalized M estimator

        X, y = self.X, self.y
        _hessian_active = self.Q[:, active]
        _hessian_unpen = self.Q[:, unpenalized]

        _score_linear_term = -np.hstack([_hessian_active, _hessian_unpen])

        # set the observed score (data dependent) state

        self.observed_score_state = _score_linear_term.dot(_beta_unpenalized)
        self.observed_score_state[inactive] += (self.loss.smooth_objective(beta_bar, 'grad')[inactive] + 
                                                quad_data.objective(beta_bar, 'grad')[inactive])

        def signed_basis_vector(p, j, s):
            v = np.zeros(p)
            v[j] = s
            return v

        active_directions = np.array([signed_basis_vector(p, j, active_signs[j]) for j in np.nonzero(active)[0]]).T

        scaling_slice = slice(0, active.sum())
        if np.sum(active) == 0:
            _opt_hessian = 0
        else:
            _opt_hessian = _hessian_active * active_signs[None, active] + self.ridge_term * active_directions
        _opt_linear_term[:, scaling_slice] = _opt_hessian

        # beta_U piece

        unpenalized_slice = slice(active.sum(), self.num_opt_var)
        unpenalized_directions = np.array([signed_basis_vector(p, j, 1) for j in np.nonzero(unpenalized)[0]]).T
        if unpenalized.sum():
            _opt_linear_term[:, unpenalized_slice] = (_hessian_unpen
                                                      + self.ridge_term * unpenalized_directions) 

        # two transforms that encode score and optimization
        # variable roles 

        self.opt_transform = (_opt_linear_term, self.initial_subgrad)
        self.score_transform = (_score_linear_term, np.zeros(_score_linear_term.shape[0]))
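        # presumably these affine pieces let the sampler reconstruct the
        # randomization omega from the optimization variables and the observed
        # score, mirroring the KKT decomposition used above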

        # now store everything needed for the projections
        # the projection acts only on the optimization
        # variables

        self._setup = True
        self.scaling_slice = scaling_slice
        self.unpenalized_slice = unpenalized_slice
        self.ndim = self.loss.shape[0]

        # compute implied mean and covariance

        opt_linear, opt_offset = self.opt_transform

        A_scaling = -np.identity(self.num_opt_var)
        b_scaling = np.zeros(self.num_opt_var)

        self._setup_sampler(A_scaling,
                            b_scaling,
                            opt_linear,
                            opt_offset)
        
        return active_signs
Beispiel #57
0
    def setup_sampler(self,
                      scaling=1.,
                      solve_args={
                          'min_its': 50,
                          'tol': 1.e-10
                      },
                      B=2000):

        M_estimator.setup_sampler(self, scaling=scaling, solve_args=solve_args)

        # now we need to estimate covariance of
        # loss.grad(\beta_E^*) - 1/pi * randomized_loss.grad(\beta_E^*)

        m, n, p = self.subsample_size, self.total_size, self.loss.shape[
            0]  # shorthand

        from .glm import pairs_bootstrap_score  # need to correct these imports!!!

        bootstrap_score = pairs_bootstrap_score(
            self.loss,
            self._overall,
            beta_active=self._beta_full[self._overall],
            solve_args=solve_args)

        # find unpenalized MLE on subsample

        newq, oldq = rr.identity_quadratic(0, 0, 0,
                                           0), self.randomized_loss.quadratic
        self.randomized_loss.quadratic = newq
        beta_active_subsample = restricted_Mest(self.randomized_loss,
                                                self._overall)

        bootstrap_score_split = pairs_bootstrap_score(
            self.loss,
            self._overall,
            beta_active=beta_active_subsample,
            solve_args=solve_args)
        self.randomized_loss.quadratic = oldq

        inv_frac = n / m
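        # subsample_diff returns the difference between a full-data bootstrap
        # score and the rescaled subsample score; its bootstrap moments below
        # estimate the covariance that the randomization is given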

        def subsample_diff(m, n, indices):
            subsample = np.random.choice(indices, size=m, replace=False)
            full_score = bootstrap_score(indices)  # a sum of n terms
            randomized_score = bootstrap_score_split(
                subsample)  # a sum of m terms
            return full_score - randomized_score * inv_frac

        first_moment = np.zeros(p)
        second_moment = np.zeros((p, p))

        _n = np.arange(n)
        for _ in range(B):
            indices = np.random.choice(_n, size=n, replace=True)
            randomized_score = subsample_diff(m, n, indices)
            first_moment += randomized_score
            second_moment += np.multiply.outer(randomized_score,
                                               randomized_score)

        first_moment /= B
        second_moment /= B

        cov = second_moment - np.multiply.outer(first_moment, first_moment)

        self.randomization.set_covariance(cov)
Beispiel #58
0
def test_solve_QP():
    """
    Check the R coordinate descent LASSO solver
    """

    n, p = 100, 50
    lam = 0.08

    X = np.random.standard_normal((n, p))

    loss = rr.squared_error(X, np.zeros(n), coef=1. / n)
    pen = rr.l1norm(p, lagrange=lam)
    E = np.zeros(p)
    E[2] = 1
    Q = rr.identity_quadratic(0, 0, E, 0)
    problem = rr.simple_problem(loss, pen)
    soln = problem.solve(Q, min_its=500, tol=1.e-12)

    numpy2ri.activate()

    rpy.r.assign('X', X)
    rpy.r.assign('E', E)
    rpy.r.assign('lam', lam)

    R_code = """

    library(selectiveInference)
    p = ncol(X)
    n = nrow(X)
    soln_R = rep(0, p)
    grad = 1. * E
    ever_active = as.integer(c(1, rep(0, p-1)))
    nactive = as.integer(1)
    kkt_tol = 1.e-12
    objective_tol = 1.e-16
    parameter_tol = 1.e-10
    maxiter = 500
    soln_R = selectiveInference:::solve_QP(t(X) %*% X / n, 
                                           lam, 
                                           maxiter, 
                                           soln_R, 
                                           E,
                                           grad, 
                                           ever_active, 
                                           nactive, 
                                           kkt_tol, 
                                           objective_tol, 
                                           parameter_tol,
                                           p,
                                           TRUE,
                                           TRUE,
                                           TRUE)$soln

    # test wide solver
    Xtheta = rep(0, n)
    nactive = as.integer(1)
    ever_active = as.integer(c(1, rep(0, p-1)))
    soln_R_wide = rep(0, p)
    grad = 1. * E
    soln_R_wide = selectiveInference:::solve_QP_wide(X, 
                                                     rep(lam, p), 
                                                     0,
                                                     maxiter, 
                                                     soln_R_wide, 
                                                     E,
                                                     grad, 
                                                     Xtheta,
                                                     ever_active, 
                                                     nactive, 
                                                     kkt_tol, 
                                                     objective_tol, 
                                                     parameter_tol,
                                                     p,
                                                     TRUE,
                                                     TRUE,
                                                     TRUE)$soln

    """

    rpy.r(R_code)

    soln_R = np.asarray(rpy.r('soln_R'))
    soln_R_wide = np.asarray(rpy.r('soln_R_wide'))
    numpy2ri.deactivate()

    tol = 1.e-5
    print(soln - soln_R)
    print(soln_R - soln_R_wide)
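    # KKT check: G is the gradient of the smooth part of the randomized
    # objective at the solution; on the active set it should equal
    # -lam * sign(soln), and its l-infinity norm should not exceed lam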

    G = X.T.dot(X).dot(soln) / n + E

    yield np.testing.assert_allclose, soln, soln_R, tol, tol, False, 'checking coordinate QP solver'
    yield np.testing.assert_allclose, soln, soln_R_wide, tol, tol, False, 'checking wide coordinate QP solver'
    yield np.testing.assert_allclose, G[soln != 0], -np.sign(
        soln[soln != 0]
    ) * lam, tol, tol, False, 'checking active coordinate KKT for QP solver'
    yield nt.assert_true, np.fabs(
        G).max() < lam * (1. + 1.e-6), 'testing linfinity norm'
def test_lasso(s=5, n=200, p=20):

    X, y, _, nonzero, sigma = instance(n=n,
                                       p=p,
                                       random_signs=True,
                                       s=s,
                                       sigma=1.,
                                       rho=0,
                                       snr=10)
    print('sigma', sigma)
    lam_frac = 1.

    randomization = laplace(loc=0, scale=1.)
    loss = randomized.gaussian_Xfixed(X, y)

    random_Z = randomization.rvs(p)
    epsilon = 1.
    lam = sigma * lam_frac * np.mean(
        np.fabs(np.dot(X.T, np.random.standard_normal((n, 10000)))).max(0))

    random_Z = randomization.rvs(p)
    penalty = randomized.selective_l1norm_lan(p, lagrange=lam)

    #sampler1 = randomized.selective_sampler_MH_lan(loss,
    #                                           random_Z,
    #                                           epsilon,
    #                                           randomization,
    #                                          penalty)

    #loss_args = {'mean': np.zeros(n),
    #             'sigma': sigma,
    #             'linear_part':np.identity(y.shape[0]),
    #             'value': 0}

    #sampler1.setup_sampling(y, loss_args=loss_args)
    # data, opt_vars = sampler1.state

    # initial solution
    # rr.smooth_atom instead of loss?
    problem = rr.simple_problem(loss, penalty)
    random_term = rr.identity_quadratic(epsilon, 0, -random_Z, 0)
    solve_args = {'tol': 1.e-10, 'min_its': 100, 'max_its': 500}
    initial_soln = problem.solve(random_term, **solve_args)

    active = (initial_soln != 0)
    inactive = ~active
    initial_grad = -np.dot(X.T, y - np.dot(X, initial_soln))
    betaE = initial_soln[active]
    signs = np.sign(betaE)
    subgradient = random_Z - initial_grad - epsilon * initial_soln
    cube = np.divide(subgradient[inactive], lam)
    #print betaE, cube
    #initial_grad = loss.smooth_objective(initial_soln,  mode='grad')
    #print penalty.setup_sampling(initial_grad,
    #                                     initial_soln,
    #                                     random_Z,
    #                                     epsilon)

    data0 = y.copy()
    #active = penalty.active_set

    if (np.sum(active) == 0):
        print('here')
        return [-1], [-1]

    nalpha = n
    nactive = betaE.shape[0]
    ninactive = cube.shape[0]

    alpha = np.ones(n)
    beta_bar = np.linalg.lstsq(X[:, active], y)[0]
    obs_residuals = y - np.dot(X[:, active], beta_bar)

    #obs_residuals -= np.mean(obs_residuals)
    #betaE, cube = opt_vars

    init_vec_state = np.zeros(n + nactive + ninactive)
    init_vec_state[:n] = alpha
    init_vec_state[n:(n + nactive)] = betaE
    init_vec_state[(n + nactive):] = cube

    def full_projection(vec_state,
                        signs=signs,
                        nalpha=nalpha,
                        nactive=nactive,
                        ninactive=ninactive):

        alpha = vec_state[:nalpha].copy()
        betaE = vec_state[nalpha:(nalpha + nactive)]
        cube = vec_state[(nalpha + nactive):]

        #signs = penalty.signs
        projected_alpha = alpha.copy()
        projected_betaE = betaE.copy()
        projected_cube = np.zeros_like(cube)

        projected_alpha = np.clip(alpha, 0, np.inf)

        for i in range(nactive):
            if (projected_betaE[i] * signs[i] < 0):
                projected_betaE[i] = 0

        projected_cube = np.clip(cube, -1, 1)

        return np.concatenate(
            (projected_alpha, projected_betaE, projected_cube), 0)

    null, alt = pval(init_vec_state, full_projection, X, y, obs_residuals,
                     signs, lam, epsilon, nonzero, active)

    return null, alt
def test_lasso(s=1, n=100, p=10):

    X, y, _, nonzero, sigma = instance(n=n, p=p, random_signs=True, s=s, sigma=1.,rho=0)
    print('sigma', sigma)
    lam_frac = 1.

    randomization = laplace(loc=0, scale=1.)
    loss = randomized.gaussian_Xfixed(X, y)

    random_Z = randomization.rvs(p)
    epsilon = 1.
    lam = sigma * lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 10000)))).max(0))

    random_Z = randomization.rvs(p)
    penalty = randomized.selective_l1norm_lan(p, lagrange=lam)

    #sampler1 = randomized.selective_sampler_MH_lan(loss,
    #                                           random_Z,
    #                                           epsilon,
    #                                           randomization,
    #                                          penalty)

    #loss_args = {'mean': np.zeros(n),
    #             'sigma': sigma,
    #             'linear_part':np.identity(y.shape[0]),
    #             'value': 0}

    #sampler1.setup_sampling(y, loss_args=loss_args)
    # data, opt_vars = sampler1.state

    # initial solution
    problem = rr.simple_problem(loss, penalty)
    random_term = rr.identity_quadratic(epsilon, 0, random_Z, 0)
    solve_args = {'tol': 1.e-10, 'min_its': 100, 'max_its': 500}
    initial_soln = problem.solve(random_term, **solve_args)
    initial_grad = loss.smooth_objective(initial_soln,  mode='grad')
    betaE, cube = penalty.setup_sampling(initial_grad,
                                         initial_soln,
                                         random_Z,
                                         epsilon)

    data = y.copy()
    active = penalty.active_set
    if (np.sum(active)==0):
        print('here')
        return [-1], [-1]
    inactive = ~active

    #betaE, cube = opt_vars
    ndata = data.shape[0];  nactive = betaE.shape[0];  ninactive = cube.shape[0]
    init_vec_state = np.zeros(ndata+nactive+ninactive)
    init_vec_state[:ndata] = data
    init_vec_state[ndata:(ndata+nactive)] = betaE
    init_vec_state[(ndata+nactive):] = cube

    def bootstrap_samples(y, P, R):
        nsample = 50
        boot_samples = []
        for _ in range(nsample):
            indices = np.random.choice(n, size=(n,), replace=True)
            y_star = y[indices]
            boot_samples.append(np.dot(P,y)+np.dot(R,y_star-y))

        return boot_samples

    #boot_samples = bootstrap_samples(y)


    def move_data(vec_state, boot_samples,
                   ndata = ndata, nactive = nactive, ninactive = ninactive, loss=loss):

        weights = []

        betaE = vec_state[ndata:(ndata+nactive)]
        cube = vec_state[(ndata+nactive):]
        opt_vars = [betaE, cube]
        params, _, opt_vec = penalty.form_optimization_vector(opt_vars)  # opt_vec=\epsilon(\beta 0)+u, u=\grad P(\beta), P penalty
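        # weight each bootstrap sample by the (unnormalized) Laplace
        # randomization density exp(-||gradient + opt_vec||_1) evaluated at the
        # implied randomization, then resample one index via a multinomial draw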

        for i in range(len(boot_samples)):
            gradient = loss.gradient(boot_samples[i], params)
            weights.append(np.exp(-np.sum(np.abs(gradient + opt_vec))))
        weights = np.asarray(weights) / np.sum(weights)

        #m = max(weights)
        #idx = [i for i, j in enumerate(weights) if j == m][0]
        idx = np.nonzero(np.random.multinomial(1, weights, size=1)[0])[0][0]
        return boot_samples[idx]


    def full_projection(vec_state, penalty=penalty,
                        ndata=ndata, nactive=nactive, ninactive = ninactive):
        data = vec_state[:ndata].copy()
        betaE = vec_state[ndata:(ndata+nactive)]
        cube = vec_state[(ndata+nactive):]

        signs = penalty.signs
        projected_betaE = betaE.copy()
        projected_cube = np.zeros_like(cube)

        for i in range(nactive):
            if (projected_betaE[i] * signs[i] < 0):
                projected_betaE[i] = 0

        projected_cube = np.clip(cube, -1, 1)

        return np.concatenate((data, projected_betaE, projected_cube), 0)



    def full_gradient(vec_state, loss=loss, penalty =penalty, X=X,
                      lam=lam, epsilon=epsilon, ndata=ndata, active=active, inactive=inactive):
        nactive = np.sum(active); ninactive=np.sum(inactive)

        data = vec_state[:ndata]
        betaE = vec_state[ndata:(ndata + nactive)]
        cube = vec_state[(ndata + nactive):]

        opt_vars = [betaE, cube]
        params , _ , opt_vec = penalty.form_optimization_vector(opt_vars) # opt_vec=\epsilon(\beta 0)+u, u=\grad P(\beta), P penalty

        gradient = loss.gradient(data, params)
        hessian = loss.hessian()

        ndata = data.shape[0]
        nactive = betaE.shape[0]
        ninactive = cube.shape[0]

        sign_vec = - np.sign(gradient + opt_vec)  # sign(w), w=grad+\epsilon*beta+lambda*u

        B = hessian + epsilon * np.identity(nactive + ninactive)
        A = B[:, active]

        _gradient = np.zeros(ndata + nactive + ninactive)
        _gradient[:ndata] = 0 #- (data + np.dot(X, sign_vec))
        _gradient[ndata:(ndata + nactive)] = np.dot(A.T, sign_vec)
        _gradient[(ndata + nactive):] = lam * sign_vec[inactive]

        return _gradient


    null, alt = pval(init_vec_state, full_gradient, full_projection, move_data, bootstrap_samples,
                      X, y, nonzero, active)

    return null, alt