Example 1
def test_lasso_dual():

    """
    Check that the solution of the lasso signal approximator dual composite is soft-thresholding
    """

    l1 = .1
    sparsity = R.l1norm(10, lagrange=l1)
    x = np.arange(10) - 5
    loss = R.quadratic.shift(-x, coef=0.5)

    pen = R.simple_problem(loss, sparsity)
    solver = R.FISTA(pen)
    pen.lipschitz = 1
    solver.fit(backtrack=False)
    soln = solver.composite.coefs
    st = np.maximum(np.fabs(x)-l1,0) * np.sign(x) 

    np.testing.assert_almost_equal(soln,st, decimal=3)

    pen = R.simple_problem(loss, sparsity)
    solver = R.FISTA(pen)
    solver.fit(monotonicity_restart=False)
    soln = solver.composite.coefs
    st = np.maximum(np.fabs(x)-l1,0) * np.sign(x) 

    np.testing.assert_almost_equal(soln,st, decimal=3)


    pen = R.container(loss, sparsity)
    solver = R.FISTA(pen)
    solver.fit()
    soln = solver.composite.coefs

    np.testing.assert_almost_equal(soln,st, decimal=3)
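The closed-form reference `st` used in the assertions above is the soft-thresholding map, i.e. the proximal operator of the scaled l1 norm. A minimal standalone sketch of that map (assuming only numpy; `soft_threshold` is an illustrative helper, not a regreg name):

import numpy as np

def soft_threshold(x, lam):
    # proximal map of lam * ||.||_1: shrink each coordinate toward zero by lam
    return np.sign(x) * np.maximum(np.abs(x) - lam, 0.0)

# with x = np.arange(10) - 5 and lam = 0.1 this reproduces `st` above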
Example 2
    def test_simple_problem(self):
        tests = []
        atom, q, prox_center, L = self.atom, self.q, self.prox_center, self.L
        loss = self.loss

        problem = rr.simple_problem(loss, atom)
        solver = rr.FISTA(problem)
        solver.fit(tol=1.0e-12, FISTA=self.FISTA, coef_stop=self.coef_stop, min_its=100)

        tests.append((atom.proximal(q), solver.composite.coefs, 'solving prox with simple_problem with monotonicity\n %s' % str(self)))

        # rewrite the loss as a smooth quadratic (coef 0.6*L) plus an identity_quadratic (coef 0.4*L) attached to it...

        q = rr.identity_quadratic(L, prox_center, 0, 0)
        lossq = rr.quadratic.shift(prox_center.copy(), coef=0.6*L)
        lossq.quadratic = rr.identity_quadratic(0.4*L, prox_center.copy(), 0, 0)
        problem = rr.simple_problem(lossq, atom)

        tests.append((atom.proximal(q), 
              problem.solve(coef_stop=self.coef_stop, 
                            FISTA=self.FISTA, 
                            tol=1.0e-12), 
               'solving prox with simple_problem ' +
               'with monotonicity  but loss has identity_quadratic %s\n ' % str(self)))

        problem = rr.simple_problem(loss, atom)
        solver = rr.FISTA(problem)
        solver.fit(tol=1.0e-12, monotonicity_restart=False,
                   coef_stop=self.coef_stop, FISTA=self.FISTA, min_its=100)

        tests.append((atom.proximal(q), solver.composite.coefs, 'solving prox with simple_problem no monotonicity_restart\n %s' % str(self)))

        d = atom.conjugate
        problem = rr.simple_problem(loss, d)
        solver = rr.FISTA(problem)
        solver.fit(tol=1.0e-12, monotonicity_restart=False, 
                   coef_stop=self.coef_stop, FISTA=self.FISTA, min_its=100)
        tests.append((d.proximal(q), problem.solve(tol=1.e-12,
                                                FISTA=self.FISTA,
                                                coef_stop=self.coef_stop,
                                                monotonicity_restart=False), 
               'solving dual prox with simple_problem no monotonicity\n %s ' % str(self)))

        if not self.interactive:
            for test in tests:
                yield (all_close,) + test + (self,)
        else:
            for test in tests:
                yield all_close(*((test + (self,))))
Example 3
def test_using_SLOPE_weights():

    n, p = 500, 50

    X = np.random.standard_normal((n, p))
    #Y = np.random.standard_normal(n)
    X -= X.mean(0)[None, :]
    X /= (X.std(0)[None, :] * np.sqrt(n))
    beta = np.zeros(p)
    beta[:5] = 5.

    Y = X.dot(beta) + np.random.standard_normal(n)

    output_R = fit_slope_R(X, Y, W = None, normalize = True, choice_weights = "bhq")
    r_beta = output_R[0]
    r_lambda_seq = output_R[2]

    W = r_lambda_seq
    pen = slope(W, lagrange=1.)

    loss = rr.squared_error(X, Y)
    problem = rr.simple_problem(loss, pen)
    soln = problem.solve(tol=1.e-14, min_its=500)

    # we get a better objective value
    nt.assert_true(problem.objective(soln) < problem.objective(np.asarray(r_beta)))
    nt.assert_true(np.linalg.norm(soln - r_beta) < 1.e-6 * np.linalg.norm(soln))
Example 4
    def __init__(self, loss, 
                 linear_randomization,
                 quadratic_coef,
                 randomization, 
                 penalty,
                 solve_args={'tol':1.e-10, 'min_its':100, 'max_its':500}):

        (self.loss,
         self.linear_randomization,
         self.randomization,
         self.quadratic_coef) = (loss,
                                 linear_randomization,
                                 randomization,
                                 quadratic_coef)

        # initialize optimization problem

        self.penalty = penalty
        self.problem = rr.simple_problem(loss, penalty)

        random_term = rr.identity_quadratic(
                                quadratic_coef, 0, 
                                self.linear_randomization, 0)

        self.initial_soln = self.problem.solve(random_term,
                                               **solve_args)
        self.initial_grad = self.loss.smooth_objective(self.initial_soln, 
                                                       mode='grad')
        self.opt_vars = self.penalty.setup_sampling( \
            self.initial_grad,
            self.initial_soln,
            self.linear_randomization,
            self.quadratic_coef)
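Under the convention that `rr.identity_quadratic(c, x0, v, k)` represents the term (c/2)||b - x0||^2 + <v, b> + k (an assumption about the regreg API worth verifying against the installed version), the solve above corresponds to the randomized program

$$
\hat\beta = \text{argmin}_\beta \; \ell(\beta) + \mathcal{P}(\beta) + \frac{\epsilon}{2}\|\beta\|_2^2 + \langle \eta, \beta \rangle,
$$

with $\epsilon$ the `quadratic_coef` and $\eta$ the `linear_randomization` vector.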
Example 5
def test_changepoint_scaled():

    p = 150
    M = multiscale(p)
    M.minsize = 10
    X = ra.adjoint(M)

    Y = np.random.standard_normal(p)
    Y[20:50] += 8
    Y += 2
    meanY = Y.mean()

    lammax = np.fabs(np.sqrt(M.sizes) * X.adjoint_map(Y) / (1 + np.sqrt(np.log(M.sizes)))).max()

    penalty = rr.weighted_l1norm((1 + np.sqrt(np.log(M.sizes))) / np.sqrt(M.sizes), lagrange=0.5*lammax)
    loss = rr.squared_error(X, Y - meanY)
    problem = rr.simple_problem(loss, penalty)
    soln = problem.solve()
    Yhat = X.linear_map(soln)
    Yhat += meanY

    if INTERACTIVE:
        plt.scatter(np.arange(p), Y)
        plt.plot(np.arange(p), Yhat)
        plt.show()
Example 6
def test_nesta_lasso():

    n, p = 1000, 20
    X = np.random.standard_normal((n, p))
    beta = np.zeros(p)
    beta[:4] = 30
    Y = np.random.standard_normal(n) + np.dot(X, beta)

    loss = rr.squared_error(X,Y)
    penalty = rr.l1norm(p, lagrange=2.)

    # using nesta
    z = rr.zero(p)
    primal, dual = rr.nesta(loss, z, penalty, tol=1.e-10,
                            epsilon=2.**(-np.arange(30)),
                            initial_dual=np.zeros(p))

    # using simple problem

    problem = rr.simple_problem(loss, penalty)
    problem.solve()
    nt.assert_true(np.linalg.norm(primal - problem.coefs) / np.linalg.norm(problem.coefs) < 1.e-3)

    # test None as smooth_atom

    rr.nesta(None, z, penalty, tol=1.e-10,
             epsilon=2.**(-np.arange(30)),
             initial_dual=np.zeros(p))

    # using coefficients to stop

    rr.nesta(loss, z, penalty, tol=1.e-10,
             epsilon=2.**(-np.arange(30)),
             initial_dual=np.zeros(p),
             coef_stop=True)
Example 7
def test_simple():
    Z = np.random.standard_normal(100) * 4
    p = rr.l1norm(100, lagrange=0.13)
    L = 0.14

    loss = rr.quadratic.shift(-Z, coef=L)
    problem = rr.simple_problem(loss, p)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-10, debug=True)

    simple_coef = solver.composite.coefs
    prox_coef = p.proximal(rr.identity_quadratic(L, Z, 0, 0))

    p2 = rr.l1norm(100, lagrange=0.13)
    p2 = copy(p)
    p2.quadratic = rr.identity_quadratic(L, Z, 0, 0)
    problem = rr.simple_problem.nonsmooth(p2)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-14, debug=True)
    simple_nonsmooth_coef = solver.composite.coefs

    p = rr.l1norm(100, lagrange=0.13)
    p.quadratic = rr.identity_quadratic(L, Z, 0, 0)
    problem = rr.simple_problem.nonsmooth(p)
    simple_nonsmooth_gengrad = gengrad(problem, L, tol=1.0e-10)

    p = rr.l1norm(100, lagrange=0.13)
    problem = rr.separable_problem.singleton(p, loss)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-10)
    separable_coef = solver.composite.coefs

    loss2 = rr.quadratic.shift(-Z, coef=0.6*L)
    loss2.quadratic = rr.identity_quadratic(0.4*L, Z, 0, 0)
    p.coefs *= 0
    problem2 = rr.simple_problem(loss2, p)
    loss2_coefs = problem2.solve(coef_stop=True)
    solver2 = rr.FISTA(problem2)
    solver2.fit(tol=1.0e-10, debug=True, coef_stop=True)

    yield ac, prox_coef, simple_nonsmooth_gengrad, 'prox to nonsmooth gengrad'
    yield ac, prox_coef, separable_coef, 'prox to separable'
    yield ac, prox_coef, simple_nonsmooth_coef, 'prox to simple_nonsmooth'
    yield ac, prox_coef, simple_coef, 'prox to simple'
    yield ac, prox_coef, loss2_coefs, 'simple where loss has quadratic 1'
    yield ac, prox_coef, solver2.composite.coefs, 'simple where loss has quadratic 2'
Example 8
def test_path_group_lasso():
    """
    this test looks at the paths of three different parameterizations
    of the same problem

    """
    n = 100
    X = np.random.standard_normal((n, 10))
    U = np.random.standard_normal((n, 2))
    Y = np.random.standard_normal(100)
    betaX = np.array([3, 4, 5, 0, 0] + [0] * 5)
    betaU = np.array([10, -5])
    Y += (np.dot(X, betaX) + np.dot(U, betaU)) * 5

    Xn = rr.normalize(
        np.hstack([np.ones((100, 1)), X]), inplace=True, center=True, scale=True, intercept_column=0
    ).normalized_array()
    lasso = rr.lasso.squared_error(Xn[:, 1:], Y, penalty_structure=[0] * 7 + [1] * 3, nstep=10)

    sol = lasso.main(inner_tol=1.0e-12, verbose=True)
    beta = np.array(sol["beta"].todense())

    sols = []
    sols_sep = []
    for l in sol["lagrange"]:
        loss = rr.squared_error(Xn, Y, coef=1.0 / n)
        penalty = rr.mixed_lasso([rr.UNPENALIZED] + [0] * 7 + [1] * 3, lagrange=l)  # matrix contains an intercept...
        problem = rr.simple_problem(loss, penalty)
        sols.append(problem.solve(tol=1.0e-12).copy())

        sep = rr.separable(
            (11,),
            [rr.l2norm((7,), np.sqrt(7) * l), rr.l2norm((3,), np.sqrt(3) * l)],
            [np.arange(1, 8), np.arange(8, 11)],
        )
        sep_problem = rr.simple_problem(loss, sep)
        sols_sep.append(sep_problem.solve(tol=1.0e-12).copy())

    sols = np.array(sols).T
    sols_sep = np.array(sols_sep).T

    nt.assert_true(np.linalg.norm(beta - sols) / (1 + np.linalg.norm(beta)) <= 1.0e-4)
    nt.assert_true(np.linalg.norm(beta - sols_sep) / (1 + np.linalg.norm(beta)) <= 1.0e-4)
Example 9
def solve_sqrt_lasso_skinny(X, Y, weights=None, initial=None, quadratic=None, solve_args={}):
    """

    Solve the square-root LASSO optimization problem:

    $$
    \text{minimize}_{\beta} \|y-X\beta\|_2 + \|D\beta\|_1,
    $$
    where $D$ is the diagonal matrix with weights on its diagonal.

    Parameters
    ----------

    y : np.float((n,))
        The target, in the model $y = X\beta$

    X : np.float((n, p))
        The data, in the model $y = X\beta$

    weights : np.float
        Coefficients of the L-1 penalty in the
        optimization problem; note that different
        coordinates can have different coefficients.

    initial : np.float(p)
        Initial point for optimization.

    solve_args : dict
        Arguments passed to regreg solver.

    quadratic : `regreg.identity_quadratic`
        A quadratic term added to the objective function.

    """
    n, p = X.shape
    if weights is None:
        lam = choose_lambda(X)
        weights = lam * np.ones((p,))
    weight_dict = dict(zip(np.arange(p),
                           2 * weights))
    penalty = rr.mixed_lasso(list(range(p)) + [rr.NONNEGATIVE], lagrange=1.,
                             weights=weight_dict)

    loss = sqlasso_objective_skinny(X, Y)
    problem = rr.simple_problem(loss, penalty)
    problem.coefs[-1] = np.linalg.norm(Y)
    if initial is not None:
        problem.coefs[:-1] = initial
    soln = problem.solve(quadratic, **solve_args)
    _loss = sqlasso_objective(X, Y)
    return soln[:-1], _loss
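A minimal usage sketch for the function above, assuming it (and its helpers `choose_lambda`, `sqlasso_objective_skinny`, `sqlasso_objective`) is importable from the surrounding module; the data here is synthetic and purely illustrative:

import numpy as np

n, p = 100, 20
X = np.random.standard_normal((n, p))
Y = np.random.standard_normal(n)

# weights default to choose_lambda(X) * np.ones(p) when omitted
soln, loss = solve_sqrt_lasso_skinny(X, Y, solve_args={'min_its': 100, 'tol': 1.e-10})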
Example 10
def test_admm(n=100, p=10):

    X = np.random.standard_normal((n, p))
    Y = np.random.standard_normal(n)
    loss = rr.squared_error(X, Y)
    D = np.identity(p)
    pen = rr.l1norm(p, lagrange=1.5)

    ADMM = admm_problem(loss, pen, ra.astransform(D), 0.5)
    ADMM.solve(niter=1000)

    coef1 = ADMM.atom_coefs
    problem2 = rr.simple_problem(loss, pen)
    coef2 = problem2.solve(tol=1.e-12, min_its=500)

    np.testing.assert_allclose(coef1, coef2, rtol=1.e-3, atol=1.e-4)
Example 11
def test_class():
    """
    runs several class methods on generic instance
    """

    n, p = 100, 20
    X = np.random.standard_normal((n, p))
    Y = np.random.standard_normal(n)
    loss = rr.squared_error(X, Y)
    pen = rr.l1norm(p, lagrange=1.0)
    problem = rr.simple_problem(loss, pen)

    problem.latexify()

    for debug, coef_stop, max_its in product([True, False], [True, False], [5, 100]):
        rr.gengrad(problem, rr.power_L(X) ** 2, max_its=max_its, debug=debug, coef_stop=coef_stop)
Example 12
def solve_sqrt_lasso_fat(X, Y, weights=None, initial=None, quadratic=None, solve_args={}):
    """

    Solve the square-root LASSO optimization problem:

    $$
    \text{minimize}_{\beta} \|y-X\beta\|_2 + \|D\beta\|_1,
    $$
    where $D$ is the diagonal matrix with weights on its diagonal.

    Parameters
    ----------

    y : np.float((n,))
        The target, in the model $y = X\beta$

    X : np.float((n, p))
        The data, in the model $y = X\beta$

    weights : np.float
        Coefficients of the L-1 penalty in the
        optimization problem; note that different
        coordinates can have different coefficients.

    initial : np.float(p)
        Initial point for optimization.

    solve_args : dict
        Arguments passed to regreg solver.

    quadratic : `regreg.identity_quadratic`
        A quadratic term added to the objective function.

    """
    X = rr.astransform(X)
    n, p = X.output_shape[0], X.input_shape[0]
    if weights is None:
        lam = choose_lambda(X)
        weights = lam * np.ones((p,))

    loss = sqlasso_objective(X, Y)
    penalty = rr.weighted_l1norm(weights, lagrange=1.)
    problem = rr.simple_problem(loss, penalty)
    if initial is not None:
        problem.coefs[:] = initial
    soln = problem.solve(quadratic, **solve_args)
    return soln, loss
Example 13
def test_lasso_dual_with_monotonicity():

    """
    monotonicity restarts behave oddly for this simple problem
    """

    l1 = .1
    sparsity = R.l1norm(10, lagrange=l1)
    x = np.arange(10) - 5
    loss = R.quadratic.shift(-x, coef=0.5)


    pen = R.simple_problem(loss, sparsity)
    solver = R.FISTA(pen)
    solver.fit()
    soln = solver.composite.coefs
    st = np.maximum(np.fabs(x)-l1,0) * np.sign(x) 

    np.testing.assert_almost_equal(soln,st, decimal=3)
Example 14
def test_equivalence_sqrtlasso(n=200, p=400, s=10, sigma=3.):

    """
    Check equivalent LASSO and sqrtLASSO solutions.
    """

    Y = np.random.standard_normal(n) * sigma
    beta = np.zeros(p)
    beta[:s] = 8 * (2 * np.random.binomial(1, 0.5, size=(s,)) - 1)
    X = np.random.standard_normal((n,p)) + 0.3 * np.random.standard_normal(n)[:,None]
    X /= (X.std(0)[None,:] * np.sqrt(n))
    Y += np.dot(X, beta) * sigma
    lam_theor = choose_lambda(X, quantile=0.9)

    weights = lam_theor*np.ones(p)
    weights[:3] = 0.
    soln1, loss1 = solve_sqrt_lasso(X, Y, weights=weights, quadratic=None, solve_args={'min_its':500, 'tol':1.e-10})

    G1 = loss1.smooth_objective(soln1, 'grad') 

    # find active set, and estimate of sigma                                                                                                                          

    active = (soln1 != 0)
    nactive = active.sum()
    subgrad = np.sign(soln1[active]) * weights[active]
    X_E = X[:,active]
    X_Ei = np.linalg.pinv(X_E)
    sigma_E= np.linalg.norm(Y - X_E.dot(X_Ei.dot(Y))) / np.sqrt(n - nactive)

    multiplier = sigma_E * np.sqrt((n - nactive) / (1 - np.linalg.norm(X_Ei.T.dot(subgrad))**2))

    # XXX how should quadratic be changed?                                                                                                                            
    # multiply everything by sigma_E?                                                                                                                                 

    loss2 = rr.glm.gaussian(X, Y)
    penalty = rr.weighted_l1norm(weights, lagrange=multiplier)
    problem = rr.simple_problem(loss2, penalty)

    soln2 = problem.solve(tol=1.e-12, min_its=200)
    G2 = loss2.smooth_objective(soln2, 'grad') / multiplier

    np.testing.assert_allclose(G1[3:], G2[3:])
    np.testing.assert_allclose(soln1, soln2)
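A sketch of the identity this test exercises: away from a zero residual, the gradient of the square-root loss is

$$
\nabla_\beta \|y - X\beta\|_2 = -\frac{X^T(y - X\beta)}{\|y - X\beta\|_2},
$$

so the KKT conditions of the square-root LASSO with weights $w$ match those of the Gaussian LASSO $\frac{1}{2}\|y - X\beta\|_2^2 + \lambda \sum_j w_j |\beta_j|$ at $\lambda = \|y - X\hat\beta\|_2$; the `multiplier` computed above is an active-set estimate of that residual norm.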
Example 15
def test_choose_parameter(delta=2, p=60):

    signal = np.zeros(p)
    signal[(p//2):] += delta
    Z = np.random.standard_normal(p) + signal
    p = Z.shape[0]
    M = multiscale(p)
    M.scaling = np.sqrt(M.sizes)
    lam = choose_tuning_parameter(M)
    weights = (lam + np.sqrt(2 * np.log(p / M.sizes))) / np.sqrt(p)

    Z0 = Z - Z.mean()
    loss = rr.squared_error(ra.adjoint(M), Z0)
    penalty = rr.weighted_l1norm(weights, lagrange=1.)
    problem = rr.simple_problem(loss, penalty)
    coef = problem.solve()
    active = coef != 0

    if active.sum():
        X = M.form_matrix(M.slices[active])[0]
Example 16
    def fit(self, **solve_args):
        """
        Fit the lasso using `regreg`.
        This sets the attributes `soln`, `onestep` and
        forms the constraints necessary for post-selection inference
        by calling `form_constraints()`.

        Parameters
        ----------

        solve_args : keyword args
             Passed to `regreg.problems.simple_problem.solve`.

        Returns
        -------

        soln : np.float
             Solution to lasso.
             
        """

        penalty = weighted_l1norm(self.feature_weights, lagrange=1.)
        problem = simple_problem(self.loglike, penalty)
        _soln = problem.solve(**solve_args)
        self._soln = _soln
        if not np.all(_soln == 0):
            self.active = np.nonzero(_soln != 0)[0]
            self.active_signs = np.sign(_soln[self.active])
            self._active_soln = _soln[self.active]
            H = self.loglike.hessian(self._soln)[self.active][:,self.active]
            Hinv = np.linalg.inv(H)
            G = self.loglike.gradient(self._soln)[self.active]
            delta = Hinv.dot(G)
            self._onestep = self._active_soln - delta
            self.active_penalized = self.feature_weights[self.active] != 0
            self._constraints = constraints(-np.diag(self.active_signs)[self.active_penalized],
                                             (self.active_signs * delta)[self.active_penalized],
                                             covariance=Hinv)
        else:
            self.active = []
        return self._soln
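For reference, the `_onestep` quantity computed above is the one-step (Newton) correction of the penalized solution restricted to the active set $E$: with $H_{E,E}$ the corresponding Hessian block and $G_E$ the gradient block,

$$
\bar\beta_E = \hat\beta_E - H_{E,E}^{-1} G_E.
$$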
Example 17
    def _solve_randomized_problem(self, 
                                  perturb=None, 
                                  solve_args={'tol': 1.e-12, 'min_its': 50}):

        # take a new perturbation if supplied
        if perturb is not None:
            self._initial_omega = perturb
        if self._initial_omega is None:
            self._initial_omega = self.randomizer.sample()

        quad = rr.identity_quadratic(self.ridge_term, 
                                     0, 
                                     -self._initial_omega, 
                                     0)

        problem = rr.simple_problem(self.loglike, self.penalty)

        initial_soln = problem.solve(quad, **solve_args) 
        initial_subgrad = -(self.loglike.smooth_objective(initial_soln, 
                                                          'grad') +
                            quad.objective(initial_soln, 'grad'))

        return initial_soln, initial_subgrad
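The returned `initial_subgrad` is the subgradient of the penalty implied by stationarity of the randomized problem. Assuming `rr.identity_quadratic(c, 0, v, 0)` contributes a gradient $c\beta + v$ (an assumption about the regreg convention), the solution above satisfies

$$
\omega - \nabla \ell(\hat\beta) - \epsilon \hat\beta \in \partial \mathcal{P}(\hat\beta),
$$

where $\omega$ is `self._initial_omega`, $\epsilon$ is `self.ridge_term`, and the left-hand side is exactly `initial_subgrad`.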
Example 18
def test_changepoint():

    p = 150
    M = multiscale(p)
    M.minsize = 10
    X = ra.adjoint(M)

    Y = np.random.standard_normal(p)
    Y[20:50] += 8
    Y += 2
    meanY = Y.mean()

    lammax = np.fabs(X.adjoint_map(Y)).max()

    penalty = rr.l1norm(X.input_shape, lagrange=0.5*lammax)
    loss = rr.squared_error(X, Y - meanY)
    problem = rr.simple_problem(loss, penalty)
    soln = problem.solve()
    Yhat = X.linear_map(soln)
    Yhat += meanY

    plt.scatter(np.arange(p), Y)
    plt.plot(np.arange(p), Yhat)
Example 19
    def fit(self, solve_args={'min_its': 30, 'tol': 1.e-8, 'max_its': 300}):
        """
        Fit the lasso using `regreg`.
        This sets the attribute `soln` and
        forms the constraints necessary for post-selection inference
        by calling `form_constraints()`.

        Parameters
        ----------

        solve_args : dict
             Passed to `regreg.simple_problem.solve`.

        Returns
        -------

        soln : np.float
             Solution to lasso with `sklearn_alpha=self.lagrange`.
             
        """

        n, p = self.X.shape
        loss = self.form_loss(np.arange(p))
        penalty = self.form_penalty()
        problem = simple_problem(loss, penalty)
        soln = problem.solve(**solve_args)

        self._soln = soln
        if not np.all(soln == 0):
            self.active = np.nonzero(soln)[0]
            self.inactive = np.array(
                sorted(set(range(p)).difference(self.active)))
            loss_E = self.form_loss(self.active)
            self._beta_unpenalized = loss_E.solve(**solve_args)
            self.form_constraints()
        else:
            self.active = []
Example 20
def test_simple():
    Z = np.random.standard_normal((10, 10)) * 4
    p = rr.l1_l2((10, 10), lagrange=0.13)
    dual = p.conjugate
    L = 0.23

    loss = rr.quadratic.shift(-Z, coef=L)
    problem = rr.simple_problem(loss, p)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-10, debug=True)

    simple_coef = solver.composite.coefs
    q = rr.identity_quadratic(L, Z, 0, 0)
    prox_coef = p.proximal(q)

    p2 = copy(p)
    p2.quadratic = rr.identity_quadratic(L, Z, 0, 0)
    problem = rr.simple_problem.nonsmooth(p2)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-14, debug=True)
    simple_nonsmooth_coef = solver.composite.coefs

    p = rr.l1_l2((10, 10), lagrange=0.13)
    p.quadratic = rr.identity_quadratic(L, Z, 0, 0)
    problem = rr.simple_problem.nonsmooth(p)
    simple_nonsmooth_gengrad = gengrad(problem, L, tol=1.0e-10)

    p = rr.l1_l2((10, 10), lagrange=0.13)
    problem = rr.separable_problem.singleton(p, loss)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-10)
    separable_coef = solver.composite.coefs

    ac(prox_coef, Z - simple_coef, 'prox to simple')
    ac(prox_coef, simple_nonsmooth_gengrad, 'prox to nonsmooth gengrad')
    ac(prox_coef, separable_coef, 'prox to separable')
    ac(prox_coef, simple_nonsmooth_coef, 'prox to simple_nonsmooth')
Example 21
def test_gengrad_blocknorms():
    Z = np.random.standard_normal((10, 10)) * 4
    p = rr.l1_l2((10, 10), lagrange=0.13)
    dual = p.conjugate
    L = 0.23

    loss = rr.quadratic_loss.shift(Z, coef=L)
    problem = rr.simple_problem(loss, p)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-10, debug=True)
    simple_coef = solver.composite.coefs

    q = rr.identity_quadratic(L, Z, 0, 0)
    prox_coef = p.proximal(q)

    p2 = copy(p)
    p2.quadratic = rr.identity_quadratic(L, Z, 0, 0)
    problem = rr.simple_problem.nonsmooth(p2)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-14, debug=True)
    simple_nonsmooth_coef = solver.composite.coefs

    p = rr.l1_l2((10, 10), lagrange=0.13)
    p.quadratic = rr.identity_quadratic(L, Z, 0, 0)
    problem = rr.simple_problem.nonsmooth(p)
    simple_nonsmooth_gengrad = rr.gengrad(problem, L, tol=1.0e-10)

    p = rr.l1_l2((10, 10), lagrange=0.13)
    problem = rr.separable_problem.singleton(p, loss)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-10)
    separable_coef = solver.composite.coefs

    yield (all_close, prox_coef, simple_coef, "prox to simple", None)
    yield (all_close, prox_coef, simple_nonsmooth_gengrad, "prox to nonsmooth gengrad", None)
    yield (all_close, prox_coef, separable_coef, "prox to separable", None)
    yield (all_close, prox_coef, simple_nonsmooth_coef, "prox to simple_nonsmooth", None)
Example 22
    def fit(self, X, y):
        """
        Fit a regularized regression estimator.

        Parameters
        ----------

        X : np.ndarray((n, p))
            Feature matrix.

        y : np.ndarray(n)
            Response vector.

        Returns
        -------

        self

        """

        self._loglike = loglike = self._loglike_factory(X, y)

        # with unpenalized parameters possible,
        # this may be best found by solving a problem with an atom with lagrange=np.inf
        # this could get expensive though

        null_grad = loglike.smooth_objective(np.zeros(loglike.shape), 'grad')
        atom_ = self._construct_atom(null_grad)
        if self.unpenalized:
            null_grad = self._fit_null_soln(loglike, atom_)
        atom_ = self._construct_atom(null_grad)
        problem = simple_problem(loglike, atom_)
        if self.initial is not None:
            problem.coefs[:] = self.initial
        self._coefs = problem.solve(**self.solve_args)

        return self
Example 23
def test_simple():
    Z = np.random.standard_normal((10,10)) * 4
    p = rr.l1_l2((10,10), lagrange=0.13)
    dual = p.conjugate
    L = 0.23

    loss = rr.quadratic.shift(-Z, coef=L)
    problem = rr.simple_problem(loss, p)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-10, debug=True)

    simple_coef = solver.composite.coefs
    q = rr.identity_quadratic(L, Z, 0, 0)
    prox_coef = p.proximal(q)

    p2 = copy(p)
    p2.quadratic = rr.identity_quadratic(L, Z, 0, 0)
    problem = rr.simple_problem.nonsmooth(p2)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-14, debug=True)
    simple_nonsmooth_coef = solver.composite.coefs

    p = rr.l1_l2((10,10), lagrange=0.13)
    p.quadratic = rr.identity_quadratic(L, Z, 0, 0)
    problem = rr.simple_problem.nonsmooth(p)
    simple_nonsmooth_gengrad = gengrad(problem, L, tol=1.0e-10)

    p = rr.l1_l2((10,10), lagrange=0.13)
    problem = rr.separable_problem.singleton(p, loss)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-10)
    separable_coef = solver.composite.coefs

    ac(prox_coef, Z-simple_coef, 'prox to simple')
    ac(prox_coef, simple_nonsmooth_gengrad, 'prox to nonsmooth gengrad')
    ac(prox_coef, separable_coef, 'prox to separable')
    ac(prox_coef, simple_nonsmooth_coef, 'prox to simple_nonsmooth')
Example 24
    def fit(self, tol=1.e-12, min_its=50, **solve_args):

        lasso.fit(self, tol=tol, min_its=min_its, **solve_args)

        n1 = self.loglike.get_data()[0].shape[0]
        n = self.loglike_full.get_data()[0].shape[0]

        _feature_weights = self.feature_weights.copy()
        _feature_weights[self.active] = 0.
        _feature_weights[self.inactive] = np.inf
        
        _unpenalized_problem = simple_problem(self.loglike_full, 
                                              weighted_l1norm(_feature_weights, lagrange=1.))
        _unpenalized = _unpenalized_problem.solve(**solve_args)
        _unpenalized_active = _unpenalized[self.active]

        s = len(self.active)
        H = self.loglike_full.hessian(_unpenalized)
        H_AA = H[self.active][:,self.active]

        _cov_block = np.linalg.inv(H_AA)
        _subsample_block = (n * 1. / n1) * _cov_block
        _carve_cov = np.zeros((2*s,2*s))
        _carve_cov[:s][:,:s] = _cov_block
        _carve_cov[s:][:,:s] = _subsample_block
        _carve_cov[:s][:,s:] = _subsample_block
        _carve_cov[s:][:,s:] = _subsample_block

        _carve_linear_part = self._constraints.linear_part.dot(np.identity(2*s)[s:])
        _carve_offset = self._constraints.offset
        self._carve_constraints = constraints(_carve_linear_part,
                                              _carve_offset,
                                              covariance=_carve_cov)
        self._carve_feasible = np.hstack([_unpenalized_active, self.onestep_estimator])
        self._unpenalized_active = _unpenalized_active
        self._carve_invcov = H_AA
Example 25
    def fit(self, tol=1.e-12, min_its=50, use_full=True, **solve_args):

        lasso.fit(self, tol=tol, min_its=min_its, **solve_args)

        _feature_weights = self.feature_weights.copy()
        _feature_weights[self.active] = 0.
        _feature_weights[self.inactive] = np.inf
        

        _unpenalized_problem = simple_problem(self.loglike_inference,
                                              weighted_l1norm(_feature_weights, lagrange=1.))
        _unpenalized = _unpenalized_problem.solve(**solve_args)
        self._unpenalized_active = _unpenalized[self.active]

        if use_full:
            H = self.loglike_full.hessian(_unpenalized)
            n_inference = self.loglike_inference.data[0].shape[0]
            n_full = self.loglike_full.data[0].shape[0]
            H *= (1. * n_inference / n_full)
        else:
            H = self.loglike_inference.hessian(_unpenalized)

        H_AA = H[self.active][:,self.active]
        self._cov_inference = np.linalg.inv(H_AA)
Example 26
def test_changepoint():

    p = 150
    M = multiscale(p)
    M.minsize = 10
    X = ra.adjoint(M)

    Y = np.random.standard_normal(p)
    Y[20:50] += 8
    Y += 2
    meanY = Y.mean()

    lammax = np.fabs(X.adjoint_map(Y)).max()

    penalty = rr.l1norm(X.input_shape, lagrange=0.5*lammax)
    loss = rr.squared_error(X, Y - meanY)
    problem = rr.simple_problem(loss, penalty)
    soln = problem.solve()
    Yhat = X.linear_map(soln)
    Yhat += meanY

    plt.scatter(np.arange(p), Y)
    plt.plot(np.arange(p), Yhat)
    plt.show()
Example 27
    def fit(self, solve_args={'min_its':30, 'tol':1.e-8, 'max_its':300}):
        """
        Fit the lasso using `regreg`.
        This sets the attribute `soln` and
        forms the constraints necessary for post-selection inference
        by calling `form_constraints()`.

        Parameters
        ----------

        solve_args : dict
             Passed to `regreg.simple_problem.solve`.

        Returns
        -------

        soln : np.float
             Solution to lasso with `sklearn_alpha=self.lagrange`.
             
        """

        n, p = self.X.shape
        loss = self.form_loss(np.arange(p))
        penalty = self.form_penalty()
        problem = simple_problem(loss, penalty)
        soln = problem.solve(**solve_args)

        self._soln = soln
        if not np.all(soln == 0):
            self.active = np.nonzero(soln)[0]
            self.inactive = np.array(sorted(set(range(p)).difference(self.active)))
            loss_E = self.form_loss(self.active)
            self._beta_unpenalized = loss_E.solve(**solve_args)
            self.form_constraints()
        else:
            self.active = []
Example 28
def test_sqrt_highdim_lasso(n=500, 
                            p=200, 
                            signal_fac=1.5, 
                            s=5, 
                            sigma=3, 
                            full=True, 
                            rho=0.4, 
                            randomizer_scale=1., 
                            ndraw=5000, 
                            burnin=1000, 
                            ridge_term=None, compare_to_lasso=True):
    """
    Compare to R randomized lasso
    """

    inst, const = gaussian_instance, lasso.sqrt_lasso
    signal = np.sqrt(signal_fac * 2 * np.log(p))
    X, Y, beta = inst(n=n,
                      p=p, 
                      signal=signal, 
                      s=s, 
                      equicorrelated=False, 
                      rho=rho, 
                      sigma=sigma, 
                      random_signs=True)[:3]

    if ridge_term is None:
        mean_diag = np.mean((X**2).sum(0))
        ridge_term = (np.sqrt(mean_diag) / np.sqrt(n)) * np.sqrt(n / (n - 1.))

    W = np.ones(X.shape[1]) * choose_lambda(X) * 0.7

    perturb = np.random.standard_normal(p) * randomizer_scale / np.sqrt(n)

    conv = const(X, 
                 Y, 
                 W, 
                 randomizer_scale=randomizer_scale / np.sqrt(n),
                 perturb=perturb,
                 ridge_term=ridge_term)
    
    signs = conv.fit()
    nonzero = signs != 0

    # sanity check

    if compare_to_lasso:
        q_term = rr.identity_quadratic(ridge_term, 0, -perturb, 0)

        soln2, sqrt_loss = solve_sqrt_lasso(X, Y, W, solve_args={'min_its':1000}, quadratic=q_term, force_fat=True)
        soln = conv.initial_soln

        denom = np.linalg.norm(Y - X.dot(soln))
        new_weights = W * denom
        loss = rr.glm.gaussian(X, Y)
        pen = rr.weighted_l1norm(new_weights, lagrange=1.)
        prob = rr.simple_problem(loss, pen)

        rescaledQ = rr.identity_quadratic(ridge_term * denom,
                                          0,
                                          -perturb * denom,
                                          0)

        soln3 = prob.solve(quadratic=rescaledQ, min_its=1000, tol=1.e-12)
        np.testing.assert_allclose(conv._initial_omega, perturb * denom)
        np.testing.assert_allclose(soln, soln2)
        np.testing.assert_allclose(soln, soln3)

    if full:
        (observed_target, 
         cov_target, 
         cov_target_score, 
         alternatives) = full_targets(conv.loglike, 
                                      conv._W, 
                                      nonzero)
    else:
        (observed_target, 
         cov_target, 
         cov_target_score, 
         alternatives) = selected_targets(conv.loglike, 
                                          conv._W, 
                                          nonzero)

    _, pval, intervals = conv.summary(observed_target, 
                                      cov_target, 
                                      cov_target_score, 
                                      alternatives,
                                      ndraw=ndraw,
                                      burnin=burnin, 
                                      compute_intervals=False)

    return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0]
Example 29
def test_lasso(s=5, n=200, p=20):

    X, y, _, nonzero, sigma = instance(n=n,
                                       p=p,
                                       random_signs=True,
                                       s=s,
                                       sigma=1.,
                                       rho=0,
                                       snr=10)
    print('sigma', sigma)
    lam_frac = 1.

    randomization = laplace(loc=0, scale=1.)
    loss = randomized.gaussian_Xfixed(X, y)

    random_Z = randomization.rvs(p)
    epsilon = 1.
    lam = sigma * lam_frac * np.mean(
        np.fabs(np.dot(X.T, np.random.standard_normal((n, 10000)))).max(0))

    random_Z = randomization.rvs(p)
    penalty = randomized.selective_l1norm_lan(p, lagrange=lam)

    #sampler1 = randomized.selective_sampler_MH_lan(loss,
    #                                           random_Z,
    #                                           epsilon,
    #                                           randomization,
    #                                          penalty)

    #loss_args = {'mean': np.zeros(n),
    #             'sigma': sigma,
    #             'linear_part':np.identity(y.shape[0]),
    #             'value': 0}

    #sampler1.setup_sampling(y, loss_args=loss_args)
    # data, opt_vars = sampler1.state

    # initial solution
    # rr.smooth_atom instead of loss?
    problem = rr.simple_problem(loss, penalty)
    random_term = rr.identity_quadratic(epsilon, 0, -random_Z, 0)
    solve_args = {'tol': 1.e-10, 'min_its': 100, 'max_its': 500}
    initial_soln = problem.solve(random_term, **solve_args)

    active = (initial_soln != 0)
    inactive = ~active
    initial_grad = -np.dot(X.T, y - np.dot(X, initial_soln))
    betaE = initial_soln[active]
    signs = np.sign(betaE)
    subgradient = random_Z - initial_grad - epsilon * initial_soln
    cube = np.divide(subgradient[inactive], lam)
    #print betaE, cube
    #initial_grad = loss.smooth_objective(initial_soln,  mode='grad')
    #print penalty.setup_sampling(initial_grad,
    #                                     initial_soln,
    #                                     random_Z,
    #                                     epsilon)

    data0 = y.copy()
    #active = penalty.active_set

    if (np.sum(active) == 0):
        print('here')
        return [-1], [-1]

    nalpha = n
    nactive = betaE.shape[0]
    ninactive = cube.shape[0]

    alpha = np.ones(n)
    beta_bar = np.linalg.lstsq(X[:, active], y)[0]
    obs_residuals = y - np.dot(X[:, active], beta_bar)

    #obs_residuals -= np.mean(obs_residuals)
    #betaE, cube = opt_vars

    init_vec_state = np.zeros(n + nactive + ninactive)
    init_vec_state[:n] = alpha
    init_vec_state[n:(n + nactive)] = betaE
    init_vec_state[(n + nactive):] = cube

    def full_projection(vec_state,
                        signs=signs,
                        nalpha=nalpha,
                        nactive=nactive,
                        ninactive=ninactive):

        alpha = vec_state[:nalpha].copy()
        betaE = vec_state[nalpha:(nalpha + nactive)]
        cube = vec_state[(nalpha + nactive):]

        #signs = penalty.signs
        projected_alpha = alpha.copy()
        projected_betaE = betaE.copy()
        projected_cube = np.zeros_like(cube)

        projected_alpha = np.clip(alpha, 0, np.inf)

        for i in range(nactive):
            if (projected_betaE[i] * signs[i] < 0):
                projected_betaE[i] = 0

        projected_cube = np.clip(cube, -1, 1)

        return np.concatenate(
            (projected_alpha, projected_betaE, projected_cube), 0)

    null, alt = pval(init_vec_state, full_projection, X, y, obs_residuals,
                     signs, lam, epsilon, nonzero, active)

    return null, alt
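For reference, `full_projection` above projects the sampler state onto the constraint region implied by the selection event (a reading of the code, not a library-documented fact):

$$
\alpha \in [0, \infty)^n, \qquad s_i \, \beta_{E,i} \ge 0 \ \ (i = 1, \dots, |E|), \qquad z_{-E} \in [-1, 1]^{p - |E|},
$$

with $s$ the signs of the initial active solution.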
Example 30
def test_lasso(s=0, n=100, p=20, weights = "neutral",
               randomization_dist = "logistic", randomization_scale = 1,
               Langevin_steps = 10000, burning = 2000, X_scaled = True,
               covariance_estimate = "nonparametric", noise = "uniform"):

    """ weights: exponential, gamma, normal, gumbel
    randomization_dist: logistic, laplace """

    step_size = 1./p

    X, y, true_beta, nonzero, sigma = instance(n=n, p=p, random_signs=True, s=s, sigma=1.,rho=0, scale=X_scaled, noise=noise)
    print('true beta', true_beta)
    lam_frac = 1.

    if randomization_dist == "laplace":
        randomization = laplace(loc=0, scale=1.)
        random_Z = randomization.rvs(p)
    if randomization_dist == "logistic":
        random_Z = np.random.logistic(loc=0, scale = 1, size = p)
    if randomization_dist== "normal":
        random_Z = np.random.standard_normal(p)

    print('randomization', random_Z*randomization_scale)
    loss = lasso_randomX.lasso_randomX(X, y)

    epsilon = 1./np.sqrt(n)
    #epsilon = 1.
    lam = sigma * lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 10000)))+randomization_scale*np.random.logistic(size=(p,10000))).max(0))

    lam_scaled = lam.copy()
    random_Z_scaled = random_Z.copy()
    epsilon_scaled = epsilon

    if (X_scaled == False):
        random_Z_scaled *= np.sqrt(n)
        lam_scaled *= np.sqrt(n)
        epsilon_scaled *= np.sqrt(n)

    penalty = randomized.selective_l1norm_lan(p, lagrange=lam_scaled)

    # initial solution

    problem = rr.simple_problem(loss, penalty)

    random_term = rr.identity_quadratic(epsilon_scaled, 0, -randomization_scale*random_Z_scaled, 0)
    solve_args = {'tol': 1.e-10, 'min_its': 100, 'max_its': 500}
    initial_soln = problem.solve(random_term, **solve_args)
    print('initial solution', initial_soln)

    active = (initial_soln != 0)
    if np.sum(active)==0:
        return [-1], [-1]
    inactive = ~active
    betaE = initial_soln[active]
    signs = np.sign(betaE)

    initial_grad = -np.dot(X.T, y - np.dot(X, initial_soln))
    if (X_scaled==False):
        initial_grad /= np.sqrt(n)
    print('initial_gradient', initial_grad)
    subgradient = random_Z - initial_grad - epsilon * initial_soln
    cube = np.divide(subgradient[inactive], lam)

    nactive = betaE.shape[0]
    ninactive = cube.shape[0]

    beta_unpenalized = np.linalg.lstsq(X[:, active], y)[0]
    print('beta_OLS onto E', beta_unpenalized)
    obs_residuals = y - np.dot(X[:, active], beta_unpenalized)  # y-X_E\bar{\beta}^E
    N = np.dot(X[:, inactive].T, obs_residuals)  # X_{-E}^T(y-X_E\bar{\beta}_E), null statistic
    full_null = np.zeros(p)
    full_null[nactive:] = N

    # parametric covariance estimate
    if covariance_estimate == "parametric":
        XE_pinv = np.linalg.pinv(X[:, active])
        mat = np.zeros((nactive+ninactive, n))
        mat[:nactive,:] = XE_pinv
        mat[nactive:,:] = X[:, inactive].T.dot(np.identity(n)-X[:, active].dot(XE_pinv))
        Sigma_full = mat.dot(mat.T)
    else:
        Sigma_full = bootstrap_covariance(X,y,active, beta_unpenalized)


    init_vec_state = np.zeros(n+nactive+ninactive)
    if weights =="exponential":
        init_vec_state[:n] = np.ones(n)
    else:
        init_vec_state[:n] = np.zeros(n)

    #init_vec_state[:n] = np.random.standard_normal(n)
    #init_vec_state[:n] = np.ones(n)
    init_vec_state[n:(n+nactive)] = betaE
    init_vec_state[(n+nactive):] = cube


    def full_projection(vec_state, signs = signs,
                        nactive=nactive, ninactive = ninactive):

        alpha = vec_state[:n].copy()
        betaE = vec_state[n:(n+nactive)].copy()
        cube = vec_state[(n+nactive):].copy()

        projected_alpha = alpha.copy()
        projected_betaE = betaE.copy()
        projected_cube = np.zeros_like(cube)

        if weights == "exponential":
            projected_alpha = np.clip(alpha, 0, np.inf)

        if weights == "gamma":
            projected_alpha = np.clip(alpha, -2+1./n, np.inf)
        for i in range(nactive):
            if (projected_betaE[i] * signs[i] < 0):
                projected_betaE[i] = 0

        projected_cube = np.clip(cube, -1, 1)

        return np.concatenate((projected_alpha, projected_betaE, projected_cube), 0)


    Sigma = np.linalg.inv(np.dot(X[:, active].T, X[:, active]))
    null, alt = pval(init_vec_state, full_projection, X, obs_residuals, beta_unpenalized, full_null,
                     signs, lam, epsilon,
                     nonzero, active, Sigma,
                     weights, randomization_dist, randomization_scale,
                     Langevin_steps, step_size, burning,
                     X_scaled)
                   #  Sigma_full[:nactive, :nactive])

    return null, alt
Example 31
def highdim_model_inference(X,
                            y,
                            truth,
                            selection_algorithm,
                            sampler,
                            lam_min,
                            dispersion,
                            success_params=(1, 1),
                            fit_probability=keras_fit,
                            fit_args={
                                'epochs': 10,
                                'sizes': [100] * 5,
                                'dropout': 0.,
                                'activation': 'relu'
                            },
                            alpha=0.1,
                            B=2000,
                            naive=True,
                            learner_klass=mixture_learner,
                            how_many=None):

    n, p = X.shape
    XTX = X.T.dot(X)

    instance_hash = hashlib.md5()
    instance_hash.update(X.tobytes())
    instance_hash.update(y.tobytes())
    instance_hash.update(truth.tobytes())
    instance_id = instance_hash.hexdigest()

    # run selection algorithm

    observed_set = repeat_selection(selection_algorithm, sampler,
                                    *success_params)
    observed_list = sorted(observed_set)

    # observed debiased LASSO estimate

    loss = rr.squared_error(X, y)
    pen = rr.l1norm(p, lagrange=lam_min)
    problem = rr.simple_problem(loss, pen)
    soln = problem.solve()
    grad = X.T.dot(X.dot(soln) - y)  # gradient at beta_hat

    M = pseudoinverse_debiasing_matrix(X, observed_list)

    observed_target = soln[observed_list] - M.dot(grad)
    tmp = X.dot(M.T)
    target_cov = tmp.T.dot(tmp) * dispersion
    cross_cov = np.identity(p)[:, observed_list] * dispersion

    if len(observed_list) > 0:

        if how_many is None:
            how_many = len(observed_list)
        observed_list = observed_list[:how_many]

        # find the target, based on the observed outcome

        (pivots, covered, lengths, pvalues, lower,
         upper) = [], [], [], [], [], []

        targets = []
        true_target = truth[observed_list]

        results = infer_set_target(selection_algorithm,
                                   observed_set,
                                   observed_list,
                                   sampler,
                                   observed_target,
                                   target_cov,
                                   cross_cov,
                                   hypothesis=true_target,
                                   fit_probability=fit_probability,
                                   fit_args=fit_args,
                                   success_params=success_params,
                                   alpha=alpha,
                                   B=B,
                                   learner_klass=learner_klass)

        for i, result in enumerate(results):

            (pivot, interval, pvalue, _) = result

            pvalues.append(pvalue)
            pivots.append(pivot)
            covered.append((interval[0] < true_target[i]) *
                           (interval[1] > true_target[i]))
            lengths.append(interval[1] - interval[0])
            lower.append(interval[0])
            upper.append(interval[1])

        if len(pvalues) > 0:
            df = pd.DataFrame({
                'pivot': pivots,
                'pvalue': pvalues,
                'coverage': covered,
                'length': lengths,
                'upper': upper,
                'lower': lower,
                'id': [instance_id] * len(pvalues),
                'target': true_target,
                'variable': observed_list,
                'B': [B] * len(pvalues)
            })
            if naive:

                (naive_pvalues, naive_pivots, naive_covered, naive_lengths,
                 naive_upper, naive_lower) = [], [], [], [], [], []

                for j, idx in enumerate(observed_list):
                    true_target = truth[idx]
                    target_sd = np.sqrt(target_cov[j, j])
                    observed_target_j = observed_target[j]
                    quantile = normal_dbn.ppf(1 - 0.5 * alpha)
                    naive_interval = (observed_target_j - quantile * target_sd,
                                      observed_target_j + quantile * target_sd)
                    naive_upper.append(naive_interval[1])
                    naive_lower.append(naive_interval[0])
                    naive_pivot = (1 - normal_dbn.cdf(
                        (observed_target_j - true_target) / target_sd))
                    naive_pivot = 2 * min(naive_pivot, 1 - naive_pivot)
                    naive_pivots.append(naive_pivot)

                    naive_pvalue = (
                        1 - normal_dbn.cdf(observed_target_j / target_sd))
                    naive_pvalue = 2 * min(naive_pvalue, 1 - naive_pvalue)
                    naive_pvalues.append(naive_pvalue)

                    naive_covered.append((naive_interval[0] < true_target) *
                                         (naive_interval[1] > true_target))
                    naive_lengths.append(naive_interval[1] - naive_interval[0])

                naive_df = pd.DataFrame({
                    'naive_pivot': naive_pivots,
                    'naive_pvalue': naive_pvalues,
                    'naive_coverage': naive_covered,
                    'naive_length': naive_lengths,
                    'naive_upper': naive_upper,
                    'naive_lower': naive_lower,
                    'variable': observed_list,
                })

                df = pd.merge(df, naive_df, on='variable')
            return df
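The `observed_target` formed above is a debiased LASSO estimate restricted to the selected coordinates: since `grad` is $X^T(X\hat\beta - y)$, the code computes, with $M$ the pseudoinverse debiasing matrix and $S$ the observed list,

$$
\hat\theta_S = \hat\beta_S + M X^T(y - X\hat\beta).
$$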
Example 32
def solve_sqrt_lasso_skinny(X,
                            Y,
                            weights=None,
                            initial=None,
                            quadratic=None,
                            solve_args={}):
    """

    Solve the square-root LASSO optimization problem:

    $$
    \text{minimize}_{\beta} \|y-X\beta\|_2 + \|D\beta\|_1,
    $$
    where $D$ is the diagonal matrix with weights on its diagonal.

    Parameters
    ----------

    y : np.float((n,))
        The target, in the model $y = X\beta$

    X : np.float((n, p))
        The data, in the model $y = X\beta$

    weights : np.float
        Coefficients of the L-1 penalty in the
        optimization problem; note that different
        coordinates can have different coefficients.

    initial : np.float(p)
        Initial point for optimization.

    solve_args : dict
        Arguments passed to regreg solver.

    quadratic : `regreg.identity_quadratic`
        A quadratic term added to the objective function.

    """
    X = rr.astransform(X)
    n, p = X.output_shape[0], X.input_shape[0]
    if weights is None:
        lam = choose_lambda(X)
        weights = lam * np.ones((p, ))
    weight_dict = dict(zip(np.arange(p), 2 * weights))
    penalty = rr.mixed_lasso(list(np.arange(p)) + [rr.NONNEGATIVE],
                             lagrange=1.,
                             weights=weight_dict)

    loss = sqlasso_objective_skinny(X, Y)
    problem = rr.simple_problem(loss, penalty)
    problem.coefs[-1] = np.linalg.norm(Y)
    if initial is not None:
        problem.coefs[:-1] = initial

    if quadratic is not None:
        collapsed = quadratic.collapsed()
        new_linear_term = np.zeros(p + 1)
        new_linear_term[:p] = collapsed.linear_term
        new_quadratic = rr.identity_quadratic(collapsed.coef, 0.,
                                              new_linear_term,
                                              collapsed.constant_term)
    else:
        new_quadratic = None

    soln = problem.solve(new_quadratic, **solve_args)
    _loss = sqlasso_objective(X, Y)
    return soln[:-1], _loss
Example 33
def test_quadratic_for_smooth():
    '''
    this test checks that the quadratic parts of the smooth
    functions are being used in the proximal step
    '''

    L = 0.45

    W = np.random.standard_normal(40)
    Z = np.random.standard_normal(40)
    U = np.random.standard_normal(40)

    atomq = rr.identity_quadratic(0.4, U, W, 0)
    atom = rr.l1norm(40, quadratic=atomq, lagrange=0.12)

    # specifying in this way should be the same as if we put 0.5*L below
    loss = rr.quadratic.shift(Z, coef=0.6 * L)
    lq = rr.identity_quadratic(0.4 * L, Z, 0, 0)
    loss.quadratic = lq
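    # the smooth coefficient 0.6*L and the attached identity_quadratic coefficient
    # 0.4*L share the same center Z, so together they match the single quadratic
    # with coefficient L used for loss2 below; the yields that follow check this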

    ww = np.random.standard_normal(40)

    # specifying in this way should be the same as if we put 0.5*L below
    loss2 = rr.quadratic.shift(Z, coef=L)
    yield all_close, loss2.objective(ww), loss.objective(
        ww), 'checking objective', None

    yield all_close, lq.objective(ww, 'func'), loss.nonsmooth_objective(
        ww), 'checking nonsmooth objective', None
    yield all_close, loss2.smooth_objective(
        ww, 'func'), 0.5 / 0.3 * loss.smooth_objective(
            ww, 'func'), 'checking smooth objective func', None
    yield all_close, loss2.smooth_objective(
        ww, 'grad'), 0.5 / 0.3 * loss.smooth_objective(
            ww, 'grad'), 'checking smooth objective grad', None

    problem = rr.container(loss, atom)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-12)

    problem3 = rr.simple_problem(loss, atom)
    solver3 = rr.FISTA(problem3)
    solver3.fit(tol=1.0e-12, coef_stop=True)

    loss4 = rr.quadratic.shift(Z, coef=0.6 * L)
    problem4 = rr.simple_problem(loss4, atom)
    problem4.quadratic = lq
    solver4 = rr.FISTA(problem4)
    solver4.fit(tol=1.0e-12)

    gg_soln = rr.gengrad(problem, L)

    loss6 = rr.quadratic.shift(Z, coef=0.6 * L)
    loss6.quadratic = lq + atom.quadratic
    atomcp = copy(atom)
    atomcp.quadratic = rr.identity_quadratic(0, 0, 0, 0)
    problem6 = rr.dual_problem(loss6.conjugate, rr.identity(loss6.shape),
                               atomcp.conjugate)
    problem6.lipschitz = L + atom.quadratic.coef
    dsoln2 = problem6.solve(coef_stop=True, tol=1.e-10, max_its=100)

    problem2 = rr.container(loss2, atom)
    solver2 = rr.FISTA(problem2)
    solver2.fit(tol=1.0e-12, coef_stop=True)

    q = rr.identity_quadratic(L, Z, 0, 0)

    yield all_close, problem.objective(
        ww), atom.nonsmooth_objective(ww) + q.objective(ww, 'func'), '', None

    atom = rr.l1norm(40, quadratic=atomq, lagrange=0.12)
    aq = atom.solve(q)
    for p, msg in zip([
            solver3.composite.coefs, gg_soln, solver2.composite.coefs, dsoln2,
            solver.composite.coefs, solver4.composite.coefs
    ], [
            'simple_problem with loss having no quadratic', 'gen grad',
            'container with loss having no quadratic',
            'dual problem with loss having a quadratic',
            'container with loss having a quadratic',
            'simple_problem having a quadratic'
    ]):
        yield all_close, aq, p, msg, None
Example 34
def test_solve_QP_lasso():
    """
    Check the R coordinate descent LASSO solver
    """

    n, p = 100, 200
    lam = 0.1

    X = np.random.standard_normal((n, p))
    Y = np.random.standard_normal(n)

    loss = rr.squared_error(X, Y, coef=1. / n)
    pen = rr.l1norm(p, lagrange=lam)
    problem = rr.simple_problem(loss, pen)
    soln = problem.solve(min_its=500, tol=1.e-12)

    numpy2ri.activate()

    rpy.r.assign('X', X)
    rpy.r.assign('Y', Y)
    rpy.r.assign('lam', lam)

    R_code = """

    library(selectiveInference)
    p = ncol(X)
    n = nrow(X)
    soln_R = rep(0, p)
    grad = -t(X) %*% Y / n
    ever_active = as.integer(c(1, rep(0, p-1)))
    nactive = as.integer(1)
    kkt_tol = 1.e-12
    objective_tol = 1.e-16
    parameter_tol = 1.e-10
    maxiter = 500
    soln_R = selectiveInference:::solve_QP(t(X) %*% X / n, 
                                           lam, 
                                           maxiter, 
                                           soln_R, 
                                           1. * grad,
                                           grad, 
                                           ever_active, 
                                           nactive, 
                                           kkt_tol, 
                                           objective_tol, 
                                           parameter_tol,
                                           p,
                                           TRUE,
                                           TRUE,
                                           TRUE)$soln

    # test wide solver
    Xtheta = rep(0, n)
    nactive = as.integer(1)
    ever_active = as.integer(c(1, rep(0, p-1)))
    soln_R_wide = rep(0, p)
    grad = - t(X) %*% Y / n
    soln_R_wide = selectiveInference:::solve_QP_wide(X, 
                                                     rep(lam, p), 
                                                     0,
                                                     maxiter, 
                                                     soln_R_wide, 
                                                     1. * grad,
                                                     grad, 
                                                     Xtheta,
                                                     ever_active, 
                                                     nactive, 
                                                     kkt_tol, 
                                                     objective_tol, 
                                                     parameter_tol,
                                                     p,
                                                     TRUE,
                                                     TRUE,
                                                     TRUE)$soln

    """

    rpy.r(R_code)

    soln_R = np.asarray(rpy.r('soln_R'))
    soln_R_wide = np.asarray(rpy.r('soln_R_wide'))
    numpy2ri.deactivate()

    tol = 1.e-5
    print(soln - soln_R)
    print(soln_R - soln_R_wide)

    yield np.testing.assert_allclose, soln, soln_R, tol, tol, False, 'checking coordinate QP solver for LASSO problem'
    yield np.testing.assert_allclose, soln, soln_R_wide, tol, tol, False, 'checking wide coordinate QP solver for LASSO problem'
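
A quick way to confirm that the regreg solution and the two R solvers all reach the same stationary point is the LASSO KKT condition; a minimal sketch, reusing X, Y, soln, lam and n from the snippet above for illustration only:

# hedged sketch: KKT conditions for min (1/(2n)) ||Y - X b||^2 + lam * ||b||_1
g = X.T.dot(X.dot(soln) - Y) / n              # gradient of the smooth part at soln
active = soln != 0
# active coordinates sit at -lam * sign(soln); all coordinates stay inside [-lam, lam]
np.testing.assert_allclose(g[active], -lam * np.sign(soln[active]), rtol=1.e-4, atol=1.e-4)
assert np.fabs(g).max() <= lam * (1 + 1.e-6)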
Esempio n. 35
0
    def fit(self, 
            solve_args={'tol':1.e-12, 'min_its':50}, 
            perturb=None):
        """
        Fit the randomized lasso using `regreg`.

        Parameters
        ----------

        solve_args : keyword args
             Passed to `regreg.problems.simple_problem.solve`.

        Returns
        -------

        signs : np.ndarray
             Signs of the randomized lasso solution; zero off the support.
             
        """

        p = self.nfeature

        # take a new perturbation if supplied
        if perturb is not None:
            self._initial_omega = perturb
        if self._initial_omega is None:
            self._initial_omega = self.randomizer.sample()

        quad = rr.identity_quadratic(self.ridge_term, 0, -self._initial_omega, 0)
        quad_data = rr.identity_quadratic(0, 0, -self.X.T.dot(self.y), 0)
        problem = rr.simple_problem(self.loss, self.penalty)
        self.initial_soln = problem.solve(quad + quad_data, **solve_args)

        active_signs = np.sign(self.initial_soln)
        active = self._active = active_signs != 0

        self._lagrange = self.penalty.weights
        unpenalized = self._lagrange == 0

        active *= ~unpenalized

        self._overall = overall = (active + unpenalized) > 0
        self._inactive = inactive = ~self._overall
        self._unpenalized = unpenalized

        _active_signs = active_signs.copy()
        _active_signs[unpenalized] = np.nan # don't release sign of unpenalized variables
        self.selection_variable = {'sign':_active_signs,
                                   'variables':self._overall}

        # initial state for opt variables

        initial_subgrad = -(self.loss.smooth_objective(self.initial_soln, 'grad') + 
                            quad_data.objective(self.initial_soln, 'grad') +
                            quad.objective(self.initial_soln, 'grad')) 
        self.initial_subgrad = initial_subgrad

        initial_scalings = np.fabs(self.initial_soln[active])
        initial_unpenalized = self.initial_soln[self._unpenalized]

        self.observed_opt_state = np.concatenate([initial_scalings,
                                                  initial_unpenalized])

        E = overall
        Q_E = self.Q[E][:,E]
        _beta_unpenalized = np.linalg.inv(Q_E).dot(self.X[:,E].T.dot(self.y))
        beta_bar = np.zeros(p)
        beta_bar[overall] = _beta_unpenalized
        self._beta_full = beta_bar

        # observed state for score in internal coordinates

        self.observed_internal_state = np.hstack([_beta_unpenalized,
                                                  -self.loss.smooth_objective(beta_bar, 'grad')[inactive] + 
                                                  quad_data.objective(beta_bar, 'grad')[inactive]])

        # form linear part

        self.num_opt_var = self.observed_opt_state.shape[0]

        # (\bar{\beta}_{E \cup U}, N_{-E}, c_E, \beta_U, z_{-E})
        # E for active
        # U for unpenalized
        # -E for inactive

        _opt_linear_term = np.zeros((p, self.num_opt_var))
        _score_linear_term = np.zeros((p, self.num_opt_var))

        # \bar{\beta}_{E \cup U} piece -- the unpenalized M estimator

        X, y = self.X, self.y
        _hessian_active = self.Q[:, active]
        _hessian_unpen = self.Q[:, unpenalized]

        _score_linear_term = -np.hstack([_hessian_active, _hessian_unpen])

        # set the observed score (data dependent) state

        self.observed_score_state = _score_linear_term.dot(_beta_unpenalized)
        self.observed_score_state[inactive] += (self.loss.smooth_objective(beta_bar, 'grad')[inactive] + 
                                                quad_data.objective(beta_bar, 'grad')[inactive])

        def signed_basis_vector(p, j, s):
            v = np.zeros(p)
            v[j] = s
            return v

        active_directions = np.array([signed_basis_vector(p, j, active_signs[j]) for j in np.nonzero(active)[0]]).T

        scaling_slice = slice(0, active.sum())
        if np.sum(active) == 0:
            _opt_hessian = 0
        else:
            _opt_hessian = _hessian_active * active_signs[None, active] + self.ridge_term * active_directions
        _opt_linear_term[:, scaling_slice] = _opt_hessian

        # beta_U piece

        unpenalized_slice = slice(active.sum(), self.num_opt_var)
        unpenalized_directions = np.array([signed_basis_vector(p, j, 1) for j in np.nonzero(unpenalized)[0]]).T
        if unpenalized.sum():
            _opt_linear_term[:, unpenalized_slice] = (_hessian_unpen
                                                      + self.ridge_term * unpenalized_directions) 

        # two transforms that encode score and optimization
        # variable roles 

        self.opt_transform = (_opt_linear_term, self.initial_subgrad)
        self.score_transform = (_score_linear_term, np.zeros(_score_linear_term.shape[0]))

        # now store everything needed for the projections
        # the projection acts only on the optimization
        # variables

        self._setup = True
        self.scaling_slice = scaling_slice
        self.unpenalized_slice = unpenalized_slice
        self.ndim = self.loss.shape[0]

        # compute implied mean and covariance

        opt_linear, opt_offset = self.opt_transform

        A_scaling = -np.identity(self.num_opt_var)
        b_scaling = np.zeros(self.num_opt_var)

        self._setup_sampler(A_scaling,
                            b_scaling,
                            opt_linear,
                            opt_offset)
        
        return active_signs
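
The stationarity condition behind fit reads grad_loss(beta_hat) - X^T y + ridge_term * beta_hat - omega + subgrad = 0, which is exactly how initial_subgrad is formed above. A minimal sketch that recovers the realized randomization from a fitted instance (the helper name is hypothetical; the attributes are those set by fit):

def check_randomized_kkt(fitted, tol=1.e-6):
    # hedged sketch: reassemble omega from the stationarity condition of `fit`
    beta_hat = fitted.initial_soln
    omega_check = (fitted.loss.smooth_objective(beta_hat, 'grad')
                   - fitted.X.T.dot(fitted.y)          # gradient of quad_data
                   + fitted.ridge_term * beta_hat      # gradient of the ridge quadratic
                   + fitted.initial_subgrad)           # recovered penalty subgradient
    return np.allclose(omega_check, fitted._initial_omega, atol=tol)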
Esempio n. 36
0
    def test_simple_problem(self):
        tests = []
        atom, q, prox_center, L = self.atom, self.q, self.prox_center, self.L
        loss = self.loss

        problem = rr.simple_problem(loss, atom)
        solver = rr.FISTA(problem)
        solver.fit(tol=1.0e-12,
                   FISTA=self.FISTA,
                   coef_stop=self.coef_stop,
                   min_its=100)

        tests.append(
            (atom.proximal(q), solver.composite.coefs,
             'solving prox with simple_problem with monotonicity\n %s' %
             str(self)))

        # write the loss in terms of a quadratic for the smooth loss and a smooth function...

        q = rr.identity_quadratic(L, prox_center, 0, 0)
        lossq = rr.quadratic.shift(prox_center.copy(), coef=0.6 * L)
        lossq.quadratic = rr.identity_quadratic(0.4 * L, prox_center.copy(), 0,
                                                0)
        problem = rr.simple_problem(lossq, atom)

        tests.append(
            (atom.proximal(q),
             problem.solve(coef_stop=self.coef_stop,
                           FISTA=self.FISTA,
                           tol=1.0e-12), 'solving prox with simple_problem ' +
             'with monotonicity  but loss has identity_quadratic %s\n ' %
             str(self)))

        problem = rr.simple_problem(loss, atom)
        solver = rr.FISTA(problem)
        solver.fit(tol=1.0e-12,
                   monotonicity_restart=False,
                   coef_stop=self.coef_stop,
                   FISTA=self.FISTA,
                   min_its=100)

        tests.append(
            (atom.proximal(q), solver.composite.coefs,
             'solving prox with simple_problem no monotonicity_restart\n %s' %
             str(self)))

        d = atom.conjugate
        problem = rr.simple_problem(loss, d)
        solver = rr.FISTA(problem)
        solver.fit(tol=1.0e-12,
                   monotonicity_restart=False,
                   coef_stop=self.coef_stop,
                   FISTA=self.FISTA,
                   min_its=100)
        tests.append(
            (d.proximal(q),
             problem.solve(tol=1.e-12,
                           FISTA=self.FISTA,
                           coef_stop=self.coef_stop,
                           monotonicity_restart=False),
             'solving dual prox with simple_problem no monotonicity\n %s ' %
             str(self)))

        if not self.interactive:
            for test in tests:
                yield (all_close, ) + test + (self, )
        else:
            for test in tests:
                yield all_close(*((test + (self, ))))
Esempio n. 37
0
def test_quadratic_for_smooth2():
    """
    this test is a check to ensure that the
    quadratic part of the smooth functions is being used in the proximal step

    """

    L = 2

    W = np.arange(5)
    Z = 0.5 * np.arange(5)[::-1]
    U = 1.5 * np.arange(5)

    atomq = rr.identity_quadratic(0.4, U, W, 0)
    atom = rr.l1norm(5, quadratic=atomq, lagrange=0.1)

    # specifying in this way should be the same as if we put 0.5*L below
    loss = rr.quadratic.shift(-Z, coef=0.6 * L)
    lq = rr.identity_quadratic(0.4 * L, Z, 0, 0)
    loss.quadratic = lq

    ww = np.ones(5)

    # specifying in this way should be the same as if we put 0.5*L below
    loss2 = rr.quadratic.shift(-Z, coef=L)
    np.testing.assert_allclose(loss2.objective(ww), loss.objective(ww))
    np.testing.assert_allclose(lq.objective(ww, "func"), loss.nonsmooth_objective(ww))
    np.testing.assert_allclose(loss2.smooth_objective(ww, "func"), 0.5 / 0.3 * loss.smooth_objective(ww, "func"))
    np.testing.assert_allclose(loss2.smooth_objective(ww, "grad"), 0.5 / 0.3 * loss.smooth_objective(ww, "grad"))

    problem = rr.container(loss, atom)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-12)

    problem3 = rr.simple_problem(loss, atom)
    solver3 = rr.FISTA(problem3)
    solver3.fit(tol=1.0e-12, coef_stop=True)

    loss4 = rr.quadratic.shift(-Z, coef=0.6 * L)
    problem4 = rr.simple_problem(loss4, atom)
    problem4.quadratic = lq
    solver4 = rr.FISTA(problem4)
    solver4.fit(tol=1.0e-12)

    gg_soln = rr.gengrad(problem4, L)

    loss6 = rr.quadratic.shift(-Z, coef=0.6 * L)
    loss6.quadratic = lq + atom.quadratic
    atomcp = copy(atom)
    atomcp.quadratic = rr.identity_quadratic(0, 0, 0, 0)
    problem6 = rr.dual_problem(loss6.conjugate, rr.identity(loss6.primal_shape), atomcp.conjugate)
    problem6.lipschitz = L + atom.quadratic.coef
    dsoln2 = problem6.solve(coef_stop=True, tol=1.0e-10, max_its=100)

    problem2 = rr.container(loss2, atom)
    solver2 = rr.FISTA(problem2)
    solver2.fit(tol=1.0e-12, coef_stop=True)

    q = rr.identity_quadratic(L, Z, 0, 0)

    ac(problem.objective(ww), atom.nonsmooth_objective(ww) + q.objective(ww, "func"))

    aq = atom.solve(q)
    for p, msg in zip(
        [
            solver3.composite.coefs,
            gg_soln,
            solver2.composite.coefs,
            solver4.composite.coefs,
            dsoln2,
            solver.composite.coefs,
        ],
        [
            "simple_problem with loss having no quadratic",
            "gen grad",
            "container with loss having no quadratic",
            "simple_problem container with quadratic",
            "dual problem with loss having a quadratic",
            "container with loss having a quadratic",
        ],
    ):
        yield ac, aq, p, msg
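
The 0.6*L / 0.4*L split used above is exact: the smooth piece and the attached identity_quadratic add back up to a single quadratic with coefficient L, which is what loss2 encodes. A self-contained numeric check of that identity in plain numpy:

# hedged sketch: the smooth / attached-quadratic split is an exact identity
L_ = 2
Z_ = 0.5 * np.arange(5)[::-1]
w_ = np.ones(5)
full = 0.5 * L_ * np.sum((w_ - Z_) ** 2)
split = 0.5 * 0.6 * L_ * np.sum((w_ - Z_) ** 2) + 0.5 * 0.4 * L_ * np.sum((w_ - Z_) ** 2)
np.testing.assert_allclose(full, split)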
Esempio n. 38
0
    def solve(self,
              nboot=2000,
              solve_args={
                  'min_its': 20,
                  'tol': 1.e-10
              },
              perturb=None):

        self.randomize(perturb=perturb)

        (loss, randomized_loss, epsilon, penalty,
         randomization) = (self.loss, self.randomized_loss, self.epsilon,
                           self.penalty, self.randomization)

        # initial solution

        p = penalty.shape[0]

        problem = rr.simple_problem(randomized_loss, penalty)
        self.initial_soln = problem.solve(**solve_args)

        # find the active groups and their direction vectors
        # as well as unpenalized groups

        active_signs = np.sign(self.initial_soln)
        active = self._active = active_signs != 0

        if isinstance(penalty, rr.l1norm):
            self._lagrange = penalty.lagrange * np.ones(p)
            unpenalized = np.zeros(p, dtype=bool)
        elif isinstance(penalty, rr.weighted_l1norm):
            self._lagrange = penalty.weights
            unpenalized = self._lagrange == 0
        else:
            raise ValueError('penalty must be `l1norm` or `weighted_l1norm`')

        active *= ~unpenalized

        # solve the restricted problem

        self._overall = (active + unpenalized) > 0
        self._inactive = ~self._overall
        self._unpenalized = unpenalized

        _active_signs = active_signs.copy()
        _active_signs[
            unpenalized] = np.nan  # don't release sign of unpenalized variables
        self.selection_variable = {
            'sign': _active_signs,
            'variables': self._overall
        }

        # initial state for opt variables

        initial_subgrad = -(
            self.randomized_loss.smooth_objective(self.initial_soln, 'grad') +
            self.randomized_loss.quadratic.objective(self.initial_soln,
                                                     'grad'))
        # the quadratic of a smooth_atom is not included in computing the smooth_objective
        self.initial_subgrad = initial_subgrad

        initial_scalings = np.fabs(self.initial_soln[active])
        initial_unpenalized = self.initial_soln[self._unpenalized]

        self.observed_opt_state = np.concatenate([
            initial_scalings, initial_unpenalized,
            self.initial_subgrad[self._inactive]
        ],
                                                 axis=0)

        # set the _solved bit

        self._solved = True

        # Now setup the pieces for linear decomposition

        (loss, epsilon, penalty, initial_soln, overall, inactive,
         unpenalized) = (self.loss, self.epsilon, self.penalty,
                         self.initial_soln, self._overall, self._inactive,
                         self._unpenalized)

        # we are implicitly assuming that
        # loss is a pairs model

        _beta_unpenalized = restricted_estimator(loss,
                                                 overall,
                                                 solve_args=solve_args)

        beta_bar = np.zeros(p)
        beta_bar[overall] = _beta_unpenalized
        self._beta_full = beta_bar

        # observed state for score in internal coordinates

        self.observed_internal_state = np.hstack([
            _beta_unpenalized,
            -loss.smooth_objective(beta_bar, 'grad')[inactive]
        ])

        # form linear part

        self.num_opt_var = self.observed_opt_state.shape[0]

        # (\bar{\beta}_{E \cup U}, N_{-E}, c_E, \beta_U, z_{-E})
        # E for active
        # U for unpenalized
        # -E for inactive

        _opt_linear_term = np.zeros((p, p))
        _score_linear_term = np.zeros((p, p))

        # \bar{\beta}_{E \cup U} piece -- the unpenalized M estimator

        est_slice = slice(0, overall.sum())
        X, y = loss.data
        W = self.loss.saturated_loss.hessian(X.dot(beta_bar))
        _hessian_active = np.dot(X.T, X[:, active] * W[:, None])
        _hessian_unpen = np.dot(X.T, X[:, unpenalized] * W[:, None])

        _score_linear_term[:, est_slice] = -np.hstack(
            [_hessian_active, _hessian_unpen])

        # N_{-(E \cup U)} piece -- inactive coordinates of score of M estimator at unpenalized solution

        null_idx = np.arange(overall.sum(), p)
        inactive_idx = np.nonzero(inactive)[0]
        for _i, _n in zip(inactive_idx, null_idx):
            _score_linear_term[_i, _n] = -1

        # c_E piece

        def signed_basis_vector(p, j, s):
            v = np.zeros(p)
            v[j] = s
            return v

        active_directions = np.array([
            signed_basis_vector(p, j, active_signs[j])
            for j in np.nonzero(active)[0]
        ]).T

        scaling_slice = slice(0, active.sum())
        if np.sum(active) == 0:
            _opt_hessian = 0
        else:
            _opt_hessian = _hessian_active * active_signs[
                None, active] + epsilon * active_directions
        _opt_linear_term[:, scaling_slice] = _opt_hessian

        # beta_U piece

        unpenalized_slice = slice(active.sum(),
                                  active.sum() + unpenalized.sum())
        unpenalized_directions = np.array([
            signed_basis_vector(p, j, 1) for j in np.nonzero(unpenalized)[0]
        ]).T
        if unpenalized.sum():
            _opt_linear_term[:, unpenalized_slice] = (
                _hessian_unpen + epsilon * unpenalized_directions)

            # subgrad piece

        subgrad_idx = range(active.sum() + unpenalized.sum(),
                            active.sum() + inactive.sum() + unpenalized.sum())
        subgrad_slice = slice(
            active.sum() + unpenalized.sum(),
            active.sum() + inactive.sum() + unpenalized.sum())
        for _i, _s in zip(inactive_idx, subgrad_idx):
            _opt_linear_term[_i, _s] = 1

        # form affine part

        _opt_affine_term = np.zeros(p)
        idx = 0
        _opt_affine_term[
            active] = active_signs[active] * self._lagrange[active]

        # two transforms that encode score and optimization
        # variable roles

        self.opt_transform = (_opt_linear_term, _opt_affine_term)
        self.score_transform = (_score_linear_term,
                                np.zeros(_score_linear_term.shape[0]))

        # everything now expressed in observed_score_state

        self.observed_score_state = _score_linear_term.dot(
            self.observed_internal_state)

        # now store everything needed for the projections
        # the projection acts only on the optimization
        # variables

        # we form a dual group lasso object
        # to do the projection

        self._setup = True
        self.subgrad_slice = subgrad_slice
        self.scaling_slice = scaling_slice
        self.unpenalized_slice = unpenalized_slice
        self.ndim = loss.shape[0]

        self.nboot = nboot
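
The initial_subgrad computed in solve must satisfy the weighted-l1 KKT conditions at the randomized solution. A minimal sketch of that check (the function name is hypothetical; the attributes are those set by solve above):

def check_weighted_l1_kkt(mest, tol=1.e-5):
    # hedged sketch: active coordinates of the subgradient equal lam_j * sign(beta_j),
    # inactive coordinates stay inside [-lam_j, lam_j]
    u, beta, lam = mest.initial_subgrad, mest.initial_soln, mest._lagrange
    active = beta != 0
    ok_active = np.allclose(u[active], lam[active] * np.sign(beta[active]), atol=tol)
    ok_inactive = np.all(np.fabs(u[~active]) <= lam[~active] * (1 + tol))
    return ok_active and ok_inactive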
Esempio n. 39
0
def test_cv(n=100,
            p=50,
            s=5,
            signal=7.5,
            K=5,
            rho=0.,
            randomizer='gaussian',
            randomizer_scale=1.,
            scale1=0.1,
            scale2=0.2,
            lam_frac=1.,
            glmnet=True,
            loss='gaussian',
            bootstrap=False,
            condition_on_CVR=True,
            marginalize_subgrad=True,
            ndraw=10000,
            burnin=2000,
            nboot=nboot):  # default comes from a module-level nboot defined elsewhere in the test module

    print(n, p, s, condition_on_CVR, scale1, scale2)
    if randomizer == 'laplace':
        randomizer = randomization.laplace((p, ), scale=randomizer_scale)
    elif randomizer == 'gaussian':
        randomizer = randomization.isotropic_gaussian((p, ), randomizer_scale)
    elif randomizer == 'logistic':
        randomizer = randomization.logistic((p, ), scale=randomizer_scale)

    if loss == "gaussian":
        X, y, beta, nonzero, sigma = gaussian_instance(n=n,
                                                       p=p,
                                                       s=s,
                                                       rho=rho,
                                                       signal=signal,
                                                       sigma=1)
        glm_loss = rr.glm.gaussian(X, y)
    elif loss == "logistic":
        X, y, beta, _ = logistic_instance(n=n,
                                          p=p,
                                          s=s,
                                          rho=rho,
                                          signal=signal)
        glm_loss = rr.glm.logistic(X, y)

    epsilon = 1. / np.sqrt(n)

    # view 1
    cv = CV_view(glm_loss,
                 loss_label=loss,
                 lasso_randomization=randomizer,
                 epsilon=epsilon,
                 scale1=scale1,
                 scale2=scale2)
    if glmnet:
        try:
            cv.solve(glmnet=glmnet)
        except ImportError:
            cv.solve(glmnet=False)
    else:
        cv.solve(glmnet=False)

    # for the test make sure we also run the python code

    cv_py = CV_view(glm_loss,
                    loss_label=loss,
                    lasso_randomization=randomizer,
                    epsilon=epsilon,
                    scale1=scale1,
                    scale2=scale2)
    cv_py.solve(glmnet=False)

    lam = cv.lam_CVR
    print("lam", lam)

    if condition_on_CVR:
        cv.condition_on_opt_state()
        lam = cv.one_SD_rule(direction="up")
        print("new lam", lam)

    # non-randomized Lasso, just looking how many vars it selects
    problem = rr.simple_problem(glm_loss, rr.l1norm(p, lagrange=lam))
    beta_hat = problem.solve()
    active_hat = beta_hat != 0
    print("non-randomized lasso ", active_hat.sum())

    # view 2
    W = lam_frac * np.ones(p) * lam
    penalty = rr.group_lasso(np.arange(p),
                             weights=dict(zip(np.arange(p), W)),
                             lagrange=1.)
    M_est = glm_group_lasso(glm_loss, epsilon, penalty, randomizer)

    if nboot > 0:
        cv.nboot = M_est.nboot = nboot

    mv = multiple_queries([cv, M_est])
    mv.solve()

    active_union = M_est._overall
    nactive = np.sum(active_union)
    print("nactive", nactive)
    if nactive == 0:
        return None

    nonzero = np.where(beta)[0]

    if set(nonzero).issubset(np.nonzero(active_union)[0]):

        active_set = np.nonzero(active_union)[0]
        true_vec = beta[active_union]

        if marginalize_subgrad == True:
            M_est.decompose_subgradient(conditioning_groups=np.zeros(p, bool),
                                        marginalizing_groups=np.ones(p, bool))

        selected_features = np.zeros(p, dtype=bool)
        selected_features[active_set] = True

        unpenalized_mle = restricted_Mest(M_est.loss, selected_features)

        form_covariances = glm_nonparametric_bootstrap(n, n)
        target_info, target_observed = pairs_bootstrap_glm(M_est.loss,
                                                           selected_features,
                                                           inactive=None)

        cov_info = M_est.setup_sampler()
        target_cov, score_cov = form_covariances(target_info,
                                                 cross_terms=[cov_info],
                                                 nsample=M_est.nboot)

        opt_sample = M_est.sampler.sample(ndraw, burnin)

        pvalues = M_est.sampler.coefficient_pvalues(
            unpenalized_mle,
            target_cov,
            score_cov,
            parameter=np.zeros(selected_features.sum()),
            sample=opt_sample)
        intervals = M_est.sampler.confidence_intervals(unpenalized_mle,
                                                       target_cov,
                                                       score_cov,
                                                       sample=opt_sample)

        L, U = intervals.T
        sel_covered = np.zeros(nactive, dtype=bool)
        sel_length = np.zeros(nactive)

        LU_naive = naive_confidence_intervals(np.diag(target_cov),
                                              target_observed)
        naive_covered = np.zeros(nactive, dtype=bool)
        naive_length = np.zeros(nactive)
        naive_pvals = naive_pvalues(np.diag(target_cov), target_observed,
                                    true_vec)

        active_var = np.zeros(nactive, dtype=bool)

        for j in range(nactive):
            if (L[j] <= true_vec[j]) and (U[j] >= true_vec[j]):
                sel_covered[j] = 1
            if (LU_naive[j, 0] <= true_vec[j]) and (LU_naive[j, 1] >=
                                                    true_vec[j]):
                naive_covered[j] = 1
            sel_length[j] = U[j] - L[j]
            naive_length[j] = LU_naive[j, 1] - LU_naive[j, 0]
            active_var[j] = active_set[j] in nonzero

        q = 0.2
        BH_decisions = multipletests(pvalues, alpha=q, method="fdr_bh")[0]
        return sel_covered, sel_length, naive_pvals, naive_covered, naive_length, active_var, BH_decisions, active_var
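
The one_SD_rule call above follows the usual one-standard-error recipe: starting from the CV-minimizing lambda, move to the largest lambda whose CV error is still within one standard error of the minimum. A standalone sketch of that rule (lam_grid, cv_err and cv_sd are hypothetical arrays, not objects produced by this test):

def one_sd_rule(lam_grid, cv_err, cv_sd):
    # hedged sketch: pick the largest lambda whose CV error is within one SD
    # of the minimum CV error (more regularization, comparable fit)
    i_min = np.argmin(cv_err)
    cutoff = cv_err[i_min] + cv_sd[i_min]
    eligible = np.nonzero(cv_err <= cutoff)[0]
    return lam_grid[eligible].max()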
Esempio n. 40
0
    def solve(self, scaling=1, solve_args={'min_its': 20, 'tol': 1.e-10}):

        self.randomize()

        (loss, randomized_loss, epsilon, penalty, randomization,
         solve_args) = (self.loss, self.randomized_loss, self.epsilon,
                        self.penalty, self.randomization, self.solve_args)

        # initial solution

        problem = rr.simple_problem(randomized_loss, penalty)
        self.initial_soln = problem.solve(**solve_args)

        # find the active groups and their direction vectors
        # as well as unpenalized groups

        groups = np.unique(penalty.groups)
        active_groups = np.zeros(len(groups), dtype=bool)
        unpenalized_groups = np.zeros(len(groups), dtype=bool)

        active_directions = []
        active = np.zeros(loss.shape, dtype=bool)
        unpenalized = np.zeros(loss.shape, dtype=bool)

        initial_scalings = []

        for i, g in enumerate(groups):
            group = penalty.groups == g
            active_groups[i] = (np.linalg.norm(self.initial_soln[group]) >
                                1.e-6 * penalty.weights[g]) and (
                                    penalty.weights[g] > 0)
            unpenalized_groups[i] = (penalty.weights[g] == 0)
            if active_groups[i]:
                active[group] = True
                z = np.zeros(active.shape, dtype=float)
                z[group] = self.initial_soln[group] / np.linalg.norm(
                    self.initial_soln[group])
                active_directions.append(z)
                initial_scalings.append(
                    np.linalg.norm(self.initial_soln[group]))
            if unpenalized_groups[i]:
                unpenalized[group] = True

        # solve the restricted problem

        self._overall = active + unpenalized
        self._inactive = ~self._overall
        self._unpenalized = unpenalized
        self._active_directions = np.array(active_directions).T
        self._active_groups = np.array(active_groups, dtype=bool)
        self._unpenalized_groups = np.array(unpenalized_groups, dtype=bool)

        self.selection_variable = {
            'groups': self._active_groups,
            'variables': self._overall,
            'directions': self._active_directions
        }

        # initial state for opt variables

        initial_subgrad = -(
            self.randomized_loss.smooth_objective(self.initial_soln, 'grad') +
            self.randomized_loss.quadratic.objective(self.initial_soln,
                                                     'grad'))
        # the quadratic of a smooth_atom is not included in computing the smooth_objective

        initial_subgrad = initial_subgrad[self._inactive]
        initial_unpenalized = self.initial_soln[self._unpenalized]
        self.observed_opt_state = np.concatenate(
            [initial_scalings, initial_unpenalized, initial_subgrad], axis=0)

        # set the _solved bit

        self._solved = True

        # Now setup the pieces for linear decomposition

        (loss, epsilon, penalty, initial_soln, overall, inactive, unpenalized,
         active_groups,
         active_directions) = (self.loss, self.epsilon, self.penalty,
                               self.initial_soln, self._overall,
                               self._inactive, self._unpenalized,
                               self._active_groups, self._active_directions)

        # scaling should be chosen to be Lipschitz constant for gradient of Gaussian part

        # we are implicitly assuming that
        # loss is a pairs model

        _sqrt_scaling = np.sqrt(scaling)

        _beta_unpenalized = restricted_Mest(loss,
                                            overall,
                                            solve_args=solve_args)

        beta_full = np.zeros(overall.shape)
        beta_full[overall] = _beta_unpenalized
        _hessian = loss.hessian(beta_full)
        self._beta_full = beta_full

        # observed state for score

        self.observed_score_state = np.hstack([
            _beta_unpenalized * _sqrt_scaling,
            -loss.smooth_objective(beta_full, 'grad')[inactive] / _sqrt_scaling
        ])

        # form linear part

        self.num_opt_var = p = loss.shape[0]  # shorthand for p

        # (\bar{\beta}_{E \cup U}, N_{-E}, c_E, \beta_U, z_{-E})
        # E for active
        # U for unpenalized
        # -E for inactive

        _opt_linear_term = np.zeros(
            (p,
             self._active_groups.sum() + unpenalized.sum() + inactive.sum()))
        _score_linear_term = np.zeros((p, p))

        # \bar{\beta}_{E \cup U} piece -- the unpenalized M estimator

        Mest_slice = slice(0, overall.sum())
        _Mest_hessian = _hessian[:, overall]
        _score_linear_term[:, Mest_slice] = -_Mest_hessian / _sqrt_scaling

        # N_{-(E \cup U)} piece -- inactive coordinates of score of M estimator at unpenalized solution

        null_idx = range(overall.sum(), p)
        inactive_idx = np.nonzero(inactive)[0]
        for _i, _n in zip(inactive_idx, null_idx):
            _score_linear_term[_i, _n] = -_sqrt_scaling

        # c_E piece

        scaling_slice = slice(0, active_groups.sum())
        if len(active_directions) == 0:
            _opt_hessian = 0
        else:
            _opt_hessian = (_hessian +
                            epsilon * np.identity(p)).dot(active_directions)
        _opt_linear_term[:, scaling_slice] = _opt_hessian / _sqrt_scaling

        self.observed_opt_state[scaling_slice] *= _sqrt_scaling

        # beta_U piece

        unpenalized_slice = slice(active_groups.sum(),
                                  active_groups.sum() + unpenalized.sum())
        unpenalized_directions = np.identity(p)[:, unpenalized]
        if unpenalized.sum():
            _opt_linear_term[:, unpenalized_slice] = (
                _hessian + epsilon *
                np.identity(p)).dot(unpenalized_directions) / _sqrt_scaling

        self.observed_opt_state[unpenalized_slice] *= _sqrt_scaling

        # subgrad piece

        subgrad_idx = range(
            active_groups.sum() + unpenalized.sum(),
            active_groups.sum() + inactive.sum() + unpenalized.sum())
        subgrad_slice = slice(
            active_groups.sum() + unpenalized.sum(),
            active_groups.sum() + inactive.sum() + unpenalized.sum())
        for _i, _s in zip(inactive_idx, subgrad_idx):
            _opt_linear_term[_i, _s] = _sqrt_scaling

        self.observed_opt_state[subgrad_slice] /= _sqrt_scaling

        # form affine part

        _opt_affine_term = np.zeros(p)
        idx = 0
        groups = np.unique(penalty.groups)
        for i, g in enumerate(groups):
            if active_groups[i]:
                group = penalty.groups == g
                _opt_affine_term[group] = active_directions[:, idx][
                    group] * penalty.weights[g]
                idx += 1

        # two transforms that encode score and optimization
        # variable roles

        # later, we will modify `score_transform`
        # in `linear_decomposition`

        self.opt_transform = (_opt_linear_term, _opt_affine_term)
        self.score_transform = (_score_linear_term,
                                np.zeros(_score_linear_term.shape[0]))

        # now store everything needed for the projections
        # the projection acts only on the optimization
        # variables

        self.scaling_slice = scaling_slice

        # weights are scaled here because the linear terms scales them by scaling

        new_groups = penalty.groups[inactive]
        new_weights = dict([(g, penalty.weights[g] / _sqrt_scaling)
                            for g in penalty.weights.keys()
                            if g in np.unique(new_groups)])

        # we form a dual group lasso object
        # to do the projection

        self.group_lasso_dual = rr.group_lasso_dual(new_groups,
                                                    weights=new_weights,
                                                    bound=1.)
        self.subgrad_slice = subgrad_slice

        self._setup = True
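
The active/inactive bookkeeping above mirrors the group-lasso KKT conditions: on an active group the subgradient equals weights[g] times the unit direction of the solution, and on an inactive group its norm is at most weights[g]. A generic sketch of that check (not tied to the class above; groups and weights follow the regreg group_lasso attributes used in solve):

def check_group_lasso_kkt(subgrad, groups, weights, soln, tol=1.e-5):
    # hedged sketch: group-lasso optimality conditions
    for g in np.unique(groups):
        idx = groups == g
        norm_g = np.linalg.norm(soln[idx])
        if norm_g > 0:
            direction = soln[idx] / norm_g
            if not np.allclose(subgrad[idx], weights[g] * direction, atol=tol):
                return False
        elif np.linalg.norm(subgrad[idx]) > weights[g] * (1 + tol):
            return False
    return True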
Esempio n. 41
0
def test_solve_QP():
    """
    Check the R coordinate descent LASSO solver
    """

    n, p = 100, 50
    lam = 0.08

    X = np.random.standard_normal((n, p))

    loss = rr.squared_error(X, np.zeros(n), coef=1. / n)
    pen = rr.l1norm(p, lagrange=lam)
    E = np.zeros(p)
    E[2] = 1
    Q = rr.identity_quadratic(0, 0, E, 0)
    problem = rr.simple_problem(loss, pen)
    soln = problem.solve(Q, min_its=500, tol=1.e-12)

    numpy2ri.activate()

    rpy.r.assign('X', X)
    rpy.r.assign('E', E)
    rpy.r.assign('lam', lam)

    R_code = """

    library(selectiveInference)
    p = ncol(X)
    n = nrow(X)
    soln_R = rep(0, p)
    grad = 1. * E
    ever_active = as.integer(c(1, rep(0, p-1)))
    nactive = as.integer(1)
    kkt_tol = 1.e-12
    objective_tol = 1.e-16
    parameter_tol = 1.e-10
    maxiter = 500
    soln_R = selectiveInference:::solve_QP(t(X) %*% X / n, 
                                           lam, 
                                           maxiter, 
                                           soln_R, 
                                           E,
                                           grad, 
                                           ever_active, 
                                           nactive, 
                                           kkt_tol, 
                                           objective_tol, 
                                           parameter_tol,
                                           p,
                                           TRUE,
                                           TRUE,
                                           TRUE)$soln

    # test wide solver
    Xtheta = rep(0, n)
    nactive = as.integer(1)
    ever_active = as.integer(c(1, rep(0, p-1)))
    soln_R_wide = rep(0, p)
    grad = 1. * E
    soln_R_wide = selectiveInference:::solve_QP_wide(X, 
                                                     rep(lam, p), 
                                                     0,
                                                     maxiter, 
                                                     soln_R_wide, 
                                                     E,
                                                     grad, 
                                                     Xtheta,
                                                     ever_active, 
                                                     nactive, 
                                                     kkt_tol, 
                                                     objective_tol, 
                                                     parameter_tol,
                                                     p,
                                                     TRUE,
                                                     TRUE,
                                                     TRUE)$soln

    """

    rpy.r(R_code)

    soln_R = np.asarray(rpy.r('soln_R'))
    soln_R_wide = np.asarray(rpy.r('soln_R_wide'))
    numpy2ri.deactivate()

    tol = 1.e-5
    print(soln - soln_R)
    print(soln_R - soln_R_wide)

    G = X.T.dot(X).dot(soln) / n + E

    yield np.testing.assert_allclose, soln, soln_R, tol, tol, False, 'checking coordinate QP solver'
    yield np.testing.assert_allclose, soln, soln_R_wide, tol, tol, False, 'checking wide coordinate QP solver'
    yield np.testing.assert_allclose, G[soln != 0], -np.sign(
        soln[soln != 0]
    ) * lam, tol, tol, False, 'checking active coordinate KKT for QP solver'
    yield nt.assert_true, np.fabs(
        G).max() < lam * (1. + 1.e-6), 'testing linfinity norm'
Esempio n. 42
0
# IPython log file

import numpy as np
import regreg.smooth.mglm as M
import regreg.api as rr

np.random.seed(0)
n, p = 2000, 4
Y = np.random.multinomial(1, [0.1, 0.4, 0.5], size=(n, ))
q = Y.shape[1]

X = np.random.standard_normal((n, p))
pen = rr.l1_l2((p, q), lagrange=0.4 * np.sqrt(n))
loss = M.mglm.multinomial(X, Y)
problem = rr.simple_problem(loss, pen)
problem.solve(debug=True, min_its=50, tol=1e-12)

loss_baseline = M.mglm.multinomial(X, Y, baseline=True)
pen_baseline = rr.l1_l2((p, q - 1), lagrange=0.4 * np.sqrt(n))
problem_baseline = rr.simple_problem(loss_baseline, pen_baseline)
problem_baseline.solve(debug=True, min_its=50, tol=1e-12)
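
To turn either fit into predictions, the linear predictors X.dot(coefs) are passed through a softmax. A hedged sketch for the full (non-baseline) parameterization, assuming problem.coefs holds the (p, q) solution left behind by solve above:

# hedged sketch: class probabilities from the fitted multinomial coefficients
beta_hat = problem.coefs                  # assumed: regreg composites keep the last solution in .coefs
eta = X.dot(beta_hat)                     # n x q linear predictors
eta -= eta.max(1)[:, None]                # stabilize the exponentials
probs = np.exp(eta)
probs /= probs.sum(1)[:, None]            # rows sum to one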
Esempio n. 43
0
    def fit(self, tol=1.e-12, min_its=50, **solve_args):
        """
        Fit the lasso using `regreg`.
        This sets the attributes `soln`, `onestep` and
        forms the constraints necessary for post-selection inference
        by calling `form_constraints()`.

        Parameters
        ----------

        solve_args : keyword args
             Passed to `regreg.problems.simple_problem.solve`.

        Returns
        -------

        soln : np.ndarray
             Solution to the lasso problem.
             
        """

        penalty = weighted_l1norm(self.feature_weights, lagrange=1.)
        problem = simple_problem(self.loglike, penalty)
        lasso_solution = problem.solve(tol=tol, min_its=min_its, **solve_args)
        self.lasso_solution = lasso_solution
        if not np.all(lasso_solution == 0):
            self.active = np.nonzero(lasso_solution != 0)[0]
            self.inactive = lasso_solution == 0
            self.active_signs = np.sign(lasso_solution[self.active])
            self._active_soln = lasso_solution[self.active]
            H = self.loglike.hessian(self.lasso_solution)
            H_AA = H[self.active][:,self.active]
            H_AAinv = np.linalg.inv(H_AA)
            Q = self.loglike.quadratic
            G_Q = Q.objective(self.lasso_solution, 'grad')
            G = self.loglike.gradient(self.lasso_solution) + G_Q
            G_A = G[self.active]
            G_I = self._G_I = G[self.inactive]
            dbeta_A = H_AAinv.dot(G_A)
            self.onestep_estimator = self._active_soln - dbeta_A
            self.active_penalized = self.feature_weights[self.active] != 0
            self._constraints = constraints(-np.diag(self.active_signs)[self.active_penalized],
                                             (self.active_signs * dbeta_A)[self.active_penalized],
                                             covariance=H_AAinv)
            if self.inactive.sum():

                # inactive constraints

                H_IA = H[self.inactive][:,self.active]
                H_II = H[self.inactive][:,self.inactive]
                inactive_cov = H_II - H_IA.dot(H_AAinv).dot(H_IA.T)
                irrepresentable = H_IA.dot(H_AAinv)
                inactive_mean = irrepresentable.dot(-G_A)
                self._inactive_constraints = constraints(np.vstack([np.identity(self.inactive.sum()),
                                                                    -np.identity(self.inactive.sum())]),
                                                         np.hstack([self.feature_weights[self.inactive],
                                                                    self.feature_weights[self.inactive]]),
                                                         covariance=inactive_cov,
                                                         mean=inactive_mean)
                if not self._inactive_constraints(G_I):
                    warnings.warn('inactive constraint of KKT conditions not satisfied -- perhaps need to solve with more accuracy')

                if self.covariance_estimator is not None:

                    # make full constraints

                    _cov_FA = self.covariance_estimator(self.onestep_estimator,
                                                        self.active,
                                                        self.inactive)

                    _cov_IA = _cov_FA[len(self.active):]
                    _cov_AA = _cov_FA[:len(self.active)]

                    # X_{-E}^T(y - X_E \bar{\beta}_E)

                    _inactive_score = - G_I - inactive_mean

                    _beta_bar = self.onestep_estimator
                    _indep_linear_part = _cov_IA.dot(np.linalg.inv(_cov_AA))

                    # we "fix" _nuisance, effectively conditioning on it

                    _nuisance = _inactive_score - _indep_linear_part.dot(_beta_bar)
                    _upper_lim = (self.feature_weights[self.inactive] - 
                                  _nuisance - 
                                  inactive_mean)
                    _lower_lim = (_nuisance + 
                                  self.feature_weights[self.inactive] +
                                  inactive_mean)

                    _upper_linear = _indep_linear_part
                    _lower_linear = -_indep_linear_part

                    C = self._constraints
                    _full_linear = np.vstack([C.linear_part,
                                              _upper_linear,
                                              _lower_linear])

                    _full_offset = np.hstack([C.offset,
                                              _upper_lim,
                                              _lower_lim])

                    self._constraints = constraints(_full_linear,
                                                    _full_offset,
                                                    covariance=_cov_AA)

                    if not self._constraints(_beta_bar):
                        warnings.warn('constraints of KKT conditions on one-step estimator ' +
                                      'not satisfied -- perhaps need to solve with more ' +
                                      'accuracy')

            else:
                self._inactive_constraints = None
        else:
            self.active = []
            self.inactive = np.arange(lasso_solution.shape[0])
            self._constraints = None
            self._inactive_constraints = None
        return self.lasso_solution
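
The selection event used for inference above is an affine constraint A z <= b on the one-step estimator, built from the active signs; for a fit with a non-empty active set it should hold at the observed value. A minimal sketch of that sanity check (lasso_fit is a hypothetical fitted instance of the class above, and the constraints object is assumed to encode the region {z : A z <= b}):

# hedged sketch: check the sign-consistency event at the observed one-step estimator
C = lasso_fit._constraints
assert np.all(C.linear_part.dot(lasso_fit.onestep_estimator) <= C.offset)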
Esempio n. 44
0
def solveit(atom, Z, W, U, linq, L, FISTA, coef_stop):

    p2 = copy(atom)
    p2.quadratic = rr.identity_quadratic(L, Z, 0, 0)

    d = atom.conjugate

    q = rr.identity_quadratic(1, Z, 0, 0)
    yield ac, Z - atom.proximal(q), d.proximal(q), "testing duality of projections starting from atom %s " % atom
    q = rr.identity_quadratic(L, Z, 0, 0)

    # use simple_problem.nonsmooth

    p2 = copy(atom)
    p2.quadratic = atom.quadratic + q
    problem = rr.simple_problem.nonsmooth(p2)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-14, FISTA=FISTA, coef_stop=coef_stop)

    yield ac, atom.proximal(
        q
    ), solver.composite.coefs, "solving prox with simple_problem.nonsmooth with monotonicity %s " % atom

    # use the solve method

    p2.coefs *= 0
    p2.quadratic = atom.quadratic + q
    soln = p2.solve()

    yield ac, atom.proximal(q), soln, "solving prox with solve method %s " % atom

    loss = rr.quadratic.shift(-Z, coef=L)
    problem = rr.simple_problem(loss, atom)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-12, FISTA=FISTA, coef_stop=coef_stop)

    yield ac, atom.proximal(q), solver.composite.coefs, "solving prox with simple_problem with monotonicity %s " % atom

    dproblem2 = rr.dual_problem(loss.conjugate, rr.identity(loss.primal_shape), atom.conjugate)
    dcoef2 = dproblem2.solve(coef_stop=coef_stop, tol=1.0e-14)
    yield ac, atom.proximal(q), dcoef2, "solving prox with dual_problem with monotonicity %s " % atom

    dproblem = rr.dual_problem.fromprimal(loss, atom)
    dcoef = dproblem.solve(coef_stop=coef_stop, tol=1.0e-14)
    yield ac, atom.proximal(q), dcoef, "solving prox with dual_problem.fromprimal with monotonicity %s " % atom

    # write the loss in terms of a quadratic for the smooth loss and a smooth function...

    lossq = rr.quadratic.shift(-Z, coef=0.6 * L)
    lossq.quadratic = rr.identity_quadratic(0.4 * L, Z, 0, 0)
    problem = rr.simple_problem(lossq, atom)

    yield ac, atom.proximal(q), problem.solve(
        coef_stop=coef_stop, FISTA=FISTA, tol=1.0e-12
    ), "solving prox with simple_problem with monotonicity  but loss has identity_quadratic %s " % atom

    problem = rr.simple_problem.nonsmooth(p2)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-14, monotonicity_restart=False, coef_stop=coef_stop, FISTA=FISTA)

    yield ac, atom.proximal(
        q
    ), solver.composite.coefs, "solving prox with simple_problem.nonsmooth with no monotonocity %s " % atom

    loss = rr.quadratic.shift(-Z, coef=L)
    problem = rr.simple_problem(loss, atom)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-12, monotonicity_restart=False, coef_stop=coef_stop, FISTA=FISTA)

    yield ac, atom.proximal(
        q
    ), solver.composite.coefs, "solving prox with simple_problem %s no monotonicity_restart" % atom

    loss = rr.quadratic.shift(-Z, coef=L)
    problem = rr.separable_problem.singleton(atom, loss)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-12, coef_stop=coef_stop, FISTA=FISTA)

    yield ac, atom.proximal(q), solver.composite.coefs, "solving atom prox with separable_atom.singleton %s " % atom

    loss = rr.quadratic.shift(-Z, coef=L)
    problem = rr.container(loss, atom)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-12, coef_stop=coef_stop, FISTA=FISTA)

    yield ac, atom.proximal(q), solver.composite.coefs, "solving atom prox with container %s " % atom

    # write the loss in terms of a quadratic for the smooth loss and a smooth function...

    lossq = rr.quadratic.shift(-Z, coef=0.6 * L)
    lossq.quadratic = rr.identity_quadratic(0.4 * L, Z, 0, 0)
    problem = rr.container(lossq, atom)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-12, FISTA=FISTA, coef_stop=coef_stop)

    yield (
        ac,
        atom.proximal(q),
        problem.solve(tol=1.0e-12, FISTA=FISTA, coef_stop=coef_stop),
        "solving prox with container with monotonicity  but loss has identity_quadratic %s " % atom,
    )

    loss = rr.quadratic.shift(-Z, coef=L)
    problem = rr.simple_problem(loss, d)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-12, monotonicity_restart=False, coef_stop=coef_stop, FISTA=FISTA)
    # ac(d.proximal(q), solver.composite.coefs, 'solving dual prox with simple_problem no monotonicity %s ' % atom)
    yield (
        ac,
        d.proximal(q),
        problem.solve(tol=1.0e-12, FISTA=FISTA, coef_stop=coef_stop, monotonicity_restart=False),
        "solving dual prox with simple_problem no monotonocity %s " % atom,
    )

    problem = rr.container(d, loss)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-12, coef_stop=coef_stop, FISTA=FISTA)
    yield ac, d.proximal(q), solver.composite.coefs, "solving dual prox with container %s " % atom

    loss = rr.quadratic.shift(-Z, coef=L)
    problem = rr.separable_problem.singleton(d, loss)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-12, coef_stop=coef_stop, FISTA=FISTA)

    yield ac, d.proximal(q), solver.composite.coefs, "solving atom prox with separable_atom.singleton %s " % atom
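
The first yield in solveit is Moreau's decomposition: for a closed convex atom, prox_f(z) + prox_{f*}(z) = z when the quadratic has unit coefficient. A self-contained check with an l1 atom, using the same rr calls that appear above:

# hedged sketch: Moreau decomposition, prox_f(z) + prox_{f*}(z) = z
Z0 = np.random.standard_normal(10)
atom0 = rr.l1norm(10, lagrange=0.5)
q0 = rr.identity_quadratic(1, Z0, 0, 0)
np.testing.assert_allclose(atom0.proximal(q0) + atom0.conjugate.proximal(q0), Z0, atol=1.e-6)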
Esempio n. 45
0
    def CV_err(self,
               penalty,
               loss=None,
               residual_randomization=None,
               scale=None,
               solve_args={
                   'min_its': 20,
                   'tol': 1.e-1
               }):
        """
        Computes the non-randomized CV error and the one with added residual randomization
        """
        if loss is None:
            loss = copy.copy(self.loss)
        X, y = loss.data
        n, p = X.shape

        CV_err = 0
        CV_err_squared = 0

        if residual_randomization is not None:
            CV_err_randomized = 0
            CV_err_squared_randomized = 0
            if scale is None:
                scale = 1.

        for fold in np.unique(self.folds):
            test = self.folds == fold
            train = ~test

            loss_train = loss.subsample(train)
            loss_test = loss.subsample(test)
            X_test, y_test = X[test], y[test]
            n_test = y_test.shape[0]

            if self.objective_randomization is not None:
                randomized_train_loss = self.objective_randomization.randomize(
                    loss_train, self.epsilon)[0]  # randomized train loss
                problem = rr.simple_problem(randomized_train_loss, penalty)
            else:
                problem = rr.simple_problem(loss_train, penalty)
            beta_train = problem.solve(**solve_args)

            _mu = lambda X, beta: loss_test.saturated_loss.mean_function(
                X.dot(beta))
            resid = y_test - _mu(X_test, beta_train)
            cur = (resid**2).sum() / n_test
            CV_err += cur
            CV_err_squared += (cur**2)

            if residual_randomization is not None:
                random_noise = scale * np.random.standard_normal(n_test)
                cur_randomized = ((resid + random_noise)**2).sum() / n_test
                CV_err_randomized += cur_randomized
                CV_err_squared_randomized += cur_randomized**2

        SD_CV = np.sqrt(
            (CV_err_squared - ((CV_err**2) / self.K)) / float(self.K - 1))
        if residual_randomization is not None:
            SD_CV_randomized = np.sqrt(
                (CV_err_squared_randomized -
                 (CV_err_randomized**2 / self.K)) / (self.K - 1))
            return CV_err, SD_CV, CV_err_randomized, SD_CV_randomized
        else:
            return CV_err, SD_CV
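
The SD_CV expression above is the K-fold sample standard deviation of the per-fold errors written in accumulator form. A quick standalone equivalence check on hypothetical per-fold errors:

# hedged sketch: accumulator form of the fold-wise standard deviation
errs = np.array([0.8, 1.1, 0.9, 1.3, 1.0])    # hypothetical per-fold CV errors, K = 5
K = errs.shape[0]
sd_accum = np.sqrt((np.sum(errs ** 2) - np.sum(errs) ** 2 / K) / (K - 1))
np.testing.assert_allclose(sd_accum, np.std(errs, ddof=1))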
Esempio n. 46
0
def test_sqrt_lasso(n=500, p=20, s=3, signal=10, K=5, rho=0.,
                    randomizer = 'gaussian',
                    randomizer_scale = 1.,
                    scale1 = 0.1,
                    scale2 = 0.2,
                    lam_frac = 1.,
                    bootstrap = False,
                    condition_on_CVR = False,
                    marginalize_subgrad = True,
                    ndraw = 10000,
                    burnin = 2000):

    print(n,p,s)
    if randomizer == 'laplace':
        randomizer = randomization.laplace((p,), scale=randomizer_scale)
    elif randomizer == 'gaussian':
        randomizer = randomization.isotropic_gaussian((p,),randomizer_scale)
    elif randomizer == 'logistic':
        randomizer = randomization.logistic((p,), scale=randomizer_scale)

    X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=rho, signal=signal, sigma=1)
    lam_nonrandom = choose_lambda(X)
    lam_random = choose_lambda_with_randomization(X, randomizer)
    loss = l2norm_glm(X, y)
    #sqloss = rr.glm.gaussian(X, y)
    epsilon = 1./n

    # non-randomized sqrt-Lasso, just looking how many vars it selects
    problem = rr.simple_problem(loss, rr.l1norm(p, lagrange=lam_nonrandom))
    beta_hat = problem.solve()
    active_hat = beta_hat !=0
    print("non-randomized sqrt-root Lasso active set", np.where(beta_hat)[0])
    print("non-randomized sqrt-lasso", active_hat.sum())

    # view 2
    W = lam_frac * np.ones(p) * lam_random
    penalty = rr.group_lasso(np.arange(p),
                             weights=dict(zip(np.arange(p), W)), lagrange=1. / np.sqrt(n))
    M_est1 = glm_group_lasso(loss, epsilon, penalty, randomizer)

    mv = multiple_queries([M_est1])
    mv.solve()

    #active = soln != 0
    active_union = M_est1._overall
    nactive = np.sum(active_union)
    print("nactive", nactive)
    if nactive==0:
        return None

    nonzero = np.where(beta)[0]
    if set(nonzero).issubset(np.nonzero(active_union)[0]):

        active_set = np.nonzero(active_union)[0]
        true_vec = beta[active_union]

        if marginalize_subgrad == True:
            M_est1.decompose_subgradient(conditioning_groups=np.zeros(p, dtype=bool),
                                         marginalizing_groups=np.ones(p, bool))

        target_sampler, target_observed = glm_target(loss,
                                                     active_union,
                                                     mv,
                                                     bootstrap=bootstrap)

        target_sample = target_sampler.sample(ndraw=ndraw,
                                              burnin=burnin)
        LU = target_sampler.confidence_intervals(target_observed,
                                                 sample=target_sample,
                                                 level=0.9)

        #pivots_mle = target_sampler.coefficient_pvalues(target_observed,
        #                                                parameter=target_sampler.reference,
        #                                                sample=target_sample)
        pivots_truth = target_sampler.coefficient_pvalues(target_observed,
                                                          parameter=true_vec,
                                                          sample=target_sample)
        pvalues = target_sampler.coefficient_pvalues(target_observed,
                                                     parameter=np.zeros_like(true_vec),
                                                     sample=target_sample)

        L, U = LU.T
        sel_covered = np.zeros(nactive, dtype=bool)
        sel_length = np.zeros(nactive)

        LU_naive = naive_confidence_intervals(target_sampler, target_observed)
        naive_covered = np.zeros(nactive, dtype=bool)
        naive_length = np.zeros(nactive)
        naive_pvals = naive_pvalues(target_sampler, target_observed, true_vec)

        active_var = np.zeros(nactive, dtype=bool)

        for j in range(nactive):
            if (L[j] <= true_vec[j]) and (U[j] >= true_vec[j]):
                sel_covered[j] = 1
            if (LU_naive[j, 0] <= true_vec[j]) and (LU_naive[j, 1] >= true_vec[j]):
                naive_covered[j] = 1
            sel_length[j] = U[j]-L[j]
            naive_length[j] = LU_naive[j,1]-LU_naive[j,0]
            active_var[j] = active_set[j] in nonzero

        print("individual coverage", np.true_divide(sel_covered.sum(),nactive))
        from statsmodels.sandbox.stats.multicomp import multipletests
        q = 0.1
        BH_decisions = multipletests(pvalues, alpha=q, method="fdr_bh")[0]
        return pivots_truth, sel_covered, sel_length, naive_pvals, naive_covered, naive_length, active_var, BH_decisions, active_var
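
choose_lambda and choose_lambda_with_randomization pick the sqrt-lasso tuning parameter by simulation, exploiting the fact that for the objective ||y - X b||_2 + lam * ||b||_1 the natural lambda scale does not depend on the noise level. A hedged sketch of the common recipe, a high quantile of ||X^T eps||_inf / ||eps||_2 over Gaussian noise (not necessarily the exact implementation used above):

def choose_lambda_sketch(X, quantile=0.95, ndraw=5000):
    # hedged sketch: scale-free tuning parameter for the sqrt-lasso
    n, p = X.shape
    eps = np.random.standard_normal((n, ndraw))
    stats = np.fabs(X.T.dot(eps)).max(0) / np.sqrt((eps ** 2).sum(0))
    return np.quantile(stats, quantile)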
Esempio n. 47
0
def solveit(atom, Z, W, U, linq, L, FISTA, coef_stop):

    p2 = copy(atom)
    p2.quadratic = rr.identity_quadratic(L, Z, 0, 0)

    d = atom.conjugate

    q = rr.identity_quadratic(1, Z, 0, 0)
    yield ac, Z - atom.proximal(q), d.proximal(
        q), 'testing duality of projections starting from atom %s ' % atom
    q = rr.identity_quadratic(L, Z, 0, 0)

    # use simple_problem.nonsmooth

    p2 = copy(atom)
    p2.quadratic = atom.quadratic + q
    problem = rr.simple_problem.nonsmooth(p2)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-14, FISTA=FISTA, coef_stop=coef_stop)

    yield ac, atom.proximal(
        q
    ), solver.composite.coefs, 'solving prox with simple_problem.nonsmooth with monotonicity %s ' % atom

    # use the solve method

    p2.coefs *= 0
    p2.quadratic = atom.quadratic + q
    soln = p2.solve()

    yield ac, atom.proximal(
        q), soln, 'solving prox with solve method %s ' % atom

    loss = rr.quadratic.shift(-Z, coef=L)
    problem = rr.simple_problem(loss, atom)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-12, FISTA=FISTA, coef_stop=coef_stop)

    yield ac, atom.proximal(
        q
    ), solver.composite.coefs, 'solving prox with simple_problem with monotonicity %s ' % atom

    dproblem2 = rr.dual_problem(loss.conjugate, rr.identity(loss.shape),
                                atom.conjugate)
    dcoef2 = dproblem2.solve(coef_stop=coef_stop, tol=1.e-14)
    yield ac, atom.proximal(
        q
    ), dcoef2, 'solving prox with dual_problem with monotonicity %s ' % atom

    dproblem = rr.dual_problem.fromprimal(loss, atom)
    dcoef = dproblem.solve(coef_stop=coef_stop, tol=1.0e-14)
    yield ac, atom.proximal(
        q
    ), dcoef, 'solving prox with dual_problem.fromprimal with monotonicity %s ' % atom

    # write the loss in terms of a quadratic for the smooth loss and a smooth function...

    lossq = rr.quadratic.shift(-Z, coef=0.6 * L)
    lossq.quadratic = rr.identity_quadratic(0.4 * L, Z, 0, 0)
    problem = rr.simple_problem(lossq, atom)

    yield ac, atom.proximal(q), problem.solve(
        coef_stop=coef_stop, FISTA=FISTA, tol=1.0e-12
    ), 'solving prox with simple_problem with monotonicity  but loss has identity_quadratic %s ' % atom

    problem = rr.simple_problem.nonsmooth(p2)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-14,
               monotonicity_restart=False,
               coef_stop=coef_stop,
               FISTA=FISTA)

    yield ac, atom.proximal(
        q
    ), solver.composite.coefs, 'solving prox with simple_problem.nonsmooth with no monotonicity %s ' % atom

    loss = rr.quadratic.shift(-Z, coef=L)
    problem = rr.simple_problem(loss, atom)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-12,
               monotonicity_restart=False,
               coef_stop=coef_stop,
               FISTA=FISTA)

    yield ac, atom.proximal(
        q
    ), solver.composite.coefs, 'solving prox with simple_problem %s no monotonicity_restart' % atom

    loss = rr.quadratic.shift(-Z, coef=L)
    problem = rr.separable_problem.singleton(atom, loss)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-12, coef_stop=coef_stop, FISTA=FISTA)

    yield ac, atom.proximal(
        q
    ), solver.composite.coefs, 'solving atom prox with separable_problem.singleton %s ' % atom

    loss = rr.quadratic.shift(-Z, coef=L)
    problem = rr.container(loss, atom)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-12, coef_stop=coef_stop, FISTA=FISTA)

    yield ac, atom.proximal(
        q
    ), solver.composite.coefs, 'solving atom prox with container %s ' % atom

    # write the loss in terms of a quadratic for the smooth loss and a smooth function...

    lossq = rr.quadratic.shift(-Z, coef=0.6 * L)
    lossq.quadratic = rr.identity_quadratic(0.4 * L, Z, 0, 0)
    problem = rr.container(lossq, atom)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-12, FISTA=FISTA, coef_stop=coef_stop)

    yield (
        ac, atom.proximal(q),
        problem.solve(tol=1.e-12, FISTA=FISTA, coef_stop=coef_stop),
        'solving prox with container with monotonicity  but loss has identity_quadratic %s '
        % atom)

    loss = rr.quadratic.shift(-Z, coef=L)
    problem = rr.simple_problem(loss, d)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-12,
               monotonicity_restart=False,
               coef_stop=coef_stop,
               FISTA=FISTA)
    # ac(d.proximal(q), solver.composite.coefs, 'solving dual prox with simple_problem no monotonicity %s ' % atom)
    yield (ac, d.proximal(q),
           problem.solve(tol=1.e-12,
                         FISTA=FISTA,
                         coef_stop=coef_stop,
                         monotonicity_restart=False),
           'solving dual prox with simple_problem no monotonicity %s ' % atom)

    problem = rr.container(d, loss)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-12, coef_stop=coef_stop, FISTA=FISTA)
    yield ac, d.proximal(
        q
    ), solver.composite.coefs, 'solving dual prox with container %s ' % atom

    loss = rr.quadratic.shift(-Z, coef=L)
    problem = rr.separable_problem.singleton(d, loss)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-12, coef_stop=coef_stop, FISTA=FISTA)

    yield ac, d.proximal(
        q
    ), solver.composite.coefs, 'solving dual prox with separable_problem.singleton %s ' % atom
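
# A hedged driver sketch for the generator above: solveit yields
# (check, expected, computed, message) tuples; here numpy.allclose stands in
# for the test harness's `ac` comparison helper, and W, U, linq are simply
# forwarded since solveit does not use them directly.
import numpy as np

def run_solveit_checks(atom, Z, W, U, linq, L=2.0):
    for _check, expected, computed, msg in solveit(atom, Z, W, U, linq, L,
                                                   FISTA=True, coef_stop=False):
        assert np.allclose(expected, computed, atol=1.e-4), msg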
Esempio n. 48
0
def test_quadratic_for_smooth():
    '''
    This test checks that the quadratic part of the smooth
    function is being used in the proximal step.
    '''

    L = 0.45

    W = np.random.standard_normal(40)
    Z = np.random.standard_normal(40)
    U = np.random.standard_normal(40)

    atomq = rr.identity_quadratic(0.4, U, W, 0)
    atom = rr.l1norm(40, quadratic=atomq, lagrange=0.12)

    # specifying in this way should be the same as if we put 0.5*L below
    loss = rr.quadratic_loss.shift(Z, coef=0.6*L)
    lq = rr.identity_quadratic(0.4*L, Z, 0, 0)
    loss.quadratic = lq 

    ww = np.random.standard_normal(40)

    # specifying in this way should be the same as if we put 0.5*L below
    loss2 = rr.quadratic_loss.shift(Z, coef=L)
    yield all_close, loss2.objective(ww), loss.objective(ww), 'checking objective', None

    yield all_close, lq.objective(ww, 'func'), loss.nonsmooth_objective(ww), 'checking nonsmooth objective', None
    yield all_close, loss2.smooth_objective(ww, 'func'), 0.5 / 0.3 * loss.smooth_objective(ww, 'func'), 'checking smooth objective func', None
    yield all_close, loss2.smooth_objective(ww, 'grad'), 0.5 / 0.3 * loss.smooth_objective(ww, 'grad'), 'checking smooth objective grad', None

    problem = rr.container(loss, atom)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-12)

    problem3 = rr.simple_problem(loss, atom)
    solver3 = rr.FISTA(problem3)
    solver3.fit(tol=1.0e-12, coef_stop=True)

    loss4 = rr.quadratic_loss.shift(Z, coef=0.6*L)
    problem4 = rr.simple_problem(loss4, atom)
    problem4.quadratic = lq
    solver4 = rr.FISTA(problem4)
    solver4.fit(tol=1.0e-12)

    gg_soln = rr.gengrad(problem, L)

    loss6 = rr.quadratic_loss.shift(Z, coef=0.6*L)
    loss6.quadratic = lq + atom.quadratic
    atomcp = copy(atom)
    atomcp.quadratic = rr.identity_quadratic(0,0,0,0)
    problem6 = rr.dual_problem(loss6.conjugate, rr.identity(loss6.shape), atomcp.conjugate)
    problem6.lipschitz = L + atom.quadratic.coef
    dsoln2 = problem6.solve(coef_stop=True, tol=1.e-10, 
                            max_its=100)

    problem2 = rr.container(loss2, atom)
    solver2 = rr.FISTA(problem2)
    solver2.fit(tol=1.0e-12, coef_stop=True)

    q = rr.identity_quadratic(L, Z, 0, 0)

    yield all_close, problem.objective(ww), atom.nonsmooth_objective(ww) + q.objective(ww, 'func'), 'checking container objective against atom plus quadratic', None

    atom = rr.l1norm(40, quadratic=atomq, lagrange=0.12)
    aq = atom.solve(q)
    for p, msg in zip([solver3.composite.coefs,
                       gg_soln,
                       solver2.composite.coefs,
                       dsoln2,
                       solver.composite.coefs,
                       solver4.composite.coefs],
                      ['simple_problem with loss having no quadratic',
                       'gen grad',
                       'container with loss having no quadratic',
                       'dual problem with loss having a quadratic',
                       'container with loss having a quadratic',
                       'simple_problem having a quadratic']):
        yield all_close, aq, p, msg, None
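
# Numerical sanity check of the coefficient split used above, in plain numpy so
# it does not rely on regreg internals: a quadratic centered at Z with
# coefficient 0.6*L plus an identity quadratic with coefficient 0.4*L and the
# same center has the same value as a single quadratic with coefficient L.
import numpy as np

L = 0.45
Z = np.random.standard_normal(40)
ww = np.random.standard_normal(40)

split_value = 0.5 * (0.6 * L) * np.sum((ww - Z)**2) + \
              0.5 * (0.4 * L) * np.sum((ww - Z)**2)
full_value = 0.5 * L * np.sum((ww - Z)**2)
assert np.allclose(split_value, full_value)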
Esempio n. 49
0
def test_lasso(s=1, n=100, p=10):

    X, y, _, nonzero, sigma = instance(n=n, p=p, random_signs=True, s=s, sigma=1.,rho=0)
    print('sigma', sigma)
    lam_frac = 1.

    randomization = laplace(loc=0, scale=1.)
    loss = randomized.gaussian_Xfixed(X, y)

    random_Z = randomization.rvs(p)
    epsilon = 1.
    lam = sigma * lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 10000)))).max(0))

    random_Z = randomization.rvs(p)
    penalty = randomized.selective_l1norm_lan(p, lagrange=lam)

    #sampler1 = randomized.selective_sampler_MH_lan(loss,
    #                                           random_Z,
    #                                           epsilon,
    #                                           randomization,
    #                                          penalty)

    #loss_args = {'mean': np.zeros(n),
    #             'sigma': sigma,
    #             'linear_part':np.identity(y.shape[0]),
    #             'value': 0}

    #sampler1.setup_sampling(y, loss_args=loss_args)
    # data, opt_vars = sampler1.state

    # initial solution
    problem = rr.simple_problem(loss, penalty)
    random_term = rr.identity_quadratic(epsilon, 0, random_Z, 0)
    solve_args = {'tol': 1.e-10, 'min_its': 100, 'max_its': 500}
    initial_soln = problem.solve(random_term, **solve_args)
    initial_grad = loss.smooth_objective(initial_soln,  mode='grad')
    betaE, cube = penalty.setup_sampling(initial_grad,
                                         initial_soln,
                                         random_Z,
                                         epsilon)

    data = y.copy()
    active = penalty.active_set
    if np.sum(active) == 0:
        print('no active variables selected')
        return [-1], [-1]
    inactive = ~active

    #betaE, cube = opt_vars
    ndata = data.shape[0]
    nactive = betaE.shape[0]
    ninactive = cube.shape[0]
    init_vec_state = np.zeros(ndata+nactive+ninactive)
    init_vec_state[:ndata] = data
    init_vec_state[ndata:(ndata+nactive)] = betaE
    init_vec_state[(ndata+nactive):] = cube

    def bootstrap_samples(y, P, R):
        nsample = 50
        boot_samples = []
        for _ in range(nsample):
            indices = np.random.choice(n, size=(n,), replace=True)
            y_star = y[indices]
            boot_samples.append(np.dot(P,y)+np.dot(R,y_star-y))

        return boot_samples

    # boot_samples = bootstrap_samples(y)


    def move_data(vec_state, boot_samples,
                   ndata = ndata, nactive = nactive, ninactive = ninactive, loss=loss):

        weights = []

        betaE = vec_state[ndata:(ndata+nactive)]
        cube = vec_state[(ndata+nactive):]
        opt_vars = [betaE, cube]
        params, _, opt_vec = penalty.form_optimization_vector(opt_vars)  # opt_vec=\epsilon(\beta 0)+u, u=\grad P(\beta), P penalty

        for i in range(len(boot_samples)):
            gradient = loss.gradient(boot_samples[i], params)
            weights.append(np.exp(-np.sum(np.abs(gradient + opt_vec))))
        weights /= np.sum(weights)

        #m = max(weights)
        #idx = [i for i, j in enumerate(weights) if j == m][0]
        idx = np.nonzero(np.random.multinomial(1, weights, size=1)[0])[0][0]
        return boot_samples[idx]


    def full_projection(vec_state, penalty=penalty,
                        ndata=ndata, nactive=nactive, ninactive = ninactive):
        data = vec_state[:ndata].copy()
        betaE = vec_state[ndata:(ndata+nactive)]
        cube = vec_state[(ndata+nactive):]

        signs = penalty.signs
        projected_betaE = betaE.copy()
        projected_cube = np.zeros_like(cube)

        for i in range(nactive):
            if (projected_betaE[i] * signs[i] < 0):
                projected_betaE[i] = 0

        projected_cube = np.clip(cube, -1, 1)

        return np.concatenate((data, projected_betaE, projected_cube), 0)



    def full_gradient(vec_state, loss=loss, penalty =penalty, X=X,
                      lam=lam, epsilon=epsilon, ndata=ndata, active=active, inactive=inactive):
        nactive = np.sum(active)
        ninactive = np.sum(inactive)

        data = vec_state[:ndata]
        betaE = vec_state[ndata:(ndata + nactive)]
        cube = vec_state[(ndata + nactive):]

        opt_vars = [betaE, cube]
        params , _ , opt_vec = penalty.form_optimization_vector(opt_vars) # opt_vec=\epsilon(\beta 0)+u, u=\grad P(\beta), P penalty

        gradient = loss.gradient(data, params)
        hessian = loss.hessian()

        ndata = data.shape[0]
        nactive = betaE.shape[0]
        ninactive = cube.shape[0]

        sign_vec = - np.sign(gradient + opt_vec)  # sign(w), w=grad+\epsilon*beta+lambda*u

        B = hessian + epsilon * np.identity(nactive + ninactive)
        A = B[:, active]

        _gradient = np.zeros(ndata + nactive + ninactive)
        _gradient[:ndata] = 0 #- (data + np.dot(X, sign_vec))
        _gradient[ndata:(ndata + nactive)] = np.dot(A.T, sign_vec)
        _gradient[(ndata + nactive):] = lam * sign_vec[inactive]

        return _gradient


    null, alt = pval(init_vec_state, full_gradient, full_projection, move_data, bootstrap_samples,
                      X, y, nonzero, active)

    return null, alt
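
# A hedged sketch of how pval might drive full_gradient and full_projection
# above: one projected Langevin step on the augmented state
# (data, betaE, cube). The step size eta and the update rule are assumptions
# about the sampler, not the actual implementation of pval.
import numpy as np

def projected_langevin_step(state, full_gradient, full_projection, eta=1.e-2):
    noise = np.random.standard_normal(state.shape)
    proposal = state + eta * full_gradient(state) + np.sqrt(2 * eta) * noise
    return full_projection(proposal)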
Esempio n. 50
0
def test_lasso(s=3, n=1000, p=10, scale=True):

    X, y, true_beta, nonzero, sigma = instance(n=n,
                                               p=p,
                                               random_signs=True,
                                               s=s,
                                               sigma=1.,
                                               rho=0,
                                               scale=scale)
    print('true beta', true_beta)
    lam_frac = 1.

    randomization = laplace(loc=0, scale=1.)
    loss = lasso_randomX.lasso_randomX(X, y)

    random_Z = randomization.rvs(p)
    epsilon = 1.
    lam = sigma * lam_frac * np.mean(
        np.fabs(np.dot(X.T, np.random.standard_normal((n, 10000)))).max(0))

    if (scale == False):
        random_Z = np.sqrt(n) * random_Z
        lam = np.sqrt(n) * lam

    random_Z = randomization.rvs(p)
    penalty = randomized.selective_l1norm_lan(p, lagrange=lam)

    # initial solution

    problem = rr.simple_problem(loss, penalty)
    random_term = rr.identity_quadratic(epsilon, 0, random_Z, 0)

    solve_args = {'tol': 1.e-10, 'min_its': 100, 'max_its': 500}
    initial_soln = problem.solve(random_term, **solve_args)
    initial_grad = loss.smooth_objective(initial_soln, mode='grad')
    betaE, cube = penalty.setup_sampling(initial_grad, initial_soln, random_Z,
                                         epsilon)
    print(initial_soln)

    active = penalty.active_set
    inactive = ~active
    loss.fit_E(active)
    beta_unpenalized = loss._beta_unpenalized
    residual = y - np.dot(X[:, active], beta_unpenalized)  # y-X_E\bar{\beta}^E
    N = np.dot(X[:, inactive].T,
               residual)  # X_{-E}^T(y-X_E\bar{\beta}_E), null statistic
    data = np.concatenate((beta_unpenalized, N), axis=0)
    ndata = data.shape[0]
    nactive = betaE.shape[0]
    ninactive = cube.shape[0]

    # parametric covariance estimate
    XE_pinv = np.linalg.pinv(X[:, active])
    mat = np.zeros((nactive + ninactive, n))
    mat[:nactive, :] = XE_pinv
    mat[nactive:, :] = X[:, inactive].T.dot(
        np.identity(n) - X[:, active].dot(XE_pinv))

    Sigma_full = mat.dot(mat.T)
    Sigma_full_inv = np.linalg.inv(Sigma_full)

    # non-parametric covariance estimate
    #Sigma_full = loss._Sigma_full
    #Sigma_full_inv = np.linalg.inv(Sigma_full)

    init_vec_state = np.zeros(ndata + nactive + ninactive)
    init_vec_state[:ndata] = data
    init_vec_state[ndata:(ndata + nactive)] = betaE
    init_vec_state[(ndata + nactive):] = cube

    def bootstrap_samples(data0, P, R, X=X):
        nsample = 200
        boot_samples = []
        X_E = X[:, active]

        for _ in range(nsample):
            indices = np.random.choice(n, size=(n, ), replace=True)
            data_star = np.zeros_like(data0)
            data_star[:nactive] = np.linalg.lstsq(X_E[indices, :],
                                                  y[indices])[0]
            data_star[nactive:] = 0
            boot_samples.append(
                np.dot(P, data0) + np.dot(R, data_star - data0))

        return boot_samples

        # boot_samples = bootstrap_samples(y)

    def move_data(vec_state,
                  boot_samples,
                  ndata=ndata,
                  nactive=nactive,
                  ninactive=ninactive,
                  loss=loss):

        weights = []

        betaE = vec_state[ndata:(ndata + nactive)]
        cube = vec_state[(ndata + nactive):]
        opt_vars = [betaE, cube]
        params, _, opt_vec = penalty.form_optimization_vector(
            opt_vars
        )  # opt_vec=\epsilon(\beta 0)+u, u=\grad P(\beta), P penalty

        for i in range(len(boot_samples)):
            gradient = loss.gradient(boot_samples[i], params)
            weights.append(np.exp(-np.sum(np.abs(gradient + opt_vec))))

        weights /= np.sum(weights)

        idx = np.nonzero(np.random.multinomial(1, weights, size=1)[0])[0][0]
        return boot_samples[idx]

    def full_projection(vec_state,
                        penalty=penalty,
                        ndata=ndata,
                        nactive=nactive,
                        ninactive=ninactive):
        data = vec_state[:ndata].copy()
        betaE = vec_state[ndata:(ndata + nactive)]
        cube = vec_state[(ndata + nactive):]

        signs = penalty.signs

        projected_betaE = betaE.copy()
        projected_cube = np.zeros_like(cube)

        for i in range(nactive):
            if (projected_betaE[i] * signs[i] < 0):
                projected_betaE[i] = 0

        projected_cube = np.clip(cube, -1, 1)

        return np.concatenate((data, projected_betaE, projected_cube), 0)

    def full_gradient(vec_state,
                      loss=loss,
                      penalty=penalty,
                      Sigma_full_inv=Sigma_full_inv,
                      lam=lam,
                      epsilon=epsilon,
                      ndata=ndata,
                      active=active,
                      inactive=inactive):
        nactive = np.sum(active)
        ninactive = np.sum(inactive)

        data = vec_state[:ndata]
        betaE = vec_state[ndata:(ndata + nactive)]
        cube = vec_state[(ndata + nactive):]

        opt_vars = [betaE, cube]
        params, _, opt_vec = penalty.form_optimization_vector(
            opt_vars
        )  # opt_vec=\epsilon(\beta 0)+u, u=\grad P(\beta), P penalty

        gradient = loss.gradient(data, params)
        hessian = loss.hessian

        ndata = data.shape[0]
        nactive = betaE.shape[0]
        ninactive = cube.shape[0]

        sign_vec = -np.sign(
            gradient + opt_vec)  # sign(w), w=grad+\epsilon*beta+lambda*u

        A = hessian + epsilon * np.identity(nactive + ninactive)
        A_restricted = A[:, active]

        T = data[:nactive]
        _gradient = np.zeros(ndata + nactive + ninactive)

        # saturated model
        _gradient[:ndata] = -np.dot(Sigma_full_inv, data)
        _gradient[:nactive] -= hessian[:, active].T.dot(sign_vec)
        _gradient[nactive:(ndata)] -= sign_vec[inactive]

        # selected model
        #_gradient[:nactive] = - (np.dot(Sigma_T_inv, data[:nactive]) + np.dot(hessian[:, active].T, sign_vec))
        _gradient[ndata:(ndata + nactive)] = np.dot(A_restricted.T, sign_vec)
        _gradient[(ndata + nactive):] = lam * sign_vec[inactive]

        return _gradient

    null, alt = pval(init_vec_state, full_gradient, full_projection,
                     bootstrap_samples, move_data,
                     Sigma_full[:nactive, :nactive], data, nonzero, active)

    return null, alt
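
# Standalone sanity check of the parametric covariance used above: the stacked
# target (betabar_E, X_{-E}^T(y - X_E betabar_E)) is a fixed linear map M of y,
# so under y with identity covariance its covariance is M M^T, the matrix
# called Sigma_full in the test. Dimensions here are small and arbitrary; this
# is an illustration only.
import numpy as np

n, p, n_act = 200, 8, 3
X = np.random.standard_normal((n, p))
X /= np.sqrt(n)
active = np.zeros(p, bool)
active[:n_act] = True

XE_pinv = np.linalg.pinv(X[:, active])
M = np.vstack([XE_pinv,
               X[:, ~active].T.dot(np.identity(n) - X[:, active].dot(XE_pinv))])

draws = np.array([M.dot(np.random.standard_normal(n)) for _ in range(5000)])
print("max |empirical - M M^T|:", np.abs(np.cov(draws.T) - M.dot(M.T)).max())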
Esempio n. 51
0
def test_sqrt_lasso(n=500,
                    p=20,
                    s=3,
                    signal=10,
                    K=5,
                    rho=0.,
                    randomizer='gaussian',
                    randomizer_scale=1.,
                    scale1=0.1,
                    scale2=0.2,
                    lam_frac=1.,
                    bootstrap=False,
                    condition_on_CVR=False,
                    marginalize_subgrad=True,
                    ndraw=10000,
                    burnin=2000):

    print(n, p, s)
    if randomizer == 'laplace':
        randomizer = randomization.laplace((p, ), scale=randomizer_scale)
    elif randomizer == 'gaussian':
        randomizer = randomization.isotropic_gaussian((p, ), randomizer_scale)
    elif randomizer == 'logistic':
        randomizer = randomization.logistic((p, ), scale=randomizer_scale)

    X, y, beta, nonzero, sigma = gaussian_instance(n=n,
                                                   p=p,
                                                   s=s,
                                                   rho=rho,
                                                   signal=signal,
                                                   sigma=1)
    lam_nonrandom = choose_lambda(X)
    lam_random = choose_lambda_with_randomization(X, randomizer)
    loss = l2norm_glm(X, y)
    #sqloss = rr.glm.gaussian(X, y)
    epsilon = 1. / n

    # non-randomized sqrt-Lasso, just looking how many vars it selects
    problem = rr.simple_problem(loss, rr.l1norm(p, lagrange=lam_nonrandom))
    beta_hat = problem.solve()
    active_hat = beta_hat != 0
    print("non-randomized sqrt-root Lasso active set", np.where(beta_hat)[0])
    print("non-randomized sqrt-lasso", active_hat.sum())

    # view 2
    W = lam_frac * np.ones(p) * lam_random
    penalty = rr.group_lasso(np.arange(p),
                             weights=dict(zip(np.arange(p), W)),
                             lagrange=1. / np.sqrt(n))
    M_est = glm_group_lasso(loss, epsilon, penalty, randomizer)

    mv = multiple_queries([M_est])
    mv.solve()

    active_set = M_est._overall
    nactive = np.sum(active_set)

    if nactive == 0:
        return None

    nonzero = np.where(beta)[0]
    if set(nonzero).issubset(np.nonzero(active_set)[0]):

        active_set = np.nonzero(active_set)[0]
        true_vec = beta[active_set]

        if marginalize_subgrad == True:
            M_est.decompose_subgradient(conditioning_groups=np.zeros(
                p, dtype=bool),
                                        marginalizing_groups=np.ones(p, bool))

        selected_features = np.zeros(p, np.bool)
        selected_features[active_set] = True

        unpenalized_mle = restricted_Mest(M_est.loss, selected_features)

        form_covariances = glm_nonparametric_bootstrap(n, n)
        boot_target, boot_target_observed = pairs_bootstrap_glm(
            M_est.loss, selected_features, inactive=None)
        target_info = boot_target

        cov_info = M_est.setup_sampler()
        target_cov, score_cov = form_covariances(target_info,
                                                 cross_terms=[cov_info],
                                                 nsample=M_est.nboot)

        opt_sample = M_est.sampler.sample(ndraw, burnin)

        pvalues = M_est.sampler.coefficient_pvalues(
            unpenalized_mle,
            target_cov,
            score_cov,
            parameter=np.zeros(selected_features.sum()),
            sample=opt_sample)
        intervals = M_est.sampler.confidence_intervals(unpenalized_mle,
                                                       target_cov,
                                                       score_cov,
                                                       sample=opt_sample)

        true_vec = beta[M_est.selection_variable['variables']]

        L, U = intervals.T

        covered = np.zeros(nactive, np.bool)
        active_var = np.zeros(nactive, np.bool)

        for j in range(nactive):
            if (L[j] <= true_vec[j]) and (U[j] >= true_vec[j]):
                covered[j] = 1
            active_var[j] = active_set[j] in nonzero

        return pvalues, covered, active_var
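
# Hypothetical driver (not part of the test): repeat the experiment a few times
# and report average interval coverage and the share of selected variables that
# are truly active. test_sqrt_lasso returns None when nothing is selected or
# when the true support is not screened.
import numpy as np

def summarize_sqrt_lasso(nrep=20):
    coverages, truly_active = [], []
    for _ in range(nrep):
        result = test_sqrt_lasso(ndraw=2000, burnin=500)
        if result is None:
            continue
        pvalues, covered, active_var = result
        coverages.append(np.mean(covered))
        truly_active.append(np.mean(active_var))
    if not coverages:
        return None
    return np.mean(coverages), np.mean(truly_active)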
Esempio n. 52
0
    def solve(self):

        (loss,
         epsilon,
         penalty,
         randomization,
         solve_args) = (self.loss,
                        self.epsilon,
                        self.penalty,
                        self.randomization,
                        self.solve_args)

        # initial solution

        problem = rr.simple_problem(loss, penalty)
        self._randomZ = self.randomization.sample()
        self._random_term = rr.identity_quadratic(epsilon, 0, -self._randomZ, 0)
        self.initial_soln = problem.solve(self._random_term, **solve_args)

        # find the active groups and their direction vectors
        # as well as unpenalized groups

        groups = np.unique(penalty.groups) 
        active_groups = np.zeros(len(groups), np.bool)
        unpenalized_groups = np.zeros(len(groups), np.bool)

        active_directions = []
        active = np.zeros(loss.shape, np.bool)
        unpenalized = np.zeros(loss.shape, np.bool)

        initial_scalings = []

        for i, g in enumerate(groups):
            group = penalty.groups == g
            active_groups[i] = (np.linalg.norm(self.initial_soln[group]) > 1.e-6 * penalty.weights[g]) and (penalty.weights[g] > 0)
            unpenalized_groups[i] = (penalty.weights[g] == 0)
            if active_groups[i]:
                active[group] = True
                z = np.zeros(active.shape, np.float)
                z[group] = self.initial_soln[group] / np.linalg.norm(self.initial_soln[group])
                active_directions.append(z)
                initial_scalings.append(np.linalg.norm(self.initial_soln[group]))
            if unpenalized_groups[i]:
                unpenalized[group] = True

        # solve the restricted problem

        self.overall = active + unpenalized
        self.inactive = ~self.overall
        self.unpenalized = unpenalized
        self.active_directions = np.array(active_directions).T
        self.active_groups = np.array(active_groups, np.bool)
        self.unpenalized_groups = np.array(unpenalized_groups, np.bool)

        self.selection_variable = (self.active_groups, self.active_directions)

        # initial state for opt variables

        initial_subgrad = -(self.loss.smooth_objective(self.initial_soln, 'grad') + self._random_term.objective(self.initial_soln, 'grad') + epsilon * self.initial_soln)
        initial_subgrad = initial_subgrad[self.inactive]
        initial_unpenalized = self.initial_soln[self.unpenalized]
        self.observed_opt_state = np.concatenate([initial_scalings,
                                                  initial_unpenalized,
                                                  initial_subgrad], axis=0)
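
# A hedged reconstruction sketch (an assumption about how the stored pieces fit
# together, not a method of the class above): the initial solution can be
# rebuilt as the sum of each active group's scaling times its unit direction,
# plus the unpenalized coordinates.
import numpy as np

def rebuild_solution(active_directions, scalings, unpenalized_mask, unpenalized_coefs):
    # active_directions has one unit-norm column per active group
    beta = active_directions.dot(np.asarray(scalings))
    beta[unpenalized_mask] = unpenalized_coefs
    return beta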