Example #1
def test_group_lasso_weightedl1_lagrange():
    n, p = 100, 50

    X = np.random.standard_normal((n, p))
    Y = np.random.standard_normal(n)

    loss = rr.glm.gaussian(X, Y)
    weights = np.ones(p)
    weights[-2:] = np.inf
    weights[:2] = 0
    weight_dict = dict(enumerate(weights))
    pen1 = rr.weighted_l1norm(weights, lagrange=0.5 * np.sqrt(n))
    pen2 = rr.group_lasso(np.arange(p),
                          weights=weight_dict,
                          lagrange=0.5 * np.sqrt(n))

    problem1 = rr.simple_problem(loss, pen1)
    problem2 = rr.simple_problem(loss, pen2)

    beta1 = problem1.solve(tol=1.e-14, min_its=500)
    beta2 = problem2.solve(tol=1e-14, min_its=500)

    npt.assert_allclose(beta1, beta2)

    bound_val = pen1.seminorm(beta1, lagrange=1)
    bound1 = rr.weighted_l1norm(weights, bound=bound_val)
    bound2 = rr.group_lasso(np.arange(p), weights=weight_dict, bound=bound_val)
    problem3 = rr.simple_problem(loss, bound1)
    problem4 = rr.simple_problem(loss, bound2)

    beta3 = problem3.solve(tol=1.e-14, min_its=500)
    beta4 = problem4.solve(tol=1.e-14, min_its=500)

    npt.assert_allclose(beta3, beta4)
    npt.assert_allclose(beta3, beta1)
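
The test leans on the fact that, for singleton groups, the group-lasso seminorm collapses to a weighted l1 norm; restating what the code checks (not quoting library documentation):

$$
\sum_{g} w_g \|\beta_g\|_2 \;=\; \sum_{j=1}^{p} w_j |\beta_j| \qquad \text{when every group is } g = \{j\},
$$

so the two Lagrange problems (and the two bound problems built from the fitted seminorm value) share a solution. A weight of 0 leaves a coordinate unpenalized and a weight of $\infty$ forces it to zero, which is exactly how the first and last two coordinates are treated above.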
    def __init__(
        self,
        loglike,
        groups,
        weights,
        ridge_term,
        randomizer,
        use_lasso=True,  # should lasso solver be used where applicable - defaults to True
        perturb=None):

        _check_groups(groups)  # make sure groups looks sensible

        # log likelihood : quadratic loss
        self.loglike = loglike
        self.nfeature = self.loglike.shape[0]

        # ridge parameter
        self.ridge_term = ridge_term

        # group lasso penalty (from regreg)
        # use regular lasso penalty if all groups are size 1
        if use_lasso and groups.size == np.unique(groups).size:
            # need to provide weights as an np.ndarray rather than a dictionary
            weights_np = np.array([w[1] for w in sorted(weights.items())])
            self.penalty = rr.weighted_l1norm(weights=weights_np, lagrange=1.)
        else:
            self.penalty = rr.group_lasso(groups, weights=weights, lagrange=1.)

        # store groups as a class variable since the non-group lasso doesn't
        self.groups = groups

        self._initial_omega = perturb

        # gaussian randomization
        self.randomizer = randomizer
def test_changepoint_scaled():

    p = 150
    M = multiscale(p)
    M.minsize = 10
    X = ra.adjoint(M)

    Y = np.random.standard_normal(p)
    Y[20:50] += 8
    Y += 2
    meanY = Y.mean()

    lammax = np.fabs(np.sqrt(M.sizes) * X.adjoint_map(Y) / (1 + np.sqrt(np.log(M.sizes)))).max()

    penalty = rr.weighted_l1norm((1 + np.sqrt(np.log(M.sizes))) / np.sqrt(M.sizes), lagrange=0.5*lammax)
    loss = rr.squared_error(X, Y - meanY)
    problem = rr.simple_problem(loss, penalty)
    soln = problem.solve()
    Yhat = X.linear_map(soln)
    Yhat += meanY

    if INTERACTIVE:
        plt.scatter(np.arange(p), Y)
        plt.plot(np.arange(p), Yhat)
        plt.show()
def test_weighted_l1_with_zero():
    z = np.random.standard_normal(5)
    a = rr.weighted_l1norm([0, 1, 1, 1, 1], lagrange=0.5)
    b = a.dual[1]
    c = rr.l1norm(4, lagrange=0.5)
    npt.assert_equal(a.lagrange_prox(z), z - b.bound_prox(z))
    npt.assert_equal(a.lagrange_prox(z)[0], z[0])
    npt.assert_equal(a.lagrange_prox(z)[1:], c.lagrange_prox(z[1:]))
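
What this exercises is the coordinatewise soft-thresholding form of the weighted l1 prox (a restatement of the identities asserted above):

$$
\big(\mathrm{prox}_{\lambda \sum_j w_j|\cdot|}(z)\big)_j = \mathrm{sign}(z_j)\,\max(|z_j| - \lambda w_j,\, 0),
$$

so a zero weight returns that coordinate unchanged, the remaining coordinates match a plain l1 prox at the same lagrange, and the first assertion is the Moreau decomposition $z = \mathrm{prox}_f(z) + \mathrm{prox}_{f^*}(z)$ with $f^*$ the dual bound-form atom.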
Example #5
    def __init__(self, 
                 Q,
                 X, 
                 y,
                 feature_weights,
                 ridge_term=None,
                 randomizer_scale=None,
                 perturb=None):
        r"""

        Create a new post-selection object for the LASSO problem

        Parameters
        ----------

        loglike : `regreg.smooth.glm.glm`
            A (negative) log-likelihood as implemented in `regreg`.

        feature_weights : np.ndarray
            Feature weights for L-1 penalty. If a float,
            it is broadcast to all features.

        ridge_term : float
            How big a ridge term to add?

        randomizer_scale : float
            Scale for IID components of randomization.

        perturb : np.ndarray
            Random perturbation subtracted as a linear
            term in the objective function.

        """

        (self.Q,
         self.X,
         self.y) = (Q, X, y)

        self.loss = rr.quadratic_loss(Q.shape[0], Q=Q)
        n, p = X.shape
        self.nfeature = p

        if np.asarray(feature_weights).shape == ():
            feature_weights = np.ones(p) * feature_weights  # broadcast a scalar weight to all p features
        self.feature_weights = np.asarray(feature_weights)

        mean_diag = np.diag(Q).mean()
        if ridge_term is None:
            ridge_term = np.std(y) * np.sqrt(mean_diag) / np.sqrt(n - 1)

        if randomizer_scale is None:
            randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(y) * np.sqrt(n / (n - 1.))

        self.randomizer = randomization.isotropic_gaussian((p,), randomizer_scale)
        self.ridge_term = ridge_term
        self.penalty = rr.weighted_l1norm(self.feature_weights, lagrange=1.)
        self._initial_omega = perturb # random perturbation
Example #6
def test_weighted_l1_bound_loose():
    n, p = 100, 10
    X = np.random.standard_normal((n, p))
    Y = np.random.standard_normal(n)
    beta = np.linalg.pinv(X).dot(Y)
    bound = 2 * np.fabs(beta).sum()
    atom = rr.weighted_l1norm(np.ones(p), bound=bound)
    loss = rr.squared_error(X, Y)
    problem = rr.simple_problem(loss, atom)
    soln = problem.solve(tol=1.e-12, min_its=100)
    npt.assert_allclose(soln, beta)
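
The bound here is deliberately slack: with unit weights the constraint value at the least-squares solution $\hat\beta = X^{+}Y$ is $\sum_j |\hat\beta_j|$, strictly less than the bound $2\sum_j |\hat\beta_j|$ whenever $\hat\beta \ne 0$, so the constraint is inactive and the constrained minimizer is just the unconstrained least-squares fit -- which is what the final assertion checks.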
Example #7
def solve_sqrt_lasso_fat(X,
                         Y,
                         weights=None,
                         initial=None,
                         quadratic=None,
                         solve_args={}):
    """

    Solve the square-root LASSO optimization problem:

    $$
    \text{minimize}_{\beta} \|y-X\beta\|_2 + \|D\beta\|_1,
    $$
    where $D$ is the diagonal matrix with weights on its diagonal.

    Parameters
    ----------

    y : np.float((n,))
        The target, in the model $y = X\beta$

    X : np.float((n, p))
        The data, in the model $y = X\beta$

    weights : np.float
        Coefficients of the L-1 penalty in the
        optimization problem; different
        coordinates can have different coefficients.

    initial : np.float(p)
        Initial point for optimization.

    solve_args : dict
        Arguments passed to regreg solver.

    quadratic : `regreg.identity_quadratic`
        A quadratic term added to objective function.

    """
    #X = rr.astransform(X)
    #n, p = X.output_shape[0], X.input_shape[0]
    n, p = X.shape
    if weights is None:
        lam = choose_lambda(X)
        weights = lam * np.ones((p, ))

    loss = sqlasso_objective(X, Y)
    penalty = rr.weighted_l1norm(weights, lagrange=1.)
    problem = rr.simple_problem(loss, penalty)
    if initial is not None:
        problem.coefs[:] = initial
    soln = problem.solve(quadratic, **solve_args)
    return soln, loss
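
A minimal usage sketch (assuming `solve_sqrt_lasso_fat` above is importable along with the `choose_lambda` and `sqlasso_objective` helpers it calls; the data below are synthetic and the tolerances illustrative):

import numpy as np

# synthetic regression data in the model y = X beta + noise
n, p = 100, 20
X = np.random.standard_normal((n, p))
Y = X[:, :3].dot(np.ones(3)) + np.random.standard_normal(n)

# weights default to choose_lambda(X) * np.ones(p); pass weights= to override
soln, loss = solve_sqrt_lasso_fat(X, Y, solve_args={'tol': 1.e-10, 'min_its': 200})
active = np.nonzero(soln != 0)[0]   # indices of the selected features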
def test_equivalence_sqrtlasso(n=200, p=400, s=10, sigma=3.):
    """
    Check equivalent LASSO and sqrtLASSO solutions.
    """

    Y = np.random.standard_normal(n) * sigma
    beta = np.zeros(p)
    beta[:s] = 8 * (2 * np.random.binomial(1, 0.5, size=(s, )) - 1)
    X = np.random.standard_normal(
        (n, p)) + 0.3 * np.random.standard_normal(n)[:, None]
    X /= (X.std(0)[None, :] * np.sqrt(n))
    Y += np.dot(X, beta) * sigma
    lam_theor = choose_lambda(X, quantile=0.9)

    weights = lam_theor * np.ones(p)
    weights[:3] = 0.
    soln1, loss1 = solve_sqrt_lasso(X,
                                    Y,
                                    weights=weights,
                                    quadratic=None,
                                    solve_args={
                                        'min_its': 500,
                                        'tol': 1.e-10
                                    })

    G1 = loss1.smooth_objective(soln1, 'grad')

    # find active set, and estimate of sigma

    active = (soln1 != 0)
    nactive = active.sum()
    subgrad = np.sign(soln1[active]) * weights[active]
    X_E = X[:, active]
    X_Ei = np.linalg.pinv(X_E)
    sigma_E = np.linalg.norm(Y - X_E.dot(X_Ei.dot(Y))) / np.sqrt(n - nactive)

    multiplier = sigma_E * np.sqrt(
        (n - nactive) / (1 - np.linalg.norm(X_Ei.T.dot(subgrad))**2))

    # XXX how should quadratic be changed?
    # multiply everything by sigma_E?

    loss2 = rr.glm.gaussian(X, Y)
    penalty = rr.weighted_l1norm(weights, lagrange=multiplier)
    problem = rr.simple_problem(loss2, penalty)

    soln2 = problem.solve(tol=1.e-12, min_its=200)
    G2 = loss2.smooth_objective(soln2, 'grad') / multiplier

    np.testing.assert_allclose(G1[3:], G2[3:])
    np.testing.assert_allclose(soln1, soln2)
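
The scaling this test verifies can be written out explicitly (a restatement of the multiplier computed in the code, where $E$ is the active set, $u_E = \mathrm{sign}(\hat\beta_E)\,w_E$ the active subgradient and $X_E^{+}$ the pseudo-inverse of $X_E$):

$$
\hat\lambda = \hat\sigma_E \sqrt{\frac{n - |E|}{1 - \|X_E^{+T} u_E\|_2^2}},
\qquad
\hat\sigma_E = \frac{\|Y - X_E X_E^{+} Y\|_2}{\sqrt{n - |E|}}.
$$

With lagrange $\hat\lambda$ on the same weights, the Gaussian-loss LASSO reproduces the square-root LASSO solution, and the two gradients agree after dividing the LASSO gradient by $\hat\lambda$ -- which is exactly what the assertions check.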
Example #9
    def __init__(self,
                 loglike,
                 feature_weights,
                 proportion_select,
                 ridge_term=0,
                 perturb=None):

        (self.loglike, self.feature_weights, self.proportion_select,
         self.ridge_term) = (loglike, feature_weights, proportion_select,
                             ridge_term)

        self.nfeature = p = self.loglike.shape[0]
        self.penalty = rr.weighted_l1norm(self.feature_weights, lagrange=1.)
        self._initial_omega = perturb
def solve_problem(Qbeta_bar, Q, lagrange, initial=None):
    p = Qbeta_bar.shape[0]
    loss = rr.quadratic_loss(
        (p, ), Q=Q, quadratic=rr.identity_quadratic(0, 0, -Qbeta_bar, 0))
    lagrange = np.asarray(lagrange)
    if lagrange.shape in [(), (1, )]:
        lagrange = np.ones(p) * lagrange
    pen = rr.weighted_l1norm(lagrange, lagrange=1.)
    problem = rr.simple_problem(loss, pen)
    if initial is not None:
        problem.coefs[:] = initial
    soln = problem.solve(tol=1.e-12, min_its=500)

    return soln
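
Reading off the loss and penalty, the problem being solved is (a restatement; `Qbeta_bar` plays the role of $Q\bar\beta$):

$$
\text{minimize}_{\beta}\; \tfrac12 \beta^T Q \beta - \beta^T Q\bar\beta + \sum_{j=1}^{p} \lambda_j |\beta_j|,
$$

i.e. a LASSO expressed through the sufficient statistics $(Q, Q\bar\beta)$ rather than raw data, with a coordinatewise lagrange supplied via `rr.weighted_l1norm(lagrange, lagrange=1.)`.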
    def __init__(self,
                 loglike,
                 feature_weights,
                 ridge_term,
                 randomizer_scale,
                 randomizer='gaussian',
                 parametric_cov_estimator=False,
                 perturb=None):
        r"""
        Create a new post-selection object for the LASSO problem
        Parameters
        ----------
        loglike : `regreg.smooth.glm.glm`
            A (negative) log-likelihood as implemented in `regreg`.
        feature_weights : np.ndarray
            Feature weights for L-1 penalty. If a float,
            it is broadcast to all features.
        ridge_term : float
            How big a ridge term to add?
        randomizer_scale : float
            Scale for IID components of randomization.
        randomizer : str (optional)
            One of ['laplace', 'logistic', 'gaussian']
        """

        self.loglike = loglike
        self.nfeature = p = self.loglike.shape[0]

        if np.asarray(feature_weights).shape == ():
            feature_weights = np.ones(loglike.shape) * feature_weights
        self.feature_weights = np.asarray(feature_weights)

        self.parametric_cov_estimator = parametric_cov_estimator

        if randomizer == 'laplace':
            self.randomizer = randomization.laplace((p, ),
                                                    scale=randomizer_scale)
        elif randomizer == 'gaussian':
            self.randomizer = randomization.isotropic_gaussian(
                (p, ), randomizer_scale)
        elif randomizer == 'logistic':
            self.randomizer = randomization.logistic((p, ),
                                                     scale=randomizer_scale)

        self.ridge_term = ridge_term

        self.penalty = rr.weighted_l1norm(self.feature_weights, lagrange=1.)

        self._initial_omega = perturb
def solve_sqrt_lasso_fat(X, Y, weights=None, initial=None, quadratic=None, solve_args={}):
    """

    Solve the square-root LASSO optimization problem:

    $$
    \text{minimize}_{\beta} \|y-X\beta\|_2 + \|D\beta\|_1,
    $$
    where $D$ is the diagonal matrix with weights on its diagonal.

    Parameters
    ----------

    y : np.float((n,))
        The target, in the model $y = X\beta$

    X : np.float((n, p))
        The data, in the model $y = X\beta$

    weights : np.float
        Coefficients of the L-1 penalty in the
        optimization problem; different
        coordinates can have different coefficients.

    initial : np.float(p)
        Initial point for optimization.

    solve_args : dict
        Arguments passed to regreg solver.

    quadratic : `regreg.identity_quadratic`
        A quadratic term added to objective function.

    """
    X = rr.astransform(X)
    n, p = X.output_shape[0], X.input_shape[0]
    if weights is None:
        lam = choose_lambda(X)
        weights = lam * np.ones((p,))

    loss = sqlasso_objective(X, Y)
    penalty = rr.weighted_l1norm(weights, lagrange=1.)
    problem = rr.simple_problem(loss, penalty)
    if initial is not None:
        problem.coefs[:] = initial
    soln = problem.solve(quadratic, **solve_args)
    return soln, loss
def test_equivalence_sqrtlasso(n=200, p=400, s=10, sigma=3.):

    """
    Check equivalent LASSO and sqrtLASSO solutions.
    """

    Y = np.random.standard_normal(n) * sigma
    beta = np.zeros(p)
    beta[:s] = 8 * (2 * np.random.binomial(1, 0.5, size=(s,)) - 1)
    X = np.random.standard_normal((n,p)) + 0.3 * np.random.standard_normal(n)[:,None]
    X /= (X.std(0)[None,:] * np.sqrt(n))
    Y += np.dot(X, beta) * sigma
    lam_theor = choose_lambda(X, quantile=0.9)

    weights = lam_theor * np.ones(p)
    weights[:3] = 0.
    soln1, loss1 = solve_sqrt_lasso(X, Y, weights=weights, quadratic=None, solve_args={'min_its':500, 'tol':1.e-10})

    G1 = loss1.smooth_objective(soln1, 'grad') 

    # find active set, and estimate of sigma

    active = (soln1 != 0)
    nactive = active.sum()
    subgrad = np.sign(soln1[active]) * weights[active]
    X_E = X[:,active]
    X_Ei = np.linalg.pinv(X_E)
    sigma_E = np.linalg.norm(Y - X_E.dot(X_Ei.dot(Y))) / np.sqrt(n - nactive)

    multiplier = sigma_E * np.sqrt((n - nactive) / (1 - np.linalg.norm(X_Ei.T.dot(subgrad))**2))

    # XXX how should quadratic be changed?
    # multiply everything by sigma_E?

    loss2 = rr.glm.gaussian(X, Y)
    penalty = rr.weighted_l1norm(weights, lagrange=multiplier)
    problem = rr.simple_problem(loss2, penalty)

    soln2 = problem.solve(tol=1.e-12, min_its=200)
    G2 = loss2.smooth_objective(soln2, 'grad') / multiplier

    np.testing.assert_allclose(G1[3:], G2[3:])
    np.testing.assert_allclose(soln1, soln2)
Example #14
    def __init__(self,
                 loglike,
                 feature_weights,
                 ridge_term,
                 randomizer,
                 perturb=None):
        r"""
        Create a new post-selection object for the LASSO problem

        Parameters
        ----------

        loglike : `regreg.smooth.glm.glm`
            A (negative) log-likelihood as implemented in `regreg`.

        feature_weights : np.ndarray
            Feature weights for L-1 penalty. If a float,
            it is broadcast to all features.

        ridge_term : float
            How big a ridge term to add?

        randomizer : object
            Randomizer -- contains representation of randomization density.

        perturb : np.ndarray
            Random perturbation subtracted as a linear
            term in the objective function.
        """

        self.loglike = loglike
        self.nfeature = p = self.loglike.shape[0]

        if np.asarray(feature_weights).shape == ():
            feature_weights = np.ones(loglike.shape) * feature_weights
        self.feature_weights = np.asarray(feature_weights)

        self.ridge_term = ridge_term
        self.penalty = rr.weighted_l1norm(self.feature_weights, lagrange=1.)
        self._initial_omega = perturb  # random perturbation

        self.randomizer = randomizer
Example #15
def test_choose_parameter(delta=2, p=60):

    signal = np.zeros(p)
    signal[(p//2):] += delta
    Z = np.random.standard_normal(p) + signal
    p = Z.shape[0]
    M = multiscale(p)
    M.scaling = np.sqrt(M.sizes)
    lam = choose_tuning_parameter(M)
    weights = (lam + np.sqrt(2 * np.log(p / M.sizes))) / np.sqrt(p)

    Z0 = Z - Z.mean()
    loss = rr.squared_error(ra.adjoint(M), Z0)
    penalty = rr.weighted_l1norm(weights, lagrange=1.)
    problem = rr.simple_problem(loss, penalty)
    coef = problem.solve()
    active = coef != 0

    if active.sum():
        X = M.form_matrix(M.slices[active])[0]
Example #16
    def __init__(self, X, Y, l_theory, l_min, l_1se, sigma_reid):

        parametric_method.__init__(self, X, Y, l_theory, l_min, l_1se, sigma_reid)

        self.lagrange = l_1se * np.ones(X.shape[1])

        n, p = self.X.shape
        n1 = int(self.selection_frac * n)
        X1, X2 = self.X1, self.X2 = self.X[:n1], self.X[n1:]
        Y1, Y2 = self.Y1, self.Y2 = self.Y[:n1], self.Y[n1:]

        pen = rr.weighted_l1norm(np.sqrt(n1) * self.lagrange, lagrange=1.)
        loss = rr.squared_error(X1, Y1)
        problem = rr.simple_problem(loss, pen)
        soln = problem.solve()

        self.active_set = np.nonzero(soln)[0]
        self.signs = np.sign(soln)[self.active_set]

        self._fit = True
    def fit(self, **solve_args):
        """
        Fit the lasso using `regreg`.
        This sets the attributes `soln`, `onestep` and
        forms the constraints necessary for post-selection inference
        by calling `form_constraints()`.

        Parameters
        ----------

        solve_args : keyword args
             Passed to `regreg.problems.simple_problem.solve`.

        Returns
        -------

        soln : np.float
             Solution to lasso.
             
        """

        penalty = weighted_l1norm(self.feature_weights, lagrange=1.)
        problem = simple_problem(self.loglike, penalty)
        _soln = problem.solve(**solve_args)
        self._soln = _soln
        if not np.all(_soln == 0):
            self.active = np.nonzero(_soln != 0)[0]
            self.active_signs = np.sign(_soln[self.active])
            self._active_soln = _soln[self.active]
            H = self.loglike.hessian(self._soln)[self.active][:,self.active]
            Hinv = np.linalg.inv(H)
            G = self.loglike.gradient(self._soln)[self.active]
            delta = Hinv.dot(G)
            self._onestep = self._active_soln - delta
            self.active_penalized = self.feature_weights[self.active] != 0
            self._constraints = constraints(-np.diag(self.active_signs)[self.active_penalized],
                                             (self.active_signs * delta)[self.active_penalized],
                                             covariance=Hinv)
        else:
            self.active = []
        return self._soln
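
The `onestep` mentioned in the docstring is the Newton correction computed above (restated from the code):

$$
\bar\beta_E = \hat\beta_E - H_{EE}^{-1}\,\nabla\ell(\hat\beta)_E,
\qquad H_{EE} = \nabla^2\ell(\hat\beta)_{E,E},
$$

a one-step debiasing of the penalized solution on the active set $E$; the selective constraints stored in `self._constraints` are then expressed in terms of this estimator.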
Example #18
def test_group_lasso_weightedl1_bound():
    n, p = 100, 50

    X = np.random.standard_normal((n, p))
    Y = np.random.standard_normal(n)

    loss = rr.glm.gaussian(X, Y)
    weights = np.ones(p)
    weights[-2:] = np.inf
    weights[:2] = 0
    weight_dict = dict(enumerate(weights))
    bound1 = rr.weighted_l1norm(weights, bound=2)
    bound2 = rr.group_lasso(np.arange(p), weights=weight_dict, bound=2)

    problem1 = rr.simple_problem(loss, bound1)
    problem2 = rr.simple_problem(loss, bound2)

    beta1 = problem1.solve(tol=1.e-14, min_its=500)
    beta2 = problem2.solve(tol=1e-14, min_its=500)

    npt.assert_allclose(beta1, beta2)
Example #19
    def fit(self, tol=1.e-12, min_its=50, use_full=True, **solve_args):

        lasso.fit(self, tol=tol, min_its=min_its, **solve_args)

        _feature_weights = self.feature_weights.copy()
        _feature_weights[self.active] = 0.
        _feature_weights[self.inactive] = np.inf
        

        _unpenalized_problem = simple_problem(self.loglike_inference,
                                              weighted_l1norm(_feature_weights, lagrange=1.))
        _unpenalized = _unpenalized_problem.solve(**solve_args)
        self._unpenalized_active = _unpenalized[self.active]

        if use_full:
            H = self.loglike_full.hessian(_unpenalized)
            n_inference = self.loglike_inference.data[0].shape[0]
            n_full = self.loglike_full.data[0].shape[0]
            H *= (1. * n_inference / n_full)
        else:
            H = self.loglike_inference.hessian(_unpenalized)

        H_AA = H[self.active][:,self.active]
        self._cov_inference = np.linalg.inv(H_AA)
Example #20
    def fit(self, tol=1.e-12, min_its=50, **solve_args):

        lasso.fit(self, tol=tol, min_its=min_its, **solve_args)

        n1 = self.loglike.get_data()[0].shape[0]
        n = self.loglike_full.get_data()[0].shape[0]

        _feature_weights = self.feature_weights.copy()
        _feature_weights[self.active] = 0.
        _feature_weights[self.inactive] = np.inf
        
        _unpenalized_problem = simple_problem(self.loglike_full, 
                                              weighted_l1norm(_feature_weights, lagrange=1.))
        _unpenalized = _unpenalized_problem.solve(**solve_args)
        _unpenalized_active = _unpenalized[self.active]

        s = len(self.active)
        H = self.loglike_full.hessian(_unpenalized)
        H_AA = H[self.active][:,self.active]

        _cov_block = np.linalg.inv(H_AA)
        _subsample_block = (n * 1. / n1) * _cov_block
        _carve_cov = np.zeros((2*s,2*s))
        _carve_cov[:s][:,:s] = _cov_block
        _carve_cov[s:][:,:s] = _subsample_block
        _carve_cov[:s][:,s:] = _subsample_block
        _carve_cov[s:][:,s:] = _subsample_block

        _carve_linear_part = self._constraints.linear_part.dot(np.identity(2*s)[s:])
        _carve_offset = self._constraints.offset
        self._carve_constraints = constraints(_carve_linear_part,
                                              _carve_offset,
                                              covariance=_carve_cov)
        self._carve_feasible = np.hstack([_unpenalized_active, self.onestep_estimator])
        self._unpenalized_active = _unpenalized_active
        self._carve_invcov = H_AA
def test_sqrt_highdim_lasso(n=500, 
                            p=200, 
                            signal_fac=1.5, 
                            s=5, 
                            sigma=3, 
                            full=True, 
                            rho=0.4, 
                            randomizer_scale=1., 
                            ndraw=5000, 
                            burnin=1000, 
                            ridge_term=None,
                            compare_to_lasso=True):
    """
    Compare to R randomized lasso
    """

    inst, const = gaussian_instance, lasso.sqrt_lasso
    signal = np.sqrt(signal_fac * 2 * np.log(p))
    X, Y, beta = inst(n=n,
                      p=p, 
                      signal=signal, 
                      s=s, 
                      equicorrelated=False, 
                      rho=rho, 
                      sigma=sigma, 
                      random_signs=True)[:3]

    if ridge_term is None:
        mean_diag = np.mean((X**2).sum(0))
        ridge_term = (np.sqrt(mean_diag) / np.sqrt(n)) * np.sqrt(n / (n - 1.))

    W = np.ones(X.shape[1]) * choose_lambda(X) * 0.7

    perturb = np.random.standard_normal(p) * randomizer_scale / np.sqrt(n)

    conv = const(X, 
                 Y, 
                 W, 
                 randomizer_scale=randomizer_scale / np.sqrt(n),
                 perturb=perturb,
                 ridge_term=ridge_term)
    
    signs = conv.fit()
    nonzero = signs != 0

    # sanity check

    if compare_to_lasso:
        q_term = rr.identity_quadratic(ridge_term, 0, -perturb, 0)

        soln2, sqrt_loss = solve_sqrt_lasso(X, Y, W, solve_args={'min_its':1000}, quadratic=q_term, force_fat=True)
        soln = conv.initial_soln

        denom = np.linalg.norm(Y - X.dot(soln))
        new_weights = W * denom
        loss = rr.glm.gaussian(X, Y)
        pen = rr.weighted_l1norm(new_weights, lagrange=1.)
        prob = rr.simple_problem(loss, pen)

        rescaledQ = rr.identity_quadratic(ridge_term * denom,
                                          0,
                                          -perturb * denom,
                                          0)

        soln3 = prob.solve(quadratic=rescaledQ, min_its=1000, tol=1.e-12)
        np.testing.assert_allclose(conv._initial_omega, perturb * denom)
        np.testing.assert_allclose(soln, soln2)
        np.testing.assert_allclose(soln, soln3)

    if full:
        (observed_target, 
         cov_target, 
         cov_target_score, 
         alternatives) = full_targets(conv.loglike, 
                                      conv._W, 
                                      nonzero)
    else:
        (observed_target, 
         cov_target, 
         cov_target_score, 
         alternatives) = selected_targets(conv.loglike, 
                                          conv._W, 
                                          nonzero)

    _, pval, intervals = conv.summary(observed_target, 
                                      cov_target, 
                                      cov_target_score, 
                                      alternatives,
                                      ndraw=ndraw,
                                      burnin=burnin, 
                                      compute_intervals=False)

    return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0]
def test_weighted_l1():
    a =rr.weighted_l1norm(2*np.ones(10), lagrange=0.5)
    b= rr.l1norm(10, lagrange=1)
    z = np.random.standard_normal(10)
    npt.assert_equal(b.lagrange_prox(z), a.lagrange_prox(z))
    npt.assert_equal(b.dual[1].bound_prox(z), a.dual[1].bound_prox(z))
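
The identity being checked is just the rescaling of the weighted l1 norm (restated from the code):

$$
0.5 \sum_{j=1}^{10} 2\,|z_j| \;=\; \sum_{j=1}^{10} |z_j|,
$$

so the two atoms have the same Lagrange prox, and their duals -- sup-norm balls of radius $\lambda w_j = 1$ and $\lambda = 1$ respectively -- have the same bound prox as well.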
    def decompose_subgradient(self, condition=None, marginalize=None):
        """
        ADD DOCSTRING
        condition and marginalize should be disjoint
        """

        p = self.penalty.shape[0]
        condition_inactive = np.zeros(p, dtype=bool)

        if condition is None:
            condition = np.zeros(p, dtype=bool)

        if marginalize is None:
            marginalize = np.zeros(p, dtype=bool)
            marginalize[self._overall] = 0

        if np.any(condition * marginalize):
            raise ValueError(
                "cannot simultaneously condition and marginalize over a group's subgradient"
            )

        if not self._setup:
            raise ValueError(
                'setup_sampler should be called before using this function')

        _inactive = self._inactive

        limits_marginal = np.zeros_like(_inactive, dtype=float)

        condition_inactive = _inactive * condition
        moving_inactive = _inactive * ~(marginalize + condition)
        margin_inactive = _inactive * marginalize

        limits_marginal = self._lagrange
        if np.asarray(self._lagrange).shape in [(), (1, )]:
            # broadcast a scalar lagrange to a per-coordinate vector
            limits_marginal = np.ones_like(_inactive, dtype=float) * self._lagrange

        opt_linear, opt_offset = self.opt_transform

        new_linear = np.zeros((opt_linear.shape[0],
                               (self._active.sum() + self._unpenalized.sum() +
                                moving_inactive.sum())))
        new_linear[:, self.scaling_slice] = opt_linear[:, self.scaling_slice]
        new_linear[:,
                   self.unpenalized_slice] = opt_linear[:,
                                                        self.unpenalized_slice]

        inactive_moving_idx = np.nonzero(moving_inactive)[0]
        subgrad_idx = range(
            self._active.sum() + self._unpenalized.sum(),
            self._active.sum() + self._unpenalized.sum() +
            moving_inactive.sum())
        for _i, _s in zip(inactive_moving_idx, subgrad_idx):
            new_linear[_i, _s] = 1.

        observed_opt_state = self.observed_opt_state[:(
            self._active.sum() + self._unpenalized.sum() +
            moving_inactive.sum())]
        observed_opt_state[subgrad_idx] = self.initial_subgrad[moving_inactive]

        condition_linear = np.zeros(
            (opt_linear.shape[0],
             (self._active.sum() + self._unpenalized.sum() +
              condition_inactive.sum())))

        new_offset = opt_offset + 0.
        new_offset[condition_inactive] += self.initial_subgrad[
            condition_inactive]
        new_opt_transform = (new_linear, new_offset)

        if not hasattr(self.randomization, "cov_prec") or marginalize.sum(
        ):  # use Langevin -- not gaussian

            def _fraction(_cdf, _pdf, full_state_plus, full_state_minus,
                          margin_inactive):
                return (np.divide(
                    _pdf(full_state_plus) - _pdf(full_state_minus),
                    _cdf(full_state_plus) -
                    _cdf(full_state_minus)))[margin_inactive]

            def new_grad_log_density(query, limits_marginal, margin_inactive,
                                     _cdf, _pdf, new_opt_transform,
                                     deriv_log_dens, score_state, opt_state):

                full_state = score_state + reconstruct_opt(
                    new_opt_transform, opt_state)

                p = query.penalty.shape[0]
                weights = np.zeros(p)

                if margin_inactive.sum() > 0:
                    full_state_plus = full_state + limits_marginal * margin_inactive
                    full_state_minus = full_state - limits_marginal * margin_inactive
                    weights[margin_inactive] = _fraction(
                        _cdf, _pdf, full_state_plus, full_state_minus,
                        margin_inactive)
                weights[~margin_inactive] = deriv_log_dens(
                    full_state)[~margin_inactive]
                return -opt_linear.T.dot(weights)

            new_grad_log_density = functools.partial(
                new_grad_log_density, self, limits_marginal, margin_inactive,
                self.randomization._cdf, self.randomization._pdf,
                new_opt_transform, self.randomization._derivative_log_density)

            def new_log_density(query, limits_marginal, margin_inactive, _cdf,
                                _pdf, new_opt_transform, log_dens, score_state,
                                opt_state):

                full_state = score_state + reconstruct_opt(
                    new_opt_transform, opt_state)

                full_state = np.atleast_2d(full_state)
                p = query.penalty.shape[0]
                logdens = np.zeros(full_state.shape[0])

                if margin_inactive.sum() > 0:
                    full_state_plus = full_state + limits_marginal * margin_inactive
                    full_state_minus = full_state - limits_marginal * margin_inactive
                    logdens += np.sum(
                        np.log(_cdf(full_state_plus) -
                               _cdf(full_state_minus))[:, margin_inactive],
                        axis=1)

                logdens += log_dens(full_state[:, ~margin_inactive])

                return np.squeeze(
                    logdens
                )  # should this be negative to match the gradient log density?

            new_log_density = functools.partial(
                new_log_density, self, limits_marginal, margin_inactive,
                self.randomization._cdf, self.randomization._pdf,
                new_opt_transform, self.randomization._log_density)

            new_lagrange = self.penalty.weights[moving_inactive]
            new_dual = rr.weighted_l1norm(new_lagrange, lagrange=1.).conjugate

            def new_projection(dual, noverall, opt_state):
                new_state = opt_state.copy()
                new_state[self.scaling_slice] = np.maximum(
                    opt_state[self.scaling_slice], 0)
                new_state[noverall:] = dual.bound_prox(opt_state[noverall:])
                return new_state

            new_projection = functools.partial(new_projection, new_dual,
                                               self._overall.sum())

            new_selection_variable = copy(self.selection_variable)
            new_selection_variable['subgradient'] = self.observed_opt_state[
                condition_inactive]

            self.sampler = langevin_sampler(
                observed_opt_state,
                self.observed_score_state,
                self.score_transform,
                new_opt_transform,
                new_projection,
                new_grad_log_density,
                new_log_density,
                selection_info=(self, new_selection_variable))
        else:

            cov, prec = self.randomization.cov_prec
            prec_array = len(np.asarray(prec).shape) == 2

            if prec_array:
                cond_precision = new_linear.T.dot(prec.dot(new_linear))
                cond_cov = np.linalg.inv(cond_precision)
                logdens_linear = cond_cov.dot(new_linear.T.dot(prec))
            else:
                cond_precision = new_linear.T.dot(new_linear) * prec
                cond_cov = np.linalg.inv(cond_precision)
                logdens_linear = cond_cov.dot(new_linear.T) * prec

            cond_mean = -logdens_linear.dot(self.observed_score_state +
                                            new_offset)

            def log_density(logdens_linear, offset, cond_prec, score, opt):
                if score.ndim == 1:
                    mean_term = logdens_linear.dot(score.T + offset).T
                else:
                    mean_term = logdens_linear.dot(score.T + offset[:, None]).T
                arg = opt + mean_term
                return -0.5 * np.sum(arg * cond_prec.dot(arg.T).T, 1)

            log_density = functools.partial(log_density, logdens_linear,
                                            new_offset, cond_precision)

            # now make the constraints

            # scaling constraints

            # the scalings are first set of opt variables
            # then unpenalized
            # then the subgradients

            I = np.identity(cond_cov.shape[0])
            A_scaling = -I[self.scaling_slice]
            b_scaling = np.zeros(A_scaling.shape[0])

            A_subgrad = np.vstack(
                [I[self._overall.sum():], -I[self._overall.sum():]])

            inactive_lagrange = self.penalty.weights[moving_inactive]
            b_subgrad = np.hstack([inactive_lagrange, inactive_lagrange])

            linear_term = np.vstack([A_scaling, A_subgrad])
            offset = np.hstack([b_scaling, b_subgrad])

            affine_con = constraints(linear_term,
                                     offset,
                                     mean=cond_mean,
                                     covariance=cond_cov)

            logdens_transform = (logdens_linear, new_offset)
            self._sampler = affine_gaussian_sampler(
                affine_con,
                observed_opt_state,
                self.observed_score_state,
                log_density,
                logdens_transform,
                selection_info=self.selection_variable
            )  # should be signs and the subgradients we've conditioned on
    def form_penalty(self):
        penalty = weighted_l1norm(self.weights, lagrange=1.)
        penalty.quadratic = identity_quadratic(0, 0, self.random_linear_term, 0)
        return penalty
Example #25
    def form_penalty(self):
        penalty = weighted_l1norm(self.weights, lagrange=1.)
        penalty.quadratic = identity_quadratic(0, 0, self.random_linear_term,
                                               0)
        return penalty
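
In both `form_penalty` snippets the randomization enters through the penalty's `quadratic` attribute; reading off the code, the composite objective solved downstream is (a restatement, with $\omega$ = `self.random_linear_term` and $\ell$ whatever smooth loss the penalty is later paired with):

$$
\text{minimize}_{\beta}\; \ell(\beta) + \sum_j w_j |\beta_j| + \omega^T\beta,
$$

since `identity_quadratic(0, 0, omega, 0)` contributes only the linear term $\omega^T\beta$.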
Example #26
    def fit(self, tol=1.e-12, min_its=50, **solve_args):
        """
        Fit the lasso using `regreg`.
        This sets the attributes `soln`, `onestep` and
        forms the constraints necessary for post-selection inference
        by calling `form_constraints()`.

        Parameters
        ----------

        solve_args : keyword args
             Passed to `regreg.problems.simple_problem.solve`.

        Returns
        -------

        soln : np.float
             Solution to lasso.
             
        """

        penalty = weighted_l1norm(self.feature_weights, lagrange=1.)
        problem = simple_problem(self.loglike, penalty)
        lasso_solution = problem.solve(tol=tol, min_its=min_its, **solve_args)
        self.lasso_solution = lasso_solution
        if not np.all(lasso_solution == 0):
            self.active = np.nonzero(lasso_solution != 0)[0]
            self.inactive = lasso_solution == 0
            self.active_signs = np.sign(lasso_solution[self.active])
            self._active_soln = lasso_solution[self.active]
            H = self.loglike.hessian(self.lasso_solution)
            H_AA = H[self.active][:,self.active]
            H_AAinv = np.linalg.inv(H_AA)
            Q = self.loglike.quadratic
            G_Q = Q.objective(self.lasso_solution, 'grad')
            G = self.loglike.gradient(self.lasso_solution) + G_Q
            G_A = G[self.active]
            G_I = self._G_I = G[self.inactive]
            dbeta_A = H_AAinv.dot(G_A)
            self.onestep_estimator = self._active_soln - dbeta_A
            self.active_penalized = self.feature_weights[self.active] != 0
            self._constraints = constraints(-np.diag(self.active_signs)[self.active_penalized],
                                             (self.active_signs * dbeta_A)[self.active_penalized],
                                             covariance=H_AAinv)
            if self.inactive.sum():

                # inactive constraints

                H_IA = H[self.inactive][:,self.active]
                H_II = H[self.inactive][:,self.inactive]
                inactive_cov = H_II - H_IA.dot(H_AAinv).dot(H_IA.T)
                irrepresentable = H_IA.dot(H_AAinv)
                inactive_mean = irrepresentable.dot(-G_A)
                self._inactive_constraints = constraints(np.vstack([np.identity(self.inactive.sum()),
                                                                    -np.identity(self.inactive.sum())]),
                                                         np.hstack([self.feature_weights[self.inactive],
                                                                    self.feature_weights[self.inactive]]),
                                                         covariance=inactive_cov,
                                                         mean=inactive_mean)
                if not self._inactive_constraints(G_I):
                    warnings.warn('inactive constraint of KKT conditions not satisfied -- perhaps need to solve with more accuracy')

                if self.covariance_estimator is not None:

                    # make full constraints

                    _cov_FA = self.covariance_estimator(self.onestep_estimator,
                                                        self.active,
                                                        self.inactive)

                    _cov_IA = _cov_FA[len(self.active):]
                    _cov_AA = _cov_FA[:len(self.active)]

                    # X_{-E}^T(y - X_E \bar{\beta}_E)

                    _inactive_score = - G_I - inactive_mean

                    _beta_bar = self.onestep_estimator
                    _indep_linear_part = _cov_IA.dot(np.linalg.inv(_cov_AA))

                    # we "fix" _nuisance, effectively conditioning on it

                    _nuisance = _inactive_score - _indep_linear_part.dot(_beta_bar)
                    _upper_lim = (self.feature_weights[self.inactive] - 
                                  _nuisance - 
                                  inactive_mean)
                    _lower_lim = (_nuisance + 
                                  self.feature_weights[self.inactive] +
                                  inactive_mean)

                    _upper_linear = _indep_linear_part
                    _lower_linear = -_indep_linear_part

                    C = self._constraints
                    _full_linear = np.vstack([C.linear_part,
                                              _upper_linear,
                                              _lower_linear])

                    _full_offset = np.hstack([C.offset,
                                              _upper_lim,
                                              _lower_lim])

                    self._constraints = constraints(_full_linear,
                                                    _full_offset,
                                                    covariance=_cov_AA)

                    if not self._constraints(_beta_bar):
                        warnings.warn('constraints of KKT conditions on one-step estimator ' +
                                      'not satisfied -- perhaps need to solve with more ' +
                                      'accuracy')

            else:
                self._inactive_constraints = None
        else:
            self.active = []
            self.inactive = np.arange(lasso_solution.shape[0])
            self._constraints = None
            self._inactive_constraints = None
        return self.lasso_solution
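
The inactive-coordinate constraints built in this method encode the box part of the KKT conditions (restated from the code and the $X_{-E}^T(y - X_E\bar\beta_E)$ comment above; $E$ is the active set):

$$
-\lambda_{-E} \;\le\; \nabla\ell(\hat\beta)_{-E} + \nabla q(\hat\beta)_{-E} \;\le\; \lambda_{-E},
$$

where $\lambda_{-E}$ are the inactive feature weights and $q$ is the quadratic term attached to the loglikelihood; the warnings fire when the numerical solution violates these inequalities, which usually means the solver tolerance should be tightened.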