def test_group_lasso_weightedl1_lagrange():

    n, p = 100, 50
    X = np.random.standard_normal((n, p))
    Y = np.random.standard_normal(n)

    loss = rr.glm.gaussian(X, Y)
    weights = np.ones(p)
    weights[-2:] = np.inf
    weights[:2] = 0
    weight_dict = dict([(i, w) for i, w in enumerate(weights)])
    pen1 = rr.weighted_l1norm(weights, lagrange=0.5 * np.sqrt(n))
    pen2 = rr.group_lasso(np.arange(p),
                          weights=weight_dict,
                          lagrange=0.5 * np.sqrt(n))

    problem1 = rr.simple_problem(loss, pen1)
    problem2 = rr.simple_problem(loss, pen2)

    beta1 = problem1.solve(tol=1.e-14, min_its=500)
    beta2 = problem2.solve(tol=1.e-14, min_its=500)

    npt.assert_allclose(beta1, beta2)

    bound_val = pen1.seminorm(beta1, lagrange=1)
    bound1 = rr.weighted_l1norm(weights, bound=bound_val)
    bound2 = rr.group_lasso(np.arange(p),
                            weights=weight_dict,
                            bound=bound_val)

    problem3 = rr.simple_problem(loss, bound1)
    problem4 = rr.simple_problem(loss, bound2)

    beta3 = problem3.solve(tol=1.e-14, min_its=500)
    beta4 = problem4.solve(tol=1.e-14, min_its=500)

    npt.assert_allclose(beta3, beta4)
    npt.assert_allclose(beta3, beta1)

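# Editorial note on why the two penalties above agree: with singleton
# groups, the group lasso penalty reduces term by term to a weighted l1
# norm, since for a one-coordinate group ||beta_g||_2 = |beta_j|, so
#
#     sum_g w_g ||beta_g||_2 = sum_j w_j |beta_j|.
#
# A weight of 0 leaves a coordinate unpenalized and a weight of np.inf
# forces it to zero under either parametrization, which is what the test
# exercises at the two ends of `weights`.
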
def __init__(self,
             loglike,
             groups,
             weights,
             ridge_term,
             randomizer,
             use_lasso=True,  # should lasso solver be used where applicable - defaults to True
             perturb=None):

    _check_groups(groups)  # make sure groups looks sensible

    # log likelihood : quadratic loss
    self.loglike = loglike
    self.nfeature = self.loglike.shape[0]

    # ridge parameter
    self.ridge_term = ridge_term

    # group lasso penalty (from regreg)
    # use regular lasso penalty if all groups are size 1
    if use_lasso and groups.size == np.unique(groups).size:
        # need to provide weights as an np.array rather than a dictionary
        weights_np = np.array([w[1] for w in sorted(weights.items())])
        self.penalty = rr.weighted_l1norm(weights=weights_np, lagrange=1.)
    else:
        self.penalty = rr.group_lasso(groups, weights=weights, lagrange=1.)

    # store groups as a class variable since the non-group lasso doesn't
    self.groups = groups

    self._initial_omega = perturb

    # gaussian randomization
    self.randomizer = randomizer

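# A small illustration of the `use_lasso` branch above, assuming numpy is
# imported as np; the names here are illustrative, not part of the class.
def _example_singleton_groups(p=4):
    groups = np.arange(p)                   # every group has size 1
    weights = {i: 1. for i in range(p)}     # weights keyed by group id
    assert groups.size == np.unique(groups).size
    # flatten the dict (in group order) into the array weighted_l1norm needs
    weights_np = np.array([w[1] for w in sorted(weights.items())])
    return weights_np
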
def test_changepoint_scaled():

    p = 150
    M = multiscale(p)
    M.minsize = 10
    X = ra.adjoint(M)

    Y = np.random.standard_normal(p)
    Y[20:50] += 8
    Y += 2
    meanY = Y.mean()

    lammax = np.fabs(np.sqrt(M.sizes) * X.adjoint_map(Y) / (1 + np.sqrt(np.log(M.sizes)))).max()

    penalty = rr.weighted_l1norm((1 + np.sqrt(np.log(M.sizes))) / np.sqrt(M.sizes),
                                 lagrange=0.5 * lammax)
    loss = rr.squared_error(X, Y - meanY)
    problem = rr.simple_problem(loss, penalty)
    soln = problem.solve()
    Yhat = X.linear_map(soln)
    Yhat += meanY

    if INTERACTIVE:
        plt.scatter(np.arange(p), Y)
        plt.plot(np.arange(p), Yhat)
        plt.show()

def test_weighted_l1_with_zero():
    z = np.random.standard_normal(5)
    a = rr.weighted_l1norm([0, 1, 1, 1, 1], lagrange=0.5)
    b = a.dual[1]
    c = rr.l1norm(4, lagrange=0.5)
    npt.assert_equal(a.lagrange_prox(z), z - b.bound_prox(z))
    npt.assert_equal(a.lagrange_prox(z)[0], z[0])
    npt.assert_equal(a.lagrange_prox(z)[1:], c.lagrange_prox(z[1:]))

def __init__(self,
             Q,
             X,
             y,
             feature_weights,
             ridge_term=None,
             randomizer_scale=None,
             perturb=None):
    r"""
    Create a new post-selection object for the LASSO problem

    Parameters
    ----------

    Q : np.ndarray((p, p))
        Quadratic form for the quadratic loss.

    X : np.ndarray((n, p))
        Design matrix.

    y : np.ndarray(n)
        Response.

    feature_weights : np.ndarray
        Feature weights for L-1 penalty. If a float,
        it is broadcast to all features.

    ridge_term : float
        How big a ridge term to add?

    randomizer_scale : float
        Scale for IID components of randomization.

    perturb : np.ndarray
        Random perturbation subtracted as a linear
        term in the objective function.
    """

    (self.Q,
     self.X,
     self.y) = (Q, X, y)
    self.loss = rr.quadratic_loss(Q.shape[0], Q=Q)
    n, p = X.shape
    self.nfeature = p

    if np.asarray(feature_weights).shape == ():
        feature_weights = np.ones(p) * feature_weights
    self.feature_weights = np.asarray(feature_weights)

    mean_diag = np.diag(Q).mean()
    if ridge_term is None:
        ridge_term = np.std(y) * np.sqrt(mean_diag) / np.sqrt(n - 1)

    if randomizer_scale is None:
        randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(y) * np.sqrt(n / (n - 1.))

    self.randomizer = randomization.isotropic_gaussian((p,), randomizer_scale)
    self.ridge_term = ridge_term
    self.penalty = rr.weighted_l1norm(self.feature_weights, lagrange=1.)
    self._initial_omega = perturb  # random perturbation

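# A minimal sketch reproducing the default scalings above, assuming numpy
# is imported as np; it only re-evaluates the ridge_term / randomizer_scale
# formulas on illustrative data, no selection objects are constructed.
def _example_default_scales(n=100, p=10):
    X = np.random.standard_normal((n, p))
    y = np.random.standard_normal(n)
    Q = X.T.dot(X)
    mean_diag = np.diag(Q).mean()
    ridge_term = np.std(y) * np.sqrt(mean_diag) / np.sqrt(n - 1)
    randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(y) * np.sqrt(n / (n - 1.))
    return ridge_term, randomizer_scale
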
def test_weighted_l1_bound_loose():
    n, p = 100, 10
    X = np.random.standard_normal((n, p))
    Y = np.random.standard_normal(n)
    beta = np.linalg.pinv(X).dot(Y)
    bound = 2 * np.fabs(beta).sum()
    atom = rr.weighted_l1norm(np.ones(p), bound=bound)
    loss = rr.squared_error(X, Y)
    problem = rr.simple_problem(loss, atom)
    soln = problem.solve(tol=1.e-12, min_its=100)
    npt.assert_allclose(soln, beta)

def solve_sqrt_lasso_fat(X, Y, weights=None, initial=None, quadratic=None, solve_args={}):
    r"""
    Solve the square-root LASSO optimization problem:

    $$
    \text{minimize}_{\beta} \|y-X\beta\|_2 + \|D\beta\|_1,
    $$

    where $D$ is the diagonal matrix with weights on its diagonal.

    Parameters
    ----------

    y : np.float((n,))
        The target, in the model $y = X\beta$

    X : np.float((n, p))
        The data, in the model $y = X\beta$

    weights : np.float
        Coefficients of the L-1 penalty in
        optimization problem, note that different
        coordinates can have different coefficients.

    initial : np.float(p)
        Initial point for optimization.

    solve_args : dict
        Arguments passed to regreg solver.

    quadratic : `regreg.identity_quadratic`
        A quadratic term added to objective function.
    """
    #X = rr.astransform(X)
    #n, p = X.output_shape[0], X.input_shape[0]
    n, p = X.shape

    if weights is None:
        lam = choose_lambda(X)
        weights = lam * np.ones((p,))

    loss = sqlasso_objective(X, Y)
    penalty = rr.weighted_l1norm(weights, lagrange=1.)
    problem = rr.simple_problem(loss, penalty)
    if initial is not None:
        problem.coefs[:] = initial
    soln = problem.solve(quadratic, **solve_args)
    return soln, loss

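# A minimal usage sketch for solve_sqrt_lasso_fat above, assuming numpy is
# imported as np and that this module's sqlasso_objective / choose_lambda
# are available; the data and weights here are illustrative only.
def _example_solve_sqrt_lasso_fat(n=100, p=20):
    X = np.random.standard_normal((n, p))
    Y = np.random.standard_normal(n)
    weights = 0.5 * np.ones(p)
    soln, loss = solve_sqrt_lasso_fat(X, Y, weights=weights,
                                      solve_args={'tol': 1.e-10, 'min_its': 100})
    return soln
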
def test_equivalence_sqrtlasso(n=200, p=400, s=10, sigma=3.):
    """
    Check equivalent LASSO and sqrtLASSO solutions.
    """

    Y = np.random.standard_normal(n) * sigma
    beta = np.zeros(p)
    beta[:s] = 8 * (2 * np.random.binomial(1, 0.5, size=(s,)) - 1)
    X = np.random.standard_normal((n, p)) + 0.3 * np.random.standard_normal(n)[:, None]
    X /= (X.std(0)[None, :] * np.sqrt(n))
    Y += np.dot(X, beta) * sigma
    lam_theor = choose_lambda(X, quantile=0.9)
    weights = lam_theor * np.ones(p)
    weights[:3] = 0.
    soln1, loss1 = solve_sqrt_lasso(X,
                                    Y,
                                    weights=weights,
                                    quadratic=None,
                                    solve_args={'min_its': 500,
                                                'tol': 1.e-10})

    G1 = loss1.smooth_objective(soln1, 'grad')

    # find active set, and estimate of sigma

    active = (soln1 != 0)
    nactive = active.sum()
    subgrad = np.sign(soln1[active]) * weights[active]
    X_E = X[:, active]
    X_Ei = np.linalg.pinv(X_E)
    sigma_E = np.linalg.norm(Y - X_E.dot(X_Ei.dot(Y))) / np.sqrt(n - nactive)
    multiplier = sigma_E * np.sqrt((n - nactive) / (1 - np.linalg.norm(X_Ei.T.dot(subgrad))**2))

    # XXX how should quadratic be changed?
    # multiply everything by sigma_E?

    loss2 = rr.glm.gaussian(X, Y)
    penalty = rr.weighted_l1norm(weights, lagrange=multiplier)
    problem = rr.simple_problem(loss2, penalty)

    soln2 = problem.solve(tol=1.e-12, min_its=200)
    G2 = loss2.smooth_objective(soln2, 'grad') / multiplier

    np.testing.assert_allclose(G1[3:], G2[3:])
    np.testing.assert_allclose(soln1, soln2)

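# Editorial sketch of where `multiplier` comes from. At the sqrt-LASSO
# solution with residual r = Y - X soln1, the KKT conditions read
#
#     X^T r = ||r||_2 * (w o u),   u in the subdifferential of the l1 norm,
#
# so soln1 also solves the Gaussian LASSO whose weights are w scaled by
# ||r||_2. Writing r as the OLS residual (I - P_E) Y plus the orthogonal
# in-column-space piece ||r||_2 * X_Ei^T (w_E o s_E) gives
#
#     ||r||^2 = (n - |E|) * sigma_E^2 + ||r||^2 * ||X_Ei^T (w_E o s_E)||^2,
#
# and solving for ||r||_2 yields
#
#     ||r||_2 = sigma_E * sqrt((n - |E|) / (1 - ||X_Ei^T subgrad||^2)),
#
# which is exactly the `multiplier` used as the LASSO lagrange above.
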
def __init__(self,
             loglike,
             feature_weights,
             proportion_select,
             ridge_term=0,
             perturb=None):

    (self.loglike,
     self.feature_weights,
     self.proportion_select,
     self.ridge_term) = (loglike,
                         feature_weights,
                         proportion_select,
                         ridge_term)

    self.nfeature = p = self.loglike.shape[0]
    self.penalty = rr.weighted_l1norm(self.feature_weights, lagrange=1.)
    self._initial_omega = perturb

def solve_problem(Qbeta_bar, Q, lagrange, initial=None):

    p = Qbeta_bar.shape[0]
    loss = rr.quadratic_loss((p,),
                             Q=Q,
                             quadratic=rr.identity_quadratic(0, 0, -Qbeta_bar, 0))

    lagrange = np.asarray(lagrange)
    if lagrange.shape in [(), (1,)]:
        lagrange = np.ones(p) * lagrange
    pen = rr.weighted_l1norm(lagrange, lagrange=1.)

    problem = rr.simple_problem(loss, pen)
    if initial is not None:
        problem.coefs[:] = initial

    soln = problem.solve(tol=1.e-12, min_its=500)
    return soln

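# A minimal usage sketch for solve_problem, assuming numpy is imported as
# np; the data are illustrative. With Q equal to the identity the problem
# separates coordinatewise, so the solution is soft-thresholding of
# beta_bar at the lagrange value.
def _example_solve_problem(p=5):
    Q = np.identity(p)
    beta_bar = np.arange(p, dtype=float)
    Qbeta_bar = Q.dot(beta_bar)
    soln = solve_problem(Qbeta_bar, Q, lagrange=1.)
    # expect soln close to np.sign(beta_bar) * np.maximum(np.abs(beta_bar) - 1, 0)
    return soln
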
def __init__(self,
             loglike,
             feature_weights,
             ridge_term,
             randomizer_scale,
             randomizer='gaussian',
             parametric_cov_estimator=False,
             perturb=None):
    r"""
    Create a new post-selection object for the LASSO problem

    Parameters
    ----------

    loglike : `regreg.smooth.glm.glm`
        A (negative) log-likelihood as implemented in `regreg`.

    feature_weights : np.ndarray
        Feature weights for L-1 penalty. If a float,
        it is broadcast to all features.

    ridge_term : float
        How big a ridge term to add?

    randomizer_scale : float
        Scale for IID components of randomization.

    randomizer : str (optional)
        One of ['laplace', 'logistic', 'gaussian']
    """

    self.loglike = loglike
    self.nfeature = p = self.loglike.shape[0]

    if np.asarray(feature_weights).shape == ():
        feature_weights = np.ones(loglike.shape) * feature_weights
    self.feature_weights = np.asarray(feature_weights)

    self.parametric_cov_estimator = parametric_cov_estimator

    if randomizer == 'laplace':
        self.randomizer = randomization.laplace((p,), scale=randomizer_scale)
    elif randomizer == 'gaussian':
        self.randomizer = randomization.isotropic_gaussian((p,), randomizer_scale)
    elif randomizer == 'logistic':
        self.randomizer = randomization.logistic((p,), scale=randomizer_scale)

    self.ridge_term = ridge_term
    self.penalty = rr.weighted_l1norm(self.feature_weights, lagrange=1.)
    self._initial_omega = perturb

def solve_sqrt_lasso_fat(X, Y, weights=None, initial=None, quadratic=None, solve_args={}):
    r"""
    Solve the square-root LASSO optimization problem:

    $$
    \text{minimize}_{\beta} \|y-X\beta\|_2 + \|D\beta\|_1,
    $$

    where $D$ is the diagonal matrix with weights on its diagonal.

    Parameters
    ----------

    y : np.float((n,))
        The target, in the model $y = X\beta$

    X : np.float((n, p))
        The data, in the model $y = X\beta$

    weights : np.float
        Coefficients of the L-1 penalty in
        optimization problem, note that different
        coordinates can have different coefficients.

    initial : np.float(p)
        Initial point for optimization.

    solve_args : dict
        Arguments passed to regreg solver.

    quadratic : `regreg.identity_quadratic`
        A quadratic term added to objective function.
    """
    X = rr.astransform(X)
    n, p = X.output_shape[0], X.input_shape[0]

    if weights is None:
        lam = choose_lambda(X)
        weights = lam * np.ones((p,))

    loss = sqlasso_objective(X, Y)
    penalty = rr.weighted_l1norm(weights, lagrange=1.)
    problem = rr.simple_problem(loss, penalty)
    if initial is not None:
        problem.coefs[:] = initial
    soln = problem.solve(quadratic, **solve_args)
    return soln, loss

def __init__(self,
             loglike,
             feature_weights,
             ridge_term,
             randomizer,
             perturb=None):
    r"""
    Create a new post-selection object for the LASSO problem

    Parameters
    ----------

    loglike : `regreg.smooth.glm.glm`
        A (negative) log-likelihood as implemented in `regreg`.

    feature_weights : np.ndarray
        Feature weights for L-1 penalty. If a float,
        it is broadcast to all features.

    ridge_term : float
        How big a ridge term to add?

    randomizer : object
        Randomizer -- contains representation of randomization density.

    perturb : np.ndarray
        Random perturbation subtracted as a linear
        term in the objective function.
    """

    self.loglike = loglike
    self.nfeature = p = self.loglike.shape[0]

    if np.asarray(feature_weights).shape == ():
        feature_weights = np.ones(loglike.shape) * feature_weights
    self.feature_weights = np.asarray(feature_weights)

    self.ridge_term = ridge_term
    self.penalty = rr.weighted_l1norm(self.feature_weights, lagrange=1.)
    self._initial_omega = perturb  # random perturbation
    self.randomizer = randomizer

def test_choose_parameter(delta=2, p=60):

    signal = np.zeros(p)
    signal[(p//2):] += delta
    Z = np.random.standard_normal(p) + signal
    p = Z.shape[0]
    M = multiscale(p)
    M.scaling = np.sqrt(M.sizes)

    lam = choose_tuning_parameter(M)
    weights = (lam + np.sqrt(2 * np.log(p / M.sizes))) / np.sqrt(p)

    Z0 = Z - Z.mean()
    loss = rr.squared_error(ra.adjoint(M), Z0)
    penalty = rr.weighted_l1norm(weights, lagrange=1.)
    problem = rr.simple_problem(loss, penalty)
    coef = problem.solve()
    active = coef != 0

    if active.sum():
        X = M.form_matrix(M.slices[active])[0]

def __init__(self, X, Y, l_theory, l_min, l_1se, sigma_reid):

    parametric_method.__init__(self, X, Y, l_theory, l_min, l_1se, sigma_reid)
    self.lagrange = l_1se * np.ones(X.shape[1])

    n, p = self.X.shape
    n1 = int(self.selection_frac * n)
    X1, X2 = self.X1, self.X2 = self.X[:n1], self.X[n1:]
    Y1, Y2 = self.Y1, self.Y2 = self.Y[:n1], self.Y[n1:]

    pen = rr.weighted_l1norm(np.sqrt(n1) * self.lagrange, lagrange=1.)
    loss = rr.squared_error(X1, Y1)
    problem = rr.simple_problem(loss, pen)
    soln = problem.solve()

    self.active_set = np.nonzero(soln)[0]
    self.signs = np.sign(soln)[self.active_set]

    self._fit = True

def fit(self, **solve_args):
    """
    Fit the lasso using `regreg`.
    This sets the attributes `soln`, `onestep` and
    forms the constraints necessary for post-selection inference
    by calling `form_constraints()`.

    Parameters
    ----------

    solve_args : keyword args
        Passed to `regreg.problems.simple_problem.solve`.

    Returns
    -------

    soln : np.float
        Solution to lasso.
    """

    penalty = weighted_l1norm(self.feature_weights, lagrange=1.)
    problem = simple_problem(self.loglike, penalty)
    _soln = problem.solve(**solve_args)
    self._soln = _soln
    if not np.all(_soln == 0):
        self.active = np.nonzero(_soln != 0)[0]
        self.active_signs = np.sign(_soln[self.active])
        self._active_soln = _soln[self.active]
        H = self.loglike.hessian(self._soln)[self.active][:, self.active]
        Hinv = np.linalg.inv(H)
        G = self.loglike.gradient(self._soln)[self.active]
        delta = Hinv.dot(G)
        self._onestep = self._active_soln - delta
        self.active_penalized = self.feature_weights[self.active] != 0
        self._constraints = constraints(-np.diag(self.active_signs)[self.active_penalized],
                                        (self.active_signs * delta)[self.active_penalized],
                                        covariance=Hinv)
    else:
        self.active = []
    return self._soln

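# Editorial note on the constraint formed above: `constraints(A, b, ...)`
# encodes the polyhedron {z : A z <= b}. With A = -diag(s_E) (restricted
# to penalized coordinates) and b = s_E o delta, the condition on the
# one-step estimator beta_bar = beta_hat_E - delta is
#
#     -s_E o beta_bar <= s_E o delta   <=>   s_E o (beta_bar + delta) >= 0,
#
# i.e. the observed signs sign(beta_hat_E) = s_E of the penalized active
# coordinates, restated as an affine constraint on beta_bar.
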
def test_group_lasso_weightedl1_bound():

    n, p = 100, 50
    X = np.random.standard_normal((n, p))
    Y = np.random.standard_normal(n)

    loss = rr.glm.gaussian(X, Y)
    weights = np.ones(p)
    weights[-2:] = np.inf
    weights[:2] = 0
    weight_dict = dict([(i, w) for i, w in enumerate(weights)])
    bound1 = rr.weighted_l1norm(weights, bound=2)
    bound2 = rr.group_lasso(np.arange(p), weights=weight_dict, bound=2)

    problem1 = rr.simple_problem(loss, bound1)
    problem2 = rr.simple_problem(loss, bound2)

    beta1 = problem1.solve(tol=1.e-14, min_its=500)
    beta2 = problem2.solve(tol=1.e-14, min_its=500)

    npt.assert_allclose(beta1, beta2)

def fit(self, tol=1.e-12, min_its=50, use_full=True, **solve_args):

    lasso.fit(self, tol=tol, min_its=min_its, **solve_args)

    _feature_weights = self.feature_weights.copy()
    _feature_weights[self.active] = 0.
    _feature_weights[self.inactive] = np.inf

    _unpenalized_problem = simple_problem(self.loglike_inference,
                                          weighted_l1norm(_feature_weights, lagrange=1.))
    _unpenalized = _unpenalized_problem.solve(**solve_args)
    self._unpenalized_active = _unpenalized[self.active]

    if use_full:
        H = self.loglike_full.hessian(_unpenalized)
        n_inference = self.loglike_inference.data[0].shape[0]
        n_full = self.loglike_full.data[0].shape[0]
        H *= (1. * n_inference / n_full)
    else:
        H = self.loglike_inference.hessian(_unpenalized)

    H_AA = H[self.active][:, self.active]
    self._cov_inference = np.linalg.inv(H_AA)

def fit(self, tol=1.e-12, min_its=50, **solve_args):

    lasso.fit(self, tol=tol, min_its=min_its, **solve_args)

    n1 = self.loglike.get_data()[0].shape[0]
    n = self.loglike_full.get_data()[0].shape[0]

    _feature_weights = self.feature_weights.copy()
    _feature_weights[self.active] = 0.
    _feature_weights[self.inactive] = np.inf

    _unpenalized_problem = simple_problem(self.loglike_full,
                                          weighted_l1norm(_feature_weights, lagrange=1.))
    _unpenalized = _unpenalized_problem.solve(**solve_args)
    _unpenalized_active = _unpenalized[self.active]

    s = len(self.active)
    H = self.loglike_full.hessian(_unpenalized)
    H_AA = H[self.active][:, self.active]

    _cov_block = np.linalg.inv(H_AA)
    _subsample_block = (n * 1. / n1) * _cov_block
    _carve_cov = np.zeros((2*s, 2*s))
    _carve_cov[:s][:, :s] = _cov_block
    _carve_cov[s:][:, :s] = _subsample_block
    _carve_cov[:s][:, s:] = _subsample_block
    _carve_cov[s:][:, s:] = _subsample_block

    _carve_linear_part = self._constraints.linear_part.dot(np.identity(2*s)[s:])
    _carve_offset = self._constraints.offset
    self._carve_constraints = constraints(_carve_linear_part,
                                          _carve_offset,
                                          covariance=_carve_cov)
    self._carve_feasible = np.hstack([_unpenalized_active, self.onestep_estimator])
    self._unpenalized_active = _unpenalized_active
    self._carve_invcov = H_AA

def test_sqrt_highdim_lasso(n=500,
                            p=200,
                            signal_fac=1.5,
                            s=5,
                            sigma=3,
                            full=True,
                            rho=0.4,
                            randomizer_scale=1.,
                            ndraw=5000,
                            burnin=1000,
                            ridge_term=None,
                            compare_to_lasso=True):
    """
    Compare to R randomized lasso
    """

    inst, const = gaussian_instance, lasso.sqrt_lasso
    signal = np.sqrt(signal_fac * 2 * np.log(p))

    X, Y, beta = inst(n=n,
                      p=p,
                      signal=signal,
                      s=s,
                      equicorrelated=False,
                      rho=rho,
                      sigma=sigma,
                      random_signs=True)[:3]

    if ridge_term is None:
        mean_diag = np.mean((X**2).sum(0))
        ridge_term = (np.sqrt(mean_diag) / np.sqrt(n)) * np.sqrt(n / (n - 1.))

    W = np.ones(X.shape[1]) * choose_lambda(X) * 0.7

    perturb = np.random.standard_normal(p) * randomizer_scale / np.sqrt(n)

    conv = const(X,
                 Y,
                 W,
                 randomizer_scale=randomizer_scale / np.sqrt(n),
                 perturb=perturb,
                 ridge_term=ridge_term)
    signs = conv.fit()
    nonzero = signs != 0

    # sanity check

    if compare_to_lasso:
        q_term = rr.identity_quadratic(ridge_term, 0, -perturb, 0)
        soln2, sqrt_loss = solve_sqrt_lasso(X,
                                            Y,
                                            W,
                                            solve_args={'min_its': 1000},
                                            quadratic=q_term,
                                            force_fat=True)
        soln = conv.initial_soln

        denom = np.linalg.norm(Y - X.dot(soln))
        new_weights = W * denom

        loss = rr.glm.gaussian(X, Y)
        pen = rr.weighted_l1norm(new_weights, lagrange=1.)
        prob = rr.simple_problem(loss, pen)

        rescaledQ = rr.identity_quadratic(ridge_term * denom,
                                          0,
                                          -perturb * denom,
                                          0)

        soln3 = prob.solve(quadratic=rescaledQ,
                           min_its=1000,
                           tol=1.e-12)

        np.testing.assert_allclose(conv._initial_omega, perturb * denom)
        np.testing.assert_allclose(soln, soln2)
        np.testing.assert_allclose(soln, soln3)

    if full:
        (observed_target,
         cov_target,
         cov_target_score,
         alternatives) = full_targets(conv.loglike, conv._W, nonzero)
    else:
        (observed_target,
         cov_target,
         cov_target_score,
         alternatives) = selected_targets(conv.loglike, conv._W, nonzero)

    _, pval, intervals = conv.summary(observed_target,
                                      cov_target,
                                      cov_target_score,
                                      alternatives,
                                      ndraw=ndraw,
                                      burnin=burnin,
                                      compute_intervals=False)

    return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0]

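# Editorial note: the `compare_to_lasso` branch reuses the identity from
# test_equivalence_sqrtlasso above: at the sqrt-LASSO solution, the
# problem matches a Gaussian LASSO whose l1 weights are rescaled by the
# residual norm denom = ||Y - X soln||_2. The randomization quadratic has
# to be rescaled by the same factor, which is why new_weights = W * denom
# and rescaledQ multiplies both the ridge coefficient and the perturbation
# by denom.
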
def test_weighted_l1():
    a = rr.weighted_l1norm(2 * np.ones(10), lagrange=0.5)
    b = rr.l1norm(10, lagrange=1)
    z = np.random.standard_normal(10)
    npt.assert_equal(b.lagrange_prox(z), a.lagrange_prox(z))
    npt.assert_equal(b.dual[1].bound_prox(z), a.dual[1].bound_prox(z))

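# Editorial note: the two atoms agree because the weights and the lagrange
# parameter trade off directly,
#
#     0.5 * sum_j 2 * |z_j| = 1.0 * sum_j |z_j|,
#
# so weighted_l1norm(2 * np.ones(10), lagrange=0.5) and
# l1norm(10, lagrange=1) have identical proximal maps, and so do their
# duals, which is what the two assertions check.
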
def decompose_subgradient(self, condition=None, marginalize=None):
    """
    ADD DOCSTRING

    condition and marginalize should be disjoint
    """

    p = self.penalty.shape[0]
    condition_inactive = np.zeros(p, dtype=bool)

    if condition is None:
        condition = np.zeros(p, dtype=bool)

    if marginalize is None:
        marginalize = np.zeros(p, dtype=bool)
        marginalize[self._overall] = 0

    if np.any(condition * marginalize):
        raise ValueError("cannot simultaneously condition and marginalize over a group's subgradient")

    if not self._setup:
        raise ValueError('setup_sampler should be called before using this function')

    _inactive = self._inactive

    limits_marginal = np.zeros_like(_inactive, float)

    condition_inactive = _inactive * condition
    moving_inactive = _inactive * ~(marginalize + condition)
    margin_inactive = _inactive * marginalize

    limits_marginal = self._lagrange
    if np.asarray(self._lagrange).shape in [(), (1,)]:
        limits_marginal = np.ones_like(_inactive) * self._lagrange

    opt_linear, opt_offset = self.opt_transform

    new_linear = np.zeros((opt_linear.shape[0], (self._active.sum() +
                                                 self._unpenalized.sum() +
                                                 moving_inactive.sum())))
    new_linear[:, self.scaling_slice] = opt_linear[:, self.scaling_slice]
    new_linear[:, self.unpenalized_slice] = opt_linear[:, self.unpenalized_slice]

    inactive_moving_idx = np.nonzero(moving_inactive)[0]
    subgrad_idx = range(self._active.sum() + self._unpenalized.sum(),
                        self._active.sum() + self._unpenalized.sum() +
                        moving_inactive.sum())
    for _i, _s in zip(inactive_moving_idx, subgrad_idx):
        new_linear[_i, _s] = 1.

    observed_opt_state = self.observed_opt_state[:(self._active.sum() +
                                                   self._unpenalized.sum() +
                                                   moving_inactive.sum())]
    observed_opt_state[subgrad_idx] = self.initial_subgrad[moving_inactive]

    condition_linear = np.zeros((opt_linear.shape[0], (self._active.sum() +
                                                       self._unpenalized.sum() +
                                                       condition_inactive.sum())))

    new_offset = opt_offset + 0.
    new_offset[condition_inactive] += self.initial_subgrad[condition_inactive]

    new_opt_transform = (new_linear, new_offset)

    if not hasattr(self.randomization, "cov_prec") or marginalize.sum():
        # use Langevin -- not gaussian

        def _fraction(_cdf, _pdf, full_state_plus, full_state_minus, margin_inactive):
            return (np.divide(_pdf(full_state_plus) - _pdf(full_state_minus),
                              _cdf(full_state_plus) - _cdf(full_state_minus)))[margin_inactive]

        def new_grad_log_density(query,
                                 limits_marginal,
                                 margin_inactive,
                                 _cdf,
                                 _pdf,
                                 new_opt_transform,
                                 deriv_log_dens,
                                 score_state,
                                 opt_state):

            full_state = score_state + reconstruct_opt(new_opt_transform, opt_state)

            p = query.penalty.shape[0]
            weights = np.zeros(p)

            if margin_inactive.sum() > 0:
                full_state_plus = full_state + limits_marginal * margin_inactive
                full_state_minus = full_state - limits_marginal * margin_inactive
                weights[margin_inactive] = _fraction(_cdf,
                                                     _pdf,
                                                     full_state_plus,
                                                     full_state_minus,
                                                     margin_inactive)
            weights[~margin_inactive] = deriv_log_dens(full_state)[~margin_inactive]
            return -opt_linear.T.dot(weights)

        new_grad_log_density = functools.partial(new_grad_log_density,
                                                 self,
                                                 limits_marginal,
                                                 margin_inactive,
                                                 self.randomization._cdf,
                                                 self.randomization._pdf,
                                                 new_opt_transform,
                                                 self.randomization._derivative_log_density)

        def new_log_density(query,
                            limits_marginal,
                            margin_inactive,
                            _cdf,
                            _pdf,
                            new_opt_transform,
                            log_dens,
                            score_state,
                            opt_state):

            full_state = score_state + reconstruct_opt(new_opt_transform, opt_state)

            full_state = np.atleast_2d(full_state)
            p = query.penalty.shape[0]
            logdens = np.zeros(full_state.shape[0])

            if margin_inactive.sum() > 0:
                full_state_plus = full_state + limits_marginal * margin_inactive
                full_state_minus = full_state - limits_marginal * margin_inactive
                logdens += np.sum(np.log(_cdf(full_state_plus) -
                                         _cdf(full_state_minus))[:, margin_inactive],
                                  axis=1)

            logdens += log_dens(full_state[:, ~margin_inactive])

            return np.squeeze(logdens)  # should this be negative to match the gradient log density?

        new_log_density = functools.partial(new_log_density,
                                            self,
                                            limits_marginal,
                                            margin_inactive,
                                            self.randomization._cdf,
                                            self.randomization._pdf,
                                            new_opt_transform,
                                            self.randomization._log_density)

        new_lagrange = self.penalty.weights[moving_inactive]
        new_dual = rr.weighted_l1norm(new_lagrange, lagrange=1.).conjugate

        def new_projection(dual, noverall, opt_state):
            new_state = opt_state.copy()
            new_state[self.scaling_slice] = np.maximum(opt_state[self.scaling_slice], 0)
            new_state[noverall:] = dual.bound_prox(opt_state[noverall:])
            return new_state

        new_projection = functools.partial(new_projection,
                                           new_dual,
                                           self._overall.sum())

        new_selection_variable = copy(self.selection_variable)
        new_selection_variable['subgradient'] = self.observed_opt_state[condition_inactive]

        self.sampler = langevin_sampler(observed_opt_state,
                                        self.observed_score_state,
                                        self.score_transform,
                                        new_opt_transform,
                                        new_projection,
                                        new_grad_log_density,
                                        new_log_density,
                                        selection_info=(self, new_selection_variable))
    else:
        cov, prec = self.randomization.cov_prec
        prec_array = len(np.asarray(prec).shape) == 2

        if prec_array:
            cond_precision = new_linear.T.dot(prec.dot(new_linear))
            cond_cov = np.linalg.inv(cond_precision)
            logdens_linear = cond_cov.dot(new_linear.T.dot(prec))
        else:
            cond_precision = new_linear.T.dot(new_linear) * prec
            cond_cov = np.linalg.inv(cond_precision)
            logdens_linear = cond_cov.dot(new_linear.T) * prec

        cond_mean = -logdens_linear.dot(self.observed_score_state + new_offset)

        def log_density(logdens_linear, offset, cond_prec, score, opt):
            if score.ndim == 1:
                mean_term = logdens_linear.dot(score.T + offset).T
            else:
                mean_term = logdens_linear.dot(score.T + offset[:, None]).T
            arg = opt + mean_term
            return -0.5 * np.sum(arg * cond_prec.dot(arg.T).T, 1)

        log_density = functools.partial(log_density,
                                        logdens_linear,
                                        new_offset,
                                        cond_precision)

        # now make the constraints

        # scaling constraints
        # the scalings are first set of opt variables
        # then unpenalized
        # then the subgradients

        I = np.identity(cond_cov.shape[0])
        A_scaling = -I[self.scaling_slice]
        b_scaling = np.zeros(A_scaling.shape[0])

        A_subgrad = np.vstack([I[self._overall.sum():],
                               -I[self._overall.sum():]])

        inactive_lagrange = self.penalty.weights[moving_inactive]
        b_subgrad = np.hstack([inactive_lagrange,
                               inactive_lagrange])

        linear_term = np.vstack([A_scaling, A_subgrad])
        offset = np.hstack([b_scaling, b_subgrad])

        affine_con = constraints(linear_term,
                                 offset,
                                 mean=cond_mean,
                                 covariance=cond_cov)

        logdens_transform = (logdens_linear, new_offset)

        self._sampler = affine_gaussian_sampler(affine_con,
                                                observed_opt_state,
                                                self.observed_score_state,
                                                log_density,
                                                logdens_transform,
                                                selection_info=self.selection_variable)  # should be signs and the subgradients we've conditioned on

def form_penalty(self):
    penalty = weighted_l1norm(self.weights, lagrange=1.)
    penalty.quadratic = identity_quadratic(0, 0, self.random_linear_term, 0)
    return penalty

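# A minimal sketch of the randomized penalty formed above, assuming numpy
# as np and regreg.api as rr; the names here are illustrative, not part of
# the surrounding class. In regreg's convention identity_quadratic(c, x0, v, k)
# represents (c/2) * ||beta - x0||^2 + <v, beta> + k, so attaching
# identity_quadratic(0, 0, omega, 0) adds the linear randomization term
# <omega, beta> to the penalized objective.
def _example_randomized_penalty(p=5):
    weights = np.ones(p)
    omega = 0.1 * np.random.standard_normal(p)  # random linear term
    penalty = rr.weighted_l1norm(weights, lagrange=1.)
    penalty.quadratic = rr.identity_quadratic(0, 0, omega, 0)
    return penalty
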
def fit(self, tol=1.e-12, min_its=50, **solve_args):
    """
    Fit the lasso using `regreg`.
    This sets the attributes `soln`, `onestep` and
    forms the constraints necessary for post-selection inference
    by calling `form_constraints()`.

    Parameters
    ----------

    solve_args : keyword args
        Passed to `regreg.problems.simple_problem.solve`.

    Returns
    -------

    soln : np.float
        Solution to lasso.
    """

    penalty = weighted_l1norm(self.feature_weights, lagrange=1.)
    problem = simple_problem(self.loglike, penalty)
    lasso_solution = problem.solve(tol=tol, min_its=min_its, **solve_args)
    self.lasso_solution = lasso_solution
    if not np.all(lasso_solution == 0):
        self.active = np.nonzero(lasso_solution != 0)[0]
        self.inactive = lasso_solution == 0
        self.active_signs = np.sign(lasso_solution[self.active])
        self._active_soln = lasso_solution[self.active]
        H = self.loglike.hessian(self.lasso_solution)
        H_AA = H[self.active][:, self.active]
        H_AAinv = np.linalg.inv(H_AA)
        Q = self.loglike.quadratic
        G_Q = Q.objective(self.lasso_solution, 'grad')
        G = self.loglike.gradient(self.lasso_solution) + G_Q
        G_A = G[self.active]
        G_I = self._G_I = G[self.inactive]
        dbeta_A = H_AAinv.dot(G_A)
        self.onestep_estimator = self._active_soln - dbeta_A
        self.active_penalized = self.feature_weights[self.active] != 0
        self._constraints = constraints(-np.diag(self.active_signs)[self.active_penalized],
                                        (self.active_signs * dbeta_A)[self.active_penalized],
                                        covariance=H_AAinv)
        if self.inactive.sum():

            # inactive constraints

            H_IA = H[self.inactive][:, self.active]
            H_II = H[self.inactive][:, self.inactive]
            inactive_cov = H_II - H_IA.dot(H_AAinv).dot(H_IA.T)
            irrepresentable = H_IA.dot(H_AAinv)
            inactive_mean = irrepresentable.dot(-G_A)
            self._inactive_constraints = constraints(np.vstack([np.identity(self.inactive.sum()),
                                                                -np.identity(self.inactive.sum())]),
                                                     np.hstack([self.feature_weights[self.inactive],
                                                                self.feature_weights[self.inactive]]),
                                                     covariance=inactive_cov,
                                                     mean=inactive_mean)
            if not self._inactive_constraints(G_I):
                warnings.warn('inactive constraint of KKT conditions not satisfied -- '
                              'perhaps need to solve with more accuracy')

            if self.covariance_estimator is not None:

                # make full constraints

                _cov_FA = self.covariance_estimator(self.onestep_estimator,
                                                    self.active,
                                                    self.inactive)

                _cov_IA = _cov_FA[len(self.active):]
                _cov_AA = _cov_FA[:len(self.active)]

                # X_{-E}^T(y - X_E \bar{\beta}_E)

                _inactive_score = -G_I - inactive_mean

                _beta_bar = self.onestep_estimator
                _indep_linear_part = _cov_IA.dot(np.linalg.inv(_cov_AA))

                # we "fix" _nuisance, effectively conditioning on it

                _nuisance = _inactive_score - _indep_linear_part.dot(_beta_bar)
                _upper_lim = (self.feature_weights[self.inactive] -
                              _nuisance -
                              inactive_mean)
                _lower_lim = (_nuisance +
                              self.feature_weights[self.inactive] +
                              inactive_mean)

                _upper_linear = _indep_linear_part
                _lower_linear = -_indep_linear_part

                C = self._constraints
                _full_linear = np.vstack([C.linear_part,
                                          _upper_linear,
                                          _lower_linear])

                _full_offset = np.hstack([C.offset,
                                          _upper_lim,
                                          _lower_lim])

                self._constraints = constraints(_full_linear,
                                                _full_offset,
                                                covariance=_cov_AA)

                if not self._constraints(_beta_bar):
                    warnings.warn('constraints of KKT conditions on one-step estimator '
                                  'not satisfied -- perhaps need to solve with more '
                                  'accuracy')

        else:
            self._inactive_constraints = None
    else:
        self.active = []
        self.inactive = np.arange(lasso_solution.shape[0])
        self._constraints = None
        self._inactive_constraints = None
    return self.lasso_solution

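# Editorial note on the inactive block above: the KKT conditions of the
# weighted LASSO also bound the inactive coordinates of the
# (quadratic-adjusted) gradient,
#
#     |G_j| <= w_j   for every inactive j,
#
# which is why `_inactive_constraints` stacks [I; -I] against [w_I; w_I]
# and then checks the observed G_I against that polyhedron, warning when
# the solver has not converged tightly enough to satisfy it.
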