def __init__(self, map, generative_mean, coef=1., offset=None, quadratic=None):

    self.map = map
    self.q = map.p - map.nactive
    self.r = map.p + map.nactive
    self.p = map.p

    rr.smooth_atom.__init__(self,
                            (2 * self.p,),
                            offset=offset,
                            quadratic=quadratic,
                            initial=self.map.feasible_point,
                            coef=coef)

    self.coefs[:] = self.map.feasible_point

    # the 2p coordinates: the first p are the response/score variables,
    # the next nactive are the active optimization variables and the
    # last q = p - nactive are the inactive subgradient variables
    opt_vars_0 = np.zeros(self.r, bool)
    opt_vars_0[self.p:] = 1
    opt_vars = np.append(opt_vars_0, np.ones(self.q, bool))

    opt_vars_active = np.append(opt_vars_0, np.zeros(self.q, bool))
    opt_vars_inactive = np.zeros(2 * self.p, bool)
    opt_vars_inactive[self.r:] = 1

    self._response_selector = rr.selector(~opt_vars, (2 * self.p,))
    self._opt_selector_active = rr.selector(opt_vars_active, (2 * self.p,))
    self._opt_selector_inactive = rr.selector(opt_vars_inactive, (2 * self.p,))

    nonnegative = nonnegative_softmax_scaled(self.map.nactive)
    self.nonnegative_barrier = nonnegative.linear(self._opt_selector_active)

    cube_objective = smooth_cube_barrier(self.map.inactive_lagrange)
    self.cube_barrier = rr.affine_smooth(cube_objective, self._opt_selector_inactive)

    linear_map = np.hstack([self.map._score_linear_term, self.map._opt_linear_term])
    randomization_loss = log_likelihood(np.zeros(self.p),
                                        self.map.randomization_cov,
                                        self.p)
    self.randomization_loss = rr.affine_smooth(randomization_loss,
                                               rr.affine_transform(linear_map,
                                                                   self.map._opt_affine_term))

    likelihood_loss = log_likelihood(generative_mean, self.map.score_cov, self.p)
    self.likelihood_loss = rr.affine_smooth(likelihood_loss, self._response_selector)

    self.total_loss = rr.smooth_sum([self.randomization_loss,
                                     self.likelihood_loss,
                                     self.nonnegative_barrier,
                                     self.cube_barrier])
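# --------------------------------------------------------------------------
# Illustrative sketch (not part of the class above) of what the two barrier
# terms contribute: `nonnegative_softmax_scaled` keeps the active optimization
# variables nonnegative, and `smooth_cube_barrier` keeps the inactive
# subgradients inside the cube [-lagrange, lagrange]. The library's exact
# forms may differ; this assumes the common log-barrier shapes.

import numpy as np

def nonneg_barrier_sketch(z, scale=1.):
    # smooth barrier for z >= 0: finite for z > 0, blows up as z -> 0+
    z = np.asarray(z, float)
    if np.any(z <= 0):
        return np.inf
    return np.log(1. + scale / z).sum()

def cube_barrier_sketch(u, lagrange):
    # smooth barrier keeping u strictly inside [-lagrange, lagrange]
    u, lagrange = np.asarray(u, float), np.asarray(lagrange, float)
    if np.any(np.abs(u) >= lagrange):
        return np.inf
    return (np.log(1. + 1. / (lagrange - u))
            + np.log(1. + 1. / (u + lagrange))).sum()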
def test_group_lasso_separable():
    """
    This test verifies that the specification of a separable
    penalty yields the same results as having two linear_atoms
    with selector matrices.

    The penalty here is a group_lasso, i.e. an l2 penalty.
    """
    X = np.random.standard_normal((100, 20))
    Y = np.random.standard_normal((100,)) + np.dot(X, np.random.standard_normal(20))

    penalty1 = rr.l2norm(10, lagrange=.2)
    penalty2 = rr.l2norm(10, lagrange=.2)
    penalty = rr.separable((20,), [penalty1, penalty2],
                           [slice(0, 10), slice(10, 20)])

    # solve using separable
    loss = rr.quadratic_loss.affine(X, -Y, coef=0.5)
    problem = rr.separable_problem.fromatom(penalty, loss)
    solver = rr.FISTA(problem)
    solver.fit(min_its=200, tol=1.0e-12)
    coefs = solver.composite.coefs

    # solve using the selectors
    penalty_s = [rr.linear_atom(p, rr.selector(g, (20,)))
                 for p, g in zip(penalty.atoms, penalty.groups)]
    problem_s = rr.container(loss, *penalty_s)
    solver_s = rr.FISTA(problem_s)
    solver_s.fit(min_its=200, tol=1.0e-12)
    coefs_s = solver_s.composite.coefs

    np.testing.assert_almost_equal(coefs, coefs_s)
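# --------------------------------------------------------------------------
# Why the two formulations in the test agree: the proximal map of a separable
# penalty factors over its groups. A minimal numpy sketch with the group
# lasso (l2) prox, independent of regreg:

import numpy as np

def l2_prox(z, lam):
    # prox of lam * ||.||_2: block soft-thresholding
    norm = np.linalg.norm(z)
    return max(0., 1. - lam / norm) * z if norm > 0 else z

z = np.random.standard_normal(20)
lam = 0.2
# the prox of the separable penalty is just the per-group proxes, stacked
prox_separable = np.concatenate([l2_prox(z[:10], lam), l2_prox(z[10:], lam)])
# each block's norm is shrunk by exactly lam (or to zero)
for block in (slice(0, 10), slice(10, 20)):
    assert np.isclose(np.linalg.norm(prox_separable[block]),
                      max(0., np.linalg.norm(z[block]) - lam))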
def test_lasso_separable():
    """
    This test verifies that the specification of a separable
    penalty yields the same results as having two linear_atoms
    with selector matrices.

    The penalty here is a lasso, i.e. an l1 penalty.
    """
    X = np.random.standard_normal((100, 20))
    Y = np.random.standard_normal((100,)) + np.dot(X, np.random.standard_normal(20))

    penalty1 = rr.l1norm(10, lagrange=1.2)
    penalty2 = rr.l1norm(10, lagrange=1.2)
    penalty = rr.separable((20,), [penalty1, penalty2],
                           [slice(0, 10), slice(10, 20)],
                           test_for_overlap=True)

    # ensure code is tested
    print(penalty1.latexify())
    print(penalty.latexify())
    print(penalty.conjugate)
    print(penalty.dual)
    print(penalty.seminorm(np.ones(penalty.shape)))
    print(penalty.constraint(np.ones(penalty.shape), bound=2.))

    pencopy = copy(penalty)
    pencopy.set_quadratic(rr.identity_quadratic(1, 0, 0, 0))
    pencopy.conjugate

    # solve using separable
    loss = rr.quadratic_loss.affine(X, -Y, coef=0.5)
    problem = rr.separable_problem.fromatom(penalty, loss)
    solver = rr.FISTA(problem)
    solver.fit(min_its=200, tol=1.0e-12)
    coefs = solver.composite.coefs

    # solve using the usual composite
    penalty_all = rr.l1norm(20, lagrange=1.2)
    problem_all = rr.container(loss, penalty_all)
    solver_all = rr.FISTA(problem_all)
    solver_all.fit(min_its=100, tol=1.0e-12)
    coefs_all = solver_all.composite.coefs

    # solve using the selectors
    penalty_s = [rr.linear_atom(p, rr.selector(g, (20,)))
                 for p, g in zip(penalty.atoms, penalty.groups)]
    problem_s = rr.container(loss, *penalty_s)
    solver_s = rr.FISTA(problem_s)
    solver_s.fit(min_its=500, tol=1.0e-12)
    coefs_s = solver_s.composite.coefs

    np.testing.assert_almost_equal(coefs, coefs_all)
    np.testing.assert_almost_equal(coefs, coefs_s)
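# --------------------------------------------------------------------------
# The l1 analogue of the separability check above: soft-thresholding acts
# coordinatewise, so splitting an l1 penalty into two blocks (or twenty
# singletons) cannot change the solution. A regreg-free numpy sketch:

import numpy as np

def soft_threshold(z, lam):
    # prox of lam * ||.||_1
    return np.sign(z) * np.maximum(np.abs(z) - lam, 0.)

z = np.random.standard_normal(20)
lam = 1.2
whole = soft_threshold(z, lam)
split = np.concatenate([soft_threshold(z[:10], lam), soft_threshold(z[10:], lam)])
assert np.allclose(whole, split)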
def __init__(self,
             X,
             feasible_point,
             active,          # the active set chosen by randomized lasso
             active_sign,     # the set of signs of active coordinates chosen by lasso
             lagrange,        # in R^p
             mean_parameter,  # in R^n
             noise_variance,  # noise level in data
             randomizer,      # specified randomization
             epsilon,         # ridge penalty for randomized lasso
             coef=1.,
             offset=None,
             quadratic=None,
             nstep=10):

    n, p = X.shape
    self._X = X

    E = active.sum()
    self.q = p - E
    self.active = active
    self.noise_variance = noise_variance
    self.randomization = randomizer
    self.inactive_conjugate = self.active_conjugate = randomizer.CGF_conjugate
    if self.active_conjugate is None:
        raise ValueError('randomization must know its CGF_conjugate -- '
                         'currently only isotropic_gaussian and laplace are implemented '
                         'and are assumed to be randomizations with IID coordinates')

    # optimization variables: n response coordinates followed by E active ones
    initial = np.zeros(n + E, )
    initial[n:] = feasible_point
    self.n = n

    rr.smooth_atom.__init__(self,
                            (n + E,),
                            offset=offset,
                            quadratic=quadratic,
                            initial=initial,
                            coef=coef)

    self.coefs[:] = initial

    opt_vars = np.zeros(n + E, bool)
    opt_vars[n:] = 1

    nonnegative = nonnegative_softmax_scaled(E)

    self._opt_selector = rr.selector(opt_vars, (n + E,))
    self.nonnegative_barrier = nonnegative.linear(self._opt_selector)
    self._response_selector = rr.selector(~opt_vars, (n + E,))

    self.set_parameter(mean_parameter, noise_variance)

    X_E = X[:, active]
    B = X.T.dot(X_E)

    B_E = B[active]
    B_mE = B[~active]

    self.A_active = np.hstack([-X[:, active].T,
                               (B_E + epsilon * np.identity(E)) * active_sign[None, :]])
    self.A_inactive = np.hstack([-X[:, ~active].T,
                                 (B_mE * active_sign[None, :])])

    self.offset_active = active_sign * lagrange[active]
    self.offset_inactive = np.zeros(p - E)

    self.active_conj_loss = rr.affine_smooth(self.active_conjugate,
                                             rr.affine_transform(self.A_active,
                                                                 self.offset_active))

    cube_obj = neg_log_cube_probability(self.q, lagrange[~active],
                                        randomization_scale=1.)
    self.cube_loss = rr.affine_smooth(cube_obj, self.A_inactive)

    # self.likelihood_loss is set by set_parameter above
    self.total_loss = rr.smooth_sum([self.active_conj_loss,
                                     self.cube_loss,
                                     self.likelihood_loss,
                                     self.nonnegative_barrier])
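# --------------------------------------------------------------------------
# Hedged sketch of what `neg_log_cube_probability` represents: for IID
# Gaussian randomization, the probability that each inactive randomization
# coordinate lands in the cube [-lagrange, lagrange] around an offset a.
# This is a reading of the objective, not the library implementation.

import numpy as np
from scipy.stats import norm

def neg_log_cube_prob_sketch(a, lagrange, scale=1.):
    # -sum_i log P(|Z_i + a_i| <= lagrange_i), with Z_i ~ N(0, scale^2)
    a, lagrange = np.asarray(a, float), np.asarray(lagrange, float)
    upper = norm.cdf((lagrange - a) / scale)
    lower = norm.cdf((-lagrange - a) / scale)
    return -np.log(upper - lower).sum()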
def __init__(self,
             X,
             feasible_point,
             active,        # the active set chosen by randomized marginal screening
             active_signs,  # the set of signs of active coordinates chosen by ms
             threshold,     # in R^p
             mean_parameter,
             noise_variance,
             randomizer,
             coef=1.,
             offset=None,
             quadratic=None,
             nstep=10):

    n, p = X.shape
    self._X = X

    E = active.sum()
    self.q = p - E
    sigma = np.sqrt(noise_variance)
    self.active = active
    self.noise_variance = noise_variance
    self.randomization = randomizer
    self.inactive_conjugate = self.active_conjugate = randomizer.CGF_conjugate
    if self.active_conjugate is None:
        raise ValueError('randomization must know its CGF_conjugate -- '
                         'currently only isotropic_gaussian and laplace are implemented '
                         'and are assumed to be randomizations with IID coordinates')

    initial = np.zeros(n + E, )
    initial[n:] = feasible_point
    self.n = n

    rr.smooth_atom.__init__(self,
                            (n + E,),
                            offset=offset,
                            quadratic=quadratic,
                            initial=initial,
                            coef=coef)

    self.coefs[:] = initial

    nonnegative = nonnegative_softmax_scaled(E)

    opt_vars = np.zeros(n + E, bool)
    opt_vars[n:] = 1

    self._opt_selector = rr.selector(opt_vars, (n + E,))
    self.nonnegative_barrier = nonnegative.linear(self._opt_selector)
    self._response_selector = rr.selector(~opt_vars, (n + E,))

    self.set_parameter(mean_parameter, noise_variance)

    self.A_active = np.hstack([np.true_divide(-X[:, active].T, sigma),
                               np.identity(E) * active_signs[None, :]])
    self.A_inactive = np.hstack([np.true_divide(-X[:, ~active].T, sigma),
                                 np.zeros((p - E, E))])

    self.offset_active = active_signs * threshold[active]
    self.offset_inactive = np.zeros(p - E)

    self.active_conj_loss = rr.affine_smooth(self.active_conjugate,
                                             rr.affine_transform(self.A_active,
                                                                 self.offset_active))

    cube_obj = neg_log_cube_probability(self.q, threshold[~active],
                                        randomization_scale=1.)
    self.cube_loss = rr.affine_smooth(cube_obj,
                                      rr.affine_transform(self.A_inactive,
                                                          self.offset_inactive))

    self.total_loss = rr.smooth_sum([self.active_conj_loss,
                                     self.cube_loss,
                                     self.likelihood_loss,
                                     self.nonnegative_barrier])
def __init__(self,
             X,
             feasible_point,  # in R^{|E|_1 + |E|_2}
             active_1,        # the active set chosen by randomized marginal screening
             active_2,        # the active set chosen by randomized lasso
             active_signs_1,  # the set of signs of active coordinates chosen by ms
             active_signs_2,  # the set of signs of active coordinates chosen by lasso
             lagrange,        # in R^p
             threshold,       # in R^p
             mean_parameter,  # in R^n
             noise_variance,
             randomizer,
             epsilon,         # ridge penalty for randomized lasso
             coef=1.,
             offset=None,
             quadratic=None,
             nstep=10):

    n, p = X.shape
    self._X = X

    E_1 = active_1.sum()
    E_2 = active_2.sum()
    sigma = np.sqrt(noise_variance)

    self.active_1 = active_1
    self.active_2 = active_2
    self.noise_variance = noise_variance
    self.randomization = randomizer
    self.inactive_conjugate = self.active_conjugate = randomizer.CGF_conjugate
    if self.active_conjugate is None:
        raise ValueError('randomization must know its CGF_conjugate -- '
                         'currently only isotropic_gaussian and laplace are implemented '
                         'and are assumed to be randomizations with IID coordinates')

    initial = np.zeros(n + E_1 + E_2, )
    initial[n:] = feasible_point
    self.n = n

    rr.smooth_atom.__init__(self,
                            (n + E_1 + E_2,),
                            offset=offset,
                            quadratic=quadratic,
                            initial=initial,
                            coef=coef)

    self.coefs[:] = initial

    nonnegative = nonnegative_softmax_scaled(E_1 + E_2)

    opt_vars = np.zeros(n + E_1 + E_2, bool)
    opt_vars[n:] = 1

    self._opt_selector = rr.selector(opt_vars, (n + E_1 + E_2,))
    self.nonnegative_barrier = nonnegative.linear(self._opt_selector)
    self._response_selector = rr.selector(~opt_vars, (n + E_1 + E_2,))

    self.set_parameter(mean_parameter, noise_variance)

    # boolean masks picking out the coordinates each stage sees:
    # the ms stage uses (response, ms variables), the lasso stage
    # uses (response, lasso variables)
    arg_ms = np.zeros(self.n + E_1 + E_2, bool)
    arg_ms[:self.n + E_1] = 1
    arg_lasso = np.zeros(self.n + E_1, bool)
    arg_lasso[:self.n] = 1
    arg_lasso = np.append(arg_lasso, np.ones(E_2, bool))

    # marginal screening stage
    self.A_active_1 = np.hstack([np.true_divide(-X[:, active_1].T, sigma),
                                 np.identity(E_1) * active_signs_1[None, :]])
    self.A_inactive_1 = np.hstack([np.true_divide(-X[:, ~active_1].T, sigma),
                                   np.zeros((p - E_1, E_1))])

    self.offset_active_1 = active_signs_1 * threshold[active_1]
    self.offset_inactive_1 = np.zeros(p - E_1)

    self._active_ms = rr.selector(arg_ms,
                                  (self.n + E_1 + E_2,),
                                  rr.affine_transform(self.A_active_1,
                                                      self.offset_active_1))
    self._inactive_ms = rr.selector(arg_ms,
                                    (self.n + E_1 + E_2,),
                                    rr.affine_transform(self.A_inactive_1,
                                                        self.offset_inactive_1))

    self.active_conj_loss_1 = rr.affine_smooth(self.active_conjugate, self._active_ms)

    self.q_1 = p - E_1
    cube_obj_1 = neg_log_cube_probability(self.q_1, threshold[~active_1],
                                          randomization_scale=1.)
    self.cube_loss_1 = rr.affine_smooth(cube_obj_1, self._inactive_ms)

    # lasso stage on the screened design
    X_step2 = X[:, active_1]
    X_E_2 = X_step2[:, active_2]
    B = X_step2.T.dot(X_E_2)

    B_E = B[active_2]
    B_mE = B[~active_2]

    self.A_active_2 = np.hstack([-X_step2[:, active_2].T,
                                 (B_E + epsilon * np.identity(E_2)) * active_signs_2[None, :]])
    self.A_inactive_2 = np.hstack([-X_step2[:, ~active_2].T,
                                   (B_mE * active_signs_2[None, :])])

    self.offset_active_2 = active_signs_2 * lagrange[active_2]
    self.offset_inactive_2 = np.zeros(E_1 - E_2)

    self._active_lasso = rr.selector(arg_lasso,
                                     (self.n + E_1 + E_2,),
                                     rr.affine_transform(self.A_active_2,
                                                         self.offset_active_2))
    self._inactive_lasso = rr.selector(arg_lasso,
                                       (self.n + E_1 + E_2,),
                                       rr.affine_transform(self.A_inactive_2,
                                                           self.offset_inactive_2))

    self.active_conj_loss_2 = rr.affine_smooth(self.active_conjugate, self._active_lasso)

    self.q_2 = E_1 - E_2
    cube_obj_2 = neg_log_cube_probability(self.q_2, lagrange[~active_2],
                                          randomization_scale=1.)
    self.cube_loss_2 = rr.affine_smooth(cube_obj_2, self._inactive_lasso)

    self.total_loss = rr.smooth_sum([self.active_conj_loss_1,
                                     self.active_conj_loss_2,
                                     self.cube_loss_1,
                                     self.cube_loss_2,
                                     self.likelihood_loss,
                                     self.nonnegative_barrier])
def __init__(self, map, generative_mean, coef=1., offset=None, quadratic=None):

    self.map = map
    self.q = map.p - map.nactive
    self.r = map.p + map.nactive
    self.p = map.p

    self.inactive_conjugate = self.active_conjugate = map.randomization.CGF_conjugate
    if self.active_conjugate is None:
        raise ValueError('randomization must know its CGF_conjugate -- '
                         'currently only isotropic_gaussian and laplace are implemented '
                         'and are assumed to be randomizations with IID coordinates')

    self.inactive_lagrange = self.map.inactive_lagrange

    rr.smooth_atom.__init__(self,
                            (self.r,),
                            offset=offset,
                            quadratic=quadratic,
                            initial=self.map.feasible_point,
                            coef=coef)

    self.coefs[:] = self.map.feasible_point

    nonnegative = nonnegative_softmax_scaled(self.map.nactive)

    opt_vars = np.zeros(self.r, bool)
    opt_vars[map.p:] = 1

    self._opt_selector = rr.selector(opt_vars, (self.r,))
    self._response_selector = rr.selector(~opt_vars, (self.r,))

    self.nonnegative_barrier = nonnegative.linear(self._opt_selector)

    self.active_conj_loss = rr.affine_smooth(
        self.active_conjugate,
        rr.affine_transform(np.hstack([self.map.A_active, self.map.B_active]),
                            self.map.offset_active))

    cube_obj = neg_log_cube_probability(self.q, self.inactive_lagrange,
                                        randomization_scale=1.)
    self.cube_loss = rr.affine_smooth(cube_obj,
                                      np.hstack([self.map.A_inactive,
                                                 self.map.B_inactive]))

    likelihood_loss = log_likelihood(generative_mean, self.map.score_cov, self.p)
    self.likelihood_loss = rr.affine_smooth(likelihood_loss, self._response_selector)

    self.total_loss = rr.smooth_sum([self.active_conj_loss,
                                     self.likelihood_loss,
                                     self.nonnegative_barrier,
                                     self.cube_loss])
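# --------------------------------------------------------------------------
# Hedged sketch of the `log_likelihood(mean, cov, dim)` term used above,
# read as a Gaussian negative log-likelihood in the score (up to constants).
# The library version may drop constants or cache a factorization; this is
# an assumption about its form, not its implementation.

import numpy as np

def gaussian_neg_log_likelihood_sketch(x, mean, cov):
    # 0.5 * (x - mean)^T cov^{-1} (x - mean)
    resid = np.asarray(x, float) - np.asarray(mean, float)
    return 0.5 * resid.dot(np.linalg.solve(cov, resid))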
P = 200
Y = 2 * np.random.binomial(1, 0.5, size=(N,)) - 1.
X = np.random.standard_normal((N, P))
X[Y == 1] += np.array([30, -20] + (P - 2) * [0])[np.newaxis, :]
X -= X.mean(0)[np.newaxis, :]

X_1 = np.hstack([X, np.ones((N, 1))])
transform = rr.affine_transform(-Y[:, np.newaxis] * X_1, np.ones(N))
C = 0.2
hinge = rr.positive_part(N, lagrange=C)
hinge_loss = rr.linear_atom(hinge, transform)
epsilon = 0.04
smoothed_hinge_loss = rr.smoothed_atom(hinge_loss, epsilon=epsilon)

s = rr.selector(slice(0, P), (P + 1,))
sparsity = rr.l1norm.linear(s, lagrange=3.)
quadratic = rr.quadratic.linear(s, coef=0.5)

from regreg.affine import power_L
ltransform = rr.linear_transform(X_1)
singular_value_sq = power_L(X_1)
# the other smooth piece is a quadratic with identity
# for quadratic form, so its lipschitz constant is 1
lipschitz = 1.05 * singular_value_sq / epsilon + 1.1

problem = rr.container(quadratic, smoothed_hinge_loss, sparsity)
solver = rr.FISTA(problem)
solver.composite.lipschitz = lipschitz
solver.debug = True
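# --------------------------------------------------------------------------
# `power_L(X_1)` above estimates the largest squared singular value of X_1,
# which drives the Lipschitz constant of the smoothed hinge. A plain numpy
# power-iteration sketch of the same quantity:

import numpy as np

def largest_squared_singular_value(A, n_iter=200):
    # power iteration on A^T A; returns an estimate of sigma_max(A)^2
    v = np.random.standard_normal(A.shape[1])
    v /= np.linalg.norm(v)
    for _ in range(n_iter):
        w = A.T.dot(A.dot(v))
        v = w / np.linalg.norm(w)
    return v.dot(A.T.dot(A.dot(v)))

# sanity check against the exact value: np.linalg.norm(A, 2) ** 2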
def __init__(self,
             X,
             feasible_point,
             active,          # the active set chosen by randomized lasso
             active_sign,     # the set of signs of active coordinates chosen by lasso
             lagrange,        # in R^p
             mean_parameter,  # in R^n
             noise_variance,  # noise level in data
             randomizer,      # specified randomization
             epsilon,         # ridge penalty for randomized lasso
             coef=1.,
             offset=None,
             quadratic=None,
             nstep=10):

    n, p = X.shape
    E = active.sum()
    self._X = X
    self.active = active
    self.noise_variance = noise_variance
    self.randomization = randomizer

    self.CGF_randomization = randomizer.CGF
    if self.CGF_randomization is None:
        raise ValueError('randomization must know its CGF -- '
                         'currently only isotropic_gaussian and laplace are implemented '
                         'and are assumed to be randomizations with IID coordinates')

    self.inactive_lagrange = lagrange[~active]

    initial = feasible_point
    self.feasible_point = feasible_point

    rr.smooth_atom.__init__(self,
                            (p,),
                            offset=offset,
                            quadratic=quadratic,
                            initial=initial,
                            coef=coef)

    self.coefs[:] = feasible_point

    mean_parameter = np.squeeze(mean_parameter)

    self.active = active

    X_E = self.X_E = X[:, active]
    self.X_permute = np.hstack([self.X_E, self._X[:, ~active]])
    B = X.T.dot(X_E)

    B_E = B[active]
    B_mE = B[~active]

    self.active_slice = np.zeros_like(active, bool)
    self.active_slice[:active.sum()] = True

    self.B_active = np.hstack([(B_E + epsilon * np.identity(E)) * active_sign[None, :],
                               np.zeros((E, p - E))])
    self.B_inactive = np.hstack([B_mE * active_sign[None, :], np.identity(p - E)])
    self.B_p = np.vstack((self.B_active, self.B_inactive))
    self.B_p_inv = np.linalg.inv(self.B_p.T)

    self.offset_active = active_sign * lagrange[active]
    self.inactive_subgrad = np.zeros(p - E)

    self.cube_bool = np.zeros(p, bool)
    self.cube_bool[E:] = 1

    self.dual_arg = self.B_p_inv.dot(np.append(self.offset_active,
                                               self.inactive_subgrad))

    self._opt_selector = rr.selector(~self.cube_bool, (p,))

    self.set_parameter(mean_parameter, noise_variance)

    _barrier_star = barrier_conjugate_softmax_scaled_rr(self.cube_bool,
                                                        self.inactive_lagrange)

    self.conjugate_barrier = rr.affine_smooth(_barrier_star, np.identity(p))
    self.CGF_randomizer = rr.affine_smooth(self.CGF_randomization, -self.B_p_inv)

    self.constant = np.true_divide(mean_parameter.dot(mean_parameter),
                                   2 * noise_variance)
    self.linear_term = rr.identity_quadratic(0, 0, self.dual_arg, -self.constant)

    self.total_loss = rr.smooth_sum([self.conjugate_barrier,
                                     self.CGF_randomizer,
                                     self.likelihood_loss])
    self.total_loss.quadratic = self.linear_term
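# --------------------------------------------------------------------------
# What `rr.identity_quadratic(0, 0, self.dual_arg, -self.constant)` attaches
# above: a purely linear-plus-constant piece of the objective. As I read
# regreg's convention, identity_quadratic(coef, center, linear_term,
# constant_term) represents coef/2 * ||x - center||^2 + <linear_term, x>
# + constant_term; here only the last two pieces are nonzero. A numpy sketch:

import numpy as np

def identity_quadratic_sketch(x, coef, center, linear_term, constant_term):
    x = np.asarray(x, float)
    return (0.5 * coef * np.sum((x - center) ** 2)
            + np.dot(np.atleast_1d(linear_term), x)
            + constant_term)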
N = 500
P = 2

Y = 2 * np.random.binomial(1, 0.5, size=(N,)) - 1.
X = np.random.standard_normal((N, P))
X[Y == 1] += np.array([3, -2])[np.newaxis, :]
X_1 = np.hstack([X, np.ones((N, 1))])

X_1_signs = -Y[:, np.newaxis] * X_1
transform = rr.affine_transform(X_1_signs, np.ones(N))
C = 0.2
hinge = rr.positive_part(N, lagrange=C)
hinge_loss = rr.linear_atom(hinge, transform)

quadratic = rr.quadratic.linear(rr.selector(slice(0, P), (P + 1,)), coef=0.5)
problem = rr.container(quadratic, hinge_loss)
solver = rr.FISTA(problem)
solver.fit()

import pylab
pylab.clf()
pylab.scatter(X[Y == 1, 0], X[Y == 1, 1], facecolor='red')
pylab.scatter(X[Y == -1, 0], X[Y == -1, 1], facecolor='blue')

fits = np.dot(X_1, problem.coefs)
labels = 2 * (fits > 0) - 1

pointX = [X[:, 0].min(), X[:, 0].max()]
pointY = [-(pointX[0] * problem.coefs[0] + problem.coefs[2]) / problem.coefs[1],
          -(pointX[1] * problem.coefs[0] + problem.coefs[2]) / problem.coefs[1]]
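# --------------------------------------------------------------------------
# What the composed atom above evaluates: positive_part with lagrange C,
# applied to the affine map beta -> 1 - y_i * x_i^T beta, is the usual SVM
# hinge loss. A direct numpy check of that reading:

import numpy as np

def hinge_objective_sketch(beta, X_1, Y, C):
    # C * sum_i max(0, 1 - y_i <x_i, beta>)
    margins = 1. - Y * X_1.dot(beta)
    return C * np.maximum(margins, 0.).sum()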
def __init__(self,
             X,
             feasible_point,
             active,
             active_sign,
             mean_parameter,  # in R^n
             noise_variance,
             randomizer,
             coef=1.,
             offset=None,
             quadratic=None,
             nstep=10):

    self.n, p = X.shape
    E = 1
    self.q = p - 1

    self._X = X
    self.active = active
    self.noise_variance = noise_variance
    self.randomization = randomizer

    self.inactive_conjugate = self.active_conjugate = randomizer.CGF_conjugate
    if self.active_conjugate is None:
        raise ValueError('randomization must know its CGF_conjugate -- '
                         'currently only isotropic_gaussian and laplace are implemented '
                         'and are assumed to be randomizations with IID coordinates')

    initial = np.zeros(self.n + E, )
    initial[self.n:] = feasible_point

    rr.smooth_atom.__init__(self,
                            (self.n + E,),
                            offset=offset,
                            quadratic=quadratic,
                            initial=initial,
                            coef=coef)

    self.coefs[:] = initial

    nonnegative = nonnegative_softmax_scaled(E)

    opt_vars = np.zeros(self.n + E, bool)
    opt_vars[self.n:] = 1

    self._opt_selector = rr.selector(opt_vars, (self.n + E,))
    self._response_selector = rr.selector(~opt_vars, (self.n + E,))

    self.nonnegative_barrier = nonnegative.linear(self._opt_selector)

    sign = np.zeros((1, 1))
    sign[0:, :] = active_sign
    self.A_active = np.hstack([-X[:, active].T, sign])
    self.active_conj_loss = rr.affine_smooth(self.active_conjugate, self.A_active)

    self.A_in_1 = np.hstack([-X[:, ~active].T, np.zeros((p - 1, 1))])
    self.A_in_2 = np.hstack([np.zeros((self.n, 1)).T, np.ones((1, 1))])
    self.A_inactive = np.vstack([self.A_in_1, self.A_in_2])

    cube_loss = neg_log_cube_probability_fs(self.q, p)
    self.cube_loss = rr.affine_smooth(cube_loss, self.A_inactive)

    self.set_parameter(mean_parameter, noise_variance)

    self.total_loss = rr.smooth_sum([self.active_conj_loss,
                                     self.cube_loss,
                                     self.likelihood_loss,
                                     self.nonnegative_barrier])