def test_l1prox():
    '''
    This test verifies that the l1 prox in Lagrange form can be solved
    by a primal/dual specification.

    Obviously, we don't want to solve the l1 prox this way, but it
    verifies that the specification is working correctly.
    '''

    l1 = rr.l1norm(4, lagrange=0.3)
    ww = np.random.standard_normal(4) * 3
    ab = l1.proximal(rr.identity_quadratic(0.5, ww, 0, 0))

    l1c = copy(l1)
    l1c.quadratic = rr.identity_quadratic(0.5, ww, None, 0.)
    a = rr.simple_problem.nonsmooth(l1c)
    solver = rr.FISTA(a)
    solver.fit(tol=1.e-10)
    ad = a.coefs

    l1c = copy(l1)
    l1c.quadratic = rr.identity_quadratic(0.5, ww, None, 0.)
    a = rr.dual_problem.fromprimal(l1c)
    solver = rr.FISTA(a)
    solver.fit(tol=1.0e-14)
    ac = a.primal

    np.testing.assert_allclose(ac, ab, rtol=1.0e-4)
    np.testing.assert_allclose(ac, ad, rtol=1.0e-4)
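# Hedged cross-check (assumption: rr.identity_quadratic(c, w, 0, 0) encodes
# (c/2) * ||x - w||_2^2, as the proximal calls above suggest). Under that
# reading, the prox of lagrange * ||x||_1 has the classical closed form:
# soft-thresholding at lagrange / c. A standalone NumPy sketch, not regreg API:

def soft_threshold(w, t):
    # elementwise soft-thresholding, the closed-form l1 prox
    return np.sign(w) * np.maximum(np.fabs(w) - t, 0)

def test_l1prox_closed_form():
    ww = np.random.standard_normal(4) * 3
    l1 = rr.l1norm(4, lagrange=0.3)
    ab = l1.proximal(rr.identity_quadratic(0.5, ww, 0, 0))
    # threshold = lagrange / quadratic coef = 0.3 / 0.5
    np.testing.assert_allclose(ab, soft_threshold(ww, 0.3 / 0.5), rtol=1.e-4)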
def test_multinomial_vs_logistic():
    """
    Test that multinomial regression with two categories is the same as
    logistic regression.
    """

    n = 500
    p = 10
    J = 2

    X = np.random.standard_normal(n * p).reshape((n, p))
    counts = np.random.randint(0, 10, n * J).reshape((n, J)) + 2

    mult_x = rr.linear_transform(X, input_shape=(p, J - 1))
    loss = rr.multinomial_deviance.linear(mult_x, counts=counts)
    problem = rr.container(loss)
    solver = rr.FISTA(problem)
    solver.fit(debug=False, tol=1e-10)
    coefs1 = solver.composite.coefs

    loss = rr.logistic_deviance.linear(X,
                                       successes=counts[:, 0],
                                       trials=np.sum(counts, axis=1))
    problem = rr.container(loss)
    solver = rr.FISTA(problem)
    solver.fit(debug=False, tol=1e-10)
    coefs2 = solver.composite.coefs

    loss = rr.logistic_deviance.linear(X,
                                       successes=counts[:, 1],
                                       trials=np.sum(counts, axis=1))
    problem = rr.container(loss)
    solver = rr.FISTA(problem)
    solver.fit(debug=False, tol=1e-10)
    coefs3 = solver.composite.coefs

    npt.assert_equal(coefs1.shape, (p, J - 1))
    npt.assert_array_almost_equal(coefs1.flatten(), coefs2.flatten(), 5)
    npt.assert_array_almost_equal(coefs1.flatten(), -coefs3.flatten(), 5)
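# Why two categories collapse to logistic regression (a standalone numerical
# sketch of the math, independent of the regreg API): with J = 2 and one free
# coefficient column, the multinomial model assigns category 0 probability
# exp(eta) / (1 + exp(eta)), which is exactly the logistic sigmoid; treating
# category 1 as the "success" instead flips the sign of eta, which is why
# coefs3 above equals -coefs1.

def test_two_category_softmax_is_sigmoid():
    eta = np.random.standard_normal(5)
    p0 = np.exp(eta) / (1 + np.exp(eta))  # softmax prob of category 0 when J=2
    np.testing.assert_allclose(p0, 1. / (1 + np.exp(-eta)))  # logistic sigmoid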
def test_separable(self):
    tests = []
    atom, q, prox_center, L = self.atom, self.q, self.prox_center, self.L
    loss = self.loss

    problem = rr.separable_problem.singleton(atom, loss)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-12,
               coef_stop=self.coef_stop,
               FISTA=self.FISTA,
               min_its=100)

    tests.append((atom.proximal(q), solver.composite.coefs,
                  'solving atom prox with separable_atom.singleton \n%s ' % str(self)))

    d = atom.conjugate
    problem = rr.separable_problem.singleton(d, loss)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-12,
               coef_stop=self.coef_stop,
               FISTA=self.FISTA,
               min_its=100)

    tests.append((d.proximal(q), solver.composite.coefs,
                  'solving dual atom prox with separable_atom.singleton \n%s ' % str(self)))

    if not self.interactive:
        for test in tests:
            yield (all_close,) + test + (self,)
    else:
        for test in tests:
            yield all_close(*(test + (self,)))
def test_group_lasso_separable():
    """
    This test verifies that the specification of a separable penalty
    yields the same results as having two linear_atoms with selector
    matrices.

    The penalty here is a group lasso, i.e. an l2 penalty.
    """

    X = np.random.standard_normal((100, 20))
    Y = np.random.standard_normal((100,)) + np.dot(X, np.random.standard_normal(20))

    penalty1 = rr.l2norm(10, lagrange=.2)
    penalty2 = rr.l2norm(10, lagrange=.2)
    penalty = rr.separable((20,), [penalty1, penalty2],
                           [slice(0, 10), slice(10, 20)])

    # solve using separable
    loss = rr.quadratic_loss.affine(X, -Y, coef=0.5)
    problem = rr.separable_problem.fromatom(penalty, loss)
    solver = rr.FISTA(problem)
    solver.fit(min_its=200, tol=1.0e-12)
    coefs = solver.composite.coefs

    # solve using the selectors
    penalty_s = [rr.linear_atom(p, rr.selector(g, (20,)))
                 for p, g in zip(penalty.atoms, penalty.groups)]
    problem_s = rr.container(loss, *penalty_s)
    solver_s = rr.FISTA(problem_s)
    solver_s.fit(min_its=200, tol=1.0e-12)
    coefs_s = solver_s.composite.coefs

    np.testing.assert_almost_equal(coefs, coefs_s)
def test_nonnegative_positive_part(debug=False):
    """
    This test verifies that using a nonnegative constraint with a linear
    term, with some unpenalized terms, yields the same result as using
    separable with constrained_positive_part and nonnegative.
    """
    import numpy as np
    import regreg.api as rr

    # N - number of data points
    # P - number of columns in design == number of betas
    N, P = 40, 30
    # an arbitrary positive offset for data and design
    offset = 2

    # data
    Y = np.random.normal(size=(N,)) + offset

    # design - with ones as last column
    X = np.ones((N, P))
    X[:, :-1] = np.random.normal(size=(N, P - 1)) + offset

    # coef for loss
    coef = 0.5
    # lagrange for penalty
    lagrange = .1

    # Loss function (squared difference between fitted and actual data)
    loss = rr.quadratic_loss.affine(X, -Y, coef=coef)

    # Penalty using nonnegative, leave the last 5 unpenalized but
    # nonnegative
    weights = np.ones(P) * lagrange
    weights[-5:] = 0
    linq = rr.identity_quadratic(0, 0, weights, 0)
    penalty = rr.nonnegative(P, quadratic=linq)

    # Solution
    composite_form = rr.separable_problem.singleton(penalty, loss)
    solver = rr.FISTA(composite_form)
    solver.debug = debug
    solver.fit(tol=1.0e-12, min_its=200)
    coefs = solver.composite.coefs

    # using the separable penalty, only penalize the first
    # 25 coefficients with constrained_positive_part
    penalties_s = [rr.constrained_positive_part(25, lagrange=lagrange),
                   rr.nonnegative(5)]
    groups_s = [slice(0, 25), slice(25, 30)]
    penalty_s = rr.separable((P,), penalties_s, groups_s)
    composite_form_s = rr.separable_problem.singleton(penalty_s, loss)
    solver_s = rr.FISTA(composite_form_s)
    solver_s.debug = debug
    solver_s.fit(tol=1.0e-12, min_its=200)
    coefs_s = solver_s.composite.coefs

    nt.assert_true(np.linalg.norm(coefs - coefs_s) / np.linalg.norm(coefs)
                   < 1.0e-02)
def test_lasso_dual():
    """
    Check that the solution of the lasso signal approximator dual
    composite is soft-thresholding.
    """

    l1 = .1
    sparsity = R.l1norm(10, lagrange=l1)
    x = np.arange(10) - 5
    loss = R.quadratic.shift(-x, coef=0.5)

    pen = R.simple_problem(loss, sparsity)
    solver = R.FISTA(pen)
    pen.lipschitz = 1
    solver.fit(backtrack=False)
    soln = solver.composite.coefs
    st = np.maximum(np.fabs(x) - l1, 0) * np.sign(x)
    np.testing.assert_almost_equal(soln, st, decimal=3)

    pen = R.simple_problem(loss, sparsity)
    solver = R.FISTA(pen)
    solver.fit(monotonicity_restart=False)
    soln = solver.composite.coefs
    st = np.maximum(np.fabs(x) - l1, 0) * np.sign(x)
    np.testing.assert_almost_equal(soln, st, decimal=3)

    pen = R.container(loss, sparsity)
    solver = R.FISTA(pen)
    solver.fit()
    soln = solver.composite.coefs
    np.testing.assert_almost_equal(soln, st, decimal=3)
def test_l1prox_bound():
    '''
    This test verifies that the l1 prox in bound form can be solved
    by a primal/dual specification.

    Obviously, we don't want to solve the l1 prox this way, but it
    verifies that the specification is working correctly.
    '''

    l1 = rr.l1norm(4, bound=2.)
    ww = np.random.standard_normal(4) * 2
    ab = l1.proximal(rr.identity_quadratic(0.5, ww, 0, 0))

    l1c = copy(l1)
    l1c.quadratic = rr.identity_quadratic(0.5, ww, None, 0.)
    a = rr.simple_problem.nonsmooth(l1c)
    solver = rr.FISTA(a)
    solver.fit(min_its=100)

    l1c = copy(l1)
    l1c.quadratic = rr.identity_quadratic(0.5, ww, None, 0.)
    a = rr.dual_problem.fromprimal(l1c)
    solver = rr.FISTA(a)
    solver.fit(min_its=100)
    ac = a.primal

    np.testing.assert_allclose(ac + 0.1, ab + 0.1, rtol=1.e-4)
def test_lasso():
    '''
    This test verifies that the l1 prox can be solved by a primal/dual
    specification.

    Obviously, we don't want to solve the l1 prox this way, but it
    verifies that the specification is working correctly.
    '''

    l1 = rr.l1norm(4, lagrange=2.)
    l11 = rr.l1norm(4, lagrange=1.)
    l12 = rr.l1norm(4, lagrange=1.)

    X = np.random.standard_normal((10, 4))
    Y = np.random.standard_normal(10) + 3
    loss = rr.quadratic.affine(X, -Y)

    p1 = rr.container(l11, loss, l12)
    solver1 = rr.FISTA(p1)
    solver1.fit(tol=1.0e-12, min_its=500)

    p2 = rr.separable_problem.singleton(l1, loss)
    solver2 = rr.FISTA(p2)
    solver2.fit(tol=1.0e-12)

    f = p2.objective
    ans = scipy.optimize.fmin_powell(f, np.zeros(4), ftol=1.0e-12)

    print(f(solver2.composite.coefs), f(ans))
    print(f(solver1.composite.coefs), f(ans))

    yield all_close, ans, solver2.composite.coefs, 'singleton solver', None
    yield all_close, solver1.composite.coefs, solver2.composite.coefs, 'container solver', None
def test_multiple_lasso_dual(n=500):
    """
    Check that the solution of the lasso signal approximator dual
    composite is soft-thresholding, even when specified with multiple
    seminorms whose lagrange parameters sum to the total penalty.
    """

    l1 = 1
    sparsity1 = R.l1norm(n, lagrange=l1 * 0.75)
    sparsity2 = R.l1norm(n, lagrange=l1 * 0.25)
    x = np.random.normal(0, 1, n)
    loss = R.quadratic.shift(-x, coef=0.5)

    p = R.dual_problem.fromprimal(loss, sparsity1, sparsity2)
    t1 = time.time()
    solver = R.FISTA(p)
    solver.debug = True
    vals = solver.fit(tol=1.0e-16)
    soln = p.primal
    t2 = time.time()
    print(t2 - t1)

    st = np.maximum(np.fabs(x) - l1, 0) * np.sign(x)
    np.testing.assert_almost_equal(soln, st, decimal=3)

    p = R.container(loss, sparsity1, sparsity2)
    t1 = time.time()
    solver = R.FISTA(p)
    solver.debug = True
    vals = solver.fit(tol=1.0e-16)
    soln = p.primal
    t2 = time.time()
    print(t2 - t1)

    st = np.maximum(np.fabs(x) - l1, 0) * np.sign(x)
    print(soln[:10])
    print(st[:10])
    np.testing.assert_almost_equal(soln, st, decimal=3)
def test_conjugate_solver():

    # Solve Lagrange problem
    Y = np.random.standard_normal(500)
    Y[100:150] += 7
    Y[250:300] += 14

    loss = R.quadratic.shift(-Y, coef=0.5)
    sparsity = R.l1norm(len(Y), lagrange=1.4)
    D = sparse.csr_matrix((np.identity(500) + np.diag([-1] * 499, k=1))[:-1])
    fused = R.l1norm.linear(D, lagrange=25.5)

    problem = R.container(loss, sparsity, fused)
    solver = R.FISTA(problem)
    solver.fit(max_its=500, tol=1e-10)
    solution = solver.composite.coefs

    # Solve constrained version
    delta1 = np.fabs(D * solution).sum()
    delta2 = np.fabs(solution).sum()
    fused_constraint = R.l1norm.linear(D, bound=delta1)
    sparsity_constraint = R.l1norm(500, bound=delta2)

    constrained_problem = R.container(loss, fused_constraint, sparsity_constraint)
    constrained_solver = R.FISTA(constrained_problem)
    vals = constrained_solver.fit(max_its=500, tol=1e-10)
    constrained_solution = constrained_solver.composite.coefs

    npt.assert_almost_equal(np.fabs(constrained_solution).sum(), delta2, 3)
    npt.assert_almost_equal(np.fabs(D * constrained_solution).sum(), delta1, 3)

    # Solve with (shifted) conjugate function
    loss = R.quadratic.shift(-Y, coef=0.5)
    true_conjugate = R.quadratic.shift(Y, coef=0.5)
    problem = R.container(loss, fused_constraint, sparsity_constraint)
    solver = R.FISTA(problem.conjugate_composite(true_conjugate))
    solver.fit(max_its=500, tol=1e-10)
    conjugate_coefs = problem.conjugate_primal_from_dual(solver.composite.coefs)

    # Solve with generic conjugate function
    loss = R.quadratic.shift(-Y, coef=0.5)
    problem = R.container(loss, fused_constraint, sparsity_constraint)
    solver2 = R.FISTA(problem.conjugate_composite(conjugate_tol=1e-12))
    solver2.fit(max_its=500, tol=1e-10)
    conjugate_coefs_gen = problem.conjugate_primal_from_dual(solver2.composite.coefs)

    d1 = np.linalg.norm(solution - constrained_solution) / np.linalg.norm(solution)
    d2 = np.linalg.norm(solution - conjugate_coefs) / np.linalg.norm(solution)
    d3 = np.linalg.norm(solution - conjugate_coefs_gen) / np.linalg.norm(solution)

    npt.assert_array_less(d1, 0.01)
    npt.assert_array_less(d2, 0.01)
    npt.assert_array_less(d3, 0.01)
def test_simple_problem_nonsmooth(self):
    tests = []
    atom, q = self.atom, self.q
    loss = self.loss

    p2 = copy(atom)
    p2.quadratic = atom.quadratic + q
    problem = rr.simple_problem.nonsmooth(p2)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-14,
               FISTA=self.FISTA,
               coef_stop=self.coef_stop,
               min_its=100)

    # this lipschitz constant is based on knowing our loss...
    gg = rr.gengrad(problem, 2.)
    tests.append((atom.proximal(q), gg,
                  'solving prox with gengrad\n %s ' % str(self)))

    tests.append((atom.proximal(q), atom.solve(q),
                  'solving prox with solve method\n %s ' % str(self)))

    tests.append((atom.proximal(q), solver.composite.coefs,
                  'solving prox with simple_problem.nonsmooth with monotonicity\n %s ' % str(self)))

    # use the solve method
    p3 = copy(atom)
    p3.quadratic = atom.quadratic + q
    soln = p3.solve(tol=1.e-14, min_its=10)
    tests.append((atom.proximal(q), soln,
                  'solving prox with solve method\n %s ' % str(self)))

    p4 = copy(atom)
    p4.quadratic = atom.quadratic + q
    problem = rr.simple_problem.nonsmooth(p4)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-14,
               monotonicity_restart=False,
               coef_stop=self.coef_stop,
               FISTA=self.FISTA,
               min_its=100)

    tests.append((atom.proximal(q), solver.composite.coefs,
                  'solving prox with simple_problem.nonsmooth with no monotonicity\n %s ' % str(self)))

    if not self.interactive:
        for test in tests:
            yield (all_close,) + test + (self,)
    else:
        for test in tests:
            yield all_close(*(test + (self,)))
def test_lasso_separable():
    """
    This test verifies that the specification of a separable penalty
    yields the same results as having two linear_atoms with selector
    matrices.

    The penalty here is a lasso, i.e. an l1 penalty.
    """

    X = np.random.standard_normal((100, 20))
    Y = np.random.standard_normal((100,)) + np.dot(X, np.random.standard_normal(20))

    penalty1 = rr.l1norm(10, lagrange=1.2)
    penalty2 = rr.l1norm(10, lagrange=1.2)
    penalty = rr.separable((20,), [penalty1, penalty2],
                           [slice(0, 10), slice(10, 20)],
                           test_for_overlap=True)

    # ensure code is tested
    print(penalty1.latexify())
    print(penalty.latexify())
    print(penalty.conjugate)
    print(penalty.dual)
    print(penalty.seminorm(np.ones(penalty.shape)))
    print(penalty.constraint(np.ones(penalty.shape), bound=2.))

    pencopy = copy(penalty)
    pencopy.set_quadratic(rr.identity_quadratic(1, 0, 0, 0))
    pencopy.conjugate

    # solve using separable
    loss = rr.quadratic_loss.affine(X, -Y, coef=0.5)
    problem = rr.separable_problem.fromatom(penalty, loss)
    solver = rr.FISTA(problem)
    solver.fit(min_its=200, tol=1.0e-12)
    coefs = solver.composite.coefs

    # solve using the usual composite
    penalty_all = rr.l1norm(20, lagrange=1.2)
    problem_all = rr.container(loss, penalty_all)
    solver_all = rr.FISTA(problem_all)
    solver_all.fit(min_its=100, tol=1.0e-12)
    coefs_all = solver_all.composite.coefs

    # solve using the selectors
    penalty_s = [rr.linear_atom(p, rr.selector(g, (20,)))
                 for p, g in zip(penalty.atoms, penalty.groups)]
    problem_s = rr.container(loss, *penalty_s)
    solver_s = rr.FISTA(problem_s)
    solver_s.fit(min_its=500, tol=1.0e-12)
    coefs_s = solver_s.composite.coefs

    np.testing.assert_almost_equal(coefs, coefs_all)
    np.testing.assert_almost_equal(coefs, coefs_s)
def test_affine_linear_offset_l1norm():
    """
    Test linear, affine and offset with the l1norm atom
    """

    n = 1000
    p = 10

    X = np.random.standard_normal((n, p))
    Y = 10 * np.random.standard_normal(n)

    coefs = []

    loss = rr.quadratic.affine(X, -Y, coef=0.5)
    sparsity = rr.l1norm(p, lagrange=5.)
    problem = rr.container(loss, sparsity)
    solver = rr.FISTA(problem)
    solver.fit(debug=False, tol=1e-10)
    coefs.append(1. * solver.composite.coefs)

    loss = rr.quadratic.affine(X, -Y, coef=0.5)
    sparsity = rr.l1norm.linear(np.eye(p), lagrange=5.)
    problem = rr.container(loss, sparsity)
    solver = rr.FISTA(problem)
    solver.fit(debug=False, tol=1e-10)
    coefs.append(1. * solver.composite.coefs)

    loss = rr.quadratic.affine(X, -Y, coef=0.5)
    sparsity = rr.l1norm.affine(np.eye(p), np.zeros(p), lagrange=5.)
    problem = rr.container(loss, sparsity)
    solver = rr.FISTA(problem)
    solver.fit(debug=False, tol=1e-10)
    coefs.append(1. * solver.composite.coefs)

    loss = rr.quadratic.affine(X, -Y, coef=0.5)
    sparsity = rr.l1norm.linear(np.eye(p), lagrange=5., offset=np.zeros(p))
    problem = rr.container(loss, sparsity)
    solver = rr.FISTA(problem)
    solver.fit(debug=False, tol=1e-10)
    coefs.append(1. * solver.composite.coefs)

    loss = rr.quadratic.affine(X, -Y, coef=0.5)
    sparsity = rr.l1norm.shift(np.zeros(p), lagrange=5.)
    problem = rr.container(loss, sparsity)
    solver = rr.FISTA(problem)
    solver.fit(debug=False, tol=1e-10)
    coefs.append(1. * solver.composite.coefs)

    for i, j in itertools.combinations(range(len(coefs)), 2):
        npt.assert_almost_equal(coefs[i], coefs[j])
def test_simple_problem(self):
    tests = []
    atom, q, prox_center, L = self.atom, self.q, self.prox_center, self.L
    loss = self.loss

    problem = rr.simple_problem(loss, atom)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-12,
               FISTA=self.FISTA,
               coef_stop=self.coef_stop,
               min_its=100)

    tests.append((atom.proximal(q), solver.composite.coefs,
                  'solving prox with simple_problem with monotonicity\n %s' % str(self)))

    # write the loss in terms of a quadratic for the smooth loss and a
    # smooth function...
    q = rr.identity_quadratic(L, prox_center, 0, 0)
    lossq = rr.quadratic_loss.shift(prox_center.copy(), coef=0.6 * L)
    lossq.quadratic = rr.identity_quadratic(0.4 * L, prox_center.copy(), 0, 0)
    problem = rr.simple_problem(lossq, atom)

    tests.append((atom.proximal(q),
                  problem.solve(coef_stop=self.coef_stop,
                                FISTA=self.FISTA,
                                tol=1.0e-12),
                  'solving prox with simple_problem ' +
                  'with monotonicity but loss has identity_quadratic %s\n ' % str(self)))

    problem = rr.simple_problem(loss, atom)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-12,
               monotonicity_restart=False,
               coef_stop=self.coef_stop,
               FISTA=self.FISTA,
               min_its=100)

    tests.append((atom.proximal(q), solver.composite.coefs,
                  'solving prox with simple_problem no monotonicity_restart\n %s' % str(self)))

    d = atom.conjugate
    problem = rr.simple_problem(loss, d)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-12,
               monotonicity_restart=False,
               coef_stop=self.coef_stop,
               FISTA=self.FISTA,
               min_its=100)

    tests.append((d.proximal(q),
                  problem.solve(tol=1.e-12,
                                FISTA=self.FISTA,
                                coef_stop=self.coef_stop,
                                monotonicity_restart=False),
                  'solving dual prox with simple_problem no monotonicity\n %s ' % str(self)))

    if not self.interactive:
        for test in tests:
            yield (all_close,) + test + (self,)
    else:
        for test in tests:
            yield all_close(*(test + (self,)))
def test_container(self):
    tests = []
    atom, q, prox_center, L = self.atom, self.q, self.prox_center, self.L
    loss = self.loss

    problem = rr.container(loss, atom)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-12,
               coef_stop=self.coef_stop,
               FISTA=self.FISTA,
               min_its=100)

    tests.append((atom.proximal(q), solver.composite.coefs,
                  'solving atom prox with container\n %s ' % str(self)))

    # write the loss in terms of a quadratic for the smooth loss and a
    # smooth function...
    q = rr.identity_quadratic(L, prox_center, 0, 0)
    lossq = rr.quadratic.shift(prox_center.copy(), coef=0.6 * L)
    lossq.quadratic = rr.identity_quadratic(0.4 * L, prox_center.copy(), 0, 0)
    problem = rr.container(lossq, atom)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-12,
               FISTA=self.FISTA,
               coef_stop=self.coef_stop)

    tests.append((atom.proximal(q),
                  problem.solve(tol=1.e-12,
                                FISTA=self.FISTA,
                                coef_stop=self.coef_stop),
                  'solving prox with container with monotonicity ' +
                  'but loss has identity_quadratic\n %s ' % str(self)))

    d = atom.conjugate
    problem = rr.container(d, loss)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-12,
               coef_stop=self.coef_stop,
               FISTA=self.FISTA,
               min_its=100)

    tests.append((d.proximal(q), solver.composite.coefs,
                  'solving dual prox with container\n %s ' % str(self)))

    if not self.interactive:
        for test in tests:
            yield (all_close,) + test + (self,)
    else:
        for test in tests:
            yield all_close(*(test + (self,)))
def group_lasso_example():

    def selector(p, slc):
        return np.identity(p)[slc]

    penalties = [R.l2norm(selector(500, slice(i * 100, (i + 1) * 100)), lagrange=.1)
                 for i in range(5)]
    penalties[0].lagrange = 250.
    penalties[1].lagrange = 225.
    penalties[2].lagrange = 150.
    penalties[3].lagrange = 100.

    X = np.random.standard_normal((1000, 500))
    Y = np.random.standard_normal((1000,))
    loss = R.quadratic.affine(X, -Y, coef=0.5)
    group_lasso = R.container(loss, *penalties)

    solver = R.FISTA(group_lasso)
    solver.debug = True
    vals = solver.fit(max_its=2000, min_its=20, tol=1e-10)
    soln = solver.composite.coefs

    # solution
    pylab.figure(num=1)
    pylab.clf()
    pylab.plot(soln, c='g')

    # objective values
    pylab.figure(num=2)
    pylab.clf()
    pylab.plot(vals)
def fused_example():
    x = np.random.standard_normal(500)
    x[100:150] += 7

    sparsity = R.l1norm(500, lagrange=1.3)
    D = (np.identity(500) + np.diag([-1] * 499, k=1))[:-1]
    fused = R.l1norm.linear(D, lagrange=10.5)

    loss = R.quadratic.shift(-x, coef=0.5)
    pen = R.container(loss, sparsity, fused)
    solver = R.FISTA(pen)
    vals = solver.fit()
    soln = solver.composite.coefs

    # solution
    pylab.figure(num=1)
    pylab.clf()
    pylab.plot(soln, c='g')
    pylab.scatter(np.arange(x.shape[0]), x)

    # objective values
    pylab.figure(num=2)
    pylab.clf()
    pylab.plot(vals)
def test_lasso(n=100):

    l1 = 1.
    sparsity = R.l1norm(n, lagrange=l1)

    X = np.random.standard_normal((5000, n))
    Y = np.random.standard_normal((5000,))
    regloss = R.quadratic.affine(-X, Y)

    p = R.container(regloss, sparsity)
    solver = R.FISTA(p)
    solver.debug = True

    t1 = time.time()
    vals1 = solver.fit(max_its=800)
    t2 = time.time()
    dt1 = t2 - t1

    soln = solver.composite.coefs
    time.sleep(5)

    print(soln[:10])
    print(solver.composite.objective(soln))
    print("Times", dt1)
def test_lasso():
    '''
    This test verifies that the l1 prox can be solved by a primal/dual
    specification.

    Obviously, we don't want to solve the l1 prox this way, but it
    verifies that the specification is working correctly.
    '''

    l1 = rr.l1norm(4, lagrange=2.)
    l1.quadratic = rr.identity_quadratic(0.5, 0, None, 0.)

    X = np.random.standard_normal((10, 4))
    Y = np.random.standard_normal(10) + 3
    loss = rr.quadratic_loss.affine(X, -Y, coef=0.5)

    p2 = rr.separable_problem.singleton(l1, loss)
    solver2 = rr.FISTA(p2)
    solver2.fit(tol=1.0e-14, min_its=100)

    f = p2.objective
    ans = scipy.optimize.fmin_powell(f, np.zeros(4), ftol=1.0e-12, xtol=1.e-10)

    print(f(solver2.composite.coefs), f(ans))
    np.testing.assert_allclose(ans + 0.1, solver2.composite.coefs + 0.1, rtol=1.e-3)
def test_group_lasso_sparse(n=100):

    def selector(p, slc):
        return np.identity(p)[slc]

    def selector_sparse(p, slc):
        return sparse.csr_matrix(np.identity(p)[slc])

    X = np.random.standard_normal((1000, 500))
    Y = np.random.standard_normal((1000,))
    loss = R.quadratic.affine(X, -Y, coef=0.5)

    penalties = [R.l2norm.linear(selector(500, slice(i * 100, (i + 1) * 100)), lagrange=.1)
                 for i in range(5)]
    penalties[0].lagrange = 250.
    penalties[1].lagrange = 225.
    penalties[2].lagrange = 150.
    penalties[3].lagrange = 100.
    group_lasso = R.container(loss, *penalties)

    solver = R.FISTA(group_lasso)
    solver.debug = True
    t1 = time.time()
    vals = solver.fit(max_its=2000, min_its=20, tol=1e-8)
    soln1 = solver.composite.coefs
    t2 = time.time()
    dt1 = t2 - t1

    print(soln1[:10])
def test_logistic_offset():
    """
    Test that an additive offset in logistic_loglike is absorbed by the
    intercept: the penalized slopes agree, and the intercepts differ by
    the offset.
    """

    # Form the count version of the problem
    trials = np.random.binomial(5, 0.5, 10) + 1
    successes = np.random.binomial(trials, 0.5, len(trials))
    n = len(successes)
    p = 2 * n

    X = np.hstack([np.ones((n, 1)),
                   np.random.normal(0, 1, n * p).reshape((n, p))])
    loss = rr.logistic_loglike.linear(X, successes=successes, trials=trials)

    weights = np.ones(p + 1)
    weights[0] = 0.
    penalty = rr.quadratic_loss.linear(weights, coef=.1, diag=True)

    prob1 = rr.container(loss, penalty)
    solver1 = rr.FISTA(prob1)
    vals = solver1.fit(tol=1e-12)
    solution1 = solver1.composite.coefs

    diff = 0.1
    loss = rr.logistic_loglike.affine(X,
                                      successes=successes,
                                      trials=trials,
                                      offset=diff * np.ones(n))
    weights = np.ones(p + 1)
    weights[0] = 0.
    penalty = rr.quadratic_loss.linear(weights, coef=.1, diag=True)

    prob2 = rr.container(loss, penalty)
    solver2 = rr.FISTA(prob2)
    vals = solver2.fit(tol=1e-12)
    solution2 = solver2.composite.coefs

    ind = np.arange(1, p + 1)
    print(solution1[np.arange(5)])
    print(solution2[np.arange(5)])

    npt.assert_array_almost_equal(solution1[ind], solution2[ind], 3)
    npt.assert_almost_equal(solution1[0] - diff, solution2[0], 2)
def test_gengrad():
    Z = np.random.standard_normal(100) * 4
    p = rr.l1norm(100, lagrange=0.13)
    L = 0.14

    loss = rr.quadratic_loss.shift(Z, coef=L)
    problem = rr.simple_problem(loss, p)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-10, debug=True)
    simple_coef = solver.composite.coefs
    prox_coef = p.proximal(rr.identity_quadratic(L, Z, 0, 0))

    p2 = copy(p)
    p2.quadratic = rr.identity_quadratic(L, Z, 0, 0)
    problem = rr.simple_problem.nonsmooth(p2)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-14, debug=True)
    simple_nonsmooth_coef = solver.composite.coefs

    p = rr.l1norm(100, lagrange=0.13)
    p.quadratic = rr.identity_quadratic(L, Z, 0, 0)
    problem = rr.simple_problem.nonsmooth(p)
    simple_nonsmooth_gengrad = rr.gengrad(problem, L, tol=1.0e-10)

    p = rr.l1norm(100, lagrange=0.13)
    problem = rr.separable_problem.singleton(p, loss)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-10)
    separable_coef = solver.composite.coefs

    loss2 = rr.quadratic_loss.shift(Z, coef=0.6 * L)
    loss2.quadratic = rr.identity_quadratic(0.4 * L, Z, 0, 0)
    p.coefs *= 0
    problem2 = rr.simple_problem(loss2, p)
    loss2_coefs = problem2.solve(coef_stop=True)
    solver2 = rr.FISTA(problem2)
    solver2.fit(tol=1.0e-10, debug=True, coef_stop=True)

    yield all_close, prox_coef, simple_nonsmooth_gengrad, 'prox to nonsmooth gengrad', None
    yield all_close, prox_coef, separable_coef, 'prox to separable', None
    yield all_close, prox_coef, simple_nonsmooth_coef, 'prox to simple_nonsmooth', None
    yield all_close, prox_coef, simple_coef, 'prox to simple', None
    yield all_close, prox_coef, loss2_coefs, 'simple where loss has quadratic 1', None
    yield all_close, prox_coef, solver2.composite.coefs, 'simple where loss has quadratic 2', None
def lasso_via_dual_split():

    def selector(p, slc):
        return np.identity(p)[slc]

    penalties = [R.l1norm(selector(500, slice(i * 100, (i + 1) * 100)), lagrange=0.2)
                 for i in range(5)]
    x = np.random.standard_normal(500)
    loss = R.quadratic.shift(-x, coef=0.5)
    lasso = R.container(loss, *penalties)
    solver = R.FISTA(lasso)
    solver.fit(tol=1e-8)

    np.testing.assert_almost_equal(np.maximum(np.fabs(x) - 0.2, 0) * np.sign(x),
                                   solver.composite.coefs,
                                   decimal=3)
def test_logistic_counts():
    """
    Test the equivalence of binary/count specification in
    logistic_deviance
    """

    # Form the count version of the problem
    trials = np.random.binomial(5, 0.5, 100) + 1
    successes = np.random.binomial(trials, 0.5, len(trials))
    n = len(successes)
    p = 2 * n

    X = np.random.normal(0, 1, n * p).reshape((n, p))
    loss = rr.logistic_deviance.linear(X, successes=successes, trials=trials)
    penalty = rr.quadratic(p, coef=1.)

    prob1 = rr.container(loss, penalty)
    solver1 = rr.FISTA(prob1)
    solver1.fit()
    solution1 = solver1.composite.coefs

    # Form the binary version of the problem
    Ynew = []
    Xnew = []
    for i, (s, t) in enumerate(zip(successes, trials)):
        Ynew.append([1] * s + [0] * (t - s))
        for j in range(t):
            Xnew.append(X[i, :])
    Ynew = np.hstack(Ynew)
    Xnew = np.vstack(Xnew)

    loss = rr.logistic_deviance.linear(Xnew, successes=Ynew)
    penalty = rr.quadratic(p, coef=1.)

    prob2 = rr.container(loss, penalty)
    solver2 = rr.FISTA(prob2)
    solver2.fit()
    solution2 = solver2.composite.coefs

    npt.assert_array_almost_equal(solution1, solution2, 3)
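# Why the expansion above is valid (a standalone numerical sketch of the
# identity, not regreg API): the binomial log-likelihood for s successes out
# of t trials equals the sum of t Bernoulli log-likelihoods at the same
# probability, up to the combinatorial constant log C(t, s), which does not
# depend on the coefficients and so cannot change the minimizer.

def test_binomial_bernoulli_loglik():
    t, s, prob = 7, 3, 0.42
    # binomial log-likelihood kernel, dropping the constant log C(t, s)
    binom = s * np.log(prob) + (t - s) * np.log(1 - prob)
    # the same quantity as a sum of s one-labels and (t - s) zero-labels
    bern = s * np.log(prob) + (t - s) * np.log(1 - prob)
    npt.assert_almost_equal(binom, bern)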
def group_lasso_signal_approx():

    def selector(p, slc):
        return np.identity(p)[slc]

    penalties = [R.l2norm(selector(500, slice(i * 100, (i + 1) * 100)), lagrange=10.)
                 for i in range(5)]
    x = np.random.standard_normal(500)
    loss = R.quadratic.shift(-x, coef=0.5)
    group_lasso = R.container(loss, *penalties)

    solver = R.FISTA(group_lasso)
    solver.fit()
    a = solver.composite.coefs
def test_gengrad_blocknorms():
    Z = np.random.standard_normal((10, 10)) * 4
    p = rr.l1_l2((10, 10), lagrange=0.13)
    dual = p.conjugate
    L = 0.23

    loss = rr.quadratic_loss.shift(Z, coef=L)
    problem = rr.simple_problem(loss, p)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-10, debug=True)
    simple_coef = solver.composite.coefs

    q = rr.identity_quadratic(L, Z, 0, 0)
    prox_coef = p.proximal(q)

    p2 = copy(p)
    p2.quadratic = rr.identity_quadratic(L, Z, 0, 0)
    problem = rr.simple_problem.nonsmooth(p2)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-14, debug=True)
    simple_nonsmooth_coef = solver.composite.coefs

    p = rr.l1_l2((10, 10), lagrange=0.13)
    p.quadratic = rr.identity_quadratic(L, Z, 0, 0)
    problem = rr.simple_problem.nonsmooth(p)
    simple_nonsmooth_gengrad = rr.gengrad(problem, L, tol=1.0e-10)

    p = rr.l1_l2((10, 10), lagrange=0.13)
    problem = rr.separable_problem.singleton(p, loss)
    solver = rr.FISTA(problem)
    solver.fit(tol=1.0e-10)
    separable_coef = solver.composite.coefs

    yield (all_close, prox_coef, simple_coef, 'prox to simple', None)
    yield (all_close, prox_coef, simple_nonsmooth_gengrad, 'prox to nonsmooth gengrad', None)
    yield (all_close, prox_coef, separable_coef, 'prox to separable', None)
    yield (all_close, prox_coef, simple_nonsmooth_coef, 'prox to simple_nonsmooth', None)
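# Hedged reference for the block-norm prox used above (assumptions: rr.l1_l2
# on a (10, 10) array sums the l2 norms of the rows, and
# identity_quadratic(L, Z, 0, 0) encodes (L/2) * ||X - Z||_F^2). Under those
# assumptions the prox is group soft-thresholding: each row of Z is scaled by
# max(0, 1 - (lagrange/L) / ||row||_2). A standalone NumPy sketch for
# cross-checking, not regreg API:

def group_soft_threshold(Z, t):
    # rowwise group soft-thresholding, the closed-form l1_l2 prox
    norms = np.linalg.norm(Z, axis=1, keepdims=True)
    return Z * np.maximum(1 - t / np.maximum(norms, 1e-15), 0)

def test_l1_l2_prox_closed_form():
    Z = np.random.standard_normal((10, 10)) * 4
    p = rr.l1_l2((10, 10), lagrange=0.13)
    prox_coef = p.proximal(rr.identity_quadratic(0.23, Z, 0, 0))
    np.testing.assert_allclose(prox_coef,
                               group_soft_threshold(Z, 0.13 / 0.23),
                               rtol=1.e-5)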
def __init__(self, X, initial=None, lagrange=1, rho=1):
    self.X = R.affine_transform(X, None)
    # note: the original passed an undefined name `l` here; `lagrange`
    # is the parameter that was clearly intended
    self.atom = R.l1norm(X.shape[1], lagrange=lagrange)
    self.rho = rho
    # `coef` (not `lagrange`) scales a smooth quadratic loss, matching
    # R.quadratic.affine usage elsewhere in this module
    self.loss = R.quadratic.affine(X, -np.zeros(X.shape[0]), coef=rho / 2.)
    self.lasso = R.container(self.loss, self.atom)
    self.solver = R.FISTA(self.lasso.problem())
    self.beta = np.zeros(self.atom.primal_shape)
    if initial is None:
        self.beta[:] = np.random.standard_normal(self.atom.primal_shape)
    else:
        self.beta[:] = initial
def test_1d_fused_lasso():
    """
    Check the 1d fused lasso solution using an equivalent lasso
    formulation
    """

    n = 100
    l1 = 1.

    D = (np.identity(n) - np.diag(np.ones(n - 1), -1))[1:]
    extra = np.zeros(n)
    extra[0] = 1.
    D = np.vstack([D, extra])
    D = sparse.csr_matrix(D)
    fused = R.l1norm.linear(D, lagrange=l1)

    X = np.random.standard_normal((2 * n, n))
    Y = np.random.standard_normal((2 * n,))
    loss = R.quadratic.affine(X, -Y, coef=0.5)
    fused_lasso = R.container(loss, fused)
    solver = R.FISTA(fused_lasso)
    vals1 = solver.fit(max_its=25000, tol=1e-10)
    soln1 = solver.composite.coefs

    B = np.array(sparse.tril(np.ones((n, n))).todense())
    X2 = np.dot(X, B)
    loss = R.quadratic.affine(X2, -Y, coef=0.5)
    sparsity = R.l1norm(n, lagrange=l1)
    lasso = R.container(loss, sparsity)
    solver = R.FISTA(lasso)
    solver.fit(tol=1e-10)
    soln2 = np.dot(B, solver.composite.coefs)

    npt.assert_array_almost_equal(soln1, soln2, 3)
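# Why the reparametrization above works (standalone check, no regreg needed):
# with B the lower-triangular matrix of ones, beta = B @ theta makes theta the
# first differences of beta (plus beta[0] itself, via the extra e_1 row of D),
# so D @ B is a permutation of the identity and ||D beta||_1 = ||theta||_1,
# turning the fused penalty into a plain lasso.

def test_fused_reparametrization():
    n = 10
    D = (np.identity(n) - np.diag(np.ones(n - 1), -1))[1:]
    D = np.vstack([D, np.identity(n)[0]])
    B = np.tril(np.ones((n, n)))
    DB = np.dot(D, B)
    # each row of D @ B is a standard basis vector
    npt.assert_array_almost_equal(np.fabs(DB).sum(axis=1), np.ones(n))
    npt.assert_array_almost_equal(DB.max(axis=1), np.ones(n))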
def test_lasso_dual_with_monotonicity():
    """
    Restarting is funny for this simple problem.
    """

    l1 = .1
    sparsity = R.l1norm(10, lagrange=l1)
    x = np.arange(10) - 5
    loss = R.quadratic.shift(-x, coef=0.5)

    pen = R.simple_problem(loss, sparsity)
    solver = R.FISTA(pen)
    solver.fit()
    soln = solver.composite.coefs

    st = np.maximum(np.fabs(x) - l1, 0) * np.sign(x)
    np.testing.assert_almost_equal(soln, st, decimal=3)
def test_l1_constraint():
    """
    Test using the l1norm in bound form
    """

    p = 1000
    sparsity = R.l1norm(p, bound=5.)
    prob = R.simple_problem.nonsmooth(sparsity)
    prob.coefs[:] = np.random.standard_normal(p)

    solver = R.FISTA(prob)
    vals = solver.fit(tol=1e-8, max_its=500)
    solution = solver.composite.coefs

    npt.assert_almost_equal(np.fabs(solution).sum(), sparsity.bound, 3)
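# The constrained problem above is, in effect, Euclidean projection onto the
# l1 ball, which also has a direct O(p log p) sorting algorithm (Duchi et al.,
# 2008). A hedged, standalone reference implementation of that projection,
# independent of the regreg API; it assumes the point lies outside the ball,
# so the constraint is active at the solution (true with overwhelming
# probability for a standard normal draw in 1000 dimensions).

def project_l1_ball(v, bound):
    # sort |v| in decreasing order and find the soft-threshold level theta
    u = np.sort(np.fabs(v))[::-1]
    css = np.cumsum(u)
    rho = np.nonzero(u * np.arange(1, len(v) + 1) > (css - bound))[0][-1]
    theta = (css[rho] - bound) / (rho + 1.)
    return np.sign(v) * np.maximum(np.fabs(v) - theta, 0)

def test_l1_ball_projection_reference():
    z = np.random.standard_normal(1000)
    proj = project_l1_ball(z, 5.)
    # the projection lands exactly on the boundary when z is outside the ball
    npt.assert_almost_equal(np.fabs(proj).sum(), 5., 6)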