import numpy as np
import nose.tools as nt
from copy import copy

import regreg.api as rr
# path solver for the mixed (group) lasso; module path assumed from the
# references to mixed_lasso.mixed_lasso_path in test_path_group_lasso below
import regreg.paths.mixed_lasso as mixed_lasso

# shorthand for the identity quadratic used in the proximal checks below
sq = rr.identity_quadratic


def test_group_lasso_separable():
    """
    This test verifies that specifying a separable penalty yields the
    same results as using two linear_atoms with selector matrices.

    The penalty here is a group lasso, i.e. an l2 penalty.
    """
    X = np.random.standard_normal((100, 20))
    Y = np.random.standard_normal((100,)) + np.dot(X, np.random.standard_normal(20))

    penalty1 = rr.l2norm(10, lagrange=.2)
    penalty2 = rr.l2norm(10, lagrange=.2)
    penalty = rr.separable((20,), [penalty1, penalty2],
                           [slice(0, 10), slice(10, 20)])

    # solve using separable
    loss = rr.quadratic_loss.affine(X, -Y, coef=0.5)
    problem = rr.separable_problem.fromatom(penalty, loss)
    solver = rr.FISTA(problem)
    solver.fit(min_its=200, tol=1.0e-12)
    coefs = solver.composite.coefs

    # solve using the selectors
    penalty_s = [rr.linear_atom(p, rr.selector(g, (20,)))
                 for p, g in zip(penalty.atoms, penalty.groups)]
    problem_s = rr.container(loss, *penalty_s)
    solver_s = rr.FISTA(problem_s)
    solver_s.fit(min_its=200, tol=1.0e-12)
    coefs_s = solver_s.composite.coefs

    np.testing.assert_almost_equal(coefs, coefs_s)
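# A note on why the equivalence above holds: the proximal map of a
# separable penalty acts group by group, and for an l2-norm atom in
# lagrange form it is block soft-thresholding.  The sketch below checks
# this against penalty.proximal, assuming (as suggested by its use in
# test_centering_fit_inplace further down) that
# penalty.proximal(sq(1, z, 0, 0)) evaluates the proximal map at z.
# Illustrative only; not one of the original tests.
def _sketch_blockwise_group_prox():
    z = np.random.standard_normal(20)
    penalty = rr.separable((20,),
                           [rr.l2norm(10, lagrange=.2), rr.l2norm(10, lagrange=.2)],
                           [slice(0, 10), slice(10, 20)])
    by_hand = np.empty(20)
    for g in [slice(0, 10), slice(10, 20)]:
        zg = z[g]
        norm_zg = np.linalg.norm(zg)
        # prox of lagrange * ||.||_2: shrink the whole block towards 0
        by_hand[g] = zg * max(1 - .2 / norm_zg, 0) if norm_zg > 0 else 0
    np.testing.assert_almost_equal(penalty.proximal(sq(1, z, 0, 0)), by_hand)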
def test_nonnegative_positive_part(debug=False):
    """
    This test verifies that a nonnegative constraint with a linear term,
    leaving some coordinates unpenalized, yields the same result as a
    separable penalty combining constrained_positive_part and nonnegative.
    """
    # N - number of data points
    # P - number of columns in design == number of betas
    N, P = 40, 30

    # an arbitrary positive offset for data and design
    offset = 2

    # data
    Y = np.random.normal(size=(N,)) + offset

    # design - with ones as last column
    X = np.ones((N, P))
    X[:, :-1] = np.random.normal(size=(N, P - 1)) + offset

    # coef for loss
    coef = 0.5

    # lagrange for penalty
    lagrange = .1

    # loss function (squared difference between fitted and actual data)
    loss = rr.quadratic_loss.affine(X, -Y, coef=coef)

    # penalty using nonnegative with a linear term; the last 5
    # coordinates are unpenalized but still constrained nonnegative
    weights = np.ones(P) * lagrange
    weights[-5:] = 0
    linq = rr.identity_quadratic(0, 0, weights, 0)
    penalty = rr.nonnegative(P, quadratic=linq)

    # solution
    composite_form = rr.separable_problem.singleton(penalty, loss)
    solver = rr.FISTA(composite_form)
    solver.debug = debug
    solver.fit(tol=1.0e-12, min_its=200)
    coefs = solver.composite.coefs

    # using the separable penalty, only penalize the first
    # 25 coefficients with constrained_positive_part
    penalties_s = [rr.constrained_positive_part(25, lagrange=lagrange),
                   rr.nonnegative(5)]
    groups_s = [slice(0, 25), slice(25, 30)]
    penalty_s = rr.separable((P,), penalties_s, groups_s)
    composite_form_s = rr.separable_problem.singleton(penalty_s, loss)

    solver_s = rr.FISTA(composite_form_s)
    solver_s.debug = debug
    solver_s.fit(tol=1.0e-12, min_its=200)
    coefs_s = solver_s.composite.coefs

    nt.assert_true(np.linalg.norm(coefs - coefs_s) / np.linalg.norm(coefs) < 1.0e-02)
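# Why the two formulations above agree: for the nonnegative constraint
# plus a linear term w, the proximal map at z solves
#     argmin_{x >= 0} 1/2 ||x - z||^2 + <w, x>  =  max(z - w, 0),
# which is exactly the proximal map of constrained_positive_part with
# lagrange w on the penalized coordinates, and of plain nonnegative
# where w is zero.  A NumPy-only sketch (illustrative, not a test of
# the regreg API):
def _sketch_nonnegative_prox_equivalence():
    z = np.random.standard_normal(30)
    w = np.ones(30) * .1
    w[-5:] = 0  # last 5 coordinates unpenalized
    prox_linear = np.clip(z - w, 0, np.inf)  # nonnegative + linear term
    prox_separable = np.concatenate([np.clip(z[:25] - .1, 0, np.inf),   # constrained_positive_part
                                     np.clip(z[25:], 0, np.inf)])       # plain nonnegative
    np.testing.assert_almost_equal(prox_linear, prox_separable)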
def test_lasso_separable():
    """
    This test verifies that specifying a separable penalty yields the
    same results as using two linear_atoms with selector matrices.

    The penalty here is a lasso, i.e. an l1 penalty.
    """
    X = np.random.standard_normal((100, 20))
    Y = np.random.standard_normal((100,)) + np.dot(X, np.random.standard_normal(20))

    penalty1 = rr.l1norm(10, lagrange=1.2)
    penalty2 = rr.l1norm(10, lagrange=1.2)
    penalty = rr.separable((20,), [penalty1, penalty2],
                           [slice(0, 10), slice(10, 20)],
                           test_for_overlap=True)

    # ensure code is tested
    print(penalty1.latexify())
    print(penalty.latexify())
    print(penalty.conjugate)
    print(penalty.dual)
    print(penalty.seminorm(np.ones(penalty.shape)))
    print(penalty.constraint(np.ones(penalty.shape), bound=2.))

    pencopy = copy(penalty)
    pencopy.set_quadratic(rr.identity_quadratic(1, 0, 0, 0))
    pencopy.conjugate

    # solve using separable
    loss = rr.quadratic_loss.affine(X, -Y, coef=0.5)
    problem = rr.separable_problem.fromatom(penalty, loss)
    solver = rr.FISTA(problem)
    solver.fit(min_its=200, tol=1.0e-12)
    coefs = solver.composite.coefs

    # solve using the usual composite
    penalty_all = rr.l1norm(20, lagrange=1.2)
    problem_all = rr.container(loss, penalty_all)
    solver_all = rr.FISTA(problem_all)
    solver_all.fit(min_its=100, tol=1.0e-12)
    coefs_all = solver_all.composite.coefs

    # solve using the selectors
    penalty_s = [rr.linear_atom(p, rr.selector(g, (20,)))
                 for p, g in zip(penalty.atoms, penalty.groups)]
    problem_s = rr.container(loss, *penalty_s)
    solver_s = rr.FISTA(problem_s)
    solver_s.fit(min_its=500, tol=1.0e-12)
    coefs_s = solver_s.composite.coefs

    np.testing.assert_almost_equal(coefs, coefs_all)
    np.testing.assert_almost_equal(coefs, coefs_s)
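# The l1 norm is coordinate-separable, so splitting it into two blocks
# (or any number of blocks) leaves its proximal map unchanged: every
# version reduces to elementwise soft-thresholding.  A NumPy-only
# sketch of that fact (illustrative, not one of the original tests):
def _sketch_l1_prox_separability():
    z = np.random.standard_normal(20)
    lagrange = 1.2

    def soft(v):
        # elementwise soft-thresholding at level lagrange
        return np.sign(v) * np.maximum(np.fabs(v) - lagrange, 0)

    # full l1 prox vs. the two-block separable version
    np.testing.assert_almost_equal(soft(z),
                                   np.concatenate([soft(z[:10]), soft(z[10:])]))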
def test_path_group_lasso():
    """
    This test compares the solution paths of three different
    parameterizations of the same problem.
    """
    n = 100
    X = np.random.standard_normal((n, 10))
    U = np.random.standard_normal((n, 2))
    Y = np.random.standard_normal(100)
    betaX = np.array([3, 4, 5, 0, 0] + [0] * 5)
    betaU = np.array([10, -5])
    Y += (np.dot(X, betaX) + np.dot(U, betaU)) * 5

    Xn = rr.normalize(np.hstack([np.ones((100, 1)), X]),
                      inplace=True,
                      center=True,
                      scale=True,
                      intercept_column=0).normalized_array()

    lasso = mixed_lasso.mixed_lasso_path.gaussian(Xn[:, 1:],
                                                  Y,
                                                  penalty_structure=[0] * 7 + [1] * 3,
                                                  nstep=10)
    sol = lasso.main(inner_tol=1.e-12, verbose=True)
    beta = np.array(sol['beta'].todense())

    sols = []
    sols_sep = []
    for l in sol['lagrange']:
        loss = rr.glm.gaussian(Xn, Y)
        # the design matrix contains an intercept column, which is
        # left unpenalized
        penalty = rr.mixed_lasso([mixed_lasso.UNPENALIZED] + [0] * 7 + [1] * 3,
                                 lagrange=l)
        problem = rr.simple_problem(loss, penalty)
        sols.append(problem.solve(tol=1.e-12).copy())

        sep = rr.separable((11,),
                           [rr.l2norm((7,), np.sqrt(7) * l),
                            rr.l2norm((3,), np.sqrt(3) * l)],
                           [np.arange(1, 8), np.arange(8, 11)])
        sep_problem = rr.simple_problem(loss, sep)
        sols_sep.append(sep_problem.solve(tol=1.e-12).copy())

    sols = np.array(sols).T
    sols_sep = np.array(sols_sep).T

    nt.assert_true(np.linalg.norm(beta - sols) / (1 + np.linalg.norm(beta)) <= 1.e-4)
    nt.assert_true(np.linalg.norm(beta - sols_sep) / (1 + np.linalg.norm(beta)) <= 1.e-4)
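# Each knot of the path above solves a group lasso whose per-group
# lagranges are scaled by sqrt(group size), which is why the separable
# version uses np.sqrt(7) * l and np.sqrt(3) * l.  A pure-NumPy
# proximal-gradient (ISTA) sketch of that single-lagrange problem, on
# hypothetical data, to make the objective being traced explicit:
def _sketch_group_lasso_ista():
    # minimize 1/(2n) ||Y - X beta||^2 + sum_g sqrt(|g|) * l * ||beta_g||_2
    n = 100
    X = np.random.standard_normal((n, 10))
    Y = np.random.standard_normal(n)
    l = 0.1
    groups = [np.arange(7), np.arange(7, 10)]
    # step size = 1 / Lipschitz constant of the gradient
    step = 1. / np.linalg.eigvalsh(X.T.dot(X) / n).max()
    beta = np.zeros(10)
    for _ in range(2000):
        grad = X.T.dot(X.dot(beta) - Y) / n
        z = beta - step * grad
        for g in groups:
            nz = np.linalg.norm(z[g])
            thresh = step * np.sqrt(len(g)) * l
            # block soft-thresholding, one group at a time
            z[g] = z[g] * max(1 - thresh / nz, 0) if nz > 0 else 0
        beta = z
    return beta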
def test_different_dim():
    """
    This test checks that the shapes argument of separable, which
    reshapes a group before applying its atom, works properly.
    """
    X = np.random.standard_normal((100, 20))
    Y = np.random.standard_normal((100,)) + np.dot(X, np.random.standard_normal(20))

    penalty1 = rr.nuclear_norm((5, 2), lagrange=1.2)
    penalty2 = rr.l1norm(10, lagrange=1.2)
    penalty = rr.separable((20,), [penalty1, penalty2],
                           [slice(0, 10), slice(10, 20)],
                           test_for_overlap=True,
                           shapes=[(5, 2), None])

    # ensure code is tested
    print(penalty1.latexify())
    print(penalty.latexify())
    print(penalty.conjugate)
    print(penalty.dual)
    print(penalty.seminorm(np.ones(penalty.shape)))
    print(penalty.constraint(np.ones(penalty.shape), bound=2.))

    pencopy = copy(penalty)
    pencopy.set_quadratic(rr.identity_quadratic(1, 0, 0, 0))
    pencopy.conjugate

    # solve using separable
    loss = rr.quadratic_loss.affine(X, -Y, coef=0.5)
    problem = rr.separable_problem.fromatom(penalty, loss)
    solver = rr.FISTA(problem)
    solver.fit(min_its=200, tol=1.0e-12)
    coefs = solver.composite.coefs
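# The shapes argument lets an atom act on a reshaped view of its group:
# above, the first 10 coefficients are treated as a 5x2 matrix under the
# nuclear norm, whose proximal map soft-thresholds the singular values.
# A NumPy-only sketch of that blockwise prox (illustrative only):
def _sketch_nuclear_plus_l1_prox():
    z = np.random.standard_normal(20)
    lagrange = 1.2
    # nuclear norm prox on the reshaped first block: SVD, then shrink
    # the singular values towards zero
    B = z[:10].reshape((5, 2))
    U, D, Vt = np.linalg.svd(B, full_matrices=False)
    nuclear_block = (U * np.maximum(D - lagrange, 0)).dot(Vt).reshape(-1)
    # l1 (soft-thresholding) prox on the second block
    l1_block = np.sign(z[10:]) * np.maximum(np.fabs(z[10:]) - lagrange, 0)
    return np.concatenate([nuclear_block, l1_block])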
def test_centering_fit_inplace(debug=False):
    """
    This test verifies that fitting with a design centered in place by
    rr.normalize agrees with fitting on the mutated matrix directly.
    """
    # N - number of data points
    # P - number of columns in design == number of betas
    N, P = 40, 30

    # an arbitrary positive offset for data and design
    offset = 2

    # design
    X = np.random.normal(size=(N, P)) + offset
    L = rr.normalize(X, center=True, scale=False, inplace=True)
    # X should have been centered in place
    np.testing.assert_almost_equal(np.sum(X, 0), 0)

    # data
    Y = np.random.normal(size=(N,)) + offset

    # coef for loss
    coef = 0.5

    # lagrange for penalty
    lagrange = .1

    # loss function (squared difference between fitted and actual data)
    loss = rr.quadratic_loss.affine(L, -Y, coef=coef)

    penalties = [rr.constrained_positive_part(25, lagrange=lagrange),
                 rr.nonnegative(5)]
    groups = [slice(0, 25), slice(25, 30)]
    penalty = rr.separable((P,), penalties, groups)

    initial = np.random.standard_normal(P)
    composite_form = rr.separable_problem.fromatom(penalty, loss)
    solver = rr.FISTA(composite_form)
    solver.debug = debug
    solver.fit(tol=1.0e-12, min_its=200)
    coefs = solver.composite.coefs

    # solve the problem with X, which has been centered in place
    loss2 = rr.quadratic_loss.affine(X, -Y, coef=coef)
    initial2 = np.random.standard_normal(P)
    composite_form2 = rr.separable_problem.fromatom(penalty, loss2)

    solver2 = rr.FISTA(composite_form2)
    solver2.debug = debug
    solver2.fit(tol=1.0e-12, min_its=200)
    coefs2 = solver2.composite.coefs

    for _ in range(10):
        beta = np.random.standard_normal(P)
        g1 = loss.smooth_objective(beta, mode='grad')
        g2 = loss2.smooth_objective(beta, mode='grad')
        np.testing.assert_almost_equal(g1, g2)
        b1 = penalty.proximal(sq(1, beta, g1, 0))
        b2 = penalty.proximal(sq(1, beta, g2, 0))
        np.testing.assert_almost_equal(b1, b2)
        f1 = composite_form.objective(beta)
        f2 = composite_form2.objective(beta)
        np.testing.assert_almost_equal(f1, f2)

    np.testing.assert_almost_equal(composite_form.objective(coefs),
                                   composite_form.objective(coefs2))
    np.testing.assert_almost_equal(composite_form2.objective(coefs),
                                   composite_form2.objective(coefs2))

    nt.assert_true(np.linalg.norm(coefs - coefs2) / max(np.linalg.norm(coefs), 1) < 1.0e-04)
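# With inplace=True, rr.normalize mutates X itself, so the "two" losses
# above are built from the same centered matrix and should agree.  The
# same in-place centering in plain NumPy (illustrative only):
def _sketch_inplace_centering():
    X = np.random.normal(size=(40, 30)) + 2
    X -= X.mean(0)[None, :]  # center each column in place
    np.testing.assert_almost_equal(X.sum(0), np.zeros(30))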
def test_scaling_and_centering_intercept_fit(debug=False):
    """
    This test verifies that fitting with a design that rr.normalize
    centers and scales (with an explicit intercept column) agrees with
    fitting on a manually standardized matrix.
    """
    # N - number of data points
    # P - number of columns in design == number of betas
    N, P = 40, 30

    # an arbitrary positive offset for data and design
    offset = 2

    # design
    X = np.random.normal(size=(N, P)) + 0 * offset
    X2 = X - X.mean(0)[None, :]
    X2 = X2 / np.std(X2, 0, ddof=1)[None, :]
    X2 = np.hstack([np.ones((X2.shape[0], 1)), X2])
    L = rr.normalize(X, center=True, scale=True, intercept=True)

    # data
    Y = np.random.normal(size=(N,)) + offset

    # lagrange for penalty
    lagrange = .1

    # loss function (squared difference between fitted and actual data)
    loss = rr.squared_error(L, Y)

    penalties = [rr.constrained_positive_part(25, lagrange=lagrange),
                 rr.nonnegative(5)]
    groups = [slice(0, 25), slice(25, 30)]
    penalty = rr.separable((P + 1,), penalties, groups)

    initial = np.random.standard_normal(P + 1)
    composite_form = rr.separable_problem.fromatom(penalty, loss)
    solver = rr.FISTA(composite_form)
    solver.debug = debug
    solver.fit(tol=1.0e-12, min_its=200)
    coefs = solver.composite.coefs

    # solve the problem with X2, standardized by hand
    loss2 = rr.squared_error(X2, Y)
    initial2 = np.random.standard_normal(P + 1)
    composite_form2 = rr.separable_problem.fromatom(penalty, loss2)

    solver2 = rr.FISTA(composite_form2)
    solver2.debug = debug
    solver2.fit(tol=1.0e-12, min_its=200)
    coefs2 = solver2.composite.coefs

    for _ in range(10):
        beta = np.random.standard_normal(P + 1)
        g1 = loss.smooth_objective(beta, mode='grad')
        g2 = loss2.smooth_objective(beta, mode='grad')
        np.testing.assert_almost_equal(g1, g2)
        b1 = penalty.proximal(sq(1, beta, g1, 0))
        b2 = penalty.proximal(sq(1, beta, g2, 0))
        np.testing.assert_almost_equal(b1, b2)
        f1 = composite_form.objective(beta)
        f2 = composite_form2.objective(beta)
        np.testing.assert_almost_equal(f1, f2)

    np.testing.assert_almost_equal(composite_form.objective(coefs),
                                   composite_form.objective(coefs2))
    np.testing.assert_almost_equal(composite_form2.objective(coefs),
                                   composite_form2.objective(coefs2))

    nt.assert_true(np.linalg.norm(coefs - coefs2) / max(np.linalg.norm(coefs), 1) < 1.0e-04)
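# X2 above standardizes each column (mean 0, sample standard deviation 1
# with ddof=1) before prepending an explicit intercept column; a quick
# NumPy check of those two properties (illustrative only):
def _sketch_standardization_check():
    X = np.random.normal(size=(40, 30))
    X2 = X - X.mean(0)[None, :]
    X2 = X2 / np.std(X2, 0, ddof=1)[None, :]
    np.testing.assert_almost_equal(X2.mean(0), np.zeros(30))
    np.testing.assert_almost_equal(np.std(X2, 0, ddof=1), np.ones(30))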