def test_ProxElasticNet(self):
    """...Test of ProxElasticNet

    Runs the same two checks twice, first with the constructors left on
    their defaults and then with ``positive=True``:

    * the elastic-net value equals the sum of the values of
      ``ProxL1(ratio * l_enet)`` and ``ProxL2Sq((1 - ratio) * l_enet)``;
    * calling the elastic-net prox gives the same vector as chaining the
      L1 prox and then the L2Sq prox on a copy of the coefficients.
    """
    coeffs = self.coeffs.copy()
    strength = 3e-2
    l1_ratio = .3
    step = 1.7

    # An empty dict leaves every constructor on its own defaults; the
    # second pass switches the positivity constraint on for all three.
    for extra_kwargs in ({}, {'positive': True}):
        enet = ProxElasticNet(strength, ratio=l1_ratio, **extra_kwargs)
        l1_part = ProxL1(l1_ratio * strength, **extra_kwargs)
        l2_part = ProxL2Sq((1 - l1_ratio) * strength, **extra_kwargs)

        # Penalization values must add up.
        enet_value = enet.value(coeffs)
        parts_value = l1_part.value(coeffs) + l2_part.value(coeffs)
        self.assertAlmostEqual(enet_value, parts_value, delta=1e-15)

        # Proximal operators must compose: L1 first, then L2Sq, each one
        # reading from and writing to the same working array.
        expected = coeffs.copy()
        for part in (l1_part, l2_part):
            part.call(expected, step, expected)
        assert_almost_equal(enet.call(coeffs, step=step), expected,
                            decimal=10)
def compare_solver_sdca(self):
    """...Compare SDCA solution with SVRG solution

    For a logistic regression model fitted with and without intercept,
    solves the same penalized problem with SDCA (ridge term passed as
    ``l_l2sq`` plus a ``ProxL1``) and with SVRG (``ProxElasticNet``),
    then checks that both solvers return the same coefficients.
    """
    np.random.seed(12)
    n_samples = Test.n_samples
    n_features = Test.n_features

    ratio = 0.5
    l_enet = 1e-2
    # SDCA "elastic-net" formulation is different from elastic-net
    # implementation
    l_l2_sdca = ratio * l_enet
    l_l1_sdca = (1 - ratio) * l_enet

    # Options common to both solvers.
    solver_options = dict(max_iter=100, verbose=False, tol=0,
                          seed=Test.sto_seed)

    for fit_intercept in [True, False]:
        y, X, _coeffs0, _interc0 = TestSolver.generate_logistic_data(
            n_features, n_samples)
        model = ModelLogReg(fit_intercept=fit_intercept).fit(X, y)

        # SDCA: L2 part handled by the solver, L1 part by the prox.
        sdca = SDCA(l_l2sq=l_l2_sdca, **solver_options)
        sdca = sdca.set_model(model)
        sdca.set_prox(ProxL1(l_l1_sdca))
        coeffs_sdca = sdca.solve()

        # Compare with SVRG
        svrg = SVRG(**solver_options)
        svrg = svrg.set_model(model)
        svrg.set_prox(ProxElasticNet(l_enet, ratio))
        coeffs_svrg = svrg.solve(step=0.1)

        np.testing.assert_allclose(coeffs_sdca, coeffs_svrg)
def test_dense_and_sparse_match(self):
    """...Test in SVRG that dense and sparse code matches in all possible
    settings

    Linear regression data is simulated once with an intercept of -1 and
    once with no intercept.  For every combination of variance reduction
    scheme, random type and prox, two identically configured SVRG
    solvers are run, one on the dense model and one on the sparse model,
    and their solutions are compared up to 7 decimals (3 decimals when
    ``self.dtype`` is ``"float32"``).
    """
    variance_reductions = ['last', 'rand']
    rand_types = ['perm', 'unif']
    seed = 123
    tol = 0.
    max_iter = 50
    n_samples = 500
    n_features = 20

    # Crazy prox examples
    proxs = [
        ProxTV(strength=1e-2, range=(5, 13),
               positive=True).astype(self.dtype),
        ProxElasticNet(strength=1e-2, ratio=0.9).astype(self.dtype),
        ProxEquality(range=(0, n_features)).astype(self.dtype),
        ProxL1(strength=1e-3, range=(5, 17)).astype(self.dtype),
        ProxL1w(strength=1e-3, weights=np.arange(5, 17, dtype=np.double),
                range=(5, 17)).astype(self.dtype),
    ]

    # Compare the dtype by value.  The previous ``is "float32"`` tested
    # object identity, which only works when the interpreter happens to
    # share the string object (and is a SyntaxWarning on Python >= 3.8),
    # so the relaxed float32 precision could silently never be applied.
    places = 3 if self.dtype == "float32" else 7

    for intercept in [-1, None]:
        X, y = self.simu_linreg_data(dtype=self.dtype, interc=intercept,
                                     n_features=n_features,
                                     n_samples=n_samples)

        fit_intercept = intercept is not None
        model_dense, model_spars = self.get_dense_and_sparse_linreg_model(
            X, y, dtype=self.dtype, fit_intercept=fit_intercept)

        step = 1 / model_spars.get_lip_max()

        for variance_reduction, rand_type, prox in product(
                variance_reductions, rand_types, proxs):
            solver_sparse = SVRG(step=step, tol=tol, max_iter=max_iter,
                                 verbose=False,
                                 variance_reduction=variance_reduction,
                                 rand_type=rand_type, seed=seed)
            solver_sparse.set_model(model_spars).set_prox(prox)

            solver_dense = SVRG(step=step, tol=tol, max_iter=max_iter,
                                verbose=False,
                                variance_reduction=variance_reduction,
                                rand_type=rand_type, seed=seed)
            solver_dense.set_model(model_dense).set_prox(prox)

            solver_sparse.solve()
            solver_dense.solve()

            np.testing.assert_array_almost_equal(solver_sparse.solution,
                                                 solver_dense.solution,
                                                 decimal=places)
def test_ProxL1(self):
    """...Test of ProxL1

    Checks ``value`` and ``call`` of ProxL1 against a NumPy
    soft-thresholding reference in three configurations: on the whole
    vector, restricted to the range ``(3, 8)``, and restricted to that
    range with ``positive=True`` (negative thresholded entries are then
    expected to be zero).
    """
    coeffs = self.coeffs.copy().astype(self.dtype)
    l_l1 = 3e-2
    t = 1.7
    thresh = t * l_l1
    start, stop = 3, 8
    window = slice(start, stop)

    def soft_threshold(values):
        # Shrink magnitudes by ``thresh`` and zero out whatever does not
        # exceed it, keeping the original signs.
        magnitude = np.abs(values)
        return np.sign(values) * (magnitude - thresh) * (magnitude > thresh)

    # 1. Prox applied to every coordinate.
    prox = ProxL1(l_l1).astype(self.dtype)
    expected = soft_threshold(coeffs)
    self.assertAlmostEqual(prox.value(coeffs), l_l1 * np.abs(coeffs).sum(),
                           delta=self.delta)
    assert_almost_equal(prox.call(coeffs, step=t), expected, decimal=10)

    # 2. Prox restricted to a range: entries outside stay untouched.
    prox = ProxL1(l_l1, (start, stop)).astype(self.dtype)
    expected = coeffs.copy()
    expected[window] = soft_threshold(coeffs[window])
    self.assertAlmostEqual(prox.value(coeffs),
                           l_l1 * np.abs(coeffs[window]).sum(),
                           delta=self.delta)
    assert_almost_equal(prox.call(coeffs, step=t), expected, decimal=10)

    # 3. Same range with the positivity constraint.
    prox = ProxL1(l_l1, (start, stop), positive=True).astype(self.dtype)
    expected = coeffs.copy()
    expected[window] = soft_threshold(coeffs[window])
    # ``expected[window]`` is a view, so the masked assignment below
    # writes through to ``expected``.
    in_range = expected[window]
    in_range[in_range < 0] = 0
    self.assertAlmostEqual(prox.value(coeffs),
                           l_l1 * np.abs(coeffs[window]).sum(),
                           delta=self.delta)
    assert_almost_equal(prox.call(coeffs, step=t), expected, decimal=10)
def test_solver_gfb(self):
    """...Check GFB's solver for a Logistic Regression with ElasticNet
    penalization

    Notes
    -----
    Splitting ElasticNet into separate l1 and l2 proxs and handing them
    to GFB is not something one would do in practice, since the
    ElasticNet prox already covers this case; it is done here only to
    obtain a second algorithm to compare against.
    """
    n_samples = 200
    n_features = 10
    y, X, _w, _c = TestSolver.generate_logistic_data(
        n_features=n_features, n_samples=n_samples, dtype=self.dtype)

    strength, ratio = 1e-3, 0.3
    enet_prox = ProxElasticNet(strength, ratio).astype(self.dtype)
    l1_prox = ProxL1(strength * ratio).astype(self.dtype)
    l2sq_prox = ProxL2Sq(strength * (1 - ratio)).astype(self.dtype)

    # Stopping criteria shared by the two solvers.
    shared_options = dict(tol=1e-13, max_iter=1000, verbose=False)

    # Reference run: GFB fed with the two separate proxs.
    gfb = GFB(**shared_options, step=1)
    TestSolver.prepare_solver(gfb, X, y, prox=None)
    gfb.set_prox([l1_prox, l2sq_prox])
    gfb_solution = gfb.solve()

    # Comparison run: AGD with the single ElasticNet prox.
    agd = AGD(**shared_options, step=0.5, linesearch=False)
    TestSolver.prepare_solver(agd, X, y, prox=enet_prox)
    agd_solution = agd.solve()

    # Both algorithms must agree on the solution.
    np.testing.assert_almost_equal(gfb_solution, agd_solution, decimal=1)
def test_serializing_solvers(self):
    """...Test serialization of solvers

    Every solver is paired with every model class listed in
    ``model_map``, given a ``ProxL1(2.)`` prox, then round-tripped
    through pickle.  The unpickled solver must compare equal to the
    original on its underlying solver, model and prox objects, and the
    model attached to the solver must give the same loss as the fitted
    model on a test vector.
    """
    ratio = 0.5
    l_enet = 1e-2
    sd = ratio * l_enet

    shared_options = dict(max_iter=100, verbose=False, tol=0)
    solvers = [
        AdaGrad(step=1e-3, **shared_options),
        SGD(step=1e-3, **shared_options),
        SDCA(l_l2sq=sd, **shared_options),
        SAGA(step=1e-3, **shared_options),
        SVRG(step=1e-3, **shared_options),
    ]

    # Only the keys are iterated below; data is always simulated with
    # SimuLinReg, whatever simulation class a model is mapped to.
    model_map = {
        ModelLinReg: SimuLinReg,
        ModelLogReg: SimuLogReg,
        ModelPoisReg: SimuPoisReg,
        ModelHinge: SimuLogReg,
        ModelQuadraticHinge: SimuLogReg,
        ModelSmoothedHinge: SimuLogReg,
        ModelAbsoluteRegression: SimuLinReg,
        ModelEpsilonInsensitive: SimuLinReg,
        ModelHuber: SimuLinReg,
        ModelLinRegWithIntercepts: SimuLinReg,
        ModelModifiedHuber: SimuLogReg
    }

    for solver in solvers:
        for model_class in model_map:
            # Reseed on every iteration so each (solver, model) pair
            # sees exactly the same simulated data.
            np.random.seed(12)
            n_samples, n_features = 100, 5
            w0 = np.random.randn(n_features)
            intercept0 = 50 * weights_sparse_gauss(n_weights=n_samples,
                                                   nnz=30)
            # Second positional argument (intercept) is None.
            simulation = SimuLinReg(w0, None, n_samples=n_samples,
                                    verbose=False, seed=2038)
            X, y = simulation.simulate()

            with_intercepts = model_class == ModelLinRegWithIntercepts
            if with_intercepts:
                y += intercept0

            model = model_class(fit_intercept=False).fit(X, y)
            solver.set_model(model)
            solver.set_prox(ProxL1(2.))

            pickled = pickle.loads(pickle.dumps(solver))

            self.assertTrue(solver._solver.compare(pickled._solver))
            self.assertTrue(
                solver.model._model.compare(pickled.model._model))
            self.assertTrue(solver.prox._prox.compare(pickled.prox._prox))

            # The intercepts model is evaluated on the first sample
            # extended with ``n_samples`` ones; the others on the first
            # sample alone.
            if with_intercepts:
                test_vector = np.hstack((X[0], np.ones(n_samples)))
            else:
                test_vector = X[0]
            self.assertEqual(model.loss(test_vector),
                             solver.model.loss(test_vector))
def __init__(self, decay, n_threads=1, approx_type='smooth', decay_neg=100.0,
             gamma=1000.0, epsilon=1e-3, hawkes_penalty='l1',
             hawkes_base_C=1e3, hawkes_adj_C=1e3, noise_penalty='l2',
             noise_C=1e4, solver='sgd', n_chunks=10, max_iter=1000, tol=1e-5,
             step_z=1e-2, step_theta=1e-2, verbose=True, print_every=100,
             record_every=100):
    """Store the learner configuration and build its proxs and model.

    Parameters
    ----------
    decay, n_threads, approx_type, decay_neg, gamma, epsilon
        Forwarded unchanged to ``ModelHawkesExpKernCondLogLikSyncNoise``.
        ``decay`` is also kept as ``self.decay``.

    hawkes_penalty : `str`, default='l1'
        Only ``'l1'`` is supported.

    hawkes_base_C, hawkes_adj_C : `float`
        Inverse strengths: ``self.prox_base`` is a ``ProxL2Sq`` of
        strength ``1 / hawkes_base_C`` and ``self.prox_adj`` a ``ProxL1``
        of strength ``1 / hawkes_adj_C``.

    noise_penalty : `str`, default='l2'
        Only ``'l2'`` is supported.

    noise_C : `float`
        Inverse strength of ``self.prox_noise`` (a ``ProxL2Sq``).

    solver : `str`, default='sgd'
        Must be one of ``self._available_solvers``.

    n_chunks, max_iter, tol
        Stored as attributes of the same name.

    step_z, step_theta : `float` or `callable`
        A callable is stored as is; any other value is wrapped in a
        function of one argument that always returns it.

    verbose, print_every, record_every
        Stored as ``_verbose``, ``_print_every`` and ``_record_every``.

    Raises
    ------
    ValueError
        If ``solver`` is not available, or if ``noise_penalty`` /
        ``hawkes_penalty`` is not the single supported value.
    """
    self.decay = decay

    if solver not in self._available_solvers:
        raise ValueError("``solver`` must be one of [%s], got %s" %
                         (', '.join(self._available_solvers), solver))
    self.solver = solver
    self.n_chunks = n_chunks
    self.max_iter = max_iter
    self.tol = tol
    self._n_iter_done = 0

    # Steps are always exposed as functions of one argument so that the
    # rest of the class can call them uniformly.
    if callable(step_z):
        self.step_z = step_z
    else:
        self.step_z = lambda t: step_z

    if callable(step_theta):
        self.step_theta = step_theta
    else:
        self.step_theta = lambda t: step_theta

    # Explicit checks instead of ``assert``: assertions are removed when
    # Python runs with -O, which would let an unsupported penalty through
    # while the hard-coded proxs below are used regardless.
    if noise_penalty != 'l2':
        raise ValueError("``noise_penalty`` must be 'l2', got %s" %
                         (noise_penalty,))
    if hawkes_penalty != 'l1':
        raise ValueError("``hawkes_penalty`` must be 'l1', got %s" %
                         (hawkes_penalty,))

    self.noise_C = noise_C
    self.hawkes_base_C = hawkes_base_C
    self.hawkes_adj_C = hawkes_adj_C

    self.prox_noise = ProxL2Sq(strength=1 / self.noise_C)
    self.prox_base = ProxL2Sq(strength=1 / self.hawkes_base_C)
    self.prox_adj = ProxL1(strength=1 / self.hawkes_adj_C)

    self.model_obj = ModelHawkesExpKernCondLogLikSyncNoise(
        decay=decay, n_threads=n_threads, approx_type=approx_type,
        decay_neg=decay_neg, gamma=gamma, epsilon=epsilon)

    self._fitted = False
    self._verbose = verbose
    self._print_every = print_every
    self._record_every = record_every
    self._init_monitor()
import numpy as np
import matplotlib.pyplot as plt
from tick.prox import ProxL1

# Random vector to threshold, and y-limits padded by 0.1 on each side so
# every panel is drawn on the same scale.
x = 0.5 * np.random.randn(50)
y_low = x.min() - 1e-1
y_high = x.max() + 1e-1

# One (title, prox) pair per panel.  The first prox has zero strength and
# is used to display the vector under the "original vector" title.
panels = [
    ("original vector", ProxL1(strength=0.)),
    ("prox", ProxL1(strength=3e-1)),
    ("prox with range=(10, 40)", ProxL1(strength=3e-1, range=(10, 40))),
    ("prox with positive=True", ProxL1(strength=3e-1, positive=True)),
    ("range=(10, 40) and positive=True",
     ProxL1(strength=3e-1, range=(10, 40), positive=True)),
]

_, ax_list = plt.subplots(1, 5, figsize=(20, 4), sharey=True)
for ax, (title, prox) in zip(ax_list, panels):
    ax.stem(prox.call(x))
    ax.set_title(title)
    ax.set_xlim((-1, 51))
    ax.set_ylim((y_low, y_high))
import numpy as np
import matplotlib.pyplot as plt
from tick.prox import ProxL1, ProxTV, ProxMulti

s = 0.4
# Two proxs with adjacent ranges, combined in a single ProxMulti.
tv_part = ProxTV(strength=s, range=(0, 20))
l1_part = ProxL1(strength=2 * s, range=(20, 50))
prox = ProxMulti(proxs=(tv_part, l1_part))

x = np.random.randn(50)
# Common y-limits, padded by 0.1 around the extremes of ``x``.
a, b = x.min() - 1e-1, x.max() + 1e-1


def draw_panel(position, values, title):
    """Stem-plot ``values`` in subplot ``position`` of a 1 x 2 grid."""
    plt.subplot(1, 2, position)
    plt.stem(values)
    plt.title(title, fontsize=16)
    plt.xlim((-1, 51))
    plt.ylim((a, b))


plt.figure(figsize=(8, 4))
draw_panel(1, x, "original vector")
draw_panel(2, prox.call(x), "ProxMulti: TV and L1")
# Dashed line at index 20, where the TV range ends and the L1 range
# starts; drawn on the second panel, which is still the current axes.
plt.vlines(20, a, b, linestyles='dashed')
plt.tight_layout()
plt.show()
# Example script: builds one instance of each prox listed below and starts
# a grid of stem plots with the vector they are meant to be applied to.
import numpy as np
import matplotlib.pyplot as plt
from tick.prox import ProxL1, ProxElasticNet, ProxL2Sq, \
    ProxPositive, ProxSlope, ProxTV, ProxZero, ProxBinarsity, ProxGroupL1, \
    ProxEquality, ProxL1w

# Fixed seed so the random vector is the same on every run.
np.random.seed(12)
x = np.random.randn(50)
# y-limits padded by 0.1 around the extremes of ``x``.
a, b = x.min() - 1e-1, x.max() + 1e-1
# Base strength shared by the penalizing proxs (ProxGroupL1 uses 2 * s).
s = 0.4

# Eleven proxs in total.
proxs = [
    ProxZero(),
    ProxPositive(),
    ProxL2Sq(strength=s),
    ProxL1(strength=s),
    ProxElasticNet(strength=s, ratio=0.5),
    ProxSlope(strength=s),
    ProxTV(strength=s),
    ProxEquality(range=(25, 40)),
    # Weights 0.0, 0.1, ..., 4.9: one per coordinate of ``x``.
    ProxL1w(strength=s, weights=0.1 * np.arange(50, dtype=np.double)),
    # Five blocks starting at 0, 10, 20, 30, 40, each of length 10.
    ProxGroupL1(strength=2 * s, blocks_start=np.arange(0, 50, 10),
                blocks_length=10 * np.ones((5, ))),
    ProxBinarsity(strength=s, blocks_start=np.arange(0, 50, 10),
                  blocks_length=10 * np.ones((5, )))
]

# 3 x 4 grid (12 axes) sharing both axes; the first one shows the raw
# vector.  NOTE(review): presumably the remaining 11 axes are filled with
# the proxs above by code that follows this excerpt -- confirm.
fig, _ = plt.subplots(3, 4, figsize=(16, 12), sharey=True, sharex=True)
fig.axes[0].stem(x)