def test_ModelEpsilonInsensitive(self):
    """...Numerical consistency check of loss and gradient for
    Epsilon-Insensitive model
    """
    np.random.seed(12)
    n_samples, n_features = 5000, 10
    w0 = np.random.randn(n_features)
    c0 = np.random.randn()

    # First check with intercept
    X, y = SimuLinReg(w0, c0, n_samples=n_samples,
                      verbose=False).simulate()
    X_spars = csr_matrix(X)
    model = ModelEpsilonInsensitive(fit_intercept=True,
                                    threshold=1.13).fit(X, y)
    model_spars = ModelEpsilonInsensitive(fit_intercept=True,
                                          threshold=1.13).fit(X_spars, y)
    self.run_test_for_glm(model, model_spars, 1e-5, 1e-3)
    self._test_glm_intercept_vs_hardcoded_intercept(model)

    # Then check without intercept
    X, y = SimuLinReg(w0, None, n_samples=n_samples, verbose=False,
                      seed=2038).simulate()
    X_spars = csr_matrix(X)
    model = ModelEpsilonInsensitive(fit_intercept=False).fit(X, y)
    model_spars = ModelEpsilonInsensitive(fit_intercept=False).fit(
        X_spars, y)
    self.run_test_for_glm(model, model_spars, 1e-5, 1e-3)
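# Standalone sketch (not part of the test suite): a hand-rolled
# epsilon-insensitive loss compared against ModelEpsilonInsensitive. The
# normalization assumed here -- the average over samples of
# max(|y_i - x_i.w - b| - threshold, 0) -- and the coefficient layout
# (intercept stored last) are assumptions to verify against the tick
# documentation.
import numpy as np

from tick.linear_model import SimuLinReg
from tick.robust import ModelEpsilonInsensitive


def manual_epsilon_insensitive_loss(X, y, w, b, threshold):
    """Average of max(|residual| - threshold, 0) over all samples."""
    residuals = y - X.dot(w) - b
    return np.mean(np.maximum(np.abs(residuals) - threshold, 0.))


if __name__ == '__main__':
    np.random.seed(12)
    w0 = np.random.randn(10)
    X, y = SimuLinReg(w0, -1., n_samples=1000, verbose=False).simulate()
    model = ModelEpsilonInsensitive(fit_intercept=True,
                                    threshold=1.13).fit(X, y)
    coeffs = np.random.randn(model.n_coeffs)
    print(model.loss(coeffs),
          manual_epsilon_insensitive_loss(X, y, coeffs[:-1], coeffs[-1],
                                          1.13))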
def test_SimuLinReg(self):
    """...Test simulation of a Linear Regression
    """
    n_samples = 10
    n_features = 3
    idx = np.arange(n_features)
    weights = np.exp(-idx / 10.)
    weights[::2] *= -1
    seed = 123
    simu = SimuLinReg(weights, None, n_samples=n_samples, seed=seed,
                      verbose=False)
    X, y = simu.simulate()
    X_truth = np.array([[1.4912667, 0.80881799, 0.26977298],
                        [1.23227551, 0.50697013, 1.9409132],
                        [1.8891494, 1.49834791, 2.41445794],
                        [0.19431319, 0.80245126, 1.02577552],
                        [-1.61687582, -1.08411865, -0.83438387],
                        [2.30419894, -0.68987056, -0.39750262],
                        [-0.28826405, -1.23635074, -0.76124386],
                        [-1.32869473, -1.8752391, -0.182537],
                        [0.79464218, 0.65055633, 1.57572506],
                        [0.71524202, 1.66759831, 0.88679047]])
    y_truth = np.array([-1.23590872, -5.1612244, -4.28171221, -1.00793646,
                        2.24652287, -2.7766077, -0.20433269, 0.46957959,
                        -2.37562537, 0.35124802])
    np.testing.assert_array_almost_equal(X_truth, X)
    np.testing.assert_array_almost_equal(y_truth, y)
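# Standalone usage sketch: SimuLinReg draws a feature matrix and produces
# labels as a noisy linear transform of the features. Assuming the default
# additive Gaussian noise, the residuals y - X.w should look like pure noise.
import numpy as np

from tick.linear_model import SimuLinReg

weights = np.array([0.5, -1.0, 2.0])
X, y = SimuLinReg(weights, None, n_samples=8, seed=123,
                  verbose=False).simulate()
print(X.shape, y.shape)            # (8, 3) and (8,)
print((y - X.dot(weights)).std())  # residual spread, i.e. the noise level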
def test_ModelLinReg(self):
    """...Numerical consistency check of loss and gradient for
    Linear Regression
    """
    np.random.seed(12)
    n_samples, n_features = 5000, 10
    w0 = np.random.randn(n_features)
    c0 = np.random.randn()

    # First check with intercept
    X, y = SimuLinReg(w0, c0, n_samples=n_samples,
                      verbose=False).simulate()
    X_spars = csr_matrix(X)
    model = ModelLinReg(fit_intercept=True).fit(X, y)
    model_spars = ModelLinReg(fit_intercept=True).fit(X_spars, y)
    self.run_test_for_glm(model, model_spars, 1e-5, 1e-4)
    self._test_glm_intercept_vs_hardcoded_intercept(model)

    # Then check without intercept
    X, y = SimuLinReg(w0, None, n_samples=n_samples, verbose=False,
                      seed=2038).simulate()
    X_spars = csr_matrix(X)
    model = ModelLinReg(fit_intercept=False).fit(X, y)
    model_spars = ModelLinReg(fit_intercept=False).fit(X_spars, y)
    self.run_test_for_glm(model, model_spars, 1e-5, 1e-4)
    self._test_glm_intercept_vs_hardcoded_intercept(model)

    # Test for the Lipschitz constants without intercept
    self.assertAlmostEqual(model.get_lip_best(), 2.6873683857125981)
    self.assertAlmostEqual(model.get_lip_mean(), 9.95845726788432)
    self.assertAlmostEqual(model.get_lip_max(), 54.82616964855237)
    self.assertAlmostEqual(model_spars.get_lip_mean(),
                           model.get_lip_mean())
    self.assertAlmostEqual(model_spars.get_lip_max(), model.get_lip_max())

    # Test for the Lipschitz constants with intercept
    model = ModelLinReg(fit_intercept=True).fit(X, y)
    model_spars = ModelLinReg(fit_intercept=True).fit(X_spars, y)
    self.assertAlmostEqual(model.get_lip_best(), 2.687568385712598)
    self.assertAlmostEqual(model.get_lip_mean(), 10.958457267884327)
    self.assertAlmostEqual(model.get_lip_max(), 55.82616964855237)
    self.assertAlmostEqual(model_spars.get_lip_mean(),
                           model.get_lip_mean())
    self.assertAlmostEqual(model_spars.get_lip_max(), model.get_lip_max())
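# Standalone sketch relating the Lipschitz constants above to row norms,
# under the usual least-squares conventions (an assumption to check against
# tick's documentation): the per-sample constant is ||x_i||^2 (plus 1 when an
# intercept is fitted), get_lip_mean / get_lip_max are the mean / max of
# these, and get_lip_best is the largest eigenvalue of X^T X / n_samples.
import numpy as np

from tick.linear_model import ModelLinReg, SimuLinReg

np.random.seed(0)
w0 = np.random.randn(10)
X, y = SimuLinReg(w0, None, n_samples=2000, verbose=False).simulate()
model = ModelLinReg(fit_intercept=False).fit(X, y)

row_norms_sq = np.sum(X ** 2, axis=1)
print(model.get_lip_mean(), row_norms_sq.mean())
print(model.get_lip_max(), row_norms_sq.max())
print(model.get_lip_best(),
      np.linalg.eigvalsh(X.T.dot(X) / X.shape[0]).max())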
def test_ModelLinRegWithInterceptsWithoutGlobalIntercept(self):
    """...Numerical consistency check of loss and gradient for linear
    regression with sample intercepts and no global intercept
    """
    np.random.seed(12)
    n_samples, n_features = 200, 5
    w0 = np.random.randn(n_features)
    intercept0 = 50 * weights_sparse_gauss(n_weights=n_samples, nnz=30)
    c0 = None
    X, y = SimuLinReg(w0, c0, n_samples=n_samples, verbose=False,
                      seed=2038).simulate()
    # Add gross outliers to the labels
    y += intercept0
    X_spars = csr_matrix(X)
    model = ModelLinRegWithIntercepts(fit_intercept=True).fit(X, y)
    model_spars = ModelLinRegWithIntercepts(fit_intercept=True) \
        .fit(X_spars, y)
    self.run_test_for_glm(model, model_spars, 1e-4, 1e-4)

    self.assertAlmostEqual(model.get_lip_mean(), 7.324960325598536)
    self.assertAlmostEqual(model.get_lip_max(), 31.277118951892113)
    self.assertAlmostEqual(model.get_lip_mean(),
                           model_spars.get_lip_mean())
    self.assertAlmostEqual(model.get_lip_max(), model_spars.get_lip_max())
    self.assertAlmostEqual(model.get_lip_best(), 2.7267793249045438)
def get_train_data(n_samples=2000, n_features=20, fit_intercept=True):
    np.random.seed(12)
    weights0 = weights_sparse_gauss(n_features)
    if fit_intercept:
        intercept0 = -1.
    else:
        intercept0 = None
    X, y = SimuLinReg(weights0, intercept0, n_samples=n_samples,
                      verbose=False).simulate()
    return X, y, weights0, intercept0
def create_model(model_type, n_samples, n_features, with_intercept=True):
    weights = np.random.randn(n_features)
    intercept = None
    if with_intercept:
        intercept = np.random.normal()

    if model_type == 'Poisson':
        # we need to rescale features to avoid overflows
        weights /= n_features
        if intercept is not None:
            intercept /= n_features

    if model_type == 'Linear':
        simulator = SimuLinReg(weights, intercept=intercept,
                               n_samples=n_samples, verbose=False)
    elif model_type == 'Logistic':
        simulator = SimuLogReg(weights, intercept=intercept,
                               n_samples=n_samples, verbose=False)
    elif model_type == 'Poisson':
        simulator = SimuPoisReg(weights, intercept=intercept,
                                n_samples=n_samples, verbose=False)

    # simulate() returns the feature matrix first, then the labels
    features, labels = simulator.simulate()

    if model_type == 'Linear':
        model = ModelLinReg(fit_intercept=with_intercept)
    elif model_type == 'Logistic':
        model = ModelLogReg(fit_intercept=with_intercept)
    elif model_type == 'Poisson':
        model = ModelPoisReg(fit_intercept=with_intercept)

    model.fit(features, labels)
    return model
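# Hypothetical usage of the create_model helper above: build a fitted
# logistic model and evaluate its loss and gradient at a random point.
import numpy as np

np.random.seed(12)
model = create_model('Logistic', n_samples=1000, n_features=20,
                     with_intercept=True)
coeffs = np.random.randn(model.n_coeffs)
print(model.loss(coeffs), model.grad(coeffs).shape)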
def test_ModelEpsilonInsensitive_threshold(self):
    np.random.seed(12)
    n_samples, n_features = 5000, 10
    w0 = np.random.randn(n_features)
    c0 = np.random.randn()

    X, y = SimuLinReg(w0, c0, n_samples=n_samples,
                      verbose=False).simulate()
    model = ModelEpsilonInsensitive(threshold=1.541).fit(X, y)
    self.assertEqual(model._model.get_threshold(), 1.541)
    model.threshold = 3.14
    self.assertEqual(model._model.get_threshold(), 3.14)

    msg = '^threshold must be > 0$'
    with self.assertRaisesRegex(RuntimeError, msg):
        model = ModelEpsilonInsensitive(threshold=-1).fit(X, y)
    with self.assertRaisesRegex(RuntimeError, msg):
        model.threshold = 0.
def test_ModelLinRegWithInterceptsWithGlobalInterceptExtras(self):
    """...Extra tests for linear regression with sample intercepts and
    global intercept, check gradient wrt homemade gradient
    """
    np.random.seed(12)
    n_samples, n_features = 200, 5
    w0 = np.random.randn(n_features)
    intercept0 = 50 * weights_sparse_gauss(n_weights=n_samples, nnz=30)
    c0 = -1.
    X, y = SimuLinReg(w0, c0, n_samples=n_samples, verbose=False,
                      seed=2038).simulate()
    # Add gross outliers to the labels
    y += intercept0
    model = ModelLinRegWithIntercepts(fit_intercept=True).fit(X, y)
    coeffs = np.random.randn(model.n_coeffs)
    grad1 = model.grad(coeffs)
    X2 = np.hstack((X, np.ones((n_samples, 1)), np.identity(n_samples)))
    grad2 = X2.T.dot(X2.dot(coeffs) - y) / n_samples
    np.testing.assert_almost_equal(grad1, grad2, decimal=10)
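# Complementary standalone sketch: a generic central finite-difference check
# of model.grad against model.loss. This is not part of tick's test suite and
# should work for any smooth tick model exposing loss and grad.
import numpy as np

from tick.linear_model import ModelLinReg, SimuLinReg


def finite_diff_grad(loss, coeffs, eps=1e-6):
    """Central-difference approximation of the gradient of `loss`."""
    grad = np.empty_like(coeffs)
    for i in range(coeffs.size):
        step = np.zeros_like(coeffs)
        step[i] = eps
        grad[i] = (loss(coeffs + step) - loss(coeffs - step)) / (2 * eps)
    return grad


if __name__ == '__main__':
    np.random.seed(12)
    w0 = np.random.randn(5)
    X, y = SimuLinReg(w0, -1., n_samples=200, verbose=False).simulate()
    model = ModelLinReg(fit_intercept=True).fit(X, y)
    coeffs = np.random.randn(model.n_coeffs)
    np.testing.assert_allclose(model.grad(coeffs),
                               finite_diff_grad(model.loss, coeffs),
                               rtol=1e-5, atol=1e-5)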
def test_set_model_and_set_prox(self):
    np.random.seed(12)
    n_samples = TestSolver.n_samples
    n_features = TestSolver.n_features
    weights0 = weights_sparse_gauss(n_features, nnz=5)
    interc0 = 2.
    model = ModelLinReg()
    msg = '^Passed object ModelLinReg has not been fitted. You must call' \
          ' ``fit`` on it before passing it to ``set_model``$'
    with self.assertRaisesRegex(ValueError, msg):
        for solver_class in self.solvers:
            if solver_class is SDCA:
                solver = solver_class(l_l2sq=1e-1)
            else:
                solver = solver_class()
            solver.set_model(model)

    X, y = SimuLinReg(weights0, interc0, n_samples=n_samples,
                      verbose=False, seed=123,
                      dtype=self.dtype).simulate()
    prox = ProxL2Sq(strength=1e-1)
    msg = '^Passed object of class ProxL2Sq is not a Model class$'
    with self.assertRaisesRegex(ValueError, msg):
        for solver_class in self.solvers:
            if solver_class is SDCA:
                solver = solver_class(l_l2sq=1e-1)
            else:
                solver = solver_class()
            solver.set_model(prox)

    model.fit(X, y)
    msg = '^Passed object of class ModelLinReg is not a Prox class$'
    with self.assertRaisesRegex(ValueError, msg):
        for solver_class in self.solvers:
            if solver_class is SDCA:
                solver = solver_class(l_l2sq=1e-1)
            else:
                solver = solver_class()
            solver.set_model(model).set_prox(model)
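# Standalone sketch of the call order the test above enforces: fit the model
# before set_model, and pass a Prox (not a Model) to set_prox. SVRG and
# ProxL2Sq are used here only as representative choices.
import numpy as np

from tick.linear_model import ModelLinReg, SimuLinReg
from tick.prox import ProxL2Sq
from tick.solver import SVRG

np.random.seed(123)
w0 = np.random.randn(5)
X, y = SimuLinReg(w0, 2., n_samples=1000, verbose=False,
                  seed=123).simulate()
model = ModelLinReg(fit_intercept=True).fit(X, y)  # fit before set_model

solver = SVRG(step=1e-3, max_iter=50, verbose=False, tol=0)
solver.set_model(model).set_prox(ProxL2Sq(strength=1e-1))
print(solver.solve().shape)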
def test_robust_model_serialization(self):
    """...Test serialization of robust models
    """
    model_map = {
        ModelAbsoluteRegression: SimuLinReg,
        ModelEpsilonInsensitive: SimuLinReg,
        ModelHuber: SimuLinReg,
        ModelLinRegWithIntercepts: SimuLinReg,
        ModelModifiedHuber: SimuLogReg
    }

    for mod in model_map:
        np.random.seed(12)
        n_samples, n_features = 100, 5
        w0 = np.random.randn(n_features)
        intercept0 = 50 * weights_sparse_gauss(n_weights=n_samples,
                                               nnz=30)
        c0 = None
        X, y = SimuLinReg(w0, c0, n_samples=n_samples, verbose=False,
                          seed=2038).simulate()

        if mod == ModelLinRegWithIntercepts:
            y += intercept0

        model = mod(fit_intercept=False).fit(X, y)
        pickled = pickle.loads(pickle.dumps(model))

        self.assertTrue(model._model.compare(pickled._model))

        if mod == ModelLinRegWithIntercepts:
            test_vector = np.hstack((X[0], np.ones(n_samples)))
            self.assertEqual(model.loss(test_vector),
                             pickled.loss(test_vector))
        else:
            self.assertEqual(model.loss(X[0]), pickled.loss(X[0]))
def test_serializing_solvers(self):
    """...Test serialization of solvers
    """
    ratio = 0.5
    l_enet = 1e-2
    sd = ratio * l_enet

    solvers = [
        AdaGrad(step=1e-3, max_iter=100, verbose=False, tol=0),
        SGD(step=1e-3, max_iter=100, verbose=False, tol=0),
        SDCA(l_l2sq=sd, max_iter=100, verbose=False, tol=0),
        SAGA(step=1e-3, max_iter=100, verbose=False, tol=0),
        SVRG(step=1e-3, max_iter=100, verbose=False, tol=0)
    ]

    model_map = {
        ModelLinReg: SimuLinReg,
        ModelLogReg: SimuLogReg,
        ModelPoisReg: SimuPoisReg,
        ModelHinge: SimuLogReg,
        ModelQuadraticHinge: SimuLogReg,
        ModelSmoothedHinge: SimuLogReg,
        ModelAbsoluteRegression: SimuLinReg,
        ModelEpsilonInsensitive: SimuLinReg,
        ModelHuber: SimuLinReg,
        ModelLinRegWithIntercepts: SimuLinReg,
        ModelModifiedHuber: SimuLogReg
    }

    for solver in solvers:
        for mod in model_map:
            np.random.seed(12)
            n_samples, n_features = 100, 5
            w0 = np.random.randn(n_features)
            intercept0 = 50 * weights_sparse_gauss(n_weights=n_samples,
                                                   nnz=30)
            c0 = None
            X, y = SimuLinReg(w0, c0, n_samples=n_samples, verbose=False,
                              seed=2038).simulate()

            if mod == ModelLinRegWithIntercepts:
                y += intercept0

            model = mod(fit_intercept=False).fit(X, y)
            prox = ProxL1(2.)
            solver.set_model(model)
            solver.set_prox(prox)

            pickled = pickle.loads(pickle.dumps(solver))

            self.assertTrue(solver._solver.compare(pickled._solver))
            self.assertTrue(
                solver.model._model.compare(pickled.model._model))
            self.assertTrue(solver.prox._prox.compare(pickled.prox._prox))

            if mod == ModelLinRegWithIntercepts:
                test_vector = np.hstack((X[0], np.ones(n_samples)))
                self.assertEqual(model.loss(test_vector),
                                 solver.model.loss(test_vector))
            else:
                self.assertEqual(model.loss(X[0]),
                                 solver.model.loss(X[0]))
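# Standalone sketch: round-tripping a configured solver through a pickle file
# on disk, mirroring the in-memory round trip tested above. The file handling
# is illustrative only, not part of tick's test suite.
import os
import pickle
import tempfile

import numpy as np

from tick.linear_model import ModelLinReg, SimuLinReg
from tick.prox import ProxL1
from tick.solver import SVRG

np.random.seed(12)
w0 = np.random.randn(5)
X, y = SimuLinReg(w0, None, n_samples=200, verbose=False,
                  seed=2038).simulate()
model = ModelLinReg(fit_intercept=False).fit(X, y)

solver = SVRG(step=1e-3, max_iter=50, verbose=False, tol=0)
solver.set_model(model).set_prox(ProxL1(2.))

with tempfile.NamedTemporaryFile(suffix='.pkl', delete=False) as f:
    pickle.dump(solver, f)
    path = f.name
with open(path, 'rb') as f:
    restored = pickle.load(f)
os.remove(path)

coeffs = np.random.randn(model.n_coeffs)
print(solver.model.loss(coeffs) == restored.model.loss(coeffs))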
"""
Generates Linear, Logistic and Poisson regression realizations given a
weight vector.
"""

import matplotlib.pyplot as plt
import numpy as np

from tick.linear_model import SimuLinReg, SimuLogReg, SimuPoisReg

n_samples, n_features = 150, 2

weights0 = np.array([0.3, 1.2])
intercept0 = 0.5

simu_linreg = SimuLinReg(weights0, intercept0, n_samples=n_samples,
                         seed=123, verbose=False)
X_linreg, y_linreg = simu_linreg.simulate()

simu_logreg = SimuLogReg(weights0, intercept0, n_samples=n_samples,
                         seed=123, verbose=False)
X_logreg, y_logreg = simu_logreg.simulate()

# seed and verbose chosen to match the two simulators above
simu_poisreg = SimuPoisReg(weights0, intercept0, n_samples=n_samples,
                           link='exponential', seed=123, verbose=False)
X_poisreg, y_poisreg = simu_poisreg.simulate()
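# A possible way to visualize the three simulated datasets (a sketch; the
# original example may lay the figure out differently): scatter the two
# features, colored by the simulated labels.
fig, axes = plt.subplots(1, 3, figsize=(12, 4), sharex=True, sharey=True)
for ax, (X, y, title) in zip(axes, [(X_linreg, y_linreg, 'SimuLinReg'),
                                    (X_logreg, y_logreg, 'SimuLogReg'),
                                    (X_poisreg, y_poisreg, 'SimuPoisReg')]):
    sc = ax.scatter(X[:, 0], X[:, 1], c=y, cmap='RdBu')
    ax.set_title(title)
    fig.colorbar(sc, ax=ax)
plt.tight_layout()
plt.show()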
def check_solver(self, solver, fit_intercept=True, model='logreg',
                 decimal=1):
    """Check that a solver instance finds the same parameters as scipy BFGS

    Parameters
    ----------
    solver : `Solver`
        Instance of the solver to be tested

    fit_intercept : `bool`, default=True
        Model uses an intercept if `True`

    model : 'linreg' | 'logreg' | 'poisreg', default='logreg'
        Name of the model used to test the solver

    decimal : `int`, default=1
        Number of decimals required for the test
    """
    # Set seed for data simulation
    np.random.seed(12)
    n_samples = TestSolver.n_samples
    n_features = TestSolver.n_features

    coeffs0 = weights_sparse_gauss(n_features, nnz=5)
    if fit_intercept:
        interc0 = 2.
    else:
        interc0 = None

    if model == 'linreg':
        X, y = SimuLinReg(coeffs0, interc0, n_samples=n_samples,
                          verbose=False, seed=123).simulate()
        model = ModelLinReg(fit_intercept=fit_intercept).fit(X, y)
    elif model == 'logreg':
        X, y = SimuLogReg(coeffs0, interc0, n_samples=n_samples,
                          verbose=False, seed=123).simulate()
        model = ModelLogReg(fit_intercept=fit_intercept).fit(X, y)
    elif model == 'poisreg':
        X, y = SimuPoisReg(coeffs0, interc0, n_samples=n_samples,
                           verbose=False, seed=123).simulate()
        # Rescale features to avoid overflows in Poisson simulations
        X /= np.linalg.norm(X, axis=1).reshape(n_samples, 1)
        model = ModelPoisReg(fit_intercept=fit_intercept).fit(X, y)
    else:
        raise ValueError("``model`` must be either 'linreg', 'logreg' or"
                         " 'poisreg'")

    solver.set_model(model)

    strength = 1e-2
    prox = ProxL2Sq(strength, (0, model.n_features))

    if type(solver) is not SDCA:
        solver.set_prox(prox)
    else:
        solver.set_prox(ProxZero())
        solver.l_l2sq = strength

    coeffs_solver = solver.solve()

    # Compare with BFGS
    bfgs = BFGS(max_iter=100,
                verbose=False).set_model(model).set_prox(prox)
    coeffs_bfgs = bfgs.solve()
    np.testing.assert_almost_equal(coeffs_solver, coeffs_bfgs,
                                   decimal=decimal)

    # We ensure that reached coeffs are not equal to zero
    self.assertGreater(norm(coeffs_solver), 0)

    self.assertAlmostEqual(
        solver.objective(coeffs_bfgs), solver.objective(coeffs_solver),
        delta=1e-2)
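# Hypothetical companion test (the method name and step size are illustrative)
# showing how the check_solver helper above is meant to be called from a test
# case.
def test_solver_svrg_matches_bfgs(self):
    """...Check that SVRG reaches the BFGS solution on logistic regression
    """
    solver = SVRG(step=1e-3, max_iter=100, verbose=False, tol=0)
    self.check_solver(solver, fit_intercept=True, model='logreg', decimal=1)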