def test_SimuPoisReg(self):
    """...Check that a seeded Poisson regression simulation reproduces
    the stored reference features and labels
    """
    n_samples, n_features = 10, 3

    # Exponentially decaying weights, with every even-indexed entry negated
    weights = np.exp(-np.arange(n_features) / 10.)
    weights[::2] *= -1

    simulator = SimuPoisReg(weights, None, n_samples=n_samples, seed=123,
                            verbose=False)
    X, y = simulator.simulate()

    # Reference values the simulation is compared against
    expected_X = np.array([[1.4912667, 0.80881799, 0.26977298],
                           [1.23227551, 0.50697013, 1.9409132],
                           [1.8891494, 1.49834791, 2.41445794],
                           [0.19431319, 0.80245126, 1.02577552],
                           [-1.61687582, -1.08411865, -0.83438387],
                           [2.30419894, -0.68987056, -0.39750262],
                           [-0.28826405, -1.23635074, -0.76124386],
                           [-1.32869473, -1.8752391, -0.182537],
                           [0.79464218, 0.65055633, 1.57572506],
                           [0.71524202, 1.66759831, 0.88679047]])
    expected_y = np.array([0., 0., 0., 0., 5., 0., 0., 1., 0., 2.])

    np.testing.assert_array_almost_equal(expected_X, X)
    np.testing.assert_array_almost_equal(expected_y, y)
def test_sdca_identity_poisreg(self):
    """...Test SDCA on the specific case of Poisson regression with
    identity link, with and without an intercept
    """
    l_l2sq = 1e-3
    n_samples, n_features = 10000, 3

    np.random.seed(123)
    weight0 = np.random.rand(n_features)
    features = np.random.rand(n_samples, n_features)

    for intercept in (None, 0.45):
        fit_intercept = intercept is not None

        simulator = SimuPoisReg(weight0, intercept=intercept,
                                features=features, n_samples=n_samples,
                                link='identity', verbose=False)
        # `features` is rebound on purpose: the next iteration reuses
        # whatever the simulation returned
        features, labels = simulator.simulate()

        model = ModelPoisReg(fit_intercept=fit_intercept, link='identity')
        model.fit(features, labels)

        sdca = SDCA(l_l2sq=l_l2sq, max_iter=100, verbose=False, tol=1e-14,
                    seed=Test.sto_seed)
        sdca.set_model(model).set_prox(ProxZero())

        # Constant starting dual vector with value sqrt(_rand_max * l_l2sq)
        dual_scale = np.sqrt(sdca._rand_max * l_l2sq)
        sdca.solve(dual_scale * np.ones(sdca._rand_max))

        # The duality gap must vanish at the solution
        primal_value = sdca.objective(sdca.solution)
        dual_value = sdca.dual_objective(sdca.dual_solution)
        self.assertAlmostEqual(primal_value, dual_value)

        # The original vector must be approximately retrieved
        original_coeffs = (np.hstack((weight0, intercept))
                           if fit_intercept else weight0)
        np.testing.assert_array_almost_equal(original_coeffs,
                                             sdca.solution, decimal=1)

        # SVRG with an explicit L2-squared prox must reach the same point
        svrg = SVRG(max_iter=100, verbose=False, tol=1e-14,
                    seed=Test.sto_seed)
        svrg.set_model(model).set_prox(ProxL2Sq(l_l2sq))
        svrg.solve(0.5 * np.ones(model.n_coeffs), step=1e-2)
        np.testing.assert_array_almost_equal(svrg.solution, sdca.solution,
                                             decimal=4)
def test_ModelPoisReg(self):
    """...Numerical consistency check of the loss and gradient of the
    Poisson regression model, plus a check of its self-concordance
    constant
    """
    np.random.seed(12)
    n_samples, n_features = 100, 10
    w0 = np.random.randn(n_features) / n_features
    c0 = np.random.randn() / n_features

    # First with an intercept, then without
    for intercept, fit_intercept in ((c0, True), (None, False)):
        simulator = SimuPoisReg(w0, intercept, n_samples=n_samples,
                                verbose=False, seed=1234)
        X, y = simulator.simulate()
        # Rescale features since ModelPoisReg with exponential link
        # (default) is not overflow proof
        X /= n_features
        X_spars = csr_matrix(X)
        model = ModelPoisReg(fit_intercept=fit_intercept).fit(X, y)
        model_sparse = ModelPoisReg(fit_intercept=fit_intercept).fit(
            X_spars, y)
        self.run_test_for_glm(model, model_sparse, 1e-3, 1e-4)
        self._test_glm_intercept_vs_hardcoded_intercept(model)

    # Test the self-concordance constant on all-zero features
    n_samples, n_features = 5, 2
    X = np.zeros((n_samples, n_features))
    X_spars = csr_matrix(X)
    model = ModelPoisReg(fit_intercept=True, link="identity")
    model_sparse = ModelPoisReg(fit_intercept=True, link="identity")

    # Each label vector is paired with the constant expected after fitting
    sc_cases = (
        (np.array([0, 0, 3, 2, 5], dtype=np.double), 1.41421356237),
        (np.array([0, 0, 3, 2, 1], dtype=np.double), 2.),
    )
    for y, expected_sc_constant in sc_cases:
        model.fit(X, y)
        model_sparse.fit(X_spars, y)
        self.assertAlmostEqual(model._sc_constant, expected_sc_constant)
        self.assertAlmostEqual(model_sparse._sc_constant,
                               expected_sc_constant)
def get_train_data(n_samples=2000, n_features=20, fit_intercept=True):
    """Simulate a Poisson regression training set

    Parameters
    ----------
    n_samples : `int`, default=2000
        Number of samples to simulate

    n_features : `int`, default=20
        Number of features, i.e. length of the weights vector

    fit_intercept : `bool`, default=True
        If `True` the data is simulated with an intercept equal to 1.,
        otherwise the intercept is `None`

    Returns
    -------
    output : `tuple`
        ``(X, y, weights0, intercept0)``: the two simulated arrays
        followed by the weights and intercept used to simulate them
    """
    # Seed the global generator before drawing the sparse weights
    np.random.seed(123)
    weights0 = weights_sparse_gauss(n_features, nnz=2)
    intercept0 = 1. if fit_intercept else None

    simulator = SimuPoisReg(weights0, intercept0, n_samples=n_samples,
                            seed=123, verbose=False)
    X, y = simulator.simulate()
    return X, y, weights0, intercept0
def create_model(model_type, n_samples, n_features, with_intercept=True):
    """Simulate data for a generalized linear model and return the model
    fitted on it

    Parameters
    ----------
    model_type : 'Linear' | 'Logistic' | 'Poisson'
        Kind of model to simulate and fit

    n_samples : `int`
        Number of simulated samples

    n_features : `int`
        Number of features, i.e. length of the random weights vector

    with_intercept : `bool`, default=True
        If `True` a random intercept is used for the simulation and the
        model is created with ``fit_intercept=True``

    Returns
    -------
    output : `ModelLinReg` | `ModelLogReg` | `ModelPoisReg`
        The model, after ``fit`` has been called on the simulated data

    Raises
    ------
    ValueError
        If ``model_type`` is not one of the supported names
    """
    # Fail early with a clear message instead of an UnboundLocalError
    # further down when no branch matches
    if model_type not in ('Linear', 'Logistic', 'Poisson'):
        raise ValueError("``model_type`` must be either 'Linear', "
                         "'Logistic' or 'Poisson', got %r" % (model_type,))

    weights = np.random.randn(n_features)
    intercept = None
    if with_intercept:
        intercept = np.random.normal()

    if model_type == 'Poisson':
        # we need to scale down the coefficients to avoid overflows
        weights /= n_features
        if intercept is not None:
            intercept /= n_features

    if model_type == 'Linear':
        simu_class, model_class = SimuLinReg, ModelLinReg
    elif model_type == 'Logistic':
        simu_class, model_class = SimuLogReg, ModelLogReg
    else:
        simu_class, model_class = SimuPoisReg, ModelPoisReg

    simulator = simu_class(weights, intercept=intercept,
                           n_samples=n_samples, verbose=False)
    # ``simulate`` returns the features first, then the labels
    features, labels = simulator.simulate()

    model = model_class(fit_intercept=with_intercept)
    model.fit(features, labels)
    return model
def check_solver(self, solver, fit_intercept=True, model='logreg',
                 decimal=1):
    """Check that a solver instance finds the same parameters as BFGS

    Parameters
    ----------
    solver : `Solver`
        Instance of a solver to be tested

    fit_intercept : `bool`, default=True
        Model uses an intercept if `True`

    model : 'linreg' | 'logreg' | 'poisreg', default='logreg'
        Name of the model used to test the solver

    decimal : `int`, default=1
        Number of decimals required for the test
    """
    # Set seed for data simulation
    np.random.seed(12)
    n_samples = TestSolver.n_samples
    n_features = TestSolver.n_features

    coeffs0 = weights_sparse_gauss(n_features, nnz=5)
    interc0 = 2. if fit_intercept else None

    # Pick the simulation / model pair matching the requested name
    if model == 'linreg':
        simu_class, model_class = SimuLinReg, ModelLinReg
    elif model == 'logreg':
        simu_class, model_class = SimuLogReg, ModelLogReg
    elif model == 'poisreg':
        simu_class, model_class = SimuPoisReg, ModelPoisReg
    else:
        raise ValueError("``model`` must be either 'linreg', 'logreg' or "
                         "'poisreg'")

    simu_kwargs = dict(n_samples=n_samples, verbose=False, seed=123)
    X, y = simu_class(coeffs0, interc0, **simu_kwargs).simulate()
    if model == 'poisreg':
        # Give every sample unit norm to avoid overflows with the
        # Poisson model
        X /= np.linalg.norm(X, axis=1).reshape(n_samples, 1)
    glm = model_class(fit_intercept=fit_intercept).fit(X, y)

    solver.set_model(glm)

    strength = 1e-2
    prox = ProxL2Sq(strength, (0, glm.n_features))

    if type(solver) is SDCA:
        # SDCA takes the ridge strength through ``l_l2sq``, not a prox
        solver.set_prox(ProxZero())
        solver.l_l2sq = strength
    else:
        solver.set_prox(prox)

    coeffs_solver = solver.solve()

    # Compare with BFGS
    bfgs = BFGS(max_iter=100, verbose=False).set_model(glm).set_prox(prox)
    coeffs_bfgs = bfgs.solve()
    np.testing.assert_almost_equal(coeffs_solver, coeffs_bfgs,
                                   decimal=decimal)

    # We ensure that reached coeffs are not equal to zero
    self.assertGreater(norm(coeffs_solver), 0)

    # Both solutions must give (almost) the same objective value
    self.assertAlmostEqual(solver.objective(coeffs_bfgs),
                           solver.objective(coeffs_solver), delta=1e-2)
""" import numpy as np import matplotlib.pyplot as plt from tick.simulation import SimuPoisReg, weights_sparse_gauss from tick.inference import PoissonRegression from tick.plot import plot_history n_samples = 50000 n_features = 100 np.random.seed(123) weight0 = weights_sparse_gauss(n_features, nnz=int(n_features-1)) / 20. intercept0 = -0.1 X, y = SimuPoisReg(weight0, intercept0, n_samples=n_samples, verbose=False, seed=123).simulate() opts = {'verbose': False, 'record_every': 1, 'tol': 1e-8, 'max_iter': 40} poisson_regressions = [ PoissonRegression(solver='gd', **opts), PoissonRegression(solver='agd', **opts), PoissonRegression(solver='svrg', random_state=1234, **opts), PoissonRegression(solver='bfgs', **opts) ] for poisson_regression in poisson_regressions: poisson_regression.fit(X, y) plot_history(poisson_regressions, log_scale=True, dist_min=True) plt.title('Solvers comparison for Poisson regression', fontsize=16)
from tick.simulation import SimuLinReg, SimuLogReg, SimuPoisReg

# NOTE(review): `np` and `plt` are used below but their imports are not
# visible in this fragment -- presumably imported earlier in the file.

# All three simulations share the same size, weights, intercept and seed
n_samples, n_features = 150, 2

weights0 = np.array([0.3, 1.2])
intercept0 = 0.5

# Linear regression data
simu_linreg = SimuLinReg(weights0, intercept0, n_samples=n_samples,
                         seed=123)
X_linreg, y_linreg = simu_linreg.simulate()

# Logistic regression data
simu_logreg = SimuLogReg(weights0, intercept0, n_samples=n_samples,
                         seed=123)
X_logreg, y_logreg = simu_logreg.simulate()

# Poisson regression data, with the exponential link
simu_poisreg = SimuPoisReg(weights0, intercept0, n_samples=n_samples,
                           link='exponential', seed=123)
X_poisreg, y_poisreg = simu_poisreg.simulate()

plt.figure(figsize=(12, 3))

# First panel: linear regression samples coloured by their label value
plt.subplot(1, 3, 1)
plt.scatter(*X_linreg.T, c=y_linreg, cmap='RdBu')
plt.colorbar()
plt.title('Linear', fontsize=16)

# Second panel: logistic regression samples, label 1 in blue and
# label -1 in red
plt.subplot(1, 3, 2)
plt.scatter(*X_logreg[y_logreg == 1].T, color='b', s=10, label=r'$y_i=1$')
plt.scatter(*X_logreg[y_logreg == -1].T, color='r', s=10,
            label=r'$y_i=-1$')
plt.legend(loc='upper left')