def prepare_solver(solver, X, y, fit_intercept=True, model="logistic",
                   prox="l2"):
    """Attach a fitted model and, optionally, a prox to ``solver``.

    Parameters
    ----------
    solver : object
        Solver to configure. It must expose ``set_model`` and ``set_prox``.

    X, y :
        Data forwarded unchanged to the model's ``fit(X, y)``.

    fit_intercept : `bool`, default=True
        Forwarded to the model constructor.

    model : 'logistic' | 'poisson' | object, default='logistic'
        Name of the model to build and fit. A non-string value is handed
        to ``solver.set_model`` as is.

    prox : 'l2' | object | `None`, default='l2'
        With ``'l2'`` a ``ProxL2Sq`` of strength ``TestSolver.l_l2sq`` on
        the range ``(0, model.n_coeffs)`` is used. With `None` no prox is
        set. Any other value is handed to ``solver.set_prox`` as is.

    Raises
    ------
    ValueError
        If ``model`` is a string other than 'logistic' or 'poisson'.
    """
    if model == "logistic":
        model = ModelLogReg(fit_intercept=fit_intercept).fit(X, y)
    elif model == "poisson":
        model = ModelPoisReg(fit_intercept=fit_intercept).fit(X, y)
    elif isinstance(model, str):
        # A mistyped name would otherwise reach the solver as a bare string
        raise ValueError("``model`` must be either 'logistic' or 'poisson'")
    solver.set_model(model)

    if prox == "l2":
        prox = ProxL2Sq(TestSolver.l_l2sq, (0, model.n_coeffs))
    if prox is not None:
        solver.set_prox(prox)
def create_model(model_type, n_samples, n_features, with_intercept=True):
    """Simulate data for a generalized linear model and fit a model on it.

    Parameters
    ----------
    model_type : 'Linear' | 'Logistic' | 'Poisson'
        Which simulator / model pair to use.

    n_samples : `int`
        Number of samples passed to the simulator.

    n_features : `int`
        Number of random weights to draw.

    with_intercept : `bool`, default=True
        If `True`, a random intercept is drawn for the simulation. The
        value is also forwarded as ``fit_intercept`` to the model.

    Returns
    -------
    model
        The model on which ``fit`` was called with the simulated data.

    Raises
    ------
    ValueError
        If ``model_type`` is not one of the supported names.
    """
    if model_type not in ('Linear', 'Logistic', 'Poisson'):
        raise ValueError("``model_type`` must be either 'Linear', "
                         "'Logistic' or 'Poisson'")

    weights = np.random.randn(n_features)
    intercept = np.random.normal() if with_intercept else None

    if model_type == 'Poisson':
        # Rescale the weights (and intercept) to avoid overflows
        weights /= n_features
        if intercept is not None:
            intercept /= n_features

    if model_type == 'Linear':
        simu_cls, model_cls = SimuLinReg, ModelLinReg
    elif model_type == 'Logistic':
        simu_cls, model_cls = SimuLogReg, ModelLogReg
    else:
        simu_cls, model_cls = SimuPoisReg, ModelPoisReg

    simulator = simu_cls(weights, intercept=intercept, n_samples=n_samples,
                         verbose=False)
    # The two simulated arrays go to ``fit`` in the order they are returned
    features, labels = simulator.simulate()

    model = model_cls(fit_intercept=with_intercept)
    model.fit(features, labels)
    return model
import matplotlib.pyplot as plt
import numpy as np
from tick.optim.model import ModelPoisReg

# Plot the ModelPoisReg loss over a grid of values, one curve per link.
n_points = 1000
grid = np.linspace(-1.5, 2, n_points)

curves = [
    ("ModelPoisReg(link='exponential')",
     ModelPoisReg(fit_intercept=False, link='exponential')),
    ("ModelPoisReg(link='identity')",
     ModelPoisReg(fit_intercept=False, link='identity')),
]

plt.figure(figsize=(8, 6))

for curve_label, poisson_model in curves:
    # Fit on a single observation: one feature equal to 1, label equal to 1
    poisson_model.fit(np.array([[1.]]), np.array([1.]))
    losses = [poisson_model.loss(np.array([value])) for value in grid]
    plt.plot(grid, losses, lw=4, label=curve_label)

plt.xlabel(r"$y'$", fontsize=16)
plt.ylabel(r"$y' \mapsto \ell(1, y')$", fontsize=16)
plt.title('Losses for count data', fontsize=20)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.legend(fontsize=16)
plt.tight_layout()
def check_solver(self, solver, fit_intercept=True, model='logreg',
                 decimal=1):
    """Check that a solver reaches the same coefficients as BFGS

    Data is simulated with fixed seeds, the requested model is fitted on
    it, and ``solver`` is run with a ridge penalization. Its solution and
    objective value are then compared with those of ``BFGS``.

    Parameters
    ----------
    solver : `Solver`
        Instance of a solver to be tested

    fit_intercept : `bool`, default=True
        Model uses intercept if `True`

    model : 'linreg' | 'logreg' | 'poisreg', default='logreg'
        Name of the model used to test the solver

    decimal : `int`, default=1
        Number of decimals required for the test
    """
    # Fix the seed so the simulated data is reproducible
    np.random.seed(12)
    n_samples = TestSolver.n_samples
    n_features = TestSolver.n_features

    true_coeffs = weights_sparse_gauss(n_features, nnz=5)
    true_intercept = 2. if fit_intercept else None

    if model == 'linreg':
        simu_cls, model_cls = SimuLinReg, ModelLinReg
    elif model == 'logreg':
        simu_cls, model_cls = SimuLogReg, ModelLogReg
    elif model == 'poisreg':
        simu_cls, model_cls = SimuPoisReg, ModelPoisReg
    else:
        raise ValueError("``model`` must be either 'linreg', 'logreg' or"
                         " 'poisreg'")

    simulator = simu_cls(true_coeffs, true_intercept, n_samples=n_samples,
                         verbose=False, seed=123)
    X, y = simulator.simulate()
    if model == 'poisreg':
        # Rescale features to avoid overflows in Poisson simulations
        X /= np.linalg.norm(X, axis=1).reshape(n_samples, 1)
    fitted_model = model_cls(fit_intercept=fit_intercept).fit(X, y)

    solver.set_model(fitted_model)

    strength = 1e-2
    prox = ProxL2Sq(strength, (0, fitted_model.n_features))

    if type(solver) is SDCA:
        # SDCA takes the ridge strength through its own attribute
        solver.set_prox(ProxZero())
        solver.l_l2sq = strength
    else:
        solver.set_prox(prox)

    coeffs_solver = solver.solve()

    # Reference solution obtained with BFGS
    bfgs = (BFGS(max_iter=100, verbose=False)
            .set_model(fitted_model)
            .set_prox(prox))
    coeffs_bfgs = bfgs.solve()

    np.testing.assert_almost_equal(coeffs_solver, coeffs_bfgs,
                                   decimal=decimal)

    # The solution must not be the zero vector
    self.assertGreater(norm(coeffs_solver), 0)

    objective_bfgs = solver.objective(coeffs_bfgs)
    objective_solver = solver.objective(coeffs_solver)
    self.assertAlmostEqual(objective_bfgs, objective_solver, delta=1e-2)
def test_ModelPoisReg(self):
    """...Numerical consistency check of loss and gradient for Poisson
    Regression, and check of the self-concordance constant
    """
    np.random.seed(12)
    n_samples, n_features = 100, 10
    w0 = np.random.randn(n_features) / n_features
    c0 = np.random.randn() / n_features

    # Same checks twice: first with an intercept, then without
    for intercept, fit_intercept in ((c0, True), (None, False)):
        simulator = SimuPoisReg(w0, intercept, n_samples=n_samples,
                                verbose=False, seed=1234)
        X, y = simulator.simulate()
        # Rescale features since ModelPoisReg with exponential link
        # (default) is not overflow proof
        X /= n_features
        X_spars = csr_matrix(X)
        model = ModelPoisReg(fit_intercept=fit_intercept).fit(X, y)
        model_sparse = ModelPoisReg(fit_intercept=fit_intercept).fit(
            X_spars, y)
        self.run_test_for_glm(model, model_sparse, 1e-3, 1e-4)
        self._test_glm_intercept_vs_hardcoded_intercept(model)

    # Test the self-concordance constant
    n_samples, n_features = 5, 2
    X = np.zeros((n_samples, n_features))
    X_spars = csr_matrix(X)

    y = np.array([0, 0, 3, 2, 5], dtype=np.double)
    model = ModelPoisReg(fit_intercept=True, link="identity").fit(X, y)
    model_sparse = ModelPoisReg(fit_intercept=True,
                                link="identity").fit(X_spars, y)
    for fitted in (model, model_sparse):
        self.assertAlmostEqual(fitted._sc_constant, 1.41421356237)

    # Refit the same model objects on new labels
    y = np.array([0, 0, 3, 2, 1], dtype=np.double)
    model.fit(X, y)
    model_sparse.fit(X_spars, y)
    for fitted in (model, model_sparse):
        self.assertAlmostEqual(fitted._sc_constant, 2.)
def _construct_model_obj(self, fit_intercept=True):
    """Build a ``ModelPoisReg`` with the exponential link

    Parameters
    ----------
    fit_intercept : `bool`, default=True
        Forwarded to the ``ModelPoisReg`` constructor

    Returns
    -------
    output : `ModelPoisReg`
        The newly created model object
    """
    model_kwargs = {'fit_intercept': fit_intercept, 'link': 'exponential'}
    return ModelPoisReg(**model_kwargs)