    def test_ModelEpsilonInsensitive(self):
        """...Numerical consistency check of loss and gradient for
        Epsilon-Insensitive model
        """
        np.random.seed(12)
        n_samples, n_features = 5000, 10
        w0 = np.random.randn(n_features)
        c0 = np.random.randn()

        # First check with intercept
        X, y = SimuLinReg(w0, c0, n_samples=n_samples,
                          verbose=False).simulate()
        X_spars = csr_matrix(X)
        model = ModelEpsilonInsensitive(fit_intercept=True,
                                        threshold=1.13).fit(X, y)
        model_spars = ModelEpsilonInsensitive(fit_intercept=True,
                                              threshold=1.13).fit(X_spars, y)
        self.run_test_for_glm(model, model_spars, 1e-5, 1e-3)
        self._test_glm_intercept_vs_hardcoded_intercept(model)

        # Then check without intercept
        X, y = SimuLinReg(w0, None, n_samples=n_samples, verbose=False,
                          seed=2038).simulate()
        X_spars = csr_matrix(X)
        model = ModelEpsilonInsensitive(fit_intercept=False).fit(X, y)
        model_spars = ModelEpsilonInsensitive(fit_intercept=False).fit(
            X_spars, y)
        self.run_test_for_glm(model, model_spars, 1e-5, 1e-3)
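# For intuition about what the test above exercises: a minimal sketch of the
# epsilon-insensitive loss, assuming tick's ModelEpsilonInsensitive uses the
# standard SVR form averaged over samples (the exact normalization is an
# assumption, shown only for illustration):
def eps_insensitive_loss_sketch(X, y, w, c, threshold):
    residual = np.abs(y - X.dot(w) - c)
    return np.mean(np.maximum(residual - threshold, 0.))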
Example 2
    def test_SimuLinReg(self):
        """...Test simulation of a Linear Regression
        """
        n_samples = 10
        n_features = 3
        idx = np.arange(n_features)

        weights = np.exp(-idx / 10.)
        weights[::2] *= -1
        seed = 123
        simu = SimuLinReg(weights, None, n_samples=n_samples, seed=seed,
                          verbose=False)
        X, y = simu.simulate()

        X_truth = np.array([[1.4912667, 0.80881799, 0.26977298],
                            [1.23227551, 0.50697013, 1.9409132],
                            [1.8891494, 1.49834791, 2.41445794],
                            [0.19431319, 0.80245126, 1.02577552],
                            [-1.61687582, -1.08411865, -0.83438387],
                            [2.30419894, -0.68987056, -0.39750262],
                            [-0.28826405, -1.23635074, -0.76124386],
                            [-1.32869473, -1.8752391, -0.182537],
                            [0.79464218, 0.65055633, 1.57572506],
                            [0.71524202, 1.66759831, 0.88679047]])

        y_truth = np.array([
            -1.23590872, -5.1612244, -4.28171221, -1.00793646, 2.24652287,
            -2.7766077, -0.20433269, 0.46957959, -2.37562537, 0.35124802
        ])

        np.testing.assert_array_almost_equal(X_truth, X)
        np.testing.assert_array_almost_equal(y_truth, y)
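# For intuition, SimuLinReg draws features and computes labels from a linear
# model with additive Gaussian noise. A rough generative sketch (the feature
# distribution and noise scale here are assumptions, not tick's exact
# defaults, so it will not reproduce X_truth/y_truth above):
def simulate_linreg_sketch(weights, intercept, n_samples, seed, noise_std=1.):
    rng = np.random.RandomState(seed)
    X = rng.randn(n_samples, weights.shape[0])
    y = X.dot(weights) + (0. if intercept is None else intercept) \
        + noise_std * rng.randn(n_samples)
    return X, y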
Example 3
    def test_ModelLinReg(self):
        """...Numerical consistency check of loss and gradient for Linear
        Regression
        """
        np.random.seed(12)
        n_samples, n_features = 5000, 10
        w0 = np.random.randn(n_features)
        c0 = np.random.randn()

        # First check with intercept
        X, y = SimuLinReg(w0, c0, n_samples=n_samples,
                          verbose=False).simulate()
        X_spars = csr_matrix(X)
        model = ModelLinReg(fit_intercept=True).fit(X, y)
        model_spars = ModelLinReg(fit_intercept=True).fit(X_spars, y)
        self.run_test_for_glm(model, model_spars, 1e-5, 1e-4)
        self._test_glm_intercept_vs_hardcoded_intercept(model)

        # Then check without intercept
        X, y = SimuLinReg(w0,
                          None,
                          n_samples=n_samples,
                          verbose=False,
                          seed=2038).simulate()
        X_spars = csr_matrix(X)
        model = ModelLinReg(fit_intercept=False).fit(X, y)

        model_spars = ModelLinReg(fit_intercept=False).fit(X_spars, y)
        self.run_test_for_glm(model, model_spars, 1e-5, 1e-4)
        self._test_glm_intercept_vs_hardcoded_intercept(model)

        # Test for the Lipschitz constants without intercept
        self.assertAlmostEqual(model.get_lip_best(), 2.6873683857125981)
        self.assertAlmostEqual(model.get_lip_mean(), 9.95845726788432)
        self.assertAlmostEqual(model.get_lip_max(), 54.82616964855237)
        self.assertAlmostEqual(model_spars.get_lip_mean(),
                               model.get_lip_mean())
        self.assertAlmostEqual(model_spars.get_lip_max(), model.get_lip_max())

        # Test for the Lipschitz constants with intercept
        model = ModelLinReg(fit_intercept=True).fit(X, y)
        model_spars = ModelLinReg(fit_intercept=True).fit(X_spars, y)
        self.assertAlmostEqual(model.get_lip_best(), 2.687568385712598)
        self.assertAlmostEqual(model.get_lip_mean(), 10.958457267884327)
        self.assertAlmostEqual(model.get_lip_max(), 55.82616964855237)
        self.assertAlmostEqual(model_spars.get_lip_mean(),
                               model.get_lip_mean())
        self.assertAlmostEqual(model_spars.get_lip_max(), model.get_lip_max())
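        # Sanity sketch for the hardcoded constants above, assuming the usual
        # least squares identities: the per-sample gradient Lipschitz constant
        # is ||x_i||^2 (plus 1 when a ones column is appended for the
        # intercept, which would explain the +1 shift between the two blocks
        # of values), and the full-gradient constant is the top eigenvalue of
        # X^T X / n_samples.
        row_norms_sq = np.sum(X ** 2, axis=1)
        # row_norms_sq.mean() + 1 ~ model.get_lip_mean()
        # row_norms_sq.max() + 1 ~ model.get_lip_max()
        # np.linalg.eigvalsh(X.T.dot(X) / n_samples).max() ~ get_lip_best()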
Example 4
    def test_ModelLinRegWithInterceptsWithoutGlobalIntercept(self):
        """...Numerical consistency check of loss and gradient for linear
        regression with sample intercepts and no global intercept
        """
        np.random.seed(12)
        n_samples, n_features = 200, 5
        w0 = np.random.randn(n_features)
        intercept0 = 50 * weights_sparse_gauss(n_weights=n_samples, nnz=30)
        c0 = None
        X, y = SimuLinReg(w0,
                          c0,
                          n_samples=n_samples,
                          verbose=False,
                          seed=2038).simulate()
        # Add gross outliers to the labels
        y += intercept0
        X_spars = csr_matrix(X)
        model = ModelLinRegWithIntercepts(fit_intercept=True).fit(X, y)
        model_spars = ModelLinRegWithIntercepts(fit_intercept=True) \
            .fit(X_spars, y)
        self.run_test_for_glm(model, model_spars, 1e-4, 1e-4)

        self.assertAlmostEqual(model.get_lip_mean(), 7.324960325598536)
        self.assertAlmostEqual(model.get_lip_max(), 31.277118951892113)
        self.assertAlmostEqual(model.get_lip_mean(),
                               model_spars.get_lip_mean())
        self.assertAlmostEqual(model.get_lip_max(), model_spars.get_lip_max())
        self.assertAlmostEqual(model.get_lip_best(), 2.7267793249045438)
Example 5
def get_train_data(n_samples=2000, n_features=20, fit_intercept=True):
    np.random.seed(12)
    weights0 = weights_sparse_gauss(n_features)
    if fit_intercept:
        intercept0 = -1.
    else:
        intercept0 = None
    X, y = SimuLinReg(weights0, intercept0, n_samples=n_samples,
                      verbose=False).simulate()
    return X, y, weights0, intercept0
Example 6
def create_model(model_type, n_samples, n_features, with_intercept=True):
    weights = np.random.randn(n_features)
    intercept = None
    if with_intercept:
        intercept = np.random.normal()

    if model_type == 'Poisson':
        # we need to rescale features to avoid overflows
        weights /= n_features
        if intercept is not None:
            intercept /= n_features

    if model_type == 'Linear':
        simulator = SimuLinReg(weights,
                               intercept=intercept,
                               n_samples=n_samples,
                               verbose=False)
    elif model_type == 'Logistic':
        simulator = SimuLogReg(weights,
                               intercept=intercept,
                               n_samples=n_samples,
                               verbose=False)
    elif model_type == 'Poisson':
        simulator = SimuPoisReg(weights,
                                intercept=intercept,
                                n_samples=n_samples,
                                verbose=False)

    features, labels = simulator.simulate()

    if model_type == 'Linear':
        model = ModelLinReg(fit_intercept=with_intercept)
    elif model_type == 'Logistic':
        model = ModelLogReg(fit_intercept=with_intercept)
    elif model_type == 'Poisson':
        model = ModelPoisReg(fit_intercept=with_intercept)

    model.fit(features, labels)
    return model
Example 7
    def test_ModelEpsilonInsensitive_threshold(self):
        """...Test the threshold parameter of Epsilon-Insensitive model
        """
        np.random.seed(12)
        n_samples, n_features = 5000, 10
        w0 = np.random.randn(n_features)
        c0 = np.random.randn()
        # First check with intercept
        X, y = SimuLinReg(w0, c0, n_samples=n_samples,
                          verbose=False).simulate()

        model = ModelEpsilonInsensitive(threshold=1.541).fit(X, y)
        self.assertEqual(model._model.get_threshold(), 1.541)
        model.threshold = 3.14
        self.assertEqual(model._model.get_threshold(), 3.14)

        msg = '^threshold must be > 0$'
        with self.assertRaisesRegex(RuntimeError, msg):
            model = ModelEpsilonInsensitive(threshold=-1).fit(X, y)
        with self.assertRaisesRegex(RuntimeError, msg):
            model.threshold = 0.
Example 8
    def test_ModelLinRegWithInterceptsWithGlobalInterceptExtras(self):
        """...Extra tests for linear regression with sample intercepts and
        global intercept, check gradient against a handmade gradient
        """
        np.random.seed(12)
        n_samples, n_features = 200, 5
        w0 = np.random.randn(n_features)
        intercept0 = 50 * weights_sparse_gauss(n_weights=n_samples, nnz=30)
        c0 = -1.
        X, y = SimuLinReg(w0, c0, n_samples=n_samples, verbose=False,
                          seed=2038).simulate()
        # Add gross outliers to the labels
        y += intercept0
        model = ModelLinRegWithIntercepts(fit_intercept=True).fit(X, y)
        coeffs = np.random.randn(model.n_coeffs)
        grad1 = model.grad(coeffs)
        # Handmade gradient: stacking [X | 1 | I_n] turns the model into an
        # ordinary least squares problem in (weights, global intercept,
        # sample intercepts), whose gradient is X2^T (X2 w - y) / n
        X2 = np.hstack((X, np.ones((n_samples, 1)), np.identity(n_samples)))
        grad2 = X2.T.dot(X2.dot(coeffs) - y) / n_samples
        np.testing.assert_almost_equal(grad1, grad2, decimal=10)
Example 9
    def test_set_model_and_set_prox(self):
        np.random.seed(12)
        n_samples = TestSolver.n_samples
        n_features = TestSolver.n_features
        weights0 = weights_sparse_gauss(n_features, nnz=5)
        interc0 = 2.
        model = ModelLinReg()
        msg = '^Passed object ModelLinReg has not been fitted. You must call' \
              ' ``fit`` on it before passing it to ``set_model``$'
        with self.assertRaisesRegex(ValueError, msg):
            for solver_class in self.solvers:
                if solver_class is SDCA:
                    solver = solver_class(l_l2sq=1e-1)
                else:
                    solver = solver_class()
                solver.set_model(model)

        X, y = SimuLinReg(weights0,
                          interc0,
                          n_samples=n_samples,
                          verbose=False,
                          seed=123,
                          dtype=self.dtype).simulate()
        prox = ProxL2Sq(strength=1e-1)
        msg = '^Passed object of class ProxL2Sq is not a Model class$'
        with self.assertRaisesRegex(ValueError, msg):
            for solver_class in self.solvers:
                if solver_class is SDCA:
                    solver = solver_class(l_l2sq=1e-1)
                else:
                    solver = solver_class()
                solver.set_model(prox)
        model.fit(X, y)
        msg = '^Passed object of class ModelLinReg is not a Prox class$'
        with self.assertRaisesRegex(ValueError, msg):
            for solver_class in self.solvers:
                if solver_class is SDCA:
                    solver = solver_class(l_l2sq=1e-1)
                else:
                    solver = solver_class()
                solver.set_model(model).set_prox(model)
Example 10
    def test_robust_model_serialization(self):
        """...Test serialization of robust models
        """
        model_map = {
            ModelAbsoluteRegression: SimuLinReg,
            ModelEpsilonInsensitive: SimuLinReg,
            ModelHuber: SimuLinReg,
            ModelLinRegWithIntercepts: SimuLinReg,
            ModelModifiedHuber: SimuLogReg
        }

        for mod in model_map:
            np.random.seed(12)
            n_samples, n_features = 100, 5
            w0 = np.random.randn(n_features)
            intercept0 = 50 * weights_sparse_gauss(n_weights=n_samples, nnz=30)
            c0 = None
            X, y = SimuLinReg(w0,
                              c0,
                              n_samples=n_samples,
                              verbose=False,
                              seed=2038).simulate()

            if mod == ModelLinRegWithIntercepts:
                y += intercept0

            model = mod(fit_intercept=False).fit(X, y)

            pickled = pickle.loads(pickle.dumps(model))

            self.assertTrue(model._model.compare(pickled._model))

            if mod == ModelLinRegWithIntercepts:
                test_vector = np.hstack((X[0], np.ones(n_samples)))
                self.assertEqual(model.loss(test_vector),
                                 pickled.loss(test_vector))
            else:
                self.assertEqual(model.loss(X[0]), pickled.loss(X[0]))
Example 11
    def test_serializing_solvers(self):
        """...Test serialization of solvers
        """
        ratio = 0.5
        l_enet = 1e-2
        sd = ratio * l_enet

        solvers = [
            AdaGrad(step=1e-3, max_iter=100, verbose=False, tol=0),
            SGD(step=1e-3, max_iter=100, verbose=False, tol=0),
            SDCA(l_l2sq=sd, max_iter=100, verbose=False, tol=0),
            SAGA(step=1e-3, max_iter=100, verbose=False, tol=0),
            SVRG(step=1e-3, max_iter=100, verbose=False, tol=0)
        ]
        model_map = {
            ModelLinReg: SimuLinReg,
            ModelLogReg: SimuLogReg,
            ModelPoisReg: SimuPoisReg,
            ModelHinge: SimuLogReg,
            ModelQuadraticHinge: SimuLogReg,
            ModelSmoothedHinge: SimuLogReg,
            ModelAbsoluteRegression: SimuLinReg,
            ModelEpsilonInsensitive: SimuLinReg,
            ModelHuber: SimuLinReg,
            ModelLinRegWithIntercepts: SimuLinReg,
            ModelModifiedHuber: SimuLogReg
        }

        for solver in solvers:
            for mod in model_map:

                np.random.seed(12)
                n_samples, n_features = 100, 5
                w0 = np.random.randn(n_features)
                intercept0 = 50 * weights_sparse_gauss(n_weights=n_samples,
                                                       nnz=30)
                c0 = None
                X, y = SimuLinReg(w0,
                                  c0,
                                  n_samples=n_samples,
                                  verbose=False,
                                  seed=2038).simulate()

                if mod == ModelLinRegWithIntercepts:
                    y += intercept0

                model = mod(fit_intercept=False).fit(X, y)

                prox = ProxL1(2.)
                solver.set_model(model)
                solver.set_prox(prox)

                pickled = pickle.loads(pickle.dumps(solver))

                self.assertTrue(solver._solver.compare(pickled._solver))

                self.assertTrue(
                    solver.model._model.compare(pickled.model._model))

                self.assertTrue(solver.prox._prox.compare(pickled.prox._prox))

                if mod == ModelLinRegWithIntercepts:
                    test_vector = np.hstack((X[0], np.ones(n_samples)))
                    self.assertEqual(model.loss(test_vector),
                                     solver.model.loss(test_vector))
                else:
                    self.assertEqual(model.loss(X[0]), solver.model.loss(X[0]))
Example 12
"""
Generates Linear, Logistic and Poisson regression realizations given a
weight vector.
"""

import matplotlib.pyplot as plt
import numpy as np
from tick.linear_model import SimuLinReg, SimuLogReg, SimuPoisReg

n_samples, n_features = 150, 2

weights0 = np.array([0.3, 1.2])
intercept0 = 0.5

simu_linreg = SimuLinReg(weights0,
                         intercept0,
                         n_samples=n_samples,
                         seed=123,
                         verbose=False)
X_linreg, y_linreg = simu_linreg.simulate()

simu_logreg = SimuLogReg(weights0,
                         intercept0,
                         n_samples=n_samples,
                         seed=123,
                         verbose=False)
X_logreg, y_logreg = simu_logreg.simulate()

simu_poisreg = SimuPoisReg(weights0,
                           intercept0,
                           n_samples=n_samples,
                           link='exponential',
                           seed=123,
                           verbose=False)
X_poisreg, y_poisreg = simu_poisreg.simulate()
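# The matplotlib import above suggests the original script visualizes the
# simulated datasets; a minimal sketch of one such figure (the layout is an
# assumption, not the original plot):
fig, axes = plt.subplots(1, 3, figsize=(12, 4), sharex=True)
for ax, (X_, y_, title) in zip(axes, [(X_linreg, y_linreg, 'Linear'),
                                      (X_logreg, y_logreg, 'Logistic'),
                                      (X_poisreg, y_poisreg, 'Poisson')]):
    # Color each simulated sample by its label value
    ax.scatter(X_[:, 0], X_[:, 1], c=y_, s=15)
    ax.set_title(title)
plt.show()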
Example 13
    def check_solver(self,
                     solver,
                     fit_intercept=True,
                     model='logreg',
                     decimal=1):
        """Check solver instance finds same parameters as scipy BFGS

        Parameters
        ----------
        solver : `Solver`
            Instance of a solver to be tested

        fit_intercept : `bool`, default=True
            If `True` the model uses an intercept

        model : 'linreg' | 'logreg' | 'poisreg', default='logreg'
            Name of the model used to test the solver

        decimal : `int`, default=1
            Number of decimals required for the test
        """
        # Set seed for data simulation
        np.random.seed(12)
        n_samples = TestSolver.n_samples
        n_features = TestSolver.n_features

        coeffs0 = weights_sparse_gauss(n_features, nnz=5)
        if fit_intercept:
            interc0 = 2.
        else:
            interc0 = None

        if model == 'linreg':
            X, y = SimuLinReg(coeffs0,
                              interc0,
                              n_samples=n_samples,
                              verbose=False,
                              seed=123).simulate()
            model = ModelLinReg(fit_intercept=fit_intercept).fit(X, y)
        elif model == 'logreg':
            X, y = SimuLogReg(coeffs0,
                              interc0,
                              n_samples=n_samples,
                              verbose=False,
                              seed=123).simulate()
            model = ModelLogReg(fit_intercept=fit_intercept).fit(X, y)
        elif model == 'poisreg':
            X, y = SimuPoisReg(coeffs0,
                               interc0,
                               n_samples=n_samples,
                               verbose=False,
                               seed=123).simulate()
            # Rescale features to avoid overflows in Poisson simulations
            X /= np.linalg.norm(X, axis=1).reshape(n_samples, 1)
            model = ModelPoisReg(fit_intercept=fit_intercept).fit(X, y)
        else:
            raise ValueError("``model`` must be either 'linreg', 'logreg' or"
                             " 'poisreg'")

        solver.set_model(model)

        strength = 1e-2
        prox = ProxL2Sq(strength, (0, model.n_features))

        if type(solver) is not SDCA:
            solver.set_prox(prox)
        else:
            solver.set_prox(ProxZero())
            solver.l_l2sq = strength

        coeffs_solver = solver.solve()
        # Compare with BFGS
        bfgs = BFGS(max_iter=100,
                    verbose=False).set_model(model).set_prox(prox)
        coeffs_bfgs = bfgs.solve()
        np.testing.assert_almost_equal(coeffs_solver,
                                       coeffs_bfgs,
                                       decimal=decimal)

        # Check that the reached coefficients are not all zero
        self.assertGreater(norm(coeffs_solver), 0)

        self.assertAlmostEqual(solver.objective(coeffs_bfgs),
                               solver.objective(coeffs_solver),
                               delta=1e-2)
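# Typical usage of this helper inside a test method (SVRG and the 'linreg'
# model name are just examples; the step and decimal values are assumptions,
# not taken from the original suite):
#
#     def test_solver_svrg(self):
#         solver = SVRG(step=1e-3, max_iter=100, verbose=False)
#         self.check_solver(solver, fit_intercept=True, model='linreg',
#                           decimal=1)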