예제 #1
0
    def test_ProxElasticNet(self):
        """...Test of ProxElasticNet

        The elastic-net prox is checked against ProxL1 followed by ProxL2Sq
        built with the matching strengths, first with default options and
        then with ``positive=True``.
        """
        strength = 3e-2
        l1_ratio = .3
        step = 1.7
        reference = self.coeffs.copy()

        for options in ({}, {'positive': True}):
            enet = ProxElasticNet(strength, ratio=l1_ratio, **options)
            l1_part = ProxL1(l1_ratio * strength, **options)
            l2_part = ProxL2Sq((1 - l1_ratio) * strength, **options)

            # The penalty value must be the sum of its two components
            enet_value = enet.value(reference)
            parts_value = l1_part.value(reference) + l2_part.value(reference)
            self.assertAlmostEqual(enet_value, parts_value, delta=1e-15)

            # Applying the elastic-net prox must match applying the L1 prox
            # and then the squared-L2 prox in place on the same buffer
            expected = reference.copy()
            for part in (l1_part, l2_part):
                part.call(expected, step, expected)
            assert_almost_equal(enet.call(reference, step=step), expected,
                                decimal=10)
예제 #2
0
    def test_ProxL2Sq(self):
        """...Test of ProxL2Sq

        Covers the whole vector, the range (3, 8), the same range with
        ``positive=True`` and the same range with one step per coefficient.
        """
        strength = 3e-2
        step = 1.7
        coeffs = self.coeffs.copy().astype(self.dtype)

        def check_value(prox, penalized):
            # value() is compared with strength / 2 * ||penalized||^2
            self.assertAlmostEqual(prox.value(coeffs),
                                   0.5 * strength * norm(penalized)**2.,
                                   delta=self.delta)

        def check_output(result, expected):
            assert_almost_equal(result, expected,
                                decimal=self.decimal_places)

        # No range: every coefficient is scaled by 1 / (1 + step * strength)
        prox = ProxL2Sq(strength).astype(self.dtype)
        expected = coeffs.copy()
        expected *= 1. / (1. + step * strength)
        check_value(prox, coeffs)
        check_output(prox.call(coeffs, step=step), expected)

        # Range (3, 8): only coeffs[3:8] are expected to change
        prox = ProxL2Sq(strength, (3, 8)).astype(self.dtype)
        expected = coeffs.copy()
        expected[3:8] *= 1. / (1. + step * strength)
        check_value(prox, coeffs[3:8])
        check_output(prox.call(coeffs, step=step), expected)

        # Range (3, 8) with positive=True: negative results are set to zero
        prox = ProxL2Sq(strength, (3, 8), positive=True).astype(self.dtype)
        expected = coeffs.copy()
        window = expected[3:8]  # view: edits below land in ``expected``
        window *= 1. / (1. + step * strength)
        window[window < 0] = 0
        check_value(prox, coeffs[3:8])
        check_output(prox.call(coeffs, step=step), expected)

        # Range (3, 8) with a vector of steps, one per coefficient in range
        prox = ProxL2Sq(strength, (3, 8)).astype(self.dtype)
        expected = coeffs.copy()
        steps = np.linspace(1, 10, 5).astype(self.dtype)
        expected[3:8] *= 1. / (1. + steps * strength)
        check_value(prox, coeffs[3:8])
        check_output(prox.call(coeffs, steps), expected)
예제 #3
0
    def _test_solver_astype_consistency(self, create_solver):
        """...Check that a solver converted with ``astype('float32')``
        reaches the same solution as the float64 solver it was created from

        Parameters
        ----------
        create_solver : callable
            Called without arguments; must return the solver under test
        """
        # Launch this test only once
        if self.dtype != 'float64':
            return

        prox = ProxL2Sq(0.1)

        use_intercept = True
        # Only the labels and the features are used below
        labels, features, _, _ = self.generate_logistic_data(
            100, 30, 'float64', use_intercept)

        model = ModelLogReg(fit_intercept=use_intercept)
        model.fit(features, labels)

        double_solver = create_solver()
        double_solver.set_model(model).set_prox(prox)
        double_solution = double_solver.solve()

        single_solver = double_solver.astype('float32')
        single_solution = single_solver.solve()

        # Each solution must carry the precision of the solver behind it
        self.assertEqual(double_solution.dtype, 'float64')
        self.assertEqual(single_solution.dtype, 'float32')

        np.testing.assert_array_almost_equal(single_solution,
                                             double_solution,
                                             decimal=3)
예제 #4
0
    def test_sdca_identity_poisreg(self):
        """...Test SDCA on specific case of Poisson regression with
        identity link
        """
        ridge_strength = 1e-3
        n_samples = 10000
        n_features = 3

        np.random.seed(123)
        weight0 = np.random.rand(n_features)
        features = np.random.rand(n_samples, n_features)

        # Run once without intercept and once with an intercept of 0.45
        for intercept in [None, 0.45]:
            fit_intercept = intercept is not None

            simu = SimuPoisReg(weight0, intercept=intercept,
                               features=features, n_samples=n_samples,
                               link='identity', verbose=False)
            # ``features`` is rebound to what the simulation returns and is
            # reused as input of the next iteration
            features, labels = simu.simulate()

            model = ModelPoisReg(fit_intercept=fit_intercept,
                                 link='identity')
            model.fit(features, labels)

            sdca = SDCA(l_l2sq=ridge_strength, max_iter=100, verbose=False,
                        tol=1e-14, seed=Test.sto_seed)
            sdca.set_model(model).set_prox(ProxZero())

            # Constant dual starting point of size ``sdca._rand_max``
            dual_init = np.sqrt(sdca._rand_max * ridge_strength)
            dual_init = dual_init * np.ones(sdca._rand_max)
            sdca.solve(dual_init)

            # Check that duality gap is 0
            self.assertAlmostEqual(
                sdca.objective(sdca.solution),
                sdca.dual_objective(sdca.dual_solution))

            # Check that original vector is approximately retrieved
            original_coeffs = (np.hstack((weight0, intercept))
                               if fit_intercept else weight0)
            np.testing.assert_array_almost_equal(original_coeffs,
                                                 sdca.solution, decimal=1)

            # Ensure that we solve the same problem as other solvers
            svrg = SVRG(max_iter=100, verbose=False, tol=1e-14,
                        seed=Test.sto_seed)
            svrg.set_model(model).set_prox(ProxL2Sq(ridge_strength))
            svrg.solve(0.5 * np.ones(model.n_coeffs), step=1e-2)
            np.testing.assert_array_almost_equal(svrg.solution,
                                                 sdca.solution, decimal=4)
예제 #5
0
def run_solvers(model, l_l2sq):
    """Solve the same ridge-penalized problem with five solvers.

    BFGS is solved first; its solution and objective value are registered as
    minimizer and minimum in every solver's history (its own included, which
    is why BFGS is solved a second time).

    Parameters
    ----------
    model : fitted model
        Passed to ``set_model`` of every solver. ``get_lip_max`` and
        ``get_lip_best`` are used for the step sizes when available.

    l_l2sq : float
        Ridge strength: given to ``ProxL2Sq`` for BFGS, SVRG, GD and AGD, and
        to the ``SDCA`` constructor (which is paired with ``ProxZero``).

    Returns
    -------
    tuple
        ``(bfgs, svrg, sdca, gd, agd)``, all solved.
    """

    def inverse_lipschitz(getter_name, fallback):
        # Falls back when the getter is missing or raises AttributeError
        try:
            return 1. / getattr(model, getter_name)()
        except AttributeError:
            return fallback

    svrg_step = inverse_lipschitz('get_lip_max', 1e-3)
    gd_step = inverse_lipschitz('get_lip_best', 1e-1)

    def ridge():
        return ProxL2Sq(l_l2sq)

    def pin_reference(solver):
        # Register the BFGS optimum in the history of ``solver``
        solver.history.set_minimizer(bfgs.solution)
        solver.history.set_minimum(bfgs.objective(bfgs.solution))

    def solve_against_reference(solver, make_prox):
        solver.set_model(model).set_prox(make_prox())
        pin_reference(solver)
        solver.solve()
        return solver

    # First pass finds the optimum, second pass runs with it registered
    bfgs = BFGS(verbose=False, tol=1e-13)
    bfgs.set_model(model).set_prox(ridge())
    bfgs.solve()
    pin_reference(bfgs)
    bfgs.solve()

    svrg = solve_against_reference(
        SVRG(step=svrg_step, verbose=False, tol=1e-10, seed=seed), ridge)

    sdca = solve_against_reference(
        SDCA(l_l2sq, verbose=False, seed=seed, tol=1e-10), ProxZero)

    gd = solve_against_reference(
        GD(verbose=False, tol=1e-10, step=gd_step, linesearch=False), ridge)

    agd = solve_against_reference(
        AGD(verbose=False, tol=1e-10, step=gd_step, linesearch=False), ridge)

    return bfgs, svrg, sdca, gd, agd
예제 #6
0
파일: solver.py 프로젝트: thomasdes/tick
 def prepare_solver(solver, X, y, fit_intercept=True, model="logistic",
                    prox="l2"):
     """Attach a model fitted on ``(X, y)`` and optionally a prox to ``solver``

     Parameters
     ----------
     solver : solver instance
         Receives ``set_model`` and, unless ``prox`` is None, ``set_prox``

     X, y : features and labels passed to the model's ``fit``

     fit_intercept : `bool`, default=True
         Forwarded to the model constructor

     model : default="logistic"
         "logistic" builds a ModelLogReg, "poisson" a ModelPoisReg; any other
         value is handed to ``set_model`` as is

     prox : default="l2"
         "l2" builds a ProxL2Sq of strength ``TestSolver.l_l2sq`` over all
         coefficients; None sets no prox; any other value is handed to
         ``set_prox`` as is
     """
     if model == "logistic":
         fitted = ModelLogReg(fit_intercept=fit_intercept).fit(X, y)
     elif model == "poisson":
         fitted = ModelPoisReg(fit_intercept=fit_intercept).fit(X, y)
     else:
         fitted = model
     solver.set_model(fitted)

     penalty = prox
     if penalty == "l2":
         penalty = ProxL2Sq(TestSolver.l_l2sq, (0, fitted.n_coeffs))
     if penalty is None:
         return
     solver.set_prox(penalty)
예제 #7
0
파일: bfgs_test.py 프로젝트: thomasdes/tick
 def test_solver_bfgs(self):
     """...Check BFGS solver for Logistic Regression with Ridge
     penalization
     """
     # BFGS serves as the reference solver in other unittests, so its
     # solution is compared here with the parameters used for simulation
     np.random.seed(12)
     n_samples = 3000
     n_features = 10
     true_coeffs = weights_sparse_gauss(n_features, nnz=5)
     true_intercept = 2.

     simu = SimuLogReg(true_coeffs, true_intercept, n_samples=n_samples,
                       verbose=False)
     X, y = simu.simulate()

     model = ModelLogReg(fit_intercept=True).fit(X, y)
     ridge = ProxL2Sq(strength=1e-6)
     bfgs = BFGS(max_iter=100, print_every=1, verbose=False, tol=1e-6)
     solver = bfgs.set_model(model).set_prox(ridge)

     estimated = solver.solve()
     err = Test.evaluate_model(estimated, true_coeffs, true_intercept)
     self.assertAlmostEqual(err, 0., delta=5e-1)
예제 #8
0
    def test_set_model_and_set_prox(self):
        """...Check that every solver rejects an unfitted model, a non-Model
        object given to ``set_model`` and a non-Prox object given to
        ``set_prox``
        """
        np.random.seed(12)
        n_samples = TestSolver.n_samples
        n_features = TestSolver.n_features
        weights0 = weights_sparse_gauss(n_features, nnz=5)
        interc0 = 2.

        def make_solver(solver_class):
            # SDCA is built with an explicit ridge strength, the other
            # solvers with their default arguments
            if solver_class is SDCA:
                return solver_class(l_l2sq=1e-1)
            return solver_class()

        def check_all_solvers_raise(msg, misuse):
            # ``assertRaisesRegex`` leaves its block at the first exception,
            # so it has to wrap one solver at a time: wrapping the whole
            # loop would only ever exercise the first solver class.
            for solver_class in self.solvers:
                with self.subTest(solver=solver_class.__name__):
                    solver = make_solver(solver_class)
                    with self.assertRaisesRegex(ValueError, msg):
                        misuse(solver)

        # 1. A model that has not been fitted is refused by ``set_model``
        model = ModelLinReg()
        check_all_solvers_raise(
            '^Passed object ModelLinReg has not been fitted. You must call'
            ' ``fit`` on it before passing it to ``set_model``$',
            lambda solver: solver.set_model(model))

        X, y = SimuLinReg(weights0,
                          interc0,
                          n_samples=n_samples,
                          verbose=False,
                          seed=123,
                          dtype=self.dtype).simulate()

        # 2. A prox is refused by ``set_model``
        prox = ProxL2Sq(strength=1e-1)
        check_all_solvers_raise(
            '^Passed object of class ProxL2Sq is not a Model class$',
            lambda solver: solver.set_model(prox))

        # 3. Once fitted, the model is accepted by ``set_model`` but refused
        # by ``set_prox``
        model.fit(X, y)
        check_all_solvers_raise(
            '^Passed object of class ModelLinReg is not a Prox class$',
            lambda solver: solver.set_model(model).set_prox(model))
예제 #9
0
    def test_solver_scpg(self):
        """...Check Self-concordant proximal gradient solver for a Hawkes
        model with ridge penalization
        """
        beta = 3
        decays = beta * np.ones((2, 2))
        # Same matrix as filling a zero array entry by entry
        adjacency = np.array([[1., 2.],
                              [0., 3.]])
        baseline = np.arange(1, 3) / 3

        simu = SimuHawkesExpKernels(adjacency=adjacency,
                                    decays=decays,
                                    baseline=baseline,
                                    seed=1231,
                                    end_time=20000,
                                    verbose=False)
        simu.adjust_spectral_radius(0.8)
        # Read the adjacency back: it is the one the simulation will use
        # after the spectral radius adjustment
        adjacency = simu.adjacency

        simu.simulate()
        model = ModelHawkesExpKernLogLik(beta).fit(simu.timestamps)

        ridge = ProxL2Sq(1e-7, positive=True)
        scpg = SCPG(max_iter=2000, tol=1e-10, verbose=False, step=1e-5)
        pg = scpg.set_model(model).set_prox(ridge)
        pg.solve(np.ones(model.n_coeffs))

        # Expected layout: baselines first, then the flattened adjacency
        expected = np.hstack((baseline, adjacency.reshape(4)))
        np.testing.assert_array_almost_equal(pg.solution,
                                             expected,
                                             decimal=2)
예제 #10
0
    def test_solver_gfb(self):
        """...Check GFB's solver for a Logistic Regression with ElasticNet
        penalization

        Notes
        -----
        Running GFB with separate l1 and l2 penalizations is not something
        one would do in practice, since the ElasticNet prox exists for that
        purpose, but it gives a second algorithm to compare against.
        """
        y, X, _, _ = TestSolver.generate_logistic_data(n_features=10,
                                                       n_samples=200,
                                                       dtype=self.dtype)
        strength = 1e-3
        ratio = 0.3
        enet = ProxElasticNet(strength, ratio).astype(self.dtype)
        l1_part = ProxL1(strength * ratio).astype(self.dtype)
        l2_part = ProxL2Sq(strength * (1 - ratio)).astype(self.dtype)

        # GFB is given the two penalties as separate proxs
        gfb = GFB(tol=1e-13, max_iter=1000, verbose=False, step=1)
        TestSolver.prepare_solver(gfb, X, y, prox=None)
        gfb.set_prox([l1_part, l2_part])
        gfb_solution = gfb.solve()

        # AGD is given the single ElasticNet prox
        agd = AGD(tol=1e-13, max_iter=1000, verbose=False, step=0.5,
                  linesearch=False)
        TestSolver.prepare_solver(agd, X, y, prox=enet)
        agd_solution = agd.solve()

        # Both algorithms must lead to the same solution
        np.testing.assert_almost_equal(gfb_solution, agd_solution, decimal=1)
예제 #11
0
    def __init__(self,
                 decay,
                 n_threads=1,
                 approx_type='smooth',
                 decay_neg=100.0,
                 gamma=1000.0,
                 epsilon=1e-3,
                 hawkes_penalty='l1',
                 hawkes_base_C=1e3,
                 hawkes_adj_C=1e3,
                 noise_penalty='l2',
                 noise_C=1e4,
                 solver='sgd',
                 n_chunks=10,
                 max_iter=1000,
                 tol=1e-5,
                 step_z=1e-2,
                 step_theta=1e-2,
                 verbose=True,
                 print_every=100,
                 record_every=100):
        """Store the settings and build the proxs and the model object

        Parameters
        ----------
        decay :
            Stored as ``self.decay`` and forwarded to the model object

        n_threads, approx_type, decay_neg, gamma, epsilon :
            Forwarded unchanged to
            ``ModelHawkesExpKernCondLogLikSyncNoise``

        hawkes_penalty : `str`, default='l1'
            Only 'l1' is accepted

        hawkes_base_C : `float`, default=1e3
            ``self.prox_base`` is a ``ProxL2Sq`` of strength
            ``1 / hawkes_base_C``

        hawkes_adj_C : `float`, default=1e3
            ``self.prox_adj`` is a ``ProxL1`` of strength
            ``1 / hawkes_adj_C``

        noise_penalty : `str`, default='l2'
            Only 'l2' is accepted

        noise_C : `float`, default=1e4
            ``self.prox_noise`` is a ``ProxL2Sq`` of strength ``1 / noise_C``

        solver : `str`, default='sgd'
            Must be one of ``self._available_solvers``

        n_chunks, max_iter, tol :
            Stored as attributes of the same name

        step_z, step_theta : `float` or callable, default=1e-2
            A callable is stored as is; any other value is wrapped in a
            one-argument function that always returns it

        verbose, print_every, record_every :
            Stored as ``_verbose``, ``_print_every`` and ``_record_every``
            before ``_init_monitor`` is called

        Raises
        ------
        ValueError
            If ``solver`` is not available, or if ``noise_penalty`` or
            ``hawkes_penalty`` is not the single supported value
        """
        self.decay = decay

        if solver not in self._available_solvers:
            raise ValueError("``solver`` must be one of [%s], got %s" %
                             (', '.join(self._available_solvers), solver))
        self.solver = solver

        self.n_chunks = n_chunks
        self.max_iter = max_iter
        self.tol = tol
        self._n_iter_done = 0

        if callable(step_z):
            self.step_z = step_z
        else:
            self.step_z = lambda t: step_z

        if callable(step_theta):
            self.step_theta = step_theta
        else:
            self.step_theta = lambda t: step_theta

        # Explicit checks rather than ``assert``: assertions are removed
        # under ``python -O``, which would let an unsupported penalty name
        # through while the proxs below are still the hard-coded ones.
        if noise_penalty != 'l2':
            raise ValueError("``noise_penalty`` must be 'l2', got %s" %
                             (noise_penalty,))
        if hawkes_penalty != 'l1':
            raise ValueError("``hawkes_penalty`` must be 'l1', got %s" %
                             (hawkes_penalty,))

        self.noise_C = noise_C
        self.hawkes_base_C = hawkes_base_C
        self.hawkes_adj_C = hawkes_adj_C

        # Each prox strength is the inverse of the matching ``C`` level
        self.prox_noise = ProxL2Sq(strength=1 / self.noise_C)
        self.prox_base = ProxL2Sq(strength=1 / self.hawkes_base_C)
        self.prox_adj = ProxL1(strength=1 / self.hawkes_adj_C)

        self.model_obj = ModelHawkesExpKernCondLogLikSyncNoise(
            decay=decay,
            n_threads=n_threads,
            approx_type=approx_type,
            decay_neg=decay_neg,
            gamma=gamma,
            epsilon=epsilon)

        self._fitted = False

        self._verbose = verbose
        self._print_every = print_every
        self._record_every = record_every
        self._init_monitor()
예제 #12
0
파일: solver.py 프로젝트: rafael-glima/tick
    def check_solver(self,
                     solver,
                     fit_intercept=True,
                     model='logreg',
                     decimal=1):
        """Check solver instance finds same parameters as scipy BFGS

        Parameters
        ----------
        solver : `Solver`
            Instance of a solver to be tested

        fit_intercept : `bool`, default=True
            Model uses an intercept if `True`

        model : 'linreg' | 'logreg' | 'poisreg', default='logreg'
            Name of the model used to test the solver

        decimal : `int`, default=1
            Number of decimals required for the test
        """
        # Set seed for data simulation
        np.random.seed(12)
        n_samples = TestSolver.n_samples
        n_features = TestSolver.n_features

        coeffs0 = weights_sparse_gauss(n_features, nnz=5)
        interc0 = 2. if fit_intercept else None

        # Pick the simulation / model pair matching the requested name
        rescale_features = False
        if model == 'linreg':
            simu_class, model_class = SimuLinReg, ModelLinReg
        elif model == 'logreg':
            simu_class, model_class = SimuLogReg, ModelLogReg
        elif model == 'poisreg':
            simu_class, model_class = SimuPoisReg, ModelPoisReg
            rescale_features = True
        else:
            raise ValueError("``model`` must be either 'linreg', 'logreg' or"
                             " 'poisreg'")

        X, y = simu_class(coeffs0,
                          interc0,
                          n_samples=n_samples,
                          verbose=False,
                          seed=123).simulate()
        if rescale_features:
            # Rescale features to avoid overflows in Poisson simulations
            X /= np.linalg.norm(X, axis=1).reshape(n_samples, 1)
        fitted_model = model_class(fit_intercept=fit_intercept).fit(X, y)

        solver.set_model(fitted_model)

        strength = 1e-2
        prox = ProxL2Sq(strength, (0, fitted_model.n_features))

        if type(solver) is SDCA:
            # SDCA takes the ridge strength itself and is given a zero prox
            solver.set_prox(ProxZero())
            solver.l_l2sq = strength
        else:
            solver.set_prox(prox)

        coeffs_solver = solver.solve()

        # Compare with BFGS
        reference = BFGS(max_iter=100, verbose=False)
        bfgs = reference.set_model(fitted_model).set_prox(prox)
        coeffs_bfgs = bfgs.solve()
        np.testing.assert_almost_equal(coeffs_solver,
                                       coeffs_bfgs,
                                       decimal=decimal)

        # We ensure that reached coeffs are not equal to zero
        self.assertGreater(norm(coeffs_solver), 0)

        # Both solutions must give close objective values for the solver
        objective_at_bfgs = solver.objective(coeffs_bfgs)
        objective_at_solver = solver.objective(coeffs_solver)
        self.assertAlmostEqual(objective_at_bfgs,
                               objective_at_solver,
                               delta=1e-2)
예제 #13
0
 def __init__(self, **kwargs):
     """Initialise both bases with the same keyword arguments, then tag the
     instance with the ``l2sq`` mangling key
     """
     for base in (PROX, TPL2SQ):
         base.__init__(self, **kwargs)
     # ``object.__setattr__`` skips any ``__setattr__`` override of the
     # class (presumably one that restricts attribute writes; confirm)
     object.__setattr__(self, "_MANGLING", "l2sq")
예제 #14
0
"""
import numpy as np
import matplotlib.pyplot as plt
from tick.prox import ProxL1, ProxElasticNet, ProxL2Sq, \
    ProxPositive, ProxSlope, ProxTV, ProxZero, ProxBinarsity, ProxGroupL1, \
    ProxEquality, ProxL1w

# Fix the seed so that the sampled vector is the same on every run
np.random.seed(12)
# Input vector: 50 draws from a standard normal distribution
x = np.random.randn(50)
# Range of ``x`` widened by 0.1 on each side (presumably used as plot
# limits; the code using ``a`` and ``b`` is outside this excerpt)
a, b = x.min() - 1e-1, x.max() + 1e-1
# Strength shared by the penalized proxs below (doubled for ProxGroupL1)
s = 0.4

# The 11 proximal operators to illustrate
proxs = [
    ProxZero(),
    ProxPositive(),
    ProxL2Sq(strength=s),
    ProxL1(strength=s),
    ProxElasticNet(strength=s, ratio=0.5),
    ProxSlope(strength=s),
    ProxTV(strength=s),
    # Restricted to the range (25, 40)
    ProxEquality(range=(25, 40)),
    # One weight per coefficient: 0.0, 0.1, ..., 4.9
    ProxL1w(strength=s, weights=0.1 * np.arange(50, dtype=np.double)),
    # Five blocks starting at 0, 10, 20, 30, 40, each of length 10
    ProxGroupL1(strength=2 * s,
                blocks_start=np.arange(0, 50, 10),
                blocks_length=10 * np.ones((5, ))),
    # Same five blocks as ProxGroupL1
    ProxBinarsity(strength=s,
                  blocks_start=np.arange(0, 50, 10),
                  blocks_length=10 * np.ones((5, )))
]

# 3 x 4 grid of axes sharing both x and y; the axes array returned by
# ``plt.subplots`` is discarded here
fig, _ = plt.subplots(3, 4, figsize=(16, 12), sharey=True, sharex=True)
예제 #15
0
#!/usr/bin/python3
# expect tick first on PYTHONPATH

from tick.array.build.array import tick_double_sparse2d_from_file, tick_double_array_from_file
from tick.prox import ProxL2Sq; from tick.solver import SAGA; from tick.linear_model import ModelLogReg

# Load the serialized features and labels (relative paths)
X = tick_double_sparse2d_from_file("url.features.cereal")
n_samples = X.shape[0]
n_features = X.shape[1]
y = tick_double_array_from_file("url.labels.cereal")

# Logistic regression without intercept
model = ModelLogReg(fit_intercept=False).fit(X, y)

# Ridge penalty of strength 1 / n_samples (plus 1e-10) over all features
prox = ProxL2Sq(1. / n_samples + 1e-10, range=(0, n_features))

# SAGA with 8 threads, logging every 10 epochs
asaga = SAGA(
    step=0.00257480411965,
    max_iter=200,
    tol=1e-10,
    verbose=False,
    n_threads=8,
    log_every_n_epochs=10,
)
asaga.set_model(model).set_prox(prox)
asaga.solve()
asaga.print_history()