예제 #1
0
    def test_SimuPoisReg(self):
        """...Check that SimuPoisReg reproduces reference features and labels
        for a fixed seed
        """
        seed = 123
        n_samples, n_features = 10, 3

        # Exponentially decaying weights; every other weight, starting with
        # the first one, is made negative
        weights = np.exp(-np.arange(n_features) / 10.)
        weights[::2] *= -1

        # None is passed as second positional argument of the simulator
        features, labels = SimuPoisReg(weights,
                                       None,
                                       n_samples=n_samples,
                                       seed=seed,
                                       verbose=False).simulate()

        expected_features = np.array([
            [1.4912667, 0.80881799, 0.26977298],
            [1.23227551, 0.50697013, 1.9409132],
            [1.8891494, 1.49834791, 2.41445794],
            [0.19431319, 0.80245126, 1.02577552],
            [-1.61687582, -1.08411865, -0.83438387],
            [2.30419894, -0.68987056, -0.39750262],
            [-0.28826405, -1.23635074, -0.76124386],
            [-1.32869473, -1.8752391, -0.182537],
            [0.79464218, 0.65055633, 1.57572506],
            [0.71524202, 1.66759831, 0.88679047],
        ])
        expected_labels = np.array([0., 0., 0., 0., 5., 0., 0., 1., 0., 2.])

        # Reference values come first, simulated values second
        np.testing.assert_array_almost_equal(expected_features, features)
        np.testing.assert_array_almost_equal(expected_labels, labels)
예제 #2
0
    def test_sdca_identity_poisreg(self):
        """...Test SDCA on the specific case of Poisson regression with
        identity link
        """
        l_l2sq = 1e-3
        n_samples, n_features = 10000, 3

        np.random.seed(123)
        weight0 = np.random.rand(n_features)
        features = np.random.rand(n_samples, n_features)

        for intercept in (None, 0.45):
            # The model fits an intercept only when one is simulated
            fit_intercept = intercept is not None

            simu = SimuPoisReg(weight0, intercept=intercept,
                               features=features, n_samples=n_samples,
                               link='identity', verbose=False)
            # `features` is rebound here, so the next iteration hands the
            # returned array back to the simulator
            features, labels = simu.simulate()

            model = ModelPoisReg(fit_intercept=fit_intercept,
                                 link='identity')
            model.fit(features, labels)

            sdca = SDCA(l_l2sq=l_l2sq, max_iter=100, verbose=False,
                        tol=1e-14, seed=Test.sto_seed)
            sdca.set_model(model).set_prox(ProxZero())

            # Constant starting dual vector of length `_rand_max`
            dual_init = (np.sqrt(sdca._rand_max * l_l2sq)
                         * np.ones(sdca._rand_max))
            sdca.solve(dual_init)

            # Primal and dual objectives must coincide (zero duality gap)
            primal_value = sdca.objective(sdca.solution)
            dual_value = sdca.dual_objective(sdca.dual_solution)
            self.assertAlmostEqual(primal_value, dual_value)

            # The simulation coefficients must be roughly recovered
            if fit_intercept:
                expected_coeffs = np.hstack((weight0, intercept))
            else:
                expected_coeffs = weight0
            np.testing.assert_array_almost_equal(expected_coeffs,
                                                 sdca.solution, decimal=1)

            # SVRG with a ProxL2Sq penalty of the same strength must reach
            # the same solution
            svrg = SVRG(max_iter=100, verbose=False, tol=1e-14,
                        seed=Test.sto_seed)
            svrg.set_model(model).set_prox(ProxL2Sq(l_l2sq))
            svrg.solve(0.5 * np.ones(model.n_coeffs), step=1e-2)
            np.testing.assert_array_almost_equal(svrg.solution,
                                                 sdca.solution, decimal=4)
예제 #3
0
    def test_ModelPoisReg(self):
        """...Numerical consistency check of loss and gradient for Poisson
        regression, followed by a check of the self-concordance constant
        """
        np.random.seed(12)
        n_samples, n_features = 100, 10
        w0 = np.random.randn(n_features) / n_features
        c0 = np.random.randn() / n_features

        # Same checks twice: first with an intercept, then without
        for intercept, fit_intercept in ((c0, True), (None, False)):
            simu = SimuPoisReg(w0,
                               intercept,
                               n_samples=n_samples,
                               verbose=False,
                               seed=1234)
            X, y = simu.simulate()
            # Rescale features since ModelPoisReg with exponential link
            #   (default) is not overflow proof
            X /= n_features
            X_spars = csr_matrix(X)

            model = ModelPoisReg(fit_intercept=fit_intercept).fit(X, y)
            model_sparse = ModelPoisReg(
                fit_intercept=fit_intercept).fit(X_spars, y)

            self.run_test_for_glm(model, model_sparse, 1e-3, 1e-4)
            self._test_glm_intercept_vs_hardcoded_intercept(model)

        # Self-concordance constant, checked on all-zero features with the
        # identity link, for dense and sparse inputs alike
        n_samples, n_features = 5, 2
        X = np.zeros((n_samples, n_features))
        X_spars = csr_matrix(X)

        y = np.array([0, 0, 3, 2, 5], dtype=np.double)
        model = ModelPoisReg(fit_intercept=True, link="identity").fit(X, y)
        model_sparse = ModelPoisReg(fit_intercept=True,
                                    link="identity").fit(X_spars, y)
        for fitted in (model, model_sparse):
            self.assertAlmostEqual(fitted._sc_constant, 1.41421356237)

        # Refit both models on new labels: the constant must change
        y = np.array([0, 0, 3, 2, 1], dtype=np.double)
        model.fit(X, y)
        model_sparse.fit(X_spars, y)
        for fitted in (model, model_sparse):
            self.assertAlmostEqual(fitted._sc_constant, 2.)
 def get_train_data(n_samples=2000, n_features=20, fit_intercept=True):
     """Simulate a Poisson regression training set with fixed seeds

     Returns the simulated features and labels together with the weights
     and the intercept (``1.`` or ``None``) given to the simulator.
     """
     np.random.seed(123)
     weights0 = weights_sparse_gauss(n_features, nnz=2)
     # No intercept is simulated when fit_intercept is falsy
     intercept0 = 1. if fit_intercept else None
     simulator = SimuPoisReg(weights0, intercept0, n_samples=n_samples,
                             seed=123, verbose=False)
     X, y = simulator.simulate()
     return X, y, weights0, intercept0
예제 #5
0
def create_model(model_type, n_samples, n_features, with_intercept=True):
    """Simulate data for a generalized linear model and return the model
    fitted on it

    Parameters
    ----------
    model_type : 'Linear' | 'Logistic' | 'Poisson'
        Kind of model to simulate and fit

    n_samples : `int`
        Number of samples requested from the simulator

    n_features : `int`
        Number of weights drawn with ``np.random.randn``

    with_intercept : `bool`, default=True
        If true, a random intercept is drawn for the simulation and the
        model is created with ``fit_intercept=with_intercept``

    Returns
    -------
    model : `ModelLinReg` | `ModelLogReg` | `ModelPoisReg`
        The model, after ``fit`` has been called with the simulated data

    Raises
    ------
    ValueError
        If ``model_type`` is not one of the supported names
    """
    supported_types = ('Linear', 'Logistic', 'Poisson')
    # Fail early and explicitly instead of hitting an unbound local later
    if model_type not in supported_types:
        raise ValueError("``model_type`` must be one of %s, got %r"
                         % (", ".join(repr(t) for t in supported_types),
                            model_type))

    # Same draw order as before: weights first, then the intercept
    weights = np.random.randn(n_features)
    intercept = np.random.normal() if with_intercept else None

    if model_type == 'Linear':
        simu_class, model_class = SimuLinReg, ModelLinReg
    elif model_type == 'Logistic':
        simu_class, model_class = SimuLogReg, ModelLogReg
    else:
        simu_class, model_class = SimuPoisReg, ModelPoisReg
        # we need to rescale features to avoid overflows
        weights /= n_features
        if intercept is not None:
            intercept /= n_features

    simulator = simu_class(weights,
                           intercept=intercept,
                           n_samples=n_samples,
                           verbose=False)

    # simulate() yields the features first and the labels second; both are
    # forwarded to fit() in that same order
    features, labels = simulator.simulate()

    model = model_class(fit_intercept=with_intercept)
    model.fit(features, labels)
    return model
예제 #6
0
                         intercept0,
                         n_samples=n_samples,
                         seed=123,
                         verbose=False)
# Draw the linear regression sample: features first, labels second
X_linreg, y_linreg = simu_linreg.simulate()

# Logistic regression simulator built from the same weights, intercept,
# sample count and seed arguments
simu_logreg = SimuLogReg(weights0,
                         intercept0,
                         n_samples=n_samples,
                         seed=123,
                         verbose=False)
X_logreg, y_logreg = simu_logreg.simulate()

# Poisson regression simulator, same arguments plus an explicit
# 'exponential' link
simu_poisreg = SimuPoisReg(weights0,
                           intercept0,
                           n_samples=n_samples,
                           link='exponential',
                           seed=123,
                           verbose=False)
X_poisreg, y_poisreg = simu_poisreg.simulate()

# One wide figure holding a 1 x 3 grid of subplots
plt.figure(figsize=(12, 3))

# Left panel: linear regression sample, points colored by their label.
# The columns of X are unpacked as scatter's positional arguments
# (assumes the features have exactly two columns -- TODO confirm)
plt.subplot(1, 3, 1)
plt.scatter(*X_linreg.T, c=y_linreg, cmap='RdBu')
plt.colorbar()
plt.title('Linear', fontsize=16)

# Middle panel: logistic regression sample, samples labelled 1 in blue and
# samples labelled -1 in red
plt.subplot(1, 3, 2)
plt.scatter(*X_logreg[y_logreg == 1].T, color='b', s=10, label=r'$y_i=1$')
plt.scatter(*X_logreg[y_logreg == -1].T, color='r', s=10, label=r'$y_i=-1$')
plt.legend(loc='upper left')
예제 #7
0
"""

import numpy as np
import matplotlib.pyplot as plt

from tick.simulation import weights_sparse_gauss
from tick.linear_model import SimuPoisReg, PoissonRegression
from tick.plot import plot_history


# Size of the simulated problem
n_samples = 50000
n_features = 100
# Seed NumPy's global RNG before drawing the weights (presumably what makes
# weights_sparse_gauss reproducible -- verify)
np.random.seed(123)
# Weights drawn with nnz = n_features - 1, then divided by 20
weight0 = weights_sparse_gauss(n_features, nnz=int(n_features-1)) / 20.
intercept0 = -0.1
# Simulate the features X and labels y of a Poisson regression
X, y = SimuPoisReg(weight0, intercept0, n_samples=n_samples,
                   verbose=False, seed=123).simulate()

# Keyword arguments shared by every learner below
opts = {'verbose': False, 'record_every': 1, 'tol': 1e-8, 'max_iter': 40}

# One learner per solver; only the 'svrg' one receives a random_state
poisson_regressions = [
    PoissonRegression(solver='gd', **opts),
    PoissonRegression(solver='agd', **opts),
    PoissonRegression(solver='svrg', random_state=1234, **opts),
    PoissonRegression(solver='bfgs', **opts)
]

# Fit every learner on the same simulated data
for poisson_regression in poisson_regressions:
    poisson_regression.fit(X, y)

# Plot the histories of all learners together, with the log_scale and
# dist_min options enabled
plot_history(poisson_regressions, log_scale=True, dist_min=True)
plt.title('Solvers comparison for Poisson regression', fontsize=16)
예제 #8
0
파일: solver.py 프로젝트: rafael-glima/tick
    def check_solver(self,
                     solver,
                     fit_intercept=True,
                     model='logreg',
                     decimal=1):
        """Check that a solver instance finds the same parameters as BFGS

        Parameters
        ----------
        solver : `Solver`
            Instance of the solver to be tested

        fit_intercept : `bool`, default=True
            If `True`, the data is simulated with an intercept and the model
            fits one

        model : 'linreg' | 'logreg' | 'poisreg', default='logreg'
            Name of the model used to test the solver

        decimal : `int`, default=1
            Number of decimals required when comparing the coefficients
        """
        # Set seed for data simulation
        np.random.seed(12)
        n_samples = TestSolver.n_samples
        n_features = TestSolver.n_features

        coeffs0 = weights_sparse_gauss(n_features, nnz=5)
        interc0 = 2. if fit_intercept else None

        # Keyword arguments common to the three simulators
        simu_kwargs = dict(n_samples=n_samples, verbose=False, seed=123)

        if model == 'poisreg':
            X, y = SimuPoisReg(coeffs0, interc0, **simu_kwargs).simulate()
            # Rescale features to avoid overflows in Poisson simulations
            row_norms = np.linalg.norm(X, axis=1)
            X /= row_norms.reshape(n_samples, 1)
            glm = ModelPoisReg(fit_intercept=fit_intercept).fit(X, y)
        elif model == 'logreg':
            X, y = SimuLogReg(coeffs0, interc0, **simu_kwargs).simulate()
            glm = ModelLogReg(fit_intercept=fit_intercept).fit(X, y)
        elif model == 'linreg':
            X, y = SimuLinReg(coeffs0, interc0, **simu_kwargs).simulate()
            glm = ModelLinReg(fit_intercept=fit_intercept).fit(X, y)
        else:
            raise ValueError("``model`` must be either 'linreg', 'logreg' or"
                             " 'poisreg'")

        solver.set_model(glm)

        strength = 1e-2
        prox = ProxL2Sq(strength, (0, glm.n_features))

        # SDCA gets the penalty strength through `l_l2sq` together with a
        # zero prox; every other solver gets the ProxL2Sq instance
        if type(solver) is SDCA:
            solver.set_prox(ProxZero())
            solver.l_l2sq = strength
        else:
            solver.set_prox(prox)

        coeffs_solver = solver.solve()

        # Reference solution computed by BFGS on the same model and prox
        bfgs = BFGS(max_iter=100, verbose=False)
        bfgs.set_model(glm).set_prox(prox)
        coeffs_bfgs = bfgs.solve()
        np.testing.assert_almost_equal(coeffs_solver,
                                       coeffs_bfgs,
                                       decimal=decimal)

        # We ensure that reached coeffs are not equal to zero
        self.assertGreater(norm(coeffs_solver), 0)

        # Both solutions must give close objective values
        self.assertAlmostEqual(solver.objective(coeffs_bfgs),
                               solver.objective(coeffs_solver),
                               delta=1e-2)