Example #1
def test_multinomial():
    """Test all multinomial functionality"""
    glm_mn = GLM(distr='multinomial',
                 reg_lambda=np.array([0.0, 0.1, 0.2]),
                 learning_rate=2e-1,
                 tol=1e-10)
    X = np.array([[-1, -2, -3], [4, 5, 6]])
    y = np.array([1, 0])

    # test gradient
    beta = np.zeros([4, 2])
    grad_beta0, grad_beta = glm_mn._grad_L2loss(beta[0], beta[1:], 0, X, y)
    assert_true(grad_beta0[0] != grad_beta0[1])
    glm_mn.fit(X, y)
    y_pred = glm_mn.predict(X)
    assert_equal(y_pred.shape,
                 (3, X.shape[0], 2))  # n_lambdas x n_samples x n_classes

    # pick one as yhat
    yhat = y_pred[0]
    # uniform prediction
    ynull = np.ones(yhat.shape) / yhat.shape[1]
    # pseudo_R2 should be greater than 0
    assert_true(glm_mn.score(y, yhat, ynull, method='pseudo_R2') > 0.)
    glm_mn.score(y, yhat)
    assert_equal(
        len(glm_mn.simulate(glm_mn.fit_[0]['beta0'], glm_mn.fit_[0]['beta'],
                            X)), X.shape[0])
    # these should raise an exception
    assert_raises(ValueError, glm_mn.score, y, y, y, 'pseudo_R2')
    assert_raises(ValueError, glm_mn.score, y, y, None, 'deviance')
Example #2
def test_group_lasso():
    """Group Lasso test."""
    n_samples, n_features = 100, 90

    # assign group ids
    groups = np.zeros(90)
    groups[0:29] = 1
    groups[30:59] = 2
    groups[60:] = 3

    # sample random coefficients
    beta0 = np.random.normal(0.0, 1.0, 1)
    beta = np.random.normal(0.0, 1.0, n_features)
    beta[groups == 2] = 0.

    # create an instance of the GLM class
    glm_group = GLM(distr='softplus', alpha=1.)

    # simulate training data
    Xr = np.random.normal(0.0, 1.0, [n_samples, n_features])
    yr = simulate_glm(glm_group.distr, beta0, beta, Xr)

    # scale and fit
    scaler = StandardScaler().fit(Xr)
    glm_group.fit(scaler.transform(Xr), yr)
Example #4
def glm_bernoulli_pyglmnet(Xr, Yr, Xt):
    # poissonexp isn't listed as an option for distr?
    # glm = GLM(distr='poissonexp', alpha=0., reg_lambda=[0.], tol=1e-6)
    glm = GLM(distr='binomial', alpha=0., reg_lambda=[0.], tol=1e-6)
    glm.fit(Xr, Yr)
    Yt = glm.predict(Xt)[0]
    return Yt
Example #5
def test_tikhonov():
    """Tikhonov regularization test."""
    n_samples, n_features = 100, 10

    # design covariance matrix of parameters
    Gam = 15.
    PriorCov = np.zeros([n_features, n_features])
    for i in np.arange(0, n_features):
        for j in np.arange(i, n_features):
            PriorCov[i, j] = np.exp(-Gam * 1. / (float(n_features) ** 2) *
                                    (float(i) - float(j)) ** 2)
            PriorCov[j, i] = PriorCov[i, j]
            if i == j:
                PriorCov[i, j] += 0.01
    PriorCov = 1. / np.max(PriorCov) * PriorCov

    # sample parameters as multivariate normal
    beta0 = np.random.randn()
    beta = np.random.multivariate_normal(np.zeros(n_features), PriorCov)

    # sample train and test data
    glm_sim = GLM(distr='softplus', score_metric='pseudo_R2')
    X = np.random.randn(n_samples, n_features)
    y = simulate_glm(glm_sim.distr, beta0, beta, X)

    from sklearn.model_selection import train_test_split
    Xtrain, Xtest, ytrain, ytest = \
        train_test_split(X, y, test_size=0.5, random_state=42)

    # design tikhonov matrix so that Tau.T @ Tau approximates the
    # inverse prior covariance
    [U, S, V] = np.linalg.svd(PriorCov, full_matrices=False)
    Tau = np.dot(np.diag(1. / np.sqrt(S)), U)
    Tau = 1. / np.sqrt(float(n_samples)) * Tau / Tau.max()

    # fit model with batch gradient
    glm_tikhonov = GLM(distr='softplus',
                       alpha=0.0,
                       Tau=Tau,
                       solver='batch-gradient',
                       tol=1e-5,
                       score_metric='pseudo_R2')
    glm_tikhonov.fit(Xtrain, ytrain)

    R2_train, R2_test = dict(), dict()
    R2_train['tikhonov'] = glm_tikhonov.score(Xtrain, ytrain)
    R2_test['tikhonov'] = glm_tikhonov.score(Xtest, ytest)

    # fit model with cdfast
    glm_tikhonov = GLM(distr='softplus',
                       alpha=0.0,
                       Tau=Tau,
                       solver='cdfast',
                       tol=1e-5,
                       score_metric='pseudo_R2')
    glm_tikhonov.fit(Xtrain, ytrain)

    R2_train, R2_test = dict(), dict()
    R2_train['tikhonov'] = glm_tikhonov.score(Xtrain, ytrain)
    R2_test['tikhonov'] = glm_tikhonov.score(Xtest, ytest)
Example #6
def test_cv():
    """Simple CV check"""
    X, y = make_regression()
    model_mn = GLM(distr='normal', alpha=0.01,
                   reg_lambda=np.array([0.0, 0.1, 0.2]))
    model_mn.fit(X, y)

    cv = KFold(X.shape[0], 5)

    # check that it returns 5 scores
    assert_equal(len(cross_val_score(model_mn, X, y, cv=cv,
                                     scoring=simple_cv_scorer)), 5)
Example #7
def test_multinomial():
    """Test all multinomial functionality"""
    glm_mn = GLM(distr='multinomial', reg_lambda=np.array([0.0, 0.1, 0.2]),
                 learning_rate=2e-1, tol=1e-10)
    X = np.array([[-1, -2, -3], [4, 5, 6]])
    y = np.array([1, 0])

    # test gradient
    beta = np.zeros([4, 2])
    grad_beta0, grad_beta = glm_mn._grad_L2loss(beta[0], beta[1:], 0, X, y)
    assert_true(grad_beta0[0] != grad_beta0[1])
    glm_mn.fit(X, y)
    y_pred = glm_mn.predict(X)
    assert_equal(y_pred.shape,
                 (3, X.shape[0], 2))  # n_lambdas x n_samples x n_classes

    # pick one as yhat
    yhat = y_pred[0]
    # uniform prediction
    ynull = np.ones(yhat.shape) / yhat.shape[1]
    # pseudo_R2 should be greater than 0
    assert_true(glm_mn.score(y, yhat, ynull, method='pseudo_R2') > 0.)
    glm_mn.score(y, yhat)
    assert_equal(len(glm_mn.simulate(glm_mn.fit_[0]['beta0'],
                                     glm_mn.fit_[0]['beta'],
                                     X)),
                 X.shape[0])
    # these should raise an exception
    assert_raises(ValueError, glm_mn.score, y, y, y, 'pseudo_R2')
    assert_raises(ValueError, glm_mn.score, y, y, None, 'deviance')
Example #8
def test_cdfast(distr):
    """Test all functionality related to fast coordinate descent."""
    scaler = StandardScaler()
    n_samples = 1000
    n_features = 100
    n_classes = 5
    density = 0.1

    # cdfast solver not available for gamma
    if distr == 'gamma':
        return

    glm = GLM(distr, solver='cdfast')

    np.random.seed(glm.random_state)

    # coefficients
    beta0 = np.random.rand()
    beta = sps.rand(n_features, 1, density=density).toarray()[:, 0]
    # data
    X = np.random.normal(0.0, 1.0, [n_samples, n_features])
    X = scaler.fit_transform(X)
    y = simulate_glm(glm.distr, beta0, beta, X)

    # compute grad and hess
    beta_ = np.zeros((n_features + 1,))
    beta_[0] = beta0
    beta_[1:] = beta
    z = beta_[0] + np.dot(X, beta_[1:])
    k = 1
    xk = X[:, k - 1]
    gk, hk = _gradhess_logloss_1d(glm.distr, xk, y, z, glm.eta, glm.theta)

    # test grad and hess
    if distr != 'multinomial':
        assert(np.size(gk) == 1)
        assert(np.size(hk) == 1)
        assert(isinstance(gk, float))
        assert(isinstance(hk, float))
    else:
        assert(gk.shape[0] == n_classes)
        assert(hk.shape[0] == n_classes)
        assert(isinstance(gk, np.ndarray))
        assert(isinstance(hk, np.ndarray))
        assert(gk.ndim == 1)
        assert(hk.ndim == 1)

    # test cdfast
    ActiveSet = np.ones(n_features + 1)
    beta_ret = glm._cdfast(X, y, ActiveSet, beta_, glm.reg_lambda)
    assert(beta_ret.shape == beta_.shape)
    assert(True not in np.isnan(beta_ret))
Example #9
def test_cdfast():
    """Test all functionality related to fast coordinate descent"""
    scaler = StandardScaler()
    n_samples = 1000
    n_features = 100
    n_classes = 5
    density = 0.1

    distrs = ['softplus', 'gaussian', 'binomial', 'poisson', 'probit']
    for distr in distrs:
        glm = GLM(distr, solver='cdfast')

        np.random.seed(glm.random_state)

        # coefficients
        beta0 = np.random.rand()
        beta = sps.rand(n_features, 1, density=density).toarray()[:, 0]
        # data
        X = np.random.normal(0.0, 1.0, [n_samples, n_features])
        X = scaler.fit_transform(X)
        y = simulate_glm(glm.distr, beta0, beta, X)

        # compute grad and hess
        beta_ = np.zeros((n_features + 1,))
        beta_[0] = beta0
        beta_[1:] = beta
        z = beta_[0] + np.dot(X, beta_[1:])
        k = 1
        xk = X[:, k - 1]
        gk, hk = glm._gradhess_logloss_1d(xk, y, z)

        # test grad and hess
        if distr != 'multinomial':
            assert_equal(np.size(gk), 1)
            assert_equal(np.size(hk), 1)
            assert_true(isinstance(gk, float))
            assert_true(isinstance(hk, float))
        else:
            assert_equal(gk.shape[0], n_classes)
            assert_equal(hk.shape[0], n_classes)
            assert_true(isinstance(gk, np.ndarray))
            assert_true(isinstance(hk, np.ndarray))
            assert_equal(gk.ndim, 1)
            assert_equal(hk.ndim, 1)

        # test cdfast
        ActiveSet = np.ones(n_features + 1)
        beta_ret, z_ret = glm._cdfast(X, y, z,
                                      ActiveSet, beta_, glm.reg_lambda)
        assert_equal(beta_ret.shape, beta_.shape)
        assert_equal(z_ret.shape, z.shape)
Example #10
def test_cdfast():
    """Test all functionality related to fast coordinate descent"""
    scaler = StandardScaler()
    n_samples = 1000
    n_features = 100
    n_classes = 5
    density = 0.1

    distrs = ['softplus', 'gaussian', 'binomial', 'poisson', 'probit']
    for distr in distrs:
        glm = GLM(distr, solver='cdfast')

        np.random.seed(glm.random_state)

        # coefficients
        beta0 = np.random.rand()
        beta = sps.rand(n_features, 1, density=density).toarray()[:, 0]
        # data
        X = np.random.normal(0.0, 1.0, [n_samples, n_features])
        X = scaler.fit_transform(X)
        y = simulate_glm(glm.distr, beta0, beta, X)

        # compute grad and hess
        beta_ = np.zeros((n_features + 1,))
        beta_[0] = beta0
        beta_[1:] = beta
        z = beta_[0] + np.dot(X, beta_[1:])
        k = 1
        xk = X[:, k - 1]
        gk, hk = _gradhess_logloss_1d(glm.distr, xk, y, z, glm.eta)

        # test grad and hess
        if distr != 'multinomial':
            assert_equal(np.size(gk), 1)
            assert_equal(np.size(hk), 1)
            assert_true(isinstance(gk, float))
            assert_true(isinstance(hk, float))
        else:
            assert_equal(gk.shape[0], n_classes)
            assert_equal(hk.shape[0], n_classes)
            assert_true(isinstance(gk, np.ndarray))
            assert_true(isinstance(hk, np.ndarray))
            assert_equal(gk.ndim, 1)
            assert_equal(hk.ndim, 1)

        # test cdfast
        ActiveSet = np.ones(n_features + 1)
        beta_ret, z_ret = glm._cdfast(X, y, z,
                                      ActiveSet, beta_, glm.reg_lambda)
        assert_equal(beta_ret.shape, beta_.shape)
        assert_equal(z_ret.shape, z.shape)
Example #11
def test_cv():
    """Simple CV check."""
    # XXX: don't use scikit-learn for tests.
    X, y = make_regression()

    glm_normal = GLM(distr='gaussian', alpha=0.01,
                     reg_lambda=0.1)
    glm_normal.fit(X, y)

    cv = KFold(X.shape[0], 5)

    # check that it returns 5 scores
    assert_equal(len(cross_val_score(glm_normal, X, y, cv=cv,
                 scoring=simple_cv_scorer)), 5)
Example #12
def test_cv():
    """Simple CV check"""
    # XXX: don't use scikit-learn for tests.
    X, y = make_regression()

    glm_normal = GLM(distr='gaussian', alpha=0.01,
                     reg_lambda=[0.0, 0.1, 0.2])
    glm_normal.fit(X, y)

    cv = KFold(X.shape[0], 5)

    # check that it returns 5 scores
    assert_equal(len(cross_val_score(glm_normal, X, y, cv=cv,
                 scoring=simple_cv_scorer)), 5)
Example #13
def test_pseudoR2():
    """Test pseudo r2."""
    n_samples, n_features = 1000, 100

    beta0 = np.random.rand()
    beta = np.random.normal(0.0, 1.0, n_features)

    # sample train and test data
    glm_sim = GLM(score_metric='pseudo_R2')
    X = np.random.randn(n_samples, n_features)
    y = simulate_glm(glm_sim.distr, beta0, beta, X)

    glm_sim.fit(X, y)
    score = glm_sim.score(X, y)

    assert (isinstance(score, float))
Example #14
def test_gradients(distr):
    """Test gradient accuracy."""
    # data
    scaler = StandardScaler()
    n_samples, n_features = 1000, 100
    X = np.random.normal(0.0, 1.0, [n_samples, n_features])
    X = scaler.fit_transform(X)

    density = 0.1
    beta_ = np.zeros(n_features + 1)
    beta_[0] = np.random.rand()
    beta_[1:] = sps.rand(n_features, 1, density=density).toarray()[:, 0]

    reg_lambda = 0.1

    glm = GLM(distr=distr, reg_lambda=reg_lambda)
    y = simulate_glm(glm.distr, beta_[0], beta_[1:], X)

    func = partial(_L2loss, distr, glm.alpha,
                   glm.Tau, reg_lambda, X, y, glm.eta, glm.theta, glm.group)
    grad = partial(_grad_L2loss, distr, glm.alpha, glm.Tau,
                   reg_lambda, X, y,
                   glm.eta, glm.theta)
    approx_grad = approx_fprime(beta_, func, 1.5e-8)
    analytical_grad = grad(beta_)
    assert_allclose(approx_grad, analytical_grad, rtol=1e-5, atol=1e-3)
Example #15
def test_deviance():
    """Test deviance."""
    n_samples, n_features = 1000, 100

    beta0 = np.random.normal(0.0, 1.0, 1)
    beta = np.random.normal(0.0, 1.0, n_features)

    # sample train and test data
    glm_sim = GLM(score_metric='deviance')
    X = np.random.randn(n_samples, n_features)
    y = simulate_glm(glm_sim.distr, beta0, beta, X)

    glm_sim.fit(X, y)
    score = glm_sim.score(X, y)

    assert_true(isinstance(score, float))
Example #16
def test_accuracy():
    """Testing accuracy."""
    n_samples, n_features, n_classes = 1000, 100, 2

    beta0 = np.random.normal(0.0, 1.0, 1)
    beta = np.random.normal(0.0, 1.0, (n_features, n_classes))

    # sample train and test data
    glm_sim = GLM(distr='binomial', score_metric='accuracy')
    X = np.random.randn(n_samples, n_features)
    y = simulate_glm(glm_sim.distr, beta0, beta, X)
    y = np.argmax(y, axis=1)
    glm_sim.fit(X, y)
    score = glm_sim.score(X, y)

    assert_true(isinstance(score, float))
Example #17
def test_random_state_consistency():
    """Test model's random_state."""
    # Generate the dataset
    n_samples, n_features = 1000, 10

    beta0 = 1. / (float(n_features) + 1.) * np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0, (n_features,))
    Xtrain = np.random.normal(0.0, 1.0, [n_samples, n_features])

    ytrain = simulate_glm("gaussian", beta0, beta, Xtrain,
                          sample=False, random_state=42)

    # Test simple glm
    glm_a = GLM(distr="gaussian", random_state=1)
    ypred_a = glm_a.fit_predict(Xtrain, ytrain)
    glm_b = GLM(distr="gaussian", random_state=1)
    ypred_b = glm_b.fit_predict(Xtrain, ytrain)
    match = "This glm object has already been fit"
    with pytest.raises(ValueError, match=match):
        ypred_c = glm_b.fit_predict(Xtrain, ytrain)

    # Consistency between two different models
    assert_array_equal(ypred_a, ypred_b)

    # Test also cross-validation
    glm_cv_a = GLMCV(distr="gaussian", cv=3, random_state=1)
    ypred_a = glm_cv_a.fit_predict(Xtrain, ytrain)
    glm_cv_b = GLMCV(distr="gaussian", cv=3, random_state=1)
    ypred_b = glm_cv_b.fit_predict(Xtrain, ytrain)
    ypred_c = glm_cv_b.fit_predict(Xtrain, ytrain)

    assert_array_equal(ypred_a, ypred_b)
    assert_array_equal(ypred_b, ypred_c)
Example #18
def test_multinomial():
    """Test all multinomial functionality"""
    glm_mn = GLM(distr='multinomial', reg_lambda=np.array([0.0, 0.1, 0.2]),
                 learning_rate=2e-1, tol=1e-10)
    X = np.array([[-1, -2, -3], [4, 5, 6]])
    y = np.array([1, 0])

    # test gradient
    beta = np.zeros([4, 2])
    grad_beta0, grad_beta = glm_mn._grad_L2loss(beta[0], beta[1:], 0, X, y)
    assert_true(grad_beta0[0] != grad_beta0[1])
    glm_mn.fit(X, y)
    y_pred_proba = glm_mn.predict_proba(X)
    assert_equal(y_pred_proba.shape,
                 (3, X.shape[0], 2))  # n_lambdas x n_samples x n_classes

    # pick one as yhat
    yhat = y_pred_proba[0]

    # uniform prediction
    ynull = np.ones(yhat.shape) / yhat.shape[1]

    # pseudo_R2 should be greater than 0
    assert_true(glm_mn[-1].score(X, y) > 0.)
    assert_equal(len(glm_mn.simulate(glm_mn.fit_[0]['beta0'],
                                     glm_mn.fit_[0]['beta'],
                                     X)),
                 X.shape[0])

    # check that score is computed for sliced estimator
    scorelist = glm_mn[-1].score(X, y)
    assert_equal(scorelist.shape[0], 1)

    # check that score is computed for all lambdas
    scorelist = glm_mn.score(X, y)
    assert_equal(scorelist.shape[0], y_pred_proba.shape[0])
Example #19
def test_group_lasso():
    """Group Lasso test."""
    n_samples, n_features = 100, 90

    # assign group ids
    groups = np.zeros(90)
    groups[0:29] = 1
    groups[30:59] = 2
    groups[60:] = 3

    # sample random coefficients
    beta0 = np.random.normal(0.0, 1.0, 1)
    beta = np.random.normal(0.0, 1.0, n_features)
    beta[groups == 2] = 0.

    # create an instance of the GLM class
    glm_group = GLM(distr='softplus', alpha=1., reg_lambda=0.2, group=groups)

    # simulate training data
    np.random.seed(glm_group.random_state)
    Xr = np.random.normal(0.0, 1.0, [n_samples, n_features])
    yr = simulate_glm(glm_group.distr, beta0, beta, Xr)

    # scale and fit
    scaler = StandardScaler().fit(Xr)
    glm_group.fit(scaler.transform(Xr), yr)

    # count number of nonzero coefs for each group.
    # in each group, coef must be [all nonzero] or [all zero].
    beta = glm_group.beta_
    group_ids = np.unique(groups)
    for group_id in group_ids:
        if group_id == 0:
            continue

        target_beta = beta[groups == group_id]
        n_nonzero = (target_beta != 0.0).sum()
        assert n_nonzero in (len(target_beta), 0)

    # one of the groups must be [all zero]
    assert np.any([
        beta[groups == group_id].sum() == 0 for group_id in group_ids
        if group_id != 0
    ])
Example #20
def test_glmnet():
    """Test glmnet."""
    glm = GLM(distr='poisson')
    scaler = StandardScaler()
    n_samples, n_features = 10000, 100
    density = 0.1

    # coefficients
    beta0 = np.random.rand()
    beta = sps.rand(n_features, 1, density=density).toarray()

    X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
    y_train = glm.simulate(beta0, beta, X_train)

    X_train = scaler.fit_transform(X_train)
    glm.fit(X_train, y_train)

    beta_ = glm.fit_params[-2]['beta'][:]
    assert_allclose(beta[:], beta_, atol=0.1)  # check fit
    density_ = np.sum(beta_ > 0.1) / float(n_features)
    assert_allclose(density_, density, atol=0.02)  # check density
Example #21
def test_glmnet():
    """Test glmnet."""
    scaler = StandardScaler()
    n_samples, n_features = 100, 10

    # coefficients
    beta0 = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0, (n_features,))

    distrs = ['softplus', 'gaussian', 'poisson', 'binomial', 'probit']
    solvers = ['batch-gradient', 'cdfast']
    score_metric = 'pseudo_R2'
    learning_rate = 2e-1

    for solver in solvers:
        for distr in distrs:

            glm = GLM(distr,
                      learning_rate=learning_rate,
                      solver=solver,
                      score_metric=score_metric)

            assert_true(repr(glm))

            np.random.seed(glm.random_state)
            X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
            y_train = simulate_glm(glm.distr, beta0, beta, X_train)

            X_train = scaler.fit_transform(X_train)
            glm.fit(X_train, y_train)

            beta_ = glm.beta_
            assert_allclose(beta, beta_, atol=0.5)  # check fit

            y_pred = glm.predict(scaler.transform(X_train))
            assert_equal(y_pred.shape[0], X_train.shape[0])

    # test fit_predict
    glm_poisson = GLM(distr='softplus')
    glm_poisson.fit_predict(X_train, y_train)
    assert_raises(ValueError, glm_poisson.fit_predict, X_train[None, ...],
                  y_train)
Example #22
def buildGLM(data, iters=1000):
    """Return a GLM model set to run for `iters` iterations."""
    # NOTE: start and stop are both np.log(1e-6), so this grid is just
    # 100 copies of 1e-6
    reg_lambda = np.logspace(np.log(1e-6), np.log(1e-6), 100, base=np.exp(1))
    glm_poissonexp = GLM(distr='poisson',
                         verbose=False,
                         alpha=.05,
                         max_iter=iters,
                         learning_rate=2e-1,
                         score_metric='pseudo_R2',
                         reg_lambda=reg_lambda,
                         eta=4.0)
    return glm_poissonexp
Example #23
def test_cv():
    """Simple CV check."""
    # XXX: don't use scikit-learn for tests.
    X, y = make_regression()
    cv = KFold(n_splits=5)

    glm_normal = GLM(distr='gaussian', alpha=0.01, reg_lambda=0.1)
    # check that it returns 5 scores
    scores = cross_val_score(glm_normal, X, y, cv=cv)
    assert(len(scores) == 5)

    param_grid = [{'alpha': np.linspace(0.01, 0.99, 2)},
                  {'reg_lambda': np.logspace(np.log(0.5), np.log(0.01),
                                             10, base=np.exp(1))}]
    glmcv = GridSearchCV(glm_normal, param_grid, cv=cv)
    glmcv.fit(X, y)
Example #24
def test_multinomial():
    """Test all multinomial functionality"""
    glm_mn = GLM(distr='multinomial',
                 reg_lambda=np.array([0.0, 0.1, 0.2]),
                 learning_rate=2e-1,
                 tol=1e-10)
    X = np.array([[-1, -2, -3], [4, 5, 6]])
    y = np.array([1, 0])

    # test gradient
    beta = np.zeros([4, 2])
    grad_beta0, grad_beta = glm_mn._grad_L2loss(beta[0], beta[1:], 0, X, y)
    assert_true(grad_beta0[0] != grad_beta0[1])
    glm_mn.fit(X, y)
    y_pred_proba = glm_mn.predict_proba(X)
    assert_equal(y_pred_proba.shape,
                 (3, X.shape[0], 2))  # n_lambdas x n_samples x n_classes

    # pick one as yhat
    yhat = y_pred_proba[0]

    # uniform prediction
    ynull = np.ones(yhat.shape) / yhat.shape[1]

    # pseudo_R2 should be greater than 0
    assert_true(glm_mn[-1].score(X, y) > 0.)
    assert_equal(
        len(glm_mn.simulate(glm_mn.fit_[0]['beta0'], glm_mn.fit_[0]['beta'],
                            X)), X.shape[0])

    # check that score is computed for sliced estimator
    scorelist = glm_mn[-1].score(X, y)
    assert_equal(scorelist.shape[0], 1)

    # check that score is computed for all lambdas
    scorelist = glm_mn.score(X, y)
    assert_equal(scorelist.shape[0], y_pred_proba.shape[0])
Example #25
def test_glmnet():
    """Test glmnet."""
    scaler = StandardScaler()
    n_samples, n_features = 100, 10

    # coefficients
    beta0 = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0, (n_features,))

    distrs = ['softplus', 'gaussian', 'poisson', 'binomial', 'probit']
    solvers = ['batch-gradient', 'cdfast']
    score_metric = 'pseudo_R2'
    learning_rate = 2e-1

    for solver in solvers:
        for distr in distrs:

            glm = GLM(distr, learning_rate=learning_rate,
                      solver=solver, score_metric=score_metric)

            assert_true(repr(glm))

            np.random.seed(glm.random_state)
            X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
            y_train = simulate_glm(glm.distr, beta0, beta, X_train)

            X_train = scaler.fit_transform(X_train)
            glm.fit(X_train, y_train)

            beta_ = glm.beta_
            assert_allclose(beta, beta_, atol=0.5)  # check fit

            y_pred = glm.predict(scaler.transform(X_train))
            assert_equal(y_pred.shape[0], X_train.shape[0])

    # test fit_predict
    glm_poisson = GLM(distr='softplus')
    glm_poisson.fit_predict(X_train, y_train)
    assert_raises(ValueError, glm_poisson.fit_predict,
                  X_train[None, ...], y_train)
Example #26
def test_glmnet():
    """Test glmnet."""
    scaler = StandardScaler()
    n_samples, n_features = 1000, 100
    density = 0.1
    n_lambda = 10

    # coefficients
    beta0 = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0, [n_features, 1])

    distrs = ['softplus', 'poisson', 'gaussian', 'binomial']
    solvers = ['batch-gradient', 'cdfast']
    score_metric = 'pseudo_R2'
    learning_rate = 2e-1

    for solver in solvers:
        for distr in distrs:

            glm = GLM(distr,
                      learning_rate=learning_rate,
                      solver=solver,
                      score_metric=score_metric)

            assert_true(repr(glm))

            np.random.seed(glm.random_state)
            X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
            y_train = glm.simulate(beta0, beta, X_train)

            X_train = scaler.fit_transform(X_train)
            glm.fit(X_train, y_train)

            beta_ = glm.fit_[-1]['beta'][:]
            assert_allclose(beta[:], beta_, atol=0.5)  # check fit

            y_pred = glm.predict(scaler.transform(X_train))
            assert_equal(y_pred.shape, (n_lambda, X_train.shape[0]))

    # checks for slicing.
    glm = glm[:3]
    glm_copy = glm.copy()
    assert_true(glm_copy is not glm)
    assert_equal(len(glm.reg_lambda), 3)
    y_pred = glm[:2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (2, X_train.shape[0]))
    y_pred = glm[2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (X_train.shape[0], ))
    assert_raises(IndexError, glm.__getitem__, [2])
    glm.score(X_train, y_train)

    # don't allow slicing if model has not been fit yet.
    glm_poisson = GLM(distr='softplus')
    assert_raises(ValueError, glm_poisson.__getitem__, 2)

    # test fit_predict
    glm_poisson.fit_predict(X_train, y_train)
    assert_raises(ValueError, glm_poisson.fit_predict, X_train[None, ...],
                  y_train)
Example #27
def scca_solution(x, y, x_omega, y_omega, alpha0, beta0, 
                  alpha_lambda_ratio, beta_lambda_ratio, 
                  alpha_lambda, beta_lambda, niter, eps, glm_impl):
    """ computes one pair of canonical weights
    """
    for idx in range(niter):
        x0 = x_omega @ beta0

        if glm_impl == 'glmnet_python':
            from glmnet_python import glmnet
            lambda_a = np.array([alpha_lambda])
            m_ = glmnet(x=y.copy(), y=x0.copy(), standardize=False, intr=False, 
                        family='gaussian', lambdau=lambda_a, 
                        alpha=alpha_lambda_ratio)
            alpha1 = m_['beta'][:, -1]
        elif glm_impl == 'pyglmnet':
            from pyglmnet import GLM
            alpha1 = GLM(distr='gaussian', alpha=alpha_lambda_ratio, reg_lambda=alpha_lambda,
                         fit_intercept=False).fit(y.copy(), x0.copy()).beta_
        else:
            raise Exception(str(glm_impl) + ' not supported.')

        if np.sum(np.abs(alpha1)) < eps: 
            alpha0 = [0]*y.shape[1]
            break

        idx_nz = np.where(alpha1 != 0)[0]
        alpha1_scale = y[:, idx_nz] @ alpha1[idx_nz]

        alpha1 = alpha1 / np.sqrt(alpha1_scale @ alpha1_scale / (x.shape[0] - 1))

        y0 = y_omega @ alpha1

        if glm_impl == 'glmnet_python':
            from glmnet_python import glmnet
            lambda_b = np.array([beta_lambda])
            m_ = glmnet(x=x.copy(), y=y0.copy(), standardize=False, intr=False, 
                        family='gaussian', lambdau=lambda_b,
                        alpha=beta_lambda_ratio)
            beta1 = m_['beta'][:, -1]
        elif glm_impl == 'pyglmnet':
            from pyglmnet import GLM
            beta1 = GLM(distr='gaussian', alpha=beta_lambda_ratio, reg_lambda=beta_lambda,
                        fit_intercept=False).fit(x.copy(), y0.copy()).beta_
        else:
            raise Exception(str(glm_impl) + ' not supported.')

        if np.sum(np.abs(beta1)) < eps:
            beta0 = [0]*x.shape[1]
            break

        idx_nz = np.where(beta1 != 0)[0]
        beta1_scale = x[:, idx_nz] @ beta1[idx_nz]
        beta1 = beta1 / np.sqrt(beta1_scale @ beta1_scale / (x.shape[0] - 1))

        if (np.sum(np.abs(alpha1 - alpha0)) < eps and 
                np.sum(np.abs(beta1 - beta0)) < eps):
            break

        alpha0 = alpha1
        beta0 = beta1

    return alpha0, beta0
Example #28
########################################################
# Now use GridSearchCV to compare

import numpy as np  # noqa
from sklearn.model_selection import GridSearchCV  # noqa
from sklearn.model_selection import KFold  # noqa

cv = KFold(3)

reg_lambda = np.logspace(np.log(0.5), np.log(0.01), 10, base=np.exp(1))
param_grid = [{'reg_lambda': reg_lambda}]

glm = GLM(distr='binomial',
          alpha=0.05,
          score_metric='pseudo_R2',
          learning_rate=0.1,
          tol=1e-4,
          verbose=True)
glmcv = GridSearchCV(glm, param_grid, cv=cv)
glmcv.fit(X_train, y_train)

print("test set pseudo $R^2$ = %f" % glmcv.score(X_test, y_test))

########################################################
# Plot the true and predicted test set target values

y_test_hat = glmcv.predict(X_test)
plt.plot(y_test[:50], 'ko-')
plt.plot(y_test_hat[:50], 'ro-')
plt.legend(['true', 'pred'], frameon=False)
plt.xlabel('Counties')
plt.ylabel('Per capita violent crime')
Example #29
# %%
import statsmodels.api as sm


mod = sm.GLM(df['cnt'] / df['offset'], df[np.arange(10)], family=sm.families.Poisson())

mod = mod.fit()

mod.summary()

# %%
from pyglmnet import GLM

# create an instance of the GLM class
glm = GLM(distr='poisson')
glm = glm.fit(df[np.arange(10)].values, df['cnt'].values/df['offset'].values)
glm

# %%
glm.get_params()

# %%
import keras

inl = keras.layers.Input((10,))
out = keras.layers.Dense(1, use_bias=False)(inl)
out = keras.layers.Lambda(lambda x: keras.backend.exp(x))(out)
model = keras.models.Model(inl, out)

model.compile(keras.optimizers.Adam(1e-3), 'poisson')
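
# %%
# A hedged sketch of training the keras model above, so that all three
# fits are comparable (epochs and batch_size are illustrative, not from
# the original):
model.fit(df[np.arange(10)].values,
          df['cnt'].values / df['offset'].values,
          epochs=10, batch_size=32, verbose=0)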
Example #30
from sklearn.preprocessing import StandardScaler

########################################################
# Import ``GLM`` class from ``pyglmnet``

########################################################

# import GLM model
from pyglmnet import GLM

# create regularization parameters for model
reg_lambda = np.logspace(np.log(0.5), np.log(0.01), 10, base=np.exp(1))
glm_poissonexp = GLM(distr='poissonexp',
                     verbose=False,
                     alpha=0.05,
                     max_iter=1000,
                     learning_rate=1e-5,
                     reg_lambda=reg_lambda,
                     eta=4.0)

########################################################
#
#
# .. math::
#
#     J = \sum_i \lambda_i - y_i \log \lambda_i
#
# where
#
# .. math::
#
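
########################################################
# A worked numpy sketch of the objective above (illustrative only: it
# assumes the plain exponential inverse link, lambda_i = exp(beta0 + x_i . beta),
# and omits the piecewise ``eta`` stabilization that the truncated
# ``.. math::`` block goes on to define):


def poisson_loss(beta0, beta, X, y):
    """J = sum_i (lambda_i - y_i * log(lambda_i))."""
    lam = np.exp(beta0 + np.dot(X, beta))
    return np.sum(lam - y * np.log(lam))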
Example #31
from copy import deepcopy
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

########################################################
# Import ``GLM`` class from ``pyglmnet``

########################################################

# import GLM model
from pyglmnet import GLM

# create regularization parameters for model
reg_lambda = np.logspace(np.log(0.5), np.log(0.01), 10, base=np.exp(1))
glm_poissonexp = GLM(distr='poissonexp', verbose=False, alpha=0.05,
                     max_iter=1000, learning_rate=1e-5,
                     reg_lambda=reg_lambda, eta=4.0)

########################################################
#
#
# .. math::
#
#     J = \sum_i \lambda_i - y_i \log \lambda_i
#
# where
#
# .. math::
#
#     \lambda_i =
#     \begin{cases}
Example #32
# separating the dependent variables from the independent variables
X, y = build_Xy(df, 'att128')

n_features = X.shape[1]
n_samples = X.shape[0]
# print(n_samples)
# print(n_features)


# Splitting the training and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.5, test_size=0.5, random_state=0)

# Defining the model
reg_lambda = np.logspace(np.log(2.0), np.log(0.01), 100, base=np.exp(1))
model = GLM(distr='poisson', verbose=False, alpha=0.05,
            max_iter=1000, learning_rate=0.001,
            reg_lambda=reg_lambda, eta=10.0)
# Best values for the parameters for R2s:
# R2r:	0.714862
# 	alpha: 			0.052
# 	learning rate: 	1e-3
# R2t:	0.674098
#	alpha:			0.052
#	learning rate:	1e-3

print('alpha: ', model.alpha)
print('learning rate: ', model.learning_rate)


#model.threshold = 1e-5
Example #33
#     default: 100

########################################################

########################################################
# Import ``GLM`` class from ``pyglmnet``

########################################################

# import GLM model
from pyglmnet import GLM

# create regularization parameters for model
reg_lambda = np.logspace(np.log(0.5), np.log(0.01), 10, base=np.exp(1))
glm_poisson = GLM(distr='poisson', verbose=False, alpha=0.05,
                  max_iter=1000, learning_rate=1e-4,
                  reg_lambda=reg_lambda)

##########################################################
# Simulate a dataset
# ------------------
# The ``GLM`` class has a very useful method called ``simulate()``.
#
# Since a canonical link function is already specified by the distribution
# parameters, or provided by the user, ``simulate()`` requires
# only the independent variables ``X`` and the coefficients ``beta0``
# and ``beta``

##########################################################
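
# A minimal, hedged sketch of that ``simulate()`` call (the instance-method
# form follows the older pyglmnet API used elsewhere on this page; the
# ``_sim`` names are illustrative, not from the original):
Xsim = np.random.normal(0.0, 1.0, [1000, 50])
beta0_sim = np.random.normal(0.0, 1.0)
beta_sim = np.random.normal(0.0, 1.0, [50, 1])
ysim = glm_poisson.simulate(beta0_sim, beta_sim, Xsim)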

n_samples, n_features = 10000, 100
# reading the dataset
df = read_dataset('community_crime.csv', 0)

# separating the dependent variables from the independent variables
X, y = build_Xy(df, 'att128')

n_features = X.shape[1]
n_samples = X.shape[0]


# Splitting the training and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=0)

# Defining the model
model = GLM(distr='multinomial', alpha=0.5,
            reg_lambda=np.array([0.02, 0.01]),
            learning_rate=1e-3, verbose=False)


# initial values for the coefficients
beta0 = np.random.normal(0.0, 1.0, 1)
beta = sps.rand(n_features, 1, 0.1)
beta = np.array(beta.todense())


model.threshold = 1e-5

# scaler = StandardScaler().fit(X_train)
# model.fit(scaler.transform(X_train), y_train)

# Fitting the model
model.fit(X_train, y_train)
Example #35
def test_glmnet():
    """Test glmnet."""
    scaler = StandardScaler()
    n_samples, n_features = 1000, 100
    density = 0.1
    n_lambda = 10

    # coefficients
    beta0 = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0, [n_features, 1])

    distrs = ['softplus', 'poisson', 'gaussian', 'binomial']
    solvers = ['batch-gradient', 'cdfast']
    score_metric = 'pseudo_R2'
    learning_rate = 2e-1

    for solver in solvers:
        for distr in distrs:

            glm = GLM(distr, learning_rate=learning_rate,
                      solver=solver, score_metric=score_metric)

            assert_true(repr(glm))

            np.random.seed(glm.random_state)
            X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
            y_train = glm.simulate(beta0, beta, X_train)

            X_train = scaler.fit_transform(X_train)
            glm.fit(X_train, y_train)

            beta_ = glm.fit_[-1]['beta'][:]
            assert_allclose(beta[:], beta_, atol=0.5)  # check fit

            y_pred = glm.predict(scaler.transform(X_train))
            assert_equal(y_pred.shape, (n_lambda, X_train.shape[0]))

    # checks for slicing.
    glm = glm[:3]
    glm_copy = glm.copy()
    assert_true(glm_copy is not glm)
    assert_equal(len(glm.reg_lambda), 3)
    y_pred = glm[:2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (2, X_train.shape[0]))
    y_pred = glm[2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (X_train.shape[0], ))
    assert_raises(IndexError, glm.__getitem__, [2])
    glm.score(X_train, y_train)

    # don't allow slicing if model has not been fit yet.
    glm_poisson = GLM(distr='softplus')
    assert_raises(ValueError, glm_poisson.__getitem__, 2)

    # test fit_predict
    glm_poisson.fit_predict(X_train, y_train)
    assert_raises(ValueError, glm_poisson.fit_predict, X_train[None, ...], y_train)
Example #36
def test_cdfast():
    """Test all functionality related to fast coordinate descent"""
    scaler = StandardScaler()
    n_samples = 1000
    n_features = 100
    n_classes = 5
    density = 0.1

    distrs = ['softplus', 'poisson', 'gaussian', 'binomial', 'multinomial']
    for distr in distrs:
        glm = GLM(distr, solver='cdfast')

        np.random.seed(glm.random_state)
        if distr != 'multinomial':
            # coefficients
            beta0 = np.random.rand()
            beta = sps.rand(n_features, 1, density=density).toarray()
            # data
            X = np.random.normal(0.0, 1.0, [n_samples, n_features])
            X = scaler.fit_transform(X)
            y = glm.simulate(beta0, beta, X)

        elif distr == 'multinomial':
            # coefficients
            beta0 = 1 / (n_features + 1) * \
                np.random.normal(0.0, 1.0, n_classes)
            beta = 1 / (n_features + 1) * \
                np.random.normal(0.0, 1.0, [n_features, n_classes])
            # data
            X, y = make_classification(n_samples=n_samples,
                                       n_features=n_features,
                                       n_redundant=0,
                                       n_informative=n_features,
                                       random_state=1,
                                       n_classes=n_classes)
            y_bk = y.ravel()
            y = np.zeros([X.shape[0], y.max() + 1])
            y[np.arange(X.shape[0]), y_bk] = 1

        # compute grad and hess
        beta_ = np.zeros([n_features + 1, beta.shape[1]])
        beta_[0] = beta0
        beta_[1:] = beta
        z = beta_[0] + np.dot(X, beta_[1:])
        k = 1
        xk = np.expand_dims(X[:, k - 1], axis=1)
        gk, hk = glm._gradhess_logloss_1d(xk, y, z)

        # test grad and hess
        if distr != 'multinomial':
            assert_equal(np.size(gk), 1)
            assert_equal(np.size(hk), 1)
            assert_true(isinstance(gk, float))
            assert_true(isinstance(hk, float))
        else:
            assert_equal(gk.shape[0], n_classes)
            assert_equal(hk.shape[0], n_classes)
            assert_true(isinstance(gk, np.ndarray))
            assert_true(isinstance(hk, np.ndarray))
            assert_equal(gk.ndim, 1)
            assert_equal(hk.ndim, 1)

        # test cdfast
        ActiveSet = np.ones(n_features + 1)
        rl = glm.reg_lambda[0]
        beta_ret, z_ret = glm._cdfast(X, y, z, ActiveSet, beta_, rl)
        assert_equal(beta_ret.shape, beta_.shape)
        assert_equal(z_ret.shape, z.shape)
Example #37
def test_glmnet():
    """Test glmnet."""
    scaler = StandardScaler()
    n_samples, n_features = 10000, 100
    density = 0.1
    n_lambda = 10

    # coefficients
    beta0 = np.random.rand()
    beta = sps.rand(n_features, 1, density=density).toarray()

    distrs = ['poisson', 'poissonexp', 'normal', 'binomial']
    learning_rate = 2e-1
    for distr in distrs:

        glm = GLM(distr, learning_rate=learning_rate)

        assert_true(repr(glm))

        np.random.seed(glm.random_state)
        X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
        y_train = glm.simulate(beta0, beta, X_train)

        X_train = scaler.fit_transform(X_train)
        glm.fit(X_train, y_train)

        beta_ = glm.fit_[-2]['beta'][:]
        assert_allclose(beta[:], beta_, atol=0.5)  # check fit
        density_ = np.sum(beta_ > 0.1) / float(n_features)
        assert_allclose(density_, density, atol=0.05)  # check density

        y_pred = glm.predict(scaler.transform(X_train))
        assert_equal(y_pred.shape, (n_lambda, X_train.shape[0]))

    # checks for slicing.
    glm = glm[:3]
    glm_copy = glm.copy()
    assert_true(glm_copy is not glm)
    assert_equal(len(glm.reg_lambda), 3)
    y_pred = glm[:2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (2, X_train.shape[0]))
    y_pred = glm[2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (X_train.shape[0], ))
    assert_raises(IndexError, glm.__getitem__, [2])
    glm.score(y_train, y_pred)

    # don't allow slicing if model has not been fit yet.
    glm_poisson = GLM(distr='poisson')
    assert_raises(ValueError, glm_poisson.__getitem__, 2)

    # test fit_predict
    glm_poisson.fit_predict(X_train, y_train)
    assert_raises(ValueError, glm_poisson.fit_predict, X_train[None, ...], y_train)
Example #38
########################################################

########################################################
# Import ``GLM`` class from ``pyglmnet``

########################################################

# import GLM model
from pyglmnet import GLM

# create regularization parameters for model
reg_lambda = np.logspace(np.log(0.5), np.log(0.01), 10, base=np.exp(1))
glm_poisson = GLM(distr='softplus',
                  verbose=False,
                  alpha=0.05,
                  max_iter=1000,
                  learning_rate=2e-1,
                  reg_lambda=reg_lambda)

##########################################################
# Simulate a dataset
# ------------------
# The ``GLM`` class has a very useful method called ``simulate()``.
#
# Since a canonical link function is already specified by the distribution
# parameters, or provided by the user, ``simulate()`` requires
# only the independent variables ``X`` and the coefficients ``beta0``
# and ``beta``

##########################################################
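
# A hedged usage sketch of the simulation step described above, reusing the
# ``glm_poisson`` instance defined earlier in this snippet (shapes are
# illustrative; the instance-method ``simulate()`` is the older API):
n_samples, n_features = 10000, 100
Xr = np.random.normal(0.0, 1.0, [n_samples, n_features])
beta0 = np.random.normal(0.0, 1.0)
beta = np.random.normal(0.0, 1.0, [n_features, 1])
yr = glm_poisson.simulate(beta0, beta, Xr)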
Example #39
X, y = make_classification(n_samples=10000,
                           n_classes=5,
                           n_informative=100,
                           n_features=100,
                           n_redundant=0)

########################################################

########################################################
# Fit the model

########################################################

########################################################
from pyglmnet import GLM
glm_mn = GLM(distr='multinomial',
             alpha=0.01,
             reg_lambda=np.array([0.02, 0.01]),
             verbose=False)
glm_mn.threshold = 1e-5
glm_mn.fit(X, y)

########################################################

########################################################
# Predict and score the output

########################################################

y_pred = glm_mn[-1].predict(X).argmax(axis=1)
print('Percentage correct = %f percent.' % (100 * (y_pred == y).mean()))

########################################################
Example #40
def test_api_input():
    """Test that the input value of y can be of different types."""

    random_state = 1
    state = np.random.RandomState(random_state)
    n_samples, n_features = 100, 5

    X = state.normal(0, 1, (n_samples, n_features))
    y = state.normal(0, 1, (n_samples, ))

    glm = GLM(distr='gaussian')

    # Test that ValueError is raised when the shapes mismatch
    with pytest.raises(ValueError):
        GLM().fit(X, y[3:])

    # This would work without errors
    glm.fit(X, y)
    glm.predict(X)
    glm.score(X, y)
    glm.plot_convergence()
    glm = GLM(distr='gaussian', solver='test')

    with pytest.raises(ValueError, match="solver must be one of"):
        glm.fit(X, y)

    with pytest.raises(ValueError, match="fit_intercept must be"):
        glm = GLM(distr='gaussian', fit_intercept='blah')

    glm = GLM(distr='gaussian', max_iter=2)
    with pytest.warns(UserWarning, match='Reached max number of iterat'):
        glm.fit(X, y)
Example #41
# - `max_iter`: int (maximum number of iterations for the model)
#     default: 1000

########################################################

########################################################
# Import ``GLM`` class from ``pyglmnet``

########################################################

# import GLM model
from pyglmnet import GLM

# create regularization parameters for model
reg_lambda = np.logspace(np.log(0.5), np.log(0.01), 10, base=np.exp(1))
glm_poisson = GLM(distr="poisson", verbose=False, alpha=0.05,
                  max_iter=1000, learning_rate=2e-1,
                  reg_lambda=reg_lambda)

##########################################################
# Simulate a dataset
# ------------------
# The ``GLM`` class has a very useful method called ``simulate()``.
#
# Since a canonical link function is already specified by the distribution
# parameters, or provided by the user, ``simulate()`` requires
# only the independent variables ``X`` and the coefficients ``beta0``
# and ``beta``

##########################################################

n_samples, n_features = 10000, 100
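
# Continuing with a hedged sketch of the simulation itself (``sps`` is
# assumed to be scipy.sparse, imported as in the other examples on this
# page; the sparse draw gives ``beta`` roughly 10% nonzero entries):
beta0 = np.random.normal(0.0, 1.0)
beta = sps.rand(n_features, 1, density=0.1).toarray()
X = np.random.normal(0.0, 1.0, [n_samples, n_features])
y = glm_poisson.simulate(beta0, beta, X)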
Example #42
def test_cdfast():
    """Test all functionality related to fast coordinate descent"""
    scaler = StandardScaler()
    n_samples = 1000
    n_features = 100
    n_classes = 5
    density = 0.1

    distrs = ['softplus', 'poisson', 'gaussian', 'binomial', 'multinomial']
    for distr in distrs:
        glm = GLM(distr, solver='cdfast')

        np.random.seed(glm.random_state)
        if distr != 'multinomial':
            # coefficients
            beta0 = np.random.rand()
            beta = sps.rand(n_features, 1, density=density).toarray()
            # data
            X = np.random.normal(0.0, 1.0, [n_samples, n_features])
            X = scaler.fit_transform(X)
            y = glm.simulate(beta0, beta, X)

        elif distr == 'multinomial':
            # coefficients
            beta0 = 1 / (n_features + 1) * \
                np.random.normal(0.0, 1.0, n_classes)
            beta = 1 / (n_features + 1) * \
                np.random.normal(0.0, 1.0, [n_features, n_classes])
            # data
            X, y = make_classification(n_samples=n_samples,
                                       n_features=n_features,
                                       n_redundant=0,
                                       n_informative=n_features,
                                       random_state=1,
                                       n_classes=n_classes)
            y_bk = y.ravel()
            y = np.zeros([X.shape[0], y.max() + 1])
            y[np.arange(X.shape[0]), y_bk] = 1

        # compute grad and hess
        beta_ = np.zeros([n_features + 1, beta.shape[1]])
        beta_[0] = beta0
        beta_[1:] = beta
        z = beta_[0] + np.dot(X, beta_[1:])
        k = 1
        xk = np.expand_dims(X[:, k - 1], axis=1)
        gk, hk = glm._gradhess_logloss_1d(xk, y, z)

        # test grad and hess
        if distr != 'multinomial':
            assert_equal(np.size(gk), 1)
            assert_equal(np.size(hk), 1)
            assert_true(isinstance(gk, float))
            assert_true(isinstance(hk, float))
        else:
            assert_equal(gk.shape[0], n_classes)
            assert_equal(hk.shape[0], n_classes)
            assert_true(isinstance(gk, np.ndarray))
            assert_true(isinstance(hk, np.ndarray))
            assert_equal(gk.ndim, 1)
            assert_equal(hk.ndim, 1)

        # test cdfast
        ActiveSet = np.ones(n_features + 1)
        rl = glm.reg_lambda[0]
        beta_ret, z_ret = glm._cdfast(X, y, z, ActiveSet, beta_, rl)
        assert_equal(beta_ret.shape, beta_.shape)
        assert_equal(z_ret.shape, z.shape)
Example #43
def test_glmnet(distr, reg_lambda, fit_intercept, solver):
    """Test glmnet."""
    raises(ValueError, GLM, distr='blah')
    raises(ValueError, GLM, distr='gaussian', max_iter=1.8)

    n_samples, n_features = 100, 10

    # coefficients
    beta0 = 0.
    if fit_intercept:
        beta0 = 1. / (float(n_features) + 1.) * \
            np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + int(fit_intercept)) * \
        np.random.normal(0.0, 1.0, (n_features,))

    score_metric = 'pseudo_R2'
    learning_rate = 2e-1
    random_state = 0

    betas_ = list()

    if not (distr == 'gamma' and solver == 'cdfast'):

        np.random.seed(random_state)

        theta = 1.0
        X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
        y_train = simulate_glm(distr, beta0, beta, X_train, theta=theta,
                               sample=False)

        alpha = 0.
        loss_trace = list()
        eta = 2.0
        group = None
        Tau = None

        def callback(beta):
            Tau = None
            loss_trace.append(
                _loss(distr, alpha, Tau, reg_lambda,
                      X_train, y_train, eta, theta, group, beta,
                      fit_intercept=fit_intercept))

        glm = GLM(distr, learning_rate=learning_rate,
                  reg_lambda=reg_lambda, tol=1e-5, max_iter=5000,
                  alpha=alpha, solver=solver, score_metric=score_metric,
                  random_state=random_state, callback=callback,
                  fit_intercept=fit_intercept, theta=theta)
        assert(repr(glm))

        glm.fit(X_train, y_train)

        # verify loss decreases
        assert(np.all(np.diff(loss_trace) <= 1e-7))

        # true loss and beta should be recovered when reg_lambda == 0
        if reg_lambda == 0.:
            # verify loss at convergence = loss when beta=beta_
            l_true = _loss(distr, alpha, Tau, reg_lambda,
                           X_train, y_train, eta, theta, group,
                           np.concatenate(([beta0], beta)))
            assert_allclose(loss_trace[-1], l_true, rtol=1e-4, atol=1e-5)
            # beta=beta_ when reg_lambda = 0.
            assert_allclose(beta, glm.beta_, rtol=0.05, atol=1e-2)
        betas_.append(glm.beta_)

        y_pred = glm.predict(X_train)
        assert(y_pred.shape[0] == X_train.shape[0])

        # compare all solvers pairwise to make sure they're close
        for i, first_beta in enumerate(betas_[:-1]):
            for second_beta in betas_[i + 1:]:
                assert_allclose(first_beta, second_beta, rtol=0.05, atol=1e-2)

        # test fit_predict
        glm_poisson = GLM(distr='softplus')
        glm_poisson.fit_predict(X_train, y_train)
        raises(ValueError, glm_poisson.fit_predict,
               X_train[None, ...], y_train)
Example #44
np.shape(prior_cov)

########################################################
# Fit models

from sklearn.model_selection import train_test_split
Xtrain, Xtest, Ytrain, Ytest = train_test_split(features, spike_counts, test_size=0.2, random_state=42)

########################################################

from pyglmnet import utils
n_samples = Xtrain.shape[0]
Tau = utils.tikhonov_from_prior(prior_cov, n_samples)

glm = GLM(distr='poisson', alpha=0., Tau=Tau, score_metric='pseudo_R2')
glm.fit(Xtrain, Ytrain)
cvopt_lambda = glm.score(Xtest, Ytest).argmax()
print("train score: %f" % glm[cvopt_lambda].score(Xtrain, Ytrain))
print("test score: %f" % glm[cvopt_lambda].score(Xtest, Ytest))
weights = glm[cvopt_lambda].fit_['beta']

########################################################
# Visualize

for time_bin_ in range(n_temporal_basis):
    RF = strf_model.make_image_from_spatial_basis(
        spatial_basis,
        weights[range(time_bin_,
                      n_spatial_basis * n_temporal_basis,
                      n_temporal_basis)])
Example #45
def test_compare_sklearn(solver):
    """Test results against sklearn."""
    def rmse(a, b):
        return np.sqrt(np.mean((a - b) ** 2))

    X, Y, coef_ = make_regression(
        n_samples=1000, n_features=500,
        noise=0.1, n_informative=10, coef=True,
        random_state=42)

    alpha = 0.1
    l1_ratio = 0.5

    clf = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, tol=1e-5)
    clf.fit(X, Y)
    glm = GLM(distr='gaussian', alpha=l1_ratio, reg_lambda=alpha,
              solver=solver, tol=1e-6, max_iter=500)
    glm.fit(X, Y)

    y_sk = clf.predict(X)
    y_pg = glm.predict(X)
    assert abs(rmse(Y, y_sk) - rmse(Y, y_pg)) < 0.5

    glm = GLM(distr='gaussian', alpha=l1_ratio, reg_lambda=alpha,
              solver=solver, tol=1e-6, max_iter=5, fit_intercept=False)
    glm.fit(X, Y)
    assert glm.beta0_ == 0.

    glm.predict(X)
Example #46
# Download and preprocess data files

X, y = datasets.fetch_community_crime_data('/tmp/glm-tools')
n_samples, n_features = X.shape

########################################################
# Split the data into training and test sets

X_train, X_test, y_train, y_test = \
    train_test_split(X, y, test_size=0.33, random_state=0)

########################################################
# Fit a gaussian distributed GLM with elastic net regularization

# use the default value for reg_lambda
glm = GLM(distr='gaussian', alpha=0.05, score_metric='pseudo_R2')

# fit model
glm.fit(X_train, y_train)

# score the test set prediction
y_test_hat = glm[-1].predict(X_test)
print ("test set pseudo $R^2$ = %f" % glm[-1].score(X_test, y_test))

########################################################
# Plot the true and predicted test set target values

plt.plot(y_test[:50], 'ko-')
plt.plot(y_test_hat[:50], 'ro-')
plt.legend(['true', 'pred'], frameon=False)
plt.xlabel('Counties')
Example #47
def test_multinomial():
    """Test all multinomial functionality"""
    glm = GLM(distr='multinomial', reg_lambda=np.array([0.0, 0.1, 0.2]),
              tol=1e-10)
    X = np.array([[-1, -2, -3], [4, 5, 6]])
    y = np.array([1, 0])
    # test gradient
    beta = np.zeros([4, 2])
    grad_beta0, grad_beta = glm.grad_L2loss(beta[0], beta[1:], 0, X, y)
    assert grad_beta0[0] != grad_beta0[1]
    glm.fit(X, y)
    y_pred = glm.predict(X)
    assert_equal(y_pred.shape,
                 (3, X.shape[0], 2))  # n_lambdas x n_samples x n_classes
    # pick one as yhat
    yhat = y_pred[0]
    # uniform prediction
    ynull = np.ones(yhat.shape) / yhat.shape[1]
    # pseudo_R2 should be greater than 0
    assert_true(glm.pseudo_R2(y, yhat, ynull) > 0.)
    glm.deviance(y, yhat)
    assert_equal(len(glm.simulate(glm.fit_[0]['beta0'],
                                  glm.fit_[0]['beta'],
                                  X)),
                 X.shape[0])
    # these should raise an exception
    try:
        glm.pseudo_R2(y, y, y)
        assert False
    except Exception:
        assert True
    try:
        glm.deviance(y, y)
        assert False
    except Exception:
        assert True
Example #48
########################################################
from sklearn.datasets import make_classification
X, y = make_classification(n_samples=10000, n_classes=5,
                           n_informative=100, n_features=100, n_redundant=0)

########################################################

########################################################
# Fit the model

########################################################

########################################################
from pyglmnet import GLM
glm_mn = GLM(distr='multinomial', alpha=0.01,
             reg_lambda=np.array([0.02, 0.01]), verbose=False)
glm_mn.threshold = 1e-5
glm_mn.fit(X, y)

########################################################

########################################################
# Predict and score the output

########################################################

y_pred = glm_mn[-1].predict(X).argmax(axis=1)  # class with highest probability
print('Percentage correct = %f percent.' % (100 * (y_pred == y).mean()))

########################################################
Example #49
def test_tikhonov():
    """Tikhonov regularization test."""
    n_samples, n_features = 100, 10

    # design covariance matrix of parameters
    Gam = 15.
    PriorCov = np.zeros([n_features, n_features])
    for i in np.arange(0, n_features):
        for j in np.arange(i, n_features):
            PriorCov[i, j] = np.exp(-Gam * 1. / (float(n_features) ** 2) *
                                    (float(i) - float(j)) ** 2)
            PriorCov[j, i] = PriorCov[i, j]
            if i == j:
                PriorCov[i, j] += 0.01
    PriorCov = 1. / np.max(PriorCov) * PriorCov

    # sample parameters as multivariate normal
    beta0 = np.random.randn()
    beta = np.random.multivariate_normal(np.zeros(n_features), PriorCov)

    # sample train and test data
    glm_sim = GLM(distr='softplus', score_metric='pseudo_R2')
    X = np.random.randn(n_samples, n_features)
    y = simulate_glm(glm_sim.distr, beta0, beta, X)

    from sklearn.model_selection import train_test_split
    Xtrain, Xtest, ytrain, ytest = \
        train_test_split(X, y, test_size=0.5, random_state=42)

    # design tikhonov matrix
    U, S, V = np.linalg.svd(PriorCov, full_matrices=False)
    Tau = np.dot(np.diag(1. / np.sqrt(S)), U)
    Tau = 1. / np.sqrt(float(n_samples)) * Tau / Tau.max()

    # fit model with batch gradient
    glm_tikhonov = GLM(distr='softplus',
                       alpha=0.0,
                       Tau=Tau,
                       solver='batch-gradient',
                       tol=1e-3,
                       score_metric='pseudo_R2')
    glm_tikhonov.fit(Xtrain, ytrain)

    R2_train, R2_test = dict(), dict()
    R2_train['tikhonov-batch'] = glm_tikhonov.score(Xtrain, ytrain)
    R2_test['tikhonov-batch'] = glm_tikhonov.score(Xtest, ytest)

    # fit model with cdfast; store under a separate key rather than
    # re-initializing the dicts, which would discard the scores above
    glm_tikhonov = GLM(distr='softplus',
                       alpha=0.0,
                       Tau=Tau,
                       solver='cdfast',
                       tol=1e-3,
                       score_metric='pseudo_R2')
    glm_tikhonov.fit(Xtrain, ytrain)

    R2_train['tikhonov-cdfast'] = glm_tikhonov.score(Xtrain, ytrain)
    R2_test['tikhonov-cdfast'] = glm_tikhonov.score(Xtest, ytest)
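    # An illustrative aside (not part of the original test): the Tikhonov
    # penalty encoded by Tau is ||Tau . beta||^2, which reduces to the
    # ordinary ridge penalty ||beta||^2 when Tau is the identity matrix.
    Tau_ridge = np.eye(n_features)
    assert np.isclose(np.sum(np.dot(Tau_ridge, beta) ** 2),
                      np.sum(beta ** 2))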
Example No. 50
    def get_benchmarks(self, X_train, y_train, X_test, y_test):
        """
        """
        n_repeats = self.n_repeats
        distr = self.distr

        res = dict()
        for env in self.envs:
            res[env] = dict()
            if env == 'pyglmnet':
                # initialize model
                model = GLM(distr=distr,
                            reg_lambda=[self.reg_lambda],
                            alpha=self.alpha,
                            solver='batch-gradient',
                            score_metric='pseudo_R2')

                # fit-predict-score
                model.fit(X_train, y_train)
                y_test_hat = model[-1].predict(X_test)
                y_test_hat = np.squeeze(y_test_hat)

                if distr in ['gaussian', 'poisson']:
                    res[env]['score'] = \
                        r2_score(y_test, y_test_hat)
                elif distr == 'binomial':
                    res[env]['score'] = \
                        accuracy_score(y_test,
                                       (y_test_hat > 0.5).astype(int))

                # time
                tmp = list()
                for r in range(n_repeats):
                    start = time.time()
                    model.fit(X_train, y_train)
                    stop = time.time()
                    tmp.append(stop - start)
                res[env]['time'] = np.min(tmp) * 1e3

            if env == 'sklearn':
                if distr in ['gaussian', 'binomial']:
                    # initialize model
                    if distr == 'gaussian':
                        model = ElasticNet(alpha=self.reg_lambda,
                                           l1_ratio=self.alpha)
                    elif distr == 'binomial':

                        model = SGDClassifier(loss='log',
                                              penalty='elasticnet',
                                              alpha=self.reg_lambda,
                                              l1_ratio=self.alpha)

                    # fit-predict-score
                    model.fit(X_train, y_train)
                    y_test_hat = model.predict(X_test)
                    res[env]['score'] = model.score(X_test, y_test)

                    # time
                    tmp = list()
                    for r in range(n_repeats):
                        start = time.time()
                        model.fit(X_train, y_train)
                        stop = time.time()
                        tmp.append(stop - start)
                    res[env]['time'] = np.min(tmp) * 1e3
                else:
                    res[env]['score'] = -999.
                    res[env]['time'] = -999.

            if env == 'statsmodels':
                # initialize model
                if distr == 'gaussian':
                    model = sm.GLM(y_train,
                                   sm.add_constant(X_train),
                                   family=sm.families.Gaussian())
                elif distr == 'binomial':
                    model = sm.GLM(y_train,
                                   sm.add_constant(X_train),
                                   family=sm.families.Binomial())
                elif distr == 'poisson':
                    model = sm.GLM(y_train,
                                   sm.add_constant(X_train),
                                   family=sm.families.Poisson())

                # fit-predict-score
                statsmodels_res = model.fit()
                y_test_hat = model.predict(statsmodels_res.params,
                                           exog=sm.add_constant(X_test))
                y_test_hat = np.array(y_test_hat)

                if distr in ['gaussian', 'poisson']:
                    res[env]['score'] = \
                        r2_score(y_test, y_test_hat)
                elif distr == 'binomial':
                    res[env]['score'] = \
                        accuracy_score(y_test,
                                       (y_test_hat > 0.5).astype(int))

                # time
                tmp = list()
                for r in range(n_repeats):
                    start = time.time()
                    statsmodels_res = model.fit()
                    stop = time.time()
                    tmp.append(stop - start)
                res[env]['time'] = np.min(tmp) * 1e3

            if env == 'R':
                # initialize model
                glmnet = importr('glmnet')
                predict = robjects.r('predict')

                # fit-predict-score
                try:
                    fit = glmnet.glmnet(X_train,
                                        y_train,
                                        family=distr,
                                        alpha=self.alpha,
                                        nlambda=1)
                    tmp = predict(fit, newx=X_test, s=0)

                    y_test_hat = np.zeros(y_test.shape[0])
                    for i in range(y_test.shape[0]):
                        y_test_hat[i] = tmp[i]

                    if distr in ['gaussian', 'poisson']:
                        res[env]['score'] = \
                            r2_score(y_test, y_test_hat)
                    elif distr == 'binomial':
                        res[env]['score'] = \
                            accuracy_score(y_test,
                                           (y_test_hat > 0.5).astype(int))

                    # time
                    tmp = list()
                    for r in range(n_repeats):
                        start = time.time()
                        fit = glmnet.glmnet(X_train,
                                            y_train,
                                            family=distr,
                                            alpha=self.alpha,
                                            nlambda=1)
                        stop = time.time()
                        tmp.append(stop - start)
                    res[env]['time'] = np.min(tmp) * 1e3
                except Exception:
                    res[env]['score'] = -999.
                    res[env]['time'] = -999.

        return res
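    # A hypothetical driver sketch for the method above (kept in comments
    # since the enclosing class is not shown in this snippet; `BenchmarkRunner`
    # and the attribute values are illustrative placeholders inferred from the
    # method body, not a documented API):
    #
    #     runner = BenchmarkRunner()
    #     runner.distr = 'poisson'
    #     runner.alpha = 0.5
    #     runner.reg_lambda = 0.01
    #     runner.n_repeats = 5
    #     runner.envs = ['pyglmnet', 'statsmodels']
    #     res = runner.get_benchmarks(X_train, y_train, X_test, y_test)
    #     for env, scores in res.items():
    #         print('%s: score=%.3f, time=%.1f ms'
    #               % (env, scores['score'], scores['time']))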
Example No. 51
    def fit(self, X, Y, get_history_terms=True):
        """
        Fits the model to the data in X to predict the response Y.

        Imports models and creates model instance as well.

        Parameters
        ----------
        X: float, n_samples x n_features, features of interest
        Y: float, n_samples x 1, population activity
        get_history_terms = Boolean. Whether to compute the temporal features.
                    Note that if spike_history and cov_history are False,
                    no history will be computed anyways and the flag does nothing.


        """
        if self.default_params:
            warnings.warn(
                '\n  Using default hyperparameters. Consider optimizing on' +
                ' a held-out dataset using, e.g., hyperopt or random search')

        # Make the covariate matrix. Include spike or covariate history?
        # The different methods here are needed to satisfy recurrent keras
        # models.
        if get_history_terms:
            if self.tunemodel == 'lstm':
                X, Y = self.get_all_with_history_keras(X, Y)
            else:
                X, Y = self.get_all_with_history(X, Y)

        if self.tunemodel == 'glm':
            model = GLM(**self.params)
            model.fit(X, Y)

            # we want the last of the regularization path
            self.model = model[-1]

        elif self.tunemodel == 'feedforward_nn':

            if np.ndim(X) == 1:
                X = np.transpose(np.atleast_2d(X))

            params = self.params
            model = Sequential()
            model.add(
                Dense(params['n1'],
                      input_dim=np.shape(X)[1],
                      kernel_initializer='glorot_normal',
                      activation='relu',
                      kernel_regularizer=l2(params['l2'])))
            model.add(Dropout(params['dropout']))
            model.add(BatchNormalization())
            model.add(
                Dense(params['n2'],
                      kernel_initializer='glorot_normal',
                      activation='relu',
                      kernel_regularizer=l2(params['l2'])))
            model.add(BatchNormalization())
            model.add(Dense(1, activation='softplus'))
            optim = adam(lr=params['lr'],
                         clipnorm=params['clipnorm'],
                         decay=params['decay'],
                         beta_1=1 - params['b1'],
                         beta_2=1 - params['b2'])
            model.compile(
                loss='poisson',
                optimizer=optim,
            )
            hist = model.fit(X,
                             Y,
                             batch_size=128,
                             epochs=30,
                             verbose=self.verbose)

            self.model = model

        elif self.tunemodel == 'xgboost':

            dtrain = xgb.DMatrix(X, label=Y)
            num_round = 200
            self.model = xgb.train(self.params, dtrain, num_round)

        elif self.tunemodel == 'random_forest':

            self.model = RandomForestRegressor(**self.params)
            self.model.fit(X, Y)

        elif self.tunemodel == 'lstm':

            if np.ndim(X) == 1:
                X = np.transpose(np.atleast_2d(X))

            params = self.params
            model = Sequential()  # declare model
            # Add recurrent layer; apply dropout to its input (dropout_W)
            # and recurrent (dropout_U) connections
            model.add(LSTM(int(params['n_units']),
                           input_shape=(X.shape[1], X.shape[2]),
                           dropout_W=params['dropout'],
                           dropout_U=params['dropout']))
            # Dropout some units (recurrent layer output units)
            model.add(Dropout(params['dropout']))

            # Add dense connections to output layer
            model.add(Dense(1, activation='softplus'))

            # Compile and fit the model (set fitting parameters)
            model.compile(loss='poisson',
                          optimizer='rmsprop',
                          metrics=['accuracy'])
            model.fit(X,
                      Y,
                      epochs=int(params['epochs']),
                      batch_size=int(params['batch_size']),
                      verbose=self.verbose)

            self.model = model

        else:  # using predefined model
            self.model.fit(X, Y)
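    # A hedged usage sketch for the method above (kept in comments since the
    # enclosing estimator class and its constructor are not shown; the class
    # name and keyword below are illustrative assumptions, not a real API):
    #
    #     est = SomeEncoder(tunemodel='glm')   # hypothetical constructor
    #     est.fit(X, Y, get_history_terms=False)
    #     y_hat = est.model.predict(X)         # fit() stores the model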