Example #1
def test_compare_sklearn(solver):
    """Test results against sklearn."""
    def rmse(a, b):
        return np.sqrt(np.mean((a - b) ** 2))

    X, Y, coef_ = make_regression(
        n_samples=1000, n_features=500,
        noise=0.1, n_informative=10, coef=True,
        random_state=42)

    alpha = 0.1
    l1_ratio = 0.5

    clf = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, tol=1e-5)
    clf.fit(X, Y)
    glm = GLM(distr='gaussian', alpha=l1_ratio, reg_lambda=alpha,
              solver=solver, tol=1e-6, max_iter=500)
    glm.fit(X, Y)

    y_sk = clf.predict(X)
    y_pg = glm.predict(X)
    assert abs(rmse(Y, y_sk) - rmse(Y, y_pg)) < 0.5

    glm = GLM(distr='gaussian', alpha=l1_ratio, reg_lambda=alpha,
              solver=solver, tol=1e-6, max_iter=5, fit_intercept=False)
    glm.fit(X, Y)
    assert glm.beta0_ == 0.

    glm.predict(X)
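
Note on the penalty mapping this test relies on: sklearn's ElasticNet takes the overall strength as alpha and the L1/L2 mix as l1_ratio, while pyglmnet's GLM takes the strength as reg_lambda and the mix as alpha. A minimal sketch of the correspondence (the helper name make_equivalent_pair is ours, not part of either library):

from sklearn.linear_model import ElasticNet
from pyglmnet import GLM

def make_equivalent_pair(strength, l1_ratio, solver='batch-gradient'):
    """Build sklearn and pyglmnet elastic-net models with matched penalties."""
    sk = ElasticNet(alpha=strength, l1_ratio=l1_ratio)
    pg = GLM(distr='gaussian', reg_lambda=strength, alpha=l1_ratio,
             solver=solver)
    return sk, pg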
Example #2
def test_api_input():
    """Test that the input value of y can be of different types."""

    random_state = 1
    state = np.random.RandomState(random_state)
    n_samples, n_features = 100, 5

    X = state.normal(0, 1, (n_samples, n_features))
    y = state.normal(0, 1, (n_samples, ))

    glm = GLM(distr='gaussian')

    # Test that ValueError is raised when the shapes mismatch
    with pytest.raises(ValueError):
        GLM().fit(X, y[3:])

    # These calls should run without errors
    glm.fit(X, y)
    glm.predict(X)
    glm.score(X, y)
    glm.plot_convergence()

    # An invalid solver should raise at fit time
    glm = GLM(distr='gaussian', solver='test')
    with pytest.raises(ValueError, match="solver must be one of"):
        glm.fit(X, y)

    with pytest.raises(ValueError, match="fit_intercept must be"):
        glm = GLM(distr='gaussian', fit_intercept='blah')

    glm = GLM(distr='gaussian', max_iter=2)
    with pytest.warns(UserWarning, match='Reached max number of iterat'):
        glm.fit(X, y)
Example #3
def glm_bernoulli_pyglmnet(Xr, Yr, Xt):
    # 'poissonexp' isn't listed as an option for distr?
    # glm = GLM(distr='poissonexp', alpha=0., reg_lambda=[0.], tol=1e-6)
    glm = GLM(distr='binomial', alpha=0., reg_lambda=[0.], tol=1e-6)
    glm.fit(Xr, Yr)
    Yt = glm.predict(Xt)[0]
    return Yt
Example #4
def test_multinomial():
    """Test all multinomial functionality"""
    glm_mn = GLM(distr='multinomial', reg_lambda=np.array([0.0, 0.1, 0.2]),
                 learning_rate=2e-1, tol=1e-10)
    X = np.array([[-1, -2, -3], [4, 5, 6]])
    y = np.array([1, 0])

    # test gradient
    beta = np.zeros([4, 2])
    grad_beta0, grad_beta = glm_mn._grad_L2loss(beta[0], beta[1:], 0, X, y)
    assert_true(grad_beta0[0] != grad_beta0[1])
    glm_mn.fit(X, y)
    y_pred = glm_mn.predict(X)
    assert_equal(y_pred.shape, (3, X.shape[0], 2))  # n_lambdas x n_samples x n_classes

    # pick one as yhat
    yhat = y_pred[0]
    # uniform prediction
    ynull = np.ones(yhat.shape) / yhat.shape[1]
    # pseudo_R2 should be greater than 0
    assert_true(glm_mn.score(y, yhat, ynull, method='pseudo_R2') > 0.)
    glm_mn.score(y, yhat)
    assert_equal(len(glm_mn.simulate(glm_mn.fit_[0]['beta0'],
                                     glm_mn.fit_[0]['beta'],
                                     X)),
                 X.shape[0])
    # these should raise an exception
    assert_raises(ValueError, glm_mn.score, y, y, y, 'pseudo_R2')
    assert_raises(ValueError, glm_mn.score, y, y, None, 'deviance')
Example #5
def test_multinomial():
    """Test all multinomial functionality"""
    glm = GLM(distr='multinomial', reg_lambda=np.array([0.0, 0.1, 0.2]), tol=1e-10)
    X = np.array([[-1, -2, -3], [4, 5, 6]])
    y = np.array([1, 0])
    # test gradient
    beta = np.zeros([4, 2])
    grad_beta0, grad_beta = glm.grad_L2loss(beta[0], beta[1:], 0, X, y)
    assert grad_beta0[0] != grad_beta0[1]
    glm.fit(X, y)
    y_pred = glm.predict(X)
    assert_equal(y_pred.shape, (3, X.shape[0], 2))  # n_lambdas x n_samples x n_classes
    # pick one as yhat
    yhat = y_pred[0]
    # uniform prediction
    ynull = np.ones(yhat.shape) / yhat.shape[1]
    # pseudo_R2 should be greater than 0
    assert_true(glm.pseudo_R2(y, yhat, ynull) > 0.)
    glm.deviance(y, yhat)
    assert_equal(len(glm.simulate(glm.fit_[0]['beta0'],
                                  glm.fit_[0]['beta'],
                                  X)),
                 X.shape[0])
    # these should raise an exception
    assert_raises(Exception, glm.pseudo_R2, y, y, y)
    assert_raises(Exception, glm.deviance, y, y)
Example #6
def test_multinomial():
    """Test all multinomial functionality"""
    glm_mn = GLM(distr='multinomial',
                 reg_lambda=np.array([0.0, 0.1, 0.2]),
                 learning_rate=2e-1,
                 tol=1e-10)
    X = np.array([[-1, -2, -3], [4, 5, 6]])
    y = np.array([1, 0])

    # test gradient
    beta = np.zeros([4, 2])
    grad_beta0, grad_beta = glm_mn._grad_L2loss(beta[0], beta[1:], 0, X, y)
    assert_true(grad_beta0[0] != grad_beta0[1])
    glm_mn.fit(X, y)
    y_pred = glm_mn.predict(X)
    assert_equal(y_pred.shape,
                 (3, X.shape[0], 2))  # n_lambdas x n_samples x n_classes

    # pick one as yhat
    yhat = y_pred[0]
    # uniform prediction
    ynull = np.ones(yhat.shape) / yhat.shape[1]
    # pseudo_R2 should be greater than 0
    assert_true(glm_mn.score(y, yhat, ynull, method='pseudo_R2') > 0.)
    glm_mn.score(y, yhat)
    assert_equal(
        len(glm_mn.simulate(glm_mn.fit_[0]['beta0'], glm_mn.fit_[0]['beta'],
                            X)), X.shape[0])
    # these should raise an exception
    assert_raises(ValueError, glm_mn.score, y, y, y, 'pseudo_R2')
    assert_raises(ValueError, glm_mn.score, y, y, None, 'deviance')
Example #7
def test_glmnet():
    """Test glmnet."""
    scaler = StandardScaler()
    n_samples, n_features = 1000, 100
    density = 0.1
    n_lambda = 10

    # coefficients
    beta0 = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0, [n_features, 1])

    distrs = ['softplus', 'poisson', 'gaussian', 'binomial']
    solvers = ['batch-gradient', 'cdfast']
    score_metric = 'pseudo_R2'
    learning_rate = 2e-1

    for solver in solvers:
        for distr in distrs:

            glm = GLM(distr, learning_rate=learning_rate,
                      solver=solver, score_metric=score_metric)

            assert_true(repr(glm))

            np.random.seed(glm.random_state)
            X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
            y_train = glm.simulate(beta0, beta, X_train)

            X_train = scaler.fit_transform(X_train)
            glm.fit(X_train, y_train)

            beta_ = glm.fit_[-1]['beta'][:]
            assert_allclose(beta[:], beta_, atol=0.5)  # check fit

            y_pred = glm.predict(scaler.transform(X_train))
            assert_equal(y_pred.shape, (n_lambda, X_train.shape[0]))

    # checks for slicing.
    glm = glm[:3]
    glm_copy = glm.copy()
    assert_true(glm_copy is not glm)
    assert_equal(len(glm.reg_lambda), 3)
    y_pred = glm[:2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (2, X_train.shape[0]))
    y_pred = glm[2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (X_train.shape[0], ))
    assert_raises(IndexError, glm.__getitem__, [2])
    glm.score(X_train, y_train)

    # don't allow slicing if model has not been fit yet.
    glm_poisson = GLM(distr='softplus')
    assert_raises(ValueError, glm_poisson.__getitem__, 2)

    # test fit_predict
    glm_poisson.fit_predict(X_train, y_train)
    assert_raises(ValueError, glm_poisson.fit_predict, X_train[None, ...], y_train)
Example #8
def test_glmnet():
    """Test glmnet."""
    scaler = StandardScaler()
    n_samples, n_features = 1000, 100
    density = 0.1
    n_lambda = 10

    # coefficients
    beta0 = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0, [n_features, 1])

    distrs = ['poisson', 'poissonexp', 'normal', 'binomial']
    solvers = ['batch-gradient', 'cdfast']
    learning_rate = 2e-1

    for solver in solvers:
        for distr in distrs:

            glm = GLM(distr, learning_rate=learning_rate, solver=solver)

            assert_true(repr(glm))

            np.random.seed(glm.random_state)
            X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
            y_train = glm.simulate(beta0, beta, X_train)

            X_train = scaler.fit_transform(X_train)
            glm.fit(X_train, y_train)

            beta_ = glm.fit_[-1]['beta'][:]
            assert_allclose(beta[:], beta_, atol=0.5)  # check fit

            y_pred = glm.predict(scaler.transform(X_train))
            assert_equal(y_pred.shape, (n_lambda, X_train.shape[0]))

    # checks for slicing.
    glm = glm[:3]
    glm_copy = glm.copy()
    assert_true(glm_copy is not glm)
    assert_equal(len(glm.reg_lambda), 3)
    y_pred = glm[:2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (2, X_train.shape[0]))
    y_pred = glm[2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (X_train.shape[0], ))
    assert_raises(IndexError, glm.__getitem__, [2])
    glm.score(y_train, y_pred)

    # don't allow slicing if model has not been fit yet.
    glm_poisson = GLM(distr='poisson')
    assert_raises(ValueError, glm_poisson.__getitem__, 2)

    # test fit_predict
    glm_poisson.fit_predict(X_train, y_train)
    assert_raises(ValueError, glm_poisson.fit_predict, X_train[None, ...],
                  y_train)
Example #9
def test_glmnet():
    """Test glmnet."""
    scaler = StandardScaler()
    n_samples, n_features = 10000, 100
    density = 0.1
    n_lambda = 10

    # coefficients
    beta0 = np.random.rand()
    beta = sps.rand(n_features, 1, density=density).toarray()

    distrs = ['poisson', 'poissonexp', 'normal', 'binomial']
    for distr in distrs:

        # FIXME: why do we need such a small learning rate for 'poissonexp'?
        learning_rate = 1e-5 if distr == 'poissonexp' else 1e-4
        glm = GLM(distr, learning_rate=learning_rate)

        assert_true(repr(glm))

        np.random.seed(glm.random_state)
        X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
        y_train = glm.simulate(beta0, beta, X_train)

        X_train = scaler.fit_transform(X_train)
        glm.fit(X_train, y_train)

        beta_ = glm.fit_[-2]['beta'][:]
        assert_allclose(beta[:], beta_, atol=0.1)  # check fit
        density_ = np.sum(beta_ > 0.1) / float(n_features)
        assert_allclose(density_, density, atol=0.05)  # check density

        y_pred = glm.predict(scaler.transform(X_train))
        assert_equal(y_pred.shape, (n_lambda, X_train.shape[0]))

    # checks for slicing.
    glm = glm[:3]
    glm_copy = glm.copy()
    assert_true(glm_copy is not glm)
    assert_equal(len(glm.reg_lambda), 3)
    y_pred = glm[:2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (2, X_train.shape[0]))
    y_pred = glm[2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (X_train.shape[0], ))
    assert_raises(IndexError, glm.__getitem__, [2])
    glm.deviance(y_train, y_pred)

    # don't allow slicing if model has not been fit yet.
    glm = GLM(distr='poisson')
    assert_raises(ValueError, glm.__getitem__, 2)

    # test fit_predict
    glm.fit_predict(X_train, y_train)
    assert_raises(ValueError, glm.fit_predict, X_train[None, ...], y_train)
Example #10
def test_api_input_types_y():
    """Test that the input value of y can be of different types."""

    random_state = 1
    state = np.random.RandomState(random_state)
    n_samples, n_features = 100, 5

    X = state.normal(0, 1, (n_samples, n_features))
    y = state.normal(0, 1, (n_samples, ))

    glm = GLM(distr='gaussian')

    # Test that a list will not work - the types have to be ndarray
    with pytest.raises(ValueError):
        glm.fit(X, list(y))

    # Test that ValueError is raised when the shapes mismatch
    with pytest.raises(ValueError):
        GLM().fit(X, y[3:])

    # These calls should run without errors
    glm.fit(X, y)
    glm.predict(X)
    glm.score(X, y)
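
Since fit rejects plain Python lists, callers holding list data need an explicit conversion first; a one-line sketch using standard NumPy:

glm.fit(X, np.asarray(list(y)))  # explicit ndarray conversion avoids the ValueError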
Example #11
def test_glmnet():
    """Test glmnet."""
    scaler = StandardScaler()
    n_samples, n_features = 100, 10

    # coefficients
    beta0 = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0, (n_features,))

    distrs = ['softplus', 'gaussian', 'poisson', 'binomial', 'probit']
    solvers = ['batch-gradient', 'cdfast']
    score_metric = 'pseudo_R2'
    learning_rate = 2e-1

    for solver in solvers:
        for distr in distrs:

            glm = GLM(distr,
                      learning_rate=learning_rate,
                      solver=solver,
                      score_metric=score_metric)

            assert_true(repr(glm))

            np.random.seed(glm.random_state)
            X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
            y_train = simulate_glm(glm.distr, beta0, beta, X_train)

            X_train = scaler.fit_transform(X_train)
            glm.fit(X_train, y_train)

            beta_ = glm.beta_
            assert_allclose(beta, beta_, atol=0.5)  # check fit

            y_pred = glm.predict(scaler.transform(X_train))
            assert_equal(y_pred.shape[0], X_train.shape[0])

    # test fit_predict
    glm_poisson = GLM(distr='softplus')
    glm_poisson.fit_predict(X_train, y_train)
    assert_raises(ValueError, glm_poisson.fit_predict, X_train[None, ...],
                  y_train)
Example #12
def test_glmnet():
    """Test glmnet."""
    scaler = StandardScaler()
    n_samples, n_features = 100, 10

    # coefficients
    beta0 = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0, (n_features,))

    distrs = ['softplus', 'gaussian', 'poisson', 'binomial', 'probit']
    solvers = ['batch-gradient', 'cdfast']
    score_metric = 'pseudo_R2'
    learning_rate = 2e-1

    for solver in solvers:
        for distr in distrs:

            glm = GLM(distr, learning_rate=learning_rate,
                      solver=solver, score_metric=score_metric)

            assert_true(repr(glm))

            np.random.seed(glm.random_state)
            X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
            y_train = simulate_glm(glm.distr, beta0, beta, X_train)

            X_train = scaler.fit_transform(X_train)
            glm.fit(X_train, y_train)

            beta_ = glm.beta_
            assert_allclose(beta, beta_, atol=0.5)  # check fit

            y_pred = glm.predict(scaler.transform(X_train))
            assert_equal(y_pred.shape[0], X_train.shape[0])

    # test fit_predict
    glm_poisson = GLM(distr='softplus')
    glm_poisson.fit_predict(X_train, y_train)
    assert_raises(ValueError, glm_poisson.fit_predict,
                  X_train[None, ...], y_train)
Example #13
def test_multinomial():
    """Test all multinomial functionality"""
    glm_mn = GLM(distr='multinomial',
                 reg_lambda=np.array([0.0, 0.1, 0.2]),
                 learning_rate=2e-1,
                 tol=1e-10)
    X = np.array([[-1, -2, -3], [4, 5, 6]])
    y = np.array([1, 0])

    # test gradient
    beta = np.zeros([4, 2])
    grad_beta0, grad_beta = glm_mn._grad_L2loss(beta[0], beta[1:], 0, X, y)
    assert_true(grad_beta0[0] != grad_beta0[1])
    glm_mn.fit(X, y)
    y_pred = glm_mn.predict(X)
    assert_equal(y_pred.shape,
                 (3, X.shape[0], 2))  # n_lambdas x n_samples x n_classes

    # pick one as yhat
    yhat = y_pred[0]

    # uniform prediction
    ynull = np.ones(yhat.shape) / yhat.shape[1]

    # pseudo_R2 should be greater than 0
    assert_true(glm_mn[-1].score(X, y) > 0.)
    assert_equal(
        len(glm_mn.simulate(glm_mn.fit_[0]['beta0'], glm_mn.fit_[0]['beta'],
                            X)), X.shape[0])

    # check that score is computed for sliced estimator
    scorelist = glm_mn[-1].score(X, y)
    assert_equal(scorelist.shape[0], 1)

    # check that score is computed for all lambdas
    scorelist = glm_mn.score(X, y)
    assert_equal(scorelist.shape[0], y_pred.shape[0])
Example #14
train_y = simulate_glm("neg-binomial", beta0, beta, train_x)

# plot the data distribution
sns.set(color_codes=True)
sns.distplot(train_y)
plt.show()

# Create the GLM and train it
glm = GLM(distr="neg-binomial", max_iter=10000)
glm.fit(train_x, train_y)

# Print the betas and the beta0 to check for correctness
print("")
print(glm.beta0_)
print(glm.beta_)
print("")
print(beta0)
print(beta)

# simulate test data from the same distribution the model was fit on
X_test = np.random.normal(0.0, 1.0, [1000, 10])
y_test = simulate_glm("neg-binomial", beta0, beta, X_test)

# predict using fitted model on the test data
yhat_test = glm.predict(X_test)

# score the model
deviance = glm.score(X_test, y_test)
print(deviance)
Example #15
print(all_position.shape)
pl.figure()
for n in range(n_frames):
    pl.scatter(all_position[n, 0:4], all_position[n, 4:8], s=2, c='k')

pl.show()

# GLM
glm = GLM(distr='gaussian', alpha=0.05)

X = np.delete(all_position, 0, axis=1)
y = all_position[:, 0]
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.25,
                                                    random_state=42)

scaler = StandardScaler().fit(X_train)
glm.fit(scaler.transform(X_train), y_train)

yhat = glm.predict(scaler.transform(X))
# print(glm.score(X_test, Y_test))
# plot
pl.figure()
pl.plot(y, marker='x', color='b', label='observed')
pl.plot(yhat[9, :], marker='o', color='r', label='predicted')

pl.show()
Example #16
def test_glmnet(distr, reg_lambda, fit_intercept, solver):
    """Test glmnet."""
    raises(ValueError, GLM, distr='blah')
    raises(ValueError, GLM, distr='gaussian', max_iter=1.8)

    n_samples, n_features = 100, 10

    # coefficients
    beta0 = 0.
    if fit_intercept:
        beta0 = 1. / (float(n_features) + 1.) * \
            np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + int(fit_intercept)) * \
        np.random.normal(0.0, 1.0, (n_features,))

    score_metric = 'pseudo_R2'
    learning_rate = 2e-1
    random_state = 0

    betas_ = list()

    if not (distr == 'gamma' and solver == 'cdfast'):

        np.random.seed(random_state)

        theta = 1.0
        X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
        y_train = simulate_glm(distr, beta0, beta, X_train, theta=theta,
                               sample=False)

        alpha = 0.
        loss_trace = list()
        eta = 2.0
        group = None
        Tau = None

        def callback(beta):
            Tau = None
            loss_trace.append(
                _loss(distr, alpha, Tau, reg_lambda,
                      X_train, y_train, eta, theta, group, beta,
                      fit_intercept=fit_intercept))

        glm = GLM(distr, learning_rate=learning_rate,
                  reg_lambda=reg_lambda, tol=1e-5, max_iter=5000,
                  alpha=alpha, solver=solver, score_metric=score_metric,
                  random_state=random_state, callback=callback,
                  fit_intercept=fit_intercept, theta=theta)
        assert repr(glm)

        glm.fit(X_train, y_train)

        # verify loss decreases
        assert np.all(np.diff(loss_trace) <= 1e-7)

        # true loss and beta should be recovered when reg_lambda == 0
        if reg_lambda == 0.:
            # verify loss at convergence = loss when beta=beta_
            l_true = _loss(distr, alpha, Tau, reg_lambda,
                           X_train, y_train, eta, theta, group,
                           np.concatenate(([beta0], beta)))
            assert_allclose(loss_trace[-1], l_true, rtol=1e-4, atol=1e-5)
            # beta=beta_ when reg_lambda = 0.
            assert_allclose(beta, glm.beta_, rtol=0.05, atol=1e-2)
        betas_.append(glm.beta_)

        y_pred = glm.predict(X_train)
        assert y_pred.shape[0] == X_train.shape[0]

        # compare all solvers pairwise to make sure they're close
        for i, first_beta in enumerate(betas_[:-1]):
            for second_beta in betas_[i + 1:]:
                assert_allclose(first_beta, second_beta, rtol=0.05, atol=1e-2)

        # test fit_predict
        glm_poisson = GLM(distr='softplus')
        glm_poisson.fit_predict(X_train, y_train)
        raises(ValueError, glm_poisson.fit_predict,
               X_train[None, ...], y_train)
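
The callback argument exercised above is a hook that the solver calls with the current coefficient vector on each iteration, which is how the test records its loss trace. A stripped-down sketch of the same pattern, tracking only the coefficient norm instead of the full penalized loss:

import numpy as np
from pyglmnet import GLM

trace = list()

def track(beta):
    # invoked by the solver once per iteration with the current coefficients
    trace.append(np.linalg.norm(beta))

glm_tracked = GLM(distr='gaussian', callback=track)
# after glm_tracked.fit(X, y), trace holds one entry per iteration run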
Example #17
    def get_benchmarks(self, X_train, y_train, X_test, y_test):
        """
        """
        n_repeats = self.n_repeats
        distr = self.distr

        res = dict()
        for env in self.envs:
            res[env] = dict()
            if env == 'pyglmnet':
                # initialize model
                model = GLM(distr=distr,
                            reg_lambda=[self.reg_lambda],
                            alpha=self.alpha,
                            solver='batch-gradient',
                            score_metric='pseudo_R2')

                # fit-predict-score
                model.fit(X_train, y_train)
                y_test_hat = model[-1].predict(X_test)
                y_test_hat = np.squeeze(y_test_hat)

                if distr in ['gaussian', 'poisson']:
                    res[env]['score'] = \
                        r2_score(y_test, y_test_hat)
                elif distr == 'binomial':
                    res[env]['score'] = \
                        accuracy_score(y_test,
                                       (y_test_hat > 0.5).astype(int))

                # time
                tmp = list()
                for r in range(n_repeats):
                    start = time.time()
                    model.fit(X_train, y_train)
                    stop = time.time()
                    tmp.append(stop - start)
                res[env]['time'] = np.min(tmp) * 1e3

            if env == 'sklearn':
                if distr in ['gaussian', 'binomial']:
                    # initialize model
                    if distr == 'gaussian':
                        model = ElasticNet(alpha=self.reg_lambda,
                                           l1_ratio=self.alpha)
                    elif distr == 'binomial':

                        model = SGDClassifier(loss='log',
                                              penalty='elasticnet',
                                              alpha=self.reg_lambda,
                                              l1_ratio=self.alpha)

                    # fit-predict-score
                    model.fit(X_train, y_train)
                    y_test_hat = model.predict(X_test)
                    res[env]['score'] = model.score(X_test, y_test)

                    # time
                    tmp = list()
                    for r in range(n_repeats):
                        start = time.time()
                        model.fit(X_train, y_train)
                        stop = time.time()
                        tmp.append(stop - start)
                    res[env]['time'] = np.min(tmp) * 1e3
                else:
                    res[env]['score'] = -999.
                    res[env]['time'] = -999.

            if env == 'statsmodels':
                # initialize model
                if distr == 'gaussian':
                    model = sm.GLM(y_train,
                                   sm.add_constant(X_train),
                                   family=sm.families.Gaussian())
                elif distr == 'binomial':
                    model = sm.GLM(y_train,
                                   sm.add_constant(X_train),
                                   family=sm.families.Binomial())
                elif distr == 'poisson':
                    model = sm.GLM(y_train,
                                   sm.add_constant(X_train),
                                   family=sm.families.Poisson())

                # fit-predict-score
                statsmodels_res = model.fit()
                y_test_hat = model.predict(statsmodels_res.params,
                                           exog=sm.add_constant(X_test))
                y_test_hat = np.array(y_test_hat)

                if distr in ['gaussian', 'poisson']:
                    res[env]['score'] = \
                        r2_score(y_test, y_test_hat)
                elif distr == 'binomial':
                    res[env]['score'] = \
                        accuracy_score(y_test,
                                       (y_test_hat > 0.5).astype(int))

                # time
                tmp = list()
                for r in range(n_repeats):
                    start = time.time()
                    statsmodels_res = model.fit()
                    stop = time.time()
                    tmp.append(stop - start)
                res[env]['time'] = np.min(tmp) * 1e3

            if env == 'R':
                # initialize model
                glmnet = importr('glmnet')
                predict = robjects.r('predict')

                # fit-predict-score
                try:
                    fit = glmnet.glmnet(X_train,
                                        y_train,
                                        family=distr,
                                        alpha=self.alpha,
                                        nlambda=1)
                    tmp = predict(fit, newx=X_test, s=0)

                    y_test_hat = np.zeros(y_test.shape[0])
                    for i in range(y_test.shape[0]):
                        y_test_hat[i] = tmp[i]

                    if distr in ['gaussian', 'poisson']:
                        res[env]['score'] = \
                            r2_score(y_test, y_test_hat)
                    elif distr == 'binomial':
                        res[env]['score'] = \
                            accuracy_score(y_test,
                                           (y_test_hat > 0.5).astype(int))

                    # time
                    tmp = list()
                    for r in range(n_repeats):
                        start = time.time()
                        fit = glmnet.glmnet(X_train,
                                            y_train,
                                            family=distr,
                                            alpha=self.alpha,
                                            nlambda=1)
                        stop = time.time()
                        tmp.append(stop - start)
                    res[env]['time'] = np.min(tmp) * 1e3
                except Exception:
                    res[env]['score'] = -999.
                    res[env]['time'] = -999.

        return res
Example #18
    def get_benchmarks(self, X_train, y_train, X_test, y_test):
        """
        """
        n_repeats = self.n_repeats
        distr = self.distr

        res = dict()
        for env in self.envs:
            res[env] = dict()
            if env == 'pyglmnet':
                # initialize model
                model = GLM(distr=distr,
                            reg_lambda=[self.reg_lambda],
                            alpha=self.alpha,
                            solver='batch-gradient',
                            score_metric='pseudo_R2')

                # fit-predict-score
                model.fit(X_train, y_train)
                y_test_hat = model[-1].predict(X_test)
                y_test_hat = np.squeeze(y_test_hat)

                if distr in ['gaussian', 'poisson']:
                    res[env]['score'] = \
                        r2_score(y_test, y_test_hat)
                elif distr == 'binomial':
                    res[env]['score'] = \
                        accuracy_score(y_test,
                                       (y_test_hat > 0.5).astype(int))

                # time
                tmp = list()
                for r in range(n_repeats):
                    start = time.time()
                    model.fit(X_train, y_train)
                    stop = time.time()
                    tmp.append(stop - start)
                res[env]['time'] = np.min(tmp) * 1e3

            if env == 'sklearn':
                if distr in ['gaussian', 'binomial']:
                    # initialize model
                    if distr == 'gaussian':
                        model = ElasticNet(alpha=self.reg_lambda,
                                           l1_ratio=self.alpha)
                    elif distr == 'binomial':

                        model = SGDClassifier(loss='log',
                                              penalty='elasticnet',
                                              alpha=self.reg_lambda,
                                              l1_ratio=self.alpha)

                    # fit-predict-score
                    model.fit(X_train, y_train)
                    y_test_hat = model.predict(X_test)
                    res[env]['score'] = model.score(X_test, y_test)

                    # time
                    tmp = list()
                    for r in range(n_repeats):
                        start = time.time()
                        model.fit(X_train, y_train)
                        stop = time.time()
                        tmp.append(stop - start)
                    res[env]['time'] = np.min(tmp) * 1e3
                else:
                    res[env]['score'] = -999.
                    res[env]['time'] = -999.

            if env == 'statsmodels':
                # initialize model
                if distr == 'gaussian':
                    model = sm.GLM(y_train,
                                   sm.add_constant(X_train),
                                   family=sm.families.Gaussian())
                elif distr == 'binomial':
                    model = sm.GLM(y_train,
                                   sm.add_constant(X_train),
                                   family=sm.families.Binomial())
                elif distr == 'poisson':
                    model = sm.GLM(y_train,
                                   sm.add_constant(X_train),
                                   family=sm.families.Poisson())

                # fit-predict-score
                statsmodels_res = model.fit()
                y_test_hat = model.predict(statsmodels_res.params,
                                           exog=sm.add_constant(X_test))
                y_test_hat = np.array(y_test_hat)

                if distr in ['gaussian', 'poisson']:
                    res[env]['score'] = \
                        r2_score(y_test, y_test_hat)
                elif distr == 'binomial':
                    res[env]['score'] = \
                        accuracy_score(y_test,
                                       (y_test_hat > 0.5).astype(int))

                # time
                tmp = list()
                for r in range(n_repeats):
                    start = time.time()
                    statsmodels_res = model.fit()
                    stop = time.time()
                    tmp.append(stop - start)
                res[env]['time'] = np.min(tmp) * 1e3

            if env == 'R':
                # initialize model
                glmnet = importr('glmnet')
                predict = robjects.r('predict')

                # fit-predict-score
                try:
                    fit = glmnet.glmnet(X_train,
                                        y_train,
                                        family=distr,
                                        alpha=self.alpha,
                                        nlambda=1)
                    tmp = predict(fit, newx=X_test, s=0)

                    y_test_hat = np.zeros(y_test.shape[0])
                    for i in range(y_test.shape[0]):
                        y_test_hat[i] = tmp[i]

                    if distr in ['gaussian', 'poisson']:
                        res[env]['score'] = \
                            r2_score(y_test, y_test_hat)
                    elif distr == 'binomial':
                        res[env]['score'] = \
                            accuracy_score(y_test,
                                           (y_test_hat > 0.5).astype(int))

                    # time
                    tmp = list()
                    for r in range(n_repeats):
                        start = time.time()
                        fit = glmnet.glmnet(X_train,
                                            y_train,
                                            family=distr,
                                            alpha=self.alpha,
                                            nlambda=1)
                        stop = time.time()
                        tmp.append(stop - start)
                    res[env]['time'] = np.min(tmp) * 1e3
                except Exception:
                    res[env]['score'] = -999.
                    res[env]['time'] = -999.

        return res
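
get_benchmarks assumes an object carrying n_repeats, distr, envs, reg_lambda, and alpha attributes; a hypothetical driver showing how it might be wired up (the Benchmark class below is our own scaffolding, not part of pyglmnet):

class Benchmark(object):
    def __init__(self, distr, envs, reg_lambda=0.01, alpha=0.5, n_repeats=3):
        self.distr = distr
        self.envs = envs
        self.reg_lambda = reg_lambda
        self.alpha = alpha
        self.n_repeats = n_repeats

    # get_benchmarks(self, X_train, y_train, X_test, y_test) as defined above

bench = Benchmark(distr='gaussian', envs=['pyglmnet', 'sklearn'])
# res = bench.get_benchmarks(X_train, y_train, X_test, y_test)
# res['pyglmnet']['score'], res['pyglmnet']['time']  # score and best time (ms)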
Example #19
def test_glmnet():
    """Test glmnet."""
    raises(ValueError, GLM, distr='blah')
    raises(ValueError, GLM, distr='gaussian', max_iter=1.8)

    n_samples, n_features = 100, 10

    # coefficients
    beta0 = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0, (n_features,))

    distrs = ['softplus', 'gaussian', 'poisson', 'binomial', 'probit']
    solvers = ['batch-gradient', 'cdfast']

    score_metric = 'pseudo_R2'
    learning_rate = 2e-1
    random_state = 0

    for distr in distrs:
        betas_ = list()
        for solver in solvers:

            np.random.seed(random_state)

            X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
            y_train = simulate_glm(distr, beta0, beta, X_train, sample=False)

            alpha = 0.
            reg_lambda = 0.
            loss_trace = list()

            def callback(beta):
                Tau = None
                eta = 2.0
                group = None

                loss_trace.append(
                    _loss(distr, alpha, Tau, reg_lambda, X_train, y_train, eta,
                          group, beta))

            glm = GLM(distr,
                      learning_rate=learning_rate,
                      reg_lambda=reg_lambda,
                      tol=1e-3,
                      max_iter=5000,
                      alpha=alpha,
                      solver=solver,
                      score_metric=score_metric,
                      random_state=random_state,
                      callback=callback)
            assert repr(glm)

            glm.fit(X_train, y_train)

            # verify loss decreases
            assert np.all(np.diff(loss_trace) <= 1e-7)

            # verify loss at convergence = loss when beta=beta_
            l_true = _loss(distr, 0., np.eye(beta.shape[0]), 0., X_train,
                           y_train, 2.0, None, np.concatenate(([beta0], beta)))
            assert_allclose(loss_trace[-1], l_true, rtol=1e-4, atol=1e-5)
            # beta=beta_ when reg_lambda = 0.
            assert_allclose(beta, glm.beta_, rtol=0.05, atol=1e-2)
            betas_.append(glm.beta_)

            y_pred = glm.predict(X_train)
            assert y_pred.shape[0] == X_train.shape[0]

        # compare all solvers pairwise to make sure they're close
        for i, first_beta in enumerate(betas_[:-1]):
            for second_beta in betas_[i + 1:]:
                assert_allclose(first_beta, second_beta, rtol=0.05, atol=1e-2)

    # test fit_predict
    glm_poisson = GLM(distr='softplus')
    glm_poisson.fit_predict(X_train, y_train)
    raises(ValueError, glm_poisson.fit_predict, X_train[None, ...], y_train)
########################################################
# **Fitting and predicting with a linear-Gaussian GLM**
#
# For a general linear model, the observed spikes can be
# thought of as driven by underlying parameters
# :math:`\beta_0, \beta` that control the spiking.
#
# You can simply use a linear-Gaussian GLM with no regularization
# to predict the spike counts.

glm_lg = GLM(distr='gaussian', reg_lambda=0.0, score_metric='pseudo_R2')
glm_lg.fit(Xdsgn, y)

# predict spike counts
ypred_lg = glm_lg.predict(Xdsgn)
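
########################################################
# Since the model was constructed with
# ``score_metric='pseudo_R2'``, goodness of fit can be read off
# directly; a one-line usage sketch:

print(glm_lg.score(Xdsgn, y))  # pseudo-R2 of the linear-Gaussian fit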

########################################################
# **Fitting and predicting with a Poisson GLM**
#
# We can also assume that there is a non-linear function governing
# the underlying firing patterns.
# In pyglmnet, we use an exponential inverse link function
# for the Poisson distribution.

glm_poisson = GLM(distr='poisson',
                  alpha=0.05,
                  learning_rate=1.0,
                  score_metric='pseudo_R2',
                  reg_lambda=1e-7)
glm_poisson.fit(Xdsgn, y)
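
########################################################
# To make the inverse link explicit: under the exponential link,
# the Poisson model's conditional mean is
# :math:`\mu = \exp(\beta_0 + X\beta)`. A sketch reproducing the
# prediction by hand, assuming the fitted attributes ``beta0_`` and
# ``beta_`` seen in the examples above (``predict`` may differ
# slightly where pyglmnet linearizes the exponential for stability):

mu_manual = np.exp(glm_poisson.beta0_ + np.dot(Xdsgn, glm_poisson.beta_))
ypred_poisson = glm_poisson.predict(Xdsgn)  # approximately matches mu_manual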