Example #1
0
def test_multinomial():
    """Test all multinomial functionality.

    Fits a multinomial GLM on a tiny 2-sample dataset across three
    regularization strengths and checks gradients, prediction shape,
    scoring, simulation, and error handling.
    """
    # Three reg_lambda values -> the fitted model holds three estimators.
    glm_mn = GLM(distr='multinomial',
                 reg_lambda=np.array([0.0, 0.1, 0.2]),
                 learning_rate=2e-1,
                 tol=1e-10)
    X = np.array([[-1, -2, -3], [4, 5, 6]])  # 2 samples x 3 features
    y = np.array([1, 0])  # two classes

    # test gradient: row 0 of beta is the intercept, rows 1..3 the weights
    beta = np.zeros([4, 2])
    grad_beta0, grad_beta = glm_mn._grad_L2loss(beta[0], beta[1:], 0, X, y)
    # intercept gradient must differ between the two classes
    assert_true(grad_beta0[0] != grad_beta0[1])
    glm_mn.fit(X, y)
    y_pred = glm_mn.predict(X)
    assert_equal(y_pred.shape,
                 (3, X.shape[0], 2))  # n_lambdas x n_samples x n_classes

    # pick one as yhat
    yhat = y_pred[0]
    # uniform prediction
    ynull = np.ones(yhat.shape) / yhat.shape[1]
    # pseudo_R2 should be greater than 0
    assert_true(glm_mn.score(y, yhat, ynull, method='pseudo_R2') > 0.)
    glm_mn.score(y, yhat)  # default-metric path should not raise
    # simulate() must return one sample per row of X
    assert_equal(
        len(glm_mn.simulate(glm_mn.fit_[0]['beta0'], glm_mn.fit_[0]['beta'],
                            X)), X.shape[0])
    # these should raise an exception
    assert_raises(ValueError, glm_mn.score, y, y, y, 'pseudo_R2')
    assert_raises(ValueError, glm_mn.score, y, y, None, 'deviance')
def test_multinomial():
    """Test all multinomial functionality.

    Covers gradient computation, fitting, prediction shape, scoring
    (valid and invalid argument combinations), and simulation for a
    multinomial GLM fit at three regularization strengths.
    """
    # PEP8: no spaces around '=' for keyword arguments.
    glm_mn = GLM(distr='multinomial', reg_lambda=np.array([0.0, 0.1, 0.2]),
                 learning_rate=2e-1, tol=1e-10)
    X = np.array([[-1, -2, -3], [4, 5, 6]])  # 2 samples x 3 features
    y = np.array([1, 0])  # two classes

    # test gradient: row 0 of beta is the intercept, rows 1..3 the weights
    beta = np.zeros([4, 2])
    grad_beta0, grad_beta = glm_mn._grad_L2loss(beta[0], beta[1:], 0, X, y)
    assert_true(grad_beta0[0] != grad_beta0[1])
    glm_mn.fit(X, y)
    y_pred = glm_mn.predict(X)
    # n_lambdas x n_samples x n_classes
    assert_equal(y_pred.shape, (3, X.shape[0], 2))

    # pick one as yhat
    yhat = y_pred[0]
    # uniform prediction
    ynull = np.ones(yhat.shape) / yhat.shape[1]
    # pseudo_R2 should be greater than 0
    assert_true(glm_mn.score(y, yhat, ynull, method='pseudo_R2') > 0.)
    glm_mn.score(y, yhat)
    # simulate() returns one sample per row of X
    assert_equal(len(glm_mn.simulate(glm_mn.fit_[0]['beta0'],
                                     glm_mn.fit_[0]['beta'],
                                     X)),
                 X.shape[0])
    # these should raise an exception
    assert_raises(ValueError, glm_mn.score, y, y, y, 'pseudo_R2')
    assert_raises(ValueError, glm_mn.score, y, y, None, 'deviance')
def test_api_input():
    """Test GLM API input validation: shapes, solver, fit_intercept, max_iter."""

    random_state = 1
    state = np.random.RandomState(random_state)
    n_samples, n_features = 100, 5

    X = state.normal(0, 1, (n_samples, n_features))
    y = state.normal(0, 1, (n_samples, ))

    glm = GLM(distr='gaussian')

    # Test that ValueError is raised when the shapes mismatch
    with pytest.raises(ValueError):
        GLM().fit(X, y[3:])

    # This would work without errors
    glm.fit(X, y)
    glm.predict(X)
    glm.score(X, y)
    glm.plot_convergence()
    glm = GLM(distr='gaussian', solver='test')  # invalid solver name

    with pytest.raises(ValueError, match="solver must be one of"):
        glm.fit(X, y)

    with pytest.raises(ValueError, match="fit_intercept must be"):
        glm = GLM(distr='gaussian', fit_intercept='blah')

    # max_iter too small to converge -> should warn, not fail
    glm = GLM(distr='gaussian', max_iter=2)
    with pytest.warns(UserWarning, match='Reached max number of iterat'):
        glm.fit(X, y)
def test_tikhonov():
    """Tikhonov regularization test.

    Builds a smooth Gaussian prior covariance over the coefficients,
    samples a ground-truth GLM from it, and fits Tikhonov-regularized
    models with both the batch-gradient and cdfast solvers.
    """
    n_samples, n_features = 100, 10

    # design covariance matrix of parameters: squared-exponential kernel
    # with a small jitter (0.01) on the diagonal for positive-definiteness
    Gam = 15.
    PriorCov = np.zeros([n_features, n_features])
    for i in np.arange(0, n_features):
        for j in np.arange(i, n_features):
            # np.float was removed in NumPy 1.24; use the builtin float
            PriorCov[i, j] = np.exp(-Gam * 1. / (float(n_features) ** 2) *
                                    (float(i) - float(j)) ** 2)
            PriorCov[j, i] = PriorCov[i, j]
            if i == j:
                PriorCov[i, j] += 0.01
    PriorCov = 1. / np.max(PriorCov) * PriorCov

    # sample parameters as multivariate normal
    beta0 = np.random.randn()
    beta = np.random.multivariate_normal(np.zeros(n_features), PriorCov)

    # sample train and test data
    glm_sim = GLM(distr='softplus', score_metric='pseudo_R2')
    X = np.random.randn(n_samples, n_features)
    y = simulate_glm(glm_sim.distr, beta0, beta, X)

    # sklearn.cross_validation was removed in scikit-learn 0.20;
    # train_test_split now lives in sklearn.model_selection
    from sklearn.model_selection import train_test_split
    Xtrain, Xtest, ytrain, ytest = \
        train_test_split(X, y, test_size=0.5, random_state=42)

    # design tikhonov matrix: Tau^T Tau approximates the prior precision
    [U, S, V] = np.linalg.svd(PriorCov, full_matrices=False)
    Tau = np.dot(np.diag(1. / np.sqrt(S)), U)
    Tau = 1. / np.sqrt(float(n_samples)) * Tau / Tau.max()

    # fit model with batch gradient
    glm_tikhonov = GLM(distr='softplus',
                       alpha=0.0,
                       Tau=Tau,
                       solver='batch-gradient',
                       tol=1e-5,
                       score_metric='pseudo_R2')
    glm_tikhonov.fit(Xtrain, ytrain)

    R2_train, R2_test = dict(), dict()
    R2_train['tikhonov'] = glm_tikhonov.score(Xtrain, ytrain)
    R2_test['tikhonov'] = glm_tikhonov.score(Xtest, ytest)

    # fit model with cdfast
    glm_tikhonov = GLM(distr='softplus',
                       alpha=0.0,
                       Tau=Tau,
                       solver='cdfast',
                       tol=1e-5,
                       score_metric='pseudo_R2')
    glm_tikhonov.fit(Xtrain, ytrain)

    R2_train, R2_test = dict(), dict()
    R2_train['tikhonov'] = glm_tikhonov.score(Xtrain, ytrain)
    R2_test['tikhonov'] = glm_tikhonov.score(Xtest, ytest)
def test_tikhonov():
    """Tikhonov regularization test.

    Builds a smooth Gaussian prior covariance over the coefficients,
    samples a ground-truth GLM from it, and fits Tikhonov-regularized
    models with both the batch-gradient and cdfast solvers.
    """
    n_samples, n_features = 100, 10

    # design covariance matrix of parameters: squared-exponential kernel
    # with a small jitter (0.01) on the diagonal for positive-definiteness
    Gam = 15.
    PriorCov = np.zeros([n_features, n_features])
    for i in np.arange(0, n_features):
        for j in np.arange(i, n_features):
            # np.float was removed in NumPy 1.24; use the builtin float
            PriorCov[i, j] = np.exp(-Gam * 1. / (float(n_features) ** 2) *
                                    (float(i) - float(j)) ** 2)
            PriorCov[j, i] = PriorCov[i, j]
            if i == j:
                PriorCov[i, j] += 0.01
    PriorCov = 1. / np.max(PriorCov) * PriorCov

    # sample parameters as multivariate normal
    beta0 = np.random.randn()
    beta = np.random.multivariate_normal(np.zeros(n_features), PriorCov)

    # sample train and test data
    glm_sim = GLM(distr='softplus', score_metric='pseudo_R2')
    X = np.random.randn(n_samples, n_features)
    y = simulate_glm(glm_sim.distr, beta0, beta, X)

    from sklearn.model_selection import train_test_split
    Xtrain, Xtest, ytrain, ytest = \
        train_test_split(X, y, test_size=0.5, random_state=42)

    # design tikhonov matrix: Tau^T Tau approximates the prior precision
    [U, S, V] = np.linalg.svd(PriorCov, full_matrices=False)
    Tau = np.dot(np.diag(1. / np.sqrt(S)), U)
    Tau = 1. / np.sqrt(float(n_samples)) * Tau / Tau.max()

    # fit model with batch gradient
    glm_tikhonov = GLM(distr='softplus',
                       alpha=0.0,
                       Tau=Tau,
                       solver='batch-gradient',
                       tol=1e-3,
                       score_metric='pseudo_R2')
    glm_tikhonov.fit(Xtrain, ytrain)

    R2_train, R2_test = dict(), dict()
    R2_train['tikhonov'] = glm_tikhonov.score(Xtrain, ytrain)
    R2_test['tikhonov'] = glm_tikhonov.score(Xtest, ytest)

    # fit model with cdfast
    glm_tikhonov = GLM(distr='softplus',
                       alpha=0.0,
                       Tau=Tau,
                       solver='cdfast',
                       tol=1e-3,
                       score_metric='pseudo_R2')
    glm_tikhonov.fit(Xtrain, ytrain)

    R2_train, R2_test = dict(), dict()
    R2_train['tikhonov'] = glm_tikhonov.score(Xtrain, ytrain)
    R2_test['tikhonov'] = glm_tikhonov.score(Xtest, ytest)
Example #6
0
def test_glmnet():
    """Test glmnet.

    Simulates data from each supported distribution, fits elastic-net
    GLM paths with both solvers, and checks coefficient recovery,
    prediction shapes, slicing/copying, and fit_predict validation.
    """
    scaler = StandardScaler()
    n_samples, n_features = 1000, 100
    density = 0.1
    n_lambda = 10

    # coefficients, scaled down with the number of features
    # (np.float was removed in NumPy 1.24; use the builtin float)
    beta0 = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0, [n_features, 1])

    distrs = ['softplus', 'poisson', 'gaussian', 'binomial']
    solvers = ['batch-gradient', 'cdfast']
    score_metric = 'pseudo_R2'
    learning_rate = 2e-1

    for solver in solvers:
        for distr in distrs:

            glm = GLM(distr, learning_rate=learning_rate,
                      solver=solver, score_metric=score_metric)

            assert_true(repr(glm))

            # reseed so every (solver, distr) pair sees the same data
            np.random.seed(glm.random_state)
            X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
            y_train = glm.simulate(beta0, beta, X_train)

            X_train = scaler.fit_transform(X_train)
            glm.fit(X_train, y_train)

            beta_ = glm.fit_[-1]['beta'][:]
            assert_allclose(beta[:], beta_, atol=0.5)  # check fit

            y_pred = glm.predict(scaler.transform(X_train))
            assert_equal(y_pred.shape, (n_lambda, X_train.shape[0]))

    # checks for slicing.
    glm = glm[:3]
    glm_copy = glm.copy()
    assert_true(glm_copy is not glm)
    assert_equal(len(glm.reg_lambda), 3)
    y_pred = glm[:2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (2, X_train.shape[0]))
    y_pred = glm[2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (X_train.shape[0], ))
    assert_raises(IndexError, glm.__getitem__, [2])
    glm.score(X_train, y_train)

    # don't allow slicing if model has not been fit yet.
    glm_poisson = GLM(distr='softplus')
    assert_raises(ValueError, glm_poisson.__getitem__, 2)

    # test fit_predict; a 3D X must be rejected
    glm_poisson.fit_predict(X_train, y_train)
    assert_raises(ValueError, glm_poisson.fit_predict,
                  X_train[None, ...], y_train)
Example #7
0
def test_glmnet():
    """Test glmnet.

    Simulates data for several distributions, fits GLM paths with both
    solvers, and checks coefficient recovery, prediction shapes,
    slicing/copying, and fit_predict input validation.
    """
    scaler = StandardScaler()
    n_samples, n_features = 1000, 100
    density = 0.1
    n_lambda = 10

    # coefficients, scaled down with the number of features
    # (np.float was removed in NumPy 1.24; use the builtin float)
    beta0 = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0, [n_features, 1])

    distrs = ['poisson', 'poissonexp', 'normal', 'binomial']
    solvers = ['batch-gradient', 'cdfast']
    learning_rate = 2e-1

    for solver in solvers:
        for distr in distrs:

            glm = GLM(distr, learning_rate=learning_rate, solver=solver)

            assert_true(repr(glm))

            # reseed so every (solver, distr) pair sees the same data
            np.random.seed(glm.random_state)
            X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
            y_train = glm.simulate(beta0, beta, X_train)

            X_train = scaler.fit_transform(X_train)
            glm.fit(X_train, y_train)

            beta_ = glm.fit_[-1]['beta'][:]
            assert_allclose(beta[:], beta_, atol=0.5)  # check fit

            y_pred = glm.predict(scaler.transform(X_train))
            assert_equal(y_pred.shape, (n_lambda, X_train.shape[0]))

    # checks for slicing.
    glm = glm[:3]
    glm_copy = glm.copy()
    assert_true(glm_copy is not glm)
    assert_equal(len(glm.reg_lambda), 3)
    y_pred = glm[:2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (2, X_train.shape[0]))
    y_pred = glm[2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (X_train.shape[0], ))
    assert_raises(IndexError, glm.__getitem__, [2])
    glm.score(y_train, y_pred)

    # don't allow slicing if model has not been fit yet.
    glm_poisson = GLM(distr='poisson')
    assert_raises(ValueError, glm_poisson.__getitem__, 2)

    # test fit_predict; a 3D X must be rejected
    glm_poisson.fit_predict(X_train, y_train)
    assert_raises(ValueError, glm_poisson.fit_predict, X_train[None, ...],
                  y_train)
def test_glmnet():
    """Test glmnet.

    Simulates sparse ground-truth coefficients, fits a GLM path per
    distribution, and checks coefficient recovery, sparsity, prediction
    shapes, slicing, copying, and fit_predict validation.
    """
    scaler = StandardScaler()
    n_samples, n_features = 10000, 100
    density = 0.1
    n_lambda = 10

    # coefficients: sparse ground truth with the given density
    beta0 = np.random.rand()
    beta = sps.rand(n_features, 1, density=density).toarray()

    distrs = ['poisson', 'poissonexp', 'normal', 'binomial']
    learning_rate = 2e-1
    for distr in distrs:

        glm = GLM(distr, learning_rate=learning_rate)

        assert_true(repr(glm))

        # reseed so every distribution sees the same design matrix
        np.random.seed(glm.random_state)
        X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
        y_train = glm.simulate(beta0, beta, X_train)

        X_train = scaler.fit_transform(X_train)
        glm.fit(X_train, y_train)

        # second-to-last lambda on the path -- TODO confirm why -2, not -1
        beta_ = glm.fit_[-2]['beta'][:]
        assert_allclose(beta[:], beta_, atol=0.5)  # check fit
        density_ = np.sum(beta_ > 0.1) / float(n_features)
        assert_allclose(density_, density, atol=0.05)  # check density

        y_pred = glm.predict(scaler.transform(X_train))
        assert_equal(y_pred.shape, (n_lambda, X_train.shape[0]))

    # checks for slicing.
    glm = glm[:3]
    glm_copy = glm.copy()
    assert_true(glm_copy is not glm)
    assert_equal(len(glm.reg_lambda), 3)
    y_pred = glm[:2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (2, X_train.shape[0]))
    y_pred = glm[2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (X_train.shape[0], ))
    assert_raises(IndexError, glm.__getitem__, [2])
    glm.score(y_train, y_pred)

    # don't allow slicing if model has not been fit yet.
    glm_poisson = GLM(distr='poisson')
    assert_raises(ValueError, glm_poisson.__getitem__, 2)

    # test fit_predict
    glm_poisson.fit_predict(X_train, y_train)
    assert_raises(ValueError, glm_poisson.fit_predict, X_train[None, ...], y_train)
Example #9
0
def test_multinomial():
    """Test all multinomial functionality.

    Fits a multinomial GLM at three regularization strengths and checks
    gradients, predicted-probability shapes, scoring (full and sliced
    estimators), and simulation.
    """
    # PEP8: no spaces around '=' in keyword arguments
    glm_mn = GLM(distr='multinomial', reg_lambda=np.array([0.0, 0.1, 0.2]),
                 learning_rate=2e-1, tol=1e-10)
    X = np.array([[-1, -2, -3], [4, 5, 6]])  # 2 samples x 3 features
    y = np.array([1, 0])  # two classes

    # test gradient: row 0 of beta is the intercept, rows 1..3 the weights
    beta = np.zeros([4, 2])
    grad_beta0, grad_beta = glm_mn._grad_L2loss(beta[0], beta[1:], 0, X, y)
    assert_true(grad_beta0[0] != grad_beta0[1])
    glm_mn.fit(X, y)
    y_pred_proba = glm_mn.predict_proba(X)
    # n_lambdas x n_samples x n_classes
    assert_equal(y_pred_proba.shape, (3, X.shape[0], 2))

    # pick one as yhat
    yhat = y_pred_proba[0]

    # uniform prediction
    ynull = np.ones(yhat.shape) / yhat.shape[1]

    # pseudo_R2 should be greater than 0
    assert_true(glm_mn[-1].score(X, y) > 0.)
    # simulate() returns one sample per row of X
    assert_equal(len(glm_mn.simulate(glm_mn.fit_[0]['beta0'],
                                     glm_mn.fit_[0]['beta'],
                                     X)),
                 X.shape[0])

    # check that score is computed for sliced estimator
    scorelist = glm_mn[-1].score(X, y)
    assert_equal(scorelist.shape[0], 1)

    # check that score is computed for all lambdas
    scorelist = glm_mn.score(X, y)
    assert_equal(scorelist.shape[0], y_pred_proba.shape[0])
def test_api_input_types_y():
    """Check input validation on ``y``: type and shape errors are raised."""

    rng = np.random.RandomState(1)
    n_samples, n_features = 100, 5

    X = rng.normal(0, 1, (n_samples, n_features))
    y = rng.normal(0, 1, (n_samples, ))

    model = GLM(distr='gaussian')

    # a plain list is rejected -- y must be an ndarray
    with pytest.raises(ValueError):
        model.fit(X, list(y))

    # mismatched number of rows between X and y is rejected
    with pytest.raises(ValueError):
        GLM().fit(X, y[3:])

    # a well-formed ndarray goes through fit/predict/score cleanly
    model.fit(X, y)
    model.predict(X)
    model.score(X, y)
Example #11
0
def test_accuracy():
    """Testing accuracy."""
    n_samples, n_features, n_classes = 1000, 100, 2

    intercept = np.random.normal(0.0, 1.0, 1)
    coef = np.random.normal(0.0, 1.0, (n_features, n_classes))

    # simulate binomial responses and reduce them to hard class labels
    model = GLM(distr='binomial', score_metric='accuracy')
    X = np.random.randn(n_samples, n_features)
    y = np.argmax(simulate_glm(model.distr, intercept, coef, X), axis=1)

    model.fit(X, y)
    # accuracy must come back as a plain Python float
    assert_true(isinstance(model.score(X, y), float))
Example #12
0
def test_deviance():
    """Test deviance."""
    n_samples, n_features = 1000, 100

    intercept = np.random.normal(0.0, 1.0, 1)
    coef = np.random.normal(0.0, 1.0, n_features)

    # simulate from the model's default distribution, then fit
    model = GLM(score_metric='deviance')
    X = np.random.randn(n_samples, n_features)
    y = simulate_glm(model.distr, intercept, coef, X)

    model.fit(X, y)
    # deviance must come back as a plain Python float
    assert_true(isinstance(model.score(X, y), float))
Example #13
0
def test_pseudoR2():
    """Test pseudo r2."""
    n_samples, n_features = 1000, 100

    intercept = np.random.rand()
    coef = np.random.normal(0.0, 1.0, n_features)

    # simulate data, fit, and score with the pseudo-R^2 metric
    model = GLM(score_metric='pseudo_R2')
    X = np.random.randn(n_samples, n_features)
    y = simulate_glm(model.distr, intercept, coef, X)

    model.fit(X, y)
    # pseudo-R^2 must come back as a plain Python float
    assert (isinstance(model.score(X, y), float))
Example #14
0
def test_multinomial():
    """Test all multinomial functionality"""
    # three regularization strengths -> three fitted estimators
    glm_mn = GLM(distr='multinomial',
                 reg_lambda=np.array([0.0, 0.1, 0.2]),
                 learning_rate=2e-1,
                 tol=1e-10)
    X = np.array([[-1, -2, -3], [4, 5, 6]])  # 2 samples x 3 features
    y = np.array([1, 0])  # two classes

    # test gradient: row 0 of beta is the intercept, rows 1..3 the weights
    beta = np.zeros([4, 2])
    grad_beta0, grad_beta = glm_mn._grad_L2loss(beta[0], beta[1:], 0, X, y)
    assert_true(grad_beta0[0] != grad_beta0[1])
    glm_mn.fit(X, y)
    y_pred_proba = glm_mn.predict_proba(X)
    assert_equal(y_pred_proba.shape,
                 (3, X.shape[0], 2))  # n_lambdas x n_samples x n_classes

    # pick one as yhat
    yhat = y_pred_proba[0]

    # uniform prediction
    ynull = np.ones(yhat.shape) / yhat.shape[1]

    # pseudo_R2 should be greater than 0
    assert_true(glm_mn[-1].score(X, y) > 0.)
    # simulate() returns one sample per row of X
    assert_equal(
        len(glm_mn.simulate(glm_mn.fit_[0]['beta0'], glm_mn.fit_[0]['beta'],
                            X)), X.shape[0])

    # check that score is computed for sliced estimator
    scorelist = glm_mn[-1].score(X, y)
    assert_equal(scorelist.shape[0], 1)

    # check that score is computed for all lambdas
    scorelist = glm_mn.score(X, y)
    assert_equal(scorelist.shape[0], y_pred_proba.shape[0])
Example #15
0
########################################################
# Fit models

# sklearn.cross_validation was removed in scikit-learn 0.20;
# train_test_split now lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split
Xtrain, Xtest, Ytrain, Ytest = train_test_split(features, spike_counts,
                                                test_size=0.2,
                                                random_state=42)

########################################################

from pyglmnet import utils
n_samples = Xtrain.shape[0]
Tau = utils.tikhonov_from_prior(prior_cov, n_samples)

# Tikhonov-regularized Poisson GLM over the regularization path
glm = GLM(distr='poisson', alpha=0., Tau=Tau, score_metric='pseudo_R2')
glm.fit(Xtrain, Ytrain)
# pick the lambda index with the best held-out pseudo-R^2
cvopt_lambda = glm.score(Xtest, Ytest).argmax()
print("train score: %f" % glm[cvopt_lambda].score(Xtrain, Ytrain))
print("test score: %f" % glm[cvopt_lambda].score(Xtest, Ytest))
weights = glm[cvopt_lambda].fit_['beta']

########################################################
# Visualize

for time_bin_ in range(n_temporal_basis):
    # reconstruct the spatial receptive field for this temporal basis bin
    RF = strf_model.make_image_from_spatial_basis(
        spatial_basis,
        weights[range(time_bin_, n_spatial_basis * n_temporal_basis,
                      n_temporal_basis)])

    plt.subplot(1, n_temporal_basis, time_bin_ + 1)
    plt.imshow(RF, cmap='Blues', interpolation='none')
Example #16
0
# simulate training responses from a negative-binomial GLM
train_y = simulate_glm("neg-binomial", beta0, beta, train_x)

# plot the data distribution
sns.set(color_codes=True)
sns.distplot(train_y)
plt.show()

# Create the GLM and train it
glm = GLM(distr="neg-binomial", max_iter=10000)
glm.fit(train_x, train_y)

# Print the betas and the beta0 to check for correctness
print("")
print(glm.beta0_)
print(glm.beta_)
print("")
print(beta0)
print(beta)

# Generate test data
# simulate testing data
# NOTE(review): the test data is simulated with "poisson" although the
# model was trained on "neg-binomial" data -- confirm this is intended
X_test = np.random.normal(0.0, 1.0, [1000, 10])
y_test = simulate_glm("poisson", beta0, beta, X_test)

# predict using fitted model on the test data
yhat_test = glm.predict(X_test)

# score the model
deviance = glm.score(X_test, y_test)
print(deviance)
Example #17
0
    def get_benchmarks(self, X_train, y_train, X_test, y_test):
        """Benchmark fit/predict/score across the configured environments.

        Parameters
        ----------
        X_train, y_train : array-like
            Training design matrix and response.
        X_test, y_test : array-like
            Held-out design matrix and response.

        Returns
        -------
        res : dict
            For each environment in ``self.envs``, a dict with keys
            'score' (R^2 for gaussian/poisson, accuracy for binomial;
            -999. when unsupported or fitting failed) and 'time'
            (best fit time over ``self.n_repeats`` runs, in ms).
        """
        n_repeats = self.n_repeats
        distr = self.distr

        res = dict()
        for env in self.envs:
            res[env] = dict()
            if env == 'pyglmnet':
                # initialize model
                model = GLM(distr=distr,
                            reg_lambda=[self.reg_lambda],
                            alpha=self.alpha,
                            solver='batch-gradient',
                            score_metric='pseudo_R2')

                # fit-predict-score
                model.fit(X_train, y_train)
                y_test_hat = model[-1].predict(X_test)
                y_test_hat = np.squeeze(y_test_hat)

                if distr in ['gaussian', 'poisson']:
                    res[env]['score'] = \
                        r2_score(y_test, y_test_hat)
                elif distr == 'binomial':
                    res[env]['score'] = \
                        accuracy_score(y_test,
                                       (y_test_hat > 0.5).astype(int))

                # time: best of n_repeats, reported in milliseconds
                tmp = list()
                for r in range(n_repeats):
                    start = time.time()
                    model.fit(X_train, y_train)
                    stop = time.time()
                    tmp.append(stop - start)
                res[env]['time'] = np.min(tmp) * 1e3

            if env == 'sklearn':
                if distr in ['gaussian', 'binomial']:
                    # initialize model
                    if distr == 'gaussian':
                        model = ElasticNet(alpha=self.reg_lambda,
                                           l1_ratio=self.alpha)
                    elif distr == 'binomial':

                        model = SGDClassifier(loss='log',
                                              penalty='elasticnet',
                                              alpha=self.reg_lambda,
                                              l1_ratio=self.alpha)

                    # fit-predict-score
                    model.fit(X_train, y_train)
                    y_test_hat = model.predict(X_test)
                    res[env]['score'] = model.score(X_test, y_test)

                    # time
                    tmp = list()
                    for r in range(n_repeats):
                        start = time.time()
                        model.fit(X_train, y_train)
                        stop = time.time()
                        tmp.append(stop - start)
                    res[env]['time'] = np.min(tmp) * 1e3
                else:
                    # sklearn has no matching estimator for this distr
                    res[env]['score'] = -999.
                    res[env]['time'] = -999.

            if env == 'statsmodels':
                # initialize model
                if distr == 'gaussian':
                    model = sm.GLM(y_train,
                                   sm.add_constant(X_train),
                                   family=sm.families.Gaussian())
                elif distr == 'binomial':
                    model = sm.GLM(y_train,
                                   sm.add_constant(X_train),
                                   family=sm.families.Binomial())
                elif distr == 'poisson':
                    model = sm.GLM(y_train,
                                   sm.add_constant(X_train),
                                   family=sm.families.Poisson())

                # fit-predict-score
                statsmodels_res = model.fit()
                y_test_hat = model.predict(statsmodels_res.params,
                                           exog=sm.add_constant(X_test))
                y_test_hat = np.array(y_test_hat)

                if distr in ['gaussian', 'poisson']:
                    res[env]['score'] = \
                        r2_score(y_test, y_test_hat)
                elif distr == 'binomial':
                    res[env]['score'] = \
                        accuracy_score(y_test,
                                       (y_test_hat > 0.5).astype(int))

                # time
                tmp = list()
                for r in range(n_repeats):
                    start = time.time()
                    statsmodels_res = model.fit()
                    stop = time.time()
                    tmp.append(stop - start)
                res[env]['time'] = np.min(tmp) * 1e3

            if env == 'R':
                # initialize model
                glmnet = importr('glmnet')
                predict = robjects.r('predict')

                # fit-predict-score
                try:
                    fit = glmnet.glmnet(X_train,
                                        y_train,
                                        family=distr,
                                        alpha=self.alpha,
                                        nlambda=1)
                    tmp = predict(fit, newx=X_test, s=0)

                    y_test_hat = np.zeros(y_test.shape[0])
                    for i in range(y_test.shape[0]):
                        y_test_hat[i] = tmp[i]

                    if distr in ['gaussian', 'poisson']:
                        res[env]['score'] = \
                            r2_score(y_test, y_test_hat)
                    elif distr == 'binomial':
                        res[env]['score'] = \
                            accuracy_score(y_test,
                                           (y_test_hat > 0.5).astype(int))

                    # time
                    tmp = list()
                    for r in range(n_repeats):
                        start = time.time()
                        fit = glmnet.glmnet(X_train,
                                            y_train,
                                            family=distr,
                                            alpha=self.alpha,
                                            nlambda=1)
                        stop = time.time()
                        tmp.append(stop - start)
                    res[env]['time'] = np.min(tmp) * 1e3
                # bare 'except:' would also swallow KeyboardInterrupt and
                # SystemExit; catch Exception and report sentinel values
                except Exception:
                    res[env]['score'] = -999.
                    res[env]['time'] = -999.

        return res
Example #18
0
    def get_benchmarks(self, X_train, y_train, X_test, y_test):
        """Benchmark fit/predict/score across the configured environments.

        Parameters
        ----------
        X_train, y_train : array-like
            Training design matrix and response.
        X_test, y_test : array-like
            Held-out design matrix and response.

        Returns
        -------
        res : dict
            For each environment in ``self.envs``, a dict with keys
            'score' (R^2 for gaussian/poisson, accuracy for binomial;
            -999. when unsupported or fitting failed) and 'time'
            (best fit time over ``self.n_repeats`` runs, in ms).
        """
        n_repeats = self.n_repeats
        distr = self.distr

        res = dict()
        for env in self.envs:
            res[env] = dict()
            if env == 'pyglmnet':
                # initialize model
                model = GLM(distr=distr,
                            reg_lambda=[self.reg_lambda],
                            alpha=self.alpha,
                            solver='batch-gradient',
                            score_metric='pseudo_R2')

                # fit-predict-score
                model.fit(X_train, y_train)
                y_test_hat = model[-1].predict(X_test)
                y_test_hat = np.squeeze(y_test_hat)

                if distr in ['gaussian', 'poisson']:
                    res[env]['score'] = \
                        r2_score(y_test, y_test_hat)
                elif distr == 'binomial':
                    res[env]['score'] = \
                        accuracy_score(y_test,
                                       (y_test_hat > 0.5).astype(int))

                # time: best of n_repeats, reported in milliseconds
                tmp = list()
                for r in range(n_repeats):
                    start = time.time()
                    model.fit(X_train, y_train)
                    stop = time.time()
                    tmp.append(stop - start)
                res[env]['time'] = np.min(tmp) * 1e3

            if env == 'sklearn':
                if distr in ['gaussian', 'binomial']:
                    # initialize model
                    if distr == 'gaussian':
                        model = ElasticNet(alpha=self.reg_lambda,
                                           l1_ratio=self.alpha)
                    elif distr == 'binomial':

                        model = SGDClassifier(loss='log',
                                              penalty='elasticnet',
                                              alpha=self.reg_lambda,
                                              l1_ratio=self.alpha)

                    # fit-predict-score
                    model.fit(X_train, y_train)
                    y_test_hat = model.predict(X_test)
                    res[env]['score'] = model.score(X_test, y_test)

                    # time
                    tmp = list()
                    for r in range(n_repeats):
                        start = time.time()
                        model.fit(X_train, y_train)
                        stop = time.time()
                        tmp.append(stop - start)
                    res[env]['time'] = np.min(tmp) * 1e3
                else:
                    # sklearn has no matching estimator for this distr
                    res[env]['score'] = -999.
                    res[env]['time'] = -999.

            if env == 'statsmodels':
                # initialize model
                if distr == 'gaussian':
                    model = sm.GLM(y_train,
                                   sm.add_constant(X_train),
                                   family=sm.families.Gaussian())
                elif distr == 'binomial':
                    model = sm.GLM(y_train,
                                   sm.add_constant(X_train),
                                   family=sm.families.Binomial())
                elif distr == 'poisson':
                    model = sm.GLM(y_train,
                                   sm.add_constant(X_train),
                                   family=sm.families.Poisson())

                # fit-predict-score
                statsmodels_res = model.fit()
                y_test_hat = model.predict(statsmodels_res.params,
                                           exog=sm.add_constant(X_test))
                y_test_hat = np.array(y_test_hat)

                if distr in ['gaussian', 'poisson']:
                    res[env]['score'] = \
                        r2_score(y_test, y_test_hat)
                elif distr == 'binomial':
                    res[env]['score'] = \
                        accuracy_score(y_test,
                                       (y_test_hat > 0.5).astype(int))

                # time
                tmp = list()
                for r in range(n_repeats):
                    start = time.time()
                    statsmodels_res = model.fit()
                    stop = time.time()
                    tmp.append(stop - start)
                res[env]['time'] = np.min(tmp) * 1e3

            if env == 'R':
                # initialize model
                glmnet = importr('glmnet')
                predict = robjects.r('predict')

                # fit-predict-score; R fitting can fail for unsupported
                # families, in which case sentinel values are reported
                try:
                    fit = glmnet.glmnet(X_train,
                                        y_train,
                                        family=distr,
                                        alpha=self.alpha,
                                        nlambda=1)
                    tmp = predict(fit, newx=X_test, s=0)

                    y_test_hat = np.zeros(y_test.shape[0])
                    for i in range(y_test.shape[0]):
                        y_test_hat[i] = tmp[i]

                    if distr in ['gaussian', 'poisson']:
                        res[env]['score'] = \
                            r2_score(y_test, y_test_hat)
                    elif distr == 'binomial':
                        res[env]['score'] = \
                            accuracy_score(y_test,
                                           (y_test_hat > 0.5).astype(int))

                    # time
                    tmp = list()
                    for r in range(n_repeats):
                        start = time.time()
                        fit = glmnet.glmnet(X_train,
                                            y_train,
                                            family=distr,
                                            alpha=self.alpha,
                                            nlambda=1)
                        stop = time.time()
                        tmp.append(stop - start)
                    res[env]['time'] = np.min(tmp) * 1e3
                except Exception:
                    res[env]['score'] = -999.
                    res[env]['time'] = -999.

        return res
          reg_lambda=np.logspace(np.log(100), np.log(0.01), 5, base=np.exp(1)))

print("gl_glm: ", gl_glm)
print("glm: ", glm)

##########################################################
# Fit models

gl_glm.fit(Xtrain, ytrain)
glm.fit(Xtrain, ytrain)

##########################################################
# Visualize model scores on test set

plt.figure()
# solid lines: test scores; dashed lines: train scores
plt.semilogx(gl_glm.reg_lambda, gl_glm.score(Xtest, ytest), 'go-')
plt.semilogx(gl_glm.reg_lambda, gl_glm.score(Xtrain, ytrain), 'go--')
plt.semilogx(glm.reg_lambda, glm.score(Xtest, ytest), 'ro-')
plt.semilogx(glm.reg_lambda, glm.score(Xtrain, ytrain), 'ro--')
plt.legend(
    ['Group Lasso: test', 'Group Lasso: train', 'Lasso: test', 'Lasso: train'],
    frameon=False,
    loc='best')
# use raw strings: '\l' is an invalid escape sequence (SyntaxWarning
# on Python 3.12+); the rendered label is unchanged
plt.xlabel(r'$\lambda$')
plt.ylabel(r'pseudo-$R^2$')
plt.ylim([-0.1, 0.7])

plt.tick_params(axis='y', right='off')
plt.tick_params(axis='x', top='off')
ax = plt.gca()
ax.spines['top'].set_visible(False)
Example #20
0
# sklearn.cross_validation was removed in scikit-learn 0.20;
# train_test_split now lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split
Xtrain, Xtest, Ytrain, Ytest = train_test_split(features,
                                                spike_counts,
                                                test_size=0.2,
                                                random_state=42)

########################################################

from pyglmnet import utils
n_samples = Xtrain.shape[0]
Tau = utils.tikhonov_from_prior(prior_cov, n_samples)

glm = GLM(distr='poisson', alpha=0., Tau=Tau, score_metric='pseudo_R2')
glm.fit(Xtrain, Ytrain)
# index of the regularization strength with the best held-out score
cvopt_lambda = glm.score(Xtest, Ytest).argmax()
print("train score: %f" % glm[cvopt_lambda].score(Xtrain, Ytrain))
print("test score: %f" % glm[cvopt_lambda].score(Xtest, Ytest))
weights = glm[cvopt_lambda].fit_['beta']

########################################################
# Visualize

for time_bin_ in range(n_temporal_basis):
    # reconstruct the spatial receptive field for this temporal basis bin
    RF = strf_model.make_image_from_spatial_basis(
        spatial_basis,
        weights[range(time_bin_, n_spatial_basis * n_temporal_basis,
                      n_temporal_basis)])

    plt.subplot(1, n_temporal_basis, time_bin_ + 1)
    plt.imshow(RF, cmap='Blues', interpolation='none')
markerline.set_markerfacecolor('none')  # hollow markers on the stem plot
# overlay each fitted model's predicted counts on the sample window
plt.plot(t_sample,
         ypred_lg[sample_idx],
         color='gold',
         linewidth=2,
         label='lgGLM with offset')
plt.plot(t_sample,
         ypred_poisson[sample_idx],
         color='green',
         linewidth=2,
         label='poissonGLM')
plt.plot(t_sample,
         ypred_poisson_hist[sample_idx],
         color='red',
         linewidth=2,
         label='poissonGLM_hist')
plt.xlim([0., tmax])
plt.title('Spike count prediction')
plt.xlabel('Time (sec)')
plt.ylabel('Binned Spike Counts')
plt.legend()
plt.show()

# print scores of all the fitted models
print('Training perf (R^2): lin-gauss GLM, w/ offset: {:.2f}'.format(
    glm_lg.score(Xdsgn, y)))
# fix user-facing typo: "possion" -> "poisson"
print('Training perf (R^2): Pyglmnet poisson GLM {:.2f}'.format(
    glm_poisson.score(Xdsgn, y)))
print('Training perf (R^2): Pyglmnet poisson GLM w/ spikes history {:.2f}'.
      format(glm_poisson_hist.score(Xdsgn_hist, y)))