def comp_varexp(Y, S, nneuron):
    """
    Compute variance explained from the GLM results, with parameters set above.
    """
    y = np.squeeze(Y[nneuron, :])  # spike train of interest
    stimulus = S[:, None]  # same stimulus for all neurons
    X = build_convolved_matrix(stimulus, Y.T, Ks, couple)  # design matrix with features projected onto basis functions
    glm = GLMCV(distr="binomial", tol=1e-5, eta=1.0,
                score_metric="deviance",
                alpha=0., learning_rate=1e-6, max_iter=1000, cv=3, verbose=True)  # important to use a very slow learning rate
    glm.fit(X, y)
    yhat = simulate_glm('binomial', glm.beta0_, glm.beta_, X)  # simulate spike rate from the fitted coefficients
    varexp = np.corrcoef(y, yhat)[0, 1] ** 2  # squared correlation between data and prediction
    return varexp
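
# Hedged usage sketch (not from the original source): loop comp_varexp over all
# simulated neurons; Y and S are assumed to be the spike-train matrix and the
# stimulus vector defined elsewhere in this script, as are the globals Ks and couple.
varexps = np.array([comp_varexp(Y, S, nn) for nn in range(Y.shape[0])])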
Example No. 2
def test_glmcv():
    """Test GLMCV class."""
    raises(ValueError, GLM, distr='blah')
    raises(ValueError, GLM, distr='gaussian', max_iter=1.8)

    scaler = StandardScaler()
    n_samples, n_features = 100, 10

    # coefficients
    beta0 = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0, (n_features,))

    distrs = ['softplus', 'gaussian', 'poisson', 'binomial', 'probit', 'gamma']
    solvers = ['batch-gradient', 'cdfast']
    score_metric = 'pseudo_R2'
    learning_rate = 2e-1

    for solver in solvers:
        for distr in distrs:

            if distr == 'gamma' and solver == 'cdfast':
                continue

            glm = GLMCV(distr,
                        learning_rate=learning_rate,
                        solver=solver,
                        score_metric=score_metric,
                        cv=2)

            assert (repr(glm))

            np.random.seed(glm.random_state)
            X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
            y_train = simulate_glm(glm.distr, beta0, beta, X_train)

            X_train = scaler.fit_transform(X_train)
            glm.fit(X_train, y_train)

            beta_ = glm.beta_
            assert_allclose(beta, beta_, atol=0.5)  # check fit

            y_pred = glm.predict(scaler.transform(X_train))
            assert (y_pred.shape[0] == X_train.shape[0])
Example No. 3
def kernel_MSE(Y, S, nneuron, Ks):
    """
    Compute MSE from the GLM results, with simulated activity, stimulus, and the ground truth kernel
    """
    ###GLM
    y = np.squeeze(Y[nneuron, :])  #spike train of interest
    stimulus = S[:, None]  #same stimulus for all neurons
    X = build_convolved_matrix(
        stimulus, Y.T, Ks,
        couple)  #design matrix with features projected onto basis functions
    glm = GLMCV(distr="binomial",
                tol=1e-5,
                eta=1.0,
                score_metric="deviance",
                alpha=0.,
                learning_rate=0.1,
                max_iter=1000,
                cv=3,
                verbose=True)  # important to use a slow learning rate
    glm.fit(X, y)

    ###store kernel
    theta_rec = glm.beta_[1:]
    theta_rec = theta_rec.reshape(nbasis, N + 1)
    K_rec = np.zeros((N + 1, pad))
    for ii in range(N + 1):
        K_rec[ii, :] = np.dot(theta_rec[:, ii], Ks)

    ###normalize and shift kernels
    K_rec_norm = np.array([
        K_rec[ii, :] / np.linalg.norm(K_rec[ii, :]) for ii in range(N + 1)
    ])  #
    K_tru_norm = np.array([
        allK[nneuron, ii, :] / np.linalg.norm(allK[nneuron, ii, :])
        for ii in range(N + 1)
    ])  #
    #    K_rec_norm = K_rec_norm.T-K_rec_norm[:,-1]
    #    K_tru_norm = K_tru_norm.T-K_tru_norm[:,-1]
    mses = np.sum((K_rec_norm - K_tru_norm)**2,
                  axis=1)  #measuring MSE for each kernel
    return mses
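
# Hedged usage sketch (assumes Y, S, Ks and the globals couple, nbasis, N, pad
# and allK used inside kernel_MSE are already defined as above): collect the
# per-kernel MSE for every neuron into one array.
all_mses = np.array([kernel_MSE(Y, S, nn, Ks) for nn in range(Y.shape[0])])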
Example No. 4
def test_random_state_consistency():
    """Test model's random_state."""
    # Generate the dataset
    n_samples, n_features = 1000, 10

    beta0 = 1. / (float(n_features) + 1.) * np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0, (n_features,))
    Xtrain = np.random.normal(0.0, 1.0, [n_samples, n_features])

    ytrain = simulate_glm("gaussian", beta0, beta, Xtrain,
                          sample=False, random_state=42)

    # Test simple glm
    glm_a = GLM(distr="gaussian", random_state=1)
    ypred_a = glm_a.fit_predict(Xtrain, ytrain)
    glm_b = GLM(distr="gaussian", random_state=1)
    ypred_b = glm_b.fit_predict(Xtrain, ytrain)

    # Consistency between two different models
    assert_array_equal(ypred_a, ypred_b)

    # Test also cross-validation
    glm_cv_a = GLMCV(distr="gaussian", cv=3, random_state=1)
    ypred_a = glm_cv_a.fit_predict(Xtrain, ytrain)
    glm_cv_b = GLMCV(distr="gaussian", cv=3, random_state=1)
    ypred_b = glm_cv_b.fit_predict(Xtrain, ytrain)
    ypred_c = glm_cv_b.fit_predict(Xtrain, ytrain)

    assert_array_equal(ypred_a, ypred_b)
    assert_array_equal(ypred_b, ypred_c)
Example No. 5
def test_glmcv():
    """Test GLMCV class."""
    scaler = StandardScaler()
    n_samples, n_features = 100, 10

    # coefficients
    beta0 = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0, (n_features,))

    distrs = ['softplus', 'gaussian', 'poisson', 'binomial', 'probit', 'gamma']
    solvers = ['batch-gradient', 'cdfast']
    score_metric = 'pseudo_R2'
    learning_rate = 2e-1

    for solver in solvers:
        for distr in distrs:

            if distr == 'gamma' and solver == 'cdfast':
                continue

            glm = GLMCV(distr, learning_rate=learning_rate,
                        solver=solver, score_metric=score_metric)

            assert_true(repr(glm))

            np.random.seed(glm.random_state)
            X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
            y_train = simulate_glm(glm.distr, beta0, beta, X_train)

            X_train = scaler.fit_transform(X_train)
            glm.fit(X_train, y_train)

            beta_ = glm.beta_
            assert_allclose(beta, beta_, atol=0.5)  # check fit

            y_pred = glm.predict(scaler.transform(X_train))
            assert_equal(y_pred.shape[0], X_train.shape[0])
Example No. 6
def test_glmcv(distr, solver):
    """Test GLMCV class."""
    raises(ValueError, GLM, distr='blah')
    raises(ValueError, GLM, distr='gaussian', max_iter=1.8)

    scaler = StandardScaler()
    n_samples, n_features = 100, 10

    # coefficients
    beta0 = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0, (n_features,))

    score_metric = 'pseudo_R2'
    learning_rate = 2e-1

    if not (distr == 'gamma' and solver == 'cdfast'):

        glm = GLMCV(distr, learning_rate=learning_rate,
                    solver=solver, score_metric=score_metric, cv=2)

        assert(repr(glm))

        np.random.seed(glm.random_state)
        X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
        y_train = simulate_glm(glm.distr, beta0, beta, X_train)

        X_train = scaler.fit_transform(X_train)
        glm.fit(X_train, y_train)

        beta_ = glm.beta_
        assert_allclose(beta, beta_, atol=0.5)  # check fit

        y_pred = glm.predict(scaler.transform(X_train))
        assert(y_pred.shape[0] == X_train.shape[0])

        # test that fit() rejects an invalid score_metric.
        glm.score_metric = 'bad_score_metric'  # reuse last glm
        raises(ValueError, glm.fit, X_train, y_train)
Example No. 7
X, y = datasets.fetch_community_crime_data()
n_samples, n_features = X.shape

########################################################
# Split the data into training and test sets

X_train, X_test, y_train, y_test = \
    train_test_split(X, y, test_size=0.33, random_state=0)

########################################################
# Fit a binomial distributed GLM with elastic net regularization

# use the default value for reg_lambda
glm = GLMCV(distr='binomial',
            alpha=0.05,
            score_metric='pseudo_R2',
            cv=3,
            tol=1e-4)

# fit model
glm.fit(X_train, y_train)

# score the test set prediction
y_test_hat = glm.predict_proba(X_test)
print("test set pseudo $R^2$ = %f" % glm.score(X_test, y_test))

########################################################
# Now use GridSearchCV to compare

import numpy as np  # noqa
from sklearn.model_selection import GridSearchCV  # noqa
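
# A minimal sketch of the comparison, not the original continuation: grid-search
# the scalar reg_lambda of a plain (non-CV) pyglmnet GLM with scikit-learn's
# GridSearchCV; the grid values below are illustrative only.
from pyglmnet import GLM  # noqa

param_grid = [{'reg_lambda': np.logspace(np.log10(0.5), np.log10(0.01), 10)}]
grid = GridSearchCV(GLM(distr='binomial', alpha=0.05,
                        score_metric='pseudo_R2', tol=1e-4),
                    param_grid, cv=3)
grid.fit(X_train, y_train)
print("best reg_lambda = %f" % grid.best_params_['reg_lambda'])
print("test set pseudo $R^2$ = %f" % grid.score(X_test, y_test))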

# %%
###############################################################################
# %% inference method (single)
nneuron = 2
pad = 100  # kernel window length
nbasis = 7  # number of basis functions
couple = 1  # whether or not coupled cells are considered
Y = np.squeeze(rate[nneuron,:])  #spike train of interest
Ks = (np.fliplr(basis_function1(pad,nbasis).T).T).T  #basis function used for kernel approximation
stimulus = stim[:,None]  #same stimulus for all neurons
X = build_convolved_matrix(stimulus, rate.T, Ks, couple)  #design matrix with features projected onto basis functions
###pyGLMnet function with optimal parameters
glm = GLMCV(distr="binomial", tol=1e-5, eta=1.0,
            score_metric="deviance",
            alpha=0., learning_rate=1e-6, max_iter=1000, cv=3, verbose=True)  # important to use a very slow learning rate
glm.fit(X, Y)

# %% direct simulation
yhat = simulate_glm('binomial', glm.beta0_, glm.beta_, X)  # simulate spike rate from the fitted coefficients
plt.figure()
plt.plot(Y*1.)  #ground truth
plt.plot(yhat,'--')

# %%reconstruct kernel
theta = glm.beta_
dc_ = theta[0]
theta_ = theta[1:]
if couple == 1:
    theta_ = theta_.reshape(nbasis,N+1)  #nbasis times (stimulus + N neurons)
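
# Hedged continuation sketch, mirroring the kernel reconstruction in kernel_MSE
# above (assumes couple == 1 so theta_ has the reshaped form): project each
# column of theta_ back through the basis to recover the kernels.
K_rec = np.zeros((N + 1, pad))
for ii in range(N + 1):
    K_rec[ii, :] = np.dot(theta_[:, ii], Ks)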
Example No. 9
# Set up the training and testing sets

from sklearn.model_selection import train_test_split # noqa

X = df[df.columns.difference(["Label"])].values
y = df.loc[:, "Label"].values

Xtrain, Xtest, ytrain, ytest = \
    train_test_split(X, y, test_size=0.2, random_state=42)

##########################################################
# Setup the models

# set up the group lasso GLM model
gl_glm = GLMCV(distr="binomial", tol=1e-3,
               group=group_idxs, score_metric="pseudo_R2",
               alpha=1.0, cv=3)


# set up the lasso model
glm = GLMCV(distr="binomial", tol=1e-3,
            score_metric="pseudo_R2", alpha=1.0, cv=3)

print("gl_glm: ", gl_glm)
print("glm: ", glm)

##########################################################
# Fit models

gl_glm.fit(Xtrain, ytrain)
glm.fit(Xtrain, ytrain)
Example No. 10
########################################################
# Fit models

from sklearn.model_selection import train_test_split
Xtrain, Xtest, Ytrain, Ytest = train_test_split(features,
                                                spike_counts,
                                                test_size=0.2,
                                                random_state=42)

########################################################

from pyglmnet import utils
n_samples = Xtrain.shape[0]
Tau = utils.tikhonov_from_prior(prior_cov, n_samples)

glm = GLMCV(distr='poisson', alpha=0., Tau=Tau, score_metric='pseudo_R2', cv=3)
glm.fit(Xtrain, Ytrain)
print("train score: %f" % glm.score(Xtrain, Ytrain))
print("test score: %f" % glm.score(Xtest, Ytest))
weights = glm.beta_

########################################################
# Visualize

for time_bin_ in range(n_temporal_basis):
    RF = strf_model.make_image_from_spatial_basis(
        spatial_basis,
        weights[range(time_bin_, n_spatial_basis * n_temporal_basis,
                      n_temporal_basis)])

    plt.subplot(1, n_temporal_basis, time_bin_ + 1)
Example No. 11
# Download and preprocess data files

X, y = datasets.fetch_community_crime_data('/tmp/glm-tools')
n_samples, n_features = X.shape

########################################################
# Split the data into training and test sets

X_train, X_test, y_train, y_test = \
    train_test_split(X, y, test_size=0.33, random_state=0)

########################################################
# Fit a gaussian distributed GLM with elastic net regularization

# use the default value for reg_lambda
glm = GLMCV(distr='gaussian', alpha=0.05, score_metric='pseudo_R2')

# fit model
glm.fit(X_train, y_train)

# score the test set prediction
y_test_hat = glm.predict(X_test)
print ("test set pseudo $R^2$ = %f" % glm.score(X_test, y_test))

########################################################
# Plot the true and predicted test set target values

plt.plot(y_test[:50], 'ko-')
plt.plot(y_test_hat[:50], 'ro-')
plt.legend(['true', 'pred'], frameon=False)
plt.xlabel('Counties')
Example No. 12
def fit_group_lasso(
    X,
    knockoffs,
    y,
    groups,
    use_pyglm=True,
    y_dist=None,
    group_lasso=True,
    **kwargs,
):
    """ Fits a group lasso model.
	:param X: n x p design matrix
	:param knockoffs: n x p knockoff matrix
	:param groups: p length numpy array of groups
	:param use_pyglm: If true, use the pyglmnet grouplasso
	Else use the regular one
	:param y_dist: Either "gaussian" or "binomial" (for logistic regression)
	:param group_lasso: If False, do not use group regularization.
	:param kwargs: kwargs for group-lasso GroupLasso class.
	In particular includes reg_vals, a list of regularizations
	(lambda values) which defaults to [(0.05, 0.05)]. In each
	tuple of the list, the first value is the group regularization,
	the second value is the individual regularization.
	"""

    warnings.filterwarnings("ignore")

    # Parse some kwargs/defaults
    max_iter = kwargs.pop("max_iter", 100)
    tol = kwargs.pop("tol", 1e-2)
    cv = kwargs.pop("cv", 5)
    learning_rate = kwargs.pop("learning_rate", 2)
    if y_dist is None:
        y_dist = parse_y_dist(y)

    # Bind data
    n = X.shape[0]
    p = X.shape[1]
    features = np.concatenate([X, knockoffs], axis=1)

    # By default, each variable is its own group
    if groups is None:
        groups = np.arange(1, p + 1, 1)
    m = np.unique(groups).shape[0]

    # If m == p, meaning each variable is its own group,
    # just fit a regular lasso
    if m == p or not group_lasso:
        return fit_lasso(X, knockoffs, y, y_dist, **kwargs)

    # Make sure variables and their knockoffs are in the same group
    # This is necessary for antisymmetry
    doubled_groups = np.concatenate([groups, groups], axis=0)

    # Randomize coordinates to make sure everything is symmetric
    inds, rev_inds = random_permutation_inds(2 * p)
    features = features[:, inds]
    doubled_groups = doubled_groups[inds]

    # Standardize - important for pyglmnet performance,
    # highly detrimental for group_lasso performance
    if use_pyglm:
        features = (features - features.mean()) / features.std()
        if y_dist == "gaussian":
            y = (y - y.mean()) / y.std()

    # Get regularizations
    if "reg_vals" in kwargs:
        reg_vals = kwargs.pop("reg_vals")
    else:
        reg_vals = [(x, x) for x in DEFAULT_REG_VALS]

    # Fit pyglm model using warm starts
    if use_pyglm:

        l1_regs = [x[0] for x in reg_vals]

        gl = GLMCV(
            distr=y_dist,
            tol=tol,
            group=doubled_groups,
            alpha=1.0,
            learning_rate=learning_rate,
            max_iter=max_iter,
            reg_lambda=l1_regs,
            cv=cv,
            solver="cdfast",
        )
        gl.fit(features, y)

        # Pull score, rename
        best_score = -1 * calc_mse(gl, features, y)
        best_gl = gl

    # Fit model
    if not use_pyglm:
        best_gl = None
        best_score = -1 * np.inf
        for group_reg, l1_reg in reg_vals:

            # Fit logistic/gaussian group lasso
            if not use_pyglm:
                if y_dist.lower() == "gaussian":
                    gl = GroupLasso(
                        groups=doubled_groups,
                        tol=tol,
                        group_reg=group_reg,
                        l1_reg=l1_reg,
                        **kwargs,
                    )
                elif y_dist.lower() == "binomial":
                    gl = LogisticGroupLasso(
                        groups=doubled_groups,
                        tol=tol,
                        group_reg=group_reg,
                        l1_reg=l1_reg,
                        **kwargs,
                    )
                else:
                    raise ValueError(
                        f"y_dist must be one of gaussian, binomial, not {y_dist}"
                    )

                gl.fit(features, y.reshape(n, 1))
                score = -1 * calc_mse(gl, features, y.reshape(n, 1))

            # Score, possibly select
            if score > best_score:
                best_score = score
                best_gl = gl

    warnings.resetwarnings()

    return best_gl, inds, rev_inds
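
# Hedged, self-contained usage sketch with synthetic stand-ins; in the real
# pipeline X, knockoffs, y and groups come from the knockoff-filter code that
# surrounds this function.
n_demo, p_demo = 200, 10
X_demo = np.random.randn(n_demo, p_demo)
knock_demo = np.random.randn(n_demo, p_demo)  # stand-in for real knockoffs
y_demo = X_demo @ np.random.randn(p_demo) + np.random.randn(n_demo)
groups_demo = np.repeat(np.arange(1, p_demo // 2 + 1), 2)  # features paired into groups
best_gl, inds, rev_inds = fit_group_lasso(
    X_demo, knock_demo, y_demo, groups_demo,
    use_pyglm=True, y_dist="gaussian",
)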
Example No. 13
                                 normalize=True)
        else:
            model = LassoCV(max_iter=100000,
                            n_jobs=-1,
                            n_alphas=1000,
                            random_state=1,
                            cv=5)
        model.fit(train, labels["incidence"].fillna(0))
        result = model.predict(test)

        if using_elastic:
            print("Best L1_ratio: {}".format(model.l1_ratio_))

        print("Best Alpha: {}".format(model.alpha_))
    elif using_negbinomial:
        model = GLMCV(distr='neg-binomial', score_metric="pseudo_R2", cv=3)
        model.fit(train.values.copy(),
                  labels["incidence"].fillna(0).copy().values)
        result = model.predict(test.values)
    elif using_poisson:
        model = cvglmnet(x=train.values.copy(),
                         y=labels["incidence"].fillna(0).copy().values,
                         family='poisson',
                         alpha=1.0,
                         ptype="mse",
                         parallel=True,
                         nfolds=10)
        result = cvglmnetPredict(model,
                                 test.values,
                                 ptype='response',
                                 s="lambda_min")
Example No. 14
    def fit(self, X, Y, get_history_terms=True):
        """
        Fits the model to the data in X to predict the response Y.

        Imports models and creates model instance as well.

        Parameters
        ----------
        X: float, n_samples x n_features, features of interest
        Y: float, n_samples x 1, population activity
        get_history_terms: bool
            Whether to compute the temporal features.
            Note that if spike_history and cov_history are False,
            no history will be computed anyway and the flag does nothing.
        """
        if self.default_params:
            warnings.warn(
                '\n  Using default hyperparameters. Consider optimizing on' +
                ' a held-out dataset using, e.g. hyperopt or random search')

        # make the covariate matrix. Include spike or covariate history?
        # The different methods here are to satisfy the needs of recurrent keras
        # models
        if get_history_terms:
            if self.tunemodel == 'lstm':
                X, Y = self.get_all_with_history_keras(X, Y)
            else:
                X, Y = self.get_all_with_history(X, Y)

        if self.tunemodel == 'glm':
            model = GLMCV(**self.params)
            model.fit(X, Y)

            # we want the last of the regularization path
            # self.model = model[-1]
            self.GLMCV = model
            self.model = model.glm_

        elif self.tunemodel == 'feedforward_nn':

            if np.ndim(X) == 1:
                X = np.transpose(np.atleast_2d(X))

            params = self.params
            model = Sequential()
            model.add(
                Dense(params['n1'],
                      input_dim=np.shape(X)[1],
                      kernel_initializer='glorot_normal',
                      activation='relu',
                      kernel_regularizer=l2(params['l2'])))
            model.add(Dropout(params['dropout']))
            model.add(BatchNormalization())
            model.add(
                Dense(params['n2'],
                      kernel_initializer='glorot_normal',
                      activation='relu',
                      kernel_regularizer=l2(params['l2'])))
            model.add(BatchNormalization())
            model.add(Dense(1, activation='softplus'))
            optim = adam(lr=params['lr'],
                         clipnorm=params['clipnorm'],
                         decay=params['decay'],
                         beta_1=1 - params['b1'],
                         beta_2=1 - params['b2'])
            model.compile(
                loss='poisson',
                optimizer=optim,
            )
            hist = model.fit(X,
                             Y,
                             batch_size=128,
                             epochs=30,
                             verbose=self.verbose)

            self.model = model

        elif self.tunemodel == 'xgboost':

            dtrain = xgb.DMatrix(X, label=Y)
            num_round = 200
            self.model = xgb.train(self.params, dtrain, num_round)

        elif self.tunemodel == 'random_forest':

            self.model = RandomForestRegressor(**self.params)
            self.model.fit(X, Y)

        elif self.tunemodel == 'lstm':

            if np.ndim(X) == 1:
                X = np.transpose(np.atleast_2d(X))

            params = self.params
            model = Sequential()  #Declare model
            #Add recurrent layer
            model.add(LSTM(int(params['n_units']),input_shape=(X.shape[1],X.shape[2]),\
                           dropout_W=params['dropout'],dropout_U=params['dropout']))
            #Within recurrent layer, include dropout
            model.add(Dropout(params['dropout'])
                      )  #Dropout some units (recurrent layer output units)

            #Add dense connections to output layer
            model.add(Dense(1, activation='softplus'))

            #Fit model (and set fitting parameters)
            model.compile(loss='poisson',
                          optimizer='rmsprop',
                          metrics=['accuracy'])
            model.fit(X,
                      Y,
                      epochs=int(params['epochs']),
                      batch_size=int(params['batch_size']),
                      verbose=self.verbose)  #Fit the model

            self.model = model

        else:  #using predefined model
            self.model.fit(X, Y)
Example No. 15
# Set up the training and testing sets

from sklearn.model_selection import train_test_split  # noqa

Xtrain, Xtest, ytrain, ytest = \
    train_test_split(X, y, test_size=0.2, random_state=42)

##########################################################
# Setup the models

# set up the group lasso GLM model
gl_glm = GLMCV(distr="binomial",
               tol=1e-3,
               group=group,
               score_metric="pseudo_R2",
               alpha=1.0,
               learning_rate=3,
               max_iter=100,
               cv=3,
               verbose=True)

# set up the lasso model
glm = GLMCV(distr="binomial",
            tol=1e-3,
            score_metric="pseudo_R2",
            alpha=1.0,
            learning_rate=3,
            max_iter=100,
            cv=3,
            verbose=True)
Example No. 16
# Download and preprocess data files

X, y = datasets.fetch_community_crime_data('/tmp/glm-tools')
n_samples, n_features = X.shape

########################################################
# Split the data into training and test sets

X_train, X_test, y_train, y_test = \
    train_test_split(X, y, test_size=0.33, random_state=0)

########################################################
# Fit a gaussian distributed GLM with elastic net regularization

# use the default value for reg_lambda
glm = GLMCV(distr='gaussian', alpha=0.05, score_metric='pseudo_R2')

# fit model
glm.fit(X_train, y_train)

# score the test set prediction
y_test_hat = glm.predict(X_test)
print ("test set pseudo $R^2$ = %f" % glm.score(X_test, y_test))

########################################################
# Now use plain grid search cv to compare

import numpy as np # noqa
from sklearn.model_selection import GridSearchCV # noqa
from sklearn.model_selection import StratifiedKFold # noqa
Example No. 17
# %%
plt.figure()
plt.plot(K)
plt.plot(kernel(theta_infer, pad), '-o')
plt.plot(kernel(theta0, pad), '--')

# %%
###############################################################################
###############################################################################
# %% test pyGLM
### This is super-easy if we rely on built-in GLM fitting code
glm = GLMCV(distr="binomial",
            tol=1e-3,
            score_metric="pseudo_R2",
            alpha=1.0,
            learning_rate=3,
            max_iter=100,
            cv=3,
            verbose=True)

glm.fit(X, np.squeeze(spk))

# %%
plt.figure()
pyglm_infer = glm.beta_
plt.plot(pyglm_infer / np.linalg.norm(pyglm_infer))
plt.plot(K / np.linalg.norm(K), '--')

# %% Two-neuron circuit with pyGLMnet
###############################################################################
###############################################################################
Example No. 18
                                 normalize=True)
        else:
            model = LassoCV(max_iter=100000,
                            n_jobs=-1,
                            n_alphas=1000,
                            random_state=1,
                            cv=5)
        model.fit(train, labels["incidence"].fillna(0))
        result = model.predict(test)

        if using_elastic:
            print("Best L1_ratio: {}".format(model.l1_ratio_))

        print("Best Alpha: {}".format(model.alpha_))
    elif using_negbinomial:
        model = GLMCV(distr='neg-binomial', score_metric="pseudo_R2", cv=3)
        model.fit(train.values.copy(),
                  labels["incidence"].fillna(0).copy().values)
        result = model.predict(test.values)
    elif using_poisson:
        # Convert features into count based values
        train_converted = train.values.copy()
        incidence_converted = labels["incidence"].fillna(1).copy().values
        model = GLMCV(distr='poisson',
                      score_metric="deviance",
                      max_iter=5000,
                      cv=3)
        model.fit(train_converted, incidence_converted)
        result = model.predict(test.values)

    # Get the feature coefficients
Example No. 19
# Set up the training and testing sets

from sklearn.model_selection import train_test_split # noqa

X = df[df.columns.difference(["Label"])].values
y = df.loc[:, "Label"].values

Xtrain, Xtest, ytrain, ytest = \
    train_test_split(X, y, test_size=0.2, random_state=42)

##########################################################
# Setup the models

# set up the group lasso GLM model
gl_glm = GLMCV(distr="binomial", tol=1e-2,
               group=group_idxs, score_metric="pseudo_R2",
               alpha=1.0)


# set up the lasso model
glm = GLMCV(distr="binomial", tol=1e-2,
            score_metric="pseudo_R2", alpha=1.0)

print("gl_glm: ", gl_glm)
print("glm: ", glm)

##########################################################
# Fit models

gl_glm.fit(Xtrain, ytrain)
glm.fit(Xtrain, ytrain)
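
# Hedged follow-up sketch: compare held-out performance of the two fits,
# assuming GLMCV.score returns the configured score_metric (pseudo_R2 here).
print("group lasso test pseudo R^2: %f" % gl_glm.score(Xtest, ytest))
print("lasso test pseudo R^2: %f" % glm.score(Xtest, ytest))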