def test_multinomial():
    """Exercise multinomial GLM: gradient, fit, predict, score, simulate."""
    model = GLM(distr='multinomial',
                reg_lambda=np.array([0.0, 0.1, 0.2]),
                learning_rate=2e-1, tol=1e-10)
    X = np.array([[-1, -2, -3], [4, 5, 6]])
    y = np.array([1, 0])

    # Gradient at zero weights: per-class intercept gradients must differ.
    coefs = np.zeros([4, 2])
    grad_beta0, grad_beta = model._grad_L2loss(coefs[0], coefs[1:], 0, X, y)
    assert_true(grad_beta0[0] != grad_beta0[1])

    model.fit(X, y)
    predictions = model.predict(X)
    # Expected shape: n_lambdas x n_samples x n_classes.
    assert_equal(predictions.shape, (3, X.shape[0], 2))

    # Take the first regularization-path entry as the prediction.
    y_hat = predictions[0]
    # Null model: uniform probability over classes.
    y_null = np.ones(y_hat.shape) / y_hat.shape[1]
    # pseudo_R2 should be greater than 0.
    assert_true(model.score(y, y_hat, y_null, method='pseudo_R2') > 0.)
    model.score(y, y_hat)

    assert_equal(
        len(model.simulate(model.fit_[0]['beta0'],
                           model.fit_[0]['beta'], X)),
        X.shape[0])

    # These calls are expected to raise.
    assert_raises(ValueError, model.score, y, y, y, 'pseudo_R2')
    assert_raises(ValueError, model.score, y, y, None, 'deviance')
def test_multinomial():
    """Test all multinomial functionality"""
    reg_path = np.array([0.0, 0.1, 0.2])
    glm_mn = GLM(distr='multinomial', reg_lambda=reg_path,
                 learning_rate=2e-1, tol=1e-10)
    X = np.array([[-1, -2, -3], [4, 5, 6]])
    y = np.array([1, 0])

    # Gradient check: class-wise intercept gradients must differ.
    weights = np.zeros([4, 2])
    g0, g = glm_mn._grad_L2loss(weights[0], weights[1:], 0, X, y)
    assert_true(g0[0] != g0[1])

    glm_mn.fit(X, y)
    probs = glm_mn.predict(X)
    # n_lambdas x n_samples x n_classes
    assert_equal(probs.shape, (3, X.shape[0], 2))

    yhat = probs[0]  # prediction at the first lambda
    ynull = np.ones(yhat.shape) / yhat.shape[1]  # uniform baseline
    # pseudo_R2 should be greater than 0
    assert_true(glm_mn.score(y, yhat, ynull, method='pseudo_R2') > 0.)
    glm_mn.score(y, yhat)

    simulated = glm_mn.simulate(glm_mn.fit_[0]['beta0'],
                                glm_mn.fit_[0]['beta'], X)
    assert_equal(len(simulated), X.shape[0])

    # these should raise an exception
    assert_raises(ValueError, glm_mn.score, y, y, y, 'pseudo_R2')
    assert_raises(ValueError, glm_mn.score, y, y, None, 'deviance')
def test_api_input():
    """Test that the input value of y can be of different types."""
    seed = 1
    rng = np.random.RandomState(seed)
    n_samples, n_features = 100, 5
    X = rng.normal(0, 1, (n_samples, n_features))
    y = rng.normal(0, 1, (n_samples,))

    glm = GLM(distr='gaussian')

    # Mismatched X/y shapes must raise ValueError.
    with pytest.raises(ValueError):
        GLM().fit(X, y[3:])

    # The happy path runs without errors.
    glm.fit(X, y)
    glm.predict(X)
    glm.score(X, y)
    glm.plot_convergence()

    # An unknown solver is rejected at fit time.
    glm = GLM(distr='gaussian', solver='test')
    with pytest.raises(ValueError, match="solver must be one of"):
        glm.fit(X, y)

    # fit_intercept must be boolean-like; rejected at construction.
    with pytest.raises(ValueError, match="fit_intercept must be"):
        glm = GLM(distr='gaussian', fit_intercept='blah')

    # Hitting max_iter emits a convergence warning.
    glm = GLM(distr='gaussian', max_iter=2)
    with pytest.warns(UserWarning, match='Reached max number of iterat'):
        glm.fit(X, y)
def test_tikhonov():
    """Tikhonov regularization test.

    Fixes: use builtin ``float`` (``np.float`` was deprecated in NumPy
    1.20 and removed in 1.24) and import ``train_test_split`` from
    ``sklearn.model_selection`` (``sklearn.cross_validation`` was
    removed in scikit-learn 0.20).
    """
    n_samples, n_features = 100, 10

    # design covariance matrix of parameters (squared-exponential kernel
    # over feature index, plus a small diagonal jitter)
    Gam = 15.
    PriorCov = np.zeros([n_features, n_features])
    for i in np.arange(0, n_features):
        for j in np.arange(i, n_features):
            PriorCov[i, j] = np.exp(-Gam * 1. / (float(n_features) ** 2) *
                                    (float(i) - float(j)) ** 2)
            PriorCov[j, i] = PriorCov[i, j]
            if i == j:
                PriorCov[i, j] += 0.01
    PriorCov = 1. / np.max(PriorCov) * PriorCov

    # sample parameters as multivariate normal
    beta0 = np.random.randn()
    beta = np.random.multivariate_normal(np.zeros(n_features), PriorCov)

    # sample train and test data
    glm_sim = GLM(distr='softplus', score_metric='pseudo_R2')
    X = np.random.randn(n_samples, n_features)
    y = simulate_glm(glm_sim.distr, beta0, beta, X)

    from sklearn.model_selection import train_test_split
    Xtrain, Xtest, ytrain, ytest = \
        train_test_split(X, y, test_size=0.5, random_state=42)

    # design tikhonov matrix from the prior covariance (whitening via SVD)
    [U, S, V] = np.linalg.svd(PriorCov, full_matrices=False)
    Tau = np.dot(np.diag(1. / np.sqrt(S)), U)
    Tau = 1. / np.sqrt(float(n_samples)) * Tau / Tau.max()

    # fit model with batch gradient
    glm_tikhonov = GLM(distr='softplus',
                       alpha=0.0,
                       Tau=Tau,
                       solver='batch-gradient',
                       tol=1e-5,
                       score_metric='pseudo_R2')
    glm_tikhonov.fit(Xtrain, ytrain)

    R2_train, R2_test = dict(), dict()
    R2_train['tikhonov'] = glm_tikhonov.score(Xtrain, ytrain)
    R2_test['tikhonov'] = glm_tikhonov.score(Xtest, ytest)

    # fit model with cdfast
    glm_tikhonov = GLM(distr='softplus',
                       alpha=0.0,
                       Tau=Tau,
                       solver='cdfast',
                       tol=1e-5,
                       score_metric='pseudo_R2')
    glm_tikhonov.fit(Xtrain, ytrain)

    R2_train, R2_test = dict(), dict()
    R2_train['tikhonov'] = glm_tikhonov.score(Xtrain, ytrain)
    R2_test['tikhonov'] = glm_tikhonov.score(Xtest, ytest)
def test_tikhonov():
    """Tikhonov regularization test.

    Fix: use builtin ``float`` instead of ``np.float``, which was
    deprecated in NumPy 1.20 and removed in NumPy 1.24.
    """
    n_samples, n_features = 100, 10

    # design covariance matrix of parameters (squared-exponential kernel
    # over feature index, plus a small diagonal jitter)
    Gam = 15.
    PriorCov = np.zeros([n_features, n_features])
    for i in np.arange(0, n_features):
        for j in np.arange(i, n_features):
            PriorCov[i, j] = np.exp(-Gam * 1. / (float(n_features) ** 2) *
                                    (float(i) - float(j)) ** 2)
            PriorCov[j, i] = PriorCov[i, j]
            if i == j:
                PriorCov[i, j] += 0.01
    PriorCov = 1. / np.max(PriorCov) * PriorCov

    # sample parameters as multivariate normal
    beta0 = np.random.randn()
    beta = np.random.multivariate_normal(np.zeros(n_features), PriorCov)

    # sample train and test data
    glm_sim = GLM(distr='softplus', score_metric='pseudo_R2')
    X = np.random.randn(n_samples, n_features)
    y = simulate_glm(glm_sim.distr, beta0, beta, X)

    from sklearn.model_selection import train_test_split
    Xtrain, Xtest, ytrain, ytest = \
        train_test_split(X, y, test_size=0.5, random_state=42)

    # design tikhonov matrix from the prior covariance (whitening via SVD)
    [U, S, V] = np.linalg.svd(PriorCov, full_matrices=False)
    Tau = np.dot(np.diag(1. / np.sqrt(S)), U)
    Tau = 1. / np.sqrt(float(n_samples)) * Tau / Tau.max()

    # fit model with batch gradient
    glm_tikhonov = GLM(distr='softplus',
                       alpha=0.0,
                       Tau=Tau,
                       solver='batch-gradient',
                       tol=1e-3,
                       score_metric='pseudo_R2')
    glm_tikhonov.fit(Xtrain, ytrain)

    R2_train, R2_test = dict(), dict()
    R2_train['tikhonov'] = glm_tikhonov.score(Xtrain, ytrain)
    R2_test['tikhonov'] = glm_tikhonov.score(Xtest, ytest)

    # fit model with cdfast
    glm_tikhonov = GLM(distr='softplus',
                       alpha=0.0,
                       Tau=Tau,
                       solver='cdfast',
                       tol=1e-3,
                       score_metric='pseudo_R2')
    glm_tikhonov.fit(Xtrain, ytrain)

    R2_train, R2_test = dict(), dict()
    R2_train['tikhonov'] = glm_tikhonov.score(Xtrain, ytrain)
    R2_test['tikhonov'] = glm_tikhonov.score(Xtest, ytest)
def test_glmnet():
    """Test glmnet.

    Fixes: use builtin ``float`` instead of ``np.float`` (removed in
    NumPy 1.24) and drop the unused local ``density``.
    """
    scaler = StandardScaler()
    n_samples, n_features = 1000, 100
    n_lambda = 10

    # ground-truth coefficients, scaled down with the feature count
    beta0 = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0, [n_features, 1])

    distrs = ['softplus', 'poisson', 'gaussian', 'binomial']
    solvers = ['batch-gradient', 'cdfast']
    score_metric = 'pseudo_R2'
    learning_rate = 2e-1

    for solver in solvers:
        for distr in distrs:
            glm = GLM(distr, learning_rate=learning_rate,
                      solver=solver, score_metric=score_metric)
            assert_true(repr(glm))

            np.random.seed(glm.random_state)
            X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
            y_train = glm.simulate(beta0, beta, X_train)

            X_train = scaler.fit_transform(X_train)
            glm.fit(X_train, y_train)

            beta_ = glm.fit_[-1]['beta'][:]
            assert_allclose(beta[:], beta_, atol=0.5)  # check fit

            y_pred = glm.predict(scaler.transform(X_train))
            assert_equal(y_pred.shape, (n_lambda, X_train.shape[0]))

    # checks for slicing.
    glm = glm[:3]
    glm_copy = glm.copy()
    assert_true(glm_copy is not glm)
    assert_equal(len(glm.reg_lambda), 3)

    y_pred = glm[:2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (2, X_train.shape[0]))

    y_pred = glm[2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (X_train.shape[0], ))

    assert_raises(IndexError, glm.__getitem__, [2])
    glm.score(X_train, y_train)

    # don't allow slicing if model has not been fit yet.
    glm_poisson = GLM(distr='softplus')
    assert_raises(ValueError, glm_poisson.__getitem__, 2)

    # test fit_predict
    glm_poisson.fit_predict(X_train, y_train)
    assert_raises(ValueError, glm_poisson.fit_predict,
                  X_train[None, ...], y_train)
def test_glmnet():
    """Test glmnet.

    Fixes: use builtin ``float`` instead of ``np.float`` (removed in
    NumPy 1.24) and drop the unused local ``density``.
    """
    scaler = StandardScaler()
    n_samples, n_features = 1000, 100
    n_lambda = 10

    # ground-truth coefficients, scaled down with the feature count
    beta0 = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0, [n_features, 1])

    distrs = ['poisson', 'poissonexp', 'normal', 'binomial']
    solvers = ['batch-gradient', 'cdfast']
    learning_rate = 2e-1

    for solver in solvers:
        for distr in distrs:
            glm = GLM(distr, learning_rate=learning_rate, solver=solver)
            assert_true(repr(glm))

            np.random.seed(glm.random_state)
            X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
            y_train = glm.simulate(beta0, beta, X_train)

            X_train = scaler.fit_transform(X_train)
            glm.fit(X_train, y_train)

            beta_ = glm.fit_[-1]['beta'][:]
            assert_allclose(beta[:], beta_, atol=0.5)  # check fit

            y_pred = glm.predict(scaler.transform(X_train))
            assert_equal(y_pred.shape, (n_lambda, X_train.shape[0]))

    # checks for slicing.
    glm = glm[:3]
    glm_copy = glm.copy()
    assert_true(glm_copy is not glm)
    assert_equal(len(glm.reg_lambda), 3)

    y_pred = glm[:2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (2, X_train.shape[0]))

    y_pred = glm[2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (X_train.shape[0], ))

    assert_raises(IndexError, glm.__getitem__, [2])
    glm.score(y_train, y_pred)

    # don't allow slicing if model has not been fit yet.
    glm_poisson = GLM(distr='poisson')
    assert_raises(ValueError, glm_poisson.__getitem__, 2)

    # test fit_predict
    glm_poisson.fit_predict(X_train, y_train)
    assert_raises(ValueError, glm_poisson.fit_predict,
                  X_train[None, ...], y_train)
def test_glmnet():
    """Test glmnet."""
    scaler = StandardScaler()
    n_samples, n_features = 10000, 100
    density = 0.1
    n_lambda = 10

    # ground-truth coefficients: sparse with the given density
    beta0 = np.random.rand()
    beta = sps.rand(n_features, 1, density=density).toarray()

    learning_rate = 2e-1
    for distr in ['poisson', 'poissonexp', 'normal', 'binomial']:
        glm = GLM(distr, learning_rate=learning_rate)
        assert_true(repr(glm))

        np.random.seed(glm.random_state)
        X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
        y_train = glm.simulate(beta0, beta, X_train)

        X_train = scaler.fit_transform(X_train)
        glm.fit(X_train, y_train)

        beta_ = glm.fit_[-2]['beta'][:]
        assert_allclose(beta[:], beta_, atol=0.5)  # check fit

        # recovered support should roughly match the true sparsity
        density_ = np.sum(beta_ > 0.1) / float(n_features)
        assert_allclose(density_, density, atol=0.05)  # check density

        y_pred = glm.predict(scaler.transform(X_train))
        assert_equal(y_pred.shape, (n_lambda, X_train.shape[0]))

    # checks for slicing.
    glm = glm[:3]
    glm_copy = glm.copy()
    assert_true(glm_copy is not glm)
    assert_equal(len(glm.reg_lambda), 3)

    y_pred = glm[:2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (2, X_train.shape[0]))

    y_pred = glm[2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (X_train.shape[0], ))

    assert_raises(IndexError, glm.__getitem__, [2])
    glm.score(y_train, y_pred)

    # don't allow slicing if model has not been fit yet.
    glm_poisson = GLM(distr='poisson')
    assert_raises(ValueError, glm_poisson.__getitem__, 2)

    # test fit_predict
    glm_poisson.fit_predict(X_train, y_train)
    assert_raises(ValueError, glm_poisson.fit_predict,
                  X_train[None, ...], y_train)
def test_multinomial():
    """Test all multinomial functionality.

    Fix: removed dead locals ``yhat``/``ynull`` (assigned but never
    used after the switch to the ``score(X, y)`` API).
    """
    glm_mn = GLM(distr='multinomial',
                 reg_lambda=np.array([0.0, 0.1, 0.2]),
                 learning_rate=2e-1, tol=1e-10)
    X = np.array([[-1, -2, -3], [4, 5, 6]])
    y = np.array([1, 0])

    # test gradient: per-class intercept gradients must differ
    beta = np.zeros([4, 2])
    grad_beta0, grad_beta = glm_mn._grad_L2loss(beta[0], beta[1:], 0, X, y)
    assert_true(grad_beta0[0] != grad_beta0[1])

    glm_mn.fit(X, y)
    y_pred_proba = glm_mn.predict_proba(X)
    # n_lambdas x n_samples x n_classes
    assert_equal(y_pred_proba.shape, (3, X.shape[0], 2))

    # pseudo_R2 should be greater than 0
    assert_true(glm_mn[-1].score(X, y) > 0.)

    assert_equal(len(glm_mn.simulate(glm_mn.fit_[0]['beta0'],
                                     glm_mn.fit_[0]['beta'], X)),
                 X.shape[0])

    # check that score is computed for sliced estimator
    scorelist = glm_mn[-1].score(X, y)
    assert_equal(scorelist.shape[0], 1)

    # check that score is computed for all lambdas
    scorelist = glm_mn.score(X, y)
    assert_equal(scorelist.shape[0], y_pred_proba.shape[0])
def test_api_input_types_y():
    """Test that the input value of y can be of different types."""
    rng = np.random.RandomState(1)
    n_samples, n_features = 100, 5
    X = rng.normal(0, 1, (n_samples, n_features))
    y = rng.normal(0, 1, (n_samples,))

    glm = GLM(distr='gaussian')

    # A plain list is rejected - y has to be an ndarray.
    with pytest.raises(ValueError):
        glm.fit(X, list(y))

    # A shape mismatch between X and y raises ValueError.
    with pytest.raises(ValueError):
        GLM().fit(X, y[3:])

    # A proper ndarray works end to end.
    glm.fit(X, y)
    glm.predict(X)
    glm.score(X, y)
def test_accuracy():
    """Testing accuracy."""
    n_samples, n_features, n_classes = 1000, 100, 2
    intercept = np.random.normal(0.0, 1.0, 1)
    coefs = np.random.normal(0.0, 1.0, (n_features, n_classes))

    # simulate labels from a binomial GLM and take the argmax class
    estimator = GLM(distr='binomial', score_metric='accuracy')
    X = np.random.randn(n_samples, n_features)
    y = simulate_glm(estimator.distr, intercept, coefs, X)
    y = np.argmax(y, axis=1)

    estimator.fit(X, y)
    acc = estimator.score(X, y)
    assert_true(isinstance(acc, float))
def test_deviance():
    """Test deviance."""
    n_samples, n_features = 1000, 100
    intercept = np.random.normal(0.0, 1.0, 1)
    coefs = np.random.normal(0.0, 1.0, n_features)

    # simulate data under the estimator's default distribution
    estimator = GLM(score_metric='deviance')
    X = np.random.randn(n_samples, n_features)
    y = simulate_glm(estimator.distr, intercept, coefs, X)

    estimator.fit(X, y)
    dev = estimator.score(X, y)
    assert_true(isinstance(dev, float))
def test_pseudoR2():
    """Test pseudo r2."""
    n_samples, n_features = 1000, 100
    intercept = np.random.rand()
    coefs = np.random.normal(0.0, 1.0, n_features)

    # simulate data and fit with pseudo-R2 scoring
    estimator = GLM(score_metric='pseudo_R2')
    X = np.random.randn(n_samples, n_features)
    y = simulate_glm(estimator.distr, intercept, coefs, X)

    estimator.fit(X, y)
    r2 = estimator.score(X, y)
    assert isinstance(r2, float)
def test_multinomial():
    """Test all multinomial functionality.

    Fix: dropped the dead locals ``yhat`` and ``ynull`` — they were
    computed but never used once scoring moved to ``score(X, y)``.
    """
    glm_mn = GLM(distr='multinomial',
                 reg_lambda=np.array([0.0, 0.1, 0.2]),
                 learning_rate=2e-1, tol=1e-10)
    X = np.array([[-1, -2, -3], [4, 5, 6]])
    y = np.array([1, 0])

    # test gradient: per-class intercept gradients must differ
    beta = np.zeros([4, 2])
    grad_beta0, grad_beta = glm_mn._grad_L2loss(beta[0], beta[1:], 0, X, y)
    assert_true(grad_beta0[0] != grad_beta0[1])

    glm_mn.fit(X, y)
    y_pred_proba = glm_mn.predict_proba(X)
    # n_lambdas x n_samples x n_classes
    assert_equal(y_pred_proba.shape, (3, X.shape[0], 2))

    # pseudo_R2 should be greater than 0
    assert_true(glm_mn[-1].score(X, y) > 0.)

    assert_equal(
        len(glm_mn.simulate(glm_mn.fit_[0]['beta0'],
                            glm_mn.fit_[0]['beta'], X)),
        X.shape[0])

    # check that score is computed for sliced estimator
    scorelist = glm_mn[-1].score(X, y)
    assert_equal(scorelist.shape[0], 1)

    # check that score is computed for all lambdas
    scorelist = glm_mn.score(X, y)
    assert_equal(scorelist.shape[0], y_pred_proba.shape[0])
########################################################
# Fit models

# Fix: import from sklearn.model_selection; sklearn.cross_validation
# was removed in scikit-learn 0.20.
from sklearn.model_selection import train_test_split
Xtrain, Xtest, Ytrain, Ytest = train_test_split(features, spike_counts,
                                                test_size=0.2,
                                                random_state=42)

########################################################

from pyglmnet import utils
n_samples = Xtrain.shape[0]
Tau = utils.tikhonov_from_prior(prior_cov, n_samples)

glm = GLM(distr='poisson', alpha=0., Tau=Tau, score_metric='pseudo_R2')
glm.fit(Xtrain, Ytrain)

# pick the regularization strength with the best held-out score
cvopt_lambda = glm.score(Xtest, Ytest).argmax()
print("train score: %f" % glm[cvopt_lambda].score(Xtrain, Ytrain))
print("test score: %f" % glm[cvopt_lambda].score(Xtest, Ytest))
weights = glm[cvopt_lambda].fit_['beta']

########################################################
# Visualize

for time_bin_ in range(n_temporal_basis):
    RF = strf_model.make_image_from_spatial_basis(
        spatial_basis,
        weights[range(time_bin_,
                      n_spatial_basis * n_temporal_basis,
                      n_temporal_basis)])
    plt.subplot(1, n_temporal_basis, time_bin_ + 1)
    plt.imshow(RF, cmap='Blues', interpolation='none')
train_y = simulate_glm("neg-binomial", beta0, beta, train_x)

# plot the data distribution
sns.set(color_codes=True)
sns.distplot(train_y)
plt.show()

# Create the GLM and train it
glm = GLM(distr="neg-binomial", max_iter=10000)
glm.fit(train_x, train_y)

# Print the betas and the beta0 to check for correctness
print("")
print(glm.beta0_)
print(glm.beta_)
print("")
print(beta0)
print(beta)

# Generate test data
# simulate testing data from the same neg-binomial model as training
# (fix: was "poisson", which mismatched the fitted distribution)
X_test = np.random.normal(0.0, 1.0, [1000, 10])
y_test = simulate_glm("neg-binomial", beta0, beta, X_test)

# predict using fitted model on the test data
yhat_test = glm.predict(X_test)

# score the model
deviance = glm.score(X_test, y_test)
print(deviance)
def get_benchmarks(self, X_train, y_train, X_test, y_test):
    """Fit, score and time each environment listed in ``self.envs``.

    Fix: the R branch used a bare ``except:``, which also swallows
    ``KeyboardInterrupt``/``SystemExit``; narrowed to
    ``except Exception`` (matching the other implementation of this
    method in this file).

    Returns a dict mapping env name -> {'score': ..., 'time': ms};
    unsupported configurations are recorded as -999.
    """
    n_repeats = self.n_repeats
    distr = self.distr

    res = dict()
    for env in self.envs:
        res[env] = dict()

        if env == 'pyglmnet':
            # initialize model
            model = GLM(distr=distr, reg_lambda=[self.reg_lambda],
                        alpha=self.alpha, solver='batch-gradient',
                        score_metric='pseudo_R2')
            # fit-predict-score
            model.fit(X_train, y_train)
            y_test_hat = model[-1].predict(X_test)
            y_test_hat = np.squeeze(y_test_hat)
            if distr in ['gaussian', 'poisson']:
                res[env]['score'] = \
                    r2_score(y_test, y_test_hat)
            elif distr == 'binomial':
                res[env]['score'] = \
                    accuracy_score(y_test,
                                   (y_test_hat > 0.5).astype(int))
            # time: best of n_repeats fits, in milliseconds
            tmp = list()
            for r in range(n_repeats):
                start = time.time()
                model.fit(X_train, y_train)
                stop = time.time()
                tmp.append(stop - start)
            res[env]['time'] = np.min(tmp) * 1e3

        if env == 'sklearn':
            if distr in ['gaussian', 'binomial']:
                # initialize model
                if distr == 'gaussian':
                    model = ElasticNet(alpha=self.reg_lambda,
                                       l1_ratio=self.alpha)
                elif distr == 'binomial':
                    model = SGDClassifier(loss='log',
                                          penalty='elasticnet',
                                          alpha=self.reg_lambda,
                                          l1_ratio=self.alpha)
                # fit-predict-score
                model.fit(X_train, y_train)
                y_test_hat = model.predict(X_test)
                res[env]['score'] = model.score(X_test, y_test)
                # time: best of n_repeats fits, in milliseconds
                tmp = list()
                for r in range(n_repeats):
                    start = time.time()
                    model.fit(X_train, y_train)
                    stop = time.time()
                    tmp.append(stop - start)
                res[env]['time'] = np.min(tmp) * 1e3
            else:
                # sklearn has no matching estimator for this distr
                res[env]['score'] = -999.
                res[env]['time'] = -999.

        if env == 'statsmodels':
            # initialize model
            if distr == 'gaussian':
                model = sm.GLM(y_train, sm.add_constant(X_train),
                               family=sm.families.Gaussian())
            elif distr == 'binomial':
                model = sm.GLM(y_train, sm.add_constant(X_train),
                               family=sm.families.Binomial())
            elif distr == 'poisson':
                model = sm.GLM(y_train, sm.add_constant(X_train),
                               family=sm.families.Poisson())
            # fit-predict-score
            statsmodels_res = model.fit()
            y_test_hat = model.predict(statsmodels_res.params,
                                       exog=sm.add_constant(X_test))
            y_test_hat = np.array(y_test_hat)
            if distr in ['gaussian', 'poisson']:
                res[env]['score'] = \
                    r2_score(y_test, y_test_hat)
            elif distr == 'binomial':
                res[env]['score'] = \
                    accuracy_score(y_test,
                                   (y_test_hat > 0.5).astype(int))
            # time: best of n_repeats fits, in milliseconds
            tmp = list()
            for r in range(n_repeats):
                start = time.time()
                statsmodels_res = model.fit()
                stop = time.time()
                tmp.append(stop - start)
            res[env]['time'] = np.min(tmp) * 1e3

        if env == 'R':
            # initialize model (via rpy2)
            glmnet = importr('glmnet')
            predict = robjects.r('predict')
            # fit-predict-score; R call can fail for unsupported
            # families, so record sentinel values on error
            try:
                fit = glmnet.glmnet(X_train, y_train, family=distr,
                                    alpha=self.alpha, nlambda=1)
                tmp = predict(fit, newx=X_test, s=0)
                y_test_hat = np.zeros(y_test.shape[0])
                for i in range(y_test.shape[0]):
                    y_test_hat[i] = tmp[i]
                if distr in ['gaussian', 'poisson']:
                    res[env]['score'] = \
                        r2_score(y_test, y_test_hat)
                elif distr == 'binomial':
                    res[env]['score'] = \
                        accuracy_score(y_test,
                                       (y_test_hat > 0.5).astype(int))
                # time: best of n_repeats fits, in milliseconds
                tmp = list()
                for r in range(n_repeats):
                    start = time.time()
                    fit = glmnet.glmnet(X_train, y_train, family=distr,
                                        alpha=self.alpha, nlambda=1)
                    stop = time.time()
                    tmp.append(stop - start)
                res[env]['time'] = np.min(tmp) * 1e3
            except Exception:
                res[env]['score'] = -999.
                res[env]['time'] = -999.

    return res
def get_benchmarks(self, X_train, y_train, X_test, y_test):
    """Fit, score and time each environment listed in ``self.envs``.

    Benchmarks pyglmnet against sklearn, statsmodels and R's glmnet
    (via rpy2) on the same train/test split. Returns a dict mapping
    env name -> {'score': ..., 'time': ms}; unsupported
    configurations are recorded as -999.
    """
    n_repeats = self.n_repeats
    distr = self.distr

    res = dict()
    for env in self.envs:
        res[env] = dict()

        if env == 'pyglmnet':
            # initialize model (single lambda on the path)
            model = GLM(distr=distr, reg_lambda=[self.reg_lambda],
                        alpha=self.alpha, solver='batch-gradient',
                        score_metric='pseudo_R2')
            # fit-predict-score
            model.fit(X_train, y_train)
            y_test_hat = model[-1].predict(X_test)
            y_test_hat = np.squeeze(y_test_hat)
            if distr in ['gaussian', 'poisson']:
                res[env]['score'] = \
                    r2_score(y_test, y_test_hat)
            elif distr == 'binomial':
                # threshold predicted probability at 0.5 for accuracy
                res[env]['score'] = \
                    accuracy_score(y_test,
                                   (y_test_hat > 0.5).astype(int))
            # time: best of n_repeats fits, reported in milliseconds
            tmp = list()
            for r in range(n_repeats):
                start = time.time()
                model.fit(X_train, y_train)
                stop = time.time()
                tmp.append(stop - start)
            res[env]['time'] = np.min(tmp) * 1e3

        if env == 'sklearn':
            if distr in ['gaussian', 'binomial']:
                # initialize model
                if distr == 'gaussian':
                    model = ElasticNet(alpha=self.reg_lambda,
                                       l1_ratio=self.alpha)
                elif distr == 'binomial':
                    model = SGDClassifier(loss='log',
                                          penalty='elasticnet',
                                          alpha=self.reg_lambda,
                                          l1_ratio=self.alpha)
                # fit-predict-score (sklearn's own default scorer)
                model.fit(X_train, y_train)
                y_test_hat = model.predict(X_test)
                res[env]['score'] = model.score(X_test, y_test)
                # time: best of n_repeats fits, in milliseconds
                tmp = list()
                for r in range(n_repeats):
                    start = time.time()
                    model.fit(X_train, y_train)
                    stop = time.time()
                    tmp.append(stop - start)
                res[env]['time'] = np.min(tmp) * 1e3
            else:
                # no matching sklearn estimator for this distr
                res[env]['score'] = -999.
                res[env]['time'] = -999.

        if env == 'statsmodels':
            # initialize model (explicit intercept via add_constant)
            if distr == 'gaussian':
                model = sm.GLM(y_train, sm.add_constant(X_train),
                               family=sm.families.Gaussian())
            elif distr == 'binomial':
                model = sm.GLM(y_train, sm.add_constant(X_train),
                               family=sm.families.Binomial())
            elif distr == 'poisson':
                model = sm.GLM(y_train, sm.add_constant(X_train),
                               family=sm.families.Poisson())
            # fit-predict-score
            statsmodels_res = model.fit()
            y_test_hat = model.predict(statsmodels_res.params,
                                       exog=sm.add_constant(X_test))
            y_test_hat = np.array(y_test_hat)
            if distr in ['gaussian', 'poisson']:
                res[env]['score'] = \
                    r2_score(y_test, y_test_hat)
            elif distr == 'binomial':
                res[env]['score'] = \
                    accuracy_score(y_test,
                                   (y_test_hat > 0.5).astype(int))
            # time: best of n_repeats fits, in milliseconds
            tmp = list()
            for r in range(n_repeats):
                start = time.time()
                statsmodels_res = model.fit()
                stop = time.time()
                tmp.append(stop - start)
            res[env]['time'] = np.min(tmp) * 1e3

        if env == 'R':
            # initialize model (R glmnet through rpy2)
            glmnet = importr('glmnet')
            predict = robjects.r('predict')
            # fit-predict-score; the R call can fail (e.g. unsupported
            # family), in which case sentinel values are recorded
            try:
                fit = glmnet.glmnet(X_train, y_train, family=distr,
                                    alpha=self.alpha, nlambda=1)
                tmp = predict(fit, newx=X_test, s=0)
                # copy the R vector element-wise into a numpy array
                y_test_hat = np.zeros(y_test.shape[0])
                for i in range(y_test.shape[0]):
                    y_test_hat[i] = tmp[i]
                if distr in ['gaussian', 'poisson']:
                    res[env]['score'] = \
                        r2_score(y_test, y_test_hat)
                elif distr == 'binomial':
                    res[env]['score'] = \
                        accuracy_score(y_test,
                                       (y_test_hat > 0.5).astype(int))
                # time: best of n_repeats fits, in milliseconds
                tmp = list()
                for r in range(n_repeats):
                    start = time.time()
                    fit = glmnet.glmnet(X_train, y_train, family=distr,
                                        alpha=self.alpha, nlambda=1)
                    stop = time.time()
                    tmp.append(stop - start)
                res[env]['time'] = np.min(tmp) * 1e3
            except Exception:
                res[env]['score'] = -999.
                res[env]['time'] = -999.

    return res
reg_lambda=np.logspace(np.log(100), np.log(0.01), 5, base=np.exp(1))) print("gl_glm: ", gl_glm) print("glm: ", glm) ########################################################## # Fit models gl_glm.fit(Xtrain, ytrain) glm.fit(Xtrain, ytrain) ########################################################## # Visualize model scores on test set plt.figure() plt.semilogx(gl_glm.reg_lambda, gl_glm.score(Xtest, ytest), 'go-') plt.semilogx(gl_glm.reg_lambda, gl_glm.score(Xtrain, ytrain), 'go--') plt.semilogx(glm.reg_lambda, glm.score(Xtest, ytest), 'ro-') plt.semilogx(glm.reg_lambda, glm.score(Xtrain, ytrain), 'ro--') plt.legend( ['Group Lasso: test', 'Group Lasso: train', 'Lasso: test', 'Lasso: train'], frameon=False, loc='best') plt.xlabel('$\lambda$') plt.ylabel('pseudo-$R^2$') plt.ylim([-0.1, 0.7]) plt.tick_params(axis='y', right='off') plt.tick_params(axis='x', top='off') ax = plt.gca() ax.spines['top'].set_visible(False)
# Fix: import from sklearn.model_selection; sklearn.cross_validation
# was removed in scikit-learn 0.20.
from sklearn.model_selection import train_test_split

Xtrain, Xtest, Ytrain, Ytest = train_test_split(features, spike_counts,
                                                test_size=0.2,
                                                random_state=42)

########################################################

from pyglmnet import utils
n_samples = Xtrain.shape[0]
Tau = utils.tikhonov_from_prior(prior_cov, n_samples)

glm = GLM(distr='poisson', alpha=0., Tau=Tau, score_metric='pseudo_R2')
glm.fit(Xtrain, Ytrain)

# pick the regularization strength with the best held-out score
cvopt_lambda = glm.score(Xtest, Ytest).argmax()
print("train score: %f" % glm[cvopt_lambda].score(Xtrain, Ytrain))
print("test score: %f" % glm[cvopt_lambda].score(Xtest, Ytest))
weights = glm[cvopt_lambda].fit_['beta']

########################################################
# Visualize

for time_bin_ in range(n_temporal_basis):
    RF = strf_model.make_image_from_spatial_basis(
        spatial_basis,
        weights[range(time_bin_, n_spatial_basis * n_temporal_basis,
                      n_temporal_basis)])
    plt.subplot(1, n_temporal_basis, time_bin_ + 1)
    plt.imshow(RF, cmap='Blues', interpolation='none')
markerline.set_markerfacecolor('none')
plt.plot(t_sample, ypred_lg[sample_idx],
         color='gold', linewidth=2, label='lgGLM with offset')
plt.plot(t_sample, ypred_poisson[sample_idx],
         color='green', linewidth=2, label='poissonGLM')
plt.plot(t_sample, ypred_poisson_hist[sample_idx],
         color='red', linewidth=2, label='poissonGLM_hist')
plt.xlim([0., tmax])
plt.title('Spike count prediction')
plt.xlabel('Time (sec)')
plt.ylabel('Binned Spike Counts')
plt.legend()
plt.show()

# print scores of all the fitted models
# (fix: corrected "possion" typo in the printed message)
print('Training perf (R^2): lin-gauss GLM, w/ offset: {:.2f}'.format(
    glm_lg.score(Xdsgn, y)))
print('Training perf (R^2): Pyglmnet poisson GLM {:.2f}'.format(
    glm_poisson.score(Xdsgn, y)))
print('Training perf (R^2): Pyglmnet poisson GLM w/ spikes history {:.2f}'.
      format(glm_poisson_hist.score(Xdsgn_hist, y)))