def test_compare_sklearn(solver):
    """Check that pyglmnet's gaussian GLM agrees with sklearn's ElasticNet."""

    def rmse(a, b):
        # Root-mean-squared error between two vectors.
        return np.sqrt(np.mean((a - b) ** 2))

    X, Y, coef_ = make_regression(
        n_samples=1000, n_features=500, noise=0.1, n_informative=10,
        coef=True, random_state=42)

    alpha = 0.1
    l1_ratio = 0.5

    # Reference fit with scikit-learn.
    clf = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, tol=1e-5)
    clf.fit(X, Y)

    # pyglmnet fit on the same problem; note the parameter mapping:
    # sklearn's alpha is pyglmnet's reg_lambda, l1_ratio is alpha.
    glm = GLM(distr='gaussian', alpha=l1_ratio, reg_lambda=alpha,
              solver=solver, tol=1e-6, max_iter=500)
    glm.fit(X, Y)

    # The two fits should reach comparable training error.
    y_sk = clf.predict(X)
    y_pg = glm.predict(X)
    assert abs(rmse(Y, y_sk) - rmse(Y, y_pg)) < 0.5

    # With fit_intercept=False the intercept must stay exactly zero.
    glm = GLM(distr='gaussian', alpha=l1_ratio, reg_lambda=alpha,
              solver=solver, tol=1e-6, max_iter=5, fit_intercept=False)
    glm.fit(X, Y)
    assert glm.beta0_ == 0.
    glm.predict(X)
def test_api_input():
    """Check GLM input validation and the basic fit/predict/score API."""
    rng = np.random.RandomState(1)
    n_samples, n_features = 100, 5
    X = rng.normal(0, 1, (n_samples, n_features))
    y = rng.normal(0, 1, (n_samples, ))

    glm = GLM(distr='gaussian')

    # Mismatched X / y shapes must be rejected.
    with pytest.raises(ValueError):
        GLM().fit(X, y[3:])

    # The happy path runs without errors.
    glm.fit(X, y)
    glm.predict(X)
    glm.score(X, y)
    glm.plot_convergence()

    # Unknown solver names are rejected at fit time.
    glm = GLM(distr='gaussian', solver='test')
    with pytest.raises(ValueError, match="solver must be one of"):
        glm.fit(X, y)

    # Non-boolean fit_intercept is rejected at construction time.
    with pytest.raises(ValueError, match="fit_intercept must be"):
        glm = GLM(distr='gaussian', fit_intercept='blah')

    # A tiny iteration budget triggers the convergence warning.
    glm = GLM(distr='gaussian', max_iter=2)
    with pytest.warns(UserWarning, match='Reached max number of iterat'):
        glm.fit(X, y)
def glm_bernoulli_pyglmnet(Xr, Yr, Xt):
    """Fit an unregularized binomial GLM on (Xr, Yr) and predict on Xt.

    NOTE(review): an earlier draft used distr='poissonexp', which is not
    listed as an available distr option, so the binomial family is used.
    """
    glm = GLM(distr='binomial', alpha=0., reg_lambda=[0.], tol=1e-6)
    glm.fit(Xr, Yr)
    # reg_lambda is a one-element list, so predict() yields one row of
    # predictions per lambda; return the (only) first row.
    return glm.predict(Xt)[0]
def test_multinomial():
    """Test all multinomial functionality"""
    # Three regularization strengths so predict() returns one slice per
    # lambda; very small tol so the optimizer runs (near) to convergence.
    glm_mn = GLM(distr='multinomial',
                 reg_lambda=np.array([0.0, 0.1, 0.2]),
                 learning_rate = 2e-1, tol=1e-10)
    X = np.array([[-1, -2, -3], [4, 5, 6]])
    y = np.array([1, 0])

    # test gradient
    beta = np.zeros([4, 2])
    grad_beta0, grad_beta = glm_mn._grad_L2loss(beta[0], beta[1:], 0, X, y)
    # Per-class intercept gradients must differ for this non-degenerate data.
    assert_true(grad_beta0[0] != grad_beta0[1])

    glm_mn.fit(X, y)
    y_pred = glm_mn.predict(X)
    assert_equal(y_pred.shape, (3, X.shape[0], 2))  # n_lambdas x n_samples x n_classes

    # pick one as yhat
    yhat = y_pred[0]
    # uniform prediction
    ynull = np.ones(yhat.shape) / yhat.shape[1]
    # pseudo_R2 should be greater than 0
    assert_true(glm_mn.score(y, yhat, ynull, method='pseudo_R2') > 0.)
    # Default score method should also run without error.
    glm_mn.score(y, yhat)
    # simulate() must produce one sample per input row.
    assert_equal(len(glm_mn.simulate(glm_mn.fit_[0]['beta0'],
                                     glm_mn.fit_[0]['beta'], X)),
                 X.shape[0])

    # these should raise an exception
    assert_raises(ValueError, glm_mn.score, y, y, y, 'pseudo_R2')
    assert_raises(ValueError, glm_mn.score, y, y, None, 'deviance')
def test_multinomial():
    """Test all multinomial functionality"""
    glm = GLM(distr='multinomial',
              reg_lambda=np.array([0.0, 0.1, 0.2]),
              tol=1e-10)
    X = np.array([[-1, -2, -3], [4, 5, 6]])
    y = np.array([1, 0])

    # test gradient
    beta = np.zeros([4, 2])
    grad_beta0, grad_beta = glm.grad_L2loss(beta[0], beta[1:], 0, X, y)
    # Per-class intercept gradients must differ for this non-degenerate data.
    assert grad_beta0[0] != grad_beta0[1]

    glm.fit(X, y)
    y_pred = glm.predict(X)
    assert_equal(y_pred.shape,
                 (3, X.shape[0], 2))  # n_lambdas x n_samples x n_classes

    # pick one as yhat
    yhat = y_pred[0]
    # uniform prediction
    ynull = np.ones(yhat.shape) / yhat.shape[1]
    # pseudo_R2 should be greater than 0
    assert_true(glm.pseudo_R2(y, yhat, ynull) > 0.)
    glm.deviance(y, yhat)
    # simulate() must produce one sample per input row.
    assert_equal(len(glm.simulate(glm.fit_[0]['beta0'],
                                  glm.fit_[0]['beta'], X)),
                 X.shape[0])

    # these should raise an exception
    # BUG FIX: the previous pattern
    #   try: call(); assert False
    #   except Exception: assert True
    # caught the AssertionError raised by ``assert False`` itself, so the
    # check could never fail. Track the outcome with a flag and assert
    # OUTSIDE the handler instead.
    raised = False
    try:
        glm.pseudo_R2(y, y, y)
    except Exception:
        raised = True
    assert raised

    raised = False
    try:
        glm.deviance(y, y)
    except Exception:
        raised = True
    assert raised
def test_multinomial():
    """Test all multinomial functionality"""
    # Three regularization strengths so predict() returns one slice per
    # lambda; very small tol so the optimizer runs (near) to convergence.
    glm_mn = GLM(distr='multinomial',
                 reg_lambda=np.array([0.0, 0.1, 0.2]),
                 learning_rate=2e-1, tol=1e-10)
    X = np.array([[-1, -2, -3], [4, 5, 6]])
    y = np.array([1, 0])

    # test gradient
    beta = np.zeros([4, 2])
    grad_beta0, grad_beta = glm_mn._grad_L2loss(beta[0], beta[1:], 0, X, y)
    # Per-class intercept gradients must differ for this non-degenerate data.
    assert_true(grad_beta0[0] != grad_beta0[1])

    glm_mn.fit(X, y)
    y_pred = glm_mn.predict(X)
    assert_equal(y_pred.shape, (3, X.shape[0], 2))  # n_lambdas x n_samples x n_classes

    # pick one as yhat
    yhat = y_pred[0]
    # uniform prediction
    ynull = np.ones(yhat.shape) / yhat.shape[1]
    # pseudo_R2 should be greater than 0
    assert_true(glm_mn.score(y, yhat, ynull, method='pseudo_R2') > 0.)
    # Default score method should also run without error.
    glm_mn.score(y, yhat)
    # simulate() must produce one sample per input row.
    assert_equal(
        len(glm_mn.simulate(glm_mn.fit_[0]['beta0'],
                            glm_mn.fit_[0]['beta'], X)),
        X.shape[0])

    # these should raise an exception
    assert_raises(ValueError, glm_mn.score, y, y, y, 'pseudo_R2')
    assert_raises(ValueError, glm_mn.score, y, y, None, 'deviance')
def test_glmnet():
    """Test glmnet fitting across distributions and solvers."""
    scaler = StandardScaler()
    n_samples, n_features = 1000, 100
    n_lambda = 10  # expected length of the default regularization path

    # coefficients
    # BUG FIX: ``np.float`` was deprecated in NumPy 1.20 and removed in
    # 1.24; the builtin ``float`` is the documented replacement.
    beta0 = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0, [n_features, 1])

    distrs = ['softplus', 'poisson', 'gaussian', 'binomial']
    solvers = ['batch-gradient', 'cdfast']
    score_metric = 'pseudo_R2'
    learning_rate = 2e-1

    for solver in solvers:
        for distr in distrs:
            glm = GLM(distr, learning_rate=learning_rate,
                      solver=solver, score_metric=score_metric)
            assert_true(repr(glm))

            np.random.seed(glm.random_state)
            X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
            y_train = glm.simulate(beta0, beta, X_train)

            X_train = scaler.fit_transform(X_train)
            glm.fit(X_train, y_train)

            # Coefficients from the last (smallest) lambda on the path.
            beta_ = glm.fit_[-1]['beta'][:]
            assert_allclose(beta[:], beta_, atol=0.5)  # check fit

            y_pred = glm.predict(scaler.transform(X_train))
            assert_equal(y_pred.shape, (n_lambda, X_train.shape[0]))

    # checks for slicing (uses the last fitted model from the loops).
    glm = glm[:3]
    glm_copy = glm.copy()
    assert_true(glm_copy is not glm)
    assert_equal(len(glm.reg_lambda), 3)
    y_pred = glm[:2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (2, X_train.shape[0]))
    y_pred = glm[2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (X_train.shape[0], ))
    assert_raises(IndexError, glm.__getitem__, [2])
    glm.score(X_train, y_train)

    # don't allow slicing if model has not been fit yet.
    glm_poisson = GLM(distr='softplus')
    assert_raises(ValueError, glm_poisson.__getitem__, 2)

    # test fit_predict
    glm_poisson.fit_predict(X_train, y_train)
    assert_raises(ValueError, glm_poisson.fit_predict,
                  X_train[None, ...], y_train)
def test_glmnet():
    """Test glmnet fitting across distributions and solvers."""
    scaler = StandardScaler()
    n_samples, n_features = 1000, 100
    n_lambda = 10  # expected length of the default regularization path

    # coefficients
    # BUG FIX: ``np.float`` was deprecated in NumPy 1.20 and removed in
    # 1.24; the builtin ``float`` is the documented replacement.
    beta0 = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0, [n_features, 1])

    distrs = ['poisson', 'poissonexp', 'normal', 'binomial']
    solvers = ['batch-gradient', 'cdfast']
    learning_rate = 2e-1

    for solver in solvers:
        for distr in distrs:
            glm = GLM(distr, learning_rate=learning_rate, solver=solver)
            assert_true(repr(glm))

            np.random.seed(glm.random_state)
            X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
            y_train = glm.simulate(beta0, beta, X_train)

            X_train = scaler.fit_transform(X_train)
            glm.fit(X_train, y_train)

            # Coefficients from the last (smallest) lambda on the path.
            beta_ = glm.fit_[-1]['beta'][:]
            assert_allclose(beta[:], beta_, atol=0.5)  # check fit

            y_pred = glm.predict(scaler.transform(X_train))
            assert_equal(y_pred.shape, (n_lambda, X_train.shape[0]))

    # checks for slicing (uses the last fitted model from the loops).
    glm = glm[:3]
    glm_copy = glm.copy()
    assert_true(glm_copy is not glm)
    assert_equal(len(glm.reg_lambda), 3)
    y_pred = glm[:2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (2, X_train.shape[0]))
    y_pred = glm[2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (X_train.shape[0], ))
    assert_raises(IndexError, glm.__getitem__, [2])
    glm.score(y_train, y_pred)

    # don't allow slicing if model has not been fit yet.
    glm_poisson = GLM(distr='poisson')
    assert_raises(ValueError, glm_poisson.__getitem__, 2)

    # test fit_predict
    glm_poisson.fit_predict(X_train, y_train)
    assert_raises(ValueError, glm_poisson.fit_predict,
                  X_train[None, ...], y_train)
def test_glmnet():
    """Test glmnet."""
    scaler = StandardScaler()
    # Large sample count so sparse coefficients can be recovered.
    n_samples, n_features = 10000, 100
    density = 0.1   # fraction of non-zero true coefficients
    n_lambda = 10   # expected length of the default regularization path

    # coefficients
    beta0 = np.random.rand()
    # Sparse ground-truth coefficients, densified for the simulator.
    beta = sps.rand(n_features, 1, density=density).toarray()

    distrs = ['poisson', 'poissonexp', 'normal', 'binomial']
    for distr in distrs:
        # FIXME: why do we need such this learning rate for 'poissonexp'?
        learning_rate = 1e-5 if distr == 'poissonexp' else 1e-4
        glm = GLM(distr, learning_rate=learning_rate)
        assert_true(repr(glm))

        np.random.seed(glm.random_state)
        X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
        y_train = glm.simulate(beta0, beta, X_train)

        X_train = scaler.fit_transform(X_train)
        glm.fit(X_train, y_train)

        # Second-to-last lambda on the path — presumably chosen because
        # the very last (smallest) lambda may overfit; TODO confirm.
        beta_ = glm.fit_[-2]['beta'][:]
        assert_allclose(beta[:], beta_, atol=0.1)  # check fit
        # Recovered sparsity should roughly match the true density.
        density_ = np.sum(beta_ > 0.1) / float(n_features)
        assert_allclose(density_, density, atol=0.05)  # check density

        y_pred = glm.predict(scaler.transform(X_train))
        assert_equal(y_pred.shape, (n_lambda, X_train.shape[0]))

    # checks for slicing (uses the last fitted model from the loop).
    glm = glm[:3]
    glm_copy = glm.copy()
    assert_true(glm_copy is not glm)
    assert_equal(len(glm.reg_lambda), 3)
    y_pred = glm[:2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (2, X_train.shape[0]))
    y_pred = glm[2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (X_train.shape[0], ))
    assert_raises(IndexError, glm.__getitem__, [2])
    glm.deviance(y_train, y_pred)

    # don't allow slicing if model has not been fit yet.
    glm = GLM(distr='poisson')
    assert_raises(ValueError, glm.__getitem__, 2)

    # test fit_predict
    glm.fit_predict(X_train, y_train)
    assert_raises(ValueError, glm.fit_predict, X_train[None, ...], y_train)
def test_api_input_types_y():
    """Check that y must be an ndarray with a matching shape."""
    rng = np.random.RandomState(1)
    n_samples, n_features = 100, 5
    X = rng.normal(0, 1, (n_samples, n_features))
    y = rng.normal(0, 1, (n_samples, ))

    glm = GLM(distr='gaussian')

    # A plain list is rejected — y has to be an ndarray.
    with pytest.raises(ValueError):
        glm.fit(X, list(y))

    # Mismatched X / y shapes are rejected as well.
    with pytest.raises(ValueError):
        GLM().fit(X, y[3:])

    # The happy path runs without errors.
    glm.fit(X, y)
    glm.predict(X)
    glm.score(X, y)
def test_glmnet():
    """Test glmnet fitting across distributions and solvers."""
    scaler = StandardScaler()
    n_samples, n_features = 100, 10

    # coefficients
    # BUG FIX: ``np.float`` was deprecated in NumPy 1.20 and removed in
    # 1.24; the builtin ``float`` is the documented replacement.
    beta0 = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0, (n_features,))

    distrs = ['softplus', 'gaussian', 'poisson', 'binomial', 'probit']
    solvers = ['batch-gradient', 'cdfast']
    score_metric = 'pseudo_R2'
    learning_rate = 2e-1

    for solver in solvers:
        for distr in distrs:
            glm = GLM(distr, learning_rate=learning_rate,
                      solver=solver, score_metric=score_metric)
            assert_true(repr(glm))

            np.random.seed(glm.random_state)
            X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
            y_train = simulate_glm(glm.distr, beta0, beta, X_train)

            X_train = scaler.fit_transform(X_train)
            glm.fit(X_train, y_train)

            # Recovered coefficients should be close to ground truth.
            beta_ = glm.beta_
            assert_allclose(beta, beta_, atol=0.5)  # check fit

            y_pred = glm.predict(scaler.transform(X_train))
            assert_equal(y_pred.shape[0], X_train.shape[0])

    # test fit_predict
    glm_poisson = GLM(distr='softplus')
    glm_poisson.fit_predict(X_train, y_train)
    assert_raises(ValueError, glm_poisson.fit_predict,
                  X_train[None, ...], y_train)
def test_multinomial():
    """Test all multinomial functionality"""
    # Three regularization strengths so predict() returns one slice per
    # lambda; very small tol so the optimizer runs (near) to convergence.
    glm_mn = GLM(distr='multinomial',
                 reg_lambda=np.array([0.0, 0.1, 0.2]),
                 learning_rate=2e-1, tol=1e-10)
    X = np.array([[-1, -2, -3], [4, 5, 6]])
    y = np.array([1, 0])

    # test gradient
    beta = np.zeros([4, 2])
    grad_beta0, grad_beta = glm_mn._grad_L2loss(beta[0], beta[1:], 0, X, y)
    # Per-class intercept gradients must differ for this non-degenerate data.
    assert_true(grad_beta0[0] != grad_beta0[1])

    glm_mn.fit(X, y)
    y_pred = glm_mn.predict(X)
    assert_equal(y_pred.shape, (3, X.shape[0], 2))  # n_lambdas x n_samples x n_classes

    # pick one as yhat
    yhat = y_pred[0]
    # uniform prediction
    ynull = np.ones(yhat.shape) / yhat.shape[1]
    # pseudo_R2 should be greater than 0
    assert_true(glm_mn[-1].score(X, y) > 0.)
    # simulate() must produce one sample per input row.
    assert_equal(
        len(glm_mn.simulate(glm_mn.fit_[0]['beta0'],
                            glm_mn.fit_[0]['beta'], X)),
        X.shape[0])

    # check that score is computed for sliced estimator
    scorelist = glm_mn[-1].score(X, y)
    assert_equal(scorelist.shape[0], 1)

    # check that score is computed for all lambdas
    scorelist = glm_mn.score(X, y)
    assert_equal(scorelist.shape[0], y_pred.shape[0])
# Simulate training responses from the model we intend to fit.
train_y = simulate_glm("neg-binomial", beta0, beta, train_x)

# plot the data distribution
sns.set(color_codes=True)
sns.distplot(train_y)
plt.show()

# Create the GLM and train it
glm = GLM(distr="neg-binomial", max_iter=10000)
glm.fit(train_x, train_y)

# Print the betas and the beta0 to check for correctness
print("")
print(glm.beta0_)
print(glm.beta_)
print("")
print(beta0)
print(beta)

# Generate test data
# BUG FIX: the held-out responses were simulated from a "poisson" GLM
# even though the model above is trained (and scored) as a negative
# binomial — simulate the test data from the same distribution so the
# test score is meaningful.
X_test = np.random.normal(0.0, 1.0, [1000, 10])
y_test = simulate_glm("neg-binomial", beta0, beta, X_test)

# predict using fitted model on the test data
yhat_test = glm.predict(X_test)

# score the model
deviance = glm.score(X_test, y_test)
print(deviance)
# NOTE(review): ``position_array`` is not defined in this chunk —
# presumably created earlier in the file; confirm.
print(position_array.shape)

# Scatter all tracked points frame by frame.
pl.figure()
for n in range(n_frames):
    # columns 0:4 look like x coordinates and 4:8 like y coordinates —
    # assumed layout, verify against how all_position is built.
    pl.scatter(all_position[n, 0:4], all_position[n, 4:8], s=2, c='k')
pl.show()

# GLM
glm = GLM(distr='gaussian', alpha=0.05)

# Predict column 0 of the positions from the remaining columns.
X = np.delete(all_position, 0, axis=1)
y = all_position[:, 0]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25,
                                                    random_state=42)
# Standardize features on the training split only, then reuse the scaler.
scaler = StandardScaler().fit(X_train)
glm.fit(scaler.transform(X_train), y_train)
yhat = glm.predict(scaler.transform(X))
# print(glm.score(X_test, Y_test))
#
# plot
pl.figure()
pl.plot(y, marker='x', color='b', label='observed')
# Row 9 of yhat selects one reg_lambda on the regularization path —
# presumably the last/smallest; verify against the GLM's lambda path.
pl.plot(yhat[9, :], marker='o', color='r', label='trained')
pl.show()
def test_glmnet(distr, reg_lambda, fit_intercept, solver):
    """Test glmnet for one (distr, reg_lambda, fit_intercept, solver) combo."""
    raises(ValueError, GLM, distr='blah')
    raises(ValueError, GLM, distr='gaussian', max_iter=1.8)

    n_samples, n_features = 100, 10

    # coefficients
    # BUG FIX: ``np.float`` was deprecated in NumPy 1.20 and removed in
    # 1.24; the builtin ``float`` is the documented replacement.
    beta0 = 0.
    if fit_intercept:
        beta0 = 1. / (float(n_features) + 1.) * \
            np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + int(fit_intercept)) * \
        np.random.normal(0.0, 1.0, (n_features,))

    score_metric = 'pseudo_R2'
    learning_rate = 2e-1
    random_state = 0

    betas_ = list()
    # cdfast is not implemented for the gamma distribution; skip that combo.
    if not (distr == 'gamma' and solver == 'cdfast'):
        np.random.seed(random_state)

        theta = 1.0
        X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
        y_train = simulate_glm(distr, beta0, beta, X_train,
                               theta=theta, sample=False)

        alpha = 0.
        loss_trace = list()
        eta = 2.0
        group = None
        Tau = None

        def callback(beta):
            Tau = None
            loss_trace.append(
                _loss(distr, alpha, Tau, reg_lambda, X_train, y_train,
                      eta, theta, group, beta,
                      fit_intercept=fit_intercept))

        glm = GLM(distr, learning_rate=learning_rate,
                  reg_lambda=reg_lambda, tol=1e-5, max_iter=5000,
                  alpha=alpha, solver=solver, score_metric=score_metric,
                  random_state=random_state, callback=callback,
                  fit_intercept=fit_intercept, theta=theta)
        assert(repr(glm))

        glm.fit(X_train, y_train)

        # verify loss decreases
        assert(np.all(np.diff(loss_trace) <= 1e-7))

        # true loss and beta should be recovered when reg_lambda == 0
        if reg_lambda == 0.:
            # verify loss at convergence = loss when beta=beta_
            l_true = _loss(distr, alpha, Tau, reg_lambda, X_train, y_train,
                           eta, theta, group,
                           np.concatenate(([beta0], beta)))
            assert_allclose(loss_trace[-1], l_true, rtol=1e-4, atol=1e-5)
            # beta=beta_ when reg_lambda = 0.
            assert_allclose(beta, glm.beta_, rtol=0.05, atol=1e-2)
        betas_.append(glm.beta_)

        y_pred = glm.predict(X_train)
        assert(y_pred.shape[0] == X_train.shape[0])

        # compare all solvers pairwise to make sure they're close
        for i, first_beta in enumerate(betas_[:-1]):
            for second_beta in betas_[i + 1:]:
                assert_allclose(first_beta, second_beta,
                                rtol=0.05, atol=1e-2)

        # test fit_predict
        glm_poisson = GLM(distr='softplus')
        glm_poisson.fit_predict(X_train, y_train)
        raises(ValueError, glm_poisson.fit_predict,
               X_train[None, ...], y_train)
def get_benchmarks(self, X_train, y_train, X_test, y_test):
    """Fit/predict/score the configured model in every benchmark environment.

    Returns a dict mapping each entry of ``self.envs`` to a dict with
    'score' (R2 for gaussian/poisson, accuracy for binomial) and 'time'
    (best fit time over ``self.n_repeats`` repeats, in milliseconds).
    Environments that cannot handle the distribution report -999. for both.
    """
    n_repeats = self.n_repeats
    distr = self.distr
    res = dict()
    for env in self.envs:
        res[env] = dict()
        if env == 'pyglmnet':
            # initialize model
            model = GLM(distr=distr,
                        reg_lambda=[self.reg_lambda],
                        alpha=self.alpha,
                        solver='batch-gradient',
                        score_metric='pseudo_R2')
            # fit-predict-score
            model.fit(X_train, y_train)
            y_test_hat = model[-1].predict(X_test)
            y_test_hat = np.squeeze(y_test_hat)
            if distr in ['gaussian', 'poisson']:
                res[env]['score'] = \
                    r2_score(y_test, y_test_hat)
            elif distr == 'binomial':
                res[env]['score'] = \
                    accuracy_score(y_test,
                                   (y_test_hat > 0.5).astype(int))
            # time: keep the best (minimum) wall-clock fit over repeats
            tmp = list()
            for r in range(n_repeats):
                start = time.time()
                model.fit(X_train, y_train)
                stop = time.time()
                tmp.append(stop - start)
            res[env]['time'] = np.min(tmp) * 1e3

        if env == 'sklearn':
            if distr in ['gaussian', 'binomial']:
                # initialize model
                if distr == 'gaussian':
                    model = ElasticNet(alpha=self.reg_lambda,
                                       l1_ratio=self.alpha)
                elif distr == 'binomial':
                    model = SGDClassifier(loss='log',
                                          penalty='elasticnet',
                                          alpha=self.reg_lambda,
                                          l1_ratio=self.alpha)
                # fit-predict-score
                model.fit(X_train, y_train)
                y_test_hat = model.predict(X_test)
                res[env]['score'] = model.score(X_test, y_test)
                # time
                tmp = list()
                for r in range(n_repeats):
                    start = time.time()
                    model.fit(X_train, y_train)
                    stop = time.time()
                    tmp.append(stop - start)
                res[env]['time'] = np.min(tmp) * 1e3
            else:
                # sklearn has no comparable estimator for this distr.
                res[env]['score'] = -999.
                res[env]['time'] = -999.

        if env == 'statsmodels':
            # initialize model
            if distr == 'gaussian':
                model = sm.GLM(y_train, sm.add_constant(X_train),
                               family=sm.families.Gaussian())
            elif distr == 'binomial':
                model = sm.GLM(y_train, sm.add_constant(X_train),
                               family=sm.families.Binomial())
            elif distr == 'poisson':
                model = sm.GLM(y_train, sm.add_constant(X_train),
                               family=sm.families.Poisson())
            # fit-predict-score
            statsmodels_res = model.fit()
            y_test_hat = model.predict(statsmodels_res.params,
                                       exog=sm.add_constant(X_test))
            y_test_hat = np.array(y_test_hat)
            if distr in ['gaussian', 'poisson']:
                res[env]['score'] = \
                    r2_score(y_test, y_test_hat)
            elif distr == 'binomial':
                res[env]['score'] = \
                    accuracy_score(y_test,
                                   (y_test_hat > 0.5).astype(int))
            # time
            tmp = list()
            for r in range(n_repeats):
                start = time.time()
                statsmodels_res = model.fit()
                stop = time.time()
                tmp.append(stop - start)
            res[env]['time'] = np.min(tmp) * 1e3

        if env == 'R':
            # initialize model (via rpy2)
            glmnet = importr('glmnet')
            predict = robjects.r('predict')
            # fit-predict-score
            try:
                fit = glmnet.glmnet(X_train, y_train, family=distr,
                                    alpha=self.alpha, nlambda=1)
                tmp = predict(fit, newx=X_test, s=0)
                y_test_hat = np.zeros(y_test.shape[0])
                for i in range(y_test.shape[0]):
                    y_test_hat[i] = tmp[i]
                if distr in ['gaussian', 'poisson']:
                    res[env]['score'] = \
                        r2_score(y_test, y_test_hat)
                elif distr == 'binomial':
                    res[env]['score'] = \
                        accuracy_score(y_test,
                                       (y_test_hat > 0.5).astype(int))
                # time
                tmp = list()
                for r in range(n_repeats):
                    start = time.time()
                    fit = glmnet.glmnet(X_train, y_train, family=distr,
                                        alpha=self.alpha, nlambda=1)
                    stop = time.time()
                    tmp.append(stop - start)
                res[env]['time'] = np.min(tmp) * 1e3
            # BUG FIX: a bare ``except:`` also swallows SystemExit and
            # KeyboardInterrupt; catch Exception only (this also matches
            # the other copy of this method in this file).
            except Exception:
                res[env]['score'] = -999.
                res[env]['time'] = -999.
    return res
def get_benchmarks(self, X_train, y_train, X_test, y_test):
    """Fit/predict/score the configured model in every benchmark environment.

    Returns a dict mapping each entry of ``self.envs`` to a dict with
    'score' (R2 for gaussian/poisson, accuracy for binomial) and 'time'
    (best fit time over ``self.n_repeats`` repeats, in milliseconds).
    Environments that cannot handle the distribution report -999. for both.
    """
    n_repeats = self.n_repeats
    distr = self.distr
    res = dict()
    for env in self.envs:
        res[env] = dict()
        if env == 'pyglmnet':
            # initialize model
            model = GLM(distr=distr,
                        reg_lambda=[self.reg_lambda],
                        alpha=self.alpha,
                        solver='batch-gradient',
                        score_metric='pseudo_R2')
            # fit-predict-score
            model.fit(X_train, y_train)
            y_test_hat = model[-1].predict(X_test)
            y_test_hat = np.squeeze(y_test_hat)
            if distr in ['gaussian', 'poisson']:
                res[env]['score'] = \
                    r2_score(y_test, y_test_hat)
            elif distr == 'binomial':
                res[env]['score'] = \
                    accuracy_score(y_test,
                                   (y_test_hat > 0.5).astype(int))
            # time: keep the best (minimum) wall-clock fit over repeats
            tmp = list()
            for r in range(n_repeats):
                start = time.time()
                model.fit(X_train, y_train)
                stop = time.time()
                tmp.append(stop - start)
            res[env]['time'] = np.min(tmp) * 1e3

        if env == 'sklearn':
            if distr in ['gaussian', 'binomial']:
                # initialize model
                if distr == 'gaussian':
                    model = ElasticNet(alpha=self.reg_lambda,
                                       l1_ratio=self.alpha)
                elif distr == 'binomial':
                    model = SGDClassifier(loss='log',
                                          penalty='elasticnet',
                                          alpha=self.reg_lambda,
                                          l1_ratio=self.alpha)
                # fit-predict-score
                model.fit(X_train, y_train)
                y_test_hat = model.predict(X_test)
                res[env]['score'] = model.score(X_test, y_test)
                # time
                tmp = list()
                for r in range(n_repeats):
                    start = time.time()
                    model.fit(X_train, y_train)
                    stop = time.time()
                    tmp.append(stop - start)
                res[env]['time'] = np.min(tmp) * 1e3
            else:
                # sklearn has no comparable estimator for this distr.
                res[env]['score'] = -999.
                res[env]['time'] = -999.

        if env == 'statsmodels':
            # initialize model
            if distr == 'gaussian':
                model = sm.GLM(y_train, sm.add_constant(X_train),
                               family=sm.families.Gaussian())
            elif distr == 'binomial':
                model = sm.GLM(y_train, sm.add_constant(X_train),
                               family=sm.families.Binomial())
            elif distr == 'poisson':
                model = sm.GLM(y_train, sm.add_constant(X_train),
                               family=sm.families.Poisson())
            # fit-predict-score
            statsmodels_res = model.fit()
            y_test_hat = model.predict(statsmodels_res.params,
                                       exog=sm.add_constant(X_test))
            y_test_hat = np.array(y_test_hat)
            if distr in ['gaussian', 'poisson']:
                res[env]['score'] = \
                    r2_score(y_test, y_test_hat)
            elif distr == 'binomial':
                res[env]['score'] = \
                    accuracy_score(y_test,
                                   (y_test_hat > 0.5).astype(int))
            # time
            tmp = list()
            for r in range(n_repeats):
                start = time.time()
                statsmodels_res = model.fit()
                stop = time.time()
                tmp.append(stop - start)
            res[env]['time'] = np.min(tmp) * 1e3

        if env == 'R':
            # initialize model (via rpy2)
            glmnet = importr('glmnet')
            predict = robjects.r('predict')
            # fit-predict-score; any R-side failure marks this env as N/A.
            try:
                fit = glmnet.glmnet(X_train, y_train, family=distr,
                                    alpha=self.alpha, nlambda=1)
                tmp = predict(fit, newx=X_test, s=0)
                y_test_hat = np.zeros(y_test.shape[0])
                for i in range(y_test.shape[0]):
                    y_test_hat[i] = tmp[i]
                if distr in ['gaussian', 'poisson']:
                    res[env]['score'] = \
                        r2_score(y_test, y_test_hat)
                elif distr == 'binomial':
                    res[env]['score'] = \
                        accuracy_score(y_test,
                                       (y_test_hat > 0.5).astype(int))
                # time
                tmp = list()
                for r in range(n_repeats):
                    start = time.time()
                    fit = glmnet.glmnet(X_train, y_train, family=distr,
                                        alpha=self.alpha, nlambda=1)
                    stop = time.time()
                    tmp.append(stop - start)
                res[env]['time'] = np.min(tmp) * 1e3
            except Exception:
                res[env]['score'] = -999.
                res[env]['time'] = -999.
    return res
def test_glmnet():
    """Test glmnet fitting across distributions and solvers."""
    raises(ValueError, GLM, distr='blah')
    raises(ValueError, GLM, distr='gaussian', max_iter=1.8)

    n_samples, n_features = 100, 10

    # coefficients
    # BUG FIX: ``np.float`` was deprecated in NumPy 1.20 and removed in
    # 1.24; the builtin ``float`` is the documented replacement.
    beta0 = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0, (n_features,))

    distrs = ['softplus', 'gaussian', 'poisson', 'binomial', 'probit']
    solvers = ['batch-gradient', 'cdfast']
    score_metric = 'pseudo_R2'
    learning_rate = 2e-1
    random_state = 0

    for distr in distrs:
        betas_ = list()
        for solver in solvers:
            np.random.seed(random_state)
            X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
            y_train = simulate_glm(distr, beta0, beta, X_train,
                                   sample=False)

            alpha = 0.
            reg_lambda = 0.
            loss_trace = list()

            def callback(beta):
                Tau = None
                eta = 2.0
                group = None
                loss_trace.append(
                    _loss(distr, alpha, Tau, reg_lambda,
                          X_train, y_train, eta, group, beta))

            glm = GLM(distr, learning_rate=learning_rate,
                      reg_lambda=reg_lambda, tol=1e-3, max_iter=5000,
                      alpha=alpha, solver=solver,
                      score_metric=score_metric,
                      random_state=random_state, callback=callback)
            assert (repr(glm))

            glm.fit(X_train, y_train)

            # verify loss decreases
            assert (np.all(np.diff(loss_trace) <= 1e-7))

            # verify loss at convergence = loss when beta=beta_
            l_true = _loss(distr, 0., np.eye(beta.shape[0]), 0.,
                           X_train, y_train, 2.0, None,
                           np.concatenate(([beta0], beta)))
            assert_allclose(loss_trace[-1], l_true, rtol=1e-4, atol=1e-5)

            # beta=beta_ when reg_lambda = 0.
            assert_allclose(beta, glm.beta_, rtol=0.05, atol=1e-2)
            betas_.append(glm.beta_)

            y_pred = glm.predict(X_train)
            assert (y_pred.shape[0] == X_train.shape[0])

        # compare all solvers pairwise to make sure they're close
        for i, first_beta in enumerate(betas_[:-1]):
            for second_beta in betas_[i + 1:]:
                assert_allclose(first_beta, second_beta,
                                rtol=0.05, atol=1e-2)

    # test fit_predict
    glm_poisson = GLM(distr='softplus')
    glm_poisson.fit_predict(X_train, y_train)
    raises(ValueError, glm_poisson.fit_predict,
           X_train[None, ...], y_train)
######################################################## # **Fitting and predicting with a linear-Gaussian GLM** # # For a general linear model, the observed spikes can be # thought of an underlying parameter # :math:`\beta_0, \beta` that control the spiking. # # You can simply use linear Gaussian GLM with no regularization # to predict the spike counts. glm_lg = GLM(distr='gaussian', reg_lambda=0.0, score_metric='pseudo_R2') glm_lg.fit(Xdsgn, y) # predict spike counts ypred_lg = glm_lg.predict(Xdsgn) ######################################################## # **Fitting and predicting with a Poisson GLM** # # We can also assume that there is a non-linear function governing # the underlying the firing patterns. # In pyglmnet, we use an exponential inverse link function # for the Poisson distribution. glm_poisson = GLM(distr='poisson', alpha=0.05, learning_rate=1.0, score_metric='pseudo_R2', reg_lambda=1e-7) glm_poisson.fit(Xdsgn, y)