def test_group_lasso():
    """Smoke-test fitting a softplus GLM on data with grouped coefficients."""
    n_samples, n_features = 100, 90

    # group id of every feature; indices 29 and 59 keep the initial id 0
    groups = np.zeros(90)
    for group_id, (start, stop) in enumerate([(0, 29), (30, 59), (60, 90)],
                                             start=1):
        groups[start:stop] = group_id

    # true parameters, with every coefficient of group 2 set to zero
    beta0 = np.random.normal(0.0, 1.0, 1)
    beta = np.random.normal(0.0, 1.0, n_features)
    beta[groups == 2] = 0.

    # model under test
    glm_group = GLM(distr='softplus', alpha=1.)

    # simulated design matrix and responses
    X_sim = np.random.normal(0.0, 1.0, [n_samples, n_features])
    y_sim = simulate_glm(glm_group.distr, beta0, beta, X_sim)

    # standardise the features, then fit
    scaler = StandardScaler().fit(X_sim)
    X_scaled = scaler.transform(X_sim)
    glm_group.fit(X_scaled, y_sim)
def test_multinomial():
    """Test all multinomial functionality.

    Covers the gradient at zero coefficients, the shape returned by
    ``predict_proba``, the sign of the score of the last sliced model, the
    length of ``simulate``'s output and the number of scores returned for a
    sliced and an unsliced estimator.
    """
    glm_mn = GLM(distr='multinomial',
                 reg_lambda=np.array([0.0, 0.1, 0.2]),
                 learning_rate=2e-1,
                 tol=1e-10)
    X = np.array([[-1, -2, -3], [4, 5, 6]])
    y = np.array([1, 0])

    # test gradient: the two intercept gradients must differ
    beta = np.zeros([4, 2])
    grad_beta0, _ = glm_mn._grad_L2loss(beta[0], beta[1:], 0, X, y)
    assert_true(grad_beta0[0] != grad_beta0[1])

    glm_mn.fit(X, y)
    y_pred_proba = glm_mn.predict_proba(X)
    # n_lambdas x n_samples x n_classes
    assert_equal(y_pred_proba.shape, (3, X.shape[0], 2))

    # the score of the model sliced at the last reg_lambda should be > 0
    scorelist = glm_mn[-1].score(X, y)
    assert_true(scorelist > 0.)

    # simulate returns one entry per row of X
    simulated = glm_mn.simulate(glm_mn.fit_[0]['beta0'],
                                glm_mn.fit_[0]['beta'],
                                X)
    assert_equal(len(simulated), X.shape[0])

    # check that score is computed for sliced estimator
    assert_equal(scorelist.shape[0], 1)

    # check that score is computed for all lambdas
    scorelist = glm_mn.score(X, y)
    assert_equal(scorelist.shape[0], y_pred_proba.shape[0])
def test_compare_sklearn(solver):
    """Check a gaussian GLM against scikit-learn's ElasticNet for `solver`."""

    def rmse(a, b):
        """Root-mean-square difference between `a` and `b`."""
        squared_error = (a - b) ** 2
        return np.sqrt(np.mean(squared_error))

    X, Y, coef_ = make_regression(n_samples=1000,
                                  n_features=500,
                                  noise=0.1,
                                  n_informative=10,
                                  coef=True,
                                  random_state=42)

    alpha, l1_ratio = 0.1, 0.5

    # reference fit
    enet = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, tol=1e-5)
    enet.fit(X, Y)

    # note the naming: GLM's alpha is sklearn's l1_ratio and GLM's
    # reg_lambda is sklearn's alpha
    glm = GLM(distr='gaussian', alpha=l1_ratio, reg_lambda=alpha,
              solver=solver, tol=1e-6, max_iter=500)
    glm.fit(X, Y)

    y_sk = enet.predict(X)
    y_pg = glm.predict(X)
    rmse_gap = abs(rmse(Y, y_sk) - rmse(Y, y_pg))
    assert rmse_gap < 0.5

    # without an intercept the fitted beta0_ must be zero
    glm_no_intercept = GLM(distr='gaussian', alpha=l1_ratio,
                           reg_lambda=alpha, solver=solver, tol=1e-6,
                           max_iter=5, fit_intercept=False)
    glm_no_intercept.fit(X, Y)
    assert glm_no_intercept.beta0_ == 0.
    glm_no_intercept.predict(X)
def test_multinomial():
    """Exercise gradient, predict, score and simulate of a multinomial GLM."""
    lambdas = np.array([0.0, 0.1, 0.2])
    glm_mn = GLM(distr='multinomial', reg_lambda=lambdas,
                 learning_rate=2e-1, tol=1e-10)
    X = np.array([[-1, -2, -3], [4, 5, 6]])
    y = np.array([1, 0])

    # gradient at all-zero coefficients: intercept entries must differ
    beta = np.zeros([4, 2])
    intercept, weights = beta[0], beta[1:]
    grad_beta0, grad_beta = glm_mn._grad_L2loss(intercept, weights, 0, X, y)
    assert_true(grad_beta0[0] != grad_beta0[1])

    glm_mn.fit(X, y)
    y_pred = glm_mn.predict(X)
    expected_shape = (3, X.shape[0], 2)  # n_lambdas x n_samples x n_classes
    assert_equal(y_pred.shape, expected_shape)

    # prediction for the first reg_lambda, and a uniform null prediction
    yhat = y_pred[0]
    ynull = np.ones(yhat.shape) / yhat.shape[1]

    # pseudo_R2 should be greater than 0
    r2 = glm_mn.score(y, yhat, ynull, method='pseudo_R2')
    assert_true(r2 > 0.)
    glm_mn.score(y, yhat)

    first_fit = glm_mn.fit_[0]
    simulated = glm_mn.simulate(first_fit['beta0'], first_fit['beta'], X)
    assert_equal(len(simulated), X.shape[0])

    # these should raise an exception
    assert_raises(ValueError, glm_mn.score, y, y, y, 'pseudo_R2')
    assert_raises(ValueError, glm_mn.score, y, y, None, 'deviance')
def test_multinomial():
    """Test all multinomial functionality.

    Covers the gradient at zero coefficients, the shape returned by
    ``predict``, ``pseudo_R2``/``deviance`` on a valid prediction, the length
    of ``simulate``'s output, and that ``pseudo_R2``/``deviance`` raise when
    called with the label vector in place of a prediction.
    """

    def _assert_raises_any(func, *args):
        """Fail unless ``func(*args)`` raises some exception."""
        try:
            func(*args)
        except Exception:
            return
        # Raised outside the ``try`` so that it cannot be swallowed by the
        # ``except`` clause (the previous ``assert False`` inside the ``try``
        # was caught by ``except Exception`` and the check always passed).
        raise AssertionError('%s did not raise' % getattr(func, '__name__',
                                                          func))

    glm = GLM(distr='multinomial',
              reg_lambda=np.array([0.0, 0.1, 0.2]),
              tol=1e-10)
    X = np.array([[-1, -2, -3], [4, 5, 6]])
    y = np.array([1, 0])

    # test gradient
    beta = np.zeros([4, 2])
    grad_beta0, _ = glm.grad_L2loss(beta[0], beta[1:], 0, X, y)
    assert grad_beta0[0] != grad_beta0[1]

    glm.fit(X, y)
    y_pred = glm.predict(X)
    # n_lambdas x n_samples x n_classes
    assert_equal(y_pred.shape, (3, X.shape[0], 2))

    # pick one as yhat
    yhat = y_pred[0]
    # uniform prediction
    ynull = np.ones(yhat.shape) / yhat.shape[1]

    # pseudo_R2 should be greater than 0
    assert_true(glm.pseudo_R2(y, yhat, ynull) > 0.)
    glm.deviance(y, yhat)

    simulated = glm.simulate(glm.fit_[0]['beta0'], glm.fit_[0]['beta'], X)
    assert_equal(len(simulated), X.shape[0])

    # these should raise an exception
    _assert_raises_any(glm.pseudo_R2, y, y, y)
    _assert_raises_any(glm.deviance, y, y)
def glm_bernoulli_pyglmnet(Xr, Yr, Xt):
    """Fit a binomial GLM on (Xr, Yr) and return its prediction for Xt.

    The model is built with ``alpha=0.``, ``reg_lambda=[0.]`` and
    ``tol=1e-6``.  Only element ``[0]`` of ``predict``'s output is returned
    (presumably the prediction for the single ``reg_lambda`` -- confirm).

    NOTE: an earlier variant used ``distr='poissonexp'``; the original author
    was unsure whether that is a valid ``distr`` option.
    """
    model = GLM(distr='binomial', alpha=0., reg_lambda=[0.], tol=1e-6)
    model.fit(Xr, Yr)
    predictions = model.predict(Xt)
    return predictions[0]
def test_multinomial():
    """Check the multinomial GLM end to end: gradient, fit, predict, score."""
    glm_mn = GLM(
        distr='multinomial',
        reg_lambda=np.array([0.0, 0.1, 0.2]),
        learning_rate=2e-1,
        tol=1e-10,
    )
    X = np.array([[-1, -2, -3], [4, 5, 6]])
    y = np.array([1, 0])
    n_obs = X.shape[0]

    # the gradient w.r.t. the two intercepts must not coincide
    zeros = np.zeros([4, 2])
    grad_beta0, grad_beta = glm_mn._grad_L2loss(zeros[0], zeros[1:], 0, X, y)
    assert_true(grad_beta0[0] != grad_beta0[1])

    glm_mn.fit(X, y)
    y_pred = glm_mn.predict(X)
    # n_lambdas x n_samples x n_classes
    assert_equal(y_pred.shape, (3, n_obs, 2))

    # first entry of the prediction versus a uniform prediction
    yhat = y_pred[0]
    ynull = np.ones(yhat.shape) / yhat.shape[1]

    # pseudo_R2 should be greater than 0
    assert_true(glm_mn.score(y, yhat, ynull, method='pseudo_R2') > 0.)
    glm_mn.score(y, yhat)

    params = glm_mn.fit_[0]
    assert_equal(len(glm_mn.simulate(params['beta0'], params['beta'], X)),
                 n_obs)

    # these should raise an exception
    for bad_args in ((y, y, y, 'pseudo_R2'), (y, y, None, 'deviance')):
        assert_raises(ValueError, glm_mn.score, *bad_args)
def test_tikhonov():
    """Tikhonov regularization test.

    Samples coefficients from a smooth prior covariance, simulates softplus
    data, derives a Tikhonov matrix ``Tau`` from the SVD of that covariance
    and fits/scores a GLM with both solvers.  Nothing is asserted about the
    scores; the test only checks that fitting and scoring run.
    """
    # ``sklearn.cross_validation`` was removed from scikit-learn; prefer the
    # current module and fall back only on very old installations.
    try:
        from sklearn.model_selection import train_test_split
    except ImportError:
        from sklearn.cross_validation import train_test_split

    n_samples, n_features = 100, 10

    # design covariance matrix of parameters
    Gam = 15.
    PriorCov = np.zeros([n_features, n_features])
    for i in range(n_features):
        for j in range(i, n_features):
            # ``float`` replaces the removed ``np.float`` alias
            PriorCov[i, j] = np.exp(-Gam * 1. / (float(n_features) ** 2) *
                                    (float(i) - float(j)) ** 2)
            PriorCov[j, i] = PriorCov[i, j]
            if i == j:
                PriorCov[i, j] += 0.01
    PriorCov = 1. / np.max(PriorCov) * PriorCov

    # sample parameters as multivariate normal
    beta0 = np.random.randn()
    beta = np.random.multivariate_normal(np.zeros(n_features), PriorCov)

    # sample train and test data
    glm_sim = GLM(distr='softplus', score_metric='pseudo_R2')
    X = np.random.randn(n_samples, n_features)
    y = simulate_glm(glm_sim.distr, beta0, beta, X)

    Xtrain, Xtest, ytrain, ytest = \
        train_test_split(X, y, test_size=0.5, random_state=42)

    # design tikhonov matrix
    U, S, _ = np.linalg.svd(PriorCov, full_matrices=False)
    Tau = np.dot(np.diag(1. / np.sqrt(S)), U)
    Tau = 1. / np.sqrt(float(n_samples)) * Tau / Tau.max()

    # fit and score the model with each solver in turn
    for solver in ('batch-gradient', 'cdfast'):
        glm_tikhonov = GLM(distr='softplus',
                           alpha=0.0,
                           Tau=Tau,
                           solver=solver,
                           tol=1e-5,
                           score_metric='pseudo_R2')
        glm_tikhonov.fit(Xtrain, ytrain)

        R2_train, R2_test = dict(), dict()
        R2_train['tikhonov'] = glm_tikhonov.score(Xtrain, ytrain)
        R2_test['tikhonov'] = glm_tikhonov.score(Xtest, ytest)
def test_tikhonov():
    """Tikhonov regularization test.

    Samples coefficients from a smooth prior covariance, simulates softplus
    data, derives a Tikhonov matrix ``Tau`` from the SVD of that covariance
    and fits/scores a GLM with both solvers.  Nothing is asserted about the
    scores; the test only checks that fitting and scoring run.
    """
    from sklearn.model_selection import train_test_split

    n_samples, n_features = 100, 10

    # design covariance matrix of parameters: a squared-exponential kernel in
    # the index distance, plus 0.01 on the diagonal, scaled to a maximum of 1
    Gam = 15.
    idx = np.arange(n_features)
    sq_dist = (idx[:, None] - idx[None, :]) ** 2
    # ``float`` replaces the removed ``np.float`` alias
    PriorCov = np.exp(-Gam / float(n_features) ** 2 * sq_dist)
    PriorCov[idx, idx] += 0.01
    PriorCov = 1. / np.max(PriorCov) * PriorCov

    # sample parameters as multivariate normal
    beta0 = np.random.randn()
    beta = np.random.multivariate_normal(np.zeros(n_features), PriorCov)

    # sample train and test data
    glm_sim = GLM(distr='softplus', score_metric='pseudo_R2')
    X = np.random.randn(n_samples, n_features)
    y = simulate_glm(glm_sim.distr, beta0, beta, X)

    Xtrain, Xtest, ytrain, ytest = \
        train_test_split(X, y, test_size=0.5, random_state=42)

    # design tikhonov matrix
    U, S, _ = np.linalg.svd(PriorCov, full_matrices=False)
    Tau = np.dot(np.diag(1. / np.sqrt(S)), U)
    Tau = 1. / np.sqrt(float(n_samples)) * Tau / Tau.max()

    # fit model with batch gradient
    glm_tikhonov = GLM(distr='softplus',
                       alpha=0.0,
                       Tau=Tau,
                       solver='batch-gradient',
                       tol=1e-3,
                       score_metric='pseudo_R2')
    glm_tikhonov.fit(Xtrain, ytrain)

    R2_train, R2_test = dict(), dict()
    R2_train['tikhonov'] = glm_tikhonov.score(Xtrain, ytrain)
    R2_test['tikhonov'] = glm_tikhonov.score(Xtest, ytest)

    # fit model with cdfast
    glm_tikhonov = GLM(distr='softplus',
                       alpha=0.0,
                       Tau=Tau,
                       solver='cdfast',
                       tol=1e-3,
                       score_metric='pseudo_R2')
    glm_tikhonov.fit(Xtrain, ytrain)

    R2_train, R2_test = dict(), dict()
    R2_train['tikhonov'] = glm_tikhonov.score(Xtrain, ytrain)
    R2_test['tikhonov'] = glm_tikhonov.score(Xtest, ytest)
def test_glmnet():
    """Test glmnet.

    For every solver/distribution pair: simulate data from known
    coefficients, fit, and check the recovered coefficients and the shape of
    the prediction.  Afterwards check slicing, copying, scoring and
    ``fit_predict`` on the last fitted model.
    """
    scaler = StandardScaler()
    n_samples, n_features = 1000, 100
    # expected size of the first axis of the unsliced prediction
    n_lambda = 10

    # coefficients (``float`` replaces the removed ``np.float`` alias)
    beta0 = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0, [n_features, 1])

    distrs = ['softplus', 'poisson', 'gaussian', 'binomial']
    solvers = ['batch-gradient', 'cdfast']
    score_metric = 'pseudo_R2'
    learning_rate = 2e-1

    for solver in solvers:
        for distr in distrs:

            glm = GLM(distr, learning_rate=learning_rate,
                      solver=solver, score_metric=score_metric)

            assert_true(repr(glm))

            np.random.seed(glm.random_state)
            X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
            y_train = glm.simulate(beta0, beta, X_train)

            X_train = scaler.fit_transform(X_train)
            glm.fit(X_train, y_train)

            beta_ = glm.fit_[-1]['beta'][:]
            assert_allclose(beta[:], beta_, atol=0.5)  # check fit

            y_pred = glm.predict(scaler.transform(X_train))
            assert_equal(y_pred.shape, (n_lambda, X_train.shape[0]))

    # checks for slicing.
    glm = glm[:3]
    glm_copy = glm.copy()
    assert_true(glm_copy is not glm)
    assert_equal(len(glm.reg_lambda), 3)
    y_pred = glm[:2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (2, X_train.shape[0]))
    y_pred = glm[2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (X_train.shape[0], ))
    assert_raises(IndexError, glm.__getitem__, [2])
    glm.score(X_train, y_train)

    # don't allow slicing if model has not been fit yet.
    glm_poisson = GLM(distr='softplus')
    assert_raises(ValueError, glm_poisson.__getitem__, 2)

    # test fit_predict
    glm_poisson.fit_predict(X_train, y_train)
    assert_raises(ValueError, glm_poisson.fit_predict,
                  X_train[None, ...], y_train)
def test_glmnet():
    """Test glmnet.

    For every solver/distribution pair: simulate data from known
    coefficients, fit, and check the recovered coefficients and the shape of
    the prediction.  Afterwards check slicing, copying, scoring and
    ``fit_predict`` on the last fitted model.
    """
    scaler = StandardScaler()
    n_samples, n_features = 1000, 100
    # expected size of the first axis of the unsliced prediction
    n_lambda = 10

    # coefficients (``float`` replaces the removed ``np.float`` alias)
    beta0 = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0, [n_features, 1])

    distrs = ['poisson', 'poissonexp', 'normal', 'binomial']
    solvers = ['batch-gradient', 'cdfast']
    learning_rate = 2e-1

    for solver in solvers:
        for distr in distrs:

            glm = GLM(distr, learning_rate=learning_rate, solver=solver)

            assert_true(repr(glm))

            np.random.seed(glm.random_state)
            X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
            y_train = glm.simulate(beta0, beta, X_train)

            X_train = scaler.fit_transform(X_train)
            glm.fit(X_train, y_train)

            beta_ = glm.fit_[-1]['beta'][:]
            assert_allclose(beta[:], beta_, atol=0.5)  # check fit

            y_pred = glm.predict(scaler.transform(X_train))
            assert_equal(y_pred.shape, (n_lambda, X_train.shape[0]))

    # checks for slicing.
    glm = glm[:3]
    glm_copy = glm.copy()
    assert_true(glm_copy is not glm)
    assert_equal(len(glm.reg_lambda), 3)
    y_pred = glm[:2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (2, X_train.shape[0]))
    y_pred = glm[2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (X_train.shape[0], ))
    assert_raises(IndexError, glm.__getitem__, [2])
    glm.score(y_train, y_pred)

    # don't allow slicing if model has not been fit yet.
    glm_poisson = GLM(distr='poisson')
    assert_raises(ValueError, glm_poisson.__getitem__, 2)

    # test fit_predict
    glm_poisson.fit_predict(X_train, y_train)
    assert_raises(ValueError, glm_poisson.fit_predict,
                  X_train[None, ...], y_train)
def test_cv():
    """Check that cross-validating a fitted GLM yields one score per fold."""
    n_folds = 5
    X, y = make_regression()

    lambdas = np.array([0.0, 0.1, 0.2])
    model_mn = GLM(distr='normal', alpha=0.01, reg_lambda=lambdas)
    model_mn.fit(X, y)

    # NOTE(review): the positional (n, n_folds) call matches the legacy
    # sklearn.cross_validation.KFold signature -- confirm which KFold is
    # imported by this file.
    cv = KFold(X.shape[0], n_folds)

    # check that it returns 5 scores
    scores = cross_val_score(model_mn, X, y, cv=cv, scoring=simple_cv_scorer)
    assert_equal(len(scores), n_folds)
def test_glmnet():
    """Fit a GLM per distribution on simulated sparse data and check it."""
    scaler = StandardScaler()
    n_samples, n_features = 10000, 100
    density = 0.1
    n_lambda = 10

    # sparse ground-truth coefficients
    beta0 = np.random.rand()
    beta = sps.rand(n_features, 1, density=density).toarray()

    for distr in ['poisson', 'poissonexp', 'normal', 'binomial']:

        # FIXME: why do we need such this learning rate for 'poissonexp'?
        if distr == 'poissonexp':
            learning_rate = 1e-5
        else:
            learning_rate = 1e-4
        glm = GLM(distr, learning_rate=learning_rate)

        assert_true(repr(glm))

        # simulate training data under a fixed seed
        np.random.seed(glm.random_state)
        X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
        y_train = glm.simulate(beta0, beta, X_train)

        X_train = scaler.fit_transform(X_train)
        glm.fit(X_train, y_train)

        # coefficients of the second-to-last fit must match the truth
        beta_ = glm.fit_[-2]['beta'][:]
        assert_allclose(beta[:], beta_, atol=0.1)

        # ... and so must the fraction of coefficients above 0.1
        density_ = np.sum(beta_ > 0.1) / float(n_features)
        assert_allclose(density_, density, atol=0.05)

        y_pred = glm.predict(scaler.transform(X_train))
        assert_equal(y_pred.shape, (n_lambda, X_train.shape[0]))

    # slicing checks, run on the model fitted last
    X_scaled = scaler.transform(X_train)
    sliced = glm[:3]
    sliced_copy = sliced.copy()
    assert_true(sliced_copy is not sliced)
    assert_equal(len(sliced.reg_lambda), 3)
    y_pred = sliced[:2].predict(X_scaled)
    assert_equal(y_pred.shape, (2, X_train.shape[0]))
    y_pred = sliced[2].predict(X_scaled)
    assert_equal(y_pred.shape, (X_train.shape[0], ))
    assert_raises(IndexError, sliced.__getitem__, [2])
    sliced.deviance(y_train, y_pred)

    # a model that has not been fit yet must refuse to be sliced
    unfitted = GLM(distr='poisson')
    assert_raises(ValueError, unfitted.__getitem__, 2)

    # fit_predict works on 2-d input and rejects 3-d input
    unfitted.fit_predict(X_train, y_train)
    assert_raises(ValueError, unfitted.fit_predict,
                  X_train[None, ...], y_train)
def test_cv():
    """Check that cross-validating a gaussian GLM gives one score per fold."""
    # XXX: don't use scikit-learn for tests.
    X, y = make_regression()
    n_obs = X.shape[0]

    glm_normal = GLM(distr='gaussian', alpha=0.01,
                     reg_lambda=[0.0, 0.1, 0.2])
    glm_normal.fit(X, y)

    # NOTE(review): positional (n, n_folds) matches the legacy
    # sklearn.cross_validation.KFold signature -- confirm the import.
    folds = KFold(n_obs, 5)

    # check that it returns 5 scores
    cv_scores = cross_val_score(glm_normal, X, y, cv=folds,
                                scoring=simple_cv_scorer)
    assert_equal(len(cv_scores), 5)
def test_api_input():
    """Check input validation of GLM's constructor and ``fit``."""
    random_state = 1
    rng = np.random.RandomState(random_state)
    n_samples, n_features = 100, 5

    X = rng.normal(0, 1, (n_samples, n_features))
    y = rng.normal(0, 1, (n_samples, ))

    glm = GLM(distr='gaussian')

    # a plain list is rejected for y -- it has to be an ndarray
    y_as_list = list(y)
    with pytest.raises(ValueError):
        glm.fit(X, y_as_list)

    # mismatching numbers of samples are rejected
    y_short = y[3:]
    with pytest.raises(ValueError):
        GLM().fit(X, y_short)

    # well-formed input goes through fit, predict and score
    glm.fit(X, y)
    glm.predict(X)
    glm.score(X, y)

    # an unknown solver is only reported when fitting
    glm = GLM(distr='gaussian', solver='test')
    with pytest.raises(ValueError, match="solver must be one of"):
        glm.fit(X, y)

    # an invalid fit_intercept is reported by the constructor
    with pytest.raises(ValueError, match="fit_intercept must be"):
        GLM(distr='gaussian', fit_intercept='blah')

    # hitting max_iter emits a warning
    glm = GLM(distr='gaussian', max_iter=2)
    with pytest.warns(UserWarning, match='Reached max number of iterat'):
        glm.fit(X, y)
def test_cv():
    """Cross-validate a single-lambda gaussian GLM and count the scores."""
    # XXX: don't use scikit-learn for tests.
    expected_n_scores = 5
    X, y = make_regression()

    glm_normal = GLM(distr='gaussian', alpha=0.01, reg_lambda=0.1)
    glm_normal.fit(X, y)

    # NOTE(review): positional (n, n_folds) matches the legacy
    # sklearn.cross_validation.KFold signature -- confirm the import.
    splitter = KFold(X.shape[0], 5)

    # check that it returns 5 scores
    fold_scores = cross_val_score(glm_normal, X, y, cv=splitter,
                                  scoring=simple_cv_scorer)
    assert_equal(len(fold_scores), expected_n_scores)
def test_pseudoR2():
    """Check that the pseudo_R2 score of a fitted GLM is a float."""
    n_samples, n_features = 1000, 100

    # ground-truth parameters
    beta0 = np.random.rand()
    beta = np.random.normal(0.0, 1.0, n_features)

    # simulate data from the model's own distribution, then fit on it
    glm_sim = GLM(score_metric='pseudo_R2')
    X = np.random.randn(n_samples, n_features)
    y = simulate_glm(glm_sim.distr, beta0, beta, X)
    glm_sim.fit(X, y)

    r2 = glm_sim.score(X, y)
    assert isinstance(r2, float)
def test_deviance():
    """Check that the deviance score of a fitted GLM is a float."""
    n_samples, n_features = 1000, 100

    # ground-truth parameters
    beta0 = np.random.normal(0.0, 1.0, 1)
    beta = np.random.normal(0.0, 1.0, n_features)

    # simulate data from the model's own distribution, then fit on it
    glm_sim = GLM(score_metric='deviance')
    X = np.random.randn(n_samples, n_features)
    y = simulate_glm(glm_sim.distr, beta0, beta, X)
    glm_sim.fit(X, y)

    deviance = glm_sim.score(X, y)
    assert_true(isinstance(deviance, float))
def test_accuracy():
    """Check that the accuracy score of a fitted binomial GLM is a float."""
    n_samples, n_features, n_classes = 1000, 100, 2

    # ground-truth parameters, one coefficient column per class
    beta0 = np.random.normal(0.0, 1.0, 1)
    beta = np.random.normal(0.0, 1.0, (n_features, n_classes))

    glm_sim = GLM(distr='binomial', score_metric='accuracy')

    # simulate responses and reduce them to the index of the largest column
    X = np.random.randn(n_samples, n_features)
    simulated = simulate_glm(glm_sim.distr, beta0, beta, X)
    y = np.argmax(simulated, axis=1)

    glm_sim.fit(X, y)
    accuracy = glm_sim.score(X, y)
    assert_true(isinstance(accuracy, float))
def test_group_lasso():
    """Check that a group-lasso fit zeroes coefficients group by group."""
    n_samples, n_features = 100, 90

    # group id of every feature; indices 29 and 59 keep the initial id 0
    groups = np.zeros(90)
    groups[:29] = 1
    groups[30:59] = 2
    groups[60:] = 3

    # true parameters, with every coefficient of group 2 set to zero
    beta0 = np.random.normal(0.0, 1.0, 1)
    beta = np.random.normal(0.0, 1.0, n_features)
    beta[groups == 2] = 0.

    glm_group = GLM(distr='softplus', alpha=1., reg_lambda=0.2, group=groups)

    # simulate training data under the model's seed
    np.random.seed(glm_group.random_state)
    Xr = np.random.normal(0.0, 1.0, [n_samples, n_features])
    yr = simulate_glm(glm_group.distr, beta0, beta, Xr)

    # scale and fit
    scaler = StandardScaler().fit(Xr)
    glm_group.fit(scaler.transform(Xr), yr)

    # Within every group other than 0 the fitted coefficients must be
    # either all nonzero or all zero.
    fitted = glm_group.beta_
    grouped_ids = [gid for gid in np.unique(groups) if gid != 0]
    for gid in grouped_ids:
        members = fitted[groups == gid]
        n_nonzero = (members != 0.0).sum()
        assert n_nonzero == len(members) or n_nonzero == 0

    # one of the groups must be [all zero]
    assert any(fitted[groups == gid].sum() == 0 for gid in grouped_ids)
def test_glmnet():
    """Test glmnet.

    For every solver/distribution pair: simulate data from known
    coefficients, fit, and check the recovered coefficients and the length
    of the prediction.  Finally exercise ``fit_predict``.
    """
    scaler = StandardScaler()
    n_samples, n_features = 100, 10

    # coefficients (``float`` replaces the removed ``np.float`` alias)
    beta0 = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0, (n_features,))

    distrs = ['softplus', 'gaussian', 'poisson', 'binomial', 'probit']
    solvers = ['batch-gradient', 'cdfast']
    score_metric = 'pseudo_R2'
    learning_rate = 2e-1

    for solver in solvers:
        for distr in distrs:

            glm = GLM(distr, learning_rate=learning_rate,
                      solver=solver, score_metric=score_metric)
            assert_true(repr(glm))

            np.random.seed(glm.random_state)
            X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
            y_train = simulate_glm(glm.distr, beta0, beta, X_train)

            X_train = scaler.fit_transform(X_train)
            glm.fit(X_train, y_train)

            beta_ = glm.beta_
            assert_allclose(beta, beta_, atol=0.5)  # check fit

            y_pred = glm.predict(scaler.transform(X_train))
            assert_equal(y_pred.shape[0], X_train.shape[0])

    # test fit_predict
    glm_poisson = GLM(distr='softplus')
    glm_poisson.fit_predict(X_train, y_train)
    assert_raises(ValueError, glm_poisson.fit_predict,
                  X_train[None, ...], y_train)
def test_glmnet():
    """Fit a poisson GLM on simulated sparse data and check the estimate."""
    glm = GLM(distr='poisson')
    scaler = StandardScaler()

    n_samples, n_features = 10000, 100
    density = 0.1

    # sparse ground-truth coefficients
    beta0 = np.random.rand()
    true_beta = sps.rand(n_features, 1, density=density).toarray()

    # simulate from the raw design matrix, then standardise it for fitting
    X_raw = np.random.normal(0.0, 1.0, [n_samples, n_features])
    y_train = glm.simulate(beta0, true_beta, X_raw)
    X_train = scaler.fit_transform(X_raw)
    glm.fit(X_train, y_train)

    # coefficients of the second-to-last fit must match the truth
    beta_ = glm.fit_params[-2]['beta'][:]
    assert_allclose(true_beta[:], beta_, atol=0.1)

    # ... and so must the fraction of coefficients above 0.1
    n_large = np.sum(beta_ > 0.1)
    density_ = n_large / float(n_features)
    assert_allclose(density_, density, atol=0.02)
def test_multinomial():
    """Test all multinomial functionality.

    Covers the gradient at zero coefficients, the shape returned by
    ``predict_proba``, the sign of the score of the last sliced model, the
    length of ``simulate``'s output and the number of scores returned for a
    sliced and an unsliced estimator.
    """
    glm_mn = GLM(distr='multinomial',
                 reg_lambda=np.array([0.0, 0.1, 0.2]),
                 learning_rate=2e-1,
                 tol=1e-10)
    X = np.array([[-1, -2, -3], [4, 5, 6]])
    y = np.array([1, 0])

    # test gradient (only the intercept part is inspected)
    beta = np.zeros([4, 2])
    grad_beta0, _ = glm_mn._grad_L2loss(beta[0], beta[1:], 0, X, y)
    assert_true(grad_beta0[0] != grad_beta0[1])

    glm_mn.fit(X, y)
    y_pred_proba = glm_mn.predict_proba(X)
    # n_lambdas x n_samples x n_classes
    assert_equal(y_pred_proba.shape, (3, X.shape[0], 2))

    # The unused ``yhat``/``ynull`` locals of the previous revision were
    # dropped: ``score`` is called with (X, y) here.

    # the score of the model sliced at the last reg_lambda should be > 0
    assert_true(glm_mn[-1].score(X, y) > 0.)

    first_fit = glm_mn.fit_[0]
    assert_equal(
        len(glm_mn.simulate(first_fit['beta0'], first_fit['beta'], X)),
        X.shape[0])

    # check that score is computed for sliced estimator
    scorelist = glm_mn[-1].score(X, y)
    assert_equal(scorelist.shape[0], 1)

    # check that score is computed for all lambdas
    scorelist = glm_mn.score(X, y)
    assert_equal(scorelist.shape[0], y_pred_proba.shape[0])
def test_api_input_types_y():
    """Check which kinds of ``y`` are accepted by ``GLM.fit``."""
    random_state = 1
    rng = np.random.RandomState(random_state)
    n_samples, n_features = 100, 5

    X = rng.normal(0, 1, (n_samples, n_features))
    y = rng.normal(0, 1, (n_samples, ))

    glm = GLM(distr='gaussian')

    # a plain list is rejected for y -- it has to be an ndarray
    y_as_list = list(y)
    with pytest.raises(ValueError):
        glm.fit(X, y_as_list)

    # mismatching numbers of samples are rejected
    y_short = y[3:]
    with pytest.raises(ValueError):
        GLM().fit(X, y_short)

    # well-formed input goes through fit, predict and score
    glm.fit(X, y)
    glm.predict(X)
    glm.score(X, y)
n_samples, n_features = X.shape

########################################################
# Split the data into training and test sets

X_train, X_test, y_train, y_test = \
    train_test_split(X, y, test_size=0.33, random_state=0)

########################################################
# Fit a gaussian distributed GLM with elastic net regularization

# use the default value for reg_lambda
glm = GLM(distr='gaussian', alpha=0.05, score_metric='pseudo_R2')

# fit model
glm.fit(X_train, y_train)

# score the test set prediction, using the model sliced at the last index
y_test_hat = glm[-1].predict(X_test)
print("test set pseudo $R^2$ = %f" % glm[-1].score(X_test, y_test))

########################################################
# Plot the true and predicted test set target values

plt.plot(y_test[:50], 'ko-')
plt.plot(y_test_hat[:50], 'ro-')
plt.legend(['true', 'pred'], frameon=False)
plt.xlabel('Counties')
plt.ylabel('Per capita violent crime')
# ``right`` expects a bool; the string 'off' is truthy and would leave the
# right-hand ticks switched on in current matplotlib.
plt.tick_params(axis='y', right=False)
def get_benchmarks(self, X_train, y_train, X_test, y_test):
    """Fit, score and time one model per environment in ``self.envs``.

    Reads ``self.envs``, ``self.distr``, ``self.n_repeats``,
    ``self.reg_lambda`` and ``self.alpha``.

    Parameters
    ----------
    X_train, y_train
        Training data passed to every model's fit routine.
    X_test, y_test
        Held-out data used for scoring.

    Returns
    -------
    res : dict
        ``res[env]`` is a dict with a ``'score'`` entry and a ``'time'``
        entry (the fastest of ``n_repeats`` fits, in milliseconds).  The
        sentinel ``-999.`` marks combinations that are unsupported
        ('sklearn' with a distribution other than gaussian/binomial) or that
        failed ('R').  Environments other than 'pyglmnet', 'sklearn',
        'statsmodels' and 'R' get an empty dict.
    """
    n_repeats = self.n_repeats
    distr = self.distr
    res = dict()

    def _set_score(env, y_hat):
        """Store r2 (gaussian/poisson) or accuracy (binomial) for `env`."""
        if distr in ['gaussian', 'poisson']:
            res[env]['score'] = r2_score(y_test, y_hat)
        elif distr == 'binomial':
            res[env]['score'] = \
                accuracy_score(y_test, (y_hat > 0.5).astype(int))

    def _set_time(env, fit):
        """Store the fastest of `n_repeats` calls to `fit` in ms."""
        durations = list()
        for _ in range(n_repeats):
            start = time.time()
            fit()
            stop = time.time()
            durations.append(stop - start)
        res[env]['time'] = np.min(durations) * 1e3

    for env in self.envs:
        res[env] = dict()

        if env == 'pyglmnet':
            # initialize model
            model = GLM(distr=distr,
                        reg_lambda=[self.reg_lambda],
                        alpha=self.alpha,
                        solver='batch-gradient',
                        score_metric='pseudo_R2')

            # fit-predict-score
            model.fit(X_train, y_train)
            y_test_hat = np.squeeze(model[-1].predict(X_test))
            _set_score(env, y_test_hat)

            # time
            _set_time(env, lambda: model.fit(X_train, y_train))

        if env == 'sklearn':
            if distr in ['gaussian', 'binomial']:
                # initialize model
                if distr == 'gaussian':
                    model = ElasticNet(alpha=self.reg_lambda,
                                       l1_ratio=self.alpha)
                elif distr == 'binomial':
                    model = SGDClassifier(loss='log',
                                          penalty='elasticnet',
                                          alpha=self.reg_lambda,
                                          l1_ratio=self.alpha)

                # fit-score, using the estimator's own score method
                model.fit(X_train, y_train)
                res[env]['score'] = model.score(X_test, y_test)

                # time
                _set_time(env, lambda: model.fit(X_train, y_train))
            else:
                res[env]['score'] = -999.
                res[env]['time'] = -999.

        if env == 'statsmodels':
            # initialize model
            if distr == 'gaussian':
                model = sm.GLM(y_train,
                               sm.add_constant(X_train),
                               family=sm.families.Gaussian())
            elif distr == 'binomial':
                model = sm.GLM(y_train,
                               sm.add_constant(X_train),
                               family=sm.families.Binomial())
            elif distr == 'poisson':
                model = sm.GLM(y_train,
                               sm.add_constant(X_train),
                               family=sm.families.Poisson())

            # fit-predict-score
            statsmodels_res = model.fit()
            y_test_hat = model.predict(statsmodels_res.params,
                                       exog=sm.add_constant(X_test))
            _set_score(env, np.array(y_test_hat))

            # time
            _set_time(env, model.fit)

        if env == 'R':
            # initialize model
            glmnet = importr('glmnet')
            predict = robjects.r('predict')

            def _fit_r():
                return glmnet.glmnet(X_train, y_train, family=distr,
                                     alpha=self.alpha, nlambda=1)

            # fit-predict-score; any failure is recorded as the sentinel
            # -999. rather than aborting the other environments.
            # ``except Exception`` (not a bare ``except``) keeps
            # KeyboardInterrupt/SystemExit working during long benchmarks.
            try:
                fit = _fit_r()
                r_pred = predict(fit, newx=X_test, s=0)
                # copy the R result element by element into a numpy array
                y_test_hat = np.zeros(y_test.shape[0])
                for i in range(y_test.shape[0]):
                    y_test_hat[i] = r_pred[i]
                _set_score(env, y_test_hat)

                # time
                _set_time(env, _fit_r)
            except Exception:
                res[env]['score'] = -999.
                res[env]['time'] = -999.

    return res
def get_benchmarks(self, X_train, y_train, X_test, y_test):
    """Benchmark every environment listed in ``self.envs``.

    For each environment a model is fitted on the training data, scored on
    the test data and its fit is timed ``self.n_repeats`` times.  The result
    maps the environment name to a dict holding ``'score'`` and ``'time'``
    (fastest fit in milliseconds); ``-999.`` marks an unsupported
    distribution ('sklearn') or a failed run ('R').
    """
    n_repeats = self.n_repeats
    distr = self.distr
    res = dict()

    def fastest_ms(run_fit):
        # fastest of n_repeats wall-clock measurements, in milliseconds
        elapsed = list()
        for _ in range(n_repeats):
            tic = time.time()
            run_fit()
            toc = time.time()
            elapsed.append(toc - tic)
        return np.min(elapsed) * 1e3

    for env in self.envs:
        entry = res[env] = dict()

        if env == 'pyglmnet':
            model = GLM(distr=distr, reg_lambda=[self.reg_lambda],
                        alpha=self.alpha, solver='batch-gradient',
                        score_metric='pseudo_R2')

            model.fit(X_train, y_train)
            y_test_hat = model[-1].predict(X_test)
            y_test_hat = np.squeeze(y_test_hat)

            if distr in ['gaussian', 'poisson']:
                entry['score'] = r2_score(y_test, y_test_hat)
            elif distr == 'binomial':
                labels = (y_test_hat > 0.5).astype(int)
                entry['score'] = accuracy_score(y_test, labels)

            entry['time'] = fastest_ms(lambda: model.fit(X_train, y_train))

        elif env == 'sklearn':
            if distr not in ['gaussian', 'binomial']:
                # no counterpart benchmarked for this distribution
                entry['score'] = -999.
                entry['time'] = -999.
            else:
                if distr == 'gaussian':
                    model = ElasticNet(alpha=self.reg_lambda,
                                       l1_ratio=self.alpha)
                elif distr == 'binomial':
                    model = SGDClassifier(loss='log', penalty='elasticnet',
                                          alpha=self.reg_lambda,
                                          l1_ratio=self.alpha)

                model.fit(X_train, y_train)
                y_test_hat = model.predict(X_test)
                entry['score'] = model.score(X_test, y_test)

                entry['time'] = fastest_ms(
                    lambda: model.fit(X_train, y_train))

        elif env == 'statsmodels':
            if distr == 'gaussian':
                model = sm.GLM(y_train, sm.add_constant(X_train),
                               family=sm.families.Gaussian())
            elif distr == 'binomial':
                model = sm.GLM(y_train, sm.add_constant(X_train),
                               family=sm.families.Binomial())
            elif distr == 'poisson':
                model = sm.GLM(y_train, sm.add_constant(X_train),
                               family=sm.families.Poisson())

            statsmodels_res = model.fit()
            X_test_const = sm.add_constant(X_test)
            y_test_hat = np.array(
                model.predict(statsmodels_res.params, exog=X_test_const))

            if distr in ['gaussian', 'poisson']:
                entry['score'] = r2_score(y_test, y_test_hat)
            elif distr == 'binomial':
                labels = (y_test_hat > 0.5).astype(int)
                entry['score'] = accuracy_score(y_test, labels)

            entry['time'] = fastest_ms(lambda: model.fit())

        elif env == 'R':
            glmnet = importr('glmnet')
            predict = robjects.r('predict')

            # any failure is recorded as -999. for this environment
            try:
                fit = glmnet.glmnet(X_train, y_train, family=distr,
                                    alpha=self.alpha, nlambda=1)
                r_pred = predict(fit, newx=X_test, s=0)

                # copy the R result element by element into a numpy array
                n_test = y_test.shape[0]
                y_test_hat = np.zeros(n_test)
                for i in range(n_test):
                    y_test_hat[i] = r_pred[i]

                if distr in ['gaussian', 'poisson']:
                    entry['score'] = r2_score(y_test, y_test_hat)
                elif distr == 'binomial':
                    labels = (y_test_hat > 0.5).astype(int)
                    entry['score'] = accuracy_score(y_test, labels)

                entry['time'] = fastest_ms(
                    lambda: glmnet.glmnet(X_train, y_train, family=distr,
                                          alpha=self.alpha, nlambda=1))
            except Exception:
                entry['score'] = -999.
                entry['time'] = -999.

    return res
########################################################

from sklearn.datasets import make_classification
X, y = make_classification(n_samples=10000, n_classes=5,
                           n_informative=100, n_features=100, n_redundant=0)

########################################################

########################################################
# Fit the model

########################################################

########################################################
from pyglmnet import GLM
glm_mn = GLM(distr='multinomial', alpha=0.01,
             reg_lambda=np.array([0.02, 0.01]), verbose=False)
glm_mn.threshold = 1e-5
glm_mn.fit(X, y)

########################################################

########################################################
# Predict and score the output

########################################################

# predict with the model sliced at the last reg_lambda
y_pred = glm_mn[-1].predict(X)
# ``mean()`` of the boolean matches is a fraction in [0, 1]; scale it by 100
# so that the number agrees with the "percent" label.
print('Percentage correct = %f percent.' % (100. * (y_pred == y).mean()))

########################################################
n_samples, n_features = X.shape

########################################################
# Split the data into training and test sets

X_train, X_test, y_train, y_test = \
    train_test_split(X, y, test_size=0.33, random_state=0)

########################################################
# Fit a gaussian distributed GLM with elastic net regularization

# use the default value for reg_lambda
glm = GLM(distr='gaussian', alpha=0.05, score_metric='pseudo_R2')

# fit model
glm.fit(X_train, y_train)

# score the test set prediction, using the model sliced at the last index
y_test_hat = glm[-1].predict(X_test)
print("test set pseudo $R^2$ = %f" % glm[-1].score(X_test, y_test))

########################################################
# Plot the true and predicted test set target values

plt.plot(y_test[:50], 'ko-')
plt.plot(y_test_hat[:50], 'ro-')
plt.legend(['true', 'pred'], frameon=False)
plt.xlabel('Counties')
plt.ylabel('Per capita violent crime')
# ``right`` expects a bool; the string 'off' is truthy and would leave the
# right-hand ticks switched on in current matplotlib.
plt.tick_params(axis='y', right=False)
# training data: random design matrix and responses simulated from it
Xr = np.random.normal(0.0, 1.0, [n_samples, n_features])
yr = glm_poisson.simulate(beta0, beta, Xr)

# testing data, drawn the same way as the training data
Xt = np.random.normal(0.0, 1.0, [n_samples, n_features])
yt = glm_poisson.simulate(beta0, beta, Xt)

##########################################################
# Fit the model
# ^^^^^^^^^^^^^
# Fitting the model is accomplished by a single GLM method called `fit()`.

##########################################################

# standardise the features with a scaler fitted on the training data
scaler = StandardScaler().fit(Xr)
glm_poisson.fit(scaler.transform(Xr), yr)

##########################################################
# Slicing the model object
# ^^^^^^^^^^^^^^^^^^^^^^^^
# Although the model is fit to all values of ``reg_lambda`` specified by a
# regularization path, often we are only interested in further analysis for
# a particular value of ``reg_lambda``. We can easily do this by slicing
# the object.
#
# For instance, ``model[0]`` returns an object identical to ``model`` but
# with ``.fit_`` as a dictionary corresponding to the estimated
# coefficients for ``reg_lambda[0]``.

##########################################################
# Visualize the fit coefficients
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
# The estimated coefficients are stored in an instance variable called
# ``.fit_``
beta0 = np.random.normal(0.0, 1.0, 1).flatten()
beta = sps.rand(10, 1, 1)
beta = np.array(beta.todense()).flatten()

# Generate random training data by using the previous betas
train_x = np.random.normal(0.0, 1.0, [10000, 10])
train_y = simulate_glm("neg-binomial", beta0, beta, train_x)

# plot the data distribution
sns.set(color_codes=True)
# NOTE(review): ``distplot`` is deprecated in recent seaborn releases --
# confirm the pinned version before replacing it.
sns.distplot(train_y)
plt.show()

# Create the GLM and train it
glm = GLM(distr="neg-binomial", max_iter=10000)
glm.fit(train_x, train_y)

# Print the betas and the beta0 to check for correctness
print("")
print(glm.beta0_)
print(glm.beta_)
print("")
print(beta0)
print(beta)

# Generate test data
# simulate testing data from the same distribution as the training data
# (this previously drew from "poisson", which does not match the
# "neg-binomial" model being evaluated)
X_test = np.random.normal(0.0, 1.0, [1000, 10])
y_test = simulate_glm("neg-binomial", beta0, beta, X_test)

# predict using fitted model on the test data
# %% import statsmodels.api as sm mod = sm.GLM(df['cnt'] / df['offset'], df[np.arange(10)], family=sm.families.Poisson()) mod = mod.fit() mod.summary() # %% from pyglmnet import GLM # create an instance of the GLM class glm = GLM(distr='poisson') glm = glm.fit(df[np.arange(10)].values, df['cnt'].values/df['offset'].values) glm # %% glm.get_params() # %% import keras inl = keras.layers.Input((10,)) out = keras.layers.Dense(1, use_bias=False)(inl) out = keras.layers.Lambda(lambda x: keras.backend.exp(x))(out) model = keras.models.Model(inl, out) model.compile(keras.optimizers.Adam(1e-3), 'poisson') model.summary()
# training data
Xr = np.random.normal(0.0, 1.0, [n_samples, n_features])
yr = glm_poissonexp.simulate(beta0, beta, Xr)

# testing data
Xt = np.random.normal(0.0, 1.0, [n_samples, n_features])
yt = glm_poissonexp.simulate(beta0, beta, Xt)

########################################################
# Fit model to training data

########################################################

# standardise the features with a scaler fitted on the training data
scaler = StandardScaler().fit(Xr)
glm_poissonexp.fit(scaler.transform(Xr), yr)

########################################################
# Use one model to predict

########################################################

# slice out the model at the last index and predict on both data sets
m = glm_poissonexp[-1]
this_model_param = m.fit_
yrhat = m.predict(scaler.transform(Xr))
ythat = m.predict(scaler.transform(Xt))

########################################################
# Visualize predicted output

########################################################
n_features=100, n_redundant=0) ######################################################## ######################################################## # Fit the model ######################################################## ######################################################## from pyglmnet import GLM glm_mn = GLM(distr='multinomial', alpha=0.01, reg_lambda=np.array([0.02, 0.01]), verbose=False) glm_mn.threshold = 1e-5 glm_mn.fit(X, y) ######################################################## ######################################################## # Predict and score the output ######################################################## y_pred = glm_mn[-1].predict(X).argmax(axis=1) print('Percentage correct = %f percent.' % (y_pred == y).mean()) ########################################################
def test_glmnet():
    """Test glmnet.

    Checks constructor validation, then for every distribution/solver pair
    fits an unregularized GLM on data simulated with ``sample=False`` and
    verifies that

    * the loss recorded by the callback never increases (up to 1e-7),
    * the final loss matches the loss at the generating coefficients,
    * the fitted coefficients match the generating coefficients.

    Coefficients obtained by the different solvers are also compared
    pairwise for each distribution, and ``fit_predict`` is smoke-tested.
    """
    raises(ValueError, GLM, distr='blah')
    raises(ValueError, GLM, distr='gaussian', max_iter=1.8)

    n_samples, n_features = 100, 10

    # coefficients
    # builtin float replaces the np.float alias (removed in NumPy 1.24)
    beta0 = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0, (n_features,))

    distrs = ['softplus', 'gaussian', 'poisson', 'binomial', 'probit']
    solvers = ['batch-gradient', 'cdfast']

    score_metric = 'pseudo_R2'
    learning_rate = 2e-1
    random_state = 0

    for distr in distrs:
        betas_ = list()
        for solver in solvers:

            # re-seed so every solver sees the same design matrix
            np.random.seed(random_state)

            X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
            y_train = simulate_glm(distr, beta0, beta, X_train,
                                   sample=False)

            alpha = 0.
            reg_lambda = 0.
            loss_trace = list()

            def callback(beta):
                # Records the loss for the coefficients handed to the
                # callback; used right away by the fit below, so the
                # closure over the loop variables is safe.
                Tau = None
                eta = 2.0
                group = None

                loss_trace.append(
                    _loss(distr, alpha, Tau, reg_lambda,
                          X_train, y_train, eta, group, beta))

            glm = GLM(distr, learning_rate=learning_rate,
                      reg_lambda=reg_lambda, tol=1e-3, max_iter=5000,
                      alpha=alpha, solver=solver, score_metric=score_metric,
                      random_state=random_state, callback=callback)
            assert (repr(glm))

            glm.fit(X_train, y_train)

            # verify loss decreases
            assert (np.all(np.diff(loss_trace) <= 1e-7))

            # verify loss at convergence = loss when beta=beta_
            l_true = _loss(distr, 0., np.eye(beta.shape[0]), 0.,
                           X_train, y_train, 2.0, None,
                           np.concatenate(([beta0], beta)))
            assert_allclose(loss_trace[-1], l_true, rtol=1e-4, atol=1e-5)
            # beta=beta_ when reg_lambda = 0.
            assert_allclose(beta, glm.beta_, rtol=0.05, atol=1e-2)
            betas_.append(glm.beta_)

            y_pred = glm.predict(X_train)
            assert (y_pred.shape[0] == X_train.shape[0])

        # compare all solvers pairwise to make sure they're close
        for i, first_beta in enumerate(betas_[:-1]):
            for second_beta in betas_[i + 1:]:
                assert_allclose(first_beta, second_beta, rtol=0.05, atol=1e-2)

    # test fit_predict
    glm_poisson = GLM(distr='softplus')
    glm_poisson.fit_predict(X_train, y_train)
    # a 3-D design matrix must be rejected
    raises(ValueError, glm_poisson.fit_predict, X_train[None, ...], y_train)
print(position_array.shape)

# Scatter columns 0-3 against columns 4-7 of every frame on one figure.
# NOTE(review): pl.show() is placed after the loop; the original
# indentation is not recoverable here - confirm.
pl.figure()
for n in range(n_frames):
    frame_row = all_position[n]
    pl.scatter(frame_row[0:4], frame_row[4:8], s=2, c='k')
pl.show()

# GLM
glm = GLM(distr='gaussian', alpha=0.05)

# Column 0 is the regression target; the remaining columns are features.
X = np.delete(all_position, 0, axis=1)
y = all_position[:, 0]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42)

# Standardize with statistics from the training split only.
scaler = StandardScaler().fit(X_train)
glm.fit(scaler.transform(X_train), y_train)
yhat = glm.predict(scaler.transform(X))
# print(glm.score(X_test, Y_test))
#
# plot
pl.figure()
pl.plot(y, marker='x', color='b', label='observed')
# Row 9 of the prediction array is the one plotted.
pl.plot(yhat[9, :], marker='o', color='r', label='trained')
pl.show()
# training data
Xr = np.random.normal(0.0, 1.0, [n_samples, n_features])
yr = glm_poissonexp.simulate(beta0, beta, Xr)

# testing data
Xt = np.random.normal(0.0, 1.0, [n_samples, n_features])
yt = glm_poissonexp.simulate(beta0, beta, Xt)

########################################################
# Fit model to training data

########################################################

# The scaler is fit on the training design and the model is trained on
# the standardized features.
scaler = StandardScaler().fit(Xr)
glm_poissonexp.fit(scaler.transform(Xr), yr)

########################################################
# Gradient of loss function

########################################################

# Evaluate the gradient at the last fitted coefficients. The design
# matrix must be standardized exactly as it was for fitting; the raw Xr
# would evaluate the gradient for a different problem than the one that
# produced these coefficients. 0.01 is the regularization strength
# passed to the gradient.
grad_beta0, grad_beta = glm_poissonexp._grad_L2loss(
    glm_poissonexp.fit_[-1]['beta0'],
    glm_poissonexp.fit_[-1]['beta'],
    0.01, scaler.transform(Xr), yr)
print(grad_beta[:5])

########################################################
# Use one model to predict

########################################################
plt.ylabel('time bin of response') plt.title('Sample first 50 rows of design' ' matrix created using Hankel') plt.show() ######################################################## # **Fitting and predicting with a linear-Gaussian GLM** # # For a general linear model, the observed spikes can be # thought of an underlying parameter # :math:`\beta_0, \beta` that control the spiking. # # You can simply use linear Gaussian GLM with no regularization # to predict the spike counts. glm_lg = GLM(distr='gaussian', reg_lambda=0.0, score_metric='pseudo_R2') glm_lg.fit(Xdsgn, y) # predict spike counts ypred_lg = glm_lg.predict(Xdsgn) ######################################################## # **Fitting and predicting with a Poisson GLM** # # We can also assume that there is a non-linear function governing # the underlying the firing patterns. # In pyglmnet, we use an exponential inverse link function # for the Poisson distribution. glm_poisson = GLM(distr='poisson', alpha=0.05, learning_rate=1.0,
def test_glmnet(distr, reg_lambda, fit_intercept, solver):
    """Test glmnet.

    Parameters
    ----------
    distr : str
        Distribution name passed to ``simulate_glm``, ``_loss`` and ``GLM``.
    reg_lambda : float
        Regularization strength; recovery of the generating loss and
        coefficients is only asserted when it equals 0.
    fit_intercept : bool
        Whether an intercept is simulated and fitted.
    solver : str
        Solver name passed to ``GLM``.

    The combination ``distr == 'gamma'`` with ``solver == 'cdfast'`` is
    skipped; only the constructor validation runs in that case.
    """
    raises(ValueError, GLM, distr='blah')
    raises(ValueError, GLM, distr='gaussian', max_iter=1.8)

    n_samples, n_features = 100, 10

    # coefficients
    # builtin float replaces the np.float alias (removed in NumPy 1.24)
    beta0 = 0.
    if fit_intercept:
        beta0 = 1. / (float(n_features) + 1.) * \
            np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + int(fit_intercept)) * \
        np.random.normal(0.0, 1.0, (n_features,))

    score_metric = 'pseudo_R2'
    learning_rate = 2e-1
    random_state = 0

    betas_ = list()

    if not (distr == 'gamma' and solver == 'cdfast'):

        np.random.seed(random_state)

        theta = 1.0
        X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
        y_train = simulate_glm(distr, beta0, beta, X_train,
                               theta=theta, sample=False)

        alpha = 0.
        loss_trace = list()
        eta = 2.0
        group = None
        Tau = None

        def callback(beta):
            # Records the loss for the coefficients handed to the callback.
            # Tau, eta, theta and group come from the enclosing scope.
            loss_trace.append(
                _loss(distr, alpha, Tau, reg_lambda,
                      X_train, y_train, eta, theta, group, beta,
                      fit_intercept=fit_intercept))

        glm = GLM(distr, learning_rate=learning_rate,
                  reg_lambda=reg_lambda, tol=1e-5, max_iter=5000,
                  alpha=alpha, solver=solver, score_metric=score_metric,
                  random_state=random_state, callback=callback,
                  fit_intercept=fit_intercept, theta=theta)
        assert(repr(glm))

        glm.fit(X_train, y_train)

        # verify loss decreases
        assert(np.all(np.diff(loss_trace) <= 1e-7))

        # true loss and beta should be recovered when reg_lambda == 0
        if reg_lambda == 0.:
            # verify loss at convergence = loss when beta=beta_
            l_true = _loss(distr, alpha, Tau, reg_lambda,
                           X_train, y_train, eta, theta, group,
                           np.concatenate(([beta0], beta)))
            assert_allclose(loss_trace[-1], l_true, rtol=1e-4, atol=1e-5)
            # beta=beta_ when reg_lambda = 0.
            assert_allclose(beta, glm.beta_, rtol=0.05, atol=1e-2)
        betas_.append(glm.beta_)

        y_pred = glm.predict(X_train)
        assert(y_pred.shape[0] == X_train.shape[0])

        # compare all solvers pairwise to make sure they're close
        # (betas_ holds a single entry per invocation, so this loop only
        # does work if more fits are appended above)
        for i, first_beta in enumerate(betas_[:-1]):
            for second_beta in betas_[i + 1:]:
                assert_allclose(first_beta, second_beta, rtol=0.05, atol=1e-2)

        # test fit_predict
        glm_poisson = GLM(distr='softplus')
        glm_poisson.fit_predict(X_train, y_train)
        # a 3-D design matrix must be rejected
        raises(ValueError, glm_poisson.fit_predict, X_train[None, ...],
               y_train)
base=np.exp(1))) #set up the lasso model glm = GLM(distr="binomial", tol=1e-2, score_metric="pseudo_R2", alpha=1.0, reg_lambda=np.logspace(np.log(100), np.log(0.01), 5, base=np.exp(1))) print("gl_glm: ", gl_glm) print("glm: ", glm) ########################################################## # Fit models gl_glm.fit(Xtrain, ytrain) glm.fit(Xtrain, ytrain) ########################################################## # Visualize model scores on test set plt.figure() plt.semilogx(gl_glm.reg_lambda, gl_glm.score(Xtest, ytest), 'go-') plt.semilogx(gl_glm.reg_lambda, gl_glm.score(Xtrain, ytrain), 'go--') plt.semilogx(glm.reg_lambda, glm.score(Xtest, ytest), 'ro-') plt.semilogx(glm.reg_lambda, glm.score(Xtrain, ytrain), 'ro--') plt.legend( ['Group Lasso: test', 'Group Lasso: train', 'Lasso: test', 'Lasso: train'], frameon=False, loc='best') plt.xlabel('$\lambda$')
reg_lambda=np.array([0.02, 0.01]), learning_rate=1e-3 ,verbose=False,) #initial values for the coefficients beta0 = np.random.normal(0.0, 1.0, 1) beta = sps.rand(n_features, 1, 0.1) beta = np.array(beta.todense()) model.threshold = 1e-5 #scaler = StandardScaler().fit(X_train) #model.fit(scaler.transform(X_train),y_train) # Fitting the model model.fit(X_train,y_train) #ploting the fit coefficients # TODO: fix this graph fit_param = model[0].fit_ plt.plot(beta[:], 'bo', label ='bo') plt.plot(fit_param['beta'][:], 'ro', label='ro') plt.xlabel('samples') plt.ylabel('outputs') plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=1, ncol=2, borderaxespad=0.) plt.show() # Makin the predictions base on fit model yt_predicted = model[-1].predict(X_test)
# Fit models
# sklearn.cross_validation was removed in scikit-learn 0.20;
# train_test_split is provided by sklearn.model_selection.
from sklearn.model_selection import train_test_split
Xtrain, Xtest, Ytrain, Ytest = train_test_split(features, spike_counts,
                                                test_size=0.2,
                                                random_state=42)

########################################################
from pyglmnet import utils
n_samples = Xtrain.shape[0]

# Build the Tikhonov matrix from the prior covariance and the number of
# training samples, then fit a Poisson GLM with alpha=0.
Tau = utils.tikhonov_from_prior(prior_cov, n_samples)

glm = GLM(distr='poisson', alpha=0., Tau=Tau, score_metric='pseudo_R2')
glm.fit(Xtrain, Ytrain)

# Pick the estimator with the highest score on the held-out split and
# report its scores on both splits.
cvopt_lambda = glm.score(Xtest, Ytest).argmax()
print("train score: %f" % glm[cvopt_lambda].score(Xtrain, Ytrain))
print("test score: %f" % glm[cvopt_lambda].score(Xtest, Ytest))
weights = glm[cvopt_lambda].fit_['beta']

########################################################
# Visualize
for time_bin_ in range(n_temporal_basis):
    # every n_temporal_basis-th weight, starting at this time bin
    RF = strf_model.make_image_from_spatial_basis(
        spatial_basis,
        weights[range(time_bin_,
                      n_spatial_basis * n_temporal_basis,
                      n_temporal_basis)])
    plt.subplot(1, n_temporal_basis, time_bin_ + 1)
def fit(self, X, Y, get_history_terms=True):
    """
    Fits the model to the data in X to predict the response Y.

    Builds a fresh model instance for the built-in model types and stores
    the fitted model on ``self.model``. For any other ``self.tunemodel``
    value the existing ``self.model`` is fitted in place.

    Parameters
    ----------
    X: float, n_samples x n_features, features of interest
    Y: float, n_samples x 1, population activity
    get_history_terms: Boolean. Whether to compute the temporal features.
        Note that if spike_history and cov_history are False, no history
        will be computed anyways and the flag does nothing.

    Notes
    -----
    ``self.tunemodel`` selects the branch: 'glm', 'feedforward_nn',
    'xgboost', 'random_forest' or 'lstm'. Hyperparameters are read from
    ``self.params``. A warning is issued when ``self.default_params`` is
    truthy.
    """
    if self.default_params:
        warnings.warn(
            '\n Using default hyperparameters. Consider optimizing on' +
            ' a held-out dataset using, e.g. hyperopt or random search')

    # make the covariate matrix. Include spike or covariate history?
    # The different methods here are to satisfy the needs of recurrent keras
    # models
    if get_history_terms:
        if self.tunemodel == 'lstm':
            X, Y = self.get_all_with_history_keras(X, Y)
        else:
            X, Y = self.get_all_with_history(X, Y)

    if self.tunemodel == 'glm':
        model = GLM(**self.params)
        model.fit(X, Y)

        # we want the last of the regularization path
        self.model = model[-1]

    elif self.tunemodel == 'feedforward_nn':

        # promote a 1-D feature vector to a single-column matrix
        if np.ndim(X) == 1:
            X = np.transpose(np.atleast_2d(X))

        params = self.params
        model = Sequential()
        model.add(
            Dense(params['n1'],
                  input_dim=np.shape(X)[1],
                  kernel_initializer='glorot_normal',
                  activation='relu',
                  kernel_regularizer=l2(params['l2'])))
        model.add(Dropout(params['dropout']))
        model.add(BatchNormalization())
        model.add(
            Dense(params['n2'],
                  kernel_initializer='glorot_normal',
                  activation='relu',
                  kernel_regularizer=l2(params['l2'])))
        model.add(BatchNormalization())
        model.add(Dense(1, activation='softplus'))
        # b1 / b2 are stored as (1 - beta); convert back for the optimizer
        optim = adam(lr=params['lr'],
                     clipnorm=params['clipnorm'],
                     decay=params['decay'],
                     beta_1=1 - params['b1'],
                     beta_2=1 - params['b2'])
        model.compile(
            loss='poisson',
            optimizer=optim,
        )
        # the training history is not used, so it is not kept
        model.fit(X, Y, batch_size=128, epochs=30, verbose=self.verbose)

        self.model = model

    elif self.tunemodel == 'xgboost':

        dtrain = xgb.DMatrix(X, label=Y)
        num_round = 200
        self.model = xgb.train(self.params, dtrain, num_round)

    elif self.tunemodel == 'random_forest':

        self.model = RandomForestRegressor(**self.params)
        self.model.fit(X, Y)

    elif self.tunemodel == 'lstm':

        # promote a 1-D feature vector to a single-column matrix
        if np.ndim(X) == 1:
            X = np.transpose(np.atleast_2d(X))

        params = self.params
        model = Sequential()  # Declare model
        # Add recurrent layer. `dropout` / `recurrent_dropout` are the
        # Keras 2 names of the former `dropout_W` / `dropout_U` arguments,
        # matching the Keras 2 keywords used elsewhere in this method.
        # Within recurrent layer, include dropout
        model.add(LSTM(int(params['n_units']),
                       input_shape=(X.shape[1], X.shape[2]),
                       dropout=params['dropout'],
                       recurrent_dropout=params['dropout']))
        # Dropout some units (recurrent layer output units)
        model.add(Dropout(params['dropout']))

        # Add dense connections to output layer
        model.add(Dense(1, activation='softplus'))

        # Fit model (and set fitting parameters)
        model.compile(loss='poisson', optimizer='rmsprop',
                      metrics=['accuracy'])
        # Fit the model
        model.fit(X, Y, epochs=int(params['epochs']),
                  batch_size=int(params['batch_size']),
                  verbose=self.verbose)

        self.model = model

    else:  # using predefined model
        self.model.fit(X, Y)
np.shape(prior_cov)

########################################################
# Fit models
# sklearn.cross_validation was removed in scikit-learn 0.20;
# train_test_split is provided by sklearn.model_selection.
from sklearn.model_selection import train_test_split
Xtrain, Xtest, Ytrain, Ytest = train_test_split(features, spike_counts,
                                                test_size=0.2,
                                                random_state=42)

########################################################
from pyglmnet import utils
n_samples = Xtrain.shape[0]

# Build the Tikhonov matrix from the prior covariance and the number of
# training samples, then fit a Poisson GLM with alpha=0.
Tau = utils.tikhonov_from_prior(prior_cov, n_samples)

glm = GLM(distr='poisson', alpha=0., Tau=Tau, score_metric='pseudo_R2')
glm.fit(Xtrain, Ytrain)

# Pick the estimator with the highest score on the held-out split and
# report its scores on both splits.
cvopt_lambda = glm.score(Xtest, Ytest).argmax()
print("train score: %f" % glm[cvopt_lambda].score(Xtrain, Ytrain))
print("test score: %f" % glm[cvopt_lambda].score(Xtest, Ytest))
weights = glm[cvopt_lambda].fit_['beta']

########################################################
# Visualize
for time_bin_ in range(n_temporal_basis):
    # every n_temporal_basis-th weight, starting at this time bin
    RF = strf_model.make_image_from_spatial_basis(
        spatial_basis,
        weights[range(time_bin_,
                      n_spatial_basis * n_temporal_basis,
                      n_temporal_basis)])
    plt.subplot(1, n_temporal_basis, time_bin_+1)