# Imports for the names used below; in the pyglmnet test suite these
# normally sit at the top of the file (the older tests use nose-style
# asserts, the newer ones plain pytest).
import numpy as np
import pytest
import scipy.sparse as sps
from pytest import raises
from numpy.testing import assert_allclose, assert_array_equal, assert_equal
from nose.tools import assert_true, assert_raises
from sklearn.preprocessing import StandardScaler

from pyglmnet import GLM, GLMCV, simulate_glm
from pyglmnet.pyglmnet import _loss


def test_random_state_consistency():
    """Test that a fixed random_state yields reproducible predictions."""
    # Generate the dataset
    n_samples, n_features = 1000, 10
    beta0 = 1. / (float(n_features) + 1.) * np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0, (n_features,))
    Xtrain = np.random.normal(0.0, 1.0, [n_samples, n_features])
    ytrain = simulate_glm("gaussian", beta0, beta, Xtrain,
                          sample=False, random_state=42)

    # Test simple glm
    glm_a = GLM(distr="gaussian", random_state=1)
    ypred_a = glm_a.fit_predict(Xtrain, ytrain)
    glm_b = GLM(distr="gaussian", random_state=1)
    ypred_b = glm_b.fit_predict(Xtrain, ytrain)

    # Consistency between two different models with the same seed
    assert_array_equal(ypred_a, ypred_b)

    # Test cross-validation as well, including a repeated fit of the
    # same estimator
    glm_cv_a = GLMCV(distr="gaussian", cv=3, random_state=1)
    ypred_a = glm_cv_a.fit_predict(Xtrain, ytrain)
    glm_cv_b = GLMCV(distr="gaussian", cv=3, random_state=1)
    ypred_b = glm_cv_b.fit_predict(Xtrain, ytrain)
    ypred_c = glm_cv_b.fit_predict(Xtrain, ytrain)
    assert_array_equal(ypred_a, ypred_b)
    assert_array_equal(ypred_b, ypred_c)
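
# The test above leans on ``simulate_glm`` being deterministic for a fixed
# ``random_state``. A minimal standalone sketch of that assumption, using
# only calls that already appear in this file (hypothetical helper, not
# part of the original suite):
def _check_simulate_glm_determinism():
    rng = np.random.RandomState(0)
    n_samples, n_features = 50, 5
    beta0 = rng.normal()
    beta = rng.normal(size=(n_features,))
    X = rng.normal(size=(n_samples, n_features))
    # the same seed twice should give identical sampled responses
    y_a = simulate_glm("gaussian", beta0, beta, X,
                       sample=True, random_state=42)
    y_b = simulate_glm("gaussian", beta0, beta, X,
                       sample=True, random_state=42)
    assert_array_equal(y_a, y_b)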

def test_glmnet():
    """Test glmnet."""
    scaler = StandardScaler()
    n_samples, n_features = 1000, 100
    density = 0.1
    n_lambda = 10

    # coefficients
    beta0 = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0, [n_features, 1])

    distrs = ['softplus', 'poisson', 'gaussian', 'binomial']
    solvers = ['batch-gradient', 'cdfast']
    score_metric = 'pseudo_R2'
    learning_rate = 2e-1

    for solver in solvers:
        for distr in distrs:
            glm = GLM(distr, learning_rate=learning_rate,
                      solver=solver, score_metric=score_metric)
            assert_true(repr(glm))

            np.random.seed(glm.random_state)
            X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
            y_train = glm.simulate(beta0, beta, X_train)

            X_train = scaler.fit_transform(X_train)
            glm.fit(X_train, y_train)

            beta_ = glm.fit_[-1]['beta'][:]
            assert_allclose(beta[:], beta_, atol=0.5)  # check fit

            y_pred = glm.predict(scaler.transform(X_train))
            assert_equal(y_pred.shape, (n_lambda, X_train.shape[0]))

    # checks for slicing
    glm = glm[:3]
    glm_copy = glm.copy()
    assert_true(glm_copy is not glm)
    assert_equal(len(glm.reg_lambda), 3)
    y_pred = glm[:2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (2, X_train.shape[0]))
    y_pred = glm[2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (X_train.shape[0], ))
    assert_raises(IndexError, glm.__getitem__, [2])
    glm.score(X_train, y_train)

    # don't allow slicing if model has not been fit yet
    glm_poisson = GLM(distr='softplus')
    assert_raises(ValueError, glm_poisson.__getitem__, 2)

    # test fit_predict
    glm_poisson.fit_predict(X_train, y_train)
    assert_raises(ValueError, glm_poisson.fit_predict,
                  X_train[None, ...], y_train)

def test_glmnet():
    """Test glmnet."""
    scaler = StandardScaler()
    n_samples, n_features = 1000, 100
    density = 0.1
    n_lambda = 10

    # coefficients
    beta0 = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0, [n_features, 1])

    distrs = ['poisson', 'poissonexp', 'normal', 'binomial']
    solvers = ['batch-gradient', 'cdfast']
    learning_rate = 2e-1

    for solver in solvers:
        for distr in distrs:
            glm = GLM(distr, learning_rate=learning_rate, solver=solver)
            assert_true(repr(glm))

            np.random.seed(glm.random_state)
            X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
            y_train = glm.simulate(beta0, beta, X_train)

            X_train = scaler.fit_transform(X_train)
            glm.fit(X_train, y_train)

            beta_ = glm.fit_[-1]['beta'][:]
            assert_allclose(beta[:], beta_, atol=0.5)  # check fit

            y_pred = glm.predict(scaler.transform(X_train))
            assert_equal(y_pred.shape, (n_lambda, X_train.shape[0]))

    # checks for slicing
    glm = glm[:3]
    glm_copy = glm.copy()
    assert_true(glm_copy is not glm)
    assert_equal(len(glm.reg_lambda), 3)
    y_pred = glm[:2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (2, X_train.shape[0]))
    y_pred = glm[2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (X_train.shape[0], ))
    assert_raises(IndexError, glm.__getitem__, [2])
    glm.score(y_train, y_pred)

    # don't allow slicing if model has not been fit yet
    glm_poisson = GLM(distr='poisson')
    assert_raises(ValueError, glm_poisson.__getitem__, 2)

    # test fit_predict
    glm_poisson.fit_predict(X_train, y_train)
    assert_raises(ValueError, glm_poisson.fit_predict,
                  X_train[None, ...], y_train)

def test_glmnet():
    """Test glmnet."""
    scaler = StandardScaler()
    n_samples, n_features = 10000, 100
    density = 0.1
    n_lambda = 10

    # coefficients
    beta0 = np.random.rand()
    beta = sps.rand(n_features, 1, density=density).toarray()

    distrs = ['poisson', 'poissonexp', 'normal', 'binomial']
    for distr in distrs:
        # FIXME: why do we need such a small learning rate for 'poissonexp'?
        learning_rate = 1e-5 if distr == 'poissonexp' else 1e-4
        glm = GLM(distr, learning_rate=learning_rate)
        assert_true(repr(glm))

        np.random.seed(glm.random_state)
        X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
        y_train = glm.simulate(beta0, beta, X_train)

        X_train = scaler.fit_transform(X_train)
        glm.fit(X_train, y_train)

        beta_ = glm.fit_[-2]['beta'][:]
        assert_allclose(beta[:], beta_, atol=0.1)  # check fit

        density_ = np.sum(beta_ > 0.1) / float(n_features)
        assert_allclose(density_, density, atol=0.05)  # check density

        y_pred = glm.predict(scaler.transform(X_train))
        assert_equal(y_pred.shape, (n_lambda, X_train.shape[0]))

    # checks for slicing
    glm = glm[:3]
    glm_copy = glm.copy()
    assert_true(glm_copy is not glm)
    assert_equal(len(glm.reg_lambda), 3)
    y_pred = glm[:2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (2, X_train.shape[0]))
    y_pred = glm[2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (X_train.shape[0], ))
    assert_raises(IndexError, glm.__getitem__, [2])
    glm.deviance(y_train, y_pred)

    # don't allow slicing if model has not been fit yet
    glm = GLM(distr='poisson')
    assert_raises(ValueError, glm.__getitem__, 2)

    # test fit_predict
    glm.fit_predict(X_train, y_train)
    assert_raises(ValueError, glm.fit_predict, X_train[None, ...], y_train)
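
# In the sparse-coefficient variant above, "density" recovery reduces to
# counting estimated coefficients whose value clears a threshold. The same
# computation as a tiny numpy-only helper (hypothetical name, shown for
# illustration only):
def _recovered_density(beta_hat, threshold=0.1):
    """Fraction of coefficients estimated above ``threshold``."""
    beta_hat = np.asarray(beta_hat)
    return np.sum(beta_hat > threshold) / float(beta_hat.size)

# e.g. _recovered_density(np.array([0.0, 0.2, 0.05, 0.3])) == 0.5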

def test_glmnet():
    """Test glmnet."""
    scaler = StandardScaler()
    n_samples, n_features = 100, 10

    # coefficients
    beta0 = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0, (n_features,))

    distrs = ['softplus', 'gaussian', 'poisson', 'binomial', 'probit']
    solvers = ['batch-gradient', 'cdfast']
    score_metric = 'pseudo_R2'
    learning_rate = 2e-1

    for solver in solvers:
        for distr in distrs:
            glm = GLM(distr, learning_rate=learning_rate,
                      solver=solver, score_metric=score_metric)
            assert_true(repr(glm))

            np.random.seed(glm.random_state)
            X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
            y_train = simulate_glm(glm.distr, beta0, beta, X_train)

            X_train = scaler.fit_transform(X_train)
            glm.fit(X_train, y_train)

            beta_ = glm.beta_
            assert_allclose(beta, beta_, atol=0.5)  # check fit

            y_pred = glm.predict(scaler.transform(X_train))
            assert_equal(y_pred.shape[0], X_train.shape[0])

    # test fit_predict
    glm_poisson = GLM(distr='softplus')
    glm_poisson.fit_predict(X_train, y_train)
    assert_raises(ValueError, glm_poisson.fit_predict,
                  X_train[None, ...], y_train)
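
# All of these tests follow the same preprocessing pattern: fit the scaler
# on the training design, then reuse the *fitted* scaler (its stored mean
# and std) at predict time rather than refitting. A minimal sketch of the
# pattern in isolation (hypothetical helper, illustrative data only):
def _scaler_pattern_sketch():
    rng = np.random.RandomState(0)
    X_train = rng.normal(5.0, 2.0, (100, 10))  # raw, non-centered features
    scaler = StandardScaler()
    X_train_s = scaler.fit_transform(X_train)  # learn mean/std on train
    X_new = rng.normal(5.0, 2.0, (10, 10))
    X_new_s = scaler.transform(X_new)          # apply stats; do not refit
    return X_train_s, X_new_s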

# NOTE: this version of the test is pytest-parametrized. The decorators
# below are assumed for this excerpt; the exact parameter grids in the
# original suite may differ.
@pytest.mark.parametrize("distr", ['softplus', 'gaussian', 'poisson',
                                   'binomial', 'probit', 'gamma'])
@pytest.mark.parametrize("reg_lambda", [0., 0.1])
@pytest.mark.parametrize("fit_intercept", [True, False])
@pytest.mark.parametrize("solver", ['batch-gradient', 'cdfast'])
def test_glmnet(distr, reg_lambda, fit_intercept, solver):
    """Test glmnet."""
    raises(ValueError, GLM, distr='blah')
    raises(ValueError, GLM, distr='gaussian', max_iter=1.8)

    n_samples, n_features = 100, 10

    # coefficients
    beta0 = 0.
    if fit_intercept:
        beta0 = 1. / (float(n_features) + 1.) * np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + int(fit_intercept)) * \
        np.random.normal(0.0, 1.0, (n_features,))

    score_metric = 'pseudo_R2'
    learning_rate = 2e-1
    random_state = 0

    betas_ = list()
    # the 'cdfast' solver does not handle 'gamma', so that pair is skipped
    if not (distr == 'gamma' and solver == 'cdfast'):
        np.random.seed(random_state)

        theta = 1.0
        X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
        y_train = simulate_glm(distr, beta0, beta, X_train,
                               theta=theta, sample=False)

        alpha = 0.
        loss_trace = list()
        eta = 2.0
        group = None
        Tau = None

        def callback(beta):
            Tau = None
            loss_trace.append(
                _loss(distr, alpha, Tau, reg_lambda,
                      X_train, y_train, eta, theta, group, beta,
                      fit_intercept=fit_intercept))

        glm = GLM(distr, learning_rate=learning_rate,
                  reg_lambda=reg_lambda, tol=1e-5, max_iter=5000,
                  alpha=alpha, solver=solver, score_metric=score_metric,
                  random_state=random_state, callback=callback,
                  fit_intercept=fit_intercept, theta=theta)
        assert repr(glm)

        glm.fit(X_train, y_train)

        # verify loss decreases
        assert np.all(np.diff(loss_trace) <= 1e-7)

        # true loss and beta should be recovered when reg_lambda == 0
        if reg_lambda == 0.:
            # verify loss at convergence = loss when beta = beta_
            l_true = _loss(distr, alpha, Tau, reg_lambda,
                           X_train, y_train, eta, theta, group,
                           np.concatenate(([beta0], beta)))
            assert_allclose(loss_trace[-1], l_true, rtol=1e-4, atol=1e-5)
            # beta = beta_ when reg_lambda = 0.
            assert_allclose(beta, glm.beta_, rtol=0.05, atol=1e-2)
        betas_.append(glm.beta_)

        y_pred = glm.predict(X_train)
        assert y_pred.shape[0] == X_train.shape[0]

    # compare all solvers pairwise to make sure they're close
    for i, first_beta in enumerate(betas_[:-1]):
        for second_beta in betas_[i + 1:]:
            assert_allclose(first_beta, second_beta, rtol=0.05, atol=1e-2)

    # test fit_predict (reuses X_train/y_train from the branch above)
    glm_poisson = GLM(distr='softplus')
    glm_poisson.fit_predict(X_train, y_train)
    raises(ValueError, glm_poisson.fit_predict,
           X_train[None, ...], y_train)

def test_glmnet():
    """Test glmnet."""
    raises(ValueError, GLM, distr='blah')
    raises(ValueError, GLM, distr='gaussian', max_iter=1.8)

    n_samples, n_features = 100, 10

    # coefficients
    beta0 = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0, (n_features,))

    distrs = ['softplus', 'gaussian', 'poisson', 'binomial', 'probit']
    solvers = ['batch-gradient', 'cdfast']
    score_metric = 'pseudo_R2'
    learning_rate = 2e-1
    random_state = 0

    for distr in distrs:
        betas_ = list()
        for solver in solvers:
            np.random.seed(random_state)

            X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
            y_train = simulate_glm(distr, beta0, beta, X_train,
                                   sample=False)

            alpha = 0.
            reg_lambda = 0.
            loss_trace = list()

            def callback(beta):
                Tau = None
                eta = 2.0
                group = None
                loss_trace.append(
                    _loss(distr, alpha, Tau, reg_lambda,
                          X_train, y_train, eta, group, beta))

            glm = GLM(distr, learning_rate=learning_rate,
                      reg_lambda=reg_lambda, tol=1e-3, max_iter=5000,
                      alpha=alpha, solver=solver,
                      score_metric=score_metric,
                      random_state=random_state, callback=callback)
            assert repr(glm)

            glm.fit(X_train, y_train)

            # verify loss decreases
            assert np.all(np.diff(loss_trace) <= 1e-7)

            # verify loss at convergence = loss when beta = beta_
            l_true = _loss(distr, 0., np.eye(beta.shape[0]), 0.,
                           X_train, y_train, 2.0, None,
                           np.concatenate(([beta0], beta)))
            assert_allclose(loss_trace[-1], l_true, rtol=1e-4, atol=1e-5)

            # beta = beta_ when reg_lambda = 0.
            assert_allclose(beta, glm.beta_, rtol=0.05, atol=1e-2)
            betas_.append(glm.beta_)

            y_pred = glm.predict(X_train)
            assert y_pred.shape[0] == X_train.shape[0]

        # compare all solvers pairwise to make sure they're close
        for i, first_beta in enumerate(betas_[:-1]):
            for second_beta in betas_[i + 1:]:
                assert_allclose(first_beta, second_beta,
                                rtol=0.05, atol=1e-2)

    # test fit_predict
    glm_poisson = GLM(distr='softplus')
    glm_poisson.fit_predict(X_train, y_train)
    raises(ValueError, glm_poisson.fit_predict,
           X_train[None, ...], y_train)
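
# Taken together, the tests above exercise the basic workflow: simulate a
# GLM, fit it, then predict and score. A short end-to-end sketch of that
# workflow outside the test harness, using only calls that already appear
# in this file (hypothetical helper; dimensions and seeds are arbitrary):
def _example_workflow():
    rng = np.random.RandomState(0)
    n_samples, n_features = 200, 10
    beta0 = rng.normal() / (n_features + 1.)
    beta = rng.normal(size=(n_features,)) / (n_features + 1.)
    X = rng.normal(size=(n_samples, n_features))
    y = simulate_glm("gaussian", beta0, beta, X, sample=False)

    glm = GLM(distr="gaussian", score_metric="pseudo_R2")
    glm.fit(X, y)
    y_hat = glm.predict(X)           # predictions on the training design
    return glm.score(X, y), y_hat    # pseudo-R2 on the training data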