@pytest.mark.parametrize("sparse_X", [True, False])
def test_LogisticRegression(sparse_X):
    np.random.seed(1409)
    X, y = build_dataset(n_samples=30, n_features=60, sparse_X=sparse_X)
    y = np.sign(y)
    # alpha_max = ||X^T y||_inf / 2 is the smallest L1 penalty giving an
    # all-zero solution; C = 30 / alpha_max keeps the problem sparse but
    # non-degenerate.
    alpha_max = norm(X.T.dot(y), ord=np.inf) / 2
    C = 30. / alpha_max
    tol = 1e-8
    clf1 = LogisticRegression(C=C, tol=tol, verbose=2)
    clf1.fit(X, y)

    clf2 = sklearn_Logreg(
        C=C, penalty='l1', solver='liblinear', fit_intercept=False, tol=tol)
    clf2.fit(X, y)
    np.testing.assert_allclose(clf1.coef_, clf2.coef_, rtol=1e-3, atol=1e-5)

    # check_estimator uses float32 data, so we increase tol to avoid
    # precision issues
    clf1.tol = 1e-4
    check_estimator(clf1)

    # multinomial test: a slightly looser tol is needed for the results
    # to be comparable
    y = np.random.choice(4, len(y))
    clf3 = LogisticRegression(C=C, tol=tol, verbose=2)
    clf3.fit(X, y)

    clf4 = sklearn_Logreg(
        C=C, penalty='l1', solver='liblinear', fit_intercept=False, tol=tol)
    clf4.fit(X, y)
    np.testing.assert_allclose(clf3.coef_, clf4.coef_, rtol=1e-3, atol=1e-3)

    clf3.tol = 1e-3
    check_estimator(clf3)


def test_dropin_logreg():
    np.random.seed(1409)
    check_estimator(LogisticRegression)
    X, y = build_dataset(n_samples=100, n_features=100, sparse_X=True)
    y = np.sign(y)
    alpha_max = norm(X.T.dot(y), ord=np.inf) / 2
    C = 30. / alpha_max
    tol = 1e-8
    clf1 = LogisticRegression(C=C, tol=tol)
    clf1.fit(X, y)

    clf2 = sklearn_Logreg(C=C, penalty='l1', solver='liblinear',
                          fit_intercept=False, tol=tol)
    clf2.fit(X, y)
    np.testing.assert_allclose(clf1.coef_, clf2.coef_, rtol=1e-3, atol=1e-5)

    # multinomial test:
    y = np.random.choice(4, len(y))
    clf3 = LogisticRegression(C=C, tol=tol)
    clf3.fit(X, y)

    clf4 = sklearn_Logreg(C=C, penalty='l1', solver='liblinear',
                          fit_intercept=False, tol=tol)
    clf4.fit(X, y)
    np.testing.assert_allclose(clf3.coef_, clf4.coef_, rtol=1e-3, atol=1e-4)


def test_zero_iter():
    X, y = build_dataset(n_samples=30, n_features=50)
    # a convergence warning is raised because we return -1 as the gap
    with warnings.catch_warnings(record=True):
        assert_allclose(Lasso(max_iter=0).fit(X, y).coef_, 0)

        y = 2 * (y > 0) - 1
        assert_allclose(
            LogisticRegression(max_iter=0, solver="celer-pn").fit(X, y).coef_,
            0)
        assert_allclose(
            LogisticRegression(max_iter=0, solver="celer").fit(X, y).coef_, 0)


@pytest.mark.parametrize("sparse_X", [True, False])
def test_binary(sparse_X):
    np.random.seed(1409)
    X, y = build_dataset(n_samples=30, n_features=60, sparse_X=sparse_X)
    y = np.sign(y)
    alpha_max = norm(X.T.dot(y), ord=np.inf) / 2
    C = 20. / alpha_max

    # a negative C is invalid and must raise
    clf = LogisticRegression(C=-1)
    np.testing.assert_raises(ValueError, clf.fit, X, y)

    tol = 1e-8
    clf = LogisticRegression(C=C, tol=tol, verbose=0)
    clf.fit(X, y)

    clf_sk = sklearn_Logreg(C=C, penalty='l1', solver='liblinear',
                            fit_intercept=False, tol=tol)
    clf_sk.fit(X, y)
    assert_allclose(clf.coef_, clf_sk.coef_, rtol=1e-3, atol=1e-5)


@pytest.mark.parametrize("sparse_X", [True, False])
def test_multinomial(sparse_X):
    np.random.seed(1409)
    X, y = build_dataset(n_samples=30, n_features=60, sparse_X=sparse_X)
    y = np.random.choice(4, len(y))
    tol = 1e-8
    clf = LogisticRegression(C=1, tol=tol, verbose=0)
    clf.fit(X, y)

    clf_sk = sklearn_Logreg(C=1, penalty='l1', solver='liblinear',
                            fit_intercept=False, tol=tol)
    clf_sk.fit(X, y)
    assert_allclose(clf.coef_, clf_sk.coef_, rtol=1e-3, atol=1e-3)


# parametrized over the solvers used elsewhere in this file
@pytest.mark.parametrize("solver", ["celer", "celer-pn"])
def test_check_estimator(solver):
    # sklearn fits on unnormalized data, for which there are convergence
    # issues; fix with an increased tolerance:
    clf = LogisticRegression(C=1, solver=solver, tol=0.1)
    check_estimator(clf)
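

# The tests above scale C against alpha_max = norm(X.T.dot(y), inf) / 2: for
# L1-penalized logistic regression with labels in {-1, 1}, the gradient of the
# unscaled loss at w = 0 is -X.T.dot(y) / 2, so any C <= 1 / alpha_max makes
# the all-zero vector optimal. The sketch below illustrates that property; it
# is not part of the original suite, and the test name and the 0.5 safety
# factor are assumptions.
def test_alpha_max_gives_zero_coef():
    np.random.seed(1409)
    X, y = build_dataset(n_samples=30, n_features=60, sparse_X=False)
    y = np.sign(y)
    alpha_max = norm(X.T.dot(y), ord=np.inf) / 2
    # C well below the critical value 1 / alpha_max: the penalty dominates and
    # every coefficient should be exactly zero.
    clf = LogisticRegression(C=0.5 / alpha_max, tol=1e-8)
    clf.fit(X, y)
    assert_allclose(clf.coef_, 0)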