def test_fista_multiclass_l1l2_log_margin():
    """Fit a multiclass FISTA model with the "l1/l2" penalty and "log_margin" loss.

    The model is trained once on ``mult_dense`` and once on ``mult_csr``; each
    time the score on the training input must be approximately 0.95
    (precision argument 2).
    """
    for features in (mult_dense, mult_csr):
        model = FistaClassifier(
            max_iter=200,
            penalty="l1/l2",
            loss="log_margin",
            multiclass=True,
        )
        model.fit(features, mult_target)
        accuracy = model.score(features, mult_target)
        assert_almost_equal(accuracy, 0.95, 2)
def test_fista_multiclass_l1_no_line_search():
    """Fit a multiclass l1-penalized FISTA model with ``max_steps=0``.

    Runs on both ``mult_dense`` and ``mult_csr``; the score on the training
    input must be approximately 0.95 (precision argument 2).
    """
    for features in (mult_dense, mult_csr):
        model = FistaClassifier(
            max_iter=500,
            penalty="l1",
            multiclass=True,
            max_steps=0,
        )
        model.fit(features, mult_target)
        accuracy = model.score(features, mult_target)
        assert_almost_equal(accuracy, 0.95, 2)
def test_fista_custom_prox(data, request):
    """Check FISTA with a custom prox against the built-in "l1" penalty.

    ``data`` is the name of a fixture, resolved through
    ``request.getfixturevalue``, that yields an ``(X, y)`` pair. Two
    classifiers with identical settings are fitted, one selecting the penalty
    by name and one receiving an ``L1Penalty`` instance; their flattened
    coefficients are compared with ``assert_array_almost_equal_nulp``.
    """
    custom_penalty = L1Penalty()
    features, labels = request.getfixturevalue(data)

    # Reference model: penalty selected by its string name.
    by_name = FistaClassifier(max_iter=500, penalty="l1", max_steps=0)
    by_name.fit(features, labels)

    # Same settings, penalty supplied as an object.
    by_object = FistaClassifier(max_iter=500, penalty=custom_penalty, max_steps=0)
    by_object.fit(features, labels)

    expected = by_name.coef_.ravel()
    actual = by_object.coef_.ravel()
    np.testing.assert_array_almost_equal_nulp(expected, actual)
def test_fista_custom_prox():
    """Check FISTA with a custom prox against the built-in "l1" penalty.

    For ``bin_dense`` and ``bin_csr`` in turn, two classifiers with identical
    settings are fitted, one selecting the penalty by name and one receiving
    an ``L1Penalty`` instance (shared across both inputs). Their flattened
    coefficients are compared with ``assert_array_almost_equal_nulp``.
    """
    custom_penalty = L1Penalty()
    for features in (bin_dense, bin_csr):
        # Reference model: penalty selected by its string name.
        by_name = FistaClassifier(max_iter=500, penalty="l1", max_steps=0)
        by_name.fit(features, bin_target)

        # Same settings, penalty supplied as an object.
        by_object = FistaClassifier(
            max_iter=500, penalty=custom_penalty, max_steps=0
        )
        by_object.fit(features, bin_target)

        expected = by_name.coef_.ravel()
        actual = by_object.coef_.ravel()
        np.testing.assert_array_almost_equal_nulp(expected, actual)
def test_fista_custom_prox():
    """Check FISTA with a custom prox against the built-in "l1" penalty.

    For ``bin_dense`` and ``bin_csr`` in turn, two classifiers with identical
    settings are fitted, one selecting the penalty by name and one receiving
    an ``L1Penalty`` instance (shared across both inputs). Their flattened
    coefficients are compared with ``assert_array_almost_equal_nulp``.
    """
    custom_penalty = L1Penalty()
    for features in (bin_dense, bin_csr):
        # Reference model: penalty selected by its string name.
        by_name = FistaClassifier(max_iter=500, penalty="l1", max_steps=0)
        by_name.fit(features, bin_target)

        # Same settings, penalty supplied as an object.
        by_object = FistaClassifier(
            max_iter=500, penalty=custom_penalty, max_steps=0
        )
        by_object.fit(features, bin_target)

        expected = by_name.coef_.ravel()
        actual = by_object.coef_.ravel()
        np.testing.assert_array_almost_equal_nulp(expected, actual)
def test_fista_multiclass_tv1d():
    """Exercise the multiclass "tv1d" penalty on ``mult_dense`` and ``mult_csr``.

    Two checks per input: the score on the training input must be
    approximately 0.97 (precision argument 2), and with ``alpha=1e6`` each row
    of ``coef_`` must be almost equal to a constant vector holding that row's
    mean.
    """
    for features in (mult_dense, mult_csr):
        model = FistaClassifier(max_iter=200, penalty="tv1d", multiclass=True)
        model.fit(features, mult_target)
        accuracy = model.score(features, mult_target)
        assert_almost_equal(accuracy, 0.97, 2)

        # adding a lot of regularization coef_ should be constant
        heavy = FistaClassifier(
            max_iter=200, penalty="tv1d", multiclass=True, alpha=1e6
        )
        heavy.fit(features, mult_target)
        n_rows = heavy.coef_.shape[0]
        for row_idx in range(n_rows):
            flat = np.mean(heavy.coef_[row_idx]) * np.ones(features.shape[1])
            np.testing.assert_array_almost_equal(heavy.coef_[row_idx], flat)
def test_fista_multiclass_tv1d(data, request):
    """Exercise the multiclass "tv1d" penalty on the fixture named by ``data``.

    ``request.getfixturevalue(data)`` yields an ``(X, y)`` pair. Two checks
    follow: the score on that pair must be almost equal to 0.97 to 2 decimals,
    and with ``alpha=1e6`` each row of ``coef_`` must be almost equal to a
    constant vector holding that row's mean.
    """
    features, labels = request.getfixturevalue(data)

    model = FistaClassifier(max_iter=200, penalty="tv1d", multiclass=True)
    model.fit(features, labels)
    accuracy = model.score(features, labels)
    np.testing.assert_almost_equal(accuracy, 0.97, 2)

    # adding a lot of regularization coef_ should be constant
    heavy = FistaClassifier(max_iter=200, penalty="tv1d", multiclass=True, alpha=1e6)
    heavy.fit(features, labels)
    n_rows = heavy.coef_.shape[0]
    for row_idx in range(n_rows):
        flat = np.mean(heavy.coef_[row_idx]) * np.ones(features.shape[1])
        np.testing.assert_array_almost_equal(heavy.coef_[row_idx], flat)
def test_fista_multiclass_tv1d():
    """Exercise the multiclass "tv1d" penalty on ``mult_dense`` and ``mult_csr``.

    Two checks per input: the score on the training input must be almost
    equal to 0.97 to 2 decimals, and with ``alpha=1e6`` each row of ``coef_``
    must be almost equal to a constant vector holding that row's mean.
    """
    for features in (mult_dense, mult_csr):
        model = FistaClassifier(max_iter=200, penalty="tv1d", multiclass=True)
        model.fit(features, mult_target)
        accuracy = model.score(features, mult_target)
        np.testing.assert_almost_equal(accuracy, 0.97, 2)

        # adding a lot of regularization coef_ should be constant
        heavy = FistaClassifier(
            max_iter=200, penalty="tv1d", multiclass=True, alpha=1e6
        )
        heavy.fit(features, mult_target)
        n_rows = heavy.coef_.shape[0]
        for row_idx in range(n_rows):
            flat = np.mean(heavy.coef_[row_idx]) * np.ones(features.shape[1])
            np.testing.assert_array_almost_equal(heavy.coef_[row_idx], flat)
def test_fista_multiclass_l1_no_line_search():
    """Fit a multiclass l1-penalized FISTA model with ``max_steps=0``.

    Runs on both ``mult_dense`` and ``mult_csr``; the score on the training
    input must be approximately 0.95 (precision argument 2).
    """
    for features in (mult_dense, mult_csr):
        model = FistaClassifier(
            max_iter=500,
            penalty="l1",
            multiclass=True,
            max_steps=0,
        )
        model.fit(features, mult_target)
        accuracy = model.score(features, mult_target)
        assert_almost_equal(accuracy, 0.95, 2)
def test_fista_multiclass_l1l2_log_margin():
    """Fit a multiclass FISTA model with the "l1/l2" penalty and "log_margin" loss.

    The model is trained once on ``mult_dense`` and once on ``mult_csr``; each
    time the score on the training input must be approximately 0.95
    (precision argument 2).
    """
    for features in (mult_dense, mult_csr):
        model = FistaClassifier(
            max_iter=200,
            penalty="l1/l2",
            loss="log_margin",
            multiclass=True,
        )
        model.fit(features, mult_target)
        accuracy = model.score(features, mult_target)
        assert_almost_equal(accuracy, 0.95, 2)
def test_fista_multiclass_trace(data, request):
    """Fit a multiclass FISTA model with the "trace" penalty.

    ``data`` names a fixture that ``request.getfixturevalue`` resolves to an
    ``(X, y)`` pair; the score on that pair must be almost equal to 0.96 to
    2 decimals.
    """
    features, labels = request.getfixturevalue(data)
    model = FistaClassifier(max_iter=100, penalty="trace", multiclass=True)
    model.fit(features, labels)
    accuracy = model.score(features, labels)
    np.testing.assert_almost_equal(accuracy, 0.96, 2)
# Script: fit FistaClassifier on the vectorized 20 newsgroups data ("all"
# subset) with the target reduced to two classes, then report the wall-clock
# fit time, the accuracy on the training data and the percentage of non-zero
# coefficients.
import time

import numpy as np
from sklearn.datasets import fetch_20newsgroups_vectorized

from lightning.classification import FistaClassifier

bunch = fetch_20newsgroups_vectorized(subset="all")
X = bunch.data
y = bunch.target
# Merge every class with label >= 1 into label 1 (in place), leaving only the
# labels 0 and 1.
y[y >= 1] = 1

clf = FistaClassifier(C=1.0 / X.shape[0], alpha=1e-5, max_iter=200)

start = time.time()
clf.fit(X, y)

# print() calls instead of Python 2 print statements, which are a SyntaxError
# on Python 3; the emitted text is unchanged.
print("Training time", time.time() - start)
print("Accuracy", np.mean(clf.predict(X) == y))
print("% non-zero", clf.n_nonzero(percentage=True))
def test_fista_bin_classes():
    """A default FistaClassifier fitted on the binary data exposes classes [0, 1]."""
    model = FistaClassifier()
    model.fit(bin_dense, bin_target)
    found = list(model.classes_)
    assert_equal(found, [0, 1])
def test_fista_bin_l1_no_line_search():
    """Fit a binary l1-penalized FISTA model with ``max_steps=0``.

    Runs on both ``bin_dense`` and ``bin_csr``; the score on the training
    input must be approximately 1.0 (precision argument 2).
    """
    for features in (bin_dense, bin_csr):
        model = FistaClassifier(max_iter=500, penalty="l1", max_steps=0)
        model.fit(features, bin_target)
        accuracy = model.score(features, bin_target)
        assert_almost_equal(accuracy, 1.0, 2)
def test_fista_multiclass_trace():
    """Fit a multiclass FISTA model with the "trace" penalty.

    Runs on both ``mult_dense`` and ``mult_csr``; the score on the training
    input must be approximately 0.98 (precision argument 2).
    """
    for features in (mult_dense, mult_csr):
        model = FistaClassifier(max_iter=100, penalty="trace", multiclass=True)
        model.fit(features, mult_target)
        accuracy = model.score(features, mult_target)
        assert_almost_equal(accuracy, 0.98, 2)
def test_fista_multiclass_l1l2_log_margin(data, request):
    """Fit a multiclass FISTA model with the "l1/l2" penalty and "log_margin" loss.

    ``data`` names a fixture that ``request.getfixturevalue`` resolves to an
    ``(X, y)`` pair; the score on that pair must be almost equal to 0.93 to
    2 decimals.
    """
    features, labels = request.getfixturevalue(data)
    model = FistaClassifier(
        max_iter=200,
        penalty="l1/l2",
        loss="log_margin",
        multiclass=True,
    )
    model.fit(features, labels)
    accuracy = model.score(features, labels)
    np.testing.assert_almost_equal(accuracy, 0.93, 2)
def rank(M, eps=1e-9):
    """Return the number of singular values of ``M`` strictly greater than ``eps``."""
    _, singular_values, _ = svd(M, full_matrices=False)
    above_threshold = singular_values > eps
    return np.sum(above_threshold)


# Training split, with feature selection fitted on it.
bunch = fetch_20newsgroups_vectorized(subset="train")
X_train = bunch.data
y_train = bunch.target

# Reduces dimensionality to make the example faster
ch2 = SelectKBest(chi2, k=5000)
X_train = ch2.fit_transform(X_train, y_train)

# Test split, projected with the selector fitted above.
bunch = fetch_20newsgroups_vectorized(subset="test")
X_test = bunch.data
y_test = bunch.target
X_test = ch2.transform(X_test)

clf = FistaClassifier(
    C=1.0 / X_train.shape[0],
    max_iter=200,
    penalty="trace",
    multiclass=True,
)

# Refit the same estimator for each alpha and report the test score and the
# rank of the learned coefficient matrix.
for alpha in (1e-3, 1e-2, 0.1, 0.2, 0.3):
    print("alpha=", alpha)
    clf.alpha = alpha
    clf.fit(X_train, y_train)
    test_score = clf.score(X_test, y_test)
    print(test_score)
    coef_rank = rank(clf.coef_)
    print(coef_rank)
def test_fista_multiclass_classes(mult_dense_train_data):
    """A default FistaClassifier fitted on the fixture data exposes classes [0, 1, 2].

    ``mult_dense_train_data`` is unpacked into an ``(X, y)`` pair.
    """
    features, labels = mult_dense_train_data
    model = FistaClassifier()
    model.fit(features, labels)
    found = list(model.classes_)
    assert found == [0, 1, 2]
def test_fista_bin_classes(bin_dense_train_data):
    """A default FistaClassifier fitted on the fixture data exposes classes [0, 1].

    ``bin_dense_train_data`` is unpacked into an ``(X, y)`` pair.
    """
    features, labels = bin_dense_train_data
    model = FistaClassifier()
    model.fit(features, labels)
    found = list(model.classes_)
    assert found == [0, 1]
def test_fista_multiclass_l1():
    """Fit a multiclass FISTA model with the "l1" penalty.

    Runs on both ``mult_dense`` and ``mult_csr``; the score on the training
    input must be almost equal to 0.98 to 2 decimals.
    """
    for features in (mult_dense, mult_csr):
        model = FistaClassifier(max_iter=200, penalty="l1", multiclass=True)
        model.fit(features, mult_target)
        accuracy = model.score(features, mult_target)
        np.testing.assert_almost_equal(accuracy, 0.98, 2)
def test_fista_bin_l1():
    """Fit a binary FISTA model with the "l1" penalty.

    Runs on both ``bin_dense`` and ``bin_csr``; the score on the training
    input must be approximately 1.0 (precision argument 2).
    """
    for features in (bin_dense, bin_csr):
        model = FistaClassifier(max_iter=200, penalty="l1")
        model.fit(features, bin_target)
        accuracy = model.score(features, bin_target)
        assert_almost_equal(accuracy, 1.0, 2)
# Script: fit FistaClassifier on the vectorized 20 newsgroups data ("all"
# subset) with the target reduced to two classes, then report the wall-clock
# fit time, the accuracy on the training data and the percentage of non-zero
# coefficients.
import time

import numpy as np
from sklearn.datasets import fetch_20newsgroups_vectorized

from lightning.classification import FistaClassifier

bunch = fetch_20newsgroups_vectorized(subset="all")
X = bunch.data
y = bunch.target
# Merge every class with label >= 1 into label 1 (in place), leaving only the
# labels 0 and 1.
y[y >= 1] = 1

clf = FistaClassifier(C=1.0 / X.shape[0], alpha=1e-5, max_iter=200)

start = time.time()
clf.fit(X, y)

# print() calls instead of Python 2 print statements, which are a SyntaxError
# on Python 3; the emitted text is unchanged.
print("Training time", time.time() - start)
print("Accuracy", np.mean(clf.predict(X) == y))
print("% non-zero", clf.n_nonzero(percentage=True))
def test_fista_bin_l1_no_line_search():
    """Fit a binary l1-penalized FISTA model with ``max_steps=0``.

    Runs on both ``bin_dense`` and ``bin_csr``; the score on the training
    input must be approximately 1.0 (precision argument 2).
    """
    for features in (bin_dense, bin_csr):
        model = FistaClassifier(max_iter=500, penalty="l1", max_steps=0)
        model.fit(features, bin_target)
        accuracy = model.score(features, bin_target)
        assert_almost_equal(accuracy, 1.0, 2)
def test_fista_multiclass_classes():
    """A default FistaClassifier fitted on the multiclass data exposes classes [0, 1, 2]."""
    model = FistaClassifier()
    model.fit(mult_dense, mult_target)
    found = list(model.classes_)
    assert_equal(found, [0, 1, 2])
def test_fista_bin_l1():
    """Fit a binary FISTA model with the "l1" penalty.

    Runs on both ``bin_dense`` and ``bin_csr``; the score on the training
    input must be approximately 1.0 (precision argument 2).
    """
    for features in (bin_dense, bin_csr):
        model = FistaClassifier(max_iter=200, penalty="l1")
        model.fit(features, bin_target)
        accuracy = model.score(features, bin_target)
        assert_almost_equal(accuracy, 1.0, 2)
def test_fista_bin_l1(data, request):
    """Fit a binary FISTA model with the "l1" penalty.

    ``data`` names a fixture that ``request.getfixturevalue`` resolves to an
    ``(X, y)`` pair; the score on that pair must be almost equal to 1.0 to
    2 decimals.
    """
    features, labels = request.getfixturevalue(data)
    model = FistaClassifier(max_iter=200, penalty="l1")
    model.fit(features, labels)
    accuracy = model.score(features, labels)
    np.testing.assert_almost_equal(accuracy, 1.0, 2)
def test_fista_multiclass_trace():
    """Fit a multiclass FISTA model with the "trace" penalty.

    Runs on both ``mult_dense`` and ``mult_csr``; the score on the training
    input must be approximately 0.98 (precision argument 2).
    """
    for features in (mult_dense, mult_csr):
        model = FistaClassifier(max_iter=100, penalty="trace", multiclass=True)
        model.fit(features, mult_target)
        accuracy = model.score(features, mult_target)
        assert_almost_equal(accuracy, 0.98, 2)
def test_fista_bin_l1_no_line_search(data, request):
    """Fit a binary l1-penalized FISTA model with ``max_steps=0``.

    ``data`` names a fixture that ``request.getfixturevalue`` resolves to an
    ``(X, y)`` pair; the score on that pair must be almost equal to 1.0 to
    2 decimals.
    """
    features, labels = request.getfixturevalue(data)
    model = FistaClassifier(max_iter=500, penalty="l1", max_steps=0)
    model.fit(features, labels)
    accuracy = model.score(features, labels)
    np.testing.assert_almost_equal(accuracy, 1.0, 2)
def test_fista_multiclass_classes():
    """A default FistaClassifier fitted on the multiclass data exposes classes [0, 1, 2]."""
    model = FistaClassifier()
    model.fit(mult_dense, mult_target)
    found = list(model.classes_)
    assert_equal(found, [0, 1, 2])
def rank(M, eps=1e-9):
    """Return the number of singular values of ``M`` strictly greater than ``eps``."""
    _, singular_values, _ = svd(M, full_matrices=False)
    above_threshold = singular_values > eps
    return np.sum(above_threshold)


# Training split, with feature selection fitted on it.
bunch = fetch_20newsgroups_vectorized(subset="train")
X_train = bunch.data
y_train = bunch.target

# Reduces dimensionality to make the example faster
ch2 = SelectKBest(chi2, k=5000)
X_train = ch2.fit_transform(X_train, y_train)

# Test split, projected with the selector fitted above.
bunch = fetch_20newsgroups_vectorized(subset="test")
X_test = bunch.data
y_test = bunch.target
X_test = ch2.transform(X_test)

clf = FistaClassifier(
    C=1.0 / X_train.shape[0],
    max_iter=200,
    penalty="trace",
    multiclass=True,
)

# Header row, then one row per alpha: the same estimator is refitted and its
# test score and coefficient-matrix rank are printed in aligned columns.
print(f"{'alpha': <10}| {'score': <25}| {'rank': <5}")
for alpha in (1e-3, 1e-2, 0.1, 0.2, 0.3):
    clf.alpha = alpha
    clf.fit(X_train, y_train)
    print(f"{alpha: <10}| {clf.score(X_test, y_test): <25}| {rank(clf.coef_): <5}")
def test_fista_multiclass_no_line_search(data, penalty, request):
    """Fit a multiclass FISTA model with ``max_steps=0`` for a given penalty.

    ``data`` names a fixture that ``request.getfixturevalue`` resolves to an
    ``(X, y)`` pair, and ``penalty`` is forwarded unchanged to the classifier.
    The score on that pair must be almost equal to 0.94 to 2 decimals.
    """
    features, labels = request.getfixturevalue(data)
    model = FistaClassifier(
        max_iter=500,
        penalty=penalty,
        multiclass=True,
        max_steps=0,
    )
    model.fit(features, labels)
    accuracy = model.score(features, labels)
    np.testing.assert_almost_equal(accuracy, 0.94, 2)
def test_fista_bin_classes():
    """A default FistaClassifier fitted on the binary data exposes classes [0, 1]."""
    model = FistaClassifier()
    model.fit(bin_dense, bin_target)
    found = list(model.classes_)
    assert_equal(found, [0, 1])