def test_sag_proba():
    """Fit a log-loss SAGClassifier on 10 samples and pass it to check_predict_proba."""
    n_samples = 10
    features, labels = make_classification(n_samples, random_state=0)

    settings = dict(eta=1e-3, alpha=0.0, beta=0.0, max_iter=10,
                    loss='log', random_state=0)
    clf = SAGClassifier(**settings)
    clf.fit(features, labels)

    check_predict_proba(clf, features)
def test_sag_proba():
    """Check that the probabilities predicted by a log-loss SAGClassifier
    sum (over all entries) to the number of samples.

    The comparison uses a tolerance: the total is a floating-point sum, so
    exact equality with an integer can fail because of rounding alone.
    """
    # Local import keeps this block self-contained.
    import numpy as np

    n_samples = 10
    X, y = make_classification(n_samples, random_state=0)
    sag = SAGClassifier(eta=1e-3, alpha=0.0, beta=0.0, max_iter=10,
                        loss='log', random_state=0)
    sag.fit(X, y)
    probas = sag.predict_proba(X)
    np.testing.assert_allclose(probas.sum(), n_samples)
def test_no_reg_sag(bin_train_data):
    """With alpha=0.0, PySAGClassifier and SAGClassifier must end up with
    (almost) the same coefficients on the binary training data."""
    X_bin, y_bin = bin_train_data

    # Both estimators are configured identically.
    shared = dict(eta=1e-3, alpha=0.0, max_iter=10, random_state=0)
    reference = PySAGClassifier(**shared)
    candidate = SAGClassifier(**shared)

    for model in (reference, candidate):
        model.fit(X_bin, y_bin)

    np.testing.assert_array_almost_equal(reference.coef_, candidate.coef_)
def test_sag_multiclass_classes():
    """A default SAGClassifier fitted on 3-class data exposes classes_ == [0, 1, 2]."""
    data_spec = dict(n_samples=10, random_state=0, n_classes=3,
                     n_informative=4)
    X, y = make_classification(**data_spec)

    clf = SAGClassifier()
    clf.fit(X, y)

    found = list(clf.classes_)
    assert found == [0, 1, 2]
def test_l2_regularized_sag():
    """With alpha=1.0, PySAGClassifier and SAGClassifier must end up with
    (almost) the same coefficients on X_bin / y_bin."""
    # Both estimators are configured identically.
    shared = dict(eta=1e-3, alpha=1.0, max_iter=10, random_state=0)
    reference = PySAGClassifier(**shared)
    candidate = SAGClassifier(**shared)

    for model in (reference, candidate):
        model.fit(X_bin, y_bin)

    np.testing.assert_array_almost_equal(reference.coef_, candidate.coef_)
def test_sag_score():
    """PySAGClassifier and SAGClassifier, configured identically, must
    report the same score on the data they were trained on."""
    X, y = make_classification(1000, random_state=0)

    shared = dict(eta=1e-3, alpha=0.0, beta=0.0, max_iter=10,
                  random_state=0)
    reference = PySAGClassifier(**shared)
    candidate = SAGClassifier(**shared)

    reference.fit(X, y)
    candidate.fit(X, y)

    reference_score = reference.score(X, y)
    candidate_score = candidate.score(X, y)
    assert_equal(reference_score, candidate_score)
def test_sag_sparse():
    """Check that SAG gives the same score when fitted on sparse and on
    dense versions of the same data.

    Regression test for https://github.com/mblondel/lightning/issues/33
    """
    # Use a locally seeded generator so the labels (and therefore the test
    # outcome) are reproducible and independent of the global NumPy RNG.
    rng = np.random.RandomState(0)
    X = sparse.rand(100, 50, density=.5, random_state=0)
    y = rng.randint(0, high=2, size=100)

    for alpha in np.logspace(-3, 3, 10):
        clf_sparse = SAGClassifier(max_iter=1, random_state=0, alpha=alpha)
        clf_sparse.fit(X, y)

        clf_dense = SAGClassifier(max_iter=1, random_state=0, alpha=alpha)
        clf_dense.fit(X.toarray(), y)

        # Both models are scored on the same (sparse) matrix.
        assert_equal(clf_sparse.score(X, y), clf_dense.score(X, y))
def test_sag_adaptive():
    """Check that the adaptive ('line-search') step size strategy yields
    the same score, to one decimal place, as the non-adaptive ('auto') one.

    The comparison is done for both SAGClassifier and SAGAClassifier over a
    range of alpha values; for SAGA it is also checked that the
    line-search coefficients do not sum to NaN.
    """
    np.random.seed(0)
    X = sparse.rand(100, 10, density=.5, random_state=0).tocsr()
    y = np.random.randint(0, high=2, size=100)

    for alpha in np.logspace(-3, 1, 5):
        # SAG: line-search vs. automatic step size.
        clf_adaptive = SAGClassifier(eta='line-search', random_state=0,
                                     alpha=alpha)
        clf_adaptive.fit(X, y)
        clf = SAGClassifier(eta='auto', random_state=0, alpha=alpha)
        clf.fit(X, y)
        np.testing.assert_almost_equal(clf_adaptive.score(X, y),
                                       clf.score(X, y), 1)

        # SAGA: line-search vs. automatic step size.
        clf_adaptive = SAGAClassifier(eta='line-search', loss='log',
                                      random_state=0, alpha=alpha,
                                      max_iter=20)
        clf_adaptive.fit(X, y)
        # A NaN sum means at least one coefficient is NaN.
        assert not np.isnan(clf_adaptive.coef_.sum())
        clf = SAGAClassifier(eta='auto', loss='log', random_state=0,
                             alpha=alpha, max_iter=20)
        clf.fit(X, y)
        np.testing.assert_almost_equal(clf_adaptive.score(X, y),
                                       clf.score(X, y), 1)
def test_sag_callback():
    """The objective recorded by the callback during a squared-hinge SAG
    fit must never increase from one call to the next."""

    class Callback(object):
        """Appends the current objective value to ``obj`` on every call."""

        def __init__(self, X, y):
            self.X = X
            self.y = y
            self.obj = []

        def __call__(self, clf):
            clf._finalize_coef()
            scores = clf.decision_function(self.X).ravel()
            # Mean squared hinge loss over the stored data.
            margins = 1 - self.y * scores
            data_term = (np.maximum(margins, 0) ** 2).mean()
            # L2 penalty on the flattened coefficients.
            weights = clf.coef_.ravel()
            penalty = 0.5 * clf.alpha * np.dot(weights, weights)
            self.obj.append(data_term + penalty)

    monitor = Callback(X_bin, y_bin)
    model = SAGClassifier(loss="squared_hinge", eta=1e-3, max_iter=20,
                          random_state=0, callback=monitor)
    model.fit(X_bin, y_bin)

    steps = np.diff(monitor.obj)
    assert_true(np.all(steps <= 0))
def test_sag_callback():
    """Fit a squared-hinge SAGClassifier with a recording callback and
    assert that the recorded objective values are non-increasing."""

    class Callback(object):
        """Callable that stores one objective value per invocation."""

        def __init__(self, X, y):
            self.X, self.y = X, y
            self.obj = []

        def __call__(self, clf):
            clf._finalize_coef()
            decision = clf.decision_function(self.X).ravel()
            hinge = np.maximum(1 - self.y * decision, 0)
            loss_value = (hinge ** 2).mean()
            flat_coef = clf.coef_.ravel()
            regul_value = 0.5 * clf.alpha * np.dot(flat_coef, flat_coef)
            self.obj.append(loss_value + regul_value)

    recorder = Callback(X_bin, y_bin)
    options = dict(loss="squared_hinge", eta=1e-3, max_iter=20,
                   random_state=0, callback=recorder)
    estimator = SAGClassifier(**options)
    estimator.fit(X_bin, y_bin)

    # Every consecutive difference must be <= 0.
    non_increasing = np.all(np.diff(recorder.obj) <= 0)
    assert_true(non_increasing)
def test_sag_adaptive():
    """Check that the adaptive ('line-search') step size strategy yields
    the same score, to one decimal place, as the non-adaptive ('auto') one.

    The comparison is done for both SAGClassifier and SAGAClassifier over a
    range of alpha values; for SAGA it is also checked that the
    line-search coefficients do not sum to NaN.
    """
    np.random.seed(0)
    X = sparse.rand(100, 10, density=.5, random_state=0).tocsr()
    y = np.random.randint(0, high=2, size=100)

    for alpha in np.logspace(-3, 1, 5):
        # SAG: line-search vs. automatic step size.
        clf_adaptive = SAGClassifier(
            eta='line-search', random_state=0, alpha=alpha)
        clf_adaptive.fit(X, y)
        clf = SAGClassifier(
            eta='auto', random_state=0, alpha=alpha)
        clf.fit(X, y)
        assert_almost_equal(clf_adaptive.score(X, y), clf.score(X, y), 1)

        # SAGA: line-search vs. automatic step size.
        clf_adaptive = SAGAClassifier(
            eta='line-search', loss='log', random_state=0, alpha=alpha,
            max_iter=20)
        clf_adaptive.fit(X, y)
        # A NaN sum means at least one coefficient is NaN.
        assert not np.isnan(clf_adaptive.coef_.sum())
        clf = SAGAClassifier(
            eta='auto', loss='log', random_state=0, alpha=alpha,
            max_iter=20)
        clf.fit(X, y)
        assert_almost_equal(clf_adaptive.score(X, y), clf.score(X, y), 1)
# Benchmark script: train a SAGClassifier on the vectorized 20 newsgroups
# data and report training time, training accuracy and the percentage of
# non-zero coefficients.
import time

import numpy as np
from sklearn.datasets import fetch_20newsgroups_vectorized

from lightning.classification import SAGClassifier

bunch = fetch_20newsgroups_vectorized(subset="all")
X = bunch.data
y = bunch.target
# Collapse the labels to two values: 0 stays 0, everything else becomes 1.
y[y >= 1] = 1

clf = SAGClassifier(eta=1e-4, alpha=1e-5, tol=1e-3, max_iter=20,
                    verbose=1, random_state=0)

start = time.time()
clf.fit(X, y)

# print() calls with %-formatting give the same text on Python 2 and 3;
# the original Python 2 print statements are a SyntaxError on Python 3.
print("Training time %s" % (time.time() - start,))
print("Accuracy %s" % (np.mean(clf.predict(X) == y),))
print("%% non-zero %s" % (clf.n_nonzero(percentage=True),))
def test_sag():
    """A SAGClassifier fitted on X_bin / y_bin must score exactly 1.0 on
    that same data."""
    options = dict(eta=1e-3, max_iter=20, verbose=0, random_state=0)
    model = SAGClassifier(**options)
    model.fit(X_bin, y_bin)

    training_score = model.score(X_bin, y_bin)
    assert_equal(training_score, 1.0)
def test_sag_multiclass_classes():
    """After fitting on 3-class data, classes_ must list the labels 0, 1, 2."""
    X, y = make_classification(
        n_samples=10,
        random_state=0,
        n_classes=3,
        n_informative=4,
    )

    model = SAGClassifier()
    model.fit(X, y)

    expected_labels = [0, 1, 2]
    assert_equal(list(model.classes_), expected_labels)