コード例 #1
0
def test_fit_linear_binary_l1r_log_loss_no_linesearch(bin_dense_train_data):
    """L1-penalized log-loss CD fit with uniform selection and no line search."""
    X, y = bin_dense_train_data
    params = dict(C=1.0, max_steps=0, random_state=0,
                  selection="uniform", penalty="l1", loss="log")
    clf = CDClassifier(**params)
    clf.fit(X, y)
    np.testing.assert_almost_equal(clf.score(X, y), 0.995)
コード例 #2
0
def test_fit_squared_loss_l1():
    """Squared loss + L1 penalty: accuracy, residual consistency, sparsity."""
    clf = CDClassifier(C=0.5, random_state=0, penalty="l1",
                       loss="squared", max_iter=100, shrinking=False)
    clf.fit(bin_dense, bin_target)
    assert_almost_equal(clf.score(bin_dense, bin_target), 0.985, 3)
    # Map {0, 1} labels onto {-1, +1} before comparing residuals.
    signed = bin_target.copy()
    signed[signed == 0] = -1
    residuals = np.dot(bin_dense, clf.coef_.ravel()) - signed
    assert_array_almost_equal(residuals, clf.errors_.ravel())
    assert_equal(clf.n_nonzero(), 89)
コード例 #3
0
def test_fit_squared_loss():
    """Squared loss + L2 penalty: accuracy and residual/error consistency."""
    clf = CDClassifier(C=1.0, random_state=0, penalty="l2",
                       loss="squared", max_iter=100)
    clf.fit(bin_dense, bin_target)
    assert_almost_equal(clf.score(bin_dense, bin_target), 0.99)
    # Map {0, 1} labels onto {-1, +1} before comparing residuals.
    signed = bin_target.copy()
    signed[signed == 0] = -1
    residuals = np.dot(bin_dense, clf.coef_.ravel()) - signed
    assert_array_almost_equal(residuals, clf.errors_.ravel())
コード例 #4
0
def test_warm_start_l1r():
    """Warm-started refit with a larger C should keep more nonzero coefs."""
    clf = CDClassifier(warm_start=True, random_state=0, penalty="l1")

    nonzeros = []
    for C in (0.1, 0.2):
        clf.C = C
        clf.fit(bin_dense, bin_target)
        nonzeros.append(clf.n_nonzero())

    assert_true(nonzeros[0] < nonzeros[1])
コード例 #5
0
def test_debiasing_l1(bin_dense_train_data, warm_debiasing):
    """Debiased L1 fit keeps 22 features and ~0.955 training accuracy."""
    X, y = bin_dense_train_data
    clf = CDClassifier(penalty="l1", debiasing=True,
                       warm_debiasing=warm_debiasing,
                       C=0.05, Cd=1.0, max_iter=10, random_state=0)
    clf.fit(X, y)
    assert clf.n_nonzero() == 22
    np.testing.assert_almost_equal(clf.score(X, y), 0.955, 3)
コード例 #6
0
def test_l1l2_multi_task_log_loss(mult_dense_train_data):
    """L1/L2 multi-task (multiclass=False) log-loss training accuracy."""
    X, y = mult_dense_train_data
    clf = CDClassifier(penalty="l1/l2", loss="log", multiclass=False,
                       max_steps=30, max_iter=20, C=5.0, random_state=0)
    clf.fit(X, y)
    np.testing.assert_almost_equal(clf.score(X, y), 0.8633, 3)
コード例 #7
0
def test_debiasing_l1():
    """Debiased L1 fit: same sparsity/accuracy with or without warm debiasing."""
    for warm in (True, False):
        clf = CDClassifier(penalty="l1", debiasing=True, warm_debiasing=warm,
                           C=0.05, Cd=1.0, max_iter=10, random_state=0)
        clf.fit(bin_dense, bin_target)
        assert_equal(clf.n_nonzero(), 22)
        assert_almost_equal(clf.score(bin_dense, bin_target), 0.955, 3)
コード例 #8
0
def test_debiasing_l1l2():
    """Debiased L1/L2 one-vs-rest fit: accuracy and sparsity checks."""
    for warm in (True, False):
        clf = CDClassifier(penalty="l1/l2", loss="squared_hinge",
                           multiclass=False, debiasing=True,
                           warm_debiasing=warm, max_iter=20,
                           C=0.01, random_state=0)
        clf.fit(mult_csc, mult_target)
        assert_greater(clf.score(mult_csc, mult_target), 0.75)
        assert_equal(clf.n_nonzero(percentage=True), 0.08)
コード例 #9
0
def test_debiasing_l1l2(mult_sparse_train_data, warm_debiasing):
    """Debiased L1/L2 fit on sparse data: accuracy and sparsity bounds."""
    X, y = mult_sparse_train_data
    clf = CDClassifier(penalty="l1/l2", loss="squared_hinge",
                       multiclass=False, debiasing=True,
                       warm_debiasing=warm_debiasing,
                       max_iter=20, C=0.01, random_state=0)
    clf.fit(X, y)
    assert clf.score(X, y) > 0.75
    assert 0.0 <= clf.n_nonzero(percentage=True) <= 0.1
コード例 #10
0
def test_debiasing_warm_start():
    """Debiasing combined with warm start: sparsity and accuracy per C."""
    clf = CDClassifier(penalty="l1", max_iter=10,
                       warm_start=True, random_state=0)
    # FIXME: with warm start the sparsity at C=1.0 differs from a cold fit.
    for C, expected_nz in ((0.5, 74), (1.0, 77)):
        clf.C = C
        clf.fit(bin_dense, bin_target)
        assert_equal(clf.n_nonzero(), expected_nz)
        assert_almost_equal(clf.score(bin_dense, bin_target), 1.0)
コード例 #11
0
def test_l1l2_multiclass_log_loss():
    """Multiclass L1/L2 log-loss: accuracy, error matrix, group sparsity.

    Fix: replace Python 2's ``xrange`` (a NameError on Python 3) with
    ``range``, matching the rest of the file.
    """
    for data in (mult_dense, mult_csc):
        clf = CDClassifier(penalty="l1/l2", loss="log", multiclass=True,
                           max_steps=30, max_iter=5, C=1.0, random_state=0)
        clf.fit(data, mult_target)
        assert_almost_equal(clf.score(data, mult_target), 0.8766, 3)
        df = clf.decision_function(data)
        # errors_ should equal exp(df - df_true_class), transposed.
        sel = np.array([df[i, int(mult_target[i])] for i in range(df.shape[0])])
        df -= sel[:, np.newaxis]
        df = np.exp(df)
        assert_array_almost_equal(clf.errors_, df.T)
        # The true-class entry is exp(0) == 1 for every sample.
        for i in range(data.shape[0]):
            assert_almost_equal(clf.errors_[mult_target[i], i], 1.0)
        nz = np.sum(clf.coef_ != 0)
        assert_equal(nz, 297)

        clf = CDClassifier(penalty="l1/l2", loss="log", multiclass=True,
                           max_steps=30, max_iter=5, C=0.3, random_state=0)
        clf.fit(data, mult_target)
        assert_almost_equal(clf.score(data, mult_target), 0.8566, 3)
        nz = np.sum(clf.coef_ != 0)
        assert_equal(nz, 213)
        assert_true(nz % 3 == 0)  # should be a multiple of n_classes
コード例 #12
0
def test_l1l2_multiclass_log_loss_no_linesearch():
    """Multiclass log loss with uniform selection and no line search."""
    data = mult_csc
    clf = CDClassifier(penalty="l1/l2", loss="log", multiclass=True,
                       selection="uniform", max_steps=0, max_iter=30,
                       C=1.0, random_state=0)
    clf.fit(data, mult_target)
    assert_almost_equal(clf.score(data, mult_target), 0.88, 3)
    assert_equal(np.sum(clf.coef_ != 0), 297)
コード例 #13
0
def test_l1l2_multiclass_squared_hinge_loss_no_linesearch(
        mult_sparse_train_data):
    """Multiclass squared hinge without line search: errors and sparsity."""
    X, y = mult_sparse_train_data
    clf = CDClassifier(penalty="l1/l2", loss="squared_hinge", multiclass=True,
                       shrinking=False, selection="uniform", max_steps=0,
                       max_iter=200, C=1.0, random_state=0)
    clf.fit(X, y)
    np.testing.assert_almost_equal(clf.score(X, y), 0.9166, 3)
    dec = clf.decision_function(X)
    n_samples, n_vectors = dec.shape
    # errors_[k, i] should hold the margin 1 - (score_true - score_k).
    expected = np.zeros_like(clf.errors_)
    for i in range(n_samples):
        true_score = dec[i, y[i]]
        for k in range(n_vectors):
            expected[k, i] = 1 - (true_score - dec[i, k])
    np.testing.assert_array_almost_equal(clf.errors_, expected)
    assert np.sum(clf.coef_ != 0) == 300

    # A smaller C yields a sparser model; nonzeros come in class groups.
    clf = CDClassifier(penalty="l1/l2", loss="squared_hinge", multiclass=True,
                       max_iter=20, C=0.05, random_state=0)
    clf.fit(X, y)
    np.testing.assert_almost_equal(clf.score(X, y), 0.83, 3)
    nz = np.sum(clf.coef_ != 0)
    assert nz == 207
    assert nz % 3 == 0  # should be a multiple of n_classes
コード例 #14
0
def test_l1l2_multiclass_squared_hinge_loss():
    """Multiclass L1/L2 squared hinge: accuracy, error matrix, sparsity.

    Fix: replace Python 2's ``xrange`` (a NameError on Python 3) with
    ``range``, matching the rest of the file.
    """
    for data in (mult_dense, mult_csc):
        clf = CDClassifier(penalty="l1/l2", loss="squared_hinge",
                           multiclass=True,
                           max_iter=20, C=1.0, random_state=0)
        clf.fit(data, mult_target)
        assert_almost_equal(clf.score(data, mult_target), 0.913, 3)
        df = clf.decision_function(data)
        n_samples, n_vectors = df.shape
        diff = np.zeros_like(clf.errors_)
        # errors_[k, i] should equal 1 - (score_true_class - score_k).
        for i in range(n_samples):
            for k in range(n_vectors):
                diff[k, i] = 1 - (df[i, mult_target[i]] - df[i, k])
        assert_array_almost_equal(clf.errors_, diff)
        assert_equal(np.sum(clf.coef_ != 0), 300)

        clf = CDClassifier(penalty="l1/l2", loss="squared_hinge",
                           multiclass=True,
                           max_iter=20, C=0.05, random_state=0)
        clf.fit(data, mult_target)
        assert_almost_equal(clf.score(data, mult_target), 0.83, 3)
        nz = np.sum(clf.coef_ != 0)
        assert_equal(nz, 207)
        assert_true(nz % 3 == 0)  # should be a multiple of n_classes
コード例 #15
0
def test_l1l2_multiclass_log_loss_no_linesearch(mult_sparse_train_data):
    """Sparse multiclass log loss, uniform selection, no line search."""
    X, y = mult_sparse_train_data
    clf = CDClassifier(penalty="l1/l2", loss="log", multiclass=True,
                       selection="uniform", max_steps=0, max_iter=30,
                       C=1.0, random_state=0)
    clf.fit(X, y)
    np.testing.assert_almost_equal(clf.score(X, y), 0.88, 3)
    assert np.sum(clf.coef_ != 0) == 297
コード例 #16
0
def test_multiclass_error_nongrouplasso(mult_dense_train_data, penalty):
    """multiclass=True with a non-group-lasso penalty must raise."""
    X, y = mult_dense_train_data
    clf = CDClassifier(multiclass=True, penalty=penalty)
    with pytest.raises(NotImplementedError):
        clf.fit(X, y)
コード例 #17
0
def test_fit_linear_binary_l1r_smooth_hinge():
    """Smooth hinge loss: perfect training accuracy, no predict_proba."""
    clf = CDClassifier(C=1.0, loss="smooth_hinge", random_state=0,
                       penalty="l1")
    clf.fit(bin_dense, bin_target)
    # Smooth hinge is not a probabilistic loss, so no predict_proba.
    assert not hasattr(clf, 'predict_proba')
    assert_almost_equal(clf.score(bin_dense, bin_target), 1.0)
コード例 #18
0
def test_fit_linear_multi_l2r(mult_dense_train_data):
    """L2-penalized multiclass fit reaches ~0.8833 training accuracy."""
    X, y = mult_dense_train_data
    clf = CDClassifier(C=1.0, random_state=0, penalty="l2")
    clf.fit(X, y)
    np.testing.assert_almost_equal(clf.score(X, y), 0.8833, 4)
コード例 #19
0
def test_fit_linear_binary_l1r_no_linesearch():
    """Uniform selection with zero line-search steps still fits perfectly."""
    clf = CDClassifier(C=1.0, selection="uniform", max_steps=0,
                       penalty="l1", random_state=0)
    clf.fit(bin_dense, bin_target)
    assert_almost_equal(clf.score(bin_dense, bin_target), 1.0)
コード例 #20
0
def test_l1r_shrinking():
    """Shrinking on or off must not change the fitted accuracy."""
    for shrink in (True, False):
        clf = CDClassifier(C=0.5, penalty="l1", random_state=0,
                           shrinking=shrink)
        clf.fit(bin_dense, bin_target)
        assert_equal(clf.score(bin_dense, bin_target), 1.0)
コード例 #21
0
def test_fit_linear_binary_l2r(bin_dense_train_data):
    """L2-penalized binary fit reaches perfect training accuracy."""
    X, y = bin_dense_train_data
    clf = CDClassifier(C=1.0, random_state=0, penalty="l2")
    clf.fit(X, y)
    np.testing.assert_almost_equal(clf.score(X, y), 1.0)
コード例 #22
0
def test_multiclass_classes():
    """After a multiclass fit, classes_ lists the three labels in order."""
    clf = CDClassifier()
    clf.fit(mult_dense, mult_target)
    assert_equal(list(clf.classes_), [0, 1, 2])
コード例 #23
0
#                     max_iter=1000,
#                     verbose=1,
#                     random_state=0)
saga = SAGAClassifier(eta='auto',
                      loss='log',
                      penalty='l1',
                      alpha=0,
                      beta=alpha,
                      tol=1e-10,
                      max_iter=20,
                      verbose=1,
                      random_state=0)
cd_classifier = CDClassifier(loss='log',
                             penalty='l1',
                             alpha=alpha,
                             C=1 / n_samples,
                             tol=1e-10,
                             max_iter=20,
                             verbose=1,
                             random_state=0)
sklearn_sag = LogisticRegression(tol=1e-10, max_iter=1000,
                                 verbose=2, random_state=0,
                                 C=1. / (n_samples * alpha),
                                 solver='liblinear',
                                 penalty='l1',
                                 dual=False,
                                 fit_intercept=False)
sklearn_sgd = SGDClassifier(loss='log', penalty='l1', alpha=alpha,
                            n_iter=15)

classifiers = [
    # {'name': 'Lightning SAG', 'estimator': sag},
コード例 #24
0
def test_bin_classes():
    """After a binary fit, classes_ lists the two labels in order."""
    clf = CDClassifier()
    clf.fit(bin_dense, bin_target)
    assert_equal(list(clf.classes_), [0, 1])
コード例 #25
0
def test_fit_linear_binary_l1r_log_loss():
    """L1-penalized log loss reaches ~0.995 training accuracy."""
    clf = CDClassifier(C=1.0, penalty="l1", loss="log", random_state=0)
    clf.fit(bin_dense, bin_target)
    assert_almost_equal(clf.score(bin_dense, bin_target), 0.995)
コード例 #26
0
def test_bin_classes(bin_dense_train_data):
    """After a binary fit, classes_ lists the two labels in order."""
    X, y = bin_dense_train_data
    clf = CDClassifier()
    clf.fit(X, y)
    assert list(clf.classes_) == [0, 1]
コード例 #27
0
def test_fit_linear_binary_l2r():
    """L2-penalized binary fit reaches perfect training accuracy."""
    clf = CDClassifier(C=1.0, penalty="l2", random_state=0)
    clf.fit(bin_dense, bin_target)
    assert_almost_equal(clf.score(bin_dense, bin_target), 1.0)
コード例 #28
0
def test_fit_linear_multi_l2r():
    """L2-penalized multiclass fit reaches ~0.8833 training accuracy."""
    clf = CDClassifier(C=1.0, penalty="l2", random_state=0)
    clf.fit(mult_dense, mult_target)
    assert_almost_equal(clf.score(mult_dense, mult_target), 0.8833, 4)
コード例 #29
0
def test_multiclass_classes(mult_dense_train_data):
    """After a multiclass fit, classes_ lists the three labels in order."""
    X, y = mult_dense_train_data
    clf = CDClassifier()
    clf.fit(X, y)
    assert list(clf.classes_) == [0, 1, 2]
コード例 #30
0
def test_multiclass_error_nongrouplasso():
    """multiclass=True with a non-group-lasso penalty must raise."""
    for penalty in ('l1', 'l2'):
        clf = CDClassifier(multiclass=True, penalty=penalty)
        assert_raises(NotImplementedError, clf.fit, mult_dense, mult_target)