# ---- Example 1 ----
def test_l1l2_multi_task_squared_hinge_loss(mult_dense_train_data):
    """L1/L2 multi-task squared hinge: check errors_, score and sparsity.

    Two fits are performed: a weakly regularized one (C=5.0) where all
    coefficients stay nonzero, and a strongly regularized one (C=0.05)
    where group sparsity kicks in.
    """
    X, y = mult_dense_train_data
    # {-1, +1} label matrix, one column per class.
    Y = LabelBinarizer(neg_label=-1).fit_transform(y)

    model = CDClassifier(penalty="l1/l2",
                         loss="squared_hinge",
                         multiclass=False,
                         max_iter=20,
                         C=5.0,
                         random_state=0)
    model.fit(X, y)
    margins = model.decision_function(X)
    # For squared hinge, errors_ stores 1 - y * f(x) per class/sample.
    np.testing.assert_array_almost_equal(model.errors_.T, 1 - Y * margins)
    np.testing.assert_almost_equal(model.score(X, y), 0.8633, 3)
    assert np.sum(model.coef_ != 0) == 300

    # Smaller C => stronger penalty => some coefficients driven to zero.
    model = CDClassifier(penalty="l1/l2",
                         loss="squared_hinge",
                         multiclass=False,
                         max_iter=20,
                         C=0.05,
                         random_state=0)
    model.fit(X, y)
    np.testing.assert_almost_equal(model.score(X, y), 0.8266, 3)
    assert np.sum(model.coef_ != 0) == 231
# ---- Example 2 ----
def test_l1l2_multiclass_log_loss(data, request):
    """L1/L2 multiclass log loss: check errors_, score and group sparsity.

    Runs once with C=1.0 and once with stronger regularization (C=0.3);
    the nonzero count must be a multiple of the number of classes because
    the l1/l2 penalty zeroes whole feature groups.
    """
    X, y = request.getfixturevalue(data)

    model = CDClassifier(penalty="l1/l2",
                         loss="log",
                         multiclass=True,
                         max_steps=30,
                         max_iter=5,
                         C=1.0,
                         random_state=0)
    model.fit(X, y)
    np.testing.assert_almost_equal(model.score(X, y), 0.8766, 3)

    scores = model.decision_function(X)
    # Shift each row by the true-class score, then exponentiate:
    # errors_ should hold exp(f_k(x) - f_y(x)) per class/sample.
    true_class_scores = np.array(
        [scores[idx, int(label)] for idx, label in enumerate(y)])
    shifted = np.exp(scores - true_class_scores[:, np.newaxis])
    np.testing.assert_array_almost_equal(model.errors_, shifted.T)
    # The entry for the true class is exp(0) == 1.
    for idx in range(X.shape[0]):
        np.testing.assert_almost_equal(model.errors_[y[idx], idx], 1.0)
    assert np.sum(model.coef_ != 0) == 297

    model = CDClassifier(penalty="l1/l2",
                         loss="log",
                         multiclass=True,
                         max_steps=30,
                         max_iter=5,
                         C=0.3,
                         random_state=0)
    model.fit(X, y)
    np.testing.assert_almost_equal(model.score(X, y), 0.8566, 3)
    nonzero = np.sum(model.coef_ != 0)
    assert nonzero == 213
    assert nonzero % 3 == 0  # should be a multiple of n_classes
# ---- Example 3 ----
def test_l1l2_multiclass_squared_hinge_loss(data, request):
    """L1/L2 multiclass squared hinge: check errors_, score and sparsity."""
    X, y = request.getfixturevalue(data)

    model = CDClassifier(penalty="l1/l2",
                         loss="squared_hinge",
                         multiclass=True,
                         max_iter=20,
                         C=1.0,
                         random_state=0)
    model.fit(X, y)
    np.testing.assert_almost_equal(model.score(X, y), 0.913, 3)

    scores = model.decision_function(X)
    n_samples, n_vectors = scores.shape
    # errors_[k, i] should equal the multiclass hinge margin
    # 1 - (f_{y_i}(x_i) - f_k(x_i)).
    expected = np.zeros_like(model.errors_)
    for sample in range(n_samples):
        for vector in range(n_vectors):
            expected[vector, sample] = \
                1 - (scores[sample, y[sample]] - scores[sample, vector])
    np.testing.assert_array_almost_equal(model.errors_, expected)
    assert np.sum(model.coef_ != 0) == 300

    # Stronger regularization: whole feature groups are zeroed out.
    model = CDClassifier(penalty="l1/l2",
                         loss="squared_hinge",
                         multiclass=True,
                         max_iter=20,
                         C=0.05,
                         random_state=0)
    model.fit(X, y)
    np.testing.assert_almost_equal(model.score(X, y), 0.83, 3)
    nonzero = np.sum(model.coef_ != 0)
    assert nonzero == 207
    assert nonzero % 3 == 0  # should be a multiple of n_classes
# ---- Example 4 ----
def test_l1l2_multiclass_squared_hinge_loss_no_linesearch():
    """L1/L2 multiclass squared hinge without line search (max_steps=0).

    Fix: replaced Python-2-only ``xrange`` (NameError on Python 3) and the
    deprecated nose-style ``assert_equal``/``assert_true``/``assert_*`` helpers
    with ``range``, ``np.testing`` and plain ``assert``, matching the
    pytest-style tests elsewhere in this file.
    """
    data = mult_csc
    clf = CDClassifier(penalty="l1/l2",
                       loss="squared_hinge",
                       multiclass=True,
                       shrinking=False,
                       selection="uniform",
                       max_steps=0,   # disables the line search
                       max_iter=200,
                       C=1.0,
                       random_state=0)
    clf.fit(data, mult_target)
    np.testing.assert_almost_equal(clf.score(data, mult_target), 0.9166, 3)

    df = clf.decision_function(data)
    n_samples, n_vectors = df.shape
    # errors_[k, i] should hold 1 - (f_{y_i}(x_i) - f_k(x_i)).
    diff = np.zeros_like(clf.errors_)
    for i in range(n_samples):
        for k in range(n_vectors):
            diff[k, i] = 1 - (df[i, mult_target[i]] - df[i, k])
    np.testing.assert_array_almost_equal(clf.errors_, diff)
    assert np.sum(clf.coef_ != 0) == 300

    clf = CDClassifier(penalty="l1/l2",
                       loss="squared_hinge",
                       multiclass=True,
                       max_iter=20,
                       C=0.05,
                       random_state=0)
    clf.fit(data, mult_target)
    np.testing.assert_almost_equal(clf.score(data, mult_target), 0.83, 3)
    nz = np.sum(clf.coef_ != 0)
    assert nz == 207
    assert nz % 3 == 0  # should be a multiple of n_classes
# ---- Example 5 ----
def test_l1l2_multiclass_log_loss():
    """L1/L2 multiclass log loss on dense and sparse input.

    Fix: replaced Python-2-only ``xrange`` (NameError on Python 3) and the
    deprecated nose-style assertion helpers with ``range``, ``np.testing``
    and plain ``assert``, matching the pytest-style tests in this file.
    """
    for data in (mult_dense, mult_csc):
        clf = CDClassifier(penalty="l1/l2",
                           loss="log",
                           multiclass=True,
                           max_steps=30,
                           max_iter=5,
                           C=1.0,
                           random_state=0)
        clf.fit(data, mult_target)
        np.testing.assert_almost_equal(clf.score(data, mult_target),
                                       0.8766, 3)
        df = clf.decision_function(data)
        # Subtract each sample's true-class score; after exponentiation,
        # errors_ should equal exp(f_k(x) - f_y(x)) per class/sample.
        sel = np.array(
            [df[i, int(mult_target[i])] for i in range(df.shape[0])])
        df -= sel[:, np.newaxis]
        df = np.exp(df)
        np.testing.assert_array_almost_equal(clf.errors_, df.T)
        # The true-class entry is exp(0) == 1.
        for i in range(data.shape[0]):
            np.testing.assert_almost_equal(clf.errors_[mult_target[i], i],
                                           1.0)
        nz = np.sum(clf.coef_ != 0)
        assert nz == 297

        clf = CDClassifier(penalty="l1/l2",
                           loss="log",
                           multiclass=True,
                           max_steps=30,
                           max_iter=5,
                           C=0.3,
                           random_state=0)
        clf.fit(data, mult_target)
        np.testing.assert_almost_equal(clf.score(data, mult_target),
                                       0.8566, 3)
        nz = np.sum(clf.coef_ != 0)
        assert nz == 213
        assert nz % 3 == 0  # should be a multiple of n_classes
# ---- Example 6 ----
def test_l1l2_multi_task_squared_hinge_loss():
    """L1/L2 multi-task squared hinge: check errors_, score and sparsity.

    Fix: replaced the deprecated nose-style assertion helpers
    (``assert_equal``/``assert_almost_equal``/``assert_array_almost_equal``)
    with ``np.testing`` and plain ``assert``, for consistency with the
    pytest-style tests in this file.
    """
    # {-1, +1} label matrix, one column per class.
    Y = LabelBinarizer(neg_label=-1).fit_transform(mult_target)
    clf = CDClassifier(penalty="l1/l2", loss="squared_hinge",
                       multiclass=False,
                       max_iter=20, C=5.0, random_state=0)
    clf.fit(mult_dense, mult_target)
    df = clf.decision_function(mult_dense)
    # For squared hinge, errors_ stores 1 - y * f(x) per class/sample.
    np.testing.assert_array_almost_equal(clf.errors_.T, 1 - Y * df)
    np.testing.assert_almost_equal(clf.score(mult_dense, mult_target),
                                   0.8633, 3)
    assert np.sum(clf.coef_ != 0) == 300

    # Smaller C => stronger penalty => some coefficients driven to zero.
    clf = CDClassifier(penalty="l1/l2", loss="squared_hinge",
                       multiclass=False,
                       max_iter=20, C=0.05, random_state=0)
    clf.fit(mult_dense, mult_target)
    np.testing.assert_almost_equal(clf.score(mult_dense, mult_target),
                                   0.8266, 3)
    assert np.sum(clf.coef_ != 0) == 231
# ---- Example 7 ----
def test_l1l2_multiclass_log_loss():
    """L1/L2 multiclass log loss on dense and sparse input.

    Fix: replaced Python-2-only ``xrange`` (NameError on Python 3) and the
    deprecated nose-style assertion helpers with ``range``, ``np.testing``
    and plain ``assert``.
    """
    for data in (mult_dense, mult_csc):
        clf = CDClassifier(penalty="l1/l2", loss="log", multiclass=True,
                           max_steps=30, max_iter=5, C=1.0, random_state=0)
        clf.fit(data, mult_target)
        np.testing.assert_almost_equal(clf.score(data, mult_target),
                                       0.8766, 3)
        df = clf.decision_function(data)
        # Subtract each sample's true-class score; after exponentiation,
        # errors_ should equal exp(f_k(x) - f_y(x)) per class/sample.
        sel = np.array(
            [df[i, int(mult_target[i])] for i in range(df.shape[0])])
        df -= sel[:, np.newaxis]
        df = np.exp(df)
        np.testing.assert_array_almost_equal(clf.errors_, df.T)
        # The true-class entry is exp(0) == 1.
        for i in range(data.shape[0]):
            np.testing.assert_almost_equal(clf.errors_[mult_target[i], i],
                                           1.0)
        nz = np.sum(clf.coef_ != 0)
        assert nz == 297

        clf = CDClassifier(penalty="l1/l2", loss="log", multiclass=True,
                           max_steps=30, max_iter=5, C=0.3, random_state=0)
        clf.fit(data, mult_target)
        np.testing.assert_almost_equal(clf.score(data, mult_target),
                                       0.8566, 3)
        nz = np.sum(clf.coef_ != 0)
        assert nz == 213
        assert nz % 3 == 0  # should be a multiple of n_classes
# ---- Example 8 ----
def test_l1l2_multiclass_squared_hinge_loss_no_linesearch():
    """L1/L2 multiclass squared hinge without line search (max_steps=0).

    Fix: replaced Python-2-only ``xrange`` (NameError on Python 3) and the
    deprecated nose-style assertion helpers with ``range``, ``np.testing``
    and plain ``assert``.
    """
    data = mult_csc
    clf = CDClassifier(penalty="l1/l2", loss="squared_hinge",
                       multiclass=True, shrinking=False, selection="uniform",
                       max_steps=0, max_iter=200, C=1.0, random_state=0)
    clf.fit(data, mult_target)
    np.testing.assert_almost_equal(clf.score(data, mult_target), 0.9166, 3)
    df = clf.decision_function(data)
    n_samples, n_vectors = df.shape
    # errors_[k, i] should hold 1 - (f_{y_i}(x_i) - f_k(x_i)).
    diff = np.zeros_like(clf.errors_)
    for i in range(n_samples):
        for k in range(n_vectors):
            diff[k, i] = 1 - (df[i, mult_target[i]] - df[i, k])
    np.testing.assert_array_almost_equal(clf.errors_, diff)
    assert np.sum(clf.coef_ != 0) == 300

    clf = CDClassifier(penalty="l1/l2", loss="squared_hinge",
                       multiclass=True,
                       max_iter=20, C=0.05, random_state=0)
    clf.fit(data, mult_target)
    np.testing.assert_almost_equal(clf.score(data, mult_target), 0.83, 3)
    nz = np.sum(clf.coef_ != 0)
    assert nz == 207
    assert nz % 3 == 0  # should be a multiple of n_classes