def test_l1l2_multi_task_squared_hinge_loss(mult_dense_train_data):
    """Multi-task (multiclass=False) l1/l2 squared hinge: the stored
    errors_ must equal 1 - Y * decision_function, and score/sparsity
    must match known values for C=5.0 and C=0.05."""
    mult_dense, mult_target = mult_dense_train_data
    Y = LabelBinarizer(neg_label=-1).fit_transform(mult_target)

    clf = CDClassifier(penalty="l1/l2", loss="squared_hinge",
                       multiclass=False, max_iter=20, C=5.0, random_state=0)
    clf.fit(mult_dense, mult_target)
    df = clf.decision_function(mult_dense)
    # errors_ is stored transposed relative to the decision values.
    np.testing.assert_array_almost_equal(clf.errors_.T, 1 - Y * df)
    np.testing.assert_almost_equal(clf.score(mult_dense, mult_target),
                                   0.8633, 3)
    assert np.sum(clf.coef_ != 0) == 300

    # A much smaller C should shrink more coefficients to exactly zero.
    clf = CDClassifier(penalty="l1/l2", loss="squared_hinge",
                       multiclass=False, max_iter=20, C=0.05, random_state=0)
    clf.fit(mult_dense, mult_target)
    np.testing.assert_almost_equal(clf.score(mult_dense, mult_target),
                                   0.8266, 3)
    assert np.sum(clf.coef_ != 0) == 231
def test_l1l2_multiclass_log_loss(data, request):
    """Multiclass l1/l2 log loss: errors_ must equal exp(df - df_true)^T,
    with exactly 1.0 at each sample's true class; check score and sparsity
    for C=1.0 and C=0.3."""
    X, y = request.getfixturevalue(data)

    clf = CDClassifier(penalty="l1/l2", loss="log", multiclass=True,
                       max_steps=30, max_iter=5, C=1.0, random_state=0)
    clf.fit(X, y)
    np.testing.assert_almost_equal(clf.score(X, y), 0.8766, 3)

    # Shift each row by the decision value of its true class, then
    # exponentiate: errors_ should store exactly this (transposed).
    df = clf.decision_function(X)
    true_scores = np.array([df[i, int(y[i])] for i in range(df.shape[0])])
    df -= true_scores[:, np.newaxis]
    df = np.exp(df)
    np.testing.assert_array_almost_equal(clf.errors_, df.T)
    # The entry for the true class is exp(0) == 1 by construction.
    for i in range(X.shape[0]):
        np.testing.assert_almost_equal(clf.errors_[y[i], i], 1.0)
    assert np.sum(clf.coef_ != 0) == 297

    clf = CDClassifier(penalty="l1/l2", loss="log", multiclass=True,
                       max_steps=30, max_iter=5, C=0.3, random_state=0)
    clf.fit(X, y)
    np.testing.assert_almost_equal(clf.score(X, y), 0.8566, 3)
    nz = np.sum(clf.coef_ != 0)
    assert nz == 213
    assert nz % 3 == 0  # should be a multiple of n_classes
def test_l1l2_multiclass_squared_hinge_loss(data, request):
    """Multiclass l1/l2 squared hinge: errors_ must hold the margin
    violations 1 - (df_true - df_k) for every (class, sample) pair;
    check score and sparsity for C=1.0 and C=0.05."""
    X, y = request.getfixturevalue(data)

    clf = CDClassifier(penalty="l1/l2", loss="squared_hinge",
                       multiclass=True, max_iter=20, C=1.0, random_state=0)
    clf.fit(X, y)
    np.testing.assert_almost_equal(clf.score(X, y), 0.913, 3)

    df = clf.decision_function(X)
    n_samples, n_vectors = df.shape
    expected = np.zeros_like(clf.errors_)
    for sample in range(n_samples):
        for klass in range(n_vectors):
            # Margin violation of class `klass` w.r.t. the true class.
            expected[klass, sample] = 1 - (df[sample, y[sample]]
                                           - df[sample, klass])
    np.testing.assert_array_almost_equal(clf.errors_, expected)
    assert np.sum(clf.coef_ != 0) == 300

    clf = CDClassifier(penalty="l1/l2", loss="squared_hinge",
                       multiclass=True, max_iter=20, C=0.05, random_state=0)
    clf.fit(X, y)
    np.testing.assert_almost_equal(clf.score(X, y), 0.83, 3)
    nz = np.sum(clf.coef_ != 0)
    assert nz == 207
    assert nz % 3 == 0  # should be a multiple of n_classes
def test_l1l2_multiclass_squared_hinge_loss_no_linesearch():
    """Multiclass l1/l2 squared hinge without line search (max_steps=0,
    uniform selection, no shrinking) on sparse data: errors_ must hold the
    margin violations, and score/sparsity must match known values.

    Fixed: Python-2-only ``xrange`` and the removed nose-style helpers
    (``assert_equal``/``assert_true``/...) replaced with ``range``,
    ``np.testing`` and plain ``assert``, consistent with the other tests.
    """
    data = mult_csc
    clf = CDClassifier(penalty="l1/l2", loss="squared_hinge",
                       multiclass=True, shrinking=False, selection="uniform",
                       max_steps=0, max_iter=200, C=1.0, random_state=0)
    clf.fit(data, mult_target)
    np.testing.assert_almost_equal(clf.score(data, mult_target), 0.9166, 3)

    df = clf.decision_function(data)
    n_samples, n_vectors = df.shape
    diff = np.zeros_like(clf.errors_)
    for i in range(n_samples):
        for k in range(n_vectors):
            # Margin violation of class k w.r.t. the true class.
            diff[k, i] = 1 - (df[i, mult_target[i]] - df[i, k])
    np.testing.assert_array_almost_equal(clf.errors_, diff)
    assert np.sum(clf.coef_ != 0) == 300

    clf = CDClassifier(penalty="l1/l2", loss="squared_hinge",
                       multiclass=True, max_iter=20, C=0.05, random_state=0)
    clf.fit(data, mult_target)
    np.testing.assert_almost_equal(clf.score(data, mult_target), 0.83, 3)
    nz = np.sum(clf.coef_ != 0)
    assert nz == 207
    assert nz % 3 == 0  # should be a multiple of n_classes
def test_l1l2_multiclass_log_loss():
    """Multiclass l1/l2 log loss on dense and sparse data: errors_ must
    equal exp(df - df_true)^T with 1.0 at each true class; check score and
    sparsity for C=1.0 and C=0.3.

    Fixed: Python-2-only ``xrange`` (NameError on Python 3) and removed
    nose-style helpers replaced with ``range``, ``np.testing`` and plain
    ``assert``.
    """
    for data in (mult_dense, mult_csc):
        clf = CDClassifier(penalty="l1/l2", loss="log", multiclass=True,
                           max_steps=30, max_iter=5, C=1.0, random_state=0)
        clf.fit(data, mult_target)
        np.testing.assert_almost_equal(clf.score(data, mult_target),
                                       0.8766, 3)

        # Shift rows by the true-class score, exponentiate; errors_ must
        # store exactly this (transposed), hence 1.0 at the true class.
        df = clf.decision_function(data)
        sel = np.array(
            [df[i, int(mult_target[i])] for i in range(df.shape[0])])
        df -= sel[:, np.newaxis]
        df = np.exp(df)
        np.testing.assert_array_almost_equal(clf.errors_, df.T)
        for i in range(data.shape[0]):
            np.testing.assert_almost_equal(clf.errors_[mult_target[i], i],
                                           1.0)
        assert np.sum(clf.coef_ != 0) == 297

        clf = CDClassifier(penalty="l1/l2", loss="log", multiclass=True,
                           max_steps=30, max_iter=5, C=0.3, random_state=0)
        clf.fit(data, mult_target)
        np.testing.assert_almost_equal(clf.score(data, mult_target),
                                       0.8566, 3)
        nz = np.sum(clf.coef_ != 0)
        assert nz == 213
        assert nz % 3 == 0  # should be a multiple of n_classes
def test_l1l2_multi_task_squared_hinge_loss():
    """Multi-task (multiclass=False) l1/l2 squared hinge: errors_ must
    equal 1 - Y * decision_function; check score/sparsity for C=5.0 and
    C=0.05.

    Fixed: removed nose-style helpers (``assert_equal``/...) replaced with
    ``np.testing`` and plain ``assert``, consistent with the pytest-style
    tests in this file.
    """
    Y = LabelBinarizer(neg_label=-1).fit_transform(mult_target)

    clf = CDClassifier(penalty="l1/l2", loss="squared_hinge",
                       multiclass=False, max_iter=20, C=5.0, random_state=0)
    clf.fit(mult_dense, mult_target)
    df = clf.decision_function(mult_dense)
    # errors_ is stored transposed relative to the decision values.
    np.testing.assert_array_almost_equal(clf.errors_.T, 1 - Y * df)
    np.testing.assert_almost_equal(clf.score(mult_dense, mult_target),
                                   0.8633, 3)
    assert np.sum(clf.coef_ != 0) == 300

    # Smaller C should shrink more coefficients to exactly zero.
    clf = CDClassifier(penalty="l1/l2", loss="squared_hinge",
                       multiclass=False, max_iter=20, C=0.05, random_state=0)
    clf.fit(mult_dense, mult_target)
    np.testing.assert_almost_equal(clf.score(mult_dense, mult_target),
                                   0.8266, 3)
    assert np.sum(clf.coef_ != 0) == 231
def test_l1l2_multiclass_log_loss():
    """Multiclass l1/l2 log loss on dense and sparse data: errors_ must
    equal exp(df - df_true)^T with 1.0 at each true class; check score and
    sparsity for C=1.0 and C=0.3.

    Fixed: Python-2-only ``xrange`` (NameError on Python 3) and removed
    nose-style helpers replaced with ``range``, ``np.testing`` and plain
    ``assert``.
    """
    for data in (mult_dense, mult_csc):
        clf = CDClassifier(penalty="l1/l2", loss="log", multiclass=True,
                           max_steps=30, max_iter=5, C=1.0, random_state=0)
        clf.fit(data, mult_target)
        np.testing.assert_almost_equal(clf.score(data, mult_target),
                                       0.8766, 3)

        # Shift rows by the true-class score, exponentiate; errors_ must
        # store exactly this (transposed), hence 1.0 at the true class.
        df = clf.decision_function(data)
        sel = np.array([df[i, int(mult_target[i])]
                        for i in range(df.shape[0])])
        df -= sel[:, np.newaxis]
        df = np.exp(df)
        np.testing.assert_array_almost_equal(clf.errors_, df.T)
        for i in range(data.shape[0]):
            np.testing.assert_almost_equal(clf.errors_[mult_target[i], i],
                                           1.0)
        assert np.sum(clf.coef_ != 0) == 297

        clf = CDClassifier(penalty="l1/l2", loss="log", multiclass=True,
                           max_steps=30, max_iter=5, C=0.3, random_state=0)
        clf.fit(data, mult_target)
        np.testing.assert_almost_equal(clf.score(data, mult_target),
                                       0.8566, 3)
        nz = np.sum(clf.coef_ != 0)
        assert nz == 213
        assert nz % 3 == 0  # should be a multiple of n_classes