def test_l1l2_multi_task_squared_hinge_loss():
    """Multi-task (one-vs-rest) squared-hinge loss with an l1/l2 penalty.

    Checks the stored per-class error matrix against 1 - y * f(x),
    plus accuracy and sparsity levels at a weak (C=5.0) and a strong
    (C=0.05) regularization setting.
    """
    # +1/-1 label matrix, one column per class.
    y_bin = LabelBinarizer(neg_label=-1).fit_transform(mult_target)

    # Weak regularization: all 300 coefficients stay non-zero.
    clf = CDClassifier(penalty="l1/l2", loss="squared_hinge",
                       multiclass=False, max_iter=20, C=5.0, random_state=0)
    clf.fit(mult_dense, mult_target)
    decisions = clf.decision_function(mult_dense)
    # errors_ holds the hinge residuals 1 - y * f(x), transposed.
    assert_array_almost_equal(clf.errors_.T, 1 - y_bin * decisions)
    assert_almost_equal(clf.score(mult_dense, mult_target), 0.8633, 3)
    n_nonzero = np.sum(clf.coef_ != 0)
    assert_equal(n_nonzero, 300)

    # Strong regularization: some coefficients are driven to zero.
    clf = CDClassifier(penalty="l1/l2", loss="squared_hinge",
                       multiclass=False, max_iter=20, C=0.05, random_state=0)
    clf.fit(mult_dense, mult_target)
    assert_almost_equal(clf.score(mult_dense, mult_target), 0.8266, 3)
    n_nonzero = np.sum(clf.coef_ != 0)
    assert_equal(n_nonzero, 231)
def test_l1l2_multiclass_squared_hinge_loss_no_linesearch():
    """True multiclass squared-hinge loss, line search disabled.

    With max_steps=0 (no line search), shrinking off, and uniform
    coordinate selection on sparse input, verifies accuracy, the stored
    margin-violation matrix, and coefficient sparsity.
    """
    data = mult_csc  # sparse CSC input

    clf = CDClassifier(penalty="l1/l2", loss="squared_hinge",
                       multiclass=True, shrinking=False, selection="uniform",
                       max_steps=0, max_iter=200, C=1.0, random_state=0)
    clf.fit(data, mult_target)
    assert_almost_equal(clf.score(data, mult_target), 0.9166, 3)

    # Rebuild the expected error matrix entry by entry:
    # errors_[k, i] = 1 - (f_{y_i}(x_i) - f_k(x_i)).
    decisions = clf.decision_function(data)
    n_samples, n_vectors = decisions.shape
    expected_errors = np.zeros_like(clf.errors_)
    for sample_idx in xrange(n_samples):
        true_score = decisions[sample_idx, mult_target[sample_idx]]
        for vec_idx in xrange(n_vectors):
            expected_errors[vec_idx, sample_idx] = \
                1 - (true_score - decisions[sample_idx, vec_idx])
    assert_array_almost_equal(clf.errors_, expected_errors)
    assert_equal(np.sum(clf.coef_ != 0), 300)

    # Stronger regularization sparsifies the solution.
    clf = CDClassifier(penalty="l1/l2", loss="squared_hinge",
                       multiclass=True, max_iter=20, C=0.05, random_state=0)
    clf.fit(data, mult_target)
    assert_almost_equal(clf.score(data, mult_target), 0.83, 3)
    n_nonzero = np.sum(clf.coef_ != 0)
    assert_equal(n_nonzero, 207)
    assert_true(n_nonzero % 3 == 0)  # should be a multiple of n_classes
def test_l1l2_multiclass_log_loss():
    """Multiclass log loss with an l1/l2 penalty, dense and sparse input.

    The stored errors_ matrix should equal exp(f_k(x) - f_{y}(x)) for
    every class k, which is 1.0 on the true-class row. Also checks
    accuracy and sparsity at C=1.0 and C=0.3.
    """
    for data in (mult_dense, mult_csc):
        clf = CDClassifier(penalty="l1/l2", loss="log", multiclass=True,
                           max_steps=30, max_iter=5, C=1.0, random_state=0)
        clf.fit(data, mult_target)
        assert_almost_equal(clf.score(data, mult_target), 0.8766, 3)

        # Shift scores by the true-class score and exponentiate:
        # exp(f_k - f_y), which is exactly what errors_ stores.
        decisions = clf.decision_function(data)
        true_scores = np.array([decisions[i, int(mult_target[i])]
                                for i in xrange(decisions.shape[0])])
        decisions -= true_scores[:, np.newaxis]
        decisions = np.exp(decisions)
        assert_array_almost_equal(clf.errors_, decisions.T)
        # On the true-class row the ratio is exp(0) == 1.
        for i in xrange(data.shape[0]):
            assert_almost_equal(clf.errors_[mult_target[i], i], 1.0)
        n_nonzero = np.sum(clf.coef_ != 0)
        assert_equal(n_nonzero, 297)

        # Stronger regularization sparsifies the solution.
        clf = CDClassifier(penalty="l1/l2", loss="log", multiclass=True,
                           max_steps=30, max_iter=5, C=0.3, random_state=0)
        clf.fit(data, mult_target)
        assert_almost_equal(clf.score(data, mult_target), 0.8566, 3)
        n_nonzero = np.sum(clf.coef_ != 0)
        assert_equal(n_nonzero, 213)
        assert_true(n_nonzero % 3 == 0)  # should be a multiple of n_classes
# NOTE(review): this redefines test_l1l2_multiclass_log_loss, shadowing the
# earlier definition of the same name in this file, so only this copy is
# collected by the test runner. The two bodies look identical — confirm and
# delete one, or give this one a distinct name (e.g. a _csc suffix).
def test_l1l2_multiclass_log_loss():
    """Multiclass log loss with an l1/l2 penalty, dense and sparse input.

    errors_ should hold exp(f_k(x) - f_{y}(x)) per class, i.e. 1.0 on
    the true-class row; accuracy and sparsity are pinned for C=1.0 and
    C=0.3.
    """
    for data in (mult_dense, mult_csc):
        clf = CDClassifier(penalty="l1/l2", loss="log", multiclass=True,
                           max_steps=30, max_iter=5, C=1.0, random_state=0)
        clf.fit(data, mult_target)
        assert_almost_equal(clf.score(data, mult_target), 0.8766, 3)

        # Subtract each sample's true-class score, then exponentiate:
        # the result exp(f_k - f_y) must match errors_ (transposed).
        scores = clf.decision_function(data)
        ref = np.array([scores[row, int(mult_target[row])]
                        for row in xrange(scores.shape[0])])
        scores -= ref[:, np.newaxis]
        scores = np.exp(scores)
        assert_array_almost_equal(clf.errors_, scores.T)
        # True-class entries are exp(0) == 1.
        for row in xrange(data.shape[0]):
            assert_almost_equal(clf.errors_[mult_target[row], row], 1.0)
        nnz = np.sum(clf.coef_ != 0)
        assert_equal(nnz, 297)

        # More regularization -> sparser coefficients.
        clf = CDClassifier(penalty="l1/l2", loss="log", multiclass=True,
                           max_steps=30, max_iter=5, C=0.3, random_state=0)
        clf.fit(data, mult_target)
        assert_almost_equal(clf.score(data, mult_target), 0.8566, 3)
        nnz = np.sum(clf.coef_ != 0)
        assert_equal(nnz, 213)
        assert_true(nnz % 3 == 0)  # should be a multiple of n_classes