# Imports assumed for the tests below: the scikit-learn 0.18-dev ``labels``
# API, where the Label* cross-validators and the ``labels`` parameter of
# ``split``/``fit`` lived in ``sklearn.model_selection``.
import numpy as np

from scipy.special import comb

from sklearn.datasets import make_classification
from sklearn.svm import SVC, LinearSVC
from sklearn.model_selection import (GridSearchCV, cross_val_score,
                                     cross_val_predict, KFold, LeaveOneOut,
                                     LeavePOut, LeaveOneLabelOut,
                                     LeavePLabelOut, LabelKFold,
                                     LabelShuffleSplit, ShuffleSplit,
                                     StratifiedKFold, StratifiedShuffleSplit,
                                     PredefinedSplit)
from sklearn.utils.testing import (assert_equal, assert_array_equal,
                                   assert_raise_message)


def test_grid_search_labels():
    # Check if ValueError (when labels is None) propagates to GridSearchCV
    # And also check if labels is correctly passed to the cv object
    rng = np.random.RandomState(0)

    X, y = make_classification(n_samples=15, n_classes=2, random_state=0)
    labels = rng.randint(0, 3, 15)

    clf = LinearSVC(random_state=0)
    grid = {'C': [1]}

    label_cvs = [LeaveOneLabelOut(), LeavePLabelOut(2), LabelKFold(),
                 LabelShuffleSplit()]
    for cv in label_cvs:
        gs = GridSearchCV(clf, grid, cv=cv)
        assert_raise_message(ValueError,
                             "The labels parameter should not be None",
                             gs.fit, X, y)
        gs.fit(X, y, labels)

    non_label_cvs = [StratifiedKFold(), StratifiedShuffleSplit()]
    for cv in non_label_cvs:
        gs = GridSearchCV(clf, grid, cv=cv)
        # Should not raise an error
        gs.fit(X, y)
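
# A minimal standalone sketch (not part of the original suite) of the
# behaviour the test above relies on: ``GridSearchCV.fit(X, y, labels)``
# forwards ``labels`` to ``cv.split(X, y, labels)``, and a label-aware
# splitter keeps each label entirely on one side of every split. The helper
# name ``_demo_labels_forwarding`` is illustrative; the API assumed is the
# 0.18-dev one used throughout this file (``LabelKFold`` defaults to 3 folds).
def _demo_labels_forwarding():
    rng = np.random.RandomState(0)
    X, y = make_classification(n_samples=15, n_classes=2, random_state=0)
    labels = rng.randint(0, 3, 15)
    for train, test in LabelKFold().split(X, y, labels):
        # No label may straddle the train/test boundary of a single split
        assert set(labels[train]).isdisjoint(labels[test])
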
def test_cross_val_score_predict_labels():
    # Check if ValueError (when labels is None) propagates to
    # cross_val_score and cross_val_predict
    # And also check if labels is correctly passed to the cv object
    X, y = make_classification(n_samples=20, n_classes=2, random_state=0)

    clf = SVC(kernel="linear")

    label_cvs = [LeaveOneLabelOut(), LeavePLabelOut(2), LabelKFold(),
                 LabelShuffleSplit()]
    for cv in label_cvs:
        assert_raise_message(ValueError,
                             "The labels parameter should not be None",
                             cross_val_score, estimator=clf, X=X, y=y, cv=cv)
        assert_raise_message(ValueError,
                             "The labels parameter should not be None",
                             cross_val_predict, estimator=clf, X=X, y=y,
                             cv=cv)
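
# Sketch of the success path that complements the error assertions above,
# assuming the 0.18-dev signature ``cross_val_score(estimator, X, y, labels,
# ...)``; the helper name and the tiled label layout are illustrative only.
def _demo_cross_val_score_with_labels():
    X, y = make_classification(n_samples=20, n_classes=2, random_state=0)
    labels = np.tile([0, 1, 2, 3], 5)
    clf = SVC(kernel="linear")
    scores = cross_val_score(clf, X, y, labels=labels, cv=LeaveOneLabelOut())
    assert_equal(len(scores), 4)  # one score per held-out label
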
def test_cross_validator_with_default_params():
    n_samples = 4
    n_unique_labels = 4
    n_splits = 2
    p = 2
    n_shuffle_splits = 10  # (the default value)

    X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
    X_1d = np.array([1, 2, 3, 4])
    y = np.array([1, 1, 2, 2])
    labels = np.array([1, 2, 3, 4])
    loo = LeaveOneOut()
    lpo = LeavePOut(p)
    kf = KFold(n_splits)
    skf = StratifiedKFold(n_splits)
    lolo = LeaveOneLabelOut()
    lopo = LeavePLabelOut(p)
    ss = ShuffleSplit(random_state=0)
    ps = PredefinedSplit([1, 1, 2, 2])  # n_splits = no of unique folds = 2

    loo_repr = "LeaveOneOut()"
    lpo_repr = "LeavePOut(p=2)"
    kf_repr = "KFold(n_splits=2, random_state=None, shuffle=False)"
    skf_repr = "StratifiedKFold(n_splits=2, random_state=None, shuffle=False)"
    lolo_repr = "LeaveOneLabelOut()"
    lopo_repr = "LeavePLabelOut(n_labels=2)"
    ss_repr = ("ShuffleSplit(n_splits=10, random_state=0, test_size=0.1, "
               "train_size=None)")
    ps_repr = "PredefinedSplit(test_fold=array([1, 1, 2, 2]))"

    n_splits_expected = [n_samples, comb(n_samples, p), n_splits, n_splits,
                         n_unique_labels, comb(n_unique_labels, p),
                         n_shuffle_splits, 2]

    for i, (cv, cv_repr) in enumerate(zip(
            [loo, lpo, kf, skf, lolo, lopo, ss, ps],
            [loo_repr, lpo_repr, kf_repr, skf_repr, lolo_repr, lopo_repr,
             ss_repr, ps_repr])):
        # Test if get_n_splits works correctly
        assert_equal(n_splits_expected[i], cv.get_n_splits(X, y, labels))

        # Test if the cross-validator works as expected even if
        # the data is 1d
        np.testing.assert_equal(list(cv.split(X, y, labels)),
                                list(cv.split(X_1d, y, labels)))
        # Test that train, test indices returned are integers
        for train, test in cv.split(X, y, labels):
            assert_equal(np.asarray(train).dtype.kind, 'i')
            assert_equal(np.asarray(test).dtype.kind, 'i')

        # Test if the repr works without any errors
        assert_equal(cv_repr, repr(cv))
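
# The ``comb`` entries in ``n_splits_expected`` above are plain binomial
# counts: the exhaustive splitters enumerate every size-p subset of samples
# (LeavePOut) or labels (LeavePLabelOut). A quick sketch of the arithmetic;
# the helper name is illustrative and ``y=X`` is just a placeholder target.
def _demo_comb_counts():
    X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
    labels = np.array([1, 2, 3, 4])
    # C(4, 2) = 6 sample pairs ...
    assert_equal(comb(4, 2), LeavePOut(2).get_n_splits(X))
    # ... and C(4, 2) = 6 label pairs
    assert_equal(comb(4, 2),
                 LeavePLabelOut(2).get_n_splits(X, y=X, labels=labels))
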
def test_leave_label_out_changing_labels():
    # Check that LeaveOneLabelOut and LeavePLabelOut work normally if
    # the labels variable is changed before calling split
    labels = np.array([0, 1, 2, 1, 1, 2, 0, 0])
    X = np.ones(len(labels))
    labels_changing = np.array(labels, copy=True)
    lolo = LeaveOneLabelOut().split(X, labels=labels)
    lolo_changing = LeaveOneLabelOut().split(X, labels=labels)
    lplo = LeavePLabelOut(n_labels=2).split(X, labels=labels)
    lplo_changing = LeavePLabelOut(n_labels=2).split(X, labels=labels)
    labels_changing[:] = 0
    for llo, llo_changing in [(lolo, lolo_changing), (lplo, lplo_changing)]:
        for (train, test), (train_chan, test_chan) in zip(llo, llo_changing):
            assert_array_equal(train, train_chan)
            assert_array_equal(test, test_chan)

    # n_splits = no of 2 (p) label combinations of the unique labels
    # = 3C2 = 3
    assert_equal(3, LeavePLabelOut(n_labels=2).get_n_splits(X, y=X,
                                                            labels=labels))
    # n_splits = no of unique labels (C(uniq_lbls, 1) = n_unique_labels)
    assert_equal(3, LeaveOneLabelOut().get_n_splits(X, y=X, labels=labels))
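
# For reference, what LeaveOneLabelOut yields on the 3-label array used
# above: one split per unique label, holding out every sample of that label
# (a sketch; the helper name is illustrative, the API is the 0.18-dev one).
def _demo_leave_one_label_out():
    labels = np.array([0, 1, 2, 1, 1, 2, 0, 0])
    X = np.ones(len(labels))
    held_out = [set(labels[test])
                for _, test in LeaveOneLabelOut().split(X, labels=labels)]
    assert_equal(held_out, [{0}, {1}, {2}])
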
def test_cross_validator_with_default_indices():
    n_samples = 4
    n_unique_labels = 4
    n_folds = 2
    p = 2
    n_iter = 10  # (the default value)

    X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
    X_1d = np.array([1, 2, 3, 4])
    y = np.array([1, 1, 2, 2])
    labels = np.array([1, 2, 3, 4])
    loo = LeaveOneOut()
    lpo = LeavePOut(p)
    kf = KFold(n_folds)
    skf = StratifiedKFold(n_folds)
    lolo = LeaveOneLabelOut()
    lopo = LeavePLabelOut(p)
    ss = ShuffleSplit(random_state=0)
    ps = PredefinedSplit([1, 1, 2, 2])  # n_splits = no of unique folds = 2

    n_splits = [n_samples, comb(n_samples, p), n_folds, n_folds,
                n_unique_labels, comb(n_unique_labels, p), n_iter, 2]

    for i, cv in enumerate([loo, lpo, kf, skf, lolo, lopo, ss, ps]):
        # Test if get_n_splits works correctly
        assert_equal(n_splits[i], cv.get_n_splits(X, y, labels))

        # Test if the cross-validator works as expected even if
        # the data is 1d
        np.testing.assert_equal(list(cv.split(X, y, labels)),
                                list(cv.split(X_1d, y, labels)))
        # Test that train, test indices returned are integers
        for train, test in cv.split(X, y, labels):
            assert_equal(np.asarray(train).dtype.kind, 'i')
            assert_equal(np.asarray(test).dtype.kind, 'i')
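
# The dtype assertions above matter because train/test indices are consumed
# via fancy indexing; a small sketch of that consumption pattern (the helper
# name is illustrative):
def _demo_index_consumption():
    X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
    y = np.array([1, 1, 2, 2])
    for train, test in KFold(2).split(X, y):
        X_train, X_test = X[train], X[test]  # requires integer indices
        assert_equal(len(X_train) + len(X_test), len(X))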