def test_kfolds(cls, has_shuffle):
    """Check tokenization and ``compute_n_splits`` for a splitter class.

    ``cls`` is instantiated with ``n_splits`` and, when ``has_shuffle`` is
    true, additionally with ``shuffle`` and ``random_state`` keyword
    arguments.  The ``np_*`` / ``da_*`` inputs and ``assert_dask_compute``
    are defined elsewhere in the file.
    """

    def token_of(**params):
        # Build a fresh splitter on every call so that equal tokens cannot
        # come from tokenizing the very same object twice.
        return tokenize(cls(**params))

    # Same n_splits -> same token; different n_splits -> different token.
    assert token_of(n_splits=3) == token_of(n_splits=3)
    assert token_of(n_splits=3) != token_of(n_splits=4)

    if has_shuffle:
        # Identical integer seeds must tokenize identically.
        assert token_of(shuffle=True, random_state=0, n_splits=3) == token_of(
            shuffle=True, random_state=0, n_splits=3)

        # One RandomState instance shared by both splitters.
        rng = np.random.RandomState(42)
        assert token_of(shuffle=True, random_state=rng, n_splits=3) == token_of(
            shuffle=True, random_state=rng, n_splits=3)

        # Different integer seeds must tokenize differently.
        assert token_of(shuffle=True, random_state=0, n_splits=3) != token_of(
            shuffle=True, random_state=2, n_splits=3)

        # Shuffling disabled and no seed given.
        assert token_of(shuffle=False, random_state=None, n_splits=3) == token_of(
            shuffle=False, random_state=None, n_splits=3)

    splitter = cls(n_splits=3)
    assert compute_n_splits(splitter, np_X, np_y, np_groups) == 3

    # NOTE(review): assert_dask_compute(False) presumably asserts that no
    # dask computation is triggered inside the block -- confirm against its
    # definition.
    with assert_dask_compute(False):
        assert compute_n_splits(splitter, da_X, da_y, da_groups) == 3
def test_predefined_split():
    """Check tokenization and ``compute_n_splits`` for ``PredefinedSplit``.

    Two splitters are built from different 20-element integer arrays: one
    repeating ``0..3`` five times, the other repeating ``0..4`` four times.
    """
    four_way = np.array(list(range(4)) * 5)
    five_way = np.array(list(range(5)) * 4)
    splitter = PredefinedSplit(four_way)
    other_splitter = PredefinedSplit(five_way)

    # Tokenizing the same object twice is stable; different fold arrays
    # must give different tokens.
    assert tokenize(splitter) == tokenize(splitter)
    assert tokenize(splitter) != tokenize(other_splitter)

    # The splitter's own get_n_splits is the reference answer.
    expected = splitter.get_n_splits(np_X, np_y, np_groups)
    assert compute_n_splits(splitter, np_X, np_y, np_groups) == expected

    # NOTE(review): assert_dask_compute(False) presumably asserts that no
    # dask computation is triggered inside the block -- confirm against its
    # definition.
    with assert_dask_compute(False):
        assert compute_n_splits(splitter, da_X, da_y, da_groups) == expected
def test_shuffle_split(cls):
    """Check tokenization and ``compute_n_splits`` for a splitter class.

    ``cls`` is instantiated with ``n_splits`` and ``random_state`` keyword
    arguments.  The ``np_*`` / ``da_*`` inputs and ``assert_dask_compute``
    are defined elsewhere in the file.
    """

    def token_of(n_splits, random_state):
        # A fresh splitter per call keeps the comparison between distinct
        # but identically configured objects.
        return tokenize(cls(n_splits=n_splits, random_state=random_state))

    # Same configuration -> same token.
    assert token_of(3, 0) == token_of(3, 0)
    # A different seed or a different split count changes the token.
    assert token_of(3, 0) != token_of(3, 2)
    assert token_of(3, 0) != token_of(4, 0)

    splitter = cls(n_splits=3)
    assert compute_n_splits(splitter, np_X, np_y, np_groups) == 3

    # NOTE(review): assert_dask_compute(False) presumably asserts that no
    # dask computation is triggered inside the block -- confirm against its
    # definition.
    with assert_dask_compute(False):
        assert compute_n_splits(splitter, da_X, da_y, da_groups) == 3
def test_old_style_cv():
    """Check tokenization and ``compute_n_splits`` for ``_CVIterableWrapper``.

    Two wrappers are built from lists of 20-element boolean arrays; they
    share the first array's pattern and differ only in the second array.
    """
    first_a = np.array([True, False, True, False] * 5)
    second_a = np.array([False, True, False, True] * 5)
    wrapper_a = _CVIterableWrapper([first_a, second_a])

    first_b = np.array([True, False, True, False] * 5)
    second_b = np.array([False, True, True, True] * 5)
    wrapper_b = _CVIterableWrapper([first_b, second_b])

    # Tokenizing the same wrapper twice is stable; wrappers with different
    # contents must give different tokens.
    assert tokenize(wrapper_a) == tokenize(wrapper_a)
    assert tokenize(wrapper_a) != tokenize(wrapper_b)

    # The wrapper's own get_n_splits is the reference answer.
    expected = wrapper_a.get_n_splits(np_X, np_y, np_groups)
    assert compute_n_splits(wrapper_a, np_X, np_y, np_groups) == expected

    # NOTE(review): assert_dask_compute(False) presumably asserts that no
    # dask computation is triggered inside the block -- confirm against its
    # definition.
    with assert_dask_compute(False):
        assert compute_n_splits(wrapper_a, da_X, da_y, da_groups) == expected
def test_leave_group_out(cvs):
    """Check tokenization and ``compute_n_splits`` for a set of splitters.

    ``cvs`` is an indexable collection of splitter instances.  Each one must
    tokenize deterministically, and all of them must tokenize differently
    from one another.  ``compute_n_splits`` is then checked for the first
    splitter against its own ``get_n_splits`` result.
    """
    tokens = []
    for cv in cvs:
        token = tokenize(cv)
        assert token == tokenize(cv)
        # Collect the token, not the splitter object: the uniqueness check
        # below is about tokens, and distinct objects would trivially pass.
        tokens.append(token)
    assert len(set(tokens)) == len(tokens)

    cv = cvs[0]
    # The splitter's own get_n_splits is the reference answer.
    sol = cv.get_n_splits(np_X, np_y, np_groups)
    assert compute_n_splits(cv, np_X, np_y, np_groups) == sol

    # NOTE(review): assert_dask_compute(flag) presumably asserts whether a
    # dask computation happens inside the block -- confirm against its
    # definition.  With ``da_groups`` a compute is expected ...
    with assert_dask_compute(True):
        assert compute_n_splits(cv, da_X, da_y, da_groups) == sol

    # ... whereas with ``np_groups`` none is expected.
    with assert_dask_compute(False):
        assert compute_n_splits(cv, da_X, da_y, np_groups) == sol