Ejemplo n.º 1
0
def test_kfolds(cls, has_shuffle):
    """Check tokenization and split counting for a k-fold style splitter.

    ``cls`` is the splitter class under test; ``has_shuffle`` says whether
    its constructor accepts ``shuffle`` / ``random_state``.
    """
    def token_of(**params):
        # Build a fresh splitter for every call so that equality of tokens
        # reflects the parameters, not object identity.
        return tokenize(cls(**params))

    # Same parameters -> same token; different n_splits -> different token.
    assert token_of(n_splits=3) == token_of(n_splits=3)
    assert token_of(n_splits=3) != token_of(n_splits=4)

    if has_shuffle:
        # Integer seeds: equal seeds agree.
        assert (token_of(shuffle=True, random_state=0, n_splits=3)
                == token_of(shuffle=True, random_state=0, n_splits=3))

        # The same RandomState instance passed twice must agree as well.
        rs = np.random.RandomState(42)
        assert (token_of(shuffle=True, random_state=rs, n_splits=3)
                == token_of(shuffle=True, random_state=rs, n_splits=3))

        # Different integer seeds must disagree.
        assert (token_of(shuffle=True, random_state=0, n_splits=3)
                != token_of(shuffle=True, random_state=2, n_splits=3))

        # Without shuffling and without a seed the token is still stable.
        assert (token_of(shuffle=False, random_state=None, n_splits=3)
                == token_of(shuffle=False, random_state=None, n_splits=3))

    splitter = cls(n_splits=3)
    assert compute_n_splits(splitter, np_X, np_y, np_groups) == 3

    # NOTE(review): assert_dask_compute(False) presumably asserts that no
    # dask computation happens inside the block -- confirm against its
    # definition.
    with assert_dask_compute(False):
        assert compute_n_splits(splitter, da_X, da_y, da_groups) == 3
Ejemplo n.º 2
0
def test_predefined_split():
    """Check tokenization and split counting for ``PredefinedSplit``."""
    # Two different fold assignments of the same total length (20 samples).
    four_folds = PredefinedSplit(np.array(list(range(4)) * 5))
    five_folds = PredefinedSplit(np.array(list(range(5)) * 4))

    # Tokenizing is repeatable, and distinct fold layouts get distinct tokens.
    assert tokenize(four_folds) == tokenize(four_folds)
    assert tokenize(four_folds) != tokenize(five_folds)

    # The splitter's own answer is the reference value.
    expected = four_folds.get_n_splits(np_X, np_y, np_groups)
    assert compute_n_splits(four_folds, np_X, np_y, np_groups) == expected

    # NOTE(review): assert_dask_compute(False) presumably asserts that no
    # dask computation happens inside the block -- confirm.
    with assert_dask_compute(False):
        assert compute_n_splits(four_folds, da_X, da_y, da_groups) == expected
Ejemplo n.º 3
0
def test_shuffle_split(cls):
    """Check tokenization and split counting for a shuffle-split splitter.

    ``cls`` is the splitter class under test; it is constructed with
    ``n_splits`` and, for the token checks, ``random_state``.
    """
    def token_of(n_splits, random_state):
        # A fresh instance per call, so tokens depend on parameters only.
        return tokenize(cls(n_splits=n_splits, random_state=random_state))

    # Identical parameters agree.
    assert token_of(3, 0) == token_of(3, 0)

    # A different seed changes the token.
    assert token_of(3, 0) != token_of(3, 2)

    # A different number of splits changes the token.
    assert token_of(3, 0) != token_of(4, 0)

    splitter = cls(n_splits=3)
    assert compute_n_splits(splitter, np_X, np_y, np_groups) == 3

    # NOTE(review): assert_dask_compute(False) presumably asserts that no
    # dask computation happens inside the block -- confirm.
    with assert_dask_compute(False):
        assert compute_n_splits(splitter, da_X, da_y, da_groups) == 3
Ejemplo n.º 4
0
def test_old_style_cv():
    """Check tokenization and split counting for ``_CVIterableWrapper``."""
    def mask(pattern):
        # Repeat a 4-element boolean pattern five times (20 entries).
        return np.array(pattern * 5)

    wrapped = _CVIterableWrapper([
        mask([True, False, True, False]),
        mask([False, True, False, True]),
    ])
    # Differs from ``wrapped`` only in the second array.
    other = _CVIterableWrapper([
        mask([True, False, True, False]),
        mask([False, True, True, True]),
    ])

    # Tokenizing is repeatable, and different contents give different tokens.
    assert tokenize(wrapped) == tokenize(wrapped)
    assert tokenize(wrapped) != tokenize(other)

    # The wrapper's own answer is the reference value.
    expected = wrapped.get_n_splits(np_X, np_y, np_groups)
    assert compute_n_splits(wrapped, np_X, np_y, np_groups) == expected

    # NOTE(review): assert_dask_compute(False) presumably asserts that no
    # dask computation happens inside the block -- confirm.
    with assert_dask_compute(False):
        assert compute_n_splits(wrapped, da_X, da_y, da_groups) == expected
Ejemplo n.º 5
0
def test_leave_group_out(cvs):
    """Check tokenization and split counting for group-based splitters.

    ``cvs`` is a sequence of splitter instances that are expected to
    tokenize differently from one another; the first one is also used for
    the ``compute_n_splits`` checks.
    """
    tokens = []
    for cv in cvs:
        token = tokenize(cv)
        # Tokenizing the same splitter twice must give the same token.
        assert token == tokenize(cv)
        # Collect the token itself, not the splitter object: the uniqueness
        # check below is meant to compare tokens, and comparing the
        # splitter instances would not exercise tokenize at all.
        tokens.append(token)
    # Every splitter in ``cvs`` must have produced a distinct token.
    assert len(set(tokens)) == len(tokens)

    cv = cvs[0]
    # The splitter's own answer is the reference value.
    sol = cv.get_n_splits(np_X, np_y, np_groups)
    assert compute_n_splits(cv, np_X, np_y, np_groups) == sol

    # NOTE(review): assert_dask_compute(True/False) presumably asserts
    # whether a dask computation happens inside the block -- confirm. With
    # dask groups (da_groups) the flag is True; with numpy groups
    # (np_groups) it is False.
    with assert_dask_compute(True):
        assert compute_n_splits(cv, da_X, da_y, da_groups) == sol

    with assert_dask_compute(False):
        assert compute_n_splits(cv, da_X, da_y, np_groups) == sol