Example #1
0
def test_search_plateau_tol(c, s, a, b):
    """Check how ``tol`` affects the number of ``partial_fit`` calls.

    Two searches are run with identical settings except for ``tol``; each
    is expected to report a single, uniform ``partial_fit_calls`` value in
    ``cv_results_``.

    The arguments ``c, s, a, b`` are not used in the body; presumably they
    are supplied by a cluster test decorator that is not visible here.
    Each ``fit`` result is yielded rather than returned, so the harness is
    expected to drive it -- TODO confirm against the decorator in use.
    """
    estimator = LinearFunction(slope=1)
    grid = {"foo": np.linspace(0, 1)}

    # Author's premise: every 3 calls, the score increases by 3.
    scenarios = (
        # tol=1: the model improved enough, so training runs to max_iter.
        (1, {10}),
        # tol=4: the model didn't improve enough, so training stops early.
        (4, {3}),
    )

    for tol, expected_calls in scenarios:
        search = IncrementalSearchCV(
            estimator,
            grid,
            patience=3,
            tol=tol,
            max_iter=10,
            decay_rate=0,
        )
        # Fresh data is generated for every scenario, as in the original.
        X, y = make_classification(n_samples=100, n_features=5, chunks=(10, 5))
        yield search.fit(X, y)

        observed_calls = set(search.cv_results_["partial_fit_calls"])
        assert observed_calls == expected_calls
Example #2
0
def test_search_basic_patience(c, s, a, b):
    """Check the maximum ``partial_fit_calls`` under two slope ranges.

    The same dataset and search settings are used twice; only the sampled
    ``slope`` values differ. The largest ``partial_fit_calls`` value in
    ``history_`` is then compared against the expected count.

    The arguments ``c, s, a, b`` are not used in the body; presumably they
    are supplied by a cluster test decorator that is not visible here.
    """
    X, y = make_classification(n_samples=100, n_features=5, chunks=(10, 5))

    rng = check_random_state(42)
    max_iter = 15
    patience = 5

    def build_search(estimator, grid):
        # ``tol`` is deliberately set to the same value as ``patience``.
        return IncrementalSearchCV(
            estimator,
            grid,
            max_iter=max_iter,
            tol=patience,
            patience=patience,
            decay_rate=0,
            scores_per_fit=3,
        )

    # Case 1 -- tol is too small (all models finish): slopes in [2, 3).
    steep_slopes = {"slope": 2 + rng.rand(1000)}
    model = LinearFunction()
    search = build_search(model, steep_slopes)
    yield search.fit(X, y, classes=[0, 1])

    most_calls = pd.DataFrame(search.history_)["partial_fit_calls"].max()
    # +1 (and +2 below) because scores_per_fit isn't exact
    assert most_calls == max_iter + 1

    # Case 2 -- tol is too large (no models finish): slopes in [0, 0.9).
    shallow_slopes = {"slope": 0 + 0.9 * rng.rand(1000)}
    search = build_search(model, shallow_slopes)
    yield search.fit(X, y, classes=[0, 1])

    most_calls = pd.DataFrame(search.history_)["partial_fit_calls"].max()
    assert most_calls == patience + 2