예제 #1
0
def test_small(c, s, a, b):
    """Fit a grid-mode ``IncrementalSearch`` over ``alpha`` and call ``predict``.

    Only ``c`` is used in the body (through ``c.compute``). ``s``, ``a`` and
    ``b`` are presumably the scheduler and workers injected by the test
    harness -- TODO confirm against the decorator, which is not visible here.
    """
    X, y = make_classification(n_samples=100, n_features=5, chunks=(10, 5))

    alpha_grid = {"alpha": [0.1, 0.5, 0.75, 1.0]}
    estimator = SGDClassifier(tol=1e-3, penalty="elasticnet")
    searcher = IncrementalSearch(
        estimator, alpha_grid, n_initial_parameters="grid"
    )
    yield searcher.fit(X, y, classes=[0, 1])

    # ``c.compute`` is handed a one-element list, so exactly one result is
    # unpacked; ``predict`` is only checked for not raising.
    (X_computed,) = yield c.compute([X])
    searcher.predict(X_computed)
예제 #2
0
def test_numpy_array(c, s, a, b):
    """Fit ``IncrementalSearch`` on data that went through ``c.compute`` first.

    The features and labels are passed to ``c.compute`` and the yielded
    results (presumably concrete NumPy arrays, per the test name -- TODO
    confirm) are what ``fit`` receives. The test passes if ``fit`` completes.
    """
    lazy_X, lazy_y = make_classification(
        n_samples=100, n_features=5, chunks=(10, 5)
    )
    X, y = yield c.compute([lazy_X, lazy_y])

    estimator = SGDClassifier(tol=1e-3, penalty="elasticnet")
    search_space = {
        "alpha": np.logspace(-2, 10, 10),
        "l1_ratio": np.linspace(0.01, 1, 20),
    }
    searcher = IncrementalSearch(
        estimator, search_space, n_initial_parameters=10
    )
    yield searcher.fit(X, y, classes=[0, 1])
예제 #3
0
def test_transform(c, s, a, b):
    """Check that ``transform`` output has one column per cluster.

    A ``MiniBatchKMeans`` is searched over ``n_clusters`` and ``n_init`` in
    grid mode; the transformed array must have 100 rows and as many columns
    as the best estimator's ``n_clusters``.
    """
    X, y = make_classification(n_samples=100, n_features=5, chunks=(10, 5))

    clusterer = MiniBatchKMeans(random_state=0)
    cluster_grid = {"n_clusters": [3, 4, 5], "n_init": [1, 2]}
    searcher = IncrementalSearch(
        clusterer, cluster_grid, n_initial_parameters="grid"
    )
    yield searcher.fit(X, y)

    (X_computed,) = yield c.compute([X])
    transformed = searcher.transform(X_computed)

    expected_shape = (100, searcher.best_estimator_.n_clusters)
    assert transformed.shape == expected_shape
예제 #4
0
def test_search_max_iter(c, s, a, b):
    """With ``max_iter=1`` no history record may exceed one ``partial_fit`` call."""
    X, y = make_classification(n_samples=100, n_features=5, chunks=(10, 5))

    estimator = SGDClassifier(tol=1e-3, penalty="elasticnet")
    search_space = {
        "alpha": np.logspace(-2, 10, 10),
        "l1_ratio": np.linspace(0.01, 1, 20),
    }
    searcher = IncrementalSearch(
        estimator, search_space, n_initial_parameters=10, max_iter=1
    )
    yield searcher.fit(X, y, classes=[0, 1])

    # Every entry of the recorded history must respect the iteration cap.
    for record in searcher.history_results_:
        assert record["partial_fit_calls"] <= 1
예제 #5
0
def test_gridsearch(c, s, a, b):
    """Grid mode must try exactly the combinations ``ParameterGrid`` yields.

    The parameter dicts recorded in ``history_results_`` are compared, as a
    set of frozen item-sets, with the full ``ParameterGrid`` over the same
    parameter space.
    """
    X, y = make_classification(n_samples=100, n_features=5, chunks=(10, 5))

    estimator = SGDClassifier(tol=1e-3)
    grid_spec = {
        "alpha": np.logspace(-2, 10, 3),
        "l1_ratio": np.linspace(0.01, 1, 2),
    }
    searcher = IncrementalSearch(
        estimator, grid_spec, n_initial_parameters="grid"
    )
    yield searcher.fit(X, y, classes=[0, 1])

    # Dicts are unhashable, so each parameter combination is frozen into a
    # set of (name, value) pairs before the set comparison.
    tried = {
        frozenset(record["params"].items())
        for record in searcher.history_results_
    }
    expected = {frozenset(combo.items()) for combo in ParameterGrid(grid_spec)}
    assert tried == expected
예제 #6
0
def test_search_patience(c, s, a, b):
    """Exercise ``patience=2`` with an estimator whose score never changes.

    ``ConstantClassifier.score`` always returns 0.5, and the test asserts
    that no history record exceeds three ``partial_fit`` calls. It then
    checks the fitted attributes and that ``predict`` / ``score`` run on the
    results of ``c.compute``.
    """
    X, y = make_classification(n_samples=100, n_features=5, chunks=(10, 5))

    class ConstantClassifier(SGDClassifier):
        # The signature is deliberately permissive: whatever is passed in,
        # the score stays fixed at 0.5.
        def score(*args, **kwargs):
            return 0.5

    estimator = ConstantClassifier(tol=1e-3)
    search_space = {
        "alpha": np.logspace(-2, 10, 100),
        "l1_ratio": np.linspace(0.01, 1, 200),
    }
    searcher = IncrementalSearch(
        estimator, search_space, n_initial_parameters=10, patience=2
    )
    yield searcher.fit(X, y, classes=[0, 1])

    history = searcher.history_results_
    assert history
    for record in history:
        assert record["partial_fit_calls"] <= 3

    assert isinstance(searcher.best_estimator_, SGDClassifier)
    assert searcher.best_score_ > 0
    assert "visualize" not in vars(searcher)

    X_test, y_test = yield c.compute([X, y])

    # Smoke checks only: both calls just need to complete without raising.
    searcher.predict(X_test)
    searcher.score(X_test, y_test)
예제 #7
0
def test_search(c, s, a, b):
    """End-to-end check of ``IncrementalSearch`` with a logistic SGD model.

    After fitting with ``max_iter=10`` the test verifies the recorded
    history, the ``best_*`` attributes, and the output shapes of
    ``predict_proba``, ``predict_log_proba`` and ``decision_function`` on
    the 1000-sample input.
    """
    X, y = make_classification(n_samples=1000, n_features=5, chunks=(100, 5))

    # NOTE(review): newer scikit-learn releases spell this loss "log_loss";
    # confirm against the pinned version before changing the literal.
    estimator = SGDClassifier(tol=1e-3, loss="log", penalty="elasticnet")
    search_space = {
        "alpha": np.logspace(-2, 2, 100),
        "l1_ratio": np.linspace(0.01, 1, 200),
    }
    searcher = IncrementalSearch(
        estimator, search_space, n_initial_parameters=10, max_iter=10
    )
    yield searcher.fit(X, y, classes=[0, 1])

    history = searcher.history_results_
    assert history
    for record in history:
        assert record["partial_fit_calls"] <= searcher.max_iter + 1

    assert isinstance(searcher.best_estimator_, SGDClassifier)
    assert searcher.best_score_ > 0
    assert "visualize" not in vars(searcher)
    assert searcher.best_params_

    (X_computed,) = yield c.compute([X])

    probabilities = searcher.predict_proba(X_computed)
    log_probabilities = searcher.predict_log_proba(X_computed)
    assert probabilities.shape == (1000, 2)
    assert log_probabilities.shape == (1000, 2)

    decision_values = searcher.decision_function(X_computed)
    assert decision_values.shape == (1000, )