def test_small(c, s, a, b):
    """Smoke test: fit IncrementalSearch on a tiny problem and predict.

    Uses a small list-valued grid with ``n_initial_parameters="grid"`` so
    every parameter combination is tried.
    """
    X, y = make_classification(n_samples=100, n_features=5, chunks=(10, 5))
    estimator = SGDClassifier(tol=1e-3, penalty="elasticnet")
    param_grid = {"alpha": [0.1, 0.5, 0.75, 1.0]}
    search = IncrementalSearch(estimator, param_grid, n_initial_parameters="grid")
    yield search.fit(X, y, classes=[0, 1])
    # Materialize X locally so predict runs on a concrete array.
    (X_,) = yield c.compute([X])
    search.predict(X_)
def test_numpy_array(c, s, a, b):
    """Fitting should also work when X and y are in-memory NumPy arrays.

    The dask arrays are computed up front so the search receives concrete
    arrays rather than lazy collections.
    """
    X, y = make_classification(n_samples=100, n_features=5, chunks=(10, 5))
    X, y = yield c.compute([X, y])
    estimator = SGDClassifier(tol=1e-3, penalty="elasticnet")
    param_grid = {
        "alpha": np.logspace(-2, 10, 10),
        "l1_ratio": np.linspace(0.01, 1, 20),
    }
    search = IncrementalSearch(estimator, param_grid, n_initial_parameters=10)
    yield search.fit(X, y, classes=[0, 1])
def test_transform(c, s, a, b):
    """``transform`` on the fitted search delegates to the best estimator.

    MiniBatchKMeans.transform returns one column per cluster, so the output
    shape must be (n_samples, best n_clusters).
    """
    X, y = make_classification(n_samples=100, n_features=5, chunks=(10, 5))
    estimator = MiniBatchKMeans(random_state=0)
    param_grid = {"n_clusters": [3, 4, 5], "n_init": [1, 2]}
    search = IncrementalSearch(estimator, param_grid, n_initial_parameters="grid")
    yield search.fit(X, y)
    (X_,) = yield c.compute([X])
    transformed = search.transform(X_)
    assert transformed.shape == (100, search.best_estimator_.n_clusters)
def test_search_max_iter(c, s, a, b):
    """With ``max_iter=1`` no model may receive more than one partial_fit."""
    X, y = make_classification(n_samples=100, n_features=5, chunks=(10, 5))
    estimator = SGDClassifier(tol=1e-3, penalty="elasticnet")
    param_grid = {
        "alpha": np.logspace(-2, 10, 10),
        "l1_ratio": np.linspace(0.01, 1, 20),
    }
    search = IncrementalSearch(
        estimator, param_grid, n_initial_parameters=10, max_iter=1
    )
    yield search.fit(X, y, classes=[0, 1])
    # Every history record must respect the iteration cap.
    for record in search.history_results_:
        assert record["partial_fit_calls"] <= 1
def test_gridsearch(c, s, a, b):
    """``n_initial_parameters="grid"`` must evaluate the full ParameterGrid."""
    X, y = make_classification(n_samples=100, n_features=5, chunks=(10, 5))
    estimator = SGDClassifier(tol=1e-3)
    param_grid = {
        "alpha": np.logspace(-2, 10, 3),
        "l1_ratio": np.linspace(0.01, 1, 2),
    }
    search = IncrementalSearch(estimator, param_grid, n_initial_parameters="grid")
    yield search.fit(X, y, classes=[0, 1])
    # Compare as sets of frozensets: order of dicts and of items is irrelevant.
    tried = {frozenset(record["params"].items()) for record in search.history_results_}
    expected = {frozenset(combo.items()) for combo in ParameterGrid(param_grid)}
    assert tried == expected
def test_search_patience(c, s, a, b):
    """A constant score triggers early stopping once patience is exhausted.

    The classifier's score never improves, so with ``patience=2`` no model
    should be trained past 3 partial_fit calls.
    """
    X, y = make_classification(n_samples=100, n_features=5, chunks=(10, 5))

    class ConstantClassifier(SGDClassifier):
        # Score is flat on purpose: every evaluation looks like a plateau.
        def score(self, *args, **kwargs):
            return 0.5

    estimator = ConstantClassifier(tol=1e-3)
    param_grid = {
        "alpha": np.logspace(-2, 10, 100),
        "l1_ratio": np.linspace(0.01, 1, 200),
    }
    search = IncrementalSearch(estimator, param_grid, n_initial_parameters=10, patience=2)
    yield search.fit(X, y, classes=[0, 1])

    assert search.history_results_
    for record in search.history_results_:
        assert record["partial_fit_calls"] <= 3
    assert isinstance(search.best_estimator_, SGDClassifier)
    assert search.best_score_ > 0
    assert "visualize" not in search.__dict__

    # Post-fit API still works on concrete arrays.
    X_test, y_test = yield c.compute([X, y])
    search.predict(X_test)
    search.score(X_test, y_test)
def test_search(c, s, a, b):
    """End-to-end search: fit, inspect history, and exercise prediction APIs.

    ``loss="log"`` is required so the fitted estimator exposes
    ``predict_proba`` / ``predict_log_proba``.
    """
    X, y = make_classification(n_samples=1000, n_features=5, chunks=(100, 5))
    estimator = SGDClassifier(tol=1e-3, loss="log", penalty="elasticnet")
    param_grid = {
        "alpha": np.logspace(-2, 2, 100),
        "l1_ratio": np.linspace(0.01, 1, 200),
    }
    search = IncrementalSearch(estimator, param_grid, n_initial_parameters=10, max_iter=10)
    yield search.fit(X, y, classes=[0, 1])

    assert search.history_results_
    for record in search.history_results_:
        # One extra call is allowed for the final scoring pass.
        assert record["partial_fit_calls"] <= search.max_iter + 1
    assert isinstance(search.best_estimator_, SGDClassifier)
    assert search.best_score_ > 0
    assert "visualize" not in search.__dict__
    assert search.best_params_

    (X_,) = yield c.compute([X])
    proba = search.predict_proba(X_)
    log_proba = search.predict_log_proba(X_)
    assert proba.shape == (1000, 2)
    assert log_proba.shape == (1000, 2)
    decision = search.decision_function(X_)
    assert decision.shape == (1000,)