def test_generic_estimator_for_classification():
    """Fit an ``H2OAutoMLEstimator`` classifier inside a pipeline on numpy data.

    Checks, in order: the leaderboard size after fitting, the type and shape
    of the predictions, that ``predict_proba`` returns one row per test sample
    with 3 columns summing to 1, and that ``pipeline.score`` agrees with
    ``accuracy_score`` computed from the same predictions.
    """
    pipeline = make_pipeline(
        H2OAutoMLEstimator(estimator_type='classifier',
                           seed=seed,
                           init_connection_args=init_connection_args))
    # Parameters are set both through the pipeline's `<step>__<param>` syntax
    # and as a plain attribute on the step itself.
    pipeline.set_params(h2oautomlestimator__max_models=max_models,
                        h2oautomlestimator__nfolds=3)
    pipeline.named_steps.h2oautomlestimator.exclude_algos = ['XGBoost']

    data = _get_data(format='numpy', n_classes=3)
    assert isinstance(data.X_train, np.ndarray)

    pipeline.fit(data.X_train, data.y_train)
    # Use the public `estimator` accessor rather than the private `_estimator`
    # attribute, consistent with the other tests on this wrapper.
    automl = pipeline.named_steps.h2oautomlestimator.estimator
    # NOTE(review): the 2 rows beyond `max_models` are presumably the stacked
    # ensembles added by AutoML -- confirm against the H2O version in use.
    assert len(automl.leaderboard) == max_models + 2

    preds = pipeline.predict(data.X_test)
    assert isinstance(preds, np.ndarray)
    assert preds.shape == (len(data.X_test), )
    probs = pipeline.predict_proba(data.X_test)
    assert probs.shape == (len(data.X_test), 3)
    assert np.allclose(np.sum(probs, axis=1),
                       1.), "`predict_proba` didn't return probabilities"

    score = pipeline.score(data.X_test, data.y_test)
    assert isinstance(score, float)
    skl_score = accuracy_score(data.y_test, preds)
    assert abs(score - skl_score) < 1e-6, "score={}, skl_score={}".format(
        score, skl_score)
# Example #2
# 0
def test_params_are_correctly_passed_to_underlying_automl():
    """Check that parameters set on the wrapper reach the AutoML ``build_control``.

    Parameters are supplied three ways (constructor, ``set_params`` and plain
    attribute assignment) and then looked up in the ``build_control`` of the
    object exposed as ``estimator`` once ``_make_estimator`` has been called.
    """
    wrapper = H2OAutoMLEstimator(seed=seed)
    wrapper.set_params(max_models=5, nfolds=0)
    wrapper.project_name = "dummy"

    # No underlying estimator is exposed before it is explicitly built.
    assert wrapper.estimator is None
    wrapper._make_estimator()  # normally done when calling `fit`

    aml = wrapper.estimator
    assert aml

    control = aml.build_control
    stopping = control["stopping_criteria"]
    assert stopping["seed"] == seed
    assert stopping["max_models"] == 5
    assert control["nfolds"] == 0
    assert control["project_name"] == "dummy"
def test_generic_estimator_for_regression():
    """Fit an ``H2OAutoMLEstimator`` regressor inside a pipeline on numpy data.

    Checks, in order: the leaderboard size after fitting, the type and shape
    of the predictions, and that ``pipeline.score`` agrees with ``r2_score``
    computed from the same predictions.
    """
    pipeline = make_pipeline(
        H2OAutoMLEstimator(estimator_type='regressor',
                           seed=seed,
                           init_connection_args=init_connection_args))
    # Parameters are set both through the pipeline's `<step>__<param>` syntax
    # and as a plain attribute on the step itself.
    pipeline.set_params(h2oautomlestimator__max_models=max_models,
                        h2oautomlestimator__nfolds=3)
    pipeline.named_steps.h2oautomlestimator.exclude_algos = ['XGBoost']

    data = _get_data(format='numpy', n_classes=0)
    assert isinstance(data.X_train, np.ndarray)
    pipeline.fit(data.X_train, data.y_train)
    # Use the public `estimator` accessor rather than the private `_estimator`
    # attribute, consistent with the other tests on this wrapper.
    automl = pipeline.named_steps.h2oautomlestimator.estimator
    # NOTE(review): the 2 rows beyond `max_models` are presumably the stacked
    # ensembles added by AutoML -- confirm against the H2O version in use.
    assert len(automl.leaderboard) == max_models + 2

    preds = pipeline.predict(data.X_test)
    assert isinstance(preds, np.ndarray)
    assert preds.shape == (len(data.X_test), )

    score = pipeline.score(data.X_test, data.y_test)
    assert isinstance(score, float)
    skl_score = r2_score(data.y_test, preds)
    assert abs(score - skl_score) < 1e-6, "score={}, skl_score={}".format(
        score, skl_score)