def test_generic_estimator_for_classification():
    """Run a classifier-typed ``H2OAutoMLEstimator`` as the only pipeline step.

    The pipeline is fitted on numpy data generated with 3 classes. The test
    then checks the leaderboard length, the type and shape of the predictions,
    that every ``predict_proba`` row sums to 1, and that ``pipeline.score``
    agrees with ``accuracy_score`` computed from the same predictions.
    """
    n_classes = 3
    step_params = {
        'h2oautomlestimator__max_models': max_models,
        'h2oautomlestimator__nfolds': 3,
    }

    pipeline = make_pipeline(
        H2OAutoMLEstimator(
            estimator_type='classifier',
            seed=seed,
            init_connection_args=init_connection_args,
        )
    )
    pipeline.set_params(**step_params)
    # This one is assigned as a plain attribute on the step, not via `set_params`.
    pipeline.named_steps.h2oautomlestimator.exclude_algos = ['XGBoost']

    data = _get_data(format='numpy', n_classes=n_classes)
    assert isinstance(data.X_train, np.ndarray)

    pipeline.fit(data.X_train, data.y_train)
    leaderboard = pipeline.named_steps.h2oautomlestimator._estimator.leaderboard
    # NOTE(review): presumably AutoML adds 2 models on top of `max_models`
    # (e.g. ensembles) -- confirm against the AutoML documentation.
    assert len(leaderboard) == max_models + 2

    preds = pipeline.predict(data.X_test)
    n_test_rows = len(data.X_test)
    assert isinstance(preds, np.ndarray)
    assert preds.shape == (n_test_rows,)

    probs = pipeline.predict_proba(data.X_test)
    assert probs.shape == (n_test_rows, n_classes)
    row_totals = np.sum(probs, axis=1)
    assert np.allclose(row_totals, 1.), "`predict_proba` didn't return probabilities"

    score = pipeline.score(data.X_test, data.y_test)
    assert isinstance(score, float)
    # The pipeline's own score must match accuracy computed from its predictions.
    skl_score = accuracy_score(data.y_test, preds)
    difference = abs(score - skl_score)
    assert difference < 1e-6, "score={}, skl_score={}".format(score, skl_score)
def test_params_are_correctly_passed_to_underlying_automl():
    """Check that estimator params end up in the underlying AutoML ``build_control``.

    Parameters are supplied three ways -- the constructor (``seed``),
    ``set_params`` (``max_models``, ``nfolds``) and direct attribute assignment
    (``project_name``) -- and each is then looked up in the ``build_control``
    of the object exposed as ``estimator.estimator``.
    """
    expected_max_models = 5
    expected_nfolds = 0
    expected_project = "dummy"

    estimator = H2OAutoMLEstimator(seed=seed)
    estimator.set_params(max_models=expected_max_models, nfolds=expected_nfolds)
    estimator.project_name = expected_project

    # Nothing has been built yet at this point.
    assert estimator.estimator is None
    estimator._make_estimator()  # normally done when calling `fit`

    aml = estimator.estimator
    assert aml

    build_control = aml.build_control
    stopping_criteria = build_control["stopping_criteria"]
    assert stopping_criteria["seed"] == seed
    assert stopping_criteria["max_models"] == expected_max_models
    assert build_control["nfolds"] == expected_nfolds
    assert build_control["project_name"] == expected_project
def test_generic_estimator_for_regression():
    """Run a regressor-typed ``H2OAutoMLEstimator`` as the only pipeline step.

    The pipeline is fitted on numpy data generated with ``n_classes=0``. The
    test then checks the leaderboard length, the type and shape of the
    predictions, and that ``pipeline.score`` agrees with ``r2_score`` computed
    from the same predictions.
    """
    step_params = {
        'h2oautomlestimator__max_models': max_models,
        'h2oautomlestimator__nfolds': 3,
    }

    pipeline = make_pipeline(
        H2OAutoMLEstimator(
            estimator_type='regressor',
            seed=seed,
            init_connection_args=init_connection_args,
        )
    )
    pipeline.set_params(**step_params)
    # This one is assigned as a plain attribute on the step, not via `set_params`.
    pipeline.named_steps.h2oautomlestimator.exclude_algos = ['XGBoost']

    # NOTE(review): `n_classes=0` presumably asks `_get_data` for a regression
    # target -- confirm against the helper's definition.
    data = _get_data(format='numpy', n_classes=0)
    assert isinstance(data.X_train, np.ndarray)

    pipeline.fit(data.X_train, data.y_train)
    leaderboard = pipeline.named_steps.h2oautomlestimator._estimator.leaderboard
    # NOTE(review): presumably AutoML adds 2 models on top of `max_models`
    # (e.g. ensembles) -- confirm against the AutoML documentation.
    assert len(leaderboard) == max_models + 2

    preds = pipeline.predict(data.X_test)
    n_test_rows = len(data.X_test)
    assert isinstance(preds, np.ndarray)
    assert preds.shape == (n_test_rows,)

    score = pipeline.score(data.X_test, data.y_test)
    assert isinstance(score, float)
    # The pipeline's own score must match R^2 computed from its predictions.
    skl_score = r2_score(data.y_test, preds)
    difference = abs(score - skl_score)
    assert difference < 1e-6, "score={}, skl_score={}".format(score, skl_score)