コード例 #1
0
ファイル: test_grid_reload.py プロジェクト: sergeiten/h2o-3
    def test_frame_reload(self):
        """Start a GBM grid on one cluster, then reload and resume it on another.

        Phase 1 starts the grid with ``recovery_dir`` set to an HDFS work
        directory, polls (at most 20 times, 5 seconds apart) until the grid
        reports at least one model, and then stops the cluster. Phase 2 starts
        a second cluster, loads the grid from the work directory, resumes it
        and asserts that the model count equals the full grid size.
        """
        namenode = pyunit_utils.hadoop_namenode()
        work_dir = "hdfs://%s%s" % (namenode, utils.get_workdir())
        dataset = "/datasets/iris_wheader.csv"

        ntrees_opts = [100, 120, 130, 140]
        learn_rate_opts = [0.01, 0.02, 0.03, 0.04]
        # expected model count: one per (learn_rate, ntrees) combination
        grid_size = len(ntrees_opts) * len(learn_rate_opts)
        print("max models %s" % grid_size)
        grid_id = "grid_ft_resume"
        hyper_parameters = {"learn_rate": learn_rate_opts, "ntrees": ntrees_opts}

        # ---- phase 1: start the grid and let it produce some models ----
        first_cluster = "grid1-py"
        try:
            h2o.connect(url=utils.start_cluster(first_cluster))
            iris = h2o.import_file(path="hdfs://%s%s" % (namenode, dataset))
            search = H2OGridSearch(
                H2OGradientBoostingEstimator,
                grid_id=grid_id,
                hyper_params=hyper_parameters,
                recovery_dir=work_dir)
            print("starting initial grid and sleeping...")
            search.start(x=list(range(4)), y=4, training_frame=iris)
            snapshot = None
            for _ in range(20):
                if snapshot is not None and len(snapshot.model_ids) != 0:
                    break
                time.sleep(5)  # give it time to train some models
                try:
                    snapshot = h2o.get_grid(grid_id)
                except IndexError:
                    print("no models trained yet")
            print("done sleeping")
            h2o.connection().close()
        finally:
            # the cluster is stopped even when the phase fails part-way
            utils.stop_cluster(first_cluster)

        # ---- phase 2: reload the grid on a fresh cluster and finish it ----
        second_cluster = "grid2-py"
        try:
            h2o.connect(url=utils.start_cluster(second_cluster))
            loaded = h2o.load_grid("%s/%s" % (work_dir, grid_id),
                                   load_params_references=True)
            print("models after first run:")
            for model_id in sorted(loaded.model_ids):
                print(model_id)
            loaded.resume()
            print("models after second run:")
            for model_id in sorted(loaded.model_ids):
                print(model_id)
            print("Newly grained grid has %d models" % len(loaded.model_ids))
            self.assertEqual(len(loaded.model_ids), grid_size,
                             "The full grid was not trained.")
            h2o.connection().close()
        finally:
            utils.stop_cluster(second_cluster)
コード例 #2
0
ファイル: pyunit_h2oget_grid.py プロジェクト: StevenLOL/h2o-3
def h2oget_grid():
    """
    Python API test: h2o.get_grid(grid_id)

    Trains a RandomDiscrete GBM grid (max 5 models) on the airlines data,
    fetches the grid back by its id and checks that the fetched object is an
    H2OGridSearch with the same number of models.

    Copy from pyunit_gbm_random_grid.py
    """
    airlines = h2o.import_file(
        path=pyunit_utils.locate("smalldata/airlines/allyears2k_headers.zip"),
        destination_frame="air.hex")
    predictors = ["DayofMonth", "DayOfWeek"]

    hyper_parameters = {
        'learn_rate': [0.1, 0.2],
        'max_depth': [2, 3],
        'ntrees': [5, 10],
    }
    search_crit = {
        'strategy': "RandomDiscrete",
        'max_models': 5,
        'seed': 1234,
        'stopping_rounds': 3,
        'stopping_metric': "AUTO",
        'stopping_tolerance': 1e-2,
    }

    air_grid = H2OGridSearch(
        H2OGradientBoostingEstimator,
        hyper_params=hyper_parameters,
        search_criteria=search_crit)
    air_grid.train(
        x=predictors,
        y="IsDepDelayed",
        training_frame=airlines,
        distribution="bernoulli")

    # round-trip the grid through its id
    fetched_grid = h2o.get_grid(str(air_grid.grid_id))
    assert_is_type(fetched_grid, H2OGridSearch)
    assert len(air_grid.get_grid()) == 5, \
        "h2o.get_grid() is command not working.  It returned the wrong number of models."
    assert len(air_grid.get_grid()) == len(fetched_grid.get_grid()), \
        "h2o.get_grid() is command not working."
コード例 #3
0
def h2oget_grid():
    """
    Python API test: h2o.get_grid(grid_id)

    Copy from pyunit_gbm_random_grid.py

    Trains a RandomDiscrete GBM grid (max 5 models), fetches it back by id and
    compares the model counts.

    Raises:
        AssertionError: on any failure. Assertion failures raised by the
            checks themselves propagate with their own message; any other
            exception is wrapped, with its type and message appended so the
            root cause is not lost.
    """
    try:
        air_hex = h2o.import_file(path=pyunit_utils.locate("smalldata/airlines/allyears2k_headers.zip"), destination_frame="air.hex")
        myX = ["DayofMonth","DayOfWeek"]

        hyper_parameters = {
            'learn_rate':[0.1,0.2],
            'max_depth':[2,3],
            'ntrees':[5,10]
        }

        search_crit = {'strategy': "RandomDiscrete",
                       'max_models': 5,
                       'seed' : 1234,
                       'stopping_rounds' : 3,
                       'stopping_metric' : "AUTO",
                       'stopping_tolerance': 1e-2
                       }

        air_grid = H2OGridSearch(H2OGradientBoostingEstimator, hyper_params=hyper_parameters, search_criteria=search_crit)
        air_grid.train(x=myX, y="IsDepDelayed", training_frame=air_hex, distribution="bernoulli")

        fetched_grid = h2o.get_grid(str(air_grid.grid_id))
        assert_is_type(fetched_grid, H2OGridSearch)
        assert len(air_grid.get_grid())==5, "h2o.get_grid() is command not working.  " \
                                            "It returned the wrong number of models."
        assert len(air_grid.get_grid())==len(fetched_grid.get_grid()), "h2o.get_grid() is command not working."
    except AssertionError:
        # keep the specific message of the check that failed
        raise
    except Exception as e:
        # raise explicitly: `assert False` would be stripped under -O and
        # would hide what actually went wrong
        raise AssertionError("h2o.get_grid() command is not working. Caused by %s: %s"
                             % (type(e).__name__, e))
コード例 #4
0
def grid_re_run_hyper_serialization():
    """Train a GLRM grid, fetch it from the back-end, extend it and train again.

    The first run uses two ``transform`` values with a single ``gamma_x`` and
    is expected to yield 2 models. The grid is then fetched by id, ``gamma_x``
    gets a second value, and after re-training 4 models are expected.
    """
    left = np.random.rand(1000, 10)
    right = np.random.rand(10, 100)
    train_data = np.dot(left, right)
    train = h2o.H2OFrame(train_data.tolist(), destination_frame="glrm_train")

    # model parameters shared by both training runs
    glrm_params = {
        "k": 2,
        "init": "User",
        "loss": "Quadratic",
        "regularization_x": "OneSparse",
        "regularization_y": "NonNegative",
    }
    hyper_params = {"transform": ["NONE", "STANDARDIZE"], "gamma_x": [0.1]}

    # first pass: train the initial grid
    first_grid = H2OGridSearch(H2OGeneralizedLowRankEstimator, hyper_params=hyper_params)
    first_grid.train(x=train.names, training_frame=train, **glrm_params)
    print(first_grid)
    assert len(first_grid.model_ids) == 2

    # second pass: load from back-end, widen gamma_x and train again
    reloaded = h2o.get_grid(first_grid.grid_id)
    reloaded.hyper_params["gamma_x"] = [0.1, 1]
    reloaded.train(x=train.names, training_frame=train, **glrm_params)
    print(reloaded)
    assert len(reloaded.model_ids) == 4
コード例 #5
0
ファイル: pyunit_gbm_random_grid.py プロジェクト: h2oai/h2o-3
def airline_gbm_random_grid():
    """Train a RandomDiscrete GBM grid on the airlines data and re-fetch it by id.

    Asserts that the grid holds 5 models (the ``max_models`` limit), prints
    the grid as returned by ``get_grid("logloss")``, and checks that
    ``h2o.get_grid`` returns a grid with the same number of models.
    """
    airlines = h2o.import_file(
        path=pyunit_utils.locate("smalldata/airlines/allyears2k_headers.zip"),
        destination_frame="air.hex")
    predictors = ["DayofMonth", "DayOfWeek"]

    hyper_parameters = {
        'learn_rate': [0.1, 0.2],
        'max_depth': [2, 3, 4],
        'ntrees': [5, 10, 15],
    }
    search_crit = {
        'strategy': "RandomDiscrete",
        'max_models': 5,
        'seed': 1234,
        'stopping_rounds': 3,
        'stopping_metric': "AUTO",
        'stopping_tolerance': 1e-2,
    }

    air_grid = H2OGridSearch(
        H2OGradientBoostingEstimator,
        hyper_params=hyper_parameters,
        search_criteria=search_crit)
    air_grid.train(
        x=predictors,
        y="IsDepDelayed",
        training_frame=airlines,
        distribution="bernoulli")
    assert len(air_grid.get_grid()) == 5
    print(air_grid.get_grid("logloss"))

    # added this part to check h2o.get_grid is working properly
    fetch_grid = h2o.get_grid(str(air_grid.grid_id))
    assert len(air_grid.get_grid()) == len(fetch_grid.get_grid())
コード例 #6
0
def grid_ft_resume(train, grid_id, params, hyper_parameters, start_grid):
    """Check that a cancelled grid can be reloaded from disk and resumed.

    A grid is started with a temporary export directory, cancelled once it
    reports at least one model (or after 3000 polls of 0.1 seconds each), wiped from the
    cluster with ``h2o.remove_all()``, loaded back from the export directory
    and resumed. The temporary directory is removed when the function exits,
    whether or not the checks pass.

    Args:
        train: training frame; its ``frame_id`` is read here and the object is
            forwarded to ``start_grid``.
        grid_id: id used to start, fetch and reload the grid.
        params: forwarded unchanged to ``start_grid``.
        hyper_parameters: mapping from hyper-parameter name to a sized
            collection of values; the expected grid size is the product of
            the lengths.
        start_grid: callable invoked as
            ``start_grid(grid_id, export_dir, train, params, hyper_parameters)``;
            the object it returns must provide ``cancel()``.

    Raises:
        AssertionError: if the baseline grid already has the full number of
            models, the grid or training frame cannot be reloaded, or the
            resumed grid does not reach the full size.
    """
    import shutil  # local import: only needed for the cleanup below

    print("TESTING %s\n-------------------" % grid_id)
    export_dir = tempfile.mkdtemp()
    try:
        print("Using directory %s" % export_dir)
        grid_size = 1
        for p in hyper_parameters:
            grid_size *= len(hyper_parameters[p])
        print("Grid size %d" % grid_size)
        print("Starting baseline grid")
        grid = start_grid(grid_id, export_dir, train, params, hyper_parameters)
        grid_in_progress = None
        times_waited = 0
        while (times_waited < 3000) and (grid_in_progress is None
                                         or len(grid_in_progress.model_ids) == 0):
            time.sleep(0.1)  # give it time to train some models
            times_waited += 1
            try:
                grid_in_progress = h2o.get_grid(grid_id)
            except IndexError:
                # report only every 100th failed poll to keep the log short
                if times_waited % 100 == 0:
                    print("no models trained yet after %ss" % (times_waited / 10))
        grid.cancel()

        grid = h2o.get_grid(grid_id)
        old_grid_model_count = len(grid.model_ids)
        print("Baseline grid has %d models:" % old_grid_model_count)
        assert old_grid_model_count < grid_size, "The full grid should not have finished yet."
        for x in sorted(grid.model_ids):
            print(x)
        h2o.remove_all()

        loaded = h2o.load_grid("%s/%s" % (export_dir, grid_id),
                               load_params_references=True)
        assert loaded is not None
        # NOTE(review): this re-checks `grid`, not `loaded`; possibly
        # `loaded.model_ids` was intended -- confirm before changing.
        assert len(grid.model_ids) == old_grid_model_count
        loaded_train = h2o.H2OFrame.get_frame(train.frame_id)
        assert loaded_train is not None, "Train frame was not loaded"
        print("Starting final grid")
        loaded.resume()
        print("Newly grained grid has %d models:" % len(loaded.model_ids))
        for x in sorted(loaded.model_ids):
            print(x)
        assert len(loaded.model_ids) == grid_size, "The full grid was not trained."
        h2o.remove_all()
    finally:
        # mkdtemp() never cleans up after itself; do not leak one directory
        # per test run
        shutil.rmtree(export_dir, ignore_errors=True)
コード例 #7
0
def _wait_for_grid_models(grid, grid_id, models, grid_size):
    """Wait for a partially trained grid, cancel it and report its model count.

    Polls ``h2o.get_grid(grid_id)`` every 0.1 seconds, at most 3000 times,
    until the grid reports at least ``models`` models. Then cancels ``grid``,
    re-fetches it, asserts that it has fewer than ``grid_size`` models, calls
    ``h2o.remove_all()``, sleeps 5 seconds and returns the model count.
    """
    snapshot = None
    for attempt in range(1, 3001):
        if snapshot is not None and len(snapshot.model_ids) >= models:
            break
        time.sleep(0.1)  # give it time to train some models
        try:
            snapshot = h2o.get_grid(grid_id)
        except IndexError:
            # report only every 100th failed poll
            if attempt % 100 == 0:
                print("%s not trained yet after %ss" % (models, attempt / 10))
    grid.cancel()

    cancelled_grid = h2o.get_grid(grid_id)
    model_count = len(cancelled_grid.model_ids)
    print("Grid has %d models" % model_count)
    assert model_count < grid_size, "The full grid should not have finished yet."
    h2o.remove_all()
    time.sleep(5)
    return model_count
コード例 #8
0
 def _wait_for_model_to_build(self, grid_id, model_count=1):
     """Poll until the grid has at least ``model_count`` models; return their ids.

     The grid is fetched at most 20 times, 5 seconds apart. If polling ends
     with fewer than ``model_count`` models, the ids seen on the last
     successful fetch are still returned.

     Raises:
         RuntimeError: if the grid could not be fetched even once, so there
             are no model ids to return.
     """
     grid_in_progress = None
     times_waited = 0
     while (times_waited < 20) and (grid_in_progress is None or len(
             grid_in_progress.model_ids) < model_count):
         time.sleep(5)  # give it time to train some models
         times_waited += 1
         try:
             grid_in_progress = h2o.get_grid(grid_id)
         except IndexError:
             print("no models trained yet")
         except H2OResponseError as e:
             # %-format instead of "+": e.args[0] is not guaranteed to be a
             # str, and concatenation would then raise TypeError
             print("grid not started yet %s" % (e.args[0],))
     print("done sleeping")
     if grid_in_progress is None:
         # every poll failed; fail with a clear message instead of an
         # AttributeError on None
         raise RuntimeError(
             "grid %s could not be fetched after %d attempts" % (grid_id, times_waited))
     return grid_in_progress.model_ids
コード例 #9
0
def airline_gbm_random_grid():
    """Exercise RandomDiscrete GBM grid search, stacking and grid error messages.

    Steps, in order:
      1. train a cross-validated grid (5 models expected) and stack its models;
      2. check the ensemble can be retrieved and matches three golden predictions;
      3. train a grid without cross-validation and compare it with ``h2o.get_grid``;
      4. PUBDEV-5145: check the error text for hyper-parameters that cannot be
         parsed (``range`` on Python 3) and for invalid search criteria.
    """
    air_hex = h2o.import_file(path=pyunit_utils.locate("smalldata/airlines/allyears2k_headers.zip"),
                              destination_frame="air.hex")
    predictors = ["DayofMonth", "DayOfWeek"]

    hyper_parameters = {'learn_rate': [0.1, 0.2], 'max_depth': [2, 3, 4], 'ntrees': [5, 10, 15]}
    search_crit = {'strategy': "RandomDiscrete", 'max_models': 5, 'seed': 1234,
                   'stopping_rounds': 3, 'stopping_metric': "AUTO", 'stopping_tolerance': 1e-2}

    # keyword arguments shared by every cross-validated training run below
    cv_train_args = dict(x=predictors, y="IsDepDelayed", training_frame=air_hex, nfolds=5,
                         fold_assignment='Modulo', keep_cross_validation_predictions=True,
                         distribution="bernoulli", seed=5678)

    grid = H2OGridSearch(H2OGradientBoostingEstimator, hyper_params=hyper_parameters, search_criteria=search_crit)
    grid.train(**cv_train_args)

    assert len(grid.get_grid()) == 5
    print(grid.get_grid("logloss"))

    stacker = H2OStackedEnsembleEstimator(base_models=grid.model_ids)
    print("created H2OStackedEnsembleEstimator")
    stacker.train(model_id="my_ensemble", y="IsDepDelayed", training_frame=air_hex)
    print("trained H2OStackedEnsembleEstimator")
    predictions = stacker.predict(air_hex)  # training data
    print("predictions for ensemble are in: " + predictions.frame_id)

    # Check that the model can be retrieved
    assert stacker.model_id == "my_ensemble"
    modelcopy = h2o.get_model(stacker.model_id)
    assert modelcopy is not None
    assert modelcopy.model_id == "my_ensemble"

    # golden test for ensemble predictions:
    for row, expected in ((0, 0.4327), (1, 0.5214), (2, 0.4666)):
        assert round(predictions[row, "YES"], 4) == expected, \
            "Expected prediction for row: {0} to be: {1}; got: {2} instead.".format(
                row, expected, round(predictions[row, "YES"], 4))

    grid = H2OGridSearch(H2OGradientBoostingEstimator, hyper_params=hyper_parameters, search_criteria=search_crit)
    grid.train(x=predictors, y="IsDepDelayed", training_frame=air_hex, distribution="bernoulli")
    assert len(grid.get_grid()) == 5
    print(grid.get_grid("logloss"))

    # added this part to check h2o.get_grid is working properly
    fetch_grid = h2o.get_grid(str(grid.grid_id))
    assert len(grid.get_grid()) == len(fetch_grid.get_grid())

    ################################################################################
    # PUBDEV-5145: make sure we give a good error message for JSON parse failures, like range() under 3.6
    hyper_parameters['max_depth'] = range(2, 4)
    search_crit['max_models'] = 1

    if sys.version_info[0] < 3:
        # no exception
        grid = H2OGridSearch(H2OGradientBoostingEstimator, hyper_params=hyper_parameters, search_criteria=search_crit)
        grid.train(**cv_train_args)
    else:
        # MalformedJsonException in Java; check for the right error message in Python
        exc = None
        got_exception = False
        try:
            grid = H2OGridSearch(H2OGradientBoostingEstimator, hyper_params=hyper_parameters, search_criteria=search_crit)
            grid.train(**cv_train_args)
        except H2OResponseError as err:
            exc = err
            got_exception = True
        assert type(exc) == H2OResponseError
        print("Got an H2OResponseError, as expected with 3.x")
        assert "Error: Can't parse the hyper_parameters dictionary" in str(exc)
        assert got_exception

    hyper_parameters['max_depth'] = 1
    search_crit['max_models'] = [1, 3]  # expecting an int
    # IllegalStateException in Java; check for the right error message in Python
    exc = None
    got_exception = False
    try:
        grid = H2OGridSearch(H2OGradientBoostingEstimator, hyper_params=hyper_parameters, search_criteria=search_crit)
        grid.train(**cv_train_args)
    except H2OResponseError as err:
        exc = err
        got_exception = True
    assert type(exc) == H2OResponseError
    print("Got an H2OResponseError, as expected with 3.x")
    assert "Error: Can't parse the search_criteria dictionary" in str(exc)
    assert got_exception
コード例 #10
0
def airline_gbm_random_grid():
    """Exercise RandomDiscrete GBM grid search and its error messages.

    Trains one cross-validated grid and one plain grid (5 models expected
    each), compares the plain grid with the one returned by ``h2o.get_grid``,
    then checks the error text produced for hyper-parameters that cannot be
    parsed (``range`` on Python 3) and for invalid search criteria.
    """
    air_hex = h2o.import_file(
        path=pyunit_utils.locate("smalldata/airlines/allyears2k_headers.zip"),
        destination_frame="air.hex")
    predictors = ["DayofMonth", "DayOfWeek"]

    hyper_parameters = {
        'learn_rate': [0.1, 0.2],
        'max_depth': [2, 3, 4],
        'ntrees': [5, 10, 15],
    }
    search_crit = {
        'strategy': "RandomDiscrete",
        'max_models': 5,
        'seed': 1234,
        'stopping_rounds': 3,
        'stopping_metric': "AUTO",
        'stopping_tolerance': 1e-2,
    }

    # keyword arguments shared by every cross-validated training run below
    cv_train_args = dict(
        x=predictors,
        y="IsDepDelayed",
        training_frame=air_hex,
        nfolds=5,
        fold_assignment='Modulo',
        keep_cross_validation_predictions=True,
        distribution="bernoulli",
        seed=5678)

    grid = H2OGridSearch(
        H2OGradientBoostingEstimator,
        hyper_params=hyper_parameters,
        search_criteria=search_crit)
    grid.train(**cv_train_args)

    assert len(grid.get_grid()) == 5
    print(grid.get_grid("logloss"))

    grid = H2OGridSearch(
        H2OGradientBoostingEstimator,
        hyper_params=hyper_parameters,
        search_criteria=search_crit)
    grid.train(
        x=predictors,
        y="IsDepDelayed",
        training_frame=air_hex,
        distribution="bernoulli")
    assert len(grid.get_grid()) == 5
    print(grid.get_grid("logloss"))

    # added this part to check h2o.get_grid is working properly
    fetch_grid = h2o.get_grid(str(grid.grid_id))
    assert len(grid.get_grid()) == len(fetch_grid.get_grid())

    ################################################################################
    # PUBDEV-5145: make sure we give a good error message for JSON parse failures, like range() under 3.6
    hyper_parameters['max_depth'] = range(2, 4)
    search_crit['max_models'] = 1

    if sys.version_info[0] < 3:
        # no exception
        grid = H2OGridSearch(
            H2OGradientBoostingEstimator,
            hyper_params=hyper_parameters,
            search_criteria=search_crit)
        grid.train(**cv_train_args)
    else:
        # MalformedJsonException in Java; check for the right error message in Python
        exc = None
        got_exception = False
        try:
            grid = H2OGridSearch(
                H2OGradientBoostingEstimator,
                hyper_params=hyper_parameters,
                search_criteria=search_crit)
            grid.train(**cv_train_args)
        except H2OResponseError as err:
            exc = err
            got_exception = True
        assert type(exc) == H2OResponseError
        print("Got an H2OResponseError, as expected with 3.x")
        assert "Error: Can't parse the hyper_parameters dictionary" in str(exc)
        assert got_exception

    hyper_parameters['max_depth'] = 1
    search_crit['max_models'] = [1, 3]  # expecting an int
    # IllegalStateException in Java; check for the right error message in Python
    exc = None
    got_exception = False
    try:
        grid = H2OGridSearch(
            H2OGradientBoostingEstimator,
            hyper_params=hyper_parameters,
            search_criteria=search_crit)
        grid.train(**cv_train_args)
    except H2OResponseError as err:
        exc = err
        got_exception = True
    assert type(exc) == H2OResponseError
    print("Got an H2OResponseError, as expected with 3.x")
    assert "Error: Can't parse the search_criteria dictionary" in str(exc)
    assert got_exception
コード例 #11
0
def airline_gbm_random_grid():
    """Train RandomDiscrete GBM grids, stack one of them and re-fetch a grid by id.

    A cross-validated grid (5 models expected) feeds a stacked ensemble whose
    first three "YES" predictions are compared with golden values. A second
    grid without cross-validation is then trained and compared with the grid
    returned by ``h2o.get_grid``.
    """
    air_hex = h2o.import_file(path=pyunit_utils.locate("smalldata/airlines/allyears2k_headers.zip"),
                              destination_frame="air.hex")
    predictors = ["DayofMonth", "DayOfWeek"]

    hyper_parameters = {'learn_rate': [0.1, 0.2], 'max_depth': [2, 3, 4], 'ntrees': [5, 10, 15]}
    search_crit = {'strategy': "RandomDiscrete", 'max_models': 5, 'seed': 1234,
                   'stopping_rounds': 3, 'stopping_metric': "AUTO", 'stopping_tolerance': 1e-2}

    grid = H2OGridSearch(H2OGradientBoostingEstimator, hyper_params=hyper_parameters, search_criteria=search_crit)
    grid.train(x=predictors, y="IsDepDelayed", training_frame=air_hex, nfolds=5,
               fold_assignment='Modulo', keep_cross_validation_predictions=True,
               distribution="bernoulli", seed=5678)

    assert len(grid.get_grid()) == 5
    print(grid.get_grid("logloss"))

    stacker = H2OStackedEnsembleEstimator(base_models=grid.model_ids)
    print("created H2OStackedEnsembleEstimator")
    stacker.train(model_id="my_ensemble", y="IsDepDelayed", training_frame=air_hex)
    print("trained H2OStackedEnsembleEstimator")
    predictions = stacker.predict(air_hex)  # training data
    print("predictions for ensemble are in: " + predictions.frame_id)

    # Check that the model can be retrieved
    assert stacker.model_id == "my_ensemble"
    modelcopy = h2o.get_model(stacker.model_id)
    assert modelcopy is not None
    assert modelcopy.model_id == "my_ensemble"

    # golden test for ensemble predictions:
    for row, expected in ((0, 0.4327), (1, 0.5214), (2, 0.4666)):
        assert round(predictions[row, "YES"], 4) == expected, \
            "Expected prediction for row: {0} to be: {1}; got: {2} instead.".format(
                row, expected, round(predictions[row, "YES"], 4))

    grid = H2OGridSearch(H2OGradientBoostingEstimator, hyper_params=hyper_parameters, search_criteria=search_crit)
    grid.train(x=predictors, y="IsDepDelayed", training_frame=air_hex, distribution="bernoulli")
    assert len(grid.get_grid()) == 5
    print(grid.get_grid("logloss"))

    # added this part to check h2o.get_grid is working properly
    fetch_grid = h2o.get_grid(str(grid.grid_id))
    assert len(grid.get_grid()) == len(fetch_grid.get_grid())
コード例 #12
0
def airline_gbm_random_grid():
    """Exercise RandomDiscrete GBM grid search, stacking and grid error messages.

    Steps, in order:
      1. train a cross-validated grid (5 models expected) and stack its models;
      2. check the ensemble can be retrieved and matches three golden predictions;
      3. train a grid without cross-validation and compare it with ``h2o.get_grid``;
      4. PUBDEV-5145: check the error text for hyper-parameters that cannot be
         parsed (``range`` on Python 3) and for invalid search criteria.
    """
    air_hex = h2o.import_file(
        path=pyunit_utils.locate("smalldata/airlines/allyears2k_headers.zip"),
        destination_frame="air.hex")
    predictors = ["DayofMonth", "DayOfWeek"]

    hyper_parameters = {
        'learn_rate': [0.1, 0.2],
        'max_depth': [2, 3, 4],
        'ntrees': [5, 10, 15],
    }
    search_crit = {
        'strategy': "RandomDiscrete",
        'max_models': 5,
        'seed': 1234,
        'stopping_rounds': 3,
        'stopping_metric': "AUTO",
        'stopping_tolerance': 1e-2,
    }

    # keyword arguments shared by every cross-validated training run below
    cv_train_args = dict(
        x=predictors,
        y="IsDepDelayed",
        training_frame=air_hex,
        nfolds=5,
        fold_assignment='Modulo',
        keep_cross_validation_predictions=True,
        distribution="bernoulli",
        seed=5678)

    grid = H2OGridSearch(
        H2OGradientBoostingEstimator,
        hyper_params=hyper_parameters,
        search_criteria=search_crit)
    grid.train(**cv_train_args)

    assert len(grid.get_grid()) == 5
    print(grid.get_grid("logloss"))

    stacker = H2OStackedEnsembleEstimator(base_models=grid.model_ids)
    print("created H2OStackedEnsembleEstimator")
    stacker.train(
        model_id="my_ensemble",
        y="IsDepDelayed",
        training_frame=air_hex)
    print("trained H2OStackedEnsembleEstimator")
    predictions = stacker.predict(air_hex)  # training data
    print("predictions for ensemble are in: " + predictions.frame_id)

    # Check that the model can be retrieved
    assert stacker.model_id == "my_ensemble"
    modelcopy = h2o.get_model(stacker.model_id)
    assert modelcopy is not None
    assert modelcopy.model_id == "my_ensemble"

    # golden test for ensemble predictions:
    golden = ((0, 0.4327), (1, 0.5214), (2, 0.4666))
    for row, expected in golden:
        assert round(predictions[row, "YES"], 4) == expected, \
            "Expected prediction for row: {0} to be: {1}; got: {2} instead.".format(
                row, expected, round(predictions[row, "YES"], 4))

    grid = H2OGridSearch(
        H2OGradientBoostingEstimator,
        hyper_params=hyper_parameters,
        search_criteria=search_crit)
    grid.train(
        x=predictors,
        y="IsDepDelayed",
        training_frame=air_hex,
        distribution="bernoulli")
    assert len(grid.get_grid()) == 5
    print(grid.get_grid("logloss"))

    # added this part to check h2o.get_grid is working properly
    fetch_grid = h2o.get_grid(str(grid.grid_id))
    assert len(grid.get_grid()) == len(fetch_grid.get_grid())

    ################################################################################
    # PUBDEV-5145: make sure we give a good error message for JSON parse failures, like range() under 3.6
    hyper_parameters['max_depth'] = range(2, 4)
    search_crit['max_models'] = 1

    if sys.version_info[0] < 3:
        # no exception
        grid = H2OGridSearch(
            H2OGradientBoostingEstimator,
            hyper_params=hyper_parameters,
            search_criteria=search_crit)
        grid.train(**cv_train_args)
    else:
        # MalformedJsonException in Java; check for the right error message in Python
        exc = None
        got_exception = False
        try:
            grid = H2OGridSearch(
                H2OGradientBoostingEstimator,
                hyper_params=hyper_parameters,
                search_criteria=search_crit)
            grid.train(**cv_train_args)
        except H2OResponseError as err:
            exc = err
            got_exception = True
        assert type(exc) == H2OResponseError
        print("Got an H2OResponseError, as expected with 3.x")
        assert "Error: Can't parse the hyper_parameters dictionary" in str(exc)
        assert got_exception

    hyper_parameters['max_depth'] = 1
    search_crit['max_models'] = [1, 3]  # expecting an int
    # IllegalStateException in Java; check for the right error message in Python
    exc = None
    got_exception = False
    try:
        grid = H2OGridSearch(
            H2OGradientBoostingEstimator,
            hyper_params=hyper_parameters,
            search_criteria=search_crit)
        grid.train(**cv_train_args)
    except H2OResponseError as err:
        exc = err
        got_exception = True
    assert type(exc) == H2OResponseError
    print("Got an H2OResponseError, as expected with 3.x")
    assert "Error: Can't parse the search_criteria dictionary" in str(exc)
    assert got_exception