Beispiel #1
0
def test_params_conflicting_with_sklearn_api_are_still_available():
    """Check that the ``transform`` param coexists with the ``transform`` method.

    ``H2OPCA`` accepts a ``transform`` parameter whose name collides with the
    sklearn-style ``transform`` method. This test verifies that the method
    stays callable, while the parameter value remains reachable through
    ``get_params()`` / ``set_params()`` and through the trailing-underscore
    attribute ``transform_``.
    """
    pca = H2OPCA()
    assert pca.transform != 'none'
    assert callable(
        pca.transform
    ), "`transform` method from sklearn API has been replaced by a property"
    # conflicting param can be accessed normally using get_params()
    assert pca.get_params()['transform'] == 'none'
    # property is accessible directly using a trailing underscore
    assert pca.transform_ == 'none'

    # a value given to the constructor must not shadow the method either
    pca = H2OPCA(transform='demean')
    assert callable(
        pca.transform
    ), "`transform` method from sklearn API has been replaced by a property"
    assert pca.get_params()['transform'] == 'demean'
    assert pca.transform_ == 'demean'

    # conflicting param can be modified normally using set_params()
    pca.set_params(transform='descale')
    assert pca.get_params()['transform'] == 'descale'
    assert pca.transform_ == 'descale'

    # conflicting property can be set directly using a trailing underscore
    pca.transform_ = 'normalize'
    assert pca.get_params()['transform'] == 'normalize'
    assert pca.transform_ == 'normalize'
def test_params_conflicting_with_sklearn_api_are_still_available():
    """Verify the ``transform`` param and ``transform`` method do not clash.

    The ``transform`` attribute of ``H2OPCA`` must remain a callable, while
    the parameter of the same name stays readable and writable via
    ``get_params()`` / ``set_params()`` and the ``transform_`` attribute.
    """
    method_replaced_msg = (
        "`transform` method from sklearn API has been replaced by a property"
    )

    def assert_param_is(est, expected):
        # the conflicting param is visible through get_params() ...
        assert est.get_params()['transform'] == expected
        # ... and directly through the trailing-underscore attribute
        assert est.transform_ == expected

    # default construction
    estimator = H2OPCA()
    assert estimator.transform != 'NONE'
    assert callable(estimator.transform), method_replaced_msg
    assert_param_is(estimator, 'NONE')

    # value supplied to the constructor
    estimator = H2OPCA(transform='DEMEAN')
    assert callable(estimator.transform), method_replaced_msg
    assert_param_is(estimator, 'DEMEAN')

    # value changed through set_params()
    estimator.set_params(transform='DESCALE')
    assert_param_is(estimator, 'DESCALE')

    # value changed by assigning the trailing-underscore attribute
    estimator.transform_ = 'NORMALIZE'
    assert_param_is(estimator, 'NORMALIZE')
Beispiel #3
0
def test_params_are_correctly_passed_to_underlying_transformer():
    """Verify wrapper params end up in the wrapped estimator's ``_parms``.

    Params are set through the constructor, ``set_params()`` and plain
    attribute assignment; after ``_make_estimator()`` each of them must be
    present in ``estimator._parms``, and ``max_iterations`` (never set here)
    must be ``None``.
    """
    wrapper = H2OPCA(seed=seed)
    wrapper.set_params(transform='demean', k=3)
    wrapper.model_id = "dummy"
    # nothing has been built before the estimator is explicitly created
    assert wrapper.estimator is None

    # normally done when calling `fit`
    wrapper._make_estimator()
    assert wrapper.estimator

    passed = wrapper.estimator._parms
    expected = {
        'seed': seed,
        'transform': 'demean',
        'k': 3,
        'model_id': "dummy",
    }
    for name, value in expected.items():
        assert passed[name] == value
    assert passed['max_iterations'] is None
Beispiel #4
0
def test_all_params_can_be_set_as_properties():
    """Verify params assigned as attributes on steps show up in ``get_params()``.

    Each step of the pipeline is configured by plain attribute assignment via
    ``pipeline.named_steps``; the values must then be reported by the
    pipeline-level ``get_params()`` under the ``<step>__<param>`` keys.
    """
    pipeline = Pipeline([
        ('standardize', H2OScaler()),
        ('pca', H2OPCA()),
        ('estimator', H2OGradientBoostingEstimator()),
    ])

    # configure every step through attribute assignment
    scaler = pipeline.named_steps.standardize
    scaler.center = True
    scaler.scale = False

    reducer = pipeline.named_steps.pca
    reducer.k = 2
    reducer.seed = seed

    booster = pipeline.named_steps.estimator
    booster.ntrees = 20
    booster.max_depth = 5
    booster.seed = seed

    reported = pipeline.get_params()

    assert isinstance(reported['standardize'], H2OScaler)
    assert reported['standardize__center'] is True
    assert reported['standardize__scale'] is False

    assert isinstance(reported['pca'], H2OPCA)
    assert reported['pca__k'] == 2
    assert reported['pca__seed'] == seed

    assert isinstance(reported['estimator'], H2OGradientBoostingEstimator)
    assert reported['estimator__ntrees'] == 20
    assert reported['estimator__max_depth'] == 5
    assert reported['estimator__seed'] == seed
Beispiel #5
0
def test_all_params_can_be_set_using_set_params():
    """Verify params set via ``Pipeline.set_params()`` are readable on each step.

    The pipeline is configured with ``<step>__<param>`` keyword arguments and
    every value must afterwards be visible as an attribute of the matching
    step in ``pipeline.named_steps``.
    """
    pipeline = Pipeline([
        ('standardize', H2OScaler()),
        ('pca', H2OPCA()),
        ('estimator', H2OGradientBoostingEstimator()),
    ])

    nested_params = {
        'standardize__center': True,
        'standardize__scale': False,
        'pca__k': 2,
        'pca__seed': seed,
        'estimator__ntrees': 20,
        'estimator__max_depth': 5,
        'estimator__seed': seed,
    }
    pipeline.set_params(**nested_params)

    scaler = pipeline.named_steps.standardize
    assert isinstance(scaler, H2OScaler)
    assert scaler.center is True
    assert scaler.scale is False

    reducer = pipeline.named_steps.pca
    assert isinstance(reducer, H2OPCA)
    assert reducer.k == 2
    assert reducer.seed == seed

    booster = pipeline.named_steps.estimator
    assert isinstance(booster, H2OGradientBoostingEstimator)
    assert booster.ntrees == 20
    assert booster.max_depth == 5
    assert booster.seed == seed
def test_all_params_are_accessible_as_properties():
    """Verify constructor params are readable as attributes on each step.

    Covers both the params passed explicitly to the constructors and two
    params that were never set (``max_iterations`` and ``learn_rate``), which
    must be exposed as ``None``.
    """
    scaler_step = H2OScaler(center=True, scale=False)
    pca_step = H2OPCA(k=2, seed=seed)
    gbm_step = H2OGradientBoostingEstimator(ntrees=20, max_depth=5, seed=seed)
    pipeline = Pipeline([
        ('standardize', scaler_step),
        ('pca', pca_step),
        ('estimator', gbm_step),
    ])

    scaler = pipeline.named_steps.standardize
    assert isinstance(scaler, H2OScaler)
    assert scaler.center is True
    assert scaler.scale is False

    reducer = pipeline.named_steps.pca
    assert isinstance(reducer, H2OPCA)
    assert reducer.k == 2
    assert reducer.seed == seed

    booster = pipeline.named_steps.estimator
    assert isinstance(booster, H2OGradientBoostingEstimator)
    assert booster.ntrees == 20
    assert booster.max_depth == 5
    assert booster.seed == seed

    # also the ones that were not set explicitly
    assert pipeline.named_steps.pca.max_iterations is None
    assert pipeline.named_steps.estimator.learn_rate is None
def test_all_params_are_visible_in_get_params():
    """Verify constructor params are reported by ``Pipeline.get_params()``.

    Covers both the params passed explicitly to the constructors and two
    params that were never set (``pca__max_iterations`` and
    ``estimator__learn_rate``), which must be reported as ``None``.
    """
    scaler_step = H2OScaler(center=True, scale=False)
    pca_step = H2OPCA(k=2, seed=seed)
    gbm_step = H2OGradientBoostingEstimator(ntrees=20, max_depth=5, seed=seed)
    pipeline = Pipeline([
        ('standardize', scaler_step),
        ('pca', pca_step),
        ('estimator', gbm_step),
    ])

    reported = pipeline.get_params()

    assert isinstance(reported['standardize'], H2OScaler)
    assert reported['standardize__center'] is True
    assert reported['standardize__scale'] is False

    assert isinstance(reported['pca'], H2OPCA)
    assert reported['pca__k'] == 2
    assert reported['pca__seed'] == seed

    assert isinstance(reported['estimator'], H2OGradientBoostingEstimator)
    assert reported['estimator__ntrees'] == 20
    assert reported['estimator__max_depth'] == 5
    assert reported['estimator__seed'] == seed

    # also the ones that were not set explicitly
    assert reported['pca__max_iterations'] is None
    assert reported['estimator__learn_rate'] is None
def test_h2o_only_pipeline_with_h2o_frames():
    """Fit, predict and score an all-H2O pipeline fed with H2OFrames.

    The pipeline chains ``H2OScaler``, ``H2OPCA`` and
    ``H2OGradientBoostingRegressor``. The test checks that predictions come
    back as an ``h2o.H2OFrame`` whose ``dim`` is ``[len(X_test), 1]``, that
    ``pipeline.score`` returns a float agreeing with ``r2_score`` to within
    1e-6, and records the score in the module-level ``scores`` mapping.
    """
    pipeline = Pipeline([('standardize', H2OScaler()),
                         ('pca', H2OPCA(k=2, seed=seed)),
                         ('estimator', H2OGradientBoostingRegressor(seed=seed))
                         ])
    data = _get_data(format='h2o')
    # sanity check: the fixture really provides H2OFrames
    assert isinstance(data.X_train, h2o.H2OFrame)
    pipeline.fit(data.X_train, data.y_train)
    preds = pipeline.predict(data.X_test)
    assert isinstance(preds, h2o.H2OFrame)
    assert preds.dim == [len(data.X_test), 1]

    # to get it working, we need to score a fresh H2OFrame
    # NOTE(review): presumably the frames used above are altered by the
    # pipeline steps -- confirm before reusing `data` across predict/score.
    data = _get_data(format='h2o')
    score = pipeline.score(data.X_test, data.y_test)
    assert isinstance(score, float)
    # cross-check against sklearn's r2_score, using the predictions computed
    # earlier and the targets from the freshly loaded data
    skl_score = r2_score(data.y_test.as_data_frame().values,
                         preds.as_data_frame().values)
    assert abs(score - skl_score) < 1e-6, "score={}, skl_score={}".format(
        score, skl_score)
    # store the score in the shared module-level mapping
    scores['h2o_only_pipeline_with_h2o_frame'] = score
def test_h2o_only_pipeline_with_numpy_arrays():
    """Fit, predict and score an all-H2O pipeline fed with numpy arrays.

    Checks that predictions come back as a 1-D ``np.ndarray`` with one entry
    per test row, that ``pipeline.score`` returns a float agreeing with
    ``r2_score`` to within 1e-6, and records the score in the module-level
    ``scores`` mapping.
    """
    # In a release build `init_connection_args` can normally be omitted;
    # otherwise it has to be given to the first H2O element of the pipeline.
    # Because numpy inputs are mixed with a fully H2O pipeline here, the last
    # estimator needs `data_conversion=True` so predictions come back as
    # numpy arrays.
    scaler = H2OScaler(init_connection_args=init_connection_args)
    reducer = H2OPCA(k=2, seed=seed)
    regressor = H2OGradientBoostingRegressor(seed=seed, data_conversion=True)
    pipeline = Pipeline([
        ('standardize', scaler),
        ('pca', reducer),
        ('estimator', regressor),
    ])

    dataset = _get_data(format='numpy')
    assert isinstance(dataset.X_train, np.ndarray)

    pipeline.fit(dataset.X_train, dataset.y_train)
    predictions = pipeline.predict(dataset.X_test)
    assert isinstance(predictions, np.ndarray)
    expected_shape = (len(dataset.X_test), )
    assert predictions.shape == expected_shape

    pipeline_score = pipeline.score(dataset.X_test, dataset.y_test)
    assert isinstance(pipeline_score, float)
    reference_score = r2_score(dataset.y_test, predictions)
    assert abs(pipeline_score - reference_score) < 1e-6
    scores['h2o_only_pipeline_with_numpy_arrays'] = pipeline_score