def test_infogram_iris_wrong_thresholds():
    """
    Check that an infogram trained on Iris with index thresholds emits warnings.

    The Iris frame is imported, ``Species`` is converted to a factor and used as
    the response, and every other column is a predictor. The infogram is built
    without protected columns but with ``safety_index_threshold`` and
    ``relevance_index_threshold`` set. The test then requires that exactly two
    warnings were recorded and that ``pyunit_utils.contains_warning`` finds the
    text 'index_threshold for core infogram runs.' among them.
    """
    response = "Species"
    iris = h2o.import_file(path=pyunit_utils.locate(
        "smalldata/admissibleml_test/irisROriginal.csv"))
    iris[response] = iris[response].asfactor()
    # Every column except the response serves as a predictor.
    predictors = [name for name in iris.names if name != response]

    # Note: the two *_index_threshold values are set explicitly; these are not
    # the default settings.
    infogram_params = dict(
        seed=12345,
        distribution='multinomial',
        safety_index_threshold=0.2,
        relevance_index_threshold=0.2,
        top_n_features=len(predictors),
    )

    with pyunit_utils.catch_warnings() as caught:
        infogram = H2OInfogram(**infogram_params)
        infogram.train(x=predictors, y=response, training_frame=iris)

        warning_count = len(caught)
        assert warning_count == 2, \
            "Expected two warnings but received {0} warnings instead.".format(warning_count)
        assert pyunit_utils.contains_warning(
            caught, 'index_threshold for core infogram runs.')
def test_infogram_personal_loan():
    """
    Check that an infogram with protected columns and information thresholds warns.

    The personal-loan frame is imported and ``Personal Loan`` is converted to a
    factor and used as the response. The infogram is built with ``Age`` and
    ``ZIP Code`` as protected columns while ``net_information_threshold`` and
    ``total_information_threshold`` are set. The test requires that exactly two
    warnings were recorded and that ``pyunit_utils.contains_warning`` finds the
    text 'information_threshold for fair infogram runs.' among them.
    """
    response = "Personal Loan"
    predictors = [
        "Experience",
        "Income",
        "Family",
        "CCAvg",
        "Education",
        "Mortgage",
        "Securities Account",
        "CD Account",
        "Online",
        "CreditCard",
    ]
    loans = h2o.import_file(path=pyunit_utils.locate(
        "smalldata/admissibleml_test/Bank_Personal_Loan_Modelling.csv"))
    loans[response] = loans[response].asfactor()

    infogram_params = dict(
        seed=12345,
        protected_columns=["Age", "ZIP Code"],
        top_n_features=len(predictors),
        net_information_threshold=0.2,
        total_information_threshold=0.2,
    )

    with pyunit_utils.catch_warnings() as caught:
        infogram = H2OInfogram(**infogram_params)
        infogram.train(x=predictors, y=response, training_frame=loans)

        warning_count = len(caught)
        assert warning_count == 2, \
            "Expected two warnings but received {0} warnings instead.".format(warning_count)
        assert pyunit_utils.contains_warning(
            caught, 'information_threshold for fair infogram runs.')
Beispiel #3
0
def test_binomial_response_warning():
    """
    Check that GBM warns when a numeric response has only two distinct values.

    A one-tree GBM is trained on the Titanic frame with the numeric
    ``survived`` column as the response, and the expected warning text must be
    found among the recorded warnings. The zeros in the response are then
    recoded to -1 and the same check is repeated.
    """
    titanic = h2o.import_file(pyunit_utils.locate("smalldata/gbm_test/titanic.csv"))
    response = "survived"
    predictors = ["name", "sex"]

    expected_warning = (
        'We have detected that your response column has only 2 unique values (0/1). '
        'If you wish to train a binary model instead of a regression model, '
        'convert your target column to categorical before training.'
    )

    def train_and_expect_warning():
        # Train a fresh one-tree GBM and verify the warning inside the same
        # recording context.
        with pyunit_utils.catch_warnings() as caught:
            gbm = H2OGradientBoostingEstimator(ntrees=1)
            gbm.train(x=predictors, y=response, training_frame=titanic)
            assert pyunit_utils.contains_warning(caught, expected_warning)

    train_and_expect_warning()

    # Recode 0 -> -1 so the response still has two distinct values, but not 0/1.
    titanic[titanic[response] == 0, response] = -1
    train_and_expect_warning()
    def test_reproducible_early_stopping_warning():
        """
        Check that enabling early stopping without a scoring interval warns.

        A model is built via ``estimator`` (a name taken from the enclosing
        scope, not defined in this block) with ``stopping_rounds=1`` and
        ``stopping_metric="mse"``, trained on the Boston housing frame using
        columns 0-12 as predictors and column 13 as the response. The
        non-reproducibility warning must be found among the recorded warnings.
        """
        expected_message = (
            'early stopping is enabled but neither score_tree_interval or '
            'score_each_iteration are defined. Early stopping will not be reproducible!'
        )
        boston = h2o.import_file(
            pyunit_utils.locate("smalldata/gbm_test/BostonHousing.csv"))
        predictor_indices = list(range(13))

        with pyunit_utils.catch_warnings() as caught:
            early_stopping_model = estimator(stopping_rounds=1, stopping_metric="mse")
            early_stopping_model.train(x=predictor_indices, y=13, training_frame=boston)
            assert pyunit_utils.contains_warning(caught, expected_message)
Beispiel #5
0
def test_lambda_warning():
    """
    Check that GLM warns when lambda_search is combined with a user lambda.

    A Gaussian GLM with ``lambda_search=True`` and ``Lambda=[0.01]`` is trained
    on the Boston housing frame (columns 0-12 as predictors, column 13 as the
    response). It is trained once outside the warning recorder and once inside
    it; the text 'disabled when user specified any lambda value(s)' must be
    found among the warnings recorded for the second run.
    """
    boston = h2o.import_file(pyunit_utils.locate("smalldata/gbm_test/BostonHousing.csv"))
    response_index = 13
    predictor_indices = list(range(13))

    def new_glm():
        # Build a fresh estimator each time so the two runs share no state.
        return H2OGeneralizedLinearEstimator(family="Gaussian", lambda_search=True, Lambda=[0.01])

    # First run happens outside the recording context; its warnings are not inspected.
    glm = new_glm()
    glm.train(x=predictor_indices, y=response_index, training_frame=boston)

    with pyunit_utils.catch_warnings() as caught:
        glm = new_glm()
        glm.train(x=predictor_indices, y=response_index, training_frame=boston)

        assert pyunit_utils.contains_warning(caught, 'disabled when user specified any lambda value(s)')
Beispiel #6
0
def test_binomial_response_warning():
    """
    Check that GBM warns when a numeric response has a cardinality of two.

    A one-tree GBM is trained on the Titanic frame with the numeric
    ``survived`` column as the response, and the expected warning text must be
    found among the recorded warnings. The zeros in the response are then
    recoded to -1 and the same check is repeated.
    """
    expected_warning = (
        'Response is numeric, so the regression model will be trained. '
        'However, the cardinality is equaled to two, so if you want to train a '
        'classification model, convert the response column to categorical before training.'
    )
    titanic = h2o.import_file(
        pyunit_utils.locate("smalldata/gbm_test/titanic.csv"))
    response = "survived"
    predictors = ["name", "sex"]

    # Phase one uses the response as loaded; phase two first recodes 0 -> -1.
    for recode_zeros in (False, True):
        if recode_zeros:
            titanic[titanic[response] == 0, response] = -1

        with pyunit_utils.catch_warnings() as caught:
            gbm = H2OGradientBoostingEstimator(ntrees=1)
            gbm.train(x=predictors, y=response, training_frame=titanic)
            assert pyunit_utils.contains_warning(caught, expected_warning)