Exemple #1
0
def test_varimp():
    train = h2o.upload_file(pyunit_utils.locate("smalldata/wine/winequality-redwhite-no-BOM.csv"))
    y = "quality"
    # get at most one column from each type
    cols_to_test = []
    for col, typ in train.types.items():
        for ctt in cols_to_test:
            if typ == train.types[ctt] or col == y:
                break
        else:
            cols_to_test.append(col)

    aml = H2OAutoML(seed=1234, max_models=5)
    aml.train(y=y, training_frame=train)

    assert aml.varimp(use_pandas=True).shape == (12, 5)
    assert h2o.explanation.varimp(aml.leaderboard[aml.leaderboard["model_id"].grep("Stacked", invert=True, output_logical=True), :].head(3), num_of_features=3, use_pandas=True).shape == (3, 3)

    varimp_1 = aml.varimp(use_pandas=False)
    assert varimp_1[0].shape == (12, 5)
    assert len(varimp_1[1]) == 5
    assert len(varimp_1[2]) == 12

    varimp_2 = h2o.explanation.varimp(aml.leaderboard[aml.leaderboard["model_id"].grep("Stacked", invert=True, output_logical=True), :].head(4), num_of_features=3, use_pandas=False)
    assert varimp_2[0].shape == (3, 4)
    assert len(varimp_2[1]) == 4
    assert len(varimp_2[2]) == 3

    assert isinstance(aml.varimp_heatmap().figure(), matplotlib.pyplot.Figure)
    assert isinstance(h2o.varimp_heatmap(aml.leaderboard[aml.leaderboard["model_id"].grep("Stacked", invert=True, output_logical=True), :].head(3), num_of_features=3).figure(), matplotlib.pyplot.Figure)
Exemple #2
0
def test_explanation_list_of_models_binomial_classification():
    train = h2o.upload_file(
        pyunit_utils.locate("smalldata/logreg/prostate.csv"))
    y = "CAPSULE"
    train[y] = train[y].asfactor()
    # get at most one column from each type
    cols_to_test = []
    for col, typ in train.types.items():
        for ctt in cols_to_test:
            if typ == train.types[ctt] or col == y:
                break
        else:
            cols_to_test.append(col)

    aml = H2OAutoML(seed=1234, max_models=5)
    aml.train(y=y, training_frame=train)

    models = [
        h2o.get_model(m[0])
        for m in aml.leaderboard["model_id"].as_data_frame(use_pandas=False,
                                                           header=False)
    ]

    # Test named models as well
    gbm = H2OGradientBoostingEstimator(model_id="my_awesome_model")
    gbm.train(y=y, training_frame=train)
    models += [gbm]

    # test variable importance heatmap plot
    assert isinstance(
        h2o.varimp_heatmap(models).figure(), matplotlib.pyplot.Figure)
    matplotlib.pyplot.close()

    # test model correlation heatmap plot
    assert isinstance(
        h2o.model_correlation_heatmap(models, train).figure(),
        matplotlib.pyplot.Figure)
    matplotlib.pyplot.close()

    # test partial dependences
    for col in cols_to_test:
        assert isinstance(
            h2o.pd_multi_plot(models, train, col).figure(),
            matplotlib.pyplot.Figure)
        matplotlib.pyplot.close()

    # test learning curve
    for model in models:
        assert isinstance(model.learning_curve_plot().figure(),
                          matplotlib.pyplot.Figure)
    matplotlib.pyplot.close("all")

    # test explain
    assert isinstance(h2o.explain(models, train, render=False), H2OExplanation)

    # test explain row
    assert isinstance(h2o.explain_row(models, train, 1, render=False),
                      H2OExplanation)
def test_varimp_heatmap_model_correlation_heatmap():
    train = h2o.upload_file(
        pyunit_utils.locate("smalldata/logreg/prostate.csv"))
    y = "CAPSULE"
    train[y] = train[y].asfactor()
    # get at most one column from each type
    cols_to_test = []
    for col, typ in train.types.items():
        for ctt in cols_to_test:
            if typ == train.types[ctt] or col == y:
                break
        else:
            cols_to_test.append(col)

    aml = H2OAutoML(seed=1234, max_models=5)
    aml.train(y=y, training_frame=train)

    models = [
        h2o.get_model(m[0])
        for m in aml.leaderboard["model_id"].as_data_frame(use_pandas=False,
                                                           header=False)
    ]

    # Test named models as well
    gbm = H2OGradientBoostingEstimator(model_id="my_awesome_model")
    gbm.train(y=y, training_frame=train)
    models += [gbm]

    with TemporaryDirectory() as tmpdir:
        path1 = "{}/plot1.png".format(tmpdir)
        path2 = "{}/plot2.png".format(tmpdir)
        test_plot_result_saving(
            h2o.varimp_heatmap(models), path2,
            h2o.varimp_heatmap(models, save_plot_path=path1), path1)
        test_plot_result_saving(
            h2o.model_correlation_heatmap(models, train), path2,
            h2o.model_correlation_heatmap(models, train, save_plot_path=path1),
            path1)
    h2o.varimp_heatmap(models)
Exemple #4
0
def test_explanation_list_of_models_multinomial_classification():
    train = h2o.upload_file(pyunit_utils.locate("smalldata/iris/iris2.csv"))
    y = "response"
    train[y] = train[y].asfactor()
    # get at most one column from each type
    cols_to_test = []
    for col, typ in train.types.items():
        for ctt in cols_to_test:
            if typ == train.types[ctt] or col == y:
                break
        else:
            cols_to_test.append(col)

    aml = H2OAutoML(seed=1234, max_models=5)
    aml.train(y=y, training_frame=train)

    models = [h2o.get_model(m[0]) for m in
              aml.leaderboard["model_id"].as_data_frame(use_pandas=False, header=False)]


    # test variable importance heatmap plot
    assert isinstance(h2o.varimp_heatmap(models), matplotlib.pyplot.Figure)
    matplotlib.pyplot.close()

    # test model correlation heatmap plot
    assert isinstance(h2o.model_correlation_heatmap(models, train), matplotlib.pyplot.Figure)
    matplotlib.pyplot.close()

    # test partial dependences
    for col in cols_to_test:
        assert isinstance(h2o.pd_multi_plot(models, train, col, target="setosa"), matplotlib.pyplot.Figure)
    matplotlib.pyplot.close("all")

    # test explain
    assert isinstance(h2o.explain(models, train, render=False), H2OExplanation)

    # test explain row
    assert isinstance(h2o.explain_row(models, train, 1, render=False), H2OExplanation)