# Imports used by the snippets below:
import h2o
from h2o.estimators.gbm import H2OGradientBoostingEstimator
from h2o.model.metrics_base import H2OMultinomialModelMetrics
from h2o.utils.typechecks import assert_is_type
from tests import pyunit_utils


def h2omake_metrics():
    """
    Python API test: h2o.make_metrics(predicted, actual, domain=None, distribution=None)

    Copied from pyunit_make_metrics.py
    """
    fr = h2o.import_file(pyunit_utils.locate("smalldata/logreg/prostate.csv"))
    fr["CAPSULE"] = fr["CAPSULE"].asfactor()
    fr["RACE"] = fr["RACE"].asfactor()

    response = "RACE"
    predictors = list(set(fr.names) - {"ID", response})
    model = H2OGradientBoostingEstimator(distribution="multinomial", ntrees=2, max_depth=3, min_rows=1,
                                         learn_rate=0.01, nbins=20)
    model.train(x=predictors, y=response, training_frame=fr)
    predicted = h2o.assign(model.predict(fr)[1:], "pred")
    actual = h2o.assign(fr[response].asfactor(), "act")
    domain = fr[response].levels()[0]

    m0 = model.model_performance(train=True)
    m1 = h2o.make_metrics(predicted, actual, domain=domain)
    m2 = h2o.make_metrics(predicted, actual)
    assert_is_type(m1, H2OMultinomialModelMetrics)
    assert_is_type(m2, H2OMultinomialModelMetrics)
    assert abs(m0.mse() - m1.mse()) < 1e-5
    assert abs(m0.rmse() - m1.rmse()) < 1e-5
    assert abs(m0.logloss() - m1.logloss()) < 1e-5
    assert abs(m0.mean_per_class_error() - m1.mean_per_class_error()) < 1e-5
    assert abs(m2.mse() - m1.mse()) < 1e-5
    assert abs(m2.rmse() - m1.rmse()) < 1e-5
    assert abs(m2.logloss() - m1.logloss()) < 1e-5
    assert abs(m2.mean_per_class_error() - m1.mean_per_class_error()) < 1e-5
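# A minimal standalone sketch of the API under test (a sketch, assuming a
# running H2O cluster): make_metrics only needs a frame of per-class
# probabilities plus a factor column of actual labels, so metrics can be
# computed without keeping the model around. The tiny frames below are
# made-up illustration data, not part of the original test.
probs = h2o.H2OFrame([[0.7, 0.2, 0.1], [0.1, 0.3, 0.6]],
                     column_names=["c1", "c2", "c3"])
labels = h2o.H2OFrame([["c1"], ["c3"]], column_names=["act"]).asfactor()
mm = h2o.make_metrics(probs, labels, domain=["c1", "c2", "c3"])
print(mm.logloss())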
Example #2
def h2omake_metrics_multinomial():
    """
    Python API test: h2o.make_metrics(predicted, actual, domain=None, distribution=None)

    Copied from pyunit_make_metrics.py
    """
    fr = h2o.import_file(pyunit_utils.locate("smalldata/logreg/prostate.csv"))
    fr["CAPSULE"] = fr["CAPSULE"].asfactor()
    fr["RACE"] = fr["RACE"].asfactor()

    response = "RACE"
    predictors = list(set(fr.names) - {"ID", response})
    model = H2OGradientBoostingEstimator(distribution="multinomial",
                                         ntrees=2,
                                         max_depth=3,
                                         min_rows=1,
                                         learn_rate=0.01,
                                         nbins=20,
                                         auc_type="MACRO_OVR")
    model.train(x=predictors, y=response, training_frame=fr)
    predicted = h2o.assign(model.predict(fr)[1:], "pred")
    actual = h2o.assign(fr[response].asfactor(), "act")
    domain = fr[response].levels()[0]

    m0 = model.model_performance(train=True)
    m1 = h2o.make_metrics(predicted,
                          actual,
                          domain=domain,
                          auc_type="MACRO_OVR")
    m2 = h2o.make_metrics(predicted, actual, auc_type="MACRO_OVR")
    assert_is_type(m1, H2OMultinomialModelMetrics)
    assert_is_type(m2, H2OMultinomialModelMetrics)
    assert abs(m0.mse() - m1.mse()) < 1e-5
    assert abs(m0.rmse() - m1.rmse()) < 1e-5
    assert abs(m0.logloss() - m1.logloss()) < 1e-5
    assert abs(m0.mean_per_class_error() - m1.mean_per_class_error()) < 1e-5
    assert abs(m0.auc() - m1.auc()) < 1e-5
    assert abs(m0.aucpr() - m1.aucpr()) < 1e-5
    assert abs(m2.mse() - m1.mse()) < 1e-5
    assert abs(m2.rmse() - m1.rmse()) < 1e-5
    assert abs(m2.logloss() - m1.logloss()) < 1e-5
    assert abs(m2.mean_per_class_error() - m1.mean_per_class_error()) < 1e-5
    assert abs(m2.auc() - m1.auc()) < 1e-5
    assert abs(m2.aucpr() - m1.aucpr()) < 1e-5
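# For reference, multinomial AUC and PR AUC in H2O are aggregates of
# one-vs-rest / one-vs-one binomial curves, selected via `auc_type`. A sketch
# comparing aggregation strategies, assuming `predicted`, `actual` and
# `domain` built as inside the test above:
for auc_type in ["MACRO_OVR", "WEIGHTED_OVR", "MACRO_OVO", "WEIGHTED_OVO"]:
    mm = h2o.make_metrics(predicted, actual, domain=domain, auc_type=auc_type)
    print(auc_type, mm.auc(), mm.aucpr())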
def pyunit_make_metrics():
    fr = h2o.import_file(pyunit_utils.locate("smalldata/logreg/prostate.csv"))
    fr["CAPSULE"] = fr["CAPSULE"].asfactor()
    fr["RACE"] = fr["RACE"].asfactor()
    fr.describe()

    response = "AGE"
    predictors = list(set(fr.names) - {"ID", response})

    print("\n\n======= REGRESSION ========\n")
    for distr in ["gaussian", "poisson", "laplace", "gamma"]:
        print("distribution: %s" % distr)
        model = H2OGradientBoostingEstimator(distribution=distr,
                                             ntrees=2,
                                             max_depth=3,
                                             min_rows=1,
                                             learn_rate=0.1,
                                             nbins=20)
        model.train(x=predictors, y=response, training_frame=fr)
        predicted = h2o.assign(model.predict(fr), "pred")
        actual = fr[response]

        m0 = model.model_performance(train=True)
        m1 = h2o.make_metrics(predicted, actual, distribution=distr)
        m2 = h2o.make_metrics(predicted, actual)
        print("model performance:")
        print(m0)
        print("make_metrics (distribution=%s):" % distr)
        print(m1)
        print("make_metrics (distribution=None):")
        print(m2)

        assert abs(m0.mae() - m1.mae()) < 1e-5
        assert abs(m0.mse() - m1.mse()) < 1e-5
        assert abs(m0.rmse() - m1.rmse()) < 1e-5
        assert abs(m0.mean_residual_deviance() -
                   m1.mean_residual_deviance()) < 1e-5
        assert abs(m0.rmsle() - m1.rmsle()) < 1e-5

        assert abs(m2.mae() - m1.mae()) < 1e-5
        assert abs(m2.mse() - m1.mse()) < 1e-5
        assert abs(m2.rmse() - m1.rmse()) < 1e-5
        assert (abs(m1.mean_residual_deviance() - m2.mean_residual_deviance())
                < 1e-7) == (distr == "gaussian")
        assert abs(m2.rmsle() - m1.rmsle()) < 1e-5

    print("\n\n======= BINOMIAL ========\n")
    response = "CAPSULE"
    predictors = list(set(fr.names) - {"ID", response})
    # Hyperparameters belong on the estimator constructor; train() only takes
    # the data roles (x, y, training_frame, ...).
    model = H2OGradientBoostingEstimator(distribution="bernoulli",
                                         ntrees=2,
                                         max_depth=3,
                                         min_rows=1,
                                         learn_rate=0.01,
                                         nbins=20)
    model.train(x=predictors, y=response, training_frame=fr)
    predicted = h2o.assign(model.predict(fr)[2], "pred")
    actual = h2o.assign(fr[response].asfactor(), "act")
    domain = ["0", "1"]

    m0 = model.model_performance(train=True)
    m1 = h2o.make_metrics(predicted, actual, domain=domain)
    m2 = h2o.make_metrics(predicted, actual)
    print("m0:")
    print(m0)
    print("m1:")
    print(m1)
    print("m2:")
    print(m2)

    assert abs(m0.auc() - m1.auc()) < 1e-5
    assert abs(m0.mse() - m1.mse()) < 1e-5
    assert abs(m0.rmse() - m1.rmse()) < 1e-5
    assert abs(m0.logloss() - m1.logloss()) < 1e-5
    assert abs(m0.mean_per_class_error()[0][1] -
               m1.mean_per_class_error()[0][1]) < 1e-5
    assert abs(m2.auc() - m1.auc()) < 1e-5
    assert abs(m2.mse() - m1.mse()) < 1e-5
    assert abs(m2.rmse() - m1.rmse()) < 1e-5
    assert abs(m2.logloss() - m1.logloss()) < 1e-5
    assert abs(m2.mean_per_class_error()[0][1] -
               m1.mean_per_class_error()[0][1]) < 1e-5

    print("\n\n======= MULTINOMIAL ========\n")
    response = "RACE"
    predictors = list(set(fr.names) - {"ID", response})
    model = H2OGradientBoostingEstimator(distribution="multinomial",
                                         ntrees=2,
                                         max_depth=3,
                                         min_rows=1,
                                         learn_rate=0.01,
                                         nbins=20)
    model.train(x=predictors, y=response, training_frame=fr)
    predicted = h2o.assign(model.predict(fr)[1:], "pred")
    actual = h2o.assign(fr[response].asfactor(), "act")
    domain = fr[response].levels()[0]

    m0 = model.model_performance(train=True)
    m1 = h2o.make_metrics(predicted, actual, domain=domain)
    m2 = h2o.make_metrics(predicted, actual)

    assert abs(m0.mse() - m1.mse()) < 1e-5
    assert abs(m0.rmse() - m1.rmse()) < 1e-5
    assert abs(m0.logloss() - m1.logloss()) < 1e-5
    assert abs(m0.mean_per_class_error() - m1.mean_per_class_error()) < 1e-5
    assert abs(m2.mse() - m1.mse()) < 1e-5
    assert abs(m2.rmse() - m1.rmse()) < 1e-5
    assert abs(m2.logloss() - m1.logloss()) < 1e-5
    assert abs(m2.mean_per_class_error() - m1.mean_per_class_error()) < 1e-5
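# Reading of the regression asserts above: `distribution` only influences
# mean_residual_deviance; with distribution=None, make_metrics falls back to
# the gaussian deviance, which is why m1 and m2 agree on that metric only for
# distr == "gaussian". A sketch, assuming regression frames `predicted` and
# `actual` as in the loop above:
m_pois = h2o.make_metrics(predicted, actual, distribution="poisson")
m_none = h2o.make_metrics(predicted, actual)
print(m_pois.mean_residual_deviance(), m_none.mean_residual_deviance())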
Example #4
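# (Fragment: lnet, gnet, inet and the frames referenced below are defined
# earlier in the original script, outside this snippet.)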
inet.show()
error_i = inet.model_performance(train=True).mean_per_class_error()


anet = H2ODeepWaterEstimator(epochs=500, network="alexnet", mini_batch_size=16,
                             nfolds=5, gpu=True)
anet.train(x=[0], y=1, training_frame=frame)
anet.show()
error_a = anet.model_performance(train=True).mean_per_class_error()


# Find the rank of each of the 4 networks and take a rank-weighted average
# (the weights 4+3+1+2 sum to 10, hence the 0.1 normalization factor).
# Note: train() returns None, so the fitted estimators are used directly.

lnet.train(x=[0], y=1, training_frame=train_ensemble)
gnet.train(x=[0], y=1, training_frame=train_ensemble)
inet.train(x=[0], y=1, training_frame=train_ensemble)
anet.train(x=[0], y=1, training_frame=train_ensemble)

# Column 2 of each binomial prediction frame is the p1 probability.
predict = 0.1 * (4 * lnet.predict(test_ensemble)[2]
                 + 3 * gnet.predict(test_ensemble)[2]
                 + 1 * inet.predict(test_ensemble)[2]
                 + 2 * anet.predict(test_ensemble)[2])


print(h2o.make_metrics(actual=test[response], predicted=predict).logloss())

submit_predict = 0.1 * (4 * lnet.predict(submit_frame)[2]
                        + 3 * gnet.predict(submit_frame)[2]
                        + 1 * inet.predict(submit_frame)[2]
                        + 2 * anet.predict(submit_frame)[2])

df = pd.DataFrame({'prob': submit_predict.as_data_frame().iloc[:, 0]})
df.to_csv('predictions.csv', index=True)
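# A sketch of an alternative export that stays on the H2O side, assuming the
# same single-column `submit_predict` frame (the output path is hypothetical):
submit_predict.set_names(["prob"])
h2o.export_file(submit_predict, "predictions_h2o.csv", force=True)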




Example #5
def pyunit_make_metrics():
    fr = h2o.import_file(pyunit_utils.locate("smalldata/logreg/prostate.csv"))
    fr["CAPSULE"] = fr["CAPSULE"].asfactor()
    fr["RACE"] = fr["RACE"].asfactor()
    fr.describe()

    response = "AGE"
    predictors = list(set(fr.names) - {"ID", response})

    print("\n\n======= REGRESSION ========\n")
    for distr in ["gaussian", "poisson", "laplace", "gamma"]:
        print("distribution: %s" % distr)
        model = H2OGradientBoostingEstimator(distribution=distr,
                                             ntrees=2,
                                             max_depth=3,
                                             min_rows=1,
                                             learn_rate=0.1,
                                             nbins=20)
        model.train(x=predictors, y=response, training_frame=fr)
        predicted = h2o.assign(model.predict(fr), "pred")
        actual = fr[response]

        m0 = model.model_performance(train=True)
        m1 = h2o.make_metrics(predicted, actual, distribution=distr)
        m2 = h2o.make_metrics(predicted, actual)
        print("model performance:")
        print(m0)
        print("make_metrics (distribution=%s):" % distr)
        print(m1)
        print("make_metrics (distribution=None):")
        print(m2)

        assert abs(m0.mae() - m1.mae()) < 1e-5
        assert abs(m0.mse() - m1.mse()) < 1e-5
        assert abs(m0.rmse() - m1.rmse()) < 1e-5
        assert abs(m0.mean_residual_deviance() -
                   m1.mean_residual_deviance()) < 1e-5
        assert abs(m0.rmsle() - m1.rmsle()) < 1e-5

        assert abs(m2.mae() - m1.mae()) < 1e-5
        assert abs(m2.mse() - m1.mse()) < 1e-5
        assert abs(m2.rmse() - m1.rmse()) < 1e-5
        assert (abs(m1.mean_residual_deviance() - m2.mean_residual_deviance())
                < 1e-7) == (distr == "gaussian")
        assert abs(m2.rmsle() - m1.rmsle()) < 1e-5

    print("\n\n======= BINOMIAL ========\n")
    response = "CAPSULE"
    predictors = list(set(fr.names) - {"ID", response})
    model = H2OGradientBoostingEstimator(distribution="bernoulli",
                                         ntrees=2,
                                         max_depth=3,
                                         min_rows=1,
                                         learn_rate=0.01,
                                         nbins=20,
                                         seed=1)
    model.train(x=predictors, y=response, training_frame=fr)
    predicted = h2o.assign(model.predict(fr)[2], "pred")
    actual = h2o.assign(fr[response].asfactor(), "act")
    domain = ["0", "1"]

    m0 = model.model_performance(train=True)
    m1 = h2o.make_metrics(predicted, actual, domain=domain)
    m2 = h2o.make_metrics(predicted, actual)
    print("m0:")
    print(m0)
    print("m1:")
    print(m1)
    print("m2:")
    print(m2)

    assert m0.accuracy()[0][1] + m0.error()[0][1] == 1
    assert len(m0.accuracy(thresholds='all')) == len(m0.fprs)

    assert m0.accuracy().value == m1.accuracy().value == m0.accuracy()[0][1]
    assert m0.accuracy().value + m0.error().value == 1

    assert isinstance(m0.accuracy(thresholds=0.4).value, float)
    assert m0.accuracy(thresholds=0.4).value == m1.accuracy(
        thresholds=0.4).value == m0.accuracy(thresholds=0.4)[0][1]
    assert m0.accuracy(thresholds=0.4).value + m0.error(
        thresholds=0.4).value == 1

    assert isinstance(m0.accuracy(thresholds=[0.4]).value, list)
    assert len(m0.accuracy(thresholds=[0.4]).value) == 1
    assert m0.accuracy(thresholds=[0.4]).value[0] == m0.accuracy(
        thresholds=0.4).value

    assert isinstance(m0.accuracy(thresholds=[0.4, 0.5]).value, list)
    assert len(m0.accuracy(thresholds=[0.4, 0.5]).value) == 2
    assert m0.accuracy(thresholds=[0.4, 0.5]).value == [
        m0.accuracy(thresholds=0.4).value,
        m0.accuracy(thresholds=0.5).value
    ]

    # Testing base metric methods
    # (`base_metric_methods` and `max_metrics` are presumably module-level
    # lists in the original pyunit_make_metrics.py; they are not shown here.)
    # FIXME: check the same failures for other ModelMetrics impl. and then fix 'em all or move them out of base class...
    base_metrics_methods_failing_on_H2OBinomialModelMetrics = [
        'aic', 'mae', 'mean_per_class_error', 'mean_residual_deviance', 'rmsle'
    ]
    for metric_method in (
            m for m in base_metric_methods
            if m not in base_metrics_methods_failing_on_H2OBinomialModelMetrics
    ):
        m0mm = getattr(m0, metric_method)()
        m1mm = getattr(m1, metric_method)()
        m2mm = getattr(m2, metric_method)()

        assert m0mm == m1mm or abs(m0mm - m1mm) < 1e-5, \
            "{} is different for model_performance and make_metrics on [0, 1] domain".format(metric_method)
        assert m1mm == m2mm or abs(m1mm - m2mm) < 1e-5, \
            "{} is different for make_metrics on [0, 1] domain and make_metrics without domain".format(metric_method)
    # FIXME: for binomial mean_per_class_error is strangely accessible as an array
    assert abs(m0.mean_per_class_error()[0][1] -
               m1.mean_per_class_error()[0][1]) < 1e-5
    assert abs(m2.mean_per_class_error()[0][1] -
               m1.mean_per_class_error()[0][1]) < 1e-5

    failures = 0
    for metric_method in base_metrics_methods_failing_on_H2OBinomialModelMetrics:
        for m in [m0, m1, m2]:
            try:
                assert isinstance(getattr(m, metric_method)(), float)
            except:
                failures += 1
    assert failures == 3 * len(
        base_metrics_methods_failing_on_H2OBinomialModelMetrics)

    # Testing binomial-only metric methods
    binomial_only_metric_methods = [
        'accuracy', 'F0point5', 'F1', 'F2', 'mcc', 'max_per_class_error',
        'mean_per_class_error', 'precision', 'recall', 'specificity',
        'fallout', 'missrate', 'sensitivity', 'fpr', 'fnr', 'tpr', 'tnr'
    ]
    for metric_method in binomial_only_metric_methods:
        # FIXME: not sure that returning a 2d-array is justified when not passing any threshold
        m0mm = getattr(m0, metric_method)()[0]
        m1mm = getattr(m1, metric_method)()[0]
        m2mm = getattr(m2, metric_method)()[0]
        assert m0mm == m1mm or abs(m0mm[1] - m1mm[1]) < 1e-5, \
            "{} is different for model_performance and make_metrics on [0, 1] domain".format(metric_method)
        assert m1mm == m2mm or abs(m1mm[1] - m2mm[1]) < 1e-5, \
            "{} is different for make_metrics on [0, 1] domain and make_metrics without domain".format(metric_method)

    # Testing confusion matrix
    cm0 = m0.confusion_matrix(metrics=max_metrics)
    assert len(cm0) == len(max_metrics)
    headers0 = [cm.table._table_header for cm in cm0]
    assert all(any(m in header for header in headers0) for m in max_metrics), \
        "got duplicate CM headers, although all metrics are different"
    cm0t = m0.confusion_matrix(metrics=max_metrics, thresholds=[.3, .6])
    assert len(cm0t) == 2 + len(max_metrics)
    headers0t = [cm.table._table_header for cm in cm0t]
    assert 2 == sum(not any(m in header for m in max_metrics) for header in headers0t), \
        "missing or duplicate headers without metric (thresholds-only CMs)"
    assert all(any(m in header for header in headers0t) for m in max_metrics), \
        "got duplicate CM headers, although all metrics are different"

    print("\n\n======= MULTINOMIAL ========\n")
    response = "RACE"
    predictors = list(set(fr.names) - {"ID", response})
    model = H2OGradientBoostingEstimator(distribution="multinomial",
                                         ntrees=2,
                                         max_depth=3,
                                         min_rows=1,
                                         learn_rate=0.01,
                                         nbins=20)
    model.train(x=predictors, y=response, training_frame=fr)
    predicted = h2o.assign(model.predict(fr)[1:], "pred")
    actual = h2o.assign(fr[response].asfactor(), "act")
    domain = fr[response].levels()[0]

    m0 = model.model_performance(train=True)
    m1 = h2o.make_metrics(predicted, actual, domain=domain)
    m2 = h2o.make_metrics(predicted, actual)

    assert abs(m0.mse() - m1.mse()) < 1e-5
    assert abs(m0.rmse() - m1.rmse()) < 1e-5
    assert abs(m0.logloss() - m1.logloss()) < 1e-5
    assert abs(m0.mean_per_class_error() - m1.mean_per_class_error()) < 1e-5

    assert abs(m2.mse() - m1.mse()) < 1e-5
    assert abs(m2.rmse() - m1.rmse()) < 1e-5
    assert abs(m2.logloss() - m1.logloss()) < 1e-5
    assert abs(m2.mean_per_class_error() - m1.mean_per_class_error()) < 1e-5
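# Shape of the thresholds API exercised in the binomial sections (a summary of
# the asserts above; `bmm` stands in for any H2OBinomialModelMetrics object):
#   bmm.accuracy()                      -> [[best_threshold, value]], .value is a float
#   bmm.accuracy(thresholds=0.4)        -> [[0.4, value_at_0.4]],     .value is a float
#   bmm.accuracy(thresholds=[0.4, 0.5]) -> one [threshold, value] row per threshold,
#                                          .value is a list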
def pyunit_make_metrics():
    fr = h2o.import_file(pyunit_utils.locate("smalldata/logreg/prostate.csv"))
    fr["CAPSULE"] = fr["CAPSULE"].asfactor()
    fr["RACE"] = fr["RACE"].asfactor()
    fr.describe()

    response = "AGE"
    predictors = list(set(fr.names) - {"ID", response})

    print("\n\n======= REGRESSION ========\n")
    for distr in ["gaussian", "poisson", "laplace", "gamma"]:
        print("distribution: %s" % distr)
        model = H2OGradientBoostingEstimator(distribution=distr, ntrees=2, max_depth=3,
                    min_rows=1, learn_rate=0.1, nbins=20)
        model.train(x=predictors, y=response, training_frame=fr)
        predicted = h2o.assign(model.predict(fr), "pred")
        actual = fr[response]

        m0 = model.model_performance(train=True)
        m1 = h2o.make_metrics(predicted, actual, distribution=distr)
        m2 = h2o.make_metrics(predicted, actual)
        print("model performance:")
        print(m0)
        print("make_metrics (distribution=%s):" % distr)
        print(m1)
        print("make_metrics (distribution=None):")
        print(m2)

        assert abs(m0.mae() - m1.mae()) < 1e-5
        assert abs(m0.mse() - m1.mse()) < 1e-5
        assert abs(m0.rmse() - m1.rmse()) < 1e-5
        assert abs(m0.mean_residual_deviance() - m1.mean_residual_deviance()) < 1e-5
        assert abs(m2.mae() - m1.mae()) < 1e-5
        assert abs(m2.mse() - m1.mse()) < 1e-5
        assert abs(m2.rmse() - m1.rmse()) < 1e-5
        assert (abs(m1.mean_residual_deviance() - m2.mean_residual_deviance()) < 1e-7) == (distr == "gaussian")


    print("\n\n======= BINOMIAL ========\n")
    response = "CAPSULE"
    predictors = list(set(fr.names) - {"ID", response})
    model = H2OGradientBoostingEstimator(distribution="bernoulli", ntrees=2, max_depth=3,
                                         min_rows=1, learn_rate=0.01, nbins=20)
    model.train(x=predictors, y=response, training_frame=fr)
    predicted = h2o.assign(model.predict(fr)[2], "pred")
    actual = h2o.assign(fr[response].asfactor(), "act")
    domain = ["0", "1"]

    m0 = model.model_performance(train=True)
    m1 = h2o.make_metrics(predicted, actual, domain=domain)
    m2 = h2o.make_metrics(predicted, actual)
    print("m0:")
    print(m0)
    print("m1:")
    print(m1)
    print("m2:")
    print(m2)

    assert abs(m0.auc() - m1.auc()) < 1e-5
    assert abs(m0.mse() - m1.mse()) < 1e-5
    assert abs(m0.rmse() - m1.rmse()) < 1e-5
    assert abs(m0.logloss() - m1.logloss()) < 1e-5
    assert abs(m0.mean_per_class_error()[0][1] - m1.mean_per_class_error()[0][1]) < 1e-5
    assert abs(m2.auc() - m1.auc()) < 1e-5
    assert abs(m2.mse() - m1.mse()) < 1e-5
    assert abs(m2.rmse() - m1.rmse()) < 1e-5
    assert abs(m2.logloss() - m1.logloss()) < 1e-5
    assert abs(m2.mean_per_class_error()[0][1] - m1.mean_per_class_error()[0][1]) < 1e-5


    print("\n\n======= MULTINOMIAL ========\n")
    response = "RACE"
    predictors = list(set(fr.names) - {"ID", response})
    model = H2OGradientBoostingEstimator(distribution="multinomial", ntrees=2, max_depth=3,
                                         min_rows=1, learn_rate=0.01, nbins=20)
    model.train(x=predictors, y=response, training_frame=fr)
    predicted = h2o.assign(model.predict(fr)[1:], "pred")
    actual = h2o.assign(fr[response].asfactor(), "act")
    domain = fr[response].levels()[0]

    m0 = model.model_performance(train=True)
    m1 = h2o.make_metrics(predicted, actual, domain=domain)
    m2 = h2o.make_metrics(predicted, actual)

    assert abs(m0.mse() - m1.mse()) < 1e-5
    assert abs(m0.rmse() - m1.rmse()) < 1e-5
    assert abs(m0.logloss() - m1.logloss()) < 1e-5
    assert abs(m0.mean_per_class_error() - m1.mean_per_class_error()) < 1e-5
    assert abs(m2.mse() - m1.mse()) < 1e-5
    assert abs(m2.rmse() - m1.rmse()) < 1e-5
    assert abs(m2.logloss() - m1.logloss()) < 1e-5
    assert abs(m2.mean_per_class_error() - m1.mean_per_class_error()) < 1e-5
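# Convention used by every binomial block above: `predicted` is the single
# column of p1 probabilities (the third column of the prediction frame) and
# `domain` lists the class labels in order. An equivalent selection by column
# name, assuming `model` is the bernoulli GBM and `fr` the prostate frame from
# the BINOMIAL section ("p1" is the usual H2O prediction column name):
p1 = model.predict(fr)["p1"]
mm = h2o.make_metrics(p1, fr["CAPSULE"].asfactor(), domain=["0", "1"])
print(mm.auc())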
def pyunit_make_metrics():
    fr = h2o.import_file(pyunit_utils.locate("smalldata/logreg/prostate.csv"))
    fr["CAPSULE"] = fr["CAPSULE"].asfactor()
    fr["RACE"] = fr["RACE"].asfactor()
    fr.describe()

    response = "AGE"
    predictors = list(set(fr.names) - {"ID", response})

    print("\n\n======= REGRESSION ========\n")
    for distr in ["gaussian", "poisson", "laplace", "gamma"]:
        print("distribution: %s" % distr)
        model = H2OGradientBoostingEstimator(distribution=distr, ntrees=2, max_depth=3,
                    min_rows=1, learn_rate=0.1, nbins=20)
        model.train(x=predictors, y=response, training_frame=fr)
        predicted = h2o.assign(model.predict(fr), "pred")
        actual = fr[response]

        m0 = model.model_performance(train=True)
        m1 = h2o.make_metrics(predicted, actual, distribution=distr)
        m2 = h2o.make_metrics(predicted, actual)
        print("model performance:")
        print(m0)
        print("make_metrics (distribution=%s):" % distr)
        print(m1)
        print("make_metrics (distribution=None):")
        print(m2)

        assert abs(m0.mae() - m1.mae()) < 1e-5
        assert abs(m0.mse() - m1.mse()) < 1e-5
        assert abs(m0.rmse() - m1.rmse()) < 1e-5
        assert abs(m0.mean_residual_deviance() - m1.mean_residual_deviance()) < 1e-5
        assert abs(m0.rmsle() - m1.rmsle()) < 1e-5

        assert abs(m2.mae() - m1.mae()) < 1e-5
        assert abs(m2.mse() - m1.mse()) < 1e-5
        assert abs(m2.rmse() - m1.rmse()) < 1e-5
        assert (abs(m1.mean_residual_deviance() - m2.mean_residual_deviance()) < 1e-7) == (distr == "gaussian")
        assert abs(m2.rmsle() - m1.rmsle()) < 1e-5

    print("\n\n======= BINOMIAL ========\n")
    response = "CAPSULE"
    predictors = list(set(fr.names) - {"ID", response})
    model = H2OGradientBoostingEstimator(distribution="bernoulli", ntrees=2, max_depth=3, min_rows=1,
                                         learn_rate=0.01, nbins=20, seed=1)
    model.train(x=predictors, y=response, training_frame=fr)
    predicted = h2o.assign(model.predict(fr)[2], "pred")
    actual = h2o.assign(fr[response].asfactor(), "act")
    domain = ["0", "1"]

    m0 = model.model_performance(train=True)
    m1 = h2o.make_metrics(predicted, actual, domain=domain)
    m2 = h2o.make_metrics(predicted, actual)
    print("m0:")
    print(m0)
    print("m1:")
    print(m1)
    print("m2:")
    print(m2)

    # Testing base metric methods
    # (`base_metric_methods` and `max_metrics` are presumably module-level
    # lists in the original pyunit_make_metrics.py; they are not shown here.)
    # FIXME: check the same failures for other ModelMetrics impl. and then fix 'em all or move them out of base class...
    base_metrics_methods_failing_on_H2OBinomialModelMetrics = ['aic', 'mae', 'mean_per_class_error', 'mean_residual_deviance', 'rmsle']
    for metric_method in (m for m in base_metric_methods if m not in base_metrics_methods_failing_on_H2OBinomialModelMetrics):
        m0mm = getattr(m0, metric_method)()
        m1mm = getattr(m1, metric_method)()
        m2mm = getattr(m2, metric_method)()

        assert m0mm == m1mm or abs(m0mm - m1mm) < 1e-5, \
            "{} is different for model_performance and make_metrics on [0, 1] domain".format(metric_method)
        assert m1mm == m2mm or abs(m1mm - m2mm) < 1e-5, \
            "{} is different for make_metrics on [0, 1] domain and make_metrics without domain".format(metric_method)
    # FIXME: for binomial mean_per_class_error is strangely accessible as an array
    assert abs(m0.mean_per_class_error()[0][1] - m1.mean_per_class_error()[0][1]) < 1e-5
    assert abs(m2.mean_per_class_error()[0][1] - m1.mean_per_class_error()[0][1]) < 1e-5

    failures = 0
    for metric_method in base_metrics_methods_failing_on_H2OBinomialModelMetrics:
        for m in [m0, m1, m2]:
            try:
                assert isinstance(getattr(m, metric_method)(), float)
            except:
                failures += 1
    assert failures == 3 * len(base_metrics_methods_failing_on_H2OBinomialModelMetrics)

    # Testing binomial-only metric methods
    binomial_only_metric_methods = ['accuracy', 'F0point5', 'F1', 'F2', 'mcc',
                                    'max_per_class_error', 'mean_per_class_error',
                                    'precision', 'recall', 'specificity', 'fallout', 'missrate', 'sensitivity',
                                    'fpr', 'fnr', 'tpr', 'tnr']
    failing_binomial_metrics = ['max_per_class_error', 'recall', 'specificity', 'fallout', 'missrate', 'sensitivity',
                                'fpr', 'fnr', 'tpr', 'tnr']
    for metric_method in (m for m in binomial_only_metric_methods if m not in failing_binomial_metrics):
        # FIXME: not sure that returning a 2d-array is justified when not passing any threshold
        m0mm = getattr(m0, metric_method)()[0]
        m1mm = getattr(m1, metric_method)()[0]
        m2mm = getattr(m2, metric_method)()[0]
        assert m0mm == m1mm or abs(m0mm[1] - m1mm[1]) < 1e-5, \
            "{} is different for model_performance and make_metrics on [0, 1] domain".format(metric_method)
        assert m1mm == m2mm or abs(m1mm[1] - m2mm[1]) < 1e-5, \
            "{} is different for make_metrics on [0, 1] domain and make_metrics without domain".format(metric_method)

    failures = 0
    for metric_method in failing_binomial_metrics:
        for m in [m0, m1, m2]:
            try:
                assert isinstance(getattr(m, metric_method)()[0][1], float)
            except:
                failures += 1
    assert failures == 3 * len(failing_binomial_metrics)

    # Testing confusion matrix
    cm0 = m0.confusion_matrix(metrics=max_metrics)
    assert len(cm0) == len(max_metrics)
    headers0 = [cm.table._table_header for cm in cm0]
    assert all(any(m in header for header in headers0) for m in max_metrics), \
        "got duplicate CM headers, although all metrics are different"
    cm0t = m0.confusion_matrix(metrics=max_metrics, thresholds=[.3, .6])
    assert len(cm0t) == 2 + len(max_metrics)
    headers0t = [cm.table._table_header for cm in cm0t]
    assert 2 == sum(not any(m in header for m in max_metrics) for header in headers0t), \
        "missing or duplicate headers without metric (thresholds-only CMs)"
    assert all(any(m in header for header in headers0t) for m in max_metrics), \
        "got duplicate CM headers, although all metrics are different"


    print("\n\n======= MULTINOMIAL ========\n")
    response = "RACE"
    predictors = list(set(fr.names) - {"ID", response})
    model = H2OGradientBoostingEstimator(distribution="multinomial", ntrees=2, max_depth=3, min_rows=1,
                                         learn_rate=0.01, nbins=20)
    model.train(x=predictors, y=response, training_frame=fr)
    predicted = h2o.assign(model.predict(fr)[1:], "pred")
    actual = h2o.assign(fr[response].asfactor(), "act")
    domain = fr[response].levels()[0]

    m0 = model.model_performance(train=True)
    m1 = h2o.make_metrics(predicted, actual, domain=domain)
    m2 = h2o.make_metrics(predicted, actual)

    assert abs(m0.mse() - m1.mse()) < 1e-5
    assert abs(m0.rmse() - m1.rmse()) < 1e-5
    assert abs(m0.logloss() - m1.logloss()) < 1e-5
    assert abs(m0.mean_per_class_error() - m1.mean_per_class_error()) < 1e-5

    assert abs(m2.mse() - m1.mse()) < 1e-5
    assert abs(m2.rmse() - m1.rmse()) < 1e-5
    assert abs(m2.logloss() - m1.logloss()) < 1e-5
    assert abs(m2.mean_per_class_error() - m1.mean_per_class_error()) < 1e-5