def infer_distribution_test():
    """Drive ``infer_distribution_helper`` over built-in and custom distributions.

    The calls are grouped into four sections:

    * every built-in distribution name paired with itself,
    * the ``"custom"`` distribution with the same uploaded function on both
      sides,
    * cases labelled "revert to default", where the second name is
      ``"gaussian"``,
    * cases labelled "unaffected by param for different distribution", where
      both names are the same even though a parameter for another
      distribution is supplied.

    NOTE(review): the helper is defined elsewhere; from the call sites its
    first two arguments look like the requested and the expected distribution
    name, followed by up to two optional parameter dicts - confirm against
    the helper's definition.
    """
    # Import from the submodule that defines these classes; this is the same
    # path the other tests in this file use.
    from h2o.utils.distributions import (
        CustomDistributionGeneric,
        CustomDistributionGaussian,
    )

    class CustomDistributionGaussian2(CustomDistributionGeneric):
        """Second, locally defined custom distribution with an identity link."""

        def link(self):
            return "identity"

        def init(self, w, o, y):
            return [w * (y - o), w]

        def gradient(self, y, f):
            return y - f

        def gamma(self, w, y, z, f):
            return [w * z, w]

    # Two distinct uploaded functions, so that a case can pass a different
    # function in each parameter dict.
    custom_dist1 = h2o.upload_custom_distribution(CustomDistributionGaussian)
    custom_dist2 = h2o.upload_custom_distribution(CustomDistributionGaussian2)

    builtin_distributions = (
        "poisson", "laplace", "tweedie", "gaussian", "huber", "gamma",
        "quantile", "bernoulli", "quasibinomial", "multinomial",
    )
    for dist in builtin_distributions:
        infer_distribution_helper(dist, dist)

    # custom distribution
    infer_distribution_helper("custom", "custom",
                              dict(custom_distribution_func=custom_dist1),
                              dict(custom_distribution_func=custom_dist1))

    # revert to default
    infer_distribution_helper("tweedie", "gaussian", dict(tweedie_power=1.2))
    infer_distribution_helper("huber", "gaussian", dict(huber_alpha=0.2))
    infer_distribution_helper("quantile", "gaussian", dict(quantile_alpha=0.2))
    infer_distribution_helper("custom", "gaussian",
                              dict(custom_distribution_func=custom_dist1),
                              dict(custom_distribution_func=custom_dist2))

    # unaffected by param for different distribution
    infer_distribution_helper("quantile", "quantile", dict(tweedie_power=1.2))
    infer_distribution_helper("tweedie", "tweedie", dict(huber_alpha=0.2))
    infer_distribution_helper("huber", "huber", dict(quantile_alpha=0.2))
    infer_distribution_helper(
        "custom", "custom", dict(custom_distribution_func=custom_dist1),
        dict(custom_distribution_func=custom_dist1, tweedie_power=1.2))
# Example #2
# 0
            def test_scenario():
                """Train AutoML for one scenario and check each model's distribution.

                Reads ``distribution_name``, ``scenario`` and ``seed`` from the
                enclosing scope. When training raises, the scenario must be
                flagged with ``fail``; when it succeeds, it must not be. After
                a successful run the leaderboard has to contain every algorithm
                in the fixed list below, and each model whose algo is listed in
                ``scenario["algos"]`` must report the expected distribution.
                """
                target_dist = distribution_name
                frame = make_data(scenario.get("nrows", 264))

                # Hack so we don't remove the custom distribution function
                if target_dist == "custom":
                    from h2o.utils.distributions import CustomDistributionGaussian
                    scenario["distribution"]["custom_distribution_func"] = (
                        h2o.upload_custom_distribution(CustomDistributionGaussian))

                model_cap = scenario.get("max_models", 12)
                aml = H2OAutoML(
                    max_models=model_cap,
                    distribution=scenario["distribution"],
                    seed=seed,
                    max_runtime_secs_per_model=1,
                )

                try:
                    aml.train(y=scenario["response"], training_frame=frame)
                except Exception:
                    # A training failure is only acceptable for scenarios
                    # that are marked as expected to fail.
                    assert scenario.get('fail', False), \
                        "This distribution should not have failed."
                    return
                assert not scenario.get('fail', False), \
                    "This distribution should have failed."

                # Distinct algorithm names present on the leaderboard.
                algos = []
                if aml.leaderboard.nrow != 0:
                    algo_column = get_leaderboard(aml, "algo").as_data_frame()["algo"]
                    algos = list(set(algo_column.unique()))

                required_algos = (
                    'DeepLearning', "DRF", 'GBM', 'GLM', 'StackedEnsemble',
                    'XGBoost',
                )
                for algo_name in required_algos:
                    assert algo_name in algos, \
                        "Expected {expected} but no found.".format(expected=algo_name)

                for model_id in aml.leaderboard.as_data_frame()["model_id"]:
                    found_dist = get_distribution(model_id)

                    # Fails only when the distribution differs AND the model's
                    # algo is one of the scenario's listed algos.
                    assert not (
                        found_dist != target_dist
                        and h2o.get_model(model_id).algo in [a.lower() for a in scenario["algos"]]
                    ), (
                        "{model}: Expected distribution {s_dist} but {distribution} found!".format(
                            model=model_id,
                            s_dist=target_dist,
                            distribution=found_dist
                        ))
# Example #3
# 0
def test_custom_distribution_reuse():
    """Train a custom-distribution GBM three times on the iris training set.

    Each round calls ``test_wrong_multinomial()``, uploads
    ``CustomDistributionGaussian`` again, and trains a 2-fold
    cross-validated GBM that uses the freshly uploaded function to predict
    ``petal_wid`` from the remaining columns.
    """
    from h2o.utils.distributions import CustomDistributionGaussian

    iris = h2o.import_file(
        pyunit_utils.locate("smalldata/iris/iris_train.csv"))
    response = "petal_wid"
    # Predictors are all columns except the response.
    predictors = iris.columns
    predictors.remove(response)

    fold_count = 2
    rounds = 3
    for _ in range(rounds):
        test_wrong_multinomial()
        uploaded_dist = h2o.upload_custom_distribution(
            CustomDistributionGaussian)
        model = H2OGradientBoostingEstimator(
            nfolds=fold_count,
            fold_assignment="Modulo",
            keep_cross_validation_predictions=True,
            distribution="custom",
            custom_distribution_func=uploaded_dist,
        )
        model.train(x=predictors, y=response, training_frame=iris)
def custom_distribution_bernoulli():
    """Upload ``CustomDistributionBernoulli`` and return the upload result.

    The function is registered as ``custom_bernoulli`` in the file
    ``custom_bernoulli.py``.
    """
    uploaded = h2o.upload_custom_distribution(
        CustomDistributionBernoulli,
        func_name="custom_bernoulli",
        func_file="custom_bernoulli.py",
    )
    return uploaded
# Example #5
# 0
def upload_distribution(distribution, name):
    """Upload ``distribution`` under a ``custom_``-prefixed name.

    :param distribution: object handed to ``h2o.upload_custom_distribution``.
    :param name: suffix appended to ``"custom_"`` to form the function name;
        the file name is that function name plus ``".py"``.
    :returns: whatever ``h2o.upload_custom_distribution`` returns.
    """
    prefixed_name = "custom_" + name
    return h2o.upload_custom_distribution(
        distribution,
        func_name=prefixed_name,
        func_file=prefixed_name + ".py",
    )