def infer_distribution_test():
    """Verify that AutoML passes each requested distribution through to the
    trained models, reverts to the default ("gaussian") when a
    distribution-specific parameter is given for a *different* distribution,
    and is unaffected by parameters that do not belong to the requested one.

    Relies on the module-level helper ``infer_distribution_helper(requested,
    expected, distribution_params=..., expected_params=...)``.
    """
    # CONSISTENCY FIX: the rest of this file imports these classes from
    # h2o.utils.distributions (where they are declared), not h2o.utils.
    from h2o.utils.distributions import CustomDistributionGeneric, CustomDistributionGaussian

    class CustomDistributionGaussian2(CustomDistributionGeneric):
        """A second, hand-written Gaussian deviance; used to check that a
        *different* custom function triggers the revert-to-default path."""

        def link(self):
            return "identity"

        def init(self, w, o, y):
            return [w * (y - o), w]

        def gradient(self, y, f):
            return y - f

        def gamma(self, w, y, z, f):
            return [w * z, w]

    custom_dist1 = h2o.upload_custom_distribution(CustomDistributionGaussian)
    custom_dist2 = h2o.upload_custom_distribution(CustomDistributionGaussian2)

    # Every built-in distribution should be passed through unchanged.
    for dist in ["poisson", "laplace", "tweedie", "gaussian", "huber", "gamma",
                 "quantile", "bernoulli", "quasibinomial", "multinomial"]:
        infer_distribution_helper(dist, dist)

    # custom distribution
    infer_distribution_helper("custom", "custom",
                              dict(custom_distribution_func=custom_dist1),
                              dict(custom_distribution_func=custom_dist1))

    # revert to default: a parameter for another distribution (or a mismatched
    # custom function) makes AutoML fall back to "gaussian".
    infer_distribution_helper("tweedie", "gaussian", dict(tweedie_power=1.2))
    infer_distribution_helper("huber", "gaussian", dict(huber_alpha=0.2))
    infer_distribution_helper("quantile", "gaussian", dict(quantile_alpha=0.2))
    infer_distribution_helper("custom", "gaussian",
                              dict(custom_distribution_func=custom_dist1),
                              dict(custom_distribution_func=custom_dist2))

    # unaffected by param for different distribution
    infer_distribution_helper("quantile", "quantile", dict(tweedie_power=1.2))
    infer_distribution_helper("tweedie", "tweedie", dict(huber_alpha=0.2))
    infer_distribution_helper("huber", "huber", dict(quantile_alpha=0.2))
    infer_distribution_helper("custom", "custom",
                              dict(custom_distribution_func=custom_dist1),
                              dict(custom_distribution_func=custom_dist1,
                                   tweedie_power=1.2))
def test_scenario():
    """Train AutoML for one scenario and check the outcome.

    Asserts that: training fails exactly when the scenario is flagged with
    ``'fail'``; every expected algorithm appears on the leaderboard; and each
    leaderboard model whose algo is listed in ``scenario["algos"]`` was
    trained with the expected distribution.
    """
    # NOTE(review): `distribution_name`, `scenario` and `seed` are read from
    # the enclosing scope -- presumably this function is defined inside a
    # per-scenario loop; confirm against the surrounding file.
    expected_dist = distribution_name
    df = make_data(scenario.get("nrows", 264))

    # Hack so we don't remove the custom distribution function
    if expected_dist == "custom":
        from h2o.utils.distributions import CustomDistributionGaussian
        custom_dist = h2o.upload_custom_distribution(CustomDistributionGaussian)
        scenario["distribution"]["custom_distribution_func"] = custom_dist

    aml = H2OAutoML(max_models=scenario.get("max_models", 12),
                    distribution=scenario["distribution"],
                    seed=seed,
                    max_runtime_secs_per_model=1)
    try:
        aml.train(y=scenario["response"], training_frame=df)
    except Exception:
        # Training may only fail for scenarios explicitly marked as failing.
        assert scenario.get('fail', False), \
            "This distribution should not have failed."
        return
    assert not scenario.get('fail', False), \
        "This distribution should have failed."

    if aml.leaderboard.nrow == 0:
        algos = []
    else:
        algos = list(set(
            get_leaderboard(aml, "algo").as_data_frame()["algo"].unique()))

    for expected in ['DeepLearning', "DRF", 'GBM', 'GLM', 'StackedEnsemble',
                     'XGBoost']:
        # BUG FIX: message previously read "but no found."
        assert expected in algos, \
            "Expected {expected} but not found.".format(expected=expected)

    for model_id in aml.leaderboard.as_data_frame()["model_id"]:
        distribution = get_distribution(model_id)
        # Only algos listed in scenario["algos"] are required to carry the
        # expected distribution.
        assert distribution == expected_dist or \
            h2o.get_model(model_id).algo not in [a.lower() for a in scenario["algos"]], (
                "{model}: Expected distribution {s_dist} but {distribution} found!".format(
                    model=model_id, s_dist=expected_dist,
                    distribution=distribution))
def test_custom_distribution_reuse():
    """Repeatedly upload the custom Gaussian distribution and train a
    cross-validated GBM with it, interleaved with ``test_wrong_multinomial``
    calls, to make sure re-uploading/reusing a custom distribution works."""
    from h2o.utils.distributions import CustomDistributionGaussian

    train = h2o.import_file(
        pyunit_utils.locate("smalldata/iris/iris_train.csv"))
    response = "petal_wid"
    predictors = train.columns
    predictors.remove(response)
    nfolds = 2

    for _ in range(3):
        test_wrong_multinomial()
        custom_dist = h2o.upload_custom_distribution(
            CustomDistributionGaussian)
        model = H2OGradientBoostingEstimator(
            nfolds=nfolds,
            fold_assignment="Modulo",
            keep_cross_validation_predictions=True,
            distribution="custom",
            custom_distribution_func=custom_dist)
        model.train(x=predictors, y=response, training_frame=train)
def custom_distribution_bernoulli():
    """Upload the custom Bernoulli distribution class to the backend and
    return the resulting reference string."""
    return h2o.upload_custom_distribution(
        CustomDistributionBernoulli,
        func_name="custom_bernoulli",
        func_file="custom_bernoulli.py",
    )
def upload_distribution(distribution, name):
    """Upload *distribution* to the backend under ``custom_<name>`` (with a
    matching ``custom_<name>.py`` source file) and return the reference."""
    func_name = "custom_" + name
    return h2o.upload_custom_distribution(
        distribution,
        func_name=func_name,
        func_file=func_name + ".py",
    )