Example #1
0
def insuranceFrame(hc, spark):
    """Build an H2OFrame from the insurance dataset for GLM-style tests.

    Reads smalldata/insurance.csv through Spark with an inferred schema,
    adds an "Offset" column equal to log(Holders), converts the result to
    an H2OFrame via the H2OContext, and marks "Group" and "Age" as factors.

    :param hc: H2OContext used for the Spark -> H2O conversion.
    :param spark: active SparkSession.
    :return: the prepared H2OFrame.
    """
    csvPath = "file://" + unit_test_utils.locate("smalldata/insurance.csv")
    sparkFrame = spark.read.csv(csvPath, header=True, inferSchema=True)
    sparkFrame = sparkFrame.withColumn("Offset", log(col("Holders")))
    h2oFrame = hc.asH2OFrame(sparkFrame)
    # Categorical columns must be explicit factors on the H2O side.
    for factorColumn in ["Group", "Age"]:
        h2oFrame[factorColumn] = h2oFrame[factorColumn].asfactor()
    return h2oFrame
Example #2
0
def testLoadAndTrainMojo(hc, spark):
    """Verify MOJO round-tripping: a GBM trained here, exported as a MOJO and
    re-loaded must score identically to a pre-built reference MOJO.

    :param hc: H2OContext used for the Spark -> H2O conversion.
    :param spark: active SparkSession.
    """
    referencePath = os.path.abspath("../ml/src/test/resources/binom_model_prostate.mojo")
    referenceMojo = H2OMOJOModel.createFromMojo("file://" + referencePath)

    prostateCsv = "file://" + unit_test_utils.locate("smalldata/prostate/prostate.csv")
    sparkFrame = spark.read.csv(prostateCsv, header=True, inferSchema=True)
    h2oFrame = hc.asH2OFrame(sparkFrame)
    # Binomial target must be a factor for a bernoulli GBM.
    h2oFrame["CAPSULE"] = h2oFrame["CAPSULE"].asfactor()

    estimator = H2OGradientBoostingEstimator(distribution="bernoulli", ntrees=2, seed=42)
    estimator.train(y="CAPSULE", training_frame=h2oFrame)
    exportedMojoPath = estimator.download_mojo(path=os.path.abspath("build/"), get_genmodel_jar=False)
    trainedMojo = H2OMOJOModel.createFromMojo("file://" + exportedMojoPath)

    expected = referenceMojo.transform(sparkFrame)
    actual = trainedMojo.transform(sparkFrame)
    unit_test_utils.assert_data_frames_are_identical(expected, actual)
def testCustomMetric():
    """Train a GBM with an uploaded custom metric on the loan dataset and
    check the metric is registered and evaluates to the expected value.

    Loads smalldata/loan.csv, predicts "bad_loan" (excluding "int_rate"),
    weights rows by "loan_amnt", and uploads WeightedFalseNegativeLossMetric
    as the model's custom metric function.
    """
    import math  # stdlib; local import keeps the snippet self-contained

    trainPath = "file://" + unit_test_utils.locate("smalldata/loan.csv")
    train = h2o.import_file(trainPath, destination_frame="loan_train")
    # Binomial target must be a factor.
    train["bad_loan"] = train["bad_loan"].asfactor()

    y = "bad_loan"
    x = train.col_names
    x.remove(y)
    x.remove("int_rate")

    # Weight each observation by the loan amount.
    train["weight"] = train["loan_amnt"]

    weightedFalseNegativeLossFunc = h2o.upload_custom_metric(WeightedFalseNegativeLossMetric,
                                                             func_name="WeightedFalseNegativeLoss",
                                                             func_file="weighted_false_negative_loss.py")
    gbm = H2OGradientBoostingEstimator(model_id="gbm.hex", custom_metric_func=weightedFalseNegativeLossFunc)
    gbm.train(y=y, x=x, training_frame=train, weights_column="weight")

    perf = gbm.model_performance()
    assert perf.custom_metric_name() == "WeightedFalseNegativeLoss"
    # Compare the computed metric with a tolerance: exact float equality on a
    # model metric is brittle (no seed is fixed on this estimator, and float
    # arithmetic may differ across platforms/backends).
    assert math.isclose(perf.custom_metric_value(), 0.24579011595430142, rel_tol=1e-9), \
        "unexpected custom metric value: %s" % perf.custom_metric_value()
Example #4
0
def dataset(spark):
    """Return the iris dataset (with header row) as a Spark DataFrame.

    :param spark: active SparkSession.
    :return: Spark DataFrame with an inferred schema.
    """
    irisPath = "file://" + unit_test_utils.locate("smalldata/iris/iris_wheader.csv")
    return spark.read.csv(irisPath, header=True, inferSchema=True)
def dataset(spark):
    """Return the prostate dataset with CAPSULE cast to string.

    :param spark: active SparkSession.
    :return: Spark DataFrame where "CAPSULE" is a string column.
    """
    prostatePath = "file://" + unit_test_utils.locate("smalldata/prostate/prostate.csv")
    rawFrame = spark.read.csv(prostatePath, header=True, inferSchema=True)
    # Cast the target to string so downstream code treats it as categorical.
    return rawFrame.withColumn("CAPSULE", col("CAPSULE").cast("string"))
def prostateDataset(spark):
    """Return the prostate dataset as a Spark DataFrame.

    :param spark: active SparkSession.
    :return: Spark DataFrame with an inferred schema and header row.
    """
    csvUri = "file://" + unit_test_utils.locate("smalldata/prostate/prostate.csv")
    return spark.read.csv(csvUri, header=True, inferSchema=True)
def dataset(spark):
    """Return the insurance dataset with an added Offset = log(Holders) column.

    :param spark: active SparkSession.
    :return: Spark DataFrame with the extra "Offset" column.
    """
    insurancePath = "file://" + unit_test_utils.locate("smalldata/insurance.csv")
    rawFrame = spark.read.csv(insurancePath, header=True, inferSchema=True)
    return rawFrame.withColumn("Offset", log(col("Holders")))