Ejemplo n.º 1
0
def testPropagationOfPredictionCol(prostateDataset):
    """Check that a custom ``predictionCol`` name reaches the scored output.

    An ``H2OGAM`` estimator is configured with a custom prediction column
    name, fitted on ``prostateDataset`` and used to transform the same
    dataset. The test passes when the transformed frame contains a column
    with that name.
    """
    predictionCol = "my_prediction_col_name"
    algo = H2OGAM(featuresCols=["DPROS", "DCAPS", "RACE", "GLEASON"],
                  gamCols=["PSA", "AGE"],
                  labelCol="CAPSULE",
                  seed=1,
                  splitRatio=0.8,
                  predictionCol=predictionCol)

    model = algo.fit(prostateDataset)
    columns = model.transform(prostateDataset).columns
    # Assert membership directly rather than comparing the result to True.
    assert predictionCol in columns
Ejemplo n.º 2
0
def testH2OGAMRegressorBehavesTheSameAsGenericH2OGAMOnNumericLabelColumn(
        prostateDataset):
    """Compare ``H2OGAMRegressor`` output with the generic ``H2OGAM``.

    ``prostateDataset`` is split with weights ``[0.9, 0.1]`` and seed 42.
    Both estimators are configured by ``setParamtersForProblemSpecificTests``,
    fitted on the first split and applied to the second one. The two
    transformed data frames must be identical.
    """
    trainDf, testDf = prostateDataset.randomSplit([0.9, 0.1], 42)

    # Expected output: the generic estimator.
    genericGam = setParamtersForProblemSpecificTests(H2OGAM())
    expected = genericGam.fit(trainDf).transform(testDf)

    # Actual output: the problem-specific regressor on the same data.
    regressor = setParamtersForProblemSpecificTests(H2OGAMRegressor())
    actual = regressor.fit(trainDf).transform(testDf)

    unit_test_utils.assert_data_frames_are_identical(expected, actual)
Ejemplo n.º 3
0
def testH2OGAMClassifierBehavesTheSameAsGenericH2OGAMOnStringLabelColumn(
        prostateDataset):
    """Compare ``H2OGAMClassifier`` output with the generic ``H2OGAM``.

    ``prostateDataset`` is split with weights ``[0.9, 0.1]`` and seed 42.
    The generic estimator is fitted on the first split with its "CAPSULE"
    column cast to string, while the classifier is fitted on the unmodified
    first split. Both models transform the second split and the resulting
    data frames must be identical.
    """
    trainDf, testDf = prostateDataset.randomSplit([0.9, 0.1], 42)

    # Expected output: generic estimator trained on a string-typed label.
    genericGam = setParamtersForProblemSpecificTests(H2OGAM())
    stringLabel = col("CAPSULE").cast("string")
    stringLabelTrainDf = trainDf.withColumn("CAPSULE", stringLabel)
    expected = genericGam.fit(stringLabelTrainDf).transform(testDf)

    # Actual output: classifier trained on the split as provided.
    gamClassifier = setParamtersForProblemSpecificTests(H2OGAMClassifier())
    actual = gamClassifier.fit(trainDf).transform(testDf)

    unit_test_utils.assert_data_frames_are_identical(expected, actual)
Ejemplo n.º 4
0
def testPipelineSerialization(prostateDataset):
    """Round-trip an H2OGAM pipeline and its fitted model through disk.

    The unfitted pipeline is saved under ``build/gam_pipeline``, loaded back
    and fitted on ``prostateDataset``. The fitted pipeline model is then
    saved under ``build/gam_pipeline_model``, loaded back and used to
    transform the dataset. Nothing is asserted about the scored output.
    """
    gam = H2OGAM(
        featuresCols=["DPROS", "DCAPS", "RACE", "GLEASON"],
        gamCols=["PSA", "AGE"],
        labelCol="CAPSULE",
        seed=1,
        splitRatio=0.8,
    )

    # Unfitted pipeline: save, reload, then fit the reloaded copy.
    pipelineUri = "file://" + os.path.abspath("build/gam_pipeline")
    Pipeline(stages=[gam]).write().overwrite().save(pipelineUri)
    fittedPipeline = Pipeline.load(pipelineUri).fit(prostateDataset)

    # Fitted pipeline model: save and reload.
    modelUri = "file://" + os.path.abspath("build/gam_pipeline_model")
    fittedPipeline.write().overwrite().save(modelUri)
    restoredModel = PipelineModel.load(modelUri)

    # The count is not checked; it only triggers evaluation of the transform.
    restoredModel.transform(prostateDataset).count()
Ejemplo n.º 5
0
def testPipelineSerializationGAM(prostateDataset):
    """Run ``gridSearchTester`` with an H2OGAM estimator.

    The estimator uses "AGE" as the label column and "PSA" as its only
    GAM column.

    NOTE(review): the test name mentions pipeline serialization, but the
    body only delegates to ``gridSearchTester`` -- confirm what that helper
    covers.
    """
    gam = H2OGAM().setLabelCol("AGE").setGamCols(["PSA"])
    gridSearchTester(gam, prostateDataset)
Ejemplo n.º 6
0
 def createInitialGamDefinition():
     """Build the initial ``H2OGAM`` estimator definition.

     ``featuresCols`` is not defined here; it is read from the surrounding
     scope. The label column is "CAPSULE" and the GAM columns are "PSA"
     and "AGE".
     """
     gamDefinition = H2OGAM(
         featuresCols=featuresCols,
         labelCol="CAPSULE",
         seed=1,
         splitRatio=0.8,
         gamCols=["PSA", "AGE"],
     )
     return gamDefinition