Esempio n. 1
0
def testGetGridModels(prostateDataset):
    """Fit a GBM grid over three seed values and expect three grid models back."""
    searchSettings = dict(
        labelCol="AGE",
        hyperParameters={"seed": [1, 2, 3]},
        splitRatio=0.8,
        algo=H2OGBM(),
        strategy="RandomDiscrete",
        maxModels=3,
        maxRuntimeSecs=60,
        selectBestModelBy="RMSE",
    )
    gridSearch = H2OGridSearch(**searchSettings)

    gridSearch.fit(prostateDataset)

    # One model per seed value in the hyper-parameter space.
    assert len(gridSearch.getGridModels()) == 3
Esempio n. 2
0
def testGetGridModelsMetrics(prostateDataset):
    """Fit a three-seed GBM grid and verify the row count and columns of its metrics table."""
    gridSearch = H2OGridSearch(
        labelCol="AGE",
        hyperParameters={"seed": [1, 2, 3]},
        splitRatio=0.8,
        algo=H2OGBM(),
        strategy="RandomDiscrete",
        maxModels=3,
        maxRuntimeSecs=60,
        selectBestModelBy="RMSE",
    )
    gridSearch.fit(prostateDataset)

    expectedColumns = ['MOJO Model ID', 'MSE', 'MeanResidualDeviance', 'R2', 'RMSE']
    metricsTable = gridSearch.getGridModelsMetrics()

    # One metrics row is expected per trained model.
    assert metricsTable.count() == 3
    assert metricsTable.columns == expectedColumns

    # Materialize the rows so that a lazily deferred failure would surface here.
    metricsTable.collect()
Esempio n. 3
0
def testGetGridModelsNoParams(prostateDataset):
    """Fit a grid without hyper-parameters and verify the params table holds only the model id."""
    searchSettings = {
        "labelCol": "AGE",
        "splitRatio": 0.8,
        "algo": H2OGBM(),
        "strategy": "RandomDiscrete",
        "maxModels": 3,
        "maxRuntimeSecs": 60,
        "selectBestModelBy": "RMSE",
    }
    gridSearch = H2OGridSearch(**searchSettings)
    gridSearch.fit(prostateDataset)

    paramsTable = gridSearch.getGridModelsParams()

    # No hyper-parameters were given: a single row with just the id column is expected.
    assert paramsTable.count() == 1
    assert paramsTable.columns == ['MOJO Model ID']

    # Materialize the rows so that a lazily deferred failure would surface here.
    paramsTable.collect()
Esempio n. 4
0
def testGetAlgoViaSetter():
    """Regression test for SW-2276: the third getAlgo() call failed when the algo came from setAlgo()."""
    gridSearch = H2OGridSearch(
        hyperParameters={"seed": [1, 2, 3]},
        strategy="RandomDiscrete",
        maxModels=3,
        maxRuntimeSecs=60,
        selectBestModelBy="RMSE",
    )
    configuredGbm = H2OGBM().setNtrees(100).setLabelCol("AGE").setSplitRatio(0.8)
    gridSearch.setAlgo(configuredGbm)

    # Two throw-away calls first; the failure was reported on the third call.
    for _ in range(2):
        gridSearch.getAlgo()

    thirdResult = gridSearch.getAlgo()
    assert thirdResult.getNtrees() == 100
Esempio n. 5
0
def testGetGridModelsParams(prostateDataset):
    """Fit a three-seed GBM grid and verify the params table lists the seed for each model."""
    gbm = H2OGBM(splitRatio=0.8, labelCol="AGE")
    gridSearch = H2OGridSearch(
        hyperParameters={"seed": [1, 2, 3]},
        algo=gbm,
        strategy="RandomDiscrete",
        maxModels=3,
        maxRuntimeSecs=60,
        selectBestModelBy="RMSE",
    )
    gridSearch.fit(prostateDataset)

    expectedHeader = ['MOJO Model ID', 'seed']
    paramsTable = gridSearch.getGridModelsParams()

    # One row per seed value, with the model id followed by the tuned parameter.
    assert paramsTable.count() == 3
    assert paramsTable.columns == expectedHeader

    # Materialize the rows so that a lazily deferred failure would surface here.
    paramsTable.collect()
Esempio n. 6
0
def testGetAlgoViaConstructor():
    """Regression test for SW-2276: the third getAlgo() call failed when the algo came from the constructor."""
    configuredGbm = H2OGBM(labelCol="AGE", ntrees=100, splitRatio=0.8)
    gridSearch = H2OGridSearch(
        hyperParameters={"seed": [1, 2, 3]},
        algo=configuredGbm,
        strategy="RandomDiscrete",
        maxModels=3,
        maxRuntimeSecs=60,
        selectBestModelBy="RMSE",
    )

    # Two throw-away calls first; the failure was reported on the third call.
    for _ in range(2):
        gridSearch.getAlgo()

    thirdResult = gridSearch.getAlgo()
    assert thirdResult.getNtrees() == 100
Esempio n. 7
0
def gridSearchTester(algo, prostateDataset):
    """Round-trip a grid-search pipeline through save/load both before and after fitting.

    The unfitted pipeline is written to ``build/grid_pipeline`` and read back,
    the reloaded pipeline is fitted on ``prostateDataset``, the fitted model is
    written to ``build/grid_pipeline_model`` and read back, and finally the
    reloaded model scores the same dataset.

    :param algo: algorithm instance handed to ``H2OGridSearch`` as ``algo``.
    :param prostateDataset: dataset used for both fitting and scoring.
    """
    gridSettings = dict(
        labelCol="AGE",
        hyperParameters={"seed": [1, 2, 3]},
        splitRatio=0.8,
        algo=algo,
        strategy="RandomDiscrete",
        maxModels=3,
        maxRuntimeSecs=60,
        selectBestModelBy="RMSE",
    )
    unfittedPipeline = Pipeline(stages=[H2OGridSearch(**gridSettings)])

    # Persist the unfitted pipeline and continue with the reloaded copy.
    pipelineUri = "file://" + os.path.abspath("build/grid_pipeline")
    unfittedPipeline.write().overwrite().save(pipelineUri)
    restoredPipeline = Pipeline.load(pipelineUri)
    fittedModel = restoredPipeline.fit(prostateDataset)

    # Persist the fitted model and score with the reloaded copy.
    modelUri = "file://" + os.path.abspath("build/grid_pipeline_model")
    fittedModel.write().overwrite().save(modelUri)
    restoredModel = PipelineModel.load(modelUri)

    # count() forces the transformation to actually run.
    restoredModel.transform(prostateDataset).count()
def testParams():
    """Check that every H2OGridSearch constructor argument is readable through its getter.

    The grid search is built with each keyword argument given explicitly, and
    each getter is then asserted to return the value passed in. ``algo`` is
    passed as ``None`` but is not read back here.

    Singleton values are compared by identity (``is None`` / ``is True`` /
    ``is False``) rather than with ``==``, so a getter returning a merely
    truthy, falsy or equal-comparing object does not pass.
    """
    grid = H2OGridSearch(featuresCols=[],
                         algo=None,
                         splitRatio=1.0,
                         hyperParameters={},
                         labelCol="label",
                         weightCol=None,
                         allStringColumnsToCategorical=True,
                         columnsToCategorical=[],
                         strategy="Cartesian",
                         maxRuntimeSecs=0.0,
                         maxModels=0,
                         seed=-1,
                         stoppingRounds=0,
                         stoppingTolerance=0.001,
                         stoppingMetric="AUTO",
                         nfolds=0,
                         selectBestModelBy="AUTO",
                         selectBestModelDecreasing=True,
                         foldCol=None,
                         convertUnknownCategoricalLevelsToNa=True,
                         predictionCol="prediction",
                         detailedPredictionCol="detailed_prediction",
                         withDetailedPredictionCol=False,
                         convertInvalidNumbersToNa=False)

    assert grid.getFeaturesCols() == []
    assert grid.getSplitRatio() == 1.0
    assert grid.getHyperParameters() == {}
    assert grid.getLabelCol() == "label"
    assert grid.getWeightCol() is None
    assert grid.getAllStringColumnsToCategorical() is True
    assert grid.getColumnsToCategorical() == []
    assert grid.getStrategy() == "Cartesian"
    assert grid.getMaxRuntimeSecs() == 0.0
    assert grid.getMaxModels() == 0
    assert grid.getSeed() == -1
    assert grid.getStoppingRounds() == 0
    assert grid.getStoppingTolerance() == 0.001
    assert grid.getStoppingMetric() == "AUTO"
    assert grid.getNfolds() == 0
    assert grid.getSelectBestModelBy() == "AUTO"
    assert grid.getSelectBestModelDecreasing() is True
    assert grid.getFoldCol() is None
    assert grid.getConvertUnknownCategoricalLevelsToNa() is True
    assert grid.getPredictionCol() == "prediction"
    assert grid.getDetailedPredictionCol() == "detailed_prediction"
    assert grid.getWithDetailedPredictionCol() is False
    assert grid.getConvertInvalidNumbersToNa() is False