def testGetGridModels(prostateDataset):
    """Fit a GBM grid over three seeds and expect three grid models back."""
    seedGrid = {"seed": [1, 2, 3]}
    search = H2OGridSearch(
        labelCol="AGE",
        hyperParameters=seedGrid,
        splitRatio=0.8,
        algo=H2OGBM(),
        strategy="RandomDiscrete",
        maxModels=3,
        maxRuntimeSecs=60,
        selectBestModelBy="RMSE",
    )
    search.fit(prostateDataset)

    # One model is expected per seed value in the hyper-parameter grid.
    assert len(search.getGridModels()) == 3
def testGetGridModelsMetrics(prostateDataset):
    """Fit a GBM grid over three seeds and check the reported metrics.

    The result of ``getGridModelsMetrics()`` must have three rows, the
    expected column names in order, and must be collectable.
    """
    expectedColumns = ['MOJO Model ID', 'MSE', 'MeanResidualDeviance', 'R2', 'RMSE']
    search = H2OGridSearch(
        labelCol="AGE",
        hyperParameters={"seed": [1, 2, 3]},
        splitRatio=0.8,
        algo=H2OGBM(),
        strategy="RandomDiscrete",
        maxModels=3,
        maxRuntimeSecs=60,
        selectBestModelBy="RMSE",
    )
    search.fit(prostateDataset)

    modelMetrics = search.getGridModelsMetrics()
    assert modelMetrics.count() == 3
    assert modelMetrics.columns == expectedColumns
    modelMetrics.collect()  # try materializing
def testGetGridModelsNoParams(prostateDataset):
    """Fit a grid search with no hyper-parameters and check the params result.

    With no ``hyperParameters`` given, ``getGridModelsParams()`` is expected
    to contain a single row whose only column is the model id.
    """
    search = H2OGridSearch(
        labelCol="AGE",
        splitRatio=0.8,
        algo=H2OGBM(),
        strategy="RandomDiscrete",
        maxModels=3,
        maxRuntimeSecs=60,
        selectBestModelBy="RMSE",
    )
    search.fit(prostateDataset)

    modelParams = search.getGridModelsParams()
    assert modelParams.count() == 1
    assert modelParams.columns == ['MOJO Model ID']
    modelParams.collect()  # try materializing
def testGetAlgoViaSetter():
    """Regression test for SW-2276 with the algorithm supplied via ``setAlgo``.

    The third call of ``getAlgo()`` used to fail, so it is called twice
    before the call whose result is asserted on.
    """
    search = H2OGridSearch(
        hyperParameters={"seed": [1, 2, 3]},
        strategy="RandomDiscrete",
        maxModels=3,
        maxRuntimeSecs=60,
        selectBestModelBy="RMSE",
    )

    # Configure the algorithm step by step, feeding each setter's return
    # value into the next call exactly as the chained form does.
    gbm = H2OGBM()
    gbm = gbm.setNtrees(100)
    gbm = gbm.setLabelCol("AGE")
    gbm = gbm.setSplitRatio(0.8)
    search.setAlgo(gbm)

    # SW-2276, 3rd call of getAlgo failed
    for _ in range(2):
        search.getAlgo()
    assert search.getAlgo().getNtrees() == 100
def testGetGridModelsParams(prostateDataset):
    """Fit a GBM grid over three seeds and check the per-model parameters.

    The label column and split ratio are set on the algorithm itself rather
    than on the grid search. The params result must have three rows and the
    columns ``'MOJO Model ID'`` and ``'seed'``.
    """
    seedGrid = {"seed": [1, 2, 3]}
    gbm = H2OGBM(splitRatio=0.8, labelCol="AGE")
    search = H2OGridSearch(
        hyperParameters=seedGrid,
        algo=gbm,
        strategy="RandomDiscrete",
        maxModels=3,
        maxRuntimeSecs=60,
        selectBestModelBy="RMSE",
    )
    search.fit(prostateDataset)

    modelParams = search.getGridModelsParams()
    assert modelParams.count() == 3
    assert modelParams.columns == ['MOJO Model ID', 'seed']
    modelParams.collect()  # try materializing
def testGetAlgoViaConstructor():
    """Regression test for SW-2276 with the algorithm passed to the constructor.

    The third call of ``getAlgo()`` used to fail, so it is called twice
    before the call whose result is asserted on.
    """
    seedGrid = {"seed": [1, 2, 3]}
    gbm = H2OGBM(labelCol="AGE", ntrees=100, splitRatio=0.8)
    search = H2OGridSearch(
        hyperParameters=seedGrid,
        algo=gbm,
        strategy="RandomDiscrete",
        maxModels=3,
        maxRuntimeSecs=60,
        selectBestModelBy="RMSE",
    )

    # SW-2276, 3rd call of getAlgo failed
    for _ in range(2):
        search.getAlgo()
    assert search.getAlgo().getNtrees() == 100
def gridSearchTester(algo, prostateDataset):
    """Round-trip a grid-search pipeline and its fitted model through disk.

    Builds an ``H2OGridSearch`` around ``algo``, wraps it in a ``Pipeline``,
    saves and reloads the unfitted pipeline, fits the reloaded pipeline on
    ``prostateDataset``, saves and reloads the fitted model, and finally
    runs ``transform(...).count()`` with the reloaded model.
    """

    def asFileUri(relativePath):
        # Resolved at each call, just like the original inline expressions.
        return "file://" + os.path.abspath(relativePath)

    search = H2OGridSearch(
        labelCol="AGE",
        hyperParameters={"seed": [1, 2, 3]},
        splitRatio=0.8,
        algo=algo,
        strategy="RandomDiscrete",
        maxModels=3,
        maxRuntimeSecs=60,
        selectBestModelBy="RMSE",
    )

    # Persist the unfitted pipeline and read it back.
    unfitted = Pipeline(stages=[search])
    unfitted.write().overwrite().save(asFileUri("build/grid_pipeline"))
    reloadedPipeline = Pipeline.load(asFileUri("build/grid_pipeline"))

    # Fit the reloaded pipeline, then persist and reload the fitted model.
    fitted = reloadedPipeline.fit(prostateDataset)
    fitted.write().overwrite().save(asFileUri("build/grid_pipeline_model"))
    reloadedModel = PipelineModel.load(asFileUri("build/grid_pipeline_model"))

    # Run the reloaded model over the dataset; the count itself is not checked.
    reloadedModel.transform(prostateDataset).count()
def testParams():
    """Construct ``H2OGridSearch`` with every parameter explicit and read each back.

    Each value passed to the constructor is checked through the matching
    getter. Parameters passed as ``None`` are checked with ``is None``,
    the idiomatic singleton comparison. Boolean getters are still compared
    by equality so the set of accepted return values is unchanged.
    """
    grid = H2OGridSearch(
        featuresCols=[],
        algo=None,
        splitRatio=1.0,
        hyperParameters={},
        labelCol="label",
        weightCol=None,
        allStringColumnsToCategorical=True,
        columnsToCategorical=[],
        strategy="Cartesian",
        maxRuntimeSecs=0.0,
        maxModels=0,
        seed=-1,
        stoppingRounds=0,
        stoppingTolerance=0.001,
        stoppingMetric="AUTO",
        nfolds=0,
        selectBestModelBy="AUTO",
        selectBestModelDecreasing=True,
        foldCol=None,
        convertUnknownCategoricalLevelsToNa=True,
        predictionCol="prediction",
        detailedPredictionCol="detailed_prediction",
        withDetailedPredictionCol=False,
        convertInvalidNumbersToNa=False,
    )

    assert grid.getFeaturesCols() == []
    assert grid.getSplitRatio() == 1.0
    assert grid.getHyperParameters() == {}
    assert grid.getLabelCol() == "label"
    assert grid.getWeightCol() is None  # None is a singleton: compare by identity
    assert grid.getAllStringColumnsToCategorical() == True
    assert grid.getColumnsToCategorical() == []
    assert grid.getStrategy() == "Cartesian"
    assert grid.getMaxRuntimeSecs() == 0.0
    assert grid.getMaxModels() == 0
    assert grid.getSeed() == -1
    assert grid.getStoppingRounds() == 0
    assert grid.getStoppingTolerance() == 0.001
    assert grid.getStoppingMetric() == "AUTO"
    assert grid.getNfolds() == 0
    assert grid.getSelectBestModelBy() == "AUTO"
    assert grid.getSelectBestModelDecreasing() == True
    assert grid.getFoldCol() is None  # None is a singleton: compare by identity
    assert grid.getConvertUnknownCategoricalLevelsToNa() == True
    assert grid.getPredictionCol() == "prediction"
    assert grid.getDetailedPredictionCol() == "detailed_prediction"
    assert grid.getWithDetailedPredictionCol() == False
    assert grid.getConvertInvalidNumbersToNa() == False