def __init__(self, keep=False, columns=[]):
    """Create the ColumnPruner wrapper and apply the supplied parameters.

    Instantiates the backing Java object, registers the default parameter
    values and then forwards whatever ``get_input_kwargs`` reports for this
    instance to ``setParams``.
    """
    super(ColumnPruner, self).__init__()
    java_class = "py_sparkling.ml.features.ColumnPruner"
    self._java_obj = self._new_java_obj(java_class, self.uid)
    defaults = dict(keep=False, columns=[])
    self._setDefault(**defaults)
    supplied = get_input_kwargs(self)
    self.setParams(**supplied)
def __init__(self, foldCol=None, labelCol="label", inputCols=[], holdoutStrategy="None",
             blendedAvgEnabled=False, blendedAvgInflectionPoint=10.0,
             blendedAvgSmoothing=20.0, noise=0.01, noiseSeed=-1):
    """Create the H2OTargetEncoder wrapper and apply the supplied parameters.

    Obtains (or creates) the H2OContext, instantiates the backing Java
    object, registers the default parameter values and forwards whatever
    ``get_input_kwargs`` reports for this instance to ``setParams``.
    """
    super(H2OTargetEncoder, self).__init__()
    spark = SparkSession.builder.getOrCreate()
    self._hc = H2OContext.getOrCreate(spark, verbose=False)
    java_class = "ai.h2o.sparkling.ml.features.H2OTargetEncoder"
    self._java_obj = self._new_java_obj(java_class, self.uid)
    defaults = dict(
        foldCol=None,
        labelCol="label",
        inputCols=[],
        holdoutStrategy="None",
        blendedAvgEnabled=False,
        blendedAvgInflectionPoint=10.0,
        blendedAvgSmoothing=20.0,
        noise=0.01,
        noiseSeed=-1,
    )
    self._setDefault(**defaults)
    supplied = get_input_kwargs(self)
    self.setParams(**supplied)
def __init__(self, labelCol="label", allStringColumnsToCategorical=True,
             columnsToCategorical=[], ratio=1.0, foldCol=None, weightCol=None,
             ignoredCols=[], includeAlgos=None, excludeAlgos=None, projectName=None,
             maxRuntimeSecs=3600.0, stoppingRounds=3, stoppingTolerance=0.001,
             stoppingMetric="AUTO", nfolds=5, convertUnknownCategoricalLevelsToNa=False,
             seed=-1, sortMetric="AUTO", balanceClasses=False, classSamplingFactors=None,
             maxAfterBalanceSize=5.0, keepCrossValidationPredictions=True,
             keepCrossValidationModels=True, maxModels=0, **deprecatedArgs):
    """Create the H2OAutoML wrapper and apply the supplied parameters.

    Obtains (or creates) the H2OContext, instantiates the backing Java
    object, registers the default parameter values and forwards whatever
    ``get_input_kwargs`` reports for this instance to ``setParams``.
    """
    super(H2OAutoML, self).__init__()
    spark = SparkSession.builder.getOrCreate()
    self._hc = H2OContext.getOrCreate(spark, verbose=False)
    self._java_obj = self._new_java_obj("py_sparkling.ml.algos.H2OAutoML", self.uid)

    # The stopping metric default is stored as the JVM enum constant.
    stopping_metric_enum = self._hc._jvm.hex.ScoreKeeper.StoppingMetric
    defaults = dict(
        labelCol="label",
        allStringColumnsToCategorical=True,
        columnsToCategorical=[],
        ratio=1.0,
        foldCol=None,
        weightCol=None,
        ignoredCols=[],
        includeAlgos=None,
        excludeAlgos=None,
        projectName=None,
        maxRuntimeSecs=3600.0,
        stoppingRounds=3,
        stoppingTolerance=0.001,
        stoppingMetric=stopping_metric_enum.valueOf("AUTO"),
        nfolds=5,
        convertUnknownCategoricalLevelsToNa=False,
        seed=-1,
        sortMetric=None,
        balanceClasses=False,
        classSamplingFactors=None,
        maxAfterBalanceSize=5.0,
        keepCrossValidationPredictions=True,
        keepCrossValidationModels=True,
        maxModels=0,
    )
    self._setDefault(**defaults)
    supplied = get_input_kwargs(self)
    self.setParams(**supplied)
def setParams(self, modelId=None, splitRatio=1.0, labelCol="label", weightCol=None,
              featuresCols=[], allStringColumnsToCategorical=True,
              columnsToCategorical=[], nfolds=0, keepCrossValidationPredictions=False,
              keepCrossValidationFoldAssignment=False, parallelizeCrossValidation=True,
              seed=-1, distribution="AUTO", epochs=10.0, l1=0.0, l2=0.0,
              hidden=[200, 200], reproducible=False,
              convertUnknownCategoricalLevelsToNa=False, foldCol=None, **deprecatedArgs):
    """Set the parameters reported by ``get_input_kwargs`` on this instance.

    A supplied ``distribution`` name is replaced by the matching JVM
    ``DistributionFamily`` constant before the values are stored.

    :return: the result of ``self._set`` called with the processed values
    """
    params = get_input_kwargs(self)
    if "distribution" in params:
        family_enum = self._hc._jvm.hex.genmodel.utils.DistributionFamily
        params["distribution"] = family_enum.valueOf(params["distribution"])

    # Double-typed params are coerced to float explicitly: if an int is
    # handed over, py4j treats the value as an int and a class cast
    # exception follows.
    set_double_values(params, ["splitRatio", "epochs", "l1", "l2"])
    return self._set(**params)
def setParams(self, featuresCols=[], labelCol="label", allStringColumnsToCategorical=True,
              columnsToCategorical=[], splitRatio=1.0, foldCol=None, weightCol=None,
              ignoredCols=[], includeAlgos=None, excludeAlgos=None, projectName=None,
              maxRuntimeSecs=3600.0, stoppingRounds=3, stoppingTolerance=0.001,
              stoppingMetric="AUTO", nfolds=5, convertUnknownCategoricalLevelsToNa=True,
              seed=-1, sortMetric="AUTO", balanceClasses=False, classSamplingFactors=None,
              maxAfterBalanceSize=5.0, keepCrossValidationPredictions=True,
              keepCrossValidationModels=True, maxModels=0, **deprecatedArgs):
    """Set the parameters reported by ``get_input_kwargs`` on this instance.

    Enum-like values (``stoppingMetric``, ``excludeAlgos``, ``includeAlgos``)
    are converted to their JVM counterparts, and a ``projectName`` that is
    explicitly ``None`` is replaced by 30 random ASCII letters.

    :return: the result of ``self._set`` called with the processed values
    """
    params = get_input_kwargs(self)

    if "stoppingMetric" in params:
        metric_enum = self._hc._jvm.hex.ScoreKeeper.StoppingMetric
        params["stoppingMetric"] = metric_enum.valueOf(params["stoppingMetric"])

    if "projectName" in params and params["projectName"] is None:
        letters = [random.choice(string.ascii_letters) for _ in range(30)]
        params["projectName"] = ''.join(letters)

    # Both algorithm lists get the same string -> Algo enum array conversion.
    for algo_key in ("excludeAlgos", "includeAlgos"):
        if algo_key in params:
            spark = SparkSession.builder.getOrCreate()
            jvm = H2OContext.getOrCreate(spark, verbose=False)._jvm
            params[algo_key] = get_enum_array_from_str_array(
                params[algo_key], jvm.ai.h2o.automl.Algo)

    # Double-typed params are coerced to float explicitly: if an int is
    # handed over, py4j treats the value as an int and a class cast
    # exception follows.
    set_double_values(
        params,
        ["maxRuntimeSecs", "stoppingTolerance", "splitRatio", "maxAfterBalanceSize"])
    return self._set(**params)
def setParams(self, featuresCols=[], labelCol="label", allStringColumnsToCategorical=True,
              columnsToCategorical=[], splitRatio=1.0, foldCol=None, weightCol=None,
              ignoredCols=[], includeAlgos=None, excludeAlgos=None, projectName=None,
              maxRuntimeSecs=3600.0, stoppingRounds=3, stoppingTolerance=0.001,
              stoppingMetric="AUTO", nfolds=5, convertUnknownCategoricalLevelsToNa=True,
              seed=-1, sortMetric="AUTO", balanceClasses=False, classSamplingFactors=None,
              maxAfterBalanceSize=5.0, keepCrossValidationPredictions=True,
              keepCrossValidationModels=True, maxModels=0, predictionCol="prediction",
              detailedPredictionCol="detailed_prediction",
              withDetailedPredictionCol=False, convertInvalidNumbersToNa=False,
              **deprecatedArgs):
    """Set the parameters reported by ``get_input_kwargs`` on this instance.

    ``includeAlgos``/``excludeAlgos`` and ``stoppingMetric`` are passed
    through the enum validators first, and a ``projectName`` that is
    explicitly ``None`` is replaced by 30 random ASCII letters.

    :return: the result of ``self._set`` called with the processed values
    """
    params = get_input_kwargs(self)

    for algo_key in ("includeAlgos", "excludeAlgos"):
        validateEnumValues(self._H2OAutoMLParams__getAutomlAlgoEnum(), params,
                           algo_key, nullEnabled=True)
    validateEnumValue(self._H2OAutoMLParams__getStoppingMetricEnum(), params,
                      "stoppingMetric")

    if "projectName" in params and params["projectName"] is None:
        letters = [random.choice(string.ascii_letters) for _ in range(30)]
        params["projectName"] = ''.join(letters)

    # Double-typed params are coerced to float explicitly: if an int is
    # handed over, py4j treats the value as an int and a class cast
    # exception follows.
    set_double_values(
        params,
        ["maxRuntimeSecs", "stoppingTolerance", "splitRatio", "maxAfterBalanceSize"])
    return self._set(**params)
def __init__(self, modelId=None, splitRatio=1.0, labelCol="label", weightCol=None,
             featuresCols=[], allStringColumnsToCategorical=True,
             columnsToCategorical=[], nfolds=0, keepCrossValidationPredictions=False,
             keepCrossValidationFoldAssignment=False, parallelizeCrossValidation=True,
             seed=-1, distribution="AUTO", epochs=10.0, l1=0.0, l2=0.0,
             hidden=[200, 200], reproducible=False,
             convertUnknownCategoricalLevelsToNa=False, foldCol=None,
             predictionCol="prediction", detailedPredictionCol="detailed_prediction",
             withDetailedPredictionCol=False, convertInvalidNumbersToNa=False,
             **deprecatedArgs):
    """Create the H2ODeepLearning wrapper and apply the supplied parameters.

    Instantiates the backing Java object, registers the default parameter
    values and forwards whatever ``get_input_kwargs`` reports for this
    instance to ``setParams``.
    """
    super(H2ODeepLearning, self).__init__()
    java_class = "py_sparkling.ml.algos.H2ODeepLearning"
    self._java_obj = self._new_java_obj(java_class, self.uid)
    defaults = dict(
        modelId=None,
        splitRatio=1.0,
        labelCol="label",
        weightCol=None,
        featuresCols=[],
        allStringColumnsToCategorical=True,
        columnsToCategorical=[],
        nfolds=0,
        keepCrossValidationPredictions=False,
        keepCrossValidationFoldAssignment=False,
        parallelizeCrossValidation=True,
        seed=-1,
        distribution="AUTO",
        epochs=10.0,
        l1=0.0,
        l2=0.0,
        hidden=[200, 200],
        reproducible=False,
        convertUnknownCategoricalLevelsToNa=False,
        foldCol=None,
        predictionCol="prediction",
        detailedPredictionCol="detailed_prediction",
        withDetailedPredictionCol=False,
        convertInvalidNumbersToNa=False,
    )
    self._setDefault(**defaults)
    supplied = get_input_kwargs(self)
    self.setParams(**supplied)
def __init__(self, featuresCols=[], algo=None, splitRatio=1.0, hyperParameters={},
             labelCol="label", weightCol=None, allStringColumnsToCategorical=True,
             columnsToCategorical=[], strategy="Cartesian", maxRuntimeSecs=0.0,
             maxModels=0, seed=-1, stoppingRounds=0, stoppingTolerance=0.001,
             stoppingMetric="AUTO", nfolds=0, selectBestModelBy="AUTO",
             selectBestModelDecreasing=True, foldCol=None,
             convertUnknownCategoricalLevelsToNa=True, predictionCol="prediction",
             detailedPredictionCol="detailed_prediction",
             withDetailedPredictionCol=False, convertInvalidNumbersToNa=False,
             **deprecatedArgs):
    """Create the H2OGridSearch wrapper and apply the supplied parameters.

    Instantiates the backing Java object, registers the default parameter
    values and forwards whatever ``get_input_kwargs`` reports for this
    instance to ``setParams``.
    """
    super(H2OGridSearch, self).__init__()
    java_class = "py_sparkling.ml.algos.H2OGridSearch"
    self._java_obj = self._new_java_obj(java_class, self.uid)
    defaults = dict(
        featuresCols=[],
        algo=None,
        splitRatio=1.0,
        hyperParameters={},
        labelCol="label",
        weightCol=None,
        allStringColumnsToCategorical=True,
        columnsToCategorical=[],
        strategy="Cartesian",
        maxRuntimeSecs=0.0,
        maxModels=0,
        seed=-1,
        stoppingRounds=0,
        stoppingTolerance=0.001,
        stoppingMetric="AUTO",
        nfolds=0,
        selectBestModelBy="AUTO",
        selectBestModelDecreasing=True,
        foldCol=None,
        convertUnknownCategoricalLevelsToNa=True,
        predictionCol="prediction",
        detailedPredictionCol="detailed_prediction",
        withDetailedPredictionCol=False,
        convertInvalidNumbersToNa=False,
    )
    self._setDefault(**defaults)
    supplied = get_input_kwargs(self)
    self.setParams(**supplied)
def __init__(self, predictionCol="prediction", detailedPredictionCol="detailed_prediction",
             withDetailedPredictionCol=False, featuresCols=[], foldCol=None,
             weightCol=None, splitRatio=1.0, seed=-1, nfolds=0,
             allStringColumnsToCategorical=True, columnsToCategorical=[],
             convertUnknownCategoricalLevelsToNa=False, convertInvalidNumbersToNa=False,
             modelId=None, keepCrossValidationPredictions=False,
             keepCrossValidationFoldAssignment=False, parallelizeCrossValidation=True,
             distribution="AUTO", maxIterations=10, standardize=True, init="Furthest",
             userPoints=None, estimateK=False, k=2, **deprecatedArgs):
    """Create the H2OKMeans wrapper and apply the supplied parameters.

    Instantiates the backing Java object, registers the default parameter
    values and forwards whatever ``get_input_kwargs`` reports for this
    instance to ``setParams``.
    """
    super(H2OKMeans, self).__init__()
    java_class = "ai.h2o.sparkling.ml.algos.H2OKMeans"
    self._java_obj = self._new_java_obj(java_class, self.uid)
    defaults = dict(
        predictionCol="prediction",
        detailedPredictionCol="detailed_prediction",
        withDetailedPredictionCol=False,
        featuresCols=[],
        foldCol=None,
        weightCol=None,
        splitRatio=1.0,
        seed=-1,
        nfolds=0,
        allStringColumnsToCategorical=True,
        columnsToCategorical=[],
        convertUnknownCategoricalLevelsToNa=False,
        convertInvalidNumbersToNa=False,
        modelId=None,
        keepCrossValidationPredictions=False,
        keepCrossValidationFoldAssignment=False,
        parallelizeCrossValidation=True,
        distribution="AUTO",
        maxIterations=10,
        standardize=True,
        init="Furthest",
        userPoints=None,
        estimateK=False,
        k=2,
    )
    self._setDefault(**defaults)
    supplied = get_input_kwargs(self)
    self.setParams(**supplied)
def setParams(self, featuresCols=[], algo=None, splitRatio=1.0, hyperParameters={},
              labelCol="label", weightCol=None, allStringColumnsToCategorical=True,
              columnsToCategorical=[], strategy="Cartesian", maxRuntimeSecs=0.0,
              maxModels=0, seed=-1, stoppingRounds=0, stoppingTolerance=0.001,
              stoppingMetric="AUTO", nfolds=0, selectBestModelBy=None,
              selectBestModelDecreasing=True, foldCol=None,
              convertUnknownCategoricalLevelsToNa=True, **deprecatedArgs):
    """Set the parameters reported by ``get_input_kwargs`` on this instance.

    Supplied ``stoppingMetric``, ``strategy`` and non-``None``
    ``selectBestModelBy`` names are replaced by the matching JVM enum
    constants. Values given under the deprecated names ``predictionCol`` and
    ``ratio`` are handed to ``propagate_value_from_deprecated_property`` for
    ``labelCol`` and ``splitRatio`` respectively. A non-``None`` ``algo`` is
    set directly on the Java object and removed from the values passed to
    ``_set``.

    :return: the result of ``self._set`` called with the processed values
    """
    kwargs = get_input_kwargs(self)

    if "stoppingMetric" in kwargs:
        metric_enum = self._hc._jvm.hex.ScoreKeeper.StoppingMetric
        kwargs["stoppingMetric"] = metric_enum.valueOf(kwargs["stoppingMetric"])

    if "strategy" in kwargs:
        strategy_enum = self._hc._jvm.hex.grid.HyperSpaceSearchCriteria.Strategy
        kwargs["strategy"] = strategy_enum.valueOf(kwargs["strategy"])

    if "selectBestModelBy" in kwargs and kwargs["selectBestModelBy"] is not None:
        select_enum = self._hc._jvm.org.apache.spark.ml.h2o.algos.H2OGridSearchMetric
        kwargs["selectBestModelBy"] = select_enum.valueOf(kwargs["selectBestModelBy"])

    propagate_value_from_deprecated_property(kwargs, "predictionCol", "labelCol")
    propagate_value_from_deprecated_property(kwargs, "ratio", "splitRatio")

    # Double-typed params are coerced to float explicitly: if an int is
    # handed over, py4j treats the value as an int and a class cast
    # exception follows. "splitRatio" must be listed as well as the
    # deprecated "ratio"; otherwise an integer splitRatio is never coerced.
    double_types = ["ratio", "splitRatio", "stoppingTolerance", "maxRuntimeSecs"]
    set_double_values(kwargs, double_types)

    if "algo" in kwargs and kwargs["algo"] is not None:
        # The algorithm is a wrapper object; pass its Java peer to the JVM
        # side directly instead of storing it as a regular param.
        tmp = kwargs["algo"]
        del kwargs['algo']
        self._java_obj.setAlgo(tmp._java_obj)

    return self._set(**kwargs)
def __init__(self, featuresCols=[], algo=None, splitRatio=1.0, hyperParameters={},
             labelCol="label", weightCol=None, allStringColumnsToCategorical=True,
             columnsToCategorical=[], strategy="Cartesian", maxRuntimeSecs=0.0,
             maxModels=0, seed=-1, stoppingRounds=0, stoppingTolerance=0.001,
             stoppingMetric="AUTO", nfolds=0, selectBestModelBy=None,
             selectBestModelDecreasing=True, foldCol=None,
             convertUnknownCategoricalLevelsToNa=True, **deprecatedArgs):
    """Create the H2OGridSearch wrapper and apply the supplied parameters.

    Obtains (or creates) the H2OContext, instantiates the backing Java
    object, registers the default parameter values (``strategy`` and
    ``stoppingMetric`` as JVM enum constants) and forwards whatever
    ``get_input_kwargs`` reports for this instance to ``setParams``.
    """
    super(H2OGridSearch, self).__init__()
    spark = SparkSession.builder.getOrCreate()
    self._hc = H2OContext.getOrCreate(spark, verbose=False)
    java_class = "py_sparkling.ml.algos.H2OGridSearch"
    self._java_obj = self._new_java_obj(java_class, self.uid)

    hex_pkg = self._hc._jvm.hex
    defaults = dict(
        featuresCols=[],
        algo=None,
        splitRatio=1.0,
        hyperParameters={},
        labelCol="label",
        weightCol=None,
        allStringColumnsToCategorical=True,
        columnsToCategorical=[],
        strategy=hex_pkg.grid.HyperSpaceSearchCriteria.Strategy.valueOf("Cartesian"),
        maxRuntimeSecs=0.0,
        maxModels=0,
        seed=-1,
        stoppingRounds=0,
        stoppingTolerance=0.001,
        stoppingMetric=hex_pkg.ScoreKeeper.StoppingMetric.valueOf("AUTO"),
        nfolds=0,
        selectBestModelBy=None,
        selectBestModelDecreasing=True,
        foldCol=None,
        convertUnknownCategoricalLevelsToNa=True,
    )
    self._setDefault(**defaults)
    supplied = get_input_kwargs(self)
    self.setParams(**supplied)
def __init__(self, modelId=None, splitRatio=1.0, labelCol="label", weightCol=None,
             featuresCols=[], allStringColumnsToCategorical=True,
             columnsToCategorical=[], nfolds=0, keepCrossValidationPredictions=False,
             keepCrossValidationFoldAssignment=False, parallelizeCrossValidation=True,
             seed=-1, distribution="AUTO", epochs=10.0, l1=0.0, l2=0.0,
             hidden=[200, 200], reproducible=False,
             convertUnknownCategoricalLevelsToNa=False, foldCol=None, **deprecatedArgs):
    """Create the H2ODeepLearning wrapper and apply the supplied parameters.

    Obtains (or creates) the H2OContext, instantiates the backing Java
    object, registers the default parameter values (``distribution`` as a
    JVM enum constant) and forwards whatever ``get_input_kwargs`` reports
    for this instance to ``setParams``.
    """
    super(H2ODeepLearning, self).__init__()
    spark = SparkSession.builder.getOrCreate()
    self._hc = H2OContext.getOrCreate(spark, verbose=False)
    java_class = "py_sparkling.ml.algos.H2ODeepLearning"
    self._java_obj = self._new_java_obj(java_class, self.uid)

    family_enum = self._hc._jvm.hex.genmodel.utils.DistributionFamily
    defaults = dict(
        modelId=None,
        splitRatio=1.0,
        labelCol="label",
        weightCol=None,
        featuresCols=[],
        allStringColumnsToCategorical=True,
        columnsToCategorical=[],
        nfolds=0,
        keepCrossValidationPredictions=False,
        keepCrossValidationFoldAssignment=False,
        parallelizeCrossValidation=True,
        seed=-1,
        distribution=family_enum.valueOf("AUTO"),
        epochs=10.0,
        l1=0.0,
        l2=0.0,
        hidden=[200, 200],
        reproducible=False,
        convertUnknownCategoricalLevelsToNa=False,
        foldCol=None,
    )
    self._setDefault(**defaults)
    supplied = get_input_kwargs(self)
    self.setParams(**supplied)
def setParams(self, featuresCols=[], algo=None, splitRatio=1.0, hyperParameters={},
              labelCol="label", weightCol=None, allStringColumnsToCategorical=True,
              columnsToCategorical=[], strategy="Cartesian", maxRuntimeSecs=0.0,
              maxModels=0, seed=-1, stoppingRounds=0, stoppingTolerance=0.001,
              stoppingMetric="AUTO", nfolds=0, selectBestModelBy="AUTO",
              selectBestModelDecreasing=True, foldCol=None,
              convertUnknownCategoricalLevelsToNa=True, predictionCol="prediction",
              detailedPredictionCol="detailed_prediction",
              withDetailedPredictionCol=False, convertInvalidNumbersToNa=False,
              **deprecatedArgs):
    """Set the parameters reported by ``get_input_kwargs`` on this instance.

    ``strategy``, ``stoppingMetric`` and ``selectBestModelBy`` are passed
    through the enum validator first. A non-``None`` ``algo`` is set directly
    on the Java object and removed from the values passed to ``_set``.

    :return: the result of ``self._set`` called with the processed values
    """
    params = get_input_kwargs(self)

    validateEnumValue(self._H2OGridSearchParams__getStrategyEnum(), params, "strategy")
    validateEnumValue(self._H2OGridSearchParams__getStoppingMetricEnum(), params,
                      "stoppingMetric")
    validateEnumValue(self._H2OGridSearchParams__getSelectBestModelByEnum(), params,
                      "selectBestModelBy")

    # Double-typed params are coerced to float explicitly: if an int is
    # handed over, py4j treats the value as an int and a class cast
    # exception follows.
    set_double_values(params, ["splitRatio", "stoppingTolerance", "maxRuntimeSecs"])

    algorithm = params.get("algo")
    if algorithm is not None:
        # Hand the algorithm's Java peer to the JVM side directly rather
        # than storing the wrapper as a regular param.
        del params['algo']
        self._java_obj.setAlgo(algorithm._java_obj)

    return self._set(**params)
def setParams(self, predictionCol="prediction", detailedPredictionCol="detailed_prediction",
              withDetailedPredictionCol=False, featuresCols=[], foldCol=None,
              weightCol=None, splitRatio=1.0, seed=-1, nfolds=0,
              allStringColumnsToCategorical=True, columnsToCategorical=[],
              convertUnknownCategoricalLevelsToNa=False, convertInvalidNumbersToNa=False,
              modelId=None, keepCrossValidationPredictions=False,
              keepCrossValidationFoldAssignment=False, parallelizeCrossValidation=True,
              distribution="AUTO", maxIterations=10, standardize=True, init="Furthest",
              userPoints=None, estimateK=False, k=2, **deprecatedArgs):
    """Set the parameters reported by ``get_input_kwargs`` on this instance.

    ``distribution`` and ``init`` are passed through the enum validator, and
    a supplied ``userPoints`` value is converted with
    ``getDoubleArrayArrayFromIntArrayArray``.

    :return: the result of ``self._set`` called with the processed values
    """
    kwargs = get_input_kwargs(self)

    validateEnumValue(self._H2OAlgoCommonParams__getDistributionEnum(), kwargs,
                      "distribution")
    validateEnumValue(self._H2OKMeansParams__getInitEnum(), kwargs, "init")

    # Double-typed params are coerced to float explicitly: if an int is
    # handed over, py4j treats the value as an int and a class cast
    # exception follows.
    double_types = ["splitRatio"]
    set_double_values(kwargs, double_types)

    # The array-of-arrays conversion belongs to "userPoints". "init" is an
    # enum name validated above and must be left untouched.
    user_points = kwargs.get("userPoints")
    if user_points is not None:
        kwargs["userPoints"] = getDoubleArrayArrayFromIntArrayArray(user_points)

    return self._set(**kwargs)
def setParams(self, foldCol=None, labelCol="label", inputCols=[], holdoutStrategy="None",
              blendedAvgEnabled=False, blendedAvgInflectionPoint=10.0,
              blendedAvgSmoothing=20.0, noise=0.01, noiseSeed=-1):
    """Set the parameters reported by ``get_input_kwargs`` on this instance.

    ``holdoutStrategy`` is passed through the enum validator first.

    :return: the result of ``self._set`` called with the processed values
    """
    params = get_input_kwargs(self)
    validateEnumValue(self.__getHoldoutStrategyEnumName(), params, "holdoutStrategy")

    # Double-typed params are coerced to float explicitly: if an int is
    # handed over, py4j treats the value as an int and a class cast
    # exception follows.
    set_double_values(
        params, ["blendedAvgInflectionPoint", "blendedAvgSmoothing", "noise"])
    return self._set(**params)
def setParams(self, modelId=None, splitRatio=1.0, labelCol="label", weightCol=None,
              featuresCols=[], allStringColumnsToCategorical=True,
              columnsToCategorical=[], nfolds=0, keepCrossValidationPredictions=False,
              keepCrossValidationFoldAssignment=False, parallelizeCrossValidation=True,
              seed=-1, distribution="AUTO", epochs=10.0, l1=0.0, l2=0.0,
              hidden=[200, 200], reproducible=False,
              convertUnknownCategoricalLevelsToNa=False, foldCol=None,
              predictionCol="prediction", detailedPredictionCol="detailed_prediction",
              withDetailedPredictionCol=False, convertInvalidNumbersToNa=False,
              **deprecatedArgs):
    """Set the parameters reported by ``get_input_kwargs`` on this instance.

    ``distribution`` is passed through the enum validator first.

    :return: the result of ``self._set`` called with the processed values
    """
    params = get_input_kwargs(self)
    validateEnumValue(self._H2OAlgoCommonParams__getDistributionEnum(), params,
                      "distribution")

    # Double-typed params are coerced to float explicitly: if an int is
    # handed over, py4j treats the value as an int and a class cast
    # exception follows.
    set_double_values(params, ["splitRatio", "epochs", "l1", "l2"])
    return self._set(**params)
def __init__(self, modelId=None, splitRatio=1.0, labelCol="label", weightCol=None,
             featuresCols=[], allStringColumnsToCategorical=True,
             columnsToCategorical=[], nfolds=0, keepCrossValidationPredictions=False,
             keepCrossValidationFoldAssignment=False, parallelizeCrossValidation=True,
             seed=-1, distribution="AUTO", convertUnknownCategoricalLevelsToNa=False,
             quietMode=True, missingValuesHandling=None, ntrees=50, nEstimators=0,
             maxDepth=6, minRows=1.0, minChildWeight=1.0, learnRate=0.3, eta=0.3,
             learnRateAnnealing=1.0, sampleRate=1.0, subsample=1.0, colSampleRate=1.0,
             colSampleByLevel=1.0, colSampleRatePerTree=1.0, colsampleBytree=1.0,
             maxAbsLeafnodePred=0.0, maxDeltaStep=0.0, scoreTreeInterval=0,
             initialScoreInterval=4000, scoreInterval=4000, minSplitImprovement=0.0,
             gamma=0.0, nthread=-1, maxBins=256, maxLeaves=0, minSumHessianInLeaf=100.0,
             minDataInLeaf=0.0, treeMethod="auto", growPolicy="depthwise",
             booster="gbtree", dmatrixType="auto", regLambda=0.0, regAlpha=0.0,
             sampleType="uniform", normalizeType="tree", rateDrop=0.0, oneDrop=False,
             skipDrop=0.0, gpuId=0, backend="auto", foldCol=None, **deprecatedArgs):
    """Create the H2OXGBoost wrapper and apply the supplied parameters.

    Obtains (or creates) the H2OContext, instantiates the backing Java
    object, registers the default parameter values (enum-typed defaults as
    JVM enum constants) and forwards whatever ``get_input_kwargs`` reports
    for this instance to ``setParams``.
    """
    super(H2OXGBoost, self).__init__()
    spark = SparkSession.builder.getOrCreate()
    self._hc = H2OContext.getOrCreate(spark, verbose=False)
    self._java_obj = self._new_java_obj("py_sparkling.ml.algos.H2OXGBoost", self.uid)

    hex_pkg = self._hc._jvm.hex
    family_enum = hex_pkg.genmodel.utils.DistributionFamily
    # All XGBoost-specific enums are nested in XGBoostParameters.
    xgb_params = hex_pkg.tree.xgboost.XGBoostModel.XGBoostParameters
    defaults = dict(
        modelId=None,
        splitRatio=1.0,
        labelCol="label",
        weightCol=None,
        featuresCols=[],
        allStringColumnsToCategorical=True,
        columnsToCategorical=[],
        nfolds=0,
        keepCrossValidationPredictions=False,
        keepCrossValidationFoldAssignment=False,
        parallelizeCrossValidation=True,
        seed=-1,
        distribution=family_enum.valueOf("AUTO"),
        convertUnknownCategoricalLevelsToNa=False,
        quietMode=True,
        missingValuesHandling=None,
        ntrees=50,
        nEstimators=0,
        maxDepth=6,
        minRows=1.0,
        minChildWeight=1.0,
        learnRate=0.3,
        eta=0.3,
        learnRateAnnealing=1.0,
        sampleRate=1.0,
        subsample=1.0,
        colSampleRate=1.0,
        colSampleByLevel=1.0,
        colSampleRatePerTree=1.0,
        colsampleBytree=1.0,
        maxAbsLeafnodePred=0.0,
        maxDeltaStep=0.0,
        scoreTreeInterval=0,
        initialScoreInterval=4000,
        scoreInterval=4000,
        minSplitImprovement=0.0,
        gamma=0.0,
        nthread=-1,
        maxBins=256,
        maxLeaves=0,
        minSumHessianInLeaf=100.0,
        minDataInLeaf=0.0,
        treeMethod=xgb_params.TreeMethod.valueOf("auto"),
        growPolicy=xgb_params.GrowPolicy.valueOf("depthwise"),
        booster=xgb_params.Booster.valueOf("gbtree"),
        dmatrixType=xgb_params.DMatrixType.valueOf("auto"),
        regLambda=0.0,
        regAlpha=0.0,
        sampleType=xgb_params.DartSampleType.valueOf("uniform"),
        normalizeType=xgb_params.DartNormalizeType.valueOf("tree"),
        rateDrop=0.0,
        oneDrop=False,
        skipDrop=0.0,
        gpuId=0,
        backend=xgb_params.Backend.valueOf("auto"),
        foldCol=None,
    )
    self._setDefault(**defaults)
    supplied = get_input_kwargs(self)
    self.setParams(**supplied)
def __init__(self, modelId=None, splitRatio=1.0, labelCol="label", weightCol=None,
             featuresCols=[], allStringColumnsToCategorical=True,
             columnsToCategorical=[], nfolds=0, keepCrossValidationPredictions=False,
             keepCrossValidationFoldAssignment=False, parallelizeCrossValidation=True,
             seed=-1, distribution="AUTO", ntrees=50, maxDepth=5, minRows=10.0,
             nbins=20, nbinsCats=1024, minSplitImprovement=1e-5, histogramType="AUTO",
             r2Stopping=java_max_double_value, nbinsTopLevel=1 << 10,
             buildTreeOneNode=False, scoreTreeInterval=0, sampleRate=1.0,
             sampleRatePerClass=None, colSampleRateChangePerLevel=1.0,
             colSampleRatePerTree=1.0, learnRate=0.1, learnRateAnnealing=1.0,
             colSampleRate=1.0, maxAbsLeafnodePred=java_max_double_value,
             predNoiseBandwidth=0.0, convertUnknownCategoricalLevelsToNa=False,
             foldCol=None, **deprecatedArgs):
    """Create the H2OGBM wrapper and apply the supplied parameters.

    Obtains (or creates) the H2OContext, instantiates the backing Java
    object, registers the default parameter values (enum-typed defaults as
    JVM enum constants, the two unbounded doubles as the JVM's
    ``Double.MAX_VALUE``) and forwards whatever ``get_input_kwargs`` reports
    for this instance to ``setParams``.
    """
    super(H2OGBM, self).__init__()
    spark = SparkSession.builder.getOrCreate()
    self._hc = H2OContext.getOrCreate(spark, verbose=False)
    self._java_obj = self._new_java_obj("py_sparkling.ml.algos.H2OGBM", self.uid)

    jvm = self._hc._jvm
    family_enum = jvm.hex.genmodel.utils.DistributionFamily
    histogram_enum = jvm.hex.tree.SharedTreeModel.SharedTreeParameters.HistogramType
    defaults = dict(
        modelId=None,
        splitRatio=1.0,
        labelCol="label",
        weightCol=None,
        featuresCols=[],
        allStringColumnsToCategorical=True,
        columnsToCategorical=[],
        nfolds=0,
        keepCrossValidationPredictions=False,
        keepCrossValidationFoldAssignment=False,
        parallelizeCrossValidation=True,
        seed=-1,
        distribution=family_enum.valueOf("AUTO"),
        ntrees=50,
        maxDepth=5,
        minRows=10.0,
        nbins=20,
        nbinsCats=1024,
        minSplitImprovement=1e-5,
        histogramType=histogram_enum.valueOf("AUTO"),
        r2Stopping=jvm.Double.MAX_VALUE,
        nbinsTopLevel=1 << 10,
        buildTreeOneNode=False,
        scoreTreeInterval=0,
        sampleRate=1.0,
        sampleRatePerClass=None,
        colSampleRateChangePerLevel=1.0,
        colSampleRatePerTree=1.0,
        learnRate=0.1,
        learnRateAnnealing=1.0,
        colSampleRate=1.0,
        maxAbsLeafnodePred=jvm.Double.MAX_VALUE,
        predNoiseBandwidth=0.0,
        convertUnknownCategoricalLevelsToNa=False,
        foldCol=None,
    )
    self._setDefault(**defaults)
    supplied = get_input_kwargs(self)
    self.setParams(**supplied)
def setParams(self, keep=False, columns=[]):
    """Set the parameters reported by ``get_input_kwargs`` on this instance.

    :return: the result of ``self._set`` called with those values
    """
    return self._set(**get_input_kwargs(self))
def __init__(self, featuresCols=[], labelCol="label", allStringColumnsToCategorical=True,
             columnsToCategorical=[], splitRatio=1.0, foldCol=None, weightCol=None,
             ignoredCols=[], includeAlgos=None, excludeAlgos=None, projectName=None,
             maxRuntimeSecs=3600.0, stoppingRounds=3, stoppingTolerance=0.001,
             stoppingMetric="AUTO", nfolds=5, convertUnknownCategoricalLevelsToNa=True,
             seed=-1, sortMetric="AUTO", balanceClasses=False, classSamplingFactors=None,
             maxAfterBalanceSize=5.0, keepCrossValidationPredictions=True,
             keepCrossValidationModels=True, maxModels=0, predictionCol="prediction",
             detailedPredictionCol="detailed_prediction",
             withDetailedPredictionCol=False, convertInvalidNumbersToNa=False,
             **deprecatedArgs):
    """Create the H2OAutoML wrapper and apply the supplied parameters.

    Instantiates the backing Java object, registers the default parameter
    values and forwards whatever ``get_input_kwargs`` reports for this
    instance to ``setParams``.
    """
    super(H2OAutoML, self).__init__()
    self._java_obj = self._new_java_obj("py_sparkling.ml.algos.H2OAutoML", self.uid)
    defaults = dict(
        featuresCols=[],
        labelCol="label",
        allStringColumnsToCategorical=True,
        columnsToCategorical=[],
        splitRatio=1.0,
        foldCol=None,
        weightCol=None,
        ignoredCols=[],
        includeAlgos=None,
        excludeAlgos=None,
        projectName=None,
        maxRuntimeSecs=3600.0,
        stoppingRounds=3,
        stoppingTolerance=0.001,
        stoppingMetric="AUTO",
        nfolds=5,
        convertUnknownCategoricalLevelsToNa=True,
        seed=-1,
        sortMetric=None,
        balanceClasses=False,
        classSamplingFactors=None,
        maxAfterBalanceSize=5.0,
        keepCrossValidationPredictions=True,
        keepCrossValidationModels=True,
        maxModels=0,
        predictionCol="prediction",
        detailedPredictionCol="detailed_prediction",
        withDetailedPredictionCol=False,
        convertInvalidNumbersToNa=False,
    )
    self._setDefault(**defaults)
    supplied = get_input_kwargs(self)
    self.setParams(**supplied)
def setParams(self, modelId=None, splitRatio=1.0, labelCol="label", weightCol=None,
              featuresCols=[], allStringColumnsToCategorical=True,
              columnsToCategorical=[], nfolds=0, keepCrossValidationPredictions=False,
              keepCrossValidationFoldAssignment=False, parallelizeCrossValidation=True,
              seed=-1, distribution="AUTO", convertUnknownCategoricalLevelsToNa=False,
              standardize=True, family="gaussian", link="family_default", solver="AUTO",
              tweedieVariancePower=0.0, tweedieLinkPower=0.0, alpha=None, lambda_=None,
              missingValuesHandling="MeanImputation", prior=-1.0, lambdaSearch=False,
              nlambdas=-1, nonNegative=False, exactLambdas=False, lambdaMinRatio=-1.0,
              maxIterations=-1, intercept=True, betaEpsilon=1e-4, objectiveEpsilon=-1.0,
              gradientEpsilon=-1.0, objReg=-1.0, computePValues=False,
              removeCollinearCols=False, interactions=None, interactionPairs=None,
              earlyStopping=True, foldCol=None, **deprecatedArgs):
    """Set the parameters reported by ``get_input_kwargs`` on this instance.

    Supplied ``distribution``, ``family``, ``link``, ``solver`` and
    ``missingValuesHandling`` names are replaced by the matching JVM enum
    constants. The elements of ``alpha`` and ``lambda_`` are converted to
    floats; a value of ``None`` for either is passed through unchanged.

    :return: the result of ``self._set`` called with the processed values
    """
    kwargs = get_input_kwargs(self)

    if "distribution" in kwargs:
        family_enum = self._hc._jvm.hex.genmodel.utils.DistributionFamily
        kwargs["distribution"] = family_enum.valueOf(kwargs["distribution"])
    if "family" in kwargs:
        glm_params = self._hc._jvm.hex.glm.GLMModel.GLMParameters
        kwargs["family"] = glm_params.Family.valueOf(kwargs["family"])
    if "link" in kwargs:
        glm_params = self._hc._jvm.hex.glm.GLMModel.GLMParameters
        kwargs["link"] = glm_params.Link.valueOf(kwargs["link"])
    if "solver" in kwargs:
        glm_params = self._hc._jvm.hex.glm.GLMModel.GLMParameters
        kwargs["solver"] = glm_params.Solver.valueOf(kwargs["solver"])
    if "missingValuesHandling" in kwargs:
        dl_params = self._hc._jvm.hex.deeplearning.DeepLearningModel.DeepLearningParameters
        kwargs["missingValuesHandling"] = dl_params.MissingValuesHandling.valueOf(
            kwargs["missingValuesHandling"])

    # Double-typed params are coerced to float explicitly: if an int is
    # handed over, py4j treats the value as an int and a class cast
    # exception follows.
    double_types = [
        "splitRatio", "tweedieVariancePower", "tweedieLinkPower", "prior",
        "lambdaMinRatio", "betaEpsilon", "objectiveEpsilon", "gradientEpsilon",
        "objReg"
    ]
    set_double_values(kwargs, double_types)

    # Array-valued params need the same coercion element by element. Build a
    # real list (on Python 3 ``map`` returns a lazy iterator) and skip None,
    # which is the documented default and is not iterable.
    for array_key in ("alpha", "lambda_"):
        if kwargs.get(array_key) is not None:
            kwargs[array_key] = [float(value) for value in kwargs[array_key]]

    return self._set(**kwargs)
def setParams(self, modelId=None, splitRatio=1.0, labelCol="label", weightCol=None,
              featuresCols=[], allStringColumnsToCategorical=True,
              columnsToCategorical=[], nfolds=0, keepCrossValidationPredictions=False,
              keepCrossValidationFoldAssignment=False, parallelizeCrossValidation=True,
              seed=-1, distribution="AUTO", ntrees=50, maxDepth=5, minRows=10.0,
              nbins=20, nbinsCats=1024, minSplitImprovement=1e-5, histogramType="AUTO",
              r2Stopping=java_max_double_value, nbinsTopLevel=1 << 10,
              buildTreeOneNode=False, scoreTreeInterval=0, sampleRate=1.0,
              sampleRatePerClass=None, colSampleRateChangePerLevel=1.0,
              colSampleRatePerTree=1.0, learnRate=0.1, learnRateAnnealing=1.0,
              colSampleRate=1.0, maxAbsLeafnodePred=java_max_double_value,
              predNoiseBandwidth=0.0, convertUnknownCategoricalLevelsToNa=False,
              foldCol=None, predictionCol="prediction",
              detailedPredictionCol="detailed_prediction",
              withDetailedPredictionCol=False, convertInvalidNumbersToNa=False,
              **deprecatedArgs):
    """Set the parameters reported by ``get_input_kwargs`` on this instance.

    ``distribution`` and ``histogramType`` are passed through the enum
    validator, double-typed scalars are coerced to float, and
    ``sampleRatePerClass`` is handed to ``arrayToDoubleArray``.

    :return: the result of ``self._set`` called with the processed values
    """
    kwargs = get_input_kwargs(self)

    validateEnumValue(self._H2OAlgoCommonParams__getDistributionEnum(), kwargs,
                      "distribution")
    validateEnumValue(self._H2OSharedTreeParams__getHistogramTypeEnum(), kwargs,
                      "histogramType")

    # Double-typed params are coerced to float explicitly: if an int is
    # handed over, py4j treats the value as an int and a class cast
    # exception follows. One name per element: a missing comma silently
    # concatenates two adjacent string literals into a single bogus key.
    double_types = [
        "minRows",
        "predNoiseBandwidth",
        "splitRatio",
        "learnRate",
        "colSampleRate",
        "learnRateAnnealing",
        "maxAbsLeafnodePred",
        "minSplitImprovement",
        "r2Stopping",
        "sampleRate",
        "colSampleRateChangePerLevel",
        "colSampleRatePerTree",
    ]
    set_double_values(kwargs, double_types)

    # Array-valued params need the same coercion for their elements.
    arrayToDoubleArray("sampleRatePerClass", kwargs)
    return self._set(**kwargs)
def setParams(self, modelId=None, splitRatio=1.0, labelCol="label", weightCol=None,
              featuresCols=[], allStringColumnsToCategorical=True,
              columnsToCategorical=[], nfolds=0, keepCrossValidationPredictions=False,
              keepCrossValidationFoldAssignment=False, parallelizeCrossValidation=True,
              seed=-1, distribution="AUTO", convertUnknownCategoricalLevelsToNa=False,
              standardize=True, family="gaussian", link="family_default", solver="AUTO",
              tweedieVariancePower=0.0, tweedieLinkPower=0.0, alpha=None, lambda_=None,
              missingValuesHandling="MeanImputation", prior=-1.0, lambdaSearch=False,
              nlambdas=-1, nonNegative=False, exactLambdas=False, lambdaMinRatio=-1.0,
              maxIterations=-1, intercept=True, betaEpsilon=1e-4, objectiveEpsilon=-1.0,
              gradientEpsilon=-1.0, objReg=-1.0, computePValues=False,
              removeCollinearCols=False, interactions=None, interactionPairs=None,
              earlyStopping=True, foldCol=None, predictionCol="prediction",
              detailedPredictionCol="detailed_prediction",
              withDetailedPredictionCol=False, convertInvalidNumbersToNa=False,
              **deprecatedArgs):
    """Set the parameters reported by ``get_input_kwargs`` on this instance.

    ``distribution``, ``family``, ``link``, ``solver`` and
    ``missingValuesHandling`` are passed through the enum validator,
    double-typed scalars are coerced to float, and ``alpha`` / ``lambda_``
    are handed to ``arrayToDoubleArray``.

    :return: the result of ``self._set`` called with the processed values
    """
    params = get_input_kwargs(self)

    validateEnumValue(self._H2OAlgoCommonParams__getDistributionEnum(), params,
                      "distribution")
    validateEnumValue(self._H2OGLMParams__getFamilyEnum(), params, "family")
    validateEnumValue(self._H2OGLMParams__getLinkEnum(), params, "link")
    validateEnumValue(self._H2OGLMParams__getSolverEnum(), params, "solver")
    validateEnumValue(self._H2OGLMParams__getMissingValuesHandlingEnum(), params,
                      "missingValuesHandling")

    # Double-typed params are coerced to float explicitly: if an int is
    # handed over, py4j treats the value as an int and a class cast
    # exception follows.
    set_double_values(params, [
        "splitRatio",
        "tweedieVariancePower",
        "tweedieLinkPower",
        "prior",
        "lambdaMinRatio",
        "betaEpsilon",
        "objectiveEpsilon",
        "gradientEpsilon",
        "objReg",
    ])

    # Array-valued params need the same coercion for their elements.
    for array_key in ("alpha", "lambda_"):
        arrayToDoubleArray(array_key, params)

    return self._set(**params)
def setParams(self,
              modelId=None,
              splitRatio=1.0,
              labelCol="label",
              weightCol=None,
              featuresCols=[],
              allStringColumnsToCategorical=True,
              columnsToCategorical=[],
              nfolds=0,
              keepCrossValidationPredictions=False,
              keepCrossValidationFoldAssignment=False,
              parallelizeCrossValidation=True,
              seed=-1,
              distribution="AUTO",
              convertUnknownCategoricalLevelsToNa=False,
              quietMode=True,
              ntrees=50,
              nEstimators=0,
              maxDepth=6,
              minRows=1.0,
              minChildWeight=1.0,
              learnRate=0.3,
              eta=0.3,
              learnRateAnnealing=1.0,
              sampleRate=1.0,
              subsample=1.0,
              colSampleRate=1.0,
              colSampleByLevel=1.0,
              colSampleRatePerTree=1.0,
              colsampleBytree=1.0,
              maxAbsLeafnodePred=0.0,
              maxDeltaStep=0.0,
              scoreTreeInterval=0,
              initialScoreInterval=4000,
              scoreInterval=4000,
              minSplitImprovement=0.0,
              gamma=0.0,
              nthread=-1,
              maxBins=256,
              maxLeaves=0,
              minSumHessianInLeaf=100.0,
              minDataInLeaf=0.0,
              treeMethod="auto",
              growPolicy="depthwise",
              booster="gbtree",
              dmatrixType="auto",
              regLambda=0.0,
              regAlpha=0.0,
              sampleType="uniform",
              normalizeType="tree",
              rateDrop=0.0,
              oneDrop=False,
              skipDrop=0.0,
              gpuId=0,
              backend="auto",
              foldCol=None,
              predictionCol="prediction",
              detailedPredictionCol="detailed_prediction",
              withDetailedPredictionCol=False,
              convertInvalidNumbersToNa=False,
              **deprecatedArgs):
    """Validate, normalise and store the XGBoost parameters given to this call.

    The working set of parameters is whatever ``get_input_kwargs(self)``
    returns (presumably only the keyword arguments the caller passed
    explicitly -- confirm against that helper).

    :return: the result of ``self._set(**kwargs)``.
    """
    kwargs = get_input_kwargs(self)

    # (name of the enum getter on self, parameter validated against that enum).
    enum_checks = (
        ("_H2OAlgoCommonParams__getDistributionEnum", "distribution"),
        ("_H2OXGBoostParams__getTreeMethodEnum", "treeMethod"),
        ("_H2OXGBoostParams__getGrowPolicyEnum", "growPolicy"),
        ("_H2OXGBoostParams__getBoosterEnum", "booster"),
        ("_H2OXGBoostParams__getDmatrixTypeEnum", "dmatrixType"),
        ("_H2OXGBoostParams__getSampleTypeEnum", "sampleType"),
        ("_H2OXGBoostParams__getNormalizeTypeEnum", "normalizeType"),
        ("_H2OXGBoostParams__getBackendEnum", "backend"),
    )
    for getter_name, param_name in enum_checks:
        validateEnumValue(getattr(self, getter_name)(), kwargs, param_name)

    # we need to convert double arguments manually to floats as if we assign integer to double, py4j thinks that
    # the whole type is actually int and we get class cast exception
    #
    # One name per line with a trailing comma: a missing comma silently merges
    # two adjacent string literals into a single, non-existent parameter name.
    double_types = [
        "splitRatio",
        "minRows",
        "minChildWeight",
        "learnRate",
        "eta",
        "learnRateAnnealing",
        "sampleRate",
        "subsample",
        "colSampleRate",
        "colSampleByLevel",
        "colSampleRatePerTree",
        "colsampleBytree",
        "maxAbsLeafnodePred",
        "maxDeltaStep",
        "minSplitImprovement",
        "gamma",
        "minSumHessianInLeaf",
        "minDataInLeaf",
        "regLambda",
        "regAlpha",
        "rateDrop",
        "skipDrop",
    ]
    set_double_values(kwargs, double_types)

    return self._set(**kwargs)
def __init__(self,
             modelId=None,
             splitRatio=1.0,
             labelCol="label",
             weightCol=None,
             featuresCols=[],
             allStringColumnsToCategorical=True,
             columnsToCategorical=[],
             nfolds=0,
             keepCrossValidationPredictions=False,
             keepCrossValidationFoldAssignment=False,
             parallelizeCrossValidation=True,
             seed=-1,
             distribution="AUTO",
             convertUnknownCategoricalLevelsToNa=False,
             quietMode=True,
             ntrees=50,
             nEstimators=0,
             maxDepth=6,
             minRows=1.0,
             minChildWeight=1.0,
             learnRate=0.3,
             eta=0.3,
             learnRateAnnealing=1.0,
             sampleRate=1.0,
             subsample=1.0,
             colSampleRate=1.0,
             colSampleByLevel=1.0,
             colSampleRatePerTree=1.0,
             colsampleBytree=1.0,
             maxAbsLeafnodePred=0.0,
             maxDeltaStep=0.0,
             scoreTreeInterval=0,
             initialScoreInterval=4000,
             scoreInterval=4000,
             minSplitImprovement=0.0,
             gamma=0.0,
             nthread=-1,
             maxBins=256,
             maxLeaves=0,
             minSumHessianInLeaf=100.0,
             minDataInLeaf=0.0,
             treeMethod="auto",
             growPolicy="depthwise",
             booster="gbtree",
             dmatrixType="auto",
             regLambda=0.0,
             regAlpha=0.0,
             sampleType="uniform",
             normalizeType="tree",
             rateDrop=0.0,
             oneDrop=False,
             skipDrop=0.0,
             gpuId=0,
             backend="auto",
             foldCol=None,
             predictionCol="prediction",
             detailedPredictionCol="detailed_prediction",
             withDetailedPredictionCol=False,
             convertInvalidNumbersToNa=False,
             **deprecatedArgs):
    """Build the XGBoost estimator wrapper.

    Creates the backing ``py_sparkling.ml.algos.H2OXGBoost`` Java object,
    registers the default value of every parameter, and then forwards the
    result of ``get_input_kwargs(self)`` to ``setParams``.
    """
    super(H2OXGBoost, self).__init__()
    self._java_obj = self._new_java_obj("py_sparkling.ml.algos.H2OXGBoost", self.uid)

    # Defaults mirror the values in the signature above.
    defaults = {
        "modelId": None,
        "splitRatio": 1.0,
        "labelCol": "label",
        "weightCol": None,
        "featuresCols": [],
        "allStringColumnsToCategorical": True,
        "columnsToCategorical": [],
        "nfolds": 0,
        "keepCrossValidationPredictions": False,
        "keepCrossValidationFoldAssignment": False,
        "parallelizeCrossValidation": True,
        "seed": -1,
        "distribution": "AUTO",
        "convertUnknownCategoricalLevelsToNa": False,
        "quietMode": True,
        "ntrees": 50,
        "nEstimators": 0,
        "maxDepth": 6,
        "minRows": 1.0,
        "minChildWeight": 1.0,
        "learnRate": 0.3,
        "eta": 0.3,
        "learnRateAnnealing": 1.0,
        "sampleRate": 1.0,
        "subsample": 1.0,
        "colSampleRate": 1.0,
        "colSampleByLevel": 1.0,
        "colSampleRatePerTree": 1.0,
        "colsampleBytree": 1.0,
        "maxAbsLeafnodePred": 0.0,
        "maxDeltaStep": 0.0,
        "scoreTreeInterval": 0,
        "initialScoreInterval": 4000,
        "scoreInterval": 4000,
        "minSplitImprovement": 0.0,
        "gamma": 0.0,
        "nthread": -1,
        "maxBins": 256,
        "maxLeaves": 0,
        "minSumHessianInLeaf": 100.0,
        "minDataInLeaf": 0.0,
        "treeMethod": "auto",
        "growPolicy": "depthwise",
        "booster": "gbtree",
        "dmatrixType": "auto",
        "regLambda": 0.0,
        "regAlpha": 0.0,
        "sampleType": "uniform",
        "normalizeType": "tree",
        "rateDrop": 0.0,
        "oneDrop": False,
        "skipDrop": 0.0,
        "gpuId": 0,
        "backend": "auto",
        "foldCol": None,
        "predictionCol": "prediction",
        "detailedPredictionCol": "detailed_prediction",
        "withDetailedPredictionCol": False,
        "convertInvalidNumbersToNa": False,
    }
    self._setDefault(**defaults)

    passed_args = get_input_kwargs(self)
    self.setParams(**passed_args)
def setParams(self,
              modelId=None,
              splitRatio=1.0,
              labelCol="label",
              weightCol=None,
              featuresCols=[],
              allStringColumnsToCategorical=True,
              columnsToCategorical=[],
              nfolds=0,
              keepCrossValidationPredictions=False,
              keepCrossValidationFoldAssignment=False,
              parallelizeCrossValidation=True,
              seed=-1,
              distribution="AUTO",
              convertUnknownCategoricalLevelsToNa=False,
              quietMode=True,
              missingValuesHandling=None,
              ntrees=50,
              nEstimators=0,
              maxDepth=6,
              minRows=1.0,
              minChildWeight=1.0,
              learnRate=0.3,
              eta=0.3,
              learnRateAnnealing=1.0,
              sampleRate=1.0,
              subsample=1.0,
              colSampleRate=1.0,
              colSampleByLevel=1.0,
              colSampleRatePerTree=1.0,
              colsampleBytree=1.0,
              maxAbsLeafnodePred=0.0,
              maxDeltaStep=0.0,
              scoreTreeInterval=0,
              initialScoreInterval=4000,
              scoreInterval=4000,
              minSplitImprovement=0.0,
              gamma=0.0,
              nthread=-1,
              maxBins=256,
              maxLeaves=0,
              minSumHessianInLeaf=100.0,
              minDataInLeaf=0.0,
              treeMethod="auto",
              growPolicy="depthwise",
              booster="gbtree",
              dmatrixType="auto",
              regLambda=0.0,
              regAlpha=0.0,
              sampleType="uniform",
              normalizeType="tree",
              rateDrop=0.0,
              oneDrop=False,
              skipDrop=0.0,
              gpuId=0,
              backend="auto",
              foldCol=None,
              **deprecatedArgs):
    """Convert and store the XGBoost parameters given to this call.

    The working set of parameters is whatever ``get_input_kwargs(self)``
    returns (presumably only the keyword arguments the caller passed
    explicitly -- confirm against that helper). Enum-valued parameters that
    are present are replaced by the Java enum constant obtained through
    ``valueOf`` on the JVM reachable via ``self._hc._jvm``.

    :return: the result of ``self._set(**kwargs)``.
    """
    kwargs = get_input_kwargs(self)

    if "distribution" in kwargs:
        kwargs["distribution"] = self._hc._jvm.hex.genmodel.utils.DistributionFamily.valueOf(
            kwargs["distribution"])

    # (parameter name, nested enum class under XGBoostModel.XGBoostParameters).
    xgboost_enums = (
        ("treeMethod", "TreeMethod"),
        ("growPolicy", "GrowPolicy"),
        ("booster", "Booster"),
        ("dmatrixType", "DMatrixType"),
        ("sampleType", "DartSampleType"),
        ("normalizeType", "DartNormalizeType"),
        ("backend", "Backend"),
    )
    for param_name, enum_name in xgboost_enums:
        if param_name in kwargs:
            # The JVM is only touched for parameters that were actually given.
            enum_cls = getattr(
                self._hc._jvm.hex.tree.xgboost.XGBoostModel.XGBoostParameters, enum_name)
            kwargs[param_name] = enum_cls.valueOf(kwargs[param_name])

    # we need to convert double arguments manually to floats as if we assign integer to double, py4j thinks that
    # the whole type is actually int and we get class cast exception
    #
    # One name per line with a trailing comma: a missing comma silently merges
    # two adjacent string literals into a single, non-existent parameter name.
    double_types = [
        "splitRatio",
        "minRows",
        "minChildWeight",
        "learnRate",
        "eta",
        "learnRateAnnealing",
        "sampleRate",
        "subsample",
        "colSampleRate",
        "colSampleByLevel",
        "colSampleRatePerTree",
        "colsampleBytree",
        "maxAbsLeafnodePred",
        "maxDeltaStep",
        "minSplitImprovement",
        "gamma",
        "minSumHessianInLeaf",
        "minDataInLeaf",
        "regLambda",
        "regAlpha",
        "rateDrop",
        "skipDrop",
    ]
    set_double_values(kwargs, double_types)

    return self._set(**kwargs)
def __init__(self,
             modelId=None,
             splitRatio=1.0,
             labelCol="label",
             weightCol=None,
             featuresCols=[],
             allStringColumnsToCategorical=True,
             columnsToCategorical=[],
             nfolds=0,
             keepCrossValidationPredictions=False,
             keepCrossValidationFoldAssignment=False,
             parallelizeCrossValidation=True,
             seed=-1,
             distribution="AUTO",
             convertUnknownCategoricalLevelsToNa=False,
             standardize=True,
             family="gaussian",
             link="family_default",
             solver="AUTO",
             tweedieVariancePower=0.0,
             tweedieLinkPower=0.0,
             alpha=None,
             lambda_=None,
             missingValuesHandling="MeanImputation",
             prior=-1.0,
             lambdaSearch=False,
             nlambdas=-1,
             nonNegative=False,
             exactLambdas=False,
             lambdaMinRatio=-1.0,
             maxIterations=-1,
             intercept=True,
             betaEpsilon=1e-4,
             objectiveEpsilon=-1.0,
             gradientEpsilon=-1.0,
             objReg=-1.0,
             computePValues=False,
             removeCollinearCols=False,
             interactions=None,
             interactionPairs=None,
             earlyStopping=True,
             foldCol=None,
             **deprecatedArgs):
    """Build the GLM estimator wrapper.

    Obtains an ``H2OContext`` for the current Spark session, creates the
    backing ``py_sparkling.ml.algos.H2OGLM`` Java object, registers the
    default value of every parameter (enum-valued defaults are stored as Java
    enum constants looked up through the JVM), and then forwards the result
    of ``get_input_kwargs(self)`` to ``setParams``.
    """
    super(H2OGLM, self).__init__()
    self._hc = H2OContext.getOrCreate(SparkSession.builder.getOrCreate(), verbose=False)
    self._java_obj = self._new_java_obj("py_sparkling.ml.algos.H2OGLM", self.uid)

    # Entries are evaluated top to bottom, so the JVM enum lookups happen in
    # the order: distribution, family, link, solver, missingValuesHandling.
    defaults = {
        "modelId": None,
        "splitRatio": 1.0,
        "labelCol": "label",
        "weightCol": None,
        "featuresCols": [],
        "allStringColumnsToCategorical": True,
        "columnsToCategorical": [],
        "nfolds": 0,
        "keepCrossValidationPredictions": False,
        "keepCrossValidationFoldAssignment": False,
        "parallelizeCrossValidation": True,
        "seed": -1,
        "distribution":
            self._hc._jvm.hex.genmodel.utils.DistributionFamily.valueOf("AUTO"),
        "convertUnknownCategoricalLevelsToNa": False,
        "standardize": True,
        "family":
            self._hc._jvm.hex.glm.GLMModel.GLMParameters.Family.valueOf("gaussian"),
        "link":
            self._hc._jvm.hex.glm.GLMModel.GLMParameters.Link.valueOf("family_default"),
        "solver":
            self._hc._jvm.hex.glm.GLMModel.GLMParameters.Solver.valueOf("AUTO"),
        "tweedieVariancePower": 0.0,
        "tweedieLinkPower": 0.0,
        "alpha": None,
        "lambda_": None,
        "missingValuesHandling":
            self._hc._jvm.hex.deeplearning.DeepLearningModel.DeepLearningParameters
            .MissingValuesHandling.valueOf("MeanImputation"),
        "prior": -1.0,
        "lambdaSearch": False,
        "nlambdas": -1,
        "nonNegative": False,
        "exactLambdas": False,
        "lambdaMinRatio": -1.0,
        "maxIterations": -1,
        "intercept": True,
        "betaEpsilon": 1e-4,
        "objectiveEpsilon": -1.0,
        "gradientEpsilon": -1.0,
        "objReg": -1.0,
        "computePValues": False,
        "removeCollinearCols": False,
        "interactions": None,
        "interactionPairs": None,
        "earlyStopping": True,
        "foldCol": None,
    }
    self._setDefault(**defaults)

    passed_args = get_input_kwargs(self)
    self.setParams(**passed_args)
def setParams(self,
              modelId=None,
              splitRatio=1.0,
              labelCol="label",
              weightCol=None,
              featuresCols=[],
              allStringColumnsToCategorical=True,
              columnsToCategorical=[],
              nfolds=0,
              keepCrossValidationPredictions=False,
              keepCrossValidationFoldAssignment=False,
              parallelizeCrossValidation=True,
              seed=-1,
              distribution="AUTO",
              ntrees=50,
              maxDepth=5,
              minRows=10.0,
              nbins=20,
              nbinsCats=1024,
              minSplitImprovement=1e-5,
              histogramType="AUTO",
              r2Stopping=java_max_double_value,
              nbinsTopLevel=1 << 10,
              buildTreeOneNode=False,
              scoreTreeInterval=0,
              sampleRate=1.0,
              sampleRatePerClass=None,
              colSampleRateChangePerLevel=1.0,
              colSampleRatePerTree=1.0,
              learnRate=0.1,
              learnRateAnnealing=1.0,
              colSampleRate=1.0,
              maxAbsLeafnodePred=java_max_double_value,
              predNoiseBandwidth=0.0,
              convertUnknownCategoricalLevelsToNa=False,
              foldCol=None,
              **deprecatedArgs):
    """Convert and store the GBM parameters given to this call.

    The working set of parameters is whatever ``get_input_kwargs(self)``
    returns (presumably only the keyword arguments the caller passed
    explicitly -- confirm against that helper). ``distribution`` and
    ``histogramType``, when present, are replaced by the Java enum constant
    obtained through ``valueOf`` on the JVM reachable via ``self._hc._jvm``.

    :return: the result of ``self._set(**kwargs)``.
    """
    kwargs = get_input_kwargs(self)

    if "distribution" in kwargs:
        kwargs["distribution"] = self._hc._jvm.hex.genmodel.utils.DistributionFamily.valueOf(
            kwargs["distribution"])

    if "histogramType" in kwargs:
        kwargs["histogramType"] = (
            self._hc._jvm.hex.tree.SharedTreeModel.SharedTreeParameters.HistogramType.valueOf(
                kwargs["histogramType"]))

    # we need to convert double arguments manually to floats as if we assign integer to double, py4j thinks that
    # the whole type is actually int and we get class cast exception
    #
    # One name per line with a trailing comma: a missing comma silently merges
    # two adjacent string literals into a single, non-existent parameter name.
    double_types = [
        "minRows",
        "predNoiseBandwidth",
        "splitRatio",
        "learnRate",
        "colSampleRate",
        "learnRateAnnealing",
        "maxAbsLeafnodePred",
        "minSplitImprovement",
        "r2Stopping",
        "sampleRate",
        "colSampleRateChangePerLevel",
        "colSampleRatePerTree",
    ]
    set_double_values(kwargs, double_types)

    # We need to also map all doubles in the arrays. A real list is built
    # (on Python 3 ``map`` would hand a lazy iterator to ``_set``), and an
    # explicit ``None`` is left untouched instead of raising TypeError.
    rates_per_class = kwargs.get("sampleRatePerClass")
    if rates_per_class is not None:
        kwargs["sampleRatePerClass"] = [float(rate) for rate in rates_per_class]

    return self._set(**kwargs)
def __init__(self,
             modelId=None,
             splitRatio=1.0,
             labelCol="label",
             weightCol=None,
             featuresCols=[],
             allStringColumnsToCategorical=True,
             columnsToCategorical=[],
             nfolds=0,
             keepCrossValidationPredictions=False,
             keepCrossValidationFoldAssignment=False,
             parallelizeCrossValidation=True,
             seed=-1,
             distribution="AUTO",
             convertUnknownCategoricalLevelsToNa=False,
             standardize=True,
             family="gaussian",
             link="family_default",
             solver="AUTO",
             tweedieVariancePower=0.0,
             tweedieLinkPower=0.0,
             alpha=None,
             lambda_=None,
             missingValuesHandling="MeanImputation",
             prior=-1.0,
             lambdaSearch=False,
             nlambdas=-1,
             nonNegative=False,
             exactLambdas=False,
             lambdaMinRatio=-1.0,
             maxIterations=-1,
             intercept=True,
             betaEpsilon=1e-4,
             objectiveEpsilon=-1.0,
             gradientEpsilon=-1.0,
             objReg=-1.0,
             computePValues=False,
             removeCollinearCols=False,
             interactions=None,
             interactionPairs=None,
             earlyStopping=True,
             foldCol=None,
             predictionCol="prediction",
             detailedPredictionCol="detailed_prediction",
             withDetailedPredictionCol=False,
             convertInvalidNumbersToNa=False,
             **deprecatedArgs):
    """Build the GLM estimator wrapper.

    Creates the backing ``py_sparkling.ml.algos.H2OGLM`` Java object,
    registers the default value of every parameter, and then forwards the
    result of ``get_input_kwargs(self)`` to ``setParams``.
    """
    super(H2OGLM, self).__init__()
    self._java_obj = self._new_java_obj("py_sparkling.ml.algos.H2OGLM", self.uid)

    # Defaults mirror the values in the signature above.
    defaults = {
        "modelId": None,
        "splitRatio": 1.0,
        "labelCol": "label",
        "weightCol": None,
        "featuresCols": [],
        "allStringColumnsToCategorical": True,
        "columnsToCategorical": [],
        "nfolds": 0,
        "keepCrossValidationPredictions": False,
        "keepCrossValidationFoldAssignment": False,
        "parallelizeCrossValidation": True,
        "seed": -1,
        "distribution": "AUTO",
        "convertUnknownCategoricalLevelsToNa": False,
        "standardize": True,
        "family": "gaussian",
        "link": "family_default",
        "solver": "AUTO",
        "tweedieVariancePower": 0.0,
        "tweedieLinkPower": 0.0,
        "alpha": None,
        "lambda_": None,
        "missingValuesHandling": "MeanImputation",
        "prior": -1.0,
        "lambdaSearch": False,
        "nlambdas": -1,
        "nonNegative": False,
        "exactLambdas": False,
        "lambdaMinRatio": -1.0,
        "maxIterations": -1,
        "intercept": True,
        "betaEpsilon": 1e-4,
        "objectiveEpsilon": -1.0,
        "gradientEpsilon": -1.0,
        "objReg": -1.0,
        "computePValues": False,
        "removeCollinearCols": False,
        "interactions": None,
        "interactionPairs": None,
        "earlyStopping": True,
        "foldCol": None,
        "predictionCol": "prediction",
        "detailedPredictionCol": "detailed_prediction",
        "withDetailedPredictionCol": False,
        "convertInvalidNumbersToNa": False,
    }
    self._setDefault(**defaults)

    passed_args = get_input_kwargs(self)
    self.setParams(**passed_args)
def __init__(self,
             modelId=None,
             splitRatio=1.0,
             labelCol="label",
             weightCol=None,
             featuresCols=[],
             allStringColumnsToCategorical=True,
             columnsToCategorical=[],
             nfolds=0,
             keepCrossValidationPredictions=False,
             keepCrossValidationFoldAssignment=False,
             parallelizeCrossValidation=True,
             seed=-1,
             distribution="AUTO",
             ntrees=50,
             maxDepth=5,
             minRows=10.0,
             nbins=20,
             nbinsCats=1024,
             minSplitImprovement=1e-5,
             histogramType="AUTO",
             r2Stopping=java_max_double_value,
             nbinsTopLevel=1 << 10,
             buildTreeOneNode=False,
             scoreTreeInterval=0,
             sampleRate=1.0,
             sampleRatePerClass=None,
             colSampleRateChangePerLevel=1.0,
             colSampleRatePerTree=1.0,
             learnRate=0.1,
             learnRateAnnealing=1.0,
             colSampleRate=1.0,
             maxAbsLeafnodePred=java_max_double_value,
             predNoiseBandwidth=0.0,
             convertUnknownCategoricalLevelsToNa=False,
             foldCol=None,
             predictionCol="prediction",
             detailedPredictionCol="detailed_prediction",
             withDetailedPredictionCol=False,
             convertInvalidNumbersToNa=False,
             **deprecatedArgs):
    """Build the GBM estimator wrapper.

    Creates the backing ``py_sparkling.ml.algos.H2OGBM`` Java object,
    registers the default value of every parameter, and then forwards the
    result of ``get_input_kwargs(self)`` to ``setParams``.
    """
    super(H2OGBM, self).__init__()
    self._java_obj = self._new_java_obj("py_sparkling.ml.algos.H2OGBM", self.uid)

    # NOTE(review): the signature defaults r2Stopping / maxAbsLeafnodePred to
    # java_max_double_value, while the registered defaults read
    # Double.MAX_VALUE from the JVM -- presumably the same number; confirm.
    defaults = {
        "modelId": None,
        "splitRatio": 1.0,
        "labelCol": "label",
        "weightCol": None,
        "featuresCols": [],
        "allStringColumnsToCategorical": True,
        "columnsToCategorical": [],
        "nfolds": 0,
        "keepCrossValidationPredictions": False,
        "keepCrossValidationFoldAssignment": False,
        "parallelizeCrossValidation": True,
        "seed": -1,
        "distribution": "AUTO",
        "ntrees": 50,
        "maxDepth": 5,
        "minRows": 10.0,
        "nbins": 20,
        "nbinsCats": 1024,
        "minSplitImprovement": 1e-5,
        "histogramType": "AUTO",
        "r2Stopping": _jvm().Double.MAX_VALUE,
        "nbinsTopLevel": 1 << 10,
        "buildTreeOneNode": False,
        "scoreTreeInterval": 0,
        "sampleRate": 1.0,
        "sampleRatePerClass": None,
        "colSampleRateChangePerLevel": 1.0,
        "colSampleRatePerTree": 1.0,
        "learnRate": 0.1,
        "learnRateAnnealing": 1.0,
        "colSampleRate": 1.0,
        "maxAbsLeafnodePred": _jvm().Double.MAX_VALUE,
        "predNoiseBandwidth": 0.0,
        "convertUnknownCategoricalLevelsToNa": False,
        "foldCol": None,
        "predictionCol": "prediction",
        "detailedPredictionCol": "detailed_prediction",
        "withDetailedPredictionCol": False,
        "convertInvalidNumbersToNa": False,
    }
    self._setDefault(**defaults)

    passed_args = get_input_kwargs(self)
    self.setParams(**passed_args)