Exemple #1
0
    def __init__(self, modelId=None, splitRatio=1.0, labelCol="label", weightCol=None, featuresCols=[], allStringColumnsToCategorical=True, columnsToCategorical=[],
                 nfolds=0, keepCrossValidationPredictions=False, keepCrossValidationFoldAssignment=False, parallelizeCrossValidation=True,
                 seed=-1, distribution="AUTO", ntrees=50, maxDepth=5, minRows=10.0, nbins=20, nbinsCats=1024, minSplitImprovement=1e-5,
                 histogramType="AUTO", r2Stopping=java_max_double_value,
                 nbinsTopLevel=1<<10, buildTreeOneNode=False, scoreTreeInterval=0,
                 sampleRate=1.0, sampleRatePerClass=None, colSampleRateChangePerLevel=1.0, colSampleRatePerTree=1.0,
                 learnRate=0.1, learnRateAnnealing=1.0, colSampleRate=1.0, maxAbsLeafnodePred=java_max_double_value,
                 predNoiseBandwidth=0.0, convertUnknownCategoricalLevelsToNa=False, foldCol=None, predictionCol="prediction",
                 detailedPredictionCol="detailed_prediction", withDetailedPredictionCol=False,
                 convertInvalidNumbersToNa=False, **deprecatedArgs):
        """Build the Python wrapper around the JVM ``H2OGBM`` estimator.

        The named parameters are not read directly in this body; the supplied
        arguments are obtained through ``get_input_kwargs(self)`` and handed
        to ``setParams`` after the defaults have been registered.
        """
        Initializer.load_sparkling_jar()
        super(H2OGBM, self).__init__()
        self._java_obj = self._new_java_obj("ai.h2o.sparkling.ml.algos.H2OGBM", self.uid)

        # Default value for every parameter, registered before user values are applied.
        defaults = dict(
            modelId=None,
            splitRatio=1.0,
            labelCol="label",
            weightCol=None,
            featuresCols=[],
            allStringColumnsToCategorical=True,
            columnsToCategorical=[],
            nfolds=0,
            keepCrossValidationPredictions=False,
            keepCrossValidationFoldAssignment=False,
            parallelizeCrossValidation=True,
            seed=-1,
            distribution="AUTO",
            ntrees=50,
            maxDepth=5,
            minRows=10.0,
            nbins=20,
            nbinsCats=1024,
            minSplitImprovement=1e-5,
            histogramType="AUTO",
            r2Stopping=_jvm().Double.MAX_VALUE,
            nbinsTopLevel=1<<10,
            buildTreeOneNode=False,
            scoreTreeInterval=0,
            sampleRate=1.0,
            sampleRatePerClass=None,
            colSampleRateChangePerLevel=1.0,
            colSampleRatePerTree=1.0,
            learnRate=0.1,
            learnRateAnnealing=1.0,
            colSampleRate=1.0,
            maxAbsLeafnodePred=_jvm().Double.MAX_VALUE,
            predNoiseBandwidth=0.0,
            convertUnknownCategoricalLevelsToNa=False,
            foldCol=None,
            predictionCol="prediction",
            detailedPredictionCol="detailed_prediction",
            withDetailedPredictionCol=False,
            convertInvalidNumbersToNa=False,
        )
        self._setDefault(**defaults)

        # Apply whatever the caller actually passed in.
        self.setParams(**get_input_kwargs(self))
Exemple #2
0
    def setParams(self, modelId=None, splitRatio=1.0, labelCol="label", weightCol=None, featuresCols=[], allStringColumnsToCategorical=True, columnsToCategorical=[],
                  nfolds=0, keepCrossValidationPredictions=False, keepCrossValidationFoldAssignment=False,parallelizeCrossValidation=True,
                  seed=-1, distribution="AUTO", ntrees=50, maxDepth=5, minRows=10.0, nbins=20, nbinsCats=1024, minSplitImprovement=1e-5,
                  histogramType="AUTO", r2Stopping=java_max_double_value,
                  nbinsTopLevel=1<<10, buildTreeOneNode=False, scoreTreeInterval=0,
                  sampleRate=1.0, sampleRatePerClass=None, colSampleRateChangePerLevel=1.0, colSampleRatePerTree=1.0,
                  learnRate=0.1, learnRateAnnealing=1.0, colSampleRate=1.0, maxAbsLeafnodePred=java_max_double_value,
                  predNoiseBandwidth=0.0, convertUnknownCategoricalLevelsToNa=False, foldCol=None, predictionCol="prediction",
                  detailedPredictionCol="detailed_prediction", withDetailedPredictionCol=False,
                  convertInvalidNumbersToNa=False, **deprecatedArgs):
        """Validate the supplied GBM parameters and set them on this estimator.

        The named parameters are not read directly; the supplied arguments are
        obtained through ``get_input_kwargs(self)``. The ``distribution`` and
        ``histogramType`` values are checked with ``validateEnumValue``,
        double-typed parameters are passed through ``set_double_values``, and
        ``sampleRatePerClass`` through ``arrayToDoubleArray``.

        :return: the result of ``self._set(**kwargs)``
        """
        kwargs = get_input_kwargs(self)

        validateEnumValue(self._H2OAlgoCommonParams__getDistributionEnum(), kwargs, "distribution")
        validateEnumValue(self._H2OSharedTreeParams__getHistogramTypeEnum(), kwargs, "histogramType")

        # we need to convert double arguments manually to floats as if we assign integer to double, py4j thinks that
        # the whole type is actually int and we get class cast exception
        #
        # Every name must be its own list element: two adjacent string literals
        # without a comma are concatenated by Python into one (non-matching) name.
        double_types = [
            "minRows", "predNoiseBandwidth", "splitRatio", "learnRate",
            "colSampleRate", "learnRateAnnealing", "maxAbsLeafnodePred",
            "minSplitImprovement", "r2Stopping", "sampleRate",
            "colSampleRateChangePerLevel", "colSampleRatePerTree",
        ]
        set_double_values(kwargs, double_types)

        # We need to also map all doubles in the arrays
        arrayToDoubleArray("sampleRatePerClass", kwargs)

        return self._set(**kwargs)
Exemple #3
0
    def setParams(self,
                  featuresCols=[],
                  labelCol="label",
                  allStringColumnsToCategorical=True,
                  columnsToCategorical=[],
                  splitRatio=1.0,
                  foldCol=None,
                  weightCol=None,
                  ignoredCols=[],
                  includeAlgos=None,
                  excludeAlgos=None,
                  projectName=None,
                  maxRuntimeSecs=3600.0,
                  stoppingRounds=3,
                  stoppingTolerance=0.001,
                  stoppingMetric="AUTO",
                  nfolds=5,
                  convertUnknownCategoricalLevelsToNa=True,
                  seed=-1,
                  sortMetric="AUTO",
                  balanceClasses=False,
                  classSamplingFactors=None,
                  maxAfterBalanceSize=5.0,
                  keepCrossValidationPredictions=True,
                  keepCrossValidationModels=True,
                  maxModels=0,
                  predictionCol="prediction",
                  detailedPredictionCol="detailed_prediction",
                  withDetailedPredictionCol=False,
                  convertInvalidNumbersToNa=False,
                  **deprecatedArgs):
        """Validate the supplied AutoML parameters and set them on this estimator.

        The arguments are taken from ``get_input_kwargs(self)``. Enum-valued
        entries are validated, a ``projectName`` explicitly given as ``None``
        is replaced by 30 random ASCII letters, and double-typed entries are
        passed through ``set_double_values``.

        :return: the result of ``self._set(**kwargs)``
        """
        kwargs = get_input_kwargs(self)

        # Both algorithm lists share the same enum and may be left as None.
        algo_enum_for_include = self._H2OAutoMLParams__getAutomlAlgoEnum()
        validateEnumValues(algo_enum_for_include, kwargs, "includeAlgos", nullEnabled=True)
        algo_enum_for_exclude = self._H2OAutoMLParams__getAutomlAlgoEnum()
        validateEnumValues(algo_enum_for_exclude, kwargs, "excludeAlgos", nullEnabled=True)

        stopping_metric_enum = self._H2OAutoMLParams__getStoppingMetricEnum()
        validateEnumValue(stopping_metric_enum, kwargs, "stoppingMetric")
        sort_metric_enum = self._H2OAutoMLParams__getSortMetricEnum()
        validateEnumValue(sort_metric_enum, kwargs, "sortMetric")

        # Generate a name only when the key is present and explicitly None.
        needs_generated_name = "projectName" in kwargs and kwargs["projectName"] is None
        if needs_generated_name:
            letters = [random.choice(string.ascii_letters) for _ in range(30)]
            kwargs["projectName"] = ''.join(letters)

        # py4j treats a Python int assigned to a double parameter as an int,
        # which ends in a class cast exception, so these are converted to floats.
        float_params = [
            "maxRuntimeSecs",
            "stoppingTolerance",
            "splitRatio",
            "maxAfterBalanceSize",
        ]
        set_double_values(kwargs, float_params)

        return self._set(**kwargs)
Exemple #4
0
 def __init__(self, keep=False, columns=[]):
     """Build the Python wrapper around the JVM ``ColumnPruner``.

     Registers the defaults and then applies the arguments obtained from
     ``get_input_kwargs(self)`` through ``setParams``.
     """
     Initializer.load_sparkling_jar()
     super(ColumnPruner, self).__init__()

     java_class = "ai.h2o.sparkling.ml.features.ColumnPruner"
     self._java_obj = self._new_java_obj(java_class, self.uid)

     defaults = dict(keep=False, columns=[])
     self._setDefault(**defaults)

     self.setParams(**get_input_kwargs(self))
    def __init__(self,
                 predictionCol="prediction",
                 detailedPredictionCol="detailed_prediction",
                 withDetailedPredictionCol=False,
                 featuresCols=[],
                 foldCol=None,
                 weightCol=None,
                 splitRatio=1.0,
                 seed=-1,
                 nfolds=0,
                 allStringColumnsToCategorical=True,
                 columnsToCategorical=[],
                 convertUnknownCategoricalLevelsToNa=False,
                 convertInvalidNumbersToNa=False,
                 modelId=None,
                 keepCrossValidationPredictions=False,
                 keepCrossValidationFoldAssignment=False,
                 parallelizeCrossValidation=True,
                 distribution="AUTO",
                 maxIterations=10,
                 standardize=True,
                 init="Furthest",
                 userPoints=None,
                 estimateK=False,
                 k=2,
                 **deprecatedArgs):
        """Build the Python wrapper around the JVM ``H2OKMeans`` estimator.

        Registers the parameter defaults and then applies the arguments
        obtained from ``get_input_kwargs(self)`` through ``setParams``.
        """
        Initializer.load_sparkling_jar()
        super(H2OKMeans, self).__init__()

        java_class = "ai.h2o.sparkling.ml.algos.H2OKMeans"
        self._java_obj = self._new_java_obj(java_class, self.uid)

        # Default value for every parameter, registered before user values are applied.
        defaults = dict(
            predictionCol="prediction",
            detailedPredictionCol="detailed_prediction",
            withDetailedPredictionCol=False,
            featuresCols=[],
            foldCol=None,
            weightCol=None,
            splitRatio=1.0,
            seed=-1,
            nfolds=0,
            allStringColumnsToCategorical=True,
            columnsToCategorical=[],
            convertUnknownCategoricalLevelsToNa=False,
            convertInvalidNumbersToNa=False,
            modelId=None,
            keepCrossValidationPredictions=False,
            keepCrossValidationFoldAssignment=False,
            parallelizeCrossValidation=True,
            distribution="AUTO",
            maxIterations=10,
            standardize=True,
            init="Furthest",
            userPoints=None,
            estimateK=False,
            k=2,
        )
        self._setDefault(**defaults)

        self.setParams(**get_input_kwargs(self))
Exemple #6
0
    def __init__(self,
                 featuresCols=[],
                 algo=None,
                 splitRatio=1.0,
                 hyperParameters={},
                 labelCol="label",
                 weightCol=None,
                 allStringColumnsToCategorical=True,
                 columnsToCategorical=[],
                 strategy="Cartesian",
                 maxRuntimeSecs=0.0,
                 maxModels=0,
                 seed=-1,
                 stoppingRounds=0,
                 stoppingTolerance=0.001,
                 stoppingMetric="AUTO",
                 nfolds=0,
                 selectBestModelBy="AUTO",
                 selectBestModelDecreasing=True,
                 foldCol=None,
                 convertUnknownCategoricalLevelsToNa=True,
                 predictionCol="prediction",
                 detailedPredictionCol="detailed_prediction",
                 withDetailedPredictionCol=False,
                 convertInvalidNumbersToNa=False,
                 **deprecatedArgs):
        """Build the Python wrapper around the JVM ``H2OGridSearch`` estimator.

        Registers the parameter defaults and then applies the arguments
        obtained from ``get_input_kwargs(self)`` through ``setParams``.
        """
        Initializer.load_sparkling_jar()
        super(H2OGridSearch, self).__init__()

        java_class = "ai.h2o.sparkling.ml.algos.H2OGridSearch"
        self._java_obj = self._new_java_obj(java_class, self.uid)

        # Default value for every parameter, registered before user values are applied.
        defaults = dict(
            featuresCols=[],
            algo=None,
            splitRatio=1.0,
            hyperParameters={},
            labelCol="label",
            weightCol=None,
            allStringColumnsToCategorical=True,
            columnsToCategorical=[],
            strategy="Cartesian",
            maxRuntimeSecs=0.0,
            maxModels=0,
            seed=-1,
            stoppingRounds=0,
            stoppingTolerance=0.001,
            stoppingMetric="AUTO",
            nfolds=0,
            selectBestModelBy="AUTO",
            selectBestModelDecreasing=True,
            foldCol=None,
            convertUnknownCategoricalLevelsToNa=True,
            predictionCol="prediction",
            detailedPredictionCol="detailed_prediction",
            withDetailedPredictionCol=False,
            convertInvalidNumbersToNa=False,
        )
        self._setDefault(**defaults)

        self.setParams(**get_input_kwargs(self))
Exemple #7
0
    def __init__(self, foldCol=None, labelCol="label", inputCols=[], holdoutStrategy = "None",
                 blendedAvgEnabled=False, blendedAvgInflectionPoint=10.0, blendedAvgSmoothing=20.0, noise=0.01, noiseSeed=-1):
        """Build the Python wrapper around the JVM ``H2OTargetEncoder``.

        Registers the parameter defaults and then applies the arguments
        obtained from ``get_input_kwargs(self)`` through ``setParams``.
        """
        Initializer.load_sparkling_jar()
        super(H2OTargetEncoder, self).__init__()

        java_class = "ai.h2o.sparkling.ml.features.H2OTargetEncoder"
        self._java_obj = self._new_java_obj(java_class, self.uid)

        # Default value for every parameter, registered before user values are applied.
        defaults = dict(
            foldCol=None,
            labelCol="label",
            inputCols=[],
            holdoutStrategy="None",
            blendedAvgEnabled=False,
            blendedAvgInflectionPoint=10.0,
            blendedAvgSmoothing=20.0,
            noise=0.01,
            noiseSeed=-1,
        )
        self._setDefault(**defaults)

        self.setParams(**get_input_kwargs(self))
    def setParams(self, featuresCols=[], labelCol="label", allStringColumnsToCategorical=True, columnsToCategorical=[], splitRatio=1.0, foldCol=None,
                  weightCol=None, ignoredCols=[], includeAlgos=None, excludeAlgos=None, projectName=None, maxRuntimeSecs=3600.0, stoppingRounds=3,
                  stoppingTolerance=0.001, stoppingMetric="AUTO", nfolds=5, convertUnknownCategoricalLevelsToNa=True, seed=-1,
                  sortMetric="AUTO", balanceClasses=False, classSamplingFactors=None, maxAfterBalanceSize=5.0, keepCrossValidationPredictions=True,
                  keepCrossValidationModels=True, maxModels=0, predictionCol="prediction", detailedPredictionCol="detailed_prediction",
                  withDetailedPredictionCol=False, convertInvalidNumbersToNa=False, **deprecatedArgs):
        """Set the supplied parameters on this estimator without extra processing.

        The arguments are taken from ``get_input_kwargs(self)`` and forwarded
        unchanged to ``self._set``.

        :return: the result of ``self._set(...)``
        """
        supplied = get_input_kwargs(self)
        return self._set(**supplied)
Exemple #9
0
    def setParams(self, foldCol=None, labelCol="label", inputCols=[], holdoutStrategy = "None",
                  blendedAvgEnabled=False, blendedAvgInflectionPoint=10.0, blendedAvgSmoothing=20.0, noise=0.01, noiseSeed=-1):
        """Validate the supplied target-encoder parameters and set them.

        The arguments are taken from ``get_input_kwargs(self)``;
        ``holdoutStrategy`` is validated and the double-typed entries are
        passed through ``set_double_values``.

        :return: the result of ``self._set(**kwargs)``
        """
        kwargs = get_input_kwargs(self)

        holdout_enum = self.__getHoldoutStrategyEnumName()
        validateEnumValue(holdout_enum, kwargs, "holdoutStrategy")

        # py4j treats a Python int assigned to a double parameter as an int,
        # which ends in a class cast exception, so these are converted to floats.
        float_params = [
            "blendedAvgInflectionPoint",
            "blendedAvgSmoothing",
            "noise",
        ]
        set_double_values(kwargs, float_params)

        return self._set(**kwargs)
    def setParams(self, modelId=None, splitRatio=1.0, labelCol="label", weightCol=None, featuresCols=[], allStringColumnsToCategorical=True, columnsToCategorical=[],
                  nfolds=0, keepCrossValidationPredictions=False, keepCrossValidationFoldAssignment=False, parallelizeCrossValidation=True,
                  seed=-1, distribution="AUTO", epochs=10.0, l1=0.0, l2=0.0, hidden=[200,200], reproducible=False, convertUnknownCategoricalLevelsToNa=False,
                  foldCol=None, predictionCol="prediction", detailedPredictionCol="detailed_prediction", withDetailedPredictionCol=False,
                  convertInvalidNumbersToNa=False, **deprecatedArgs):
        """Validate the supplied deep-learning parameters and set them.

        The arguments are taken from ``get_input_kwargs(self)``;
        ``distribution`` is validated and the double-typed entries are passed
        through ``set_double_values``.

        :return: the result of ``self._set(**kwargs)``
        """
        kwargs = get_input_kwargs(self)

        distribution_enum = self._H2OAlgoCommonParams__getDistributionEnum()
        validateEnumValue(distribution_enum, kwargs, "distribution")

        # py4j treats a Python int assigned to a double parameter as an int,
        # which ends in a class cast exception, so these are converted to floats.
        float_params = [
            "splitRatio",
            "epochs",
            "l1",
            "l2",
        ]
        set_double_values(kwargs, float_params)

        return self._set(**kwargs)
Exemple #11
0
    def setParams(self,
                  featuresCols=[],
                  algo=None,
                  splitRatio=1.0,
                  hyperParameters={},
                  labelCol="label",
                  weightCol=None,
                  allStringColumnsToCategorical=True,
                  columnsToCategorical=[],
                  strategy="Cartesian",
                  maxRuntimeSecs=0.0,
                  maxModels=0,
                  seed=-1,
                  stoppingRounds=0,
                  stoppingTolerance=0.001,
                  stoppingMetric="AUTO",
                  nfolds=0,
                  selectBestModelBy="AUTO",
                  selectBestModelDecreasing=True,
                  foldCol=None,
                  convertUnknownCategoricalLevelsToNa=True,
                  predictionCol="prediction",
                  detailedPredictionCol="detailed_prediction",
                  withDetailedPredictionCol=False,
                  convertInvalidNumbersToNa=False,
                  **deprecatedArgs):
        """Validate the supplied grid-search parameters and set them.

        The arguments are taken from ``get_input_kwargs(self)``. Enum-valued
        entries are validated, double-typed entries are passed through
        ``set_double_values``, and a non-``None`` ``algo`` is removed from the
        kwargs and set directly on the Java object via its own ``_java_obj``.

        :return: the result of ``self._set(**kwargs)``
        """
        kwargs = get_input_kwargs(self)

        strategy_enum = self._H2OGridSearchParams__getStrategyEnum()
        validateEnumValue(strategy_enum, kwargs, "strategy")
        stopping_metric_enum = self._H2OGridSearchParams__getStoppingMetricEnum()
        validateEnumValue(stopping_metric_enum, kwargs, "stoppingMetric")
        select_by_enum = self._H2OGridSearchParams__getSelectBestModelByEnum()
        validateEnumValue(select_by_enum, kwargs, "selectBestModelBy")

        # py4j treats a Python int assigned to a double parameter as an int,
        # which ends in a class cast exception, so these are converted to floats.
        float_params = ["splitRatio", "stoppingTolerance", "maxRuntimeSecs"]
        set_double_values(kwargs, float_params)

        # A given algorithm is handed to the Java side separately and must not
        # reach self._set; an explicit None stays in kwargs.
        if kwargs.get("algo") is not None:
            estimator = kwargs.pop("algo")
            self._java_obj.setAlgo(estimator._java_obj)

        return self._set(**kwargs)
    def __init__(self, modelId=None, splitRatio=1.0, labelCol="label", weightCol=None, featuresCols=[], allStringColumnsToCategorical=True, columnsToCategorical=[],
                 nfolds=0, keepCrossValidationPredictions=False, keepCrossValidationFoldAssignment=False, parallelizeCrossValidation=True,
                 seed=-1, distribution="AUTO", epochs=10.0, l1=0.0, l2=0.0, hidden=[200,200], reproducible=False,
                 convertUnknownCategoricalLevelsToNa=False, foldCol=None, predictionCol="prediction", detailedPredictionCol="detailed_prediction",
                 withDetailedPredictionCol=False, convertInvalidNumbersToNa=False, **deprecatedArgs):
        """Build the Python wrapper around the JVM ``H2ODeepLearning`` estimator.

        Registers the parameter defaults and then applies the arguments
        obtained from ``get_input_kwargs(self)`` through ``setParams``.
        """
        Initializer.load_sparkling_jar()
        super(H2ODeepLearning, self).__init__()
        self._java_obj = self._new_java_obj("ai.h2o.sparkling.ml.algos.H2ODeepLearning", self.uid)

        # Default value for every parameter, registered before user values are applied.
        defaults = dict(
            modelId=None,
            splitRatio=1.0,
            labelCol="label",
            weightCol=None,
            featuresCols=[],
            allStringColumnsToCategorical=True,
            columnsToCategorical=[],
            nfolds=0,
            keepCrossValidationPredictions=False,
            keepCrossValidationFoldAssignment=False,
            parallelizeCrossValidation=True,
            seed=-1,
            distribution="AUTO",
            epochs=10.0,
            l1=0.0,
            l2=0.0,
            hidden=[200,200],
            reproducible=False,
            convertUnknownCategoricalLevelsToNa=False,
            foldCol=None,
            predictionCol="prediction",
            detailedPredictionCol="detailed_prediction",
            withDetailedPredictionCol=False,
            convertInvalidNumbersToNa=False,
        )
        self._setDefault(**defaults)

        self.setParams(**get_input_kwargs(self))
    def setParams(self,
                  predictionCol="prediction",
                  detailedPredictionCol="detailed_prediction",
                  withDetailedPredictionCol=False,
                  featuresCols=[],
                  foldCol=None,
                  weightCol=None,
                  splitRatio=1.0,
                  seed=-1,
                  nfolds=0,
                  allStringColumnsToCategorical=True,
                  columnsToCategorical=[],
                  convertUnknownCategoricalLevelsToNa=False,
                  convertInvalidNumbersToNa=False,
                  modelId=None,
                  keepCrossValidationPredictions=False,
                  keepCrossValidationFoldAssignment=False,
                  parallelizeCrossValidation=True,
                  distribution="AUTO",
                  maxIterations=10,
                  standardize=True,
                  init="Furthest",
                  userPoints=None,
                  estimateK=False,
                  k=2,
                  **deprecatedArgs):
        """Validate the supplied K-Means parameters and set them.

        The arguments are taken from ``get_input_kwargs(self)``.
        ``distribution`` and ``init`` are validated, ``splitRatio`` is passed
        through ``set_double_values``, and a supplied ``userPoints`` value is
        replaced by the result of ``getDoubleArrayArrayFromIntArrayArray``.

        :return: the result of ``self._set(**kwargs)``
        """
        kwargs = get_input_kwargs(self)

        distribution_enum = self._H2OAlgoCommonParams__getDistributionEnum()
        validateEnumValue(distribution_enum, kwargs, "distribution")
        init_enum = self._H2OKMeansParams__getInitEnum()
        validateEnumValue(init_enum, kwargs, "init")

        # py4j treats a Python int assigned to a double parameter as an int,
        # which ends in a class cast exception, so these are converted to floats.
        float_params = ["splitRatio"]
        set_double_values(kwargs, float_params)

        if "userPoints" in kwargs:
            points = kwargs["userPoints"]
            kwargs["userPoints"] = getDoubleArrayArrayFromIntArrayArray(points)

        return self._set(**kwargs)
    def __init__(self, featuresCols=[], labelCol="label", allStringColumnsToCategorical=True, columnsToCategorical=[], splitRatio=1.0, foldCol=None,
                 weightCol=None, ignoredCols=[], includeAlgos=None, excludeAlgos=None, projectName=None, maxRuntimeSecs=3600.0, stoppingRounds=3,
                 stoppingTolerance=0.001, stoppingMetric="AUTO", nfolds=5, convertUnknownCategoricalLevelsToNa=True, seed=-1,
                 sortMetric="AUTO", balanceClasses=False, classSamplingFactors=None, maxAfterBalanceSize=5.0,
                 keepCrossValidationPredictions=True, keepCrossValidationModels=True, maxModels=0,
                 predictionCol="prediction", detailedPredictionCol="detailed_prediction", withDetailedPredictionCol=False,
                 convertInvalidNumbersToNa=False, **deprecatedArgs):
        """Build the Python wrapper around the JVM ``H2OAutoML`` estimator.

        Registers the parameter defaults and then applies the arguments
        obtained from ``get_input_kwargs(self)`` through ``setParams``.
        """
        Initializer.load_sparkling_jar()
        super(H2OAutoML, self).__init__()
        self._java_obj = self._new_java_obj("ai.h2o.sparkling.ml.algos.H2OAutoML", self.uid)

        # Default value for every parameter, registered before user values are applied.
        defaults = dict(
            featuresCols=[],
            labelCol="label",
            allStringColumnsToCategorical=True,
            columnsToCategorical=[],
            splitRatio=1.0,
            foldCol=None,
            weightCol=None,
            ignoredCols=[],
            includeAlgos=None,
            excludeAlgos=None,
            projectName=None,
            maxRuntimeSecs=3600.0,
            stoppingRounds=3,
            stoppingTolerance=0.001,
            stoppingMetric="AUTO",
            nfolds=5,
            convertUnknownCategoricalLevelsToNa=True,
            seed=-1,
            sortMetric="AUTO",
            balanceClasses=False,
            classSamplingFactors=None,
            maxAfterBalanceSize=5.0,
            keepCrossValidationPredictions=True,
            keepCrossValidationModels=True,
            maxModels=0,
            predictionCol="prediction",
            detailedPredictionCol="detailed_prediction",
            withDetailedPredictionCol=False,
            convertInvalidNumbersToNa=False,
        )
        self._setDefault(**defaults)

        self.setParams(**get_input_kwargs(self))
    def setParams(self,
                  modelId=None,
                  splitRatio=1.0,
                  labelCol="label",
                  weightCol=None,
                  featuresCols=[],
                  allStringColumnsToCategorical=True,
                  columnsToCategorical=[],
                  nfolds=0,
                  keepCrossValidationPredictions=False,
                  keepCrossValidationFoldAssignment=False,
                  parallelizeCrossValidation=True,
                  seed=-1,
                  distribution="AUTO",
                  convertUnknownCategoricalLevelsToNa=False,
                  quietMode=True,
                  ntrees=50,
                  nEstimators=0,
                  maxDepth=6,
                  minRows=1.0,
                  minChildWeight=1.0,
                  learnRate=0.3,
                  eta=0.3,
                  learnRateAnnealing=1.0,
                  sampleRate=1.0,
                  subsample=1.0,
                  colSampleRate=1.0,
                  colSampleByLevel=1.0,
                  colSampleRatePerTree=1.0,
                  colsampleBytree=1.0,
                  maxAbsLeafnodePred=0.0,
                  maxDeltaStep=0.0,
                  scoreTreeInterval=0,
                  initialScoreInterval=4000,
                  scoreInterval=4000,
                  minSplitImprovement=0.0,
                  gamma=0.0,
                  nthread=-1,
                  maxBins=256,
                  maxLeaves=0,
                  minSumHessianInLeaf=100.0,
                  minDataInLeaf=0.0,
                  treeMethod="auto",
                  growPolicy="depthwise",
                  booster="gbtree",
                  dmatrixType="auto",
                  regLambda=0.0,
                  regAlpha=0.0,
                  sampleType="uniform",
                  normalizeType="tree",
                  rateDrop=0.0,
                  oneDrop=False,
                  skipDrop=0.0,
                  gpuId=0,
                  backend="auto",
                  foldCol=None,
                  predictionCol="prediction",
                  detailedPredictionCol="detailed_prediction",
                  withDetailedPredictionCol=False,
                  convertInvalidNumbersToNa=False,
                  **deprecatedArgs):
        """Validate the supplied XGBoost parameters and set them on this estimator.

        The named parameters are not read directly; the supplied arguments are
        obtained through ``get_input_kwargs(self)``. Enum-valued entries are
        checked with ``validateEnumValue`` and double-typed entries are passed
        through ``set_double_values``.

        :return: the result of ``self._set(**kwargs)``
        """
        kwargs = get_input_kwargs(self)

        validateEnumValue(self._H2OAlgoCommonParams__getDistributionEnum(),
                          kwargs, "distribution")
        validateEnumValue(self._H2OXGBoostParams__getTreeMethodEnum(), kwargs,
                          "treeMethod")
        validateEnumValue(self._H2OXGBoostParams__getGrowPolicyEnum(), kwargs,
                          "growPolicy")
        validateEnumValue(self._H2OXGBoostParams__getBoosterEnum(), kwargs,
                          "booster")
        validateEnumValue(self._H2OXGBoostParams__getDmatrixTypeEnum(), kwargs,
                          "dmatrixType")
        validateEnumValue(self._H2OXGBoostParams__getSampleTypeEnum(), kwargs,
                          "sampleType")
        validateEnumValue(self._H2OXGBoostParams__getNormalizeTypeEnum(),
                          kwargs, "normalizeType")
        validateEnumValue(self._H2OXGBoostParams__getBackendEnum(), kwargs,
                          "backend")

        # we need to convert double arguments manually to floats as if we assign integer to double, py4j thinks that
        # the whole type is actually int and we get class cast exception
        #
        # Every name must be its own list element: two adjacent string literals
        # without a comma are concatenated by Python into one (non-matching) name.
        double_types = [
            "splitRatio", "minRows", "minChildWeight", "learnRate", "eta",
            "learnRateAnnealing",
            "sampleRate", "subsample", "colSampleRate", "colSampleByLevel",
            "colSampleRatePerTree", "colsampleBytree", "maxAbsLeafnodePred",
            "maxDeltaStep", "minSplitImprovement", "gamma",
            "minSumHessianInLeaf", "minDataInLeaf", "regLambda", "regAlpha",
            "rateDrop", "skipDrop",
        ]
        set_double_values(kwargs, double_types)
        return self._set(**kwargs)
    def __init__(self,
                 modelId=None,
                 splitRatio=1.0,
                 labelCol="label",
                 weightCol=None,
                 featuresCols=[],
                 allStringColumnsToCategorical=True,
                 columnsToCategorical=[],
                 nfolds=0,
                 keepCrossValidationPredictions=False,
                 keepCrossValidationFoldAssignment=False,
                 parallelizeCrossValidation=True,
                 seed=-1,
                 distribution="AUTO",
                 convertUnknownCategoricalLevelsToNa=False,
                 quietMode=True,
                 ntrees=50,
                 nEstimators=0,
                 maxDepth=6,
                 minRows=1.0,
                 minChildWeight=1.0,
                 learnRate=0.3,
                 eta=0.3,
                 learnRateAnnealing=1.0,
                 sampleRate=1.0,
                 subsample=1.0,
                 colSampleRate=1.0,
                 colSampleByLevel=1.0,
                 colSampleRatePerTree=1.0,
                 colsampleBytree=1.0,
                 maxAbsLeafnodePred=0.0,
                 maxDeltaStep=0.0,
                 scoreTreeInterval=0,
                 initialScoreInterval=4000,
                 scoreInterval=4000,
                 minSplitImprovement=0.0,
                 gamma=0.0,
                 nthread=-1,
                 maxBins=256,
                 maxLeaves=0,
                 minSumHessianInLeaf=100.0,
                 minDataInLeaf=0.0,
                 treeMethod="auto",
                 growPolicy="depthwise",
                 booster="gbtree",
                 dmatrixType="auto",
                 regLambda=0.0,
                 regAlpha=0.0,
                 sampleType="uniform",
                 normalizeType="tree",
                 rateDrop=0.0,
                 oneDrop=False,
                 skipDrop=0.0,
                 gpuId=0,
                 backend="auto",
                 foldCol=None,
                 predictionCol="prediction",
                 detailedPredictionCol="detailed_prediction",
                 withDetailedPredictionCol=False,
                 convertInvalidNumbersToNa=False,
                 **deprecatedArgs):
        """Build the Python wrapper around the JVM ``H2OXGBoost`` estimator.

        Registers the parameter defaults and then applies the arguments
        obtained from ``get_input_kwargs(self)`` through ``setParams``.
        """
        Initializer.load_sparkling_jar()
        super(H2OXGBoost, self).__init__()

        java_class = "ai.h2o.sparkling.ml.algos.H2OXGBoost"
        self._java_obj = self._new_java_obj(java_class, self.uid)

        # Default value for every parameter, registered before user values are applied.
        defaults = dict(
            modelId=None,
            splitRatio=1.0,
            labelCol="label",
            weightCol=None,
            featuresCols=[],
            allStringColumnsToCategorical=True,
            columnsToCategorical=[],
            nfolds=0,
            keepCrossValidationPredictions=False,
            keepCrossValidationFoldAssignment=False,
            parallelizeCrossValidation=True,
            seed=-1,
            distribution="AUTO",
            convertUnknownCategoricalLevelsToNa=False,
            quietMode=True,
            ntrees=50,
            nEstimators=0,
            maxDepth=6,
            minRows=1.0,
            minChildWeight=1.0,
            learnRate=0.3,
            eta=0.3,
            learnRateAnnealing=1.0,
            sampleRate=1.0,
            subsample=1.0,
            colSampleRate=1.0,
            colSampleByLevel=1.0,
            colSampleRatePerTree=1.0,
            colsampleBytree=1.0,
            maxAbsLeafnodePred=0.0,
            maxDeltaStep=0.0,
            scoreTreeInterval=0,
            initialScoreInterval=4000,
            scoreInterval=4000,
            minSplitImprovement=0.0,
            gamma=0.0,
            nthread=-1,
            maxBins=256,
            maxLeaves=0,
            minSumHessianInLeaf=100.0,
            minDataInLeaf=0.0,
            treeMethod="auto",
            growPolicy="depthwise",
            booster="gbtree",
            dmatrixType="auto",
            regLambda=0.0,
            regAlpha=0.0,
            sampleType="uniform",
            normalizeType="tree",
            rateDrop=0.0,
            oneDrop=False,
            skipDrop=0.0,
            gpuId=0,
            backend="auto",
            foldCol=None,
            predictionCol="prediction",
            detailedPredictionCol="detailed_prediction",
            withDetailedPredictionCol=False,
            convertInvalidNumbersToNa=False,
        )
        self._setDefault(**defaults)

        self.setParams(**get_input_kwargs(self))
Exemple #17
0
 def setParams(self, keep=False, columns=[]):
     """Set the supplied parameters on this transformer.

     The arguments are taken from ``get_input_kwargs(self)`` and forwarded
     unchanged to ``self._set``.

     :return: the result of ``self._set(...)``
     """
     supplied = get_input_kwargs(self)
     return self._set(**supplied)