예제 #1
0
    def setParams(self, modelId=None, splitRatio=1.0, labelCol="label", weightCol=None, featuresCols=[], allStringColumnsToCategorical=True, columnsToCategorical=[],
                  nfolds=0, keepCrossValidationPredictions=False, keepCrossValidationFoldAssignment=False,parallelizeCrossValidation=True,
                  seed=-1, distribution="AUTO", ntrees=50, maxDepth=5, minRows=10.0, nbins=20, nbinsCats=1024, minSplitImprovement=1e-5,
                  histogramType="AUTO", r2Stopping=java_max_double_value,
                  nbinsTopLevel=1<<10, buildTreeOneNode=False, scoreTreeInterval=0,
                  sampleRate=1.0, sampleRatePerClass=None, colSampleRateChangePerLevel=1.0, colSampleRatePerTree=1.0,
                  learnRate=0.1, learnRateAnnealing=1.0, colSampleRate=1.0, maxAbsLeafnodePred=java_max_double_value,
                  predNoiseBandwidth=0.0, convertUnknownCategoricalLevelsToNa=False, foldCol=None, predictionCol="prediction",
                  detailedPredictionCol="detailed_prediction", withDetailedPredictionCol=False,
                  convertInvalidNumbersToNa=False, **deprecatedArgs):
        """Validate, normalise and apply the parameters given as keyword arguments.

        The keyword arguments are obtained through ``get_input_kwargs(self)``
        (presumably the arguments the caller actually passed - confirm against
        that helper). ``distribution`` and ``histogramType`` are checked with
        ``validateEnumValue``, the double-typed parameters are handed to
        ``set_double_values``, ``sampleRatePerClass`` is handed to
        ``arrayToDoubleArray``, and the result is forwarded to ``self._set``.

        ``**deprecatedArgs`` is accepted by the signature but is not read
        directly in this method body.

        :return: whatever ``self._set(**kwargs)`` returns.
        """
        kwargs = get_input_kwargs(self)

        validateEnumValue(self._H2OAlgoCommonParams__getDistributionEnum(), kwargs, "distribution")
        validateEnumValue(self._H2OSharedTreeParams__getHistogramTypeEnum(), kwargs, "histogramType")

        # we need to convert double arguments manually to floats as if we assign integer to double, py4j thinks that
        # the whole type is actually int and we get class cast exception
        #
        # Every name must be its own list element: two adjacent string literals
        # without a separating comma are concatenated into a single (bogus) name,
        # which would silently exclude both parameters from the conversion.
        double_types = [
            "minRows", "predNoiseBandwidth", "splitRatio", "learnRate",
            "colSampleRate", "learnRateAnnealing", "maxAbsLeafnodePred",
            "minSplitImprovement", "r2Stopping", "sampleRate",
            "colSampleRateChangePerLevel", "colSampleRatePerTree",
        ]
        set_double_values(kwargs, double_types)

        # We need to also map all doubles in the arrays
        arrayToDoubleArray("sampleRatePerClass", kwargs)

        return self._set(**kwargs)
예제 #2
0
    def setParams(self,
                  featuresCols=[],
                  labelCol="label",
                  allStringColumnsToCategorical=True,
                  columnsToCategorical=[],
                  splitRatio=1.0,
                  foldCol=None,
                  weightCol=None,
                  ignoredCols=[],
                  includeAlgos=None,
                  excludeAlgos=None,
                  projectName=None,
                  maxRuntimeSecs=3600.0,
                  stoppingRounds=3,
                  stoppingTolerance=0.001,
                  stoppingMetric="AUTO",
                  nfolds=5,
                  convertUnknownCategoricalLevelsToNa=True,
                  seed=-1,
                  sortMetric="AUTO",
                  balanceClasses=False,
                  classSamplingFactors=None,
                  maxAfterBalanceSize=5.0,
                  keepCrossValidationPredictions=True,
                  keepCrossValidationModels=True,
                  maxModels=0,
                  predictionCol="prediction",
                  detailedPredictionCol="detailed_prediction",
                  withDetailedPredictionCol=False,
                  convertInvalidNumbersToNa=False,
                  **deprecatedArgs):
        """Validate, normalise and apply the parameters given as keyword arguments.

        Steps performed on the dictionary returned by ``get_input_kwargs(self)``:

        * ``includeAlgos`` and ``excludeAlgos`` are checked with
          ``validateEnumValues`` (``nullEnabled=True``);
        * ``stoppingMetric`` and ``sortMetric`` are checked with
          ``validateEnumValue``;
        * a ``projectName`` that is present but ``None`` is replaced by 30
          random ASCII letters;
        * the double-typed parameters are handed to ``set_double_values``.

        ``**deprecatedArgs`` is accepted by the signature but is not read
        directly in this method body.

        :return: whatever ``self._set(**params)`` returns.
        """
        params = get_input_kwargs(self)

        # Both algorithm lists are validated against the same enum.
        for algos_key in ("includeAlgos", "excludeAlgos"):
            validateEnumValues(self._H2OAutoMLParams__getAutomlAlgoEnum(),
                               params, algos_key, nullEnabled=True)

        validateEnumValue(self._H2OAutoMLParams__getStoppingMetricEnum(),
                          params, "stoppingMetric")
        validateEnumValue(self._H2OAutoMLParams__getSortMetricEnum(),
                          params, "sortMetric")

        # Only an explicitly present None triggers a generated project name.
        name_is_explicit_none = ("projectName" in params
                                 and params["projectName"] is None)
        if name_is_explicit_none:
            letters = [random.choice(string.ascii_letters) for _ in range(30)]
            params["projectName"] = ''.join(letters)

        # Double arguments have to be converted to floats by hand: when an
        # integer is assigned to a double, py4j treats the whole type as int
        # and a class cast exception follows.
        set_double_values(params, [
            "maxRuntimeSecs", "stoppingTolerance", "splitRatio",
            "maxAfterBalanceSize"
        ])
        return self._set(**params)
예제 #3
0
    def setParams(self, foldCol=None, labelCol="label", inputCols=[], holdoutStrategy = "None",
                  blendedAvgEnabled=False, blendedAvgInflectionPoint=10.0, blendedAvgSmoothing=20.0, noise=0.01, noiseSeed=-1):
        """Validate, normalise and apply the parameters given as keyword arguments.

        ``holdoutStrategy`` is checked with ``validateEnumValue``, the
        double-typed parameters are handed to ``set_double_values``, and the
        resulting dictionary is forwarded to ``self._set``.

        :return: whatever ``self._set(**params)`` returns.
        """
        params = get_input_kwargs(self)
        validateEnumValue(self.__getHoldoutStrategyEnumName(), params, "holdoutStrategy")

        # Double arguments have to be converted to floats by hand: when an
        # integer is assigned to a double, py4j treats the whole type as int
        # and a class cast exception follows.
        set_double_values(
            params,
            ["blendedAvgInflectionPoint", "blendedAvgSmoothing", "noise"],
        )
        return self._set(**params)
    def setParams(self, modelId=None, splitRatio=1.0, labelCol="label", weightCol=None, featuresCols=[], allStringColumnsToCategorical=True, columnsToCategorical=[],
                  nfolds=0, keepCrossValidationPredictions=False, keepCrossValidationFoldAssignment=False, parallelizeCrossValidation=True,
                  seed=-1, distribution="AUTO", epochs=10.0, l1=0.0, l2=0.0, hidden=[200,200], reproducible=False, convertUnknownCategoricalLevelsToNa=False,
                  foldCol=None, predictionCol="prediction", detailedPredictionCol="detailed_prediction", withDetailedPredictionCol=False,
                  convertInvalidNumbersToNa=False, **deprecatedArgs):
        """Validate, normalise and apply the parameters given as keyword arguments.

        ``distribution`` is checked with ``validateEnumValue``, the
        double-typed parameters are handed to ``set_double_values``, and the
        resulting dictionary is forwarded to ``self._set``.

        ``**deprecatedArgs`` is accepted by the signature but is not read
        directly in this method body.

        :return: whatever ``self._set(**params)`` returns.
        """
        params = get_input_kwargs(self)
        validateEnumValue(self._H2OAlgoCommonParams__getDistributionEnum(), params, "distribution")

        # Double arguments have to be converted to floats by hand: when an
        # integer is assigned to a double, py4j treats the whole type as int
        # and a class cast exception follows.
        set_double_values(params, ["splitRatio", "epochs", "l1", "l2"])
        return self._set(**params)
예제 #5
0
    def setParams(self,
                  featuresCols=[],
                  algo=None,
                  splitRatio=1.0,
                  hyperParameters={},
                  labelCol="label",
                  weightCol=None,
                  allStringColumnsToCategorical=True,
                  columnsToCategorical=[],
                  strategy="Cartesian",
                  maxRuntimeSecs=0.0,
                  maxModels=0,
                  seed=-1,
                  stoppingRounds=0,
                  stoppingTolerance=0.001,
                  stoppingMetric="AUTO",
                  nfolds=0,
                  selectBestModelBy="AUTO",
                  selectBestModelDecreasing=True,
                  foldCol=None,
                  convertUnknownCategoricalLevelsToNa=True,
                  predictionCol="prediction",
                  detailedPredictionCol="detailed_prediction",
                  withDetailedPredictionCol=False,
                  convertInvalidNumbersToNa=False,
                  **deprecatedArgs):
        """Validate, normalise and apply the parameters given as keyword arguments.

        ``strategy``, ``stoppingMetric`` and ``selectBestModelBy`` are checked
        with ``validateEnumValue`` and the double-typed parameters are handed
        to ``set_double_values``. A non-``None`` ``algo`` is removed from the
        dictionary and its ``_java_obj`` is passed to
        ``self._java_obj.setAlgo`` instead; everything left is forwarded to
        ``self._set``.

        ``**deprecatedArgs`` is accepted by the signature but is not read
        directly in this method body.

        :return: whatever ``self._set(**params)`` returns.
        """
        params = get_input_kwargs(self)

        # (enum getter, parameter name) pairs, validated in this order.
        enum_checks = (
            (self._H2OGridSearchParams__getStrategyEnum, "strategy"),
            (self._H2OGridSearchParams__getStoppingMetricEnum, "stoppingMetric"),
            (self._H2OGridSearchParams__getSelectBestModelByEnum, "selectBestModelBy"),
        )
        for enum_getter, param_name in enum_checks:
            validateEnumValue(enum_getter(), params, param_name)

        # Double arguments have to be converted to floats by hand: when an
        # integer is assigned to a double, py4j treats the whole type as int
        # and a class cast exception follows.
        set_double_values(params,
                          ["splitRatio", "stoppingTolerance", "maxRuntimeSecs"])

        # A supplied algorithm is set on the Java object directly and must not
        # reach self._set; an explicit None stays in the dictionary untouched.
        has_algo = "algo" in params and params["algo"] is not None
        if has_algo:
            algo_estimator = params.pop("algo")
            self._java_obj.setAlgo(algo_estimator._java_obj)

        return self._set(**params)
예제 #6
0
    def setParams(self,
                  predictionCol="prediction",
                  detailedPredictionCol="detailed_prediction",
                  withDetailedPredictionCol=False,
                  featuresCols=[],
                  foldCol=None,
                  weightCol=None,
                  splitRatio=1.0,
                  seed=-1,
                  nfolds=0,
                  allStringColumnsToCategorical=True,
                  columnsToCategorical=[],
                  convertUnknownCategoricalLevelsToNa=False,
                  convertInvalidNumbersToNa=False,
                  modelId=None,
                  keepCrossValidationPredictions=False,
                  keepCrossValidationFoldAssignment=False,
                  parallelizeCrossValidation=True,
                  distribution="AUTO",
                  maxIterations=10,
                  standardize=True,
                  init="Furthest",
                  userPoints=None,
                  estimateK=False,
                  k=2,
                  **deprecatedArgs):
        """Validate, normalise and apply the parameters given as keyword arguments.

        ``distribution`` and ``init`` are checked with ``validateEnumValue``,
        ``splitRatio`` is handed to ``set_double_values``, and ``userPoints``
        (when present in the dictionary) is replaced by the result of
        ``getDoubleArrayArrayFromIntArrayArray``. The resulting dictionary is
        forwarded to ``self._set``.

        ``**deprecatedArgs`` is accepted by the signature but is not read
        directly in this method body.

        :return: whatever ``self._set(**params)`` returns.
        """
        params = get_input_kwargs(self)

        validateEnumValue(
            self._H2OAlgoCommonParams__getDistributionEnum(), params,
            "distribution")
        validateEnumValue(
            self._H2OKMeansParams__getInitEnum(), params, "init")

        # Double arguments have to be converted to floats by hand: when an
        # integer is assigned to a double, py4j treats the whole type as int
        # and a class cast exception follows.
        set_double_values(params, ["splitRatio"])

        if "userPoints" in params:
            raw_points = params["userPoints"]
            params["userPoints"] = getDoubleArrayArrayFromIntArrayArray(raw_points)

        return self._set(**params)
예제 #7
0
    def setParams(self,
                  modelId=None,
                  splitRatio=1.0,
                  labelCol="label",
                  weightCol=None,
                  featuresCols=[],
                  allStringColumnsToCategorical=True,
                  columnsToCategorical=[],
                  nfolds=0,
                  keepCrossValidationPredictions=False,
                  keepCrossValidationFoldAssignment=False,
                  parallelizeCrossValidation=True,
                  seed=-1,
                  distribution="AUTO",
                  convertUnknownCategoricalLevelsToNa=False,
                  quietMode=True,
                  ntrees=50,
                  nEstimators=0,
                  maxDepth=6,
                  minRows=1.0,
                  minChildWeight=1.0,
                  learnRate=0.3,
                  eta=0.3,
                  learnRateAnnealing=1.0,
                  sampleRate=1.0,
                  subsample=1.0,
                  colSampleRate=1.0,
                  colSampleByLevel=1.0,
                  colSampleRatePerTree=1.0,
                  colsampleBytree=1.0,
                  maxAbsLeafnodePred=0.0,
                  maxDeltaStep=0.0,
                  scoreTreeInterval=0,
                  initialScoreInterval=4000,
                  scoreInterval=4000,
                  minSplitImprovement=0.0,
                  gamma=0.0,
                  nthread=-1,
                  maxBins=256,
                  maxLeaves=0,
                  minSumHessianInLeaf=100.0,
                  minDataInLeaf=0.0,
                  treeMethod="auto",
                  growPolicy="depthwise",
                  booster="gbtree",
                  dmatrixType="auto",
                  regLambda=0.0,
                  regAlpha=0.0,
                  sampleType="uniform",
                  normalizeType="tree",
                  rateDrop=0.0,
                  oneDrop=False,
                  skipDrop=0.0,
                  gpuId=0,
                  backend="auto",
                  foldCol=None,
                  predictionCol="prediction",
                  detailedPredictionCol="detailed_prediction",
                  withDetailedPredictionCol=False,
                  convertInvalidNumbersToNa=False,
                  **deprecatedArgs):
        """Validate, normalise and apply the parameters given as keyword arguments.

        The keyword arguments are obtained through ``get_input_kwargs(self)``
        (presumably the arguments the caller actually passed - confirm against
        that helper). The enum-valued parameters (``distribution``,
        ``treeMethod``, ``growPolicy``, ``booster``, ``dmatrixType``,
        ``sampleType``, ``normalizeType``, ``backend``) are checked with
        ``validateEnumValue``, the double-typed parameters are handed to
        ``set_double_values``, and the result is forwarded to ``self._set``.

        ``**deprecatedArgs`` is accepted by the signature but is not read
        directly in this method body.

        :return: whatever ``self._set(**kwargs)`` returns.
        """
        kwargs = get_input_kwargs(self)

        validateEnumValue(self._H2OAlgoCommonParams__getDistributionEnum(),
                          kwargs, "distribution")
        validateEnumValue(self._H2OXGBoostParams__getTreeMethodEnum(), kwargs,
                          "treeMethod")
        validateEnumValue(self._H2OXGBoostParams__getGrowPolicyEnum(), kwargs,
                          "growPolicy")
        validateEnumValue(self._H2OXGBoostParams__getBoosterEnum(), kwargs,
                          "booster")
        validateEnumValue(self._H2OXGBoostParams__getDmatrixTypeEnum(), kwargs,
                          "dmatrixType")
        validateEnumValue(self._H2OXGBoostParams__getSampleTypeEnum(), kwargs,
                          "sampleType")
        validateEnumValue(self._H2OXGBoostParams__getNormalizeTypeEnum(),
                          kwargs, "normalizeType")
        validateEnumValue(self._H2OXGBoostParams__getBackendEnum(), kwargs,
                          "backend")

        # we need to convert double arguments manually to floats as if we assign integer to double, py4j thinks that
        # the whole type is actually int and we get class cast exception
        #
        # Every name must be its own list element: two adjacent string literals
        # without a separating comma are concatenated into a single (bogus) name,
        # which would silently exclude both parameters from the conversion.
        double_types = [
            "splitRatio", "minRows", "minChildWeight", "learnRate", "eta",
            "learnRateAnnealing",
            "sampleRate", "subsample", "colSampleRate", "colSampleByLevel",
            "colSampleRatePerTree", "colsampleBytree", "maxAbsLeafnodePred",
            "maxDeltaStep", "minSplitImprovement", "gamma",
            "minSumHessianInLeaf", "minDataInLeaf", "regLambda", "regAlpha",
            "rateDrop", "skipDrop",
        ]
        set_double_values(kwargs, double_types)
        return self._set(**kwargs)