예제 #1
0
    def __init__(self, sklearnEstimator=None, keyCols=None, xCol=None, outputCol=None, yCol=None,
                 estimatorType=None, keyedSklearnEstimators=None, outputType=None):
        """Validate the constructor arguments and initialise the model's params.

        The constructor is used by :class:`KeyedEstimator` to generate a :class:`KeyedModel`; it
        is not intended for external use.

        :param sklearnEstimator: Not inspected by this constructor.
        :param keyCols: Non-empty collection of key column names.
        :param xCol: Not inspected by this constructor.
        :param outputCol: Not inspected by this constructor.
        :param yCol: Label column; must be non-``None`` exactly when ``estimatorType`` is
                     ``"predictor"``.
        :param estimatorType: One of ``"transformer"``, ``"clusterer"`` or ``"predictor"``.
        :param keyedSklearnEstimators: Object whose ``columns`` must be exactly the key columns
                                       plus ``"estimator"`` (presumably a DataFrame -- confirm
                                       against the caller).
        :param outputType: Must equal ``Vector.__UDT__`` for transformers and ``LongType()`` for
                           clusterers; when ``yCol`` is truthy its type must be listed in
                           ``KeyedModel._sql_types``.

        :raise AssertionError: if any of the consistency checks above fails.
        :raise TypeError: if ``yCol`` is truthy and ``type(outputType)`` is not in
                          ``KeyedModel._sql_types``.
        """

        # Named up front so the failure message below can refer to it.
        isLabelled = estimatorType == "predictor"
        assert isLabelled == (yCol is not None), \
            "yCol is {}, but it should {}be None for a {} estimatorType".format(
                yCol, "not " if isLabelled else "", estimatorType)
        assert estimatorType in ["transformer", "clusterer", "predictor"], estimatorType

        def implies(a, b):
            return not a or b
        assert implies(estimatorType == "transformer", outputType == Vector.__UDT__), outputType
        assert implies(estimatorType == "clusterer", outputType == LongType()), outputType
        assert len(keyCols) > 0, len(keyCols)
        # Parenthesised so that .format() applies to the whole message rather than only to the
        # last string literal (method calls bind tighter than ``+``).
        assert set(keyedSklearnEstimators.columns) == (set(keyCols) | {"estimator"}), \
            ("keyedSklearnEstimator columns {} should have both key columns {} and "
             "an estimator column").format(keyedSklearnEstimators.columns, keyCols)

        # The superclass expects Param attributes to already be set, so we only init it after
        # doing so.
        for paramName, paramSpec in KeyedModel._paramSpecs.items():
            setattr(self, paramName, Param(Params._dummy(), paramName, paramSpec["doc"]))
        super(KeyedModel, self).__init__()
        if yCol and type(outputType) not in KeyedModel._sql_types:
            raise TypeError("Output type {} is not an AtomicType (expected for {} estimator)"
                            .format(outputType, estimatorType))
        # NOTE(review): _input_kwargs is not assigned in this method; presumably a decorator on
        # the constructor populates it -- confirm.
        self._set(**self._input_kwargs)
예제 #2
0
파일: tuning.py 프로젝트: 308306362/spark
 def copy(self, extra=None):
     """
     Creates a copy of this instance, also copying the estimator and
     evaluator when they are set.

     :param extra: extra parameters passed on to every ``copy`` call;
                   ``None`` is treated as an empty dict
     :return: the new instance
     """
     # A fresh dict per call avoids sharing one mutable default between calls.
     if extra is None:
         extra = dict()
     newCV = Params.copy(self, extra)
     if self.isSet(self.estimator):
         newCV.setEstimator(self.getEstimator().copy(extra))
     # estimatorParamMaps remain the same
     if self.isSet(self.evaluator):
         newCV.setEvaluator(self.getEvaluator().copy(extra))
     return newCV
예제 #3
0
파일: tuning.py 프로젝트: EntilZha/spark
 def copy(self, extra=None):
     """
     Creates a copy of this instance, also copying the estimator and
     evaluator when they are set.

     :param extra: extra parameters passed on to every ``copy`` call;
                   ``None`` is treated as an empty dict
     :return: the new instance
     """
     extra = dict() if extra is None else extra
     duplicate = Params.copy(self, extra)
     if self.isSet(self.estimator):
         estimatorCopy = self.getEstimator().copy(extra)
         duplicate.setEstimator(estimatorCopy)
     # estimatorParamMaps are left untouched on the copy
     if self.isSet(self.evaluator):
         evaluatorCopy = self.getEvaluator().copy(extra)
         duplicate.setEvaluator(evaluatorCopy)
     return duplicate
예제 #4
0
    def __init__(self, sklearnEstimator=None, keyCols=["key"], xCol="features",
                 outputCol="output", yCol=None, estimatorType=None):
        """Configure a keyed estimator around a scikit-learn estimator.

        The ordered list of ``keyCols`` determines the set of groups to which each
        ``sklearnEstimator`` is applied. For every unique ``keyCols`` value, the remaining
        columns are aggregated and used to train the scikit-learn estimator.

        ``estimatorType`` inference is conducted as follows: if ``yCol`` is specified, then this
        is assumed to be of ``"predictor"`` type, else a ``"transformer"`` or a ``"clusterer"``,
        depending on the estimator having the ``transform()`` or ``fit_predict()`` attributes,
        with ``"clusterer"`` being chosen in case both attributes are present.

        :param sklearnEstimator: An instance of a scikit-learn estimator, with parameters
                                 configured as desired for each user.
        :param keyCols: Key column names list used to group data to which models are applied,
                        where order implies lexicographical importance.
        :param xCol: Name of column of input features used for training and
                     transformation/prediction.
        :param yCol: Specifies name of label column for regression or classification pipelines.
                     Required for predictors, must be unspecified or ``None`` for transformers.
        :param estimatorType: Identifies the type of scikit-learn estimator being used, which
                              changes the interface the ``sklearnEstimator`` is expected to have.
                              This parameter's value is inferred using reflection by default,
                              but may be manually overridden.

        :raise ValueError: if ``sklearnEstimator`` is ``None``.
        :raise ValueError: if ``sklearnEstimator`` does not derive from
                           ``sklearn.base.BaseEstimator``.
        :raise ValueError: if ``keyCols`` is empty.
        :raise ValueError: if any column has the name ``"estimator"``
        :raise AttributeError: if reflection checks indicate that parameter estimator is not
                               equipped with a ``fit()`` method.
        """
        # Argument validation, in the same order as the documented errors.
        if sklearnEstimator is None:
            raise ValueError("sklearnEstimator should be specified")
        if not isinstance(sklearnEstimator, sklearn.base.BaseEstimator):
            raise ValueError("sklearnEstimator should be an sklearn.base.BaseEstimator")
        if not len(keyCols):
            raise ValueError("keyCols should not be empty")
        allColumns = keyCols + [xCol, yCol]
        if "estimator" in allColumns:
            raise ValueError("keyCols should not contain a column named \"estimator\"")

        # Param attributes have to exist before the superclass initialiser runs.
        specs = KeyedEstimator._paramSpecs
        for name, spec in specs.items():
            setattr(self, name, Param(Params._dummy(), name, spec["doc"]))
        super(KeyedEstimator, self).__init__()

        # Only specs that declare a default contribute one.
        defaults = {}
        for name, spec in specs.items():
            if "default" in spec:
                defaults[name] = spec["default"]
        self._setDefault(**defaults)

        inferred = KeyedEstimator._inferredParams(sklearnEstimator, self._input_kwargs)
        self._set(**inferred)

        self._verifyEstimatorType()
예제 #5
0
    def copy(self, extra=None):
        """
        Creates a copy of this instance whose stages are themselves copies.

        :param extra: extra parameters passed on to every ``copy`` call;
                      ``None`` is treated as an empty dict
        :returns: new instance
        """
        extra = dict() if extra is None else extra
        duplicate = Params.copy(self, extra)
        copiedStages = []
        for stage in duplicate.getStages():
            copiedStages.append(stage.copy(extra))
        return duplicate.setStages(copiedStages)
예제 #6
0
파일: tuning.py 프로젝트: ksakellis/spark
 def copy(self, extra=None):
     """
     Creates a copy of this instance via ``Params.copy`` and then replaces
     the estimator and evaluator on the copy with copies of their own
     (each only when it is set on this instance).
     :param extra: Extra parameters to copy to the new instance;
                   ``None`` is treated as an empty dict
     :return: Copy of this instance
     """
     extra = dict() if extra is None else extra
     duplicate = Params.copy(self, extra)
     if self.isSet(self.estimator):
         estimatorCopy = self.getEstimator().copy(extra)
         duplicate.setEstimator(estimatorCopy)
     # estimatorParamMaps are left untouched on the copy
     if self.isSet(self.evaluator):
         evaluatorCopy = self.getEvaluator().copy(extra)
         duplicate.setEvaluator(evaluatorCopy)
     return duplicate
예제 #7
0
class HasStepSizeCol(Params):
    """
    Mixin for step size column parameter: the step size is read from a
    dataframe column instead of being a constant across all samples.
    """

    stepSizeCol = Param(
        Params._dummy(),
        "stepSizeCol",
        "stepSize parameter from dataframe column instead of a constant value across all samples",
        TypeConverters.toString)

    def __init__(self):
        super(HasStepSizeCol, self).__init__()

    def getStepSizeCol(self):
        """
        Gets the value of stepSizeCol or its default value
        """
        # Look up the Param itself; passing the bound getter method here
        # would not resolve to any param.
        return self.getOrDefault(self.stepSizeCol)
예제 #8
0
    def test_params(self):
        """Check param discovery, default/set/defined state and explainParams output."""
        testParams = TestParams()
        maxIter, inputCol, seed = testParams.maxIter, testParams.inputCol, testParams.seed

        self.assertEqual(testParams.params, [inputCol, maxIter, seed])

        # maxIter: has a default of 10 but has not been set explicitly.
        self.assertTrue(testParams.hasParam(maxIter.name))
        self.assertTrue(testParams.hasDefault(maxIter))
        self.assertFalse(testParams.isSet(maxIter))
        self.assertTrue(testParams.isDefined(maxIter))
        self.assertEqual(testParams.getMaxIter(), 10)

        # inputCol: neither default nor explicit value, so the getter raises.
        self.assertTrue(testParams.hasParam(inputCol.name))
        self.assertFalse(testParams.hasDefault(inputCol))
        self.assertFalse(testParams.isSet(inputCol))
        self.assertFalse(testParams.isDefined(inputCol))
        with self.assertRaises(KeyError):
            testParams.getInputCol()

        # A param created outside the instance must be rejected by set().
        foreignParam = Param(
            Params._dummy(),
            "otherParam",
            "Parameter used to test that set raises an error for a non-member parameter.",
            typeConverter=TypeConverters.toString,
        )
        with self.assertRaises(ValueError):
            testParams.set(foreignParam, "value")

        # Since the default is normally random, set it to a known number for debug str
        testParams._setDefault(seed=41)

        expectedLines = [
            "inputCol: input column name. (undefined)",
            "maxIter: max number of iterations (>= 0). (default: 10)",
            "seed: random seed. (default: 41)",
        ]
        self.assertEqual(testParams.explainParams(), "\n".join(expectedLines))
class HasInputCol(Params):
    """
    Mixin providing the ``inputCol`` param (input column name) together
    with its setter and getter.
    """

    inputCol = Param(
        Params._dummy(),
        "inputCol",
        "input column name.",
        typeConverter=TypeConverters.toString)

    def setInputCol(self, value):
        """
        Store ``value`` as :py:attr:`inputCol` and return the result of ``_set``.
        """
        result = self._set(inputCol=value)
        return result

    def getInputCol(self):
        """
        Return the value of inputCol, or its default value.
        """
        param = self.inputCol
        return self.getOrDefault(param)
class HasTFHParams(Params):
    """
    Mixin for TensorFlow model hyper-parameters
    """
    # The Param name has to equal the attribute name: pyspark resolves and
    # validates params by looking ``param.name`` up as an attribute on the
    # instance, so a mismatching name makes the getter reject the param.
    tfHParams = Param(Params._dummy(), "tfHParams",
                      textwrap.dedent("""\
                      instance of :class:`tf.contrib.training.HParams`, a namespace-like
                      key-value object, storing parameters to be used to define the final
                      TensorFlow graph for the Transformer.

                      Currently used values are:
                      - `batch_size`: number of samples evaluated together in inference steps"""),
                      typeConverter=SparkDLTypeConverters.toTFHParams)

    def setTFHParams(self, value):
        """
        Sets the value of :py:attr:`tfHParams`.
        """
        # The keyword must be the attribute name of the Param being set.
        return self._set(tfHParams=value)

    def getTFHParams(self):
        """
        Gets the value of tfHParams or its default value.
        """
        return self.getOrDefault(self.tfHParams)
class _HasRegularizationMatrixCol(Params):
    """
    Mixin providing the ``regularizationMatrixCol`` param and its getter.
    """

    regularizationMatrixCol = Param(
        Params._dummy(), "regularizationMatrixCol",
        "Regularization matrix column for specifying different reg matrices across filters",
        typeConverter=TypeConverters.toString,
    )

    def __init__(self):
        super(_HasRegularizationMatrixCol, self).__init__()

    def getRegularizationMatrixCol(self):
        """
        Return the value of regularizationMatrixCol, or its default value.
        """
        param = self.regularizationMatrixCol
        return self.getOrDefault(param)
예제 #12
0
파일: tuning.py 프로젝트: zymap/spark
class _TrainValidationSplitParams(_ValidatorParams):
    """
    Shared params of :py:class:`TrainValidationSplit` and
    :py:class:`TrainValidationSplitModel`.

    .. versionadded:: 3.0.0
    """

    # The backslash continuation keeps the next line's leading spaces inside
    # the doc string, so that line's indentation is part of the runtime text.
    trainRatio = Param(
        Params._dummy(),
        "trainRatio",
        "Param for ratio between train and\
     validation data. Must be between 0 and 1.",
        typeConverter=TypeConverters.toFloat,
    )

    @since("2.0.0")
    def getTrainRatio(self):
        """
        Return the value of trainRatio, or its default value.
        """
        param = self.trainRatio
        return self.getOrDefault(param)
예제 #13
0
class HasTFInputGraph(Params):
    """
    Mixin providing the ``tfInputGraph`` param: a serializable object derived
    from a TensorFlow computation graph. The default is ``None``.
    """
    tfInputGraph = Param(Params._dummy(), "tfInputGraph",
                         "A serializable object derived from a TensorFlow computation graph",
                         typeConverter=SparkDLTypeConverters.toTFInputGraph)

    def __init__(self):
        super(HasTFInputGraph, self).__init__()
        defaults = {"tfInputGraph": None}
        self._setDefault(**defaults)

    def setTFInputGraph(self, value):
        """Store ``value`` as tfInputGraph and return the result of ``_set``."""
        result = self._set(tfInputGraph=value)
        return result

    def getTFInputGraph(self):
        """Return the value of tfInputGraph, or its default value."""
        param = self.tfInputGraph
        return self.getOrDefault(param)
예제 #14
0
class HasLabelCol(Params):
    """
    Mixin providing the ``labelCol`` param: the name of the
    :py:obj:`DataFrame` column that stores the training labels when Keras
    image models are trained in a supervised learning setting.

    .. note:: The Estimator expects this column to contain data directly usable for the Keras
              model. This usually means that the labels are already encoded in one-hot format.
              Please consider adding a :py:obj:`OneHotEncoder` to transform the label column.
    """
    labelCol = Param(
        Params._dummy(),
        "labelCol",
        "name of the column storing the training data labels",
        typeConverter=TypeConverters.toString)

    def setLabelCol(self, value):
        """Store ``value`` as labelCol and return the result of ``_set``."""
        result = self._set(labelCol=value)
        return result

    def getLabelCol(self):
        """Return the value of labelCol, or its default value."""
        param = self.labelCol
        return self.getOrDefault(param)
예제 #15
0
class HasEmbeddingsProperties(Params):
    """Mixin providing the ``dimension`` param (number of embedding dimensions)."""

    dimension = Param(
        Params._dummy(),
        "dimension",
        "Number of embedding dimensions",
        typeConverter=TypeConverters.toInt,
    )

    def setDimension(self, value):
        """Sets embeddings dimension.

        Parameters
        ----------
        value : int
            Embeddings dimension
        """
        result = self._set(dimension=value)
        return result

    def getDimension(self):
        """Gets embeddings dimension (or its default value)."""
        param = self.dimension
        return self.getOrDefault(param)
class _HasLearningRate(Params):
    """
    Mixin providing the ``learningRate`` param (Normalized LMS learning rate)
    and its getter.
    """

    learningRate = Param(
        Params._dummy(), "learningRate",
        ("Learning rate for Normalized LMS. "
         "If there is no interference, the default value of 1.0 is optimal"),
        typeConverter=TypeConverters.toFloat,
    )

    def __init__(self):
        super(_HasLearningRate, self).__init__()

    def getLearningRate(self):
        """
        Return the value of learningRate, or its default value.
        """
        param = self.learningRate
        return self.getOrDefault(param)
class _HasRegularizationConstant(Params):
    """
    Mixin providing the ``regularizationConstant`` param and its getter.
    """

    regularizationConstant = Param(
        Params._dummy(), "regularizationConstant",
        "Regularization term for stability, default is 1.0",
        typeConverter=TypeConverters.toFloat,
    )

    def __init__(self):
        super(_HasRegularizationConstant, self).__init__()

    def getRegularizationConstant(self):
        """
        Return the value of regularizationConstant, or its default value.
        """
        param = self.regularizationConstant
        return self.getOrDefault(param)
예제 #18
0
class HasInputCol(Params):
    """
    Mixin providing the ``inputCol`` param (input column name) and its getter.
    """

    inputCol: "Param[str]" = Param(
        Params._dummy(), "inputCol", "input column name.",
        typeConverter=TypeConverters.toString,
    )

    def __init__(self) -> None:
        super().__init__()

    def getInputCol(self) -> str:
        """
        Return the value of inputCol, or its default value.
        """
        param = self.inputCol
        return self.getOrDefault(param)
예제 #19
0
class HasMaxIter(Params):
    """
    Mixin providing the ``maxIter`` param (max number of iterations, >= 0)
    and its getter.
    """

    maxIter: "Param[int]" = Param(
        Params._dummy(), "maxIter", "max number of iterations (>= 0).",
        typeConverter=TypeConverters.toInt,
    )

    def __init__(self) -> None:
        super().__init__()

    def getMaxIter(self) -> int:
        """
        Return the value of maxIter, or its default value.
        """
        param = self.maxIter
        return self.getOrDefault(param)
예제 #20
0
class HasCheckpointInterval(Params):
    """
    Mixin providing the ``checkpointInterval`` param and its getter.

    The param sets the checkpoint interval (>= 1) or disables checkpointing
    (-1). E.g. 10 means that the cache will get checkpointed every 10
    iterations. Note: this setting will be ignored if the checkpoint
    directory is not set in the SparkContext.
    """

    checkpointInterval: "Param[int]" = Param(
        Params._dummy(),
        "checkpointInterval",
        (
            "set checkpoint interval (>= 1) or disable checkpoint (-1). "
            "E.g. 10 means that the cache will get checkpointed every 10 iterations. "
            "Note: this setting will be ignored if the checkpoint directory is not set in the SparkContext."
        ),
        typeConverter=TypeConverters.toInt,
    )

    def __init__(self) -> None:
        super().__init__()

    def getCheckpointInterval(self) -> int:
        """
        Return the value of checkpointInterval, or its default value.
        """
        param = self.checkpointInterval
        return self.getOrDefault(param)
예제 #21
0
class HasValidationIndicatorCol(Params):
    """
    Mixin providing the ``validationIndicatorCol`` param and its getter.

    The param names the column that indicates whether each row is for
    training or for validation. False indicates training; true indicates
    validation.
    """

    validationIndicatorCol: "Param[str]" = Param(
        Params._dummy(),
        "validationIndicatorCol",
        (
            "name of the column that indicates whether each row is for training or for validation. "
            "False indicates training; true indicates validation."
        ),
        typeConverter=TypeConverters.toString,
    )

    def __init__(self) -> None:
        super().__init__()

    def getValidationIndicatorCol(self) -> str:
        """
        Return the value of validationIndicatorCol, or its default value.
        """
        param = self.validationIndicatorCol
        return self.getOrDefault(param)
예제 #22
0
class HasLoss(Params):
    """
    Mixin providing the ``loss`` param (the loss function to be optimized)
    and its getter.
    """

    loss: "Param[str]" = Param(
        Params._dummy(), "loss", "the loss function to be optimized.",
        typeConverter=TypeConverters.toString,
    )

    def __init__(self) -> None:
        super().__init__()

    def getLoss(self) -> str:
        """
        Return the value of loss, or its default value.
        """
        param = self.loss
        return self.getOrDefault(param)
예제 #23
0
class HasVarianceCol(Params):
    """
    Mixin providing the ``varianceCol`` param (column name for the biased
    sample variance of prediction) and its getter.
    """

    varianceCol: "Param[str]" = Param(
        Params._dummy(), "varianceCol",
        "column name for the biased sample variance of prediction.",
        typeConverter=TypeConverters.toString,
    )

    def __init__(self) -> None:
        super().__init__()

    def getVarianceCol(self) -> str:
        """
        Return the value of varianceCol, or its default value.
        """
        param = self.varianceCol
        return self.getOrDefault(param)
예제 #24
0
class HasWeightCol(Params):
    """
    Mixin providing the ``weightCol`` param (weight column name) and its
    getter. If the param is not set or empty, all instance weights are
    treated as 1.0.
    """

    weightCol: "Param[str]" = Param(
        Params._dummy(),
        "weightCol",
        (
            "weight column name. "
            "If this is not set or empty, we treat all instance weights as 1.0."
        ),
        typeConverter=TypeConverters.toString,
    )

    def __init__(self) -> None:
        super().__init__()

    def getWeightCol(self) -> str:
        """
        Return the value of weightCol, or its default value.
        """
        param = self.weightCol
        return self.getOrDefault(param)
예제 #25
0
class HasThresholds(Params):
    """
    Mixin providing the ``thresholds`` param and its getter.

    Thresholds are used in multi-class classification to adjust the
    probability of predicting each class. Array must have length equal to the
    number of classes, with values > 0, excepting that at most one value may
    be 0. The class with largest value p/t is predicted, where p is the
    original probability of that class and t is the class's threshold.
    """

    thresholds: "Param[List[float]]" = Param(
        Params._dummy(),
        "thresholds",
        (
            "Thresholds in multi-class classification to adjust the probability of predicting each class. "
            "Array must have length equal to the number of classes, with values > 0, excepting that at most one value may be 0. "
            "The class with largest value p/t is predicted, where p is the original probability of that class and t is the class's threshold."
        ),
        typeConverter=TypeConverters.toListFloat,
    )

    def __init__(self) -> None:
        super().__init__()

    def getThresholds(self) -> List[float]:
        """
        Return the value of thresholds, or its default value.
        """
        param = self.thresholds
        return self.getOrDefault(param)
예제 #26
0
class HasRegParam(Params):
    """
    Mixin providing the ``regParam`` param (regularization parameter, >= 0)
    and its getter.
    """

    regParam: "Param[float]" = Param(
        Params._dummy(), "regParam", "regularization parameter (>= 0).",
        typeConverter=TypeConverters.toFloat,
    )

    def __init__(self) -> None:
        super().__init__()

    def getRegParam(self) -> float:
        """
        Return the value of regParam, or its default value.
        """
        param = self.regParam
        return self.getOrDefault(param)
예제 #27
0
class HasHandleInvalid(Params):
    """
    Mixin providing the ``handleInvalid`` param (how to handle invalid
    entries) and its getter.

    Options are skip (which will filter out rows with bad values), or error
    (which will throw an error). More options may be added later.
    """

    handleInvalid: "Param[str]" = Param(
        Params._dummy(),
        "handleInvalid",
        (
            "how to handle invalid entries. "
            "Options are skip (which will filter out rows with bad values), or error (which will throw an error). "
            "More options may be added later."
        ),
        typeConverter=TypeConverters.toString,
    )

    def __init__(self) -> None:
        super().__init__()

    def getHandleInvalid(self) -> str:
        """
        Return the value of handleInvalid, or its default value.
        """
        param = self.handleInvalid
        return self.getOrDefault(param)
예제 #28
0
class HasTol(Params):
    """
    Mixin providing the ``tol`` param (the convergence tolerance for
    iterative algorithms, >= 0) and its getter.
    """

    tol: "Param[float]" = Param(
        Params._dummy(), "tol",
        "the convergence tolerance for iterative algorithms (>= 0).",
        typeConverter=TypeConverters.toFloat,
    )

    def __init__(self) -> None:
        super().__init__()

    def getTol(self) -> float:
        """
        Return the value of tol, or its default value.
        """
        param = self.tol
        return self.getOrDefault(param)
예제 #29
0
class HasUpdateHoldout(Params):
    """
    Mixin for update holdout parameter: after how many samples the mixture
    starts calculating estimates.
    """

    # The first fragment ends with a space so the concatenated description
    # reads "Preventing update in first few samples".
    updateHoldout = Param(
        Params._dummy(),
        "updateHoldout",
        "Controls after how many samples the mixture will start calculating estimates. Preventing update " +
        "in first few samples might be preferred for stability.",
        TypeConverters.toInt)

    def __init__(self):
        super(HasUpdateHoldout, self).__init__()

    def getUpdateHoldout(self):
        """
        Gets the value of update holdout or its default value
        """
        return self.getOrDefault(self.updateHoldout)
예제 #30
0
class HasBlockSize(Params):
    """
    Mixin providing the ``blockSize`` param (block size for stacking input
    data in matrices) and its getter.

    Data is stacked within partitions. If block size is more than remaining
    data in a partition then it is adjusted to the size of this data.
    """

    blockSize: "Param[int]" = Param(
        Params._dummy(),
        "blockSize",
        (
            "block size for stacking input data in matrices. "
            "Data is stacked within partitions. "
            "If block size is more than remaining data in a partition then it is adjusted to the size of this data."
        ),
        typeConverter=TypeConverters.toInt,
    )

    def __init__(self) -> None:
        super().__init__()

    def getBlockSize(self) -> int:
        """
        Return the value of blockSize, or its default value.
        """
        param = self.blockSize
        return self.getOrDefault(param)
예제 #31
0
class HasStepSize(Params):
    """
    Mixin providing the ``stepSize`` param (step size to be used for each
    iteration of optimization, >= 0) and its getter.
    """

    stepSize: "Param[float]" = Param(
        Params._dummy(), "stepSize",
        "Step size to be used for each iteration of optimization (>= 0).",
        typeConverter=TypeConverters.toFloat,
    )

    def __init__(self) -> None:
        super().__init__()

    def getStepSize(self) -> float:
        """
        Return the value of stepSize, or its default value.
        """
        param = self.stepSize
        return self.getOrDefault(param)
예제 #32
0
class HasOutputCols(Params):
    """
    Mixin providing the ``outputCols`` param (output column names) and its
    getter.
    """

    outputCols: "Param[List[str]]" = Param(
        Params._dummy(), "outputCols", "output column names.",
        typeConverter=TypeConverters.toListString,
    )

    def __init__(self) -> None:
        super().__init__()

    def getOutputCols(self) -> List[str]:
        """
        Return the value of outputCols, or its default value.
        """
        param = self.outputCols
        return self.getOrDefault(param)
예제 #33
0
class HasBatchTrainEnabled(Params):
    """
    Mixin for enabling batch EM train mode
    """

    # The first fragment ends with a space so the concatenated description
    # reads "can be done with both streaming and batch dataframes".
    batchTrainEnabled = Param(
        Params._dummy(), "batchTrainEnabled",
        "Flag to enable batch EM. Unless enabled, the transformer will do online EM. Online EM can be done with "
        +
        "both streaming and batch dataframes, whereas batch EM can only be done with batch dataframes. Default is false",
        TypeConverters.toBoolean)

    def __init__(self):
        super(HasBatchTrainEnabled, self).__init__()

    def getBatchTrainEnabled(self):
        """
        Gets the value of batch train flag or its default value
        """
        return self.getOrDefault(self.batchTrainEnabled)
예제 #34
0
class HasCalculateMahalanobis(Params):
    """
    Mixin for param for enabling mahalanobis calculation.
    """

    # The first fragment ends with a space so the two sentences of the
    # description do not run together.
    calculateMahalanobis = Param(
        Params._dummy(),
        "calculateMahalanobis",
        "When true, mahalanobis distance of residual will be calculated & added to output DataFrame. "
        + "Default is false.",
        typeConverter=TypeConverters.toBoolean)

    def __init__(self):
        super(HasCalculateMahalanobis, self).__init__()

    def getCalculateMahalanobis(self):
        """
        Gets the value of mahalanobis calculation flag or its default value.
        """
        return self.getOrDefault(self.calculateMahalanobis)
예제 #35
0
class HasProcessNoiseCol(Params):
    """
    Mixin for param for process noise column.
    """

    # The first fragment ends with a space so the concatenated description
    # reads "rather than a constant ...".
    # NOTE(review): the text says "measurement noise" although this param is
    # the process noise column -- possibly a copy-paste slip; confirm.
    processNoiseCol = Param(
        Params._dummy(),
        "processNoiseCol",
        "Column name for specifying process noise from input DataFrame rather than "
        + "a constant measurement noise for all filters",
        typeConverter=TypeConverters.toString)

    def __init__(self):
        super(HasProcessNoiseCol, self).__init__()

    def getProcessNoiseCol(self):
        """
        Gets the value of process noise column or its default value.
        """
        return self.getOrDefault(self.processNoiseCol)
예제 #36
0
class HasMeasurementModelCol(Params):
    """
    Mixin for param for measurement model column.
    """

    # The first fragment ends with a space so the concatenated description
    # reads "rather than a constant measurement model".
    measurementModelCol = Param(
        Params._dummy(),
        "measurementModelCol",
        "Column name for specifying measurement model from input DataFrame rather than "
        + "a constant measurement model for all filters",
        typeConverter=TypeConverters.toString)

    def __init__(self):
        super(HasMeasurementModelCol, self).__init__()

    def getMeasurementModelCol(self):
        """
        Gets the value of measurement model column or its default value.
        """
        return self.getOrDefault(self.measurementModelCol)
예제 #37
0
파일: test_param.py 프로젝트: Brett-A/spark
    def test_params(self):
        """Check param discovery, default/set/defined state and explainParams output."""
        testParams = TestParams()
        maxIter, inputCol, seed = testParams.maxIter, testParams.inputCol, testParams.seed

        self.assertEqual(testParams.params, [inputCol, maxIter, seed])

        # maxIter: starts with only its default of 10, then is set to 100.
        self.assertTrue(testParams.hasParam(maxIter.name))
        self.assertTrue(testParams.hasDefault(maxIter))
        self.assertFalse(testParams.isSet(maxIter))
        self.assertTrue(testParams.isDefined(maxIter))
        self.assertEqual(testParams.getMaxIter(), 10)
        testParams.setMaxIter(100)
        self.assertTrue(testParams.isSet(maxIter))
        self.assertEqual(testParams.getMaxIter(), 100)

        # inputCol: neither default nor explicit value, so the getter raises.
        self.assertTrue(testParams.hasParam(inputCol.name))
        self.assertFalse(testParams.hasDefault(inputCol))
        self.assertFalse(testParams.isSet(inputCol))
        self.assertFalse(testParams.isDefined(inputCol))
        with self.assertRaises(KeyError):
            testParams.getInputCol()

        # A param created outside the instance must be rejected by set().
        foreignParam = Param(
            Params._dummy(),
            "otherParam",
            "Parameter used to test that set raises an error for a non-member parameter.",
            typeConverter=TypeConverters.toString)
        with self.assertRaises(ValueError):
            testParams.set(foreignParam, "value")

        # Since the default is normally random, set it to a known number for debug str
        testParams._setDefault(seed=41)
        testParams.setSeed(43)

        expectedLines = [
            "inputCol: input column name. (undefined)",
            "maxIter: max number of iterations (>= 0). (default: 10, current: 100)",
            "seed: random seed. (default: 41, current: 43)",
        ]
        self.assertEqual(testParams.explainParams(), "\n".join(expectedLines))
예제 #38
0
파일: pipeline.py 프로젝트: dalonso82/spark
 def copy(self, extra=None):
     """
     Creates a copy of this instance whose stages are themselves copies.

     :param extra: extra parameters passed on to every ``copy`` call;
                   ``None`` is treated as an empty dict
     :return: the new instance
     """
     extra = dict() if extra is None else extra
     duplicate = Params.copy(self, extra)
     copiedStages = []
     for stage in duplicate.getStages():
         copiedStages.append(stage.copy(extra))
     return duplicate.setStages(copiedStages)
예제 #39
0
파일: pipeline.py 프로젝트: 308306362/spark
 def copy(self, extra=None):
     """
     Creates a copy of this instance whose stages are themselves copies.

     :param extra: extra parameters passed on to every ``copy`` call;
                   ``None`` is treated as an empty dict
     :return: the new instance
     """
     # A fresh dict per call avoids sharing one mutable default between calls.
     if extra is None:
         extra = dict()
     that = Params.copy(self, extra)
     stages = [stage.copy(extra) for stage in that.getStages()]
     return that.setStages(stages)