Beispiel #1
0
class _MinMaxScalerParams(JavaWithParams, HasInputCol, HasOutputCol):
    """
    Params for :class:`MinMaxScaler`.

    Declares the ``min`` and ``max`` float params that bound the output
    feature range, on top of the inherited input/output column params.
    """

    MIN: Param[float] = FloatParam("min",
                                   "Lower bound of the output feature range.",
                                   0.0, ParamValidators.not_null())

    MAX: Param[float] = FloatParam("max",
                                   "Upper bound of the output feature range.",
                                   1.0, ParamValidators.not_null())

    def __init__(self, java_params):
        super(_MinMaxScalerParams, self).__init__(java_params)

    def set_min(self, value: float):
        """Set the lower bound of the output range; returns the result of ``set``."""
        return typing.cast(_MinMaxScalerParams, self.set(self.MIN, value))

    def set_max(self, value: float):
        """Set the upper bound of the output range; returns the result of ``set``."""
        return typing.cast(_MinMaxScalerParams, self.set(self.MAX, value))

    # The getters are annotated ``float`` (not ``bool``) because both params
    # are declared as ``Param[float]``.
    def get_min(self) -> float:
        """Return the value held for ``MIN``."""
        return self.get(self.MIN)

    def get_max(self) -> float:
        """Return the value held for ``MAX``."""
        return self.get(self.MAX)

    @property
    def min(self) -> float:
        """Property view of :meth:`get_min`."""
        return self.get_min()

    @property
    def max(self) -> float:
        """Property view of :meth:`get_max`."""
        return self.get_max()
Beispiel #2
0
class _StringIndexerParams(_StringIndexerModelParams):
    """
    Params for :class:`StringIndexer`.

    Adds the ``string_order_type`` param to the params inherited from the
    model-params base class.
    """

    STRING_ORDER_TYPE: Param[str] = StringParam(
        "string_order_type",
        "How to order strings of each column.",
        "arbitrary",
        ParamValidators.in_array([
            'arbitrary',
            'frequencyDesc',
            'frequencyAsc',
            'alphabetDesc',
            'alphabetAsc',
        ]),
    )

    def __init__(self, java_params):
        super().__init__(java_params)

    def set_string_order_type(self, value: str):
        """Store ``value`` under ``STRING_ORDER_TYPE``; returns the result of ``set``."""
        updated = self.set(self.STRING_ORDER_TYPE, value)
        return typing.cast(_StringIndexerParams, updated)

    def get_string_order_type(self) -> str:
        """Return the value held for ``STRING_ORDER_TYPE``."""
        order_type = self.get(self.STRING_ORDER_TYPE)
        return order_type

    @property
    def string_order_type(self):
        """Property view of :meth:`get_string_order_type`."""
        return self.get_string_order_type()
Beispiel #3
0
class HasMultiClass(WithParams, ABC):
    """
    Base class for the shared multi class param.

    Supported options:

    - ``auto``: selects the classification type based on the number of
      classes. If the number of unique label values from the input data is
      one or two, set to "binomial". Otherwise, set to "multinomial".
    - ``binomial``: binary logistic regression.
    - ``multinomial``: multinomial logistic regression.
    """
    MULTI_CLASS: Param[str] = StringParam(
        "multi_class",
        "Classification type. Supported options: 'auto', 'binomial' and 'multinomial'.",
        'auto',
        ParamValidators.in_array(['auto', 'binomial', 'multinomial']),
    )

    def set_multi_class(self, class_type: str):
        """Store ``class_type`` under ``MULTI_CLASS``; returns the result of ``set``."""
        updated = self.set(self.MULTI_CLASS, class_type)
        return updated

    def get_multi_class(self) -> str:
        """Return the value held for ``MULTI_CLASS``."""
        current = self.get(self.MULTI_CLASS)
        return current

    @property
    def multi_class(self) -> str:
        """Property view of :meth:`get_multi_class`."""
        return self.get_multi_class()
Beispiel #4
0
    def test_validators(self):
        """Check each ``ParamValidators`` factory against a table of inputs."""

        def check(validator, expectations):
            # Apply the validator to each (value, expected) pair in order and
            # assert the truthiness of the outcome.
            for value, expected in expectations:
                outcome = validator.validate(value)
                if expected:
                    self.assertTrue(outcome)
                else:
                    self.assertFalse(outcome)

        # Strictly greater than 10.
        check(ParamValidators.gt(10),
              [(None, False), (5, False), (10, False), (15, True)])

        # Greater than or equal to 10.
        check(ParamValidators.gt_eq(10),
              [(None, False), (5, False), (10, True), (15, True)])

        # Strictly less than 10.
        check(ParamValidators.lt(10),
              [(None, False), (5, True), (10, False), (15, False)])

        # Less than or equal to 10.
        check(ParamValidators.lt_eq(10),
              [(None, False), (5, True), (10, True), (15, False)])

        # Default in_range(5, 15): both bounds are accepted.
        check(ParamValidators.in_range(5, 15),
              [(None, False), (0, False), (5, True), (10, True), (15, True),
               (20, False)])

        # in_range with both flags False: both bounds are rejected.
        check(ParamValidators.in_range(5, 15, False, False),
              [(None, False), (0, False), (5, False), (10, True), (15, False),
               (20, False)])

        # Membership in a fixed list.
        check(ParamValidators.in_array([1, 2, 3]),
              [(None, False), (1, True), (0, False)])

        # Anything but None.
        check(ParamValidators.not_null(), [(5, True), (None, False)])
Beispiel #5
0
class HasBatchStrategy(WithParams, ABC):
    """
    Base class for the shared batch strategy param.

    Only a getter and a property are exposed here; the validator accepts
    the single value "count".
    """
    BATCH_STRATEGY: Param[str] = StringParam(
        "batch_strategy",
        "Strategy to create mini batch from online train data.",
        "count",
        ParamValidators.in_array(["count"]),
    )

    def get_batch_strategy(self) -> str:
        """Return the value held for ``BATCH_STRATEGY``."""
        strategy = self.get(self.BATCH_STRATEGY)
        return strategy

    @property
    def batch_strategy(self):
        """Property view of :meth:`get_batch_strategy`."""
        return self.get_batch_strategy()
Beispiel #6
0
class HasOutputCol(WithParams, ABC):
    """
    Base class for the shared output_col param.

    Provides the ``OUTPUT_COL`` param (default "output") with a setter, a
    getter and a read-only property.
    """
    OUTPUT_COL: Param[str] = StringParam(
        "output_col",
        "Output column name.",
        "output",
        ParamValidators.not_null(),
    )

    def set_output_col(self, col: str):
        """Store ``col`` under ``OUTPUT_COL``; returns the result of ``set``."""
        updated = self.set(self.OUTPUT_COL, col)
        return updated

    def get_output_col(self) -> str:
        """Return the value held for ``OUTPUT_COL``."""
        current = self.get(self.OUTPUT_COL)
        return current

    @property
    def output_col(self) -> str:
        """Property view of :meth:`get_output_col`."""
        return self.get_output_col()
Beispiel #7
0
class HasLabelCol(WithParams, ABC):
    """
    Base class for the shared label column param.

    Provides the ``LABEL_COL`` param (default "label") with a setter, a
    getter and a read-only property.
    """
    LABEL_COL: Param[str] = StringParam(
        "label_col",
        "Label column name.",
        "label",
        ParamValidators.not_null(),
    )

    def set_label_col(self, col: str):
        """Store ``col`` under ``LABEL_COL``; returns the result of ``set``."""
        updated = self.set(self.LABEL_COL, col)
        return updated

    def get_label_col(self) -> str:
        """Return the value held for ``LABEL_COL``."""
        current = self.get(self.LABEL_COL)
        return current

    @property
    def label_col(self) -> str:
        """Property view of :meth:`get_label_col`."""
        return self.get_label_col()
Beispiel #8
0
class HasInputCol(WithParams, ABC):
    """
    Base class for the shared input col param.

    Provides the ``INPUT_COL`` param (default "input") with a setter, a
    getter and a read-only property.
    """
    INPUT_COL: Param[str] = StringParam(
        "input_col",
        "Input column name.",
        "input",
        ParamValidators.not_null(),
    )

    def set_input_col(self, col: str):
        """Store ``col`` under ``INPUT_COL``; returns the result of ``set``."""
        updated = self.set(self.INPUT_COL, col)
        return updated

    def get_input_col(self) -> str:
        """Return the value held for ``INPUT_COL``."""
        current = self.get(self.INPUT_COL)
        return current

    @property
    def input_col(self) -> str:
        """Property view of :meth:`get_input_col`."""
        return self.get_input_col()
Beispiel #9
0
class HasReg(WithParams, ABC):
    """
    Base class for the shared regularization param.

    The ``REG`` param defaults to 0. and is validated with
    ``ParamValidators.gt_eq(0.)``.
    """
    REG: Param[float] = FloatParam(
        "reg",
        "Regularization parameter.",
        0.,
        ParamValidators.gt_eq(0.),
    )

    def set_reg(self, value: float):
        """Store ``value`` under ``REG``; returns the result of ``set``."""
        updated = self.set(self.REG, value)
        return updated

    def get_reg(self) -> float:
        """Return the value held for ``REG``."""
        current = self.get(self.REG)
        return current

    @property
    def reg(self) -> float:
        """Property view of :meth:`get_reg`."""
        return self.get_reg()
Beispiel #10
0
class HasOutputCols(WithParams, ABC):
    """
    Base class for the shared output_cols param.

    The ``OUTPUT_COLS`` param has no default (``None``) and is validated
    with ``ParamValidators.non_empty_array()``.
    """
    OUTPUT_COLS: Param[Tuple[str, ...]] = StringArrayParam(
        "output_cols",
        "Output column names.",
        None,
        ParamValidators.non_empty_array(),
    )

    def set_output_cols(self, *cols: str):
        """Store the positional ``cols`` tuple under ``OUTPUT_COLS``."""
        updated = self.set(self.OUTPUT_COLS, cols)
        return updated

    def get_output_cols(self) -> Tuple[str, ...]:
        """Return the value held for ``OUTPUT_COLS``."""
        current = self.get(self.OUTPUT_COLS)
        return current

    @property
    def output_cols(self) -> Tuple[str, ...]:
        """Property view of :meth:`get_output_cols`."""
        return self.get_output_cols()
Beispiel #11
0
class HasMaxIter(WithParams, ABC):
    """
    Base class for the shared maxIter param.

    The ``MAX_ITER`` param defaults to 20 and is validated with
    ``ParamValidators.gt(0)``.
    """
    MAX_ITER: Param[int] = IntParam(
        "max_iter",
        "Maximum number of iterations.",
        20,
        ParamValidators.gt(0),
    )

    def set_max_iter(self, max_iter: int):
        """Store ``max_iter`` under ``MAX_ITER``; returns the result of ``set``."""
        updated = self.set(self.MAX_ITER, max_iter)
        return updated

    def get_max_iter(self) -> int:
        """Return the value held for ``MAX_ITER``."""
        current = self.get(self.MAX_ITER)
        return current

    @property
    def max_iter(self) -> int:
        """Property view of :meth:`get_max_iter`."""
        return self.get_max_iter()
Beispiel #12
0
class HasGlobalBatchSize(WithParams, ABC):
    """
    Base class for the shared global_batch_size param.

    The ``GLOBAL_BATCH_SIZE`` param defaults to 32 and is validated with
    ``ParamValidators.gt(0)``.
    """
    GLOBAL_BATCH_SIZE: Param[int] = IntParam(
        "global_batch_size",
        "Global batch size of training algorithms.",
        32,
        ParamValidators.gt(0),
    )

    def set_global_batch_size(self, global_batch_size: int):
        """Store ``global_batch_size`` under ``GLOBAL_BATCH_SIZE``."""
        updated = self.set(self.GLOBAL_BATCH_SIZE, global_batch_size)
        return updated

    def get_global_batch_size(self) -> int:
        """Return the value held for ``GLOBAL_BATCH_SIZE``."""
        current = self.get(self.GLOBAL_BATCH_SIZE)
        return current

    @property
    def global_batch_size(self) -> int:
        """Property view of :meth:`get_global_batch_size`."""
        return self.get_global_batch_size()
Beispiel #13
0
class HasFeaturesCol(WithParams, ABC):
    """
    Base class for the shared features_col param.

    Provides the ``FEATURES_COL`` param (default "features") with a setter,
    a getter and a read-only property.
    """
    FEATURES_COL: Param[str] = StringParam("features_col",
                                           "Features column name.", "features",
                                           ParamValidators.not_null())

    def set_features_col(self, col: str):
        """Store ``col`` under ``FEATURES_COL``; returns the result of ``set``."""
        return self.set(self.FEATURES_COL, col)

    def get_features_col(self) -> str:
        """Return the value held for ``FEATURES_COL``."""
        return self.get(self.FEATURES_COL)

    @property
    def features_col(self) -> str:
        """Property view of :meth:`get_features_col`."""
        return self.get_features_col()
Beispiel #14
0
class HasElasticNet(WithParams, ABC):
    """
    Base class for the shared elastic net param.

    The ``ELASTIC_NET`` param defaults to 0. and is validated with
    ``ParamValidators.in_range(0.0, 1.0)``.
    """
    ELASTIC_NET: Param[float] = FloatParam("elastic_net",
                                           "ElasticNet parameter.", 0.,
                                           ParamValidators.in_range(0.0, 1.0))

    def set_elastic_net(self, value: float):
        """Store ``value`` under ``ELASTIC_NET``; returns the result of ``set``."""
        return self.set(self.ELASTIC_NET, value)

    def get_elastic_net(self) -> float:
        """Return the value held for ``ELASTIC_NET``."""
        return self.get(self.ELASTIC_NET)

    @property
    def elastic_net(self) -> float:
        """Property view of :meth:`get_elastic_net`."""
        # Delegate to the getter, matching the other shared-param classes.
        return self.get_elastic_net()
Beispiel #15
0
class HasDecayFactor(WithParams, ABC):
    """
    Base class for the shared decay factor param.

    The ``DECAY_FACTOR`` param defaults to 0. and is validated with
    ``ParamValidators.in_range(0, 1)``.
    """
    DECAY_FACTOR: Param[float] = FloatParam(
        "decay_factor", "The forgetfulness of the previous centroids.", 0.,
        ParamValidators.in_range(0, 1))

    def set_decay_factor(self, value: float):
        """Store ``value`` under ``DECAY_FACTOR``; returns the result of ``set``."""
        return self.set(self.DECAY_FACTOR, value)

    def get_decay_factor(self) -> float:
        """Return the value held for ``DECAY_FACTOR``."""
        return self.get(self.DECAY_FACTOR)

    @property
    def decay_factor(self) -> float:
        """Property view of :meth:`get_decay_factor`."""
        # Delegate to the getter, matching the other shared-param classes.
        return self.get_decay_factor()
Beispiel #16
0
class HasTol(WithParams, ABC):
    """
    Base class for the shared tolerance param.

    The ``TOL`` param defaults to 1e-6 and is validated with
    ``ParamValidators.gt_eq(0)``.
    """
    TOL: Param[float] = FloatParam(
        "tol",
        "Convergence tolerance for iterative algorithms.",
        1e-6,
        ParamValidators.gt_eq(0),
    )

    def set_tol(self, value: float):
        """Store ``value`` under ``TOL``; returns the result of ``set``."""
        updated = self.set(self.TOL, value)
        return updated

    def get_tol(self) -> float:
        """Return the value held for ``TOL``."""
        current = self.get(self.TOL)
        return current

    @property
    def tol(self) -> float:
        """Property view of :meth:`get_tol`."""
        return self.get_tol()
Beispiel #17
0
class HasDistanceMeasure(WithParams, ABC):
    """
    Base class for the shared distance_measure param.

    The ``DISTANCE_MEASURE`` param defaults to "euclidean"; the validator
    accepts 'euclidean' and 'cosine'.
    """
    DISTANCE_MEASURE: Param[str] = StringParam(
        "distance_measure",
        "Distance measure. Supported options: 'euclidean' and 'cosine'.",
        "euclidean",
        ParamValidators.in_array(['euclidean', 'cosine']),
    )

    def set_distance_measure(self, distance_measure: str):
        """Store ``distance_measure`` under ``DISTANCE_MEASURE``."""
        updated = self.set(self.DISTANCE_MEASURE, distance_measure)
        return updated

    def get_distance_measure(self) -> str:
        """Return the value held for ``DISTANCE_MEASURE``."""
        current = self.get(self.DISTANCE_MEASURE)
        return current

    @property
    def distance_measure(self) -> str:
        """Property view of :meth:`get_distance_measure`."""
        return self.get_distance_measure()
Beispiel #18
0
class HasLearningRate(WithParams, ABC):
    """
    Base class for the shared learning rate param.

    The ``LEARNING_RATE`` param defaults to 0.1 and is validated with
    ``ParamValidators.gt(0)``.
    """

    LEARNING_RATE: Param[float] = FloatParam(
        "learning_rate",
        "Learning rate of optimization method.",
        0.1,
        ParamValidators.gt(0),
    )

    def set_learning_rate(self, learning_rate: float):
        """Store ``learning_rate`` under ``LEARNING_RATE``."""
        updated = self.set(self.LEARNING_RATE, learning_rate)
        return updated

    def get_learning_rate(self) -> float:
        """Return the value held for ``LEARNING_RATE``."""
        current = self.get(self.LEARNING_RATE)
        return current

    @property
    def learning_rate(self) -> float:
        """Property view of :meth:`get_learning_rate`."""
        return self.get_learning_rate()
Beispiel #19
0
class HasPredictionCol(WithParams, ABC):
    """
    Base class for the shared prediction column param.

    Provides the ``PREDICTION_COL`` param (default "prediction") with a
    setter, a getter and a read-only property.
    """
    PREDICTION_COL: Param[str] = StringParam(
        "prediction_col",
        "Prediction column name.",
        "prediction",
        ParamValidators.not_null(),
    )

    def set_prediction_col(self, col: str):
        """Store ``col`` under ``PREDICTION_COL``; returns the result of ``set``."""
        updated = self.set(self.PREDICTION_COL, col)
        return updated

    def get_prediction_col(self) -> str:
        """Return the value held for ``PREDICTION_COL``."""
        current = self.get(self.PREDICTION_COL)
        return current

    @property
    def prediction_col(self) -> str:
        """Property view of :meth:`get_prediction_col`."""
        return self.get_prediction_col()
Beispiel #20
0
class _KNNModelParams(JavaWithParams, HasFeaturesCol, HasPredictionCol, ABC):
    """
    Params for :class:`KNNModel`.

    Adds the ``k`` param (default 5, validated with
    ``ParamValidators.gt(0)``) to the inherited column params.
    """

    K: Param[int] = IntParam(
        "k",
        "The number of nearest neighbors",
        5,
        ParamValidators.gt(0),
    )

    def __init__(self, java_params):
        super().__init__(java_params)

    def set_k(self, value: int):
        """Store ``value`` under ``K``; returns the result of ``set``."""
        updated = self.set(self.K, value)
        return typing.cast(_KNNModelParams, updated)

    def get_k(self) -> int:
        """Return the value held for ``K``."""
        current = self.get(self.K)
        return current

    @property
    def k(self) -> int:
        """Property view of :meth:`get_k`."""
        return self.get_k()
Beispiel #21
0
class _KMeansParams(_KMeansModelParams, HasSeed, HasMaxIter):
    """
    Params for :class:`KMeans`.

    Adds the ``init_mode`` param, whose validator accepts the single value
    "random", to the inherited model, seed and max-iter params.
    """
    INIT_MODE: Param[str] = StringParam(
        "init_mode",
        "The initialization algorithm. Supported options: 'random'.",
        "random",
        ParamValidators.in_array(["random"]),
    )

    def __init__(self, java_params):
        super().__init__(java_params)

    def set_init_mode(self, value: str):
        """Store ``value`` under ``INIT_MODE``; returns the result of ``set``."""
        updated = self.set(self.INIT_MODE, value)
        return updated

    def get_init_mode(self) -> str:
        """Return the value held for ``INIT_MODE``."""
        current = self.get(self.INIT_MODE)
        return current

    @property
    def init_mode(self):
        """Property view of :meth:`get_init_mode`."""
        return self.get_init_mode()
Beispiel #22
0
class _KMeansModelParams(JavaWithParams, HasDistanceMeasure, HasFeaturesCol,
                         HasPredictionCol, ABC):
    """
    Params for :class:`KMeansModel`.

    Adds the ``k`` param (default 2, validated with
    ``ParamValidators.gt(1)``) to the inherited shared params.
    """

    K: Param[int] = IntParam(
        "k",
        "The max number of clusters to create.",
        2,
        ParamValidators.gt(1),
    )

    def __init__(self, java_params):
        super().__init__(java_params)

    def set_k(self, value: int):
        """Store ``value`` under ``K``; returns the result of ``set``."""
        updated = self.set(self.K, value)
        return typing.cast(_KMeansModelParams, updated)

    def get_k(self) -> int:
        """Return the value held for ``K``."""
        current = self.get(self.K)
        return current

    @property
    def k(self) -> int:
        """Property view of :meth:`get_k`."""
        return self.get_k()
Beispiel #23
0
class _NaiveBayesModelParams(JavaWithParams, HasFeaturesCol, HasPredictionCol,
                             ABC):
    """
    Params for :class:`NaiveBayesModel`.

    Adds the ``model_type`` param, whose validator accepts the single value
    "multinomial", to the inherited column params.
    """

    MODEL_TYPE: Param[str] = StringParam(
        "model_type",
        "The model type.",
        "multinomial",
        ParamValidators.in_array(["multinomial"]),
    )

    def __init__(self, java_params):
        super().__init__(java_params)

    def set_model_type(self, value: str):
        """Store ``value`` under ``MODEL_TYPE``; returns the result of ``set``."""
        updated = self.set(self.MODEL_TYPE, value)
        return updated

    def get_model_type(self) -> str:
        """Return the value held for ``MODEL_TYPE``."""
        current = self.get(self.MODEL_TYPE)
        return current

    @property
    def model_type(self) -> str:
        """Property view of :meth:`get_model_type`."""
        return self.get_model_type()
Beispiel #24
0
class _LinearSVCModelParams(JavaWithParams, HasFeaturesCol, HasPredictionCol,
                            HasRawPredictionCol, ABC):
    """
    Params for :class:`LinearSVCModel`.

    Adds the float ``threshold`` param (default 0.0) to the inherited
    column params.
    """

    THRESHOLD: Param[float] = FloatParam(
        "threshold",
        "Threshold in binary classification prediction applied to rawPrediction.",
        0.0, ParamValidators.not_null())

    def __init__(self, java_params):
        super(_LinearSVCModelParams, self).__init__(java_params)

    # The accessors are annotated ``float`` (not ``int``) because THRESHOLD
    # is declared as ``Param[float]``.
    def set_threshold(self, value: float):
        """Store ``value`` under ``THRESHOLD``; returns the result of ``set``."""
        return typing.cast(_LinearSVCModelParams,
                           self.set(self.THRESHOLD, value))

    def get_threshold(self) -> float:
        """Return the value held for ``THRESHOLD``."""
        return self.get(self.THRESHOLD)

    @property
    def threshold(self) -> float:
        """Property view of :meth:`get_threshold`."""
        return self.get_threshold()
Beispiel #25
0
class _NaiveBayesParams(_NaiveBayesModelParams, HasLabelCol):
    """
    Params for :class:`NaiveBayes`.

    Adds the ``smoothing`` param (default 1.0, validated with
    ``ParamValidators.gt_eq(0.0)``) to the model params and label column.
    """

    SMOOTHING: Param[float] = FloatParam(
        "smoothing",
        "The smoothing parameter.",
        1.0,
        ParamValidators.gt_eq(0.0),
    )

    def __init__(self, java_params):
        super().__init__(java_params)

    def set_smoothing(self, value: float):
        """Store ``value`` under ``SMOOTHING``; returns the result of ``set``."""
        updated = self.set(self.SMOOTHING, value)
        return typing.cast(_NaiveBayesParams, updated)

    def get_smoothing(self) -> float:
        """Return the value held for ``SMOOTHING``."""
        current = self.get(self.SMOOTHING)
        return current

    @property
    def smoothing(self) -> float:
        """Property view of :meth:`get_smoothing`."""
        return self.get_smoothing()
Beispiel #26
0
class HasHandleInvalid(WithParams, ABC):
    """
    Base class for the shared handle_invalid param.

    Supported options and the corresponding behavior to handle invalid
    entries:

    - ``error``: raise an exception.
    - ``skip``: filter out rows with bad values.
    """
    HANDLE_INVALID: Param[str] = StringParam(
        "handle_invalid",
        "Strategy to handle invalid entries.",
        "error",
        ParamValidators.in_array(['error', 'skip']),
    )

    def set_handle_invalid(self, value: str):
        """Store ``value`` under ``HANDLE_INVALID``; returns the result of ``set``."""
        updated = self.set(self.HANDLE_INVALID, value)
        return updated

    def get_handle_invalid(self) -> str:
        """Return the value held for ``HANDLE_INVALID``."""
        current = self.get(self.HANDLE_INVALID)
        return current

    @property
    def handle_invalid(self) -> str:
        """Property view of :meth:`get_handle_invalid`."""
        return self.get_handle_invalid()
class _BinaryClassificationEvaluatorParams(JavaWithParams, HasLabelCol,
                                           HasRawPredictionCol, HasWeightCol):
    """
    Params for :class:`BinaryClassificationEvaluator`.

    Adds the ``metrics_names`` param, validated as a subset of
    "areaUnderROC", "areaUnderPR", "areaUnderLorenz" and "ks".
    """

    # NOTE(review): the default is a list although the annotation is
    # Tuple[str, ...] — confirm StringArrayParam accepts/normalizes it.
    METRICS_NAMES: Param[Tuple[str, ...]] = StringArrayParam(
        "metrics_names",
        "Names of output metrics.",
        ["areaUnderROC", "areaUnderPR"],
        ParamValidators.is_sub_set(
            ["areaUnderROC", "areaUnderPR", "areaUnderLorenz", "ks"]),
    )

    def __init__(self, java_params):
        super().__init__(java_params)

    def set_metrics_names(self, *value: str):
        """Store the positional ``value`` tuple under ``METRICS_NAMES``."""
        updated = self.set(self.METRICS_NAMES, value)
        return updated

    def get_metrics_names(self) -> Tuple[str, ...]:
        """Return the value held for ``METRICS_NAMES``."""
        current = self.get(self.METRICS_NAMES)
        return current

    @property
    def metrics_names(self) -> Tuple[str, ...]:
        """Property view of :meth:`get_metrics_names`."""
        return self.get_metrics_names()
Beispiel #28
0
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
# limitations under the License.
################################################################################
import os
from typing import Dict, Any

from pyflink.table import StreamTableEnvironment

from pyflink.ml.core.api import Stage
from pyflink.ml.core.param import ParamValidators, Param, BooleanParam, IntParam, \
    FloatParam, StringParam, IntArrayParam, FloatArrayParam, StringArrayParam
from pyflink.ml.tests.test_utils import PyFlinkMLTestCase

# Sample params, one per param type imported above; presumably shared
# fixtures for the test cases in this module.

# Scalar params; the int and float ones only accept values below 100.
BOOLEAN_PARAM = BooleanParam(
    "boolean_param", "Description", False)
INT_PARAM = IntParam(
    "int_param", "Description", 1, ParamValidators.lt(100))
FLOAT_PARAM = FloatParam(
    "float_param", "Description", 3.0, ParamValidators.lt(100))
STRING_PARAM = StringParam(
    'string_param', "Description", "5")

# Array params with tuple defaults.
INT_ARRAY_PARAM = IntArrayParam(
    "int_array_param", "Description", (6, 7))
FLOAT_ARRAY_PARAM = FloatArrayParam(
    "float_array_param", "Description", (10.0, 11.0))
STRING_ARRAY_PARAM = StringArrayParam(
    "string_array_param", "Description", ("14", "15"))

# An int param whose validator is ParamValidators.always_true().
EXTRA_INT_PARAM = IntParam(
    "extra_int_param", "Description", 20, ParamValidators.always_true())

# Default is None while the validator is not_null().
PARAM_WITH_NONE_DEFAULT = IntParam(
    "param_with_none_default",
    "Must be explicitly set with a non-none value",
    None,
    ParamValidators.not_null())