def train(cls, data, iterations=100, step=1.0, miniBatchFraction=1.0,
          initialWeights=None, regParam=1.0, regType="none", intercept=False):
    """
    Train a logistic regression model on the given data.

    The numeric and boolean hyper-parameters are converted to plain
    ``int`` / ``float`` / ``bool`` values before they are forwarded to
    ``callMLlibFunc``.

    :param data:              The training data.
    :param iterations:        The number of iterations (default: 100).
    :param step:              The step parameter used in SGD
                              (default: 1.0).
    :param miniBatchFraction: Fraction of data to be used for each SGD
                              iteration.
    :param initialWeights:    The initial weights (default: None).
    :param regParam:          The regularizer parameter (default: 1.0).
    :param regType:           The type of regularizer used for training
                              our model.

                              :Allowed values:
                                 - "l1" for using L1Updater
                                 - "l2" for using SquaredL2Updater
                                 - "none" for no regularizer

                                 (default: "none")

    :param intercept:         Boolean parameter which indicates the use
                              or not of the augmented representation for
                              training data (i.e. whether bias features
                              are activated or not).
    :return: Whatever ``_regression_train_wrapper`` produces when given
             ``LogisticRegressionModel`` as the model class.
    """
    def train(rdd, i):
        # Normalise the hyper-parameters at call time so the backend always
        # receives plain Python int/float/bool values, whatever numeric-like
        # objects the caller supplied. regType and the weights pass through.
        return callMLlibFunc("trainLogisticRegressionModelWithSGD", rdd,
                             int(iterations), float(step),
                             float(miniBatchFraction), i, float(regParam),
                             regType, bool(intercept))

    return _regression_train_wrapper(train, LogisticRegressionModel, data, initialWeights)
def train(cls, data, iterations=100, step=1.0, regParam=0.01,
          miniBatchFraction=1.0, initialWeights=None, regType="l2",
          intercept=False):
    """
    Fit a support vector machine to ``data`` using SGD.

    :param data:              The training data, an RDD of LabeledPoint.
    :param iterations:        Number of iterations (default: 100).
    :param step:              Step parameter used in SGD (default: 1.0).
    :param regParam:          Regularizer parameter (default: 0.01).
    :param miniBatchFraction: Fraction of data to be used for each SGD
                              iteration.
    :param initialWeights:    Initial weights (default: None).
    :param regType:           Type of regularizer used for training.

                              :Allowed values:
                                 - "l1" for using L1 regularization
                                 - "l2" for using L2 regularization
                                 - None for no regularization

                                 (default: "l2")

    :param intercept:         Whether to use the augmented representation
                              for training data (i.e. whether bias
                              features are activated or not).
    :return: The result of ``_regression_train_wrapper`` invoked with
             ``SVMModel`` as the model class.
    """
    def fit_svm(points, weights):
        # Positional order below is the order expected by the
        # "trainSVMModelWithSGD" entry point; conversions happen lazily,
        # when the wrapper actually invokes this callback.
        backend_args = (
            int(iterations),
            float(step),
            float(regParam),
            float(miniBatchFraction),
            weights,
            regType,
            bool(intercept),
        )
        return callMLlibFunc("trainSVMModelWithSGD", points, *backend_args)

    return _regression_train_wrapper(fit_svm, SVMModel, data, initialWeights)
def train(cls, data, iterations=100, step=1.0, miniBatchFraction=1.0,
          initialWeights=None, regParam=0.01, regType="l2", intercept=False,
          validateData=True, convergenceTol=0.001):
    """
    Fit a logistic regression model to ``data`` using SGD.

    :param data:
      The training data, an RDD of LabeledPoint.
    :param iterations:
      The number of iterations.
      (default: 100)
    :param step:
      The step parameter used in SGD.
      (default: 1.0)
    :param miniBatchFraction:
      Fraction of data to be used for each SGD iteration.
      (default: 1.0)
    :param initialWeights:
      The initial weights.
      (default: None)
    :param regParam:
      The regularizer parameter.
      (default: 0.01)
    :param regType:
      The type of regularizer used for training our model.
      Supported values:

        - "l1" for using L1 regularization
        - "l2" for using L2 regularization (default)
        - None for no regularization
    :param intercept:
      Boolean parameter which indicates the use or not of the
      augmented representation for training data (i.e., whether bias
      features are activated or not).
      (default: False)
    :param validateData:
      Boolean parameter which indicates if the algorithm should
      validate data before training.
      (default: True)
    :param convergenceTol:
      A condition which decides iteration termination.
      (default: 0.001)
    :return:
      The result of ``_regression_train_wrapper`` invoked with
      ``LogisticRegressionModel`` as the model class.
    """
    def run_sgd(points, weights):
        # Each hyper-parameter is converted to a plain Python value, in the
        # same left-to-right order as the backend's positional signature.
        num_iterations = int(iterations)
        step_size = float(step)
        batch_fraction = float(miniBatchFraction)
        reg_strength = float(regParam)
        use_intercept = bool(intercept)
        validate = bool(validateData)
        tolerance = float(convergenceTol)
        return callMLlibFunc(
            "trainLogisticRegressionModelWithSGD", points, num_iterations,
            step_size, batch_fraction, weights, reg_strength, regType,
            use_intercept, validate, tolerance)

    return _regression_train_wrapper(
        run_sgd, LogisticRegressionModel, data, initialWeights)
def train(cls, data, iterations=100, step=1.0, regParam=1.0,
          miniBatchFraction=1.0, initialWeights=None, regType="none",
          intercept=False):
    """
    Train a support vector machine on the given data.

    The numeric and boolean hyper-parameters are converted to plain
    ``int`` / ``float`` / ``bool`` values before the call is made through
    ``sc._jvm``.

    @param data:              The training data.
    @param iterations:        The number of iterations (default: 100).
    @param step:              The step parameter used in SGD
                              (default: 1.0).
    @param regParam:          The regularizer parameter (default: 1.0).
    @param miniBatchFraction: Fraction of data to be used for each SGD
                              iteration.
    @param initialWeights:    The initial weights (default: None).
    @param regType:           The type of regularizer used for training
                              our model.
                              Allowed values: "l1" for using L1Updater,
                                              "l2" for using
                                                   SquaredL2Updater,
                                              "none" for no regularizer.
                              (default: "none")
    @param intercept:         Boolean parameter which indicates the use
                              or not of the augmented representation for
                              training data (i.e. whether bias features
                              are activated or not).
    @return: Whatever C{_regression_train_wrapper} produces when given
             C{SVMModel} as the model class.
    """
    sc = data.context

    def train(jrdd, i):
        # Coerce at call time so the py4j method always receives plain
        # Python int/float/bool arguments regardless of what numeric-like
        # objects the caller passed in. regType and the weights are
        # forwarded untouched.
        return sc._jvm.PythonMLLibAPI().trainSVMModelWithSGD(
            jrdd, int(iterations), float(step), float(regParam),
            float(miniBatchFraction), i, regType, bool(intercept))

    return _regression_train_wrapper(sc, train, SVMModel, data, initialWeights)
def train(cls, data, iterations=100, step=1.0, miniBatchFraction=1.0,
          initialWeights=None, regParam=0.01, regType="l2", intercept=False,
          validateData=True, convergenceTol=0.001):
    """
    Fit a logistic regression model to the given data using SGD.

    Emits a ``DeprecationWarning`` on every call before training starts.

    .. versionadded:: 0.9.0

    Parameters
    ----------
    data : :py:class:`pyspark.RDD`
        The training data, an RDD of
        :py:class:`pyspark.mllib.regression.LabeledPoint`.
    iterations : int, optional
        The number of iterations.
        (default: 100)
    step : float, optional
        The step parameter used in SGD.
        (default: 1.0)
    miniBatchFraction : float, optional
        Fraction of data to be used for each SGD iteration.
        (default: 1.0)
    initialWeights : :py:class:`pyspark.mllib.linalg.Vector` or convertible, optional
        The initial weights.
        (default: None)
    regParam : float, optional
        The regularizer parameter.
        (default: 0.01)
    regType : str, optional
        The type of regularizer used for training our model.
        Supported values:

        - "l1" for using L1 regularization
        - "l2" for using L2 regularization (default)
        - None for no regularization

    intercept : bool, optional
        Boolean parameter which indicates the use or not of the
        augmented representation for training data (i.e., whether bias
        features are activated or not).
        (default: False)
    validateData : bool, optional
        Boolean parameter which indicates if the algorithm should
        validate data before training.
        (default: True)
    convergenceTol : float, optional
        A condition which decides iteration termination.
        (default: 0.001)

    Returns
    -------
    The result of ``_regression_train_wrapper`` invoked with
    ``LogisticRegressionModel`` as the model class.
    """
    deprecation_notice = (
        "Deprecated in 2.0.0. Use ml.classification.LogisticRegression or "
        "LogisticRegressionWithLBFGS."
    )
    warnings.warn(deprecation_notice, DeprecationWarning)

    def run_sgd(points, weights):
        # Arguments are listed in the backend's positional order; the
        # conversions run only when the wrapper invokes this callback.
        backend_args = (
            int(iterations),
            float(step),
            float(miniBatchFraction),
            weights,
            float(regParam),
            regType,
            bool(intercept),
            bool(validateData),
            float(convergenceTol),
        )
        return callMLlibFunc(
            "trainLogisticRegressionModelWithSGD", points, *backend_args)

    return _regression_train_wrapper(
        run_sgd, LogisticRegressionModel, data, initialWeights)
def train(cls, data, iterations=100, initialWeights=None, regParam=0.01,
          regType="l2", intercept=False, corrections=10, tolerance=1e-4,
          validateData=True, numClasses=2):
    """
    Fit a logistic regression model to the given data using L-BFGS.

    When ``initialWeights`` is None a zero vector is built from the
    feature count of the first record in ``data``: one entry per feature
    for ``numClasses == 2``; otherwise ``numClasses - 1`` repetitions of
    the per-class width (features, plus one if ``intercept`` is set).

    :param data:           The training data, an RDD of LabeledPoint.
    :param iterations:     The number of iterations (default: 100).
    :param initialWeights: The initial weights (default: None).
    :param regParam:       The regularizer parameter (default: 0.01).
    :param regType:        The type of regularizer used for training
                           our model.

                           :Allowed values:
                             - "l1" for using L1 regularization
                             - "l2" for using L2 regularization
                             - None for no regularization

                             (default: "l2")

    :param intercept:      Boolean parameter which indicates the use
                           or not of the augmented representation for
                           training data (i.e. whether bias features
                           are activated or not).
    :param corrections:    The number of corrections used in the LBFGS
                           update (default: 10).
    :param tolerance:      The convergence tolerance of iterations for
                           L-BFGS (default: 1e-4).
    :param validateData:   Boolean parameter which indicates if the
                           algorithm should validate data before training.
                           (default: True)
    :param numClasses:     The number of classes (i.e., outcomes) a label
                           can take in Multinomial Logistic Regression
                           (default: 2).

    >>> data = [
    ...     LabeledPoint(0.0, [0.0, 1.0]),
    ...     LabeledPoint(1.0, [1.0, 0.0]),
    ... ]
    >>> lrm = LogisticRegressionWithLBFGS.train(sc.parallelize(data), iterations=10)
    >>> lrm.predict([1.0, 0.0])
    1
    >>> lrm.predict([0.0, 1.0])
    0
    """
    def run_lbfgs(points, weights):
        backend_args = (
            int(iterations),
            weights,
            float(regParam),
            regType,
            bool(intercept),
            int(corrections),
            float(tolerance),
            bool(validateData),
            int(numClasses),
        )
        return callMLlibFunc(
            "trainLogisticRegressionModelWithLBFGS", points, *backend_args)

    if initialWeights is None:
        # The first record is only inspected when no weights were supplied.
        feature_count = len(data.first().features)
        if numClasses == 2:
            initialWeights = [0.0] * feature_count
        else:
            # One extra slot per class when an intercept term is requested.
            per_class = feature_count + 1 if intercept else feature_count
            initialWeights = [0.0] * per_class * (numClasses - 1)

    return _regression_train_wrapper(
        run_lbfgs, LogisticRegressionModel, data, initialWeights)
def train(cls, data, iterations=100, step=1.0, miniBatchFraction=1.0,
          initialWeights=None, regParam=0.01, regType="l2", intercept=False,
          validateData=True, convergenceTol=0.001):
    """
    Train a logistic regression model on the given data.

    Emits a ``DeprecationWarning`` on every call before training starts.

    :param data:
      The training data, an RDD of LabeledPoint.
    :param iterations:
      The number of iterations.
      (default: 100)
    :param step:
      The step parameter used in SGD.
      (default: 1.0)
    :param miniBatchFraction:
      Fraction of data to be used for each SGD iteration.
      (default: 1.0)
    :param initialWeights:
      The initial weights.
      (default: None)
    :param regParam:
      The regularizer parameter.
      (default: 0.01)
    :param regType:
      The type of regularizer used for training our model.
      Supported values:

        - "l1" for using L1 regularization
        - "l2" for using L2 regularization (default)
        - None for no regularization
    :param intercept:
      Boolean parameter which indicates the use or not of the
      augmented representation for training data (i.e., whether bias
      features are activated or not).
      (default: False)
    :param validateData:
      Boolean parameter which indicates if the algorithm should
      validate data before training.
      (default: True)
    :param convergenceTol:
      A condition which decides iteration termination.
      (default: 0.001)
    :return:
      Whatever ``_regression_train_wrapper`` produces when given
      ``LogisticRegressionModel`` as the model class.
    """
    # Pass an explicit category: without one, warnings.warn() falls back to
    # UserWarning and the notice would not be classified as a deprecation.
    warnings.warn(
        "Deprecated in 2.0.0. Use ml.classification.LogisticRegression or "
        "LogisticRegressionWithLBFGS.", DeprecationWarning)

    def train(rdd, i):
        return callMLlibFunc("trainLogisticRegressionModelWithSGD", rdd, int(iterations),
                             float(step), float(miniBatchFraction), i, float(regParam), regType,
                             bool(intercept), bool(validateData), float(convergenceTol))

    return _regression_train_wrapper(train, LogisticRegressionModel, data, initialWeights)
def train(cls, data, iterations=100, initialWeights=None, regParam=0.01,
          regType="l2", intercept=False, corrections=10, tolerance=1e-4):
    """
    Train a logistic regression model on the given data.

    :param data:           The training data, an RDD of LabeledPoint.
    :param iterations:     The number of iterations (default: 100).
    :param initialWeights: The initial weights (default: None).
    :param regParam:       The regularizer parameter (default: 0.01).
    :param regType:        The type of regularizer used for training
                           our model.

                           :Allowed values:
                             - "l1" for using L1 regularization
                             - "l2" for using L2 regularization
                             - None for no regularization

                             (default: "l2")

    :param intercept:      Boolean parameter which indicates the use
                           or not of the augmented representation for
                           training data (i.e. whether bias features
                           are activated or not).
    :param corrections:    The number of corrections used in the LBFGS
                           update (default: 10).
    :param tolerance:      The convergence tolerance of iterations for
                           L-BFGS (default: 1e-4).
    :return: Whatever ``_regression_train_wrapper`` produces when given
             ``LogisticRegressionModel`` as the model class.

    >>> data = [
    ...     LabeledPoint(0.0, [0.0, 1.0]),
    ...     LabeledPoint(1.0, [1.0, 0.0]),
    ... ]
    >>> lrm = LogisticRegressionWithLBFGS.train(sc.parallelize(data))
    >>> lrm.predict([1.0, 0.0])
    1
    >>> lrm.predict([0.0, 1.0])
    0
    """
    def train(rdd, i):
        # regType is forwarded as-is: wrapping it in str() would turn the
        # documented None ("no regularization") into the literal "None".
        return callMLlibFunc("trainLogisticRegressionModelWithLBFGS", rdd, int(iterations), i,
                             float(regParam), regType, bool(intercept), int(corrections),
                             float(tolerance))

    return _regression_train_wrapper(train, LogisticRegressionModel, data, initialWeights)
def train(cls, data, iterations=100, step=1.0, regParam=0.01,
          miniBatchFraction=1.0, initialWeights=None, regType="l2",
          intercept=False, validateData=True):
    """
    Fit a support vector machine to ``data`` using SGD.

    :param data:              The training data, an RDD of LabeledPoint.
    :param iterations:        Number of iterations (default: 100).
    :param step:              Step parameter used in SGD (default: 1.0).
    :param regParam:          Regularizer parameter (default: 0.01).
    :param miniBatchFraction: Fraction of data to be used for each SGD
                              iteration (default: 1.0).
    :param initialWeights:    Initial weights (default: None).
    :param regType:           Type of regularizer used for training.

                              :Allowed values:
                                 - "l1" for using L1 regularization
                                 - "l2" for using L2 regularization
                                 - None for no regularization

                                 (default: "l2")

    :param intercept:         Whether to use the augmented representation
                              for training data (i.e. whether bias
                              features are activated or not,
                              default: False).
    :param validateData:      Whether the algorithm should validate data
                              before training. (default: True)
    :return: The result of ``_regression_train_wrapper`` invoked with
             ``SVMModel`` as the model class.
    """
    def fit_svm(points, weights):
        # Converted one by one, in the backend's positional order.
        num_iterations = int(iterations)
        step_size = float(step)
        reg_strength = float(regParam)
        batch_fraction = float(miniBatchFraction)
        use_intercept = bool(intercept)
        validate = bool(validateData)
        return callMLlibFunc(
            "trainSVMModelWithSGD", points, num_iterations, step_size,
            reg_strength, batch_fraction, weights, regType, use_intercept,
            validate)

    return _regression_train_wrapper(fit_svm, SVMModel, data, initialWeights)
def train(cls, data, iterations=100, initialWeights=None, regParam=0.01,
          regType="l2", intercept=False, corrections=10, tolerance=1e-4):
    """
    Fit a logistic regression model to the given data using L-BFGS.

    :param data:           The training data, an RDD of LabeledPoint.
    :param iterations:     Number of iterations (default: 100).
    :param initialWeights: Initial weights (default: None).
    :param regParam:       Regularizer parameter (default: 0.01).
    :param regType:        Type of regularizer used for training.

                           :Allowed values:
                             - "l1" for using L1 regularization
                             - "l2" for using L2 regularization
                             - None for no regularization

                             (default: "l2")

    :param intercept:      Whether to use the augmented representation
                           for training data (i.e. whether bias features
                           are activated or not).
    :param corrections:    Number of corrections used in the LBFGS
                           update (default: 10).
    :param tolerance:      Convergence tolerance of iterations for
                           L-BFGS (default: 1e-4).
    :return: The result of ``_regression_train_wrapper`` invoked with
             ``LogisticRegressionModel`` as the model class.

    >>> data = [
    ...     LabeledPoint(0.0, [0.0, 1.0]),
    ...     LabeledPoint(1.0, [1.0, 0.0]),
    ... ]
    >>> lrm = LogisticRegressionWithLBFGS.train(sc.parallelize(data))
    >>> lrm.predict([1.0, 0.0])
    1
    >>> lrm.predict([0.0, 1.0])
    0
    """
    def run_lbfgs(points, weights):
        # regType is passed through untouched; everything else is converted
        # to a plain Python value in the backend's positional order.
        num_iterations = int(iterations)
        reg_strength = float(regParam)
        use_intercept = bool(intercept)
        num_corrections = int(corrections)
        convergence_tol = float(tolerance)
        return callMLlibFunc(
            "trainLogisticRegressionModelWithLBFGS", points, num_iterations,
            weights, reg_strength, regType, use_intercept, num_corrections,
            convergence_tol)

    return _regression_train_wrapper(
        run_lbfgs, LogisticRegressionModel, data, initialWeights)
def train(cls, data, iterations=100, step=1.0, regParam=1.0,
          miniBatchFraction=1.0, initialWeights=None, regType="none",
          intercept=False):
    """
    Train a support vector machine on the given data.

    The numeric and boolean hyper-parameters are converted to plain
    ``int`` / ``float`` / ``bool`` values before the call is made through
    ``sc._jvm``.

    :param data:              The training data.
    :param iterations:        The number of iterations (default: 100).
    :param step:              The step parameter used in SGD
                              (default: 1.0).
    :param regParam:          The regularizer parameter (default: 1.0).
    :param miniBatchFraction: Fraction of data to be used for each SGD
                              iteration.
    :param initialWeights:    The initial weights (default: None).
    :param regType:           The type of regularizer used for training
                              our model.

                              :Allowed values:
                                 - "l1" for using L1Updater
                                 - "l2" for using SquaredL2Updater
                                 - "none" for no regularizer

                                 (default: "none")

    :param intercept:         Boolean parameter which indicates the use
                              or not of the augmented representation for
                              training data (i.e. whether bias features
                              are activated or not).
    :return: Whatever ``_regression_train_wrapper`` produces when given
             ``SVMModel`` as the model class.
    """
    sc = data.context

    def train(jrdd, i):
        # Coerce at call time so the py4j method always receives plain
        # Python int/float/bool arguments regardless of what numeric-like
        # objects the caller passed in. regType and the weights are
        # forwarded untouched.
        return sc._jvm.PythonMLLibAPI().trainSVMModelWithSGD(
            jrdd, int(iterations), float(step), float(regParam),
            float(miniBatchFraction), i, regType, bool(intercept))

    return _regression_train_wrapper(sc, train, SVMModel, data, initialWeights)
def train(cls, data, iterations=100, initialWeights=None, regParam=0.0,
          regType="l2", intercept=False, corrections=10, tolerance=1e-6,
          validateData=True, numClasses=2):
    """
    Fit a logistic regression model to the given data using L-BFGS.

    When ``initialWeights`` is None a zero vector is built from the
    feature count of the first record in ``data``: one entry per feature
    for ``numClasses == 2``; otherwise ``numClasses - 1`` repetitions of
    the per-class width (features, plus one if ``intercept`` is set).

    :param data:
      The training data, an RDD of LabeledPoint.
    :param iterations:
      The number of iterations.
      (default: 100)
    :param initialWeights:
      The initial weights.
      (default: None)
    :param regParam:
      The regularizer parameter.
      (default: 0.0)
    :param regType:
      The type of regularizer used for training our model.
      Supported values:

        - "l1" for using L1 regularization
        - "l2" for using L2 regularization (default)
        - None for no regularization
    :param intercept:
      Boolean parameter which indicates the use or not of the
      augmented representation for training data (i.e., whether bias
      features are activated or not).
      (default: False)
    :param corrections:
      The number of corrections used in the LBFGS update.
      If a known updater is used for binary classification,
      it calls the ml implementation and this parameter will
      have no effect. (default: 10)
    :param tolerance:
      The convergence tolerance of iterations for L-BFGS.
      (default: 1e-6)
    :param validateData:
      Boolean parameter which indicates if the algorithm should
      validate data before training.
      (default: True)
    :param numClasses:
      The number of classes (i.e., outcomes) a label can take in
      Multinomial Logistic Regression.
      (default: 2)

    >>> data = [
    ...     LabeledPoint(0.0, [0.0, 1.0]),
    ...     LabeledPoint(1.0, [1.0, 0.0]),
    ... ]
    >>> lrm = LogisticRegressionWithLBFGS.train(sc.parallelize(data), iterations=10)
    >>> lrm.predict([1.0, 0.0])
    1
    >>> lrm.predict([0.0, 1.0])
    0
    """
    def run_lbfgs(points, weights):
        num_iterations = int(iterations)
        reg_strength = float(regParam)
        use_intercept = bool(intercept)
        num_corrections = int(corrections)
        convergence_tol = float(tolerance)
        validate = bool(validateData)
        class_count = int(numClasses)
        return callMLlibFunc(
            "trainLogisticRegressionModelWithLBFGS", points, num_iterations,
            weights, reg_strength, regType, use_intercept, num_corrections,
            convergence_tol, validate, class_count)

    if initialWeights is None:
        # The first record is only inspected when no weights were supplied.
        feature_count = len(data.first().features)
        if numClasses == 2:
            initialWeights = [0.0] * feature_count
        elif intercept:
            # Multinomial with intercept: one extra slot per class.
            initialWeights = [0.0] * (feature_count + 1) * (numClasses - 1)
        else:
            initialWeights = [0.0] * feature_count * (numClasses - 1)

    return _regression_train_wrapper(
        run_lbfgs, LogisticRegressionModel, data, initialWeights)
def train(
    cls,
    data: RDD[LabeledPoint],
    iterations: int = 100,
    step: float = 1.0,
    regParam: float = 0.01,
    miniBatchFraction: float = 1.0,
    initialWeights: Optional["VectorLike"] = None,
    regType: str = "l2",
    intercept: bool = False,
    validateData: bool = True,
    convergenceTol: float = 0.001,
) -> SVMModel:
    """
    Fit a support vector machine to the given data using SGD.

    .. versionadded:: 0.9.0

    Parameters
    ----------
    data : :py:class:`pyspark.RDD`
        The training data, an RDD of
        :py:class:`pyspark.mllib.regression.LabeledPoint`.
    iterations : int, optional
        The number of iterations.
        (default: 100)
    step : float, optional
        The step parameter used in SGD.
        (default: 1.0)
    regParam : float, optional
        The regularizer parameter.
        (default: 0.01)
    miniBatchFraction : float, optional
        Fraction of data to be used for each SGD iteration.
        (default: 1.0)
    initialWeights : :py:class:`pyspark.mllib.linalg.Vector` or convertible, optional
        The initial weights.
        (default: None)
    regType : str, optional
        The type of regularizer used for training our model.
        Allowed values:

        - "l1" for using L1 regularization
        - "l2" for using L2 regularization (default)
        - None for no regularization

    intercept : bool, optional
        Boolean parameter which indicates the use or not of the
        augmented representation for training data (i.e. whether bias
        features are activated or not).
        (default: False)
    validateData : bool, optional
        Boolean parameter which indicates if the algorithm should
        validate data before training.
        (default: True)
    convergenceTol : float, optional
        A condition which decides iteration termination.
        (default: 0.001)

    Returns
    -------
    SVMModel
        The result of ``_regression_train_wrapper`` invoked with
        ``SVMModel`` as the model class.
    """

    def fit_svm(points: RDD[LabeledPoint], weights: Vector) -> Iterable[Any]:
        # Arguments are listed in the backend's positional order; the
        # conversions run only when the wrapper invokes this callback.
        backend_args = (
            int(iterations),
            float(step),
            float(regParam),
            float(miniBatchFraction),
            weights,
            regType,
            bool(intercept),
            bool(validateData),
            float(convergenceTol),
        )
        return callMLlibFunc("trainSVMModelWithSGD", points, *backend_args)

    return _regression_train_wrapper(fit_svm, SVMModel, data, initialWeights)
def train(
    cls,
    data: RDD[LabeledPoint],
    iterations: int = 100,
    initialWeights: Optional["VectorLike"] = None,
    regParam: float = 0.0,
    regType: str = "l2",
    intercept: bool = False,
    corrections: int = 10,
    tolerance: float = 1e-6,
    validateData: bool = True,
    numClasses: int = 2,
) -> LogisticRegressionModel:
    """
    Train a logistic regression model on the given data.

    When ``initialWeights`` is None a zero vector is built from the
    feature count of the first record in ``data``: one entry per feature
    for ``numClasses == 2``; otherwise ``numClasses - 1`` repetitions of
    the per-class width (features, plus one if ``intercept`` is set).

    .. versionadded:: 1.2.0

    Parameters
    ----------
    data : :py:class:`pyspark.RDD`
        The training data, an RDD of
        :py:class:`pyspark.mllib.regression.LabeledPoint`.
    iterations : int, optional
        The number of iterations.
        (default: 100)
    initialWeights : :py:class:`pyspark.mllib.linalg.Vector` or convertible, optional
        The initial weights.
        (default: None)
    regParam : float, optional
        The regularizer parameter.
        (default: 0.0)
    regType : str, optional
        The type of regularizer used for training our model.
        Supported values:

        - "l1" for using L1 regularization
        - "l2" for using L2 regularization (default)
        - None for no regularization

    intercept : bool, optional
        Boolean parameter which indicates the use or not of the
        augmented representation for training data (i.e., whether bias
        features are activated or not).
        (default: False)
    corrections : int, optional
        The number of corrections used in the LBFGS update.
        If a known updater is used for binary classification,
        it calls the ml implementation and this parameter will
        have no effect. (default: 10)
    tolerance : float, optional
        The convergence tolerance of iterations for L-BFGS.
        (default: 1e-6)
    validateData : bool, optional
        Boolean parameter which indicates if the algorithm should
        validate data before training.
        (default: True)
    numClasses : int, optional
        The number of classes (i.e., outcomes) a label can take in
        Multinomial Logistic Regression.
        (default: 2)

    Returns
    -------
    LogisticRegressionModel
        The result of ``_regression_train_wrapper`` invoked with
        ``LogisticRegressionModel`` as the model class.

    Examples
    --------
    >>> data = [
    ...     LabeledPoint(0.0, [0.0, 1.0]),
    ...     LabeledPoint(1.0, [1.0, 0.0]),
    ... ]
    >>> lrm = LogisticRegressionWithLBFGS.train(sc.parallelize(data), iterations=10)
    >>> lrm.predict([1.0, 0.0])
    1
    >>> lrm.predict([0.0, 1.0])
    0
    """

    def train(rdd: RDD[LabeledPoint], i: Vector) -> Iterable[Any]:
        return callMLlibFunc(
            "trainLogisticRegressionModelWithLBFGS",
            rdd,
            int(iterations),
            i,
            float(regParam),
            regType,
            bool(intercept),
            int(corrections),
            float(tolerance),
            bool(validateData),
            int(numClasses),
        )

    if initialWeights is None:
        # Look at the first record once; it is only needed when the caller
        # did not supply starting weights.
        num_features = len(data.first().features)
        if numClasses == 2:
            initialWeights = [0.0] * num_features
        else:
            # Multinomial case: one extra slot per class when an intercept
            # term is requested.
            per_class = num_features + 1 if intercept else num_features
            initialWeights = [0.0] * per_class * (numClasses - 1)

    return _regression_train_wrapper(train, LogisticRegressionModel, data, initialWeights)