Example #1
0
    def train(cls, data, iterations=100, step=1.0, miniBatchFraction=1.0,
              initialWeights=None, regParam=1.0, regType="none", intercept=False):
        """
        Train a logistic regression model on the given data.

        :param data:              The training data.
        :param iterations:        The number of iterations (default: 100).
        :param step:              The step parameter used in SGD
                                  (default: 1.0).
        :param miniBatchFraction: Fraction of data to be used for each SGD
                                  iteration (default: 1.0).
        :param initialWeights:    The initial weights (default: None).
        :param regParam:          The regularizer parameter (default: 1.0).
        :param regType:           The type of regularizer used for training
                                  our model.

                                  :Allowed values:
                                     - "l1" for using L1Updater
                                     - "l2" for using SquaredL2Updater
                                     - "none" for no regularizer

                                     (default: "none")

        :param intercept:         Boolean parameter which indicates the use
                                  or not of the augmented representation for
                                  training data (i.e. whether bias features
                                  are activated or not) (default: False).
        """
        def train(rdd, i):
            # Normalise numeric and boolean arguments to exact Python types
            # before handing them to the backend, so that e.g. step=1 is
            # forwarded as a float.
            # NOTE(review): presumably the backend signature expects
            # int/double/boolean here - confirm against the JVM API.
            return callMLlibFunc("trainLogisticRegressionModelWithSGD", rdd, int(iterations),
                                 float(step), float(miniBatchFraction), i, float(regParam),
                                 regType, bool(intercept))

        return _regression_train_wrapper(train, LogisticRegressionModel, data, initialWeights)
Example #2
0
    def train(cls, data, iterations=100, step=1.0, regParam=0.01,
              miniBatchFraction=1.0, initialWeights=None, regType="l2", intercept=False):
        """
        Fit a support vector machine to the supplied training data.

        :param data:              Training set, an RDD of LabeledPoint.
        :param iterations:        How many iterations to run (default: 100).
        :param step:              Step parameter for SGD (default: 1.0).
        :param regParam:          Regularizer parameter (default: 0.01).
        :param miniBatchFraction: Share of the data used in each SGD
                                  iteration (default: 1.0).
        :param initialWeights:    Starting weights (default: None).
        :param regType:           Kind of regularizer applied while
                                  training the model.

                                  :Allowed values:
                                     - "l1" for using L1 regularization
                                     - "l2" for using L2 regularization
                                     - None for no regularization

                                     (default: "l2")

        :param intercept:         Boolean flag telling whether the augmented
                                  representation of the training data is
                                  used (i.e. whether bias features are
                                  activated) (default: False).
        """
        def fit_svm(points, weights):
            # Values are converted in the same left-to-right order in which
            # they are handed to the backend call.
            backend_args = (int(iterations), float(step), float(regParam),
                            float(miniBatchFraction), weights, regType, bool(intercept))
            return callMLlibFunc("trainSVMModelWithSGD", points, *backend_args)

        return _regression_train_wrapper(fit_svm, SVMModel, data, initialWeights)
Example #3
0
    def train(cls, data, iterations=100, step=1.0, miniBatchFraction=1.0,
              initialWeights=None, regParam=0.01, regType="l2", intercept=False,
              validateData=True, convergenceTol=0.001):
        """
        Fit a logistic regression model to the supplied training data.

        :param data:
          Training set, an RDD of LabeledPoint.
        :param iterations:
          How many iterations to run.
          (default: 100)
        :param step:
          Step parameter for SGD.
          (default: 1.0)
        :param miniBatchFraction:
          Share of the data used in each SGD iteration.
          (default: 1.0)
        :param initialWeights:
          Starting weights.
          (default: None)
        :param regParam:
          Regularizer parameter.
          (default: 0.01)
        :param regType:
          Kind of regularizer applied while training the model.
          Supported values:

            - "l1" for using L1 regularization
            - "l2" for using L2 regularization (default)
            - None for no regularization
        :param intercept:
          Boolean flag telling whether the augmented representation of
          the training data is used (i.e., whether bias features are
          activated).
          (default: False)
        :param validateData:
          Boolean flag telling whether the algorithm should validate the
          data before training.
          (default: True)
        :param convergenceTol:
          Condition that decides when iteration terminates.
          (default: 0.001)
        """
        def run_sgd(points, weights):
            # One argument per line, in the positional order the backend
            # function receives them.
            return callMLlibFunc(
                "trainLogisticRegressionModelWithSGD",
                points,
                int(iterations),
                float(step),
                float(miniBatchFraction),
                weights,
                float(regParam),
                regType,
                bool(intercept),
                bool(validateData),
                float(convergenceTol),
            )

        return _regression_train_wrapper(run_sgd, LogisticRegressionModel, data, initialWeights)
Example #4
0
    def train(cls, data, iterations=100, step=1.0, regParam=1.0,
              miniBatchFraction=1.0, initialWeights=None, regType="none", intercept=False):
        """
        Train a support vector machine on the given data.

        @param data:              The training data.
        @param iterations:        The number of iterations (default: 100).
        @param step:              The step parameter used in SGD
                                  (default: 1.0).
        @param regParam:          The regularizer parameter (default: 1.0).
        @param miniBatchFraction: Fraction of data to be used for each SGD
                                  iteration (default: 1.0).
        @param initialWeights:    The initial weights (default: None).
        @param regType:           The type of regularizer used for training
                                  our model.
                                  Allowed values: "l1" for using L1Updater,
                                                  "l2" for using
                                                       SquaredL2Updater,
                                                  "none" for no regularizer.
                                  (default: "none")
        @param intercept:         Boolean parameter which indicates the use
                                  or not of the augmented representation for
                                  training data (i.e. whether bias features
                                  are activated or not) (default: False).
        """
        sc = data.context

        def train(jrdd, i):
            # Normalise numeric and boolean arguments to exact Python types
            # before they cross into the JVM, so that e.g. step=1 is
            # forwarded as a float.
            # NOTE(review): presumably the JVM method expects
            # int/double/boolean here - confirm against PythonMLLibAPI.
            return sc._jvm.PythonMLLibAPI().trainSVMModelWithSGD(
                jrdd, int(iterations), float(step), float(regParam),
                float(miniBatchFraction), i, regType, bool(intercept))

        return _regression_train_wrapper(sc, train, SVMModel, data, initialWeights)
Example #5
0
    def train(cls, data, iterations=100, step=1.0, miniBatchFraction=1.0,
              initialWeights=None, regParam=0.01, regType="l2", intercept=False,
              validateData=True, convergenceTol=0.001):
        """
        Fit a logistic regression model to the supplied training data.

        A ``DeprecationWarning`` is issued on every call.

        .. versionadded:: 0.9.0

        Parameters
        ----------
        data : :py:class:`pyspark.RDD`
            Training set, an RDD of :py:class:`pyspark.mllib.regression.LabeledPoint`.
        iterations : int, optional
            How many iterations to run.
            (default: 100)
        step : float, optional
            Step parameter for SGD.
            (default: 1.0)
        miniBatchFraction : float, optional
            Share of the data used in each SGD iteration.
            (default: 1.0)
        initialWeights : :py:class:`pyspark.mllib.linalg.Vector` or convertible, optional
            Starting weights.
            (default: None)
        regParam : float, optional
            Regularizer parameter.
            (default: 0.01)
        regType : str, optional
            Kind of regularizer applied while training the model.
            Supported values:

            - "l1" for using L1 regularization
            - "l2" for using L2 regularization (default)
            - None for no regularization

        intercept : bool, optional
            Boolean flag telling whether the augmented representation of
            the training data is used (i.e., whether bias features are
            activated).
            (default: False)
        validateData : bool, optional
            Boolean flag telling whether the algorithm should validate the
            data before training.
            (default: True)
        convergenceTol : float, optional
            Condition that decides when iteration terminates.
            (default: 0.001)
        """
        deprecation_notice = (
            "Deprecated in 2.0.0. Use ml.classification.LogisticRegression or "
            "LogisticRegressionWithLBFGS."
        )
        warnings.warn(deprecation_notice, DeprecationWarning)

        def sgd_fit(points, weights):
            # One argument per line, in the positional order the backend
            # function receives them.
            return callMLlibFunc(
                "trainLogisticRegressionModelWithSGD",
                points,
                int(iterations),
                float(step),
                float(miniBatchFraction),
                weights,
                float(regParam),
                regType,
                bool(intercept),
                bool(validateData),
                float(convergenceTol),
            )

        return _regression_train_wrapper(sgd_fit, LogisticRegressionModel, data, initialWeights)
Example #6
0
    def train(cls, data, iterations=100, initialWeights=None, regParam=0.01, regType="l2",
              intercept=False, corrections=10, tolerance=1e-4, validateData=True, numClasses=2):
        """
        Fit a logistic regression model to the supplied training data.

        When no initial weights are given, a zero vector is built from the
        feature count of the first record in ``data``.

        :param data:           Training set, an RDD of LabeledPoint.
        :param iterations:     How many iterations to run (default: 100).
        :param initialWeights: Starting weights (default: None).
        :param regParam:       Regularizer parameter (default: 0.01).
        :param regType:        Kind of regularizer applied while training
                               the model.

                               :Allowed values:
                                 - "l1" for using L1 regularization
                                 - "l2" for using L2 regularization
                                 - None for no regularization

                                 (default: "l2")

        :param intercept:      Boolean flag telling whether the augmented
                               representation of the training data is used
                               (i.e. whether bias features are activated)
                               (default: False).
        :param corrections:    Number of corrections used in the LBFGS
                               update (default: 10).
        :param tolerance:      Convergence tolerance of iterations for
                               L-BFGS (default: 1e-4).
        :param validateData:   Boolean flag telling whether the algorithm
                               should validate the data before training.
                               (default: True)
        :param numClasses:     Number of classes (i.e., outcomes) a label can take
                               in Multinomial Logistic Regression (default: 2).

        >>> data = [
        ...     LabeledPoint(0.0, [0.0, 1.0]),
        ...     LabeledPoint(1.0, [1.0, 0.0]),
        ... ]
        >>> lrm = LogisticRegressionWithLBFGS.train(sc.parallelize(data), iterations=10)
        >>> lrm.predict([1.0, 0.0])
        1
        >>> lrm.predict([0.0, 1.0])
        0
        """
        def fit_lbfgs(points, weights):
            backend_args = (int(iterations), weights, float(regParam), regType,
                            bool(intercept), int(corrections), float(tolerance),
                            bool(validateData), int(numClasses))
            return callMLlibFunc("trainLogisticRegressionModelWithLBFGS", points, *backend_args)

        if initialWeights is None:
            feature_count = len(data.first().features)
            if numClasses == 2:
                initialWeights = [0.0] * feature_count
            else:
                # One block of weights per class beyond the first; each
                # block gets an extra slot when an intercept is requested.
                per_class = feature_count + 1 if intercept else feature_count
                initialWeights = [0.0] * per_class * (numClasses - 1)
        return _regression_train_wrapper(fit_lbfgs, LogisticRegressionModel, data,
                                         initialWeights)
Example #7
0
    def train(cls, data, iterations=100, step=1.0, miniBatchFraction=1.0,
              initialWeights=None, regParam=0.01, regType="l2", intercept=False,
              validateData=True, convergenceTol=0.001):
        """
        Train a logistic regression model on the given data.

        A ``DeprecationWarning`` is issued on every call.

        :param data:
          The training data, an RDD of LabeledPoint.
        :param iterations:
          The number of iterations.
          (default: 100)
        :param step:
          The step parameter used in SGD.
          (default: 1.0)
        :param miniBatchFraction:
          Fraction of data to be used for each SGD iteration.
          (default: 1.0)
        :param initialWeights:
          The initial weights.
          (default: None)
        :param regParam:
          The regularizer parameter.
          (default: 0.01)
        :param regType:
          The type of regularizer used for training our model.
          Supported values:

            - "l1" for using L1 regularization
            - "l2" for using L2 regularization (default)
            - None for no regularization
        :param intercept:
          Boolean parameter which indicates the use or not of the
          augmented representation for training data (i.e., whether bias
          features are activated or not).
          (default: False)
        :param validateData:
          Boolean parameter which indicates if the algorithm should
          validate data before training.
          (default: True)
        :param convergenceTol:
          A condition which decides iteration termination.
          (default: 0.001)
        """
        # Pass the category explicitly: without it warnings.warn() falls
        # back to UserWarning, so deprecation filters would not match.
        warnings.warn(
            "Deprecated in 2.0.0. Use ml.classification.LogisticRegression or "
            "LogisticRegressionWithLBFGS.", DeprecationWarning)

        def train(rdd, i):
            return callMLlibFunc("trainLogisticRegressionModelWithSGD", rdd, int(iterations),
                                 float(step), float(miniBatchFraction), i, float(regParam), regType,
                                 bool(intercept), bool(validateData), float(convergenceTol))

        return _regression_train_wrapper(train, LogisticRegressionModel, data, initialWeights)
Example #8
0
    def train(cls,
              data,
              iterations=100,
              initialWeights=None,
              regParam=0.01,
              regType="l2",
              intercept=False,
              corrections=10,
              tolerance=1e-4):
        """
        Train a logistic regression model on the given data.

        :param data:           The training data, an RDD of LabeledPoint.
        :param iterations:     The number of iterations (default: 100).
        :param initialWeights: The initial weights (default: None).
        :param regParam:       The regularizer parameter (default: 0.01).
        :param regType:        The type of regularizer used for training
                               our model.

                               :Allowed values:
                                 - "l1" for using L1 regularization
                                 - "l2" for using L2 regularization
                                 - None for no regularization

                                 (default: "l2")

        :param intercept:      Boolean parameter which indicates the use
                               or not of the augmented representation for
                               training data (i.e. whether bias features
                               are activated or not) (default: False).
        :param corrections:    The number of corrections used in the LBFGS
                               update (default: 10).
        :param tolerance:      The convergence tolerance of iterations for
                               L-BFGS (default: 1e-4).

        >>> data = [
        ...     LabeledPoint(0.0, [0.0, 1.0]),
        ...     LabeledPoint(1.0, [1.0, 0.0]),
        ... ]
        >>> lrm = LogisticRegressionWithLBFGS.train(sc.parallelize(data))
        >>> lrm.predict([1.0, 0.0])
        1
        >>> lrm.predict([0.0, 1.0])
        0
        """
        def train(rdd, i):
            # regType is forwarded untouched: wrapping it in str() would
            # turn the documented None ("no regularization") into the
            # literal string "None".
            return callMLlibFunc("trainLogisticRegressionModelWithLBFGS", rdd,
                                 int(iterations), i, float(regParam),
                                 regType, bool(intercept),
                                 int(corrections), float(tolerance))

        return _regression_train_wrapper(train, LogisticRegressionModel, data,
                                         initialWeights)
Example #9
0
    def train(cls, data, iterations=100, step=1.0, regParam=0.01,
              miniBatchFraction=1.0, initialWeights=None, regType="l2",
              intercept=False, validateData=True):
        """
        Fit a support vector machine to the supplied training data.

        :param data:              Training set, an RDD of LabeledPoint.
        :param iterations:        How many iterations to run
                                  (default: 100).
        :param step:              Step parameter for SGD
                                  (default: 1.0).
        :param regParam:          Regularizer parameter
                                  (default: 0.01).
        :param miniBatchFraction: Share of the data used in each SGD
                                  iteration (default: 1.0).
        :param initialWeights:    Starting weights (default: None).
        :param regType:           Kind of regularizer applied while
                                  training the model.

                                  :Allowed values:
                                     - "l1" for using L1 regularization
                                     - "l2" for using L2 regularization
                                     - None for no regularization

                                     (default: "l2")

        :param intercept:         Boolean flag telling whether the
                                  augmented representation of the training
                                  data is used (i.e. whether bias features
                                  are activated, default: False).
        :param validateData:      Boolean flag telling whether the
                                  algorithm should validate the data
                                  before training. (default: True)
        """
        def fit_svm(points, weights):
            # One argument per line, in the positional order the backend
            # function receives them.
            return callMLlibFunc(
                "trainSVMModelWithSGD",
                points,
                int(iterations),
                float(step),
                float(regParam),
                float(miniBatchFraction),
                weights,
                regType,
                bool(intercept),
                bool(validateData),
            )

        return _regression_train_wrapper(fit_svm, SVMModel, data, initialWeights)
Example #10
0
    def train(cls, data, iterations=100, initialWeights=None, regParam=0.01, regType="l2",
              intercept=False, corrections=10, tolerance=1e-4):
        """
        Fit a logistic regression model to the supplied training data.

        :param data:           Training set, an RDD of LabeledPoint.
        :param iterations:     How many iterations to run (default: 100).
        :param initialWeights: Starting weights (default: None).
        :param regParam:       Regularizer parameter (default: 0.01).
        :param regType:        Kind of regularizer applied while training
                               the model.

                               :Allowed values:
                                 - "l1" for using L1 regularization
                                 - "l2" for using L2 regularization
                                 - None for no regularization

                                 (default: "l2")

        :param intercept:      Boolean flag telling whether the augmented
                               representation of the training data is used
                               (i.e. whether bias features are activated)
                               (default: False).
        :param corrections:    Number of corrections used in the LBFGS
                               update (default: 10).
        :param tolerance:      Convergence tolerance of iterations for
                               L-BFGS (default: 1e-4).

        >>> data = [
        ...     LabeledPoint(0.0, [0.0, 1.0]),
        ...     LabeledPoint(1.0, [1.0, 0.0]),
        ... ]
        >>> lrm = LogisticRegressionWithLBFGS.train(sc.parallelize(data))
        >>> lrm.predict([1.0, 0.0])
        1
        >>> lrm.predict([0.0, 1.0])
        0
        """
        def fit_lbfgs(points, weights):
            backend_args = (int(iterations), weights, float(regParam), regType,
                            bool(intercept), int(corrections), float(tolerance))
            return callMLlibFunc("trainLogisticRegressionModelWithLBFGS", points, *backend_args)

        return _regression_train_wrapper(fit_lbfgs, LogisticRegressionModel, data,
                                         initialWeights)
    def train(cls,
              data,
              iterations=100,
              step=1.0,
              regParam=1.0,
              miniBatchFraction=1.0,
              initialWeights=None,
              regType="none",
              intercept=False):
        """
        Train a support vector machine on the given data.

        :param data:              The training data.
        :param iterations:        The number of iterations (default: 100).
        :param step:              The step parameter used in SGD
                                  (default: 1.0).
        :param regParam:          The regularizer parameter (default: 1.0).
        :param miniBatchFraction: Fraction of data to be used for each SGD
                                  iteration (default: 1.0).
        :param initialWeights:    The initial weights (default: None).
        :param regType:           The type of regularizer used for training
                                  our model.

                                  :Allowed values:
                                     - "l1" for using L1Updater
                                     - "l2" for using SquaredL2Updater
                                     - "none" for no regularizer

                                     (default: "none")

        :param intercept:         Boolean parameter which indicates the use
                                  or not of the augmented representation for
                                  training data (i.e. whether bias features
                                  are activated or not) (default: False).
        """
        sc = data.context

        def train(jrdd, i):
            # Normalise numeric and boolean arguments to exact Python types
            # before they cross into the JVM, so that e.g. step=1 is
            # forwarded as a float.
            # NOTE(review): presumably the JVM method expects
            # int/double/boolean here - confirm against PythonMLLibAPI.
            return sc._jvm.PythonMLLibAPI().trainSVMModelWithSGD(
                jrdd, int(iterations), float(step), float(regParam),
                float(miniBatchFraction), i, regType, bool(intercept))

        return _regression_train_wrapper(sc, train, SVMModel, data,
                                         initialWeights)
    def train(cls, data, iterations=100, initialWeights=None, regParam=0.0,
              regType="l2", intercept=False, corrections=10, tolerance=1e-6,
              validateData=True, numClasses=2):
        """
        Fit a logistic regression model to the supplied training data.

        When no initial weights are given, a zero vector is built from the
        feature count of the first record in ``data``.

        :param data:
          Training set, an RDD of LabeledPoint.
        :param iterations:
          How many iterations to run.
          (default: 100)
        :param initialWeights:
          Starting weights.
          (default: None)
        :param regParam:
          Regularizer parameter.
          (default: 0.0)
        :param regType:
          Kind of regularizer applied while training the model.
          Supported values:

            - "l1" for using L1 regularization
            - "l2" for using L2 regularization (default)
            - None for no regularization
        :param intercept:
          Boolean flag telling whether the augmented representation of
          the training data is used (i.e., whether bias features are
          activated).
          (default: False)
        :param corrections:
          Number of corrections used in the LBFGS update.
          If a known updater is used for binary classification,
          it calls the ml implementation and this parameter will
          have no effect. (default: 10)
        :param tolerance:
          Convergence tolerance of iterations for L-BFGS.
          (default: 1e-6)
        :param validateData:
          Boolean flag telling whether the algorithm should validate the
          data before training.
          (default: True)
        :param numClasses:
          Number of classes (i.e., outcomes) a label can take in
          Multinomial Logistic Regression.
          (default: 2)

        >>> data = [
        ...     LabeledPoint(0.0, [0.0, 1.0]),
        ...     LabeledPoint(1.0, [1.0, 0.0]),
        ... ]
        >>> lrm = LogisticRegressionWithLBFGS.train(sc.parallelize(data), iterations=10)
        >>> lrm.predict([1.0, 0.0])
        1
        >>> lrm.predict([0.0, 1.0])
        0
        """
        def fit_lbfgs(points, weights):
            # One argument per line, in the positional order the backend
            # function receives them.
            return callMLlibFunc(
                "trainLogisticRegressionModelWithLBFGS",
                points,
                int(iterations),
                weights,
                float(regParam),
                regType,
                bool(intercept),
                int(corrections),
                float(tolerance),
                bool(validateData),
                int(numClasses),
            )

        if initialWeights is None:
            width = len(data.first().features)
            if numClasses != 2:
                # Multiclass: one block of weights per class beyond the
                # first, each widened by one slot when intercept is set.
                if intercept:
                    width += 1
                initialWeights = [0.0] * width * (numClasses - 1)
            else:
                initialWeights = [0.0] * width
        return _regression_train_wrapper(fit_lbfgs, LogisticRegressionModel, data,
                                         initialWeights)
Example #13
0
    def train(
        cls,
        data: RDD[LabeledPoint],
        iterations: int = 100,
        step: float = 1.0,
        regParam: float = 0.01,
        miniBatchFraction: float = 1.0,
        initialWeights: Optional["VectorLike"] = None,
        regType: Optional[str] = "l2",
        intercept: bool = False,
        validateData: bool = True,
        convergenceTol: float = 0.001,
    ) -> SVMModel:
        """
        Train a support vector machine on the given data.

        .. versionadded:: 0.9.0

        Parameters
        ----------
        data : :py:class:`pyspark.RDD`
            The training data, an RDD of :py:class:`pyspark.mllib.regression.LabeledPoint`.
        iterations : int, optional
            The number of iterations.
            (default: 100)
        step : float, optional
            The step parameter used in SGD.
            (default: 1.0)
        regParam : float, optional
            The regularizer parameter.
            (default: 0.01)
        miniBatchFraction : float, optional
            Fraction of data to be used for each SGD iteration.
            (default: 1.0)
        initialWeights : :py:class:`pyspark.mllib.linalg.Vector` or convertible, optional
            The initial weights.
            (default: None)
        regType : str or None, optional
            The type of regularizer used for training our model.
            Allowed values:

            - "l1" for using L1 regularization
            - "l2" for using L2 regularization (default)
            - None for no regularization

        intercept : bool, optional
            Boolean parameter which indicates the use or not of the
            augmented representation for training data (i.e. whether bias
            features are activated or not).
            (default: False)
        validateData : bool, optional
            Boolean parameter which indicates if the algorithm should
            validate data before training.
            (default: True)
        convergenceTol : float, optional
            A condition which decides iteration termination.
            (default: 0.001)
        """

        def train(rdd: RDD[LabeledPoint], i: Vector) -> Iterable[Any]:
            # regType is forwarded as-is so that None (no regularization)
            # reaches the backend unchanged.
            return callMLlibFunc(
                "trainSVMModelWithSGD",
                rdd,
                int(iterations),
                float(step),
                float(regParam),
                float(miniBatchFraction),
                i,
                regType,
                bool(intercept),
                bool(validateData),
                float(convergenceTol),
            )

        return _regression_train_wrapper(train, SVMModel, data, initialWeights)
Example #14
0
    def train(
        cls,
        data: RDD[LabeledPoint],
        iterations: int = 100,
        initialWeights: Optional["VectorLike"] = None,
        regParam: float = 0.0,
        regType: Optional[str] = "l2",
        intercept: bool = False,
        corrections: int = 10,
        tolerance: float = 1e-6,
        validateData: bool = True,
        numClasses: int = 2,
    ) -> LogisticRegressionModel:
        """
        Train a logistic regression model on the given data.

        When ``initialWeights`` is None, a zero vector is built from the
        feature count of the first record in ``data``.

        .. versionadded:: 1.2.0

        Parameters
        ----------
        data : :py:class:`pyspark.RDD`
            The training data, an RDD of :py:class:`pyspark.mllib.regression.LabeledPoint`.
        iterations : int, optional
            The number of iterations.
            (default: 100)
        initialWeights : :py:class:`pyspark.mllib.linalg.Vector` or convertible, optional
            The initial weights.
            (default: None)
        regParam : float, optional
            The regularizer parameter.
            (default: 0.0)
        regType : str or None, optional
            The type of regularizer used for training our model.
            Supported values:

            - "l1" for using L1 regularization
            - "l2" for using L2 regularization (default)
            - None for no regularization

        intercept : bool, optional
            Boolean parameter which indicates the use or not of the
            augmented representation for training data (i.e., whether bias
            features are activated or not).
            (default: False)
        corrections : int, optional
            The number of corrections used in the LBFGS update.
            If a known updater is used for binary classification,
            it calls the ml implementation and this parameter will
            have no effect. (default: 10)
        tolerance : float, optional
            The convergence tolerance of iterations for L-BFGS.
            (default: 1e-6)
        validateData : bool, optional
            Boolean parameter which indicates if the algorithm should
            validate data before training.
            (default: True)
        numClasses : int, optional
            The number of classes (i.e., outcomes) a label can take in
            Multinomial Logistic Regression.
            (default: 2)

        Examples
        --------
        >>> data = [
        ...     LabeledPoint(0.0, [0.0, 1.0]),
        ...     LabeledPoint(1.0, [1.0, 0.0]),
        ... ]
        >>> lrm = LogisticRegressionWithLBFGS.train(sc.parallelize(data), iterations=10)
        >>> lrm.predict([1.0, 0.0])
        1
        >>> lrm.predict([0.0, 1.0])
        0
        """

        def train(rdd: RDD[LabeledPoint], i: Vector) -> Iterable[Any]:
            return callMLlibFunc(
                "trainLogisticRegressionModelWithLBFGS",
                rdd,
                int(iterations),
                i,
                float(regParam),
                regType,
                bool(intercept),
                int(corrections),
                float(tolerance),
                bool(validateData),
                int(numClasses),
            )

        if initialWeights is None:
            # Look the feature count up once; every branch below needs it.
            num_features = len(data.first().features)
            if numClasses == 2:
                initialWeights = [0.0] * num_features
            else:
                # One block of weights per class beyond the first; each
                # block gets an extra slot when an intercept is requested.
                weights_per_class = num_features + 1 if intercept else num_features
                initialWeights = [0.0] * weights_per_class * (numClasses - 1)
        return _regression_train_wrapper(train, LogisticRegressionModel, data, initialWeights)