Example #1
import pytest

from sklearn.linear_model._base import _deprecate_normalize


def test_error_on_wrong_normalize():
    normalize = 'wrong'
    default = True
    error_msg = "Leave 'normalize' to its default"
    with pytest.raises(ValueError, match=error_msg):
        _deprecate_normalize(normalize, default, 'estimator')
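
For context, the helper under test resolves the user-facing normalize flag: an explicit boolean passes through, while the 'deprecated' sentinel resolves to the default (Examples 3 and 6 below exercise exactly this). A minimal sketch, assuming scikit-learn 1.0/1.1, where this private helper still exists:

import warnings
from sklearn.linear_model._base import _deprecate_normalize

with warnings.catch_warnings():
    warnings.simplefilter("ignore", FutureWarning)
    # Explicit booleans pass through; the sentinel resolves to the default.
    assert _deprecate_normalize(True, False, 'estimator') is True
    assert _deprecate_normalize('deprecated', False, 'estimator') is False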
Example #2
def _fit_ridge(self, X, y, sample_weight=None):
    """Fit Ridge regression model

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        Training data

    y : array-like, shape = [n_samples] or [n_samples, n_targets]
        Target values

    sample_weight : float or numpy array of shape [n_samples]
        Individual weights for each sample

    Returns
    -------
    self : object
        Returns an instance of self.
    """
    if sklearn_check_version('1.0'):
        from sklearn.linear_model._base import _deprecate_normalize
        self._normalize = _deprecate_normalize(
            self.normalize,
            default=False,
            estimator_name=self.__class__.__name__)

    X, y = check_X_y(X,
                     y, ['csr', 'csc', 'coo'],
                     dtype=[np.float64, np.float32],
                     multi_output=True,
                     y_numeric=True)
    self.n_features_in_ = X.shape[1]
    self.sample_weight_ = sample_weight
    self.fit_shape_good_for_daal_ = X.shape[0] >= X.shape[1]
    if not self.solver == 'auto' or sp.issparse(X) or \
            not self.fit_shape_good_for_daal_ or \
            not (X.dtype == np.float64 or X.dtype == np.float32) or \
            sample_weight is not None or \
            (hasattr(self, 'positive') and self.positive):
        if hasattr(self, 'daal_model_'):
            del self.daal_model_
        logging.info("sklearn.linear_model.Ridge.fit: " +
                     get_patch_message("sklearn"))
        return super(Ridge, self).fit(X, y, sample_weight=sample_weight)
    self.n_iter_ = None
    logging.info("sklearn.linear_model.Ridge.fit: " +
                 get_patch_message("daal"))
    res = _daal4py_fit(self, X, y)
    if res is None:
        logging.info("sklearn.linear_model.Ridge.fit: " +
                     get_patch_message("sklearn_after_daal"))
        if hasattr(self, 'daal_model_'):
            del self.daal_model_
        return super(Ridge, self).fit(X, y, sample_weight=sample_weight)
    return res
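
The chained guard above sends anything oneDAL cannot handle (non-'auto' solver, sparse X, more features than samples, non-float dtype, sample weights, positive=True) back to stock scikit-learn. A hedged usage sketch that should stay on the oneDAL path (the daal4py import path is an assumption):

import numpy as np
from daal4py.sklearn.linear_model import Ridge

rng = np.random.RandomState(0)
X = rng.rand(100, 5)                     # dense float64, n_samples >= n_features
y = X @ np.arange(1.0, 6.0)
model = Ridge(solver='auto').fit(X, y)   # meets every oneDAL condition above
print(model.coef_)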
Example #3
def test_deprecate_normalize(normalize, default):
    # test all possible cases of the normalize parameter deprecation
    if not default:
        if normalize == "deprecated":
            # no warning
            output = default
            expected = None
            warning_msg = []
        else:
            output = normalize
            expected = FutureWarning
            warning_msg = ["1.2"]
            if not normalize:
                warning_msg.append("default value")
            else:
                warning_msg.append("StandardScaler(")
    elif default:
        if normalize == "deprecated":
            # warning to pass False and use StandardScaler
            output = default
            expected = FutureWarning
            warning_msg = ["False", "1.2", "StandardScaler("]
        else:
            # no warning
            output = normalize
            expected = None
            warning_msg = []

    if expected is None:
        with warnings.catch_warnings():
            warnings.simplefilter("error", FutureWarning)
            _normalize = _deprecate_normalize(normalize, default, "estimator")
    else:
        with pytest.warns(expected) as record:
            _normalize = _deprecate_normalize(normalize, default, "estimator")
        assert all(
            [warning in str(record[0].message) for warning in warning_msg])
    assert _normalize == output
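
This test receives normalize and default as parameters, so it is presumably driven by pytest.mark.parametrize; a hypothetical decorator along these lines (the actual grids in the source file may differ):

import pytest

@pytest.mark.parametrize("normalize", [True, False, "deprecated"])
@pytest.mark.parametrize("default", [True, False])
def test_deprecate_normalize(normalize, default):
    ...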
Example #4
    def fit(self, X, y, sample_weight=None):
        if sklearn_check_version('1.0'):
            from sklearn.linear_model._base import _deprecate_normalize
            self._normalize = _deprecate_normalize(
                self.normalize, default=False,
                estimator_name=self.__class__.__name__
            )

        if self.positive is True:
            logging.info(
                "sklearn.linar_model.LinearRegression."
                "fit: " + get_patch_message("sklearn"))
            return super(LinearRegression, self).fit(X, y=y, sample_weight=sample_weight)
        return _fit_linear(self, X, y, sample_weight=sample_weight)
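
positive=True (added in scikit-learn 0.24) requests non-negative coefficients via an NNLS solve, which the oneDAL backend does not implement, hence the unconditional fallback above. A quick illustration with stock scikit-learn:

import numpy as np
from sklearn.linear_model import LinearRegression

X = np.array([[0.0], [1.0], [2.0]])
y = np.array([2.0, 1.0, 0.0])                      # negatively correlated target
model = LinearRegression(positive=True).fit(X, y)
print(model.coef_)                                 # clipped to 0.0 by the NNLS solve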
Example #5
    def fit(self, X, y, sample_weight=None):
        """
        Fit linear model.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training data.

        y : array-like of shape (n_samples,) or (n_samples, n_targets)
            Target values. Will be cast to X's dtype if necessary.

        sample_weight : array-like of shape (n_samples,), default=None
            Individual weights for each sample.

            .. versionadded:: 0.17
               parameter *sample_weight* support to LinearRegression.

        Returns
        -------
        self : object
            Fitted Estimator.
        """
        if sklearn_check_version('1.0'):
            self._normalize = _deprecate_normalize(
                self.normalize,
                default=False,
                estimator_name=self.__class__.__name__,
            )
            self._check_feature_names(X, reset=True)

        if sklearn_check_version('0.24'):
            _patching_status = PatchingConditionsChain(
                "sklearn.linear_model.LinearRegression.fit")
            _dal_ready = _patching_status.and_conditions([
                (self.positive is False,
                 "Forced positive coefficients are not supported.")
            ])
            if not _dal_ready:
                _patching_status.write_log()
                return super(LinearRegression,
                             self).fit(X, y=y, sample_weight=sample_weight)
        return _fit_linear(self, X, y, sample_weight=sample_weight)
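
PatchingConditionsChain is daal4py's internal helper for deciding and logging whether the accelerated path applies. A rough stand-in that mimics the and_conditions/write_log contract these examples rely on (the real implementation is internal; this sketch is a guess at its behavior):

import logging

class PatchingConditionsChain:
    """Stand-in: collects (condition, reason) pairs and logs the outcome."""

    def __init__(self, function_name):
        self.function_name = function_name
        self.failure_reasons = []

    def and_conditions(self, conditions):
        # Each entry is (bool, reason-shown-when-False); all must hold.
        self.failure_reasons += [msg for ok, msg in conditions if not ok]
        return not self.failure_reasons

    def write_log(self):
        if self.failure_reasons:
            logging.info("%s: fallback to sklearn: %s",
                         self.function_name, " ".join(self.failure_reasons))
        else:
            logging.info("%s: running accelerated version", self.function_name)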
Example #6
def test_deprecate_normalize(normalize, default):
    # test all possible cases of the normalize parameter deprecation
    if not default:
        if normalize == 'deprecated':
            # no warning
            output = default
            expected = None
            warning_msg = []
        else:
            output = normalize
            expected = FutureWarning
            warning_msg = ['1.2']
            if not normalize:
                warning_msg.append('default value')
            else:
                warning_msg.append('StandardScaler(')
    elif default:
        if normalize == 'deprecated':
            # warning to pass False and use StandardScaler
            output = default
            expected = FutureWarning
            warning_msg = ['False', '1.2', 'StandardScaler(']
        else:
            # no warning
            output = normalize
            expected = None
            warning_msg = []

    with pytest.warns(expected) as record:
        _normalize = _deprecate_normalize(normalize, default, 'estimator')
    assert _normalize == output

    n_warnings = 0 if expected is None else 1
    assert len(record) == n_warnings
    if n_warnings:
        assert all([
            warning in str(record[0].message)
            for warning in warning_msg
        ])
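
Unlike Example 3, this variant leans on pytest.warns(expected) with expected=None to cover the no-warning case; pytest deprecated pytest.warns(None) in 7.0, so on newer pytest the warnings.catch_warnings pattern from Example 3 is the safer spelling. Self-contained:

import warnings
from sklearn.linear_model._base import _deprecate_normalize

with warnings.catch_warnings():
    warnings.simplefilter("error", FutureWarning)  # any FutureWarning fails the test
    assert _deprecate_normalize('deprecated', False, 'estimator') is False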
Example #7
def _fit_ridge(self, X, y, sample_weight=None):
    """Fit Ridge regression model

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        Training data

    y : array-like, shape = [n_samples] or [n_samples, n_targets]
        Target values

    sample_weight : float or numpy array of shape [n_samples]
        Individual weights for each sample

    Returns
    -------
    self : object
        Returns an instance of self.
    """
    if sklearn_check_version('1.0'):
        from sklearn.linear_model._base import _deprecate_normalize
        self._normalize = _deprecate_normalize(
            self.normalize,
            default=False,
            estimator_name=self.__class__.__name__)
        self._check_feature_names(X, reset=True)

    X, y = check_X_y(X,
                     y, ['csr', 'csc', 'coo'],
                     dtype=[np.float64, np.float32],
                     multi_output=True,
                     y_numeric=True)
    self.n_features_in_ = X.shape[1]
    self.sample_weight_ = sample_weight
    self.fit_shape_good_for_daal_ = X.shape[0] >= X.shape[1]

    _patching_status = PatchingConditionsChain(
        "sklearn.linear_model.Ridge.fit")
    _dal_ready = _patching_status.and_conditions([
        (self.solver == 'auto', f"'{self.solver}' solver is not supported. "
         "Only 'auto' solver is supported."),
        (not sp.issparse(X), "X is sparse. Sparse input is not supported."),
        (self.fit_shape_good_for_daal_,
         "The shape of X does not satisfy oneDAL requirements: "
         "number of features > number of samples."),
        (X.dtype == np.float64 or X.dtype == np.float32,
         f"'{X.dtype}' X data type is not supported. "
         "Only np.float32 and np.float64 are supported."),
        (sample_weight is None, "Sample weights are not supported."),
        (not (hasattr(self, 'positive') and self.positive),
         "Forced positive coefficients are not supported.")
    ])
    _patching_status.write_log()

    if not _dal_ready:
        if hasattr(self, 'daal_model_'):
            del self.daal_model_
        return super(Ridge, self).fit(X, y, sample_weight=sample_weight)
    self.n_iter_ = None
    res = _daal4py_fit(self, X, y)
    if res is None:
        logging.info("sklearn.linear_model.Ridge.fit: " +
                     get_patch_message("sklearn_after_daal"))
        if hasattr(self, 'daal_model_'):
            del self.daal_model_
        return super(Ridge, self).fit(X, y, sample_weight=sample_weight)
    return res
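
Relative to Example 2, this revision adds _check_feature_names(X, reset=True), which records pandas column names at fit time so later calls can detect mismatches. A standalone illustration with stock scikit-learn >= 1.0:

import pandas as pd
from sklearn.linear_model import Ridge

X = pd.DataFrame({"a": [0.0, 1.0, 2.0], "b": [1.0, 0.0, 1.0]})
model = Ridge().fit(X, [0.0, 1.0, 2.0])
print(model.feature_names_in_)    # ['a' 'b'], captured at fit time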
Example #8
def _fit(self, X, y, sample_weight=None, check_input=True):
    if sklearn_check_version('1.0'):
        self._check_feature_names(X, reset=True)
    # check X and y
    if check_input:
        X, y = check_X_y(
            X,
            y,
            copy=False,
            accept_sparse='csc',
            dtype=[np.float64, np.float32],
            multi_output=True,
            y_numeric=True,
        )
        y = check_array(y, copy=False, dtype=X.dtype.type, ensure_2d=False)

    if not sp.issparse(X):
        self.fit_shape_good_for_daal_ = \
            X.ndim <= 1 or X.shape[0] >= X.shape[1]
    else:
        self.fit_shape_good_for_daal_ = False

    _function_name = f"sklearn.linear_model.{self.__class__.__name__}.fit"
    _patching_status = PatchingConditionsChain(_function_name)
    _dal_ready = _patching_status.and_conditions([
        (not sp.issparse(X), "X is sparse. Sparse input is not supported."),
        (self.fit_shape_good_for_daal_,
            "The shape of X does not satisfy oneDAL requirements: "
            "number of features > number of samples."),
        (X.dtype == np.float64 or X.dtype == np.float32,
            f"'{X.dtype}' X data type is not supported. "
            "Only np.float32 and np.float64 are supported."),
        (sample_weight is None, "Sample weights are not supported.")])
    _patching_status.write_log()

    if not _dal_ready:
        if hasattr(self, 'daal_model_'):
            del self.daal_model_
        if sklearn_check_version('0.23'):
            res_new = super(ElasticNet, self).fit(
                X, y, sample_weight=sample_weight, check_input=check_input)
        else:
            res_new = super(ElasticNet, self).fit(
                X, y, check_input=check_input)
        self._gap = res_new.dual_gap_
        return res_new
    self.n_iter_ = None
    self._gap = None

    if not check_input:
        # this check exists only for compliance with scikit-learn;
        # it is not required by Intel(R) oneAPI Data Analytics Library
        if isinstance(X, np.ndarray) and \
                not X.flags['F_CONTIGUOUS']:
            raise ValueError("ndarray is not Fortran contiguous")

    if sklearn_check_version('1.0'):
        self._normalize = _deprecate_normalize(
            self.normalize,
            default=False,
            estimator_name=self.__class__.__name__)

    # copy only to pass scikit-learn's read-only memmap checks:
    # check_estimators_fit_returns_self(readonly_memmap=True) and
    # check_regressors_train(readonly_memmap=True)
    if not X.flags.writeable:
        X = np.copy(X)
    if not y.flags.writeable:
        y = np.copy(y)

    if self.__class__.__name__ == "ElasticNet":
        res = _daal4py_fit_enet(self, X, y, check_input=check_input)
    else:
        res = _daal4py_fit_lasso(self, X, y, check_input=check_input)
    if res is None:
        if hasattr(self, 'daal_model_'):
            del self.daal_model_
        logging.info(
            _function_name + ": " + get_patch_message("sklearn_after_daal")
        )
        if sklearn_check_version('0.23'):
            res_new = super(ElasticNet, self).fit(
                X, y, sample_weight=sample_weight, check_input=check_input)
        else:
            res_new = super(ElasticNet, self).fit(
                X, y, check_input=check_input)
        self._gap = res_new.dual_gap_
        return res_new
    return res
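
The np.copy guards above exist because scikit-learn's estimator checks feed read-only memmaps into fit; a minimal standalone illustration of the same workaround:

import numpy as np

X = np.arange(6.0).reshape(3, 2)
X.setflags(write=False)           # simulate a read-only memmap
if not X.flags.writeable:
    X = np.copy(X)                # oneDAL needs a writable buffer
assert X.flags.writeable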
Example #9
    def fit(self, X, y, sample_weight=None, check_input=True):
        """Fit model with coordinate descent.

        Parameters
        ----------
        X : {ndarray, sparse matrix} of shape (n_samples, n_features)
            Data.

        y : {ndarray, sparse matrix} of shape (n_samples,) or \
            (n_samples, n_targets)
            Target. Will be cast to X's dtype if necessary.

        sample_weight : float or array-like of shape (n_samples,), default=None
            Sample weight.

        check_input : bool, default=True
            Allows bypassing several input checks.
            Don't use this parameter unless you know what you're doing.

        Notes
        -----

        Coordinate descent is an algorithm that considers one column of
        data at a time, hence it will automatically convert the X input
        to a Fortran-contiguous numpy array if necessary.

        To avoid memory re-allocation it is advised to allocate the
        initial data in memory directly using that format.
        """
        # check X and y
        if check_input:
            X, y = check_X_y(X,
                             y,
                             copy=False,
                             accept_sparse='csc',
                             dtype=[np.float64, np.float32],
                             multi_output=True,
                             y_numeric=True)
            y = check_array(y, copy=False, dtype=X.dtype.type, ensure_2d=False)
        else:
            # this check exists only for compliance with scikit-learn;
            # it is not required by Intel(R) oneAPI Data Analytics Library
            if isinstance(X, np.ndarray) and \
                    not X.flags['F_CONTIGUOUS']:
                raise ValueError("ndarray is not Fortran contiguous")

        if isinstance(X, np.ndarray):
            self.fit_shape_good_for_daal_ = \
                X.ndim <= 1 or X.shape[0] >= X.shape[1]
        else:
            self.fit_shape_good_for_daal_ = False

        if sp.issparse(X) or \
                sample_weight is not None or \
                not self.fit_shape_good_for_daal_ or \
                not (X.dtype == np.float64 or X.dtype == np.float32):
            if hasattr(self, 'daal_model_'):
                del self.daal_model_
            logging.info("sklearn.linear_model.Lasso."
                         "fit: " + get_patch_message("sklearn"))
            res_new = super(ElasticNet, self).fit(X,
                                                  y,
                                                  sample_weight=sample_weight,
                                                  check_input=check_input)
            self._gap = res_new.dual_gap_
            return res_new

        if sklearn_check_version('1.0'):
            self._normalize = _deprecate_normalize(
                self.normalize,
                default=False,
                estimator_name=self.__class__.__name__)

        self.n_iter_ = None
        self._gap = None
        # copy only to pass scikit-learn's read-only memmap checks:
        # check_estimators_fit_returns_self(readonly_memmap=True) and
        # check_regressors_train(readonly_memmap=True)
        if not X.flags.writeable:
            X = np.copy(X)
        if not y.flags.writeable:
            y = np.copy(y)
        logging.info("sklearn.linear_model.Lasso."
                     "fit: " + get_patch_message("daal"))
        res = _daal4py_fit_lasso(self, X, y, check_input=check_input)
        if res is None:
            if hasattr(self, 'daal_model_'):
                del self.daal_model_
            logging.info("sklearn.linear_model.Lasso."
                         "fit: " + get_patch_message("sklearn_after_daal"))
            res_new = super(ElasticNet, self).fit(X,
                                                  y,
                                                  sample_weight=sample_weight,
                                                  check_input=check_input)
            self._gap = res_new.dual_gap_
            return res_new
        return res
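
When check_input=False, the caller must hand in a Fortran-ordered array, since coordinate descent walks columns; a short sketch of preparing such input:

import numpy as np

X_c = np.random.rand(10, 3)       # C-ordered by default
X_f = np.asfortranarray(X_c)      # column-major layout, as this fit expects
assert X_f.flags['F_CONTIGUOUS']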
Example #10
def _fit(self, X, y, sample_weight=None, check_input=True):
    # check X and y
    if check_input:
        X, y = check_X_y(
            X,
            y,
            copy=False,
            accept_sparse='csc',
            dtype=[np.float64, np.float32],
            multi_output=True,
            y_numeric=True,
        )
        y = check_array(y, copy=False, dtype=X.dtype.type, ensure_2d=False)

    if not sp.issparse(X):
        self.fit_shape_good_for_daal_ = \
            X.ndim <= 1 or X.shape[0] >= X.shape[1]
    else:
        self.fit_shape_good_for_daal_ = False

    log_str = "sklearn.linear_model." + self.__class__.__name__ + ".fit: "
    sklearn_ready = sp.issparse(X) or not self.fit_shape_good_for_daal_ or \
        X.dtype not in [np.float64, np.float32] or sample_weight is not None

    if sklearn_ready:
        if hasattr(self, 'daal_model_'):
            del self.daal_model_
        logging.info(
            log_str + get_patch_message("sklearn")
        )
        if sklearn_check_version('0.23'):
            res_new = super(ElasticNet, self).fit(
                X, y, sample_weight=sample_weight, check_input=check_input)
        else:
            res_new = super(ElasticNet, self).fit(
                X, y, check_input=check_input)
        self._gap = res_new.dual_gap_
        return res_new
    self.n_iter_ = None
    self._gap = None

    if not check_input:
        # this check exists only for compliance with scikit-learn;
        # it is not required by Intel(R) oneAPI Data Analytics Library
        if isinstance(X, np.ndarray) and \
                not X.flags['F_CONTIGUOUS']:
            raise ValueError("ndarray is not Fortran contiguous")

    if sklearn_check_version('1.0'):
        self._normalize = _deprecate_normalize(
            self.normalize,
            default=False,
            estimator_name=self.__class__.__name__)

    # copy only to pass scikit-learn's read-only memmap checks:
    # check_estimators_fit_returns_self(readonly_memmap=True) and
    # check_regressors_train(readonly_memmap=True)
    if not X.flags.writeable:
        X = np.copy(X)
    if not y.flags.writeable:
        y = np.copy(y)
    logging.info(log_str + get_patch_message("daal"))

    if self.__class__.__name__ == "ElasticNet":
        res = _daal4py_fit_enet(self, X, y, check_input=check_input)
    else:
        res = _daal4py_fit_lasso(self, X, y, check_input=check_input)
    if res is None:
        if hasattr(self, 'daal_model_'):
            del self.daal_model_
        logging.info(
            log_str + get_patch_message("sklearn_after_daal")
        )
        if sklearn_check_version('0.23'):
            res_new = super(ElasticNet, self).fit(
                X, y, sample_weight=sample_weight, check_input=check_input)
        else:
            res_new = super(ElasticNet, self).fit(
                X, y, check_input=check_input)
        self._gap = res_new.dual_gap_
        return res_new
    return res
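
A hedged end-to-end sketch of staying on the oneDAL branch of this _fit (the daal4py drop-in import path is an assumption):

import numpy as np
from daal4py.sklearn.linear_model import ElasticNet

rng = np.random.RandomState(0)
X = rng.rand(200, 10)                     # dense float64, n_samples >= n_features
y = X @ rng.rand(10)
model = ElasticNet(alpha=0.1).fit(X, y)   # no sample_weight: oneDAL path per the guards
print(model.coef_)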