Example #1
    def fit(self, X, y, sample_weight=None):
        X, y = check_X_y(X, y, y_numeric=True, multi_output=True)

        if sample_weight is not None:
            sample_weight = _check_sample_weight(sample_weight,
                                                 X,
                                                 dtype=X.dtype)

        X, y, X_offset, y_offset, X_scale = _preprocess_data(
            X,
            y,
            fit_intercept=self.fit_intercept,
            normalize=self.normalize,
            copy=self.copy_X,
            sample_weight=sample_weight,
            return_mean=True)

        if sample_weight is not None:
            # Sample weight can be implemented via a simple rescaling.
            X, y = _rescale_data(X, y, sample_weight)

        self.is_fitted_ = True
        coef, alpha = fracridge(X, y, fracs=self.fracs)
        self.alpha_ = alpha
        self.coef_ = coef
        self._set_intercept(X_offset, y_offset, X_scale)
        return self
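The comment "Sample weight can be implemented via a simple rescaling" refers to the standard weighted-least-squares trick: multiplying each row of X and y by the square root of its weight makes ordinary least squares on the rescaled data equivalent to weighted least squares on the originals. A minimal standalone sketch of that identity, using plain NumPy rather than the private _rescale_data helper:

import numpy as np

rng = np.random.RandomState(0)
X = rng.rand(50, 3)
y = rng.rand(50)
w = 1.0 + rng.rand(50)

# Weighted least squares via the normal equations: (X^T W X) beta = X^T W y
beta_wls = np.linalg.solve(X.T @ (w[:, None] * X), X.T @ (w * y))

# The same coefficients from plain OLS on sqrt(w)-rescaled data
beta_ols, *_ = np.linalg.lstsq(X * np.sqrt(w)[:, None], y * np.sqrt(w), rcond=None)

assert np.allclose(beta_wls, beta_ols)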
Example #2
    def _validate_input(self, X, y, sample_weight=None):
        """
        Helper function to validate the inputs
        """
        X, y = check_X_y(X, y, y_numeric=True, multi_output=True)

        if sample_weight is not None:
            sample_weight = _check_sample_weight(sample_weight,
                                                 X,
                                                 dtype=X.dtype)

        X, y, X_offset, y_offset, X_scale = _preprocess_data(
            X,
            y,
            fit_intercept=self.fit_intercept,
            normalize=self.normalize,
            copy=self.copy_X,
            sample_weight=sample_weight,
            check_input=True)

        if sample_weight is not None:
            # Sample weight can be implemented via a simple rescaling.
            outs = _rescale_data(X, y, sample_weight)
            X, y = outs[0], outs[1]

        return X, y, X_offset, y_offset, X_scale
Example #3
def nonnegative_regression(X, y, sample_weight=None):
    r"""Solve the nonnegative least squares estimate regression problem.

    Solves :math:`\underset{x}{\text{argmin}} \| Ax - b \|_2^2` subject to :math:`x \geq 0`
    using `scipy.optimize.nnls <https://docs.scipy.org/doc/scipy/reference/
    generated/scipy.optimize.nnls.html>`_.

    Parameters
    ----------
    X : array, shape = (n_samples, n_features)
        Training data.

    y : array, shape = (n_samples,) or (n_samples, n_targets)
        Target values.

    sample_weight : float or array-like, shape (n_samples,), optional (default = None)
        Individual weights for each sample.

    Returns
    -------
    coef : array, shape = (n_features,) or (n_features, n_targets)
        Weight vector(s).

    res : float
        The residual, :math:`\| Ax - b \|_2`.
    """
    # TODO accept_sparse=['csr', 'csc', 'coo']? check sopt.nnls
    # TODO order='F'?
    X = check_array(X)
    y = check_array(y, ensure_2d=False)
    check_consistent_length(X, y)

    n_samples, n_features = X.shape

    ravel = False
    if y.ndim == 1:
        y = y.reshape(-1, 1)
        ravel = True

    n_samples_, n_targets = y.shape

    if n_samples != n_samples_:
        raise ValueError("Number of samples in X and y does not correspond:"
                         " %d != %d" % (n_samples, n_samples_))

    has_sw = sample_weight is not None

    if has_sw:
        if np.atleast_1d(sample_weight).ndim > 1:
            raise ValueError("Sample weights must be 1D array or scalar")

        X, y = _rescale_data(X, y, sample_weight)

    coef, res = _solve_nnls(X, y)

    if ravel:
        # When y was passed as 1d-array, we flatten the coefficients
        coef = coef.ravel()

    return coef, res
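The docstring above points at scipy.optimize.nnls as the underlying solver; it can also be exercised directly. A small sketch on synthetic data (not from the source) recovering a known nonnegative coefficient vector:

import numpy as np
from scipy.optimize import nnls

rng = np.random.RandomState(0)
X = rng.rand(100, 3)
true_coef = np.array([1.0, 0.0, 2.0])
y = X @ true_coef + 0.01 * rng.randn(100)

coef, res = nnls(X, y)  # coef is elementwise >= 0; res is ||X @ coef - y||_2
print(coef)             # close to [1., 0., 2.]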
Example #4
def test_rescale_data():
    n_samples = 200
    n_features = 2

    sample_weight = 1.0 + rng.rand(n_samples)
    X = rng.rand(n_samples, n_features)
    y = rng.rand(n_samples)
    rescaled_X, rescaled_y = _rescale_data(X, y, sample_weight)
    rescaled_X2 = X * np.sqrt(sample_weight)[:, np.newaxis]
    rescaled_y2 = y * np.sqrt(sample_weight)
    assert_array_almost_equal(rescaled_X, rescaled_X2)
    assert_array_almost_equal(rescaled_y, rescaled_y2)
Example #5
def test_rescale_data_dense(n_targets):
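    # `n_targets` is supplied by a @pytest.mark.parametrize decorator that was
    # stripped when this example was extracted; `rng` is a module-level
    # RandomState in the original test module.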
    n_samples = 200
    n_features = 2

    sample_weight = 1.0 + rng.rand(n_samples)
    X = rng.rand(n_samples, n_features)
    if n_targets is None:
        y = rng.rand(n_samples)
    else:
        y = rng.rand(n_samples, n_targets)
    rescaled_X, rescaled_y = _rescale_data(X, y, sample_weight)
    rescaled_X2 = X * np.sqrt(sample_weight)[:, np.newaxis]
    if n_targets is None:
        rescaled_y2 = y * np.sqrt(sample_weight)
    else:
        rescaled_y2 = y * np.sqrt(sample_weight)[:, np.newaxis]
    assert_array_almost_equal(rescaled_X, rescaled_X2)
    assert_array_almost_equal(rescaled_y, rescaled_y2)
Example #6
def test_rescale_data_dense(n_targets, global_random_seed):
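    # `n_targets` is supplied by a @pytest.mark.parametrize decorator that was
    # stripped when this example was extracted; `global_random_seed` is a
    # pytest fixture from scikit-learn's test configuration.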
    rng = np.random.RandomState(global_random_seed)
    n_samples = 200
    n_features = 2

    sample_weight = 1.0 + rng.rand(n_samples)
    X = rng.rand(n_samples, n_features)
    if n_targets is None:
        y = rng.rand(n_samples)
    else:
        y = rng.rand(n_samples, n_targets)
    rescaled_X, rescaled_y, sqrt_sw = _rescale_data(X, y, sample_weight)
    rescaled_X2 = X * sqrt_sw[:, np.newaxis]
    if n_targets is None:
        rescaled_y2 = y * sqrt_sw
    else:
        rescaled_y2 = y * sqrt_sw[:, np.newaxis]
    assert_array_almost_equal(rescaled_X, rescaled_X2)
    assert_array_almost_equal(rescaled_y, rescaled_y2)
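Examples #4 through #6 track an API change: newer scikit-learn versions of _rescale_data also return the square-rooted sample weights as a third value. A version-tolerant sketch, assuming the private import path sklearn.linear_model._base (a private API that has moved between releases; rescale_data_compat is a hypothetical helper):

import numpy as np
from sklearn.linear_model._base import _rescale_data  # private API; location varies across versions

def rescale_data_compat(X, y, sample_weight):
    # Return (X_rescaled, y_rescaled, sqrt_sw) under either return convention.
    out = _rescale_data(X, y, sample_weight)
    if len(out) == 3:   # newer versions already return sqrt_sw
        return out
    X_r, y_r = out      # older versions return only the rescaled pair
    return X_r, y_r, np.sqrt(sample_weight)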
Example #7
def fit(self, X, y, sample_weight=None):
    """
    Fit linear model.

    Parameters
    ----------
    X : numpy array or sparse matrix of shape [n_samples, n_features]
        Training data

    y : numpy array of shape [n_samples, n_targets]
        Target values

    sample_weight : numpy array of shape [n_samples]
        Individual weights for each sample

        .. versionadded:: 0.17
           parameter *sample_weight* support to LinearRegression.

    Returns
    -------
    self : returns an instance of self.
    """

    n_jobs_ = self.n_jobs
    X, y = _daal_check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'],
                           y_numeric=True, multi_output=True)

    dtype = get_dtype(X)

    if sample_weight is not None:
        sample_weight = _check_sample_weight(sample_weight, X,
                                             dtype=dtype)

    self.sample_weight_ = sample_weight
    self.fit_shape_good_for_daal_ = bool(X.shape[0] > X.shape[1] + int(self.fit_intercept))

    if (self.fit_shape_good_for_daal_ and
            not sp.issparse(X) and
            (dtype == np.float64 or dtype == np.float32) and
            sample_weight is None):
        logging.info("sklearn.linar_model.LinearRegression.fit: " + method_uses_daal)
        res = _daal4py_fit(self, X, y)
        if res is not None:
            return res
        logging.info("sklearn.linar_model.LinearRegression.fit: " + method_uses_sklearn_arter_daal)
    else:
        logging.info("sklearn.linar_model.LinearRegression.fit: " + method_uses_sklearn)

    if sample_weight is not None:
        sample_weight = np.asarray(sample_weight)
        if np.atleast_1d(sample_weight).ndim > 1:
            raise ValueError("Sample weights must be 1D array or scalar")

    X, y, X_offset, y_offset, X_scale = self._preprocess_data(
        X, y, fit_intercept=self.fit_intercept, normalize=self.normalize,
        copy=self.copy_X, sample_weight=sample_weight, return_mean=True)

    if sample_weight is not None:
        # Sample weight can be implemented via a simple rescaling.
        X, y = _rescale_data(X, y, sample_weight)

    if sp.issparse(X):
        X_offset_scale = X_offset / X_scale

        def matvec(b):
            return X.dot(b) - b.dot(X_offset_scale)

        def rmatvec(b):
            return X.T.dot(b) - X_offset_scale * np.sum(b)

        X_centered = sp.linalg.LinearOperator(shape=X.shape,
                                              matvec=matvec,
                                              rmatvec=rmatvec)

        if y.ndim < 2:
            out = sparse_lsqr(X_centered, y)
            self.coef_ = out[0]
            self._residues = out[3]
        else:
            # sparse_lsqr cannot handle y with shape (M, K)
            outs = Parallel(n_jobs=n_jobs_)(
                delayed(sparse_lsqr)(X_centered, y[:, j].ravel())
                for j in range(y.shape[1]))
            self.coef_ = np.vstack([out[0] for out in outs])
            self._residues = np.vstack([out[3] for out in outs])
    else:
        self.coef_, self._residues, self.rank_, self.singular_ = \
            linalg.lstsq(X, y)
        self.coef_ = self.coef_.T

    if y.ndim == 1:
        self.coef_ = np.ravel(self.coef_)
    self._set_intercept(X_offset, y_offset, X_scale)
    return self
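The sparse branch in the fit above centers X implicitly: materializing X minus its column offsets would densify the sparse matrix, so a LinearOperator folds the correction into every matrix-vector product instead. A minimal sketch checking that identity against a dense computation:

import numpy as np
import scipy.sparse as sp
from scipy.sparse.linalg import LinearOperator

rng = np.random.RandomState(0)
X = sp.random(30, 4, density=0.5, format="csr", random_state=rng)
offset = np.asarray(X.mean(axis=0)).ravel()  # column means, standing in for X_offset / X_scale

def matvec(b):
    return X.dot(b) - b.dot(offset)          # (X - 1 offset^T) @ b without densifying X

def rmatvec(b):
    return X.T.dot(b) - offset * np.sum(b)   # (X - 1 offset^T).T @ b

X_centered = LinearOperator(shape=X.shape, matvec=matvec, rmatvec=rmatvec)

b = rng.rand(4)
assert np.allclose(X_centered.matvec(b), (X.toarray() - offset) @ b)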
Example #8
def nonnegative_ridge_regression(X, y, alpha, sample_weight=None,
                                 solver='SLSQP', **solver_kwargs):
    r"""Solve the nonnegative least squares estimate ridge regression problem.

    Solves

    .. math::
        \underset{x}{\text{argmin}} \| Ax - b \|_2^2 + \alpha^2 \| x \|_2^2
        \quad \text{s.t.} \quad x \geq 0

    We can write this as the quadratic programming (QP) problem:

    .. math::

        \underset{x}{\text{argmin}} x^TQx - c^Tx \quad \text{s.t.} \quad x \geq 0

    where

    .. math::

        Q = A^TA + \alpha^2 I \quad \text{and} \quad c = 2A^Tb

    Parameters
    ----------
    X : array, shape = (n_samples, n_features)
        Training data.

    y : array, shape = (n_samples,) or (n_samples, n_targets)
        Target values.

    alpha : float or array with shape = (n_features,)
        Regularization strength; must be positive. Improves the
        conditioning of the problem and reduces the variance of the estimates.
        Larger values specify stronger regularization.

    sample_weight : float or array-like, shape (n_samples,), optional (default = None)
        Individual weights for each sample.

    solver : string, optional (default = 'SLSQP')
        Solver with which to solve the QP. Must be one that supports bounds
        (i.e. 'L-BFGS-B', 'TNC', 'SLSQP').

    **solver_kwargs
        See `scipy.optimize.minimize <https://docs.scipy.org/doc/scipy/
        reference/generated/scipy.optimize.minimize.html>`_
        for valid keyword arguments.

    Returns
    -------
    coef : array, shape = (n_features,) or (n_features, n_targets)
        Weight vector(s).

    res : float
        The residual, :math:`\| Qx - c \|_2`.

    Notes
    -----
    - This is an experimental function.
    - If one wishes to perform Lasso or Elastic-Net regression, see
      `sklearn.linear_model.lasso_path <http://scikit-learn.org/stable/modules/
      generated/sklearn.linear_model.lasso_path.html>`_ or
      `sklearn.linear_model.enet_path <http://scikit-learn.org/stable/
      modules/generated/sklearn.linear_model.enet_path.html>`_,
      and pass the parameters `fit_intercept=False, positive=True`


    See Also
    --------
    nonnegative_regression
    """
    if solver not in ('L-BFGS-B', 'TNC', 'SLSQP'):
        raise ValueError('solver must be one of L-BFGS-B, TNC, SLSQP, '
                         'not %s' % solver)

    # TODO accept_sparse=['csr', 'csc', 'coo']? check sopt.nnls
    # TODO order='F'?
    X = check_array(X)
    y = check_array(y, ensure_2d=False)
    check_consistent_length(X, y)

    n_samples, n_features = X.shape

    ravel = False
    if y.ndim == 1:
        y = y.reshape(-1, 1)
        ravel = True

    n_samples_, n_targets = y.shape

    if n_samples != n_samples_:
        raise ValueError("Number of samples in X and y does not correspond:"
                         " %d != %d" % (n_samples, n_samples_))

    has_sw = sample_weight is not None

    if has_sw:
        if np.atleast_1d(sample_weight).ndim > 1:
            raise ValueError("Sample weights must be 1D array or scalar")

        X, y = _rescale_data(X, y, sample_weight)

    # there should be either 1 or n_features penalties
    alpha = np.asarray(alpha, dtype=X.dtype).ravel()
    if alpha.size not in [1, n_features]:
        raise ValueError("Number of features and number of L2 penalties "
                         "do not correspond: %d != %d"
                         % (alpha.size, n_features))

    # NOTE: different from sklearn.linear_model.ridge
    if alpha.size == 1 and n_features > 1:
        alpha = np.repeat(alpha, n_features)

    coef, res = _solve_ridge_nnls(X, y, alpha, solver, **solver_kwargs)

    if ravel:
        # When y was passed as 1d-array, we flatten the coefficients
        coef = coef.ravel()

    return coef, res
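As a sanity check on the QP reduction documented above, the ridge objective expands to x^T Q x - c^T x plus the constant b^T b, with Q = A^T A + alpha^2 I and c = 2 A^T b. A short numerical verification on illustrative data:

import numpy as np

rng = np.random.RandomState(0)
A = rng.rand(15, 4)
b = rng.rand(15)
alpha = 0.7
x = rng.rand(4)

ridge_obj = np.sum((A @ x - b) ** 2) + alpha ** 2 * np.sum(x ** 2)

Q = A.T @ A + alpha ** 2 * np.eye(4)
c = 2 * A.T @ b
qp_obj = x @ Q @ x - c @ x + b @ b  # b^T b is the constant dropped from the QP

assert np.isclose(ridge_obj, qp_obj)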