def fit(self, X, y, sample_weight=None):
    """Fit the model on ``X`` and ``y`` using ``fracridge``.

    Parameters
    ----------
    X, y
        Training data and targets; validated with ``check_X_y`` (numeric
        ``y``, multi-output allowed).
    sample_weight : optional
        Per-sample weights. When given they are validated with
        ``_check_sample_weight`` and applied by rescaling ``X`` and ``y``.

    Returns
    -------
    self
        The estimator, with ``alpha_``, ``coef_`` and ``is_fitted_`` set
        and the intercept set through ``_set_intercept``.
    """
    X, y = check_X_y(X, y, y_numeric=True, multi_output=True)

    if sample_weight is not None:
        sample_weight = _check_sample_weight(
            sample_weight, X, dtype=X.dtype)

    preprocess_options = dict(
        fit_intercept=self.fit_intercept,
        normalize=self.normalize,
        copy=self.copy_X,
        sample_weight=sample_weight,
        return_mean=True,
    )
    preprocessed = _preprocess_data(X, y, **preprocess_options)
    X, y, X_offset, y_offset, X_scale = preprocessed

    if sample_weight is not None:
        # Weighting the samples amounts to rescaling the data.
        X, y = _rescale_data(X, y, sample_weight)

    # The fitted flag is raised before the solver runs.
    self.is_fitted_ = True

    fitted_coef, fitted_alpha = fracridge(X, y, fracs=self.fracs)
    self.alpha_ = fitted_alpha
    self.coef_ = fitted_coef

    self._set_intercept(X_offset, y_offset, X_scale)
    return self
def _validate_input(self, X, y, sample_weight=None):
    """Validate, preprocess and (optionally) sample-weight the inputs.

    ``X`` and ``y`` go through ``check_X_y`` and ``_preprocess_data``
    (configured from ``fit_intercept``, ``normalize`` and ``copy_X``).
    When ``sample_weight`` is given it is validated with
    ``_check_sample_weight`` and applied through ``_rescale_data``.

    Returns
    -------
    tuple
        ``(X, y, X_offset, y_offset, X_scale)``.
    """
    X, y = check_X_y(X, y, y_numeric=True, multi_output=True)

    if sample_weight is not None:
        sample_weight = _check_sample_weight(
            sample_weight, X, dtype=X.dtype)

    preprocessed = _preprocess_data(
        X,
        y,
        fit_intercept=self.fit_intercept,
        normalize=self.normalize,
        copy=self.copy_X,
        sample_weight=sample_weight,
        check_input=True,
    )
    X, y, X_offset, y_offset, X_scale = preprocessed

    if sample_weight is not None:
        # Weighting the samples amounts to rescaling the data.  Only the
        # first two returned items are used, whatever else comes back.
        rescaled = _rescale_data(X, y, sample_weight)
        X, y = rescaled[0], rescaled[1]

    return X, y, X_offset, y_offset, X_scale
def nonnegative_regression(X, y, sample_weight=None):
    r"""Solve the nonnegative least squares regression problem.

    Solves :math:`\underset{x}{\text{argmin}} \| Ax - b \|_2^2` subject to
    :math:`x \geq 0` using `scipy.optimize.nnls
    <https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.nnls.html>`_

    Parameters
    ----------
    X : array, shape = (n_samples, n_features)
        Training data.

    y : array, shape = (n_samples,) or (n_samples, n_targets)
        Target values.

    sample_weight : float or array-like, shape (n_samples,), optional
        Individual weights for each sample (default = None).

    Returns
    -------
    coef : array
        Weight vector(s) as produced by ``_solve_nnls``; flattened with
        ``ravel()`` when ``y`` was passed as a 1-D array.

    res : float
        The residual, :math:`\| Ax - y \|_2`.

    Raises
    ------
    ValueError
        If ``X`` and ``y`` have different numbers of samples, or if
        ``sample_weight`` has more than one dimension.
    """
    # TODO accept_sparse=['csr', 'csc', 'coo']? check sopt.nnls
    # TODO order='F'?
    X = check_array(X)
    y = check_array(y, ensure_2d=False)
    check_consistent_length(X, y)

    # A 1-D target is promoted to a single column for the solver and the
    # coefficients are flattened again on the way out.
    y_was_1d = y.ndim == 1
    if y_was_1d:
        y = y.reshape(-1, 1)

    rows_in_X, _ = X.shape
    rows_in_y, _ = y.shape
    if rows_in_X != rows_in_y:
        raise ValueError("Number of samples in X and y does not correspond:"
                         " %d != %d" % (rows_in_X, rows_in_y))

    if sample_weight is not None:
        if np.atleast_1d(sample_weight).ndim > 1:
            raise ValueError("Sample weights must be 1D array or scalar")
        X, y = _rescale_data(X, y, sample_weight)

    coef, res = _solve_nnls(X, y)
    return (coef.ravel() if y_was_1d else coef), res
def test_rescale_data():
    """Check ``_rescale_data`` against explicit sqrt-of-weight scaling."""
    n_samples, n_features = 200, 2

    # Draw order (weights, X, y) matters for the shared module-level rng.
    sample_weight = 1.0 + rng.rand(n_samples)
    X = rng.rand(n_samples, n_features)
    y = rng.rand(n_samples)

    rescaled_X, rescaled_y = _rescale_data(X, y, sample_weight)

    # Reference values: every row is multiplied by sqrt(weight).
    root_weight = np.sqrt(sample_weight)
    expected_X = X * root_weight[:, np.newaxis]
    expected_y = y * root_weight

    assert_array_almost_equal(rescaled_X, expected_X)
    assert_array_almost_equal(rescaled_y, expected_y)
def test_rescale_data_dense(n_targets):
    """Check ``_rescale_data`` on dense input for 1-D and 2-D targets.

    ``n_targets`` is ``None`` for a 1-D ``y``; otherwise it is the number
    of target columns.
    """
    n_samples, n_features = 200, 2

    # Draw order (weights, X, y) matters for the shared module-level rng.
    sample_weight = 1.0 + rng.rand(n_samples)
    X = rng.rand(n_samples, n_features)
    single_target = n_targets is None
    y_shape = (n_samples,) if single_target else (n_samples, n_targets)
    y = rng.rand(*y_shape)

    rescaled_X, rescaled_y = _rescale_data(X, y, sample_weight)

    # Reference values: every row is multiplied by sqrt(weight).
    root_weight = np.sqrt(sample_weight)
    column_weight = root_weight[:, np.newaxis]
    expected_X = X * column_weight
    expected_y = y * (root_weight if single_target else column_weight)

    assert_array_almost_equal(rescaled_X, expected_X)
    assert_array_almost_equal(rescaled_y, expected_y)
def test_rescale_data_dense(n_targets, global_random_seed):
    """Check ``_rescale_data`` on dense input for 1-D and 2-D targets.

    ``n_targets`` is ``None`` for a 1-D ``y``; otherwise it is the number
    of target columns.  ``global_random_seed`` seeds the local generator.
    The expected values are built from the square-root weights that
    ``_rescale_data`` itself returns as its third item.
    """
    rng = np.random.RandomState(global_random_seed)
    n_samples, n_features = 200, 2

    # Draw order (weights, X, y) is kept so the seeded stream is unchanged.
    sample_weight = 1.0 + rng.rand(n_samples)
    X = rng.rand(n_samples, n_features)
    single_target = n_targets is None
    y_shape = (n_samples,) if single_target else (n_samples, n_targets)
    y = rng.rand(*y_shape)

    rescaled_X, rescaled_y, sqrt_sw = _rescale_data(X, y, sample_weight)

    expected_X = X * sqrt_sw[:, np.newaxis]
    if single_target:
        expected_y = y * sqrt_sw
    else:
        expected_y = y * sqrt_sw[:, np.newaxis]

    assert_array_almost_equal(rescaled_X, expected_X)
    assert_array_almost_equal(rescaled_y, expected_y)
def fit(self, X, y, sample_weight=None):
    """
    Fit linear model.

    The daal4py solver is tried first when the problem has more rows than
    columns (plus one if an intercept is fitted), ``X`` is dense, the dtype
    is float32 or float64, and no sample weights are given.  In every other
    case, or when ``_daal4py_fit`` returns ``None``, the least-squares path
    below is used.

    Parameters
    ----------
    X : numpy array or sparse matrix of shape [n_samples,n_features]
        Training data

    y : numpy array of shape [n_samples, n_targets]
        Target values

    sample_weight : numpy array of shape [n_samples]
        Individual weights for each sample

        .. versionadded:: 0.17
           parameter *sample_weight* support to LinearRegression.

    Returns
    -------
    self : returns an instance of self.
        When the daal4py path succeeds, the non-None result of
        ``_daal4py_fit`` is returned as-is (presumably the fitted
        estimator -- confirm against ``_daal4py_fit``).

    Raises
    ------
    ValueError
        If ``sample_weight`` has more than one dimension.
    """
    # Shared prefix for the dispatch log lines; names the sklearn module
    # whose LinearRegression.fit this method stands in for.
    log_prefix = "sklearn.linear_model.LinearRegression.fit: "

    n_jobs_ = self.n_jobs
    X, y = _daal_check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'],
                           y_numeric=True, multi_output=True)

    dtype = get_dtype(X)
    if sample_weight is not None:
        sample_weight = _check_sample_weight(sample_weight, X, dtype=dtype)

    self.sample_weight_ = sample_weight
    # daal4py is only attempted for strictly over-determined problems.
    self.fit_shape_good_for_daal_ = bool(
        X.shape[0] > X.shape[1] + int(self.fit_intercept))

    if (self.fit_shape_good_for_daal_ and
            not sp.issparse(X) and
            (dtype == np.float64 or dtype == np.float32) and
            sample_weight is None):
        logging.info(log_prefix + method_uses_daal)
        res = _daal4py_fit(self, X, y)
        if res is not None:
            return res
        # daal4py declined the problem: fall through to the generic path.
        logging.info(log_prefix + method_uses_sklearn_arter_daal)
    else:
        logging.info(log_prefix + method_uses_sklearn)

    if sample_weight is not None:
        sample_weight = np.asarray(sample_weight)
        if np.atleast_1d(sample_weight).ndim > 1:
            raise ValueError("Sample weights must be 1D array or scalar")

    X, y, X_offset, y_offset, X_scale = self._preprocess_data(
        X, y, fit_intercept=self.fit_intercept, normalize=self.normalize,
        copy=self.copy_X, sample_weight=sample_weight,
        return_mean=True)

    if sample_weight is not None:
        # Sample weight can be implemented via a simple rescaling.
        X, y = _rescale_data(X, y, sample_weight)

    if sp.issparse(X):
        X_offset_scale = X_offset / X_scale

        # Apply the offset correction through a LinearOperator, using only
        # products with X and X.T.
        def matvec(b):
            return X.dot(b) - b.dot(X_offset_scale)

        def rmatvec(b):
            return X.T.dot(b) - X_offset_scale * np.sum(b)

        X_centered = sp.linalg.LinearOperator(shape=X.shape,
                                              matvec=matvec,
                                              rmatvec=rmatvec)

        if y.ndim < 2:
            out = sparse_lsqr(X_centered, y)
            self.coef_ = out[0]
            self._residues = out[3]
        else:
            # sparse_lstsq cannot handle y with shape (M, K)
            outs = Parallel(n_jobs=n_jobs_)(
                delayed(sparse_lsqr)(X_centered, y[:, j].ravel())
                for j in range(y.shape[1]))
            self.coef_ = np.vstack([out[0] for out in outs])
            self._residues = np.vstack([out[3] for out in outs])
    else:
        self.coef_, self._residues, self.rank_, self.singular_ = \
            linalg.lstsq(X, y)
        self.coef_ = self.coef_.T

    if y.ndim == 1:
        self.coef_ = np.ravel(self.coef_)
    self._set_intercept(X_offset, y_offset, X_scale)
    return self
def nonnegative_ridge_regression(X, y, alpha, sample_weight=None,
                                 solver='SLSQP', **solver_kwargs):
    r"""Solve the nonnegative least squares estimate ridge regression problem.

    Solves

    .. math::
        \underset{x}{\text{argmin}} \| Ax - b \|_2^2 + \alpha^2 \| x \|_2^2
        \quad \text{s.t.} \quad x \geq 0

    We can write this as the quadratic programming (QP) problem:

    .. math::
        \underset{x}{\text{argmin}} x^TQx - c^Tx
        \quad \text{s.t.} \quad x \geq 0

    where

    .. math::
        Q = A^TA + \alpha I \quad \text{and} \quad c = -2A^Ty

    Parameters
    ----------
    X : array, shape = (n_samples, n_features)
        Training data.

    y : array, shape = (n_samples,) or (n_samples, n_targets)
        Target values.

    alpha : float or array with shape = (n_features,)
        Regularization strength; must be a positive float. Improves the
        conditioning of the problem and reduces the variance of the
        estimates. Larger values specify stronger regularization. A single
        value is repeated once per feature.

    sample_weight : float or array-like, shape (n_samples,), optional
        (default = None)
        Individual weights for each sample.

    solver : string, optional (default = 'SLSQP')
        Solver with which to solve the QP. Must be one that supports
        bounds (i.e. 'L-BFGS-B', 'TNC', 'SLSQP').

    **solver_kwargs
        See `scipy.optimize.minimize
        <https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html>`_
        for valid keyword arguments

    Returns
    -------
    coef : array, shape = (n_features,) or (n_features, n_targets)
        Weight vector(s).

    res : float
        The residual, :math:`\| Qx - c \|_2`

    Raises
    ------
    ValueError
        If ``solver`` is not one of the supported bounded solvers, if ``X``
        and ``y`` have different numbers of samples, if ``sample_weight``
        has more than one dimension, or if ``alpha`` has neither one entry
        nor one entry per feature.

    Notes
    -----
    - This is an experimental function.

    - If one wishes to perform Lasso or Elastic-Net regression, see
      `sklearn.linear_model.lasso_path
      <http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.lasso_path.html>`_
      or `sklearn.linear_model.enet_path
      <http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.enet_path.html>`_,
      and pass the parameters `fit_intercept=False, positive=True`

    See Also
    --------
    nonnegative_regression
    """
    if solver not in ('L-BFGS-B', 'TNC', 'SLSQP'):
        raise ValueError('solver must be one of L-BFGS-B, TNC, SLSQP, '
                         'not %s' % solver)

    # TODO accept_sparse=['csr', 'csc', 'coo']? check sopt.nnls
    # TODO order='F'?
    X = check_array(X)
    y = check_array(y, ensure_2d=False)
    check_consistent_length(X, y)

    n_samples, n_features = X.shape

    # A 1-D target is promoted to a single column for the solver and the
    # coefficients are flattened again before returning.
    ravel = False
    if y.ndim == 1:
        y = y.reshape(-1, 1)
        ravel = True

    n_samples_, _ = y.shape

    if n_samples != n_samples_:
        raise ValueError("Number of samples in X and y does not correspond:"
                         " %d != %d" % (n_samples, n_samples_))

    if sample_weight is not None:
        if np.atleast_1d(sample_weight).ndim > 1:
            raise ValueError("Sample weights must be 1D array or scalar")

        X, y = _rescale_data(X, y, sample_weight)

    # There should be either a single penalty or one penalty per feature.
    alpha = np.asarray(alpha, dtype=X.dtype).ravel()
    if alpha.size not in [1, n_features]:
        raise ValueError("Number of features and number of L2 penalties "
                         "do not correspond: %d != %d"
                         % (alpha.size, n_features))

    # NOTE: different from sklearn.linear_model.ridge
    if alpha.size == 1 and n_features > 1:
        alpha = np.repeat(alpha, n_features)

    # NOTE(review): the docstring writes the penalty as alpha**2 in the
    # objective but as alpha in Q; which one _solve_ridge_nnls implements
    # is not visible from here -- confirm.
    coef, res = _solve_ridge_nnls(X, y, alpha, solver, **solver_kwargs)

    if ravel:
        # When y was passed as 1d-array, we flatten the coefficients
        coef = coef.ravel()

    return coef, res