def fit(self, X, y, sample_weight=None): """ Fit linear model. Parameters ---------- X : numpy array or sparse matrix of shape [n_samples,n_features] Training data y : numpy array of shape [n_samples, n_targets] Target values. Will be cast to X's dtype if necessary sample_weight : numpy array of shape [n_samples] Individual weights for each sample .. versionadded:: 0.17 parameter *sample_weight* support to LinearRegression. Returns ------- self : returns an instance of self. """ n_jobs_ = self.n_jobs X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'], y_numeric=True, multi_output=True) if sample_weight is not None and np.atleast_1d(sample_weight).ndim > 1: raise ValueError("Sample weights must be 1D array or scalar") X, y, X_offset, y_offset, X_scale = self._preprocess_data( X, y, fit_intercept=self.fit_intercept, normalize=self.normalize, copy=self.copy_X, sample_weight=sample_weight) if sample_weight is not None: # Sample weight can be implemented via a simple rescaling. X, y = _rescale_data(X, y, sample_weight) if sp.issparse(X): if y.ndim < 2: out = sparse_lsqr(X, y) self.coef_ = out[0] self._residues = out[3] else: # sparse_lstsq cannot handle y with shape (M, K) outs = Parallel(n_jobs=n_jobs_)( delayed(sparse_lsqr)(X, y[:, j].ravel()) for j in range(y.shape[1])) self.coef_ = np.vstack(out[0] for out in outs) self._residues = np.vstack(out[3] for out in outs) else: self.coef_, self._residues, self.rank_, self.singular_ = \ linalg.lstsq(X, y) self.coef_ = self.coef_.T if y.ndim == 1: self.coef_ = np.ravel(self.coef_) self._set_intercept(X_offset, y_offset, X_scale) return self
def fit(self, X, y, sample_weight=None): """ Fit linear model. Parameters ---------- X : numpy array or sparse matrix of shape [n_samples,n_features] Training data y : numpy array of shape [n_samples, n_targets] Target values sample_weight : numpy array of shape [n_samples] Individual weights for each sample .. versionadded:: 0.17 parameter *sample_weight* support to LinearRegression. Returns ------- self : returns an instance of self. """ n_jobs_ = self.n_jobs X, y = _daal_check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'], y_numeric=True, multi_output=True) dtype = get_dtype(X) if sample_weight is not None: sample_weight = _check_sample_weight(sample_weight, X, dtype=dtype) self.sample_weight_ = sample_weight self.fit_shape_good_for_daal_ = bool(X.shape[0] > X.shape[1] + int(self.fit_intercept)) if (self.fit_shape_good_for_daal_ and not sp.issparse(X) and (dtype == np.float64 or dtype == np.float32) and sample_weight is None): logging.info("sklearn.linar_model.LinearRegression.fit: " + method_uses_daal) res = _daal4py_fit(self, X, y) if res is not None: return res logging.info("sklearn.linar_model.LinearRegression.fit: " + method_uses_sklearn_arter_daal) else: logging.info("sklearn.linar_model.LinearRegression.fit: " + method_uses_sklearn) if sample_weight is not None: sample_weight = np.asarray(sample_weight) if np.atleast_1d(sample_weight).ndim > 1: raise ValueError("Sample weights must be 1D array or scalar") X, y, X_offset, y_offset, X_scale = self._preprocess_data( X, y, fit_intercept=self.fit_intercept, normalize=self.normalize, copy=self.copy_X, sample_weight=sample_weight, return_mean=True) if sample_weight is not None: # Sample weight can be implemented via a simple rescaling. X, y = _rescale_data(X, y, sample_weight) if sp.issparse(X): X_offset_scale = X_offset / X_scale def matvec(b): return X.dot(b) - b.dot(X_offset_scale) def rmatvec(b): return X.T.dot(b) - X_offset_scale * np.sum(b) X_centered = sp.linalg.LinearOperator(shape=X.shape, matvec=matvec, rmatvec=rmatvec) if y.ndim < 2: out = sparse_lsqr(X_centered, y) self.coef_ = out[0] self._residues = out[3] else: # sparse_lstsq cannot handle y with shape (M, K) outs = Parallel(n_jobs=n_jobs_)( delayed(sparse_lsqr)(X_centered, y[:, j].ravel()) for j in range(y.shape[1])) self.coef_ = np.vstack([out[0] for out in outs]) self._residues = np.vstack([out[3] for out in outs]) else: self.coef_, self._residues, self.rank_, self.singular_ = \ linalg.lstsq(X, y) self.coef_ = self.coef_.T if y.ndim == 1: self.coef_ = np.ravel(self.coef_) self._set_intercept(X_offset, y_offset, X_scale) return self
def fit(self, X, y, sample_weight=None): """ Fit linear model. Parameters ---------- X : {array-like, sparse matrix} of shape (n_samples, n_features) Training data y : array-like of shape (n_samples,) or (n_samples, n_targets) Target values. Will be cast to X's dtype if necessary sample_weight : array-like of shape (n_samples,), default=None Individual weights for each sample .. versionadded:: 0.17 parameter *sample_weight* support to LinearRegression. Returns ------- self : returns an instance of self. """ n_jobs_ = self.n_jobs X, y = self._validate_data(X, y, accept_sparse=['csr', 'csc', 'coo'], y_numeric=True, multi_output=True) if sample_weight is not None: sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype) X, y, X_offset, y_offset, X_scale = self._preprocess_data( X, y, fit_intercept=self.fit_intercept, normalize=self.normalize, copy=self.copy_X, sample_weight=sample_weight, return_mean=True) if sample_weight is not None: # Sample weight can be implemented via a simple rescaling. X, y = _rescale_data(X, y, sample_weight) if sp.issparse(X): X_offset_scale = X_offset / X_scale def matvec(b): return X.dot(b) - b.dot(X_offset_scale) def rmatvec(b): return X.T.dot(b) - X_offset_scale * np.sum(b) # REVIEW check this out, it had sparse rather than sp before X_centered = sp.linalg.LinearOperator(shape=X.shape, matvec=matvec, rmatvec=rmatvec) if y.ndim < 2: out = sparse_lsqr(X_centered, y) self.coef_ = out[0] self._residues = out[3] else: # sparse_lstsq cannot handle y with shape (M, K) outs = Parallel(n_jobs=n_jobs_)( delayed(sparse_lsqr)(X_centered, y[:, j].ravel()) for j in range(y.shape[1])) self.coef_ = np.vstack([out[0] for out in outs]) self._residues = np.vstack([out[3] for out in outs]) else: self.coef_, self._residues, self.rank_, self.singular_ = \ linalg.lstsq(X, y) self.coef_ = self.coef_.T if y.ndim == 1: self.coef_ = np.ravel(self.coef_) self._set_intercept(X_offset, y_offset, X_scale) return self