def transform(self, X):
    """Apply every transformer to X independently and collect the outputs.

    Parameters
    ----------
    X : array-like or sparse matrix, shape (n_samples, n_features)
        Input data to be transformed.

    Returns
    -------
    Xt : dict
        Dictionary with the step names as keys and transformed data as
        values, as assembled by ``self._update_transformed_dict``. An empty
        dict is returned when no transformer produced any output.
    """
    run_parallel = Parallel(n_jobs=self.n_jobs)
    outputs = run_parallel(
        delayed(_transform_one)(transformer, weight, X)
        for _, transformer, weight in self._iter()
    )

    # Nothing came back from the workers: all transformers are None.
    if not outputs:
        return {}

    return self._update_transformed_dict(outputs)
def fit_transform(self, X, y=None, **fit_params):
    """Fit every transformer on X, transform X and stack the outputs column-wise.

    Parameters
    ----------
    X : array-like, sparse matrix or dataframe, shape (n_samples, n_features)
        Input data to be transformed.

    y : iterable, default=None
        Forwarded unchanged to each transformer's fit step.

    **fit_params : dict, optional
        Extra keyword arguments forwarded to ``_fit_transform_one``.

    Returns
    -------
    X_t : array-like, sparse matrix or dataframe, shape (n_samples, sum_n_components)
        hstack of results of transformers. sum_n_components is the sum of
        n_components (output dimension) over transformers. The container
        type depends on the pieces: CSR sparse matrix if any piece is
        sparse, a pandas object if every piece is a DataFrame/Series,
        otherwise a NumPy array.
    """
    self._validate_transformers()

    fitted = Parallel(n_jobs=self.n_jobs)(
        delayed(_fit_transform_one)(transformer, weight, X, y, **fit_params)
        for _, transformer, weight in self._iter()
    )

    # All transformers are None.
    # NOTE(review): this early return ignores ``keep_original`` -- confirm
    # that an empty (n_samples, 0) array is the intended result in that case.
    if not fitted:
        return np.zeros((X.shape[0], 0))

    pieces, transformers = zip(*fitted)
    if self.keep_original:
        # The untouched input becomes the left-most piece of the output.
        pieces = [X] + list(pieces)
    self._update_transformer_list(transformers)

    # A single sparse piece forces a sparse result.
    if any(sparse.issparse(piece) for piece in pieces):
        return sparse.hstack(pieces).tocsr()

    # Only pandas objects: concatenate along columns, keeping labels.
    if all(isinstance(piece, (pd.DataFrame, pd.Series)) for piece in pieces):
        if self.ignore_index:
            # Drop each piece's index so rows are aligned by position.
            pieces = [piece.reset_index(drop=True) for piece in pieces]
        return pd.concat(pieces, axis=1, copy=self.copy)

    return np.hstack(pieces)
def transform(self, X):
    """Apply every transformer to X independently and stack the outputs column-wise.

    Parameters
    ----------
    X : array-like, sparse matrix or dataframe, shape (n_samples, n_features)
        Input data to be transformed.

    Returns
    -------
    X_t : array-like, sparse matrix or dataframe, shape (n_samples, sum_n_components)
        hstack of results of transformers. sum_n_components is the sum of
        n_components (output dimension) over transformers. The container
        type depends on the pieces: CSR sparse matrix if any piece is
        sparse, a pandas object if every piece is a DataFrame/Series,
        otherwise a NumPy array.
    """
    # NOTE(review): the helper is called as (transformer, X, None, weight);
    # confirm this matches the signature of the ``_transform_one`` in scope.
    pieces = Parallel(n_jobs=self.n_jobs)(
        delayed(_transform_one)(transformer, X, None, weight)
        for _, transformer, weight in self._iter()
    )

    # All transformers are None.
    # NOTE(review): this early return ignores ``keep_original`` -- confirm
    # that an empty (n_samples, 0) array is the intended result in that case.
    if not pieces:
        return np.zeros((X.shape[0], 0))

    if self.keep_original:
        # The untouched input becomes the left-most piece of the output.
        pieces = [X] + list(pieces)

    # A single sparse piece forces a sparse result.
    if any(sparse.issparse(piece) for piece in pieces):
        return sparse.hstack(pieces).tocsr()

    # Only pandas objects: concatenate along columns, keeping labels.
    if all(isinstance(piece, (pd.DataFrame, pd.Series)) for piece in pieces):
        if self.ignore_index:
            # Drop each piece's index so rows are aligned by position.
            pieces = [piece.reset_index(drop=True) for piece in pieces]
        return pd.concat(pieces, axis=1, copy=self.copy)

    return np.hstack(pieces)
def fit_transform(self, X, y=None, **fit_params):
    """Fit every transformer on X, transform X and collect the outputs in a dict.

    Parameters
    ----------
    X : array-like or sparse matrix, shape (n_samples, n_features)
        Input data to be transformed.

    y : iterable, default=None
        Training targets.

    **fit_params : dict, optional
        Parameters to pass to the fit method.

    Returns
    -------
    Xt : dict
        Dictionary with the step names as keys and transformed data as
        values, as assembled by ``self._update_transformed_dict``. An empty
        dict is returned when no transformer produced any output.
    """
    self._validate_transformers()

    run_parallel = Parallel(n_jobs=self.n_jobs)
    fitted = run_parallel(
        delayed(_fit_transform_one)(transformer, weight, X, y, **fit_params)
        for _, transformer, weight in self._iter()
    )

    # Nothing came back from the workers: all transformers are None.
    if not fitted:
        return {}

    # Each worker result is an (output, fitted_transformer) pair.
    outputs, transformers = zip(*fitted)
    self._update_transformer_list(transformers)
    return self._update_transformed_dict(outputs)