예제 #1
0
    def transform(self, X):
        """Apply every transformer to X and collect the outputs in a dict.

        Parameters
        ----------
        X : array-like or sparse matrix, shape (n_samples, n_features)
            Input data to be transformed.

        Returns
        -------
        Xt : dict
            Dictionary with the step names as keys and transformed
            data as values. An empty dict is returned when the parallel
            run yields no results.

        """
        pool = Parallel(n_jobs=self.n_jobs)
        apply_one = delayed(_transform_one)
        outputs = pool(
            apply_one(step, step_weight, X)
            for _, step, step_weight in self._iter())

        # A non-empty result is turned into the output dictionary by
        # the class's own helper.
        if outputs:
            return self._update_transformed_dict(outputs)

        # Nothing came back (all transformers are None).
        return {}
예제 #2
0
    def fit_transform(self, X, y=None, **fit_params):
        """Fit all transformers on X, transform X and stack the outputs.

        Parameters
        ----------
        X : array-like, sparse matrix or dataframe,
            shape (n_samples, n_features)
            Input data to be transformed.

        y : object, default=None
            Passed unchanged to ``_fit_transform_one`` for every
            transformer.

        **fit_params : dict, optional
            Extra keyword arguments forwarded to ``_fit_transform_one``.

        Returns
        -------
        X_t : array-like, sparse matrix or dataframe,
            shape (n_samples, sum_n_components)
            Column-wise stack of the transformer outputs (preceded by X
            itself when ``self.keep_original`` is truthy).
            sum_n_components is the sum of n_components (output
            dimension) over transformers. The container type is chosen
            as follows: a CSR sparse matrix if any piece is sparse, a
            pandas object if every piece is a DataFrame or Series, and
            the result of ``np.hstack`` otherwise.

        """
        self._validate_transformers()
        pool = Parallel(n_jobs=self.n_jobs)
        fit_one = delayed(_fit_transform_one)
        fitted = pool(
            fit_one(step, step_weight, X, y, **fit_params)
            for _, step, step_weight in self._iter())

        if not fitted:
            # Nothing came back (all transformers are None).
            # NOTE(review): this return happens before `keep_original`
            # is consulted, so X is not part of the empty result --
            # confirm that this is intended.
            return np.zeros((X.shape[0], 0))

        # Each worker result is an (output, transformer) pair.
        pieces, fitted_steps = zip(*fitted)
        if self.keep_original:
            pieces = [X] + list(pieces)
        self._update_transformer_list(fitted_steps)

        # One sparse piece is enough to make the whole result sparse.
        if any(sparse.issparse(piece) for piece in pieces):
            return sparse.hstack(pieces).tocsr()

        pandas_types = (pd.DataFrame, pd.Series)
        if not all(isinstance(piece, pandas_types) for piece in pieces):
            return np.hstack(pieces)

        # Only pandas objects remain; optionally discard their indexes
        # before the column-wise concatenation.
        if self.ignore_index:
            pieces = [piece.reset_index(drop=True) for piece in pieces]
        return pd.concat(pieces, axis=1, copy=self.copy)
예제 #3
0
    def transform(self, X):
        """Apply every transformer to X and stack the outputs column-wise.

        Parameters
        ----------
        X : array-like, sparse matrix or dataframe,
            shape (n_samples, n_features)
            Input data to be transformed.

        Returns
        -------
        X_t : array-like, sparse matrix or dataframe,
            shape (n_samples, sum_n_components)
            Column-wise stack of the transformer outputs (preceded by X
            itself when ``self.keep_original`` is truthy).
            sum_n_components is the sum of n_components (output
            dimension) over transformers. The container type is chosen
            as follows: a CSR sparse matrix if any piece is sparse, a
            pandas object if every piece is a DataFrame or Series, and
            the result of ``np.hstack`` otherwise.

        """
        pool = Parallel(n_jobs=self.n_jobs)
        apply_one = delayed(_transform_one)
        pieces = pool(
            apply_one(step, X, None, step_weight)
            for _, step, step_weight in self._iter())

        if not pieces:
            # Nothing came back (all transformers are None).
            # NOTE(review): this return happens before `keep_original`
            # is consulted, so X is not part of the empty result --
            # confirm that this is intended.
            return np.zeros((X.shape[0], 0))

        if self.keep_original:
            pieces = [X] + list(pieces)

        # One sparse piece is enough to make the whole result sparse.
        if any(sparse.issparse(piece) for piece in pieces):
            return sparse.hstack(pieces).tocsr()

        pandas_types = (pd.DataFrame, pd.Series)
        if not all(isinstance(piece, pandas_types) for piece in pieces):
            return np.hstack(pieces)

        # Only pandas objects remain; optionally discard their indexes
        # before the column-wise concatenation.
        if self.ignore_index:
            pieces = [piece.reset_index(drop=True) for piece in pieces]
        return pd.concat(pieces, axis=1, copy=self.copy)
예제 #4
0
    def fit_transform(self, X, y=None, **fit_params):
        """Fit all transformers on X, transform X and return a dict.

        Parameters
        ----------
        X : array-like or sparse matrix, shape (n_samples, n_features)
            Input data to be transformed.

        y : iterable, default=None
            Training targets.

        **fit_params : dict, optional
            Parameters to pass to the fit method.

        Returns
        -------
        Xt : dict
            Dictionary with the step names as keys and transformed
            data as values. An empty dict is returned when the parallel
            run yields no results.

        """
        self._validate_transformers()
        pool = Parallel(n_jobs=self.n_jobs)
        fit_one = delayed(_fit_transform_one)
        fitted = pool(
            fit_one(step, step_weight, X, y, **fit_params)
            for _, step, step_weight in self._iter())

        if not fitted:
            # Nothing came back (all transformers are None).
            return {}

        # Each worker result is an (output, transformer) pair; the
        # transformers are handed back to the instance before the
        # outputs are turned into the result dictionary.
        outputs, fitted_steps = zip(*fitted)
        self._update_transformer_list(fitted_steps)
        return self._update_transformed_dict(outputs)