Beispiel #1
0
    def inverse_transform(self, x):
        """
        Scale back the data to the original representation.

        Each column is restored with ``inv_boxcox`` using the lambda stored
        for it, and the stored shift is then subtracted. Columns whose stored
        lambda is NaN or ``np.inf`` are passed through unchanged.

        Parameters
        ----------
        x: DataFrame, Series, ndarray, list
            The transformed data to be restored along the features axis.
            It is normalised by ``self._check_type`` before use.

        Returns
        -------
        ndarray
            Inverse transformed data. Flattened to one dimension when the
            recorded ``self._shape`` has a single axis.
        """
        x = self._check_type(x)
        restored_cols = []
        for col, shift, lmd in zip(x.T, self._shift, self._lmd):
            # NaN never compares equal to itself, so the sentinel lambdas are
            # detected explicitly instead of by value matching (which only
            # succeeds when the stored value is the very same NaN object).
            if np.isnan(lmd) or lmd == np.inf:
                restored = col
            else:
                restored = inv_boxcox(col, lmd) - shift
            restored_cols.append(restored.reshape(-1, 1))
        xs = np.concatenate(restored_cols, axis=1)
        if len(self._shape) == 1:
            return xs.ravel()
        return xs
Beispiel #2
0
 def _handle_err(self, e):
     """
     React to a failure according to the ``self._on_err`` policy.

     Parameters
     ----------
     e: Exception
         The error being handled. It is re-raised as-is when the policy
         is ``'raise'``.

     Raises
     ------
     Exception
         ``e`` itself when ``self._on_err`` is ``'raise'``.
     RuntimeError
         When ``self._on_err`` is not one of the supported policies.

     Notes
     -----
     For the non-raising policies a placeholder lambda is appended to
     ``self._lmd``: ``np.inf`` for ``None``, ``0.`` for ``'log'`` and
     ``np.nan`` for ``'nan'``.
     """
     policy = self._on_err
     if policy is None:
         self._lmd.append(np.inf)
     elif policy == 'log':
         self._lmd.append(0.)
     elif policy == 'nan':
         self._lmd.append(np.nan)
     elif policy == 'raise':
         raise e
     else:
         raise RuntimeError(
             'parameter on_err must be None, "log", "nan" or "raise"')
Beispiel #3
0
    def transform(self, x):
        """
        Apply the Box-Cox transformation column by column.

        For every column the stored shift is updated first: it is reset to
        ``0.`` when all values are positive, otherwise the smallest non-NaN
        value of the column is subtracted from it. The column is then
        transformed according to its stored lambda:

        * ``np.inf``: the column is returned untouched;
        * NaN: the column is replaced by NaN values;
        * anything else: ``boxcox(col + shift, lambda)``.

        Parameters
        ----------
        x
            The data to transform along the features axis. It is normalised
            by ``self._check_type`` before use.

        Returns
        -------
        ndarray
            Box-Cox transformed data. Flattened to one dimension when the
            recorded ``self._shape`` has a single axis, otherwise reshaped
            to ``self._shape[1]`` columns.
        """
        x = self._check_type(x)
        transformed_cols = []
        for i, col in enumerate(x.T):
            if np.all(col > 0):
                self._shift[i] = 0.
            else:
                # NOTE(review): this updates the stored shift in place, so
                # repeated calls on non-positive data keep accumulating the
                # offset -- confirm this is intended.
                self._shift[i] -= col[~np.isnan(col)].min()

            lmd = self._lmd[i]
            shift = self._shift[i]
            # NaN never compares equal to itself, so the sentinel lambdas are
            # detected explicitly instead of by value matching (which only
            # succeeds when the stored value is the very same NaN object).
            if np.isnan(lmd):
                transformed = np.full(col.shape, np.nan)
            elif lmd == np.inf:
                transformed = col
            else:
                transformed = boxcox(col + shift, lmd)
            transformed_cols.append(transformed.reshape(-1, 1))
        xs = np.concatenate(transformed_cols, axis=1)

        if len(self._shape) == 1:
            return xs.ravel()
        return xs.reshape(-1, self._shape[1])
Beispiel #4
0
    def transform(self, entries: Sequence, *, return_type=None, **kwargs):
        """
        Featurize a list of entries.
        If `featurize` takes multiple inputs, supply inputs as a list of tuples.

        How the work is dispatched depends on ``self._n_jobs``:
        ``0`` calls ``self.featurize`` once on the whole input,
        ``1`` calls ``self._wrapper`` on each entry serially,
        any other value runs ``self._wrapper`` on each entry through ``Parallel``.

        Args
        ----
        entries: list-like
            A list of entries to be featurized.
        return_type: str
            Specify the return type.
            Can be ``any``, ``array`` and ``df``.
            ``array`` and ``df`` force return type to ``np.ndarray`` and ``pd.DataFrame`` respectively.
            If ``any``, the return type depends on the input type.
            This is a temporary change that only has effect in the current transform.
            Default is ``None`` for no changes, i.e. ``self.return_type`` is used.
        **kwargs
            Stored on ``self._kwargs`` and forwarded to ``self.featurize`` when ``self._n_jobs`` is 0.

        Returns
        -------
            list, np.ndarray or pd.DataFrame
                Features for each entry, packaged according to the effective return type.
                An empty list is returned for empty input.
                ``None`` is returned if the effective return type is not one of the values above.

        Raises
        ------
            TypeError
                If ``entries`` is not iterable.
        """
        self._kwargs = kwargs

        # Check inputs
        if not isinstance(entries, Iterable):
            raise TypeError('parameter "entries" must be a iterable object')

        # Special case: empty input.
        # The length is compared by value: identity comparison against an int
        # literal is implementation-dependent, and plain truthiness is
        # ambiguous for arrays and pandas objects.
        if len(entries) == 0:
            return []

        for c in Switch(self._n_jobs):
            if c(0):
                # Run the actual featurization
                ret = self.featurize(entries, **kwargs)
                break
            if c(1):
                ret = [self._wrapper(x) for x in entries]
                break
            if c():
                ret = Parallel(n_jobs=self._n_jobs,
                               verbose=self._parallel_verbose)(delayed(self._wrapper)(x) for x in entries)

        # Column labels are optional: featurizers may not implement them.
        try:
            labels = self.feature_labels
        except NotImplementedError:
            labels = None

        if return_type is None:
            return_type = self.return_type
        if return_type == 'any':
            if isinstance(entries, (pd.Series, pd.DataFrame)):
                # Keep the caller's index so rows stay aligned with the input.
                return pd.DataFrame(ret, index=entries.index, columns=labels)
            if isinstance(entries, np.ndarray):
                return np.array(ret)
            return ret

        if return_type == 'array':
            return np.array(ret)

        if return_type == 'df':
            if isinstance(entries, (pd.Series, pd.DataFrame)):
                return pd.DataFrame(ret, index=entries.index, columns=labels)
            return pd.DataFrame(ret, columns=labels)
Beispiel #5
0
    def transform(self, entries: Sequence, *, return_type=None, target_col=None, **kwargs):
        """
        Featurize a list of entries.
        If `featurize` takes multiple inputs, supply inputs as a list of tuples,
        or use pd.DataFrame with parameter ``target_col`` to specify the column name(s).

        How the work is dispatched depends on ``self._n_jobs``:
        ``0`` calls ``self.featurize`` once on the whole input,
        ``1`` calls ``self._wrapper`` on each entry serially,
        any other value runs ``self._wrapper`` on each entry through ``Parallel``.

        Args
        ----
        entries: list-like or pd.DataFrame
            A list of entries to be featurized or pd.DataFrame with one specified column.
            See detail of target_col if entries is pd.DataFrame.
            Also, make sure n_jobs=0 for pd.DataFrame.
        return_type: str
            Specify the return type.
            Can be ``any``, ``custom``, ``array`` or ``df``.
            ``array`` or ``df`` forces return type to ``np.ndarray`` or ``pd.DataFrame``, respectively.
            If ``any``, the return type follows these rules:
            (1) if input type is pd.Series or pd.DataFrame, returns pd.DataFrame;
            (2) else if input type is np.array, returns np.array;
            (3) else if other input type and n_jobs=0, follows the featurize function return;
            (4) otherwise, return a list of objects (output of featurize function).
            If ``custom``, the return type depends on the featurize function if n_jobs=0,
            or the return type is a list of objects (output of featurize function) for other n_jobs values.
            This is a one-time change that only has effect in the current transformation.
            Default is ``None`` for using the setting at initialization step.
        target_col
            Only relevant when input is pd.DataFrame, otherwise ignored.
            Specify a single column to be used for transformation.
            Default is ``None`` for using ``self.target_col``; if that is also ``None``,
            all columns of the DataFrame are used.
            (see __init__ for more information)
        **kwargs
            Stored on ``self._kwargs`` and forwarded to ``self.featurize`` when ``self._n_jobs`` is 0.

        Returns
        -------
            list, np.ndarray, pd.DataFrame or featurize output
                Features for each entry, packaged according to the effective return type.
                An empty list is returned for empty input.

        Raises
        ------
            TypeError
                If ``entries`` is not iterable.
            ValueError
                If ``return_type`` is not one of the supported values.
            RuntimeError
                If the selected entries are still a pd.DataFrame while ``self._n_jobs`` is not 0.
        """
        self._kwargs = kwargs

        # Check inputs
        if not isinstance(entries, Iterable):
            raise TypeError('parameter "entries" must be a iterable object')

        # Extract relevant columns for pd.DataFrame input
        if isinstance(entries, pd.DataFrame):
            if target_col is None:
                target_col = self.target_col
                if target_col is None:
                    target_col = entries.columns.values
            entries = entries[target_col]

        # Special case: empty input.
        # The length is compared by value: identity comparison against an int
        # literal is implementation-dependent, and plain truthiness is
        # ambiguous for arrays and pandas objects.
        if len(entries) == 0:
            return []

        # Check outputs
        if return_type not in {None, 'any', 'array', 'df', 'custom'}:
            raise ValueError('`return_type` must be None, `any`, `custom`, `array` or `df`')

        for c in Switch(self._n_jobs):
            if c(0):
                # Run the actual featurization
                ret = self.featurize(entries, **kwargs)
                break
            # Reached only when n_jobs is not 0: per-entry dispatch cannot
            # handle a multi-column DataFrame.
            if isinstance(entries, pd.DataFrame):
                raise RuntimeError(
                    "Auto-parallel can not be used when`entries` is `pandas.DataFrame`. "
                    "Please set `n_jobs` to 0 and implements your algorithm in the `featurize` method"
                )
            if c(1):
                ret = [self._wrapper(x) for x in entries]
                break
            if c():
                ret = Parallel(n_jobs=self._n_jobs, verbose=self._parallel_verbose)(
                    delayed(self._wrapper)(x) for x in entries)

        # Column labels are optional: featurizers may not implement them.
        try:
            labels = self.feature_labels
        except NotImplementedError:
            labels = None

        if return_type is None:
            return_type = self.return_type

        if return_type == 'any':
            if isinstance(entries, (pd.Series, pd.DataFrame)):
                # Keep the caller's index so rows stay aligned with the input.
                return pd.DataFrame(ret, index=entries.index, columns=labels)
            if isinstance(entries, np.ndarray):
                return np.array(ret)
            return ret

        if return_type == 'array':
            return np.array(ret)

        if return_type == 'df':
            if isinstance(entries, (pd.Series, pd.DataFrame)):
                return pd.DataFrame(ret, index=entries.index, columns=labels)
            return pd.DataFrame(ret, columns=labels)

        if return_type == 'custom':
            return ret