Exemple #1
0
    def _pairwise_panel_x_check(self, X, var_name="X"):
        """Check and coerce input data.

        Method used to check the input and convert Series/Panel input
            to internally used format, as defined in X_inner_mtype tag

        Parameters
        ----------
        X: List of dfs, Numpy of dfs, 3d numpy
            sktime data container compliant with the Series or Panel scitype
            The value to be checked
        var_name: str, variable name to print in error messages

        Returns
        -------
        X: Panel data container of a supported format in X_inner_mtype
            usually df-list, list of pd.DataFrame, unless overridden

        Raises
        ------
        TypeError
            if check_is_scitype does not accept X as Series or Panel
        RuntimeError
            if the check passes but reports a scitype other than
            "Series" or "Panel"
        """
        check_res = check_is_scitype(
            X, ["Series", "Panel"], return_metadata=True, var_name=var_name
        )
        X_valid = check_res[0]

        # test validity *before* reading the metadata: for a failed check the
        # metadata entry is presumably not a usable dict (TODO confirm against
        # check_is_scitype), and looking up "scitype" in it would fail with an
        # unrelated error instead of the descriptive message below
        if not X_valid:
            msg = (
                "X and X2 must be in an sktime compatible format, "
                "of scitype Series or Panel, "
                "for instance a pandas.DataFrame with sktime compatible time indices, "
                "or with MultiIndex and lowest level a sktime compatible time index. "
                "See the data format tutorial examples/AA_datatypes_and_datasets.ipynb"
            )
            raise TypeError(msg)

        metadata = check_res[2]
        X_scitype = metadata["scitype"]

        # if the input is a single series, convert it to a Panel
        if X_scitype == "Series":
            X = convert_Series_to_Panel(X)

        # can't be anything else if check_is_scitype is working properly
        elif X_scitype != "Panel":
            raise RuntimeError(
                "Unexpected error in check_is_scitype, check validity")

        # coerce the (now Panel) data to the mtype requested by the tag
        X_inner_mtype = self.get_tag("X_inner_mtype")
        X_coerced = convert_to(X, to_type=X_inner_mtype, as_scitype="Panel")

        return X_coerced
Exemple #2
0
    def _pairwise_panel_x_check(self, X, var_name="X"):
        """Check and coerce input data.

        Method used to check the input and convert Series/Panel input
            to internally used format, as defined in X_inner_mtype tag

        Parameters
        ----------
        X: List of dfs, Numpy of dfs, 3d numpy
            The value to be checked
        var_name: str, variable name to print in error messages

        Returns
        -------
        X: Panel data container of a supported format in X_inner_mtype
            usually df-list, list of pd.DataFrame, unless overridden

        Raises
        ------
        TypeError
            if check_is_scitype does not accept X as Series or Panel
        RuntimeError
            if the check passes but reports a scitype other than
            "Series" or "Panel"
        """
        check_res = check_is_scitype(
            X, ["Series", "Panel"], return_metadata=True, var_name=var_name
        )
        X_valid = check_res[0]

        # test validity *before* reading the metadata: for a failed check the
        # metadata entry is presumably not a usable dict (TODO confirm against
        # check_is_scitype), and looking up "scitype" in it would fail with an
        # unrelated error instead of the intended TypeError below
        if not X_valid:
            raise TypeError("X/X2 must be of Series or Panel scitype")

        metadata = check_res[2]
        X_scitype = metadata["scitype"]

        # if the input is a single series, convert it to a Panel
        if X_scitype == "Series":
            X = convert_Series_to_Panel(X)

        # can't be anything else if check_is_scitype is working properly
        elif X_scitype != "Panel":
            raise RuntimeError(
                "Unexpected error in check_is_scitype, check validity")

        # coerce the (now Panel) data to the mtype requested by the tag
        X_inner_mtype = self.get_tag("X_inner_mtype")
        X_coerced = convert_to(X, to_type=X_inner_mtype, as_scitype="Panel")

        return X_coerced
Exemple #3
0
    def update(self, X, y=None, Z=None, update_params=True):
        """Update transformer with X, optionally y.

        State required:
            Requires state to be "fitted".

        Accesses in self:
            Fitted model attributes ending in "_".
            self._is_fitted

        Writes to self:
            May update fitted model attributes ending in "_".
            In the vectorized case (Panel input, Series-only inner type),
            writes self.transformers_ and sets self._is_fitted to True.

        Parameters
        ----------
        X : Series or Panel, any supported mtype
            Data to fit transform to, of python type as follows:
                Series: pd.Series, pd.DataFrame, or np.ndarray (1D or 2D)
                Panel: pd.DataFrame with 2-level MultiIndex, list of pd.DataFrame,
                    nested pd.DataFrame, or pd.DataFrame in long/wide format
                subject to sktime mtype format specifications, for further details see
                    examples/AA_datatypes_and_datasets.ipynb
        y : Series or Panel, default=None
            Additional data, e.g., labels for transformation
        Z : possible alias for X; should not be passed when X is passed
            alias Z will be deprecated in version 0.10.0
        update_params : bool, default=True
            whether the model is updated. Yes if true, if false, simply skips call.
            argument exists for compatibility with forecasting module.

        Returns
        -------
        self : a fitted instance of the estimator

        Raises
        ------
        ValueError
            if X fails the mtype check, if the "univariate-only" tag is set
            and X is not univariate, or if X is Panel, the inner type does not
            support Panel, and y is passed
        """
        X = _handle_alias(X, Z)

        # nothing to do when updating is switched off, or when the
        # "fit-in-transform" tag is set
        if not update_params or self.get_tag("fit-in-transform"):
            return self

        # --- validate X ------------------------------------------------------
        check_result = check_is_mtype(
            X, mtype=self.ALLOWED_INPUT_MTYPES, return_metadata=True, var_name="X"
        )
        is_valid, error_msg, meta = check_result
        if not is_valid:
            raise ValueError(error_msg)

        if self.get_tag("univariate-only") and not meta["is_univariate"]:
            raise ValueError("X must be univariate but is not")

        # --- collect scitypes of the input and of the inner representation ---
        input_scitype = meta["scitype"]
        inner_mtypes = _coerce_to_list(self.get_tag("X_inner_mtype"))
        inner_scitypes = mtype_to_scitype(inner_mtypes, return_unique=True)

        # --- reconcile Series vs Panel ----------------------------------------
        # three situations:
        #   a) the inner type supports the input scitype -> fall through
        #   b) input is Series, inner type lacks Series  -> wrap as a Panel
        #   c) input is Panel, inner type lacks Panel    -> one clone per
        #      instance; not available when y is passed
        if input_scitype == "Series":
            if "Series" not in inner_scitypes:
                X = convert_Series_to_Panel(X)
        elif input_scitype == "Panel" and "Panel" not in inner_scitypes:
            if y is not None:
                raise ValueError(
                    "no default behaviour if _fit does not support Panel, "
                    " but X is Panel and y is not None"
                )
            X = convert_to(
                X, to_type="df-list", as_scitype="Panel", store=self._converter_store_X
            )
            # fit a fresh clone on every instance of the panel
            per_instance = []
            for instance in X:
                per_instance.append(clone(self).fit(instance))
            self.transformers_ = per_instance
            # we return from here, so the fitted flag is set here as well
            self._is_fitted = True
            return self

        # --- convert to inner types and delegate ------------------------------
        X_inner, y_inner = self._convert_X_y(X, y)

        # positional call for as long as the Z alias still exists
        self._update(X_inner, y_inner)
        return self
Exemple #4
0
    def inverse_transform(self, X, y=None, Z=None):
        """Inverse transform X and return an inverse transformed version.

        Currently it is assumed that only transformers with tags
            "scitype:transform-input"="Series", "scitype:transform-output"="Series",
        have an inverse_transform.

        State required:
            Requires state to be "fitted".

        Accesses in self:
            Fitted model attributes ending in "_".
            self._is_fitted

        Parameters
        ----------
        X : Series or Panel, any supported mtype
            Data to be inverse transformed, of python type as follows:
                Series: pd.Series, pd.DataFrame, or np.ndarray (1D or 2D)
                Panel: pd.DataFrame with 2-level MultiIndex, list of pd.DataFrame,
                    nested pd.DataFrame, or pd.DataFrame in long/wide format
                subject to sktime mtype format specifications, for further details see
                    examples/AA_datatypes_and_datasets.ipynb
        y : Series or Panel, default=None
            Additional data, e.g., labels for transformation
        Z : possible alias for X; should not be passed when X is passed
            alias Z will be deprecated in version 0.10.0

        Returns
        -------
        inverse transformed version of X
            of the same type as X, and conforming to mtype format specifications

        Raises
        ------
        NotImplementedError
            if the tag "capability:inverse_transform" is falsy
        ValueError
            if X fails the mtype check against self.ALLOWED_INPUT_MTYPES,
            or if the "univariate-only" tag is set and X is not univariate
        """
        if not self.get_tag("capability:inverse_transform"):
            raise NotImplementedError(
                f"{type(self)} does not implement inverse_transform"
            )

        X = _handle_alias(X, Z)

        # check whether is fitted
        self.check_is_fitted()

        # input checks and minor coercions on X, y
        ###########################################

        valid, msg, X_metadata = check_is_mtype(
            X, mtype=self.ALLOWED_INPUT_MTYPES, return_metadata=True, var_name="X"
        )
        # the exception has to be raised, merely constructing it checks nothing
        if not valid:
            raise ValueError(msg)

        # checking X
        enforce_univariate = self.get_tag("univariate-only")
        if enforce_univariate and not X_metadata["is_univariate"]:
            raise ValueError("X must be univariate but is not")

        # retrieve mtypes/scitypes of all objects
        #########################################

        X_input_mtype = X_metadata["mtype"]
        X_input_scitype = X_metadata["scitype"]

        X_inner_mtype = _coerce_to_list(self.get_tag("X_inner_mtype"))
        X_inner_scitypes = mtype_to_scitype(X_inner_mtype, return_unique=True)

        # treating Series vs Panel conversion for X
        ###########################################

        # there are three cases to treat:
        # 1. if the internal _fit supports X's scitype, move on to mtype conversion
        # 2. internal only has Panel but X is Series: consider X as one-instance Panel
        # 3. internal only has Series but X is Panel:  loop over instances
        #     currently this is enabled by conversion to df-list mtype
        #     and this does not support y (unclear what should happen here)

        # 1. nothing to do - simply don't enter any of the ifs below
        #   the "ifs" for case 2 and 3 below are skipped under the condition
        #       X_input_scitype in X_inner_scitypes
        #   case 2 has an "else" which remembers that it wasn't entered

        # 2. internal only has Panel but X is Series: consider X as one-instance Panel
        if (
            X_input_scitype == "Series"
            and "Series" not in X_inner_scitypes
            and "Panel" in X_inner_scitypes
        ):
            # convert the Series X to a one-element Panel
            X = convert_Series_to_Panel(X)
            # remember that we converted the Series to a one-element Panel
            X_was_Series = True
        else:
            # remember that we didn't convert a Series to a one-element Panel
            X_was_Series = False

        # 3. internal only has Series but X is Panel: loop over instances
        if (
            X_input_scitype == "Panel"
            and "Panel" not in X_inner_scitypes
            and "Series" in X_inner_scitypes
        ):
            Xt = self._vectorized_transform(X, X_input_mtype, y=y, inverse=True)
            return Xt

        # convert X/y to supported inner type, if necessary
        ###################################################

        X_inner, y_inner = self._convert_X_y(X, y)

        # carry out the transformation
        ###################################################

        # todo: uncomment this once Z is completely gone
        # Xt = self._inverse_transform(X=X_inner, y=y_inner)
        # less robust workaround until then
        Xt = self._inverse_transform(X_inner, y_inner)

        # convert transformed X back to input mtype
        ###########################################
        Xt = self._convert_output(Xt, X_input_mtype, X_was_Series, inverse=True)

        return Xt
Exemple #5
0
    def transform(self, X, y=None, Z=None):
        """Transform X and return a transformed version.

        State required:
            Requires state to be "fitted".

        Accesses in self:
            Fitted model attributes ending in "_".
            self._is_fitted

        Parameters
        ----------
        X : Series or Panel, any supported mtype
            Data to be transformed, of python type as follows:
                Series: pd.Series, pd.DataFrame, or np.ndarray (1D or 2D)
                Panel: pd.DataFrame with 2-level MultiIndex, list of pd.DataFrame,
                    nested pd.DataFrame, or pd.DataFrame in long/wide format
                subject to sktime mtype format specifications, for further details see
                    examples/AA_datatypes_and_datasets.ipynb
        y : Series or Panel, default=None
            Additional data, e.g., labels for transformation
        Z : possible alias for X; should not be passed when X is passed
            alias Z will be deprecated in version 0.10.0

        Returns
        -------
        transformed version of X
        type depends on type of X and scitype:transform-output tag:
            |          | `transform`  |                        |
            |   `X`    |  `-output`   |     type of return     |
            |----------|--------------|------------------------|
            | `Series` | `Primitives` | `pd.DataFrame` (1-row) |
            | `Panel`  | `Primitives` | `pd.DataFrame`         |
            | `Series` | `Series`     | `Series`               |
            | `Panel`  | `Series`     | `Panel`                |
            | `Series` | `Panel`      | `Panel`                |
        instances in return correspond to instances in `X`
        combinations not in the table are currently not supported

        Explicitly, with examples:
            if `X` is `Series` (e.g., `pd.DataFrame`) and `transform-output` is `Series`
                then the return is a single `Series` of the same mtype
                Example: detrending a single series
            if `X` is `Panel` (e.g., `pd-multiindex`) and `transform-output` is `Series`
                then the return is `Panel` with same number of instances as `X`
                    (the transformer is applied to each input Series instance)
                Example: all series in the panel are detrended individually
            if `X` is `Series` or `Panel` and `transform-output` is `Primitives`
                then the return is `pd.DataFrame` with as many rows as instances in `X`
                Example: i-th row of the return has mean and variance of the i-th series
            if `X` is `Series` and `transform-output` is `Panel`
                then the return is a `Panel` object of type `pd-multiindex`
                Example: i-th instance of the output is the i-th window running over `X`

        Raises
        ------
        ValueError
            if X fails the mtype check against self.ALLOWED_INPUT_MTYPES,
            or if the "univariate-only" tag is set and X is not univariate
        """
        X = _handle_alias(X, Z)

        # check whether is fitted
        self.check_is_fitted()

        # input checks and minor coercions on X, y
        ###########################################

        valid, msg, X_metadata = check_is_mtype(
            X, mtype=self.ALLOWED_INPUT_MTYPES, return_metadata=True, var_name="X"
        )
        # the exception has to be raised, merely constructing it checks nothing
        if not valid:
            raise ValueError(msg)

        # checking X
        enforce_univariate = self.get_tag("univariate-only")
        if enforce_univariate and not X_metadata["is_univariate"]:
            raise ValueError("X must be univariate but is not")

        # retrieve mtypes/scitypes of all objects
        #########################################

        X_input_mtype = X_metadata["mtype"]
        X_input_scitype = X_metadata["scitype"]

        X_inner_mtype = _coerce_to_list(self.get_tag("X_inner_mtype"))
        X_inner_scitypes = mtype_to_scitype(X_inner_mtype, return_unique=True)

        # treating Series vs Panel conversion for X
        ###########################################

        # there are three cases to treat:
        # 1. if the internal _fit supports X's scitype, move on to mtype conversion
        # 2. internal only has Panel but X is Series: consider X as one-instance Panel
        # 3. internal only has Series but X is Panel:  loop over instances
        #     currently this is enabled by conversion to df-list mtype
        #     and this does not support y (unclear what should happen here)

        # 1. nothing to do - simply don't enter any of the ifs below
        #   the "ifs" for case 2 and 3 below are skipped under the condition
        #       X_input_scitype in X_inner_scitypes
        #   case 2 has an "else" which remembers that it wasn't entered

        # 2. internal only has Panel but X is Series: consider X as one-instance Panel
        if (
            X_input_scitype == "Series"
            and "Series" not in X_inner_scitypes
            and "Panel" in X_inner_scitypes
        ):
            # convert the Series X to a one-element Panel
            X = convert_Series_to_Panel(X)
            # remember that we converted the Series to a one-element Panel
            X_was_Series = True
        else:
            # remember that we didn't convert a Series to a one-element Panel
            X_was_Series = False

        # 3. internal only has Series but X is Panel: loop over instances
        if (
            X_input_scitype == "Panel"
            and "Panel" not in X_inner_scitypes
            and "Series" in X_inner_scitypes
        ):
            Xt = self._vectorized_transform(X, X_input_mtype, y=y)
            return Xt

        # convert X/y to supported inner type, if necessary
        ###################################################

        X_inner, y_inner = self._convert_X_y(X, y)

        # carry out the transformation
        ###################################################

        # todo: uncomment this once Z is completely gone
        # Xt = self._transform(X=X_inner, y=y_inner)
        # less robust workaround until then
        Xt = self._transform(X_inner, y_inner)

        # convert transformed X back to input mtype
        ###########################################
        Xt = self._convert_output(Xt, X_input_mtype, X_was_Series)

        return Xt
Exemple #6
0
    def transform(self, X, y=None, Z=None):
        """Transform X and return a transformed version.

        State required:
            Requires state to be "fitted".

        Accesses in self:
            Fitted model attributes ending in "_".
            self._is_fitted

        Parameters
        ----------
        X : Series or Panel, any supported mtype
            Data to be transformed, of python type as follows:
                Series: pd.Series, pd.DataFrame, or np.ndarray (1D or 2D)
                Panel: pd.DataFrame with 2-level MultiIndex, list of pd.DataFrame,
                    nested pd.DataFrame, or pd.DataFrame in long/wide format
                subject to sktime mtype format specifications, for further details see
                    examples/AA_datatypes_and_datasets.ipynb
        y : Series or Panel, default=None
            Additional data, e.g., labels for transformation
        Z : possible alias for X; should not be passed when X is passed
            alias Z will be deprecated in version 0.10.0

        Returns
        -------
        transformed version of X
        type depends on type of X and scitype:transform-output tag:
            |          | `transform`  |                        |
            |   `X`    |  `-output`   |     type of return     |
            |----------|--------------|------------------------|
            | `Series` | `Primitives` | `pd.DataFrame` (1-row) |
            | `Panel`  | `Primitives` | `pd.DataFrame`         |
            | `Series` | `Series`     | `Series`               |
            | `Panel`  | `Series`     | `Panel`                |
            | `Series` | `Panel`      | `Panel`                |
        instances in return correspond to instances in `X`
        combinations not in the table are currently not supported

        Explicitly, with examples:
            if `X` is `Series` (e.g., `pd.DataFrame`) and `transform-output` is `Series`
                then the return is a single `Series` of the same mtype
                Example: detrending a single series
            if `X` is `Panel` (e.g., `pd-multiindex`) and `transform-output` is `Series`
                then the return is `Panel` with same number of instances as `X`
                    (the transformer is applied to each input Series instance)
                Example: all series in the panel are detrended individually
            if `X` is `Series` or `Panel` and `transform-output` is `Primitives`
                then the return is `pd.DataFrame` with as many rows as instances in `X`
                Example: i-th row of the return has mean and variance of the i-th series
            if `X` is `Series` and `transform-output` is `Panel`
                then the return is a `Panel` object of type `pd-multiindex`
                Example: i-th instance of the output is the i-th window running over `X`

        Raises
        ------
        ValueError
            if X fails the check against self.ALLOWED_INPUT_MTYPES,
            if the "univariate-only" tag is set and X is not univariate,
            or if X is Panel, only Series is supported internally, and y is passed
        RuntimeError
            in the vectorized case, if the number of instances in X differs
            from the number of transformers stored in self.transformers_
        """
        X = _handle_alias(X, Z)

        # check whether is fitted, unless fit-in-transform is true
        if self.get_tag("fit-in-transform"):
            # X already holds the alias-resolved data, so Z is not forwarded:
            # per the parameter docs, Z must not be passed together with X
            self.fit(X=X, y=y)
        else:
            self.check_is_fitted()

        # input checks and minor coercions on X, y
        ###########################################

        valid, msg, metadata = check_is(
            X,
            mtype=self.ALLOWED_INPUT_MTYPES,
            return_metadata=True,
            var_name="X",
        )
        # the exception has to be raised, merely constructing it checks nothing
        if not valid:
            raise ValueError(msg)

        # checking X
        enforce_univariate = self.get_tag("univariate-only")
        if enforce_univariate and not metadata["is_univariate"]:
            raise ValueError("X must be univariate but is not")

        # retrieve mtypes/scitypes of all objects
        #########################################

        X_input_mtype = mtype(X)
        X_input_scitype = mtype_to_scitype(X_input_mtype)
        y_input_mtype = mtype(y)
        y_input_scitype = mtype_to_scitype(y_input_mtype)

        output_scitype = self.get_tag("scitype:transform-output")

        X_inner_mtype = self.get_tag("X_inner_mtype")
        if not isinstance(X_inner_mtype, list):
            X_inner_mtype = [X_inner_mtype]
        X_inner_scitypes = list({mtype_to_scitype(mt) for mt in X_inner_mtype})

        y_inner_mtype = self.get_tag("y_inner_mtype")
        if not isinstance(y_inner_mtype, list):
            y_inner_mtype = [y_inner_mtype]

        # treating Series vs Panel conversion for X
        ###########################################

        # there are three cases to treat:
        # 1. if the internal _fit supports X's scitype, move on to mtype conversion
        # 2. internal only has Panel but X is Series: consider X as one-instance Panel
        # 3. internal only has Series but X is Panel:  loop over instances
        #     currently this is enabled by conversion to df-list mtype
        #     and this does not support y (unclear what should happen here)

        # 1. nothing to do - simply don't enter any of the ifs below
        #   the "ifs" for case 2 and 3 below are skipped under the condition
        #       X_input_scitype in X_inner_scitypes
        #   case 2 has an "else" which remembers that it wasn't entered

        # 2. internal only has Panel but X is Series: consider X as one-instance Panel
        if (
            X_input_scitype == "Series"
            and "Series" not in X_inner_scitypes
            and "Panel" in X_inner_scitypes
        ):
            # convert the Series X to a one-element Panel
            X = convert_Series_to_Panel(X)
            # remember that we converted the Series to a one-element Panel
            X_was_Series = True
        else:
            # remember that we didn't convert a Series to a one-element Panel
            X_was_Series = False

        # 3. internal only has Series but X is Panel: loop over instances
        if (
            X_input_scitype == "Panel"
            and "Panel" not in X_inner_scitypes
            and "Series" in X_inner_scitypes
        ):
            if y is not None:
                raise ValueError(
                    "no default behaviour if _fit does not support Panel, "
                    " but X is Panel and y is not None"
                )
            X = convert_to(X, to_type="df-list", as_scitype="Panel")

            if self.get_tag("fit-in-transform"):
                # nothing was fitted per instance, so use a fresh clone each time
                Xt = [clone(self).transform(Xi) for Xi in X]
            else:
                # apply the i-th stored transformer to the i-th instance
                transformers = self.transformers_
                if len(transformers) != len(X):
                    raise RuntimeError(
                        "found different number of instances in transform than in fit"
                    )
                Xt = [
                    transformer.transform(Xi)
                    for transformer, Xi in zip(transformers, X)
                ]
            # now we have a list of transformed instances

            # if the output is Series, Xt is a Panel and we convert back
            if output_scitype == "Series":
                Xt = convert_to(Xt, to_type=X_input_mtype, as_scitype="Panel")

            # if the output is Primitives, we have a list of one-row dataframes
            # we concatenate those and overwrite the index with that of X
            elif output_scitype == "Primitives":
                Xt = pd.concat(Xt)
                # NOTE(review): X was converted to "df-list" above; if that is a
                # plain python list, X.index is the list method rather than an
                # instance index - confirm which index is intended here
                Xt.index = X.index
            return Xt

        # convert X/y to supported inner type, if necessary
        ###################################################

        # variables for the scitype of the current X (possibly converted)
        #     y wasn't converted so we can use y_input_scitype
        X_mtype = mtype(X)
        X_scitype = mtype_to_scitype(X_mtype)

        # subset to the mtypes that are of the same scitype as X/y
        X_inner_mtype = [
            mt for mt in X_inner_mtype if mtype_to_scitype(mt) == X_scitype
        ]

        y_inner_mtype = [
            mt for mt in y_inner_mtype
            if mtype_to_scitype(mt) == y_input_scitype
        ]

        # convert X and y to a supported internal type
        #  if X/y type is already supported, no conversion takes place
        X_inner = convert_to(
            X,
            to_type=X_inner_mtype,
            as_scitype=X_scitype,
        )
        y_inner = convert_to(
            y,
            to_type=y_inner_mtype,
            as_scitype=y_input_scitype,
        )

        # carry out the transformation
        ###################################################

        # todo: uncomment this once Z is completely gone
        # Xt = self._transform(X=X_inner, y=y_inner)
        # less robust workaround until then
        Xt = self._transform(X_inner, y_inner)

        # convert transformed X back to input mtype
        ###########################################

        # if we converted Series to "one-instance-Panel", revert that
        if X_was_Series and output_scitype == "Series":
            Xt = convert_Panel_to_Series(Xt)

        if output_scitype == "Series":
            Xt = convert_to(
                Xt,
                to_type=X_input_mtype,
                as_scitype=X_input_scitype,
            )
        elif output_scitype == "Primitives":
            # we "abuse" the Series converter to ensure df output
            Xt = convert_to(
                Xt,
                to_type="pd.DataFrame",
                as_scitype="Series",
            )
        # otherwise output_scitype is "Panel" and no conversion is needed

        return Xt
Exemple #7
0
    def fit(self, X, y=None, Z=None):
        """Fit transformer to X, optionally to y.

        State change:
            Changes state to "fitted".

        Writes to self:
            Sets is_fitted flag to True.
            Sets fitted model attributes ending in "_".

        Parameters
        ----------
        X : Series or Panel, any supported mtype
            Data to fit transform to, of python type as follows:
                Series: pd.Series, pd.DataFrame, or np.ndarray (1D or 2D)
                Panel: pd.DataFrame with 2-level MultiIndex, list of pd.DataFrame,
                    nested pd.DataFrame, or pd.DataFrame in long/wide format
                subject to sktime mtype format specifications, for further details see
                    examples/AA_datatypes_and_datasets.ipynb
        y : Series or Panel, default=None
            Additional data, e.g., labels for transformation
        Z : possible alias for X; should not be passed when X is passed
            alias Z will be deprecated in version 0.10.0

        Returns
        -------
        self : a fitted instance of the estimator
        """
        X = _handle_alias(X, Z)

        self._is_fitted = False

        # skip everything if fit-in-transform is True
        if self.get_tag("fit-in-transform"):
            self._is_fitted = True
            return self

        # input checks and minor coercions on X, y
        ###########################################

        valid, msg, metadata = check_is(X,
                                        mtype=self.ALLOWED_INPUT_MTYPES,
                                        return_metadata=True,
                                        var_name="X")
        if not valid:
            raise ValueError(msg)

        # checking X
        enforce_univariate = self.get_tag("univariate-only")
        if enforce_univariate and not metadata["is_univariate"]:
            raise ValueError("X must be univariate but is not")

        # retrieve mtypes/scitypes of all objects
        #########################################

        X_input_mtype = mtype(X)
        X_input_scitype = mtype_to_scitype(X_input_mtype)
        y_input_mtype = mtype(y)
        y_input_scitype = mtype_to_scitype(y_input_mtype)

        X_inner_mtype = self.get_tag("X_inner_mtype")
        if not isinstance(X_inner_mtype, list):
            X_inner_mtype = [X_inner_mtype]
        X_inner_scitypes = list(
            set([mtype_to_scitype(mt) for mt in X_inner_mtype]))

        y_inner_mtype = self.get_tag("y_inner_mtype")
        if not isinstance(y_inner_mtype, list):
            y_inner_mtype = [y_inner_mtype]
        # y_inner_scitypes = list(set([mtype_to_scitype(mt) for mt in y_inner_mtype]))

        # treating Series vs Panel conversion for X
        ###########################################

        # there are three cases to treat:
        # 1. if the internal _fit supports X's scitype, move on to mtype conversion
        # 2. internal only has Panel but X is Series: consider X as one-instance Panel
        # 3. internal only has Series but X is Panel: auto-vectorization over instances
        #     currently, this is enabled by conversion to df-list mtype
        #     auto-vectorization is not supported if y is passed
        #       individual estimators that vectorize over y must implement individually

        # 1. nothing to do - simply don't enter any of the ifs below

        # 2. internal only has Panel but X is Series: consider X as one-instance Panel
        if X_input_scitype == "Series" and "Series" not in X_inner_scitypes:
            X = convert_Series_to_Panel(X)

        # 3. internal only has Series but X is Panel: loop over instances
        elif X_input_scitype == "Panel" and "Panel" not in X_inner_scitypes:
            if y is not None:
                raise ValueError(
                    "no default behaviour if _fit does not support Panel, "
                    " but X is Panel and y is not None")
            X = convert_to(X, to_type="df-list", as_scitype="Panel")
            # this fits one transformer per instance
            self.transformers_ = [clone(self).fit(Xi) for Xi in X]
            # recurse and leave function - recursion does input checks/conversion
            # also set is_fitted flag to True since we leave function here
            self._is_fitted = True
            return self

        X_mtype = mtype(X)
        X_scitype = mtype_to_scitype(X_mtype)

        # for debugging, exception if the conversion fails (this should never happen)
        if X_scitype not in X_inner_scitypes:
            raise RuntimeError(
                "conversion of X to X_inner unsuccessful, unexpected")

        # convert X/y to supported inner type, if necessary
        ###################################################

        # subset to the mtypes that are of the same scitype as X/y
        X_inner_mtype = [
            mt for mt in X_inner_mtype if mtype_to_scitype(mt) == X_scitype
        ]

        y_inner_mtype = [
            mt for mt in y_inner_mtype
            if mtype_to_scitype(mt) == y_input_scitype
        ]

        # convert X and y to a supported internal type
        #  if X/y type is already supported, no conversion takes place
        X_inner = convert_to(
            X,
            to_type=X_inner_mtype,
            as_scitype=X_scitype,
        )
        y_inner = convert_to(
            y,
            to_type=y_inner_mtype,
            as_scitype=y_input_scitype,
        )

        # todo: uncomment this once Z is completely gone
        # self._fit(X=X_inner, y=y_inner)
        # less robust workaround until then
        self._fit(X_inner, y_inner)

        self._is_fitted = True
        return self
Exemple #8
0
    def fit(self, X, y=None, Z=None):
        """Fit the transformer on X, optionally using y.

        State change:
            Puts the estimator into the "fitted" state.

        Writes to self:
            Resets the is_fitted flag to False on entry and sets it to True
            before returning.
            Sets fitted model attributes ending in "_". When Panel X is
            auto-vectorized over a Series-only estimator, this is
            ``transformers_``, a list with one fitted clone per instance.

        Parameters
        ----------
        X : Series or Panel, any supported mtype
            Data the transformer is fitted to, of python type as follows:
                Series: pd.Series, pd.DataFrame, or np.ndarray (1D or 2D)
                Panel: pd.DataFrame with 2-level MultiIndex, list of pd.DataFrame,
                    nested pd.DataFrame, or pd.DataFrame in long/wide format
                subject to sktime mtype format specifications, for further details see
                    examples/AA_datatypes_and_datasets.ipynb
        y : Series or Panel, default=None
            Additional data, e.g., labels for transformation
        Z : possible alias for X; should not be passed when X is passed
            alias Z is deprecated since version 0.10.0 and will be removed in 0.11.0

        Returns
        -------
        self : a fitted instance of the estimator

        Raises
        ------
        ValueError
            If X fails the mtype check against ``ALLOWED_INPUT_MTYPES``.
            If the "univariate-only" tag is set and X is not univariate.
            If X is Panel, only Series is supported internally, y is not None
            and the "y_inner_mtype" tag is not "None".
        """
        X = _handle_alias(X, Z)

        # every call to fit starts from the unfitted state
        self._is_fitted = False

        # estimators tagged fit-in-transform have nothing to do here;
        # note that this shortcut is taken before any input validation
        if self.get_tag("fit-in-transform"):
            self._is_fitted = True
            return self

        # validate X against the accepted input mtypes
        is_valid, error_msg, meta = check_is_mtype(
            X,
            mtype=self.ALLOWED_INPUT_MTYPES,
            return_metadata=True,
            var_name="X",
        )
        if not is_valid:
            raise ValueError(error_msg)

        # the univariate metadata is only consulted if the tag demands it
        if self.get_tag("univariate-only") and not meta["is_univariate"]:
            raise ValueError("X must be univariate but is not")

        # scitype of the input versus scitypes the inner _fit can handle
        input_scitype = meta["scitype"]
        inner_mtypes = _coerce_to_list(self.get_tag("X_inner_mtype"))
        inner_scitypes = mtype_to_scitype(inner_mtypes, return_unique=True)

        # Series vs Panel reconciliation, three situations:
        #   a) inner _fit supports the scitype of X: neither guard below fires
        #   b) X is Panel, inner _fit is Series-only: vectorize over instances
        #   c) X is Series, inner _fit is Panel-only: wrap X as 1-instance Panel
        # b) and c) are mutually exclusive, so the order of the guards is free

        # b) auto-vectorization: one clone of self is fitted per instance
        if input_scitype == "Panel" and "Panel" not in inner_scitypes:
            # vectorizing over y is not supported by default
            if y is not None and self.get_tag("y_inner_mtype") != "None":
                raise ValueError(
                    f"{type(self).__name__} does not support Panel X if y is not None, "
                    f"since {type(self).__name__} supports only Series. "
                    "Auto-vectorization to extend Series X to Panel X can only be "
                    'carried out if y is None, or "y_inner_mtype" tag is "None". '
                    "Consider extending _fit and _transform to handle the following "
                    "input types natively: Panel X and non-None y."
                )
            instances = convert_to(
                X,
                to_type="df-list",
                as_scitype="Panel",
                store=self._converter_store_X,
            )
            # the recursive fit calls take care of checks/conversion per instance
            self.transformers_ = [
                clone(self).fit(instance) for instance in instances
            ]
            # leaving early, so the fitted flag has to be set here
            self._is_fitted = True
            return self

        # c) a single series is treated as a Panel with exactly one instance
        if input_scitype == "Series" and "Series" not in inner_scitypes:
            X = convert_Series_to_Panel(X)

        X_inner, y_inner = self._convert_X_y(X, y)

        # todo: switch to self._fit(X=X_inner, y=y_inner) once Z is completely gone
        # positional arguments are the less robust workaround until then
        self._fit(X_inner, y_inner)

        self._is_fitted = True
        return self