Esempio n. 1
0
    def _convert_X_y(self, X, y):
        """Convert X and y to an mtype supported by the inner methods.

        The admissible inner mtypes are read from the tags "X_inner_mtype" and
        "y_inner_mtype". Of those, only the mtypes whose scitype equals the
        scitype inferred for X (resp. y) are offered to ``convert_to``.

        Parameters
        ----------
        X : object, mtype is inferred with candidate scitypes Series and Panel
        y : object, mtype is inferred with candidate scitypes Series and Panel

        Returns
        -------
        X_inner : result of ``convert_to`` applied to X
        y_inner : result of ``convert_to`` applied to y,
            or None if the "y_inner_mtype" tag, coerced to list, equals ["None"]

        Raises
        ------
        RuntimeError
            if the scitype of X is not among the scitypes of "X_inner_mtype"
        """
        supported_X_mtypes = _coerce_to_list(self.get_tag("X_inner_mtype"))
        supported_X_scitypes = mtype_to_scitype(supported_X_mtypes, return_unique=True)

        supported_y_mtypes = _coerce_to_list(self.get_tag("y_inner_mtype"))

        # infer the scitypes of the inputs, X first and then y
        X_scitype = mtype_to_scitype(mtype(X, as_scitype=["Series", "Panel"]))
        y_scitype = mtype_to_scitype(mtype(y, as_scitype=["Series", "Panel"]))

        # sanity check for debugging, this is not expected to trigger
        if X_scitype not in supported_X_scitypes:
            raise RuntimeError(
                "conversion of X to X_inner unsuccessful, unexpected")

        # only inner mtypes of the same scitype as X are conversion targets;
        #   if X already has one of these mtypes, convert_to leaves it as is
        X_targets = []
        for candidate in supported_X_mtypes:
            if mtype_to_scitype(candidate) == X_scitype:
                X_targets.append(candidate)

        X_inner = convert_to(
            X,
            to_type=X_targets,
            as_scitype=X_scitype,
            store=self._converter_store_X,
        )

        # the tag value "None" means that y is not passed on
        if supported_y_mtypes == ["None"]:
            return X_inner, None

        y_targets = []
        for candidate in supported_y_mtypes:
            if mtype_to_scitype(candidate) == y_scitype:
                y_targets.append(candidate)

        y_inner = convert_to(y, to_type=y_targets, as_scitype=y_scitype)

        return X_inner, y_inner
Esempio n. 2
0
    def _set_cutoff_from_y(self, y):
        """Set and update cutoff from series y.

        Parameters
        ----------
        y: pd.Series, pd.DataFrame, or np.array
            Target time series to which to fit the forecaster.

        Raises
        ------
        TypeError
            If the mtype inferred for `y` is none of "pd.Series",
            "pd.DataFrame", "np.ndarray".

        Notes
        -----
        Writes to self._cutoff:
            for pd.Series and pd.DataFrame, the last element of `y.index`;
            for np.ndarray, `len(y)`.
        If `y` is a pd.Series or pd.DataFrame with empty index, self._cutoff
        is left untouched, since there is no last index to read.
        """
        # infer the mtype once and reuse it in all branches
        y_mtype = mtype(y, as_scitype="Series")

        if y_mtype in ("pd.Series", "pd.DataFrame"):
            # indexing an empty index with [-1] raises IndexError,
            #   so an empty y must not update the cutoff
            if len(y.index) > 0:
                self._cutoff = y.index[-1]
        elif y_mtype == "np.ndarray":
            self._cutoff = len(y)
        else:
            raise TypeError("y does not have a supported type")
Esempio n. 3
0
    def _vectorized_transform(self, X, X_input_mtype=None, y=None, inverse=False):
        """Vectorized application of transform or inverse, and convert back."""
        if X_input_mtype is None:
            X_input_mtype = mtype(X, as_scitype=["Series", "Panel"])
        if y is not None:
            ValueError(
                "no default behaviour if _fit does not support Panel, "
                " but X is Panel and y is not None"
            )

        X = convert_to(
            X, to_type="df-list", as_scitype="Panel", store=self._converter_store_X
        )

        # depending on whether fitting happens, apply fitted or unfitted instances
        if not self.get_tag("fit-in-transform"):
            # these are the transformers-per-instanced, fitted in fit
            transformers = self.transformers_
            if len(transformers) != len(X):
                raise RuntimeError(
                    "found different number of instances in transform than in fit"
                )
            if inverse:
                Xt = [transformers[i].inverse_transform(X[i]) for i in range(len(X))]
            else:
                Xt = [transformers[i].transform(X[i]) for i in range(len(X))]
            # now we have a list of transformed instances
        else:
            # if no fitting happens, just apply transform multiple times
            if inverse:
                Xt = [self.inverse_transform(X[i]) for i in range(len(X))]
            else:
                Xt = [self.transform(X[i]) for i in range(len(X))]

        # convert to expected output format
        ###################################
        if inverse:
            output_scitype = self.get_tag("scitype:transform-input")
        else:
            output_scitype = self.get_tag("scitype:transform-output")
        # if the output is Series, Xt is a Panel and we convert back
        if output_scitype == "Series":
            Xt = convert_to(
                Xt,
                to_type=X_input_mtype,
                as_scitype="Panel",
                store=self._converter_store_X,
            )

        # if the output is Primitives, we have a list of one-row dataframes
        # we concatenate those and overwrite the index with that of X
        elif output_scitype == "Primitives":
            Xt = pd.concat(Xt)
            Xt = Xt.reset_index(drop=True)
        return Xt
Esempio n. 4
0
    def _set_cutoff_from_y(self, y):
        """Update the cutoff from the series y.

        Parameters
        ----------
        y: pd.Series, pd.DataFrame, or np.array
            Time series from which to infer the cutoff.

        Raises
        ------
        TypeError
            If `y` is non-empty and its inferred mtype is none of
            "pd.Series", "pd.DataFrame", "np.ndarray".

        Notes
        -----
        For non-empty `y`, writes to self._cutoff:
            the last element of `y.index` for pd.Series and pd.DataFrame;
            `len(y)` for np.ndarray.
        For empty `y`, self._cutoff is not modified.
        """
        inferred_mtype = mtype(y, as_scitype="Series")

        # an empty series carries no information about the cutoff
        if len(y) == 0:
            return

        if inferred_mtype == "np.ndarray":
            self._cutoff = len(y)
        elif inferred_mtype in ("pd.Series", "pd.DataFrame"):
            self._cutoff = y.index[-1]
        else:
            raise TypeError("y does not have a supported type")
Esempio n. 5
0
def convert_to_scitype(obj, to_scitype, from_scitype=None, store=None):
    """Convert single-series or single-panel between mtypes.

    Assumes input is conformant with one of the mtypes
        for one of the scitypes Series, Panel, Hierarchical.
    This method does not perform full mtype checks, use mtype or check_is_mtype for
    checks.

    Parameters
    ----------
    obj : an object of scitype Series, Panel, or Hierarchical.
    to_scitype : str, scitype that obj should be converted to
    from_scitype : str, optional. Default = inferred from obj
        scitype that obj is of, and being converted from
        if passed, function will skip type inference from obj
    store : dict, optional. Converter store for back-conversion.

    Returns
    -------
    obj of scitype to_scitype
        if to_scitype equals from_scitype, obj is returned unchanged
        if converted to or from Hierarchical, the mtype will always be one of
            pd.DataFrame (Series), pd-multiindex (Panel), or pd_multiindex_hier
        if converted to or from Panel, mtype will attempt to keep python type
            e.g., np.ndarray (Series) converted to numpy3D (Panel) or back
            if not possible, will be one of the mtypes with pd.DataFrame python type

    Raises
    ------
    NameError
        if no callable named convert_{from_scitype}_to_{to_scitype}
        is available at module level
    """
    if from_scitype is None:
        obj_mtype = mtype(obj, as_scitype=["Series", "Panel", "Hierarchical"])
        from_scitype = mtype_to_scitype(obj_mtype)

    if to_scitype == from_scitype:
        return obj

    # converters are looked up by name among the module-level names;
    #   a plain dictionary lookup is used instead of eval, since eval would
    #   execute arbitrary expressions smuggled in via the scitype strings
    func_name = f"convert_{from_scitype}_to_{to_scitype}"
    func = globals().get(func_name)

    # NameError is kept as the exception type, since this is what the
    #   previous eval-based lookup raised for an unknown converter name
    if not callable(func):
        raise NameError(
            f"no converter from scitype {from_scitype!r} to scitype "
            f"{to_scitype!r} found, name '{func_name}' is not defined"
        )

    return func(obj, store=store)
Esempio n. 6
0
    def _check_X_y(self, X=None, y=None):
        """Check and coerce X/y for fit/predict/update functions.

        Parameters
        ----------
        y : pd.Series, pd.DataFrame, or np.ndarray (1D or 2D), optional (default=None)
            Time series to check.
        X : pd.DataFrame, or 2D np.array, optional (default=None)
            Exogeneous time series.

        Returns
        -------
        X_inner : Series compatible with self.get_tag("X_inner_mtype") format
            converted/coerced version of X, mtype determined by "X_inner_mtype" tag
            None if X was None
        y_inner : Series compatible with self.get_tag("y_inner_mtype") format
            converted/coerced version of y, mtype determined by "y_inner_mtype" tag
            None if y was None

        Raises
        ------
        TypeError if y or X is not one of the permissible Series mtypes
        TypeError if y is not compatible with self.get_tag("scitype:y")
            if tag value is "univariate", y must be univariate
            if tag value is "multivariate", y must be bi- or higher-variate
            if tag value is "both", y can be either
        TypeError if self.get_tag("X-y-must-have-same-index") is True
            and the index set of X is not a super-set of the index set of y

        Writes to self
        --------------
        _y_mtype_last_seen : str, mtype of y, written only if y is not None
        _converter_store_y : dict, metadata from conversion for back-conversion
        """
        # requirements on the inputs, as specified by the tags
        univariate_only = self.get_tag("scitype:y") == "univariate"
        multivariate_only = self.get_tag("scitype:y") == "multivariate"
        index_type = self.get_tag("enforce_index_type")

        # checks and minor coercions on y; an empty y is admissible
        if y is not None:
            y = check_series(
                y,
                enforce_univariate=univariate_only,
                enforce_multivariate=multivariate_only,
                enforce_index_type=index_type,
                allow_None=False,
                allow_empty=True,
                var_name="y",
            )
            # remember the mtype of y as it was passed
            self._y_mtype_last_seen = mtype(y, as_scitype="Series")

        # checks and minor coercions on X
        if X is not None:
            X = check_series(X, enforce_index_type=index_type, var_name="X")
            if self.get_tag("X-y-must-have-same-index"):
                check_equal_time_index(X, y)

        # conversion to a supported inner mtype, y first and then X
        #   if the mtype is already supported, no conversion takes place
        #   if X/y is None, no conversion takes place and None is returned
        y_target = self.get_tag("y_inner_mtype")
        y_inner = convert_to(
            y,
            to_type=y_target,
            as_scitype="Series",
            store=self._converter_store_y,
        )

        X_target = self.get_tag("X_inner_mtype")
        X_inner = convert_to(X, to_type=X_target, as_scitype="Series")

        return X_inner, y_inner
Esempio n. 7
0
    def transform(self, X, y=None, Z=None):
        """Transform X and return a transformed version.

        State required:
            Requires state to be "fitted".

        Accesses in self:
            Fitted model attributes ending in "_".
            self._is_fitted

        Parameters
        ----------
        X : Series or Panel, any supported mtype
            Data to be transformed, of python type as follows:
                Series: pd.Series, pd.DataFrame, or np.ndarray (1D or 2D)
                Panel: pd.DataFrame with 2-level MultiIndex, list of pd.DataFrame,
                    nested pd.DataFrame, or pd.DataFrame in long/wide format
                subject to sktime mtype format specifications, for further details see
                    examples/AA_datatypes_and_datasets.ipynb
        y : Series or Panel, default=None
            Additional data, e.g., labels for transformation
        Z : possible alias for X; should not be passed when X is passed
            alias Z will be deprecated in version 0.10.0

        Returns
        -------
        transformed version of X
        type depends on type of X and scitype:transform-output tag:
            |          | `transform`  |                        |
            |   `X`    |  `-output`   |     type of return     |
            |----------|--------------|------------------------|
            | `Series` | `Primitives` | `pd.DataFrame` (1-row) |
            | `Panel`  | `Primitives` | `pd.DataFrame`         |
            | `Series` | `Series`     | `Series`               |
            | `Panel`  | `Series`     | `Panel`                |
            | `Series` | `Panel`      | `Panel`                |
        instances in return correspond to instances in `X`
        combinations not in the table are currently not supported

        Explicitly, with examples:
            if `X` is `Series` (e.g., `pd.DataFrame`) and `transform-output` is `Series`
                then the return is a single `Series` of the same mtype
                Example: detrending a single series
            if `X` is `Panel` (e.g., `pd-multiindex`) and `transform-output` is `Series`
                then the return is `Panel` with same number of instances as `X`
                    (the transformer is applied to each input Series instance)
                Example: all series in the panel are detrended individually
            if `X` is `Series` or `Panel` and `transform-output` is `Primitives`
                then the return is `pd.DataFrame` with as many rows as instances in `X`
                Example: i-th row of the return has mean and variance of the i-th series
            if `X` is `Series` and `transform-output` is `Panel`
                then the return is a `Panel` object of type `pd-multiindex`
                Example: i-th instance of the output is the i-th window running over `X`

        Raises
        ------
        ValueError
            if `X` is not of one of the mtypes in `self.ALLOWED_INPUT_MTYPES`
        ValueError
            if the "univariate-only" tag is True and `X` is not univariate
        ValueError
            if `X` is Panel, the inner mtypes support only Series,
            and `y` is not None
        RuntimeError
            if `X` is Panel, the inner mtypes support only Series, and the number
            of instances in `X` differs from the number of fitted transformers
        """
        X = _handle_alias(X, Z)

        # check whether is fitted, unless fit-in-transform is true
        if self.get_tag("fit-in-transform"):
            # X is already resolved from the X/Z alias pair at this point;
            #   Z is not forwarded, as X and Z should not both be passed to fit
            self.fit(X=X, y=y)
        else:
            self.check_is_fitted()

        # input checks and minor coercions on X, y
        ###########################################

        valid, msg, metadata = check_is(
            X,
            mtype=self.ALLOWED_INPUT_MTYPES,
            return_metadata=True,
            var_name="X",
        )
        # the exceptions in this section used to be constructed without raise,
        #   which let invalid input pass silently; fit raises in the same cases
        if not valid:
            raise ValueError(msg)

        # checking X
        enforce_univariate = self.get_tag("univariate-only")
        if enforce_univariate and not metadata["is_univariate"]:
            raise ValueError("X must be univariate but is not")

        # retrieve mtypes/scitypes of all objects
        #########################################

        X_input_mtype = mtype(X)
        X_input_scitype = mtype_to_scitype(X_input_mtype)
        y_input_mtype = mtype(y)
        y_input_scitype = mtype_to_scitype(y_input_mtype)

        output_scitype = self.get_tag("scitype:transform-output")

        X_inner_mtype = self.get_tag("X_inner_mtype")
        if not isinstance(X_inner_mtype, list):
            X_inner_mtype = [X_inner_mtype]
        X_inner_scitypes = list({mtype_to_scitype(mt) for mt in X_inner_mtype})

        y_inner_mtype = self.get_tag("y_inner_mtype")
        if not isinstance(y_inner_mtype, list):
            y_inner_mtype = [y_inner_mtype]

        # treating Series vs Panel conversion for X
        ###########################################

        # there are three cases to treat:
        # 1. if the internal _fit supports X's scitype, move on to mtype conversion
        # 2. internal only has Panel but X is Series: consider X as one-instance Panel
        # 3. internal only has Series but X is Panel:  loop over instances
        #     currently this is enabled by conversion to df-list mtype
        #     and this does not support y (unclear what should happen here)

        # 1. nothing to do - simply don't enter any of the ifs below
        #   the "ifs" for case 2 and 3 below are skipped under the condition
        #       X_input_scitype in X_inner_scitypes

        # 2. internal only has Panel but X is Series: consider X as one-instance Panel
        #   X_was_Series remembers whether this conversion took place
        X_was_Series = (
            X_input_scitype == "Series"
            and "Series" not in X_inner_scitypes
            and "Panel" in X_inner_scitypes
        )
        if X_was_Series:
            # convert the Series X to a one-element Panel
            X = convert_Series_to_Panel(X)

        # 3. internal only has Series but X is Panel: loop over instances
        if (
            X_input_scitype == "Panel"
            and "Panel" not in X_inner_scitypes
            and "Series" in X_inner_scitypes
        ):
            if y is not None:
                raise ValueError(
                    "no default behaviour if _fit does not support Panel, "
                    " but X is Panel and y is not None")
            # from here on, X is a list of per-instance data frames
            X = convert_to(X, to_type="df-list", as_scitype="Panel")

            if self.get_tag("fit-in-transform"):
                Xt = [clone(self).transform(Xi) for Xi in X]
            else:
                transformers = self.transformers_
                if len(transformers) != len(X):
                    raise RuntimeError(
                        "found different number of instances in transform than in fit"
                    )
                # lengths are equal, so zip pairs each instance with its transformer
                Xt = [t.transform(Xi) for t, Xi in zip(transformers, X)]
            # now we have a list of transformed instances

            # if the output is Series, Xt is a Panel and we convert back
            if output_scitype == "Series":
                Xt = convert_to(Xt, to_type=X_input_mtype, as_scitype="Panel")

            # if the output is Primitives, we have a list of one-row dataframes
            # we concatenate those and replace the index by a fresh default index;
            #   X is a plain list here, so X.index is the list method, not an index
            elif output_scitype == "Primitives":
                Xt = pd.concat(Xt)
                Xt = Xt.reset_index(drop=True)
            return Xt

        # convert X/y to supported inner type, if necessary
        ###################################################

        # variables for the scitype of the current X (possibly converted)
        #     y wasn't converted so we can use y_input_scitype
        X_mtype = mtype(X)
        X_scitype = mtype_to_scitype(X_mtype)

        # subset to the mtypes that are of the same scitype as X/y
        X_inner_mtype = [
            mt for mt in X_inner_mtype if mtype_to_scitype(mt) == X_scitype
        ]

        y_inner_mtype = [
            mt for mt in y_inner_mtype
            if mtype_to_scitype(mt) == y_input_scitype
        ]

        # convert X and y to a supported internal type
        #  if X/y type is already supported, no conversion takes place
        X_inner = convert_to(
            X,
            to_type=X_inner_mtype,
            as_scitype=X_scitype,
        )
        y_inner = convert_to(
            y,
            to_type=y_inner_mtype,
            as_scitype=y_input_scitype,
        )

        # carry out the transformation
        ###################################################

        # todo: uncomment this once Z is completely gone
        # Xt = self._transform(X=X_inner, y=y_inner)
        # less robust workaround until then
        Xt = self._transform(X_inner, y_inner)

        # convert transformed X back to input mtype
        ###########################################

        # if we converted Series to "one-instance-Panel", revert that
        if X_was_Series and output_scitype == "Series":
            Xt = convert_Panel_to_Series(Xt)

        if output_scitype == "Series":
            Xt = convert_to(
                Xt,
                to_type=X_input_mtype,
                as_scitype=X_input_scitype,
            )
        elif output_scitype == "Primitives":
            # we "abuse" the Series converter to ensure df output
            Xt = convert_to(
                Xt,
                to_type="pd.DataFrame",
                as_scitype="Series",
            )
        # otherwise, output_scitype is "Panel" and there is no need for conversion

        return Xt
Esempio n. 8
0
    def fit(self, X, y=None, Z=None):
        """Fit transformer to X, optionally to y.

        State change:
            Changes state to "fitted".

        Writes to self:
            Sets is_fitted flag to True.
            Sets fitted model attributes ending in "_".

        Parameters
        ----------
        X : Series or Panel, any supported mtype
            Data to fit transform to, of python type as follows:
                Series: pd.Series, pd.DataFrame, or np.ndarray (1D or 2D)
                Panel: pd.DataFrame with 2-level MultiIndex, list of pd.DataFrame,
                    nested pd.DataFrame, or pd.DataFrame in long/wide format
                subject to sktime mtype format specifications, for further details see
                    examples/AA_datatypes_and_datasets.ipynb
        y : Series or Panel, default=None
            Additional data, e.g., labels for transformation
        Z : possible alias for X; should not be passed when X is passed
            alias Z will be deprecated in version 0.10.0

        Returns
        -------
        self : a fitted instance of the estimator

        Raises
        ------
        ValueError
            if X is not of one of the mtypes in self.ALLOWED_INPUT_MTYPES
        ValueError
            if the "univariate-only" tag is True and X is not univariate
        ValueError
            if X is Panel, no inner mtype is Panel, and y is not None
        RuntimeError
            if the scitype of X, after Series/Panel treatment,
            is not among the scitypes of the inner mtypes
        """
        X = _handle_alias(X, Z)

        # the state is "not fitted" until fitting has completed
        self._is_fitted = False

        # with fit-in-transform, nothing is fitted here, only the flag is set
        if self.get_tag("fit-in-transform"):
            self._is_fitted = True
            return self

        # input checks on X
        ####################

        X_ok, X_error, X_metadata = check_is(
            X,
            mtype=self.ALLOWED_INPUT_MTYPES,
            return_metadata=True,
            var_name="X",
        )
        if not X_ok:
            raise ValueError(X_error)

        if self.get_tag("univariate-only") and not X_metadata["is_univariate"]:
            raise ValueError("X must be univariate but is not")

        # scitypes of the inputs, and mtypes/scitypes supported by _fit
        ###############################################################

        passed_X_scitype = mtype_to_scitype(mtype(X))
        passed_y_scitype = mtype_to_scitype(mtype(y))

        # tag values may be a single mtype or a list of mtypes
        supported_X = self.get_tag("X_inner_mtype")
        if not isinstance(supported_X, list):
            supported_X = [supported_X]
        supported_X_scitypes = {mtype_to_scitype(mt) for mt in supported_X}

        supported_y = self.get_tag("y_inner_mtype")
        if not isinstance(supported_y, list):
            supported_y = [supported_y]

        # Series vs Panel treatment for X
        #################################

        # if _fit supports the scitype of X, neither of the flags below is set
        series_as_panel = (
            passed_X_scitype == "Series" and "Series" not in supported_X_scitypes
        )
        panel_by_instance = (
            passed_X_scitype == "Panel" and "Panel" not in supported_X_scitypes
        )

        if series_as_panel:
            # _fit only supports Panel: X is considered a one-instance Panel
            X = convert_Series_to_Panel(X)
        elif panel_by_instance:
            # _fit only supports Series: vectorize over the instances of X,
            #   which is not supported if y is passed
            if y is not None:
                raise ValueError(
                    "no default behaviour if _fit does not support Panel, "
                    " but X is Panel and y is not None")
            instances = convert_to(X, to_type="df-list", as_scitype="Panel")
            # one clone is fitted per instance; the fit call on the clone
            #   carries out input checks and conversion for that instance
            self.transformers_ = [clone(self).fit(Xi) for Xi in instances]
            self._is_fitted = True
            return self

        current_X_scitype = mtype_to_scitype(mtype(X))

        # sanity check for debugging, this is not expected to trigger
        if current_X_scitype not in supported_X_scitypes:
            raise RuntimeError(
                "conversion of X to X_inner unsuccessful, unexpected")

        # conversion of X/y to a supported inner mtype
        ##############################################

        # conversion targets are the inner mtypes of the same scitype as X/y;
        #   if X/y already has one of these mtypes, no conversion takes place
        X_targets = [
            mt for mt in supported_X if mtype_to_scitype(mt) == current_X_scitype
        ]
        y_targets = [
            mt for mt in supported_y if mtype_to_scitype(mt) == passed_y_scitype
        ]

        X_inner = convert_to(X, to_type=X_targets, as_scitype=current_X_scitype)
        y_inner = convert_to(y, to_type=y_targets, as_scitype=passed_y_scitype)

        # todo: call as self._fit(X=X_inner, y=y_inner) once Z is completely gone
        #   until then, arguments are passed by position as a workaround
        self._fit(X_inner, y_inner)

        self._is_fitted = True
        return self
Esempio n. 9
0
    def _vectorized_transform(self, X, X_input_mtype=None, y=None, inverse=False):
        """Vectorized application of transform or inverse, and convert back."""
        if X_input_mtype is None:
            X_input_mtype = mtype(X, as_scitype=["Series", "Panel"])
        if y is not None and self.get_tag("y_inner_mtype") != "None":
            raise ValueError(
                f"{type(self).__name__} does not support Panel X if y is not None, "
                f"since {type(self).__name__} supports only Series. "
                "Auto-vectorization to extend Series X to Panel X can only be "
                'carried out if y is None, or "y_inner_mtype" tag is "None". '
                "Consider extending _fit and _transform to handle the following "
                "input types natively: Panel X and non-None y."
            )

        X = convert_to(
            X, to_type="df-list", as_scitype="Panel", store=self._converter_store_X
        )

        # depending on whether fitting happens, apply fitted or unfitted instances
        if not self.get_tag("fit-in-transform"):
            # these are the transformers-per-instance, fitted in fit
            transformers = self.transformers_
            if len(transformers) != len(X):
                raise RuntimeError(
                    "found different number of instances in transform than in fit. "
                    f"number of instances seen in fit: {len(transformers)}; "
                    f"number of instances seen in transform: {len(X)}"
                )
            if inverse:
                Xt = [transformers[i].inverse_transform(X[i]) for i in range(len(X))]
            else:
                Xt = [transformers[i].transform(X[i]) for i in range(len(X))]
            # now we have a list of transformed instances
        else:
            # if no fitting happens, just apply transform multiple times
            if inverse:
                Xt = [self.inverse_transform(X[i]) for i in range(len(X))]
            else:
                Xt = [self.transform(X[i]) for i in range(len(X))]

        # convert to expected output format
        ###################################
        if inverse:
            output_scitype = self.get_tag("scitype:transform-input")
        else:
            output_scitype = self.get_tag("scitype:transform-output")
        # if the output is Series, Xt is a Panel and we convert back
        if output_scitype == "Series":
            Xt = convert_to(
                Xt,
                to_type=X_input_mtype,
                as_scitype="Panel",
                store=self._converter_store_X,
            )

        # if the output is Primitives, we have a list of one-row dataframes
        # we concatenate those and overwrite the index with that of X
        elif output_scitype == "Primitives":
            Xt = pd.concat(Xt)
            Xt = Xt.reset_index(drop=True)
        return Xt