def convert_align_to_align_loc(align, X, align_name="align", df_name="X", copy=True):
    """Convert iloc alignment to loc alignment, using reference data frame.

    Parameters
    ----------
    align: pd.DataFrame in alignment format, with columns 'ind'+str(i) for integer i
        cols contain iloc index of X[i] mapped to alignment coordinate for alignment
    X: list of pd.DataFrame, with as many elements as align has columns
        X[i].index supplies the loc labels substituted for the iloc entries
        in column 'ind'+str(i) of align
    align_name: str, optional - name of "align" to display in error messages
    df_name: str, optional - name of "X" to display in error messages
    copy: bool, optional, default=True - whether returned data frame is a new object
        if not, values are references; passed to copy arg of df.reindex

    Returns
    -------
    pd.DataFrame in alignment format, with columns 'ind'+str(i) for integer i
        cols contain loc index of X[i] mapped to alignment coordinate for alignment

    Raises
    ------
    ValueError
        if align does not pass the "alignment" mtype check,
        if X is not a list of pd.DataFrame,
        or if len(X) differs from the number of columns of align

    Example
    -------
    align_df = pd.DataFrame({'ind0' : [1,2,3], 'ind1' : [0,2,4]})
    X = [pd.DataFrame({'a' : [1,2,3,4]}, index=[-4,7,11,14]),
            pd.DataFrame({'a' : [1,2,3,5,6]}, index=[4,8,12,16,20])
        ]

    convert_align_to_align_loc(align_df, X)
    # 'ind0' becomes [7, 11, 14], 'ind1' becomes [4, 12, 20]
    """
    from sktime.datatypes import check_is_mtype

    # the validity of align is reported through the return value,
    # so it has to be inspected and turned into an exception explicitly
    valid, msg, _ = check_is_mtype(
        align,
        "alignment",
        scitype="Alignment",
        return_metadata=True,
        var_name=align_name,
    )
    if not valid:
        raise ValueError(msg)

    if not isinstance(X, list) or not all(isinstance(Xi, pd.DataFrame) for Xi in X):
        raise ValueError(f"{df_name} must be a list of pandas.DataFrame")

    if len(X) != len(align.columns):
        raise ValueError(
            f"number of data frames in {df_name} must equal"
            f" number of index columns in {align_name}"
        )

    # copy only after all checks have passed, so failures do not pay for the copy
    if copy:
        align = align.copy()

    for i, Xi in enumerate(X):
        indi = "ind" + str(i)

        # pd.Series(Xi.index) maps position -> label, so reindexing it by the
        #  iloc positions stored in align yields the matching loc labels
        #  this also deals with np.nan indices
        loc_series = pd.Series(Xi.index).reindex(align[indi], copy=copy)
        align[indi] = loc_series.values

    return align
Esempio n. 2
0
def test_vectorization_series_to_panel(mtype):
    """Check that a forecaster is vectorized correctly over Panel data.

    ARIMA is fitted to a Panel of the given mtype and asked for a forecast at
    horizon [1, 2, 3]. The forecast has to be a valid object of the same mtype,
    with as many instances as the input, all of equal length.
    """
    expected_count = 10
    horizon = [1, 2, 3]

    panel = _make_panel(
        n_instances=expected_count, random_state=42, return_mtype=mtype
    )
    fitted = ARIMA().fit(panel)
    forecast = fitted.predict(horizon)

    is_valid, _, meta = check_is_mtype(forecast, mtype, return_metadata=True)

    # the forecast must come back in the mtype that was passed in
    assert is_valid, (
        f"vectorization of forecasters does not work for test example "
        f"of mtype {mtype}, using the ARIMA forecaster"
    )

    # one forecast per input instance
    found_count = meta["n_instances"]
    assert found_count == expected_count, (
        f"vectorization test produces wrong number of instances "
        f"expected {expected_count}, found {found_count}"
    )

    # every instance is forecast at the same horizon
    assert meta["is_equal_length"], (
        "vectorization test produces non-equal length Panel forecast, should be "
        "equal length, and length equal to the forecasting horizon [1, 2, 3]"
    )
Esempio n. 3
0
def test_vectorization_series_to_hier_proba(method, mtype):
    """Test that forecaster vectorization works for Hierarchical data, predict_proba.

    This test passes Hierarchical data to the ARIMA forecaster which internally has an
    implementation for Series only, so the BaseForecaster has to vectorize.

    Parameters
    ----------
    method : str, name of the probabilistic prediction method to call
        one of "predict_interval", "predict_quantiles", "predict_var"
    mtype : str, mtype the hierarchical test data is converted to before fitting

    Raises
    ------
    RuntimeError - if method is none of the three supported method names
    AssertionError - if the return of the method is not of the expected mtype
    """
    hierarchy_levels = (2, 4)
    y = _make_hierarchical(hierarchy_levels=hierarchy_levels, random_state=84)
    y = convert(y, from_type="pd_multiindex_hier", to_type=mtype)

    est = ARIMA().fit(y)
    y_pred = getattr(est, method)([1, 2, 3])

    if method in ["predict_interval", "predict_quantiles"]:
        # "predict_interval" -> "pred_interval", "predict_quantiles" -> "pred_quantiles"
        expected_mtype = method.replace("ict", "")
    elif method in ["predict_var"]:
        expected_mtype = "pd_multiindex_hier"
    else:
        # must be raised: otherwise expected_mtype is unbound in the check below
        raise RuntimeError(
            f"bug in test, unreachable state, method {method} queried"
        )

    valid, _, _ = check_is_mtype(y_pred, expected_mtype, return_metadata=True)

    msg = (
        f"vectorization of forecaster method {method} does not work for test example "
        f"of mtype {mtype}, using the ARIMA forecaster"
    )

    assert valid, msg
Esempio n. 4
0
    def test_predict_interval(self, estimator_instance, n_columns, fh_int_oos, alpha):
        """Check the behaviour of predict_interval against the pred_int capability.

        Arguments
        ---------
        estimator_instance: forecaster instance to test
        n_columns: int, number of columns of the generated training series
        fh_int_oos: forecasting horizon, used both in fit and in predict_interval
        alpha: coverage passed to predict_interval

        Raises
        ------
        AssertionError - if the instance has tag "capability:pred_int"
                and the return of predict_interval is not of mtype "pred_interval"
        AssertionError - if the instance does not have tag "capability:pred_int"
                and predict_interval does not raise a NotImplementedError
                with a message matching "prediction intervals"
        """
        series = _make_series(n_columns=n_columns)
        estimator_instance.fit(series, fh=fh_int_oos)

        # without the capability, the only acceptable outcome is the documented error
        if not estimator_instance.get_tag("capability:pred_int"):
            with pytest.raises(NotImplementedError, match="prediction intervals"):
                estimator_instance.predict_interval(fh_int_oos, coverage=alpha)
            return

        intervals = estimator_instance.predict_interval(fh_int_oos, coverage=alpha)
        is_valid = check_is_mtype(intervals, mtype="pred_interval", scitype="Proba")
        assert is_valid
Esempio n. 5
0
    def _convert_output(self, X, X_input_mtype=None, X_was_Series=False, inverse=False):
        """Convert the output of the inner transform to the expected return format.

        Parameters
        ----------
        X : output of the inner transform or inverse transform
        X_input_mtype : mtype of the X originally passed by the caller
        X_was_Series : bool, whether a Series input was wrapped as a one-instance
            Panel before the inner call
        inverse : bool, if True, the output scitype is read from the tag
            "scitype:transform-input" instead of "scitype:transform-output"

        Returns
        -------
        X converted according to the output scitype:
            "Series": converted back to the input mtype, or to "pd.DataFrame"
                if the input was "pd.Series" and the output is not univariate
            "Primitives": converted to pd.DataFrame, with index reset
            anything else: returned as passed
        """
        input_scitype = mtype_to_scitype(X_input_mtype)

        # output of inverse transform has the scitype of the input of transform
        scitype_tag = "scitype:transform-input" if inverse else "scitype:transform-output"
        target_scitype = self.get_tag(scitype_tag)

        if target_scitype == "Primitives":
            # the Series converter is "abused" to ensure pd.DataFrame output,
            # after resetting the index to integers counting the instances
            primitives = X
            if isinstance(primitives, (pd.DataFrame, pd.Series)):
                primitives = primitives.reset_index(drop=True)
            # no converter store, since this is not a "1:1 back-conversion"
            return convert_to(primitives, to_type="pd.DataFrame", as_scitype="Series")

        if target_scitype != "Series":
            # remaining case is "Panel", which is returned without conversion
            return X

        result = X

        # undo the earlier wrapping of a Series as "one-instance-Panel"
        if X_was_Series:
            panel_mtypes = ["pd-multiindex", "numpy3D", "df-list"]
            result = convert_to(result, to_type=panel_mtypes, as_scitype="Panel")
            result = convert_Panel_to_Series(result)

        # by default, the output mtype is the input mtype
        target_mtype = X_input_mtype

        # exception, for Series input only: a multivariate result cannot be
        #   returned as pd.Series, so pd.DataFrame is used instead
        if input_scitype == "Series":
            series_mtypes = ["pd.DataFrame", "pd.Series", "np.ndarray"]
            _, _, meta = check_is_mtype(result, series_mtypes, return_metadata=True)
            if not meta["is_univariate"] and X_input_mtype == "pd.Series":
                target_mtype = "pd.DataFrame"

        return convert_to(
            result,
            to_type=target_mtype,
            as_scitype=input_scitype,
            store=self._converter_store_X,
        )
Esempio n. 6
0
def test_make_panel(n_instances, n_columns, n_timepoints, return_mtype):
    """Check that _make_panel produces valid panel data of the requested shape.

    The generated object has to pass the mtype check for return_mtype, contain
    n_instances instances, and be univariate exactly if n_columns is 1.
    """
    generator_args = {
        "n_instances": n_instances,
        "n_columns": n_columns,
        "n_timepoints": n_timepoints,
        "return_mtype": return_mtype,
    }
    panel = _make_panel(**generator_args)

    is_valid, _, meta = check_is_mtype(
        panel, mtype=return_mtype, return_metadata=True
    )

    assert is_valid, (
        f"_make_panel_X generated data does not comply with mtype {return_mtype}"
    )
    assert meta["n_instances"] == n_instances

    should_be_univariate = n_columns == 1
    assert meta["is_univariate"] == should_be_univariate
Esempio n. 7
0
def test_load_tsf_to_dataframe(input_path, return_type, output_df):
    """Check that a tsf file is loaded into the expected data frame and metadata.

    input_path is resolved relative to the directory of the installed sktime
    package. For any return_type other than "default_tsf", the loaded data frame
    additionally has to pass the mtype check for return_type.
    """
    package_dir = os.path.dirname(sktime.__file__)
    data_path = os.path.join(package_dir, input_path)

    df, metadata = load_tsf_to_dataframe(data_path, return_type=return_type)

    assert_frame_equal(df, output_df, check_dtype=False)
    assert metadata == {
        "frequency": "yearly",
        "forecast_horizon": 4,
        "contain_missing_values": False,
        "contain_equal_length": False,
    }

    # "default_tsf" is excluded from the mtype check
    if return_type == "default_tsf":
        return

    assert check_is_mtype(obj=df, mtype=return_type)
Esempio n. 8
0
def test_vectorization_series_to_hier(mtype):
    """Check that a forecaster is vectorized correctly over Hierarchical data.

    ARIMA is fitted to hierarchical data converted to the given mtype and asked
    for a forecast at horizon [1, 2, 3]. The forecast has to be a valid object of
    the same mtype, with one instance per leaf of the hierarchy, all of equal length.
    """
    levels = (2, 4)
    horizon = [1, 2, 3]

    # number of instances is the product of the level sizes
    expected_count = reduce(mul, levels)

    hier = _make_hierarchical(hierarchy_levels=levels, random_state=84)
    hier = convert(hier, from_type="pd_multiindex_hier", to_type=mtype)

    fitted = ARIMA().fit(hier)
    forecast = fitted.predict(horizon)

    is_valid, _, meta = check_is_mtype(forecast, mtype, return_metadata=True)

    # the forecast must come back in the mtype that was passed in
    assert is_valid, (
        f"vectorization of forecasters does not work for test example "
        f"of mtype {mtype}, using the ARIMA forecaster"
    )

    # one forecast per input instance
    found_count = meta["n_instances"]
    assert found_count == expected_count, (
        f"vectorization test produces wrong number of instances "
        f"expected {expected_count}, found {found_count}"
    )

    # every instance is forecast at the same horizon
    assert meta["is_equal_length"], (
        "vectorization test produces non-equal length Panel forecast, should be "
        "equal length, and length equal to the forecasting horizon [1, 2, 3]"
    )
Esempio n. 9
0
    def update(self, X, y=None, Z=None, update_params=True):
        """Update the transformer with X, and optionally y.

        State required:
            Requires state to be "fitted".

        Accesses in self:
            Fitted model attributes ending in "_".
            self._is_fitted

        Writes to self:
            May update fitted model attributes ending in "_".
            Sets self.transformers_ and self._is_fitted if vectorized over a Panel.

        Parameters
        ----------
        X : Series or Panel, any supported mtype
            Data to update the transformer with, of python type as follows:
                Series: pd.Series, pd.DataFrame, or np.ndarray (1D or 2D)
                Panel: pd.DataFrame with 2-level MultiIndex, list of pd.DataFrame,
                    nested pd.DataFrame, or pd.DataFrame in long/wide format
                subject to sktime mtype format specifications, for further details see
                    examples/AA_datatypes_and_datasets.ipynb
        y : Series or Panel, default=None
            Additional data, e.g., labels for transformation
        Z : possible alias for X; should not be passed when X is passed
            alias Z will be deprecated in version 0.10.0
        update_params : bool, default=True
            whether the model is updated; if False, the call returns immediately.
            argument exists for compatibility with forecasting module.

        Returns
        -------
        self : a fitted instance of the estimator

        Raises
        ------
        ValueError
            if X fails the mtype check, if X is not univariate although the
            "univariate-only" tag is set, or if vectorization over a Panel X
            is required while y is passed
        """
        X = _handle_alias(X, Z)

        # nothing to do if no update is requested,
        # or if the transformer does all of its fitting inside transform
        if not update_params:
            return self
        if self.get_tag("fit-in-transform"):
            return self

        # input checks on X
        ###################

        is_valid, error_msg, metadata = check_is_mtype(
            X, mtype=self.ALLOWED_INPUT_MTYPES, return_metadata=True, var_name="X"
        )
        if not is_valid:
            raise ValueError(error_msg)

        if self.get_tag("univariate-only") and not metadata["is_univariate"]:
            raise ValueError("X must be univariate but is not")

        # scitype of the input vs scitypes supported by the inner methods
        #################################################################

        scitype_in = metadata["scitype"]
        inner_mtypes = _coerce_to_list(self.get_tag("X_inner_mtype"))
        inner_scitypes = mtype_to_scitype(inner_mtypes, return_unique=True)

        # Series vs Panel handling, three cases:
        # a. scitype of X is supported internally: neither branch below is entered
        # b. X is Panel, but Panel is not supported internally: vectorize
        # c. X is Series, but Series is not supported internally: wrap as Panel

        # b. vectorization over instances, via conversion to df-list mtype
        #     not available if y is passed; estimators that vectorize over y
        #     must implement this individually
        if scitype_in == "Panel" and "Panel" not in inner_scitypes:
            if y is not None:
                raise ValueError(
                    "no default behaviour if _fit does not support Panel, "
                    " but X is Panel and y is not None"
                )
            instances = convert_to(
                X, to_type="df-list", as_scitype="Panel", store=self._converter_store_X
            )
            # one clone of self is fitted per instance; each fit call
            # carries out its own input checks and conversions
            self.transformers_ = [clone(self).fit(instance) for instance in instances]
            # the function is left here, so the fitted flag is set here, too
            self._is_fitted = True
            return self

        # c. consider the Series as a one-instance Panel
        if scitype_in == "Series" and "Series" not in inner_scitypes:
            X = convert_Series_to_Panel(X)

        X_inner, y_inner = self._convert_X_y(X, y)

        # todo: switch to keyword call once Z is completely gone
        # self._update(X=X_inner, y=y_inner)
        # positional call is the less robust workaround until then
        self._update(X_inner, y_inner)
        return self
Esempio n. 10
0
    def inverse_transform(self, X, y=None, Z=None):
        """Inverse transform X and return an inverse transformed version.

        Currently it is assumed that only transformers with tags
            "scitype:transform-input"="Series", "scitype:transform-output"="Series",
        have an inverse_transform.

        State required:
            Requires state to be "fitted".

        Accesses in self:
            Fitted model attributes ending in "_".
            self._is_fitted

        Parameters
        ----------
        X : Series or Panel, any supported mtype
            Data to be inverse transformed, of python type as follows:
                Series: pd.Series, pd.DataFrame, or np.ndarray (1D or 2D)
                Panel: pd.DataFrame with 2-level MultiIndex, list of pd.DataFrame,
                    nested pd.DataFrame, or pd.DataFrame in long/wide format
                subject to sktime mtype format specifications, for further details see
                    examples/AA_datatypes_and_datasets.ipynb
        y : Series or Panel, default=None
            Additional data, e.g., labels for transformation
        Z : possible alias for X; should not be passed when X is passed
            alias Z will be deprecated in version 0.10.0

        Returns
        -------
        inverse transformed version of X
            of the same type as X, and conforming to mtype format specifications

        Raises
        ------
        NotImplementedError
            if the tag "capability:inverse_transform" is not set
        ValueError
            if X does not pass the mtype check against self.ALLOWED_INPUT_MTYPES,
            or if the tag "univariate-only" is set and X is not univariate
        """
        if not self.get_tag("capability:inverse_transform"):
            raise NotImplementedError(
                f"{type(self)} does not implement inverse_transform"
            )

        X = _handle_alias(X, Z)

        # check whether is fitted
        self.check_is_fitted()

        # input checks and minor coercions on X, y
        ###########################################

        valid, msg, X_metadata = check_is_mtype(
            X, mtype=self.ALLOWED_INPUT_MTYPES, return_metadata=True, var_name="X"
        )
        # the exception must be raised, not merely constructed,
        # for invalid input to be rejected here, as it is in fit
        if not valid:
            raise ValueError(msg)

        # checking X
        enforce_univariate = self.get_tag("univariate-only")
        if enforce_univariate and not X_metadata["is_univariate"]:
            raise ValueError("X must be univariate but is not")

        # retrieve mtypes/scitypes of all objects
        #########################################

        X_input_mtype = X_metadata["mtype"]
        X_input_scitype = X_metadata["scitype"]

        X_inner_mtype = _coerce_to_list(self.get_tag("X_inner_mtype"))
        X_inner_scitypes = mtype_to_scitype(X_inner_mtype, return_unique=True)

        # treating Series vs Panel conversion for X
        ###########################################

        # there are three cases to treat:
        # 1. if the internal _fit supports X's scitype, move on to mtype conversion
        # 2. internal only has Panel but X is Series: consider X as one-instance Panel
        # 3. internal only has Series but X is Panel:  loop over instances
        #     currently this is enabled by conversion to df-list mtype
        #     and this does not support y (unclear what should happen here)

        # 1. nothing to do - simply don't enter any of the ifs below
        #   the "ifs" for case 2 and 3 below are skipped under the condition
        #       X_input_scitype in X_inner_scitypes
        #   case 2 has an "else" which remembers that it wasn't entered

        # 2. internal only has Panel but X is Series: consider X as one-instance Panel
        if (
            X_input_scitype == "Series"
            and "Series" not in X_inner_scitypes
            and "Panel" in X_inner_scitypes
        ):
            # convert the Series X to a one-element Panel
            X = convert_Series_to_Panel(X)
            # remember that we converted the Series to a one-element Panel
            X_was_Series = True
        else:
            # remember that we didn't convert a Series to a one-element Panel
            X_was_Series = False

        # 3. internal only has Series but X is Panel: loop over instances
        if (
            X_input_scitype == "Panel"
            and "Panel" not in X_inner_scitypes
            and "Series" in X_inner_scitypes
        ):
            Xt = self._vectorized_transform(X, X_input_mtype, y=y, inverse=True)
            return Xt

        # convert X/y to supported inner type, if necessary
        ###################################################

        X_inner, y_inner = self._convert_X_y(X, y)

        # carry out the inverse transformation
        ###################################################

        # todo: uncomment this once Z is completely gone
        # Xt = self._inverse_transform(X=X_inner, y=y_inner)
        # less robust workaround until then
        Xt = self._inverse_transform(X_inner, y_inner)

        # convert inverse transformed X back to input mtype
        ###################################################
        Xt = self._convert_output(Xt, X_input_mtype, X_was_Series, inverse=True)

        return Xt
Esempio n. 11
0
    def transform(self, X, y=None, Z=None):
        """Transform X and return a transformed version.

        State required:
            Requires state to be "fitted".

        Accesses in self:
            Fitted model attributes ending in "_".
            self._is_fitted

        Parameters
        ----------
        X : Series or Panel, any supported mtype
            Data to be transformed, of python type as follows:
                Series: pd.Series, pd.DataFrame, or np.ndarray (1D or 2D)
                Panel: pd.DataFrame with 2-level MultiIndex, list of pd.DataFrame,
                    nested pd.DataFrame, or pd.DataFrame in long/wide format
                subject to sktime mtype format specifications, for further details see
                    examples/AA_datatypes_and_datasets.ipynb
        y : Series or Panel, default=None
            Additional data, e.g., labels for transformation
        Z : possible alias for X; should not be passed when X is passed
            alias Z will be deprecated in version 0.10.0

        Returns
        -------
        transformed version of X
        type depends on type of X and scitype:transform-output tag:
            |          | `transform`  |                        |
            |   `X`    |  `-output`   |     type of return     |
            |----------|--------------|------------------------|
            | `Series` | `Primitives` | `pd.DataFrame` (1-row) |
            | `Panel`  | `Primitives` | `pd.DataFrame`         |
            | `Series` | `Series`     | `Series`               |
            | `Panel`  | `Series`     | `Panel`                |
            | `Series` | `Panel`      | `Panel`                |
        instances in return correspond to instances in `X`
        combinations not in the table are currently not supported

        Explicitly, with examples:
            if `X` is `Series` (e.g., `pd.DataFrame`) and `transform-output` is `Series`
                then the return is a single `Series` of the same mtype
                Example: detrending a single series
            if `X` is `Panel` (e.g., `pd-multiindex`) and `transform-output` is `Series`
                then the return is `Panel` with same number of instances as `X`
                    (the transformer is applied to each input Series instance)
                Example: all series in the panel are detrended individually
            if `X` is `Series` or `Panel` and `transform-output` is `Primitives`
                then the return is `pd.DataFrame` with as many rows as instances in `X`
                Example: i-th row of the return has mean and variance of the i-th series
            if `X` is `Series` and `transform-output` is `Panel`
                then the return is a `Panel` object of type `pd-multiindex`
                Example: i-th instance of the output is the i-th window running over `X`

        Raises
        ------
        ValueError
            if X does not pass the mtype check against self.ALLOWED_INPUT_MTYPES,
            or if the tag "univariate-only" is set and X is not univariate
        """
        X = _handle_alias(X, Z)

        # check whether is fitted
        self.check_is_fitted()

        # input checks and minor coercions on X, y
        ###########################################

        valid, msg, X_metadata = check_is_mtype(
            X, mtype=self.ALLOWED_INPUT_MTYPES, return_metadata=True, var_name="X"
        )
        # the exception must be raised, not merely constructed,
        # for invalid input to be rejected here, as it is in fit
        if not valid:
            raise ValueError(msg)

        # checking X
        enforce_univariate = self.get_tag("univariate-only")
        if enforce_univariate and not X_metadata["is_univariate"]:
            raise ValueError("X must be univariate but is not")

        # retrieve mtypes/scitypes of all objects
        #########################################

        X_input_mtype = X_metadata["mtype"]
        X_input_scitype = X_metadata["scitype"]

        X_inner_mtype = _coerce_to_list(self.get_tag("X_inner_mtype"))
        X_inner_scitypes = mtype_to_scitype(X_inner_mtype, return_unique=True)

        # treating Series vs Panel conversion for X
        ###########################################

        # there are three cases to treat:
        # 1. if the internal _fit supports X's scitype, move on to mtype conversion
        # 2. internal only has Panel but X is Series: consider X as one-instance Panel
        # 3. internal only has Series but X is Panel:  loop over instances
        #     currently this is enabled by conversion to df-list mtype
        #     and this does not support y (unclear what should happen here)

        # 1. nothing to do - simply don't enter any of the ifs below
        #   the "ifs" for case 2 and 3 below are skipped under the condition
        #       X_input_scitype in X_inner_scitypes
        #   case 2 has an "else" which remembers that it wasn't entered

        # 2. internal only has Panel but X is Series: consider X as one-instance Panel
        if (
            X_input_scitype == "Series"
            and "Series" not in X_inner_scitypes
            and "Panel" in X_inner_scitypes
        ):
            # convert the Series X to a one-element Panel
            X = convert_Series_to_Panel(X)
            # remember that we converted the Series to a one-element Panel
            X_was_Series = True
        else:
            # remember that we didn't convert a Series to a one-element Panel
            X_was_Series = False

        # 3. internal only has Series but X is Panel: loop over instances
        if (
            X_input_scitype == "Panel"
            and "Panel" not in X_inner_scitypes
            and "Series" in X_inner_scitypes
        ):
            Xt = self._vectorized_transform(X, X_input_mtype, y=y)
            return Xt

        # convert X/y to supported inner type, if necessary
        ###################################################

        X_inner, y_inner = self._convert_X_y(X, y)

        # carry out the transformation
        ###################################################

        # todo: uncomment this once Z is completely gone
        # Xt = self._transform(X=X_inner, y=y_inner)
        # less robust workaround until then
        Xt = self._transform(X_inner, y_inner)

        # convert transformed X back to input mtype
        ###########################################
        Xt = self._convert_output(Xt, X_input_mtype, X_was_Series)

        return Xt
Esempio n. 12
0
    def fit(self, X, y=None, Z=None):
        """Fit the transformer to X, and optionally to y.

        State change:
            Changes state to "fitted".

        Writes to self:
            Sets is_fitted flag to True.
            Sets fitted model attributes ending in "_".
            Sets self.transformers_ if the fit is vectorized over a Panel.

        Parameters
        ----------
        X : Series or Panel, any supported mtype
            Data to fit transform to, of python type as follows:
                Series: pd.Series, pd.DataFrame, or np.ndarray (1D or 2D)
                Panel: pd.DataFrame with 2-level MultiIndex, list of pd.DataFrame,
                    nested pd.DataFrame, or pd.DataFrame in long/wide format
                subject to sktime mtype format specifications, for further details see
                    examples/AA_datatypes_and_datasets.ipynb
        y : Series or Panel, default=None
            Additional data, e.g., labels for transformation
        Z : possible alias for X; should not be passed when X is passed
            alias Z is deprecated since version 0.10.0 and will be removed in 0.11.0

        Returns
        -------
        self : a fitted instance of the estimator

        Raises
        ------
        ValueError
            if X fails the mtype check, if X is not univariate although the
            "univariate-only" tag is set, or if vectorization over a Panel X is
            required while y is passed and the "y_inner_mtype" tag is not "None"
        """
        X = _handle_alias(X, Z)

        # the flag is switched back on only once fitting has gone through
        self._is_fitted = False

        # transformers that fit inside transform have nothing to do here
        if self.get_tag("fit-in-transform"):
            self._is_fitted = True
            return self

        # input checks on X
        ###################

        is_valid, error_msg, metadata = check_is_mtype(
            X, mtype=self.ALLOWED_INPUT_MTYPES, return_metadata=True, var_name="X"
        )
        if not is_valid:
            raise ValueError(error_msg)

        if self.get_tag("univariate-only") and not metadata["is_univariate"]:
            raise ValueError("X must be univariate but is not")

        # scitype of the input vs scitypes supported by the inner methods
        #################################################################

        scitype_in = metadata["scitype"]
        inner_mtypes = _coerce_to_list(self.get_tag("X_inner_mtype"))
        inner_scitypes = mtype_to_scitype(inner_mtypes, return_unique=True)

        # Series vs Panel handling, three cases:
        # a. scitype of X is supported internally: neither branch below is entered
        # b. X is Panel, but Panel is not supported internally: vectorize
        # c. X is Series, but Series is not supported internally: wrap as Panel

        # b. vectorization over instances, via conversion to df-list mtype
        #     only available if y is not passed, or if y is not used internally
        if scitype_in == "Panel" and "Panel" not in inner_scitypes:
            y_in_use = y is not None and self.get_tag("y_inner_mtype") != "None"
            if y_in_use:
                raise ValueError(
                    f"{type(self).__name__} does not support Panel X if y is not None, "
                    f"since {type(self).__name__} supports only Series. "
                    "Auto-vectorization to extend Series X to Panel X can only be "
                    'carried out if y is None, or "y_inner_mtype" tag is "None". '
                    "Consider extending _fit and _transform to handle the following "
                    "input types natively: Panel X and non-None y."
                )
            instances = convert_to(
                X, to_type="df-list", as_scitype="Panel", store=self._converter_store_X
            )
            # one clone of self is fitted per instance; each fit call
            # carries out its own input checks and conversions
            self.transformers_ = [clone(self).fit(instance) for instance in instances]
            # the function is left here, so the fitted flag is set here, too
            self._is_fitted = True
            return self

        # c. consider the Series as a one-instance Panel
        if scitype_in == "Series" and "Series" not in inner_scitypes:
            X = convert_Series_to_Panel(X)

        X_inner, y_inner = self._convert_X_y(X, y)

        # todo: switch to keyword call once Z is completely gone
        # self._fit(X=X_inner, y=y_inner)
        # positional call is the less robust workaround until then
        self._fit(X_inner, y_inner)

        self._is_fitted = True
        return self
Esempio n. 13
0
    def transform(self, X, y=None, Z=None):
        """Transform X and return a transformed version.

        State required:
            Requires state to be "fitted".

        Accesses in self:
            Fitted model attributes ending in "_".
            self._is_fitted

        Parameters
        ----------
        X : Series or Panel, any supported mtype
            Data to be transformed, of python type as follows:
                Series: pd.Series, pd.DataFrame, or np.ndarray (1D or 2D)
                Panel: pd.DataFrame with 2-level MultiIndex, list of pd.DataFrame,
                    nested pd.DataFrame, or pd.DataFrame in long/wide format
                subject to sktime mtype format specifications, for further details see
                    examples/AA_datatypes_and_datasets.ipynb
        y : Series or Panel, default=None
            Additional data, e.g., labels for transformation
        Z : possible alias for X; should not be passed when X is passed
            alias Z will be deprecated in version 0.10.0

        Returns
        -------
        transformed version of X
        type depends on type of X and scitype:transform-output tag:
            |          | `transform`  |                        |
            |   `X`    |  `-output`   |     type of return     |
            |----------|--------------|------------------------|
            | `Series` | `Primitives` | `pd.DataFrame` (1-row) |
            | `Panel`  | `Primitives` | `pd.DataFrame`         |
            | `Series` | `Series`     | `Series`               |
            | `Panel`  | `Series`     | `Panel`                |
            | `Series` | `Panel`      | `Panel`                |
        instances in return correspond to instances in `X`
        combinations not in the table are currently not supported

        Explicitly, with examples:
            if `X` is `Series` (e.g., `pd.DataFrame`) and `transform-output` is `Series`
                then the return is a single `Series` of the same mtype
                Example: detrending a single series
            if `X` is `Panel` (e.g., `pd-multiindex`) and `transform-output` is `Series`
                then the return is `Panel` with same number of instances as `X`
                    (the transformer is applied to each input Series instance)
                Example: all series in the panel are detrended individually
            if `X` is `Series` or `Panel` and `transform-output` is `Primitives`
                then the return is `pd.DataFrame` with as many rows as instances in `X`
                Example: i-th row of the return has mean and variance of the i-th series
            if `X` is `Series` and `transform-output` is `Panel`
                then the return is a `Panel` object of type `pd-multiindex`
                Example: i-th instance of the output is the i-th window running over `X`

        Raises
        ------
        ValueError
            if X is not of one of the mtypes in self.ALLOWED_INPUT_MTYPES
            if the "univariate-only" tag is set and X is not univariate
            if X is Panel, only Series is supported internally, and y is not None
        RuntimeError
            if X is Panel, only Series is supported internally, and the number of
            instances in X differs from the number of transformers fitted in fit
        """
        X = _handle_alias(X, Z)

        # check whether is fitted
        self.check_is_fitted()

        # input checks and minor coercions on X, y
        ###########################################

        valid, msg, metadata = check_is_mtype(
            X,
            mtype=self.ALLOWED_INPUT_MTYPES,
            return_metadata=True,
            var_name="X",
        )
        if not valid:
            # same behaviour as in fit: invalid input must not pass silently
            raise ValueError(msg)

        # checking X
        enforce_univariate = self.get_tag("univariate-only")
        if enforce_univariate and not metadata["is_univariate"]:
            raise ValueError("X must be univariate but is not")

        # retrieve mtypes/scitypes of all objects
        #########################################

        X_input_mtype = mtype(X, as_scitype=["Series", "Panel"])
        X_input_scitype = mtype_to_scitype(X_input_mtype)
        y_input_mtype = mtype(y, as_scitype=["Series", "Panel"])
        y_input_scitype = mtype_to_scitype(y_input_mtype)

        output_scitype = self.get_tag("scitype:transform-output")

        X_inner_mtype = self.get_tag("X_inner_mtype")
        if not isinstance(X_inner_mtype, list):
            X_inner_mtype = [X_inner_mtype]
        X_inner_scitypes = list({mtype_to_scitype(mt) for mt in X_inner_mtype})

        y_inner_mtype = self.get_tag("y_inner_mtype")
        if not isinstance(y_inner_mtype, list):
            y_inner_mtype = [y_inner_mtype]

        # treating Series vs Panel conversion for X
        ###########################################

        # there are three cases to treat:
        # 1. if the internal _fit supports X's scitype, move on to mtype conversion
        # 2. internal only has Panel but X is Series: consider X as one-instance Panel
        # 3. internal only has Series but X is Panel:  loop over instances
        #     currently this is enabled by conversion to df-list mtype
        #     and this does not support y (unclear what should happen here)

        # 1. nothing to do - simply don't enter any of the ifs below
        #   the "ifs" for case 2 and 3 below are skipped under the condition
        #       X_input_scitype in X_inner_scitypes
        #   case 2 has an "else" which remembers that it wasn't entered

        # 2. internal only has Panel but X is Series: consider X as one-instance Panel
        if (
            X_input_scitype == "Series"
            and "Series" not in X_inner_scitypes
            and "Panel" in X_inner_scitypes
        ):
            # convert the Series X to a one-element Panel
            X = convert_Series_to_Panel(X)
            # remember that we converted the Series to a one-element Panel
            X_was_Series = True
        else:
            # remember that we didn't convert a Series to a one-element Panel
            X_was_Series = False

        # 3. internal only has Series but X is Panel: loop over instances
        if (
            X_input_scitype == "Panel"
            and "Panel" not in X_inner_scitypes
            and "Series" in X_inner_scitypes
        ):
            # y cannot be split over instances, so refuse rather than ignore it
            if y is not None:
                raise ValueError(
                    "no default behaviour if _fit does not support Panel, "
                    " but X is Panel and y is not None"
                )
            X = convert_to(X, to_type="df-list", as_scitype="Panel")

            # depending on whether fitting happens, apply fitted or unfitted instances
            if not self.get_tag("fit-in-transform"):
                # these are the per-instance transformers, fitted in fit
                transformers = self.transformers_
                if len(transformers) != len(X):
                    raise RuntimeError(
                        "found different number of instances in transform than in fit"
                    )

                # lengths are equal here, so zip pairs every instance
                Xt = [trafo.transform(Xi) for trafo, Xi in zip(transformers, X)]
                # now we have a list of transformed instances
            else:
                # if no fitting happens, just apply transform multiple times
                Xt = [self.transform(Xi) for Xi in X]

            # if the output is Series, Xt is a Panel and we convert back
            if output_scitype == "Series":
                Xt = convert_to(Xt, to_type=X_input_mtype, as_scitype="Panel")

            # if the output is Primitives, we have a list of one-row dataframes
            # we concatenate those and reset the index to consecutive integers,
            #   so that the i-th row corresponds to the i-th instance of X
            elif output_scitype == "Primitives":
                Xt = pd.concat(Xt)
                Xt = Xt.reset_index(drop=True)
            return Xt

        # convert X/y to supported inner type, if necessary
        ###################################################

        # variables for the scitype of the current X (possibly converted)
        #     y wasn't converted so we can use y_input_scitype
        X_mtype = mtype(X, as_scitype=["Series", "Panel"])
        X_scitype = mtype_to_scitype(X_mtype)

        # subset to the mtypes that are of the same scitype as X/y
        X_inner_mtype = [
            mt for mt in X_inner_mtype if mtype_to_scitype(mt) == X_scitype
        ]

        y_inner_mtype = [
            mt for mt in y_inner_mtype if mtype_to_scitype(mt) == y_input_scitype
        ]

        # convert X and y to a supported internal type
        #  if X/y type is already supported, no conversion takes place
        X_inner = convert_to(
            X,
            to_type=X_inner_mtype,
            as_scitype=X_scitype,
        )
        y_inner = convert_to(
            y,
            to_type=y_inner_mtype,
            as_scitype=y_input_scitype,
        )

        # carry out the transformation
        ###################################################

        # todo: uncomment this once Z is completely gone
        # Xt = self._transform(X=X_inner, y=y_inner)
        # less robust workaround until then
        Xt = self._transform(X_inner, y_inner)

        # convert transformed X back to input mtype
        ###########################################

        # if we converted Series to "one-instance-Panel", revert that
        if X_was_Series and output_scitype == "Series":
            Xt = convert_Panel_to_Series(Xt)

        if output_scitype == "Series":
            # if the transformer outputs multivariate series,
            #   we cannot convert back to pd.Series, do pd.DataFrame instead then
            _, _, metadata = check_is_mtype(
                Xt,
                ["pd.DataFrame", "pd.Series", "np.ndarray"],
                return_metadata=True,
            )
            if not metadata["is_univariate"] and X_input_mtype == "pd.Series":
                X_output_mtype = "pd.DataFrame"
            else:
                X_output_mtype = X_input_mtype
            Xt = convert_to(
                Xt,
                to_type=X_output_mtype,
                as_scitype=X_input_scitype,
            )
        elif output_scitype == "Primitives":
            # we "abuse" the Series converter to ensure df output
            # & reset index to have integers for instances
            if isinstance(Xt, (pd.DataFrame, pd.Series)):
                Xt = Xt.reset_index(drop=True)
            Xt = convert_to(
                Xt,
                to_type="pd.DataFrame",
                as_scitype="Series",
            )
        else:
            # output_scitype is "Panel" and no need for conversion
            pass

        return Xt
# Example no. 14
# 0
    def fit(self, X, y=None, Z=None):
        """Fit the transformer to X, optionally using y, and mark it as fitted.

        State change:
            Changes state to "fitted".

        Writes to self:
            Sets is_fitted flag to True.
            Sets fitted model attributes ending in "_".
            Sets transformers_ (one fitted clone per instance) when X is Panel
                but only Series is supported internally.

        Parameters
        ----------
        X : Series or Panel, any supported mtype
            Data to fit transform to, of python type as follows:
                Series: pd.Series, pd.DataFrame, or np.ndarray (1D or 2D)
                Panel: pd.DataFrame with 2-level MultiIndex, list of pd.DataFrame,
                    nested pd.DataFrame, or pd.DataFrame in long/wide format
                subject to sktime mtype format specifications, for further details see
                    examples/AA_datatypes_and_datasets.ipynb
        y : Series or Panel, default=None
            Additional data, e.g., labels for transformation
        Z : possible alias for X; should not be passed when X is passed
            alias Z will be deprecated in version 0.10.0

        Returns
        -------
        self : a fitted instance of the estimator

        Raises
        ------
        ValueError
            if X is not of one of the mtypes in self.ALLOWED_INPUT_MTYPES
            if the "univariate-only" tag is set and X is not univariate
            if X is Panel, Panel is not supported internally, and y is not None
        RuntimeError
            if X, after Series/Panel coercion, is still of an unsupported scitype
        """
        X = _handle_alias(X, Z)

        # a previous fit no longer counts until this call has completed
        self._is_fitted = False

        # nothing to fit if all the work happens in transform
        if self.get_tag("fit-in-transform"):
            self._is_fitted = True
            return self

        # validate X
        ############

        is_valid, error_msg, X_metadata = check_is_mtype(
            X,
            mtype=self.ALLOWED_INPUT_MTYPES,
            return_metadata=True,
            var_name="X",
        )
        if not is_valid:
            raise ValueError(error_msg)

        if self.get_tag("univariate-only") and not X_metadata["is_univariate"]:
            raise ValueError("X must be univariate but is not")

        # scitypes of the inputs, and mtypes/scitypes supported by _fit
        ###############################################################

        X_input_scitype = mtype_to_scitype(mtype(X, as_scitype=["Series", "Panel"]))
        y_input_scitype = mtype_to_scitype(mtype(y, as_scitype=["Series", "Panel"]))

        supported_X_mtypes = self.get_tag("X_inner_mtype")
        if not isinstance(supported_X_mtypes, list):
            supported_X_mtypes = [supported_X_mtypes]
        supported_X_scitypes = {mtype_to_scitype(mt) for mt in supported_X_mtypes}

        supported_y_mtypes = self.get_tag("y_inner_mtype")
        if not isinstance(supported_y_mtypes, list):
            supported_y_mtypes = [supported_y_mtypes]

        # Series vs Panel coercion of X
        ###############################

        # if the scitype of X is supported by _fit, nothing happens here;
        # otherwise a Series is wrapped into a one-instance Panel,
        # and a Panel is handled by fitting one clone per instance
        if X_input_scitype not in supported_X_scitypes:
            if X_input_scitype == "Series":
                X = convert_Series_to_Panel(X)

            elif X_input_scitype == "Panel":
                # auto-vectorization is not supported if y is passed
                if y is not None:
                    raise ValueError(
                        "no default behaviour if _fit does not support Panel, "
                        " but X is Panel and y is not None"
                    )
                instances = convert_to(X, to_type="df-list", as_scitype="Panel")

                # each recursive fit call does its own input checks/conversion
                fitted_clones = []
                for instance in instances:
                    fitted_clones.append(clone(self).fit(instance))
                self.transformers_ = fitted_clones

                # we leave the function here, so flag as fitted now
                self._is_fitted = True
                return self

        X_scitype = mtype_to_scitype(mtype(X, as_scitype=["Series", "Panel"]))

        # for debugging, exception if the conversion fails (this should never happen)
        if X_scitype not in supported_X_scitypes:
            raise RuntimeError("conversion of X to X_inner unsuccessful, unexpected")

        # convert X/y to a supported inner mtype, if necessary
        ######################################################

        # candidate target mtypes are those of the same scitype as X/y
        X_target_mtypes = [
            mt for mt in supported_X_mtypes if mtype_to_scitype(mt) == X_scitype
        ]
        y_target_mtypes = [
            mt for mt in supported_y_mtypes if mtype_to_scitype(mt) == y_input_scitype
        ]

        # if the mtype of X/y is already supported, no conversion takes place
        X_inner = convert_to(X, to_type=X_target_mtypes, as_scitype=X_scitype)
        y_inner = convert_to(y, to_type=y_target_mtypes, as_scitype=y_input_scitype)

        # todo: uncomment this once Z is completely gone
        # self._fit(X=X_inner, y=y_inner)
        # less robust workaround until then
        self._fit(X_inner, y_inner)

        self._is_fitted = True
        return self