예제 #1
0
    def _vectorized_transform(self, X, X_input_mtype=None, y=None, inverse=False):
        """Vectorized application of transform or inverse, and convert back."""
        if X_input_mtype is None:
            X_input_mtype = mtype(X, as_scitype=["Series", "Panel"])
        if y is not None:
            ValueError(
                "no default behaviour if _fit does not support Panel, "
                " but X is Panel and y is not None"
            )

        X = convert_to(
            X, to_type="df-list", as_scitype="Panel", store=self._converter_store_X
        )

        # depending on whether fitting happens, apply fitted or unfitted instances
        if not self.get_tag("fit-in-transform"):
            # these are the transformers-per-instanced, fitted in fit
            transformers = self.transformers_
            if len(transformers) != len(X):
                raise RuntimeError(
                    "found different number of instances in transform than in fit"
                )
            if inverse:
                Xt = [transformers[i].inverse_transform(X[i]) for i in range(len(X))]
            else:
                Xt = [transformers[i].transform(X[i]) for i in range(len(X))]
            # now we have a list of transformed instances
        else:
            # if no fitting happens, just apply transform multiple times
            if inverse:
                Xt = [self.inverse_transform(X[i]) for i in range(len(X))]
            else:
                Xt = [self.transform(X[i]) for i in range(len(X))]

        # convert to expected output format
        ###################################
        if inverse:
            output_scitype = self.get_tag("scitype:transform-input")
        else:
            output_scitype = self.get_tag("scitype:transform-output")
        # if the output is Series, Xt is a Panel and we convert back
        if output_scitype == "Series":
            Xt = convert_to(
                Xt,
                to_type=X_input_mtype,
                as_scitype="Panel",
                store=self._converter_store_X,
            )

        # if the output is Primitives, we have a list of one-row dataframes
        # we concatenate those and overwrite the index with that of X
        elif output_scitype == "Primitives":
            Xt = pd.concat(Xt)
            Xt = Xt.reset_index(drop=True)
        return Xt
예제 #2
0
    def _convert_output(self, X, X_input_mtype=None, X_was_Series=False, inverse=False):
        """Bring the raw result of (inverse) transform into the output format.

        Parameters
        ----------
        X : output of the inner transform / inverse transform
        X_input_mtype : str, mtype of the data originally passed by the caller
        X_was_Series : bool, default=False
            whether a Series input had been wrapped as a one-instance Panel
        inverse : bool, default=False
            whether the output comes from inverse_transform

        Returns
        -------
        converted output; returned unchanged if the output scitype is neither
        "Series" nor "Primitives"
        """
        input_scitype = mtype_to_scitype(X_input_mtype)

        # inverse_transform produces what transform consumes
        scitype_tag = (
            "scitype:transform-input" if inverse else "scitype:transform-output"
        )
        output_scitype = self.get_tag(scitype_tag)

        if output_scitype == "Primitives":
            # the Series converter is reused to guarantee pd.DataFrame output;
            # pandas inputs first get a fresh integer index
            result = X
            if isinstance(result, (pd.DataFrame, pd.Series)):
                result = result.reset_index(drop=True)
            # no converter store, this is not a 1:1 back-conversion
            return convert_to(
                result,
                to_type="pd.DataFrame",
                as_scitype="Series",
            )

        if output_scitype != "Series":
            # remaining case: nothing to convert
            return X

        result = X
        if X_was_Series:
            # undo the earlier Series -> one-instance Panel wrapping
            result = convert_to(
                result,
                to_type=["pd-multiindex", "numpy3D", "df-list"],
                as_scitype="Panel",
            )
            result = convert_Panel_to_Series(result)

        # by default the output mtype mirrors the input mtype
        target_mtype = X_input_mtype
        if input_scitype == "Series":
            _, _, metadata = check_is_mtype(
                result,
                ["pd.DataFrame", "pd.Series", "np.ndarray"],
                return_metadata=True,
            )
            # a multivariate result does not fit in a pd.Series
            if not metadata["is_univariate"] and X_input_mtype == "pd.Series":
                target_mtype = "pd.DataFrame"

        return convert_to(
            result,
            to_type=target_mtype,
            as_scitype=input_scitype,
            store=self._converter_store_X,
        )
예제 #3
0
    def _convert_X_y(self, X, y):
        """Convert X and y to mtypes supported by the inner methods.

        Parameters
        ----------
        X : data container of scitype Series or Panel
        y : data container of scitype Series or Panel
            NOTE(review): the mtype of y is inferred unconditionally, confirm
            that ``mtype`` accepts y=None before calling this without y.

        Returns
        -------
        X_inner : X converted to one of the mtypes in the "X_inner_mtype" tag
        y_inner : y converted to one of the mtypes in the "y_inner_mtype" tag,
            or None if that tag is "None"

        Raises
        ------
        RuntimeError
            If the scitype of X is not among the scitypes of "X_inner_mtype".
        """
        supported_X_mtypes = _coerce_to_list(self.get_tag("X_inner_mtype"))
        supported_X_scitypes = mtype_to_scitype(supported_X_mtypes, return_unique=True)

        supported_y_mtypes = _coerce_to_list(self.get_tag("y_inner_mtype"))

        X_scitype = mtype_to_scitype(mtype(X, as_scitype=["Series", "Panel"]))
        y_scitype = mtype_to_scitype(mtype(y, as_scitype=["Series", "Panel"]))

        # sanity check for debugging, this should never trigger
        if X_scitype not in supported_X_scitypes:
            raise RuntimeError(
                "conversion of X to X_inner unsuccessful, unexpected")

        # keep only the target mtypes that share the scitype of X;
        # convert_to is a no-op if X already has a supported mtype
        X_targets = [
            candidate
            for candidate in supported_X_mtypes
            if mtype_to_scitype(candidate) == X_scitype
        ]
        X_inner = convert_to(
            X,
            to_type=X_targets,
            as_scitype=X_scitype,
            store=self._converter_store_X,
        )

        if supported_y_mtypes == ["None"]:
            # the inner methods do not take y
            return X_inner, None

        y_targets = [
            candidate
            for candidate in supported_y_mtypes
            if mtype_to_scitype(candidate) == y_scitype
        ]
        y_inner = convert_to(
            y,
            to_type=y_targets,
            as_scitype=y_scitype,
        )
        return X_inner, y_inner
예제 #4
0
    def _pairwise_table_x_check(self, X, var_name="X"):
        """Validate Table input and coerce it to the inner mtype.

        Checks that X is of scitype Table and converts it to the format
        given by the X_inner_mtype tag.

        Parameters
        ----------
        X: pd.DataFrame, pd.Series, numpy 1D or 2D, list of dicts
            sktime data container compliant with the Table scitype
            The value to be checked and coerced
        var_name: str, variable name passed on to the scitype check

        Returns
        -------
        X: Table data container of a supported format in X_inner_mtype

        Raises
        ------
        TypeError
            If X is not of scitype Table.
        """
        is_table = check_is_scitype(
            X, "Table", return_metadata=False, var_name=var_name
        )
        if not is_table:
            raise TypeError(
                "X and X2 must be in an sktime compatible format, of scitype Table, "
                "for instance a pandas.DataFrame or a 2D numpy.ndarray. "
                "See the data format tutorial examples/AA_datatypes_and_datasets.ipynb"
            )

        target_mtype = self.get_tag("X_inner_mtype")
        return convert_to(X, to_type=target_mtype, as_scitype="Table")
예제 #5
0
def plot_series(X: TimeSeriesInstances):
    """Plot each series of a panel in its own, vertically stacked, subplot.

    Only the first element ``X[i][0]`` of every instance is drawn.

    Parameters
    ----------
    X : pd.DataFrame or indexable collection of series
        A pd.DataFrame is converted to "numpy3D" first; anything else is
        indexed as ``X[i][0]`` directly.
    """
    _check_soft_dependencies("matplotlib")
    import matplotlib.patches as mpatches
    import matplotlib.pyplot as plt

    if isinstance(X, pd.DataFrame):
        X = convert_to(X, "numpy3D")
    plt.rcParams["figure.dpi"] = 100

    # figsize is passed to subplots directly: a separate plt.figure call would
    # leave an empty extra figure and not size the one that is drawn on.
    # squeeze=False keeps `axes` 2D, so a single series can be indexed as well.
    _, axes = plt.subplots(
        nrows=len(X), ncols=1, squeeze=False, figsize=(5, 10)
    )
    for i, curr_axes in enumerate(axes[:, 0]):
        curr_axes.plot(X[i][0], color="b")

    blue_patch = mpatches.Patch(color="blue",
                                label="Series that belong to the cluster")
    plt.legend(
        handles=[blue_patch],
        loc="upper center",
        bbox_to_anchor=(0.5, -0.40),
        fancybox=True,
        shadow=True,
        ncol=5,
    )
    plt.tight_layout()
    plt.show()
예제 #6
0
def create_test_distance_numpy(
    n_instance: int,
    n_columns: int = None,
    n_timepoints: int = None,
    random_state: int = 1,
):
    """Create a numpy test fixture for distance tests.

    Parameters
    ----------
    n_instance: int
        Number of instances to create.
    n_columns: int, defaults = None
        Number of columns to create. If None, a single column is generated
        and the output loses one dimension.
    n_timepoints: int, defaults = None
        Number of timepoints to create in each column. If None, a single
        timepoint is generated and the output loses one dimension.
    random_state: int, defaults = 1
        Random state to initialise with.

    Returns
    -------
    np.ndarray
        The "numpy3D" conversion of the generated data if both n_columns and
        n_timepoints are given; its ``[:, :, 0]`` slice (2D) if exactly one of
        them is None; its ``[:, 0, 0]`` slice (1D) if both are None.
    """
    no_timepoints = n_timepoints is None
    no_columns = n_columns is None

    df = _create_test_distances(
        n_instance=n_instance,
        n_columns=1 if no_columns else n_columns,
        n_timepoints=1 if no_timepoints else n_timepoints,
        random_state=random_state,
    )
    array_3d = convert_to(df, to_type="numpy3D")

    if no_timepoints and no_columns:
        return array_3d[:, 0, 0]
    if no_timepoints or no_columns:
        return array_3d[:, :, 0]
    return array_3d
예제 #7
0
def test_center_init(center_init_callable: Callable[[np.ndarray], np.ndarray]):
    """Check that a center initialisation routine returns k centers."""
    n_centers = 5
    data, labels = load_arrow_head(return_X_y=True)
    train_panel = train_test_split(data, labels)[0]
    train_panel = convert_to(train_panel, "numpy3D")
    rng = check_random_state(1)

    centers = center_init_callable(train_panel, n_centers, rng)

    assert len(centers) == n_centers
    # NOTE(review): with axis=1 the length of the result is always
    # len(centers) - presumably axis=0 was meant to test distinctness, confirm
    assert len(np.unique(centers, axis=1)) == n_centers
예제 #8
0
    def _check_clusterer_input(
        self, X: TimeSeriesInstances, enforce_min_instances: int = 1
    ) -> TimeSeriesInstances:
        """Check X against the clusterer's capabilities and convert it for _fit.

        Parameters
        ----------
        X : np.ndarray (2d or 3d array of shape (n_instances, series_length) or shape
            (n_instances,n_dimensions,series_length)) or nested pd.DataFrame (
            n_instances,n_dimensions).
            Training time series instances to cluster.
        enforce_min_instances : int, default=1
            Minimum number of instances X must contain.

        Returns
        -------
        X : Panel data container of the mtype given by the "X_inner_mtype" tag.
            Converted X ready for _fit.

        Raises
        ------
        TypeError
            If X is not a valid Panel data container.
        ValueError
            If X has fewer than enforce_min_instances instances.
        """
        X = self._initial_conversion(X)

        is_panel, _, panel_info = check_is_scitype(
            X, scitype="Panel", return_metadata=True
        )
        if not is_panel:
            raise TypeError(
                f"X is not of a supported input data type."
                f"X must be of type np.ndarray or pd.DataFrame, found {type(X)}"
                f"Use datatypes.check_is_mtype to check conformance with "
                f"specifications."
            )

        n_cases = panel_info["n_instances"]
        if n_cases < enforce_min_instances:
            raise ValueError(
                f"Minimum number of cases required is {enforce_min_instances} but X "
                f"has : {n_cases}"
            )

        # compare data properties with what the clusterer says it can handle
        self._check_capabilities(
            panel_info["has_nans"],
            not panel_info["is_univariate"],
            not panel_info["is_equal_length"],
        )

        inner_mtype = self.get_tag("X_inner_mtype")
        return convert_to(X, to_type=inner_mtype, as_scitype="Panel")
예제 #9
0
    def _pairwise_panel_x_check(self, X, var_name="X"):
        """Check and coerce input data.

        Method used to check the input and convert Series/Panel input
            to internally used format, as defined in X_inner_mtype tag

        Parameters
        ----------
        X: List of dfs, Numpy of dfs, 3d numpy
            sktime data container compliant with the Series or Panel scitype
            The value to be checked
        var_name: str, variable name to print in error messages

        Returns
        -------
        X: Panel data container of a supported format in X_inner_mtype
            usually df-list, list of pd.DataFrame, unless overridden

        Raises
        ------
        TypeError
            If X is neither a valid Series nor a valid Panel container.
        RuntimeError
            If the scitype check reports a scitype other than Series or Panel.
        """
        check_res = check_is_scitype(X, ["Series", "Panel"],
                                     return_metadata=True,
                                     var_name=var_name)
        X_valid = check_res[0]

        # validity has to be tested before the metadata is touched, otherwise
        # invalid input fails on the metadata lookup instead of this message
        if not X_valid:
            msg = (
                "X and X2 must be in an sktime compatible format, "
                "of scitype Series or Panel, "
                "for instance a pandas.DataFrame with sktime compatible time indices, "
                "or with MultiIndex and lowest level a sktime compatible time index. "
                "See the data format tutorial examples/AA_datatypes_and_datasets.ipynb"
            )
            raise TypeError(msg)

        metadata = check_res[2]
        X_scitype = metadata["scitype"]

        # if the input is a single series, convert it to a Panel
        if X_scitype == "Series":
            X = convert_Series_to_Panel(X)

        # can't be anything else if check_is_scitype is working properly
        elif X_scitype != "Panel":
            raise RuntimeError(
                "Unexpected error in check_is_scitype, check validity")

        X_inner_mtype = self.get_tag("X_inner_mtype")
        X_coerced = convert_to(X, to_type=X_inner_mtype, as_scitype="Panel")

        return X_coerced
예제 #10
0
    def _transform(self, X, y=None):
        """Convert a panel of time series to the "numpyflat" mtype.

        Parameters
        ----------
        X : pandas DataFrame or 3D np.ndarray
            panel of time series to transform
        y : ignored argument for interface compatibility

        Returns
        -------
        Xt : X converted to mtype "numpyflat"
            NOTE(review): presumably a 2D array with only primitives in the
            cells - confirm against the converter.
        """
        return convert_to(X, to_type="numpyflat", as_scitype="Panel")
예제 #11
0
def plot_cluster_algorithm(model: TimeSeriesLloyds, X: TimeSeriesInstances,
                           k: int):
    """Plot the results from a univariate partitioning algorithm.

    One subplot per cluster is drawn, showing the cluster's series and its
    center.

    Parameters
    ----------
    model: BaseClusterer
        Fitted clustering model to plot, must provide ``predict`` and
        ``cluster_centers_``
    X: np.ndarray or pd.Dataframe or List[pd.Dataframe]
        The series to predict the cluster assignment for. A pd.DataFrame is
        converted to "numpy3D", other inputs are used as passed.
    k: int
        Number of centers
    """
    _check_soft_dependencies("matplotlib")
    import matplotlib.patches as mpatches
    import matplotlib.pyplot as plt

    # non-DataFrame input is used as is; it used to leave predict_series unbound
    if isinstance(X, pd.DataFrame):
        predict_series = convert_to(X, "numpy3D")
    else:
        predict_series = X
    plt.rcParams["figure.dpi"] = 100
    indexes = model.predict(predict_series)

    centers = model.cluster_centers_
    series_values = _get_cluster_values(indexes, predict_series, k)

    # figsize is passed to subplots directly: a separate plt.figure call would
    # leave an empty extra figure and not size the one that is drawn on.
    # squeeze=False keeps `axes` 2D, so k == 1 can be indexed as well.
    _, axes = plt.subplots(nrows=k, ncols=1, squeeze=False, figsize=(5, 10))
    for i in range(k):
        _plot(series_values[i], centers[i], axes[i, 0])

    blue_patch = mpatches.Patch(color="blue",
                                label="Series that belong to the cluster")
    red_patch = mpatches.Patch(color="red", label="Cluster centers")
    plt.legend(
        handles=[red_patch, blue_patch],
        loc="upper center",
        bbox_to_anchor=(0.5, -0.40),
        fancybox=True,
        shadow=True,
        ncol=5,
    )
    plt.tight_layout()
    plt.show()
예제 #12
0
    def _pairwise_panel_x_check(self, X, var_name="X"):
        """Check and coerce input data.

        Method used to check the input and convert Series/Panel input
            to internally used format, as defined in X_inner_mtype tag

        Parameters
        ----------
        X: List of dfs, Numpy of dfs, 3d numpy
            The value to be checked
        var_name: str, variable name to print in error messages

        Returns
        -------
        X: Panel data container of a supported format in X_inner_mtype
            usually df-list, list of pd.DataFrame, unless overridden

        Raises
        ------
        TypeError
            If X is neither a valid Series nor a valid Panel container.
        RuntimeError
            If the scitype check reports a scitype other than Series or Panel.
        """
        check_res = check_is_scitype(X, ["Series", "Panel"],
                                     return_metadata=True,
                                     var_name=var_name)
        X_valid = check_res[0]

        # validity has to be tested before the metadata is touched, otherwise
        # invalid input fails on the metadata lookup instead of this message
        if not X_valid:
            raise TypeError("X/X2 must be of Series or Panel scitype")

        metadata = check_res[2]
        X_scitype = metadata["scitype"]

        # if the input is a single series, convert it to a Panel
        if X_scitype == "Series":
            X = convert_Series_to_Panel(X)

        # can't be anything else if check_is_scitype is working properly
        elif X_scitype != "Panel":
            raise RuntimeError(
                "Unexpected error in check_is_scitype, check validity")

        X_inner_mtype = self.get_tag("X_inner_mtype")
        X_coerced = convert_to(X, to_type=X_inner_mtype, as_scitype="Panel")

        return X_coerced
예제 #13
0
파일: base.py 프로젝트: chmathys/sktime
    def _convert_X(self, X):
        """Convert Panel input to the mtype given by the "X_inner_mtype" tag.

        Parameters
        ----------
        self : this classifier
        X : pd.DataFrame or np.ndarray. Input attribute data

        Returns
        -------
        X : Panel data container of the inner mtype
            Possibly converted input data
        """
        target_mtype = self.get_tag("X_inner_mtype")
        return convert_to(X, to_type=target_mtype, as_scitype="Panel")
예제 #14
0
def make_clustering_problem(
    n_instances=20,
    n_columns=1,
    n_timepoints=20,
    return_numpy=False,
    random_state=None,
):
    """Generate panel data for clustering tests.

    All arguments are forwarded to ``_make_panel_X``. If ``return_numpy`` is
    True, the generated panel is additionally converted to "numpy3D".
    """
    panel = _make_panel_X(
        n_instances=n_instances,
        n_columns=n_columns,
        n_timepoints=n_timepoints,
        return_numpy=return_numpy,
        random_state=random_state,
    )
    return convert_to(panel, "numpy3D") if return_numpy else panel
예제 #15
0
def plot_dba_example():
    """Plot DBA barycenters of the arrow head training set for four metrics.

    Four stacked subplots are drawn, each showing all training series and the
    barycenter returned by ``dba`` for one distance metric.
    """
    import matplotlib.pyplot as plt

    X_train, y_train = load_arrow_head(split="train")
    X_train = convert_to(X_train, "numpy3D")

    def draw_panel(barycenter):
        # training series as faint black lines, barycenter on top in red
        for instance in X_train:
            plt.plot(instance.ravel(), "k-", alpha=0.2)
        plt.plot(barycenter.ravel(), "r-", linewidth=2)

    # (subplot title, keyword arguments for dba), one entry per row
    panels = (
        (
            "Sktime DBA (using dtw)",
            {"distance_metric": "dtw", "medoids_distance_metric": "dtw"},
        ),
        (
            "Sktime DBA (using wdtw)",
            {"distance_metric": "wdtw", "medoids_distance_metric": "wdtw"},
        ),
        (
            "Sktime DBA (using lcss)",
            {"distance_metric": "lcss", "medoids_distance_metric": "lcss"},
        ),
        ("Sktime DBA (using msm)", {"distance_metric": "msm"}),
    )

    shared_axis = plt.subplot()
    for row, (title, dba_kwargs) in enumerate(panels, start=1):
        plt.subplot(4, 1, row, sharex=shared_axis)
        plt.title(title)
        draw_panel(dba(X_train, **dba_kwargs))

    shared_axis.set_xlim([0, X_train.shape[2]])

    # show the plot(s)
    plt.tight_layout()
    plt.show()
예제 #16
0
def convert_Hierarchical_to_Panel(obj, store=None):
    """Convert single-panel hierarchical object to a panel.

    Removes all but the last two index levels of the MultiIndex, which yields
    a 2-level MultiIndex.

    Assumes input is conformant with Hierarchical mtype.
    This method does not perform full mtype checks, use mtype or check_is_mtype for
    checks.

    Parameters
    ----------
    obj: an object of scitype Hierarchical.
    store: not used by this function

    Returns
    -------
    returns a data container of mtype pd-multiindex, of scitype Panel
    """
    obj_df = convert_to(obj,
                        to_type="pd_multiindex_hier",
                        as_scitype="Hierarchical")
    obj_df = obj_df.copy()
    # get_level_values accepts a single level only, so the last two levels
    # are kept by dropping every level above them
    n_upper_levels = obj_df.index.nlevels - 2
    if n_upper_levels > 0:
        obj_df.index = obj_df.index.droplevel(list(range(n_upper_levels)))
    return obj_df
예제 #17
0
def convert_Panel_to_Hierarchical(obj, store=None):
    """Wrap a panel as a hierarchical object with one extra index level.

    A constant level named "__level2" (value 0) is placed in front of the two
    existing index levels, giving a 3-level MultiIndex.

    Assumes input is conformant with one of the Panel mtypes.
    This method does not perform full mtype checks, use mtype or check_is_mtype for
    checks.

    Parameters
    ----------
    obj: an object of scitype Panel.
    store: not used by this function

    Returns
    -------
    returns a data container of mtype pd_multiindex_hier
    """
    panel_df = convert_to(obj, to_type="pd-multiindex", as_scitype="Panel").copy()
    # the helper column becomes the innermost index level ...
    panel_df["__level2"] = 0
    with_extra_level = panel_df.set_index(["__level2"], append=True)
    # ... and is then moved to the front
    return with_extra_level.reorder_levels([2, 0, 1])
예제 #18
0
def convert_Series_to_Hierarchical(obj, store=None):
    """Wrap a series as a hierarchical object with two extra index levels.

    Two constant levels named "__level1" and "__level2" (value 0) are placed
    in front of the existing index, giving a 3-level MultiIndex.

    Assumes input is conformant with one of the three Series mtypes.
    This method does not perform full mtype checks, use mtype or check_is_mtype for
    checks.

    Parameters
    ----------
    obj: an object of scitype Series, of mtype pd.DataFrame, pd.Series, or np.ndarray.
    store: not used by this function

    Returns
    -------
    returns a data container of mtype pd_multiindex_hier
    """
    series_df = convert_to(obj, to_type="pd.DataFrame", as_scitype="Series").copy()
    # helper columns are appended to the index behind the existing level ...
    for level_name in ("__level1", "__level2"):
        series_df[level_name] = 0
    with_extra_levels = series_df.set_index(["__level1", "__level2"], append=True)
    # ... and then moved in front of it
    return with_extra_levels.reorder_levels([1, 2, 0])
예제 #19
0
    def update(self, X, y=None, Z=None, update_params=True):
        """Update transformer with X, optionally y.

        State required:
            Requires state to be "fitted".

        Accesses in self:
            Fitted model attributes ending in "_".
            self._is_fitted

        Writes to self:
            May update fitted model attributes ending in "_".

        Parameters
        ----------
        X : Series or Panel, any supported mtype
            Data to update the transformer with, of python type as follows:
                Series: pd.Series, pd.DataFrame, or np.ndarray (1D or 2D)
                Panel: pd.DataFrame with 2-level MultiIndex, list of pd.DataFrame,
                    nested pd.DataFrame, or pd.DataFrame in long/wide format
                subject to sktime mtype format specifications, for further details see
                    examples/AA_datatypes_and_datasets.ipynb
        y : Series or Panel, default=None
            Additional data, e.g., labels for transformation
        Z : possible alias for X; should not be passed when X is passed
            alias Z will be deprecated in version 0.10.0
        update_params : bool, default=True
            whether the model is updated. Yes if true, if false, simply skips call.
            argument exists for compatibility with forecasting module.

        Returns
        -------
        self : a fitted instance of the estimator

        Raises
        ------
        ValueError
            If X is not of an allowed mtype, if X is multivariate although the
            "univariate-only" tag is set, or if y is passed while X is a Panel
            and the inner methods only support Series.
        """
        X = _handle_alias(X, Z)

        # nothing to do if updating is switched off or fitting happens in transform
        if not update_params or self.get_tag("fit-in-transform"):
            return self

        # input checks on X
        ###################
        is_valid, error_msg, X_metadata = check_is_mtype(
            X, mtype=self.ALLOWED_INPUT_MTYPES, return_metadata=True, var_name="X"
        )
        if not is_valid:
            raise ValueError(error_msg)

        if self.get_tag("univariate-only") and not X_metadata["is_univariate"]:
            raise ValueError("X must be univariate but is not")

        # scitype of the input vs. scitypes supported by the inner methods
        ##################################################################
        X_input_scitype = X_metadata["scitype"]
        inner_mtypes = _coerce_to_list(self.get_tag("X_inner_mtype"))
        inner_scitypes = mtype_to_scitype(inner_mtypes, return_unique=True)

        # three cases:
        # 1. the inner methods support X's scitype: go straight to mtype conversion
        # 2. inner supports only Panel, X is Series: treat X as one-instance Panel
        # 3. inner supports only Series, X is Panel: vectorize over instances,
        #    via the df-list mtype; not supported if y is passed, estimators
        #    that vectorize over y must implement this individually
        if X_input_scitype == "Series" and "Series" not in inner_scitypes:
            # case 2
            X = convert_Series_to_Panel(X)
        elif X_input_scitype == "Panel" and "Panel" not in inner_scitypes:
            # case 3
            if y is not None:
                raise ValueError(
                    "no default behaviour if _fit does not support Panel, "
                    " but X is Panel and y is not None"
                )
            instances = convert_to(
                X, to_type="df-list", as_scitype="Panel", store=self._converter_store_X
            )
            # one freshly fitted clone per instance; fit of the clone performs
            # its own input checks and conversions
            self.transformers_ = [clone(self).fit(Xi) for Xi in instances]
            # the function is left here, so the fitted flag is set here as well
            self._is_fitted = True
            return self

        X_inner, y_inner = self._convert_X_y(X, y)

        # todo: switch to keyword arguments once Z is completely gone
        # positional call is a less robust workaround until then
        self._update(X_inner, y_inner)
        return self
예제 #20
0
파일: _base.py 프로젝트: chmathys/sktime
    def _check_X_y(self, X=None, y=None):
        """Check and coerce X/y for fit/predict/update functions.

        Parameters
        ----------
        y : pd.Series, pd.DataFrame, or np.ndarray (1D or 2D), optional (default=None)
            Time series to check.
        X : pd.DataFrame, or 2D np.array, optional (default=None)
            Exogeneous time series.

        Returns
        -------
        X_inner : Series compatible with self.get_tag("X_inner_mtype") format
            converted/coerced version of X, mtype determined by "X_inner_mtype" tag
        y_inner : Series compatible with self.get_tag("y_inner_mtype") format
            converted/coerced version of y, mtype determined by "y_inner_mtype" tag

        Writes to self
        --------------
        _y_mtype_last_seen : str, mtype of y, only written if y is not None
        _converter_store_y : dict, passed as store to the conversion of y

        Notes
        -----
        Validation is delegated to ``check_series`` and, if the
        "X-y-must-have-same-index" tag is set and X is passed, to
        ``check_equal_time_index``; errors are raised by these helpers.
        """
        # tags steering the checks, read up-front
        y_must_be_univariate = self.get_tag("scitype:y") == "univariate"
        y_must_be_multivariate = self.get_tag("scitype:y") == "multivariate"
        index_type = self.get_tag("enforce_index_type")

        # checks and minor coercions on y
        if y is not None:
            y = check_series(
                y,
                enforce_univariate=y_must_be_univariate,
                enforce_multivariate=y_must_be_multivariate,
                enforce_index_type=index_type,
                allow_None=False,
                allow_empty=True,
                var_name="y",
            )
            # remembered so that outputs can be converted back to this mtype
            self._y_mtype_last_seen = mtype(y, as_scitype="Series")

        # checks and minor coercions on X
        if X is not None:
            X = check_series(X, enforce_index_type=index_type, var_name="X")
            if self.get_tag("X-y-must-have-same-index"):
                check_equal_time_index(X, y)

        # conversion to the inner mtypes; the store for y is kept on self
        y_target = self.get_tag("y_inner_mtype")
        y_inner = convert_to(
            y,
            to_type=y_target,
            as_scitype="Series",
            store=self._converter_store_y,
        )

        X_target = self.get_tag("X_inner_mtype")
        X_inner = convert_to(
            X,
            to_type=X_target,
            as_scitype="Series",
        )

        return X_inner, y_inner
예제 #21
0
파일: _base.py 프로젝트: chmathys/sktime
    def predict(
        self,
        fh=None,
        X=None,
        return_pred_int=False,
        alpha=DEFAULT_ALPHA,
        keep_old_return_type=True,
    ):
        """Forecast time series at future horizon.

        State required:
            Requires state to be "fitted".

        Accesses in self:
            Fitted model attributes ending in "_".
            self.cutoff, self._is_fitted

        Writes to self:
            Stores fh to self.fh if fh is passed and has not been passed previously.

        Parameters
        ----------
        fh : int, list, np.ndarray or ForecastingHorizon
            Forecasting horizon
        X : pd.DataFrame, or 2D np.ndarray, optional (default=None)
            Exogeneous time series to predict from
            if self.get_tag("X-y-must-have-same-index"), X.index must contain fh.index
        return_pred_int : bool, optional (default=False)
            If True, returns prediction intervals for given alpha values.
        alpha : float or list, optional (default=DEFAULT_ALPHA)
            Only used if return_pred_int=True. The coverage passed on to
            predict_interval is 1 - alpha.
        keep_old_return_type : bool, optional (default=True)
            Only used if return_pred_int=True and the estimator implements the
            refactored interval interface: whether the intervals are converted
            back to the old return format.

        Returns
        -------
        y_pred : pd.Series, pd.DataFrame, or np.ndarray (1D or 2D)
            Point forecasts at fh, with same index as fh
            y_pred has same type as y passed in fit (most recently)
        y_pred_int : pd.DataFrame - only if return_pred_int=True
            in this case, return is 2-tuple (otherwise a single y_pred)
            Prediction intervals

        Raises
        ------
        NotImplementedError
            If return_pred_int=True but the "capability:pred_int" tag is False.
        """
        # handle inputs

        self.check_is_fitted()
        self._set_fh(fh)

        # todo deprecate NotImplementedError in v 10.0.1
        if return_pred_int and not self.get_tag("capability:pred_int"):
            raise NotImplementedError(
                f"{self.__class__.__name__} does not have the capability to return "
                "prediction intervals. Please set return_pred_int=False. If you "
                "think this estimator should have the capability, please open "
                "an issue on sktime."
            )

        # input check and conversion for X
        X_inner = self._check_X(X=X)

        # this is how it is supposed to be after the refactor is complete and effective
        if not return_pred_int:
            y_pred = self._predict(
                self.fh,
                X=X_inner,
            )

            # convert to output mtype, identical with last y mtype seen
            y_out = convert_to(
                y_pred,
                self._y_mtype_last_seen,
                as_scitype="Series",
                store=self._converter_store_y,
            )

            return y_out

        # keep following code for downward compatibility,
        # todo: can be deleted once refactor is completed and effective,
        # todo: deprecate in v 10
        warn(
            "return_pred_int in predict() will be deprecated;"
            "please use predict_interval() instead to generate "
            "prediction intervals.",
            FutureWarning,
        )

        if not self._has_predict_quantiles_been_refactored():
            # this means the method is not refactored:
            # _predict returns the pair (point forecasts, intervals)
            y_pred = self._predict(
                self.fh,
                X=X_inner,
                return_pred_int=return_pred_int,
                alpha=alpha,
            )

            # returns old return type anyways
            pred_int = y_pred[1]
            y_pred = y_pred[0]

        else:
            # it's already refactored: point forecasts and intervals come from
            # separate calls; y_pred used to be left unassigned on this path
            y_pred = self._predict(
                self.fh,
                X=X_inner,
            )

            # opposite definition previously vs. now;
            # a scalar alpha is wrapped so that it can be iterated like a list
            alphas = [alpha] if isinstance(alpha, (int, float)) else alpha
            coverage = [1 - a for a in alphas]
            pred_int = self.predict_interval(fh=fh, X=X_inner, coverage=coverage)

            if keep_old_return_type:
                # NOTE(review): alpha is passed on as given by the caller -
                # confirm the converter accepts a scalar alpha
                pred_int = _convert_new_to_old_pred_int(pred_int, alpha)

        # convert to output mtype, identical with last y mtype seen
        y_out = convert_to(
            y_pred,
            self._y_mtype_last_seen,
            as_scitype="Series",
            store=self._converter_store_y,
        )

        return (y_out, pred_int)
    np.argmax,
    np.any,
]

# Module-level fixtures: two panels generated with different numbers of
# instances and different random states, with return_numpy=False.
X1_list_df = make_transformer_problem(n_instances=4,
                                      n_columns=4,
                                      n_timepoints=5,
                                      random_state=1,
                                      return_numpy=False)
X2_list_df = make_transformer_problem(n_instances=5,
                                      n_columns=4,
                                      n_timepoints=5,
                                      random_state=2,
                                      return_numpy=False)

# the same two panels, converted to the "numpy3D" mtype
X1_num_pan = convert_to(X1_list_df, to_type="numpy3D")
X2_num_pan = convert_to(X2_list_df, to_type="numpy3D")


def test_aggr():
    """Run the AggrDist check once per fixture format."""
    fixture_pairs = (
        (X1_num_pan, X2_num_pan),  # 3D numpy versions of the fixtures
        (X1_list_df, X2_list_df),  # list-of-df versions of the fixtures
    )
    for first, second in fixture_pairs:
        _run_aggr_dist_test(first, second)


def _run_aggr_dist_test(x, y):
    """Helper for `test_aggr`, called with a pair of fixtures `x` and `y`.

    NOTE(review): only the construction of the AggrDist instance is visible
    here; `x` and `y` are not yet used in the visible body - confirm the
    remaining steps against the full test module.
    """
    # AggrDist with default parameters, wrapping a default ScipyDist transformer
    default_params = AggrDist(transformer=ScipyDist())
예제 #23
0
def plot_correlations(
    series,
    lags=24,
    alpha=0.05,
    zero_lag=True,
    acf_fft=False,
    acf_adjusted=True,
    pacf_method="ywadjusted",
    suptitle=None,
    series_title=None,
    acf_title="Autocorrelation",
    pacf_title="Partial Autocorrelation",
):
    """Plot a series together with its ACF and PACF.

    The figure has the series on a full-width top row and the ACF (left) and
    PACF (right) plots side by side on the bottom row.

    Parameters
    ----------
    series : pd.Series
        A time series.

    lags : int, default = 24
        Number of lags to include in ACF and PACF plots.

    alpha : float, default = 0.05
        Alpha value used to set confidence intervals. Alpha = 0.05 results in
        95% confidence interval with standard deviation calculated via
        Bartlett's formula.

    zero_lag : bool, default = True
        If True, start ACF and PACF plots at 0th lag.

    acf_fft : bool, default = False
        Whether to compute ACF via FFT.

    acf_adjusted : bool, default = True
        If True, denominator of ACF calculations uses n-k instead of n, where
        n is number of observations and k is the lag.

    pacf_method : str, default = 'ywadjusted'
        Method to use in calculation of PACF.

    suptitle : str, default = None
        The text to use as the Figure's suptitle. No suptitle is set if None.

    series_title : str, default = None
        Used to set the title of the series plot if provided. Otherwise, series
        plot has no title.

    acf_title : str, default = 'Autocorrelation'
        Used to set title of ACF plot.

    pacf_title : str, default = 'Partial Autocorrelation'
        Used to set title of PACF plot.

    Returns
    -------
    fig : matplotlib.figure.Figure

    axes : np.ndarray
        Array of the figure's Axes objects, as returned by ``fig.get_axes()``.
    """
    _check_soft_dependencies("matplotlib")
    import matplotlib.pyplot as plt

    series = check_y(series)
    series = convert_to(series, "pd.Series", "Series")

    # Lay out a 2x2 grid: top row spans both columns, bottom row is split.
    # The subplots are added in the order series, ACF, PACF.
    figure = plt.figure(constrained_layout=True, figsize=(12, 8))
    grid = figure.add_gridspec(2, 2)

    series_ax = figure.add_subplot(grid[0, :])
    if series_title is not None:
        series_ax.set_title(series_title)
    acf_ax = figure.add_subplot(grid[1, 0])
    pacf_ax = figure.add_subplot(grid[1, 1])

    # Draw each plot onto its own Axes.
    plot_series(series, ax=series_ax)

    # Arguments that the ACF and PACF plots have in common.
    common_kwargs = {"lags": lags, "zero": zero_lag, "alpha": alpha}
    plot_acf(
        series,
        ax=acf_ax,
        title=acf_title,
        adjusted=acf_adjusted,
        fft=acf_fft,
        **common_kwargs,
    )
    plot_pacf(
        series,
        ax=pacf_ax,
        title=pacf_title,
        method=pacf_method,
        **common_kwargs,
    )

    if suptitle is not None:
        figure.suptitle(suptitle, size="xx-large")

    return figure, np.array(figure.get_axes())
예제 #24
0
    def transform(self, X, y=None, Z=None):
        """Transform X and return a transformed version.

        State required:
            Requires state to be "fitted".

        Accesses in self:
            Fitted model attributes ending in "_".
            self._is_fitted

        Parameters
        ----------
        X : Series or Panel, any supported mtype
            Data to be transformed, of python type as follows:
                Series: pd.Series, pd.DataFrame, or np.ndarray (1D or 2D)
                Panel: pd.DataFrame with 2-level MultiIndex, list of pd.DataFrame,
                    nested pd.DataFrame, or pd.DataFrame in long/wide format
                subject to sktime mtype format specifications, for further details see
                    examples/AA_datatypes_and_datasets.ipynb
        y : Series or Panel, default=None
            Additional data, e.g., labels for transformation
        Z : possible alias for X; should not be passed when X is passed
            alias Z will be deprecated in version 0.10.0

        Returns
        -------
        transformed version of X
        type depends on type of X and scitype:transform-output tag:
            |          | `transform`  |                        |
            |   `X`    |  `-output`   |     type of return     |
            |----------|--------------|------------------------|
            | `Series` | `Primitives` | `pd.DataFrame` (1-row) |
            | `Panel`  | `Primitives` | `pd.DataFrame`         |
            | `Series` | `Series`     | `Series`               |
            | `Panel`  | `Series`     | `Panel`                |
            | `Series` | `Panel`      | `Panel`                |
        instances in return correspond to instances in `X`
        combinations not in the table are currently not supported

        Explicitly, with examples:
            if `X` is `Series` (e.g., `pd.DataFrame`) and `transform-output` is `Series`
                then the return is a single `Series` of the same mtype
                Example: detrending a single series
            if `X` is `Panel` (e.g., `pd-multiindex`) and `transform-output` is `Series`
                then the return is `Panel` with same number of instances as `X`
                    (the transformer is applied to each input Series instance)
                Example: all series in the panel are detrended individually
            if `X` is `Series` or `Panel` and `transform-output` is `Primitives`
                then the return is `pd.DataFrame` with as many rows as instances in `X`
                Example: i-th row of the return has mean and variance of the i-th series
            if `X` is `Series` and `transform-output` is `Panel`
                then the return is a `Panel` object of type `pd-multiindex`
                Example: i-th instance of the output is the i-th window running over `X`

        Raises
        ------
        ValueError
            If `X` is not of one of the allowed input mtypes, if the
            "univariate-only" tag is set but `X` is not univariate, or if `y` is
            passed while `X` is a Panel that must be vectorized over instances.
        RuntimeError
            If `X` is a Panel that is vectorized over instances and its number of
            instances differs from the number of transformers fitted in `fit`.
        """
        X = _handle_alias(X, Z)

        # check whether is fitted, unless fit-in-transform is true
        if self.get_tag("fit-in-transform"):
            # the alias Z has already been resolved into X above; X and Z should
            # not be passed together, so only the resolved X is forwarded
            self.fit(X=X, y=y)
        else:
            self.check_is_fitted()

        # input checks and minor coercions on X, y
        ###########################################

        valid, msg, metadata = check_is(
            X,
            mtype=self.ALLOWED_INPUT_MTYPES,
            return_metadata=True,
            var_name="X",
        )
        if not valid:
            raise ValueError(msg)

        # checking X
        enforce_univariate = self.get_tag("univariate-only")
        if enforce_univariate and not metadata["is_univariate"]:
            raise ValueError("X must be univariate but is not")

        # retrieve mtypes/scitypes of all objects
        #########################################

        X_input_mtype = mtype(X)
        X_input_scitype = mtype_to_scitype(X_input_mtype)
        y_input_mtype = mtype(y)
        y_input_scitype = mtype_to_scitype(y_input_mtype)

        output_scitype = self.get_tag("scitype:transform-output")

        X_inner_mtype = self.get_tag("X_inner_mtype")
        if not isinstance(X_inner_mtype, list):
            X_inner_mtype = [X_inner_mtype]
        # only used for membership tests below
        X_inner_scitypes = {mtype_to_scitype(mt) for mt in X_inner_mtype}

        y_inner_mtype = self.get_tag("y_inner_mtype")
        if not isinstance(y_inner_mtype, list):
            y_inner_mtype = [y_inner_mtype]

        # treating Series vs Panel conversion for X
        ###########################################

        # there are three cases to treat:
        # 1. if the internal _fit supports X's scitype, move on to mtype conversion
        # 2. internal only has Panel but X is Series: consider X as one-instance Panel
        # 3. internal only has Series but X is Panel:  loop over instances
        #     currently this is enabled by conversion to df-list mtype
        #     and this does not support y (unclear what should happen here)

        # 1. nothing to do - simply don't enter any of the ifs below
        #   the "ifs" for case 2 and 3 below are skipped under the condition
        #       X_input_scitype in X_inner_scitypes
        #   case 2 remembers in X_was_Series whether it was entered

        # 2. internal only has Panel but X is Series: consider X as one-instance Panel
        X_was_Series = (
            X_input_scitype == "Series"
            and "Series" not in X_inner_scitypes
            and "Panel" in X_inner_scitypes
        )
        if X_was_Series:
            # convert the Series X to a one-element Panel
            X = convert_Series_to_Panel(X)

        # 3. internal only has Series but X is Panel: loop over instances
        if (
            X_input_scitype == "Panel"
            and "Panel" not in X_inner_scitypes
            and "Series" in X_inner_scitypes
        ):
            if y is not None:
                raise ValueError(
                    "no default behaviour if _fit does not support Panel, "
                    " but X is Panel and y is not None"
                )
            X = convert_to(X, to_type="df-list", as_scitype="Panel")

            if self.get_tag("fit-in-transform"):
                # nothing was fitted, so a fresh clone is applied to every instance
                Xt = [clone(self).transform(Xi) for Xi in X]
            else:
                # one transformer per instance was stored in fit
                transformers = self.transformers_
                if len(transformers) != len(X):
                    raise RuntimeError(
                        "found different number of instances in transform than in fit"
                    )
                Xt = [trafo.transform(Xi) for trafo, Xi in zip(transformers, X)]
            # now we have a list of transformed instances

            # if the output is Series, Xt is a Panel and we convert back
            if output_scitype == "Series":
                Xt = convert_to(Xt, to_type=X_input_mtype, as_scitype="Panel")

            # if the output is Primitives, we have a list of one-row dataframes
            # we concatenate those into one frame with one row per instance
            elif output_scitype == "Primitives":
                # X is a plain list at this point and has no pandas index
                # (list.index is a method), so rows are numbered in instance order
                Xt = pd.concat(Xt).reset_index(drop=True)
            return Xt

        # convert X/y to supported inner type, if necessary
        ###################################################

        # variables for the scitype of the current X (possibly converted)
        #     y wasn't converted so we can use y_input_scitype
        X_mtype = mtype(X)
        X_scitype = mtype_to_scitype(X_mtype)

        # subset to the mtypes that are of the same scitype as X/y
        X_inner_mtype = [
            mt for mt in X_inner_mtype if mtype_to_scitype(mt) == X_scitype
        ]

        y_inner_mtype = [
            mt for mt in y_inner_mtype if mtype_to_scitype(mt) == y_input_scitype
        ]

        # convert X and y to a supported internal type
        #  if X/y type is already supported, no conversion takes place
        X_inner = convert_to(
            X,
            to_type=X_inner_mtype,
            as_scitype=X_scitype,
        )
        y_inner = convert_to(
            y,
            to_type=y_inner_mtype,
            as_scitype=y_input_scitype,
        )

        # carry out the transformation
        ###################################################

        # todo: uncomment this once Z is completely gone
        # Xt = self._transform(X=X_inner, y=y_inner)
        # less robust workaround until then
        Xt = self._transform(X_inner, y_inner)

        # convert transformed X back to input mtype
        ###########################################

        # if we converted Series to "one-instance-Panel", revert that
        if X_was_Series and output_scitype == "Series":
            Xt = convert_Panel_to_Series(Xt)

        if output_scitype == "Series":
            Xt = convert_to(
                Xt,
                to_type=X_input_mtype,
                as_scitype=X_input_scitype,
            )
        elif output_scitype == "Primitives":
            # we "abuse" the Series converter to ensure df output
            Xt = convert_to(
                Xt,
                to_type="pd.DataFrame",
                as_scitype="Series",
            )
        # otherwise output_scitype is "Panel" and no conversion is needed

        return Xt
예제 #25
0
    def fit(self, X, y=None, Z=None):
        """Fit transformer to X, optionally to y.

        State change:
            Changes state to "fitted".

        Writes to self:
            Sets is_fitted flag to True.
            Sets fitted model attributes ending in "_".
            If X is a Panel but only Series is supported internally, stores one
            fitted clone per instance in `self.transformers_`.

        Parameters
        ----------
        X : Series or Panel, any supported mtype
            Data to fit transform to, of python type as follows:
                Series: pd.Series, pd.DataFrame, or np.ndarray (1D or 2D)
                Panel: pd.DataFrame with 2-level MultiIndex, list of pd.DataFrame,
                    nested pd.DataFrame, or pd.DataFrame in long/wide format
                subject to sktime mtype format specifications, for further details see
                    examples/AA_datatypes_and_datasets.ipynb
        y : Series or Panel, default=None
            Additional data, e.g., labels for transformation
        Z : possible alias for X; should not be passed when X is passed
            alias Z will be deprecated in version 0.10.0

        Returns
        -------
        self : a fitted instance of the estimator

        Raises
        ------
        ValueError
            If X is not of an allowed input mtype, if X is not univariate although
            the "univariate-only" tag is set, or if y is passed while X is a Panel
            that has to be vectorized over instances.
        RuntimeError
            If the scitype of X is not among the supported inner scitypes after
            the Series/Panel coercion.
        """
        X = _handle_alias(X, Z)

        self._is_fitted = False

        # with fit-in-transform there is nothing to fit: flag as fitted and leave
        if self.get_tag("fit-in-transform"):
            self._is_fitted = True
            return self

        # --- input checks on X ---------------------------------------------
        valid, msg, metadata = check_is(
            X,
            mtype=self.ALLOWED_INPUT_MTYPES,
            return_metadata=True,
            var_name="X",
        )
        if not valid:
            raise ValueError(msg)

        if self.get_tag("univariate-only") and not metadata["is_univariate"]:
            raise ValueError("X must be univariate but is not")

        # --- scitypes of the inputs and of the supported inner types --------
        X_input_scitype = mtype_to_scitype(mtype(X))
        y_input_scitype = mtype_to_scitype(mtype(y))

        X_supported_mtypes = self.get_tag("X_inner_mtype")
        if not isinstance(X_supported_mtypes, list):
            X_supported_mtypes = [X_supported_mtypes]
        X_inner_scitypes = {mtype_to_scitype(mt) for mt in X_supported_mtypes}

        y_supported_mtypes = self.get_tag("y_inner_mtype")
        if not isinstance(y_supported_mtypes, list):
            y_supported_mtypes = [y_supported_mtypes]

        # --- Series vs Panel handling for X ---------------------------------
        # (a) X's scitype is supported internally: nothing to do here
        # (b) X is Series, internally unsupported: wrap it as one-instance Panel
        # (c) X is Panel, internally unsupported: fit one clone per instance;
        #     this auto-vectorization is not available when y is passed
        series_unsupported = (
            X_input_scitype == "Series" and "Series" not in X_inner_scitypes
        )
        panel_unsupported = (
            X_input_scitype == "Panel" and "Panel" not in X_inner_scitypes
        )

        if series_unsupported:
            X = convert_Series_to_Panel(X)
        elif panel_unsupported:
            if y is not None:
                raise ValueError(
                    "no default behaviour if _fit does not support Panel, "
                    " but X is Panel and y is not None"
                )
            instances = convert_to(X, to_type="df-list", as_scitype="Panel")
            fitted_clones = []
            for instance in instances:
                # each recursive fit call does its own input checks/conversion
                fitted_clones.append(clone(self).fit(instance))
            self.transformers_ = fitted_clones
            # we leave the function here, so the flag has to be set now
            self._is_fitted = True
            return self

        X_scitype = mtype_to_scitype(mtype(X))

        # sanity check for debugging; not expected to trigger
        if X_scitype not in X_inner_scitypes:
            raise RuntimeError("conversion of X to X_inner unsuccessful, unexpected")

        # --- conversion of X/y to a supported inner mtype --------------------
        # only inner mtypes of the same scitype as the data are candidates;
        # if the data already is of a supported mtype, no conversion takes place
        X_candidates = [
            mt for mt in X_supported_mtypes if mtype_to_scitype(mt) == X_scitype
        ]
        y_candidates = [
            mt for mt in y_supported_mtypes if mtype_to_scitype(mt) == y_input_scitype
        ]

        X_inner = convert_to(X, to_type=X_candidates, as_scitype=X_scitype)
        y_inner = convert_to(y, to_type=y_candidates, as_scitype=y_input_scitype)

        # todo: switch to keyword arguments once Z is completely gone
        # positional call is the less robust workaround until then
        self._fit(X_inner, y_inner)

        self._is_fitted = True
        return self
예제 #26
0
def get_window(obj, window_length=None, lag=0):
    """Slice obj to the time index window with given length and lag.

    For pandas based containers, returns the rows with time indices
        strictly greater than cutoff - lag - window_length, and
        equal or less than cutoff - lag.
    Cutoff is that of obj, as determined by get_cutoff.

    For numpy based containers, the window is taken positionally along the
    first axis of the array, counted from its end.

    Parameters
    ----------
    obj : sktime compatible time series data container or None
        if not None, must be of one of the following mtypes:
            pd.Series, pd.DataFrame, np.ndarray, of Series scitype
            pd.multiindex, numpy3D, nested_univ, df-list, of Panel scitype
            pd_multiindex_hier, of Hierarchical scitype
    window_length : int or timedelta, optional, default=None
        must be int if obj is int indexed, timedelta if datetime indexed
        length of the window to slice to.
        If None, no slicing takes place and obj is returned unchanged.
    lag : int or timedelta, optional, default = 0
        must be int if obj is int indexed, timedelta if datetime indexed
        lag of the latest time in the window, with respect to cutoff of obj

    Returns
    -------
    obj sub-set to time indices in the half-open interval
        (cutoff - window_length - lag, cutoff - lag]
        in the pandas case converted back to the mtype of the input;
        in the numpy case the sliced array is returned as is
        obj itself if obj or window_length is None

    Raises
    ------
    ValueError
        if obj is not of Series, Panel, or Hierarchical scitype
    """
    from sktime.datatypes import check_is_scitype, convert_to

    # nothing to slice, or no window requested
    if obj is None or window_length is None:
        return obj

    is_valid, _, meta = check_is_scitype(
        obj, scitype=["Series", "Panel", "Hierarchical"], return_metadata=True
    )
    if not is_valid:
        raise ValueError("obj must be of Series, Panel, or Hierarchical scitype")
    # remembered so that the pandas branch can convert back at the end
    input_mtype = meta["mtype"]

    obj = convert_to(obj, GET_LATEST_WINDOW_SUPPORTED_MTYPES)

    # numpy based containers: positional window, counted from the end
    if isinstance(obj, np.ndarray):
        n_entries = len(obj)
        # clip both bounds so they never reach before the start of the array
        start = max(-window_length - lag, -n_entries)
        stop = max(-lag, -n_entries)
        # a stop of 0 would give an empty slice, it means "up to the end" here
        return obj[start:] if stop == 0 else obj[start:stop]

    # pandas based containers: window defined via the time index values
    if isinstance(obj, pd.DataFrame):
        cutoff = get_cutoff(obj)
        lower_excl = cutoff - window_length - lag
        upper_incl = cutoff - lag

        # for a MultiIndex, the last level is used as the time index
        if isinstance(obj.index, pd.MultiIndex):
            times = obj.index.get_level_values(-1)
        else:
            times = obj.index

        in_window = (times > lower_excl) & (times <= upper_incl)
        return convert_to(obj.iloc[in_window], input_mtype)

    raise ValueError(
        "bug in get_latest_window, unreachable condition, ifs should be exhaustive"
    )
예제 #27
0
    def _transform(self, X, y=None):
        """Transform X.

        Cuts every series into the intervals stored in `self.intervals_`
        and evaluates each feature function on each interval.

        Parameters
        ----------
        X : Panel data that can be converted to the numpy3D mtype
            Its second dimension must match that of `self.input_shape_`.
        y : ignored

        Returns
        -------
        Xt : pandas.DataFrame
          Transformed pandas DataFrame with n_instances rows and one
            column for each combination of feature function and interval,
            named "{start}_{end}_{function name}".

        Raises
        ------
        ValueError
            If the second dimension of X differs from the one recorded in
            `self.input_shape_`.
        """
        # validated list of functions to apply to the intervals
        features = _check_features(self.features)
        X = convert_to(X, "numpy3D")

        # the input must have as many columns as the data passed during fit
        # (the time indices themselves are not compared)
        if X.shape[1] != self.input_shape_[1]:
            raise ValueError(
                "Number of columns of input is different from what was seen in `fit`"
            )

        n_instances, _, _ = X.shape
        intervals = self.intervals_

        # output array: one column per (feature function, interval) pair
        Xt = np.zeros((n_instances, len(features) * len(intervals)))
        columns = []

        # TODO generalise to series-to-series functions and function kwargs
        for func in features:
            for start, end in intervals:
                segment = X[:, :, start:end]
                # next free column = number of columns named so far
                col = len(columns)

                # first try the vectorised call over the last axis; functions
                # without an `axis` argument are applied slice by slice instead
                try:
                    Xt[:, col] = func(segment, axis=-1).squeeze()
                except TypeError as err:
                    no_axis_msg = (
                        f"{func.__name__}() got an unexpected "
                        f"keyword argument 'axis'"
                    )
                    if str(err) != no_axis_msg:
                        # some other TypeError, not ours to handle
                        raise
                    Xt[:, col] = np.apply_along_axis(
                        func, axis=2, arr=segment
                    ).squeeze()

                columns.append(f"{start}_{end}_{func.__name__}")

        Xt = pd.DataFrame(Xt)
        Xt.columns = columns
        return Xt
예제 #28
0
    def transform_single_feature(self, X, feature, case_id=None):
        """Transform data into a specified catch22 feature.

        Parameters
        ----------
        X : np.ndarray, 3D, in numpy3D mtype format
            or other sktime data container of Panel scitype.
            A pd.DataFrame is converted to numpy3D; a 2D array is read as
            (n_instances, series_length).
        feature : int, catch22 feature id (0 to 21) or String, catch22 feature
                  name.
        case_id : int, identifier for the current set of cases. If the case_id is not
                  None and the same as the previously used case_id, calculations from
                  previous features will be reused.

        Returns
        -------
        Numpy array containing a catch22 feature for each input series.

        Raises
        ------
        ValueError
            If `feature` is not a valid id or name, if X has more than one
            dimension per series, or if `case_id` equals the last seen one but
            the number of instances or the series length differ.
        """
        # resolve `feature` to a numeric id
        if isinstance(feature, (int, np.integer, float)):
            if feature < 0 or feature > 21:
                raise ValueError("Invalid catch22 feature ID")
        elif isinstance(feature, str):
            if feature not in feature_names:
                raise ValueError("Invalid catch22 feature name")
            feature = feature_names.index(feature)
        else:
            raise ValueError("catch22 feature name or ID required")

        if isinstance(X, pd.DataFrame):
            X = convert_to(X, "numpy3D")

        # bring X into 2D form, one row per series
        if len(X.shape) > 2:
            n_instances, n_dims, series_length = X.shape

            if n_dims > 1:
                raise ValueError(
                    "transform_single_feature can only handle univariate series "
                    "currently."
                )

            X = np.reshape(X, (n_instances, -1))
        else:
            n_instances, series_length = X.shape

        if case_id is not None:
            if case_id != self._case_id:
                # new set of cases: remember its dimensions, reset all caches
                self._case_id = case_id
                self._st_n_instances = n_instances
                self._st_series_length = series_length
                for cache_name in (
                    "_outlier_series",
                    "_smin",
                    "_smax",
                    "_smean",
                    "_fft",
                    "_ac",
                    "_acfz",
                ):
                    setattr(self, cache_name, [None] * n_instances)
            else:
                # same set of cases as before: dimensions have to agree
                same_dims = (
                    n_instances == self._st_n_instances
                    and series_length == self._st_series_length
                )
                if not same_dims:
                    raise ValueError(
                        "Catch22: case_is the same, but n_instances and "
                        "series_length do not match last seen for single "
                        "feature transform."
                    )

        # one job per series
        jobs = (
            delayed(self._transform_case_single)(X[idx], feature, case_id, idx)
            for idx in range(n_instances)
        )
        c22_list = Parallel(n_jobs=self.n_jobs)(jobs)

        if self.replace_nans:
            # NaN and both infinities are all replaced by 0
            c22_list = np.nan_to_num(c22_list, copy=False, nan=0, posinf=0, neginf=0)

        return np.asarray(c22_list)
예제 #29
0
def plot_series(*series,
                labels=None,
                markers=None,
                x_label=None,
                y_label=None,
                ax=None):
    """Plot one or more time series.

    Series with three or fewer points, or whose positions in the combined
    index are not contiguous, are drawn as scatter plots; all other series
    are drawn as line plots.

    Parameters
    ----------
    series : pd.Series or iterable of pd.Series
        One or more time series
    labels : list, default = None
        Names of series, will be displayed in figure legend.
        The length of the list has to match with the number of series.
    markers: list, default = None
        Markers of data points, if None the marker "o" is used by default.
        The length of the list has to match with the number of series.
    x_label : default = None
        Passed to ``ax.set_xlabel``; if None, no x-axis label is set.
    y_label : default = None
        Passed to ``ax.set_ylabel``; if None, the name of the first series
        is used instead.
    ax : plt.Axis, default = None
        Axes to draw on; if None, a new figure and axes are created.

    Returns
    -------
    fig : plt.Figure
        Only returned if no ``ax`` was passed.
    ax : plt.Axis

    Raises
    ------
    ValueError
        If no series is passed, or if the number of labels or markers does
        not match the number of series.
    """
    _check_soft_dependencies("matplotlib", "seaborn")
    import matplotlib.pyplot as plt
    from matplotlib.ticker import FuncFormatter, MaxNLocator
    from matplotlib.cbook import flatten
    import seaborn as sns

    # fail early with a clear message instead of an IndexError further down
    if not series:
        raise ValueError("At least one time series is required for plotting.")

    # validate every input before any conversion takes place
    for y in series:
        check_y(y)

    series = [convert_to(y, "pd.Series", "Series") for y in series]

    n_series = len(series)
    # remembered because `ax` is overwritten below when a figure is created
    _ax_kwarg_is_none = ax is None

    # labels
    if labels is not None:
        if n_series != len(labels):
            raise ValueError("""There must be one label for each time series,
                but found inconsistent numbers of series and
                labels.""")
        legend = True
    else:
        labels = ["" for _ in range(n_series)]
        legend = False

    # markers
    if markers is not None:
        if n_series != len(markers):
            raise ValueError("""There must be one marker for each time series,
                but found inconsistent numbers of series and
                markers.""")
    else:
        markers = ["o" for _ in range(n_series)]

    # create combined index
    index = series[0].index
    for y in series[1:]:
        # check index types
        check_consistent_index_type(index, y.index)
        index = index.union(y.index)

    # generate integer x-values: positions of each series in the combined index
    xs = [np.argwhere(index.isin(y.index)).ravel() for y in series]

    # create figure if no Axe provided for plotting
    if _ax_kwarg_is_none:
        fig, ax = plt.subplots(1, figsize=plt.figaspect(0.25))

    colors = sns.color_palette("colorblind", n_colors=n_series)

    # plot series
    for x, y, color, label, marker in zip(xs, series, colors, labels, markers):

        # scatter if little data is available or index is not complete
        if len(x) <= 3 or not np.array_equal(np.arange(x[0], x[-1] + 1), x):
            plot_func = sns.scatterplot
        else:
            plot_func = sns.lineplot

        plot_func(x=x, y=y, ax=ax, marker=marker, label=label, color=color)

    # positions holding a data point in any series; a set keeps the per-tick
    # membership test in the formatter constant-time
    tick_positions = {int(pos) for pos in flatten(xs)}

    # set x label of data point to the matching index
    def format_fn(tick_val, tick_pos):
        position = int(tick_val)
        if position in tick_positions:
            return index[position]
        return ""

    # dynamically set x label ticks and spacing from index labels
    ax.xaxis.set_major_formatter(FuncFormatter(format_fn))
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))

    # Label the x and y axes
    if x_label is not None:
        ax.set_xlabel(x_label)

    _y_label = y_label if y_label is not None else series[0].name
    ax.set_ylabel(_y_label)

    if legend:
        ax.legend()
    if _ax_kwarg_is_none:
        return fig, ax
    else:
        return ax
예제 #30
0
    def predict(self,
                fh=None,
                X=None,
                return_pred_int=False,
                alpha=DEFAULT_ALPHA):
        """Produce forecasts for the given forecasting horizon.

        State required:
            The forecaster must already be "fitted".

        Accesses in self:
            Fitted model attributes ending in "_".
            self.cutoff, self._is_fitted

        Writes to self:
            Stores fh to self.fh if fh is passed and has not been passed in _fit.

        Parameters
        ----------
        fh : int, list, np.ndarray or ForecastingHorizon
            Forecasting horizon
        X : pd.DataFrame, or 2D np.ndarray, optional (default=None)
            Exogeneous time series to predict from
            if self.get_tag("X-y-must-have-same-index"), X.index must contain fh.index
        return_pred_int : bool, optional (default=False)
            If True, prediction intervals are returned alongside the forecasts.
        alpha : float or list, optional (default=DEFAULT_ALPHA)
            Forwarded unchanged to ``_predict``.

        Returns
        -------
        y_pred : pd.Series, pd.DataFrame, or np.ndarray (1D or 2D)
            Point forecasts at fh, with same index as fh
            y_pred has same type as y passed in fit (most recently)
        y_pred_int : pd.DataFrame - only if return_pred_int=True
            in this case, return is 2-tuple (otherwise a single y_pred)
            Prediction intervals

        Raises
        ------
        NotImplementedError
            If return_pred_int is True but the "capability:pred_int" tag
            is not set for this estimator.
        """
        # state and horizon handling
        self.check_is_fitted()
        self._set_fh(fh)

        # the capability tag is only consulted when intervals are requested
        if return_pred_int:
            if not self.get_tag("capability:pred_int"):
                raise NotImplementedError(
                    f"{self.__class__.__name__} does not have the capability to return "
                    "prediction intervals. Please set return_pred_int=False. If you "
                    "think this estimator should have the capability, please open "
                    "an issue on sktime.")

        # validate X and bring it into the inner format
        X_inner = self._check_X(X=X)

        # NOTE: alpha is deliberately passed through without check_alpha;
        #  check_alpha converts to list, while the ARIMA forecasters expect a
        #  float and do not run the check themselves
        # todo: needs fixing in ARIMA and AutoARIMA
        inner_result = self._predict(
            self.fh,
            X=X_inner,
            return_pred_int=return_pred_int,
            alpha=alpha,
        )

        # todo: clean this up, predictive intervals should be returned by other method
        if return_pred_int:
            # with intervals requested, _predict yields (forecasts, intervals)
            pred_int = inner_result[1]
            point_forecast = inner_result[0]
        else:
            point_forecast = inner_result

        # hand the forecasts back in the mtype of the most recently seen y
        y_out = convert_to(
            point_forecast,
            self._y_mtype_last_seen,
            as_scitype="Series",
            store=self._converter_store_y,
        )

        if not return_pred_int:
            return y_out
        return (y_out, pred_int)