Beispiel #1
0
    def fit(self, X, y=None):
        """
        Fit the segmenter by resolving the intervals used for segmentation.

        Parameters
        ----------
        X : pandas DataFrame of shape [n_samples, n_features]
            Input data, checked via ``check_X(X, enforce_univariate=True)``.
        y : pandas Series, shape (n_samples, ...), optional
            Targets for supervised learning. Not referenced by this method.

        Returns
        -------
        self : an instance of self.

        Raises
        ------
        ValueError
            If ``self.intervals`` is neither a numpy array nor an integer.
        """
        X = check_X(X, enforce_univariate=True)
        self.input_shape_ = X.shape

        # The time index extracted from X is what gets split below.
        self._time_index = get_time_index(X)

        intervals = self.intervals
        if isinstance(intervals, np.ndarray):
            # Intervals were supplied explicitly: store them untouched.
            self.intervals_ = intervals
        elif is_int(intervals):
            # An integer is the number of chunks the time index is split into.
            self.intervals_ = np.array_split(self._time_index, intervals)
        else:
            raise ValueError(
                f"Intervals must be either an integer, an array with "
                f"start and end points, but found: {intervals}")

        self._is_fitted = True
        return self
Beispiel #2
0
    def fit(self, X, y=None):
        """
        Fit the transformer by generating the random intervals.

        Parameters
        ----------
        X : pandas DataFrame of shape [n_samples, n_features]
            Input data, checked via ``validate_X``.
        y : pandas Series, shape (n_samples, ...), optional
            Targets for supervised learning. Not referenced by this method.

        Returns
        -------
        self : RandomIntervalSegmenter
            This estimator
        """
        validate_X(X)
        self.input_shape_ = X.shape

        # Time index of the series in X; the intervals are drawn over it.
        # TODO generalise to columns with series of unequal length
        time_index = get_time_index(X)
        self._time_index = time_index

        # TODO if multiple columns are passed, introduce option to compute one set of shared intervals,
        #  or rely on ColumnTransformer?
        random_count = self.n_intervals == 'random'
        if random_count:
            # The helper decides the number of intervals itself.
            intervals = self._rand_intervals_rand_n(time_index)
        else:
            # The requested number of intervals is forwarded to the helper.
            intervals = self._rand_intervals_fixed_n(
                time_index, n_intervals=self.n_intervals)
        self.intervals_ = intervals

        return self
Beispiel #3
0
def test_indices(n_components):
    """Check which indices ``PCATransformer.fit_transform`` keeps and resets.

    The input gets a categorical column index and a row index running
    10..19. The transformed frame must carry the same column and row
    indices, while its time index must be ``0..n_components_ - 1``.

    Parameters
    ----------
    n_components : passed straight to ``PCATransformer(n_components=...)``.
    """
    np.random.seed(42)
    X = detabularize(pd.DataFrame(data=np.random.randn(10, 5)))
    X.columns = pd.CategoricalIndex(['col_0'])
    # ``pd.Int64Index`` was removed in pandas 2.0; ``pd.Index`` with an
    # explicit int64 dtype builds the same index on old and new versions.
    X.index = pd.Index(np.arange(10, 20), dtype="int64")

    pca = PCATransformer(n_components=n_components)
    Xt = pca.fit_transform(X)

    assert X.columns.equals(Xt.columns)
    assert X.index.equals(Xt.index)
    expected_time_index = pd.Index(
        np.arange(pca.pca.n_components_), dtype="int64")
    assert get_time_index(Xt).equals(expected_time_index)
Beispiel #4
0
    def transform(self, X, y=None):
        """Remove the seasonal components from X.

        Parameters
        ----------
        X : nested pandas DataFrame of shape [n_samples, n_features]
            Nested dataframe with time-series in cells. With ``check_input``
            enabled, more than one column is rejected.
        y : optional
            Not referenced by this method.

        Returns
        -------
        Xt : pandas DataFrame
          Nested dataframe with the column labels of X, holding the data with
          the seasonal components subtracted (``model == 'additive'``) or
          divided out (any other model). When ``sp == 1``, X itself is
          returned unchanged.

        Raises
        ------
        NotImplementedError
            If ``check_input`` is set and X has more than one column.
        """
        if self.check_input:
            validate_X(X)
            n_columns = X.shape[1]
            if n_columns > 1:
                raise NotImplementedError("Currently does not work on multiple columns, make use of ColumnTransformer "
                                          "instead")

        # Stored before the early return so it is set for every call.
        self._input_shape = X.shape

        # A seasonal periodicity of 1 means there is nothing to remove.
        if self.sp == 1:
            return X

        # The time index is kept because transform/inverse transform depend on it.
        self._time_index = get_time_index(X)

        # Nested -> tabular, first column only.
        tabulariser = Tabulariser()
        tabular = tabulariser.transform(X.iloc[:, :1])

        # NOTE(review): the fitted check and this second validation run after
        # tabularising, and the validation runs regardless of check_input.
        check_is_fitted(self, 'is_fitted_')
        validate_X(X)

        seasonal = self._fit_seasonal_decomposition_model(tabular)

        is_additive = self.model == 'additive'
        deseasonalised = tabular - seasonal if is_additive else tabular / seasonal

        # Only the first ``sp`` columns are stored for the inverse transform;
        # the components repeat after the first seasonal period.
        self.seasonal_components_ = seasonal[:, :self.sp]

        # Tabular -> nested, restoring the original column labels.
        nested = tabulariser.inverse_transform(pd.DataFrame(deseasonalised))
        nested.columns = X.columns
        return nested
Beispiel #5
0
    def inverse_transform(self, X, y=None):
        """Add the trend stored in ``coefs_`` back onto X.

        Parameters
        ----------
        X : nested pandas DataFrame of shape [n_samples, n_features]
            Nested dataframe with time-series in cells. Must have a single
            column and as many rows as the data seen in ``transform``.
        y : optional
            Not referenced by this method.

        Returns
        -------
        Xit : pandas DataFrame
          Nested dataframe with the column labels of X and the trend added
          at the time index of X.

        Raises
        ------
        ValueError
            If ``check_input`` is set and X is not a pandas DataFrame, or if
            the number of rows differs from the one stored by ``transform``.
        NotImplementedError
            If X has more than one column.
        """
        check_is_fitted_in_transform(self, 'coefs_')

        if self.check_input and not isinstance(X, pd.DataFrame):
            raise ValueError(f"Input must be pandas DataFrame, but found: {type(X)}")

        # The column-count check runs whether or not check_input is set.
        if X.shape[1] > 1:
            raise NotImplementedError("Currently does not work on multiple columns, make use of ColumnTransformer "
                                      "instead")

        n_given = X.shape[0]
        n_seen = self._input_shape[0]
        if n_given != n_seen:
            raise ValueError(f"Inverse transform only works on data with the same number samples "
                             f"as seen during transform, but found: {n_given} samples "
                             f"!= {n_seen} samples (seen during transform)")

        index = get_time_index(X)

        # Nested -> tabular, first column only.
        tabulariser = Tabulariser()
        tabular = tabulariser.transform(X.iloc[:, :1])

        # Re-apply the trend at the time index of the incoming data.
        with_trend = add_trend(tabular, coefs=self.coefs_, time_index=index)

        # Tabular -> nested, restoring the original column labels.
        nested = tabulariser.inverse_transform(pd.DataFrame(with_trend))
        nested.columns = X.columns
        return nested
Beispiel #6
0
    def fit(self, X, y=None):
        """
        Validate ``min_length`` and fit the transformer by generating the
        random intervals.

        Parameters
        ----------
        X : pandas DataFrame of shape [n_samples, n_features]
            Input data, checked via ``check_X``.
        y : pandas Series, shape (n_samples, ...), optional
            Targets for supervised learning. Not referenced by this method.

        Returns
        -------
        self : RandomIntervalSegmenter
            This estimator

        Raises
        ------
        ValueError
            If ``self.min_length`` is not an ``int`` or is smaller than 1.
        """
        # Hyper-parameter checks run first so bad settings fail before any
        # work is done on X.
        if not isinstance(self.min_length, int):
            raise ValueError(f"min_length must be an integer, but found: "
                             f"{type(self.min_length)}")
        if self.min_length < 1:
            raise ValueError(f"min_length must be a positive integer (>= 1), "
                             f"but found: {self.min_length}")
        X = check_X(X)
        self.input_shape_ = X.shape

        # Retrieve time-series indexes from each column.
        # TODO generalise to columns with series of unequal length
        self._time_index = get_time_index(X)

        # Compute random intervals for each column.
        # TODO if multiple columns are passed, introduce option to compute
        #  one set of shared intervals,
        #  or rely on ColumnTransformer?
        if self.n_intervals == 'random':
            self.intervals_ = self._rand_intervals_rand_n(self._time_index)
        else:
            self.intervals_ = self._rand_intervals_fixed_n(
                self._time_index,
                n_intervals=self.n_intervals)
        self._is_fitted = True
        return self
Beispiel #7
0
    def transform(self, X, y=None):
        """Fit a trend on X and return X with that trend removed.

        Parameters
        ----------
        X : nested pandas DataFrame of shape [n_samples, n_features]
            Nested dataframe with time-series in cells. Must have a single
            column.
        y : optional
            Not referenced by this method.

        Returns
        -------
        Xt : pandas DataFrame
          Nested dataframe with the column labels of X, holding the data
          after ``remove_trend`` has been applied.

        Raises
        ------
        ValueError
            If ``check_input`` is set and X is not a pandas DataFrame.
        NotImplementedError
            If X has more than one column.
        """
        if self.check_input and not isinstance(X, pd.DataFrame):
            raise ValueError(f"Input must be pandas DataFrame, but found: {type(X)}")

        # The column-count check runs whether or not check_input is set.
        if X.shape[1] > 1:
            raise NotImplementedError("Currently does not work on multiple columns")

        # Shape and time index are stored on the instance for later use,
        # e.g. to carry the trend forward in inverse_transform.
        self._input_shape = X.shape
        self._time_index = get_time_index(X)

        # Nested -> tabular, first column only.
        tabulariser = Tabulariser()
        tabular = tabulariser.transform(X.iloc[:, :1])

        # Fit the trend of order ``self.order``, then take it out again.
        self.coefs_ = fit_trend(tabular, order=self.order)
        detrended = remove_trend(tabular, coefs=self.coefs_, time_index=self._time_index)

        # Tabular -> nested, restoring the original column labels.
        nested = tabulariser.inverse_transform(pd.DataFrame(detrended))
        nested.columns = X.columns
        return nested
Beispiel #8
0
    def fit(self, y, fh=1, X=None):
        """
        Fit forecaster.

        Parameters
        ----------
        y : pandas.Series
            Target time series to which to fit the forecaster.
        fh : array-like, optional (default=1)
            The forecasting horizon with the steps ahead to predict. Any value
            other than None is passed through ``validate_fh``; None is handed
            to the internal fit unchanged.
        X : pandas.DataFrame, shape=[n_obs, n_vars], optional (default=None)
            An optional 2-d dataframe of exogenous variables. When None, the
            internal fit is called without an ``X`` argument at all.

        Returns
        -------
        self : returns an instance of self.
        """
        if self.check_input:
            validate_y_X(y, X)

        # Only a given horizon is validated; None passes through as is.
        horizon = None if fh is None else validate_fh(fh)

        # The index of y is kept so the horizon can later be placed relative
        # to the data seen in fit.
        self._time_index = get_time_index(y)

        # Estimators that only take y must not receive an X keyword, so X is
        # forwarded only when it was actually supplied.
        if X is None:
            self._fit(y, fh=horizon)
        else:
            self._fit(y, fh=horizon, X=X)

        self._is_fitted = True
        return self
Beispiel #9
0
    def transform(self, X, y=None):
        """Convert a nested pandas dataframe into a tabular dataframe.

        The column labels, row index and time index of X are stored on the
        instance before the conversion.

        Parameters
        ----------
        X : pandas DataFrame
            Nested dataframe with pandas series or numpy arrays in cells.
        y : array-like, optional (default=None)
            Not referenced by this method.

        Returns
        -------
        Xt : pandas DataFrame
            Transformed dataframe with only primitives in cells.
        """
        if self.check_input:
            validate_X(X)

        # Remember the layout of the nested input.
        self._columns = X.columns
        self._index = X.index
        self._time_index = get_time_index(X)

        return tabularize(X)
Beispiel #10
0
    def inverse_transform(self, X, y=None):
        """Add the fitted seasonal components back onto X.

        Parameters
        ----------
        X : nested pandas DataFrame of shape [n_samples, n_features]
            Nested dataframe with time-series in cells. Must have as many
            rows as the data seen in ``transform``.
        y : optional
            Not referenced by this method.

        Returns
        -------
        Xit : pandas DataFrame
          Nested dataframe with the column labels of X, holding the data with
          the seasonal components added (``model == 'additive'``) or
          multiplied in (any other model). When ``sp == 1``, X itself is
          returned unchanged.

        Raises
        ------
        ValueError
            If the number of rows of X differs from the one stored by
            ``transform``.
        """

        if self.check_input:
            validate_X(X)
            check_X_is_univariate(X)

        # check that number of samples are the same, inverse transform depends on parameters fitted in transform and
        # hence only works on data with the same (number of) rows
        if X.shape[0] != self._input_shape[0]:
            raise ValueError(f"Inverse transform only works on data with the same number samples "
                             f"as seen during transform, but found: {X.shape[0]} samples "
                             f"!= {self._input_shape[0]} samples (seen during transform)")

        # if the seasonal periodicity is 1, return unchanged X
        sp = self.sp
        if sp == 1:
            return X

        # check if seasonal decomposition model has been fitted in transform
        check_is_fitted_in_transform(self, 'seasonal_components_')

        # check if time index is aligned with time index seen during transform
        time_index = get_time_index(X)

        # align seasonal components with index of X
        if self._time_index.equals(time_index):
            # same time index as in transform: the stored components can be used directly
            seasonal_components = self.seasonal_components_

        else:
            # if time index is not aligned, make sure to align fitted seasonal components to new index
            seasonal_components = self._align_seasonal_components_to_index(time_index)

        # expand or shorten aligned seasonal components to same size as X
        n_obs = len(time_index)
        if n_obs > sp:
            # ``np.int`` was deprecated in NumPy 1.20 and removed in 1.24;
            # the builtin ``int`` is the documented replacement.
            n_tiles = int(np.ceil(n_obs / sp))
            seasonal_components = np.tile(seasonal_components, n_tiles)
        seasonal_components = seasonal_components[:, :n_obs]

        # convert into tabular format
        tabulariser = Tabulariser()
        Xs = tabulariser.transform(X.iloc[:, :1])

        # inverse transform data
        if self.model == 'additive':
            Xit = Xs + seasonal_components
        else:
            Xit = Xs * seasonal_components

        # convert back into nested format
        Xit = tabulariser.inverse_transform(pd.DataFrame(Xit))
        Xit.columns = X.columns
        return Xit
Beispiel #11
0
 def fit(self, X, y=None):
     """Record the column labels, row index and time index of X.

     ``y`` is not referenced. Marks the instance as fitted and returns it.
     """
     columns, index = X.columns, X.index
     self._columns = columns
     self._index = index
     self._time_index = get_time_index(X)

     self._is_fitted = True
     return self