def get_test_params(cls, parameter_set="default"):
    """Provide parameter settings used to build test instances of the estimator.

    Parameters
    ----------
    parameter_set : str, default="default"
        Name of the test parameter set to return. When no special parameters
        are defined for a value, the `"default"` set is returned.
        There are currently no reserved values for transformers.

    Returns
    -------
    params : dict or list of dict, default = {}
        Parameters to create testing instances of the class.
        Each dict holds the arguments of one "interesting" test instance, i.e.,
        `MyClass(**params)` or `MyClass(**params[i])` creates a valid test
        instance. `create_test_instance` uses the first (or only) dictionary
        in `params`.
    """
    from sktime.transformations.series.boxcox import BoxCoxTransformer

    # first set: only the wrapped transformer, everything else left at default
    transformer_only = {"transformer": BoxCoxTransformer()}
    # second set: additionally passes skip_inverse_transform=False explicitly
    with_inverse_flag = {
        "transformer": BoxCoxTransformer(),
        "skip_inverse_transform": False,
    }
    return [transformer_only, with_inverse_flag]
def test_transform_fitintransform():
    """Check FitInTransform output against a plain BoxCoxTransformer.

    The wrapper is fitted on ``X_train`` and then transforms ``X_test``; the
    result must equal what a fresh ``BoxCoxTransformer`` returns from
    ``fit_transform`` on ``X_test``.
    """
    wrapped = FitInTransform(BoxCoxTransformer())
    wrapped.fit(X=X_train)
    actual = wrapped.transform(X=X_test)

    reference = BoxCoxTransformer()
    expected = reference.fit_transform(X_test)

    assert_series_equal(actual, expected)
def test_boxcox_against_scipy():
    """Compare BoxCoxTransformer values and fitted lambda with ``boxcox``.

    ``boxcox`` is presumably ``scipy.stats.boxcox`` (per the test name); it is
    called on the raw values of the airline series.
    """
    airline = load_airline()
    transformer = BoxCoxTransformer()
    transformed = transformer.fit_transform(airline)

    reference_values, reference_lambda = boxcox(airline.values)

    np.testing.assert_array_equal(transformed, reference_values)
    assert transformer.lambda_ == reference_lambda
def get_test_params(cls):
    """Provide parameter settings used to build test instances of the estimator.

    Returns
    -------
    params : dict or list of dict, default = {}
        Parameters to create testing instances of the class.
        Each dict holds the arguments of one "interesting" test instance, i.e.,
        `MyClass(**params)` or `MyClass(**params[i])` creates a valid test
        instance. `create_test_instance` uses the first (or only) dictionary
        in `params`.
    """
    from sklearn.preprocessing import StandardScaler

    from sktime.forecasting.naive import NaiveForecaster
    from sktime.transformations.series.adapt import TabularToSeriesAdaptor
    from sktime.transformations.series.boxcox import BoxCoxTransformer

    # pipeline 1: sklearn scaler wrapped as a series transformer, then naive forecaster
    scaler_pipeline = {
        "steps": [
            ("transformer", TabularToSeriesAdaptor(StandardScaler())),
            ("forecaster", NaiveForecaster()),
        ]
    }
    # pipeline 2: Box-Cox transformer, then naive forecaster
    boxcox_pipeline = {
        "steps": [
            ("transformer", BoxCoxTransformer()),
            ("forecaster", NaiveForecaster()),
        ]
    }
    return [scaler_pipeline, boxcox_pipeline]
def test_guerrero_against_r_implementation(bounds, r_lambda):
    """Compare the Guerrero lambda of BoxCoxTransformer with the R implementation.

    The reference lambda values were estimated with the R implementation of
    the Guerrero method,
    https://github.com/robjhyndman/forecast/blob/master/R/guerrero.R

    R code to generate the hardcoded value for bounds=(-1, 2) used in the
    test ('Airline.csv' contains the data from 'load_airline()'):

        airline_file <- read.csv(file = 'Airline.csv')[,c('Passengers')]
        airline.ts <- ts(airline_file)
        guerrero(airline.ts, lower=-1, upper=2, nonseasonal.length = 20)

    Output: -0.156981228426408
    """
    airline = load_airline()

    transformer = BoxCoxTransformer(method="guerrero", sp=20, bounds=bounds)
    transformer.fit(airline)

    # agreement with R is only required up to 4 decimal places
    fitted_lambda = transformer.lambda_
    np.testing.assert_almost_equal(fitted_lambda, r_lambda, decimal=4)
def _fit(self, X, y=None):
    """Fit transformer to X and y.

    private _fit containing the core logic, called from fit

    Validates ``sp``, resolves the block length into ``self.block_length_``
    and fits a ``BoxCoxTransformer`` on ``X``, stored as
    ``self.box_cox_transformer_``.

    Parameters
    ----------
    X : pd.Series
        Data to be transformed
    y : ignored, for interface compatibility

    Returns
    -------
    self: reference to self

    Raises
    ------
    ValueError
        If ``sp`` is not an integer, or if ``X`` does not contain more than
        ``sp`` observations.
    NotImplementedError
        If ``sp`` is 1 or smaller, i.e. the data is treated as non-seasonal.
    """
    # Check the type before any comparison: ``"12" <= 1`` would otherwise
    # fail with an unrelated TypeError instead of the intended ValueError.
    if not isinstance(self.sp, int):
        raise ValueError(
            "sp parameter of STLBootstrapTransformer must be an integer")

    if self.sp <= 1:
        raise NotImplementedError(
            "STLBootstrapTransformer does not support non-seasonal data")

    # The series must be strictly longer than one seasonal period.
    if len(X) <= self.sp:
        raise ValueError(
            "STLBootstrapTransformer requires that sp is smaller than"
            " the length of X")

    if self.block_length is not None:
        self.block_length_ = self.block_length
    else:
        # heuristic: the smaller of two seasonal periods and len(X) - sp
        self.block_length_ = min(self.sp * 2, len(X) - self.sp)

    # fit boxcox to get lambda and transform X
    self.box_cox_transformer_ = BoxCoxTransformer(
        sp=self.sp, bounds=self.lambda_bounds, method=self.lambda_method)
    self.box_cox_transformer_.fit(X)

    return self
def get_test_params(cls):
    """Provide parameter settings used to build test instances of the estimator.

    Returns
    -------
    params : dict or list of dict, default = {}
        Parameters to create testing instances of the class.
        Each dict holds the arguments of one "interesting" test instance, i.e.,
        `MyClass(**params)` or `MyClass(**params[i])` creates a valid test
        instance. `create_test_instance` uses the first (or only) dictionary
        in `params`.
    """
    from sktime.transformations.series.boxcox import BoxCoxTransformer

    # single parameter set: wrap a Box-Cox transformer with passthrough disabled
    params = {
        "transformer": BoxCoxTransformer(),
        "passthrough": False,
    }
    return params
"level": "local level" }, PartialAutoCorrelationTransformer: { "n_lags": 1 }, AutoCorrelationTransformer: { "n_lags": 1 }, Imputer: { "method": "mean" }, HampelFilter: { "window_length": 3 }, OptionalPassthrough: { "transformer": BoxCoxTransformer(), "passthrough": False }, FeatureSelection: { "method": "all" }, ColumnwiseTransformer: { "transformer": Detrender() }, AggrDist: { "transformer": ScipyDist() }, PyODAnnotator: { "estimator": ANOMALY_DETECTOR }, ClaSPSegmentation: {
def test_lambda_bounds(bounds, method, sp):
    """Check that the fitted lambda lies strictly inside the given bounds.

    A ``BoxCoxTransformer`` built from ``bounds``, ``method`` and ``sp`` is
    fitted on the airline series; ``lambda_`` must satisfy
    ``bounds[0] < lambda_ < bounds[1]``.
    """
    airline = load_airline()

    transformer = BoxCoxTransformer(sp=sp, method=method, bounds=bounds)
    transformer.fit(airline)

    lower = bounds[0]
    fitted_lambda = transformer.lambda_
    upper = bounds[1]
    assert lower < fitted_lambda < upper
class STLBootstrapTransformer(BaseTransformer):
    """Creates a population of similar time series.

    This method utilises a form of bootstrapping to generate a population of
    similar time series to the input time series [1]_, [2]_.

    First the observed time series is transformed using a Box-Cox transformation to
    stabilise the variance. Then it's decomposed to seasonal, trend and residual
    time series, using the STL implementation from statsmodels
    (``statsmodels.tsa.api.STL``) [4]_. We then sample blocks from the residuals time
    series using the Moving Block Bootstrapping (MBB) method [3]_ to create synthetic
    residuals series that mimic the autocorrelation patterns of the observed series.
    Finally these bootstrapped residuals are added to the season and trend components
    and we use the inverse Box-Cox transform to return a panel of similar time series.

    The output can be used for bagging forecasts, prediction intervals and data
    augmentation.

    The returned panel will be a multiindex dataframe (``pd.DataFrame``) with the
    series_id and time_index as the index and a single column of the time series value.
    The values for series_id are "actual" for the original series and "synthetic_n"
    (where n is an integer) for the generated series.
    See the **Examples** section for example output.

    Parameters
    ----------
    n_series : int, optional
        The number of bootstrapped time series that will be generated, by default 10.
    sp : int, optional
        Seasonal periodicity of the data in integer form, by default 12.
        Must be an integer >= 2
    block_length : int, optional
        The length of the block in the MBB method, by default None.
        If not provided, the following heuristic is used, the block length will be
        the minimum between 2*sp and len(X) - sp.
    sampling_replacement : bool, optional
        Whether the MBB sample is with or without replacement, by default False.
    return_actual : bool, optional
        If True the output will contain the actual time series, by default True.
        The actual time series will be labelled as "actual" in the series_id level.
    lambda_bounds : Tuple, optional
        BoxCox parameter:
        Lower and upper bounds used to restrict the feasible range
        when solving for the value of lambda, by default None.
    lambda_method : str, optional
        BoxCox parameter:
        {"pearsonr", "mle", "all", "guerrero"}, by default "guerrero".
        The optimization approach used to determine the lambda value used
        in the Box-Cox transformation.
    seasonal : int, optional
        STL parameter:
        Length of the seasonal smoother. Must be an odd integer, and should
        normally be >= 7, by default 7.
    trend : int, optional
        STL parameter:
        Length of the trend smoother, by default None.
        Must be an odd integer. If not provided uses the smallest odd integer greater
        than 1.5 * period / (1 - 1.5 / seasonal), following the suggestion in the
        original implementation.
    low_pass : int, optional
        STL parameter:
        Length of the low-pass filter, by default None.
        Must be an odd integer >=3. If not provided, uses the smallest odd
        integer > period
    seasonal_deg : int, optional
        STL parameter:
        Degree of seasonal LOESS. 0 (constant) or 1 (constant and trend), by default 1.
    trend_deg : int, optional
        STL parameter:
        Degree of trend LOESS. 0 (constant) or 1 (constant and trend), by default 1.
    low_pass_deg : int, optional
        STL parameter:
        Degree of low pass LOESS. 0 (constant) or 1 (constant and trend), by default 1.
    robust : bool, optional
        STL parameter:
        Flag indicating whether to use a weighted version that is robust to some forms
        of outliers, by default False.
    seasonal_jump : int, optional
        STL parameter:
        Positive integer determining the linear interpolation step, by default 1.
        If larger than 1, the LOESS is used every seasonal_jump points and linear
        interpolation is between fitted points. Higher values reduce estimation time.
    trend_jump : int, optional
        STL parameter:
        Positive integer determining the linear interpolation step, by default 1.
        If larger than 1, the LOESS is used every trend_jump points and values between
        the two are linearly interpolated. Higher values reduce estimation time.
    low_pass_jump : int, optional
        STL parameter:
        Positive integer determining the linear interpolation step, by default 1.
        If larger than 1, the LOESS is used every low_pass_jump points and values
        between the two are linearly interpolated. Higher values reduce estimation
        time.
    inner_iter : int, optional
        STL parameter:
        Number of iterations to perform in the inner loop, by default None.
        If not provided uses 2 if robust is True, or 5 if not. This param goes into
        STL.fit() from statsmodels.
    outer_iter : int, optional
        STL parameter:
        Number of iterations to perform in the outer loop, by default None.
        If not provided uses 15 if robust is True, or 0 if not.
        This param goes into STL.fit() from statsmodels.
    random_state : int, np.random.RandomState or None, by default None
        Controls the randomness of the estimator

    See Also
    --------
    sktime.transformations.bootstrap.MovingBlockBootstrapTransformer :
        Transformer that applies the Moving Block Bootstrapping method to create
        a panel of synthetic time series.

    References
    ----------
    .. [1] Bergmeir, C., Hyndman, R. J., & Benítez, J. M. (2016). Bagging exponential
        smoothing methods using STL decomposition and Box-Cox transformation.
        International Journal of Forecasting, 32(2), 303-312
    .. [2] Hyndman, R.J., & Athanasopoulos, G. (2021) Forecasting: principles and
        practice, 3rd edition, OTexts: Melbourne, Australia. OTexts.com/fpp3,
        Chapter 12.5. Accessed on February 13th 2022.
    .. [3] Kunsch HR (1989) The jackknife and the bootstrap for general stationary
        observations. Annals of Statistics 17(3), 1217-1241
    .. [4] https://www.statsmodels.org/dev/generated/statsmodels.tsa.seasonal.STL.html

    Examples
    --------
    >>> from sktime.transformations.bootstrap import STLBootstrapTransformer
    >>> from sktime.datasets import load_airline
    >>> from sktime.utils.plotting import plot_series
    >>> y = load_airline()
    >>> transformer = STLBootstrapTransformer(10)
    >>> y_hat = transformer.fit_transform(y)
    >>> series_list = []
    >>> names = []
    >>> for group, series in y_hat.groupby(level=[0], as_index=False):
    ...     series.index = series.index.droplevel(0)
    ...     series_list.append(series)
    ...     names.append(group)
    >>> plot_series(*series_list, labels=names)
    (...)
    >>> print(y_hat.head()) # doctest: +NORMALIZE_WHITESPACE
                          Number of airline passengers
    series_id time_index
    actual    1949-01                            112.0
              1949-02                            118.0
              1949-03                            132.0
              1949-04                            129.0
              1949-05                            121.0
    """

    _tags = {
        # todo: what is the scitype of X: Series, or Panel
        "scitype:transform-input": "Series",
        # todo: what scitype is returned: Primitives, Series, Panel
        "scitype:transform-output": "Panel",
        # todo: what is the scitype of y: None (not needed), Primitives, Series, Panel
        "scitype:transform-labels": "None",
        "scitype:instancewise": True,  # is this an instance-wise transform?
        "X_inner_mtype": "pd.Series",  # which mtypes do _fit/_predict support for X?
        # X_inner_mtype can be Panel mtype even if transform-input is Series, vectorized
        "y_inner_mtype": "None",  # which mtypes do _fit/_predict support for y?
        "capability:inverse_transform": False,
        "skip-inverse-transform": True,  # is inverse-transform skipped when called?
        "univariate-only": True,  # can the transformer handle multivariate X?
        "handles-missing-data": False,  # can estimator handle missing data?
        "X-y-must-have-same-index": False,  # can estimator handle different X/y index?
        "enforce_index_type": None,  # index type that needs to be enforced in X/y
        "fit_is_empty": False,  # is fit empty and can be skipped? Yes = True
        "transform-returns-same-time-index": False,
    }

    def __init__(
        self,
        n_series: int = 10,
        sp: int = 12,
        block_length: int = None,
        sampling_replacement: bool = False,
        return_actual: bool = True,
        lambda_bounds: Tuple = None,
        lambda_method: str = "guerrero",
        seasonal: int = 7,
        trend: int = None,
        low_pass: int = None,
        seasonal_deg: int = 1,
        trend_deg: int = 1,
        low_pass_deg: int = 1,
        robust: bool = False,
        seasonal_jump: int = 1,
        trend_jump: int = 1,
        low_pass_jump: int = 1,
        inner_iter: int = None,
        outer_iter: int = None,
        random_state: Union[int, np.random.RandomState] = None,
    ):
        # Hyper-parameters are stored unchanged; validation happens in _fit.
        self.n_series = n_series
        self.sp = sp
        self.block_length = block_length
        self.sampling_replacement = sampling_replacement
        self.return_actual = return_actual
        self.lambda_bounds = lambda_bounds
        self.lambda_method = lambda_method
        self.seasonal = seasonal
        self.trend = trend
        self.low_pass = low_pass
        self.seasonal_deg = seasonal_deg
        self.trend_deg = trend_deg
        self.low_pass_deg = low_pass_deg
        self.robust = robust
        self.seasonal_jump = seasonal_jump
        self.trend_jump = trend_jump
        self.low_pass_jump = low_pass_jump
        self.inner_iter = inner_iter
        self.outer_iter = outer_iter
        self.random_state = random_state

        super().__init__()

    def _fit(self, X, y=None):
        """Fit transformer to X and y.

        private _fit containing the core logic, called from fit

        Validates ``sp``, resolves the block length into ``self.block_length_``
        and fits a ``BoxCoxTransformer`` on ``X``, stored as
        ``self.box_cox_transformer_``.

        Parameters
        ----------
        X : pd.Series
            Data to be transformed
        y : ignored, for interface compatibility

        Returns
        -------
        self: reference to self

        Raises
        ------
        ValueError
            If ``sp`` is not an integer, or if ``X`` does not contain more than
            ``sp`` observations.
        NotImplementedError
            If ``sp`` is 1 or smaller, i.e. the data is treated as non-seasonal.
        """
        # Check the type before any comparison: ``"12" <= 1`` would otherwise
        # fail with an unrelated TypeError instead of the intended ValueError.
        if not isinstance(self.sp, int):
            raise ValueError(
                "sp parameter of STLBootstrapTransformer must be an integer"
            )

        if self.sp <= 1:
            raise NotImplementedError(
                "STLBootstrapTransformer does not support non-seasonal data"
            )

        # The series must be strictly longer than one seasonal period.
        if len(X) <= self.sp:
            raise ValueError(
                "STLBootstrapTransformer requires that sp is smaller than"
                " the length of X"
            )

        if self.block_length is not None:
            self.block_length_ = self.block_length
        else:
            # heuristic: the smaller of two seasonal periods and len(X) - sp
            self.block_length_ = min(self.sp * 2, len(X) - self.sp)

        # fit boxcox to get lambda and transform X
        self.box_cox_transformer_ = BoxCoxTransformer(
            sp=self.sp, bounds=self.lambda_bounds, method=self.lambda_method
        )
        self.box_cox_transformer_.fit(X)

        return self

    def _transform(self, X, y=None):
        """Transform X and return a transformed version.

        private _transform containing core logic, called from transform

        Parameters
        ----------
        X : pd.Series
            Data to be transformed
        y : ignored, for interface compatibility

        Returns
        -------
        transformed version of X
            ``pd.DataFrame`` with a ("series_id", "time_index") multiindex and
            one value column; contains the "actual" series first when
            ``return_actual`` is True, followed by ``n_series`` series labelled
            "synthetic_<i>".

        Raises
        ------
        ValueError
            If ``X`` is not strictly longer than the fitted block length.
        """
        if len(X) <= self.block_length_:
            raise ValueError(
                "STLBootstrapTransformer requires that block_length is"
                " strictly smaller than the length of X"
            )

        X_index = X.index
        X_transformed = self.box_cox_transformer_.transform(X)

        # fit STL on X_transformed series and extract trend, seasonal and residuals
        stl = _STL(
            X_transformed,
            period=self.sp,
            seasonal=self.seasonal,
            trend=self.trend,
            low_pass=self.low_pass,
            seasonal_deg=self.seasonal_deg,
            trend_deg=self.trend_deg,
            low_pass_deg=self.low_pass_deg,
            robust=self.robust,
            seasonal_jump=self.seasonal_jump,
            trend_jump=self.trend_jump,
            low_pass_jump=self.low_pass_jump,
        ).fit(inner_iter=self.inner_iter, outer_iter=self.outer_iter)
        seasonal = pd.Series(stl.seasonal, index=X_index)
        resid = pd.Series(stl.resid, index=X_index)
        trend = pd.Series(stl.trend, index=X_index)

        # name of the single value column of the output frame
        col_name = _get_series_name(X)

        # initialize the list of frames that will store the bootstrapped series
        if self.return_actual:
            df_list = [
                pd.DataFrame(
                    X.values,
                    index=pd.MultiIndex.from_product(
                        iterables=[["actual"], X_index],
                        names=["series_id", "time_index"],
                    ),
                    columns=[col_name],
                )
            ]
        else:
            df_list = []

        # set the random state once so that every synthetic series draws from
        # the same generator
        rng = check_random_state(self.random_state)

        # create multiple series: bootstrapped residuals are recombined with
        # the seasonal and trend components, then mapped back via the inverse
        # Box-Cox transform
        for i in range(self.n_series):
            new_series = self.box_cox_transformer_.inverse_transform(
                _moving_block_bootstrap(
                    ts=resid,
                    block_length=self.block_length_,
                    replacement=self.sampling_replacement,
                    random_state=rng,
                )
                + seasonal
                + trend
            )

            new_series_id = f"synthetic_{i}"
            new_df_index = pd.MultiIndex.from_product(
                iterables=[[new_series_id], new_series.index],
                names=["series_id", "time_index"],
            )

            df_list.append(
                pd.DataFrame(
                    data=new_series.values, index=new_df_index, columns=[col_name]
                )
            )

        return pd.concat(df_list)

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator.

        Parameters
        ----------
        parameter_set : str, default="default"
            Name of the set of test parameters to return, for use in tests. If no
            special parameters are defined for a value, will return `"default"` set.

        Returns
        -------
        params : dict or list of dict, default = {}
            Parameters to create testing instances of the class
            Each dict are parameters to construct an "interesting" test instance, i.e.,
            `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance.
            `create_test_instance` uses the first (or only) dictionary in `params`
        """
        # every set uses sp=3 and varies one further option
        params = [
            {"sp": 3},
            {"block_length": 1, "sp": 3},
            {"return_actual": False, "sp": 3},
            {"sampling_replacement": True, "sp": 3},
        ]

        return params
"use_arma_errors": False, "n_jobs": 1, }, Prophet: { "n_changepoints": 0, "yearly_seasonality": False, "weekly_seasonality": False, "daily_seasonality": False, "uncertainty_samples": 1000, "verbose": False, }, PartialAutoCorrelationTransformer: {"n_lags": 1}, AutoCorrelationTransformer: {"n_lags": 1}, Imputer: {"method": "mean"}, HampelFilter: {"window_length": 3}, OptionalPassthrough: {"transformer": BoxCoxTransformer(), "passthrough": True}, } # We use estimator tags in addition to class hierarchies to further distinguish # estimators into different categories. This is useful for defining and running # common tests for estimators with the same tags. VALID_ESTIMATOR_TAGS = ( "fit-in-transform", # fitted in transform or non-fittable "univariate-only", "transform-returns-same-time-index", "handles-missing-data", "skip-inverse-transform", ) # These methods should not change the state of the estimator, that is, they should # not change fitted parameters or hyper-parameters. They are also the methods that