Example #1
0
    def _transform(self, X, y=None):
        """Compute the Theta-line(s) of X.

        Private worker called from ``transform``. A default
        ``PolynomialTrendForecaster`` is fitted to X and predicted on X's own
        index; every validated theta coefficient is then combined with X and
        that trend through ``_theta_transform``.

        Parameters
        ----------
        X : pd.Series or pd.DataFrame
            Data to be transformed
        y : ignored argument for interface compatibility

        Returns
        -------
        pd.Series or pd.DataFrame
            pd.Series indexed like X when ``self.theta`` is a float or int,
            otherwise a pd.DataFrame indexed like X whose columns are
            ``self.theta``.
        """
        coefficients = _check_theta(self.theta)

        # in-sample trend: fit on X, predict on X's own (absolute) index
        trend_model = PolynomialTrendForecaster()
        trend_model.fit(X)
        in_sample_fh = ForecastingHorizon(X.index, is_relative=False)
        trend = trend_model.predict(in_sample_fh)

        # one column per theta coefficient
        lines = np.zeros((X.shape[0], len(coefficients)))
        for column, coefficient in enumerate(coefficients):
            lines[:, column] = _theta_transform(X, trend, coefficient)

        if not isinstance(self.theta, (float, int)):
            return pd.DataFrame(lines, columns=self.theta, index=X.index)
        return pd.Series(lines.flatten(), index=X.index)
Example #2
0
    def transform(self, Z, X=None):
        """Compute the Theta-line(s) of a univariate series.

        A default ``PolynomialTrendForecaster`` is fitted to the validated
        series and predicted on the series' own index; every validated theta
        coefficient is then combined with the series and that trend through
        ``_theta_transform``.

        Parameters
        ----------
        Z : pd.Series
            Series to transform.
        X : pd.DataFrame, optional (default=None)
            Exogenous data; not used by this method.

        Returns
        -------
        pd.Series or pd.DataFrame
            pd.Series indexed like Z when ``self.theta`` is a float or int,
            otherwise a pd.DataFrame indexed like Z whose columns are
            ``self.theta``.
        """
        self.check_is_fitted()
        series = check_series(Z, enforce_univariate=True)
        coefficients = _check_theta(self.theta)

        # in-sample trend: fit on the series, predict on its own index
        trend_model = PolynomialTrendForecaster()
        trend_model.fit(series)
        in_sample_fh = ForecastingHorizon(series.index, is_relative=False)
        trend = trend_model.predict(in_sample_fh)

        # one column per theta coefficient
        lines = np.zeros((series.shape[0], len(coefficients)))
        for column, coefficient in enumerate(coefficients):
            lines[:, column] = _theta_transform(series, trend, coefficient)

        if not isinstance(self.theta, (float, int)):
            return pd.DataFrame(lines, columns=self.theta, index=series.index)
        return pd.Series(lines.flatten(), index=series.index)
Example #3
0
def learn(series_data):
    """Fit a deseasonalise -> detrend -> forecast pipeline.

    The pipeline is fitted on ``series_data`` without its last two
    observations.

    Parameters
    ----------
    series_data : sliceable time series
        Observations to learn from; ``series_data[:-2]`` is used for fitting.

    Returns
    -------
    TransformedTargetForecaster
        The fitted pipeline.
    """
    deseasonalizer = Deseasonalizer(model="multiplicative", sp=7)
    detrender = Detrender(forecaster=PolynomialTrendForecaster(degree=4))
    final_forecaster = PolynomialTrendForecaster(degree=4)

    pipeline = TransformedTargetForecaster([
        ("deseasonalise", deseasonalizer),
        ("detrend", detrender),
        ("forecast", final_forecaster),
    ])
    training_part = series_data[:-2]
    pipeline.fit(training_part)
    return pipeline
Example #4
0
def test_constant_trend():
    """Check that a degree-1 trend forecaster reproduces a linear series."""
    n_timepoints = 30
    series = pd.Series(np.arange(n_timepoints))
    in_sample_fh = -np.arange(n_timepoints)  # in-sample fh

    fitted = PolynomialTrendForecaster(degree=1).fit(series)
    predictions = fitted.predict(in_sample_fh)

    np.testing.assert_array_almost_equal(series, predictions)
Example #5
0
def check_trend(degree, with_intercept):
    """Compare fitted trend coefficients on the airline data with expected ones.

    Parameters
    ----------
    degree : polynomial degree passed to ``PolynomialTrendForecaster``
    with_intercept : intercept flag passed to ``PolynomialTrendForecaster``
    """
    series = load_airline()
    model = PolynomialTrendForecaster(degree=degree,
                                      with_intercept=with_intercept)
    model.fit(series)

    # coefficients live on the last step of the fitted regressor pipeline;
    # they are reversed because the intercept is added in reverse order
    final_step = model.regressor_.steps[-1]
    fitted_coefs = final_step[1].coef_[::-1]

    reference_coefs = compute_expected_coefs(series, degree, with_intercept)
    np.testing.assert_allclose(fitted_coefs, reference_coefs)
Example #6
0
def _test_trend(degree, with_intercept):
    """Compare fitted trend coefficients on generated data with expected ones.

    Parameters
    ----------
    degree : polynomial degree passed to ``PolynomialTrendForecaster``
    with_intercept : intercept flag passed to ``PolynomialTrendForecaster``
    """
    series = make_forecasting_problem()
    model = PolynomialTrendForecaster(
        degree=degree, with_intercept=with_intercept
    )
    model.fit(series)

    # coefficients live on the last step of the fitted regressor pipeline;
    # they are reversed because the intercept is added in reverse order
    final_step = model.regressor_.steps[-1]
    fitted_coefs = final_step[1].coef_[::-1]
    reference_coefs = get_expected_polynomial_coefs(series, degree,
                                                    with_intercept)
    np.testing.assert_allclose(fitted_coefs, reference_coefs)
Example #7
0
    def transform(self, Z, X=None):
        """Impute missing values in a univariate series.

        Occurrences of ``self.missing_values`` are first mapped to ``np.nan``;
        the NaNs are then filled according to ``self.method``:

        - "random": each NaN is replaced by ``self._get_random(z)``
        - "constant": filled with ``self.value``
        - "backfill", "bfill", "pad", "ffill": passed to ``fillna(method=...)``
        - "drift": filled with in-sample predictions of a degree-1
          ``PolynomialTrendForecaster``
        - "forecaster": filled with in-sample predictions of
          ``self.forecaster``
        - "mean", "median": filled with that statistic of the series
        - "nearest", "linear": passed to ``interpolate(method=...)``

        Parameters
        ----------
        Z : pd.Series
            Series to impute.
        X : ignored, present for interface compatibility

        Returns
        -------
        z : pd.Series
            Transformed time series.

        Raises
        ------
        ValueError
            If ``self.method`` is none of the methods listed above.
        """
        self.check_is_fitted()
        self._check_method()
        z = check_series(Z, enforce_univariate=True)

        # Map the user-defined placeholder to np.nan. Compare against None
        # instead of relying on truthiness, so that falsy placeholders such as
        # 0 or "" are honoured as well.
        if self.missing_values is not None:
            z = z.replace(to_replace=self.missing_values, value=np.nan)

        if self.method == "random":
            z = z.apply(lambda x: self._get_random(z) if np.isnan(x) else x)
        elif self.method == "constant":
            z = z.fillna(value=self.value)
        elif self.method in ["backfill", "bfill", "pad", "ffill"]:
            z = z.fillna(method=self.method)
        elif self.method in ["drift", "forecaster"]:
            if self.method == "forecaster":
                forecaster = self.forecaster
            else:
                forecaster = PolynomialTrendForecaster(degree=1)
            # in-sample forecasting horizon
            fh_ins = -np.arange(len(z))
            # the forecaster is fitted on a copy pre-filled with ffill and
            # backfill (heuristic); the NaNs of z itself stay in place
            z_pred = forecaster.fit(
                z.fillna(method="ffill").fillna(method="backfill")).predict(
                    fh=fh_ins)
            # fill with trend values
            z = z.fillna(value=z_pred)
        elif self.method == "mean":
            z = z.fillna(value=z.mean())
        elif self.method == "median":
            z = z.fillna(value=z.median())
        elif self.method in ["nearest", "linear"]:
            z = z.interpolate(method=self.method)
        else:
            raise ValueError(f"method {self.method} not available")
        return z
Example #8
0
    def _fit(self, X, y=None):
        """Fit transformer to X and y.

        private _fit containing the core logic, called from fit

        A clone of ``self.forecaster`` is fitted to X (one clone per column
        when X is a pd.DataFrame). The transformer's ``y`` is forwarded to the
        forecaster as its exogenous data.

        Parameters
        ----------
        X : pd.Series or pd.DataFrame
            Data to fit transform to
        y : pd.DataFrame, default=None
            Additional data, e.g., labels for transformation

        Returns
        -------
        self: a fitted instance of the estimator
        """
        if self.forecaster is None:
            self.forecaster = PolynomialTrendForecaster(degree=1)

        # multivariate: one fitted forecaster per column, keyed by column name
        if isinstance(X, pd.DataFrame):
            self.forecaster_ = {}
            for colname in X.columns:
                forecaster = clone(self.forecaster)
                self.forecaster_[colname] = forecaster.fit(X[colname], y)
        # univariate
        else:
            forecaster = clone(self.forecaster)
            # forward y (not X itself) as exogenous data, consistent with the
            # multivariate branch; passing X twice would hand the forecaster
            # its own target as a regressor
            self.forecaster_ = forecaster.fit(X, y)
        return self
Example #9
0
def test_polynomial_detrending():
    """Check coefficients, trend and residuals of a linear Detrender."""
    import numpy as np
    import pandas as pd

    from sktime.forecasting.tests.test_trend import get_expected_polynomial_coefs
    from sktime.forecasting.trend import PolynomialTrendForecaster
    from sktime.transformations.series.detrend import Detrender

    # noisy linear series
    linear_part = pd.Series(np.arange(20) * 0.5)
    y = linear_part + np.random.normal(0, 1, size=20)

    detrender = Detrender(
        PolynomialTrendForecaster(degree=1, with_intercept=True)
    )
    detrender.fit(y)

    # check coefficients
    fitted_coefs = detrender.forecaster_.regressor_.steps[-1][-1].coef_
    reference_coefs = get_expected_polynomial_coefs(
        y, degree=1, with_intercept=True
    )[::-1]
    np.testing.assert_array_almost_equal(fitted_coefs, reference_coefs)

    # check trend
    time_steps = np.arange(len(y))
    reference_trend = reference_coefs[0] + time_steps * reference_coefs[1]
    fitted_trend = detrender.forecaster_.predict(-np.arange(len(y)))
    np.testing.assert_array_almost_equal(fitted_trend, reference_trend)

    # check residuals
    residuals = detrender.transform(y)
    np.testing.assert_array_almost_equal(residuals, y - reference_trend)
Example #10
0
    def _fit(self, X, y=None):
        """Fit the detrending forecaster(s) to X.

        Private worker called from ``fit``. When no forecaster was supplied, a
        degree-1 ``PolynomialTrendForecaster`` is stored on ``self.forecaster``.
        A clone is fitted to a pd.Series; for a pd.DataFrame one clone per
        column is fitted and stored in a dict keyed by column name.

        Parameters
        ----------
        X : pd.Series or pd.DataFrame
            Data to fit transform to
        y : pd.DataFrame, default=None
            Additional data; handed to the forecaster as its exogenous ``X``

        Returns
        -------
        self: a fitted instance of the estimator

        Raises
        ------
        TypeError
            If X is neither a pd.Series nor a pd.DataFrame.
        """
        if self.forecaster is None:
            self.forecaster = PolynomialTrendForecaster(degree=1)

        # univariate case: a single fitted forecaster
        if isinstance(X, pd.Series):
            # the transformer's y plays the role of the forecaster's X
            self.forecaster_ = clone(self.forecaster).fit(y=X, X=y)
            return self

        if not isinstance(X, pd.DataFrame):
            raise TypeError("X must be pd.Series or pd.DataFrame")

        # multivariate case: one fitted forecaster per column
        self.forecaster_ = {}
        for colname in X.columns:
            column_forecaster = clone(self.forecaster)
            self.forecaster_[colname] = column_forecaster.fit(
                y=X[colname], X=y
            )
        return self
Example #11
0
def test_pipeline():
    """Check the pipeline forecast equals the manually chained steps."""
    y = load_airline()
    y_train, y_test = temporal_train_test_split(y)

    pipeline = TransformedTargetForecaster([
        ("t1", Deseasonalizer(sp=12, model="multiplicative")),
        ("t2", Detrender(PolynomialTrendForecaster(degree=1))),
        ("forecaster", NaiveForecaster()),
    ])
    fh = np.arange(len(y_test)) + 1
    pipeline.fit(y_train, fh=fh)
    actual = pipeline.predict()

    def compute_expected_y_pred(y_train, fh):
        # fitting: apply both transformers, then fit the final forecaster
        deseasonalizer = Deseasonalizer(sp=12, model="multiplicative")
        detrender = Detrender(PolynomialTrendForecaster(degree=1))
        transformed = deseasonalizer.fit_transform(y_train.copy())
        transformed = detrender.fit_transform(transformed)
        naive = NaiveForecaster()
        naive.fit(transformed, fh=fh)

        # predicting: undo the transformations in reverse order
        prediction = naive.predict()
        prediction = detrender.inverse_transform(prediction)
        return deseasonalizer.inverse_transform(prediction)

    expected = compute_expected_y_pred(y_train, fh)
    np.testing.assert_array_equal(actual, expected)
Example #12
0
    def fit(self, Z, X=None):
        """Fit the trend forecaster(s) to the series.

        When no forecaster was supplied, a degree-1
        ``PolynomialTrendForecaster`` is stored on ``self.forecaster``. A clone
        is fitted to a univariate series; for a pd.DataFrame one clone per
        column is fitted and stored in a dict keyed by column name.

        Parameters
        ----------
        Z : pd.Series or pd.DataFrame
            Endogenous time series to fit a trend to.
        X : pd.DataFrame, optional (default=None)
            Exogenous variables, passed on to the forecaster

        Returns
        -------
        self : an instance of self
        """
        self._is_fitted = False
        series = check_series(Z)
        if self.forecaster is None:
            self.forecaster = PolynomialTrendForecaster(degree=1)

        if not isinstance(series, pd.DataFrame):
            # univariate: a single fitted forecaster
            self.forecaster_ = clone(self.forecaster).fit(series, X)
        else:
            # multivariate: one fitted forecaster per column
            self.forecaster_ = {}
            for colname in series.columns:
                column_forecaster = clone(self.forecaster)
                self.forecaster_[colname] = column_forecaster.fit(
                    series[colname], X
                )

        self._is_fitted = True
        return self
Example #13
0
    def _transform(self, X, y=None):
        """Transform X and return a transformed version.

        private _transform containing the core logic, called from transform

        Works on a copy of X. Occurrences of ``self.missing_values`` are first
        mapped to ``np.nan``; if nothing is missing afterwards the copy is
        returned as is. Otherwise the NaNs are filled according to
        ``self.method`` and any NaNs still left are filled by ffill followed
        by backfill.

        Parameters
        ----------
        X : pd.Series or pd.DataFrame
            Data to be transformed
        y : ignored argument for interface compatibility
            Additional data, e.g., labels for transformation

        Returns
        -------
        Z : pd.Series or pd.DataFrame, same type as X
            transformed version of X

        Raises
        ------
        ValueError
            If ``self.method`` is not one of the supported methods.
        """
        self._check_method()
        Z = X.copy()

        # Map the user-defined placeholder to np.nan. Compare against None
        # instead of relying on truthiness, so that falsy placeholders such as
        # 0 or "" are honoured as well.
        if self.missing_values is not None:
            Z = Z.replace(to_replace=self.missing_values, value=np.nan)

        if not _has_missing_values(Z):
            return Z

        if self.method == "random":
            if isinstance(Z, pd.DataFrame):
                for col in Z:
                    Z[col] = Z[col].apply(lambda i: self._get_random(Z[col])
                                          if np.isnan(i) else i)
            else:
                Z = Z.apply(lambda i: self._get_random(Z)
                            if np.isnan(i) else i)
        elif self.method == "constant":
            Z = Z.fillna(value=self.value)
        elif self.method in ["backfill", "bfill", "pad", "ffill"]:
            Z = Z.fillna(method=self.method)
        elif self.method == "drift":
            forecaster = PolynomialTrendForecaster(degree=1)
            Z = _impute_with_forecaster(forecaster, Z)
        elif self.method == "forecaster":
            # clone so that the user-supplied forecaster is not fitted itself
            forecaster = clone(self.forecaster)
            Z = _impute_with_forecaster(forecaster, Z)
        elif self.method == "mean":
            Z = Z.fillna(value=Z.mean())
        elif self.method == "median":
            Z = Z.fillna(value=Z.median())
        elif self.method in ["nearest", "linear"]:
            Z = Z.interpolate(method=self.method)
        else:
            raise ValueError(f"`method`: {self.method} not available.")
        # fill first/last elements of series,
        # as some methods (e.g. "linear") cant impute those
        Z = Z.fillna(method="ffill").fillna(method="backfill")
        return Z
Example #14
0
    def transform(self, Z, X=None):
        """Transform data.

        Returns a transformed version of Z.

        Works on a copy of the validated input. Occurrences of
        ``self.missing_values`` are first mapped to ``np.nan``; if nothing is
        missing afterwards the copy is returned as is. Otherwise the NaNs are
        filled according to ``self.method`` and any NaNs still left are filled
        by ffill followed by backfill.

        Parameters
        ----------
        Z : pd.Series, pd.DataFrame
        X : ignored, present for interface compatibility

        Returns
        -------
        Z : pd.Series, pd.DataFrame
            Transformed time series(es).

        Raises
        ------
        ValueError
            If ``self.method`` is not one of the supported methods.
        """
        self.check_is_fitted()
        self._check_method()
        Z = check_series(Z)
        Z = Z.copy()

        # Map the user-defined placeholder to np.nan. Compare against None
        # instead of relying on truthiness, so that falsy placeholders such as
        # 0 or "" are honoured as well.
        if self.missing_values is not None:
            Z = Z.replace(to_replace=self.missing_values, value=np.nan)

        if not _has_missing_values(Z):
            return Z

        elif self.method == "random":
            if isinstance(Z, pd.DataFrame):
                for col in Z:
                    Z[col] = Z[col].apply(
                        lambda i: self._get_random(Z[col]) if np.isnan(i) else i
                    )
            else:
                Z = Z.apply(lambda i: self._get_random(Z) if np.isnan(i) else i)
        elif self.method == "constant":
            Z = Z.fillna(value=self.value)
        elif self.method in ["backfill", "bfill", "pad", "ffill"]:
            Z = Z.fillna(method=self.method)
        elif self.method == "drift":
            forecaster = PolynomialTrendForecaster(degree=1)
            Z = _impute_with_forecaster(forecaster, Z)
        elif self.method == "forecaster":
            # clone so that the user-supplied forecaster is not fitted itself
            forecaster = clone(self.forecaster)
            Z = _impute_with_forecaster(forecaster, Z)
        elif self.method == "mean":
            Z = Z.fillna(value=Z.mean())
        elif self.method == "median":
            Z = Z.fillna(value=Z.median())
        elif self.method in ["nearest", "linear"]:
            Z = Z.interpolate(method=self.method)
        else:
            raise ValueError(f"`method`: {self.method} not available.")
        # fill first/last elements of series,
        # as some methods (e.g. "linear") cant impute those
        Z = Z.fillna(method="ffill").fillna(method="backfill")
        return Z
Example #15
0
def test_linear_detrending():
    """Check linear detrending of the airline data against expected values."""
    y = load_airline()

    trend_forecaster = PolynomialTrendForecaster(degree=1,
                                                 with_intercept=True)
    detrender = Detrender(trend_forecaster)
    actual = detrender.fit_transform(y)

    expected = compute_expected_detrend(y, 1, with_intercept=True)

    np.testing.assert_allclose(actual, expected)
Example #16
0
    def compute_expected_y_pred(y_train, fh):
        """Chain the pipeline steps by hand and return the forecast."""
        # fitting: apply both transformers, then fit the final forecaster
        deseasonalizer = Deseasonalizer(sp=12, model="multiplicative")
        detrender = Detrender(PolynomialTrendForecaster(degree=1))
        transformed = deseasonalizer.fit_transform(y_train.copy())
        transformed = detrender.fit_transform(transformed)
        naive = NaiveForecaster()
        naive.fit(transformed, fh=fh)

        # predicting: undo the transformations in reverse order
        prediction = naive.predict()
        prediction = detrender.inverse_transform(prediction)
        return deseasonalizer.inverse_transform(prediction)
Example #17
0
    def fit(self, Z, X=None):
        """Fit the trend forecaster to a univariate series.

        When no forecaster was supplied, a degree-1
        ``PolynomialTrendForecaster`` is stored on ``self.forecaster``; a
        clone of it is then fitted and kept as ``self.forecaster_``.

        Parameters
        ----------
        Z : pd.Series
            Endogenous time series to fit a trend to.
        X : pd.DataFrame, optional (default=None)
            Exogenous variables, passed on to the forecaster

        Returns
        -------
        self : an instance of self
        """
        series = check_series(Z, enforce_univariate=True)
        if self.forecaster is None:
            self.forecaster = PolynomialTrendForecaster(degree=1)

        self.forecaster_ = clone(self.forecaster).fit(series, X)
        self._is_fitted = True
        return self
Example #18
0
# Show the train/test series against the predictions, then report the sMAPE
# and the best parameters found by the grid search.
plot_ys(y_train, y_test, y_pred, labels=["y_train", "y_test", "y_pred"])
st.pyplot()
st.write("smape_loss(y_test, y_pred):", smape_loss(y_test, y_pred))
st.write("gscv.best_params_:", gscv.best_params_)

# Section text shown to the user (in Chinese). It says, roughly: the reduction
# approaches above do not account for seasonality or trend, but a pipeline can
# detrend the data first; sktime's generic detrender is a transformer that uses
# any forecaster and returns the in-sample residuals of its predictions.
st.write('''
    * Detrending
    请注意,到目前为止,上述减少方法并未考虑任何季节或趋势,但我们可以轻松地指定首先对数据进行趋势去除的管道。
    sktime提供了一个通用的去趋势器,它是一个使用任何预测器并返回预测器预测值的样本内残差的转换器。 
    例如,要删除时间序列的线性趋势,我们可以写成
''')

from sktime.forecasting.trend import PolynomialTrendForecaster
from sktime.transformers.single_series.detrend import Detrender
# linear detrending
forecaster = PolynomialTrendForecaster(degree=1)
transformer = Detrender(forecaster=forecaster)
yt = transformer.fit_transform(y_train)
# internally, the Detrender uses the in-sample predictions of the PolynomialTrendForecaster
# reproduce that trend by hand with a fresh forecaster so it can be plotted
forecaster = PolynomialTrendForecaster(degree=1)
fh_ins = -np.arange(len(y_train))  # in-sample forecasting horizon
y_pred = forecaster.fit(y_train).predict(fh=fh_ins)
# plot the original series, the fitted trend and the detrended series together
plot_ys(y_train,
        y_pred,
        yt,
        labels=["y_train", "Fitted linear trend", "Residuals"])
st.pyplot()

st.write('''
    * Pipelining
    让我们在管道中使用**去趋势剂**和**去季节化**。 
Example #19
0
    best_idx = gscv.best_index_
    assert best_idx == actual.argmin()

    best_params = gscv.best_params_
    assert best_params == param_grid[best_idx]

    # Check if best parameters are contained in best forecaster.
    best_forecaster_params = gscv.best_forecaster_.get_params()
    best_params = gscv.best_params_
    assert best_params.items() <= best_forecaster_params.items()


# Plain forecaster and the parameter grid searched for it.
NAIVE = NaiveForecaster(strategy="mean")
NAIVE_GRID = dict(window_length=TEST_WINDOW_LENGTHS)

# Composite forecaster (detrend, then ARIMA) and its nested parameter grid.
PIPE = TransformedTargetForecaster(
    [
        ("transformer", Detrender(PolynomialTrendForecaster())),
        ("forecaster", ARIMA()),
    ]
)
PIPE_GRID = dict(
    transformer__forecaster__degree=[1, 2],
    forecaster__with_intercept=[True, False],
)

# One single-window splitter per out-of-sample horizon, plus a sliding window.
CVs = [SingleWindowSplitter(fh=fh) for fh in TEST_OOS_FHS]
CVs.append(SlidingWindowSplitter(fh=1, initial_window=15))


@pytest.mark.parametrize("forecaster, param_grid", [(NAIVE, NAIVE_GRID),
                                                    (PIPE, PIPE_GRID)])
@pytest.mark.parametrize("scoring", TEST_METRICS)
Example #20
0
import numpy as np
import pandas as pd
import pytest

from sktime.forecasting.compose import EnsembleForecaster
from sktime.forecasting.compose._ensemble import VALID_AGG_FUNCS
from sktime.forecasting.exp_smoothing import ExponentialSmoothing
from sktime.forecasting.naive import NaiveForecaster
from sktime.forecasting.trend import PolynomialTrendForecaster
from sktime.utils._testing.forecasting import make_forecasting_problem


@pytest.mark.parametrize(
    "forecasters",
    [
        [
            ("trend", PolynomialTrendForecaster()),
            ("naive", NaiveForecaster()),
        ],
        [
            ("trend", PolynomialTrendForecaster()),
            ("ses", ExponentialSmoothing()),
        ],
    ],
)
def test_avg_mean(forecasters):
    """Check a default ensemble matches aggfunc="mean" with equal weights."""
    y = make_forecasting_problem()

    # ensemble with default aggregation settings
    default_ensemble = EnsembleForecaster(forecasters)
    default_ensemble.fit(y, fh=[1, 2, 3])
    default_prediction = default_ensemble.predict()

    # explicit "mean" aggregation with equal weights
    weighted_ensemble = EnsembleForecaster(
        forecasters, aggfunc="mean", weights=[1, 1]
    )
    weighted_ensemble.fit(y, fh=[1, 2, 3])
    weighted_prediction = weighted_ensemble.predict()

    pd.testing.assert_series_equal(default_prediction, weighted_prediction)
Example #21
0
import pytest
import sys

from scipy.stats import gmean
from sktime.forecasting.compose import EnsembleForecaster
from sktime.forecasting.exp_smoothing import ExponentialSmoothing
from sktime.forecasting.naive import NaiveForecaster
from sktime.forecasting.trend import PolynomialTrendForecaster

from sktime.utils._testing.forecasting import make_forecasting_problem


# Two ensemble compositions: trend + naive, and trend + exponential smoothing.
@pytest.mark.parametrize(
    "forecasters",
    [
        [("trend", PolynomialTrendForecaster()), ("naive", NaiveForecaster())],
        [("trend", PolynomialTrendForecaster()),
         ("ses", ExponentialSmoothing())],
    ],
)
def test_avg_mean(forecasters):
    """Assert `mean` aggfunc returns the same values as `average` with equal weights."""
    y = make_forecasting_problem()
    # ensemble with default aggregation settings
    forecaster = EnsembleForecaster(forecasters)
    forecaster.fit(y, fh=[1, 2, 3])
    mean_pred = forecaster.predict()

    # ensemble with explicit "mean" aggregation and equal weights
    forecaster_1 = EnsembleForecaster(forecasters,
                                      aggfunc="mean",
                                      weights=[1, 1])
    forecaster_1.fit(y, fh=[1, 2, 3])
    # NOTE(review): no prediction from forecaster_1 and no assertion follow in
    # the visible code; the snippet appears truncated - confirm against the
    # full test.
Example #22
0
    def transform(self, Z, X=None):
        """Transform data.
        Returns a transformed version of Z.

        Works on a copy of the validated input. Occurrences of
        ``self.missing_values`` are first mapped to ``np.nan``; the NaNs are
        then filled according to ``self.method`` and any NaNs still left are
        filled by ffill followed by backfill.

        Parameters
        ----------
        Z : pd.Series, pd.DataFrame
        X : ignored, present for interface compatibility

        Returns
        -------
        Z : pd.Series, pd.DataFrame
            Transformed time series(es).

        Raises
        ------
        ValueError
            If ``self.method`` is not one of the supported methods.
        """
        self.check_is_fitted()
        self._check_method()
        Z = check_series(Z)
        # work on a copy so the column-wise assignments below never write
        # into the caller's data
        Z = Z.copy()

        # Map the user-defined placeholder to np.nan. Compare against None
        # instead of relying on truthiness, so that falsy placeholders such as
        # 0 or "" are honoured as well.
        if self.missing_values is not None:
            Z = Z.replace(to_replace=self.missing_values, value=np.nan)

        if self.method == "random":
            if isinstance(Z, pd.DataFrame):
                for col in Z:
                    Z[col] = Z[col].apply(lambda i: self._get_random(Z[col])
                                          if np.isnan(i) else i)
            else:
                Z = Z.apply(lambda i: self._get_random(Z)
                            if np.isnan(i) else i)
        elif self.method == "constant":
            Z = Z.fillna(value=self.value)
        elif self.method in ["backfill", "bfill", "pad", "ffill"]:
            Z = Z.fillna(method=self.method)
        elif self.method in ["drift", "forecaster"]:
            if self.method == "forecaster":
                forecaster = self.forecaster
            else:
                forecaster = PolynomialTrendForecaster(degree=1)
            # in-sample forecasting horizon
            fh_ins = -np.arange(len(Z))
            # The forecaster cannot be fitted on NaNs, so it is fitted on a
            # ffill/backfill pre-filled copy (heuristic). Z itself must keep
            # its NaNs: overwriting Z here would leave nothing for the
            # predictions to fill.
            Z_filled = Z.fillna(method="ffill").fillna(method="backfill")
            # multivariate
            if isinstance(Z, pd.DataFrame):
                for col in Z:
                    forecaster.fit(y=Z_filled[col])
                    Z_pred = forecaster.predict(fh=fh_ins)
                    Z[col] = Z[col].fillna(value=Z_pred)
            # univariate
            else:
                forecaster.fit(y=Z_filled)
                Z_pred = forecaster.predict(fh=fh_ins)
                Z = Z.fillna(value=Z_pred)
        elif self.method == "mean":
            Z = Z.fillna(value=Z.mean())
        elif self.method == "median":
            Z = Z.fillna(value=Z.median())
        elif self.method in ["nearest", "linear"]:
            Z = Z.interpolate(method=self.method)
        else:
            raise ValueError(f"method {self.method} not available")
        # fill first/last elements of series,
        # as some methods (e.g. "linear") cant impute those
        Z = Z.fillna(method="ffill").fillna(method="backfill")
        return Z
Example #23
0
import numpy as np
import pandas as pd
import pytest

from sktime.forecasting.compose import ColumnEnsembleForecaster
from sktime.forecasting.exp_smoothing import ExponentialSmoothing
from sktime.forecasting.naive import NaiveForecaster
from sktime.forecasting.trend import PolynomialTrendForecaster


@pytest.mark.parametrize(
    "forecasters",
    [
        [
            ("trend", PolynomialTrendForecaster(), 0),
            ("naive", NaiveForecaster(), 1),
            ("ses", ExponentialSmoothing(), 2),
        ]
    ],
)
@pytest.mark.parametrize(
    "fh", [np.arange(1, 11), np.arange(1, 33), np.arange(1, 3)]
)
def test_column_ensemble_shape(forecasters, fh):
    """Check predictions have one row per horizon step and one column per series."""
    column_names = list("ABC")
    values = np.random.randint(0, 100, size=(100, 3))
    y = pd.DataFrame(values, columns=column_names)

    ensemble = ColumnEnsembleForecaster(forecasters)
    ensemble.fit(y, fh=fh)
    predictions = ensemble.predict()

    expected_shape = (len(fh), y.shape[1])
    assert predictions.shape == expected_shape
Example #24
0
        y_test_subset = y_test.loc[
            y_pred.index
        ]  # select only time points which we predicted
        scores[i] = scoring(y_test_subset, y_pred)
    return scores


@pytest.mark.parametrize(
    "forecaster, param_dict",
    [
        (NaiveForecaster(strategy="mean"), {"window_length": TEST_WINDOW_LENGTHS}),
        # atomic estimator
        (
            TransformedTargetForecaster(
                [  # composite estimator
                    ("t", Detrender(PolynomialTrendForecaster())),
                    ("f", ReducedForecaster(LinearRegression(), scitype="regressor")),
                ]
            ),
            {
                "f__window_length": TEST_WINDOW_LENGTHS,
                "f__step_length": TEST_STEP_LENGTHS,
            },
        ),  # multiple params
    ],
)
@pytest.mark.parametrize(
    "scoring",
    [sMAPE(), make_forecasting_scorer(mean_squared_error, greater_is_better=False)],
)
@pytest.mark.parametrize(
Example #25
0
# Notebook-style display of the grid-search results as a table.
pd.DataFrame(gscv.cv_results_)

# ## Pipelines

# In[86]:

from sktime.forecasting.compose import TransformedTargetForecaster
from sktime.transformers.single_series.detrend import Detrender, Deseasonalizer
from sktime.forecasting.trend import PolynomialTrendForecaster

# ### Detrending

# In[126]:

# Linear (degree-1) trend model, wrapped in a Detrender.
model = PolynomialTrendForecaster(degree=1)
transformer = Detrender(model)

# Detrended series, plus the trend line obtained by fitting `model` directly
# and predicting over the horizon -np.arange(len(train)).
# NOTE(review): `model` is the same instance that was handed to the Detrender;
# presumably the Detrender works on a clone - confirm.
yt = transformer.fit_transform(train)
trendline = model.fit(train).predict(fh=-np.arange(len(train)))

plot_ys(train, trendline, yt, labels=['series', 'trend', 'detrended'])

# ### Pipelining

# In[130]:

forecaster = TransformedTargetForecaster([
    ("deseasonalise", Deseasonalizer(model="multiplicative", sp=12)),
    ("detrend", Detrender(forecaster=PolynomialTrendForecaster(degree=1))),
    ("forecast",
def forecast(data,
             customer_id,
             start='2017-01',
             end='2019-04',
             model_type='NaiveForecaster',
             test_size_month=5,
             model_storage_path=''):
    """
    Build a forecasting model for one customer and time interval, save the
    model and plot the result.

    Parameters
    ----------
    data: pandas DataFrame
        main dataset with customer_id, product_id and Timestamp

    customer_id: int

    start: string
        start year and month in '2020-01' format

    end: string
        end year and month in '2020-01' format *** this month will not be included ***

    model_type:
        type of model to use in forecasting
        select from : ['NaiveForecaster', 'PolynomialTrendForecaster', 'ThetaForecaster', 'KNeighborsRegressor',
                       'ExponentialSmoothing', 'AutoETS', 'AutoARIMA', 'TBATS', 'BATS', 'EnsembleForecaster']

    test_size_month:
        number of month that will be excluded from end of interval to use as test dataset

    model_storage_path: string
        the folder that you want to store saved models; the default '' stores
        the model in the current working directory

    Returns
    -------
    plot: matplotlib figure
        plot train, test and predicted values

    The sMAPE loss is printed, not returned.

    Raises
    ------
    ValueError
        If ``model_type`` is not one of the supported model names.
    """
    import os

    y_train, y_test = temporal_train_test_split(prepare_data(data,
                                                             customer_id,
                                                             start=start,
                                                             end=end),
                                                test_size=test_size_month)
    fh = ForecastingHorizon(y_test.index, is_relative=False)

    if model_type == 'NaiveForecaster':
        forecaster = NaiveForecaster(strategy="last", sp=12)
    elif model_type == 'PolynomialTrendForecaster':
        forecaster = PolynomialTrendForecaster(degree=2)
    elif model_type == 'ThetaForecaster':
        forecaster = ThetaForecaster(sp=6)
    elif model_type == 'KNeighborsRegressor':
        regressor = KNeighborsRegressor(n_neighbors=1)
        forecaster = ReducedRegressionForecaster(regressor=regressor,
                                                 window_length=12,
                                                 strategy="recursive")
    elif model_type == 'ExponentialSmoothing':
        forecaster = ExponentialSmoothing(trend="add",
                                          seasonal="multiplicative",
                                          sp=12)
    elif model_type == 'AutoETS':
        forecaster = AutoETS(auto=True, sp=12, n_jobs=-1)
    elif model_type == 'AutoARIMA':
        forecaster = AutoARIMA(sp=12, suppress_warnings=True)
    elif model_type == 'TBATS':
        forecaster = TBATS(sp=12, use_trend=True, use_box_cox=False)
    elif model_type == 'BATS':
        forecaster = BATS(sp=12, use_trend=True, use_box_cox=False)
    elif model_type == 'EnsembleForecaster':
        forecaster = EnsembleForecaster([
            ("ses", ExponentialSmoothing(seasonal="multiplicative", sp=12)),
            (
                "holt",
                ExponentialSmoothing(trend="add",
                                     damped_trend=False,
                                     seasonal="multiplicative",
                                     sp=12),
            ),
            (
                "damped",
                ExponentialSmoothing(trend="add",
                                     damped_trend=True,
                                     seasonal="multiplicative",
                                     sp=12),
            ),
        ])
    else:
        # fail with a clear message instead of an UnboundLocalError later on
        raise ValueError(f"unknown model_type: {model_type!r}")

    # Best-effort retry: if fitting fails, fit on the series shifted by +1.
    # Only Exception is caught so KeyboardInterrupt/SystemExit still propagate.
    offset = 0
    try:
        forecaster.fit(y_train)
    except Exception:
        offset = 1
        forecaster.fit(y_train + offset)

    y_pred = forecaster.predict(fh)
    if offset:
        # undo the shift so predictions are on the scale of y_test again;
        # note that the persisted model below still predicts shifted values
        y_pred = y_pred - offset

    model_file = (f'{customer_id}_{model_type}_{start}_{end}'
                  f'_{test_size_month}.model')
    # os.path.join keeps an empty storage path relative to the working
    # directory instead of pointing at the filesystem root
    dump(forecaster, os.path.join(model_storage_path, model_file))

    print('sMAPE Loss :', smape_loss(y_pred, y_test))
    plot = plot_series(y_train,
                       y_test,
                       y_pred,
                       labels=["y_train", "y_test", "y_pred"])
    return plot
Example #27
0
def genforecast(data):
    """Evaluate four forecasters on a hold-out split and forecast 5 steps ahead.

    ``data`` is split with ``temporal_train_test_split``. A naive drift
    forecaster, a linear trend forecaster, an exponential smoothing ensemble
    and a grid-searched reduced-regression forecaster are each scored on the
    test part, then refitted on all of ``data`` to predict the next 5 steps.

    Parameters
    ----------
    data : time series accepted by ``temporal_train_test_split``

    Returns
    -------
    tuple
        (min_naive, max_naive, min_poly, max_poly, min_ensem, max_ensem,
        min_redreg, max_redreg, y_test, testct, y_pred_naive, naive_acc,
        y_pred_poly, poly_acc, y_pred_ensem, ensem_acc, y_pred_redreg,
        redreg_acc), where min_*/max_* are the rounded extremes of the 5-step
        forecast, y_pred_* the test-period predictions, *_acc the rounded
        sMAPE on the test part and testct the length of y_test.
    """
    import math

    import numpy as np
    from sklearn.neighbors import KNeighborsRegressor
    from sktime.forecasting.compose import EnsembleForecaster
    from sktime.forecasting.compose import ReducedRegressionForecaster
    from sktime.forecasting.exp_smoothing import ExponentialSmoothing
    from sktime.forecasting.model_selection import ForecastingGridSearchCV
    from sktime.forecasting.model_selection import SlidingWindowSplitter
    from sktime.forecasting.model_selection import temporal_train_test_split
    from sktime.forecasting.naive import NaiveForecaster
    from sktime.forecasting.trend import PolynomialTrendForecaster
    from sktime.performance_metrics.forecasting import smape_loss

    y_train, y_test = temporal_train_test_split(data)
    fh = np.arange(1, len(y_test) + 1)
    testct = len(y_test)

    def _evaluate(model):
        """Score ``model`` on the hold-out part, refit on all data, forecast."""
        model.fit(y_train)
        y_pred = model.predict(fh)
        accuracy = round(smape_loss(y_test, y_pred), 4)
        # full model dev and forecast next 5 steps
        model.fit(data)
        future_window = np.arange(1, 6)
        future_pred = model.predict(future_window)
        return (round(min(future_pred), 2), round(max(future_pred), 2),
                y_pred, accuracy)

    min_naive, max_naive, y_pred_naive, naive_acc = _evaluate(
        NaiveForecaster(strategy="drift"))

    min_poly, max_poly, y_pred_poly, poly_acc = _evaluate(
        PolynomialTrendForecaster(degree=1))

    # seasonal period derived from the test length, clamped to [2, 12]
    spval = max(2, min(math.floor(len(y_test) / 4), 12))
    ensemble = EnsembleForecaster([
        ("ses", ExponentialSmoothing(seasonal="multiplicative", sp=spval)),
        ("holt",
         ExponentialSmoothing(trend="add",
                              damped=False,
                              seasonal="multiplicative",
                              sp=spval)),
        ("damped",
         ExponentialSmoothing(trend="add",
                              damped=True,
                              seasonal="multiplicative",
                              sp=spval))
    ])
    min_ensem, max_ensem, y_pred_ensem, ensem_acc = _evaluate(ensemble)

    regressor = KNeighborsRegressor(n_neighbors=1)
    reduced = ReducedRegressionForecaster(regressor=regressor,
                                          window_length=15,
                                          strategy="recursive")
    param_grid = {"window_length": [5, 10, 15]}
    # we fit the forecaster on the initial window, and then use temporal
    # cross-validation to find the optimal parameter
    cv = SlidingWindowSplitter(initial_window=int(len(y_train) * 0.5))
    gscv = ForecastingGridSearchCV(reduced, cv=cv, param_grid=param_grid)
    min_redreg, max_redreg, y_pred_redreg, redreg_acc = _evaluate(gscv)

    return (min_naive, max_naive, min_poly, max_poly, min_ensem, max_ensem,
            min_redreg, max_redreg, y_test, testct, y_pred_naive, naive_acc,
            y_pred_poly, poly_acc, y_pred_ensem, ensem_acc, y_pred_redreg,
            redreg_acc)