Beispiel #1
0
def learn(series_data):
    model = TransformedTargetForecaster([
        ("deseasonalise", Deseasonalizer(model="multiplicative", sp=7)),
        ("detrend", Detrender(forecaster=PolynomialTrendForecaster(degree=4))),
        ("forecast", PolynomialTrendForecaster(degree=4))
    ])
    model.fit(series_data[:-2])
    return model
Beispiel #2
0
    def _fit(self, X, y=None):
        """Fit transformer to X and y.

        private _fit containing the core logic, called from fit

        Parameters
        ----------
        X : pd.Series or pd.DataFrame
            Data to fit transform to
        y : pd.DataFrame, default=None
            Additional data, e.g., labels for transformation

        Returns
        -------
        self: a fitted instance of the estimator
        """
        z = X
        if self.forecaster is None:
            self.forecaster = PolynomialTrendForecaster(degree=1)

        # multivariate
        if isinstance(z, pd.DataFrame):
            self.forecaster_ = {}
            for colname in z.columns:
                forecaster = clone(self.forecaster)
                self.forecaster_[colname] = forecaster.fit(X[colname], y)
        # univariate
        else:
            forecaster = clone(self.forecaster)
            self.forecaster_ = forecaster.fit(z, X)
        return self
Beispiel #3
0
    def fit(self, Z, X=None):
        """
        Compute the trend in the series

        Parameters
        ----------
        Y : pd.Series
            Endogenous time series to fit a trend to.
        X : pd.DataFrame, optional (default=None)
            Exogenous variables

        Returns
        -------
        self : an instance of self
        """
        self._is_fitted = False
        z = check_series(Z)
        if self.forecaster is None:
            self.forecaster = PolynomialTrendForecaster(degree=1)

        # multivariate
        if isinstance(z, pd.DataFrame):
            self.forecaster_ = {}
            for colname in z.columns:
                forecaster = clone(self.forecaster)
                self.forecaster_[colname] = forecaster.fit(z[colname], X)
        # univariate
        else:
            forecaster = clone(self.forecaster)
            self.forecaster_ = forecaster.fit(z, X)
        self._is_fitted = True
        return self
Beispiel #4
0
    def transform(self, Z, X=None):
        """Transform data.

        Parameters
        ----------
        Z : pd.Series
            Series to transform.
        X : pd.DataFrame, optional (default=None)
            Exogenous data used in transformation.

        Returns
        -------
        theta_lines: ndarray or pd.DataFrame
            Transformed series: single Theta-line or a pd.DataFrame of
            shape: len(Z)*len(self.theta).
        """
        self.check_is_fitted()
        z = check_series(Z, enforce_univariate=True)
        theta = _check_theta(self.theta)

        forecaster = PolynomialTrendForecaster()
        forecaster.fit(z)
        fh = ForecastingHorizon(z.index, is_relative=False)
        trend = forecaster.predict(fh)

        theta_lines = np.zeros((z.shape[0], len(theta)))
        for i, theta in enumerate(theta):
            theta_lines[:, i] = _theta_transform(z, trend, theta)
        if isinstance(self.theta, (float, int)):
            return pd.Series(theta_lines.flatten(), index=z.index)
        else:
            return pd.DataFrame(theta_lines, columns=self.theta, index=z.index)
Beispiel #5
0
def test_polynomial_detrending():
    import numpy as np
    import pandas as pd

    from sktime.forecasting.tests.test_trend import get_expected_polynomial_coefs
    from sktime.forecasting.trend import PolynomialTrendForecaster
    from sktime.transformations.series.detrend import Detrender

    y = pd.Series(np.arange(20) * 0.5) + np.random.normal(0, 1, size=20)
    forecaster = PolynomialTrendForecaster(degree=1, with_intercept=True)
    transformer = Detrender(forecaster)
    transformer.fit(y)

    # check coefficients
    actual_coefs = transformer.forecaster_.regressor_.steps[-1][-1].coef_
    expected_coefs = get_expected_polynomial_coefs(y,
                                                   degree=1,
                                                   with_intercept=True)[::-1]
    np.testing.assert_array_almost_equal(actual_coefs, expected_coefs)

    # check trend
    expected_trend = expected_coefs[0] + np.arange(len(y)) * expected_coefs[1]
    actual_trend = transformer.forecaster_.predict(-np.arange(len(y)))
    np.testing.assert_array_almost_equal(actual_trend, expected_trend)

    # check residuals
    actual = transformer.transform(y)
    expected = y - expected_trend
    np.testing.assert_array_almost_equal(actual, expected)
Beispiel #6
0
def test_pipeline():
    y = load_airline()
    y_train, y_test = temporal_train_test_split(y)

    forecaster = TransformedTargetForecaster([
        ("t1", Deseasonalizer(sp=12, model="multiplicative")),
        ("t2", Detrender(PolynomialTrendForecaster(degree=1))),
        ("forecaster", NaiveForecaster()),
    ])
    fh = np.arange(len(y_test)) + 1
    forecaster.fit(y_train, fh=fh)
    actual = forecaster.predict()

    def compute_expected_y_pred(y_train, fh):
        # fitting
        yt = y_train.copy()
        t1 = Deseasonalizer(sp=12, model="multiplicative")
        yt = t1.fit_transform(yt)
        t2 = Detrender(PolynomialTrendForecaster(degree=1))
        yt = t2.fit_transform(yt)
        forecaster = NaiveForecaster()
        forecaster.fit(yt, fh=fh)

        # predicting
        y_pred = forecaster.predict()
        y_pred = t2.inverse_transform(y_pred)
        y_pred = t1.inverse_transform(y_pred)
        return y_pred

    expected = compute_expected_y_pred(y_train, fh)
    np.testing.assert_array_equal(actual, expected)
Beispiel #7
0
    def _fit(self, X, y=None):
        """Fit transformer to X and y.

        private _fit containing the core logic, called from fit

        Parameters
        ----------
        X : pd.Series or pd.DataFrame
            Data to fit transform to
        y : pd.DataFrame, default=None
            Additional data, e.g., labels for transformation

        Returns
        -------
        self: a fitted instance of the estimator
        """
        if self.forecaster is None:
            self.forecaster = PolynomialTrendForecaster(degree=1)

        # univariate: X is pd.Series
        if isinstance(X, pd.Series):
            forecaster = clone(self.forecaster)
            # note: the y in the transformer is exogeneous in the forecaster, i.e., X
            self.forecaster_ = forecaster.fit(y=X, X=y)
        # multivariate
        elif isinstance(X, pd.DataFrame):
            self.forecaster_ = {}
            for colname in X.columns:
                forecaster = clone(self.forecaster)
                self.forecaster_[colname] = forecaster.fit(y=X[colname], X=y)
        else:
            raise TypeError("X must be pd.Series or pd.DataFrame")

        return self
Beispiel #8
0
    def _transform(self, X, y=None):
        """Transform X and return a transformed version.

        private _transform containing the core logic, called from transform

        Parameters
        ----------
        X : pd.Series or pd.DataFrame
            Data to be transformed
        y : ignored argument for interface compatibility
            Additional data, e.g., labels for transformation

        Returns
        -------
        theta_lines: pd.Series or pd.DataFrame
            Transformed series
            pd.Series, with single Theta-line, if self.theta is float
            pd.DataFrame of shape: [len(X), len(self.theta)], if self.theta is tuple
        """
        z = X
        theta = _check_theta(self.theta)

        forecaster = PolynomialTrendForecaster()
        forecaster.fit(z)
        fh = ForecastingHorizon(z.index, is_relative=False)
        trend = forecaster.predict(fh)

        theta_lines = np.zeros((z.shape[0], len(theta)))
        for i, theta in enumerate(theta):
            theta_lines[:, i] = _theta_transform(z, trend, theta)
        if isinstance(self.theta, (float, int)):
            return pd.Series(theta_lines.flatten(), index=z.index)
        else:
            return pd.DataFrame(theta_lines, columns=self.theta, index=z.index)
Beispiel #9
0
def test_constant_trend():
    y = pd.Series(np.arange(30))
    fh = -np.arange(30)  # in-sample fh

    forecaster = PolynomialTrendForecaster(degree=1)
    y_pred = forecaster.fit(y).predict(fh)

    np.testing.assert_array_almost_equal(y, y_pred)
Beispiel #10
0
    def _transform(self, X, y=None):
        """Transform X and return a transformed version.

        private _transform containing the core logic, called from transform

        Parameters
        ----------
        X : pd.Series or pd.DataFrame
            Data to be transformed
        y : ignored argument for interface compatibility
            Additional data, e.g., labels for transformation

        Returns
        -------
        Z : pd.Series or pd.DataFrame, same type as X
            transformed version of X
        """
        self._check_method()
        Z = X.copy()

        # replace missing_values with np.nan
        if self.missing_values:
            Z = Z.replace(to_replace=self.missing_values, value=np.nan)

        if not _has_missing_values(Z):
            return Z

        if self.method == "random":
            if isinstance(Z, pd.DataFrame):
                for col in Z:
                    Z[col] = Z[col].apply(lambda i: self._get_random(Z[col])
                                          if np.isnan(i) else i)
            else:
                Z = Z.apply(lambda i: self._get_random(Z)
                            if np.isnan(i) else i)
        elif self.method == "constant":
            Z = Z.fillna(value=self.value)
        elif self.method in ["backfill", "bfill", "pad", "ffill"]:
            Z = Z.fillna(method=self.method)
        elif self.method == "drift":
            forecaster = PolynomialTrendForecaster(degree=1)
            Z = _impute_with_forecaster(forecaster, Z)
        elif self.method == "forecaster":
            forecaster = clone(self.forecaster)
            Z = _impute_with_forecaster(forecaster, Z)
        elif self.method == "mean":
            Z = Z.fillna(value=Z.mean())
        elif self.method == "median":
            Z = Z.fillna(value=Z.median())
        elif self.method in ["nearest", "linear"]:
            Z = Z.interpolate(method=self.method)
        else:
            raise ValueError(f"`method`: {self.method} not available.")
        # fill first/last elements of series,
        # as some methods (e.g. "linear") cant impute those
        Z = Z.fillna(method="ffill").fillna(method="backfill")
        return Z
Beispiel #11
0
    def transform(self, Z, X=None):
        """Transform data.

        Returns a transformed version of Z.

        Parameters
        ----------
        Z : pd.Series, pd.DataFrame

        Returns
        -------
        Z : pd.Series, pd.DataFrame
            Transformed time series(es).
        """
        self.check_is_fitted()
        self._check_method()
        Z = check_series(Z)
        Z = Z.copy()

        # replace missing_values with np.nan
        if self.missing_values:
            Z = Z.replace(to_replace=self.missing_values, value=np.nan)

        if not _has_missing_values(Z):
            return Z

        elif self.method == "random":
            if isinstance(Z, pd.DataFrame):
                for col in Z:
                    Z[col] = Z[col].apply(
                        lambda i: self._get_random(Z[col]) if np.isnan(i) else i
                    )
            else:
                Z = Z.apply(lambda i: self._get_random(Z) if np.isnan(i) else i)
        elif self.method == "constant":
            Z = Z.fillna(value=self.value)
        elif self.method in ["backfill", "bfill", "pad", "ffill"]:
            Z = Z.fillna(method=self.method)
        elif self.method == "drift":
            forecaster = PolynomialTrendForecaster(degree=1)
            Z = _impute_with_forecaster(forecaster, Z)
        elif self.method == "forecaster":
            forecaster = clone(self.forecaster)
            Z = _impute_with_forecaster(forecaster, Z)
        elif self.method == "mean":
            Z = Z.fillna(value=Z.mean())
        elif self.method == "median":
            Z = Z.fillna(value=Z.median())
        elif self.method in ["nearest", "linear"]:
            Z = Z.interpolate(method=self.method)
        else:
            raise ValueError(f"`method`: {self.method} not available.")
        # fill first/last elements of series,
        # as some methods (e.g. "linear") cant impute those
        Z = Z.fillna(method="ffill").fillna(method="backfill")
        return Z
Beispiel #12
0
def test_linear_detrending():
    y = load_airline()

    f = PolynomialTrendForecaster(degree=1, with_intercept=True)
    t = Detrender(f)
    a = t.fit_transform(y)

    b = compute_expected_detrend(y, 1, with_intercept=True)

    np.testing.assert_allclose(a, b)
Beispiel #13
0
def check_trend(degree, with_intercept):
    """Helper function to check trend"""
    y = load_airline()
    f = PolynomialTrendForecaster(degree=degree, with_intercept=with_intercept)
    f.fit(y)
    a = f.regressor_.steps[-1][1].coef_[
        ::-1]  # intercept is added in reverse order

    b = compute_expected_coefs(y, degree, with_intercept)
    np.testing.assert_allclose(a, b)
Beispiel #14
0
def _test_trend(degree, with_intercept):
    """Helper function to check trend"""
    y = make_forecasting_problem()
    forecaster = PolynomialTrendForecaster(degree=degree,
                                           with_intercept=with_intercept)
    forecaster.fit(y)

    # check coefficients
    # intercept is added in reverse order
    actual = forecaster.regressor_.steps[-1][1].coef_[::-1]
    expected = get_expected_polynomial_coefs(y, degree, with_intercept)
    np.testing.assert_allclose(actual, expected)
Beispiel #15
0
    def transform(self, Z, X=None):
        """Transform data.
        Returns a transformed version of Z.

        Parameters
        ----------
        Z : pd.Series

        Returns
        -------
        z : pd.Series
            Transformed time series.
        """
        self.check_is_fitted()
        self._check_method()
        z = check_series(Z, enforce_univariate=True)

        # replace missing_values with np.nan
        if self.missing_values:
            z = z.replace(to_replace=self.missing_values, value=np.nan)

        if self.method == "random":
            z = z.apply(lambda x: self._get_random(z) if np.isnan(x) else x)
        elif self.method == "constant":
            z = z.fillna(value=self.value)
        elif self.method in ["backfill", "bfill", "pad", "ffill"]:
            z = z.fillna(method=self.method)
        elif self.method in ["drift", "forecaster"]:
            if self.method == "forecaster":
                forecaster = self.forecaster
            else:
                forecaster = PolynomialTrendForecaster(degree=1)
            # in-sample forecasting horizon
            fh_ins = -np.arange(len(z))
            # fill NaN before fitting with ffill and backfill (heuristic)
            z_pred = forecaster.fit(
                z.fillna(method="ffill").fillna(method="backfill")).predict(
                    fh=fh_ins)
            # fill with trend values
            z = z.fillna(value=z_pred)
        elif self.method == "mean":
            z = z.fillna(value=z.mean())
        elif self.method == "median":
            z = z.fillna(value=z.median())
        elif self.method in ["nearest", "linear"]:
            z = z.interpolate(method=self.method)
        else:
            raise ValueError(f"method {self.method} not available")
        return z
Beispiel #16
0
    def compute_expected_y_pred(y_train, fh):
        # fitting
        yt = y_train.copy()
        t1 = Deseasonalizer(sp=12, model="multiplicative")
        yt = t1.fit_transform(yt)
        t2 = Detrender(PolynomialTrendForecaster(degree=1))
        yt = t2.fit_transform(yt)
        forecaster = NaiveForecaster()
        forecaster.fit(yt, fh=fh)

        # predicting
        y_pred = forecaster.predict()
        y_pred = t2.inverse_transform(y_pred)
        y_pred = t1.inverse_transform(y_pred)
        return y_pred
Beispiel #17
0
    def fit(self, Z, X=None):
        """
        Compute the trend in the series

        Parameters
        ----------
        Y : pd.Series
            Endogenous time series to fit a trend to.
        X : pd.DataFrame, optional (default=None)
            Exogenous variables

        Returns
        -------
        self : an instance of self
        """
        z = check_series(Z, enforce_univariate=True)
        if self.forecaster is None:
            self.forecaster = PolynomialTrendForecaster(degree=1)
        forecaster = clone(self.forecaster)
        self.forecaster_ = forecaster.fit(z, X)
        self._is_fitted = True
        return self
Beispiel #18
0
import numpy as np
import pandas as pd
import pytest

from sktime.forecasting.compose import EnsembleForecaster
from sktime.forecasting.compose._ensemble import VALID_AGG_FUNCS
from sktime.forecasting.exp_smoothing import ExponentialSmoothing
from sktime.forecasting.naive import NaiveForecaster
from sktime.forecasting.trend import PolynomialTrendForecaster
from sktime.utils._testing.forecasting import make_forecasting_problem


@pytest.mark.parametrize(
    "forecasters",
    [
        [("trend", PolynomialTrendForecaster()), ("naive", NaiveForecaster())],
        [("trend", PolynomialTrendForecaster()), ("ses", ExponentialSmoothing())],
    ],
)
def test_avg_mean(forecasters):
    """Assert `mean` aggfunc returns the same values as `average` with equal weights."""
    y = make_forecasting_problem()
    forecaster = EnsembleForecaster(forecasters)
    forecaster.fit(y, fh=[1, 2, 3])
    mean_pred = forecaster.predict()

    forecaster_1 = EnsembleForecaster(forecasters, aggfunc="mean", weights=[1, 1])
    forecaster_1.fit(y, fh=[1, 2, 3])
    avg_pred = forecaster_1.predict()

    pd.testing.assert_series_equal(mean_pred, avg_pred)
Beispiel #19
0
def genforecast(data):
    from sktime.forecasting.model_selection import temporal_train_test_split
    import numpy as np
    import math
    y_train, y_test = temporal_train_test_split(data)
    fh = np.arange(1, len(y_test) + 1)
    testct = len(y_test)

    from sktime.forecasting.naive import NaiveForecaster
    forecaster = NaiveForecaster(strategy="drift")
    forecaster.fit(y_train)
    y_pred_naive = forecaster.predict(fh)
    from sktime.performance_metrics.forecasting import smape_loss
    naive_acc = round(smape_loss(y_pred_naive, y_test), 4)
    #full model dev and forecast next 5 days
    forecaster.fit(data)
    futurewin = np.arange(1, 6)  # 5 day in future prediction
    fut_pred = forecaster.predict(futurewin)
    min_naive = round(min(fut_pred), 2)
    max_naive = round(max(fut_pred), 2)

    from sktime.forecasting.trend import PolynomialTrendForecaster
    forecaster = PolynomialTrendForecaster(degree=1)
    forecaster.fit(y_train)
    y_pred_poly = forecaster.predict(fh)
    from sktime.performance_metrics.forecasting import smape_loss
    poly_acc = round(smape_loss(y_pred_poly, y_test), 4)
    #full model dev and forecast next 5 days
    forecaster.fit(data)
    futurewin = np.arange(1, 6)  # 5 day in future prediction
    fut_pred = forecaster.predict(futurewin)
    min_poly = round(min(fut_pred), 2)
    max_poly = round(max(fut_pred), 2)

    from sktime.forecasting.compose import EnsembleForecaster
    from sktime.forecasting.exp_smoothing import ExponentialSmoothing
    sp1 = math.floor(len(y_test) / 4)
    sp2 = min(sp1, 12)
    spval = max(2, sp2)
    forecaster = EnsembleForecaster([
        ("ses", ExponentialSmoothing(seasonal="multiplicative", sp=spval)),
        ("holt",
         ExponentialSmoothing(trend="add",
                              damped=False,
                              seasonal="multiplicative",
                              sp=spval)),
        ("damped",
         ExponentialSmoothing(trend="add",
                              damped=True,
                              seasonal="multiplicative",
                              sp=spval))
    ])
    forecaster.fit(y_train)
    y_pred_ensem = forecaster.predict(fh)
    ensem_acc = round(smape_loss(y_test, y_pred_ensem), 4)
    #full model dev and forecast next 5 days
    forecaster.fit(data)
    futurewin = np.arange(1, 6)  # 5 day in future prediction
    fut_pred = forecaster.predict(futurewin)
    min_ensem = round(min(fut_pred), 2)
    max_ensem = round(max(fut_pred), 2)

    from sklearn.neighbors import KNeighborsRegressor
    regressor = KNeighborsRegressor(n_neighbors=1)
    from sktime.forecasting.compose import ReducedRegressionForecaster
    forecaster = ReducedRegressionForecaster(regressor=regressor,
                                             window_length=15,
                                             strategy="recursive")
    param_grid = {"window_length": [5, 10, 15]}
    from sktime.forecasting.model_selection import SlidingWindowSplitter
    from sktime.forecasting.model_selection import ForecastingGridSearchCV
    # we fit the forecaster on the initial window, and then use temporal cross-validation to find the optimal parameter
    cv = SlidingWindowSplitter(initial_window=int(len(y_train) * 0.5))
    gscv = ForecastingGridSearchCV(forecaster, cv=cv, param_grid=param_grid)
    gscv.fit(y_train)
    y_pred_redreg = gscv.predict(fh)
    redreg_acc = round(smape_loss(y_test, y_pred_redreg), 4)
    #full model dev and forecast next 5 days
    gscv.fit(data)
    futurewin = np.arange(1, 6)  # 5 day in future prediction
    fut_pred = gscv.predict(futurewin)
    min_redreg = round(min(fut_pred), 2)
    max_redreg = round(max(fut_pred), 2)

    return min_naive, max_naive, min_poly, max_poly, min_ensem, max_ensem, min_redreg, max_redreg, y_test, testct, y_pred_naive, naive_acc, y_pred_poly, poly_acc, y_pred_ensem, ensem_acc, y_pred_redreg, redreg_acc
Beispiel #20
0
    best_idx = gscv.best_index_
    assert best_idx == actual.argmin()

    best_params = gscv.best_params_
    assert best_params == param_grid[best_idx]

    # Check if best parameters are contained in best forecaster.
    best_forecaster_params = gscv.best_forecaster_.get_params()
    best_params = gscv.best_params_
    assert best_params.items() <= best_forecaster_params.items()


NAIVE = NaiveForecaster(strategy="mean")
NAIVE_GRID = {"window_length": TEST_WINDOW_LENGTHS}
PIPE = TransformedTargetForecaster([
    ("transformer", Detrender(PolynomialTrendForecaster())),
    ("forecaster", ARIMA()),
])
PIPE_GRID = {
    "transformer__forecaster__degree": [1, 2],
    "forecaster__with_intercept": [True, False],
}
CVs = [
    *[SingleWindowSplitter(fh=fh) for fh in TEST_OOS_FHS],
    SlidingWindowSplitter(fh=1, initial_window=15),
]


@pytest.mark.parametrize("forecaster, param_grid", [(NAIVE, NAIVE_GRID),
                                                    (PIPE, PIPE_GRID)])
@pytest.mark.parametrize("scoring", TEST_METRICS)
Beispiel #21
0
    def transform(self, Z, X=None):
        """Transform data.
        Returns a transformed version of Z.

        Parameters
        ----------
        Z : pd.Series, pd.DataFrame

        Returns
        -------
        Z : pd.Series, pd.DataFrame
            Transformed time series(es).
        """
        self.check_is_fitted()
        self._check_method()
        Z = check_series(Z)

        # replace missing_values with np.nan
        if self.missing_values:
            Z = Z.replace(to_replace=self.missing_values, value=np.nan)

        if self.method == "random":
            if isinstance(Z, pd.DataFrame):
                for col in Z:
                    Z[col] = Z[col].apply(lambda i: self._get_random(Z[col])
                                          if np.isnan(i) else i)
            else:
                Z = Z.apply(lambda i: self._get_random(Z)
                            if np.isnan(i) else i)
        elif self.method == "constant":
            Z = Z.fillna(value=self.value)
        elif self.method in ["backfill", "bfill", "pad", "ffill"]:
            Z = Z.fillna(method=self.method)
        elif self.method in ["drift", "forecaster"]:
            if self.method == "forecaster":
                forecaster = self.forecaster
            else:
                forecaster = PolynomialTrendForecaster(degree=1)
            # in-sample forecasting horizon
            fh_ins = -np.arange(len(Z))
            # fill NaN before fitting with ffill and backfill (heuristic)
            Z = Z.fillna(method="ffill").fillna(method="backfill")
            # multivariate
            if isinstance(Z, pd.DataFrame):
                for col in Z:
                    forecaster.fit(y=Z[col])
                    Z_pred = forecaster.predict(fh=fh_ins)
                    Z[col] = Z[col].fillna(value=Z_pred)
            # univariate
            else:
                forecaster.fit(y=Z)
                Z_pred = forecaster.predict(fh=fh_ins)
                Z = Z.fillna(value=Z_pred)
        elif self.method == "mean":
            Z = Z.fillna(value=Z.mean())
        elif self.method == "median":
            Z = Z.fillna(value=Z.median())
        elif self.method in ["nearest", "linear"]:
            Z = Z.interpolate(method=self.method)
        else:
            raise ValueError(f"method {self.method} not available")
        # fill first/last elements of series,
        # as some methods (e.g. "linear") cant impute those
        Z = Z.fillna(method="ffill").fillna(method="backfill")
        return Z
Beispiel #22
0
import pytest
import sys

from scipy.stats import gmean
from sktime.forecasting.compose import EnsembleForecaster
from sktime.forecasting.exp_smoothing import ExponentialSmoothing
from sktime.forecasting.naive import NaiveForecaster
from sktime.forecasting.trend import PolynomialTrendForecaster

from sktime.utils._testing.forecasting import make_forecasting_problem


@pytest.mark.parametrize(
    "forecasters",
    [
        [("trend", PolynomialTrendForecaster()), ("naive", NaiveForecaster())],
        [("trend", PolynomialTrendForecaster()),
         ("ses", ExponentialSmoothing())],
    ],
)
def test_avg_mean(forecasters):
    """Assert `mean` aggfunc returns the same values as `average` with equal weights."""
    y = make_forecasting_problem()
    forecaster = EnsembleForecaster(forecasters)
    forecaster.fit(y, fh=[1, 2, 3])
    mean_pred = forecaster.predict()

    forecaster_1 = EnsembleForecaster(forecasters,
                                      aggfunc="mean",
                                      weights=[1, 1])
    forecaster_1.fit(y, fh=[1, 2, 3])
Beispiel #23
0
import numpy as np
import pandas as pd
import pytest

from sktime.forecasting.compose import ColumnEnsembleForecaster
from sktime.forecasting.exp_smoothing import ExponentialSmoothing
from sktime.forecasting.naive import NaiveForecaster
from sktime.forecasting.trend import PolynomialTrendForecaster


@pytest.mark.parametrize(
    "forecasters",
    [
        [
            ("trend", PolynomialTrendForecaster(), 0),
            ("naive", NaiveForecaster(), 1),
            ("ses", ExponentialSmoothing(), 2),
        ]
    ],
)
@pytest.mark.parametrize(
    "fh", [(np.arange(1, 11)), (np.arange(1, 33)), (np.arange(1, 3))]
)
def test_column_ensemble_shape(forecasters, fh):
    """Check the shape of the returned prediction."""
    y = pd.DataFrame(np.random.randint(0, 100, size=(100, 3)), columns=list("ABC"))
    forecaster = ColumnEnsembleForecaster(forecasters)
    forecaster.fit(y, fh=fh)
    actual = forecaster.predict()
    assert actual.shape == (len(fh), y.shape[1])
def forecast(data,
             customer_id,
             start='2017-01',
             end='2019-04',
             model_type='NaiveForecaster',
             test_size_month=5,
             model_storage_path=''):
    """
    Main function for build forecasting model on selected customer and time interval, save the model and plotting

    Parameters
    ----------
    data: pandas DataFrame
        main dataset with customer_id, product_id and Timestamp

    customer_id: int

    start: string
        start year and month in '2020-01' format

    end: string
        end year and month in '2020-01' format *** this month will not be included ***

    model_type:
        type of model to use in forecasting
        select from : ['NaiveForecaster', 'PolynomialTrendForecaster', 'ThetaForecaster', 'KNeighborsRegressor',
                       'ExponentialSmoothing', 'AutoETS', 'AutoARIMA', 'TBATS', 'BATS', 'EnsembleForecaster']

    test_size_month:
        number of month that will be excluded from end of interval to use as test dataset

    model_storage_path: string
        the folder that you want to store saved models
    Returns
    -------
    sMAPE Loss: print

    plot: matplotlib figure
        plot train, test and predicted values
    """
    y_train, y_test = temporal_train_test_split(prepare_data(data,
                                                             customer_id,
                                                             start=start,
                                                             end=end),
                                                test_size=test_size_month)
    fh = ForecastingHorizon(y_test.index, is_relative=False)

    if model_type == 'NaiveForecaster':
        forecaster = NaiveForecaster(strategy="last", sp=12)
    elif model_type == 'PolynomialTrendForecaster':
        forecaster = PolynomialTrendForecaster(degree=2)
    elif model_type == 'ThetaForecaster':
        forecaster = ThetaForecaster(sp=6)
    elif model_type == 'KNeighborsRegressor':
        regressor = KNeighborsRegressor(n_neighbors=1)
        forecaster = ReducedRegressionForecaster(regressor=regressor,
                                                 window_length=12,
                                                 strategy="recursive")
    elif model_type == 'ExponentialSmoothing':
        forecaster = ExponentialSmoothing(trend="add",
                                          seasonal="multiplicative",
                                          sp=12)
    elif model_type == 'AutoETS':
        forecaster = AutoETS(auto=True, sp=12, n_jobs=-1)
    elif model_type == 'AutoARIMA':
        forecaster = AutoARIMA(sp=12, suppress_warnings=True)
    elif model_type == 'TBATS':
        forecaster = TBATS(sp=12, use_trend=True, use_box_cox=False)
    elif model_type == 'BATS':
        forecaster = BATS(sp=12, use_trend=True, use_box_cox=False)
    elif model_type == 'EnsembleForecaster':
        forecaster = EnsembleForecaster([
            ("ses", ExponentialSmoothing(seasonal="multiplicative", sp=12)),
            (
                "holt",
                ExponentialSmoothing(trend="add",
                                     damped_trend=False,
                                     seasonal="multiplicative",
                                     sp=12),
            ),
            (
                "damped",
                ExponentialSmoothing(trend="add",
                                     damped_trend=True,
                                     seasonal="multiplicative",
                                     sp=12),
            ),
        ])

    try:
        forecaster.fit(y_train)
    except:
        forecaster.fit(y_train + 1)

    y_pred = forecaster.predict(fh)
    dump(
        forecaster,
        f'{model_storage_path}/{customer_id}_{model_type}_{start}_{end}_{test_size_month}.model'
    )

    print('sMAPE Loss :', smape_loss(y_pred, y_test))
    plot = plot_series(y_train,
                       y_test,
                       y_pred,
                       labels=["y_train", "y_test", "y_pred"])
    return plot
Beispiel #25
0
        y_test_subset = y_test.loc[
            y_pred.index
        ]  # select only time points which we predicted
        scores[i] = scoring(y_test_subset, y_pred)
    return scores


@pytest.mark.parametrize(
    "forecaster, param_dict",
    [
        (NaiveForecaster(strategy="mean"), {"window_length": TEST_WINDOW_LENGTHS}),
        # atomic estimator
        (
            TransformedTargetForecaster(
                [  # composite estimator
                    ("t", Detrender(PolynomialTrendForecaster())),
                    ("f", ReducedForecaster(LinearRegression(), scitype="regressor")),
                ]
            ),
            {
                "f__window_length": TEST_WINDOW_LENGTHS,
                "f__step_length": TEST_STEP_LENGTHS,
            },
        ),  # multiple params
    ],
)
@pytest.mark.parametrize(
    "scoring",
    [sMAPE(), make_forecasting_scorer(mean_squared_error, greater_is_better=False)],
)
@pytest.mark.parametrize(
Beispiel #26
0
pd.DataFrame(gscv.cv_results_)

# ## Pipelines

# In[86]:

from sktime.forecasting.compose import TransformedTargetForecaster
from sktime.transformers.single_series.detrend import Detrender, Deseasonalizer
from sktime.forecasting.trend import PolynomialTrendForecaster

# ### Detrending

# In[126]:

model = PolynomialTrendForecaster(degree=1)
transformer = Detrender(model)

yt = transformer.fit_transform(train)
trendline = model.fit(train).predict(fh=-np.arange(len(train)))

plot_ys(train, trendline, yt, labels=['series', 'trend', 'detrended'])

# ### Pipelining

# In[130]:

forecaster = TransformedTargetForecaster([
    ("deseasonalise", Deseasonalizer(model="multiplicative", sp=12)),
    ("detrend", Detrender(forecaster=PolynomialTrendForecaster(degree=1))),
    ("forecast",