Esempio n. 1
0
    def test_check_seasonality(self):
        """Check `check_seasonality` on a sinusoid whose period is 6 steps.

        The signal is sin(x * pi / 3 + pi / 2) sampled on 50 consecutive
        days, so it repeats every 6 samples. The test verifies that:

        * the period 6 is found when no candidate period is given,
        * the candidate period 3 is rejected,
        * a multivariate series (the series stacked with itself) triggers
          an ``AssertionError``.
        """

        def shifted_sine(x):
            return np.sin(x * np.pi / 3 + np.pi / 2)

        daily_index = pd.date_range("20130101", "20130219")
        raw_values = pd.Series(range(50), index=daily_index)
        series = TimeSeries.from_series(raw_values.map(shifted_sine))

        # Period inferred automatically.
        self.assertEqual((True, 6), check_seasonality(series))
        # An explicitly supplied, wrong period is reported as non-seasonal.
        self.assertEqual((False, 3), check_seasonality(series, m=3))

        # Multivariate input is not accepted.
        stacked = series.stack(series)
        with self.assertRaises(AssertionError):
            check_seasonality(stacked)
Esempio n. 2
0
 def fit(self, train: TimeSeries):
     """Fit the wrapped model on a (possibly deseasonalized) training series.

     When ``self.m > 1`` and the period ``self.m`` is found to be
     statistically significant, the multiplicative seasonal component is
     removed from ``train`` before fitting ``self.model``. The seasonal
     factors for the steps following the training series are then stored
     in ``self.seasonOut``. Otherwise ``self.seasonOut`` stays at the
     neutral factor ``1`` and the model is fitted on ``train`` unchanged.

     Parameters
     ----------
     train
         The training series.
     """
     super().fit(train)
     train_des = train
     self.seasonOut = 1
     if self.m > 1:
         # check_seasonality returns a (is_seasonal, period) tuple. A non-empty
         # tuple is always truthy, so the flag must be unpacked explicitly.
         is_seasonal, _ = check_seasonality(train, m=self.m, max_lag=2 * self.m)
         if is_seasonal:
             _, season = extract_trend_and_seasonality(
                 train, self.m, model=ModelMode.MULTIPLICATIVE
             )
             train_des = remove_from_series(
                 train, season, model=ModelMode.MULTIPLICATIVE
             )
             # Take the last full season, shift it `m` steps forward and
             # append a second copy of its values (2 * m factors overall).
             last_season = season[-self.m:].shift(self.m)
             self.seasonOut = last_season.append_values(last_season.values())
     self.model.fit(train_des)
Esempio n. 3
0
    def fit(self, series: TimeSeries):
        """Fit the model on ``series`` and return ``self``.

        Steps performed:

        1. Resolve the seasonal period. It is forced to 1 when
           ``self.season_mode`` is ``SeasonalityMode.NONE``. Otherwise the
           user-supplied ``self.seasonality_period`` is used, and if that is
           ``None`` the period is inferred with ``check_seasonality``.
        2. If the series is considered seasonal, extract the seasonal
           component, store it in ``self.seasonality`` and remove it.
        3. Fit a simple exponential smoothing model on the resulting values.
        4. Fit a degree-one polynomial on the values scaled by
           ``1 - theta`` and store the normalized slope in ``self.coef``.
        5. If the fitted smoothing level is exactly 0, refit the smoother
           passing ``ALPHA_START`` as ``initial_level``.
        """
        super().fit(series)
        ts = self.training_series
        self.length = len(ts)

        # Use the user-defined season period (checked for being > 1 below),
        # or infer it from the series when none was provided.
        self.season_period = (
            1
            if self.season_mode is SeasonalityMode.NONE
            else self.seasonality_period
        )
        if self.season_period is not None:
            # A user-defined period is trusted as a true seasonal period.
            self.is_seasonal = self.season_period > 1
        else:
            self.is_seasonal, self.season_period = check_seasonality(
                ts, self.season_period, max_lag=len(ts) // 2
            )

        # Store and strip the seasonal effect, if any.
        if self.is_seasonal:
            _, self.seasonality = extract_trend_and_seasonality(
                ts, self.season_period, model=self.season_mode
            )
            deseasonalized = remove_from_series(
                ts, self.seasonality, model=self.season_mode
            )
        else:
            deseasonalized = ts

        # SES part of the decomposition.
        self.model = hw.SimpleExpSmoothing(deseasonalized.values(copy=False)).fit()

        # Linear-regression part: keep only the degree-one coefficient.
        time_steps = np.arange(self.length)
        scaled_values = (1.0 - self.theta) * deseasonalized.values(copy=False)
        slope = np.polyfit(time_steps, scaled_values, 1)[0]

        # Normalization of the slope.
        self.coef = slope / (-self.theta)

        self.alpha = self.model.params["smoothing_level"]
        if self.alpha == 0.0:
            # Degenerate smoothing level: refit with an explicit initial level.
            smoother = hw.SimpleExpSmoothing(deseasonalized.values(copy=False))
            self.model = smoother.fit(initial_level=ALPHA_START)
            self.alpha = self.model.params["smoothing_level"]

        return self
Esempio n. 4
0
def naive2_groe(ts: TimeSeries, n: int, m: int):
    """
    Return the prediction of the naive2 baseline.

    The series is deseasonalized (multiplicative model) when ``m > 1`` and
    the period ``m`` is found to be statistically significant. A
    ``NaiveSeasonal(K=1)`` model is then fitted on the result and its
    ``n``-step forecast is multiplied back by the seasonal factors.

    Parameters
    ----------
    ts
        The series to forecast.
    n
        The number of steps to predict.
    m
        The candidate seasonal period; converted with ``int()``.

    Returns
    -------
    The ``n``-step forecast of ``NaiveSeasonal(K=1)``, re-seasonalized when
    a seasonal component was removed.
    """
    # It will be better to use R functions
    # Normalize once so that the test, the slicing and the shift all use the
    # same integer period.
    m = int(m)
    ts_des = ts
    season_out = 1
    if m > 1:
        # check_seasonality returns a (is_seasonal, period) tuple. A non-empty
        # tuple is always truthy, so the flag must be unpacked explicitly.
        is_seasonal, _ = check_seasonality(ts, m=m, max_lag=2 * m)
        if is_seasonal:
            _, season = extract_trend_and_seasonality(
                ts, m, model=ModelMode.MULTIPLICATIVE
            )
            ts_des = remove_from_series(ts, season, model=ModelMode.MULTIPLICATIVE)
            # Last full season shifted `m` steps forward, duplicated once
            # (2 * m factors), then truncated to the forecast horizon.
            season_out = season[-m:].shift(m)
            season_out = season_out.append_values(season_out.values())[:n]
    naive2 = NaiveSeasonal(K=1)

    naive2.fit(ts_des)
    return naive2.predict(n) * season_out
Esempio n. 5
0
    def fit(self, series):
        """Fit the model on ``series`` and return ``self``.

        The fit proceeds in the following stages:

        1. Optional normalization: when ``self.normalization`` is set, the
           series is divided by the mean of its per-column means. A mean
           close to zero is rejected through ``raise_if_not``.
        2. Seasonal period resolution: forced to 1 when ``self.season_mode``
           is ``SeasonalityMode.NONE``; otherwise ``self.seasonality_period``
           is used, or inferred with ``check_seasonality`` when it is ``None``.
        3. Deseasonalization, when the series is considered seasonal.
        4. Fallback to an additive model with a linear trend if any value of
           the (normalized, deseasonalized) series is ``<= 0``.
        5. Drift: a degree-one polynomial fitted on the values (linear trend)
           or on their logarithm (exponential trend).
        6. Theta line: combination of the values and the drift, either
           multiplicative or additive, on which a simple exponential
           smoothing model is fitted.
        7. In-sample fitted values: combination of the SES fitted values and
           the drift weighted by ``self.wses`` and ``self.wdrift``, with
           seasonality and normalization re-applied.

        Note that ``self.model_mode`` and ``self.trend_mode`` may be
        overwritten by the fallbacks described above.
        """
        super().fit(series)

        self.length = len(series)
        # normalization of data
        if self.normalization:
            # Mean of the per-column means of the underlying dataframe.
            self.mean = series.pd_dataframe(copy=False).mean().mean()
            raise_if_not(
                not np.isclose(self.mean, 0),
                "The mean value of the provided series is too close to zero to perform normalization",
                logger,
            )
            new_ts = series / self.mean
        else:
            new_ts = series

        # Check for statistical significance of user-defined season period
        # or infers season_period from the TimeSeries itself.
        if self.season_mode is SeasonalityMode.NONE:
            self.season_period = 1
        else:
            self.season_period = self.seasonality_period
        if self.season_period is None:
            # The seasonality test runs on the original (non-normalized) series.
            max_lag = len(series) // 2
            self.is_seasonal, self.season_period = check_seasonality(
                series, self.season_period, max_lag=max_lag
            )
        else:
            # force the user-defined seasonality to be considered as a true seasonal period.
            self.is_seasonal = self.season_period > 1

        # Store and remove seasonality effect if there is any.
        if self.is_seasonal:
            _, self.seasonality = extract_trend_and_seasonality(
                new_ts, self.season_period, model=self.season_mode
            )
            new_ts = remove_from_series(
                new_ts, self.seasonality, model=self.season_mode
            )

        ts_values = new_ts.univariate_values()
        # Any non-positive value (zeros included) triggers the fallback, since
        # the exponential trend below takes the logarithm of the values.
        if (ts_values <= 0).any():
            self.model_mode = ModelMode.ADDITIVE
            self.trend_mode = TrendMode.LINEAR
            logger.warning(
                "Time series has negative values. Fallback to additive and linear model"
            )

        # Drift part of the decomposition
        if self.trend_mode is TrendMode.LINEAR:
            linreg = ts_values
        else:
            linreg = np.log(ts_values)
        # Degree-one polynomial fitted against the time step index.
        self.drift = np.poly1d(np.polyfit(np.arange(self.length), linreg, 1))
        theta0_in = self.drift(np.arange(self.length))
        if self.trend_mode is TrendMode.EXPONENTIAL:
            # Undo the logarithm applied before the regression.
            theta0_in = np.exp(theta0_in)

        # Theta line: multiplicative combination only if the drift is strictly
        # positive; otherwise the additive combination is used.
        if (theta0_in > 0).all() and self.model_mode is ModelMode.MULTIPLICATIVE:
            theta_t = (ts_values**self.theta) * (theta0_in ** (1 - self.theta))
        else:
            if self.model_mode is ModelMode.MULTIPLICATIVE:
                logger.warning("Negative Theta line. Fallback to additive model")
                self.model_mode = ModelMode.ADDITIVE
            theta_t = self.theta * ts_values + (1 - self.theta) * theta0_in

        # SES part of the decomposition.
        self.model = hw.SimpleExpSmoothing(theta_t).fit()
        theta2_in = self.model.fittedvalues

        # Combine the SES fitted values and the drift using the weights
        # self.wses and self.wdrift.
        if (theta2_in > 0).all() and self.model_mode is ModelMode.MULTIPLICATIVE:
            self.fitted_values = theta2_in**self.wses * theta0_in**self.wdrift
        else:
            if self.model_mode is ModelMode.MULTIPLICATIVE:
                # The SES fitted values are not strictly positive: switch to
                # the additive model and refit the SES on the additive theta line.
                self.model_mode = ModelMode.ADDITIVE
                logger.warning("Negative Theta line. Fallback to additive model")
                theta_t = self.theta * ts_values + (1 - self.theta) * theta0_in
                self.model = hw.SimpleExpSmoothing(theta_t).fit()
                theta2_in = self.model.fittedvalues
            self.fitted_values = self.wses * theta2_in + self.wdrift * theta0_in
        # Re-apply the seasonal component removed earlier.
        if self.is_seasonal:
            if self.season_mode is SeasonalityMode.ADDITIVE:
                self.fitted_values += self.seasonality.univariate_values(copy=False)
            elif self.season_mode is SeasonalityMode.MULTIPLICATIVE:
                self.fitted_values *= self.seasonality.univariate_values(copy=False)
        # Fitted values are the results of the fit of the model on the train series. A good fit of the model
        # will lead to fitted_values similar to ts. But one cannot see if it overfits.
        if self.normalization:
            # Undo the normalization applied at the start.
            self.fitted_values *= self.mean

        return self
Esempio n. 6
0
    for cat in data_categories[::-1]:
        # Load TimeSeries from M4
        ts_train = pkl.load(open("dataset/train_" + cat + ".pkl", "rb"))
        ts_test = pkl.load(open("dataset/test_" + cat + ".pkl", "rb"))

        # Test models on all time series
        mase_all = []
        smape_all = []
        m = int(info_dataset.Frequency[cat[0] + "1"])
        for train, test in _build_tqdm_iterator(zip(ts_train, ts_test),
                                                verbose=True):
            train_des = train
            seasonOut = 1
            if m > 1:
                if check_seasonality(train, m=m, max_lag=2 * m):
                    _, season = extract_trend_and_seasonality(
                        train, m, model=ModelMode.MULTIPLICATIVE)
                    train_des = remove_from_series(
                        train, season, model=ModelMode.MULTIPLICATIVE)
                    seasonOut = season[-m:].shift(m)
                    seasonOut = seasonOut.append_values(seasonOut.values())
                    seasonOut = seasonOut[:len(test)]
            naive = NaiveDrift()
            naive2 = NaiveSeasonal(K=1)
            naiveSeason = NaiveSeasonal(K=m)
            ses = ExponentialSmoothing(trend=None,
                                       seasonal=None,
                                       seasonal_periods=m)
            holt = ExponentialSmoothing(seasonal=None,
                                        damped=False,
Esempio n. 7
0
    def _multivariate_mase(
        actual_series: TimeSeries,
        pred_series: TimeSeries,
        insample: TimeSeries,
        m: int,
        intersect: bool,
        reduction: Callable[[np.ndarray], float],
    ):
        """Compute the MASE of each component and reduce the values to one score.

        For every component, the mean absolute error between
        ``actual_series`` and ``pred_series`` is divided by the mean absolute
        ``m``-step difference of the corresponding ``insample`` component.

        Parameters
        ----------
        actual_series
            The series of true values.
        pred_series
            The series of predicted values; must start one time step after
            the end of ``insample``.
        insample
            The series the forecast was produced from. If it is stochastic,
            its 0.5-quantile series is used to compute the scale.
        m
            The seasonal period used for the scaling differences. If
            ``None``, it is inferred for each component with
            ``check_seasonality``, falling back to 1 (with a ``UserWarning``)
            when no seasonality is found.
        intersect
            Forwarded to ``_get_values_or_raise``.
        reduction
            Function applied to the list of per-component MASE values.

        Returns
        -------
        The result of ``reduction`` applied to the per-component values.

        Raises
        ------
        Whatever ``raise_if_not`` raises when the widths differ, when
        ``pred_series`` does not directly follow ``insample``, or when the
        scale of a component is close to zero.
        """

        raise_if_not(
            actual_series.width == pred_series.width,
            "The two TimeSeries instances must have the same width.",
            logger,
        )
        raise_if_not(
            actual_series.width == insample.width,
            "The insample TimeSeries must have the same width as the other series.",
            logger,
        )
        raise_if_not(
            insample.end_time() + insample.freq == pred_series.start_time(),
            "The pred_series must be the forecast of the insample series",
            logger,
        )

        insample_ = (
            insample.quantile_timeseries(quantile=0.5)
            if insample.is_stochastic
            else insample
        )

        value_list = []
        for i in range(actual_series.width):
            # Resolve the period per component in a local variable, so that a
            # period inferred for one component is not reused by the next ones.
            period = m
            if period is None:
                # check_seasonality expects a univariate series, so test the
                # current component rather than the whole insample series.
                test_season, period = check_seasonality(
                    insample.univariate_component(i)
                )
                if not test_season:
                    warn(
                        "No seasonality found when computing MASE. Fixing the period to 1.",
                        UserWarning,
                    )
                    period = 1

            y_true, y_hat = _get_values_or_raise(
                actual_series.univariate_component(i),
                pred_series.univariate_component(i),
                intersect,
                remove_nan_union=False,
            )

            x_t = insample_.univariate_component(i).values()
            errors = np.abs(y_true - y_hat)
            # Mean absolute difference between values `period` steps apart.
            scale = np.mean(np.abs(x_t[period:] - x_t[:-period]))
            raise_if_not(
                not np.isclose(scale, 0),
                "cannot use MASE with periodical signals",
                logger,
            )
            value_list.append(np.mean(errors / scale))

        return reduction(value_list)
Esempio n. 8
0
    for cat in data_categories[::-1]:
        # Load TimeSeries from M4
        ts_train = pkl.load(open("dataset/train_" + cat + ".pkl", "rb"))
        ts_test = pkl.load(open("dataset/test_" + cat + ".pkl", "rb"))

        # Test models on all time series
        mase_all = []
        smape_all = []
        m = int(info_dataset.Frequency[cat[0] + "1"])
        for train, test in _build_tqdm_iterator(zip(ts_train, ts_test),
                                                verbose=True):
            train_des = train
            seasonOut = 1
            if m > 1:
                if check_seasonality(train, m=int(m), max_lag=2 * m):
                    pass
                else:
                    m = 1
            try:
                prophet_args = {
                    'daily_seasonality': False,
                    'weekly_seasonality': False,
                    'yearly_seasonality': False,
                    'frequency': None,
                    'changepoint_range': 0.95,
                }
                if cat == 'Daily':
                    prophet_args['daily_seasonality'] = True
                elif cat == 'Hourly':
                    prophet_args['daily_seasonality'] = True