def test_check_seasonality(self):
    """Check ``check_seasonality`` on a 50-point daily sinusoid.

    The signal is ``sin(x * pi / 3 + pi / 2)``, whose period is 6 steps.
    The test expects period 6 to be detected when no period is given,
    period 3 to be rejected, and an ``AssertionError`` when the series is
    stacked with itself.
    """
    # 2013-01-01 .. 2013-02-19 inclusive is exactly 50 daily timestamps.
    daily_index = pd.date_range("20130101", "20130219")
    ramp = pd.Series(range(50), index=daily_index)
    wave = ramp.map(lambda step: np.sin(step * np.pi / 3 + np.pi / 2))
    series = TimeSeries.from_series(wave)

    inferred = check_seasonality(series)
    self.assertEqual((True, 6), inferred)

    forced_period = check_seasonality(series, m=3)
    self.assertEqual((False, 3), forced_period)

    stacked = series.stack(series)
    with self.assertRaises(AssertionError):
        check_seasonality(stacked)
def fit(self, train: TimeSeries):
    """Fit the wrapped model on a (possibly deseasonalized) training series.

    When ``self.m > 1`` and ``check_seasonality`` reports a seasonality of
    period ``self.m``, the multiplicative seasonal component is removed from
    ``train`` before fitting, and the last ``self.m`` seasonal values are
    stored in ``self.seasonOut``. Otherwise ``self.seasonOut`` stays at the
    neutral factor ``1``.

    Parameters
    ----------
    train
        The series to fit the model on.
    """
    super().fit(train)
    train_des = train
    # Neutral multiplicative factor used when no seasonality is removed.
    self.seasonOut = 1
    if self.m > 1:
        # check_seasonality returns an (is_seasonal, period) tuple. Testing the
        # tuple itself is always truthy, so the boolean flag must be unpacked.
        is_seasonal, _ = check_seasonality(train, m=self.m, max_lag=2 * self.m)
        if is_seasonal:
            _, season = extract_trend_and_seasonality(
                train, self.m, model=ModelMode.MULTIPLICATIVE
            )
            train_des = remove_from_series(
                train, season, model=ModelMode.MULTIPLICATIVE
            )
            # Last full season, shifted by self.m steps, then extended with a
            # copy of its own values (2 * self.m values in total).
            last_season = season[-self.m:].shift(self.m)
            self.seasonOut = last_season.append_values(last_season.values())
    self.model.fit(train_des)
def fit(self, series: TimeSeries):
    """Fit the model: optional deseasonalization, SES fit and linear slope.

    Sets ``self.length``, ``self.season_period``, ``self.is_seasonal`` (when
    the seasonality mode is not ``NONE``), ``self.seasonality`` (when
    seasonal), ``self.model``, ``self.coef`` and ``self.alpha``.

    Parameters
    ----------
    series
        The series to fit the model on.

    Returns
    -------
    self
        The fitted model instance.
    """
    super().fit(series)
    ts = self.training_series
    self.length = len(ts)

    # Resolve the seasonal period: none, user-defined, or inferred from data.
    if self.season_mode is SeasonalityMode.NONE:
        self.season_period = 1
    else:
        self.season_period = self.seasonality_period
        if self.season_period is not None:
            # A user-defined period is taken as a true seasonal period.
            self.is_seasonal = self.season_period > 1
        else:
            # No period given: infer it, looking at lags up to half the length.
            half_length = len(ts) // 2
            self.is_seasonal, self.season_period = check_seasonality(
                ts, self.season_period, max_lag=half_length
            )

    # Keep the seasonal component aside and work on the deseasonalized series.
    deseasonalized = ts
    if self.is_seasonal:
        _, self.seasonality = extract_trend_and_seasonality(
            ts, self.season_period, model=self.season_mode
        )
        deseasonalized = remove_from_series(
            ts, self.seasonality, model=self.season_mode
        )

    # Simple exponential smoothing part of the decomposition.
    self.model = hw.SimpleExpSmoothing(deseasonalized.values(copy=False)).fit()

    # Linear regression part: only the degree-one coefficient is kept.
    time_steps = np.arange(self.length)
    scaled_values = (1.0 - self.theta) * deseasonalized.values(copy=False)
    slope = np.polyfit(time_steps, scaled_values, 1)[0]

    # Normalization of the slope coefficient.
    self.coef = slope / (-self.theta)

    self.alpha = self.model.params["smoothing_level"]
    if self.alpha == 0.0:
        # Refit with an explicit initial level when the smoothing level is 0.
        self.model = hw.SimpleExpSmoothing(deseasonalized.values(copy=False)).fit(
            initial_level=ALPHA_START
        )
        self.alpha = self.model.params["smoothing_level"]

    return self
def naive2_groe(ts: TimeSeries, n: int, m: int):
    """Return the prediction of the naive2 baseline.

    If ``m > 1`` and ``check_seasonality`` reports a seasonality of period
    ``m``, the multiplicative seasonal component is removed from ``ts`` before
    fitting ``NaiveSeasonal(K=1)``, and the forecast is multiplied back by the
    last seasonal values. Otherwise the forecast is multiplied by ``1``.

    Parameters
    ----------
    ts
        The series to forecast from.
    n
        The number of steps to predict.
    m
        The candidate seasonal period.

    Returns
    -------
    The ``n``-step prediction of ``NaiveSeasonal(K=1)`` times the seasonal factor.
    """
    # It will be better to use R functions
    ts_des = ts
    # Neutral multiplicative factor used when no seasonality is removed.
    seasonOut = 1
    if m > 1:
        # check_seasonality returns an (is_seasonal, period) tuple. Testing the
        # tuple itself is always truthy, so the boolean flag must be unpacked.
        is_seasonal, _ = check_seasonality(ts, m=int(m), max_lag=2 * m)
        if is_seasonal:
            _, season = extract_trend_and_seasonality(
                ts, m, model=ModelMode.MULTIPLICATIVE
            )
            ts_des = remove_from_series(ts, season, model=ModelMode.MULTIPLICATIVE)
            # Last full season, shifted by m steps, extended with a copy of its
            # own values and then cut to the forecast length.
            # NOTE(review): this holds at most 2 * m values, so for n > 2 * m the
            # factor would be shorter than the forecast. Confirm callers never
            # request that.
            last_season = season[-m:].shift(m)
            seasonOut = last_season.append_values(last_season.values())[:n]
    naive2 = NaiveSeasonal(K=1)
    naive2.fit(ts_des)
    return naive2.predict(n) * seasonOut
def fit(self, series):
    """Fit the model: normalization, deseasonalization, drift and SES.

    Steps, in order: optional division by the series mean, resolution of the
    seasonal period, removal of the seasonal component, fit of a degree-one
    drift line, construction of the theta line, and a simple exponential
    smoothing fit on it. The model falls back to additive (and linear) modes
    when non-positive values make the multiplicative forms unusable.
    ``self.fitted_values`` holds the in-sample fit, with seasonality and
    normalization re-applied.

    Parameters
    ----------
    series
        The series to fit the model on.

    Returns
    -------
    self
        The fitted model instance.
    """
    super().fit(series)
    self.length = len(series)

    # Normalization of the data by its mean (refused if the mean is ~0).
    new_ts = series
    if self.normalization:
        self.mean = series.pd_dataframe(copy=False).mean().mean()
        raise_if_not(
            not np.isclose(self.mean, 0),
            "The mean value of the provided series is too close to zero to perform normalization",
            logger,
        )
        new_ts = series / self.mean

    # Resolve the seasonal period: none, user-defined, or inferred from data.
    if self.season_mode is SeasonalityMode.NONE:
        self.season_period = 1
    else:
        self.season_period = self.seasonality_period
        if self.season_period is not None:
            # A user-defined period is taken as a true seasonal period.
            self.is_seasonal = self.season_period > 1
        else:
            # No period given: infer it, looking at lags up to half the length.
            half_length = len(series) // 2
            self.is_seasonal, self.season_period = check_seasonality(
                series, self.season_period, max_lag=half_length
            )

    # Store and remove the seasonality effect if there is any.
    if self.is_seasonal:
        _, self.seasonality = extract_trend_and_seasonality(
            new_ts, self.season_period, model=self.season_mode
        )
        new_ts = remove_from_series(new_ts, self.seasonality, model=self.season_mode)

    ts_values = new_ts.univariate_values()
    has_non_positive = (ts_values <= 0).any()
    if has_non_positive:
        self.model_mode = ModelMode.ADDITIVE
        self.trend_mode = TrendMode.LINEAR
        logger.warning(
            "Time series has negative values. Fallback to additive and linear model"
        )

    # Drift part of the decomposition: degree-one fit on the values, or on
    # their logarithm when the trend is not linear.
    time_steps = np.arange(self.length)
    linreg = ts_values if self.trend_mode is TrendMode.LINEAR else np.log(ts_values)
    self.drift = np.poly1d(np.polyfit(time_steps, linreg, 1))
    theta0_in = self.drift(time_steps)
    if self.trend_mode is TrendMode.EXPONENTIAL:
        theta0_in = np.exp(theta0_in)

    # Theta line: multiplicative form only if the drift line is strictly positive.
    drift_allows_multiplicative = (
        (theta0_in > 0).all() and self.model_mode is ModelMode.MULTIPLICATIVE
    )
    if not drift_allows_multiplicative:
        if self.model_mode is ModelMode.MULTIPLICATIVE:
            logger.warning("Negative Theta line. Fallback to additive model")
            self.model_mode = ModelMode.ADDITIVE
        theta_t = self.theta * ts_values + (1 - self.theta) * theta0_in
    else:
        theta_t = (ts_values**self.theta) * (theta0_in ** (1 - self.theta))

    # SES part of the decomposition.
    self.model = hw.SimpleExpSmoothing(theta_t).fit()
    theta2_in = self.model.fittedvalues

    ses_allows_multiplicative = (
        (theta2_in > 0).all() and self.model_mode is ModelMode.MULTIPLICATIVE
    )
    if not ses_allows_multiplicative:
        if self.model_mode is ModelMode.MULTIPLICATIVE:
            # The SES fit is not strictly positive: rebuild the theta line in
            # additive form and refit before combining.
            self.model_mode = ModelMode.ADDITIVE
            logger.warning("Negative Theta line. Fallback to additive model")
            theta_t = self.theta * ts_values + (1 - self.theta) * theta0_in
            self.model = hw.SimpleExpSmoothing(theta_t).fit()
            theta2_in = self.model.fittedvalues
        self.fitted_values = self.wses * theta2_in + self.wdrift * theta0_in
    else:
        self.fitted_values = theta2_in**self.wses * theta0_in**self.wdrift

    # Re-apply the seasonal component that was removed before fitting.
    if self.is_seasonal:
        if self.season_mode is SeasonalityMode.ADDITIVE:
            self.fitted_values += self.seasonality.univariate_values(copy=False)
        elif self.season_mode is SeasonalityMode.MULTIPLICATIVE:
            self.fitted_values *= self.seasonality.univariate_values(copy=False)

    # Fitted values are the result of fitting the model on the train series.
    # A good fit gives fitted_values close to the series, but this alone does
    # not reveal overfitting.
    if self.normalization:
        self.fitted_values *= self.mean

    return self
for cat in data_categories[::-1]: # Load TimeSeries from M4 ts_train = pkl.load(open("dataset/train_" + cat + ".pkl", "rb")) ts_test = pkl.load(open("dataset/test_" + cat + ".pkl", "rb")) # Test models on all time series mase_all = [] smape_all = [] m = int(info_dataset.Frequency[cat[0] + "1"]) for train, test in _build_tqdm_iterator(zip(ts_train, ts_test), verbose=True): train_des = train seasonOut = 1 if m > 1: if check_seasonality(train, m=m, max_lag=2 * m): _, season = extract_trend_and_seasonality( train, m, model=ModelMode.MULTIPLICATIVE) train_des = remove_from_series( train, season, model=ModelMode.MULTIPLICATIVE) seasonOut = season[-m:].shift(m) seasonOut = seasonOut.append_values(seasonOut.values()) seasonOut = seasonOut[:len(test)] naive = NaiveDrift() naive2 = NaiveSeasonal(K=1) naiveSeason = NaiveSeasonal(K=m) ses = ExponentialSmoothing(trend=None, seasonal=None, seasonal_periods=m) holt = ExponentialSmoothing(seasonal=None, damped=False,
def _multivariate_mase(
    actual_series: TimeSeries,
    pred_series: TimeSeries,
    insample: TimeSeries,
    m: int,
    intersect: bool,
    reduction: Callable[[np.ndarray], float],
):
    """Compute the MASE of each component and reduce the values to one number.

    For each component, the mean absolute error between ``actual_series`` and
    ``pred_series`` is divided by the mean absolute ``m``-step difference of
    the corresponding ``insample`` component.

    Parameters
    ----------
    actual_series
        The series of actual values.
    pred_series
        The series of predicted values; must start one step after ``insample``.
    insample
        The training series used to forecast ``pred_series``. If stochastic,
        its 0.5-quantile series is used.
    m
        The seasonal period used for the scaling differences. If ``None``, it
        is inferred for each component with ``check_seasonality``, falling
        back to 1 (with a ``UserWarning``) when no seasonality is found.
    intersect
        Forwarded to ``_get_values_or_raise``.
    reduction
        Function applied to the list of per-component MASE values.

    Returns
    -------
    The result of ``reduction`` applied to the per-component MASE values.

    Raises
    ------
    Whatever ``raise_if_not`` raises when the widths differ, when
    ``pred_series`` does not directly follow ``insample``, or when the scale
    of a component is close to zero.
    """
    raise_if_not(
        actual_series.width == pred_series.width,
        "The two TimeSeries instances must have the same width.",
        logger,
    )
    raise_if_not(
        actual_series.width == insample.width,
        "The insample TimeSeries must have the same width as the other series.",
        logger,
    )
    raise_if_not(
        insample.end_time() + insample.freq == pred_series.start_time(),
        "The pred_series must be the forecast of the insample series",
        logger,
    )

    insample_ = (
        insample.quantile_timeseries(quantile=0.5)
        if insample.is_stochastic
        else insample
    )

    value_list = []
    for i in range(actual_series.width):
        insample_component = insample_.univariate_component(i)

        # Resolve the period per component, on the univariate component only.
        # Inferring it on the whole series fails for multivariate input, and
        # rebinding `m` would leak the first component's period to the others.
        period = m
        if period is None:
            test_season, period = check_seasonality(insample_component)
            if not test_season:
                warn(
                    "No seasonality found when computing MASE. Fixing the period to 1.",
                    UserWarning,
                )
                period = 1

        y_true, y_hat = _get_values_or_raise(
            actual_series.univariate_component(i),
            pred_series.univariate_component(i),
            intersect,
            remove_nan_union=False,
        )

        x_t = insample_component.values()
        errors = np.abs(y_true - y_hat)
        # Mean absolute difference between in-sample values `period` steps apart.
        scale = np.mean(np.abs(x_t[period:] - x_t[:-period]))
        raise_if_not(
            not np.isclose(scale, 0),
            "cannot use MASE with periodical signals",
            logger,
        )
        value_list.append(np.mean(errors / scale))

    return reduction(value_list)
for cat in data_categories[::-1]: # Load TimeSeries from M4 ts_train = pkl.load(open("dataset/train_" + cat + ".pkl", "rb")) ts_test = pkl.load(open("dataset/test_" + cat + ".pkl", "rb")) # Test models on all time series mase_all = [] smape_all = [] m = int(info_dataset.Frequency[cat[0] + "1"]) for train, test in _build_tqdm_iterator(zip(ts_train, ts_test), verbose=True): train_des = train seasonOut = 1 if m > 1: if check_seasonality(train, m=int(m), max_lag=2 * m): pass else: m = 1 try: prophet_args = { 'daily_seasonality': False, 'weekly_seasonality': False, 'yearly_seasonality': False, 'frequency': None, 'changepoint_range': 0.95, } if cat == 'Daily': prophet_args['daily_seasonality'] = True elif cat == 'Hourly': prophet_args['daily_seasonality'] = True