def test_strategy_mean_seasonal(fh, sp, window_length):
    """Check seasonal "mean" forecasts against a manually computed expectation.

    The check only runs when ``window_length`` is ``None`` or greater than
    ``sp``; other combinations are silently skipped.

    Parameters
    ----------
    fh : forecasting horizon accepted by ``check_fh``
    sp : int
        Seasonal periodicity.
    window_length : int or None
        Number of trailing observations to average; ``None`` means the whole
        training series.
    """
    # ``None`` must be tested first: ``None > sp`` raises TypeError on Python 3.
    if window_length is None or window_length > sp:
        f = NaiveForecaster(strategy="mean", sp=sp, window_length=window_length)
        f.fit(y_train)
        y_pred = f.predict(fh)

        # check predicted index
        np.testing.assert_array_equal(y_train.index[-1] + check_fh(fh), y_pred.index)

        if window_length is None:
            window_length = len(y_train)

        # check values
        fh = check_fh(fh)  # get well formatted fh
        reps = int(np.ceil(max(fh) / sp))  # np.int was removed from NumPy

        # Cast to float so the window can hold the NaN padding.
        last_window = y_train.iloc[-window_length:].values.astype(float)
        # Pad to a whole number of seasons so the window reshapes to (n, sp).
        # NOTE(review): this variant pads at the END of the window, whereas the
        # other test_strategy_mean_seasonal in this file pads at the front;
        # confirm which alignment the forecaster under test implements.
        last_window = np.pad(
            last_window,
            (0, sp - len(last_window) % sp),
            "constant",
            constant_values=np.nan,
        )
        last_window = last_window.reshape(int(np.ceil(len(last_window) / sp)), sp)

        # Column-wise mean (ignoring padding), tiled to cover the horizon.
        expected = np.tile(np.nanmean(last_window, axis=0), reps=reps)[fh - 1]
        np.testing.assert_array_equal(y_pred, expected)
def forecasting_example():
    """Run a small forecasting / windowed-regression demo on a local CSV file.

    Steps, in order:
      1. read a univariate series from a hard-coded CSV path;
      2. print a one-step-ahead "last value" naive forecast;
      3. turn the series into sliding windows of length 100;
      4. fit a ``CNNRegressor`` on the windows with column 2 of each window
         as the target;
      5. binarise that target around 0.0 and fit/predict a
         ``RandomIntervalSpectralForest`` on it, printing the predictions.

    Returns nothing; all results are printed.
    """
    name = "C:\\Users\\Tony\\OneDrive - University of East Anglia\\Research\\Alex " \
           "Mcgregor Grant\\randomNoise.csv"
    # ``squeeze=True`` was removed from read_csv and ``np.float`` from NumPy;
    # squeeze the single data column explicitly instead.
    y = pd.read_csv(name, index_col=0, dtype={1: float}).squeeze("columns")

    forecast_horizon = np.arange(1, 2)
    forecaster = NaiveForecaster(strategy="last")
    forecaster.fit(y)
    y_pred = forecaster.predict(forecast_horizon)
    print("Next predicted value = ",y_pred)

    # https://github.com/alan-turing-institute/sktime/blob/main/examples/01_forecasting.ipynb
    # Reduce to a regression problem through windowing.
    ## Transform forecasting into regression
    np_y = y.to_numpy()
    v = sliding_window_view(np_y, 100)
    print("Window shape =",v.shape)
    v_3d = np.expand_dims(v, axis=1)  # insert a new axis at position 1
    print("Window shape =",v.shape)
    print(v_3d.shape)

    z = v[:,2]  # target: element at index 2 of every window
    print(z.shape)

    regressor = CNNRegressor()
    regressor.fit(v_3d,z)
    p = regressor.predict(v_3d)
    #print(p)

    # Binarise the target: np.digitize returns 0 below 0.0 and 1 otherwise.
    d = np.array([0.0])
    c = np.digitize(z,d)
    classifier = RandomIntervalSpectralForest()
    classifier.fit(v_3d,c)
    cls = classifier.predict(v_3d)
    print(cls)
def test_strategy_mean_seasonal(fh, sp, window_length):
    """Compare seasonal "mean" forecasts with a hand-computed seasonal average.

    Nothing is checked when ``window_length`` is set but does not exceed ``sp``.
    """
    if window_length is not None and window_length <= sp:
        return

    forecaster = NaiveForecaster(strategy="mean", sp=sp, window_length=window_length)
    forecaster.fit(y_train)
    y_pred = forecaster.predict(fh)

    # The forecast index must continue from the last training time point.
    _assert_correct_pred_time_index(y_pred.index, y_train.index[-1], fh)

    n_obs = len(y_train) if window_length is None else window_length

    steps = check_fh(fh)  # well-formatted horizon
    n_tiles = int(np.ceil(max(steps) / sp))

    window = y_train.iloc[-n_obs:].to_numpy().astype(float)
    # Front-pad with NaN up to a whole number of seasons, so the most recent
    # observation lands in the last column of the last row.
    n_pad = sp - len(window) % sp
    window = np.pad(window, (n_pad, 0), "constant", constant_values=np.nan)
    seasons = window.reshape(int(np.ceil(len(window) / sp)), sp)

    seasonal_means = np.nanmean(seasons, axis=0)
    expected = np.tile(seasonal_means, reps=n_tiles)[steps - 1]
    np.testing.assert_array_equal(y_pred, expected)
def test_strategy_last(fh):
    """Test last strategy.

    Every forecast step must equal the final observation of ``y_train``.
    """
    forecaster = NaiveForecaster(strategy="last")
    forecaster.fit(y_train)
    y_pred = forecaster.predict(fh)

    last_value = y_train.iloc[-1]
    n_steps = len(forecaster.fh)
    np.testing.assert_array_equal(y_pred, np.repeat(last_value, n_steps))
def test_strategy_mean(fh, window_length):
    """Check that the "mean" strategy forecasts the mean of the last window.

    ``window_length=None`` means the mean of the whole training series.
    """
    forecaster = NaiveForecaster(strategy="mean", window_length=window_length)
    forecaster.fit(y_train)
    y_pred = forecaster.predict(fh)

    n_obs = len(y_train) if window_length is None else window_length
    window_mean = y_train.iloc[-n_obs:].mean()
    expected = np.repeat(window_mean, len(forecaster.fh))
    np.testing.assert_array_equal(y_pred, expected)
def test_strategy_mean_seasonal_additional_combinations(n, window_length, sp):
    """Check time series of n * window_length with a 1:n-1 train/test split,
    for different combinations of the period and seasonal periodicity.

    The time series contains perfectly cyclic data.

    Parameters
    ----------
    n : int
        Total series length as a multiple of ``window_length``.
    window_length : int
        Number of leading points used as training data.
    sp : int
        Seasonal periodicity; the data cycle through ``1.0 .. sp``.
    """
    # given <window_length> hours of data with a seasonal periodicity of <sp> hours
    n_timepoints = n * window_length
    freq = pd.Timedelta("1h")
    data = pd.Series(
        # ``closed=`` was removed from date_range in pandas 2.0 and had no
        # effect here, because only a start and a number of periods are given.
        index=pd.date_range("2021-06-01 00:00", periods=n_timepoints, freq=freq),
        data=([float(i) for i in range(1, sp + 1)] * n_timepoints)[:n_timepoints],
    )

    # Split into train and test data (positional split)
    train_data = data.iloc[:window_length]
    test_data = data.iloc[window_length:]

    # Forecast data does not retain the original frequency
    test_data.index.freq = None

    # For example, for n=2, window_length=4 and sp=3:
    # print(train_data)
    # 2021-06-01 00:00:00    1.0
    # 2021-06-01 01:00:00    2.0
    # 2021-06-01 02:00:00    3.0
    # 2021-06-01 03:00:00    1.0
    # Freq: H, dtype: float64
    # print(test_data)
    # 2021-06-01 04:00:00    2.0  # (value of 3 hours earlier)
    # 2021-06-01 05:00:00    3.0  # (value of 3 hours earlier)
    # 2021-06-01 06:00:00    1.0  # (mean value of 3 and 6 hours earlier)
    # 2021-06-01 07:00:00    2.0  # (value of 6 hours earlier)
    # dtype: float64

    # let's forecast the next <(n-1) x window_length> hours with a periodicity
    # of <sp> hours
    fh = ForecastingHorizon(test_data.index, is_relative=False)
    model = NaiveForecaster(strategy="mean", sp=sp)
    model.fit(train_data)
    forecast_data = model.predict(fh)

    if sp < window_length:
        # We expect a perfect forecast given our perfectly cyclic data
        pd.testing.assert_series_equal(forecast_data, test_data)
    else:
        # We expect a few forecasts yield NaN values: the first
        # (sp - window_length) positions of every season have no training data.
        for i in range(1 + len(test_data) // sp):
            test_data.iloc[i * sp:i * sp + sp - window_length] = np.nan
        pd.testing.assert_series_equal(forecast_data, test_data)
def test_strategy_last_seasonal(fh, sp):
    """Check that seasonal "last" forecasts repeat the final season of ``y_train``."""
    forecaster = NaiveForecaster(strategy="last", sp=sp)
    forecaster.fit(y_train)
    y_pred = forecaster.predict(fh)

    # The forecast index must continue from the last training time point.
    _assert_correct_pred_time_index(y_pred.index, y_train.index[-1], fh)

    steps = check_fh(fh)  # well-formatted horizon
    n_tiles = int(np.ceil(max(steps) / sp))
    last_season = y_train.iloc[-sp:]
    # Tile the last season far enough to cover the horizon, then pick the
    # requested (1-based) steps.
    expected = np.tile(last_season, reps=n_tiles)[steps - 1]
    np.testing.assert_array_equal(y_pred, expected)
def test_strategy_mean_seasonal_simple(n_seasons, sp):
    """Check seasonal "mean" forecasts on a random (n_seasons, sp) matrix.

    Rows are seasons and columns are the time points within a season; the
    flattened matrix is the training series and the column means are the
    expected forecasts for one season ahead.
    """
    season_matrix = np.random.normal(size=(n_seasons, sp))
    series = pd.Series(season_matrix.ravel())

    column_means = season_matrix.mean(axis=0)
    assert column_means.shape == (sp, )

    forecaster = NaiveForecaster(strategy="mean", sp=sp)
    forecaster.fit(series)
    one_season_ahead = np.arange(1, sp + 1)
    y_pred = forecaster.predict(one_season_ahead)

    np.testing.assert_array_equal(y_pred, column_means)
def sma_forecast(
    y_train: pd.Series,
    forecast_horizon: np.ndarray,
    window_length: int = 7,
) -> pd.Series:
    """
    Fit a simple moving average model with training data and forecast for a
    given horizon.

    Args:
        y_train: Historic dataset to fit model.
        forecast_horizon: Array of forecast periods [1, ... , n] n being
            number of desired periods to forecast.
        window_length: Number of trailing observations averaged by the model.
            Defaults to 7, the value this function previously hard-coded.

    Returns:
        The forecast as a pandas series renamed to "consumption".
    """
    forecaster = NaiveForecaster(strategy="mean", window_length=window_length)
    forecaster.fit(y_train)
    return forecaster.predict(forecast_horizon).rename("consumption")
def compute_expected_y_pred(y_train, fh):
    """Build the reference forecast by applying each pipeline step by hand.

    A copy of ``y_train`` goes through an ``ExponentTransformer`` and then a
    min-max scaler wrapped in ``TabularToSeriesAdaptor``. A default
    ``NaiveForecaster`` is fitted on the result, and its prediction is mapped
    back through the inverse transforms in reverse order.
    """
    transformers = [ExponentTransformer(), TabularToSeriesAdaptor(MinMaxScaler())]

    # fitting
    transformed = y_train.copy()
    for transformer in transformers:
        transformed = transformer.fit_transform(transformed)
    forecaster = NaiveForecaster()
    forecaster.fit(transformed, fh=fh)

    # predicting
    prediction = forecaster.predict()
    for transformer in reversed(transformers):
        prediction = transformer.inverse_transform(prediction)
    return prediction
def compute_expected_y_pred(y_train, fh):
    """Build the reference forecast by applying each pipeline step by hand.

    A copy of ``y_train`` is deseasonalised (multiplicative, ``sp=12``) and
    then detrended with a degree-1 polynomial trend. A default
    ``NaiveForecaster`` is fitted on the result, and its prediction is mapped
    back through the inverse transforms in reverse order.
    """
    transformers = [
        Deseasonalizer(sp=12, model="multiplicative"),
        Detrender(PolynomialTrendForecaster(degree=1)),
    ]

    # fitting
    transformed = y_train.copy()
    for transformer in transformers:
        transformed = transformer.fit_transform(transformed)
    forecaster = NaiveForecaster()
    forecaster.fit(transformed, fh=fh)

    # predicting
    prediction = forecaster.predict()
    for transformer in reversed(transformers):
        prediction = transformer.inverse_transform(prediction)
    return prediction
def test_strategy_drift_unit_slope(fh, window_length):
    """Check the drift strategy on training data with a constant slope of 1.

    The expected forecast at step ``h`` is the last training value plus ``h``.
    Nothing is checked for ``window_length == 1``.

    NOTE(review): relies on the module-level ``y_train`` having unit slope;
    confirm against its definition.
    """
    # presumably skipped because no slope can be estimated from one point
    if window_length == 1:
        return

    f = NaiveForecaster(strategy="drift", window_length=window_length)
    f.fit(y_train)
    y_pred = f.predict(fh)

    # get well formatted fh values
    fh = check_fh(fh)
    # With unit slope, each step h adds exactly h to the last observation.
    expected = y_train.iloc[-1] + np.arange(0, max(fh) + 1)[fh]
    np.testing.assert_array_equal(y_pred, expected)
def test_strategy_drift_flat_line(fh, window_length):
    """Check the drift strategy on flat data: every forecast must equal 1.

    Uses a local training series of twenty ones. Nothing is checked for
    ``window_length == 1``.
    """
    # presumably skipped because no slope can be estimated from one point
    if window_length == 1:
        return

    y_train = pd.Series(np.ones(20))
    f = NaiveForecaster(strategy="drift", window_length=window_length)
    f.fit(y_train)
    y_pred = f.predict(fh)

    # get well formatted fh values
    fh = check_fh(fh)
    expected = np.ones(len(fh))
    np.testing.assert_array_equal(y_pred, expected)
def test_strategy_drift_window_length(fh, window_length):
    """Check that the drift slope is taken over exactly ``window_length`` points.

    A random series of ``window_length`` values is used for training (the
    length of the module-level ``y_train`` when ``window_length`` is None).
    The expected forecast extends the straight line through its first and
    last values. Nothing is checked for ``window_length == 1``.
    """
    if window_length == 1:
        return

    n_obs = len(y_train) if window_length is None else window_length
    samples = np.random.normal(size=n_obs)
    series = pd.Series(samples)

    forecaster = NaiveForecaster(strategy="drift", window_length=n_obs)
    forecaster.fit(series)
    y_pred = forecaster.predict(fh)

    # Slope of the line joining the first and last points of the window.
    rise = samples[-1] - samples[0]
    slope = rise / (n_obs - 1)

    steps = check_fh(fh)  # well-formatted horizon
    expected = samples[-1] + slope * steps
    np.testing.assert_array_equal(y_pred, expected)
def app_naive_forecast(body):  # noqa: E501
    """Forecast the next six values of a posted time series.

    Fits a "last value" ``NaiveForecaster`` on ``body["time_series"]`` and
    predicts relative steps 1 to 6.

    :param body: request payload; replaced by ``Body.from_dict(...)`` of the
        request JSON when the request is JSON
    :type body: dict | bytes

    :rtype: dict with one key, ``"forecast"``, holding the predicted values
        as a list. NOTE(review): the generated stub declared
        ``InlineResponse200``; confirm the framework accepts the plain dict.
    """
    if connexion.request.is_json:
        body = Body.from_dict(connexion.request.get_json())  # noqa: E501

    print(type(body))
    # NOTE(review): for a non-JSON request ``body`` is still the raw
    # dict/bytes argument, and ``.to_dict()`` will fail on it.
    time_series = pd.Series(body.to_dict()["time_series"])

    forecaster = NaiveForecaster(strategy="last")
    forecaster.fit(time_series)

    # TODO: Move to yaml spec
    fh = ForecastingHorizon(list(range(1, 7)), is_relative=True)
    y_pred = forecaster.predict(fh)
    print(y_pred)
    return {"forecast": y_pred.values.tolist()}
"""create naive baseline""" import numpy as np from sktime.datasets import load_airline from sktime.forecasting.naive import NaiveForecaster from sktime.forecasting.model_selection import temporal_train_test_split from sktime.performance_metrics.forecasting import smape_loss y = load_airline() y_train, y_test = temporal_train_test_split(y) fh = np.arange(1, len(y_test) + 1) # forecasting horizon naive_forecaster_last = NaiveForecaster(strategy="last") naive_forecaster_last.fit(y_train) y_last = naive_forecaster_last.predict(fh) naive_forecaster_seasonal = NaiveForecaster(strategy="seasonal_last", sp=12) naive_forecaster_seasonal.fit(y_train) y_seasonal_last = naive_forecaster_seasonal.predict(fh) plot_ys(y_train, y_test, y_last, y_seasonal_last, labels=["y_train", "y_test", "y_pred_last", "y_pred_seasonal_last"]); smape_loss(y_last, y_test) """sklearn regressors with forcasting""" from sktime.forecasting.compose import ReducedRegressionForecaster from sklearn.ensemble import RandomForestRegressor from sktime.forecasting.model_selection import temporal_train_test_split from sktime.performance_metrics.forecasting import smape_loss
(1) 基准模型预测 * 我们总是预测(在训练系列中)观察到的最后一个值 * 我们预测在同一季节观察到的最后一个值 ''') y_pred = np.repeat(y_train.iloc[-1], len(fh)) y_pred = pd.Series(y_pred, index=y_train.index[-1] + fh) plot_ys(y_train, y_test, y_pred, labels=["y_train", "y_test", "y_pred"]) st.pyplot() st.write(''' (2) 使用sktime ''') forecaster = NaiveForecaster(strategy="last") forecaster.fit(y_train) y_last = forecaster.predict(fh) plot_ys(y_train, y_test, y_pred, labels=["y_train", "y_test", "y_pred"]) st.pyplot() st.write("smape_loss(y_last, y_test):", smape_loss(y_last, y_test)) forecaster = NaiveForecaster(strategy="seasonal_last", sp=12) forecaster.fit(y_train) y_pred = forecaster.predict(fh) plot_ys(y_train, y_test, y_pred, labels=["y_train", "y_test", "y_pred"]) st.pyplot() st.write("smape_loss(y_last, y_test):", smape_loss(y_last, y_test)) st.write(''' ### 4 Forecasting with sktime ### 4.1 Reduction: from forecasting to regression
def main():
    """Compare five forecasters on the airline dataset and report sMAPE.

    Holds out the last 36 observations as the test set and fits a seasonal
    naive model, AutoARIMA, exponential smoothing, Theta and an ensemble of
    all four. It then plots every forecast and prints each model's sMAPE as
    a percentage.
    """
    # Univariate, monthly records from 1949 to 60 (144 records)
    df = datasets.load_airline()
    y_train, y_test = temporal_train_test_split(df, test_size=36)  # 36 months for testing
    # forecast horizon: array with the same length as y_test
    fh = np.arange(1, len(y_test) + 1)

    # model strategy: last, mean, seasonal_last. sp=12 months (yearly season)
    forecaster = NaiveForecaster(strategy='seasonal_last', sp=12)
    forecaster.fit(y_train)
    y_pred = forecaster.predict(fh)

    forecaster2 = AutoARIMA(sp=12, suppress_warnings=True, trace=1)
    forecaster2.fit(y_train)
    y_pred2 = forecaster2.predict(fh)

    # ``damped`` is a boolean flag; the string 'True' only worked because any
    # non-empty string is truthy.
    forecaster3 = ExponentialSmoothing(trend='add',
                                       damped=True,
                                       seasonal='multiplicative',
                                       sp=12)
    forecaster3.fit(y_train)
    y_pred3 = forecaster3.predict(fh)

    forecaster4 = ThetaForecaster(sp=12)
    forecaster4.fit(y_train)
    y_pred4 = forecaster4.predict(fh)

    forecaster5 = EnsembleForecaster([
        ('NaiveForecaster', NaiveForecaster(strategy='seasonal_last', sp=12)),
        ('AutoARIMA', AutoARIMA(sp=12, suppress_warnings=True)),
        ('Exp Smoothing',
         ExponentialSmoothing(trend='add',
                              damped=True,
                              seasonal='multiplicative',
                              sp=12)),
        ('Theta', ThetaForecaster(sp=12)),
    ])
    forecaster5.fit(y_train)
    y_pred5 = forecaster5.predict(fh)

    plot_ys(y_train,
            y_test,
            y_pred,
            y_pred2,
            y_pred3,
            y_pred4,
            y_pred5,
            labels=[
                'Train', 'Test', 'Naive Forecaster', 'AutoARIMA',
                'Exp Smoothing', 'Theta', 'Ensemble'
            ])
    plt.xlabel('Months')
    plt.ylabel('Number of flights')
    plt.title(
        'Time series of the number of international flights in function of time'
    )
    plt.show()

    results = [
        ('NaiveForecaster', y_pred),
        ('AutoARIMA', y_pred2),
        ('Exp Smoothing', y_pred3),
        ('Theta', y_pred4),
        ('Ensemble', y_pred5),
    ]
    for model_name, prediction in results:
        # Scale to percent before rounding, so the printed value carries no
        # floating-point noise such as 14.499999999999998.
        error_pct = round(100 * smape_loss(y_test, prediction), 1)
        print(f'SMAPE Error for {model_name} is:', error_pct, '%')
full = re.findall(r'SARIMAX\(.*?\)x\(.*?\)', summary_text)[0] info = [int(_) for _ in re.findall(r'\d+', full)] return info p, d, q, P, D, Q, S = get_params(summary.as_text()) y_train, y_test = temporal_train_test_split(series, test_size=24) fh = ForecastingHorizon(y_test.index, is_relative=False) plot_series(y_train, y_test, labels=['Train', 'Test']) model = NaiveForecaster(strategy="last", sp=12).fit(y_train) y_pred = model.predict(fh) print(type(y_pred)) print(y_pred) plot_series(y_train, y_test, y_pred, labels=['Train', 'Test', 'Predicted']) plot_series(y_test, y_pred, labels=['Test', 'Predicted']) mape_naive = mean_absolute_percentage_error(y_pred, y_test) model = ARIMA(order=(p, d, q)).fit(y_train) y_pred, y_conf = model.predict(fh, return_pred_int=True) y_train.plot(label='Train')
def genforecast(data):
    """Evaluate four forecasters on ``data`` and forecast the next five steps.

    ``data`` is split with ``temporal_train_test_split`` (default split).
    Each model (naive drift, linear trend, exponential-smoothing ensemble,
    grid-searched 1-NN reduction) goes through the same steps:

      1. fit on the training part and predict over the test horizon;
      2. score the prediction with sMAPE, rounded to 4 decimals;
      3. refit on all of ``data`` and predict steps 1 to 5;
      4. record the min and max of that forecast, rounded to 2 decimals.

    Parameters
    ----------
    data : series accepted by ``temporal_train_test_split`` and the sktime
        forecasters used below

    Returns
    -------
    tuple of 18 items, in this order:
        min_naive, max_naive, min_poly, max_poly, min_ensem, max_ensem,
        min_redreg, max_redreg, y_test, testct (length of y_test),
        y_pred_naive, naive_acc, y_pred_poly, poly_acc,
        y_pred_ensem, ensem_acc, y_pred_redreg, redreg_acc
    """
    import numpy as np
    from sklearn.neighbors import KNeighborsRegressor
    from sktime.forecasting.compose import EnsembleForecaster
    from sktime.forecasting.compose import ReducedRegressionForecaster
    from sktime.forecasting.exp_smoothing import ExponentialSmoothing
    from sktime.forecasting.model_selection import ForecastingGridSearchCV
    from sktime.forecasting.model_selection import SlidingWindowSplitter
    from sktime.forecasting.model_selection import temporal_train_test_split
    from sktime.forecasting.naive import NaiveForecaster
    from sktime.forecasting.trend import PolynomialTrendForecaster
    from sktime.performance_metrics.forecasting import smape_loss

    y_train, y_test = temporal_train_test_split(data)
    fh = np.arange(1, len(y_test) + 1)
    testct = len(y_test)
    futurewin = np.arange(1, 6)  # 5 day in future prediction

    def _evaluate(model):
        """Score ``model`` on the hold-out set, then refit on all data and forecast."""
        model.fit(y_train)
        y_pred = model.predict(fh)
        acc = round(smape_loss(y_test, y_pred), 4)
        # full model dev and forecast next 5 days
        model.fit(data)
        fut_pred = model.predict(futurewin)
        return round(min(fut_pred), 2), round(max(fut_pred), 2), y_pred, acc

    # --- naive drift ---
    min_naive, max_naive, y_pred_naive, naive_acc = _evaluate(
        NaiveForecaster(strategy="drift"))

    # --- linear trend ---
    min_poly, max_poly, y_pred_poly, poly_acc = _evaluate(
        PolynomialTrendForecaster(degree=1))

    # --- exponential smoothing ensemble ---
    # Seasonal period: a quarter of the test length, clamped to [2, 12].
    spval = max(2, min(len(y_test) // 4, 12))
    ensemble = EnsembleForecaster([
        ("ses", ExponentialSmoothing(seasonal="multiplicative", sp=spval)),
        ("holt",
         ExponentialSmoothing(trend="add",
                              damped=False,
                              seasonal="multiplicative",
                              sp=spval)),
        ("damped",
         ExponentialSmoothing(trend="add",
                              damped=True,
                              seasonal="multiplicative",
                              sp=spval)),
    ])
    min_ensem, max_ensem, y_pred_ensem, ensem_acc = _evaluate(ensemble)

    # --- 1-NN reduction with a grid-searched window length ---
    regressor = KNeighborsRegressor(n_neighbors=1)
    reduction = ReducedRegressionForecaster(regressor=regressor,
                                            window_length=15,
                                            strategy="recursive")
    param_grid = {"window_length": [5, 10, 15]}
    # we fit the forecaster on the initial window, and then use temporal
    # cross-validation to find the optimal parameter
    cv = SlidingWindowSplitter(initial_window=int(len(y_train) * 0.5))
    gscv = ForecastingGridSearchCV(reduction, cv=cv, param_grid=param_grid)
    min_redreg, max_redreg, y_pred_redreg, redreg_acc = _evaluate(gscv)

    return (min_naive, max_naive, min_poly, max_poly, min_ensem, max_ensem,
            min_redreg, max_redreg, y_test, testct, y_pred_naive, naive_acc,
            y_pred_poly, poly_acc, y_pred_ensem, ensem_acc, y_pred_redreg,
            redreg_acc)
def forecast(data,
             customer_id,
             start='2017-01',
             end='2019-04',
             model_type='NaiveForecaster',
             test_size_month=5,
             model_storage_path=''):
    """
    Main function for build forecasting model on selected customer and
    time interval, save the model and plotting

    Parameters
    ----------
    data: pandas DataFrame
        main dataset with customer_id, product_id and Timestamp

    customer_id: int

    start: string
        start year and month in '2020-01' format

    end: string
        end year and month in '2020-01' format
        *** this month will not be included ***

    model_type:
        type of model to use in forecasting
        select from : ['NaiveForecaster', 'PolynomialTrendForecaster',
        'ThetaForecaster', 'KNeighborsRegressor', 'ExponentialSmoothing',
        'AutoETS', 'AutoARIMA', 'TBATS', 'BATS', 'EnsembleForecaster']

    test_size_month:
        number of month that will be excluded from end of interval
        to use as test dataset

    model_storage_path: string
        the folder that you want to store saved models; an empty string
        stores the model in the current working directory

    Returns
    -------
    sMAPE Loss: print

    plot: matplotlib figure
        plot train, test and predicted values

    Raises
    ------
    ValueError
        if ``model_type`` is not one of the supported names
    """
    import os

    def _build_ensemble():
        """Return the three-member exponential smoothing ensemble."""
        return EnsembleForecaster([
            ("ses", ExponentialSmoothing(seasonal="multiplicative", sp=12)),
            (
                "holt",
                ExponentialSmoothing(trend="add",
                                     damped_trend=False,
                                     seasonal="multiplicative",
                                     sp=12),
            ),
            (
                "damped",
                ExponentialSmoothing(trend="add",
                                     damped_trend=True,
                                     seasonal="multiplicative",
                                     sp=12),
            ),
        ])

    # Lazy factories: only the requested model is ever constructed.
    builders = {
        'NaiveForecaster':
        lambda: NaiveForecaster(strategy="last", sp=12),
        'PolynomialTrendForecaster':
        lambda: PolynomialTrendForecaster(degree=2),
        'ThetaForecaster':
        lambda: ThetaForecaster(sp=6),
        'KNeighborsRegressor':
        lambda: ReducedRegressionForecaster(
            regressor=KNeighborsRegressor(n_neighbors=1),
            window_length=12,
            strategy="recursive"),
        'ExponentialSmoothing':
        lambda: ExponentialSmoothing(
            trend="add", seasonal="multiplicative", sp=12),
        'AutoETS':
        lambda: AutoETS(auto=True, sp=12, n_jobs=-1),
        'AutoARIMA':
        lambda: AutoARIMA(sp=12, suppress_warnings=True),
        'TBATS':
        lambda: TBATS(sp=12, use_trend=True, use_box_cox=False),
        'BATS':
        lambda: BATS(sp=12, use_trend=True, use_box_cox=False),
        'EnsembleForecaster':
        _build_ensemble,
    }
    if model_type not in builders:
        # Previously this surfaced later as an UnboundLocalError on `forecaster`.
        raise ValueError(
            f"Unknown model_type {model_type!r}; expected one of {sorted(builders)}"
        )

    y_train, y_test = temporal_train_test_split(
        prepare_data(data, customer_id, start=start, end=end),
        test_size=test_size_month)
    fh = ForecastingHorizon(y_test.index, is_relative=False)
    forecaster = builders[model_type]()

    try:
        forecaster.fit(y_train)
    except Exception:
        # Best-effort retry on data shifted by +1 -- presumably for
        # multiplicative models that reject non-positive values; confirm.
        # NOTE(review): the prediction is then on the shifted scale too.
        forecaster.fit(y_train + 1)
    y_pred = forecaster.predict(fh)

    # os.path.join keeps the default '' relative to the working directory,
    # instead of producing a path at the filesystem root ('/<file>').
    model_file = os.path.join(
        model_storage_path,
        f'{customer_id}_{model_type}_{start}_{end}_{test_size_month}.model')
    dump(forecaster, model_file)

    print('sMAPE Loss :', smape_loss(y_test, y_pred))
    plot = plot_series(y_train,
                       y_test,
                       y_pred,
                       labels=["y_train", "y_test", "y_pred"])
    return plot
# In[17]:

# Hold out the last 36 observations; the horizon covers exactly that span.
TEST_SIZE = 36
FH = TEST_SIZE
fh = np.arange(1, FH + 1)
train, test = temporal_train_test_split(airlines, test_size=TEST_SIZE)
plot_ys(train, test, labels=['train', 'test'])

# ## Naive Forecaster

# In[19]:

# Fit one naive forecaster per strategy and plot it with its sMAPE in the title.
strategies = ['last', 'mean', 'drift']
for strategy in strategies:
    forecaster = NaiveForecaster(strategy=strategy)
    forecaster.fit(train)
    y_pred = forecaster.predict(fh)
    plot_ys(train, test, y_pred, labels=['train', 'test', 'preds'])
    plt.title(f'strategy : {strategy} - smape_loss : {round(smape_loss(test,y_pred),4)}')

# ## Tuning
# ### Tune Forecaster

# In[44]:

from sktime.forecasting.model_selection import SlidingWindowSplitter, ForecastingGridSearchCV

# In[33]:
def test_strategy_mean_and_last_seasonal_additional_combinations(
        n, window_length, sp, strategy):
    """Check that naive forecasters yield the right forecasts given simple data.

    Test for perfectly cyclic data, and for robustness against a missing value.
    More specifically,
    check time series of n * window_length with a 1:n-1 train/test split,
    for different combinations of the period and seasonal periodicity.
    The time series contains perfectly cyclic data,
    so switching between the "mean" and "last" strategies should not make a
    difference.

    Parameters
    ----------
    n : int
        Total series length as a multiple of ``window_length``.
    window_length : int
        Number of leading points used as training data.
    sp : int
        Seasonal periodicity; the data cycle through ``1.0 .. sp``.
    strategy : str
        Strategy passed to ``NaiveForecaster``.
    """
    # given <window_length> hours of data with a seasonal periodicity of <sp> hours
    n_timepoints = n * window_length
    freq = pd.Timedelta("1h")
    data = pd.Series(
        # ``closed=`` was removed from date_range in pandas 2.0 and had no
        # effect here, because only a start and a number of periods are given.
        index=pd.date_range("2021-06-01 00:00", periods=n_timepoints, freq=freq),
        data=([float(i) for i in range(1, sp + 1)] * n_timepoints)[:n_timepoints],
    )

    # For selected cases, remove a redundant data point by making it NaN
    if window_length > sp:
        # create a trailing NaN value in the training set; .iloc makes the
        # positional access explicit on the datetime-indexed series
        data.iloc[window_length - 1] = np.nan

    # Split into train and test data (positional split)
    train_data = data.iloc[:window_length]
    test_data = data.iloc[window_length:]

    # Forecast data does not retain the original frequency
    test_data.index.freq = None

    # For example, for n=2, window_length=4, sp=3:
    # print(train_data)
    # 2021-06-01 00:00:00    1.0
    # 2021-06-01 01:00:00    2.0
    # 2021-06-01 02:00:00    3.0
    # 2021-06-01 03:00:00    NaN
    # Freq: H, dtype: float64
    # print(test_data)
    # 2021-06-01 04:00:00    2.0  # (value of 3 hours earlier)
    # 2021-06-01 05:00:00    3.0  # (value of 3 hours earlier)
    # 2021-06-01 06:00:00    1.0  # (value of 6 hours earlier)
    # 2021-06-01 07:00:00    2.0  # (value of 6 hours earlier)
    # dtype: float64

    # forecast the next <(n-1) x window_length> hours with periodicity of <sp> hours
    fh = ForecastingHorizon(test_data.index, is_relative=False)
    model = NaiveForecaster(strategy=strategy, sp=sp)
    model.fit(train_data)
    forecast_data = model.predict(fh)

    # Make sure that the model (object) reports that it handles missing data
    assert model.get_tag("handles-missing-data")

    if sp < window_length:
        # We expect a perfect forecast given our perfectly cyclic data
        pd.testing.assert_series_equal(forecast_data, test_data)
    else:
        # We expect a few forecasts yield NaN values: the first
        # (sp - window_length) positions of every season have no training data.
        for i in range(1 + len(test_data) // sp):
            test_data.iloc[i * sp:i * sp + sp - window_length] = np.nan
        pd.testing.assert_series_equal(forecast_data, test_data)