def forecast_test(df, account):
    """Back-test a Theta forecast for one account and save a comparison plot.

    The frame is split at a fixed date; a ``ThetaModel`` is fitted on the
    ``NetExchange`` values before the split and a 12-step forecast is plotted
    together with the training history (from 2010 on) and the held-out
    values.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``NetExchange`` column whose index can be compared with
        the date string ``'2019-12-01'``.
    account : str
        Used as the plot title and as the file name of the saved figure.

    Returns
    -------
    None
        The figure is written to ``./figures_us/{account}.png``.
    """
    # Calculate split point and split data.
    # NOTE: the frame passed in as ``df`` is used here; previously the
    # function silently read a module-level ``sales_df`` instead.
    split_index = '2019-12-01'
    df_train = df[df.index < split_index]
    df_test = df[df.index >= split_index]

    endog = df_train.NetExchange
    exog = df_test.NetExchange

    # Create and fit model, and forecast 12 steps ahead. Deseasonalizing is
    # only enabled when at least 24 observations are available.
    mod = ThetaModel(endog, deseasonalize=(len(endog) >= 24))
    res = mod.fit(disp=0)
    fcast = res.forecast(12)

    # Create plot for test forecast
    fig, ax = plt.subplots()
    try:
        ax.set_title(account)
        ax.set_xlabel('Date')
        ax.set_ylabel('Net Exchange')

        # Plot forecast, and actual sales history
        fcast.plot(ax=ax)
        endog.loc['2010':].plot(ax=ax)
        exog.plot(ax=ax)
        plt.legend()

        # Save figure for reference
        plt.savefig(f'./figures_us/{account}.png')
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close(fig)
def fit(self, y, **kwargs):
    """
    Fit the trend component in the boosting loop for an optimized theta model.

    Parameters
    ----------
    y : array-like
        Series to fit. It is modified with ``y -= bias`` before the model is
        built.
    **kwargs : dict
        Stored on ``self.kwargs``. Must contain the key ``'bias'``.

    Returns
    -------
    object
        The value stored in ``self.fitted``.

    """
    self.kwargs = kwargs
    bias = kwargs['bias']
    # NOTE(review): augmented assignment; if ``y`` is a mutable array this
    # presumably changes the caller's object in place -- confirm intent.
    y -= bias
    # NOTE(review): ``bias`` is added to the ThetaModel object itself, not to
    # fitted values; this looks unintended -- confirm.
    theta_model = ThetaModel(y, method="additive", period=1) + bias
    # NOTE(review): ``fitted`` is never used after this line.
    fitted = theta_model.fit()
    # NOTE(review): stores ``theta_model`` rather than ``fitted`` -- confirm.
    self.fitted = theta_model
    last_fitted_values = self.fitted[-1]
    self.model_params = last_fitted_values
    return self.fitted
def _theta_forecast(self, series):
    """Fit a deseasonalized Theta model and forecast ``len(series)`` steps.

    The seasonal period is read from ``self._analysis['theta_period']``; the
    seasonality test is disabled and the forecast uses ``theta=20``.
    """
    seasonal_period = self._analysis['theta_period']
    horizon = len(series)
    theta_model = ThetaModel(
        series,
        period=seasonal_period,
        deseasonalize=True,
        use_test=False,
    )
    fitted = theta_model.fit()
    return fitted.forecast(steps=horizon, theta=20)
def test_pi_width():
    """Check that the prediction-interval spread increases each step (GH 7075)."""
    rng = np.random.RandomState(1233091)
    noise = rng.standard_normal(100)
    series = np.arange(100) + noise

    fitted = ThetaModel(series, period=12, deseasonalize=False).fit()
    bounds = np.asarray(fitted.prediction_intervals(24))

    # Difference between the two interval columns for every forecast step.
    spread = np.squeeze(np.diff(bounds, axis=1))
    assert np.all(np.diff(spread) > 0)
def test_alt_index(indexed_data):
    """Forecast with alternative index types.

    A period of 12 is supplied when the index has no usable ``freq``. When
    the index has a ``freq`` attribute that is ``None``, both forecasting
    calls are expected to emit a ``UserWarning``.
    """
    index = indexed_data.index
    # A missing attribute and ``freq is None`` are treated the same way.
    lacks_freq = getattr(index, "freq", None) is None
    period = 12 if lacks_freq else None
    fitted = ThetaModel(indexed_data, period=period).fit()

    expect_warning = hasattr(index, "freq") and index.freq is None
    if not expect_warning:
        fitted.forecast_components(37)
        fitted.forecast(23)
        return

    with pytest.warns(UserWarning):
        fitted.forecast_components(37)
    with pytest.warns(UserWarning):
        fitted.forecast(23)
def test_forecast_seasonal_alignment(data, period):
    """Forecast seasonal component must continue the fitted seasonal cycle."""
    model = ThetaModel(
        data,
        period=period,
        deseasonalize=True,
        use_test=False,
        difference=False,
    )
    fitted = model.fit(use_mle=False)
    components = fitted.forecast_components(32)

    # Positions of the forecast steps, counted from the start of the sample.
    nobs = data.shape[0]
    horizon = components.shape[0]
    positions = np.arange(nobs, nobs + horizon)

    expected = fitted._seasonal[positions % period]
    np.testing.assert_allclose(components.seasonal, expected)
def test_forecast_errors(data):
    """Invalid ``steps`` or ``theta`` arguments must raise ``ValueError``."""
    fitted = ThetaModel(data, period=12).fit()
    bad_steps = "steps must be a positive integer"
    bad_theta = "theta must be a float"

    with pytest.raises(ValueError, match=bad_steps):
        fitted.forecast(-1)

    with pytest.raises(ValueError, match=bad_theta):
        fitted.forecast(7, theta=0.99)

    with pytest.raises(ValueError, match=bad_steps):
        fitted.forecast_components(0)
def _theta_forecast(self, series):
    """Return a Theta forecast as long as ``series`` itself.

    Uses the period stored in ``self._analysis['theta_period']``, always
    deseasonalizes (no seasonality test) and forecasts with ``theta=20``.
    """
    period = self._analysis['theta_period']
    n_steps = len(series)

    # Disabled idea: replace the end points of the series by a mean value
    # to avoid extreme cases where the forecast starts at a single point,
    # which may happen for very noisy data.
    # series[0] = series[::period].mean()
    # series[-1] = series[::-period].mean()

    options = {'period': period, 'deseasonalize': True, 'use_test': False}
    result = ThetaModel(series, **options).fit()
    return result.forecast(steps=n_steps, theta=20)
def test_no_freq():
    """An irregular date index without an explicit period must be rejected."""
    dates = pd.date_range("2000-1-1", periods=300)
    # Irregular spacing: odd-numbered picks are shifted forward by one.
    positions = [2 * i + int((i % 2) == 1) for i in range(100)]
    values = np.random.standard_normal(100)
    series = pd.Series(values, index=dates[positions])

    with pytest.raises(ValueError, match="You must specify a period or"):
        ThetaModel(series)
def test_smoke(data, period, use_mle, deseasonalize, use_test, diff, model):
    """Smoke-test fitting, summary and forecasting over the option grid."""
    # An ndarray has no index to take a period from, so skip that combination.
    if isinstance(data, np.ndarray) and period is None:
        return

    options = {
        "period": period,
        "deseasonalize": deseasonalize,
        "use_test": use_test,
        "difference": diff,
        "method": model,
    }
    fitted = ThetaModel(data, **options).fit(use_mle=use_mle)

    summary_text = str(fitted.summary())
    assert "b0" in summary_text

    fitted.forecast(36)
    fitted.forecast_components(47)

    fitted_model = fitted.model
    assert fitted_model.use_test is (use_test and fitted_model.deseasonalize)
    assert fitted_model.difference is diff
clamped_sales_df = raw_sales_df.append(end_clamp) clamped_sales_df.index = pd.to_datetime(clamped_sales_df.index) sales_df = clamped_sales_df.resample('M').sum().filter(['NetExchange']) if sales_df.NetExchange.sum() <= 0: print('No Data; forecast aborted') continue # Run example forecast # forecast_test(sales_df, account) # Prepare data for forecast endog = sales_df.NetExchange # Create and fit model, and forecast for 12 months mod = ThetaModel(endog, deseasonalize=(len(endog) >= 24)) res = mod.fit(disp=0) fcast = res.forecast(15) # Plot forecast data try: res.plot_predict( 15, alpha=0.2, in_sample=True, ) plt.hlines(y=0, xmin=dt.datetime.strptime('2010-01-01', '%Y-%M-%d'), xmax=dt.datetime.strptime('2022-04-01', '%Y-%M-%d')) # endog['2016-01-01':].plot() plt.xlim((dt.datetime.strptime('2016-01-01', '%Y-%M-%d'),
# clearly seasonal but does not have a clear trend during the same. reader = pdr.fred.FredReader(["HOUST"], start="1980-01-01", end="2020-04-01") data = reader.read() housing = data.HOUST housing.index.freq = housing.index.inferred_freq ax = housing.plot() # We fit specify the model without any options and fit it. The summary # shows that the data was deseasonalized using the multiplicative method. # The drift is modest and negative, and the smoothing parameter is fairly # low. from statsmodels.tsa.forecasting.theta import ThetaModel tm = ThetaModel(housing) res = tm.fit() print(res.summary()) # The model is first and foremost a forecasting method. Forecasts are # produced using the `forecast` method from fitted model. Below we produce a # hedgehog plot by forecasting 2-years ahead every 2 years. # # **Note**: the default $\theta$ is 2. forecasts = {"housing": housing} for year in range(1995, 2020, 2): sub = housing[:str(year)] res = ThetaModel(sub).fit() fcast = res.forecast(24) forecasts[str(year)] = fcast
def get_stats():
    """Return the latest device count, a rating and a short-term trend.

    Reads the router name from the ``'loc'`` key of the JSON request body,
    loads that router's counts ordered by timestamp and builds:

    * ``num``   -- the most recent device count,
    * ``state`` -- ``'high'``, ``'medium'`` or ``'low'``, whichever of the
      max / median / min of the recent history is closest to ``num``,
    * ``trend`` -- derived from a one-step ``ThetaModel`` forecast over the
      last hour of counts (falling back to "last value minus mean" when the
      model cannot be fitted).

    Aborts with 400 when the JSON body is missing or lacks ``'loc'`` and
    with 404 when the router is unknown or has no recorded counts.

    Returns
    -------
    dict
        ``{'num': ..., 'state': ..., 'trend': ...}``
    """
    payload = request.json
    if not payload:
        abort(400)
    print(payload)
    try:
        router_name = payload['loc']
    except (KeyError, TypeError):
        # Body is JSON but carries no usable 'loc' entry.
        abort(400)

    router = Router.query.filter_by(name=router_name).first()
    if router is None:
        abort(404)
    data = Count.query.filter_by(router_id=router.id).order_by(
        Count.timestamp).all()
    if not data:
        abort(404)

    # Get the last recorded amount of people
    latest = data[-1]
    last_count = latest.devices
    current_time = latest.timestamp

    # Get historical high/medium/low rating. The window starts on the last
    # day of the month before the latest record.
    threshold = current_time.replace(day=1) - timedelta(days=1)
    past_counts = np.array(
        [x.devices for x in data if x.timestamp >= threshold])
    std = np.std(past_counts)
    references = (
        np.max(past_counts),
        np.median(past_counts),
        np.min(past_counts),
    )
    ind = np.argmin([np.abs(last_count - ref) for ref in references])
    if ind == 0:
        state = 'high'
    elif ind == 1:
        state = 'medium'
    else:  # ind == 2
        state = 'low'

    # Restrict to the last hour for the trend estimate.
    threshold = current_time - timedelta(hours=1)
    past_counts = np.array(
        [x.devices for x in data if x.timestamp >= threshold])

    # Predict upcoming trend
    try:
        model = ThetaModel(np.array(past_counts), period=10)
        model_fit = model.fit(disp=0)
        yhat = list(model_fit.forecast())[0]
        trend_val = int(yhat - last_count)
    except Exception:
        # Best-effort fallback when the model cannot be fitted or forecast
        # (e.g. too few observations): compare the last value to the mean.
        trend_val = int(past_counts[-1] - np.mean(past_counts))

    if trend_val == 0:
        trend = 'no change'
    else:
        direction = 'increasing' if trend_val > 0 else 'decreasing'
        # Changes within one standard deviation only count as "slight".
        if abs(trend_val) > std:
            trend = direction
        else:
            trend = f'slightly {direction}'

    return {'num': last_count, 'state': state, 'trend': trend}