q_values = range(0, 3)
S = range(0, 3)
s_values = [0, 11, 77]
warnings.filterwarnings("ignore")
hourly_arima_res = evaluate_pdq(bestillingHourlySeries, p_values, d_values, q_values)
hourly_sarimax_res = evaluate_PDQs(bestillingHourlySeries, S, S, S, s_values, hourly_arima_res[0])

# --- SARIMAX: what the best model looks like for daily calls on the full dataset ---
daily_model = sx.SARIMAX(bestillingDailySeries, exog=None, order=(7, 1, 0),
                         seasonal_order=(1, 1, 1, 7), trend='t')
daily_model_fit = daily_model.fit(disp=0)
yhat = daily_model_fit.fittedvalues
print(daily_model_fit.summary())

# plot residual errors
residuals = DataFrame(daily_model_fit.resid)
residuals.plot()
pyplot.suptitle('Residuals for SARIMAX(7,1,0)(1,1,1,7)')
pyplot.show()
residuals.plot(kind='kde')
pyplot.suptitle('Residuals for SARIMAX(7,1,0)(1,1,1,7)')
pyplot.show()
print(residuals.describe())
pyplot.plot(bestillingDailySeries, 'k-', label='actual calls', alpha=0.7)
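# The helpers `evaluate_pdq` / `evaluate_PDQs` used above are not shown in this
# snippet. As a rough, hypothetical illustration only (not the original
# implementation), an AIC-based grid search over (p, d, q) of the kind such a
# helper typically performs might look like this:
import itertools
import warnings
import numpy as np
from statsmodels.tsa.statespace.sarimax import SARIMAX


def grid_search_pdq_sketch(series, p_values, d_values, q_values):
    """Return the (p, d, q) order with the lowest AIC on `series` (sketch)."""
    best_order, best_aic = None, np.inf
    warnings.filterwarnings("ignore")
    for order in itertools.product(p_values, d_values, q_values):
        try:
            fit = SARIMAX(series, order=order).fit(disp=0)
        except Exception:
            # Skip orders that fail to estimate
            continue
        if fit.aic < best_aic:
            best_order, best_aic = order, fit.aic
    return best_order, best_aic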
def test_varmax(): # Clear warnings varmax.__warningregistry__ = {} np.random.seed(371934) nobs = 100 eps = np.random.normal(size=nobs) exog = np.random.normal(size=(nobs, 1)) eps1 = np.zeros(nobs) eps2 = np.zeros(nobs) eps2[49] = 1 eps3 = np.zeros(nobs) eps3[50:] = 1 # VAR(2) - single series mod1 = varmax.VARMAX([[0]], order=(2, 0), trend='nc') mod2 = sarimax.SARIMAX([0], order=(2, 0, 0)) actual = mod1.simulate([0.5, 0.2, 1], nobs, state_shocks=eps, initial_state=np.zeros(mod1.k_states)) desired = mod2.simulate([0.5, 0.2, 1], nobs, state_shocks=eps, initial_state=np.zeros(mod2.k_states)) assert_allclose(actual, desired) # VMA(2) - single series mod1 = varmax.VARMAX([[0]], order=(0, 2), trend='nc') mod2 = sarimax.SARIMAX([0], order=(0, 0, 2)) actual = mod1.simulate([0.5, 0.2, 1], nobs, state_shocks=eps, initial_state=np.zeros(mod1.k_states)) desired = mod2.simulate([0.5, 0.2, 1], nobs, state_shocks=eps, initial_state=np.zeros(mod2.k_states)) assert_allclose(actual, desired) # VARMA(2, 2) - single series with warnings.catch_warnings(): warnings.simplefilter("ignore") mod1 = varmax.VARMAX([[0]], order=(2, 2), trend='nc') mod2 = sarimax.SARIMAX([0], order=(2, 0, 2)) actual = mod1.simulate([0.5, 0.2, 0.1, -0.2, 1], nobs, state_shocks=eps, initial_state=np.zeros(mod1.k_states)) desired = mod2.simulate([0.5, 0.2, 0.1, -0.2, 1], nobs, state_shocks=eps, initial_state=np.zeros(mod2.k_states)) assert_allclose(actual, desired) # VARMA(2, 2) + trend - single series mod1 = varmax.VARMAX([[0]], order=(2, 2), trend='c') mod2 = sarimax.SARIMAX([0], order=(2, 0, 2), trend='c') actual = mod1.simulate([10, 0.5, 0.2, 0.1, -0.2, 1], nobs, state_shocks=eps, initial_state=np.zeros(mod1.k_states)) desired = mod2.simulate([10, 0.5, 0.2, 0.1, -0.2, 1], nobs, state_shocks=eps, initial_state=np.zeros(mod2.k_states)) assert_allclose(actual, desired) # VAR(1) transition = np.array([[0.5, 0.1], [-0.1, 0.2]]) mod = varmax.VARMAX([[0, 0]], order=(1, 0), trend='nc') actual = mod.simulate(np.r_[transition.ravel(), 1., 0, 1.], nobs, state_shocks=np.c_[eps1, eps1], initial_state=np.zeros(mod.k_states)) assert_allclose(actual, 0) actual = mod.simulate(np.r_[transition.ravel(), 1., 0, 1.], nobs, state_shocks=np.c_[eps1, eps1], initial_state=[1, 1]) desired = np.zeros((nobs, 2)) state = np.r_[1, 1] for i in range(nobs): desired[i] = state state = np.dot(transition, state) assert_allclose(actual, desired) # VAR(1) + measurement error mod = varmax.VARMAX([[0, 0]], order=(1, 0), trend='nc', measurement_error=True) actual = mod.simulate(np.r_[transition.ravel(), 1., 0, 1., 1., 1.], nobs, measurement_shocks=np.c_[eps, eps], state_shocks=np.c_[eps1, eps1], initial_state=np.zeros(mod.k_states)) assert_allclose(actual, np.c_[eps, eps]) # VARX(1) mod = varmax.VARMAX(np.zeros((nobs, 2)), order=(1, 0), trend='nc', exog=exog) actual = mod.simulate(np.r_[transition.ravel(), 5, -2, 1., 0, 1.], nobs, state_shocks=np.c_[eps1, eps1], initial_state=[1, 1]) desired = np.zeros((nobs, 2)) state = np.r_[1, 1] for i in range(nobs): desired[i] = state state = exog[i] * [5, -2] + np.dot(transition, state) assert_allclose(actual, desired) # VMA(1) # TODO: This is just a smoke test mod = varmax.VARMAX(np.random.normal(size=(nobs, 2)), order=(0, 1), trend='nc') mod.simulate(mod.start_params, nobs) # VARMA(2, 2) + trend + exog # TODO: This is just a smoke test with warnings.catch_warnings(): warnings.simplefilter("ignore") mod = varmax.VARMAX(np.random.normal(size=(nobs, 2)), order=(2, 2), trend='c', exog=exog) mod.simulate(mod.start_params, nobs)
def test_simulate(): # Test for simulation of new time-series from scipy.signal import lfilter # Common parameters nsimulations = 10 sigma2 = 2 measurement_shocks = np.zeros(nsimulations) state_shocks = np.random.normal(scale=sigma2**0.5, size=nsimulations) # Random walk model, so simulated series is just the cumulative sum of # the shocks mod = KalmanFilter(k_endog=1, k_states=1) mod['design', 0, 0] = 1. mod['transition', 0, 0] = 1. mod['selection', 0, 0] = 1. actual = mod.simulate(nsimulations, measurement_shocks=measurement_shocks, state_shocks=state_shocks)[0].squeeze() desired = np.r_[0, np.cumsum(state_shocks)[:-1]] assert_allclose(actual, desired) # Local level model, so simulated series is just the cumulative sum of # the shocks plus the measurement shock mod = KalmanFilter(k_endog=1, k_states=1) mod['design', 0, 0] = 1. mod['transition', 0, 0] = 1. mod['selection', 0, 0] = 1. actual = mod.simulate(nsimulations, measurement_shocks=np.ones(nsimulations), state_shocks=state_shocks)[0].squeeze() desired = np.r_[1, np.cumsum(state_shocks)[:-1] + 1] assert_allclose(actual, desired) # Local level-like model with observation and state intercepts, so # simulated series is just the cumulative sum of the shocks minus the state # intercept, plus the observation intercept and the measurement shock mod = KalmanFilter(k_endog=1, k_states=1) mod['obs_intercept', 0, 0] = 5. mod['design', 0, 0] = 1. mod['state_intercept', 0, 0] = -2. mod['transition', 0, 0] = 1. mod['selection', 0, 0] = 1. actual = mod.simulate(nsimulations, measurement_shocks=np.ones(nsimulations), state_shocks=state_shocks)[0].squeeze() desired = np.r_[1 + 5, np.cumsum(state_shocks - 2)[:-1] + 1 + 5] assert_allclose(actual, desired) # Model with time-varying observation intercept mod = KalmanFilter(k_endog=1, k_states=1, nobs=10) mod['obs_intercept'] = (np.arange(10) * 1.).reshape(1, 10) mod['design', 0, 0] = 1. mod['transition', 0, 0] = 1. mod['selection', 0, 0] = 1. actual = mod.simulate(nsimulations, measurement_shocks=measurement_shocks, state_shocks=state_shocks)[0].squeeze() desired = np.r_[0, np.cumsum(state_shocks)[:-1] + np.arange(1, 10)] assert_allclose(actual, desired) # Model with time-varying observation intercept, check that error is raised # if more simulations are requested than are nobs. mod = KalmanFilter(k_endog=1, k_states=1, nobs=10) mod['obs_intercept'] = (np.arange(10) * 1.).reshape(1, 10) mod['design', 0, 0] = 1. mod['transition', 0, 0] = 1. mod['selection', 0, 0] = 1. assert_raises(ValueError, mod.simulate, nsimulations + 1, measurement_shocks, state_shocks) # ARMA(1, 1): phi = [0.1], theta = [0.5], sigma^2 = 2 phi = 0.1 theta = 0.5 mod = sarimax.SARIMAX([0], order=(1, 0, 1)) mod.update(np.r_[phi, theta, sigma2]) actual = mod.ssm.simulate(nsimulations, measurement_shocks=measurement_shocks, state_shocks=state_shocks, initial_state=np.zeros( mod.k_states))[0].squeeze() desired = lfilter([1, theta], [1, -phi], np.r_[0, state_shocks[:-1]]) assert_allclose(actual, desired) # SARIMAX(1, 0, 1)x(1, 0, 1, 4), this time using the results object call mod = sarimax.SARIMAX([0.1, 0.5, -0.2], order=(1, 0, 1), seasonal_order=(1, 0, 1, 4)) res = mod.filter([0.1, 0.5, 0.2, -0.3, 1]) actual = res.simulate(nsimulations, measurement_shocks=measurement_shocks, state_shocks=state_shocks, initial_state=np.zeros(mod.k_states)) desired = lfilter(res.polynomial_reduced_ma, res.polynomial_reduced_ar, np.r_[0, state_shocks[:-1]]) assert_allclose(actual, desired)
def test_start_end_int(which): endog = dta['infl'].copy() nobs = len(endog) if which == 'range': endog.index = pd.RangeIndex(nobs) endog_init = endog.iloc[:-1] index_plus2 = pd.RangeIndex(nobs + 2) if which == 'range2': endog.index = pd.RangeIndex(stop=nobs * 2, step=2) endog_init = endog.iloc[:-1] index_plus2 = pd.RangeIndex((nobs + 2) * 2, step=2) elif which == 'int64': endog.index = NumericIndex(np.arange(nobs)) endog_init = endog.iloc[:-1] index_plus2 = NumericIndex(np.arange(nobs + 2)) elif which == 'numpy': endog = endog.values endog_init = endog[:-1] index_plus2 = pd.RangeIndex(nobs + 2) elif which == 'list': endog = endog.tolist() endog_init = endog[:-1] index_plus2 = pd.RangeIndex(nobs + 2) mod = sarimax.SARIMAX(endog_init) res = mod.smooth([0.5, 1.0]) # Default is the last in-sample period news = res.news(endog) desired = index_plus2[-4:-3] assert_(news.total_impacts.index.equals(desired)) # Start, periods news = res.news(endog, start=mod.nobs - 1, periods=1) desired = index_plus2[-4:-3] assert_(news.total_impacts.index.equals(desired)) news = res.news(endog, start=mod.nobs - 2, periods=2) desired = index_plus2[-5:-3] assert_(news.total_impacts.index.equals(desired)) # End, periods news = res.news(endog, end=mod.nobs - 1, periods=1) desired = index_plus2[-4:-3] assert_(news.total_impacts.index.equals(desired)) news = res.news(endog, end=mod.nobs - 2, periods=2) desired = index_plus2[-6:-4] assert_(news.total_impacts.index.equals(desired)) # Start, end # Note: end is inclusive, like `get_prediction`. news = res.news(endog, start=mod.nobs - 2, end=mod.nobs - 1) desired = index_plus2[-5:-3] assert_(news.total_impacts.index.equals(desired)) if which not in ['numpy', 'list']: predicted = res.predict(start=mod.nobs - 2, end=mod.nobs - 1) assert_(news.total_impacts.index.equals(predicted.index)) news = res.news(endog, start=mod.nobs, end=mod.nobs) desired = index_plus2[-3:-2] assert_(news.total_impacts.index.equals(desired)) if which not in ['numpy', 'list']: predicted = res.predict(start=mod.nobs, end=mod.nobs) assert_(news.total_impacts.index.equals(predicted.index)) news = res.news(endog, start=mod.nobs, end=mod.nobs + 1) desired = index_plus2[-3:-1] assert_(news.total_impacts.index.equals(desired)) if which not in ['numpy', 'list']: predicted = res.predict(start=mod.nobs, end=mod.nobs + 1) assert_(news.total_impacts.index.equals(predicted.index))
def test_mixed_stationary(): # More specific tests when one or more blocks are initialized as stationary endog = np.zeros(10) mod = sarimax.SARIMAX(endog, order=(2, 1, 0)) phi = [0.5, -0.2] sigma2 = 2. mod.update(np.r_[phi, sigma2]) init = Initialization(mod.k_states) init.set(0, 'diffuse') init.set((1, 3), 'stationary') desired_cov = np.zeros((3, 3)) T = np.array([[0.5, 1], [-0.2, 0]]) Q = np.diag([sigma2, 0]) desired_cov[1:, 1:] = solve_discrete_lyapunov(T, Q) check_initialization(mod, init, [0, 0, 0], np.diag([1, 0, 0]), desired_cov) init.clear() init.set(0, 'diffuse') init.set(1, 'stationary') init.set(2, 'approximate_diffuse') T = np.array([[0.5]]) Q = np.diag([sigma2]) desired_cov = np.diag([0, solve_discrete_lyapunov(T, Q), 1e6]) check_initialization(mod, init, [0, 0, 0], np.diag([1, 0, 0]), desired_cov) init.clear() init.set(0, 'diffuse') init.set(1, 'stationary') init.set(2, 'stationary') desired_cov[2, 2] = 0 check_initialization(mod, init, [0, 0, 0], np.diag([1, 0, 0]), desired_cov) # Test with a VAR model endog = np.zeros((10, 2)) mod = varmax.VARMAX( endog, order=(1, 0), ) intercept = [1.5, -0.1] transition = np.array([[0.5, -0.2], [0.1, 0.8]]) cov = np.array([[1.2, -0.4], [-0.4, 0.4]]) tril = np.tril_indices(2) params = np.r_[intercept, transition.ravel(), np.linalg.cholesky(cov)[tril]] mod.update(params) # > stationary, global init = Initialization(mod.k_states, 'stationary') desired_intercept = np.linalg.solve(np.eye(2) - transition, intercept) desired_cov = solve_discrete_lyapunov(transition, cov) check_initialization(mod, init, desired_intercept, np.diag([0, 0]), desired_cov) # > diffuse, global init.set(None, 'diffuse') check_initialization(mod, init, [0, 0], np.eye(2), np.diag([0, 0])) # > stationary, individually init.unset(None) init.set(0, 'stationary') init.set(1, 'stationary') a, Pinf, Pstar = init(model=mod) desired_intercept = [ intercept[0] / (1 - transition[0, 0]), intercept[1] / (1 - transition[1, 1]) ] desired_cov = np.diag([ cov[0, 0] / (1 - transition[0, 0]**2), cov[1, 1] / (1 - transition[1, 1]**2) ]) check_initialization(mod, init, desired_intercept, np.diag([0, 0]), desired_cov)
def sarimax_model(timeseries, seasonality_idx, mdl_order, fcst_window, ts_start, ts_end, verbose, exog=None): sarimax_fcst = pd.DataFrame() if exog is None: try: mod = smsar.SARIMAX(endog=timeseries, trend='n', order=mdl_order.order, seasonal_order=mdl_order.sorder+seasonality_idx) sarimax_mdl = mod.fit(disp=False) if verbose is True: print("SARIMA Info: Default Params - Endogenous Mode") except ValueError: mod = smsar.SARIMAX(endog=timeseries, trend='n', order=mdl_order.order, seasonal_order=(0, 1, 0, seasonality_idx[0])) sarimax_mdl = mod.fit(disp=False) if verbose is True: print("SARIMA Info: Custom Params - Endogenous Mode") sarimax_rslt = sarimax_mdl.predict(alpha=0.05, start=0, end=(len(timeseries)-1)+fcst_window) sarimax_rslt[12] = np.mean([sarimax_rslt[12-1], sarimax_rslt[12+1]]) # PATCH for buggy value sarimax_rslt_info = sarimax_mdl.get_prediction(end=(len(timeseries)-1)+fcst_window) sarimax_ci = sarimax_rslt_info.conf_int(alpha=0.05) sarimax_ci.columns = ['lower', 'upper'] sarimax_ci.lower[12] = np.mean([sarimax_ci.lower[12 - 1], sarimax_ci.lower[12 + 1]]) sarimax_ci.upper[12] = np.mean([sarimax_ci.upper[12 - 1], sarimax_ci.upper[12 + 1]]) # End of PATCH sarimax_fcst = pd.concat([timeseries, sarimax_rslt, sarimax_ci], axis=1) sarimax_fcst.columns = ['Actual', 'Forecast', 'CI Lower Bound', 'CI Upper Bound'] # y_pred = sarimax_fcst.loc[ts_start:ts_end] # sarimax_mase = mase_score(timeseries, y_pred.Forecast, seasonality_idx, 1) # remove first buggy value # sarimax_mape = mape_score(timeseries, y_pred.Forecast, 1) # remove first buggy value # print("MASE Score = {0:.2f}, MAPE Score = {1:.2f}".format(sarimax_mase, sarimax_mape)) # if sarimax_mase < 1 and sarimax_mape < 10: # print("SARIMA Info: Forecasting Accuracy is OK") # else: # print("SARIMA Info: Forecasting Accuracy is not OK, check the forecast results") elif exog is not None: # shape exogenous time serie for past values ts_start_exog = pd.to_datetime([ts_start]) ts_end_exog = pd.to_datetime([ts_end]) ts_exog_past = exog[ts_start_exog[0]:ts_end_exog[0]] try: mod = smsar.SARIMAX(endog=timeseries, exog=ts_exog_past, trend='n', order=mdl_order.order, seasonal_order=mdl_order.sorder + seasonality_idx) sarimax_mdl = mod.fit(disp=False) if verbose is True: print("SARIMA Info: Default Params - Exogenous Mode") except ValueError: mod = smsar.SARIMAX(endog=timeseries, exog=ts_exog_past, trend='n', order=mdl_order.order, seasonal_order=(0, 1, 0, seasonality_idx[0])) sarimax_mdl = mod.fit(disp=False) if verbose is True: print("SARIMA Info: Custom Params - Exogenous Mode") # shape exogenous time serie for future values ts_start_exog = pd.to_datetime([ts_end]) + DateOffset(months=1) ts_end_exog = pd.to_datetime([ts_end]) + DateOffset(months=fcst_window) ts_exog_future = exog[ts_start_exog[0]:ts_end_exog[0]] np_exog = np.array(ts_exog_future) # forecast time serie using exogenous factors sarimax_rslt = sarimax_mdl.predict(alpha=0.05, start=0, end=(len(timeseries) - 1) + fcst_window, exog=np_exog) sarimax_rslt[12] = np.mean([sarimax_rslt[12-1], sarimax_rslt[12+1]]) # PATCH for buggy value sarimax_fcst = pd.concat([timeseries, sarimax_rslt], axis=1) sarimax_fcst.columns = ['Actual', 'Forecast'] return sarimax_fcst
def test_sarimax_time_invariant(revisions, updates): # Construct previous and updated datasets endog = dta['infl'].copy() comparison_type = None if updates: endog1 = endog.loc[:'2009Q2'].copy() endog2 = endog.loc[:'2009Q3'].copy() else: endog1 = endog.loc[:'2009Q3'].copy() endog2 = endog.loc[:'2009Q3'].copy() # Without updates and without NaN values, we need to specify that # the type of the comparison object that we're passing is "updated" comparison_type = 'updated' if revisions: endog1.iloc[-1] = 0. # Get the previous results object and compute the news mod = sarimax.SARIMAX(endog1) res = mod.smooth([0.5, 1.0]) news = res.news(endog2, start='2009Q2', end='2010Q1', comparison_type=comparison_type) # Compute the true values for each combination of (revsions, updates) impact_dates = pd.period_range(start='2009Q2', end='2010Q1', freq='Q') impacted_variables = ['infl'] # Revisions if revisions and updates: revisions_index = pd.MultiIndex.from_arrays( [endog1.index[-1:], ['infl']], names=['revision date', 'revised variable']) # If we have updates, the revision is to 2009Q2 revision_impacts = endog2.iloc[-2] * 0.5**np.arange(4).reshape(4, 1) elif revisions: revisions_index = pd.MultiIndex.from_arrays( [endog1.index[-1:], ['infl']], names=['revision date', 'revised variable']) # With no updates, the revision is to 2009Q3 revision_impacts = np.r_[ 0, endog2.iloc[-1] * 0.5**np.arange(3)].reshape(4, 1) else: revisions_index = pd.MultiIndex.from_arrays( [[], []], names=['revision date', 'revised variable']) revision_impacts = None # Updates if updates: updates_index = pd.MultiIndex.from_arrays( [pd.period_range(start='2009Q3', periods=1, freq='Q'), ['infl']], names=['update date', 'updated variable']) update_impacts = np.array([[ 0, endog.loc['2009Q3'] - 0.5 * endog.loc['2009Q2'], 0.5 * endog.loc['2009Q3'] - 0.5**2 * endog.loc['2009Q2'], 0.5**2 * endog.loc['2009Q3'] - 0.5**3 * endog.loc['2009Q2']]]).T else: updates_index = pd.MultiIndex.from_arrays( [[], []], names=['update date', 'updated variable']) update_impacts = None # Impact forecasts if updates: prev_impacted_forecasts = np.r_[ endog1.iloc[-1] * 0.5**np.arange(4)].reshape(4, 1) else: prev_impacted_forecasts = np.r_[ endog1.iloc[-2], endog1.iloc[-1] * 0.5**np.arange(3)].reshape(4, 1) post_impacted_forecasts = np.r_[ endog2.iloc[-2], 0.5 ** np.arange(3) * endog2.iloc[-1]].reshape(4, 1) # News if updates: # Note: update_forecasts is created using the endog2 dataset even if # there were revisions, because it should be computed after revisions # have already been taken into account update_forecasts = [0.5 * endog2.loc['2009Q2']] update_realized = [endog2.loc['2009Q3']] news_desired = [update_realized[i] - update_forecasts[i] for i in range(len(update_forecasts))] weights = pd.DataFrame(np.r_[0, 0.5**np.arange(3)]).T else: update_forecasts = pd.Series([], dtype=np.float64) update_realized = pd.Series([], dtype=np.float64) news_desired = pd.Series([], dtype=np.float64) weights = pd.DataFrame(np.zeros((0, 4))) # Run unit tests check_news(news, revisions, updates, impact_dates, impacted_variables, revisions_index, updates_index, revision_impacts, update_impacts, prev_impacted_forecasts, post_impacted_forecasts, update_forecasts, update_realized, news_desired, weights)
def fun(i, k):
    sys.stdout = open(str(k + 1) + '/' + str(i - 1) + '.csv', "w")
    # Time-of-day parser (defined but never used below); pd.time.strptime does not exist
    dateparse = lambda dates: pd.to_datetime(dates, format='%H-%M-%S.%f')
    data = pd.read_csv(str(k + 1) + '/sep' + str(i) + '.csv')
    # Series.from_csv was removed from pandas; read the file and squeeze to a Series
    ts = pd.read_csv(str(k + 1) + '/sep' + str(i) + '.csv', header=0,
                     index_col=0).squeeze("columns")
    info = pd.read_csv(str(k + 1) + '/sep' + str(i) + '.csv')
    num_of_for = 10  # NUMBER OF FORECASTS
    flag = 0
    count = 0
    s = info['#Passengers'].values
    for z in s:
        if z == 0:
            count = count + 1
    if count >= 0.99 * len(ts):
        flag = 1
    if flag == 1:
        for _ in range(num_of_for):
            print(0)
    else:
        # If zeros dominate the last 12 observations, set the third-to-last value
        # to 1, and vice versa (renamed from the original nested `fun` to avoid
        # shadowing the outer function)
        def adjust_tail(ser):
            co = 0
            c1 = 0
            for e in ser[len(ser) - 12:]:
                if e == 0:
                    co = co + 1
                if e == 1:
                    c1 = c1 + 1
            if co > c1:
                ser[len(ser) - 3] = 1
            if co < c1:
                ser[len(ser) - 3] = 0
            return ser

        tsmod = adjust_tail(ts)
        ts_log = np.sqrt(tsmod)
        moving_avg = ts_log.rolling(12).mean()
        ts_log_moving_avg_diff = ts_log - moving_avg
        ts_log_diff = ts_log - ts_log.shift()

        from statsmodels.tsa.seasonal import seasonal_decompose
        decomposition = seasonal_decompose(ts_log, period=2)  # `freq=` in older statsmodels
        trend = decomposition.trend
        seasonal = decomposition.seasonal
        residual = decomposition.resid
        ts_log_decompose = residual

        from statsmodels.tsa.statespace import sarimax
        model = sarimax.SARIMAX(ts_log, order=(4, 1, 1),
                                enforce_stationarity=False,
                                enforce_invertibility=False)
        results_ARIMA = model.fit()
        predictions_ARIMA_diff = pd.Series(results_ARIMA.fittedvalues, copy=True)
        predictions_ARIMA_diff_cumsum = predictions_ARIMA_diff.cumsum()
        # .ix was removed from pandas; use .iloc for positional access
        predictions_ARIMA_log = pd.Series(ts_log.iloc[0], index=ts_log.index)
        predictions_ARIMA_log = predictions_ARIMA_log.add(
            predictions_ARIMA_diff_cumsum, fill_value=0)
        predictions_ARIMA_log.head()
        predictions_ARIMA = np.square(predictions_ARIMA_log)
        start_index = len(ts)
        end_index = len(ts)
        # Note: the forecast values are on the sqrt scale used for modelling
        forecast = results_ARIMA.forecast(steps=num_of_for)
        for p in forecast:
            print(p)
    sys.stdout.close()
def get_pa_indices():
    a = acf(train["y"], nlags=240)
    p = pacf(train["y"], nlags=240)
    ab = a[np.abs(a) > (1.96 / np.sqrt(len(train["y"])))]
    pb = p[np.abs(p) > (1.96 / np.sqrt(len(train["y"])))]
    print("acf")
    print(np.where(np.isin(a, ab)))
    print("pacf")
    print(np.where(np.isin(p, pb)))
    return None


# get_pa_indices()

mod = sarimax.SARIMAX(train_bam["bam"].values, trend='n', order=(2, 0, 1),
                      seasonal_order=(2, 1, 0, 180))
results = mod.fit()
# print(results.summary())

test_bam['forecast'] = results.forecast(104)
fig = plt.figure(figsize=(12, 8))
print("forecast: " + str(test_bam["forecast"].shape))
print("test_bam: " + str(test_bam["bam"].shape))
plt.plot(train_bam["ds"], train_bam['bam'])
plt.plot(test_bam["ds"], test_bam["forecast"])
plt.show()
print("The Root Mean Squared Error is: " + str(
    np.sqrt(metrics.mean_squared_error(test_bam["bam"], test_bam["forecast"])))
)
max_date = data.period.max()
min_date = data.period.min()
num_of_actual_points = data.index.shape[0]
num_of_expected_points = (max_date.year - min_date.year) * 12 + max_date.month - min_date.month + 1
print("Date range: {} - {}".format(min_date.strftime("%d.%m.%Y"), max_date.strftime("%d.%m.%Y")))
print("Number of data points: {} of expected {}".format(num_of_actual_points, num_of_expected_points))

max_date = df.period.max()
min_date = df.period.min()
num_of_actual_points = df.index.shape[0]
num_of_expected_points = (max_date.year - min_date.year) * 12 + max_date.month - min_date.month + 1
print("Date range: {} - {}".format(min_date.strftime("%d.%m.%Y"), max_date.strftime("%d.%m.%Y")))
print("Number of data points: {} of expected {}".format(num_of_actual_points, num_of_expected_points))

from statsmodels.tsa.statespace import sarimax
model = sarimax.SARIMAX()
def test_smoothed_decomposition_sarimax(use_exog, trend, concentrate_scale, measurement_error): endog = np.array([[0.2, np.nan, 1.2, -0.3, -1.5]]).T exog = np.array([2, 5.3, -1, 3.4, 0.]) if use_exog else None trend_params = [0.1] ar_params = [0.5] exog_params = [1.4] meas_err_params = [1.2] cov_params = [0.8] params = [] if trend in ['c', 't']: params += trend_params if use_exog: params += exog_params params += ar_params if measurement_error: params += meas_err_params if not concentrate_scale: params += cov_params # Fit the models mod = sarimax.SARIMAX(endog, order=(1, 0, 0), trend=trend, exog=exog if use_exog else None, concentrate_scale=concentrate_scale, measurement_error=measurement_error) prior_mean = np.array([-0.4]) prior_cov = np.eye(1) * 1.2 mod.ssm.initialize_known(prior_mean, prior_cov) res = mod.smooth(params) # Check smoothed state # Get the decomposition of the smoothed state cd, coi, csi, cp = res.get_smoothed_decomposition( decomposition_of='smoothed_state') # Sum across contributions (i.e. from observations at each time period and # from the initial state) css = ((cd + coi).sum(axis=1) + csi.sum(axis=1) + cp.sum(axis=1)) css = css.unstack(level='state_to').values # Summing up all contributions should yield the actual smoothed state, # so the smoothed state vector is the desired result of this test ss = np.array(res.states.smoothed) assert_allclose(css, ss, atol=1e-12) # Check smoothed signal # Use the summed state contributions and multiply by the design matrix # to get the smoothed signal csf = ((css.T * mod['design'][:, :, None]).sum(axis=1) + mod['obs_intercept']).T # Summing up all contributions should yield the smoothed prediction of # the observed variables s_sig = res.predict(information_set='smoothed', signal_only=True) sf = res.predict(information_set='smoothed', signal_only=False) assert_allclose(csf[:, 0], sf) # Now check the smoothed signal against the sum computed from the # decomposed smoothed signal cd, coi, csi, cp = res.get_smoothed_decomposition( decomposition_of='smoothed_signal') # Sum across contributions (i.e. from observations and intercepts at each # time period and from the initial state) to get the smoothed signal cs_sig = ((cd + coi).sum(axis=1) + csi.sum(axis=1) + cp.sum(axis=1)) cs_sig = cs_sig.unstack(level='variable_to').values assert_allclose(cs_sig[:, 0], s_sig, atol=1e-12) # Add in the observation intercept to get the smoothed forecast csf = cs_sig + mod['obs_intercept'].T assert_allclose(csf[:, 0], sf)
def test_fit(): # Test that fitting works regardless of the level of memory conservation # used endog = dta['infl'].iloc[:20] mod = sarimax.SARIMAX(endog, order=(1, 0, 0), concentrate_scale=True) res = mod.fit(disp=False) options_smooth = [ 'memory_no_forecast', 'memory_no_filtered', 'memory_no_likelihood', 'memory_no_std_forecast' ] for option in options_smooth: mod.ssm.set_conserve_memory(0) setattr(mod.ssm, option, True) res2 = mod.fit(res.params, disp=False) # General check that smoothing results are available assert_allclose(res2.smoothed_state, res.smoothed_state, atol=1e-10) # Specific checks for each type if option == 'memory_no_forecast': assert_(res2.forecasts is None) assert_(res2.forecasts_error is None) assert_(res2.forecasts_error_cov is None) else: assert_allclose(res2.forecasts, res.forecasts) assert_allclose(res2.forecasts_error, res.forecasts_error) assert_allclose(res2.forecasts_error_cov, res.forecasts_error_cov) if option == 'memory_no_filtered': assert_(res2.filtered_state is None) assert_(res2.filtered_state_cov is None) else: assert_allclose(res2.filtered_state, res.filtered_state) assert_allclose(res2.filtered_state_cov, res.filtered_state_cov) assert_allclose(res2.llf, res.llf) if option == 'memory_no_likelihood': assert_(res2.llf_obs is None) else: assert_allclose(res2.llf_obs, res.llf_obs) if option == 'memory_no_std_forecast': assert_(res2.standardized_forecasts_error is None) else: assert_allclose(res2.standardized_forecasts_error, res.standardized_forecasts_error) options_filter_only = [ 'memory_no_predicted', 'memory_no_gain', 'memory_no_smoothing', 'memory_conserve' ] for option in options_filter_only[2:]: mod.ssm.set_conserve_memory(0) setattr(mod.ssm, option, True) res2 = mod.fit(res.params, disp=False) # General check that smoothing results are not available assert_(res2.smoothed_state is None) # Specific checks for each type if option in ['memory_no_predicted', 'memory_conserve']: assert_(res2.predicted_state is None) assert_(res2.predicted_state_cov is None) else: assert_allclose(res2.predicted_state, res.predicted_state) assert_allclose(res2.predicted_state_cov, res.predicted_state_cov) if option in ['memory_no_gain', 'memory_conserve']: assert_(res2.filter_results._kalman_gain is None) else: assert_allclose(res2.filter_results.kalman_gain, res.filter_results.kalman_gain)
def test_sarimax_time_varying(revisions, updates, which): # This is primarily a test that the `news` method works with a time-varying # setup (i.e. time-varying state space matrices). It tests a time-varying # SARIMAX model where the time-varying component has been set to zeros # against a time-invariant version of the model. # Construct previous and updated datasets endog = dta['infl'].copy() comparison_type = None if updates: endog1 = endog.loc[:'2009Q2'].copy() endog2 = endog.loc[:'2009Q3'].copy() else: endog1 = endog.loc[:'2009Q3'].copy() endog2 = endog.loc[:'2009Q3'].copy() # Without updates and without NaN values, we need to specify that # the type of the comparison object that we're passing is "updated" comparison_type = 'updated' if revisions: endog1.iloc[-1] = 0. exog1 = None exog2 = None trend = 'n' if which == 'exog': exog1 = np.ones_like(endog1) exog2 = np.ones_like(endog2) elif which == 'trend': trend = 't' # Compute the news from a model with a trend/exog term (so the model is # time-varying), but with the coefficient set to zero (so that it will be # equivalent to the time-invariant model) mod1 = sarimax.SARIMAX(endog1, exog=exog1, trend=trend) res1 = mod1.smooth([0., 0.5, 1.0]) news1 = res1.news(endog2, exog=exog2, start='2008Q1', end='2009Q3', comparison_type=comparison_type) # Compute the news from a model without a trend term mod2 = sarimax.SARIMAX(endog1) res2 = mod2.smooth([0.5, 1.0]) news2 = res2.news(endog2, start='2008Q1', end='2009Q3', comparison_type=comparison_type) attrs = [ 'total_impacts', 'update_impacts', 'revision_impacts', 'news', 'weights', 'update_forecasts', 'update_realized', 'prev_impacted_forecasts', 'post_impacted_forecasts', 'revisions_iloc', 'revisions_ix', 'updates_iloc', 'updates_ix' ] for attr in attrs: w = getattr(news1, attr) x = getattr(news2, attr) if isinstance(x, pd.Series): assert_series_equal(w, x) else: assert_frame_equal(w, x)
def get_results_with_val(df, exo, p, d, q, P, D, Q, s, model, y_col_name, val_size_perc, n_predictions=5): """Fit SARIMAX on input df (optional input and future exo regr) and predict validation + future values Or use param fitted model (optional input and future exo regr) to predict validation + future values Plot input and output (val+future) predictions Parameters ---------- df : DataFrame R Time Series exo : DataFrame, optional Exogenous Regressors to model Y p : int AR parameter for the SARIMAX on Y d : int Integrated parameter for the SARIMAX on Y q : int MA parameter for the SARIMAX on Y P : int Seasonal AR parameter for the SARIMAX on Y D : int Seasonal Integrated parameter for the SARIMAX on Y Q : int Seasonal MA parameter for the SARIMAX on Y s : int Seasonality timeframe for Y model : SARIMAX Fitted model, optional Pre-fitted SARIMAX model to use to predict Y values y_col_name : String Column name of Y values val_size_perc : Float Part of the df to use for Validation. Format: [0.0;1.0] n_predictions : int, optional Number of future values to predict for Y, by default 5 Returns ------- smodel: json Fitted SARIMAX model on Y results: DataFrame DataFrame including the train, validation and forecast values from the SARIMAX fitted model on Y Time Series """ X = df[y_col_name].values Y = df["Date"].values train_size = int(len(X) * (1 - val_size_perc)) train, test = X[:train_size], X[train_size:len(X)] week = Y[train_size:len(X)] exo_past, exo_future = None, None # Split Exo Regressor into past (train + val) and future (forecast) values if exo is not None: exo_past, exo_future = exo[:len(X)], exo[len(X):len(exo)] # Create SARIMAX model or use input model print("Checking model for fit...") if model is None: print("No input model, starting to fit SARIMAX" + str(p) + str(d) + str(q) + str(P) + str(D) + str(Q) + str(s)) smodel = pmdarima.arima.ARIMA(order=[p, d, q], method="lbfgs", maxiter=50, suppress_warnings=True) smodel = smodel.fit(df[y_col_name].values, exo_past) print("Finished SARIMAX fit.") else: print("Existing input model, will use it") smodel = model # Test model on the Validation set history = [x for x in train] predictions = list() for t in range(len(test)): model = sarimax.SARIMAX(history, order=smodel.order, seasonal_order=smodel.seasonal_order, enforce_stationarity=False) model_fit = model.fit(disp=0) output = model_fit.forecast() if output[0] < 0: yhat = 0 else: yhat = output[0] predictions.append(yhat) obs = test[t] history.append(obs) print("predicted=%f, expected=%f" % (yhat, obs)) error = metrics.mean_squared_error(test, predictions) print("Test MSE: %.3f" % error) # Add Train set to output data = pd.DataFrame() data["Date"] = Y[0:train_size] data["Predicted Net Order Value"] = None data["Actual Net Order Value"] = X[0:train_size] data["Classification"] = "train" # Add Validation set to output Tested = pd.DataFrame() Tested["Date"] = week Tested["Predicted Net Order Value"] = predictions Tested["Actual Net Order Value"] = test Tested["Classification"] = "test" Tested["Predicted Net Order Value"] = Tested[ "Predicted Net Order Value"].astype(float) Tested["Date"] = pd.to_datetime(Tested["Date"]) # Add Forecast set to output print("Predicting forecast values...") n_periods = n_predictions fitted, confint = smodel.predict(n_periods=n_periods, return_conf_int=True, exogenous=exo_future) print("Finished predicting forecast values.") rng = pd.date_range(df["Date"].max(), periods=n_periods, freq="7D") forecast = pd.DataFrame({ "Date": rng, "Predicted Net Order Value": fitted, 
"Actual Net Order Value": None, "Classification": "forecast", "Conf_lower": confint[:, 0], "Conf_Upper": confint[:, 1], }) forecast = forecast.drop(forecast.index[0]) # Combine all sets results = data.append(Tested, ignore_index=True) results = results.append(forecast, ignore_index=True) results["Date"] = pd.to_datetime(results["Date"]) # Reformat Dates to Date type results["Date"] = pd.to_datetime(results["Date"]) return smodel, results
def test_varmax(): steps = 10 # Clear warnings varmax.__warningregistry__ = {} # VAR(2) - single series mod1 = varmax.VARMAX([[0]], order=(2, 0), trend='n') mod2 = sarimax.SARIMAX([0], order=(2, 0, 0)) actual = mod1.impulse_responses([0.5, 0.2, 1], steps) desired = mod2.impulse_responses([0.5, 0.2, 1], steps) assert_allclose(actual, desired) # VMA(2) - single series mod1 = varmax.VARMAX([[0]], order=(0, 2), trend='n') mod2 = sarimax.SARIMAX([0], order=(0, 0, 2)) actual = mod1.impulse_responses([0.5, 0.2, 1], steps) desired = mod2.impulse_responses([0.5, 0.2, 1], steps) assert_allclose(actual, desired) # VARMA(2, 2) - single series with warnings.catch_warnings(): warnings.simplefilter("ignore") mod1 = varmax.VARMAX([[0]], order=(2, 2), trend='n') mod2 = sarimax.SARIMAX([0], order=(2, 0, 2)) actual = mod1.impulse_responses([0.5, 0.2, 0.1, -0.2, 1], steps) desired = mod2.impulse_responses([0.5, 0.2, 0.1, -0.2, 1], steps) assert_allclose(actual, desired) # VARMA(2, 2) + trend - single series warning = EstimationWarning match = r'VARMA\(p,q\) models is not' with pytest.warns(warning, match=match): mod1 = varmax.VARMAX([[0]], order=(2, 2), trend='c') mod2 = sarimax.SARIMAX([0], order=(2, 0, 2), trend='c') actual = mod1.impulse_responses([10, 0.5, 0.2, 0.1, -0.2, 1], steps) desired = mod2.impulse_responses([10, 0.5, 0.2, 0.1, -0.2, 1], steps) assert_allclose(actual, desired) # VAR(2) + constant # Stata: # webuse lutkepohl2 # var dln_inv dln_inc, lags(1/2) # irf create irf3, set(irf3) step(10) # irf table irf # irf table oirf params = [ -.00122728, .01503679, -.22741923, .71030531, -.11596357, .51494891, .05974659, .02094608, .05635125, .08332519, .04297918, .00159473, .01096298 ] irf_00 = [ 1, -.227419, -.021806, .093362, -.001875, -.00906, .009605, .001323, -.001041, .000769, .00032 ] irf_01 = [ 0, .059747, .044015, -.008218, .007845, .004629, .000104, .000451, .000638, .000063, .000042 ] irf_10 = [ 0, .710305, .36829, -.065697, .084398, .043038, .000533, .005755, .006051, .000548, .000526 ] irf_11 = [ 1, .020946, .126202, .066419, .028735, .007477, .009878, .003287, .001266, .000986, .0005 ] oirf_00 = [ 0.042979, -0.008642, -0.00035, 0.003908, 0.000054, -0.000321, 0.000414, 0.000066, -0.000035, 0.000034, 0.000015 ] oirf_01 = [ 0.001595, 0.002601, 0.002093, -0.000247, 0.000383, 0.000211, 0.00002, 0.000025, 0.000029, 4.30E-06, 2.60E-06 ] oirf_10 = [ 0, 0.007787, 0.004037, -0.00072, 0.000925, 0.000472, 5.80E-06, 0.000063, 0.000066, 6.00E-06, 5.80E-06 ] oirf_11 = [ 0.010963, 0.00023, 0.001384, 0.000728, 0.000315, 0.000082, 0.000108, 0.000036, 0.000014, 0.000011, 5.50E-06 ] mod = varmax.VARMAX([[0, 0]], order=(2, 0), trend='c') # IRFs actual = mod.impulse_responses(params, steps, impulse=0) assert_allclose(actual, np.c_[irf_00, irf_01], atol=1e-6) actual = mod.impulse_responses(params, steps, impulse=1) assert_allclose(actual, np.c_[irf_10, irf_11], atol=1e-6) # Orthogonalized IRFs actual = mod.impulse_responses(params, steps, impulse=0, orthogonalized=True) assert_allclose(actual, np.c_[oirf_00, oirf_01], atol=1e-6) actual = mod.impulse_responses(params, steps, impulse=1, orthogonalized=True) assert_allclose(actual, np.c_[oirf_10, oirf_11], atol=1e-6) # VARMA(2, 2) + trend + exog # TODO: This is just a smoke test with warnings.catch_warnings(): warnings.simplefilter("ignore") mod = varmax.VARMAX(np.random.normal(size=(steps, 2)), order=(2, 2), trend='c', exog=np.ones(steps), enforce_stationarity=False, enforce_invertibility=False) mod.impulse_responses(mod.start_params, steps)
# The series has a seasonal component, so we run pmdarima's auto_arima to get the orders
from pmdarima import auto_arima
auto_arima(df['Employees'], seasonal=True, max_p=2, max_q=2, max_d=2, m=12).summary()

Ltrain = 12
train = df[:-Ltrain]
test = df[-Ltrain:]

from statsmodels.tsa.statespace import sarimax
result = sarimax.SARIMAX(train['Employees'], order=(1, 1, 2),
                         seasonal_order=(1, 0, 1, 12),
                         enforce_invertibility=False).fit()
result.summary()

# Note: the statespace SARIMAX already predicts on the original scale, so the
# old ARIMA-style `typ='levels'` argument is not needed here.
prediction = result.predict(start=len(train), end=len(df) - 1).rename('SARIMAX(1,1,2)(1,0,1,12)')
ax = df['Employees'].plot(legend=True)
prediction.plot(legend=True)
plt.show()

len(test)
len(prediction)

from statsmodels.tools.eval_measures import rmse
rmse(test['Employees'], prediction)
test['Employees'].mean()
import pandas as pd

df = pd.read_csv("weather_data_train_labels.csv", parse_dates=['datetime'],
                 sep=';', decimal=',', infer_datetime_format=True)
print(df.dtypes)

# Use the datetime column as the index so that the date-based prediction start below works
temp_df = df[["datetime", "U_mu"]].set_index("datetime")
print()

from statsmodels.tsa.seasonal import seasonal_decompose
from matplotlib import pyplot as plt

# ETS decomposition (`freq` was renamed to `period` in newer statsmodels)
result = seasonal_decompose(temp_df["U_mu"], model='multiplicative',
                            extrapolate_trend='freq', period=365)
result.plot()
# plt.show()

from statsmodels.tsa.statespace import sarimax
model = sarimax.SARIMAX(temp_df["U_mu"], order=(0, 1, 1), seasonal_order=(2, 1, 1, 12))
result = model.fit()
print(result.summary())

pred = result.get_prediction(start=pd.to_datetime('2012-09-20'), dynamic=False)
pred_ci = pred.conf_int()
print(pred.predicted_mean, pred_ci)
def Sarimax(ts, order, seasonal_order): fit = sms.SARIMAX(ts, order=order,seasonal_order=seasonal_order).fit() fcst = fit.predict(start=121, end=127, dynamic=True) return fcst
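# Minimal usage sketch for `Sarimax` above (illustrative, not from the original
# source): `sms` is assumed to be statsmodels.tsa.statespace.sarimax imported
# under that alias, and the hard-coded predict window (121-127) implies a
# series of at least roughly 128 observations, so a synthetic one is used here.
import numpy as np
import pandas as pd
from statsmodels.tsa.statespace import sarimax as sms

ts = pd.Series(np.random.randn(130).cumsum(),
               index=pd.date_range("2010-01-01", periods=130, freq="MS"))
fcst = Sarimax(ts, order=(1, 1, 1), seasonal_order=(0, 1, 1, 12))
print(fcst)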
def test_smoothed_state_obs_weights_sarimax(use_exog, trend, concentrate_scale, measurement_error): endog = np.array([[0.2, np.nan, 1.2, -0.3, -1.5]]).T exog = np.array([2, 5.3, -1, 3.4, 0.]) if use_exog else None trend_params = [0.1] ar_params = [0.5] exog_params = [1.4] meas_err_params = [1.2] cov_params = [0.8] params = [] if trend in ['c', 't']: params += trend_params if use_exog: params += exog_params params += ar_params if measurement_error: params += meas_err_params if not concentrate_scale: params += cov_params # Fit the models mod = sarimax.SARIMAX(endog, order=(1, 0, 0), trend=trend, exog=exog if use_exog else None, concentrate_scale=concentrate_scale, measurement_error=measurement_error) prior_mean = np.array([-0.4]) prior_cov = np.eye(1) * 1.2 mod.ssm.initialize_known(prior_mean, prior_cov) res = mod.smooth(params) # Compute the desiried weights n = mod.nobs m = mod.k_states p = mod.k_endog desired = np.zeros((n, n, m, p)) * np.nan # Here we manually compute the weights by adjusting one observation at a # time for j in range(n): for i in range(p): if np.isnan(endog[j, i]): desired[:, j, :, i] = np.nan else: y = endog.copy() y[j, i] += 1.0 tmp_mod = sarimax.SARIMAX(y, order=(1, 0, 0), trend=trend, exog=exog if use_exog else None, concentrate_scale=concentrate_scale, measurement_error=measurement_error) tmp_mod.ssm.initialize_known(prior_mean, prior_cov) tmp_res = tmp_mod.smooth(params) desired[:, j, :, i] = (tmp_res.smoothed_state.T - res.smoothed_state.T) desired_state_intercept_weights = np.zeros((n, n, m, m)) * np.nan # Here we manually compute the weights by adjusting one state intercept # at a time for j in range(n): for ell in range(m): tmp_mod = sarimax.SARIMAX(endog, order=(1, 0, 0), trend=trend, exog=exog if use_exog else None, concentrate_scale=concentrate_scale, measurement_error=measurement_error) tmp_mod.ssm.initialize_known(prior_mean, prior_cov) tmp_mod.update(params) if tmp_mod['state_intercept'].ndim == 1: si = tmp_mod['state_intercept'] tmp_mod['state_intercept'] = np.zeros((mod.k_states, mod.nobs)) tmp_mod['state_intercept', :, :] = si tmp_mod['state_intercept', ell, j] += 1.0 tmp_res = tmp_mod.ssm.smooth() desired_state_intercept_weights[:, j, :, ell] = ( tmp_res.smoothed_state.T - res.smoothed_state.T) desired_prior_weights = np.zeros((n, m, m)) * np.nan # Here we manually compute the weights by adjusting one prior element at # a time for i in range(m): a = prior_mean.copy() a[i] += 1 tmp_mod = sarimax.SARIMAX(endog, order=(1, 0, 0), trend=trend, exog=exog if use_exog else None, concentrate_scale=concentrate_scale, measurement_error=measurement_error) tmp_mod.ssm.initialize_known(a, prior_cov) tmp_res = tmp_mod.smooth(params) desired_prior_weights[:, :, i] = (tmp_res.smoothed_state.T - res.smoothed_state.T) mod.ssm.initialize_known(prior_mean, prior_cov) actual, actual_state_intercept_weights, actual_prior_weights = ( tools.compute_smoothed_state_weights(res)) assert_allclose(actual, desired, atol=1e-12) assert_allclose(actual_state_intercept_weights, desired_state_intercept_weights, atol=1e-12) assert_allclose(actual_prior_weights, desired_prior_weights, atol=1e-12)
def test_append_extend_apply_invalid(): # Test for invalid options to append, extend, and apply niledata = nile.data.load_pandas().data['volume'] niledata.index = pd.date_range('1871-01-01', '1970-01-01', freq='AS') endog1 = niledata.iloc[:20] endog2 = niledata.iloc[20:40] mod = sarimax.SARIMAX(endog1, order=(1, 0, 0), concentrate_scale=True) res1 = mod.smooth([0.5]) assert_raises(ValueError, res1.append, endog2, fit_kwargs={'cov_type': 'approx'}) assert_raises(ValueError, res1.extend, endog2, fit_kwargs={'cov_type': 'approx'}) assert_raises(ValueError, res1.apply, endog2, fit_kwargs={'cov_type': 'approx'}) assert_raises(ValueError, res1.append, endog2, fit_kwargs={'cov_kwds': {}}) assert_raises(ValueError, res1.extend, endog2, fit_kwargs={'cov_kwds': {}}) assert_raises(ValueError, res1.apply, endog2, fit_kwargs={'cov_kwds': {}}) # Test for exception when given a different frequency wrong_freq = niledata.iloc[20:40] wrong_freq.index = pd.date_range(start=niledata.index[0], periods=len(wrong_freq), freq='MS') message = ('Given `endog` does not have an index that extends the index of' ' the model. Expected index frequency is') with pytest.raises(ValueError, match=message): res1.append(wrong_freq) with pytest.raises(ValueError, match=message): res1.extend(wrong_freq) message = ('Given `exog` does not have an index that extends the index of' ' the model. Expected index frequency is') with pytest.raises(ValueError, match=message): res1.append(endog2, exog=wrong_freq) message = 'The indices for endog and exog are not aligned' with pytest.raises(ValueError, match=message): res1.extend(endog2, exog=wrong_freq) # Test for exception when given the same frequency but not right after the # end of model not_cts = niledata.iloc[21:41] message = ('Given `endog` does not have an index that extends the index of' ' the model.$') with pytest.raises(ValueError, match=message): res1.append(not_cts) with pytest.raises(ValueError, match=message): res1.extend(not_cts) message = ('Given `exog` does not have an index that extends the index of' ' the model.$') with pytest.raises(ValueError, match=message): res1.append(endog2, exog=not_cts) message = 'The indices for endog and exog are not aligned' with pytest.raises(ValueError, match=message): res1.extend(endog2, exog=not_cts) # # Test for problems with non-date indexes endog3 = pd.Series(niledata.iloc[:20].values) endog4 = pd.Series(niledata.iloc[:40].values)[20:] mod2 = sarimax.SARIMAX(endog3, order=(1, 0, 0), exog=endog3, concentrate_scale=True) res2 = mod2.smooth([0.2, 0.5]) # Test for exception when given the same frequency but not right after the # end of model not_cts = pd.Series(niledata[:41].values)[21:] message = ('Given `endog` does not have an index that extends the index of' ' the model.$') with pytest.raises(ValueError, match=message): res2.append(not_cts) with pytest.raises(ValueError, match=message): res2.extend(not_cts) message = ('Given `exog` does not have an index that extends the index of' ' the model.$') with pytest.raises(ValueError, match=message): res2.append(endog4, exog=not_cts) message = 'The indices for endog and exog are not aligned' with pytest.raises(ValueError, match=message): res2.extend(endog4, exog=not_cts)
def test_start_end_dates(use_periods): endog = dta['infl'].copy() if use_periods: index_range = pd.period_range else: def index_range(*args, **kwargs): return pd.period_range(*args, **kwargs).to_timestamp(freq='Q') endog = endog.to_timestamp(freq='Q') mod = sarimax.SARIMAX(endog.iloc[:-1]) res = mod.smooth([0.5, 1.0]) # Default is the first out-of-sample period news = res.news(endog) desired = index_range(start='2009Q2', periods=1, freq='Q') assert_(news.total_impacts.index.equals(desired)) # Start (dates), periods news = res.news(endog, start='2009Q1', periods=1) desired = index_range(start='2009Q1', periods=1, freq='Q') assert_(news.total_impacts.index.equals(desired)) news = res.news(endog, start='2009Q1', periods=2) desired = index_range(start='2009Q1', periods=2, freq='Q') assert_(news.total_impacts.index.equals(desired)) # Start (int), periods news = res.news(endog, start=mod.nobs - 1, periods=1) desired = index_range(start='2009Q2', periods=1, freq='Q') assert_(news.total_impacts.index.equals(desired)) news = res.news(endog, start=mod.nobs - 2, periods=2) desired = index_range(start='2009Q1', periods=2, freq='Q') assert_(news.total_impacts.index.equals(desired)) # End (dates), periods news = res.news(endog, end='2009Q1', periods=1) desired = index_range(end='2009Q1', periods=1, freq='Q') assert_(news.total_impacts.index.equals(desired)) news = res.news(endog, end='2009Q1', periods=2) desired = index_range(end='2009Q1', periods=2, freq='Q') assert_(news.total_impacts.index.equals(desired)) # End (int), periods news = res.news(endog, end=mod.nobs - 1, periods=1) desired = index_range(end='2009Q2', periods=1, freq='Q') assert_(news.total_impacts.index.equals(desired)) news = res.news(endog, end=mod.nobs - 2, periods=2) desired = index_range(end='2009Q1', periods=2, freq='Q') assert_(news.total_impacts.index.equals(desired)) # Start (dates), end (dates) news = res.news(endog, start='2009Q1', end='2009Q1') desired = index_range(start='2009Q1', end='2009Q1', freq='Q') assert_(news.total_impacts.index.equals(desired)) news = res.news(endog, start='2009Q1', end='2009Q2') desired = index_range(start='2009Q1', end='2009Q2', freq='Q') assert_(news.total_impacts.index.equals(desired)) # Start (dates), end (int) news = res.news(endog, start='2009Q1', end=mod.nobs - 2) desired = index_range(start='2009Q1', end='2009Q1', freq='Q') assert_(news.total_impacts.index.equals(desired)) predicted = res.predict(start='2009Q1', end=mod.nobs - 2) assert_(news.total_impacts.index.equals(predicted.index)) news = res.news(endog, start='2009Q1', end=mod.nobs - 1) desired = index_range(start='2009Q1', end='2009Q2', freq='Q') assert_(news.total_impacts.index.equals(desired)) predicted = res.predict(start='2009Q1', end=mod.nobs - 1) assert_(news.total_impacts.index.equals(predicted.index)) # Start (int), end (dates) news = res.news(endog, start=mod.nobs - 2, end='2009Q1') desired = index_range(start='2009Q1', end='2009Q1', freq='Q') assert_(news.total_impacts.index.equals(desired)) predicted = res.predict(start=mod.nobs - 2, end='2009Q1') assert_(news.total_impacts.index.equals(predicted.index)) news = res.news(endog, start=mod.nobs - 2, end='2009Q2') desired = index_range(start='2009Q1', end='2009Q2', freq='Q') assert_(news.total_impacts.index.equals(desired)) predicted = res.predict(start=mod.nobs - 2, end='2009Q2') assert_(news.total_impacts.index.equals(predicted.index)) # Negative indexes # Note that negative indexes are always computed relative to the updated # sample, which in this case is 1 observation 
more than is in `mod.nobs` total_nobs = len(endog) assert_equal(total_nobs, mod.nobs + 1) # Start (dates), end (int) desired = index_range(start='2009Q1', end='2009Q1', freq='Q') for end in [mod.nobs - 2, total_nobs - 3, -3]: news = res.news(endog, start='2009Q1', end=end) assert_(news.total_impacts.index.equals(desired)) # Note: predict does not allow negative indexing if end > 0: predicted = res.predict(start='2009Q1', end=end) assert_(news.total_impacts.index.equals(predicted.index)) # Start (int), end (dates) desired = index_range(start='2009Q1', end='2009Q1', freq='Q') for start in [mod.nobs - 2, total_nobs - 3, -3]: news = res.news(endog, start=start, end='2009Q1') assert_(news.total_impacts.index.equals(desired)) # Note: predict does not allow negative indexing if start > 0: predicted = res.predict(start=start, end='2009Q1') assert_(news.total_impacts.index.equals(predicted.index))
def test_score_analytic_ar1(): # Test the score against the analytic score for an AR(1) model with 2 # observations # Let endog = [1, 0.5], params=[0, 1] mod = sarimax.SARIMAX([1, 0.5], order=(1, 0, 0)) def partial_phi(phi, sigma2): return -0.5 * (phi**2 + 2 * phi * sigma2 - 1) / (sigma2 * (1 - phi**2)) def partial_sigma2(phi, sigma2): return -0.5 * (2 * sigma2 + phi - 1.25) / (sigma2**2) params = np.r_[0., 2] # Compute the analytic score analytic_score = np.r_[partial_phi(params[0], params[1]), partial_sigma2(params[0], params[1])] # Check each of the approximations, transformed parameters approx_cs = mod.score(params, transformed=True, approx_complex_step=True) assert_allclose(approx_cs, analytic_score) approx_fd = mod.score(params, transformed=True, approx_complex_step=False) assert_allclose(approx_fd, analytic_score, atol=1e-5) approx_fd_centered = (mod.score(params, transformed=True, approx_complex_step=False, approx_centered=True)) assert_allclose(approx_fd, analytic_score, atol=1e-5) harvey_cs = mod.score(params, transformed=True, method='harvey', approx_complex_step=True) assert_allclose(harvey_cs, analytic_score) harvey_fd = mod.score(params, transformed=True, method='harvey', approx_complex_step=False) assert_allclose(harvey_fd, analytic_score, atol=1e-5) harvey_fd_centered = mod.score(params, transformed=True, method='harvey', approx_complex_step=False, approx_centered=True) assert_allclose(harvey_fd_centered, analytic_score, atol=1e-5) # Check the approximations for untransformed parameters. The analytic # check now comes from chain rule with the analytic derivative of the # transformation # if L* is the likelihood evaluated at untransformed parameters and # L is the likelihood evaluated at transformed parameters, then we have: # L*(u) = L(t(u)) # and then # L'*(u) = L'(t(u)) * t'(u) def partial_transform_phi(phi): return -1. / (1 + phi**2)**(3. / 2) def partial_transform_sigma2(sigma2): return 2. * sigma2 uparams = mod.untransform_params(params) analytic_score = np.dot( np.diag(np.r_[partial_transform_phi(uparams[0]), partial_transform_sigma2(uparams[1])]), np.r_[partial_phi(params[0], params[1]), partial_sigma2(params[0], params[1])]) approx_cs = mod.score(uparams, transformed=False, approx_complex_step=True) assert_allclose(approx_cs, analytic_score) approx_fd = mod.score(uparams, transformed=False, approx_complex_step=False) assert_allclose(approx_fd, analytic_score, atol=1e-5) approx_fd_centered = (mod.score(uparams, transformed=False, approx_complex_step=False, approx_centered=True)) assert_allclose(approx_fd_centered, analytic_score, atol=1e-5) harvey_cs = mod.score(uparams, transformed=False, method='harvey', approx_complex_step=True) assert_allclose(harvey_cs, analytic_score) harvey_fd = mod.score(uparams, transformed=False, method='harvey', approx_complex_step=False) assert_allclose(harvey_fd, analytic_score, atol=1e-5) harvey_fd_centered = mod.score(uparams, transformed=False, method='harvey', approx_complex_step=False, approx_centered=True) assert_allclose(harvey_fd_centered, analytic_score, atol=1e-5) # Check the Hessian: these approximations are not very good, particularly # when phi is close to 0 params = np.r_[0.5, 1.] 
def hessian(phi, sigma2): hessian = np.zeros((2, 2)) hessian[0, 0] = (-phi**2 - 1) / (phi**2 - 1)**2 hessian[1, 0] = hessian[0, 1] = -1 / (2 * sigma2**2) hessian[1, 1] = (sigma2 + phi - 1.25) / sigma2**3 return hessian analytic_hessian = hessian(params[0], params[1]) with warnings.catch_warnings(): warnings.simplefilter("ignore") assert_allclose(mod._hessian_complex_step(params) * 2, analytic_hessian, atol=1e-1) assert_allclose(mod._hessian_finite_difference(params) * 2, analytic_hessian, atol=1e-1)
def test_mixed_basic(): # Performs a number of tests for setting different initialization for # different blocks # - 2-dimensional - endog = np.zeros(10) mod = sarimax.SARIMAX(endog, order=(2, 0, 0)) phi = [0.5, -0.2] sigma2 = 2. mod.update(np.r_[phi, sigma2]) # known has constant init = Initialization(mod.k_states) init.set(0, 'known', constant=[1.2]) # > known has constant init.set(1, 'known', constant=[-0.2]) check_initialization(mod, init, [1.2, -0.2], np.diag([0, 0]), np.diag([0, 0])) # > diffuse init.unset(1) init.set(1, 'diffuse') check_initialization(mod, init, [1.2, 0], np.diag([0, 1]), np.diag([0, 0])) # > approximate diffuse init.unset(1) init.set(1, 'approximate_diffuse') check_initialization(mod, init, [1.2, 0], np.diag([0, 0]), np.diag([0, 1e6])) # > stationary init.unset(1) init.set(1, 'stationary') check_initialization(mod, init, [1.2, 0], np.diag([0, 0]), np.diag([0, 0])) # known has cov init = Initialization(mod.k_states) init.set(0, 'known', stationary_cov=np.diag([1])) init.set(1, 'diffuse') check_initialization(mod, init, [0, 0], np.diag([0, 1]), np.diag([1, 0])) # known has both init = Initialization(mod.k_states) init.set(0, 'known', constant=[1.2], stationary_cov=np.diag([1])) init.set(1, 'diffuse') check_initialization(mod, init, [1.2, 0], np.diag([0, 1]), np.diag([1, 0])) # - 3-dimensional - endog = np.zeros(10) mod = sarimax.SARIMAX(endog, order=(3, 0, 0)) # known has constant init = Initialization(mod.k_states) init.set((0, 2), 'known', constant=[1.2, -0.2]) init.set(2, 'diffuse') check_initialization(mod, init, [1.2, -0.2, 0], np.diag([0, 0, 1]), np.diag([0, 0, 0])) # known has cov init = Initialization(mod.k_states) init.set((0, 2), 'known', stationary_cov=np.diag([1, 4.2])) init.set(2, 'diffuse') check_initialization(mod, init, [0, 0, 0], np.diag([0, 0, 1]), np.diag([1, 4.2, 0])) # known has both init = Initialization(mod.k_states) init.set((0, 2), 'known', constant=[1.2, -0.2], stationary_cov=np.diag([1, 4.2])) init.set(2, 'diffuse') check_initialization(mod, init, [1.2, -0.2, 0], np.diag([0, 0, 1]), np.diag([1, 4.2, 0]))
def test_lutkepohl_information_criteria(): # Setup dataset, use Lutkepohl data dta = pd.DataFrame(results_var_misc.lutkepohl_data, columns=['inv', 'inc', 'consump'], index=pd.date_range('1960-01-01', '1982-10-01', freq='QS')) dta['dln_inv'] = np.log(dta['inv']).diff() dta['dln_inc'] = np.log(dta['inc']).diff() dta['dln_consump'] = np.log(dta['consump']).diff() endog = dta.loc['1960-04-01':'1978-10-01', ['dln_inv', 'dln_inc', 'dln_consump']] # AR model - SARIMAX # (use loglikelihood_burn=1 to mimic conditional MLE used by Stata's var # command). true = results_var_misc.lutkepohl_ar1_lustats mod = sarimax.SARIMAX(endog['dln_inv'], order=(1, 0, 0), trend='c', loglikelihood_burn=1) res = mod.filter(true['params']) assert_allclose(res.llf, true['loglike']) # Test the Lutkepohl ICs # Note: for the Lutkepohl ICs, Stata only counts the AR coefficients as # estimated parameters for the purposes of information criteria, whereas we # count all parameters including scale and constant, so we need to adjust # for that aic = (res.info_criteria('aic', method='lutkepohl') - 2 * 2 / res.nobs_effective) bic = (res.info_criteria('bic', method='lutkepohl') - 2 * np.log(res.nobs_effective) / res.nobs_effective) hqic = (res.info_criteria('hqic', method='lutkepohl') - 2 * 2 * np.log(np.log(res.nobs_effective)) / res.nobs_effective) assert_allclose(aic, true['aic']) assert_allclose(bic, true['bic']) assert_allclose(hqic, true['hqic']) # Test the non-Lutkepohl ICs # Note: for the non-Lutkepohl ICs, Stata does not count the scale as an # estimated parameter, but does count the constant term, for the # purposes of information criteria, whereas we count both, so we need to # adjust for that true = results_var_misc.lutkepohl_ar1 aic = res.aic - 2 bic = res.bic - np.log(res.nobs_effective) assert_allclose(aic, true['estat_aic']) assert_allclose(bic, true['estat_bic']) aic = res.info_criteria('aic') - 2 bic = res.info_criteria('bic') - np.log(res.nobs_effective) assert_allclose(aic, true['estat_aic']) assert_allclose(bic, true['estat_bic']) # Note: could also test the "dfk" (degree of freedom corrections), but not # really necessary since they just rescale things a bit # VAR model - VARMAX # (use loglikelihood_burn=1 to mimic conditional MLE used by Stata's var # command). 
true = results_var_misc.lutkepohl_var1_lustats mod = varmax.VARMAX( endog, order=(1, 0), trend='n', error_cov_type='unstructured', loglikelihood_burn=1, ) res = mod.filter(true['params']) assert_allclose(res.llf, true['loglike']) # Test the Lutkepohl ICs # Note: for the Lutkepohl ICs, Stata only counts the AR coefficients as # estimated parameters for the purposes of information criteria, whereas we # count all parameters including the elements of the covariance matrix, so # we need to adjust for that aic = (res.info_criteria('aic', method='lutkepohl') - 2 * 6 / res.nobs_effective) bic = (res.info_criteria('bic', method='lutkepohl') - 6 * np.log(res.nobs_effective) / res.nobs_effective) hqic = (res.info_criteria('hqic', method='lutkepohl') - 2 * 6 * np.log(np.log(res.nobs_effective)) / res.nobs_effective) assert_allclose(aic, true['aic']) assert_allclose(bic, true['bic']) assert_allclose(hqic, true['hqic']) # Test the non-Lutkepohl ICs # Note: for the non-Lutkepohl ICs, Stata does not count the elements of the # covariance matrix as estimated parameters for the purposes of information # criteria, whereas we count both, so we need to adjust for that true = results_var_misc.lutkepohl_var1 aic = res.aic - 2 * 6 bic = res.bic - 6 * np.log(res.nobs_effective) assert_allclose(aic, true['estat_aic']) assert_allclose(bic, true['estat_bic']) aic = res.info_criteria('aic') - 2 * 6 bic = res.info_criteria('bic') - 6 * np.log(res.nobs_effective) assert_allclose(aic, true['estat_aic']) assert_allclose(bic, true['estat_bic'])
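# A minimal, hedged sketch of the adjustment described in the comments above: the
# Lutkepohl-style criteria from res.info_criteria(..., method='lutkepohl') count a different
# set of parameters than Stata's `var`, so the tests subtract the extra 2*k/T (AIC) or
# k*log(T)/T (BIC) terms for the parameters Stata ignores. Example on simulated AR(1) data
# (arma_generate_sample and info_criteria are existing statsmodels APIs; the data and
# parameter values here are illustrative assumptions, not part of the test):
import numpy as np
import statsmodels.api as sm
from statsmodels.tsa.statespace import sarimax

np.random.seed(1234)
y = sm.tsa.arma_generate_sample(ar=[1, -0.5], ma=[1], nsample=200)

res = sarimax.SARIMAX(y, order=(1, 0, 0), trend='c', loglikelihood_burn=1).fit(disp=False)
T = res.nobs_effective

print(res.aic, res.bic)                              # standard ICs, all parameters counted
print(res.info_criteria('aic', method='lutkepohl'))  # Lutkepohl-style AIC
print(res.info_criteria('aic', method='lutkepohl') - 2 * 2 / T)  # drop constant + scale, as in the test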
def test_structural(): # Clear warnings structural.__warningregistry__ = {} np.random.seed(38947) nobs = 100 eps = np.random.normal(size=nobs) exog = np.random.normal(size=nobs) eps1 = np.zeros(nobs) eps2 = np.zeros(nobs) eps2[49] = 1 eps3 = np.zeros(nobs) eps3[50:] = 1 # AR(1) mod1 = structural.UnobservedComponents([0], autoregressive=1) mod2 = sarimax.SARIMAX([0], order=(1, 0, 0)) actual = mod1.simulate([1, 0.5], nobs, state_shocks=eps, initial_state=np.zeros(mod1.k_states)) desired = mod2.simulate([0.5, 1], nobs, state_shocks=eps, initial_state=np.zeros(mod2.k_states)) assert_allclose(actual, desired) # ARX(1) mod1 = structural.UnobservedComponents(np.zeros(nobs), exog=exog, autoregressive=1) mod2 = sarimax.SARIMAX(np.zeros(nobs), exog=exog, order=(1, 0, 0)) actual = mod1.simulate([1, 0.5, 0.2], nobs, state_shocks=eps, initial_state=np.zeros(mod2.k_states)) desired = mod2.simulate([0.2, 0.5, 1], nobs, state_shocks=eps, initial_state=np.zeros(mod2.k_states)) assert_allclose(actual, desired) # Irregular mod = structural.UnobservedComponents([0], 'irregular') actual = mod.simulate([1.], nobs, measurement_shocks=eps, initial_state=np.zeros(mod.k_states)) assert_allclose(actual, eps) # Fixed intercept # (in practice this is a deterministic constant, because an irregular # component must be added) with warnings.catch_warnings(): warnings.simplefilter("ignore") mod = structural.UnobservedComponents([0], 'fixed intercept') actual = mod.simulate([1.], nobs, measurement_shocks=eps, initial_state=[10]) assert_allclose(actual, 10 + eps) # Deterministic constant mod = structural.UnobservedComponents([0], 'deterministic constant') actual = mod.simulate([1.], nobs, measurement_shocks=eps, initial_state=[10]) assert_allclose(actual, 10 + eps) # Local level mod = structural.UnobservedComponents([0], 'local level') actual = mod.simulate([1., 1.], nobs, measurement_shocks=eps, state_shocks=eps2, initial_state=np.zeros(mod.k_states)) assert_allclose(actual, eps + eps3) # Random walk mod = structural.UnobservedComponents([0], 'random walk') actual = mod.simulate([1.], nobs, measurement_shocks=eps, state_shocks=eps2, initial_state=np.zeros(mod.k_states)) assert_allclose(actual, eps + eps3) # Fixed slope # (in practice this is a deterministic trend, because an irregular # component must be added) with warnings.catch_warnings(): warnings.simplefilter("ignore") mod = structural.UnobservedComponents([0], 'fixed slope') actual = mod.simulate([1., 1.], nobs, measurement_shocks=eps, state_shocks=eps2, initial_state=[0, 1]) assert_allclose(actual, eps + np.arange(100)) # Deterministic trend mod = structural.UnobservedComponents([0], 'deterministic trend') actual = mod.simulate([1.], nobs, measurement_shocks=eps, state_shocks=eps2, initial_state=[0, 1]) assert_allclose(actual, eps + np.arange(100)) # Local linear deterministic trend mod = structural.UnobservedComponents([0], 'local linear deterministic trend') actual = mod.simulate([1., 1.], nobs, measurement_shocks=eps, state_shocks=eps2, initial_state=[0, 1]) desired = eps + np.r_[np.arange(50), 1 + np.arange(50, 100)] assert_allclose(actual, desired) # Random walk with drift mod = structural.UnobservedComponents([0], 'random walk with drift') actual = mod.simulate([1.], nobs, state_shocks=eps2, initial_state=[0, 1]) desired = np.r_[np.arange(50), 1 + np.arange(50, 100)] assert_allclose(actual, desired) # Local linear trend mod = structural.UnobservedComponents([0], 'local linear trend') actual = mod.simulate([1., 1., 1.], nobs, measurement_shocks=eps, 
state_shocks=np.c_[eps2, eps1], initial_state=[0, 1]) desired = eps + np.r_[np.arange(50), 1 + np.arange(50, 100)] assert_allclose(actual, desired) actual = mod.simulate([1., 1., 1.], nobs, measurement_shocks=eps, state_shocks=np.c_[eps1, eps2], initial_state=[0, 1]) desired = eps + np.r_[np.arange(50), np.arange(50, 150, 2)] assert_allclose(actual, desired) # Smooth trend mod = structural.UnobservedComponents([0], 'smooth trend') actual = mod.simulate([1., 1.], nobs, measurement_shocks=eps, state_shocks=eps1, initial_state=[0, 1]) desired = eps + np.r_[np.arange(100)] assert_allclose(actual, desired) actual = mod.simulate([1., 1.], nobs, measurement_shocks=eps, state_shocks=eps2, initial_state=[0, 1]) desired = eps + np.r_[np.arange(50), np.arange(50, 150, 2)] assert_allclose(actual, desired) # Random trend mod = structural.UnobservedComponents([0], 'random trend') actual = mod.simulate([1., 1.], nobs, state_shocks=eps1, initial_state=[0, 1]) desired = np.r_[np.arange(100)] assert_allclose(actual, desired) actual = mod.simulate([1., 1.], nobs, state_shocks=eps2, initial_state=[0, 1]) desired = np.r_[np.arange(50), np.arange(50, 150, 2)] assert_allclose(actual, desired) # Seasonal (deterministic) mod = structural.UnobservedComponents([0], 'irregular', seasonal=2, stochastic_seasonal=False) actual = mod.simulate([1.], nobs, measurement_shocks=eps, initial_state=[10]) desired = eps + np.tile([10, -10], 50) assert_allclose(actual, desired) # Seasonal (stochastic) mod = structural.UnobservedComponents([0], 'irregular', seasonal=2) actual = mod.simulate([1., 1.], nobs, measurement_shocks=eps, state_shocks=eps2, initial_state=[10]) desired = eps + np.r_[np.tile([10, -10], 25), np.tile([11, -11], 25)] assert_allclose(actual, desired) # Cycle (deterministic) mod = structural.UnobservedComponents([0], 'irregular', cycle=True) actual = mod.simulate([1., 1.2], nobs, measurement_shocks=eps, initial_state=[1, 0]) x1 = [np.cos(1.2), np.sin(1.2)] x2 = [-np.sin(1.2), np.cos(1.2)] T = np.array([x1, x2]) desired = eps states = [1, 0] for i in range(nobs): desired[i] += states[0] states = np.dot(T, states) assert_allclose(actual, desired) # Cycle (stochastic) mod = structural.UnobservedComponents([0], 'irregular', cycle=True, stochastic_cycle=True) actual = mod.simulate([1., 1., 1.2], nobs, measurement_shocks=eps, state_shocks=np.c_[eps2, eps2], initial_state=[1, 0]) x1 = [np.cos(1.2), np.sin(1.2)] x2 = [-np.sin(1.2), np.cos(1.2)] T = np.array([x1, x2]) desired = eps states = [1, 0] for i in range(nobs): desired[i] += states[0] states = np.dot(T, states) + eps2[i] assert_allclose(actual, desired)
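# The cases above drive UnobservedComponents.simulate with hand-built shock paths so the
# output can be checked exactly. As a complement, a minimal sketch of the usual workflow:
# simulate a local level series directly and recover its two variances by fitting the same
# specification (the variances and sample size below are illustrative assumptions):
import numpy as np
from statsmodels.tsa.statespace import structural

np.random.seed(1234)
nobs = 500
level = np.cumsum(np.random.normal(scale=np.sqrt(0.5), size=nobs))  # mu_t = mu_{t-1} + eta_t
y = level + np.random.normal(scale=1.0, size=nobs)                  # y_t  = mu_t + eps_t

res = structural.UnobservedComponents(y, 'local level').fit(disp=False)
print(res.params)  # estimated irregular and level variances, roughly 1.0 and 0.5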
def test_varmax(): np.random.seed(371934) nobs = 100 eps = np.random.normal(size=nobs) exog = np.random.normal(size=(nobs, 1)) eps1 = np.zeros(nobs) eps2 = np.zeros(nobs) eps2[49] = 1 eps3 = np.zeros(nobs) eps3[50:] = 1 # VAR(2) - single series mod1 = varmax.VARMAX([[0]], order=(2, 0), trend='n') mod2 = sarimax.SARIMAX([0], order=(2, 0, 0)) actual = mod1.simulate([0.5, 0.2, 1], nobs, state_shocks=eps, initial_state=np.zeros(mod1.k_states)) desired = mod2.simulate([0.5, 0.2, 1], nobs, state_shocks=eps, initial_state=np.zeros(mod2.k_states)) assert_allclose(actual, desired) # VMA(2) - single series mod1 = varmax.VARMAX([[0]], order=(0, 2), trend='n') mod2 = sarimax.SARIMAX([0], order=(0, 0, 2)) actual = mod1.simulate([0.5, 0.2, 1], nobs, state_shocks=eps, initial_state=np.zeros(mod1.k_states)) desired = mod2.simulate([0.5, 0.2, 1], nobs, state_shocks=eps, initial_state=np.zeros(mod2.k_states)) assert_allclose(actual, desired) # VARMA(2, 2) - single series warning = EstimationWarning match = r'VARMA\(p,q\) models is not' with pytest.warns(warning, match=match): mod1 = varmax.VARMAX([[0]], order=(2, 2), trend='n') mod2 = sarimax.SARIMAX([0], order=(2, 0, 2)) actual = mod1.simulate([0.5, 0.2, 0.1, -0.2, 1], nobs, state_shocks=eps, initial_state=np.zeros(mod1.k_states)) desired = mod2.simulate([0.5, 0.2, 0.1, -0.2, 1], nobs, state_shocks=eps, initial_state=np.zeros(mod2.k_states)) assert_allclose(actual, desired) # VARMA(2, 2) + trend - single series warning = EstimationWarning match = r'VARMA\(p,q\) models is not' with pytest.warns(warning, match=match): mod1 = varmax.VARMAX([[0]], order=(2, 2), trend='c') mod2 = sarimax.SARIMAX([0], order=(2, 0, 2), trend='c') actual = mod1.simulate([10, 0.5, 0.2, 0.1, -0.2, 1], nobs, state_shocks=eps, initial_state=np.zeros(mod1.k_states)) desired = mod2.simulate([10, 0.5, 0.2, 0.1, -0.2, 1], nobs, state_shocks=eps, initial_state=np.zeros(mod2.k_states)) assert_allclose(actual, desired) # VAR(1) transition = np.array([[0.5, 0.1], [-0.1, 0.2]]) mod = varmax.VARMAX([[0, 0]], order=(1, 0), trend='n') actual = mod.simulate(np.r_[transition.ravel(), 1., 0, 1.], nobs, state_shocks=np.c_[eps1, eps1], initial_state=np.zeros(mod.k_states)) assert_allclose(actual, 0) actual = mod.simulate(np.r_[transition.ravel(), 1., 0, 1.], nobs, state_shocks=np.c_[eps1, eps1], initial_state=[1, 1]) desired = np.zeros((nobs, 2)) state = np.r_[1, 1] for i in range(nobs): desired[i] = state state = np.dot(transition, state) assert_allclose(actual, desired) # VAR(1) + measurement error mod = varmax.VARMAX([[0, 0]], order=(1, 0), trend='n', measurement_error=True) actual = mod.simulate(np.r_[transition.ravel(), 1., 0, 1., 1., 1.], nobs, measurement_shocks=np.c_[eps, eps], state_shocks=np.c_[eps1, eps1], initial_state=np.zeros(mod.k_states)) assert_allclose(actual, np.c_[eps, eps]) # VARX(1) mod = varmax.VARMAX(np.zeros((nobs, 2)), order=(1, 0), trend='n', exog=exog) actual = mod.simulate(np.r_[transition.ravel(), 5, -2, 1., 0, 1.], nobs, state_shocks=np.c_[eps1, eps1], initial_state=[1, 1]) desired = np.zeros((nobs, 2)) state = np.r_[1, 1] for i in range(nobs): desired[i] = state if i < nobs - 1: state = exog[i + 1] * [5, -2] + np.dot(transition, state) assert_allclose(actual, desired) # VMA(1) # TODO: This is just a smoke test mod = varmax.VARMAX(np.random.normal(size=(nobs, 2)), order=(0, 1), trend='n') mod.simulate(mod.start_params, nobs) # VARMA(2, 2) + trend + exog # TODO: This is just a smoke test warning = EstimationWarning match = r"VARMA\(p,q\) models is not" with 
pytest.warns(warning, match=match): mod = varmax.VARMAX(np.random.normal(size=(nobs, 2)), order=(2, 2), trend='c', exog=exog) mod.simulate(mod.start_params, nobs)
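# A minimal sketch of estimating (rather than simulating) a bivariate VAR(1) with VARMAX and
# producing out-of-sample forecasts; a pure VAR, unlike the VARMA(p, q) cases above, does not
# trigger EstimationWarning. The generating process below is an illustrative assumption, not
# part of the test:
import numpy as np
from statsmodels.tsa.statespace import varmax

np.random.seed(12345)
nobs = 300
A = np.array([[0.5, 0.1], [-0.1, 0.2]])
y = np.zeros((nobs, 2))
for t in range(1, nobs):
    y[t] = A @ y[t - 1] + np.random.normal(size=2)

res = varmax.VARMAX(y, order=(1, 0), trend='n').fit(disp=False)
print(res.forecast(steps=5))  # 5-step-ahead point forecasts for both series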
def test_arma_direct(): # Tests of an ARMA model simulation against direct construction # This is useful for e.g. trend components # Note: the first elements of the generated SARIMAX datasets are based on # the initial state, so we don't include them in the comparisons np.random.seed(10239) nobs = 100 eps = np.random.normal(size=nobs) exog = np.random.normal(size=nobs) # AR(1) mod = sarimax.SARIMAX([0], order=(1, 0, 0)) actual = mod.simulate([0.5, 1.], nobs + 1, state_shocks=np.r_[eps, 0], initial_state=np.zeros(mod.k_states)) desired = np.zeros(nobs) for i in range(nobs): if i == 0: desired[i] = eps[i] else: desired[i] = 0.5 * desired[i - 1] + eps[i] assert_allclose(actual[1:], desired) # MA(1) mod = sarimax.SARIMAX([0], order=(0, 0, 1)) actual = mod.simulate([0.5, 1.], nobs + 1, state_shocks=np.r_[eps, 0], initial_state=np.zeros(mod.k_states)) desired = np.zeros(nobs) for i in range(nobs): if i == 0: desired[i] = eps[i] else: desired[i] = 0.5 * eps[i - 1] + eps[i] assert_allclose(actual[1:], desired) # ARMA(1, 1) mod = sarimax.SARIMAX([0], order=(1, 0, 1)) actual = mod.simulate([0.5, 0.2, 1.], nobs + 1, state_shocks=np.r_[eps, 0], initial_state=np.zeros(mod.k_states)) desired = np.zeros(nobs) for i in range(nobs): if i == 0: desired[i] = eps[i] else: desired[i] = 0.5 * desired[i - 1] + 0.2 * eps[i - 1] + eps[i] assert_allclose(actual[1:], desired) # ARMA(1, 1) + intercept mod = sarimax.SARIMAX([0], order=(1, 0, 1), trend='c') actual = mod.simulate([1.3, 0.5, 0.2, 1.], nobs + 1, state_shocks=np.r_[eps, 0], initial_state=np.zeros(mod.k_states)) desired = np.zeros(nobs) for i in range(nobs): trend = 1.3 if i == 0: desired[i] = trend + eps[i] else: desired[i] = (trend + 0.5 * desired[i - 1] + 0.2 * eps[i - 1] + eps[i]) assert_allclose(actual[1:], desired) # ARMA(1, 1) + intercept + time trend # Note: to allow time-varying SARIMAX to simulate 101 observations, need to # give it 101 observations up front mod = sarimax.SARIMAX(np.zeros(nobs + 1), order=(1, 0, 1), trend='ct') actual = mod.simulate([1.3, 0.2, 0.5, 0.2, 1.], nobs + 1, state_shocks=np.r_[eps, 0], initial_state=np.zeros(mod.k_states)) desired = np.zeros(nobs) for i in range(nobs): trend = 1.3 + 0.2 * (i + 1) if i == 0: desired[i] = trend + eps[i] else: desired[i] = (trend + 0.5 * desired[i - 1] + 0.2 * eps[i - 1] + eps[i]) assert_allclose(actual[1:], desired) # ARMA(1, 1) + intercept + time trend + exog # Note: to allow time-varying SARIMAX to simulate 101 observations, need to # give it 101 observations up front # Note: the model is regression with SARIMAX errors, so the exog is # introduced into the observation equation rather than the ARMA part mod = sarimax.SARIMAX(np.zeros(nobs + 1), exog=np.r_[0, exog], order=(1, 0, 1), trend='ct') actual = mod.simulate([1.3, 0.2, -0.5, 0.5, 0.2, 1.], nobs + 1, state_shocks=np.r_[eps, 0], initial_state=np.zeros(mod.k_states)) desired = np.zeros(nobs) for i in range(nobs): trend = 1.3 + 0.2 * (i + 1) if i == 0: desired[i] = trend + eps[i] else: desired[i] = (trend + 0.5 * desired[i - 1] + 0.2 * eps[i - 1] + eps[i]) desired = desired - 0.5 * exog assert_allclose(actual[1:], desired)
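# The checks above fix the shock paths explicitly via state_shocks so the recursion can be
# verified by hand. A minimal sketch of the more common usage, where SARIMAX.simulate draws
# the shocks itself; the parameter vector follows the same [intercept, ar, ma, sigma2]
# ordering used in the test (the parameter values are illustrative assumptions):
import numpy as np
from statsmodels.tsa.statespace import sarimax

np.random.seed(0)
mod = sarimax.SARIMAX([0], order=(1, 0, 1), trend='c')
sim = mod.simulate([1.3, 0.5, 0.2, 1.0], nsimulations=200,
                   initial_state=np.zeros(mod.k_states))
print(sim[:5])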
def test_seasonal_arima(self):
    model_combin = [
        [(3, 1, 1), (3, 1, 1, 12), 't', True, True, False, True, False, False, True, False],
        [(3, 1, 1), (3, 1, 1, 12), 't', True, True, False, False, False, False, False, False],
        [(3, 1, 1), (3, 1, 1, 12), 't', True, False, True, True, False, False, True, False],
        [(3, 1, 1), (3, 1, 1, 12), 't', True, False, True, False, False, False, False, False],
        [(3, 1, 1), (3, 1, 1, 12), 't', False, True, False, True, False, False, True, False],
        [(3, 1, 1), (3, 1, 1, 12), 't', False, True, False, False, False, False, False, False],
        [(3, 1, 1), (3, 1, 1, 12), 't', False, False, True, True, False, False, True, False],
        [(3, 1, 1), (3, 1, 1, 12), 't', False, False, True, False, False, False, False, False]]

    # monthly airline passenger counts, 1949-1960 (the classic AirPassengers series)
    data = [
        112, 118, 132, 129, 121, 135, 148, 148, 136, 119, 104, 118,
        115, 126, 141, 135, 125, 149, 170, 170, 158, 133, 114, 140,
        145, 150, 178, 163, 172, 178, 199, 199, 184, 162, 146, 166,
        171, 180, 193, 181, 183, 218, 230, 242, 209, 191, 172, 194,
        196, 196, 236, 235, 229, 243, 264, 272, 237, 211, 180, 201,
        204, 188, 235, 227, 234, 264, 302, 293, 259, 229, 203, 229,
        242, 233, 267, 269, 270, 315, 364, 347, 312, 274, 237, 278,
        284, 277, 317, 313, 318, 374, 413, 405, 355, 306, 271, 306,
        315, 301, 356, 348, 355, 422, 465, 467, 404, 347, 305, 336,
        340, 318, 362, 348, 363, 435, 491, 505, 404, 359, 310, 337,
        360, 342, 406, 396, 420, 472, 548, 559, 463, 407, 362, 405,
        417, 391, 419, 461, 472, 535, 622, 606, 508, 461, 390, 432
    ]
    # build the monthly index with pd.date_range (the DatetimeIndex(start=..., end=...)
    # constructor is no longer supported by pandas)
    index = pd.date_range(start='1949-01-01', end='1960-12-01', freq='MS')
    ts_data = pd.Series(data, index)
    ts_data.index.name = 'datetime_index'
    ts_data.name = 'n_passengers'

    c = 0
    for x in model_combin:
        try:
            model = sarimax.SARIMAX(endog=ts_data,
                                    exog=None,
                                    order=x[0],
                                    seasonal_order=x[1],
                                    trend=x[2],
                                    measurement_error=x[3],
                                    time_varying_regression=x[4],
                                    mle_regression=x[5],
                                    simple_differencing=x[6],
                                    enforce_stationarity=x[7],
                                    enforce_invertibility=x[8],
                                    hamilton_representation=x[9],
                                    concentrate_scale=x[10])
            result = model.fit()
            try:
                c = c + 1
                file_name = 'seasonal_arima' + str(c) + '.pmml'
                ArimaToPMML(ts_data, model, result, file_name)
            except:
                continue
            finally:
                exported = os.path.isfile(file_name)
                self.assertEqual(exported, True)
                if (not exported):
                    break
        except:
            continue
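# A minimal sketch of exporting a single fitted configuration, following the same
# four-argument ArimaToPMML call pattern used in the loop above (this assumes the nyoka
# package is installed and exposes ArimaToPMML at the top level, reuses the ts_data series
# constructed in the test, and uses an illustrative file name):
from nyoka import ArimaToPMML
from statsmodels.tsa.statespace import sarimax

model = sarimax.SARIMAX(ts_data, order=(3, 1, 1), seasonal_order=(3, 1, 1, 12), trend='t')
result = model.fit(disp=False)
ArimaToPMML(ts_data, model, result, 'seasonal_arima_example.pmml')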
def test_impulse_responses(): # Test for impulse response functions # Random walk: 1-unit impulse response (i.e. non-orthogonalized irf) is 1 # for all periods mod = KalmanFilter(k_endog=1, k_states=1) mod['design', 0, 0] = 1. mod['transition', 0, 0] = 1. mod['selection', 0, 0] = 1. mod['state_cov', 0, 0] = 2. actual = mod.impulse_responses(steps=10) desired = np.ones((11, 1)) assert_allclose(actual, desired) # Random walk: 2-unit impulse response (i.e. non-orthogonalized irf) is 2 # for all periods mod = KalmanFilter(k_endog=1, k_states=1) mod['design', 0, 0] = 1. mod['transition', 0, 0] = 1. mod['selection', 0, 0] = 1. mod['state_cov', 0, 0] = 2. actual = mod.impulse_responses(steps=10, impulse=[2]) desired = np.ones((11, 1)) * 2 assert_allclose(actual, desired) # Random walk: 1-standard-deviation response (i.e. orthogonalized irf) is # sigma for all periods (here sigma^2 = 2) mod = KalmanFilter(k_endog=1, k_states=1) mod['design', 0, 0] = 1. mod['transition', 0, 0] = 1. mod['selection', 0, 0] = 1. mod['state_cov', 0, 0] = 2. actual = mod.impulse_responses(steps=10, orthogonalized=True) desired = np.ones((11, 1)) * 2**0.5 assert_allclose(actual, desired) # Random walk: 1-standard-deviation cumulative response (i.e. cumulative # orthogonalized irf) mod = KalmanFilter(k_endog=1, k_states=1) mod['design', 0, 0] = 1. mod['transition', 0, 0] = 1. mod['selection', 0, 0] = 1. mod['state_cov', 0, 0] = 2. actual = mod.impulse_responses(steps=10, orthogonalized=True, cumulative=True) desired = np.cumsum(np.ones((11, 1)) * 2**0.5)[:, np.newaxis] actual = mod.impulse_responses(steps=10, impulse=[1], orthogonalized=True, cumulative=True) desired = np.cumsum(np.ones((11, 1)) * 2**0.5)[:, np.newaxis] assert_allclose(actual, desired) # Random walk: 1-unit impulse response (i.e. non-orthogonalized irf) is 1 # for all periods, even when intercepts are present mod = KalmanFilter(k_endog=1, k_states=1) mod['state_intercept', 0] = 100. mod['design', 0, 0] = 1. mod['obs_intercept', 0] = -1000. mod['transition', 0, 0] = 1. mod['selection', 0, 0] = 1. mod['state_cov', 0, 0] = 2. actual = mod.impulse_responses(steps=10) desired = np.ones((11, 1)) assert_allclose(actual, desired) # Univariate model (random walk): test that an error is thrown when # a multivariate or empty "impulse" is sent mod = KalmanFilter(k_endog=1, k_states=1) assert_raises(ValueError, mod.impulse_responses, impulse=1) assert_raises(ValueError, mod.impulse_responses, impulse=[1, 1]) assert_raises(ValueError, mod.impulse_responses, impulse=[]) # Univariate model with two uncorrelated shocks mod = KalmanFilter(k_endog=1, k_states=2) mod['design', 0, 0:2] = 1. 
mod['transition', :, :] = np.eye(2) mod['selection', :, :] = np.eye(2) mod['state_cov', :, :] = np.eye(2) desired = np.ones((11, 1)) actual = mod.impulse_responses(steps=10, impulse=0) assert_allclose(actual, desired) actual = mod.impulse_responses(steps=10, impulse=[1, 0]) assert_allclose(actual, desired) actual = mod.impulse_responses(steps=10, impulse=1) assert_allclose(actual, desired) actual = mod.impulse_responses(steps=10, impulse=[0, 1]) assert_allclose(actual, desired) # In this case (with sigma=sigma^2=1), orthogonalized is the same as not actual = mod.impulse_responses(steps=10, impulse=0, orthogonalized=True) assert_allclose(actual, desired) actual = mod.impulse_responses(steps=10, impulse=[1, 0], orthogonalized=True) assert_allclose(actual, desired) actual = mod.impulse_responses(steps=10, impulse=[0, 1], orthogonalized=True) assert_allclose(actual, desired) # Univariate model with two correlated shocks mod = KalmanFilter(k_endog=1, k_states=2) mod['design', 0, 0:2] = 1. mod['transition', :, :] = np.eye(2) mod['selection', :, :] = np.eye(2) mod['state_cov', :, :] = np.array([[1, 0.5], [0.5, 1.25]]) desired = np.ones((11, 1)) # Non-orthogonalized (i.e. 1-unit) impulses still just generate 1's actual = mod.impulse_responses(steps=10, impulse=0) assert_allclose(actual, desired) actual = mod.impulse_responses(steps=10, impulse=1) assert_allclose(actual, desired) # Orthogonalized (i.e. 1-std-dev) impulses now generate different responses actual = mod.impulse_responses(steps=10, impulse=0, orthogonalized=True) assert_allclose(actual, desired + desired * 0.5) actual = mod.impulse_responses(steps=10, impulse=1, orthogonalized=True) assert_allclose(actual, desired) # Multivariate model with two correlated shocks mod = KalmanFilter(k_endog=2, k_states=2) mod['design', :, :] = np.eye(2) mod['transition', :, :] = np.eye(2) mod['selection', :, :] = np.eye(2) mod['state_cov', :, :] = np.array([[1, 0.5], [0.5, 1.25]]) ones = np.ones((11, 1)) zeros = np.zeros((11, 1)) # Non-orthogonalized (i.e. 1-unit) impulses still just generate 1's, but # only for the appropriate series actual = mod.impulse_responses(steps=10, impulse=0) assert_allclose(actual, np.c_[ones, zeros]) actual = mod.impulse_responses(steps=10, impulse=1) assert_allclose(actual, np.c_[zeros, ones]) # Orthogonalized (i.e. 1-std-dev) impulses now generate different # responses, and only for the appropriate series actual = mod.impulse_responses(steps=10, impulse=0, orthogonalized=True) assert_allclose(actual, np.c_[ones, ones * 0.5]) actual = mod.impulse_responses(steps=10, impulse=1, orthogonalized=True) assert_allclose(actual, np.c_[zeros, ones]) # AR(1) model generates a geometrically declining series mod = sarimax.SARIMAX([0.1, 0.5, -0.2], order=(1, 0, 0)) phi = 0.5 mod.update([phi, 1]) desired = np.cumprod(np.r_[1, [phi] * 10]) # Test going through the model directly actual = mod.ssm.impulse_responses(steps=10) assert_allclose(actual[:, 0], desired) # Test going through the results object res = mod.filter([phi, 1.]) actual = res.impulse_responses(steps=10) assert_allclose(actual, desired)
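# A short worked check of why the orthogonalized responses above differ from the unit-impulse
# ones: orthogonalization applies the Cholesky factor of state_cov, so a one-standard-deviation
# shock to the first state also moves the second state. Pure numpy, using the same covariance
# as the correlated-shock cases above:
import numpy as np

state_cov = np.array([[1.0, 0.5], [0.5, 1.25]])
L = np.linalg.cholesky(state_cov)   # [[1.0, 0.0], [0.5, 1.0]]
design = np.ones((1, 2))            # univariate model loading on both states

print(design @ L[:, 0])             # [1.5] -> the "desired + desired * 0.5" response above
print(design @ L[:, 1])             # [1.0] -> the unchanged response for impulse=1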
seasonality=[365, 30, 7])
plt.plot(multi_adj_series)
plt.hist(multi_adj_series)


# imports used by this snippet: datetime and pandas for the index, matplotlib for the plots,
# and the statsmodels SARIMAX model (imported here so `sarimax.SARIMAX` below resolves)
import datetime

import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.statespace import sarimax


def make_date_range(start_date, end_date, date_format='%Y-%m-%d', increment=1):
    """Return a list of dates from start_date up to (but excluding) end_date."""
    start = datetime.datetime.strptime(start_date, date_format)
    end = datetime.datetime.strptime(end_date, date_format)
    delta = datetime.timedelta(days=increment)
    date_range = []
    while start < end:
        date_range.append(start.date())
        start += delta
    return date_range


ts_df = pd.DataFrame({'value': tseries},
                     index=make_date_range('2015-01-01', '2019-12-31'))

define_sarima = sarimax.SARIMAX(ts_df, order=(1, 0, 0), seasonal_order=(0, 1, 0, 1), freq='D')
fit_sarima = define_sarima.fit()
fit_sarima.summary()

prediction_range = make_date_range('2020-01-01', '2024-12-31')
pred = fit_sarima.predict(start=prediction_range[0], end=prediction_range[-1])
plt.plot(pred)
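# A minimal sketch of adding interval forecasts to the point predictions above
# (get_forecast, predicted_mean and conf_int are existing statsmodels results APIs; this
# reuses fit_sarima and prediction_range from the script above):
forecast = fit_sarima.get_forecast(steps=len(prediction_range))
mean_forecast = forecast.predicted_mean
conf_int = forecast.conf_int(alpha=0.05)

plt.plot(mean_forecast)
plt.fill_between(conf_int.index, conf_int.iloc[:, 0], conf_int.iloc[:, 1], alpha=0.2)
plt.show()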