def test_innovations_algo_direct_filter_kalman_filter(ar_params, ma_params,
                                                      sigma2):
    # Cross-check the "direct" innovations-algorithm routines against the
    # Kalman filter for an ARMA process: both must yield the same forecast
    # errors and the same per-observation exact log-likelihood.
    endog = np.random.normal(size=10)

    # Innovations algorithm approach
    raw_errors, raw_scale = arma_innovations.arma_innovations(
        endog, ar_params, ma_params, sigma2)
    errors = np.array(raw_errors)
    # The second output is multiplied by sigma2 to obtain the variances that
    # enter the Gaussian log-density below.
    variances = np.array(raw_scale) * sigma2

    quadratic_term = -0.5 * errors**2 / variances
    normalizing_term = 0.5 * np.log(2 * np.pi * variances)
    llf_obs = quadratic_term - normalizing_term

    # Kalman filter approach
    arma_order = (len(ar_params), 0, len(ma_params))
    params = np.r_[ar_params, ma_params, sigma2]
    res = SARIMAX(endog, order=arma_order).filter(params)

    # The two approaches must agree
    assert_allclose(errors, res.forecasts_error[0])
    # Disabled check:
    # assert_allclose(theta[1:, 0], res.filter_results.kalman_gain[0, 0, :-1])
    assert_allclose(llf_obs, res.llf_obs)

    # Per-observation log-likelihood computed directly by the fast routine
    llf_obs_direct = _arma_innovations.darma_loglikeobs_fast(
        endog, ar_params, ma_params, sigma2)
    assert_allclose(llf_obs_direct, res.llf_obs)
def test_innovations_algo_direct_filter_kalman_filter(ar_params, ma_params,
                                                      sigma2):
    # Exact ARMA likelihood evaluation, computed two ways: through the direct
    # innovations-algorithm function and through the Kalman filter. Forecast
    # errors and observation-level log-likelihoods are required to match.
    endog = np.random.normal(size=10)

    # --- Innovations algorithm ---
    innov_out = arma_innovations.arma_innovations(endog, ar_params,
                                                  ma_params, sigma2)
    u, r = innov_out
    u = np.array(u)
    v = sigma2_scaled = np.array(r) * sigma2
    llf_innov = -0.5 * u**2 / v - 0.5 * np.log(2 * np.pi * sigma2_scaled)

    # --- Kalman filter ---
    n_ar = len(ar_params)
    n_ma = len(ma_params)
    kalman_model = SARIMAX(endog, order=(n_ar, 0, n_ma))
    kalman_res = kalman_model.filter(np.r_[ar_params, ma_params, sigma2])

    # --- Comparison ---
    assert_allclose(u, kalman_res.forecasts_error[0])
    # Disabled check:
    # assert_allclose(theta[1:, 0], res.filter_results.kalman_gain[0, 0, :-1])
    assert_allclose(llf_innov, kalman_res.llf_obs)

    # Same log-likelihood values, obtained directly from the fast routine
    llf_fast = _arma_innovations.darma_loglikeobs_fast(
        endog, ar_params, ma_params, sigma2)
    assert_allclose(llf_fast, kalman_res.llf_obs)
def test_itsmr():
    # Higher-precision counterpart of test_brockwell_davis_example_517: the
    # reference values were produced with R itsmr::hannan; see
    # results/results_hr.R
    endog = lake.copy()

    hr_options = dict(ar_order=1, ma_order=1, demean=True,
                      initial_ar_order=22, unbiased=False)
    hr, _ = hannan_rissanen(endog, **hr_options)

    expected_ar = [0.69607715]
    expected_ma = [0.3787969217]
    assert_allclose(hr.ar_params, expected_ar, atol=1e-4)
    assert_allclose(hr.ma_params, expected_ma, atol=1e-4)

    # `hannan_rissanen` reports a variance based on the least-squares
    # regression residuals (the fast innovations algorithm implementation
    # does not allow non-stationary processes), whereas BD report the
    # innovations algorithm estimate. The estimates here describe a
    # stationary series, so the innovations variance can be computed by hand
    # and compared against BD.
    demeaned = endog - endog.mean()
    u, v = arma_innovations(demeaned, hr.ar_params, hr.ma_params, sigma2=1)
    standardized = u / v**0.5
    innovations_sigma2 = np.inner(standardized, standardized) / len(u)
    assert_allclose(innovations_sigma2, 0.4773580109, atol=1e-4)
def test_itsmr_with_fixed_params(fixed_params):
    # Variation of test_itsmr in which one or more parameters of Example
    # 5.1.7 in Brockwell and Davis (2016) are held fixed; the remaining free
    # parameters must still be estimated correctly.
    endog = lake.copy()

    hr_options = dict(ar_order=1, ma_order=1, demean=True,
                      initial_ar_order=22, unbiased=False,
                      fixed_params=fixed_params)
    hr, _ = hannan_rissanen(endog, **hr_options)

    expected_ar = [0.69607715]
    expected_ma = [0.3787969217]
    assert_allclose(hr.ar_params, expected_ar, atol=1e-4)
    assert_allclose(hr.ma_params, expected_ma, atol=1e-4)

    # `hannan_rissanen` reports a variance based on the least-squares
    # regression residuals (the fast innovations algorithm implementation
    # does not allow non-stationary processes), whereas BD report the
    # innovations algorithm estimate. The estimates here describe a
    # stationary series, so the innovations variance can be computed by hand
    # and compared against BD.
    demeaned = endog - endog.mean()
    u, v = arma_innovations(demeaned, hr.ar_params, hr.ma_params, sigma2=1)
    standardized = u / v**0.5
    innovations_sigma2 = np.inner(standardized, standardized) / len(u)
    assert_allclose(innovations_sigma2, 0.4773580109, atol=1e-4)
def test_brockwell_davis_example_517():
    # Lake data, as used in the Brockwell and Davis example
    endog = lake.copy()

    # BD describe a third "bias correction" step but do not implement it, so
    # their published numbers cannot validate it; hence `unbiased=False`.
    # Note: BD do not say that they use initial_order=22 (nor why), but it is
    # the value that makes this test pass.
    hr_options = dict(ar_order=1, ma_order=1, demean=True,
                      initial_ar_order=22, unbiased=False)
    hr, _ = hannan_rissanen(endog, **hr_options)

    assert_allclose(hr.ar_params, [0.6961], atol=1e-4)
    assert_allclose(hr.ma_params, [0.3788], atol=1e-4)

    # `hannan_rissanen` reports a variance based on the least-squares
    # regression residuals (the fast innovations algorithm implementation
    # does not allow non-stationary processes), whereas BD report the
    # innovations algorithm estimate. The estimates here describe a
    # stationary series, so the innovations variance can be computed by hand
    # and compared against BD.
    demeaned = endog - endog.mean()
    u, v = arma_innovations(demeaned, hr.ar_params, hr.ma_params, sigma2=1)
    standardized = u / v**0.5
    innovations_sigma2 = np.inner(standardized, standardized) / len(u)
    assert_allclose(innovations_sigma2, 0.4774, atol=1e-4)
def check_innovations_ma_itsmr(lake):
    # Reference values come from R itsmr::ia; see
    # results/results_innovations.R
    ma_order = 10
    estimates, _ = innovations(lake, ma_order, demean=True)
    fitted = estimates[ma_order]

    expected_ma_params = [
        1.0816255264, 0.7781248438, 0.5367164430, 0.3291559246, 0.3160039850,
        0.2513754550, 0.2051536531, 0.1441070313, 0.3431868340, 0.1827400798]
    assert_allclose(fitted.ma_params, expected_ma_params)

    # itsmr::ia reports the innovations algorithm estimate of the variance,
    # so recompute that quantity from the fitted MA parameters.
    demeaned = np.array(lake) - np.mean(lake)
    u, v = arma_innovations(demeaned, ma_params=fitted.ma_params, sigma2=1)
    expected_sigma2 = 0.4523684344
    innovations_sigma2 = np.sum(u**2 / v) / len(u)
    assert_allclose(innovations_sigma2, expected_sigma2)
def test_nonstationary_series():
    # Reference values come from R stats::ar.burg; see results/results_burg.R
    endog = np.arange(1, 12) * 1.0
    res, _ = burg(endog, 2, demean=False)

    expected_ar_params = [1.9669331547, -0.9892846679]
    expected_burg_sigma2 = 0.02143066427
    assert_allclose(res.ar_params, expected_ar_params)
    assert_allclose(res.sigma2, expected_burg_sigma2)

    # With var.method = 1, stats::ar.burg additionally reports something
    # equivalent to the innovations algorithm estimate of sigma2.
    u, v = arma_innovations(endog, ar_params=res.ar_params, sigma2=1)
    expected_innovations_sigma2 = 0.02191056906
    innovations_sigma2 = np.sum(u**2 / v) / len(u)
    assert_allclose(innovations_sigma2, expected_innovations_sigma2)
def check_itsmr(lake):
    # Reference values come from R itsmr::yw; see results/results_yw_dl.R
    yw, _ = yule_walker(lake, 5)

    expected_ar_params = [
        1.08213598501, -0.39658257147, 0.11793957728, -0.03326633983,
        0.06209208707,
    ]
    assert_allclose(yw.ar_params, expected_ar_params)

    # The R reference reports the innovations algorithm estimate of the
    # variance, so recompute that quantity from the fitted AR parameters.
    demeaned = np.array(lake) - np.mean(lake)
    u, v = arma_innovations(demeaned, ar_params=yw.ar_params, sigma2=1)
    expected_sigma2 = 0.4716322564
    innovations_sigma2 = np.sum(u**2 / v) / len(u)
    assert_allclose(innovations_sigma2, expected_sigma2)
def check_itsmr(lake):
    # Reference values come from R itsmr::burg; see results/results_burg.R
    res, _ = burg(lake, 10, demean=True)

    expected_ar_params = [
        1.05853631096, -0.32639150878, 0.04784765122, 0.02620476111,
        0.04444511374, -0.04134010262, 0.02251178970, -0.01427524694,
        0.22223486915, -0.20935524387,
    ]
    assert_allclose(res.ar_params, expected_ar_params)

    # itsmr always reports the innovations algorithm estimate of sigma2,
    # whereas `burg` returns Burg's estimate; recompute the former here.
    demeaned = np.array(lake) - np.mean(lake)
    u, v = arma_innovations(demeaned, ar_params=res.ar_params, sigma2=1)
    expected_sigma2 = 0.4458956354
    innovations_sigma2 = np.sum(u**2 / v) / len(u)
    assert_allclose(innovations_sigma2, expected_sigma2)
def check_itsmr(lake):
    # Reference values come from R itsmr::yw; see results/results_yw_dl.R
    dl, _ = durbin_levinson(lake, 5)

    # Order 0 has no AR terms: its parameters are compared with the sample
    # variance of the series.
    assert_allclose(dl[0].params, np.var(lake))

    # Expected AR coefficients for each order, checked in increasing order
    expected_ar_by_order = {
        1: [0.8319112104],
        2: [1.0538248798, -0.2667516276],
        3: [1.0887037577, -0.4045435867, 0.1307541335],
        4: [1.08425065810, -0.39076602696, 0.09367609911, 0.03405704644],
        5: [1.08213598501, -0.39658257147, 0.11793957728, -0.03326633983,
            0.06209208707],
    }
    for ar_order, expected in expected_ar_by_order.items():
        assert_allclose(dl[ar_order].ar_params, expected)

    # itsmr::yw reports the innovations algorithm estimate of the variance;
    # only p=5 is checked.
    demeaned = np.array(lake) - np.mean(lake)
    u, v = arma_innovations(demeaned, ar_params=dl[5].ar_params, sigma2=1)
    expected_sigma2 = 0.4716322564
    innovations_sigma2 = np.sum(u**2 / v) / len(u)
    assert_allclose(innovations_sigma2, expected_sigma2)
def run(csv_list):
    """
    Interactively walk through fitting AR and MA terms to a series.

    Loads the series via `get_data(csv_list)`, plots the raw data and its
    autocorrelation, optionally applies seasonal and repeated first
    differencing (as decided by `check_seasonality` / `check_diff`), plots
    the PACF, prompts for the AR order `p` and the MA order `q`, prints the
    resulting equations and plots the fit and the residues.

    Parameters
    ----------
    csv_list
        Passed unchanged to `get_data`.

    Returns
    -------
    None
    """
    n_shown = 40

    time, data = get_data(csv_list)
    data_plot(time, data, 'Time', 'Discharge', 'Plot of raw data', n=n_shown)

    # Autocorrelation of the raw series
    ar = init_ar(time, data)
    raw = ar.auto_correlation(ar.data, 40)
    acf_band = 1.96 / (len(raw))**0.5
    data_plot(range(len(raw)), raw, 'Lag', 'Correlation',
              'Plot of ACF of raw data', n=n_shown, plt_type='bar',
              significance=acf_band)

    # Seasonal differencing, only when a period is reported
    period = check_seasonality()
    if not period:
        non_seasonal = ar.data
    else:
        ar, non_seasonal = do_diff(ar, period)

    # Keep first-differencing for as long as `check_diff` asks for it.
    # `d` counts the differencing passes; it is not used further below.
    d = 0
    diff = non_seasonal
    while check_diff():
        d += 1
        ar, diff = do_diff(ar, 1)
    ar.data = diff

    # Partial autocorrelation of the differenced series
    pacf = ar.yule_walker_pacf(100)
    pacf_band = 1.96 / (len(pacf))**0.5
    data_plot(range(len(pacf)), pacf, 'Lags', 'Correlation', 'PACF',
              plt_type='bar', n=n_shown, significance=pacf_band)

    # AR part: the first p PACF values are used as the coefficients
    p = int(input('Enter value of p: '))
    phi = pacf[:p]
    print(f'The AR({p}) equation is')
    ar_terms = [
        f'({round(phi_i, 4)}) * y(t-{i + 1})'
        for i, phi_i in enumerate(phi)
    ]
    print(' + '.join(ar_terms), '+ residues')

    # NOTE(review): `difference(period, rev=True)` is called even when
    # `period` is falsy - confirm that this is intended.
    ar.data = ar.predict(phi)
    out = ar.difference(period, rev=True)

    n_out = len(out)
    plt.plot(time[:n_out], data[:n_out])
    plt.plot(time[:n_out], out)
    plt.show()

    # Disabled comparison against a reference AR implementation:
    # o = AR(data, time)
    # ou = o.fit()
    # print(ou.params)
    # plt.plot(time, data)
    # plt.plot(time, list(o.predict(ou.params)) + [0] * 15)
    # plt.show()

    # MA part: the first q values returned by `arma_innovations`
    q = int(input('Enter value of q: '))
    if q:
        ma_params, _mse = arma_innovations(out)
        theta = ma_params[:q]
        print('MA Equation is:')
        ma_terms = [
            f'({round(theta_i, 4)}) * e(t-{i + 1})'
            for i, theta_i in enumerate(theta)
        ]
        print(' + '.join(ma_terms))

    # Residues: observed minus fitted, over the overlapping range
    residues = [y - y_cap for y, y_cap in zip(data, out)]
    data_plot(time[:len(residues)], residues, 'Time', 'Residues',
              'Plot of residues', n=n_shown)

    # Final predicted output (disabled):
    # final_output = predict_using_arima(data, phi, residues, theta)
def gls(endog, exog=None, order=(0, 0, 0), seasonal_order=(0, 0, 0, 0),
        include_constant=None, n_iter=None, max_iter=50, tolerance=1e-8,
        arma_estimator='innovations_mle', arma_estimator_kwargs=None):
    """
    Estimate ARMAX parameters by GLS.

    Parameters
    ----------
    endog : array_like
        Input time series array.
    exog : array_like, optional
        Array of exogenous regressors. If not included, then
        `include_constant` must be True, and then `exog` will only include
        the constant column.
    order : tuple, optional
        The (p,d,q) order of the ARIMA model. Default is (0, 0, 0).
    seasonal_order : tuple, optional
        The (P,D,Q,s) order of the seasonal ARIMA model.
        Default is (0, 0, 0, 0).
    include_constant : bool, optional
        Whether to add a constant term in `exog` if it's not already there.
        The estimate of the constant will then appear as one of the `exog`
        parameters. If `exog` is None, then the constant will represent the
        mean of the process. Default is True if the specified model does not
        include integration and False otherwise.
    n_iter : int, optional
        Optionally iterate feasible GLS a specific number of times. Default
        is to iterate to convergence. If set, this argument overrides the
        `max_iter` and `tolerance` arguments.
    max_iter : int, optional
        Maximum number of feasible GLS iterations. Default is 50. If `n_iter`
        is set, it overrides this argument.
    tolerance : float, optional
        Tolerance for determining convergence of feasible GLS iterations. If
        `n_iter` is set, this argument has no effect.
        Default is 1e-8.
    arma_estimator : str, optional
        The estimator used for estimating the ARMA model. This option should
        not generally be used, unless the default method is failing or is
        otherwise unsuitable. Not all values will be valid, depending on the
        specified model orders (`order` and `seasonal_order`).
        Possible values are:

        * 'innovations_mle' - can be used with any specification
        * 'statespace' - can be used with any specification
        * 'hannan_rissanen' - can be used with any ARMA non-seasonal model
        * 'yule_walker' - only non-seasonal consecutive
          autoregressive (AR) models
        * 'burg' - only non-seasonal, consecutive autoregressive (AR) models
        * 'innovations' - only non-seasonal, consecutive moving
          average (MA) models.

        The default is 'innovations_mle'.
    arma_estimator_kwargs : dict, optional
        Arguments to pass to the ARMA estimator.

    Returns
    -------
    parameters : SARIMAXParams object
        Contains the parameter estimates from the final iteration.
    other_results : Bunch
        Includes eight components: `spec`, `params`, `converged`,
        `differences`, `iterations`, `arma_estimator`,
        'arma_estimator_kwargs', and `arma_results`.

    Raises
    ------
    ValueError
        If `include_constant` is True and the model is integrated (the `d`
        or `D` order is positive).

    Warns
    -----
    UserWarning
        If the model is integrated (the series are differenced before
        estimation), or if `n_iter` is not set and the iterations do not
        converge within `max_iter` iterations.

    Notes
    -----
    The primary reference is [1]_, section 6.6. In particular, the
    implementation follows the iterative procedure described in section 6.6.2.
    Construction of the transformed variables used to compute the GLS estimator
    described in section 6.6.1 is done via an application of the innovations
    algorithm (rather than explicit construction of the transformation matrix).

    Note that if the specified model includes integration, both the `endog` and
    `exog` series will be differenced prior to estimation and a warning will
    be issued to alert the user.

    References
    ----------
    .. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
       Introduction to Time Series and Forecasting. Springer.
    """
    # Handle n_iter: a fixed iteration count replaces `max_iter`, and the
    # tolerance is set to infinity (the convergence check below is skipped
    # whenever `n_iter` is given)
    if n_iter is not None:
        max_iter = n_iter
        tolerance = np.inf

    # Default for include_constant is True if there is no integration and
    # False otherwise
    integrated = order[1] > 0 or seasonal_order[1] > 0
    if include_constant is None:
        include_constant = not integrated
    elif include_constant and integrated:
        raise ValueError('Cannot include a constant in an integrated model.')

    # Handle including the constant (need to do it now so that the constant
    # parameter can be included in the specification as part of `exog`.)
    if include_constant:
        exog = np.ones_like(endog) if exog is None else add_constant(exog)

    # Create the SARIMAX specification
    spec = SARIMAXSpecification(endog, exog=exog, order=order,
                                seasonal_order=seasonal_order)
    endog = spec.endog
    exog = spec.exog

    # Handle integration
    if spec.is_integrated:
        # TODO: this is the approach suggested by BD (see Remark 1 in
        #       section 6.6.2 and Example 6.6.3), but maybe there are some
        #       cases where we don't want to force this behavior on the user?
        warnings.warn('Provided `endog` and `exog` series have been'
                      ' differenced to eliminate integration prior to GLS'
                      ' parameter estimation.')
        endog = diff(endog, k_diff=spec.diff,
                     k_seasonal_diff=spec.seasonal_diff,
                     seasonal_periods=spec.seasonal_periods)
        exog = diff(exog, k_diff=spec.diff,
                    k_seasonal_diff=spec.seasonal_diff,
                    seasonal_periods=spec.seasonal_periods)
    # Stack `endog` and `exog` column-wise so that a single pass of the
    # innovations algorithm (Step 3) transforms all of them together
    augmented = np.c_[endog, exog]

    # Validate arma_estimator
    spec.validate_estimator(arma_estimator)
    if arma_estimator_kwargs is None:
        arma_estimator_kwargs = {}

    # Step 1: OLS
    mod_ols = OLS(endog, exog)
    res_ols = mod_ols.fit()
    exog_params = res_ols.params
    resid = res_ols.resid

    # 0th iteration parameters: OLS regression coefficients, all ARMA
    # coefficients zero, and the OLS scale as the variance
    p = SARIMAXParams(spec=spec)
    p.exog_params = exog_params
    if spec.max_ar_order > 0:
        p.ar_params = np.zeros(spec.k_ar_params)
    if spec.max_seasonal_ar_order > 0:
        p.seasonal_ar_params = np.zeros(spec.k_seasonal_ar_params)
    if spec.max_ma_order > 0:
        p.ma_params = np.zeros(spec.k_ma_params)
    if spec.max_seasonal_ma_order > 0:
        p.seasonal_ma_params = np.zeros(spec.k_seasonal_ma_params)
    p.sigma2 = res_ols.scale

    ar_params = p.ar_params
    seasonal_ar_params = p.seasonal_ar_params
    ma_params = p.ma_params
    seasonal_ma_params = p.seasonal_ma_params
    sigma2 = p.sigma2

    # Step 2 - 4: iterate feasible GLS to convergence
    # (index 0 of each list corresponds to the OLS step, which has no ARMA
    # results and no parameter difference)
    arma_results = [None]
    differences = [None]
    parameters = [p]
    # `converged` stays None when a fixed number of iterations is requested
    converged = False if n_iter is None else None
    # Initialized so that `iterations` is defined even if the loop body
    # never runs (max_iter == 0)
    i = 0
    for i in range(1, max_iter + 1):
        prev = exog_params

        # Step 2: ARMA
        # TODO: allow estimator-specific kwargs?
        if arma_estimator == 'yule_walker':
            p_arma, res_arma = yule_walker(resid, ar_order=spec.ar_order,
                                           demean=False,
                                           **arma_estimator_kwargs)
        elif arma_estimator == 'burg':
            p_arma, res_arma = burg(resid, ar_order=spec.ar_order,
                                    demean=False, **arma_estimator_kwargs)
        elif arma_estimator == 'innovations':
            out, res_arma = innovations(resid, ma_order=spec.ma_order,
                                        demean=False, **arma_estimator_kwargs)
            # Keep only the last element of the returned sequence
            p_arma = out[-1]
        elif arma_estimator == 'hannan_rissanen':
            p_arma, res_arma = hannan_rissanen(resid, ar_order=spec.ar_order,
                                               ma_order=spec.ma_order,
                                               demean=False,
                                               **arma_estimator_kwargs)
        else:
            # For later iterations, use a "warm start" for parameter estimates
            # (speeds up estimation and convergence)
            start_params = (None if i == 1 else np.r_[ar_params, ma_params,
                                                      seasonal_ar_params,
                                                      seasonal_ma_params,
                                                      sigma2])
            # Note: in each case, we do not pass in the order of integration
            # since we have already differenced the series
            tmp_order = (spec.order[0], 0, spec.order[2])
            tmp_seasonal_order = (spec.seasonal_order[0], 0,
                                  spec.seasonal_order[2],
                                  spec.seasonal_order[3])
            if arma_estimator == 'innovations_mle':
                p_arma, res_arma = innovations_mle(
                    resid, order=tmp_order, seasonal_order=tmp_seasonal_order,
                    demean=False, start_params=start_params,
                    **arma_estimator_kwargs)
            else:
                p_arma, res_arma = statespace(
                    resid, order=tmp_order, seasonal_order=tmp_seasonal_order,
                    include_constant=False, start_params=start_params,
                    **arma_estimator_kwargs)

        ar_params = p_arma.ar_params
        seasonal_ar_params = p_arma.seasonal_ar_params
        ma_params = p_arma.ma_params
        seasonal_ma_params = p_arma.seasonal_ma_params
        sigma2 = p_arma.sigma2
        arma_results.append(res_arma)

        # Step 3: GLS
        # Compute transformed variables that satisfy OLS assumptions
        # Note: In section 6.6.1 of Brockwell and Davis (2016), these
        # transformations are developed as computed by left multiplication
        # by a matrix T. However, explicitly constructing T and then
        # performing the left-multiplications does not scale well when nobs is
        # large. Instead, we can retrieve the transformed variables as the
        # residuals of the innovations algorithm (the `normalize=True`
        # argument applies a Prais-Winsten-type normalization to the first few
        # observations to ensure homoskedasticity). Brockwell and Davis
        # mention that they also take this approach in practice.
        # NOTE(review): only the non-seasonal `ar_params` / `ma_params` are
        # passed here; the seasonal parameters estimated in Step 2 are not
        # used in the transformation - confirm whether that is intended.
        tmp, _ = arma_innovations.arma_innovations(augmented,
                                                   ar_params=ar_params,
                                                   ma_params=ma_params,
                                                   normalize=True)
        # First column is the transformed `endog`, the rest the transformed
        # `exog` (matching the column order of `augmented`)
        u = tmp[:, 0]
        x = tmp[:, 1:]

        # OLS on transformed variables
        mod_gls = OLS(u, x)
        res_gls = mod_gls.fit()
        exog_params = res_gls.params
        # Residuals for the next ARMA step use the untransformed series
        resid = endog - np.dot(exog, exog_params)

        # Construct the parameter vector for the iteration
        p = SARIMAXParams(spec=spec)
        p.exog_params = exog_params
        if spec.max_ar_order > 0:
            p.ar_params = ar_params
        if spec.max_seasonal_ar_order > 0:
            p.seasonal_ar_params = seasonal_ar_params
        if spec.max_ma_order > 0:
            p.ma_params = ma_params
        if spec.max_seasonal_ma_order > 0:
            p.seasonal_ma_params = seasonal_ma_params
        p.sigma2 = sigma2
        parameters.append(p)

        # Check for convergence (based on the change in the regression
        # coefficients only)
        difference = np.abs(exog_params - prev)
        differences.append(difference)
        if n_iter is None and np.all(difference < tolerance):
            converged = True
            break
    else:
        # Runs only when the loop finished without `break`, i.e. without
        # meeting the convergence criterion
        if n_iter is None:
            warnings.warn('Feasible GLS failed to converge in %d iterations.'
                          ' Consider increasing the maximum number of'
                          ' iterations using the `max_iter` argument or'
                          ' reducing the required tolerance using the'
                          ' `tolerance` argument.' % max_iter)

    # Construct final results
    p = parameters[-1]
    other_results = Bunch({
        'spec': spec,
        'params': parameters,
        'converged': converged,
        'differences': differences,
        'iterations': i,
        'arma_estimator': arma_estimator,
        'arma_estimator_kwargs': arma_estimator_kwargs,
        'arma_results': arma_results,
    })

    return p, other_results
def test_innovations_nonstationary(ar_params):
    # The supplied `ar_params` (presumably non-stationary, per the test name)
    # must make `arma_innovations` raise a ValueError whose message matches
    # the pattern below.
    expected_message = "The model's autoregressive"

    np.random.seed(42)
    series = np.random.normal(size=100)

    with pytest.raises(ValueError, match=expected_message):
        arma_innovations.arma_innovations(series, ar_params=ar_params)
def innovations(data):
    """
    Fallback for the MA innovations algorithm.

    Calls `arma_innovations` with `data` as its only argument and returns
    whatever that call returns, unchanged.
    """
    result = arma_innovations(data)
    return result