def innovations_mle(endog, order=(0, 0, 0), seasonal_order=(0, 0, 0, 0),
                    demean=True, enforce_invertibility=True,
                    start_params=None, minimize_kwargs=None):
    """
    Estimate SARIMA parameters by MLE using innovations algorithm.

    Parameters
    ----------
    endog : array_like
        Input time series array.
    order : tuple, optional
        The (p,d,q) order of the model for the number of AR parameters,
        differences, and MA parameters. Default is (0, 0, 0).
    seasonal_order : tuple, optional
        The (P,D,Q,s) order of the seasonal component of the model for the
        AR parameters, differences, MA parameters, and periodicity. Default
        is (0, 0, 0, 0).
    demean : bool, optional
        Whether to estimate and remove the mean from the process prior to
        fitting the SARIMA coefficients. Default is True.
    enforce_invertibility : bool, optional
        Whether or not to transform the MA parameters to enforce invertibility
        in the moving average component of the model. Default is True.
    start_params : array_like, optional
        Initial guess of the solution for the loglikelihood maximization. The
        AR polynomial must be stationary. If `enforce_invertibility=True` the
        MA polynomial must be invertible. If not provided, default starting
        parameters are computed using the Hannan-Rissanen method.
    minimize_kwargs : dict, optional
        Arguments to pass to scipy.optimize.minimize. The dictionary (and its
        nested `options` dictionary, if any) is copied before defaults are
        filled in, so the caller's object is never modified.

    Returns
    -------
    parameters : SARIMAXParams object
    other_results : Bunch
        Includes four components: `spec`, containing the
        `SARIMAXSpecification` instance corresponding to the input arguments;
        `minimize_kwargs`, containing any keyword arguments passed to
        `minimize` (including the defaults filled in here);
        `start_params`, containing the untransformed starting parameters
        passed to `minimize`; and `minimize_results`, containing the output
        from `minimize`.

    Raises
    ------
    ValueError
        If `start_params` is given and implies a non-stationary AR process,
        or implies a non-invertible MA process while
        `enforce_invertibility=True`.

    Notes
    -----
    The primary reference is [1]_, section 5.2.

    Note: we do not include `enforce_stationarity` as an argument, because this
    function requires stationarity.

    TODO: support concentrating out the scale (should be easy: use sigma2=1
          and then compute sigma2=np.sum(u**2 / v) / len(u); would then need to
          redo llf computation in the Cython function).

    TODO: add support for fixed parameters

    TODO: add support for secondary optimization that does not enforce
          stationarity / invertibility, starting from first step's parameters

    References
    ----------
    .. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
       Introduction to Time Series and Forecasting. Springer.
    """
    spec = SARIMAXSpecification(
        endog, order=order, seasonal_order=seasonal_order,
        enforce_stationarity=True, enforce_invertibility=enforce_invertibility)
    endog = spec.endog
    if spec.is_integrated:
        warnings.warn('Provided `endog` series has been differenced to'
                      ' eliminate integration prior to ARMA parameter'
                      ' estimation.')
        endog = diff(endog, k_diff=spec.diff,
                     k_seasonal_diff=spec.seasonal_diff,
                     seasonal_periods=spec.seasonal_periods)
    if demean:
        endog = endog - endog.mean()

    p = SARIMAXParams(spec=spec)

    if start_params is None:
        sp = SARIMAXParams(spec=spec)

        # Estimate starting parameters via Hannan-Rissanen
        hr, hr_results = hannan_rissanen(endog, ar_order=spec.ar_order,
                                         ma_order=spec.ma_order, demean=False)
        if spec.seasonal_periods == 0:
            # If no seasonal component, then `hr` gives starting parameters
            sp.params = hr.params
        else:
            # If we do have a seasonal component, estimate starting parameters
            # for the seasonal lags using the residuals from the previous step
            # (the specification below is built only for its validation of
            # the seasonal order; the instance itself is not used)
            _ = SARIMAXSpecification(
                endog, seasonal_order=seasonal_order,
                enforce_stationarity=True,
                enforce_invertibility=enforce_invertibility)

            ar_order = np.array(spec.seasonal_ar_lags) * spec.seasonal_periods
            ma_order = np.array(spec.seasonal_ma_lags) * spec.seasonal_periods
            seasonal_hr, seasonal_hr_results = hannan_rissanen(
                hr_results.resid, ar_order=ar_order, ma_order=ma_order,
                demean=False)

            # Set the starting parameters
            sp.ar_params = hr.ar_params
            sp.ma_params = hr.ma_params
            sp.seasonal_ar_params = seasonal_hr.ar_params
            sp.seasonal_ma_params = seasonal_hr.ma_params
            sp.sigma2 = seasonal_hr.sigma2

        # Then, require starting parameters to be stationary and invertible
        # (fall back to zeros for the offending polynomial)
        if not sp.is_stationary:
            sp.ar_params = [0] * sp.k_ar_params
            sp.seasonal_ar_params = [0] * sp.k_seasonal_ar_params

        if not sp.is_invertible and spec.enforce_invertibility:
            sp.ma_params = [0] * sp.k_ma_params
            sp.seasonal_ma_params = [0] * sp.k_seasonal_ma_params

        start_params = sp.params
    else:
        # User-supplied starting values are validated, never silently changed
        sp = SARIMAXParams(spec=spec)
        sp.params = start_params
        if not sp.is_stationary:
            raise ValueError('Given starting parameters imply a non-stationary'
                             ' AR process. Innovations algorithm requires a'
                             ' stationary process.')

        if spec.enforce_invertibility and not sp.is_invertible:
            raise ValueError('Given starting parameters imply a non-invertible'
                             ' MA process with `enforce_invertibility=True`.')

    def obj(params):
        # Negative loglikelihood as a function of unconstrained parameters
        p.params = spec.constrain_params(params)

        return -arma_innovations.arma_loglike(
            endog, ar_params=-p.reduced_ar_poly.coef[1:],
            ma_params=p.reduced_ma_poly.coef[1:], sigma2=p.sigma2)

    # Untransform the starting parameters
    unconstrained_start_params = spec.unconstrain_params(start_params)

    # Perform the minimization.
    # Work on copies so that filling in defaults does not mutate the
    # dictionaries owned by the caller.
    minimize_kwargs = {} if minimize_kwargs is None else dict(minimize_kwargs)
    options = dict(minimize_kwargs.get('options') or {})
    options.setdefault('maxiter', 100)
    minimize_kwargs['options'] = options
    minimize_results = minimize(obj, unconstrained_start_params,
                                **minimize_kwargs)

    # TODO: show warning if convergence failed.

    # Reverse the transformation to get the optimal parameters
    p.params = spec.constrain_params(minimize_results.x)

    # Construct other results
    other_results = Bunch({
        'spec': spec,
        'minimize_results': minimize_results,
        'minimize_kwargs': minimize_kwargs,
        'start_params': start_params
    })

    return p, other_results
def hannan_rissanen(endog, ar_order=0, ma_order=0, demean=True,
                    initial_ar_order=None, unbiased=None):
    """
    Estimate ARMA parameters using Hannan-Rissanen procedure.

    Parameters
    ----------
    endog : array_like
        Input time series array, assumed to be stationary.
    ar_order : int
        Autoregressive order
    ma_order : int
        Moving average order
    demean : bool, optional
        Whether to estimate and remove the mean from the process prior to
        fitting the ARMA coefficients. Default is True.
    initial_ar_order : int, optional
        Order of long autoregressive process used for initial computation of
        residuals.
    unbiased : bool, optional
        Whether or not to apply the bias correction step. Default is True if
        the estimated coefficients from the previous step imply a stationary
        and invertible process and False otherwise.

    Returns
    -------
    parameters : SARIMAXParams object
    other_results : Bunch
        Includes three components: `spec`, containing the
        `SARIMAXSpecification` instance corresponding to the input arguments;
        `initial_ar_order`, containing the autoregressive lag order used in the
        first step; and `resid`, which contains the computed residuals from the
        last step.

    Raises
    ------
    ValueError
        If `unbiased=True` and the second-step estimates are non-stationary
        or non-invertible.

    Notes
    -----
    The primary reference is [1]_, section 5.1.4, which describes a three-step
    procedure that we implement here.

    1. Fit a large-order AR model via Yule-Walker to estimate residuals
    2. Compute AR and MA estimates via least squares
    3. (Unless the estimated coefficients from step (2) are non-stationary /
       non-invertible or `unbiased=False`) Perform bias correction

    The order used for the AR model in the first step may be given as an
    argument. If it is not, we compute it as suggested by [2]_.

    The estimate of the variance that we use is computed from the residuals
    of the least-squares regression and not from the innovations algorithm.
    This is because our fast implementation of the innovations algorithm is
    only valid for stationary processes, and the Hannan-Rissanen procedure may
    produce estimates that imply non-stationary processes. To avoid
    inconsistency, we never compute this latter variance here, even if it is
    possible. See test_hannan_rissanen::test_brockwell_davis_example_517 for
    an example of how to compute this variance manually.

    This procedure assumes that the series is stationary, but if this is not
    true, it is still possible that this procedure will return parameters that
    imply a non-stationary / non-invertible process.

    Note that the third stage will only be applied if the parameters from the
    second stage imply a stationary / invertible model. If `unbiased=True` is
    given, then non-stationary / non-invertible parameters in the second stage
    will throw an exception.

    References
    ----------
    .. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
       Introduction to Time Series and Forecasting. Springer.
    .. [2] Gomez, Victor, and Agustin Maravall. 2001.
       "Automatic Modeling Methods for Univariate Series."
       A Course in Time Series Analysis, 171–201.
    """
    spec = SARIMAXSpecification(endog, ar_order=ar_order, ma_order=ma_order)
    endog = spec.endog
    if demean:
        endog = endog - endog.mean()

    p = SARIMAXParams(spec=spec)

    nobs = len(endog)
    max_ar_order = spec.max_ar_order
    max_ma_order = spec.max_ma_order

    # Default initial_ar_order is as suggested by Gomez and Maravall (2001)
    if initial_ar_order is None:
        initial_ar_order = max(np.floor(np.log(nobs)**2).astype(int),
                               2 * max(max_ar_order, max_ma_order))
    # Create a spec, just to validate the initial autoregressive order
    _ = SARIMAXSpecification(endog, ar_order=initial_ar_order)

    # Compute lagged endog
    # (`ar_ix`, and `ma_ix` below, are to account for non-consecutive lags;
    # for indexing purposes, must have dtype int)
    ar_ix = np.array(spec.ar_lags, dtype=int) - 1
    lagged_endog = lagmat(endog, max_ar_order, trim='both')[:, ar_ix]

    # If no AR or MA components, this is just a variance computation
    if max_ma_order == 0 and max_ar_order == 0:
        p.sigma2 = np.var(endog, ddof=0)
        resid = endog.copy()
    # If no MA component, this is just CSS
    elif max_ma_order == 0:
        mod = OLS(endog[max_ar_order:], lagged_endog)
        res = mod.fit()
        resid = res.resid
        p.ar_params = res.params
        p.sigma2 = res.scale
    # Otherwise ARMA model
    else:
        # Step 1: Compute long AR model via Yule-Walker, get residuals.
        # The regression-level routine is called through `linear_model`
        # because the module-level name `yule_walker` in this file is the
        # SARIMAX wrapper, whose signature has no `order` / `method`
        # arguments and which returns a (params, results) pair.
        initial_ar_params, _ = linear_model.yule_walker(
            endog, order=initial_ar_order, method='mle')
        X = lagmat(endog, initial_ar_order, trim='both')
        y = endog[initial_ar_order:]
        resid = y - X.dot(initial_ar_params)

        # Get lagged residuals for `exog` in least-squares regression
        ma_ix = np.array(spec.ma_lags, dtype=int) - 1
        lagged_resid = lagmat(resid, max_ma_order, trim='both')[:, ma_ix]

        # Step 2: estimate ARMA model via least squares
        # (`ix` aligns the rows of `lagged_endog` with those of
        # `lagged_resid`, which lost `initial_ar_order + max_ma_order`
        # leading observations)
        ix = initial_ar_order + max_ma_order - max_ar_order
        mod = OLS(endog[initial_ar_order + max_ma_order:],
                  np.c_[lagged_endog[ix:], lagged_resid])
        res = mod.fit()
        p.ar_params = res.params[:spec.k_ar_params]
        p.ma_params = res.params[spec.k_ar_params:]
        resid = res.resid
        p.sigma2 = res.scale

        # Step 3: bias correction (if requested)
        if unbiased is True or unbiased is None:
            if p.is_stationary and p.is_invertible:
                ar_coef = p.ar_poly.coef
                ma_coef = p.ma_poly.coef
                start = max(max_ar_order, max_ma_order)

                # Z is left at zero for the first `start` observations
                Z = np.zeros_like(endog)
                for t in range(start, nobs):
                    # Note: in the case of non-consecutive lag orders, the
                    # polynomials have the appropriate zeros so we don't
                    # need to subset `endog[t - max_ar_order:t]` or
                    # Z[t - max_ma_order:t]
                    tmp_ar = np.dot(
                        -ar_coef[1:], endog[t - max_ar_order:t][::-1])
                    tmp_ma = np.dot(ma_coef[1:],
                                    Z[t - max_ma_order:t][::-1])
                    Z[t] = endog[t] - tmp_ar - tmp_ma

                V = lfilter([1], ar_coef, Z)
                W = lfilter(np.r_[1, -ma_coef[1:]], [1], Z)

                lagged_V = lagmat(V, max_ar_order, trim='both')
                lagged_W = lagmat(W, max_ma_order, trim='both')

                exog = np.c_[
                    lagged_V[max(max_ma_order - max_ar_order, 0):, ar_ix],
                    lagged_W[max(max_ar_order - max_ma_order, 0):, ma_ix]]

                mod_unbias = OLS(Z[start:], exog)
                res_unbias = mod_unbias.fit()

                p.ar_params = (
                    p.ar_params + res_unbias.params[:spec.k_ar_params])
                p.ma_params = (
                    p.ma_params + res_unbias.params[spec.k_ar_params:])

                # Recompute sigma2 from the step-2 regression data using the
                # bias-corrected coefficients
                resid = mod.endog - mod.exog.dot(
                    np.r_[p.ar_params, p.ma_params])
                p.sigma2 = np.inner(resid, resid) / len(resid)
            elif unbiased is True:
                raise ValueError('Cannot perform third step of Hannan-Rissanen'
                                 ' estimation to remove paramater bias,'
                                 ' because parameters estimated from the'
                                 ' second step are non-stationary or'
                                 ' non-invertible')

    # TODO: Gomez and Maravall (2001) or Gomez (1998)
    #       propose one more step here to further improve MA estimates

    # Construct results
    other_results = Bunch({
        'spec': spec,
        'initial_ar_order': initial_ar_order,
        'resid': resid
    })

    return p, other_results
def yule_walker(endog, ar_order=0, demean=True, unbiased=False):
    """
    Estimate AR parameters using Yule-Walker equations.

    Parameters
    ----------
    endog : array_like or SARIMAXSpecification
        Input time series array, assumed to be stationary.
    ar_order : int, optional
        Autoregressive order. Default is 0.
    demean : bool, optional
        Whether to estimate and remove the mean from the process prior to
        fitting the autoregressive coefficients. Default is True.
    unbiased : bool, optional
        Whether to use the "unbiased" autocovariance estimator, which uses
        n - h degrees of freedom rather than n. Note that despite the name, it
        is only truly unbiased if the process mean is known (rather than
        estimated) and for some processes it can result in a non-positive
        definite autocovariance matrix. Default is False.

    Returns
    -------
    parameters : SARIMAXParams object
        Contains the estimated AR coefficients and the innovation variance.
    other_results : Bunch
        Includes one component, `spec`, which is the `SARIMAXSpecification`
        instance corresponding to the input arguments.

    Raises
    ------
    ValueError
        If the specification does not have consecutive AR lags.

    Notes
    -----
    The primary reference is [1]_, section 5.1.1.

    This procedure assumes that the series is stationary.

    For a description of the effect of the "unbiased" estimate of the
    autocovariance function, see 2.4.2 of [1]_.

    References
    ----------
    .. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
       Introduction to Time Series and Forecasting. Springer.
    """
    spec = SARIMAXSpecification(endog, ar_order=ar_order)
    series = spec.endog
    params = SARIMAXParams(spec=spec)

    # Only plain AR(p) models with lags 1..p are supported
    if not spec.is_ar_consecutive:
        raise ValueError('Yule-Walker estimation unavailable for models with'
                         ' seasonal or non-consecutive AR orders.')

    # Pick the autocovariance estimator understood by the regression routine
    if unbiased:
        acov_method = 'unbiased'
    else:
        acov_method = 'mle'

    ar_coefs, sigma = linear_model.yule_walker(
        series, order=ar_order, demean=demean, method=acov_method)

    # The regression routine reports a standard deviation; store a variance
    params.ar_params = ar_coefs
    params.sigma2 = sigma**2

    return params, Bunch({'spec': spec})
def gls(endog, exog=None, order=(0, 0, 0), seasonal_order=(0, 0, 0, 0),
        include_constant=None, n_iter=None, max_iter=50, tolerance=1e-8,
        arma_estimator='innovations_mle', arma_estimator_kwargs=None):
    """
    Estimate ARMAX parameters by GLS.

    Parameters
    ----------
    endog : array_like
        Input time series array.
    exog : array_like, optional
        Array of exogenous regressors. If not included, then `include_constant`
        must be True, and then `exog` will only include the constant column.
    order : tuple, optional
        The (p,d,q) order of the ARIMA model. Default is (0, 0, 0).
    seasonal_order : tuple, optional
        The (P,D,Q,s) order of the seasonal ARIMA model.
        Default is (0, 0, 0, 0).
    include_constant : bool, optional
        Whether to add a constant term in `exog` if it's not already there.
        The estimate of the constant will then appear as one of the `exog`
        parameters. If `exog` is None, then the constant will represent the
        mean of the process. Default is True if the specified model does not
        include integration and False otherwise.
    n_iter : int, optional
        Optionally iterate feasible GLS a specific number of times. Default is
        to iterate to convergence. If set, this argument overrides the
        `max_iter` and `tolerance` arguments.
    max_iter : int, optional
        Maximum number of feasible GLS iterations. Default is 50. If `n_iter`
        is set, it overrides this argument.
    tolerance : float, optional
        Tolerance for determining convergence of feasible GLS iterations. If
        `n_iter` is set, this argument has no effect.
        Default is 1e-8.
    arma_estimator : str, optional
        The estimator used for estimating the ARMA model. This option should
        not generally be used, unless the default method is failing or is
        otherwise unsuitable. Not all values will be valid, depending on the
        specified model orders (`order` and `seasonal_order`). Possible values
        are:

        * 'innovations_mle' - can be used with any specification
        * 'statespace' - can be used with any specification
        * 'hannan_rissanen' - can be used with any ARMA non-seasonal model
        * 'yule_walker' - only non-seasonal consecutive
          autoregressive (AR) models
        * 'burg' - only non-seasonal, consecutive autoregressive (AR) models
        * 'innovations' - only non-seasonal, consecutive moving
          average (MA) models.

        The default is 'innovations_mle'.
    arma_estimator_kwargs : dict, optional
        Arguments to pass to the ARMA estimator.

    Returns
    -------
    parameters : SARIMAXParams object
        Contains the parameter estimates from the final iteration.
    other_results : Bunch
        Includes eight components: `spec`, `params`, `converged`,
        `differences`, `iterations`, `arma_estimator`, 'arma_estimator_kwargs',
        and `arma_results`.

    Raises
    ------
    ValueError
        If `include_constant=True` is requested for an integrated model.

    Notes
    -----
    The primary reference is [1]_, section 6.6. In particular, the
    implementation follows the iterative procedure described in section 6.6.2.
    Construction of the transformed variables used to compute the GLS estimator
    described in section 6.6.1 is done via an application of the innovations
    algorithm (rather than explicit construction of the transformation matrix).

    The transformation is computed from the reduced-form AR and MA lag
    polynomials of the current ARMA estimate, so seasonal terms and
    non-consecutive lags are reflected in the GLS weighting.

    Note that if the specified model includes integration, both the `endog` and
    `exog` series will be differenced prior to estimation and a warning will
    be issued to alert the user.

    References
    ----------
    .. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
       Introduction to Time Series and Forecasting. Springer.
    """
    # Handle n_iter: a fixed iteration count disables the convergence test
    if n_iter is not None:
        max_iter = n_iter
        tolerance = np.inf

    # Default for include_constant is True if there is no integration and
    # False otherwise
    integrated = order[1] > 0 or seasonal_order[1] > 0
    if include_constant is None:
        include_constant = not integrated
    elif include_constant and integrated:
        raise ValueError('Cannot include a constant in an integrated model.')

    # Handle including the constant (need to do it now so that the constant
    # parameter can be included in the specification as part of `exog`.)
    if include_constant:
        exog = np.ones_like(endog) if exog is None else add_constant(exog)

    # Create the SARIMAX specification
    spec = SARIMAXSpecification(endog, exog=exog, order=order,
                                seasonal_order=seasonal_order)
    endog = spec.endog
    exog = spec.exog

    # Handle integration
    if spec.is_integrated:
        # TODO: this is the approach suggested by BD (see Remark 1 in
        # section 6.6.2 and Example 6.6.3), but maybe there are some cases
        # where we don't want to force this behavior on the user?
        warnings.warn('Provided `endog` and `exog` series have been'
                      ' differenced to eliminate integration prior to GLS'
                      ' parameter estimation.')
        endog = diff(endog, k_diff=spec.diff,
                     k_seasonal_diff=spec.seasonal_diff,
                     seasonal_periods=spec.seasonal_periods)
        exog = diff(exog, k_diff=spec.diff,
                    k_seasonal_diff=spec.seasonal_diff,
                    seasonal_periods=spec.seasonal_periods)
    # `endog` and `exog` are stacked so that one pass of the innovations
    # algorithm transforms all columns at once
    augmented = np.c_[endog, exog]

    # Validate arma_estimator
    spec.validate_estimator(arma_estimator)
    if arma_estimator_kwargs is None:
        arma_estimator_kwargs = {}

    # Step 1: OLS
    mod_ols = OLS(endog, exog)
    res_ols = mod_ols.fit()
    exog_params = res_ols.params
    resid = res_ols.resid

    # 0th iteration parameters
    p = SARIMAXParams(spec=spec)
    p.exog_params = exog_params
    if spec.max_ar_order > 0:
        p.ar_params = np.zeros(spec.k_ar_params)
    if spec.max_seasonal_ar_order > 0:
        p.seasonal_ar_params = np.zeros(spec.k_seasonal_ar_params)
    if spec.max_ma_order > 0:
        p.ma_params = np.zeros(spec.k_ma_params)
    if spec.max_seasonal_ma_order > 0:
        p.seasonal_ma_params = np.zeros(spec.k_seasonal_ma_params)
    p.sigma2 = res_ols.scale

    ar_params = p.ar_params
    seasonal_ar_params = p.seasonal_ar_params
    ma_params = p.ma_params
    seasonal_ma_params = p.seasonal_ma_params
    sigma2 = p.sigma2

    # Step 2 - 4: iterate feasible GLS to convergence
    # (index 0 of each list corresponds to the OLS step above)
    arma_results = [None]
    differences = [None]
    parameters = [p]
    converged = False if n_iter is None else None
    i = 0
    for i in range(1, max_iter + 1):
        prev = exog_params

        # Step 2: ARMA
        # TODO: allow estimator-specific kwargs?
        if arma_estimator == 'yule_walker':
            p_arma, res_arma = yule_walker(
                resid, ar_order=spec.ar_order, demean=False,
                **arma_estimator_kwargs)
        elif arma_estimator == 'burg':
            p_arma, res_arma = burg(
                resid, ar_order=spec.ar_order, demean=False,
                **arma_estimator_kwargs)
        elif arma_estimator == 'innovations':
            out, res_arma = innovations(
                resid, ma_order=spec.ma_order, demean=False,
                **arma_estimator_kwargs)
            p_arma = out[-1]
        elif arma_estimator == 'hannan_rissanen':
            p_arma, res_arma = hannan_rissanen(
                resid, ar_order=spec.ar_order, ma_order=spec.ma_order,
                demean=False, **arma_estimator_kwargs)
        else:
            # For later iterations, use a "warm start" for parameter estimates
            # (speeds up estimation and convergence)
            start_params = (
                None if i == 1 else np.r_[ar_params, ma_params,
                                          seasonal_ar_params,
                                          seasonal_ma_params, sigma2])
            # Note: in each case, we do not pass in the order of integration
            # since we have already differenced the series
            tmp_order = (spec.order[0], 0, spec.order[2])
            tmp_seasonal_order = (spec.seasonal_order[0], 0,
                                  spec.seasonal_order[2],
                                  spec.seasonal_order[3])
            if arma_estimator == 'innovations_mle':
                p_arma, res_arma = innovations_mle(
                    resid, order=tmp_order, seasonal_order=tmp_seasonal_order,
                    demean=False, start_params=start_params,
                    **arma_estimator_kwargs)
            else:
                p_arma, res_arma = statespace(
                    resid, order=tmp_order, seasonal_order=tmp_seasonal_order,
                    include_constant=False, start_params=start_params,
                    **arma_estimator_kwargs)

        ar_params = p_arma.ar_params
        seasonal_ar_params = p_arma.seasonal_ar_params
        ma_params = p_arma.ma_params
        seasonal_ma_params = p_arma.seasonal_ma_params
        sigma2 = p_arma.sigma2
        arma_results.append(res_arma)

        # Step 3: GLS
        # Compute transformed variables that satisfy OLS assumptions
        # Note: In section 6.6.1 of Brockwell and Davis (2016), these
        # transformations are developed as computed by left multiplication
        # by a matrix T. However, explicitly constructing T and then
        # performing the left-multiplications does not scale well when nobs is
        # large. Instead, we can retrieve the transformed variables as the
        # residuals of the innovations algorithm (the `normalize=True`
        # argument applies a Prais-Winsten-type normalization to the first few
        # observations to ensure homoskedasticity). Brockwell and Davis
        # mention that they also take this approach in practice.
        #
        # The reduced-form polynomials (non-seasonal times seasonal, with
        # zeros at excluded lags) are used so that the transformation
        # reflects the complete error model. Passing only the non-seasonal
        # coefficient vectors would drop seasonal terms and misalign
        # non-consecutive lags. The sign convention matches the one used with
        # `arma_innovations.arma_loglike` in `innovations_mle`.
        transform_ar_params = -p_arma.reduced_ar_poly.coef[1:]
        transform_ma_params = p_arma.reduced_ma_poly.coef[1:]
        tmp, _ = arma_innovations.arma_innovations(
            augmented, ar_params=transform_ar_params,
            ma_params=transform_ma_params, normalize=True)
        u = tmp[:, 0]
        x = tmp[:, 1:]

        # OLS on transformed variables
        mod_gls = OLS(u, x)
        res_gls = mod_gls.fit()
        exog_params = res_gls.params
        resid = endog - np.dot(exog, exog_params)

        # Construct the parameter vector for the iteration
        p = SARIMAXParams(spec=spec)
        p.exog_params = exog_params
        if spec.max_ar_order > 0:
            p.ar_params = ar_params
        if spec.max_seasonal_ar_order > 0:
            p.seasonal_ar_params = seasonal_ar_params
        if spec.max_ma_order > 0:
            p.ma_params = ma_params
        if spec.max_seasonal_ma_order > 0:
            p.seasonal_ma_params = seasonal_ma_params
        p.sigma2 = sigma2
        parameters.append(p)

        # Check for convergence
        difference = np.abs(exog_params - prev)
        differences.append(difference)
        if n_iter is None and np.all(difference < tolerance):
            converged = True
            break
    else:
        # Loop exhausted without `break`: only a failure when iterating to
        # convergence (i.e. when `n_iter` was not requested)
        if n_iter is None:
            warnings.warn('Feasible GLS failed to converge in %d iterations.'
                          ' Consider increasing the maximum number of'
                          ' iterations using the `max_iter` argument or'
                          ' reducing the required tolerance using the'
                          ' `tolerance` argument.' % max_iter)

    # Construct final results
    p = parameters[-1]
    other_results = Bunch({
        'spec': spec,
        'params': parameters,
        'converged': converged,
        'differences': differences,
        'iterations': i,
        'arma_estimator': arma_estimator,
        'arma_estimator_kwargs': arma_estimator_kwargs,
        'arma_results': arma_results,
    })

    return p, other_results
def burg(endog, ar_order=0, demean=True):
    """
    Estimate AR parameters using Burg technique.

    Parameters
    ----------
    endog : array_like or SARIMAXSpecification
        Input time series array, assumed to be stationary.
    ar_order : int, optional
        Autoregressive order. Default is 0.
    demean : bool, optional
        Whether to estimate and remove the mean from the process prior to
        fitting the autoregressive coefficients.

    Returns
    -------
    parameters : SARIMAXParams object
        Contains the estimated AR coefficients (when `ar_order` is non-zero)
        and the variance estimate.
    other_results : Bunch
        Includes one component, `spec`, which is the `SARIMAXSpecification`
        instance corresponding to the input arguments.

    Raises
    ------
    ValueError
        If the specification does not have consecutive AR lags.

    Notes
    -----
    The primary reference is [1]_, section 5.1.2.

    This procedure assumes that the series is stationary.

    This function is a light wrapper around `statsmodels.linear_model.burg`.

    References
    ----------
    .. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
       Introduction to Time Series and Forecasting. Springer.
    """
    spec = SARIMAXSpecification(endog, ar_order=ar_order)
    series = spec.endog

    # Workaround for statsmodels.tsa.stattools.pacf_burg which doesn't work
    # on integer input
    # TODO: remove when possible
    is_integer_input = np.issubdtype(series.dtype, np.dtype(int))
    if is_integer_input:
        series = series * 1.0

    if not spec.is_ar_consecutive:
        raise ValueError('Burg estimation unavailable for models with'
                         ' seasonal or otherwise non-consecutive AR orders.')

    params = SARIMAXParams(spec=spec)

    if ar_order == 0:
        # Nothing to fit: only the variance of the series is reported
        params.sigma2 = np.var(series)
    else:
        ar_coefs, variance = linear_model.burg(
            series, order=ar_order, demean=demean)
        params.ar_params = ar_coefs
        params.sigma2 = variance

    return params, Bunch({'spec': spec})
def hannan_rissanen(endog, ar_order=0, ma_order=0, demean=True,
                    initial_ar_order=None, unbiased=None,
                    fixed_params=None):
    """
    Estimate ARMA parameters using Hannan-Rissanen procedure.

    Parameters
    ----------
    endog : array_like
        Input time series array, assumed to be stationary.
    ar_order : int or list of int
        Autoregressive order
    ma_order : int or list of int
        Moving average order
    demean : bool, optional
        Whether to estimate and remove the mean from the process prior to
        fitting the ARMA coefficients. Default is True.
    initial_ar_order : int, optional
        Order of long autoregressive process used for initial computation of
        residuals.
    unbiased : bool, optional
        Whether or not to apply the bias correction step. Default is True if
        the estimated coefficients from the previous step imply a stationary
        and invertible process and False otherwise. The default is also False
        whenever fixed parameters are given.
    fixed_params : dict, optional
        Dictionary with names of fixed parameters as keys (e.g. 'ar.L1',
        'ma.L2'), which correspond to SARIMAXSpecification.param_names.
        Dictionary values are the values of the associated fixed parameters.

    Returns
    -------
    parameters : SARIMAXParams object
    other_results : Bunch
        Includes three components: `spec`, containing the
        `SARIMAXSpecification` instance corresponding to the input arguments;
        `initial_ar_order`, containing the autoregressive lag order used in the
        first step; and `resid`, which contains the computed residuals from the
        last step.

    Raises
    ------
    NotImplementedError
        If `unbiased=True` is combined with fixed parameters.
    ValueError
        If `unbiased=True` and the second-step estimates are non-stationary
        or non-invertible.

    Notes
    -----
    The primary reference is [1]_, section 5.1.4, which describes a three-step
    procedure that we implement here.

    1. Fit a large-order AR model via Yule-Walker to estimate residuals
    2. Compute AR and MA estimates via least squares
    3. (Unless the estimated coefficients from step (2) are non-stationary /
       non-invertible or `unbiased=False`) Perform bias correction

    The order used for the AR model in the first step may be given as an
    argument. If it is not, we compute it as suggested by [2]_.

    The estimate of the variance that we use is computed from the residuals
    of the least-squares regression and not from the innovations algorithm.
    This is because our fast implementation of the innovations algorithm is
    only valid for stationary processes, and the Hannan-Rissanen procedure may
    produce estimates that imply non-stationary processes. To avoid
    inconsistency, we never compute this latter variance here, even if it is
    possible. See test_hannan_rissanen::test_brockwell_davis_example_517 for
    an example of how to compute this variance manually.

    This procedure assumes that the series is stationary, but if this is not
    true, it is still possible that this procedure will return parameters that
    imply a non-stationary / non-invertible process.

    Note that the third stage will only be applied if the parameters from the
    second stage imply a stationary / invertible model. If `unbiased=True` is
    given, then non-stationary / non-invertible parameters in the second stage
    will throw an exception.

    References
    ----------
    .. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
       Introduction to Time Series and Forecasting. Springer.
    .. [2] Gomez, Victor, and Agustin Maravall. 2001.
       "Automatic Modeling Methods for Univariate Series."
       A Course in Time Series Analysis, 171–201.
    """
    spec = SARIMAXSpecification(endog, ar_order=ar_order, ma_order=ma_order)

    fixed_params = _validate_fixed_params(fixed_params, spec.param_names)

    endog = spec.endog
    if demean:
        endog = endog - endog.mean()

    p = SARIMAXParams(spec=spec)

    nobs = len(endog)
    max_ar_order = spec.max_ar_order
    max_ma_order = spec.max_ma_order

    # Default initial_ar_order is as suggested by Gomez and Maravall (2001)
    if initial_ar_order is None:
        initial_ar_order = max(np.floor(np.log(nobs)**2).astype(int),
                               2 * max(max_ar_order, max_ma_order))
    # Create a spec, just to validate the initial autoregressive order
    _ = SARIMAXSpecification(endog, ar_order=initial_ar_order)

    # Unpack fixed and free ar/ma lags, ix, and params (fixed only)
    params_info = _package_fixed_and_free_params_info(
        fixed_params, spec.ar_lags, spec.ma_lags
    )

    # Compute lagged endog
    lagged_endog = lagmat(endog, max_ar_order, trim='both')

    # If no AR or MA components, this is just a variance computation
    if max_ma_order == 0 and max_ar_order == 0:
        p.sigma2 = np.var(endog, ddof=0)
        resid = endog.copy()
    # If no MA component, this is just CSS
    elif max_ma_order == 0:
        # extract 1) lagged_endog with free params; 2) lagged_endog with fixed
        # params; 3) endog residual after applying fixed params if applicable
        X_with_free_params = lagged_endog[:, params_info.free_ar_ix]
        X_with_fixed_params = lagged_endog[:, params_info.fixed_ar_ix]
        y = endog[max_ar_order:]
        if X_with_fixed_params.shape[1] != 0:
            y = y - X_with_fixed_params.dot(params_info.fixed_ar_params)

        # no free ar params -> variance computation on the endog residual
        if X_with_free_params.shape[1] == 0:
            p.ar_params = params_info.fixed_ar_params
            p.sigma2 = np.var(y, ddof=0)
            resid = y.copy()
        # otherwise OLS with endog residual (after applying fixed params) as y,
        # and lagged_endog with free params as X
        else:
            mod = OLS(y, X_with_free_params)
            res = mod.fit()
            resid = res.resid
            p.sigma2 = res.scale
            p.ar_params = _stitch_fixed_and_free_params(
                fixed_ar_or_ma_lags=params_info.fixed_ar_lags,
                fixed_ar_or_ma_params=params_info.fixed_ar_params,
                free_ar_or_ma_lags=params_info.free_ar_lags,
                free_ar_or_ma_params=res.params,
                spec_ar_or_ma_lags=spec.ar_lags
            )
    # Otherwise ARMA model
    else:
        # Step 1: Compute long AR model via Yule-Walker, get residuals.
        # The regression-level routine is called through `linear_model`
        # because the module-level name `yule_walker` in this file is the
        # SARIMAX wrapper, whose signature has no `order` / `method`
        # arguments and which returns a (params, results) pair.
        initial_ar_params, _ = linear_model.yule_walker(
            endog, order=initial_ar_order, method='mle')
        X = lagmat(endog, initial_ar_order, trim='both')
        y = endog[initial_ar_order:]
        resid = y - X.dot(initial_ar_params)

        # Get lagged residuals for `exog` in least-squares regression
        lagged_resid = lagmat(resid, max_ma_order, trim='both')

        # Step 2: estimate ARMA model via least squares
        # (`ix` aligns the rows of `lagged_endog` with those of
        # `lagged_resid`)
        ix = initial_ar_order + max_ma_order - max_ar_order
        X_with_free_params = np.c_[
            lagged_endog[ix:, params_info.free_ar_ix],
            lagged_resid[:, params_info.free_ma_ix]
        ]
        X_with_fixed_params = np.c_[
            lagged_endog[ix:, params_info.fixed_ar_ix],
            lagged_resid[:, params_info.fixed_ma_ix]
        ]
        y = endog[initial_ar_order + max_ma_order:]
        if X_with_fixed_params.shape[1] != 0:
            y = y - X_with_fixed_params.dot(
                np.r_[params_info.fixed_ar_params, params_info.fixed_ma_params]
            )

        # Step 2.1: no free params (AR or MA) -> variance computation on the
        # endog residual
        if X_with_free_params.shape[1] == 0:
            p.ar_params = params_info.fixed_ar_params
            p.ma_params = params_info.fixed_ma_params
            p.sigma2 = np.var(y, ddof=0)
            resid = y.copy()
        # Step 2.2: otherwise OLS with endog residual (after applying fixed
        # params) as y, and lagged_endog and lagged_resid with free params as X
        else:
            mod = OLS(y, X_with_free_params)
            res = mod.fit()

            k_free_ar_params = len(params_info.free_ar_lags)
            p.ar_params = _stitch_fixed_and_free_params(
                fixed_ar_or_ma_lags=params_info.fixed_ar_lags,
                fixed_ar_or_ma_params=params_info.fixed_ar_params,
                free_ar_or_ma_lags=params_info.free_ar_lags,
                free_ar_or_ma_params=res.params[:k_free_ar_params],
                spec_ar_or_ma_lags=spec.ar_lags
            )
            p.ma_params = _stitch_fixed_and_free_params(
                fixed_ar_or_ma_lags=params_info.fixed_ma_lags,
                fixed_ar_or_ma_params=params_info.fixed_ma_params,
                free_ar_or_ma_lags=params_info.free_ma_lags,
                free_ar_or_ma_params=res.params[k_free_ar_params:],
                spec_ar_or_ma_lags=spec.ma_lags
            )
            resid = res.resid
            p.sigma2 = res.scale

        # Step 3: bias correction (if requested)

        # Step 3.1: validate `unbiased` argument and handle setting the default
        if unbiased is True:
            if len(fixed_params) != 0:
                raise NotImplementedError(
                    "Third step of Hannan-Rissanen estimation to remove "
                    "parameter bias is not yet implemented for the case "
                    "with fixed parameters."
                )
            elif not (p.is_stationary and p.is_invertible):
                raise ValueError(
                    "Cannot perform third step of Hannan-Rissanen estimation "
                    "to remove parameter bias, because parameters estimated "
                    "from the second step are non-stationary or "
                    "non-invertible."
                )
        elif unbiased is None:
            if len(fixed_params) != 0:
                unbiased = False
            else:
                unbiased = p.is_stationary and p.is_invertible

        # Step 3.2: bias correction
        # (only reached with no fixed parameters, so every lag is "free" and
        # `mod` from step 2.2 is defined)
        if unbiased is True:
            ar_coef = p.ar_poly.coef
            ma_coef = p.ma_poly.coef
            start = max(max_ar_order, max_ma_order)

            # Z is left at zero for the first `start` observations
            Z = np.zeros_like(endog)
            for t in range(start, nobs):
                # Note: in the case of non-consecutive lag orders, the
                # polynomials have the appropriate zeros so we don't
                # need to subset `endog[t - max_ar_order:t]` or
                # Z[t - max_ma_order:t]
                tmp_ar = np.dot(
                    -ar_coef[1:], endog[t - max_ar_order:t][::-1])
                tmp_ma = np.dot(ma_coef[1:],
                                Z[t - max_ma_order:t][::-1])
                Z[t] = endog[t] - tmp_ar - tmp_ma

            V = lfilter([1], ar_coef, Z)
            W = lfilter(np.r_[1, -ma_coef[1:]], [1], Z)

            lagged_V = lagmat(V, max_ar_order, trim='both')
            lagged_W = lagmat(W, max_ma_order, trim='both')

            exog = np.c_[
                lagged_V[
                    max(max_ma_order - max_ar_order, 0):,
                    params_info.free_ar_ix
                ],
                lagged_W[
                    max(max_ar_order - max_ma_order, 0):,
                    params_info.free_ma_ix
                ]
            ]

            mod_unbias = OLS(Z[start:], exog)
            res_unbias = mod_unbias.fit()

            p.ar_params = (
                p.ar_params + res_unbias.params[:spec.k_ar_params])
            p.ma_params = (
                p.ma_params + res_unbias.params[spec.k_ar_params:])

            # Recompute sigma2 from the step-2 regression data using the
            # bias-corrected coefficients
            resid = mod.endog - mod.exog.dot(
                np.r_[p.ar_params, p.ma_params])
            p.sigma2 = np.inner(resid, resid) / len(resid)

    # TODO: Gomez and Maravall (2001) or Gomez (1998)
    #       propose one more step here to further improve MA estimates

    # Construct results
    other_results = Bunch({
        'spec': spec,
        'initial_ar_order': initial_ar_order,
        'resid': resid
    })

    return p, other_results