def test_yule_walker():
    # Basic check: fitting through the ARIMA model interface with
    # method='yule_walker' must give the same parameters as calling the
    # Yule-Walker estimator directly.
    sample = dta['infl'].iloc[:50]

    # Model interface: AR(2) without a trend (a trend term would imply GLS
    # estimation, which is not what is being compared here)
    fitted = ARIMA(sample, order=(2, 0, 0), trend='n').fit(
        method='yule_walker')

    # Direct call to the estimator gives the reference parameters
    expected, _ = yule_walker(sample, ar_order=2, demean=False)

    assert_allclose(fitted.params, expected.params)
def test_brockwell_davis_example_514():
    # Note: the main checks for this example are in
    # test_burg::test_brockwell_davis_example_514; only the Yule-Walker
    # estimates are compared against reference values here.

    # Work on a copy of the lake data; demeaning is requested from the
    # estimator via `demean=True`
    series = lake.copy()

    # Yule-Walker, AR(2)
    estimates, _ = yule_walker(series, ar_order=2, demean=True)

    expected_ar = [1.0538, -0.2668]
    expected_sigma2 = 0.4920
    assert_allclose(estimates.ar_params, expected_ar, atol=1e-4)
    assert_allclose(estimates.sigma2, expected_sigma2, atol=1e-4)
def test_brockwell_davis_example_511():
    # Make the series stationary: first-difference it and drop the first
    # entry
    differenced = dowj.diff().iloc[1:]

    # The differenced series should have 77 observations
    assert_equal(len(differenced), 77)

    # Autocovariances for lags 0 through 2
    sample_acov = acovf(differenced, fft=True, nlag=2)
    assert_allclose(sample_acov, [0.17992, 0.07590, 0.04885], atol=1e-5)

    # Yule-Walker, AR(1)
    estimates, _ = yule_walker(differenced, ar_order=1, demean=True)
    assert_allclose(estimates.ar_params, [0.4219], atol=1e-4)
    assert_allclose(estimates.sigma2, 0.1479, atol=1e-4)
def check_itsmr(lake):
    # Compare against R itsmr::yw; see results/results_yw_dl.R
    estimates, _ = yule_walker(lake, 5)

    expected_ar = [
        1.08213598501,
        -0.39658257147,
        0.11793957728,
        -0.03326633983,
        0.06209208707,
    ]
    assert_allclose(estimates.ar_params, expected_ar)

    # stats::ar.yw return the innovations algorithm estimate of the
    # variance, so the same quantity is rebuilt here from the demeaned data
    centered = np.array(lake) - np.mean(lake)
    u_seq, v_seq = arma_innovations(centered, ar_params=estimates.ar_params,
                                    sigma2=1)

    expected_sigma2 = 0.4716322564
    computed_sigma2 = np.sum(u_seq**2 / v_seq) / len(u_seq)
    assert_allclose(computed_sigma2, expected_sigma2)
def fit(self, start_params=None, transformed=True, includes_fixed=False,
        method=None, method_kwargs=None, gls=None, gls_kwargs=None,
        cov_type=None, cov_kwds=None, return_params=False,
        low_memory=False):
    """
    Fit (estimate) the parameters of the model.

    Parameters
    ----------
    start_params : array_like, optional
        Initial guess of the solution for the loglikelihood maximization.
        If None, the default is given by Model.start_params.
    transformed : bool, optional
        Whether or not `start_params` is already transformed. Default is
        True.
    includes_fixed : bool, optional
        If parameters were previously fixed with the `fix_params` method,
        this argument describes whether or not `start_params` also includes
        the fixed parameters, in addition to the free parameters. Default
        is False.
    method : str, optional
        The method used for estimating the parameters of the model. Valid
        options include 'statespace', 'innovations_mle', 'hannan_rissanen',
        'burg', 'innovations', and 'yule_walker'. Not all options are
        available for every specification (for example 'yule_walker' can
        only be used with AR(p) models).
    method_kwargs : dict, optional
        Arguments to pass to the fit function for the parameter estimator
        described by the `method` argument. The dictionary is copied
        before model-level options and starting parameters are added, so
        the object passed by the caller is left unchanged.
    gls : bool, optional
        Whether or not to use generalized least squares (GLS) to estimate
        regression effects. The default is False if `method='statespace'`
        and is True otherwise.
    gls_kwargs : dict, optional
        Arguments to pass to the GLS estimation fit method. Only applicable
        if GLS estimation is used (see `gls` argument for details).
    cov_type : str, optional
        The `cov_type` keyword governs the method for calculating the
        covariance matrix of parameter estimates. Can be one of:

        - 'opg' for the outer product of gradient estimator
        - 'oim' for the observed information matrix estimator, calculated
          using the method of Harvey (1989)
        - 'approx' for the observed information matrix estimator,
          calculated using a numerical approximation of the Hessian matrix.
        - 'robust' for an approximate (quasi-maximum likelihood) covariance
          matrix that may be valid even in the presence of some
          misspecifications. Intermediate calculations use the 'oim'
          method.
        - 'robust_approx' is the same as 'robust' except that the
          intermediate calculations use the 'approx' method.
        - 'none' for no covariance matrix calculation.

        Default is 'opg' unless memory conservation is used to avoid
        computing the loglikelihood values for each observation, in which
        case the default is 'oim'.
    cov_kwds : dict or None, optional
        A dictionary of arguments affecting covariance matrix computation.

        **opg, oim, approx, robust, robust_approx**

        - 'approx_complex_step' : bool, optional - If True, numerical
          approximations are computed using complex-step methods. If False,
          numerical approximations are computed using finite difference
          methods. Default is True.
        - 'approx_centered' : bool, optional - If True, numerical
          approximations computed using finite difference methods use a
          centered approximation. Default is False.
    return_params : bool, optional
        Whether or not to return only the array of maximizing parameters.
        Default is False.
    low_memory : bool, optional
        If set to True, techniques are applied to substantially reduce
        memory usage. If used, some features of the results object will
        not be available (including smoothed results and in-sample
        prediction), although out-of-sample forecasting is possible.
        Default is False.

    Returns
    -------
    ARIMAResults

    Raises
    ------
    ValueError
        If parameters have been fixed and `method` is not 'statespace'; if
        `method_kwargs` tries to override a model-level option required by
        the chosen method; if `start_params` is given for a method that
        does not use starting parameters; if `exog` is present, GLS is
        disabled and `method` is not 'statespace'; or if the estimated
        parameters violate `enforce_stationarity` /
        `enforce_invertibility`.

    Examples
    --------
    >>> mod = sm.tsa.arima.ARIMA(endog, order=(1, 0, 0))
    >>> res = mod.fit()
    >>> print(res.summary())
    """
    # Determine which method to use
    # 1. If method is specified, make sure it is valid
    if method is not None:
        self._spec_arima.validate_estimator(method)
    # 2. Otherwise, use state space
    # TODO: may want to consider using innovations (MLE) if possible here,
    # (since in some cases it may be faster than state space), but it is
    # less tested.
    else:
        method = 'statespace'

    # Can only use fixed parameters with method='statespace'
    if self._has_fixed_params and method != 'statespace':
        raise ValueError('When parameters have been fixed, only the method'
                         ' "statespace" can be used; got "%s".' % method)

    # Handle kwargs related to the fit method.
    # Work on a shallow copy: entries are added below, and writing them into
    # the caller's dict would make a second `fit` call with the same dict
    # fail the "Cannot override model level value" check.
    method_kwargs = {} if method_kwargs is None else dict(method_kwargs)
    required_kwargs = []
    if method == 'statespace':
        required_kwargs = ['enforce_stationarity', 'enforce_invertibility',
                           'concentrate_scale']
    elif method == 'innovations_mle':
        required_kwargs = ['enforce_invertibility']

    for name in required_kwargs:
        if name in method_kwargs:
            raise ValueError('Cannot override model level value for "%s"'
                             ' when method="%s".' % (name, method))
        method_kwargs[name] = getattr(self, name)

    # Handle kwargs related to GLS estimation
    if gls_kwargs is None:
        gls_kwargs = {}

    # Handle starting parameters
    # TODO: maybe should have standard way of computing starting
    # parameters in this class?
    if start_params is not None:
        if method not in ['statespace', 'innovations_mle']:
            raise ValueError('Estimation method "%s" does not use starting'
                             ' parameters, but `start_params` argument was'
                             ' given.' % method)

        method_kwargs['start_params'] = start_params
        method_kwargs['transformed'] = transformed
        method_kwargs['includes_fixed'] = includes_fixed

    # Perform estimation, depending on whether we have exog or not
    p = None
    fit_details = None
    has_exog = self._spec_arima.exog is not None
    if has_exog or method == 'statespace':
        # Use GLS if it was explicitly requested (`gls = True`) or if it
        # was left at the default (`gls = None`) and the ARMA estimator is
        # anything but statespace.
        # Note: both GLS and statespace are able to handle models with
        # integration, so we don't need to difference endog or exog here.
        if has_exog and (gls or (gls is None and method != 'statespace')):
            p, fit_details = estimate_gls(
                self.endog, exog=self.exog, order=self.order,
                seasonal_order=self.seasonal_order, include_constant=False,
                arma_estimator=method, arma_estimator_kwargs=method_kwargs,
                **gls_kwargs)
        elif method != 'statespace':
            raise ValueError('If `exog` is given and GLS is disabled'
                             ' (`gls=False`), then the only valid'
                             " method is 'statespace'. Got '%s'."
                             % method)
        else:
            method_kwargs.setdefault('disp', 0)

            res = super(ARIMA, self).fit(
                return_params=return_params, low_memory=low_memory,
                cov_type=cov_type, cov_kwds=cov_kwds, **method_kwargs)
            if not return_params:
                res.fit_details = res.mlefit
    else:
        # Handle differencing if we have an integrated model
        # (these methods do not support handling integration internally,
        # so we need to manually do the differencing)
        endog = self.endog
        order = self._spec_arima.order
        seasonal_order = self._spec_arima.seasonal_order
        if self._spec_arima.is_integrated:
            warnings.warn('Provided `endog` series has been differenced'
                          ' to eliminate integration prior to parameter'
                          ' estimation by method "%s".' % method)
            endog = diff(
                endog, k_diff=self._spec_arima.diff,
                k_seasonal_diff=self._spec_arima.seasonal_diff,
                seasonal_periods=self._spec_arima.seasonal_periods)
            if order[1] > 0:
                order = (order[0], 0, order[2])
            if seasonal_order[1] > 0:
                seasonal_order = (seasonal_order[0], 0, seasonal_order[2],
                                  seasonal_order[3])

        # Now, estimate parameters
        if method == 'yule_walker':
            p, fit_details = yule_walker(
                endog, ar_order=order[0], demean=False,
                **method_kwargs)
        elif method == 'burg':
            p, fit_details = burg(endog, ar_order=order[0],
                                  demean=False, **method_kwargs)
        elif method == 'hannan_rissanen':
            p, fit_details = hannan_rissanen(
                endog, ar_order=order[0],
                ma_order=order[2], demean=False, **method_kwargs)
        elif method == 'innovations':
            p, fit_details = innovations(
                endog, ma_order=order[2], demean=False,
                **method_kwargs)
            # innovations computes estimates through the given order, so
            # we want to take the estimate associated with the given order
            p = p[-1]
        elif method == 'innovations_mle':
            p, fit_details = innovations_mle(
                endog, order=order,
                seasonal_order=seasonal_order,
                demean=False, **method_kwargs)

    # In all cases except method='statespace', we now need to extract the
    # parameters and, optionally, create a new results object
    if p is not None:
        # Need to check that fitted parameters satisfy given restrictions
        if (self.enforce_stationarity
                and self._spec_arima.max_reduced_ar_order > 0
                and not p.is_stationary):
            raise ValueError('Non-stationary autoregressive parameters'
                             ' found with `enforce_stationarity=True`.'
                             ' Consider setting it to False or using a'
                             ' different estimation method, such as'
                             ' method="statespace".')

        if (self.enforce_invertibility
                and self._spec_arima.max_reduced_ma_order > 0
                and not p.is_invertible):
            raise ValueError('Non-invertible moving average parameters'
                             ' found with `enforce_invertibility=True`.'
                             ' Consider setting it to False or using a'
                             ' different estimation method, such as'
                             ' method="statespace".')

        # Build the requested results
        if return_params:
            res = p.params
        else:
            # Handle memory conservation option
            if low_memory:
                conserve_memory = self.ssm.conserve_memory
                self.ssm.set_conserve_memory(MEMORY_CONSERVE)

            try:
                # Perform filtering / smoothing
                if (self.ssm.memory_no_predicted
                        or self.ssm.memory_no_gain
                        or self.ssm.memory_no_smoothing):
                    func = self.filter
                else:
                    func = self.smooth
                res = func(p.params, transformed=True, includes_fixed=True,
                           cov_type=cov_type, cov_kwds=cov_kwds)

                # Save any details from the fit method
                res.fit_details = fit_details
            finally:
                # Reset memory conservation, even if filtering / smoothing
                # raised, so the model is not left in the temporary
                # low-memory configuration
                if low_memory:
                    self.ssm.set_conserve_memory(conserve_memory)

    return res
def gls(endog, exog=None, order=(0, 0, 0), seasonal_order=(0, 0, 0, 0),
        include_constant=None, n_iter=None, max_iter=50, tolerance=1e-8,
        arma_estimator='innovations_mle', arma_estimator_kwargs=None):
    """
    Estimate ARMAX parameters by GLS.

    Parameters
    ----------
    endog : array_like
        Input time series array.
    exog : array_like, optional
        Array of exogenous regressors. If not included, then
        `include_constant` must be True, and then `exog` will only include
        the constant column. If neither `exog` nor a constant is available
        a ValueError is raised.
    order : tuple, optional
        The (p,d,q) order of the ARIMA model. Default is (0, 0, 0).
    seasonal_order : tuple, optional
        The (P,D,Q,s) order of the seasonal ARIMA model.
        Default is (0, 0, 0, 0).
    include_constant : bool, optional
        Whether to add a constant term in `exog` if it's not already there.
        The estimate of the constant will then appear as one of the `exog`
        parameters. If `exog` is None, then the constant will represent the
        mean of the process. Default is True if the specified model does not
        include integration and False otherwise.
    n_iter : int, optional
        Optionally iterate feasible GLS a specific number of times. Default
        is to iterate to convergence. If set, this argument overrides the
        `max_iter` and `tolerance` arguments.
    max_iter : int, optional
        Maximum number of feasible GLS iterations. Default is 50. If `n_iter`
        is set, it overrides this argument.
    tolerance : float, optional
        Tolerance for determining convergence of feasible GLS iterations. If
        `n_iter` is set, this argument has no effect.
        Default is 1e-8.
    arma_estimator : str, optional
        The estimator used for estimating the ARMA model. This option should
        not generally be used, unless the default method is failing or is
        otherwise unsuitable. Not all values will be valid, depending on the
        specified model orders (`order` and `seasonal_order`). Possible values
        are:

        * 'innovations_mle' - can be used with any specification
        * 'statespace' - can be used with any specification
        * 'hannan_rissanen' - can be used with any ARMA non-seasonal model
        * 'yule_walker' - only non-seasonal consecutive
          autoregressive (AR) models
        * 'burg' - only non-seasonal, consecutive autoregressive (AR) models
        * 'innovations' - only non-seasonal, consecutive moving
          average (MA) models.

        The default is 'innovations_mle'.
    arma_estimator_kwargs : dict, optional
        Arguments to pass to the ARMA estimator.

    Returns
    -------
    parameters : SARIMAXParams object
        Contains the parameter estimates from the final iteration.
    other_results : Bunch
        Includes eight components: `spec`, `params`, `converged`,
        `differences`, `iterations`, `arma_estimator`, 'arma_estimator_kwargs',
        and `arma_results`.

    Raises
    ------
    ValueError
        If `include_constant` is True for an integrated model, or if `exog`
        is None while no constant is included (there would then be no
        regressors at all).

    Notes
    -----
    The primary reference is [1]_, section 6.6. In particular, the
    implementation follows the iterative procedure described in section 6.6.2.
    Construction of the transformed variables used to compute the GLS estimator
    described in section 6.6.1 is done via an application of the innovations
    algorithm (rather than explicit construction of the transformation matrix).

    Note that if the specified model includes integration, both the `endog` and
    `exog` series will be differenced prior to estimation and a warning will
    be issued to alert the user.

    References
    ----------
    .. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
       Introduction to Time Series and Forecasting. Springer.
    """
    # Handle n_iter: a fixed iteration count replaces `max_iter`, and an
    # infinite tolerance makes the numerical convergence test irrelevant
    if n_iter is not None:
        max_iter = n_iter
        tolerance = np.inf

    # Default for include_constant is True if there is no integration and
    # False otherwise
    integrated = order[1] > 0 or seasonal_order[1] > 0
    if include_constant is None:
        include_constant = not integrated
    elif include_constant and integrated:
        raise ValueError('Cannot include a constant in an integrated model.')

    # Without `exog` and without a constant there is no regression to run;
    # fail early with a clear message instead of passing `exog=None` on to
    # the differencing / OLS steps below.
    if exog is None and not include_constant:
        raise ValueError('GLS estimation requires regressors: `exog` must be'
                         ' given or `include_constant` must be True.')

    # Handle including the constant (need to do it now so that the constant
    # parameter can be included in the specification as part of `exog`.)
    if include_constant:
        exog = np.ones_like(endog) if exog is None else add_constant(exog)

    # Create the SARIMAX specification
    spec = SARIMAXSpecification(endog, exog=exog, order=order,
                                seasonal_order=seasonal_order)
    endog = spec.endog
    exog = spec.exog

    # Handle integration
    if spec.is_integrated:
        # TODO: this is the approach suggested by BD (see Remark 1 in
        # section 6.6.2 and Example 6.6.3), but maybe there are some cases
        # where we don't want to force this behavior on the user?
        warnings.warn('Provided `endog` and `exog` series have been'
                      ' differenced to eliminate integration prior to GLS'
                      ' parameter estimation.')
        endog = diff(endog, k_diff=spec.diff,
                     k_seasonal_diff=spec.seasonal_diff,
                     seasonal_periods=spec.seasonal_periods)
        exog = diff(exog, k_diff=spec.diff,
                    k_seasonal_diff=spec.seasonal_diff,
                    seasonal_periods=spec.seasonal_periods)
    # First column is `endog`, remaining columns are `exog`; both are
    # transformed together in Step 3
    augmented = np.c_[endog, exog]

    # Validate arma_estimator
    spec.validate_estimator(arma_estimator)
    if arma_estimator_kwargs is None:
        arma_estimator_kwargs = {}

    # Step 1: OLS
    mod_ols = OLS(endog, exog)
    res_ols = mod_ols.fit()
    exog_params = res_ols.params
    resid = res_ols.resid

    # 0th iteration parameters
    p = SARIMAXParams(spec=spec)
    p.exog_params = exog_params
    if spec.max_ar_order > 0:
        p.ar_params = np.zeros(spec.k_ar_params)
    if spec.max_seasonal_ar_order > 0:
        p.seasonal_ar_params = np.zeros(spec.k_seasonal_ar_params)
    if spec.max_ma_order > 0:
        p.ma_params = np.zeros(spec.k_ma_params)
    if spec.max_seasonal_ma_order > 0:
        p.seasonal_ma_params = np.zeros(spec.k_seasonal_ma_params)
    p.sigma2 = res_ols.scale

    ar_params = p.ar_params
    seasonal_ar_params = p.seasonal_ar_params
    ma_params = p.ma_params
    seasonal_ma_params = p.seasonal_ma_params
    sigma2 = p.sigma2

    # Step 2 - 4: iterate feasible GLS to convergence
    # (index 0 of each list corresponds to the OLS step, which has no ARMA
    # fit and no previous iteration to compare against)
    arma_results = [None]
    differences = [None]
    parameters = [p]
    converged = False if n_iter is None else None
    # Keeps `iterations` defined (as 0) when the loop body never runs
    i = 0
    for i in range(1, max_iter + 1):
        prev = exog_params

        # Step 2: ARMA
        # TODO: allow estimator-specific kwargs?
        if arma_estimator == 'yule_walker':
            p_arma, res_arma = yule_walker(
                resid, ar_order=spec.ar_order, demean=False,
                **arma_estimator_kwargs)
        elif arma_estimator == 'burg':
            p_arma, res_arma = burg(
                resid, ar_order=spec.ar_order, demean=False,
                **arma_estimator_kwargs)
        elif arma_estimator == 'innovations':
            out, res_arma = innovations(
                resid, ma_order=spec.ma_order, demean=False,
                **arma_estimator_kwargs)
            # Take the last entry of the returned sequence of estimates
            p_arma = out[-1]
        elif arma_estimator == 'hannan_rissanen':
            p_arma, res_arma = hannan_rissanen(
                resid, ar_order=spec.ar_order, ma_order=spec.ma_order,
                demean=False, **arma_estimator_kwargs)
        else:
            # For later iterations, use a "warm start" for parameter estimates
            # (speeds up estimation and convergence)
            start_params = (
                None if i == 1 else np.r_[ar_params, ma_params,
                                          seasonal_ar_params,
                                          seasonal_ma_params, sigma2])
            # Note: in each case, we do not pass in the order of integration
            # since we have already differenced the series
            tmp_order = (spec.order[0], 0, spec.order[2])
            tmp_seasonal_order = (spec.seasonal_order[0], 0,
                                  spec.seasonal_order[2],
                                  spec.seasonal_order[3])
            if arma_estimator == 'innovations_mle':
                p_arma, res_arma = innovations_mle(
                    resid, order=tmp_order, seasonal_order=tmp_seasonal_order,
                    demean=False, start_params=start_params,
                    **arma_estimator_kwargs)
            else:
                p_arma, res_arma = statespace(
                    resid, order=tmp_order, seasonal_order=tmp_seasonal_order,
                    include_constant=False, start_params=start_params,
                    **arma_estimator_kwargs)

        ar_params = p_arma.ar_params
        seasonal_ar_params = p_arma.seasonal_ar_params
        ma_params = p_arma.ma_params
        seasonal_ma_params = p_arma.seasonal_ma_params
        sigma2 = p_arma.sigma2
        arma_results.append(res_arma)

        # Step 3: GLS
        # Compute transformed variables that satisfy OLS assumptions
        # Note: In section 6.6.1 of Brockwell and Davis (2016), these
        # transformations are developed as computed by left multiplication
        # by a matrix T. However, explicitly constructing T and then
        # performing the left-multiplications does not scale well when nobs is
        # large. Instead, we can retrieve the transformed variables as the
        # residuals of the innovations algorithm (the `normalize=True`
        # argument applies a Prais-Winsten-type normalization to the first few
        # observations to ensure homoskedasticity). Brockwell and Davis
        # mention that they also take this approach in practice.
        # NOTE(review): only the non-seasonal AR / MA parameters are passed
        # to this transformation; the seasonal parameters estimated above are
        # not used here - confirm that this is intended for seasonal models.
        tmp, _ = arma_innovations.arma_innovations(
            augmented, ar_params=ar_params, ma_params=ma_params,
            normalize=True)
        u = tmp[:, 0]
        x = tmp[:, 1:]

        # OLS on transformed variables
        mod_gls = OLS(u, x)
        res_gls = mod_gls.fit()
        exog_params = res_gls.params
        # Residuals for the next ARMA step are computed on the untransformed
        # (but possibly differenced) data
        resid = endog - np.dot(exog, exog_params)

        # Construct the parameter vector for the iteration
        p = SARIMAXParams(spec=spec)
        p.exog_params = exog_params
        if spec.max_ar_order > 0:
            p.ar_params = ar_params
        if spec.max_seasonal_ar_order > 0:
            p.seasonal_ar_params = seasonal_ar_params
        if spec.max_ma_order > 0:
            p.ma_params = ma_params
        if spec.max_seasonal_ma_order > 0:
            p.seasonal_ma_params = seasonal_ma_params
        p.sigma2 = sigma2
        parameters.append(p)

        # Check for convergence
        difference = np.abs(exog_params - prev)
        differences.append(difference)
        if n_iter is None and np.all(difference < tolerance):
            converged = True
            break
    else:
        # Reached only when the loop finished without `break`, i.e. without
        # meeting the convergence criterion
        if n_iter is None:
            warnings.warn('Feasible GLS failed to converge in %d iterations.'
                          ' Consider increasing the maximum number of'
                          ' iterations using the `max_iter` argument or'
                          ' reducing the required tolerance using the'
                          ' `tolerance` argument.' % max_iter)

    # Construct final results
    p = parameters[-1]
    other_results = Bunch({
        'spec': spec,
        'params': parameters,
        'converged': converged,
        'differences': differences,
        'iterations': i,
        'arma_estimator': arma_estimator,
        'arma_estimator_kwargs': arma_estimator_kwargs,
        'arma_results': arma_results,
    })

    return p, other_results