def recursive_coefficients(self):
    """
    Estimates of regression coefficients, recursively estimated

    Returns
    -------
    out: Bunch
        Has the following attributes:

        - `filtered`: a time series array with the filtered estimate of
          the component
        - `filtered_cov`: a time series array with the filtered estimate of
          the variance/covariance of the component
        - `smoothed`: a time series array with the smoothed estimate of
          the component (None when no smoothed state is available)
        - `smoothed_cov`: a time series array with the smoothed estimate of
          the variance/covariance of the component (None when no smoothed
          state covariance is available)
        - `offset`: an integer giving the offset in the state vector where
          this component begins
    """
    # The coefficients are the first `k_exog` entries of the state vector
    offset = 0
    coefs = slice(offset, offset + self.specification.k_exog)

    filtered = self.filtered_state[coefs]
    filtered_cov = self.filtered_state_cov[coefs, coefs]

    # Smoothed output is optional; fall back to None when it is missing
    smoothed = None
    if self.smoothed_state is not None:
        smoothed = self.smoothed_state[coefs]

    smoothed_cov = None
    if self.smoothed_state_cov is not None:
        smoothed_cov = self.smoothed_state_cov[coefs, coefs]

    return Bunch(filtered=filtered,
                 filtered_cov=filtered_cov,
                 smoothed=smoothed,
                 smoothed_cov=smoothed_cov,
                 offset=offset)
def partial_project(endog, exog):
    """
    Project `endog` on `exog` by least squares (partialling out `exog`).

    Parameters
    ----------
    endog : ndarray
        Array of variables from which the effect of `exog` is partialled
        out.
    exog : ndarray
        Array of variables on which `endog` is projected.

    Returns
    -------
    res : instance of Bunch with

        - params : OLS parameter estimates from projection of endog on exog
        - fittedvalues : predicted values of endog given exog
        - resid : residual of the regression, values of endog with effect
          of exog partialled out

    Notes
    -----
    This is a no-frills helper mainly for internal calculations; no error
    checking or array conversion is performed.
    """
    # Least-squares coefficients via the Moore-Penrose pseudoinverse
    coefs = np.linalg.pinv(exog).dot(endog)
    fitted = exog.dot(coefs)
    return Bunch(params=coefs, fittedvalues=fitted, resid=endog - fitted)
def x13_seasonal_decompose(self):
    """
    Collect the adjusted series, trend-cycle, irregular, seasonal and
    calendar components from the analysis results.

    Returns
    -------
    res : Bunch
        A Bunch object with the following elements:

        - seasadj : pandas.Series
            Final adjusted series after the seasonal decomposition (i.e.
            the original data with the seasonal and calendar adjustment
            factors removed).
        - trend : pandas.Series
            Final trend-cycle component of the decomposition.
        - irregular : pandas.Series
            Final irregular component of the decomposition.
        - seasonal : pandas.Series
            Final seasonal adjustment factors (outlier adjustments,
            moving-holiday effects and similar adjustments are handled in
            the prior adjustment and are included in the seasonal factors).
        - calendar : pandas.Series or None
            If `trading` is True, the final calendar adjustment factors
            (mainly the trading-day adjustment); None if `trading` is
            False.
    """
    analysis = self.analysis_res
    return Bunch(
        seasadj=analysis.seasadj,
        trend=analysis.trend,
        irregular=analysis.irregular,
        seasonal=self._seasonal_resolution(),
        calendar=self._calendar_resolution(),
    )
def _package_fixed_and_free_params_info(fixed_params, spec_ar_lags,
                                        spec_ma_lags):
    """
    Split the AR / MA lags of a specification into fixed and free groups.

    Parameters
    ----------
    fixed_params : dict
        Maps parameter names to fixed values. The lag is parsed from the
        text after the last "." in each name (with any leading "L"
        removed); names starting with "ar" / "ma" are treated as AR / MA
        parameters, anything else is ignored after parsing.
    spec_ar_lags : list of int
        SARIMAXSpecification.ar_lags
    spec_ma_lags : list of int
        SARIMAXSpecification.ma_lags

    Returns
    -------
    Bunch with
    (lags) fixed_ar_lags, fixed_ma_lags, free_ar_lags, free_ma_lags;
    (ix) fixed_ar_ix, fixed_ma_ix, free_ar_ix, free_ma_ix;
    (params) fixed_ar_params, fixed_ma_params

    Raises
    ------
    ValueError
        If the trailing component of a key in `fixed_params` cannot be
        converted to an integer lag.
    """
    # unpack fixed lags and params
    fixed_ar_lags_and_params = []
    fixed_ma_lags_and_params = []
    for key, val in fixed_params.items():
        lag = int(key.split(".")[-1].lstrip("L"))
        if key.startswith("ar"):
            fixed_ar_lags_and_params.append((lag, val))
        elif key.startswith("ma"):
            fixed_ma_lags_and_params.append((lag, val))

    # sort by lag so that lags and params stay aligned and ordered
    fixed_ar_lags_and_params.sort()
    fixed_ma_lags_and_params.sort()

    fixed_ar_lags = [lag for lag, _ in fixed_ar_lags_and_params]
    fixed_ar_params = np.array([val for _, val in fixed_ar_lags_and_params])
    fixed_ma_lags = [lag for lag, _ in fixed_ma_lags_and_params]
    fixed_ma_params = np.array([val for _, val in fixed_ma_lags_and_params])

    # unpack free lags; the lookup sets are built once rather than once per
    # candidate lag
    fixed_ar_lag_set = set(fixed_ar_lags)
    fixed_ma_lag_set = set(fixed_ma_lags)
    free_ar_lags = [lag for lag in spec_ar_lags
                    if lag not in fixed_ar_lag_set]
    free_ma_lags = [lag for lag in spec_ma_lags
                    if lag not in fixed_ma_lag_set]

    # get ix for indexing purposes: `ar_ix`, and `ma_ix` below, are to account
    # for non-consecutive lags; for indexing purposes, must have dtype int
    free_ar_ix = np.array(free_ar_lags, dtype=int) - 1
    free_ma_ix = np.array(free_ma_lags, dtype=int) - 1
    fixed_ar_ix = np.array(fixed_ar_lags, dtype=int) - 1
    fixed_ma_ix = np.array(fixed_ma_lags, dtype=int) - 1

    return Bunch(
        # lags
        fixed_ar_lags=fixed_ar_lags, fixed_ma_lags=fixed_ma_lags,
        free_ar_lags=free_ar_lags, free_ma_lags=free_ma_lags,
        # ixs
        fixed_ar_ix=fixed_ar_ix, fixed_ma_ix=fixed_ma_ix,
        free_ar_ix=free_ar_ix, free_ma_ix=free_ma_ix,
        # fixed params
        fixed_ar_params=fixed_ar_params, fixed_ma_params=fixed_ma_params,
    )
def plot_data(request):
    """
    Build the inputs for one parametrised plotting scenario.

    Parameters
    ----------
    request : object
        Must expose `param`, a sequence of eight entries ordered as
        (lags, trend, seasonal, nexog, period, missing, use_pandas,
        hold_back).

    Returns
    -------
    Bunch
        Holds the scenario settings together with the `endog` and `exog`
        produced by `gen_data(250, nexog, use_pandas)`.
    """
    settings = request.param
    lags, trend, seasonal = settings[:3]
    nexog, period, missing, use_pandas, hold_back = settings[3:]

    generated = gen_data(250, nexog, use_pandas)

    scenario = dict(
        trend=trend,
        lags=lags,
        seasonal=seasonal,
        period=period,
        endog=generated.endog,
        exog=generated.exog,
        missing=missing,
        hold_back=hold_back,
    )
    return Bunch(**scenario)
def get_sarimax_models(endog, filter_univariate=False, **kwargs):
    """
    Build matching concentrated and non-concentrated SARIMAX models.

    Parameters
    ----------
    endog : array_like
        Data passed to both `sarimax.SARIMAX` constructors.
    filter_univariate : bool, optional
        Value assigned to `ssm.filter_univariate` on both models.
    **kwargs
        Forwarded to `sarimax.SARIMAX`; `tolerance` defaults to 0.

    Returns
    -------
    Bunch
        With `mod_conc`, `params_conc`, `mod_orig`, `params_orig`,
        `res_conc`, `res_orig` and `scale`.
    """
    kwargs.setdefault('tolerance', 0)

    # Concentrated model: fix the last parameter at one and read the scale
    # estimate off the smoother output
    concentrated = sarimax.SARIMAX(endog, **kwargs)
    concentrated.ssm.filter_concentrated = True
    concentrated.ssm.filter_univariate = filter_univariate
    conc_params = concentrated.start_params
    conc_params[-1] = 1
    conc_results = concentrated.smooth(conc_params)
    scale = conc_results.scale

    # Non-concentrated model: same parameters, except that the trailing
    # `n_scaled` entries are multiplied by the estimated scale
    original = sarimax.SARIMAX(endog, **kwargs)
    original.ssm.filter_univariate = filter_univariate
    orig_params = conc_params.copy()
    n_scaled = 1 + kwargs.get('measurement_error', False)
    orig_params[-n_scaled:] = scale * conc_params[-n_scaled:]
    orig_results = original.smooth(orig_params)

    return Bunch(mod_conc=concentrated,
                 params_conc=conc_params,
                 mod_orig=original,
                 params_orig=orig_params,
                 res_conc=conc_results,
                 res_orig=orig_results,
                 scale=scale)
def __init__(self, model, params, filter_results, cov_type='opg', **kwargs):
    """
    Set up recursive least squares results.

    Delegates to the parent initialiser, then adjusts the degrees of
    freedom, stores the model's init keywords and specification, and, when
    the model has a constraint matrix, trims the forecast-related arrays to
    their first row.
    """
    super(RecursiveLSResults, self).__init__(
        model, params, filter_results, cov_type, **kwargs)

    # The params are overridden with things that are not MLE params, so the
    # degrees of freedom need to be adjusted
    n_burned = max(self.loglikelihood_burn, self.k_diffuse_states)
    self.df_model = n_burned - self.model.k_constraints
    self.df_resid = self.nobs_effective - self.df_model

    # Save _init_kwds
    self._init_kwds = self.model._get_init_kwds()

    # Save the model specification
    self.specification = Bunch(k_exog=self.model.k_exog,
                               k_constraints=self.model.k_constraints)

    # Adjust results to remove "faux" endog from the constraints
    if self.model._r_matrix is not None:
        trimmed_names = (
            'forecasts',
            'forecasts_error',
            'forecasts_error_cov',
            'standardized_forecasts_error',
            'forecasts_error_diffuse_cov',
        )
        for attr_name in trimmed_names:
            full_array = getattr(self, attr_name)
            setattr(self, attr_name, full_array[:1])
def perfect_fit_data(request):
    """
    Generate data where `endog` is an exact linear function of `exog`.

    `exog` holds a constant, a standard normal draw and its square, and
    `endog` is the sum of the draw and its square. `request` is accepted
    but not used.

    Returns
    -------
    Bunch
        With `endog`, `exog` and `const` (an array shaped like `endog`
        filled with 3.2).
    """
    from statsmodels.tools.tools import Bunch

    rng = np.random.RandomState(1249328932)
    draw = rng.standard_normal((1000, 1))
    draw_sq = draw**2

    endog = draw + draw_sq
    exog = sm.add_constant(np.c_[draw, draw_sq])
    const = 3.2 * np.ones_like(endog)

    return Bunch(endog=endog, exog=exog, const=const)
def _fit_once(self):
    """
    Run the smoothing recursion once with the fixed parameters.

    Returns
    -------
    SmoothingResultsWrapper
        Wraps a `SmoothingResults` built from the fitted values, the
        residuals, the level / trend / seasonal arrays, the parameters
        used and the first forecast of level and trend.
    """
    alpha, gamma, delta, damp = self._fixed_params[:4]
    trend = self.trendtype
    season = self.seasontype
    nobs = self.nobs
    period = self.period
    y = self.data.endog
    model_key = (season, trend)

    # Level and trend carry one extra leading slot for the initial value
    level = np.zeros(nobs + 1)
    slope = np.zeros(nobs + 1)
    # + period for initial data and forecasts
    if period:
        seasonal = np.zeros(nobs + period)
    else:
        seasonal = np.zeros(nobs)

    # Set up the initial values
    if period:
        level, slope, seasonal = _init_seasonal_params(
            self.initial, level, slope, seasonal, period, gamma, y, season)
    else:
        level, slope = _init_nonseasonal_params(
            self.initial, level, slope, y, gamma, trend)

    level, slope, seasonal = _compute_smoothing[model_key](
        y, level, slope, seasonal, alpha, gamma, damp, period, delta, nobs)

    # Handles special case for Brown linear
    if trend.startswith('b'):
        level, slope = (2 * level - slope,
                        alpha / (1 - alpha) * (level - slope))

    fitted = _compute_fitted[model_key](
        level[:nobs], slope[:nobs], seasonal[:nobs], damp)

    # NOTE: could compute other residuals for the non-linear model
    resid = y - fitted

    # go ahead and save the first forecast (last level and trend values)
    summary = Bunch(fitted=fitted, resid=resid,
                    _level=level, _trend=slope, _season=seasonal,
                    trendtype=trend, seasontype=season,
                    damp=damp, period=period,
                    alpha=alpha, gamma=gamma, delta=delta,
                    _forecast_level=level[-1],
                    _forecast_trend=slope[-1])
    return SmoothingResultsWrapper(SmoothingResults(self, summary))
def fit(self, method='pinv'):
    """
    Minimal implementation of WLS optimized for performance.

    Parameters
    ----------
    method : str, optional
        Method to use to estimate parameters. "pinv", "qr" or "lstsq"

          * "pinv" uses the Moore-Penrose pseudoinverse
            to solve the least squares problem.
          * "qr" uses the QR factorization.
          * "lstsq" uses the least squares implementation in numpy.linalg

        Any value other than "pinv" or "qr" selects the "lstsq" path.

    Returns
    -------
    results : Bunch
        Contains the fewest terms needed to implement iterative estimation
        in models. Currently

          * params : Estimated parameters
          * fittedvalues : Fit values using original data
          * resid : Residuals using original data
          * model : this model instance
          * scale : scale computed using weighted residuals

    Notes
    -----
    Does not perform any checks on the input data

    See Also
    --------
    statsmodels.regression.linear_model.WLS
    """
    wexog = self.wexog
    wendog = self.wendog

    if method == 'pinv':
        params = np.linalg.pinv(wexog).dot(wendog)
    elif method == 'qr':
        q_factor, r_factor = np.linalg.qr(wexog)
        params = np.linalg.solve(r_factor, np.dot(q_factor.T, wendog))
    else:
        params = np.linalg.lstsq(wexog, wendog, rcond=-1)[0]

    # Fitted values and residuals are reported on the original data
    fitted_values = self.exog.dot(params)
    resid = self.endog - fitted_values

    # The scale uses the weighted residuals and rows-minus-columns dof
    weighted_resid = wendog - wexog.dot(params)
    dof = wexog.shape[0] - wexog.shape[1]
    scale = np.dot(weighted_resid, weighted_resid) / dof

    return Bunch(params=params, fittedvalues=fitted_values, resid=resid,
                 model=self, scale=scale)
def __init__(self, model, params, filter_results, cov_type='opg', **kwargs):
    """
    Set up recursive least squares results.

    Delegates to the parent initialiser, then stores an infinite residual
    degrees of freedom, the model's init keywords and the specification.
    """
    super(RecursiveLSResults, self).__init__(
        model, params, filter_results, cov_type, **kwargs)

    # attribute required for wald tests
    self.df_resid = np.inf

    # Save _init_kwds
    self._init_kwds = self.model._get_init_kwds()

    # Save the model specification
    self.specification = Bunch(k_exog=self.model.k_exog)
def recursive_coefficients(self):
    """
    Estimates of regression coefficients, recursively estimated

    Returns
    -------
    out: Bunch
        Has the following attributes:

        - `filtered`: a time series array with the filtered estimate of
          the component
        - `filtered_cov`: a time series array with the filtered estimate of
          the variance/covariance of the component
        - `smoothed`: a time series array with the smoothed estimate of
          the component (None when no smoothed state is available)
        - `smoothed_cov`: a time series array with the smoothed estimate of
          the variance/covariance of the component (None when no smoothed
          state covariance is available)
        - `offset`: an integer giving the offset in the state vector where
          this component begins
    """
    # The coefficients start at the beginning of the state vector
    offset = 0
    block = slice(offset, offset + self.specification.k_exog)

    result = Bunch(
        filtered=self.filtered_state[block],
        filtered_cov=self.filtered_state_cov[block, block],
        smoothed=None,
        smoothed_cov=None,
        offset=offset,
    )

    # Fill in the smoothed output only when the smoother has been run
    smoothed_state = self.smoothed_state
    if smoothed_state is not None:
        result.smoothed = smoothed_state[block]

    smoothed_state_cov = self.smoothed_state_cov
    if smoothed_state_cov is not None:
        result.smoothed_cov = smoothed_state_cov[block, block]

    return result
def test_all(self):
    """
    Fit a discretized gamma count model to grouped frequencies and compare
    against the article's table 7 and stored regression values.
    """
    # expand frequencies to observations, (no freq_weights yet)
    freq = [46, 76, 24, 9, 1]
    y = np.repeat(np.arange(5), freq)

    # results from article table 7
    expected = Bunch(
        params=[3.52636, 0.425617],
        llf=-187.469,
        chi2=1.701208,  # chisquare test
        df_model=2,
        p=0.4272,  # p-value for chi2
        aic=378.938,
        probs=[46.48, 73.72, 27.88, 6.5, 1.42],
    )

    distr = DiscretizedCount(stats.gamma)
    mod = DiscretizedModel(y, distr=distr)
    res = mod.fit(start_params=[1, 1])
    nobs = len(y)

    assert_allclose(res.params, expected.params, rtol=1e-5)
    assert_allclose(res.llf, expected.llf, atol=6e-3)
    assert_allclose(res.aic, expected.aic, atol=6e-3)
    assert_equal(res.df_model, expected.df_model)

    # Truncate to the observed categories and move the remaining mass into
    # the last one so the probabilities sum to one
    probs = mod.predict(res.params, which="probs")
    probs_trunc = probs[:len(expected.probs)]
    probs_trunc[-1] += 1 - probs_trunc.sum()
    fitted_counts = probs_trunc * nobs
    assert_allclose(fitted_counts, expected.probs, atol=6e-2)
    assert_allclose(np.sum(freq), fitted_counts.sum(), rtol=1e-10)

    chi2_test = stats.chisquare(freq, fitted_counts, ddof=len(res.params))
    # regression test, numbers from running test
    # close but not identical to article
    assert_allclose(chi2_test.statistic, 1.70409356, rtol=1e-7)
    assert_allclose(chi2_test.pvalue, 0.42654100, rtol=1e-7)

    # smoke test for summary
    res.summary()

    np.random.seed(987146)
    boot = res.bootstrap()
    # only loose check, small default n_rep=100, agreement at around 3%
    assert_allclose(res.params, boot[0], rtol=0.05)
    assert_allclose(res.bse, boot[1], rtol=0.05)
def __init__(self, model, params, filter_results, cov_type='opg',
             cov_kwds=None, **kwargs):
    """
    Set up VARMAX results.

    Delegates to the parent initialiser, stores the model specification,
    and reshapes the flat AR / MA parameters into coefficient matrices
    (left as None when the corresponding order is zero).
    """
    super(VARMAXResults, self).__init__(model, params, filter_results,
                                        cov_type, cov_kwds, **kwargs)

    self.specification = Bunch(
        # Set additional model parameters
        error_cov_type=self.model.error_cov_type,
        measurement_error=self.model.measurement_error,
        enforce_stationarity=self.model.enforce_stationarity,
        enforce_invertibility=self.model.enforce_invertibility,
        trend_offset=self.model.trend_offset,
        order=self.model.order,
        # Model order
        k_ar=self.model.k_ar,
        k_ma=self.model.k_ma,
        # Trend / Regression
        trend=self.model.trend,
        k_trend=self.model.k_trend,
        k_exog=self.model.k_exog,
    )

    def _as_lag_matrices(flat_params, n_lags):
        # Reshape a flat parameter vector into the stacked per-lag
        # coefficient matrices
        k_endog = self.model.k_endog
        wide = flat_params.reshape(k_endog * n_lags, k_endog).T
        return wide.reshape(k_endog, k_endog, n_lags).T

    # Polynomials / coefficient matrices
    self.coefficient_matrices_var = None
    self.coefficient_matrices_vma = None

    if self.model.k_ar > 0:
        ar_params = np.array(self.params[self.model._params_ar])
        self.coefficient_matrices_var = _as_lag_matrices(
            ar_params, self.model.k_ar)

    if self.model.k_ma > 0:
        ma_params = np.array(self.params[self.model._params_ma])
        self.coefficient_matrices_vma = _as_lag_matrices(
            ma_params, self.model.k_ma)
def __init__(self, model, params, filter_results, cov_type='opg', **kwargs):
    """
    Set up recursive least squares results.

    Delegates to the parent initialiser, then adjusts the degrees of
    freedom and stores the model's init keywords and specification.
    """
    super(RecursiveLSResults, self).__init__(
        model, params, filter_results, cov_type, **kwargs)

    # The params are overridden with things that aren't MLE params, so the
    # degrees of freedom need to be adjusted
    n_burned = max(self.loglikelihood_burn, self.k_diffuse_states)
    self.df_model = n_burned - self.model.k_constraints
    self.df_resid = self.nobs_effective - self.df_model

    # Save _init_kwds
    self._init_kwds = self.model._get_init_kwds()

    # Save the model specification
    self.specification = Bunch(k_exog=self.model.k_exog,
                               k_constraints=self.model.k_constraints)
def results(self, params):
    """
    Construct results

    Parameters
    ----------
    params : ndarray
        Model parameters

    Returns
    -------
    Bunch
        With `params`, `fittedvalues` and `resid` (both on the original
        data), `model` (this instance) and `scale` (from the weighted
        residuals).

    Notes
    -----
    Allows results to be constructed from either existing parameters or
    when estimated using ``fit``
    """
    wexog = self.wexog

    # Fitted values and residuals are reported on the original data
    fitted_values = self.exog.dot(params)
    resid = self.endog - fitted_values

    # The scale uses the weighted residuals and rows-minus-columns dof
    weighted_resid = self.wendog - wexog.dot(params)
    dof = wexog.shape[0] - wexog.shape[1]
    scale = np.dot(weighted_resid, weighted_resid) / dof

    return Bunch(params=params, fittedvalues=fitted_values, resid=resid,
                 model=self, scale=scale)
def test_acorr_breusch_godfrey_multidim(self):
    """Two-dimensional residuals must be rejected with a ValueError."""
    two_column_resid = np.empty((100, 2))
    fake_results = Bunch(resid=two_column_resid)
    expected_message = 'Model resid must be a 1d array'
    with pytest.raises(ValueError, match=expected_message):
        smsdia.acorr_breusch_godfrey(fake_results)
def _spg_optim(func, grad, start, project, maxiter=1e4, M=10,
               ctol=1e-3, maxiter_nmls=200, lam_min=1e-30,
               lam_max=1e30, sig1=0.1, sig2=0.9, gam=1e-4):
    """
    Implements the spectral projected gradient method for minimizing a
    differentiable function on a convex domain.

    Parameters
    ----------
    func : real valued function
        The objective function to be minimized.
    grad : real array-valued function
        The gradient of the objective function
    start : array_like
        The starting point
    project : function
        In-place projection of the argument to the domain
        of func.
    ... See notes regarding additional arguments

    Returns
    -------
    rslt : Bunch
        rslt.params is the final iterate, other fields describe
        convergence status.

    Notes
    -----
    This can be an effective heuristic algorithm for problems where no
    guaranteed algorithm for computing a global minimizer is known.

    There are a number of tuning parameters, but these generally
    should not be changed except for `maxiter` (positive integer) and
    `ctol` (small positive real).  See the Birgin et al reference for
    more information about the tuning parameters.

    Reference
    ---------
    E. Birgin, J.M. Martinez, and M. Raydan. Spectral projected
    gradient methods: Review and perspectives. Journal of Statistical
    Software (preprint).  Available at:
    http://www.ime.usp.br/~egbirgin/publications/bmr5.pdf
    """
    lam = min(10 * lam_min, lam_max)

    params = start.copy()
    gval = grad(params)
    obj_hist = [func(params)]

    def _outcome(converged, message):
        # Package the current iterate and objective history
        return Bunch(**{
            "Converged": converged,
            "params": params,
            "objective_values": obj_hist,
            "Message": message,
        })

    for _ in range(int(maxiter)):

        # Check convergence: size of the projected unit gradient step
        probe = params - gval
        project(probe)
        probe -= params
        if np.max(np.abs(probe)) < ctol:
            return _outcome(True, "Converged successfully")

        # The line search direction
        direction = params - lam * gval
        project(direction)
        direction -= params

        # Carry out the nonmonotone line search
        alpha, new_params, fval, new_gval = _nmono_linesearch(
            func, grad, params, direction, obj_hist,
            M=M, sig1=sig1, sig2=sig2, gam=gam, maxiter=maxiter_nmls)

        if alpha is None:
            return _outcome(False, "Failed in nmono_linesearch")

        obj_hist.append(fval)

        # Update the spectral step length, clipped to [lam_min, lam_max]
        step = new_params - params
        grad_change = new_gval - gval
        sy = (step * grad_change).sum()
        if sy <= 0:
            lam = lam_max
        else:
            lam = max(lam_min, min((step * step).sum() / sy, lam_max))

        params = new_params
        gval = new_gval

    return _outcome(False, "spg_optim did not converge")
def innovations_mle(endog, order=(0, 0, 0), seasonal_order=(0, 0, 0, 0),
                    demean=True, enforce_invertibility=True,
                    start_params=None, minimize_kwargs=None):
    """
    Estimate SARIMA parameters by MLE using innovations algorithm.

    Parameters
    ----------
    endog : array_like
        Input time series array.
    order : tuple, optional
        The (p,d,q) order of the model for the number of AR parameters,
        differences, and MA parameters. Default is (0, 0, 0).
    seasonal_order : tuple, optional
        The (P,D,Q,s) order of the seasonal component of the model for the
        AR parameters, differences, MA parameters, and periodicity. Default
        is (0, 0, 0, 0).
    demean : bool, optional
        Whether to estimate and remove the mean from the process prior to
        fitting the SARIMA coefficients. Default is True.
    enforce_invertibility : bool, optional
        Whether or not to transform the MA parameters to enforce
        invertibility in the moving average component of the model. Default
        is True.
    start_params : array_like, optional
        Initial guess of the solution for the loglikelihood maximization. The
        AR polynomial must be stationary. If `enforce_invertibility=True` the
        MA polynomial must be invertible. If not provided, default starting
        parameters are computed using the Hannan-Rissanen method.
    minimize_kwargs : dict, optional
        Arguments to pass to scipy.optimize.minimize. The dictionary (and
        its `options` entry) is copied, so the caller's object is not
        modified. `options['maxiter']` defaults to 100.

    Returns
    -------
    parameters : SARIMAXParams object
    other_results : Bunch
        Includes four components: `spec`, containing the
        `SARIMAXSpecification` instance corresponding to the input arguments;
        `minimize_kwargs`, containing any keyword arguments passed to
        `minimize`; `start_params`, containing the untransformed starting
        parameters passed to `minimize`; and `minimize_results`, containing
        the output from `minimize`.

    Raises
    ------
    ValueError
        If `start_params` is given and implies a non-stationary AR process,
        or a non-invertible MA process while `enforce_invertibility=True`.

    Notes
    -----
    The primary reference is [1]_, section 5.2.

    Note: we do not include `enforce_stationarity` as an argument, because this
    function requires stationarity.

    TODO: support concentrating out the scale (should be easy: use sigma2=1
          and then compute sigma2=np.sum(u**2 / v) / len(u); would then need to
          redo llf computation in the Cython function).

    TODO: add support for fixed parameters

    TODO: add support for secondary optimization that does not enforce
          stationarity / invertibility, starting from first step's parameters

    References
    ----------
    .. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
       Introduction to Time Series and Forecasting. Springer.
    """
    spec = SARIMAXSpecification(
        endog, order=order, seasonal_order=seasonal_order,
        enforce_stationarity=True,
        enforce_invertibility=enforce_invertibility)
    endog = spec.endog
    if spec.is_integrated:
        warnings.warn('Provided `endog` series has been differenced to'
                      ' eliminate integration prior to ARMA parameter'
                      ' estimation.')
        endog = diff(endog, k_diff=spec.diff,
                     k_seasonal_diff=spec.seasonal_diff,
                     seasonal_periods=spec.seasonal_periods)
    if demean:
        endog = endog - endog.mean()

    p = SARIMAXParams(spec=spec)

    if start_params is None:
        sp = SARIMAXParams(spec=spec)

        # Estimate starting parameters via Hannan-Rissanen
        hr, hr_results = hannan_rissanen(endog, ar_order=spec.ar_order,
                                         ma_order=spec.ma_order, demean=False)
        if spec.seasonal_periods == 0:
            # If no seasonal component, then `hr` gives starting parameters
            sp.params = hr.params
        else:
            # If we do have a seasonal component, estimate starting parameters
            # for the seasonal lags using the residuals from the previous step
            _ = SARIMAXSpecification(
                endog, seasonal_order=seasonal_order,
                enforce_stationarity=True,
                enforce_invertibility=enforce_invertibility)

            ar_order = np.array(spec.seasonal_ar_lags) * spec.seasonal_periods
            ma_order = np.array(spec.seasonal_ma_lags) * spec.seasonal_periods
            seasonal_hr, seasonal_hr_results = hannan_rissanen(
                hr_results.resid, ar_order=ar_order, ma_order=ma_order,
                demean=False)

            # Set the starting parameters
            sp.ar_params = hr.ar_params
            sp.ma_params = hr.ma_params
            sp.seasonal_ar_params = seasonal_hr.ar_params
            sp.seasonal_ma_params = seasonal_hr.ma_params
            sp.sigma2 = seasonal_hr.sigma2

        # Then, require starting parameters to be stationary and invertible
        if not sp.is_stationary:
            sp.ar_params = [0] * sp.k_ar_params
            sp.seasonal_ar_params = [0] * sp.k_seasonal_ar_params

        if not sp.is_invertible and spec.enforce_invertibility:
            sp.ma_params = [0] * sp.k_ma_params
            sp.seasonal_ma_params = [0] * sp.k_seasonal_ma_params

        start_params = sp.params
    else:
        sp = SARIMAXParams(spec=spec)
        sp.params = start_params
        if not sp.is_stationary:
            raise ValueError('Given starting parameters imply a non-stationary'
                             ' AR process. Innovations algorithm requires a'
                             ' stationary process.')

        if spec.enforce_invertibility and not sp.is_invertible:
            raise ValueError('Given starting parameters imply a non-invertible'
                             ' MA process with `enforce_invertibility=True`.')

    def obj(params):
        # Negative loglikelihood at the constrained version of `params`
        p.params = spec.constrain_params(params)

        return -arma_innovations.arma_loglike(
            endog, ar_params=-p.reduced_ar_poly.coef[1:],
            ma_params=p.reduced_ma_poly.coef[1:], sigma2=p.sigma2)

    # Untransform the starting parameters
    unconstrained_start_params = spec.unconstrain_params(start_params)

    # Perform the minimization. Work on copies of the keyword dictionaries so
    # that filling in defaults never modifies the caller's objects.
    minimize_kwargs = {} if minimize_kwargs is None else dict(minimize_kwargs)
    minimize_kwargs['options'] = dict(minimize_kwargs.get('options') or {})
    minimize_kwargs['options'].setdefault('maxiter', 100)
    minimize_results = minimize(obj, unconstrained_start_params,
                                **minimize_kwargs)

    # TODO: show warning if convergence failed.

    # Reverse the transformation to get the optimal parameters
    p.params = spec.constrain_params(minimize_results.x)

    # Construct other results
    other_results = Bunch({
        'spec': spec,
        'minimize_results': minimize_results,
        'minimize_kwargs': minimize_kwargs,
        'start_params': start_params
    })

    return p, other_results
def innovations(endog, ma_order=0, demean=True):
    """
    Estimate MA parameters using innovations algorithm.

    Parameters
    ----------
    endog : array_like or SARIMAXSpecification
        Input time series array, assumed to be stationary.
    ma_order : int, optional
        Maximum moving average order. Default is 0.
    demean : bool, optional
        Whether to estimate and remove the mean from the process prior to
        fitting the moving average coefficients. Default is True.

    Returns
    -------
    parameters : list of SARIMAXParams objects
        List elements correspond to estimates at different `ma_order`. For
        example, parameters[0] is an `SARIMAXParams` instance corresponding to
        `ma_order=0`.
    other_results : Bunch
        Includes one component, `spec`, containing the `SARIMAXSpecification`
        instance corresponding to the input arguments.

    Raises
    ------
    ValueError
        If the requested MA specification is not made of consecutive lags.

    Notes
    -----
    The primary reference is [1]_, section 5.1.3.

    This procedure assumes that the series is stationary.

    References
    ----------
    .. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
       Introduction to Time Series and Forecasting. Springer.
    """
    max_spec = SARIMAXSpecification(endog, ma_order=ma_order)
    endog = max_spec.endog

    if demean:
        endog = endog - endog.mean()

    if not max_spec.is_ma_consecutive:
        raise ValueError('Innovations estimation unavailable for models with'
                         ' seasonal or otherwise non-consecutive MA orders.')

    sample_acovf = acovf(endog, fft=True)
    theta, v = innovations_algo(sample_acovf, nobs=max_spec.ma_order + 1)
    ma_params = [theta[i, :i] for i in range(1, max_spec.ma_order + 1)]
    sigma2 = v

    # One parameter set per MA order 0, 1, ..., max order. The per-order
    # specification uses its own name so that it does not shadow the
    # specification built from the input arguments.
    out = []
    for i in range(max_spec.ma_order + 1):
        order_spec = SARIMAXSpecification(ma_order=i)
        p = SARIMAXParams(spec=order_spec)

        if i == 0:
            p.params = sigma2[i]
        else:
            p.params = np.r_[ma_params[i - 1], sigma2[i]]
        out.append(p)

    # Construct other results: report the specification that corresponds to
    # the input arguments (including `endog`), as documented
    other_results = Bunch({
        'spec': max_spec,
    })

    return out, other_results
def arma_order_select_ic(y, max_ar=4, max_ma=2, ic='bic', trend='c',
                         model_kw=None, fit_kw=None):
    """
    Returns information criteria for many ARMA models

    Parameters
    ----------
    y : array-like
        Time-series data
    max_ar : int
        Maximum number of AR lags to use. Default 4.
    max_ma : int
        Maximum number of MA lags to use. Default 2.
    ic : str, list
        Information criteria to report. Either a single string or a list
        of different criteria is possible.
    trend : str
        The trend to use when fitting the ARMA models.
    model_kw : dict, optional
        Keyword arguments to be passed to the ``ARMA`` model. Defaults to
        an empty dict.
    fit_kw : dict, optional
        Keyword arguments to be passed to ``ARMA.fit``. Defaults to an
        empty dict.

    Returns
    -------
    obj : Results object
        Each ic is an attribute with a DataFrame for the results. The AR order
        used is the row index. The ma order used is the column index. The
        minimum orders are available as ``ic_min_order``.

    Raises
    ------
    ValueError
        If `ic` is neither a string nor a list or tuple.

    Examples
    --------

    >>> from statsmodels.tsa.arima_process import arma_generate_sample
    >>> import statsmodels.api as sm
    >>> import numpy as np

    >>> arparams = np.array([.75, -.25])
    >>> maparams = np.array([.65, .35])
    >>> arparams = np.r_[1, -arparams]
    >>> maparams = np.r_[1, maparams]
    >>> nobs = 250
    >>> np.random.seed(2014)
    >>> y = arma_generate_sample(arparams, maparams, nobs)
    >>> res = sm.tsa.arma_order_select_ic(y, ic=['aic', 'bic'], trend='nc')
    >>> res.aic_min_order
    >>> res.bic_min_order

    Notes
    -----
    This method can be used to tentatively identify the order of an ARMA
    process, provided that the time series is stationary and invertible. This
    function computes the full exact MLE estimate of each model and can be,
    therefore a little slow. An implementation using approximate estimates
    will be provided in the future. In the meantime, consider passing
    {method : 'css'} to fit_kw.
    """
    from pandas import DataFrame

    # Use fresh dicts rather than shared mutable default arguments
    model_kw = {} if model_kw is None else model_kw
    fit_kw = {} if fit_kw is None else fit_kw

    ar_range = lrange(0, max_ar + 1)
    ma_range = lrange(0, max_ma + 1)
    if isinstance(ic, string_types):
        ic = [ic]
    elif not isinstance(ic, (list, tuple)):
        raise ValueError("Need a list or a tuple for ic if not a string.")

    # One (max_ar + 1) x (max_ma + 1) table per requested criterion
    results = np.zeros((len(ic), max_ar + 1, max_ma + 1))

    for ar in ar_range:
        for ma in ma_range:
            # An order-(0, 0) model without trend has nothing to estimate
            if ar == 0 and ma == 0 and trend == 'nc':
                results[:, ar, ma] = np.nan
                continue

            mod = _safe_arma_fit(y, (ar, ma), model_kw, trend, fit_kw)
            if mod is None:
                # The fit did not produce a model; mark the cell as missing
                results[:, ar, ma] = np.nan
                continue

            for i, criteria in enumerate(ic):
                results[i, ar, ma] = getattr(mod, criteria)

    dfs = [DataFrame(res, columns=ma_range, index=ar_range)
           for res in results]

    res = dict(zip(ic, dfs))

    # add the minimums to the results dict
    min_res = {}
    for i, result in iteritems(res):
        mins = np.where(result.min().min() == result)
        min_res.update({i + '_min_order': (mins[0][0], mins[1][0])})
    res.update(min_res)

    return Bunch(**res)
def statespace(endog, exog=None, order=(0, 0, 0),
               seasonal_order=(0, 0, 0, 0), include_constant=True,
               enforce_stationarity=True, enforce_invertibility=True,
               concentrate_scale=False, start_params=None, fit_kwargs=None):
    """
    Estimate SARIMAX parameters using state space methods.

    Parameters
    ----------
    endog : array_like
        Input time series array.
    exog : array_like, optional
        Exogenous regressors passed on to the specification and to the
        state space model. Default is None.
    order : tuple, optional
        The (p,d,q) order of the model for the number of AR parameters,
        differences, and MA parameters. Default is (0, 0, 0).
    seasonal_order : tuple, optional
        The (P,D,Q,s) order of the seasonal component of the model for the
        AR parameters, differences, MA parameters, and periodicity. Default
        is (0, 0, 0, 0).
    include_constant : bool, optional
        Whether to add a constant term in `exog` if it's not already there.
        The estimate of the constant will then appear as one of the `exog`
        parameters. If `exog` is None, then the constant will represent the
        mean of the process.
    enforce_stationarity : boolean, optional
        Whether or not to transform the AR parameters to enforce stationarity
        in the autoregressive component of the model. Default is True.
    enforce_invertibility : boolean, optional
        Whether or not to transform the MA parameters to enforce invertibility
        in the moving average component of the model. Default is True.
    concentrate_scale : boolean, optional
        Whether or not to concentrate the scale (variance of the error term)
        out of the likelihood. This reduces the number of parameters estimated
        by maximum likelihood by one.
    start_params : array_like, optional
        Initial guess of the solution for the loglikelihood maximization. If
        given, it is validated against `enforce_stationarity` and
        `enforce_invertibility`. If not provided, None is passed through to
        the state space model's `fit` method.
    fit_kwargs : dict, optional
        Arguments to pass to the state space model's `fit` method. The
        dictionary is copied, so the caller's object is not modified. `disp`
        defaults to 0.

    Returns
    -------
    parameters : SARIMAXParams object
    other_results : Bunch
        Includes two components, `spec`, containing the `SARIMAXSpecification`
        instance corresponding to the input arguments; and
        `statespace_results`, corresponding to the results from the underlying
        state space model and Kalman filter / smoother.

    Raises
    ------
    ValueError
        If `start_params` is given and implies a non-stationary AR process
        while `enforce_stationarity=True`, or a non-invertible MA process
        while `enforce_invertibility=True`.

    Notes
    -----
    The primary reference is [1]_.

    References
    ----------
    .. [1] Durbin, James, and Siem Jan Koopman. 2012.
       Time Series Analysis by State Space Methods: Second Edition.
       Oxford University Press.
    """
    # Handle including the constant (need to do it now so that the constant
    # parameter can be included in the specification as part of `exog`.)
    if include_constant:
        exog = np.ones_like(endog) if exog is None else add_constant(exog)

    # Create the specification
    spec = SARIMAXSpecification(
        endog, exog=exog, order=order, seasonal_order=seasonal_order,
        enforce_stationarity=enforce_stationarity,
        enforce_invertibility=enforce_invertibility,
        concentrate_scale=concentrate_scale)
    endog = spec.endog
    exog = spec.exog
    p = SARIMAXParams(spec=spec)

    # Check start parameters
    if start_params is not None:
        sp = SARIMAXParams(spec=spec)
        sp.params = start_params

        if spec.enforce_stationarity and not sp.is_stationary:
            raise ValueError('Given starting parameters imply a non-stationary'
                             ' AR process with `enforce_stationarity=True`.')

        if spec.enforce_invertibility and not sp.is_invertible:
            raise ValueError('Given starting parameters imply a non-invertible'
                             ' MA process with `enforce_invertibility=True`.')

    # Create and fit the state space model
    mod = SARIMAX(endog, exog=exog, order=spec.order,
                  seasonal_order=spec.seasonal_order,
                  enforce_stationarity=spec.enforce_stationarity,
                  enforce_invertibility=spec.enforce_invertibility,
                  concentrate_scale=spec.concentrate_scale)

    # Copy before filling in defaults so the caller's dict is left untouched
    fit_kwargs = {} if fit_kwargs is None else dict(fit_kwargs)
    fit_kwargs.setdefault('disp', 0)
    res_ss = mod.fit(start_params=start_params, **fit_kwargs)

    # Construct results
    p.params = res_ss.params
    res = Bunch({
        'spec': spec,
        'statespace_results': res_ss,
    })

    return p, res
def hannan_rissanen(endog, ar_order=0, ma_order=0, demean=True,
                    initial_ar_order=None, unbiased=None):
    """
    Fit ARMA coefficients with the Hannan-Rissanen regression procedure.

    Parameters
    ----------
    endog : array_like
        Time series to fit; the method presumes it is stationary.
    ar_order : int
        Order of the autoregressive part.
    ma_order : int
        Order of the moving average part.
    demean : bool, optional
        When True (the default) the sample mean is subtracted from the
        series before any coefficient is estimated.
    initial_ar_order : int, optional
        Lag length of the long autoregression whose residuals stand in for
        the innovations. When omitted, the larger of ``floor(log(nobs)**2)``
        and twice the largest AR / MA lag is used.
    unbiased : bool, optional
        Controls the bias-correction step. With None (the default) the
        correction is applied only when the least-squares estimates are
        stationary and invertible; with True it is required and an error is
        raised when those conditions fail; with False it is skipped.

    Returns
    -------
    parameters : SARIMAXParams object
    other_results : Bunch
        Holds three entries: `spec`, the `SARIMAXSpecification` built from
        the inputs; `initial_ar_order`, the lag length used for the long
        autoregression; and `resid`, the residuals of the last step that
        was carried out.

    Raises
    ------
    ValueError
        If `unbiased=True` and the second-step estimates are non-stationary
        or non-invertible.

    Notes
    -----
    Follows section 5.1.4 of [1]_, in three steps:

    1. A long autoregression is fitted by Yule-Walker and its residuals are
       kept.
    2. The series is regressed on its own lags and on lagged residuals to
       obtain AR and MA estimates.
    3. A bias-correction regression adjusts those estimates (skipped when
       `unbiased=False`, or when the estimates from step 2 are not
       stationary and invertible).

    When `initial_ar_order` is not supplied, the rule of [2]_ is used.

    The reported variance always comes from least-squares residuals, never
    from the innovations algorithm, so that it remains available even when
    the estimates imply a non-stationary process. See
    test_hannan_rissanen::test_brockwell_davis_example_517 for a manual
    computation of the innovations-based variance.

    A stationary input does not guarantee stationary / invertible output.

    References
    ----------
    .. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
       Introduction to Time Series and Forecasting. Springer.
    .. [2] Gomez, Victor, and Agustin Maravall. 2001.
       "Automatic Modeling Methods for Univariate Series."
       A Course in Time Series Analysis, 171–201.
    """
    spec = SARIMAXSpecification(endog, ar_order=ar_order, ma_order=ma_order)
    series = spec.endog
    if demean:
        series = series - series.mean()

    estimates = SARIMAXParams(spec=spec)

    n_obs = len(series)
    p_max = spec.max_ar_order
    q_max = spec.max_ma_order

    # Rule of Gomez and Maravall (2001) for the long autoregression
    if initial_ar_order is None:
        log_rule = np.floor(np.log(n_obs)**2).astype(int)
        initial_ar_order = max(log_rule, 2 * max(p_max, q_max))
    # Built only so that an invalid initial order is rejected here
    SARIMAXSpecification(series, ar_order=initial_ar_order)

    # Lagged series; the integer index arrays (`ar_ix` here, `ma_ix` below)
    # pick out the included lags when they are not consecutive
    ar_ix = np.array(spec.ar_lags, dtype=int) - 1
    lagged_series = lagmat(series, p_max, trim='both')[:, ar_ix]

    if q_max != 0:
        # ARMA model
        # Step 1: long AR model via Yule-Walker, keep its residuals
        long_ar_params, _ = yule_walker(series, order=initial_ar_order,
                                        method='mle')
        long_lags = lagmat(series, initial_ar_order, trim='both')
        resid = series[initial_ar_order:] - long_lags.dot(long_ar_params)

        # Lagged residuals serve as regressors for the MA terms
        ma_ix = np.array(spec.ma_lags, dtype=int) - 1
        lagged_resid = lagmat(resid, q_max, trim='both')[:, ma_ix]

        # Step 2: least squares on lagged series and lagged residuals
        offset = initial_ar_order + q_max - p_max
        ls_model = OLS(series[initial_ar_order + q_max:],
                       np.c_[lagged_series[offset:], lagged_resid])
        ls_fit = ls_model.fit()
        n_ar = spec.k_ar_params
        estimates.ar_params = ls_fit.params[:n_ar]
        estimates.ma_params = ls_fit.params[n_ar:]
        resid = ls_fit.resid
        estimates.sigma2 = ls_fit.scale

        # Step 3: bias correction (unless explicitly switched off)
        if unbiased is None or unbiased is True:
            if estimates.is_stationary and estimates.is_invertible:
                ar_coef = estimates.ar_poly.coef
                ma_coef = estimates.ma_poly.coef

                # With non-consecutive lags the polynomials already carry
                # zeros in the excluded positions, so full slices of the
                # series and of `Z` can be used without subsetting.
                Z = np.zeros_like(series)
                for t in range(max(p_max, q_max), n_obs):
                    ar_part = np.dot(-ar_coef[1:],
                                     series[t - p_max:t][::-1])
                    ma_part = np.dot(ma_coef[1:], Z[t - q_max:t][::-1])
                    Z[t] = series[t] - ar_part - ma_part

                V = lfilter([1], ar_coef, Z)
                W = lfilter(np.r_[1, -ma_coef[1:]], [1], Z)

                lagged_V = lagmat(V, p_max, trim='both')
                lagged_W = lagmat(W, q_max, trim='both')

                # Trim the shorter-lag block so both have the same rows
                v_start = max(q_max - p_max, 0)
                w_start = max(p_max - q_max, 0)
                correction_exog = np.c_[lagged_V[v_start:, ar_ix],
                                        lagged_W[w_start:, ma_ix]]

                correction_fit = OLS(Z[max(p_max, q_max):],
                                     correction_exog).fit()

                estimates.ar_params = (
                    estimates.ar_params + correction_fit.params[:n_ar])
                estimates.ma_params = (
                    estimates.ma_params + correction_fit.params[n_ar:])

                # Recompute sigma2 from the step-2 regressors
                corrected = np.r_[estimates.ar_params, estimates.ma_params]
                resid = ls_model.endog - ls_model.exog.dot(corrected)
                estimates.sigma2 = np.inner(resid, resid) / len(resid)
            elif unbiased is True:
                raise ValueError('Cannot perform third step of Hannan-Rissanen'
                                 ' estimation to remove paramater bias,'
                                 ' because parameters estimated from the'
                                 ' second step are non-stationary or'
                                 ' non-invertible')
    elif p_max != 0:
        # Pure AR model: a single least-squares regression (CSS)
        ls_fit = OLS(series[p_max:], lagged_series).fit()
        resid = ls_fit.resid
        estimates.ar_params = ls_fit.params
        estimates.sigma2 = ls_fit.scale
    else:
        # Neither AR nor MA terms: only the variance is estimated
        estimates.sigma2 = np.var(series, ddof=0)
        resid = series.copy()

    # TODO: Gomez and Maravall (2001) or Gomez (1998)
    # propose one more step here to further improve MA estimates

    return estimates, Bunch({
        'spec': spec,
        'initial_ar_order': initial_ar_order,
        'resid': resid
    })
def gls(endog, exog=None, order=(0, 0, 0), seasonal_order=(0, 0, 0, 0),
        include_constant=None, n_iter=None, max_iter=50, tolerance=1e-8,
        arma_estimator='innovations_mle', arma_estimator_kwargs=None):
    """
    Estimate a regression with ARMA errors by iterated feasible GLS.

    Parameters
    ----------
    endog : array_like
        Time series to model.
    exog : array_like, optional
        Exogenous regressors. When omitted, `include_constant` must be True
        and the regressor matrix then consists of the constant column only.
    order : tuple, optional
        The (p,d,q) order of the ARIMA model. Default is (0, 0, 0).
    seasonal_order : tuple, optional
        The (P,D,Q,s) order of the seasonal ARIMA model.
        Default is (0, 0, 0, 0).
    include_constant : bool, optional
        Whether a constant is added to `exog` if it's not already there; its
        estimate is then reported among the `exog` parameters. With
        `exog=None` the constant is the mean of the process. Defaults to
        True for models without integration and to False otherwise.
    n_iter : int, optional
        Run exactly this many feasible GLS iterations. When given, it takes
        precedence over `max_iter` and `tolerance`. By default iteration
        continues until convergence.
    max_iter : int, optional
        Upper bound on the number of feasible GLS iterations. Default is 50.
        Ignored when `n_iter` is given.
    tolerance : float, optional
        Largest absolute change in any `exog` parameter that still counts as
        convergence. Default is 1e-8. Ignored when `n_iter` is given.
    arma_estimator : str, optional
        Estimator applied to the regression residuals. The default rarely
        needs changing; the admissible choices depend on `order` and
        `seasonal_order`:

        * 'innovations_mle' - can be used with any specification
        * 'statespace' - can be used with any specification
        * 'hannan_rissanen' - can be used with any ARMA non-seasonal model
        * 'yule_walker' - only non-seasonal consecutive
          autoregressive (AR) models
        * 'burg' - only non-seasonal, consecutive autoregressive (AR) models
        * 'innovations' - only non-seasonal, consecutive moving
          average (MA) models.

        The default is 'innovations_mle'.
    arma_estimator_kwargs : dict, optional
        Extra keyword arguments forwarded to the ARMA estimator.

    Returns
    -------
    parameters : SARIMAXParams object
        Estimates from the last iteration performed.
    other_results : Bunch
        Holds eight entries: `spec`; `params`, the list of per-iteration
        parameter objects (element 0 is the OLS starting point);
        `converged` (None when `n_iter` was given); `differences`, the
        absolute change in the `exog` parameters at each iteration (element
        0 is None); `iterations`, the number of iterations performed;
        `arma_estimator`; `arma_estimator_kwargs`; and `arma_results`, the
        secondary output of the ARMA estimator at each iteration (element 0
        is None).

    Raises
    ------
    ValueError
        If a constant is requested for a model that includes integration.

    Notes
    -----
    Implements the iterative scheme of [1]_, section 6.6.2. The transformed
    variables of section 6.6.1 are obtained from the innovations algorithm
    instead of from an explicitly constructed transformation matrix.

    For integrated models both `endog` and `exog` are differenced before
    estimation, and a warning is issued to say so.

    References
    ----------
    .. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
       Introduction to Time Series and Forecasting. Springer.
    """
    # A fixed iteration count disables the convergence rule: the infinite
    # tolerance is paired with the `n_iter is None` guard further down.
    if n_iter is not None:
        max_iter = n_iter
        tolerance = np.inf

    # A constant is included by default only when nothing is differenced
    has_integration = order[1] > 0 or seasonal_order[1] > 0
    if include_constant is None:
        include_constant = not has_integration
    elif include_constant and has_integration:
        raise ValueError('Cannot include a constant in an integrated model.')

    # The constant must be in `exog` before the specification is built so
    # that its coefficient is counted among the `exog` parameters.
    if include_constant:
        if exog is None:
            exog = np.ones_like(endog)
        else:
            exog = add_constant(exog)

    spec = SARIMAXSpecification(endog, exog=exog, order=order,
                                seasonal_order=seasonal_order)
    y = spec.endog
    regressors = spec.exog

    if spec.is_integrated:
        # TODO: this is the approach suggested by BD (see Remark 1 in
        # section 6.6.2 and Example 6.6.3), but maybe there are some cases
        # where we don't want to force this behavior on the user?
        warnings.warn('Provided `endog` and `exog` series have been'
                      ' differenced to eliminate integration prior to GLS'
                      ' parameter estimation.')
        diff_kwargs = dict(k_diff=spec.diff,
                           k_seasonal_diff=spec.seasonal_diff,
                           seasonal_periods=spec.seasonal_periods)
        y = diff(y, **diff_kwargs)
        regressors = diff(regressors, **diff_kwargs)
    augmented = np.c_[y, regressors]

    spec.validate_estimator(arma_estimator)
    if arma_estimator_kwargs is None:
        arma_estimator_kwargs = {}

    def _collect(beta, ar, seasonal_ar, ma, seasonal_ma, variance):
        # Bundle one iteration's estimates, setting only the ARMA blocks
        # that the specification actually contains.
        bundle = SARIMAXParams(spec=spec)
        bundle.exog_params = beta
        if spec.max_ar_order > 0:
            bundle.ar_params = ar
        if spec.max_seasonal_ar_order > 0:
            bundle.seasonal_ar_params = seasonal_ar
        if spec.max_ma_order > 0:
            bundle.ma_params = ma
        if spec.max_seasonal_ma_order > 0:
            bundle.seasonal_ma_params = seasonal_ma
        bundle.sigma2 = variance
        return bundle

    # Step 1: OLS
    ols_fit = OLS(y, regressors).fit()
    beta = ols_fit.params
    resid = ols_fit.resid

    # 0th iteration parameters: OLS coefficients with zeroed ARMA terms
    initial = _collect(beta,
                       np.zeros(spec.k_ar_params),
                       np.zeros(spec.k_seasonal_ar_params),
                       np.zeros(spec.k_ma_params),
                       np.zeros(spec.k_seasonal_ma_params),
                       ols_fit.scale)
    ar_coefs = initial.ar_params
    seasonal_ar_coefs = initial.seasonal_ar_params
    ma_coefs = initial.ma_params
    seasonal_ma_coefs = initial.seasonal_ma_params
    variance = initial.sigma2

    # Step 2 - 4: iterate feasible GLS to convergence
    arma_results = [None]
    differences = [None]
    history = [initial]
    converged = False if n_iter is None else None
    iteration = 0
    for iteration in range(1, max_iter + 1):
        previous_beta = beta

        # Step 2: ARMA
        # TODO: allow estimator-specific kwargs?
        if arma_estimator == 'yule_walker':
            arma_fit, arma_extra = yule_walker(
                resid, ar_order=spec.ar_order, demean=False,
                **arma_estimator_kwargs)
        elif arma_estimator == 'burg':
            arma_fit, arma_extra = burg(
                resid, ar_order=spec.ar_order, demean=False,
                **arma_estimator_kwargs)
        elif arma_estimator == 'innovations':
            all_orders, arma_extra = innovations(
                resid, ma_order=spec.ma_order, demean=False,
                **arma_estimator_kwargs)
            arma_fit = all_orders[-1]
        elif arma_estimator == 'hannan_rissanen':
            arma_fit, arma_extra = hannan_rissanen(
                resid, ar_order=spec.ar_order, ma_order=spec.ma_order,
                demean=False, **arma_estimator_kwargs)
        else:
            # From the second pass on, start the optimizer at the previous
            # estimates (speeds up estimation and convergence)
            if iteration == 1:
                start_params = None
            else:
                start_params = np.r_[ar_coefs, ma_coefs,
                                     seasonal_ar_coefs, seasonal_ma_coefs,
                                     variance]
            # The integration orders are zeroed because the series has
            # already been differenced above
            arma_order = (spec.order[0], 0, spec.order[2])
            arma_seasonal_order = (spec.seasonal_order[0], 0,
                                   spec.seasonal_order[2],
                                   spec.seasonal_order[3])
            if arma_estimator == 'innovations_mle':
                arma_fit, arma_extra = innovations_mle(
                    resid, order=arma_order,
                    seasonal_order=arma_seasonal_order, demean=False,
                    start_params=start_params, **arma_estimator_kwargs)
            else:
                arma_fit, arma_extra = statespace(
                    resid, order=arma_order,
                    seasonal_order=arma_seasonal_order,
                    include_constant=False, start_params=start_params,
                    **arma_estimator_kwargs)

        ar_coefs = arma_fit.ar_params
        seasonal_ar_coefs = arma_fit.seasonal_ar_params
        ma_coefs = arma_fit.ma_params
        seasonal_ma_coefs = arma_fit.seasonal_ma_params
        variance = arma_fit.sigma2
        arma_results.append(arma_extra)

        # Step 3: GLS
        # Brockwell and Davis (2016) develop the transformation to variables
        # satisfying the OLS assumptions as left multiplication by a matrix
        # T, but building T explicitly does not scale well when nobs is
        # large. The transformed variables are instead taken as the
        # residuals of the innovations algorithm (`normalize=True` applies a
        # Prais-Winsten-type normalization to the first few observations to
        # ensure homoskedasticity). Brockwell and Davis mention that they
        # also take this approach in practice.
        # NOTE(review): only the non-seasonal AR / MA coefficients are
        # passed here; confirm whether seasonal terms are meant to enter
        # this transformation as well.
        transformed, _ = arma_innovations.arma_innovations(
            augmented, ar_params=ar_coefs, ma_params=ma_coefs,
            normalize=True)

        # OLS on transformed variables (column 0 is the transformed endog)
        gls_fit = OLS(transformed[:, 0], transformed[:, 1:]).fit()
        beta = gls_fit.params
        resid = y - np.dot(regressors, beta)

        # Record this iteration's parameter vector
        history.append(_collect(beta, ar_coefs, seasonal_ar_coefs,
                                ma_coefs, seasonal_ma_coefs, variance))

        # Check for convergence
        change = np.abs(beta - previous_beta)
        differences.append(change)
        if n_iter is None and np.all(change < tolerance):
            converged = True
            break

    # Reached only without convergence when iterating to convergence
    if n_iter is None and not converged:
        warnings.warn('Feasible GLS failed to converge in %d iterations.'
                      ' Consider increasing the maximum number of'
                      ' iterations using the `max_iter` argument or'
                      ' reducing the required tolerance using the'
                      ' `tolerance` argument.' % max_iter)

    return history[-1], Bunch({
        'spec': spec,
        'params': history,
        'converged': converged,
        'differences': differences,
        'iterations': iteration,
        'arma_estimator': arma_estimator,
        'arma_estimator_kwargs': arma_estimator_kwargs,
        'arma_results': arma_results,
    })
def durbin_levinson(endog, ar_order=0, demean=True, adjusted=False):
    """
    Estimate AR parameters at multiple orders using Durbin-Levinson recursions.

    Parameters
    ----------
    endog : array_like or SARIMAXSpecification
        Input time series array, assumed to be stationary.
    ar_order : int, optional
        Autoregressive order. Default is 0.
    demean : bool, optional
        Whether to estimate and remove the mean from the process prior to
        fitting the autoregressive coefficients. Default is True.
    adjusted : bool, optional
        Whether to use the "adjusted" autocovariance estimator, which uses
        n - h degrees of freedom rather than n. This option can result in
        a non-positive definite autocovariance matrix. Default is False.

    Returns
    -------
    parameters : list of SARIMAXParams objects
        List elements correspond to estimates at different `ar_order`. For
        example, parameters[0] is an `SARIMAXParams` instance corresponding to
        `ar_order=0`.
    other_results : Bunch
        Includes one component, `spec`, containing the `SARIMAXSpecification`
        instance corresponding to the input arguments.

    Raises
    ------
    ValueError
        If the requested AR specification is not consecutive.

    Notes
    -----
    The primary reference is [1]_, section 2.5.1.

    This procedure assumes that the series is stationary.

    References
    ----------
    .. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
       Introduction to Time Series and Forecasting. Springer.
    """
    max_spec = SARIMAXSpecification(endog, ar_order=ar_order)
    endog = max_spec.endog

    # Make sure we have a consecutive process
    if not max_spec.is_ar_consecutive:
        raise ValueError('Durbin-Levinson estimation unavailable for models'
                         ' with seasonal or otherwise non-consecutive AR'
                         ' orders.')

    max_order = max_spec.ar_order
    gamma = acovf(endog, adjusted=adjusted, fft=True, demean=demean,
                  nlag=max_order)

    # If no AR component, just a variance computation
    if max_order == 0:
        ar_params = [None]
        sigma2 = [gamma[0]]
    # Otherwise, AR model
    else:
        # Row i of `Phi` holds the AR(i + 1) coefficients; v[i] is the
        # innovation variance of the AR(i) fit.
        Phi = np.zeros((max_order, max_order))
        v = np.zeros(max_order + 1)

        Phi[0, 0] = gamma[1] / gamma[0]
        v[0] = gamma[0]
        v[1] = v[0] * (1 - Phi[0, 0]**2)

        for i in range(1, max_order):
            tmp = Phi[i - 1, :i]
            Phi[i, i] = (gamma[i + 1] - np.dot(tmp, gamma[i:0:-1])) / v[i]
            Phi[i, :i] = (tmp - Phi[i, i] * tmp[::-1])
            v[i + 1] = v[i] * (1 - Phi[i, i]**2)

        ar_params = [None] + [Phi[i, :i + 1] for i in range(max_order)]
        sigma2 = v

    # Compute output: one parameter object per order 0, ..., max_order
    out = []
    for i in range(max_order + 1):
        order_spec = SARIMAXSpecification(ar_order=i)
        p = SARIMAXParams(spec=order_spec)
        if i == 0:
            p.params = sigma2[i]
        else:
            p.params = np.r_[ar_params[i], sigma2[i]]
        out.append(p)

    # Construct other results. Report the specification built from the
    # inputs (it carries `endog`), not the data-less per-order one left
    # over from the loop above.
    other_results = Bunch({
        'spec': max_spec,
    })

    return out, other_results
import numpy as np from statsmodels.tools.tools import Bunch pls5 = Bunch() pls5.smooth = Bunch() pls5.smooth.term = 'times' pls5.smooth.bs_dim = 7 pls5.smooth.dim = 1 pls5.smooth.by = 'NA' pls5.smooth.label = 's(times)' pls5.smooth.sp = 1 pls5.smooth.BD = np.array([ -0.0322305472050642, 0.0332895629742452, -0.00907144581575865, 0.00386174436551668, -0.00624916066961505, 0.0181385348730838, 0.0292384327901831, -0.0717740723184547, 0.054914261809955, -0.0158383049768667, 0.00626042823599089, -0.0103543594891998, -0.011074996356557, 0.0526128870346165, -0.0930190975208449, 0.0595200902721069, -0.0135721686724522, 0.00600098849448633, 0.00625687187895293, -0.0145166841858048, 0.0594020303618183, -0.0946831790103269, 0.0511018949689336, -0.0114519440129956, -0.00994300967116444, 0.00619931821053046, -0.0156728054209229, 0.0550549724656574, -0.0669161912708059, 0.0271416199423184, 0.0177532485636497, -0.00581101171513275, 0.00344705658575325, -0.00791532311608746, 0.0293751974079486, -0.0294748398076931 ]).reshape(6, 6, order='F') pls5.smooth.xp = np.array([ 2.4, 11.2, 17.8, 24.8, 31.2, 41, 57.6
def x13_arima_select_order(endog, maxorder=(2, 1), maxdiff=(2, 1), diff=None,
                           exog=None, log=None, outlier=True, trading=False,
                           forecast_years=None,
                           start=None, freq=None, print_stdout=False,
                           x12path=None, prefer_x13=True):
    """
    Perform automatic seasonal ARIMA order identification using x12/x13 ARIMA.

    Parameters
    ----------
    endog : array-like, pandas.Series
        The series to model. It is best to use a pandas object with a
        DatetimeIndex or PeriodIndex. However, you can pass an array-like
        object. If your object does not have a dates index then ``start`` and
        ``freq`` are not optional.
    maxorder : tuple
        The maximum order of the regular and seasonal ARMA polynomials to
        examine during the model identification. The order for the regular
        polynomial must be greater than zero and no larger than 4. The
        order for the seasonal polynomial may be 1 or 2.
    maxdiff : tuple
        The maximum orders for regular and seasonal differencing in the
        automatic differencing procedure. Acceptable inputs for regular
        differencing are 1 and 2. The maximum order for seasonal differencing
        is 1. If ``diff`` is specified then ``maxdiff`` should be None.
        Otherwise, ``diff`` will be ignored. See also ``diff``.
    diff : tuple
        Fixes the orders of differencing for the regular and seasonal
        differencing. Regular differencing may be 0, 1, or 2. Seasonal
        differencing may be 0 or 1. ``maxdiff`` must be None, otherwise
        ``diff`` is ignored.
    exog : array-like
        Exogenous variables.
    log : bool or None
        If None, it is automatically determined whether to log the series or
        not. If False, logs are not taken. If True, logs are taken.
    outlier : bool
        Whether or not outliers are tested for and corrected, if detected.
    trading : bool
        Whether or not trading day effects are tested for.
    forecast_years : int
        Number of forecasts produced. The default is one year.
    start : str, datetime
        Must be given if ``endog`` does not have date information in its
        index. Anything accepted by pandas.DatetimeIndex for the start value.
    freq : str
        Must be given if ``endog`` does not have date information in its
        index. Anything accepted by pandas.DatetimeIndex for the freq value.
    print_stdout : bool
        The stdout from X12/X13 is suppressed. To print it out, set this
        to True. Default is False. Note that this function does not forward
        the argument to ``x13_arima_analysis``.
    x12path : str or None
        The path to x12 or x13 binary. If None, the program will attempt
        to find x13as or x12a on the PATH or by looking at X13PATH or X12PATH
        depending on the value of prefer_x13.
    prefer_x13 : bool
        If True, will look for x13as first and will fallback to the X13PATH
        environmental variable. If False, will look for x12a first and will
        fallback to the X12PATH environmental variable. If x12path points
        to the path for the X12/X13 binary, it does nothing.

    Returns
    -------
    results : Bunch
        A bunch object that has the following attributes:

        - order : tuple
          The regular order
        - sorder : tuple
          The seasonal order
        - include_mean : bool
          Whether to include a mean or not
        - results : str
          The full results from the X12/X13 analysis
        - stdout : str
          The captured stdout from the X12/X13 analysis

    Raises
    ------
    ValueError
        If the X12/X13 output does not contain a
        "Final automatic model choice" line.

    Notes
    -----
    This works by creating a specification file, writing it to a temporary
    directory, invoking X12/X13 in a subprocess, and reading the output back
    in.
    """
    # NOTE(review): `print_stdout` is accepted but not passed on below;
    # confirm whether `x13_arima_analysis` takes it before forwarding.
    analysis = x13_arima_analysis(endog, x12path=x12path, exog=exog, log=log,
                                  outlier=outlier, trading=trading,
                                  forecast_years=forecast_years,
                                  maxorder=maxorder, maxdiff=maxdiff,
                                  diff=diff, start=start, freq=freq,
                                  prefer_x13=prefer_x13)
    report = analysis.results

    # The look-behind keeps only the text after the label on that line
    model = re.search("(?<=Final automatic model choice : ).*", report)
    if model is None:
        # Without this guard the failure is an opaque AttributeError on None
        raise ValueError("Could not find the 'Final automatic model choice'"
                         " line in the X12/X13 output; automatic model"
                         " identification may not have completed.")
    order, sorder = _clean_order(model.group())

    # A mean is included only if the output mentions a constant and does
    # not report the mean as insignificant.
    mean_rejected = re.search("Mean is not significant", report) is not None
    has_constant = re.search("Constant", report) is not None
    include_mean = has_constant and not mean_rejected

    return Bunch(order=order, sorder=sorder, include_mean=include_mean,
                 results=report, stdout=analysis.stdout)
-.00018443722054, -.03257408922788, -.00018443722054, .00205106413403, -.3943459697384, -.03257408922788, -.3943459697384, 140.50692606398]).reshape(3, 3) cov_dk4_stata = np.array([ .00018052657317, -.00035661054613, -.06728261073866, -.00035661054613, .0024312795189, -.32394785247278, -.06728261073866, -.32394785247278, 148.60456447156]).reshape(3, 3) results = Bunch( cov_clu_stata=cov_clu_stata, cov_pnw0_stata=cov_pnw0_stata, cov_pnw1_stata=cov_pnw1_stata, cov_pnw4_stata=cov_pnw4_stata, cov_dk0_stata=cov_dk0_stata, cov_dk1_stata=cov_dk1_stata, cov_dk4_stata=cov_dk4_stata )
def _window_ols(y, x, window=None, window_type=None, min_periods=None):
    """
    Minimal replacement for pandas ols that provides the required features

    Parameters
    ----------
    y : pd.Series
        Endogenous variable
    x : pd.DataFrame
        Exogenous variables. A copy is taken and an ``intercept`` column of
        ones is always appended to it.
    window : {None, int}
        Window length. If None and `window_type` is rolling or expanding,
        the full length of `y` is used; if None otherwise, a single
        full-sample regression is run.
    window_type : {str, int}
        One of 'full_sample', 'rolling' or 'expanding', or the module
        constants FULL_SAMPLE, ROLLING or EXPANDING. If None while a window
        is given, 'rolling' is used.
    min_periods : {None, int}
        Minimum number of non-missing observations needed to estimate a
        window. Defaults to 1; for rolling windows it is always replaced by
        `window`.

    Returns
    -------
    results : Bunch
        Bunch containing parameters (beta), R-squared (r2), nobs and
        residuals (resid). For rolling and expanding estimation only the
        windows that were actually estimated are returned, and `resid`
        holds the residual of the last observation in each window.

    Raises
    ------
    ValueError
        If `window_type` is not one of the recognised values.
    """
    # Must return beta, r2, resid, nobs
    if window_type == FULL_SAMPLE:
        window_type = 'full_sample'
    elif window_type == ROLLING:
        window_type = 'rolling'
    elif window_type == EXPANDING:
        window_type = 'expanding'

    if window_type in ('rolling', 'expanding') and window is None:
        window = y.shape[0]

    min_periods = 1 if min_periods is None else min_periods
    window_type = 'full_sample' if window is None else window_type
    window_type = 'rolling' if window_type is None else window_type
    if window_type == 'rolling':
        min_periods = window

    if window_type not in ('full_sample', 'rolling', 'expanding'):
        raise ValueError('Unknown window_type')

    x = x.copy()
    x['intercept'] = 1.0

    bunch = Bunch()
    if window_type == 'full_sample':
        # `axis` is passed by keyword: pandas 2.0 no longer accepts it
        # positionally in DataFrame.any
        missing = y.isnull() | x.isnull().any(axis=1)
        y = y.loc[~missing]
        x = x.loc[~missing]

        res = OLS(y, x).fit()
        bunch['beta'] = res.params
        bunch['r2'] = res.rsquared
        bunch['nobs'] = res.nobs
        bunch['resid'] = res.resid
        return bunch

    index = y.index
    columns = x.columns
    n = y.shape[0]
    k = x.shape[1]

    beta = pd.DataFrame(np.zeros((n, k)), columns=columns, index=index)
    r2 = pd.Series(np.zeros(n), index=index)
    # Builtin `int` / `bool` replace the `np.int` / `np.bool` aliases,
    # which were removed in NumPy 1.24
    nobs = r2.copy().astype(int)
    resid = r2.copy()
    valid = r2.copy().astype(bool)

    start = window if window_type == 'rolling' else min_periods
    for i in range(start, n + 1):
        # i is right edge, as in y[:i] for expanding
        if window_type == 'rolling':
            sel = slice(max(0, i - window), i)
        else:
            sel = slice(i)
        # Explicit positional slicing of the current window
        _y = y.iloc[sel]
        _x = x.iloc[sel]

        missing = _y.isnull() | _x.isnull().any(axis=1)
        if missing.any():
            if (~missing).sum() < min_periods:
                continue
            _y = _y.loc[~missing]
            _x = _x.loc[~missing]
        # Need more observations than regressors
        if _y.shape[0] <= _x.shape[1]:
            continue
        # Skip expanding windows whose newest observation is missing
        if window_type == 'expanding' and missing.values[-1]:
            continue

        res = OLS(_y, _x).fit()
        valid.iloc[i - 1] = True
        beta.iloc[i - 1] = res.params
        r2.iloc[i - 1] = res.rsquared
        nobs.iloc[i - 1] = int(res.nobs)
        resid.iloc[i - 1] = res.resid.iloc[-1]

    bunch['beta'] = beta.loc[valid]
    bunch['r2'] = r2.loc[valid]
    bunch['nobs'] = nobs.loc[valid]
    bunch['resid'] = resid.loc[valid]

    return bunch
def yule_walker(endog, ar_order=0, demean=True, unbiased=False):
    """
    Fit an autoregression by solving the Yule-Walker equations.

    Parameters
    ----------
    endog : array_like or SARIMAXSpecification
        Time series to fit; the method presumes it is stationary.
    ar_order : int, optional
        Order of the autoregression. Default is 0.
    demean : bool, optional
        When True (the default) the mean is estimated and removed before
        the autoregressive coefficients are computed.
    unbiased : bool, optional
        Selects the "unbiased" autocovariance estimator, which divides by
        n - h instead of n. Despite its name it is unbiased only when the
        process mean is known and not estimated, and for some processes it
        can yield an autocovariance matrix that is not positive definite.
        Default is False.

    Returns
    -------
    parameters : SARIMAXParams object
        Holds the estimated AR coefficients and innovation variance.
    other_results : Bunch
        Has a single entry, `spec`: the `SARIMAXSpecification` built from
        the input arguments.

    Raises
    ------
    ValueError
        If the requested AR specification is not consecutive.

    Notes
    -----
    See section 5.1.1 of [1]_; section 2.4.2 there discusses what the
    "unbiased" autocovariance estimate changes.

    The series is assumed to be stationary.

    References
    ----------
    .. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
       Introduction to Time Series and Forecasting. Springer.
    """
    spec = SARIMAXSpecification(endog, ar_order=ar_order)
    series = spec.endog
    estimates = SARIMAXParams(spec=spec)

    if not spec.is_ar_consecutive:
        raise ValueError('Yule-Walker estimation unavailable for models with'
                         ' seasonal or non-consecutive AR orders.')

    # Pick the autocovariance estimator understood by the low-level routine
    if unbiased:
        acov_method = 'unbiased'
    else:
        acov_method = 'mle'

    ar_coefs, sigma = linear_model.yule_walker(
        series, order=ar_order, demean=demean, method=acov_method)
    estimates.ar_params = ar_coefs
    # The low-level routine's second output is squared to get the variance
    estimates.sigma2 = sigma**2

    return estimates, Bunch({
        'spec': spec,
    })
.85678082704544, 1.022847533226, 1.0930491685867, 1.0342184305191, 1.2070096731186, 1.2472279071808, 1.0886085033417, 1.3604420423508, 1.1053978204727, 2.0939025878906, 1.0898643732071, 1.3238569498062, 1.5171576738358, .77435439825058, 1.3360253572464, 1.5512014627457, 1.3569095134735, 1.4669530391693, 1.9312930107117, 1.52878677845, 2.3952746391296, .80755305290222, -.2365039139986, .85178333520889, 1.1858888864517 ]) icstats = np.array( [202, np.nan, -240.21658671417, 4, 488.43317342834, 501.66624421795]) results = Bunch(llf=llf, nobs=nobs, k=k, k_exog=k_exog, sigma=sigma, chi2=chi2, df_model=df_model, k_ar=k_ar, k_ma=k_ma, params=params, cov_params=cov_params, xb=xb, y=y, resid=resid, yr=yr, mse=mse, stdp=stdp, icstats=icstats)
import numpy as np from statsmodels.tools.tools import Bunch epanechnikov_hsheather_q75 = Bunch() epanechnikov_hsheather_q75.table = np.array( [[.6440143, .0122001, 52.79, 0.000, .6199777, .6680508], [62.39648, 13.5509, 4.60, 0.000, 35.69854, 89.09443]]) epanechnikov_hsheather_q75.psrsquared = 0.6966 epanechnikov_hsheather_q75.rank = 2 epanechnikov_hsheather_q75.sparsity = 223.784434936344 epanechnikov_hsheather_q75.bwidth = .1090401129546568 # epanechnikov_hsheather_q75.kbwidth = 59.62067927472172 # Stata 12 results epanechnikov_hsheather_q75.kbwidth = 59.30 # TODO: why do we need lower tol? epanechnikov_hsheather_q75.df_m = 1 epanechnikov_hsheather_q75.df_r = 233 epanechnikov_hsheather_q75.f_r = .0044685860313942 epanechnikov_hsheather_q75.N = 235 epanechnikov_hsheather_q75.q_v = 745.2352905273438 epanechnikov_hsheather_q75.q = .75 epanechnikov_hsheather_q75.sum_rdev = 43036.06956481934 epanechnikov_hsheather_q75.sum_adev = 13058.50008841318 epanechnikov_hsheather_q75.convcode = 0 biweight_bofinger = Bunch() biweight_bofinger.table = np.array( [[.5601805, .0136491, 41.04, 0.000, .533289, .5870719], [81.48233, 15.1604, 5.37, 0.000, 51.61335, 111.3513]]) biweight_bofinger.psrsquared = 0.6206 biweight_bofinger.rank = 2 biweight_bofinger.sparsity = 216.8218989750115