def _fit_start_params_hr(self, order):
    """
    Get starting parameters for fit.

    Parameters
    ----------
    order : iterable
        (p,q,k) - AR lags, MA lags, and number of exogenous variables
        including the constant.

    Returns
    -------
    start_params : array
        A first guess at the starting parameters.

    Notes
    -----
    If necessary, fits an AR process with the lag length selected according
    to best BIC and obtains the residuals.  Then fits an ARMA(p,q) model via
    OLS using these residuals for a first approximation.  Uses a separate
    OLS regression to find the coefficients of exogenous variables.

    References
    ----------
    Hannan, E.J. and Rissanen, J. 1982. "Recursive estimation of mixed
    autoregressive-moving average order."  `Biometrika`.  69.1.
    """
    p, q, k = order
    start_params = np.zeros(p + q + k)
    endog = self.endog.copy()  # copy because overwritten
    exog = self.exog
    if k != 0:
        ols_params = GLS(endog, exog).fit().params
        start_params[:k] = ols_params
        endog -= np.dot(exog, ols_params).squeeze()
    if q != 0:
        if p != 0:
            # fit a long AR process with lag length chosen by BIC, then
            # use its residuals as proxies for the innovations
            armod = AR(endog).fit(ic='bic', trend='nc')
            arcoefs_tmp = armod.params
            p_tmp = armod.k_ar
            resid = endog[p_tmp:] - np.dot(lagmat(endog, p_tmp, trim='both'),
                                           arcoefs_tmp)
            # align the lagged endog and lagged resid so both start at the
            # same observation
            if p < p_tmp + q:
                endog_start = p_tmp + q - p
                resid_start = 0
            else:
                endog_start = 0
                resid_start = p - p_tmp - q
            lag_endog = lagmat(endog, p, 'both')[endog_start:]
            lag_resid = lagmat(resid, q, 'both')[resid_start:]
            # stack ar lags and resids
            X = np.column_stack((lag_endog, lag_resid))
            coefs = GLS(endog[max(p_tmp + q, p):], X).fit().params
            start_params[k:k + p + q] = coefs
        else:
            start_params[k + p:k + p + q] = yule_walker(endog, order=q)[0]
    if q == 0 and p != 0:
        arcoefs = yule_walker(endog, order=p)[0]
        start_params[k:k + p] = arcoefs
    return start_params
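# For reference, the Hannan-Rissanen two-step recursion implemented above can
# be exercised standalone.  This is a minimal sketch, not the method's actual
# entry point: the simulated ARMA(1,1) series, the long-AR order p_long, and
# all names below are made up, and plain numpy least squares stands in for GLS.
import numpy as np
from statsmodels.tsa.tsatools import lagmat

rng = np.random.default_rng(12345)
eps = rng.standard_normal(500)
y = np.zeros(500)
for t in range(1, 500):
    y[t] = 0.6 * y[t - 1] + eps[t] + 0.3 * eps[t - 1]

# step 1: long AR regression by OLS to proxy the innovations
p_long = 10
Xar = lagmat(y, p_long, trim='both')
yar = y[p_long:]
ar_long = np.linalg.lstsq(Xar, yar, rcond=None)[0]
resid = yar - Xar @ ar_long

# step 2: regress y on its own lags and on lags of the step-1 residuals
p, q = 1, 1
lag_y = lagmat(y, p, trim='both')[p_long + q - p:]
lag_e = lagmat(resid, q, trim='both')
X = np.column_stack((lag_y, lag_e))
coefs = np.linalg.lstsq(X, y[p_long + q:], rcond=None)[0]
print(coefs)  # roughly (0.6, 0.3)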
def _stackX(self, k_ar, trend):
    """
    Private method to build the RHS matrix for estimation.

    Columns are trend terms then lags.
    """
    endog = self.endog
    X = lagmat(endog, maxlag=k_ar, trim='both')
    k_trend = util.get_trendorder(trend)
    if k_trend:
        X = add_trend(X, prepend=True, trend=trend)
    self.k_trend = k_trend
    return X
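# A toy illustration of what _stackX assembles, using the same lagmat and
# add_trend helpers; the series y and the lag order are placeholders.
import numpy as np
from statsmodels.tsa.tsatools import lagmat, add_trend

y = np.arange(10.0)
k_ar = 2
X = lagmat(y, maxlag=k_ar, trim='both')    # columns are y[t-1], y[t-2]
X = add_trend(X, prepend=True, trend='c')  # prepend the constant column
print(X[:3])
# [[1. 1. 0.]
#  [1. 2. 1.]
#  [1. 3. 2.]]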
def fit(self, nlags):
    """estimate parameters using ols

    Parameters
    ----------
    nlags : integer
        number of lags to include in regression, same for all variables

    Returns
    -------
    None, but attaches

    arhat : array (nlags+1, nvar, nvar)
        full lag polynomial array
    arlhs : array (nlags, nvar, nvar)
        reduced lag polynomial for left hand side
    other statistics as returned by linalg.lstsq : need to be completed

    This currently assumes all parameters are estimated without restrictions.
    In this case SUR is identical to OLS.

    Estimation results are attached to the class instance.
    """
    self.nlags = nlags  # without current period

    nvars = self.nvars
    # TODO: ar2s looks like a module variable, bug?
    # lmat = lagmat(ar2s, nlags, trim='both', original='in')
    lmat = lagmat(self.y, nlags, trim='both', original='in')
    self.yred = lmat[:, :nvars]
    self.xred = lmat[:, nvars:]
    res = np.linalg.lstsq(self.xred, self.yred, rcond=None)
    self.estresults = res
    self.arlhs = res[0].reshape(nlags, nvars, nvars)
    self.arhat = ar2full(self.arlhs)
    self.rss = res[1]
    self.xredrank = res[2]
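# Usage sketch for the core of fit above, on hypothetical data: plain white
# noise, so the estimated lag coefficients should be near zero.
import numpy as np
from statsmodels.tsa.tsatools import lagmat

rng = np.random.default_rng(0)
y = rng.standard_normal((200, 2))
nvars, nlags = 2, 1
lmat = lagmat(y, nlags, trim='both', original='in')
yred = lmat[:, :nvars]   # current values, the regressands
xred = lmat[:, nvars:]   # stacked lags, the regressors
bhat = np.linalg.lstsq(xred, yred, rcond=None)[0]
print(bhat.reshape(nlags, nvars, nvars))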
def _stackX(self, k_ar, trend):
    """
    Private method to build the RHS matrix for estimation.

    Columns are trend terms, then exogenous, then lags.
    """
    endog = self.endog
    exog = self.exog
    X = lagmat(endog, maxlag=k_ar, trim='both')
    if exog is not None:
        X = np.column_stack((exog[k_ar:, :], X))
    # Handle trend terms
    if trend == 'c':
        k_trend = 1
    elif trend == 'nc':
        k_trend = 0
    elif trend == 'ct':
        k_trend = 2
    elif trend == 'ctt':
        k_trend = 3
    else:
        raise ValueError("trend must be one of 'c', 'nc', 'ct', 'ctt'")
    if trend != 'nc':
        X = add_trend(X, prepend=True, trend=trend)
    self.k_trend = k_trend
    return X
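# A toy illustration of the exog-then-lags stacking above; endog, exog, and
# k_ar are placeholders.
import numpy as np
from statsmodels.tsa.tsatools import lagmat, add_trend

endog = np.arange(8.0)
exog = np.arange(8.0)[:, None] + 10.0
k_ar = 2
X = lagmat(endog, maxlag=k_ar, trim='both')
X = np.column_stack((exog[k_ar:, :], X))   # exogenous columns before the lags
X = add_trend(X, prepend=True, trend='c')
print(X[0])  # [ 1. 12.  1.  0.] -> constant, exog, lag 1, lag 2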
a22 = np.array([[[1.0, 0.0], [0.0, 1.0]],
                [[-0.8, 0.0], [0.1, -0.8]]])

a23 = np.array([[[1.0, 0.0], [0.0, 1.0]],
                [[-0.8, 0.2], [0.1, -0.6]]])

a24 = np.array([[[1.0, 0.0], [0.0, 1.0]],
                [[-0.6, 0.0], [0.2, -0.6]],
                [[-0.1, 0.0], [0.1, -0.1]]])

a31 = np.r_[np.eye(3)[None, :, :], 0.8 * np.eye(3)[None, :, :]]
a32 = np.array([[[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]],
                [[0.8, 0.0, 0.0], [0.1, 0.6, 0.0], [0.0, 0.0, 0.9]]])

########

ut = np.random.randn(1000, 2)
ar2s = vargenerate(a22, ut)
# res = np.linalg.lstsq(lagmat(ar2s, 1)[:, 1:], ar2s)
res = np.linalg.lstsq(lagmat(ar2s, 1), ar2s, rcond=None)
bhat = res[0].reshape(1, 2, 2)
arhat = ar2full(bhat)
# print(maxabs(arhat - a22))
v = Var(ar2s)
v.fit(1)
v.forecast()
v.forecast(25)[-30:]

ar23 = np.array([[[1.0, 0.0], [0.0, 1.0]],
                 [[-0.6, 0.0], [0.2, -0.6]],
                 [[-0.1, 0.0], [0.1, -0.1]]])

ma22 = np.array([[[1.0, 0.0], [0.0, 1.0]],
                 [[0.4, 0.0], [0.2, 0.3]]])

ar23ns = np.array([[[1.0, 0.0], [0.0, 1.0]],
                   [[-1.9, 0.0], [0.4, -0.6]],
                   [[0.3, 0.0], [0.1, -0.1]]])
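# The commented-out check above needs a maxabs helper that is not defined in
# this snippet; the definition below is an assumption about what was intended.
def maxabs(x):
    # largest absolute entry, a quick summary of estimation error
    return np.max(np.abs(x))

print(maxabs(arhat - a22))  # should be small if the OLS fit recovered a22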
def acorr_lm(x, maxlag=None, autolag='AIC', store=False):
    '''Lagrange Multiplier tests for autocorrelation

    not checked yet, copied from unitroot_adf with adjustments
    check array shapes because of the addition of the constant.
    written/copied without reference
    This is not Breusch-Godfrey.  BG adds lags of the residual to the exog in
    the design matrix for the auxiliary regression with residuals as endog,
    see Greene 12.7.1.

    Notes
    -----
    If x is calculated as y^2 for a time series y, then this test corresponds
    to the Engle test for autoregressive conditional heteroscedasticity
    (ARCH).
    TODO: get details and verify
    '''
    x = np.asarray(x)
    nobs = x.shape[0]
    if maxlag is None:
        # for adf from Greene referencing Schwert 1989
        maxlag = int(np.ceil(12. * np.power(nobs / 100., 1 / 4.)))
        # TODO: check default, or do AIC/BIC

    xdiff = np.diff(x)  # not used below

    xdall = lagmat(x[:-1, None], maxlag, trim='both')
    nobs = xdall.shape[0]
    xdall = np.c_[np.ones((nobs, 1)), xdall]
    xshort = x[-nobs:]

    if store:
        resstore = ResultsStore()

    if autolag:
        # search for lag length with lowest information criterion
        # Note: use the same number of observations to have comparable IC
        results = {}
        for mlag in range(1, maxlag + 1):
            results[mlag] = sm.OLS(xshort, xdall[:, :mlag + 1]).fit()

        if autolag.lower() == 'aic':
            icbest, icbestlag = min((v.aic, k) for k, v in results.items())
        elif autolag.lower() == 'bic':
            icbest, icbestlag = min((v.bic, k) for k, v in results.items())
        else:
            raise ValueError("autolag can only be None, 'AIC' or 'BIC'")

        # rerun ols with best ic
        xdall = lagmat(x[:, None], icbestlag, trim='forward')
        nobs = xdall.shape[0]
        xdall = np.c_[np.ones((nobs, 1)), xdall]
        xshort = x[-nobs:]
        usedlag = icbestlag
    else:
        usedlag = maxlag

    resols = sm.OLS(xshort, xdall[:, :usedlag + 1]).fit()
    fval = resols.fvalue
    fpval = resols.f_pvalue
    lm = nobs * resols.rsquared
    lmpval = stats.chi2.sf(lm, usedlag)
    # Note: degrees of freedom for LM test is nvars minus constant = usedlags
    if store:
        resstore.resols = resols
        resstore.usedlag = usedlag
        return fval, fpval, lm, lmpval, resstore
    else:
        return fval, fpval, lm, lmpval
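# As the Notes say, applying the test to a squared series gives an Engle ARCH
# test.  A minimal usage sketch on hypothetical data (white noise has no ARCH
# effects, so the test should not reject); assumes the imports acorr_lm needs
# (numpy as np, scipy.stats as stats, statsmodels.api as sm, lagmat) are in
# scope.
import numpy as np

rng = np.random.default_rng(42)
y = rng.standard_normal(500)
fval, fpval, lm, lmpval = acorr_lm(y**2, maxlag=4, autolag=None)
print(lm, lmpval)  # expect a large p-value under the null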