def pacf_ols(x, nlags=40):
    '''Calculate partial autocorrelations via OLS

    Parameters
    ----------
    x : 1d array
        observations of time series for which pacf is calculated
    nlags : int
        Number of lags for which pacf is returned, in addition to lag 0.

    Returns
    -------
    pacf : 1d array
        partial autocorrelations, nlags + 1 elements including lag 0

    Notes
    -----
    This solves a separate OLS estimation for each desired lag.
    '''
    #TODO: add warnings for Yule-Walker
    #NOTE: demeaning and not using a constant gave incorrect answers?
    #JP: demeaning should have a better estimate of the constant
    #maybe we can compare small sample properties with a MonteCarlo
    xlags = lagmat(x, nlags)
    x0 = xlags[:, 0]
    xlags = xlags[:, 1:]
    #xlags = sm.add_constant(lagmat(x, nlags), prepend=True)
    xlags = sm.add_constant(xlags, prepend=True)
    pacf = [1.]
    for k in range(1, nlags + 1):
        # regress x_t on a constant and its first k lags; the coefficient
        # on the k-th lag is the partial autocorrelation at lag k
        res = sm.OLS(x0[k:], xlags[k:, :k + 1]).fit()
        pacf.append(res.params[-1])
    return np.array(pacf)
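# Example usage for pacf_ols (a hedged sketch added for illustration, not
# part of the original module; assumes numpy is available as np at module
# level, as in the rest of this file).  For an AR(1) process the lag-1
# partial autocorrelation should be near the AR coefficient and higher
# lags near zero:
#
# >>> np.random.seed(12345)
# >>> e = np.random.randn(500)
# >>> y = np.zeros(500)
# >>> for t in range(1, 500):
# ...     y[t] = 0.6 * y[t-1] + e[t]
# >>> pacf = pacf_ols(y, nlags=10)    # 11 values, pacf[0] == 1.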
def acorr_lm(x, maxlag=None, autolag='AIC', store=False):
    '''Lagrange Multiplier test for autocorrelation

    Not checked yet, copied from unitroot_adf with adjustments; check array
    shapes because of the addition of the constant.  Written/copied without
    reference.

    This is not Breusch-Godfrey.  BG adds lags of the residual to exog in
    the design matrix for the auxiliary regression with residuals as endog,
    see Greene 12.7.1.

    Notes
    -----
    If x is calculated as y^2 for a time series y, then this test
    corresponds to the Engle test for autoregressive conditional
    heteroscedasticity (ARCH).

    TODO: get details and verify
    '''
    x = np.asarray(x)
    nobs = x.shape[0]
    if maxlag is None:
        #for adf, from Greene referencing Schwert 1989
        maxlag = int(np.ceil(12. * np.power(nobs / 100., 1 / 4.)))  #nobs//4
        #TODO: check default, or do AIC/BIC
    xdall = lagmat(x[:-1, None], maxlag, trim='both')
    nobs = xdall.shape[0]
    xdall = np.c_[np.ones((nobs, 1)), xdall]
    xshort = x[-nobs:]

    if store:
        resstore = ResultsStore()

    if autolag:
        #search for lag length with lowest information criterion
        #Note: use the same number of observations to have comparable IC
        results = {}
        for mlag in range(1, maxlag + 1):
            results[mlag] = sm.OLS(xshort, xdall[:, :mlag + 1]).fit()

        if autolag.lower() == 'aic':
            icbest, icbestlag = min((v.aic, k) for k, v in results.items())
        elif autolag.lower() == 'bic':
            icbest, icbestlag = min((v.bic, k) for k, v in results.items())
        else:
            raise ValueError("autolag can only be None, 'AIC' or 'BIC'")

        #rerun ols with best ic
        xdall = lagmat(x[:-1, None], icbestlag, trim='both')
        nobs = xdall.shape[0]
        xdall = np.c_[np.ones((nobs, 1)), xdall]
        xshort = x[-nobs:]
        usedlag = icbestlag
    else:
        usedlag = maxlag

    resols = sm.OLS(xshort, xdall[:, :usedlag + 1]).fit()
    fval = resols.fvalue
    fpval = resols.f_pvalue
    lm = nobs * resols.rsquared
    lmpval = stats.chi2.sf(lm, usedlag)
    # Note: degrees of freedom for LM test is nvars minus constant = usedlag

    if store:
        resstore.resols = resols
        resstore.usedlag = usedlag
        return fval, fpval, lm, lmpval, resstore
    else:
        return fval, fpval, lm, lmpval
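# Example usage for acorr_lm (a hedged sketch, not part of the original
# module; `resid` is a hypothetical residual series from some regression).
# The LM statistic is nobs * R^2 from the auxiliary regression and is
# asymptotically chi-squared with `usedlag` degrees of freedom under the
# null of no autocorrelation.  Passing squared values turns this into the
# Engle ARCH test mentioned in the docstring:
#
# >>> fval, fpval, lm, lmpval = acorr_lm(resid, maxlag=12, autolag='aic')
# >>> arch = acorr_lm(resid**2, maxlag=12, autolag='aic')   # ARCH test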
    def fit(self, maxlag=None, method='ols', ic=None, trend='c',
            demean=True, penalty=False, start_params=None, solver=None,
            maxiter=35, full_output=1, disp=1, callback=None, **kwargs):
        """
        Fit the unconditional maximum likelihood of an AR(p) process.

        Parameters
        ----------
        start_params : array-like, optional
            A first guess on the parameters.  Default is a vector of zeros.
        method : str {'ols', 'yw', 'mle', 'umle'}, optional
            ols - Ordinary Least Squares
            yw - Yule-Walker
            mle - conditional maximum likelihood
            umle - unconditional maximum likelihood
        solver : str or None, optional
            Unconstrained solvers: 'bfgs' (default), 'newton'
            (Newton-Raphson), 'ncg' (note that the previous 3 are not
            recommended at the moment), and 'powell'.
            Constrained solvers: 'bfgs-b', 'tnc'.
            See notes.
        maxiter : int, optional
            The maximum number of function evaluations.  Default is 35.
        tol : float
            The convergence tolerance.  Default is 1e-08.
        penalty : bool
            Whether or not to use a penalty function.  Default is False,
            though this is ignored at the moment and the penalty is always
            used if appropriate.  See notes.

        Notes
        -----
        The unconstrained solvers use a quadratic penalty (regardless of
        whether the penalty kwd is True or False) in order to ensure that
        the solution stays within (-1, 1).  The constrained solvers default
        to using a bound of (-.999, .999).

        See also
        --------
        scikits.statsmodels.model.LikelihoodModel.fit for more information
        on using the solvers.
        """
        self.penalty = penalty
        method = method.lower()
        nobs = self.nobs
        if maxlag is None:
            maxlag = int(round(12 * (nobs / 100.) ** (1 / 4.)))
        avobs = nobs - maxlag
        self.avobs = avobs
        laglen = maxlag
        self.laglen = laglen
        if demean:
            endog = self.endog.copy()   # have to copy if demeaning
            mean = endog.mean()
            endog -= mean
            self.endog_mean = mean
        else:
            endog = self.endog
        # LHS
        Y = endog[laglen:, :]
        # make lagged RHS
        X = lagmat(endog, maxlag=laglen, trim='both')[:, 1:]
        if self.exog is not None:
            X = np.column_stack((self.exog[laglen:, :], X))
        # Handle constant, etc.
        if trend == 'c':
            trendorder = 1
        elif trend == 'nc':
            trendorder = 0
        elif trend == 'ct':
            trendorder = 2
        elif trend == 'ctt':
            trendorder = 3
        if trend != 'nc':
            X = add_trend(X, prepend=True, trend=trend)
        self.trendorder = trendorder

        self.Y = Y
        self.X = X

        if solver:
            solver = solver.lower()
        #TODO: allow user-specified penalty function
#        if penalty and method not in ['bfgs_b','tnc','cobyla','slsqp']:
#            minfunc = lambda params : -self.loglike(params) - \
#                    self.penfunc(params)
#        else:
        if method == "mle":
            if not solver:  # make default?
                solver = 'newton'
            if start_params is None:
                start_params = np.zeros((X.shape[1]))
            if solver in ['newton', 'bfgs', 'ncg']:
                return super(AR, self).fit(start_params=start_params,
                        method=solver, maxiter=maxiter,
                        full_output=full_output, disp=disp,
                        callback=callback, **kwargs)
        elif method == "umle":
            #TODO: move this stuff up to LikelihoodModel.fit
            minfunc = lambda params: -self.loglike(params)
            bounds = [(-.999, .999)]    # assume stationarity
            if start_params is None:
                start_params = np.array([0])    # assumes AR(1)
            if solver == 'bfgs-b':
                retval = optimize.fmin_l_bfgs_b(minfunc, start_params,
                        approx_grad=True, bounds=bounds)
                self.params, self.llf = retval[0:2]
            if solver == 'tnc':
                retval = optimize.fmin_tnc(minfunc, start_params,
                        approx_grad=True, bounds=bounds)
                self.params = retval[0]
            if solver == 'powell':
                retval = optimize.fmin_powell(minfunc, start_params)
                self.params = np.atleast_1d(retval)
            #TODO: write regression tests for Pauli's branch so that
            # new line_search and optimize.nonlin can get put in.
            #http://projects.scipy.org/scipy/ticket/791
#            if solver == 'broyden':
#                retval = optimize.broyden2(minfunc, [.5], verbose=True)
#                self.results = retval
        elif method == "ols":
            arfit = OLS(Y, X).fit()
            params = arfit.params
            omega = None
            self.params = params
        elif method == "yw":
            params, omega = sm.regression.yule_walker(endog, order=maxlag,
                    method="mle", demean=False)
            self.params = params
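# Example usage for AR.fit (a hedged sketch, not part of the original
# module; assumes AR accepts a 2d endog array, as the indexing inside fit
# suggests, and the placeholder series `y` is illustrative only):
#
# >>> y = np.random.randn(200).cumsum()[:, None]   # placeholder data
# >>> mod = AR(y)
# >>> mod.fit(maxlag=4, method='ols', trend='c')
# >>> mod.params     # trend/constant terms first, then the AR coefficients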
def adfuller(x, maxlag=None, regression="c", autolag='AIC',
             store=False, regresults=False):
    '''Augmented Dickey-Fuller unit root test

    The Augmented Dickey-Fuller test can be used to test for a unit root
    in a univariate process in the presence of serial correlation.

    Parameters
    ----------
    x : array_like, 1d
        data series
    maxlag : int
        Maximum lag which is included in test, default 12*(nobs/100)^{1/4}
    regression : str {'c', 'ct', 'ctt', 'nc'}
        Constant and trend order to include in regression

        * 'c' : constant only
        * 'ct' : constant and trend
        * 'ctt' : constant, and linear and quadratic trend
        * 'nc' : no constant, no trend

    autolag : {'AIC', 'BIC', 't-stat', None}
        * if None, then maxlag lags are used
        * if 'AIC' or 'BIC', then the number of lags is chosen to minimize
          the corresponding information criterion
        * 't-stat' based choice of maxlag.  Starts with maxlag and drops a
          lag until the t-statistic on the last lag length is significant
          at the 95 % level.
    store : bool
        If True, then a result instance is returned additionally to the
        adf statistic
    regresults : bool
        If True, the full regression results are returned.

    Returns
    -------
    adf : float
        Test statistic
    pvalue : float
        MacKinnon's approximate p-value based on MacKinnon (1994)
    usedlag : int
        Number of lags used.
    nobs : int
        Number of observations used for the ADF regression and calculation
        of the critical values.
    critical values : dict
        Critical values for the test statistic at the 1 %, 5 %, and 10 %
        levels.  Based on MacKinnon (2010)
    icbest : float
        The maximized information criterion if autolag is not None.
    regresults : RegressionResults instance
        The full regression results, only returned if regresults is True.
    resstore : (optional) instance of ResultStore
        an instance of a dummy class with results attached as attributes

    Notes
    -----
    If the p-value is close to significant, then the critical values
    should be used to judge whether to accept or reject the null.

    Examples
    --------
    see example script

    References
    ----------
    Greene
    Hamilton

    P-Values (regression surface approximation)
    MacKinnon, J.G. 1994.  "Approximate asymptotic distribution functions
    for unit-root and cointegration tests."  `Journal of Business and
    Economic Statistics` 12, 167-76.

    Critical values
    MacKinnon, J.G. 2010.  "Critical Values for Cointegration Tests."
    Queen's University, Dept of Economics, Working Papers.  Available at
    http://ideas.repec.org/p/qed/wpaper/1227.html
    '''
    regression = regression.lower()
    if regression not in ['c', 'nc', 'ct', 'ctt']:
        raise ValueError("regression option %s not understood" % regression)
    x = np.asarray(x)
    nobs = x.shape[0]

    if regression == 'c':
        trendorder = 0
    elif regression == 'nc':
        trendorder = -1
    elif regression == 'ct':
        trendorder = 1
    elif regression == 'ctt':
        trendorder = 2
    # only make the trend once with biggest nobs
    trend = np.vander(np.arange(nobs), trendorder + 1)

    if maxlag is None:
        #from Greene referencing Schwert 1989
        maxlag = int(np.ceil(12. * np.power(nobs / 100., 1 / 4.)))

    xdiff = np.diff(x)
    xdall = lagmat(xdiff[:, None], maxlag, trim='both')
    nobs = xdall.shape[0]
    xdall[:, 0] = x[-nobs - 1:-1]   # replace 0 xdiff with level of x
    xdshort = xdiff[-nobs:]
#    xdshort = x[-nobs:]
#TODO: allow for 2nd xdshort as endog, with Phillips Perron or DF test?
    if store:
        resstore = ResultsStore()
    if autolag:
        if trendorder != -1:
            fullRHS = np.column_stack((trend[:nobs], xdall))
        else:
            fullRHS = xdall
        lagstart = trendorder + 1
        #search for lag length with best information criterion
        #Note: use the same number of observations to have comparable IC
        icbest, bestlag = _autolag(sm.OLS, xdshort, fullRHS, lagstart,
                                   maxlag, autolag)
        #rerun ols with best autolag
        xdall = lagmat(xdiff[:, None], bestlag, trim='both')
        nobs = xdall.shape[0]
#        trend = np.vander(np.arange(nobs), trendorder+1)
        xdall[:, 0] = x[-nobs - 1:-1]   # replace 0 xdiff with level of x
        xdshort = xdiff[-nobs:]
        usedlag = bestlag
    else:
        usedlag = maxlag
        icbest = None
    resols = sm.OLS(xdshort, np.column_stack([xdall[:, :usedlag + 1],
                                              trend[:nobs]])).fit()
    #NOTE: should be usedlag+1 since the first column is the level?
    adfstat = resols.tvalues[0]
#    adfstat = (resols.params[0]-1.0)/resols.bse[0]
    # the "asymptotically correct" z statistic is obtained as
    # nobs/(1-np.sum(resols.params[1:-(trendorder+1)])) (resols.params[0] - 1)
    # I think this is the statistic that is used for series that are
    # integrated for orders higher than I(1), ie., not ADF but cointegration
    # tests.

    # Get approx p-value and critical values
    pvalue = mackinnonp(adfstat, regression=regression, N=1)
    critvalues = mackinnoncrit(N=1, regression=regression, nobs=nobs)
    critvalues = {"1%" : critvalues[0], "5%" : critvalues[1],
                  "10%" : critvalues[2]}
    if store:
        resstore.resols = resols
        resstore.usedlag = usedlag
        resstore.adfstat = adfstat
        resstore.critvalues = critvalues
        resstore.nobs = nobs
        resstore.H0 = "The coefficient on the lagged level equals 1"
        resstore.HA = "The coefficient on the lagged level < 1"
        resstore.icbest = icbest
        return adfstat, pvalue, critvalues, resstore
    else:
        if not autolag:
            return adfstat, pvalue, usedlag, nobs, critvalues
        else:
            return adfstat, pvalue, usedlag, nobs, critvalues, icbest
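# Example usage for adfuller (a hedged sketch, not part of the original
# module).  The null hypothesis is a unit root; reject when the test
# statistic is more negative than the critical value:
#
# >>> x = np.random.randn(500).cumsum()   # random walk, has a unit root
# >>> adf, pvalue, usedlag, nobs, crit, icbest = adfuller(x, regression='c',
# ...                                                     autolag='AIC')
# >>> adf < crit['5%']     # expected False for a unit-root process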