def d_param(self, diff):
    '''Report whether most ACF and PACF values fall inside the threshold band.

    The series for the requested differencing step is selected, its ACF and
    PACF are computed with ``tss.acf`` / ``tss.pacf``, a side-by-side
    ACF/PACF plot with 40 lags is written to a JPEG file (relative path),
    and the share of values whose absolute value is at most 0.08 is
    compared against 0.65 for both functions.

    Parameters
    ----------
    diff : int
        Differencing step. 0 uses ``self.val`` and saves
        'ACF_vs_PACF.jpg'; 1 uses ``self.diff1_val.dropna()`` and saves
        'ACF_vs_PACF_diff1.jpg'; 2 uses ``self.diff2_val.dropna()`` and
        saves 'ACF_vs_PACF_diff2.jpg'.

    Returns
    -------
    bool
        True when at least 65% of the ACF values and at least 65% of the
        PACF values lie within the threshold, False otherwise.

    Raises
    ------
    InvalidParamError
        If ``diff`` is not 0, 1 or 2.
    '''
    THRESHOLD = 0.08
    MIN_FRACTION = 0.65

    # Only the attribute belonging to the requested step is touched, so the
    # other differenced series do not need to exist.
    if diff == 0:
        series, plot_file = self.val, 'ACF_vs_PACF.jpg'
    elif diff == 1:
        series, plot_file = self.diff1_val.dropna(), 'ACF_vs_PACF_diff1.jpg'
    elif diff == 2:
        series, plot_file = self.diff2_val.dropna(), 'ACF_vs_PACF_diff2.jpg'
    else:
        raise InvalidParamError

    acf_values = tss.acf(series)
    pacf_values = tss.pacf(series)

    # Keep a handle on the figure we created instead of rebinding it to the
    # plotting helpers' return values, and always close it so a failure in
    # plotting or saving does not leave an open figure behind.
    fig = plt.figure(figsize=(12, 8))
    try:
        ax1 = fig.add_subplot(121)
        plot_acf(series, lags=40, ax=ax1)
        ax2 = fig.add_subplot(122, sharey=ax1)
        plot_pacf(series, lags=40, ax=ax2)
        fig.savefig(plot_file)
    finally:
        plt.close(fig)

    def fraction_within(values):
        # Share of entries with |value| <= THRESHOLD.
        inside = np.count_nonzero(np.abs(values) <= THRESHOLD)
        return inside / float(len(values))

    return (fraction_within(acf_values) >= MIN_FRACTION) and \
        (fraction_within(pacf_values) >= MIN_FRACTION)
def plot_acf_multiple(ys, lags=20):
    """Plot the autocorrelation function of every column of ``ys``.

    A new 10x10 figure is created with one subplot per column, stacked
    vertically. Each subplot shows vertical lines from zero to the
    autocorrelation value at lags ``0..lags``, a horizontal line at zero,
    a y-range of [-1, 1] and an x-range of [-1, lags + 1].

    Parameters
    ----------
    ys : array_like
        Data whose slices along axis 0 are passed to ``acf``; the result is
        indexed as a 2-D array, one column per series.
    lags : int, optional
        Largest lag passed to ``acf`` as ``nlags``. Default is 20.

    Notes
    -----
    ``mpl.rcParams['font.size']`` is set to 8 while plotting and restored
    to its previous value afterwards, also when plotting raises.
    """
    from scikits.statsmodels.tsa.stattools import acf

    xs = np.arange(lags + 1)

    # Computed before any figure exists, so a failure in acf does not leave
    # an empty figure or a modified font size behind.
    acorr = np.apply_along_axis(lambda x: acf(x, nlags=lags), 0, ys)
    k = acorr.shape[1]

    # hack: temporarily shrink the global font size for the stacked subplots
    old_size = mpl.rcParams['font.size']
    mpl.rcParams['font.size'] = 8
    try:
        plt.figure(figsize=(10, 10))
        for i in range(k):
            ax = plt.subplot(k, 1, i + 1)
            ax.vlines(xs, [0], acorr[:, i])

            ax.axhline(0, color='k')
            ax.set_ylim([-1, 1])

            # hack? pad the x-range by one lag on each side
            ax.set_xlim([-1, xs[-1] + 1])
    finally:
        mpl.rcParams['font.size'] = old_size
def plot_acf(y, lags=100, partial=False, ax=None):
    """Draw the (partial) autocorrelation function of ``y`` as vertical lines.

    Parameters
    ----------
    y : array_like
        Series handed to ``acf`` or ``pacf``.
    lags : int, optional
        Passed as ``nlags``; lines are drawn at positions ``0..lags``.
    partial : bool, optional
        If true, ``pacf`` is used instead of ``acf``.
    ax : matplotlib axes, optional
        Axes to draw on. When None, a new 10x5 figure with a single
        subplot is created.
    """
    from scikits.statsmodels.tsa.stattools import acf, pacf

    estimator = pacf if partial else acf
    values = estimator(y, nlags=lags)

    if ax is None:
        ax = plt.figure(figsize=(10, 5)).add_subplot(111)

    lag_positions = np.arange(lags + 1)
    ax.vlines(lag_positions, [0], values)
    ax.axhline(0, color='k')
def plot_acf(y, lags=100, partial=False, ax=None):
    """Plot autocorrelations (or partial autocorrelations) of ``y``.

    Parameters
    ----------
    y : array_like
        Input series for ``acf`` / ``pacf``.
    lags : int, optional
        Number of lags requested via ``nlags``; the x positions are
        ``0..lags``.
    partial : bool, optional
        Selects ``pacf`` when true, ``acf`` otherwise.
    ax : matplotlib axes, optional
        Target axes; a fresh 10x5 single-subplot figure is created when
        omitted.
    """
    from scikits.statsmodels.tsa.stattools import acf, pacf

    if ax is None:
        new_figure = plt.figure(figsize=(10, 5))
        ax = new_figure.add_subplot(111)

    if not partial:
        correlations = acf(y, nlags=lags)
    else:
        correlations = pacf(y, nlags=lags)

    ax.vlines(np.arange(lags+1), [0], correlations)
    ax.axhline(0, color='k')
def __init__(self):
    """Store the 'acvarfft' and 'Q1' entries of ``self.results`` and the
    output of ``acf`` on ``self.x`` (40 lags, qstat=True, fft=True)."""
    stored = self.results
    self.acf = stored['acvarfft']
    self.qstat = stored['Q1']

    acf_options = dict(nlags=40, qstat=True, fft=True)
    self.res1 = acf(self.x, **acf_options)
def __init__(self):
    """Store the 'acvar' and 'Q1' entries of ``self.results`` and the
    output of ``acf`` on ``self.x`` (40 lags, qstat=True)."""
    stored = self.results
    self.acf = stored['acvar']
    # Disabled in the original: prepending 1. to the stored values.
    #self.acf = np.concatenate(([1.], self.acf))
    self.qstat = stored['Q1']

    acf_options = dict(nlags=40, qstat=True)
    self.res1 = acf(self.x, **acf_options)
def acorr_ljungbox(x, lags=None, boxpierce=False):
    '''Ljung-Box test for no autocorrelation

    Parameters
    ----------
    x : array_like, 1d
        data series, regression residuals when used as diagnostic test
    lags : None, int or array_like
        If lags is an integer then this is taken to be the largest lag
        that is included, the test result is reported for all smaller lag
        lengths.
        If lags is a list or array, then all lags are included up to the
        largest lag in the list, however only the tests for the lags in the
        list are reported.
        If lags is None, then lags 1 through 40 are used.
    boxpierce : {False, True}
        If true, then in addition to the results of the Ljung-Box test the
        Box-Pierce test results are also returned

    Returns
    -------
    lbvalue : float or array
        test statistic
    pvalue : float or array
        p-value based on chi-square distribution
    bpvalue : (optional), float or array
        test statistic for Box-Pierce test
    bppvalue : (optional), float or array
        p-value for Box-Pierce test based on chi-square distribution

    Notes
    -----
    Ljung-Box and Box-Pierce statistics differ in their scaling of the
    autocorrelation function. Ljung-Box test is reported to have better
    small sample properties.

    could be extended to work with more than one series
    1d or nd ? axis ? ravel ?
    needs more testing

    Verification: looks correctly sized in Monte Carlo studies;
    not yet compared to verified values.

    Examples
    --------
    see example script

    References
    ----------
    Greene
    Wikipedia
    '''
    x = np.asarray(x)
    nobs = x.shape[0]
    if lags is None:
        lags = range(1, 41)  # TODO: check default; SS: changed to 40
    elif isinstance(lags, (int, np.integer)):
        # NumPy integer scalars are accepted like plain ints.
        lags = range(1, lags + 1)
    maxlag = max(lags)
    lags = np.asarray(lags)

    acfx = acf(x, nlags=maxlag)  # SS: unbiased=False is default now
    # Squared autocorrelations for lags 1..maxlag; entry i belongs to lag i+1.
    acf2 = acfx[1:maxlag + 1]**2

    # Ljung-Box weights each squared autocorrelation by 1 / (nobs - lag).
    acf2norm = acf2 / (nobs - np.arange(1, maxlag + 1))
    qljungbox = nobs * (nobs + 2) * np.cumsum(acf2norm)[lags - 1]
    pval = stats.chi2.sf(qljungbox, lags)
    if not boxpierce:
        return qljungbox, pval

    # The cumulative sums are 0-based while lags are 1-based, so lag k sits
    # at index k - 1. Indexing with `lags` directly ran past the end of the
    # array at the largest lag.
    qboxpierce = nobs * np.cumsum(acf2)[lags - 1]
    pvalbp = stats.chi2.sf(qboxpierce, lags)
    return qljungbox, pval, qboxpierce, pvalbp
# plt.show() # print adfuller(x_diff_1, regression="c") #The Augmented Dickey-Fuller test x_fit, res, lag = create_ar_modex(x) fig = plt.figure() ax = fig.add_subplot(1,1,1) ax.plot(x[lag:]) ax.plot(x_fit[lag:]) plt.legend(labels=['X', 'AR model']) plt.show() plt.bar(range(len(res)), res) plt.show() # check auto correlation acf = stattools.acf(res, nlags=100) plt.bar(range(len(acf)), acf) plt.show() # lbs = stattools.q_stat(acf, len(res[lag:])) # plt.bar(range(len(lbs[1])), lbs[1]) # plt.show() # print adfuller(res, regression="c") # The Augmented Dickey-Fuller test # y_fit, res = create_ar_modex(y) # fig = plt.figure() # ax = fig.add_subplot(1,1,1) # ax.plot(y) # ax.plot(y_fit) # plt.legend(labels=['Y', 'AR model'])
from scikits.statsmodels.tsa.arima_process import arma_generate_sample, arma_impulse_response from scikits.statsmodels.tsa.arima_process import arma_acovf, arma_acf, ARIMA # from movstat import acf, acovf # from scikits.statsmodels.sandbox.tsa import acf, acovf, pacf from scikits.statsmodels.tsa.stattools import acf, acovf, pacf ar = [1.0, -0.6] # ar = [1., 0.] ma = [1.0, 0.4] # ma = [1., 0.4, 0.6] # ma = [1., 0.] mod = "" #'ma2' x = arma_generate_sample(ar, ma, 5000) x_acf = acf(x)[:10] x_ir = arma_impulse_response(ar, ma) # print x_acf[:10] # print x_ir[:10] # irc2 = np.correlate(x_ir,x_ir,'full')[len(x_ir)-1:] # print irc2[:10] # print irc2[:10]/irc2[0] # print irc2[:10-1] / irc2[1:10] # print x_acf[:10-1] / x_acf[1:10] # detrend helper from matplotlib.mlab def detrend(x, key=None): if key is None or key == "constant": return detrend_mean(x) elif key == "linear":
def acorr_ljungbox(x, lags=None, boxpierce=False):
    '''Ljung-Box test for no autocorrelation

    Parameters
    ----------
    x : array_like, 1d
        data series, regression residuals when used as diagnostic test
    lags : None, int or array_like
        If lags is an integer then this is taken to be the largest lag
        that is included, the test result is reported for all smaller lag
        lengths.
        If lags is a list or array, then all lags are included up to the
        largest lag in the list, however only the tests for the lags in the
        list are reported.
        If lags is None, then lags 1 through 40 are used.
    boxpierce : {False, True}
        If true, then in addition to the results of the Ljung-Box test the
        Box-Pierce test results are also returned

    Returns
    -------
    lbvalue : float or array
        test statistic
    pvalue : float or array
        p-value based on chi-square distribution
    bpvalue : (optional), float or array
        test statistic for Box-Pierce test
    bppvalue : (optional), float or array
        p-value for Box-Pierce test based on chi-square distribution

    Notes
    -----
    Ljung-Box and Box-Pierce statistics differ in their scaling of the
    autocorrelation function. Ljung-Box test is reported to have better
    small sample properties.

    could be extended to work with more than one series
    1d or nd ? axis ? ravel ?
    needs more testing

    Verification: looks correctly sized in Monte Carlo studies;
    not yet compared to verified values.

    Examples
    --------
    see example script

    References
    ----------
    Greene
    Wikipedia
    '''
    x = np.asarray(x)
    nobs = x.shape[0]
    if lags is None:
        lags = range(1, 41)  # TODO: check default; SS: changed to 40
    elif isinstance(lags, (int, np.integer)):
        # NumPy integer scalars are accepted like plain ints.
        lags = range(1, lags + 1)
    maxlag = max(lags)
    lags = np.asarray(lags)

    acfx = acf(x, nlags=maxlag)  # SS: unbiased=False is default now
    # Squared autocorrelations for lags 1..maxlag; entry i belongs to lag i+1.
    acf2 = acfx[1:maxlag + 1]**2

    # Ljung-Box weights each squared autocorrelation by 1 / (nobs - lag).
    acf2norm = acf2 / (nobs - np.arange(1, maxlag + 1))
    qljungbox = nobs * (nobs + 2) * np.cumsum(acf2norm)[lags - 1]
    pval = stats.chi2.sf(qljungbox, lags)
    if not boxpierce:
        return qljungbox, pval

    # The cumulative sums are 0-based while lags are 1-based, so lag k sits
    # at index k - 1. Indexing with `lags` directly ran past the end of the
    # array at the largest lag.
    qboxpierce = nobs * np.cumsum(acf2)[lags - 1]
    pvalbp = stats.chi2.sf(qboxpierce, lags)
    return qljungbox, pval, qboxpierce, pvalbp
import matplotlib.mlab as mlab from scikits.statsmodels.tsa.arima_process import arma_generate_sample, arma_impulse_response from scikits.statsmodels.tsa.arima_process import arma_acovf, arma_acf, ARIMA #from movstat import acf, acovf #from scikits.statsmodels.sandbox.tsa import acf, acovf, pacf from scikits.statsmodels.tsa.stattools import acf, acovf, pacf ar = [1., -0.6] #ar = [1., 0.] ma = [1., 0.4] #ma = [1., 0.4, 0.6] #ma = [1., 0.] mod = '' #'ma2' x = arma_generate_sample(ar, ma, 5000) x_acf = acf(x)[:10] x_ir = arma_impulse_response(ar, ma) #print x_acf[:10] #print x_ir[:10] #irc2 = np.correlate(x_ir,x_ir,'full')[len(x_ir)-1:] #print irc2[:10] #print irc2[:10]/irc2[0] #print irc2[:10-1] / irc2[1:10] #print x_acf[:10-1] / x_acf[1:10] # detrend helper from matplotlib.mlab def detrend(x, key=None): if key is None or key == 'constant': return detrend_mean(x)
# Run VARMA on x with the coefficient arrays B and C.
xhat5, err5 = VARMA(x,B,C)
#print err5

#in differences
#VARMA(np.diff(x,axis=0),B,C)


#Note:
# * signal correlate applies same filter to all columns if kernel.shape[1]<K
#   e.g. signal.correlate(x0,np.ones((3,1)),'valid')
# * if kernel.shape[1]==K, then `valid` produces a single column
#   -> possible to run signal.correlate K times with different filters,
#      see the following example, which replicates VAR filter
x0 = np.column_stack([np.arange(T), 2*np.arange(T)])
B[:,:,0] = np.ones((P,K))
B[:,:,1] = np.ones((P,K))
B[1,1,1] = 0
xhat0 = VAR(x0,B)
xcorr00 = signal.correlate(x0,B[:,:,0])#[:,0]
xcorr01 = signal.correlate(x0,B[:,:,1])

# Compare the 'valid' correlation (last row dropped, first column) with the
# VAR output from row P on, one output column per filter slice of B.
# print(...) with a single argument behaves the same on Python 2 and 3.
print(np.all(signal.correlate(x0,B[:,:,0],'valid')[:-1,0]==xhat0[P:,0]))
print(np.all(signal.correlate(x0,B[:,:,1],'valid')[:-1,0]==xhat0[P:,1]))

#import error
#from movstat import acovf, acf
from scikits.statsmodels.tsa.stattools import acovf, acf
aav = acovf(x[:,0])
# Exact floating-point comparison of the first acovf value with np.var.
print(aav[0] == np.var(x[:,0]))
aac = acf(x[:,0])
def test_acf():
    """Compare ``tsa.acf(..., unbiased=False)`` with the stored ``mlacf``
    values for ``x100`` and ``x1000``.

    Only the first 21 autocorrelation values are compared, to 8 decimals.
    """
    cases = (
        (x100, mlacf.acf100),
        (x1000, mlacf.acf1000),
    )
    for series, expected in cases:
        computed = tsa.acf(series, unbiased=False)[:21]
        # Open question carried over from the original: why do the values
        # only agree to 8 decimals?
        assert_array_almost_equal(expected.ravel(), computed, 8)