#take I(2) series, etc.).
#not implemented since it is not strictly needed for the Granger test.


#2. difference once to I(1) series which are roughly stationary
#(e ~iid N(0,sigma^2))
#not implemented since it is not strictly necessary for the Granger test.


#3. estimate the partial autocorrelation function (PACF) of each series and
#run a Ljung-Box test on it to see which lags help predict current values.
def _printLjungBox(caption, rows):
	"""Print `caption`, then `rows` as a table with lag/R/Q/p-val columns."""
	print(caption)
	print(tabulate(rows, headers=["lag", "R", "Q", "p-val"]))

#PACF of each daily series, estimated by OLS for up to MAX_LAG lags
clicksPACF = stattools.pacf_ols(clicksPerDay, nlags=MAX_LAG)
encountersPACF = stattools.pacf_ols(encountersPerDay, nlags=MAX_LAG)

#Ljung-Box test: arguments are the PACF values, the length of the series
#they were computed from, and the maximum lag
clicksPACF_LJ = _math.ljungBox(clicksPACF, len(clicksPerDay), MAX_LAG)
encountersPACF_LJ = _math.ljungBox(encountersPACF, len(encountersPerDay), MAX_LAG)

_printLjungBox("clicks PACF Ljung-Box", clicksPACF_LJ)
_printLjungBox("encounters PACF Ljung-Box", encountersPACF_LJ)

#plot
def plotxy(title, x, y, LSRL=None):
	"""Show a bar chart of `y` against `x` in a new figure.

	title -- text placed above the axes.
	x, y  -- bar positions and bar heights, passed straight to matplotlib.
	         (x is presumably a sequence of dates, given the date-style tick
	         formatting applied below -- confirm against callers.)
	LSRL  -- accepted for callers' sake but not used anywhere in the body.

	Returns None; blocks in plt.show() until the window is closed when an
	interactive backend is in use.
	"""
	fig = plt.figure()
	axes = fig.add_subplot(111)
	#narrow bars so neighbouring x positions do not merge into one block
	axes.bar(x, y, width=0.08)
	axes.grid()
	axes.set_title(title)
	#rotate and right-align the x tick labels so date labels do not overlap
	fig.autofmt_xdate()
	plt.show()
# Example no. 2
# 0
#clicksPerDay.index.name = None
#encountersPerDay.index.name = None

#Reduce each per-day frame to a single Series holding its count column,
#keeping the frame's own index.
clicksPerDay = pd.Series(data=clicksPerDay["count_clicks"], index=clicksPerDay.index)
encountersPerDay = pd.Series(data=encountersPerDay["count_encounter"], index=encountersPerDay.index)

#Forward-fill gaps: a missing day takes the last observed value. .ffill() is
#used because fillna(method="ffill") is deprecated in current pandas; the
#result is the same.
clicksPerDay = clicksPerDay.ffill()
encountersPerDay = encountersPerDay.ffill()



####################LJUNG-BOX####################
#Compare three Ljung-Box computations on the encounters series.
clicksPACF = stattools.pacf_ols(clicksPerDay, nlags=MAX_LAG)
encountersPACF = stattools.pacf_ols(encountersPerDay, nlags=MAX_LAG)

#my implementation
#each result row unpacks to (lag, R, Q, p); the first entry of Q and p is
#dropped before printing
results = _math.ljungBox(encountersPACF, len(encountersPerDay), MAX_LAG)
lag, R, Q, p = zip(*results)
print(np.asarray(Q[1:]))
print(np.asarray(p[1:]))

#statsmodels implementation
results = diagnostic.acorr_ljungbox(encountersPerDay, lags=MAX_LAG)
print(results)

#my copy of the statsmodels implementation
results = _math.ljungBox2(encountersPerDay, maxlag=MAX_LAG)
print(results)


#they are not the same... I wonder why.
#NOTE(review): a likely cause is that _math.ljungBox above is fed *partial*
#autocorrelations (encountersPACF), while acorr_ljungbox receives the raw
#series and works from its ordinary autocorrelations -- confirm against the
#_math.ljungBox implementation.
# Example no. 3
# 0
#Test statistic, degrees of freedom and p-value for the t-test reported below.
#The p-value is the upper-tail probability P(T > tval), since t.sf is the
#survival function of Student's t distribution.
tval = tNum / tDen
df = len(R) - 3
pval = scipy.stats.t.sf(tval, df)

#Two-column report: one [name, value] row per quantity.
header = ["type", "value"]
data = [["tval", tval], ["pval", pval], ["df", df]]

print("\n")
print("*William's Modified Hotelling T Test*")
print(tabulate.tabulate(data, headers=header))

#Ljung Box Test & Autocorrelation/Crosscorrelation
#Each series is handed to _math.ljungBox paired with itself; the returned
#rows are printed under the same four column names.
header = ["lag", "R value", "Q", "p value"]

data = coeffs = _math.ljungBox(R, R, maxLag=7)
print("\n")
print("*Ljung-Box Test for Crosscorrelation*")
print("## R vs R ##")
print(tabulate.tabulate(data, headers=header))

data = coeffs = _math.ljungBox(t, t, maxLag=7)
print("## t vs t ##")
print(tabulate.tabulate(data, headers=header))

#Same Ljung-Box call as for R and t above, now with rsent paired with itself
#(maxLag=7 presumably caps the lags examined -- confirm in _math.ljungBox).
coeffs = _math.ljungBox(rsent, rsent, maxLag=7)
#column names for the table of result rows
header = ["lag", "R value", "Q", "p value"]