def get_d():
    """Search for a differencing order using the Ljung-Box p-values.

    The series returned by ``index2period()`` is differenced repeatedly (at
    most four times). After each differencing step the Ljung-Box p-values are
    computed, and the search stops at the first order for which at least half
    of those p-values are below 0.05.

    Returns:
        tuple: ``(d, d_data)`` where ``d`` is the selected differencing order
        (1..4) and ``d_data`` is the series differenced ``d`` times with NaN
        rows dropped.

    Raises:
        ValueError: if none of the orders 1..4 meets the criterion. The
            previous implementation failed here with an ``UnboundLocalError``
            because ``d`` was never assigned.
    """
    d_data = index2period()
    for d in range(1, 5):
        d_data = d_data.diff().dropna()
        # Element 1 of the result is read as the per-lag p-values.
        # NOTE(review): this relies on the tuple-returning acorr_ljungbox
        # API -- confirm against the installed statsmodels version.
        p_values = acorr_ljungbox(d_data)[1]
        if len(p_values[p_values < 0.05]) / len(p_values) >= 0.5:
            return d, d_data
    raise ValueError(
        "no differencing order in 1..4 produced at least 50% of "
        "Ljung-Box p-values below 0.05"
    )
def _setup_autocorrelation(self, residuals):
    """Flag, per lag, whether the Ljung-Box test on ``residuals`` is significant.

    Lags ``1 .. self.AUTOCORR_MAX_LAG`` are tested. The result is stored in
    ``self.autocorrelation`` as the element-wise comparison
    ``p_value <= self.AUTOCORR_SIGNIFICANCE_LEVEL``; nothing is returned.
    """
    tested_lags = range(1, self.AUTOCORR_MAX_LAG + 1)
    # One p-value per tested lag; a small p-value is evidence against the
    # "no autocorrelation" null hypothesis of the Ljung-Box test.
    _statistic, lag_p_values = acorr_ljungbox(residuals, lags=tested_lags)
    significance = self.AUTOCORR_SIGNIFICANCE_LEVEL
    self.autocorrelation = lag_p_values <= significance
def _modlSignf(self, resid):
    """Print the Ljung-Box statistics of ``resid`` and test their p-values.

    :param resid: series passed unchanged to ``acorr_ljungbox``.
    :return: True when the smallest Ljung-Box p-value is above 0.05.
    """
    # The Box-Pierce Q statistic is only returned when boxpierce=True.
    lb_stat, lb_pvalues, _bp_stat, _bp_pvalues = acorr_ljungbox(
        resid, boxpierce=True)
    for item in ("modlSinf =", lb_stat, lb_pvalues):
        print(item)
    smallest_pvalue = min(lb_pvalues)
    return smallest_pvalue > 0.05
def pval_test():
    """Plot the Ljung-Box and Box-Pierce p-values of ``sequence_chart()``.

    Both p-value sequences are drawn against their position index on the
    current matplotlib figure, which is then shown. Nothing is returned.
    """
    data_1 = sequence_chart()
    # boxpierce=True makes acorr_ljungbox return the Box-Pierce results too.
    qljungbox, pval, qboxpierce, pvalbp = acorr_ljungbox(data_1, boxpierce=True)
    # Each line needs a label; without one plt.legend() has nothing to show.
    plt.plot(range(len(pval)), pval, label="pval")
    plt.plot(range(len(pvalbp)), pvalbp, label="pvalbp")
    plt.legend()
    plt.show()
def randomness(self, data=None):
    """
    Randomness (white-noise) check using the Ljung-Box test at lag 1.

    By default acorr_ljungbox computes only the LB statistic; the Q
    (Box-Pierce) statistic is returned only when boxpierce=True. As a rule,
    when the p-value is below 0.05 the null hypothesis can be rejected and
    the series is considered not to be white noise.

    :param data: series to test; ``self.data`` is used when omitted. It must
        provide ``dropna()``.
    :return: ``(is_random, p_value)`` where ``p_value`` is the first
        Ljung-Box p-value and ``is_random`` is True when it is above 0.05.
    """
    if data is None:
        data = self.data
    # An explicit lag count of 1: the earlier ``lags=True`` only worked
    # because a bool is an int and True == 1.
    lbvalue, pval = acorr_ljungbox(data.dropna(), lags=1)
    first_pval = pval[0]
    return bool(first_pval > 0.05), first_pval
def _plot_diff_diagnostics(diffed):
    """Draw one 1x3 figure for a differenced series: values, ACF, LB p-values."""
    fig, axes = plt.subplots(ncols=3, nrows=1, figsize=(15, 3))
    axes[0].plot(diffed)
    autocorrelation_plot(diffed, ax=axes[1])
    # Element 1 of the acorr_ljungbox result is plotted as the p-values.
    axes[2].plot(acorr_ljungbox(diffed)[1])


def arima():
    """Plot differencing diagnostics for the agriculture series and pick (p, d, q).

    The CSV is downloaded and plotted, then one diagnostic figure is drawn
    for each of: first difference, lag-2 difference, second difference and
    third difference. ``d`` is fixed to 1 and ``(p, q)`` is the AIC-minimal
    order reported by ``arma_order_select_ic`` on the first difference.

    Returns:
        tuple: ``(p, d, q)``.
    """
    series_ch = pd.read_csv(
        "http://labfile.oss.aliyuncs.com/courses/1176/agriculture.csv",
        index_col=0)
    series_ch.plot(figsize=(9, 6))

    diff_ch = series_ch.diff().dropna()
    differenced_variants = (
        diff_ch,
        series_ch.diff(periods=2).dropna(),
        series_ch.diff().diff().dropna(),
        series_ch.diff().diff().diff().dropna(),
    )
    # Same three-panel figure for every variant, in the original order.
    for diffed in differenced_variants:
        _plot_diff_diagnostics(diffed)

    d = 1
    p, q = arma_order_select_ic(diff_ch, ic='aic')['aic_min_order']
    print('p,d,q', p, d, q)
    return p, d, q
def draw_picture():
    """Print the Ljung-Box result (lags=6) for each monitored parameter.

    For every parameter code the data is fetched with ``get_data`` and the
    ``'data_value'`` column is passed to ``acorr_ljungbox``. The matplotlib
    font settings are also configured (SimHei font, ASCII minus sign).

    Returns:
        int: always 0.
    """
    parameter_type = ['W01', '060', 'W02', '101', 'W07']
    wdp_mode = {
        'W01': '水温',
        '060': '氨氮',
        'W02': '溶解氧',
        '101': '总磷',
        'W07': '高锰酸盐',
    }
    data_set = {parameter: get_data(parameter) for parameter in parameter_type}

    # These settings do not depend on the parameter, so apply them once
    # instead of on every loop iteration.
    mpl.rcParams['font.sans-serif'] = ['SimHei']  # display Chinese text correctly
    mpl.rcParams['axes.unicode_minus'] = False

    for key, value in data_set.items():
        print(wdp_mode[key], acorr_ljungbox(value['data_value'], lags=6))
    return 0
def beforehand_test(df):
    """Univariate tests: build a per-column table of diagnostics for ``df``.

    The result stacks, row-wise:

    * ``df.describe()`` plus skewness, kurtosis and the last t-value of an
      AR fit for each order in ``(1, 15)``;
    * the ``'p'`` row of the ``adfTest`` results, renamed to ``ADF(p)``;
    * ``ARCH`` (last element of ``het_arch``), ``LBQ`` (Ljung-Box p-value at
      the last of 15 lags) and ``JB`` (element 1 of ``stats.jarque_bera``).

    :param df: DataFrame with one series per column.
    :return: DataFrame indexed by statistic name, one column per input column.
    """
    from scipy import stats
    from statsmodels.sandbox.stats.diagnostic import acorr_ljungbox, het_arch

    def adf_sets(xs):
        """Run ``adfTest`` on every column and key the results by column."""
        return {c: adfTest(xs[c]) for c in xs}

    # A tuple default avoids sharing one mutable list across calls.
    def statistic(df, l_ar_order=(1, 15)):
        """Descriptive statistics, skew/kurtosis and AR t-values per column."""
        cols = df.columns
        skew = stats.skew(df)
        kuro = stats.kurtosis(df)
        ar_tvalues = {}
        for _order in l_ar_order:
            ar_tvalues["AR(%s)" % _order] = [
                sm.tsa.AR(df[c]).fit(_order).tvalues[-1] for c in df]
        ar_tvalues = pd.DataFrame(ar_tvalues, index=cols).T
        desb2 = pd.DataFrame(np.vstack((skew, kuro)),
                             columns=cols, index=['skew', "kuro"])
        desb = pd.concat([df.describe(), desb2, ar_tvalues], axis=0)
        return desb

    accor = {}
    for c in df:
        s = df[c]
        accor[c] = pd.Series(
            [het_arch(s)[-1],
             # p-values are element 1 of the result; [-1] selects lag 15.
             # This is what the former np.hsplit(...)[-1][-1][0] extracted.
             acorr_ljungbox(s, 15)[1][-1],
             stats.jarque_bera(s)[1]],
            index=['ARCH', 'LBQ', 'JB'])
    return pd.concat([
        statistic(df),
        pd.DataFrame(adf_sets(df)).loc[['p']].rename({"p": "ADF(p)"}),
        pd.DataFrame(accor),
    ])
def boxpierce_test():
    """Plot the Ljung-Box and Box-Pierce statistics of ``sequence_chart()``.

    The number of p-values is printed, then both statistic sequences are
    drawn against their position index and the figure is shown. Nothing is
    returned.
    """
    data_1 = sequence_chart()
    # boxpierce=True makes acorr_ljungbox return the Box-Pierce results too.
    qljungbox, pval, qboxpierce, pvalbp = acorr_ljungbox(data_1, boxpierce=True)
    print(len(pval))
    # Each line needs a label; without one plt.legend() has nothing to show.
    plt.plot(range(len(qljungbox)), qljungbox, label="ql")
    plt.plot(range(len(qboxpierce)), qboxpierce, label="qb")
    plt.legend()
    plt.show()
def lb(x):
    """Return the Ljung-Box statistics and p-values for lags=4 as one array.

    The two sequences returned by ``acorr_ljungbox`` are concatenated with
    ``np.r_``: statistics first, then p-values.
    """
    statistics, p_values = acorr_ljungbox(x, lags=4)
    combined = np.r_[statistics, p_values]
    return combined
def lb(x):
    """Return the Ljung-Box statistics and p-values for lags=4 as one array.

    With ``return_df=True`` the result is a DataFrame. Tuple-unpacking a
    DataFrame yields its column labels rather than the data, so the
    ``lb_stat`` and ``lb_pvalue`` columns are selected by name.

    Returns:
        numpy.ndarray: the statistics followed by the p-values.
    """
    result = acorr_ljungbox(x, lags=4, return_df=True)
    statistics = result["lb_stat"].to_numpy()
    p_values = result["lb_pvalue"].to_numpy()
    return np.r_[statistics, p_values]
def lb1(x):
    """Return ``(statistic, p_value)`` of the Ljung-Box test with lags=1.

    With ``return_df=True`` the result is a DataFrame. Tuple-unpacking a
    DataFrame yields its column labels rather than the data, so the columns
    are selected by name. ``.iloc`` is used because the first row must be
    taken by position, not by label.
    """
    result = acorr_ljungbox(x, lags=1, return_df=True)
    return result["lb_stat"].iloc[0], result["lb_pvalue"].iloc[0]
def lb4(x):
    """Return ``(statistic, p_value)`` of the last row of the lags=4 Ljung-Box test.

    With ``return_df=True`` the result is a DataFrame. Tuple-unpacking a
    DataFrame yields its column labels rather than the data, so the columns
    are selected by name and the last row is taken by position.
    """
    result = acorr_ljungbox(x, lags=4, return_df=True)
    return result["lb_stat"].iloc[-1], result["lb_pvalue"].iloc[-1]
def lb1(x):
    """Return the first Ljung-Box statistic and p-value for lags=1."""
    statistics, p_values = acorr_ljungbox(x, lags=1)
    first_statistic = statistics[0]
    first_p_value = p_values[0]
    return first_statistic, first_p_value
def lb4(x):
    """Return the last Ljung-Box statistic and p-value for lags=4."""
    statistics, p_values = acorr_ljungbox(x, lags=4)
    last_statistic = statistics[-1]
    last_p_value = p_values[-1]
    return last_statistic, last_p_value
print(u'原始序列的ADF检验结果为:', ADF(data[u'销量'])) # 返回值依次为adf(原始序列的单位根)、pvalue、usedlag、nobs、critical values、icbest、regresults、resstore #p值0.99显著大于0.05,该序列为非平稳序列 #进行差分 D_data = data.diff().dropna() #diff沿着指定轴计算第N维的离散差值,即矩阵后一个元素减去前一个元素 D_data.columns = [u'销量差分'] D_data.plot() #差分后的时序图 plot_acf(D_data) #自相关图 plot_pacf(D_data) #偏自相关图 # plt.show() print(u'差分序列的ADF检验结果为:', ADF(D_data[u'销量差分'])) #一阶差分后时序图在均值附近波动,自相关图有很强的短期相关性,p值0.02小于0.05,说明序列是平稳序列 #对一阶差分后的序列做白噪声检验 print(u'差分序列的白噪声检验结果为:', acorr_ljungbox(D_data, lags=1)) #输出为stat、p值,0.00077远小于0.05,所以是平稳白噪声序列 data[u'销量'] = data[u'销量'].astype(float) #对平稳白噪声序列拟合ARMA模型 #1、先定级确定p,q pmax = int(len(D_data) / 10) #一般阶数不超过length/10 qmax = int(len(D_data) / 10) bic_matrix = [] #bic矩阵 data.dropna(inplace=True) import warnings warnings.filterwarnings('error') for p in range(pmax + 1): tmp = []
def lb1(x):
    """Return ``(statistic, p_value)`` at the first lag of a lags=1 Ljung-Box test."""
    lb_statistics, lb_p_values = acorr_ljungbox(x, lags=1)
    return (lb_statistics[0], lb_p_values[0])
def findSimModel(ts):
    '''
    Look for a simple model for the series.

    The series is differenced to stationarity with ``difStationary``, lag
    orders are obtained from ``getpq``, a SARIMAX model is fitted and its
    parameter p-values and residual Ljung-Box p-values are checked with
    ``paraSignf`` and ``modlSignf``.

    :param ts: time series
    :return: ``(isSimModel, model)``; ``model`` is None when the parameter
        or residual check fails.
    '''
    # Difference the series to obtain a stationary series.
    stationSeries = difStationary(ts)
    exitSimModel = False
    isSimModel = True
    # Differencing order: the first key of the returned mapping.
    d = list(stationSeries.keys())[0]
    station = stationSeries[d]
    p, q = getpq(station)
    if len(p) > 8 or max(p) >= 12:
        isSimModel = False
        print('自相关系数超过两倍标准差的阶数有:{}'.format(p))
    # Fixed: this check used max(p), a copy-paste of the test above.
    if len(q) > 8 or max(q) >= 12:
        isSimModel = False
        print('偏自相关系数超过两倍标准差的阶数:{}'.format(q))
    # Determine p and q for the series.
    # NOTE(review): ARp and MAq are computed but the model below is always
    # fitted with order (1, d, 1) -- confirm whether they should be used.
    if isSimModel:
        ARp = max(p)
        MAq = max(q)
    else:
        # Fixed: the undefined name ``y`` was passed here; the stationary
        # series is the only series available in this function.
        res = sm.tsa.arma_order_select_ic(station, ic=['aic'], trend='nc')
        ARp = int(res.aic_min_order[0])
        MAq = int(res.aic_min_order[1])
        print('AIC MIN ORDER:{}'.format(res.aic_min_order))
    model = SARIMAX(station,
                    order=(1, d, 1),
                    seasonal_order=(0, 0, 0, 12),
                    enforce_stationarity=False,
                    enforce_invertibility=False)
    results = model.fit()
    p_values = results.pvalues
    t_values = results.tvalues
    resid = results.resid
    # The Box-Pierce Q statistic is only returned when boxpierce=True.
    qljungbox, pval, qboxpierce, pvalbp = acorr_ljungbox(resid, boxpierce=True)
    print("qlb=")
    print(qljungbox[0:60])
    print("pval=")
    print(pval[0:60])
    print("t_values = ")
    print(t_values)
    print("p_value = ")
    print(p_values)
    paraSign = paraSignf(p_values)
    modlSign = modlSignf(pval)
    print(paraSign)
    print(modlSign)
    if paraSign and modlSign:
        exitSimModel = True
    if exitSimModel:
        print("存在合适的拟合模型!")
        # NOTE(review): this returns the model specification, not the fitted
        # ``results`` object -- confirm what callers expect.
        return isSimModel, model
    else:
        print("没有合适的拟合模型!")
        return isSimModel, None
def isWN(ts):
    """Print the Ljung-Box statistics of ``ts`` and negate ``modlSignf``.

    :param ts: series passed unchanged to ``acorr_ljungbox``.
    :return: ``not modlSignf(pval)`` for the Ljung-Box p-values.
    """
    # The Box-Pierce Q statistic is only returned when boxpierce=True.
    lb_stat, lb_pvalues, _bp_stat, _bp_pvalues = acorr_ljungbox(
        ts, boxpierce=True)
    print(lb_stat)
    print(lb_pvalues)
    verdict = modlSignf(lb_pvalues)
    return not verdict
# NOTE(review): this fragment starts mid-script; `f`, `list_pre`, `wt` and
# `np` are defined outside the visible part, and the original nesting of the
# first statement is unknown -- confirm.
line = f.readline()
# Collect element 565 of every record, before and after `wt(...)` is applied.
pre_565 = []
aft_565 = []
for i in list_pre:
    pre_565.append(i[565])
    aft_565.append(wt(i, 'db3', 4, 1, 4)[565])
import matplotlib.pyplot as plt
# NOTE(review): x has 1000 points, so plt.plot(x, y1) needs list_pre to
# hold exactly 1000 records -- confirm.
x = np.arange(0,1000)
y1 = pre_565
y2 = aft_565
# Ljung-Box and Box-Pierce results for the untransformed values only.
qljungbox, pval, qboxpierce, pvalbp = acorr_ljungbox(y1, boxpierce=True)
plt.plot(range(0, len(pval)), pval)
plt.plot(range(0, len(pvalbp)), pvalbp)
# NOTE(review): neither line above has a label, so this legend is empty.
plt.legend()
plt.show()
# Second figure: values before and after the transform.
plt.plot(x,y1)
plt.plot(x,y2)
plt.show()
def lb4(x):
    """Return ``(statistic, p_value)`` at the last lag of a lags=4 Ljung-Box test."""
    lb_statistics, lb_p_values = acorr_ljungbox(x, lags=4)
    return (lb_statistics[-1], lb_p_values[-1])
#!/usr/bin/env python3
"""Print the ADF and Ljung-Box results for the first column of MonthlyWeather.txt."""
import pandas as pd
import statsmodels.tsa.stattools as ts
from statsmodels.sandbox.stats.diagnostic import acorr_ljungbox

if __name__ == "__main__":
    # The file has no header row; only its first column is analysed.
    weather_table = pd.read_csv('MonthlyWeather.txt', header=None, sep=',')
    first_column = weather_table.iloc[:, 0]

    # Augmented Dickey-Fuller test with the lag chosen by AIC.
    adf_result = ts.adfuller(first_column, autolag='AIC')
    print(adf_result)

    # Ljung-Box test at lags 6 and 12; the whole result is printed.
    ljung_box_result = acorr_ljungbox(first_column, lags=[6, 12])
    print(ljung_box_result)
from statsmodels.tsa.arima_model import ARMA
from statsmodels.sandbox.stats.diagnostic import acorr_ljungbox
import matplotlib.pyplot as plt

if __name__ == "__main__":
    # NOTE(review): `pd` and `st` are used below but are not imported in the
    # visible part of the file -- presumably imported above this fragment.
    allData = pd.read_csv('MonthlyWeather.txt', header=None, sep=',')
    data = allData.iloc[:, 0]
    # Split: the first 234 observations are used for fitting, the remainder
    # is kept for comparison with the forecast.
    original_new = data[234:]
    data = data[0:234]
    # NOTE(review): `order` is computed but not used in the visible code;
    # the ARMA order below is hard-coded to (4, 3).
    order = st.arma_order_select_ic(data, ic=['aic', 'bic'])
    model = ARMA(data, order=(4, 3))
    result_arma = model.fit(disp=-1, method='css')
    print(result_arma.summary())
    # Difference between the fitting data and the in-sample prediction,
    # NaN entries dropped.
    predict_ts = result_arma.predict()
    err = (data - predict_ts).dropna()
    # Ljung-Box test on that difference at lags 6, 12, 18 and 24; the whole
    # result is printed, not only the p-values.
    p_value = acorr_ljungbox(err, [6, 12, 18, 24])
    print(p_value)
    # Prediction for positions 234..271, plotted against the held-out data.
    predict_new = result_arma.predict(
        234,
        271, )
    ax = predict_new.plot(label='forecast')
    original_new.plot(label='observed')
    ax.set_xlabel('Month')
    ax.set_ylabel('Precipitation')
    plt.legend()
    plt.show()
    predict_new = predict_new.values
    original_new = original_new.values
    # Counters initialised here; their use lies beyond the visible fragment.
    tp = 0
    tn = 0