Beispiel #1
0
def whitenoise_test(dataset, number):
    """Print lag-1 Ljung-Box verdicts for the ``rentNumber`` column.

    The last ``number`` rows of ``dataset`` are left out. The test is then
    run on the remaining ``rentNumber`` values and on their first difference,
    and one line with the verdict and the p-value is printed for each.

    :param dataset: table with a ``rentNumber`` column that supports
        ``copy()`` and ``iloc`` slicing; the caller's object is not modified.
    :param number: how many trailing rows to exclude.
    :return: None, the results are only printed.
    """
    trimmed = dataset.copy()
    trimmed = trimmed.iloc[:len(trimmed) - number]  # hold out the trailing rows

    from statsmodels.stats.diagnostic import acorr_ljungbox

    # Raw series: p < 0.05 is reported as "not white noise".
    (_stat,), (p_value,) = acorr_ljungbox(trimmed['rentNumber'], lags=1)
    template = (u'原始序列为非白噪声序列,对应的p值为:%s' if p_value < 0.05
                else u'原始该序列为白噪声序列,对应的p值为:%s')
    print(template % p_value)

    # First difference of the same column, same threshold.
    (_stat,), (p_value,) = acorr_ljungbox(
        trimmed['rentNumber'].diff().dropna(), lags=1)
    template = (u'一阶差分序列为非白噪声序列,对应的p值为:%s' if p_value < 0.05
                else u'一阶差分该序列为白噪声序列,对应的p值为:%s')
    print(template % p_value)
Beispiel #2
0
 def acorr_val(self):
     """Run a lag-1 Ljung-Box test on ``self.ts`` and return the p-value part.

     The statistic and the p-value are also passed to ``pre_table`` as a
     single row under the column names ``lbvalue`` and ``pvalue``.
     """
     statistic, p_value = acorr_ljungbox(self.ts, lags=1)
     pre_table(['lbvalue', 'pvalue'], [[statistic, p_value]])
     return p_value
Beispiel #3
0
    def diff_process(self):
        """Difference column 1 of ``self.df`` until the ADF p-value drops below 0.05.

        Sets ``self.p_value`` (the whole lag-1 Ljung-Box result; its element 1
        is what gets printed as the p-value), ``self.diff_`` (the series after
        the last differencing pass), ``self.ADF_value`` (latest ``adfuller``
        result) and ``self.i`` (number of differencing passes). Finally shows
        one figure with the original series on top and the differenced series
        below.
        """
        raw = self.df.iloc[:, 1]

        # Ljung-Box: p > 0.05 is read as white noise, i.e. no correlation in time.
        self.p_value = acorr_ljungbox(raw, lags=1)
        print('白噪声检验p值:', self.p_value[1], '\n')

        # Unit-root test: judge stationarity by the p-value and keep
        # differencing for as long as it is not below 0.05.
        self.diff_ = raw
        self.ADF_value = adfuller(self.diff_, autolag='AIC')
        self.i = 0
        while self.ADF_value[1] >= 0.05:
            self.diff_ = self.diff_.diff().dropna()  # one more differencing pass
            self.ADF_value = adfuller(self.diff_, autolag='AIC')
            # The result also carries the 1% / 5% / 10% critical values; a
            # statistic below all three rejects the unit root strongly.
            print('ADF检验:', '\n', self.ADF_value, '\n')
            self.i += 1

        fig = plt.figure(figsize=(20, 6))
        original_ax = fig.add_subplot(211)
        differenced_ax = fig.add_subplot(212)
        original_ax.plot(raw)
        differenced_ax.plot(self.diff_)
        plt.show()
Beispiel #4
0
def get_best_log(ts, max_log=5, rule1=True, rule2=True):
    """
    Log-transform a series repeatedly until it passes the ADF and Ljung-Box rules.

    :param ts: time-series data (a Series, per the original note)
    :param max_log: exclusive upper bound of the pass counter; passes
        1 .. max_log - 1 are attempted
    :param rule1: verdict of the first rule for the untransformed series,
        supplied by the caller
    :param rule2: verdict of the second rule for the untransformed series,
        supplied by the caller
    :return: ``(0, ts)`` when both incoming rules hold; otherwise
        ``(n, transformed_ts)`` for the first pass ``n`` that satisfies every
        rule. Falls off the end (returns None) when no pass qualifies.
    """
    if rule1 and rule2:
        return 0, ts

    for n_logs in range(1, max_log):
        ts = np.log(ts)
        # Ljung-Box at lag 1 (white-noise check).
        _, lb_pvalue = acorr_ljungbox(ts, lags=1)
        # ADF unit-root test (stationarity check).
        adf_stat, adf_pvalue, _, _, critical_values, _ = adfuller(ts)
        is_stationary = (
            all(adf_stat < critical_values[level]
                for level in ('1%', '5%', '10%'))
            and adf_pvalue < 0.01)
        rejects_white_noise = (lb_pvalue < 0.05)
        under_pass_cap = (n_logs < 5)
        if is_stationary and rejects_white_noise and under_pass_cap:
            print('the best log n is :{0}'.format(n_logs))
            return n_logs, ts
Beispiel #5
0
def whitenoiseTest(data, lagnum=1):
    """Return True when the Ljung-Box test finds no autocorrelation in ``data``.

    :param data: series passed to ``acorr_ljungbox``.
    :param lagnum: value forwarded as the ``lags`` argument (default 1).
    :return: False if any tested lag has p < 0.05 (not white noise),
        True otherwise.
    """
    # Forward the caller's lag count; a fixed lags=1 used to override it.
    _, p = acorr_ljungbox(data, lags=lagnum)
    return not (p < 0.05).any()
def whitenoise_test(ts):
    """Plot the Ljung-Box Q statistics and p-values of ``ts`` side by side.

    The test runs with the library's default lags. The figure is built under
    the 'ggplot' style; it is neither shown nor returned by this function.

    :param ts: series passed to ``acorr_ljungbox``.
    :return: None
    """
    from statsmodels.stats.diagnostic import acorr_ljungbox

    q_stats, p_values = acorr_ljungbox(ts)

    # (values, legend label, y-axis label) for the left and right panel.
    panels = (
        (q_stats, 'Q统计量', 'Q'),
        (p_values, 'p值', 'P'),
    )
    with plt.style.context('ggplot'):
        figure = plt.figure(figsize=(10, 4))
        for axis, (values, label, ylabel) in zip(figure.subplots(1, 2), panels):
            axis.plot(values, label=label)
            axis.set_ylabel(ylabel)
            axis.set_title('收益率残差平方自相关性检验')
            axis.legend()
        plt.tight_layout()
Beispiel #7
0
def selectFFT(series, minAlpha=None):
    """Forward-select FFT frequencies that best whiten the residual of ``series``.

    Each round tentatively adds every remaining frequency to the selection,
    rebuilds the signal with the inverse FFT and scores the residual
    ``series - rebuilt`` by the Ljung-Box statistic at the largest lag. The
    frequency with the lowest statistic is kept, provided it beats the best
    score seen so far.

    :param series: 2-D column data indexed as ``[row, 0]``.
    :param minAlpha: optional stopping threshold. When given, the search stops
        once the best p-value exceeds it, or once the statistic ratio
        new / previous exceeds ``1 - minAlpha``.
    :return: dict with 'series' (real part of the reconstruction), 'fft'
        (selected coefficients), 'res' (coefficients not selected) and 'crit'
        (best ``(statistic, p-value)`` pair, or None).
    """
    # 1) Initialise: pool of {frequency index: coefficient}, empty selection,
    #    and the number of lags used by the test.
    spectrum = np.fft.fft(series, axis=0)
    remaining = {idx: row[0] for idx, row in enumerate(spectrum)}
    selected = np.zeros(series.shape, dtype=complex)
    lags = int(12 * (series.shape[0] / 100.)**.25)
    crit = None
    # 2) Search forward
    while True:
        best_key, prev_crit = None, crit
        for candidate in remaining:
            # Try the candidate, score it, then take it out again.
            selected[candidate, 0] = remaining[candidate]
            rebuilt = np.real(np.fft.ifft(selected, axis=0))
            outcome = sm3.acorr_ljungbox(series - rebuilt, lags=lags)
            candidate_crit = outcome[0][-1], outcome[1][-1]  # values at the max lag
            if crit is None or candidate_crit[0] < crit[0]:
                crit, best_key = candidate_crit, candidate
            selected[candidate, 0] = 0
        if best_key is None:
            # No candidate improved on the current best score.
            break
        selected[best_key, 0] = remaining[best_key]
        del remaining[best_key]
        if minAlpha is not None:
            if crit[1] > minAlpha:
                break
            if prev_crit is not None and crit[0] / prev_crit[0] > 1 - minAlpha:
                break
    rebuilt = np.real(np.fft.ifft(selected, axis=0))
    return {'series': rebuilt, 'fft': selected, 'res': remaining, 'crit': crit}
def con_SARIMAX(y_series=None, season=7):
    """
    Decide whether ``y_series`` qualifies for SARIMAX modelling.

    Two conditions must hold: the series has at least 50 observations, and
    after a first-order difference followed by a seasonal difference the ADF
    test rejects a unit root (statistic below the 1%, 5% and 10% critical
    values and p-value < 0.01).

    :param y_series: series to check; must support ``len`` and ``diff``.
        The caller's series is not modified.
    :param season: lag of the seasonal difference (default 7).
    :return: True when both conditions hold, otherwise False.
    """
    has_enough_data = len(y_series) >= 50
    p_value_shreshold = 0.05

    # First-order difference. diff() returns a new series, so dropping the
    # leading NaN in place leaves y_series untouched.
    ts_diff = y_series.diff(1)
    ts_diff.dropna(inplace=True)
    # Seasonal difference on top of the first difference. It used to be taken
    # from y_series again, which silently discarded the step above.
    ts_diff = ts_diff.diff(season)
    ts_diff.dropna(inplace=True)

    # Ljung-Box white-noise test; computed but deliberately not part of the verdict.
    lbvalue, pvalue2 = acorr_ljungbox(ts_diff, lags=1)
    rule_1 = (pvalue2 < p_value_shreshold)

    # ADF stationarity test.
    adf, pvalue1, usedlag, nobs, critical_values, icbest = adfuller(ts_diff)
    rule_2 = (adf < critical_values['1%']
              and adf < critical_values['5%']
              and adf < critical_values['10%']
              and pvalue1 < 0.01)
    is_stationary = bool(rule_2)

    return has_enough_data and is_stationary
    def test_acorr_ljung_box(self):
        """Compare Ljung-Box and Box-Pierce results at lag 4 with values from R.

        The expected numbers were produced with R's ``Box.test``; the R call
        is quoted above each reference dict. The two ``fitdf=2`` references
        are defined but not asserted in this block.
        """
        residuals = self.res.resid

        # R: bt = Box.test(residuals(fm), lag=4, type = "Ljung-Box")
        # R: mkhtest(bt, "ljung_box_4", "chi2")
        ljung_box_4 = {'statistic': 5.23587172795227,
                       'pvalue': 0.263940335284713,
                       'parameters': (4,),
                       'distr': 'chi2'}

        # R: bt = Box.test(residuals(fm), lag=4, type = "Box-Pierce")
        # R: mkhtest(bt, "ljung_box_bp_4", "chi2")
        ljung_box_bp_4 = {'statistic': 5.12462932741681,
                          'pvalue': 0.2747471266820692,
                          'parameters': (4,),
                          'distr': 'chi2'}

        # ddof correction for fitted parameters in ARMA(p,q): fitdf=p+q
        # R: bt = Box.test(residuals(fm), lag=4, type = "Ljung-Box", fitdf=2)
        # R: mkhtest(bt, "ljung_box_4df2", "chi2")
        ljung_box_4df2 = {'statistic': 5.23587172795227,
                          'pvalue': 0.0729532930400377,
                          'parameters': (2,),
                          'distr': 'chi2'}

        # R: bt = Box.test(residuals(fm), lag=4, type = "Box-Pierce", fitdf=2)
        # R: mkhtest(bt, "ljung_box_bp_4df2", "chi2")
        ljung_box_bp_4df2 = {'statistic': 5.12462932741681,
                             'pvalue': 0.0771260128929921,
                             'parameters': (2,),
                             'distr': 'chi2'}

        lb_stat, lb_pvalue, bp_stat, bp_pvalue = smsdia.acorr_ljungbox(
            residuals, 4, boxpierce=True)
        # Only the entry for the largest lag is compared.
        compare_t_est([lb_stat[-1], lb_pvalue[-1]], ljung_box_4,
                      decimal=(13, 14))
        compare_t_est([bp_stat[-1], bp_pvalue[-1]], ljung_box_bp_4,
                      decimal=(13, 14))
Beispiel #10
0
def model_eval(dct_model, **kwargs):
    """
    Store evaluation statistics of a fitted model inside ``dct_model``.

    Parameters:
        dct_model : mapping expected to hold the fit result under the key
            'result'.
        kwargs : extra options for the evaluation:
            - b_eval : boolean telling whether the evaluation is carried out
              (default True).

    Returns:
        The string 'evaluation' when ``b_eval`` is not a boolean or when the
        key 'result' is missing (a message is printed in both cases),
        otherwise None. When ``b_eval`` is true, ``dct_model['eval_stat']`` is
        set to a dict with the keys 'AIC', 'BIC', 'SSE', 'MSE' and
        'LjungBox test'.
    """
    b_eval = kwargs.get('b_eval', True)
    if not isinstance(b_eval, bool):
        print("'b_eval' parameter must be a boolean.")
        return 'evaluation'
    if 'result' not in dct_model:
        print("No SARIMAXResult present for the key 'result' in dct_model.")
        return 'evaluation'
    if not b_eval:
        return

    fitted = dct_model['result']
    dct_model['eval_stat'] = {
        'AIC': fitted.aic,
        'BIC': fitted.bic,
        'SSE': fitted.sse,
        'MSE': fitted.mse,
        # Single lag: the integer part of log(number of residuals).
        'LjungBox test': acorr_ljungbox(
            x=fitted.resid,
            lags=[int(log(fitted.resid.shape[0]))]),
    }
    return
def serial_correlation(variable, plot_name='autocorr_error.png'):
    """Save an autocorrelation plot of ``variable`` and print its Ljung-Box test.

    :param variable: series to inspect.
    :param plot_name: file name the plot is saved under.
    :return: None
    """
    autocorrelation_plot(variable)
    plot.savefig(plot_name)
    plot.close()
    # Lag choice: at most 10, otherwise a fifth of the sample size.
    # See https://robjhyndman.com/hyndsight/ljung-box-test/
    n_lags = min(10, round(len(variable) / 5))
    ljung_box_result = acorr_ljungbox(variable, lags=n_lags)
    print(ljung_box_result)
def residue_test(residue):
    '''
    Inspect ARIMA residuals: are they normally distributed with zero mean and
    constant variance, and free of autocorrelation?

    Shows a PACF plot (35 lags) and a Q-Q plot, prints the first 15 rows of a
    lag / AC / Q / Prob(>Q) table and finally prints the lag-1 Ljung-Box
    result.

    :param residue: residual series; ``residue.values.squeeze()`` is what the
        PACF and ACF helpers receive.
    :return: None
    '''
    # PACF goes into the lower half of the figure (subplot 212); nothing is
    # drawn in the upper half.
    fig = plt.figure(figsize=(12, 8))
    ax2 = fig.add_subplot(212)
    plot_pacf(residue.values.squeeze(), lags=35, ax=ax2)
    plt.show()

    # Q-Q plot to judge whether the residuals follow a normal distribution.
    fig = plt.figure(figsize=(8, 6))
    ax = fig.add_subplot(111)
    qqplot(residue, line='q', ax=ax, fit=True)
    plt.show()

    # Ljung-Box Q statistics over a range of lags: overall serial correlation.
    r1, q1, p1 = ACF(residue.values.squeeze(), qstat=True)
    # Size the lag column from what ACF returned. A fixed 1..35 range made
    # np.c_ fail whenever the number of lags was not exactly 35.
    lag_index = np.arange(1, len(q1) + 1)
    tmp = np.c_[lag_index, r1[1:], q1, p1]
    table = pd.DataFrame(tmp, columns=['lag', 'AC', 'Q', 'Prob(>Q)'])
    print(table.set_index('lag')[:15])

    # White-noise test of the residuals.
    print('残差的白噪声检验结果为:', acorr_ljungbox(residue, lags=1))
Beispiel #13
0
def autocorr_test(_xdata, _ydata):
    """Print autocorrelation diagnostics for a regularly spaced series.

    Prints the pandas autocorrelation for lags 1-3, a Durbin-Watson statistic
    and the lag-1 Ljung-Box p-value. Only the y values enter the statistics;
    ``_xdata`` is used as the index of the pandas series.

    Notes carried over from the original author: the Durbin-Watson value is
    computed with missing data tolerated but without a significance level,
    while Ljung-Box is reported to crash on missing data.

    :param _xdata: index values for the series.
    :param _ydata: observations; invalid entries (NaN/inf) are masked.
    :return: None
    """
    import numpy as np
    import pandas as pd
    from statsmodels.stats.diagnostic import acorr_ljungbox

    _ydata = np.ma.masked_invalid(_ydata)

    # pandas' autocorr tolerates NaN.
    pdf = pd.Series(_ydata, index=_xdata, copy=True)
    print("autocorrelation for first three lags:",
          [pdf.autocorr(i) for i in range(1, 4)])

    # Durbin-Watson: squared successive differences over the squared values.
    a = _ydata[:-1].astype('float')
    b = _ydata[1:].astype('float')
    _stat = np.nansum((b - a)**2) / np.nansum(_ydata**2)
    print("Durbin-Watson statistic (close to 2 if no autocorrelation):", _stat)

    _stat, _pvalue = acorr_ljungbox(_ydata, lags=1, boxpierce=False)
    print("Ljung-Box p-value on lag 1 autocorrelation:", _pvalue)
    print("")
def mix_model(time_series_diff, args, name):
    """
    Fit an ARIMA model, test its residuals for white noise, then fit a GARCH
    model on those residuals.

    time_series_diff: stationary time series after differencing.
    args: previously parsed arguments; ``args.plot`` switches the plots on.
    name: name of time_series_diff, passed on to the helpers.
    return: fitted ARIMA model, its order, fitted GARCH model and its order.
    """
    arima_model_fit, arima_order = ARIMA_model(time_series_diff, args, name)
    residuals = arima_model_fit.resid

    if args.plot:
        plot_residual(residuals, name)

    # Ljung-Box on the ARIMA residuals, with the degrees of freedom reduced by
    # the sum of the ARIMA order terms.
    _, pvalue = acorr_ljungbox(residuals,
                               model_df=sum(arima_order),
                               return_df=False)
    logger.debug("acorr_ljungbox: " + str(list(pvalue)))
    if args.plot:
        plot_pvalue(pvalue, "acorr_ljungbox")

    # The verdict only changes the log message; GARCH is fitted either way.
    if np.any(pvalue < 0.05):
        verdict = "residual after fit still can not give white noises, we turn to use GARCH"
    else:
        verdict = "Although the residual does give good random values, we still turn to GARCH"
    logger.info(verdict)

    garch_model_fit, garch_order = GARCH_model(residuals, args, name)

    return arima_model_fit, arima_order, garch_model_fit, garch_order
    def __ts_differencing(self):
        '''
        Determine the differencing order d of the training series.

        Each round runs a lag-1 Ljung-Box test and an ADF test on
        ``self.ts_diff``. The series passes when the ADF statistic is below
        the 1%, 5% and 10% critical values with p < 0.01 and the Ljung-Box
        p-value is below ``self.p_value_shreshold``; ``self.isStationarity``
        is then set to True. Otherwise ``self.d`` is raised by one, up to
        ``self.max_diff_time``, and ``self.ts_train`` is differenced d times.

        Updates ``self.d``, ``self.ts_diff`` and appends every differenced
        series to ``self.diffs``.
        '''
        while True:
            # White-noise test.
            lbvalue, pvalue2 = acorr_ljungbox(self.ts_diff, lags=1)
            # ADF test.
            adf, pvalue1, usedlag, nobs, critical_values, icbest = adfuller(
                self.ts_diff)
            rule_1 = (adf < critical_values['1%']
                      and adf < critical_values['5%']
                      and adf < critical_values['10%'] and pvalue1 < 0.01)
            rule_2 = (pvalue2 < self.p_value_shreshold)
            if rule_1 and rule_2:
                self.isStationarity = True
                break
            if not self.d < self.max_diff_time:
                # Differencing budget used up without passing both tests.
                break
            self.d = self.d + 1
            # d-th order differencing means differencing d times in a row.
            # diff(d) would instead subtract the value d steps back.
            differenced = self.ts_train.diff()
            for _ in range(self.d - 1):
                differenced = differenced.diff()
            self.ts_diff = differenced
            self.diffs.append(self.ts_diff)
            # In-place drop: the series stored in self.diffs is the same
            # object and loses its leading NaN values as well.
            self.ts_diff.dropna(inplace=True)
Beispiel #16
0
def _ljung_box_test(table, input_cols, lags=None):
    """Run the Ljung-Box test on each column and assemble a markdown report.

    :param table: data indexed by column name.
    :param input_cols: names of the columns to test.
    :param lags: forwarded to ``acorr_ljungbox``.
    :return: ``{'result': {...}}`` where the inner dict maps each column name
        to a DataFrame with the lag number, test statistic and p-value, plus
        the rendered report under ``'_repr_brtc_'``.
    """
    report = BrtcReprBuilder()
    report.addMD("""## Ljung Box test Result""")
    per_column = dict()

    for column in input_cols:
        statistics, p_values = acorr_ljungbox(x=table[column], lags=lags)

        summary = pd.DataFrame({
            'lags': range(1, len(statistics) + 1),
            'test statistic': statistics,
            'p-value based on chi-square distribution': p_values,
        })

        section = """
        | ## {input_col} test result
        |
        | {lb_res}
        """.format(input_col=column,
                   lb_res=pandasDF2MD(summary, num_rows=summary.shape[0]))
        report.addMD(strip_margin(section))

        per_column[column] = summary

    per_column['_repr_brtc_'] = report.get()

    return {'result': per_column}
Beispiel #17
0
def ljung(label, series, names):
    """
    Write a LaTeX table of Ljung-Box p-values to ``latex/tables/<label>.txt``.

    For every series the first element is skipped, the test is run with the
    library's default lags, and the p-value at position 39 of the result is
    reported.

    Parameters
    ----------
    label : string
        Label in latex and name of txt file

    series : list of pandas.Series

    names : list of strings
        Names of each series in table

    """

    with open('latex/tables/{}.txt'.format(label), 'w') as table_file:
        pieces = ['''\\begin{{table}}[H]
\\caption{{Ljung-Box Test}}
\\label{{tab:{}}}
\\centering
\\begin{{tabular}}{{ | c | c | }}
\\hline
Series & P-value \\\\
\\hline \\hline'''.format(label)]
        for position, one_series in enumerate(series):
            trimmed = one_series[1:]
            row_name = names[position]
            p_value = dig.acorr_ljungbox(trimmed)[1][39]
            pieces.append('\n{0} & {1:.3e} \\\\'.format(row_name, p_value))
            pieces.append('\n\\hline')
        pieces.append('''\n\\end{tabular}
\\end{table}''')
        table_file.write(''.join(pieces))
Beispiel #18
0
    def test_acorr_ljung_box(self):
        """Compare Ljung-Box and Box-Pierce results at lag 4 with values from R.

        The expected numbers were produced with R's ``Box.test``; the R call
        is quoted above each reference dict. The two ``fitdf=2`` references
        are defined but not asserted in this block.
        """
        residuals = self.res.resid

        # R: bt = Box.test(residuals(fm), lag=4, type = "Ljung-Box")
        # R: mkhtest(bt, "ljung_box_4", "chi2")
        ljung_box_4 = {'statistic': 5.23587172795227,
                       'pvalue': 0.263940335284713,
                       'parameters': (4,),
                       'distr': 'chi2'}

        # R: bt = Box.test(residuals(fm), lag=4, type = "Box-Pierce")
        # R: mkhtest(bt, "ljung_box_bp_4", "chi2")
        ljung_box_bp_4 = {'statistic': 5.12462932741681,
                          'pvalue': 0.2747471266820692,
                          'parameters': (4,),
                          'distr': 'chi2'}

        # ddof correction for fitted parameters in ARMA(p,q): fitdf=p+q
        # R: bt = Box.test(residuals(fm), lag=4, type = "Ljung-Box", fitdf=2)
        # R: mkhtest(bt, "ljung_box_4df2", "chi2")
        ljung_box_4df2 = {'statistic': 5.23587172795227,
                          'pvalue': 0.0729532930400377,
                          'parameters': (2,),
                          'distr': 'chi2'}

        # R: bt = Box.test(residuals(fm), lag=4, type = "Box-Pierce", fitdf=2)
        # R: mkhtest(bt, "ljung_box_bp_4df2", "chi2")
        ljung_box_bp_4df2 = {'statistic': 5.12462932741681,
                             'pvalue': 0.0771260128929921,
                             'parameters': (2,),
                             'distr': 'chi2'}

        lb_stat, lb_pvalue, bp_stat, bp_pvalue = smsdia.acorr_ljungbox(
            residuals, 4, boxpierce=True)
        # Only the entry for the largest lag is compared.
        compare_t_est([lb_stat[-1], lb_pvalue[-1]], ljung_box_4,
                      decimal=(13, 14))
        compare_t_est([bp_stat[-1], bp_pvalue[-1]], ljung_box_bp_4,
                      decimal=(13, 14))
Beispiel #19
0
def eval_plot(X, Y, Y_hat, lags=None):
    """Plot residual diagnostics and run a Ljung-Box test on the residuals.

    Draws a 2x2 figure (normal probability plot, residuals against ``X``,
    histogram, residuals in data order), then prints the Ljung-Box p-values
    and a verdict.

    :param X: x positions for the residual scatter plot.
    :param Y: observed values.
    :param Y_hat: fitted values; residuals are ``Y - Y_hat``.
    :param lags: lags for the test. Defaults to the smaller of 20 and half
        the number of residuals.
    :return: True when at least one p-value is below 0.05, i.e. the residuals
        still appear autocorrelated; False otherwise.
    """
    R = np.array(Y) - np.array(Y_hat)
    _, ax = plt.subplots(2, 2)
    stats.probplot(R, plot=ax[0, 0])
    ax[0, 0].set_title('Normal Probability Plot of the Residuals')
    ax[0, 1].scatter(X, R)
    ax[0, 1].set_title('Residuals vs Fitted Values')
    ax[1, 0].hist(R)
    ax[1, 0].set_title('Histogram of the Residuals')
    ax[1, 1].plot(R)
    ax[1, 1].set_title('Residuals vs Order of the Data')
    plt.show()
    if lags is None:
        # Floor division keeps the lag count an integer; true division
        # produced a float such as 7.5 for odd-sized inputs.
        lags = min(20, len(R) // 2)
    _, p_values = acorr_ljungbox(R, lags=lags, boxpierce=False)
    print('Ljung-Box Test')
    print(
        "H_0 (p>0.05) --> The data are independently distributed -- i.e. there's no auto correlations"
    )
    print(
        "H_a (p<0.05) --> The data are not independently distributed -- i.e. there is auto correlations"
    )
    print('p_values', p_values)
    significant = [p for p in p_values if p < .05]
    if len(significant) > 0:
        print(
            'PROBLEM!  There appears to be information left in the residuals')
    else:
        print('There does not appear to be information left in the residuals')
    return len(significant) > 0
def is_white_noise(col, lags=LAGS, box_pierce=False):
    """Return True when at least one tested lag has a p-value above ``ALPHA``.

    Background on reading the result:
    https://stats.stackexchange.com/questions/200267/interpreting-ljung-box-test-results-from-statsmodels-stats-diagnostic-acorr-lju

    :param col: series to test.
    :param lags: lags forwarded to ``acorr_ljungbox``.
    :param box_pierce: also compute the Box-Pierce variant; when set, its
        p-values are the ones judged.
    :return: bool
    """
    results = diagnostic.acorr_ljungbox(col, lags, box_pierce)
    # With box_pierce the call yields four arrays (lb, lb p-values, bp,
    # bp p-values); unpacking into two names raised ValueError.
    pvals = results[3] if box_pierce else results[1]
    return any(val > ALPHA for val in pvals)
Beispiel #21
0
    def ljung_box_test(self, output_folder, df_name):
        '''
        Apply the Ljung-Box (and Box-Pierce) test to the target variable of
        the dataframe held by this object, to detect white noise.

        The statistics and p-values are saved as ``<df_name>.csv`` inside
        ``output_folder``; the folder is created when missing. A message is
        printed when every p-value of a test is <= 0.05.

        :param output_folder: path to the folder that receives the CSV file
        :param df_name: file name (without extension) for the saved dataframe
        :return: the dataframe created, indexed by ``lag_nb``
        '''

        if self.service_name is not None and self.mohafaza is not None:
            print("testing for %s in %s" % (self.service_name, self.mohafaza))
        arr = sm.acorr_ljungbox(self.df[self.target_variable], boxpierce=True)
        df = pd.DataFrame({
            'lb': arr[0],
            'p-values': arr[1],
            'bpvalue': arr[2],
            'bpp-values': arr[3]
        })
        df.index.name = 'lag_nb'
        # exist_ok avoids the race between an existence check and the creation.
        os.makedirs(output_folder, exist_ok=True)
        # Join the path so a folder given without a trailing separator still
        # receives the file inside it.
        df.to_csv(os.path.join(output_folder, df_name + '.csv'))
        if (df['p-values'] <= 0.05).all():
            print('all p-values for ljung box <= 0.05')
        if (df['bpp-values'] <= 0.05).all():
            print('all p-values for box pierce <= 0.05')
        print('-----------------------------------------------')
        return df
Beispiel #22
0
def get_best_log(ts, max_log=5, rule1=True, rule2=True):
    '''
    Log-transform a series repeatedly until it passes the ADF and Ljung-Box rules.

    :param ts: time-series data, Series type
    :param max_log: exclusive upper bound of the pass counter (int); passes
        1 .. max_log - 1 are attempted
    :param rule1: boolean verdict of rule 1 for the untransformed series
    :param rule2: boolean verdict of rule 2 for the untransformed series
    :return: ``(0, ts)`` when both incoming rules hold; otherwise
        ``(n, transformed_ts)`` for the first pass ``n`` that satisfies every
        rule. Returns None implicitly when no pass qualifies.
    '''
    if rule1 and rule2:  # both rules already satisfied: nothing to do
        return 0, ts

    for i in range(1, max_log):
        ts = np.log(ts)
        # Ljung-Box white-noise test at lag 1.
        lbvalue, lb_pvalue = acorr_ljungbox(ts, lags=1)
        # ADF unit-root test.
        adf, adf_pvalue, usedlag, nobs, critical_values, icbest = adfuller(ts)
        # Stationarity rule: uses the ADF p-value. The two p-values used to be
        # swapped, so this rule read the Ljung-Box p-value instead.
        rule_1 = (adf < critical_values['1%']
                  and adf < critical_values['5%']
                  and adf < critical_values['10%']
                  and adf_pvalue < 0.01)
        # White-noise rule: uses the Ljung-Box p-value.
        rule_2 = (lb_pvalue < 0.05)
        rule_3 = (i < 5)
        if rule_1 and rule_2 and rule_3:
            print('The best log n is: {0}'.format(i))
            return i, ts
Beispiel #23
0
def arimaModelCheck():
    '''
    Model check: fit ARIMA(0,1,1) and test its residuals for white noise.

    Reads 'data/discdata_processed.xls', leaves out the last 5 rows, fits the
    model on one column and prints whether the prediction errors pass the
    Ljung-Box test over 12 lags.

    :return: None
    '''
    from statsmodels.stats.diagnostic import acorr_ljungbox
    from statsmodels.tsa.arima_model import ARIMA

    discfile = 'data/discdata_processed.xls'
    lagnum = 12  # number of residual lags to test

    frame = pd.read_excel(discfile, index_col='COLLECTTIME')
    frame = frame.iloc[:len(frame) - 5]
    xdata = frame['CWXT_DB:184:D:\\']

    # Build and train the model, then predict on the original scale.
    fitted = ARIMA(xdata, (0, 1, 1)).fit()
    xdata_pred = fitted.predict(typ='levels')
    pred_error = (xdata_pred - xdata).dropna()

    # Any lag with p < 0.05 counts as "not white noise".
    _, p_values = acorr_ljungbox(pred_error, lags=lagnum)
    if (p_values < 0.05).any():
        print(u'模型ARIMA(0,1,1)不符合白噪声检验')
    else:
        print(u'模型ARIMA(0,1,1)符合白噪声检验')
Beispiel #24
0
def is_white_noise(time_series):
    """Return False when the lag-1 Ljung-Box p-value is below 0.05, else True.

    :param time_series: object whose ``values`` are passed to the test.
    """
    outcome = acorr_ljungbox(time_series.values, lags=1)
    p = outcome[1]
    return not (p < 0.05)
Beispiel #25
0
def tsdiag(arimaResiduals, afcFags=25, lbLags=10, figsize=(10, 8), style='bmh'):
    """Draw a three-panel residual diagnostic: series, ACF, Ljung-Box p-values.

    :param arimaResiduals: residuals, either a ``pd.Series`` or anything
        ``pd.Series`` can wrap.
    :param afcFags: number of lags shown in the ACF panel.
    :param lbLags: number of lags for the Ljung-Box test (integer).
    :param figsize: figure size.
    :param style: matplotlib style context used while drawing.
    :return: None
    """
    # Bind the plotted series in both cases. It used to be assigned only for
    # non-Series input, so passing a Series ended in a NameError.
    if isinstance(arimaResiduals, pd.Series):
        residual_series = arimaResiduals
    else:
        residual_series = pd.Series(arimaResiduals)

    with plt.style.context(style):
        plt.figure(figsize=figsize)

        layout = (3, 1)
        sr_ax = plt.subplot2grid(layout, (0, 0))
        acf_ax = plt.subplot2grid(layout, (1, 0))
        lb_ax = plt.subplot2grid(layout, (2, 0))

        # Residual series
        sr_ax.plot(residual_series)
        sr_ax.set_title("Standardizede Residuals")
        sr_ax.set_xlabel("Time")

        # ACF
        plot_acf(arimaResiduals, lags=afcFags, ax=acf_ax)

        # Ljung-Box p-values, one per lag
        lb = acorr_ljungbox(arimaResiduals, lags=lbLags)
        lbPvalue = lb[1]

        # x positions are the lag numbers 1..lbLags (previously 0..lbLags-1).
        lb_ax.scatter(np.arange(1, lbLags + 1), lbPvalue,
                      facecolors='none', edgecolors='b')
        lb_ax.set_ylim(-0.1, 1)
        lb_ax.axhline(y=0.05, linestyle='--')
        lb_ax.set_title("p values for Ljung-Box Statistic")
        lb_ax.set_ylabel("p values")
        lb_ax.set_xlabel("lags")

        plt.tight_layout()
    return
def testing(data):
    '''
    Print the ADF test result and the lag-1 Ljung-Box white-noise test result
    for the 'volume' column of ``data``.

    :param data: table with a 'volume' column.
    :return: None
    '''
    adf_result = ADF(data['volume'])
    print('原始序列的ADF平衡性检验的结果为:', adf_result)
    ljung_box_result = acorr_ljungbox(data['volume'], lags=1)
    print('原始序列的白噪声检验的结果为:', ljung_box_result)
Beispiel #27
0
def acorr_ljungbox_(timeseries):
    """
    Run the lag-1 Ljung-Box test and hand back its p-value.

    :param timeseries: time series to analyse
    :return: first entry of the p-value part of the test result, used to
        judge whether the series is random
    """
    outcome = acorr_ljungbox(timeseries, lags=1)
    p_values = outcome[1]
    return p_values[0]
def ljungbox(data, lags=12):
    """Print the Ljung-Box table and a one-line summary for the last lag.

    :param data: series to test.
    :param lags: lags forwarded to ``acorr_ljungbox`` (default 12).
    :return: None
    """
    blres = acorr_ljungbox(data, lags=lags, return_df=True)
    print(blres)
    print("Box-Ljung test")
    # Summary values come from the last row of the result table.
    last_row = blres.tail(1)
    print(f"X-squared: {round(last_row['lb_stat'].values[0], 4)}", end=", ")
    print(f"df = {len(blres)}", end=", ")
    print(f"p-value: {last_row['lb_pvalue'].values[0]}")
Beispiel #29
0
def alles_ljung(residuals):
    '''
    Ljung-Box check of fit residuals at lags 1, 5, 10, 15 and 20.

    Parameters
    ----------
    residuals : array of float
        The residuals after the fit of model+baseline+stellar_var.

    Returns
    -------
    isUncorrelated : bool
        True if the null hypothesis (no correlation) survives at every lag
        and every significance level checked, False otherwise.

    Outputs
    -------
    Logs the result table and the conclusion through ``logprint``.

    Sources
    -------
    https://www.statology.org/ljung-box-test-python/
    https://www.statsmodels.org/dev/generated/statsmodels.stats.diagnostic.acorr_ljungbox.html?highlight=ljung
    '''

    for line in (
            'Ljung-Box Test',
            '--------------',
            'This tests the null hypothesis that there is no correlation among the residuals.',
    ):
        logprint(line)

    df = acorr_ljungbox(residuals, lags=[1, 5, 10, 15, 20], return_df=True)
    df.reset_index(inplace=True)
    df = df.rename(columns={'index': 'lag'})
    logprint('Does the null hypotheses hold at a significance level of...')

    # One boolean column per significance level: True means the null
    # hypothesis cannot be rejected at that level.
    levels = (
        ('0.15', 0.15),
        ('0.1', 0.10),
        ('0.05', 0.05),
        ('0.025', 0.025),
        ('0.01', 0.01),
    )
    for column, alpha in levels:
        df[column] = df['lb_pvalue'] > alpha

    isUncorrelated = all(
        all(df[column] == True) for column, _ in levels)
    logprint(df.to_string(index=False))
    if isUncorrelated:
        logprint('The null hypothesis cannot be rejected.')
        logprint('In simple words: your residuals look good.')
    else:
        logprint(
            'The null hypothesis is rejected at some significance levels.')
        logprint(
            'In simple words: there might still be some structure in your residuals.'
        )
    logprint('\n')

    return isUncorrelated
Beispiel #30
0
def test_ljungbox_errors():
    """Check the error and warning messages of ``acorr_ljungbox``.

    Three invalid keyword combinations must raise ValueError with the given
    message, and calling with ``return_df=False`` must emit a FutureWarning
    about the default value of lags.
    """
    data = sunspots.load_pandas().data['SUNACTIVITY']
    invalid_calls = (
        ("model_df must", {'model_df': -1}),
        ("period must", {'model_df': -1, 'period': 1}),
        ("period must", {'model_df': -1, 'period': -2}),
    )
    for message, call_kwargs in invalid_calls:
        with pytest.raises(ValueError, match=message):
            smsdia.acorr_ljungbox(data, **call_kwargs)
    with pytest.warns(FutureWarning, match="The default value of lags"):
        smsdia.acorr_ljungbox(data, return_df=False)
Beispiel #31
0
def random_test(close_price):
    """Report through ``output`` whether the lag-1 Ljung-Box test rejects randomness.

    A p-value below 0.05 is reported as "not purely random"; the full test
    result is reported afterwards.
    """
    acorr_result = acorr_ljungbox(close_price, lags=1)
    verdict = ("第二步:随机性检验:非纯随机性" if acorr_result[1] < 0.05
               else "第二步:随机性检验:纯随机性")
    output(verdict)
    output(f"白噪声检验结果:{acorr_result}\n")
Beispiel #32
0
 def test_autocorr(df):
     """
     Print the Ljung-Box p-values (10 lags) of the squared-residual column.

     :param df: pandas.DataFrame
     """
     outcome = acorr_ljungbox(df[TimeSeriesDataFrameMap.Square_residuals], lags=10, boxpierce=True)
     _, pvalue, _, _ = outcome
     print('Ljung Box Test')
     print('Lag  P-value')
     # At most 12 rows are printed, numbered from 1.
     for lag, p_val in enumerate(pvalue[:12], start=1):
         print(lag, ' ', p_val)
def programmer_3():
    """Print lag-1 Ljung-Box verdicts for one column of the disc data file.

    Reads "data/discdata_processed.xls", leaves out the last 5 rows, tests the
    column and its first difference, and finally prints the statistic of the
    second test.
    """
    discfile = "data/discdata_processed.xls"
    column = "CWXT_DB:184:D:\\"

    frame = pd.read_excel(discfile)
    frame = frame.iloc[:len(frame) - 5]

    # Raw series: p < 0.05 is reported as "not white noise".
    (lb,), (p,) = acorr_ljungbox(frame[column], lags=1)
    template = (u"原始序列为非白噪声序列,对应的p值为:%s" if p < 0.05
                else u"原始序列为白噪声序列,对应的p值为:%s")
    print(template % p)

    # First difference of the same column.
    (lb,), (p,) = acorr_ljungbox(frame[column].diff().dropna(), lags=1)
    template = (u"一阶差分序列为非白噪声序列,对应的p值为:%s" if p < 0.05
                else u"一阶差分序列为白噪声序列,对应的p值为:%s")
    print(template % p)
    print(lb)
Beispiel #34
0
    def test_acorr_ljung_box_big_default(self):
        """Big dataset, default lags: compare the last-lag results with R values."""
        residuals = self.res.resid

        # R: bt = Box.test(residuals(fm), type = "Ljung-Box")
        # R: mkhtest(bt, "ljung_box_none", "chi2")
        ljung_box_none = {'statistic': 51.03724531797195,
                          'pvalue': 0.11334744923390,
                          'distr': 'chi2'}

        # R: bt = Box.test(residuals(fm), type = "Box-Pierce")
        # R: mkhtest(bt, "ljung_box_bp_none", "chi2")
        ljung_box_bp_none = {'statistic': 45.12238537034000,
                             'pvalue': 0.26638168491464,
                             'distr': 'chi2'}

        lb_stat, lb_pvalue, bp_stat, bp_pvalue = smsdia.acorr_ljungbox(
            residuals, boxpierce=True)
        compare_t_est([lb_stat[-1], lb_pvalue[-1]], ljung_box_none,
                      decimal=(13, 13))
        compare_t_est([bp_stat[-1], bp_pvalue[-1]], ljung_box_bp_none,
                      decimal=(13, 13))
Beispiel #35
0
    def test_acorr_ljung_box_small_default(self):
        """Small dataset (first 30 residuals), default lags: compare with R values."""
        residuals = self.res.resid[:30]

        # R: bt = Box.test(residuals(fm), type = "Ljung-Box")
        # R: mkhtest(bt, "ljung_box_small", "chi2")
        ljung_box_small = {'statistic': 9.61503968281915,
                           'pvalue': 0.72507000996945,
                           'parameters': (0,),
                           'distr': 'chi2'}

        # R: bt = Box.test(residuals(fm), type = "Box-Pierce")
        # R: mkhtest(bt, "ljung_box_bp_small", "chi2")
        ljung_box_bp_small = {'statistic': 7.41692150864936,
                              'pvalue': 0.87940785887006,
                              'parameters': (0,),
                              'distr': 'chi2'}

        lb_stat, lb_pvalue, bp_stat, bp_pvalue = smsdia.acorr_ljungbox(
            residuals, boxpierce=True)
        compare_t_est([lb_stat[-1], lb_pvalue[-1]], ljung_box_small,
                      decimal=(13, 13))
        compare_t_est([bp_stat[-1], bp_pvalue[-1]], ljung_box_bp_small,
                      decimal=(13, 13))
def programmer_5():
    """Fit ARIMA(0,1,1) on the disc data and test the residuals for white noise.

    Reads "data/discdata_processed.xls", leaves out the last 5 rows, prints
    the verdict of a 12-lag Ljung-Box test on the prediction errors and then
    the test statistics.
    """
    discfile = "data/discdata_processed.xls"
    lagnum = 12  # number of residual lags to test

    frame = pd.read_excel(discfile, index_col="COLLECTTIME")
    frame = frame.iloc[:len(frame) - 5]
    xdata = frame["CWXT_DB:184:D:\\"]

    # Train, predict on the original scale, and take the prediction errors.
    fitted = ARIMA(xdata, (0, 1, 1)).fit()
    pred_error = (fitted.predict(typ="levels") - xdata).dropna()

    # Any lag with p < 0.05 counts as "not white noise".
    lb, p_values = acorr_ljungbox(pred_error, lags=lagnum)
    if (p_values < 0.05).any():
        print(u"模型ARIMA(0,1,1)不符合白噪声检验")
    else:
        print(u"模型ARIMA(0,1,1)符合白噪声检验")
    print(lb)
#-*- coding: utf-8 -*-
#白噪声检验
import pandas as pd

#参数初始化
discfile = '../data/discdata_processed.xls'  # input workbook

data = pd.read_excel(discfile)
data = data.iloc[: len(data)-5]  # leave out the last 5 rows

# Ljung-Box white-noise test
from statsmodels.stats.diagnostic import acorr_ljungbox

# Raw series at lag 1: p < 0.05 is reported as "not white noise".
[[lb], [p]] = acorr_ljungbox(data['CWXT_DB:184:D:\\'], lags = 1)
print((u'原始序列为非白噪声序列,对应的p值为:%s' if p < 0.05
       else u'原始该序列为白噪声序列,对应的p值为:%s') % p)

# First difference of the same column.
[[lb], [p]] = acorr_ljungbox(data['CWXT_DB:184:D:\\'].diff().dropna(), lags = 1)
print((u'一阶差分序列为非白噪声序列,对应的p值为:%s' if p < 0.05
       else u'一阶差分该序列为白噪声序列,对应的p值为:%s') % p)
Beispiel #38
0
def normal_stats(a_row, q):
    """Check independence and normality of transformed errors.

    :param a_row: sample of values.
    :param q: probability passed to the normal quantile function.
    :return: ``[l_pval, n_pval, upr, lwr]``: the smallest Ljung-Box p-value
        over the tested lags (worst case), the Shapiro-Wilk p-value, and the
        normal quantiles at ``q`` and ``1 - q`` for the sample mean and
        standard deviation.
    """
    # Shapiro-Wilk; null hypothesis: the data is normal.
    _, n_pval = sps.shapiro(a_row)
    loc = np.mean(a_row)
    scale = np.std(a_row)
    upr = sps.norm.ppf(q, loc, scale)
    lwr = sps.norm.ppf(1.0 - q, loc, scale)
    # Ljung-Box; null hypothesis: the data are independently distributed.
    n_lags = int(12 * (len(a_row) / 100.0)**0.25)
    l_pval = np.min(smd.acorr_ljungbox(a_row, lags=n_lags)[1])
    return [l_pval, n_pval, upr, lwr]
print(u'原始序列的ADF检验结果为:', ADF(data[u'销量']))
# Return values, in order: adf, pvalue, usedlag, nobs, critical values,
# icbest, regresults, resstore

# First difference of the data
D_data = data.diff().dropna()
D_data.columns = [u'销量差分']
D_data.plot()  # time-series plot
plt.show()
plot_acf(D_data).show()  # autocorrelation plot
from statsmodels.graphics.tsaplots import plot_pacf
plot_pacf(D_data).show()  # partial autocorrelation plot
print(u'差分序列的ADF检验结果为:', ADF(D_data[u'销量差分']))  # stationarity test

# White-noise test
from statsmodels.stats.diagnostic import acorr_ljungbox
print(u'差分序列的白噪声检验结果为:', acorr_ljungbox(D_data, lags=1))  # statistic and p-value

from statsmodels.tsa.arima_model import ARIMA

# Order selection: try every (p, 1, q) and record the BIC of each fit.
pmax = int(len(D_data)/10)  # rule of thumb: order no larger than length/10
qmax = int(len(D_data)/10)  # rule of thumb: order no larger than length/10
bic_matrix = []  # BIC matrix, rows indexed by p, columns by q
for p in range(pmax+1):
  tmp = []
  for q in range(qmax+1):
    # Some orders fail to fit; record None for those and move on.
    # Exception (not a bare except) lets Ctrl-C and SystemExit through.
    try:
      tmp.append(ARIMA(data, (p,1,q)).fit().bic)
    except Exception:
      tmp.append(None)
  bic_matrix.append(tmp)
def programmer_6():
    """Run an ARIMA workflow on the sales series in ``data/arima_data.xls``.

    Steps: plot the autocorrelation of the raw series, run an ADF test,
    take the first difference and plot its ACF/PACF, run ADF and
    Ljung-Box tests on the differenced series, grid-search (p, q) by BIC
    and fit ARIMA(p, 1, q) with the best pair.  Results are printed or
    plotted; nothing is returned.

    Notes carried over from the original author:

    * ``UserWarning: matplotlib is currently using a non-GUI backend, so
      cannot show the figure`` appears when ``plt.show()`` is called
      several times; the suggested remedy is to use ``plt.subplot()``.
    * ``RuntimeWarning: overflow encountered in exp`` is attributed to
      insufficient numeric precision.
    * ``forecastnum`` is the number of days to forecast.
    * ``plot_acf`` draws the autocorrelation plot, ``plot_pacf`` the
      partial autocorrelation plot.
    """
    import warnings

    discfile = 'data/arima_data.xls'
    forecastnum = 5
    data = pd.read_excel(discfile, index_col=u'日期')

    fig = plt.figure(figsize=(8, 6))
    # First autocorrelation plot (raw series)
    ax1 = plt.subplot(411)
    fig = plot_acf(data, ax=ax1)

    # Stationarity test on the raw series
    print(u'原始序列的ADF检验结果为:', ADF(data[u'销量']))
    # Per the original note, the returned values are, in order: adf, pvalue,
    # usedlag, nobs, critical values, icbest, regresults, resstore.

    # First-order difference
    D_data = data.diff().dropna()
    D_data.columns = [u'销量差分']
    # Time-series plot of the differenced data
    D_data.plot()
    plt.show()
    # Second autocorrelation plot (differenced series)
    fig = plt.figure(figsize=(8, 6))
    ax2 = plt.subplot(412)
    fig = plot_acf(D_data, ax=ax2)
    # Partial autocorrelation plot
    ax3 = plt.subplot(414)
    fig = plot_pacf(D_data, ax=ax3)
    plt.show()
    fig.clf()

    print(u'差分序列的ADF检验结果为:', ADF(D_data[u'销量差分']))  # stationarity test

    # White-noise test
    print(u'差分序列的白噪声检验结果为:', acorr_ljungbox(D_data, lags=1))  # statistic and p-value
    data[u'销量'] = data[u'销量'].astype(float)
    # Order selection
    pmax = int(len(D_data) / 10)  # rule of thumb: order does not exceed length/10
    qmax = int(len(D_data) / 10)  # rule of thumb: order does not exceed length/10
    data.dropna(inplace=True)

    # Some fits raise and some only warn.  Warnings are promoted to errors
    # so that both kinds of failure are skipped, but only for the duration
    # of the grid search: catch_warnings() restores the previous filters
    # afterwards instead of leaving the whole process in "error" mode.
    bic_matrix = []  # BIC matrix, rows indexed by p and columns by q
    with warnings.catch_warnings():
        warnings.filterwarnings('error')
        for p in range(pmax + 1):
            row = []
            for q in range(qmax + 1):
                try:
                    row.append(ARIMA(data, (p, 1, q)).fit().bic)
                except Exception:
                    # NaN (not None) keeps every column numeric, so
                    # idxmin() below works even if a whole column failed.
                    row.append(float('nan'))
            bic_matrix.append(row)

    # Locate the minimum: stack() flattens the frame to a Series indexed
    # by (p, q) and idxmin() returns the index of the smallest BIC.
    bic_matrix = pd.DataFrame(bic_matrix)
    p, q = bic_matrix.stack().idxmin()
    print(u'BIC最小的p值和q值为:%s、%s' % (p, q))
    model = ARIMA(data, (p, 1, q)).fit()  # fit ARIMA(p, 1, q) with the selected orders
    model.summary2()  # model report (return value is not used here)
    # Forecast `forecastnum` days; per the original note this returns the
    # predictions, standard errors and confidence intervals (not used here).
    model.forecast(forecastnum)
# Raw spending plot of monthly averages: draw the monthly series, relabel the
# x ticks at each index value with two-digit labels, save the figure to disk
# and clear it so the next plot starts from an empty figure.
# NOTE(review): the eleven labels look like month numbers running from August
# to June -- confirm against the index of mcc2_ts_m.
plt.plot(mcc2_ts_m)
plt.xticks(mcc2_ts_m.index, ('08', '09', '10', '11', '12', '01', '02', '03', '04', '05', '06'))
plt.savefig('C:/Users/bodhisattva_2/Dropbox/mcc2_ts_m_plot.png', bbox_inches='tight')
plt.clf()

# Generate and plot the autocorrelation function (ACF) over all available lags
# for monthly spending, with a black horizontal reference line at y = 0.
acf_mcc2_m = stattools.acf(mcc2_ts_m['trans_amt'].values, nlags=num_rows_ts_m)
plt.axhline(y=0, xmin=0, xmax=1, color='k')
plt.plot(acf_mcc2_m)
plt.savefig('C:/Users/bodhisattva_2/Dropbox/mcc2_ts_m_acf_plot.png', bbox_inches='tight')
plt.clf()

# Author's note: the ACF showed fairly weak periodicity, so a Ljung-Box test for
# white noise is run on monthly spending. As the author expected, the null
# hypothesis of a white-noise process is not rejected at the monthly level.
# NOTE(review): the return value is neither stored nor printed, so the result is
# only visible in an interactive session.
diagnostic.acorr_ljungbox(mcc2_ts_m['trans_amt'].values, lags=num_rows_ts_m-1)

# Author's note: aggregating the data might be obscuring trends at the weekly
# level, so the same sequence of analyses is repeated on the mcc2_ts series.

# Generate and plot the ACF over 21 lags. This number was chosen to make the
# plot clear; per the author a similar pattern held for more lags.
# NOTE(review): the ticks come from np.arange(num_lags), i.e. 0..20, while the
# ACF is requested with nlags=num_lags -- confirm whether a tick for lag 21 is
# intentionally omitted.
num_lags = 21
acf_mcc2 = stattools.acf(mcc2_ts['trans_amt'].values, nlags=num_lags)
plt.plot(acf_mcc2)
plt.axhline(y=0, xmin=0, xmax=1, color='k')
plt.xticks(np.arange(num_lags))
plt.savefig('C:/Users/bodhisattva_2/Dropbox/mcc2_ts_acf_plot.png', bbox_inches='tight')

# Author's note: the ACF displays strong weekly periodicity and, as one might
# expect, a Ljung-Box test rejects the null hypothesis of a white-noise process
# at the daily level. As above, the return value is not captured.
diagnostic.acorr_ljungbox(mcc2_ts['trans_amt'].values, lags=num_lags)
Beispiel #42
0
print( ADF(data[u'销量']))
# Per the original note, the returned values are, in order: adf, pvalue,
# usedlag, nobs, critical values, icbest, regresults, resstore.

# First-order difference of the series
D_data = data.diff().dropna()
D_data.columns = [u'销量差分']
D_data.plot()  # time-series plot
plt.show()
plot_acf(D_data).show()  # autocorrelation plot
from statsmodels.graphics.tsaplots import plot_pacf
plot_pacf(D_data).show()  # partial autocorrelation plot
ADF(D_data[u'销量差分'])  # stationarity test (result is not captured)

# White-noise test
from statsmodels.stats.diagnostic import acorr_ljungbox
acorr_ljungbox(D_data, lags=1)  # statistic and p-value (result is not captured)

from statsmodels.tsa.arima_model import ARIMA

# Order selection: fit ARIMA(p, 1, q) for every p, q in the grid and record the BIC.
pmax = int(len(D_data)/10)  # rule of thumb: order does not exceed length/10
qmax = int(len(D_data)/10)  # rule of thumb: order does not exceed length/10
bic_matrix = []  # BIC matrix, rows indexed by p and columns by q
for p in range(pmax+1):
    tmp = []
    for q in range(qmax+1):
        # Some (p, q) combinations fail to fit; record None for those and
        # move on.  Only ordinary exceptions are swallowed, so Ctrl-C and
        # SystemExit still interrupt the grid search.
        try:
            tmp.append(ARIMA(data, (p,1,q)).fit().bic)
        except Exception:
            tmp.append(None)
    bic_matrix.append(tmp)
# -*- coding: utf-8 -*-
# Model check: fit ARIMA(0,1,1) and test whether its residuals are white noise.
import pandas as pd

# Parameter initialisation
discfile = '../data/discdata_processed.xls'
lagnum = 12  # number of residual lags tested

data = pd.read_excel(discfile, index_col='COLLECTTIME')
data = data.iloc[: len(data) - 5]  # leave out the last 5 observations
xdata = data['CWXT_DB:184:D:\\']

from statsmodels.tsa.arima_model import ARIMA  # used to build the ARIMA(0,1,1) model

arima = ARIMA(xdata, (0, 1, 1)).fit()  # build and fit the model
xdata_pred = arima.predict(typ='levels')  # in-sample prediction

# Written as two print() calls so the snippet parses under Python 3 (the
# original used a Python 2 print statement) while still emitting the banner
# on its own line followed by the predictions.
print("-------預測模型------------")
print(xdata_pred)

pred_error = (xdata_pred - xdata).dropna()  # residuals

from statsmodels.stats.diagnostic import acorr_ljungbox  # white-noise test

lb, p = acorr_ljungbox(pred_error, lags=lagnum)
h = (p < 0.05).sum()  # a p-value below 0.05 is treated as non-white-noise
if h > 0:
    print(u'模型ARIMA(0,1,1)不符合白噪声检验')
else:
    print(u'模型ARIMA(0,1,1)符合白噪声检验')
Beispiel #44
0
#clicksPerDay.index.name = None
#encountersPerDay.index.name = None

# Reduce each frame to a single Series of daily counts, keeping the index.
clicksPerDay = pd.Series(data=clicksPerDay["count_clicks"], index=clicksPerDay.index)
encountersPerDay = pd.Series(data=encountersPerDay["count_encounter"], index=encountersPerDay.index)

# Forward-fill missing days.  .ffill() replaces fillna(method="ffill"),
# whose `method` keyword is deprecated in recent pandas releases.
clicksPerDay = clicksPerDay.ffill()
encountersPerDay = encountersPerDay.ffill()



####################LJUNG-BOX####################
clicksPACF = stattools.pacf_ols(clicksPerDay, nlags=MAX_LAG)
encountersPACF = stattools.pacf_ols(encountersPerDay, nlags=MAX_LAG)

# my implementation
results = _math.ljungBox(encountersPACF, len(encountersPerDay), MAX_LAG)
lag, R, Q, p = zip(*results)
# print() calls (instead of Python 2 print statements) so the snippet
# parses under Python 3; single-argument calls print the same text in both.
print(np.asarray(Q[1:]))
print(np.asarray(p[1:]))

# statsmodels implementation
results = diagnostic.acorr_ljungbox(encountersPerDay, lags=MAX_LAG)
print(results)

# my copy of the statsmodels implementation
results = _math.ljungBox2(encountersPerDay, maxlag=MAX_LAG)
print(results)


#they are not the same... I wonder why.
Beispiel #45
0
# Render the minus sign correctly on the axes, then plot the raw series.
plt.rcParams['axes.unicode_minus'] = False
data.plot()
plt.show()

# Autocorrelation plot of the raw series
from statsmodels.graphics.tsaplots import plot_acf
plot_acf(data).show()

from statsmodels.tsa.stattools import adfuller as ADF

# print() calls replace the Python 2 print statements so the snippet parses
# under Python 3, matching the print(...) style used by the other examples.
print('ADF test result:', ADF(data['value']))

# First-order difference, with its time-series, ACF and PACF plots
D_data = data.diff().dropna()
D_data.columns = ['diff value']
D_data.plot()
plt.show()
plot_acf(D_data).show()
from statsmodels.graphics.tsaplots import plot_pacf
plot_pacf(D_data).show()
print('diff seq ADF test result:', ADF(D_data['diff value']))

# White-noise (Ljung-Box) test on the differenced series
from statsmodels.stats.diagnostic import acorr_ljungbox
print('dff white noise test result:', acorr_ljungbox(D_data, lags = 1))

from statsmodels.tsa.arima_model import ARIMA


# Fit ARIMA(1,1,1) and forecast 5*6 = 30 steps ahead; the summary and the
# forecast are not captured here.
model = ARIMA(data, (1,1,1)).fit()
model.summary2()
model.forecast(5*6)