Exemple #1
0
def test_is_I1(price, alpha=0.05):
    """Report whether ``price`` behaves like an I(1) series.

    An ADF test is run on the levels and on the first differences
    (``np.diff(price)``).  The series is classed as I(1) when the level test
    does not reject at ``alpha`` while the differenced test does.

    Returns a plain ``bool``.
    """
    level_test, diff_test = ADF(price), ADF(np.diff(price))
    level_rejects = level_test.pvalue < alpha
    diff_rejects = diff_test.pvalue < alpha
    return bool(not level_rejects and diff_rejects)
Exemple #2
0
def test_trends_low_memory(trend):
    """Low-memory ADF must select the same lag, and mutating max_lags warns."""
    rng = np.random.RandomState(12345)
    walk = np.cumsum(rng.standard_normal(250))
    shared = dict(trend=trend, max_lags=16)
    regular = ADF(walk, **shared)
    lean = ADF(walk, low_memory=True, **shared)
    assert regular.lags == lean.lags
    assert regular.max_lags == 16
    # Re-assigning max_lags on an existing test is expected to emit a warning.
    with pytest.warns(FutureWarning, match="Mutating unit root"):
        regular.max_lags = 1
    assert_equal(regular.lags, 1)
    assert_equal(regular.max_lags, 1)
Exemple #3
0
 def ADF_test(self, df_ts, lags=None):
     """Run an ADF test on ``df_ts``, print its summary and return the test.

     :param df_ts: series to test; if building the test fails, it is retried
         on ``df_ts.dropna()``.
     :param lags: number of lags to use.  ``None`` (or the legacy string
         ``'None'``) leaves the lag choice to ``ADF``.
     :return: the ``ADF`` test object.
     """
     # Accept the historical string spelling, then pass the value to the
     # constructor: assigning ``adf.lags`` after construction is the
     # mutation path that newer ``arch`` releases warn about.
     if isinstance(lags, str) and lags == 'None':
         lags = None
     try:
         adf = ADF(df_ts, lags=lags)
     except Exception:
         # Best-effort retry without missing values; unlike a bare
         # ``except`` this lets KeyboardInterrupt/SystemExit propagate.
         adf = ADF(df_ts.dropna(), lags=lags)
     print(adf.summary().as_text())
     return adf
Exemple #4
0
def test_adf_buggy_timeseries2():
    """A two-point series must be reported as too short when .stat is read."""
    adf = ADF(np.asarray([0, 0]))
    expected = "A minimum of 4 observations are needed"
    # Building the test object happens outside the raises block; the
    # exception is only expected once the statistic is requested.
    with pytest.raises(InfeasibleTestException, match=expected):
        assert np.isfinite(adf.stat)
Exemple #5
0
    def check_cointegration(self):
        """ADF-test the residuals of every pair in ``self.pairs_list``.

        For each pair the two ``self.log_price`` columns are passed to
        ``self._get_residuals`` and the residuals are tested with
        ``ADF(..., lags=1)``.  Progress is printed on a single line.

        Sets:
            self.cointegration_result: one row per order-independent pair id
                (the first row after sorting by p-value, descending),
                indexed by the pair name.
            self.cointegrated_pairs_list: the pair ids split back into lists.
            self.spreads: DataFrame of residuals, one column per pair name,
                indexed like ``self.price``.
        """
        n_pairs = len(self.pairs_list)
        rows = []
        residual_columns = {}
        for done, pair in enumerate(self.pairs_list, start=1):
            x, y = self.log_price.loc[:, pair].values.T
            label = "|".join(pair)
            # Sorted join: the same id regardless of the order within the pair.
            key = "|".join(sorted(pair))
            residuals = self._get_residuals(x, y)
            test = ADF(residuals, lags=1)
            rows.append({
                "id": key,
                "p_value": test.pvalue,
                "stat": test.stat,
                "pair": label
            })
            residual_columns[label] = residuals
            print(
                f"{done}/{n_pairs} ({done / n_pairs:.2%})",
                end="\r",
                flush=True)

        spreads = pd.DataFrame(residual_columns, index=self.price.index)

        table = pd.DataFrame(rows).set_index("id")
        table = table.sort_values("p_value", ascending=False)
        table = table.groupby(level=0).first()
        self.cointegration_result = table.set_index("pair")
        self.cointegrated_pairs_list = [key.split("|") for key in table.index]
        self.spreads = spreads
Exemple #6
0
 def test_adf_critical_value(self):
     """Critical values must equal the tau_2010 polynomial at 1/nobs."""
     adf = ADF(self.inflation, trend="c", lags=3)
     reported = adf.critical_values
     expected = polyval(tau_2010["c"][0, :, ::-1].T, 1.0 / adf.nobs)
     levels = ("1%", "5%", "10%")
     cv = {level: expected[pos] for pos, level in enumerate(levels)}
     for level, value in cv.items():
         assert_almost_equal(value, reported[level])
Exemple #7
0
 def test_adf_critical_value(self):
     """Critical values must equal the tau_2010 polynomial at 1/nobs."""
     adf = ADF(self.inflation, trend='c', lags=3)
     adf_cv = adf.critical_values
     temp = polyval(tau_2010['c'][0, :, ::-1].T, 1. / adf.nobs)
     cv = {'1%': temp[0], '5%': temp[1], '10%': temp[2]}
     # dict.items() works on every supported Python; no compat shim needed.
     for k, v in cv.items():
         assert_almost_equal(v, adf_cv[k])
Exemple #8
0
def test_low_memory_singular():
    """An almost-all-zero series must raise when the low-memory stat is read."""
    head = np.random.standard_normal()
    tail = np.random.standard_normal()
    series = np.zeros(1000)
    series[:3] = head
    series[-3:] = tail
    with pytest.raises(InfeasibleTestException, match="The maximum lag you are"):
        ADF(series, max_lags=10, low_memory=True).stat
Exemple #9
0
def test_adf_buggy_timeseries1():
    """A single observation must be reported as too short when .stat is read."""
    adf = ADF(np.asarray([0]))
    expected = "A minimum of 4 observations are needed"
    # The test object is built outside the raises block; only reading the
    # statistic is expected to fail.
    with pytest.raises(InfeasibleTestException, match=expected):
        assert np.isfinite(adf.stat)
def test_adf_buggy_timeseries3():
    """A constant series must raise InfeasibleTestException when .stat is read."""
    constant = np.ones(1000, dtype=int)
    adf = ADF(constant)
    expected = "The maximum lag you are"
    with pytest.raises(InfeasibleTestException, match=expected):
        assert np.isfinite(adf.stat)
Exemple #11
0
 def cointegration(self, priceX, priceY):
     """Check two price series for a cointegration relationship.

     Regresses ``log(priceY)`` on a constant and ``log(priceX)`` with OLS and
     runs an ADF test on the residuals.  A report is printed either way.

     :param priceX: price series used as the regressor.
     :param priceY: price series used as the response.
     :return: ``(intercept, beta)`` when the residual ADF p-value is below
         0.05; ``None`` when it is not, or when either input is missing.
     """
     if priceX is None or priceY is None:
         print("缺少价格序列")
         # Nothing to regress on: stop here instead of failing in np.log.
         return None
     logX = np.log(priceX)
     logY = np.log(priceY)
     results = sm.OLS(logY, sm.add_constant(logX)).fit()
     adfSpread = ADF(results.resid)
     # Read the coefficients by position from a plain array so the lookup
     # does not depend on whether ``params`` carries labels.
     params = np.asarray(results.params)
     intercept, beta = params[0], params[1]
     if adfSpread.pvalue >= 0.05:
         print("""交易价格不具有协整关系。
         p-value of ADF test:%f,
         Coefficients of regression:
         Intercept: %f
         beta: %f
         """ % (adfSpread.pvalue, intercept, beta))
         return None
     print("""交易价格具有协整关系。
         p-value of ADF test:%f,
         Coefficients of regression:
         Intercept: %f
         beta: %f
         """ % (adfSpread.pvalue, intercept, beta))
     return (intercept, beta)
Exemple #12
0
 def Cointegration(self, priceX, priceY):
     """Check two price series for a cointegration relationship.

     Regresses ``log(priceY)`` on a constant and ``log(priceX)`` with OLS and
     runs an ADF test on the residuals.  A report is printed either way.

     :param priceX: price series used as the regressor.
     :param priceY: price series used as the response.
     :return: ``(intercept, beta, pvalue)`` when the residual ADF p-value is
         below 0.05; ``None`` when it is not, or when either input is missing.
     """
     if priceX is None or priceY is None:
         print('缺少价格序列')
         # Nothing to regress on: stop here instead of failing in np.log.
         return None
     priceX = np.log(priceX)
     priceY = np.log(priceY)
     results = sm.OLS(priceY, sm.add_constant(priceX)).fit()
     adfSpread = ADF(results.resid)
     # Read the coefficients by position from a plain array so the lookup
     # does not depend on whether ``params`` carries labels.
     params = np.asarray(results.params)
     intercept, beta = params[0], params[1]
     if adfSpread.pvalue >= 0.05:
         print('''交易价格不具有协整关系.
               P-value of ADF text: %f
               Coefficients of regression:
               Intercept : %f
               Beta: %f
               ''' %
               (adfSpread.pvalue, intercept, beta))
         return None
     print('''交易价格具有协整关系.
               P-value of ADF text: %f
               Coefficients of regression:
               Intercept : %f
               Beta: %f
               ''' %
               (adfSpread.pvalue, intercept, beta))
     return (intercept, beta, adfSpread.pvalue)
Exemple #13
0
def test_load_source_values():
    """Print ADF results for the 'CS-Aaa-3MO' column from two libraries.

    Loads the frame via ``get_ytw_test()``, runs statsmodels' ``adfuller``
    and arch's ``ADF`` on the same values, prints both results, then dumps
    the full frame through ``cs_data_analysis.print_full``.  There are no
    assertions; the function is a manual smoke check.
    """
    frame = get_ytw_test()
    logger = setlogfile('test.load.source.values', logging.INFO)

    # Imports are kept local and in this order, interleaved with their use.
    from statsmodels.tsa.stattools import adfuller
    print(adfuller(frame['CS-Aaa-3MO'].values))

    from arch.unitroot import ADF
    arch_test = ADF(y=frame['CS-Aaa-3MO'].values)
    print(arch_test.pvalue)

    import cs_data_analysis as cs
    cs.print_full(frame, logger)
    '''
Exemple #14
0
def unitroot_test(series):
    """Plot ``series`` and print ADF and Phillips-Perron unit-root summaries.

    Steps, in order:
      1. Plot the series and its partial autocorrelation.
      2. Print a LaTeX-style table row of AIC/BIC for AR orders 12 down to 1.
      3. Let ``ar_select_order`` pick lags and run ADF with the largest one.
      4. Run Phillips-Perron (3 lags) with both 'tau' and 'rho' statistics.
    """
    plt.figure()
    plt.plot(series)
    plot_pacf(series)

    # AIC / BIC table, highest order first.
    print('$p$ & AIC & BIC \\\\')
    max_lags = 12
    for order in range(max_lags, 0, -1):
        fitted = AutoReg(series, order, 'n').fit()
        print(f'{order} & {round(fitted.aic, 3)} & {round(fitted.bic, 3)} \\\\')

    # Lag order chosen by ar_select_order; the last selected lag is used.
    selection = ar_select_order(series, max_lags, trend='n')
    chosen = selection.ar_lags[-1]
    print(f'Lags selection: {selection.ar_lags}')

    print(ADF(series, chosen).summary())

    # Both Phillips-Perron variants are built before either is printed.
    pp_tests = [PhillipsPerron(series, 3, test_type=kind)
                for kind in ('tau', 'rho')]
    for pp in pp_tests:
        print(pp.summary())
Exemple #15
0
def test_adf_short_timeseries():
    """Regression check (GH 262): ADF on a ten-point series with two ones."""
    import numpy as np
    from arch.unitroot import ADF
    series = np.zeros(10)
    series[6:8] = 1.
    result = ADF(series)
    assert_almost_equal(result.stat, -2.236, decimal=3)
    assert result.lags == 1
def adf_test(timeseries, trend):
    """Run an ADF test with the given trend and return ``(stat, pvalue)``.

    :param timeseries: data passed to ``arch.unitroot.ADF``.
    :param trend: trend specification; converted with ``str`` before use.
    :return: tuple ``(adf.stat, adf.pvalue)``.
    """
    from arch.unitroot import ADF
    # The trend goes to the constructor rather than being assigned
    # afterwards, so the test object is configured once and never mutated.
    adf = ADF(timeseries, trend=str(trend))
    return (adf.stat, adf.pvalue)
Exemple #17
0
 def test_adf_auto_t_stat(self):
     """t-stat lag selection picks 11 lags; changing lags/trend moves the stat."""
     known_trends = ("nc", "c", "ct", "ctt")

     adf = ADF(self.inflation, method="t-stat")
     assert_equal(adf.lags, 11)
     low_mem = ADF(self.inflation, method="t-stat", low_memory=True)
     assert_equal(low_mem.lags, 11)

     # Adding one lag must change the statistic.
     previous = adf.stat
     adf.lags += 1
     assert adf.stat != previous

     # Switching the trend must change it again; the data stay untouched.
     previous = adf.stat
     assert_equal(adf.y, self.inflation)
     adf.trend = "ctt"
     assert adf.stat != previous
     assert adf.trend == "ctt"

     assert len(adf.valid_trends) == len(known_trends)
     assert all(name in known_trends for name in adf.valid_trends)
     assert adf.null_hypothesis == "The process contains a unit root."
     assert adf.alternative_hypothesis == "The process is weakly stationary."
Exemple #18
0
 def test_adf_auto_t_stat(self):
     """t-stat lag selection picks 11 lags; changing lags/trend moves the stat."""
     trends = ('nc', 'c', 'ct', 'ctt')
     null_text = 'The process contains a unit root.'
     alt_text = 'The process is weakly stationary.'

     adf = ADF(self.inflation, method='t-stat')
     assert_equal(adf.lags, 11)
     adf_low_memory = ADF(self.inflation, method='t-stat', low_memory=True)
     assert_equal(adf_low_memory.lags, 11)

     # One extra lag must move the statistic.
     reference = adf.stat
     adf.lags += 1
     assert adf.stat != reference

     # A different trend must move it again; the stored data are unchanged.
     reference = adf.stat
     assert_equal(adf.y, self.inflation)
     adf.trend = 'ctt'
     assert adf.stat != reference
     assert adf.trend == 'ctt'

     assert len(adf.valid_trends) == len(trends)
     for candidate in adf.valid_trends:
         assert candidate in trends
     assert adf.null_hypothesis == null_text
     assert adf.alternative_hypothesis == alt_text
Exemple #19
0
def test_representations(trend):
    """The trend label must appear in the str, repr and HTML renderings."""
    rnd = np.random.RandomState(12345)
    y = np.cumsum(rnd.randn(250))
    adf = ADF(y, trend=trend, max_lags=16)
    check = 'No Trend' if trend == 'nc' else 'Constant'
    # Exercise str() as well as repr(): asserting on __repr__ twice left the
    # plain-text rendering untested.
    assert check in str(adf)
    assert check in repr(adf)
    html = adf._repr_html_()
    assert check in html
    assert 'class="simpletable"' in html
Exemple #20
0
 def test_no_change_lags_trend(self):
     """Re-assigning lags, trend or max_lags to their current value warns."""
     adf = ADF(self.inflation)
     for attribute in ("lags", "trend", "max_lags"):
         current = getattr(adf, attribute)
         with pytest.warns(FutureWarning, match="Mutating unit root"):
             setattr(adf, attribute, current)
Exemple #21
0
def test_representations(trend):
    """The trend label must appear in the str, repr and HTML renderings."""
    rnd = np.random.RandomState(12345)
    y = np.cumsum(rnd.standard_normal(250))
    adf = ADF(y, trend=trend, max_lags=16)
    check = "No Trend" if trend == "n" else "Constant"
    # Exercise str() as well as repr(): asserting on __repr__ twice left the
    # plain-text rendering untested.
    assert check in str(adf)
    assert check in repr(adf)
    html = adf._repr_html_()
    assert check in html
    assert 'class="simpletable"' in html
 def isCointegration(self, priceX, priceY):
     """Return ``(is_cointegrated, intercept, beta)`` for two series.

     ``priceY`` is regressed on a constant and ``priceX`` with OLS, using the
     inputs exactly as passed (no transform is applied here), and the
     residuals are ADF-tested.  The flag is ``False`` when the residual
     p-value is at least 0.05 (residuals treated as non-stationary) and
     ``True`` otherwise.
     """
     fit = sm.OLS(priceY, sm.add_constant(priceX)).fit()
     spread_test = ADF(fit.resid)
     # Written as the negation of ``>=`` so every p-value is classified
     # exactly as by an if/else on ``pvalue >= 0.05``.
     cointegrated = not (spread_test.pvalue >= 0.05)
     return (cointegrated, fit.params[0], fit.params[1])
Exemple #23
0
def UnitRootTest(ret, isdefaultmethod=True, isprintsummary=False):
    """
    Run an Augmented Dickey-Fuller (ADF) unit-root test on ``ret``.

    Returns False when the null hypothesis is not rejected (unit root,
    the series is non-stationary) and True when it is rejected (no unit
    root, the series is weakly stationary).  The p-value is compared with
    the module-level ``PVal``.

    isdefaultmethod: use ``ADF`` when True, ``stattools.adfuller`` otherwise.
    isprintsummary: print the test details and the verdict when True.
    """
    rule = "- - - - - - - - - - - - -  - - - - - - - - - - - - -"

    def _verdict(pvalue):
        # Compare with PVal, optionally print the framed verdict, return it.
        stationary = bool(pvalue < PVal)
        if isprintsummary:
            print(rule)
            if stationary:
                print("ADF stats p-value < %.2f -> weakly stationary!" % PVal)
            else:
                print("ADF stats p-value > %.2f -> non-stationary!" % PVal)
            print(rule)
        return stationary

    # Unless debugging, the default arguments are the recommended ones.
    if isdefaultmethod:  # default & recommended in the current version
        results = ADF(ret, lags = None, max_lags = None, \
                      trend = 'c', method = "AIC")
        if isprintsummary:
            print(results.summary().as_text())
        return _verdict(results.pvalue)

    # future preference
    results = stattools.adfuller(ret, maxlag = None, \
                                 regression = 'c', autolag = "AIC", \
                                 store = False, regresults = False)
    # returns: adfstat, pvalue, usedlag, nobs, critvalues(, icbest)
    if isprintsummary:
        print("ADF test statistics: ", results[0])
        print("MacKinnon approximated p-value: ", results[1])
        print("# of lags used: ", results[2])
        print("# of obs. used for ADF test: ", results[3])
        print("Critical values for p-value = 0.01, 0.05, and 0.10: ")
        for level in ("1%", "5%", "10%"):
            print(results[4][level])
        print("The best information criterion (min. AIC): ", results[5])
    return _verdict(results[1])
Exemple #24
0
def pares(dados, intervalo=(100, 120, 140, 160, 180, 200, 220, 250), char_excluido=-3, min_period=5):
    """Scan every ordered pair of columns for ADF-stationary regression residuals.

    For each ordered pair (independent, dependent) and each window length in
    ``intervalo``, the dependent column is regressed on the independent one
    over the last ``n`` rows and the residuals are ADF-tested.  When the ADF
    statistic is below its 5% critical value the statistic is stored in the
    ``ADF-<n>`` column and the pair's ``Total`` is incremented; otherwise 0
    is stored.

    Parameters
    ----------
    dados : DataFrame with one column per asset.
    intervalo : iterable of window lengths (rows counted from the end).
        The ``ADF-<n>`` output columns are derived from it.
    char_excluido : slice end applied to each column name to build the
        asset label (``name[:char_excluido]``).
    min_period : minimum ``Total`` a pair needs to be kept.

    Returns
    -------
    DataFrame with columns ``Ativo_Independente``, ``Ativo_Dependente``,
    one ``ADF-<n>`` per window and ``Total``, filtered to rows whose
    ``Total`` is at least ``min_period``.
    """
    janelas = list(intervalo)
    # Column names follow the windows actually requested, so a custom
    # ``intervalo`` no longer writes into mislabelled or missing columns.
    colunas_adf = [f'ADF-{n}' for n in janelas]
    coluna_df = ['Ativo_Independente', 'Ativo_Dependente'] + colunas_adf + ['Total']
    df = pd.DataFrame(columns=coluna_df)

    nomes = [coluna[:char_excluido] for coluna in dados.columns]
    m = 0
    for i, ativo_x in enumerate(nomes):
        for j, ativo_y in enumerate(nomes):
            if j == i:
                continue

            par = dados.iloc[:, [i, j]]
            df.loc[m, 'Ativo_Independente'] = ativo_x
            df.loc[m, 'Ativo_Dependente'] = ativo_y

            l_ini = len(par.index)
            soma = 0
            for n, coluna in zip(janelas, colunas_adf):
                # Clamp at 0: a negative start would wrap around and select
                # a short tail instead of the whole available history.
                par_n = par.iloc[max(l_ini - n, 0):, :]

                # Regression of the dependent asset on the independent one.
                X = par_n.iloc[:, 0].values.reshape(-1, 1)
                y = par_n.iloc[:, 1].values
                modelo = LinearRegression()
                modelo.fit(X, y)
                residuos = y - modelo.predict(X)

                adf = ADF(residuos)
                if adf.stat < adf.critical_values['5%']:
                    df.loc[m, coluna] = float(adf.stat)
                    soma = soma + 1
                else:
                    df.loc[m, coluna] = 0

            df.loc[m, 'Total'] = soma
            m = m + 1
    df = df.loc[df['Total'] >= min_period, :]
    return df
Exemple #25
0
 def ADF_test(df_ts, lags=None):
     """
     ADF from arch
     formula:
     xt-xt-1 ~ b0 + (b1-1)*xt-1 + e
     test if b1-1 == 0 ~ DF statistics
     :param df_ts: series to test; if building the test fails, it is retried
         on ``df_ts.dropna()``.
     :param lags: number of lags to use.  ``None`` (or the legacy string
         ``'None'``) leaves the lag choice to ``ADF``.
     :return: the ``ADF`` test object, after printing its summary.
     """
     # Accept the historical string spelling, then pass the value to the
     # constructor instead of assigning ``adf.lags`` after construction.
     if isinstance(lags, str) and lags == 'None':
         lags = None
     try:
         adf = ADF(df_ts, lags=lags)
     except Exception:
         # Best-effort retry without missing values; unlike a bare
         # ``except`` this lets KeyboardInterrupt/SystemExit propagate.
         adf = ADF(df_ts.dropna(), lags=lags)
     print(adf.summary().as_text())
     return adf
def test():
    """Print the ADF summary of OLS residuals for stocks 300033 and 300059.

    Prices are merged, split at 2015-12-31, and ``close_y`` is regressed on
    a constant and ``close_x`` over the training part only.
    """
    prices = merge_prices(get_price('300033'), get_price('300059'))
    train_df, _holdout = split_by_date(prices, '2015-12-31')

    x = train_df.close_x
    y = train_df.close_y
    fitted = sm.OLS(y, sm.add_constant(x)).fit()
    print(ADF(fitted.resid).summary())
    def time_series(self):  # time series diagnostics
        """Plot ACF/PACF of ``self.rate`` and print its ADF test summary.

        Original author's note on checking stationarity: 1) look at the time
        plot, 2) look at the ACF and PACF, 3) run a unit-root test
        (DF / ADF / PP).
        """
        rate1 = self.rate
        # The numeric acf/pacf arrays were computed and then discarded; only
        # the plots below use the autocorrelation structure.
        # Autocorrelation plot
        plot_acf(rate1, use_vlines=True, lags=30)
        # Partial autocorrelation plot
        plot_pacf(rate1, use_vlines=True, lags=30)
        # ADF test
        adfrate = ADF(rate1)
        print(adfrate.summary().as_text())
Exemple #28
0
 def diff_selection(df, max_diff=12, alpha=0.01):
     """Return the smallest differencing period whose ADF p-value is below ``alpha``.

     For each period ``i`` from 1 to ``max_diff`` the first column of ``df``
     is differenced with ``.diff(i)``, rows containing NaN are dropped, and
     the differenced column is ADF-tested.

     :param df: DataFrame; only its first column is differenced.
     :param max_diff: largest differencing period to try.
     :param alpha: p-value threshold (default 0.01).
     :return: the first period that passes, or ``None`` when none does.
     """
     base = df.loc[:, df.columns[0]]
     for order in range(1, max_diff + 1):
         # Drop NaNs across the whole frame so the tested rows match those
         # kept by a copy-then-dropna on ``df`` with the extra column.
         diffed = df.assign(diff=base.diff(order)).dropna()
         if ADF(diffed.loc[:, 'diff']).pvalue < alpha:
             # Return the period that was actually tested, not the value the
             # loop variable holds after the scan has finished.
             return order
     return None
Exemple #29
0
    def diff_process(self):
        """Test, difference and plot the first column of ``self.df``.

        Sets ``self.p_value`` (Ljung-Box, 1 lag), ``self.ADF_value`` (ADF
        test object), ``self.diff_period`` (difference at ``self.period``,
        NaNs dropped) and ``self.diff_`` (first difference of that, NaNs
        dropped), then shows the three series stacked in one figure.
        """
        series = self.df.iloc[:, 0]

        self.p_value = acorr_ljungbox(series, lags=1)
        # Author's note: a p-value above 0.05 is read as white noise, i.e.
        # the series has no correlation over time.
        print('白噪声检验p值:', self.p_value[1], '\n')

        # Author's note: a p-value below 0.05 is read as stationary
        # (unit-root test).
        self.ADF_value = ADF(series)

        # Seasonal differencing, followed by one ordinary difference.
        self.diff_period = series.diff(self.period).dropna()
        self.diff_ = self.diff_period.diff().dropna()

        fig = plt.figure(figsize=(20, 6))
        panels = (
            (311, series),            # raw data
            (312, self.diff_period),  # author's note: no periodicity, not yet stationary
            (313, self.diff_),        # author's note: stationary after one more difference
        )
        for position, data in panels:
            fig.add_subplot(position).plot(data)
        plt.show()
Exemple #30
0
def coint_period(dados, ativo_x, ativo_y, period=100, model=False):
    """Regress ``ativo_y`` on ``ativo_x`` over the last ``period`` rows and ADF-test the residuals.

    Parameters
    ----------
    dados : DataFrame containing both columns.
    ativo_x : label of the independent column.
    ativo_y : label of the dependent column.
    period : number of trailing rows to use.
    model : when truthy, return the fitted objects instead of the summary.

    Returns
    -------
    If ``model`` is truthy: ``(modelo, y_pred, residuos, mean, std)``.
    Otherwise a dict with keys ``coef``, ``intercept``, ``media_res``,
    ``desvio_res``, ``adf_stats``, ``Coint_99`` and ``Coint_95``; the two
    flags tell whether the ADF statistic is below the 1% / 5% critical value.
    """
    X = dados.loc[:, ativo_x].values[-period:].reshape(-1, 1)
    y = dados.loc[:, ativo_y].values[-period:]
    modelo = LinearRegression()
    modelo.fit(X, y)
    y_pred = modelo.predict(X)
    residuos = y - y_pred
    media_res = np.mean(residuos)
    desvio_res = np.std(residuos)

    if model:
        return modelo, y_pred, residuos, media_res, desvio_res

    adf = ADF(residuos)
    return {
        # coef_ holds one slope for the single regressor; index it rather
        # than converting a whole array to float.
        'coef': float(modelo.coef_[0]),
        'intercept': float(modelo.intercept_),
        'media_res': media_res,
        'desvio_res': desvio_res,
        'adf_stats': adf.stat,
        'Coint_99': adf.stat < adf.critical_values['1%'],
        'Coint_95': adf.stat < adf.critical_values['5%'],
    }