Exemple #1
0
    def test_het_arch(self):
        """Compare ``het_arch`` (and indirectly ``het_lm``) with R reference values.

        The reference numbers were produced in R with the FinTS package::

            > library(FinTS)
            > at = ArchTest(residuals(fm), lags=4)
            > mkhtest(at, 'archtest_4', 'chi2')
            > at = ArchTest(residuals(fm), lags=12)
            > mkhtest(at, 'archtest_12', 'chi2')
        """
        # expected R results, keyed by the number of lags used in the test
        reference = {
            4: {"statistic": 3.43473400836259, "pvalue": 0.487871315392619,
                "parameters": (4,), "distr": "chi2"},
            12: {"statistic": 8.648320999014171, "pvalue": 0.732638635007718,
                 "parameters": (12,), "distr": "chi2"},
        }

        # run every test first, then compare
        computed = {lag: smsdia.het_arch(self.res.resid, maxlag=lag)
                    for lag in reference}
        for lag, expected in reference.items():
            # only the first two returned values (statistic, p-value) are compared
            compare_t_est(computed[lag][:2], expected, decimal=(12, 13))
Exemple #2
0
def engle_test_resid(series, order = (1,1)):
    """Fit an ARMA model to ``series`` and run Engle's test on its residuals.

    The p-value is printed, the model summary is written as LaTeX to
    ``<cwd>/model_results/ARMA_<series.name>.tex``, and the p-value is
    returned.

    :param series: named series (``series.name`` is used in the output)
    :param order: order passed to ``smt.ARMA``
    :return: second element of the ``het_arch`` result for the residuals
    """
    fitted = smt.ARMA(series, order=order).fit(method='mle', trend='nc')
    engle_pvalue = het_arch(fitted.resid)[1]
    print(series.name + ' p-value Engle: ' + str(engle_pvalue))

    tex_path = os.getcwd() + '/model_results/ARMA_' + series.name + '.tex'
    with open(tex_path, "w") as tex_file:
        tex_file.write(fitted.summary().as_latex())
    return engle_pvalue
Exemple #3
0
def MF_GARCHFit(y,preproc = None,P = 1,Q = 1):
    """Run Engle's ARCH test on ``y`` and fit a GARCH(P, Q) model to it.

    The series is standardised first (mean removed, divided by its standard
    deviation); both the test and the model use the standardised series.

    Parameters
    ----------
    y : array-like
        Input series.
    preproc : optional
        Not used by this function; kept so existing callers keep working.
    P, Q : int
        Passed to ``arch.arch_model`` as ``p`` and ``q``.

    Returns
    -------
    dict
        ``lm``, ``lmpval``, ``fval``, ``fpval`` (the four values returned by
        ``het_arch``), ``logl``, ``success``, plus one entry per fitted
        parameter keyed by the parameter name.
    """
    y = (y - np.mean(y)) / np.std(y)

    lm, lmpval, fval, fpval = het_arch(y)
    outDict = {'lm': lm, 'lmpval': lmpval, 'fval': fval, 'fpval': fpval}

    model = arch.arch_model(y, vol='Garch', p=P, o=0, q=Q, dist='Normal')
    results = model.fit()

    # NOTE(review): the underscore-prefixed attributes are private to the
    # result object and may change between library versions.
    outDict['logl'] = results._loglikelihood
    outDict['success'] = results._optim_output['success']

    # one entry per fitted parameter, keyed by its name
    outDict.update(zip(results._names, results._params))

    return outDict
    def print_analyse(self):
        """Print the ARCH test on standardised residuals and plot the training loss.

        The model prediction for ``self.x`` is exponentiated and square-rooted
        to obtain ``sigma``; ``self.y / sigma`` is then passed to ``het_arch``.
        The loss history is plotted against iteration numbers starting at 1,
        saved to ``plot_images/traning_loss/<name>.png``, shown and closed.
        """
        # Compute the standardised residual series used for the ARCH test.
        log_sigma = self.model.predict(self.x)
        sigma = np.reshape(np.sqrt(np.exp(log_sigma)), (-1))
        std_res = self.y / sigma

        # Test for ARCH effects.
        arch_test = het_arch(std_res)
        best_loss = min(self.history)
        print("*" * 10, self.name, "*" * 10)
        print("{}模型,最优的损失值{:.3f}".format(self.name, best_loss))
        print("arch检验的统计量:{:.3f},对应p值为:{:.4f}\n".format(
            arch_test[0], arch_test[1]))

        # Plot the training loss; the x axis follows the actual history length
        # instead of assuming exactly 100 iterations.
        iterations = np.arange(1, len(self.history) + 1)
        plt.plot(iterations, self.history, color='black', marker=',', markersize=7,
                 lw=1, label='{} best loss: {:.3f}'.format(self.name, best_loss))
        plt.xlabel('interation')
        plt.ylabel('Training Loss')
        plt.title("{} Training loss with iteration".format(self.name))
        plt.legend(loc="upper right")

        # Make sure the output folder (including its parent) exists before saving.
        os.makedirs("plot_images/traning_loss", exist_ok=True)
        plt.savefig("plot_images/traning_loss/{}.png".format(self.name))
        plt.show()
        plt.close()
Exemple #5
0
 def conditional_heteroskedastic(self, significance=0.05) -> bool:
     """
     Engle's LM test; H0 is homoskedasticity (no ARCH effects).

     :param significance: significance level
     :return: True if the LM p-value is below ``significance`` (H0 rejected,
         conditional heteroskedasticity detected), otherwise False
     """
     # het_arch returns (lm, lm_pvalue, fval, f_pvalue); index 1 is the LM
     # p-value. Index 2 is the F statistic and must not be compared with a
     # significance level.
     # NOTE(review): sum(order) also counts d if order is (p, d, q) - confirm.
     lm_pvalue = het_arch(self.model.resid, ddof=sum(
         self.model.order))[1]
     return lm_pvalue < significance
    def test_het_arch2(self):
        """Exercise the ``autolag`` option of ``het_arch`` (this also covers ``het_lm``).

        The test expects automatic lag selection to agree with a fixed lag of
        1 on this data (the original author notes the optimal lag is 1).
        """
        residuals = self.res.resid

        fixed = smsdia.het_arch(residuals, maxlag=1, autolag=None, store=True)
        selected = smsdia.het_arch(residuals, maxlag=5, autolag='aic', store=True)
        # with store=True the last returned element carries ``resols``
        fixed_store, selected_store = fixed[-1], selected[-1]

        assert_almost_equal(selected_store.resols.params,
                            fixed_store.resols.params, decimal=13)
        assert_almost_equal(selected[:4], fixed[:4], decimal=13)

        # the smallest setting, maxlag=1, must also work with autolag
        smallest = smsdia.het_arch(residuals, maxlag=1, autolag='aic')
        assert_almost_equal(smallest[:4], fixed[:4], decimal=13)
    def test_het_arch2(self):
        """Exercise the ``autolag`` option of ``het_arch`` (this also covers ``het_lm``).

        The test expects automatic lag selection to agree with a fixed lag of
        1 on this data (the original author notes the optimal lag is 1).
        """
        residuals = self.res.resid

        fixed = smsdia.het_arch(residuals, maxlag=1, autolag=None, store=True)
        selected = smsdia.het_arch(residuals, maxlag=5, autolag='aic', store=True)
        # with store=True the last returned element carries ``resols``
        fixed_store, selected_store = fixed[-1], selected[-1]

        assert_almost_equal(selected_store.resols.params,
                            fixed_store.resols.params, decimal=13)
        assert_almost_equal(selected[:4], fixed[:4], decimal=13)

        # the smallest setting, maxlag=1, must also work with autolag
        smallest = smsdia.het_arch(residuals, maxlag=1, autolag='aic')
        assert_almost_equal(smallest[:4], fixed[:4], decimal=13)
Exemple #8
0
    def test_het_arch(self):
        """Compare ``het_arch`` (and indirectly ``het_lm``) with R reference values.

        The reference numbers were produced in R with the FinTS package::

            > library(FinTS)
            > at = ArchTest(residuals(fm), lags=4)
            > mkhtest(at, 'archtest_4', 'chi2')
            > at = ArchTest(residuals(fm), lags=12)
            > mkhtest(at, 'archtest_12', 'chi2')
        """
        # expected R results, keyed by the number of lags used in the test
        reference = {
            4: {'statistic': 3.43473400836259, 'pvalue': 0.487871315392619,
                'parameters': (4,), 'distr': 'chi2'},
            12: {'statistic': 8.648320999014171, 'pvalue': 0.732638635007718,
                 'parameters': (12,), 'distr': 'chi2'},
        }

        # run every test first, then compare
        computed = {lag: smsdia.het_arch(self.res.resid, maxlag=lag)
                    for lag in reference}
        for lag, expected in reference.items():
            # only the first two returned values (statistic, p-value) are compared
            compare_t_est(computed[lag][:2], expected, decimal=(12, 13))
Exemple #9
0
def f_heterocerasticidad(df_indicador):
    """
    Run ``het_arch`` on the ``'Actual'`` column of the given data.

    Parameters
    ----------
    df_indicador
        Object indexable by ``'Actual'`` (presumably a DataFrame of indicator
        values - confirm against callers).

    Returns
    -------
    Whatever ``het_arch`` returns for that column.

    """
    return het_arch(df_indicador['Actual'])
def FitGARCH(series, p, q):
    """
    Fit a zero-mean GARCH(p, q) model and run the ARCH-LM test on the
    standardized series.

    :param series: series to fit on
    :param p: lag length of residuals
    :param q: lag length of variances
    :return: two-element list ``[(p, q), archresult]`` where ``archresult``
        is element 1 of the ``het_arch`` result (the LM p-value), computed
        with 20 lags and ``ddof=p + q``
    """
    am = arch_model(series, mean='Zero', vol='GARCH', p=p, q=q,
                    rescale=False).fit(disp='off')

    # calculate standardized residuals: conditional_volatility is already a
    # standard deviation (square root of the conditional variance), so the
    # series is divided by it directly; taking another square root would
    # leave the series incorrectly scaled.
    series_std = series / am.conditional_volatility

    # append result of arch lm test with degrees of freedom of p+q to output
    archresult = het_arch(series_std, nlags=20, ddof=p + q)[1]
    return [(p, q), archresult]
Exemple #11
0
def het_arch(resid: pd.Series, nlags=None, ddof=0):
    """
    Engle’s Test for Autoregressive Conditional Heteroscedasticity (ARCH).

    Thin wrapper around ``diagnostic.het_arch`` that keeps only the
    Lagrange multiplier statistic and its p-value.

    Hypotheses
    ----------
    Null: no conditional heteroscedasticity (this does not imply the
    absence of serial correlation).
    Alternative: conditional heteroscedasticity exists.

    Heteroscedasticity means that the variance of a time series is not
    constant over time: the variance over a sliding window t, ..., t+i
    differs from that over a window t+i+1, ..., t+j, where t is the initial
    time index and i, j are integers.

    References
    ----------
    * https://www.mathworks.com/help/econ/archtest.html
    * https://www.mathworks.com/help/econ/engles-arch-test.html

    Parameters
    ----------
    resid : pd.Series
        Residuals from an estimation.
    nlags : int, default None
        Highest lag to use.
    ddof : int, default 0
        Degrees-of-freedom correction. For residuals from a regression or
        ARMA estimation it is recommended to correct by the number of
        estimated parameters, e.g. ddof=p+q for an ARMA(p,q).

    Returns
    -------
    HetArchResult
        Named tuple with fields ``statistic`` (Lagrange multiplier test
        statistic) and ``pvalue`` (its p-value).
    """
    lm, lmpval = diagnostic.het_arch(
        resid, nlags=nlags, autolag=None, ddof=ddof)[:2]
    result_type = namedtuple('HetArchResult', 'statistic pvalue')
    return result_type(lm, lmpval)
def check_hetero(param_data):
    """
    Check whether the residuals of an estimation are heteroscedastic.

    Parameters
    ---------
    param_data: DataFrame: data containing the residuals

    Returns
    ---------
    heter: boolean: True if the data show heteroscedasticity, False if not.

    Debugging
    ---------
    check_hetero(datos_residuales)
    """
    # ARCH test
    heterosced = het_arch(param_data)
    alpha = .05  # 5% significance level (95% confidence)
    # The null hypothesis of the ARCH test is "no ARCH effects", so
    # heteroscedasticity is concluded only when the p-value is below alpha.
    return bool(heterosced[1] < alpha)
def get_engle_arch(errors, nlags):
    '''
    Print the results of Engle's ARCH test for a series of errors.

    An uncorrelated time series can still be serially dependent because of a
    dynamic conditional variance process. A series with conditional
    heteroscedasticity (autocorrelation in the squared series) is said to
    have ARCH effects. Engle's ARCH test is a Lagrange multiplier test for
    the significance of such effects (Engle, 1982).

    H0: process is not autocorrelated in the squared residuals
    H1: process is autocorrelated in the squared residuals

    A large F-statistic leads to rejection of the null.

    Missing values are dropped from ``errors`` before testing; nothing is
    returned.
    '''
    outcome = het_arch(errors.dropna(), nlags=nlags)
    print("Engle's Test for Autoregressive conditional Heteroscedasticity")
    print("Number of Lags: {}".format(nlags))

    # (output template, position in the het_arch result)
    report = (
        ('LM test-stat:{}', 0),
        ('P-val for LM:{}', 1),
        ('F-statistic:{}', 2),
        ('P-value for F-stat:{}', 3),
    )
    for template, position in report:
        print(template.format(outcome[position]))
Exemple #14
0
def f_hetero(param_data):
    """
    Run ``smd.het_arch`` on the ``'actual'`` column and label the results.

    Parameters
    ----------
    param_data : DataFrame with the indicator price data

    Returns
    -------
    heterokedasticity : one-element list holding a dict that maps
        'LM Statistic', 'LM-Test p-value', 'F-Statistic' and
        'F-Test p-value' to the values returned by the test

    Debugging
    ---------
    param_data = df_data
    """
    # An earlier Breusch-Pagan variant on OLS residuals was left disabled by
    # the original author; the ARCH test on the raw column is used instead.
    arch_test = smd.het_arch(param_data['actual'])
    labels = ('LM Statistic', 'LM-Test p-value', 'F-Statistic',
              'F-Test p-value')
    return [{label: value for label, value in zip(labels, arch_test)}]
def arch(
    other_args: List[str],
    residuals: List[float],
):
    """Print Engle's ARCH test results (LM and F versions) for the residuals.

    Any exception raised while parsing or testing is printed and the
    function returns without re-raising.

    Parameters
    ----------
    other_args : List[str]
        Command line arguments to be processed with argparse
    residuals : List[float]
        Residuals data
    """
    parser = argparse.ArgumentParser(
        add_help=False,
        prog="arch",
        description="""
            Autoregressive conditional heteroscedasticity with Engle's test
        """,
    )
    try:
        parsed = parse_known_args_and_warn(parser, other_args)
        if parsed:
            # Engle's Test for Autoregressive Conditional Heteroscedasticity
            lm_stat, lm_p, f_stat, f_p = het_arch(residuals)
            sections = (
                ("Lagrange multiplier test statistic", lm_stat, lm_p),
                ("fstatistic for F test", f_stat, f_p),
            )
            for heading, statistic, p_value in sections:
                print(heading)
                print("Statistic: %.4f" % statistic)
                print("p-value: %.4f" % p_value)
                print("")

    except Exception as err:
        print(err, "\n")
        return
Exemple #16
0
            #当i=n-2length,j=0是,代表计算最后一个样本,前1日滞后的方差,根据 n-1-length到n-1的值计算。
            end = i + 2 * length - j - 1
            start = i + length - j - 1
            tmp_var[length - j - 1] = np.log(np.var(x[start:end]))
        x_var.append(tmp_var)

    #转换成array。
    x_data, y_data, x_var = np.array(x_data), np.array(y_data), np.array(x_var)
    x_data_square = x_data**2
    return x_data, x_data_square, x_var, y_data


if __name__ == "__main__":
    # Load the ARMA residuals and run the ARCH test on the residual series.
    data = pd.read_csv("intermediate/arma_residual.csv")
    data.columns = ["residual"]
    arch_test = het_arch(data['residual'].values)
    print("残差数据,arch检验的统计量:{:.3f},对应p值为:{:.4f}".format(arch_test[0],
                                                       arch_test[1]))

    # Generate samples that contain only lagged values.
    # NOTE(review): the function whose tail is visible right above this block
    # returns four values (x_data, x_data_square, x_var, y_data); if that is
    # generate_data_lag, this three-name unpacking raises ValueError - confirm.
    x_data, y_data, x_var = generate_data_lag(data['residual'].values,
                                              length=20)
    x_data = np.concatenate([x_data, x_var], axis=-1)

    # Train the model and report its diagnostics.
    fcnn_model = GARCH_DNN(build_fcnn_model(x_data.shape[-1]),
                           name="simple_FCNN_GARCH",
                           x=x_data,
                           y=y_data)
    fcnn_model.fit()
    fcnn_model.print_analyse()
Exemple #17
0
    test_jb[2],
    ' ',
    name_jb[3],
    ' ',
    test_jb[3],
)
print('     ')
# Further diagnostics: White's test for heteroscedasticity on the model residuals.
print('WHITE TEST')
white_t = ssd.het_white(model.resid, model.model.exog, retres=False)
# NOTE(review): per the statsmodels docs het_white returns
# (lm, lm_pvalue, fvalue, f_pvalue), so indices 1 and 3 printed below would be
# the p-values of the LM and F versions - confirm for the installed version.
print(white_t[1])
stat_1 = white_t[1]
print('Whites test for hetero using lm test: ', stat_1,
      'Whites test for hetero using F-Stat :', white_t[3])
print(' ')

# Next test: ARCH test for heteroscedasticity, fixed at 2 lags (no automatic lag selection).
print('ARCH TEST')
arch = ssd.het_arch(model.resid,
                    maxlag=2,
                    autolag=None,
                    store=False,
                    regresults=False,
                    ddof=0)
print('LM test stat:', arch[0], 'LM P value:', arch[1], 'F test stat:',
      arch[2], 'F P value', arch[3])
print(' ')

# Next test: Ramsey RESET
print('Ramsey RESET test')
    def test_all(self):
        """Check GLSAR and OLS results and diagnostics against Gretl output.

        A growth-rate regression on the macrodata set is fitted three ways
        (GLSAR with a fixed rho, GLSAR with iteratively estimated rho, and
        plain OLS) and parameters, fit statistics and diagnostic tests
        (RESET, ARCH, linearity, heteroscedasticity, influence measures) are
        compared with the reference numbers embedded below. The triple-quoted
        strings in the body hold the raw Gretl output for reference.
        """

        d = macrodata.load().data
        #import datasetswsm.greene as g
        #d = g.load('5-1')

        #growth rates
        gs_l_realinv = 400 * np.diff(np.log(d['realinv']))
        gs_l_realgdp = 400 * np.diff(np.log(d['realgdp']))

        #simple diff, not growthrate, I want heteroscedasticity later for testing
        endogd = np.diff(d['realinv'])
        exogd = add_constant(np.c_[np.diff(d['realgdp']), d['realint'][:-1]],
                            prepend=True)

        endogg = gs_l_realinv
        exogg = add_constant(np.c_[gs_l_realgdp, d['realint'][:-1]],prepend=True)

        res_ols = OLS(endogg, exogg).fit()
        #print res_ols.params

        mod_g1 = GLSAR(endogg, exogg, rho=-0.108136)
        res_g1 = mod_g1.fit()
        #print res_g1.params

        mod_g2 = GLSAR(endogg, exogg, rho=-0.108136)   #-0.1335859) from R
        res_g2 = mod_g2.iterative_fit(maxiter=5)
        #print res_g2.params


        rho = -0.108136

        #                 coefficient   std. error   t-ratio    p-value 95% CONFIDENCE INTERVAL
        partable = np.array([
                        [-9.50990,  0.990456, -9.602, 3.65e-018, -11.4631, -7.55670], # ***
                        [ 4.37040,  0.208146, 21.00,  2.93e-052,  3.95993, 4.78086], # ***
                        [-0.579253, 0.268009, -2.161, 0.0319, -1.10777, -0.0507346]]) #    **

        #Statistics based on the rho-differenced data:

        result_gretl_g1 = dict(
        endog_mean = ("Mean dependent var",   3.113973),
        endog_std = ("S.D. dependent var",   18.67447),
        ssr = ("Sum squared resid",    22530.90),
        mse_resid_sqrt = ("S.E. of regression",   10.66735),
        rsquared = ("R-squared",            0.676973),
        rsquared_adj = ("Adjusted R-squared",   0.673710),
        fvalue = ("F(2, 198)",            221.0475),
        f_pvalue = ("P-value(F)",           3.56e-51),
        resid_acf1 = ("rho",                 -0.003481),
        dw = ("Durbin-Watson",        1.993858))


        #fstatistic, p-value, df1, df2
        reset_2_3 = [5.219019, 0.00619, 2, 197, "f"]
        reset_2 = [7.268492, 0.00762, 1, 198, "f"]
        reset_3 = [5.248951, 0.023, 1, 198, "f"]
        #LM-statistic, p-value, df
        arch_4 = [7.30776, 0.120491, 4, "chi2"]

        #multicollinearity
        vif = [1.002, 1.002]
        cond_1norm = 6862.0664
        determinant = 1.0296049e+009
        reciprocal_condition_number = 0.013819244

        #Chi-square(2): test-statistic, pvalue, df
        normality = [20.2792, 3.94837e-005, 2]

        #tests
        res = res_g1  #with rho from Gretl

        #basic

        assert_almost_equal(res.params, partable[:,0], 4)
        assert_almost_equal(res.bse, partable[:,1], 6)
        assert_almost_equal(res.tvalues, partable[:,2], 2)

        assert_almost_equal(res.ssr, result_gretl_g1['ssr'][1], decimal=2)
        #assert_almost_equal(res.llf, result_gretl_g1['llf'][1], decimal=7) #not in gretl
        #assert_almost_equal(res.rsquared, result_gretl_g1['rsquared'][1], decimal=7) #FAIL
        #assert_almost_equal(res.rsquared_adj, result_gretl_g1['rsquared_adj'][1], decimal=7) #FAIL
        assert_almost_equal(np.sqrt(res.mse_resid), result_gretl_g1['mse_resid_sqrt'][1], decimal=5)
        assert_almost_equal(res.fvalue, result_gretl_g1['fvalue'][1], decimal=4)
        assert_approx_equal(res.f_pvalue, result_gretl_g1['f_pvalue'][1], significant=2)
        #assert_almost_equal(res.durbin_watson, result_gretl_g1['dw'][1], decimal=7) #TODO

        #arch
        #sm_arch = smsdia.acorr_lm(res.wresid**2, maxlag=4, autolag=None)
        sm_arch = smsdia.het_arch(res.wresid, maxlag=4)
        assert_almost_equal(sm_arch[0], arch_4[0], decimal=4)
        assert_almost_equal(sm_arch[1], arch_4[1], decimal=6)

        #tests
        res = res_g2 #with estimated rho

        #estimated lag coefficient
        assert_almost_equal(res.model.rho, rho, decimal=3)

        #basic
        assert_almost_equal(res.params, partable[:,0], 4)
        assert_almost_equal(res.bse, partable[:,1], 3)
        assert_almost_equal(res.tvalues, partable[:,2], 2)

        assert_almost_equal(res.ssr, result_gretl_g1['ssr'][1], decimal=2)
        #assert_almost_equal(res.llf, result_gretl_g1['llf'][1], decimal=7) #not in gretl
        #assert_almost_equal(res.rsquared, result_gretl_g1['rsquared'][1], decimal=7) #FAIL
        #assert_almost_equal(res.rsquared_adj, result_gretl_g1['rsquared_adj'][1], decimal=7) #FAIL
        assert_almost_equal(np.sqrt(res.mse_resid), result_gretl_g1['mse_resid_sqrt'][1], decimal=5)
        assert_almost_equal(res.fvalue, result_gretl_g1['fvalue'][1], decimal=0)
        assert_almost_equal(res.f_pvalue, result_gretl_g1['f_pvalue'][1], decimal=6)
        #assert_almost_equal(res.durbin_watson, result_gretl_g1['dw'][1], decimal=7) #TODO



        c = oi.reset_ramsey(res, degree=2)
        compare_ftest(c, reset_2, decimal=(2,4))
        c = oi.reset_ramsey(res, degree=3)
        compare_ftest(c, reset_2_3, decimal=(2,4))

        #arch
        #sm_arch = smsdia.acorr_lm(res.wresid**2, maxlag=4, autolag=None)
        sm_arch = smsdia.het_arch(res.wresid, maxlag=4)
        assert_almost_equal(sm_arch[0], arch_4[0], decimal=1)
        assert_almost_equal(sm_arch[1], arch_4[1], decimal=2)



        '''
        Performing iterative calculation of rho...

                         ITER       RHO        ESS
                           1     -0.10734   22530.9
                           2     -0.10814   22530.9

        Model 4: Cochrane-Orcutt, using observations 1959:3-2009:3 (T = 201)
        Dependent variable: ds_l_realinv
        rho = -0.108136

                         coefficient   std. error   t-ratio    p-value
          -------------------------------------------------------------
          const           -9.50990      0.990456    -9.602    3.65e-018 ***
          ds_l_realgdp     4.37040      0.208146    21.00     2.93e-052 ***
          realint_1       -0.579253     0.268009    -2.161    0.0319    **

        Statistics based on the rho-differenced data:

        Mean dependent var   3.113973   S.D. dependent var   18.67447
        Sum squared resid    22530.90   S.E. of regression   10.66735
        R-squared            0.676973   Adjusted R-squared   0.673710
        F(2, 198)            221.0475   P-value(F)           3.56e-51
        rho                 -0.003481   Durbin-Watson        1.993858
        '''

        '''
        RESET test for specification (squares and cubes)
        Test statistic: F = 5.219019,
        with p-value = P(F(2,197) > 5.21902) = 0.00619

        RESET test for specification (squares only)
        Test statistic: F = 7.268492,
        with p-value = P(F(1,198) > 7.26849) = 0.00762

        RESET test for specification (cubes only)
        Test statistic: F = 5.248951,
        with p-value = P(F(1,198) > 5.24895) = 0.023:
        '''

        '''
        Test for ARCH of order 4

                     coefficient   std. error   t-ratio   p-value
          --------------------------------------------------------
          alpha(0)   97.0386       20.3234       4.775    3.56e-06 ***
          alpha(1)    0.176114      0.0714698    2.464    0.0146   **
          alpha(2)   -0.0488339     0.0724981   -0.6736   0.5014
          alpha(3)   -0.0705413     0.0737058   -0.9571   0.3397
          alpha(4)    0.0384531     0.0725763    0.5298   0.5968

          Null hypothesis: no ARCH effect is present
          Test statistic: LM = 7.30776
          with p-value = P(Chi-square(4) > 7.30776) = 0.120491:
        '''

        '''
        Variance Inflation Factors

        Minimum possible value = 1.0
        Values > 10.0 may indicate a collinearity problem

           ds_l_realgdp    1.002
              realint_1    1.002

        VIF(j) = 1/(1 - R(j)^2), where R(j) is the multiple correlation coefficient
        between variable j and the other independent variables

        Properties of matrix X'X:

         1-norm = 6862.0664
         Determinant = 1.0296049e+009
         Reciprocal condition number = 0.013819244
        '''
        '''
        Test for ARCH of order 4 -
          Null hypothesis: no ARCH effect is present
          Test statistic: LM = 7.30776
          with p-value = P(Chi-square(4) > 7.30776) = 0.120491

        Test of common factor restriction -
          Null hypothesis: restriction is acceptable
          Test statistic: F(2, 195) = 0.426391
          with p-value = P(F(2, 195) > 0.426391) = 0.653468

        Test for normality of residual -
          Null hypothesis: error is normally distributed
          Test statistic: Chi-square(2) = 20.2792
          with p-value = 3.94837e-005:
        '''

        #no idea what this is
        '''
        Augmented regression for common factor test
        OLS, using observations 1959:3-2009:3 (T = 201)
        Dependent variable: ds_l_realinv

                           coefficient   std. error   t-ratio    p-value
          ---------------------------------------------------------------
          const            -10.9481      1.35807      -8.062    7.44e-014 ***
          ds_l_realgdp       4.28893     0.229459     18.69     2.40e-045 ***
          realint_1         -0.662644    0.334872     -1.979    0.0492    **
          ds_l_realinv_1    -0.108892    0.0715042    -1.523    0.1294
          ds_l_realgdp_1     0.660443    0.390372      1.692    0.0923    *
          realint_2          0.0769695   0.341527      0.2254   0.8219

          Sum of squared residuals = 22432.8

        Test of common factor restriction

          Test statistic: F(2, 195) = 0.426391, with p-value = 0.653468
        '''


        ################ with OLS, HAC errors

        #Model 5: OLS, using observations 1959:2-2009:3 (T = 202)
        #Dependent variable: ds_l_realinv
        #HAC standard errors, bandwidth 4 (Bartlett kernel)

        #coefficient   std. error   t-ratio    p-value 95% CONFIDENCE INTERVAL
        #for confidence interval t(199, 0.025) = 1.972

        # NOTE: from here on partable, result_gretl_g1, arch_4, reset_* etc. are
        # rebound to the OLS reference values and shadow the GLSAR ones above.
        partable = np.array([
        [-9.48167,      1.17709,     -8.055,    7.17e-014, -11.8029, -7.16049], # ***
        [4.37422,      0.328787,    13.30,     2.62e-029, 3.72587, 5.02258], #***
        [-0.613997,     0.293619,    -2.091,    0.0378, -1.19300, -0.0349939]]) # **

        result_gretl_g1 = dict(
                    endog_mean = ("Mean dependent var",   3.257395),
                    endog_std = ("S.D. dependent var",   18.73915),
                    ssr = ("Sum squared resid",    22799.68),
                    mse_resid_sqrt = ("S.E. of regression",   10.70380),
                    rsquared = ("R-squared",            0.676978),
                    rsquared_adj = ("Adjusted R-squared",   0.673731),
                    fvalue = ("F(2, 199)",            90.79971),
                    f_pvalue = ("P-value(F)",           9.53e-29),
                    llf = ("Log-likelihood",      -763.9752),
                    aic = ("Akaike criterion",     1533.950),
                    bic = ("Schwarz criterion",    1543.875),
                    hqic = ("Hannan-Quinn",         1537.966),
                    resid_acf1 = ("rho",                 -0.107341),
                    dw = ("Durbin-Watson",        2.213805))

        linear_logs = [1.68351, 0.430953, 2, "chi2"]
        #for logs: dropping 70 nan or incomplete observations, T=133
        #(res_ols.model.exog <=0).any(1).sum() = 69  ?not 70
        linear_squares = [7.52477, 0.0232283, 2, "chi2"]

        #Autocorrelation, Breusch-Godfrey test for autocorrelation up to order 4
        lm_acorr4 = [1.17928, 0.321197, 4, 195, "F"]
        lm2_acorr4 = [4.771043, 0.312, 4, "chi2"]
        acorr_ljungbox4 = [5.23587, 0.264, 4, "chi2"]

        #break
        cusum_Harvey_Collier  = [0.494432, 0.621549, 198, "t"] #stats.t.sf(0.494432, 198)*2
        #see cusum results in files
        break_qlr = [3.01985, 0.1, 3, 196, "maxF"]  #TODO check this, max at 2001:4
        break_chow = [13.1897, 0.00424384, 3, "chi2"] # break at 1984:1

        arch_4 = [3.43473, 0.487871, 4, "chi2"]

        normality = [23.962, 0.00001, 2, "chi2"]

        het_white = [33.503723, 0.000003, 5, "chi2"]
        het_breush_pagan = [1.302014, 0.521520, 2, "chi2"]  #TODO: not available
        het_breush_pagan_konker = [0.709924, 0.701200, 2, "chi2"]


        reset_2_3 = [5.219019, 0.00619, 2, 197, "f"]
        reset_2 = [7.268492, 0.00762, 1, 198, "f"]
        reset_3 = [5.248951, 0.023, 1, 198, "f"]  #not available

        cond_1norm = 5984.0525
        determinant = 7.1087467e+008
        reciprocal_condition_number = 0.013826504
        vif = [1.001, 1.001]

        # load the leverage/influence table stored next to this test file
        names = 'date   residual        leverage       influence        DFFITS'.split()
        cur_dir = os.path.abspath(os.path.dirname(__file__))
        fpath = os.path.join(cur_dir, 'results/leverage_influence_ols_nostars.txt')
        lev = np.genfromtxt(fpath, skip_header=3, skip_footer=1,
                            converters={0:lambda s: s})
        #either numpy 1.6 or python 3.2 changed behavior
        if np.isnan(lev[-1]['f1']):
            lev = np.genfromtxt(fpath, skip_header=3, skip_footer=2,
                                converters={0:lambda s: s})

        lev.dtype.names = names

        res = res_ols #for easier copying

        cov_hac = sw.cov_hac_simple(res, nlags=4, use_correction=False)
        bse_hac =  sw.se_cov(cov_hac)

        assert_almost_equal(res.params, partable[:,0], 5)
        assert_almost_equal(bse_hac, partable[:,1], 5)
        #TODO

        assert_almost_equal(res.ssr, result_gretl_g1['ssr'][1], decimal=2)
        #assert_almost_equal(res.llf, result_gretl_g1['llf'][1], decimal=7) #not in gretl
        assert_almost_equal(res.rsquared, result_gretl_g1['rsquared'][1], decimal=6) #FAIL
        assert_almost_equal(res.rsquared_adj, result_gretl_g1['rsquared_adj'][1], decimal=6) #FAIL
        assert_almost_equal(np.sqrt(res.mse_resid), result_gretl_g1['mse_resid_sqrt'][1], decimal=5)
        #f-value is based on cov_hac I guess
        #assert_almost_equal(res.fvalue, result_gretl_g1['fvalue'][1], decimal=0) #FAIL
        #assert_approx_equal(res.f_pvalue, result_gretl_g1['f_pvalue'][1], significant=1) #FAIL
        #assert_almost_equal(res.durbin_watson, result_gretl_g1['dw'][1], decimal=7) #TODO


        c = oi.reset_ramsey(res, degree=2)
        compare_ftest(c, reset_2, decimal=(6,5))
        c = oi.reset_ramsey(res, degree=3)
        compare_ftest(c, reset_2_3, decimal=(6,5))

        linear_sq = smsdia.linear_lm(res.resid, res.model.exog)
        assert_almost_equal(linear_sq[0], linear_squares[0], decimal=6)
        assert_almost_equal(linear_sq[1], linear_squares[1], decimal=7)

        # compared with the Koenker variant of the Gretl Breusch-Pagan numbers
        hbpk = smsdia.het_breushpagan(res.resid, res.model.exog)
        assert_almost_equal(hbpk[0], het_breush_pagan_konker[0], decimal=6)
        assert_almost_equal(hbpk[1], het_breush_pagan_konker[1], decimal=6)

        hw = smsdia.het_white(res.resid, res.model.exog)
        assert_almost_equal(hw[:2], het_white[:2], 6)

        #arch
        #sm_arch = smsdia.acorr_lm(res.resid**2, maxlag=4, autolag=None)
        sm_arch = smsdia.het_arch(res.resid, maxlag=4)
        assert_almost_equal(sm_arch[0], arch_4[0], decimal=5)
        assert_almost_equal(sm_arch[1], arch_4[1], decimal=6)

        # computed but not asserted against ``vif`` in this test
        vif2 = [oi.variance_inflation_factor(res.model.exog, k) for k in [1,2]]

        infl = oi.OLSInfluence(res_ols)
        #print np.max(np.abs(lev['DFFITS'] - infl.dffits[0]))
        #print np.max(np.abs(lev['leverage'] - infl.hat_matrix_diag))
        #print np.max(np.abs(lev['influence'] - infl.influence))  #just added this based on Gretl

        #just rough test, low decimal in Gretl output,
        assert_almost_equal(lev['residual'], res.resid, decimal=3)
        assert_almost_equal(lev['DFFITS'], infl.dffits[0], decimal=3)
        assert_almost_equal(lev['leverage'], infl.hat_matrix_diag, decimal=3)
        assert_almost_equal(lev['influence'], infl.influence, decimal=4)
## Durbin-Watson test for residual autocorrelation. H_0: residuals are not autocorrelated with their first lag
#from statsmodels.stats.stattools import durbin_watson
#DW = durbin_watson(resid)
#print("\n", 'Teste de Durbin-Watson de independência dos resíduos:', DW[0])

# Breusch-Godfrey test for residual autocorrelation. H_0: residuals are not correlated with their "n" lags (here n = 10)
from statsmodels.stats.diagnostic import acorr_breusch_godfrey
bg = acorr_breusch_godfrey(model_output, nlags=10)
print(
    "\n", 'Teste de Breush-Godfrey de independência dos resíduos:', bg
)  # P-value (2nd of the 4 values shown) < 0.05 => the residuals are autocorrelated

# ARCH heteroscedasticity test. H_0: variance is constant. The second value is the p-value
from statsmodels.stats.diagnostic import het_arch
archTest = het_arch(resid[0], maxlag=5, autolag=None)
print("\n", 'Teste ARCH de heterocedasticidade:', archTest[1]
      )  # P-value < 0.05 => reject H_0 => variance is not constant

### GJR-GARCH Model ### AR(2) + GJR-GARCH(1,1)
from arch import arch_model

gjrGarch = arch_model(tsReturns, mean="ARX", lags=2,
                      o=1)  # sets up all 3 equations at once
model_gjrGarch = gjrGarch.fit(update_freq=5)
print("\n", model_gjrGarch.summary())

### EGARCH Model ### AR(2) + EGARCH(1,1)
from arch import arch_model

egarch = arch_model(tsReturns, mean="ARX", lags=2, vol="EGARCH",
df_atip = fn.df_boxplot()
df_atip

#%%
import statsmodels.stats.diagnostic as stm #import arch_test

# Read the index data and reverse its row order (descending index).
data_DGO = pd.read_csv("../Proyecto_Equipo_3-master/Indice")

df_DGO = pd.DataFrame(data_DGO)
df_DGO.sort_index(ascending = False, inplace = True)



df_DGO60 = df_DGO['Actual']

# ARCH test on the 'Actual' column.
arc = stm.het_arch(df_DGO60)
#%%
# NOTE(review): per the statsmodels docs het_arch returns a tuple of test
# statistics, so calling .fit() on it looks like it would raise
# AttributeError - confirm whether a model object was intended here.
arch1 = arc.fit()
print(arch1.summary())
            
#%%
p_arch = fn.df_parch()
p_arch
    
#%%
plt.plot(df_DGO['Actual'])
plt.show()


#%% Attempt b
Exemple #21
0
def engle(data, lags=12):
    """Print the Lagrange multiplier result of Engle's ARCH test for ``data``.

    ``lags`` is forwarded to ``het_arch`` as ``nlags``. The statistic is
    rounded to four decimals and printed on the same line as its p-value.
    """
    lm_stat, lm_pvalue = het_arch(data, nlags=lags)[:2]
    print("Lagrange multiplier test")
    print(f"Chi-squared: {round(lm_stat, 4)}", end=", ")
    print(f"p-value: {lm_pvalue}")

#stocks.to_csv('GSPC.csv')
#stocks = pd.DataFrame.from_csv('GSPC.csv')

#key symbols '^GSPC', '^DJI', '^VIX' 'DNKN''AAPL'

# simple returns from the adjusted close; the leading NaN is dropped
ret_Stock = stocks['Adj Close'].pct_change().dropna()

# ACF (autocorrelation function) of the returns and of the squared returns
# acf and pacf from statsmodels
from statsmodels.tsa.stattools import acf
ac = acf(ret_Stock,nlags=250)
ac2 = acf(np.square(ret_Stock),nlags=200)

#from statsmodels.graphics.tsaplots import plot_acf
#plot_acf(ret_Stock)

# Ljung-Box test using the lag count set below (currently 10)
# statsmodels.stats.diagnostic.acorr_ljungbox
import statsmodels.stats.diagnostic as ssd 
lags = 10;
# NOTE(review): the two-name unpacking assumes acorr_ljungbox returns a
# (statistic, p-value) pair; newer statsmodels may return a DataFrame - confirm.
qstat, pval = ssd.acorr_ljungbox(ret_Stock,lags)
qstat2, pval2 =ssd.acorr_ljungbox(np.square(ret_Stock),lags)

# Engle LM test for heteroskedasticity consistent
# statsmodels.sandbox.stats.diagnostic.acorr_lm het_arch
lmstat = ssd.het_arch(ret_Stock,lags)
# NOTE(review): het_arch presumably squares its input itself, in which case
# passing already-squared returns tests fourth powers - confirm intent.
lmstat2 = ssd.het_arch(np.square(ret_Stock),lags)
Exemple #23
0
def engle_test_series(data, Returns):
    """Print the Engle ARCH-LM p-value for each product key of *data*.

    Parameters
    ----------
    data : mapping
        Only its keys are used; they are visited in sorted order.
    Returns : mapping
        Indexed by the same keys; each entry is handed to ``het_arch``.
    """
    for product in sorted(data.keys()):
        # Index 1 of the het_arch result is the value reported as p-value.
        p_value = het_arch(Returns[product])[1]
        print(product + ':  p-value Engle: ' + str(p_value))
    u_hat = series - y_hat_arima

    #generate plot of residuals
    plt.plot(u_hat)
    plt.savefig('../../manuscript/src/latexGraphics/u_hat_houstmw.png')
    #plt.show()
    plt.close()

    #generate acf plot of squared residuals
    plot_acf(u_hat**2, lags=100)
    plt.savefig('../../manuscript/src/latexGraphics/acf_u_hat_sq_houstmw.png')
    plt.close()

    #arch-lm test
    #https://www.statsmodels.org/stable/generated/statsmodels.stats.diagnostic.het_arch.html
    dct_tex['archlmresult'] = np.round(het_arch(u_hat, nlags=3, ddof=6)[1], 4)

    ####select optimal GARCH Model according to arch lm test result of standardized residuals

    #define grid of model parameters:
    ls_params = [(p, q) for p in range(1, 20) for q in range(1, 20)]
    ls_out = []
    if multiproc == True:

        ls_out = Parallel(n_jobs=-1)(
            delayed(FitGARCH)(*[u_hat, param[0], param[1]])
            for param in ls_params)

    else:
        for param in tqdm(ls_params):
            ls_out.append(FitGARCH(u_hat, param[0], param[1]))
Exemple #25
0
# One fitted ARIMA(1, 0, 0) result per column of `data`, keyed by column name.
modelos_AR = {}
for i in data.columns:
    # Banner separating the output of consecutive columns.
    banner = (f"Modelo arima para {i}"
              f"##############################################"
              f"##############################################")
    print(banner)

    # Fit a fixed-order ARIMA(1, 0, 0) on the column with missing values
    # dropped.
    ajuste = ARIMA(data[i].dropna(), order=(1, 0, 0)).fit()
    modelos_AR[i] = ajuste

    # Show the estimation summary for this column.
    print(ajuste.summary())
"""
#VERIFICAR ARCH
for i in data.columns:
    print(f"ARCHTest para resíduos de {i}")
    print(het_arch(modelos[i].arima_res_.resid,nlags=6))
    print("Time Series")
    print(het_arch(data[i].dropna(),nlags=6))

#FAZER MODELOS GARCH
modelos_garch = dict()
for i in data.columns:

    print(f"Modelo GARCH para {i}"
          f"##############################################"
          f"##############################################")
    # Fit auto_arima function to AirPassengers dataset
    modelos_garch[i] = arch.arch_model(modelos[i].arima_res_.resid, vol = "GARCH", rescale= True).fit()  # set to stepwise

    # To print the summary
    print(modelos_garch[i].summary())
Exemple #26
0
    #收益率平稳性检验结果为1阶弱平稳
    #adftest(df2)
    #参数选择为(0,1)
    params_select(df1)
    print(acorr_ljungbox(df1, lags=1))
    #系数估计------------------------------------------------
    model = sm.tsa.ARMA(df2, (0, 1)).fit()
    print(model.summary())
    print('----------------------------------------')
    print(model.params)
    #--------------------------------------------------------
    #收益率残差自相关性检验-----------------------------------
    resid = model.resid
    print(sm.stats.durbin_watson(resid.values))
    #检验残差arch效应-----------------------------------------
    *_, fpvalue = diagnostic.het_arch(resid)
    if fpvalue < 0.05:
        print('异方差性显著', fpvalue)
    else:
        print('异方差性不显著', fpvalue)
    #建立arch模型-----------------------------------------------
    #模型预测
    model = sm.tsa.ARMA(df2, (0, 1)).fit()
    arch_mod = ConstantMean(df2)
    arch_mod.volatility = GARCH(1, 0, 1)
    arch_mod.distribution = StudentsT()
    res = arch_mod.fit(update_freq=5, disp='off')
    mu = model.params[0]
    theta = model.params[1]

    omega = res.params[1]
from util import energy_return_data
import numpy as np
from scipy.stats import kurtosis, skew
from statsmodels.stats.diagnostic import het_arch, acorr_ljungbox
# Load the windowed energy-return data; only `np_data` is used below.
windowed, np_data, scaler1 = energy_return_data(window=25)

# Flatten every column except the last into a single 1-D sample.
y = np_data[:, :-1].reshape(-1)
print('mean', np.mean(y))
print('max', np.max(y))
print('min', np.min(y))
print('median', np.median(y))
print('S.D.', np.std(y))
print('Skewness', skew(y))
# scipy.stats.kurtosis defaults to fisher=True, so it already returns the
# excess kurtosis (0 for a normal distribution). Subtracting 3 again, as the
# previous version did, under-reported the value by 3.
print('Excess kurtosis', kurtosis(y))
print('Ljung-Box test of autocorrelation in residuals',
      acorr_ljungbox(y, lags=[25])[1])
print('Engle’s Test for Autoregressive Conditional Heteroscedasticity (ARCH)',
      het_arch(y)[1])
    def test_all(self):
        """Check GLSAR and OLS results on macrodata against Gretl references.

        The test regresses the annualised growth rate of real investment on
        the growth rate of real GDP and the lagged real interest rate, then
        compares estimates and diagnostic tests with hard-coded reference
        numbers (the Gretl output is kept verbatim in the string blocks):

        1. GLSAR with a fixed ``rho`` (``res_g1``),
        2. GLSAR with an iteratively estimated ``rho`` (``res_g2``),
        3. OLS with HAC standard errors (``res_ols``).
        """

        d = macrodata.load().data
        #import datasetswsm.greene as g
        #d = g.load('5-1')

        #growth rates
        gs_l_realinv = 400 * np.diff(np.log(d['realinv']))
        gs_l_realgdp = 400 * np.diff(np.log(d['realgdp']))

        #simple diff, not growthrate, I want heteroscedasticity later for testing
        # NOTE: endogd / exogd are not referenced again in this test.
        endogd = np.diff(d['realinv'])
        exogd = add_constant(np.c_[np.diff(d['realgdp']), d['realint'][:-1]])

        endogg = gs_l_realinv
        exogg = add_constant(np.c_[gs_l_realgdp, d['realint'][:-1]])

        res_ols = OLS(endogg, exogg).fit()
        #print res_ols.params

        mod_g1 = GLSAR(endogg, exogg, rho=-0.108136)
        res_g1 = mod_g1.fit()
        #print res_g1.params

        mod_g2 = GLSAR(endogg, exogg, rho=-0.108136)   #-0.1335859) from R
        res_g2 = mod_g2.iterative_fit(maxiter=5)
        #print res_g2.params


        rho = -0.108136

        #                 coefficient   std. error   t-ratio    p-value 95% CONFIDENCE INTERVAL
        partable = np.array([
                        [-9.50990,  0.990456, -9.602, 3.65e-018, -11.4631, -7.55670], # ***
                        [ 4.37040,  0.208146, 21.00,  2.93e-052,  3.95993, 4.78086], # ***
                        [-0.579253, 0.268009, -2.161, 0.0319, -1.10777, -0.0507346]]) #    **

        #Statistics based on the rho-differenced data:

        result_gretl_g1 = dict(
        endog_mean = ("Mean dependent var",   3.113973),
        endog_std = ("S.D. dependent var",   18.67447),
        ssr = ("Sum squared resid",    22530.90),
        mse_resid_sqrt = ("S.E. of regression",   10.66735),
        rsquared = ("R-squared",            0.676973),
        rsquared_adj = ("Adjusted R-squared",   0.673710),
        fvalue = ("F(2, 198)",            221.0475),
        f_pvalue = ("P-value(F)",           3.56e-51),
        resid_acf1 = ("rho",                 -0.003481),
        dw = ("Durbin-Watson",        1.993858))


        #fstatistic, p-value, df1, df2
        reset_2_3 = [5.219019, 0.00619, 2, 197, "f"]
        reset_2 = [7.268492, 0.00762, 1, 198, "f"]
        reset_3 = [5.248951, 0.023, 1, 198, "f"]
        #LM-statistic, p-value, df
        arch_4 = [7.30776, 0.120491, 4, "chi2"]

        #multicollinearity
        vif = [1.002, 1.002]
        cond_1norm = 6862.0664
        determinant = 1.0296049e+009
        reciprocal_condition_number = 0.013819244

        #Chi-square(2): test-statistic, pvalue, df
        normality = [20.2792, 3.94837e-005, 2]

        #tests
        res = res_g1  #with rho from Gretl

        #basic

        assert_almost_equal(res.params, partable[:,0], 4)
        assert_almost_equal(res.bse, partable[:,1], 6)
        assert_almost_equal(res.tvalues, partable[:,2], 2)

        assert_almost_equal(res.ssr, result_gretl_g1['ssr'][1], decimal=2)
        #assert_almost_equal(res.llf, result_gretl_g1['llf'][1], decimal=7) #not in gretl
        #assert_almost_equal(res.rsquared, result_gretl_g1['rsquared'][1], decimal=7) #FAIL
        #assert_almost_equal(res.rsquared_adj, result_gretl_g1['rsquared_adj'][1], decimal=7) #FAIL
        assert_almost_equal(np.sqrt(res.mse_resid), result_gretl_g1['mse_resid_sqrt'][1], decimal=5)
        assert_almost_equal(res.fvalue, result_gretl_g1['fvalue'][1], decimal=4)
        assert_approx_equal(res.f_pvalue, result_gretl_g1['f_pvalue'][1], significant=2)
        #assert_almost_equal(res.durbin_watson, result_gretl_g1['dw'][1], decimal=7) #TODO

        #arch
        #sm_arch = smsdia.acorr_lm(res.wresid**2, maxlag=4, autolag=None)
        sm_arch = smsdia.het_arch(res.wresid, maxlag=4)
        assert_almost_equal(sm_arch[0], arch_4[0], decimal=4)
        assert_almost_equal(sm_arch[1], arch_4[1], decimal=6)

        #tests
        # Same reference table as above, but compared with looser tolerances
        # in several of the assertions below.
        res = res_g2 #with estimated rho

        #estimated lag coefficient
        assert_almost_equal(res.model.rho, rho, decimal=3)

        #basic
        assert_almost_equal(res.params, partable[:,0], 4)
        assert_almost_equal(res.bse, partable[:,1], 3)
        assert_almost_equal(res.tvalues, partable[:,2], 2)

        assert_almost_equal(res.ssr, result_gretl_g1['ssr'][1], decimal=2)
        #assert_almost_equal(res.llf, result_gretl_g1['llf'][1], decimal=7) #not in gretl
        #assert_almost_equal(res.rsquared, result_gretl_g1['rsquared'][1], decimal=7) #FAIL
        #assert_almost_equal(res.rsquared_adj, result_gretl_g1['rsquared_adj'][1], decimal=7) #FAIL
        assert_almost_equal(np.sqrt(res.mse_resid), result_gretl_g1['mse_resid_sqrt'][1], decimal=5)
        assert_almost_equal(res.fvalue, result_gretl_g1['fvalue'][1], decimal=0)
        assert_almost_equal(res.f_pvalue, result_gretl_g1['f_pvalue'][1], decimal=6)
        #assert_almost_equal(res.durbin_watson, result_gretl_g1['dw'][1], decimal=7) #TODO



        c = oi.reset_ramsey(res, degree=2)
        compare_ftest(c, reset_2, decimal=(2,4))
        c = oi.reset_ramsey(res, degree=3)
        compare_ftest(c, reset_2_3, decimal=(2,4))

        #arch
        #sm_arch = smsdia.acorr_lm(res.wresid**2, maxlag=4, autolag=None)
        sm_arch = smsdia.het_arch(res.wresid, maxlag=4)
        assert_almost_equal(sm_arch[0], arch_4[0], decimal=1)
        assert_almost_equal(sm_arch[1], arch_4[1], decimal=2)



        '''
        Performing iterative calculation of rho...

                         ITER       RHO        ESS
                           1     -0.10734   22530.9
                           2     -0.10814   22530.9

        Model 4: Cochrane-Orcutt, using observations 1959:3-2009:3 (T = 201)
        Dependent variable: ds_l_realinv
        rho = -0.108136

                         coefficient   std. error   t-ratio    p-value
          -------------------------------------------------------------
          const           -9.50990      0.990456    -9.602    3.65e-018 ***
          ds_l_realgdp     4.37040      0.208146    21.00     2.93e-052 ***
          realint_1       -0.579253     0.268009    -2.161    0.0319    **

        Statistics based on the rho-differenced data:

        Mean dependent var   3.113973   S.D. dependent var   18.67447
        Sum squared resid    22530.90   S.E. of regression   10.66735
        R-squared            0.676973   Adjusted R-squared   0.673710
        F(2, 198)            221.0475   P-value(F)           3.56e-51
        rho                 -0.003481   Durbin-Watson        1.993858
        '''

        '''
        RESET test for specification (squares and cubes)
        Test statistic: F = 5.219019,
        with p-value = P(F(2,197) > 5.21902) = 0.00619

        RESET test for specification (squares only)
        Test statistic: F = 7.268492,
        with p-value = P(F(1,198) > 7.26849) = 0.00762

        RESET test for specification (cubes only)
        Test statistic: F = 5.248951,
        with p-value = P(F(1,198) > 5.24895) = 0.023:
        '''

        '''
        Test for ARCH of order 4

                     coefficient   std. error   t-ratio   p-value
          --------------------------------------------------------
          alpha(0)   97.0386       20.3234       4.775    3.56e-06 ***
          alpha(1)    0.176114      0.0714698    2.464    0.0146   **
          alpha(2)   -0.0488339     0.0724981   -0.6736   0.5014
          alpha(3)   -0.0705413     0.0737058   -0.9571   0.3397
          alpha(4)    0.0384531     0.0725763    0.5298   0.5968

          Null hypothesis: no ARCH effect is present
          Test statistic: LM = 7.30776
          with p-value = P(Chi-square(4) > 7.30776) = 0.120491:
        '''

        '''
        Variance Inflation Factors

        Minimum possible value = 1.0
        Values > 10.0 may indicate a collinearity problem

           ds_l_realgdp    1.002
              realint_1    1.002

        VIF(j) = 1/(1 - R(j)^2), where R(j) is the multiple correlation coefficient
        between variable j and the other independent variables

        Properties of matrix X'X:

         1-norm = 6862.0664
         Determinant = 1.0296049e+009
         Reciprocal condition number = 0.013819244
        '''
        '''
        Test for ARCH of order 4 -
          Null hypothesis: no ARCH effect is present
          Test statistic: LM = 7.30776
          with p-value = P(Chi-square(4) > 7.30776) = 0.120491

        Test of common factor restriction -
          Null hypothesis: restriction is acceptable
          Test statistic: F(2, 195) = 0.426391
          with p-value = P(F(2, 195) > 0.426391) = 0.653468

        Test for normality of residual -
          Null hypothesis: error is normally distributed
          Test statistic: Chi-square(2) = 20.2792
          with p-value = 3.94837e-005:
        '''

        #no idea what this is
        '''
        Augmented regression for common factor test
        OLS, using observations 1959:3-2009:3 (T = 201)
        Dependent variable: ds_l_realinv

                           coefficient   std. error   t-ratio    p-value
          ---------------------------------------------------------------
          const            -10.9481      1.35807      -8.062    7.44e-014 ***
          ds_l_realgdp       4.28893     0.229459     18.69     2.40e-045 ***
          realint_1         -0.662644    0.334872     -1.979    0.0492    **
          ds_l_realinv_1    -0.108892    0.0715042    -1.523    0.1294
          ds_l_realgdp_1     0.660443    0.390372      1.692    0.0923    *
          realint_2          0.0769695   0.341527      0.2254   0.8219

          Sum of squared residuals = 22432.8

        Test of common factor restriction

          Test statistic: F(2, 195) = 0.426391, with p-value = 0.653468
        '''


        ################ with OLS, HAC errors

        #Model 5: OLS, using observations 1959:2-2009:3 (T = 202)
        #Dependent variable: ds_l_realinv
        #HAC standard errors, bandwidth 4 (Bartlett kernel)

        #coefficient   std. error   t-ratio    p-value 95% CONFIDENCE INTERVAL
        #for confidence interval t(199, 0.025) = 1.972

        # From here on partable / result_gretl_g1 (and several reference
        # lists below) are rebound to the OLS reference values.
        partable = np.array([
        [-9.48167,      1.17709,     -8.055,    7.17e-014, -11.8029, -7.16049], # ***
        [4.37422,      0.328787,    13.30,     2.62e-029, 3.72587, 5.02258], #***
        [-0.613997,     0.293619,    -2.091,    0.0378, -1.19300, -0.0349939]]) # **

        result_gretl_g1 = dict(
                    endog_mean = ("Mean dependent var",   3.257395),
                    endog_std = ("S.D. dependent var",   18.73915),
                    ssr = ("Sum squared resid",    22799.68),
                    mse_resid_sqrt = ("S.E. of regression",   10.70380),
                    rsquared = ("R-squared",            0.676978),
                    rsquared_adj = ("Adjusted R-squared",   0.673731),
                    fvalue = ("F(2, 199)",            90.79971),
                    f_pvalue = ("P-value(F)",           9.53e-29),
                    llf = ("Log-likelihood",      -763.9752),
                    aic = ("Akaike criterion",     1533.950),
                    bic = ("Schwarz criterion",    1543.875),
                    hqic = ("Hannan-Quinn",         1537.966),
                    resid_acf1 = ("rho",                 -0.107341),
                    dw = ("Durbin-Watson",        2.213805))

        linear_logs = [1.68351, 0.430953, 2, "chi2"]
        #for logs: dropping 70 nan or incomplete observations, T=133
        #(res_ols.model.exog <=0).any(1).sum() = 69  ?not 70
        linear_squares = [7.52477, 0.0232283, 2, "chi2"]

        #Autocorrelation, Breusch-Godfrey test for autocorrelation up to order 4
        lm_acorr4 = [1.17928, 0.321197, 4, 195, "F"]
        lm2_acorr4 = [4.771043, 0.312, 4, "chi2"]
        acorr_ljungbox4 = [5.23587, 0.264, 4, "chi2"]

        #break
        cusum_Harvey_Collier  = [0.494432, 0.621549, 198, "t"] #stats.t.sf(0.494432, 198)*2
        #see cusum results in files
        break_qlr = [3.01985, 0.1, 3, 196, "maxF"]  #TODO check this, max at 2001:4
        break_chow = [13.1897, 0.00424384, 3, "chi2"] # break at 1984:1

        arch_4 = [3.43473, 0.487871, 4, "chi2"]

        normality = [23.962, 0.00001, 2, "chi2"]

        het_white = [33.503723, 0.000003, 5, "chi2"]
        het_breusch_pagan = [1.302014, 0.521520, 2, "chi2"]  #TODO: not available
        het_breusch_pagan_konker = [0.709924, 0.701200, 2, "chi2"]


        reset_2_3 = [5.219019, 0.00619, 2, 197, "f"]
        reset_2 = [7.268492, 0.00762, 1, 198, "f"]
        reset_3 = [5.248951, 0.023, 1, 198, "f"]  #not available

        cond_1norm = 5984.0525
        determinant = 7.1087467e+008
        reciprocal_condition_number = 0.013826504
        vif = [1.001, 1.001]

        # Reference leverage/influence table read from a results file that
        # sits next to this test module.
        names = 'date   residual        leverage       influence        DFFITS'.split()
        cur_dir = os.path.abspath(os.path.dirname(__file__))
        fpath = os.path.join(cur_dir, 'results/leverage_influence_ols_nostars.txt')
        lev = np.genfromtxt(fpath, skip_header=3, skip_footer=1,
                            converters={0:lambda s: s})
        #either numpy 1.6 or python 3.2 changed behavior
        if np.isnan(lev[-1]['f1']):
            lev = np.genfromtxt(fpath, skip_header=3, skip_footer=2,
                                converters={0:lambda s: s})

        lev.dtype.names = names

        res = res_ols #for easier copying

        cov_hac = sw.cov_hac_simple(res, nlags=4, use_correction=False)
        bse_hac =  sw.se_cov(cov_hac)

        assert_almost_equal(res.params, partable[:,0], 5)
        assert_almost_equal(bse_hac, partable[:,1], 5)
        #TODO

        assert_almost_equal(res.ssr, result_gretl_g1['ssr'][1], decimal=2)
        assert_almost_equal(res.llf, result_gretl_g1['llf'][1], decimal=4) #not in gretl
        assert_almost_equal(res.rsquared, result_gretl_g1['rsquared'][1], decimal=6) #FAIL
        assert_almost_equal(res.rsquared_adj, result_gretl_g1['rsquared_adj'][1], decimal=6) #FAIL
        assert_almost_equal(np.sqrt(res.mse_resid), result_gretl_g1['mse_resid_sqrt'][1], decimal=5)
        #f-value is based on cov_hac I guess
        #res2 = res.get_robustcov_results(cov_type='HC1')
        # TODO: fvalue differs from Gretl, trying any of the HCx
        #assert_almost_equal(res2.fvalue, result_gretl_g1['fvalue'][1], decimal=0) #FAIL
        #assert_approx_equal(res.f_pvalue, result_gretl_g1['f_pvalue'][1], significant=1) #FAIL
        #assert_almost_equal(res.durbin_watson, result_gretl_g1['dw'][1], decimal=7) #TODO


        c = oi.reset_ramsey(res, degree=2)
        compare_ftest(c, reset_2, decimal=(6,5))
        c = oi.reset_ramsey(res, degree=3)
        compare_ftest(c, reset_2_3, decimal=(6,5))

        linear_sq = smsdia.linear_lm(res.resid, res.model.exog)
        assert_almost_equal(linear_sq[0], linear_squares[0], decimal=6)
        assert_almost_equal(linear_sq[1], linear_squares[1], decimal=7)

        hbpk = smsdia.het_breuschpagan(res.resid, res.model.exog)
        assert_almost_equal(hbpk[0], het_breusch_pagan_konker[0], decimal=6)
        assert_almost_equal(hbpk[1], het_breusch_pagan_konker[1], decimal=6)

        hw = smsdia.het_white(res.resid, res.model.exog)
        assert_almost_equal(hw[:2], het_white[:2], 6)

        #arch
        #sm_arch = smsdia.acorr_lm(res.resid**2, maxlag=4, autolag=None)
        sm_arch = smsdia.het_arch(res.resid, maxlag=4)
        assert_almost_equal(sm_arch[0], arch_4[0], decimal=5)
        assert_almost_equal(sm_arch[1], arch_4[1], decimal=6)

        # NOTE: vif2 is computed but never compared with the `vif` reference.
        vif2 = [oi.variance_inflation_factor(res.model.exog, k) for k in [1,2]]

        infl = oi.OLSInfluence(res_ols)
        #print np.max(np.abs(lev['DFFITS'] - infl.dffits[0]))
        #print np.max(np.abs(lev['leverage'] - infl.hat_matrix_diag))
        #print np.max(np.abs(lev['influence'] - infl.influence))  #just added this based on Gretl

        #just rough test, low decimal in Gretl output,
        assert_almost_equal(lev['residual'], res.resid, decimal=3)
        assert_almost_equal(lev['DFFITS'], infl.dffits[0], decimal=3)
        assert_almost_equal(lev['leverage'], infl.hat_matrix_diag, decimal=3)
        assert_almost_equal(lev['influence'], infl.influence, decimal=4)
Exemple #29
0
 def het_arch(self, timeseries):
     """Run ``diagnostic.het_arch`` on the model result built from *timeseries*.

     The model result comes from ``self.generate_model(timeseries)``; only
     the first two entries of the test output are kept.

     Returns
     -------
     HetArchResult
         Named tuple with fields ``statistic`` and ``pvalue``.
     """
     _, fitted = self.generate_model(timeseries)
     outcome = diagnostic.het_arch(fitted)
     HetArchResult = namedtuple('HetArchResult', 'statistic pvalue')
     return HetArchResult(statistic=outcome[0], pvalue=outcome[1])