def setupClass(cls):
    """Fit OLS on the Longley data and store two F-test results on ``cls``.

    ``Ftest1`` is built from an explicit restriction matrix, ``NewFtest1``
    from a string hypothesis.
    """
    dataset = longley.load()
    # Constant is appended as the last column.
    dataset.exog = add_constant(dataset.exog, prepend=False)
    fitted = OLS(dataset.endog, dataset.exog).fit()

    restrictions = [
        [0, 1, -1, 0, 0, 0, 0],
        [0, 0, 0, 0, 1, -1, 0],
    ]
    cls.Ftest1 = fitted.f_test(restrictions)
    cls.NewFtest1 = fitted.f_test('x2 = x3, x5 = x6')
def setupClass(cls):
    """Prepare matrix-based and string-based F-tests on the Longley OLS fit."""
    longley_data = longley.load()
    # prepend=False puts the constant in the last column.
    longley_data.exog = add_constant(longley_data.exog, prepend=False)
    ols_results = OLS(longley_data.endog, longley_data.exog).fit()

    row_a = [0, 1, -1, 0, 0, 0, 0]
    row_b = [0, 0, 0, 0, 1, -1, 0]
    cls.Ftest1 = ols_results.f_test([row_a, row_b])

    hypothesis = 'x2 = x3, x5 = x6'
    cls.NewFtest1 = ols_results.f_test(hypothesis)
def test_permuted_ols_statsmodels_withcovar(random_state=0):
    """Compare permuted_ols F-scores with statsmodels when covariates exist.

    This test has a statsmodels dependence. There seems to be no simple,
    alternative way to perform a F-test on a linear model including
    covariates.

    The comparison is done twice: without an intercept, and with an
    intercept (added through ``model_intercept=True`` on one side and an
    explicit column of ones on the statsmodels side).
    """
    try:
        from statsmodels.regression.linear_model import OLS
    except ImportError:
        # Only a missing optional dependency should skip the test; any other
        # failure (including KeyboardInterrupt) must propagate.
        warnings.warn("Statsmodels is required to run this test")
        raise nose.SkipTest

    rng = check_random_state(random_state)
    # design parameters
    n_samples = 50
    # create design
    target_var = rng.randn(n_samples, 1)
    tested_var = rng.randn(n_samples, 1)
    confounding_vars = rng.randn(n_samples, 2)

    # statsmodels OLS: the restriction selects the tested variable (column 0)
    ols = OLS(target_var, np.hstack((tested_var, confounding_vars))).fit()
    fvals = ols.f_test([[1., 0., 0.]]).fvalue
    # permuted OLS
    _, orig_scores, _ = permuted_ols(
        tested_var, target_var, confounding_vars,
        model_intercept=False, n_perm=0, random_state=random_state)
    assert_array_almost_equal(fvals, orig_scores, decimal=6)

    ### Adds intercept
    # permuted OLS
    _, orig_scores_addintercept, _ = permuted_ols(
        tested_var, target_var, confounding_vars,
        model_intercept=True, n_perm=0, random_state=random_state)
    # statsmodels OLS, with the intercept as an extra confound column
    confounding_vars = np.hstack((confounding_vars, np.ones((n_samples, 1))))
    ols = OLS(target_var, np.hstack((tested_var, confounding_vars))).fit()
    fvals_addintercept = ols.f_test([[1., 0., 0., 0.]]).fvalue
    assert_array_almost_equal(fvals_addintercept, orig_scores_addintercept,
                              decimal=6)
def test_f_score_with_covars_and_normalized_design_withcovar(random_state=0):
    """Compare the internal F-score helper with statsmodels, with covariates.

    This test has a statsmodels dependence. There seems to be no simple,
    alternative way to perform a F-test on a linear model including
    covariates.
    """
    try:
        from statsmodels.regression.linear_model import OLS
    except ImportError:
        # Skip only when statsmodels is genuinely unavailable; a bare except
        # would also hide unrelated errors and interrupts.
        warnings.warn("Statsmodels is required to run this test")
        raise nose.SkipTest

    rng = check_random_state(random_state)

    ### Normalized data
    n_samples = 50
    # generate data
    var1 = np.ones((n_samples, 1)) / np.sqrt(n_samples)  # normalized
    var2 = rng.randn(n_samples, 1)
    var2 = var2 / np.sqrt(np.sum(var2**2, 0))  # normalize
    covars = np.eye(n_samples, 3)  # covars is orthogonal
    # Row 3 set to -1 makes every column sum to zero, i.e. orthogonal to the
    # constant vector var1.
    covars[3] = -1
    covars = orthonormalize_matrix(covars)

    # own f_score
    f_val_own = _f_score_with_covars_and_normalized_design(
        var1, var2, covars)[0]

    # statsmodels f_score: test the first regressor (var1) only
    test_matrix = np.array([[1., 0., 0., 0.]])
    statsmodels_ols = OLS(var2, np.hstack((var1, covars))).fit()
    f_val_statsmodels = statsmodels_ols.f_test(test_matrix).fvalue[0]

    assert_array_almost_equal(f_val_own, f_val_statsmodels)
def test_f_score_with_covars_and_normalized_design_withcovar(random_state=0):
    """Check the normalized-design F-score against a statsmodels F-test.

    This test has a statsmodels dependence. There seems to be no simple,
    alternative way to perform a F-test on a linear model including
    covariates.
    """
    try:
        from statsmodels.regression.linear_model import OLS
    except ImportError:
        # A missing statsmodels install is the only reason to skip.
        warnings.warn("Statsmodels is required to run this test")
        raise nose.SkipTest

    rng = check_random_state(random_state)

    ### Normalized data
    n_samples = 50

    # generate data
    var1 = np.ones((n_samples, 1)) / np.sqrt(n_samples)  # normalized
    var2 = rng.randn(n_samples, 1)
    var2 = var2 / np.sqrt(np.sum(var2 ** 2, 0))  # normalize
    covars = np.eye(n_samples, 3)  # covars is orthogonal
    # Each column holds a single 1; writing -1 into row 3 makes the column
    # sums zero, so the covariates are orthogonal to the constant var1.
    covars[3] = -1
    covars = orthonormalize_matrix(covars)

    # own f_score
    f_val_own = _f_score_with_covars_and_normalized_design(
        var1, var2, covars)[0]

    # statsmodels f_score: restriction on the first column (var1)
    test_matrix = np.array([[1.0, 0.0, 0.0, 0.0]])
    statsmodels_ols = OLS(var2, np.hstack((var1, covars))).fit()
    f_val_statsmodels = statsmodels_ols.f_test(test_matrix).fvalue[0]

    assert_array_almost_equal(f_val_own, f_val_statsmodels)
def test_permuted_ols_intercept_statsmodels_withcovar(random_state=0):
    """Compare permuted_ols with statsmodels when the tested var is constant.

    This test has a statsmodels dependence. There seems to be no simple,
    alternative way to perform a F-test on a linear model including
    covariates.

    The tested variable is a column of ones, so the test also checks that
    passing ``model_intercept=True`` does not change the scores.
    """
    try:
        from statsmodels.regression.linear_model import OLS
    except ImportError:
        # Skip only for a missing optional dependency; do not mask other
        # exceptions.
        warnings.warn("Statsmodels is required to run this test")
        raise nose.SkipTest

    rng = check_random_state(random_state)
    # design parameters
    n_samples = 50
    # create design
    target_var = rng.randn(n_samples, 1)
    tested_var = np.ones((n_samples, 1))
    confounding_vars = rng.randn(n_samples, 2)

    # statsmodels OLS
    ols = OLS(target_var, np.hstack((tested_var, confounding_vars))).fit()
    fvals = ols.f_test([[1.0, 0.0, 0.0]]).fvalue

    # permuted OLS
    _, orig_scores, _ = permuted_ols(
        tested_var, target_var, confounding_vars,
        n_perm=0, random_state=random_state)

    # same thing but with model_intercept=True to check it has no effect
    _, orig_scores_addintercept, _ = permuted_ols(
        tested_var, target_var, confounding_vars,
        model_intercept=True, n_perm=0, random_state=random_state)

    assert_array_almost_equal(fvals, orig_scores, decimal=6)
    assert_array_almost_equal(orig_scores, orig_scores_addintercept,
                              decimal=6)
def reset_ramsey(res, degree=5):
    '''Ramsey's RESET specification test for linear models

    General specification test for neglected non-linear effects in a model.

    Parameters
    ----------
    res : results instance
        Fitted model; ``res.model.exog``, ``res.model.endog`` and
        ``res.fittedvalues`` are used.
    degree : int
        Highest power of the fitted values added to the auxiliary regression.

    Returns
    -------
    The result of ``f_test`` on the auxiliary OLS fit, restricted to the
    added power terms.

    Notes
    -----
    The auxiliary OLS regression uses the original design matrix augmented by
    powers 2 to ``degree`` of the fitted values. The F-test checks whether
    these additional terms are jointly significant. A p-value below a
    threshold, e.g. 0.1, indicates that there might be additional non-linear
    effects and that the linear model is mis-specified.

    References
    ----------
    http://en.wikipedia.org/wiki/Ramsey_RESET_test

    '''
    n_original = res.model.exog.shape[1]

    # np.vander orders columns by decreasing power; dropping the last two
    # removes fitted**1 and the constant fitted**0.
    fitted_powers = np.vander(res.fittedvalues, degree + 1)[:, :-2]
    augmented = np.column_stack((res.model.exog, fitted_powers))
    aux_fit = OLS(res.model.endog, augmented).fit()

    # One restriction per added power column, starting right after the
    # original regressors.
    restriction = np.eye(degree - 1, augmented.shape[1], n_original)
    return aux_fit.f_test(restriction)
def setupClass(cls):
    """Store an F-test of the restrictions ``R params = q`` on the Longley fit."""
    dataset = longley.load()
    # Constant goes in the last column.
    dataset.exog = add_constant(dataset.exog, prepend=False)
    fitted = OLS(dataset.endog, dataset.exog).fit()

    restriction_rows = [
        [0, 1, 1, 0, 0, 0, 0],
        [0, 1, 0, 1, 0, 0, 0],
        [0, 1, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 1, 0, 0],
        [0, 0, 0, 0, 0, 1, 0],
    ]
    R = np.array(restriction_rows)
    q = np.array([0, 0, 0, 1, 0])
    cls.Ftest1 = fitted.f_test((R, q))
def setupClass(cls):
    """Fit OLS on Longley and keep the F-test for five linear restrictions."""
    longley_data = longley.load()
    longley_data.exog = add_constant(longley_data.exog, prepend=False)
    ols_results = OLS(longley_data.endog, longley_data.exog).fit()

    # Left-hand side (R) and right-hand side (q) of the tested restrictions.
    lhs = np.array([
        [0, 1, 1, 0, 0, 0, 0],
        [0, 1, 0, 1, 0, 0, 0],
        [0, 1, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 1, 0, 0],
        [0, 0, 0, 0, 0, 1, 0],
    ])
    rhs = np.array([0, 0, 0, 1, 0])
    cls.Ftest1 = ols_results.f_test((lhs, rhs))
def linear_lm(resid, exog, func=None):
    '''Lagrange multiplier test for linearity against functional alternative

    Limitations: the first column of ``exog`` is excluded from the
    transformation (NOTE(review): presumably it is expected to be the
    constant - confirm). There is no check whether the transformed variables
    contain NaNs, for example the log of a negative number.

    Parameters
    ----------
    resid : ndarray
        residuals of a regression
    exog : ndarray
        exogenous variables for which linearity is tested
    func : callable
        Takes an array of exog and returns an array of transformed
        variables. If None, squares are used.

    Returns
    -------
    lm : float
        Lagrange multiplier test statistic
    lm_pval : float
        p-value of Lagrange multiplier test
    ftest : ContrastResult instance
        the results from the F test variant of this test

    Notes
    -----
    Written to match Gretl's linearity test.
    The test runs an auxiliary regression of the residuals on the combined
    original and transformed regressors.
    The Null hypothesis is that the linear specification is correct.

    '''
    from scipy import stats

    transform = (lambda arr: np.power(arr, 2)) if func is None else func

    exog_aux = np.column_stack((exog, transform(exog[:, 1:])))
    nobs, k_vars = exog.shape
    aux_fit = OLS(resid, exog_aux).fit()

    # Restrict the k_vars - 1 transformed columns appended after the
    # original regressors.
    n_restrictions = k_vars - 1
    restriction = np.eye(n_restrictions, k_vars * 2 - 1, k_vars)
    ftest = aux_fit.f_test(restriction)

    lm = nobs * aux_fit.rsquared
    lm_pval = stats.chi2.sf(lm, n_restrictions)
    return lm, lm_pval, ftest
def reset_ramsey(res, degree=5):
    """Ramsey's RESET specification test for linear models

    General specification test for additional non-linear effects in a model.

    Parameters
    ----------
    res : results instance
        Fitted model; ``res.model.exog``, ``res.model.endog`` and
        ``res.fittedvalues`` are used.
    degree : int
        Maximum power to include in the RESET test. Powers 0 and 1 are
        excluded, so that degree tests powers 2, ..., degree of the fitted
        values.

    Returns
    -------
    The result of ``f_test`` on the auxiliary OLS fit, restricted to the
    added power terms.

    Notes
    -----
    The auxiliary OLS regression uses the design matrix augmented by powers
    2 to degree of the (rescaled) fitted values. The F-test checks whether
    these additional terms are significant. A p-value below a threshold,
    e.g. 0.1, indicates that there might be additional non-linear effects in
    the model and that the linear model is mis-specified.

    References
    ----------
    https://en.wikipedia.org/wiki/Ramsey_RESET_test
    """
    n_original = res.model.exog.shape[1]

    # Rescale the fitted values by their root mean square before taking
    # powers.
    fitted = np.asarray(res.fittedvalues)
    fitted = fitted / np.sqrt((fitted**2).mean())

    # Decreasing-power Vandermonde columns; the last two (power 1 and the
    # constant) are dropped.
    fitted_powers = np.vander(fitted, degree + 1)[:, :-2]

    aux_exog = np.column_stack((res.model.exog, fitted_powers))
    aux_exog /= np.sqrt((aux_exog**2).mean(0))
    aux_endog = res.model.endog / (res.model.endog**2).mean()
    aux_fit = OLS(aux_endog, aux_exog).fit()

    # One restriction per added power column.
    restriction = np.eye(degree - 1, aux_exog.shape[1], n_original)
    return aux_fit.f_test(restriction)
def grangercausalitytests(x, maxlag, addconst=True, verbose=True):
    '''four tests for granger non causality of 2 timeseries

    all four tests give similar results
    `params_ftest` and `ssr_ftest` are equivalent based on F test which is
    identical to lmtest:grangertest in R

    Parameters
    ----------
    x : array, 2d, (nobs,2)
        data for test whether the time series in the second column Granger
        causes the time series in the first column
    maxlag : integer
        the Granger causality test results are calculated for all lags up to
        maxlag
    addconst : bool
        must be true; the variant without a constant is not implemented
    verbose : bool
        print results if true

    Returns
    -------
    results : dictionary
        all test results, dictionary keys are the number of lags. For each
        lag the values are a tuple, with the first element a dictionary with
        teststatistic, pvalues, degrees of freedom, the second element are
        the OLS estimation results for the restricted model, the unrestricted
        model and the restriction (contrast) matrix for the parameter f_test.

    Raises
    ------
    ValueError
        if `addconst` is false

    Notes
    -----
    TODO: convert to class and attach results properly

    The Null hypothesis for grangercausalitytests is that the time series in
    the second column, x2, does NOT Granger cause the time series in the
    first column, x1: all four tests check whether the coefficients on past
    values of x2 are zero. We reject the null hypothesis that x2 does not
    Granger cause x1 if the pvalues are below a desired size of the test.

    'params_ftest', 'ssr_ftest' are based on F test

    'ssr_chi2test', 'lrtest' are based on chi-square test

    '''
    from scipy import stats  # lazy import

    resli = {}
    for mlg in range(1, maxlag + 1):
        result = {}
        if verbose:
            # Single-argument print calls behave the same on Python 2 and 3.
            print('\nGranger Causality')
            print('number of lags (no zero) %s' % mlg)
        mxlg = mlg

        # create lagmat of both time series
        dta = lagmat2ds(x, mxlg, trim='both', dropex=1)

        if not addconst:
            raise ValueError('Not Implemented')
        # add constant
        dtaown = add_constant(dta[:, 1:mxlg + 1])
        dtajoint = add_constant(dta[:, 1:])

        # run ols on both models without and with lags of second variable
        res2down = OLS(dta[:, 0], dtaown).fit()
        res2djoint = OLS(dta[:, 0], dtajoint).fit()

        # for ssr based tests see:
        # http://support.sas.com/rnd/app/examples/ets/granger/index.htm
        # the other tests are made-up

        # Granger Causality test using ssr (F statistic)
        fgc1 = ((res2down.ssr - res2djoint.ssr) / res2djoint.ssr / mxlg
                * res2djoint.df_resid)
        fgc1_pval = stats.f.sf(fgc1, mxlg, res2djoint.df_resid)
        if verbose:
            print('ssr based F test: F=%-8.4f, p=%-8.4f, df_denom=%d, '
                  'df_num=%d' % (fgc1, fgc1_pval, res2djoint.df_resid, mxlg))
        result['ssr_ftest'] = (fgc1, fgc1_pval, res2djoint.df_resid, mxlg)

        # Granger Causality test using ssr (chi2 statistic)
        fgc2 = (res2down.nobs * (res2down.ssr - res2djoint.ssr)
                / res2djoint.ssr)
        fgc2_pval = stats.chi2.sf(fgc2, mxlg)
        if verbose:
            print('ssr based chi2 test: chi2=%-8.4f, p=%-8.4f, df=%d'
                  % (fgc2, fgc2_pval, mxlg))
        result['ssr_chi2test'] = (fgc2, fgc2_pval, mxlg)

        # likelihood ratio test
        lr = -2 * (res2down.llf - res2djoint.llf)
        lr_pval = stats.chi2.sf(lr, mxlg)
        if verbose:
            print('likelihood ratio test: chi2=%-8.4f, p=%-8.4f, df=%d'
                  % (lr, lr_pval, mxlg))
        result['lrtest'] = (lr, lr_pval, mxlg)

        # F test that all lag coefficients of exog are zero: select the mxlg
        # columns that follow the own lags; the last column is left out.
        rconstr = np.column_stack((np.zeros((mxlg, mxlg)),
                                   np.eye(mxlg, mxlg),
                                   np.zeros((mxlg, 1))))
        ftres = res2djoint.f_test(rconstr)
        if verbose:
            print('parameter F test: F=%-8.4f, p=%-8.4f, df_denom=%d, '
                  'df_num=%d' % (ftres.fvalue, ftres.pvalue, ftres.df_denom,
                                 ftres.df_num))
        result['params_ftest'] = (np.squeeze(ftres.fvalue)[()],
                                  np.squeeze(ftres.pvalue)[()],
                                  ftres.df_denom, ftres.df_num)

        resli[mxlg] = (result, [res2down, res2djoint, rconstr])

    return resli
def grangercausalitytests(x, maxlag, addconst=True, verbose=True):
    """four tests for granger non causality of 2 timeseries

    all four tests give similar results
    `params_ftest` and `ssr_ftest` are equivalent based on F test which is
    identical to lmtest:grangertest in R

    Parameters
    ----------
    x : array, 2d, (nobs,2)
        data for test whether the time series in the second column Granger
        causes the time series in the first column
    maxlag : integer
        the Granger causality test results are calculated for all lags up to
        maxlag
    addconst : bool
        must be true; the variant without a constant is not implemented
    verbose : bool
        print results if true

    Returns
    -------
    results : dictionary
        all test results, dictionary keys are the number of lags. For each
        lag the values are a tuple, with the first element a dictionary with
        teststatistic, pvalues, degrees of freedom, the second element are
        the OLS estimation results for the restricted model, the unrestricted
        model and the restriction (contrast) matrix for the parameter f_test.

    Raises
    ------
    ValueError
        if ``nobs <= 3 * maxlag + int(addconst)``
    NotImplementedError
        if `addconst` is false

    Notes
    -----
    TODO: convert to class and attach results properly

    The Null hypothesis for grangercausalitytests is that the time series in
    the second column, x2, does NOT Granger cause the time series in the
    first column, x1. Granger causality means that past values of x2 have a
    statistically significant effect on the current value of x1, taking past
    values of x1 into account as regressors. We reject the null hypothesis
    that x2 does not Granger cause x1 if the pvalues are below a desired size
    of the test.

    The null hypothesis for all four tests is that the coefficients
    corresponding to past values of the second time series are zero.

    'params_ftest', 'ssr_ftest' are based on F distribution

    'ssr_chi2test', 'lrtest' are based on chi-square distribution

    References
    ----------
    http://en.wikipedia.org/wiki/Granger_causality
    Greene: Econometric Analysis

    """
    from scipy import stats

    x = np.asarray(x)
    n_const = int(addconst)
    if x.shape[0] <= 3 * maxlag + n_const:
        # Largest lag satisfying 3 * lag + n_const < nobs.
        max_allowed = (x.shape[0] - n_const - 1) // 3
        raise ValueError("Insufficient observations. Maximum allowable "
                         "lag is {0}".format(max_allowed))

    resli = {}
    for mlg in range(1, maxlag + 1):
        result = {}
        if verbose:
            print('\nGranger Causality')
            print('number of lags (no zero)', mlg)
        mxlg = mlg

        # create lagmat of both time series
        dta = lagmat2ds(x, mxlg, trim='both', dropex=1)

        if not addconst:
            raise NotImplementedError('Not Implemented')
        # add constant as the last column
        dtaown = add_constant(dta[:, 1:(mxlg + 1)], prepend=False)
        dtajoint = add_constant(dta[:, 1:], prepend=False)

        # Run ols on both models without and with lags of second variable
        res2down = OLS(dta[:, 0], dtaown).fit()
        res2djoint = OLS(dta[:, 0], dtajoint).fit()

        # for ssr based tests see:
        # http://support.sas.com/rnd/app/examples/ets/granger/index.htm
        # the other tests are made-up

        # Granger Causality test using ssr (F statistic)
        fgc1 = ((res2down.ssr - res2djoint.ssr) /
                res2djoint.ssr / mxlg * res2djoint.df_resid)
        fgc1_pval = stats.f.sf(fgc1, mxlg, res2djoint.df_resid)
        if verbose:
            print('ssr based F test: F=%-8.4f, p=%-8.4f, df_denom=%d,'
                  ' df_num=%d' % (fgc1, fgc1_pval, res2djoint.df_resid, mxlg))
        result['ssr_ftest'] = (fgc1, fgc1_pval, res2djoint.df_resid, mxlg)

        # Granger Causality test using ssr (chi2 statistic)
        fgc2 = (res2down.nobs * (res2down.ssr - res2djoint.ssr) /
                res2djoint.ssr)
        fgc2_pval = stats.chi2.sf(fgc2, mxlg)
        if verbose:
            print('ssr based chi2 test: chi2=%-8.4f, p=%-8.4f, '
                  'df=%d' % (fgc2, fgc2_pval, mxlg))
        result['ssr_chi2test'] = (fgc2, fgc2_pval, mxlg)

        # likelihood ratio test
        lr = -2 * (res2down.llf - res2djoint.llf)
        lr_pval = stats.chi2.sf(lr, mxlg)
        if verbose:
            print('likelihood ratio test: chi2=%-8.4f, p=%-8.4f, df=%d' %
                  (lr, lr_pval, mxlg))
        result['lrtest'] = (lr, lr_pval, mxlg)

        # F test that all lag coefficients of exog are zero: select the mxlg
        # columns after the own lags; the trailing constant is left out.
        rconstr = np.column_stack((np.zeros((mxlg, mxlg)),
                                   np.eye(mxlg, mxlg),
                                   np.zeros((mxlg, 1))))
        ftres = res2djoint.f_test(rconstr)
        if verbose:
            print('parameter F test: F=%-8.4f, p=%-8.4f, df_denom=%d,'
                  ' df_num=%d' % (ftres.fvalue, ftres.pvalue, ftres.df_denom,
                                  ftres.df_num))
        result['params_ftest'] = (np.squeeze(ftres.fvalue)[()],
                                  np.squeeze(ftres.pvalue)[()],
                                  ftres.df_denom, ftres.df_num)

        resli[mxlg] = (result, [res2down, res2djoint, rconstr])

    return resli
def grangercausalitytests_mod(x, maxlag, addconst=True, verbose=True):
    """Granger non-causality tests that clamp `maxlag` instead of failing.

    Variant of ``grangercausalitytests``: when there are too few
    observations for the requested `maxlag`, a warning is issued and
    `maxlag` is reduced to the largest allowable lag rather than raising.

    Parameters
    ----------
    x : array, 2d, (nobs,2)
        the two time series, one per column
    maxlag : integer
        tests are calculated for all lags from 1 up to maxlag (after
        clamping)
    addconst : bool
        must be true; the variant without a constant is not implemented
    verbose : bool
        print results if true

    Returns
    -------
    results : dictionary
        keys are the number of lags; each value is a tuple of a dictionary
        with the entries 'ssr_ftest', 'ssr_chi2test', 'lrtest' and
        'params_ftest', and a list with the restricted OLS results, the
        unrestricted OLS results and the restriction matrix.

    Raises
    ------
    NotImplementedError
        if `addconst` is false
    """
    import numpy as np
    from scipy import stats
    from statsmodels.tsa.tsatools import lagmat2ds
    from statsmodels.tools.tools import add_constant
    from statsmodels.regression.linear_model import OLS
    from warnings import warn

    x = np.asarray(x)
    n_const = int(addconst)
    if x.shape[0] <= 3 * maxlag + n_const:
        # Largest lag satisfying 3 * lag + n_const < nobs.
        max_allowed = (x.shape[0] - n_const - 1) // 3
        warn("Insufficient observations. Maximum allowable lag is {0}. "
             "The maximum lag will be set to "
             "this number".format(max_allowed))
        maxlag = max_allowed

    resli = {}
    for mlg in range(1, maxlag + 1):
        result = {}
        if verbose:
            print('\nGranger Causality')
            print('number of lags (no zero)', mlg)
        mxlg = mlg

        # create lagmat of both time series
        dta = lagmat2ds(x, mxlg, trim='both')
        # Intended as removal of the not lagged xs.
        # NOTE(review): this drops the LAST column; confirm against the
        # column order returned by lagmat2ds that this is the unlagged
        # second series and not its largest lag.
        dta = np.delete(dta, -1, axis=1)

        if not addconst:
            raise NotImplementedError('Not Implemented')
        # add constant as the last column
        dtaown = add_constant(dta[:, 1:(mxlg + 1)], prepend=False)
        dtajoint = add_constant(dta[:, 1:], prepend=False)

        # Run ols on both models without and with lags of second variable
        res2down = OLS(dta[:, 0], dtaown).fit()
        res2djoint = OLS(dta[:, 0], dtajoint).fit()

        # for ssr based tests see:
        # http://support.sas.com/rnd/app/examples/ets/granger/index.htm
        # the other tests are made-up

        # Granger Causality test using ssr (F statistic)
        fgc1 = ((res2down.ssr - res2djoint.ssr) /
                res2djoint.ssr / mxlg * res2djoint.df_resid)
        fgc1_pval = stats.f.sf(fgc1, mxlg, res2djoint.df_resid)
        if verbose:
            print('ssr based F test: F=%-8.4f, p=%-8.4f, df_denom=%d,'
                  ' df_num=%d' % (fgc1, fgc1_pval, res2djoint.df_resid, mxlg))
        result['ssr_ftest'] = (fgc1, fgc1_pval, res2djoint.df_resid, mxlg)

        # Granger Causality test using ssr (chi2 statistic)
        fgc2 = (res2down.nobs * (res2down.ssr - res2djoint.ssr) /
                res2djoint.ssr)
        fgc2_pval = stats.chi2.sf(fgc2, mxlg)
        if verbose:
            print('ssr based chi2 test: chi2=%-8.4f, p=%-8.4f, '
                  'df=%d' % (fgc2, fgc2_pval, mxlg))
        result['ssr_chi2test'] = (fgc2, fgc2_pval, mxlg)

        # likelihood ratio test
        lr = -2 * (res2down.llf - res2djoint.llf)
        lr_pval = stats.chi2.sf(lr, mxlg)
        if verbose:
            print('likelihood ratio test: chi2=%-8.4f, p=%-8.4f, df=%d' %
                  (lr, lr_pval, mxlg))
        result['lrtest'] = (lr, lr_pval, mxlg)

        # F test that all lag coefficients of exog are zero
        rconstr = np.column_stack((np.zeros((mxlg, mxlg)),
                                   np.eye(mxlg, mxlg),
                                   np.zeros((mxlg, 1))))
        ftres = res2djoint.f_test(rconstr)
        if verbose:
            print('parameter F test: F=%-8.4f, p=%-8.4f, df_denom=%d,'
                  ' df_num=%d' % (ftres.fvalue, ftres.pvalue, ftres.df_denom,
                                  ftres.df_num))
        result['params_ftest'] = (np.squeeze(ftres.fvalue)[()],
                                  np.squeeze(ftres.pvalue)[()],
                                  ftres.df_denom, ftres.df_num)

        resli[mxlg] = (result, [res2down, res2djoint, rconstr])

    return resli
def grangercausalitytests_mod(x, maxlag, addconst=True, verbose=True):
    """Granger non-causality tests with automatic reduction of `maxlag`.

    Behaves like ``grangercausalitytests`` except that too few observations
    for the requested `maxlag` lead to a warning and a reduced `maxlag`
    (the largest allowable lag) instead of an exception.

    Parameters
    ----------
    x : array, 2d, (nobs,2)
        the two time series, one per column
    maxlag : integer
        tests are calculated for all lags from 1 up to maxlag (after
        clamping)
    addconst : bool
        must be true; the variant without a constant is not implemented
    verbose : bool
        print results if true

    Returns
    -------
    results : dictionary
        keys are the number of lags; each value is a tuple of a dictionary
        with the entries 'ssr_ftest', 'ssr_chi2test', 'lrtest' and
        'params_ftest', and a list with the restricted OLS results, the
        unrestricted OLS results and the restriction matrix.

    Raises
    ------
    NotImplementedError
        if `addconst` is false
    """
    import numpy as np
    from scipy import stats
    from statsmodels.tsa.tsatools import lagmat2ds
    from statsmodels.tools.tools import add_constant
    from statsmodels.regression.linear_model import OLS
    from warnings import warn

    x = np.asarray(x)
    nobs = x.shape[0]
    n_const = int(addconst)
    if nobs <= 3 * maxlag + n_const:
        # The check requires 3 * lag + n_const < nobs, so this is the
        # largest lag that passes it.
        maxlag = (nobs - n_const - 1) // 3
        warn("Insufficient observations. Maximum allowable lag is {0}. "
             "The maximum lag will be set to "
             "this number".format(maxlag))

    resli = {}
    for mlg in range(1, maxlag + 1):
        result = {}
        if verbose:
            print('\nGranger Causality')
            print('number of lags (no zero)', mlg)
        mxlg = mlg

        # create lagmat of both time series
        dta = lagmat2ds(x, mxlg, trim='both')
        # Intended as removal of the not lagged xs.
        # NOTE(review): the deleted column is the last one; verify with
        # lagmat2ds's column layout that it really is the unlagged series.
        dta = np.delete(dta, -1, axis=1)

        if not addconst:
            raise NotImplementedError('Not Implemented')
        # add constant as the last column
        dtaown = add_constant(dta[:, 1:(mxlg + 1)], prepend=False)
        dtajoint = add_constant(dta[:, 1:], prepend=False)

        # Run ols on both models without and with lags of second variable
        res2down = OLS(dta[:, 0], dtaown).fit()
        res2djoint = OLS(dta[:, 0], dtajoint).fit()
        df_resid = res2djoint.df_resid

        # for ssr based tests see:
        # http://support.sas.com/rnd/app/examples/ets/granger/index.htm
        # the other tests are made-up

        # Granger Causality test using ssr (F statistic)
        fgc1 = ((res2down.ssr - res2djoint.ssr) /
                res2djoint.ssr / mxlg * df_resid)
        fgc1_pval = stats.f.sf(fgc1, mxlg, df_resid)
        if verbose:
            print('ssr based F test: F=%-8.4f, p=%-8.4f, df_denom=%d,'
                  ' df_num=%d' % (fgc1, fgc1_pval, df_resid, mxlg))
        result['ssr_ftest'] = (fgc1, fgc1_pval, df_resid, mxlg)

        # Granger Causality test using ssr (chi2 statistic)
        fgc2 = (res2down.nobs * (res2down.ssr - res2djoint.ssr) /
                res2djoint.ssr)
        fgc2_pval = stats.chi2.sf(fgc2, mxlg)
        if verbose:
            print('ssr based chi2 test: chi2=%-8.4f, p=%-8.4f, '
                  'df=%d' % (fgc2, fgc2_pval, mxlg))
        result['ssr_chi2test'] = (fgc2, fgc2_pval, mxlg)

        # likelihood ratio test
        lr = -2 * (res2down.llf - res2djoint.llf)
        lr_pval = stats.chi2.sf(lr, mxlg)
        if verbose:
            print('likelihood ratio test: chi2=%-8.4f, p=%-8.4f, df=%d' %
                  (lr, lr_pval, mxlg))
        result['lrtest'] = (lr, lr_pval, mxlg)

        # F test that all lag coefficients of exog are zero
        rconstr = np.column_stack((np.zeros((mxlg, mxlg)),
                                   np.eye(mxlg, mxlg),
                                   np.zeros((mxlg, 1))))
        ftres = res2djoint.f_test(rconstr)
        if verbose:
            print('parameter F test: F=%-8.4f, p=%-8.4f, df_denom=%d,'
                  ' df_num=%d' % (ftres.fvalue, ftres.pvalue, ftres.df_denom,
                                  ftres.df_num))
        result['params_ftest'] = (np.squeeze(ftres.fvalue)[()],
                                  np.squeeze(ftres.pvalue)[()],
                                  ftres.df_denom, ftres.df_num)

        resli[mxlg] = (result, [res2down, res2djoint, rconstr])

    return resli
def setupClass(cls):
    """Fit OLS on the Longley data and store a two-restriction F-test."""
    dataset = longley.load()
    dataset.exog = add_constant(dataset.exog)
    fitted = OLS(dataset.endog, dataset.exog).fit()

    restrictions = [
        [0, 1, -1, 0, 0, 0, 0],
        [0, 0, 0, 0, 1, -1, 0],
    ]
    cls.Ftest1 = fitted.f_test(restrictions)
def hacked_gct(x, maxlag, addconst=True, verbose=True):
    """Reduced Granger test that only runs the parameter F test.

    Only the unrestricted (joint) regression is estimated; the restricted
    regression and the ssr / likelihood-ratio based tests are skipped.

    Parameters
    ----------
    x : array, 2d, (nobs,2)
        the two time series, one per column
    maxlag : integer
        the test is calculated for all lags from 1 up to maxlag
    addconst : bool
        must be true; the variant without a constant is not implemented
    verbose : bool
        print results if true

    Returns
    -------
    results : dictionary
        keys are the number of lags; each value is a tuple of a dictionary
        with the single entry 'params_ftest' and a list
        ``['skipped', unrestricted OLS results, restriction matrix]``, where
        the string ``'skipped'`` stands in for the restricted model.

    Raises
    ------
    ValueError
        if ``nobs <= 3 * maxlag + int(addconst)``
    NotImplementedError
        if `addconst` is false
    """
    x = np.asarray(x)
    n_const = int(addconst)
    if x.shape[0] <= 3 * maxlag + n_const:
        # Largest lag satisfying 3 * lag + n_const < nobs.
        max_allowed = (x.shape[0] - n_const - 1) // 3
        raise ValueError("Insufficient observations. Maximum allowable "
                         "lag is {0}".format(max_allowed))

    resli = {}
    for mlg in range(1, maxlag + 1):
        result = {}
        if verbose:
            print('\nGranger Causality')
            print('number of lags (no zero)', mlg)
        mxlg = mlg

        # create lagmat of both time series
        dta = lagmat2ds(x, mxlg, trim='both', dropex=1)

        if not addconst:
            raise NotImplementedError('Not Implemented')
        # add constant as the last column
        dtajoint = add_constant(dta[:, 1:], prepend=False)

        # Placeholder keeps the layout of the returned list.
        res2down = 'skipped'
        res2djoint = OLS(dta[:, 0], dtajoint).fit()

        # F test that all lag coefficients of exog are zero
        rconstr = np.column_stack((np.zeros((mxlg, mxlg)),
                                   np.eye(mxlg, mxlg),
                                   np.zeros((mxlg, 1))))
        ftres = res2djoint.f_test(rconstr)
        if verbose:
            print('parameter F test: F=%-8.4f, p=%-8.4f, df_denom=%d,'
                  ' df_num=%d' % (ftres.fvalue, ftres.pvalue, ftres.df_denom,
                                  ftres.df_num))
        result['params_ftest'] = (np.squeeze(ftres.fvalue)[()],
                                  np.squeeze(ftres.pvalue)[()],
                                  ftres.df_denom, ftres.df_num)

        resli[mxlg] = (result, [res2down, res2djoint, rconstr])

    return resli
def acorr_breusch_godfrey(results, nlags=None, store=False):
    '''Breusch Godfrey Lagrange Multiplier tests for residual autocorrelation

    Parameters
    ----------
    results : Result instance
        Estimation results whose residuals are tested for serial
        correlation
    nlags : int
        Number of lags to include in the auxiliary regression (nlags is the
        highest lag). If None, ``trunc(12 * (nobs / 100) ** 0.25)`` is used.
    store : bool
        If true, an additional instance holding intermediate results is
        returned.

    Returns
    -------
    lm : float
        Lagrange multiplier test statistic
    lmpval : float
        p-value for Lagrange multiplier test
    fval : float
        fstatistic for F test, alternative version of the same test based on
        F test for the parameter restriction
    fpval : float
        pvalue for F test
    resstore : instance (optional)
        holds the auxiliary OLS results (``resols``) and the number of lags
        used (``usedlag``). Only returned if store=True

    Notes
    -----
    BG adds lags of residual to exog in the design matrix for the auxiliary
    regression with residuals as endog, see Greene 12.7.1.

    References
    ----------
    Greene Econometrics, 5th edition

    '''
    resid = np.asarray(results.resid)
    exog_old = results.model.exog
    nobs = resid.shape[0]
    if nlags is None:
        # for adf from Greene referencing Schwert 1989
        # TODO: check default, or do AIC/BIC
        nlags = np.trunc(12. * np.power(nobs / 100., 1 / 4.))
    nlags = int(nlags)

    # Pad with zeros so that the lagged residuals cover every observation.
    padded = np.concatenate((np.zeros(nlags), resid))
    lagged = lagmat(padded[:, None], nlags, trim='both')
    nobs = lagged.shape[0]
    lagged = np.c_[np.ones((nobs, 1)), lagged]
    aux_endog = padded[-nobs:]
    exog = np.column_stack((exog_old, lagged))
    k_vars = exog.shape[1]

    if store:
        resstore = ResultsStore()

    resols = OLS(aux_endog, exog).fit()
    # Restrict the last nlags coefficients, i.e. the lagged residuals.
    ft = resols.f_test(np.eye(nlags, k_vars, k_vars - nlags))
    raw_fval, raw_fpval = ft.fvalue, ft.pvalue
    # TODO: fix this in ContrastResults
    fval = np.squeeze(raw_fval)[()]
    fpval = np.squeeze(raw_fpval)[()]

    lm = nobs * resols.rsquared
    # Note: degrees of freedom for LM test is nvars minus constant = usedlags
    lmpval = stats.chi2.sf(lm, nlags)

    if not store:
        return lm, lmpval, fval, fpval

    resstore.resols = resols
    resstore.usedlag = nlags
    return lm, lmpval, fval, fpval, resstore
def grangercausalitytests(x, maxlag, addconst=True, verbose=True):
    """four tests for granger non causality of 2 timeseries

    all four tests give similar results
    `params_ftest` and `ssr_ftest` are equivalent based on F test which is
    identical to lmtest:grangertest in R

    Parameters
    ----------
    x : array, 2d, (nobs,2)
        data for test whether the time series in the second column Granger
        causes the time series in the first column
    maxlag : integer
        the Granger causality test results are calculated for all lags up to
        maxlag
    addconst : bool
        must be true; the variant without a constant is not implemented
    verbose : bool
        print results if true

    Returns
    -------
    results : dictionary
        all test results, dictionary keys are the number of lags. For each
        lag the values are a tuple, with the first element a dictionary with
        teststatistic, pvalues, degrees of freedom, the second element are
        the OLS estimation results for the restricted model, the unrestricted
        model and the restriction (contrast) matrix for the parameter f_test.

    Raises
    ------
    ValueError
        if ``nobs <= 3 * maxlag + int(addconst)``
    NotImplementedError
        if `addconst` is false

    Notes
    -----
    TODO: convert to class and attach results properly

    The Null hypothesis for grangercausalitytests is that the time series in
    the second column, x2, does NOT Granger cause the time series in the
    first column, x1. Granger causality means that past values of x2 have a
    statistically significant effect on the current value of x1, taking past
    values of x1 into account as regressors. We reject the null hypothesis
    that x2 does not Granger cause x1 if the pvalues are below a desired size
    of the test.

    The null hypothesis for all four tests is that the coefficients
    corresponding to past values of the second time series are zero.

    'params_ftest', 'ssr_ftest' are based on F distribution

    'ssr_chi2test', 'lrtest' are based on chi-square distribution

    References
    ----------
    http://en.wikipedia.org/wiki/Granger_causality
    Greene: Econometric Analysis

    """
    from scipy import stats

    x = np.asarray(x)
    nobs = x.shape[0]
    n_const = int(addconst)
    if nobs <= 3 * maxlag + n_const:
        # The check requires 3 * lag + n_const < nobs, so the largest valid
        # lag is (nobs - n_const - 1) // 3.
        raise ValueError("Insufficient observations. Maximum allowable "
                         "lag is {0}".format((nobs - n_const - 1) // 3))

    resli = {}
    for mlg in range(1, maxlag + 1):
        result = {}
        if verbose:
            print('\nGranger Causality')
            print('number of lags (no zero)', mlg)
        mxlg = mlg

        # create lagmat of both time series
        dta = lagmat2ds(x, mxlg, trim='both', dropex=1)

        if not addconst:
            raise NotImplementedError('Not Implemented')
        # add constant as the last column
        dtaown = add_constant(dta[:, 1:(mxlg + 1)], prepend=False)
        dtajoint = add_constant(dta[:, 1:], prepend=False)

        # Run ols on both models without and with lags of second variable
        res2down = OLS(dta[:, 0], dtaown).fit()
        res2djoint = OLS(dta[:, 0], dtajoint).fit()
        df_resid = res2djoint.df_resid

        # for ssr based tests see:
        # http://support.sas.com/rnd/app/examples/ets/granger/index.htm
        # the other tests are made-up

        # Granger Causality test using ssr (F statistic)
        fgc1 = ((res2down.ssr - res2djoint.ssr) /
                res2djoint.ssr / mxlg * df_resid)
        fgc1_pval = stats.f.sf(fgc1, mxlg, df_resid)
        if verbose:
            print('ssr based F test: F=%-8.4f, p=%-8.4f, df_denom=%d,'
                  ' df_num=%d' % (fgc1, fgc1_pval, df_resid, mxlg))
        result['ssr_ftest'] = (fgc1, fgc1_pval, df_resid, mxlg)

        # Granger Causality test using ssr (chi2 statistic)
        fgc2 = (res2down.nobs * (res2down.ssr - res2djoint.ssr) /
                res2djoint.ssr)
        fgc2_pval = stats.chi2.sf(fgc2, mxlg)
        if verbose:
            print('ssr based chi2 test: chi2=%-8.4f, p=%-8.4f, '
                  'df=%d' % (fgc2, fgc2_pval, mxlg))
        result['ssr_chi2test'] = (fgc2, fgc2_pval, mxlg)

        # likelihood ratio test
        lr = -2 * (res2down.llf - res2djoint.llf)
        lr_pval = stats.chi2.sf(lr, mxlg)
        if verbose:
            print('likelihood ratio test: chi2=%-8.4f, p=%-8.4f, df=%d' %
                  (lr, lr_pval, mxlg))
        result['lrtest'] = (lr, lr_pval, mxlg)

        # F test that all lag coefficients of exog are zero: select the mxlg
        # columns after the own lags; the trailing constant is left out.
        rconstr = np.column_stack((np.zeros((mxlg, mxlg)),
                                   np.eye(mxlg, mxlg),
                                   np.zeros((mxlg, 1))))
        ftres = res2djoint.f_test(rconstr)
        if verbose:
            print('parameter F test: F=%-8.4f, p=%-8.4f, df_denom=%d,'
                  ' df_num=%d' % (ftres.fvalue, ftres.pvalue, ftres.df_denom,
                                  ftres.df_num))
        result['params_ftest'] = (np.squeeze(ftres.fvalue)[()],
                                  np.squeeze(ftres.pvalue)[()],
                                  ftres.df_denom, ftres.df_num)

        resli[mxlg] = (result, [res2down, res2djoint, rconstr])

    return resli
def setupClass(cls):
    """Store the F-test of two linear restrictions on the Longley OLS fit."""
    longley_data = longley.load()
    longley_data.exog = add_constant(longley_data.exog)
    ols_results = OLS(longley_data.endog, longley_data.exog).fit()

    first_row = [0, 1, -1, 0, 0, 0, 0]
    second_row = [0, 0, 0, 0, 1, -1, 0]
    cls.Ftest1 = ols_results.f_test([first_row, second_row])
def hacked_gct(x, maxlag, addconst=True, verbose=True):
    """Granger test restricted to the parameter F test.

    Only the unrestricted (joint) regression is fitted. The restricted
    regression and the ssr / likelihood-ratio based tests are not computed.

    Parameters
    ----------
    x : array, 2d, (nobs,2)
        the two time series, one per column
    maxlag : integer
        the test is calculated for all lags from 1 up to maxlag
    addconst : bool
        must be true; the variant without a constant is not implemented
    verbose : bool
        print results if true

    Returns
    -------
    results : dictionary
        keys are the number of lags; each value is a tuple of a dictionary
        with the single entry 'params_ftest' and a list
        ``['skipped', unrestricted OLS results, restriction matrix]``, where
        the string ``'skipped'`` stands in for the restricted model.

    Raises
    ------
    ValueError
        if ``nobs <= 3 * maxlag + int(addconst)``
    NotImplementedError
        if `addconst` is false
    """
    x = np.asarray(x)
    nobs = x.shape[0]
    n_const = int(addconst)
    if nobs <= 3 * maxlag + n_const:
        # The check requires 3 * lag + n_const < nobs, so the largest valid
        # lag is (nobs - n_const - 1) // 3.
        raise ValueError("Insufficient observations. Maximum allowable "
                         "lag is {0}".format((nobs - n_const - 1) // 3))

    resli = {}
    for mlg in range(1, maxlag + 1):
        result = {}
        if verbose:
            print('\nGranger Causality')
            print('number of lags (no zero)', mlg)
        mxlg = mlg

        # create lagmat of both time series
        dta = lagmat2ds(x, mxlg, trim='both', dropex=1)

        if not addconst:
            raise NotImplementedError('Not Implemented')
        # add constant as the last column
        dtajoint = add_constant(dta[:, 1:], prepend=False)

        # The restricted model is not estimated; the placeholder keeps the
        # layout of the returned list.
        res2down = 'skipped'
        res2djoint = OLS(dta[:, 0], dtajoint).fit()

        # F test that all lag coefficients of exog are zero
        rconstr = np.column_stack((np.zeros((mxlg, mxlg)),
                                   np.eye(mxlg, mxlg),
                                   np.zeros((mxlg, 1))))
        ftres = res2djoint.f_test(rconstr)
        if verbose:
            print('parameter F test: F=%-8.4f, p=%-8.4f, df_denom=%d,'
                  ' df_num=%d' % (ftres.fvalue, ftres.pvalue, ftres.df_denom,
                                  ftres.df_num))
        result['params_ftest'] = (np.squeeze(ftres.fvalue)[()],
                                  np.squeeze(ftres.pvalue)[()],
                                  ftres.df_denom, ftres.df_num)

        resli[mxlg] = (result, [res2down, res2djoint, rconstr])

    return resli