def get_VAR_noise_matrix(signals, olag):
    """Fit one lagged OLS regression per signal and return the residual covariance.

    For every column ``k`` the columns of `signals` are rotated so that column
    ``k`` comes first, the rotated array is expanded with ``lagmat2ds`` using
    `olag` lags, and the first column of that expansion is regressed on the
    remaining columns plus a constant.

    Parameters
    ----------
    signals : ndarray, 2d
        One signal per column; ``signals.shape[0]`` is the number of samples.
    olag : int
        Number of lags used in every regression.

    Returns
    -------
    VAR_noise_matrix : ndarray
        ``np.cov`` of the residuals, one variable per signal.
    VAR_resid : ndarray, shape (T - olag, num_signals)
        Column ``k`` holds the residuals of the regression targeting signal ``k``.
    VAR_model : dict
        Maps ``k`` to the fitted OLS results object of that regression. The
        regressors of model ``k`` follow the rotated column order, not the
        original one.
    """
    from statsmodels.tools.tools import add_constant
    from statsmodels.regression.linear_model import OLS
    from statsmodels.tsa.tsatools import lagmat2ds

    T = signals.shape[0]
    num_signals = signals.shape[1]

    VAR_resid = np.zeros((T - olag, num_signals))
    VAR_model = {}
    for k in range(num_signals):
        # Rotate the ORIGINAL array each time. Re-assigning `signals` here
        # would accumulate the rotations (0, 1, 3, 6, ...) so that, with three
        # or more signals, some columns are fitted twice and others never.
        rotated = np.concatenate((signals[:, k:], signals[:, 0:k]), axis=1)

        data = lagmat2ds(rotated, olag, trim='both', dropex=1)
        datajoint = add_constant(data[:, 1:], prepend=False)
        OLS_ = OLS(data[:, 0], datajoint).fit()

        VAR_resid[:, k] = OLS_.resid
        VAR_model[k] = OLS_

    # Noise covariance matrix of the full model.
    VAR_noise_matrix = np.cov(VAR_resid.T)
    return VAR_noise_matrix, VAR_resid, VAR_model
def grangercausalitytests(x, maxlag, addconst=True, verbose=True):
    """Four tests for Granger non-causality of two time series.

    All four tests give similar results. `params_ftest` and `ssr_ftest` are
    equivalent, based on an F test which is identical to lmtest:grangertest
    in R.

    Parameters
    ----------
    x : array, 2d, (nobs, 2)
        Data for testing whether the time series in the second column Granger
        causes the time series in the first column.
    maxlag : integer
        The Granger causality test results are calculated for all lags up to
        maxlag.
    addconst : bool
        Whether a constant is appended to the regressors. Only True is
        implemented.
    verbose : bool
        Print results if true.

    Returns
    -------
    results : dictionary
        All test results; dictionary keys are the number of lags. For each
        lag the value is a tuple whose first element is a dictionary with
        test statistic, p-value and degrees of freedom per test, and whose
        second element is the list ``[restricted OLS results, unrestricted
        OLS results, restriction (contrast) matrix]`` used for the parameter
        F test.

    Raises
    ------
    ValueError
        If ``x.shape[0] <= 3 * maxlag + int(addconst)``.
    NotImplementedError
        If `addconst` is false (raised when the first lag is processed).

    Notes
    -----
    TODO: convert to class and attach results properly

    The null hypothesis is that the time series in the second column, x2,
    does NOT Granger cause the time series in the first column, x1. Granger
    causality means that past values of x2 have a statistically significant
    effect on the current value of x1, taking past values of x1 into account
    as regressors. We reject the null hypothesis if the p-values are below a
    desired size of the test.

    The null hypothesis for all four tests is that the coefficients
    corresponding to past values of the second time series are zero.

    'params_ftest', 'ssr_ftest' are based on the F distribution;
    'ssr_chi2test', 'lrtest' are based on the chi-square distribution.

    References
    ----------
    http://en.wikipedia.org/wiki/Granger_causality
    Greene: Econometric Analysis
    """
    from scipy import stats

    x = np.asarray(x)
    nobs = x.shape[0]
    n_const = int(addconst)
    if nobs <= 3 * maxlag + n_const:
        # Largest lag that still satisfies nobs > 3 * lag + n_const. The
        # former int((nobs - n_const) / 3) - 1 under-reported it by one
        # whenever nobs - n_const was not a multiple of 3.
        max_allowed = (nobs - n_const - 1) // 3
        raise ValueError("Insufficient observations. Maximum allowable "
                         "lag is {0}".format(max_allowed))

    resli = {}
    for mxlg in range(1, maxlag + 1):
        result = {}
        if verbose:
            print('\nGranger Causality')
            print('number of lags (no zero)', mxlg)

        # Lag matrix of both series; column 0 is the regression target.
        dta = lagmat2ds(x, mxlg, trim='both', dropex=1)

        if not addconst:
            raise NotImplementedError('Not Implemented')
        # Restricted model: the first mxlg lag columns only.
        dtaown = add_constant(dta[:, 1:(mxlg + 1)], prepend=False)
        # Unrestricted model: every lag column.
        dtajoint = add_constant(dta[:, 1:], prepend=False)

        res2down = OLS(dta[:, 0], dtaown).fit()
        res2djoint = OLS(dta[:, 0], dtajoint).fit()
        df_denom = res2djoint.df_resid

        # For ssr based tests see:
        # http://support.sas.com/rnd/app/examples/ets/granger/index.htm
        # the other tests are made-up

        # Granger causality test using ssr (F statistic)
        fgc1 = ((res2down.ssr - res2djoint.ssr) / res2djoint.ssr / mxlg
                * df_denom)
        fgc1_p = stats.f.sf(fgc1, mxlg, df_denom)
        if verbose:
            print('ssr based F test: F=%-8.4f, p=%-8.4f, df_denom=%d,'
                  ' df_num=%d' % (fgc1, fgc1_p, df_denom, mxlg))
        result['ssr_ftest'] = (fgc1, fgc1_p, df_denom, mxlg)

        # Granger causality test using ssr (chi2 statistic)
        fgc2 = (res2down.nobs * (res2down.ssr - res2djoint.ssr)
                / res2djoint.ssr)
        fgc2_p = stats.chi2.sf(fgc2, mxlg)
        if verbose:
            print('ssr based chi2 test: chi2=%-8.4f, p=%-8.4f, '
                  'df=%d' % (fgc2, fgc2_p, mxlg))
        result['ssr_chi2test'] = (fgc2, fgc2_p, mxlg)

        # Likelihood ratio test
        lr = -2 * (res2down.llf - res2djoint.llf)
        lr_p = stats.chi2.sf(lr, mxlg)
        if verbose:
            print('likelihood ratio test: chi2=%-8.4f, p=%-8.4f, df=%d' %
                  (lr, lr_p, mxlg))
        result['lrtest'] = (lr, lr_p, mxlg)

        # F test that the coefficients at positions mxlg .. 2*mxlg-1 (the
        # columns absent from the restricted model) are all zero.
        rconstr = np.column_stack((np.zeros((mxlg, mxlg)),
                                   np.eye(mxlg, mxlg),
                                   np.zeros((mxlg, 1))))
        ftres = res2djoint.f_test(rconstr)
        # Reduce to plain scalars once; used for both printing and storing.
        f_val = np.squeeze(ftres.fvalue)[()]
        f_pval = np.squeeze(ftres.pvalue)[()]
        if verbose:
            print('parameter F test: F=%-8.4f, p=%-8.4f, df_denom=%d,'
                  ' df_num=%d' % (f_val, f_pval, ftres.df_denom,
                                  ftres.df_num))
        result['params_ftest'] = (f_val, f_pval, ftres.df_denom,
                                  ftres.df_num)

        resli[mxlg] = (result, [res2down, res2djoint, rconstr])

    return resli
def grangercausalitytests(x, maxlag, addconst=True, verbose=True):
    """Four tests for Granger non-causality of two time series.

    All four tests give similar results. `params_ftest` and `ssr_ftest` are
    equivalent, based on an F test which is identical to lmtest:grangertest
    in R.

    Parameters
    ----------
    x : array, 2d
        Data for testing whether the time series in the second column Granger
        causes the time series in the first column.
    maxlag : integer
        The Granger causality test results are calculated for all lags up to
        maxlag.
    addconst : bool
        Whether a constant is appended to the regressors. Only True is
        implemented.
    verbose : bool
        Print results if true.

    Returns
    -------
    results : dictionary
        All test results; dictionary keys are the number of lags. For each
        lag the value is a tuple whose first element is a dictionary with
        test statistic, p-value and degrees of freedom per test, and whose
        second element is the list ``[restricted OLS results, unrestricted
        OLS results, restriction (contrast) matrix]`` used for the parameter
        F test.

    Raises
    ------
    ValueError
        If ``x.shape[0] <= 3 * maxlag + int(addconst)``.
    NotImplementedError
        If `addconst` is false (raised when the first lag is processed).

    Notes
    -----
    TODO: convert to class and attach results properly

    The null hypothesis is that the time series in the second column, x2,
    does NOT Granger cause the time series in the first column, x1. Granger
    causality means that past values of x2 have a statistically significant
    effect on the current value of x1, taking past values of x1 into account
    as regressors. We reject the null hypothesis if the p-values are below a
    desired size of the test.

    The null hypothesis for all four tests is that the coefficients
    corresponding to past values of the second time series are zero.

    'params_ftest', 'ssr_ftest' are based on the F distribution;
    'ssr_chi2test', 'lrtest' are based on the chi-square distribution.

    References
    ----------
    http://en.wikipedia.org/wiki/Granger_causality
    Greene: Econometric Analysis
    """
    from scipy import stats

    x = np.asarray(x)
    n_obs = x.shape[0]
    const_cols = int(addconst)
    if n_obs <= 3 * maxlag + const_cols:
        # Report the largest lag for which n_obs > 3 * lag + const_cols
        # holds; the previous formula was one too small unless
        # n_obs - const_cols happened to be divisible by 3.
        raise ValueError(
            "Insufficient observations. Maximum allowable "
            "lag is {0}".format((n_obs - const_cols - 1) // 3))

    resli = {}
    for mxlg in range(1, maxlag + 1):
        if verbose:
            print('\nGranger Causality')
            print('number of lags (no zero)', mxlg)

        # Lag matrix of both series; column 0 is the regression target.
        dta = lagmat2ds(x, mxlg, trim='both', dropex=1)
        if not addconst:
            raise NotImplementedError('Not Implemented')

        target = dta[:, 0]
        # Restricted regressors: first mxlg lag columns; unrestricted: all.
        dtaown = add_constant(dta[:, 1:(mxlg + 1)], prepend=False)
        dtajoint = add_constant(dta[:, 1:], prepend=False)
        res2down = OLS(target, dtaown).fit()
        res2djoint = OLS(target, dtajoint).fit()

        # For ssr based tests see:
        # http://support.sas.com/rnd/app/examples/ets/granger/index.htm
        # the other tests are made-up
        df_denom = res2djoint.df_resid

        # ssr based F statistic
        fgc1 = ((res2down.ssr - res2djoint.ssr) / res2djoint.ssr / mxlg
                * df_denom)
        ssr_ftest = (fgc1, stats.f.sf(fgc1, mxlg, df_denom), df_denom, mxlg)

        # ssr based chi2 statistic
        fgc2 = (res2down.nobs * (res2down.ssr - res2djoint.ssr)
                / res2djoint.ssr)
        ssr_chi2test = (fgc2, stats.chi2.sf(fgc2, mxlg), mxlg)

        # likelihood ratio statistic
        lr = -2 * (res2down.llf - res2djoint.llf)
        lrtest = (lr, stats.chi2.sf(lr, mxlg), mxlg)

        # F test that the coefficients at positions mxlg .. 2*mxlg-1 (the
        # columns absent from the restricted model) are all zero.
        rconstr = np.column_stack((np.zeros((mxlg, mxlg)),
                                   np.eye(mxlg, mxlg),
                                   np.zeros((mxlg, 1))))
        ftres = res2djoint.f_test(rconstr)
        params_ftest = (np.squeeze(ftres.fvalue)[()],
                        np.squeeze(ftres.pvalue)[()],
                        ftres.df_denom, ftres.df_num)

        if verbose:
            print('ssr based F test: F=%-8.4f, p=%-8.4f, df_denom=%d,'
                  ' df_num=%d' % ssr_ftest)
            print('ssr based chi2 test: chi2=%-8.4f, p=%-8.4f, '
                  'df=%d' % ssr_chi2test)
            print('likelihood ratio test: chi2=%-8.4f, p=%-8.4f, df=%d' %
                  lrtest)
            print('parameter F test: F=%-8.4f, p=%-8.4f, df_denom=%d,'
                  ' df_num=%d' % params_ftest)

        result = {
            'ssr_ftest': ssr_ftest,
            'ssr_chi2test': ssr_chi2test,
            'lrtest': lrtest,
            'params_ftest': params_ftest,
        }
        resli[mxlg] = (result, [res2down, res2djoint, rconstr])

    return resli
# For each order, computing VAR : for k in range(0, len(signals) + 1): # Permuting columns to compute VAR : SIGNALS = np.concatenate((SIGNALS[:, k:], SIGNALS[:, 0:k]), axis=1) if k == len(signals): break criterion_value = np.zeros((self._max_lag, 1)) # Testing each order : for lag in range(1, self._max_lag + 1): data = lagmat2ds(SIGNALS, lag, trim="both", dropex=1) datajoint = add_constant(data[:, 1:], prepend=False) OLS_ = OLS(data[:, 0], datajoint).fit() # Saving AIC or BIC temporary values : if self._criterion == "bic": criterion_value[lag - 1] = OLS_.bic elif self._criterion == "aic": criterion_value[lag - 1] = OLS_.aic olag_AR[k] = criterion_value.argmin() + 1 # The optimal order is chosen as the mean order between all the estimated orders from all models olag = int(np.ceil(np.mean(olag_AR))) # Now we can compute the VAR model with the computed order :
def hacked_gct(x, maxlag, addconst=True, verbose=True):
    """Reduced Granger causality test that only runs the parameter F test.

    For every lag from 1 to `maxlag` the first column of the lag matrix of
    `x` is regressed on all lag columns plus a constant, and an F test is run
    on the coefficients at positions ``lag .. 2*lag-1``. The restricted model
    and the ssr / likelihood-ratio tests are skipped.

    Parameters
    ----------
    x : array, 2d
        Input series, one per column.
    maxlag : integer
        Results are calculated for all lags from 1 up to maxlag.
    addconst : bool
        Only True is implemented.
    verbose : bool
        Print results if true.

    Returns
    -------
    results : dictionary
        Keys are the number of lags. Each value is a tuple
        ``(result, [res2down, res2djoint, rconstr])`` where ``result`` holds
        the single key ``'params_ftest'`` with ``(F, p-value, df_denom,
        df_num)``, ``res2down`` is the placeholder string ``'skipped'``,
        ``res2djoint`` is the fitted OLS results object and ``rconstr`` is
        the restriction matrix.

    Raises
    ------
    ValueError
        If ``x.shape[0] <= 3 * maxlag + int(addconst)``.
    NotImplementedError
        If `addconst` is false (raised when the first lag is processed).
    """
    x = np.asarray(x)
    nobs = x.shape[0]
    n_const = int(addconst)
    if nobs <= 3 * maxlag + n_const:
        # Largest lag that still satisfies nobs > 3 * lag + n_const; the
        # former int((nobs - n_const) / 3) - 1 could be one too small.
        raise ValueError(
            "Insufficient observations. Maximum allowable "
            "lag is {0}".format((nobs - n_const - 1) // 3))

    resli = {}
    for mxlg in range(1, maxlag + 1):
        if verbose:
            print('\nGranger Causality')
            print('number of lags (no zero)', mxlg)

        # Lag matrix of all series; column 0 is the regression target.
        dta = lagmat2ds(x, mxlg, trim='both', dropex=1)
        if not addconst:
            raise NotImplementedError('Not Implemented')
        dtajoint = add_constant(dta[:, 1:], prepend=False)

        # The restricted model is not fitted; the placeholder keeps the
        # shape of the returned list unchanged for callers.
        res2down = 'skipped'
        res2djoint = OLS(dta[:, 0], dtajoint).fit()

        # F test that the coefficients at positions mxlg .. 2*mxlg-1 are zero.
        rconstr = np.column_stack((np.zeros((mxlg, mxlg)),
                                   np.eye(mxlg, mxlg),
                                   np.zeros((mxlg, 1))))
        ftres = res2djoint.f_test(rconstr)
        params_ftest = (np.squeeze(ftres.fvalue)[()],
                        np.squeeze(ftres.pvalue)[()],
                        ftres.df_denom, ftres.df_num)
        if verbose:
            print('parameter F test: F=%-8.4f, p=%-8.4f, df_denom=%d,'
                  ' df_num=%d' % params_ftest)

        resli[mxlg] = ({'params_ftest': params_ftest},
                       [res2down, res2djoint, rconstr])

    return resli
def compute(self, *signals):
    """
    This method computes the ConditionalGrangerCausality.
    At the end of the computation, a graph is made to show the links between the signals.

    The computation has four stages: pairwise Granger tests to find candidate
    links, selection of a common model order, a full model fitted on all
    signals, and a conditional test that decides for every candidate link
    whether it survives once the other signals are taken into account.

    :param signals:
        list of signals, one per person.
    :type signals: list[pd.DataFrame]

    :returns: dict
        -- matrix of links between the signals, under the key 'link_matrix'.

    :raises TypeError: if one of the signals is not a pd.DataFrame.
    :raises ValueError: if the signals do not all have the same length.
    """
    n_signals = len(signals)

    # Raise error if parameters are not in the correct type
    for i, signal in enumerate(signals):
        if not isinstance(signal, pd.DataFrame):
            raise TypeError("Requires signal " + str(i + 1) + " to be a pd.DataFrame, ")

    # Raise error if DataFrames have not the same size
    for i, signal in enumerate(signals):
        if len(signals[0]) != len(signal):
            raise ValueError(
                "All the signals must have the same size. Signal " +
                str(i + 1) + " does not have the same size as signal 1")

    # Saving the size of signals (they all supposed to have the same size)
    T = len(signals[0])

    # Converting DataFrames to arrays :
    SIGNALS = np.zeros((T, n_signals))
    for i, signal in enumerate(signals):
        SIGNALS[:, i] = np.array(signal).reshape(T)

    def _fit_first_column(block, lag):
        # OLS of the first column of `block` on `lag` lags of every column
        # plus a constant.
        data = lagmat2ds(block, lag, trim='both', dropex=1)
        datajoint = add_constant(data[:, 1:], prepend=False)
        return OLS(data[:, 0], datajoint).fit()

    def _rotated(k):
        # Columns of SIGNALS rotated so that column k comes first.
        return np.concatenate((SIGNALS[:, k:], SIGNALS[:, 0:k]), axis=1)

    # Creating Matrix to save the links between the signals :
    M_direct = np.zeros((n_signals, n_signals))

    # Testing for direct links between signals :
    # (single-argument print calls produce the same text on Python 2 and 3)
    print("Results of pairwise analysis:\n")
    for i in range(n_signals):
        for j in range(n_signals):
            if i == j:
                continue
            gc = GC.GrangerCausality(max_lag=self._max_lag,
                                     criterion=self._criterion,
                                     plot=False)
            gc_res = gc.compute(signals[i], signals[j])
            if gc_res['ratio'] > 0 and gc_res['p_value'] < 0.01:
                print("signal %d -> %d detected" % (j + 1, i + 1))
                M_direct[i, j] = 1

    # Computing the FULL VAR model :
    # First we have to determine the optimal order according to the given criterion
    olag_AR = np.zeros((n_signals, 1))
    for k in range(n_signals):
        SIGNALS_V = _rotated(k)
        criterion_value = np.zeros((self._max_lag, 1))
        # Testing each order :
        for lag in range(1, self._max_lag + 1):
            fit = _fit_first_column(SIGNALS_V, lag)
            # Saving AIC or BIC temporary values (any other criterion leaves
            # the entry at 0) :
            if self._criterion == 'bic':
                criterion_value[lag - 1] = fit.bic
            elif self._criterion == 'aic':
                criterion_value[lag - 1] = fit.aic
        # argmin is a 0-based index, lags are 1-based.
        olag_AR[k] = criterion_value.argmin() + 1

    # The optimal order is chosen as the mean order between all the estimated orders from all models
    olag = int(np.ceil(np.mean(olag_AR)))

    # Now we can compute the VAR model with the computed order :
    VAR_resid = np.zeros((T - olag, n_signals))
    for k in range(n_signals):
        VAR_resid[:, k] = _fit_first_column(_rotated(k), olag).resid

    # Computing the noise covariance matrix of the full model :
    VAR_noise_matrix = np.cov(VAR_resid.T)

    M_final = np.zeros((n_signals, n_signals))

    # Testing for mediated links between signals :
    print("\n")
    for i in range(n_signals):
        for j in range(n_signals):
            if M_direct[i, j] != 1:
                continue

            # We have detected a "direct link", we need to test with other
            # signals to know if there is a mediated link.
            mediators = [k for k in range(n_signals) if k != i and k != j]
            if not mediators:
                # No third signal exists, so nothing can mediate the link.
                M_final[i, j] = 1
                continue

            # Model of signal i from every signal except j. The regression
            # does not depend on the candidate mediator, so it is fitted
            # once. np.delete already yields (T, n_signals - 2) columns; the
            # former reshape(T, 2) only worked for exactly four signals.
            S = np.concatenate((SIGNALS[:, i].reshape(T, 1),
                                np.delete(SIGNALS, [i, j], 1)), axis=1)
            var_noise = np.var(_fit_first_column(S, olag).resid)
            ratio = np.log(var_noise) - np.log(VAR_noise_matrix[i, i])

            if ratio < 0.01:
                # Dropping j barely changes the residual variance: the link
                # is attributed to the first other signal.
                k = mediators[0]
                print("signal %d -> %d  is mediated by signal %d"
                      % (j + 1, i + 1, k + 1))
                M_direct[i, j] = 0
                M_final[i, k] = 1
                M_final[k, j] = 1
            else:
                M_final[i, j] = 1

    results = dict()
    results['link_matrix'] = M_final

    if (self._plot == True):
        plt.ion()
        self.plot_result(results)

    return results
# For each order, computing VAR : for k in range(0,len(signals)+1): # Permuting columns to compute VAR : SIGNALS = np.concatenate((SIGNALS[:,k:],SIGNALS[:,0:k]),axis = 1) if k == len(signals): break criterion_value = np.zeros((self._max_lag,1)) #Testing each order : for lag in range(1, self._max_lag+1): data = lagmat2ds(SIGNALS,lag,trim ='both', dropex = 1) datajoint = add_constant(data[:, 1:], prepend=False) OLS_ = OLS(data[:, 0], datajoint).fit() # Saving AIC or BIC temporary values : if self._criterion == 'bic': criterion_value[lag-1] = OLS_.bic elif self._criterion == 'aic': criterion_value[lag-1] = OLS_.aic olag_AR[k] = criterion_value.argmin()+1 # The optimal order is chosen as the mean order between all the estimated orders from all models olag = int(np.ceil(np.mean(olag_AR))) # Now we can compute the VAR model with the computed order :
# For each order, computing VAR : for k in range(0, len(signals) + 1): # Permuting columns to compute VAR : SIGNALS = np.concatenate((SIGNALS[:, k:], SIGNALS[:, 0:k]), axis=1) if k == len(signals): break criterion_value = np.zeros((self._max_lag, 1)) #Testing each order : for lag in range(1, self._max_lag + 1): data = lagmat2ds(SIGNALS, lag, trim='both', dropex=1) datajoint = add_constant(data[:, 1:], prepend=False) OLS_ = OLS(data[:, 0], datajoint).fit() # Saving AIC or BIC temporary values : if self._criterion == 'bic': criterion_value[lag - 1] = OLS_.bic elif self._criterion == 'aic': criterion_value[lag - 1] = OLS_.aic olag_AR[k] = criterion_value.argmin() + 1 # The optimal order is chosen as the mean order between all the estimated orders from all models olag = int(np.ceil(np.mean(olag_AR))) # Now we can compute the VAR model with the computed order :
def compute(self, *signals):
    """
    This method computes the ConditionalGrangerCausality.
    At the end of the computation, a graph is made to show the links between the signals.

    Stages: (1) pairwise Granger tests flag candidate links, (2) a common
    model order is selected, (3) a full model is fitted on all signals,
    (4) every candidate link is re-tested conditionally on the other signals.

    :param signals:
        list of signals, one per person.
    :type signals: list[pd.DataFrame]

    :returns: dict
        -- matrix of links between the signals, under the key 'link_matrix'.

    :raises TypeError: if one of the signals is not a pd.DataFrame.
    :raises ValueError: if the signals do not all have the same length.
    """
    count = len(signals)

    # Raise error if parameters are not in the correct type
    for position, frame in enumerate(signals, start=1):
        if not isinstance(frame, pd.DataFrame):
            raise TypeError(
                "Requires signal " + str(position) + " to be a pd.DataFrame, ")

    # Raise error if DataFrames have not the same size
    for position, frame in enumerate(signals, start=1):
        if len(signals[0]) != len(frame):
            raise ValueError(
                "All the signals must have the same size. Signal " + str(
                    position) + " does not have the same size as signal 1")

    # Saving the size of signals (they all supposed to have the same size)
    T = len(signals[0])

    # Converting DataFrames to arrays :
    SIGNALS = np.zeros((T, count))
    for col, frame in enumerate(signals):
        SIGNALS[:, col] = np.array(frame).reshape(T)

    def _ols_on_lags(block, lag):
        # Regress the first column of `block` on `lag` lags of every column
        # plus a constant and return the fitted results.
        data = lagmat2ds(block, lag, trim='both', dropex=1)
        datajoint = add_constant(data[:, 1:], prepend=False)
        return OLS(data[:, 0], datajoint).fit()

    def _with_first(k):
        # SIGNALS with its columns rotated so that column k comes first.
        return np.concatenate((SIGNALS[:, k:], SIGNALS[:, 0:k]), axis=1)

    # Creating Matrix to save the links between the signals :
    M_direct = np.zeros((count, count))

    # Testing for direct links between signals :
    # (single-argument print calls produce the same text on Python 2 and 3)
    print("Results of pairwise analysis:\n")
    for i in range(count):
        for j in range(count):
            if i == j:
                continue
            gc = GC.GrangerCausality(max_lag=self._max_lag,
                                     criterion=self._criterion, plot=False)
            gc_res = gc.compute(signals[i], signals[j])
            if gc_res['ratio'] > 0 and gc_res['p_value'] < 0.01:
                print("signal %d -> %d detected" % (j + 1, i + 1))
                M_direct[i, j] = 1

    # Computing the FULL VAR model :
    # First we have to determine the optimal order according to the given criterion
    olag_AR = np.zeros((count, 1))
    for k in range(count):
        SIGNALS_V = _with_first(k)
        criterion_value = np.zeros((self._max_lag, 1))
        # Testing each order :
        for lag in range(1, self._max_lag + 1):
            fitted = _ols_on_lags(SIGNALS_V, lag)
            # Saving AIC or BIC temporary values (any other criterion leaves
            # the entry at 0) :
            if self._criterion == 'bic':
                criterion_value[lag - 1] = fitted.bic
            elif self._criterion == 'aic':
                criterion_value[lag - 1] = fitted.aic
        # argmin is a 0-based index, lags are 1-based.
        olag_AR[k] = criterion_value.argmin() + 1

    # The optimal order is chosen as the mean order between all the estimated orders from all models
    olag = int(np.ceil(np.mean(olag_AR)))

    # Now we can compute the VAR model with the computed order :
    VAR_resid = np.zeros((T - olag, count))
    for k in range(count):
        VAR_resid[:, k] = _ols_on_lags(_with_first(k), olag).resid

    # Computing the noise covariance matrix of the full model :
    VAR_noise_matrix = np.cov(VAR_resid.T)

    M_final = np.zeros((count, count))

    # Testing for mediated links between signals :
    print("\n")
    for i in range(count):
        for j in range(count):
            if M_direct[i, j] != 1:
                continue

            # We have detected a "direct link", we need to test with other
            # signals to know if there is a mediated link.
            others = [k for k in range(count) if k != i and k != j]
            if not others:
                # No third signal exists, so nothing can mediate the link.
                M_final[i, j] = 1
                continue

            # Signal i modelled from every signal except j. This regression
            # is the same for every candidate mediator, so it is fitted once.
            # np.delete already returns the (T, count - 2) block; the former
            # reshape(T, 2) restricted the method to exactly four signals.
            S = np.concatenate((SIGNALS[:, i].reshape(T, 1),
                                np.delete(SIGNALS, [i, j], 1)), axis=1)
            var_noise = np.var(_ols_on_lags(S, olag).resid)
            ratio = np.log(var_noise) - np.log(VAR_noise_matrix[i, i])

            if ratio < 0.01:
                # Leaving j out barely changes the residual variance: the
                # link is attributed to the first other signal.
                k = others[0]
                print("signal %d -> %d  is mediated by signal %d"
                      % (j + 1, i + 1, k + 1))
                M_direct[i, j] = 0
                M_final[i, k] = 1
                M_final[k, j] = 1
            else:
                M_final[i, j] = 1

    results = dict()
    results['link_matrix'] = M_final

    if (self._plot == True):
        plt.ion()
        self.plot_result(results)

    return results
def hacked_gct(x, maxlag, addconst=True, verbose=True):
    """Reduced Granger causality test that only runs the parameter F test.

    For every lag from 1 to `maxlag` the first column of the lag matrix of
    `x` is regressed on all lag columns plus a constant, and an F test is run
    on the coefficients at positions ``lag .. 2*lag-1``. The restricted model
    and the ssr / likelihood-ratio tests are skipped.

    Parameters
    ----------
    x : array, 2d
        Input series, one per column.
    maxlag : integer
        Results are calculated for all lags from 1 up to maxlag.
    addconst : bool
        Only True is implemented.
    verbose : bool
        Print results if true.

    Returns
    -------
    results : dictionary
        Keys are the number of lags. Each value is a tuple
        ``(result, [res2down, res2djoint, rconstr])`` where ``result`` holds
        the single key ``'params_ftest'`` with ``(F, p-value, df_denom,
        df_num)``, ``res2down`` is the placeholder string ``'skipped'``,
        ``res2djoint`` is the fitted OLS results object and ``rconstr`` is
        the restriction matrix.

    Raises
    ------
    ValueError
        If ``x.shape[0] <= 3 * maxlag + int(addconst)``.
    NotImplementedError
        If `addconst` is false (raised when the first lag is processed).
    """
    x = np.asarray(x)
    n_obs = x.shape[0]
    const_cols = int(addconst)
    if n_obs <= 3 * maxlag + const_cols:
        # Largest lag for which n_obs > 3 * lag + const_cols holds; the old
        # int((n_obs - const_cols) / 3) - 1 could be one too small.
        max_allowed = (n_obs - const_cols - 1) // 3
        raise ValueError("Insufficient observations. Maximum allowable "
                         "lag is {0}".format(max_allowed))

    resli = {}
    for mxlg in range(1, maxlag + 1):
        result = {}
        if verbose:
            print('\nGranger Causality')
            print('number of lags (no zero)', mxlg)

        # Lag matrix of all series; column 0 is the regression target.
        dta = lagmat2ds(x, mxlg, trim='both', dropex=1)
        if not addconst:
            raise NotImplementedError('Not Implemented')
        dtajoint = add_constant(dta[:, 1:], prepend=False)

        # Only the unrestricted model is fitted; 'skipped' keeps the slot of
        # the restricted model in the returned list.
        res2down = 'skipped'
        res2djoint = OLS(dta[:, 0], dtajoint).fit()

        # F test that the coefficients at positions mxlg .. 2*mxlg-1 are zero.
        rconstr = np.column_stack((np.zeros((mxlg, mxlg)),
                                   np.eye(mxlg, mxlg),
                                   np.zeros((mxlg, 1))))
        ftres = res2djoint.f_test(rconstr)
        f_val = np.squeeze(ftres.fvalue)[()]
        f_pval = np.squeeze(ftres.pvalue)[()]
        if verbose:
            print('parameter F test: F=%-8.4f, p=%-8.4f, df_denom=%d,'
                  ' df_num=%d' % (f_val, f_pval, ftres.df_denom,
                                  ftres.df_num))
        result['params_ftest'] = (f_val, f_pval, ftres.df_denom,
                                  ftres.df_num)

        resli[mxlg] = (result, [res2down, res2djoint, rconstr])

    return resli
def grangercausalitytests_mod(x, maxlag, addconst=True, verbose=True):
    """Granger causality tests that clamp `maxlag` instead of raising.

    Runs the same four tests ('ssr_ftest', 'ssr_chi2test', 'lrtest',
    'params_ftest') for every lag from 1 to `maxlag`. Two things differ from
    the plain version: too few observations produce a warning and a reduced
    `maxlag` rather than a ValueError, and the lag matrix is built without
    ``dropex`` and then has its last column removed.

    Parameters
    ----------
    x : array, 2d
        Input series, one per column.
    maxlag : integer
        Largest lag to test; reduced when
        ``x.shape[0] <= 3 * maxlag + int(addconst)``.
    addconst : bool
        Only True is implemented.
    verbose : bool
        Print results if true.

    Returns
    -------
    results : dictionary
        Keys are the number of lags. Each value is a tuple
        ``(result, [res2down, res2djoint, rconstr])`` with the per-test
        tuples in ``result``, the restricted and unrestricted OLS results,
        and the restriction matrix of the parameter F test. The dictionary
        is empty when the (possibly clamped) `maxlag` is below 1.

    Raises
    ------
    NotImplementedError
        If `addconst` is false (raised when the first lag is processed).
    """
    import numpy as np
    from scipy import stats
    from statsmodels.tsa.tsatools import lagmat2ds
    from statsmodels.tools.tools import add_constant
    from statsmodels.regression.linear_model import OLS
    from warnings import warn

    x = np.asarray(x)
    nobs = x.shape[0]
    n_const = int(addconst)
    if nobs <= 3 * maxlag + n_const:
        # Largest lag that still satisfies nobs > 3 * lag + n_const. The
        # former int((nobs - n_const) / 3) - 1 was one too small whenever
        # nobs - n_const was not a multiple of 3.
        max_allowed = (nobs - n_const - 1) // 3
        warn("Insufficient observations. Maximum allowable lag is {0}. "
             "The maximum lag will be set to "
             "this number".format(max_allowed))
        maxlag = max_allowed

    resli = {}
    for mxlg in range(1, maxlag + 1):
        result = {}
        if verbose:
            print('\nGranger Causality')
            print('number of lags (no zero)', mxlg)

        # Lag matrix of both series, built without dropex, then with its
        # last column removed.
        # NOTE(review): the original comment called this "removal of the not
        # lagged xs". Per the statsmodels lagmat2ds documentation the last
        # column would be the oldest lag of the last series, which leaves
        # that series' unlagged values among the regressors - confirm which
        # was intended.
        dta = lagmat2ds(x, mxlg, trim='both')
        dta = np.delete(dta, -1, axis=1)

        if not addconst:
            raise NotImplementedError('Not Implemented')
        # Restricted model: the first mxlg lag columns only.
        dtaown = add_constant(dta[:, 1:(mxlg + 1)], prepend=False)
        # Unrestricted model: every remaining column.
        dtajoint = add_constant(dta[:, 1:], prepend=False)

        res2down = OLS(dta[:, 0], dtaown).fit()
        res2djoint = OLS(dta[:, 0], dtajoint).fit()
        df_denom = res2djoint.df_resid

        # For ssr based tests see:
        # http://support.sas.com/rnd/app/examples/ets/granger/index.htm
        # the other tests are made-up

        # Granger causality test using ssr (F statistic)
        fgc1 = ((res2down.ssr - res2djoint.ssr) / res2djoint.ssr / mxlg
                * df_denom)
        fgc1_p = stats.f.sf(fgc1, mxlg, df_denom)
        if verbose:
            print('ssr based F test: F=%-8.4f, p=%-8.4f, df_denom=%d,'
                  ' df_num=%d' % (fgc1, fgc1_p, df_denom, mxlg))
        result['ssr_ftest'] = (fgc1, fgc1_p, df_denom, mxlg)

        # Granger causality test using ssr (chi2 statistic)
        fgc2 = (res2down.nobs * (res2down.ssr - res2djoint.ssr)
                / res2djoint.ssr)
        fgc2_p = stats.chi2.sf(fgc2, mxlg)
        if verbose:
            print('ssr based chi2 test: chi2=%-8.4f, p=%-8.4f, '
                  'df=%d' % (fgc2, fgc2_p, mxlg))
        result['ssr_chi2test'] = (fgc2, fgc2_p, mxlg)

        # Likelihood ratio test
        lr = -2 * (res2down.llf - res2djoint.llf)
        lr_p = stats.chi2.sf(lr, mxlg)
        if verbose:
            print('likelihood ratio test: chi2=%-8.4f, p=%-8.4f, df=%d' %
                  (lr, lr_p, mxlg))
        result['lrtest'] = (lr, lr_p, mxlg)

        # F test that the coefficients at positions mxlg .. 2*mxlg-1 (the
        # columns absent from the restricted model) are all zero.
        rconstr = np.column_stack((np.zeros((mxlg, mxlg)),
                                   np.eye(mxlg, mxlg),
                                   np.zeros((mxlg, 1))))
        ftres = res2djoint.f_test(rconstr)
        f_val = np.squeeze(ftres.fvalue)[()]
        f_pval = np.squeeze(ftres.pvalue)[()]
        if verbose:
            print('parameter F test: F=%-8.4f, p=%-8.4f, df_denom=%d,'
                  ' df_num=%d' % (f_val, f_pval, ftres.df_denom,
                                  ftres.df_num))
        result['params_ftest'] = (f_val, f_pval, ftres.df_denom,
                                  ftres.df_num)

        resli[mxlg] = (result, [res2down, res2djoint, rconstr])

    return resli
# Script fragment. NOTE(review): the first statement uses `f`, presumably
# from a `with open(...)` block that starts before this excerpt - confirm
# its indentation against the full file.
pickle.dump(results_ARIMAX1, f)
# Persist the Granger results to the file 'results_Granger1'.
with open('results_Granger1', 'wb') as f:
    pickle.dump(results_Granger1, f)
plot_hist(results_NN1, lags)
plot_hist(results_LSTM1, lags)
plot_hist(results_GRU1, lags)
#%% AR models performance on test set
from statsmodels.tsa.tsatools import lagmat2ds
from statsmodels.tools.tools import add_constant
#results_Granger1 = grangercausalitytests(data[:7000,:],lags,verbose=False)
for l in lags:
    # Fitted models stored for lag l: index [1][0] and [1][1] of the result
    # tuple (presumably the restricted and unrestricted OLS fits - verify
    # against the function that produced results_Granger1).
    mdl1 = results_Granger1[l][1][0]
    mdl2 = results_Granger1[l][1][1]
    # Rebuild the regressors from the rows after index 7000.
    data_gr = lagmat2ds(data[7000:, :], l, trim="both", dropex=1)
    dtaown = add_constant(data_gr[:, 1:(l + 1)], prepend=False)
    dtajoint = add_constant(data_gr[:, 1:], prepend=False)
    x_pred1 = mdl1.predict(dtaown)
    x_pred2 = mdl2.predict(dtajoint)
    # The first l rows are skipped so the targets line up with the
    # predictions from the trimmed lag matrix.
    error1 = x_pred1 - data[7000 + l:, 0]
    error2 = x_pred2 - data[7000 + l:, 0]
    # Residual sums of squares of both models.
    rss_x1 = sum(error1**2)
    rss_x2 = sum(error2**2)
    RSS1['Granger'][l] = rss_x1
    RSS2['Granger'][l] = rss_x2
    print('RSS1 = %0.2f' % rss_x1)
    print('RSS2 = %0.2f' % rss_x2)
    # One-sided Wilcoxon signed-rank test on the absolute errors.
    # NOTE(review): placement inside the loop is assumed - confirm.
    S, p_value = stats.wilcoxon(np.abs(error1), np.abs(error2), alternative='greater')
def grangercausalitytests_mod(x, maxlag, addconst=True, verbose=True):
    """Granger causality tests that clamp `maxlag` instead of raising.

    Runs the same four tests ('ssr_ftest', 'ssr_chi2test', 'lrtest',
    'params_ftest') for every lag from 1 to `maxlag`. Two things differ from
    the plain version: too few observations produce a warning and a reduced
    `maxlag` rather than a ValueError, and the lag matrix is built without
    ``dropex`` and then has its last column removed.

    Parameters
    ----------
    x : array, 2d
        Input series, one per column.
    maxlag : integer
        Largest lag to test; reduced when
        ``x.shape[0] <= 3 * maxlag + int(addconst)``.
    addconst : bool
        Only True is implemented.
    verbose : bool
        Print results if true.

    Returns
    -------
    results : dictionary
        Keys are the number of lags. Each value is a tuple
        ``(result, [res2down, res2djoint, rconstr])`` with the per-test
        tuples in ``result``, the restricted and unrestricted OLS results,
        and the restriction matrix of the parameter F test. The dictionary
        is empty when the (possibly clamped) `maxlag` is below 1.

    Raises
    ------
    NotImplementedError
        If `addconst` is false (raised when the first lag is processed).
    """
    import numpy as np
    from scipy import stats
    from statsmodels.tsa.tsatools import lagmat2ds
    from statsmodels.tools.tools import add_constant
    from statsmodels.regression.linear_model import OLS
    from warnings import warn

    x = np.asarray(x)
    n_obs = x.shape[0]
    const_cols = int(addconst)
    if n_obs <= 3 * maxlag + const_cols:
        # Clamp to the largest lag for which n_obs > 3 * lag + const_cols;
        # the old int((n_obs - const_cols) / 3) - 1 could be one too small.
        maxlag = (n_obs - const_cols - 1) // 3
        warn("Insufficient observations. Maximum allowable lag is {0}. "
             "The maximum lag will be set to "
             "this number".format(maxlag))

    resli = {}
    for mxlg in range(1, maxlag + 1):
        if verbose:
            print('\nGranger Causality')
            print('number of lags (no zero)', mxlg)

        # Lag matrix of both series, built without dropex, then with its
        # last column removed.
        # NOTE(review): the original comment called this "removal of the not
        # lagged xs". Per the statsmodels lagmat2ds documentation the last
        # column would be the oldest lag of the last series, which leaves
        # that series' unlagged values among the regressors - confirm which
        # was intended.
        dta = np.delete(lagmat2ds(x, mxlg, trim='both'), -1, axis=1)
        if not addconst:
            raise NotImplementedError('Not Implemented')

        target = dta[:, 0]
        # Restricted regressors: first mxlg lag columns; unrestricted: all.
        dtaown = add_constant(dta[:, 1:(mxlg + 1)], prepend=False)
        dtajoint = add_constant(dta[:, 1:], prepend=False)
        res2down = OLS(target, dtaown).fit()
        res2djoint = OLS(target, dtajoint).fit()

        # For ssr based tests see:
        # http://support.sas.com/rnd/app/examples/ets/granger/index.htm
        # the other tests are made-up
        df_denom = res2djoint.df_resid

        # ssr based F statistic
        fgc1 = ((res2down.ssr - res2djoint.ssr) / res2djoint.ssr / mxlg
                * df_denom)
        ssr_ftest = (fgc1, stats.f.sf(fgc1, mxlg, df_denom), df_denom, mxlg)

        # ssr based chi2 statistic
        fgc2 = (res2down.nobs * (res2down.ssr - res2djoint.ssr)
                / res2djoint.ssr)
        ssr_chi2test = (fgc2, stats.chi2.sf(fgc2, mxlg), mxlg)

        # likelihood ratio statistic
        lr = -2 * (res2down.llf - res2djoint.llf)
        lrtest = (lr, stats.chi2.sf(lr, mxlg), mxlg)

        # F test that the coefficients at positions mxlg .. 2*mxlg-1 (the
        # columns absent from the restricted model) are all zero.
        rconstr = np.column_stack((np.zeros((mxlg, mxlg)),
                                   np.eye(mxlg, mxlg),
                                   np.zeros((mxlg, 1))))
        ftres = res2djoint.f_test(rconstr)
        params_ftest = (np.squeeze(ftres.fvalue)[()],
                        np.squeeze(ftres.pvalue)[()],
                        ftres.df_denom, ftres.df_num)

        if verbose:
            print('ssr based F test: F=%-8.4f, p=%-8.4f, df_denom=%d,'
                  ' df_num=%d' % ssr_ftest)
            print('ssr based chi2 test: chi2=%-8.4f, p=%-8.4f, '
                  'df=%d' % ssr_chi2test)
            print('likelihood ratio test: chi2=%-8.4f, p=%-8.4f, df=%d' %
                  lrtest)
            print('parameter F test: F=%-8.4f, p=%-8.4f, df_denom=%d,'
                  ' df_num=%d' % params_ftest)

        result = {
            'ssr_ftest': ssr_ftest,
            'ssr_chi2test': ssr_chi2test,
            'lrtest': lrtest,
            'params_ftest': params_ftest,
        }
        resli[mxlg] = (result, [res2down, res2djoint, rconstr])

    return resli
# Converting DataFrames to arrays : signal_to_predict = np.array(x).reshape(len(x)) helping_signal = np.array(y).reshape(len(y)) # Concatenate the two signals in a (nobs,2) array X = np.array([signal_to_predict, helping_signal]).T # Arrays that will contain BIC or AIC values according to the given criterion : C_r = np.zeros((self._max_lag, 1)) C_u = np.zeros((self._max_lag, 1)) # Computing OLS models for both 'restricted' and 'unrestricted' models, for each lag between 1 and 'max_lag' for lag in range(1, self._max_lag + 1): # Adapting datas : data = lagmat2ds(X, lag, trim='both', dropex=1) dataown = add_constant(data[:, 1:(lag + 1)], prepend=False) datajoint = add_constant(data[:, 1:], prepend=False) # OLS models : OLS_restricted = OLS(data[:, 0], dataown).fit() OLS_unrestricted = OLS(data[:, 0], datajoint).fit() # Saving AIC or BIC values : if self._criterion == 'bic': C_r[lag - 1] = OLS_restricted.bic C_u[lag - 1] = OLS_unrestricted.bic elif self._criterion == 'aic': C_r[lag - 1] = OLS_restricted.aic C_u[lag - 1] = OLS_unrestricted.aic
# NOTE(review): this reads like a pasted interactive-session history rather
# than a runnable script: bare expressions are evaluated only for display,
# names are used before the imports that provide them, and `mxlg`, `res`,
# `dff` and `dff_e` are not defined in this excerpt.

# Log-ratio of the residual sums of squares of the two stored fits.
ssrEig = reg[0].ssr
ssrBeid = reg[1].ssr
print(np.log(ssrEig / ssrBeid))
# Unpack the entry stored under key 1 of `res`.
test, reg = res[1]
reg.llf
reg[0].llf
reg[1].llf
reg[1].nobs
reg[0].ssr
reg[1].ssr
# NOTE(review): `param` looks like a typo for `params` (repeated correctly on the next line).
reg[0].param
reg[0].params
reg[1].params
# Two series stacked as columns.
x = np.vstack([dff_e[3], dff[43]]).T
x.shape
dta = lagmat2ds(x, 1, trim='both', dropex=1)
from statsmodels.tsa.tsatools import lagmat, lagmat2ds, add_trend
dta = lagmat2ds(x, 1, trim='both', dropex=1)
dta.shape
dta[:3, :]
x[:3]
pred1 = reg[0].predict(reg[0].params, dta[:, 1])
# NOTE(review): `prams` looks like a typo for `params` (repeated correctly on the next line).
reg[0].prams
reg[0].params
reg[0].exog.shape
reg[0].exog
dtaown = add_constant(dta[:, 1:(mxlg + 1)], prepend=False)
dtajoint = add_constant(dta[:, 1:], prepend=False)
from statsmodels.tools.tools import add_constant, Bunch
dtaown[:3]
dtaown = add_constant(dta[:, 1:(mxlg + 1)], prepend=False)
def grangercausalitytests(x, mxlg, autolag=None, alpha=0.0001, max_iter=1e5,
                          addconst=True, verbose=True):
    """Lasso-based selection of the strongest lag per series.

    Unlike the classical Granger test its name refers to, this variant
    does not compute F / chi-square statistics. It fits one Lasso regression
    of the current value of the first column of `x` on lagged values and
    reports, for each group of `mxlg` regressors, the non-zero coefficient
    with the largest magnitude.

    Parameters
    ----------
    x : array, 2d
        time series, one per column; the first column is the regression
        target
    mxlg : integer
        number of lags used to build the lag matrix
    autolag : object
        accepted for interface compatibility; not used by this function
    alpha : float
        regularisation strength passed to ``sklearn.linear_model.Lasso``
    max_iter : number
        iteration cap passed to ``Lasso`` (converted to ``int``)
    addconst : bool
        must be true; ``False`` is not implemented
    verbose : bool
        print a short header if true

    Returns
    -------
    rmse : float
        mean, over windows of 30 observations taken every 10 observations,
        of the summed absolute in-sample prediction errors (windows are
        truncated at the end of the sample). Despite the name this is not
        a root-mean-square error.
    non_zero_vars : dictionary
        for each regressor group ``k = (i + 1) // mxlg`` that has a
        non-zero coefficient, the within-group position
        ``(i + 1) % mxlg`` of its largest-magnitude coefficient, where
        ``i`` is the coefficient index in ``Lasso.coef_``
    best_vars : dictionary
        the corresponding coefficient values, keyed like `non_zero_vars`

    Raises
    ------
    ValueError
        if ``x`` has no more than ``3 * mxlg + int(addconst)`` rows
    NotImplementedError
        if `addconst` is false

    References
    ----------
    http://en.wikipedia.org/wiki/Granger_causality
    Greene: Econometric Analysis
    """
    x = np.asarray(x)

    if x.shape[0] <= 3 * mxlg + int(addconst):
        raise ValueError("Insufficient observations. Maximum allowable "
                         "lag is {0}".format(int((x.shape[0] - int(addconst)) /
                                                 3) - 1))

    # Imported after validation so that bad input fails fast without needing
    # the optional scikit-learn dependency.
    from sklearn.linear_model import Lasso

    if verbose:
        print('\nGranger Causality')
        print('number of lags (no zero)', mxlg)

    # create lagmat of both time series
    dta = lagmat2ds(x, mxlg, trim='both', dropex=1)

    # add constant
    if addconst:
        dtajoint = add_constant(dta[:, 1:], prepend=False)
    else:
        raise NotImplementedError('Not Implemented')

    # Run Lasso on all variables.
    # NOTE(review): `dtajoint[:, 1:]` drops the FIRST lag column and keeps
    # the constant that `prepend=False` places last. The `i + 1` offsets in
    # the index arithmetic below depend on this layout, so it is preserved
    # as-is -- confirm whether dropping that lag is intended.
    # NOTE(review): the `normalize` argument was deprecated and later removed
    # from scikit-learn's Lasso; this call needs a release that accepts it.
    regressors = dtajoint[:, 1:]
    actual = dta[:, 0]
    lassoreg = Lasso(alpha=alpha, normalize=True, max_iter=int(max_iter))
    lassoreg.fit(regressors, actual)
    pred = lassoreg.predict(regressors)

    # Sliding-window sum of absolute errors. Slicing truncates the last
    # windows at the end of the sample, as the manual bounds check did.
    errors = np.abs(actual - pred)
    step_size = 10
    window_size = 30
    avg_errors = []
    # Floor division: a float here breaks range() on Python 3.
    for window in range(len(actual) // step_size):
        start = window * step_size
        avg_errors.append(sum(errors[start:start + window_size]))
    rmse = np.mean(avg_errors)

    # Keep, per regressor group, the non-zero coefficient of largest
    # magnitude and its position inside the group.
    non_zero_vars = {}
    best_vars = {}
    for i, coef in enumerate(lassoreg.coef_):
        if coef == 0:
            continue
        # Floor division keeps the group keys integral on Python 3.
        k = (i + 1) // mxlg
        if k not in non_zero_vars or abs(coef) > abs(best_vars[k]):
            non_zero_vars[k] = (i + 1) % mxlg
            best_vars[k] = coef

    return (rmse, non_zero_vars, best_vars)
# Converting DataFrames to arrays : signal_to_predict = np.array(x).reshape(len(x)) helping_signal = np.array(y).reshape(len(y)) # Concatenate the two signals in a (nobs,2) array X = np.array([signal_to_predict,helping_signal]).T # Arrays that will contain BIC or AIC values according to the given criterion : C_r = np.zeros((self._max_lag,1)) C_u = np.zeros((self._max_lag,1)) # Computing OLS models for both 'restricted' and 'unrestricted' models, for each lag between 1 and 'max_lag' for lag in range(1, self._max_lag+1): # Adapting datas : data = lagmat2ds(X,lag,trim ='both', dropex = 1) dataown = add_constant(data[:, 1:(lag + 1)], prepend=False) datajoint = add_constant(data[:, 1:], prepend=False) # OLS models : OLS_restricted = OLS(data[:, 0], dataown).fit() OLS_unrestricted = OLS(data[:, 0], datajoint).fit() # Saving AIC or BIC values : if self._criterion == 'bic': C_r[lag-1] = OLS_restricted.bic C_u[lag-1] = OLS_unrestricted.bic elif self._criterion == 'aic': C_r[lag-1] = OLS_restricted.aic C_u[lag-1] = OLS_unrestricted.aic
# Summarise the previously fitted model held in `ar1_model` (defined outside
# this fragment); `llf` is presumably its log-likelihood -- TODO confirm.
llf_arima_esti = ar1_model.llf
print(ar1_model.summary())
# Pair each point estimate with its confidence interval so the two entries
# line up with the two index labels of the table below.
mu_arima_results = [mu_arima_esti, mu_arima_CI]
phi_arima_results = [phi_arima_esti, phi_arima_CI]
# The frame is built but not assigned; presumably it is meant to be displayed
# by an interactive cell runner.
pd.DataFrame({
    'mu': mu_arima_results,
    'phi': phi_arima_results
},
             index=['ML estimation', '95% condifence interval'])
#%% OLS estimation of the AR(1) parameters
# Regress Y on a column of ones and on the lag column taken from lagmat2ds.
Y_endog = Y[1:]
# Column 1 of the lag matrix, with its first row dropped: exclude the first
# missing point.
Ylag = np.transpose(np.matrix(lagmat2ds(
    x=Y, maxlag0=1)[1:, 1]))
# Both regressors are built as column matrices so they can be concatenated
# side by side; the result is converted back to a plain array for OLS.
mu_aux = np.transpose(np.matrix(np.ones(len(Ylag))))
exogen = np.array(np.concatenate((mu_aux, Ylag), axis=1))
OLS_reg = sm.OLS(endog=Y_endog, exog=exogen)
results = OLS_reg.fit()
print(results.summary())
# Parameter order follows the columns of `exogen`: index 0 is the ones
# column (mu), index 1 is the lagged series (phi).
mu_OLS = results.params[0]
phi_OLS = results.params[1]
mu_OLS_CI = results.conf_int()[0]
phi_OLS_CI = results.conf_int()[1]
def compute(self, *signals):
    """
    It computes restricted AR and unrestricted AR models, and evaluates
    whether the first signal (first parameter) could be forecasted by the
    others. F-value and p-value are computed, the interpretation of the
    results is up to the user.

    The lag order is chosen between 1 and ``self._max_lag`` as the smaller
    of the orders minimising ``self._criterion`` ('bic' or 'aic') for the
    restricted model (own past only) and the unrestricted model (past of
    every signal). Both models are then refitted at that order.

    :param signals:
        list of signals, one per person.
    :type signals: list[pd.DataFrame]

    :returns: dict -- with keys 'F_value', 'p_value', 'optimal_lag',
        'predicted_signal_restricted' and 'predicted_signal_unrestricted'.
        The first ``optimal_lag`` samples of each predicted signal are
        copied from the first signal; the remaining ones are one-step
        predictions from the observed past, including the fitted intercept.

    :raises TypeError: if a signal is not a pd.DataFrame.
    :raises ValueError: if no signal is given, if the signals differ in
        size or index, or if the unrestricted model fits perfectly
        (zero residual sum of squares).
    """
    if not signals:
        raise ValueError("At least one signal is required.")

    # Raise error if parameters are not in the correct type
    for i, signal in enumerate(signals):
        if not isinstance(signal, pd.DataFrame):
            raise TypeError("Requires signal " + str(i + 1) + " to be a pd.DataFrame.")

    # Raise error if DataFrames have not the same size or same indexes
    reference_index = signals[0].index.tolist()
    for i, signal in enumerate(signals[1:], 1):
        if len(signals[0]) != len(signal):
            raise ValueError("All the signals must have the same size. Signal " + str(i + 1) + " does not have the same size as first signal.")
        if reference_index != signal.index.tolist():
            raise ValueError("All the signals must have the same time indexes. Signal " + str(i + 1) + " does not have the same time index as first signal.")

    # Saving the size of signals (they all supposed to have the same size)
    T = len(signals[0])
    num_signals = len(signals)

    # Converting DataFrames to arrays :
    SIGNALS = np.zeros((T, num_signals))
    for i, signal in enumerate(signals):
        SIGNALS[:, i] = np.array(signal).reshape(T)

    def fit_models(lag):
        # Column 0 of `data` is the current value of the first signal,
        # columns 1..lag are its own lags, the rest are the other signals' lags.
        data = lagmat2ds(SIGNALS, lag, trim='both', dropex=1)
        dataown = add_constant(data[:, 1:(lag + 1)], prepend=False)
        datajoint = add_constant(data[:, 1:], prepend=False)
        return OLS(data[:, 0], dataown).fit(), OLS(data[:, 0], datajoint).fit()

    def intercept_of(model, num_lag_coefs):
        # With prepend=False the constant, when present, follows the lag
        # coefficients. add_constant may leave it out if a column is already
        # constant (see its `has_constant` option), hence the length check.
        params = model.params
        return params[num_lag_coefs] if len(params) > num_lag_coefs else 0.0

    # Arrays that will contain BIC or AIC values according to the given criterion :
    C_r = np.zeros((self._max_lag, 1))
    C_u = np.zeros((self._max_lag, 1))

    # Computing OLS models for both 'restricted' and 'unrestricted' models, for each lag between 1 and 'max_lag'
    for lag in range(1, self._max_lag + 1):
        OLS_restricted, OLS_unrestricted = fit_models(lag)

        # Saving AIC or BIC values :
        if self._criterion == 'bic':
            C_r[lag - 1] = OLS_restricted.bic
            C_u[lag - 1] = OLS_unrestricted.bic
        elif self._criterion == 'aic':
            C_r[lag - 1] = OLS_restricted.aic
            C_u[lag - 1] = OLS_unrestricted.aic

    # Determine the optimal 'lag' according to 'bic' or 'aic' criterion :
    # (argmin() + 1 is always >= 1, so olag can never be zero.)
    olag_r = C_r.argmin() + 1
    olag_u = C_u.argmin() + 1
    olag = min(olag_r, olag_u)

    # Computing OLS models with the optimal 'lag'
    OLS_restricted, OLS_unrestricted = fit_models(olag)

    # Checking divisions by 0
    if np.any(OLS_unrestricted.ssr == 0):
        raise ValueError("OLS_unrestricted.ssr can't be eq to zero because it's used in division.")

    # Doing the F-TEST:
    # The numerator degrees of freedom are the number of coefficients the
    # restricted model forces to zero: `olag` per additional signal. With a
    # single signal both models coincide; `olag` is kept there so the
    # result stays defined (F close to 0).
    df_num = olag * max(num_signals - 1, 1)
    F_value = ((OLS_restricted.ssr - OLS_unrestricted.ssr) / OLS_unrestricted.ssr / df_num * OLS_unrestricted.df_resid)
    p_value = stats.f.sf(F_value, df_num, OLS_unrestricted.df_resid)

    # Offsets selecting samples i-1, i-2, ..., i-olag, in the same order as
    # the lag coefficients.
    lag_offsets = np.arange(olag)

    # Computing predicted signal with restricted model :
    params_r = OLS_restricted.params
    intercept_r = intercept_of(OLS_restricted, olag)
    predicted_signal_restricted = np.zeros(T)
    predicted_signal_restricted[0:olag] = np.copy(SIGNALS[0:olag, 0])
    for i in range(olag, T):
        past = (i - 1) - lag_offsets
        predicted_signal_restricted[i] = np.dot(SIGNALS[past, 0], params_r[0:olag]) + intercept_r

    # Computing predicted signal with unrestricted model :
    params_u = OLS_unrestricted.params
    intercept_u = intercept_of(OLS_unrestricted, num_signals * olag)
    predicted_signal_unrestricted = np.zeros(T)
    predicted_signal_unrestricted[0:olag] = np.copy(SIGNALS[0:olag, 0])
    for i in range(olag, T):
        past = (i - 1) - lag_offsets
        prediction = 0.0
        for k in range(num_signals):
            prediction = prediction + np.dot(SIGNALS[past, k], params_u[k * olag:(k + 1) * olag])
        predicted_signal_unrestricted[i] = prediction + intercept_u

    results = dict()
    results['F_value'] = F_value
    results['p_value'] = p_value
    results['optimal_lag'] = olag
    results['predicted_signal_restricted'] = predicted_signal_restricted
    results['predicted_signal_unrestricted'] = predicted_signal_unrestricted

    if self._plot:
        plt.ion()
        self.plot_result(results)

    return results