def __init__(self, data, lags, intercept=True):
    """
    Store the input data and model settings on the instance.

    Parameters
    ----------
    data : object accepted by ``_combine_rhs``
        Combined via ``_combine_rhs`` and wrapped in a ``DataFrame``; the
        resulting frame, its columns and its index are all cached.
    lags : stored as ``self._p``
        NOTE(review): presumably the lag order of the model -- confirm.
    intercept : default True
        Stored unchanged as ``self._intercept``; not interpreted here.
    """
    frame = DataFrame(_combine_rhs(data))

    # Cache the frame together with its axes.
    self._data = frame
    self._columns = frame.columns
    self._index = frame.index

    # Model settings.
    self._p = lags
    self._intercept = intercept
def __init__(self, data, p=1, intercept=True):
    """
    Store the input data and model settings on the instance.

    Parameters
    ----------
    data : object accepted by ``_combine_rhs``
        Combined via ``_combine_rhs`` and wrapped in a ``DataFrame``; the
        resulting frame, its columns and its index are all cached.
    p : default 1
        Stored as ``self._p``.
        NOTE(review): presumably the lag order of the model -- confirm.
    intercept : default True
        Stored unchanged as ``self._intercept``; not interpreted here.

    Raises
    ------
    ImportError
        If ``scikits.statsmodels.tsa.var`` cannot be imported.
    """
    # The alias is not used in this method; the import runs first, so a
    # missing package fails before any attribute is set.
    # NOTE(review): presumably an early dependency check -- confirm.
    import scikits.statsmodels.tsa.var as sm_var

    frame = DataFrame(_combine_rhs(data))

    # Model settings.
    self._p = p
    self._intercept = intercept

    # Cache the frame together with its axes.
    self._data = frame
    self._columns = frame.columns
    self._index = frame.index
def beta(self):
    """
    Collect the fitted coefficients of every per-column regression.

    Each column x1 of the result contains the betas calculated by
    regressing the x1 column of the VAR input with the lagged input.

    Returns
    -------
    DataFrame
        One column per key of ``self.ols_results``, filled from the
        ``beta`` attribute of the corresponding result.
    """
    # ``items()`` is available on Python 2 and Python 3 mappings alike,
    # whereas ``iteritems()`` only exists on Python 2 dicts.
    # NOTE(review): assumes ``ols_results`` is a dict-like mapping -- confirm.
    betas = dict((name, fit.beta) for name, fit in self.ols_results.items())
    return DataFrame(betas)
def resid(self):
    """
    Returns the DataFrame containing the residuals of the VAR regressions.

    Each column x1 contains the residuals generated by regressing the x1
    column of the input against the lagged input.

    Returns
    -------
    DataFrame
        One column per key of ``self.ols_results``, reindexed to
        ``self._index``; index labels without a residual are left missing.
    """
    # ``items()`` is available on Python 2 and Python 3 mappings alike,
    # whereas ``iteritems()`` only exists on Python 2 dicts.
    # NOTE(review): assumes ``ols_results`` is a dict-like mapping -- confirm.
    residuals = dict(
        (name, fit.resid) for name, fit in self.ols_results.items()
    )
    return DataFrame(residuals, index=self._index)
def granger_causality(self):
    """Returns the f-stats and p-values from the Granger Causality Test.

    If the data consists of columns x1, x2, x3, then we perform the
    following regressions:

        x1 ~ L(x2, x3)
        x1 ~ L(x1, x3)
        x1 ~ L(x1, x2)

    The f-stats of these results are placed in the 'x1' column of the
    returned DataFrame.  We then repeat for x2, x3.

    Returns
    -------
    dict
        ``'f-stat'`` maps to the DataFrame of F statistics and
        ``'p-value'`` maps to the DataFrame of the corresponding p-values.
        In both frames the column is the regressed variable and the row is
        the variable whose lags were left out of the restricted regression.
    """
    from pandas.stats.api import ols
    from scipy.stats import f

    # For every column, gather lags 1.._p of all *other* columns: these are
    # the regressors of the restricted model that leaves that column out.
    restricted_x = {}
    for excluded in self._columns:
        regressors = {}
        for lag in range(1, 1 + self._p):
            # NOTE(review): relies on ``Index - list`` meaning set
            # difference, which holds only for legacy pandas -- confirm.
            others = self._lagged_data[lag].filter(self._columns - [excluded])
            for name, series in others.iteritems():
                regressors[_make_param_name(lag, name)] = series
        restricted_x[excluded] = regressors

    # Degrees of freedom do not depend on the column pair, so compute them
    # once.  M restrictions are tested per regression (one per lag).
    # NOTE(review): the ``+ 1`` presumably counts an intercept and is not
    # adjusted when ``self._intercept`` is false -- confirm.
    M = self._p
    N = self._nobs
    K = self._k * self._p + 1
    df_denom = N - K

    full_resid = self.resid

    f_stat_dict = {}
    p_value_dict = {}
    for col, y in self._data.iteritems():
        ssr_full = (full_resid[col] ** 2).sum()

        f_stats = []
        p_values = []
        for excluded in self._columns:
            restricted = ols(y=y, x=restricted_x[excluded])
            ssr_reduced = (restricted.resid ** 2).sum()

            # F = ((SSR_restricted - SSR_full) / M) / (SSR_full / (N - K))
            f_stat = ((ssr_reduced - ssr_full) / M) / (ssr_full / df_denom)
            f_stats.append(f_stat)

            # The survival function keeps precision in the far right tail,
            # where ``1 - cdf`` would round to exactly 0.0.
            p_values.append(f.sf(f_stat, M, df_denom))

        f_stat_dict[col] = Series(f_stats, self._columns)
        p_value_dict[col] = Series(p_values, self._columns)

    return {
        'f-stat': DataFrame(f_stat_dict),
        'p-value': DataFrame(p_value_dict),
    }