class VAR(object):
    """
    Vector autoregression: every column of the input is regressed on the
    lagged values (1, ..., ``lags``) of all columns.

    Parameters
    ----------
    data : input handed to ``_combine_rhs`` and then to ``DataFrame``;
        each resulting column is treated as one series of the system.
    lags : int
        Number of lags ``p`` used as regressors.
    intercept : bool, default True
        Whether the regressions include an intercept term.
    """

    def __init__(self, data, lags, intercept=True):
        self._data = DataFrame(_combine_rhs(data))
        self._p = lags
        self._columns = self._data.columns
        self._index = self._data.index
        self._intercept = intercept

    @cache_readonly
    def aic(self):
        """Returns the Akaike information criterion."""
        return self._ic['aic']

    @cache_readonly
    def bic(self):
        """Returns the Bayesian information criterion."""
        return self._ic['bic']

    @cache_readonly
    def beta(self):
        """
        Returns a DataFrame, where each column x1 contains the betas
        calculated by regressing the x1 column of the VAR input with
        the lagged input.

        Returns
        -------
        DataFrame
        """
        d = dict((key, value.beta)
                 for key, value in self.ols_results.items())
        return DataFrame(d)

    def forecast(self, h):
        """
        Returns a DataMatrix containing the forecasts for
        1, 2, ..., h time steps.  Each column x1 contains the forecasts
        of the x1 column.

        Parameters
        ----------
        h : int
            Number of time steps ahead to forecast.

        Returns
        -------
        DataMatrix
        """
        # _forecast_raw yields shape (h, 1, k); drop the singleton axis.
        forecast = self._forecast_raw(h)[:, 0, :]
        return DataMatrix(forecast, index=range(1, 1 + h),
                          columns=self._columns)

    def forecast_cov(self, h):
        """
        Returns the covariance of the forecast residuals.

        Parameters
        ----------
        h : int
            Number of time steps ahead.

        Returns
        -------
        list of DataMatrix
            One (k, k) covariance matrix per step 1, 2, ..., h.
        """
        return [DataMatrix(value, index=self._columns,
                           columns=self._columns)
                for value in self._forecast_cov_raw(h)]

    def forecast_std_err(self, h):
        """
        Returns the standard errors of the forecast residuals.

        Parameters
        ----------
        h : int
            Number of time steps ahead.

        Returns
        -------
        DataMatrix
            Rows are the steps 1, 2, ..., h; columns are the series.
        """
        return DataMatrix(self._forecast_std_err_raw(h),
                          index=range(1, 1 + h),
                          columns=self._columns)

    @cache_readonly
    def granger_causality(self):
        """Returns the f-stats and p-values from the Granger Causality Test.

        If the data consists of columns x1, x2, x3, then we perform the
        following regressions:

        x1 ~ L(x2, x3)
        x1 ~ L(x1, x3)
        x1 ~ L(x1, x2)

        The f-stats of these results are placed in the 'x1' column of the
        returned DataMatrix.  We then repeat for x2, x3.

        Returns
        -------
        Dict, where 'f-stat' returns the DataMatrix containing the f-stats,
        and 'p-value' returns the DataMatrix containing the corresponding
        p-values of the f-stats.
        """
        from pandas.stats.api import ols
        from scipy.stats import f

        # restricted[col]: every lagged regressor except the lags of `col`.
        restricted = {}
        for col in self._columns:
            regressors = {}
            for i in range(1, 1 + self._p):
                lagged = self._lagged_data[i]
                for key in self._columns:
                    if key == col:
                        continue
                    regressors[_make_param_name(i, key)] = lagged[key]
            restricted[col] = regressors

        M = self._p      # number of restrictions (lags of one series)
        N = self._nobs
        # Parameters per equation in the unrestricted model; the intercept
        # only counts when it is actually estimated.
        K = self._k * self._p + (1 if self._intercept else 0)

        f_stat_dict = {}
        p_value_dict = {}
        for col in self._columns:
            y = self._data[col]
            ssr_full = (self.resid[col] ** 2).sum()

            f_stats = []
            p_values = []
            for col2 in self._columns:
                # The restricted fit must use the same intercept setting as
                # the full model in ols_results, otherwise the two SSRs are
                # not comparable.
                result = ols(y=y, x=restricted[col2],
                             intercept=self._intercept)
                ssr_reduced = (result.resid ** 2).sum()

                f_stat = (((ssr_reduced - ssr_full) / M) /
                          (ssr_full / (N - K)))
                f_stats.append(f_stat)
                # sf == 1 - cdf, without losing precision for tiny p-values.
                p_values.append(f.sf(f_stat, M, N - K))

            f_stat_dict[col] = Series(f_stats, self._columns)
            p_value_dict[col] = Series(p_values, self._columns)

        return {
            'f-stat': DataFrame(f_stat_dict),
            'p-value': DataFrame(p_value_dict),
        }

    @cache_readonly
    def ols_results(self):
        """
        Returns the results of the regressions:
        x_1 ~ L(X)
        x_2 ~ L(X)
        ...
        x_k ~ L(X)

        where X = [x_1, x_2, ..., x_k]
        and L(X) represents the columns of X lagged 1, 2, ..., n
        lags (n is the user-provided number of lags).

        Returns
        -------
        dict
            Maps each column name to its OLS result.
        """
        from pandas.stats.api import ols

        # Shared regressor set: every column at every lag.
        d = {}
        for i in range(1, 1 + self._p):
            lagged = self._lagged_data[i]
            for col in self._columns:
                d[_make_param_name(i, col)] = lagged[col]

        return dict((col, ols(y=self._data[col], x=d,
                              intercept=self._intercept))
                    for col in self._columns)

    @cache_readonly
    def resid(self):
        """
        Returns the DataFrame containing the residuals of the VAR
        regressions.  Each column x1 contains the residuals generated by
        regressing the x1 column of the input against the lagged input.

        Returns
        -------
        DataFrame
            Indexed like the input data.
        """
        d = dict((col, result.resid)
                 for col, result in self.ols_results.items())
        return DataFrame(d, index=self._index)

    @cache_readonly
    def summary(self):
        """Returns a printable text summary (nobs, AIC, BIC, betas)."""
        template = """
%(banner_top)s

Number of Observations:         %(nobs)d
AIC:                            %(aic).3f
BIC:                            %(bic).3f

%(banner_coef)s
%(coef_table)s
%(banner_end)s
"""
        params = {
            'banner_top': common.banner('Summary of VAR'),
            'banner_coef': common.banner('Summary of Estimated Coefficients'),
            'banner_end': common.banner('End of Summary'),
            'coef_table': self.beta,
            'aic': self.aic,
            'bic': self.bic,
            'nobs': self._nobs,
        }
        return template % params

    @cache_readonly
    def _alpha(self):
        """
        Returns array where the i-th element contains the intercept
        when regressing the i-th column of self._data with the lagged data.
        """
        if self._intercept:
            # Takes the last row of the raw betas.
            # NOTE(review): presumably ols orders the intercept last - verify.
            return self._beta_raw[-1]
        return np.zeros(self._k)

    @cache_readonly
    def _beta_raw(self):
        """Betas as an array: one row per regressor, one column per series."""
        return np.array([self.beta[col].values()
                         for col in self._columns]).T

    def _trans_B(self, h):
        """
        Returns 0, 1, ..., (h-1)-th power of transpose of B as defined in
        equation (4) on p. 142 of the Stata 11 Time Series reference book.

        The returned list always holds at least the 0th and 1st powers,
        even when h < 2.
        """
        size = 1 + self._k * self._p
        result = [np.eye(size)]

        row1 = np.zeros((1, size))
        row1[0, 0] = 1

        v = self._alpha.reshape((self._k, 1))
        row2 = np.hstack(tuple([v] + self._lag_betas))

        m = self._k * (self._p - 1)
        row3 = np.hstack((np.zeros((m, 1)),
                          np.eye(m),
                          np.zeros((m, self._k))))

        trans_B = np.vstack((row1, row2, row3)).T
        result.append(trans_B)

        for i in range(2, h):
            result.append(np.dot(trans_B, result[i - 1]))

        return result

    @cache_readonly
    def _x(self):
        """Regressor matrix: a column of ones followed by all lagged series,
        with the first p rows (incomplete lags) dropped."""
        values = np.array([
            self._lagged_data[i][col].values()
            for i in range(1, 1 + self._p)
            for col in self._columns
        ]).T

        x = np.hstack((np.ones((len(values), 1)), values))[self._p:]
        return x

    @cache_readonly
    def _cov_beta(self):
        """Kronecker product of inv(x'x) with the residual covariance."""
        cov_resid = self._sigma
        x = self._x
        inv_cov_x = inv(np.dot(x.T, x))
        return np.kron(inv_cov_x, cov_resid)

    def _data_xs(self, i):
        """
        Returns the cross-section of the data at the given timestep.
        """
        return self._data.values[i]

    def _forecast_cov_raw(self, n):
        """Forecast covariance for steps 1..n (residual component only)."""
        resid = self._forecast_cov_resid_raw(n)
        # TODO: the beta component (_forecast_cov_beta_raw) is ignored until
        # it is verified; once it is, return the element-wise sum of both.
        return resid

    def _forecast_cov_beta_raw(self, n):
        """
        Returns the covariance of the beta errors for the forecast at
        1, 2, ..., n timesteps.
        """
        p = self._p

        values = self._data.values
        T = len(values) - self._p - 1

        cov_beta = self._cov_beta

        results = []
        for h in range(1, n + 1):
            psi = self._psi(h)
            trans_B = self._trans_B(h)

            total = 0
            for t in range(T + 1):
                index = t + p
                # The p most recent observations up to `index`, newest first.
                y = values.take(range(index, index - p, -1),
                                axis=0).flatten()
                trans_Z = np.hstack(([1], y))
                trans_Z = trans_Z.reshape(1, len(trans_Z))

                inner = 0
                for i in range(h):
                    ZB = np.dot(trans_Z, trans_B[h - 1 - i])
                    inner = inner + np.kron(ZB, psi[i])

                total = total + chain_dot(inner, cov_beta, inner.T)

            results.append(total / (T + 1))

        return results

    def _forecast_cov_resid_raw(self, h):
        """
        Returns the covariance of the residual errors for the forecast at
        1, 2, ..., h timesteps.
        """
        sigma = self._sigma

        running = 0
        result = []
        # Step i's covariance is the cumulative sum of psi_j sigma psi_j'.
        for psi in self._psi(h)[:h]:
            running = running + chain_dot(psi, sigma, psi.T)
            result.append(running)

        return result

    def _forecast_raw(self, h):
        """
        Returns the forecast at 1, 2, ..., h timesteps in the future,
        as an array of shape (h, 1, k).
        """
        k = self._k
        result = []
        for i in range(h):
            step = self._alpha.reshape(1, k)
            for j in range(self._p):
                beta = self._lag_betas[j]
                idx = i - j
                if idx > 0:
                    # Lag falls inside the forecast horizon: reuse forecast.
                    y = result[idx - 1]
                else:
                    # Lag falls in the sample: negative index from the end.
                    y = self._data_xs(idx - 1)

                step = step + np.dot(beta, y.T).T
            result.append(step)

        return np.array(result)

    def _forecast_std_err_raw(self, h):
        """
        Returns the standard error of the forecasts
        at 1, 2, ..., h timesteps.
        """
        return np.array([np.sqrt(np.diag(value))
                         for value in self._forecast_cov_raw(h)])

    @cache_readonly
    def _ic(self):
        """
        Returns the Akaike/Bayesian information criteria.
        """
        RSS = self._rss
        # NOTE(review): this counts p * (k * p + 1) parameters; a k-equation
        # system with k * p + 1 regressors each would have k * (k * p + 1).
        # Left unchanged - confirm which count is intended.
        k = self._p * (self._k * self._p + 1)
        n = self._nobs * self._k

        return {'aic': 2 * k + n * np.log(RSS / n),
                'bic': n * np.log(RSS / n) + k * np.log(n)}

    @cache_readonly
    def _k(self):
        """Number of series (columns) in the system."""
        return len(self._columns)

    @cache_readonly
    def _lag_betas(self):
        """
        Returns list of B_i, where B_i represents the (k, k) matrix
        with the j-th row containing the betas of regressing the j-th
        column of self._data with self._data lagged i time steps.
        First element is B_1, second element is B_2, etc.
        """
        k = self._k
        b = self._beta_raw
        return [b[k * i:k * (i + 1)].T for i in range(self._p)]

    @cache_readonly
    def _lagged_data(self):
        """Dict mapping each lag i in 1..p to the data shifted by i."""
        return dict((i, self._data.shift(i))
                    for i in range(1, 1 + self._p))

    @cache_readonly
    def _nobs(self):
        """Number of usable observations: rows of data minus the lags."""
        return len(self._data) - self._p

    def _psi(self, h):
        """
        psi value used for calculating standard error.

        Returns [psi_0, psi_1, ..., psi_(h - 1)]
        """
        k = self._k
        lag_betas = self._lag_betas
        result = [np.eye(k)]
        for i in range(1, h):
            # psi_i = sum over j = 1..min(i, p) of psi_(i - j) . B_j
            result.append(sum(
                [np.dot(result[i - j], lag_betas[j - 1])
                 for j in range(1, 1 + min(i, self._p))]))

        return result

    @cache_readonly
    def _resid_raw(self):
        """Residuals as an array with one row per series."""
        return np.array([self.ols_results[col]._resid_raw
                         for col in self._columns])

    @cache_readonly
    def _rss(self):
        """Returns the sum of the squares of the residuals."""
        return (self._resid_raw ** 2).sum()

    @cache_readonly
    def _sigma(self):
        """Returns covariance of resids."""
        k = self._k
        n = self._nobs
        resid = self._resid_raw

        # NOTE(review): the divisor is n - k (number of series), not n minus
        # the number of regressors per equation - confirm this is intended.
        return np.dot(resid, resid.T) / (n - k)

    def __repr__(self):
        return self.summary
class VAR(object):
    """
    Vector autoregression: every column of the input is regressed on the
    lagged values (1, ..., ``lags``) of all columns.

    Parameters
    ----------
    data : input handed to ``_combine_rhs`` and then to ``DataFrame``;
        each resulting column is treated as one series of the system.
    lags : int
        Number of lags ``p`` used as regressors.
    intercept : bool, default True
        Whether the regressions include an intercept term.
    """

    def __init__(self, data, lags, intercept=True):
        self._data = DataFrame(_combine_rhs(data))
        self._p = lags
        self._columns = self._data.columns
        self._index = self._data.index
        self._intercept = intercept

    @cache_readonly
    def aic(self):
        """Returns the Akaike information criterion."""
        return self._ic['aic']

    @cache_readonly
    def bic(self):
        """Returns the Bayesian information criterion."""
        return self._ic['bic']

    @cache_readonly
    def beta(self):
        """
        Returns a DataFrame, where each column x1 contains the betas
        calculated by regressing the x1 column of the VAR input with
        the lagged input.

        Returns
        -------
        DataFrame
        """
        d = dict((key, value.beta)
                 for key, value in self.ols_results.items())
        return DataFrame(d)

    def forecast(self, h):
        """
        Returns a DataMatrix containing the forecasts for
        1, 2, ..., h time steps.  Each column x1 contains the forecasts
        of the x1 column.

        Parameters
        ----------
        h : int
            Number of time steps ahead to forecast.

        Returns
        -------
        DataMatrix
        """
        # _forecast_raw yields shape (h, 1, k); drop the singleton axis.
        forecast = self._forecast_raw(h)[:, 0, :]
        return DataMatrix(forecast, index=range(1, 1 + h),
                          columns=self._columns)

    def forecast_cov(self, h):
        """
        Returns the covariance of the forecast residuals.

        Parameters
        ----------
        h : int
            Number of time steps ahead.

        Returns
        -------
        list of DataMatrix
            One (k, k) covariance matrix per step 1, 2, ..., h.
        """
        return [DataMatrix(value, index=self._columns,
                           columns=self._columns)
                for value in self._forecast_cov_raw(h)]

    def forecast_std_err(self, h):
        """
        Returns the standard errors of the forecast residuals.

        Parameters
        ----------
        h : int
            Number of time steps ahead.

        Returns
        -------
        DataMatrix
            Rows are the steps 1, 2, ..., h; columns are the series.
        """
        return DataMatrix(self._forecast_std_err_raw(h),
                          index=range(1, 1 + h),
                          columns=self._columns)

    @cache_readonly
    def granger_causality(self):
        """Returns the f-stats and p-values from the Granger Causality Test.

        If the data consists of columns x1, x2, x3, then we perform the
        following regressions:

        x1 ~ L(x2, x3)
        x1 ~ L(x1, x3)
        x1 ~ L(x1, x2)

        The f-stats of these results are placed in the 'x1' column of the
        returned DataMatrix.  We then repeat for x2, x3.

        Returns
        -------
        Dict, where 'f-stat' returns the DataMatrix containing the f-stats,
        and 'p-value' returns the DataMatrix containing the corresponding
        p-values of the f-stats.
        """
        from pandas.stats.api import ols
        from scipy.stats import f

        # restricted[col]: every lagged regressor except the lags of `col`.
        restricted = {}
        for col in self._columns:
            regressors = {}
            for i in range(1, 1 + self._p):
                lagged = self._lagged_data[i]
                for key in self._columns:
                    if key == col:
                        continue
                    regressors[_make_param_name(i, key)] = lagged[key]
            restricted[col] = regressors

        M = self._p      # number of restrictions (lags of one series)
        N = self._nobs
        # Parameters per equation in the unrestricted model; the intercept
        # only counts when it is actually estimated.
        K = self._k * self._p + (1 if self._intercept else 0)

        f_stat_dict = {}
        p_value_dict = {}
        for col in self._columns:
            y = self._data[col]
            ssr_full = (self.resid[col] ** 2).sum()

            f_stats = []
            p_values = []
            for col2 in self._columns:
                # The restricted fit must use the same intercept setting as
                # the full model in ols_results, otherwise the two SSRs are
                # not comparable.
                result = ols(y=y, x=restricted[col2],
                             intercept=self._intercept)
                ssr_reduced = (result.resid ** 2).sum()

                f_stat = (((ssr_reduced - ssr_full) / M) /
                          (ssr_full / (N - K)))
                f_stats.append(f_stat)
                # sf == 1 - cdf, without losing precision for tiny p-values.
                p_values.append(f.sf(f_stat, M, N - K))

            f_stat_dict[col] = Series(f_stats, self._columns)
            p_value_dict[col] = Series(p_values, self._columns)

        return {
            'f-stat': DataFrame(f_stat_dict),
            'p-value': DataFrame(p_value_dict),
        }

    @cache_readonly
    def ols_results(self):
        """
        Returns the results of the regressions:
        x_1 ~ L(X)
        x_2 ~ L(X)
        ...
        x_k ~ L(X)

        where X = [x_1, x_2, ..., x_k]
        and L(X) represents the columns of X lagged 1, 2, ..., n
        lags (n is the user-provided number of lags).

        Returns
        -------
        dict
            Maps each column name to its OLS result.
        """
        from pandas.stats.api import ols

        # Shared regressor set: every column at every lag.
        d = {}
        for i in range(1, 1 + self._p):
            lagged = self._lagged_data[i]
            for col in self._columns:
                d[_make_param_name(i, col)] = lagged[col]

        return dict((col, ols(y=self._data[col], x=d,
                              intercept=self._intercept))
                    for col in self._columns)

    @cache_readonly
    def resid(self):
        """
        Returns the DataFrame containing the residuals of the VAR
        regressions.  Each column x1 contains the residuals generated by
        regressing the x1 column of the input against the lagged input.

        Returns
        -------
        DataFrame
            Indexed like the input data.
        """
        d = dict((col, result.resid)
                 for col, result in self.ols_results.items())
        return DataFrame(d, index=self._index)

    @cache_readonly
    def summary(self):
        """Returns a printable text summary (nobs, AIC, BIC, betas)."""
        template = """
%(banner_top)s

Number of Observations:         %(nobs)d
AIC:                            %(aic).3f
BIC:                            %(bic).3f

%(banner_coef)s
%(coef_table)s
%(banner_end)s
"""
        params = {
            'banner_top': common.banner('Summary of VAR'),
            'banner_coef': common.banner('Summary of Estimated Coefficients'),
            'banner_end': common.banner('End of Summary'),
            'coef_table': self.beta,
            'aic': self.aic,
            'bic': self.bic,
            'nobs': self._nobs,
        }
        return template % params

    @cache_readonly
    def _alpha(self):
        """
        Returns array where the i-th element contains the intercept
        when regressing the i-th column of self._data with the lagged data.
        """
        if self._intercept:
            # Takes the last row of the raw betas.
            # NOTE(review): presumably ols orders the intercept last - verify.
            return self._beta_raw[-1]
        return np.zeros(self._k)

    @cache_readonly
    def _beta_raw(self):
        """Betas as an array: one row per regressor, one column per series."""
        return np.array([self.beta[col].values()
                         for col in self._columns]).T

    def _trans_B(self, h):
        """
        Returns 0, 1, ..., (h-1)-th power of transpose of B as defined in
        equation (4) on p. 142 of the Stata 11 Time Series reference book.

        The returned list always holds at least the 0th and 1st powers,
        even when h < 2.
        """
        size = 1 + self._k * self._p
        result = [np.eye(size)]

        row1 = np.zeros((1, size))
        row1[0, 0] = 1

        v = self._alpha.reshape((self._k, 1))
        row2 = np.hstack(tuple([v] + self._lag_betas))

        m = self._k * (self._p - 1)
        row3 = np.hstack((np.zeros((m, 1)),
                          np.eye(m),
                          np.zeros((m, self._k))))

        trans_B = np.vstack((row1, row2, row3)).T
        result.append(trans_B)

        for i in range(2, h):
            result.append(np.dot(trans_B, result[i - 1]))

        return result

    @cache_readonly
    def _x(self):
        """Regressor matrix: a column of ones followed by all lagged series,
        with the first p rows (incomplete lags) dropped."""
        values = np.array([
            self._lagged_data[i][col].values()
            for i in range(1, 1 + self._p)
            for col in self._columns
        ]).T

        x = np.hstack((np.ones((len(values), 1)), values))[self._p:]
        return x

    @cache_readonly
    def _cov_beta(self):
        """Kronecker product of inv(x'x) with the residual covariance."""
        cov_resid = self._sigma
        x = self._x
        inv_cov_x = inv(np.dot(x.T, x))
        return np.kron(inv_cov_x, cov_resid)

    def _data_xs(self, i):
        """
        Returns the cross-section of the data at the given timestep.
        """
        return self._data.values[i]

    def _forecast_cov_raw(self, n):
        """Forecast covariance for steps 1..n (residual component only)."""
        resid = self._forecast_cov_resid_raw(n)
        # TODO: the beta component (_forecast_cov_beta_raw) is ignored until
        # it is verified; once it is, return the element-wise sum of both.
        return resid

    def _forecast_cov_beta_raw(self, n):
        """
        Returns the covariance of the beta errors for the forecast at
        1, 2, ..., n timesteps.
        """
        p = self._p

        values = self._data.values
        T = len(values) - self._p - 1

        cov_beta = self._cov_beta

        results = []
        for h in range(1, n + 1):
            psi = self._psi(h)
            trans_B = self._trans_B(h)

            total = 0
            for t in range(T + 1):
                index = t + p
                # The p most recent observations up to `index`, newest first.
                y = values.take(range(index, index - p, -1),
                                axis=0).flatten()
                trans_Z = np.hstack(([1], y))
                trans_Z = trans_Z.reshape(1, len(trans_Z))

                inner = 0
                for i in range(h):
                    ZB = np.dot(trans_Z, trans_B[h - 1 - i])
                    inner = inner + np.kron(ZB, psi[i])

                total = total + chain_dot(inner, cov_beta, inner.T)

            results.append(total / (T + 1))

        return results

    def _forecast_cov_resid_raw(self, h):
        """
        Returns the covariance of the residual errors for the forecast at
        1, 2, ..., h timesteps.
        """
        sigma = self._sigma

        running = 0
        result = []
        # Step i's covariance is the cumulative sum of psi_j sigma psi_j'.
        for psi in self._psi(h)[:h]:
            running = running + chain_dot(psi, sigma, psi.T)
            result.append(running)

        return result

    def _forecast_raw(self, h):
        """
        Returns the forecast at 1, 2, ..., h timesteps in the future,
        as an array of shape (h, 1, k).
        """
        k = self._k
        result = []
        for i in range(h):
            step = self._alpha.reshape(1, k)
            for j in range(self._p):
                beta = self._lag_betas[j]
                idx = i - j
                if idx > 0:
                    # Lag falls inside the forecast horizon: reuse forecast.
                    y = result[idx - 1]
                else:
                    # Lag falls in the sample: negative index from the end.
                    y = self._data_xs(idx - 1)

                step = step + np.dot(beta, y.T).T
            result.append(step)

        return np.array(result)

    def _forecast_std_err_raw(self, h):
        """
        Returns the standard error of the forecasts
        at 1, 2, ..., h timesteps.
        """
        return np.array([np.sqrt(np.diag(value))
                         for value in self._forecast_cov_raw(h)])

    @cache_readonly
    def _ic(self):
        """
        Returns the Akaike/Bayesian information criteria.
        """
        RSS = self._rss
        # NOTE(review): this counts p * (k * p + 1) parameters; a k-equation
        # system with k * p + 1 regressors each would have k * (k * p + 1).
        # Left unchanged - confirm which count is intended.
        k = self._p * (self._k * self._p + 1)
        n = self._nobs * self._k

        return {'aic': 2 * k + n * np.log(RSS / n),
                'bic': n * np.log(RSS / n) + k * np.log(n)}

    @cache_readonly
    def _k(self):
        """Number of series (columns) in the system."""
        return len(self._columns)

    @cache_readonly
    def _lag_betas(self):
        """
        Returns list of B_i, where B_i represents the (k, k) matrix
        with the j-th row containing the betas of regressing the j-th
        column of self._data with self._data lagged i time steps.
        First element is B_1, second element is B_2, etc.
        """
        k = self._k
        b = self._beta_raw
        return [b[k * i:k * (i + 1)].T for i in range(self._p)]

    @cache_readonly
    def _lagged_data(self):
        """Dict mapping each lag i in 1..p to the data shifted by i."""
        return dict((i, self._data.shift(i))
                    for i in range(1, 1 + self._p))

    @cache_readonly
    def _nobs(self):
        """Number of usable observations: rows of data minus the lags."""
        return len(self._data) - self._p

    def _psi(self, h):
        """
        psi value used for calculating standard error.

        Returns [psi_0, psi_1, ..., psi_(h - 1)]
        """
        k = self._k
        lag_betas = self._lag_betas
        result = [np.eye(k)]
        for i in range(1, h):
            # psi_i = sum over j = 1..min(i, p) of psi_(i - j) . B_j
            result.append(sum(
                [np.dot(result[i - j], lag_betas[j - 1])
                 for j in range(1, 1 + min(i, self._p))]))

        return result

    @cache_readonly
    def _resid_raw(self):
        """Residuals as an array with one row per series."""
        return np.array([self.ols_results[col]._resid_raw
                         for col in self._columns])

    @cache_readonly
    def _rss(self):
        """Returns the sum of the squares of the residuals."""
        return (self._resid_raw ** 2).sum()

    @cache_readonly
    def _sigma(self):
        """Returns covariance of resids."""
        k = self._k
        n = self._nobs
        resid = self._resid_raw

        # NOTE(review): the divisor is n - k (number of series), not n minus
        # the number of regressors per equation - confirm this is intended.
        return np.dot(resid, resid.T) / (n - k)

    def __repr__(self):
        return self.summary