def VARprocess(df,log=False): # Log transformation, relative difference and drop NULL values if (log): df = np.log(df+0.1).diff().dropna() # Vector Autoregression Process generation maxAttr = len(df.columns) # Find the right lag order orderFound = False while orderFound!=True: try: model = VAR(df.ix[:,0:maxAttr]) order = model.select_order() orderFound = True except: exc_type, exc_obj, exc_tb = sys.exc_info() if str(exc_obj)=="data already contains a constant.": maxAttr = maxAttr - 1 else: maxAttr = int(str(exc_obj).split("-th")[0])-1 print "Exception, reducing to n_attributes ",maxAttr orderFound = False n_lags = max(order.iteritems(), key=operator.itemgetter(1))[1] method = max(order.iteritems(), key=operator.itemgetter(1))[0] print "n_lags ",n_lags print "method ",method results = model.fit(maxlags=n_lags, ic=method) return results
def data2AB(data, x0=None):
    """Estimate the matrices ``A`` and ``B`` by minimising ``nllf2`` with BFGS.

    ``A`` starts from the lag-1 coefficient matrix of a VAR fitted on
    ``data.T``; the free entries of ``B`` are its strictly lower triangle.

    Args:
        data: 2-D array; ``data.shape[0]`` is used as the matrix dimension
            ``n`` and ``data.shape[1]`` as ``T`` (presumably variables x time
            -- confirm with callers).
        x0: optional start vector.  Any object with a ``shape`` attribute is
            used as is; otherwise the start is the flattened ``A`` followed
            by small random values for the free entries of ``B``.

    Returns:
        ``(A, B)`` as rebuilt by ``x2M`` from the optimiser output, with
        ``B`` replaced by ``B + B.T``.
    """
    n = data.shape[0]
    T = data.shape[1]
    # Cross-product matrices of the leading (1:) and lagged (:-1) samples.
    YY = np.dot(data[:, 1:], data[:, 1:].T)
    XX = np.dot(data[:, :-1], data[:, :-1].T)
    YX = np.dot(data[:, 1:], data[:, :-1].T)

    A = VAR(data.T).fit(1).coefs[0, :, :]

    # Mask of B's free parameters: ones strictly below the diagonal.
    B = np.ones((n, n))
    np.fill_diagonal(B, 0)
    B[np.triu_indices(n)] = 0
    # Built-in int / numpy calls replace the removed np.int and scipy.sum.
    K = int(np.sum(np.abs(B)))

    a_idx = np.where(A != 0)
    b_idx = np.where(B != 0)
    np.fill_diagonal(B, 1)

    if hasattr(x0, "shape"):
        x = x0
    else:
        # np.random.randn replaces the removed scipy.randn alias.
        x = np.r_[A.flatten(), 0.1 * np.random.randn(K)]

    o = optimize.fmin_bfgs(nllf2, x,
                           args=(np.double(A), np.double(B),
                                 YY, XX, YX, T, a_idx, b_idx),
                           gtol=1e-12, maxiter=500,
                           disp=False, full_output=True)
    A, B = x2M(o[0], np.double(A), np.double(B), a_idx, b_idx)
    B = B + B.T
    return A, B
def causality_VAR(post_ts, max_order): model = VAR(post_ts) best_lag = model.select_order(max_order, verbose= False) print 'best lag: ', best_lag result = model.fit(best_lag['aic']) return result, best_lag
def _train(self, data: np.ndarray, max_lag: int = 300, *args: Any, **kwargs: Any) -> None:
    """Fit a VAR without trend term on the first differences of ``data``.

    Stores ``max_lag`` on ``self.max_lag`` and the fit result on
    ``self._model``.  ``*args`` and ``**kwargs`` are accepted but not used.
    Asserts that the differenced data has at least ``max_lag`` rows.
    """
    differenced = np.diff(data, axis=0)
    n_rows = differenced.shape[0]
    assert n_rows >= max_lag
    var_model = VAR(endog=differenced)
    self.max_lag = max_lag
    self._model = var_model.fit(maxlags=max_lag, trend="n")
def model(data):
    """Return the ``'pvalue'`` entry of a causality test on a lag-1 VAR.

    The test is requested with ``kind='f'`` for variable ``1`` against the
    variables ``[1, 2]``; the result is indexed like a mapping.
    """
    fitted = VAR(data).fit(maxlags=1)
    causality = fitted.test_causality(1, [1, 2], kind='f')
    return causality['pvalue']
def forecast_DNS_VARm(ts, pred):
    """Forecast ``pred`` steps ahead with a VAR whose lag is chosen by BIC.

    Lags up to 3 are considered.  A selected lag of 0 is replaced by 1.

    Args:
        ts: object with a ``values`` attribute holding the observations.
        pred: number of steps passed to ``forecast``.
    """
    var_model = VAR(ts)
    order_selection = var_model.select_order(maxlags=3)
    # we select best model based on the BIC criterion
    bic_lag = order_selection.selected_orders["bic"]
    # constrains not turning into a random walk
    lag_order = 1 if bic_lag == 0 else bic_lag
    fitted = var_model.fit(lag_order)
    history = ts.values[-lag_order:]
    return fitted.forecast(history, pred)
def var(X, lookback=4, forward=1):
    """
    Prediction using VAR model

    Takes the last ``lookback`` columns of ``X``, transposes them so that
    they become the rows fed to the model, adds a tiny random jitter, fits a
    VAR with default settings and returns the last row of a
    ``forward``-step forecast.

    Args:
        X: DataFrame; the trailing ``lookback`` columns are used.
        lookback: number of trailing columns to use.
        forward: number of steps to forecast.
    """
    window = X.iloc[:, -lookback:].values.T
    jitter = 1e-10 * np.random.rand(window.shape[0], window.shape[1])
    # Build a new array instead of using `+=`: `.values.T` can be a view on
    # the caller's data, so an in-place add could write the jitter back into
    # X (and it fails outright for integer dtypes).
    dat = window + jitter

    results = VAR(dat).fit()
    lag_order = results.k_ar
    return results.forecast(dat[-lag_order:], forward)[-1]
def data2VARgraph_model(data, pval=0.05):
    """Build a graph dictionary from the lag-1 coefficients of a VAR.

    A VAR with one lag is fitted on ``data.T``.  For every pair ``(i, j)``
    whose coefficient ``A[j, i]`` exceeds ``pval`` in absolute value, the
    entry ``g[str(i + 1)][str(j + 1)]`` is set to ``{(0, 1)}``.

    Note that ``pval`` is compared with the coefficient magnitude itself.

    Returns:
        ``(g, r)``: the graph dictionary (keys ``'1'`` .. ``'n'``) and the
        fit result.
    """
    r = VAR(data.T).fit(1)
    A = r.coefs[0, :, :]
    n = A.shape[0]

    g = {str(node): {} for node in range(1, n + 1)}
    for src in range(n):
        edges = g[str(src + 1)]
        for dst in range(n):
            if np.abs(A[dst, src]) > pval:
                edges[str(dst + 1)] = {(0, 1)}
    return g, r
def run_model(model_name, hidden_size):
    """Predict ``Y`` as shifted VAR(1) fitted values plus an RNN correction.

    A one-lag VAR is fitted on the training inputs; its fitted values (minus
    the spread/volume columns) are subtracted from the matching input
    columns to obtain residuals, which are joined to the inputs.  A
    previously saved ``RNN`` is loaded and evaluated row by row, and its
    output is added to the fitted values shifted by one step.

    Args:
        model_name: file stem of the saved parameters under
            ``../model_params/`` relative to this file.
        hidden_size: hidden size passed to ``RNN``.

    Returns:
        ``(Y_pred, Y)``: the predictions as a DataFrame and the target frame
        they are aligned with.
    """
    # import data
    X, Y = data.import_data(set='train')

    # do not plug in returns, but residuals
    results = VAR(X).fit(1)

    # columns to drop from dataframe
    columns = ['XMRspread', 'XMRvolume', 'XMRbasevolume',
               'XRPspread', 'XRPvolume', 'XRPbasevolume',
               'LTCspread', 'LTCvolume', 'LTCbasevolume',
               'DASHspread', 'DASHvolume', 'DASHbasevolume',
               'ETHspread', 'ETHvolume', 'ETHbasevolume']
    # Non-inplace drop with an explicit axis keyword: leaves the fit
    # result's own `fittedvalues` untouched and works on current pandas.
    ar_returns = results.fittedvalues.drop(columns, axis=1)

    X = X.loc[ar_returns.index]
    x_returns = X[ar_returns.columns]
    residual_df = x_returns - ar_returns
    X = X.join(residual_df, how='inner', rsuffix='residual')

    # From here on the fitted values are labelled like the targets.
    y_ar_returns = ar_returns
    y_ar_returns.columns = Y.columns
    Y = (Y.loc[X.index] - y_ar_returns.shift(-1)).dropna()
    y_ar_returns = y_ar_returns.shift(-1).dropna()
    X = X.loc[Y.index]

    # `.values` replaces the removed `as_matrix()`.
    x = X.values
    y = Y.values

    # set prediction matrix
    y_pred = np.zeros(shape=y.shape)

    # set model
    rnn = RNN(hidden_size=hidden_size,
              input_size=x.shape[1],
              output_size=y.shape[1])
    project_root = os.path.abspath(
        os.path.join(os.path.dirname(__file__), '..'))
    rnn.load_state_dict(
        torch.load(project_root
                   + '/model_params/{}.pth.tar'.format(model_name)))

    for row_idx, row in enumerate(x):
        features = Variable(torch.from_numpy(row).float())
        y_pred[row_idx] = rnn.forward(features).data.numpy()

    y_pred = y_pred + y_ar_returns.values
    Y_pred = pd.DataFrame(data=y_pred, index=Y.index, columns=Y.columns)
    return Y_pred, Y
def granger(cause, effect, lag):
    """Run a Wald causality test of ``cause`` on ``effect`` with a VAR.

    Args:
        cause: values for the ``'cause'`` column.
        effect: values for the ``'effect'`` column.
        lag: lag passed to ``fit``; ``-1`` requests AIC selection with up to
            15 lags and ``trend='nc'``.

    Returns:
        ``1`` when fitting raises, ``0`` when the test raises, otherwise the
        cell ``[1][2]`` of the test summary table.
    """
    frame = pd.DataFrame({'cause': cause, 'effect': effect})
    var_model = VAR(frame)

    try:
        if lag == -1:
            fit_kwargs = dict(maxlags=15, trend='nc', ic='aic')
        else:
            fit_kwargs = dict(maxlags=lag)
        results = var_model.fit(**fit_kwargs)
    except Exception:
        # can not find a lag in interval [1, maxlags], that means they have no causality
        return 1

    try:
        summary_table = results.test_causality(
            'effect', 'cause', kind='wald').summary().data
    except Exception:
        return 0
    return summary_table[1][2]
def var(self, df, host):
    """Fit a VAR on the differenced ``df``, forecast a hold-out and plot it.

    The frame is differenced by ``Helper.diff_test``, infinities and NaNs
    are removed, and the last ``len // 10 + 2`` rows are held out.  The lag
    is the AIC choice of ``select_order``; the forecast is mapped back with
    ``Helper.reverse_diff`` and compared per column (RMSE, mean, plot).

    Args:
        df: DataFrame of the original series.
        host: not used by this method.
    """
    df_diffed, no_diffs = Helper.diff_test(df)
    print(df_diffed)
    # replace() returns a new frame; keep it, otherwise the infinities
    # survive the dropna() below.
    df_diffed = df_diffed.replace([np.inf, -np.inf], np.nan)
    cols = df_diffed.columns
    df_diffed = df_diffed.dropna()
    print("Length : " + str(len(df_diffed)))

    # Hold out roughly the last tenth of the rows.
    nobs = int(len(df_diffed) / 10) + 2
    train = df_diffed[:-nobs]
    test = df_diffed[-nobs:]

    model = VAR(train)
    maxlags = int(nobs / 2) + 1
    aic = model.select_order(maxlags).selected_orders['aic']
    results = model.fit(aic)
    print(results.summary())

    lagged_values = train.values[-maxlags:]
    forecast = results.forecast(y=lagged_values, steps=nobs)

    idx = pd.date_range(test.first_valid_index(), periods=nobs)
    df_forecast = pd.DataFrame(data=forecast, index=idx, columns=cols)
    df_fixed = Helper.reverse_diff(df_forecast, df, nobs, no_diffs)

    test_range = df[-nobs:]
    print("-- TEST Result -- \n")
    print(test_range)
    print("-- TEST Result END -- \n")
    print("-- Forecast Result -- \n")
    print(df_fixed)
    print("-- Forecast Result END -- \n")

    for col in df.columns:
        print("-- RMSE --")
        print(rmse(test_range[col], df_fixed[col + '_forecast']))
        print("-- Mean --")
        print(test_range[col].mean())
        df[col].plot(legend=True)
        df_fixed[col + '_forecast'].plot(legend=True)
        # NOTE(review): the original indentation was lost; one figure per
        # column is assumed here -- confirm.
        plt.show()
def get_optimal_lag_exper(p_src_index, src_neighbor_indices, normalized_cells_response_curve):
    """Pick a VAR lag for the source row paired with each neighbour row.

    For every destination index a two-variable VAR is built from the source
    and destination rows.  The smallest of the lags reported for AIC, BIC,
    FPE and HQIC is fitted and checked with ``test_whiteness``.

    Args:
        p_src_index: row index of the source series.
        src_neighbor_indices: iterable of destination row indices.
        normalized_cells_response_curve: 2-D array indexed as
            ``[[src, dst], :]``.

    Returns:
        dict mapping each destination index to its chosen lag.

    Raises:
        ValueError: if the whiteness p-value is NaN or below 0.05.  Any
            error is re-raised after the offending indices (and data, when
            available) have been printed.
    """
    from statsmodels.tsa.api import VAR

    optimal_lag_vector = dict()
    for p_dst_index in src_neighbor_indices:
        src_dst_data = None
        try:
            src_dst_data = normalized_cells_response_curve[
                [p_src_index, p_dst_index], :]
            src_dst_data = np.transpose(src_dst_data)

            model = VAR(src_dst_data)
            lag_order_results = model.select_order(maxlags=None)
            lags = [lag_order_results.aic, lag_order_results.bic,
                    lag_order_results.fpe, lag_order_results.hqic]
            best_lag = lags[np.argmin(lags)]

            fitted = model.fit(maxlags=best_lag, ic=None)
            p_value_whiteness = fitted.test_whiteness(nlags=best_lag).pvalue
            # NaN never compares equal to anything, itself included, so it
            # has to be detected with isnan rather than `==`.
            if np.isnan(p_value_whiteness) or p_value_whiteness < 0.05:
                raise ValueError('found autocorrelation in residuals.')

            optimal_lag_vector[p_dst_index] = best_lag
        except Exception:
            # Print which pair failed, then let the error propagate.
            print('src index: ' + str(p_src_index) +
                  ' dst index: ' + str(p_dst_index))
            if src_dst_data is not None:
                print(src_dst_data)
            raise
    return optimal_lag_vector
def data2VARgraph_model(data, pval=0.05):
    """Build a graph dictionary from the lag-1 coefficients of a VAR.

    A VAR with one lag is fitted on ``data.T``.  For every pair ``(i, j)``
    whose coefficient ``A[j, i]`` exceeds ``pval`` in absolute value, the
    entry ``g[str(i + 1)][str(j + 1)]`` is set to ``{(0, 1)}``.

    Note that ``pval`` is compared with the coefficient magnitude itself.

    Returns:
        ``(g, r)``: the graph dictionary (keys ``'1'`` .. ``'n'``) and the
        fit result.
    """
    r = VAR(data.T).fit(1)
    A = r.coefs[0, :, :]
    n = A.shape[0]

    g = {str(node): {} for node in range(1, n + 1)}
    for src in range(n):
        edges = g[str(src + 1)]
        for dst in range(n):
            if np.abs(A[dst, src]) > pval:
                edges[str(dst + 1)] = {(0, 1)}
    return g, r
def var_simul(self, M = 1e4, N = 10, max_lags = 4):
    '''
    Simulate trajectories from a VAR fitted on ``self.returns``.

    M: Number of sample trajectories,
    N: Number of steps ahead,
    max_lags: largest lag considered when fitting with the AIC criterion.

    Returns an array of shape (int(M), int(N), self.n).
    '''
    results = VAR(self.returns).fit(max_lags, ic = "aic")

    n_paths = int(M)
    n_steps = int(N)
    rets_simul = np.zeros((n_paths, n_steps, self.n), np.float64)
    for path in range(n_paths):
        rets_simul[path] = results.simulate_var(n_steps)
    return rets_simul
def modelorder(self):
    """Tabulate AIC, BIC, FPE and HQIC of VAR fits on ``self.mvdfg``.

    The table is stored on ``self.mo_df``, printed and returned; rows are
    labelled ``'Lag Order <i>'``.
    """
    var_model = VAR(self.mvdfg)

    # NOTE(review): the tried lags run from 1 to (number of columns - 3);
    # the original also set an unused `maxl = 12` -- confirm which bound is
    # intended.
    labels = []
    rows = []
    for lag in range(1, len(self.mvdfg.columns) - 2):
        fitted = var_model.fit(lag)
        labels.append(f'Lag Order {lag}')
        rows.append([fitted.aic, fitted.bic, fitted.fpe, fitted.hqic])

    self.mo_df = pd.DataFrame(data=rows, index=labels,
                              columns=['AIC', 'BIC', 'FPE', 'HQIC'])
    print(self.mo_df)
    return self.mo_df
def var(ds):  # numpy array input
    """Forecast five steps of ``ds`` with a VAR on its log differences.

    The log differences (NaNs replaced via ``nan_to_num``) are fitted with
    up to 10 lags chosen by AIC.  The forecast differences are cumulated
    onto the last log level and exponentiated.

    Returns:
        ``(forecast, lag_order)``: the last five rows of the stacked
        [last observation, forecasts] array rounded to 5 decimals, and the
        lag count used for the forecast input (set to 10 whenever the
        fitted order is below 5).
    """
    max_lag = 10
    horizon = 5

    log_levels = np.log(ds)
    log_diffs = np.nan_to_num(np.diff(log_levels, axis=0))

    results = VAR(log_diffs).fit(maxlags=max_lag, ic='aic')
    lag_order = 10 if results.k_ar < 5 else results.k_ar

    step_changes = results.forecast(log_diffs[-lag_order:], horizon)
    levels = np.exp(np.cumsum(step_changes, axis=0) + log_levels[-1:])
    stacked = np.vstack((ds[-1:], levels))
    return np.round(stacked[-5:], 5), lag_order
def fit_forecast_next(dataset):
    """Forecast one step ahead of ``dataset`` with a default VAR fit.

    ``remove_stationary`` supplies the frame to fit and a round counter.
    The forecast row is labelled with the last index entry of ``dataset``;
    when the round counter is non-zero it is passed through
    ``invert_transformation`` (with a flag telling whether the counter is 2).
    """
    column_names = dataset.columns
    differenced, round_no = remove_stationary(dataset)
    fitted = VAR(differenced).fit()

    # make prediction from the full fitted sample
    next_step = fitted.forecast(fitted.endog, steps=1)

    # converting predictions to dataframe
    forecast = pd.DataFrame(next_step, index=dataset.index[-1:],
                            columns=column_names)
    if round_no == 0:
        return forecast
    return invert_transformation(dataset, forecast, (round_no == 2))
def fit(data, maxlag):
    """Collect information criteria of VAR fits for lags ``1..maxlag``.

    Args:
        data: raw input handed to ``prepareData``.
        maxlag: largest lag to fit.

    Returns:
        dict with lists under ``"aic"``, ``"BIC"`` and ``"hqic"`` (one value
        per lag) and under ``"min"`` the AIC, BIC and HQIC of the fit chosen
        with ``ic='bic'``.
    """
    mdata = prepareData(data)
    equation = {"aic": [], "BIC": [], "hqic": [], "min": []}

    var_model = VAR(mdata)
    for lag in range(1, maxlag + 1):
        candidate = var_model.fit(lag)
        equation["aic"].append(candidate.aic)
        equation["BIC"].append(candidate.bic)
        equation["hqic"].append(candidate.hqic)

    selected = var_model.fit(maxlags=maxlag, ic='bic')
    for value in (selected.aic, selected.bic, selected.hqic):
        equation["min"].append(value)
    return equation
def VARprocess(df, log=False): """ Description: This function applies Vector Auto Regression Input: dataframe Output: VARresults object """ # Log transformation, relative difference and drop NULL values if (log): df = np.log(df + 0.1).diff().dropna() # Vector Autoregression Process generation maxAttr = len(df.columns) # Find the right lag order orderFound = False print "7.1.0 ----- Finding an order for the VAR" maxIter = 0 while orderFound != True and maxIter < 15: maxIter = maxIter + 1 try: model = VAR(df) order = model.select_order() orderFound = True print " !!! loop stuck" except: exc_type, exc_obj, exc_tb = sys.exc_info() #if str(exc_obj)=="data already contains a constant.": maxAttr = maxAttr - 1 #else: #maxAttr = int(str(exc_obj).split("-th")[0])-1 #print "Exception, reducing to n_attributes ",maxAttr orderFound = False print "7.1.1 ----- Model fitting" if orderFound: n_lags = max(order.iteritems(), key=operator.itemgetter(1))[1] method = max(order.iteritems(), key=operator.itemgetter(1))[0] results = model.fit(maxlags=n_lags, ic=method) else: results = model.fit() return results
def get_fedea_on_gdp():
    """Return the scaled long-run effect of the ``fedea`` series on ``gdp``.

    Two series are downloaded through ``inquisitor``; ``fedea`` is the first
    difference of one and ``gdp`` is the other divided by 100.  A four-lag
    VAR is fitted, and entry ``[1, 0]`` of the orthogonalised long-run
    effects (8-period IRF) is divided by the standard deviation of
    ``fedea``.
    """
    qbuilder = inquisitor.Inquisitor(token)
    # Non-inplace dropna: work on our own frame rather than mutating the
    # object handed back by the query builder.
    df = qbuilder.series(
        ticker=['ESE.940000D259D.Q.ES', 'FEEA.PURE064A.M.ES']).dropna()
    df['fedea'] = df['FEEA.PURE064A.M.ES'].diff()
    df['gdp'] = df['ESE.940000D259D.Q.ES'] / 100

    # Dropping NaNs in place on a column subset is the pattern pandas warns
    # about (SettingWithCopy); chain it instead.
    data1 = df[['fedea', 'gdp']].dropna()

    results1 = VAR(data1).fit(4)
    irf1 = results1.irf(8)
    fedea_on_gdp = irf1.orth_lr_effects[1, 0] / data1['fedea'].std()
    return fedea_on_gdp
def get_fedea_on_gdp():
    """Return the scaled long-run effect of the ``fedea`` series on ``gdp``.

    Two series are downloaded through ``inquisitor``; ``fedea`` is the first
    difference of one and ``gdp`` is the other divided by 100.  A four-lag
    VAR is fitted, and entry ``[1, 0]`` of the orthogonalised long-run
    effects (8-period IRF) is divided by the standard deviation of
    ``fedea``.
    """
    qbuilder = inquisitor.Inquisitor(token)
    # Non-inplace dropna: work on our own frame rather than mutating the
    # object handed back by the query builder.
    df = qbuilder.series(
        ticker=['ESE.940000D259D.Q.ES', 'FEEA.PURE064A.M.ES']).dropna()
    df['fedea'] = df['FEEA.PURE064A.M.ES'].diff()
    df['gdp'] = df['ESE.940000D259D.Q.ES'] / 100

    # Dropping NaNs in place on a column subset is the pattern pandas warns
    # about (SettingWithCopy); chain it instead.
    data1 = df[['fedea', 'gdp']].dropna()

    results1 = VAR(data1).fit(4)
    irf1 = results1.irf(8)
    fedea_on_gdp = irf1.orth_lr_effects[1, 0] / data1['fedea'].std()
    return fedea_on_gdp
def get_optimal_lag(p_src_index, neighbor_indices, normalized_cells_response_curve):
    """Pick a VAR lag for the source row paired with its first neighbour.

    A two-variable VAR is built from the source and destination rows; the
    smallest of the lags reported for AIC, BIC, FPE and HQIC is fitted and
    checked with ``test_whiteness``.

    Args:
        p_src_index: index of the source series.
        neighbor_indices: per-index collection of neighbour indices.
        normalized_cells_response_curve: 2-D array indexed as
            ``[[src, dst], :]``.

    Returns:
        Array of length ``len(neighbor_indices)``, zero except at the
        processed destination index.

    Raises:
        ValueError: if the whiteness p-value is below 0.05.
    """
    # get the src neighbors
    optimal_lag_vector = np.zeros(len(neighbor_indices))
    for p_dst_index in neighbor_indices[p_src_index]:
        # find the common neighbours (results are not used further down)
        _dst_neighbors = neighbor_indices[p_dst_index]
        _disjoint = get_disjoint_neighbours(p_src_index, p_dst_index,
                                            neighbor_indices)

        pair = normalized_cells_response_curve[[p_src_index, p_dst_index], :]
        var_model = VAR(np.transpose(pair))
        selection = var_model.select_order(maxlags=None)
        candidates = [selection.aic, selection.bic,
                      selection.fpe, selection.hqic]
        chosen_lag = candidates[np.argmin(candidates)]

        fitted = var_model.fit(maxlags=chosen_lag, ic=None)
        if fitted.test_whiteness(nlags=chosen_lag).pvalue < 0.05:
            raise ValueError('found autocorrelation in residuals.')

        optimal_lag_vector[p_dst_index] = chosen_lag
        # NOTE(review): this unconditional break stops after the first
        # neighbour -- confirm it is intended.
        break
    return optimal_lag_vector
def SW_PCA_VAR(X, Y, ws=50, r=2):
    """
    Vector auto-regression model.

    ``X`` is passed through ``sliding_window`` with window size ``ws`` and
    reduced to ``r`` PCA components.  A two-lag VAR is fitted on the
    components; its fitted values (with the first two component rows copied
    unchanged) are mapped back through the PCA.  ``Y`` is not used.

    Returns ``(fitted_components, Y_hat)``.
    """
    windowed = sliding_window(X, ws)
    reducer = PCA(n_components=r)
    components = reducer.fit_transform(windowed)

    var_fit = VAR(components).fit(2)

    fitted_components = np.zeros(components.shape)
    # The first two rows are copied from the components; the fitted values
    # fill the rest.
    fitted_components[:2, :] = components[:2, :]
    fitted_components[2:, :] = var_fit.fittedvalues

    Y_hat = reducer.inverse_transform(fitted_components)
    return fitted_components, Y_hat
def test_gc(data, maxlag=None, signif=0.05, verbose=False):
    """Apply a Granger causality test to every ordered pair of columns.

    Args:
        data: DataFrame or array-like of observations, one column per
            variable.
        maxlag (None, optional): lag passed to ``fit``; the default fit is
            used when falsy.
        signif (float, optional): significance level passed to
            ``test_causality``.
        verbose (bool, optional): forwarded to ``fit`` and
            ``test_causality``.

    Returns:
        DataFrame with one row per ordered pair, named
        ``(caused, causing)``, holding the test output plus the columns
        ``'H0'``, ``'VAR'`` and ``'best_order'``.
    """
    from statsmodels.tsa.api import VAR

    if isinstance(data, pd.DataFrame):
        colns = data.columns
        arr = data.values
    else:
        arr = np.array(data)
        # No column labels available; filled in from the model's own
        # variable names after fitting.
        colns = None

    model = VAR(arr)
    if maxlag:
        res = model.fit(maxlag, verbose=verbose)
    else:
        res = model.fit(verbose=verbose)

    obs_name = res.names
    if colns is None:
        colns = pd.Index(obs_name)

    gc_test = []
    for c1, c2 in permutations(obs_name, 2):
        gc_res = res.test_causality(c1, c2, signif=signif, verbose=verbose)
        coln1, coln2 = colns[[obs_name.index(c1), obs_name.index(c2)]]
        gc_res = pd.Series(gc_res, name=(coln1, coln2))
        gc_res['H0'] = "'{}' do not Granger-cause '{}'".format(coln2, coln1)
        gc_test.append(gc_res)

    results = pd.DataFrame(gc_test)
    results['VAR'] = model
    # `arr` always has a shape, unlike a plain list passed as `data`.
    results['best_order'] = (len(model.exog_names) - 1) / arr.shape[1]
    return results
def get_optimal_lag(p_src_index, neighbor_indices, normalized_cells_response_curve):
    """Pick a VAR lag for the source row paired with each of its neighbours.

    For every neighbour a two-variable VAR is built from the source and
    destination rows; the smallest of the lags reported for AIC, BIC, FPE
    and HQIC is fitted and checked with ``test_whiteness``.

    Args:
        p_src_index: index of the source series.
        neighbor_indices: per-index collection of neighbour indices.
        normalized_cells_response_curve: 2-D array indexed as
            ``[[src, dst], :]``.

    Returns:
        Array of length ``len(neighbor_indices)`` holding the chosen lag at
        each neighbour's index and zero elsewhere.

    Raises:
        ValueError: if a whiteness p-value is below 0.05 (the p-value is
            appended to the message).
    """
    # get the src neighbors
    optimal_lag_vector = np.zeros(len(neighbor_indices))
    for p_dst_index in neighbor_indices[p_src_index]:
        pair = normalized_cells_response_curve[[p_src_index, p_dst_index], :]
        var_model = VAR(np.transpose(pair))

        selection = var_model.select_order(maxlags=None)
        candidates = [selection.aic, selection.bic,
                      selection.fpe, selection.hqic]
        chosen_lag = candidates[np.argmin(candidates)]

        var_result = var_model.fit(maxlags=chosen_lag, ic=None)
        portmanteau_test = var_result.test_whiteness(chosen_lag).pvalue
        if portmanteau_test < 0.05:
            raise ValueError('found autocorrelation in residuals.' +
                             str(portmanteau_test))

        optimal_lag_vector[p_dst_index] = chosen_lag
    return optimal_lag_vector
def select_order_of_VAR_model(self):
    """Print information criteria for lags 1-9 and the ``select_order`` table.

    The model is built on ``self.df``; ``self.max_lags`` bounds the
    ``select_order`` call.
    """
    var_model = VAR(self.df)

    print("\n*********checking different orders of lag************\n")
    for lag in range(1, 10):
        fitted = var_model.fit(lag)
        print('Lag Order =', lag)
        print('AIC : ', fitted.aic)
        print('BIC : ', fitted.bic)
        print('FPE : ', fitted.fpe)
        print('HQIC: ', fitted.hqic, '\n')

    # alternative
    print("\n*********select_order method used: ************\n")
    selection = var_model.select_order(maxlags=self.max_lags)
    print(selection.summary())
def eval_ar(Y, T1, T2, ic, p, loss):
    """Average one-step forecast loss of VAR fits on growing windows.

    For each ``u`` in ``range(T1, T2)`` a VAR is fitted on rows
    ``p .. u + p - 1`` of ``Y`` (lag chosen by ``ic``, at most ``p``) and
    used to predict row ``u + p``.

    Args:
        Y: 2-D array of observations.
        T1, T2: range of window lengths to evaluate.
        ic: information criterion passed to ``fit``.
        p: maximum lag and row offset of the windows.
        loss: forwarded to ``calc_loss``.

    Returns:
        Mean of the per-window losses.
    """
    losses = []
    for u in range(T1, T2):
        target_row = u + p
        history = Y[p:target_row, :]
        fitted = VAR(history).fit(maxlags=p, ic=ic)

        lag_order = fitted.k_ar
        if lag_order == 0:
            # No lags selected: the fitted parameters serve as prediction.
            yhat = fitted.params
        else:
            yhat = fitted.forecast(history[-lag_order:], 1)

        losses.append(calc_loss(Y[target_row, :] - yhat, loss))
    return np.mean(np.array(losses))
def get_irf(nd, subset):
    '''
    Return entry [12, 2, 0] of the orthogonalised cumulative impulse
    responses of a six-lag VAR fitted on the ``subset`` columns of ``nd``
    (rows with missing values dropped, 12 response periods).

    http://statsmodels.sourceforge.net/0.6.0/vector_ar.html
    '''
    # The former bare `data.describe()` call was removed: its result was
    # never used.
    data = nd.reindex(columns=subset).dropna()
    results = VAR(data).fit(6)
    irf = results.irf(12)
    cum_effects = irf.orth_cum_effects
    return cum_effects[12, 2, 0]
def predict(self, test_X, test_Y):
    """Produce one-step-ahead predictions of the first column, refitting each step.

    The target is placed in column 0 next to the features.  For step ``t``
    a default VAR is fitted on the training rows plus the first ``t`` test
    rows and the first component of its one-step forecast is recorded.

    Returns:
        1-D array with one prediction per row of ``test_Y``.
    """
    train_block = numpy.concatenate(
        (numpy.array([self.train_Y]).T, self.train_X), axis=1)
    test_block = numpy.concatenate(
        (numpy.array([test_Y]).T, test_X), axis=1)

    collected = []
    for t in range(test_Y.shape[0]):
        history = numpy.vstack((train_block, test_block[:t]))
        fitted = VAR(endog=pd.DataFrame(data=history)).fit()
        next_row = fitted.forecast(history[-fitted.k_ar:], 1)[0]
        collected.append(next_row[0])
    return numpy.append(numpy.empty(0), collected)
def get_irf(nd, subset):
    '''
    Return entry [12, 2, 0] of the orthogonalised cumulative impulse
    responses of a six-lag VAR fitted on the ``subset`` columns of ``nd``
    (rows with missing values dropped, 12 response periods).

    http://statsmodels.sourceforge.net/0.6.0/vector_ar.html
    '''
    # The former bare `data.describe()` call was removed: its result was
    # never used.
    data = nd.reindex(columns=subset).dropna()
    results = VAR(data).fit(6)
    irf = results.irf(12)
    cum_effects = irf.orth_cum_effects
    return cum_effects[12, 2, 0]
class LinVAR:
    def __init__(self, X: np.ndarray, K=1):
        """
        Linear VAR model.

        @param X: numpy array with data of shape T x p.
        @param K: order of the VAR model (maximum lag).
        """
        # X.shape: T x p
        super(LinVAR, self).__init__()

        self.model = VAR(X)
        self.p = X.shape[1]
        self.K = K

        # Fit the model
        self.model_results = self.model.fit(maxlags=self.K)

    def infer_causal_structure(self, kind="f", adjust=False, signed=False):
        """
        Infer GC based on the fitted VAR model.

        @param kind: type of the statistical test for GC (as implemented within statsmodels). Default: F-test.
        @param adjust: whether to adjust p-values? If True, p-values are adjusted using the Benjamini-Hochberg procedure
        for controlling the FDR.
        @param signed: whether to return coefficient signs?
        @return: p x p array with p-values, p x p array with hypothesis test results, and, if signed == True,
        p x p array with coefficient signs.
        """
        pvals = np.zeros((self.p, self.p))
        for i in range(self.p):
            for j in range(self.p):
                pvals[i, j] = self.model_results.test_causality(
                    caused=i, causing=j, kind=kind).pvalue
        # Threshold once after all p-values are known; this used to be
        # recomputed for the whole matrix in every inner iteration.
        reject = pvals <= 0.05

        if adjust:
            reject, pvals, alpha_Sidak, alpha_Bonf = multitest.multipletests(
                pvals.ravel(), method="fdr_bh")
            pvals = np.reshape(pvals, (self.p, self.p))
            reject = np.reshape(reject, (self.p, self.p))

        if signed:
            # The first parameter row is skipped before masking.
            # NOTE(review): the product only lines up with the p x p mask
            # when the remaining rows number p (presumably K == 1) --
            # confirm for K > 1.
            return pvals, reject, np.sign(
                self.model_results.params[1:, :].T * reject)
        else:
            return pvals, reject
def construct_model_based_connectivity(event_type, reader, pairs, BUFFER, freqs, EPSILON, window_size):
    """Return the lag-1 VAR coefficient matrix of windowed log wavelet power.

    Events of ``event_type`` are loaded through ``reader``; their EEG is
    filtered, mirror-buffered and decomposed with a Morlet wavelet filter.
    Log10 power at the first frequency is averaged over time windows, the
    events are concatenated along time, and a one-lag VAR is fitted with
    channels as variables.

    Args:
        event_type: event type to select; ``'COUNTDOWN_START'`` uses the
            shortest countdown duration as the epoch end, every other type
            uses 1366.
        reader: data reader providing ``load`` and ``load_eeg``.
        pairs: scheme passed to ``load_eeg``.
        BUFFER: mirror-buffer length added before and removed after
            filtering.
        freqs: frequencies for the wavelet filter.
        EPSILON: offset added to the power before taking ``log10``.
        window_size: divisor of the sample count giving the number of
            averaging windows.
    """
    sess_events = reader.load('task_events')
    events = sess_events[sess_events.type == event_type]

    rel_start = 0
    if event_type == 'COUNTDOWN_START':
        countdown_end_events = sess_events[sess_events.type == 'COUNTDOWN_END']
        countdown_times = [
            countdown_end_events.iloc[i]['mstime'] - events.iloc[i]['mstime']
            for i in np.arange(len(countdown_end_events))
        ]
        rel_stop = np.min(countdown_times)
    else:
        rel_stop = 1366

    events_eeg = reader.load_eeg(events, rel_start=rel_start,
                                 rel_stop=rel_stop, scheme=pairs)
    events_eeg = events_eeg.to_ptsa()
    events_eeg = events_eeg.filtered(freq_range=[58.0, 62.0])
    events_eeg = events_eeg.add_mirror_buffer(BUFFER)

    wf = morlet.MorletWaveletFilter(events_eeg, freqs=freqs)
    power_wavelet, phase_wavelet = wf.filter()
    power_wavelet = power_wavelet.remove_buffer(BUFFER)
    power_wavelet = power_wavelet.transpose(
        'channel', 'event', 'time', 'frequency')
    power_wavelet = np.log10(power_wavelet + EPSILON)

    # Split the time axis into windows and average the power in each.
    n_times = power_wavelet.shape[2]
    intervals = np.array_split(np.arange(n_times),
                               int(n_times / window_size))
    power_wavelet_aggregate = np.zeros(
        shape=list(power_wavelet.shape[:2]) + [len(intervals)])
    for window_idx, samples in enumerate(intervals):
        power_wavelet_aggregate[:, :, window_idx] = \
            power_wavelet[:, :, samples, 0].mean('time')

    # Flatten (event, window) into one axis.
    n_channels, n_events, n_windows = power_wavelet_aggregate.shape
    power_wavelet_aggregate = power_wavelet_aggregate.reshape(
        n_channels, n_events * n_windows)

    results = VAR(power_wavelet_aggregate.T).fit(maxlags=1)
    conn_mat = results.coefs[0, :, :]
    return conn_mat
def VAR(self):
    '''
    Rolling-window estimation of a k-lag VAR model.

    For every window of ``self.gundong_time`` consecutive rows of
    ``self.data`` a VAR with ``self.k_lag`` lags is fitted.  The
    coefficient rows after the first one (transposed) are stored in
    ``self.save_data_coef`` and ``sigma_u`` in ``self.save_data_cov``, both
    indexed by window position.

    Supports lag orders greater than one.
    k_lag: lag order of the vector autoregression
    '''
    window = self.gundong_time
    for end in range(window, self.row + 1):
        slot = end - window
        window_data = self.data.iloc[slot:end, :]
        # fit with k_lag lags
        results = VAR(window_data).fit(self.k_lag)
        coef = results.params
        last_coef_row = 1 + self.k_lag * self.column
        self.save_data_coef[slot, :, :] = coef.iloc[1:last_coef_row, :].T
        self.save_data_cov[slot, :, :] = results.sigma_u
def trainVectorAutoRegressiveMethodModel():
    """Fit a five-lag VAR on the stored training set, save it and print its summary."""
    training_data = readVectorAutoRegressiveMethodXTrain()

    # p = 5 was picked after comparing models with different p values:
    # it gives the minimum AIC and BIC.
    lag_order = 5
    fitted = VAR(training_data).fit(lag_order)

    # persist the fitted model (pickle file)
    saveVectorAutoRegressiveMethodModel(fitted)
    print(fitted.summary())
def test(self):
    """Print a causality-test summary on the statsmodels macro data set.

    Log differences of ``realgdp``, ``realcons`` and ``realinv`` are fitted
    with a two-lag VAR; the F-type test is run with all three series as
    both caused and causing variables.
    """
    from statsmodels.tsa.base.datetools import dates_from_str

    series_names = ['realgdp', 'realcons', 'realinv']

    mdata = sm.datasets.macrodata.load_pandas().data
    # Build "<year>Q<quarter>" labels and turn them into dates.
    date_parts = mdata[['year', 'quarter']].astype(int).astype(str)
    quarter_labels = date_parts["year"] + "Q" + date_parts["quarter"]
    quarter_dates = dates_from_str(quarter_labels)

    mdata = mdata[series_names]
    mdata.index = pandas.DatetimeIndex(quarter_dates)
    growth = np.log(mdata).diff().dropna()

    results = VAR(growth).fit(2)
    gc_result = results.test_causality(list(series_names),
                                       list(series_names), kind='f')
    print(gc_result.summary())
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.tsa.api import VAR
from scipy.signal import lfilter

# Load the macro data set, keep three series and work with the first
# differences of their logs.
mdata = sm.datasets.macrodata.load().data
mdata = mdata[["realgdp", "realcons", "realinv"]]
names = mdata.dtype.names
data = mdata.view((float, 3))
data = np.diff(np.log(data), axis=0)

# Two-lag VAR on the differenced series.
model = VAR(data)
res = model.fit(2)

res.plot_sample_acorr()
irf = res.irf(10)
irf.plot()
# Save before show(): with a blocking backend the figure is typically gone
# once show() returns, and a later savefig writes an empty image.
plt.savefig("image.png")
plt.show()

res.plot_forecast(5)
res.fevd().plot()
plt.savefig("image2.png")
plt.show()
# Parse the 'datetime' column and use it as the index.
# NOTE(review): `format` is not assigned in the visible code; unless it is
# defined earlier this passes the builtin -- confirm.
data['datetime'] = pd.to_datetime(data['datetime'], format=format)
data.set_index(pd.DatetimeIndex(data['datetime']), inplace=True)

# Select variables for VAR model; the last 50 complete rows are held out.
varData = data[['pm2.5','TEMP','PRES', 'Iws']].dropna()[:-50]
test = data[['pm2.5','TEMP','PRES', 'Iws']].dropna()[-50:]
# endVal = varData.loc["2014-01-04 00:00:00"]
# varData = varData.diff(1)

model = VAR(varData) # define the model and data
# model.select_order() # uses information criteria to select
# model order
reg = model.fit(30) # order chosen based on BIC criterion

# Forecasting 50 steps from the observations since 2013-01-04
fcast = reg.forecast(varData['2013-01-04':].values, steps = 50)

def dediff(todaysVal, forecast):
    """Turn forecast differences into levels by cumulating from ``todaysVal``.

    ``future`` is the same object as ``forecast``, so the result is written
    into ``forecast`` in place.  No return statement is visible in this
    chunk.
    """
    future = forecast
    for i in range(np.shape(forecast)[0]):
        if (i==0):
            future[i] = todaysVal + forecast[0]
        else:
            future[i] = future[i-1] + forecast[i]
import pandas

mdata = ds.macrodata.load_pandas().data

# prepare the dates index
dates = mdata[['year', 'quarter']].astype(int).astype('S4')
quarterly = dates["year"] + "Q" + dates["quarter"]
quarterly = dates_from_str(quarterly)

# Keep three series, index them by date and take log differences.
mdata = mdata[['realgdp','realcons','realinv']]
mdata.index = pandas.DatetimeIndex(quarterly)
data = np.log(mdata).diff().dropna()

# Two-lag VAR shared by the plotting helpers below.
model = VAR(data)
est = model.fit(maxlags=2)

def plot_input():
    """Plot the fitted model via ``est.plot()``."""
    est.plot()

def plot_acorr():
    """Plot autocorrelations via ``est.plot_acorr()``."""
    est.plot_acorr()

def plot_irf():
    """Plot the impulse responses of the fitted model."""
    est.irf().plot()

def plot_irf_cum():
    """Plot the cumulative impulse-response effects."""
    irf = est.irf()
    irf.plot_cum_effects()

# NOTE(review): the body of the next definition is not visible in this chunk.
def plot_forecast():
def estimate_VAR():
    """Estimate the benchmark VAR, print shock effects and plot term frequencies.

    NOTE(review): indentation was lost in this copy of the file; the
    nesting below was reconstructed from which names are local (``nd``,
    ``d``, ``fedea_on_gdp``) -- confirm against the original.
    """
    # Load the data sets, join them and draw the descriptive plots.
    df = load_external()
    d = load_es_uncertainty()
    df1 = load_eu_uncertainty()
    nd = d.join(df).join(df1)
    plot_index_comparison(nd)
    plot_eu_epu(nd)
    plot_cinco_elpais(nd)
    nd = transform_data(nd)
    plot_epu_gdp(nd)

    # Benchmark model: six-lag VAR with EPU as first difference of 'policy'.
    benchmark_subset = ['EPU','europe', 'fedea', 'inflation', 'differential']
    nd['EPU'] = nd['policy'].diff(periods = 1)
    data = nd.reindex(columns=benchmark_subset)
    data = data.dropna()
    data.describe()
    model = VAR(data)
    results = model.fit(6)
    irf = results.irf(12)
    irf.plot(orth=True, impulse='EPU', subplot_params = {'fontsize' : 12})
    #irf.plot_cum_effects(orth=True, impulse='EPU', subplot_params = {'fontsize' : 12})
    #
    cum_effects = irf.orth_cum_effects
    fedea_on_gdp = get_fedea_on_gdp()
    # Entry [12, k, 0]: cumulative response of variable k to the first
    # variable after 12 periods.
    elasticity = -100*fedea_on_gdp*cum_effects[12,2,0]
    print 'Effects of a 1 sd uncertainty shock on gdp growth (negative): %0.3f%%' % elasticity
    print 'Inflation increases by %0.2f' % (100* cum_effects[12,3,0], )
    print 'Bond spreads increase by %0.1f basis points' % (100* cum_effects[12,4,0], )

    full_sset = ['ibex','vol','resid','europe', 'fedea', 'inflation', 'differential' ]

    def get_irf(nd, subset):
        '''
        Return entry [12, 2, 0] of the orthogonalised cumulative impulse
        responses of a six-lag VAR on the ``subset`` columns of ``nd``.

        http://statsmodels.sourceforge.net/0.6.0/vector_ar.html
        '''
        data = nd.reindex(columns=subset)
        data = data.dropna()
        data.describe()
        model = VAR(data)
        results = model.fit(6)
        irf = results.irf(12)
        cum_effects = irf.orth_cum_effects
        return cum_effects[12,2,0]

    # Repeat the exercise with each term column as the uncertainty measure:
    # share of articles, rescaled to mean 100, then first-differenced.
    for colname in colnames:
        nd['uncert'] = nd[colname] / nd.articles
        nd['uncert'] = nd['uncert'] / nd['uncert'].mean() * 100
        nd['uncert'] = nd['uncert'].diff(periods = 1)
        subset = ['uncert','europe', 'fedea', 'inflation', 'differential' ]
        cum_effect= get_irf(nd, subset)
        print '**%s** | %d | %.04f' % (colname, nd[colname].sum(), 100*fedea_on_gdp*cum_effect)

    # Bar chart of the mean of each term column, saved under figures/.
    aa = d.mean()[colnames]
    plt.figure(6)
    h = plt.bar(range(len(aa)),aa,label = list(aa.index) )
    plt.subplots_adjust(bottom=0.3)
    xticks_pos = [0.65*patch.get_width() + patch.get_xy()[0] for patch in h]
    plt.xticks(xticks_pos, list(aa.index), ha='right', rotation=45)
    plt.savefig(os.path.join(rootdir, 'figures','frequency_types.%s' % fig_fmt), format=fig_fmt)
import numpy as np import statsmodels.api as sm from statsmodels.tsa.api import VAR # some example data mdata = sm.datasets.macrodata.load().data mdata = mdata[['realgdp','realcons','realinv']] names = mdata.dtype.names data = mdata.view((float,3)) use_growthrate = False #True #False if use_growthrate: data = 100 * 4 * np.diff(np.log(data), axis=0) model = VAR(data, names=names) res = model.fit(4) nobs_all = data.shape[0] #in-sample 1-step ahead forecasts fc_in = np.array([np.squeeze(res.forecast(model.y[t-20:t], 1)) for t in range(nobs_all-6,nobs_all)]) print fc_in - res.fittedvalues[-6:] #out-of-sample 1-step ahead forecasts fc_out = np.array([np.squeeze(VAR(data[:t]).fit(2).forecast(data[t-20:t], 1)) for t in range(nobs_all-6,nobs_all)]) print fc_out - data[nobs_all-6:nobs_all] print fc_out - res.fittedvalues[-6:]
stat_df = df.diff().dropna() #get rid of columns that are zeros at the end , we just assume they will continue to be zeros for col_name in stat_df.columns.values: if stat_df[col_name][-1] == 0 and stat_df[col_name][-2] == 0:# and stat_df[col_name][-3] == 0: print col_name del stat_df[col_name] no_forecast.setdefault(m,[]).append(col_name) #print stat_df forecast_cols[m] = stat_df.columns.values #new_df = stat_df[['P17','P15','P16']] model = VAR(stat_df) maxlags = 3 try: results = model.fit(maxlags, ic='aic', verbose=True) except Exception,exc: maxlags = 1 results = model.fit(maxlags, ic='aic', verbose=True) #if m == 'M2': # import pdb # pdb.set_trace() #import pdb #pdb.set_trace() # results = model.fit(4) #print results.summary() lag_order = results.k_ar #print "lag_order\n" ,lag_order #print "stat_df.values[-log_order] ---\n", stat_df.values[-lag_order:] #print "----------------\n"