def vecm(self):
    """Fit a VECM on ``self.y_t`` and return the post-processed result.

    The model uses ``self.k_ar_diff`` lagged differences, the deterministic
    specification stored in ``self.deterministic`` and a cointegration rank
    of ``self.neqs - 1``.

    Returns:
        Whatever ``self.get_clean_results`` produces for the fitted result.
    """
    lagged_diffs = self.k_ar_diff
    # Imported lazily so statsmodels is only needed when this method runs.
    from statsmodels.tsa.vector_ar.vecm import VECM

    rank = self.neqs - 1
    fitted = VECM(
        self.y_t,
        k_ar_diff=lagged_diffs,
        deterministic=self.deterministic,
        coint_rank=rank,
    ).fit()
    return self.get_clean_results(fitted)
def vecm_one_target(X, nbre_preds, p):
    """Produce rolling one-step-ahead VECM forecasts of the target column.

    For each of the last ``nbre_preds`` rows of ``X`` a VECM is fitted on all
    rows that precede it and a single step is forecast.  When fitting or
    forecasting with ``k_ar_diff=p`` fails, one retry with ``k_ar_diff=1`` is
    made before giving up.

    Args:
        X: np array where the first column represents the target variable.
        nbre_preds: number of predictions.
        p: lag parameter (``k_ar_diff``) tried first.

    Returns:
        A one-column ``pd.DataFrame`` holding the forecasts of the first
        column, or an empty ``pd.DataFrame`` when a window could not be
        fitted with either lag.
    """
    predictions = []
    for j in range(nbre_preds):
        # Expanding window: everything before the row being predicted.
        X_train = X[:X.shape[0] - nbre_preds + j, :]
        for lag in (p, 1):
            try:
                fitted = VECM(X_train, k_ar_diff=lag, deterministic="co").fit()
                yhat = fitted.predict(steps=1)
                predictions.append(yhat[0, 0])
                break
            except Exception:
                # Best effort: fall through to the next lag candidate.
                # ``Exception`` (not a bare ``except``) so Ctrl-C and
                # SystemExit are no longer swallowed.
                continue
        else:
            print("Still does not work")
            return pd.DataFrame()
    return pd.DataFrame(predictions)
def vecm_estimate(df, maxlags, rank, deterministic="nc", report=False):
    """Fit a VECM on ``df`` and optionally print its main estimates.

    Args:
        df: data handed to ``VECM`` as the endogenous series.
        maxlags: passed to ``VECM`` as ``k_ar_diff``.
        rank: passed to ``VECM`` as ``coint_rank``.
        deterministic: deterministic-term specification forwarded to ``VECM``.
        report: when true, print the rank, alpha, beta and gamma of the fit.

    Returns:
        The fitted result object.
    """
    model = VECM(
        df,
        k_ar_diff=maxlags,
        coint_rank=rank,
        deterministic=deterministic,
    )
    result = model.fit()
    if not report:
        return result

    print(f"Rank={result.coint_rank}")
    print(f"α={result.alpha}")
    print(f"β={result.beta}")
    print(f"γ={result.gamma}")
    return result
def predict_target(data, prediction_model, method, nb_predictors, causality_graph):
    """Forecast five steps of the target column with ARIMA or a VECM.

    Relies on the module-level names ``target`` (column to forecast) and
    ``lag`` (``k_ar_diff`` used for the VECM).

    Args:
        data: DataFrame containing the ``target`` column and the candidate
            predictor columns.
        prediction_model: when it contains ``"Arima"`` an ``auto_arima`` model
            on the target alone is used; otherwise a VECM is fitted.
        method: predictor-selection method for the VECM branch.  A value
            containing ``"PEHAR"`` uses ``pehar_feature_selection``; any other
            value uses ``gfsm_feature_selection``.
        nb_predictors: number of predictors to keep.
        causality_graph: graph handed to the feature-selection helper.

    Returns:
        The five forecasts of the target, or a list of five NaN values when
        the model could not be fitted.

    Raises:
        ValueError: if ``target`` is not one of ``data``'s columns.
    """
    target_index = list(data.columns).index(target)

    if "Arima" in prediction_model:
        try:
            model = auto_arima(data.loc[:, target].values, start_p=1, start_q=1)
            predictions = model.fit_predict(data.loc[:, target].values, n_periods=5)
        except Exception:
            # The original message blamed VECM here although ARIMA failed.
            print("Arima failed for this variable")
            predictions = [float('nan') for _ in range(5)]
        return predictions

    if "PEHAR" in method:
        predictors_index = pehar_feature_selection(
            causality_graph, target_index)[0:nb_predictors]
    else:
        # The original test was `elif "GFSM":`, which is always true, so GFSM
        # has always been the fallback for every non-PEHAR method.  Kept as an
        # explicit default so existing callers see no change.
        predictors_index = gfsm_feature_selection(
            causality_graph, target_index, nb_predictors)

    # Target variable goes in the first column.  A new list is built so the
    # list returned by the selection helper is not mutated.
    column_positions = [target_index, *predictors_index]
    X_train = data.iloc[:, column_positions].values

    try:
        model = VECM(X_train, k_ar_diff=lag, deterministic="ci").fit()
        # Five-step forecast; column 0 is the target.
        predictions = model.predict(steps=5)[:, 0]
    except Exception:
        print("VECM falied for this variable")
        predictions = [float('nan') for _ in range(5)]
    return predictions
def fit(self, model_state, endog_data: pd.DataFrame, exp_name: str, exog_data=None):
    """Walk-forward evaluation of a VECM with one-step-ahead forecasts.

    The first 90% of ``endog_data`` is the initial training window.  For each
    remaining row the model is refitted on everything before that row and one
    step is forecast.  Per-column R² scores of the forecasts against the
    held-out rows are then stored via ``store_exp_results``.

    Args:
        model_state: not used by this method.
        endog_data: endogenous series, one column per variable.
        exp_name: experiment name forwarded to ``store_exp_results``.
        exog_data: optional exogenous regressors, row-aligned with
            ``endog_data``.

    Returns:
        Tuple ``(last_fitted_result, predictions, fit_score)`` where
        ``predictions`` has one row per held-out sample and ``fit_score`` is
        ``{'states': [...], 'r2': [...]}``.

    Raises:
        ValueError: if ``endog_data`` has no rows to hold out.
    """
    # Hardcoded values to have consistency in models.
    train_pct = 0.9
    prediction_steps = 1

    total_samples = len(endog_data.index)
    training_sample_size = int(train_pct * total_samples)
    test_sample_size = total_samples - training_sample_size
    if test_sample_size < 1:
        # Previously this surfaced as an UnboundLocalError at the return.
        raise ValueError("endog_data must contain at least one row")

    from statsmodels.tsa.vector_ar.vecm import VECM

    test_data = endog_data.iloc[training_sample_size:]

    step_forecasts = []
    vecm_model_fit = None
    for _test_idx in range(test_sample_size):
        train_end = training_sample_size + _test_idx
        endog_train_data = endog_data.iloc[:train_end]
        exog_train_data = None
        exog_test_data = None
        if exog_data is not None:
            exog_train_data = exog_data.iloc[:train_end]
            # Future exogenous values for exactly the forecast horizon.  The
            # original sliced the *endogenous* test frame here, which fed the
            # wrong variables to ``predict``.
            exog_test_data = exog_data.iloc[
                train_end:train_end + prediction_steps
            ].to_numpy()
        vecm_model_fit = VECM(endog_train_data, exog=exog_train_data).fit()
        step_forecasts.append(
            vecm_model_fit.predict(prediction_steps, exog_fc=exog_test_data)
        )
    # Stack once instead of growing an array with np.append every iteration.
    predictions = np.vstack(step_forecasts)

    fit_score = {'states': [], 'r2': []}
    for col_idx, _state in enumerate(endog_data.columns):
        y_obs = test_data[_state].to_numpy()
        y_pred = predictions[:, col_idx]
        fit_score['states'].append(_state)
        fit_score['r2'].append(r2_score(y_obs, y_pred))
    store_exp_results(fit_score, exog_data is not None, exp_name)
    return vecm_model_fit, predictions, fit_score
if joh_trace.lr1[i] > joh_trace.cvt[i, 1]: r = i + 1 joh_trace.r = r return joh_trace # loops through 1 to 6 lags of months for i in range(1, 7): # tests for cointegration at i lags joh_trace = johansen_trace(df_selected, i) print('Using the Trace Test, there are', joh_trace.r, '''cointegrating vectors at %s lags between the series''' % i) print() #Vector error corretion model (VECM)=========================================== # estimates the VECM on the closing prices with 6 lags, 1 cointegrating relationship, and # a constant within the cointegration relationship model_vecm = VECM(endog = df_selected, k_ar_diff = 6, coint_rank = 1, deterministic = 'ci') model_vecm_fit = model_vecm.fit() model_vecm_fit.summary() #Impulse response function===================================================== irf = model_vecm_fit.irf(24) irf.plot(orth = False) #Dynamic forecasting=========================================================== model_vecm_fit.plot_forecast(12, n_last_obs=60)
'sorghum_tons', 'rice_tons', 'groundnuts_tons']] exog_coint=df[['NDVI_min', 'NDVI_avg', 'NDVI_max', 'rainfall_min', 'rainfall_avg', 'rainfall_max', 'millet_ha', 'corn_ha', 'sorghum_ha', 'rice_ha', 'groundnuts_ha']] exog=df[['LST_day', 'LST_night','population','millet_tons', 'corn_tons', 'sorghum_tons', 'rice_tons', 'groundnuts_tons']] #model from statsmodels.tsa.vector_ar.vecm import VECM vecm = VECM(endog = endog,exog=exog,exog_coint=exog_coint, k_ar_diff = 1, coint_rank = 5, deterministic ='cili') vecm_fit = vecm.fit() import pickle with open('VECM_result.pkl', 'wb') as f: pickle.dump(vecm_fit ,f) pip install git+https://github.com/ml-libs/mlserve.git import mlserve
# First-difference the series and plot it.  `title`, `labels`, `plot`, `α`
# and `β` are not assigned before this point in the visible snippet.
df_diff_1 = difference(df)
comparison_plot(title, df_diff_1, α.T, β, labels, [0.1, 0.8], plot)

# %%

sample_adf_test(df_diff_1)

# %%

# VAR lag-order selection on the differenced data (up to 15 lags).
result = VAR(df_diff_1).select_order(maxlags=15)
result.ics
result.selected_orders

# %%

# VECM on the levels: one lagged difference, one cointegrating vector,
# deterministic="nc".
result = VECM(
    df,
    k_ar_diff=1,
    coint_rank=1,
    deterministic="nc",
).fit()
result.coint_rank
α = result.alpha
β = result.beta.T
result.gamma

# %%

residual_adf_test(df, β, report=True)

# %%

title = "Trivariate VECM 1 Cointegrating Vector ACF-PCF"
plot = "vecm_analysis_acf_pcf_1"
max_lag = 9
acf_pcf_plot(title, df, max_lag, plot)
# Lag-order selection
lag_order = select_order(
    data=consumer_df,
    maxlags=10,
    deterministic="ci",
    seasons=12,
)
print(lag_order.summary())
print(lag_order)

# Cointegration rank (trace test at the 5% level)
rank_test = select_coint_rank(consumer_df, 0, 2, method="trace", signif=0.05)
rank_test.rank
print(rank_test.summary())
print(rank_test)

# Parameter estimation with the AIC lag order and the tested rank
model = VECM(
    consumer_df,
    deterministic="ci",
    seasons=12,
    k_ar_diff=lag_order.aic,
    coint_rank=rank_test.rank,
)
vecm_res = model.fit()
vecm_res.summary()

# Point forecast, then forecast with interval bounds (alpha=0.05)
vecm_res.predict(steps=5)
vecm_res.predict(steps=5, alpha=0.05)
for text, values in zip(
    ("forecast", "lower", "upper"),
    vecm_res.predict(steps=5, alpha=0.05),
):
    print(text + ":", values, sep="\n")

vecm_res.plot_forecast(steps=12, n_last_obs=6)
# Rolling 244-row VECM on every ordered pair of symbols within an industry.
# For each window the estimated beta coefficients, the ADF statistic of the
# beta-weighted combination and a threshold flag are recorded.
for ind in Industry:
    print('Industry :', ind)
    sList = listedStock[listedStock['Industry'] == ind]['Symbol']
    for i in sList:
        for j in sList:
            # `i > j` already rules out i == j, so each pair is visited once.
            if not i > j:
                continue
            name = i + '_' + j
            pADF[name] = np.nan
            pHypo[name] = np.nan
            pBeta0[name] = np.nan
            pBeta1[name] = np.nan
            print(name)
            for k in range(len(stock) - 244):
                window = stock[[i, j]][k:k + 244]
                # Skip windows that contain any missing value.
                if window.isna().sum().sum() != 0:
                    continue
                res = VECM(window, coint_rank=1, deterministic='co').fit()
                x = (res.beta[0] * stock[i][k:k + 244]
                     + res.beta[1] * stock[j][k:k + 244])
                # NOTE(review): chained assignment (frame[col][row] = v) —
                # confirm it still writes through on the pandas version in use.
                pBeta0[name][k + 244] = res.beta[0]
                pBeta1[name][k + 244] = res.beta[1]
                c = adf(x[:244], regression='c')[0]
                pADF[name][k + 244] = c
                # Threshold presumably an ADF critical value — TODO confirm.
                pHypo[name][k + 244] = c <= -2.8741898504150574

# Remove the 'TEMP' column from every frame.
pADF.drop(['TEMP'], axis=1, inplace=True)
pHypo.drop(['TEMP'], axis=1, inplace=True)
pBeta0.drop(['TEMP'], axis=1, inplace=True)
pBeta1.drop(['TEMP'], axis=1, inplace=True)
stock.drop(['TEMP'], axis=1, inplace=True)
def analyze(df, start_time, end_time, symbols):
    """Test ``df`` for cointegration and summarise the cointegrated series.

    Steps: select the VECM lag order by AIC, run ``select_coint_rank``, and,
    when a non-zero rank is found, fit a VECM, build the cointegrated series
    from ``beta`` and the in-cointegration constant, run an ADF test on it,
    and save a plot.  The collected ``results`` dict is written to
    ``results/<file_name>.txt`` and returned.

    Args:
        df: DataFrame of price series, one column per symbol; its index is
            used as the x-axis of the plot.
        start_time: int, or a string that ``string_to_timestamp`` accepts.
        end_time: int, or a string that ``string_to_timestamp`` accepts.
        symbols: iterable of symbol names; ``/`` is replaced by ``_`` when
            building the file name.

    Returns:
        dict with the selected order and rank and, when the rank is non-zero,
        the ``cointegrated_*`` statistics and the image path.
    """
    import os

    def _file_part(moment):
        # Ints are used as-is; strings get spaces replaced for the file name.
        return moment if isinstance(moment, int) else moment.replace(' ', '_')

    def _timestamp(moment):
        return moment if isinstance(moment, int) else string_to_timestamp(moment)

    file_name = "{}_{}_{}_coint_series".format(
        '_'.join(s.replace('/', '_') for s in symbols),
        _file_part(start_time),
        _file_part(end_time),
    )
    results = {
        'symbols': str(symbols),
        'ts_start': _timestamp(start_time),
        'ts_end': _timestamp(end_time),
    }

    data = df.to_numpy()
    max_order = int(data.shape[0] / 8)
    select_order_res = select_order(data, max_order, deterministic="ci")
    # print(select_order_res.summary())
    selected_aic_order = select_order_res.selected_orders['aic']
    results['selected_order'] = selected_aic_order

    select_coint_rank_result = select_coint_rank(data, 0, selected_aic_order)
    print(select_coint_rank_result.summary())
    # First hypothesis whose statistic is below its critical value; stays 0
    # when none is.
    selected_coint_rank = next(
        (
            idx
            for idx, (stat, crit) in enumerate(
                zip(select_coint_rank_result.test_stats,
                    select_coint_rank_result.crit_vals)
            )
            if stat < crit
        ),
        0,
    )
    results['selected_rank'] = selected_coint_rank

    if selected_coint_rank != 0:
        model = VECM(data, deterministic="ci", k_ar_diff=selected_aic_order,
                     coint_rank=selected_coint_rank)
        res = model.fit()
        results['cointegrated_alpha'] = np.array2string(res.alpha.flatten())
        results['cointegrated_beta'] = np.array2string(res.beta.flatten())
        results['cointegrated_constant'] = res.det_coef_coint.flatten()[0]

        cointegrated_series = (
            np.dot(data, res.beta).flatten() + results['cointegrated_constant']
        )
        cointegrated_mean = np.mean(cointegrated_series)
        results['cointegrated_mean'] = cointegrated_mean
        results['cointegrated_std'] = np.std(cointegrated_series)
        results['cointegrated_max_deviation'] = np.amax(
            np.absolute(cointegrated_series - cointegrated_mean)
        )

        adf_res = adfuller(cointegrated_series, maxlag=max_order,
                           store=True, regresults=True)
        results['cointegrated_adf_p_value'] = adf_res[1]
        results['cointegrated_adf_lag'] = adf_res[3].usedlag
        # Half-life derived from the first coefficient of the ADF regression.
        results['cointegrated_half_life'] = (
            -math.log(2) / adf_res[3].resols.params[0]
        )

        path = "figures/{}.png".format(file_name)
        results['cointegrated_series_img_path'] = path
        os.makedirs("figures", exist_ok=True)
        fig, ax = plt.subplots()
        try:
            ax.plot(df.index.to_numpy(), cointegrated_series)
            fig.savefig(path)
        finally:
            # Release the figure; otherwise every call leaves one open.
            plt.close(fig)

    # NOTE(review): the original indentation was lost; the results file is
    # assumed to be written whether or not cointegration was found — confirm.
    os.makedirs("results", exist_ok=True)
    with open("results/{}.txt".format(file_name), "w") as result_file:
        result_file.write("{}".format(results))
    return results


# from dataloader import load_data
# dataframe = load_data('2019-07-28', '2019-07-29', ['BTC', 'ETH'], [])
# print(analyze(dataframe, '2019-07-28', '2019-07-29', ['BTC', 'ETH']))
def get_cointegrated_beta(combined_ratio_series):
    """Fit a VECM with its default settings and return the ``beta`` estimate.

    Args:
        combined_ratio_series: data handed to ``VECM`` as the endogenous
            series.

    Returns:
        The ``beta`` attribute of the fitted result.
    """
    return VECM(combined_ratio_series).fit().beta
# In[67]:

# Cointegration rank via the maximum-eigenvalue test at the 1% level.
rank2 = select_coint_rank(
    train_ecm,
    det_order=1,
    k_ar_diff=3,
    method='maxeig',
    signif=0.01,
)
print(rank2.summary())

# In[69]:

from statsmodels.tsa.vector_ar.vecm import VECM

# VECM
vecm = VECM(train_ecm, k_ar_diff=3, coint_rank=3, deterministic='ci')
"""estimates the VECM on the prices with 3 lags, 3 cointegrating relationship, and a constant within the cointegration relationship"""
vecm_fit = vecm.fit()
print(vecm_fit.summary())

#
# ## Checking residual auto-correlation

# In[70]:

from statsmodels.stats.stattools import durbin_watson

# One Durbin-Watson statistic per residual column, printed by column name.
out = durbin_watson(vecm_fit.resid)
for col, val in zip(train_ecm.columns, out):
    print((col), ':', round(val, 2))
# print("Max eigen stats are: {}".format(johansen_result.lr2))  # max eigen stat
# print("Max eigen stats critical values are: {}".format(johansen_result.cvm))  # max eigen critical values
# print("Eigen values: {}".format(johansen_result.eig))
# print("Eigen vectors: {}".format(johansen_result.evec))
# portfolio = np.dot(data, johansen_result.evec[0])
# fig, ax = plt.subplots()
# ax.plot(times, portfolio)
# plt.show()

from statsmodels.tsa.vector_ar.vecm import VECM, select_order, select_coint_rank

# print(select_order(data, 50, deterministic="co").summary())
# print(select_coint_rank(data, 0, 43).summary())

# Fit a VECM with one lagged difference and a constant outside the
# cointegration relation, then dump the estimated parameters.
model = VECM(data, deterministic="co", k_ar_diff=1)
res = model.fit()
print("Alpha: {}".format(res.alpha))
print("Beta: {}".format(res.beta))  # cointegration vector
print(np.dot(res.alpha, np.transpose(res.beta)))
print("Gamma: {}".format(res.gamma))
print(res.sigma_u)
print(res.det_coef_coint)
print("Residual: {}".format(res.resid))
print(res.det_coef)

from statsmodels.tsa.stattools import adfuller

# ADF test on the series obtained by projecting the data onto beta.
# reshape(-1) replaces the hard-coded reshape(9999, ), which only worked
# when `data` had exactly 9999 rows.
coint_series = np.dot(data, res.beta).reshape(-1)
result = adfuller(coint_series)
print(result)
# In[14]: #print max eigen rank print("max eigen cointegration rank: ", vec_rank2.rank) # In[23]: ## fit model nc model #k_ar_diff set to 3 #coint_rank set to 4 vecm = VECM(endog = data, k_ar_diff = 2, coint_rank =4, deterministic = 'nc') vecm_fit = vecm.fit() # In[24]: #create plot for equilbirum exchange vs acutal exchange nc_equilibrium = pd.DataFrame(vecm_fit.resid)[0].values + data['AEXCAUS'].iloc[3:].values pd.DataFrame([pd.DataFrame(vecm_fit.resid)[0].values + data['AEXCAUS'].iloc[3:].values, data['AEXCAUS'].iloc[3:].values], index=['Coint 4 Equilibrium','Actuals'], columns=data['AEXCAUS'].iloc[3:].index)\ .T\ .plot(figsize=(10,7), title = 'CAD/USD FX Equilibrium vs Actuals Overtime',
# Cut every price series down to its [start, end] index range (inclusive).
sliced_prices = [
    price[start_indices[idx]:end_indices[idx] + 1]
    for idx, price in enumerate(prices)
]
# One row of closing prices per series; timestamps come from the first one.
data = np.asarray(
    [[candle['close'] for candle in price] for price in sliced_prices]
)
times = [candle['time'] for candle in sliced_prices[0]]

# ADF test on each series, printing the first three result fields, the lag
# used, the regression summary and the implied half-life.
for series in data:
    results = adfuller(series.reshape(len(series)),
                       store=True,
                       regresults=True)
    print(results[0:3])
    print(results[3].usedlag)
    print(results[3].resols.summary())
    print(-math.log(2) / results[3].resols.params[0])

# NOTE(review): original indentation was lost; exit() is assumed to follow
# the loop. Either way, everything below it is unreachable — confirm intent.
exit()

data_to_fit = data.transpose()
print(select_order(data_to_fit, 50, deterministic="co").selected_orders)
print(select_coint_rank(data_to_fit, 0, 15).test_stats)

# exit()
model = VECM(data_to_fit, deterministic="co", k_ar_diff=15)
res = model.fit()
# NOTE(review): `ax` is not assigned in the visible snippet — confirm it is
# created earlier in the file.
ax.plot(times, np.dot(data_to_fit, res.beta))
plt.show()