def runCointegrationJohansen(self, johansenSignificant):
    """Run a Johansen trace test on ``self.data`` and report the outcome as a bool.

    The test is run with ``det_order=0`` and ``k_ar_diff=1``. One row is
    collected per index ``0 .. min(rank, neqs - 1)``, and the method returns
    ``True`` exactly when the number of collected rows equals the ``r_1``
    entry of the first row.

    Args:
        johansenSignificant: Significance level forwarded to
            ``vecm.select_coint_rank`` as ``signif``.

    Returns:
        bool: ``True`` if the row count matches the first ``r_1`` value,
        ``False`` otherwise.
    """
    outcome = vecm.select_coint_rank(
        self.data,
        det_order=0,
        k_ar_diff=1,
        method='trace',
        signif=johansenSignificant,
    )
    last_index = min(outcome.rank, outcome.neqs - 1)

    # One row per tested index: [index, r_1, test statistic, critical value].
    rows = []
    for idx in range(last_index + 1):
        rows.append([
            idx,
            outcome.r_1[idx],
            outcome.test_stats[idx],
            outcome.crit_vals[idx],
        ])

    return True if len(rows) == rows[0][1] else False
'PCE', 'ConConf', 'Unempl', 'HourlyEarning', 'CCredit', 'RetSales', 'HouseStarts' ] consumer_df = consumer_df.resample('1M').mean() type(consumer_df) # lag order selection lag_order = select_order(data=consumer_df, maxlags=10, deterministic="ci", seasons=12) print(lag_order.summary()) print(lag_order) # Cointegration rank rank_test = select_coint_rank(consumer_df, 0, 2, method="trace", signif=0.05) rank_test.rank print(rank_test.summary()) print(rank_test) # Parameter Estimation model = VECM(consumer_df, deterministic="ci", seasons=12, k_ar_diff=lag_order.aic, coint_rank=rank_test.rank) vecm_res = model.fit() vecm_res.summary() vecm_res.predict(steps=5) vecm_res.predict(steps=5, alpha=0.05)
def _vecm_first_series_forecast(frame, steps):
    """Fit a VECM on *frame* and forecast *steps* periods of its first column.

    The cointegration rank comes from a Johansen trace test at the 1% level
    (``det_order=0``, ``k_ar_diff=3``). The model itself is built with
    ``deterministic="ci"`` and ``seasons=4``; ``k_ar_diff`` is left at the
    library default.

    Returns:
        list: The first-column value of every forecast row, in order.
    """
    rank_test = vecm.select_coint_rank(frame, 0, 3, method="trace", signif=0.01)
    print("pasa")
    model = vecm.VECM(frame, deterministic="ci", seasons=4, coint_rank=rank_test.rank)
    print("define")
    fitted = model.fit()
    forecast = fitted.predict(steps=steps)
    return [row[0] for row in forecast]


def anomaly_vecm(list_var, num_fut=5, desv_mse=2, train=True, name='model-name'):
    """Run the VECM engine on several aligned series and return its output.

    The series are placed side by side as columns ``var_0``, ``var_1``, ...
    The first 70% of the rows are used to fit a model whose forecast of the
    first column is compared against the remaining 30% (alerts, metrics and
    debug output). A second model is then fitted on all rows to forecast
    ``num_fut`` future points of the first column.

    Args:
        list_var: Sequence of equally indexed series; element 0 is the series
            that is evaluated and forecast.
        num_fut: Number of future steps to forecast with the final model.
        desv_mse: Accepted for interface compatibility; not used here.
        train: Accepted for interface compatibility; not used here.
        name: Accepted for interface compatibility; not used here.

    Returns:
        The ``engine_output`` attribute of the engine created by
        ``engine_output_creation('vecm')``.

    Raises:
        ValueError: If ``list_var`` is empty.
    """
    if len(list_var) == 0:
        raise ValueError("list_var must contain at least one series")

    df_var = pd.DataFrame()
    for position, values in enumerate(list_var):
        df_var['var_{}'.format(position)] = values

    # Split: first 70% of the rows for fitting, the rest for evaluation.
    tam_train = int(len(df_var) * 0.7)
    df_train = df_var[:tam_train]
    print('Tamanio train: {}'.format(df_train.shape))
    # Copy so the helper columns added below never write through to df_var
    # (and do not trigger pandas' chained-assignment warning).
    df_test = df_var[tam_train:].copy()

    result = _vecm_first_series_forecast(df_train, len(df_test))

    engine = engine_output_creation('vecm')
    print("empieza")
    df_test['puntos'] = df_test.index
    df_test['valores'] = df_test[df_var.columns[0]]

    engine.alerts_creation(result, df_test)
    engine.metrics_generation(df_test[df_test.columns[0]].values, result)
    engine.debug_creation(result, df_test)

    # Refit on every available row to produce the out-of-sample forecast.
    result = _vecm_first_series_forecast(df_var, num_fut)
    engine.forecast_creation(result, df_var.shape[0], num_fut)

    return engine.engine_output
def analyze(df, start_time, end_time, symbols):
    """Test the columns of *df* for cointegration and persist the findings.

    A lag order is chosen by AIC, a cointegration rank is derived from the
    rank test, and, when that rank is non-zero, a VECM is fitted. The first
    cointegrating relation (``data @ beta[:, 0]`` plus its constant) is then
    summarised, ADF-tested and plotted.

    Side effects: prints the rank-test summary, saves a figure under
    ``figures/`` (only when a relation is found) and writes the result dict
    to ``results/``. Neither directory is created here.

    Args:
        df: DataFrame with one column per symbol; its index is used as the
            x axis of the figure.
        start_time: Start of the window, either an ``int`` (used as is) or a
            string (spaces replaced by underscores for the file name and
            converted with ``string_to_timestamp`` for the results).
        end_time: End of the window, handled like ``start_time``.
        symbols: Iterable of symbol strings; ``/`` is replaced by ``_`` when
            building the file name.

    Returns:
        dict: Always holds ``symbols``, ``ts_start``, ``ts_end``,
        ``selected_order`` and ``selected_rank``. When the selected rank is
        non-zero it also holds the ``cointegrated_*`` entries.
    """
    symbol_part = '_'.join(s.replace('/', '_') for s in symbols)
    start_is_int = isinstance(start_time, int)
    end_is_int = isinstance(end_time, int)
    file_name = "{}_{}_{}_coint_series".format(
        symbol_part,
        start_time if start_is_int else start_time.replace(' ', '_'),
        end_time if end_is_int else end_time.replace(' ', '_'),
    )
    results = {
        'symbols': str(symbols),
        'ts_start': start_time if start_is_int else string_to_timestamp(start_time),
        'ts_end': end_time if end_is_int else string_to_timestamp(end_time),
    }

    data = df.to_numpy()
    max_order = data.shape[0] // 8

    select_order_res = select_order(data, max_order, deterministic="ci")
    selected_aic_order = select_order_res.selected_orders['aic']
    results['selected_order'] = selected_aic_order

    select_coint_rank_result = select_coint_rank(data, 0, selected_aic_order)
    print(select_coint_rank_result.summary())

    # The rank is the first index whose statistic falls below its critical
    # value; if no index qualifies the rank stays 0.
    selected_coint_rank = 0
    stat_crit_pairs = zip(
        select_coint_rank_result.test_stats,
        select_coint_rank_result.crit_vals,
    )
    for candidate, (stat, crit) in enumerate(stat_crit_pairs):
        if stat < crit:
            selected_coint_rank = candidate
            break
    results['selected_rank'] = selected_coint_rank

    if selected_coint_rank != 0:
        model = VECM(
            data,
            deterministic="ci",
            k_ar_diff=selected_aic_order,
            coint_rank=selected_coint_rank,
        )
        res = model.fit()
        results['cointegrated_alpha'] = np.array2string(res.alpha.flatten())
        results['cointegrated_beta'] = np.array2string(res.beta.flatten())
        results['cointegrated_constant'] = res.det_coef_coint.flatten()[0]

        # Keep only the first cointegrating relation so the series has one
        # value per row of *data*. With rank > 1, flattening all relations
        # would give a series whose length no longer matches df.index.
        first_relation = np.dot(data, res.beta)[:, 0]
        cointegrated_series = first_relation + results['cointegrated_constant']

        cointegrated_mean = np.mean(cointegrated_series)
        results['cointegrated_mean'] = cointegrated_mean
        results['cointegrated_std'] = np.std(cointegrated_series)
        results['cointegrated_max_deviation'] = np.amax(
            np.absolute(cointegrated_series - cointegrated_mean)
        )

        adf_res = adfuller(
            cointegrated_series, maxlag=max_order, store=True, regresults=True
        )
        results['cointegrated_adf_p_value'] = adf_res[1]
        results['cointegrated_adf_lag'] = adf_res[3].usedlag
        # Half-life computed from the first coefficient of the ADF regression.
        results['cointegrated_half_life'] = (
            -math.log(2) / adf_res[3].resols.params[0]
        )

        fig, ax = plt.subplots()
        try:
            ax.plot(df.index.to_numpy(), cointegrated_series)
            path = "figures/{}.png".format(file_name)
            results['cointegrated_series_img_path'] = path
            fig.savefig(path)
        finally:
            # Release the figure so repeated calls do not accumulate open
            # matplotlib figures.
            plt.close(fig)

    # NOTE(review): the source layout was flattened; writing the results file
    # and returning are assumed to happen for every rank, not only when a
    # relation was found. Confirm against the original file.
    with open("results/{}.txt".format(file_name), "w") as result_file:
        result_file.write("{}".format(results))
    return results


# Example usage:
# from dataloader import load_data
# dataframe = load_data('2019-07-28', '2019-07-29', ['BTC', 'ETH'], [])
# print(analyze(dataframe, '2019-07-28', '2019-07-29', ['BTC', 'ETH']))
plot_predict(var_predict, df_tran)
forecast_evaluate(var_predict, df_tran)
(var_predict - df_tran).mean()

"""# VECM orginal data"""

from statsmodels.tsa.vector_ar import vecm
from statsmodels.tsa.api import VECM

# Hold out the last `steps` rows for evaluation.
steps = 12
train, test = data[:-steps], data[-steps:]

# Johansen trace test on the training window.
vec_rank = vecm.select_coint_rank(
    train,
    det_order=1,
    k_ar_diff=1,
    method='trace',
    signif=0.01,
)
print(vec_rank.summary())

vecm_model = VECM(endog=train, k_ar_diff=1, coint_rank=1, deterministic='ci')
vecm_fit = vecm_model.fit()
preds = vecm_fit.predict(steps=steps)

# Overwrite the held-out rows of a copy of `data` with the forecasts,
# one cell at a time.
vecm_predict = data.copy()
start = len(train)
n_columns = len(test.columns)
for offset in range(steps):
    forecast_row = preds[offset]
    for col in range(n_columns):
        vecm_predict.iloc[start + offset, col] = forecast_row[col]

plot_predict(vecm_predict, data)
forecast_evaluate(vecm_predict, data)
# - shows the long-run equilibrium relationships of the variables.
# - includes a short-run dynamic adjustment mechanism that describes how the
#   variables adjust when they are out of equilibrium.
# - uses adjustment coefficients to measure the forces that push the
#   relationship towards long-run equilibrium.
#
# ### Cointegration rank of a VECM

# In[19]:

from statsmodels.tsa.vector_ar.vecm import select_coint_rank

# In[68]:

rank1 = select_coint_rank(
    train_ecm,
    det_order=1,
    k_ar_diff=3,
    method='trace',
    signif=0.01,
)
print(rank1.summary())

# - The first column of the table shows the rank, i.e. the number of
#   cointegrating relationships for the dataset, while the second reports the
#   total number of equations.
# - The λtrace statistics are in the third column, together with the
#   corresponding critical values.
# - The first row of the table tests the null hypothesis of at most one
#   cointegrating vector against the alternative hypothesis that the number
#   of cointegrating equations is strictly larger than the number assumed
#   under the null hypothesis, i.e. larger than one.
# - The test statistic of 254.9 considerably exceeds the critical value
#   (117.0), so the null of at most one cointegrating vector is rejected.
# - The test statistic (89.92) also exceeds the critical value (87.77), so
#   the null of at most two cointegrating vectors is rejected at the 1% level.
# - The test statistic (48.59) does not exceed the critical value (62.52), so
#   the null of at most three cointegrating vectors cannot be rejected at the
#   1% level.
#
# Below is the test statistic based on the maximum eigenvalue:
#
# The maximum-eigenvalue statistic assumes a given number r of cointegrating
# relations under the null hypothesis and tests this against the alternative
# that there are r + 1 cointegrating equations.
# In[10]:

from statsmodels.tsa.vector_ar.vecm import VECM, select_coint_rank, select_order

# Select the k_ar_diff order for the 'nc' model and report the AIC choice.
nc_lags = select_order(data, maxlags=2, deterministic='nc')
aic_lag_order = nc_lags.aic
print(nc_lags.summary())
print("nc lags based on aic: ", aic_lag_order)

# In[11]:

# Trace rank.
# k_ar_diff is the AIC-selected lag order from the cell above.
vec_rank = select_coint_rank(
    data,
    det_order=-1,
    k_ar_diff=aic_lag_order,
    method='trace',
    signif=0.05,
)
print(vec_rank.summary())

# In[12]:

# Print the trace rank.
print("trace cointegration rank: ", vec_rank.rank)

# In[13]:

#eigen rank
#k_ar_diff set to 1 since all variables are unit root at diff = 1
# Cut every price series down to its own [start, end] index window (inclusive).
sliced_prices = [
    candles[start_indices[position]:end_indices[position] + 1]
    for position, candles in enumerate(prices)
]

# One row of closing prices per series; timestamps come from the first series.
data = np.asarray(
    [[candle['close'] for candle in window] for window in sliced_prices]
)
times = [candle['time'] for candle in sliced_prices[0]]

# ADF test per series: statistic / p-value / critical values, the lag used,
# the regression summary and a half-life from its first coefficient.
for series in data:
    results = adfuller(series.reshape(len(series)), store=True, regresults=True)
    print(results[:3])
    print(results[3].usedlag)
    print(results[3].resols.summary())
    print(-math.log(2) / results[3].resols.params[0])

# NOTE(review): the source layout was flattened; this exit() is assumed to
# sit after the loop. While it is active nothing below is executed.
exit()

data_to_fit = data.T
print(select_order(data_to_fit, maxlags=50, deterministic="co").selected_orders)
print(select_coint_rank(data_to_fit, det_order=0, k_ar_diff=15).test_stats)
# exit()

model = VECM(data_to_fit, deterministic="co", k_ar_diff=15)
res = model.fit()

# NOTE(review): `ax` is not defined in this fragment; presumably it is created
# earlier in the file. Verify.
ax.plot(times, np.dot(data_to_fit, res.beta))
plt.show()
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.api import VECM
from statsmodels.tsa.vector_ar.vecm import select_coint_rank
from matplotlib.ticker import FuncFormatter

# Load the series, index them by the "date" column and drop incomplete rows.
series_csv = pd.read_csv("../dados/series_log.csv", parse_dates=[0])
series = series_csv.set_index("date").dropna()

# endogenas = series.loc[:, ["spread", "selic", "ibc", "inad"]]
endogenas = series.loc[:, ["selic", "inad", "ibc", "spread"]]
exogenas = series.loc[:, ["igp"]].fillna(0)

# Johansen trace test, printed as a LaTeX tabular.
rank_summary = select_coint_rank(
    endog=endogenas,
    det_order=1,
    k_ar_diff=2,
    method="trace",
).summary()
print(rank_summary.as_latex_tabular())

# VECM with one cointegrating relation, "igp" as exogenous regressor and
# seasonal terms (seasons=12, first_season=3).
model = VECM(
    endog=endogenas,
    exog=exogenas,
    deterministic="co",
    k_ar_diff=2,
    coint_rank=1,
    dates=series.index,
    freq="MS",
    seasons=12,
    first_season=3,
)
vecm = model.fit()
print(vecm.summary())