def vecm(self):
    """Fit a VECM on ``self.y_t`` and return the cleaned-up fit results.

    The model is built with ``self.k_ar_diff`` lagged differences, the
    deterministic terms named by ``self.deterministic`` and a cointegration
    rank of ``self.neqs - 1``.  The fitted results object is handed to
    ``self.get_clean_results`` and whatever that returns is returned here.
    """
    # statsmodels is imported at call time rather than at module import.
    from statsmodels.tsa.vector_ar.vecm import VECM

    fit_options = {
        "k_ar_diff": self.k_ar_diff,
        "deterministic": self.deterministic,
        "coint_rank": self.neqs - 1,
    }
    fitted = VECM(self.y_t, **fit_options).fit()
    return self.get_clean_results(fitted)
def _vecm_one_step_forecast(X_train, k_ar_diff):
    """Fit a VECM (deterministic="co") on X_train and return the one-step
    forecast of its first column."""
    fitted = VECM(X_train, k_ar_diff=k_ar_diff, deterministic="co").fit()
    return fitted.predict(steps=1)[0, 0]


def vecm_one_target(X, nbre_preds, p):
    """
    Produce rolling one-step-ahead VECM forecasts of the target variable.

    For each of the last ``nbre_preds`` rows of ``X`` a VECM is fitted on all
    rows that precede it, and the next value of the first column is forecast.

    X: np array where the first column represents the target variable
    nbre_preds: number of predictions
    p: lag parameter (``k_ar_diff``); when fitting with ``p`` fails, the fit
       is retried once with a single lagged difference

    Returns a one-column DataFrame holding the ``nbre_preds`` forecasts, or
    an empty DataFrame as soon as one step fails with both lag settings.
    """
    predictions = []

    for j in range(nbre_preds):
        # Training window grows by one row per step and always ends just
        # before the row being predicted.
        X_train = X[: X.shape[0] - nbre_preds + j, :]
        try:
            yhat = _vecm_one_step_forecast(X_train, p)
        except Exception:
            # `except Exception` (not a bare except) so Ctrl-C and
            # SystemExit are no longer swallowed by the fallback.
            try:
                yhat = _vecm_one_step_forecast(X_train, 1)
            except Exception:
                print("Still does not work")
                return pd.DataFrame()
        predictions.append(yhat)

    return pd.DataFrame(predictions)
Example #3
0
def vecm_estimate(df, maxlags, rank, deterministic="nc", report=False):
    """Fit a VECM to ``df`` and return the fitted results object.

    ``maxlags`` is passed to the model as ``k_ar_diff`` and ``rank`` as
    ``coint_rank``.  With ``report=True`` the cointegration rank and the
    alpha, beta and gamma attributes of the fit are printed before returning.
    """
    fitted = VECM(
        df,
        k_ar_diff=maxlags,
        coint_rank=rank,
        deterministic=deterministic,
    ).fit()

    if report:
        # (printed label, attribute of the fitted results)
        report_items = (
            ("Rank", "coint_rank"),
            ("α", "alpha"),
            ("β", "beta"),
            ("γ", "gamma"),
        )
        for label, attribute in report_items:
            print(f"{label}={getattr(fitted, attribute)}")

    return fitted
def predict_target(data, prediction_model, method, nb_predictors,
                   causality_graph):
    """Forecast the next five values of the target column of ``data``.

    The target column name and the VECM lag order are not parameters: they
    are read from the names ``target`` and ``lag`` defined outside this
    function.

    data: DataFrame holding the target and the candidate predictor columns
    prediction_model: model name; a name containing "Arima" selects the
        univariate ``auto_arima`` forecast, anything else a VECM
    method: feature-selection name; a name containing "PEHAR" selects
        ``pehar_feature_selection``, any other value uses
        ``gfsm_feature_selection``
    nb_predictors: number of predictor columns to keep
    causality_graph: passed through to the feature-selection helper

    Returns the five forecasts of the target, or a list of five NaN when the
    model could not be fitted.
    """
    n_steps = 5
    target_index = list(data.columns).index(target)

    if "Arima" in prediction_model:
        try:
            target_values = data.loc[:, target].values
            model = auto_arima(target_values, start_p=1, start_q=1)
            predictions = model.fit_predict(target_values, n_periods=n_steps)
        except Exception:
            # This branch used to report a VECM failure although only the
            # ARIMA model is involved here.
            print("ARIMA failed for this variable")
            predictions = [float('nan')] * n_steps
        return predictions

    if "PEHAR" in method:
        predictors_index = pehar_feature_selection(
            causality_graph, target_index)[0:nb_predictors]
    else:
        # The previous test was `elif "GFSM":`, which is always true, so GFSM
        # has always been the fallback for every non-PEHAR method.  Spelled
        # out as `else` to keep that behaviour without the misleading test.
        predictors_index = gfsm_feature_selection(causality_graph,
                                                  target_index,
                                                  nb_predictors)

    # Target variable goes in the first column.  A new list is built instead
    # of inserting into the helper's return value.
    column_positions = [target_index, *predictors_index]

    # Construct data from the columns
    X_train = data.iloc[:, column_positions].values

    try:
        model = VECM(X_train, k_ar_diff=lag, deterministic="ci").fit()
        # Keep only the target (first column) of the five-step forecast.
        predictions = model.predict(steps=n_steps)[:, 0]
    except Exception:
        print("VECM falied for this variable")
        predictions = [float('nan')] * n_steps

    return predictions
Example #5
0
    def fit(self, model_state, endog_data: pd.DataFrame, exp_name: str, exog_data=None):
        """Walk-forward evaluate a VECM on the tail of ``endog_data``.

        The first 90% of the rows (rounded down) form the initial training
        window.  For every remaining row a VECM with default settings is
        re-fitted on all rows before it and a one-step forecast is made.
        Per-column R² scores of those forecasts are passed to
        ``store_exp_results`` and returned.

        Args:
            model_state: Not used by this method.
            endog_data: Endogenous series, one column per variable.
            exp_name: Experiment name forwarded to ``store_exp_results``.
            exog_data: Optional exogenous regressors; sliced with the same
                row positions as ``endog_data``.

        Returns:
            Tuple ``(vecm_model_fit, predictions, fit_score)``: the results
            of the last fit (``None`` if there was no held-out row), an
            array with one row per held-out sample and one column per
            endogenous variable, and ``{'states': [...], 'r2': [...]}``.
        """
        from statsmodels.tsa.vector_ar.vecm import VECM

        # Hardcoded values to have consistency in models.
        train_pct = 0.9
        prediction_steps = 1
        total_samples = len(endog_data.index)
        training_sample_size = int(train_pct * total_samples)
        test_sample_size = total_samples - training_sample_size

        test_data = endog_data.iloc[training_sample_size:]
        num_endog_vars = len(endog_data.columns)

        vecm_model_fit = None
        step_forecasts = []
        for _test_idx in range(test_sample_size):
            train_end = training_sample_size + _test_idx
            endog_train_data = endog_data.iloc[:train_end]
            exog_train_data = None
            exog_fc = None
            if exog_data is not None:
                exog_train_data = exog_data.iloc[:train_end]
                # Exogenous values for the forecast horizon are the exog rows
                # right after the training window.  (Rows of the endogenous
                # test data were passed here before.)
                exog_fc = exog_data.iloc[train_end:train_end + prediction_steps]

            vecm_model_fit = VECM(endog_train_data, exog=exog_train_data).fit()
            step_forecasts.append(
                vecm_model_fit.predict(prediction_steps, exog_fc=exog_fc))

        # Stack once at the end instead of re-allocating with np.append on
        # every iteration.
        if step_forecasts:
            predictions = np.concatenate(step_forecasts, axis=0)
        else:
            predictions = np.empty((0, num_endog_vars), float)

        fit_score = {'states': [], 'r2': []}
        for column_pos, _state in enumerate(endog_data.columns):
            y_obs = test_data[_state].to_numpy()
            y_pred = predictions[:, column_pos]
            fit_score['states'].append(_state)
            fit_score['r2'].append(r2_score(y_obs, y_pred))

        store_exp_results(fit_score, exog_data is not None, exp_name)
        return vecm_model_fit, predictions, fit_score
            if joh_trace.lr1[i] > joh_trace.cvt[i, 1]:
                r = i + 1
        joh_trace.r = r

        return joh_trace

# Johansen trace test for every lag length from 1 to 6 months
for i in range(1, 7):
    joh_trace = johansen_trace(df_selected, i)
    print(
        'Using the Trace Test, there are',
        joh_trace.r,
        'cointegrating vectors at \n    %s lags between the series' % i,
    )
    print()


# Vector error correction model (VECM) ========================================
# Fitted on the closing prices with 6 lagged differences, 1 cointegrating
# relationship and a constant inside the cointegration relationship ('ci').
model_vecm = VECM(
    endog=df_selected,
    k_ar_diff=6,
    coint_rank=1,
    deterministic='ci',
)
model_vecm_fit = model_vecm.fit()
model_vecm_fit.summary()


# Impulse response function ===================================================
# 24 periods, plotted without orthogonalisation.
irf = model_vecm_fit.irf(24)
irf.plot(orth=False)


# Dynamic forecasting =========================================================
# 12 steps ahead, drawn together with the last 60 observations.
model_vecm_fit.plot_forecast(12, n_last_obs=60)
Example #7
0
       'sorghum_tons', 'rice_tons', 'groundnuts_tons']]
       
# Columns passed to the model as `exog_coint`.
exog_coint = df[[
    'NDVI_min', 'NDVI_avg', 'NDVI_max',
    'rainfall_min', 'rainfall_avg', 'rainfall_max',
    'millet_ha', 'corn_ha', 'sorghum_ha', 'rice_ha', 'groundnuts_ha',
]]

# Columns passed to the model as `exog`.
exog = df[[
    'LST_day', 'LST_night', 'population',
    'millet_tons', 'corn_tons', 'sorghum_tons', 'rice_tons',
    'groundnuts_tons',
]]


# model
from statsmodels.tsa.vector_ar.vecm import VECM
vecm = VECM(
    endog=endog,
    exog=exog,
    exog_coint=exog_coint,
    k_ar_diff=1,
    coint_rank=5,
    deterministic='cili',
)
vecm_fit = vecm.fit()


# Persist the fitted results object to disk.
import pickle
with open('VECM_result.pkl', 'wb') as f:
    pickle.dump(vecm_fit, f)





pip install git+https://github.com/ml-libs/mlserve.git

import mlserve
# Notebook-style cells ("# %%") exploring a VECM on `df`.
# NOTE(review): `title`, `α`, `β`, `labels` and `plot` are read on the second
# line below but are only assigned further down (or not at all) in the visible
# code; presumably they come from cells executed earlier. Confirm.
df_diff_1 = difference(df)
comparison_plot(title, df_diff_1, α.T, β, labels, [0.1, 0.8], plot)

# %%

sample_adf_test(df_diff_1)

# %%

# VAR lag-order selection on the differenced data, up to 15 lags.
# The two bare expressions only display something when run as a notebook cell.
result = VAR(df_diff_1).select_order(maxlags=15)
result.ics
result.selected_orders

# %%

# VECM on the undifferenced data: 1 lagged difference, cointegration rank 1,
# deterministic="nc". `result` is rebound here, replacing the VAR selection.
result = VECM(df, k_ar_diff=1, coint_rank=1, deterministic="nc").fit()
result.coint_rank
α = result.alpha
# β is stored transposed relative to `result.beta`.
β = result.beta.T
result.gamma

# %%

residual_adf_test(df, β, report=True)

# %%

# `title` and `plot` are rebound for the ACF/PCF figure.
title = "Trivariate VECM 1 Cointegrating Vector ACF-PCF"
plot = "vecm_analysis_acf_pcf_1"
max_lag = 9
acf_pcf_plot(title, df, max_lag, plot)
Example #9
0
# Lag order selection: up to 10 lags, constant inside the cointegration
# relation, 12 seasons.
lag_order = select_order(data=consumer_df, maxlags=10, deterministic="ci", seasons=12)
print(lag_order.summary())
print(lag_order)

# Cointegration rank: trace test at the 5% level.
rank_test = select_coint_rank(
    consumer_df,
    0,
    2,
    method="trace",
    signif=0.05,
)
rank_test.rank
print(rank_test.summary())
print(rank_test)

# Parameter estimation, using the AIC lag order and the tested rank.
model = VECM(
    consumer_df,
    deterministic="ci",
    seasons=12,
    k_ar_diff=lag_order.aic,
    coint_rank=rank_test.rank,
)
vecm_res = model.fit()
vecm_res.summary()

# Forecasts: point forecast only, then with alpha=0.05 interval bounds.
vecm_res.predict(steps=5)
vecm_res.predict(steps=5, alpha=0.05)
for text, vaĺues in zip(
    ("forecast", "lower", "upper"),
    vecm_res.predict(steps=5, alpha=0.05),
):
    print(text + ":", vaĺues, sep="\n")

vecm_res.plot_forecast(steps=12, n_last_obs=6)
Example #10
0
# For every industry, fit a VECM on each rolling 244-row window of every pair
# of its symbols, and record the estimated beta coefficients plus an ADF
# statistic of the beta-weighted combination of the two series.
for ind in Industry:
    print('Industry :', ind)
    sList = listedStock[listedStock['Industry'] == ind]['Symbol']
    for i in sList:
        for j in sList:
            # `i > j` already implies `i != j`, so each unordered pair is
            # handled exactly once.
            if i != j and i > j:
                name = i + '_' + j
                # One result column per pair, initialised to NaN.
                pADF[name] = np.nan
                pHypo[name] = np.nan
                pBeta0[name] = np.nan
                pBeta1[name] = np.nan
                print(name)
                for k in range(len(stock) - 244):
                    # Skip windows that contain any missing value.
                    if stock[[i, j]][k:k + 244].isna().sum().sum() == 0:
                        mdl = VECM(stock[[i, j]][k:k + 244],
                                   coint_rank=1,
                                   deterministic='co')
                        res = mdl.fit()
                        # Combination of the two series weighted by beta.
                        x = (res.beta[0] * stock[i][k:k + 244] +
                             res.beta[1] * stock[j][k:k + 244])
                        # Results are stored at k + 244, i.e. the row right
                        # after the window (rows k .. k + 243).
                        # NOTE(review): `frame[name][row] = value` is chained
                        # assignment; pandas documents that such writes can
                        # land on a temporary copy (always under
                        # Copy-on-Write). Confirm they reach the frames.
                        pBeta0[name][k + 244] = res.beta[0]
                        pBeta1[name][k + 244] = res.beta[1]
                        # `x` already has 244 rows, so the slice keeps all of
                        # it. `[0]` is presumably the ADF test statistic;
                        # verify against how `adf` is imported.
                        c = adf(x[:244], regression='c')[0]
                        pADF[name][k + 244] = c
                        # NOTE(review): the threshold looks like an ADF
                        # critical value for this sample size; confirm the
                        # significance level it corresponds to.
                        pHypo[name][k + 244] = c <= -2.8741898504150574

# NOTE(review): 'TEMP' is presumably a placeholder column added when these
# frames were created (not visible here); confirm.
pADF.drop(['TEMP'], axis=1, inplace=True)
pHypo.drop(['TEMP'], axis=1, inplace=True)
pBeta0.drop(['TEMP'], axis=1, inplace=True)
pBeta1.drop(['TEMP'], axis=1, inplace=True)
stock.drop(['TEMP'], axis=1, inplace=True)
Example #11
0
def analyze(df, start_time, end_time, symbols):
  """Test the columns of ``df`` for cointegration and persist a summary.

  The VECM lag order is selected by AIC, then the cointegration rank is
  determined.  When the rank is non-zero a VECM is fitted, the cointegrated
  series is built from the estimated beta and constant, and its mean,
  standard deviation, maximum deviation, ADF test and half-life are recorded
  together with a plot.

  df: DataFrame with one column per series; its index is the plot's x-axis
  start_time, end_time: int timestamps, or strings; strings are converted
    with ``string_to_timestamp`` for the results and get their spaces
    replaced by underscores for the file name
  symbols: iterable of symbol strings; ``/`` becomes ``_`` in the file name

  Returns the ``results`` dict, which is also written as text to
  ``results/<file_name>.txt``.  The plot is saved to
  ``figures/<file_name>.png``.  Neither directory is created here.
  """
  start_is_int = isinstance(start_time, int)
  end_is_int = isinstance(end_time, int)

  file_name = "{}_{}_{}_coint_series".format(
    '_'.join(s.replace('/', '_') for s in symbols),
    start_time if start_is_int else start_time.replace(' ', '_'),
    end_time if end_is_int else end_time.replace(' ', '_')
  )

  results = {
    'symbols': str(symbols),
    'ts_start': start_time if start_is_int else string_to_timestamp(start_time),
    'ts_end': end_time if end_is_int else string_to_timestamp(end_time)
  }

  data = df.to_numpy()
  # Allow lags up to one eighth of the sample length.
  max_order = data.shape[0] // 8
  select_order_res = select_order(data, max_order, deterministic="ci")
  # print(select_order_res.summary())
  selected_aic_order = select_order_res.selected_orders['aic']
  results['selected_order'] = selected_aic_order
  select_coint_rank_result = select_coint_rank(data, 0, selected_aic_order)
  print(select_coint_rank_result.summary())

  # The rank is the position of the first test statistic that falls below its
  # critical value; it stays 0 when no statistic does.
  selected_coint_rank = 0
  rank_tests = zip(select_coint_rank_result.test_stats,
                   select_coint_rank_result.crit_vals)
  for rank, (test_stat, crit_val) in enumerate(rank_tests):
    if test_stat < crit_val:
      selected_coint_rank = rank
      break
  results['selected_rank'] = selected_coint_rank

  if selected_coint_rank != 0:
    model = VECM(data, deterministic="ci", k_ar_diff=selected_aic_order, coint_rank=selected_coint_rank)
    res = model.fit()
    results['cointegrated_alpha'] = np.array2string(res.alpha.flatten())
    results['cointegrated_beta'] = np.array2string(res.beta.flatten())
    results['cointegrated_constant'] = res.det_coef_coint.flatten()[0]
    cointegrated_series = np.dot(data, res.beta).flatten() + results['cointegrated_constant']
    cointegrated_mean = np.mean(cointegrated_series)
    results['cointegrated_mean'] = cointegrated_mean
    results['cointegrated_std'] = np.std(cointegrated_series)
    results['cointegrated_max_deviation'] = np.amax(
      np.absolute(cointegrated_series - cointegrated_mean))

    # store=True / regresults=True make element 3 a results store exposing
    # the used lag and the underlying regression (`resols`).
    adf_res = adfuller(cointegrated_series, maxlag=max_order, store=True, regresults=True)
    results['cointegrated_adf_p_value'] = adf_res[1]
    results['cointegrated_adf_lag'] = adf_res[3].usedlag
    # Half-life computed from the first coefficient of the ADF regression.
    results['cointegrated_half_life'] = -math.log(2) / adf_res[3].resols.params[0]

    fig, ax = plt.subplots()
    try:
      ax.plot(df.index.to_numpy(), cointegrated_series)
      path = "figures/{}.png".format(file_name)
      results['cointegrated_series_img_path'] = path
      fig.savefig(path)
    finally:
      # Close the figure so repeated calls do not accumulate open figures.
      plt.close(fig)

  with open("results/{}.txt".format(file_name), "w") as result_file:
    result_file.write("{}".format(results))
  return results

# from dataloader import load_data

# dataframe = load_data('2019-07-28', '2019-07-29', ['BTC', 'ETH'], [])
# print(analyze(dataframe, '2019-07-28', '2019-07-29', ['BTC', 'ETH']))
Example #12
0
 def get_cointegrated_beta(combined_ratio_series):
     """Fit a VECM with default settings and return its ``beta`` attribute."""
     fitted = VECM(combined_ratio_series).fit()
     cointegration_vectors = fitted.beta
     return cointegration_vectors
Example #13
0
# In[67]:

# Cointegration rank by the maximum-eigenvalue test at the 1% level.
rank2 = select_coint_rank(train_ecm, det_order=1, k_ar_diff=3, method='maxeig', signif=0.01)

print(rank2.summary())

# In[69]:

from statsmodels.tsa.vector_ar.vecm import VECM
# VECM
# Estimates the VECM on the prices with 3 lags, 3 cointegrating
# relationships, and a constant within the cointegration relationship.
vecm = VECM(
    train_ecm,
    k_ar_diff=3,
    coint_rank=3,
    deterministic='ci',
)
vecm_fit = vecm.fit()
print(vecm_fit.summary())

#
# ## Checking residual auto-correlation

# In[70]:

from statsmodels.stats.stattools import durbin_watson
# One Durbin-Watson statistic per residual column, printed next to the
# matching column name of the training data.
out = durbin_watson(vecm_fit.resid)
for col, val in zip(train_ecm.columns, out):
    print(col, ':', round(val, 2))
Example #14
0
# print("Max eigen stats are: {}".format(johansen_result.lr2)) # max eigen stat
# print("Max eigen stats critical values are: {}".format(johansen_result.cvm)) # max eigen critical values

# print("Eigen values: {}".format(johansen_result.eig))
# print("Eigen vectors: {}".format(johansen_result.evec))

# portfolio = np.dot(data, johansen_result.evec[0])

# fig, ax = plt.subplots()
# ax.plot(times, portfolio)
# plt.show()

from statsmodels.tsa.vector_ar.vecm import VECM, select_order, select_coint_rank
# print(select_order(data, 50, deterministic="co").summary())
# print(select_coint_rank(data, 0, 43).summary())

# VECM with one lagged difference and deterministic="co"; the cointegration
# rank is left at the VECM default.
model = VECM(data, deterministic="co", k_ar_diff=1)
res = model.fit()
print("Alpha: {}".format(res.alpha))
print("Beta: {}".format(res.beta))  # cointegration vector
# alpha times beta transposed
print(np.dot(res.alpha, np.transpose(res.beta)))
print("Gamma: {}".format(res.gamma))
print(res.sigma_u)
print(res.det_coef_coint)
print("Residual: {}".format(res.resid))
print(res.det_coef)

from statsmodels.tsa.stattools import adfuller
# ADF test on the series obtained by weighting the data with beta.
# The result is flattened instead of reshaped to a hard-coded length of 9999,
# so the script no longer breaks for samples of any other size.
coint_series = np.dot(data, res.beta).flatten()
result = adfuller(coint_series)
print(result)
Example #15
0

# In[14]:


# print the rank found by the max-eigenvalue test
print("max eigen cointegration rank: ", vec_rank2.rank)


# In[23]:


## fit the model without deterministic terms ('nc')
# k_ar_diff set to 2
# coint_rank set to 4
vecm = VECM(
    endog=data,
    k_ar_diff=2,
    coint_rank=4,
    deterministic='nc',
)
vecm_fit = vecm.fit()


# In[24]:


# create plot for equilibrium exchange vs actual exchange
# First residual column plus the actual 'AEXCAUS' values; the first three
# rows of the data are dropped to line up with the residuals.
nc_equilibrium = (
    pd.DataFrame(vecm_fit.resid)[0].values
    + data['AEXCAUS'].iloc[3:].values
)
pd.DataFrame([pd.DataFrame(vecm_fit.resid)[0].values + data['AEXCAUS'].iloc[3:].values,
              data['AEXCAUS'].iloc[3:].values],
             index=['Coint 4 Equilibrium','Actuals'],
             columns=data['AEXCAUS'].iloc[3:].index)\
.T\
.plot(figsize=(10,7),
      title = 'CAD/USD FX Equilibrium vs Actuals Overtime',
Example #16
0
# Cut every price history down to its own [start, end] window (end inclusive).
sliced_prices = [
    history[start_indices[position]:end_indices[position] + 1]
    for position, history in enumerate(prices)
]
# One row of 'close' values per price history.
data = np.asarray([
    [point['close'] for point in history]
    for history in sliced_prices
])
# Timestamps are taken from the first history only.
times = [point['time'] for point in sliced_prices[0]]

# ADF test per series: statistic/p-value/lag tuple, used lag, regression
# summary, and the half-life from the first regression coefficient.
for series in data:
    results = adfuller(
        series.reshape(len(series)),
        store=True,
        regresults=True,
    )
    print(results[0:3])
    print(results[3].usedlag)
    print(results[3].resols.summary())
    print(-math.log(2) / results[3].resols.params[0])

# The script stops here; nothing below runs while this call is in place.
exit()

# VECM expects one column per series, hence the transpose.
data_to_fit = data.T
print(select_order(data_to_fit, 50, deterministic="co").selected_orders)
print(select_coint_rank(data_to_fit, 0, 15).test_stats)
# exit()
model = VECM(data_to_fit, deterministic="co", k_ar_diff=15)
res = model.fit()

# NOTE(review): `ax` is not assigned anywhere in the visible code.
ax.plot(times, np.dot(data_to_fit, res.beta))
plt.show()