def test_varmax_with_intercept(self):
    """Export a fitted VARMAX(1, 1) model to PMML and check the file exists.

    The model is built without an explicit ``trend`` argument and exported
    through ``StatsmodelsToPmml`` with ``conf_int=[80, 95]``.
    """
    series = self.get_data_for_varmax()
    pmml_path = 'varmax_with_intercept.pmml'

    fitted = VARMAX(series, order=(1, 1)).fit()
    StatsmodelsToPmml(fitted, pmml_path, conf_int=[80, 95])

    # The only check is that the exporter produced a file at the path.
    file_written = os.path.isfile(pmml_path)
    self.assertEqual(file_written, True)
def test_varmax_without_intercept2(self):
    """Export a VARMAX(1, 1) model fitted with ``trend=None`` to PMML.

    Passes when ``StatsmodelsToPmml`` leaves a file at the target path.
    """
    series = self.get_data_for_varmax()
    pmml_path = 'varmax_without_intercept2.pmml'

    fitted = VARMAX(series, order=(1, 1), trend=None).fit()
    StatsmodelsToPmml(fitted, pmml_path)

    # The only check is that the exporter produced a file at the path.
    file_written = os.path.isfile(pmml_path)
    self.assertEqual(file_written, True)
def varma(self, df, host):
    """Fit a VARMAX(2, 2) model on differenced data and report forecast error.

    The frame is differenced with ``Helper.diff_test``, cleaned of infinite
    and missing values, and split so that the last ``len/10 + 2`` rows are
    held out. A constant-trend VARMAX model is fitted on the remainder, the
    held-out horizon is forecast, and the forecast is passed through
    ``Helper.reverse_diff``. For every column of ``df`` the RMSE and the
    hold-out mean are printed and the series is plotted with its forecast.

    Args:
        df: Input frame; each column is modelled as one endogenous series.
        host: Not used by this method; kept for interface compatibility.
    """
    pd.plotting.register_matplotlib_converters()

    df_diffed, no_diffs = Helper.diff_test(df)
    print(df_diffed)

    # DataFrame.replace returns a new frame rather than modifying in place.
    # The result must be kept, otherwise infinities are never converted to
    # NaN and survive the dropna() that follows.
    df_diffed = df_diffed.replace([np.inf, -np.inf], np.nan)
    df_diffed = df_diffed.dropna()

    # Hold out roughly the last tenth of the rows (plus two) for evaluation.
    nobs = int(len(df_diffed) / 10) + 2
    train = df_diffed[:-nobs]
    test = df_diffed[-nobs:]

    model = VARMAX(train, order=(2, 2), trend='c')
    results = model.fit(maxiter=1000, disp=False)
    print(results.summary())

    df_forecast = results.forecast(nobs)
    print(df_forecast)

    df_fixed = Helper.reverse_diff(df_forecast, df, nobs, no_diffs)
    print(df_fixed)

    # NOTE(review): `test` comes from the differenced frame while `df_fixed`
    # is the output of Helper.reverse_diff - confirm both are on the same
    # scale before trusting the RMSE.
    # NOTE(review): the original indentation was lost; one figure per column
    # (plt.show() inside the loop) is assumed here.
    for col in df.columns:
        forecast_col = col + '_forecast'
        print("-- RMSE --")
        print(rmse(test[col], df_fixed[forecast_col]))
        print("-- Mean --")
        print(test[col].mean())
        df[col].plot(legend=True)
        df_fixed[forecast_col].plot(legend=True)
        plt.show()
def test_varmax_without_intercept(self):
    """Export a VARMAX(1, 1) model fitted with ``trend=None`` via ArimaToPMML.

    Passes when ``self.schema.is_valid`` returns True for the written file.
    """
    series = self.statsmodels_data_helper.get_data_for_varmax()
    pmml_path = 'varmax_without_intercept.pmml'

    fitted = VARMAX(series, order=(1, 1), trend=None).fit()
    ArimaToPMML(fitted, pmml_path)

    schema_ok = self.schema.is_valid(pmml_path)
    self.assertEqual(schema_ok, True)
def VARMAXfit(self, endog, exog, order=(2, 0), maxiter=1000):
    """Fit a VARMAX model with exogenous regressors and no trend term.

    Args:
        endog: Endogenous data; rows with missing values are dropped.
        exog: Exogenous data; rows with missing values are dropped.
        order: ``order`` argument forwarded to ``VARMAX``.
        maxiter: ``maxiter`` argument forwarded to ``fit``.

    Returns:
        The object returned by ``VARMAX(...).fit(...)``.
    """
    # NOTE(review): the two inputs are cleaned independently, so they can
    # end up with different rows - confirm callers pass aligned data.
    endog_clean = endog.dropna()
    exog_clean = exog.dropna()

    spec = VARMAX(endog_clean, order=order, trend='n', exog=exog_clean)
    return spec.fit(maxiter=maxiter, disp=False)
def VARMAXforecastPlot(self, endog, train_fraction=0.66, steps=1, order=(2, 0),
                       maxiter=1000, verbose=False, error_cov_type='diagonal',
                       title='', titleFontSize=12, labelFontSize=10,
                       figureSize=(20, 6), lineWidth=1.0):
    """Run a walk-forward VARMAX forecast and plot it against the data.

    Rows of ``endog`` containing missing values are dropped. The first
    ``train_fraction`` of the remaining rows seed the history; for each later
    row a new model is fitted on the history so far, a forecast is made, and
    the actual row is then appended to the history. One figure is drawn per
    column of ``endog``.

    Args:
        endog: Frame of endogenous series; its index must support
            ``to_timestamp()``.
        train_fraction: Share of the cleaned rows used as initial history.
        steps: Horizon passed to ``forecast``; only the first forecast row
            is kept for each refit.
        order: ``order`` argument forwarded to ``VARMAX``.
        maxiter: ``maxiter`` argument forwarded to ``fit``.
        verbose: When true, print progress every ``interval`` iterations.
        error_cov_type: ``error_cov_type`` argument forwarded to ``VARMAX``.
        title: Title placed on every figure.
        titleFontSize: Font size of the title.
        labelFontSize: Font size of the axis labels.
        figureSize: ``figsize`` passed to ``plt.subplots``.
        lineWidth: Line width of both plotted curves.

    Returns:
        A list of rows: the training rows followed by one forecast row per
        held-out observation.
    """
    sns.set_theme()

    cleaned = endog.dropna()
    size = int(len(cleaned) * train_fraction)
    timestamp = cleaned.index.to_timestamp().values.tolist()
    train = cleaned[0:size].to_numpy().tolist()
    test = cleaned[size:len(cleaned)].to_numpy().tolist()

    history = list(train)
    # Copy rather than alias: appending forecasts must not mutate `train`.
    predictions = list(train)

    interval = 10  # progress is reported every `interval` refits
    for t, obs in enumerate(test):
        model = VARMAX(history, order=order, error_cov_type=error_cov_type)
        model_fit = model.fit(maxiter=maxiter, disp=False)
        output = model_fit.forecast(steps)
        predictions.append(output[0].tolist())
        history.append(obs)
        if t % interval == 0 and verbose:
            print('percent done: ' + str(t / len(test)))
    print('percent done: 1.00')

    if len(cleaned) != len(predictions):
        print('List lengths do not match')

    # NOTE(review): the original indentation was lost; one figure per column
    # (plt.show() inside the loop) is assumed here.
    for col, val in enumerate(endog):
        plt.subplots(figsize=figureSize)
        plt.title(title, fontsize=titleFontSize)
        plt.xlabel('Timestamp', fontsize=labelFontSize)
        plt.ylabel(endog[val].name, fontsize=labelFontSize)

        column = [row[col] for row in predictions]
        plt.plot(timestamp, column, color='red', linewidth=lineWidth,
                 linestyle='--')
        # Plot the actual values from the row-wise cleaned frame so their
        # length always matches `timestamp`; a per-column dropna() can keep
        # rows that were removed because another column was missing.
        plt.plot(timestamp, cleaned[val].to_numpy().tolist(),
                 linewidth=lineWidth)
        plt.legend(labels=('prediction', endog[val].name))
        plt.show()

    return predictions
def fit_varmax(data, p, q, exog=None, summary=True):
    """Fit a VARMAX(p, q) model and optionally print its summary.

    Args:
        data: Endogenous data passed to ``VARMAX``.
        p: First element of the ``order`` tuple.
        q: Second element of the ``order`` tuple.
        exog: Optional exogenous data passed to ``VARMAX``.
        summary: When true, print ``summary()`` of the fitted result.

    Returns:
        The object returned by ``fit``.
    """
    spec = VARMAX(
        data,
        order=(p, q),
        exog=exog,
        initialization='approximate_diffuse',
    )
    fitted = spec.fit(maxiter=50, disp=False)

    if not summary:
        return fitted

    print(fitted.summary())
    return fitted
def VARMAXvma(self, endog, order=(2, 0), error_cov_type='diagonal',
              maxiter=1000, disp=False):
    """Fit a VARMAX model on ``endog`` after dropping rows with missing values.

    Args:
        endog: Endogenous data; must provide ``dropna()``.
        order: ``order`` argument forwarded to ``VARMAX``.
        error_cov_type: ``error_cov_type`` argument forwarded to ``VARMAX``.
        maxiter: ``maxiter`` argument forwarded to ``fit``.
        disp: ``disp`` argument forwarded to ``fit``.

    Returns:
        The object returned by ``fit``.
    """
    observations = endog.dropna()
    spec = VARMAX(observations, order=order, error_cov_type=error_cov_type)
    return spec.fit(maxiter=maxiter, disp=disp)
# outcome = Y.tail(predictions.shape[0]) # else: # outcome = Y # accuracy_matrix = outcome*predictions # accuracy_matrix.values # accuracy.append(np.sum(np.sum((accuracy_matrix > 0)))/accuracy_matrix.size) endog_Y = X.drop(columns=[ 'XMRspread', 'XMRvolume', 'XMRbasevolume', 'XRPspread', 'XRPvolume', 'XRPbasevolume', 'LTCspread', 'LTCvolume', 'LTCbasevolume', 'DASHspread', 'DASHvolume', 'DASHbasevolume', 'ETHspread', 'ETHvolume', 'ETHbasevolume' ]) exog_X = X.drop( columns=['ETHreturn', 'XRPreturn', 'LTCreturn', 'DASHreturn', 'XMRreturn']) p = 1 model = VARMAX(endog=endog_Y, order=(p, 0), exog=exog_X) results = model.fit(maxiter=0) predictions = results.predict() predictions = (predictions.shift(-1)).dropna() if predictions.shape[0] != Y.shape[0]: outcome = ((endog_Y.tail(predictions.shape[0])).shift(-1)).dropna() else: outcome = (endog_Y.shift(-1)).dropna() accuracy_matrix = np.sign(outcome) * np.sign(predictions) accuracy_matrix.values accuracy = (np.sum(np.sum((accuracy_matrix > 0.0))) / accuracy_matrix.size) # turn into numpy array endog_Y_test = X_test.drop(columns=[ 'XMRspread', 'XMRvolume', 'XMRbasevolume', 'XRPspread', 'XRPvolume',