def test_dynamic_against_sarimax():
    """Check that AutoReg and SARIMAX produce the same predictions.

    A series is simulated from ``y[i] = 0.9 * y[i - 1] + e[i]``, a SARIMAX
    model with order (1, 0, 0) and a constant is fitted, and its first two
    parameters are handed to ``AutoReg.predict``.  Predictions over
    observations 900..1100 must match for the default call, for
    ``dynamic=True`` and for ``dynamic=50``.
    """
    nobs = 1001
    phi = 0.9
    rs = np.random.RandomState(12345678)
    e = rs.standard_normal(nobs)
    y = np.empty(nobs)
    # First observation is the shock scaled by sqrt(1 / (1 - phi ** 2)).
    y[0] = e[0] * np.sqrt(1.0 / (1 - phi ** 2))
    for i in range(1, nobs):
        y[i] = phi * y[i - 1] + e[i]

    sres = SARIMAX(y, order=(1, 0, 0), trend="c").fit(disp=False)
    mod = AutoReg(y, 1)

    # An empty mapping reproduces the call without a ``dynamic`` argument.
    for options in ({}, {"dynamic": True}, {"dynamic": 50}):
        spred = sres.predict(900, 1100, **options)
        pred = mod.predict(sres.params[:2], 900, 1100, **options)
        assert_allclose(spred, pred)
def test_predict_exog():
    """Compare AutoReg predictions with exogenous regressors to manual ones.

    A series with lags 1 and 3, a constant and two exogenous columns is
    simulated and fitted.  Two prediction paths are checked against a
    direct computation from the estimated parameters:

    * static in-sample predictions from observation 900 to the end, and
    * dynamic predictions for 900..1100, where the last 100 steps use the
      ``exog_oos`` values because the sample's own regressors have run out.
    """
    rs = np.random.RandomState(12345678)
    e = rs.standard_normal(1001)
    y = np.empty(1001)
    x = rs.standard_normal((1001, 2))
    y[:3] = e[:3] * np.sqrt(1.0 / (1 - 0.9 ** 2)) + x[:3].sum(1)
    for i in range(3, 1001):
        y[i] = 10 + 0.9 * y[i - 1] - 0.5 * y[i - 3] + e[i] + x[i].sum()
    ys = pd.Series(y, index=pd.date_range("1-1-1950", periods=1001, freq="M"))
    xdf = pd.DataFrame(x, columns=["x0", "x1"], index=ys.index)
    mod = AutoReg(ys, [1, 3], trend="c", exog=xdf)
    res = mod.fit()

    # Static predictions.
    pred = res.predict(900)
    c = res.params.iloc[0]
    # Convert to ndarrays so that ar[0] / ar[1] are plain positional lookups.
    # Indexing the label-indexed Series slice with integers relies on the
    # deprecated positional fallback of ``Series.__getitem__``.
    ar = np.asarray(res.params.iloc[1:3])
    ex = np.asarray(res.params.iloc[3:])
    direct = c + ar[0] * y[899:-1] + ar[1] * y[897:-3]
    direct += ex[0] * x[900:, 0] + ex[1] * x[900:, 1]
    idx = pd.date_range(ys.index[900], periods=101, freq="M")
    direct = pd.Series(direct, index=idx)
    assert_series_equal(pred, direct)

    # Dynamic predictions extending 100 steps beyond the sample.
    exog_oos = rs.standard_normal((100, 2))
    pred = res.predict(900, 1100, dynamic=True, exog_oos=exog_oos)
    direct = np.zeros(201)
    # The first three steps still draw on observed values of y for the lags
    # that reach back before observation 900.
    direct[0] = c + ar[0] * y[899] + ar[1] * y[897] + x[900] @ ex
    direct[1] = c + ar[0] * direct[0] + ar[1] * y[898] + x[901] @ ex
    direct[2] = c + ar[0] * direct[1] + ar[1] * y[899] + x[902] @ ex
    for i in range(3, 201):
        direct[i] = c + ar[0] * direct[i - 1] + ar[1] * direct[i - 3]
        if 900 + i < x.shape[0]:
            direct[i] += x[900 + i] @ ex
        else:
            # Past the end of the sample: row 0 of exog_oos is step i == 101.
            direct[i] += exog_oos[i - 101] @ ex
    direct = pd.Series(
        direct, index=pd.date_range(ys.index[900], periods=201, freq="M")
    )
    assert_series_equal(pred, direct)
def test_predict_irregular_ar():
    """Check predictions of an AutoReg with lags [1, 3] and a "ct" trend.

    Dynamic predictions for observations 900..1100 and static predictions
    from 900 to the end of the sample are compared with values computed
    directly from the fitted parameters.
    """
    nobs = 1001
    rs = np.random.RandomState(12345678)
    e = rs.standard_normal(nobs)
    y = np.empty(nobs)
    y[:3] = e[:3] * np.sqrt(1.0 / (1 - 0.9 ** 2))
    for i in range(3, nobs):
        y[i] = 10 + 0.9 * y[i - 1] - 0.5 * y[i - 3] + e[i]
    index = pd.date_range(dt.datetime(1950, 1, 1), periods=nobs, freq="M")
    ys = pd.Series(y, index=index)

    res = AutoReg(ys, [1, 3], trend="ct").fit()
    c, t = res.params.iloc[0], res.params.iloc[1]
    ar = np.asarray(res.params.iloc[2:])

    # Dynamic path: lags come from earlier forecasts once they exist and
    # from the observed series before that.
    pred = res.predict(900, 1100, True)
    direct = np.zeros(201)
    for i in range(201):
        lag1 = direct[i - 1] if i >= 1 else y[899]
        lag3 = direct[i - 3] if i >= 3 else y[897 + i]
        direct[i] = c + t * (901 + i) + ar[0] * lag1 + ar[1] * lag3
    direct = pd.Series(
        direct, index=pd.date_range(ys.index[900], periods=201, freq="M")
    )
    assert_series_equal(pred, direct)

    # Static path: every lag is an observed value.
    pred = res.predict(900)
    trend_index = np.arange(901, 901 + 101)
    direct = c + t * trend_index + ar[0] * y[899:-1] + ar[1] * y[897:-3]
    direct = pd.Series(
        direct, index=pd.date_range(ys.index[900], periods=101, freq="M")
    )
    assert_series_equal(pred, direct)
def extrapolate_moments(mus0,fac,extrapolation_mode="1/n"):
    """Extrapolate a sequence of moments with an autoregressive model.

    Parameters
    ----------
    mus0 : array
        Input moments.  Only (numerically) real moments are supported.
    fac : float
        Length factor; the output has ``int(fac * len(mus0))`` entries.
    extrapolation_mode : {"1/n", "plain", "power"}
        Selects the transform applied before fitting and inverted after
        prediction (``power_transform``, ``no_transform`` or
        ``fit_power_transform`` respectively).

    Returns
    -------
    array of complex
        The original moments followed by the predicted ones, after the
        inverse transform has been applied.

    Raises
    ------
    NotImplementedError
        If any imaginary part of ``mus0`` exceeds 1e-4.
    ValueError
        If ``extrapolation_mode`` is not one of the supported names.
    """
    if np.max(mus0.imag) > 1e-4:
        # The original bare ``raise`` (no active exception) surfaced as a
        # RuntimeError; NotImplementedError is a RuntimeError subclass, so
        # existing handlers keep working while the cause is now explicit.
        raise NotImplementedError(
            "extrapolation of complex moments is not implemented")
    mus = mus0.real
    if extrapolation_mode == "plain":
        ftrans, ftransinv = no_transform()
    elif extrapolation_mode == "1/n":
        ftrans, ftransinv = power_transform(mus)
    elif extrapolation_mode == "power":
        ftrans, ftransinv = fit_power_transform(mus)
    else:
        # Previously this fell through and failed later on an unbound name.
        raise ValueError(
            "unknown extrapolation_mode: %r" % (extrapolation_mode,))
    mus = ftrans(mus)  # scale the moments
    T = len(mus)
    L = T  # every available moment is used for training
    P = int(fac * T)  # total length after prediction
    train = mus[0:L].real  # train data
    lags = round(12 * (len(train) / 100.) ** (1 / 4.))
    model = AutoReg(train, lags=lags, trend="ct").fit(cov_type="HC1")
    pred = model.predict(start=L, end=P - 1)  # prediction
    # ``np.complex`` was removed from NumPy; the builtin is the same dtype.
    mus2 = np.zeros(P, dtype=complex)
    mus2[0:L] = mus[0:L]  # initial data
    mus2[L:P] = pred[:]  # predicted data
    return ftransinv(mus2)  # transform back
def get_autoregression(frame, metric, window, lags):
    """Fit an AutoReg on one column and walk forward over the last 100 rows.

    The column ``metric`` of ``frame`` is split into a training part
    (everything except the first value and the last 100) and a test part
    (the last 100).  One-step predictions are built by hand from the fitted
    coefficients using the ``window`` most recent observations; each
    prediction and the test RMSE are printed.  Returns ``None``.
    """
    # train-test split
    X = frame[f'{metric}'].values
    cut = X.size - 100
    train, test = X[1:cut], X[cut:]

    # train autoregression
    coef = AutoReg(train, lags=lags).fit().params

    # Walk forward over time steps in test, seeding the history with the
    # tail of the training data.
    history = list(train[train.size - window:])
    preds = []
    for t in range(test.size):
        length = len(history)
        yhat = coef[0]
        for d in range(window):
            # coef[d + 1] multiplies the observation d + 1 steps back.
            yhat += coef[d + 1] * history[length - 1 - d]
        obs = test[t]
        preds.append(yhat)
        history.append(obs)
        print('predicted:', yhat, 'expected', obs)

    rmse = sqrt(mean_squared_error(test, preds))
    print('Test RMSE:', rmse)
    return None
def test_autoreg_smoke_plots(plot_data, close_figures):
    """Smoke-test the plotting helpers and summary of a fitted AutoReg.

    The model is configured entirely from the ``plot_data`` fixture.
    ``plot_predict`` is only exercised when the fixture has no exogenous
    data.
    """
    from matplotlib.figure import Figure

    keyword_names = (
        "trend", "seasonal", "exog", "hold_back", "period", "missing",
    )
    res = AutoReg(
        plot_data.endog,
        plot_data.lags,
        **{name: getattr(plot_data, name) for name in keyword_names},
    ).fit()

    assert isinstance(res.plot_diagnostics(), Figure)
    if plot_data.exog is None:
        assert isinstance(res.plot_predict(end=300), Figure)
        no_interval = res.plot_predict(end=300, alpha=None, in_sample=False)
        assert isinstance(no_interval, Figure)
    assert isinstance(res.summary(), Summary)
def fit(cdf, select_model):
    """Walk-forward evaluate a forecasting model with time-series CV.

    Parameters
    ----------
    cdf : DataFrame-like
        Must provide an ``'AverageTemperatureCelsius'`` column.
    select_model : str
        ``'ARIMA'``, ``'AR'`` or ``'ES'``.  Any other value makes the
        function return ``None`` without evaluating anything.

    For each of five ``TimeSeriesSplit`` folds the chosen model is re-fitted
    on the growing history before every one-step forecast.  The mean of the
    per-fold RMSE values is printed; nothing is returned.
    """
    from sklearn.model_selection import TimeSeriesSplit
    from sklearn.metrics import mean_squared_error

    X = cdf['AverageTemperatureCelsius'].values
    if select_model not in ('ARIMA', 'AR', 'ES'):
        # Unknown model: nothing to evaluate (silent, as before).
        return

    tscv = TimeSeriesSplit(n_splits=5)
    rmse = []
    for train_index, test_index in tscv.split(X):
        train, test = X[train_index], X[test_index]
        history = list(train)
        predictions = []
        for obs in test:
            if select_model == 'ARIMA':
                model_fit = arima_mod(history, order=(5, 1, 0)).fit(disp=0)
                yhat = model_fit.forecast()[0]
            elif select_model == 'AR':
                model_fit = AutoReg(history, lags=10).fit()
                # predict returns a length-1 array; store the scalar so all
                # three branches append the same kind of value.
                yhat = model_fit.predict(len(history), len(history))[0]
            else:  # 'ES'
                model_fit = exp_mod(history).fit(optimized=True)
                yhat = model_fit.forecast()[0]
            predictions.append(yhat)
            history.append(obs)
        # The value is reported as RMSE, so take the root of the fold MSE
        # (the original accumulated the raw MSE under the RMSE label).
        rmse.append(np.sqrt(mean_squared_error(test, predictions)))
    print("RMSE: %.3f" % np.mean(rmse))
def update_cards(n_intervals):
    """Build the device cards, the gauge and the forecast figure for a tick.

    The share of ``"up"`` entries in the generated array is recorded in
    ``HISTORY`` under ``TODAY`` plus ``3 * n_intervals`` seconds.  Once at
    least five history points exist, an AutoReg with two lags is fitted and
    a short forecast is plotted; any failure there is printed and the
    current figure is kept.

    Returns ``(cards, gauge, forecast_fig)``.
    """
    array = generate_array()
    value = np.sum(array == "up") / array.size
    HISTORY[TODAY + timedelta(0, 3 * n_intervals)] = value
    gauge = generate_gauge(value)

    cards = []
    for row in array:
        columns = [
            dbc.Col(make_card(s, f"device {d}")) for d, s in enumerate(row)
        ]
        cards.append(dbc.Row(columns))

    # FORECASTING
    forecast_fig = go.Figure().update_layout(
        title='Not enough datapoints for forecasting')
    if len(HISTORY) < 5:
        return cards, gauge, forecast_fig

    try:
        series = pd.Series(HISTORY).sort_index()
        print(series)
        model_fit = AutoReg(series, lags=2, old_names=False).fit()
        start = series.index.max()
        pred_df = model_fit.predict(start=start, end=start + timedelta(0, 12))
        axis_labels = {'index': 'Future Timesteps', 'value': "Predicted %"}
        forecast_fig = px.line(
            pred_df,
            labels=axis_labels,
            title="Forecasting with Univariate Autoregressive Processes")
        forecast_fig.update_layout(showlegend=False)
    except Exception as e:
        # Best effort: the dashboard keeps rendering with whatever figure
        # was built before the failure.
        print(e)
    return cards, gauge, forecast_fig
def train_model(df, N, desired_elo):
    """Forecast ratings game by game until ``desired_elo`` is reached.

    Parameters
    ----------
    df : DataFrame-like
        Must provide a ``'rating'`` column; its values are reversed before
        use.
    N : int
        Maximum number of games to forecast.
    desired_elo : number
        Forecasting stops as soon as a predicted rating reaches this value.

    Returns
    -------
    tuple
        ``(count_games, predicted_data)``: the number of forecast games
        that stayed below ``desired_elo``, and the rating array extended
        with the integer-truncated forecasts.

    An AutoReg with lags 1, 2 and 5 is fitted on the differenced ratings
    (via the ``difference`` helper defined elsewhere).  Each forecast
    difference is added to the latest rating.
    """
    data = df['rating'].values[::-1]
    model_fit = AutoReg(difference(data), lags=[1, 2, 5]).fit()

    predicted_data = data
    count_games = 0
    for _ in range(N):
        last_ob = predicted_data[len(predicted_data) - 1]
        # NOTE(review): the model was fitted on the differenced series,
        # which presumably has one element fewer than ``data``; indexing
        # the forecast by ``len(predicted_data)`` may therefore skip the
        # first out-of-sample step. Kept as is -- confirm the intent.
        step = len(predicted_data)
        predictions = model_fit.predict(start=step, end=step, dynamic=False)
        # Take the single forecast explicitly: calling int() on a 1-element
        # array is deprecated in NumPy.
        yhat = int(np.asarray(predictions)[0] + last_ob)
        predicted_data = np.append(predicted_data, yhat)
        if yhat >= desired_elo:
            break
        count_games += 1
    return count_games, predicted_data
def test_parameterless_autoreg():
    """Touch every public attribute of an AutoReg fitted with no parameters.

    The model has zero lags, no trend, no seasonality and no exogenous
    data.  Public callables are invoked without arguments, apart from a
    list of methods that are skipped.
    """
    data = gen_data(250, 0, False)
    res = AutoReg(
        data.endog, 0, trend='n', seasonal=False, exog=None, old_names=False
    ).fit()

    # TODO
    skipped = {
        'predict', 'f_test', 't_test', 'initialize', 'load', 'remove_data',
        'save', 't_test_pairwise', 'wald_test', 'wald_test_terms',
    }
    for name in dir(res):
        if name.startswith('_') or name in skipped:
            continue
        value = getattr(res, name)
        if callable(value):
            value()
        else:
            assert isinstance(value, object)
def test_predict_seasonal():
    """Check predictions of a one-lag AutoReg with seasonal dummies.

    Dynamic predictions for observations 900..1100 and static predictions
    from 900 to the end of the sample are compared with values computed
    directly from the fitted constant, seasonal effects and AR coefficient.
    """
    nobs = 1001
    rs = np.random.RandomState(12345678)
    e = rs.standard_normal(nobs)
    y = np.empty(nobs)
    y[0] = e[0] * np.sqrt(1.0 / (1 - 0.9**2))
    effects = 10 * np.cos(np.arange(12) / 11 * 2 * np.pi)
    for i in range(1, nobs):
        y[i] = 10 + 0.9 * y[i - 1] + e[i] + effects[i % 12]
    index = pd.date_range(dt.datetime(1950, 1, 1), periods=nobs, freq="M")
    ys = pd.Series(y, index=index)

    res = AutoReg(ys, 1, seasonal=True).fit()
    c = res.params.iloc[0]
    # The first season has no parameter of its own, so its effect stays 0.
    seasons = np.zeros(12)
    seasons[1:] = res.params.iloc[1:-1]
    ar = res.params.iloc[-1]

    # Dynamic path: each forecast feeds the next one.
    pred = res.predict(900, 1100, True)
    direct = np.zeros(201)
    previous = y[899]
    for step in range(201):
        previous = previous * ar + c + seasons[(900 + step) % 12]
        direct[step] = previous
    direct = pd.Series(
        direct, index=pd.date_range(ys.index[900], periods=201, freq="M")
    )
    assert_series_equal(pred, direct)

    # Static path: the lag is always an observed value.
    pred = res.predict(900, dynamic=False)
    direct = y[899:-1] * ar + c + seasons[np.arange(900, 1001) % 12]
    direct = pd.Series(
        direct, index=pd.date_range(ys.index[900], periods=101, freq="M")
    )
    assert_series_equal(pred, direct)
def AR_model(self, prediction_time_window, temp_df, name):
    """Fit an AutoReg on ``temp_df`` and save a forecast-vs-actual plot.

    ``temp_df`` is indexed by its ``'ds'`` column and squeezed; per the
    original author's note it holds one station's dates and temperatures.
    The last ``prediction_time_window`` values form the test set, and the
    same number is used as the lag order.  The figure is written to
    ``app/static/images/prediction/<name>.png``.

    Returns whatever ``plt.savefig`` returns.
    """
    plt.clf()
    X = temp_df.set_index(['ds']).squeeze().values

    # split dataset (the first value is left out of the training data)
    cut = len(X) - prediction_time_window
    train, test = X[1:cut], X[cut:]

    # train autoregression
    model_fit = AutoReg(train, lags=prediction_time_window).fit()

    # make predictions
    first_step = len(train)
    last_step = len(train) + len(test) - 1
    predictions = model_fit.predict(
        start=first_step, end=last_step, dynamic=False)
    # The RMSE is computed but not reported anywhere.
    rmse = sqrt(mean_squared_error(test, predictions))

    # plot results
    plt.plot(test, label='Test data')
    plt.plot(predictions, color='red', label='Predicted values')
    plt.ylabel('Temperature')
    plt.xlabel('Number of days in future')
    plt.legend()
    target = 'app/static/images/prediction/{0}.png'.format(name)
    return plt.savefig(target, dpi=50)
trainingModelFit = trainingModel.fit() print(trainingModelFit.summary()) r2Data = training - testing.mean() predictions = trainingModelFit.predict() plt.plot(predictions, color="green") plt.show() print( "The score after applying ARIMA feature engineering based on R^2 regression:" ) print(r2_score(r2Data, predictions), "\n") print("The R2 score subtracted from 100% accuracy gives:") print(100 - r2_score(r2Data, predictions), "\n") model = AutoReg(training, lags=1) model_fit = model.fit() prediction = model_fit.predict(len(testing), len(testing)) print("The results of applying Autoregression: ") print(prediction, "\n") model = ARMA(training, order=(2, 1)) model_fit = model.fit(disp=False) print("The results of applying Autoregressive Moving Average: ") prediction = model_fit.predict(len(testing), len(testing)) print(prediction, "\n") model = SARIMAX(training, order=(1, 1, 1), seasonal_order=(2, 2, 2, 2)) model_fit = model.fit(disp=False) print( "The results of applying Seasonal Autoregressive Integrated Moving-Average:"
# fit an AR model and manually save coefficients to file
from pandas import read_csv
from statsmodels.tsa.ar_model import AutoReg
import numpy


# create a difference transform of the dataset
def difference(dataset):
    """Return the first differences ``dataset[i] - dataset[i - 1]`` as an array."""
    return numpy.array(
        [dataset[i] - dataset[i - 1] for i in range(1, len(dataset))])


# load dataset
# ``read_csv(..., squeeze=True)`` was removed in pandas 2.0; squeezing the
# loaded frame along its columns gives the same result and also works on
# older pandas versions.
series = read_csv('daily-total-female-births.csv', header=0, index_col=0,
                  parse_dates=True).squeeze("columns")
X = difference(series.values)

# fit model
window_size = 6
model = AutoReg(X, lags=window_size)
model_fit = model.fit()

# save coefficients
coef = model_fit.params
numpy.save('man_model.npy', coef)

# save lag (the last ``window_size`` differenced observations)
lag = X[-window_size:]
numpy.save('man_data.npy', lag)

# save the last ob (undifferenced), wrapped in a list
numpy.save('man_obs.npy', [series.values[-1]])
def prediksi_all(username):
    """Regenerate the stored forecasts for every item owned by ``username``.

    For each item the existing ``Prediksi`` rows are deleted and the
    quantities of its ``DetailBarang`` rows are collected.  An AutoReg with
    10 lags is then fitted per item and two forecasts are produced:

    * ``Prediksi`` rows from ``model_fit.predict`` (non-dynamic), and
    * ``PrediksiNew`` rows from a manual walk-forward computation that
      applies the fitted coefficients to a rolling 10-value history.

    Returns the string ``"ok"``.
    """
    all_barangs = Barang.query.filter_by(username=username)
    result = barangs_schema.dump(all_barangs)

    listOfListData = []
    for barang in result:
        many_detail_barang = DetailBarang.query.filter_by(
            id_barang=barang['id'])
        for stale in Prediksi.query.filter_by(id_barang=barang['id']):
            db.session.delete(stale)
        data = many_detail_barang_schema.dump(many_detail_barang)
        listOfListData.append([detail['quantity'] for detail in data])
    db.session.commit()

    information = Information.query.filter_by(username=username).first()
    information_result = information_schema.dump(information)

    window = 10
    first_prediction = []
    second_prediction = []
    for row in listOfListData:
        # NOTE(review): ``train`` is everything but the first value, so it
        # contains the ``test`` tail as well -- confirm this is intended.
        train = row[1:len(row)]
        test = row[len(row) - information_result['cycle']:]

        # One fit serves both forecasts; the original fitted the identical
        # model a second time.
        model_fit = AutoReg(train, lags=window).fit()
        predictions_one = model_fit.predict(
            start=len(train), end=len(train) + len(test) - 1, dynamic=False)

        # Manual walk-forward forecast from the fitted coefficients.
        coef = model_fit.params
        history = list(train[len(train) - window:])
        predictions_two = []
        for obs in test:
            length = len(history)
            yhat = coef[0]
            for d in range(window):
                # coef[d + 1] multiplies the observation d + 1 steps back.
                yhat += coef[d + 1] * history[length - 1 - d]
            predictions_two.append(yhat)
            history.append(obs)

        first_prediction.append(predictions_one)
        second_prediction.append(predictions_two)

    # ``result`` and the two prediction lists are aligned item by item.
    for barang, ones, twos in zip(result, first_prediction,
                                  second_prediction):
        for y in ones:
            db.session.add(Prediksi(barang['id'], y))
        for j in twos:
            db.session.add(PrediksiNew(barang['id'], j))
    db.session.commit()
    return "ok"
# Name the columns of the rates frame and draw it as a candlestick chart.
rates_frame.columns = [
    'time', 'Open', 'High', 'Low', 'Close', 'tick_volume', 'spread',
    'real_volume'
]
mpf.plot(rates_frame, type='candle')
#%%
# AR example
# from statsmodels.tsa.ar_model import AR #op1
from statsmodels.tsa.ar_model import AutoReg
from random import random
import matplotlib.pyplot as plt
# contrived dataset
# fit model
# model = AR(rates_frame.Close) #op1
# Replace the index with a daily date range that starts at as2[0] and has
# one entry per element of as2.
rates_frame.index = pd.date_range(as2[0], periods=len(as2), freq='D')
# Fit an autoregression with 400 lags and seasonal terms on the Close column;
# ``model`` holds the fitted results, not the unfitted model.
model = AutoReg(rates_frame.Close, lags=400, seasonal=True).fit()
# model_fit = model.fit(maxlag=400)#op1
# make prediction
# yhat = model.predict('23:55:00','23:59:00')
# Predict from 10 observations before the end of the data up to index
# len(Close) + 500.
yhat = model.predict(len(rates_frame.Close) - 10, len(rates_frame.Close) + 500)
# Draw the observed Close values and the predictions on the same axes.
plt.plot(rates_frame.Close)
plt.plot(yhat)
# plt.show()
# %%
# %%
def setup_class(cls):
    """Fit a 9-lag AutoReg without trend on the sunspots data.

    The fitted results go to ``cls.res1`` and the reference results without
    a constant go to ``cls.res2``.
    """
    sunspots_data = sm.datasets.sunspots.load(as_pandas=False)
    model = AutoReg(sunspots_data.endog, lags=9, trend='n', old_names=False)
    cls.res1 = model.fit()
    cls.res2 = results_ar.ARResultsOLS(constant=False)
# Plot the moving-average predictions against the test temperatures.
plt.ylabel("Temperature", fontsize=20)
plt.plot(test_set["AvgTemperature"], label="Original Data")
plt.plot(predictions_MA, label="Predictions")
# NOTE(review): the return value of savefig is stored in fig6, which is
# rebound to a new figure further down -- confirm it is needed.
fig6 = plt.savefig("/home/vaishnavi/Desktop/Final/Screenshots")
plt.show()
#plt.legend()
#RMSE for MA model
# mean_squared_error yields the MSE; the square root is taken when printing.
mse = mean_squared_error(predictions_MA, test_set["AvgTemperature"])
print(mse**0.5)
#!pip install statsmodels --upgrade
#AR model
from statsmodels.tsa.ar_model import AutoReg
# Autoregression with 1000 lags on the training temperatures.
model_AR = AutoReg(training_set["AvgTemperature"], lags=1000)
model_fit_AR = model_AR.fit()
# Predict one value per test row, starting right after the training rows.
predictions_AR = model_fit_AR.predict(
    training_set.shape[0], training_set.shape[0] + test_set.shape[0] - 1)
import seaborn as sns
fig6 = plt.figure(figsize=(15, 5))
plt.ylabel("Temperature (F)", fontsize=20)
plt.plot(test_set["AvgTemperature"], label="Original Data")
plt.plot(predictions_AR, label="Predictions")
# NOTE(review): same target path as the savefig call above, so this
# presumably overwrites the earlier image -- confirm.
fig7 = plt.savefig("/home/vaishnavi/Desktop/Final/Screenshots")
plt.show()
#plt.legend()
# Despite its name, ``rmse`` holds the MSE; the root is taken when printing.
rmse = mean_squared_error(predictions_AR, test_set["AvgTemperature"])
print(rmse**0.5)
from sklearn.metrics import mean_squared_error
from math import sqrt
import warnings
import csv
from pandas import read_csv
from datetime import datetime, timedelta
import numpy as np

# load dataset
# ``squeeze=True`` was removed from read_csv in pandas 2.0.  It had no
# effect here anyway: the result is indexed by column name below, so it is
# a DataFrame with several columns.
series = read_csv('deaths.csv', header=0, parse_dates=True)
print(len(series['date']))
X = series['total'].values
# The last 12 values are held out; the first value is left out of training.
train, test = X[1:len(X)-12], X[len(X)-12:]

# train autoregression
window = 3
model = AutoReg(train, lags=window)
model_fit = model.fit()
coef = model_fit.params

# walk forward over time steps in test, seeding the history with the last
# ``window`` training values
history = list(train[len(train)-window:])
predictions = []
for t in range(len(test)):
    length = len(history)
    lag = [history[i] for i in range(length-window, length)]
    yhat = coef[0]
    for d in range(window):
        # coef[d+1] multiplies the observation d+1 steps back
        yhat += coef[d+1] * lag[window-d-1]
    obs = test[t]
    predictions.append(yhat)
    history.append(obs)
def test_autoreg_roots():
    """The root of a one-lag AutoReg is the reciprocal of its last parameter."""
    endog = np.asarray(sunspots.load_pandas().endog)
    res = AutoReg(endog, lags=1).fit()
    expected_roots = np.array([1.0 / res.params[-1]])
    assert_almost_equal(res.roots, expected_roots)
def test_autoreg_plot_err():
    """``plot_predict`` must raise ValueError for this start/end/in_sample mix."""
    noise = np.random.standard_normal(100)
    res = AutoReg(noise, lags=[1, 3]).fit()
    with pytest.raises(ValueError):
        res.plot_predict(0, end=50, in_sample=False)
def _persist_and_forecast(model_fit, prefix, x, last_value, window, train,
                          test):
    """Save one fitted model and its inputs, reload them and forecast.

    Files are written under ``manast_site/static/predictions/`` using
    ``prefix`` ("ar", "ma" or "arma") in their names.

    Returns ``(yhat, rmse, test_predictions)``: the forecast for the step
    after the last observation added back onto ``last_value``, the RMSE
    over the test range, and the predictions for the test range.
    """
    base = f"manast_site/static/predictions/{prefix}"
    # save model to file
    model_fit.save(f"{base}_model.pkl")
    # save the differenced dataset
    numpy.save(f"{base}_data.npy", x)
    # save the last ob
    numpy.save(f"{base}_obs.npy", [last_value])
    # save coefficients
    numpy.save(f"{base}_man_model.npy", model_fit.params)
    # save lag
    numpy.save(f"{base}_man_data.npy", x[-window:])
    # save the last ob
    numpy.save(f"{base}_man_obs.npy", [last_value])

    # load model
    model = AutoRegResults.load(f"{base}_model.pkl")
    data = numpy.load(f"{base}_data.npy")
    last_ob = numpy.load(f"{base}_obs.npy")

    # Predict the whole test range plus one step beyond the data.  The
    # original requested only that final step and then scored ``test``
    # against an empty slice of it, which cannot succeed.
    # NOTE(review): the range start is inferred from the
    # ``predictions[:len(predictions) - 1]`` slicing -- confirm the intent.
    predictions = model.predict(start=len(train), end=len(data))
    test_predictions = predictions[:-1]
    # transform prediction
    yhat = predictions[-1] + last_ob[0]
    rmse = sqrt(mean_squared_error(test, test_predictions))
    return yhat, rmse, test_predictions


def pred_forecast(sales):
    """Forecast the next sales value with AR, MA and ARMA models.

    The sales are written to a CSV file (names come from ``sales_to_csv``),
    read back, differenced and split 66/34 into train and test.  Each model
    is fitted on the training part, persisted, reloaded and used to
    forecast.  A plot comparing the AR predictions with the test data is
    saved, and a naive "same value seven rows earlier" baseline is
    computed.

    Returns
    -------
    tuple
        ``(direction_ar, yhat_ar, rmse_ar, yhat_ma, rmse_ma, yhat_arma,
        rmse_arma, prev_week, actual_week, error_prev_week, epd_week)``.
    """
    csv_file, csv_columns, values = sales_to_csv(sales)
    try:
        # newline='' is what the csv module expects for files it writes.
        with open(csv_file, 'w', newline='') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=csv_columns)
            writer.writeheader()
            writer.writerows(values)
    except IOError:
        print("I/O error")

    series = read_csv(csv_file, header=0, index_col=0)
    observed = series.values
    last_value = observed[-1]
    x = difference(observed)
    size = int(len(x) * 0.66)
    train, test = x[0:size], x[size:]
    window = 7

    # AR
    model_fit_ar = AutoReg(train, lags=window).fit()
    yhat_ar, rmse_ar, predictions = _persist_and_forecast(
        model_fit_ar, "ar", x, last_value, window, train, test)

    direction = "manast_site/static/predictions/predictionAR.png"
    direction_ar = "predictions/predictionAR.png"
    pyplot.close()
    pyplot.plot(test, color='blue', label=_("Results"))
    pyplot.plot(predictions, color='red', label=_("Predictions"))
    pyplot.legend()
    pyplot.savefig(direction)

    # MA
    model_fit_ma = ARMA(train, order=(0, 0)).fit(disp=False)
    yhat_ma, rmse_ma, _unused_ma = _persist_and_forecast(
        model_fit_ma, "ma", x, last_value, window, train, test)

    # ARMA
    model_fit_arma = ARMA(train, order=(window, 0)).fit(disp=False)
    yhat_arma, rmse_arma, _unused_arma = _persist_and_forecast(
        model_fit_arma, "arma", x, last_value, window, train, test)

    # Naive baseline: compare each value with the one seven rows earlier.
    # Start at 7 so that ``v - 7`` never wraps around to the end of the
    # data; this also matches the ``len - 7`` divisor below.
    prev_week = []
    actual_week = []
    error_prev_week = 0
    for v in range(7, len(observed)):
        previous = float(observed[v - 7])
        actual = float(observed[v])
        prev_week.append(previous)
        actual_week.append(actual)
        error_prev_week += abs(actual - previous)
    epd_week = error_prev_week / (len(observed) - 7)

    return (direction_ar, yhat_ar, rmse_ar, yhat_ma, rmse_ma, yhat_arma,
            rmse_arma, prev_week, actual_week, error_prev_week, epd_week)
plt.plot(x, y) # cheack for the correlation between the variables to determine the order of the from pandas.plotting import autocorrelation_plot autocorrelation_plot(sat1_x['x'] - sat1_x['x_sim']) from statsmodels.tsa.ar_model import AutoReg from sklearn.metrics import mean_squared_error from math import sqrt # load dataset X = (sat1_x['x'] - sat1_x['x_sim']).values train, test = X[1:len(X) - 100], X[len(X) - 100:] # train autoregression model = AutoReg(train, lags=100) model_fit = model.fit() print('Coefficients: %s' % model_fit.params) # make predictions predictions = model_fit.predict(start=len(train), end=len(train) + len(test) - 1, dynamic=False) for i in range(len(predictions)): print('predicted=%f, expected=%f' % (predictions[i], test[i])) rmse = sqrt(mean_squared_error(test, predictions)) print('Test RMSE: %.3f' % rmse) # plot results pyplot.plot(test) pyplot.plot(predictions, color='red') pyplot.show()
# create a difference transform of the dataset def difference(dataset): diff = list() for i in range(1, len(dataset)): value = dataset[i] - dataset[i - 1] diff.append(value) return numpy.array(diff) # load dataset #series = read_csv('petrol_prices.csv', header=0, index_col=0, parse_dates=True, squeeze=True) X = difference(series.values) # fit model model = AutoReg(X, lags=6) model_fit = model.fit() # save model to file model_fit.save('petrol_model.pkl') # save the differenced dataset numpy.save('petrol_data.npy', X) # save the last ob numpy.save('petrol_obs.npy', [series.values[-1]]) # load AR model from file and make a one-step prediction # load model model = AutoRegResults.load('petrol_model.pkl') data = numpy.load('petrol_data.npy') last_ob = numpy.load('petrol_obs.npy') # make prediction
def test_autoreg_roots():
    """The root of a one-lag AutoReg is the reciprocal of its last parameter."""
    endog = np.asarray(sm.datasets.sunspots.load_pandas().endog)
    res = AutoReg(endog, lags=1, old_names=False).fit()
    expected_roots = np.array([1. / res.params[-1]])
    assert_almost_equal(res.roots, expected_roots)
#AutoRegression: The method is suitable for univariate time series without trend and seasonal components. from statsmodels.tsa.ar_model import AutoReg from random import random # contrived dataset data = [x + random() for x in range(1, 100)] # fit model model = AutoReg(data, lags=1) model_fit = model.fit() # make prediction yhat = model_fit.predict(len(data), len(data)) print(yhat) #MovingAverage: The method is suitable for univariate time series without trend and seasonal components. from statsmodels.tsa.arima.model import ARIMA from random import random # contrived dataset data = [x + random() for x in range(1, 100)] # fit model model = ARIMA(data, order=(0, 0, 1)) model_fit = model.fit() # make prediction yhat = model_fit.predict(len(data), len(data)) print(yhat) #AutoRegressiveMovingAverage: The method is suitable for univariate time series without trend and seasonal components. # ARMA example from statsmodels.tsa.arima.model import ARIMA from random import random # contrived dataset
def setup_class(cls):
    """Fit a 9-lag AutoReg on the sunspots data with a plain integer index.

    The fitted results go to ``cls.res1`` and the reference results with a
    constant go to ``cls.res2``.
    """
    data = sm.datasets.sunspots.load(as_pandas=True)
    # Replace the index with 0..n-1 before fitting.
    nobs = len(data.endog)
    data.endog.index = list(range(nobs))
    model = AutoReg(data.endog, lags=9, old_names=False)
    cls.res1 = model.fit()
    cls.res2 = results_ar.ARResultsOLS(constant=True)
def fit_ar(train_data: InputData, params):
    """Fit an AutoReg on ``train_data.target`` and return the fitted results.

    ``params`` is unpacked as keyword arguments of ``AutoReg``;
    ``train_data.features`` is passed as the exogenous data.
    """
    model = AutoReg(
        train_data.target,
        **params,
        exog=train_data.features,
    )
    return model.fit()
# Fit a linear regression on the full feature matrix and store its in-sample
# predictions in a new 'full_model' column.
y_full = train['temp']
m_full = LinearRegression()
m_full.fit(X_full, y_full)
train['full_model'] = m_full.predict(X_full)
print(f'Training-Score (Manual AR): {round(m_full.score(X_full, y_full),3)}')
'''Cross-Validation'''
# Score the same estimator on five time-ordered splits.
time_series_split = TimeSeriesSplit(n_splits=5)
splits = time_series_split.split(X_full, y_full)
cv_manual_ar = cross_val_score(estimator=m_full, X=X_full, y=y_full, cv=splits)
print(f'CV-Score (Manual AR): {round(cv_manual_ar.mean(),3)}')
'''AutoRegressive Model - Statsmodels (on data taking into account trend and seasonality)'''
# Autoregression with three lags, using X_season as exogenous data.
ar_model = AutoReg(y_season, lags=3, exog=X_season).fit()
#print(ar_model.summary())
prediction_ar = ar_model.predict()
'''ARIMA Model - Statsmodels (on data taking into account trend and seasonality) - very slow!!'''
#arima_model = ARIMA(y_season, order=(1,0,1), exog=X_season).fit()
#print(arima_model.summary())
#prediction_arima = arima_model.predict()
'''ARIMA Model - only on remainder '''
arima_model = ARIMA(remainder, order=(2, 0, 2), freq='D').fit()
prediction_arima = arima_model.predict()
# Name the predictions; X_season.join below receives the named object.
prediction_arima.name = 'Arima_lags'
# Use the ARIMA predictions (AR order 2, MA order 2) as an additional feature
# for the LinearRegression.
X_arima = X_season.join(prediction_arima)
#loading the dataset import pandas as pd import numpy as np import matplotlib.pyplot as plt train = pd.read_csv("train_data.csv") test = pd.read_csv("Test_data.csv") # AUTO-REGRESSIVE MODEL from statsmodels.tsa.ar_model import AutoReg # fit model model = AutoReg(train["Close_Value"], lags=1) model_fit = model.fit() # make prediction yhat3 = model_fit.predict(1, 2548) print(yhat3) print(model_fit.summary()) print("BIC: ", model_fit.bic) mse = np.square(np.subtract(test["Close_Value"], yhat3)).mean() print("MSE: ", mse) #plot x = list(range(len(test))) plt.plot(x, test["Close_Value"], c='blue') plt.plot(x, yhat3, c='green') plt.legend() plt.show() # MOVING AVERAGE from statsmodels.tsa.arima_model import ARMA # fit model model = ARMA(train["Close_Value"], order=(0, 1))