def ARIMA_forecasting(history=100, steps=6, eval_fraction=0.2, order=(1, 1, 0)):
    """Run a rolling ARIMA forecast for every subject and report the RMSE.

    The module-level settings ``normalize_data``, ``PH``, ``dataset`` and
    ``data_directory`` (plus ``seg`` for the 'oaps' dataset) select the
    pickled train/test files, which are loaded with ``unpickle_data``.
    For each subject the function:

    1. concatenates the train and test frames and passes the result to
       ``process_data`` to obtain ``X`` and ``y``;
    2. for each row ``j`` in ``range(history, int(eval_fraction * len(X)))``
       fits an ARIMA model on the ``history`` preceding rows of ``X``
       (flattened with ``np.hstack``) and keeps the last value of a
       ``steps``-step forecast;
    3. truncates the forecasts to ``int`` and scores them against
       ``y[history:n]`` with the root mean squared error.

    Subjects that cannot be scored are reported on stdout and skipped.

    Args:
        history: number of preceding rows of ``X`` used to fit each model.
        steps: forecast horizon, in model steps; the last step is kept.
        eval_fraction: fraction of ``X`` (from the start) that is evaluated.
        order: ``(p, d, q)`` order passed to ``ARIMA``.

    Returns:
        pandas.DataFrame with columns ``Subject`` and ``RMSE``, sorted by
        ``Subject``.

    Raises:
        ValueError: if ``dataset`` is neither ``'oaps'`` nor ``'ohio'``.
    """
    if normalize_data:
        substring = 'normalized_' + PH + 'min'
    else:
        substring = PH + 'min'

    # NOTE(review): unpickle_data presumably wraps pickle -- only point it at
    # trusted files.
    if dataset == 'oaps':
        print('Getting data from ', data_directory + seg + '\n')
        # e.g. windowed_train_normalized_60min.pickle
        unpickled_train_data = unpickle_data(
            data_directory + seg + 'windowed_train_' + substring + '.pickle')
        unpickled_test_data = unpickle_data(
            data_directory + seg + 'windowed_test_' + substring + '.pickle')
    elif dataset == 'ohio':
        # e.g. windowed_normalized_60min.pickle
        unpickled_train_data = unpickle_data(
            data_directory + 'OhioT1DM-training/imputed/' + 'windowed_' + substring + '.pickle')
        unpickled_test_data = unpickle_data(
            data_directory + 'OhioT1DM-testing/imputed/' + 'windowed_' + substring + '.pickle')
    else:
        # Previously this fell through and failed later with an unbound
        # variable error; fail early with a clear message instead.
        raise ValueError("Unknown dataset %r; expected 'oaps' or 'ohio'" % (dataset,))

    subjs = list(unpickled_train_data.keys())
    # Only the processing order is randomised; the result is sorted below.
    random.shuffle(subjs)

    testScores = []
    subjects = []
    for i, subj in enumerate(subjs, start=1):
        print('----------Training on subject: ',subj,'----------')
        print('----------Subject: ',i,'/',len(subjs),'----------')

        df = pd.concat([unpickled_train_data[subj], unpickled_test_data[subj]], axis=0)
        X, y = process_data(df)

        n = int(eval_fraction * len(X))
        forecasts = []
        for j in range(history, n):
            # Fit on the `history` rows immediately preceding row j.
            data = np.hstack(X[j - history:j])
            # Alternatives tried earlier: sm.tsa.statespace.SARIMAX on X[j]
            # and pm.auto_arima on `data`.
            model = ARIMA(data, order=order)
            try:
                model_fit = model.fit(disp=0)
                output = model_fit.forecast(steps=steps)[0]
                yhat = output[-1]
            except Exception:
                # Fit/forecast failed: fall back to a persistence forecast,
                # i.e. the most recent value inside the fitting window.
                # (The old fallback, X[-1], was the final row of the whole
                # series -- a look-ahead, and generally not a scalar.)
                yhat = data[-1]
            print('----------Row: ',j,'/',n,'------Subject: ',i,'/',len(subjs),'----------')
            forecasts.append(yhat)

        try:
            # NOTE(review): int() truncates; with normalized data this may
            # discard most of the signal -- confirm this is intended.
            forecasts = [int(x) for x in forecasts]
            error = math.sqrt(mean_squared_error(y[history:n], forecasts))
        except Exception as exc:
            # Best effort: keep going with the other subjects, but say why
            # this one is missing from the results.
            print('Skipping subject ', subj, ': ', exc)
            continue

        print('Test RMSE: %.3f' % error)
        testScores.append(error)
        subjects.append(subj)

    results_df = pd.DataFrame(list(zip(subjects, testScores)), columns=['Subject', 'RMSE'])
    results_df.sort_values(by=['Subject'], inplace=True)
    return results_df
# ARIMA
# -----------------------------
#%
# Autoregressive Integrated Moving Average (ARIMA)
#
# The ARIMA method models the next step in a sequence as a linear function
# of the differenced observations and of the residual errors at prior time
# steps.
#
# It combines Autoregression (AR) and Moving Average (MA) models with a
# differencing pre-processing step, called integration (I), that makes the
# sequence stationary.
#
# The model is specified by the orders of its AR(p), I(d) and MA(q) parts,
# passed to the ARIMA function as ARIMA(p, d, q). An ARIMA model can also be
# used to develop AR, MA and ARMA models.
#
# The method is suitable for univariate time series with a trend and without
# seasonal components.

# ARIMA example
from statsmodels.tsa.arima_model import ARIMA
from random import random

# Contrived dataset: the ramp 1..99 with random() noise added to each point.
data = [step + random() for step in range(1, 100)]
data  # bare expression; echoes the series when run as a notebook cell

# Fit an ARIMA(1, 1, 1) model.
model = ARIMA(data, order=(1, 1, 1))
model_fit = model.fit(disp=False)

# Predict the single index len(data), i.e. the step right after the last
# observation; typ='levels' asks for the value on the original scale.
yhat = model_fit.predict(start=len(data), end=len(data), typ='levels')
print(yhat)
# Partial autocorrelation plot with dashed reference lines at zero and at
# +/- 1.96 / sqrt(N) (the usual approximate 95% band).
plt.plot(lag_pacf)
pacf_conf_bound = 1.96 / np.sqrt(len(timeseries))
for pacf_level in (0, -pacf_conf_bound, pacf_conf_bound):
    plt.axhline(y=pacf_level, linestyle="--", color='gray')
plt.title('Partial Autocorrelation Function')
plt.tight_layout()


# In[173]:


from statsmodels.tsa.arima_model import ARIMA

timeseries = indexedtslog

# ARIMA(1, 1, 1) model on the series, declared with weekly (Friday) frequency.
model = ARIMA(timeseries, order=(1, 1, 1), freq='W-Fri')
ARIMAresult = model.fit(disp=-1)

# Overlay the fitted values (red) on the series in indexedtslogdiffshift and
# put the residual sum of squares in the title.
plt.plot(indexedtslogdiffshift)
plt.plot(ARIMAresult.fittedvalues, color='red')
arima_residuals = ARIMAresult.fittedvalues - indexedtslogdiffshift[seriescol]
plt.title('RSS: %.4f' % sum(arima_residuals ** 2))
print('plotting ARIMA Model')


# In[174]:


# Copy the fitted values into a standalone Series.
pred_ARIMA_diff = pd.Series(ARIMAresult.fittedvalues, copy=True)
pred_ARIMA_diff.head()