def _worker(df):
    # Learn the best order via cv
    out = arima.grid_search(df, burn_in=burn_in, n_splits=n_splits,
                            p_bounds=(1, 4), d_bounds=(1, 2),
                            q_bounds=(1, 4), ic_score='AIC',
                            return_order_rank=True,
                            return_final_index=True, verbose=False)
    opt_order, order_rank, final_index = out
    print("Order rank:\n{}".format(order_rank))

    df = df.iloc[burn_in:]  # don't mix up training/test
    errs, forecast = None, None

    # Try the orders from best to worst
    for order in order_rank:
        p, d, q = order
        try:  # perform moving-window ARIMA
            print('Using ARIMA({}, {}, {}) ...'.format(p, d, q))
            errs, forecast = arima.moving_window(df, w_size=w_size, ph=ph,
                                                 p=p, d=d, q=q,
                                                 start_params=None,
                                                 verbose=False)
            print('ARIMA({}, {}, {}) success'.format(p, d, q))
            break  # greedy behavior: take the first order that works
        except Exception as e:
            print('ARIMA({}, {}, {}) failure'.format(p, d, q))
            print('arima.moving_window raised the following exception')
            print(e)

    # Save the results report
    error_summary = utils.forecast_report(errs)
    return error_summary, forecast
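# A minimal, self-contained sketch of the greedy-fallback idea used by
# `_worker` above: walk an order ranking and keep the first ARIMA fit that
# converges. It uses plain statsmodels on synthetic data; the `order_rank`
# list here is a stand-in for the one returned by `arima.grid_search`.
import numpy as np
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA

rng = np.random.default_rng(0)
series = pd.Series(120 + np.cumsum(rng.normal(0, 1, 400)))  # fake CGM-like signal

order_rank = [(2, 1, 2), (1, 1, 1), (1, 0, 0)]  # stand-in ranking
fit = None
for p, d, q in order_rank:
    try:
        fit = ARIMA(series, order=(p, d, q)).fit()
        print('ARIMA({}, {}, {}) success'.format(p, d, q))
        break  # greedy: the first converging order wins
    except Exception as e:
        print('ARIMA({}, {}, {}) failure: {}'.format(p, d, q, e))

if fit is not None:
    print(fit.forecast(steps=18))  # 18-step-ahead forecast (ph=18 as above)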
print("Patient {}: lambda2 = {}, sigma2 = {}".format(idx, lambda2, sigma2))
Q = np.array([[lambda2, 0], [0, 0]])  # transition covariance
R = sigma2  # observation (co)variance

df = df.iloc[burn_in:]  # don't mix up training/test
_kf = kf.cgmkalmanfilter(F=F, Q=Q, R=R, X0=X0, P0=P0)
errs, forecast = kf.online_forecast(df, _kf, H, ph=18, lambda2=lambda2,
                                    sigma2=sigma2, verbose=True)

# Save the results report
error_summary = utils.forecast_report(errs)
print(error_summary)

# import matplotlib.pyplot as plt
# plotting.cgm(df, forecast['ts'], title='Patient '+idx,
#              savefig=False)
# plotting.residuals(df, forecast['ts'], skip_first=burn_in,
#                    skip_last=ph, title='Patient '+idx,
#                    savefig=False)
# plt.show()
# break

# dump it into a pkl
pkl.dump(error_summary, open(idx + '.pkl', 'wb'))

try:  # Plot the signal and its fit
    # (body reconstructed from the commented-out calls above)
    plotting.cgm(df, forecast['ts'], title='Patient ' + idx,
                 savefig=True)
    plotting.residuals(df, forecast['ts'], skip_first=burn_in,
                       skip_last=ph, title='Patient ' + idx,
                       savefig=True)
except Exception:
    print("Plotting failed for patient {}".format(idx))
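# The fragment above assumes F, H, X0 and P0 are defined elsewhere in the
# script. Below is a numpy-only sketch of a plausible setup, ASSUMING a
# second-order linear-trend state (state = [current level, previous level]),
# a form commonly used for CGM Kalman filtering; the actual matrices used by
# `kf.cgmkalmanfilter` may differ.
import numpy as np

lambda2, sigma2 = 15.0, 2.0   # example (co)variances
F = np.array([[2.0, -1.0],
              [1.0,  0.0]])   # linear-trend transition matrix (assumed)
H = np.array([1.0, 0.0])      # observe the current level only
Q = np.array([[lambda2, 0.0],
              [0.0,     0.0]])  # transition covariance, as above
R = sigma2                      # observation variance, as above
X0 = np.array([120.0, 120.0])   # initial state (mg/dl)
P0 = 10.0 * np.eye(2)           # initial state covariance

def kalman_step(x, P, y):
    """One predict/update cycle for a scalar observation y."""
    # Predict
    x_pred = F @ x
    P_pred = F @ P @ F.T + Q
    # Update
    S = H @ P_pred @ H + R          # innovation variance (scalar)
    K = (P_pred @ H) / S            # Kalman gain
    x_new = x_pred + K * (y - H @ x_pred)
    P_new = P_pred - np.outer(K, H @ P_pred)
    return x_new, P_new

x, P = X0, P0
for y in [118.0, 121.0, 125.0]:     # a few fake CGM readings
    x, P = kalman_step(x, P, y)
print(x)  # filtered [level, previous level]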
# _X_ts_next = np.append(_X_ts_next[:, 1:, 0],
#                        y_pred[step]).reshape(1, w_size, 1)
_X_ts_next = np.reshape(np.append(_X_ts_next[:, 1:, :], y_pred[step]),
                        (1, w_size, 1))
# ---------------------------------------------------------------------

y_pred = scaler.inverse_transform(y_pred)  # get back to the original scale
y_future_real = scaler.inverse_transform(Y_ts[t:t + n_steps])
abs_pred_err = np.abs(y_pred - y_future_real)

# Save the errors at 6, 12 and 18 steps ahead
errs_dict['err_18'].append(abs_pred_err[17])
errs_dict['err_12'].append(abs_pred_err[11])
errs_dict['err_6'].append(abs_pred_err[5])

report = utils.forecast_report(errs_dict)
print(report)

# In[133]:

#           30       60        90
# MAE   21.9983  58.5689   79.1210
# RMSE  35.3848  93.0235  116.7260

# In[101]:

y_pred

# In[76]:

np.append(X_ts[0, 1:, 0], 0.22262774)
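# In[ ]:

# The window-rolling line above is the core of recursive multi-step
# forecasting: drop the oldest sample, append the latest prediction, and
# feed the window back to the model. A numpy-only sketch with a stand-in
# `predict` function (the real code uses the trained Keras LSTM):
import numpy as np

w_size, n_steps = 36, 18
window = np.random.rand(1, w_size, 1)   # fake scaled window, shape (1, w, 1)

def predict(x):                          # stand-in for model.predict
    return np.array([x[0, -1, 0] * 0.99])

y_pred = np.empty(n_steps)
for step in range(n_steps):
    y_pred[step] = predict(window)[0]
    # roll the window: keep the last w_size - 1 values, append the forecast
    window = np.reshape(np.append(window[:, 1:, :], y_pred[step]),
                        (1, w_size, 1))
print(y_pred)  # recursive 18-step-ahead trajectory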
def main(args):
    """Run ARIMA experiments."""
    ### TODO: deleteme ###
    # List all completed patients
    completed = list(
        filter(lambda x: x.endswith('.pkl'),
               os.listdir('/home/samu/projects/glicemie/experiments/cgm-tools/scripts')))
    completed = [x[:-4] + '.csv' for x in completed]  # 'foo.pkl' -> 'foo.csv'
    ### TODO: deleteme ###

    # Load the full data set from a pickle file (see data_wrangler.py)
    dfs_full = pkl.load(open(args.data_folder, 'rb'))

    # Keep only patients with more than `THRESHOLD` days of CGM acquisition
    _threshold = args.threshold
    if _threshold is None:
        _threshold = datetime.timedelta(days=3.5)  # default
    dfs = utils.filter_patients(dfs_full, _threshold)

    # ----------------- TEST ----------------------------- #
    # Experiment parameters
    burn_in = 300  # burn-in samples used to learn the best order via cv
    n_splits = 15
    # burn_in = 144  # burn-in samples used to learn the best order via cv
    # n_splits = 8
    w_size = 36  # window size
    ph = 18  # prediction horizon

    # Get the patients list
    patients = list(dfs.keys())

    for count, idx in enumerate(patients):
        if idx not in completed:
            print("Evaluating patient: {} ({}/{}) ...".format(
                idx, count, len(patients)))
            df = utils.gluco_extract(dfs[idx], return_df=True)

            # Learn the best order via cv
            out = arima.grid_search(df, burn_in=burn_in, n_splits=n_splits,
                                    p_bounds=(1, 4), d_bounds=(1, 2),
                                    q_bounds=(1, 4), ic_score='AIC',
                                    return_order_rank=True,
                                    return_final_index=True, verbose=False)
            opt_order, order_rank, final_index = out
            print("Order rank:\n{}".format(order_rank))

            df = df.iloc[burn_in:]  # don't mix up training/test
            errs = None

            # Try the orders from best to worst
            for order in order_rank:
                p, d, q = order
                try:  # perform moving-window ARIMA
                    print('Using ARIMA({}, {}, {}) ...'.format(p, d, q))
                    errs, forecast = arima.moving_window(
                        df, w_size=w_size, ph=ph, p=p, d=d, q=q,
                        start_params=None, verbose=False)
                    print('ARIMA({}, {}, {}) success'.format(p, d, q))
                    break  # greedy behavior: take the first order that works
                except Exception as e:
                    print('ARIMA({}, {}, {}) failure'.format(p, d, q))
                    print('arima.moving_window raised the following exception')
                    print(e)

            if errs is not None:
                # Save the results report
                error_summary = utils.forecast_report(errs)
                print(error_summary)

                # dump it into a pkl
                pkl.dump(error_summary, open(idx + '.pkl', 'wb'))

                try:
                    # Plot the signal and its fit
                    plotting.cgm(df, forecast['ts'], title='Patient ' + idx,
                                 savefig=True)
                    # Plot the residuals
                    plotting.residuals(df, forecast['ts'], skip_first=w_size,
                                       skip_last=ph, title='Patient ' + idx,
                                       savefig=True)
                except Exception:
                    print("Plotting failed for patient {}".format(idx))
        else:
            print("{} already completed".format(idx))
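# A minimal sketch of the command-line entry point that `main(args)` expects:
# it only needs `args.data_folder` and `args.threshold`. The flag names and
# the float-days convention below are assumptions, not necessarily those of
# the original script.
if __name__ == '__main__':
    import argparse
    import datetime

    parser = argparse.ArgumentParser(description='Run ARIMA experiments.')
    parser.add_argument('data_folder',
                        help='pickle file produced by data_wrangler.py')
    parser.add_argument('--threshold', default=None, type=float,
                        help='minimum days of CGM acquisition')
    args = parser.parse_args()
    if args.threshold is not None:
        args.threshold = datetime.timedelta(days=args.threshold)
    main(args)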
def worker(idx):
    """Spawn the work process."""
    import os
    my_rank_ = comm.Get_rank()
    t1_ = time.time()

    burn_in = 300  # burn-in samples used to learn the best order via cv
    w_size = 36
    # print("Evaluating patient {}".format(idx))

    # Train/test split
    df = utils.gluco_extract(dfs[idx], return_df=True)
    train_df0 = df.iloc[:burn_in]
    test_df0 = df.iloc[burn_in:]

    # Preprocess the dataset
    # BEWARE! Do not use the test set to learn the scaling parameters
    scaler = MinMaxScaler(feature_range=(0, 1))
    train_data = scaler.fit_transform(train_df0)
    test_data = scaler.transform(test_df0)

    # Create an LSTM-suitable {X, Y} dataset
    X_tr, Y_tr = lstm.create_XY_dataset(train_data, window_size=w_size)
    X_ts, Y_ts = lstm.create_XY_dataset(test_data, window_size=w_size)

    # Create the LSTM model
    # model = lstm.create_model(n_units=4)
    # Create a cross-validated LSTM model
    param_grid = {'n_units': [4, 8, 16]}
    keras_regressor = KerasRegressor(build_fn=lstm.create_model,
                                     batch_size=1, verbose=0, nb_epoch=50)
    model = GridSearchCV(keras_regressor, param_grid=param_grid)

    tic = time.time()
    # Fit the model
    # model.fit(X_tr, Y_tr, nb_epoch=50, batch_size=1, verbose=1)
    model.fit(X_tr, Y_tr)
    print("Fitting time: {} seconds".format(time.time() - tic))

    # Predict over the prediction horizon and save the errors
    tic = time.time()
    errs, forecast = lstm.online_forecast(X_ts, Y_ts, model, scaler, ph=18,
                                          verbose=True)
    print("Predicting time: {} seconds".format(time.time() - tic))

    error_summary = utils.forecast_report(errs)
    print(error_summary)
    pkl.dump(error_summary,
             open(os.path.join(ROOT, 'results', idx + '.pkl'), 'wb'))
    # model.save(os.path.join(ROOT, 'results', idx + '_model_.h5'))

    # -- Plotting -- #
    try:
        import statsmodels.api as sm
        import numpy as np
        import matplotlib
        matplotlib.use('agg')
        import matplotlib.pyplot as plt

        Y_pred_tr = model.predict(X_tr)
        Y_pred_ts = model.predict(X_ts)  # maybe it's just forecast['ts']
        Y_pred_tr_plot = scaler.inverse_transform(Y_pred_tr)
        Y_pred_ts_plot = scaler.inverse_transform(Y_pred_ts)

        plt.figure(figsize=(10, 6), dpi=300)
        plt.subplot(211)
        plt.plot(df.index, df.values, label='real cgm')
        plt.plot(df.index[w_size:burn_in], Y_pred_tr_plot.ravel(), '--',
                 label='y_tr')
        plt.plot(df.index[burn_in + w_size:], Y_pred_ts_plot.ravel(), '--',
                 label='y_ts')
        plt.legend()

        residuals = Y_pred_ts_plot.ravel() - df.values[burn_in + w_size:].ravel()
        mae = np.mean(np.abs(residuals))
        rmse = np.sqrt(np.mean(residuals ** 2))
        DW = sm.stats.durbin_watson(residuals)

        plt.subplot(212)
        plt.plot(df.index[burn_in + w_size:], residuals)
        plt.title("MAE {:2.5f} | RMSE {:2.5f} | DW {:2.5f}".format(
            mae, rmse, DW))
        plt.tight_layout()
        plt.savefig(os.path.join(ROOT, 'results', idx + '.png'))
    except Exception:
        print('Plotting failed')

    # Do the work
    # time.sleep(2)
    t2_ = time.time()
    if VERBOSITY:
        print(' ---> processor %s has calculated for %s' % (my_rank_,
                                                            t2_ - t1_))
    return t2_ - t1_
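# `worker` relies on module-level globals: an MPI communicator (`comm`) plus
# `dfs`, `ROOT` and `VERBOSITY`. A minimal mpi4py dispatch sketch, ASSUMING a
# simple static round-robin split of patients by rank (the original scheduler
# may differ):
from mpi4py import MPI

comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()

patients = sorted(dfs.keys())        # same order on every rank
my_patients = patients[rank::size]   # round-robin partition

timings = [worker(idx) for idx in my_patients]
all_timings = comm.gather(timings, root=0)
if rank == 0:
    total = sum(t for chunk in all_timings for t in chunk)
    print('Total compute time across ranks: {:.1f} s'.format(total))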