def slave(dfs):
    """Fit the ARIMA model on every patient index received from rank 0.

    The function loops forever: it blocks on ``COMM.recv`` for a message
    coming from rank 0, stops as soon as a message tagged ``EXIT`` arrives,
    and otherwise treats the payload as a key of `dfs`, fits the model via
    ``_worker`` and sends ``(idx, out[0], out[1])`` back to rank 0.

    Parameters
    ----------
    dfs : indexable collection
        Collection indexed by the received ``idx``; each item is handed to
        ``utils.gluco_extract(..., return_df=True)``.

    Returns
    -------
    None
        Returns when the ``EXIT`` tag is received, or after an unexpected
        error has been reported on stdout.
    """
    try:
        while True:
            status_ = MPI.Status()
            idx = COMM.recv(source=0, tag=MPI.ANY_TAG, status=status_)

            # check the tag of the received message
            if status_.tag == EXIT:
                return

            # do the work
            print(NAME + ": slave received", RANK, idx)
            df = utils.gluco_extract(dfs[idx], return_df=True)
            try:
                out = _worker(df)
            except Exception:
                # Fit failed for the current patient: report placeholders so
                # the master still gets an answer for this index.
                # `Exception` (not a bare except) lets KeyboardInterrupt and
                # SystemExit stop the process.
                out = (None, None, None)
                print(NAME + ": slave fit failed", RANK, idx)

            COMM.send((idx, out[0], out[1]), dest=0, tag=0)
    except Exception as exc:
        # `StandardError` was removed in Python 3; referencing it here would
        # raise NameError instead of reporting the real failure.
        print("Quitting ... TB:", str(exc))
# Load data set from pickle file.
# NOTE: unpickling can execute arbitrary code -- only load trusted files.
with open('../../data/dfs_py3.pkl', 'rb') as pkl_file:
    dfs_full = pkl.load(pkl_file)

# ## 1.1 Remove short acquisitions

# In[31]:

# Keep only patients with more than 3.5 days of acquisition
thresh = datetime.timedelta(days=3.5)

# List of patients that satisfy inclusion criterion
ok_keys = []
for k in dfs_full.keys():
    df = dfs_full[k]
    time, gluco = gluco_extract(df)
    try:
        delta = time[-1] - time[0]
        if delta > thresh:
            ok_keys.append(k)
    except Exception:
        # Best effort: a patient whose time axis cannot be measured (e.g. it
        # is empty) is simply excluded. KeyboardInterrupt/SystemExit are no
        # longer swallowed.
        continue

# Filter short time-series
dfs = {k: dfs_full[k] for k in ok_keys}

# In[32]:

# Example: plot a single patient
k = 0
idx = list(dfs.keys())[k]
print(idx)
burn_in = 300 # burn-in samples used to learn the best order via cv n_splits = 15 ph = 18 # prediction horizon # State-space model: # transition matrix (double integration model) F = np.array([[2, -1], [1, 0]]) # measures matrix H = np.array([1, 0]) # Get patients list patients = list(dfs.keys()) for idx in patients: df = utils.gluco_extract(dfs[idx], return_df=True) # Learn the best order via cv # lambda2_range = np.logspace(-12, -4, 10) lambda2_range = np.logspace(-12, -4, 3) sigma2_range = np.linspace(1, 40, 3) # sigma2_range = np.linspace(1, 40, 10) out = kf.grid_search(df, lambda2_range, sigma2_range, burn_in=burn_in, n_splits=15, F=F, H=H, return_mean_vld_error=True, return_initial_state_mean=True,
# TODO: deleteme -- developer-specific folder scanned for already dumped results
_COMPLETED_DIR = '/home/samu/projects/glicemie/experiments/cgm-tools/scripts'


def _completed_patients(results_dir):
    """Return the set of patient ids that already have a '.pkl' report."""
    if not os.path.isdir(results_dir):
        # Folder not available on this machine: nothing is known to be done.
        return set()

    completed = set()
    for fname in os.listdir(results_dir):
        if not fname.endswith('.pkl'):
            continue
        # Reports are written below as `idx + '.pkl'`, so stripping the
        # suffix gives back the patient id. The previous code used `x[-3]`
        # (a single character), which mapped every file to 'p.csv'.
        stem = fname[:-len('.pkl')]
        completed.add(stem)
        # NOTE(review): the original appended '.csv' to the name; keep that
        # variant too in case older reports were saved without the '.csv'
        # part of the patient id -- confirm and drop one of the two.
        completed.add(stem + '.csv')
    return completed


def main(args):
    """Run ARIMA experiments.

    For every patient that passes the acquisition-length filter and has no
    report yet, the ARIMA order ranking is learned on the first `burn_in`
    samples via ``arima.grid_search``; the remaining samples are then
    forecast with ``arima.moving_window`` using the first order of the
    ranking that does not raise. The error summary is printed and pickled
    to ``<idx>.pkl`` in the current working directory, and the fit and
    residual plots are saved (plotting failures are only reported).

    Parameters
    ----------
    args : object
        Must expose ``data_folder`` (path of the pickle file holding the full
        data set; it is opened as a file) and ``threshold`` (minimum
        acquisition length passed to ``utils.filter_patients``; ``None``
        selects the default of 3.5 days).
    """
    # List all completed patients
    completed = _completed_patients(_COMPLETED_DIR)

    # Load full data set from pickle file (see data_wrangler.py)
    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    with open(args.data_folder, 'rb') as data_file:
        dfs_full = pkl.load(data_file)

    # Keep only patients with more than `THRESHOLD` days of CGM acquisition
    _threshold = args.threshold
    if _threshold is None:
        _threshold = datetime.timedelta(days=3.5)  # default
    dfs = utils.filter_patients(dfs_full, _threshold)

    # Experiment parameters
    # (alternative setting tried before: burn_in = 144, n_splits = 8)
    burn_in = 300  # burn-in samples used to learn the best order via cv
    n_splits = 15
    w_size = 36  # Window-size
    ph = 18  # prediction horizon

    # Get patients list
    patients = list(dfs.keys())

    for count, idx in enumerate(patients):
        if idx in completed:
            print("{} already completed".format(idx))
            continue

        print("Evaluating patient: {} ({}/{}) ...".format(
            idx, count, len(patients)))
        df = utils.gluco_extract(dfs[idx], return_df=True)

        # Learn the best order via cv
        out = arima.grid_search(df, burn_in=burn_in, n_splits=n_splits,
                                p_bounds=(1, 4), d_bounds=(1, 2),
                                q_bounds=(1, 4), ic_score='AIC',
                                return_order_rank=True,
                                return_final_index=True,
                                verbose=False)
        # Only the ranking is needed; the best order is its first entry.
        _, order_rank, _ = out
        print("Order rank:\n{}".format(order_rank))

        df = df.iloc[burn_in:]  # don't mix-up training/test

        errs = None
        # Try the order from best to worst
        for p, d, q in order_rank:
            try:
                # perform moving-window arma
                print('Using ARIMA({}, {}, {}) ...'.format(p, d, q))
                errs, forecast = arima.moving_window(df, w_size=w_size,
                                                     ph=ph, p=p, d=d, q=q,
                                                     start_params=None,
                                                     verbose=False)
                print('ARIMA({}, {}, {}) success'.format(p, d, q))
                break  # greedy behavior: take the first that works
            except Exception as e:
                print('ARIMA({}, {}, {}) failure'.format(p, d, q))
                print('arima.moving_window raised the following exception')
                print(e)

        if errs is None:
            # No order of the ranking could be fitted: nothing to report.
            continue

        # Save results reports
        error_summary = utils.forecast_report(errs)
        print(error_summary)

        # dump it into a pkl
        with open(idx + '.pkl', 'wb') as report_file:
            pkl.dump(error_summary, report_file)

        try:
            # Plot signal and its fit
            plotting.cgm(df, forecast['ts'], title='Patient ' + idx,
                         savefig=True)
            # Plot residuals
            plotting.residuals(df, forecast['ts'], skip_first=w_size,
                               skip_last=ph, title='Patient ' + idx,
                               savefig=True)
        except Exception:
            # Plots are optional; keep going but do not swallow
            # KeyboardInterrupt/SystemExit as the bare except did.
            print("Plotting failed for patient {}".format(idx))
def worker(idx):
    """Spawn the work process: fit and evaluate an LSTM for one patient.

    The patient's series is split into the first `burn_in` samples
    (training) and the rest (test), scaled to [0, 1], and turned into
    windowed {X, Y} datasets. A ``KerasRegressor`` wrapped in
    ``GridSearchCV`` over ``n_units`` is fitted on the training data, then
    ``lstm.online_forecast`` is run on the test data. The error summary is
    printed and pickled to ``ROOT/results/<idx>.pkl``; a diagnostic figure
    is saved to ``ROOT/results/<idx>.png`` (plotting failures are only
    reported).

    Parameters
    ----------
    idx : str
        Key of the patient in the module-level `dfs`; also used as the
        base name of the output files.

    Returns
    -------
    float
        Wall-clock seconds spent in this call.
    """
    import os

    my_rank_ = comm.Get_rank()
    t1_ = time.time()

    burn_in = 300  # burn-in samples used to learn the best order via cv
    w_size = 36

    # Train/test split
    df = utils.gluco_extract(dfs[idx], return_df=True)
    train_df0 = df.iloc[:burn_in]
    test_df0 = df.iloc[burn_in:]

    # preprocess the dataset
    # BEWARE! The scaling parameters are learned on the training set only;
    # the test set is merely transformed with them.
    scaler = MinMaxScaler(feature_range=(0, 1))
    train_data = scaler.fit_transform(train_df0)
    test_data = scaler.transform(test_df0)

    # Create LSTM suitable {X, Y} dataset
    X_tr, Y_tr = lstm.create_XY_dataset(train_data, window_size=w_size)
    X_ts, Y_ts = lstm.create_XY_dataset(test_data, window_size=w_size)

    # Create cross-validated LSTM model
    param_grid = {'n_units': [4, 8, 16]}
    keras_regressor = KerasRegressor(build_fn=lstm.create_model,
                                     batch_size=1,
                                     verbose=0,
                                     nb_epoch=50)
    model = GridSearchCV(keras_regressor, param_grid=param_grid)

    # Fit the model
    tic = time.time()
    model.fit(X_tr, Y_tr)
    print("Fitting time: {} seconds".format(time.time() - tic))

    # Predict the ph and save the errors
    tic = time.time()
    errs, _ = lstm.online_forecast(X_ts, Y_ts, model, scaler,
                                   ph=18, verbose=True)
    print("Predicting time: {} seconds".format(time.time() - tic))
    error_summary = utils.forecast_report(errs)
    print(error_summary)
    with open(os.path.join(ROOT, 'results', idx + '.pkl'), 'wb') as out_file:
        pkl.dump(error_summary, out_file)

    # -- Plotting -- #
    try:
        import statsmodels.api as sm
        import numpy as np
        import matplotlib
        matplotlib.use('agg')
        import matplotlib.pyplot as plt

        Y_pred_tr = model.predict(X_tr)
        Y_pred_ts = model.predict(X_ts)  # maybe its just forecast['ts']
        Y_pred_tr_plot = scaler.inverse_transform(Y_pred_tr)
        Y_pred_ts_plot = scaler.inverse_transform(Y_pred_ts)

        fig = plt.figure(figsize=(10, 6), dpi=300)
        try:
            plt.subplot(211)
            plt.plot(df.index, df.values, label='real cgm')
            plt.plot(df.index[w_size:burn_in], Y_pred_tr_plot.ravel(), '--',
                     label='y_tr')
            # Test predictions: labelled 'y_ts' (was a duplicated 'y_tr').
            plt.plot(df.index[burn_in + w_size:], Y_pred_ts_plot.ravel(),
                     '--', label='y_ts')
            plt.legend()

            residuals = (Y_pred_ts_plot.ravel()
                         - df.values[burn_in + w_size:].ravel())
            # Mean *absolute* error: the signed mean lets errors of opposite
            # sign cancel out and is not the MAE shown in the title.
            mae = np.mean(np.abs(residuals))
            rmse = np.sqrt(np.mean(residuals**2))
            DW = sm.stats.durbin_watson(residuals)

            plt.subplot(212)
            # Residuals are computed against df.values[burn_in + w_size:],
            # so they are drawn on the matching time stamps.
            plt.plot(df.index[burn_in + w_size:], residuals)
            plt.title("MAE {:2.5f} | RMSE {:2.5f} | DW {:2.5f}".format(
                mae, rmse, DW))
            plt.tight_layout()
            plt.savefig(os.path.join(ROOT, 'results', idx + '.png'))
        finally:
            # Release the figure: a worker may process many patients.
            plt.close(fig)
    except Exception:
        # Plots are optional; do not swallow KeyboardInterrupt/SystemExit.
        print('Plotting failed')

    t2_ = time.time()

    if VERBOSITY:
        print(' ---> processor %s has calculated for %s' %
              (my_rank_, t2_ - t1_))

    return t2_ - t1_