Exemple #1
0
def _worker(df):
    """Select an ARIMA order by cross-validation and run a moving-window forecast.

    The candidate orders returned by ``arima.grid_search`` are tried from
    best to worst; the first one for which ``arima.moving_window`` completes
    without raising is kept (greedy behavior).

    Note: ``burn_in``, ``n_splits``, ``w_size`` and ``ph`` are not parameters;
    they are read from the enclosing/module scope, as are ``arima`` and
    ``utils``.

    Parameters
    ----------
    df : object supporting ``.iloc`` slicing (presumably a pandas DataFrame
        holding the glucose time-series -- TODO confirm against the caller).

    Returns
    -------
    error_summary
        Whatever ``utils.forecast_report`` returns for the forecast errors.
    forecast
        Second value returned by ``arima.moving_window``.

    Raises
    ------
    RuntimeError
        If ``arima.moving_window`` fails for every candidate order (or the
        order ranking is empty), so that no forecast is available.
    """
    # Learn the best order via cv
    out = arima.grid_search(df, burn_in=burn_in, n_splits=n_splits,
                            p_bounds=(1, 4), d_bounds=(1, 2), q_bounds=(1, 4),
                            ic_score='AIC', return_order_rank=True,
                            return_final_index=True, verbose=False)
    # Only the ranking is needed here; the 3-way unpacking still checks the
    # shape of the grid-search output.
    _, order_rank, _ = out

    print("Order rank:\n{}".format(order_rank))

    df = df.iloc[burn_in:]  # don't mix-up training/test

    # Stay None unless one of the candidate orders succeeds.
    errs = None
    forecast = None

    # Try the order from best to worst
    for p, d, q in order_rank:
        try:  # perform moving-window arma
            print('Using ARIMA({}, {}, {}) ...'.format(p, d, q))
            errs, forecast = arima.moving_window(df, w_size=w_size, ph=ph,
                                                 p=p, d=d, q=q,
                                                 start_params=None,
                                                 verbose=False)
            print('ARIMA({}, {}, {}) success'.format(p, d, q))
            break  # greedy behavior: take the first that works
        except Exception as e:
            print('ARIMA({}, {}, {}) failure'.format(p, d, q))
            print('arima.moving_window raised the following exception')
            print(e)

    if errs is None:
        # Previously this path ended in an UnboundLocalError on `errs`.
        raise RuntimeError(
            'arima.moving_window failed for every candidate ARIMA order')

    # Save results reports
    error_summary = utils.forecast_report(errs)

    return error_summary, forecast
Exemple #2
0
        idx, lambda2, sigma2))
    Q = np.array([[lambda2, 0], [0, 0]])  # transition_covariance
    R = sigma2  # observation (co)variance

    df = df.iloc[burn_in:]  # don't mix-up training/test

    _kf = kf.cgmkalmanfilter(F=F, Q=Q, R=R, X0=X0, P0=P0)
    errs, forecast = kf.online_forecast(df,
                                        _kf,
                                        H,
                                        ph=18,
                                        lambda2=lambda2,
                                        sigma2=sigma2,
                                        verbose=True)
    # Save results reports
    error_summary = utils.forecast_report(errs)
    print(error_summary)
    # import matplotlib.pyplot as plt
    # plotting.cgm(df, forecast['ts'], title='Patient '+idx,
    #              savefig=False)
    # plotting.residuals(df, forecast['ts'], skip_first=burn_in,
    #                    skip_last=ph, title='Patient '+idx,
    #                    savefig=False)
    # plt.show()
    # break

    # # dump it into a pkl
    pkl.dump(error_summary, open(idx + '.pkl', 'wb'))

    try:
        # Plot signal and its fit
        #         _X_ts_next = np.append(_X_ts_next[:, 1:, 0], y_pred[step]).reshape(1, w_size, 1)
        _X_ts_next = np.reshape(np.append(_X_ts_next[:, 1:, :], y_pred[step]),
                                (1, w_size, 1))
    # --------------------------------------------------------------------- #

    y_pred = scaler.inverse_transform(
        y_pred)  # get back to original dimensions
    y_future_real = scaler.inverse_transform(Y_ts[t:t + n_steps])
    abs_pred_err = np.abs(y_pred - y_future_real)

    # Save errors
    errs_dict['err_18'].append(abs_pred_err[17])
    errs_dict['err_12'].append(abs_pred_err[11])
    errs_dict['err_6'].append(abs_pred_err[5])

# Build the summary report from the errors collected in `errs_dict` and show it.
report = utils.forecast_report(errs_dict)
print(report)

# In[133]:

# Recorded output of an earlier run, kept for reference.
# NOTE(review): the column headers are presumably prediction horizons in
# minutes -- confirm against utils.forecast_report.
#            30       60       90
# MAE   21.9983  58.5689   79.121
# RMSE  35.3848  93.0235  116.726

# In[101]:

# Bare expression: only displays the value of `y_pred` in an interactive
# session; it has no effect when run as a script.
y_pred

# In[76]:

# Scratch check: take X_ts[0, 1:, 0] (everything but the first element along
# the second axis) and append a constant to it. The result is not stored.
np.append(X_ts[0, 1:, 0], 0.22262774)
Exemple #4
0
def main(args):
    """Run ARIMA experiments.

    For every patient that passes the acquisition-length filter and has not
    been processed yet, learn the ARIMA order ranking by cross-validation on
    the burn-in samples, run a moving-window forecast with the first order
    that works, dump the error report to ``<idx>.pkl`` in the current working
    directory and try to save the signal/residual plots.

    Parameters
    ----------
    args : object with attributes
        ``data_folder`` -- path of the pickle file holding the full data set;
        ``threshold`` -- minimum acquisition length passed to
        ``utils.filter_patients``; ``None`` selects the default of 3.5 days.
    """
    ### TODO: deleteme ###
    # List all completed patients
    completed = _list_completed(
        '/home/samu/projects/glicemie/experiments/cgm-tools/scripts')
    ### TODO: deleteme ###

    # Load full data set from pickle file (see data_wrangler.py)
    # NOTE(review): unpickling executes arbitrary code; only load trusted
    # files here.
    with open(args.data_folder, 'rb') as data_file:
        dfs_full = pkl.load(data_file)

    # Keep only patients with more than `THRESHOLD` days of CGM acquisition
    _threshold = args.threshold
    if _threshold is None:
        _threshold = datetime.timedelta(days=3.5)  # default
    dfs = utils.filter_patients(dfs_full, _threshold)

    # ----------------- TEST ----------------------------- #
    # Experiment parameters
    burn_in = 300  # burn-in samples used to learn the best order via cv
    n_splits = 15
    # burn_in = 144  # burn-in samples used to learn the best order via cv
    # n_splits = 8
    w_size = 36  # Window-size
    ph = 18  # prediction horizon

    # Get patients list
    patients = list(dfs.keys())

    for count, idx in enumerate(patients):
        if idx in completed:
            print("{} already completed".format(idx))
            continue

        print("Evaluating patient: {} ({}/{}) ...".format(
            idx, count, len(patients)))
        df = utils.gluco_extract(dfs[idx], return_df=True)

        # Learn the best order via cv
        out = arima.grid_search(df,
                                burn_in=burn_in,
                                n_splits=n_splits,
                                p_bounds=(1, 4),
                                d_bounds=(1, 2),
                                q_bounds=(1, 4),
                                ic_score='AIC',
                                return_order_rank=True,
                                return_final_index=True,
                                verbose=False)
        # Only the ranking is used below.
        _, order_rank, _ = out

        print("Order rank:\n{}".format(order_rank))

        df = df.iloc[burn_in:]  # don't mix-up training/test

        # Stay None unless one of the candidate orders succeeds.
        errs = None
        forecast = None
        # Try the order from best to worst
        for p, d, q in order_rank:
            try:  # perform moving-window arma
                print('Using ARIMA({}, {}, {}) ...'.format(p, d, q))
                errs, forecast = arima.moving_window(df,
                                                     w_size=w_size,
                                                     ph=ph,
                                                     p=p,
                                                     d=d,
                                                     q=q,
                                                     start_params=None,
                                                     verbose=False)
                print('ARIMA({}, {}, {}) success'.format(p, d, q))
                break  # greedy behavior: take the first that works
            except Exception as e:
                print('ARIMA({}, {}, {}) failure'.format(p, d, q))
                print('arima.moving_window raised the following exception')
                print(e)

        if errs is None:
            # Every candidate order failed: nothing to report for this patient.
            print("No ARIMA order succeeded for patient {}".format(idx))
            continue

        # Save results reports
        error_summary = utils.forecast_report(errs)
        print(error_summary)
        # dump it into a pkl
        with open(idx + '.pkl', 'wb') as report_file:
            pkl.dump(error_summary, report_file)

        # Plotting is best-effort: a failure must not stop the experiment.
        try:
            # Plot signal and its fit
            plotting.cgm(df,
                         forecast['ts'],
                         title='Patient ' + idx,
                         savefig=True)

            # Plot residuals
            plotting.residuals(df,
                               forecast['ts'],
                               skip_first=w_size,
                               skip_last=ph,
                               title='Patient ' + idx,
                               savefig=True)
        except Exception as e:
            print("Plotting failed for patient {}".format(idx))
            print(e)


def _list_completed(results_dir):
    """Return the set of patient ids that already have a ``.pkl`` report.

    A report named ``<stem>.pkl`` marks both ``<stem>`` and ``<stem>.csv`` as
    completed: ``main`` dumps reports as ``idx + '.pkl'``, while the original
    filter appended ``'.csv'`` to the stripped name, so both spellings of the
    patient id are accepted. A missing ``results_dir`` yields an empty set
    instead of raising.
    """
    if not os.path.isdir(results_dir):
        return set()

    suffix = '.pkl'
    completed = set()
    for name in os.listdir(results_dir):
        if name.endswith(suffix):
            stem = name[:-len(suffix)]
            completed.update((stem, stem + '.csv'))
    return completed
Exemple #5
0
def worker(idx):
    """Fit, evaluate and plot a cross-validated LSTM forecaster for one patient.

    The first ``burn_in`` samples of the patient's series are used for
    training (with a grid search over the number of LSTM units); the rest is
    used for the online forecast. The error report is pickled to
    ``ROOT/results/<idx>.pkl`` and, on a best-effort basis, a diagnostic
    figure is saved to ``ROOT/results/<idx>.png``.

    Relies on names defined outside this function: ``comm``, ``dfs``,
    ``utils``, ``lstm``, ``MinMaxScaler``, ``KerasRegressor``,
    ``GridSearchCV``, ``pkl``, ``time``, ``ROOT`` and ``VERBOSITY``.

    Parameters
    ----------
    idx : str
        Key of the patient in ``dfs``; also used as the output file stem.

    Returns
    -------
    float
        Wall-clock seconds spent in this call.
    """
    import os
    my_rank_ = comm.Get_rank()

    t1_ = time.time()
    burn_in = 300  # burn-in samples used to learn the best order via cv
    w_size = 36

    # print("Evaluating patient {}".format(idx))
    # Train/test split
    df = utils.gluco_extract(dfs[idx], return_df=True)
    train_df0 = df.iloc[:burn_in]
    test_df0 = df.iloc[burn_in:]

    # preprocess the dataset
    # BEWARE! Do not use the test set to learn the scaling parameters:
    # the scaler is fitted on the training split only.
    scaler = MinMaxScaler(feature_range=(0, 1))
    train_data = scaler.fit_transform(train_df0)
    test_data = scaler.transform(test_df0)

    # Create LSTM suitable {X, Y} dataset
    X_tr, Y_tr = lstm.create_XY_dataset(train_data, window_size=w_size)
    X_ts, Y_ts = lstm.create_XY_dataset(test_data, window_size=w_size)

    # Create LSTM model
    # model = lstm.create_model(n_units=4)

    # Create cross-validated LSTM model
    param_grid = {'n_units': [4, 8, 16]}
    keras_regressor = KerasRegressor(build_fn=lstm.create_model,
                                     batch_size=1,
                                     verbose=0,
                                     nb_epoch=50)
    model = GridSearchCV(keras_regressor, param_grid=param_grid)

    tic = time.time()
    # Fit the model
    # model.fit(X_tr, Y_tr, nb_epoch=50, batch_size=1, verbose=1)
    model.fit(X_tr, Y_tr)
    print("Fitting time: {} seconds".format(time.time() - tic))

    # Predict the ph and save the errors
    tic = time.time()
    errs, forecast = lstm.online_forecast(X_ts,
                                          Y_ts,
                                          model,
                                          scaler,
                                          ph=18,
                                          verbose=True)
    print("Predicting time: {} seconds".format(time.time() - tic))
    error_summary = utils.forecast_report(errs)
    print(error_summary)
    with open(os.path.join(ROOT, 'results', idx + '.pkl'), 'wb') as out_file:
        pkl.dump(error_summary, out_file)
    #model.save(os.path.join(ROOT, 'results', idx+'_model_.h5'))

    # -- Plotting -- #
    # Best-effort: a plotting problem must not invalidate the saved report.
    try:
        import statsmodels.api as sm
        import numpy as np
        import matplotlib
        matplotlib.use('agg')
        import matplotlib.pyplot as plt
        Y_pred_tr = model.predict(X_tr)
        Y_pred_ts = model.predict(X_ts)  # maybe its just forecast['ts']
        Y_pred_tr_plot = scaler.inverse_transform(Y_pred_tr)
        Y_pred_ts_plot = scaler.inverse_transform(Y_pred_ts)

        # Time stamps of the test predictions: the first `w_size` test
        # samples only feed the first input window.
        test_index = df.index[burn_in + w_size:]

        fig = plt.figure(figsize=(10, 6), dpi=300)
        try:
            plt.subplot(211)
            plt.plot(df.index, df.values, label='real cgm')
            plt.plot(df.index[w_size:burn_in],
                     Y_pred_tr_plot.ravel(),
                     '--',
                     label='y_tr')
            plt.plot(test_index,
                     Y_pred_ts_plot.ravel(),
                     '--',
                     label='y_ts')
            plt.legend()

            residuals = Y_pred_ts_plot.ravel() - df.values[burn_in +
                                                           w_size:].ravel()
            # MAE is the mean of the absolute residuals (the signed mean
            # would let positive and negative errors cancel out).
            mae = np.mean(np.abs(residuals))
            rmse = np.sqrt(np.mean(residuals**2))
            DW = sm.stats.durbin_watson(residuals)

            plt.subplot(212)
            # Same time axis as the samples the residuals were computed on.
            plt.plot(test_index, residuals)
            plt.title("MAE {:2.5f} | RMSE {:2.5f} | DW {:2.5f}".format(
                mae, rmse, DW))
            plt.tight_layout()
            plt.savefig(os.path.join(ROOT, 'results', idx + '.png'))
        finally:
            # Release the figure so repeated calls do not accumulate figures.
            plt.close(fig)
    except Exception as e:
        print('Plotting failed')
        print(e)

    # Do the work
    # time.sleep(2)

    t2_ = time.time()

    if VERBOSITY:
        print(' ---> processor %s has calculated for %s' %
              (my_rank_, t2_ - t1_))
    return t2_ - t1_