Beispiel #1
0
def main(args):
    """Run ARIMA experiments on multiple machines.

    Every process loads and filters the full data set. The process whose
    ``RANK`` is 0 then calls ``master(dfs)``, all the others call
    ``slave(dfs)``. Once every process has reached the barrier, rank 0
    pickles the value returned by ``master`` to ``full_output.pkl`` in the
    current working directory.

    Parameters
    ----------
    args : object
        Must expose two attributes:

        * ``data_folder`` -- path that is opened in binary mode and
          unpickled (despite its name it is used as a file path).
        * ``threshold`` -- forwarded to ``utils.filter_patients``; when it
          is ``None`` a default of 3.5 days is used.
    """
    # TODO: everybody loads the data here
    # Load full data set from pickle file (see data_wrangler.py).
    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    with open(args.data_folder, 'rb') as data_file:
        dfs_full = pkl.load(data_file)

    # Keep only patients with more than `THRESHOLD` days of CGM acquisition
    _threshold = args.threshold
    if _threshold is None:
        _threshold = datetime.timedelta(days=3.5)  # default
    dfs = utils.filter_patients(dfs_full, _threshold)

    if RANK == 0:
        patients_out = master(dfs)
    else:
        slave(dfs)

    # if IS_MPI_JOB:
    # Wait for all jobs to end
    COMM.barrier()

    # Only rank 0 holds `patients_out`, so only rank 0 writes the output.
    if RANK == 0:
        with open('full_output.pkl', 'wb') as out_file:
            pkl.dump(patients_out, out_file)
Beispiel #2
0
"""KF experiments development."""
from cgmtools import utils
from cgmtools import plotting
from cgmtools.forecast import kf
import datetime
import numpy as np
import pickle as pkl

###############################################################################

# Load full data set from pickle file (see data_wrangler.py).
# NOTE: unpickling can execute arbitrary code -- only load trusted files.
# The context manager makes sure the file handle is closed after loading.
with open('../../data/dfs_py3.pkl', 'rb') as data_file:
    dfs_full = pkl.load(data_file)

# Keep only patients with more than `THRESHOLD` days of CGM acquisition
_threshold = datetime.timedelta(days=3.5)  # default
dfs = utils.filter_patients(dfs_full, _threshold)

burn_in = 300  # burn-in samples used to learn the best order via cv
n_splits = 15
ph = 18  # prediction horizon

# State-space model:
# transition matrix (double integration model)
F = np.array([[2, -1], [1, 0]])
# measures matrix
H = np.array([1, 0])

# Get patients list
patients = list(dfs.keys())

for idx in patients:
Beispiel #3
0
def main(args):
    """Run ARIMA experiments.

    For every patient kept by ``utils.filter_patients`` that has not been
    processed yet:

    1. rank the candidate ARIMA orders with ``arima.grid_search``;
    2. drop the first ``burn_in`` samples and run ``arima.moving_window``
       with the orders of that ranking, in ranking order, stopping at the
       first one that does not raise;
    3. print the error report and pickle it to ``<patient id>.pkl`` in the
       current working directory;
    4. call the plotting helpers for the signal and for the residuals
       (plotting failures are reported but do not stop the run).

    Parameters
    ----------
    args : object
        Must expose two attributes:

        * ``data_folder`` -- path that is opened in binary mode and
          unpickled (despite its name it is used as a file path).
        * ``threshold`` -- forwarded to ``utils.filter_patients``; when it
          is ``None`` a default of 3.5 days is used.
    """
    ### TODO: deleteme ###
    # Collect the patients that already have a report so they are skipped.
    # Reports are written below as `idx + '.pkl'`, so stripping the suffix
    # gives the patient id. Reports whose name lacks the '.csv' part of the
    # id are accepted too, hence both spellings are stored.
    results_dir = '/home/samu/projects/glicemie/experiments/cgm-tools/scripts'
    pkl_suffix = '.pkl'
    completed = set()
    for fname in os.listdir(results_dir):
        if fname.endswith(pkl_suffix):
            stem = fname[:-len(pkl_suffix)]
            completed.update((stem, stem + '.csv'))
    ### TODO: deleteme ###

    # Load full data set from pickle file (see data_wrangler.py).
    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    with open(args.data_folder, 'rb') as data_file:
        dfs_full = pkl.load(data_file)

    # Keep only patients with more than `THRESHOLD` days of CGM acquisition
    _threshold = args.threshold
    if _threshold is None:
        _threshold = datetime.timedelta(days=3.5)  # default
    dfs = utils.filter_patients(dfs_full, _threshold)

    # ----------------- TEST ----------------------------- #
    # Experiment parameters
    burn_in = 300  # burn-in samples used to learn the best order via cv
    n_splits = 15
    # burn_in = 144  # burn-in samples used to learn the best order via cv
    # n_splits = 8
    w_size = 36  # Window-size
    ph = 18  # prediction horizon

    # Get patients list
    patients = list(dfs.keys())

    # 1-based counter so the progress message ends at (N/N).
    for count, idx in enumerate(patients, start=1):
        if idx in completed:
            print("{} already completed".format(idx))
            continue

        print("Evaluating patient: {} ({}/{}) ...".format(
            idx, count, len(patients)))
        df = utils.gluco_extract(dfs[idx], return_df=True)

        # Learn the best order via cv
        out = arima.grid_search(df,
                                burn_in=burn_in,
                                n_splits=n_splits,
                                p_bounds=(1, 4),
                                d_bounds=(1, 2),
                                q_bounds=(1, 4),
                                ic_score='AIC',
                                return_order_rank=True,
                                return_final_index=True,
                                verbose=False)
        # Only the ranking is needed: the orders are tried one by one below.
        _, order_rank, _ = out

        print("Order rank:\n{}".format(order_rank))

        df = df.iloc[burn_in:]  # don't mix-up training/test

        errs = None
        # Try the order from best to worst
        for order in order_rank:
            p, d, q = order
            try:  # perform moving-window arma
                print('Using ARIMA({}, {}, {}) ...'.format(p, d, q))
                errs, forecast = arima.moving_window(df,
                                                     w_size=w_size,
                                                     ph=ph,
                                                     p=p,
                                                     d=d,
                                                     q=q,
                                                     start_params=None,
                                                     verbose=False)
                print('ARIMA({}, {}, {}) success'.format(p, d, q))
                break  # greedy behavior: take the first that works
            except Exception as e:
                print('ARIMA({}, {}, {}) failure'.format(p, d, q))
                print('arima.moving_window raised the following exception')
                print(e)

        if errs is None:
            # Every order failed: nothing to report for this patient.
            continue

        # Save results reports
        error_summary = utils.forecast_report(errs)
        print(error_summary)
        # dump it into a pkl
        with open(idx + '.pkl', 'wb') as report_file:
            pkl.dump(error_summary, report_file)

        # Plotting is best-effort: a failure must not abort the remaining
        # patients, but Ctrl-C / SystemExit must still propagate.
        try:
            # Plot signal and its fit
            plotting.cgm(df,
                         forecast['ts'],
                         title='Patient ' + idx,
                         savefig=True)

            # Plot residuals
            plotting.residuals(df,
                               forecast['ts'],
                               skip_first=w_size,
                               skip_last=ph,
                               title='Patient ' + idx,
                               savefig=True)
        except Exception as e:
            print("Plotting failed for patient {}".format(idx))
            print(e)