import re
import sys
from glob import glob

from sleep_eval import eval_acc
from sleep_misc import load_dataset, grid_search, load_mesa_PSG
from sleep_misc import oakley
from sleep_misc import time_based

# Task number comes from the command line, e.g. ``python script.py 1``.
TASK = int(sys.argv[1])
# Glob pattern matching every raw file belonging to this task.
PATH_TO_FILES = "./datasets/task%d/*" % (TASK)

# Cached HDF dataset produced by a previous preprocessing run.
INPUTFILE="hdf_task%d" % (TASK)
# Output CSV names for the formula-based baselines.
SUMMARY_OUTPUT = "task%d_summary_formulas.csv" % (TASK)
OUTPUT = "task%d_formulas.csv" % (TASK)

print("...Loading Task %d dataset into memory..." % (TASK))
# load_dataset is project-local; presumably returns (train, test, featnames)
# -- only the training split is used here. TODO confirm against sleep_misc.
dftrain, _, _ = load_dataset(INPUTFILE, useCache=True)
print("...Done...")

# Unique subject ids (``mesaid`` column) present in the training split.
uids_train = set(dftrain.mesaid.unique())

def get_uid_from_filename(filename):
    """Return the subject uid embedded in *filename*.

    The uid is the SECOND run of digits in the name (the first run is
    the task number in the path/file prefix).

    Raises:
        IndexError: if the name contains fewer than two digit runs.
    """
    # BUGFIX: under Python 3, ``map()`` returns an iterator, so the
    # original ``map(int, re.findall(...))[1]`` raised TypeError; index
    # the list returned by findall and convert the match instead.
    return int(re.findall(r'\d+', filename)[1])

# Accumulator for per-file dataframes (never filled before the early exit).
dfs = []
# NOTE(review): ``glob`` is used here but, as this file stands, is only
# imported much further down -- running this fragment top-to-bottom would
# raise NameError; ``from glob import glob`` belongs at the top of the file.
for filename in glob(PATH_TO_FILES):

    uid = get_uid_from_filename(filename)
    # Skip files whose subject is not part of the training split.
    if uid not in uids_train:
        continue

    print("Processing: %s" % (filename))
    # NOTE(review): exits after the first matching file -- presumably
    # debug leftover; confirm before removing.
    sys.exit(0)

# Task number from the command line (re-read here: this section is an
# independent dataset-generation step pasted after the previous fragment).
TASK = int(sys.argv[1])

print("Generating dataset for Task %d" % (TASK))
# Name under which the preprocessed HDF cache will be written.
OUTPUTFILE = "hdf_task%d" % (TASK)

# Tasks 1 and 2 read from per-task directories; every other task uses the
# MESA actigraphy test dump.
if TASK in [1, 2]:
    PATH_TO_FILES = "./datasets/task%d/" % (TASK)
else:
    PATH_TO_FILES = "./data/mesa/actigraphy_test/"

# Ground-truth labelling scheme: "stage" for tasks 1-2, "interval" for task 3.
method = "stage" if TASK != 3 else "interval"

print("...Loading dataset into memory...")
# Build the dataset from the raw files and persist it to the HDF cache
# (useCache=False forces a rebuild; saveCache=True writes OUTPUTFILE).
dftrain, dftest, featnames = load_dataset(PATH_TO_FILES,
                                          useCache=False,
                                          saveCache=True,
                                          cacheName=OUTPUTFILE,
                                          ground_truth=method)
print("...Done...")

# Dump the test split (selected columns only) to CSV for later evaluation.
dfoutname = "dftest_task%d.csv" % (TASK)
print("...Saving Task %d dataset to disk. Filename: %s ..." %
      (TASK, dfoutname))
dftest[[
    "mesaid", "linetime", "marker", "interval", "binterval", "gt",
    "gt_sleep_block", "wake"
]].to_csv(dfoutname, index=False)
print("...Done...")
# Example 3 (scrape artifact: "Exemple #3" page marker and vote count
# were pasted in as bare statements, which would raise NameError at runtime)
from sklearn.ensemble import ExtraTreesClassifier, GradientBoostingClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline

from sleep_misc import load_dataset

# Experiment configuration: which cached dataset to load, which model
# family to train, and which metric drives the grid search.
TASKID = 1
INPUTFILE = "hdf_task%d" % (TASKID)
EXPERIMENT = "gbc"
SCORING = "accuracy"  # f1, accuracy, precision
OUTPUT = "task%d_%s_%s" % (TASKID, EXPERIMENT, SCORING)

print("...Loading dataset into memory...")
dftrain, dftest, featnames = load_dataset(INPUTFILE, useCache=True)
print("...Done...")

# Map each experiment name to a factory so only the selected model is
# instantiated; an unknown name leaves ``classifier`` as None, exactly
# like the original if/elif chain.
_classifier_factories = {
    "sgd": lambda: SGDClassifier(random_state=42, n_jobs=1),
    "etc": lambda: ExtraTreesClassifier(random_state=42, n_jobs=8),
    "gbc": lambda: GradientBoostingClassifier(random_state=42),
}
classifier = None
if EXPERIMENT in _classifier_factories:
    classifier = _classifier_factories[EXPERIMENT]()
etc_params = [{
    'classifier__n_estimators': [8, 64, 128, 512, 1024],
    'classifier__criterion': ['gini', 'entropy'],
    'classifier__class_weight': [None, "balanced", "balanced_subsample"],
# Training hyper-parameters for the neural-network experiment.
epochs = 30
batch_size = 32

#from keras import backend as K
#session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
#sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
#K.set_session(sess)

# NOTE(review): NN_TYPE, TASK, input_type, SEQ_LEN and USING_MESA_VARIABLES
# are not defined anywhere in this fragment -- presumably set earlier in the
# original script this was extracted from; confirm before running.
MODEL_OUTFILE = "model_%s_task%d_%s_seq%d.pkl" % (NN_TYPE, TASK, input_type,
                                                  SEQ_LEN)
RESULTS_OUTFILE = "task%d_%s_%s_%d.csv" % (TASK, NN_TYPE, input_type, SEQ_LEN)

# Cached HDF dataset for this task.
DATASET_PATH = "hdf_task%d" % (TASK)

print("...Loading dataset into memory...")
dftrain, dftest, featnames = load_dataset(DATASET_PATH, useCache=True)
print("...Done...")

# Optionally join per-subject clinical/demographic variables from the MESA
# sleep dataset onto both splits (missing values filled with 0).
if USING_MESA_VARIABLES:
    mesa_cols = [
        "gender1", "sleepage5c", "insmnia5", "rstlesslgs5", "slpapnea5"
    ]
    # NOTE(review): ``pd`` (pandas) is not imported in this fragment.
    variables = pd.read_csv("./data/mesa-sleep-dataset-0.3.0.csv.gz")[
        ["mesaid"] + mesa_cols].fillna(0.0)

    # pandas default inner merge on the shared "mesaid" column.
    dftrain = pd.merge(dftrain, variables)
    dftest = pd.merge(dftest, variables)

# Standardize raw activity counts; the scaler is fit on the training split only.
scaler = StandardScaler()
scaler.fit(dftrain[["activity"]].fillna(0.0))
from glob import glob
import re
import sys
import os
from sleep_misc import load_dataset, apply_formulas_to_psgfile

# Task number from the command line.
TASK = int(sys.argv[1])

# Glob pattern matching every raw file belonging to this task.
PATH_TO_FILES = "./datasets/task%d/*" % (TASK)

# Cached HDF dataset and output CSV names for the formula baselines.
INPUTFILE="hdf_task%d" % (TASK)
SUMMARY_OUTPUT = "task%d_summary_formulas.csv" % (TASK)
OUTPUT = "task%d_formulas.csv" % (TASK)

print("...Loading Task %d dataset into memory..." % (TASK))
# Only the test split is needed to evaluate the scoring formulas.
_, dftest, _ = load_dataset(INPUTFILE, useCache=True)
print("...Done...")

#Get unique test ids
uids_test = set(dftest.mesaid.unique())

def get_uid_from_filename(filename):
    """Return the subject uid from *filename*: its FIRST run of digits.

    Raises:
        IndexError: if the name contains no digits.
    """
    # BUGFIX: under Python 3, ``map()`` returns an iterator that cannot
    # be indexed, so the original ``map(int, re.findall(...))[0]`` raised
    # TypeError; take the first findall match and convert it instead.
    return int(re.findall(r'\d+', filename)[0])

# Accumulator for the per-file result dataframes built below.
dfs = []
# Report how many raw files matched the glob pattern before processing.
print("Found %d files in path %s" % (len(glob(PATH_TO_FILES)), PATH_TO_FILES))

for filename in glob(PATH_TO_FILES):
    uid = get_uid_from_filename(os.path.basename(filename))
    # Check uid present in the list of test uids