from glob import glob  # bug fix: glob() is called below but was never imported here
import re
import sys

from sleep_misc import load_dataset, grid_search, load_mesa_PSG
from sleep_misc import time_based
from sleep_misc import oakley
from sleep_eval import eval_acc

# Task id (1, 2, ...) comes from the command line, e.g. `python script.py 1`.
TASK = int(sys.argv[1])

PATH_TO_FILES = "./datasets/task%d/*" % (TASK)
INPUTFILE = "hdf_task%d" % (TASK)
SUMMARY_OUTPUT = "task%d_summary_formulas.csv" % (TASK)
OUTPUT = "task%d_formulas.csv" % (TASK)

print("...Loading Task %d dataset into memory..." % (TASK))
# Only the training split is needed here; test/featnames are discarded.
dftrain, _, _ = load_dataset(INPUTFILE, useCache=True)
print("...Done...")

# Unique subject ids present in the training split.
uids_train = set(dftrain.mesaid.unique())


def get_uid_from_filename(filename):
    """Return the subject uid, i.e. the SECOND number embedded in *filename*.

    Bug fix: the original did ``map(int, re.findall(...))[1]``, which raises
    TypeError on Python 3 because ``map`` returns a non-subscriptable
    iterator.  Index the list returned by ``re.findall`` and convert instead.
    """
    return int(re.findall(r'\d+', filename)[1])


dfs = []
for filename in glob(PATH_TO_FILES):
    uid = get_uid_from_filename(filename)
    # Skip files whose subject is not part of the training split.
    if uid not in uids_train:
        continue
    print("Processing: %s" % (filename))
# NOTE(review): this early exit makes everything below unreachable —
# presumably left in on purpose to disable the generation step; confirm.
sys.exit(0)

# Which task (1, 2 or 3) to build a dataset for, taken from the CLI.
TASK = int(sys.argv[1])
print("Generating dataset for Task %d" % (TASK))

OUTPUTFILE = "hdf_task%d" % (TASK)

# Tasks 1 and 2 read from their own dataset folder; any other task id
# falls back to the MESA actigraphy test directory.
if TASK in [1, 2]:
    PATH_TO_FILES = "./datasets/task%d/" % (TASK)
else:
    PATH_TO_FILES = "./data/mesa/actigraphy_test/"

# Ground-truth labelling scheme differs for task 3.
method = "stage" if TASK != 3 else "interval"

print("...Loading dataset into memory...")
dftrain, dftest, featnames = load_dataset(PATH_TO_FILES,
                                          useCache=False,
                                          saveCache=True,
                                          cacheName=OUTPUTFILE,
                                          ground_truth=method)
print("...Done...")

dfoutname = "dftest_task%d.csv" % (TASK)
print("...Saving Task %d dataset to disk. Filename: %s ..." % (TASK, dfoutname))

# Persist only the columns needed downstream.
cols = ["mesaid", "linetime", "marker", "interval",
        "binterval", "gt", "gt_sleep_block", "wake"]
dftest[cols].to_csv(dfoutname, index=False)
print("...Done...")
from sklearn.ensemble import ExtraTreesClassifier, GradientBoostingClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sleep_misc import load_dataset

# Which task's cached HDF dataset to train on.
TASKID = 1
INPUTFILE = "hdf_task%d" % (TASKID)
# Classifier to grid-search: "sgd", "etc" (extra trees) or "gbc" (gradient boosting).
EXPERIMENT = "gbc"
# Metric optimised by the grid search.
SCORING = "accuracy" # f1, accuracy, precision
OUTPUT = "task%d_%s_%s" % (TASKID, EXPERIMENT, SCORING)

print("...Loading dataset into memory...")
dftrain, dftest, featnames = load_dataset(INPUTFILE, useCache=True)
print("...Done...")

# Pick the estimator matching EXPERIMENT; fixed random_state for reproducibility.
classifier = None
if EXPERIMENT == "sgd":
    classifier = SGDClassifier(random_state=42, n_jobs=1)
elif EXPERIMENT == "etc":
    classifier = ExtraTreesClassifier(random_state=42, n_jobs=8)
elif EXPERIMENT == "gbc":
    classifier = GradientBoostingClassifier(random_state=42)

# Hyper-parameter grid for the extra-trees pipeline step ("classifier__" prefix
# targets the Pipeline stage).
# NOTE(review): this literal is truncated — it continues past the end of the
# visible chunk.
etc_params = [{
    'classifier__n_estimators': [8, 64, 128, 512, 1024],
    'classifier__criterion': ['gini', 'entropy'],
    'classifier__class_weight': [None, "balanced", "balanced_subsample"],
# Training hyper-parameters.
epochs = 30
batch_size = 32

# Leftover deterministic-TensorFlow session setup, kept for reference.
#from keras import backend as K
#session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
#sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
#K.set_session(sess)

# Output paths are derived from the experiment configuration; NN_TYPE, TASK,
# input_type and SEQ_LEN are defined earlier in the file (outside this chunk).
MODEL_OUTFILE = "model_%s_task%d_%s_seq%d.pkl" % (NN_TYPE, TASK, input_type, SEQ_LEN)
RESULTS_OUTFILE = "task%d_%s_%s_%d.csv" % (TASK, NN_TYPE, input_type, SEQ_LEN)
DATASET_PATH = "hdf_task%d" % (TASK)

print("...Loading dataset into memory...")
dftrain, dftest, featnames = load_dataset(DATASET_PATH, useCache=True)
print("...Done...")

if USING_MESA_VARIABLES:
    # Join per-subject MESA survey variables onto both splits
    # (merge key is the shared "mesaid" column; NaNs filled with 0.0).
    mesa_cols = ["gender1", "sleepage5c", "insmnia5", "rstlesslgs5", "slpapnea5"]
    variables = pd.read_csv("./data/mesa-sleep-dataset-0.3.0.csv.gz")
    variables = variables[["mesaid"] + mesa_cols].fillna(0.0)
    dftrain = pd.merge(dftrain, variables)
    dftest = pd.merge(dftest, variables)

# Standardise the raw activity counts; NaNs are treated as zero activity.
scaler = StandardScaler()
scaler.fit(dftrain[["activity"]].fillna(0.0))
from glob import glob
import re
import sys
import os

from sleep_misc import load_dataset, apply_formulas_to_psgfile

# Task id comes from the command line, e.g. `python script.py 1`.
TASK = int(sys.argv[1])

PATH_TO_FILES = "./datasets/task%d/*" % (TASK)
INPUTFILE = "hdf_task%d" % (TASK)
SUMMARY_OUTPUT = "task%d_summary_formulas.csv" % (TASK)
OUTPUT = "task%d_formulas.csv" % (TASK)

print("...Loading Task %d dataset into memory..." % (TASK))
# Only the test split is needed here; train/featnames are discarded.
_, dftest, _ = load_dataset(INPUTFILE, useCache=True)
print("...Done...")

#Get unique test ids
uids_test = set(dftest.mesaid.unique())


def get_uid_from_filename(filename):
    """Find a particular uid (the FIRST number) from a filename.

    Bug fix: the original did ``map(int, re.findall(...))[0]``, which raises
    TypeError on Python 3 because ``map`` returns a non-subscriptable
    iterator.  Index the list returned by ``re.findall`` and convert instead.
    """
    return int(re.findall(r'\d+', filename)[0])


dfs = []
print("Found %d files in path %s" % (len(glob(PATH_TO_FILES)), PATH_TO_FILES))
for filename in glob(PATH_TO_FILES):
    uid = get_uid_from_filename(os.path.basename(filename))
    # Check uid present in the list of test uids