def setUpClass(cls):
    """Load the shared fixtures once and attach them to the test class.

    Reads the settings named by ``test_data_assembler.json``, loads the
    feature data through ``utils.get_data`` and the segment metadata from
    ``../segmentMetadata.json``, then records hard-coded per-subject tables
    (presumably the expected values the tests compare against).
    """
    cls.settings_fh = 'test_data_assembler.json'
    cls.settings = utils.get_settings(cls.settings_fh)
    cls.subjects = cls.settings['SUBJECTS']
    cls.features = cls.settings['FEATURES']
    cls.data = utils.get_data(cls.settings)

    with open('../segmentMetadata.json') as metadata_file:
        cls.metadata = json.load(metadata_file)

    cls.ictyps = cls.settings['DATA_TYPES']

    # One row per subject: (name, segment counts in ``count_labels`` order,
    # feature length).
    count_labels = ('preictal', 'pseudopreictal', 'interictal',
                    'pseudointerictal', 'test')
    subject_rows = (
        ('Dog_1', (24, 20, 480, 400, 502), 16),
        ('Dog_2', (42, 35, 500, 416, 1000), 16),
        ('Dog_3', (72, 60, 1440, 1200, 907), 16),
        ('Dog_4', (97, 80, 804, 670, 990), 16),
        ('Dog_5', (30, 25, 450, 375, 191), 15),
        ('Patient_1', (18, 15, 50, 41, 195), 15),
        ('Patient_2', (18, 15, 42, 35, 150), 24),
    )

    cls.segment_counts = {
        name: dict(zip(count_labels, counts))
        for name, counts, _ in subject_rows
    }
    cls.feature_length = {name: length for name, _, length in subject_rows}

    # Class label per segment type: the pseudo types share the label of
    # their non-pseudo counterpart.
    cls.ictyp_mapping = dict(preictal=1, interictal=0,
                             pseudopreictal=1, pseudointerictal=0)
def _average_predictions_by_segment(segments, predictions):
    """Return the column-wise mean prediction for each distinct segment.

    ``segments`` and ``predictions`` are iterated in lockstep; rows that
    share a segment identifier are stacked and averaged along axis 0.
    """
    grouped = {}
    for segment, prediction in zip(segments, predictions):
        grouped.setdefault(segment, []).append(prediction)

    # Iterate over the distinct segments only. Looping over ``segments``
    # itself would revisit repeated names and average an already-averaged
    # row a second time.
    return {segment: np.mean(np.vstack(rows), axis=0)
            for segment, rows in grouped.items()}


def main(settings_file='SETTINGS.json', verbose=False):
    """Predict on every subject's test set and write the results as CSV.

    Parameters
    ----------
    settings_file : str
        Path handed to ``utils.get_settings``.
    verbose : bool
        Forwarded to ``utils.read_trained_model`` and ``utils.output_csv``.

    Raises
    ------
    ValueError
        If the loaded features mix 1-minute ('10feat' in the name) and
        10-minute features.
    """
    # load the settings
    settings = utils.get_settings(settings_file)
    subjects = settings['SUBJECTS']

    # load the data and the metadata
    data = utils.get_data(settings)
    metadata = utils.get_metadata()

    # keep only the requested features that were actually loaded
    features_that_parsed = set(data.keys())
    settings['FEATURES'] = [
        feature for feature in settings['FEATURES']
        if feature in features_that_parsed
    ]

    # 1-minute features are recognised by '10feat' in their name; either all
    # of the features are 1-minute or none of them may be
    is_minute = ['10feat' in feature for feature in settings['FEATURES']]
    minutefeatures = all(is_minute)
    if not minutefeatures and any(is_minute):
        raise ValueError("Cannot mix 1-minute and 10-minute features.")

    # iterate over subjects
    prediction_dict = {}
    for subject in subjects:
        # load the trained model
        model = utils.read_trained_model(subject, settings, verbose=verbose)

        # initialise the data assembler and build the test set
        assembler = utils.DataAssembler(settings, data, metadata)
        X = assembler.build_test(subject)

        # make predictions
        predictions = model.predict_proba(X)

        if minutefeatures:
            # several rows belong to each segment: combine the estimates by
            # averaging and store the average (previously the averages were
            # computed but the raw rows were stored instead)
            prediction_dict.update(
                _average_predictions_by_segment(assembler.test_segments,
                                                predictions))
        else:
            for segment, prediction in zip(assembler.test_segments,
                                           predictions):
                prediction_dict[segment] = prediction

    utils.output_csv(prediction_dict, settings, verbose=verbose)
def main(settingsfname, verbose=False, store_models=True,
         store_features=False, save_training_detailed=False,
         load_pickled=False, parallel=0):
    """Train a model for every subject in the settings and report AUC scores.

    Parameters
    ----------
    settingsfname : str
        Path handed to ``utils.get_settings``; the text before its first
        '.' also names the default feature dump (see ``load_pickled``).
    verbose : bool
        Flag forwarded to ``utils.print_verbose`` and the training helpers.
    store_models : bool
        Forwarded to the training helpers.
    store_features : bool
        Forwarded to ``utils.train_RFE`` only.
    save_training_detailed : str or False
        When truthy, path of a pickle that receives the predictions of the
        last subject in ``settings['SUBJECTS']``.
    load_pickled : str or bool
        When a string, path of a pickled feature dump to load; when
        otherwise truthy, the dump next to the settings file is loaded.
    parallel : int
        0 trains the subjects one after another; any other value is used
        as ``n_jobs`` for ``joblib.Parallel``.

    Returns
    -------
    dict
        AUC score per subject plus the combined score under ``'all'``.

    Raises
    ------
    EnvironmentError
        If none of the requested features could be loaded.
    NotImplementedError
        If ``parallel`` is set together with an 'RFE' entry in the settings.
    """
    settings = utils.get_settings(settingsfname)

    utils.print_verbose('=== Settings file   ===', flag=verbose)
    utils.print_verbose(settingsfname, flag=verbose)
    utils.print_verbose('=== Settings loaded ===', flag=verbose)
    utils.print_verbose(settings, flag=verbose)
    utils.print_verbose('=======================', flag=verbose)

    subjects = settings['SUBJECTS']

    data = utils.get_data(settings, verbose=verbose)
    metadata = utils.get_metadata()

    # keep only the requested features that were actually loaded; build the
    # lookup once instead of once per feature
    loaded_features = set(data.keys())
    settings['FEATURES'] = [feature for feature in settings['FEATURES']
                            if feature in loaded_features]

    if not settings['FEATURES']:
        raise EnvironmentError('No features could be loaded')

    utils.print_verbose("=====Feature HDF5s parsed=====", flag=verbose)

    model_pipe = utils.build_model_pipe(settings)

    utils.print_verbose("=== Model Used ===\n"
                        "{0}\n==================".format(model_pipe),
                        flag=verbose)

    # dictionary to store results
    subject_predictions = {}

    # dictionary to store features in
    transformed_features = {}

    # if we're loading pickled features then load them
    # NOTE: pickle.load can execute arbitrary code; only load dumps that
    # were produced locally.
    if load_pickled:
        if isinstance(load_pickled, str):
            dump_path = load_pickled
        else:
            # NOTE(review): split(".")[0] cuts at the FIRST dot, so a path
            # such as '../x.json' yields ''. Left unchanged because the
            # code that writes this dump is not visible here - confirm.
            dump_path = settingsfname.split(".")[0] + "_feature_dump.pickle"
        with open(dump_path, "rb") as fh:
            Xtra = pickle.load(fh)
    else:
        Xtra = None

    # dictionary for final scores
    auc_scores = {}

    if not parallel:
        for subject in subjects:
            utils.print_verbose(
                "=====Training {0} Model=====".format(str(subject)),
                flag=verbose)

            if 'RFE' in settings:
                transformed_features, auc = utils.train_RFE(
                    settings, data, metadata, subject, model_pipe,
                    transformed_features, store_models, store_features,
                    load_pickled, settingsfname, verbose, extra_data=Xtra)
                # RFE yields no per-subject predictions
                subject_predictions = None
            elif 'CUSTOM' in settings:
                results, auc = utils.train_custom_model(
                    settings, data, metadata, subject, model_pipe,
                    store_models, load_pickled, verbose, extra_data=Xtra)
                subject_predictions[subject] = results
            else:
                results, auc = utils.train_model(
                    settings, data, metadata, subject, model_pipe,
                    store_models, load_pickled, verbose, extra_data=Xtra)
                subject_predictions[subject] = results

            auc_scores.update({subject: auc})
    elif 'RFE' in settings:
        raise NotImplementedError('Parallel RFE is not implemented')
    else:
        output = joblib.Parallel(n_jobs=parallel)(
            joblib.delayed(utils.train_model)(
                settings, data, metadata, subject, model_pipe, store_models,
                load_pickled, verbose, extra_data=Xtra, parallel=parallel)
            for subject in subjects)

        # each job returns a (predictions, auc) pair; both are merged with
        # dict.update, so they are presumably keyed by subject - confirm
        # against utils.train_model
        for result, auc in output:
            subject_predictions.update(result)
        for result, auc in output:
            auc_scores.update(auc)

    if save_training_detailed:
        # The loop variable ``subject`` is unbound after the parallel branch
        # (generator-expression variables do not leak) and when there are no
        # subjects, so select the last subject explicitly. This keeps the
        # dump contents of the serial path unchanged.
        last_subject = subjects[-1] if subjects else None
        if subject_predictions is None or last_subject is None:
            # e.g. the RFE path: do not crash after training and lose the
            # AUC output below
            print("no per-subject predictions available; "
                  "skipping detailed training dump")
        else:
            with open(save_training_detailed, "wb") as fh:
                pickle.dump(subject_predictions[last_subject], fh)

    combined_auc = utils.combined_auc_score(settings,
                                            auc_scores,
                                            subj_pred=subject_predictions)

    print(
        "predicted AUC score over all subjects: {0:.2f}".format(combined_auc))

    auc_scores.update({'all': combined_auc})

    utils.output_auc_scores(auc_scores, settings)

    return auc_scores
def main(settingsfname, verbose=False, store_models=True,
         store_features=False, save_training_detailed=False,
         load_pickled=False, parallel=0):
    """Train a model for every subject in the settings and report AUC scores.

    Parameters
    ----------
    settingsfname : str
        Path handed to ``utils.get_settings``; the text before its first
        '.' also names the default feature dump (see ``load_pickled``).
    verbose : bool
        Flag forwarded to ``utils.print_verbose`` and the training helpers.
    store_models : bool
        Forwarded to the training helpers.
    store_features : bool
        Forwarded to ``utils.train_RFE`` only.
    save_training_detailed : str or False
        When truthy, path of a pickle that receives the predictions of the
        last subject in ``settings['SUBJECTS']``.
    load_pickled : str or bool
        When a string, path of a pickled feature dump to load; when
        otherwise truthy, the dump next to the settings file is loaded.
    parallel : int
        0 trains the subjects one after another; any other value is used
        as ``n_jobs`` for ``joblib.Parallel``.

    Returns
    -------
    dict
        AUC score per subject plus the combined score under ``'all'``.

    Raises
    ------
    EnvironmentError
        If none of the requested features could be loaded.
    NotImplementedError
        If ``parallel`` is set together with an 'RFE' entry in the settings.
    """
    settings = utils.get_settings(settingsfname)

    utils.print_verbose('=== Settings file   ===', flag=verbose)
    utils.print_verbose(settingsfname, flag=verbose)
    utils.print_verbose('=== Settings loaded ===', flag=verbose)
    utils.print_verbose(settings, flag=verbose)
    utils.print_verbose('=======================', flag=verbose)

    subjects = settings['SUBJECTS']

    data = utils.get_data(settings, verbose=verbose)
    metadata = utils.get_metadata()

    # Restrict the feature list to what was really loaded. The key set is
    # materialised a single time rather than for every membership test.
    available = set(data.keys())
    settings['FEATURES'] = [name for name in settings['FEATURES']
                            if name in available]

    if not settings['FEATURES']:
        raise EnvironmentError('No features could be loaded')

    utils.print_verbose("=====Feature HDF5s parsed=====", flag=verbose)

    model_pipe = utils.build_model_pipe(settings)

    utils.print_verbose("=== Model Used ===\n"
                        "{0}\n==================".format(model_pipe),
                        flag=verbose)

    # dictionary to store results
    subject_predictions = {}

    # dictionary to store features in
    transformed_features = {}

    # Optionally load previously pickled features.
    # NOTE: pickle.load can execute arbitrary code; only load dumps that
    # were produced locally.
    Xtra = None
    if load_pickled:
        if isinstance(load_pickled, str):
            pickle_path = load_pickled
        else:
            # NOTE(review): split(".")[0] cuts at the FIRST dot, so a path
            # such as '../x.json' yields ''. Left unchanged because the
            # code that writes this dump is not visible here - confirm.
            pickle_path = (settingsfname.split(".")[0]
                           + "_feature_dump.pickle")
        with open(pickle_path, "rb") as fh:
            Xtra = pickle.load(fh)

    # dictionary for final scores
    auc_scores = {}

    if not parallel:
        for subject in subjects:
            utils.print_verbose(
                "=====Training {0} Model=====".format(str(subject)),
                flag=verbose)

            if 'RFE' in settings:
                transformed_features, auc = utils.train_RFE(
                    settings, data, metadata, subject, model_pipe,
                    transformed_features, store_models, store_features,
                    load_pickled, settingsfname, verbose, extra_data=Xtra)
                # RFE yields no per-subject predictions
                subject_predictions = None
            elif 'CUSTOM' in settings:
                results, auc = utils.train_custom_model(
                    settings, data, metadata, subject, model_pipe,
                    store_models, load_pickled, verbose, extra_data=Xtra)
                subject_predictions[subject] = results
            else:
                results, auc = utils.train_model(
                    settings, data, metadata, subject, model_pipe,
                    store_models, load_pickled, verbose, extra_data=Xtra)
                subject_predictions[subject] = results

            auc_scores.update({subject: auc})
    elif 'RFE' in settings:
        raise NotImplementedError('Parallel RFE is not implemented')
    else:
        output = joblib.Parallel(n_jobs=parallel)(
            joblib.delayed(utils.train_model)(
                settings, data, metadata, subject, model_pipe, store_models,
                load_pickled, verbose, extra_data=Xtra, parallel=parallel)
            for subject in subjects)

        # each job returns a (predictions, auc) pair; both are merged with
        # dict.update, so they are presumably keyed by subject - confirm
        # against utils.train_model
        for job_predictions, _ in output:
            subject_predictions.update(job_predictions)
        for _, job_auc in output:
            auc_scores.update(job_auc)

    if save_training_detailed:
        # The loop variable ``subject`` is unbound after the parallel branch
        # (generator-expression variables do not leak) and when there are no
        # subjects, so select the last subject explicitly. This keeps the
        # dump contents of the serial path unchanged.
        last_subject = subjects[-1] if subjects else None
        if subject_predictions is None or last_subject is None:
            # e.g. the RFE path: do not crash after training and lose the
            # AUC output below
            print("no per-subject predictions available; "
                  "skipping detailed training dump")
        else:
            with open(save_training_detailed, "wb") as fh:
                pickle.dump(subject_predictions[last_subject], fh)

    combined_auc = utils.combined_auc_score(settings,
                                            auc_scores,
                                            subj_pred=subject_predictions)

    print(
        "predicted AUC score over all subjects: {0:.2f}".format(combined_auc))

    auc_scores.update({'all': combined_auc})

    utils.output_auc_scores(auc_scores, settings)

    return auc_scores