def segment():
    """Group raw PPG text recordings by participant and write one JSON each.

    Every ``*.txt`` file in ``data/raw`` is expected to be named
    ``<participant>-<label>.txt``. Its content is converted to a list of
    floats and stored as
    ``output[participant][label] = {'sample_rate': ..., 'signal': [...]}``.
    Each participant's dictionary is written to
    ``data/segmented/<participant>.json``.

    Nothing happens when the raw data directory does not exist. A file name
    that does not split into exactly two ``-`` separated parts raises
    ``ValueError``.
    """
    raw_data_dir = os.path.join(BASE_DIR, 'data', 'raw')
    segmented_data_dir = os.path.join(BASE_DIR, 'data', 'segmented')
    if not exist(pathname=raw_data_dir):
        return

    output_data = {}
    for filename_with_ext in fnmatch.filter(os.listdir(raw_data_dir), '*.txt'):
        filename = os.path.splitext(filename_with_ext)[0]
        participant, label = filename.split('-')
        raw_lines = load_text(
            pathname=os.path.join(raw_data_dir, filename_with_ext))
        if raw_lines is None:
            # Other callers in this file check load_text() against None;
            # skip the recording here instead of failing inside float().
            continue
        output_data.setdefault(participant, {})[label] = {
            'sample_rate': PPG_SAMPLE_RATE,
            # A real list (not a lazy ``map`` object) so the signal can be
            # serialized to JSON on Python 3 as well as Python 2.
            'signal': [float(line) for line in raw_lines],
        }

    for participant in output_data:
        output_filename = '%s.json' % participant
        dump_json(data=output_data[participant],
                  pathname=os.path.join(segmented_data_dir, output_filename),
                  overwrite=True)
def convert(csv_pathname='ppg.csv'):
    """Convert a CSV of PPG signals into per-participant feature lists.

    Each non-empty CSV row is read as: column 0 is the participant key and
    columns 2 up to (but excluding) the last one are float samples of one
    signal. Every signal is passed through ``smooth_ppg_signal`` and
    ``extract_ppg45``; empty results are dropped, and participants without
    any remaining result are omitted.

    The mapping is written to ``data/splited/feature_data.json`` and also
    returned.

    Args:
        csv_pathname: Path of the CSV file to read. Defaults to ``'ppg.csv'``
            (relative to the current working directory), which is the path
            this function always used before the parameter existed.

    Returns:
        dict mapping participant key to a list of extracted feature results.
    """
    splited_data_dir = os.path.join(BASE_DIR, 'data', 'splited')

    signals_by_participant = {}
    with open(csv_pathname, 'r') as csv_file:
        for row in csv.reader(csv_file):
            if not row:
                # csv.reader yields [] for blank lines; row[0] would raise.
                continue
            participant = row[0]
            signal_values = [float(value) for value in row[2:-1]]
            signals_by_participant.setdefault(participant, []).append(
                signal_values)

    feature_data = {}
    for participant, signal_list in signals_by_participant.items():
        feature_signals = []
        for signal in signal_list:
            features = extract_ppg45(smooth_ppg_signal(signal))
            # Explicit length test: the result may be an array-like whose
            # truth value is ambiguous.
            if len(features) != 0:
                feature_signals.append(features)
        if feature_signals:
            feature_data[participant] = feature_signals

    output_filename = "feature_data.json"
    dump_json(data=feature_data,
              pathname=os.path.join(splited_data_dir, output_filename),
              overwrite=True)
    return feature_data
def extract():
    """Compute PPG-45 and SVRI features for every preprocessed JSON file.

    For each ``*.json`` file in ``data/preprocessed`` and each label in it,
    the label's ``single_waveforms`` are turned into a ``ppg45`` list and an
    ``svri`` list (one entry per waveform), the waveforms are removed, and
    the result is written under the same file name to ``data/extracted``.
    Files that fail to load (``load_json`` returns ``None``) are skipped.
    """
    source_dir = os.path.join(BASE_DIR, 'data', 'preprocessed')
    target_dir = os.path.join(BASE_DIR, 'data', 'extracted')
    if not exist(pathname=source_dir):
        return

    for json_name in fnmatch.filter(os.listdir(source_dir), '*.json'):
        json_data = load_json(pathname=os.path.join(source_dir, json_name))
        if json_data is None:
            continue

        for label in json_data:
            entry = json_data[label]

            ppg45_features = []
            for waveform in entry['single_waveforms']:
                ppg45_features.append(
                    extract_ppg45(single_waveform=waveform,
                                  sample_rate=entry['sample_rate']))
            entry['ppg45'] = ppg45_features

            svri_features = []
            for waveform in entry['single_waveforms']:
                svri_features.append(extract_svri(single_waveform=waveform))
            entry['svri'] = svri_features

            # The raw waveforms are not part of the extracted output.
            del entry['single_waveforms']

        dump_json(data=json_data,
                  pathname=os.path.join(target_dir, json_name),
                  overwrite=True)
def split():
    """Split each participant's extracted PPG features into train and test.

    For every ``*.json`` file in ``data/extracted`` the blocks of all
    sessions are grouped by task level (``'0'``, ``'1'``, ``'2'``). Each
    block contributes its ``ppg45`` and ``svri`` features plus their change
    ratios relative to the session's rest recording. Per level, the first
    ``TRAINING_DATA_RATIO`` share of samples goes to ``train`` and the
    remainder to ``test``; the result is written to ``data/splited``.
    """
    extracted_data_dir = os.path.join(BASE_DIR, 'data', 'extracted')
    splited_data_dir = os.path.join(BASE_DIR, 'data', 'splited')
    if not exist(pathname=extracted_data_dir):
        return

    levels = ('0', '1', '2')
    for json_name in fnmatch.filter(os.listdir(extracted_data_dir), '*.json'):
        feature_data = {}
        for level in levels:
            feature_data[level] = []

        json_data = load_json(
            pathname=os.path.join(extracted_data_dir, json_name))
        if json_data is None:
            continue

        for session_id in json_data:
            session = json_data[session_id]
            for block in session['blocks']:
                block_ppg = block['ppg']
                feature_data[str(block['level'])].append({
                    'ppg45': block_ppg['ppg45'],
                    'ppg45_cr': get_change_ratio(
                        data=block_ppg['ppg45'],
                        baseline=session['rest']['ppg']['ppg45']),
                    'svri': block_ppg['svri'],
                    'svri_cr': get_change_ratio(
                        data=block_ppg['svri'],
                        baseline=session['rest']['ppg']['svri']),
                })

        train_part = {}
        for level in levels:
            samples = feature_data[level]
            train_part[level] = samples[:int(len(samples) * TRAINING_DATA_RATIO)]
        test_part = {}
        for level in levels:
            samples = feature_data[level]
            test_part[level] = samples[int(len(samples) * TRAINING_DATA_RATIO):]

        output_data = {'train': train_part, 'test': test_part}
        dump_json(data=output_data,
                  pathname=os.path.join(splited_data_dir, json_name),
                  overwrite=True)
def extract():
    """Extract PPG-45 and SVRI features for rest and block recordings.

    For every ``*.json`` file in ``data/preprocessed`` and every session in
    it, the ``ppg`` entry of the rest recording and of each block gets a
    ``ppg45`` and an ``svri`` list computed from its ``single_waveforms``
    (both are ``None`` when the waveforms are ``None``). The waveforms are
    then removed and the file is written to ``data/extracted``.
    """

    def add_ppg_features(ppg):
        # Replace ppg['single_waveforms'] with per-waveform feature lists.
        if ppg['single_waveforms'] is None:
            ppg['ppg45'] = None
            ppg['svri'] = None
        else:
            ppg['ppg45'] = [
                extract_ppg45(single_waveform=waveform,
                              sample_rate=ppg['sample_rate'])
                for waveform in ppg['single_waveforms']
            ]
            ppg['svri'] = [
                extract_svri(single_waveform=waveform)
                for waveform in ppg['single_waveforms']
            ]
        del ppg['single_waveforms']

    preprocessed_data_dir = os.path.join(BASE_DIR, 'data', 'preprocessed')
    extracted_data_dir = os.path.join(BASE_DIR, 'data', 'extracted')
    if not exist(pathname=preprocessed_data_dir):
        return

    json_names = fnmatch.filter(os.listdir(preprocessed_data_dir), '*.json')
    for json_name in json_names:
        json_data = load_json(
            pathname=os.path.join(preprocessed_data_dir, json_name))
        if json_data is None:
            continue

        for session_id in json_data:
            session = json_data[session_id]
            add_ppg_features(session['rest']['ppg'])
            for block in session['blocks']:
                add_ppg_features(block['ppg'])

        dump_json(data=json_data,
                  pathname=os.path.join(extracted_data_dir, json_name),
                  overwrite=True)
def preprocess():
    """Turn segmented PPG signals into single waveforms.

    For every ``*.json`` file in ``data/segmented`` and every label in it,
    the ``signal`` is smoothed with ``smooth_ppg_signal`` (using the label's
    ``sample_rate``), passed to ``extract_ppg_single_waveform`` and stored as
    ``single_waveforms``; the raw ``signal`` is removed. The result is
    written under the same file name to ``data/preprocessed``.
    """
    source_dir = os.path.join(BASE_DIR, 'data', 'segmented')
    target_dir = os.path.join(BASE_DIR, 'data', 'preprocessed')
    if not exist(pathname=source_dir):
        return

    for json_name in fnmatch.filter(os.listdir(source_dir), '*.json'):
        json_data = load_json(pathname=os.path.join(source_dir, json_name))
        if json_data is None:
            continue

        for label in json_data:
            entry = json_data[label]
            smoothed = smooth_ppg_signal(signal=entry['signal'],
                                         sample_rate=entry['sample_rate'])
            entry['single_waveforms'] = extract_ppg_single_waveform(
                signal=smoothed)
            del entry['signal']

        dump_json(data=json_data,
                  pathname=os.path.join(target_dir, json_name),
                  overwrite=True)
def subject_independent():
    """Build leave-one-subject-out train/test files from extracted features.

    Every ``*.json`` file in ``data/extracted`` is one subject. Its blocks
    are grouped by task level (``'0'``, ``'1'``, ``'2'``); each block
    contributes its PPG, skin-conductance and ECG features together with
    their change ratios relative to the session's rest recording.

    For each subject a file ``data/subject_independent/<subject>.json`` is
    written with:

    * ``train``: the feature data of all *other* subjects combined with
      ``merge``; an empty per-level dictionary when there is no other
      subject.
    * ``test``: per level, the samples of this subject from index
      ``int(len * TRAINING_DATA_RATIO)`` onwards.
    """
    # Local import: ``reduce`` is only a builtin on Python 2.
    from functools import reduce

    extracted_data_dir = os.path.join(BASE_DIR, 'data', 'extracted')
    subject_independent_data_dir = os.path.join(BASE_DIR, 'data',
                                                'subject_independent')
    if not exist(pathname=extracted_data_dir):
        return

    levels = ('0', '1', '2')
    # (output name, signal group, key inside the group)
    feature_sources = (
        ('ppg45', 'ppg', 'ppg45'),
        ('svri', 'ppg', 'svri'),
        ('average_skin_conductance_level', 'skin_conductance', 'average_level'),
        ('minimum_skin_conductance_level', 'skin_conductance', 'minimum_level'),
        ('average_rri', 'ecg', 'average_rri'),
        ('rmssd', 'ecg', 'rmssd'),
        ('lf_hrv_power', 'ecg', 'lf_hrv_power'),
        ('hf_hrv_power', 'ecg', 'hf_hrv_power'),
    )

    all_subject_data = {}
    for filename_with_ext in fnmatch.filter(os.listdir(extracted_data_dir),
                                            '*.json'):
        subject = os.path.splitext(filename_with_ext)[0]
        json_data = load_json(
            pathname=os.path.join(extracted_data_dir, filename_with_ext))
        if json_data is None:
            continue

        feature_data = dict((level, []) for level in levels)
        for session_id in json_data:
            rest = json_data[session_id]['rest']
            for block in json_data[session_id]['blocks']:
                level_samples = feature_data[str(block['level'])]
                sample = {}
                for name, group, key in feature_sources:
                    sample[name] = block[group][key]
                    sample[name + '_cr'] = get_change_ratio(
                        data=block[group][key], baseline=rest[group][key])
                level_samples.append(sample)
        all_subject_data[subject] = feature_data

    for subject in all_subject_data:
        other_subjects = [
            all_subject_data[participant]
            for participant in all_subject_data
            if participant != subject
        ]
        if other_subjects:
            # NOTE(review): ``merge`` must be a two-argument helper that
            # combines two per-level feature dicts -- confirm it is not
            # shadowed by a zero-argument ``merge()`` step in this module.
            train_data = reduce(merge, other_subjects)
        else:
            # reduce() raises TypeError on an empty sequence; with a single
            # subject there is simply no training data.
            train_data = dict((level, []) for level in levels)

        subject_data = all_subject_data[subject]
        test_data = {}
        for level in levels:
            samples = subject_data[level]
            test_data[level] = samples[int(len(samples) * TRAINING_DATA_RATIO):]

        output_data = {'train': train_data, 'test': test_data}
        dump_json(data=output_data,
                  pathname=os.path.join(subject_independent_data_dir,
                                        '%s.json' % subject),
                  overwrite=True)
def segment():
    """Group raw PPG recordings by participant and session.

    ``*.txt`` files in ``data/raw/ppg`` are expected to be named either
    ``<participant>-<session>-rest.txt`` (rest recording) or
    ``<participant>-<session>-<block>-<level>.txt`` (task block). For each
    participant a JSON file is written to ``data/segmented`` with, per
    session, a ``rest`` entry and a ``blocks`` list; every PPG entry holds a
    ``sample_rate`` and a ``signal`` (list of floats, or ``None`` when the
    file could not be loaded).

    A session without a rest file gets ``{'sample_rate': None,
    'signal': None}`` as its rest PPG entry so the ``signal`` key is always
    present. Nothing happens when the raw directory does not exist. A file
    name with an unexpected number of ``-`` separated parts raises
    ``ValueError``.
    """
    raw_ppg_data_dir = os.path.join(BASE_DIR, 'data', 'raw', 'ppg')
    segmented_data_dir = os.path.join(BASE_DIR, 'data', 'segmented')
    if not exist(pathname=raw_ppg_data_dir):
        return

    def load_signal(filename_with_ext):
        # Return the file's lines as floats, or None when loading failed
        # (other callers in this file check load_text() against None).
        lines = load_text(
            pathname=os.path.join(raw_ppg_data_dir, filename_with_ext))
        if lines is None:
            return None
        # A real list (not a lazy ``map``) so it stays JSON-serializable
        # on Python 3.
        return [float(line) for line in lines]

    output_data = {}
    for filename_with_ext in fnmatch.filter(os.listdir(raw_ppg_data_dir),
                                            '*.txt'):
        filename = os.path.splitext(filename_with_ext)[0]
        is_rest = filename.endswith('-rest')
        if is_rest:
            participant, session_id, _ = filename.split('-')
            task_level = None
        else:
            participant, session_id, _, task_level = filename.split('-')

        sessions = output_data.setdefault(participant, {})
        if session_id not in sessions:
            sessions[session_id] = {
                'rest': {
                    'ppg': {
                        'sample_rate': None,
                        'signal': None,
                    },
                },
                'blocks': [],
            }
        session = sessions[session_id]

        ppg_entry = {
            'sample_rate': PPG_SAMPLE_RATE,
            'signal': load_signal(filename_with_ext),
        }
        if is_rest:
            session['rest']['ppg'] = ppg_entry
        else:
            session['blocks'].append({
                'level': task_level,
                'ppg': ppg_entry,
            })

    for participant in output_data:
        output_filename = '%s.json' % participant
        dump_json(data=output_data[participant],
                  pathname=os.path.join(segmented_data_dir, output_filename),
                  overwrite=True)
def preprocess():
    """Convert rest and block PPG signals into single waveforms.

    For every ``*.json`` file in ``data/segmented`` and every session in it,
    the ``ppg`` entry of the rest recording and of each block gets a
    ``single_waveforms`` value: the smoothed signal passed through
    ``extract_ppg_single_waveform``, or ``None`` when the signal is ``None``.
    The raw ``signal`` is removed and the file is written to
    ``data/preprocessed``.
    """

    def to_single_waveforms(ppg):
        # Replace ppg['signal'] with ppg['single_waveforms'] in place.
        if ppg['signal'] is None:
            ppg['single_waveforms'] = None
        else:
            smoothed = smooth_ppg_signal(signal=ppg['signal'],
                                         sample_rate=ppg['sample_rate'])
            ppg['single_waveforms'] = extract_ppg_single_waveform(
                signal=smoothed)
        del ppg['signal']

    segmented_data_dir = os.path.join(BASE_DIR, 'data', 'segmented')
    preprocessed_data_dir = os.path.join(BASE_DIR, 'data', 'preprocessed')
    if not exist(pathname=segmented_data_dir):
        return

    json_names = fnmatch.filter(os.listdir(segmented_data_dir), '*.json')
    for json_name in json_names:
        json_data = load_json(
            pathname=os.path.join(segmented_data_dir, json_name))
        if json_data is None:
            continue

        for session_id in json_data:
            session = json_data[session_id]
            to_single_waveforms(session['rest']['ppg'])
            for block in session['blocks']:
                to_single_waveforms(block['ppg'])

        dump_json(data=json_data,
                  pathname=os.path.join(preprocessed_data_dir, json_name),
                  overwrite=True)
def classify():
    """Train or load classifiers per participant and export their scores.

    For every ``*.json`` file in ``data/splited`` (one participant each),
    every label set and every feature-type set, each configured classifier
    is loaded from ``models/<label set>/<feature set>/<classifier>/
    <participant>.model`` or, when no stored model exists, trained and
    stored there. Its score on the test features is printed and recorded.

    Afterwards, per label set, the raw scores are written to
    ``results/<label set>.json`` and the per-classifier mean over
    participants to ``results/<label set>.csv`` (one row per feature set).
    Nothing happens when the split data directory does not exist.
    """
    splited_data_dir = os.path.join(BASE_DIR, 'data', 'splited')
    model_dir = os.path.join(BASE_DIR, 'models')
    result_dir = os.path.join(BASE_DIR, 'results')
    label_sets = [
        ['pre', 'in'],
    ]
    feature_type_sets = [
        ['ppg45', 'svri'],
        ['ppg45'],
        ['svri'],
    ]
    classifiers = [
        ('logistic_regression', logistic_regression_classifier, ),
        # ('support_vector', support_vector_classifier, ),
        ('gaussian_naive_bayes', gaussian_naive_bayes_classifier, ),
        ('decision_tree', decision_tree_classifier, ),
        ('random_forest', random_forest_classifier, ),
        ('adaboost', adaboost_classifier, ),
        ('gradient_boosting', gradient_boosting_classifier, ),
        # voting classifier has to be the LAST item in the list: it is
        # built from the estimators collected from the entries before it.
        ('voting', voting_classifier, ),
    ]
    if not exist(pathname=splited_data_dir):
        return

    result_data = {}
    for filename_with_ext in fnmatch.filter(os.listdir(splited_data_dir),
                                            '*.json'):
        participant = os.path.splitext(filename_with_ext)[0]
        json_data = load_json(
            pathname=os.path.join(splited_data_dir, filename_with_ext))
        if json_data is None:
            continue

        for label_set in label_sets:
            label_set_name = '-'.join(label_set)
            label_results = result_data.setdefault(label_set_name, {})
            for feature_type_set in feature_type_sets:
                feature_type_set_name = '-'.join(feature_type_set)
                feature_results = label_results.setdefault(
                    feature_type_set_name, {})
                (train_features, train_labels,
                 test_features, test_labels) = get_feature_set(
                     data=json_data,
                     label_set=label_set,
                     feature_type_set=feature_type_set)

                estimators = []
                for classifier_name, classifier_object in classifiers:
                    scores = feature_results.setdefault(classifier_name, {})
                    model_pathname = os.path.join(
                        model_dir, label_set_name, feature_type_set_name,
                        classifier_name, '%s.model' % participant)
                    classifier = load_model(pathname=model_pathname)
                    if classifier is None:
                        if classifier_name == 'voting':
                            classifier = classifier_object(
                                estimators=estimators,
                                features=train_features,
                                labels=train_labels)
                        else:
                            classifier = classifier_object(
                                features=train_features,
                                labels=train_labels)
                        dump_model(model=classifier, pathname=model_pathname)

                    score = classifier.score(test_features, test_labels)
                    # Single-argument print: same space-separated output as
                    # the Python 2 print statement, and valid on Python 3.
                    print('%s %s %s %s %s' % (
                        participant, score, label_set_name,
                        feature_type_set_name, classifier_name))
                    scores[participant] = score

                    # prepare estimators for the training of voting classifier
                    if classifier_name != 'voting':
                        if hasattr(classifier, 'best_estimator_'):
                            estimators.append((
                                classifier_name,
                                classifier.best_estimator_,
                            ))
                        else:
                            estimators.append((classifier_name, classifier, ))

    for label_set_name in result_data:
        dump_json(data=result_data[label_set_name],
                  pathname=os.path.join(result_dir,
                                        '%s.json' % label_set_name),
                  overwrite=True)

        csv_data = []
        for feature_type_set in feature_type_sets:
            feature_type_set_name = '-'.join(feature_type_set)
            csv_row = {
                'feature_set': feature_type_set_name,
            }
            feature_results = result_data[label_set_name][
                feature_type_set_name]
            for classifier_name in feature_results:
                participant_scores = feature_results[classifier_name]
                csv_row[classifier_name] = sum(
                    participant_scores.values()) / len(participant_scores)
            csv_data.append(csv_row)

        fieldnames = ['feature_set'] + [val[0] for val in classifiers]
        export_csv(data=csv_data,
                   fieldnames=fieldnames,
                   pathname=os.path.join(result_dir,
                                         '%s.csv' % label_set_name),
                   overwrite=True)
def merge():
    """Flatten each participant's extracted features into per-level lists.

    For every ``*.json`` file in ``data/extracted`` the blocks of all
    sessions are grouped by task level (``'0'``, ``'1'``, ``'2'``). Each
    block contributes its PPG, skin-conductance and ECG features, each
    paired with a ``*_cr`` change ratio relative to the same feature of the
    session's rest recording. The result is written under the same file
    name to ``data/merged``.
    """
    # (output name, signal group, key inside the group), in output order.
    feature_sources = (
        ('ppg45', 'ppg', 'ppg45'),
        ('svri', 'ppg', 'svri'),
        ('average_skin_conductance_level', 'skin_conductance', 'average_level'),
        ('minimum_skin_conductance_level', 'skin_conductance', 'minimum_level'),
        ('average_rri', 'ecg', 'average_rri'),
        ('rmssd', 'ecg', 'rmssd'),
        ('lf_hrv_power', 'ecg', 'lf_hrv_power'),
        ('hf_hrv_power', 'ecg', 'hf_hrv_power'),
    )

    extracted_data_dir = os.path.join(BASE_DIR, 'data', 'extracted')
    merged_data_dir = os.path.join(BASE_DIR, 'data', 'merged')
    if not exist(pathname=extracted_data_dir):
        return

    for json_name in fnmatch.filter(os.listdir(extracted_data_dir), '*.json'):
        output_data = {
            '0': [],
            '1': [],
            '2': [],
        }
        json_data = load_json(
            pathname=os.path.join(extracted_data_dir, json_name))
        if json_data is None:
            continue

        for session_id in json_data:
            for block in json_data[session_id]['blocks']:
                level_samples = output_data[str(block['level'])]
                sample = {}
                for name, group, key in feature_sources:
                    sample[name] = block[group][key]
                    sample[name + '_cr'] = get_change_ratio(
                        data=block[group][key],
                        baseline=json_data[session_id]['rest'][group][key])
                level_samples.append(sample)

        dump_json(data=output_data,
                  pathname=os.path.join(merged_data_dir, json_name),
                  overwrite=True)
def segment():
    """Cut raw PPG and BIOPAC recordings into rest and block segments.

    Three raw sources are combined per participant and session:

    * ``data/raw/meta/<participant>-<session>.json``: rest start time and
      the task blocks (level, RSME, stimuli, start time).
    * ``data/raw/ppg/<participant>-<session>-<Y_M_D_...>.txt``: PPG samples;
      the last name part holds the recording start time as ``_`` separated
      integers passed to ``datetime``.
    * ``data/raw/biopac/<participant>-<session>-<seconds>.txt``: ECG and
      skin-conductance channels; ``<seconds>`` is the offset of the rest
      start inside the recording.

    For the rest period and each block, the matching slice of every signal
    is stored with its sample rate; a signal stays ``None`` when it could
    not be cut. Any problem (invalid block, missing meta data, unreadable
    or too short recording) marks the participant as incomplete.

    Participants that are complete and have ``TOTAL_SESSION_NUM`` sessions
    are written to ``data/segmented``; all others to
    ``data/segmented/incomplete``.
    """
    raw_meta_data_dir = os.path.join(BASE_DIR, 'data', 'raw', 'meta')
    raw_ppg_data_dir = os.path.join(BASE_DIR, 'data', 'raw', 'ppg')
    raw_biopac_data_dir = os.path.join(BASE_DIR, 'data', 'raw', 'biopac')
    segmented_data_dir = os.path.join(BASE_DIR, 'data', 'segmented')

    output_data = {}
    completeness = {}

    def empty_signal():
        # Placeholder for a signal that has not been cut (yet).
        return {'sample_rate': None, 'signal': None}

    # Meta data
    if exist(pathname=raw_meta_data_dir):
        for filename_with_ext in fnmatch.filter(os.listdir(raw_meta_data_dir),
                                                '*.json'):
            filename = os.path.splitext(filename_with_ext)[0]
            participant, session_id = filename.split('-')
            if participant not in output_data:
                output_data[participant] = {}
            if participant not in completeness:
                completeness[participant] = True

            pathname = os.path.join(raw_meta_data_dir, filename_with_ext)
            raw_json_data = load_json(pathname=pathname)
            if raw_json_data is None:
                # Do not register a session without a rest start time: the
                # PPG/BIOPAC steps and the clean-up below all read
                # rest['start_time'] and would raise KeyError.
                completeness[participant] = False
                continue

            session = {
                'rest': {
                    'start_time': parse_iso_time_string(
                        raw_json_data['rest_start_timestamp']),
                    'ppg': empty_signal(),
                    'ecg': empty_signal(),
                    'skin_conductance': empty_signal(),
                },
                'blocks': [],
            }
            output_data[participant][session_id] = session

            for block in raw_json_data['blocks']:
                if block['stimuli'][0]['timestamp']['load'] is None:
                    print('Skip one invalid block in \'%s\'.' %
                          filename_with_ext)
                    completeness[participant] = False
                    continue
                session['blocks'].append({
                    'level': block['level'],
                    'rsme': int(block['rsme']),
                    'start_time': parse_iso_time_string(
                        block['stimuli'][0]['timestamp']['load']),
                    'stimuli': [{
                        'stimulus': item['stimulus'],
                        'is_target': item['is_target'],
                        'answer': item['answer'],
                        'correct': item['correct'],
                        'response_time': item['response_time'],
                    } for item in block['stimuli']],
                    'ppg': empty_signal(),
                    'ecg': empty_signal(),
                    'skin_conductance': empty_signal(),
                })

    # PPG data
    if exist(pathname=raw_ppg_data_dir):
        for filename_with_ext in fnmatch.filter(os.listdir(raw_ppg_data_dir),
                                                '*.txt'):
            filename = os.path.splitext(filename_with_ext)[0]
            participant, session_id, time_str = filename.split('-')
            raw_ppg_data_start_time = datetime(
                *[int(item) for item in time_str.split('_')])
            if (participant not in output_data
                    or session_id not in output_data[participant]):
                completeness[participant] = False
                continue

            session = output_data[participant][session_id]
            rest = session['rest']
            if raw_ppg_data_start_time > rest['start_time']:
                print('Recoding data started too late in \'%s\': %s > %s' % (
                    filename_with_ext, raw_ppg_data_start_time,
                    rest['start_time']))

            pathname = os.path.join(raw_ppg_data_dir, filename_with_ext)
            raw_ppg_data = load_text(pathname=pathname)
            if raw_ppg_data is None:
                # No signal could be read for this session.
                completeness[participant] = False
                continue
            # A real list: a lazy ``map`` object cannot be sliced on Python 3.
            raw_ppg_data = [float(line) for line in raw_ppg_data]

            tdelta = rest['start_time'] - raw_ppg_data_start_time
            if tdelta.total_seconds() < 0:
                print('Skip \'rest\' PPG data.')
                completeness[participant] = False
                # Skips the whole file, including its blocks.
                continue
            start_index = int(tdelta.total_seconds() * PPG_SAMPLE_RATE)
            length = REST_DURATION * PPG_SAMPLE_RATE
            end_index = start_index + length
            ppg_data = raw_ppg_data[start_index:end_index]
            if len(ppg_data) < length:
                print('Not enough \'rest\' PPG data (%s < %s). Skip.' % (
                    len(ppg_data), length))
                completeness[participant] = False
                continue
            rest['ppg']['sample_rate'] = PPG_SAMPLE_RATE
            rest['ppg']['signal'] = ppg_data

            for block in session['blocks']:
                tdelta = block['start_time'] - raw_ppg_data_start_time
                if tdelta.total_seconds() < 0:
                    print('Skip one block PPG data.')
                    completeness[participant] = False
                    continue
                start_index = int(tdelta.total_seconds() * PPG_SAMPLE_RATE)
                length = BLOCK_DURATION * PPG_SAMPLE_RATE
                end_index = start_index + length
                ppg_data = raw_ppg_data[start_index:end_index]
                if len(ppg_data) < length:
                    print('Not enough one block PPG data (%s < %s). Skip.' % (
                        len(ppg_data), length))
                    completeness[participant] = False
                    continue
                block['ppg']['sample_rate'] = PPG_SAMPLE_RATE
                block['ppg']['signal'] = ppg_data

    # BIOPAC data
    if exist(pathname=raw_biopac_data_dir):
        for filename_with_ext in fnmatch.filter(
                os.listdir(raw_biopac_data_dir), '*.txt'):
            filename = os.path.splitext(filename_with_ext)[0]
            participant, session_id, seconds_str = filename.split('-')
            pre_tdelta = timedelta(seconds=int(seconds_str))
            if (participant not in output_data
                    or session_id not in output_data[participant]):
                completeness[participant] = False
                continue

            session = output_data[participant][session_id]
            rest = session['rest']

            pathname = os.path.join(raw_biopac_data_dir, filename_with_ext)
            raw_biopac_data = load_text(pathname=pathname)
            if raw_biopac_data is None:
                # No signal could be read for this session.
                completeness[participant] = False
                continue

            # Floor division keeps the rate an int on Python 3 as well, so
            # the slice indices computed from it stay integers.
            sample_rate = 1000 // int(
                raw_biopac_data[BIOPAC_MSEC_PER_SAMPLE_LINE_NUM -
                                1].split(' ')[0].strip())
            data_lines = raw_biopac_data[BIOPAC_HEADER_LINES:]
            raw_ecg_data = [
                float(line.split('\t')[BIOPAC_ECG_CHANNEL].strip())
                for line in data_lines
            ]
            raw_skin_conductance_data = [
                float(line.split('\t')
                      [BIOPAC_SKIN_CONDUCTANCE_CHANNEL].strip())
                for line in data_lines
            ]

            tdelta = pre_tdelta
            if tdelta.total_seconds() < 0:
                print('Skip \'rest\' ECG/skin conductance data.')
                completeness[participant] = False
                continue
            start_index = int(tdelta.total_seconds() * sample_rate)
            length = REST_DURATION * sample_rate
            end_index = start_index + length
            ecg_data = raw_ecg_data[start_index:end_index]
            skin_conductance_data = raw_skin_conductance_data[
                start_index:end_index]
            if len(ecg_data) < length:
                print('Not enough \'rest\' ECG/skin conductance data (%s < %s). Skip.' % (
                    len(ecg_data), length))
                completeness[participant] = False
                continue
            rest['ecg']['sample_rate'] = sample_rate
            rest['ecg']['signal'] = ecg_data
            rest['skin_conductance']['sample_rate'] = sample_rate
            rest['skin_conductance']['signal'] = skin_conductance_data

            for block in session['blocks']:
                tdelta = (block['start_time'] - rest['start_time'] +
                          pre_tdelta)
                if tdelta.total_seconds() < 0:
                    print('Skip one block ECG/skin conductance data.')
                    completeness[participant] = False
                    continue
                start_index = int(tdelta.total_seconds() * sample_rate)
                length = BLOCK_DURATION * sample_rate
                end_index = start_index + length
                ecg_data = raw_ecg_data[start_index:end_index]
                skin_conductance_data = raw_skin_conductance_data[
                    start_index:end_index]
                if len(ecg_data) < length:
                    print('Not enough one block ECG/skin conductance data (%s < %s). Skip.' % (
                        len(ecg_data), length))
                    completeness[participant] = False
                    continue
                block['ecg']['sample_rate'] = sample_rate
                block['ecg']['signal'] = ecg_data
                block['skin_conductance']['sample_rate'] = sample_rate
                block['skin_conductance']['signal'] = skin_conductance_data

    # Clean up time data (start times are only needed for slicing above)
    for participant in output_data:
        for session_id in output_data[participant]:
            session = output_data[participant][session_id]
            del session['rest']['start_time']
            for block in session['blocks']:
                del block['start_time']

    # Save segmented signal data
    for participant in output_data:
        output_filename = '%s.json' % participant
        if (completeness[participant]
                and len(output_data[participant]) == TOTAL_SESSION_NUM):
            pathname = os.path.join(segmented_data_dir, output_filename)
        else:
            pathname = os.path.join(segmented_data_dir, 'incomplete',
                                    output_filename)
        dump_json(data=output_data[participant],
                  pathname=pathname,
                  overwrite=True)
def extract():
    """Extract PPG, skin-conductance and ECG features for rest and blocks.

    For every ``*.json`` file in ``data/preprocessed`` and every session in
    it, the rest recording and each block are processed the same way:

    * ``ppg``: ``ppg45`` and ``svri`` lists from ``single_waveforms``.
    * ``skin_conductance``: ``average_level`` and ``minimum_level`` from
      ``signal``.
    * ``ecg``: ``average_rri`` and ``rmssd`` from ``rri``; ``mf_hrv_power``
      and ``hf_hrv_power`` from ``rri_interpolated`` and ``sample_rate``.

    A feature is ``None`` when its input is ``None``. The inputs
    (``single_waveforms``, skin-conductance ``signal``, ``rri``,
    ``rri_interpolated``) are removed afterwards and the file is written to
    ``data/extracted``.

    NOTE(review): this step stores ``mf_hrv_power`` while other steps in
    this file read ``lf_hrv_power`` -- confirm which key is intended.
    """

    def add_ppg_features(ppg):
        # PPG-45 and SVRI per single waveform.
        if ppg['single_waveforms'] is None:
            ppg['ppg45'] = None
            ppg['svri'] = None
        else:
            ppg['ppg45'] = [
                extract_ppg45(single_waveform=waveform,
                              sample_rate=ppg['sample_rate'])
                for waveform in ppg['single_waveforms']
            ]
            ppg['svri'] = [
                extract_svri(single_waveform=waveform)
                for waveform in ppg['single_waveforms']
            ]
        del ppg['single_waveforms']

    def add_skin_conductance_features(skin_conductance):
        # Average and minimum skin-conductance level of the signal.
        if skin_conductance['signal'] is None:
            skin_conductance['average_level'] = None
            skin_conductance['minimum_level'] = None
        else:
            skin_conductance['average_level'] = \
                extract_average_skin_conductance_level(
                    signal=skin_conductance['signal'])
            skin_conductance['minimum_level'] = \
                extract_minimum_skin_conductance_level(
                    signal=skin_conductance['signal'])
        del skin_conductance['signal']

    def add_ecg_features(ecg):
        # RRI statistics and HRV power values.
        if ecg['rri'] is None:
            ecg['average_rri'] = None
            ecg['rmssd'] = None
            ecg['mf_hrv_power'] = None
            ecg['hf_hrv_power'] = None
        else:
            ecg['average_rri'] = extract_average_rri(rri=ecg['rri'])
            ecg['rmssd'] = extract_rmssd(rri=ecg['rri'])
            mf_hrv_power, hf_hrv_power = extract_hrv_power(
                rri=ecg['rri_interpolated'],
                sample_rate=ecg['sample_rate'])
            ecg['mf_hrv_power'] = mf_hrv_power
            ecg['hf_hrv_power'] = hf_hrv_power
        del ecg['rri']
        del ecg['rri_interpolated']

    def add_all_features(recording):
        # Same order as the signals are listed above: PPG, SC, ECG.
        add_ppg_features(recording['ppg'])
        add_skin_conductance_features(recording['skin_conductance'])
        add_ecg_features(recording['ecg'])

    preprocessed_data_dir = os.path.join(BASE_DIR, 'data', 'preprocessed')
    extracted_data_dir = os.path.join(BASE_DIR, 'data', 'extracted')
    if not exist(pathname=preprocessed_data_dir):
        return

    json_names = fnmatch.filter(os.listdir(preprocessed_data_dir), '*.json')
    for json_name in json_names:
        json_data = load_json(
            pathname=os.path.join(preprocessed_data_dir, json_name))
        if json_data is None:
            continue

        for session_id in json_data:
            add_all_features(json_data[session_id]['rest'])
            for block in json_data[session_id]['blocks']:
                add_all_features(block)

        dump_json(data=json_data,
                  pathname=os.path.join(extracted_data_dir, json_name),
                  overwrite=True)
def select_feature():
    """Run feature selection per participant and export the grid scores.

    For every ``*.json`` file in ``data/merged`` (one participant each),
    every level set and every feature-type set, a feature-selection
    classifier is loaded from ``models/feature_selection/<level set>/
    <feature set>/<participant>.model`` or, when no stored model exists,
    trained and stored there. Its ``grid_scores_`` are recorded.

    Afterwards, per level set, all grid scores are written to
    ``results/feature_selection/<level set>.json`` and, per feature set, to
    ``results/feature_selection/<feature set>/<level set>-grid_scores.csv``
    with one row per participant plus an ``average`` row. Score columns are
    named ``'1'``, ``'2'``, ... by position.
    """
    merged_data_dir = os.path.join(BASE_DIR, 'data', 'merged')
    model_dir = os.path.join(BASE_DIR, 'models', 'feature_selection')
    result_dir = os.path.join(BASE_DIR, 'results', 'feature_selection')
    level_sets = [
        ['0', '2'],
        ['0', '1'],
        ['1', '2'],
    ]
    feature_type_sets = [
        ['ppg45_cr'],
    ]
    if not exist(pathname=merged_data_dir):
        return

    result_data = {}
    for filename_with_ext in fnmatch.filter(os.listdir(merged_data_dir),
                                            '*.json'):
        participant = os.path.splitext(filename_with_ext)[0]
        json_data = load_json(
            pathname=os.path.join(merged_data_dir, filename_with_ext))
        if json_data is None:
            continue

        for level_set in level_sets:
            level_set_name = '-'.join(level_set)
            level_results = result_data.setdefault(level_set_name, {})
            for feature_type_set in feature_type_sets:
                feature_type_set_name = '-'.join(feature_type_set)
                feature_results = level_results.setdefault(
                    feature_type_set_name, {'grid_scores': {}})
                features, labels = get_merged_feature_set(
                    data=json_data,
                    level_set=level_set,
                    feature_type_set=feature_type_set)
                model_pathname = os.path.join(model_dir, level_set_name,
                                              feature_type_set_name,
                                              '%s.model' % participant)
                classifier = load_model(pathname=model_pathname)
                if classifier is None:
                    classifier = feature_selection_classifier(
                        features=features, labels=labels)
                    dump_model(model=classifier, pathname=model_pathname)
                print(participant, level_set_name, feature_type_set_name)
                feature_results['grid_scores'][participant] = \
                    classifier.grid_scores_.tolist()

    for level_set_name in result_data:
        dump_json(data=result_data[level_set_name],
                  pathname=os.path.join(result_dir,
                                        '%s.json' % level_set_name),
                  overwrite=True)

        for feature_type_set in feature_type_sets:
            feature_type_set_name = '-'.join(feature_type_set)
            scores_by_participant = result_data[level_set_name][
                feature_type_set_name]['grid_scores']

            csv_data = []
            all_grid_scores = []
            for participant in scores_by_participant:
                grid_scores = scores_by_participant[participant]
                all_grid_scores.append(grid_scores)
                csv_row = {
                    'participant': participant,
                }
                for score_index, score in enumerate(grid_scores):
                    csv_row[str(score_index + 1)] = score
                csv_data.append(csv_row)

            # Column-wise mean; zip() stops at the shortest score list.
            csv_row = {
                'participant': 'average',
            }
            for scores_index, scores in enumerate(zip(*all_grid_scores)):
                csv_row[str(scores_index + 1)] = sum(scores) / len(scores)
            csv_data.append(csv_row)

            # One column per score position of the longest score list, so
            # the header never depends on which participant was iterated
            # last and every key used in a row has a column.
            column_count = 0
            for grid_scores in all_grid_scores:
                column_count = max(column_count, len(grid_scores))
            fieldnames = ['participant'] + [
                str(x) for x in range(1, column_count + 1)
            ]
            export_csv(data=csv_data,
                       fieldnames=fieldnames,
                       pathname=os.path.join(
                           result_dir, feature_type_set_name,
                           '%s-grid_scores.csv' % level_set_name),
                       overwrite=True)
def stats():
    """Summarise every extracted participant file into JSON and one CSV.

    For each ``*.json`` file in ``data/extracted`` the per-block statistics
    of every session are computed and written to ``data/stats/<same name>``
    as ``{session_id: {level: {...}}}``.  The same values (except the raw
    counts) are flattened into one CSV row per participant, with columns
    named ``s<session>_l<level>_<metric>``, and all rows are exported to
    ``results/stats.csv``.

    Nothing is done when the extracted data directory does not exist, and
    files for which ``load_json`` returns ``None`` are skipped.
    """
    extracted_data_dir = os.path.join(BASE_DIR, 'data', 'extracted')
    stats_data_dir = os.path.join(BASE_DIR, 'data', 'stats')
    result_dir = os.path.join(BASE_DIR, 'results')

    # Metrics exported to the CSV, in column order within each
    # session/level group.
    metric_names = (
        'rsme',
        'correct_rate',
        'svri',
        'minimum_skin_conductance_level',
        'average_skin_conductance_level',
        'average_rri',
        'rmssd',
        'lf_hrv_power',
        'hf_hrv_power',
    )
    # CSV header: 'name', then sessions 1-2 x levels 0-2 x metrics.
    fieldnames = ['name'] + [
        's%s_l%s_%s' % (session_id, level, metric)
        for session_id in ('1', '2')
        for level in ('0', '1', '2')
        for metric in metric_names
    ]

    if not exist(pathname=extracted_data_dir):
        return

    csv_data = []
    for filename_with_ext in fnmatch.filter(os.listdir(extracted_data_dir),
                                            '*.json'):
        participant = os.path.splitext(filename_with_ext)[0]
        json_data = load_json(
            pathname=os.path.join(extracted_data_dir, filename_with_ext))
        if json_data is None:
            continue

        output_data = {}
        csv_row = {'name': participant}
        for session_id in json_data:
            session_stats = output_data.setdefault(session_id, {})
            for block in json_data[session_id]['blocks']:
                level = block['level']
                block_stats = _block_stats(block)
                session_stats[level] = block_stats
                for metric in metric_names:
                    column = 's%s_l%s_%s' % (session_id, level, metric)
                    csv_row[column] = block_stats[metric]

        dump_json(data=output_data,
                  pathname=os.path.join(stats_data_dir, filename_with_ext),
                  overwrite=True)
        csv_data.append(csv_row)

    export_csv(data=csv_data,
               fieldnames=fieldnames,
               pathname=os.path.join(result_dir, 'stats.csv'),
               overwrite=True)


def _block_stats(block):
    """Compute the statistics dict for a single block.

    Args:
        block: dict with the keys ``'stimuli'`` (list of dicts that each
            have a ``'correct'`` entry), ``'rsme'``, ``'ppg'`` (with
            ``'svri'``), ``'skin_conductance'`` (with ``'minimum_level'``
            and ``'average_level'``) and ``'ecg'`` (with ``'average_rri'``,
            ``'rmssd'``, ``'lf_hrv_power'`` and ``'hf_hrv_power'``).

    Returns:
        A dict with the block's counts, correct rate, mean SVRI and the
        skin-conductance / ECG values copied from ``block``.
    """
    stimuli = block['stimuli']
    # Stimuli whose 'correct' is None are left out of the sum but still
    # count towards the denominator below.
    correct_count = sum(
        item['correct'] for item in stimuli if item['correct'] is not None)
    stimuli_count = len(stimuli)
    # A block without stimuli has no meaningful rate; report None instead of
    # raising ZeroDivisionError.  float() keeps this a true division even
    # under Python 2 integer semantics.
    if stimuli_count:
        correct_rate = correct_count / float(stimuli_count)
    else:
        correct_rate = None

    skin_conductance = block['skin_conductance']
    ecg = block['ecg']
    return {
        'rsme': block['rsme'],
        'correct_count': correct_count,
        'stimuli_count': stimuli_count,
        'correct_rate': correct_rate,
        'svri': np.mean(block['ppg']['svri']),
        'minimum_skin_conductance_level': skin_conductance['minimum_level'],
        'average_skin_conductance_level': skin_conductance['average_level'],
        'average_rri': ecg['average_rri'],
        'rmssd': ecg['rmssd'],
        'lf_hrv_power': ecg['lf_hrv_power'],
        'hf_hrv_power': ecg['hf_hrv_power'],
    }