def test_join(self):
    """Check that ``ospath.join`` normalises separators.

    Repeated forward slashes and Windows backslashes must both collapse to
    single forward slashes, and a leading slash on the second component must
    not reset the joined path.
    """
    filename = 'tesfile.txt'
    expected = 'path/to/folder/is/tesfile.txt'
    # the same folder, once with redundant slashes and once with backslashes
    for base in ('path/to/////folder/is//', 'path\\to\\\\folder\\is'):
        self.assertEqual(ospath.join(base, filename), expected)

    rooted = '/path/to/folder/is'
    self.assertEqual(ospath.join(rooted, rooted),
                     '/path/to/folder/is/path/to/folder/is')
def test_list_folders(self):
    """Check ``ospath.list_folders`` for all ``subfolders``/``add_parent`` combinations."""
    path = ospath.abspath('.')
    folder1 = ospath.join(path, 'folder1') + '/'
    sub1 = ospath.join(path, 'folder1', 'subfolder') + '/'

    # (subfolders, add_parent, expected listing)
    cases = [
        (False, False, [folder1]),
        (False, True, [path, folder1]),
        (True, False, [folder1, sub1]),
        (True, True, [path, folder1, sub1]),
    ]
    for subfolders, add_parent, expected in cases:
        found = ospath.list_folders(path,
                                    subfolders=subfolders,
                                    add_parent=add_parent)
        self.assertEqual(found, expected)
check_matches_unique(matches, not_matched)

# Assemble the rows of the matching CSV; rows starting with '#' are comments.
lines = ['#Patient Name; Patient Code; Patient Gender; Patient Age; Control Name; Control Code; Control Gender; Control Age; Difference']

# One section per entry of `matches`; its index is written as the age difference.
for diff, match_i in enumerate(matches):
    lines.append('')  # empty line before each new age diff section
    lines.append(f'# +-{diff} age difference, {len(match_i)} matchings')
    for p_name, c_name in match_i:
        p_code = mappings[p_name]
        patient = patients_all[p_name]
        p_gender, p_age = patient['gender'], patient['age']
        c_code = mappings[c_name]
        control = controls_all[c_name]
        c_gender, c_age = control['gender'], control['age']
        lines.append(f'{p_name}; {p_code}; {p_gender}; {p_age}; {c_name}; {c_code}; {c_gender}; {c_age}; {diff}')

# Patients without a partner are listed with a difference of 99.
lines.append('')
lines.append(f'# No match for {len(not_matched)} patients')
lines.extend(
    f'{m}; {patients_all[m]["gender"]}; {patients_all[m]["age"]};;;;;;99'
    for m in not_matched)

# Finally list every control in `controls_all`, also with a difference of 99.
lines.append('')
lines.append('# Already used controls')
lines.extend(
    f'{c_name}; {controls_all[c_name]["gender"]}; {controls_all[c_name]["age"]};;;;;;99'
    for c_name in controls_all)

matching_csv = ospath.join(cfg.documents, 'matching.csv')
misc.write_csv(matching_csv, lines)
def _mark_viewer_range(signals):
    """Overwrite the first two samples of every row with the 10th/90th percentile.

    Trick for the viewer's automatic scaling; `signals` is modified in place.
    """
    signals[:, 0:2] = np.percentile(signals, 10), np.percentile(signals, 90)


def _read_inferred_channels(edf_file, chs):
    """Read `chs` in digital units, keeping only the first channel if a list comes back."""
    signals, shead, header = read_edf(edf_file, ch_names=chs, digital=True,
                                      verbose=False)
    if isinstance(signals, list):
        signals = np.atleast_2d(signals[0])
        chs = chs[0]
    return signals, shead, header, chs


def _signal_attribs(signals, shead, dtype, ch_names, unit, use_lsb=False,
                    **extra):
    """Build the keyword arguments for ``SignalEntry.set_data``.

    With ``use_lsb=True`` the lsb/baseline computed from the first signal
    header are stored, otherwise 1 and 0. `extra` entries are added verbatim.
    """
    first = shead[0]
    pmin, pmax = first['physical_min'], first['physical_max']
    dmin, dmax = first['digital_min'], first['digital_max']
    lsb, offset = sleep_utils.minmax2lsb(dmin, dmax, pmin, pmax)
    attrib = {
        'data': signals.astype(dtype),
        'sampleRate': first['sample_rate'],
        'ch_names': ch_names,
        'lsbValue': lsb if use_lsb else 1,
        'baseline': offset if use_lsb else 0,
        'unit': unit,
        'dmin': dmin,
        'dmax': dmax,
        'pmin': pmin,
        'pmax': pmax,
    }
    attrib.update(extra)
    return attrib


def _add_hypnogram_entry(u, entry_id, hypno, code):
    """Store `hypno` as an event entry `entry_id`, one row ``[epoch index, stage]`` per epoch."""
    times = np.arange(len(hypno))
    hypno = np.vstack([times, hypno]).T
    entry = EventEntry(id=entry_id, parent=u)
    entry.set_data(hypno,
                   comment=f'File: {code}\nSleep stages 30s epochs.',
                   sampleRate=1 / 30,
                   contentClass='Stage',
                   typeLength=1)


def to_unisens(edf_file, unisens_folder, overwrite=False, tqdm_desc=None,
               skip_exist=False):
    """Convert one EDF file and its companion files into a unisens folder.

    The folder ``<unisens_folder>/<code>`` is used, where ``code`` is the EDF
    file name without its last four characters. Metadata comes from the EDF
    header and ``misc.get_attribs()``; ECG, EEG, EOG, EMG, thorax and body
    signals, annotations, arousals, hypnograms and Kubios features are added
    as entries.

    Parameters
    ----------
    edf_file : str
        Path of the EDF file.
    unisens_folder : str
        Parent folder in which the per-recording folder lives.
    overwrite : bool
        If true, entries that already exist are written again.
    tqdm_desc : callable, optional
        Called with a status string before most steps; defaults to a no-op.
    skip_exist : bool
        If true and the target folder already exists, return immediately.

    Raises
    ------
    ValueError
        If a companion file has an unknown file type.
    """
    # %% create unisens
    if tqdm_desc is None:
        tqdm_desc = lambda x: None

    dtype = np.int16
    code = ospath.basename(edf_file)[:-4]
    folder = ospath.dirname(edf_file)

    unisens_folder = ospath.join(unisens_folder, code)
    if skip_exist and ospath.isdir(unisens_folder):
        return

    # get all additional files that belong to this EDF
    add_files = ospath.list_files(folder, patterns=code + '*')
    u = Patient(unisens_folder, makenew=False, autosave=True,
                measurementId=code)
    header = read_edf_header(edf_file)
    all_labels = header['channels']
    u.starttime = header['startdate']
    u.timestampStart = header['startdate'].strftime('%Y-%m-%dT%H:%M:%S')
    u.code = code

    attribs = misc.get_attribs()
    u.group = attribs[code].get('group', 'none')
    u.gender = attribs[code].get('gender', 'none')
    u.drug_hrv = attribs[code].get('drug_hrv', 0)
    u.drug_sleep = attribs[code].get('drug_sleep', 0)
    u.age = attribs[code].get('age', -1)
    u.match = attribs[code].get('match', '')

    u.channels = str(', '.join(header['channels']))
    u.startsec = (u.starttime.hour * 60 +
                  u.starttime.minute) * 60 + u.starttime.second
    u.use_offset = 1

    # if the ECG/EEG is broken, mark it (the discard list is read only once)
    discarded = misc.read_csv(cfg.edfs_discard)
    edfs_ecg_broken = [p[1] for p in discarded if p[3] == '1']
    edfs_eeg_broken = [p[1] for p in discarded if p[4] == '1']

    # we need to see if the ecg/eeg of this file can be used;
    # if one of them is broken we also remove its match from analysis
    u.ecg_broken = (code in edfs_ecg_broken) or (u.match in edfs_ecg_broken)
    u.eeg_broken = (code in edfs_eeg_broken) or (u.match in edfs_eeg_broken)

    # %% add ECG
    tqdm_desc(f'{code}: Reading ECG')
    if 'ECG' not in u or overwrite:
        signals, shead, header = read_edf(edf_file, ch_names=['ECG I'],
                                          digital=True, verbose=False)
        _mark_viewer_range(signals)
        attrib = _signal_attribs(signals, shead, dtype, 'ECG', 'mV',
                                 use_lsb=True)
        SignalEntry(id='ECG.bin', parent=u).set_data(**attrib)

        u.sampling_frequency = shead[0]['sample_rate']
        u.duration = len(signals.squeeze()) // shead[0]['sample_rate']
        u.epochs_signals = signals.shape[1] // int(u.sampling_frequency) // 30

    # %% add EEG
    tqdm_desc(f'{code}: Reading EEG')
    if 'EEG' not in u or overwrite:
        chs = sleep_utils.infer_eeg_channels(all_labels)
        signals, shead, header, chs = _read_inferred_channels(edf_file, chs)
        _mark_viewer_range(signals)
        attrib = _signal_attribs(signals, shead, dtype, chs, 'uV',
                                 use_lsb=True, contentClass='EEG')
        SignalEntry(id='EEG.bin', parent=u).set_data(**attrib)

    # %% add EOG
    if 'EOG' not in u or overwrite:
        tqdm_desc(f'{code}: Reading EOG')
        chs = sleep_utils.infer_eog_channels(all_labels)
        signals, shead, header, chs = _read_inferred_channels(edf_file, chs)
        _mark_viewer_range(signals)
        attrib = _signal_attribs(signals, shead, dtype, chs, 'uV')
        SignalEntry(id='EOG.bin', parent=u).set_data(**attrib)

    # %% add EMG
    if 'EMG' not in u or overwrite:
        tqdm_desc(f'{code}: Reading EMG')
        chs = sleep_utils.infer_emg_channels(all_labels)
        if chs != []:  # fix for 888_49272
            signals, shead, header, chs = _read_inferred_channels(
                edf_file, chs)
            _mark_viewer_range(signals)
            attrib = _signal_attribs(signals, shead, dtype, chs, 'uV')
            SignalEntry(id='EMG.bin', parent=u).set_data(**attrib)

    # %% add Thorax
    if 'thorax' not in u or overwrite:
        tqdm_desc(f'{code}: Reading Thorax')
        signals, shead, header = read_edf(edf_file, ch_names=['Thorax'],
                                          digital=True, verbose=False)
        _mark_viewer_range(signals)
        attrib = _signal_attribs(signals, shead, dtype, 'thorax', 'uV')
        SignalEntry(id='thorax.bin', parent=u).set_data(**attrib)

    # %% add Body / position sensor
    if ('body' not in u or overwrite) and 'Body' in all_labels:
        tqdm_desc(f'{code}: Reading Body')
        signals, shead, header = read_edf(edf_file, ch_names=['Body'],
                                          digital=True, verbose=False)
        _mark_viewer_range(signals)

        # we have some weird body positions that we cant decode; skip those
        if np.ptp(signals) < 10:
            comment = 'Lagesensor: 1 = Bauchlage, 2 = aufrecht, 3 = links, 4 = rechts,' \
                      '5 = aufrecht (Kopfstand), 6 = Rückenlage'
            attrib = _signal_attribs(signals, shead, dtype, 'body', 'uV',
                                     comment=comment)
            SignalEntry(id='body.bin', parent=u).set_data(**attrib)

    # %% add annotations
    # `header` is the one returned by the most recent EDF read above
    if 'annotations' not in u or overwrite:
        annotations = header['annotations']
        if annotations != []:
            annot_entry = EventEntry('annotations.csv', parent=u)
            annotations = [[int(a[0] * 1000), a[2]] for a in annotations]
            annot_entry.set_data(annotations, sampleRate=1000, typeLength=1,
                                 contentClass='Annotation')

    # %% add rest
    for file in add_files:
        # ignore diagnosis files of StanfordStages
        if file.endswith(
                ('diagnosis.txt', 'hypnodensity.txt', 'hypnogram.txt')):
            pass

        # %% add arousals
        elif file.endswith('_arousal.txt'):
            if 'arousals' in u and not overwrite:
                continue
            lines = misc.read_csv(file, convert_nums=True)
            sdate = u.starttime
            data = []
            # the first four rows are skipped
            for clock, length, _ in lines[4:]:
                stamp = f'{sdate.year}.{sdate.month}.{sdate.day} ' + clock[:8]
                t_arousal = datetime.strptime(stamp, '%Y.%m.%d %H:%M:%S')
                # `.seconds` is never negative, so clock times that are
                # earlier than the start time wrap into the following day
                epoch = (t_arousal - sdate).seconds // 30
                data.append([epoch, length])

            arousal_event = EventEntry('arousals.csv', parent=u)
            arousal_event.set_data(
                data,
                comment='Arousal appearance epoch, name is lengths in seconds',
                sampleRate=1 / 30,
                contentClass='Arousal',
                typeLength=1)

        # %% add hypnogram
        elif file.endswith('txt'):
            if 'hypnogram' in u and not overwrite:
                continue
            tqdm_desc(f'{code}: Reading Hypnogram')
            hypno = sleep_utils.read_hypnogram(file)
            u.epochs_hypno = len(hypno)
            _add_hypnogram_entry(u, 'hypnogram.csv', hypno, code)

        elif file.endswith('.hypno'):
            if 'hypnogram_old' in u and not overwrite:
                continue
            hypno = sleep_utils.read_hypnogram(file)
            if not hasattr(u, 'epochs_hypno'):
                u.epochs_hypno = len(hypno)
            _add_hypnogram_entry(u, 'hypnogram_old.csv', hypno, code)

        # %% add features and kubios
        elif file.endswith('mat'):
            if 'feats.pkl' in u and not overwrite:
                continue
            tqdm_desc(f'{code}: Reading Kubios')
            mat = loadmat(file)
            HRV = mat['Res']['HRV']

            feats_entry = CustomEntry('feats.pkl', parent=u)
            feats_entry.set_data(
                HRV,
                comment='pickle dump of the kubios created features file',
                fileType='pickle')

            u.compute_features()
            u.get_artefacts(wsize=cfg.default_wsize, step=cfg.default_step,
                            offset=True)

            # %% add RRi
            tqdm_desc(f'{code}: writing RRi')
            rri_entry = CustomEntry('RRi.pkl', parent=u)
            rri_entry.set_data(
                HRV['Data']['RRi'],
                comment='raw data of RRi, the interpolated RRs at 4hz',
                fileType='pickle')
            rri_entry.sampleRate = 4

        # artefacts are no longer read from .npy files; they are computed
        # from the kubios features above
        elif file.endswith(('.edf', 'pkl')):
            pass
        else:
            raise ValueError(f'unkown file type: {file}')

    u.save()
@author: skjerns
"""
import os
from sleep import SleepSet
import sleep_utils
import numpy as np
import ospath
import config as cfg
import matplotlib.pyplot as plt
from tqdm import tqdm
from multiprocessing import Process, Queue

if __name__ == '__main__':
    # Load every recording of the configured unisens folder, then narrow it down.
    ss = SleepSet(cfg.folder_unisens)
    ss = ss.filter(
        lambda x: x.duration < 60 * 60 * 11)  # only recordings below 11 hours (assumes duration is in seconds)
    ss = ss.filter(
        lambda x: x.group in ['control', 'nt1'])  # only controls and NT1 patients
    ss = ss.filter(lambda x: np.mean(x.get_artefacts(only_sleeptime=True)) <
                   0.25)  # only take patients with artefact percentage <25%

    # Plot at most the first 250 remaining recordings, one JPG per recording.
    for p in tqdm(ss[:250]):
        dataset = p.get_attrib('dataset', 'none')
        saveas = ospath.join(cfg.documents, 'plots', p.group, dataset,
                             p.code + '.jpg')

        # a plot that already exists is not drawn again
        if ospath.exists(saveas):
            continue
        p.spectogram(channels=['ecg', 'RRi'], ufreq=2)
        os.makedirs(os.path.dirname(saveas), exist_ok=True)
        plt.savefig(saveas)
        # close all figures so they do not accumulate over the loop
        plt.close('all')
def _add_mnc_channel(u, edf_file, ch_name, label, dtype):
    """Read one EDF channel in digital units and store it as ``<label>.bin`` in `u`.

    Also sets ``u.sampling_frequency`` to the sample rate of that channel.
    """
    sig_orig, shead_orig, _ = read_edf(edf_file, ch_names=ch_name,
                                       verbose=False, digital=True)
    assert sig_orig.max() <= 32767 and sig_orig.min() >= -32768, \
        'min/max exceeds int16'
    shead = shead_orig[0]
    pmin, pmax = shead['physical_min'], shead['physical_max']
    dmin, dmax = shead['digital_min'], shead['digital_max']
    lsb, offset = sleep_utils.minmax2lsb(dmin, dmax, pmin, pmax)
    attrib = {
        'data': sig_orig.astype(dtype),
        'sampleRate': shead['sample_rate'],
        'ch_names': label,
        'lsbValue': lsb,
        'baseline': offset,
        'unit': 'mV',
        'dmin': dmin,
        'dmax': dmax,
        'pmin': pmin,
        'pmax': pmax,
    }
    u.sampling_frequency = shead['sample_rate']
    SignalEntry(id=f'{label}.bin', parent=u).set_data(**attrib)


def to_unisens(edf_file, unisens_folder, mat_folder, overwrite=False,
               skip_exist=False):
    """Convert one MNC EDF file plus hypnogram and Kubios file to unisens.

    The recording is skipped (a message is printed and ``None`` returned)
    when no MNC info, no hypnogram file, no or several ``.mat`` files or no
    ECG channel are found, or when the EDF header cannot be read even after
    ``repair_file``.

    Parameters
    ----------
    edf_file : str
        Path of the EDF file; the last nine characters ("-nsrr.edf") are
        stripped to obtain the recording name.
    unisens_folder : str
        Parent folder; the recording is written to ``<unisens_folder>/<code>``.
    mat_folder : str
        Folder searched for ``<name>-*.mat`` files.
    overwrite : bool
        If true, entries that already exist are written again.
    skip_exist : bool
        If true and the target folder already exists, return immediately.

    Returns
    -------
    True if the recording was converted, otherwise None.

    Raises
    ------
    AttributeError
        If the diagnosis cannot be mapped to a known group.
    """
    dtype = np.int16
    folder = ospath.dirname(edf_file)
    filename = ospath.basename(edf_file)[:-9]  # remove "-nsrr.edf" from filename

    mnc_info = misc.get_mnc_info()
    info_key = filename.upper().replace(' ', '_')
    try:
        attribs = mnc_info[info_key]
    except KeyError:
        print(f'Info for {info_key} not found')
        return

    # get all additional files that belong to this EDF
    patterns = [filename + '*.xml', filename + '*.sta']
    add_files = ospath.list_files(folder, patterns=patterns)
    if len(add_files) == 0:
        print(f'No hypnogram for {filename}, skip')
        return

    # exactly one mat file is required
    mat_files = ospath.list_files(mat_folder, patterns=[filename + '-*.mat'])
    if len(mat_files) == 0:
        print(f'No matfile found for {filename}')
        return
    if len(mat_files) > 1:
        print(f'too many matching mat files: {mat_files}')
        return
    mat_file = mat_files[0]

    # get the codified version of this file
    code = misc.codify(filename)
    unisens_folder = ospath.join(unisens_folder, code)

    # if this unisens folder exists, skip if requested
    if skip_exist and ospath.isdir(unisens_folder):
        return

    # read the header; on failure try once more after repairing the file
    try:
        header = read_edf_header(edf_file)
    except Exception:
        repair_file(edf_file)
        try:
            header = read_edf_header(edf_file)
        except Exception as e:
            print(f'cant load {filename}, broken edf {e}')
            return

    channels = header['channels']
    chs_eeg = [ch for ch in channels if 'EEG' in ch.upper()]
    chs = [ch for ch in channels if 'ECG' in ch.upper()]
    if 'cs_ECG' in chs and len(chs) > 1:
        chs.remove('cs_ECG')
    # without this guard `chs[0]` below raised IndexError after the unisens
    # folder had already been created
    if not chs:
        print(f'No ECG channel found in {filename}, skip')
        return

    # add metadata for this file
    u = Patient(unisens_folder, makenew=True, autosave=True,
                measurementId=code)
    u.starttime = header['startdate']
    u.timestampStart = header['startdate'].strftime('%Y-%m-%dT%H:%M:%S')
    u.code = code
    u.duration = header['Duration']
    u.dataset = 'mnc'
    u.channels = str(', '.join(channels))
    u.startsec = (u.starttime.hour * 60 +
                  u.starttime.minute) * 60 + u.starttime.second
    if u.startsec == 0:
        # a start at exactly 00:00:00 is reported on stdout
        print(edf_file)
    u.DQ0602 = attribs['DQ0602']
    u.hypocretin = attribs['CSF hypocretin-1']
    u.label = attribs['Label']
    u.cohort = attribs['Cohort']
    u.use_offset = 0
    u.gender = 'unknown'
    u.match = None

    diagnosis = attribs['Diagnosis']
    if 'CONTROL' in diagnosis:
        group = 'control'
    elif 'T1' in diagnosis:
        group = 'nt1'
    elif 'OTHER HYPERSOMNIA' in diagnosis:
        group = 'hypersomnia'
    else:
        raise AttributeError(f'unkown group: {diagnosis} for {filename}')
    u.group = group

    # %% Add ECG channel
    if 'ecg' not in u or overwrite:
        _add_mnc_channel(u, edf_file, chs[0], 'ECG', dtype)

    # %% Add EEG channel, if there is one
    # NOTE(review): as before, this also overwrites u.sampling_frequency with
    # the EEG sample rate - confirm that this is intended.
    if ('eeg' not in u or overwrite) and len(chs_eeg) > 0:
        _add_mnc_channel(u, edf_file, chs_eeg[0], 'EEG', dtype)

    # %% add annotations
    if 'annotations' not in u or overwrite:
        annotations = header['annotations']
        if annotations != []:
            annot_entry = EventEntry('annotations.csv', parent=u)
            annotations = [[int(a[0] * 1000), a[2]] for a in annotations]
            annot_entry.set_data(annotations, sampleRate=1000, typeLength=1,
                                 contentClass='Annotation')

    # %% add hypnogram (add_files is known to be non-empty here)
    if 'hypnogram' not in u or overwrite:
        # every file is parsed as before, but only the first one is stored
        hypnograms = [
            sleep_utils.read_hypnogram(
                file,
                epochlen_infile=30 if file.endswith('annot') else None)
            for file in add_files
        ]
        hypno = hypnograms[0]
        u.epochs_hypno = len(hypno)
        times = np.arange(len(hypno))
        hypno = np.vstack([times, hypno]).T
        hypno_entry = EventEntry(id='hypnogram.csv', parent=u)
        hypno_entry.set_data(
            hypno,
            comment=f'File: {code}\nSleep stages 30s epochs.',
            sampleRate=1 / 30,
            contentClass='Stage',
            typeLength=1)

    # %% Add features
    if 'feats.pkl' not in u or overwrite:
        mat = loadmat(mat_file)
        HRV = mat['Res']['HRV']
        feats_entry = CustomEntry('feats.pkl', parent=u)
        feats_entry.set_data(
            HRV,
            comment='pickle dump of the kubios created features file',
            fileType='pickle')

        u.compute_features(offset=False)
        u.get_artefacts(wsize=cfg.default_wsize, step=cfg.default_step,
                        offset=False)

        rri_entry = CustomEntry('RRi.pkl', parent=u)
        rri_entry.set_data(
            HRV['Data']['RRi'],
            comment='raw data of RRi, the interpolated RRs at 4hz',
            fileType='pickle')
        rri_entry.sampleRate = 4

    u.save()
    return True
It only copies files for which we have a match. Eg at current state 27/01/2020 we have set1 with 28 and set2 with 30 patients @author: skjerns """ import os from misc import read_csv import shutil import ospath import config as cfg from tqdm import tqdm if __name__ == '__main__': documents = cfg.documents datasets = [ ospath.join(documents, 'mapping_' + d + '.csv') for d in cfg.datasets ] matching = cfg.matching set1_path = ospath.join(cfg.folder_edf, 'set1') set2_path = ospath.join(cfg.folder_edf, 'set2') matchings = read_csv(matching) set1 = read_csv(datasets[0]) set2 = read_csv(datasets[1]) os.makedirs(ospath.join(cfg.folder_edf, 'set1'), exist_ok=True) os.makedirs(ospath.join(cfg.folder_edf, 'set2'), exist_ok=True) os.makedirs(ospath.join(cfg.folder_edf, 'set1', 'not_matched'), exist_ok=True) os.makedirs(ospath.join(cfg.folder_edf, 'set2', 'not_matched'),
# NOTE(review): the next two statements are the end of a function whose header
# lies above this excerpt (presumably get_dropbox_location) - confirm.
personal_dbox_path = Path(j['personal']['path'])
return personal_dbox_path

###############################
###USER SPECIFIC CONFIGURATION
###############################
username = getpass.getuser().lower()  # your login name
host = platform.node().lower()  # the name of this computer
system = platform.system().lower()  # linux, windows or mac.
home = os.path.expanduser('~')

dropbox = get_dropbox_location()
# The shared document paths are only defined when a Dropbox location was found.
if dropbox:
    documents = ospath.join(dropbox, 'nt1-hrv-documents')
    matching = ospath.join(documents, 'matching.csv')
    edfs_invert = ospath.join(documents, 'edfs_invert.csv')
    edfs_discard = ospath.join(documents, 'edfs_discard.csv')
    controls_csv = ospath.join(documents, 'subjects_control.csv')
    patients_csv = ospath.join(documents, 'subjects_nt1.csv')

# Per-user settings are selected by login name and host name; an unknown
# combination only prints a hint and leaves USER_VAR undefined.
if username == 'nd269' and host == 'ess-donatra':
    USER_VAR = 'test123'
elif username == 'simon' and host == 'desktop-simon':
    USER_VAR = 'test456'
else:
    print('Username {} on host {} with {} has no configuration.\n'.format(username,host,system) + \
    'please set user specific information in config.py')
def anonymize_and_streamline(old_file, target_folder):
    """Anonymize one EDF file and copy it with its companion files.

    The function
        1. removes the birthdate and replaces patient name and code by the
           codified name,
        2. renames the channels according to ``ch_mapping``,
        3. inverts the ECG channel for files listed in ``cfg.edfs_invert``,
        4. writes the result to ``<target_folder>/<new_name>.edf`` via a
           temporary file and compares it with the original,
        5. copies matching ``txt``/``dat``/``mat`` files next to it; ``.txt``
           and ``.dat`` files are stored with the extension ``.hypno``.

    Parameters
    ----------
    old_file : str
        Path of the personalized EDF file.
    target_folder : str
        Folder that receives the anonymized files.

    Returns
    -------
    tuple of (old_name, new_name), or None if the file is marked as
    discarded in ``cfg.edfs_discard``.
    """
    # load the two csvs with the edfs that we dont process and where the ECG is upside down
    pre_coding_discard = [
        line[0] for line in misc.read_csv(cfg.edfs_discard) if line[2] == '1'
    ]
    to_invert = [line[0] for line in misc.read_csv(cfg.edfs_invert)]

    # Here we read the list of controls and patients with their age and gender
    mappings = misc.read_csv(cfg.controls_csv)
    mappings.extend(misc.read_csv(cfg.patients_csv))
    mappings = {
        name: {'gender': gender, 'age': age}
        for name, gender, age, *_ in mappings
    }

    # old name is the personalized file without file extension, e.g. thomas_smith(1)
    old_name = ospath.splitext(ospath.basename(old_file))[0]
    # new name is the codified version without extension e.g '123_45678'
    new_name = codify(old_name)

    if old_name in pre_coding_discard:
        print('EDF is marked as corrupt and will be discarded')
        return

    # this is where the anonymized file will be stored
    new_file = ospath.join(target_folder, new_name + '.edf')

    if ospath.exists(new_file):
        print('New file extists already {}'.format(new_file))
    else:
        # anonymize
        print('Writing {} from {}'.format(new_file, old_name))
        assert ospath.isfile(old_file), f'{old_file} does not exist'
        signals, signal_headers, header = sleep_utils.read_edf(
            old_file, digital=True, verbose=False)

        # remove patient info
        header['birthdate'] = ''
        header['patientname'] = new_name
        header['patientcode'] = new_name
        header['gender'] = mappings[old_name]['gender']
        header['age'] = mappings[old_name]['age']

        # rename channels to a unified notation, e.g. EKG becomes ECG I
        for shead in signal_headers:
            ch = shead['label']
            if ch in ch_mapping:
                shead['label'] = ch_mapping[ch]

        # Invert the ECG channel if necessary
        if old_name in to_invert:
            ecg_label = cfg.ecg_channel.lower()
            for i, sig in enumerate(signals):
                if signal_headers[i]['label'].lower() == ecg_label:
                    signals[i] = -sig

        # Write to a file inside a private temporary directory and move it
        # afterwards, so no half-written file is left in the target folder.
        # TemporaryFile().name is not a usable path on every platform, and
        # the directory is removed again even if writing or verifying fails.
        with tempfile.TemporaryDirectory(prefix='anonymize') as tmp_dir:
            tmp_name = ospath.join(tmp_dir, new_name + '.edf')
            print('Writing tmp for {}'.format(new_file))
            sleep_utils.write_edf(tmp_name, signals, signal_headers, header,
                                  digital=True, correct=True)

            # verify that contents for both files match exactly
            print('Verifying tmp for {}'.format(new_file))
            # hack: B0036 is excluded from the comparison because of its
            # dmin/dmax after inverting
            if not old_name == 'B0036':
                sleep_utils.compare_edf(old_file, tmp_name, verbose=False)

            # now we move the tmp file to its new location.
            shutil.move(tmp_name, new_file)

    # also copy additional file information ie hypnograms and kubios files
    old_dir = ospath.dirname(old_file)
    # remove gender from weitere nt1 patients
    pattern = old_name.replace('_m', '').replace('_w', '')
    add_files = ospath.list_files(
        old_dir,
        patterns=[f'{pattern}*txt', f'{pattern}*dat', f'{pattern}*mat'])

    for add_file in add_files:
        # e.g. .mat or .npy etc etc
        new_add_file = ospath.join(
            target_folder,
            ospath.basename(add_file.replace(pattern, new_name)))
        # NOTE(review): this check uses the name before the renaming below,
        # so files that end up as .hypno are copied again on every run.
        if ospath.exists(new_add_file):
            continue

        # hypnograms will be copied to .hypno
        new_add_file = new_add_file.replace('-Schlafprofil', '')
        new_add_file = new_add_file.replace('_sl', '')
        new_add_file = new_add_file.replace('.txt', '.hypno').replace(
            '.dat', '.hypno')
        try:
            shutil.copy(add_file, new_add_file)
        except OSError as e:
            # copying companion files is best effort
            print(e)
    return old_name, new_name
return old_name, new_name #%% Main if __name__ == '__main__': print( 'running in parallel. if you don\'t see output, start with python.exe') # first get all edfs in all dataset folders files = [] # cheeky workaround for not functioning list comprehension .extend _ = [ files.extend(ospath.list_files(folder, exts='edf', subfolders=True)) for folder in datasets.values() ] results = Parallel(n_jobs=4, backend='loky')( delayed(anonymize_and_streamline)(file, target_folder=target_folder) for file in tqdm(files, desc='processing edfs')) # remove discarded files results = [res for res in results if not res is None] # check for hash collision assert len(set(list(zip(*results))[1]))==len(list(zip(*results))[1]),\ 'ERROR: Hash collision! Check thoroughly.' csv_file = ospath.join(documents, 'mapping_all.csv') df = pd.DataFrame(results) df.to_csv(csv_file, header=None, index=False, sep=';')
""" Created on Thu Feb 27 14:40:36 2020 add @author: skjerns """ import config import os from tqdm import tqdm import ospath from pyedflib import highlevel folder = "Z:/NT1-HRV-data" new_folder = ospath.join(folder, "new") os.makedirs(new_folder, exist_ok=True) mapping = config.mapping_channels files = ospath.list_files(folder, exts='edf') for file in tqdm(files): name = ospath.basename(file)[:-4] new_file = ospath.join(new_folder, name + ".edf") if os.path.exists(new_file): print(f"{new_file} exists, skipping") continue highlevel.anonymize_edf(file, new_file, to_remove = ['patientcode', 'patientname'], new_values = [name, name], verify=False) highlevel.rename_channels(new_file, mapping=mapping, new_file=new_file)
def spectogram(self, channels='eeg', hypnogram=True, fig=None, saveas=None,
               **kwargs):
    """Plot a spectrogram per channel, optionally with the hypnogram below.

    Parameters
    ----------
    channels : str or list of str
        Names of the entries to plot, one subplot each.
    hypnogram : bool
        If true and a hypnogram entry exists, the hypnogram is drawn in an
        extra subplot at the bottom and artefact epochs are marked in red.
    fig : matplotlib figure, optional
        Figure to draw into; a new one is created if omitted.
    saveas : str, None or False
        Unless ``False``, the plot is saved below ``<folder>/plots/``. If a
        path is given, the plot is additionally saved there.
    **kwargs
        Passed on to ``sleep_utils.specgram_multitaper``.

    Returns
    -------
    (fig, axs) : the figure and the flattened array of its axes.

    Raises
    ------
    ValueError
        If one of `channels` is not an entry of this object.
    """
    with plt.style.context('default'):
        # only draw the hypnogram if there is one
        hypnogram = hypnogram * ('hypnogram' in self
                                 or 'hypnogram_old.csv' in self)
        if isinstance(channels, str):
            channels = [channels]

        n_chs = len(channels)
        plots = n_chs + hypnogram

        # One height ratio per subplot. Previously a single ratio was used
        # without hypnogram, which matplotlib rejects for several channels.
        h_ratio = [0.75 / n_chs] * n_chs
        if hypnogram:
            h_ratio.append(0.25)

        if fig is None:
            fig = plt.figure()
        axs = fig.subplots(plots, 1,
                           gridspec_kw={'height_ratios': h_ratio},
                           squeeze=False)
        axs = axs.flatten()

        for i, channel in enumerate(channels):
            if channel not in self:
                raise ValueError(f'Entry {channel} not found')
            ax = axs[i]
            entry = self[channel]
            signal = entry.get_data().squeeze()
            if signal.ndim > 1:
                signal = signal[0]
            sfreq = int(entry.sampleRate)
            # sample rates below 10 use matplotlib's specgram,
            # everything else the multitaper spectrogram
            if sfreq < 10:
                spec, freqs, _, _ = ax.specgram(signal, Fs=sfreq)
            else:
                sleep_utils.specgram_multitaper(signal, sfreq=sfreq, ax=ax,
                                                **kwargs)
            ax.set_title(channel)

        # only the lowest subplot keeps its x tick labels
        for ax in axs[:-1]:
            ax.tick_params(axis='x', which='both', bottom=False, top=False,
                           labelbottom=False)

        # show the x axis of the lowest subplot as HH:MM
        formatter = FuncFormatter(
            lambda s, x: time.strftime('%H:%M', time.gmtime(s)))
        axs[-1].xaxis.set_major_formatter(formatter)

        if hypnogram:
            offset = self.get_attrib('use_offset', 1)
            artefacts = self.get_artefacts(offset=offset)
            hypno = self.get_hypno()
            labeldict = {
                0: 'Wake',
                4: 'REM',
                1: 'S1',
                2: 'S2',
                3: 'SWS',
                5: 'Artefact'
            }
            sleep_utils.plot_hypnogram(hypno, ax=axs[-1], labeldict=labeldict)
            # one 30 s segment per epoch; the alpha value makes segments of
            # artefact-free epochs invisible
            for i, is_art in enumerate(artefacts):
                plt.plot([i * 30, (i + 1) * 30], [0.2, 0.2],
                         c='red',
                         alpha=0.75 * is_art,
                         linewidth=1)

        # sfreq is the sample rate of the last plotted channel
        plt.suptitle(f'Plotted: {channels}, {sfreq} Hz', y=1)
        plt.pause(0.01)
        plt.tight_layout()
        plt.pause(0.01)

        if saveas is not False:
            file = ospath.join(self._folder, '/plots/',
                               f'plot_{"_".join(channels)}.png')
            os.makedirs(os.path.dirname(file), exist_ok=True)
            plt.savefig(file)

        if saveas:
            # a bare file name has no directory part that could be created
            target_dir = os.path.dirname(saveas)
            if target_dir:
                os.makedirs(target_dir, exist_ok=True)
            plt.savefig(saveas)
    return fig, axs