Exemple #1
0
    def test_join(self):
        """Check that ``ospath.join`` collapses separators and keeps a leading slash."""
        filename = 'tesfile.txt'
        expected = 'path/to/folder/is/tesfile.txt'
        # the same folder, once with repeated forward slashes and once with
        # (repeated) backslashes
        bases = ('path/to/////folder/is//', 'path\\to\\\\folder\\is')
        for base in bases:
            self.assertEqual(ospath.join(base, filename), expected)

        # joining a path that starts with a slash onto itself must yield a
        # single separator at the seam and keep the leading slash
        rooted = '/path/to/folder/is'
        self.assertEqual(ospath.join(rooted, rooted),
                         '/path/to/folder/is/path/to/folder/is')
Exemple #2
0
    def test_list_folders(self):
        """Exercise ``ospath.list_folders`` for every subfolders/add_parent combination."""
        root = ospath.abspath('.')
        first = ospath.join(root, 'folder1') + '/'
        nested = ospath.join(root, 'folder1', 'subfolder') + '/'

        # (subfolders, add_parent, expected listing), checked in this order
        cases = [
            (False, False, [first]),
            (False, True, [root, first]),
            (True, False, [first, nested]),
            (True, True, [root, first, nested]),
        ]
        for subfolders, add_parent, expected in cases:
            found = ospath.list_folders(root,
                                        subfolders=subfolders,
                                        add_parent=add_parent)
            self.assertEqual(found, expected)
Exemple #3
0
    
    check_matches_unique(matches, not_matched)
    
    # now we create the csv_string that we will write to a file:
    lines = ['#Patient Name; Patient Code; Patient Gender; Patient Age; Control Name; Control Code; Control Gender; Control Age; Difference']
    for diff, match_i in enumerate(matches): #last one
        lines += [''] # add empty line before each new age diff section
        lines += [f'# +-{diff} age difference, {len(match_i)} matchings']
        for p_name, c_name in match_i:
            p_code = mappings[p_name]
            p_gender = patients_all[p_name]['gender']
            p_age = patients_all[p_name]['age']
            c_code = mappings[c_name]
            c_gender = controls_all[c_name]['gender']
            c_age = controls_all[c_name]['age']
            lines.append(f'{p_name}; {p_code}; {p_gender}; {p_age}; {c_name}; {c_code}; {c_gender}; {c_age}; {diff}')
    
    lines += ['']
    lines += [f'# No match for {len(not_matched)} patients']
    lines += [f'{m}; {patients_all[m]["gender"]}; {patients_all[m]["age"]};;;;;;99' for m in not_matched]

    # now we add all controls that are in the project
    lines += ['']
    lines += ['# Already used controls']
    for c_name in controls_all:
        lines += [f'{c_name}; {controls_all[c_name]["gender"]}; {controls_all[c_name]["age"]};;;;;;99']
        
    matching_csv = ospath.join(cfg.documents, 'matching.csv')
    misc.write_csv(matching_csv, lines)
        
def _read_for_viewer(edf_file, ch_names, first_if_list=False):
    """Read the digital samples of `ch_names` and add the viewer scaling markers.

    The first two samples of every row are overwritten with the 10th and 90th
    percentile of the data (trick for viewer automatic scaling).

    With `first_if_list`, a list returned by `read_edf` is reduced to its
    first element (as a 2D array) and `ch_names` to its first name.

    Returns the tuple (signals, signal_headers, header, ch_names).
    """
    signals, shead, header = read_edf(edf_file,
                                      ch_names=ch_names,
                                      digital=True,
                                      verbose=False)
    if first_if_list and isinstance(signals, list):
        signals = np.atleast_2d(signals[0])
        ch_names = ch_names[0]
    # trick for viewer automatic scaling
    signals[:, 0:2] = np.percentile(signals, 10), np.percentile(signals, 90)
    return signals, shead, header, ch_names


def _signal_attribs(signals, shead, ch_names, unit, dtype,
                    calibrated=True, content_class=None):
    """Assemble the keyword arguments passed to `SignalEntry.set_data`.

    Sample rate and digital/physical ranges are taken from the first signal
    header. With `calibrated=False` lsbValue/baseline are stored as 1/0
    instead of the values derived from the header ranges. `content_class`,
    when given, is added as 'contentClass'.
    """
    first = shead[0]
    pmin, pmax = first['physical_min'], first['physical_max']
    dmin, dmax = first['digital_min'], first['digital_max']
    if calibrated:
        lsb, offset = sleep_utils.minmax2lsb(dmin, dmax, pmin, pmax)
    else:
        lsb, offset = 1, 0

    # keys are inserted in the same order the entries were written before
    attrib = {
        'data': signals.astype(dtype),
        'sampleRate': first['sample_rate'],
        'ch_names': ch_names,
        'lsbValue': lsb,
        'baseline': offset,
    }
    if content_class is not None:
        attrib['contentClass'] = content_class
    attrib.update({
        'unit': unit,
        'dmin': dmin,
        'dmax': dmax,
        'pmin': pmin,
        'pmax': pmax,
    })
    return attrib


def to_unisens(edf_file,
               unisens_folder,
               overwrite=False,
               tqdm_desc=None,
               skip_exist=False):
    """Convert one EDF recording and its side files into a Unisens `Patient`.

    The record code is the EDF basename without its last four characters; the
    record is written to `<unisens_folder>/<code>`. Signals (ECG, EEG, EOG,
    EMG, thorax, body) are stored as int16 `SignalEntry`s, EDF annotations,
    arousals and hypnograms as `EventEntry`s and the Kubios results as
    `CustomEntry`s. Entries already present in the record are left untouched
    unless `overwrite` is set.

    Parameters
    ----------
    edf_file : path of the EDF file to convert.
    unisens_folder : parent folder of the Unisens record.
    overwrite : re-create entries that already exist in the record.
    tqdm_desc : callable receiving a status string; a no-op if None.
    skip_exist : return immediately if the record folder already exists.

    Raises
    ------
    Exception : if a side file matching `<code>*` has an unknown type.
    """
    # %% create unisens
    if tqdm_desc is None:
        tqdm_desc = lambda x: None
    dtype = np.int16
    code = ospath.basename(edf_file)[:-4]
    folder = ospath.dirname(edf_file)

    unisens_folder = ospath.join(unisens_folder, code)

    if skip_exist and ospath.isdir(unisens_folder):
        return

    # get all additional files that belong to this EDF
    add_files = ospath.list_files(folder, patterns=code + '*')
    u = Patient(unisens_folder,
                makenew=False,
                autosave=True,
                measurementId=code)
    header = read_edf_header(edf_file)
    all_labels = header['channels']
    u.starttime = header['startdate']
    u.timestampStart = header['startdate'].strftime('%Y-%m-%dT%H:%M:%S')
    u.code = code

    attribs = misc.get_attribs()
    u.group = attribs[code].get('group', 'none')
    u.gender = attribs[code].get('gender', 'none')

    u.drug_hrv = attribs[code].get('drug_hrv', 0)
    u.drug_sleep = attribs[code].get('drug_sleep', 0)

    u.age = attribs[code].get('age', -1)
    u.match = attribs[code].get('match', '')

    u.channels = str(', '.join(header['channels']))
    u.startsec = (u.starttime.hour * 60 +
                  u.starttime.minute) * 60 + u.starttime.second
    u.use_offset = 1

    # if the ECG/EEG is broken, mark it (the discard list is read only once)
    discard_rows = misc.read_csv(cfg.edfs_discard)
    edfs_ecg_broken = [row[1] for row in discard_rows if row[3] == '1']
    edfs_eeg_broken = [row[1] for row in discard_rows if row[4] == '1']

    # we need to see if the ecg/eeg of this file can be used
    # if one of them is broken we also remove its match from analysis
    u.ecg_broken = (code in edfs_ecg_broken) or (u.match in edfs_ecg_broken)
    u.eeg_broken = (code in edfs_eeg_broken) or (u.match in edfs_eeg_broken)

    # %% add ECG
    tqdm_desc(f'{code}: Reading ECG')
    if 'ECG' not in u or overwrite:
        signals, shead, header, _ = _read_for_viewer(edf_file, ['ECG I'])
        attrib = _signal_attribs(signals, shead, 'ECG', 'mV', dtype)
        SignalEntry(id='ECG.bin', parent=u).set_data(**attrib)

        u.sampling_frequency = shead[0]['sample_rate']
        u.duration = len(signals.squeeze()) // shead[0]['sample_rate']
        u.epochs_signals = signals.shape[1] // int(u.sampling_frequency) // 30

    # %% add EEG
    tqdm_desc(f'{code}: Reading EEG')
    if 'EEG' not in u or overwrite:
        chs = sleep_utils.infer_eeg_channels(all_labels)
        signals, shead, header, chs = _read_for_viewer(edf_file, chs,
                                                       first_if_list=True)
        attrib = _signal_attribs(signals, shead, chs, 'uV', dtype,
                                 content_class='EEG')
        SignalEntry(id='EEG.bin', parent=u).set_data(**attrib)

    # NOTE(review): EOG, EMG, thorax and body are stored with lsbValue=1 and
    # baseline=0 although a calibration could be derived from the header;
    # kept as-is, confirm that this is intended.

    # %% add EOG
    if 'EOG' not in u or overwrite:
        tqdm_desc(f'{code}: Reading EOG')
        chs = sleep_utils.infer_eog_channels(all_labels)
        signals, shead, header, chs = _read_for_viewer(edf_file, chs,
                                                       first_if_list=True)
        attrib = _signal_attribs(signals, shead, chs, 'uV', dtype,
                                 calibrated=False)
        SignalEntry(id='EOG.bin', parent=u).set_data(**attrib)

    # %% add EMG
    if 'EMG' not in u or overwrite:
        tqdm_desc(f'{code}: Reading EMG')
        chs = sleep_utils.infer_emg_channels(all_labels)
        if chs != []:  # fix for 888_49272
            signals, shead, header, chs = _read_for_viewer(edf_file, chs,
                                                           first_if_list=True)
            attrib = _signal_attribs(signals, shead, chs, 'uV', dtype,
                                     calibrated=False)
            SignalEntry(id='EMG.bin', parent=u).set_data(**attrib)

    # %% add Thorax
    if 'thorax' not in u or overwrite:
        tqdm_desc(f'{code}: Reading Thorax')
        signals, shead, header, _ = _read_for_viewer(edf_file, ['Thorax'])
        attrib = _signal_attribs(signals, shead, 'thorax', 'uV', dtype,
                                 calibrated=False)
        SignalEntry(id='thorax.bin', parent=u).set_data(**attrib)

    # %% add Body / position sensor
    if ('body' not in u or overwrite) and 'Body' in all_labels:
        tqdm_desc(f'{code}: Reading Body')
        signals, shead, header, _ = _read_for_viewer(edf_file, ['Body'])

        # we have some weird body positions that we cant decode,
        # those recordings (value range of 10 or more) are not stored
        if np.ptp(signals) < 10:
            attrib = _signal_attribs(signals, shead, 'body', 'uV', dtype,
                                     calibrated=False)
            attrib['comment'] = 'Lagesensor: 1 = Bauchlage, 2 = aufrecht, 3 = links, 4 = rechts,' \
                                '5 = aufrecht (Kopfstand), 6 = Rückenlage'
            SignalEntry(id='body.bin', parent=u).set_data(**attrib)

    # %% add annotations
    # `header` is the one returned by the most recent EDF read above
    if 'annotations' not in u or overwrite:
        annotations = header['annotations']
        if annotations != []:
            annot_entry = EventEntry('annotations.csv', parent=u)
            # first annotation field is scaled by 1000 to match sampleRate=1000
            annotations = [[int(a[0] * 1000), a[2]] for a in annotations]
            annot_entry.set_data(annotations,
                                 sampleRate=1000,
                                 typeLength=1,
                                 contentClass='Annotation')

    # %% add rest
    for file in add_files:
        # ignore diagnosis files of StanfordStages
        if file.endswith(
                ('diagnosis.txt', 'hypnodensity.txt', 'hypnogram.txt')):
            continue

        # %% add arousals
        elif file.endswith('_arousal.txt'):
            if 'arousals' in u and not overwrite:
                continue
            lines = misc.read_csv(file, convert_nums=True)

            sdate = u.starttime
            data = []
            # the first four rows are skipped
            for t_arousal, length, _ in lines[4:]:
                # only the first 8 characters (HH:MM:SS) of the time are used
                t_arousal = f'{sdate.year}.{sdate.month}.{sdate.day} ' + t_arousal[:8]
                t_arousal = datetime.strptime(t_arousal, '%Y.%m.%d %H:%M:%S')
                # timedelta.seconds is never negative, so a clock time that
                # lies before the start time wraps around to the next day
                epoch = (t_arousal - sdate).seconds // 30
                data += [[epoch, length]]

            arousal_event = EventEntry('arousals.csv', parent=u)
            arousal_event.set_data(
                data,
                comment='Arousal appearance epoch, name is lengths in seconds',
                sampleRate=1 / 30,
                contentClass='Arousal',
                typeLength=1)

        # %% add hypnogram
        elif file.endswith('txt'):
            if 'hypnogram' in u and not overwrite:
                continue
            tqdm_desc(f'{code}: Reading Hypnogram')
            hypno = sleep_utils.read_hypnogram(file)
            u.epochs_hypno = len(hypno)
            times = np.arange(len(hypno))
            hypno = np.vstack([times, hypno]).T
            hypno_entry = EventEntry(id='hypnogram.csv', parent=u)
            hypno_entry.set_data(
                hypno,
                comment=f'File: {code}\nSleep stages 30s epochs.',
                sampleRate=1 / 30,
                contentClass='Stage',
                typeLength=1)

        elif file.endswith('.hypno'):
            if 'hypnogram_old' in u and not overwrite:
                continue
            hypno = sleep_utils.read_hypnogram(file)
            # do not override the epoch count of the regular hypnogram
            if not hasattr(u, 'epochs_hypno'):
                u.epochs_hypno = len(hypno)
            times = np.arange(len(hypno))
            hypno = np.vstack([times, hypno]).T
            hypno_old_entry = EventEntry(id='hypnogram_old.csv', parent=u)
            hypno_old_entry.set_data(
                hypno,
                comment=f'File: {code}\nSleep stages 30s epochs.',
                sampleRate=1 / 30,
                contentClass='Stage',
                typeLength=1)

        # %% add features and kubios
        elif file.endswith('mat'):
            if 'feats.pkl' in u and not overwrite:
                continue
            tqdm_desc(f'{code}: Reading Kubios')
            mat = loadmat(file)
            HRV = mat['Res']['HRV']

            feats_entry = CustomEntry('feats.pkl', parent=u)
            feats_entry.set_data(
                HRV,
                comment='pickle dump of the kubios created features file',
                fileType='pickle')

            # artefacts are derived here from the Kubios data; the former
            # import of separate artefact files has been removed
            u.compute_features()
            u.get_artefacts(wsize=cfg.default_wsize,
                            step=cfg.default_step,
                            offset=True)

            # %% add RRi
            tqdm_desc(f'{code}: writing RRi')

            rri_entry = CustomEntry('RRi.pkl', parent=u)
            rri_entry.set_data(
                HRV['Data']['RRi'],
                comment='raw data of RRi, the interpolated RRs at 4hz',
                fileType='pickle')
            rri_entry.sampleRate = 4

        elif file.endswith(('.edf', 'pkl')):
            continue

        else:
            raise Exception(f'unkown file type: {file}')

    u.save()
@author: skjerns
"""
import os
from sleep import SleepSet
import sleep_utils
import numpy as np
import ospath
import config as cfg
import matplotlib.pyplot as plt
from tqdm import tqdm
from multiprocessing import Process, Queue

if __name__ == '__main__':
    max_duration = 60 * 60 * 11  # 11 hours, assuming duration is in seconds
    wanted_groups = ['control', 'nt1']

    ss = SleepSet(cfg.folder_unisens)
    # keep only recordings shorter than `max_duration`
    ss = ss.filter(lambda x: x.duration < max_duration)
    # keep only the control and nt1 groups
    ss = ss.filter(lambda x: x.group in wanted_groups)
    # keep only recordings whose mean artefact value during sleep time is
    # below 0.25
    ss = ss.filter(
        lambda x: np.mean(x.get_artefacts(only_sleeptime=True)) < 0.25)

    # at most the first 250 recordings are plotted
    for patient in tqdm(ss[:250]):
        dataset = patient.get_attrib('dataset', 'none')
        saveas = ospath.join(cfg.documents, 'plots', patient.group, dataset,
                             patient.code + '.jpg')
        # plots that already exist are not redrawn
        if not ospath.exists(saveas):
            patient.spectogram(channels=['ecg', 'RRi'], ufreq=2)
            target_dir = os.path.dirname(saveas)
            os.makedirs(target_dir, exist_ok=True)
            plt.savefig(saveas)
            plt.close('all')
Exemple #6
0
def _add_raw_signal(u, edf_file, ch_name, entry_id, label, dtype):
    """Store one EDF channel unchanged as a `SignalEntry` of `u`.

    The channel is read in digital form, checked to fit into int16 and
    written under `entry_id` with the calibration derived from its header.
    Also sets `u.sampling_frequency` to the sample rate of this channel.
    """
    sig_orig, shead_orig, _ = read_edf(edf_file,
                                       ch_names=ch_name,
                                       verbose=False,
                                       digital=True)
    assert sig_orig.max() <= 32767 and sig_orig.min() >= -32768, \
        'min/max exceeds int16'
    first = shead_orig[0]
    pmin, pmax = first['physical_min'], first['physical_max']
    dmin, dmax = first['digital_min'], first['digital_max']
    lsb, offset = sleep_utils.minmax2lsb(dmin, dmax, pmin, pmax)
    attrib = {
        'data': sig_orig.astype(dtype),
        'sampleRate': first['sample_rate'],
        'ch_names': label,
        'lsbValue': lsb,
        'baseline': offset,
        'unit': 'mV',
        'dmin': dmin,
        'dmax': dmax,
        'pmin': pmin,
        'pmax': pmax
    }
    u.sampling_frequency = first['sample_rate']
    SignalEntry(id=entry_id, parent=u).set_data(**attrib)


def to_unisens(edf_file,
               unisens_folder,
               mat_folder,
               overwrite=False,
               skip_exist=False):
    """Convert one MNC EDF recording into a Unisens `Patient` record.

    The file is skipped (a message is printed and None is returned) when no
    MNC info, no hypnogram file (`*.xml` / `*.sta`), not exactly one matching
    mat file or no ECG channel is found, or when the EDF cannot be read even
    after `repair_file`.

    Parameters
    ----------
    edf_file : path of the EDF; the last 9 characters ("-nsrr.edf") are
        stripped to obtain the recording name.
    unisens_folder : parent folder; the record goes to
        `<unisens_folder>/<code>` with `code = misc.codify(name)`.
    mat_folder : folder searched for `<name>-*.mat`.
    overwrite : re-create entries that already exist in the record.
    skip_exist : return None immediately if the record folder already exists.

    Returns
    -------
    True after the record has been saved, None if the file was skipped.

    Raises
    ------
    AttributeError : if the diagnosis cannot be mapped to a group.
    """
    dtype = np.int16
    folder = ospath.dirname(edf_file)
    filename = ospath.basename(
        edf_file)[:-9]  # remove "-nsrr.edf" from filename

    mnc_info = misc.get_mnc_info()
    mnc_key = filename.upper().replace(' ', '_')
    try:
        attribs = mnc_info[mnc_key]
    except LookupError:
        print(f'Info for {mnc_key} not found')
        return

    # get all additional files that belong to this EDF
    patterns = [filename + '*.xml', filename + '*.sta']
    add_files = ospath.list_files(folder, patterns=patterns)
    if len(add_files) == 0:
        print(f'No hypnogram for {filename}, skip')
        return

    # try to find mat files, exactly one must match
    mat_files = ospath.list_files(mat_folder, patterns=[filename + '-*.mat'])
    if len(mat_files) == 0:
        print(f'No matfile found for {filename}')
        return
    if len(mat_files) > 1:
        print(f'too many matching mat files: {mat_files}')
        return
    mat_file = mat_files[0]

    # get the codified version of this file
    code = misc.codify(filename)
    unisens_folder = ospath.join(unisens_folder, code)

    # if this unisens folder exists, skip if requested
    if skip_exist and ospath.isdir(unisens_folder):
        return

    # now create the meta information for the new file,
    # a header that cannot be read gets one repair attempt
    try:
        header = read_edf_header(edf_file)
    except Exception:
        repair_file(edf_file)
        try:
            header = read_edf_header(edf_file)
        except Exception as e:
            print(f'cant load {filename}, broken edf {e}')
            return
    channels = header['channels']
    chs_eeg = [ch for ch in channels if 'EEG' in ch.upper()]
    chs = [ch for ch in channels if 'ECG' in ch.upper()]
    # 'cs_ECG' is only used when it is the sole ECG channel
    if 'cs_ECG' in chs and len(chs) > 1:
        chs.remove('cs_ECG')
    if not chs:
        # without this check chs[0] below fails after the record was created
        print(f'No ECG channel found for {filename}, skip')
        return

    # add metadata for this file
    u = Patient(unisens_folder,
                makenew=True,
                autosave=True,
                measurementId=code)
    u.starttime = header['startdate']
    u.timestampStart = header['startdate'].strftime('%Y-%m-%dT%H:%M:%S')
    u.code = code
    u.duration = header['Duration']
    u.dataset = 'mnc'
    u.channels = str(', '.join(channels))
    u.startsec = (u.starttime.hour * 60 +
                  u.starttime.minute) * 60 + u.starttime.second
    # print the files whose start time is exactly midnight
    if u.startsec == 0:
        print(edf_file)
    u.DQ0602 = attribs['DQ0602']
    u.hypocretin = attribs['CSF hypocretin-1']
    u.label = attribs['Label']
    u.cohort = attribs['Cohort']
    u.use_offset = 0
    u.gender = 'unknown'
    u.match = None

    diagnosis = attribs['Diagnosis']
    if 'CONTROL' in diagnosis:
        group = 'control'
    elif 'T1' in diagnosis:
        group = 'nt1'
    elif 'OTHER HYPERSOMNIA' in diagnosis:
        group = 'hypersomnia'
    else:
        raise AttributeError(f'unkown group: {diagnosis} for {filename}')
    u.group = group

    # %% Add the original ECG channel
    if 'ecg' not in u or overwrite:
        _add_raw_signal(u, edf_file, chs[0], 'ECG.bin', 'ECG', dtype)

    # %% Add the original EEG channel, if there is one
    # NOTE(review): this also replaces u.sampling_frequency by the EEG sample
    # rate (unchanged behaviour) - confirm that this is intended.
    if ('eeg' not in u or overwrite) and len(chs_eeg) > 0:
        _add_raw_signal(u, edf_file, chs_eeg[0], 'EEG.bin', 'EEG', dtype)

    # %% add the EDF annotations
    if 'annotations' not in u or overwrite:
        annotations = header['annotations']
        if annotations != []:
            annot_entry = EventEntry('annotations.csv', parent=u)
            # first annotation field is scaled by 1000 to match sampleRate=1000
            annotations = [[int(a[0] * 1000), a[2]] for a in annotations]
            annot_entry.set_data(annotations,
                                 sampleRate=1000,
                                 typeLength=1,
                                 contentClass='Annotation')

    # %% add hypnogram (add_files is known to be non-empty, see above);
    # only the first matching file is used
    if 'hypnogram' not in u or overwrite:
        file = add_files[0]
        hypno = sleep_utils.read_hypnogram(
            file, epochlen_infile=30 if file.endswith('annot') else None)
        u.epochs_hypno = len(hypno)
        times = np.arange(len(hypno))
        hypno = np.vstack([times, hypno]).T
        hypno_entry = EventEntry(id='hypnogram.csv', parent=u)
        hypno_entry.set_data(
            hypno,
            comment=f'File: {code}\nSleep stages 30s epochs.',
            sampleRate=1 / 30,
            contentClass='Stage',
            typeLength=1)

    # %% Add features
    if 'feats.pkl' not in u or overwrite:
        mat = loadmat(mat_file)
        HRV = mat['Res']['HRV']

        feats_entry = CustomEntry('feats.pkl', parent=u)
        feats_entry.set_data(
            HRV,
            comment='pickle dump of the kubios created features file',
            fileType='pickle')

        u.compute_features(offset=False)
        u.get_artefacts(wsize=cfg.default_wsize,
                        step=cfg.default_step,
                        offset=False)

        rri_entry = CustomEntry('RRi.pkl', parent=u)
        rri_entry.set_data(
            HRV['Data']['RRi'],
            comment='raw data of RRi, the interpolated RRs at 4hz',
            fileType='pickle')
        rri_entry.sampleRate = 4
    u.save()
    return True
Exemple #7
0
It only copies files for which we have a match.
Eg at current state 27/01/2020 we have set1 with 28 and set2 with 30 patients

@author: skjerns
"""
import os
from misc import read_csv
import shutil
import ospath
import config as cfg
from tqdm import tqdm

if __name__ == '__main__':
    documents = cfg.documents
    datasets = [
        ospath.join(documents, 'mapping_' + d + '.csv') for d in cfg.datasets
    ]
    matching = cfg.matching
    set1_path = ospath.join(cfg.folder_edf, 'set1')
    set2_path = ospath.join(cfg.folder_edf, 'set2')

    matchings = read_csv(matching)

    set1 = read_csv(datasets[0])
    set2 = read_csv(datasets[1])

    os.makedirs(ospath.join(cfg.folder_edf, 'set1'), exist_ok=True)
    os.makedirs(ospath.join(cfg.folder_edf, 'set2'), exist_ok=True)
    os.makedirs(ospath.join(cfg.folder_edf, 'set1', 'not_matched'),
                exist_ok=True)
    os.makedirs(ospath.join(cfg.folder_edf, 'set2', 'not_matched'),
Exemple #8
0
    personal_dbox_path = Path(j['personal']['path'])
    return personal_dbox_path


###############################
###USER SPECIFIC CONFIGURATION
###############################
username = getpass.getuser().lower()  # your login name
host = platform.node().lower()  # the name of this computer
system = platform.system().lower()  # linux, windows or mac.
home = os.path.expanduser('~')

# the document paths are only defined when a dropbox location was found
dropbox = get_dropbox_location()
if dropbox:
    documents = ospath.join(dropbox, 'nt1-hrv-documents')
    (matching, edfs_invert, edfs_discard, controls_csv, patients_csv) = [
        ospath.join(documents, csv_name) for csv_name in (
            'matching.csv',
            'edfs_invert.csv',
            'edfs_discard.csv',
            'subjects_control.csv',
            'subjects_nt1.csv',
        )
    ]

# user specific settings, looked up by (login name, computer name)
_user_vars = {
    ('nd269', 'ess-donatra'): 'test123',
    ('simon', 'desktop-simon'): 'test456',
}
if (username, host) in _user_vars:
    USER_VAR = _user_vars[(username, host)]
else:
    print(f'Username {username} on host {host} with {system} has no configuration.\n'
          'please set user specific information in config.py')
Exemple #9
0
def anonymize_and_streamline(old_file, target_folder):
    """
    Anonymize one EDF file and copy it together with its side files.

    1. removes the birthdate and replaces patient name/code by the codified
       name, gender and age are taken from the subject lists
    2. renames the channels to standardized channel names
    3. inverts the ECG channel if the file is listed in cfg.edfs_invert
    4. writes the result to a temporary location, compares it to the
       original and then moves it to `<target_folder>/<new_name>.edf`
    5. copies matching txt/dat/mat side files, txt and dat become `.hypno`

    An EDF that already exists in the target folder is not written again,
    the side files are still copied.

    Parameters
    ----------
    old_file : path of the personalized EDF file.
    target_folder : folder that receives the anonymized files.

    Returns
    -------
    (old_name, new_name), or None if the file is marked to be discarded.
    """
    # load the two csvs with the edfs that we dont process and where the ECG is upside down
    pre_coding_discard = [
        line[0] for line in misc.read_csv(cfg.edfs_discard) if line[2] == '1'
    ]
    to_invert = [line[0] for line in misc.read_csv(cfg.edfs_invert)]

    # Here we read the list of controls and patients with their age and gender
    subjects = misc.read_csv(cfg.controls_csv)
    subjects.extend(misc.read_csv(cfg.patients_csv))
    mappings = {
        name: {
            'gender': gender,
            'age': age
        }
        for name, gender, age, *_ in subjects
    }

    # old name is the personalized file without file extension, e.g. thomas_smith(1)
    old_name = ospath.splitext(ospath.basename(old_file))[0]
    # new name is the codified version without extension e.g '123_45678'
    new_name = codify(old_name)

    if old_name in pre_coding_discard:
        print('EDF is marked as corrupt and will be discarded')
        return

    # this is where the anonymized file will be stored
    new_file = ospath.join(target_folder, new_name + '.edf')

    if ospath.exists(new_file):
        print('New file extists already {}'.format(new_file))

    else:
        # anonymize
        print('Writing {} from {}'.format(new_file, old_name))
        assert ospath.isfile(old_file), f'{old_file} does not exist'
        signals, signal_headers, header = sleep_utils.read_edf(old_file,
                                                               digital=True,
                                                               verbose=False)
        # remove patient info
        header['birthdate'] = ''
        header['patientname'] = new_name
        header['patientcode'] = new_name
        header['gender'] = mappings[old_name]['gender']
        header['age'] = mappings[old_name]['age']

        # rename channels to a unified notation, e.g. EKG becomes ECG I
        for shead in signal_headers:
            ch = shead['label']
            if ch in ch_mapping:
                shead['label'] = ch_mapping[ch]

        # Invert the ECG channel if necessary
        if old_name in to_invert:
            for i, sig in enumerate(signals):
                label = signal_headers[i]['label'].lower()
                if label == cfg.ecg_channel.lower():
                    signals[i] = -sig

        # Write to a temporary file first and move it afterwards, so that no
        # half-written file is left in the target folder. The file lives in
        # its own temporary directory: TemporaryFile().name is not a usable
        # path (it is a file descriptor number on POSIX).
        tmp_dir = tempfile.mkdtemp(prefix='anonymize')
        tmp_name = ospath.join(tmp_dir, new_name + '.edf')
        try:
            print('Writing tmp for {}'.format(new_file))
            sleep_utils.write_edf(tmp_name,
                                  signals,
                                  signal_headers,
                                  header,
                                  digital=True,
                                  correct=True)

            # verify that contents for both files match exactly
            print('Verifying tmp for {}'.format(new_file))
            # hack: B0036 is excluded from the comparison, apparently its
            # dmin/dmax do not match any more after inverting
            if not old_name == 'B0036':
                sleep_utils.compare_edf(old_file, tmp_name, verbose=False)

            # now we move the tmp file to its new location.
            shutil.move(tmp_name, new_file)
        finally:
            # also removes the tmp file if writing or verifying failed
            shutil.rmtree(tmp_dir, ignore_errors=True)

    # also copy additional file information ie hypnograms and kubios files
    old_dir = ospath.dirname(old_file)
    pattern = old_name.replace('_m', '').replace(
        '_w', '')  # remove gender from additional nt1 patients
    add_files = ospath.list_files(
        old_dir,
        patterns=[f'{pattern}*txt', f'{pattern}*dat', f'{pattern}*mat'])
    for add_file in add_files:
        # e.g. .mat or .txt etc etc
        new_add_file = ospath.join(
            target_folder, ospath.basename(add_file.replace(pattern,
                                                            new_name)))
        # hypnograms will be copied to .hypno
        new_add_file = new_add_file.replace('-Schlafprofil', '')
        new_add_file = new_add_file.replace('_sl', '')
        new_add_file = new_add_file.replace('.txt', '.hypno').replace(
            '.dat', '.hypno')
        # check the final name, otherwise renamed files are never skipped
        if ospath.exists(new_add_file):
            continue
        # copying side files is best effort, a failure is only reported
        try:
            shutil.copy(add_file, new_add_file)
        except Exception as e:
            print(e)
    return old_name, new_name
Exemple #10
0
    return old_name, new_name


#%% Main
if __name__ == '__main__':
    print(
        'running in parallel. if you don\'t see output, start with python.exe')

    # first get all edfs in all dataset folders
    files = []
    for folder in datasets.values():
        files.extend(ospath.list_files(folder, exts='edf', subfolders=True))

    results = Parallel(n_jobs=4, backend='loky')(
        delayed(anonymize_and_streamline)(file, target_folder=target_folder)
        for file in tqdm(files, desc='processing edfs'))

    # remove discarded files, they return None instead of a name pair
    results = [res for res in results if res is not None]

    # check for hash collision: every codified name must be unique.
    # This also works when no file was processed at all, and it is not
    # disabled when python runs with -O.
    new_names = [new_name for _, new_name in results]
    if len(set(new_names)) != len(new_names):
        raise AssertionError('ERROR: Hash collision! Check thoroughly.')

    # store the (old name, new name) pairs, one pair per row
    csv_file = ospath.join(documents, 'mapping_all.csv')
    df = pd.DataFrame(results)
    df.to_csv(csv_file, header=None, index=False, sep=';')
Exemple #11
0
"""
Created on Thu Feb 27 14:40:36 2020

add

@author: skjerns
"""
import config
import os
from tqdm import tqdm
import ospath
from pyedflib import highlevel


# Input folder with the recordings; processed copies go into its "new" subfolder.
folder = "Z:/NT1-HRV-data"
new_folder = ospath.join(folder, "new")
os.makedirs(new_folder, exist_ok=True)

# channel renaming table taken from the project configuration
mapping = config.mapping_channels


files = ospath.list_files(folder, exts='edf')
for file in tqdm(files):
    # file name without its 4-character extension, reused as the new identity
    name = ospath.basename(file)[:-4]
    new_file = ospath.join(new_folder, name + ".edf")

    if not os.path.exists(new_file):
        # overwrite patient code and patient name with the file name
        highlevel.anonymize_edf(
            file,
            new_file,
            to_remove=['patientcode', 'patientname'],
            new_values=[name, name],
            verify=False,
        )
        # rename the channels of the freshly written copy in place
        highlevel.rename_channels(new_file, mapping=mapping, new_file=new_file)
    else:
        # never overwrite a file that was already processed
        print(f"{new_file} exists, skipping")
Exemple #12
0
    def spectogram(self,
                   channels='eeg',
                   hypnogram=True,
                   fig=None,
                   saveas=None,
                   **kwargs):
        """Plot one spectrogram per channel, optionally with the hypnogram.

        Each channel gets its own row; if requested and available, the
        hypnogram (with artefact markers) is drawn in an extra bottom row.

        Parameters
        ----------
        channels : str or list of str
            Name(s) of the entries to plot. A single string is treated as a
            one-element list.
        hypnogram : bool
            Whether to add the hypnogram row. It is only drawn if this object
            contains a 'hypnogram' or 'hypnogram_old.csv' entry.
        fig : matplotlib figure, optional
            Figure to draw into. A new figure is created if None.
        saveas : str, False or None
            If False, nothing is saved. Otherwise the plot is always saved to
            the 'plots' subfolder of this object's folder; if a path is given,
            it is additionally saved there.
        **kwargs
            Passed on to `sleep_utils.specgram_multitaper`.

        Returns
        -------
        fig, axs
            The figure and the flat array of its axes.

        Raises
        ------
        ValueError
            If no channel is given or a channel is not part of this object.
        """
        with plt.style.context('default'):
            # only draw the hypnogram if this object actually has one
            hypnogram = hypnogram * ('hypnogram' in self
                                     or 'hypnogram_old.csv' in self)
            if isinstance(channels, str): channels = [channels]
            n_chs = len(channels)
            if n_chs == 0:
                raise ValueError('At least one channel is required')
            plots = n_chs + hypnogram

            # one height ratio per row: the channels share 0.75 equally, the
            # hypnogram (if any) gets 0.25. The list length must match the
            # number of rows, also when there is no hypnogram row.
            h_ratio = [0.75 / n_chs] * n_chs
            if hypnogram:
                h_ratio.append(0.25)

            if fig is None:
                fig = plt.figure()
            axs = fig.subplots(plots,
                               1,
                               gridspec_kw={'height_ratios': h_ratio},
                               squeeze=False)
            axs = axs.flatten()

            for i, channel in enumerate(channels):
                ax = axs[i]
                if channel not in self:
                    raise ValueError(f'Entry {channel} not found')

                entry = self[channel]
                signal = entry.get_data().squeeze()
                # multi-dimensional data: only the first row is plotted
                if signal.ndim > 1: signal = signal[0]
                sfreq = int(entry.sampleRate)
                if sfreq < 10:
                    # low sampling rates use the plain matplotlib spectrogram
                    ax.specgram(signal, Fs=sfreq)
                else:
                    sleep_utils.specgram_multitaper(signal,
                                                    sfreq=sfreq,
                                                    ax=ax,
                                                    **kwargs)
                ax.set_title(channel)

            # only the bottom row keeps its x-axis ticks and labels
            for ax in axs[:-1]:
                ax.tick_params(axis='x',
                               which='both',
                               bottom=False,
                               top=False,
                               labelbottom=False)

            # show the x-axis of the bottom row as HH:MM
            formatter = FuncFormatter(
                lambda s, x: time.strftime('%H:%M', time.gmtime(s)))
            axs[-1].xaxis.set_major_formatter(formatter)

            if hypnogram:
                offset = self.get_attrib('use_offset', 1)
                artefacts = self.get_artefacts(offset=offset)
                hypno = self.get_hypno()
                labeldict = {
                    0: 'Wake',
                    4: 'REM',
                    1: 'S1',
                    2: 'S2',
                    3: 'SWS',
                    5: 'Artefact'
                }
                sleep_utils.plot_hypnogram(hypno,
                                           ax=axs[-1],
                                           labeldict=labeldict)
                # mark artefacts as red segments on the hypnogram axis; drawn
                # on the axis explicitly so a caller-supplied `fig` works even
                # if it is not pyplot's current figure.
                # NOTE(review): 30 is presumably the epoch length in seconds
                for i, is_art in enumerate(artefacts):
                    axs[-1].plot([i * 30, (i + 1) * 30], [0.2, 0.2],
                                 c='red',
                                 alpha=0.75 * is_art,
                                 linewidth=1)

            # title shows the sampling rate of the last plotted channel
            fig.suptitle(f'Plotted: {channels}, {sfreq} Hz', y=1)
            plt.pause(0.01)
            fig.tight_layout()
            plt.pause(0.01)

            if saveas is not False:
                # default copy inside this object's folder
                file = ospath.join(self._folder, '/plots/',
                                   f'plot_{"_".join(channels)}.png')
                os.makedirs(os.path.dirname(file), exist_ok=True)
                fig.savefig(file)
            if saveas:
                # a bare file name has no directory part; os.makedirs('')
                # would raise, so only create a folder if there is one
                saveas_dir = os.path.dirname(saveas)
                if saveas_dir:
                    os.makedirs(saveas_dir, exist_ok=True)
                fig.savefig(saveas)

        return fig, axs