コード例 #1
0
ファイル: extract-features.py プロジェクト: jcraley/jhu-eeg
def main():
    """Compute every configured feature for each training recording.

    Reads the configuration from 'default.ini' plus command-line overrides,
    builds the training dataset on the CPU and switches it to sequence mode
    via ``set_as_sequences(True)``.  For each name in ``params['features']``
    the callable of that name is looked up on the ``features`` namespace and
    applied to every sample's ``'buffers'`` entry with the sampling frequency
    taken from the first manifest row.  Each result is written with
    ``torch.save`` to ``<feature folder>/<recording stem>.pt``.

    Raises:
        AttributeError: if a configured feature name does not exist on
            ``features``.
    """
    # Load the configuration files
    argv = sys.argv[1:]
    params = tc.TestConfiguration('default.ini', argv)
    paths = pm.PathManager(params)
    paths.initialize_folder('data')

    # Load the dataset
    train_dataset = EpilepsyDataset(
        params['train manifest'],
        paths['buffers'],
        params['window length'],
        params['overlap'],
        device='cpu',
    )
    train_dataset.set_as_sequences(True)

    # The sampling frequency is the same for every feature, so the manifest
    # is read once instead of once per feature.
    manifest_files = read.read_manifest(params['train manifest'])
    fs = int(manifest_files[0]['fs'])

    for feat_name in params['features']:
        print("Extracting {}".format(feat_name))
        paths.add_feature_folder(feat_name)

        # Resolve the extractor by attribute lookup.  Building a source
        # string from a config value and eval()-ing it would execute
        # whatever text the configuration happens to contain.
        extract = getattr(features, feat_name)

        # Loop over recordings and write one feature file per recording
        for sample in train_dataset:
            # Output name: basename of the recording up to its first dot
            fn = sample['filename'].split('/')[-1].split('.')[0] + '.pt'
            feat = extract(sample['buffers'], fs=fs)
            feat_fn = os.path.join(paths[feat_name], fn)
            torch.save(feat, feat_fn)
コード例 #2
0
def main():
    """Print the channel labels common to every EDF file in the manifest.

    Reads the configuration from 'default.ini' plus command-line overrides,
    loads the metadata of each file listed in the training manifest and
    intersects their ``label_list`` entries.  Each file path is printed as
    it is processed, followed by the size of the common label set and the
    set itself.  An empty manifest yields an empty set.
    """
    # Load the configuration files
    argv = sys.argv[1:]
    params = tc.TestConfiguration('default.ini', argv)
    paths = pm.PathManager(params)
    paths.initialize_folder('buffers')

    # Initialize the loader
    loader = EdfLoader()

    # Load the manifest files
    manifest_files = read.read_manifest(params['train manifest'])

    # None means "no file seen yet": the first file seeds the set and every
    # later file narrows it, so no special case is needed before the loop.
    label_set = None
    for eeg in manifest_files:
        edf_fn = os.path.join(paths['raw data'], eeg['fn'])
        print(edf_fn)
        eeg_info = loader.load_metadata(edf_fn)
        labels = set(eeg_info.label_list)
        label_set = labels if label_set is None else label_set & labels

    if label_set is None:
        # Empty manifest: nothing is shared
        label_set = set()

    print(len(label_set))
    print(label_set)
コード例 #3
0
def main():
    """Resample, prefilter and save the buffers of every training recording.

    Reads the configuration from 'default.ini' plus command-line overrides
    and appends '_200' to the configured buffers path.  For each manifest
    entry the EDF buffers are loaded for the configured channel list, passed
    through ``dsp.resample_256to200`` (after which ``eeg_info.fs`` is set to
    200) and ``dsp.prefilter``, converted to a float32 tensor with its first
    two dimensions swapped, and saved as '<recording stem>.pt' in the
    buffers folder.
    """
    # Configuration, with the output folder redirected to the '_200' variant
    cli_args = sys.argv[1:]
    params = tc.TestConfiguration('default.ini', cli_args)
    paths = pm.PathManager(params)
    paths['buffers'] = paths['buffers'] + '_200'
    paths.initialize_folder('buffers')

    # The loader is restricted to the configured channels
    channel_labels = read.read_channel_list(params['channel list'])
    loader = EdfLoader(channel_labels)

    for entry in read.read_manifest(params['train manifest']):
        edf_fn = os.path.join(paths['raw data'], entry['fn'])
        print(edf_fn)

        eeg_info = loader.load_metadata(edf_fn)
        signal = loader.load_buffers(eeg_info)

        # Report the sampling frequency before and after resampling
        print(eeg_info.fs)
        signal = dsp.resample_256to200(signal)
        eeg_info.fs = 200
        print(eeg_info.fs)

        signal = dsp.prefilter(
            signal,
            eeg_info.fs,
            params['notch'],
            params['lpf fc'],
            params['hpf fc'],
            params['clip level'],
            params['normalize'],
        )
        as_tensor = torch.tensor(signal, dtype=torch.float32)
        as_tensor = as_tensor.transpose(0, 1)

        # Output name: basename of the EDF path up to its first dot
        stem = edf_fn.split('/')[-1].split('.')[0]
        torch.save(as_tensor, os.path.join(paths['buffers'], stem + '.pt'))
コード例 #4
0
def main():
    """Print the annotations of every EDF file in the training manifest.

    Reads the configuration from 'default.ini' plus command-line overrides.
    For each manifest entry the file path is printed, then one line per
    annotation pairing ``annotations[0]`` with ``annotations[2]``, skipping
    any annotation whose text starts with one of the ``DONT_WRITE``
    prefixes, and finally an empty line.
    """
    cli_args = sys.argv[1:]
    params = tc.TestConfiguration('default.ini', cli_args)
    paths = pm.PathManager(params)

    # The loader is restricted to the configured channels
    channel_labels = read.read_channel_list(params['channel list'])
    loader = EdfLoader(channel_labels)

    for entry in read.read_manifest(params['train manifest']):
        edf_fn = os.path.join(paths['raw data'], entry['fn'])
        print(edf_fn)
        eeg_info = loader.load_metadata(edf_fn)

        stamps = eeg_info.annotations[0]
        texts = eeg_info.annotations[2]
        for stamp, text in zip(stamps, texts):
            # Suppress annotations that begin with an excluded prefix
            if any(text.startswith(prefix) for prefix in DONT_WRITE):
                continue
            print("{0:<7}{1:<50}".format(stamp, text))
        print('')
コード例 #5
0
def main():
    """Standardise every configured feature across the training manifest.

    Reads the configuration from 'default.ini' plus command-line overrides.
    For each name in ``params['features']`` the per-recording feature
    tensors are loaded from that feature's folder, NaN and inf entries are
    replaced by 0 (with a message naming the file), and the mean and
    standard deviation over the first dimension of all recordings stacked
    together are computed.  ``(feature - mean) / std`` is then written for
    every recording into the '<feature>_normalized' folder under the same
    file name.

    Positions whose standard deviation is zero or NaN (a constant feature,
    or only one row in total) are divided by 1 instead, so the output never
    picks up NaN/inf from the normalisation itself.
    """
    # Load the configuration files
    argv = sys.argv[1:]
    params = tc.TestConfiguration('default.ini', argv)
    paths = pm.PathManager(params)
    paths.initialize_folder('data')

    # The manifest does not depend on the feature, so read it once and
    # derive each recording's file name (basename up to the first dot).
    manifest_files = read.read_manifest(params['train manifest'])
    file_names = [
        recording['fn'].split('/')[-1].split('.')[0] + '.pt'
        for recording in manifest_files
    ]

    for feat_name in params['features']:
        print("Extracting {}".format(feat_name))
        paths.add_feature_folder(feat_name)
        paths.add_feature_folder(feat_name + '_normalized')

        # Load the features, zeroing non-finite entries in place
        feature_list = []
        for fn in file_names:
            feat = torch.load(os.path.join(paths[feat_name], fn))
            nan_mask = torch.isnan(feat)
            if nan_mask.any():
                print('{} contains nans'.format(fn))
                feat[nan_mask] = 0
            inf_mask = torch.isinf(feat)
            if inf_mask.any():
                print('{} contains infs'.format(fn))
                feat[inf_mask] = 0
            feature_list.append(feat)

        if not feature_list:
            # Empty manifest: there is nothing to normalise
            continue

        # Statistics over every row of every recording
        all_data = torch.cat(feature_list, dim=0)
        feat_means = torch.mean(all_data, 0)
        feat_stds = torch.std(all_data, 0)

        # The comparison is False for both 0 and NaN, so either is
        # replaced by 1 and the division below stays finite.
        feat_stds = torch.where(feat_stds > 0, feat_stds,
                                torch.ones_like(feat_stds))

        # Write out
        print('Writing normalized files')
        for fn, feature_tensor in zip(file_names, feature_list):
            fn_feat = os.path.join(paths[feat_name + '_normalized'], fn)
            torch.save((feature_tensor - feat_means) / feat_stds, fn_feat)
コード例 #6
0
def main():
    """Apply windowing to every saved buffer and store the result.

    Reads the configuration from 'default.ini' plus command-line overrides.
    For each manifest entry the tensor '<recording stem>.pt' is loaded from
    the buffers folder, passed to ``apply_window`` with the sampling
    frequency of the first manifest row and the configured window length
    and overlap, and saved under the same name in the data folder.
    """
    cli_args = sys.argv[1:]
    params = tc.TestConfiguration('default.ini', cli_args)
    paths = pm.PathManager(params)
    paths.initialize_folder('data')

    # The first manifest row supplies the sampling frequency for all files
    recordings = read.read_manifest(params['train manifest'])
    sample_rate = int(recordings[0]['fs'])

    for recording in recordings:
        # File name: basename of the manifest entry up to its first dot
        stem = recording['fn'].split('/')[-1].split('.')[0]
        out_name = stem + '.pt'

        raw = torch.load(os.path.join(paths['buffers'], out_name))
        windowed = apply_window(
            raw,
            fs=sample_rate,
            window_length=params['window length'],
            overlap=params['overlap'],
        )
        torch.save(windowed, os.path.join(paths['data'], out_name))
コード例 #7
0
    def __init__(self,
                 manifest_fn,
                 data_dir,
                 window_length,
                 overlap,
                 device='cpu',
                 features_dir='',
                 features=None,
                 no_load=False,
                 normalize_windows=False,
                 post_sz=False,
                 transform=None):
        """Read the manifest, build per-recording labels and load the data.

        Args:
            manifest_fn: Path handed to ``read.read_manifest``.  Each row
                must provide 'nchns', 'fs', 'duration', 'sz_starts',
                'sz_ends', 'onset_zone' and 'pt_num'.
            data_dir: Stored as ``self.data_dir``.
            window_length: Window length in seconds; multiplied by the
                sampling frequency to get the window size in samples.
            overlap: Overlap between consecutive windows, in the same unit
                as ``window_length``.  Must be smaller than it.
            device: Device the label tensors are moved to.
            features_dir: Stored as ``self.features_dir``.
            features: Feature names.  When non-empty, ``_load_features`` is
                called instead of ``_load_buffers``.  ``None`` (the
                default) means no features.
            no_load: When true, neither loader is called.
            normalize_windows: Stored as ``self.normalize_windows``.
            post_sz: Forwarded to ``create_label`` as ``post_sz_state``.
            transform: Stored as ``self.transform``.

        Raises:
            AssertionError: if ``overlap`` is not smaller than
                ``window_length``.
        """
        self.as_sequences = False
        self.data_dir = data_dir

        self.device = device
        self.features_dir = features_dir
        # A fresh list per instance: a literal [] default would be created
        # once and shared by every instance that relies on the default.
        self.features = [] if features is None else features
        self.normalize_windows = normalize_windows
        self.post_sz = post_sz
        self.transform = transform

        # Read manifest, get number of channels and sample frequency from
        # its first row
        self.manifest_files = read.read_manifest(manifest_fn)
        self.nchns = int(self.manifest_files[0]['nchns'])
        self.fs = int(self.manifest_files[0]['fs'])
        self.nfiles = len(self.manifest_files)

        # Set window parameters and compute window sample lengths
        assert overlap < window_length, "Overlap is longer than window"
        self.window_length = window_length
        self.overlap = overlap
        self.window_advance_seconds = window_length - overlap
        self.window_samples = int(window_length * self.fs)
        self.window_advance_samples = int(self.window_advance_seconds *
                                          self.fs)

        # Per-recording labels and metadata, plus dataset-wide totals,
        # gathered in a single pass over the manifest
        self.labels = []
        self.start_windows = []
        self.onset_zones = []
        self.lateralizations = []
        self.lobes = []
        self.patient_numbers = []
        self.total_duration = 0
        self.total_sz = 0
        window_idx = 0
        for recording in self.manifest_files:
            duration = float(recording['duration'])
            # Decoded once and reused for the label and the seizure count
            sz_starts = json.loads(recording['sz_starts'])

            label = create_label(duration,
                                 sz_starts,
                                 json.loads(recording['sz_ends']),
                                 self.window_length,
                                 self.overlap,
                                 post_sz_state=self.post_sz)
            label = label.to(self.device)
            self.labels.append(label)

            # Index of this recording's first window in the flat dataset
            self.start_windows.append(window_idx)
            window_idx += len(label)

            oz = int(recording['onset_zone'])
            self.onset_zones.append(oz)
            self.lateralizations.append(onset_zone_to_lateralization(oz))
            self.lobes.append(onset_zone_to_lobe(oz))
            self.patient_numbers.append(recording['pt_num'])

            self.total_duration += duration
            self.total_sz += len(sz_starts)
        self.nwindows = window_idx

        # Load features when any are requested, raw buffers otherwise
        if not no_load:
            if self.features:
                self._load_features()
            else:
                self._load_buffers()