Beispiel #1
0
    def __init__(self, patient_id, which_set, preprocessor_path, data_dir,
                 leave_one_out_seizure, sample_size_second, batch_size,
                 default_seed=0):
        """
        The Epilepsiae dataset customized for leave-one-seizure-out cross validation.

        Reads the seizure table from ``data_dir``, stores the configuration on
        the instance, delegates the actual loading to ``self.load_data`` and
        finally initialises the ``DenseDesignMatrix`` base with the result.

        Parameters
        ----------
        patient_id : int
            Patient ID. NOTE(review): accepted for interface compatibility but
            never read in this constructor; the list of recordings is
            hard-coded below.
        which_set : string
            Name used to specify which partition of the dataset to be loaded
            (e.g., 'train', 'valid', or 'test'). Forwarded to ``load_data``.
        preprocessor_path : string
            File path to store the scaler for pre-processing the EEG data.
            Forwarded to ``load_data``.
        data_dir : string
            Directory that stores the source EEG data. It must contain
            'RECORDS-WITH-SEIZURES.txt' (tab-separated, with a 'filename'
            column).
        leave_one_out_seizure : int
            Index of the withheld seizure.
        sample_size_second : int
            Number of seconds used to specify sample size.
        batch_size : int
            Size of the batch, used to remove a few samples to make the number
            of samples divisible by the batch size.
        default_seed : int, optional
            Seed for random.

        For preprocessing, see more in
            https://github.com/lisa-lab/pylearn2/blob/master/pylearn2/datasets/preprocessing.py

        For customizing dataset, see more in
            https://github.com/lisa-lab/pylearn2/blob/master/pylearn2/scripts/icml_2013_wrepl/emotions/emotions_dataset.py

        """

        # Recordings to load. NOTE(review): fixed to a single recording
        # session regardless of ``patient_id``.
        files = ['rec_26402102/26402102_0003.mat',
                 'rec_26402102/26402102_0007.mat',
                 'rec_26402102/26402102_0008.mat',
                 'rec_26402102/26402102_0017.mat']

        # Channel labels stored as ``self.filter_channels``.
        scalp_channels = np.asarray([u'FP1', u'FP2', u'F3', u'F4',
                                     u'C3', u'C4', u'P3', u'P4',
                                     u'O1', u'O2', u'F7', u'F8',
                                     u'T3', u'T4', u'T5', u'T6',
                                     u'FZ', u'CZ', u'PZ'])

        # Get seizure information
        records_path = os.path.join(data_dir, 'RECORDS-WITH-SEIZURES.txt')
        seizure_info = pd.read_table(records_path, sep='\t')

        # The table lists '.data' files while the recordings above are '.mat'.
        # The dot is escaped and ``regex=True`` is passed explicitly: the
        # pandas default for ``regex`` changed between versions, and an
        # unescaped '.' would match any character (e.g. '_data').
        seizure_info['filename'] = seizure_info['filename'].str.replace(
            r'\.data', '.mat', case=False, regex=True)

        # Configuration is stored before ``load_data`` runs; presumably that
        # method reads these attributes -- verify against its definition.
        self.data_dir = data_dir
        self.files = files
        self.seizure_info = seizure_info
        self.filter_channels = scalp_channels
        self.default_seed = default_seed
        self.leave_one_out_seizure = leave_one_out_seizure
        self.batch_size = batch_size

        X, y, n_channels, sample_size = self.load_data(which_set, sample_size_second, batch_size, preprocessor_path)
        self.n_channels = n_channels
        self.sample_size = sample_size

        # Each example is viewed as a (1, sample_size, 1) topology with
        # axes ordered (batch, 0, 1, channel).
        view_converter = DefaultViewConverter((1, sample_size, 1))
        view_converter.set_axes(axes=['b', 0, 1, 'c'])

        DenseDesignMatrix.__init__(self, X=X, y=y,
                                   view_converter=view_converter,
                                   axes=['b', 0, 1, 'c'])
    def __init__(self,
                 patient_id,
                 which_set,
                 preprocessor_path,
                 data_dir,
                 leave_one_out_seizure,
                 sample_size_second,
                 batch_size,
                 default_seed=0):
        """
        The Epilepsiae dataset customized for leave-one-seizure-out cross validation.

        The constructor reads the seizure table found in ``data_dir``, records
        its configuration on the instance, calls ``self.load_data`` to obtain
        the design matrix and targets, and then initialises the
        ``DenseDesignMatrix`` base class with them.

        Parameters
        ----------
        patient_id : int
            Patient ID. NOTE(review): this argument is not used anywhere in
            the constructor body; the recordings loaded are hard-coded.
        which_set : string
            Name used to specify which partition of the dataset to be loaded
            (e.g., 'train', 'valid', or 'test'). Passed through to
            ``load_data``.
        preprocessor_path : string
            File path to store the scaler for pre-processing the EEG data.
            Passed through to ``load_data``.
        data_dir : string
            Directory that stores the source EEG data, including the
            tab-separated 'RECORDS-WITH-SEIZURES.txt' file, which must have a
            'filename' column.
        leave_one_out_seizure : int
            Index of the withheld seizure.
        sample_size_second : int
            Number of seconds used to specify sample size.
        batch_size : int
            Size of the batch, used to remove a few samples to make the number
            of samples divisible by the batch size.
        default_seed : int, optional
            Seed for random.

        For preprocessing, see more in
            https://github.com/lisa-lab/pylearn2/blob/master/pylearn2/datasets/preprocessing.py

        For customizing dataset, see more in
            https://github.com/lisa-lab/pylearn2/blob/master/pylearn2/scripts/icml_2013_wrepl/emotions/emotions_dataset.py

        """

        # Load data
        # NOTE(review): the recording list is fixed and does not depend on
        # ``patient_id``.
        files = [
            'rec_26402102/26402102_0003.mat',
            'rec_26402102/26402102_0007.mat',
            'rec_26402102/26402102_0008.mat',
            'rec_26402102/26402102_0017.mat',
        ]
        scalp_channels = np.asarray([
            u'FP1', u'FP2', u'F3', u'F4', u'C3', u'C4', u'P3', u'P4', u'O1',
            u'O2', u'F7', u'F8', u'T3', u'T4', u'T5', u'T6', u'FZ', u'CZ',
            u'PZ'
        ])

        # Get seizure information
        seizure_info = pd.read_table(
            os.path.join(data_dir, 'RECORDS-WITH-SEIZURES.txt'),
            sep='\t')
        # Rewrite the '.data' names from the table to the '.mat' names used
        # in ``files``. ``regex=True`` is spelled out because the pandas
        # default changed across versions, and the dot is escaped so that it
        # only matches a literal '.' instead of any character.
        filenames = seizure_info['filename']
        seizure_info['filename'] = filenames.str.replace(
            r'\.data', '.mat', case=False, regex=True)

        # Stored ahead of the ``load_data`` call; presumably ``load_data``
        # depends on these attributes -- confirm against its implementation.
        self.data_dir = data_dir
        self.files = files
        self.seizure_info = seizure_info
        self.filter_channels = scalp_channels
        self.default_seed = default_seed
        self.leave_one_out_seizure = leave_one_out_seizure
        self.batch_size = batch_size

        X, y, n_channels, sample_size = self.load_data(which_set,
                                                       sample_size_second,
                                                       batch_size,
                                                       preprocessor_path)
        self.n_channels = n_channels
        self.sample_size = sample_size

        # Topological view: (1, sample_size, 1) per example, with axes in
        # (batch, 0, 1, channel) order.
        view_converter = DefaultViewConverter((1, sample_size, 1))
        view_converter.set_axes(axes=['b', 0, 1, 'c'])

        DenseDesignMatrix.__init__(self,
                                   X=X,
                                   y=y,
                                   view_converter=view_converter,
                                   axes=['b', 0, 1, 'c'])