Ejemplo n.º 1
0
def test_shape_output():
    """Check the output shape and that array and DataFrame outputs agree."""
    funcs = ['mean', 'variance', 'kurtosis', 'pow_freq_bands']
    as_array = extract_features(data, sfreq, funcs, n_jobs=1)
    as_frame = extract_features(
        data, sfreq, funcs, n_jobs=1, return_as_df=True)

    # Expected width per channel is spelled (3 + 5): presumably three
    # single-valued features plus five default frequency bands -- confirm.
    feats_per_channel = 3 + 5
    assert_equal(as_array.shape, (n_epochs, feats_per_channel * n_channels))
    assert_equal(as_array, as_frame.values)
Ejemplo n.º 2
0
def test_feature_names_pow_freq_bands():
    """Check ``pow_freq_bands`` column names for every ``ratios`` mode.

    Two ways of specifying the bands are exercised (an array of band edges
    and a dict of named bands); for each, the second level of the returned
    DataFrame columns is compared with the expected names.
    """
    # Only two channels are kept so the expected names stay short.
    two_chan_data = data[:, :2, :]
    funcs = ['pow_freq_bands']

    # Each case: (freq_bands spec, expected power names, expected ratio names)
    cases = [
        (
            np.array([[4., 8.], [30., 70.]]),
            ['ch0_band0', 'ch0_band1', 'ch1_band0', 'ch1_band1'],
            [
                'ch0_band0/band1', 'ch0_band1/band0', 'ch1_band0/band1',
                'ch1_band1/band0'
            ],
        ),
        (
            {'theta': [4, 8], 'low-gamma': np.array([30, 70])},
            ['ch0_theta', 'ch0_low-gamma', 'ch1_theta', 'ch1_low-gamma'],
            [
                'ch0_theta/low-gamma', 'ch0_low-gamma/theta',
                'ch1_theta/low-gamma', 'ch1_low-gamma/theta'
            ],
        ),
    ]

    for bands, power_cols, ratio_cols in cases:
        # ``ratios`` mode -> names expected in the output columns.
        expected_by_mode = [
            ('only', ratio_cols),
            ('all', power_cols + ratio_cols),
            (None, power_cols),
        ]
        for mode, expected_cols in expected_by_mode:
            params = {
                'pow_freq_bands__ratios': mode,
                'pow_freq_bands__freq_bands': bands
            }
            frame = extract_features(two_chan_data,
                                     sfreq,
                                     funcs,
                                     funcs_params=params,
                                     return_as_df=True)
            assert_equal(frame.columns.get_level_values(1).values,
                         expected_cols)
Ejemplo n.º 3
0
def test_optional_params():
    """Check that optional parameters drive the number of output features."""

    def _n_features(alias, params):
        # Width (last axis) of the feature matrix for a single feature alias.
        return extract_features(data, sfreq, [alias], params).shape[-1]

    one_edge = _n_features('spect_edge_freq', {'spect_edge_freq__edge': [0.6]})
    two_edges = _n_features('spect_edge_freq',
                            {'spect_edge_freq__edge': [0.5, 0.95]})
    fisher = _n_features('svd_fisher_info', {'svd_fisher_info__tau': 5})

    assert_equal(one_edge, n_channels)
    assert_equal(fisher, n_channels)
    assert_equal(two_edges, one_edge * 2)
Ejemplo n.º 4
0
def test_user_defined_feature_function():
    """Check that user-defined feature functions are accepted and validated.

    A numba-jitted function is registered under the alias ``top_feature``.
    The test checks the output shape with default and overridden ``gamma``,
    then checks that each malformed ``selected_funcs`` entry raises
    ``ValueError``.
    """
    # User-defined feature function
    @nb.jit()
    def top_feature(arr, gamma=3.14):
        return np.sum(np.power(gamma * arr, 3) - np.power(arr / gamma, 2),
                      axis=-1)

    # Valid feature extraction
    selected_funcs = ['mean', ('top_feature', top_feature)]
    feat = extract_features(data, sfreq, selected_funcs)
    assert_equal(feat.shape, (n_epochs, 2 * n_channels))
    # Changing optional parameter ``gamma`` of ``top_feature``
    feat2 = extract_features(data,
                             sfreq,
                             selected_funcs,
                             funcs_params={'top_feature__gamma': 1.41})
    assert_equal(feat2.shape, (n_epochs, 2 * n_channels))

    # Invalid feature extractions. Each call gets its own ``assert_raises``
    # block: inside a shared block the first exception ends the block, so the
    # later calls would never be executed (or checked).
    with assert_raises(ValueError):
        # Alias is already used
        extract_features(data, sfreq, ['variance', ('mean', top_feature)])
    with assert_raises(ValueError):
        # Tuple is not of length 2
        extract_features(
            data, sfreq,
            ['variance', ('top_feature', top_feature, data[:, ::2])])
    with assert_raises(ValueError):
        # Invalid type
        extract_features(data, sfreq, ['mean', top_feature])
Ejemplo n.º 5
0
def test_channel_naming():
    """Check that user-supplied channel names end up in the column labels."""
    funcs = ['app_entropy']

    # Default-style names, with the first four replaced by custom labels.
    names = ['CHANNEL%s' % idx for idx in range(n_channels)]
    names[:4] = ['Cz', 'FCz', 'P1', 'CP1']

    frame = extract_features(
        data, sfreq, funcs, ch_names=names, return_as_df=True)
    wanted = [('app_entropy', name) for name in names]
    assert frame.columns.values.tolist() == wanted

    # One name too many must be rejected.
    names.append('CHANNEL%s' % n_channels)
    with assert_raises(ValueError):
        extract_features(
            data, sfreq, funcs, ch_names=names, return_as_df=True)
Ejemplo n.º 6
0
    def online_pipe(self, data: NDArray) -> NDArray:
        """
        Run the online feature pipeline on one window of data.

        Steps, in order: cast to float64, band-pass filter (8-30 Hz),
        Laplacian (via ``self.eeg.laplacian``), standardization, and feature
        extraction (``pow_freq_bands`` and ``variance``).

        :param data: 2-D ndarray; described by the original author as
            (n_channels, n_samples)
        :return: the feature row of the single epoch built from ``data``
            (the leading epoch axis is dropped with ``[0]``)
        """
        # MNE filtering expects float64 input
        signal = data.astype(np.float64)

        # Band-pass only
        signal = mne.filter.filter_data(
            signal, l_freq=8, h_freq=30, sfreq=self.eeg.sfreq, verbose=False)

        # Spatial Laplacian, using the board's channel names
        signal = self.eeg.laplacian(signal, self.eeg.get_board_names())

        # StandardScaler standardizes columns, hence the transpose round-trip
        signal = StandardScaler().fit_transform(signal.T).T

        # A leading axis is added so the window is treated as a single epoch
        band_edges = np.array([8, 10, 12.5, 30])
        feature_matrix = extract_features(
            signal[np.newaxis],
            self.eeg.sfreq,
            ['pow_freq_bands', 'variance'],
            {'pow_freq_bands__freq_bands': band_edges})

        return feature_matrix[0]
def generate_mne_features_of_one_file(signals, sfreq, selected_funcs,
                                      func_params, epoch_duration_s,
                                      max_abs_val, agg_mode):
    """Compute mne_features features for the signals of a single file.

    The signals are split into epochs, epochs flagged by
    ``reject_windows_with_outliers`` are dropped, features are extracted as a
    DataFrame and, optionally, aggregated over the epoch axis.

    :param signals: signals forwarded to ``split_into_epochs``
    :param sfreq: sampling frequency, forwarded to ``split_into_epochs`` and
        ``extract_features``
    :param selected_funcs: feature aliases forwarded to ``extract_features``
    :param func_params: optional feature parameters (``funcs_params``)
    :param epoch_duration_s: epoch duration forwarded to
        ``split_into_epochs``
    :param max_abs_val: outlier threshold forwarded to
        ``reject_windows_with_outliers``
    :param agg_mode: name of a NumPy function (looked up with
        ``getattr(np, agg_mode)``) used to aggregate over epochs, or one of
        ``"none"``, ``"None"``, ``None`` to disable aggregation
    :return: the features (aggregated if requested).
        NOTE(review): when every epoch is rejected the function returns the
        tuple ``(None, None)`` instead -- callers must handle both shapes.
    :raises AttributeError: if ``agg_mode`` names no attribute of ``numpy``
    """
    if agg_mode in ["none", "None", None]:
        agg_func = None
    else:
        # Resolve the name to the actual NumPy callable. The lookup result
        # must be kept: calling the string itself raises TypeError.
        agg_func = getattr(np, agg_mode)

    epochs = split_into_epochs(signals=signals,
                               sfreq=sfreq,
                               epoch_duration_s=epoch_duration_s)
    mask = reject_windows_with_outliers(epochs, outlier_value=max_abs_val)
    # Keep only the epochs that were NOT flagged as outliers
    epochs = epochs[np.logical_not(mask)]
    if epochs.size == 0:
        logging.warning("removed all epochs due to outliers")
        return None, None

    # generate features implemented in mne_features
    features = extract_features(epochs,
                                sfreq,
                                selected_funcs,
                                funcs_params=func_params,
                                return_as_df=True)
    # aggregate over dimension of epochs
    if agg_func is not None:
        features = agg_func(features, axis=0)

    return features
Ejemplo n.º 8
0
def test_feature_extractor():
    """Check ``FeatureExtractor`` against ``extract_features``.

    The transformer must reproduce the function's output, and an invalid
    ``app_entropy`` metric must raise ``ValueError`` on ``fit_transform``.
    """
    funcs = ['app_entropy']
    transformer = FeatureExtractor(sfreq=sfreq, selected_funcs=funcs)
    reference = extract_features(data, sfreq, funcs)
    assert_almost_equal(reference, transformer.fit_transform(data))

    bad_params = {'app_entropy__metric': 'sqeuclidean'}
    with assert_raises(ValueError):
        broken = FeatureExtractor(
            sfreq=sfreq, selected_funcs=funcs, params=bad_params)
        broken.fit_transform(data)
Ejemplo n.º 9
0
def test_wrong_params():
    """Check that each kind of invalid argument raises ``ValueError``."""
    invalid_calls = [
        # Negative sfreq
        (data, -0.1, ['mean']),
        # Unknown alias of feature function
        (data, sfreq, ['power_freq_bands']),
        # No alias given
        (data, sfreq, list()),
        # Passing optional arguments with unknown alias
        (data, sfreq, ['higuchi_fd'], {'higuch_fd__kmax': 3}),
    ]
    for call_args in invalid_calls:
        with assert_raises(ValueError):
            extract_features(*call_args)
Ejemplo n.º 10
0
def test_feature_names_spect_slope():
    """Check the column names produced by ``spect_slope``."""
    n_chans = 2  # two channels are enough to check the naming scheme
    subset = data[:, :n_chans, :]

    # One column per (channel, statistic), channels varying slowest.
    expected = []
    for ch in range(n_chans):
        for stat in ('intercept', 'slope', 'MSE', 'R2'):
            expected.append('ch%s_%s' % (ch, stat))

    frame = extract_features(subset, sfreq, ['spect_slope'],
                             return_as_df=True)
    assert_equal(frame.columns.get_level_values(1).values, expected)
Ejemplo n.º 11
0
def extract_features(eeg: EEG, trials: List[RawArray],
                     features: List[str]) -> np.ndarray:
    """Extract the selected features from a list of trials.

    Each trial's samples are read with ``get_data()``, the per-trial arrays
    are combined by ``to_3d_matrix``, and the result is passed to
    ``feature_extraction.extract_features`` with ``eeg.sfreq``.

    :param eeg: object providing the sampling frequency (``eeg.sfreq``)
    :param trials: trials to extract features from
    :param features: feature aliases (``selected_funcs``)
    :return: whatever ``feature_extraction.extract_features`` returns
    """
    per_trial = [trial.get_data() for trial in trials]
    stacked = to_3d_matrix(per_trial)
    return feature_extraction.extract_features(
        stacked, sfreq=eeg.sfreq, selected_funcs=features)
Ejemplo n.º 12
0
def test_channel_naming_bivariate(selected_func, include_diag):
    """Check column labels of a bivariate feature with custom channel names.

    Columns are expected to be labelled ``<name_i>-<name_j>`` for every
    channel pair yielded by ``_idxiter``.
    """
    names = ['CHANNEL%s' % idx for idx in range(n_channels)]
    names[:4] = ['Cz', 'FCz', 'P1', 'CP1']

    params = {selected_func + '__include_diag': include_diag}
    frame = extract_features(
        data, sfreq, [selected_func], params, ch_names=names,
        return_as_df=True)

    wanted = []
    for _, i, j in _idxiter(n_channels, include_diag=include_diag):
        wanted.append((selected_func, names[i] + '-' + names[j]))

    assert frame.columns.values.tolist() == wanted
Ejemplo n.º 13
0
def extract_features_data(data,
                          selected_features,
                          sfreq=256,
                          funcs_params=None):
    """Extract features from ``data`` and return them as a DataFrame.

    ``data`` is transposed with ``axes=[0, 2, 1]`` before extraction
    (presumably it arrives as (n_epochs, n_times, n_channels) -- confirm).

    :param data: 3-D array whose last two axes are swapped before extraction
    :param selected_features: either a single hashable entry (e.g. one alias
        string), which is wrapped into a one-element set as before, or an
        unhashable collection of entries (e.g. a list of aliases), which is
        passed through unchanged
    :param sfreq: sampling frequency (default 256)
    :param funcs_params: optional feature parameters
    :return: the DataFrame returned by ``extract_features``
    """
    data_T = np.transpose(data, axes=[0, 2, 1])

    try:
        # Historical behavior: wrap the single selected feature in a set.
        selected_funcs = {selected_features}
    except TypeError:
        # Unhashable input (list, set, ...) used to crash here; treat it as
        # the collection of selected features it evidently is.
        selected_funcs = selected_features

    return extract_features(X=data_T,
                            selected_funcs=selected_funcs,
                            funcs_params=funcs_params,
                            sfreq=sfreq,
                            return_as_df=True,
                            n_jobs=-1)
Ejemplo n.º 14
0
def test_feature_names_quantile():
    """Check the column names produced by ``quantile`` for two quantiles."""
    n_chans = 2  # two channels are enough to check the naming scheme
    subset = data[:, :n_chans, :]
    quantiles = [0.25, 0.75]

    # Columns are numbered by quantile position within each channel.
    expected = []
    for ch in range(n_chans):
        for pos, _ in enumerate(quantiles):
            expected.append('ch%s_%s' % (ch, pos))

    frame = extract_features(subset,
                             sfreq,
                             ['quantile'],
                             funcs_params={'quantile__q': quantiles},
                             return_as_df=True)
    assert_equal(frame.columns.get_level_values(1).values, expected)
Ejemplo n.º 15
0
def test_generic_features_names():
    """Check the ``(alias, 'ch<i>')`` column labels of the listed features."""
    n_chans = 2  # two channels are enough to check the naming scheme
    subset = data[:, :n_chans, :]
    funcs = [
        'mean', 'variance', 'std', 'ptp_amp', 'skewness', 'kurtosis', 'rms',
        'quantile', 'hurst_exp', 'app_entropy', 'samp_entropy', 'decorr_time',
        'hjorth_mobility_spect', 'hjorth_complexity_spect', 'hjorth_mobility',
        'hjorth_complexity', 'higuchi_fd', 'katz_fd', 'zero_crossings',
        'line_length', 'spect_entropy', 'svd_entropy', 'svd_fisher_info'
    ]

    chan_labels = ['ch%s' % ch for ch in range(n_chans)]
    expected = [(func, label) for func in funcs for label in chan_labels]

    frame = extract_features(subset, sfreq, funcs, return_as_df=True)
    assert frame.columns.to_list() == expected
Ejemplo n.º 16
0
def test_channel_naming_pow_freq_bands():
    """Check ``pow_freq_bands`` ratio column labels with custom channel names."""
    names = ['CHANNEL%s' % idx for idx in range(n_channels)]
    names[:4] = ['Cz', 'FCz', 'P1', 'CP1']

    params = {
        'pow_freq_bands__freq_bands': np.array([[0, 2], [10, 20]]),
        'pow_freq_bands__ratios': 'only'
    }
    frame = extract_features(
        data, sfreq, ['pow_freq_bands'], params, ch_names=names,
        return_as_df=True)

    # With ``ratios='only'`` each channel gets one column per ordered pair of
    # the two bands, in the order yielded by ``_idxiter``.
    wanted = []
    for name in names:
        for _, i, j in _idxiter(2, triu=False):
            wanted.append(('pow_freq_bands', f'{name}_band{i}/band{j}'))

    assert frame.columns.values.tolist() == wanted
Ejemplo n.º 17
0
    def get_features(self, channels: List[str], selected_funcs: List[str],
                     notch: float = 50, low_pass: float = 4, high_pass: float = 48) -> NDArray:
        """
        Fetch the raw data of the requested channels, filter it and return
        the selected features.

        :param channels: channel names passed to ``get_raw_data``
        :param selected_funcs: feature aliases passed to ``extract_features``
        :param notch: forwarded to ``filter_data``
        :param low_pass: forwarded to ``filter_data``
        :param high_pass: forwarded to ``filter_data``
        :return features: output of ``extract_features``; described by the
            original author as an NDArray of shape (1, n_features)
        """
        raw = self.get_raw_data(ch_names=channels)
        filtered = self.filter_data(raw, notch, low_pass, high_pass)

        # Only the first entry of ``get_data()`` is used; a leading axis is
        # re-added before extraction.
        # NOTE(review): confirm this selection is intended.
        samples = filtered.get_data()[0][np.newaxis]

        return extract_features(samples, self.sfreq, selected_funcs)
Ejemplo n.º 18
0
def test_feature_names_energy_freq_bands():
    """Check ``energy_freq_bands`` column names for array and dict bands."""
    # Only two channels are kept so the expected names stay short.
    subset = data[:, :2, :]
    funcs = ['energy_freq_bands']

    # Each case: (freq_bands spec, expected column names)
    cases = [
        (
            np.array([[4., 8.], [30., 70.]]),
            ['ch0_band0', 'ch0_band1', 'ch1_band0', 'ch1_band1'],
        ),
        (
            {'theta': [4, 8], 'low-gamma': np.array([30, 70])},
            ['ch0_theta', 'ch0_low-gamma', 'ch1_theta', 'ch1_low-gamma'],
        ),
    ]

    for bands, expected in cases:
        frame = extract_features(
            subset, sfreq, funcs,
            funcs_params={'energy_freq_bands__freq_bands': bands},
            return_as_df=True)
        assert_equal(frame.columns.get_level_values(1).values, expected)
def preprocess_dataset(output_dir):
    """Build train/test feature arrays from EDF recordings and save them.

    All ``*.edf`` files under a hard-coded root directory are shuffled and
    split with ``train_test_split`` (default settings). For each split, every
    recording sampled at 250 Hz contributes one randomly placed 60-second
    window restricted to 21 fixed channels. The ``mean``, ``ptp_amp`` and
    ``std`` features are extracted from the windows and saved together with
    the labels as ``x_<split>.npy`` / ``y_<split>.npy`` in ``output_dir``.

    :param output_dir: directory the four ``.npy`` files are written to
    """
    # Channels to keep, in the order they appear in the output
    ch_names = np.array([
        'A1', 'A2', 'C3', 'C4', 'CZ', 'F3', 'F4', 'F7', 'F8', 'FP1', 'FP2',
        'FZ', 'O1', 'O2', 'P3', 'P4', 'PZ', 'T3', 'T4', 'T5', 'T6'
    ])
    # NOTE(review): the data root is hard-coded
    data_paths = glob.glob('/storage/inria/viovene/tuh_data/**/*.edf',
                           recursive=True)
    # Uses the global NumPy RNG; results are not reproducible unless the
    # caller seeds it
    np.random.shuffle(data_paths)
    train_paths, test_paths = train_test_split(data_paths)
    data = {}
    # NOTE: the loop variable ``data_paths`` rebinds the name of the full
    # path list defined above
    for dataset_name, data_paths in [('train', train_paths),
                                     ('test', test_paths)]:
        sfreqs = []
        xs = []
        ys = []
        for path in data_paths:
            f = mne.io.read_raw_edf(path)
            # Strip the 'EEG ' and '-REF' parts from the channel names
            cleaned_ch_names = np.array([
                c.replace('EEG ', '').replace('-REF', '') for c in f.ch_names
            ])
            # Index of each wanted channel in this file; ``[0][0]`` raises
            # IndexError if a channel is missing. This lookup runs before the
            # sampling-frequency check below.
            ch_idxs = np.array(
                [np.where(cleaned_ch_names == ch)[0][0] for ch in ch_names])
            sfreq = f.info['sfreq']
            # Only recordings sampled at exactly 250 Hz are used
            if sfreq != 250.0:
                continue
            sfreqs.append(sfreq)
            x = f.get_data()
            x = x[ch_idxs, :]
            # Random window start between 2 minutes after the beginning and
            # 2 minutes before the end.
            # NOTE(review): ``randint`` raises ValueError when the recording
            # is too short for this range to be non-empty.
            rnd_start_idx = np.random.randint(
                int(2 * 60 * sfreq), int(x.shape[1] - (2 * 60 * sfreq)))
            # Keep 60 seconds of signal starting at the random index
            x = x[:, rnd_start_idx:int(rnd_start_idx + 60 * sfreq)]
            xs.append(x[np.newaxis, :, :])
            # The label is True when the file path contains 'abnormal'
            label = 'abnormal' in path
            ys.append(label)
        # NOTE(review): if no recording was kept, ``xs`` and ``sfreqs`` are
        # empty and the next lines raise.
        x = np.concatenate(xs, axis=0)
        y = np.array(ys)
        selected_funcs = {'mean', 'ptp_amp', 'std'}
        # Every kept recording has sfreq == 250.0, so the first one is used
        x = extract_features(x, sfreqs[0], selected_funcs)
        data[dataset_name] = {'x': x, 'y': y}

    # Write x_train.npy, y_train.npy, x_test.npy and y_test.npy
    for dataset_name in data:
        for k in ['x', 'y']:
            path = os.path.join(output_dir, f'{k}_{dataset_name}.npy')
            np.save(path, data[dataset_name][k])
Ejemplo n.º 20
0
def test_feature_names_spect_edge_freq():
    """Check ``spect_edge_freq`` column names for several ``edge`` settings."""
    n_chans = 2  # two channels are enough to check the naming scheme
    subset = data[:, :n_chans, :]
    funcs = ['spect_edge_freq']

    for requested in (None, [.5], [.5, .9]):
        # A ``None`` entry is replaced by ``[.5]`` before the call, so the
        # function itself never receives ``None`` here.
        edges = [.5] if requested is None else requested

        expected = []
        for ch in range(n_chans):
            for pos, _ in enumerate(edges):
                expected.append('ch%s_%s' % (ch, pos))

        frame = extract_features(subset,
                                 sfreq,
                                 funcs,
                                 funcs_params={'spect_edge_freq__edge': edges},
                                 return_as_df=True)
        assert_equal(frame.columns.get_level_values(1).values, expected)
Ejemplo n.º 21
0
def test_feature_names_wavelet_coef_energy(wavelet_name='db4'):
    """Check the column names produced by ``wavelet_coef_energy``."""
    n_chans = 2  # two channels are enough to check the naming scheme
    subset = data[:, :n_chans, :]

    # Expected number of columns per channel: the maximum DWT level for this
    # signal length and wavelet, capped at 6.
    filter_len = pywt.Wavelet(wavelet_name).dec_len
    n_levels = min(pywt.dwt_max_level(subset.shape[-1], filter_len), 6)

    expected = []
    for ch in range(n_chans):
        for level in range(n_levels):
            expected.append('ch%s_%s' % (ch, level))

    params = {'wavelet_coef_energy__wavelet_name': wavelet_name}
    frame = extract_features(subset,
                             sfreq,
                             ['wavelet_coef_energy'],
                             funcs_params=params,
                             return_as_df=True)
    assert_equal(frame.columns.get_level_values(1).values, expected)
Ejemplo n.º 22
0
def test_optional_params_func_with_numba():
    """Check that ``higuchi_fd`` accepts an overridden ``kmax``."""
    feats = extract_features(data, sfreq, ['higuchi_fd'],
                             {'higuchi_fd__kmax': 5})
    # One feature per channel is expected.
    assert_equal(feats.shape[-1], n_channels)
Ejemplo n.º 23
0
import mne
import numpy as np
from mne_features.feature_extraction import extract_features

from moabb.datasets import physionet_mi

if __name__ == '__main__':
    # Load one run of subject 2 from the PhysionetMI dataset, keeping only
    # channels C3 and C4.
    ds = physionet_mi.PhysionetMI()
    raw = ds.get_data([2])[2]['session_0']['run_4'].pick_channels(['C3', 'C4'])
    # ``events_from_annotations`` returns a pair; its first item is the
    # events array used below.
    events, _ = mne.events_from_annotations(raw)
    s_freq = raw.info['sfreq']

    # Epoch the recording around the events
    X = mne.Epochs(raw, events).get_data()

    # Extract features: three per-channel statistics plus band powers for
    # the band edges 1, 2, ... up to (but excluding) int(s_freq / 2).
    params = {
        'pow_freq_bands__freq_bands': np.arange(1, int(s_freq / 2), 1),
    }
    selected_funcs = {'mean', 'ptp_amp', 'std', 'pow_freq_bands'}
    features_array = extract_features(X, s_freq, selected_funcs, params)

    # save features to file
    np.savetxt('../data/mne/1/features.csv', features_array, delimiter=',')

    # Save the third column of every event as the target vector.
    # NOTE(review): this uses all events; if epoching drops any of them, the
    # rows of ``y`` and ``features_array`` will not line up -- confirm.
    y = np.asarray([e[2] for e in events])
    np.savetxt('../data/mne/1/stimulus_vectors.csv', y)
Ejemplo n.º 24
0
def test_njobs():
    """Check the output shape when ``n_jobs=-1`` is requested."""
    feats = extract_features(data, sfreq, ['app_entropy'], n_jobs=-1)
    assert_equal(feats.shape, (n_epochs, n_channels))
# Read epochs
epochs = mne.Epochs(raw, events, event_id, tmin, tmax, picks=picks, proj=True,
                    baseline=None, preload=True)
# The last column of the events array holds the class label of each epoch
labels = epochs.events[:, -1]

# get MEG and EEG data
data = epochs.get_data()

###############################################################################
# Prepare for the classification task:

pipe = Pipeline([('scaler', StandardScaler()),
                 ('lr', LogisticRegression(random_state=42, solver='lbfgs'))])
y = labels

###############################################################################
# Classification using features (mean, peak-to-peak amplitude,
# standard deviation). See :ref:`api_documentation` for full list of supported
# features.

selected_funcs = {'mean', 'ptp_amp', 'std'}
X_new = extract_features(data, raw.info['sfreq'], selected_funcs)
# ``random_state`` only has an effect when ``shuffle=True``; recent
# scikit-learn versions raise a ValueError if it is set without shuffling.
kf = KFold(n_splits=3, shuffle=True, random_state=42)
scores = cross_val_score(pipe, X_new, y, scoring='accuracy', cv=kf)

###############################################################################
# Print the cross-validation score:

print('Cross-validation accuracy score = %1.3f (+/- %1.5f)' % (np.mean(scores),
                                                               np.std(scores)))