def test_shape_output():
    """Check the output shape and that array and DataFrame outputs agree."""
    funcs = ['mean', 'variance', 'kurtosis', 'pow_freq_bands']
    as_array = extract_features(data, sfreq, funcs, n_jobs=1)
    as_frame = extract_features(data, sfreq, funcs, n_jobs=1,
                                return_as_df=True)

    # (3 + 5) columns per channel: presumably one for each of the three
    # scalar statistics and five for `pow_freq_bands` with default bands.
    n_features = (3 + 5) * n_channels
    assert_equal(as_array.shape, (n_epochs, n_features))
    assert_equal(as_array, as_frame.values)
def test_feature_names_pow_freq_bands():
    """Check `pow_freq_bands` column names for every `ratios` setting.

    Frequency bands are supplied once as an array (columns are then named
    ``band<i>``) and once as a dict (columns are named after its keys).
    """
    two_ch_data = data[:, :2, :]  # two channels keep the expectations short
    funcs = ['pow_freq_bands']

    def _second_level_names(freq_bands, ratios):
        # Extract as a DataFrame and return the second level of its columns.
        frame = extract_features(
            two_ch_data, sfreq, funcs,
            funcs_params={
                'pow_freq_bands__ratios': ratios,
                'pow_freq_bands__freq_bands': freq_bands,
            },
            return_as_df=True)
        return frame.columns.get_level_values(1).values

    # Each case: (freq_bands, expected power names, expected ratio names).
    cases = (
        (
            np.array([[4., 8.], [30., 70.]]),
            ['ch0_band0', 'ch0_band1', 'ch1_band0', 'ch1_band1'],
            ['ch0_band0/band1', 'ch0_band1/band0',
             'ch1_band0/band1', 'ch1_band1/band0'],
        ),
        (
            {'theta': [4, 8], 'low-gamma': np.array([30, 70])},
            ['ch0_theta', 'ch0_low-gamma', 'ch1_theta', 'ch1_low-gamma'],
            ['ch0_theta/low-gamma', 'ch0_low-gamma/theta',
             'ch1_theta/low-gamma', 'ch1_low-gamma/theta'],
        ),
    )

    for freq_bands, power_names, ratio_names in cases:
        # `ratios='only'`: ratio columns only.
        assert_equal(_second_level_names(freq_bands, 'only'), ratio_names)
        # `ratios='all'`: power columns followed by ratio columns.
        assert_equal(_second_level_names(freq_bands, 'all'),
                     power_names + ratio_names)
        # `ratios=None`: power columns only.
        assert_equal(_second_level_names(freq_bands, None), power_names)
def test_optional_params():
    """Check that optional feature parameters change the output width."""
    one_edge = extract_features(data, sfreq, ['spect_edge_freq'],
                                {'spect_edge_freq__edge': [0.6]})
    two_edges = extract_features(data, sfreq, ['spect_edge_freq'],
                                 {'spect_edge_freq__edge': [0.5, 0.95]})
    fisher = extract_features(data, sfreq, ['svd_fisher_info'],
                              {'svd_fisher_info__tau': 5})

    n_cols_one_edge = one_edge.shape[-1]
    assert_equal(n_cols_one_edge, n_channels)
    assert_equal(fisher.shape[-1], n_channels)
    # Two edge values must yield twice as many columns as one.
    assert_equal(two_edges.shape[-1], n_cols_one_edge * 2)
def test_user_defined_feature_function():
    """Check user-defined feature functions given as ``(alias, func)`` tuples.

    Covers a valid extraction, overriding the function's optional parameter
    through ``funcs_params``, and three malformed specifications that are
    each expected to raise ``ValueError``.
    """
    # User-defined feature function
    @nb.jit()
    def top_feature(arr, gamma=3.14):
        return np.sum(np.power(gamma * arr, 3) - np.power(arr / gamma, 2),
                      axis=-1)

    # Valid feature extraction
    selected_funcs = ['mean', ('top_feature', top_feature)]
    feat = extract_features(data, sfreq, selected_funcs)
    assert_equal(feat.shape, (n_epochs, 2 * n_channels))

    # Changing optional parameter ``gamma`` of ``top_feature``
    feat2 = extract_features(data, sfreq, selected_funcs,
                             funcs_params={'top_feature__gamma': 1.41})
    assert_equal(feat2.shape, (n_epochs, 2 * n_channels))

    # Invalid feature extractions. Each call gets its own context manager:
    # inside a single `with assert_raises(...)` block the first exception
    # ends the block, so any later call would never be executed.
    with assert_raises(ValueError):
        # Alias is already used
        extract_features(data, sfreq, ['variance', ('mean', top_feature)])
    with assert_raises(ValueError):
        # Tuple is not of length 2
        extract_features(
            data, sfreq,
            ['variance', ('top_feature', top_feature, data[:, ::2])])
    with assert_raises(ValueError):
        # Invalid type
        extract_features(data, sfreq, ['mean', top_feature])
def test_channel_naming():
    """Check that custom channel names end up in the DataFrame columns."""
    # Four explicit names followed by generic ones for remaining channels.
    names = ['Cz', 'FCz', 'P1', 'CP1']
    names.extend('CHANNEL%s' % idx for idx in range(4, n_channels))
    funcs = ['app_entropy']

    frame = extract_features(data, sfreq, funcs, ch_names=names,
                             return_as_df=True)
    expected = [('app_entropy', name) for name in names]
    assert frame.columns.values.tolist() == expected

    # One name too many: the number of names no longer matches the data.
    names.append('CHANNEL%s' % n_channels)
    with assert_raises(ValueError):
        extract_features(data, sfreq, funcs, ch_names=names,
                         return_as_df=True)
def online_pipe(self, data: NDArray) -> NDArray:
    """
    Run the online preprocessing chain on one window and extract features.

    Steps, in order: cast to float64, 8-30 Hz band-pass, Laplacian,
    standardisation, feature extraction.

    :param data: ndarray with the shape (n_channels, n_samples)
    :return: first row of the feature matrix computed for this window
    """
    # MNE filtering functions operate on float64 arrays
    as_float = data.astype(np.float64)

    # Band-pass only (no notch)
    band_passed = mne.filter.filter_data(as_float, l_freq=8, h_freq=30,
                                         sfreq=self.eeg.sfreq, verbose=False)

    # Laplacian
    board_names = self.eeg.get_board_names()
    referenced = self.eeg.laplacian(band_passed, board_names)

    # Standardise; the scaler is fitted on the transposed array, so its
    # statistics are computed along the second axis of `referenced`
    normalised = StandardScaler().fit_transform(referenced.T).T

    # Extract features from a single-epoch batch and drop the epoch axis
    selected_funcs = ['pow_freq_bands', 'variance']
    funcs_params = {'pow_freq_bands__freq_bands': np.array([8, 10, 12.5, 30])}
    batch = normalised[np.newaxis]
    features = extract_features(batch, self.eeg.sfreq, selected_funcs,
                                funcs_params)
    return features[0]
def generate_mne_features_of_one_file(signals, sfreq, selected_funcs,
                                      func_params, epoch_duration_s,
                                      max_abs_val, agg_mode):
    """Compute mne-features features for the signals of one recording.

    The signals are split into epochs, epochs flagged by
    ``reject_windows_with_outliers`` are discarded, features are extracted
    from the remaining epochs and optionally aggregated over epochs.

    Parameters
    ----------
    signals, sfreq, epoch_duration_s
        Forwarded to ``split_into_epochs``.
    selected_funcs, func_params
        Forwarded to ``extract_features`` (as ``selected_funcs`` and
        ``funcs_params``).
    max_abs_val
        Forwarded to ``reject_windows_with_outliers`` as ``outlier_value``.
    agg_mode : str, callable or None
        Name of a NumPy function (looked up with ``getattr(np, agg_mode)``)
        or a callable accepting ``(features, axis=0)``. ``None``, ``"none"``
        and ``"None"`` disable aggregation.

    Returns
    -------
    The DataFrame returned by ``extract_features``, or the result of the
    aggregation function applied to it along axis 0.

    NOTE(review): when every epoch is rejected the function returns the
    tuple ``(None, None)`` rather than a single value. This is kept as-is
    for backward compatibility; confirm what callers expect.

    Raises
    ------
    AttributeError
        If ``agg_mode`` is a string that is not the name of a NumPy
        attribute.
    """
    if agg_mode in ["none", "None", None]:
        agg_func = None
    elif callable(agg_mode):
        agg_func = agg_mode
    else:
        # Resolve the name up front so that an unknown aggregation fails
        # before any expensive work. The looked-up function must be kept:
        # calling the name string itself would raise a TypeError later.
        agg_func = getattr(np, agg_mode)

    epochs = split_into_epochs(signals=signals, sfreq=sfreq,
                               epoch_duration_s=epoch_duration_s)
    mask = reject_windows_with_outliers(epochs, outlier_value=max_abs_val)
    # Keep only the epochs that were NOT flagged as containing outliers.
    epochs = epochs[np.logical_not(mask)]
    if epochs.size == 0:
        logging.warning("removed all epochs due to outliers")
        return None, None

    # generate features implemented in mne_features
    features = extract_features(epochs, sfreq, selected_funcs,
                                funcs_params=func_params, return_as_df=True)

    # aggregate over dimension of epochs
    if agg_func is not None:
        features = agg_func(features, axis=0)
    return features
def test_feature_extractor():
    """Check that `FeatureExtractor` matches `extract_features`."""
    funcs = ['app_entropy']

    transformer = FeatureExtractor(sfreq=sfreq, selected_funcs=funcs)
    reference = extract_features(data, sfreq, funcs)
    assert_almost_equal(reference, transformer.fit_transform(data))

    # An invalid optional parameter value must be rejected.
    with assert_raises(ValueError):
        invalid = FeatureExtractor(
            sfreq=sfreq, selected_funcs=funcs,
            params={'app_entropy__metric': 'sqeuclidean'})
        invalid.fit_transform(data)
def test_wrong_params():
    """Check that invalid arguments to `extract_features` raise ValueError."""
    invalid_calls = (
        # Negative sfreq
        (data, -0.1, ['mean']),
        # Unknown alias of feature function
        (data, sfreq, ['power_freq_bands']),
        # No alias given
        (data, sfreq, []),
        # Passing optional arguments with unknown alias
        (data, sfreq, ['higuchi_fd'], {'higuch_fd__kmax': 3}),
    )
    for bad_args in invalid_calls:
        with assert_raises(ValueError):
            extract_features(*bad_args)
def test_feature_names_spect_slope():
    """Check the column names produced by `spect_slope`."""
    n_chans = 2  # two channels are enough to check the naming scheme
    subset = data[:, :n_chans, :]

    # One column per (channel, statistic), channels varying slowest.
    expected = []
    for ch in range(n_chans):
        for stat in ('intercept', 'slope', 'MSE', 'R2'):
            expected.append(f'ch{ch}_{stat}')

    frame = extract_features(subset, sfreq, ['spect_slope'],
                             return_as_df=True)
    assert_equal(frame.columns.get_level_values(1).values, expected)
def extract_features(eeg: EEG, trials: List[RawArray],
                     features: List[str]) -> np.ndarray:
    """Extract the requested features from a list of trials.

    :param eeg: object providing the sampling frequency as ``eeg.sfreq``
    :param trials: trials whose ``get_data()`` output is stacked
    :param features: passed as ``selected_funcs`` to
        ``feature_extraction.extract_features``
    :return: whatever ``feature_extraction.extract_features`` returns
    """
    # Pull the raw samples out of every trial
    per_trial = [trial.get_data() for trial in trials]

    # Stack the per-trial arrays with the project helper
    stacked = to_3d_matrix(per_trial)

    return feature_extraction.extract_features(
        stacked, sfreq=eeg.sfreq, selected_funcs=features)
def test_channel_naming_bivariate(selected_func, include_diag):
    """Check column names of a bivariate feature with custom channel names.

    Expected names are ``<ch_i>-<ch_j>`` for every index pair yielded by
    ``_idxiter``.
    """
    # Four explicit names followed by generic ones for remaining channels.
    names = ['Cz', 'FCz', 'P1', 'CP1']
    names.extend('CHANNEL%s' % idx for idx in range(4, n_channels))

    params = {selected_func + '__include_diag': include_diag}
    frame = extract_features(data, sfreq, [selected_func], params,
                             ch_names=names, return_as_df=True)

    expected = []
    for _, i, j in _idxiter(n_channels, include_diag=include_diag):
        expected.append((selected_func, names[i] + '-' + names[j]))
    assert frame.columns.values.tolist() == expected
def extract_features_data(data, selected_features, sfreq=256,
                          funcs_params=None):
    """Swap the last two axes of `data` and extract features as a DataFrame.

    Parameters
    ----------
    data : 3D array-like
        Transposed with ``axes=[0, 2, 1]`` before extraction; presumably
        given as (n_epochs, n_times, n_channels) -- TODO confirm.
    selected_features : str, tuple or iterable
        A single feature specification (an alias string, or a tuple which
        is passed through as one specification), or an iterable (list,
        set, ...) of such specifications.
    sfreq : float
        Sampling frequency passed to ``extract_features`` (default 256).
    funcs_params : dict or None
        Optional feature parameters passed to ``extract_features``.

    Returns
    -------
    The DataFrame returned by ``extract_features``.
    """
    # Wrapping the argument in a set literal (`{selected_features}`) only
    # works for a single hashable specification and raises TypeError for a
    # list or set of names. Normalise to a list instead: a lone str/tuple
    # is treated as one specification, anything else as a collection.
    if isinstance(selected_features, (str, tuple)):
        selected_funcs = [selected_features]
    else:
        selected_funcs = list(selected_features)

    # NOTE: NaNs in `data` are not sanitised before extraction.
    data_T = np.transpose(data, axes=[0, 2, 1])
    return extract_features(X=data_T, selected_funcs=selected_funcs,
                            funcs_params=funcs_params, sfreq=sfreq,
                            return_as_df=True, n_jobs=-1)
def test_feature_names_quantile():
    """Check the column names produced by `quantile` with two quantiles."""
    n_chans = 2  # two channels are enough to check the naming scheme
    subset = data[:, :n_chans, :]
    quantiles = [0.25, 0.75]

    # One column per (channel, quantile index), channels varying slowest.
    expected = []
    for ch in range(n_chans):
        for idx in range(len(quantiles)):
            expected.append(f'ch{ch}_{idx}')

    frame = extract_features(subset, sfreq, ['quantile'],
                             funcs_params={'quantile__q': quantiles},
                             return_as_df=True)
    assert_equal(frame.columns.get_level_values(1).values, expected)
def test_generic_features_names():
    """Check the ``(function, 'ch<i>')`` column names of many features."""
    n_chans = 2  # two channels are enough to check the naming scheme
    subset = data[:, :n_chans, :]
    funcs = [
        'mean', 'variance', 'std', 'ptp_amp', 'skewness', 'kurtosis', 'rms',
        'quantile', 'hurst_exp', 'app_entropy', 'samp_entropy',
        'decorr_time', 'hjorth_mobility_spect', 'hjorth_complexity_spect',
        'hjorth_mobility', 'hjorth_complexity', 'higuchi_fd', 'katz_fd',
        'zero_crossings', 'line_length', 'spect_entropy', 'svd_entropy',
        'svd_fisher_info',
    ]

    # Columns are expected grouped by function, then ordered by channel.
    expected = []
    for func in funcs:
        for ch in range(n_chans):
            expected.append((func, f'ch{ch}'))

    frame = extract_features(subset, sfreq, funcs, return_as_df=True)
    assert frame.columns.to_list() == expected
def test_channel_naming_pow_freq_bands():
    """Check `pow_freq_bands` ratio column names with custom channel names."""
    # Four explicit names followed by generic ones for remaining channels.
    names = ['Cz', 'FCz', 'P1', 'CP1']
    names.extend('CHANNEL%s' % idx for idx in range(4, n_channels))

    params = {
        'pow_freq_bands__freq_bands': np.array([[0, 2], [10, 20]]),
        'pow_freq_bands__ratios': 'only'
    }
    frame = extract_features(data, sfreq, ['pow_freq_bands'], params,
                             ch_names=names, return_as_df=True)

    # For each channel, one ratio column per band pair yielded by _idxiter.
    expected = []
    for name in names:
        for _, i, j in _idxiter(2, triu=False):
            expected.append(('pow_freq_bands', f'{name}_band{i}/band{j}'))
    assert frame.columns.values.tolist() == expected
def get_features(self, channels: List[str], selected_funcs: List[str],
                 notch: float = 50, low_pass: float = 4,
                 high_pass: float = 48) -> NDArray:
    """
    Fetch the raw data for the given channels, filter it and extract features.

    :param channels: channel names passed to ``get_raw_data``
    :param selected_funcs: feature aliases passed to ``extract_features``
    :param notch: forwarded to ``filter_data``
    :param low_pass: forwarded to ``filter_data``
    :param high_pass: forwarded to ``filter_data``
    :return features: output of ``extract_features`` for the filtered data
    """
    # Get the raw data
    raw = self.get_raw_data(ch_names=channels)

    # Filter
    filtered = self.filter_data(raw, notch, low_pass, high_pass)

    # Extract features.
    # NOTE(review): only element [0] of `get_data()` is used, and a leading
    # axis is added to it -- confirm this is the intended input layout.
    first = filtered.get_data()[0]
    return extract_features(first[np.newaxis], self.sfreq, selected_funcs)
def test_feature_names_energy_freq_bands():
    """Check `energy_freq_bands` column names for array and dict bands."""
    subset = data[:, :2, :]  # two channels keep the expectations short
    funcs = ['energy_freq_bands']

    # Each case: (freq_bands, expected second-level column names).
    cases = (
        (
            np.array([[4., 8.], [30., 70.]]),
            ['ch0_band0', 'ch0_band1', 'ch1_band0', 'ch1_band1'],
        ),
        (
            {'theta': [4, 8], 'low-gamma': np.array([30, 70])},
            ['ch0_theta', 'ch0_low-gamma', 'ch1_theta', 'ch1_low-gamma'],
        ),
    )
    for bands, expected in cases:
        frame = extract_features(
            subset, sfreq, funcs,
            funcs_params={'energy_freq_bands__freq_bands': bands},
            return_as_df=True)
        assert_equal(frame.columns.get_level_values(1).values, expected)
def preprocess_dataset(output_dir,
                       data_glob='/storage/inria/viovene/tuh_data/**/*.edf'):
    """Build train/test feature matrices from EDF recordings and save them.

    Recordings matching `data_glob` are shuffled and split with
    ``train_test_split``. For every recording sampled at 250 Hz, a random
    60 s window (starting at least 2 minutes after the beginning and
    ending at least 1 minute before the end) of the 21 listed channels is
    kept. The label is True when the path contains ``'abnormal'``.
    Features are extracted per split and written to
    ``<output_dir>/x_<split>.npy`` and ``<output_dir>/y_<split>.npy``.

    Parameters
    ----------
    output_dir : str
        Directory receiving the ``.npy`` files.
    data_glob : str
        Recursive glob pattern locating the EDF files.

    Raises
    ------
    ValueError
        If a split contains no usable recording.
    IndexError
        If a 250 Hz recording lacks one of the required channels.
    """
    ch_names = np.array([
        'A1', 'A2', 'C3', 'C4', 'CZ', 'F3', 'F4', 'F7', 'F8', 'FP1', 'FP2',
        'FZ', 'O1', 'O2', 'P3', 'P4', 'PZ', 'T3', 'T4', 'T5', 'T6'
    ])
    expected_sfreq = 250.0
    # A list (not a set) so the column order of the saved feature matrix
    # does not depend on set iteration order.
    selected_funcs = ['mean', 'ptp_amp', 'std']

    all_paths = glob.glob(data_glob, recursive=True)
    np.random.shuffle(all_paths)
    train_paths, test_paths = train_test_split(all_paths)

    data = {}
    for dataset_name, split_paths in [('train', train_paths),
                                      ('test', test_paths)]:
        xs = []
        ys = []
        for path in split_paths:
            f = mne.io.read_raw_edf(path)
            sfreq = f.info['sfreq']
            # Cheap filter first: only recordings at the expected rate.
            if sfreq != expected_sfreq:
                continue

            cleaned_ch_names = np.array([
                c.replace('EEG ', '').replace('-REF', '') for c in f.ch_names
            ])
            ch_idxs = np.array(
                [np.where(cleaned_ch_names == ch)[0][0] for ch in ch_names])
            x = f.get_data()[ch_idxs, :]

            # Window start is drawn from [2 min, end - 2 min).
            low = int(2 * 60 * sfreq)
            high = int(x.shape[1] - (2 * 60 * sfreq))
            if high <= low:
                # Recording too short: `randint` would raise on an empty
                # range, so skip it like other unusable recordings.
                continue
            rnd_start_idx = np.random.randint(low, high)
            x = x[:, rnd_start_idx:int(rnd_start_idx + 60 * sfreq)]

            xs.append(x[np.newaxis, :, :])
            ys.append('abnormal' in path)

        if not xs:
            raise ValueError(
                f'no usable recording found for the {dataset_name!r} split')

        x = np.concatenate(xs, axis=0)
        y = np.array(ys)
        x = extract_features(x, expected_sfreq, selected_funcs)
        data[dataset_name] = {'x': x, 'y': y}

    for dataset_name, arrays in data.items():
        for k in ['x', 'y']:
            path = os.path.join(output_dir, f'{k}_{dataset_name}.npy')
            np.save(path, arrays[k])
def test_feature_names_spect_edge_freq():
    """Check `spect_edge_freq` column names for several `edge` values.

    ``edge=None`` is passed through to ``extract_features`` unchanged so
    that the default-handling path is exercised. Its expected names are
    those of a single edge value, i.e. the same expectation this test
    previously used after replacing ``None`` with ``[.5]``.
    """
    n_chans = 2  # keep only 2 channels for the sake of simplicity
    _data = data[:, :n_chans, :]
    selected_funcs = ['spect_edge_freq']
    _edges = [None, [.5], [.5, .9]]
    for edge in _edges:
        # Do not overwrite `edge` before the call: doing so would make the
        # `None` case a duplicate of the `[.5]` case.
        n_edges = 1 if edge is None else len(edge)
        col_names = [
            'ch%s_%s' % (ch, i)
            for ch in range(n_chans) for i in range(n_edges)
        ]
        df = extract_features(_data, sfreq, selected_funcs,
                              funcs_params={'spect_edge_freq__edge': edge},
                              return_as_df=True)
        assert_equal(df.columns.get_level_values(1).values, col_names)
def test_feature_names_wavelet_coef_energy(wavelet_name='db4'):
    """Check the column names produced by `wavelet_coef_energy`."""
    n_chans = 2  # two channels are enough to check the naming scheme
    subset = data[:, :n_chans, :]

    # Expected number of columns per channel: the maximum DWT level for
    # this signal length and wavelet filter length, capped at 6.
    n_times = subset.shape[-1]
    filter_len = pywt.Wavelet(wavelet_name).dec_len
    n_levels = min(pywt.dwt_max_level(n_times, filter_len), 6)

    expected = []
    for ch in range(n_chans):
        for level in range(n_levels):
            expected.append(f'ch{ch}_{level}')

    frame = extract_features(
        subset, sfreq, ['wavelet_coef_energy'],
        funcs_params={'wavelet_coef_energy__wavelet_name': wavelet_name},
        return_as_df=True)
    assert_equal(frame.columns.get_level_values(1).values, expected)
def test_optional_params_func_with_numba():
    """Check that `higuchi_fd` accepts its optional `kmax` parameter."""
    result = extract_features(data, sfreq, ['higuchi_fd'],
                              {'higuchi_fd__kmax': 5})
    # One column per channel is expected.
    assert_equal(result.shape[-1], n_channels)
import mne
import numpy as np
from mne_features.feature_extraction import extract_features
from moabb.datasets import physionet_mi

if __name__ == '__main__':
    # get dataset
    ds = physionet_mi.PhysionetMI()
    raw = ds.get_data([2])[2]['session_0']['run_4'].pick_channels(['C3', 'C4'])
    # events_from_annotations returns (events, event_id); only the events
    # array is needed here
    events, _ = mne.events_from_annotations(raw)
    s_freq = raw.info['sfreq']

    # get x; preloading drops unusable epochs immediately, so that
    # `epochs.events` below lists exactly the epochs present in X
    epochs = mne.Epochs(raw, events, preload=True)
    X = epochs.get_data()

    # extract features
    params = {
        'pow_freq_bands__freq_bands': np.arange(1, int(s_freq / 2), 1),
    }
    # a list (not a set) so the column order of the saved CSV is stable
    selected_funcs = ['mean', 'ptp_amp', 'std', 'pow_freq_bands']
    features_array = extract_features(X, s_freq, selected_funcs, params)

    # save features to file
    np.savetxt('../data/mne/1/features.csv', features_array, delimiter=',')

    # get y (event code of every kept epoch) and save to file
    y = epochs.events[:, 2]
    np.savetxt('../data/mne/1/stimulus_vectors.csv', y)
def test_njobs():
    """Check the output shape when extraction runs with ``n_jobs=-1``."""
    result = extract_features(data, sfreq, ['app_entropy'], n_jobs=-1)
    # One `app_entropy` value per (epoch, channel).
    assert_equal(result.shape, (n_epochs, n_channels))
# Read epochs
epochs = mne.Epochs(raw, events, event_id, tmin, tmax, picks=picks, proj=True,
                    baseline=None, preload=True)
labels = epochs.events[:, -1]

# get MEG and EEG data
data = epochs.get_data()

###############################################################################
# Prepare for the classification task:

pipe = Pipeline([('scaler', StandardScaler()),
                 ('lr', LogisticRegression(random_state=42, solver='lbfgs'))])
y = labels

###############################################################################
# Classification using features (mean, peak-to-peak amplitude,
# standard deviation). See :ref:`api_documentation` for full list of supported
# features.

# A list keeps the feature columns in a fixed, documented order.
selected_funcs = ['mean', 'ptp_amp', 'std']
X_new = extract_features(data, raw.info['sfreq'], selected_funcs)
# `random_state` only has an effect when `shuffle=True`; recent scikit-learn
# versions raise a ValueError when it is set without shuffling.
kf = KFold(n_splits=3, shuffle=True, random_state=42)
scores = cross_val_score(pipe, X_new, y, scoring='accuracy', cv=kf)

###############################################################################
# Print the cross-validation score:

print('Cross-validation accuracy score = %1.3f (+/- %1.5f)' % (np.mean(scores),
                                                               np.std(scores)))