def test_feature_extractor():
    selected_funcs = ['app_entropy']
    extractor = FeatureExtractor(sfreq=sfreq, selected_funcs=selected_funcs)
    expected_features = extract_features(data, sfreq, selected_funcs)
    assert_almost_equal(expected_features, extractor.fit_transform(data))
    with assert_raises(ValueError):
        FeatureExtractor(
            sfreq=sfreq, selected_funcs=selected_funcs,
            params={'app_entropy__metric': 'sqeuclidean'}).fit_transform(data)
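
# A companion sketch (not part of the original suite): the same
# `funcname__param` convention with a metric assumed to be valid for
# `compute_app_entropy` ('chebyshev') should not raise, and should yield one
# feature row per epoch.
def test_feature_extractor_valid_params():
    extractor = FeatureExtractor(sfreq=sfreq, selected_funcs=['app_entropy'],
                                 params={'app_entropy__metric': 'chebyshev'})
    features = extractor.fit_transform(data)
    assert_equal(features.shape[0], data.shape[0])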
def test_memory_feature_extractor():
    selected_funcs = ['mean', 'zero_crossings']
    cachedir = mkdtemp()
    extractor = FeatureExtractor(sfreq=sfreq, selected_funcs=selected_funcs)
    cached_extractor = FeatureExtractor(sfreq=sfreq,
                                        selected_funcs=selected_funcs,
                                        memory=cachedir)
    y = np.ones((data.shape[0],))
    cached_extractor.fit_transform(data, y)
    # Ensure that the right features were cached
    assert_almost_equal(extractor.fit_transform(data, y),
                        cached_extractor.fit_transform(data, y))
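
# A possible cleanup companion (an assumption, not in the original test):
# since mkdtemp() creates a real directory for the joblib cache, removing it
# afterwards keeps repeated test runs from accumulating cache folders.
def test_memory_feature_extractor_cleanup():
    from shutil import rmtree
    cachedir = mkdtemp()
    extractor = FeatureExtractor(sfreq=sfreq, selected_funcs=['mean'],
                                 memory=cachedir)
    extractor.fit_transform(data)
    rmtree(cachedir, ignore_errors=True)  # drop the joblib cache directory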
def test_gridsearch_feature_extractor():
    X = data
    y = np.ones((X.shape[0],))  # dummy labels
    pipe = Pipeline([('FE', FeatureExtractor(sfreq=sfreq,
                                             selected_funcs=['higuchi_fd'])),
                     ('clf', CheckingClassifier(
                         check_X=lambda arr: arr.shape[1:] == (X.shape[1],)))])
    params_grid = {'FE__higuchi_fd__kmax': [5, 10]}
    gs = GridSearchCV(estimator=pipe, param_grid=params_grid, cv=3)
    gs.fit(X, y)
    assert_equal(hasattr(gs, 'cv_results_'), True)
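
# A short usage sketch (hypothetical helper, mirroring the test above): after
# fitting, GridSearchCV exposes the best feature-extraction parameters through
# `best_params_` and the matching cross-validation score through `best_score_`.
def inspect_gridsearch(gs):
    # `gs` is assumed to be a fitted GridSearchCV like the one above.
    best_kmax = gs.best_params_['FE__higuchi_fd__kmax']  # 5 or 10
    print('Best kmax: %s (CV score = %1.3f)' % (best_kmax, gs.best_score_))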
def preprocess_dataset(output_dir):
    data_paths = glob.glob('/storage/inria/viovene/tuh_data/**/*.edf',
                           recursive=True)
    np.random.shuffle(data_paths)
    data = Parallel(n_jobs=30)(
        delayed(preprocess_one_file)(path) for path in data_paths)
    train, test = train_test_split(data)
    x_train = np.vstack([x for (_, x, _) in train])
    x_test = np.vstack([x for (_, x, _) in test])
    y_train = np.concatenate([y for (_, _, y) in train])
    y_test = np.concatenate([y for (_, _, y) in test])
    np.save(os.path.join(output_dir, 'x_train_raw.npy'), x_train)
    np.save(os.path.join(output_dir, 'x_test_raw.npy'), x_test)
    funcs = {
        'skewness',
        'kurtosis',
        'mean',
        'variance',
        'std',
        'ptp_amp',
        'hurst_exp',
        'app_entropy',
        'pow_freq_bands',
        'hjorth_complexity',
    }
    params = {
        'pow_freq_bands__freq_bands': np.array([0.5, 4, 8, 13, 30, 49]),
        'pow_freq_bands__ratios': 'all',
        'pow_freq_bands__log': True,
    }
    fe = FeatureExtractor(sfreq=SFREQ, selected_funcs=funcs, params=params)
    x_train = fe.fit_transform(x_train)
    x_test = fe.transform(x_test)
    print(x_train.shape, x_test.shape)
    np.save(os.path.join(output_dir, 'x_train.npy'), x_train)
    np.save(os.path.join(output_dir, 'y_train.npy'), y_train)
    np.save(os.path.join(output_dir, 'x_test.npy'), x_test)
    np.save(os.path.join(output_dir, 'y_test.npy'), y_test)
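
# A minimal companion sketch (assumed, not part of the original script):
# reload the feature matrices and labels saved by `preprocess_dataset`.
def load_dataset(output_dir):
    x_train = np.load(os.path.join(output_dir, 'x_train.npy'))
    y_train = np.load(os.path.join(output_dir, 'y_train.npy'))
    x_test = np.load(os.path.join(output_dir, 'x_test.npy'))
    y_test = np.load(os.path.join(output_dir, 'y_test.npy'))
    return x_train, y_train, x_test, y_test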
    data_segments.append(_data.values.T[None, ...])
    if 'setE' in path:
        labels.append(np.ones((len(fnames),)))
    else:
        labels.append(np.zeros((len(fnames),)))
data = np.concatenate(data_segments)
y = np.concatenate(labels, axis=0)

# Shape of extracted data:
print(data.shape)

###############################################################################
# Prepare for the classification task:

selected_funcs = ['line_length', 'kurtosis', 'ptp_amp', 'skewness']
pipe = Pipeline([('fe', FeatureExtractor(sfreq=sfreq,
                                         selected_funcs=selected_funcs)),
                 ('clf', RandomForestClassifier(n_estimators=100, max_depth=4,
                                                random_state=42))])
skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)

###############################################################################
# Print the cross-validation accuracy score:

scores = cross_val_score(pipe, data, y, cv=skf)
print('Cross-validation accuracy score = %1.3f (+/- %1.5f)'
      % (np.mean(scores), np.std(scores)))
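
###############################################################################
# An optional inspection step (a sketch, assuming this version of
# `mne_features` exposes `extract_features` with a `return_as_df` option):
# extracting the same features as a pandas DataFrame makes it easy to see
# which feature/channel pair each column of the design matrix corresponds to.

from mne_features.feature_extraction import extract_features

df = extract_features(data, sfreq, selected_funcs, return_as_df=True)
print(df.columns.tolist()[:5])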
    Returns
    -------
    output : ndarray, shape (n_channels * n_times,)
    """
    return medfilt(arr, kernel_size=(1, 5)).ravel()


###############################################################################
# Prepare for the classification task
# -----------------------------------
#
# In addition to the new feature function, we also propose to extract the
# mean of the data:

selected_funcs = [('medfilt', compute_medfilt), 'mean']
pipe = Pipeline([('fe', FeatureExtractor(sfreq=raw.info['sfreq'],
                                         selected_funcs=selected_funcs)),
                 ('scaler', StandardScaler()),
                 ('clf', LogisticRegression(random_state=42))])
skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
y = labels

###############################################################################
# Print the accuracy score on a test dataset:

X_train, X_test, y_train, y_test = train_test_split(data, y, test_size=0.2)
accuracy = pipe.fit(X_train, y_train).score(X_test, y_test)
print('Accuracy score = %1.3f' % accuracy)
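
###############################################################################
# A quick sanity check (illustrative, not part of the original example): the
# custom feature function can be called directly on a single epoch to verify
# that it returns the flat vector of length `n_channels * n_times` expected
# by `FeatureExtractor`.

print(compute_medfilt(data[0]).shape)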
events = mne.read_events(event_fname)
picks = mne.pick_types(raw.info, meg='grad', eeg=False)

# Read epochs
epochs = mne.Epochs(raw, events, event_id, tmin, tmax, picks=picks,
                    proj=True, baseline=None, preload=True)
labels = epochs.events[:, -1]

# Get the MEG (gradiometer) data
data = epochs.get_data()

###############################################################################
# Prepare for the classification task:

pipe = Pipeline([('fe', FeatureExtractor(sfreq=raw.info['sfreq'],
                                         selected_funcs=['app_entropy',
                                                         'mean'])),
                 ('scaler', StandardScaler()),
                 ('clf', LogisticRegression(random_state=42))])
skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
y = labels

###############################################################################
# Cross-validation accuracy score with default parameters (`emb = 2` by
# default for `compute_app_entropy`):

scores = cross_val_score(pipe, data, y, cv=skf)
print('Cross-validation accuracy score (with default parameters) = %1.3f '
      '(+/- %1.5f)' % (np.mean(scores), np.std(scores)))

###############################################################################
                    tmin, tmax, picks=picks, proj=True, baseline=None,
                    preload=True)
labels = epochs.events[:, -1]

# Get the MEG data
data = epochs.get_data()

###############################################################################
# Prepare for the classification task:

pipe = Pipeline([('fe', FeatureExtractor(sfreq=raw.info['sfreq'],
                                         selected_funcs=['app_entropy',
                                                         'mean'])),
                 ('scaler', StandardScaler()),
                 ('clf', LogisticRegression(random_state=42, solver='lbfgs'))])
skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
y = labels

###############################################################################
# Cross-validation accuracy score with default parameters (`emb = 2` by
# default for `compute_app_entropy`):

scores = cross_val_score(pipe, data, y, cv=skf)
print('Cross-validation accuracy score (with default parameters) = %1.3f '
      '(+/- %1.5f)' % (np.mean(scores), np.std(scores)))

###############################################################################
# Optimization of the optional feature extraction parameters:
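#
# What follows is a sketch of that optimization (with assumed candidate
# values, not the original example's code): the embedding dimension of
# `compute_app_entropy` is tuned through the pipeline using the
# `fe__app_entropy__emb` parameter naming convention.

from sklearn.model_selection import GridSearchCV

params_grid = {'fe__app_entropy__emb': [2, 3, 4]}
gs = GridSearchCV(estimator=pipe, param_grid=params_grid, cv=skf)
gs.fit(data, y)
print('Best embedding dimension: %s (CV accuracy = %1.3f)'
      % (gs.best_params_['fe__app_entropy__emb'], gs.best_score_))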
    Returns
    -------
    output : ndarray, shape (n_channels * n_times,)
    """
    return medfilt(arr, kernel_size=(1, 5)).ravel()


###############################################################################
# Prepare for the classification task
# -----------------------------------
#
# In addition to the new feature function, we also propose to extract the
# mean of the data:

selected_funcs = [('medfilt', compute_medfilt), 'mean']
pipe = Pipeline([('fe', FeatureExtractor(sfreq=raw.info['sfreq'],
                                         selected_funcs=selected_funcs,
                                         memory='.')),
                 ('scaler', StandardScaler()),
                 ('clf', LogisticRegression(random_state=42))])
skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
y = labels

###############################################################################
# Print the accuracy score on a test dataset:

X_train, X_test, y_train, y_test = train_test_split(data, y, test_size=0.2)
accuracy = pipe.fit(X_train, y_train).score(X_test, y_test)
print('Accuracy score = %1.3f' % accuracy)
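
###############################################################################
# A small check (illustrative, not in the original example): with
# `memory='.'`, feature computations are cached on disk with joblib, so
# re-fitting the pipeline on the same training data should hit the cache
# instead of recomputing the features.

import time

start = time.time()
pipe.fit(X_train, y_train)  # second fit on identical data: cache hit expected
print('Second fit took %1.3f s' % (time.time() - start))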