Example #1
def test_feature_extractor():
    selected_funcs = ['app_entropy']
    extractor = FeatureExtractor(sfreq=sfreq, selected_funcs=selected_funcs)
    expected_features = extract_features(data, sfreq, selected_funcs)
    assert_almost_equal(expected_features, extractor.fit_transform(data))
    with assert_raises(ValueError):
        FeatureExtractor(
            sfreq=sfreq, selected_funcs=selected_funcs,
            params={'app_entropy__metric': 'sqeuclidean'}).fit_transform(data)
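# A minimal follow-up sketch (assuming the same `data` and `sfreq` fixtures as
# the test above): optional parameters are addressed as '<func_name>__<param>',
# mirroring scikit-learn's nested-parameter convention, and invalid values such
# as the 'sqeuclidean' metric above raise a ValueError.
fe = FeatureExtractor(sfreq=sfreq, selected_funcs=['app_entropy'],
                      params={'app_entropy__emb': 3})  # embedding dimension
features = fe.fit_transform(data)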
Example #2
def test_memory_feature_extractor():
    selected_funcs = ['mean', 'zero_crossings']
    cachedir = mkdtemp()
    extractor = FeatureExtractor(sfreq=sfreq, selected_funcs=selected_funcs)
    cached_extractor = FeatureExtractor(sfreq=sfreq,
                                        selected_funcs=selected_funcs,
                                        memory=cachedir)
    y = np.ones((data.shape[0], ))
    cached_extractor.fit_transform(data, y)
    # Ensure that the right features were cached
    assert_almost_equal(extractor.fit_transform(data, y),
                        cached_extractor.fit_transform(data, y))
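# Hedged follow-up sketch: `memory` wraps the feature computation in joblib
# caching, so the temporary directory created by mkdtemp() above should be
# removed once the test is done.
from shutil import rmtree
rmtree(cachedir, ignore_errors=True)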
Example #3
def test_gridsearch_feature_extractor():
    X = data
    y = np.ones((X.shape[0],))  # dummy labels
    pipe = Pipeline([('FE', FeatureExtractor(sfreq=sfreq,
                                             selected_funcs=['higuchi_fd'])),
                     ('clf', CheckingClassifier(
                         check_X=lambda arr: arr.shape[1:] == (X.shape[1],)))])
    params_grid = {'FE__higuchi_fd__kmax': [5, 10]}
    gs = GridSearchCV(estimator=pipe, param_grid=params_grid, cv=3)
    gs.fit(X, y)
    assert hasattr(gs, 'cv_results_')
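# Follow-up sketch (standard scikit-learn attributes, not part of the original
# test): the best `kmax` found by the search can be read back directly.
print(gs.best_params_)  # e.g. {'FE__higuchi_fd__kmax': 5}
print(gs.best_score_)   # mean cross-validated score of the best setting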
Example #4
def preprocess_dataset(output_dir):
    data_paths = glob.glob('/storage/inria/viovene/tuh_data/**/*.edf',
                           recursive=True)
    np.random.shuffle(data_paths)
    data = Parallel(n_jobs=30)(delayed(preprocess_one_file)(path)
                               for path in data_paths)
    train, test = train_test_split(data)
    x_train = np.vstack([x for (_, x, _) in train])
    x_test = np.vstack([x for (_, x, _) in test])
    y_train = np.concatenate([y for (_, _, y) in train])
    y_test = np.concatenate([y for (_, _, y) in test])
    np.save(os.path.join(output_dir, 'x_train_raw.npy'), x_train)
    np.save(os.path.join(output_dir, 'x_test_raw.npy'), x_test)
    funcs = {
        'skewness',
        'kurtosis',
        'mean',
        'variance',
        'std',
        'ptp_amp',
        'hurst_exp',
        'app_entropy',
        'pow_freq_bands',
        'hjorth_complexity',
    }
    params = {
        'pow_freq_bands__freq_bands': np.array([0.5, 4, 8, 13, 30, 49]),
        'pow_freq_bands__ratios': 'all',
        'pow_freq_bands__log': True
    }
    fe = FeatureExtractor(sfreq=SFREQ, selected_funcs=funcs, params=params)
    x_train = fe.fit_transform(x_train)
    x_test = fe.transform(x_test)
    print(x_train.shape, x_test.shape)
    np.save(os.path.join(output_dir, 'x_train.npy'), x_train)
    np.save(os.path.join(output_dir, 'y_train.npy'), y_train)
    np.save(os.path.join(output_dir, 'x_test.npy'), x_test)
    np.save(os.path.join(output_dir, 'y_test.npy'), y_test)
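# Hypothetical downstream step (not part of the original function), assuming
# the same `output_dir`: reload the saved arrays and fit a simple classifier
# on the extracted features. Note that the extractor was fit on the training
# set only, so no information leaks from the test set.
from sklearn.linear_model import LogisticRegression
x_train = np.load(os.path.join(output_dir, 'x_train.npy'))
y_train = np.load(os.path.join(output_dir, 'y_train.npy'))
x_test = np.load(os.path.join(output_dir, 'x_test.npy'))
y_test = np.load(os.path.join(output_dir, 'y_test.npy'))
clf = LogisticRegression(max_iter=1000).fit(x_train, y_train)
print('Test accuracy = %1.3f' % clf.score(x_test, y_test))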
Example #5
# (tail of a file-loading loop; the start of this snippet is truncated)
        data_segments.append(_data.values.T[None, ...])
    if 'setE' in path:
        labels.append(np.ones((len(fnames), )))
    else:
        labels.append(np.zeros((len(fnames), )))
data = np.concatenate(data_segments)
y = np.concatenate(labels, axis=0)

# Shape of extracted data:
print(data.shape)

###############################################################################
# Prepare for the classification task:
selected_funcs = ['line_length', 'kurtosis', 'ptp_amp', 'skewness']

pipe = Pipeline([('fe',
                  FeatureExtractor(sfreq=sfreq,
                                   selected_funcs=selected_funcs)),
                 ('clf',
                  RandomForestClassifier(n_estimators=100,
                                         max_depth=4,
                                         random_state=42))])
skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)

###############################################################################
# Print the cross-validation accuracy score:

scores = cross_val_score(pipe, data, y, cv=skf)
print('Cross-validation accuracy score = %1.3f (+/- %1.5f)' %
      (np.mean(scores), np.std(scores)))
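###############################################################################
# Hedged sketch (not in the original example): an accuracy score is easier to
# judge against a chance-level baseline evaluated on the same folds; a
# DummyClassifier ignores the features entirely.
from sklearn.dummy import DummyClassifier

baseline = cross_val_score(DummyClassifier(strategy='most_frequent'),
                           data.reshape(len(data), -1), y, cv=skf)
print('Chance-level accuracy = %1.3f' % np.mean(baseline))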
Example #6
def compute_medfilt(arr):
    """Median filtering of the data (channel-wise).

    Returns
    -------
    output : (n_channels * n_times,)
    """
    return medfilt(arr, kernel_size=(1, 5)).ravel()


###############################################################################
# Prepare for the classification task
# -----------------------------------
#
# In addition to the new feature function, we also propose to extract the
# mean of the data:
selected_funcs = [('medfilt', compute_medfilt), 'mean']

pipe = Pipeline([('fe',
                  FeatureExtractor(sfreq=raw.info['sfreq'],
                                   selected_funcs=selected_funcs)),
                 ('scaler', StandardScaler()),
                 ('clf', LogisticRegression(random_state=42))])
skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
y = labels

###############################################################################
# Print the accuracy score on a test dataset.

X_train, X_test, y_train, y_test = train_test_split(data, y, test_size=0.2)
accuracy = pipe.fit(X_train, y_train).score(X_test, y_test)
print('Accuracy score = %1.3f' % accuracy)
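###############################################################################
# Hedged sketch: user-defined feature functions receive only `arr`, so extra
# parameters can be bound beforehand with functools.partial. The helper
# `compute_medfilt_k` below is hypothetical, not part of the original example.
from functools import partial
from scipy.signal import medfilt


def compute_medfilt_k(arr, kernel_width=5):
    """Median filter with a configurable (odd) kernel width."""
    return medfilt(arr, kernel_size=(1, kernel_width)).ravel()


selected_funcs = [('medfilt9', partial(compute_medfilt_k, kernel_width=9)),
                  'mean']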
Example #7
events = mne.read_events(event_fname)
picks = mne.pick_types(raw.info, meg='grad', eeg=False)

# Read epochs
epochs = mne.Epochs(raw, events, event_id, tmin, tmax, picks=picks, proj=True,
                    baseline=None, preload=True)
labels = epochs.events[:, -1]

# Get the MEG data (gradiometers only, per the picks above)
data = epochs.get_data()

###############################################################################
# Prepare for the classification task:

pipe = Pipeline([('fe', FeatureExtractor(sfreq=raw.info['sfreq'],
                                         selected_funcs=['app_entropy',
                                                         'mean'])),
                 ('scaler', StandardScaler()),
                 ('clf', LogisticRegression(random_state=42))])
skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
y = labels

###############################################################################
# Cross-validation accuracy score with default parameters
# (`compute_app_entropy` uses emb=2 by default):

scores = cross_val_score(pipe, data, y, cv=skf)
print('Cross-validation accuracy score (with default parameters) = %1.3f '
      '(+/- %1.5f)' % (np.mean(scores), np.std(scores)))
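###############################################################################
# Hedged sketch: the embedding dimension mentioned above can be tuned through
# the pipeline with scikit-learn's nested-parameter syntax (cf. Example #3).
from sklearn.model_selection import GridSearchCV

gs = GridSearchCV(pipe, param_grid={'fe__app_entropy__emb': [2, 3, 4]}, cv=skf)
gs.fit(data, y)
print('Best parameters: %s' % gs.best_params_)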

###############################################################################
Example #8
                    tmin,
                    tmax,
                    picks=picks,
                    proj=True,
                    baseline=None,
                    preload=True)
labels = epochs.events[:, -1]

# get MEG and EEG data
data = epochs.get_data()

###############################################################################
# Prepare for the classification task:

pipe = Pipeline([('fe',
                  FeatureExtractor(sfreq=raw.info['sfreq'],
                                   selected_funcs=['app_entropy', 'mean'])),
                 ('scaler', StandardScaler()),
                 ('clf', LogisticRegression(random_state=42, solver='lbfgs'))])
skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
y = labels

###############################################################################
# Cross-validation accuracy score with default parameters
# (`compute_app_entropy` uses emb=2 by default):

scores = cross_val_score(pipe, data, y, cv=skf)
print('Cross-validation accuracy score (with default parameters) = %1.3f '
      '(+/- %1.5f)' % (np.mean(scores), np.std(scores)))

###############################################################################
# Optimization of the optional parameters of the feature extraction:
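# A plausible continuation (hedged sketch mirroring Example #3's grid search;
# the parameter range is illustrative):
from sklearn.model_selection import GridSearchCV

params_grid = {'fe__app_entropy__emb': np.arange(2, 5)}
gs = GridSearchCV(estimator=pipe, param_grid=params_grid, cv=skf)
gs.fit(data, y)
print('Best parameters: %s' % gs.best_params_)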
Example #9
def compute_medfilt(arr):
    """Median filtering of the data (channel-wise).

    Returns
    -------
    output : (n_channels * n_times,)
    """
    return medfilt(arr, kernel_size=(1, 5)).ravel()


###############################################################################
# Prepare for the classification task
# -----------------------------------
#
# In addition to the new feature function, we also propose to extract the
# mean of the data:
selected_funcs = [('medfilt', compute_medfilt), 'mean']

pipe = Pipeline([('fe',
                  FeatureExtractor(sfreq=raw.info['sfreq'],
                                   selected_funcs=selected_funcs,
                                   memory='.')), ('scaler', StandardScaler()),
                 ('clf', LogisticRegression(random_state=42))])
skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
y = labels

###############################################################################
# Print the accuracy score on a test dataset.

X_train, X_test, y_train, y_test = train_test_split(data, y, test_size=0.2)
accuracy = pipe.fit(X_train, y_train).score(X_test, y_test)
print('Accuracy score = %1.3f' % accuracy)
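###############################################################################
# Hedged note (assuming `memory='.'` maps onto joblib caching as in Example
# #2): the cached features persist in './joblib' across refits, which pays off
# when the pipeline is refit repeatedly, e.g. in a grid search. The cache can
# be cleared afterwards:
from joblib import Memory

Memory(location='.').clear(warn=False)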