Example #1
def N170_test(session_data):
    markers = N170_MARKERS
    epochs = get_session_erp_epochs(session_data, markers)
    conditions = OrderedDict()
    for i in range(len(markers)):
        conditions[markers[i]] = [i+1]
   
    clfs = OrderedDict()
    
    clfs['Vect + LR'] = make_pipeline(Vectorizer(), StandardScaler(), LogisticRegression())
    clfs['Vect + RegLDA'] = make_pipeline(Vectorizer(), LDA(shrinkage='auto', solver='eigen'))
    clfs['ERPCov + TS'] = make_pipeline(ERPCovariances(estimator='oas'), TangentSpace(), LogisticRegression())
    clfs['ERPCov + MDM'] = make_pipeline(ERPCovariances(estimator='oas'), MDM())
    clfs['XdawnCov + TS'] = make_pipeline(XdawnCovariances(estimator='oas'), TangentSpace(), LogisticRegression())
    clfs['XdawnCov + MDM'] = make_pipeline(XdawnCovariances(estimator='oas'), MDM())
    methods_list = ['Vect + LR','Vect + RegLDA','ERPCov + TS','ERPCov + MDM','XdawnCov + TS','XdawnCov + MDM']
    # format data
    epochs.pick_types(eeg=True)
    X = epochs.get_data() * 1e6
    times = epochs.times
    y = epochs.events[:, -1]

    # define cross validation 
    cv = StratifiedShuffleSplit(n_splits=20, test_size=0.25, 
                                random_state=42)

    # run cross validation for each pipeline
    auc = []
    methods = []
    print('Calculation in progress...')
    for m in clfs:
        try:

            res = cross_val_score(clfs[m], X, y==2, scoring='roc_auc', 
                                  cv=cv, n_jobs=-1)
            auc.extend(res)
            methods.extend([m]*len(res))
        except Exception as e:
            print("Pipeline '%s' failed: %s" % (m, e))
        
    ## Plot Decoding Results

    results = pd.DataFrame(data=auc, columns=['AUC'])
    results['Method'] = methods
    n_row, n_column = results.shape
    auc_means = []
    for method in methods_list:
        auc = []
        for i in range(n_row):
            if results.loc[i, 'Method'] == method:
                auc.append(results.loc[i, 'AUC'])
        auc_means.append(np.mean(auc))
    counter = 0
    for i in range(len(methods_list)):
        # 'color' is leftover from the original plotting code; only the count
        # of pipelines with mean AUC >= 0.7 feeds the return value
        color = 'green' if auc_means[i] >= 0.7 else 'red'
        counter = counter + 1 if auc_means[i] >= 0.7 else counter
        
    return counter > 0, counter
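
# A minimal driver sketch (not from the original source): `session_data` and
# the helpers N170_MARKERS / get_session_erp_epochs are assumed to be
# provided by the surrounding project.
passed, n_good = N170_test(session_data)
print('N170 check passed:', passed)
print('pipelines with mean AUC >= 0.7:', n_good)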
def xdawn_embedding(data, use_xdawn):
    """Embed EEG epochs in a low-dimensional space.

    Uses Laplacian Eigenmaps on Xdawn covariance matrices when `use_xdawn`
    is True, otherwise UMAP on tangent-space vectors.

    Parameters
    ----------
    data : dict
        A dictionary containing training and testing data under the keys
        'train_x', 'train_y' and 'test_x'
    use_xdawn : bool
        Select the Xdawn + Laplacian Eigenmaps branch instead of the
        Covariances + tangent space + UMAP branch

    Returns
    -------
    array
        Embedded test data points

    """

    if use_xdawn:
        nfilter = 3
        xdwn = XdawnCovariances(estimator='scm', nfilter=nfilter)
        covs = xdwn.fit(data['train_x'],
                        data['train_y']).transform(data['test_x'])

        lapl = Embedding(metric='riemann', n_components=3)
        embd = lapl.fit_transform(covs)
    else:
        tangent_space = Pipeline([
            ('cov_transform', Covariances(estimator='lwf')),
            ('tangent_space', TangentSpace(metric='riemann'))
        ])
        t_space = tangent_space.fit(data['train_x'],
                                    data['train_y']).transform(data['test_x'])
        reducer = umap.UMAP(n_neighbors=30, min_dist=1, spread=2)
        embd = reducer.fit_transform(t_space)

    return embd
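
# A self-contained usage sketch for this function; the array shapes and the
# balanced binary labels below are illustrative assumptions only.
import numpy as np

rng = np.random.RandomState(42)
data = {
    'train_x': rng.randn(40, 8, 128),  # 40 training epochs, 8 channels
    'train_y': np.tile([0, 1], 20),    # balanced binary labels
    'test_x': rng.randn(20, 8, 128),   # 20 test epochs
}
embd = xdawn_embedding(data, use_xdawn=True)  # -> (20, 3) embedded points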
def get_sourcetarget_split_p300(source, target, ncovs_train):

    X_source = source['signals']
    y_source = source['labels'].flatten()
    covs_source = XdawnCovariances(classes=[2]).fit_transform(
        X_source, y_source)

    source = {}
    source['covs'] = covs_source
    source['labels'] = y_source

    X_target = target['signals']
    y_target = target['labels'].flatten()

    if ncovs_train is None:
        ncovs_train = np.sum(y_target == 2)

    sel = np.arange(len(y_target))
    np.random.shuffle(sel)
    X_target = X_target[sel]
    y_target = y_target[sel]

    idx_erps = np.where(y_target == 2)[0][:ncovs_train]
    # one flash in six is a target, so keep five rest epochs per target ERP
    idx_rest = np.where(y_target == 1)[0][:ncovs_train * 5]

    idx_train = np.concatenate([idx_erps, idx_rest])
    idx_test = np.array(
        [i for i in range(len(y_target)) if i not in idx_train])

    erp = XdawnCovariances(classes=[2])
    erp.fit(X_target[idx_train], y_target[idx_train])

    target_train = {}
    covs_target_train = erp.transform(X_target[idx_train])
    y_target_train = y_target[idx_train]
    target_train['covs'] = covs_target_train
    target_train['labels'] = y_target_train

    target_test = {}
    covs_target_test = erp.transform(X_target[idx_test])
    y_target_test = y_target[idx_test]
    target_test['covs'] = covs_target_test
    target_test['labels'] = y_target_test

    return source, target_train, target_test
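
# A hedged sketch of the expected inputs; the 1 = non-target / 2 = target
# label convention is taken from the function body, while the shapes and
# counts below are illustrative assumptions.
import numpy as np

rng = np.random.RandomState(0)
labels = np.array([2] * 20 + [1] * 100)  # roughly 1 target per 6 flashes
source = {'signals': rng.randn(120, 8, 128), 'labels': labels}
target = {'signals': rng.randn(120, 8, 128), 'labels': labels.copy()}
src, tgt_train, tgt_test = get_sourcetarget_split_p300(
    source, target, ncovs_train=5)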
Example #4
def test_xdawn_covariances_applyfilters(rndstate, get_labels):
    n_classes, nfilter = 2, 2
    n_matrices, n_channels, n_times = 4, 6, 100
    x = rndstate.randn(n_matrices, n_channels, n_times)
    labels = get_labels(n_matrices, n_classes)
    cov = XdawnCovariances(nfilter=nfilter, applyfilters=False)
    covmats = cov.fit_transform(x, labels)
    covsize = n_classes * nfilter + n_channels
    assert covmats.shape == (n_matrices, covsize, covsize)
    assert is_spsd(covmats)
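
# Dimension check (my own reading of the pyriemann behaviour): with
# applyfilters=False the Xdawn-filtered class prototypes (n_classes * nfilter
# rows) are stacked on the unfiltered channels, so
# covsize = n_classes * nfilter + n_channels = 2 * 2 + 6 = 10.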
Example #5
def test_Xdawncovariances():
    """Test fit ERPCovariances"""
    x = np.random.randn(10, 3, 100)
    labels = np.array([0, 1]).repeat(5)
    cov = XdawnCovariances()
    cov.fit_transform(x, labels)
    assert_equal(cov.get_params(), dict(nfilter=4, applyfilters=True,
                                        classes=None, estimator='scm',
                                        xdawn_estimator='scm',
                                        baseline_cov=None))
Example #6
def test_xdawn_covariances_nfilter(nfilter, rndstate, get_labels):
    """Test fit XdawnCovariances"""
    n_classes, n_matrices, n_channels, n_times = 2, 4, 8, 100
    x = rndstate.randn(n_matrices, n_channels, n_times)
    labels = get_labels(n_matrices, n_classes)
    cov = XdawnCovariances(nfilter=nfilter)
    covmats = cov.fit_transform(x, labels)
    assert cov.get_params() == dict(
        nfilter=nfilter,
        applyfilters=True,
        classes=None,
        estimator="scm",
        xdawn_estimator="scm",
        baseline_cov=None,
    )
    covsize = 2 * (n_classes * nfilter)
    assert covmats.shape == (n_matrices, covsize, covsize)
    assert is_spsd(covmats)
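
# Dimension check (my own reading of the pyriemann behaviour): with the
# default applyfilters=True the filtered signal and the filtered prototypes
# each contribute n_classes * nfilter rows, so
# covsize = 2 * (n_classes * nfilter), e.g. 12x12 matrices for nfilter=3.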
def xdawn_embedding(data):
    """Embed EEG epochs in a 3D space with Laplacian Eigenmaps
    computed on Xdawn covariance matrices.

    Parameters
    ----------
    data : dict
        A dictionary containing training and testing data under the keys
        'train_x', 'train_y' and 'test_x'

    Returns
    -------
    array
        Embedded test data points

    """

    nfilter = 3
    xdwn = XdawnCovariances(estimator='scm', nfilter=nfilter)
    covs = xdwn.fit(data['train_x'], data['train_y']).transform(data['test_x'])

    lapl = Embedding(metric='riemann', n_components=3)
    embd = lapl.fit_transform(covs)

    return embd
                reject={'eeg': 75e-6}, preload=True,
                verbose=False, picks=[0,1,2,3])

print('sample drop %: ', (1 - len(epochs.events)/len(events)) * 100)
epochs

###################################################################################################
# Run classification
# ----------------------------

clfs = OrderedDict()
clfs['Vect + LR'] = make_pipeline(Vectorizer(), StandardScaler(), LogisticRegression())
clfs['Vect + RegLDA'] = make_pipeline(Vectorizer(), LDA(shrinkage='auto', solver='eigen'))
clfs['ERPCov + TS'] = make_pipeline(ERPCovariances(estimator='oas'), TangentSpace(), LogisticRegression())
clfs['ERPCov + MDM'] = make_pipeline(ERPCovariances(estimator='oas'), MDM())
clfs['XdawnCov + TS'] = make_pipeline(XdawnCovariances(estimator='oas'), TangentSpace(), LogisticRegression())
clfs['XdawnCov + MDM'] = make_pipeline(XdawnCovariances(estimator='oas'), MDM())

# format data
epochs.pick_types(eeg=True)
X = epochs.get_data() * 1e6
times = epochs.times
y = epochs.events[:, -1]

# define cross validation 
cv = StratifiedShuffleSplit(n_splits=20, test_size=0.25, random_state=42)

# run cross validation for each pipeline
auc = []
methods = []
    subject_dir_list = test_list_np[i]
    subject_epoch = np.empty((0, 56, 260), float)
    for j in range(5):
        subject_dir = subject_dir_list[j]
        data = epoching('./data/test/' + subject_dir)
        subject_epoch = np.vstack((subject_epoch, data))
    subject_epoch = np.reshape(subject_epoch, (1, 340, 56, 260))
    test_data_list = np.vstack((test_data_list, subject_epoch))

print('Epoched training data shape: ' + str(train_data_list.shape))
print('Epoched testing data shape: ' + str(test_data_list.shape))

########################## apply data preprocessing ############################
y_train = pd.read_csv('TrainLabels.csv')['Prediction'].values
y_test = np.reshape(pd.read_csv('true_labels.csv', header=None).values, 3400)
XC = XdawnCovariances(nfilter=5)
output_train = XC.fit_transform(
    np.reshape(train_data_list, (16 * 340, 56, 260)), y_train)
X_train = TangentSpace(metric='riemann').fit_transform(output_train)
output_test = XC.fit_transform(np.reshape(test_data_list, (10 * 340, 56, 260)),
                               y_test)
X_test = TangentSpace(metric='riemann').fit_transform(output_test)
print('Preprocessed training data shape: ' + str(X_train.shape))
print('Preprocessed testing data shape: ' + str(X_test.shape))

############################## save data to disk ###############################
np.save('./data/train_data_56_260_1_40Hz.npy', train_data_list)
np.save('./data/test_data_56_260_1_40Hz.npy', test_data_list)
np.save('./data/X_train', X_train)
np.save('./data/X_test', X_test)
    stats['VR'] = {}
    stats['PC'] = {}

    for condition in datasets.keys():

        # get the epochs and labels
        X, y, meta = paradigm.get_data(datasets[condition], subjects=[subject])
        y = LabelEncoder().fit_transform(y)

        data[condition]['X'] = X
        data[condition]['y'] = y

        # estimate xDawn covs
        ncomps = 4
        erp = XdawnCovariances(classes=[1],
                               estimator='lwf',
                               nfilter=ncomps,
                               xdawn_estimator='lwf')
        #erp = ERPCovariances(classes=[1], estimator='lwf', svd=ncomps)
        split = train_test_split(X, y, train_size=0.50, random_state=42)
        Xtrain, Xtest, ytrain, ytest = split
        covs = erp.fit(Xtrain, ytrain).transform(Xtest)

        Mtarget = mean_riemann(covs[ytest == 1])
        Mnontarget = mean_riemann(covs[ytest == 0])
        stats[condition]['distance'] = distance_riemann(Mtarget, Mnontarget)
        stats[condition]['dispersion_target'] = np.sum(
            [distance_riemann(covi, Mtarget)**2
             for covi in covs[ytest == 1]]) / len(covs[ytest == 1])
        stats[condition]['dispersion_nontarget'] = np.sum([
            distance_riemann(covi, Mnontarget)**2 for covi in covs[ytest == 0]
        ]) / len(covs[ytest == 0])
Example #11
# ____________________________________________________________________________
# Create pipelines
# ----------------
# Pipelines must be a dict of sklearn pipeline transformers.
pipelines = {}
# we have to do this because the classes are called 'Target' and 'NonTarget'
# but the evaluation function uses a LabelEncoder, transforming them
# to 0 and 1
labels_dict = {'Target': 1, 'NonTarget': 0}
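
# The encoding can be checked directly; a small sanity-check sketch of
# sklearn's alphabetical LabelEncoder behaviour (not part of the original
# script).
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder().fit(['Target', 'NonTarget'])
print(dict(zip(le.classes_, le.transform(le.classes_))))
# -> {'NonTarget': 0, 'Target': 1}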

# %%
# from sklearn.preprocessing import StandardScaler

pipelines['RG + LRR'] = make_pipeline(
    XdawnCovariances(nfilter=2,
                     classes=[labels_dict['Target']],
                     estimator='lwf',
                     xdawn_estimator='lwf'), TangentSpace(), LRR())

# %%
pipelines['Xdawn + LRR'] = make_pipeline(Xdawn(nfilter=2, estimator='lwf'),
                                         Vectorizer(), LRR())

# %%
pipelines['LRR'] = make_pipeline(Vectorizer(), LRR())

# ____________________________________________________________________________
# Evaluation
# %%
paradigm = P300(resample=128)
dataset = BNCI2015003()
from pyriemann.channelselection import ElectrodeSelection

from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import KFold  # sklearn.cross_validation was removed in modern scikit-learn
from sklearn.metrics import roc_auc_score

from utils import (DownSampler, EpochsVectorizer, CospBoostingClassifier,
                   epoch_data)

dataframe1 = pd.read_csv('ecog_train_with_labels.csv')

array_clfs = OrderedDict()

# ERPs models
array_clfs['XdawnCov'] = make_pipeline(XdawnCovariances(6, estimator='oas'),
                                       TangentSpace('riemann'),
                                       LogisticRegression(penalty='l2'))

array_clfs['Xdawn'] = make_pipeline(Xdawn(12, estimator='oas'), DownSampler(5),
                                    EpochsVectorizer(),
                                    LogisticRegression(penalty='l2'))

# Induced activity models

baseclf = make_pipeline(
    ElectrodeSelection(10, metric=dict(mean='logeuclid', distance='riemann')),
    TangentSpace('riemann'),
    LogisticRegression(penalty='l1', solver='liblinear'))

array_clfs['Cosp'] = make_pipeline(
    CospCovariances(fs=1000, window=32, overlap=0.95, fmax=300, fmin=1),
Example #13
def pyR_decoding_on_full_epochs(X,
                                y,
                                plot_conf_matrix=0,
                                class_names=None,
                                test_size=0.2,
                                n_splits=5,
                                classifier='ERP_cov'):
    """ This function decodes on the full epoch using the pyRiemannian decoder
    cf https://github.com/Team-BK/Biomag2016/blob/master/Final_Submission.ipynb

    Parameters
    ---------
    X : data extracted from the epochs provided to the decoder
    y : categorical variable (i.e. discrete but it can be more then 2 categories)
    plot_confusion_matrix : set to 1 if you wanna see the confusion matrix
    class_names: needed for the legend if confusion matrices are plotted ['cat1','cat2','cat3']
    test_size : proportion of the data on which you wanna test the decoder
    n_splits : when calculating the score, number of cross-validation folds
    classifier : set it to 'ERP_cov', 'Xdawn_cov' or 'Hankel_cov' depending on the classification you want to do.

    Returns: scores, y_test, y_pred, cnf_matrix or just scores if you don't want the confusion matrix
    -------

    """

    # ------- define the classifier -------
    if classifier == 'ERP_cov':
        spatial_filter = UnsupervisedSpatialFilter(PCA(20), average=False)
        ERP_cov = ERPCovariances(estimator='lwf')
        CSP_30 = CSP(30, log=False)
        tang = TangentSpace('logeuclid')
        clf = make_pipeline(spatial_filter, ERP_cov, CSP_30, tang,
                            LogisticRegression('l2'))

    if classifier == 'Xdawn_cov':
        clf = make_pipeline(
            UnsupervisedSpatialFilter(PCA(50), average=False),
            XdawnCovariances(12, estimator='lwf', xdawn_estimator='lwf'),
            TangentSpace('logeuclid'), LogisticRegression('l2'))

    if classifier == 'Hankel_cov':
        clf = make_pipeline(
            UnsupervisedSpatialFilter(PCA(70), average=False),
            HankelCovariances(delays=[1, 8, 12, 64], estimator='oas'),
            CSP(15, log=False), TangentSpace('logeuclid'),
            LogisticRegression('l2'))

    cv = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=4343)
    y = np.asarray(y)
    scores = []
    for train_index, test_index in cv.split(X, y):
        print('CV fold: %d train / %d test epochs'
              % (len(train_index), len(test_index)))
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        # Train on X_train, y_train
        clf.fit(X_train, y_train)
        # Predict the category on X_test
        y_pred = clf.predict(X_test)

        scores.append(accuracy_score(y_true=y_test, y_pred=y_pred))
    scores = np.asarray(scores)

    if plot_conf_matrix == 1:

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=7, stratify=y)
        print('train and test have been split')
        y_pred = clf.fit(X_train, y_train).predict(X_test)
        # Compute confusion matrix
        cnf_matrix = confusion_matrix(y_test, y_pred)
        np.set_printoptions(precision=2)
        print(cnf_matrix)

        # Plot non-normalized confusion matrix
        plt.figure()
        plot_confusion_matrix(cnf_matrix,
                              classes=class_names,
                              title='Confusion matrix, without normalization')

        # Plot normalized confusion matrix
        plt.figure()
        plot_confusion_matrix(cnf_matrix,
                              classes=class_names,
                              normalize=True,
                              title='Normalized confusion matrix')

        plt.show()
        return scores, y_test, y_pred, cnf_matrix

    return scores
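
# A minimal call sketch: X is an (n_epochs, n_channels, n_times) array and y
# a matching label vector; the synthetic shapes below are illustrative
# assumptions, and only `scores` comes back when no confusion matrix is
# requested.
import numpy as np

rng = np.random.RandomState(1)
X_demo = rng.randn(60, 64, 100)      # 60 epochs, 64 channels, 100 samples
y_demo = np.repeat([0, 1, 2], 20)    # three balanced categories
scores = pyR_decoding_on_full_epochs(X_demo, y_demo, classifier='Xdawn_cov')
print('mean CV accuracy:', scores.mean())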
Example #14
print('sample drop %: ', (1 - len(epochs.events) / len(events)) * 100)
epochs

###################################################################################################
# Run classification
# ----------------------------

clfs = OrderedDict()
clfs['Vect + LR'] = make_pipeline(Vectorizer(), StandardScaler(),
                                  LogisticRegression())
clfs['Vect + RegLDA'] = make_pipeline(Vectorizer(),
                                      LDA(shrinkage='auto', solver='eigen'))
clfs['ERPCov + TS'] = make_pipeline(ERPCovariances(estimator='oas'),
                                    TangentSpace(), LogisticRegression())
clfs['ERPCov + MDM'] = make_pipeline(ERPCovariances(estimator='oas'), MDM())
clfs['XdawnCov + TS'] = make_pipeline(XdawnCovariances(estimator='oas'),
                                      TangentSpace(), LogisticRegression())
clfs['XdawnCov + MDM'] = make_pipeline(XdawnCovariances(estimator='oas'),
                                       MDM())

# format data
epochs.pick_types(eeg=True)
X = epochs.get_data() * 1e6
times = epochs.times
y = epochs.events[:, -1]

# define cross validation
cv = StratifiedShuffleSplit(n_splits=20, test_size=0.25, random_state=42)

# run cross validation for each pipeline
auc = []
Example #15
                    tmin,
                    tmax,
                    proj=False,
                    picks=picks,
                    baseline=None,
                    preload=True,
                    verbose=False)

X = epochs.get_data()
y = epochs.events[:, -1]

###############################################################################
# Embedding the Xdawn covariance matrices with Laplacian Eigenmaps

nfilter = 4
xdwn = XdawnCovariances(estimator='scm', nfilter=nfilter)
split = train_test_split(X, y, train_size=0.25, random_state=42)
Xtrain, Xtest, ytrain, ytest = split
covs = xdwn.fit(Xtrain, ytrain).transform(Xtest)

lapl = Embedding(metric='riemann', n_components=2)
embd = lapl.fit_transform(covs)

###############################################################################
# Plot the first two components of the embedded points

fig, ax = plt.subplots(figsize=(7, 8), facecolor='white')

for cond, label in event_id.items():
    idx = (ytest == label)
    ax.scatter(embd[idx, 0], embd[idx, 1], s=36, label=cond)
Example #16
    def _init(self):
        self.n_components = self.params.get('n_components', 3)
        self.pipeline = make_pipeline(
            XdawnCovariances(self.n_components),
            TangentSpace(metric='riemann'),
            LogisticRegression())
Example #17
def test_Xdawncovariances():
    """Test fit ERPCovariances"""
    x = np.random.randn(10, 3, 100)
    labels = np.array([0, 1]).repeat(5)
    cov = XdawnCovariances()
    cov.fit_transform(x, labels)
Example #18
def decode(epochs,
           get_y_label_func,
           epoch_filter=None,
           decoding_method='standard',
           sliding_window_size=None,
           sliding_window_step=None,
           n_jobs=multiprocessing.cpu_count(),
           equalize_event_counts=True,
           only_fit=False,
           generalize_across_time=True):
    """
    Basic flow for decoding
    """

    config = dict(equalize_event_counts=equalize_event_counts,
                  only_fit=only_fit,
                  sliding_window_size=sliding_window_size,
                  sliding_window_step=sliding_window_step,
                  decoding_method=decoding_method,
                  generalize_across_time=generalize_across_time,
                  epoch_filter=str(epoch_filter))

    if epoch_filter is not None:
        epochs = epochs[epoch_filter]

    #-- Classify epochs into groups (training epochs)
    y_labels = get_y_label_func(epochs)

    if equalize_event_counts:
        epochs.events[:, 2] = y_labels
        epochs.event_id = {str(label): label for label in np.unique(y_labels)}
        min_n_items_per_y_label = min(
            [len(epochs[cond]) for cond in epochs.event_id.keys()])
        print("\nEqualizing the number of epochs to %d per condition..." %
              min_n_items_per_y_label)
        epochs.equalize_event_counts(epochs.event_id.keys())
        y_labels = epochs.events[:, 2]

    print("The epochs were classified into %d groups:" % len(set(y_labels)))
    for g in set(y_labels):
        print("Group {:}: {:} epochs".format(g, sum(np.array(y_labels) == g)))

    #-- Create the decoding pipeline
    print("Creating the classification pipeline...")

    epochs_data = epochs.get_data()

    preprocess_pipeline = None

    if decoding_method.startswith('standard'):

        if 'reg' in decoding_method:
            clf = make_pipeline(StandardScaler(), Ridge())
        else:
            clf = make_pipeline(
                StandardScaler(),
                svm.SVC(C=1, kernel='linear', class_weight='balanced'))

        if 'raw' not in decoding_method:
            assert sliding_window_size is not None
            assert sliding_window_step is not None
            preprocess_pipeline = \
                make_pipeline(umne.transformers.SlidingWindow(window_size=sliding_window_size, step=sliding_window_step, average=True))

    elif decoding_method == 'ERP_cov':
        clf = make_pipeline(
            UnsupervisedSpatialFilter(PCA(20), average=False),
            ERPCovariances(
                estimator='lwf'),  # todo how to apply sliding window?
            CSP(30, log=False),
            TangentSpace('logeuclid'),
            LogisticRegression(penalty='l2'))  # todo why logistic regression?

    elif decoding_method == 'Xdawn_cov':
        clf = make_pipeline(
            UnsupervisedSpatialFilter(PCA(50), average=False),
            XdawnCovariances(12, estimator='lwf', xdawn_estimator='lwf'),
            TangentSpace('logeuclid'), LogisticRegression(penalty='l2'))

    elif decoding_method == 'Hankel_cov':
        clf = make_pipeline(
            UnsupervisedSpatialFilter(PCA(70), average=False),
            HankelCovariances(delays=[1, 8, 12, 64], estimator='oas'),
            CSP(15, log=False), TangentSpace('logeuclid'),
            LogisticRegression(penalty='l2'))

    else:
        raise Exception('Unknown decoding method: {:}'.format(decoding_method))

    print('\nDecoding pipeline:')
    for i in range(len(clf.steps)):
        print('Step #{:}: {:}'.format(i + 1, clf.steps[i][1]))

    if preprocess_pipeline is not None:
        print('\nApplying the pre-processing pipeline:')
        for i in range(len(preprocess_pipeline.steps)):
            print('Step #{:}: {:}'.format(i + 1,
                                          preprocess_pipeline.steps[i][1]))
        epochs_data = preprocess_pipeline.fit_transform(epochs_data)

    if only_fit:

        #-- Only fit the decoders

        procedure = 'only_fit'
        scores = None
        cv = None

        if decoding_method.startswith('standard'):
            if 'reg' in decoding_method:
                if 'r2' in decoding_method:
                    scoring = metrics.make_scorer(metrics.r2_score)
                else:
                    scoring = metrics.make_scorer(metrics.mean_squared_error)
            else:
                scoring = 'accuracy'
            if generalize_across_time:
                estimator = GeneralizingEstimator(clf,
                                                  scoring=scoring,
                                                  n_jobs=n_jobs)
            else:
                estimator = SlidingEstimator(clf,
                                             scoring=scoring,
                                             n_jobs=n_jobs)
        else:
            estimator = clf

        estimator.fit(X=epochs_data, y=y_labels)

    else:

        #-- Classify & score -- cross-validation

        procedure = 'fit_and_score'
        print(
            "\nCreating a classifier and calculating accuracy scores (this may take some time)..."
        )

        cv = StratifiedKFold(n_splits=5)
        if decoding_method.startswith('standard'):
            if 'reg' in decoding_method:
                if 'r2' in decoding_method:
                    scoring = metrics.make_scorer(metrics.r2_score)
                else:
                    scoring = metrics.make_scorer(metrics.mean_squared_error)

            else:
                scoring = 'accuracy'
            if generalize_across_time:
                estimator = GeneralizingEstimator(clf,
                                                  scoring=scoring,
                                                  n_jobs=n_jobs)
            else:
                estimator = SlidingEstimator(clf,
                                             scoring=scoring,
                                             n_jobs=n_jobs)

            scores = cross_val_multiscore(estimator=estimator,
                                          X=epochs_data,
                                          y=np.array(y_labels),
                                          cv=cv)
        else:
            scores = _run_cross_validation(X=epochs_data,
                                           y=np.array(y_labels),
                                           clf=clf,
                                           cv=cv)
            estimator = 'None'  # Estimator is not defined in the case of Riemannian decoding

    times = np.linspace(epochs.tmin, epochs.tmax, epochs_data.shape[2])

    return dict(procedure=procedure,
                estimator=estimator,
                scores=scores,
                pipeline=clf,
                preprocess=preprocess_pipeline,
                cv=cv,
                times=times,
                config=config)
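
# A hedged call sketch for decode(): `epochs` is an mne.Epochs object
# prepared elsewhere, and the label function (reading each epoch's event
# code) is a hypothetical example.
results = decode(epochs,
                 get_y_label_func=lambda e: e.events[:, 2],
                 decoding_method='standard',
                 sliding_window_size=10,
                 sliding_window_step=5)
print(results['procedure'], results['scores'].mean(axis=0))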
Example #19
    def __run_strat_validation_RG(
            self,
            epochs,
            n_strat_folds=5,
            shuffle=False,
            random_state=42,
            RG_Pipeline_Num=0,
            estimator='lwf',
            class_names=['Rest', '13 Hz', '17 Hz', '21 Hz'],
            accuracy_threshold=0.7):
        """
        Complete a stratified cross-validation using a Riemannian geometry pipeline.
        Parameters
        ----------
        epochs : Epoch Object from MNE
            Epoch data held in an appropriate MNE format. This could be derived from
            mne.Epochs, or using the `build_epochs` command included in this script.
        n_strat_folds : int, optional
            Number of folds for the stratified K-Fold cross-validation.
            This value should be chosen carefully to avoid unbalanced classes.
            The default is 5.
        shuffle : bool, optional
            Shuffle training set data. See sklearn.model_selection.StratifiedKFold
            for more details.
            The default is False.
        random_state : int, optional
            The value to be used as the 'seed' for `numpy.random.RandomState`.
            See sklearn.model_selection.StratifiedKFold for more details.
            The default is 42.
        RG_Pipeline_Num : int, optional
            Which pre-defined Riemannian geometry pipeline to run for analysis.
            Can be 0,1,2,3:
                Pipeline 0:
                    Covariance w/ estimator -> Riemannian KNN
                Pipeline 1:
                    Covariance w/ estimator -> CSP -> TangentSpace -> LogisticRegression
                    LogReg uses a 'balanced' option for class weights, l2 penalty.
                Pipeline 2:
                    XDawnCovariance w/ estimator -> TangentSpace -> LogisticRegression
                    LogReg uses an elasticnet penalty, the 'saga' solver and a multinomial multi_class flag.
                Pipeline 3:
                    Covariance w/ estimator -> MDM.
                    Minimum distance to mean (MDM) is the main classification scheme.
            The default is 0.
        estimator : str, optional
            Covariance matrix estimator to use. For regularization consider 'lwf'
            or 'oas'. For complete lists, see pyriemann.utils.covariance.
            The default is 'lwf'.
        class_names : List, optional
            List of names for the confusion matrix plot.
            The default is ['Rest','13 Hz','17 Hz','21 Hz'].
        accuracy_threshold : float, optional
            Threshold for determining which folds are 'good' fits. Accuracy found
            above the threshold (e.g. 70% or greater) will be reported as good fit
            folds.
            The default is 0.7.
        Returns
        -------
        DICT
            Dictionary of outputs are returned for the user.
            In order:
                Fold accuracy -'Fold Acc'
                Indices for `good` training folds > or = to accuracy_threshold value - 'Good Train Ind'
                Indices for `good` test folds > or = to given accuracy_threshold value -  'Good Test Ind'
                Indices for `bad` train folds < given accuracy_threshold value - 'Bad Train Ind'
                Indices for `bad` test folds < given accuracy_threshold value - 'Bad Test Ind'
                List of predicted classes from the RG Pipeline - 'Prediction List'
                List of true classes from the RG Pipeline - 'True Class List'
        See Also
        --------
        mne.Epochs
        sklearn.model_selection.StratifiedKFold
        sklearn.linear_model.LogisticRegression
        pyriemann.estimation.Covariances
        pyriemann.estimation.XdawnCovariances
        pyriemann.spatialfilters.CSP
        pyriemann.tangentspace.TangentSpace
        pyriemann.classification.MDM
        pyriemann.classification.KNearestNeighbor (Riemannian KNN)
        """

        # Set the stratified CV model; the class labels are supplied later,
        # when cv_strat.split(X_data, labels) is called in the loop below
        cv_strat = StratifiedKFold(
            n_splits=n_strat_folds, shuffle=shuffle,
            random_state=random_state if shuffle else None)

        # Run one of the pre-defined pipelines
        if RG_Pipeline_Num == 1:
            clf = make_pipeline(
                Covariances(estimator=estimator), CSP(log=False),
                TangentSpace(),
                LogisticRegression(class_weight='balanced', max_iter=500))
        elif RG_Pipeline_Num == 2:
            clf = make_pipeline(
                XdawnCovariances(estimator=estimator,
                                 xdawn_estimator=estimator), TangentSpace(),
                LogisticRegression(penalty='elasticnet',
                                   class_weight=None,
                                   solver='saga',
                                   multi_class='multinomial',
                                   l1_ratio=0.5,
                                   max_iter=500))
        elif RG_Pipeline_Num == 3:
            clf = make_pipeline(Covariances(estimator=estimator),
                                MDM())  # This is the best so far
        else:
            print(
                "...Running a default pipeline for RG using Covariance, and KNN..."
            )
            clf = make_pipeline(Covariances(estimator=estimator), riem_KNN())

        # Get the labels for the data
        labels = epochs.events[:, -1]
        # Identify the data itself
        X_data = epochs.get_data()
        # Get the class names for the confusion matrix
        class_names = class_names

        # Make empty lists for each item in the stratified CV
        acc_list = []
        preds_list = []
        true_class_list = []
        good_train_indx = []
        good_test_indx = []
        bad_train_indx = []
        bad_test_indx = []

        # For loop testing each iteration of the stratified cross-validation
        for train_idx, test_idx in cv_strat.split(X_data, labels):
            # Get the x_train and x_test data for this fold
            x_train, x_test = X_data[train_idx], X_data[test_idx]
            # Get the y_train and y_test data for this fold
            y_train, y_test = labels[train_idx], labels[test_idx]
            # Fit the classifier
            clf.fit(x_train, y_train)
            # Find the predicted value on the test data in this fold
            preds = clf.predict(x_test)
            # Save in list
            preds_list.append(preds)
            # Save the true class labels in a list for this fold
            true_class_list.append(y_test)
            # Find the accuracy on average from this prediction
            acc_mean = np.average(preds == y_test)
            # Save the accuracy to a list
            acc_list.append(acc_mean)
            # Find out where the 'Good' training folds are. (Greater than threshold)
            if acc_mean >= accuracy_threshold:
                print(
                    "Train indices above accuracy threshold of " +
                    str(accuracy_threshold * 100) + "% are: ", train_idx)
                print(
                    "Test indices above accuracy threshold of " +
                    str(accuracy_threshold * 100) + "% are: ", test_idx)
                good_train_indx.append(train_idx)
                good_test_indx.append(test_idx)
            # Find out where the 'Bad' training folds are. (Less than threshold)
            else:
                bad_train_indx.append(train_idx)
                bad_test_indx.append(test_idx)
            # Make a plot for the confusion matrix
            fig = plt.figure()
            plot_confusion_matrix(y_test, preds, class_names)
        # Print out the final results from across all folds on average
        print(
            "The overall accuracy with " + str(n_strat_folds) +
            "-fold stratified CV was: ", np.average(acc_list))

        # Return output vals
        return dict({
            'Fold Acc': acc_list,
            'Good Train Ind': good_train_indx,
            'Good Test Ind': good_test_indx,
            'Bad Train Ind': bad_train_indx,
            'Bad Test Ind': bad_test_indx,
            'Prediction List': preds_list,
            'True Class List': true_class_list
        })
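
# The predefined pipelines can also be assembled standalone; a minimal
# sketch of the Pipeline 2 variant described in the docstring above, with
# the imports spelled out for self-containment.
from pyriemann.estimation import XdawnCovariances
from pyriemann.tangentspace import TangentSpace
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline

clf_p2 = make_pipeline(
    XdawnCovariances(estimator='lwf', xdawn_estimator='lwf'),
    TangentSpace(),
    LogisticRegression(penalty='elasticnet', solver='saga',
                       multi_class='multinomial', l1_ratio=0.5,
                       max_iter=500))
# clf_p2.fit(X, y) expects X of shape (n_epochs, n_channels, n_times)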
Example #20
    def __train_predefined_classifier(
            self,
            epochs,
            RG_Pipeline_Num=0,
            estimator='lwf',
            estimate_accuracy=False,
            random_state=44,
            class_names=['Rest', '13 Hz', '17 Hz', '21 Hz']):
        """
        Train a predefined Riemannian geometry pipeline on a single dataset using
        MNE and pyriemann.
        Parameters
        ----------
        epochs : Epoch Object from MNE
            Epoch data held in an appropriate MNE format. This could be derived from
            mne.Epochs, or using the `build_epochs` command included in this script.
        RG_Pipeline_Num : int, optional
            Which pre-defined Riemannian geometry pipeline to run for analysis.
            Can be 0,1,2,3:
                Pipeline 0:
                    Covariance w/ estimator -> Riemannian KNN
                Pipeline 1:
                    Covariance w/ estimator -> CSP -> TangentSpace -> LogisticRegression
                    LogReg uses a 'balanced' option for class weights, l2 penalty.
                Pipeline 2:
                    XDawnCovariance w/ estimator -> TangentSpace -> LogisticRegression
                    LogReg uses an elasticnet penalty, the 'saga' solver and a multinomial multi_class flag.
                Pipeline 3:
                    Covariance w/ estimator -> MDM.
                    Minimum distance to mean (MDM) is the main classification scheme.
            The default is 0.
        estimator :  str, optional
            Covariance matrix estimator to use. For regularization consider 'lwf'
            or 'oas'. For complete lists, see pyriemann.utils.covariance.
            The default is 'lwf'.
        estimate_accuracy : bool, optional
            Estimate model accuracy roughly using a simple hold-out train/test split.
            A default split of 75% train / 25% test is used.
            The default is False.
        random_state : int, optional
            The value to be used as the 'seed' for `numpy.random.RandomState`.
            See sklearn.model_selection.StratifiedKFold for more details.
            The default is 44.
        class_names : List, optional
            List of names for the confusion matrix plot.
            The default is ['Rest','13 Hz','17 Hz','21 Hz'].
        Returns
        -------
        clf : Classifier object (sklearn)
            Returns a trained classifier object based on the given epoch data and
            Riemannian Geometry pipeline.
        See Also
        --------
        mne.Epochs
        sklearn.model_selection.StratifiedKFold
        sklearn.linear_model.LogisticRegression
        pyriemann.estimation.Covariances
        pyriemann.estimation.XdawnCovariances
        pyriemann.spatialfilters.CSP
        pyriemann.tangentspace.TangentSpace
        pyriemann.classification.MDM
        pyriemann.classification.KNearestNeighbor (Riemannian KNN)
        """

        # Run one of the pre-defined pipelines
        if RG_Pipeline_Num == 1:
            clf = make_pipeline(
                Covariances(estimator=estimator), CSP(log=False),
                TangentSpace(),
                LogisticRegression(class_weight='balanced', max_iter=500))
        elif RG_Pipeline_Num == 2:
            clf = make_pipeline(
                XdawnCovariances(estimator=estimator,
                                 xdawn_estimator=estimator), TangentSpace(),
                LogisticRegression(penalty='l1',
                                   class_weight=None,
                                   solver='saga',
                                   multi_class='multinomial',
                                   max_iter=500))
        elif RG_Pipeline_Num == 3:
            clf = make_pipeline(Covariances(estimator=estimator),
                                MDM())  # This is the best so far
        else:
            print(
                "...Running a default pipeline for RG using Covariance, and KNN..."
            )
            clf = make_pipeline(Covariances(estimator=estimator), riem_KNN())

        # Get the labels for the data
        labels = epochs.events[:, -1]
        # Identify the data itself
        X_data = epochs.get_data()
        # Get the class names for the confusion matrix
        class_names = class_names

        # This is NOT a great measure of the model accuracy. This just will give you
        # a rough estimate of how it is performing within its own dataset. This
        # should be used sparingly!
        if estimate_accuracy is True:
            # Do a simple data-hold out for testing
            x_train, x_test, y_train, y_test = train_test_split(
                X_data, labels, test_size=0.25, random_state=random_state)

            clf_estimate = clf

            clf_estimate.fit(x_train, y_train)

            pred_vals = clf_estimate.predict(x_test)

            accuracy_val = np.mean(pred_vals == y_test)

            fig = plt.figure()
            plot_confusion_matrix(y_test, pred_vals, class_names)

        # Fit the data to the given epoch information
        clf.fit(X_data, labels)

        return clf
labels = epochs.events[:, -1]
evoked = epochs.average()

###############################################################################
# Multiclass decoding in sensor space with XDAWN + MDM

n_components = 3  # pick some components

# Define a monte-carlo cross-validation generator (reduce variance):
cv = KFold(n_splits=10, shuffle=True, random_state=42)
pr = np.zeros(len(labels))
epochs_data = epochs.get_data()

print('Multiclass classification with XDAWN + MDM')
clf = Pipeline([('COV', XdawnCovariances(n_components)), ('MDM', MDM())])

for train_idx, test_idx in cv.split(epochs_data):
    y_train, y_test = labels[train_idx], labels[test_idx]

    clf.fit(epochs_data[train_idx], y_train)
    pr[test_idx] = clf.predict(epochs_data[test_idx])

print(classification_report(labels, pr))
print(confusion_matrix(labels, pr))

print('Multiclass classification with XDAWN + FgMDM')
clf = Pipeline([('COV', XdawnCovariances(n_components)), ('MDM', FgMDM())])

for train_idx, test_idx in cv.split(epochs_data):
    y_train, y_test = labels[train_idx], labels[test_idx]
Example #22
##############################################################################
# Create pipelines
# ----------------
#
# Pipelines must be a dict of sklearn pipeline transformers.
processing_sampling_rate = 128
pipelines = {}

# we have to do this because the classes are called 'Target' and 'NonTarget'
# but the evaluation function uses a LabelEncoder, transforming them
# to 0 and 1
labels_dict = {"Target": 1, "NonTarget": 0}

# Riemannian geometry based classification
pipelines["RG+LDA"] = make_pipeline(
    XdawnCovariances(nfilter=5, estimator="lwf", xdawn_estimator="scm"),
    TangentSpace(),
    LDA(solver="lsqr", shrinkage="auto"),
)

# Simple LDA pipeline using averaged feature values in certain time intervals
jumping_mean_ivals = [
    [0.10, 0.139],
    [0.14, 0.169],
    [0.17, 0.199],
    [0.20, 0.229],
    [0.23, 0.269],
    [0.27, 0.299],
    [0.30, 0.349],
    [0.35, 0.409],
    [0.41, 0.449],
Example #23
##############################################################################
# Create pipelines
# ----------------
#
# Pipelines must be a dict of sklearn pipeline transformers.

pipelines = {}

# we have to do this because the classes are called 'Target' and 'NonTarget'
# but the evaluation function uses a LabelEncoder, transforming them
# to 0 and 1
labels_dict = {"Target": 1, "NonTarget": 0}

pipelines["RG + LDA"] = make_pipeline(
    XdawnCovariances(nfilter=2,
                     classes=[labels_dict["Target"]],
                     estimator="lwf",
                     xdawn_estimator="lwf"),
    TangentSpace(),
    LDA(solver="lsqr", shrinkage="auto"),
)

pipelines["Xdw + LDA"] = make_pipeline(Xdawn(nfilter=2, estimator="lwf"),
                                       Vectorizer(),
                                       LDA(solver="lsqr", shrinkage="auto"))

##############################################################################
# Evaluation
# ----------
#
# We define the paradigm (P300) and use all three datasets available for it.
# The evaluation will return a dataframe containing a single AUC score for
loss_ax.set_ylabel('loss')
acc_ax.set_ylabel('accuracy')

loss_ax.legend(loc='upper left')
acc_ax.legend(loc='lower left')

plt.show()
############################# PyRiemann Portion ##############################

# code is taken from PyRiemann's ERP sample script, which is decoding in
# the tangent space with a logistic regression

n_components = 2  # pick some components

# set up sklearn pipeline
clf = make_pipeline(XdawnCovariances(n_components),
                    TangentSpace(metric='riemann'), LogisticRegression())

preds_rg = np.zeros(len(Y_test))

# reshape back to (trials, channels, samples)
X_train = X_train.reshape(X_train.shape[0], chans, samples)
X_test = X_test.reshape(X_test.shape[0], chans, samples)

# train a classifier with xDAWN spatial filtering + Riemannian Geometry (RG)
# labels need to be back in single-column format
history = clf.fit(X_train, Y_train.argmax(axis=-1))
preds_rg = clf.predict(X_test)

# Printing the results
acc2 = np.mean(preds_rg == Y_test.argmax(axis=-1))
Example #25
        return np.reshape(X, (X.shape[0], -1))


##############################################################################
# Create pipelines
# ----------------
# Pipelines must be a dict of sklearn pipeline transformers.
pipelines = {}

# we have to do this because the classes are called 'Target' and 'NonTarget'
# but the evaluation function uses a LabelEncoder, transforming them
# to 0 and 1
labels_dict = {'Target': 1, 'NonTarget': 0}

pipelines['RG + LDA'] = make_pipeline(
    XdawnCovariances(nfilter=2,
                     classes=[labels_dict['Target']],
                     estimator='lwf',
                     xdawn_estimator='lwf'),
    TangentSpace(),
    LDA(solver='lsqr', shrinkage='auto'))

pipelines['Xdw + LDA'] = make_pipeline(Xdawn(nfilter=2, estimator='lwf'),
                                       Vectorizer(), LDA(solver='lsqr',
                                                         shrinkage='auto'))
pipelines['ERPCov + TS'] = make_pipeline(ERPCovariances(classes=[0, 1], estimator='oas', svd=None),
                                         TangentSpace(metric='riemann'),
                                         LogisticRegression(solver='lbfgs'))
Example #26
            _, x_test, y_test = load(data_name,
                                     s_id[s],
                                     npp_params,
                                     clean=True,
                                     physical=True,
                                     downsample=False)

            data_size = y_train.shape[0]
            shuffle_index = utils.shuffle_data(data_size)
            x_train = x_train[shuffle_index]
            x_train = np.squeeze(x_train)
            y_train = y_train[shuffle_index]

            # Build Model
            xd = XdawnCovariances(nfilter=5,
                                  applyfilters=True,
                                  estimator='lwf')
            # es = ElectrodeSelection(nelec=25, metric='riemann')
            ts = TangentSpace(metric='logeuclid')
            lr = LogisticRegression(solver='liblinear', max_iter=200, C=0.01)

            model = Pipeline([('xDAWN', xd), ('TangentSpace', ts), ('LR', lr)])

            model.fit(x_train, y_train)

            # Test Model
            y_pred = np.argmax(model.predict_proba(np.squeeze(x_test)), axis=1)
            bca = utils.bca(y_test, y_pred)
            acc = np.sum(y_pred == y_test).astype(np.float32) / len(y_pred)
            print('{}: acc-{} bca-{}'.format(data_name, acc, bca))
)

labels = epochs.events[:, -1]
evoked = epochs.average()

###############################################################################
# Decoding in tangent space with a logistic regression

n_components = 2  # pick some components

# Define a monte-carlo cross-validation generator (reduce variance):
cv = KFold(n_splits=10, shuffle=True, random_state=42)
epochs_data = epochs.get_data()

clf = make_pipeline(
    XdawnCovariances(n_components),
    TangentSpace(metric="riemann"),
    LogisticRegression(),
)

preds = np.zeros(len(labels))

for train_idx, test_idx in cv.split(epochs_data):
    y_train, y_test = labels[train_idx], labels[test_idx]

    clf.fit(epochs_data[train_idx], y_train)
    preds[test_idx] = clf.predict(epochs_data[test_idx])

# Printing the results
acc = np.mean(preds == labels)
print("Classification accuracy: %f " % (acc))