def xdawn_embedding(data, use_xdawn):
    """Embed EEG test data in a low-dimensional Euclidean space,
    using Laplacian Eigenmaps on Xdawn covariance matrices or UMAP
    on tangent-space vectors.

    Parameters
    ----------
    data : dict
        A dictionary containing training and testing data
    use_xdawn : bool
        If True, embed Xdawn covariances with Laplacian Eigenmaps;
        otherwise project covariances to the tangent space and embed
        the vectors with UMAP.

    Returns
    -------
    array
        Embedded test data.

    """

    if use_xdawn:
        nfilter = 3
        xdwn = XdawnCovariances(estimator='scm', nfilter=nfilter)
        covs = xdwn.fit(data['train_x'],
                        data['train_y']).transform(data['test_x'])

        lapl = Embedding(metric='riemann', n_components=3)
        embd = lapl.fit_transform(covs)
    else:
        tangent_space = Pipeline([
            ('cov_transform', Covariances(estimator='lwf')),
            ('tangent_space', TangentSpace(metric='riemann'))
        ])
        t_space = tangent_space.fit(data['train_x'],
                                    data['train_y']).transform(data['test_x'])
        reducer = umap.UMAP(n_neighbors=30, min_dist=1, spread=2)
        embd = reducer.fit_transform(t_space)

    return embd
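# A minimal usage sketch for the function above, kept as a comment. The
# shapes, two-class labels, and the pyriemann/umap imports it relies on are
# illustrative assumptions, not part of the original snippet.
#
# import numpy as np
# data = {'train_x': np.random.randn(20, 8, 128),  # epochs x channels x times
#         'train_y': np.array([0, 1]).repeat(10),
#         'test_x': np.random.randn(10, 8, 128)}
# points = xdawn_embedding(data, use_xdawn=True)   # -> shape (10, 3)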
Example #2
def N170_test(session_data):
    markers = N170_MARKERS
    epochs = get_session_erp_epochs(session_data, markers)
    conditions = OrderedDict()
    for i, marker in enumerate(markers):
        conditions[marker] = [i + 1]
   
    clfs = OrderedDict()
    
    clfs['Vect + LR'] = make_pipeline(Vectorizer(), StandardScaler(), LogisticRegression())
    clfs['Vect + RegLDA'] = make_pipeline(Vectorizer(), LDA(shrinkage='auto', solver='eigen'))
    clfs['ERPCov + TS'] = make_pipeline(ERPCovariances(estimator='oas'), TangentSpace(), LogisticRegression())
    clfs['ERPCov + MDM'] = make_pipeline(ERPCovariances(estimator='oas'), MDM())
    clfs['XdawnCov + TS'] = make_pipeline(XdawnCovariances(estimator='oas'), TangentSpace(), LogisticRegression())
    clfs['XdawnCov + MDM'] = make_pipeline(XdawnCovariances(estimator='oas'), MDM())
    methods_list = ['Vect + LR', 'Vect + RegLDA', 'ERPCov + TS',
                    'ERPCov + MDM', 'XdawnCov + TS', 'XdawnCov + MDM']
    # format data
    epochs.pick_types(eeg=True)
    X = epochs.get_data() * 1e6
    times = epochs.times
    y = epochs.events[:, -1]

    # define cross validation 
    cv = StratifiedShuffleSplit(n_splits=20, test_size=0.25, 
                                random_state=42)

    # run cross validation for each pipeline
    auc = []
    methods = []
    print('Computation in progress...')
    for m in clfs:
        try:

            res = cross_val_score(clfs[m], X, y==2, scoring='roc_auc', 
                                  cv=cv, n_jobs=-1)
            auc.extend(res)
            methods.extend([m]*len(res))
        except Exception as e:
            print('Exception while evaluating %s: %s' % (m, e))
        
    ## Plot Decoding Results

    results = pd.DataFrame(data=auc, columns=['AUC'])
    results['Method'] = methods
    auc_means = []
    for method in methods_list:
        auc_means.append(results.loc[results['Method'] == method, 'AUC'].mean())
    # count pipelines whose mean AUC reaches the 0.7 threshold
    counter = sum(1 for mean_auc in auc_means if mean_auc >= 0.7)
        
    return counter > 0, counter
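# Hypothetical usage of N170_test; `session_data` is whatever recording format
# get_session_erp_epochs() expects (not shown in this snippet):
#
# passed, n_good = N170_test(session_data)
# print('%d pipeline(s) reached mean AUC >= 0.7 -> test %s'
#       % (n_good, 'passed' if passed else 'failed'))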
def test_Xdawncovariances():
    """Test fit ERPCovariances"""
    x = np.random.randn(10, 3, 100)
    labels = np.array([0, 1]).repeat(5)
    cov = XdawnCovariances()
    cov.fit_transform(x, labels)
    assert_equal(cov.get_params(), dict(nfilter=4, applyfilters=True,
                                        classes=None, estimator='scm',
                                        xdawn_estimator='scm'))
Example #4
def test_Xdawncovariances():
    """Test fit ERPCovariances"""
    x = np.random.randn(10, 3, 100)
    labels = np.array([0, 1]).repeat(5)
    cov = XdawnCovariances()
    cov.fit_transform(x, labels)
    assert_equal(cov.get_params(), dict(nfilter=4, applyfilters=True,
                                        classes=None, estimator='scm',
                                        xdawn_estimator='scm',
                                        baseline_cov=None))
Example #5
def test_xdawn_covariances_applyfilters(rndstate, get_labels):
    n_classes, nfilter = 2, 2
    n_matrices, n_channels, n_times = 4, 6, 100
    x = rndstate.randn(n_matrices, n_channels, n_times)
    labels = get_labels(n_matrices, n_classes)
    cov = XdawnCovariances(nfilter=nfilter, applyfilters=False)
    covmats = cov.fit_transform(x, labels)
    covsize = n_classes * nfilter + n_channels
    assert covmats.shape == (n_matrices, covsize, covsize)
    assert is_spsd(covmats)
Example #6
def test_xdawn_covariances_nfilter(nfilter, rndstate, get_labels):
    """Test fit XdawnCovariances"""
    n_classes, n_matrices, n_channels, n_times = 2, 4, 8, 100
    x = rndstate.randn(n_matrices, n_channels, n_times)
    labels = get_labels(n_matrices, n_classes)
    cov = XdawnCovariances(nfilter=nfilter)
    covmats = cov.fit_transform(x, labels)
    assert cov.get_params() == dict(
        nfilter=nfilter,
        applyfilters=True,
        classes=None,
        estimator="scm",
        xdawn_estimator="scm",
        baseline_cov=None,
    )
    covsize = 2 * (n_classes * nfilter)
    assert covmats.shape == (n_matrices, covsize, covsize)
    assert is_spsd(covmats)
def xdawn_embedding(data):
    """Perform embedding of EEG data in 2D Euclidean space
    with Laplacian Eigenmaps.

    Parameters
    ----------
    data : dict
        A dictionary containing training and testing data

    Returns
    -------
    array
        Embedded test data.

    """

    nfilter = 3
    xdwn = XdawnCovariances(estimator='scm', nfilter=nfilter)
    covs = xdwn.fit(data['train_x'], data['train_y']).transform(data['test_x'])

    lapl = Embedding(metric='riemann', n_components=3)
    embd = lapl.fit_transform(covs)

    return embd
def get_sourcetarget_split_p300(source, target, ncovs_train):

    X_source = source['signals']
    y_source = source['labels'].flatten()
    covs_source = XdawnCovariances(classes=[2]).fit_transform(
        X_source, y_source)

    source = {}
    source['covs'] = covs_source
    source['labels'] = y_source

    X_target = target['signals']
    y_target = target['labels'].flatten()

    if ncovs_train is None:
        ncovs_train = np.sum(y_target == 2)

    sel = np.arange(len(y_target))
    np.random.shuffle(sel)
    X_target = X_target[sel]
    y_target = y_target[sel]

    idx_erps = np.where(y_target == 2)[0][:ncovs_train]
    idx_rest = np.where(
        y_target == 1)[0][:ncovs_train *
                          5]  # because there's one ERP in every 6 flashes

    idx_train = np.concatenate([idx_erps, idx_rest])
    idx_test = np.array(
        [i for i in range(len(y_target)) if i not in idx_train])

    erp = XdawnCovariances(classes=[2])
    erp.fit(X_target[idx_train], y_target[idx_train])

    target_train = {}
    covs_target_train = erp.transform(X_target[idx_train])
    y_target_train = y_target[idx_train]
    target_train['covs'] = covs_target_train
    target_train['labels'] = y_target_train

    target_test = {}
    covs_target_test = erp.transform(X_target[idx_test])
    y_target_test = y_target[idx_test]
    target_test['covs'] = covs_target_test
    target_test['labels'] = y_target_test

    return source, target_train, target_test
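# Illustrative call with synthetic P300-like data, following the labeling
# convention assumed above (class 2 = ERP/target, class 1 = non-target);
# all shapes and counts are made up for this sketch:
#
# import numpy as np
# rng = np.random.RandomState(42)
# source = {'signals': rng.randn(60, 8, 128),
#           'labels': np.r_[np.ones(50), 2 * np.ones(10)]}
# target = {'signals': rng.randn(60, 8, 128),
#           'labels': np.r_[np.ones(50), 2 * np.ones(10)]}
# src, tgt_train, tgt_test = get_sourcetarget_split_p300(source, target, 5)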
        return np.reshape(X, (X.shape[0], -1))


##############################################################################
# Create pipelines
# ----------------
# Pipelines must be a dict of sklearn pipeline transformer.
pipelines = {}

# we have to do this because the classes are called 'Target' and 'NonTarget'
# but the evaluation function uses a LabelEncoder, transforming them
# to 0 and 1
labels_dict = {'Target': 1, 'NonTarget': 0}
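# For example (illustrative), LabelEncoder orders classes alphabetically:
# LabelEncoder().fit_transform(['Target', 'NonTarget', 'Target'])
# -> array([1, 0, 1]), i.e. 'NonTarget' -> 0 and 'Target' -> 1, as above.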

pipelines['RG + LDA'] = make_pipeline(
    XdawnCovariances(
        nfilter=2,
        classes=[
            labels_dict['Target']],
        estimator='lwf',
        xdawn_estimator='lwf'),
    TangentSpace(),
    LDA(solver='lsqr', shrinkage='auto'))

pipelines['Xdw + LDA'] = make_pipeline(Xdawn(nfilter=2, estimator='lwf'),
                                       Vectorizer(), LDA(solver='lsqr',
                                                         shrinkage='auto'))
pipelines['ERPCov + TS'] = make_pipeline(
    ERPCovariances(classes=[0, 1], estimator='oas', svd=None),
    TangentSpace(metric='riemann'),
    LogisticRegression(solver='lbfgs'))
Example #10
##############################################################################
# Create pipelines
# ----------------
#
# Pipelines must be a dict of sklearn pipeline transformer.

pipelines = {}

# we have to do this because the classes are called 'Target' and 'NonTarget'
# but the evaluation function uses a LabelEncoder, transforming them
# to 0 and 1
labels_dict = {"Target": 1, "NonTarget": 0}

pipelines["RG + LDA"] = make_pipeline(
    XdawnCovariances(nfilter=2,
                     classes=[labels_dict["Target"]],
                     estimator="lwf",
                     xdawn_estimator="lwf"),
    TangentSpace(),
    LDA(solver="lsqr", shrinkage="auto"),
)

pipelines["Xdw + LDA"] = make_pipeline(Xdawn(nfilter=2, estimator="lwf"),
                                       Vectorizer(),
                                       LDA(solver="lsqr", shrinkage="auto"))

##############################################################################
# Evaluation
# ----------
#
# We define the paradigm (P300) and use all three datasets available for it.
# The evaluation will return a dataframe containing a single AUC score for
# each subject / session and pipeline.
labels = epochs.events[:, -1]
evoked = epochs.average()

###############################################################################
# Decoding in sensor space using a linear SVM

n_components = 3  # pick some components

# Define a monte-carlo cross-validation generator (reduce variance):
cv = KFold(n_splits=10, shuffle=True, random_state=42)
pr = np.zeros(len(labels))
epochs_data = epochs.get_data()

print('Multiclass classification with XDAWN + MDM')
clf = Pipeline([('COV', XdawnCovariances(n_components)), ('MDM', MDM())])

for train_idx, test_idx in cv.split(epochs_data):
    y_train, y_test = labels[train_idx], labels[test_idx]

    clf.fit(epochs_data[train_idx], y_train)
    pr[test_idx] = clf.predict(epochs_data[test_idx])

print(classification_report(labels, pr))
print(confusion_matrix(labels, pr))

print('Multiclass classification with XDAWN + FgMDM')
clf = Pipeline([('COV', XdawnCovariances(n_components)), ('FgMDM', FgMDM())])

for train_idx, test_idx in cv.split(epochs_data):
    y_train, y_test = labels[train_idx], labels[test_idx]
Example #12
# ____________________________________________________________________________
# Create pipelines
# ----------------
# Pipelines must be a dict of sklearn pipeline transformer.
pipelines = {}
# we have to do this because the classes are called 'Target' and 'NonTarget'
# but the evaluation function uses a LabelEncoder, transforming them
# to 0 and 1
labels_dict = {'Target': 1, 'NonTarget': 0}

# %%
# from sklearn.preprocessing import StandardScaler

pipelines['RG + LRR'] = make_pipeline(
    XdawnCovariances(nfilter=2,
                     classes=[labels_dict['Target']],
                     estimator='lwf',
                     xdawn_estimator='lwf'), TangentSpace(), LRR())

# %%
pipelines['Xdawn + LRR'] = make_pipeline(Xdawn(nfilter=2, estimator='lwf'),
                                         Vectorizer(), LRR())

# %%
pipelines['LRR'] = make_pipeline(Vectorizer(), LRR())

# ____________________________________________________________________________
# Evaluation
# %%
paradigm = P300(resample=128)
dataset = BNCI2015003()
                reject={'eeg': 75e-6}, preload=True,
                verbose=False, picks=[0,1,2,3])

print('sample drop %: ', (1 - len(epochs.events)/len(events)) * 100)
epochs

###################################################################################################
# Run classification
# ----------------------------

clfs = OrderedDict()
clfs['Vect + LR'] = make_pipeline(Vectorizer(), StandardScaler(), LogisticRegression())
clfs['Vect + RegLDA'] = make_pipeline(Vectorizer(), LDA(shrinkage='auto', solver='eigen'))
clfs['ERPCov + TS'] = make_pipeline(ERPCovariances(estimator='oas'), TangentSpace(), LogisticRegression())
clfs['ERPCov + MDM'] = make_pipeline(ERPCovariances(estimator='oas'), MDM())
clfs['XdawnCov + TS'] = make_pipeline(XdawnCovariances(estimator='oas'), TangentSpace(), LogisticRegression())
clfs['XdawnCov + MDM'] = make_pipeline(XdawnCovariances(estimator='oas'), MDM())

# format data
epochs.pick_types(eeg=True)
X = epochs.get_data() * 1e6
times = epochs.times
y = epochs.events[:, -1]

# define cross validation 
cv = StratifiedShuffleSplit(n_splits=20, test_size=0.25,
                            random_state=42)

# run cross validation for each pipeline
auc = []
methods = []
    subject_dir_list = test_list_np[i]
    subject_epoch = np.empty((0, 56, 260), float)
    for j in range(5):
        subject_dir = subject_dir_list[j]
        data = epoching('./data/test/' + subject_dir)
        subject_epoch = np.vstack((subject_epoch, data))
    subject_epoch = np.reshape(subject_epoch, (1, 340, 56, 260))
    test_data_list = np.vstack((test_data_list, subject_epoch))

print('Epoched training data shape: ' + str(train_data_list.shape))
print('Epoched testing data shape: ' + str(test_data_list.shape))

########################## apply data preprocessing ############################
y_train = pd.read_csv('TrainLabels.csv')['Prediction'].values
y_test = np.reshape(pd.read_csv('true_labels.csv', header=None).values, 3400)
XC = XdawnCovariances(nfilter=5)
output_train = XC.fit_transform(
    np.reshape(train_data_list, (16 * 340, 56, 260)), y_train)
ts = TangentSpace(metric='riemann')
X_train = ts.fit_transform(output_train)
# reuse the transformers fitted on the training data; refitting on the test
# set with y_test (as before) would leak the test labels
output_test = XC.transform(np.reshape(test_data_list, (10 * 340, 56, 260)))
X_test = ts.transform(output_test)
print('Preprocessed training data shape: ' + str(X_train.shape))
print('Preprocessed testing data shape: ' + str(X_test.shape))

############################## save data to disk ###############################
np.save('./data/train_data_56_260_1_40Hz.npy', train_data_list)
np.save('./data/test_data_56_260_1_40Hz.npy', test_data_list)
np.save('./data/X_train', X_train)
np.save('./data/X_test', X_test)
Example #15
def test_Xdawncovariances():
    """Test fit ERPCovariances"""
    x = np.random.randn(10, 3, 100)
    labels = np.array([0, 1]).repeat(5)
    cov = XdawnCovariances()
    cov.fit_transform(x, labels)
                    picks=picks, baseline=None, preload=True)

labels = epochs.events[:, -1]
evoked = epochs.average()

###############################################################################
# Decoding in sensor space using a linear SVM


n_components = 3  # pick some components

# Define a monte-carlo cross-validation generator (reduce variance):
cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=42)
scores = []
epochs_data = epochs.get_data()


clf = Pipeline([('COV', XdawnCovariances(n_components)), ('MDM', MDM())])

for train_idx, test_idx in cv.split(epochs_data):
    y_train, y_test = labels[train_idx], labels[test_idx]
    
    clf.fit(epochs_data[train_idx], y_train)
    scores.append(clf.score(epochs_data[test_idx], y_test))

# Printing the results
class_balance = np.mean(labels == labels[0])
class_balance = max(class_balance, 1. - class_balance)
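# (chance level = accuracy of always predicting the more frequent class)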
print("Classification accuracy: %f / Chance level: %f" % (np.mean(scores),
                                                          class_balance))
Example #17
        subject_dir_list = test_list_np[testing_participant_id]
        subject_epoch = np.empty((0, len(channels), epoch_len), float)
        for trial_id in range(trial_per_subj):
            subject_dir = subject_dir_list[trial_id]
            data = generate_epoch('FeedBackEvent', './data/test/'+subject_dir,
                                  channels, fs, lowcut, highcut, epoch_s, epoch_e, bl_s, bl_e)
            subject_epoch = np.vstack((subject_epoch, data))
        subject_epoch = np.reshape(
            subject_epoch, (1, stimulus_per_subj, len(channels), epoch_len))
        test_data_list = np.vstack((test_data_list, subject_epoch))

    print('Epoched testing data shape: ' + str(test_data_list.shape))

    # ########################## apply data preprocessing ############################
    y_train = pd.read_csv('data/TrainLabels.csv')['Prediction'].values
    XC = XdawnCovariances(nfilter=5)
    X_train = XC.fit_transform(np.reshape(
        train_data_list, (total_training_participant*stimulus_per_subj, len(channels), epoch_len)), y_train)
    ts = TangentSpace(metric='riemann')
    X_train = ts.fit_transform(X_train)
    X_test = XC.transform(np.reshape(
        test_data_list, (10*stimulus_per_subj, len(channels), epoch_len)))
    X_test = ts.transform(X_test)  # reuse the tangent space fitted on train
    print('Preprocessed training data shape: ' + str(X_train.shape))
    print('Preprocessed testing data shape: ' + str(X_test.shape))

    # ############################## save data to disk ###############################
    np.save('./data/train_data.npy', train_data_list)
    np.save('./data/test_data.npy', test_data_list)
    np.save('./data/X_train', X_train)
    np.save('./data/X_test', X_test)
def test_Xdawncovariances():
    """Test fit ERPCovariances"""
    x = np.random.randn(10, 3, 100)
    labels = np.array([0, 1]).repeat(5)
    cov = XdawnCovariances()
    cov.fit_transform(x, labels)
Example #19
def decode(epochs,
           get_y_label_func,
           epoch_filter=None,
           decoding_method='standard',
           sliding_window_size=None,
           sliding_window_step=None,
           n_jobs=multiprocessing.cpu_count(),
           equalize_event_counts=True,
           only_fit=False,
           generalize_across_time=True):
    """
    Basic flow for decoding
    """

    config = dict(equalize_event_counts=equalize_event_counts,
                  only_fit=only_fit,
                  sliding_window_size=sliding_window_size,
                  sliding_window_step=sliding_window_step,
                  decoding_method=decoding_method,
                  generalize_across_time=generalize_across_time,
                  epoch_filter=str(epoch_filter))

    if epoch_filter is not None:
        epochs = epochs[epoch_filter]

    #-- Classify epochs into groups (training epochs)
    y_labels = get_y_label_func(epochs)

    if equalize_event_counts:
        epochs.events[:, 2] = y_labels
        epochs.event_id = {str(label): label for label in np.unique(y_labels)}
        min_n_items_per_y_label = min(
            [len(epochs[cond]) for cond in epochs.event_id.keys()])
        print("\nEqualizing the number of epochs to %d per condition..." %
              min_n_items_per_y_label)
        epochs.equalize_event_counts(epochs.event_id.keys())
        y_labels = epochs.events[:, 2]

    print("The epochs were classified into %d groups:" % len(set(y_labels)))
    for g in set(y_labels):
        print("Group {:}: {:} epochs".format(g, sum(np.array(y_labels) == g)))

    #-- Create the decoding pipeline
    print("Creating the classification pipeline...")

    epochs_data = epochs.get_data()

    preprocess_pipeline = None

    if decoding_method.startswith('standard'):

        if 'reg' in decoding_method:
            clf = make_pipeline(StandardScaler(), Ridge())
        else:
            clf = make_pipeline(
                StandardScaler(),
                svm.SVC(C=1, kernel='linear', class_weight='balanced'))

        if 'raw' not in decoding_method:
            assert sliding_window_size is not None
            assert sliding_window_step is not None
            preprocess_pipeline = make_pipeline(
                umne.transformers.SlidingWindow(window_size=sliding_window_size,
                                                step=sliding_window_step,
                                                average=True))

    elif decoding_method == 'ERP_cov':
        clf = make_pipeline(
            UnsupervisedSpatialFilter(PCA(20), average=False),
            ERPCovariances(
                estimator='lwf'),  # todo how to apply sliding window?
            CSP(30, log=False),
            TangentSpace('logeuclid'),
            LogisticRegression(penalty='l2'))  # todo why logistic regression?

    elif decoding_method == 'Xdawn_cov':
        clf = make_pipeline(
            UnsupervisedSpatialFilter(PCA(50), average=False),
            XdawnCovariances(12, estimator='lwf', xdawn_estimator='lwf'),
            TangentSpace('logeuclid'), LogisticRegression(penalty='l2'))

    elif decoding_method == 'Hankel_cov':
        clf = make_pipeline(
            UnsupervisedSpatialFilter(PCA(70), average=False),
            HankelCovariances(delays=[1, 8, 12, 64], estimator='oas'),
            CSP(15, log=False), TangentSpace('logeuclid'),
            LogisticRegression(penalty='l2'))

    else:
        raise Exception('Unknown decoding method: {:}'.format(decoding_method))

    print('\nDecoding pipeline:')
    for i in range(len(clf.steps)):
        print('Step #{:}: {:}'.format(i + 1, clf.steps[i][1]))

    if preprocess_pipeline is not None:
        print('\nApplying the pre-processing pipeline:')
        for i in range(len(preprocess_pipeline.steps)):
            print('Step #{:}: {:}'.format(i + 1,
                                          preprocess_pipeline.steps[i][1]))
        epochs_data = preprocess_pipeline.fit_transform(epochs_data)

    if only_fit:

        #-- Only fit the decoders

        procedure = 'only_fit'
        scores = None
        cv = None

        if decoding_method.startswith('standard'):
            if 'reg' in decoding_method:
                if 'r2' in decoding_method:
                    scoring = metrics.make_scorer(metrics.r2_score)
                else:
                    scoring = metrics.make_scorer(metrics.mean_squared_error)
            else:
                scoring = 'accuracy'
            if generalize_across_time:
                estimator = GeneralizingEstimator(clf,
                                                  scoring=scoring,
                                                  n_jobs=n_jobs)
            else:
                estimator = SlidingEstimator(clf,
                                             scoring=scoring,
                                             n_jobs=n_jobs)
        else:
            estimator = clf

        estimator.fit(X=epochs_data, y=y_labels)

    else:

        #-- Classify & score -- cross-validation

        procedure = 'fit_and_score'
        print(
            "\nCreating a classifier and calculating accuracy scores (this may take some time)..."
        )

        cv = StratifiedKFold(n_splits=5)
        if decoding_method.startswith('standard'):
            if 'reg' in decoding_method:
                if 'r2' in decoding_method:
                    scoring = metrics.make_scorer(metrics.r2_score)
                else:
                    scoring = metrics.make_scorer(metrics.mean_squared_error)

            else:
                scoring = 'accuracy'
            if generalize_across_time:
                estimator = GeneralizingEstimator(clf,
                                                  scoring=scoring,
                                                  n_jobs=n_jobs)
            else:
                estimator = SlidingEstimator(clf,
                                             scoring=scoring,
                                             n_jobs=n_jobs)

            scores = cross_val_multiscore(estimator=estimator,
                                          X=epochs_data,
                                          y=np.array(y_labels),
                                          cv=cv)
        else:
            scores = _run_cross_validation(X=epochs_data,
                                           y=np.array(y_labels),
                                           clf=clf,
                                           cv=cv)
            estimator = None  # estimator is not defined for Riemannian decoding

    times = np.linspace(epochs.tmin, epochs.tmax, epochs_data.shape[2])

    return dict(procedure=procedure,
                estimator=estimator,
                scores=scores,
                pipeline=clf,
                preprocess=preprocess_pipeline,
                cv=cv,
                times=times,
                config=config)
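# Hypothetical call of decode(); `epochs` would be an mne.Epochs object, and
# the label function below is a stand-in defined only for this sketch:
#
# def get_labels(epochs):
#     return epochs.events[:, 2]
#
# res = decode(epochs, get_labels, decoding_method='Xdawn_cov',
#              generalize_across_time=False)
# print(np.mean(res['scores']))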
    stats['VR'] = {}
    stats['PC'] = {}

    for condition in datasets.keys():

        # get the epochs and labels
        X, y, meta = paradigm.get_data(datasets[condition], subjects=[subject])
        y = LabelEncoder().fit_transform(y)

        data[condition]['X'] = X
        data[condition]['y'] = y

        # estimate xDawn covs
        ncomps = 4
        erp = XdawnCovariances(classes=[1],
                               estimator='lwf',
                               nfilter=ncomps,
                               xdawn_estimator='lwf')
        #erp = ERPCovariances(classes=[1], estimator='lwf', svd=ncomps)
        split = train_test_split(X, y, train_size=0.50, random_state=42)
        Xtrain, Xtest, ytrain, ytest = split
        covs = erp.fit(Xtrain, ytrain).transform(Xtest)

        Mtarget = mean_riemann(covs[ytest == 1])
        Mnontarget = mean_riemann(covs[ytest == 0])
        stats[condition]['distance'] = distance_riemann(Mtarget, Mnontarget)
        stats[condition]['dispersion_target'] = np.sum(
            [distance_riemann(covi, Mtarget)**2
             for covi in covs[ytest == 1]]) / len(covs[ytest == 1])
        stats[condition]['dispersion_nontarget'] = np.sum([
            distance_riemann(covi, Mnontarget)**2 for covi in covs[ytest == 0]
        ]) / len(covs[ytest == 0])
raw.info['bads'] = ['MEG 2443']  # set bad channels
picks = mne.pick_types(raw.info, meg=True, eeg=False, stim=False, eog=False,
                       exclude='bads')

# Read epochs
epochs = mne.Epochs(raw, events, event_id, tmin, tmax, proj=False,
                    picks=picks, baseline=None, preload=True, verbose=False)

X = epochs.get_data()
y = epochs.events[:, -1]

###############################################################################
# Embedding the Xdawn covariance matrices with Laplacian Eigenmaps

nfilter = 4
xdwn = XdawnCovariances(estimator='scm', nfilter=nfilter)
split = train_test_split(X, y, train_size=0.25, random_state=42)
Xtrain, Xtest, ytrain, ytest = split
covs = xdwn.fit(Xtrain, ytrain).transform(Xtest)

lapl = Embedding(metric='riemann', n_components=2)
embd = lapl.fit_transform(covs)

###############################################################################
# Plot the first two components of the embedded points

fig, ax = plt.subplots(figsize=(7, 8), facecolor='white')

for cond, label in event_id.items():
    idx = (ytest == label)
    ax.scatter(embd[idx, 0], embd[idx, 1], s=36, label=cond)
Example #22
    def __train_predefined_classifier(
            self,
            epochs,
            RG_Pipeline_Num=0,
            estimator='lwf',
            estimate_accuracy=False,
            random_state=44,
            class_names=['Rest', '13 Hz', '17 Hz', '21 Hz']):
        """
        Train a predefined Riemannian Geometry pipeline on a single dataset using
        MNE and pyriemann.
        Parameters
        ----------
        epochs : Epoch Object from MNE
            Epoch data held in an appropriate MNE format. This could be derived from
            mne.Epochs, or using the `build_epochs` command included in this script.
        RG_Pipeline_Num :int, optional
            Which pre-defined Riemannian Geometry pipeline to run for analysis.
            Can be 0,1,2,3:
                Pipeline 0:
                    Covariance w/ estimator -> Riemannian KNN
                Pipeline 1:
                    Covariance w/ estimator -> CSP -> TangentSpace -> LogisticRegression
                    LogReg uses a 'balanced' option for class weights, l2 penalty.
                Pipeline 2:
                    XDawnCovariance w/ estimator -> TangentSpace -> LogisticRegression
                    LogReg uses elasticnet penalty, solver 'saga' and a multinomial multi_class flag.
                Pipeline 3:
                    Covariance w/ estimator -> MDM.
                    Minimum distance to mean (MDM) is the main classification scheme.
            The default is 0.
        estimator :  str, optional
            Covariance matrix estimator to use. For regularization consider 'lwf'
            or 'oas'. For complete lists, see pyriemann.utils.covariance.
            The default is 'lwf'.
        estimate_accuracy : bool, optional
            Estimate model accuracy roughly using a simple data-hold out train/test split.
            A default hold-out split of 75% train / 25% test is used.
            The default is False.
        random_state : int, optional
            The value to be used as the 'seed' for `numpy.random.RandomState`.
            See sklearn.model_selection.StratifiedKFold for more details.
            The default is 44.
        class_names : List, optional
            List of names for the confusion matrix plot.
            The default is ['Rest','13 Hz','17 Hz','21 Hz'].
        Returns
        -------
        clf : Classifier object (sklearn)
            Returns a trained classifier object based on the given epoch data and
            Riemannian Geometry pipeline.
        See Also
        --------
        mne.Epochs
        sklearn.model_selection.StratifiedKFold
        sklearn.linear_model.LogisticRegression
        pyriemann.estimation.Covariances
        pyriemann.estimation.XdawnCovariances
        pyriemann.spatialfilters.CSP
        pyriemann.tangentspace.TangentSpace
        pyriemann.classification.MDM
        pyriemann.classification.KNearestNeighbor (Riemannian KNN)
        """

        # Run one of the pre-defined pipelines
        if RG_Pipeline_Num == 1:
            clf = make_pipeline(
                Covariances(estimator=estimator), CSP(log=False),
                TangentSpace(),
                LogisticRegression(class_weight='balanced', max_iter=500))
        elif RG_Pipeline_Num == 2:
            clf = make_pipeline(
                XdawnCovariances(estimator=estimator,
                                 xdawn_estimator=estimator), TangentSpace(),
                LogisticRegression(penalty='elasticnet',
                                   class_weight=None,
                                   solver='saga',
                                   multi_class='multinomial',
                                   l1_ratio=0.5,
                                   max_iter=500))
        elif RG_Pipeline_Num == 3:
            clf = make_pipeline(Covariances(estimator=estimator),
                                MDM())  # This is the best so far
        else:
            print(
                "...Running a default pipeline for RG using Covariance, and KNN..."
            )
            clf = make_pipeline(Covariances(estimator=estimator), riem_KNN())

        # Get the labels for the data
        labels = epochs.events[:, -1]
        # Identify the data itself
        X_data = epochs.get_data()
        # Get the class names for the confusion matrix
        class_names = class_names

        # This is NOT a great measure of the model accuracy. This just will give you
        # a rough estimate of how it is performing within its own dataset. This
        # should be used sparingly!
        if estimate_accuracy is True:
            # Do a simple data-hold out for testing
            x_train, x_test, y_train, y_test = train_test_split(
                X_data, labels, test_size=0.25, random_state=random_state)

            clf_estimate = clf

            clf_estimate.fit(x_train, y_train)

            pred_vals = clf_estimate.predict(x_test)

            accuracy_val = np.mean(pred_vals == y_test)

            fig = plt.figure()
            plot_confusion_matrix(y_test, pred_vals, class_names)

        # Fit the data to the given epoch information
        clf.fit(X_data, labels)

        return clf
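# Standalone sketch of "Pipeline 2" from the docstring above, on synthetic
# data. Imports, shapes, and the four-class labels are assumptions made for
# illustration, not part of the original class:
#
# import numpy as np
# from pyriemann.estimation import XdawnCovariances
# from pyriemann.tangentspace import TangentSpace
# from sklearn.linear_model import LogisticRegression
# from sklearn.pipeline import make_pipeline
#
# X = np.random.randn(40, 8, 128)        # epochs x channels x times
# y = np.array([0, 1, 2, 3]).repeat(10)  # e.g. Rest/13 Hz/17 Hz/21 Hz
# clf2 = make_pipeline(
#     XdawnCovariances(estimator='lwf', xdawn_estimator='lwf'),
#     TangentSpace(),
#     LogisticRegression(penalty='elasticnet', solver='saga',
#                        multi_class='multinomial', l1_ratio=0.5,
#                        max_iter=500))
# clf2.fit(X, y)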
Example #23
clf = Pipeline([('COV', XdawnCovariances(n_components)), ('MDM', MDM())])

for train_idx, test_idx in cv.split(epochs_data):
    y_train, y_test = labels[train_idx], labels[test_idx]
    
    clf.fit(epochs_data[train_idx], y_train)
    scores.append(clf.score(epochs_data[test_idx], y_test))

# Printing the results
class_balance = np.mean(labels == labels[0])
class_balance = max(class_balance, 1. - class_balance)
print("Classification accuracy: %f / Chance level: %f" % (np.mean(scores),
                                                          class_balance))

# spatial patterns
xd = XdawnCovariances(n_components)
Cov = xd.fit_transform(epochs_data, labels)

evoked.data = xd.Xd.patterns_.T
evoked.times = np.arange(evoked.data.shape[0])
evoked.plot_topomap(times=[0, 1, n_components, n_components+1], ch_type='grad',
                    colorbar=False, size=1.5)

# prototyped covariance matrices
mdm = MDM()
mdm.fit(Cov, labels)
fig, axe = plt.subplots(1, 2)
axe[0].matshow(mdm.covmeans_[0])
axe[0].set_title('Class 1 covariance matrix')
axe[1].matshow(mdm.covmeans_[1])
axe[1].set_title('Class 2 covariance matrix')
                    tmin,
                    tmax,
                    proj=False,
                    picks=picks,
                    baseline=None,
                    preload=True,
                    verbose=False)

X = epochs.get_data()
y = epochs.events[:, -1]

###############################################################################
# Embedding the Xdawn covariance matrices with Laplacian Eigenmaps

nfilter = 4
xdwn = XdawnCovariances(estimator='scm', nfilter=nfilter)
split = train_test_split(X, y, train_size=0.25, random_state=42)
Xtrain, Xtest, ytrain, ytest = split
covs = xdwn.fit(Xtrain, ytrain).transform(Xtest)

lapl = Embedding(metric='riemann', n_components=2)
embd = lapl.fit_transform(covs)

###############################################################################
# Plot the first two components of the embedded points

fig, ax = plt.subplots(figsize=(7, 8), facecolor='white')

for cond, label in event_id.items():
    idx = (ytest == label)
    ax.scatter(embd[idx, 0], embd[idx, 1], s=36, label=cond)
Example #25
    def __run_strat_validation_RG(
            self,
            epochs,
            n_strat_folds=5,
            shuffle=False,
            random_state=42,
            RG_Pipeline_Num=0,
            estimator='lwf',
            class_names=['Rest', '13 Hz', '17 Hz', '21 Hz'],
            accuracy_threshold=0.7):
        """
        Complete a stratified cross-validation using a Riemannian Geometry pipeline.
        Parameters
        ----------
        epochs : Epoch Object from MNE
            Epoch data held in an appropriate MNE format. This could be derived from
            mne.Epochs, or using the `build_epochs` command included in this script.
        n_strat_folds : int, optional
            Number of folds for the stratified K-Fold cross-validation.
            This value should be chosen carefully to avoid unbalanced classes.
            The default is 5.
        shuffle : bool, optional
            Shuffle training set data. See sklearn.model_selection.StratifiedKFold
            for more details.
            The default is False.
        random_state : int, optional
            The value to be used as the 'seed' for `numpy.random.RandomState`.
            See sklearn.model_selection.StratifiedKFold for more details.
            The default is 42.
        RG_Pipeline_Num : int, optional
            Which pre-defined Riemannian Geometry pipeline to run for analysis.
            Can be 0,1,2,3:
                Pipeline 0:
                    Covariance w/ estimator -> Riemannian KNN
                Pipeline 1:
                    Covariance w/ estimator -> CSP -> TangentSpace -> LogisticRegression
                    LogReg uses a 'balanced' option for class weights, l2 penalty.
                Pipeline 2:
                    XDawnCovariance w/ estimator -> TangentSpace -> LogisticRegression
                    LogReg uses elasticnet penalty, solver 'saga' and a multinomial multi_class flag.
                Pipeline 3:
                    Covariance w/ estimator -> MDM.
                    Minimum distance to mean (MDM) is the main classification scheme.
            The default is 0.
        estimator : str, optional
            Covariance matrix estimator to use. For regularization consider 'lwf'
            or 'oas'. For complete lists, see pyriemann.utils.covariance.
            The default is 'lwf'.
        class_names : List, optional
            List of names for the confusion matrix plot.
            The default is ['Rest','13 Hz','17 Hz','21 Hz'].
        accuracy_threshold : float, optional
            Threshold for determining which folds are 'good' fits. Accuracy found
            above the threshold (e.g. 70% or greater) will be reported as good fit
            folds.
            The default is 0.7.
        Returns
        -------
        DICT
            Dictionary of outputs are returned for the user.
            In order:
                Fold accuracy -'Fold Acc'
                Indices for `good` training folds > or = to accuracy_threshold value - 'Good Train Ind'
                Indices for `good` test folds > or = to given accuracy_threshold value -  'Good Test Ind'
                Indices for `bad` train folds < given accuracy_threshold value - 'Bad Train Ind'
                Indices for `bad` test folds < given accuracy_threshold value - 'Bad Test Ind'
                List of predicted classes from the RG Pipeline - 'Prediction List'
                List of true classes from the RG Pipeline - 'True Class List'
        See Also
        --------
        mne.Epochs
        sklearn.model_selection.StratifiedKFold
        sklearn.linear_model.LogisticRegression
        pyriemann.estimation.Covariances
        pyriemann.estimation.XdawnCovariances
        pyriemann.spatialfilters.CSP
        pyriemann.tangentspace.TangentSpace
        pyriemann.classification.MDM
        pyriemann.classification.KNearestNeighbor (Riemannian KNN)
        """

        # Set the stratified CV model
        # honor the `shuffle` argument; sklearn rejects a random_state
        # when shuffle=False, so only pass it when shuffling
        cv_strat = StratifiedKFold(
            n_splits=n_strat_folds, shuffle=shuffle,
            random_state=random_state if shuffle else None)

        # Run one of the pre-defined pipelines
        if RG_Pipeline_Num == 1:
            clf = make_pipeline(
                Covariances(estimator=estimator), CSP(log=False),
                TangentSpace(),
                LogisticRegression(class_weight='balanced', max_iter=500))
        elif RG_Pipeline_Num == 2:
            clf = make_pipeline(
                XdawnCovariances(estimator=estimator,
                                 xdawn_estimator=estimator), TangentSpace(),
                LogisticRegression(penalty='elasticnet',
                                   class_weight=None,
                                   solver='saga',
                                   multi_class='multinomial',
                                   l1_ratio=0.5,
                                   max_iter=500))
        elif RG_Pipeline_Num == 3:
            clf = make_pipeline(Covariances(estimator=estimator),
                                MDM())  # This is the best so far
        else:
            print(
                "...Running a default pipeline for RG using Covariance, and KNN..."
            )
            clf = make_pipeline(Covariances(estimator=estimator), riem_KNN())

        # Get the labels for the data
        labels = epochs.events[:, -1]
        # Identify the data itself
        X_data = epochs.get_data()
        # Get the class names for the confusion matrix
        class_names = class_names

        # Make empty lists for each item in the stratified CV
        acc_list = []
        preds_list = []
        true_class_list = []
        good_train_indx = []
        good_test_indx = []
        bad_train_indx = []
        bad_test_indx = []

        # For loop testing each iteration of the stratified cross-validation
        for train_idx, test_idx in cv_strat.split(X_data, labels):
            # Get the x_train and x_test data for this fold
            x_train, x_test = X_data[train_idx], X_data[test_idx]
            # Get the y_train and y_test data for this fold
            y_train, y_test = labels[train_idx], labels[test_idx]
            # Fit the classifier
            clf.fit(x_train, y_train)
            # Find the predicted value on the test data in this fold
            preds = clf.predict(x_test)
            # Save in list
            preds_list.append(preds)
            # Save the true class labels in a list for this fold
            true_class_list.append(y_test)
            # Find the accuracy on average from this prediction
            acc_mean = np.average(preds == y_test)
            # Save the accuracy to a list
            acc_list.append(acc_mean)
            # Find out where the 'Good' training folds are. (Greater than threshold)
            if acc_mean >= accuracy_threshold:
                print(
                    "Train indices above accuracy threshold of " +
                    str(accuracy_threshold * 100) + "% are: ", train_idx)
                print(
                    "Test indices above accuracy threshold of " +
                    str(accuracy_threshold * 100) + "% are: ", test_idx)
                good_train_indx.append(train_idx)
                good_test_indx.append(test_idx)
            # Find out where the 'Bad' training folds are. (Less than threshold)
            else:
                bad_train_indx.append(train_idx)
                bad_test_indx.append(test_idx)
            # Make a plot for the confusion matrix
            fig = plt.figure()
            plot_confusion_matrix(y_test, preds, class_names)
        # Print out the final results from across all folds on average
        print(
            "The overall accuracy with " + str(n_strat_folds) +
            "-fold stratified CV was: ", np.average(acc_list))

        # Return output vals
        return dict({
            'Fold Acc': acc_list,
            'Good Train Ind': good_train_indx,
            'Good Test Ind': good_test_indx,
            'Bad Train Ind': bad_train_indx,
            'Bad Test Ind': bad_test_indx,
            'Prediction List': preds_list,
            'True Class List': true_class_list
        })
)

labels = epochs.events[:, -1]
evoked = epochs.average()

###############################################################################
# Decoding in tangent space with a logistic regression

n_components = 2  # pick some components

# Define a monte-carlo cross-validation generator (reduce variance):
cv = KFold(n_splits=10, shuffle=True, random_state=42)
epochs_data = epochs.get_data()

clf = make_pipeline(
    XdawnCovariances(n_components),
    TangentSpace(metric="riemann"),
    LogisticRegression(),
)

preds = np.zeros(len(labels))

for train_idx, test_idx in cv.split(epochs_data):
    y_train, y_test = labels[train_idx], labels[test_idx]

    clf.fit(epochs_data[train_idx], y_train)
    preds[test_idx] = clf.predict(epochs_data[test_idx])

# Printing the results
acc = np.mean(preds == labels)
print("Classification accuracy: %f " % (acc))
epochs_data = epochs.get_data()

print("Multiclass classification with XDAWN + MDM")

clf = make_pipeline(XdawnCovariances(n_components), MDM())

for train_idx, test_idx in cv.split(epochs_data):
    y_train, y_test = labels[train_idx], labels[test_idx]

    clf.fit(epochs_data[train_idx], y_train)
    pr[test_idx] = clf.predict(epochs_data[test_idx])

print(classification_report(labels, pr))

###############################################################################
# plot the spatial patterns
xd = XdawnCovariances(n_components)
xd.fit(epochs_data, labels)

evoked.data = xd.Xd.patterns_.T
evoked.times = np.arange(evoked.data.shape[0])
evoked.plot_topomap(
    times=[0, n_components, 2 * n_components, 3 * n_components],
    ch_type="grad", colorbar=False, size=1.5
)

###############################################################################
# plot the confusion matrix
names = ["audio left", "audio right", "vis left", "vis right"]
plot_confusion_matrix(labels, pr, names)
plt.show()
Example #28
    def _init(self):
        self.n_components = self.params.get('n_components', 3)
        self.pipeline = make_pipeline(
            XdawnCovariances(self.n_components),
            TangentSpace(metric='riemann'),
            LogisticRegression())
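# Hypothetical use of the pipeline built by _init above; the surrounding
# class, its `params` dict, and the data shapes are assumptions:
#
# X = np.random.randn(20, 8, 128)   # epochs x channels x times
# y = np.array([0, 1]).repeat(10)
# self.pipeline.fit(X, y)
# proba = self.pipeline.predict_proba(X)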