def xdawn_embedding(data, use_xdawn):
    """Perform embedding of EEG data in 2D Euclidean space,
    with Laplacian Eigenmaps on Xdawn covariances or with UMAP
    on tangent-space vectors.

    Parameters
    ----------
    data : dict
        A dictionary containing training and testing data
    use_xdawn : bool
        If True, embed Xdawn covariances with Laplacian Eigenmaps;
        otherwise map covariances to the tangent space and embed with UMAP.

    Returns
    -------
    array
        Embedded data
    """
    if use_xdawn:
        nfilter = 3
        xdwn = XdawnCovariances(estimator='scm', nfilter=nfilter)
        covs = xdwn.fit(data['train_x'], data['train_y']).transform(data['test_x'])

        lapl = Embedding(metric='riemann', n_components=3)
        embd = lapl.fit_transform(covs)
    else:
        tangent_space = Pipeline([
            ('cov_transform', Covariances(estimator='lwf')),
            ('tangent_space', TangentSpace(metric='riemann'))
        ])
        t_space = tangent_space.fit(data['train_x'],
                                    data['train_y']).transform(data['test_x'])
        reducer = umap.UMAP(n_neighbors=30, min_dist=1, spread=2)
        embd = reducer.fit_transform(t_space)

    return embd
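# A minimal usage sketch of the function above (assumption: the `data` dict
# layout below matches what the function expects; the synthetic shapes and
# labels are illustrative, not from the original code).
import numpy as np

rng = np.random.RandomState(42)
data = {
    'train_x': rng.randn(20, 8, 100),   # (n_trials, n_channels, n_times)
    'train_y': np.array([0, 1]).repeat(10),
    'test_x': rng.randn(10, 8, 100),
}
embedded = xdawn_embedding(data, use_xdawn=True)
print(embedded.shape)  # expected (10, 3): one 3-D point per test trial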
def N170_test(session_data):
    markers = N170_MARKERS
    epochs = get_session_erp_epochs(session_data, markers)
    conditions = OrderedDict()
    for i in range(len(markers)):
        conditions[markers[i]] = [i + 1]

    clfs = OrderedDict()
    clfs['Vect + LR'] = make_pipeline(Vectorizer(), StandardScaler(), LogisticRegression())
    clfs['Vect + RegLDA'] = make_pipeline(Vectorizer(), LDA(shrinkage='auto', solver='eigen'))
    clfs['ERPCov + TS'] = make_pipeline(ERPCovariances(estimator='oas'), TangentSpace(), LogisticRegression())
    clfs['ERPCov + MDM'] = make_pipeline(ERPCovariances(estimator='oas'), MDM())
    clfs['XdawnCov + TS'] = make_pipeline(XdawnCovariances(estimator='oas'), TangentSpace(), LogisticRegression())
    clfs['XdawnCov + MDM'] = make_pipeline(XdawnCovariances(estimator='oas'), MDM())
    methods_list = ['Vect + LR', 'Vect + RegLDA', 'ERPCov + TS', 'ERPCov + MDM',
                    'XdawnCov + TS', 'XdawnCov + MDM']

    # format data
    epochs.pick_types(eeg=True)
    X = epochs.get_data() * 1e6
    times = epochs.times
    y = epochs.events[:, -1]

    # define cross validation
    cv = StratifiedShuffleSplit(n_splits=20, test_size=0.25, random_state=42)

    # run cross validation for each pipeline
    auc = []
    methods = []
    print('Calculation in progress...')
    for m in clfs:
        try:
            res = cross_val_score(clfs[m], X, y == 2, scoring='roc_auc',
                                  cv=cv, n_jobs=-1)
            auc.extend(res)
            methods.extend([m] * len(res))
        except Exception as ex:
            print('Cross-validation failed for %s: %s' % (m, ex))

    ## Plot Decoding Results
    results = pd.DataFrame(data=auc, columns=['AUC'])
    results['Method'] = methods
    n_row, n_column = results.shape

    auc_means = []
    for method in methods_list:
        auc = []
        for i in range(n_row):
            if results.loc[i, 'Method'] == method:
                auc.append(results.loc[i, 'AUC'])
        auc_means.append(np.mean(auc))

    # count pipelines whose mean AUC reaches 0.7
    counter = 0
    for i in range(len(methods_list)):
        color = 'green' if auc_means[i] >= 0.7 else 'red'  # leftover from plotting; unused here
        counter = counter + 1 if auc_means[i] >= 0.7 else counter
    return counter > 0, counter
def test_Xdawncovariances():
    """Test fit XdawnCovariances"""
    x = np.random.randn(10, 3, 100)
    labels = np.array([0, 1]).repeat(5)
    cov = XdawnCovariances()
    cov.fit_transform(x, labels)
    assert_equal(cov.get_params(), dict(nfilter=4, applyfilters=True,
                                        classes=None, estimator='scm',
                                        xdawn_estimator='scm'))
def test_Xdawncovariances():
    """Test fit XdawnCovariances"""
    x = np.random.randn(10, 3, 100)
    labels = np.array([0, 1]).repeat(5)
    cov = XdawnCovariances()
    cov.fit_transform(x, labels)
    assert_equal(cov.get_params(), dict(nfilter=4, applyfilters=True,
                                        classes=None, estimator='scm',
                                        xdawn_estimator='scm',
                                        baseline_cov=None))
def test_xdawn_covariances_applyfilters(rndstate, get_labels):
    n_classes, nfilter = 2, 2
    n_matrices, n_channels, n_times = 4, 6, 100
    x = rndstate.randn(n_matrices, n_channels, n_times)
    labels = get_labels(n_matrices, n_classes)
    cov = XdawnCovariances(nfilter=nfilter, applyfilters=False)
    covmats = cov.fit_transform(x, labels)
    # with applyfilters=False, the super-trial stacks the prototyped ERPs
    # (n_classes * nfilter channels) on top of the unfiltered signal
    covsize = n_classes * nfilter + n_channels
    assert covmats.shape == (n_matrices, covsize, covsize)
    assert is_spsd(covmats)
def test_xdawn_covariances_nfilter(nfilter, rndstate, get_labels):
    """Test fit XdawnCovariances"""
    n_classes, n_matrices, n_channels, n_times = 2, 4, 8, 100
    x = rndstate.randn(n_matrices, n_channels, n_times)
    labels = get_labels(n_matrices, n_classes)
    cov = XdawnCovariances(nfilter=nfilter)
    covmats = cov.fit_transform(x, labels)
    assert cov.get_params() == dict(
        nfilter=nfilter,
        applyfilters=True,
        classes=None,
        estimator="scm",
        xdawn_estimator="scm",
        baseline_cov=None,
    )
    covsize = 2 * (n_classes * nfilter)
    assert covmats.shape == (n_matrices, covsize, covsize)
    assert is_spsd(covmats)
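# Why the two shape assertions above differ (a sketch based on the pyriemann
# documentation for XdawnCovariances; the helper below is hypothetical):
def expected_covsize(n_classes, nfilter, n_channels, applyfilters):
    # super-trial = [prototyped ERP responses ; (filtered or raw) signal]
    erp_channels = n_classes * nfilter
    signal_channels = erp_channels if applyfilters else n_channels
    return erp_channels + signal_channels

assert expected_covsize(2, 2, 6, applyfilters=False) == 2 * 2 + 6   # applyfilters=False test
assert expected_covsize(2, 4, 8, applyfilters=True) == 2 * (2 * 4)  # nfilter test above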
def xdawn_embedding(data):
    """Perform embedding of EEG data in 2D Euclidean space
    with Laplacian Eigenmaps.

    Parameters
    ----------
    data : dict
        A dictionary containing training and testing data

    Returns
    -------
    array
        Embedded data
    """
    nfilter = 3
    xdwn = XdawnCovariances(estimator='scm', nfilter=nfilter)
    covs = xdwn.fit(data['train_x'], data['train_y']).transform(data['test_x'])

    lapl = Embedding(metric='riemann', n_components=3)
    embd = lapl.fit_transform(covs)

    return embd
def get_sourcetarget_split_p300(source, target, ncovs_train):
    X_source = source['signals']
    y_source = source['labels'].flatten()
    covs_source = XdawnCovariances(classes=[2]).fit_transform(X_source, y_source)

    source = {}
    source['covs'] = covs_source
    source['labels'] = y_source

    X_target = target['signals']
    y_target = target['labels'].flatten()

    if ncovs_train is None:
        ncovs_train = np.sum(y_target == 2)

    sel = np.arange(len(y_target))
    np.random.shuffle(sel)
    X_target = X_target[sel]
    y_target = y_target[sel]

    idx_erps = np.where(y_target == 2)[0][:ncovs_train]
    # because there's one ERP in every 6 flashes, keep 5x more non-targets
    idx_rest = np.where(y_target == 1)[0][:ncovs_train * 5]
    idx_train = np.concatenate([idx_erps, idx_rest])
    idx_test = np.array([i for i in range(len(y_target)) if i not in idx_train])

    erp = XdawnCovariances(classes=[2])
    erp.fit(X_target[idx_train], y_target[idx_train])

    target_train = {}
    covs_target_train = erp.transform(X_target[idx_train])
    y_target_train = y_target[idx_train]
    target_train['covs'] = covs_target_train
    target_train['labels'] = y_target_train

    target_test = {}
    covs_target_test = erp.transform(X_target[idx_test])
    y_target_test = y_target[idx_test]
    target_test['covs'] = covs_target_test
    target_test['labels'] = y_target_test

    return source, target_train, target_test
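# A minimal usage sketch of the splitter above (assumptions: the
# 'signals'/'labels' keys and the 1 = non-target / 2 = target coding follow
# the function; shapes and the 1-in-6 target ratio are illustrative).
import numpy as np

def make_session(n_trials=120, n_channels=8, n_times=100, seed=0):
    rng = np.random.RandomState(seed)
    return {'signals': rng.randn(n_trials, n_channels, n_times),
            'labels': np.tile([1, 1, 1, 1, 1, 2], n_trials // 6)}

source, target_train, target_test = get_sourcetarget_split_p300(
    make_session(seed=0), make_session(seed=1), ncovs_train=5)
print(target_train['covs'].shape)  # (ncovs_train * 6, covsize, covsize)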
    return np.reshape(X, (X.shape[0], -1))


##############################################################################
# Create pipelines
# ----------------
#
# Pipelines must be a dict of sklearn pipeline transformers.

pipelines = {}

# We have to do this because the classes are called 'Target' and 'NonTarget'
# but the evaluation function uses a LabelEncoder, transforming them
# to 0 and 1.
labels_dict = {'Target': 1, 'NonTarget': 0}

pipelines['RG + LDA'] = make_pipeline(
    XdawnCovariances(nfilter=2,
                     classes=[labels_dict['Target']],
                     estimator='lwf',
                     xdawn_estimator='lwf'),
    TangentSpace(),
    LDA(solver='lsqr', shrinkage='auto'))

pipelines['Xdw + LDA'] = make_pipeline(
    Xdawn(nfilter=2, estimator='lwf'),
    Vectorizer(),
    LDA(solver='lsqr', shrinkage='auto'))

pipelines['ERPCov + TS'] = make_pipeline(
    ERPCovariances(classes=[0, 1], estimator='oas', svd=None),
    TangentSpace(metric='riemann'),
    LogisticRegression(solver='lbfgs'))
##############################################################################
# Create pipelines
# ----------------
#
# Pipelines must be a dict of sklearn pipeline transformers.

pipelines = {}

# We have to do this because the classes are called 'Target' and 'NonTarget'
# but the evaluation function uses a LabelEncoder, transforming them
# to 0 and 1.
labels_dict = {"Target": 1, "NonTarget": 0}

pipelines["RG + LDA"] = make_pipeline(
    XdawnCovariances(nfilter=2,
                     classes=[labels_dict["Target"]],
                     estimator="lwf",
                     xdawn_estimator="lwf"),
    TangentSpace(),
    LDA(solver="lsqr", shrinkage="auto"),
)

pipelines["Xdw + LDA"] = make_pipeline(
    Xdawn(nfilter=2, estimator="lwf"),
    Vectorizer(),
    LDA(solver="lsqr", shrinkage="auto"),
)

##############################################################################
# Evaluation
# ----------
#
# We define the paradigm (P300) and use all three datasets available for it.
# The evaluation will return a dataframe containing a single AUC score for
labels = epochs.events[:, -1]
evoked = epochs.average()

###############################################################################
# Multiclass decoding in sensor space with Xdawn covariances

n_components = 3  # pick some components

# Define a monte-carlo cross-validation generator (reduce variance):
cv = KFold(n_splits=10, shuffle=True, random_state=42)
pr = np.zeros(len(labels))
epochs_data = epochs.get_data()

print('Multiclass classification with XDAWN + MDM')

clf = Pipeline([('COV', XdawnCovariances(n_components)), ('MDM', MDM())])

for train_idx, test_idx in cv.split(epochs_data):
    y_train, y_test = labels[train_idx], labels[test_idx]
    clf.fit(epochs_data[train_idx], y_train)
    pr[test_idx] = clf.predict(epochs_data[test_idx])

print(classification_report(labels, pr))
print(confusion_matrix(labels, pr))

print('Multiclass classification with XDAWN + FgMDM')

clf = Pipeline([('COV', XdawnCovariances(n_components)), ('MDM', FgMDM())])

for train_idx, test_idx in cv.split(epochs_data):
    y_train, y_test = labels[train_idx], labels[test_idx]
# ____________________________________________________________________________
# Create pipelines
# ----------------
# Pipelines must be a dict of sklearn pipeline transformers.

pipelines = {}

# We have to do this because the classes are called 'Target' and 'NonTarget'
# but the evaluation function uses a LabelEncoder, transforming them
# to 0 and 1.
labels_dict = {'Target': 1, 'NonTarget': 0}

# %%
# from sklearn.preprocessing import StandardScaler
pipelines['RG + LRR'] = make_pipeline(
    XdawnCovariances(nfilter=2,
                     classes=[labels_dict['Target']],
                     estimator='lwf',
                     xdawn_estimator='lwf'),
    TangentSpace(),
    LRR())

# %%
pipelines['Xdawn + LRR'] = make_pipeline(
    Xdawn(nfilter=2, estimator='lwf'),
    Vectorizer(),
    LRR())

# %%
pipelines['LRR'] = make_pipeline(Vectorizer(), LRR())

# ____________________________________________________________________________
# Evaluation

# %%
paradigm = P300(resample=128)
dataset = BNCI2015003()
                    reject={'eeg': 75e-6}, preload=True,
                    verbose=False, picks=[0, 1, 2, 3])
print('sample drop %: ', (1 - len(epochs.events) / len(events)) * 100)
epochs

###################################################################################################
# Run classification
# ------------------

clfs = OrderedDict()
clfs['Vect + LR'] = make_pipeline(Vectorizer(), StandardScaler(), LogisticRegression())
clfs['Vect + RegLDA'] = make_pipeline(Vectorizer(), LDA(shrinkage='auto', solver='eigen'))
clfs['ERPCov + TS'] = make_pipeline(ERPCovariances(estimator='oas'), TangentSpace(), LogisticRegression())
clfs['ERPCov + MDM'] = make_pipeline(ERPCovariances(estimator='oas'), MDM())
clfs['XdawnCov + TS'] = make_pipeline(XdawnCovariances(estimator='oas'), TangentSpace(), LogisticRegression())
clfs['XdawnCov + MDM'] = make_pipeline(XdawnCovariances(estimator='oas'), MDM())

# format data
epochs.pick_types(eeg=True)
X = epochs.get_data() * 1e6
times = epochs.times
y = epochs.events[:, -1]

# define cross validation
cv = StratifiedShuffleSplit(n_splits=20, test_size=0.25, random_state=42)

# run cross validation for each pipeline
auc = []
methods = []
    subject_dir_list = test_list_np[i]
    subject_epoch = np.empty((0, 56, 260), float)
    for j in range(5):
        subject_dir = subject_dir_list[j]
        data = epoching('./data/test/' + subject_dir)
        subject_epoch = np.vstack((subject_epoch, data))
    subject_epoch = np.reshape(subject_epoch, (1, 340, 56, 260))
    test_data_list = np.vstack((test_data_list, subject_epoch))

print('Epoched training data shape: ' + str(train_data_list.shape))
print('Epoched testing data shape: ' + str(test_data_list.shape))

# ########################## apply data preprocessing ############################
y_train = pd.read_csv('TrainLabels.csv')['Prediction'].values
y_test = np.reshape(pd.read_csv('true_labels.csv', header=None).values, 3400)

# fit the spatial filters and the tangent-space mapping on the training data
# only, then reuse them on the test data (refitting on the test set would
# leak its labels into the features)
XC = XdawnCovariances(nfilter=5)
output_train = XC.fit_transform(
    np.reshape(train_data_list, (16 * 340, 56, 260)), y_train)
ts = TangentSpace(metric='riemann')
X_train = ts.fit_transform(output_train)
output_test = XC.transform(np.reshape(test_data_list, (10 * 340, 56, 260)))
X_test = ts.transform(output_test)

print('Preprocessed training data shape: ' + str(X_train.shape))
print('Preprocessed testing data shape: ' + str(X_test.shape))

# ############################## save data to disk ###############################
np.save('./data/train_data_56_260_1_40Hz.npy', train_data_list)
np.save('./data/test_data_56_260_1_40Hz.npy', test_data_list)
np.save('./data/X_train', X_train)
np.save('./data/X_test', X_test)
def test_Xdawncovariances():
    """Test fit XdawnCovariances"""
    x = np.random.randn(10, 3, 100)
    labels = np.array([0, 1]).repeat(5)
    cov = XdawnCovariances()
    cov.fit_transform(x, labels)
                    picks=picks, baseline=None, preload=True)

labels = epochs.events[:, -1]
evoked = epochs.average()

###############################################################################
# Decoding in sensor space with Xdawn covariances + MDM

n_components = 3  # pick some components

# Define a monte-carlo cross-validation generator (reduce variance):
cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=42)
scores = []
epochs_data = epochs.get_data()

clf = Pipeline([('COV', XdawnCovariances(n_components)), ('MDM', MDM())])

for train_idx, test_idx in cv.split(epochs_data):
    y_train, y_test = labels[train_idx], labels[test_idx]
    clf.fit(epochs_data[train_idx], y_train)
    scores.append(clf.score(epochs_data[test_idx], y_test))

# Printing the results
class_balance = np.mean(labels == labels[0])
class_balance = max(class_balance, 1. - class_balance)
print("Classification accuracy: %f / Chance level: %f" % (np.mean(scores),
                                                          class_balance))
    subject_dir_list = test_list_np[testing_participant_id]
    subject_epoch = np.empty((0, len(channels), epoch_len), float)
    for trial_id in range(trial_per_subj):
        subject_dir = subject_dir_list[trial_id]
        data = generate_epoch('FeedBackEvent', './data/test/' + subject_dir,
                              channels, fs, lowcut, highcut,
                              epoch_s, epoch_e, bl_s, bl_e)
        subject_epoch = np.vstack((subject_epoch, data))
    subject_epoch = np.reshape(
        subject_epoch, (1, stimulus_per_subj, len(channels), epoch_len))
    test_data_list = np.vstack((test_data_list, subject_epoch))

print('Epoched testing data shape: ' + str(test_data_list.shape))

# ########################## apply data preprocessing ############################
y_train = pd.read_csv('data/TrainLabels.csv')['Prediction'].values

# fit on the training data, then reuse the fitted transformers on the test
# data (TangentSpace must be fitted before transform)
XC = XdawnCovariances(nfilter=5)
X_train = XC.fit_transform(np.reshape(
    train_data_list,
    (total_training_participant * stimulus_per_subj, len(channels), epoch_len)),
    y_train)
ts = TangentSpace(metric='riemann')
X_train = ts.fit_transform(X_train)
X_test = XC.transform(np.reshape(
    test_data_list, (10 * stimulus_per_subj, len(channels), epoch_len)))
X_test = ts.transform(X_test)

print('Preprocessed training data shape: ' + str(X_train.shape))
print('Preprocessed testing data shape: ' + str(X_test.shape))

# ############################## save data to disk ###############################
np.save('./data/train_data.npy', train_data_list)
np.save('./data/test_data.npy', test_data_list)
np.save('./data/X_train', X_train)
np.save('./data/X_test', X_test)
def test_Xdawncovariances():
    """Test fit XdawnCovariances"""
    x = np.random.randn(10, 3, 100)
    labels = np.array([0, 1]).repeat(5)
    cov = XdawnCovariances()
    cov.fit_transform(x, labels)
def decode(epochs, get_y_label_func, epoch_filter=None, decoding_method='standard',
           sliding_window_size=None, sliding_window_step=None,
           n_jobs=multiprocessing.cpu_count(), equalize_event_counts=True,
           only_fit=False, generalize_across_time=True):
    """
    Basic flow for decoding
    """
    config = dict(equalize_event_counts=equalize_event_counts, only_fit=only_fit,
                  sliding_window_size=sliding_window_size,
                  sliding_window_step=sliding_window_step,
                  decoding_method=decoding_method,
                  generalize_across_time=generalize_across_time,
                  epoch_filter=str(epoch_filter))

    if epoch_filter is not None:
        epochs = epochs[epoch_filter]

    #-- Classify epochs into groups (training epochs)
    y_labels = get_y_label_func(epochs)

    if equalize_event_counts:
        epochs.events[:, 2] = y_labels
        epochs.event_id = {str(label): label for label in np.unique(y_labels)}
        min_n_items_per_y_label = min(
            [len(epochs[cond]) for cond in epochs.event_id.keys()])
        print("\nEqualizing the number of epochs to %d per condition..." %
              min_n_items_per_y_label)
        epochs.equalize_event_counts(epochs.event_id.keys())
        y_labels = epochs.events[:, 2]

    print("The epochs were classified into %d groups:" % len(set(y_labels)))
    for g in set(y_labels):
        print("Group {:}: {:} epochs".format(g, sum(np.array(y_labels) == g)))

    #-- Create the decoding pipeline
    print("Creating the classification pipeline...")

    epochs_data = epochs.get_data()

    preprocess_pipeline = None

    if decoding_method.startswith('standard'):
        if 'reg' in decoding_method:
            clf = make_pipeline(StandardScaler(), Ridge())
        else:
            clf = make_pipeline(
                StandardScaler(),
                svm.SVC(C=1, kernel='linear', class_weight='balanced'))

        if 'raw' not in decoding_method:
            assert sliding_window_size is not None
            assert sliding_window_step is not None
            preprocess_pipeline = make_pipeline(
                umne.transformers.SlidingWindow(window_size=sliding_window_size,
                                                step=sliding_window_step,
                                                average=True))

    elif decoding_method == 'ERP_cov':
        clf = make_pipeline(
            UnsupervisedSpatialFilter(PCA(20), average=False),
            ERPCovariances(estimator='lwf'),  # todo how to apply sliding window?
            CSP(30, log=False),
            TangentSpace('logeuclid'),
            LogisticRegression(penalty='l2'))  # todo why logistic regression?
    elif decoding_method == 'Xdawn_cov':
        clf = make_pipeline(
            UnsupervisedSpatialFilter(PCA(50), average=False),
            XdawnCovariances(12, estimator='lwf', xdawn_estimator='lwf'),
            TangentSpace('logeuclid'),
            LogisticRegression(penalty='l2'))

    elif decoding_method == 'Hankel_cov':
        clf = make_pipeline(
            UnsupervisedSpatialFilter(PCA(70), average=False),
            HankelCovariances(delays=[1, 8, 12, 64], estimator='oas'),
            CSP(15, log=False),
            TangentSpace('logeuclid'),
            LogisticRegression(penalty='l2'))

    else:
        raise Exception('Unknown decoding method: {:}'.format(decoding_method))

    print('\nDecoding pipeline:')
    for i in range(len(clf.steps)):
        print('Step #{:}: {:}'.format(i + 1, clf.steps[i][1]))

    if preprocess_pipeline is not None:
        print('\nApplying the pre-processing pipeline:')
        for i in range(len(preprocess_pipeline.steps)):
            print('Step #{:}: {:}'.format(i + 1, preprocess_pipeline.steps[i][1]))
        epochs_data = preprocess_pipeline.fit_transform(epochs_data)

    if only_fit:
        #-- Only fit the decoders
        procedure = 'only_fit'
        scores = None
        cv = None

        if decoding_method.startswith('standard'):
            if 'reg' in decoding_method:
                if 'r2' in decoding_method:
                    scoring = metrics.make_scorer(metrics.r2_score)
                else:
                    scoring = metrics.make_scorer(metrics.mean_squared_error)
            else:
                scoring = 'accuracy'

            if generalize_across_time:
                estimator = GeneralizingEstimator(clf, scoring=scoring, n_jobs=n_jobs)
            else:
                estimator = SlidingEstimator(clf, scoring=scoring, n_jobs=n_jobs)
        else:
            estimator = clf

        estimator.fit(X=epochs_data, y=y_labels)

    else:
        #-- Classify & score -- cross-validation
        procedure = 'fit_and_score'
        print("\nCreating a classifier and calculating accuracy scores "
              "(this may take some time)...")

        cv = StratifiedKFold(n_splits=5)

        if decoding_method.startswith('standard'):
            if 'reg' in decoding_method:
                if 'r2' in decoding_method:
                    scoring = metrics.make_scorer(metrics.r2_score)
                else:
                    scoring = metrics.make_scorer(metrics.mean_squared_error)
            else:
                scoring = 'accuracy'

            if generalize_across_time:
                estimator = GeneralizingEstimator(clf, scoring=scoring, n_jobs=n_jobs)
            else:
                estimator = SlidingEstimator(clf, scoring=scoring, n_jobs=n_jobs)

            scores = cross_val_multiscore(estimator=estimator, X=epochs_data,
                                          y=np.array(y_labels), cv=cv)
        else:
            scores = _run_cross_validation(X=epochs_data, y=np.array(y_labels),
                                           clf=clf, cv=cv)
            estimator = 'None'  # Estimator is not defined in the case of Riemannian decoding

    times = np.linspace(epochs.tmin, epochs.tmax, epochs_data.shape[2])

    return dict(procedure=procedure, estimator=estimator, scores=scores,
                pipeline=clf, preprocess=preprocess_pipeline, cv=cv,
                times=times, config=config)
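# A minimal usage sketch of decode() (assumptions: run in this module's
# context with an mne.Epochs object named `epochs`; the label function below
# is hypothetical and simply reuses the event codes).
def label_by_event_code(epochs):
    return epochs.events[:, 2]

res = decode(epochs, get_y_label_func=label_by_event_code,
             decoding_method='Xdawn_cov', generalize_across_time=False)
print(res['procedure'], np.mean(res['scores']))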
stats['VR'] = {}
stats['PC'] = {}

for condition in datasets.keys():

    # get the epochs and labels
    X, y, meta = paradigm.get_data(datasets[condition], subjects=[subject])
    y = LabelEncoder().fit_transform(y)
    data[condition]['X'] = X
    data[condition]['y'] = y

    # estimate xDawn covs
    ncomps = 4
    erp = XdawnCovariances(classes=[1], estimator='lwf', nfilter=ncomps,
                           xdawn_estimator='lwf')
    # erp = ERPCovariances(classes=[1], estimator='lwf', svd=ncomps)
    split = train_test_split(X, y, train_size=0.50, random_state=42)
    Xtrain, Xtest, ytrain, ytest = split
    covs = erp.fit(Xtrain, ytrain).transform(Xtest)

    Mtarget = mean_riemann(covs[ytest == 1])
    Mnontarget = mean_riemann(covs[ytest == 0])
    stats[condition]['distance'] = distance_riemann(Mtarget, Mnontarget)
    stats[condition]['dispersion_target'] = np.sum(
        [distance_riemann(covi, Mtarget)**2 for covi in covs[ytest == 1]]
    ) / len(covs[ytest == 1])
    stats[condition]['dispersion_nontarget'] = np.sum(
        [distance_riemann(covi, Mnontarget)**2 for covi in covs[ytest == 0]]
    ) / len(covs[ytest == 0])
raw.info['bads'] = ['MEG 2443']  # set bad channels
picks = mne.pick_types(raw.info, meg=True, eeg=False, stim=False, eog=False,
                       exclude='bads')

# Read epochs
epochs = mne.Epochs(raw, events, event_id, tmin, tmax, proj=False,
                    picks=picks, baseline=None, preload=True, verbose=False)

X = epochs.get_data()
y = epochs.events[:, -1]

###############################################################################
# Embedding the Xdawn covariance matrices with Laplacian Eigenmaps

nfilter = 4
xdwn = XdawnCovariances(estimator='scm', nfilter=nfilter)
split = train_test_split(X, y, train_size=0.25, random_state=42)
Xtrain, Xtest, ytrain, ytest = split
covs = xdwn.fit(Xtrain, ytrain).transform(Xtest)

lapl = Embedding(metric='riemann', n_components=2)
embd = lapl.fit_transform(covs)

###############################################################################
# Plot the first two components of the embedded points

fig, ax = plt.subplots(figsize=(7, 8), facecolor='white')

for cond, label in event_id.items():
    idx = (ytest == label)
    ax.scatter(embd[idx, 0], embd[idx, 1], s=36, label=cond)
    def __train_predefined_classifier(
            self, epochs, RG_Pipeline_Num=0, estimator='lwf',
            estimate_accuracy=False, random_state=44,
            class_names=['Rest', '13 Hz', '17 Hz', '21 Hz']):
        """
        Train a predefined Riemannian Geometry pipeline on a single dataset
        using MNE and pyriemann.

        Parameters
        ----------
        epochs : Epoch Object from MNE
            Epoch data held in an appropriate MNE format. This could be derived
            from mne.Epochs, or using the `build_epochs` command included in
            this script.
        RG_Pipeline_Num : int, optional
            Which pre-defined Riemannian Geometry pipeline to run for analysis.
            Can be 0, 1, 2, 3:
                Pipeline 0: Covariance w/ estimator -> Riemannian KNN
                Pipeline 1: Covariance w/ estimator -> CSP -> TangentSpace
                    -> LogisticRegression. LogReg uses a 'balanced' option
                    for class weights, l2 penalty.
                Pipeline 2: XdawnCovariances w/ estimator -> TangentSpace
                    -> LogisticRegression. LogReg uses elasticnet penalty,
                    solver saga and a multinomial multi_class flag.
                Pipeline 3: Covariance w/ estimator -> MDM. Minimum distance
                    to mean (MDM) is the main classification scheme.
            The default is 0.
        estimator : str, optional
            Covariance matrix estimator to use. For regularization consider
            'lwf' or 'oas'. For complete lists, see pyriemann.utils.covariance.
            The default is 'lwf'.
        estimate_accuracy : bool, optional
            Estimate model accuracy roughly using a simple data-hold out
            train/test split. A default hold out of 75/25% train, test
            respectively is used. The default is False.
        random_state : int, optional
            The value to be used as the 'seed' for `numpy.random.RandomState`.
            See sklearn.model_selection.StratifiedKFold for more details.
            The default is 44.
        class_names : List, optional
            List of names for the confusion matrix plot.
            The default is ['Rest', '13 Hz', '17 Hz', '21 Hz'].

        Returns
        -------
        clf : Classifier object (sklearn)
            Returns a trained classifier object based on the given epoch data
            and Riemannian Geometry pipeline.

        See Also
        --------
        mne.Epochs
        sklearn.model_selection.StratifiedKFold
        sklearn.linear_model.LogisticRegression
        pyriemann.estimation.Covariances
        pyriemann.estimation.XdawnCovariances
        pyriemann.spatialfilters.CSP
        pyriemann.tangentspace.TangentSpace
        pyriemann.classification.MDM
        pyriemann.classification.KNearestNeighbor (Riemannian KNN)
        """
        # Run one of the pre-defined pipelines
        if RG_Pipeline_Num == 1:
            clf = make_pipeline(
                Covariances(estimator=estimator), CSP(log=False), TangentSpace(),
                LogisticRegression(class_weight='balanced', max_iter=500))
        elif RG_Pipeline_Num == 2:
            clf = make_pipeline(
                XdawnCovariances(estimator=estimator, xdawn_estimator=estimator),
                TangentSpace(),
                LogisticRegression(penalty='elasticnet', l1_ratio=0.5,
                                   class_weight=None, solver='saga',
                                   multi_class='multinomial', max_iter=500))
        elif RG_Pipeline_Num == 3:
            clf = make_pipeline(Covariances(estimator=estimator),
                                MDM())  # This is the best so far
        else:
            print("...Running a default pipeline for RG using Covariance, and KNN...")
            clf = make_pipeline(Covariances(estimator=estimator), riem_KNN())

        # Get the labels for the data
        labels = epochs.events[:, -1]
        # Identify the data itself
        X_data = epochs.get_data()
        # Get the class names for the confusion matrix
        class_names = class_names

        # This is NOT a great measure of the model accuracy. This just will
        # give you a rough estimate of how it is performing within its own
        # dataset. This should be used sparingly!
        if estimate_accuracy is True:
            # Do a simple data-hold out for testing
            x_train, x_test, y_train, y_test = train_test_split(
                X_data, labels, test_size=0.25, random_state=random_state)
            clf_estimate = clf
            clf_estimate.fit(x_train, y_train)
            pred_vals = clf_estimate.predict(x_test)
            accuracy_val = np.mean(pred_vals == y_test)
            fig = plt.figure()
            plot_confusion_matrix(y_test, pred_vals, class_names)

        # Fit the data to the given epoch information
        clf.fit(X_data, labels)

        return clf
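# A minimal standalone sketch of "Pipeline 2" from the docstring above
# (assumption: synthetic data stands in for real epochs; shapes, labels and
# the random seed are illustrative, not from the original code).
import numpy as np
from pyriemann.estimation import XdawnCovariances
from pyriemann.tangentspace import TangentSpace
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline

rng = np.random.RandomState(44)
X = rng.randn(40, 8, 128)       # (n_epochs, n_channels, n_times)
y = np.tile([1, 2, 3, 4], 10)   # four classes, as in class_names

clf = make_pipeline(
    XdawnCovariances(estimator='lwf', xdawn_estimator='lwf'),
    TangentSpace(),
    LogisticRegression(penalty='elasticnet', l1_ratio=0.5, solver='saga',
                       multi_class='multinomial', max_iter=500))
clf.fit(X, y)
print(clf.predict(X[:4]))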
clf = Pipeline([('COV', XdawnCovariances(n_components)), ('MDM', MDM())])

for train_idx, test_idx in cv.split(epochs_data):
    y_train, y_test = labels[train_idx], labels[test_idx]
    clf.fit(epochs_data[train_idx], y_train)
    scores.append(clf.score(epochs_data[test_idx], y_test))

# Printing the results
class_balance = np.mean(labels == labels[0])
class_balance = max(class_balance, 1. - class_balance)
print("Classification accuracy: %f / Chance level: %f" % (np.mean(scores),
                                                          class_balance))

# spatial patterns
xd = XdawnCovariances(n_components)
Cov = xd.fit_transform(epochs_data, labels)

evoked.data = xd.Xd_.patterns_.T
evoked.times = np.arange(evoked.data.shape[0])
evoked.plot_topomap(times=[0, 1, n_components, n_components + 1],
                    ch_type='grad', colorbar=False, size=1.5)

# prototyped covariance matrices
mdm = MDM()
mdm.fit(Cov, labels)
fig, axe = plt.subplots(1, 2)
axe[0].matshow(mdm.covmeans_[0])
axe[0].set_title('Class 1 covariance matrix')
axe[1].matshow(mdm.covmeans_[1])
axe[1].set_title('Class 2 covariance matrix')
                    tmin, tmax, proj=False, picks=picks,
                    baseline=None, preload=True, verbose=False)

X = epochs.get_data()
y = epochs.events[:, -1]

###############################################################################
# Embedding the Xdawn covariance matrices with Laplacian Eigenmaps

nfilter = 4
xdwn = XdawnCovariances(estimator='scm', nfilter=nfilter)
split = train_test_split(X, y, train_size=0.25, random_state=42)
Xtrain, Xtest, ytrain, ytest = split
covs = xdwn.fit(Xtrain, ytrain).transform(Xtest)

lapl = Embedding(metric='riemann', n_components=2)
embd = lapl.fit_transform(covs)

###############################################################################
# Plot the first two components of the embedded points

fig, ax = plt.subplots(figsize=(7, 8), facecolor='white')

for cond, label in event_id.items():
    idx = (ytest == label)
    ax.scatter(embd[idx, 0], embd[idx, 1], s=36, label=cond)
    def __run_strat_validation_RG(
            self, epochs, n_strat_folds=5, shuffle=False, random_state=42,
            RG_Pipeline_Num=0, estimator='lwf',
            class_names=['Rest', '13 Hz', '17 Hz', '21 Hz'],
            accuracy_threshold=0.7):
        """
        Complete a stratified cross-validation using a Riemannian Geometry
        pipeline.

        Parameters
        ----------
        epochs : Epoch Object from MNE
            Epoch data held in an appropriate MNE format. This could be derived
            from mne.Epochs, or using the `build_epochs` command included in
            this script.
        n_strat_folds : int, optional
            Number of folds for the stratified K-Fold cross-validation.
            This value should be chosen carefully to avoid unbalanced classes.
            The default is 5.
        shuffle : bool, optional
            Shuffle training set data. See
            sklearn.model_selection.StratifiedKFold for more details.
            The default is False.
        random_state : int, optional
            The value to be used as the 'seed' for `numpy.random.RandomState`.
            See sklearn.model_selection.StratifiedKFold for more details.
            The default is 42.
        RG_Pipeline_Num : int, optional
            Which pre-defined Riemannian Geometry pipeline to run for analysis.
            Can be 0, 1, 2, 3:
                Pipeline 0: Covariance w/ estimator -> Riemannian KNN
                Pipeline 1: Covariance w/ estimator -> CSP -> TangentSpace
                    -> LogisticRegression. LogReg uses a 'balanced' option
                    for class weights, l2 penalty.
                Pipeline 2: XdawnCovariances w/ estimator -> TangentSpace
                    -> LogisticRegression. LogReg uses elasticnet penalty,
                    solver saga and a multinomial multi_class flag.
                Pipeline 3: Covariance w/ estimator -> MDM. Minimum distance
                    to mean (MDM) is the main classification scheme.
            The default is 0.
        estimator : str, optional
            Covariance matrix estimator to use. For regularization consider
            'lwf' or 'oas'. For complete lists, see pyriemann.utils.covariance.
            The default is 'lwf'.
        class_names : List, optional
            List of names for the confusion matrix plot.
            The default is ['Rest', '13 Hz', '17 Hz', '21 Hz'].
        accuracy_threshold : float, optional
            Threshold for determining which folds are 'good' fits. Accuracy
            found above the threshold (e.g. 70% or greater) will be reported
            as good fit folds. The default is 0.7.

        Returns
        -------
        DICT
            Dictionary of outputs are returned for the user. In order:
                Fold accuracy - 'Fold Acc'
                Indices for `good` training folds > or = to the given
                    accuracy_threshold value - 'Good Train Ind'
                Indices for `good` test folds > or = to the given
                    accuracy_threshold value - 'Good Test Ind'
                Indices for `bad` train folds < the given accuracy_threshold
                    value - 'Bad Train Ind'
                Indices for `bad` test folds < the given accuracy_threshold
                    value - 'Bad Test Ind'
                List of predicted classes from the RG Pipeline - 'Prediction List'
                List of true classes from the RG Pipeline - 'True Class List'

        See Also
        --------
        mne.Epochs
        sklearn.model_selection.StratifiedKFold
        sklearn.linear_model.LogisticRegression
        pyriemann.estimation.Covariances
        pyriemann.estimation.XdawnCovariances
        pyriemann.spatialfilters.CSP
        pyriemann.tangentspace.TangentSpace
        pyriemann.classification.MDM
        pyriemann.classification.KNearestNeighbor (Riemannian KNN)
        """
        # Set the stratified CV model; honor the `shuffle` argument
        # (random_state is only meaningful when shuffling)
        cv_strat = StratifiedKFold(
            n_splits=n_strat_folds, shuffle=shuffle,
            random_state=random_state if shuffle else None
        )  # Requires us to input the y labels as well...need to figure out how to get this.
        # Run one of the pre-defined pipelines
        if RG_Pipeline_Num == 1:
            clf = make_pipeline(
                Covariances(estimator=estimator), CSP(log=False), TangentSpace(),
                LogisticRegression(class_weight='balanced', max_iter=500))
        elif RG_Pipeline_Num == 2:
            clf = make_pipeline(
                XdawnCovariances(estimator=estimator, xdawn_estimator=estimator),
                TangentSpace(),
                LogisticRegression(penalty='elasticnet', class_weight=None,
                                   solver='saga', multi_class='multinomial',
                                   l1_ratio=0.5, max_iter=500))
        elif RG_Pipeline_Num == 3:
            clf = make_pipeline(Covariances(estimator=estimator),
                                MDM())  # This is the best so far
        else:
            print("...Running a default pipeline for RG using Covariance, and KNN...")
            clf = make_pipeline(Covariances(estimator=estimator), riem_KNN())

        # Get the labels for the data
        labels = epochs.events[:, -1]
        # Identify the data itself
        X_data = epochs.get_data()
        # Get the class names for the confusion matrix
        class_names = class_names

        # Make empty lists for each item in the stratified CV
        acc_list = []
        preds_list = []
        true_class_list = []
        good_train_indx = []
        good_test_indx = []
        bad_train_indx = []
        bad_test_indx = []

        # For loop testing each iteration of the stratified cross-validation
        for train_idx, test_idx in cv_strat.split(X_data, labels):
            # Get the x_train and x_test data for this fold
            x_train, x_test = X_data[train_idx], X_data[test_idx]
            # Get the y_train and y_test data for this fold
            y_train, y_test = labels[train_idx], labels[test_idx]
            # Fit the classifier
            clf.fit(x_train, y_train)
            # Find the predicted value on the test data in this fold
            preds = clf.predict(x_test)
            # Save in list
            preds_list.append(preds)
            # Save the true class labels in a list for this fold
            true_class_list.append(y_test)
            # Find the accuracy on average from this prediction
            acc_mean = np.average(preds == y_test)
            # Save the accuracy to a list
            acc_list.append(acc_mean)

            # Find out where the 'Good' training folds are (at or above threshold)
            if acc_mean >= accuracy_threshold:
                print("Train indices above accuracy threshold of " +
                      str(accuracy_threshold * 100) + "% are: ", train_idx)
                print("Test indices above accuracy threshold of " +
                      str(accuracy_threshold * 100) + "% are: ", test_idx)
                good_train_indx.append(train_idx)
                good_test_indx.append(test_idx)
            # Find out where the 'Bad' training folds are (below threshold)
            else:
                bad_train_indx.append(train_idx)
                bad_test_indx.append(test_idx)

            # Make a plot for the confusion matrix
            fig = plt.figure()
            plot_confusion_matrix(y_test, preds, class_names)

        # Print out the final results from across all folds on average
        print("The overall accuracy with " + str(n_strat_folds) +
              "-fold stratified CV was: ", np.average(acc_list))

        # Return output vals
        return dict({
            'Fold Acc': acc_list,
            'Good Train Ind': good_train_indx,
            'Good Test Ind': good_test_indx,
            'Bad Train Ind': bad_train_indx,
            'Bad Test Ind': bad_test_indx,
            'Prediction List': preds_list,
            'True Class List': true_class_list
        })
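# A minimal usage sketch of the CV routine above (assumption: this would be
# added as another method of the same class, since the double-underscore name
# is mangled outside the class body; `epochs` is an mne.Epochs object).
def report_strat_cv(self, epochs):
    results = self.__run_strat_validation_RG(epochs, n_strat_folds=5,
                                             RG_Pipeline_Num=3, shuffle=True)
    print('Per-fold accuracy:', results['Fold Acc'])
    print('Folds at/above threshold:', len(results['Good Test Ind']))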
)
labels = epochs.events[:, -1]
evoked = epochs.average()

###############################################################################
# Decoding in tangent space with a logistic regression

n_components = 2  # pick some components

# Define a monte-carlo cross-validation generator (reduce variance):
cv = KFold(n_splits=10, shuffle=True, random_state=42)
epochs_data = epochs.get_data()

clf = make_pipeline(
    XdawnCovariances(n_components),
    TangentSpace(metric="riemann"),
    LogisticRegression(),
)

preds = np.zeros(len(labels))

for train_idx, test_idx in cv.split(epochs_data):
    y_train, y_test = labels[train_idx], labels[test_idx]
    clf.fit(epochs_data[train_idx], y_train)
    preds[test_idx] = clf.predict(epochs_data[test_idx])

# Printing the results
acc = np.mean(preds == labels)
print("Classification accuracy: %f " % (acc))
epochs_data = epochs.get_data()

print("Multiclass classification with XDAWN + MDM")

clf = make_pipeline(XdawnCovariances(n_components), MDM())

for train_idx, test_idx in cv.split(epochs_data):
    y_train, y_test = labels[train_idx], labels[test_idx]
    clf.fit(epochs_data[train_idx], y_train)
    pr[test_idx] = clf.predict(epochs_data[test_idx])

print(classification_report(labels, pr))

###############################################################################
# plot the spatial patterns
xd = XdawnCovariances(n_components)
xd.fit(epochs_data, labels)

evoked.data = xd.Xd_.patterns_.T
evoked.times = np.arange(evoked.data.shape[0])
evoked.plot_topomap(
    times=[0, n_components, 2 * n_components, 3 * n_components],
    ch_type="grad", colorbar=False, size=1.5
)

###############################################################################
# plot the confusion matrix
names = ["audio left", "audio right", "vis left", "vis right"]
plot_confusion_matrix(labels, pr, names)
plt.show()
    def _init(self):
        self.n_components = self.params.get('n_components', 3)
        self.pipeline = make_pipeline(
            XdawnCovariances(self.n_components),
            TangentSpace(metric='riemann'),
            LogisticRegression())
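# A minimal standalone sketch of the pipeline built in `_init` above
# (assumption: synthetic data stands in for real epochs; shapes and labels
# are illustrative, not from the original code).
import numpy as np
from pyriemann.estimation import XdawnCovariances
from pyriemann.tangentspace import TangentSpace
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline

n_components = 3
pipeline = make_pipeline(XdawnCovariances(n_components),
                         TangentSpace(metric='riemann'),
                         LogisticRegression())

rng = np.random.RandomState(0)
X = rng.randn(20, 8, 100)          # (n_trials, n_channels, n_times)
y = np.array([0, 1]).repeat(10)
pipeline.fit(X, y)
print(pipeline.predict(X[:4]))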