def test_confusion_matrix():
    """Test confusion_matrix"""
    target = np.array([0, 1] * 10)
    preds = np.array([0, 1] * 10)
    with pytest.warns(DeprecationWarning,
                      match="plot_confusion_matrix is deprecated"):
        plot_confusion_matrix(target, preds, ["a", "b"])
X_train = X_train.reshape(X_train.shape[0], chans, samples)
X_test = X_test.reshape(X_test.shape[0], chans, samples)

# train a classifier with xDAWN spatial filtering + Riemannian Geometry (RG);
# labels need to be back in single-column (integer) format
clf.fit(X_train, Y_train.argmax(axis=-1))
preds_rg = clf.predict(X_test)

# print the results
acc2 = np.mean(preds_rg == Y_test.argmax(axis=-1))
print("Classification accuracy: %f" % (acc2))

# plot the confusion matrices for both classifiers
names = ['audio left', 'audio right', 'vis left', 'vis right']
plt.figure(0)
plot_confusion_matrix(preds, Y_test.argmax(axis=-1), names,
                      title='EEGNet-8,2')
plt.figure(1)
plot_confusion_matrix(preds_rg, Y_test.argmax(axis=-1), names,
                      title='xDAWN + RG')

# plot the training curves (assumes `history` is the Keras History object
# returned by the EEGNet model.fit call earlier in the script)
fig, loss_ax = plt.subplots()
acc_ax = loss_ax.twinx()
loss_ax.plot(history.history['loss'], 'y', label='train loss')
loss_ax.plot(history.history['val_loss'], 'r', label='val loss')
acc_ax.plot(history.history['accuracy'], 'b', label='train acc')
acc_ax.plot(history.history['val_accuracy'], 'g', label='val acc')
loss_ax.legend(loc='upper left')
acc_ax.legend(loc='lower left')
plt.show()
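# NOTE: the snippet above assumes `clf` was defined earlier in the script.
# A plausible definition for the xDAWN + RG branch, mirroring the tangent
# space pipelines used elsewhere in this collection (an assumption, not
# necessarily the original hyperparameters):
from pyriemann.estimation import XdawnCovariances
from pyriemann.tangentspace import TangentSpace
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline

n_components = 2  # number of xDAWN components (assumed value)
clf = make_pipeline(XdawnCovariances(n_components),
                    TangentSpace(metric='riemann'),
                    LogisticRegression())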
def test_confusion_matrix():
    """Test confusion_matrix"""
    target = np.array([0, 1] * 10)
    preds = np.array([0, 1] * 10)
    plot_confusion_matrix(target, preds, ['a', 'b'])
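# The DeprecationWarning asserted in the first test points users toward
# scikit-learn's own confusion-matrix plotting. A drop-in sketch, assuming
# scikit-learn >= 1.0 is available:
import numpy as np
from sklearn.metrics import ConfusionMatrixDisplay

target = np.array([0, 1] * 10)
preds = np.array([0, 1] * 10)
# compute and plot the confusion matrix directly from true/predicted labels
ConfusionMatrixDisplay.from_predictions(target, preds,
                                        display_labels=['a', 'b'])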
print('Multiclass classification with XDAWN + MDM')
clf = make_pipeline(XdawnCovariances(n_components), MDM())
pr = np.zeros(len(labels))  # pre-allocate the prediction vector

for train_idx, test_idx in cv.split(epochs_data):
    y_train, y_test = labels[train_idx], labels[test_idx]
    clf.fit(epochs_data[train_idx], y_train)
    pr[test_idx] = clf.predict(epochs_data[test_idx])

print(classification_report(labels, pr))

###############################################################################
# plot the spatial patterns
xd = XdawnCovariances(n_components)
xd.fit(epochs_data, labels)

evoked.data = xd.Xd_.patterns_.T
evoked.times = np.arange(evoked.data.shape[0])
evoked.plot_topomap(
    times=[0, n_components, 2 * n_components, 3 * n_components],
    ch_type='grad', colorbar=False, size=1.5)

###############################################################################
# plot the confusion matrix
names = ['audio left', 'audio right', 'vis left', 'vis right']
plot_confusion_matrix(labels, pr, names)
plt.show()
epochs_data = epochs.get_data()

print("Multiclass classification with XDAWN + MDM")
clf = make_pipeline(XdawnCovariances(n_components), MDM())
pr = np.zeros(len(labels))  # pre-allocate the prediction vector

# `cv` is an sklearn splitter object, so iterate over its split() generator
for train_idx, test_idx in cv.split(epochs_data):
    y_train, y_test = labels[train_idx], labels[test_idx]
    clf.fit(epochs_data[train_idx], y_train)
    pr[test_idx] = clf.predict(epochs_data[test_idx])

print(classification_report(labels, pr))

###############################################################################
# plot the spatial patterns
xd = XdawnCovariances(n_components)
xd.fit(epochs_data, labels)

# the fitted Xdawn instance is stored in the `Xd_` attribute
evoked.data = xd.Xd_.patterns_.T
evoked.times = np.arange(evoked.data.shape[0])
evoked.plot_topomap(
    times=[0, n_components, 2 * n_components, 3 * n_components],
    ch_type="grad", colorbar=False, size=1.5
)

###############################################################################
# plot the confusion matrix
names = ["audio left", "audio right", "vis left", "vis right"]
plot_confusion_matrix(labels, pr, names)
plt.show()
def __train_predefined_classifier(
        self, epochs, RG_Pipeline_Num=0, estimator='lwf',
        estimate_accuracy=False, random_state=44,
        class_names=['Rest', '13 Hz', '17 Hz', '21 Hz']):
    """
    Train a predefined Riemannian Geometry pipeline on a single dataset
    using MNE and pyRiemann.

    Parameters
    ----------
    epochs : Epoch Object from MNE
        Epoch data held in an appropriate MNE format. This could be derived
        from mne.Epochs, or using the `build_epochs` command included in
        this script.
    RG_Pipeline_Num : int, optional
        Which pre-defined Riemannian Geometry pipeline to run for analysis.
        Can be 0, 1, 2, or 3:
            Pipeline 0: Covariance w/ estimator -> Riemannian KNN.
            Pipeline 1: Covariance w/ estimator -> CSP -> TangentSpace ->
                LogisticRegression. LogReg uses the 'balanced' option for
                class weights and an l2 penalty.
            Pipeline 2: XdawnCovariances w/ estimator -> TangentSpace ->
                LogisticRegression. LogReg uses the elasticnet penalty, the
                saga solver, and a multinomial multi_class flag.
            Pipeline 3: Covariance w/ estimator -> MDM. Minimum distance to
                mean (MDM) is the main classification scheme.
        The default is 0.
    estimator : str, optional
        Covariance matrix estimator to use. For regularization consider
        'lwf' or 'oas'. For complete lists, see pyriemann.utils.covariance.
        The default is 'lwf'.
    estimate_accuracy : bool, optional
        Roughly estimate model accuracy using a simple hold-out train/test
        split (75%/25% train/test by default). The default is False.
    random_state : int, optional
        The value to be used as the 'seed' for `numpy.random.RandomState`.
        See sklearn.model_selection.train_test_split for more details.
        The default is 44.
    class_names : list, optional
        List of names for the confusion matrix plot.
        The default is ['Rest', '13 Hz', '17 Hz', '21 Hz'].

    Returns
    -------
    clf : Classifier object (sklearn)
        A trained classifier object based on the given epoch data and
        Riemannian Geometry pipeline.

    See Also
    --------
    mne.Epochs
    sklearn.model_selection.StratifiedKFold
    sklearn.linear_model.LogisticRegression
    pyriemann.estimation.Covariances
    pyriemann.estimation.XdawnCovariances
    pyriemann.spatialfilters.CSP
    pyriemann.tangentspace.TangentSpace
    pyriemann.classification.MDM
    pyriemann.classification.KNearestNeighbor (Riemannian KNN)
    """
    # Run one of the pre-defined pipelines
    if RG_Pipeline_Num == 1:
        clf = make_pipeline(
            Covariances(estimator=estimator), CSP(log=False), TangentSpace(),
            LogisticRegression(class_weight='balanced', max_iter=500))
    elif RG_Pipeline_Num == 2:
        clf = make_pipeline(
            XdawnCovariances(estimator=estimator, xdawn_estimator=estimator),
            TangentSpace(),
            LogisticRegression(penalty='elasticnet', l1_ratio=0.5,
                               class_weight=None, solver='saga',
                               multi_class='multinomial', max_iter=500))
    elif RG_Pipeline_Num == 3:
        # This has been the best performer so far
        clf = make_pipeline(Covariances(estimator=estimator), MDM())
    else:
        print("...Running a default pipeline for RG using Covariance "
              "and KNN...")
        clf = make_pipeline(Covariances(estimator=estimator), riem_KNN())

    # Get the labels for the data
    labels = epochs.events[:, -1]
    # Identify the data itself
    X_data = epochs.get_data()

    # NOTE: the hold-out estimate below is NOT a great measure of model
    # accuracy. It only gives a rough idea of how the model performs within
    # its own dataset and should be used sparingly!
    if estimate_accuracy is True:
        # Do a simple data hold-out for testing
        x_train, x_test, y_train, y_test = train_test_split(
            X_data, labels, test_size=0.25, random_state=random_state)
        # Note: this aliases clf rather than copying it, so the hold-out
        # fit is overwritten by the full fit below
        clf_estimate = clf
        clf_estimate.fit(x_train, y_train)
        pred_vals = clf_estimate.predict(x_test)
        accuracy_val = np.mean(pred_vals == y_test)
        print("Hold-out accuracy estimate: %f" % accuracy_val)
        fig = plt.figure()
        plot_confusion_matrix(y_test, pred_vals, class_names)

    # Fit the classifier to the full epoch data
    clf.fit(X_data, labels)

    return clf
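#------------------------------------------------------------------------------
# For reference, the four pipelines described in the docstring above can be
# built standalone. A minimal sketch, assuming pyriemann and scikit-learn are
# installed and that `riem_KNN` is pyriemann's KNearestNeighbor (as the
# script's imports suggest):
from pyriemann.classification import MDM, KNearestNeighbor as riem_KNN
from pyriemann.estimation import Covariances, XdawnCovariances
from pyriemann.spatialfilters import CSP
from pyriemann.tangentspace import TangentSpace
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline

estimator = 'lwf'
rg_pipelines = {
    0: make_pipeline(Covariances(estimator=estimator), riem_KNN()),
    1: make_pipeline(Covariances(estimator=estimator), CSP(log=False),
                     TangentSpace(),
                     LogisticRegression(class_weight='balanced',
                                        max_iter=500)),
    2: make_pipeline(XdawnCovariances(estimator=estimator,
                                      xdawn_estimator=estimator),
                     TangentSpace(),
                     LogisticRegression(penalty='elasticnet', solver='saga',
                                        multi_class='multinomial',
                                        l1_ratio=0.5, max_iter=500)),
    3: make_pipeline(Covariances(estimator=estimator), MDM()),
}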
def __run_strat_validation_RG(
        self, epochs, n_strat_folds=5, shuffle=False, random_state=42,
        RG_Pipeline_Num=0, estimator='lwf',
        class_names=['Rest', '13 Hz', '17 Hz', '21 Hz'],
        accuracy_threshold=0.7):
    """
    Complete a stratified cross-validation using a Riemannian Geometry
    pipeline.

    Parameters
    ----------
    epochs : Epoch Object from MNE
        Epoch data held in an appropriate MNE format. This could be derived
        from mne.Epochs, or using the `build_epochs` command included in
        this script.
    n_strat_folds : int, optional
        Number of folds for the stratified K-fold cross-validation. This
        value should be chosen carefully to avoid unbalanced classes.
        The default is 5.
    shuffle : bool, optional
        Shuffle training set data. See
        sklearn.model_selection.StratifiedKFold for more details.
        The default is False.
    random_state : int, optional
        The value to be used as the 'seed' for `numpy.random.RandomState`.
        See sklearn.model_selection.StratifiedKFold for more details.
        The default is 42.
    RG_Pipeline_Num : int, optional
        Which pre-defined Riemannian Geometry pipeline to run for analysis.
        Can be 0, 1, 2, or 3:
            Pipeline 0: Covariance w/ estimator -> Riemannian KNN.
            Pipeline 1: Covariance w/ estimator -> CSP -> TangentSpace ->
                LogisticRegression. LogReg uses the 'balanced' option for
                class weights and an l2 penalty.
            Pipeline 2: XdawnCovariances w/ estimator -> TangentSpace ->
                LogisticRegression. LogReg uses the elasticnet penalty, the
                saga solver, and a multinomial multi_class flag.
            Pipeline 3: Covariance w/ estimator -> MDM. Minimum distance to
                mean (MDM) is the main classification scheme.
        The default is 0.
    estimator : str, optional
        Covariance matrix estimator to use. For regularization consider
        'lwf' or 'oas'. For complete lists, see pyriemann.utils.covariance.
        The default is 'lwf'.
    class_names : list, optional
        List of names for the confusion matrix plot.
        The default is ['Rest', '13 Hz', '17 Hz', '21 Hz'].
    accuracy_threshold : float, optional
        Threshold for determining which folds are 'good' fits. Folds with
        accuracy at or above the threshold (e.g. 70% or greater) are
        reported as good-fit folds. The default is 0.7.

    Returns
    -------
    dict
        Dictionary of outputs returned to the user. In order:
            'Fold Acc' - fold accuracy.
            'Good Train Ind' - indices of training folds with accuracy at
                or above the accuracy_threshold value.
            'Good Test Ind' - indices of test folds with accuracy at or
                above the accuracy_threshold value.
            'Bad Train Ind' - indices of training folds with accuracy below
                the accuracy_threshold value.
            'Bad Test Ind' - indices of test folds with accuracy below the
                accuracy_threshold value.
            'Prediction List' - list of predicted classes from the RG
                pipeline.
            'True Class List' - list of true classes from the RG pipeline.

    See Also
    --------
    mne.Epochs
    sklearn.model_selection.StratifiedKFold
    sklearn.linear_model.LogisticRegression
    pyriemann.estimation.Covariances
    pyriemann.estimation.XdawnCovariances
    pyriemann.spatialfilters.CSP
    pyriemann.tangentspace.TangentSpace
    pyriemann.classification.MDM
    pyriemann.classification.KNearestNeighbor (Riemannian KNN)
    """
    # Set up the stratified CV splitter; StratifiedKFold.split also needs
    # the class labels, which are passed below. random_state is only valid
    # when shuffling is enabled.
    cv_strat = StratifiedKFold(
        n_splits=n_strat_folds, shuffle=shuffle,
        random_state=random_state if shuffle else None)
    # Run one of the pre-defined pipelines
    if RG_Pipeline_Num == 1:
        clf = make_pipeline(
            Covariances(estimator=estimator), CSP(log=False), TangentSpace(),
            LogisticRegression(class_weight='balanced', max_iter=500))
    elif RG_Pipeline_Num == 2:
        clf = make_pipeline(
            XdawnCovariances(estimator=estimator, xdawn_estimator=estimator),
            TangentSpace(),
            LogisticRegression(penalty='elasticnet', class_weight=None,
                               solver='saga', multi_class='multinomial',
                               l1_ratio=0.5, max_iter=500))
    elif RG_Pipeline_Num == 3:
        # This has been the best performer so far
        clf = make_pipeline(Covariances(estimator=estimator), MDM())
    else:
        print("...Running a default pipeline for RG using Covariance "
              "and KNN...")
        clf = make_pipeline(Covariances(estimator=estimator), riem_KNN())

    # Get the labels for the data
    labels = epochs.events[:, -1]
    # Identify the data itself
    X_data = epochs.get_data()

    # Make empty lists for each item in the stratified CV
    acc_list = []
    preds_list = []
    true_class_list = []
    good_train_indx = []
    good_test_indx = []
    bad_train_indx = []
    bad_test_indx = []

    # Evaluate each fold of the stratified cross-validation
    for train_idx, test_idx in cv_strat.split(X_data, labels):
        # Get the train/test data for this fold
        x_train, x_test = X_data[train_idx], X_data[test_idx]
        y_train, y_test = labels[train_idx], labels[test_idx]
        # Fit the classifier
        clf.fit(x_train, y_train)
        # Predict on the held-out data in this fold and save the result
        preds = clf.predict(x_test)
        preds_list.append(preds)
        # Save the true class labels for this fold
        true_class_list.append(y_test)
        # Compute the mean accuracy for this fold and save it
        acc_mean = np.average(preds == y_test)
        acc_list.append(acc_mean)

        # Record the 'good' folds (accuracy at or above the threshold)
        if acc_mean >= accuracy_threshold:
            print("Train indices above accuracy threshold of "
                  + str(accuracy_threshold * 100) + "% are: ", train_idx)
            print("Test indices above accuracy threshold of "
                  + str(accuracy_threshold * 100) + "% are: ", test_idx)
            good_train_indx.append(train_idx)
            good_test_indx.append(test_idx)
        # Record the 'bad' folds (accuracy below the threshold)
        else:
            bad_train_indx.append(train_idx)
            bad_test_indx.append(test_idx)

        # Plot the confusion matrix for this fold
        fig = plt.figure()
        plot_confusion_matrix(y_test, preds, class_names)

    # Print the average accuracy across all folds
    print("The overall accuracy with " + str(n_strat_folds)
          + "-fold stratified CV was: ", np.average(acc_list))

    # Return the output values
    return {
        'Fold Acc': acc_list,
        'Good Train Ind': good_train_indx,
        'Good Test Ind': good_test_indx,
        'Bad Train Ind': bad_train_indx,
        'Bad Test Ind': bad_test_indx,
        'Prediction List': preds_list,
        'True Class List': true_class_list
    }
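#------------------------------------------------------------------------------
# Hypothetical usage sketch for the dictionary returned above. The instance
# name `model` and a public wrapper `run_strat_validation_RG` are assumptions
# for illustration, since the method itself is private (name-mangled):
import numpy as np

results = model.run_strat_validation_RG(epochs, n_strat_folds=5)

print("Mean fold accuracy: %.3f" % np.mean(results['Fold Acc']))
print("Number of good folds:", len(results['Good Test Ind']))

# Pool the per-fold predictions for an overall confusion matrix, if desired
all_preds = np.concatenate(results['Prediction List'])
all_true = np.concatenate(results['True Class List'])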
#------------------------------------------------------------------------------
# Another way to plot the confusion matrix:
# cnf = confusion_matrix(labels, preds)
# plt.figure()
# plot_confusion_matrix_melv(cnf, classes=names,
#                            title='confusion_matrix_without_normalization')
# plt.figure()
# plot_confusion_matrix_melv(cnf, classes=names, normalize=True,
#                            title='Normalized confusion matrix')
#------------------------------------------------------------------------------

names = ['zero', 'one']
plot_confusion_matrix(preds, labels, names,
                      title='Logistic Regression confusion matrix')

print('Classification report:')
print(classification_report(labels, preds, target_names=names))

for name in ('patterns_', 'filters_'):
    # The `inverse_transform` parameter will call this method on any
    # estimator contained in the pipeline, in reverse order.
    coef = get_coef(clf, name, inverse_transform=True)
    evoked = EvokedArray(coef, epochs.info, tmin=epochs.tmin)
    evoked.plot_topomap(title='EEG %s' % name[:-1], time_unit='s')

#------------------------------------------------------------------------------
clf = make_pipeline(Covariances(), TangentSpace(metric='riemann'),
                    LogisticRegression())

for train_idx, test_idx in cv.split(epochs_data):
    y_train, y_test = labels[train_idx], labels[test_idx]
    clf.fit(epochs_data[train_idx], y_train)
    preds[test_idx] = clf.predict(epochs_data[test_idx])

# clf.fit(epochs.get_data(), labels)
# clf.predict(events)

# Printing the results
acc = np.mean(preds == labels)
print("Classification accuracy: %f" % (acc))

names = ['resting-state', '13 Hz', '21 Hz', '17 Hz']
plot_confusion_matrix(preds, labels, names)
plt.show()

# from sklearn.linear_model import LogisticRegression
# var = epochs.get_data()
# Rough sketch of the same idea without make_pipeline:
# covs = Covariances(estimator='lwf').transform(var)
# ts = TangentSpace().transform(covs)
# LogisticRegression().fit(ts, labels)

# load your data
# X = ...  # your EEG data, in format Ntrials x Nchannels x Nsamples
# y = ...  # the labels

# estimate covariance matrices
# cov = pyriemann.estimation.Covariances().fit_transform(X)
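# A runnable completion of the template above with synthetic stand-in data;
# the trial/channel/sample counts are illustrative assumptions only:
import numpy as np
import pyriemann

rng = np.random.default_rng(42)
X = rng.standard_normal((50, 8, 128))  # 50 trials, 8 channels, 128 samples
y = rng.integers(0, 2, size=50)        # binary labels

# estimate covariance matrices: one Nchannels x Nchannels matrix per trial
cov = pyriemann.estimation.Covariances().fit_transform(X)
print(cov.shape)  # (50, 8, 8)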
###############################################################################
# Decoding in tangent space with a logistic regression

n_components = 2  # pick some components

# Define a Monte Carlo cross-validation generator (to reduce variance):
cv = KFold(n_splits=10, shuffle=True, random_state=42)
epochs_data = epochs.get_data()

clf = make_pipeline(XdawnCovariances(n_components),
                    TangentSpace(metric='riemann'),
                    LogisticRegression())

preds = np.zeros(len(labels))

for train_idx, test_idx in cv.split(epochs_data):
    y_train, y_test = labels[train_idx], labels[test_idx]
    clf.fit(epochs_data[train_idx], y_train)
    preds[test_idx] = clf.predict(epochs_data[test_idx])

# Printing the results
acc = np.mean(preds == labels)
print("Classification accuracy: %f" % (acc))

names = ['audio left', 'audio right', 'vis left', 'vis right']
plot_confusion_matrix(preds, labels, names)
plt.show()
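###############################################################################
# To make explicit what TangentSpace contributes here: each SPD covariance
# matrix C_i is mapped to the Euclidean vector
#     vec(logm(C_ref^(-1/2) @ C_i @ C_ref^(-1/2))),
# where C_ref is the Riemannian mean of the training covariances. A short
# sketch reusing `epochs_data` from above (plain Covariances instead of the
# xDAWN variant, for simplicity):
from pyriemann.estimation import Covariances
from pyriemann.tangentspace import TangentSpace

covs = Covariances().fit_transform(epochs_data)
ts_features = TangentSpace(metric='riemann').fit_transform(covs)
print(ts_features.shape)  # (n_trials, n_channels * (n_channels + 1) // 2)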