def test_csp_init(nfilter, metric, log, get_covmats, get_labels):
    n_classes, n_matrices, n_channels = 2, 6, 3
    covmats = get_covmats(n_matrices, n_channels)
    labels = get_labels(n_matrices, n_classes)
    csp = CSP(nfilter=nfilter, metric=metric, log=log)
    csp.fit(covmats, labels)
    Xtr = csp.transform(covmats)
    if log:
        assert Xtr.shape == (n_matrices, n_channels)
    else:
        assert Xtr.shape == (n_matrices, n_channels, n_channels)
    assert csp.filters_.shape == (n_channels, n_channels)
    assert csp.patterns_.shape == (n_channels, n_channels)
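# The fixtures `get_covmats` and `get_labels` above come from the test suite's
# conftest.py and are not shown here. A minimal, hypothetical stand-in sketch
# (names and construction are assumptions, not the project's actual fixtures):
import numpy as np


def make_covmats(n_matrices, n_channels, seed=42):
    # Random SPD matrices: A @ A.T plus a small ridge for numerical stability
    rng = np.random.RandomState(seed)
    A = rng.randn(n_matrices, n_channels, n_channels)
    return A @ A.transpose(0, 2, 1) + 1e-3 * np.eye(n_channels)


def make_labels(n_matrices, n_classes):
    # Balanced integer labels, e.g. [0, 0, 0, 1, 1, 1] for 6 matrices, 2 classes
    return np.arange(n_classes).repeat(n_matrices // n_classes)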
def test_permutation_distance():
    """Test one way permutation test"""
    covset = generate_cov(10, 5)
    labels = np.array([0, 1]).repeat(5)
    groups = np.array([0] * 5 + [1] * 5)

    with pytest.raises(ValueError):
        PermutationDistance(mode='badmode')

    # pairwise
    p = PermutationDistance(100, mode='pairwise')
    p.test(covset, labels)
    # with group
    p.test(covset, labels, groups=groups)
    # t-test
    p = PermutationDistance(100, mode='ttest')
    p.test(covset, labels)
    # f-test
    p = PermutationDistance(100, mode='ftest')
    p.test(covset, labels)
    # with custom estimator
    p = PermutationDistance(10, mode='pairwise', estimator=CSP(2, log=False))
    p.test(covset, labels)
    # unique perms
    p = PermutationDistance(1000)
    p.test(covset, labels)
    p.plot(nbins=2)
def _train_raw(df):
    """Train a classifier on raw EEG data"""
    X, y = transform.signal_ndarray(df)
    # print(X, y)

    # Fixes non-convergence for binary classification
    dual = len(set(y)) == 2

    clfs: Dict[str, Pipeline] = {
        # These four are from https://neurotechx.github.io/eeg-notebooks/auto_examples/visual_ssvep/02r__ssvep_decoding.html
        "CSP + Cov + TS": make_pipeline(
            Covariances(),
            CSP(4, log=False),
            TangentSpace(),
            LogisticRegression(dual=dual),
        ),
        "Cov + TS": make_pipeline(
            Covariances(), TangentSpace(), LogisticRegression(dual=dual)
        ),
        # Performs meh
        # "CSP + RegLDA": make_pipeline(
        #     Covariances(), CSP(4), LDA(shrinkage="auto", solver="eigen")
        # ),
        # Performs badly
        # "Cov + MDM": make_pipeline(Covariances(), MDM()),
    }
    for name, clf in clfs.items():
        logger.info(f"===== Training with {name} =====")
        _train(X, y, clf)
def test_permutation_pairwise_estimator(get_covmats, get_labels):
    """Test one way permutation with estimator"""
    n_matrices, n_channels, n_classes = 6, 3, 2
    covmats = get_covmats(n_matrices, n_channels)
    labels = get_labels(n_matrices, n_classes)
    # with custom estimator
    p = PermutationDistance(10, mode="pairwise", estimator=CSP(2, log=False))
    p.test(covmats, labels)
def RHvsLH_cross(out_dir, pipelines):
    name = 'RHvsLH_cross'
    datasets = utils.dataset_search('imagery',
                                    events=['right_hand', 'left_hand'],
                                    has_all_events=True,
                                    min_subjects=2,
                                    multi_session=False)
    print(datasets)

    pipelines = OrderedDict()
    pipelines['TS'] = make_pipeline(Covariances('oas'), TSclassifier())
    pipelines['CSP+LDA'] = make_pipeline(Covariances('oas'), CSP(6), LDA())
    pipelines['CSP+SVM'] = make_pipeline(Covariances('oas'), CSP(6), SVC())

    context = LeftRightImagery(pipelines, CrossSubjectEvaluation(n_jobs=10),
                               datasets)
    results = context.process()
p_test = PermutationDistance(n_perms, metric='riemann', mode='ftest')
p, F = p_test.test(covmats, labels)
duration = time() - t_init

fig, axes = plt.subplots(1, 1, figsize=[6, 3], sharey=True)
p_test.plot(nbins=10, axes=axes)
plt.title('F-test distance - %.2f sec.' % duration)
print('p-value: %.3f' % p)
sns.despine()
plt.tight_layout()
plt.show()

###############################################################################
# Classification based permutation test
###############################################################################

clf = make_pipeline(CSP(4), LogisticRegression())

t_init = time()
p_test = PermutationModel(n_perms, model=clf, cv=3, scoring='roc_auc')
p, F = p_test.test(covmats, labels)
duration = time() - t_init

fig, axes = plt.subplots(1, 1, figsize=[6, 3], sharey=True)
p_test.plot(nbins=10, axes=axes)
plt.title('Classification - %.2f sec.' % duration)
print('p-value: %.3f' % p)
sns.despine()
plt.tight_layout()
plt.show()
def decode(epochs, get_y_label_func, epoch_filter=None, decoding_method='standard',
           sliding_window_size=None, sliding_window_step=None,
           n_jobs=multiprocessing.cpu_count(), equalize_event_counts=True,
           only_fit=False, generalize_across_time=True):
    """
    Basic flow for decoding
    """
    config = dict(equalize_event_counts=equalize_event_counts,
                  only_fit=only_fit,
                  sliding_window_size=sliding_window_size,
                  sliding_window_step=sliding_window_step,
                  decoding_method=decoding_method,
                  generalize_across_time=generalize_across_time,
                  epoch_filter=str(epoch_filter))

    if epoch_filter is not None:
        epochs = epochs[epoch_filter]

    #-- Classify epochs into groups (training epochs)
    y_labels = get_y_label_func(epochs)

    if equalize_event_counts:
        epochs.events[:, 2] = y_labels
        epochs.event_id = {str(label): label for label in np.unique(y_labels)}
        min_n_items_per_y_label = min(
            [len(epochs[cond]) for cond in epochs.event_id.keys()])
        print("\nEqualizing the number of epochs to %d per condition..." %
              min_n_items_per_y_label)
        epochs.equalize_event_counts(epochs.event_id.keys())
        y_labels = epochs.events[:, 2]

    print("The epochs were classified into %d groups:" % len(set(y_labels)))
    for g in set(y_labels):
        print("Group {:}: {:} epochs".format(g, sum(np.array(y_labels) == g)))

    #-- Create the decoding pipeline
    print("Creating the classification pipeline...")

    epochs_data = epochs.get_data()

    preprocess_pipeline = None

    if decoding_method.startswith('standard'):
        if 'reg' in decoding_method:
            clf = make_pipeline(StandardScaler(), Ridge())
        else:
            clf = make_pipeline(
                StandardScaler(),
                svm.SVC(C=1, kernel='linear', class_weight='balanced'))

        if 'raw' not in decoding_method:
            assert sliding_window_size is not None
            assert sliding_window_step is not None
            preprocess_pipeline = make_pipeline(
                umne.transformers.SlidingWindow(window_size=sliding_window_size,
                                                step=sliding_window_step,
                                                average=True))

    elif decoding_method == 'ERP_cov':
        clf = make_pipeline(
            UnsupervisedSpatialFilter(PCA(20), average=False),
            ERPCovariances(estimator='lwf'),  # todo how to apply sliding window?
            CSP(30, log=False),
            TangentSpace('logeuclid'),
            LogisticRegression(penalty='l2'))  # todo why logistic regression?
    elif decoding_method == 'Xdawn_cov':
        clf = make_pipeline(
            UnsupervisedSpatialFilter(PCA(50), average=False),
            XdawnCovariances(12, estimator='lwf', xdawn_estimator='lwf'),
            TangentSpace('logeuclid'),
            LogisticRegression(penalty='l2'))

    elif decoding_method == 'Hankel_cov':
        clf = make_pipeline(
            UnsupervisedSpatialFilter(PCA(70), average=False),
            HankelCovariances(delays=[1, 8, 12, 64], estimator='oas'),
            CSP(15, log=False),
            TangentSpace('logeuclid'),
            LogisticRegression(penalty='l2'))

    else:
        raise Exception('Unknown decoding method: {:}'.format(decoding_method))

    print('\nDecoding pipeline:')
    for i in range(len(clf.steps)):
        print('Step #{:}: {:}'.format(i + 1, clf.steps[i][1]))

    if preprocess_pipeline is not None:
        print('\nApplying the pre-processing pipeline:')
        for i in range(len(preprocess_pipeline.steps)):
            print('Step #{:}: {:}'.format(i + 1, preprocess_pipeline.steps[i][1]))
        epochs_data = preprocess_pipeline.fit_transform(epochs_data)

    if only_fit:

        #-- Only fit the decoders
        procedure = 'only_fit'
        scores = None
        cv = None

        if decoding_method.startswith('standard'):
            if 'reg' in decoding_method:
                if 'r2' in decoding_method:
                    scoring = metrics.make_scorer(metrics.r2_score)
                else:
                    scoring = metrics.make_scorer(metrics.mean_squared_error)
            else:
                scoring = 'accuracy'
            if generalize_across_time:
                estimator = GeneralizingEstimator(clf, scoring=scoring,
                                                  n_jobs=n_jobs)
            else:
                estimator = SlidingEstimator(clf, scoring=scoring,
                                             n_jobs=n_jobs)
        else:
            estimator = clf

        estimator.fit(X=epochs_data, y=y_labels)

    else:

        #-- Classify & score -- cross-validation
        procedure = 'fit_and_score'

        print("\nCreating a classifier and calculating accuracy scores "
              "(this may take some time)...")

        cv = StratifiedKFold(n_splits=5)
        if decoding_method.startswith('standard'):
            if 'reg' in decoding_method:
                if 'r2' in decoding_method:
                    scoring = metrics.make_scorer(metrics.r2_score)
                else:
                    scoring = metrics.make_scorer(metrics.mean_squared_error)
            else:
                scoring = 'accuracy'

            if generalize_across_time:
                estimator = GeneralizingEstimator(clf, scoring=scoring,
                                                  n_jobs=n_jobs)
            else:
                estimator = SlidingEstimator(clf, scoring=scoring,
                                             n_jobs=n_jobs)

            scores = cross_val_multiscore(estimator=estimator, X=epochs_data,
                                          y=np.array(y_labels), cv=cv)
        else:
            scores = _run_cross_validation(X=epochs_data,
                                           y=np.array(y_labels),
                                           clf=clf, cv=cv)
            estimator = 'None'  # Estimator is not defined in the case of Riemannian decoding

    times = np.linspace(epochs.tmin, epochs.tmax, epochs_data.shape[2])

    return dict(procedure=procedure,
                estimator=estimator,
                scores=scores,
                pipeline=clf,
                preprocess=preprocess_pipeline,
                cv=cv,
                times=times,
                config=config)
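# A hedged usage sketch of decode() (not part of the original module): the
# `epochs` object (an mne.Epochs instance) and the label function below are
# assumptions for illustration only.
def label_from_event_code(epochs):
    # Use the MNE event code of each epoch as its class label
    return epochs.events[:, 2]


res = decode(epochs,
             get_y_label_func=label_from_event_code,
             decoding_method='standard',
             sliding_window_size=10,
             sliding_window_step=5,
             generalize_across_time=True)
print(res['procedure'])  # 'fit_and_score' when only_fit=False
print(np.asarray(res['scores']).shape)  # (n_folds, n_windows, n_windows) with GAT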
# Decoding
# ----------------------------
#
# Next, we will use 4 different machine learning pipelines to classify the SSVEP based on the data we collected:
#
# - CSP + RegLDA : Common Spatial Patterns + Regularized Linear Discriminant Analysis. This is a very common EEG analysis pipeline.
# - Cov + TS : Covariance + Tangent space mapping. One of the most reliable Riemannian geometry-based pipelines.
# - Cov + MDM : Covariance + MDM. A very simple, yet effective (for low channel count), Riemannian geometry classifier.
# - CSP + Cov + TS : Common Spatial Patterns + Covariance + Tangent space mapping. Riemannian pipeline with the standard CSP procedure beforehand.
#
# Evaluation is done through cross-validation, with area-under-the-curve (AUC) as the metric (AUC is probably the best metric for binary and unbalanced classification problems).
#
# Note: because we're doing machine learning here, the following cell may take a while to complete

clfs = OrderedDict()
clfs['CSP + RegLDA'] = make_pipeline(Covariances(), CSP(4), LDA(shrinkage='auto', solver='eigen'))
clfs['Cov + TS'] = make_pipeline(Covariances(), TangentSpace(), LogisticRegression())
clfs['Cov + MDM'] = make_pipeline(Covariances(), MDM())
clfs['CSP + Cov + TS'] = make_pipeline(Covariances(), CSP(4, log=False), TangentSpace(), LogisticRegression())

# define cross validation
cv = StratifiedShuffleSplit(n_splits=20, test_size=0.25, random_state=42)

# run cross validation for each pipeline
auc = []
methods = []
for m in clfs:
    print(m)
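# The loop above is cut off here; a hedged sketch of how each pipeline could be
# scored, assuming `X` (n_epochs, n_channels, n_times) and a binary label
# vector `y` were built earlier in the notebook:
from sklearn.model_selection import cross_val_score

for m in clfs:
    res = cross_val_score(clfs[m], X, y, scoring='roc_auc', cv=cv, n_jobs=-1)
    auc.extend(res)
    methods.extend([m] * len(res))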
def __train_predefined_classifier(self, epochs, RG_Pipeline_Num=0,
                                  estimator='lwf', estimate_accuracy=False,
                                  random_state=44,
                                  class_names=['Rest', '13 Hz', '17 Hz', '21 Hz']):
    """
    Train a predefined Riemannian Geometry pipeline on a single dataset using
    MNE and pyriemann.

    Parameters
    ----------
    epochs : Epoch Object from MNE
        Epoch data held in an appropriate MNE format. This could be derived
        from mne.Epochs, or using the `build_epochs` command included in this
        script.
    RG_Pipeline_Num : int, optional
        Which pre-defined Riemannian Geometry pipeline to run for analysis.
        Can be 0, 1, 2, 3:
            Pipeline 0: Covariance w/ estimator -> Riemannian KNN
            Pipeline 1: Covariance w/ estimator -> CSP -> TangentSpace -> LogisticRegression
                LogReg uses a 'balanced' option for class weights, l2 penalty.
            Pipeline 2: XdawnCovariances w/ estimator -> TangentSpace -> LogisticRegression
                LogReg uses elasticnet penalty, solver saga and a multinomial multi_class flag.
            Pipeline 3: Covariance w/ estimator -> MDM.
                Minimum distance to mean (MDM) is the main classification scheme.
        The default is 0.
    estimator : str, optional
        Covariance matrix estimator to use. For regularization consider 'lwf'
        or 'oas'. For complete lists, see pyriemann.utils.covariance.
        The default is 'lwf'.
    estimate_accuracy : bool, optional
        Estimate model accuracy roughly using a simple data hold-out
        train/test split. A default hold-out of 75/25% train/test is used.
        The default is False.
    random_state : int, optional
        The value to be used as the 'seed' for `numpy.random.RandomState`.
        See sklearn.model_selection.StratifiedKFold for more details.
        The default is 44.
    class_names : list, optional
        List of names for the confusion matrix plot.
        The default is ['Rest', '13 Hz', '17 Hz', '21 Hz'].

    Returns
    -------
    clf : Classifier object (sklearn)
        Returns a trained classifier object based on the given epoch data and
        Riemannian Geometry pipeline.

    See Also
    --------
    mne.Epochs
    sklearn.model_selection.StratifiedKFold
    sklearn.linear_model.LogisticRegression
    pyriemann.estimation.Covariances
    pyriemann.estimation.XdawnCovariances
    pyriemann.spatialfilters.CSP
    pyriemann.tangentspace.TangentSpace
    pyriemann.classification.MDM
    pyriemann.classification.KNearestNeighbor (Riemannian KNN)
    """
    # Run one of the pre-defined pipelines
    if RG_Pipeline_Num == 1:
        clf = make_pipeline(
            Covariances(estimator=estimator),
            CSP(log=False),
            TangentSpace(),
            LogisticRegression(class_weight='balanced', max_iter=500))
    elif RG_Pipeline_Num == 2:
        clf = make_pipeline(
            XdawnCovariances(estimator=estimator, xdawn_estimator=estimator),
            TangentSpace(),
            LogisticRegression(penalty='elasticnet', class_weight=None,
                               solver='saga', multi_class='multinomial',
                               l1_ratio=0.5, max_iter=500))
    elif RG_Pipeline_Num == 3:
        clf = make_pipeline(Covariances(estimator=estimator),
                            MDM())  # This is the best so far
    else:
        print("...Running a default pipeline for RG using Covariance, and KNN...")
        clf = make_pipeline(Covariances(estimator=estimator), riem_KNN())

    # Get the labels for the data
    labels = epochs.events[:, -1]

    # Identify the data itself
    X_data = epochs.get_data()

    # Get the class names for the confusion matrix
    class_names = class_names

    # This is NOT a great measure of the model accuracy. This just will give you
    # a rough estimate of how it is performing within its own dataset. This
    # should be used sparingly!
    if estimate_accuracy is True:
        # Do a simple data hold-out for testing
        x_train, x_test, y_train, y_test = train_test_split(
            X_data, labels, test_size=0.25, random_state=random_state)

        clf_estimate = clf
        clf_estimate.fit(x_train, y_train)

        pred_vals = clf_estimate.predict(x_test)
        accuracy_val = np.mean(pred_vals == y_test)

        fig = plt.figure()
        plot_confusion_matrix(y_test, pred_vals, class_names)

    # Fit the data to the given epoch information
    clf.fit(X_data, labels)

    return clf
def __run_strat_validation_RG(self, epochs, n_strat_folds=5, shuffle=False,
                              random_state=42, RG_Pipeline_Num=0,
                              estimator='lwf',
                              class_names=['Rest', '13 Hz', '17 Hz', '21 Hz'],
                              accuracy_threshold=0.7):
    """
    Complete a stratified cross-validation using a Riemannian Geometry pipeline.

    Parameters
    ----------
    epochs : Epoch Object from MNE
        Epoch data held in an appropriate MNE format. This could be derived
        from mne.Epochs, or using the `build_epochs` command included in this
        script.
    n_strat_folds : int, optional
        Number of folds for the stratified K-Fold cross-validation. This value
        should be chosen carefully to avoid unbalanced classes.
        The default is 5.
    shuffle : bool, optional
        Shuffle training set data. See sklearn.model_selection.StratifiedKFold
        for more details. The default is False.
    random_state : int, optional
        The value to be used as the 'seed' for `numpy.random.RandomState`.
        See sklearn.model_selection.StratifiedKFold for more details.
        The default is 42.
    RG_Pipeline_Num : int, optional
        Which pre-defined Riemannian Geometry pipeline to run for analysis.
        Can be 0, 1, 2, 3:
            Pipeline 0: Covariance w/ estimator -> Riemannian KNN
            Pipeline 1: Covariance w/ estimator -> CSP -> TangentSpace -> LogisticRegression
                LogReg uses a 'balanced' option for class weights, l2 penalty.
            Pipeline 2: XdawnCovariances w/ estimator -> TangentSpace -> LogisticRegression
                LogReg uses elasticnet penalty, solver saga and a multinomial multi_class flag.
            Pipeline 3: Covariance w/ estimator -> MDM.
                Minimum distance to mean (MDM) is the main classification scheme.
        The default is 0.
    estimator : str, optional
        Covariance matrix estimator to use. For regularization consider 'lwf'
        or 'oas'. For complete lists, see pyriemann.utils.covariance.
        The default is 'lwf'.
    class_names : list, optional
        List of names for the confusion matrix plot.
        The default is ['Rest', '13 Hz', '17 Hz', '21 Hz'].
    accuracy_threshold : float, optional
        Threshold for determining which folds are 'good' fits. Accuracy found
        above the threshold (e.g. 70% or greater) will be reported as good-fit
        folds. The default is 0.7.

    Returns
    -------
    dict
        Dictionary of outputs returned to the user. In order:
            Fold accuracy - 'Fold Acc'
            Indices for 'good' training folds >= the accuracy_threshold value - 'Good Train Ind'
            Indices for 'good' test folds >= the accuracy_threshold value - 'Good Test Ind'
            Indices for 'bad' train folds < the accuracy_threshold value - 'Bad Train Ind'
            Indices for 'bad' test folds < the accuracy_threshold value - 'Bad Test Ind'
            List of predicted classes from the RG Pipeline - 'Prediction List'
            List of true classes from the RG Pipeline - 'True Class List'

    See Also
    --------
    mne.Epochs
    sklearn.model_selection.StratifiedKFold
    sklearn.linear_model.LogisticRegression
    pyriemann.estimation.Covariances
    pyriemann.estimation.XdawnCovariances
    pyriemann.spatialfilters.CSP
    pyriemann.tangentspace.TangentSpace
    pyriemann.classification.MDM
    pyriemann.classification.KNearestNeighbor (Riemannian KNN)
    """
    # Set the stratified CV model; shuffle and random_state come from the
    # arguments (random_state only applies when shuffling).
    cv_strat = StratifiedKFold(
        n_splits=n_strat_folds, shuffle=shuffle,
        random_state=random_state if shuffle else None
    )  # Requires us to input in the ylabels as well...need to figure out how to get this.
    # Run one of the pre-defined pipelines
    if RG_Pipeline_Num == 1:
        clf = make_pipeline(
            Covariances(estimator=estimator),
            CSP(log=False),
            TangentSpace(),
            LogisticRegression(class_weight='balanced', max_iter=500))
    elif RG_Pipeline_Num == 2:
        clf = make_pipeline(
            XdawnCovariances(estimator=estimator, xdawn_estimator=estimator),
            TangentSpace(),
            LogisticRegression(penalty='elasticnet', class_weight=None,
                               solver='saga', multi_class='multinomial',
                               l1_ratio=0.5, max_iter=500))
    elif RG_Pipeline_Num == 3:
        clf = make_pipeline(Covariances(estimator=estimator),
                            MDM())  # This is the best so far
    else:
        print("...Running a default pipeline for RG using Covariance, and KNN...")
        clf = make_pipeline(Covariances(estimator=estimator), riem_KNN())

    # Get the labels for the data
    labels = epochs.events[:, -1]

    # Identify the data itself
    X_data = epochs.get_data()

    # Get the class names for the confusion matrix
    class_names = class_names

    # Make empty lists for each item in the stratified CV
    acc_list = []
    preds_list = []
    true_class_list = []
    good_train_indx = []
    good_test_indx = []
    bad_train_indx = []
    bad_test_indx = []

    # For loop testing each iteration of the stratified cross-validation
    for train_idx, test_idx in cv_strat.split(X_data, labels):
        # Get the x_train and x_test data for this fold
        x_train, x_test = X_data[train_idx], X_data[test_idx]
        # Get the y_train and y_test data for this fold
        y_train, y_test = labels[train_idx], labels[test_idx]

        # Fit the classifier
        clf.fit(x_train, y_train)

        # Find the predicted value on the test data in this fold
        preds = clf.predict(x_test)
        # Save in list
        preds_list.append(preds)
        # Save the true class labels in a list for this fold
        true_class_list.append(y_test)

        # Find the accuracy on average from this prediction
        acc_mean = np.average(preds == y_test)
        # Save the accuracy to a list
        acc_list.append(acc_mean)

        # Find out where the 'Good' training folds are. (Greater than threshold)
        if acc_mean >= accuracy_threshold:
            print("Train indices above accuracy threshold of " +
                  str(accuracy_threshold * 100) + "% are: ", train_idx)
            print("Test indices above accuracy threshold of " +
                  str(accuracy_threshold * 100) + "% are: ", test_idx)
            good_train_indx.append(train_idx)
            good_test_indx.append(test_idx)
        # Find out where the 'Bad' training folds are. (Less than threshold)
        else:
            bad_train_indx.append(train_idx)
            bad_test_indx.append(test_idx)

        # Make a plot for the confusion matrix
        fig = plt.figure()
        plot_confusion_matrix(y_test, preds, class_names)

    # Print out the final results from across all folds on average
    print("The overall accuracy with " + str(n_strat_folds) +
          "-fold stratified CV was: ", np.average(acc_list))

    # Return output vals
    return dict({
        'Fold Acc': acc_list,
        'Good Train Ind': good_train_indx,
        'Good Test Ind': good_test_indx,
        'Bad Train Ind': bad_train_indx,
        'Bad Test Ind': bad_test_indx,
        'Prediction List': preds_list,
        'True Class List': true_class_list
    })
def test_CSP():
    """Test CSP"""
    n_trials = 90
    X = generate_cov(n_trials, 3)
    labels = np.array([0, 1, 2]).repeat(n_trials // 3)

    # Test Init
    csp = CSP()
    assert csp.nfilter == 4
    assert csp.metric == 'euclid'
    assert csp.log
    csp = CSP(3, 'riemann', False)
    assert csp.nfilter == 3
    assert csp.metric == 'riemann'
    assert not csp.log
    with pytest.raises(TypeError):
        CSP('foo')
    with pytest.raises(ValueError):
        CSP(metric='foo')
    with pytest.raises(TypeError):
        CSP(log='foo')

    # Test fit
    csp = CSP()
    csp.fit(X, labels % 2)  # two classes
    csp.fit(X, labels)  # 3 classes
    with pytest.raises(ValueError):
        csp.fit(X, labels * 0.)  # 1 class
    with pytest.raises(ValueError):
        csp.fit(X, labels[:1])  # unequal # of samples
    with pytest.raises(TypeError):
        csp.fit(X, 'foo')  # y must be an array
    with pytest.raises(TypeError):
        csp.fit('foo', labels)  # X must be an array
    with pytest.raises(ValueError):
        csp.fit(X[:, 0], labels)
    with pytest.raises(ValueError):
        csp.fit(X, X)
    assert_array_equal(csp.filters_.shape, [X.shape[1], X.shape[1]])
    assert_array_equal(csp.patterns_.shape, [X.shape[1], X.shape[1]])

    # Test transform
    Xt = csp.transform(X)
    assert_array_equal(Xt.shape, [len(X), X.shape[1]])
    with pytest.raises(TypeError):
        csp.transform('foo')
    with pytest.raises(ValueError):
        csp.transform(X[:, 1:, :])  # unequal # of chans
    csp.log = False
    Xt = csp.transform(X)
import logging

import coloredlogs

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger()
coloredlogs.install(level=logging.DEBUG)

datasets = utils.dataset_search('imagery',
                                events=['supination', 'hand_close'],
                                has_all_events=False,
                                min_subjects=2,
                                multi_session=False)

for d in datasets:
    d.subject_list = d.subject_list[:10]

paradigm = ImageryNClass(2)
context = WithinSessionEvaluation(paradigm=paradigm,
                                  datasets=datasets,
                                  random_state=42)

pipelines = OrderedDict()
pipelines['av+TS'] = make_pipeline(Covariances(estimator='oas'), TSclassifier())
pipelines['av+CSP+LDA'] = make_pipeline(Covariances(estimator='oas'), CSP(8), LDA())

results = context.process(pipelines, overwrite=True)

analyze(results, './')
baseclf = make_pipeline(
    ElectrodeSelection(10, metric=dict(mean='logeuclid', distance='riemann')),
    TangentSpace('riemann'),
    LogisticRegression(penalty='l1', solver='liblinear'))

array_clfs['Cosp'] = make_pipeline(
    CospCovariances(fs=1000, window=32, overlap=0.95, fmax=300, fmin=1),
    CospBoostingClassifier(baseclf))

array_clfs['HankelCov'] = make_pipeline(
    DownSampler(2),
    HankelCovariances(delays=[2, 4, 8, 12, 16], estimator='oas'),
    TangentSpace('logeuclid'),
    LogisticRegression(penalty='l1', solver='liblinear'))

array_clfs['CSSP'] = make_pipeline(
    HankelCovariances(delays=[2, 4, 8, 12, 16], estimator='oas'),
    CSP(30),
    LogisticRegression(penalty='l1', solver='liblinear'))

patients = dataframe1.PatientID.values

index = list(array_clfs.keys()) + ['Ensemble']
columns = ['ca', 'de', 'fp', 'ja', 'mv', 'wc', 'zt']

res_acc = pd.DataFrame(index=index, columns=columns)
res_auc = pd.DataFrame(index=index, columns=columns)

fnames = glob('./fhpred/data/*/*.mat')

for fname in fnames:
    data = loadmat(fname)
    p = fname[-18:-16]

    clfs = deepcopy(array_clfs)
def pyR_decoding_on_full_epochs(X, y, plot_conf_matrix=0, class_names=None,
                                test_size=0.2, n_splits=5,
                                classifier='ERP_cov'):
    """
    This function decodes on the full epoch using the pyRiemann decoder,
    cf https://github.com/Team-BK/Biomag2016/blob/master/Final_Submission.ipynb

    Parameters
    ----------
    X : data extracted from the epochs provided to the decoder
    y : categorical variable (i.e. discrete, but it can be more than 2 categories)
    plot_conf_matrix : set to 1 if you want to see the confusion matrix
    class_names : needed for the legend if confusion matrices are plotted,
        e.g. ['cat1', 'cat2', 'cat3']
    test_size : proportion of the data on which you want to test the decoder
    n_splits : when calculating the score, number of cross-validation folds
    classifier : set it to 'ERP_cov', 'Xdawn_cov' or 'Hankel_cov' depending on
        the classification you want to do.

    Returns
    -------
    scores, y_test, y_pred, cnf_matrix, or just scores if you don't want the
    confusion matrix
    """
    # ------- define the classifier -------
    if classifier == 'ERP_cov':
        spatial_filter = UnsupervisedSpatialFilter(PCA(20), average=False)
        ERP_cov = ERPCovariances(estimator='lwf')
        CSP_30 = CSP(30, log=False)
        tang = TangentSpace('logeuclid')
        clf = make_pipeline(spatial_filter, ERP_cov, CSP_30, tang,
                            LogisticRegression(penalty='l2'))

    if classifier == 'Xdawn_cov':
        clf = make_pipeline(
            UnsupervisedSpatialFilter(PCA(50), average=False),
            XdawnCovariances(12, estimator='lwf', xdawn_estimator='lwf'),
            TangentSpace('logeuclid'),
            LogisticRegression(penalty='l2'))

    if classifier == 'Hankel_cov':
        clf = make_pipeline(
            UnsupervisedSpatialFilter(PCA(70), average=False),
            HankelCovariances(delays=[1, 8, 12, 64], estimator='oas'),
            CSP(15, log=False),
            TangentSpace('logeuclid'),
            LogisticRegression(penalty='l2'))

    cv = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=4343)
    y = np.asarray(y)
    scores = []
    for train_index, test_index in cv.split(X, y):
        print(train_index)
        print(test_index)
        print('we are in the CV loop')
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        # Train on X_train, y_train
        clf.fit(X_train, y_train)
        # Predict the category on X_test
        y_pred = clf.predict(X_test)

        scores.append(accuracy_score(y_true=y_test, y_pred=y_pred))
    scores = np.asarray(scores)

    if plot_conf_matrix == 1:
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=7, stratify=y)
        print('train and test have been split')
        y_pred = clf.fit(X_train, y_train).predict(X_test)

        # Compute confusion matrix
        cnf_matrix = confusion_matrix(y_test, y_pred)
        np.set_printoptions(precision=2)
        print(cnf_matrix)

        # Plot non-normalized confusion matrix
        plt.figure()
        plot_confusion_matrix(cnf_matrix, classes=class_names,
                              title='Confusion matrix, without normalization')

        # Plot normalized confusion matrix
        plt.figure()
        plot_confusion_matrix(cnf_matrix, classes=class_names, normalize=True,
                              title='Normalized confusion matrix')
        plt.show()

        return scores, y_test, y_pred, cnf_matrix

    # Without plotting, only the cross-validation scores are returned
    return scores
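# A hedged usage sketch (the `epochs` object and the class names below are
# assumptions for illustration, not part of the original function):
X = epochs.get_data()          # (n_epochs, n_channels, n_times)
y = epochs.events[:, -1]       # one categorical label per epoch
scores, y_test, y_pred, cnf = pyR_decoding_on_full_epochs(
    X, y, plot_conf_matrix=1, class_names=['cat1', 'cat2'],
    test_size=0.2, n_splits=5, classifier='ERP_cov')
print(scores.mean())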
def test_CSP():
    """Test CSP"""
    n_trials = 90
    X = generate_cov(n_trials, 3)
    labels = np.array([0, 1, 2]).repeat(n_trials // 3)

    # Test Init
    csp = CSP()
    assert_true(csp.nfilter == 4)
    assert_true(csp.metric == 'euclid')
    assert_true(csp.log)
    csp = CSP(3, 'riemann', False)
    assert_true(csp.nfilter == 3)
    assert_true(csp.metric == 'riemann')
    assert_true(not csp.log)
    assert_raises(TypeError, CSP, 'foo')
    assert_raises(ValueError, CSP, metric='foo')
    assert_raises(TypeError, CSP, log='foo')

    # Test fit
    csp = CSP()
    csp.fit(X, labels % 2)  # two classes
    csp.fit(X, labels)  # 3 classes
    assert_raises(ValueError, csp.fit, X, labels * 0.)  # 1 class
    assert_raises(ValueError, csp.fit, X, labels[:1])  # unequal # of samples
    assert_raises(TypeError, csp.fit, X, 'foo')  # y must be an array
    assert_raises(TypeError, csp.fit, 'foo', labels)  # X must be an array
    assert_raises(ValueError, csp.fit, X[:, 0], labels)
    assert_raises(ValueError, csp.fit, X, X)
    assert_array_equal(csp.filters_.shape, [X.shape[1], X.shape[1]])
    assert_array_equal(csp.patterns_.shape, [X.shape[1], X.shape[1]])

    # Test transform
    Xt = csp.transform(X)
    assert_array_equal(Xt.shape, [len(X), X.shape[1]])
    assert_raises(TypeError, csp.transform, 'foo')
    assert_raises(ValueError, csp.transform, X[:, 1:, :])  # unequal # of chans
    csp.log = False
    Xt = csp.transform(X)
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from pyriemann.spatialfilters import CSP
from pyriemann.estimation import Covariances
from sklearn.pipeline import make_pipeline

parameters = {'kernel': ('linear', 'rbf'), 'C': [0.1, 1, 10]}
clf = GridSearchCV(SVC(), parameters, cv=3)
pipe = make_pipeline(Covariances('oas'), CSP(6), clf)

# this is what will be loaded
PIPELINE = {
    'name': 'CSP + optSVM',
    'paradigms': ['LeftRightImagery'],
    'pipeline': pipe
}
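# A minimal, hedged smoke test on synthetic data (not part of the original
# pipeline file): Covariances expects epochs shaped (n_trials, n_channels,
# n_times), so random signals are enough to check that the pipeline is wired
# correctly before handing it to MOABB.
if __name__ == '__main__':
    import numpy as np

    rng = np.random.RandomState(42)
    X = rng.randn(40, 8, 128)        # 40 trials, 8 channels, 128 samples
    y = np.array([0, 1]).repeat(20)  # two balanced classes
    pipe.fit(X, y)
    print(pipe.score(X, y))          # training accuracy, wiring check only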
                             channels=['C3', 'C4'])
    results = context.process(suffix='C3C4')


def run_analyses(out_dir):
    for suffix in ['', 'C3C4']:
        for ev in [CrossSubjectEvaluation, WithinSessionEvaluation]:
            analyze((ev, LeftRightImagery), out_dir, suffix=suffix,
                    name='{}_{}'.format(ev.__name__, suffix))


if __name__ == '__main__':
    import mne

    # alter mne directories
    mne.utils.set_config('MNE_DATA',
                         '/agbs/bcigroup/Studies/z008_ExternalDatasets/data')

    out_dir = os.path.dirname(os.path.realpath(__file__))

    pipelines = OrderedDict()
    pipelines['TS'] = make_pipeline(Covariances('oas'), TSclassifier())
    pipelines['CSP+LDA'] = make_pipeline(Covariances('oas'), CSP(8), LDA())
    pipelines['CSP+SVM'] = make_pipeline(Covariances('oas'), CSP(8), SVC())

    #
    # OnlyC3C4_cross(out_dir, pipelines)
    # OnlyC3C4_within(out_dir, pipelines)
    #
    # RHvsLH_cross(out_dir, pipelines)
    # RHvsLH_within(out_dir, pipelines)

    run_analyses('/agbs/bcigroup/_Share/Vinay/MOABB')
import os
import os.path as osp
import unittest
from collections import OrderedDict

import numpy as np
from pyriemann.estimation import Covariances
from pyriemann.spatialfilters import CSP
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.pipeline import make_pipeline

from moabb.datasets.fake import FakeDataset
from moabb.evaluations import evaluations as ev
from moabb.paradigms.motor_imagery import FakeImageryParadigm

pipelines = OrderedDict()
pipelines["C"] = make_pipeline(Covariances("oas"), CSP(8), LDA())
dataset = FakeDataset(["left_hand", "right_hand"], n_subjects=2)

if not osp.isdir(osp.join(osp.expanduser("~"), "mne_data")):
    os.makedirs(osp.join(osp.expanduser("~"), "mne_data"))


class Test_WithinSess(unittest.TestCase):
    """This is actually integration testing but I don't know how to do this
    better. A paradigm implements pre-processing so it needs files to run MNE
    stuff on. To test the scoring and train/test we need to also have data and
    run it. Putting this on the future docket...
    """

    def setUp(self):
        self.eval = ev.WithinSessionEvaluation(paradigm=FakeImageryParadigm(),
                                               datasets=[dataset])
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.feature_selection import SelectKBest, mutual_info_classif
from sklearn.pipeline import make_pipeline
from pyriemann.estimation import Covariances
from pyriemann.spatialfilters import CSP

from moabb.pipelines.utils import FilterBank

parameters = {'C': np.logspace(-2, 2, 10)}
clf = GridSearchCV(SVC(kernel='linear'), parameters)
fb = FilterBank(make_pipeline(Covariances(estimator='oas'), CSP(nfilter=4)))
pipe = make_pipeline(fb, SelectKBest(score_func=mutual_info_classif, k=10), clf)

# this is what will be loaded
PIPELINE = {
    'name': 'FBCSP + optSVM',
    'paradigms': ['FilterBankMotorImagery'],
    'pipeline': pipe
}
import os
import unittest
from collections import OrderedDict

from pyriemann.estimation import Covariances
from pyriemann.spatialfilters import CSP
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.pipeline import make_pipeline

from moabb.datasets.fake import FakeDataset
from moabb.evaluations import evaluations as ev
from moabb.paradigms.motor_imagery import FakeImageryParadigm

pipelines = OrderedDict()
pipelines['C'] = make_pipeline(Covariances('oas'), CSP(8), LDA())
dataset = FakeDataset(['left_hand', 'right_hand'], n_subjects=2)


class Test_WithinSess(unittest.TestCase):
    '''This is actually integration testing but I don't know how to do this
    better. A paradigm implements pre-processing so it needs files to run MNE
    stuff on. To test the scoring and train/test we need to also have data and
    run it. Putting this on the future docket...
    '''

    def setUp(self):
        self.eval = ev.WithinSessionEvaluation(paradigm=FakeImageryParadigm(),
                                               datasets=[dataset])

    def tearDown(self):
        path = self.eval.results.filepath
from pyriemann.estimation import Covariances
from pyriemann.spatialfilters import CSP
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVC

parameters = {"kernel": ("linear", "rbf"), "C": [0.1, 1, 10]}
clf = GridSearchCV(SVC(), parameters, cv=3)
pipe = make_pipeline(Covariances("oas"), CSP(6), clf)

# this is what will be loaded
PIPELINE = {"name": "CSP + optSVM", "paradigms": ["LeftRightImagery"], "pipeline": pipe}