def main_EIV_single_from_template(myfilename, template_path):
    # x_test, y_test, e_test, t_test = make_epochs_EIV(
    #     myfilename, chnames, bandpass=(1.0, 20.0), filtre_order=2,
    #     delta=0.6, target=[2], nontarget=[1])
    centroids_train = np.load(template_path + 'Centroids_List.npy')
    # the dict was saved with np.save, so allow_pickle is required here
    Covmats_Dict = np.load(template_path + 'Covmats_Dict.npy',
                           allow_pickle=True)
    Covmats_Dict = Covmats_Dict.item()
    chnames = Covmats_Dict['channel names']
    ERP_train = np.load(template_path + 'ERP_Array.npy')
    erp_train = ERPCovariances(estimator='cov')
    erp_train.P = ERP_train
    # X_test = erp_train.transform(x_test)
    r_TNT_mu_List = np.load(template_path + 'rTNT_mu.npy')
    r_TNT_var_List = np.load(template_path + 'rTNT_var.npy')
    data, labels, event, target = get_data_from_csv_EIV(
        myfilename=myfilename, chnames=chnames)
    mean, var = generic_test_loop(
        data, labels, event, ERP_train, centroids_train,
        r_TNT_mu_List, r_TNT_var_List, column_number=7,
        nb_repetitions=4, items_list=[1, 2, 3, 4, 5, 6, 7],
        visu=False, flashmode='EIV')
    return mean, var
def ml_classifier(inputs, targets, classifier=None, pipeline=None):
    """Uses sklearn to fit a model given inputs and targets.

    Args:
        inputs: list containing (N trials * M channels) data segments
            of length (number of features).
        targets: list containing (N trials * M channels) of marker data
            (0 or 1).
        classifier: pre-trained classifier; if None, train from scratch.
        pipeline: name of the pipeline to create if classifier is None.

    Returns:
        classifier: fitted classifier object.
    """
    pipeline_dict = {
        'vect_lr': make_pipeline(Vectorizer(), StandardScaler(),
                                 LogisticRegression()),
        'vect_reglda': make_pipeline(Vectorizer(),
                                     LDA(shrinkage='auto', solver='eigen')),
        'xdawn_reglda': make_pipeline(Xdawn(2, classes=[1]), Vectorizer(),
                                      LDA(shrinkage='auto', solver='eigen')),
        'erpcov_ts': make_pipeline(ERPCovariances(), TangentSpace(),
                                   LogisticRegression()),
        'erpcov_mdm': make_pipeline(ERPCovariances(), MDM()),
    }
    if not classifier and pipeline:
        classifier = pipeline_dict[pipeline.lower()]
    classifier.fit(inputs, targets)
    return classifier
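# Hypothetical usage sketch for ml_classifier above (not from the original
# source): synthetic trials, binary markers, and the 'erpcov_mdm' pipeline;
# the module-level imports referenced by ml_classifier are assumed available.
import numpy as np

rng = np.random.RandomState(0)
inputs = rng.randn(20, 4, 64)            # 20 trials, 4 channels, 64 samples
targets = np.array([0, 1]).repeat(10)    # binary marker labels
clf = ml_classifier(inputs, targets, pipeline='erpcov_mdm')
print(clf.predict(inputs[:2]))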
def test_erp_covariances_classes(rndstate, get_labels):
    n_matrices, n_channels, n_times, n_classes = 4, 3, 100, 2
    x = rndstate.randn(n_matrices, n_channels, n_times)
    labels = get_labels(n_matrices, n_classes)
    cov = ERPCovariances(classes=[0])
    covmats = cov.fit_transform(x, labels)
    assert covmats.shape == (n_matrices, 2 * n_channels, 2 * n_channels)
    assert is_spsd(covmats)
def N170_test(session_data):
    markers = N170_MARKERS
    epochs = get_session_erp_epochs(session_data, markers)
    conditions = OrderedDict()
    for i in range(len(markers)):
        conditions[markers[i]] = [i + 1]

    clfs = OrderedDict()
    clfs['Vect + LR'] = make_pipeline(Vectorizer(), StandardScaler(),
                                      LogisticRegression())
    clfs['Vect + RegLDA'] = make_pipeline(Vectorizer(),
                                          LDA(shrinkage='auto', solver='eigen'))
    clfs['ERPCov + TS'] = make_pipeline(ERPCovariances(estimator='oas'),
                                        TangentSpace(), LogisticRegression())
    clfs['ERPCov + MDM'] = make_pipeline(ERPCovariances(estimator='oas'), MDM())
    clfs['XdawnCov + TS'] = make_pipeline(XdawnCovariances(estimator='oas'),
                                          TangentSpace(), LogisticRegression())
    clfs['XdawnCov + MDM'] = make_pipeline(XdawnCovariances(estimator='oas'),
                                           MDM())
    methods_list = ['Vect + LR', 'Vect + RegLDA', 'ERPCov + TS',
                    'ERPCov + MDM', 'XdawnCov + TS', 'XdawnCov + MDM']

    # format data
    epochs.pick_types(eeg=True)
    X = epochs.get_data() * 1e6
    times = epochs.times
    y = epochs.events[:, -1]

    # define cross validation
    cv = StratifiedShuffleSplit(n_splits=20, test_size=0.25, random_state=42)

    # run cross validation for each pipeline
    auc = []
    methods = []
    print('Computation in progress...')
    for m in clfs:
        try:
            res = cross_val_score(clfs[m], X, y == 2, scoring='roc_auc',
                                  cv=cv, n_jobs=-1)
            auc.extend(res)
            methods.extend([m] * len(res))
        except Exception as e:
            print('Cross-validation failed for %s: %s' % (m, e))

    # summarize decoding results: mean AUC per method
    results = pd.DataFrame(data=auc, columns=['AUC'])
    results['Method'] = methods
    n_row, n_column = results.shape
    auc_means = []
    for method in methods_list:
        method_auc = [results.loc[i, 'AUC'] for i in range(n_row)
                      if results.loc[i, 'Method'] == method]
        auc_means.append(np.mean(method_auc))

    # count the pipelines reaching a mean AUC of at least 0.7
    counter = sum(1 for m in auc_means if m >= 0.7)
    return counter > 0, counter
def test_ERPcovariances():
    """Test fit ERPCovariances"""
    x = np.random.randn(10, 3, 100)
    labels = np.array([0, 1]).repeat(5)
    cov = ERPCovariances()
    cov.fit_transform(x, labels)
    cov = ERPCovariances(classes=[0])
    cov.fit_transform(x, labels)
    # assert that a non-integer svd raises
    assert_raises(TypeError, ERPCovariances, svd='42')
    cov = ERPCovariances(svd=1)
def test_from_cross_template_P300(template_path, subject_path, test_chnames,
                                  flashmode='RoCo', nb_targets=180,
                                  visu=False):
    T = 0
    NT = 1
    ERP = np.load(template_path + 'ERP_Array.npy')
    Centroids_List = np.load(template_path + 'Centroids_List.npy')
    mu_TNT = np.load(template_path + 'rTNT_mu.npy')
    sigma_TNT = np.load(template_path + 'rTNT_var.npy')
    data, labels, event = get_data_from_csv_EIV(
        myfilename=subject_path + '-signals.csv',
        markersfile=subject_path + 'markers.csv',
        chnames=test_chnames)
    erp = ERPCovariances()
    erp.P = ERP
    erp.estimator = 'cov'
    X = erp.transform(data)
    train_NaiveBayes = R_TNT_NaiveBayes(
        targets=['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L',
                 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
                 'Y', 'Z', '1', '2', '3', '4', '5', '6', '7', '8', '9', '_'],
        mu_TNT=mu_TNT, sigma_TNT=sigma_TNT, class_prior=None)
    # r_TNT: log-ratio of distances to the Target and NonTarget centroids
    dist = [np.array([distance(x, Centroids_List[l]) for x in X])
            for l in [T, NT]]
    r_TNT = np.log(dist[0] / dist[1])
    mean, var = test_loop_P300(r_TNT_test=r_TNT, y_test=labels, e_test=event,
                               train_NaiveBayes=train_NaiveBayes, T=0, NT=1,
                               flashmode=flashmode, visu=visu,
                               nb_targets=nb_targets)
    return mean, var
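# Illustration (not from the original source) of the r_TNT statistic used
# above: the log-ratio of Riemannian distances to the Target and NonTarget
# centroids, computed here on random SPD matrices.
import numpy as np
from pyriemann.utils.distance import distance


def _random_spd(n, rng):
    a = rng.randn(n, n)
    return a @ a.T + n * np.eye(n)


rng = np.random.RandomState(3)
C_T, C_NT = _random_spd(4, rng), _random_spd(4, rng)
x = _random_spd(4, rng)
r_tnt = np.log(distance(x, C_T) / distance(x, C_NT))
print(r_tnt)  # negative means x is closer to the Target centroid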
def test_erp_covariances(estimator, svd, rndstate, get_labels):
    """Test fit ERPCovariances"""
    n_classes, n_matrices, n_channels, n_times = 2, 4, 3, 100
    x = rndstate.randn(n_matrices, n_channels, n_times)
    labels = get_labels(n_matrices, n_classes)
    cov = ERPCovariances(estimator=estimator, svd=svd)
    covmats = cov.fit_transform(x, labels)
    if svd is None:
        covsize = (n_classes + 1) * n_channels
    else:
        covsize = n_classes * svd + n_channels
    assert cov.get_params() == dict(classes=None, estimator=estimator, svd=svd)
    assert covmats.shape == (n_matrices, covsize, covsize)
    assert is_spsd(covmats)
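# Quick sanity check (a sketch, not part of the test suite) of the
# super-trial dimensions asserted above.
import numpy as np
from pyriemann.estimation import ERPCovariances

rng = np.random.RandomState(42)
x = rng.randn(8, 3, 100)             # 8 trials, 3 channels, 100 samples
labels = np.array([0, 1]).repeat(4)

# svd=None: both class prototypes are stacked on each trial, so the
# covariances are (n_classes + 1) * n_channels = 9 x 9
print(ERPCovariances().fit_transform(x, labels).shape)       # (8, 9, 9)

# svd=2: each prototype is reduced to 2 spatial components, so the
# covariances are n_classes * svd + n_channels = 7 x 7
print(ERPCovariances(svd=2).fit_transform(x, labels).shape)  # (8, 7, 7)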
def test_ERPcovariances():
    """Test fit ERPCovariances"""
    x = np.random.randn(10, 3, 100)
    labels = np.array([0, 1]).repeat(5)
    cov = ERPCovariances()
    cov.fit_transform(x, labels)
    cov = ERPCovariances(classes=[0])
    cov.fit_transform(x, labels)
    # assert that a non-integer svd raises
    assert_raises(TypeError, ERPCovariances, svd='42')
    cov = ERPCovariances(svd=2)
    assert_equal(cov.get_params(), dict(classes=None, estimator='scm', svd=2))
    cov.fit_transform(x, labels)
def test_erp_covariances_svd_error(rndstate, get_labels):
    """Assert that a non-integer svd raises a TypeError at fit time"""
    n_matrices, n_channels, n_times, n_classes = 4, 3, 50, 2
    x = rndstate.randn(n_matrices, n_channels, n_times)
    labels = get_labels(n_matrices, n_classes)
    with pytest.raises(TypeError):
        ERPCovariances(svd="42").fit(x, labels)
def erpcov_ts_lr():
    """Obtain Riemannian features and classify them with logistic regression"""
    return make_pipeline(
        ERPCovariances(estimator="oas"),
        TangentSpace(),
        LogisticRegression(solver="liblinear", C=1.0,
                           class_weight="balanced", penalty="l1"),
    )
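# Minimal usage sketch for erpcov_ts_lr() on synthetic epochs (shapes and
# labels are assumptions, not from the original source).
import numpy as np

clf = erpcov_ts_lr()
X = np.random.randn(30, 4, 50)       # trials x channels x samples
y = np.array([0, 1]).repeat(15)
clf.fit(X, y)
print(clf.predict(X[:3]))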
def predict_ERP_centroids(x, y, metric='riemann', ERP_bloc=None, T=0, NT=1):
    """Helper to predict the r_TNT for a new set of trials.

    Parameters
    ----------
    x : ndarray, shape (n_trials, n_channels, n_times)
    y : ndarray, shape (n_trials,)
    ERP_bloc : list with 0 or 1 for the class in the ERP

    Returns
    -------
    erp : the ERPCovariances object, with erp.P an ndarray of shape
        (n_channels * len(ERP_bloc), n_times)
    centroids : list of the two class centroids, each an ndarray of shape
        (n_channels * (len(ERP_bloc) + 1), n_channels * (len(ERP_bloc) + 1))
    X : ndarray, shape (n_trials, n_channels * (len(ERP_bloc) + 1),
        n_channels * (len(ERP_bloc) + 1)), the super covariance matrices
        of the signals given in input
    """
    classes = [T, NT]
    erp = ERPCovariances(classes=ERP_bloc, estimator='cov')
    erp.fit(X=x, y=y)
    X = erp.transform(X=x)
    centroids = [mean_covariance(X[y == l, :, :], metric=metric)
                 for l in classes]
    return erp, centroids, X
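# Sketch of calling predict_ERP_centroids on synthetic data (purely for
# illustration; labels 0/1 match the default T/NT convention above).
import numpy as np

rng = np.random.RandomState(1)
x = rng.randn(16, 3, 40)
y = np.array([0, 1]).repeat(8)
erp, centroids, X = predict_ERP_centroids(x, y, metric='riemann',
                                          ERP_bloc=[0])
print(X.shape, centroids[0].shape)   # (16, 6, 6) and (6, 6)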
def erp_cov_vr_pc(X_training, labels_training, X_test, labels_test,
                  class_name, class_info):
    # estimate the extended (super-trial) ERP covariance matrices
    erpc = ERPCovariances(classes=[class_info[class_name]], estimator='lwf')
    erpc.fit(X_training, labels_training)
    covs_training = erpc.transform(X_training)
    covs_test = erpc.transform(X_test)

    # get the AUC for the classification
    clf = MDM()
    clf.fit(covs_training, labels_training)
    labels_pred = clf.predict(covs_test)
    return roc_auc_score(labels_test, labels_pred)
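# Hedged usage sketch for erp_cov_vr_pc (the 'Target' mapping is an
# assumption for illustration; data is synthetic).
import numpy as np

rng = np.random.RandomState(5)
X_tr, X_te = rng.randn(40, 4, 32), rng.randn(20, 4, 32)
y_tr, y_te = np.tile([0, 1], 20), np.tile([0, 1], 10)
auc = erp_cov_vr_pc(X_tr, y_tr, X_te, y_te,
                    class_name='Target', class_info={'Target': 1})
print(auc)   # ~0.5 on random data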
def get_sourcetarget_split_p300(source, target, ncovs_train):
    X_source = source['epochs']
    y_source = source['labels'].flatten()
    covs_source = ERPCovariances(classes=[2], estimator='lwf').fit_transform(
        X_source, y_source)

    source = {}
    source['covs'] = covs_source
    source['labels'] = y_source

    X_target = target['epochs']
    y_target = target['labels'].flatten()

    sel = np.arange(len(y_target))
    np.random.shuffle(sel)
    X_target = X_target[sel]
    y_target = y_target[sel]

    idx_erps = np.where(y_target == 2)[0][:ncovs_train]
    # because there's one ERP (target) in every 6 flashes
    idx_rest = np.where(y_target == 1)[0][:ncovs_train * 5]
    idx_train = np.concatenate([idx_erps, idx_rest])
    idx_test = np.array(
        [i for i in range(len(y_target)) if i not in idx_train])

    erp = ERPCovariances(classes=[2], estimator='lwf')
    erp.fit(X_target[idx_train], y_target[idx_train])

    target_train = {}
    target_train['covs'] = erp.transform(X_target[idx_train])
    target_train['labels'] = y_target[idx_train]

    target_test = {}
    target_test['covs'] = erp.transform(X_target[idx_test])
    target_test['labels'] = y_target[idx_test]

    return source, target_train, target_test
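# Hypothetical call sketch for get_sourcetarget_split_p300 (the dict layout
# is inferred from the function body; data is synthetic, with one target
# flash in every six, as the function assumes).
import numpy as np

rng = np.random.RandomState(7)


def _fake_subject(n_trials=120):
    return {'epochs': rng.randn(n_trials, 4, 32),
            'labels': np.tile([1, 1, 1, 1, 1, 2], n_trials // 6)}


source, target_train, target_test = get_sourcetarget_split_p300(
    _fake_subject(), _fake_subject(), ncovs_train=5)
print(source['covs'].shape, target_train['covs'].shape)  # (120, 8, 8), (30, 8, 8)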
def distribution_single_from_template(test_sub, test_sess, template_path,
                                      database):
    x_test, y_test, e_test, t_test, _, _ = database.subjects[
        test_sub].get_data([test_sess])
    ERP_train = np.load(template_path + 'ERP_Array.npy')
    erp_train = ERPCovariances(estimator='cov')
    erp_train.P = ERP_train
    X_test = erp_train.transform(x_test)
    centroids_train = np.load(template_path + 'Centroids_List.npy')
    r_TNT_test = predict_R_TNT(X=X_test, centroids_list=centroids_train)
    return r_TNT_test, y_test
epochs = Epochs(raw, events=events, event_id=event_id,
                tmin=-0.1, tmax=0.8, baseline=None,
                reject={'eeg': 75e-6}, preload=True,
                verbose=False, picks=[0, 1, 2, 3])
print('sample drop %: ', (1 - len(epochs.events) / len(events)) * 100)
epochs

###################################################################################################
# Run classification
# ----------------------------

clfs = OrderedDict()
clfs['Vect + LR'] = make_pipeline(Vectorizer(), StandardScaler(),
                                  LogisticRegression())
clfs['Vect + RegLDA'] = make_pipeline(Vectorizer(),
                                      LDA(shrinkage='auto', solver='eigen'))
clfs['ERPCov + TS'] = make_pipeline(ERPCovariances(estimator='oas'),
                                    TangentSpace(), LogisticRegression())
clfs['ERPCov + MDM'] = make_pipeline(ERPCovariances(estimator='oas'), MDM())
clfs['XdawnCov + TS'] = make_pipeline(XdawnCovariances(estimator='oas'),
                                      TangentSpace(), LogisticRegression())
clfs['XdawnCov + MDM'] = make_pipeline(XdawnCovariances(estimator='oas'),
                                       MDM())

# format data
epochs.pick_types(eeg=True)
X = epochs.get_data() * 1e6
times = epochs.times
y = epochs.events[:, -1]

# define cross validation
cv = StratifiedShuffleSplit(n_splits=20, test_size=0.25, random_state=42)

# run cross validation for each pipeline (a sketch of the loop follows below)
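# The loop itself is elided above; a sketch of how the companion snippets in
# this collection (e.g. N170_test) run it, assuming targets are event id 2
# and pandas is imported as pd:
auc = []
methods = []
for m in clfs:
    res = cross_val_score(clfs[m], X, y == 2, scoring='roc_auc',
                          cv=cv, n_jobs=-1)
    auc.extend(res)
    methods.extend([m] * len(res))

results = pd.DataFrame(data=auc, columns=['AUC'])
results['Method'] = methods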
def test_ERPcovariances():
    """Test fit ERPCovariances"""
    x = np.random.randn(10, 3, 100)
    labels = np.array([0, 1]).repeat(5)
    cov = ERPCovariances()
    cov.fit_transform(x, labels)
    cov = ERPCovariances(classes=[0])
    cov.fit_transform(x, labels)
    # assert that a non-integer svd raises
    assert_raises(TypeError, ERPCovariances, svd='42')
    cov = ERPCovariances(svd=1)
    assert_equal(cov.get_params(), dict(classes=None, estimator='scm', svd=1))
    return np.reshape(X, (X.shape[0], -1))


##############################################################################
# Create pipelines
# ----------------
#
# Pipelines must be a dict of sklearn pipeline transformers.

pipelines = {}

# We have to do this because the classes are called 'Target' and 'NonTarget',
# but the evaluation function uses a LabelEncoder, transforming them
# to 0 and 1.
labels_dict = {'Target': 1, 'NonTarget': 0}

pipelines['RG + LDA'] = make_pipeline(
    XdawnCovariances(nfilter=2, classes=[labels_dict['Target']],
                     estimator='lwf', xdawn_estimator='lwf'),
    TangentSpace(),
    LDA(solver='lsqr', shrinkage='auto'))

pipelines['Xdw + LDA'] = make_pipeline(
    Xdawn(nfilter=2, estimator='lwf'),
    Vectorizer(),
    LDA(solver='lsqr', shrinkage='auto'))

pipelines['ERPCov + TS'] = make_pipeline(
    ERPCovariances(classes=[0, 1], estimator='oas', svd=None),
    TangentSpace(metric='riemann'),
    LogisticRegression(solver='lbfgs'))
# ----------------------------

clfs = OrderedDict()
clfs['Vect + LR'] = make_pipeline(Vectorizer(), StandardScaler(),
                                  LogisticRegression())
clfs['Vect + RegLDA'] = make_pipeline(Vectorizer(),
                                      LDA(shrinkage='auto', solver='eigen'))
clfs['Xdawn + RegLDA'] = make_pipeline(Xdawn(2, classes=[1]), Vectorizer(),
                                       LDA(shrinkage='auto', solver='eigen'))
clfs['XdawnCov + TS'] = make_pipeline(XdawnCovariances(estimator='oas'),
                                      TangentSpace(), LogisticRegression())
clfs['XdawnCov + MDM'] = make_pipeline(XdawnCovariances(estimator='oas'),
                                       MDM())
clfs['ERPCov + TS'] = make_pipeline(ERPCovariances(), TangentSpace(),
                                    LogisticRegression())
clfs['ERPCov + MDM'] = make_pipeline(ERPCovariances(), MDM())

# format data
epochs.pick_types(eeg=True)
X = epochs.get_data() * 1e6
times = epochs.times
y = epochs.events[:, -1]

# define cross validation
cv = StratifiedShuffleSplit(n_splits=10, test_size=0.25, random_state=42)

# run cross validation for each pipeline
auc = []
methods = []
        },
    ),
    'CSP LDA': (
        make_pipeline(CSP(), LDA(shrinkage='auto', solver='eigen')),
        {
            'csp__n_components': (6, 9, 13),
            'csp__cov_est': ('concat', 'epoch'),
        },
    ),
    'Xdawn LDA': (
        make_pipeline(Xdawn(2, classes=[1]), Vectorizer(),
                      LDA(shrinkage='auto', solver='eigen')),
        {},
    ),
    'ERPCov TS LR': (
        make_pipeline(ERPCovariances(estimator='oas'), TangentSpace(),
                      LogisticRegression()),
        {
            'erpcovariances__estimator': ('lwf', 'oas'),
        },
    ),
    'ERPCov MDM': (
        make_pipeline(ERPCovariances(), MDM()),
        {
            'erpcovariances__estimator': ('lwf', 'oas'),
        },
    ),
}


def crossvalidate_record(record, clfs=clfs, scores=scores):
labels = LabelEncoder().fit_transform(labels)
kf = KFold(n_splits=6)
repetitions = [1, 2]
auc = []
blocks = np.arange(1, 12 + 1)
for train_idx, test_idx in kf.split(np.arange(12)):
    # split in training and testing blocks
    X_training, labels_training, _ = get_block_repetition(
        X, labels, meta, blocks[train_idx], repetitions)
    X_test, labels_test, _ = get_block_repetition(
        X, labels, meta, blocks[test_idx], repetitions)

    # estimate the extended (super-trial) ERP covariance matrices
    dict_labels = {'Target': 1, 'NonTarget': 0}
    erpc = ERPCovariances(classes=[dict_labels['Target']], estimator='lwf')
    erpc.fit(X_training, labels_training)
    covs_training = erpc.transform(X_training)
    covs_test = erpc.transform(X_test)

    # get the AUC for the classification
    clf = MDM()
    clf.fit(covs_training, labels_training)
    labels_pred = clf.predict(covs_test)
    auc.append(roc_auc_score(labels_test, labels_pred))

# store scores
scores_subject.append(np.mean(auc))
scores.append(scores_subject)
def erp_cov(X, y, class_name, class_info):
    c = __get__proto__class__(class_name, class_info)
    skf = StratifiedKFold(n_splits=5)
    clf = make_pipeline(ERPCovariances(estimator='lwf', classes=c), MDM())
    return cross_val_score(clf, X, y, cv=skf, scoring='roc_auc').mean()
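# Usage sketch for erp_cov. __get__proto__class__ is not defined in this
# snippet, so a trivial stand-in is assumed here, and erp_cov is assumed to
# live in the same namespace; both are illustrative assumptions only.
import numpy as np


def __get__proto__class__(class_name, class_info):
    # assumed behavior: map the class name to its numeric label, as a list
    return [class_info[class_name]]


X = np.random.randn(40, 4, 32)
y = np.tile([0, 1], 20)
print(erp_cov(X, y, 'Target', {'Target': 1}))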
                events, event_id, tmin=0.0, tmax=0.8,
                baseline=None, verbose=False, preload=True)
epochs.pick_types(eeg=True)

# get trials and labels
X = epochs.get_data()
y = epochs.events[:, -1]
y = y - 1

# cross validation
skf = StratifiedKFold(n_splits=5)
clf = make_pipeline(ERPCovariances(estimator='lwf', classes=[1]), MDM())
scr[subject] = cross_val_score(clf, X, y, cv=skf, scoring='roc_auc').mean()

# print results of classification
print('subject', subject)
print('mean AUC :', scr[subject])

filename = './classification_scores.pkl'
joblib.dump(scr, filename)

with open('classification_scores.txt', 'w') as the_file:
    for subject in scr.keys():
        the_file.write('subject ' + str(subject).zfill(2) + ' :' +
                       ' {:.2f}'.format(scr[subject]) + '\n')
                verbose=False, picks=[0, 1, 2, 3])
print('sample drop %: ', (1 - len(epochs.events) / len(events)) * 100)
epochs

###################################################################################################
# Run classification
# ----------------------------

clfs = OrderedDict()
clfs['Vect + LR'] = make_pipeline(Vectorizer(), StandardScaler(),
                                  LogisticRegression())
clfs['Vect + RegLDA'] = make_pipeline(Vectorizer(),
                                      LDA(shrinkage='auto', solver='eigen'))
clfs['ERPCov + TS'] = make_pipeline(ERPCovariances(estimator='oas'),
                                    TangentSpace(), LogisticRegression())
clfs['ERPCov + MDM'] = make_pipeline(ERPCovariances(estimator='oas'), MDM())
clfs['XdawnCov + TS'] = make_pipeline(XdawnCovariances(estimator='oas'),
                                      TangentSpace(), LogisticRegression())
clfs['XdawnCov + MDM'] = make_pipeline(XdawnCovariances(estimator='oas'),
                                       MDM())

# format data
epochs.pick_types(eeg=True)
X = epochs.get_data() * 1e6
times = epochs.times
y = epochs.events[:, -1]

# define cross validation
cv = StratifiedShuffleSplit(n_splits=20, test_size=0.25, random_state=42)
def decode(epochs, get_y_label_func, epoch_filter=None,
           decoding_method='standard', sliding_window_size=None,
           sliding_window_step=None, n_jobs=multiprocessing.cpu_count(),
           equalize_event_counts=True, only_fit=False,
           generalize_across_time=True):
    """
    Basic flow for decoding
    """
    config = dict(equalize_event_counts=equalize_event_counts,
                  only_fit=only_fit,
                  sliding_window_size=sliding_window_size,
                  sliding_window_step=sliding_window_step,
                  decoding_method=decoding_method,
                  generalize_across_time=generalize_across_time,
                  epoch_filter=str(epoch_filter))

    if epoch_filter is not None:
        epochs = epochs[epoch_filter]

    #-- Classify epochs into groups (training epochs)
    y_labels = get_y_label_func(epochs)

    if equalize_event_counts:
        epochs.events[:, 2] = y_labels
        epochs.event_id = {str(label): label for label in np.unique(y_labels)}
        min_n_items_per_y_label = min(
            [len(epochs[cond]) for cond in epochs.event_id.keys()])
        print("\nEqualizing the number of epochs to %d per condition..." %
              min_n_items_per_y_label)
        epochs.equalize_event_counts(epochs.event_id.keys())
        y_labels = epochs.events[:, 2]

    print("The epochs were classified into %d groups:" % len(set(y_labels)))
    for g in set(y_labels):
        print("Group {:}: {:} epochs".format(g, sum(np.array(y_labels) == g)))

    #-- Create the decoding pipeline
    print("Creating the classification pipeline...")

    epochs_data = epochs.get_data()

    preprocess_pipeline = None

    if decoding_method.startswith('standard'):
        if 'reg' in decoding_method:
            clf = make_pipeline(StandardScaler(), Ridge())
        else:
            clf = make_pipeline(
                StandardScaler(),
                svm.SVC(C=1, kernel='linear', class_weight='balanced'))

        if 'raw' not in decoding_method:
            assert sliding_window_size is not None
            assert sliding_window_step is not None
            preprocess_pipeline = make_pipeline(
                umne.transformers.SlidingWindow(
                    window_size=sliding_window_size,
                    step=sliding_window_step, average=True))

    elif decoding_method == 'ERP_cov':
        clf = make_pipeline(
            UnsupervisedSpatialFilter(PCA(20), average=False),
            ERPCovariances(estimator='lwf'),  # todo how to apply sliding window?
            CSP(30, log=False),
            TangentSpace('logeuclid'),
            LogisticRegression('l2'))  # todo why logistic regression?

    elif decoding_method == 'Xdawn_cov':
        clf = make_pipeline(
            UnsupervisedSpatialFilter(PCA(50), average=False),
            XdawnCovariances(12, estimator='lwf', xdawn_estimator='lwf'),
            TangentSpace('logeuclid'),
            LogisticRegression('l2'))

    elif decoding_method == 'Hankel_cov':
        clf = make_pipeline(
            UnsupervisedSpatialFilter(PCA(70), average=False),
            HankelCovariances(delays=[1, 8, 12, 64], estimator='oas'),
            CSP(15, log=False),
            TangentSpace('logeuclid'),
            LogisticRegression('l2'))

    else:
        raise Exception('Unknown decoding method: {:}'.format(decoding_method))

    print('\nDecoding pipeline:')
    for i in range(len(clf.steps)):
        print('Step #{:}: {:}'.format(i + 1, clf.steps[i][1]))

    if preprocess_pipeline is not None:
        print('\nApplying the pre-processing pipeline:')
        for i in range(len(preprocess_pipeline.steps)):
            print('Step #{:}: {:}'.format(i + 1,
                                          preprocess_pipeline.steps[i][1]))
        epochs_data = preprocess_pipeline.fit_transform(epochs_data)

    if only_fit:
        #-- Only fit the decoders
        procedure = 'only_fit'
        scores = None
        cv = None

        if decoding_method.startswith('standard'):
            if 'reg' in decoding_method:
                if 'r2' in decoding_method:
                    scoring = metrics.make_scorer(metrics.r2_score)
                else:
                    scoring = metrics.make_scorer(metrics.mean_squared_error)
            else:
                scoring = 'accuracy'
            if generalize_across_time:
                estimator = GeneralizingEstimator(clf, scoring=scoring,
                                                  n_jobs=n_jobs)
            else:
                estimator = SlidingEstimator(clf, scoring=scoring,
                                             n_jobs=n_jobs)
        else:
            estimator = clf

        estimator.fit(X=epochs_data, y=y_labels)

    else:
        #-- Classify & score -- cross-validation
        procedure = 'fit_and_score'
        print("\nCreating a classifier and calculating accuracy scores "
              "(this may take some time)...")

        cv = StratifiedKFold(n_splits=5)
        if decoding_method.startswith('standard'):
            if 'reg' in decoding_method:
                if 'r2' in decoding_method:
                    scoring = metrics.make_scorer(metrics.r2_score)
                else:
                    scoring = metrics.make_scorer(metrics.mean_squared_error)
            else:
                scoring = 'accuracy'

            if generalize_across_time:
                estimator = GeneralizingEstimator(clf, scoring=scoring,
                                                  n_jobs=n_jobs)
            else:
                estimator = SlidingEstimator(clf, scoring=scoring,
                                             n_jobs=n_jobs)

            scores = cross_val_multiscore(estimator=estimator, X=epochs_data,
                                          y=np.array(y_labels), cv=cv)
        else:
            scores = _run_cross_validation(X=epochs_data,
                                           y=np.array(y_labels),
                                           clf=clf, cv=cv)
            # Estimator is not defined in the case of Riemannian decoding
            estimator = 'None'

    times = np.linspace(epochs.tmin, epochs.tmax, epochs_data.shape[2])

    return dict(procedure=procedure, estimator=estimator, scores=scores,
                pipeline=clf, preprocess=preprocess_pipeline, cv=cv,
                times=times, config=config)
def pyR_decoding_on_full_epochs(X, y, plot_conf_matrix=0, class_names=None,
                                test_size=0.2, n_splits=5,
                                classifier='ERP_cov'):
    """Decode on the full epoch using a pyRiemann decoder.

    cf. https://github.com/Team-BK/Biomag2016/blob/master/Final_Submission.ipynb

    Parameters
    ----------
    X : data extracted from the epochs provided to the decoder
    y : categorical variable (i.e. discrete, but it can have more than two
        categories)
    plot_conf_matrix : set to 1 if you want to see the confusion matrix
    class_names : needed for the legend if confusion matrices are plotted,
        e.g. ['cat1', 'cat2', 'cat3']
    test_size : proportion of the data on which you want to test the decoder
    n_splits : number of cross-validation folds used when calculating the score
    classifier : set it to 'ERP_cov', 'Xdawn_cov' or 'Hankel_cov' depending
        on the classification you want to do

    Returns
    -------
    scores, y_test, y_pred, cnf_matrix, or just scores if you don't want
    the confusion matrix
    """
    # ------- define the classifier -------
    if classifier == 'ERP_cov':
        spatial_filter = UnsupervisedSpatialFilter(PCA(20), average=False)
        ERP_cov = ERPCovariances(estimator='lwf')
        CSP_30 = CSP(30, log=False)
        tang = TangentSpace('logeuclid')
        clf = make_pipeline(spatial_filter, ERP_cov, CSP_30, tang,
                            LogisticRegression('l2'))

    if classifier == 'Xdawn_cov':
        clf = make_pipeline(
            UnsupervisedSpatialFilter(PCA(50), average=False),
            XdawnCovariances(12, estimator='lwf', xdawn_estimator='lwf'),
            TangentSpace('logeuclid'),
            LogisticRegression('l2'))

    if classifier == 'Hankel_cov':
        clf = make_pipeline(
            UnsupervisedSpatialFilter(PCA(70), average=False),
            HankelCovariances(delays=[1, 8, 12, 64], estimator='oas'),
            CSP(15, log=False),
            TangentSpace('logeuclid'),
            LogisticRegression('l2'))

    cv = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=4343)
    y = np.asarray(y)
    scores = []
    for train_index, test_index in cv.split(X, y):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        # Train on X_train, y_train
        clf.fit(X_train, y_train)
        # Predict the category on X_test
        y_pred = clf.predict(X_test)

        scores.append(accuracy_score(y_true=y_test, y_pred=y_pred))
    scores = np.asarray(scores)

    if plot_conf_matrix == 1:
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=7, stratify=y)
        y_pred = clf.fit(X_train, y_train).predict(X_test)

        # Compute confusion matrix
        cnf_matrix = confusion_matrix(y_test, y_pred)
        np.set_printoptions(precision=2)
        print(cnf_matrix)

        # Plot non-normalized confusion matrix
        plt.figure()
        plot_confusion_matrix(cnf_matrix, classes=class_names,
                              title='Confusion matrix, without normalization')

        # Plot normalized confusion matrix
        plt.figure()
        plot_confusion_matrix(cnf_matrix, classes=class_names, normalize=True,
                              title='Normalized confusion matrix')
        plt.show()
        return scores, y_test, y_pred, cnf_matrix

    # without the confusion matrix, only the cross-validated scores are
    # returned (cnf_matrix would be undefined here)
    return scores
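# Hedged usage sketch for pyR_decoding_on_full_epochs on synthetic data
# (24 channels so the PCA(20) spatial filter inside 'ERP_cov' applies;
# the module-level imports used by the function are assumed available).
import numpy as np

X = np.random.randn(60, 24, 50)      # trials x channels x samples
y = np.repeat([0, 1], 30)
scores = pyR_decoding_on_full_epochs(X, y, classifier='ERP_cov')
print('mean accuracy:', scores.mean())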
def test_erp_covariances_svd_error(rndstate):
    # assert that a non-integer svd raises a TypeError
    with pytest.raises(TypeError):
        ERPCovariances(svd="42")