def run_time_decoding(subject, condition1, condition2, session=None):
    """Decode ``condition1`` against ``condition2`` at every time point.

    The subject's epochs are read from the derivatives root, restricted to
    the two conditions, and scored with a cross-validated sliding estimator
    (standard scaling followed by logistic regression). Scores and the time
    axis are written to a ``.mat`` file next to the input.

    Parameters
    ----------
    subject : str
        Subject identifier used to build the BIDS path.
    condition1, condition2 : str
        Epoch selectors. Trials of ``condition1`` are labelled 1, trials of
        ``condition2`` are labelled 0.
    session : str | None
        Session identifier used to build the BIDS path.
    """
    msg = f'Contrasting conditions: {condition1} – {condition2}'
    logger.info(gen_log_message(message=msg, step=8, subject=subject,
                                session=session))

    fname_in = BIDSPath(subject=subject,
                        session=session,
                        task=config.get_task(),
                        acquisition=config.acq,
                        run=None,
                        recording=config.rec,
                        space=config.space,
                        suffix='epo',
                        extension='.fif',
                        datatype=config.get_datatype(),
                        root=config.deriv_root,
                        check=False)
    all_epochs = mne.read_epochs(fname_in)

    # Keep only the two contrasted conditions, condition1 trials first.
    epochs = mne.concatenate_epochs([all_epochs[condition1],
                                     all_epochs[condition2]])
    epochs.apply_baseline()

    # Labels follow the concatenation order: ones for condition1, then
    # zeros for condition2.
    n_cond1 = len(epochs[condition1])
    n_cond2 = len(epochs[condition2])
    y = np.r_[np.ones(n_cond1), np.zeros(n_cond2)]
    X = epochs.get_data()

    classifier = make_pipeline(
        StandardScaler(),
        LogisticRegression(solver='liblinear',
                           random_state=config.random_state))
    se = SlidingEstimator(classifier,
                          scoring=config.decoding_metric,
                          n_jobs=config.N_JOBS)
    scores = cross_val_multiscore(se, X=X, y=y, cv=config.decoding_n_splits)

    # Build the "processing" entity of the output name; path separators,
    # underscores and dashes are all stripped from it.
    a_vs_b = f'{condition1}-{condition2}'.replace(op.sep, '')
    processing = f'{a_vs_b}+{config.decoding_metric}'
    processing = processing.replace('_', '-').replace('-', '')

    fname_td = fname_in.copy().update(suffix='decoding',
                                      processing=processing,
                                      extension='.mat')
    savemat(fname_td, {'scores': scores, 'times': epochs.times})
def run_time_decoding(subject, condition1, condition2):
    """Decode ``condition1`` against ``condition2`` at every time point.

    Reads the subject's epochs from ``config.meg_dir``, scores a sliding
    estimator (standard scaling + logistic regression) with stratified
    K-fold cross-validation and saves scores and times to a ``.mat`` file
    in the subject directory.

    Parameters
    ----------
    subject : str
        Subject identifier; also the name of the subject directory.
    condition1, condition2 : str
        Epoch selectors. Trials of ``condition1`` are labelled 1, trials of
        ``condition2`` are labelled 0.
    """
    print("processing subject: %s (%s vs %s)"
          % (subject, condition1, condition2))
    print("Processing subject: %s" % subject)

    meg_subject_dir = op.join(config.meg_dir, subject)
    # NOTE: ``config.base_fname`` is filled from the local variables, so the
    # names ``subject`` and ``extension`` must stay as they are.
    extension = '-epo'
    fname_in = op.join(meg_subject_dir,
                       config.base_fname.format(**locals()))
    print("Input: ", fname_in)

    epochs = mne.read_epochs(fname_in)

    # Keep only the two contrasted conditions, condition1 trials first.
    epochs = mne.concatenate_epochs([epochs[condition1],
                                     epochs[condition2]])
    epochs.apply_baseline()

    # Data and labels (ones for condition1, zeros for condition2).
    X = epochs.get_data()
    n_cond1 = len(epochs[condition1])
    n_cond2 = len(epochs[condition2])
    y = np.r_[np.ones(n_cond1), np.zeros(n_cond2)]

    # The metric is taken from the configuration.
    se = SlidingEstimator(
        make_pipeline(
            StandardScaler(),
            LogisticRegression(solver='liblinear',
                               random_state=config.random_state)),
        scoring=config.decoding_metric,
        n_jobs=config.N_JOBS)

    # ``random_state`` only has an effect when shuffling is enabled, and
    # recent scikit-learn versions raise a ValueError when it is set while
    # ``shuffle`` is False.
    cv = StratifiedKFold(n_splits=config.decoding_n_splits,
                         shuffle=True,
                         random_state=config.random_state)
    scores = cross_val_multiscore(se, X=X, y=y, cv=cv)

    # Save the scores; path separators are removed from the contrast name.
    a_vs_b = '%s_vs_%s' % (condition1, condition2)
    a_vs_b = a_vs_b.replace(op.sep, '')
    fname_td = op.join(
        meg_subject_dir,
        '%s_%s_%s_%s.mat' % (subject, config.study_name, a_vs_b,
                             config.decoding_metric))
    savemat(fname_td, {'scores': scores, 'times': epochs.times})
def run_time_decoding(subject_id, condition1, condition2):
    """Decode ``condition1`` against ``condition2`` over time (ROC-AUC).

    Parameters
    ----------
    subject_id : int
        Numeric subject id; formatted as ``sub%03d`` to locate the data.
    condition1, condition2 : str
        Epoch selectors. Trials of ``condition1`` are labelled 1, trials of
        ``condition2`` are labelled 0.
    """
    print("processing subject: %s (%s vs %s)"
          % (subject_id, condition1, condition2))

    subject = "sub%03d" % subject_id
    data_path = os.path.join(meg_dir, subject)
    fname_epochs = os.path.join(
        data_path, '%s_highpass-%sHz-epo.fif' % (subject, l_freq))
    all_epochs = mne.read_epochs(fname_epochs)

    # Keep only the two contrasted conditions, condition1 trials first.
    epochs = mne.concatenate_epochs([all_epochs[condition1],
                                     all_epochs[condition2]])
    epochs.apply_baseline()

    # Restrict to MEG channels and decimate by 4 to speed things up
    # (at the risk of missing some detail / aliasing).
    epochs.pick_types(meg=True).decimate(4, verbose='error')

    # Labels follow the concatenation order.
    n_cond1 = len(epochs[condition1])
    n_cond2 = len(epochs[condition2])
    y = np.r_[np.ones(n_cond1), np.zeros(n_cond2)]
    X = epochs.get_data()

    # AUC is used because its chance level does not depend on class balance.
    classifier = make_pipeline(StandardScaler(), LogisticRegression())
    se = SlidingEstimator(classifier, scoring='roc_auc', n_jobs=N_JOBS)
    scores = cross_val_multiscore(se, X=X, y=y, cv=StratifiedKFold())

    # Save the scores; only the basename of each condition goes in the name.
    a_vs_b = '%s_vs_%s' % (os.path.basename(condition1),
                           os.path.basename(condition2))
    fname_td = os.path.join(
        data_path,
        '%s_highpass-%sHz-td-auc-%s.mat' % (subject, l_freq, a_vs_b))
    savemat(fname_td, {'scores': scores, 'times': epochs.times})
def mneClassify(self, sj, to_decode, conditions, time=(-0.3, 0.8)):
    """Plot cross-validated time-resolved decoding scores per condition.

    For every entry of ``conditions`` the trials of that condition are
    selected, ``to_decode`` is decoded from the EEG with a sliding
    scaler + logistic-regression pipeline (5-fold CV), and the fold-averaged
    score is plotted against time. The figure is saved as a PDF.

    Parameters
    ----------
    sj : int | str
        Subject identifier inserted into the hard-coded file names.
    to_decode : str
        Key of the behavioural variable used as decoding target.
    conditions : sequence
        Values of ``beh['condition']`` to decode within, one curve each.
    time : sequence of two floats
        Start and end of the analysed window, passed to ``time_as_index``.
    """
    clf = make_pipeline(StandardScaler(), LogisticRegression())
    time_decod = SlidingEstimator(clf, n_jobs=1)

    # Read the epochs of both sessions.
    eeg = []
    for session in range(2):
        eeg.append(mne.read_epochs(
            '/Users/dirk/Desktop/suppression/processed/subject-{}_ses-{}-epo.fif'.format(sj, session + 1)))
    times = eeg[0].times

    # Select time window and electrodes (both taken from the first session).
    s_idx, e_idx = eeg[0].time_as_index(list(time))
    picks = mne.pick_types(eeg[0].info, eeg=True, exclude='bads')
    # NOTE(review): ``_data`` is a private attribute of the epochs object.
    eeg = np.vstack((eeg[0]._data, eeg[1]._data))[:, picks, :][:, :, s_idx:e_idx]

    # Behavioural data. NOTE(review): pickle must only be loaded from
    # trusted files.
    with open('/Users/dirk/Desktop/suppression/beh/processed/subject-{}_all.pickle'.format(sj), 'rb') as handle:
        beh = pickle.load(handle)

    # Cycle through the palette so more than four conditions do not raise
    # an IndexError.
    colors = ['r', 'g', 'b', 'y']

    plt.figure(figsize=(20, 20))
    for i, cnd in enumerate(conditions):
        cnd_mask = beh['condition'] == cnd
        X = eeg[cnd_mask]
        y = beh[to_decode][cnd_mask]
        scores = cross_val_multiscore(time_decod, X, y, cv=5, n_jobs=1)
        plt.plot(times[s_idx:e_idx], scores.mean(axis=0),
                 color=colors[i % len(colors)], label=cnd)

    plt.legend(loc='best')
    plt.savefig('/Users/dirk/Desktop/suppression/bdm/figs/{}_{}_bdm.pdf'.format(to_decode, sj))
    plt.close()
def sliding_logreg_source(X, y, cross_val, return_clf=False):
    """Cross-validate a sliding logistic-regression decoder on source data.

    Parameters
    ----------
    X : np.array
        Features.
    y : vector
        Response vector.
    cross_val : cross validation object
        Passed as ``cv`` to ``cross_val_multiscore``.
    return_clf : bool
        Whether the classifier pipeline should be returned as well.

    Returns
    -------
    score : object
        Whatever ``cross_val_multiscore`` returns for the ROC-AUC scorer.
    clf : classifier object
        Only when ``return_clf is True``: the scaler + linear-model pipeline
        built here. NOTE(review): it is never fitted directly in this
        function; confirm callers do not expect fitted coefficients.
    """
    t_start = time.time()

    # Model: standardise, then logistic regression wrapped in LinearModel.
    pipeline = make_pipeline(StandardScaler(),
                             LinearModel(LogisticRegression()))
    decoder = SlidingEstimator(pipeline, scoring='roc_auc', n_jobs=1)

    print('Computing Logistic Regression.')
    score = cross_val_multiscore(decoder, X, y, cv=cross_val)

    t_end = time.time()
    print('Done. Time elapsed for sliding estimator: %i seconds.'
          % (t_end - t_start))

    if return_clf is True:
        return score, pipeline
    return score
# Empirical chance level: repeat the decoding 1000 times on shuffled labels
# and store the scores of every repetition.
k_folds = 1  # cv folds
var_exp = 0.99  # percentage of variance
scores_all = []  # score container

for i in tqdm(range(1000)):
    # break the label/data association
    labels_shuffled = np.random.permutation(labels)

    # pipeline for classification
    cl = make_pipeline(
        RobustScaler(),
        PCA(n_components=var_exp),
        LinearSVC(max_iter=10000, dual=False, penalty="l1"),
    )

    # one classifier per time point
    temp_genr = SlidingEstimator(cl, n_jobs=1, scoring="roc_auc")

    # generate iterator for cross validation
    kf = StratifiedKFold(n_splits=2, shuffle=True)
    cv_iter = kf.split(np.zeros(X.shape), labels_shuffled)

    # cross validation
    scores = cross_val_multiscore(
        temp_genr, X, labels_shuffled, cv=cv_iter, n_jobs=-1
    )
    scores_all.append(scores)

scores_all = np.vstack(scores_all)

scores_path = op.join(
    output_dir, "reg_vs_odd_chance_level-{}.npy".format(subject)
)
np.save(scores_path, scores_all)
def _average_trial_groups(data, group_size=4):
    """Average consecutive, non-overlapping groups of ``group_size`` trials.

    Trailing trials that do not fill a complete group are dropped.
    """
    n_groups = len(data) // group_size
    return [np.mean(data[k * group_size:(k + 1) * group_size], axis=0)
            for k in range(n_groups)]


def run_time_decoding(subject_id, cond1, cond2, event_id):
    """Time-resolved decoding on 4-trial averages at PO7/PO8.

    Epochs are cut from the subject's raw file, restricted to the PO7 and
    PO8 channels, averaged in groups of four trials per class, and decoded
    with a sliding scaler + logistic-regression pipeline (3-fold stratified
    CV, ROC-AUC). The fold-averaged scores are saved to a ``.mat`` file.

    Parameters
    ----------
    subject_id : int
        Numeric subject id; formatted as ``S%02d``.
    cond1, cond2 : str
        Condition names, used for logging and for the output file name.
    event_id : dict | int | list
        Passed to ``mne.Epochs``.
    """
    subject = "S%02d" % subject_id
    print('%s %s %s' % (subject, cond1, cond2))

    data_path = os.path.join('/home/claire/DATA/Data_Face_House/' + subject + '/EEG/')
    raw_fname = data_path + subject + '-raw.fif'
    event_fname = data_path + subject + '-eve.fif'
    tmin, tmax = -0.5, 1.5

    raw = mne.io.read_raw_fif(raw_fname, preload=True)
    events = mne.read_events(event_fname)
    picks = mne.pick_types(raw.info, meg=False, eeg=True, stim=True,
                           eog=True, exclude='bads')

    # Read epochs
    epochs = mne.Epochs(raw, events, event_id, tmin, tmax, proj=True,
                        picks=picks, baseline=(None, 0.), preload=True,
                        decim=4)
    epochs.pick_types(eeg=True, exclude='bads')

    # Only look at the selected occipital channels.
    select_chans = [u'PO7', u'PO8']
    ch_names = list(select_chans)
    epochs.pick_types(eeg=True).pick_channels(ch_names)

    # NOTE(review): the decoded classes are hard-coded and do not depend on
    # ``cond1`` / ``cond2``, which only label the output; confirm intended.
    data_cond1 = epochs['imag/face'].get_data()
    data_cond2 = epochs['imag/house'].get_data()

    # Average groups of 4 trials. The previous loop advanced by 5 while
    # averaging 4, which silently discarded every fifth trial.
    mean_cond1 = _average_trial_groups(data_cond1, group_size=4)
    mean_cond2 = _average_trial_groups(data_cond2, group_size=4)

    # Decoding inputs: averaged trials and their class labels.
    X = np.array(mean_cond1 + mean_cond2)
    y = np.array([0] * len(mean_cond1) + [1] * len(mean_cond2))

    # Fit and score the time decoder.
    cv = StratifiedKFold(n_splits=3, shuffle=False)
    clf = make_pipeline(StandardScaler(), LogisticRegression())
    time_decod = SlidingEstimator(clf, n_jobs=1, scoring='roc_auc')
    scores = cross_val_multiscore(time_decod, X, y, cv=cv, n_jobs=1)

    # Mean scores across cross-validation splits
    scores = np.mean(scores, axis=0)

    # Save scores
    a_vs_b = '%s_vs_%s' % (cond1, cond2)
    print('a_vs_b = %s' % a_vs_b)
    fname_td = os.path.join(
        data_path,
        '%s-td-auc-%s_ave_4_trials_po7_po8.mat' % (subject, a_vs_b))
    print('Saving %s' % fname_td)

    from scipy.io import savemat
    savemat(fname_td, {'scores': scores, 'times': epochs.times})
# Plot the classifier's patterns and filters as topographies.
for name in ('patterns_', 'filters_'):
    # With ``inverse_transform=True`` the coefficients are passed back
    # through the estimators contained in the pipeline, in reverse order.
    coef = get_coef(clf, name, inverse_transform=True)
    evoked = EvokedArray(coef, epochs.info, tmin=epochs.tmin)
    evoked.plot_topomap(title='EEG %s' % name[:-1], time_unit='s')

# ----------------------------------------------------------------------------
# Time-resolved decoding: StandardScaler + LogisticRegression
# ----------------------------------------------------------------------------
clf = make_pipeline(StandardScaler(), LogisticRegression(solver='lbfgs'))
time_decod = SlidingEstimator(clf, n_jobs=1, scoring='roc_auc')

scores = cross_val_multiscore(time_decod, ica_data, labels, cv=5, n_jobs=1)

# Mean scores across cross-validation splits
scores = np.mean(scores, axis=0)

# Plot the score time course with chance level and stimulus onset marked.
fig, ax = plt.subplots()
ax.plot(epochs.times, scores, label='score')
ax.axhline(.5, color='k', linestyle='--', label='chance')
ax.set_xlabel('Times')
ax.set_ylabel('AUC')  # Area Under the Curve
ax.legend()
ax.axvline(.0, color='k', linestyle='-')
ax.set_title('Sensor space decoding')
c1, c2 = list(eps.event_id.keys()) clf = make_pipeline( Scaler(eps.info), Vectorizer(), PCA(0.9999), LinearModel( LogisticRegression( solver=solver, penalty="l1", max_iter=1000, multi_class="auto", random_state=seed, )), ) time_decode = SlidingEstimator(clf, n_jobs=n_jobs, scoring="roc_auc", verbose=False) # K-fold cross-validation with ROC area under curve score if subject[4:] in ("101", "124a", "213", "301a") and interval != "All": use_splits = 3 else: use_splits = n_splits cv = StratifiedKFold(n_splits=use_splits, shuffle=True, random_state=seed) # Get the data and label X = eps.get_data() y = eps.events[:, -1] # AUC b/c chance level same regardless of the class balance score = np.mean(
#%%
from mne.decoding import (GeneralizingEstimator, SlidingEstimator,
                          Vectorizer, TimeFrequency, cross_val_multiscore,
                          UnsupervisedSpatialFilter, Scaler, LinearModel,
                          get_coef)
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
import sklearn as skl

# Shared sliding estimator: standardise, then a class-balanced logistic
# regression wrapped in LinearModel so that patterns can be extracted.
est = make_pipeline(
    StandardScaler(),
    LinearModel(LogisticRegression(class_weight='balanced', solver='lbfgs')),
)
sl = SlidingEstimator(est, scoring='roc_auc')


def get_patterns(epochs):
    """Fit the module-level sliding estimator and return its patterns.

    The epochs are re-referenced to the average (in place), the estimator
    is fitted to predict ``confdiff <= 0`` from the epochs' metadata, and
    the sign-flipped ``patterns_`` coefficients are returned as an
    ``EvokedArray`` sharing the epochs' info and start time.
    """
    epochs.set_eeg_reference(ref_channels='average')
    target = epochs.metadata.confdiff.to_numpy() <= 0
    sl.fit(epochs.get_data(), target)
    coef = mne.decoding.get_coef(sl, 'patterns_', inverse_transform=False)
    return mne.EvokedArray(-coef, epochs.info, tmin=epochs.times[0])


# One pattern time course per entry of ``data``.
allpatterns = []
for ii, epochs in enumerate(data):
    print(ii, end=',')
    allpatterns.append(get_patterns(epochs))

scaling_dict = dict(scalings=dict(eeg=1e-6))
0.4, proj=True, baseline=(None, 0), preload=True, decim=4, reject=dict(mag=3e-12, grad=300e-12, eog=200e-6)) epochs.pick_types(meg='grad', exclude='bads') # Assign data and labels to X, y X = epochs.get_data() # has time as last extra dimension y = epochs.events[:, -1] # combine the processing steps into a scikit-learn pipeline object: scaler = StandardScaler() # scales the data svc = LinearSVC() clf = make_pipeline(scaler, svc) # To get results over time, we need a sliding estimator, which # will handle each time instant as a separate sample: slide_clf = SlidingEstimator(clf, n_jobs=4, scoring='accuracy', verbose=True) # Then, do cross-validation, fitting and scoring in one line: scores = cross_val_multiscore(slide_clf, X, y, cv=5, n_jobs=4) # Cross-validation returns several scores, average over those: score = np.mean(scores, axis=0) print('Mean performance over time: %0.1f%%' % (100 * np.mean(score), )) # plot a figure showing temporal evolution of decoding performance: plt.plot(epochs.times, score) plt.axhline(.25, color='k', linestyle='--', label='chance level') plt.xlabel('Time') plt.ylabel('Accuracy') plt.show()
scorer = make_scorer(get_scorer(scorer_spearman)) score = 'Spearman R' cv = KFold(5) if regressor == 'condition': y = binary_scaler(y) # set values to 0.0 and 1.0 clf = make_pipeline(StandardScaler(), LogisticRegression(solver='liblinear')) scorer = 'roc_auc' score = 'AUC' cv = StratifiedKFold(5) n_jobs = -1 # set up estimator, get scores if decode_using == 'spatial': gen = SlidingEstimator(n_jobs=n_jobs, scoring=scorer, base_estimator=clf) scores = cross_val_multiscore(gen, X, y, cv=cv) elif decode_using == 'temporal': gen = SlidingEstimator(n_jobs=n_jobs, scoring=scorer, base_estimator=clf) scores = cross_val_multiscore(gen, X, y, cv=cv) else: # scoring defaults to neg mean squared so set to scorer # shuffle must be true when binary values, otherwise fold will only have one value scores = cross_val_score(clf, X, y, scoring=scorer, #defaults to neg mean squared cv=KFold(5, shuffle=True))
# Loading data ------------------------------------------
idx = 3
running_name = f'MEG_S{idx:02d}'
band_name = 'U07'

worker = MEG_Worker(running_name=running_name)
worker.pipeline(band_name=band_name)

# %%
# input('Press enter to escape.')

# %%
# MVPA ----------------------------------------------------------------
# The classifier gets its own name: binding it to ``svm`` would replace the
# imported ``svm`` module, so any later ``svm.*`` access (or re-running this
# cell) would fail.
svc = svm.SVC(gamma='scale', kernel='rbf', class_weight='balanced')
clf = make_pipeline(StandardScaler(), svc)
estimator = SlidingEstimator(clf, n_jobs=n_jobs, scoring='f1', verbose=1)


# %%
def pair_X_y(epochs, label):
    """Return the epochs' data and a label vector filled with ``label``.

    Parameters
    ----------
    epochs : object
        Anything exposing ``get_data()`` that returns an array whose first
        axis indexes trials.
    label : number
        Value assigned to every trial.

    Returns
    -------
    X : array
        ``epochs.get_data()``.
    y : array
        One-dimensional array with one entry per trial, all equal to
        ``label``.
    """
    X = epochs.get_data()
    num = X.shape[0]
    y = np.zeros(num) + label
    print(f'Got paired X: {X.shape} and y: {y.shape}')
    return X, y


X1, y1 = pair_X_y(worker.clean_epochs, 1)
X2, y2 = pair_X_y(worker.denoise_epochs['2'], 2)
# Replace missing targets by the string 'NaN' and integer-encode the labels.
y[np.where(pd.isnull(y))] = 'NaN'
le = preprocessing.LabelEncoder()
le.fit(y)
y = le.transform(y)

# Select the trials entering the decoding.
if ('cue_side' in analysis or 'cue_type' in analysis):
    # presumably code 0 is the missing-value class - TODO confirm
    sel = np.where(y != 0)[0]
elif (epoch_type == 'Probe') & ('target' in analysis):
    # When decoding memory at probe time, use only trials whose probe
    # differs from the target.
    sel = np.where((events['Change'] == 1) & (~np.isnan(y)))[0]
else:
    sel = np.where(~np.isnan(y))[0]

td = SlidingEstimator(clf, scoring=make_scorer(scorer), n_jobs=24,
                      **kwargs)

# Run decoding: fit on each training fold, score on the held-out fold and
# collect the fitted patterns and filters.
cv = StratifiedKFold(8)
scores = list()
patterns = list()
filters = list()
X_sel = X[sel]
y_sel = y[sel]
for train, test in cv.split(X_sel, y_sel):
    td.fit(X_sel[train], y_sel[train])
    score = td.score(X_sel[test], y_sel[test])
    scores.append(score)
    patterns.append(get_coef(td, 'patterns_', inverse_transform=True))
    filters.append(get_coef(td, 'filters_', inverse_transform=True))

# Average everything across folds.
scores = np.mean(scores, axis=0)
patterns = np.mean(patterns, axis=0)
filters = np.mean(filters, axis=0)
for analysis in analyses:
    # To-be-predicted values, integer-encoded separately for each task.
    y = np.array(events[analysis])  # cue in WM task
    y_con = np.array(events_con[analysis])  # cue in control task (localizer)

    le = LabelEncoder()
    le.fit(y)
    y = le.transform(y)
    sel = np.where(y != 0)[0]

    le = LabelEncoder()
    le.fit(y_con)
    y_con = le.transform(y_con)
    sel_con = np.where(y_con != 0)[0]

    # Define estimators depending on the analysis
    clf = make_pipeline(StandardScaler(), LinearModel(LogisticRegression()))
    kwargs = dict()
    est = SlidingEstimator(clf, scoring='roc_auc', n_jobs=24, **kwargs)

    # Run decoding: train on WM-task folds, test on the held-out WM fold
    # and on the selected control-task trials.
    cv = StratifiedKFold(12)
    scores = list()
    scores_con = list()
    X_sel, y_sel = X[sel], y[sel]
    X_con_sel, y_con_sel = X_con[sel_con], y_con[sel_con]
    for train, test in cv.split(X_sel, y_sel):
        est.fit(X_sel[train], y_sel[train])  # train during WM task
        score = est.score(X_sel[test], y_sel[test])  # test during WM task
        score_con = est.score(X_con_sel, y_con_sel)  # test during control task
        scores.append(score)
        scores_con.append(score_con)

    # Average across folds and arrange the scores as (n_freqs, n_times).
    scores = np.mean(scores, axis=0)
    scores = np.reshape(scores, (n_freqs, n_times))
    scores_con = np.mean(scores_con, axis=0)
    scores_con = np.reshape(scores_con, (n_freqs, n_times))
epochs.resample(50) X0 = epochs['motor'].get_data() # dat_file = ('S%02d_EB-epo' % (subj+1)) # dat_file = op.join(filepath, dat_file + '.fif') # epochs = mne.read_epochs(dat_file) # epochs.crop(tmin=0, tmax=1) # epochs.resample(50) X1 = epochs['non-motor'].get_data() X = np.concatenate((X0,X1),axis=0) y = np.concatenate((np.zeros(X0.shape[0]),np.ones(X1.shape[0])),axis=0) if t_by_t: clf = make_pipeline(StandardScaler(), LogisticRegression(solver='lbfgs')) time_decod = SlidingEstimator(clf, n_jobs=1, scoring='roc_auc', verbose=True) scores = cross_val_multiscore(time_decod, X, y, cv=5, n_jobs=1) else: clf = make_pipeline(Vectorizer(), StandardScaler(), LogisticRegression(solver='lbfgs')) scores = cross_val_multiscore(clf, X, y, cv=5, n_jobs=1) # Mean scores across cross-validation splits allscores[:,subj] = np.mean(scores, axis=0) if test_auc: for perm in np.arange(nperms): perm_y = np.random.permutation(y) if t_by_t: perm_clf = make_pipeline(StandardScaler(), LogisticRegression(solver='lbfgs')) perm_time_decod = SlidingEstimator(perm_clf, n_jobs=1, scoring='roc_auc', verbose=True) perm_scores = cross_val_multiscore(perm_time_decod, X, perm_y, cv=5, n_jobs=1)
epochs_base_eq = epochs_base.copy().equalize_event_counts(['A', 'B'])[0] # Pick the EEG data for decoding (X) ... X = epochs_base_eq['A', 'B'].get_data() # ... and the categories / experimental conditions (y) y = epochs_base_eq['A', 'B'].events[:, 2] # Create the pipeline with linear support vector classification # and balanced classes. Alternatively, instead of equlizing trial # counts, class weights for each condition can be specified. clf = make_pipeline(Vectorizer(), StandardScaler(), LinearSVC(class_weight='balanced')) clf.fit(X, y) # Calculate scores for classification sl = SlidingEstimator(clf) scores_time_decoding = cross_val_multiscore(sl, X, y) # Append the results for each subject if file == './epochs/101_base-epo.fif': scores_td_base = scores_time_decoding else: scores_td_base = np.append(scores_td_base, scores_time_decoding, axis=0) # Again, calculate scores with a receiver operating curve gen = GeneralizingEstimator(clf, scoring='roc_auc') scores_gat = cross_val_multiscore(gen, X, y) if file == './epochs/101_base-epo.fif':
#y = np.array([0] * len(mean_cond1) + [1] * len(mean_cond2))

#----------------------------------#
# Time decoding
#----------------------------------#
epochs = epochs['sq']

# Fit and score the time decoder.
X = epochs.get_data()  # signals: n_epochs, n_channels, n_times
le = LabelEncoder()
y = le.fit_transform(epochs.events[:, 2])

clf = make_pipeline(StandardScaler(), LogisticRegression())
time_decod = SlidingEstimator(clf, n_jobs=1, scoring='roc_auc')
scores = cross_val_multiscore(time_decod, X, y, cv=5, n_jobs=1)

# Mean scores across cross-validation splits
scores = np.mean(scores, axis=0)

# Proportion of the larger class. This is the chance level for *accuracy*;
# it is kept for reference but is not the chance level of the AUC below.
class_balance = np.mean(y == y[0])
class_balance = max(class_balance, 1. - class_balance)

# Plot. The scores are ROC-AUC values, whose chance level is 0.5 regardless
# of the class balance.
fig, ax = plt.subplots()
ax.plot(epochs.times, scores, label='score')
ax.axhline(.5, color='k', linestyle='--', label='chance')
ax.set_xlabel('Times')
ax.set_ylabel('AUC')  # Area Under the Curve
ax.legend()
def run_time_decoding(subject, condition1, condition2, session=None):
    """Decode ``condition1`` against ``condition2`` at every time point.

    The subject's epochs are read from the derivatives root, optionally
    restricted to ``config.analyze_channels``, and scored with a
    cross-validated sliding estimator (standard scaling + logistic
    regression). The per-fold scores are saved to a ``.mat`` file and the
    fold-averaged scores to a ``.tsv`` table.

    Parameters
    ----------
    subject : str
        Subject identifier used to build the BIDS path.
    condition1, condition2 : str
        Names of the contrasted conditions. When ``config.conditions`` is a
        dict they are looked up in it to obtain the epoch selectors;
        otherwise they are used as selectors directly. Trials of
        ``condition1`` are labelled 1, trials of ``condition2`` 0.
    session : str | None
        Session identifier used to build the BIDS path.
    """
    msg = f'Contrasting conditions: {condition1} – {condition2}'
    logger.info(gen_log_message(message=msg, step=7, subject=subject,
                                session=session))

    fname_epochs = BIDSPath(subject=subject,
                            session=session,
                            task=config.get_task(),
                            acquisition=config.acq,
                            run=None,
                            recording=config.rec,
                            space=config.space,
                            suffix='epo',
                            extension='.fif',
                            datatype=config.get_datatype(),
                            root=config.deriv_root,
                            check=False)

    epochs = mne.read_epochs(fname_epochs)
    if config.analyze_channels:
        # We special-case the average reference here to work around a
        # situation where e.g. `analyze_channels` might contain only a
        # single channel: `concatenate_epochs` below will then fail when
        # trying to create / apply the projection. We can avoid this by
        # removing an existing average reference projection here, and
        # applying the average reference directly – without going through
        # a projector.
        if 'eeg' in config.ch_types and config.eeg_reference == 'average':
            epochs.set_eeg_reference('average')
        else:
            epochs.apply_proj()
        epochs.pick(config.analyze_channels)

    # ``cond_names`` label the outputs; ``epochs_conds`` select the epochs.
    # Each is assigned exactly once so the dict-based mapping cannot be
    # overwritten by a later plain assignment.
    cond_names = [condition1, condition2]
    if isinstance(config.conditions, dict):
        epochs_conds = [config.conditions[condition1],
                        config.conditions[condition2]]
    else:
        epochs_conds = [condition1, condition2]

    epochs = mne.concatenate_epochs([epochs[epochs_conds[0]],
                                     epochs[epochs_conds[1]]])
    n_cond1 = len(epochs[epochs_conds[0]])
    n_cond2 = len(epochs[epochs_conds[1]])

    # Labels follow the concatenation order.
    X = epochs.get_data()
    y = np.r_[np.ones(n_cond1), np.zeros(n_cond2)]

    clf = make_pipeline(
        StandardScaler(),
        LogisticRegression(solver='liblinear',
                           random_state=config.random_state))
    se = SlidingEstimator(clf,
                          scoring=config.decoding_metric,
                          n_jobs=config.N_JOBS)
    scores = cross_val_multiscore(se, X=X, y=y, cv=config.decoding_n_splits)

    # Build the "processing" entity of the output name; path separators,
    # underscores and dashes are all stripped from it.
    a_vs_b = f'{cond_names[0]}+{cond_names[1]}'.replace(op.sep, '')
    processing = f'{a_vs_b}+{config.decoding_metric}'
    processing = processing.replace('_', '-').replace('-', '')

    fname_mat = fname_epochs.copy().update(suffix='decoding',
                                           processing=processing,
                                           extension='.mat')
    savemat(fname_mat, {'scores': scores, 'times': epochs.times})

    # Tabular copy: one row per time point with the fold-averaged score.
    fname_tsv = fname_mat.copy().update(extension='.tsv')
    n_times = len(epochs.times)
    tabular_data = pd.DataFrame(
        dict(cond_1=[cond_names[0]] * n_times,
             cond_2=[cond_names[1]] * n_times,
             time=epochs.times,
             mean_crossval_score=scores.mean(axis=0),
             metric=[config.decoding_metric] * n_times)
    )
    tabular_data.to_csv(fname_tsv, sep='\t', index=False)
# conditions and therefore figure out when the effect of interest happens.
#
# When working with linear models as estimators, this approach boils
# down to estimating a discriminative spatial filter for each time instant.
#
# Temporal decoding
# ^^^^^^^^^^^^^^^^^
#
# We'll use a Logistic Regression for a binary classification as machine
# learning model.

# We will train the classifier on all left visual vs auditory trials on MEG
clf = make_pipeline(StandardScaler(), LogisticRegression(solver='lbfgs'))

# One classifier is fitted and scored per time point.
time_decod = SlidingEstimator(
    clf, n_jobs=1, scoring='roc_auc', verbose=True)
scores = cross_val_multiscore(time_decod, X, y, cv=5, n_jobs=1)

# Mean scores across cross-validation splits
scores = np.mean(scores, axis=0)

# Plot the score time course with chance level and time zero marked.
fig, ax = plt.subplots()
ax.plot(epochs.times, scores, label='score')
ax.axhline(.5, color='k', linestyle='--', label='chance')
ax.set_xlabel('Times')
ax.set_ylabel('AUC')  # Area Under the Curve
ax.legend()
ax.axvline(.0, color='k', linestyle='-')
ax.set_title('Sensor space decoding')
###############################################################################
# Temporal decoding
# -----------------
#
# We'll use a Logistic Regression for a binary classification as machine
# learning model.

# We will train the classifier on all left visual vs auditory trials on MEG
X = epochs.get_data()  # MEG signals: n_epochs, n_channels, n_times
y = epochs.events[:, 2]  # target: Audio left or right

clf = make_pipeline(StandardScaler(), LogisticRegression())

# One classifier is fitted and scored per time point.
time_decod = SlidingEstimator(
    clf, n_jobs=1, scoring='roc_auc', verbose=True)
scores = cross_val_multiscore(time_decod, X, y, cv=5, n_jobs=1)

# Mean scores across cross-validation splits
scores = np.mean(scores, axis=0)

# Plot the score time course with chance level and time zero marked.
fig, ax = plt.subplots()
ax.plot(epochs.times, scores, label='score')
ax.axhline(.5, color='k', linestyle='--', label='chance')
ax.set_xlabel('Times')
ax.set_ylabel('AUC')  # Area Under the Curve
ax.legend()
ax.axvline(.0, color='k', linestyle='-')
ax.set_title('Sensor space decoding')
plt.show()
import os import os.path as op import numpy as np import mne from mne.decoding import SlidingEstimator, cross_val_multiscore from sklearn.pipeline import make_pipeline from sklearn.preprocessing import StandardScaler from sklearn.linear_model import LogisticRegression from config import * from basemy import * # make estimator clf = make_pipeline(StandardScaler(), LogisticRegression()) clf = SlidingEstimator(clf, scoring='roc_auc', n_jobs=-1) directory = thedirectory(path_analyzed_data, which_channels) directorydecod = thedirectorydecod(path_analyzed_data, 'results/decoding_together') for file_to_ana in files_to_ana: # ******** the codes ********* cow_codes_scr, textleft_codes, cow_codes, round_codes, left_codes, text_codes = thecodes( file_to_ana, decod_scr_with_trigger) if file_to_ana is 'Guided': for type_epo in ['cue', 'res', 'scr']: # for type_epo in ['res', 'scr']:
def _standard_scoring(decoding_method):
    """Return the scorer used by the 'standard' decoding methods."""
    if 'reg' in decoding_method:
        if 'r2' in decoding_method:
            return metrics.make_scorer(metrics.r2_score)
        return metrics.make_scorer(metrics.mean_squared_error)
    return 'accuracy'


def _build_time_estimator(clf, decoding_method, generalize_across_time,
                          n_jobs):
    """Wrap ``clf`` in a generalizing or sliding time estimator."""
    scoring = _standard_scoring(decoding_method)
    if generalize_across_time:
        return GeneralizingEstimator(clf, scoring=scoring, n_jobs=n_jobs)
    return SlidingEstimator(clf, scoring=scoring, n_jobs=n_jobs)


def decode(epochs,
           get_y_label_func,
           epoch_filter=None,
           decoding_method='standard',
           sliding_window_size=None,
           sliding_window_step=None,
           n_jobs=multiprocessing.cpu_count(),
           equalize_event_counts=True,
           only_fit=False,
           generalize_across_time=True):
    """Basic flow for decoding.

    Parameters
    ----------
    epochs : epochs object
        Data to decode. When ``equalize_event_counts`` is true its
        ``events`` and ``event_id`` are overwritten and epochs are dropped
        in place; with ``epoch_filter=None`` that is the very object passed
        in by the caller.
    get_y_label_func : callable
        Called with the (filtered) epochs; returns one label per epoch.
    epoch_filter : object | None
        If not None, used to index ``epochs`` before anything else.
    decoding_method : str
        Starts with ``'standard'`` (optionally containing ``'reg'``,
        ``'r2'`` and/or ``'raw'``), or is one of ``'ERP_cov'``,
        ``'Xdawn_cov'``, ``'Hankel_cov'``.
    sliding_window_size, sliding_window_step : int | None
        Required for 'standard' methods that do not contain ``'raw'``.
    n_jobs : int
        Passed to the time estimator.
    equalize_event_counts : bool
        Equalize the number of epochs per label before decoding.
    only_fit : bool
        Fit the estimator on all data instead of cross-validating.
    generalize_across_time : bool
        Use a ``GeneralizingEstimator`` instead of a ``SlidingEstimator``
        ('standard' methods only).

    Returns
    -------
    dict
        Keys ``procedure``, ``estimator``, ``scores``, ``pipeline``,
        ``preprocess``, ``cv``, ``times`` and ``config``. ``estimator`` is
        the string ``'None'`` for cross-validated non-'standard' methods;
        ``scores`` and ``cv`` are None when ``only_fit`` is true.

    Raises
    ------
    ValueError
        If ``decoding_method`` is not recognised.
    AssertionError
        If a sliding-window parameter is missing when it is required.
    """
    config = dict(equalize_event_counts=equalize_event_counts,
                  only_fit=only_fit,
                  sliding_window_size=sliding_window_size,
                  sliding_window_step=sliding_window_step,
                  decoding_method=decoding_method,
                  generalize_across_time=generalize_across_time,
                  epoch_filter=str(epoch_filter))

    if epoch_filter is not None:
        epochs = epochs[epoch_filter]

    #-- Classify epochs into groups (training epochs)
    y_labels = get_y_label_func(epochs)

    if equalize_event_counts:
        # Re-code the events by label so MNE can equalize per label.
        epochs.events[:, 2] = y_labels
        epochs.event_id = {str(label): label for label in np.unique(y_labels)}
        min_n_items_per_y_label = min(
            len(epochs[cond]) for cond in epochs.event_id)
        print("\nEqualizing the number of epochs to %d per condition..." %
              min_n_items_per_y_label)
        epochs.equalize_event_counts(list(epochs.event_id))
        y_labels = epochs.events[:, 2]

    print("The epochs were classified into %d groups:" % len(set(y_labels)))
    for g in set(y_labels):
        print("Group {:}: {:} epochs".format(g, sum(np.array(y_labels) == g)))

    #-- Create the decoding pipeline
    print("Creating the classification pipeline...")

    epochs_data = epochs.get_data()
    is_standard = decoding_method.startswith('standard')

    preprocess_pipeline = None

    if is_standard:
        if 'reg' in decoding_method:
            clf = make_pipeline(StandardScaler(), Ridge())
        else:
            clf = make_pipeline(
                StandardScaler(),
                svm.SVC(C=1, kernel='linear', class_weight='balanced'))

        if 'raw' not in decoding_method:
            assert sliding_window_size is not None, \
                'sliding_window_size is required for this decoding method'
            assert sliding_window_step is not None, \
                'sliding_window_step is required for this decoding method'
            preprocess_pipeline = make_pipeline(
                umne.transformers.SlidingWindow(
                    window_size=sliding_window_size,
                    step=sliding_window_step,
                    average=True))

    elif decoding_method == 'ERP_cov':
        clf = make_pipeline(
            UnsupervisedSpatialFilter(PCA(20), average=False),
            ERPCovariances(estimator='lwf'),  # todo how to apply sliding window?
            CSP(30, log=False),
            TangentSpace('logeuclid'),
            LogisticRegression('l2'))  # todo why logistic regression?

    elif decoding_method == 'Xdawn_cov':
        clf = make_pipeline(
            UnsupervisedSpatialFilter(PCA(50), average=False),
            XdawnCovariances(12, estimator='lwf', xdawn_estimator='lwf'),
            TangentSpace('logeuclid'),
            LogisticRegression('l2'))

    elif decoding_method == 'Hankel_cov':
        clf = make_pipeline(
            UnsupervisedSpatialFilter(PCA(70), average=False),
            HankelCovariances(delays=[1, 8, 12, 64], estimator='oas'),
            CSP(15, log=False),
            TangentSpace('logeuclid'),
            LogisticRegression('l2'))

    else:
        raise ValueError(
            'Unknown decoding method: {:}'.format(decoding_method))

    print('\nDecoding pipeline:')
    for i, (_, step) in enumerate(clf.steps):
        print('Step #{:}: {:}'.format(i + 1, step))

    if preprocess_pipeline is not None:
        print('\nApplying the pre-processing pipeline:')
        for i, (_, step) in enumerate(preprocess_pipeline.steps):
            print('Step #{:}: {:}'.format(i + 1, step))
        epochs_data = preprocess_pipeline.fit_transform(epochs_data)

    if only_fit:
        #-- Only fit the decoders
        procedure = 'only_fit'
        scores = None
        cv = None

        if is_standard:
            estimator = _build_time_estimator(
                clf, decoding_method, generalize_across_time, n_jobs)
        else:
            estimator = clf

        estimator.fit(X=epochs_data, y=y_labels)

    else:
        #-- Classify & score -- cross-validation
        procedure = 'fit_and_score'
        print(
            "\nCreating a classifier and calculating accuracy scores (this may take some time)..."
        )

        cv = StratifiedKFold(n_splits=5)

        if is_standard:
            estimator = _build_time_estimator(
                clf, decoding_method, generalize_across_time, n_jobs)
            scores = cross_val_multiscore(estimator=estimator,
                                          X=epochs_data,
                                          y=np.array(y_labels),
                                          cv=cv)
        else:
            scores = _run_cross_validation(X=epochs_data,
                                           y=np.array(y_labels),
                                           clf=clf,
                                           cv=cv)
            # Estimator is not defined in the case of Riemannian decoding
            estimator = 'None'

    # Time axis matching the (possibly window-averaged) data.
    times = np.linspace(epochs.tmin, epochs.tmax, epochs_data.shape[2])

    return dict(procedure=procedure,
                estimator=estimator,
                scores=scores,
                pipeline=clf,
                preprocess=preprocess_pipeline,
                cv=cv,
                times=times,
                config=config)
def run_time_decoding(subject_id, cond1, cond2, event_id):
    """Run time-resolved decoding on one subject's EEG epochs and save the scores.

    The raw recording and its events are read from the subject's ``EEG``
    folder, epoched from -0.2 s to 1 s, restricted to EEG channels, and a
    standardised logistic regression is fitted at every time sample
    (``SlidingEstimator``) with 5-fold cross-validation scored by ROC AUC.
    The fold-averaged scores and the epoch times are written to a ``.mat``
    file next to the input data.

    Parameters
    ----------
    subject_id : int
        Subject number; formatted as ``"S%02d"`` to build the file names.
    cond1, cond2 : str
        Condition names. They are only used for logging and for the output
        file name; the trials that enter the decoder are selected by
        ``event_id``.
    event_id : dict | list | int
        Passed unchanged to ``mne.Epochs``.
        NOTE(review): with ``scoring='roc_auc'`` this presumably has to
        describe exactly two event codes -- confirm against the caller.

    Returns
    -------
    None
        The result is written to disk only.
    """
    # Function-scope import, as in the original block.
    from scipy.io import savemat

    print(cond1, cond2)

    subject = "S%02d" % subject_id
    data_path = os.path.join('/home/claire/DATA/Data_Face_House', subject, 'EEG')
    raw_fname = os.path.join(data_path, subject + '-raw.fif')
    event_fname = os.path.join(data_path, subject + '-eve.fif')
    tmin, tmax = -0.2, 1

    raw = mne.io.read_raw_fif(raw_fname, preload=True)
    events = mne.read_events(event_fname)

    picks = mne.pick_types(raw.info, meg=False, eeg=True, stim=True, eog=True,
                           exclude='bads')

    # Read epochs
    epochs = mne.Epochs(raw, events, event_id, tmin, tmax, proj=True,
                        picks=picks, baseline=(None, 0.), preload=True,
                        decim=4)

    # Keep only the EEG channels for decoding (drops the stim/EOG picks).
    epochs.pick_types(eeg=True, exclude='bads')

    # only look at occipital channels
    # select_chans = [u'Iz', u'Oz', u'O1', u'O2', u'O3', u'PO7', u'PO8', u'POz', u'PO1', u'PO3', u'PO2', u'PO4']
    # select_chans = [ u'PO7', u'PO8']
    # select_chans = [ u'Cz', u'FPz']
    # ch_names=[ch_name.replace('', '') for ch_name in select_chans]
    # epochs.pick_types(eeg=True).pick_channels(ch_names)

    # fit and time decoder
    X = epochs.get_data()  # EEG signals: n_epochs, n_channels, n_times
    y = epochs.events[:, 2]  # target: event code of each epoch

    clf = make_pipeline(StandardScaler(), LogisticRegression())
    time_decod = SlidingEstimator(clf, n_jobs=1, scoring='roc_auc')
    scores = cross_val_multiscore(time_decod, X, y, cv=5, n_jobs=1)

    # Mean scores across cross-validation splits
    scores = np.mean(scores, axis=0)

    # save scores
    a_vs_b = '%s_vs_%s' % (cond1, cond2)
    print('a_vs_b = %s' % a_vs_b)
    fname_td = os.path.join(data_path, '%s-td-auc-%s.mat' % (subject, a_vs_b))
    print('Saving %s' % fname_td)
    savemat(fname_td, {'scores': scores, 'times': epochs.times})
# Script fragment: cross-validated LDA decoding over the sampled trials,
# with the resulting scores saved to a per-subject .npy file.

# Pick the sampled trials and their labels.
labels = all_labels[sample_ix]
X = data[sample_ix]

# parameters for the classification
k_folds = 10  # cv folds
var_exp = 0.99  # percentage of variance (not referenced again in this fragment)

# generate iterator for cross validation
# NOTE(review): shuffle=True is used without a random_state, so the fold
# assignment is presumably not reproducible between runs -- confirm intent.
kf = StratifiedKFold(n_splits=k_folds, shuffle=True)
# A zero array with X's shape stands in for the data when generating splits.
cv_iter = kf.split(np.zeros(X.shape), labels)

# pipeline for classification
cl = make_pipeline(LinearDiscriminantAnalysis())

# sliding estimator scored with accuracy (the variable is called temp_genr,
# but this is a SlidingEstimator, not a GeneralizingEstimator)
temp_genr = SlidingEstimator(cl, n_jobs=1, scoring=make_scorer(accuracy_score))

# cross validation
scores = cross_val_multiscore(temp_genr, X, labels, cv=cv_iter, n_jobs=-1)

# Wrap the single score array in a list and stack it before saving.
scores_all = []
scores_all.append(scores)
scores_all = np.vstack(scores_all)

# Save the stacked scores under a per-subject file name in output_dir.
scores_path = op.join(output_dir, "reg_vs_odd_lda_balanced-{}.npy".format(subject))
np.save(scores_path, scores_all)
print("saved")
stcs = apply_inverse_epochs(epochs, inverse_operator, lambda2=1.0 / snr ** 2, verbose=False, method="dSPM", pick_ori="normal") # %% # Decoding in sensor space using a logistic regression # Retrieve source space data into an array X = np.array([stc.lh_data for stc in stcs]) # only keep left hemisphere y = epochs.events[:, 2] # prepare a series of classifier applied at each time sample clf = make_pipeline(StandardScaler(), # z-score normalization SelectKBest(f_classif, k=500), # select features for speed LinearModel(LogisticRegression(C=1, solver='liblinear'))) time_decod = SlidingEstimator(clf, scoring='roc_auc') # Run cross-validated decoding analyses: scores = cross_val_multiscore(time_decod, X, y, cv=5, n_jobs=1) # Plot average decoding scores of 5 splits fig, ax = plt.subplots(1) ax.plot(epochs.times, scores.mean(0), label='score') ax.axhline(.5, color='k', linestyle='--', label='chance') ax.axvline(0, color='k') plt.legend() # %% # To investigate weights, we need to retrieve the patterns of a fitted model # The fitting needs not be cross validated because the weights are based on
# Convert the collected label time courses to an array and display it
# (bare expression: notebook-style cell output).
label_ts = np.array(label_ts)
label_ts

# %%
# Imported here so this cell stays self-contained.
from sklearn.model_selection import LeaveOneGroupOut

n_jobs = 48

# Per-time-point classifier: flatten features, z-score, logistic regression.
clf = make_pipeline(
    # Scaler(info),
    Vectorizer(),
    StandardScaler(),
    LinearModel(LogisticRegression(solver='liblinear')),
)

time_decoder = SlidingEstimator(
    clf,
    scoring='roc_auc',
    n_jobs=n_jobs,
)

# Binary target: label 2 is recoded to 0; work on a copy so the DataFrame
# column is left untouched.
y = epochs_df['label'].values.copy()
y[y == 2] = 0

# One fold per session. The previous integer ``cv`` (number of unique
# sessions) resolved to a non-group K-fold splitter, which ignores ``groups``,
# so trials of the same session could end up in both the training and the
# test set. LeaveOneGroupOut yields the same number of folds but actually
# holds out one whole session at a time.
scores = cross_val_multiscore(
    time_decoder,
    X=label_ts,
    y=y,
    groups=epochs_df['session'],
    cv=LeaveOneGroupOut(),
    n_jobs=n_jobs,
)
sfreq=epochs.info['sfreq'], freqs=freqs, output='power', n_cycles=n_cycles) n_epochs, n_channels, n_freqs, n_times = X.shape X = X.reshape(n_epochs, n_channels, -1) # collapse freqs and time # Run decoding on TFR output for analysis in analyses: fname = results_folder +\ '%s_tf_scores_%s_%s.npy' % (subject, 'Cue', analysis) y = np.array(events_behavior[analysis]) clf = make_pipeline( StandardScaler(), force_predict(LogisticRegression(), 'predict_proba', axis=1)) scorer = scorer_auc kwargs = dict() le = preprocessing.LabelEncoder() le.fit(y) y = le.transform(y) sel = np.where(y != 0)[0] td = SlidingEstimator(clf, scoring=make_scorer(scorer), n_jobs=24, **kwargs) td.fit(X[sel], y[sel]) scores = cross_val_multiscore(td, X[sel], y[sel], cv=StratifiedKFold(12)) scores = scores.mean(axis=0) scores = np.reshape(scores, (n_freqs, n_times)) # Save cross validated scores np.save(fname, np.array(scores))
# Decode 'reward' vs 'no_reward' for every epochs file in ./epochs/ and stack
# the per-file cross-validation scores along the first axis:
#   scores_td  -- time-resolved decoding (SlidingEstimator, default scoring)
#   scores_gat -- temporal generalization (GeneralizingEstimator, ROC AUC)
path = './epochs/'

# Sorted so the row order of the stacked scores does not depend on the
# arbitrary order in which glob returns the files.
epochs_files = sorted(glob.glob(os.path.join(path, '*-epo.fif')))
if not epochs_files:
    raise FileNotFoundError('No *-epo.fif files found in %s' % path)

# Per-file results are collected in lists and concatenated once after the
# loop; this no longer relies on one specific file name being visited first.
scores_td_per_file = []
scores_gat_per_file = []

for epochs_file in epochs_files:
    # Read the current file (the original passed the directory ``path`` here).
    epochs = mne.read_epochs(epochs_file, preload=True)
    epochs.crop(tmin=-0.5, tmax=epochs.tmax)

    # Equalize trial counts between the two conditions on a copy.
    epochs_eq = epochs.copy().equalize_event_counts(['reward', 'no_reward'])[0]
    epochs_sel = epochs_eq['reward', 'no_reward']
    X = epochs_sel.get_data()
    y = epochs_sel.events[:, 2]

    clf = make_pipeline(Vectorizer(), StandardScaler(),
                        LinearSVC(class_weight='balanced'))
    # Kept from the original script: fit on the full data of this file.
    # NOTE(review): this fit is not needed for the cross-validated scores
    # below -- confirm whether later code relies on the fitted ``clf``.
    clf.fit(X, y)

    # Time-resolved decoding.
    sl = SlidingEstimator(clf)
    scores_time_decoding = cross_val_multiscore(sl, X, y)
    scores_td_per_file.append(scores_time_decoding)

    # Temporal generalization. The per-file result gets its own name so it
    # can no longer overwrite (and be appended to) the accumulated scores.
    gen = GeneralizingEstimator(clf, scoring='roc_auc')
    scores_gat_file = cross_val_multiscore(gen, X, y)
    scores_gat_per_file.append(scores_gat_file)

# Same layout as repeated np.append(..., axis=0): folds of all files stacked
# along the first axis.
scores_td = np.concatenate(scores_td_per_file, axis=0)
scores_gat = np.concatenate(scores_gat_per_file, axis=0)

###### Plot decoding results