def devp_fit_decoding_sensor_space(self, **kwargs):
    """Fit a sensor-space decoder, cross-validate it, and cache both on disk.

    The fitted estimator and the fold-averaged scores are pickled to the
    paths returned by ``self.get_decoding_model_pkl`` and
    ``self.get_decoding_score_pkl``.  When both files already exist and
    ``retrain`` is falsy, the cached scores are returned and nothing is
    recomputed.

    Parameters
    ----------
    **kwargs
        Must contain ``target_event`` (used to build the cache names) and,
        when both cache files exist, ``retrain``.  The mapping is forwarded
        to the path helpers, ``self.devp_get_data_sensor_space`` and
        ``self.devp_estimator``; its ``name`` entry is overwritten here.

    Returns
    -------
    scores
        Cross-validation scores averaged over the folds (axis 0).
    epochs
        The epochs returned by ``self.devp_get_data_sensor_space``, or
        ``None`` when the cached scores were used.
    """
    # Only the names that are actually used are imported locally.
    from mne.decoding import cross_val_multiscore
    from devp_basicio import save_pkl, load_pkl

    target = kwargs["target_event"]
    kwargs['name'] = f'model_{target}'
    model_f = self.get_decoding_model_pkl(**kwargs)
    kwargs['name'] = f'score_{target}'
    scores_f = self.get_decoding_score_pkl(**kwargs)

    # Check `retrain` before touching the pickles: the cached model is never
    # needed on this path, and nothing should be loaded when retraining.
    cache_ready = Path(model_f).exists() and Path(scores_f).exists()
    if cache_ready and not kwargs['retrain']:
        return load_pkl(scores_f, logger=self.logger), None

    X, y, epochs = self.devp_get_data_sensor_space(**kwargs)

    time_decod = self.devp_estimator(**kwargs)
    time_decod.fit(X, y)
    save_pkl(time_decod, model_f, logger=self.logger)

    scores = cross_val_multiscore(time_decod, X, y, cv=12, n_jobs=6)
    scores = np.mean(scores, axis=0)
    save_pkl(scores, scores_f, logger=self.logger)
    return scores, epochs
def NeuralNet(X_train, y_train, X_test, y_test, scorer, predict_mode, params):
    """Cross-validate a small MLP classifier with a GeneralizingEstimator.

    Parameters
    ----------
    X_train, y_train
        Data and labels passed to ``cross_val_multiscore``.
    X_test, y_test, predict_mode, params
        Accepted for signature compatibility; not used by this function.
    scorer : str
        ``'scorer_auc'`` selects ``'roc_auc'``; ``'accuracy'`` selects the
        estimator's default score (``scoring=None``).  Any other value is
        forwarded unchanged after printing a notice.

    Returns
    -------
    scores
        The value returned by ``cross_val_multiscore``.
    """
    model = MLPClassifier(solver='lbfgs', alpha=1e-5,
                          hidden_layer_sizes=(3,), random_state=1)
    cv = StratifiedKFold(n_splits=4, random_state=0, shuffle=True)
    clf = make_pipeline(StandardScaler(), model)

    # Compare strings by value: `is` tests object identity and only works by
    # accident of string interning.
    if scorer == 'scorer_auc':
        scorer = 'roc_auc'
    elif scorer == 'accuracy':
        scorer = None
    else:
        # NOTE(review): the message announces accuracy, but `scorer` is
        # passed through as given. Kept as-is to preserve results; confirm
        # which behaviour is intended.
        print('using accuracy as the scorer')

    time_gen = GeneralizingEstimator(clf, n_jobs=2, scoring=scorer)
    scores = cross_val_multiscore(time_gen, X_train, y_train, cv=cv, n_jobs=2)
    return scores
def logreg_timedecoding(epochs, numcv=4, jobs=1):
    """Time-resolved logistic-regression decoding over sensors.

    A sliding estimator is cross-validated with ROC AUC scoring, then refit
    on all epochs to extract the ``patterns_`` coefficients.

    Adapted from:
    https://martinos.org/mne/stable/auto_tutorials/plot_sensors_decoding.html

    Parameters
    ----------
    epochs
        Epochs object; targets are read from column 2 of ``epochs.events``.
    numcv
        Forwarded as ``cv`` to ``cross_val_multiscore``.
    jobs
        Forwarded as ``n_jobs`` to the estimator and the cross-validation.

    Returns
    -------
    evoked
        ``mne.EvokedArray`` holding the coefficients, with the fold-averaged
        scores attached as ``evoked.roc_auc``.
    """
    data = epochs.get_data().astype(float)
    targets = epochs.events[:, 2]

    pipeline = make_pipeline(StandardScaler(),
                             LinearModel(LogisticRegression()))
    time_decod = SlidingEstimator(pipeline, scoring='roc_auc', n_jobs=jobs)

    fold_scores = cross_val_multiscore(time_decod, data, targets,
                                       cv=numcv, n_jobs=jobs)
    mean_scores = np.mean(fold_scores, axis=0)

    # Refit on every epoch to obtain one set of patterns.
    time_decod.fit(data, targets)
    patterns = get_coef(time_decod, 'patterns_', inverse_transform=True)

    evoked = mne.EvokedArray(patterns, epochs.info, tmin=epochs.times[0])
    evoked.roc_auc = mean_scores
    return evoked
def LogisticRegression(X_train, y_train, X_test, y_test, scorer, predict_mode, params):
    """Cross-validate a class-balanced logistic regression across time.

    Parameters
    ----------
    X_train, y_train
        Data and labels passed to ``cross_val_multiscore``.
    X_test, y_test, predict_mode, params
        Accepted for signature compatibility; not used by this function.
    scorer : str
        ``'scorer_auc'`` selects ``'roc_auc'``; ``'accuracy'`` selects the
        estimator's default score (``scoring=None``).  Any other value is
        forwarded unchanged after printing a notice.

    Returns
    -------
    scores
        The value returned by ``cross_val_multiscore``.
    """
    model = linear_model.LogisticRegression(class_weight='balanced')
    cv = StratifiedKFold(n_splits=4, random_state=0, shuffle=True)
    clf = make_pipeline(StandardScaler(), model)

    # Compare strings by value: `is` tests object identity and only works by
    # accident of string interning.
    if scorer == 'scorer_auc':
        scorer = 'roc_auc'
    elif scorer == 'accuracy':
        scorer = None
    else:
        # NOTE(review): the message announces accuracy, but `scorer` is
        # passed through as given. Kept as-is to preserve results; confirm
        # which behaviour is intended.
        print('using accuracy as the scorer')

    time_gen = GeneralizingEstimator(clf, n_jobs=2, scoring=scorer)
    scores = cross_val_multiscore(time_gen, X_train, y_train, cv=cv, n_jobs=2)
    return scores
def run_time_decoding(subject, condition1, condition2, session=None):
    """Decode ``condition1`` vs ``condition2`` over time for one subject.

    Reads the subject's ``-epo.fif`` file from the derivatives tree, scores a
    sliding logistic-regression estimator with stratified cross-validation,
    and writes the per-fold scores and the time axis to a ``.mat`` file.

    Parameters
    ----------
    subject : str
        Subject label (without the ``sub-`` prefix).
    condition1, condition2 : str
        Epoch selectors; ``condition1`` is labelled 1 and ``condition2`` 0.
    session : str | None
        Optional session label (without the ``ses-`` prefix).
    """
    print("Processing subject: %s (%s vs %s)"
          % (subject, condition1, condition2))

    # Construct the search path for the data file. `sub` is mandatory,
    # `session` is optional.
    subject_path = op.join('sub-{}'.format(subject))
    if session is not None:
        subject_path = op.join(subject_path, 'ses-{}'.format(session))
    subject_path = op.join(subject_path, config.kind)

    bids_basename = make_bids_basename(subject=subject,
                                       session=session,
                                       task=config.task,
                                       acquisition=config.acq,
                                       run=None,
                                       processing=config.proc,
                                       recording=config.rec,
                                       space=config.space)

    fpath_deriv = op.join(config.bids_root, 'derivatives',
                          config.PIPELINE_NAME, subject_path)
    fname_in = op.join(fpath_deriv, bids_basename + '-epo.fif')

    epochs = mne.read_epochs(fname_in)

    # Keep only the two conditions, condition1 first, so the label vector
    # below lines up with the epoch order.
    epochs = mne.concatenate_epochs([epochs[condition1], epochs[condition2]])
    epochs.apply_baseline()

    X = epochs.get_data()
    n_cond1 = len(epochs[condition1])
    n_cond2 = len(epochs[condition2])
    y = np.r_[np.ones(n_cond1), np.zeros(n_cond2)]

    se = SlidingEstimator(
        make_pipeline(StandardScaler(),
                      LogisticRegression(solver='liblinear',
                                         random_state=config.random_state)),
        scoring=config.decoding_metric, n_jobs=config.N_JOBS)

    # StratifiedKFold only uses `random_state` when shuffle=True, and recent
    # scikit-learn raises ValueError if a seed is given without shuffling.
    # The unshuffled split is deterministic, so dropping the seed keeps the
    # same folds.
    cv = StratifiedKFold(n_splits=config.decoding_n_splits)
    scores = cross_val_multiscore(se, X=X, y=y, cv=cv)

    # Save the scores; path separators are stripped from the contrast name.
    a_vs_b = '%s_vs_%s' % (condition1, condition2)
    a_vs_b = a_vs_b.replace(op.sep, '')
    fname_td = op.join(config.bids_root, 'derivatives', config.PIPELINE_NAME,
                       '%s_%s_%s_%s.mat' % (subject, config.study_name,
                                            a_vs_b, config.decoding_metric))
    savemat(fname_td, {'scores': scores, 'times': epochs.times})
def run_time_decoding(subject_id, condition1, condition2):
    """Time-decode two EEG conditions for one subject and save the AUC scores.

    Parameters
    ----------
    subject_id : int
        Numeric subject id, formatted as ``S%02d``.
    condition1, condition2 : str
        Epoch selectors; ``/`` is replaced by ``-`` in the output file name.
    """
    print("processing subject: %s (%s vs %s)"
          % (subject_id, condition1, condition2))

    subject = "S%02d" % subject_id
    data_path = os.path.join(ana_path, subject, 'EEG', 'New_Preproc')
    epochs = mne.read_epochs(
        os.path.join(data_path, '%s-causal-highpass-2Hz-epo.fif' % subject))

    # We define the epochs and the labels
    epochs = epochs[condition1, condition2]
    epochs.apply_baseline()

    # Restrict to the EEG channels, and decimate to make it faster
    # (although we might miss some detail / alias).
    epochs.pick_types(eeg=True).decimate(2, verbose='error')

    # Collapse face/house into a single event id per task.
    mne.epochs.combine_event_ids(epochs, ['stim/face', 'stim/house'],
                                 {'stim': 100}, copy=False)
    mne.epochs.combine_event_ids(epochs, ['imag/face', 'imag/house'],
                                 {'imag': 200}, copy=False)

    # Get the data and labels
    X = epochs.get_data()
    le = LabelEncoder()
    y = le.fit_transform(epochs.events[:, 2])

    # Use AUC because chance level is same regardless of the class balance
    se = SlidingEstimator(
        make_pipeline(StandardScaler(), LogisticRegression()),
        scoring='roc_auc', n_jobs=1)

    # `scores` must be computed here: it is written to disk below.
    scores = cross_val_multiscore(se, X=X, y=y, cv=StratifiedKFold())

    # Save the scores
    cond1 = condition1.replace('/', '-')
    cond2 = condition2.replace('/', '-')
    a_vs_b = '%s_vs_%s' % (cond1, cond2)
    fname_td = os.path.join(
        results_path,
        '%s-causal-highpass-2Hz-td-auc-%s.mat' % (subject, a_vs_b))
    savemat(fname_td, {'scores': scores, 'times': epochs.times})
def run_time_decoding(subject, condition1, condition2, session=None):
    """Contrast two conditions with time-resolved decoding.

    Reads the subject's epochs, cross-validates a sliding logistic-regression
    estimator, and stores the scores as a ``.mat`` file plus a ``.tsv``
    table of fold-averaged scores per time point.

    Parameters
    ----------
    subject : str
        Subject label.
    condition1, condition2 : str
        Epoch selectors; ``condition1`` is labelled 1, ``condition2`` 0.
    session : str | None
        Optional session label.
    """
    logger.info(gen_log_message(
        message=f'Contrasting conditions: {condition1} – {condition2}',
        step=7, subject=subject, session=session))

    fname_epochs = BIDSPath(subject=subject,
                            session=session,
                            task=config.get_task(),
                            acquisition=config.acq,
                            run=None,
                            recording=config.rec,
                            space=config.space,
                            suffix='epo',
                            extension='.fif',
                            datatype=config.get_datatype(),
                            root=config.deriv_root,
                            check=False)

    all_epochs = mne.read_epochs(fname_epochs)

    # Stack condition1 in front of condition2 so labels follow epoch order.
    epochs = mne.concatenate_epochs(
        [all_epochs[condition1], all_epochs[condition2]])

    X = epochs.get_data()
    labels_1 = np.ones(len(epochs[condition1]))
    labels_0 = np.zeros(len(epochs[condition2]))
    y = np.concatenate([labels_1, labels_0])

    classifier = LogisticRegression(solver='liblinear',
                                    random_state=config.random_state)
    se = SlidingEstimator(make_pipeline(StandardScaler(), classifier),
                          scoring=config.decoding_metric,
                          n_jobs=config.N_JOBS)
    scores = cross_val_multiscore(se, X=X, y=y, cv=config.decoding_n_splits)

    # Build the `processing` entity: contrast + metric, with path
    # separators, underscores and dashes removed.
    a_vs_b = f'{condition1}-{condition2}'.replace(op.sep, '')
    processing = f'{a_vs_b}+{config.decoding_metric}'
    processing = processing.replace('_', '-').replace('-', '')

    fname_mat = fname_epochs.copy().update(suffix='decoding',
                                           processing=processing,
                                           extension='.mat')
    savemat(fname_mat, {'scores': scores, 'times': epochs.times})

    fname_tsv = fname_mat.copy().update(extension='.tsv')
    n_times = len(epochs.times)
    columns = dict(cond_1=[condition1] * n_times,
                   cond_2=[condition2] * n_times,
                   time=epochs.times,
                   mean_crossval_score=scores.mean(axis=0),
                   metric=[config.decoding_metric] * n_times)
    pd.DataFrame(columns).to_csv(fname_tsv, sep='\t', index=False)
def classify(epochs):
    """Return fold-averaged sliding-estimator accuracy for the given epochs.

    Epochs whose event code (column 2 of ``epochs.events``) equals 2 are
    excluded; the remaining event codes are the class labels.

    Parameters
    ----------
    epochs
        Epochs object providing ``get_data()`` and ``events``.

    Returns
    -------
    The 5-fold cross-validation scores averaged over axis 0.
    """
    event_codes = epochs.events[:, 2]
    keep = event_codes != 2  # throw out standard

    X = epochs.get_data()[keep, :, :]  # epochs * channels * time
    y = event_codes[keep]

    pipeline = make_pipeline(StandardScaler(), LogisticRegression())
    sliding = dc.SlidingEstimator(pipeline, n_jobs=1, scoring='accuracy')
    fold_scores = dc.cross_val_multiscore(sliding, X, y, cv=5, n_jobs=1)
    return np.mean(fold_scores, axis=0)
def run_gat(name, decoder="ridge"):
    """Run Generalization Across Time (GAT) for one subject.

    Parameters
    ----------
    name : str
        Name (pseudonym) of individual subject.
    decoder : str
        Type of classifier: ``'ridge'`` for Ridge Regression (default),
        ``'log_reg'`` for Logistic Regression, ``'svm'`` for a nonlinear
        (RBF) SVM, ``'lin-svm'`` for a linear SVM.  The legacy spellings
        ``'svm-<variant>'`` are still accepted: the kernel is RBF when the
        variant contains ``'non'`` and linear otherwise.

    Returns
    -------
    scores
        ROC AUC scores averaged over the 5 cross-validation folds.
    preds : np.ndarray
        Decision-function values for every epoch, filled fold by fold from
        the held-out predictions.

    Raises
    ------
    KeyError
        If ``decoder`` does not name a known classifier.
    """
    epochs = get_epochs(name)['song', 'voice']
    data = epochs.get_data()
    labels = epochs.events[:, -1]

    # Resolve the SVM kernel. Splitting on "-" and unpacking, as before,
    # failed for both documented spellings ('svm' and 'lin-svm').
    kernel = "linear"
    if "svm" in decoder:
        variant = decoder.replace("svm", "").strip("-")
        if decoder == "svm" or "non" in variant:
            kernel = "rbf"
        decoder = "svm"

    # Pick a machine learning algorithm (ridge / SVM / logistic regression).
    decoder_dict = {
        "ridge": RidgeClassifier(class_weight='balanced', random_state=42,
                                 solver="sag"),
        "svm": SVC(class_weight='balanced', kernel=kernel, random_state=42),
        "log_reg": LogisticRegression(class_weight='balanced',
                                      random_state=42),
    }
    clf = make_pipeline(StandardScaler(), decoder_dict[decoder])
    gen_clf = GeneralizingEstimator(clf, scoring="roc_auc", n_jobs=4)

    scores = cross_val_multiscore(gen_clf, data, labels,
                                  cv=5, n_jobs=4).mean(0)

    # No seed: without shuffling the split is deterministic, and recent
    # scikit-learn rejects random_state when shuffle=False.
    cv = StratifiedKFold(n_splits=5)

    # Size the prediction array from the data instead of a fixed 225.
    n_times = data.shape[-1]
    preds = np.empty((len(labels), n_times, n_times))
    for train, test in cv.split(data, labels):
        gen_clf.fit(data[train], labels[train])
        preds[test] = gen_clf.decision_function(data[test])

    return scores, preds
def run_time_decoding(subject, condition1, condition2, session=None):
    """Contrast two conditions with time-resolved decoding and save a .mat.

    Parameters
    ----------
    subject : str
        Subject label.
    condition1, condition2 : str
        Epoch selectors; ``condition1`` is labelled 1, ``condition2`` 0.
    session : str | None
        Optional session label.
    """
    logger.info(gen_log_message(
        message=f'Contrasting conditions: {condition1} – {condition2}',
        step=8, subject=subject, session=session))

    deriv_path = config.get_subject_deriv_path(subject=subject,
                                               session=session,
                                               kind=config.get_kind())
    fname_in = BIDSPath(subject=subject,
                        session=session,
                        task=config.get_task(),
                        acquisition=config.acq,
                        run=None,
                        recording=config.rec,
                        space=config.space,
                        prefix=deriv_path,
                        kind='epo',
                        extension='.fif',
                        check=False)

    all_epochs = mne.read_epochs(fname_in)

    # Stack condition1 in front of condition2 so labels follow epoch order.
    epochs = mne.concatenate_epochs(
        [all_epochs[condition1], all_epochs[condition2]])
    epochs.apply_baseline()

    X = epochs.get_data()
    labels_1 = np.ones(len(epochs[condition1]))
    labels_0 = np.zeros(len(epochs[condition2]))
    y = np.concatenate([labels_1, labels_0])

    classifier = LogisticRegression(solver='liblinear',
                                    random_state=config.random_state)
    se = SlidingEstimator(make_pipeline(StandardScaler(), classifier),
                          scoring=config.decoding_metric,
                          n_jobs=config.N_JOBS)
    scores = cross_val_multiscore(se, X=X, y=y, cv=config.decoding_n_splits)

    # Build the `processing` entity: contrast + metric, with path
    # separators, underscores and dashes removed.
    a_vs_b = f'{condition1}-{condition2}'.replace(op.sep, '')
    processing = f'{a_vs_b}+{config.decoding_metric}'
    processing = processing.replace('_', '-').replace('-', '')

    fname_td = fname_in.copy().update(kind='decoding',
                                      processing=processing,
                                      extension='.mat')
    savemat(fname_td, {'scores': scores, 'times': epochs.times})
def run_time_decoding(subject, condition1, condition2, session=None):
    """Contrast two conditions with time-resolved decoding and save a .mat.

    The output file is written under
    ``<bids_root>/derivatives/<PIPELINE_NAME>/``.

    Parameters
    ----------
    subject : str
        Subject label.
    condition1, condition2 : str
        Epoch selectors; ``condition1`` is labelled 1, ``condition2`` 0.
    session : str | None
        Optional session label.
    """
    logger.info(gen_log_message(
        message=f'Contrasting conditions: {condition1} – {condition2}',
        step=8, subject=subject, session=session))

    deriv_path = config.get_subject_deriv_path(subject=subject,
                                               session=session,
                                               kind=config.get_kind())
    bids_basename = make_bids_basename(subject=subject,
                                       session=session,
                                       task=config.get_task(),
                                       acquisition=config.acq,
                                       run=None,
                                       processing=config.proc,
                                       recording=config.rec,
                                       space=config.space)

    all_epochs = mne.read_epochs(op.join(deriv_path,
                                         bids_basename + '-epo.fif'))

    # Stack condition1 in front of condition2 so labels follow epoch order.
    epochs = mne.concatenate_epochs(
        [all_epochs[condition1], all_epochs[condition2]])
    epochs.apply_baseline()

    X = epochs.get_data()
    labels_1 = np.ones(len(epochs[condition1]))
    labels_0 = np.zeros(len(epochs[condition2]))
    y = np.concatenate([labels_1, labels_0])

    # The scoring metric comes from the configuration.
    classifier = LogisticRegression(solver='liblinear',
                                    random_state=config.random_state)
    se = SlidingEstimator(make_pipeline(StandardScaler(), classifier),
                          scoring=config.decoding_metric,
                          n_jobs=config.N_JOBS)
    scores = cross_val_multiscore(se, X=X, y=y, cv=config.decoding_n_splits)

    # Contrast name for the file, with path separators stripped.
    a_vs_b = ('%s_vs_%s' % (condition1, condition2)).replace(op.sep, '')
    mat_name = '%s_%s_%s_%s.mat' % (subject, config.study_name, a_vs_b,
                                    config.decoding_metric)
    fname_td = op.join(config.bids_root, 'derivatives',
                       config.PIPELINE_NAME, mat_name)
    savemat(fname_td, {'scores': scores, 'times': epochs.times})
def mneClassify(self, sj, to_decode, conditions, time=None):
    """Plot time-resolved decoding scores per condition for one subject.

    Epochs from sessions 1 and 2 are stacked, cropped to ``time`` and to the
    good EEG channels, and a sliding logistic-regression estimator is
    cross-validated (5 folds) separately for each condition.  The mean score
    curves are saved to a PDF.  Input and output paths are hard-coded.

    Parameters
    ----------
    sj
        Subject identifier used in the file names.
    to_decode
        Key of the behavioural data holding the labels to decode.
    conditions
        Iterable of values matched against ``beh['condition']``.
    time : sequence of two floats | None
        Start and end of the decoding window, passed to ``time_as_index``.
        ``None`` (default) means ``[-0.3, 0.8]``.
    """
    # Avoid a shared mutable default argument.
    if time is None:
        time = [-0.3, 0.8]

    clf = make_pipeline(StandardScaler(), LogisticRegression())
    time_decod = SlidingEstimator(clf, n_jobs=1)

    # get eeg data
    sessions = [
        mne.read_epochs(
            '/Users/dirk/Desktop/suppression/processed/subject-{}_ses-{}-epo.fif'
            .format(sj, session + 1))
        for session in range(2)
    ]
    first = sessions[0]
    times = first.times

    # select time window and electrodes
    s_idx, e_idx = first.time_as_index(time)
    picks = mne.pick_types(first.info, eeg=True, exclude='bads')
    eeg = np.vstack((sessions[0]._data, sessions[1]._data))
    eeg = eeg[:, picks, :][:, :, s_idx:e_idx]

    # get behavior data
    # NOTE: pickle.load executes arbitrary code; only load trusted files.
    with open(
            '/Users/dirk/Desktop/suppression/beh/processed/subject-{}_all.pickle'
            .format(sj), 'rb') as handle:
        beh = pickle.load(handle)

    plt.figure(figsize=(20, 20))
    palette = ['r', 'g', 'b', 'y']
    for i, cnd in enumerate(conditions):
        mask = beh['condition'] == cnd
        X = eeg[mask]
        y = beh[to_decode][mask]
        scores = cross_val_multiscore(time_decod, X, y, cv=5, n_jobs=1)
        # Cycle through the palette so a fifth condition does not raise
        # IndexError.
        plt.plot(times[s_idx:e_idx], scores.mean(axis=0),
                 color=palette[i % len(palette)], label=cnd)

    plt.legend(loc='best')
    plt.savefig(
        '/Users/dirk/Desktop/suppression/bdm/figs/{}_{}_bdm.pdf'.format(
            to_decode, sj))
    plt.close()
def run_time_decoding(subject, condition1, condition2):
    """Decode ``condition1`` vs ``condition2`` over time for one subject.

    Reads the subject's epochs from ``config.meg_dir``, cross-validates a
    sliding logistic-regression estimator, and writes the per-fold scores
    and the time axis to a ``.mat`` file in the subject directory.

    Parameters
    ----------
    subject : str
        Subject identifier (also the sub-directory name).
    condition1, condition2 : str
        Epoch selectors; ``condition1`` is labelled 1 and ``condition2`` 0.
    """
    print("processing subject: %s (%s vs %s)"
          % (subject, condition1, condition2))
    print("Processing subject: %s" % subject)

    meg_subject_dir = op.join(config.meg_dir, subject)
    # `extension` is consumed by the template via **locals() below, together
    # with whichever other local names `config.base_fname` references.
    extension = '-epo'
    fname_in = op.join(meg_subject_dir,
                       config.base_fname.format(**locals()))
    print("Input: ", fname_in)

    epochs = mne.read_epochs(fname_in)

    # Keep only the two conditions, condition1 first, so the label vector
    # below lines up with the epoch order.
    epochs = mne.concatenate_epochs([epochs[condition1], epochs[condition2]])
    epochs.apply_baseline()

    X = epochs.get_data()
    n_cond1 = len(epochs[condition1])
    n_cond2 = len(epochs[condition2])
    y = np.r_[np.ones(n_cond1), np.zeros(n_cond2)]

    se = SlidingEstimator(
        make_pipeline(StandardScaler(),
                      LogisticRegression(solver='liblinear',
                                         random_state=config.random_state)),
        scoring=config.decoding_metric, n_jobs=config.N_JOBS)

    # StratifiedKFold only uses `random_state` when shuffle=True, and recent
    # scikit-learn raises ValueError if a seed is given without shuffling.
    # The unshuffled split is deterministic, so dropping the seed keeps the
    # same folds.
    cv = StratifiedKFold(n_splits=config.decoding_n_splits)
    scores = cross_val_multiscore(se, X=X, y=y, cv=cv)

    # Save the scores; path separators are stripped from the contrast name.
    a_vs_b = '%s_vs_%s' % (condition1, condition2)
    a_vs_b = a_vs_b.replace(op.sep, '')
    fname_td = op.join(
        meg_subject_dir,
        '%s_%s_%s_%s.mat' % (subject, config.study_name, a_vs_b,
                             config.decoding_metric))
    savemat(fname_td, {'scores': scores, 'times': epochs.times})
def sliding_logreg_source(X, y, cross_val, return_clf=False):
    """Cross-validate a sliding Logistic Regression estimator on source data.

    Parameters
    ----------
    X : np.array
        Features.
    y : vector
        Response vector.
    cross_val : cross validation object
        Passed as ``cv`` to ``cross_val_multiscore``.
    return_clf : bool
        When exactly ``True``, the pipeline object is returned as well.

    Returns
    -------
    score
        Cross-validated ROC AUC scores as returned by
        ``cross_val_multiscore``.
    clf : classifier object
        Only when ``return_clf is True``: the pipeline that was wrapped by
        the sliding estimator.
    """
    t_start = time.time()

    clf = make_pipeline(StandardScaler(), LinearModel(LogisticRegression()))
    sliding = SlidingEstimator(clf, scoring='roc_auc', n_jobs=1)

    print('Computing Logistic Regression.')
    score = cross_val_multiscore(sliding, X, y, cv=cross_val)

    elapsed = time.time() - t_start
    print('Done. Time elapsed for sliding estimator: %i seconds.' % elapsed)

    if return_clf is not True:
        return score
    return score, clf
def run_time_decoding(subject_id, condition1, condition2):
    """Time-decode two conditions on MEG channels and save the AUC scores.

    Parameters
    ----------
    subject_id : int
        Numeric subject id, formatted as ``sub%03d``.
    condition1, condition2 : str
        Epoch selectors; ``condition1`` is labelled 1, ``condition2`` 0.
        Only their base names appear in the output file name.
    """
    print("processing subject: %s (%s vs %s)"
          % (subject_id, condition1, condition2))

    subject = "sub%03d" % subject_id
    data_path = os.path.join(meg_dir, subject)
    epo_fname = os.path.join(
        data_path, '%s_highpass-%sHz-epo.fif' % (subject, l_freq))
    all_epochs = mne.read_epochs(epo_fname)

    # Stack condition1 in front of condition2 so labels follow epoch order.
    epochs = mne.concatenate_epochs(
        [all_epochs[condition1], all_epochs[condition2]])
    epochs.apply_baseline()

    # Let us restrict ourselves to the MEG channels, and also decimate to
    # make it faster (although we might miss some detail / alias)
    epochs.pick_types(meg=True).decimate(4, verbose='error')

    X = epochs.get_data()
    labels_1 = np.ones(len(epochs[condition1]))
    labels_0 = np.zeros(len(epochs[condition2]))
    y = np.concatenate([labels_1, labels_0])

    # Use AUC because chance level is same regardless of the class balance
    pipeline = make_pipeline(StandardScaler(), LogisticRegression())
    se = SlidingEstimator(pipeline, scoring='roc_auc', n_jobs=N_JOBS)
    scores = cross_val_multiscore(se, X=X, y=y, cv=StratifiedKFold())

    # Save the scores
    a_vs_b = '%s_vs_%s' % (os.path.basename(condition1),
                           os.path.basename(condition2))
    mat_name = '%s_highpass-%sHz-td-auc-%s.mat' % (subject, l_freq, a_vs_b)
    fname_td = os.path.join(data_path, mat_name)
    savemat(fname_td, {'scores': scores, 'times': epochs.times})
# by using :class:`sklearn.pipeline.Pipeline`. We can construct decoding # pipelines and perform cross-validation and grid-search. However scikit-learn # transformers and estimators generally expect 2D data # (n_samples * n_features), whereas MNE transformers typically output data # with a higher dimensionality # (e.g. n_samples * n_channels * n_frequencies * n_times). A Vectorizer # therefore needs to be applied between the MNE and the scikit-learn steps # like: # Uses all MEG sensors and time points as separate classification # features, so the resulting filters used are spatio-temporal clf = make_pipeline(Scaler(epochs.info), Vectorizer(), LogisticRegression(solver='lbfgs')) scores = cross_val_multiscore(clf, X, y, cv=5, n_jobs=1) # Mean scores across cross-validation splits score = np.mean(scores, axis=0) print('Spatio-temporal: %0.1f%%' % (100 * score,)) ############################################################################### # PSDEstimator # ^^^^^^^^^^^^ # The :class:`mne.decoding.PSDEstimator` # computes the power spectral density (PSD) using the multitaper # method. It takes a 3D array as input, converts it into 2D and computes the # PSD. # # FilterEstimator # ^^^^^^^^^^^^^^^
labels_shuffled = np.random.permutation(labels) # generate iterator for cross validation kf = StratifiedKFold(n_splits=2, shuffle=True) cv_iter = kf.split(np.zeros(X.shape), labels_shuffled) # pipeline for classification cl = make_pipeline(RobustScaler(), PCA(n_components=var_exp), LinearSVC(max_iter=10000, dual=False, penalty="l1")) # temporal generalisation temp_genr = SlidingEstimator(cl, n_jobs=1, scoring="roc_auc") # cross validation scores = cross_val_multiscore(temp_genr, X, labels_shuffled, cv=cv_iter, n_jobs=-1) scores_all.append(scores) scores_all = np.vstack(scores_all) scores_path = op.join(output_dir, "reg_vs_odd_chance_level-{}.npy".format(subject)) np.save(scores_path, scores_all) print("saved") scores_all = scores_all[::2, :]
# Extract half of the epochs for similar SNR in all conditions epochs_nr = len(epochs['left/grating']) epochs_range = np.random.permutation(np.arange(0, epochs_nr, 1)) np.save('%s_epochs_range.npy' % subject, epochs_range) X = np.concatenate( (epochs["face"][epochs_range[:int(epochs_nr / 2)]].get_data(), epochs["grating"][epochs_range[:int(epochs_nr / 2)]].get_data())) y = np.concatenate( (np.zeros(int(epochs_nr / 2)), np.ones(int(epochs_nr / 2)))) cv = StratifiedKFold(n_splits=10, shuffle=True) clf = make_pipeline(StandardScaler(), LogisticRegression()) time_gen = GeneralizingEstimator(clf, scoring='roc_auc', n_jobs=n_jobs) time_gen.fit(X, y) scores = cross_val_multiscore(time_gen, X, y, cv=cv) # Save results joblib.dump(time_gen, "%s_time_gen_svm.jbl" % subject) np.save("%s_time_gen_score_svm.npy" % subject, scores) X_left = np.concatenate( (epochs["left/face"].get_data(), epochs["left/grating"].get_data())) y_left = np.concatenate((np.zeros(len(epochs["left/face"].get_data())), np.ones(len(epochs["left/grating"].get_data())))) clf = make_pipeline(StandardScaler(), LogisticRegression()) time_gen_left = GeneralizingEstimator(clf, scoring='roc_auc', n_jobs=n_jobs) time_gen_left.fit(X_left, y_left) scores_left = cross_val_multiscore(time_gen, X_left, y_left, cv=cv)
cv = KFold(5) if regressor == 'condition': y = binary_scaler(y) # set values to 0.0 and 1.0 clf = make_pipeline(StandardScaler(), LogisticRegression(solver='liblinear')) scorer = 'roc_auc' score = 'AUC' cv = StratifiedKFold(5) n_jobs = -1 # set up estimator, get scores if decode_using == 'spatial': gen = SlidingEstimator(n_jobs=n_jobs, scoring=scorer, base_estimator=clf) scores = cross_val_multiscore(gen, X, y, cv=cv) elif decode_using == 'temporal': gen = SlidingEstimator(n_jobs=n_jobs, scoring=scorer, base_estimator=clf) scores = cross_val_multiscore(gen, X, y, cv=cv) else: # scoring defaults to neg mean squared so set to scorer # shuffle must be true when binary values, otherwise fold will only have one value scores = cross_val_score(clf, X, y, scoring=scorer, #defaults to neg mean squared cv=KFold(5, shuffle=True)) # mean scores across cross-validation splits scores = np.mean(scores, axis=0)
def main():
    """Run frequency-generalization decoding for samples 1 to 21.

    For every sample the epochs are transformed with complex Morlet
    wavelets.  At each time point the real and imaginary parts are stacked
    along the channel axis, reduced with ``pca``, and an LDA
    ``GeneralizingEstimator`` is cross-validated across the frequency axis.
    Per time point an accuracy bar plot and a generalization matrix are
    saved; per sample all matrices are written flattened to a CSV file.
    """
    model_type = "lda"
    exp_name = "freq_gen_matrix/"

    # The frequency grid does not depend on the sample.
    freqs = np.logspace(*np.log10([2, 25]), num=15)
    n_cycles = freqs / 4.
    string_freqs = [round(x, 2) for x in freqs]

    for sample in range(1, 22):
        print("sample {}".format(sample))

        # makedirs also creates missing parent directories and tolerates an
        # existing directory, which a bare os.mkdir does not.
        os.makedirs("Results/{}/{}/sample_{}".format(
            model_type, exp_name, sample), exist_ok=True)

        epochs = get_epochs(sample, scale=False)
        y_train = epochs.events[:, 2]

        print("applying morlet wavelet")
        wavelet_output = tfr_array_morlet(epochs.get_data(),
                                          sfreq=epochs.info['sfreq'],
                                          freqs=freqs,
                                          n_cycles=n_cycles,
                                          output='complex')

        n_times = wavelet_output.shape[3]
        time_results = np.zeros((n_times, len(freqs), len(freqs)))

        for time in range(n_times):
            print("time: {}".format(time))

            # Slice one time point; stack real and imaginary parts along
            # axis 1 so they become separate "channels".
            wavelet_epochs = wavelet_output[:, :, :, time]
            wavelet_epochs = np.append(wavelet_epochs.real,
                                       wavelet_epochs.imag, axis=1)

            wavelet_info = mne.create_info(ch_names=wavelet_epochs.shape[1],
                                           sfreq=epochs.info['sfreq'],
                                           ch_types='mag')
            wavelet_epochs = mne.EpochsArray(wavelet_epochs,
                                             info=wavelet_info,
                                             events=epochs.events)

            x_train = pca(80, wavelet_epochs, plot=False)

            model = LinearModel(
                LinearDiscriminantAnalysis(solver='lsqr', shrinkage='auto'))
            freq_gen = GeneralizingEstimator(model, n_jobs=1,
                                             scoring='accuracy',
                                             verbose=True)

            scores = cross_val_multiscore(freq_gen, x_train, y_train,
                                          cv=5, n_jobs=1)
            scores = np.mean(scores, axis=0)
            time_results[time] = scores

            # Bar plot of the diagonal (train and test on the same
            # frequency). Keyword arguments are required by current seaborn.
            sns.set()
            ax = sns.barplot(x=np.sort(string_freqs), y=np.diag(scores))
            ax.set(ylim=(0, 0.8), xlabel='Frequencies', ylabel='Accuracy',
                   title='Cross Val Accuracy {} for Subject {} for Time {}'.
                   format(model_type, sample, time))
            ax.axhline(0.12, color='k', linestyle='--')
            ax.figure.set_size_inches(8, 6)
            ax.figure.savefig(
                "Results/{}/{}/sample_{}/time_{}_accuracy.png".format(
                    model_type, exp_name, sample, time), dpi=300)
            plt.close('all')

            # Full train-frequency x test-frequency matrix.
            fig, ax = plt.subplots(1, 1)
            im = ax.imshow(scores, interpolation='lanczos', origin='lower',
                           cmap='RdBu_r', extent=[2, 25, 2, 25],
                           vmin=0., vmax=0.8)
            ax.set_xlabel('Testing Frequency (hz)')
            ax.set_ylabel('Training Frequency (hz)')
            ax.set_title(
                'Frequency generalization for Subject {} at Time {}'.format(
                    sample, time))
            plt.colorbar(im, ax=ax)
            ax.grid(False)
            ax.figure.savefig(
                "Results/{}/{}/sample_{}/time_{}_matrix.png".format(
                    model_type, exp_name, sample, time), dpi=300)
            plt.close('all')

        # One CSV row per time point, matrix flattened.
        time_results = time_results.reshape(time_results.shape[0], -1)
        all_results_df = pd.DataFrame(time_results)
        all_results_df.to_csv(
            "Results/{}/{}/sample_{}/all_time_matrix_results.csv".format(
                model_type, exp_name, sample))
def _average_trial_groups(data, group_size=4, stride=5):
    """Average consecutive groups of trials along the first axis.

    Starting at index 0 and advancing by ``stride``, the mean over
    ``data[start:start + group_size]`` is taken while ``start`` is at most
    ``len(data) - stride``.  The start index of each group is printed.
    """
    averaged = []
    for start in range(0, len(data) - stride + 1, stride):
        averaged.append(np.mean(data[start:start + group_size], 0))
        print(start)
    return averaged


def run_time_decoding(subject_id, cond1, cond2, event_id):
    """Time-decode imagined face vs house on PO7/PO8 from averaged trials.

    Epochs are built from the subject's raw file, restricted to the PO7 and
    PO8 EEG channels, averaged in groups of trials, and scored with a sliding
    logistic-regression estimator (3-fold stratified CV, ROC AUC).  The
    fold-averaged scores are saved to a ``.mat`` file next to the data.

    Parameters
    ----------
    subject_id : int
        Numeric subject id, formatted as ``S%02d``.
    cond1, cond2 : str
        Used only to name the output file.
        NOTE(review): the decoded data are always 'imag/face' vs
        'imag/house' regardless of these arguments - confirm intent.
    event_id
        Event mapping passed to ``mne.Epochs``.
    """
    subject = "S%02d" % subject_id
    # Single-argument print calls behave the same on Python 2 and 3.
    print('%s %s %s' % (subject, cond1, cond2))

    data_path = os.path.join('/home/claire/DATA/Data_Face_House/' + subject
                             + '/EEG/')
    raw_fname = data_path + subject + '-raw.fif'
    event_fname = data_path + subject + '-eve.fif'
    tmin, tmax = -0.5, 1.5

    raw = mne.io.read_raw_fif(raw_fname, preload=True)
    events = mne.read_events(event_fname)
    picks = mne.pick_types(raw.info, meg=False, eeg=True, stim=True,
                           eog=True, exclude='bads')

    # Read epochs
    epochs = mne.Epochs(raw, events, event_id, tmin, tmax, proj=True,
                        picks=picks, baseline=(None, 0.), preload=True,
                        decim=4)
    epochs.pick_types(eeg=True, exclude='bads')

    # Only look at two occipital channels.
    select_chans = [u'PO7', u'PO8']
    epochs.pick_types(eeg=True).pick_channels(list(select_chans))

    # Average groups of 4 trials.
    # NOTE(review): groups start every 5 trials, so every fifth trial is
    # left out. Kept unchanged to preserve existing results - confirm.
    # NOTE(review): the original called a bare `mean`, presumably numpy's
    # via a star import; `np.mean` is used here - confirm.
    mean_cond1 = _average_trial_groups(epochs['imag/face'].get_data())
    mean_cond2 = _average_trial_groups(epochs['imag/house'].get_data())

    # Decoding inputs: averaged trials, face labelled 0 and house 1.
    X = np.array(mean_cond1 + mean_cond2)
    y = np.array([0] * len(mean_cond1) + [1] * len(mean_cond2))

    cv = StratifiedKFold(n_splits=3, shuffle=False)
    clf = make_pipeline(StandardScaler(), LogisticRegression())
    time_decod = SlidingEstimator(clf, n_jobs=1, scoring='roc_auc')
    scores = cross_val_multiscore(time_decod, X, y, cv=cv, n_jobs=1)

    # Mean scores across cross-validation splits
    scores = np.mean(scores, axis=0)

    # Save scores
    a_vs_b = '%s_vs_%s' % (cond1, cond2)
    print('a_vs_b = %s' % a_vs_b)
    fname_td = os.path.join(
        data_path,
        '%s-td-auc-%s_ave_4_trials_po7_po8.mat' % (subject, a_vs_b))
    print('Saving %s' % fname_td)
    from scipy.io import savemat
    savemat(fname_td, {'scores': scores, 'times': epochs.times})
def run_time_decoding(subject, condition1, condition2, session=None):
    """Run sliding-estimator decoding for one contrast and save the scores.

    Loads the subject's epochs from the derivatives root, optionally
    restricts them to ``config.analyze_channels``, concatenates the epochs of
    the two conditions, and cross-validates a per-time-point logistic
    regression. The per-fold scores are written to a ``.mat`` file and their
    mean across folds to a ``.tsv`` file next to the epochs file.

    Parameters
    ----------
    subject : str
        Subject entity used to build the BIDS path.
    condition1, condition2 : str
        The two conditions to contrast. When ``config.conditions`` is a
        dict they are keys of that dict; otherwise they are used directly to
        select epochs.
    session : str | None
        Session entity used to build the BIDS path.
    """
    logger.info(gen_log_message(
        message=f'Contrasting conditions: {condition1} – {condition2}',
        step=7, subject=subject, session=session))

    fname_epochs = BIDSPath(
        subject=subject,
        session=session,
        task=config.get_task(),
        acquisition=config.acq,
        run=None,
        recording=config.rec,
        space=config.space,
        suffix='epo',
        extension='.fif',
        datatype=config.get_datatype(),
        root=config.deriv_root,
        check=False,
    )
    epochs = mne.read_epochs(fname_epochs)

    if config.analyze_channels:
        # The average reference is special-cased: when `analyze_channels`
        # holds e.g. a single channel, `concatenate_epochs` below would fail
        # while creating / applying the projection. Applying the average
        # reference directly (not through a projector) avoids that.
        wants_average_ref = ('eeg' in config.ch_types and
                             config.eeg_reference == 'average')
        if wants_average_ref:
            epochs.set_eeg_reference('average')
        else:
            epochs.apply_proj()
        epochs.pick(config.analyze_channels)

    # Resolve the names used for output files and the epoch selectors.
    cond_names = [condition1, condition2]
    if isinstance(config.conditions, dict):
        epochs_conds = [config.conditions[cond] for cond in cond_names]
    else:
        epochs_conds = [condition1, condition2]
    first_cond, second_cond = epochs_conds

    epochs = mne.concatenate_epochs([epochs[first_cond],
                                     epochs[second_cond]])
    n_cond1 = len(epochs[first_cond])
    n_cond2 = len(epochs[second_cond])

    # Condition 1 is labelled 1, condition 2 is labelled 0.
    data = epochs.get_data()
    labels = np.concatenate([np.ones(n_cond1), np.zeros(n_cond2)])

    pipeline = make_pipeline(
        StandardScaler(),
        LogisticRegression(solver='liblinear',
                           random_state=config.random_state))
    sliding = SlidingEstimator(pipeline, scoring=config.decoding_metric,
                               n_jobs=config.N_JOBS)
    scores = cross_val_multiscore(sliding, X=data, y=labels,
                                  cv=config.decoding_n_splits)

    # Build the `processing` entity: path separators, underscores and
    # hyphens are all stripped from it.
    contrast = f'{cond_names[0]}+{cond_names[1]}'.replace(op.sep, '')
    processing = f'{contrast}+{config.decoding_metric}'
    processing = processing.replace('_', '').replace('-', '')

    # Per-fold scores go to a .mat file ...
    fname_mat = fname_epochs.copy().update(suffix='decoding',
                                           processing=processing,
                                           extension='.mat')
    savemat(fname_mat, {'scores': scores, 'times': epochs.times})

    # ... and the fold-averaged scores to a tab-separated table.
    fname_tsv = fname_mat.copy().update(extension='.tsv')
    n_times = len(epochs.times)
    tabular_data = pd.DataFrame({
        'cond_1': [cond_names[0]] * n_times,
        'cond_2': [cond_names[1]] * n_times,
        'time': epochs.times,
        'mean_crossval_score': scores.mean(axis=0),
        'metric': [config.decoding_metric] * n_times,
    })
    tabular_data.to_csv(fname_tsv, sep='\t', index=False)
# The `inverse_transform` parameter will call this method on any estimator # contained in the pipeline, in reverse order. coef = get_coef(clf, name, inverse_transform=True) evoked = EvokedArray(coef, epochs.info, tmin=epochs.tmin) evoked.plot_topomap(title='EEG %s' % name[:-1], time_unit='s') #------------------------------------------------------------------------------ #------------------------------------------------------------------------------ #------------------------------------------------------------------------------ #Applying StandardScaler, LogisticRegression clf = make_pipeline(StandardScaler(), LogisticRegression(solver='lbfgs')) time_decod = SlidingEstimator(clf, n_jobs=1, scoring='roc_auc') scores = cross_val_multiscore(time_decod, ica_data, labels, cv=5, n_jobs=1) # Mean scores across cross-validation splits scores = np.mean(scores, axis=0) # Plot fig, ax = plt.subplots() ax.plot(epochs.times, scores, label='score') ax.axhline(.5, color='k', linestyle='--', label='chance') ax.set_xlabel('Times') ax.set_ylabel('AUC') # Area Under the Curve ax.legend() ax.axvline(.0, color='k', linestyle='-') ax.set_title('Sensor space decoding') #------------------------------------------------------------------------------
if subject[4:] in ("101", "124a", "213", "301a") and interval != "All": use_splits = 3 else: use_splits = n_splits cv = StratifiedKFold(n_splits=use_splits, shuffle=True, random_state=seed) # Get the data and label X = eps.get_data() y = eps.events[:, -1] # AUC b/c chance level same regardless of the class balance score = np.mean( cross_val_multiscore(time_decode, X=X, y=y, cv=cv, n_jobs=n_jobs), axis=0, ) aucs[si, ci, ii] = score[s_ix].mean() print(" %s vs. %s mean AUC: %.3f on %d epochs" % (c1, c2, score.mean(), len(eps))) mne.externals.h5io.write_hdf5( fname, dict(auc=aucs[si], events=events, intervals=intervals)) ages = np.array(features["age"]) age_bounds = [[40, 80], [105, 150], [175, 210], [40, 210]] fig, axes = plt.subplots( len(events), len(age_bounds),
###############################################################################
# Temporal decoding
# -----------------
#
# A logistic regression is used as the machine-learning model for a binary
# classification, with one classifier fitted per time sample.

# Training data: every epoch, with the event code as the class label.
X = epochs.get_data()  # n_epochs, n_channels, n_times
y = epochs.events[:, 2]  # third events column: the event code

clf = make_pipeline(StandardScaler(), LogisticRegression())
time_decod = SlidingEstimator(clf, n_jobs=1, scoring='roc_auc', verbose=True)

# 5-fold cross-validation, then the mean score across splits for each time
# sample.
scores = cross_val_multiscore(time_decod, X, y, cv=5, n_jobs=1).mean(axis=0)

# Plot the AUC time course with the chance level and stimulus onset marked.
fig, ax = plt.subplots()
ax.plot(epochs.times, scores, label='score')
ax.axhline(.5, color='k', linestyle='--', label='chance')
ax.set(xlabel='Times', ylabel='AUC')  # AUC: Area Under the Curve
ax.legend()
ax.axvline(.0, color='k', linestyle='-')
ax.set(title='Sensor space decoding')
plt.show()
rangeD = ['Resp_Screen_Object', 'Resp_Screen_Side'] else: y1 = whattodecod(cow_codes, y) # cow vs. bicycle y2 = whattodecod(round_codes, y) # round vs. square y3 = whattodecod(left_codes, y) # left vs. right y4 = whattodecod(text_codes, y) # text vs. image rangeY = [y1, y2, y3, y4] rangeD = ['Object', 'Cue', 'Button', 'Modality'] # ******** decoding and saving ********* for nb_yy, (yy, which_decod) in enumerate(zip(rangeY, rangeD)): scores = cross_val_multiscore(clf, data_to_dec, yy, cv=5, n_jobs=-1) scores = scores.mean(0) fname = op.join( directorydecod, '%s_%s_%s_%s_%s.npy' % (subject, file_to_ana, which_decod, type_epo, which_channels)) np.save(fname, scores) del epochs if file_to_ana is 'JustCue': for subj_nb, (subject, sessions) in enumerate(zip(subjects_id,
times[525:], (1.5, 1.6), mode="mean") labels = np.array(beh.movement_dir_sign) # data + labels # parameters for the classification k_folds = 10 # cv folds var_exp = 0.99 # percentage of variance # generate iterator for cross validation kf = StratifiedKFold(n_splits=k_folds, shuffle=True) cv_iter = kf.split(np.zeros(data.shape), labels) # pipeline for classification cl = make_pipeline(RobustScaler(), PCA(n_components=var_exp), LinearSVC(max_iter=10000, dual=False, penalty="l1")) # temporal generalisation temp_genr = GeneralizingEstimator(cl, n_jobs=1, scoring="roc_auc") # cross validation scores = cross_val_multiscore(temp_genr, data, labels, cv=cv_iter, n_jobs=-1) scores_path = op.join(output_dir, "clk_vs_anti_new_baseline-{}.npy".format(subject)) np.save(scores_path, scores) print("saved", scores_path)
# Collect one mean cross-validated ROC-AUC score (in percent) per subject.
# NOTE(review): `chroma`, `subjects`, `dataset` and `epoch_preprocessing` are
# bound outside the visible text; presumably this runs once per chromophore
# ('hbo' / 'hbr') inside an enclosing loop -- confirm.
st_scores = []
for sub in subjects:
    bids_path = dataset.update(subject=sub)
    raw_haemo, epochs = epoch_preprocessing(bids_path)
    # Keep only the channels selected by `chroma`.
    epochs.pick(chroma)
    X = epochs.get_data()
    y = epochs.events[:, 2]  # event code of each epoch, used as class label
    # Scale, flatten each epoch into one feature vector, then classify.
    clf = make_pipeline(Scaler(epochs.info),
                        Vectorizer(),
                        LogisticRegression(solver='liblinear'))
    # Scores are multiplied by 100 so they read as percentages.
    scores = 100 * cross_val_multiscore(
        clf, X, y, cv=5, n_jobs=1, scoring='roc_auc')
    # Average across the cross-validation splits.
    st_scores.append(np.mean(scores, axis=0))
# Report the rounded mean and standard deviation across subjects.
print(f"Average spatio-temporal ROC-AUC performance ({chroma}) = "
      f"{np.round(np.mean(st_scores))} % ({np.round(np.std(st_scores))})")

# %%
# Conclusion
# ----------
#
# Data were epoched, then decoding was performed on the HbO signal and the
# HbR signal. The HbO signal decodes the conditions with 6% greater accuracy
# than the HbR signal. For further discussion about the efficacy of fNIRS
# signals in decoding experimental condition see Luke et al. (2021)
# :footcite:`Luke2021.11.19.469225`.
def run_time_decoding(subject_id, cond1, cond2, event_id):
    """Decode the epoch event code over time from all good EEG channels.

    Reads the subject's raw recording and event file, epochs the data from
    -0.2 s to 1 s, and cross-validates (5 folds) a sliding
    logistic-regression decoder scored with ROC AUC, using the event code of
    each epoch as its label. The fold-averaged scores and the epoch time
    axis are written to a ``.mat`` file in the subject's EEG directory.

    Parameters
    ----------
    subject_id : int
        Subject number; formatted as ``"S%02d"`` to build the file names.
    cond1, cond2 : str
        Condition labels. They are only printed and used in the output file
        name; which epochs are decoded is determined by ``event_id``.
    event_id
        Passed unchanged to ``mne.Epochs``.

    Returns
    -------
    None
        The result is only written to disk.
    """
    print(cond1, cond2)
    subject = "S%02d" % subject_id

    # The trailing separator is part of ``data_path``: the two file names
    # below are built by plain string concatenation and rely on it.
    data_path = os.path.join(
        '/home/claire/DATA/Data_Face_House/' + subject + '/EEG/')
    raw_fname = data_path + subject + '-raw.fif'
    event_fname = data_path + subject + '-eve.fif'
    tmin, tmax = -0.2, 1

    raw = mne.io.read_raw_fif(raw_fname, preload=True)
    events = mne.read_events(event_fname)
    picks = mne.pick_types(raw.info, meg=False, eeg=True, stim=True,
                           eog=True, exclude='bads')

    # Read epochs
    epochs = mne.Epochs(raw, events, event_id, tmin, tmax, proj=True,
                        picks=picks, baseline=(None, 0.), preload=True,
                        decim=4)
    # No channel sub-selection here: every good EEG channel is used.
    epochs.pick_types(eeg=True, exclude='bads')

    # Fit and score one classifier per time sample.
    X = epochs.get_data()  # n_epochs, n_channels, n_times
    y = epochs.events[:, 2]  # event code of each epoch, used as class label

    clf = make_pipeline(StandardScaler(), LogisticRegression())
    time_decod = SlidingEstimator(clf, n_jobs=1, scoring='roc_auc')
    scores = cross_val_multiscore(time_decod, X, y, cv=5, n_jobs=1)

    # Mean scores across cross-validation splits
    scores = np.mean(scores, axis=0)

    # Save scores
    a_vs_b = '%s_vs_%s' % (cond1, cond2)
    print('a_vs_b = %s' % a_vs_b)
    fname_td = os.path.join(data_path,
                            '%s-td-auc-%s.mat' % (subject, a_vs_b))
    print('Saving %s' % fname_td)
    from scipy.io import savemat
    savemat(fname_td, {'scores': scores, 'times': epochs.times})
sfreq=epochs.info['sfreq'], freqs=freqs, output='power', n_cycles=n_cycles) n_epochs, n_channels, n_freqs, n_times = X.shape X = X.reshape(n_epochs, n_channels, -1) # collapse freqs and time # Run decoding on TFR output for analysis in analyses: fname = results_folder +\ '%s_tf_scores_%s_%s.npy' % (subject, 'Cue', analysis) y = np.array(events_behavior[analysis]) clf = make_pipeline( StandardScaler(), force_predict(LogisticRegression(), 'predict_proba', axis=1)) scorer = scorer_auc kwargs = dict() le = preprocessing.LabelEncoder() le.fit(y) y = le.transform(y) sel = np.where(y != 0)[0] td = SlidingEstimator(clf, scoring=make_scorer(scorer), n_jobs=24, **kwargs) td.fit(X[sel], y[sel]) scores = cross_val_multiscore(td, X[sel], y[sel], cv=StratifiedKFold(12)) scores = scores.mean(axis=0) scores = np.reshape(scores, (n_freqs, n_times)) # Save cross validated scores np.save(fname, np.array(scores))
LinearModel(LogisticRegression(solver='liblinear')),
)
# One copy of `clf` is fitted and scored (ROC AUC) per time sample.
time_decoder = SlidingEstimator(
    clf,
    scoring='roc_auc',
    n_jobs=n_jobs,
)
# Work on a copy of the labels and recode class 2 as 0.
y = epochs_df['label'].values.copy()
y[y == 2] = 0
# NOTE(review): `cv` is an integer (the number of distinct sessions). An
# integer `cv` is normally turned into a (Stratified)KFold splitter, which
# does not use `groups`; if leave-one-session-out splits are intended, a
# group-aware splitter would be needed -- confirm.
scores = cross_val_multiscore(
    time_decoder,
    X=label_ts,
    y=y,
    groups=epochs_df['session'],
    cv=len(epochs_df['session'].unique()),
    n_jobs=n_jobs,
)
# Refit on all data to extract the patterns of the fitted models.
time_decoder.fit(label_ts, y)
coef = get_coef(time_decoder, 'patterns_', inverse_transform=True)

# %%
# Plot


def fig_save(fig, path) -> None:
    """Write the HTML rendering of ``fig`` to ``path``.

    ``fig`` must provide a ``to_html()`` method returning a string
    (presumably a plotly figure -- confirm). ``path`` is opened in text
    write mode, so an existing file is overwritten.
    """
    html = fig.to_html()
    # NOTE(review): no explicit encoding is given, so the platform default
    # is used; consider passing one if the HTML can contain non-ASCII text.
    with open(path, 'w') as f:
        f.write(html)