def get_scores_from_gat(epochs, seed):
    """Fit a temporal-generalization decoder on a train split and score it.

    The epochs are split 80/20 into train and test sets, a standardized
    linear SVM is fitted at every time point via ``GeneralizingEstimator``
    and scored with ROC-AUC on the held-out split.  As a diagnostic, the
    20 first rows of the ranked (index, value) coefficient arrays of the
    second fitted estimator are printed.

    Parameters
    ----------
    epochs : object
        Must provide ``get_data()`` and an ``events`` array; the target is
        ``events[:, 2] == 2``.
    seed : int
        ``random_state`` handed to ``train_test_split``.

    Returns
    -------
    time_gen : GeneralizingEstimator
        The fitted estimator.
    scores : array
        Output of ``time_gen.score`` on the test split.
    """
    from sklearn.svm import LinearSVC

    data = epochs.get_data()
    targets = epochs.events[:, 2] == 2
    X_train, X_test, y_train, y_test = train_test_split(
        data, targets, test_size=0.2, random_state=seed)

    svc = LinearSVC(random_state=0, tol=1e-5, penalty='l2')
    time_gen = GeneralizingEstimator(make_pipeline(StandardScaler(), svc),
                                     scoring='roc_auc', n_jobs=-2)
    time_gen.fit(X_train, y_train)
    scores = time_gen.score(X_test, y_test)

    print('Units with highest weights of a classifier trained to predict subject'
          's number:')
    # Negating the magnitudes makes an ascending sort list the largest
    # weights first; the printed values are therefore negative magnitudes.
    neg_magnitude = np.negative(
        np.abs(time_gen.estimators_[1]._final_estimator.coef_))
    ranked_units = np.transpose(np.argsort(neg_magnitude))[0:20]
    ranked_values = np.transpose(np.sort(neg_magnitude))[0:20]
    print(list(zip(ranked_units, ranked_values)))
    return time_gen, scores
def train_test(X_train, y_train, X_test, y_test, clf=clf, scoring=scoring,
               n_jobs=n_jobs):
    """Fit a ``GeneralizingEstimator`` on one data set and score it on another.

    The defaults of ``clf``, ``scoring`` and ``n_jobs`` are taken from the
    names of the same spelling that exist in the enclosing scope when this
    function is defined.

    Returns
    -------
    The value returned by ``GeneralizingEstimator.score`` for the test data.
    """
    gat = GeneralizingEstimator(clf, scoring=scoring, n_jobs=n_jobs)
    gat.fit(X=X_train, y=y_train)
    test_scores = gat.score(X=X_test, y=y_test)
    return test_scores
def run_gat(name, decoder="ridge"):
    """
    Function to run Generalization Across Time (GAT).

    Parameters
    ----------
    name: str
        Name (pseudonym) of individual subject.
    decoder: str
        Specify type of classifier - 'ridge' for Ridge Regression (default),
        'lin-svm' for linear SVM, 'svm' for nonlinear (RBF) SVM and
        'log_reg' for Logistic Regression. The older 'svm-<variant>'
        spelling is still accepted: a variant containing 'non' selects the
        RBF kernel, any other variant the linear kernel.

    Returns
    -------
    scores:
        ROC-AUC scores from ``cross_val_multiscore`` averaged over the
        5 folds (first axis).
    preds: ndarray
        Array of shape (n_epochs, n_times, n_times) filled, fold by fold,
        with ``decision_function`` outputs for the held-out epochs.

    Raises
    ------
    KeyError
        If ``decoder`` does not name one of the supported classifiers.
    """
    # load high cloze epochs
    epochs = get_epochs(name)['song', 'voice']

    # Work out the SVM kernel. The previous ``decoder.split("-")`` unpacking
    # crashed on a bare "svm" and mis-parsed "lin-svm", so every dash
    # separated token that is not "svm" is now treated as the variant.
    kernel = "linear"
    if "svm" in decoder:
        variant = "".join(part for part in decoder.split("-")
                          if part != "svm")
        # bare "svm" means nonlinear (RBF), as documented above
        kernel = "rbf" if (not variant or "non" in variant) else "linear"
        decoder = "svm"

    # build classifier pipeline #
    # pick a machine learning algorithm to use (ridge/SVM/logistic regression)
    decoder_dict = {
        "ridge": RidgeClassifier(class_weight='balanced', random_state=42,
                                 solver="sag"),
        "svm": SVC(class_weight='balanced', kernel=kernel, random_state=42),
        "log_reg": LogisticRegression(class_weight='balanced',
                                      random_state=42)
    }

    # get data and targets once and reuse them for both analyses
    data = epochs.get_data()
    labels = epochs.events[:, -1]

    clf = make_pipeline(StandardScaler(), decoder_dict[decoder])
    gen_clf = GeneralizingEstimator(clf, scoring="roc_auc", n_jobs=4)
    scores = cross_val_multiscore(gen_clf, data, labels, cv=5,
                                  n_jobs=4).mean(0)

    # ``random_state`` is only meaningful (and only accepted by current
    # scikit-learn) together with ``shuffle=True``.
    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

    # calculate prediction confidence scores; size the output from the data
    # (last axis of ``data``) instead of assuming 225 time points
    n_times = data.shape[2]
    preds = np.empty((len(labels), n_times, n_times))
    for train, test in cv.split(data, labels):
        gen_clf.fit(data[train], labels[train])
        preds[test] = gen_clf.decision_function(data[test])

    return scores, preds  # return subject scores and prediction confidence
def NeuralNet(X_train, y_train, X_test, y_test, scorer, predict_mode, params):
    """Cross-validate a small neural network with temporal generalization.

    A standardized ``MLPClassifier`` (one hidden layer of 3 units, 'lbfgs'
    solver) is wrapped in a ``GeneralizingEstimator`` and evaluated with a
    shuffled 4-fold stratified cross-validation on the training data.

    Parameters
    ----------
    X_train, y_train :
        Data and targets used for the cross-validation.
    X_test, y_test, predict_mode, params :
        Accepted but not used by this function.
    scorer :
        'scorer_auc' selects ROC-AUC, 'accuracy' selects the estimator's
        default score (``scoring=None``). Any other value is passed to
        ``GeneralizingEstimator`` unchanged.

    Returns
    -------
    scores :
        The result of ``cross_val_multiscore``.
    """
    # Model
    model = MLPClassifier(solver='lbfgs', alpha=1e-5,
                          hidden_layer_sizes=(3,), random_state=1)

    # Cross-validation scheme
    cv = StratifiedKFold(n_splits=4, random_state=0, shuffle=True)

    # Pipeline: scale the features, then classify
    clf = make_pipeline(StandardScaler(), model)

    # Define scorer. Strings must be compared with ``==``; ``is`` tests
    # object identity and only works by accident of string interning.
    if scorer == 'scorer_auc':
        scorer = 'roc_auc'
    elif scorer == 'accuracy':
        scorer = None
    else:
        # NOTE(review): despite this message the value of ``scorer`` is left
        # as given, so accuracy is only used when the caller passed None.
        print('using accuracy as the scorer')

    # Learning and scoring
    time_gen = GeneralizingEstimator(clf, n_jobs=2, scoring=scorer)
    scores = cross_val_multiscore(time_gen, X_train, y_train, cv=cv, n_jobs=2)

    return scores
def test_get_coef_multiclass_full(n_classes, n_channels, n_times):
    """Check multiclass decoding and pattern extraction on synthetic epochs.

    Ten epochs are generated per class; only channel 0 carries the class
    index, every other channel is zero. The test verifies the shape and
    level of the cross-validated scores and that the extracted patterns
    vanish on all uninformative channels.
    """
    from sklearn.pipeline import make_pipeline
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import StratifiedKFold

    n_per_class = 10
    data = np.zeros((n_per_class * n_classes, n_channels, n_times))
    # Make only the first channel informative
    for label in range(n_classes):
        start = label * n_per_class
        data[start:start + n_per_class, 0] = label

    events = np.zeros((len(data), 3), int)
    events[:, 0] = np.arange(len(events))
    events[:, 2] = data[:, 0, 0]
    info = create_info(n_channels, 1000., 'eeg')
    epochs = EpochsArray(data, info, events, tmin=0)

    logreg = LogisticRegression(random_state=0, multi_class='ovr')
    clf = make_pipeline(Scaler(epochs.info), Vectorizer(), LinearModel(logreg))
    time_gen = GeneralizingEstimator(clf, 'roc_auc_ovr_weighted', verbose=True)

    X, y = epochs.get_data(), epochs.events[:, 2]
    n_splits = 3
    scores = cross_val_multiscore(time_gen, X, y,
                                  cv=StratifiedKFold(n_splits=n_splits),
                                  verbose=True)

    # With a single time point no (train time, test time) axes are expected
    if n_times > 1:
        expected_shape = (n_splits, n_times, n_times)
    else:
        expected_shape = (n_splits,)
    assert scores.shape == expected_shape
    assert_array_less(0.8, scores)

    clf.fit(X, y)
    patterns = get_coef(clf, 'patterns_', inverse_transform=True)
    assert patterns.shape == (n_classes, n_channels, n_times)
    assert_allclose(patterns[:, 1:], 0., atol=1e-7)  # no other channels useful
def LogisticRegression(X_train, y_train, X_test, y_test, scorer, predict_mode,
                       params):
    """Cross-validate a logistic regression with temporal generalization.

    A standardized, class-balanced ``linear_model.LogisticRegression`` is
    wrapped in a ``GeneralizingEstimator`` and evaluated with a shuffled
    4-fold stratified cross-validation on the training data.

    Parameters
    ----------
    X_train, y_train :
        Data and targets used for the cross-validation.
    X_test, y_test, predict_mode, params :
        Accepted but not used by this function.
    scorer :
        'scorer_auc' selects ROC-AUC, 'accuracy' selects the estimator's
        default score (``scoring=None``). Any other value is passed to
        ``GeneralizingEstimator`` unchanged.

    Returns
    -------
    scores :
        The result of ``cross_val_multiscore``.
    """
    # Model
    model = linear_model.LogisticRegression(class_weight='balanced')

    # Cross-validation scheme
    cv = StratifiedKFold(n_splits=4, random_state=0, shuffle=True)

    # Pipeline: scale the features, then classify
    clf = make_pipeline(StandardScaler(), model)

    # Define scorer. Strings must be compared with ``==``; ``is`` tests
    # object identity and only works by accident of string interning.
    if scorer == 'scorer_auc':
        scorer = 'roc_auc'
    elif scorer == 'accuracy':
        scorer = None
    else:
        # NOTE(review): despite this message the value of ``scorer`` is left
        # as given, so accuracy is only used when the caller passed None.
        print('using accuracy as the scorer')

    # Learning and scoring
    time_gen = GeneralizingEstimator(clf, n_jobs=2, scoring=scorer)
    scores = cross_val_multiscore(time_gen, X_train, y_train, cv=cv, n_jobs=2)

    return scores
def devp_estimator_gat(self, **kwargs):
    """Return an unfitted GAT estimator for Ridge regression decoding.

    The estimator is a ``GeneralizingEstimator`` around a
    ``StandardScaler`` + ``LinearModel(Ridge())`` pipeline, scored with
    ``make_scorer(scorer_spearman)`` and run with ``n_jobs=6``.

    NOTE(review): keyword arguments supplied by the caller are accepted but
    have no effect; the original implementation replaced them with an
    empty dict before building the estimator, and that is kept here.
    """
    from mne.decoding import (SlidingEstimator, GeneralizingEstimator, Scaler,
                              cross_val_multiscore, LinearModel, get_coef,
                              Vectorizer, CSP)
    from sklearn.linear_model import Ridge
    from sklearn.metrics import make_scorer
    from sklearn.model_selection import StratifiedKFold
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler
    from jr.gat import scorer_spearman

    ridge_pipeline = make_pipeline(StandardScaler(), LinearModel(Ridge()))
    # Caller-supplied kwargs are deliberately not forwarded (see docstring).
    return GeneralizingEstimator(ridge_pipeline,
                                 scoring=make_scorer(scorer_spearman),
                                 n_jobs=6)
n_vertices = len(stcs[0].data) n_epochs = len(epochs.events) X = np.zeros([n_epochs, n_vertices, n_times]) for jj, stc in enumerate(stcs): X[jj] = stc.data # Loop across each analysis for analysis in epoch_analyses: # define to-be-predicted values y = np.array(events[analysis]) # Define estimators depending on the analysis if ('cue_side' in analysis or 'cue_type' in analysis): clf = make_pipeline(StandardScaler(), LinearModel(LogisticRegression())) kwargs = dict() clf = GeneralizingEstimator(clf, scoring='roc_auc', n_jobs=24, **kwargs) le = preprocessing.LabelEncoder() le.fit(y) y = le.transform(y) elif 'sfreq' in analysis[:14]: clf = make_pipeline(StandardScaler(), LinearModel(Ridge())) scorer = scorer_spearman kwargs = dict() clf = GeneralizingEstimator(clf, scoring=make_scorer(scorer), n_jobs=24, **kwargs) y = np.array(y, dtype=float) elif 'angle' in analysis[:14]: clf = make_pipeline(
IX_plur = IX2sentences[v + 1]['plural'] mean_activation[v + 1]['singular'] = np.mean(X[IX_sing, :, :], axis=0) mean_activation[v + 1]['plural'] = np.mean(X[IX_plur, :, :], axis=0) mean_activation[v + 1]['difference'] = np.mean( X[IX_sing, :, :], axis=0) - np.mean(X[IX_plur, :, :], axis=0) Y = np.zeros(num_trials) Y[IX_sing] = 1 Y[IX_plur] = 2 del IX_sing, IX_plur assert all(Y > 0) print(list(Y).count(1), list(Y).count(2)) # clf = make_pipeline(StandardScaler(), LinearSVC(class_weight='balanced')) clf = make_pipeline(LinearSVC(class_weight='balanced')) time_gen = GeneralizingEstimator(clf, scoring='roc_auc', n_jobs=-1, verbose=True) cv = StratifiedKFold(n_splits=5, random_state=0, shuffle=True) #cv = StratifiedShuffleSplit(n_splits=20, random_state=0) scores = [] for i, (train, test) in enumerate(cv.split(X, Y)): time_gen.fit(X[train], Y[train]) scores.append(time_gen.score(X[test], Y[test])) # list (len=#cv-splits) of sublists (len=#timepoints): curr_weights_clf = np.asarray([ np.squeeze(w._final_estimator.coef_) for w in time_gen.estimators_ ]) weights_clf[v + 1]['splits'].append(curr_weights_clf) weights_clf[v + 1]['mean'] = np.mean(np.asarray(weights_clf[v +
scorer = make_scorer(get_scorer(scorer_spearman)) score = 'Spearman R' cv = KFold(5) if regressor == 'condition': y = binary_scaler(y) clf = make_pipeline(StandardScaler(), LogisticRegression()) scorer = 'roc_auc' score = 'AUC' cv = StratifiedKFold(5) n_jobs = -1 # set up estimator, get scores if decode_using == 'spatial': gen = GeneralizingEstimator(n_jobs=n_jobs, scoring=scorer, base_estimator=clf) scores = cross_val_multiscore(gen, X, y, cv=cv) elif decode_using == 'temporal': gen = SlidingEstimator(n_jobs=n_jobs, scoring=scorer, base_estimator=clf) scores = cross_val_multiscore(gen, X, y, cv=cv) else: # scoring defaults to neg mean squared so set to scorer # shuffle must be true when binary values, otherwise fold will only have # one value scores = cross_val_score(clf, X, y, scoring=scorer, #defaults to neg mean squared
typ='epoch_preprocessed', sbj=sbj, preload=True) sleep_epochs.event_id = sleep_event_id # event_id remapping. For wake this step works during preprocessing X1, y1 = get_Xy_balanced(wake_epochs, contrast1, n_sample=nsample) X2, y2 = get_Xy_balanced(sleep_epochs, contrast2, n_sample=nsample) X3, y3 = get_Xy_balanced(sleep_epochs, contrast3, n_sample=nsample) X4, y4 = get_Xy_balanced(sleep_epochs, contrast4, n_sample=nsample) X5, y5 = get_Xy_balanced(sleep_epochs, contrast5, n_sample=nsample) del wake_epochs del sleep_epochs clf = GeneralizingEstimator(make_pipeline( StandardScaler(), LogisticRegression(max_iter=4000)), scoring='accuracy', n_jobs=6) # clf = GeneralizingEstimator(make_pipeline(StandardScaler(), SVC(kernel='linear')), # scoring='accuracy', n_jobs=6) cv = StratifiedKFold(n_splits=2, shuffle=True) scores1, scores2, scores3, scores4, scores5 = [[] for i in range(5)] for train_idx, test_idx in cv.split(X1, y1): clf.fit(X1[train_idx], y=y1[train_idx]) scores1.append(clf.score(X1[test_idx], y=y1[test_idx])) scores2.append(clf.score(X2, y=y2)) scores3.append(clf.score(X3, y=y3)) scores4.append(clf.score(X4, y=y4)) scores5.append(clf.score(X5, y=y5))
# Encode the working-memory task labels as integers and keep only the
# trials whose encoded label is non-zero.
le = LabelEncoder()
le.fit(y)
y = le.transform(y)
sel = np.where(y != 0)[0]
y = y[sel]
X = epochs._data[sel]
# Same encoding and selection for the control task; ``le`` and ``sel`` are
# reused and now refer to the control-task data.
le = LabelEncoder()
le.fit(y_con)
y_con = le.transform(y_con)
sel = np.where(y_con != 0)[0]
y_con = y_con[sel]
X_con = epochs_con._data[sel]
# Define estimators depending on the analysis
clf = make_pipeline(StandardScaler(), LinearModel(LogisticRegression()))
kwargs = dict()  # placeholder: no extra GeneralizingEstimator options
est = GeneralizingEstimator(clf, scoring='roc_auc', n_jobs=24, **kwargs)
# Run decoding
cv = StratifiedKFold(12)
scores = list()
scores_con = list()
# Folds are defined on the WM task only; the full control-task set is
# scored with every fold's fitted estimator.
for train, test in cv.split(X, y):
    est.fit(X[train], y[train])  # train during WM task
    score = est.score(X[test], y[test])  # test during WM task
    score_con = est.score(X_con, y_con)  # test during control task
    scores.append(score)
    scores_con.append(score_con)
# Average the per-fold score arrays across folds
scores = np.mean(scores, axis=0)
scores_con = np.mean(scores_con, axis=0)
# save cross-validated scores
fname = results_folder +\
    '%s_scores_%s.npy' % (subject, analysis)
cross_val_multiscore, LinearModel, get_coef, Vectorizer, CSP) from pyitab.ext.sklearn._validation import cross_validate from collections import Counter from imblearn.under_sampling import RandomUnderSampler import h5py import hdf5storage import numpy as np from scipy.io import loadmat, savemat clf = make_pipeline(Normalizer(), # z-score normalization SelectKBest(f_classif, k=50), # select features for speed LinearModel(LogisticRegression(C=1, solver='liblinear'))) #time_decod = SlidingEstimator(clf, scoring='accuracy') time_decod = GeneralizingEstimator(clf, scoring='accuracy') shared = "/run/user/1000/gvfs/smb-share:server=192.168.30.54,share=meg_data_analisi/HCP_Motor_Task_analysis/109123/" scores_ses = [] decoders = [] bigdata = [] for f in os.listdir(shared): fname = os.path.join(shared, f) mat = h5py.File(fname) data = mat['powerbox'][:] data /= np.nanmean(data) data = np.float32(data.swapaxes(1, 2))
def main():
    """Run frequency-generalization LDA decoding for samples 1 to 21.

    For every sample the epochs are decomposed with complex Morlet wavelets
    at 15 log-spaced frequencies between 2 and 25. At each time index the
    real and imaginary parts are stacked along the channel axis, reduced
    with ``pca(80, ...)`` and decoded with a shrinkage LDA inside a
    ``GeneralizingEstimator`` that generalizes across the frequency axis
    (5-fold cross-validation, accuracy). Per time index a bar plot of the
    diagonal and an image of the full matrix are written as PNG files; the
    flattened matrices of all time indices are written to one CSV file per
    sample. All output goes below ``Results/lda/freq_gen_matrix/``.
    """
    model_type = "lda"
    exp_name = "freq_gen_matrix/"

    # The frequency grid is the same for every sample, so build it once.
    freqs = np.logspace(*np.log10([2, 25]), num=15)
    n_cycles = freqs / 4.
    string_freqs = [round(x, 2) for x in freqs]

    for sample in range(1, 22):
        print("sample {}".format(sample))
        out_dir = "Results/{}/{}/sample_{}".format(model_type, exp_name,
                                                   sample)
        # makedirs also creates missing parent folders (os.mkdir raised
        # when "Results/lda/freq_gen_matrix" did not exist yet) and
        # tolerates an already existing directory.
        os.makedirs(out_dir, exist_ok=True)

        epochs = get_epochs(sample, scale=False)
        y_train = epochs.events[:, 2]

        print("applying morlet wavelet")
        wavelet_output = tfr_array_morlet(epochs.get_data(),
                                          sfreq=epochs.info['sfreq'],
                                          freqs=freqs,
                                          n_cycles=n_cycles,
                                          output='complex')

        n_time_points = wavelet_output.shape[3]
        time_results = np.zeros((n_time_points, len(freqs), len(freqs)))

        for time_idx in range(n_time_points):
            print("time: {}".format(time_idx))
            wavelet_epochs = wavelet_output[:, :, :, time_idx]
            # Stack real and imaginary parts as additional channels
            wavelet_epochs = np.append(wavelet_epochs.real,
                                       wavelet_epochs.imag,
                                       axis=1)

            wavelet_info = mne.create_info(ch_names=wavelet_epochs.shape[1],
                                           sfreq=epochs.info['sfreq'],
                                           ch_types='mag')
            wavelet_epochs = mne.EpochsArray(wavelet_epochs,
                                             info=wavelet_info,
                                             events=epochs.events)

            x_train = pca(80, wavelet_epochs, plot=False)

            model = LinearModel(
                LinearDiscriminantAnalysis(solver='lsqr', shrinkage='auto'))
            # The last data axis holds frequencies here, so the estimator
            # generalizes across frequencies rather than across time.
            freq_gen = GeneralizingEstimator(model,
                                             n_jobs=1,
                                             scoring='accuracy',
                                             verbose=True)
            scores = cross_val_multiscore(freq_gen,
                                          x_train,
                                          y_train,
                                          cv=5,
                                          n_jobs=1)
            scores = np.mean(scores, axis=0)
            time_results[time_idx] = scores

            # Bar plot of the diagonal (train frequency == test frequency).
            # x and y are passed by keyword: current seaborn releases no
            # longer accept them positionally.
            sns.set()
            ax = sns.barplot(
                x=np.sort(string_freqs),
                y=np.diag(scores),
            )
            ax.set(ylim=(0, 0.8),
                   xlabel='Frequencies',
                   ylabel='Accuracy',
                   title='Cross Val Accuracy {} for Subject {} for Time {}'.
                   format(model_type, sample, time_idx))
            ax.axhline(0.12, color='k', linestyle='--')
            ax.figure.set_size_inches(8, 6)
            ax.figure.savefig(
                "{}/time_{}_accuracy.png".format(out_dir, time_idx),
                dpi=300)
            plt.close('all')

            # Image of the full frequency-generalization matrix
            fig, ax = plt.subplots(1, 1)
            im = ax.imshow(scores,
                           interpolation='lanczos',
                           origin='lower',
                           cmap='RdBu_r',
                           extent=[2, 25, 2, 25],
                           vmin=0.,
                           vmax=0.8)
            ax.set_xlabel('Testing Frequency (hz)')
            ax.set_ylabel('Training Frequency (hz)')
            ax.set_title(
                'Frequency generalization for Subject {} at Time {}'.format(
                    sample, time_idx))
            plt.colorbar(im, ax=ax)
            ax.grid(False)
            ax.figure.savefig(
                "{}/time_{}_matrix.png".format(out_dir, time_idx),
                dpi=300)
            plt.close('all')

        # One CSV row per time index, one column per matrix cell
        time_results = time_results.reshape(time_results.shape[0], -1)
        all_results_df = pd.DataFrame(time_results)
        all_results_df.to_csv(
            "{}/all_time_matrix_results.csv".format(out_dir))
def _make_time_estimator(clf, decoding_method, generalize_across_time, n_jobs):
    """Wrap *clf* in a per-time-point estimator for a 'standard*' method."""
    if 'reg' in decoding_method:
        if 'r2' in decoding_method:
            scoring = metrics.make_scorer(metrics.r2_score)
        else:
            scoring = metrics.make_scorer(metrics.mean_squared_error)
    else:
        scoring = 'accuracy'

    if generalize_across_time:
        return GeneralizingEstimator(clf, scoring=scoring, n_jobs=n_jobs)
    return SlidingEstimator(clf, scoring=scoring, n_jobs=n_jobs)


def decode(epochs, get_y_label_func, epoch_filter=None,
           decoding_method='standard', sliding_window_size=None,
           sliding_window_step=None, n_jobs=multiprocessing.cpu_count(),
           equalize_event_counts=True, only_fit=False,
           generalize_across_time=True):
    """Basic flow for decoding.

    Labels the epochs, optionally equalizes the number of epochs per label,
    builds the classification pipeline selected by ``decoding_method`` and
    either only fits it or evaluates it with a 5-fold stratified
    cross-validation.

    Parameters
    ----------
    epochs :
        Epochs object. NOTE(review): when ``equalize_event_counts`` is true
        the event codes and ``event_id`` of the object being worked on are
        overwritten; with ``epoch_filter=None`` that is the caller's object.
    get_y_label_func : callable
        Called with the (filtered) epochs; returns one label per epoch.
    epoch_filter :
        If not None, used to index ``epochs`` before anything else.
    decoding_method : str
        Anything starting with 'standard' (StandardScaler + linear SVM, or
        Ridge when the name contains 'reg'; 'r2' selects the R2 scorer;
        unless the name contains 'raw', a sliding-window averaging step is
        applied first), or one of 'ERP_cov', 'Xdawn_cov', 'Hankel_cov'.
    sliding_window_size, sliding_window_step :
        Required for 'standard' methods that do not contain 'raw'.
    n_jobs : int
        Parallel jobs for the time-resolved estimators.
    equalize_event_counts : bool
        Whether to equalize the number of epochs per label.
    only_fit : bool
        Fit on all data without scoring when true.
    generalize_across_time : bool
        Use ``GeneralizingEstimator`` (true) or ``SlidingEstimator``.

    Returns
    -------
    dict
        Keys: procedure, estimator, scores, pipeline, preprocess, cv,
        times, config.

    Raises
    ------
    ValueError
        If ``decoding_method`` is not recognized.
    """
    config = dict(equalize_event_counts=equalize_event_counts,
                  only_fit=only_fit,
                  sliding_window_size=sliding_window_size,
                  sliding_window_step=sliding_window_step,
                  decoding_method=decoding_method,
                  generalize_across_time=generalize_across_time,
                  epoch_filter=str(epoch_filter))

    if epoch_filter is not None:
        epochs = epochs[epoch_filter]

    #-- Classify epochs into groups (training epochs)
    y_labels = get_y_label_func(epochs)

    if equalize_event_counts:
        # Use the labels as event codes so MNE can balance the groups
        epochs.events[:, 2] = y_labels
        epochs.event_id = {str(label): label for label in np.unique(y_labels)}
        min_n_items_per_y_label = min(len(epochs[cond])
                                      for cond in epochs.event_id)
        print("\nEqualizing the number of epochs to %d per condition..." %
              min_n_items_per_y_label)
        # pass a real list rather than a dict view
        epochs.equalize_event_counts(list(epochs.event_id))
        y_labels = epochs.events[:, 2]

    print("The epochs were classified into %d groups:" % len(set(y_labels)))
    for g in set(y_labels):
        print("Group {:}: {:} epochs".format(g, sum(np.array(y_labels) == g)))

    #-- Create the decoding pipeline
    print("Creating the classification pipeline...")

    epochs_data = epochs.get_data()

    preprocess_pipeline = None
    is_standard = decoding_method.startswith('standard')

    if is_standard:
        if 'reg' in decoding_method:
            clf = make_pipeline(StandardScaler(), Ridge())
        else:
            clf = make_pipeline(
                StandardScaler(),
                svm.SVC(C=1, kernel='linear', class_weight='balanced'))

        if 'raw' not in decoding_method:
            assert sliding_window_size is not None
            assert sliding_window_step is not None
            preprocess_pipeline = make_pipeline(
                umne.transformers.SlidingWindow(
                    window_size=sliding_window_size,
                    step=sliding_window_step,
                    average=True))

    # In the Riemannian pipelines ``penalty`` is passed by keyword: the
    # positional form LogisticRegression('l2') is rejected by current
    # scikit-learn releases.
    elif decoding_method == 'ERP_cov':
        clf = make_pipeline(
            UnsupervisedSpatialFilter(PCA(20), average=False),
            ERPCovariances(
                estimator='lwf'),  # todo how to apply sliding window?
            CSP(30, log=False),
            TangentSpace('logeuclid'),
            LogisticRegression(penalty='l2'))  # todo why logistic regression?

    elif decoding_method == 'Xdawn_cov':
        clf = make_pipeline(
            UnsupervisedSpatialFilter(PCA(50), average=False),
            XdawnCovariances(12, estimator='lwf', xdawn_estimator='lwf'),
            TangentSpace('logeuclid'),
            LogisticRegression(penalty='l2'))

    elif decoding_method == 'Hankel_cov':
        clf = make_pipeline(
            UnsupervisedSpatialFilter(PCA(70), average=False),
            HankelCovariances(delays=[1, 8, 12, 64], estimator='oas'),
            CSP(15, log=False),
            TangentSpace('logeuclid'),
            LogisticRegression(penalty='l2'))

    else:
        raise ValueError(
            'Unknown decoding method: {:}'.format(decoding_method))

    print('\nDecoding pipeline:')
    for step_no, (_, step) in enumerate(clf.steps, start=1):
        print('Step #{:}: {:}'.format(step_no, step))

    if preprocess_pipeline is not None:
        print('\nApplying the pre-processing pipeline:')
        for step_no, (_, step) in enumerate(preprocess_pipeline.steps,
                                            start=1):
            print('Step #{:}: {:}'.format(step_no, step))
        epochs_data = preprocess_pipeline.fit_transform(epochs_data)

    if only_fit:
        #-- Only fit the decoders
        procedure = 'only_fit'
        scores = None
        cv = None

        if is_standard:
            estimator = _make_time_estimator(clf, decoding_method,
                                             generalize_across_time, n_jobs)
        else:
            estimator = clf

        estimator.fit(X=epochs_data, y=y_labels)

    else:
        #-- Classify & score -- cross-validation
        procedure = 'fit_and_score'
        print(
            "\nCreating a classifier and calculating accuracy scores (this may take some time)..."
        )

        cv = StratifiedKFold(n_splits=5)

        if is_standard:
            estimator = _make_time_estimator(clf, decoding_method,
                                             generalize_across_time, n_jobs)
            scores = cross_val_multiscore(estimator=estimator,
                                          X=epochs_data,
                                          y=np.array(y_labels),
                                          cv=cv)
        else:
            scores = _run_cross_validation(X=epochs_data,
                                           y=np.array(y_labels),
                                           clf=clf,
                                           cv=cv)
            estimator = 'None'  # Estimator is not defined in the case of Riemannian decoding

    # One time stamp per sample of the (possibly window-averaged) data
    times = np.linspace(epochs.tmin, epochs.tmax, epochs_data.shape[2])

    return dict(procedure=procedure,
                estimator=estimator,
                scores=scores,
                pipeline=clf,
                preprocess=preprocess_pipeline,
                cv=cv,
                times=times,
                config=config)
# Keep only eye tracker signal (x and y eye position) epochs.pick_channels(['UADC009-2104', 'UADC010-2104']) for analysis in epoch_analyses: fname = results_folder +\ '%s_scores_%s_%s.npy' % (subject, epoch_type, analysis) # define to-be-predicted values y = np.array(events[analysis]) # Define estimators depending on the analysis if 'angle' in analysis[:14]: clf = AngularRegression(make_pipeline(StandardScaler(), LinearModel(Ridge())), independent=False) scorer = scorer_angle kwargs = dict() gat = GeneralizingEstimator(clf, scoring=make_scorer(scorer), n_jobs=24, **kwargs) y = np.array(y, dtype=float) elif 'sfreq' in analysis[:14]: clf = make_pipeline(StandardScaler(), LinearModel(Ridge())) scorer = scorer_spearman kwargs = dict() gat = GeneralizingEstimator(clf, scoring=make_scorer(scorer), n_jobs=24, **kwargs) y = np.array(y, dtype=float) elif ('cue_side' in analysis or 'cue_type' in analysis): clf = make_pipeline(StandardScaler(), LinearModel(LogisticRegression())) kwargs = dict()
epochs.equalize_event_counts(epochs.event_id) # Extract half of the epochs for similar SNR in all conditions epochs_nr = len(epochs['left/grating']) epochs_range = np.random.permutation(np.arange(0, epochs_nr, 1)) np.save('%s_epochs_range.npy' % subject, epochs_range) X = np.concatenate( (epochs["face"][epochs_range[:int(epochs_nr / 2)]].get_data(), epochs["grating"][epochs_range[:int(epochs_nr / 2)]].get_data())) y = np.concatenate( (np.zeros(int(epochs_nr / 2)), np.ones(int(epochs_nr / 2)))) cv = StratifiedKFold(n_splits=10, shuffle=True) clf = make_pipeline(StandardScaler(), LogisticRegression()) time_gen = GeneralizingEstimator(clf, scoring='roc_auc', n_jobs=n_jobs) time_gen.fit(X, y) scores = cross_val_multiscore(time_gen, X, y, cv=cv) # Save results joblib.dump(time_gen, "%s_time_gen_svm.jbl" % subject) np.save("%s_time_gen_score_svm.npy" % subject, scores) X_left = np.concatenate( (epochs["left/face"].get_data(), epochs["left/grating"].get_data())) y_left = np.concatenate((np.zeros(len(epochs["left/face"].get_data())), np.ones(len(epochs["left/grating"].get_data())))) clf = make_pipeline(StandardScaler(), LogisticRegression()) time_gen_left = GeneralizingEstimator(clf, scoring='roc_auc', n_jobs=n_jobs)
tmax=tmax, preload=True, baseline=(None, 0), decim=10) epochs.pick_types(meg=True, ref_meg=False) # Loop across analysis for analysis in analyses: fname = results_folder +\ '%s_scores_%s_%s.npy' % (subject, 'Cue', analysis) # define to-be-predicted values y = np.array(events_behavior[analysis]) clf = make_pipeline(StandardScaler(), LinearModel(LogisticRegression())) kwargs = dict() le = LabelEncoder() le.fit(y) y = le.transform(y) sel = np.where(y != 0)[0] # Run decoding cv = StratifiedKFold(12) scores = list() X = epochs._data gat = GeneralizingEstimator(clf, scoring='roc_auc', n_jobs=24, **kwargs) for train, test in cv.split(X[sel], y[sel]): gat.fit(X[sel][train], y[sel][train]) score = gat.score(X[sel][test], y[sel][test]) scores.append(score) scores = np.mean(scores, axis=0) # keep scores np.save(fname, np.array(scores))
# Show both the time-series and the topomap panels in seconds
joint_kwargs = dict(ts_args=dict(time_unit='s'),
                    topomap_args=dict(time_unit='s'))
evoked.plot_joint(times=np.arange(0., .500, .100), title='patterns',
                  **joint_kwargs)

###############################################################################
# Temporal Generalization
# -----------------------
#
# This runs the analysis used in [1]_ and further detailed in [2]_
#
# The idea is to fit the models on each time instant and see how it
# generalizes to any other time point.

# define the Temporal Generalization object
time_gen = GeneralizingEstimator(clf, n_jobs=1, scoring='roc_auc')

# 5-fold cross-validation; one score array per split
scores = cross_val_multiscore(time_gen, X, y, cv=5, n_jobs=1)

# Mean scores across cross-validation splits
scores = np.mean(scores, axis=0)

# Plot the diagonal (it's exactly the same as the time-by-time decoding above)
fig, ax = plt.subplots()
ax.plot(epochs.times, np.diag(scores), label='score')
# 0.5 is drawn as the chance level of the AUC score
ax.axhline(.5, color='k', linestyle='--', label='chance')
ax.set_xlabel('Times')
ax.set_ylabel('AUC')
ax.legend()
# vertical line at time zero
ax.axvline(.0, color='k', linestyle='-')
ax.set_title('Decoding MEG sensors over time')
def run_gat(subj, decoder="ridge", n_jobs=2):
    """
    Function to run Generalization Across Time (GAT).

    Parameters
    ----------
    subj: int
        Subject identifier passed to ``get_epochs``.
    decoder: str
        Specify type of classifier - 'ridge' for Ridge Regression (default),
        'lin-svm' for linear SVM, 'svm' for nonlinear (RBF) SVM and
        'log_reg' for Logistic Regression. The older 'svm-<variant>'
        spelling is still accepted: a variant containing 'non' selects the
        RBF kernel, any other variant the linear kernel.
    n_jobs: int
        The number of jobs to run in parallel.

    Returns
    -------
    scores:
        ROC-AUC scores from ``cross_val_multiscore`` averaged over the
        5 folds (first axis).
    preds: ndarray
        Array of shape (n_epochs, n_times, n_times) filled, fold by fold,
        with ``decision_function`` outputs for the held-out epochs.
    patterns:
        Output of ``get_coef(clf, 'patterns_', inverse_transform=True)``
        for the pipeline fitted on the vectorized data.

    Raises
    ------
    KeyError
        If ``decoder`` does not name one of the supported classifiers.
    """
    # load cue A and cue B epochs
    epochs = get_epochs(subj)['Correct A', 'Correct B']

    # Work out the SVM kernel. The previous ``decoder.split("-")`` unpacking
    # crashed on a bare "svm" and mis-parsed "lin-svm", so every dash
    # separated token that is not "svm" is now treated as the variant.
    kernel = "linear"
    if "svm" in decoder:
        variant = "".join(part for part in decoder.split("-")
                          if part != "svm")
        # bare "svm" means nonlinear (RBF), as documented above
        kernel = "rbf" if (not variant or "non" in variant) else "linear"
        decoder = "svm"

    # build classifier pipeline #
    # pick a machine learning algorithm to use (ridge/SVM/logistic regression)
    decoder_dict = {
        "ridge": RidgeClassifier(class_weight='balanced', random_state=42,
                                 solver="sag"),
        "svm": SVC(class_weight='balanced', kernel=kernel, random_state=42),
        "log_reg": LogisticRegression(class_weight='balanced',
                                      random_state=42)
    }

    # get data and targets
    data = epochs.get_data()
    labels = epochs.events[:, -1]

    # create classifier pipeline
    clf = make_pipeline(StandardScaler(), decoder_dict[decoder])
    gen_clf = GeneralizingEstimator(clf, scoring="roc_auc", n_jobs=n_jobs)

    # compute cross validated performance scores
    scores = cross_val_multiscore(gen_clf, data, labels, cv=5,
                                  n_jobs=n_jobs).mean(0)

    # calculate prediction confidence scores
    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    preds = np.empty((len(labels), data.shape[2], data.shape[2]))
    for train, test in cv.split(data, labels):
        gen_clf.fit(data[train], labels[train])
        preds[test] = gen_clf.decision_function(data[test])

    # compute topographical patterns
    dat = Vectorizer().fit_transform(data)
    clf.fit(dat, labels)
    dat = dat - dat.mean(0, keepdims=True)

    # The classifier is always the last pipeline step, so its weights can be
    # read (and the patterns stored) without branching on the decoder name.
    final_estimator = clf.steps[-1][1]
    filt_ = final_estimator.coef_.copy()

    # Compute patterns using Haufe's trick: A = Cov_X . W . Precision_Y
    # cf.Haufe, et al., 2014, NeuroImage,
    # doi:10.1016/j.neuroimage.2013.10.067)
    inv_y = 1.
    patt_ = np.cov(dat.T).dot(filt_.T.dot(inv_y)).T

    # store the patterns on the classifier so get_coef can find them
    final_estimator.patterns_ = patt_

    # back transform using steps in pipeline
    patterns = get_coef(clf, 'patterns_', inverse_transform=True)

    # return subject scores, prediction confidence and topographical patterns
    return scores, preds, patterns
mean_test2 = [] ind_trial = 0 while ind_trial <= len(data_test2) - 5: mean_test2.append(mean(data_test2[ind_trial:(ind_trial + 4)], 0)) print ind_trial ind_trial += 5 #--------------------------- # define decoding pipeline and run #--------------------------- # Use AUC because chance level is same regardless of the class balance clf = make_pipeline(StandardScaler(), LinearModel(LogisticRegression())) time_gen = GeneralizingEstimator(clf, n_jobs=1, scoring='roc_auc') # # We will train the classifier on all stim face vs house trials # and test on all images face vs house trials. #le = LabelEncoder() # train on stim time_gen.fit(X=np.array(mean_train1 + mean_train2), y=np.array([0] * len(mean_train1) + [1] * len(mean_train2))) # score on imagery scores = time_gen.score(X=np.array(mean_test1 + mean_test2), y=np.array([0] * len(mean_test1) +
['target_angle_cue_right_angle', 'right_angle'], ['left_angle', 'target_angle_cue_left_angle'], ['right_angle', 'target_angle_cue_right_angle']] # Loop across each pair of analyses for paired_analysis in paired_analyses: y_test = np.array(events[paired_analysis[0]]) y_train = np.array(events[paired_analysis[1]]) # Define estimators depending on the analysis if 'angle' in paired_analysis[0][:14]: clf = make_pipeline( StandardScaler(), LinearModel(AngularRegression(Ridge(), independent=False))) scorer = scorer_angle kwargs = dict() gat = GeneralizingEstimator(clf, scoring=make_scorer(scorer), n_jobs=24, **kwargs) y_test = np.array(y_test, dtype=float) y_train = np.array(y_train, dtype=float) elif 'sfreq' in paired_analysis[0][:14]: clf = make_pipeline(StandardScaler(), LinearModel(Ridge())) scorer = scorer_spearman kwargs = dict() gat = GeneralizingEstimator(clf, scoring=make_scorer(scorer), n_jobs=24, **kwargs) y_test = np.array(y_test, dtype=float) y_train = np.array(y_train, dtype=float) # only consider trials with correct fixation sel = np.where(events['is_eye_fixed'] == 1)[0]
epochs = mne.read_epochs(os.path.join(data_path, '%s-epo.fif' % subject), preload=True) epochs.interpolate_bads(reset_bads=True) all_epochs.append(epochs) epochs = mne.concatenate_epochs(all_epochs) decim = 2 epochs.decimate(decim) # We will train the classifier on all stim face vs house trials # and test on all images face vs house trials. clf = make_pipeline(StandardScaler(), LogisticRegression()) time_gen = GeneralizingEstimator(clf, scoring='roc_auc', n_jobs=6) le = LabelEncoder() # train on stim time_gen.fit(X=epochs['stim'].get_data(), y=le.fit_transform(epochs['stim'].events[:, 2])) # score on imagery scores = time_gen.score(X=epochs['imag'].get_data(), y=le.fit_transform(epochs['imag'].events[:, 2])) # Plot fig, ax = plt.subplots(1) im = ax.matshow(scores, vmin=0,
# generate iterator for cross validation kf = StratifiedKFold(n_splits=k_folds, shuffle=True) cv_iter = kf.split(np.zeros(X.shape), labels) # pipeline for classification cl = make_pipeline( RobustScaler(), PCA(n_components=var_exp), LinearSVC(max_iter=10000, dual=False, penalty="l1") ) # temporal generalisation temp_genr = GeneralizingEstimator( cl, n_jobs=1, scoring="roc_auc" ) # cross validation scores = cross_val_multiscore(temp_genr, X, labels, cv=cv_iter, n_jobs=-1) scores_all.append(scores) scores_all = np.vstack(scores_all) scores_path = op.join( output_dir, "reg_vs_odd_svm-{}.npy".format(subject) ) np.save(scores_path, scores_all)
# generate iterator for cross validation kf = StratifiedKFold(n_splits=k_folds, shuffle=True) cv_iter = kf.split(np.zeros(X.shape), labels) # pipeline for classification cl = make_pipeline( RobustScaler(), PCA(n_components=var_exp), LinearSVC(max_iter=10000, dual=False, penalty="l1") ) # temporal generalisation temp_genr = GeneralizingEstimator( cl, n_jobs=1, scoring=make_scorer(accuracy_score) ) # cross validation scores = cross_val_multiscore(temp_genr, X, labels, cv=cv_iter, n_jobs=-1) scores_all = [] scores_all.append(scores) scores_all = np.vstack(scores_all) scores_path = op.join( output_dir, "reg_vs_odd_svm_balanced-{}.npy".format(subject) )
raw.filter(1., 30., fir_design='firwin') # Band pass filtering signals events = mne.read_events(events_fname) event_id = {'Auditory/Left': 1, 'Auditory/Right': 2, 'Visual/Left': 3, 'Visual/Right': 4} tmin = -0.050 tmax = 0.400 decim = 2 # decimate to make the example faster to run epochs = mne.Epochs(raw, events, event_id=event_id, tmin=tmin, tmax=tmax, proj=True, picks=picks, baseline=None, preload=True, reject=dict(mag=5e-12), decim=decim) ############################################################################### # We will train the classifier on all left visual vs auditory trials # and test on all right visual vs auditory trials. clf = make_pipeline(StandardScaler(), LogisticRegression()) time_gen = GeneralizingEstimator(clf, scoring='roc_auc', n_jobs=1, verbose=True) # Fit classifiers on the epochs where the stimulus was presented to the left. # Note that the experimental condition y indicates auditory or visual time_gen.fit(X=epochs['Left'].get_data(), y=epochs['Left'].events[:, 2] > 2) ############################################################################### # Score on the epochs where the stimulus was presented to the right. scores = time_gen.score(X=epochs['Right'].get_data(), y=epochs['Right'].events[:, 2] > 2) ############################################################################### # Plot fig, ax = plt.subplots(1) im = ax.matshow(scores, vmin=0, vmax=1., cmap='RdBu_r', origin='lower',
clf.fit(X, y) # Calculate scores for classification sl = SlidingEstimator(clf) scores_time_decoding = cross_val_multiscore(sl, X, y) # Append the results for each subject if file == './epochs/101_base-epo.fif': scores_td_base = scores_time_decoding else: scores_td_base = np.append(scores_td_base, scores_time_decoding, axis=0) # Again, calculate scores with a receiver operating curve gen = GeneralizingEstimator(clf, scoring='roc_auc') scores_gat = cross_val_multiscore(gen, X, y) if file == './epochs/101_base-epo.fif': scores_gat_base = scores_gat else: scores_gat_base = np.append(scores_gat_base, scores_gat, axis=0) for file in glob.glob(os.path.join(path, '*reg-epo.fif')): epochs_reg = mne.read_epochs(file, preload=True) epochs_base.resample(256) epochs_base.crop(tmin=-0.25, tmax=epochs_base.tmax) epochs_reg_eq = epochs_reg.copy().equalize_event_counts(['A', 'B'])[0]
# NOTE(review): this excerpt starts inside the argument list of a call opened
# before the visible text; the next lines only supply its trailing keyword
# arguments and closing parenthesis.
                    tmin=tmin, tmax=tmax, proj=True, picks=picks,
                    baseline=None, preload=True,
                    reject=dict(mag=5e-12), decim=decim, verbose='error')

###############################################################################
# We will train the classifier on all left visual vs auditory trials
# and test on all right visual vs auditory trials.
clf = make_pipeline(StandardScaler(), LogisticRegression(solver='lbfgs'))
time_gen = GeneralizingEstimator(clf, scoring='roc_auc', n_jobs=1,
                                 verbose=True)

# Fit classifiers on the epochs where the stimulus was presented to the left.
# Note that the experimental condition y indicates auditory or visual:
# y is True where the event code in column 2 is greater than 2.
# NOTE(review): presumably codes above 2 are the visual conditions; the
# event-code mapping is not visible in this excerpt -- confirm.
time_gen.fit(X=epochs['Left'].get_data(),
             y=epochs['Left'].events[:, 2] > 2)

###############################################################################
# Score on the epochs where the stimulus was presented to the right.
# The same `> 2` labelling is applied, so train and test targets agree.
scores = time_gen.score(X=epochs['Right'].get_data(),
                        y=epochs['Right'].events[:, 2] > 2)

###############################################################################
# Plot
fig, ax = plt.subplots(1)
# NOTE(review): the matshow call below is cut off in this excerpt; its
# remaining arguments and closing parenthesis are not visible here.
im = ax.matshow(scores,
path = '/home/carlos/mount/megmri03/monks'

# Subject folders: directory entries whose names contain neither a dot nor an
# underscore.
subjects = [s for s in os.listdir(path) if '.' not in s and '_' not in s]

# Load monk data in the form of n_samples x n_voxels x n_time.
# Only the first subject is requested; the 'subjects.csv' alternative stays
# disabled.
ds, _, _ = load_subject_ds(
    path,
    subjects[:1],
    # os.path.join(path, 'subjects.csv'),
    'meditation_permut1.conf',
    'fmri',
    prepro=MonksPreprocessingPipeline(),
    roi_labels=atlas_dict,
)

# Keep only the samples of group 'E'.
ds = SampleSlicer({'group': ['E']}).transform(ds)

# Linear SVM on standardized features, evaluated across time with accuracy.
clf = make_pipeline(StandardScaler(), LinearSVC(C=1))
time_gen = GeneralizingEstimator(clf, scoring='accuracy', n_jobs=20)

scores_dict = {}

# Generalization of time, one template network at a time.
for network in os.listdir(path_templates):
    # Drop the last 21 characters of the template file name
    # (presumably a fixed suffix -- TODO confirm).
    network = network[:-21]

    # Select the features whose `network` attribute matches '!0'.
    ds_network = FeatureSlicer({network: ['!0']}).transform(ds)
    n_samples, n_voxels = ds_network.shape

    # Regroup rows into blocks of 135, then move that axis last so that
    # X has shape (n_blocks, n_voxels, 135).
    data = ds_network.samples.reshape(-1, 135, n_voxels)
    X = np.moveaxis(data, 1, -1)

    # Alternating 0/1 labels, one per block.
    y = np.arange(data.shape[0]) % 2
evoked.plot_joint(title='patterns', times=np.arange(0., .500, .100),
                  **joint_kwargs)

###############################################################################
# Temporal Generalization
# -----------------------
#
# This runs the analysis used in [1]_ and further detailed in [2]_
#
# A model is fitted at every time instant and then evaluated at every other
# time point, showing how well each one generalizes across time.

# Build the temporal generalization estimator (ROC AUC scoring)
time_gen = GeneralizingEstimator(clf, n_jobs=1, scoring='roc_auc',
                                 verbose=True)

# 5-fold cross-validated scores, then averaged over the folds (axis 0)
scores = cross_val_multiscore(time_gen, X, y, cv=5, n_jobs=1)
scores = scores.mean(axis=0)

# The diagonal of the score matrix is plotted against time; it is exactly
# the same as the time-by-time decoding above.
fig, ax = plt.subplots()
ax.plot(epochs.times, np.diag(scores), label='score')
ax.axhline(.5, color='k', linestyle='--', label='chance')
ax.set(xlabel='Times', ylabel='AUC')
ax.legend()
ax.axvline(.0, color='k', linestyle='-')
# %%
for idx in range(1, 11):
    # Loading data ------------------------------------------
    # One worker per recording, named MEG_S01 ... MEG_S10.
    band_name = 'U07'
    running_name = f'MEG_S{idx:02d}'

    worker = MEG_Worker(running_name=running_name)
    worker.pipeline(band_name=band_name)

    # Time axis shared by the epochs
    times = worker.clean_epochs.times

    # Paired X and y --------------------------------------------------------
    # Class 1 comes from the clean epochs, class 2 from the '2' entry of the
    # denoised epochs.
    X1, y1 = pair_X_y(worker.clean_epochs, 1)
    X2, y2 = pair_X_y(worker.denoise_epochs['2'], 2)

    # Stack both classes along the first axis
    X_all = np.concatenate((X1, X2), axis=0)
    y_all = np.concatenate((y1, y2), axis=0)

    # MVPA ----------------------------------------------------------------
    # RBF-kernel SVM with balanced class weights on standardized features,
    # wrapped for temporal generalization and scored with F1.
    _svm = svm.SVC(gamma='scale', kernel='rbf', class_weight='balanced')
    clf = make_pipeline(StandardScaler(), _svm)
    estimator = GeneralizingEstimator(clf,
                                      n_jobs=n_jobs,
                                      scoring='f1',
                                      verbose=1)