def get_pattern(X, y, clf, time_point=None):
    """Get pattern from classifier on X and y at a given time point.

    Re-fit the classifier without cross-validation and get the
    patterns/coefficients.

    Parameters
    ----------
    X : array
        The data to fit (features).
    y : vector
        The response vector.
    clf : sklearn classifier object
        The classifier to re-fit.
    time_point : int | None
        If the data has more than two dimensions: index of the time point
        to fit.

    Returns
    -------
    pattern : array
        The sensor or source pattern of coefficients.
    """
    if time_point is not None:
        X = X[:, :, time_point]
    clf.fit(X, y)
    if clf.steps[-1][0] == 'logisticregression':
        # a bare LogisticRegression only exposes `coef_`
        pattern = get_coef(clf, 'coef_', inverse_transform=True)
    else:
        # a LinearModel wrapper additionally exposes `patterns_`
        pattern = get_coef(clf, 'patterns_', inverse_transform=True)
    return pattern
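# A minimal usage sketch (hypothetical data and names; `X` is assumed to be
# an (n_epochs, n_channels, n_times) array, `y` a binary target vector, and
# `peak_sample` the time index of interest):
clf = make_pipeline(StandardScaler(), LinearModel(LogisticRegression()))
pattern = get_pattern(X, y, clf, time_point=peak_sample)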
def logreg_timedecoding(epochs, numcv=4, jobs=1):
    """Run logistic regression over sensors at each time point.

    Returns an Evoked array containing the model patterns, with the
    cross-validated ROC AUC scores attached as ``evoked.roc_auc``.

    Code adapted from:
    https://martinos.org/mne/stable/auto_tutorials/plot_sensors_decoding.html
    """
    X = epochs.get_data()  # MEG signals: n_epochs, n_channels, n_times
    X = X.astype(float)
    y = epochs.events[:, 2]  # targets

    # setup and run the decoder
    clf = make_pipeline(StandardScaler(), LinearModel(LogisticRegression()))
    time_decod = SlidingEstimator(clf, scoring='roc_auc', n_jobs=jobs)
    scores = cross_val_multiscore(time_decod, X, y, cv=numcv, n_jobs=jobs)

    # mean scores across cross-validation splits
    scores = np.mean(scores, axis=0)

    # re-fit on all data to extract the patterns
    time_decod.fit(X, y)
    coef = get_coef(time_decod, 'patterns_', inverse_transform=True)
    evoked = mne.EvokedArray(coef, epochs.info, tmin=epochs.times[0])
    evoked.roc_auc = scores
    return evoked
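# Usage sketch (assumes `epochs` holds two conditions coded in events[:, 2]
# and that matplotlib is available; plotting details are assumptions):
import matplotlib.pyplot as plt

evoked = logreg_timedecoding(epochs, numcv=4, jobs=1)
fig, ax = plt.subplots()
ax.plot(evoked.times, evoked.roc_auc, label='score')
ax.axhline(.5, color='k', linestyle='--', label='chance')
ax.set_xlabel('Time (s)')
ax.set_ylabel('AUC')
ax.legend()
plt.show()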
# 4.2) vectorise channel data for linear regression

# data to be analysed
dat = cues[subj].get_data()
dat = dat[:, :, times_to_use]
Y = Vectorizer().fit_transform(dat)

# 4.3) fit linear model with sklearn's LinearRegression
weights = compute_sample_weight(class_weight='balanced',
                                y=metadata.cue.to_numpy())
linear_model = LinearRegression(n_jobs=n_jobs, fit_intercept=True)
linear_model.fit(design, Y, sample_weight=weights)

# 4.4) extract the resulting coefficients (i.e., betas)
coefs = get_coef(linear_model, 'coef_')
inter = linear_model.intercept_

# 4.5) extract model r-squared
r2 = r2_score(Y, linear_model.predict(design), multioutput='raw_values')
# save model r-squared
r_squared[subj_ind, :] = r2

# save results
for pred_i, predictor in enumerate(design.columns):
    print(pred_i, predictor)
    if 'cue' in predictor:
        # extract cue betas
        betas[subj_ind, :] = coefs[:, pred_i]
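# A possible follow-up sketch (not part of the original script): reshape the
# saved cue betas back to channels x times and wrap them in an EvokedArray
# for topographic plotting. The tmin of the cropped window is an assumption
# (it depends on `times_to_use`).
n_channels = len(cues[subj].ch_names)
cue_evoked = mne.EvokedArray(betas[subj_ind, :].reshape(n_channels, -1),
                             cues[subj].info,
                             tmin=cues[subj].times[times_to_use][0])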
ax.axhline(.5, color='k', linestyle='--', label='chance')
ax.set_xlabel('Times')
ax.set_ylabel('AUC')  # Area Under the Curve
ax.legend()
ax.axvline(.0, color='k', linestyle='-')
ax.set_title('Sensor space decoding')
plt.show()

###############################################################################
# You can retrieve the spatial filters and spatial patterns if you explicitly
# use a LinearModel
clf = make_pipeline(StandardScaler(), LinearModel(LogisticRegression()))
time_decod = SlidingEstimator(clf, n_jobs=1, scoring='roc_auc', verbose=True)
time_decod.fit(X, y)

coef = get_coef(time_decod, 'patterns_', inverse_transform=True)
evoked = mne.EvokedArray(coef, epochs.info, tmin=epochs.times[0])
joint_kwargs = dict(ts_args=dict(time_unit='s'),
                    topomap_args=dict(time_unit='s'))
evoked.plot_joint(times=np.arange(0., .500, .100), title='patterns',
                  **joint_kwargs)

###############################################################################
# Temporal Generalization
# -----------------------
#
# This runs the analysis used in [1]_ and further detailed in [2]_.
#
# The idea is to fit the models on each time instant and see how it
# generalizes to any other time point.
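# A minimal sketch of the temporal-generalization analysis described above
# (cross-validation settings are assumptions, not part of the original text):
from mne.decoding import GeneralizingEstimator, cross_val_multiscore

time_gen = GeneralizingEstimator(clf, scoring='roc_auc', n_jobs=1,
                                 verbose=True)
# each row of `scores` is a training time, each column a testing time
scores = cross_val_multiscore(time_gen, X, y, cv=5, n_jobs=1)
scores = np.mean(scores, axis=0)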
tmin = i * 1.0
tmax = (i + 1) * 1.0

# create an :class:`mne.Epochs` object
epochs = mne.Epochs(raw, events=events, event_id=event_id, tmin=tmin,
                    tmax=tmax, baseline=None, verbose=True, preload=True)

# relabel every other epoch as event 3
for epoch_i in range(len(epochs.events)):
    if epoch_i % 2 == 0:
        epochs.events[epoch_i, 2] = 3

X = epochs.pick_types(meg=False, eeg=True).get_data()
y = epochs.events[:, -1]

# Define a unique pipeline to sequentially:
clf = make_pipeline(
    Vectorizer(),                       # 1) vectorize across time and channels
    StandardScaler(),                   # 2) normalize features across trials
    LinearModel(LogisticRegression()))  # 3) fit a logistic regression
clf.fit(X, y)

coef = get_coef(clf, 'patterns_', inverse_transform=True)
evoked = EvokedArray(coef, epochs.info, tmin=epochs.tmin)
fig = evoked.plot_topomap(title='EEG Patterns', size=3, show=False)
fig.savefig(title + "_ti_" + str(tmin) + "_tf_" + str(tmax) + '.png')
# The data was vectorized to fit a single model across all time points and
# all channels. We thus reshape it:
coef = coef.reshape(len(meg_epochs.ch_names), -1)

# Plot
evoked = EvokedArray(coef, meg_epochs.info, tmin=epochs.tmin)
evoked.plot_topomap(title='MEG %s' % name, time_unit='s')

###############################################################################
# Let's do the same on EEG data using a scikit-learn pipeline

X = epochs.pick_types(meg=False, eeg=True).get_data()
y = epochs.events[:, 2]

# Define a unique pipeline to sequentially:
clf = make_pipeline(
    Vectorizer(),                       # 1) vectorize across time and channels
    StandardScaler(),                   # 2) normalize features across trials
    LinearModel(
        LogisticRegression(solver='lbfgs')))  # 3) fit a logistic regression
clf.fit(X, y)

# Extract and plot patterns and filters
for name in ('patterns_', 'filters_'):
    # The `inverse_transform` parameter will call this method on any estimator
    # contained in the pipeline, in reverse order.
    coef = get_coef(clf, name, inverse_transform=True)
    evoked = EvokedArray(coef, epochs.info, tmin=epochs.tmin)
    evoked.plot_topomap(title='EEG %s' % name[:-1], time_unit='s')
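# For intuition, a rough sketch of what `inverse_transform=True` does for
# this pipeline (illustrative only; get_coef handles this internally): the
# fitted coefficients are passed backwards through each transformer, undoing
# the scaling and restoring the channels-by-times shape.
raw = clf.named_steps['linearmodel'].patterns_                  # flat vector
raw = clf.named_steps['standardscaler'].inverse_transform(raw[np.newaxis, :])
raw = clf.named_steps['vectorizer'].inverse_transform(raw)[0]   # 2D again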
random = np.random.RandomState(random_state)

# number of random samples
boot = 2000

# create empty array for saving the bootstrap samples
boot_betas = np.zeros((boot, Y.shape[1], len(predictors)))

# run bootstrap for regression coefficients
for i in range(boot):
    # extract random epochs from data
    resamples = random.choice(range(n_epochs), n_epochs, replace=True)
    # set up and fit the model
    model = LinearRegression(fit_intercept=False)
    model.fit(X=design.iloc[resamples], y=Y[resamples, :])
    # extract regression coefficients
    boot_betas[i, :, :] = get_coef(model, 'coef_')
    # delete the previously fitted model
    del model

###############################################################################
# compute lower and upper boundaries of confidence interval based on
# distribution of bootstrap betas
lower, upper = np.quantile(boot_betas, [.025, .975], axis=0)

###############################################################################
# fit linear regression model to original data and store the results in
# MNE's evoked format for convenience

# set up linear model
linear_model = LinearRegression(fit_intercept=False)
# fit model
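# A possible follow-up (not in the original script): flag coefficients whose
# 95% bootstrap confidence interval excludes zero.
sig_mask = (lower > 0) | (upper < 0)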
sleep_epochs = myload(sleep_path_data, typ='epoch_preprocessed', sbj=sbj,
                      preload=True)  # SLEEP!
# event_id remapping; for wake this step is done during preprocessing
sleep_epochs.event_id = sleep_event_id  # SLEEP!
sleep_epochs = sleep_epochs.crop(tmin=tmin, tmax=tmax)
X1, y1 = get_Xy_balanced(sleep_epochs, contrast1)

clf = make_pipeline(Vectorizer(), StandardScaler(),
                    LinearModel(LogisticRegression(max_iter=4000)))
cv = StratifiedKFold(n_splits=2, shuffle=True)

coef_folds = []
for train_idx, test_idx in cv.split(X1, y1):
    clf.fit(X1[train_idx], y=y1[train_idx])
    coef_folds.append(get_coef(clf, attr='patterns_', inverse_transform=True))

# average folds and reshape to channels x times
coef = np.asarray(coef_folds).mean(0).reshape(
    [len(sleep_epochs.ch_names), -1])
evoked = EvokedArray(coef, sleep_epochs.info, tmin=tmin)
evokeds.append(evoked)

ga = mne.grand_average(evokeds)

# SLEEP
f = ga.plot_topomap([0., 0.2, 0.4, 0.6, 0.8], scalings=0.1, vmin=-2, vmax=2)
f.savefig(os.path.join(save_path,
                       contrast1[0].split('/')[1] + 'coefs_new_detail.tiff'))

# WAKE
# f = ga.plot_topomap([0., 0.2, 0.4, 0.6, 0.8], scalings=0.1, vmin=-2, vmax=2)
ax.axhline(.5, color='k', linestyle='--', label='chance')
ax.set_xlabel('Times')
ax.set_ylabel('AUC')  # Area Under the Curve
ax.legend()
ax.axvline(.0, color='k', linestyle='-')
ax.set_title('Sensor space decoding')

###############################################################################
# You can retrieve the spatial filters and spatial patterns if you explicitly
# use a LinearModel
clf = make_pipeline(StandardScaler(),
                    LinearModel(LogisticRegression(solver='lbfgs')))
time_decod = SlidingEstimator(clf, n_jobs=1, scoring='roc_auc', verbose=True)
time_decod.fit(X, y)

coef = get_coef(time_decod, 'patterns_', inverse_transform=True)
evoked = mne.EvokedArray(coef, epochs.info, tmin=epochs.times[0])
joint_kwargs = dict(ts_args=dict(time_unit='s'),
                    topomap_args=dict(time_unit='s'))
evoked.plot_joint(times=np.arange(0., .500, .100), title='patterns',
                  **joint_kwargs)

###############################################################################
# Temporal generalization
# ^^^^^^^^^^^^^^^^^^^^^^^
#
# Temporal generalization is an extension of the decoding over time approach.
# It consists in evaluating whether the model estimated at a particular
# time instant accurately predicts any other time instant. It is analogous to
# transferring a trained model to a distinct learning problem, where the
# problems correspond to decoding the patterns of brain activity recorded at
# distinct time instants.
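# A sketch of fitting and visualizing the generalization matrix described
# above (cross-validation and plotting details are assumptions):
from mne.decoding import GeneralizingEstimator, cross_val_multiscore

time_gen = GeneralizingEstimator(clf, scoring='roc_auc', n_jobs=1)
scores = np.mean(cross_val_multiscore(time_gen, X, y, cv=5), axis=0)

fig, ax = plt.subplots()
im = ax.imshow(scores, origin='lower', cmap='RdBu_r',
               extent=epochs.times[[0, -1, 0, -1]], vmin=0., vmax=1.)
ax.set_xlabel('Testing time (s)')
ax.set_ylabel('Training time (s)')
ax.set_title('Temporal generalization')
fig.colorbar(im, ax=ax)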
coef = scaler.inverse_transform([coef])[0]

# The data was vectorized to fit a single model across all time points and
# all channels. We thus reshape it:
coef = coef.reshape(len(meg_epochs.ch_names), -1)

# Plot
evoked = EvokedArray(coef, meg_epochs.info, tmin=epochs.tmin)
evoked.plot_topomap(title='MEG %s' % name)

###############################################################################
# Let's do the same on EEG data using a scikit-learn pipeline

X = epochs.pick_types(meg=False, eeg=True).get_data()
y = epochs.events[:, 2]

# Define a unique pipeline to sequentially:
clf = make_pipeline(
    Vectorizer(),                       # 1) vectorize across time and channels
    StandardScaler(),                   # 2) normalize features across trials
    LinearModel(LogisticRegression()))  # 3) fit a logistic regression
clf.fit(X, y)

# Extract and plot patterns and filters
for name in ('patterns_', 'filters_'):
    # The `inverse_transform` parameter will call this method on any estimator
    # contained in the pipeline, in reverse order.
    coef = get_coef(clf, name, inverse_transform=True)
    evoked = EvokedArray(coef, epochs.info, tmin=epochs.tmin)
    evoked.plot_topomap(title='EEG %s' % name[:-1])
# *** 2.1) create bootstrap sample ***
# extract random subjects from overall sample
resampled_subjects = random.choice(range(betas_cue[0, ...].shape[0]),
                                   betas_cue[0, ...].shape[0],
                                   replace=True)
# resampled betas
resampled_betas = betas_cue[0, ...][resampled_subjects, :]

# *** 2.2) estimate effect of moderator (i.e., PBI) on group level ***
# set up and fit moderator (i.e., PBI) model using the bootstrap sample
model_boot = LinearRegression(fit_intercept=False)
model_boot.fit(X=group.iloc[resampled_subjects], y=resampled_betas)
# extract regression coefficients
group_coefs = get_coef(model_boot, 'coef_')

# save bootstrap betas
for pred_i, predictor in enumerate(group.columns):
    if 'pbi_rt' in predictor:
        # store regression coefficient for moderator (i.e., PBI)
        group_pp_betas[i, :] = group_coefs[:, pred_i]

# remove previous object
del resampled_betas

# *** 2.3) compute test statistic for bootstrap sample ***
# compute standard error
resampled_betas = betas_null[resampled_subjects, :]
se = resampled_betas.std(axis=0) / np.sqrt(resampled_betas.shape[0])
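# A plausible continuation (the original snippet is truncated here): the
# bootstrap-t statistic divides the mean of the resampled null betas by
# their standard error.
t_boot = resampled_betas.mean(axis=0) / se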
else:
    sel = np.where(~np.isnan(y))[0]

td = SlidingEstimator(clf, scoring=make_scorer(scorer), n_jobs=24, **kwargs)

# run decoding
cv = StratifiedKFold(8)
scores = list()
patterns = list()
filters = list()
for train, test in cv.split(X[sel], y[sel]):
    td.fit(X[sel][train], y[sel][train])
    score = td.score(X[sel][test], y[sel][test])
    scores.append(score)
    patterns.append(get_coef(td, 'patterns_', inverse_transform=True))
    filters.append(get_coef(td, 'filters_', inverse_transform=True))
scores = np.mean(scores, axis=0)
patterns = np.mean(patterns, axis=0)
filters = np.mean(filters, axis=0)
if 'angle' in analysis:
    patterns = np.mean(np.abs(patterns), axis=1)
    filters = np.mean(np.abs(filters), axis=1)
scores = np.reshape(scores, (n_freqs, n_times))
patterns = np.reshape(patterns, (n_channels, n_freqs, n_times))
filters = np.reshape(filters, (n_channels, n_freqs, n_times))

# save cross-validated scores
fname = results_folder +\
    '%s_tf_scores_%s_%s.npy' % (subject, epoch_type, analysis)
np.save(fname, np.array(scores))
fname = results_folder +\
# customize the temporal decoding process
# define the cross-validation scheme (10 stratified shuffle splits)
cv = StratifiedShuffleSplit(n_splits=10, random_state=12345)
coefs = []
scores = []
for time_ in tqdm(range(data.shape[-1]), desc='temporal decoding'):
    coef = []
    scores_ = []
    # at each time point, use the frequency information in each channel
    # as the features
    for train, test in cv.split(data, labels):
        data_ = data[train, :, :, time_]
        clf = make_clf(True, True)
        clf.fit(data_, labels[train])
        # get the patterns decoded by the classifier
        coef_ = get_coef(clf, 'patterns_', True)
        coef.append(coef_)
        temp = metrics.roc_auc_score(
            labels[test],
            clf.predict_proba(data[test, :, :, time_])[:, -1])
        scores_.append(temp)
    print('\n', '%d' % time_, 'auc = ', np.mean(scores_), '\n')
    coefs.append(np.array(coef))
    scores.append(scores_)
coefs = np.array(coefs)
scores = np.array(scores)

# get an info object to plot the "patterns"
temp_epochs = mne.read_epochs(working_dir + 'sub5_d2-eventsRelated-epo.fif')
info = temp_epochs.info

import pickle
pickle.dump([scores, info, coefs],
            open(saving_dir + 'score_info_coefs.p', 'wb'))
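# `make_clf` is defined elsewhere in the original project. A plausible
# minimal sketch consistent with how it is used above (flatten the
# channel-by-frequency features, scale them, and fit a logistic regression
# wrapped in LinearModel so that 'patterns_' is available to get_coef):
def make_clf(vectorize=True, scale=True):
    steps = []
    if vectorize:
        steps.append(Vectorizer())
    if scale:
        steps.append(StandardScaler())
    steps.append(LinearModel(LogisticRegression(solver='lbfgs')))
    return make_pipeline(*steps)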
# --- 2) vectorize (eeg-channel) data for linear regression analysis ---
# data to be analysed
data = subject.get_data()
# vectorize data across channels
Y = Vectorizer().fit_transform(data)

# --- 3) fit linear model with sklearn's LinearRegression ---
# we already have an intercept column in the design matrix,
# thus we'll call LinearRegression with fit_intercept=False
linear_model = LinearRegression(fit_intercept=False)
linear_model.fit(design, Y)

# --- 4) extract the resulting coefficients (i.e., betas) ---
# extract betas
coefs = get_coef(linear_model, 'coef_')
# only keep the relevant predictor
betas[iteration, :] = coefs[:, pred_col]

# calculate coefficient of determination (r-squared)
r_squared[iteration, :] = r2_score(Y, linear_model.predict(design),
                                   multioutput='raw_values')
# clean up
del linear_model

###############################################################################
# create design matrix for group-level regression

# z-score age predictor
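# A minimal sketch of how such a group-level design matrix could be built
# (hypothetical variable names; the original script defines `age` and the
# subject dimension elsewhere). The explicit intercept column is what allows
# fit_intercept=False above.
import pandas as pd

age_z = (age - age.mean()) / age.std()
group_design = pd.DataFrame({'intercept': np.ones(len(age_z)),
                             'age': age_z})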
def run_gat(subj, decoder="ridge", n_jobs=2):
    """Run Generalization Across Time (GAT) for one subject.

    Parameters
    ----------
    subj : int
        The subject number.
    decoder : str
        Type of classifier: 'ridge' for Ridge Regression (default),
        'lin-svm' for linear SVM, 'svm' for nonlinear (RBF) SVM and
        'log_reg' for Logistic Regression.
    n_jobs : int
        The number of jobs to run in parallel.
    """
    # load cue A and cue B epochs
    epochs = get_epochs(subj)['Correct A', 'Correct B']

    # specify whether to use a linear or nonlinear SVM if SVM is used
    lin = ''  # if not SVM it doesn't matter; log_reg and ridge are linear
    if "-" in decoder:
        lin, decoder = decoder.split("-")

    # build classifier pipeline
    # pick a machine learning algorithm to use (ridge/SVM/logistic regression)
    decoder_dict = {
        "ridge": RidgeClassifier(class_weight='balanced', random_state=42,
                                 solver="sag"),
        "svm": SVC(class_weight='balanced',
                   kernel=("linear" if lin == "lin" else "rbf"),
                   random_state=42),
        "log_reg": LogisticRegression(class_weight='balanced',
                                      random_state=42)
    }

    # get data and targets
    data = epochs.get_data()
    labels = epochs.events[:, -1]

    # create classifier pipeline
    clf = make_pipeline(StandardScaler(), decoder_dict[decoder])
    gen_clf = GeneralizingEstimator(clf, scoring="roc_auc", n_jobs=n_jobs)

    # compute cross-validated performance scores
    scores = cross_val_multiscore(gen_clf, data, labels,
                                  cv=5, n_jobs=n_jobs).mean(0)

    # calculate prediction confidence scores
    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    preds = np.empty((len(labels), data.shape[2], data.shape[2]))
    for train, test in cv.split(data, labels):
        gen_clf.fit(data[train], labels[train])
        d = gen_clf.decision_function(data[test])
        preds[test] = d

    # compute topographical patterns
    dat = Vectorizer().fit_transform(data)
    clf.fit(dat, labels)
    dat = dat - dat.mean(0, keepdims=True)

    # look up the type of classifier and get the weights (i.e., filters);
    # note that SVC exposes `coef_` only for a linear kernel
    if decoder == 'ridge':
        filt_ = clf.named_steps.ridgeclassifier.coef_.copy()
    elif decoder == 'svm':
        filt_ = clf.named_steps.svc.coef_.copy()
    elif decoder == 'log_reg':
        filt_ = clf.named_steps.logisticregression.coef_.copy()

    # compute patterns using Haufe's trick: A = Cov_X . W . Precision_Y
    # (cf. Haufe et al., 2014, NeuroImage,
    # doi:10.1016/j.neuroimage.2013.10.067)
    inv_y = 1.
    patt_ = np.cov(dat.T).dot(filt_.T.dot(inv_y)).T

    # store the patterns accordingly
    if decoder == 'ridge':
        clf.named_steps.ridgeclassifier.patterns_ = patt_
    elif decoder == 'svm':
        clf.named_steps.svc.patterns_ = patt_
    elif decoder == 'log_reg':
        clf.named_steps.logisticregression.patterns_ = patt_

    # back-transform using the steps in the pipeline
    patterns = get_coef(clf, 'patterns_', inverse_transform=True)

    # return subject scores, prediction confidence and topographical patterns
    return scores, preds, patterns
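# Hypothetical usage sketch: run the GAT for one subject and plot the
# resulting training-time by testing-time score matrix (subject id and
# plotting details are assumptions, not part of the original function).
import matplotlib.pyplot as plt

scores, preds, patterns = run_gat(subj=1, decoder='ridge', n_jobs=2)
fig, ax = plt.subplots()
im = ax.imshow(scores, origin='lower', cmap='RdBu_r', vmin=0., vmax=1.)
ax.set_xlabel('Testing time (samples)')
ax.set_ylabel('Training time (samples)')
fig.colorbar(im, ax=ax)
plt.show()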