def get_pattern(X, y, clf, time_point=None):
    """Get pattern from classifier on X and y at peak time.

    Re-fit the classifier without cross-validation and get the patterns/
    coefficients.

    Parameters
    ----------
    X : array
        The data to fit (features).
    y : array
        The response vector (targets).
    clf : sklearn classifier object
        The classifier to re-fit.
    time_point : int | None
        If the data has more than two dimensions, the index of the time
        point at which to fit.

    Returns
    -------
    pattern : array
        The sensor or source pattern of coefficients.
    """
    if time_point is not None:
        X = X[:, :, time_point]

    clf.fit(X, y)

    if clf.steps[-1][0] == 'logisticregression':
        # a bare LogisticRegression exposes `coef_` (it has no `patterns_`)
        pattern = get_coef(clf, 'coef_', inverse_transform=True)
    else:
        pattern = get_coef(clf, 'patterns_', inverse_transform=True)

    return pattern
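
A minimal usage sketch (not part of the original snippet), assuming an existing mne.Epochs object named `epochs` with binary event codes:

# hypothetical usage of get_pattern(); `epochs` is assumed to exist
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from mne.decoding import LinearModel, get_coef

X = epochs.get_data()    # (n_epochs, n_channels, n_times)
y = epochs.events[:, 2]  # class labels

clf = make_pipeline(StandardScaler(), LinearModel(LogisticRegression()))
pattern = get_pattern(X, y, clf, time_point=50)  # fit at one time sample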
Example 2
def logreg_timedecoding(epochs, numcv=4, jobs=1):
    """
    Logistic regression over sensors at each time point.

    Returns an Evoked array containing the coefficients, with ROC AUC
    scores attached as ``evoked.roc_auc``. Code adapted from:
    https://martinos.org/mne/stable/auto_tutorials/plot_sensors_decoding.html
    """

    X = epochs.get_data()  # MEG signals: n_epochs, n_channels, n_times
    X = X.astype(float)
    y = epochs.events[:, 2]  # targets

    # setup and run the decoder

    clf = make_pipeline(StandardScaler(), LinearModel(LogisticRegression()))

    time_decod = SlidingEstimator(clf, scoring='roc_auc', n_jobs=jobs)

    scores = cross_val_multiscore(time_decod, X, y, cv=numcv, n_jobs=jobs)

    # Mean scores across cross-validation splits
    scores = np.mean(scores, axis=0)

    # re-fit on all epochs (no cross-validation) to extract the patterns
    time_decod = SlidingEstimator(clf, scoring='roc_auc', n_jobs=jobs)
    time_decod.fit(X, y)

    coef = get_coef(time_decod, 'patterns_', inverse_transform=True)

    evoked = mne.EvokedArray(coef, epochs.info, tmin=epochs.times[0])
    evoked.roc_auc = scores

    return evoked
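
Hypothetical usage (not in the original source): run the decoder on an existing `epochs` object and plot the AUC time course attached to the returned Evoked.

# `epochs` is an assumed, pre-existing mne.Epochs object
import matplotlib.pyplot as plt

evoked = logreg_timedecoding(epochs, numcv=4, jobs=2)

fig, ax = plt.subplots()
ax.plot(evoked.times, evoked.roc_auc, label='score')  # AUC over time
ax.axhline(.5, color='k', linestyle='--', label='chance')
ax.set_xlabel('Time (s)')
ax.set_ylabel('AUC')
ax.legend()
plt.show()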
Example 3
    # 4.2) vectorise channel data for linear regression
    # data to be analysed
    dat = cues[subj].get_data()
    dat = dat[:, :, times_to_use]
    Y = Vectorizer().fit_transform(dat)

    # 4.3) fit linear model with sklearn's LinearRegression
    weights = compute_sample_weight(class_weight='balanced',
                                    y=metadata.cue.to_numpy())
    linear_model = LinearRegression(n_jobs=n_jobs, fit_intercept=True)
    linear_model.fit(design, Y, sample_weight=weights)

    # 4.4) extract the resulting coefficients (i.e., betas)
    # extract betas
    coefs = get_coef(linear_model, 'coef_')
    inter = linear_model.intercept_

    # 4.5) extract model r_squared
    r2 = r2_score(Y, linear_model.predict(design), multioutput='raw_values')
    # save model R-squared
    r_squared[subj_ind, :] = r2

    # save results
    for pred_i, predictor in enumerate(design.columns):
        print(pred_i, predictor)
        if 'cue' in predictor:
            # extract cue betas
            betas[subj_ind, :] = coefs[:, pred_i]
        # elif 'Intercept' in predictor:
        #     continue
Example 4
ax.axhline(.5, color='k', linestyle='--', label='chance')
ax.set_xlabel('Times')
ax.set_ylabel('AUC')  # Area Under the Curve
ax.legend()
ax.axvline(.0, color='k', linestyle='-')
ax.set_title('Sensor space decoding')
plt.show()

###############################################################################
# You can retrieve the spatial filters and spatial patterns if you explicitly
# use a LinearModel
clf = make_pipeline(StandardScaler(), LinearModel(LogisticRegression()))
time_decod = SlidingEstimator(clf, n_jobs=1, scoring='roc_auc', verbose=True)
time_decod.fit(X, y)

coef = get_coef(time_decod, 'patterns_', inverse_transform=True)
evoked = mne.EvokedArray(coef, epochs.info, tmin=epochs.times[0])
joint_kwargs = dict(ts_args=dict(time_unit='s'),
                    topomap_args=dict(time_unit='s'))
evoked.plot_joint(times=np.arange(0., .500, .100),
                  title='patterns',
                  **joint_kwargs)

###############################################################################
# Temporal Generalization
# -----------------------
#
# This runs the analysis used in [1]_ and further detailed in [2]_
#
# The idea is to fit the models on each time instant and see how it
# generalizes to any other time point.
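
The code that followed has been truncated in this excerpt. A minimal sketch of the temporal-generalization step described above, reusing X, y, clf and np from the preceding decoding code (an assumption based on the surrounding tutorial, not the original continuation):

# sketch: score every (train time, test time) pair with cross-validation
from mne.decoding import GeneralizingEstimator, cross_val_multiscore

time_gen = GeneralizingEstimator(clf, scoring='roc_auc', n_jobs=1,
                                 verbose=True)
scores = cross_val_multiscore(time_gen, X, y, cv=5, n_jobs=1)
scores = np.mean(scores, axis=0)  # average folds -> (n_times, n_times)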
Example 5
    tmin = i * 1.0
    tmax = (i + 1) * 1.0

    # Create :class:`Epochs <mne.Epochs>` object
    epochs = mne.Epochs(raw,
                        events=events,
                        event_id=event_id,
                        tmin=tmin,
                        tmax=tmax,
                        baseline=None,
                        verbose=True,
                        preload=True)
    # relabel every other epoch as event 3 to create a second class
    # (use a fresh loop variable so the outer `i` is not clobbered)
    for idx in range(len(epochs.events)):
        if idx % 2 == 0:
            epochs.events[idx, 2] = 3
    #epochs.plot(scalings = 'auto',block = True,n_epochs=10)
    X = epochs.pick_types(meg=False, eeg=True).get_data()
    y = epochs.events[:, -1]

    # Define a unique pipeline to sequentially:
    clf = make_pipeline(
        Vectorizer(),  # 1) vectorize across time and channels
        StandardScaler(),  # 2) normalize features across trials
        LinearModel(LogisticRegression()))  # 3) fits a logistic regression
    clf.fit(X, y)

    coef = get_coef(clf, 'patterns_', inverse_transform=True)
    evoked = EvokedArray(coef, epochs.info, tmin=epochs.tmin)
    fig = evoked.plot_topomap(title='EEG Patterns', size=3, show=False)
    fig.savefig(title + "_ti_" + str(tmin) + "_tf_" + str(tmax) + '.png')
Example 6
    # The data was vectorized to fit a single model across all time points and
    # all channels. We thus reshape it:
    coef = coef.reshape(len(meg_epochs.ch_names), -1)

    # Plot
    evoked = EvokedArray(coef, meg_epochs.info, tmin=epochs.tmin)
    evoked.plot_topomap(title='MEG %s' % name, time_unit='s')

###############################################################################
# Let's do the same on EEG data using a scikit-learn pipeline

X = epochs.pick_types(meg=False, eeg=True).get_data()
y = epochs.events[:, 2]

# Define a unique pipeline to sequentially:
clf = make_pipeline(
    Vectorizer(),  # 1) vectorize across time and channels
    StandardScaler(),  # 2) normalize features across trials
    LinearModel(
        LogisticRegression(solver='lbfgs')))  # 3) fits a logistic regression
clf.fit(X, y)

# Extract and plot patterns and filters
for name in ('patterns_', 'filters_'):
    # The `inverse_transform` parameter will call this method on any estimator
    # contained in the pipeline, in reverse order.
    coef = get_coef(clf, name, inverse_transform=True)
    evoked = EvokedArray(coef, epochs.info, tmin=epochs.tmin)
    evoked.plot_topomap(title='EEG %s' % name[:-1], time_unit='s')
Example 7
random = np.random.RandomState(random_state)

# number of random samples
boot = 2000

# create empty array for saving the bootstrap samples
boot_betas = np.zeros((boot, Y.shape[1], len(predictors)))
# run bootstrap for regression coefficients
for i in range(boot):
    # extract random epochs from data
    resamples = random.choice(range(n_epochs), n_epochs, replace=True)
    # set up model and fit model
    model = LinearRegression(fit_intercept=False)
    model.fit(X=design.iloc[resamples], y=Y[resamples, :])
    # extract regression coefficients
    boot_betas[i, :, :] = get_coef(model, 'coef_')
    # delete the previously fitted model
    del model

###############################################################################
# compute lower and upper boundaries of confidence interval based on
# distribution of bootstrap betas.
lower, upper = np.quantile(boot_betas, [.025, .975], axis=0)
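
A hypothetical follow-up (not in the original snippet): use these bounds to flag coefficients whose 95% bootstrap confidence interval excludes zero.

# hypothetical: True where the 95% CI does not contain zero
significant = (lower > 0) | (upper < 0)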

###############################################################################
# fit linear regression model to original data and store the results in
# MNE's evoked format for convenience

# set up linear model
linear_model = LinearRegression(fit_intercept=False)
# fit model
Example 8
    sleep_epochs = myload(sleep_path_data, typ='epoch_preprocessed', sbj=sbj,
                          preload=True)  # SLEEP!
    # remap event_id (for wake this step happens during preprocessing)
    sleep_epochs.event_id = sleep_event_id

    sleep_epochs = sleep_epochs.crop(tmin=tmin, tmax=tmax)
    
    X1, y1 = get_Xy_balanced(sleep_epochs, contrast1)
    
    clf = make_pipeline(Vectorizer(), StandardScaler(),
                        LinearModel(LogisticRegression(max_iter=4000)))

    cv = StratifiedKFold(n_splits=2, shuffle=True)
    
    coef_folds = [] 
    for train_idx, test_idx in cv.split(X1, y1):
        clf.fit(X1[train_idx], y=y1[train_idx])
        #scores1.append(clf.score(X1[test_idx], y=y1[test_idx]))
        coef_folds.append(get_coef(clf, attr='patterns_', inverse_transform=True))
    # average over folds and reshape to (n_channels=173, n_times)
    coef = np.asarray(coef_folds).mean(0).reshape([173, -1])
    evoked = EvokedArray(coef, sleep_epochs.info, tmin=tmin)
    evokeds.append(evoked)

ga = mne.grand_average(evokeds)

#SLEEP
f = ga.plot_topomap([0., 0.2, 0.4,  0.6, 0.8], scalings=0.1, vmin=-2, vmax=2)
#f = ga.plot_topomap([0.1, 0.2,  0.3, 0.4, 0.5, 0.6,  0.7, 0.8], scalings=0.1, vmin=-6, vmax=6)
#f = ga.plot_topomap([0.1, 0.15,  0.2, 0.25, 0.3, 0.35,  0.4, 0.45, 0.5], scalings=0.1, vmin=-6, vmax=6)

f.savefig(os.path.join(save_path, contrast1[0].split('/')[1]  + 'coefs_new_detail.tiff'))

#WAKE
# f = ga.plot_topomap([0., 0.2, 0.4,  0.6, 0.8], scalings=0.1, vmin=-2, vmax=2) 
Example 9
ax.axhline(.5, color='k', linestyle='--', label='chance')
ax.set_xlabel('Times')
ax.set_ylabel('AUC')  # Area Under the Curve
ax.legend()
ax.axvline(.0, color='k', linestyle='-')
ax.set_title('Sensor space decoding')

###############################################################################
# You can retrieve the spatial filters and spatial patterns if you explicitly
# use a LinearModel
clf = make_pipeline(StandardScaler(),
                    LinearModel(LogisticRegression(solver='lbfgs')))
time_decod = SlidingEstimator(clf, n_jobs=1, scoring='roc_auc', verbose=True)
time_decod.fit(X, y)

coef = get_coef(time_decod, 'patterns_', inverse_transform=True)
evoked = mne.EvokedArray(coef, epochs.info, tmin=epochs.times[0])
joint_kwargs = dict(ts_args=dict(time_unit='s'),
                    topomap_args=dict(time_unit='s'))
evoked.plot_joint(times=np.arange(0., .500, .100), title='patterns',
                  **joint_kwargs)

###############################################################################
# Temporal generalization
# ^^^^^^^^^^^^^^^^^^^^^^^
#
# Temporal generalization is an extension of the decoding over time approach.
# It consists in evaluating whether the model estimated at a particular
# time instant accurately predicts any other time instant. It is analogous to
# transferring a trained model to a distinct learning problem, where the
# problems correspond to decoding the patterns of brain activity recorded at
# distinct time instants.
Example 10
    coef = scaler.inverse_transform([coef])[0]

    # The data was vectorized to fit a single model across all time points and
    # all channels. We thus reshape it:
    coef = coef.reshape(len(meg_epochs.ch_names), -1)

    # Plot
    evoked = EvokedArray(coef, meg_epochs.info, tmin=epochs.tmin)
    evoked.plot_topomap(title='MEG %s' % name)

###############################################################################
# Let's do the same on EEG data using a scikit-learn pipeline

X = epochs.pick_types(meg=False, eeg=True).get_data()
y = epochs.events[:, 2]

# Define a unique pipeline to sequentially:
clf = make_pipeline(
    Vectorizer(),                       # 1) vectorize across time and channels
    StandardScaler(),                   # 2) normalize features across trials
    LinearModel(LogisticRegression()))  # 3) fits a logistic regression
clf.fit(X, y)

# Extract and plot patterns and filters
for name in ('patterns_', 'filters_'):
    # The `inverse_transform` parameter will call this method on any estimator
    # contained in the pipeline, in reverse order.
    coef = get_coef(clf, name, inverse_transform=True)
    evoked = EvokedArray(coef, epochs.info, tmin=epochs.tmin)
    evoked.plot_topomap(title='EEG %s' % name[:-1])
Example 11
    # *** 2.1) create bootstrap sample ***
    # extract random subjects from overall sample
    resampled_subjects = random.choice(range(betas_cue[0, ...].shape[0]),
                                       betas_cue[0, ...].shape[0],
                                       replace=True)
    # resampled betas
    resampled_betas = betas_cue[0, ...][resampled_subjects, :]

    # *** 2.2) estimate effect of moderator (i.e., PBI) on group-level ***
    # set up and fit moderator (i.e., PBI) model using bootstrap sample
    model_boot = LinearRegression(fit_intercept=False)
    model_boot.fit(X=group.iloc[resampled_subjects], y=resampled_betas)

    # extract regression coefficients
    group_coefs = get_coef(model_boot, 'coef_')

    # save bootstrap betas
    for pred_i, predictor in enumerate(group.columns):
        if 'pbi_rt' in predictor:
            # store regression coefficient for moderator (i.e., PBI)
            group_pp_betas[i, :] = group_coefs[:, pred_i]

    # remove prev object
    del resampled_betas

    # *** 2.3) compute test statistic for bootstrap sample ***
    # compute standard error
    resampled_betas = betas_null[resampled_subjects, :]
    se = resampled_betas.std(axis=0) / np.sqrt(resampled_betas.shape[0])
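
The snippet is cut off at this point; a plausible continuation (an assumption, following the usual bootstrap-t approach) would divide the mean of the resampled betas by this standard error:

# hypothetical continuation: bootstrap-t statistic for this sample
t_val = resampled_betas.mean(axis=0) / se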
Example 12
    else:
        sel = np.where(~np.isnan(y))[0]
td = SlidingEstimator(clf,
                      scoring=make_scorer(scorer),
                      n_jobs=24,
                      **kwargs)
# run decoding
cv = StratifiedKFold(8)
scores = list()
patterns = list()
filters = list()
for train, test in cv.split(X[sel], y[sel]):
    td.fit(X[sel][train], y[sel][train])
    score = td.score(X[sel][test], y[sel][test])
    scores.append(score)
    patterns.append(get_coef(td, 'patterns_', inverse_transform=True))
    filters.append(get_coef(td, 'filters_', inverse_transform=True))
scores = np.mean(scores, axis=0)
patterns = np.mean(patterns, axis=0)
filters = np.mean(filters, axis=0)
if 'angle' in analysis:
    patterns = np.mean(np.abs(patterns), axis=1)
    filters = np.mean(np.abs(filters), axis=1)
scores = np.reshape(scores, (n_freqs, n_times))
patterns = np.reshape(patterns, (n_channels, n_freqs, n_times))
filters = np.reshape(filters, (n_channels, n_freqs, n_times))
# save cross-validated scores
fname = results_folder +\
    '%s_tf_scores_%s_%s.npy' % (subject, epoch_type, analysis)
np.save(fname, np.array(scores))
fname = results_folder +\
Example 13
# customize the temporal decoding process
# define a 10-split stratified shuffle cross-validation
cv = StratifiedShuffleSplit(n_splits=10, random_state=12345)
coefs = []
scores = []
for time_ in tqdm(range(data.shape[-1]), desc='temporal decoding'):
    coef = []
    scores_ = []
    # at each time point, we use the frequency information in each channel as the features
    for train, test in cv.split(data, labels):
        data_ = data[train, :, :, time_]
        clf = make_clf(True, True)
        clf.fit(data_, labels[train])
        #        print(time_,metrics.classification_report(clf.predict(data[test,:,:,time_]),labels[test]))
        # get the patterns decoded by the classifier
        coef_ = get_coef(clf, 'patterns_', True)
        coef.append(coef_)
        temp = metrics.roc_auc_score(
            labels[test],
            clf.predict_proba(data[test, :, :, time_])[:, -1])
        scores_.append(temp)
    print('\n', '%d' % time_, 'auc = ', np.mean(scores_), '\n')
    coefs.append(np.array(coef))
    scores.append(scores_)
coefs = np.array(coefs)
scores = np.array(scores)
# get info object to plot the "pattern"
temp_epochs = mne.read_epochs(working_dir + 'sub5_d2-eventsRelated-epo.fif')
info = temp_epochs.info
import pickle
pickle.dump([scores, info, coefs],
            open(saving_dir + 'score_info_coefs.p', 'wb'))
Example 14
    # --- 2) vectorize (eeg-channel) data for linear regression analysis ---
    # data to be analysed
    data = subject.get_data()

    # vectorize data across channels
    Y = Vectorizer().fit_transform(data)

    # --- 3) fit linear model with sklearn's LinearRegression ---
    # we already have an intercept column in the design matrix,
    # thus we'll call LinearRegression with fit_intercept=False
    linear_model = LinearRegression(fit_intercept=False)
    linear_model.fit(design, Y)

    # --- 4) extract the resulting coefficients (i.e., betas) ---
    # extract betas
    coefs = get_coef(linear_model, 'coef_')
    # only keep relevant predictor
    betas[iteration, :] = coefs[:, pred_col]

    # calculate coefficient of determination (r-squared)
    r_squared[iteration, :] = r2_score(Y,
                                       linear_model.predict(design),
                                       multioutput='raw_values')

    # clean up
    del linear_model

###############################################################################
# create design matrix from group-level regression

# z-score age predictor
Example 15
def run_gat(subj, decoder="ridge", n_jobs=2):
    """
    Function to run Generalization Across Time (GAT).

    Parameters
    ----------
    subj : int
        The subject to process.
    decoder : str
        Type of classifier: 'ridge' for ridge regression (default),
        'lin-svm' for linear SVM, 'svm' for nonlinear (RBF) SVM, and
        'log_reg' for logistic regression.
    n_jobs : int
        The number of jobs to run in parallel.
    """
    # load cue A and cue B epochs
    epochs = get_epochs(subj)['Correct A', 'Correct B']

    # specify whether to use a linear or nonlinear SVM if SVM is used
    lin = ''  # if not svm it doesn't matter, both log_reg and ridge are linear
    if "svm" in decoder:
        decoder, lin = decoder.split("-")

    # build classifier pipeline #
    # pick a machine learning algorithm to use (ridge/SVM/logistic regression)
    decoder_dict = {
        "ridge":
        RidgeClassifier(class_weight='balanced', random_state=42,
                        solver="sag"),
        "svm":
        SVC(class_weight='balanced',
            kernel=("rbf" if "non" in lin else "linear"),
            random_state=42),
        "log_reg":
        LogisticRegression(class_weight='balanced', random_state=42)
    }

    # get data and targets
    data = epochs.get_data()
    labels = epochs.events[:, -1]

    # create classifier pipeline
    clf = make_pipeline(StandardScaler(), decoder_dict[decoder])
    gen_clf = GeneralizingEstimator(clf, scoring="roc_auc", n_jobs=n_jobs)

    # compute cross validated performance scores
    scores = cross_val_multiscore(gen_clf, data, labels, cv=5,
                                  n_jobs=n_jobs).mean(0)

    # calculate prediction confidence scores
    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    preds = np.empty((len(labels), data.shape[2], data.shape[2]))
    for train, test in cv.split(data, labels):
        gen_clf.fit(data[train], labels[train])
        d = gen_clf.decision_function(data[test])
        preds[test] = d

    # compute topographical patterns
    dat = Vectorizer().fit_transform(data)
    clf.fit(dat, labels)
    dat = dat - dat.mean(0, keepdims=True)

    # look for the type of classifier and get the weights
    if decoder == 'ridge':
        filt_ = clf.named_steps.ridgeclassifier.coef_.copy()
    elif decoder == 'svm':
        filt_ = clf.named_steps.svc.coef_.copy()
    elif decoder == 'log_reg':
        filt_ = clf.named_steps.logisticregression.coef_.copy()

    # Compute patterns using Haufe's trick: A = Cov_X . W . Precision_Y
    # (cf. Haufe et al., 2014, NeuroImage,
    # doi:10.1016/j.neuroimage.2013.10.067)
    inv_y = 1.  # a single decision variable, so Precision_Y is a scalar
    patt_ = np.cov(dat.T).dot(filt_.T.dot(inv_y)).T

    # store the patterns accordingly
    if decoder == 'ridge':
        clf.named_steps.ridgeclassifier.patterns_ = patt_
    elif decoder == 'svm':
        clf.named_steps.svc.patterns_ = patt_
    elif decoder == 'log_reg':
        clf.named_steps.logisticregression.patterns_ = patt_

    # back transform using steps in pipeline
    patterns = get_coef(clf, 'patterns_', inverse_transform=True)

    # return subject scores,  prediction confidence and topographical patterns
    return scores, preds, patterns
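
Hypothetical usage (not in the original source): run the GAT analysis for one subject and plot the resulting temporal-generalization matrix.

# assumes run_gat and its helpers (get_epochs, etc.) are importable
import matplotlib.pyplot as plt

scores, preds, patterns = run_gat(subj=1, decoder='ridge', n_jobs=2)

fig, ax = plt.subplots()
im = ax.imshow(scores, origin='lower', cmap='RdBu_r', vmin=0., vmax=1.)
ax.set_xlabel('Testing time (samples)')
ax.set_ylabel('Training time (samples)')
ax.set_title('Generalization across time (ROC AUC)')
fig.colorbar(im, ax=ax)
plt.show()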