예제 #1
0
def get_scores_from_gat(epochs, seed):
    """Fit a temporal-generalization decoder and score it on a held-out split.

    The data returned by ``epochs.get_data()`` are split 80/20 into train and
    test sets; the binary target is ``epochs.events[:, 2] == 2``. A
    ``StandardScaler`` + ``LinearSVC`` pipeline is wrapped in a
    ``GeneralizingEstimator`` scored with ROC AUC.

    As a side effect, the 20 largest-magnitude weights of one fitted
    estimator (index 1 of ``time_gen.estimators_``) are printed together with
    their unit indices.

    Parameters
    ----------
    epochs : object
        Must provide ``get_data()`` and an ``events`` array.
    seed : int
        ``random_state`` forwarded to ``train_test_split``.

    Returns
    -------
    time_gen : GeneralizingEstimator
        The estimator fitted on the training split.
    scores : array
        Result of ``time_gen.score`` on the test split.
    """
    from sklearn.svm import LinearSVC
    X_train, X_test, y_train, y_test = train_test_split(
        epochs.get_data(),
        epochs.events[:, 2] == 2,
        test_size=0.2,
        random_state=seed)
    clf = make_pipeline(StandardScaler(),
                        LinearSVC(random_state=0, tol=1e-5, penalty='l2'))
    time_gen = GeneralizingEstimator(clf, scoring='roc_auc', n_jobs=-2)
    time_gen.fit(X_train, y_train)
    scores = time_gen.score(X_test, y_test)

    # Negated absolute weights, computed once: ascending order of this array
    # is descending order of weight magnitude.
    # NOTE(review): index 1 is hard-coded; presumably the second time point.
    neg_abs_coef = np.negative(
        np.abs(time_gen.estimators_[1]._final_estimator.coef_))
    top_units = np.transpose(np.argsort(neg_abs_coef))[0:20]
    top_weights = np.transpose(np.sort(neg_abs_coef))[0:20]

    # The original adjacent literals ('...subject' 's number:') silently
    # dropped the apostrophe and printed "subjects number:".
    print("Units with highest weights of a classifier trained to predict "
          "subject's number:")
    print(list(zip(top_units, top_weights)))

    return time_gen, scores
예제 #2
0
def train_test(X_train, y_train, X_test, y_test,
               clf=clf, scoring=scoring, n_jobs=n_jobs):
    """Fit a GeneralizingEstimator on one data set and score it on another.

    Parameters
    ----------
    X_train, y_train
        Data and targets used for fitting.
    X_test, y_test
        Data and targets used for scoring.
    clf, scoring, n_jobs
        Forwarded to ``GeneralizingEstimator``; the defaults are the
        module-level objects of the same names, bound at definition time.

    Returns
    -------
    The value returned by ``GeneralizingEstimator.score`` on the test data.
    """
    generalizer = GeneralizingEstimator(clf, scoring=scoring, n_jobs=n_jobs)
    generalizer.fit(X=X_train, y=y_train)
    test_scores = generalizer.score(X=X_test, y=y_test)
    return test_scores
예제 #3
0
def run_gat(name, decoder="ridge", n_jobs=4):
    """
    Function to run Generalization Across Time (GAT).

    Parameters
    ----------
    name: str
        Name (pseudonym) of individual subject.
    decoder: str
        Type of classifier: 'ridge' for Ridge Regression (default),
        'log_reg' for Logistic Regression, 'lin-svm' (or 'svm-lin') for a
        linear SVM, and 'svm' for a nonlinear (RBF) SVM. Any other SVM spec
        selects the RBF kernel only if its qualifier contains 'non'
        (e.g. 'svm-nonlin'); otherwise the linear kernel is used.
    n_jobs: int
        Number of parallel jobs for the generalizing estimator and for the
        cross-validated scoring. Defaults to 4, the value that used to be
        hard-coded.

    Returns
    -------
    scores:
        Output of ``cross_val_multiscore`` (5 folds, ROC AUC) averaged over
        folds.
    preds: ndarray, shape (n_epochs, n_times, n_times)
        ``decision_function`` output for every epoch, taken from the fold in
        which that epoch was held out.
    """
    # load high cloze epochs
    epochs = get_epochs(name)['song', 'voice']

    # Work out the SVM kernel. The former ``decoder, lin = decoder.split("-")``
    # raised for both documented specs: 'lin-svm' left decoder == 'lin'
    # (KeyError below) and a bare 'svm' could not be unpacked.
    kernel = "linear"  # irrelevant for ridge / log_reg, which are linear
    if "svm" in decoder:
        qualifier = decoder.replace("svm", "").strip("-")
        # A bare 'svm' is documented as the nonlinear (RBF) variant.
        kernel = "rbf" if (not qualifier or "non" in qualifier) else "linear"
        decoder = "svm"

    # build classifier pipeline #
    # pick a machine learning algorithm to use (ridge/SVM/logistic regression)
    decoder_dict = {
        "ridge":
        RidgeClassifier(class_weight='balanced', random_state=42,
                        solver="sag"),
        "svm":
        SVC(class_weight='balanced', kernel=kernel, random_state=42),
        "log_reg":
        LogisticRegression(class_weight='balanced', random_state=42)
    }

    # get data and targets once instead of re-extracting them per use
    data = epochs.get_data()
    labels = epochs.events[:, -1]

    clf = make_pipeline(StandardScaler(), decoder_dict[decoder])
    gen_clf = GeneralizingEstimator(clf, scoring="roc_auc", n_jobs=n_jobs)

    # compute cross validated performance scores
    scores = cross_val_multiscore(gen_clf, data, labels, cv=5,
                                  n_jobs=n_jobs).mean(0)

    # random_state is only meaningful (and only accepted by current
    # scikit-learn) together with shuffle=True.
    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

    # calculate prediction confidence scores; the time dimension is taken
    # from the data rather than assumed to be 225 samples
    n_times = data.shape[2]
    preds = np.empty((len(labels), n_times, n_times))
    for train, test in cv.split(data, labels):
        gen_clf.fit(data[train], labels[train])
        preds[test] = gen_clf.decision_function(data[test])

    return scores, preds  # return subject scores and prediction confidence
def NeuralNet(X_train, y_train, X_test, y_test, scorer, predict_mode, params):
    """Cross-validated temporal generalization with a small neural network.

    A ``StandardScaler`` + ``MLPClassifier`` pipeline is wrapped in a
    ``GeneralizingEstimator`` and evaluated on the training data with a
    shuffled 4-fold stratified cross-validation.

    Parameters
    ----------
    X_train, y_train
        Data and labels passed to ``cross_val_multiscore``.
    X_test, y_test, predict_mode, params
        Accepted for signature compatibility; not used by this function.
    scorer : str
        ``'scorer_auc'`` selects ROC AUC and ``'accuracy'`` selects the
        estimator's default score. Any other value prints a notice and also
        falls back to the default score.

    Returns
    -------
    scores
        The value returned by ``cross_val_multiscore``.
    """
    # Model
    model = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(3,), random_state=1)

    # Cross-validation scheme
    cv = StratifiedKFold(n_splits=4, random_state=0, shuffle=True)

    # Pipeline: scaling followed by the model
    clf = make_pipeline(StandardScaler(), model)

    # Define scorer. Strings must be compared with ``==``; ``is`` tests
    # object identity and only matched by accident of string interning.
    if scorer == 'scorer_auc':
        scoring = 'roc_auc'
    elif scorer == 'accuracy':
        scoring = None
    else:
        # Make the fallback real: previously the unknown value was still
        # passed through as the scorer despite this message.
        print('using accuracy as the scorer')
        scoring = None

    # Learning and scoring
    time_gen = GeneralizingEstimator(clf, n_jobs=2, scoring=scoring)
    scores = cross_val_multiscore(time_gen, X_train, y_train, cv=cv, n_jobs=2)

    return scores
예제 #5
0
def test_get_coef_multiclass_full(n_classes, n_channels, n_times):
    """Run multiclass decoding end to end and check the extracted patterns."""
    from sklearn.pipeline import make_pipeline
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import StratifiedKFold
    n_per_class = 10
    data = np.zeros((n_per_class * n_classes, n_channels, n_times))
    # Only channel 0 is informative: it holds the class index of each epoch.
    for label in range(n_classes):
        start = label * n_per_class
        data[start:start + n_per_class, 0] = label
    n_epochs = len(data)
    events = np.zeros((n_epochs, 3), int)
    events[:, 0] = np.arange(n_epochs)
    events[:, 2] = data[:, 0, 0]
    info = create_info(n_channels, 1000., 'eeg')
    epochs = EpochsArray(data, info, events, tmin=0)
    base_model = LinearModel(
        LogisticRegression(random_state=0, multi_class='ovr'))
    clf = make_pipeline(Scaler(epochs.info), Vectorizer(), base_model)
    time_gen = GeneralizingEstimator(clf, 'roc_auc_ovr_weighted',
                                     verbose=True)
    X, y = epochs.get_data(), epochs.events[:, 2]
    n_splits = 3
    scores = cross_val_multiscore(time_gen, X, y,
                                  cv=StratifiedKFold(n_splits=n_splits),
                                  verbose=True)
    # One score per split, plus a train-time x test-time grid when there is
    # more than one time point.
    if n_times > 1:
        want = (n_splits, n_times, n_times)
    else:
        want = (n_splits,)
    assert scores.shape == want
    assert_array_less(0.8, scores)
    clf.fit(X, y)
    patterns = get_coef(clf, 'patterns_', inverse_transform=True)
    assert patterns.shape == (n_classes, n_channels, n_times)
    # no channel other than the first should carry a pattern
    assert_allclose(patterns[:, 1:], 0., atol=1e-7)
def LogisticRegression(X_train, y_train, X_test, y_test, scorer, predict_mode, params):
    """Cross-validated temporal generalization with logistic regression.

    A ``StandardScaler`` + class-balanced ``linear_model.LogisticRegression``
    pipeline is wrapped in a ``GeneralizingEstimator`` and evaluated on the
    training data with a shuffled 4-fold stratified cross-validation.

    Parameters
    ----------
    X_train, y_train
        Data and labels passed to ``cross_val_multiscore``.
    X_test, y_test, predict_mode, params
        Accepted for signature compatibility; not used by this function.
    scorer : str
        ``'scorer_auc'`` selects ROC AUC and ``'accuracy'`` selects the
        estimator's default score. Any other value prints a notice and also
        falls back to the default score.

    Returns
    -------
    scores
        The value returned by ``cross_val_multiscore``.
    """
    # Model
    model = linear_model.LogisticRegression(class_weight='balanced')

    # Cross-validation scheme
    cv = StratifiedKFold(n_splits=4, random_state=0, shuffle=True)

    # Pipeline: scaling followed by the model
    clf = make_pipeline(StandardScaler(), model)

    # Define scorer. Strings must be compared with ``==``; ``is`` tests
    # object identity and only matched by accident of string interning.
    if scorer == 'scorer_auc':
        scoring = 'roc_auc'
    elif scorer == 'accuracy':
        scoring = None
    else:
        # Make the fallback real: previously the unknown value was still
        # passed through as the scorer despite this message.
        print('using accuracy as the scorer')
        scoring = None

    # Learning and scoring
    time_gen = GeneralizingEstimator(clf, n_jobs=2, scoring=scoring)
    scores = cross_val_multiscore(time_gen, X_train, y_train, cv=cv, n_jobs=2)

    return scores
예제 #7
0
 def devp_estimator_gat(self, **kwargs):
     """Build the temporal-generalization estimator for ridge regression.

     Returns an unfitted ``GeneralizingEstimator`` wrapping a
     ``StandardScaler`` + ``LinearModel(Ridge())`` pipeline, scored with
     ``scorer_spearman`` (through ``make_scorer``) and run with 6 jobs.

     Parameters
     ----------
     **kwargs
         Accepted but ignored (see the note in the body).
     """
     # Each name is imported once; the duplicated and unused imports were
     # removed.
     from mne.decoding import GeneralizingEstimator, LinearModel
     from sklearn.pipeline import make_pipeline
     from sklearn.preprocessing import StandardScaler
     from sklearn.linear_model import Ridge
     from sklearn.metrics import make_scorer
     from jr.gat import scorer_spearman
     # NOTE(review): the original reset ``kwargs = dict()`` before expanding
     # it, so caller-supplied keyword arguments never reached the estimator.
     # That behaviour is kept; confirm whether forwarding was intended.
     clf = make_pipeline(StandardScaler(), LinearModel(Ridge()))
     gat = GeneralizingEstimator(clf,
                                 scoring=make_scorer(scorer_spearman),
                                 n_jobs=6)
     return gat
 n_vertices = len(stcs[0].data)
 n_epochs = len(epochs.events)
 X = np.zeros([n_epochs, n_vertices, n_times])
 for jj, stc in enumerate(stcs):
     X[jj] = stc.data
 # Loop across each analysis
 for analysis in epoch_analyses:
     # define to-be-predicted values
     y = np.array(events[analysis])
     # Define estimators depending on the analysis
     if ('cue_side' in analysis or 'cue_type' in analysis):
         clf = make_pipeline(StandardScaler(),
                             LinearModel(LogisticRegression()))
         kwargs = dict()
         clf = GeneralizingEstimator(clf,
                                     scoring='roc_auc',
                                     n_jobs=24,
                                     **kwargs)
         le = preprocessing.LabelEncoder()
         le.fit(y)
         y = le.transform(y)
     elif 'sfreq' in analysis[:14]:
         clf = make_pipeline(StandardScaler(), LinearModel(Ridge()))
         scorer = scorer_spearman
         kwargs = dict()
         clf = GeneralizingEstimator(clf,
                                     scoring=make_scorer(scorer),
                                     n_jobs=24,
                                     **kwargs)
         y = np.array(y, dtype=float)
     elif 'angle' in analysis[:14]:
         clf = make_pipeline(
    IX_plur = IX2sentences[v + 1]['plural']
    mean_activation[v + 1]['singular'] = np.mean(X[IX_sing, :, :], axis=0)
    mean_activation[v + 1]['plural'] = np.mean(X[IX_plur, :, :], axis=0)
    mean_activation[v + 1]['difference'] = np.mean(
        X[IX_sing, :, :], axis=0) - np.mean(X[IX_plur, :, :], axis=0)
    Y = np.zeros(num_trials)
    Y[IX_sing] = 1
    Y[IX_plur] = 2
    del IX_sing, IX_plur
    assert all(Y > 0)
    print(list(Y).count(1), list(Y).count(2))

    # clf = make_pipeline(StandardScaler(), LinearSVC(class_weight='balanced'))
    clf = make_pipeline(LinearSVC(class_weight='balanced'))
    time_gen = GeneralizingEstimator(clf,
                                     scoring='roc_auc',
                                     n_jobs=-1,
                                     verbose=True)
    cv = StratifiedKFold(n_splits=5, random_state=0, shuffle=True)
    #cv = StratifiedShuffleSplit(n_splits=20, random_state=0)

    scores = []
    for i, (train, test) in enumerate(cv.split(X, Y)):
        time_gen.fit(X[train], Y[train])
        scores.append(time_gen.score(X[test], Y[test]))
        # list (len=#cv-splits) of sublists (len=#timepoints):
        curr_weights_clf = np.asarray([
            np.squeeze(w._final_estimator.coef_) for w in time_gen.estimators_
        ])
        weights_clf[v + 1]['splits'].append(curr_weights_clf)

    weights_clf[v + 1]['mean'] = np.mean(np.asarray(weights_clf[v +
예제 #10
0
    scorer = make_scorer(get_scorer(scorer_spearman))
    score = 'Spearman R'
    cv = KFold(5)
if regressor == 'condition':
    y = binary_scaler(y)
    clf = make_pipeline(StandardScaler(), LogisticRegression())
    scorer = 'roc_auc'
    score = 'AUC'
    cv = StratifiedKFold(5)

n_jobs = -1

# set up estimator, get scores
if decode_using == 'spatial':
    gen = GeneralizingEstimator(n_jobs=n_jobs,
                            scoring=scorer,
                            base_estimator=clf)
    scores = cross_val_multiscore(gen, X, y,
                                  cv=cv)
elif decode_using == 'temporal':
    gen = SlidingEstimator(n_jobs=n_jobs,
                        scoring=scorer,
                        base_estimator=clf)
    scores = cross_val_multiscore(gen, X, y,
                                cv=cv)
else:
    # scoring defaults to neg mean squared so set to scorer
    # shuffle must be true when binary values, otherwise fold will only have
        # one value
    scores = cross_val_score(clf, X, y,
                            scoring=scorer, #defaults to neg mean squared
                          typ='epoch_preprocessed',
                          sbj=sbj,
                          preload=True)
    sleep_epochs.event_id = sleep_event_id  # event_id remapping. For wake this step works during preprocessing

    X1, y1 = get_Xy_balanced(wake_epochs, contrast1, n_sample=nsample)
    X2, y2 = get_Xy_balanced(sleep_epochs, contrast2, n_sample=nsample)
    X3, y3 = get_Xy_balanced(sleep_epochs, contrast3, n_sample=nsample)
    X4, y4 = get_Xy_balanced(sleep_epochs, contrast4, n_sample=nsample)
    X5, y5 = get_Xy_balanced(sleep_epochs, contrast5, n_sample=nsample)

    del wake_epochs
    del sleep_epochs

    clf = GeneralizingEstimator(make_pipeline(
        StandardScaler(), LogisticRegression(max_iter=4000)),
                                scoring='accuracy',
                                n_jobs=6)
    # clf = GeneralizingEstimator(make_pipeline(StandardScaler(), SVC(kernel='linear')),
    #                         scoring='accuracy', n_jobs=6)

    cv = StratifiedKFold(n_splits=2, shuffle=True)

    scores1, scores2, scores3, scores4, scores5 = [[] for i in range(5)]

    for train_idx, test_idx in cv.split(X1, y1):
        clf.fit(X1[train_idx], y=y1[train_idx])
        scores1.append(clf.score(X1[test_idx], y=y1[test_idx]))
    scores2.append(clf.score(X2, y=y2))
    scores3.append(clf.score(X3, y=y3))
    scores4.append(clf.score(X4, y=y4))
    scores5.append(clf.score(X5, y=y5))
 le = LabelEncoder()
 le.fit(y)
 y = le.transform(y)
 sel = np.where(y != 0)[0]
 y = y[sel]
 X = epochs._data[sel]
 le = LabelEncoder()
 le.fit(y_con)
 y_con = le.transform(y_con)
 sel = np.where(y_con != 0)[0]
 y_con = y_con[sel]
 X_con = epochs_con._data[sel]
 # Define estimators depending on the analysis
 clf = make_pipeline(StandardScaler(), LinearModel(LogisticRegression()))
 kwargs = dict()
 est = GeneralizingEstimator(clf, scoring='roc_auc', n_jobs=24, **kwargs)
 # Run decoding
 cv = StratifiedKFold(12)
 scores = list()
 scores_con = list()
 for train, test in cv.split(X, y):
     est.fit(X[train], y[train])  # train during WM task
     score = est.score(X[test], y[test])  # test during WM task
     score_con = est.score(X_con, y_con)  # test during control task
     scores.append(score)
     scores_con.append(score_con)
 scores = np.mean(scores, axis=0)
 scores_con = np.mean(scores_con, axis=0)
 # save cross-validated scores
 fname = results_folder +\
     '%s_scores_%s.npy' % (subject, analysis)
예제 #13
0
                          cross_val_multiscore, LinearModel, get_coef,
                          Vectorizer, CSP)
from pyitab.ext.sklearn._validation import cross_validate
from collections import Counter
from imblearn.under_sampling import RandomUnderSampler

import h5py
import hdf5storage
import numpy as np
from scipy.io import loadmat, savemat

# Decoding pipeline: normalisation, univariate feature selection, then a
# logistic-regression classifier wrapped in LinearModel.
# NOTE(review): if Normalizer is sklearn.preprocessing.Normalizer it rescales
# each sample to unit norm, which is not a z-score — confirm which is meant.
clf = make_pipeline(Normalizer(),  # normalization step (see note above)
                    SelectKBest(f_classif, k=50),  # keep the 50 best features for speed
                    LinearModel(LogisticRegression(C=1, solver='liblinear')))
# Alternative kept by the author: time-by-time decoding without generalization.
#time_decod = SlidingEstimator(clf, scoring='accuracy')
time_decod = GeneralizingEstimator(clf, scoring='accuracy')


# Directory whose files are loaded below (a mounted network share).
shared = "/run/user/1000/gvfs/smb-share:server=192.168.30.54,share=meg_data_analisi/HCP_Motor_Task_analysis/109123/"


# Accumulators; nothing in the visible part of the script fills them.
scores_ses = []
decoders = []

bigdata = []
for f in os.listdir(shared):
    fname = os.path.join(shared, f)
    # NOTE(review): the HDF5 file handle is never closed in the visible code.
    mat = h5py.File(fname)
    # Read the whole 'powerbox' dataset into memory.
    data = mat['powerbox'][:]
    # Scale by the global mean, ignoring NaNs.
    data /= np.nanmean(data)
    # Swap axes 1 and 2 and store as float32.
    data = np.float32(data.swapaxes(1, 2))
예제 #14
0
def main():
    """Run frequency-generalization decoding for samples 1 to 21.

    For every sample the epochs are transformed with Morlet wavelets at 15
    log-spaced frequencies between 2 and 25 Hz. For every time index of the
    wavelet output, a ``GeneralizingEstimator`` around a shrinkage LDA is
    cross-validated (5 folds, accuracy). The fold-averaged score matrix is
    plotted as a bar plot of its diagonal and as a heat map, and all matrices
    of a sample are written to one CSV file.

    All outputs go to ``Results/<model_type>/<exp_name>/sample_<n>/``, which
    is created (including missing parents) when necessary.
    """
    model_type = "lda"
    exp_name = "freq_gen_matrix/"

    for sample in range(1, 22):
        print("sample {}".format(sample))

        # makedirs also creates missing parent directories and does not fail
        # when the directory already exists, unlike the previous
        # isdir()/mkdir() pair.
        out_dir = "Results/{}/{}/sample_{}".format(model_type, exp_name,
                                                   sample)
        os.makedirs(out_dir, exist_ok=True)

        epochs = get_epochs(sample, scale=False)
        y_train = epochs.events[:, 2]

        freqs = np.logspace(*np.log10([2, 25]), num=15)
        n_cycles = freqs / 4.
        string_freqs = [round(x, 2) for x in freqs]

        print("applying morlet wavelet")

        wavelet_output = tfr_array_morlet(epochs.get_data(),
                                          sfreq=epochs.info['sfreq'],
                                          freqs=freqs,
                                          n_cycles=n_cycles,
                                          output='complex')

        # One (n_freqs, n_freqs) score matrix per index of the last axis.
        n_steps = wavelet_output.shape[3]
        time_results = np.zeros((n_steps, len(freqs), len(freqs)))

        # ``t_idx`` rather than ``time`` so the loop variable cannot shadow a
        # module of that name.
        for t_idx in range(n_steps):
            print("time: {}".format(t_idx))

            # Stack real and imaginary parts along axis 1 so the data handed
            # to MNE are real-valued.
            wavelet_epochs = wavelet_output[:, :, :, t_idx]
            wavelet_epochs = np.append(wavelet_epochs.real,
                                       wavelet_epochs.imag,
                                       axis=1)

            wavelet_info = mne.create_info(ch_names=wavelet_epochs.shape[1],
                                           sfreq=epochs.info['sfreq'],
                                           ch_types='mag')
            wavelet_epochs = mne.EpochsArray(wavelet_epochs,
                                             info=wavelet_info,
                                             events=epochs.events)

            x_train = pca(80, wavelet_epochs, plot=False)

            model = LinearModel(
                LinearDiscriminantAnalysis(solver='lsqr', shrinkage='auto'))
            freq_gen = GeneralizingEstimator(model,
                                             n_jobs=1,
                                             scoring='accuracy',
                                             verbose=True)
            scores = cross_val_multiscore(freq_gen,
                                          x_train,
                                          y_train,
                                          cv=5,
                                          n_jobs=1)
            scores = np.mean(scores, axis=0)
            time_results[t_idx] = scores

            # Bar plot of the diagonal (train frequency == test frequency).
            # x/y are passed by keyword: current seaborn releases no longer
            # accept them positionally.
            sns.set()
            ax = sns.barplot(
                x=np.sort(string_freqs),
                y=np.diag(scores),
            )
            ax.set(ylim=(0, 0.8),
                   xlabel='Frequencies',
                   ylabel='Accuracy',
                   title='Cross Val Accuracy {} for Subject {} for Time {}'.
                   format(model_type, sample, t_idx))
            ax.axhline(0.12, color='k', linestyle='--')
            ax.figure.set_size_inches(8, 6)
            ax.figure.savefig(
                "{}/time_{}_accuracy.png".format(out_dir, t_idx),
                dpi=300)
            plt.close('all')

            # Heat map of the full train-by-test frequency matrix.
            fig, ax = plt.subplots(1, 1)
            im = ax.imshow(scores,
                           interpolation='lanczos',
                           origin='lower',
                           cmap='RdBu_r',
                           extent=[2, 25, 2, 25],
                           vmin=0.,
                           vmax=0.8)
            ax.set_xlabel('Testing Frequency (hz)')
            ax.set_ylabel('Training Frequency (hz)')
            ax.set_title(
                'Frequency generalization for Subject {} at Time {}'.format(
                    sample, t_idx))
            plt.colorbar(im, ax=ax)
            ax.grid(False)
            ax.figure.savefig(
                "{}/time_{}_matrix.png".format(out_dir, t_idx),
                dpi=300)
            plt.close('all')

        # Flatten each score matrix into one CSV row.
        time_results = time_results.reshape(time_results.shape[0], -1)
        all_results_df = pd.DataFrame(time_results)
        all_results_df.to_csv(
            "{}/all_time_matrix_results.csv".format(out_dir))
예제 #15
0
def _print_pipeline_steps(pipeline):
    """Print every step of a scikit-learn pipeline, numbered from 1."""
    for step_no, (_, step) in enumerate(pipeline.steps, start=1):
        print('Step #{:}: {:}'.format(step_no, step))


def _make_time_estimator(clf, decoding_method, generalize_across_time,
                         n_jobs):
    """Wrap *clf* in a Generalizing/Sliding estimator for a 'standard*' method.

    The scoring is derived from the method name: R^2 when it contains both
    'reg' and 'r2', mean squared error when it contains only 'reg', and
    accuracy otherwise.
    """
    if 'reg' in decoding_method:
        if 'r2' in decoding_method:
            scoring = metrics.make_scorer(metrics.r2_score)
        else:
            scoring = metrics.make_scorer(metrics.mean_squared_error)
    else:
        scoring = 'accuracy'

    if generalize_across_time:
        wrapper = GeneralizingEstimator
    else:
        wrapper = SlidingEstimator
    return wrapper(clf, scoring=scoring, n_jobs=n_jobs)


def decode(epochs,
           get_y_label_func,
           epoch_filter=None,
           decoding_method='standard',
           sliding_window_size=None,
           sliding_window_step=None,
           n_jobs=multiprocessing.cpu_count(),
           equalize_event_counts=True,
           only_fit=False,
           generalize_across_time=True):
    """
    Basic flow for decoding.

    Parameters
    ----------
    epochs
        Epochs to decode. When ``equalize_event_counts`` is true, the
        ``events`` and ``event_id`` of the (possibly filtered) epochs are
        overwritten with the decoding labels.
    get_y_label_func : callable
        Called with the epochs; returns one label per epoch.
    epoch_filter
        If not None, used to index ``epochs`` before anything else.
    decoding_method : str
        A name starting with 'standard' (optionally containing 'reg', 'r2'
        and/or 'raw'), or one of 'ERP_cov', 'Xdawn_cov', 'Hankel_cov'.
    sliding_window_size, sliding_window_step
        Required for 'standard*' methods whose name does not contain 'raw'.
    n_jobs : int
        Number of jobs for the Generalizing/Sliding estimator.
    equalize_event_counts : bool
        Equalize the number of epochs per label before decoding.
    only_fit : bool
        If true, fit on all the data without cross-validation.
    generalize_across_time : bool
        For 'standard*' methods, use a ``GeneralizingEstimator`` (true) or a
        ``SlidingEstimator`` (false).

    Returns
    -------
    dict
        Keys: ``procedure``, ``estimator``, ``scores``, ``pipeline``,
        ``preprocess``, ``cv``, ``times`` and ``config``.

    Raises
    ------
    ValueError
        If ``decoding_method`` is not recognised.
    """

    config = dict(equalize_event_counts=equalize_event_counts,
                  only_fit=only_fit,
                  sliding_window_size=sliding_window_size,
                  sliding_window_step=sliding_window_step,
                  decoding_method=decoding_method,
                  generalize_across_time=generalize_across_time,
                  epoch_filter=str(epoch_filter))

    if epoch_filter is not None:
        epochs = epochs[epoch_filter]

    #-- Classify epochs into groups (training epochs)
    y_labels = get_y_label_func(epochs)

    if equalize_event_counts:
        # Re-label the events with the decoding targets so the epochs can be
        # equalized per target.
        epochs.events[:, 2] = y_labels
        epochs.event_id = {str(label): label for label in np.unique(y_labels)}
        min_n_items_per_y_label = min(
            len(epochs[cond]) for cond in epochs.event_id.keys())
        print("\nEqualizing the number of epochs to %d per condition..." %
              min_n_items_per_y_label)
        epochs.equalize_event_counts(epochs.event_id.keys())
        y_labels = epochs.events[:, 2]

    print("The epochs were classified into %d groups:" % len(set(y_labels)))
    for g in set(y_labels):
        print("Group {:}: {:} epochs".format(g, sum(np.array(y_labels) == g)))

    #-- Create the decoding pipeline
    print("Creating the classification pipeline...")

    epochs_data = epochs.get_data()

    preprocess_pipeline = None
    is_standard = decoding_method.startswith('standard')

    if is_standard:

        if 'reg' in decoding_method:
            clf = make_pipeline(StandardScaler(), Ridge())
        else:
            clf = make_pipeline(
                StandardScaler(),
                svm.SVC(C=1, kernel='linear', class_weight='balanced'))

        if 'raw' not in decoding_method:
            assert sliding_window_size is not None
            assert sliding_window_step is not None
            preprocess_pipeline = make_pipeline(
                umne.transformers.SlidingWindow(
                    window_size=sliding_window_size,
                    step=sliding_window_step,
                    average=True))

    elif decoding_method == 'ERP_cov':
        clf = make_pipeline(
            UnsupervisedSpatialFilter(PCA(20), average=False),
            ERPCovariances(
                estimator='lwf'),  # todo how to apply sliding window?
            CSP(30, log=False),
            TangentSpace('logeuclid'),
            LogisticRegression('l2'))  # todo why logistic regression?

    elif decoding_method == 'Xdawn_cov':
        clf = make_pipeline(
            UnsupervisedSpatialFilter(PCA(50), average=False),
            XdawnCovariances(12, estimator='lwf', xdawn_estimator='lwf'),
            TangentSpace('logeuclid'), LogisticRegression('l2'))

    elif decoding_method == 'Hankel_cov':
        clf = make_pipeline(
            UnsupervisedSpatialFilter(PCA(70), average=False),
            HankelCovariances(delays=[1, 8, 12, 64], estimator='oas'),
            CSP(15, log=False), TangentSpace('logeuclid'),
            LogisticRegression('l2'))

    else:
        # ValueError is still caught by callers handling ``Exception``.
        raise ValueError(
            'Unknown decoding method: {:}'.format(decoding_method))

    print('\nDecoding pipeline:')
    _print_pipeline_steps(clf)

    if preprocess_pipeline is not None:
        print('\nApplying the pre-processing pipeline:')
        _print_pipeline_steps(preprocess_pipeline)
        epochs_data = preprocess_pipeline.fit_transform(epochs_data)

    if only_fit:

        #-- Only fit the decoders

        procedure = 'only_fit'
        scores = None
        cv = None

        if is_standard:
            estimator = _make_time_estimator(clf, decoding_method,
                                             generalize_across_time, n_jobs)
        else:
            estimator = clf

        estimator.fit(X=epochs_data, y=y_labels)

    else:

        #-- Classify & score -- cross-validation

        procedure = 'fit_and_score'
        print(
            "\nCreating a classifier and calculating accuracy scores (this may take some time)..."
        )

        cv = StratifiedKFold(n_splits=5)
        if is_standard:
            estimator = _make_time_estimator(clf, decoding_method,
                                             generalize_across_time, n_jobs)
            scores = cross_val_multiscore(estimator=estimator,
                                          X=epochs_data,
                                          y=np.array(y_labels),
                                          cv=cv)
        else:
            scores = _run_cross_validation(X=epochs_data,
                                           y=np.array(y_labels),
                                           clf=clf,
                                           cv=cv)
            estimator = 'None'  # Estimator is not defined in the case of Riemannian decoding

    # One time stamp per sample of the (possibly windowed) data.
    times = np.linspace(epochs.tmin, epochs.tmax, epochs_data.shape[2])

    return dict(procedure=procedure,
                estimator=estimator,
                scores=scores,
                pipeline=clf,
                preprocess=preprocess_pipeline,
                cv=cv,
                times=times,
                config=config)
 #  Keep only eye tracker signal (x and y eye position)
 epochs.pick_channels(['UADC009-2104', 'UADC010-2104'])
 for analysis in epoch_analyses:
     fname = results_folder +\
             '%s_scores_%s_%s.npy' % (subject, epoch_type, analysis)
     # define to-be-predicted values
     y = np.array(events[analysis])
     # Define estimators depending on the analysis
     if 'angle' in analysis[:14]:
         clf = AngularRegression(make_pipeline(StandardScaler(),
                                               LinearModel(Ridge())),
                                 independent=False)
         scorer = scorer_angle
         kwargs = dict()
         gat = GeneralizingEstimator(clf,
                                     scoring=make_scorer(scorer),
                                     n_jobs=24,
                                     **kwargs)
         y = np.array(y, dtype=float)
     elif 'sfreq' in analysis[:14]:
         clf = make_pipeline(StandardScaler(), LinearModel(Ridge()))
         scorer = scorer_spearman
         kwargs = dict()
         gat = GeneralizingEstimator(clf,
                                     scoring=make_scorer(scorer),
                                     n_jobs=24,
                                     **kwargs)
         y = np.array(y, dtype=float)
     elif ('cue_side' in analysis or 'cue_type' in analysis):
         clf = make_pipeline(StandardScaler(),
                             LinearModel(LogisticRegression()))
         kwargs = dict()
예제 #17
0
    epochs.equalize_event_counts(epochs.event_id)

    # Extract half of the epochs for similar SNR in all conditions
    epochs_nr = len(epochs['left/grating'])
    epochs_range = np.random.permutation(np.arange(0, epochs_nr, 1))
    np.save('%s_epochs_range.npy' % subject, epochs_range)

    X = np.concatenate(
        (epochs["face"][epochs_range[:int(epochs_nr / 2)]].get_data(),
         epochs["grating"][epochs_range[:int(epochs_nr / 2)]].get_data()))
    y = np.concatenate(
        (np.zeros(int(epochs_nr / 2)), np.ones(int(epochs_nr / 2))))
    cv = StratifiedKFold(n_splits=10, shuffle=True)

    clf = make_pipeline(StandardScaler(), LogisticRegression())
    time_gen = GeneralizingEstimator(clf, scoring='roc_auc', n_jobs=n_jobs)
    time_gen.fit(X, y)
    scores = cross_val_multiscore(time_gen, X, y, cv=cv)

    # Save results
    joblib.dump(time_gen, "%s_time_gen_svm.jbl" % subject)
    np.save("%s_time_gen_score_svm.npy" % subject, scores)

    X_left = np.concatenate(
        (epochs["left/face"].get_data(), epochs["left/grating"].get_data()))
    y_left = np.concatenate((np.zeros(len(epochs["left/face"].get_data())),
                             np.ones(len(epochs["left/grating"].get_data()))))
    clf = make_pipeline(StandardScaler(), LogisticRegression())
    time_gen_left = GeneralizingEstimator(clf,
                                          scoring='roc_auc',
                                          n_jobs=n_jobs)
                tmax=tmax,
                preload=True,
                baseline=(None, 0),
                decim=10)
# Keep MEG channels only, excluding the reference MEG channels.
epochs.pick_types(meg=True, ref_meg=False)
# Loop across analyses: each one is decoded separately and its
# fold-averaged temporal-generalization scores are saved to its own file.
for analysis in analyses:
    fname = results_folder +\
        '%s_scores_%s_%s.npy' % (subject, 'Cue', analysis)
    # define to-be-predicted values
    y = np.array(events_behavior[analysis])

    clf = make_pipeline(StandardScaler(), LinearModel(LogisticRegression()))
    # No extra estimator options; the empty dict is expanded below.
    kwargs = dict()
    # Encode the labels as integers, then keep only trials whose encoded
    # label is non-zero.
    # NOTE(review): presumably code 0 marks trials to exclude — confirm.
    le = LabelEncoder()
    le.fit(y)
    y = le.transform(y)
    sel = np.where(y != 0)[0]
    # Run decoding: 12-fold stratified cross-validation on the kept trials
    cv = StratifiedKFold(12)
    scores = list()
    X = epochs._data
    gat = GeneralizingEstimator(clf, scoring='roc_auc', n_jobs=24, **kwargs)
    for train, test in cv.split(X[sel], y[sel]):
        # train/test index into the selected subset, hence X[sel][...]
        gat.fit(X[sel][train], y[sel][train])
        score = gat.score(X[sel][test], y[sel][test])
        scores.append(score)
    # Average the score matrices across folds.
    scores = np.mean(scores, axis=0)
    # keep scores
    np.save(fname, np.array(scores))
예제 #19
0
# Display options shared by both parts of the joint plot: time in seconds.
joint_kwargs = dict(ts_args=dict(time_unit='s'),
                    topomap_args=dict(time_unit='s'))
# Joint plot with topomaps from 0 s in 0.1 s steps, stopping before 0.5 s.
evoked.plot_joint(times=np.arange(0., .500, .100), title='patterns',
                  **joint_kwargs)

###############################################################################
# Temporal Generalization
# -----------------------
#
# This runs the analysis used in [1]_ and further detailed in [2]_
#
# The idea is to fit the models on each time instant and see how it
# generalizes to any other time point.

# define the Temporal Generalization object
time_gen = GeneralizingEstimator(clf, n_jobs=1, scoring='roc_auc')

# 5-fold cross-validated scores, one score array per fold
scores = cross_val_multiscore(time_gen, X, y, cv=5, n_jobs=1)

# Mean scores across cross-validation splits
scores = np.mean(scores, axis=0)

# Plot the diagonal (it's exactly the same as the time-by-time decoding above)
fig, ax = plt.subplots()
ax.plot(epochs.times, np.diag(scores), label='score')
# Horizontal reference line at AUC = 0.5, labelled as chance
ax.axhline(.5, color='k', linestyle='--', label='chance')
ax.set_xlabel('Times')
ax.set_ylabel('AUC')
ax.legend()
# Vertical line at time zero
ax.axvline(.0, color='k', linestyle='-')
ax.set_title('Decoding MEG sensors over time')
예제 #20
0
def run_gat(subj, decoder="ridge", n_jobs=2):
    """
    Function to run Generalization Across Time (GAT).

    Parameters
    ----------
    subj: int
        Subject identifier, forwarded unchanged to ``get_epochs``.
    decoder: str
        Specify type of classifier - 'ridge' for Ridge Regression (default),
        'lin-svm' for linear SVM, 'svm' for nonlinear (RBF) SVM and 'log_reg'
        for Logistic Regression. The spellings 'svm-lin' and 'svm-nonlin'
        are accepted as well.
    n_jobs: int
        The number of jobs to run in parallel.

    Returns
    -------
    scores: ndarray
        Cross-validated ROC AUC scores of the generalizing estimator,
        averaged over the 5 folds.
    preds: ndarray, shape (len(labels), data.shape[2], data.shape[2])
        Out-of-fold decision function values (prediction confidence) of
        every epoch.
    patterns: ndarray | None
        Topographical patterns derived from the classifier weights with
        Haufe's trick, or None for the nonlinear SVM, which exposes no
        ``coef_`` to derive patterns from.

    Raises
    ------
    KeyError
        If ``decoder`` does not name one of the supported classifiers.
    """
    # load cue A and cue B epochs
    epochs = get_epochs(subj)['Correct A', 'Correct B']

    # Resolve the SVM kernel from the decoder name. The qualifier may come
    # before or after 'svm' ('lin-svm' / 'svm-lin'); a bare 'svm' selects the
    # nonlinear RBF kernel, as documented above.
    kernel = "linear"  # only used when an SVM is requested
    name_parts = decoder.split("-")
    if "svm" in name_parts:
        qualifier = "".join(part for part in name_parts if part != "svm")
        if not qualifier or "non" in qualifier:
            kernel = "rbf"
        decoder = "svm"

    # build classifier pipeline #
    # pick a machine learning algorithm to use (ridge/SVM/logistic regression)
    decoder_dict = {
        "ridge":
        RidgeClassifier(class_weight='balanced', random_state=42,
                        solver="sag"),
        "svm":
        SVC(class_weight='balanced', kernel=kernel, random_state=42),
        "log_reg":
        LogisticRegression(class_weight='balanced', random_state=42)
    }

    # get data and targets
    data = epochs.get_data()
    labels = epochs.events[:, -1]

    # create classifier pipeline
    clf = make_pipeline(StandardScaler(), decoder_dict[decoder])
    gen_clf = GeneralizingEstimator(clf, scoring="roc_auc", n_jobs=n_jobs)

    # compute cross validated performance scores
    scores = cross_val_multiscore(gen_clf, data, labels, cv=5,
                                  n_jobs=n_jobs).mean(0)

    # calculate prediction confidence scores: every epoch gets the decision
    # values of the model that did not see it during training
    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    n_times = data.shape[2]
    preds = np.empty((len(labels), n_times, n_times))
    for train, test in cv.split(data, labels):
        gen_clf.fit(data[train], labels[train])
        preds[test] = gen_clf.decision_function(data[test])

    # a nonlinear SVM has no weight vector, hence no patterns to compute
    if decoder == "svm" and kernel != "linear":
        return scores, preds, None

    # compute topographical patterns
    dat = Vectorizer().fit_transform(data)
    clf.fit(dat, labels)
    dat = dat - dat.mean(0, keepdims=True)

    # the fitted classifier is the last step of the pipeline, whatever its
    # type; take a copy of its weights
    final_estimator = clf.steps[-1][1]
    filt_ = final_estimator.coef_.copy()

    # Compute patterns using Haufe's trick: A = Cov_X . W . Precision_Y
    # cf.Haufe, et al., 2014, NeuroImage,
    # doi:10.1016/j.neuroimage.2013.10.067)
    inv_y = 1.
    patt_ = np.cov(dat.T).dot(filt_.T.dot(inv_y)).T

    # store the patterns on the classifier so that get_coef can find them
    final_estimator.patterns_ = patt_

    # back transform using steps in pipeline
    patterns = get_coef(clf, 'patterns_', inverse_transform=True)

    # return subject scores,  prediction confidence and topographical patterns
    return scores, preds, patterns
예제 #21
0
        # Build mean_test2 by averaging consecutive groups of trials taken
        # from data_test2.
        # NOTE(review): `mean` is not defined in this excerpt - presumably
        # numpy's mean made available by an import elsewhere; confirm.
        mean_test2 = []
        ind_trial = 0
        # advance in steps of 5 for as long as at least 5 trials remain
        while ind_trial <= len(data_test2) - 5:
            # NOTE(review): the slice covers 4 trials while the loop steps by
            # 5, so every fifth trial is left out - confirm this is intended.
            mean_test2.append(mean(data_test2[ind_trial:(ind_trial + 4)], 0))
            # progress output (Python 2 print statement)
            print ind_trial
            ind_trial += 5

        #---------------------------
        # define decoding pipeline and run
        #---------------------------

        # Use AUC because chance level is same regardless of the class balance
        clf = make_pipeline(StandardScaler(),
                            LinearModel(LogisticRegression()))
        time_gen = GeneralizingEstimator(clf, n_jobs=1, scoring='roc_auc')
        #

        # We will train the classifier on all stim face vs house trials
        # and test on all images face vs house trials.

        #le = LabelEncoder()

        # train on stim: the entries of mean_train1 are labelled 0 and those
        # of mean_train2 are labelled 1 (assumes both are lists, so that `+`
        # concatenates them - TODO confirm)
        time_gen.fit(X=np.array(mean_train1 + mean_train2),
                     y=np.array([0] * len(mean_train1) +
                                [1] * len(mean_train2)))

        # score on imagery
        scores = time_gen.score(X=np.array(mean_test1 + mean_test2),
                                y=np.array([0] * len(mean_test1) +
예제 #22
0
                   ['target_angle_cue_right_angle', 'right_angle'],
                   ['left_angle', 'target_angle_cue_left_angle'],
                   ['right_angle', 'target_angle_cue_right_angle']]
# Loop across each pair of analyses
# Each pair holds two keys of `events`: item 0 provides the test targets and
# item 1 the training targets.
for paired_analysis in paired_analyses:
    y_test = np.array(events[paired_analysis[0]])
    y_train = np.array(events[paired_analysis[1]])
    # Define estimators depending on the analysis; the branch is selected
    # from the first 14 characters of the test analysis name.
    # NOTE(review): when neither branch matches, clf, scorer and gat are not
    # reassigned during this iteration.
    if 'angle' in paired_analysis[0][:14]:
        # angle targets: angular regression built on a Ridge model
        clf = make_pipeline(
            StandardScaler(),
            LinearModel(AngularRegression(Ridge(), independent=False)))
        scorer = scorer_angle
        kwargs = dict()
        gat = GeneralizingEstimator(clf,
                                    scoring=make_scorer(scorer),
                                    n_jobs=24,
                                    **kwargs)
        # targets are cast to float for the regression
        y_test = np.array(y_test, dtype=float)
        y_train = np.array(y_train, dtype=float)
    elif 'sfreq' in paired_analysis[0][:14]:
        # sfreq targets: plain Ridge regression scored with scorer_spearman
        clf = make_pipeline(StandardScaler(), LinearModel(Ridge()))
        scorer = scorer_spearman
        kwargs = dict()
        gat = GeneralizingEstimator(clf,
                                    scoring=make_scorer(scorer),
                                    n_jobs=24,
                                    **kwargs)
        # targets are cast to float for the regression
        y_test = np.array(y_test, dtype=float)
        y_train = np.array(y_train, dtype=float)
    # only consider trials with correct fixation
    # (indices of the trials where events['is_eye_fixed'] equals 1)
    sel = np.where(events['is_eye_fixed'] == 1)[0]
예제 #23
0
    epochs = mne.read_epochs(os.path.join(data_path, '%s-epo.fif' % subject),
                             preload=True)
    epochs.interpolate_bads(reset_bads=True)

    all_epochs.append(epochs)

# Concatenate the epochs collected in all_epochs and decimate them
epochs = mne.concatenate_epochs(all_epochs)

decim = 2
epochs.decimate(decim)

stim_epochs = epochs['stim']
imag_epochs = epochs['imag']

# We will train the classifier on all stim face vs house trials
# and test on all images face vs house trials.
clf = make_pipeline(StandardScaler(), LogisticRegression())
time_gen = GeneralizingEstimator(clf, n_jobs=6, scoring='roc_auc')

le = LabelEncoder()

# train on stim, with the event codes re-encoded by the label encoder
y_stim = le.fit_transform(stim_epochs.events[:, 2])
time_gen.fit(X=stim_epochs.get_data(), y=y_stim)

# score on imagery; the encoder is fitted again on the imagery event codes
y_imag = le.fit_transform(imag_epochs.events[:, 2])
scores = time_gen.score(X=imag_epochs.get_data(), y=y_imag)

# Plot
fig, ax = plt.subplots(1)
im = ax.matshow(scores,
                vmin=0,
    # pipeline for classification: robust scaling, PCA, L1-penalised
    # linear SVM
    cl = make_pipeline(RobustScaler(), PCA(n_components=var_exp),
                       LinearSVC(penalty="l1", dual=False, max_iter=10000))

    # temporal generalisation scored with ROC AUC
    temp_genr = GeneralizingEstimator(cl, scoring="roc_auc", n_jobs=1)

    # shuffled stratified folds; a zero array shaped like X stands in for
    # the data when generating the split indices
    kf = StratifiedKFold(n_splits=k_folds, shuffle=True)
    cv_iter = kf.split(np.zeros(X.shape), labels)

    # cross validation; keep the scores of this run
    scores = cross_val_multiscore(temp_genr, X, labels, cv=cv_iter, n_jobs=-1)
    scores_all.append(scores)

# Destination of the stacked scores for this subject
scores_path = op.join(output_dir, "reg_vs_odd_svm-{}.npy".format(subject))

# Stack the collected score arrays vertically and write them to disk
scores_all = np.vstack(scores_all)
np.save(scores_path, scores_all)
예제 #25
0
# pipeline for classification: robust scaling, PCA, L1-penalised linear SVM
cl = make_pipeline(RobustScaler(), PCA(n_components=var_exp),
                   LinearSVC(penalty="l1", dual=False, max_iter=10000))

# temporal generalisation scored with accuracy
temp_genr = GeneralizingEstimator(cl, scoring=make_scorer(accuracy_score),
                                  n_jobs=1)

# shuffled stratified folds; a zero array shaped like X stands in for the
# data when generating the split indices
kf = StratifiedKFold(n_splits=k_folds, shuffle=True)
cv_iter = kf.split(np.zeros(X.shape), labels)

# cross validation; the single score array is stacked as one block
scores = cross_val_multiscore(temp_genr, X, labels, cv=cv_iter, n_jobs=-1)
scores_all = np.vstack([scores])

# destination of the scores for this subject
scores_path = op.join(output_dir,
                      "reg_vs_odd_svm_balanced-{}.npy".format(subject))
# Band pass filtering signals (1. - 30.)
raw.filter(1., 30., fir_design='firwin')
events = mne.read_events(events_fname)
event_id = {
    'Auditory/Left': 1,
    'Auditory/Right': 2,
    'Visual/Left': 3,
    'Visual/Right': 4,
}
tmin, tmax = -0.050, 0.400
decim = 2  # decimate to make the example faster to run
epochs = mne.Epochs(raw, events, event_id=event_id, tmin=tmin, tmax=tmax,
                    decim=decim, picks=picks, proj=True, baseline=None,
                    preload=True, reject={'mag': 5e-12})

###############################################################################
# We will train the classifier on all left visual vs auditory trials
# and test on all right visual vs auditory trials.
clf = make_pipeline(StandardScaler(), LogisticRegression())
time_gen = GeneralizingEstimator(clf, n_jobs=1, scoring='roc_auc',
                                 verbose=True)

# Fit classifiers on the epochs where the stimulus was presented to the left.
# Note that the experimental condition y indicates auditory or visual
# (event codes above 2 are the visual ones)
left_epochs = epochs['Left']
time_gen.fit(X=left_epochs.get_data(), y=left_epochs.events[:, 2] > 2)

###############################################################################
# Score on the epochs where the stimulus was presented to the right.
right_epochs = epochs['Right']
scores = time_gen.score(X=right_epochs.get_data(),
                        y=right_epochs.events[:, 2] > 2)

###############################################################################
# Plot
fig, ax = plt.subplots(1)
im = ax.matshow(scores, vmin=0, vmax=1., cmap='RdBu_r', origin='lower',
예제 #27
0
    clf.fit(X, y)

    # Time-resolved decoding: cross-validated scores of a sliding estimator
    sl = SlidingEstimator(clf)
    scores_time_decoding = cross_val_multiscore(sl, X, y)

    # The scores of this particular file start the group array; the scores
    # of every other file are appended along the first axis
    starts_group = file == './epochs/101_base-epo.fif'
    if starts_group:
        scores_td_base = scores_time_decoding
    else:
        scores_td_base = np.concatenate(
            (scores_td_base, scores_time_decoding), axis=0)

    # Temporal generalization, scored with the area under the ROC curve
    gen = GeneralizingEstimator(clf, scoring='roc_auc')
    scores_gat = cross_val_multiscore(gen, X, y)

    # Same accumulation scheme for the generalization scores
    if starts_group:
        scores_gat_base = scores_gat
    else:
        scores_gat_base = np.concatenate((scores_gat_base, scores_gat),
                                         axis=0)

# Process every '*reg-epo.fif' epochs file found under `path`
for file in glob.glob(os.path.join(path, '*reg-epo.fif')):

    epochs_reg = mne.read_epochs(file, preload=True)

    # Resample and crop the epochs that were just loaded. These calls must
    # act on epochs_reg: acting on epochs_base would leave the new epochs
    # untouched and keep re-processing an object from the earlier loop.
    epochs_reg.resample(256)

    epochs_reg.crop(tmin=-0.25, tmax=epochs_reg.tmax)

    # Work on a copy with the number of 'A' and 'B' events equalized
    epochs_reg_eq = epochs_reg.copy().equalize_event_counts(['A', 'B'])[0]
                    tmin=tmin,
                    tmax=tmax,
                    proj=True,
                    picks=picks,
                    baseline=None,
                    preload=True,
                    reject=dict(mag=5e-12),
                    decim=decim,
                    verbose='error')

###############################################################################
# We will train the classifier on all left visual vs auditory trials
# and test on all right visual vs auditory trials.
clf = make_pipeline(StandardScaler(), LogisticRegression(solver='lbfgs'))
time_gen = GeneralizingEstimator(clf, n_jobs=1, scoring='roc_auc',
                                 verbose=True)

# Fit classifiers on the epochs where the stimulus was presented to the left.
# Note that the experimental condition y indicates auditory or visual
X_left = epochs['Left'].get_data()
y_left = epochs['Left'].events[:, 2] > 2
time_gen.fit(X=X_left, y=y_left)

###############################################################################
# Score on the epochs where the stimulus was presented to the right.
X_right = epochs['Right'].get_data()
y_right = epochs['Right'].events[:, 2] > 2
scores = time_gen.score(X=X_right, y=y_right)

###############################################################################
# Plot
fig, ax = plt.subplots(1)
im = ax.matshow(scores,
예제 #29
0
path = '/home/carlos/mount/megmri03/monks'
# Subjects are the directory entries whose name has neither '.' nor '_'
subjects = [s for s in os.listdir(path) if '.' not in s and '_' not in s]

# Load monk data in the form of n_samples x n_voxels x n_time
# (only the first subject is requested)
ds, _, _ = load_subject_ds(
    path,
    subjects[:1],
    #os.path.join(path, 'subjects.csv'),
    'meditation_permut1.conf',
    'fmri',
    roi_labels=atlas_dict,
    prepro=MonksPreprocessingPipeline())

# Keep only the samples belonging to group 'E'
ds = SampleSlicer({'group': ['E']}).transform(ds)

# Linear SVM on standardized features, generalized across time
clf = make_pipeline(StandardScaler(), LinearSVC(C=1))
time_gen = GeneralizingEstimator(clf, n_jobs=20, scoring='accuracy')

scores_dict = {}
# Generalization of time
for template in os.listdir(path_templates):

    # the network name is the file name without its last 21 characters
    network = template[:-21]
    ds_network = FeatureSlicer({network: ['!0']}).transform(ds)

    n_samples, n_voxels = ds_network.shape
    # regroup the samples in blocks of 135, then move that axis to the end
    data = ds_network.samples.reshape(-1, 135, n_voxels)
    X = np.rollaxis(data, 1, 3)
    # alternating 0/1 targets, one per block
    y = np.arange(len(data)) % 2
예제 #30
0
# Joint plot of the patterns at 0, 100, ..., 400 ms
pattern_times = np.arange(0., .500, .100)
evoked.plot_joint(times=pattern_times, title='patterns', **joint_kwargs)

###############################################################################
# Temporal Generalization
# -----------------------
#
# This runs the analysis used in [1]_ and further detailed in [2]_
#
# The idea is to fit the models on each time instant and see how it
# generalizes to any other time point.

# define the Temporal Generalization object
time_gen = GeneralizingEstimator(clf, scoring='roc_auc', n_jobs=1,
                                 verbose=True)

# 5-fold cross-validated scores, averaged across the splits
scores = cross_val_multiscore(time_gen, X, y, cv=5, n_jobs=1).mean(axis=0)

# Plot the diagonal (it's exactly the same as the time-by-time decoding above)
fig, ax = plt.subplots()
ax.plot(epochs.times, np.diag(scores), label='score')
ax.axhline(.5, color='k', linestyle='--', label='chance')
ax.set(xlabel='Times', ylabel='AUC')
ax.legend()
ax.axvline(.0, color='k', linestyle='-')
예제 #31
0
# %%
# Iterate over the subject indices 1..10
for idx in range(1, 11):
    # Loading data ------------------------------------------
    # running_name is 'MEG_S' followed by the zero-padded two-digit index
    running_name = f'MEG_S{idx:02d}'
    band_name = 'U07'

    # NOTE(review): MEG_Worker and its pipeline() are defined outside this
    # excerpt; presumably pipeline() loads/prepares the epochs read below
    # (clean_epochs, denoise_epochs) - confirm.
    worker = MEG_Worker(running_name=running_name)
    worker.pipeline(band_name=band_name)

    # MVPA ----------------------------------------------------------------
    # Prepare classifiers
    # RBF-kernel SVM with balanced class weights on standardized features,
    # wrapped in a generalizing estimator scored with 'f1'
    _svm = svm.SVC(gamma='scale', kernel='rbf', class_weight='balanced')
    clf = make_pipeline(StandardScaler(), _svm)
    estimator = GeneralizingEstimator(clf,
                                      n_jobs=n_jobs,
                                      scoring='f1',
                                      verbose=1)

    # Prepare paired X and y
    # Get X and y for class 1
    X1, y1 = pair_X_y(worker.clean_epochs, 1)

    # Get X and y for class 2
    X2, y2 = pair_X_y(worker.denoise_epochs['2'], 2)

    # Concatenate X and y
    # (class 1 samples first, class 2 samples after, along the first axis)
    X_all = np.concatenate([X1, X2], axis=0)
    y_all = np.concatenate([y1, y2], axis=0)

    # Get time line
    times = worker.clean_epochs.times