Example #1
import numpy as np

from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

from mne.decoding import GeneralizingEstimator, cross_val_multiscore

# note: get_epochs() is a project-specific helper that loads a subject's
# epochs; it is assumed to be importable from the surrounding project code


def run_gat(name, decoder="ridge"):
    """
    Function to run Generalization Across Time (GAT).

    Parameters
    ----------
    name: str
        Name (pseudonym) of individual subject.
    decoder: str
        Type of classifier to use: 'ridge' for Ridge Regression (default),
        'svm-lin' for a linear SVM, 'svm-nonlin' for a nonlinear (RBF) SVM,
        and 'log_reg' for Logistic Regression.
    """
    # load high cloze epochs
    epochs = get_epochs(name)['song', 'voice']

    # specify whether to use a linear or nonlinear SVM if SVM is used
    lin = ''  # only relevant for SVM; log_reg and ridge are always linear
    if "svm" in decoder:
        decoder, lin = decoder.split("-")

    # build classifier pipeline #
    # pick a machine learning algorithm to use (ridge/SVM/logistic regression)
    decoder_dict = {
        "ridge":
        RidgeClassifier(class_weight='balanced', random_state=42,
                        solver="sag"),
        "svm":
        SVC(class_weight='balanced',
            kernel=("rbf" if "non" in lin else "linear"),
            random_state=42),
        "log_reg":
        LogisticRegression(class_weight='balanced', random_state=42)
    }

    clf = make_pipeline(StandardScaler(), decoder_dict[decoder])
    gen_clf = GeneralizingEstimator(clf, scoring="roc_auc", n_jobs=4)
    # get data and targets (extract once and reuse below)
    data = epochs.get_data()
    labels = epochs.events[:, -1]

    # compute cross-validated performance scores (averaged over folds)
    scores = cross_val_multiscore(gen_clf, data, labels, cv=5,
                                  n_jobs=4).mean(0)

    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    # calculate prediction confidence scores
    preds = np.empty((len(labels), data.shape[2], data.shape[2]))
    for train, test in cv.split(data, labels):
        gen_clf.fit(data[train], labels[train])
        d = gen_clf.decision_function(data[test])
        preds[test] = d

    return scores, preds  # return subject scores and prediction confidence
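A minimal usage sketch (not part of the original example): it assumes a hypothetical subject pseudonym 'sub01' that get_epochs() can resolve, and uses matplotlib to display the returned GAT matrix. Axes are labeled in samples because the function does not return the epochs' time vector.

import matplotlib.pyplot as plt

# 'sub01' is a placeholder pseudonym; get_epochs() must be able to resolve it
scores, preds = run_gat('sub01', decoder='ridge')

# training time on the y-axis, testing time on the x-axis (both in samples)
fig, ax = plt.subplots()
im = ax.imshow(scores, origin='lower', cmap='RdBu_r', vmin=0.4, vmax=0.6)
ax.set_xlabel('Testing time (samples)')
ax.set_ylabel('Training time (samples)')
ax.set_title('GAT matrix (ROC AUC)')
fig.colorbar(im, ax=ax, label='ROC AUC')
plt.show()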
Example #2
import numpy as np

from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

from mne.decoding import (GeneralizingEstimator, Vectorizer,
                          cross_val_multiscore, get_coef)

# note: get_epochs() is a project-specific helper that loads a subject's
# epochs; it is assumed to be importable from the surrounding project code


def run_gat(subj, decoder="ridge", n_jobs=2):
    """
    Function to run Generalization Across Time (GAT).

    Parameters
    ----------
    subj: int
        Subject identifier passed to get_epochs().
    decoder: str
        Type of classifier to use: 'ridge' for Ridge Regression (default),
        'svm-lin' for a linear SVM, 'svm-nonlin' for a nonlinear (RBF) SVM,
        and 'log_reg' for Logistic Regression.
    n_jobs: int
        The number of jobs to run in parallel.
    """
    # load cue A and cue B epochs
    epochs = get_epochs(subj)['Correct A', 'Correct B']

    # specify whether to use a linear or nonlinear SVM if SVM is used
    lin = ''  # only relevant for SVM; log_reg and ridge are always linear
    if "svm" in decoder:
        decoder, lin = decoder.split("-")

    # build classifier pipeline #
    # pick a machine learning algorithm to use (ridge/SVM/logistic regression)
    decoder_dict = {
        "ridge":
        RidgeClassifier(class_weight='balanced', random_state=42,
                        solver="sag"),
        "svm":
        SVC(class_weight='balanced',
            kernel=("rbf" if "non" in lin else "linear"),
            random_state=42),
        "log_reg":
        LogisticRegression(class_weight='balanced', random_state=42)
    }

    # get data and targets
    data = epochs.get_data()
    labels = epochs.events[:, -1]

    # create classifier pipeline
    clf = make_pipeline(StandardScaler(), decoder_dict[decoder])
    gen_clf = GeneralizingEstimator(clf, scoring="roc_auc", n_jobs=n_jobs)

    # compute cross validated performance scores
    scores = cross_val_multiscore(gen_clf, data, labels, cv=5,
                                  n_jobs=n_jobs).mean(0)

    # calculate prediction confidence scores
    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    preds = np.empty((len(labels), data.shape[2], data.shape[2]))
    for train, test in cv.split(data, labels):
        gen_clf.fit(data[train], labels[train])
        d = gen_clf.decision_function(data[test])
        preds[test] = d

    # compute topographical patterns
    # vectorize (n_trials, n_channels, n_times) into (n_trials, n_features)
    dat = Vectorizer().fit_transform(data)
    clf.fit(dat, labels)
    # center the features for the covariance computation below
    dat = dat - dat.mean(0, keepdims=True)

    # look up the fitted estimator by classifier type and copy its weights
    if decoder == 'ridge':
        filt_ = clf.named_steps.ridgeclassifier.coef_.copy()
    elif decoder == 'svm':
        # note: SVC only exposes coef_ when the kernel is linear
        filt_ = clf.named_steps.svc.coef_.copy()
    elif decoder == 'log_reg':
        filt_ = clf.named_steps.logisticregression.coef_.copy()

    # Compute patterns using Haufe's trick: A = Cov_X . W . Precision_Y
    # (cf. Haufe et al., 2014, NeuroImage,
    # doi:10.1016/j.neuroimage.2013.10.067)
    # with a single binary target, Precision_Y reduces to a scalar; fixing it
    # to 1 only rescales the patterns without changing their topography
    inv_y = 1.
    patt_ = np.cov(dat.T).dot(filt_.T.dot(inv_y)).T

    # store the patterns accordingly
    if decoder == 'ridge':
        clf.named_steps.ridgeclassifier.patterns_ = patt_
    elif decoder == 'svm':
        clf.named_steps.svc.patterns_ = patt_
    elif decoder == 'log_reg':
        clf.named_steps.logisticregression.patterns_ = patt_

    # back transform using steps in pipeline
    patterns = get_coef(clf, 'patterns_', inverse_transform=True)

    # return subject scores, prediction confidence, and topographical patterns
    return scores, preds, patterns
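A minimal usage sketch for this variant (not part of the original example), assuming a hypothetical subject number 1 that get_epochs() accepts: it plots the diagonal of the GAT matrix, i.e. classifiers trained and tested on the same time sample, against the chance level of 0.5.

import numpy as np
import matplotlib.pyplot as plt

# subject number 1 is a placeholder; get_epochs() must be able to resolve it
scores, preds, patterns = run_gat(1, decoder='ridge', n_jobs=2)

# the diagonal corresponds to training and testing on the same time sample
plt.plot(np.diag(scores), label='ridge')
plt.axhline(0.5, color='k', linestyle='--', label='chance (ROC AUC = 0.5)')
plt.xlabel('Time (samples)')
plt.ylabel('ROC AUC')
plt.legend()
plt.show()

# patterns can additionally be mapped back to sensor space
# (e.g. via mne.EvokedArray) for topographic plots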