def dotheclassification(ds,
                        classifier,
                        bilateral
                        ):
    """Cross-validate the chosen classifier across participants.

    A classifier is built according to ``classifier`` and wrapped in a
    ``mv.CrossValidation`` that partitions ``ds`` on the ``participant``
    sample attribute and scores each fold with ``mv.mean_match_accuracy``.

    Parameters
    ----------
    ds : dataset
        Dataset to cross-validate. It needs the sample attributes
        ``participant`` and ``bilat_ROIs`` (if ``bilateral``) or ``all_ROIs``
        (otherwise). For the SGD variants the selected ROI attribute is
        copied into ``ds.sa['targets']``, i.e. ``ds`` is modified in place.
    classifier : str
        ``'gnb'``   -- GNB with common variance,
        ``'sgd'``   -- sklearn SGDClassifier (hinge loss, l2 penalty),
        ``'l-sgd'`` -- the same SGD learner wrapped in
        ``mv.MulticlassClassifier``.
    bilateral : bool
        Whether the ROIs are combined across hemispheres; selects
        ``bilat_ROIs`` instead of ``all_ROIs`` as the label attribute.

    Returns
    -------
    The ``mv.CrossValidation`` object after it has been called on ``ds``
    (its ``stats`` conditional attribute is enabled).

    Raises
    ------
    ValueError
        If ``classifier`` is not one of the supported names.
    """
    supported = ('gnb', 'l-sgd', 'sgd')
    if classifier not in supported:
        # Fail early and explicitly instead of hitting an unbound ``clf``.
        raise ValueError(
            "unknown classifier %r; expected one of %s"
            % (classifier, ', '.join(supported)))

    if classifier == 'gnb':
        # GNB can read its labels from an arbitrary sample attribute.
        clf = mv.GNB(common_variance=True,
                     prior='ratio',
                     space='bilat_ROIs' if bilateral else 'all_ROIs')
    else:
        # The SGD learners are fed their labels through the sample attribute
        # called 'targets', so copy the relevant ROI labels there.
        if bilateral:
            ds.sa['targets'] = ds.sa.bilat_ROIs
        else:
            ds.sa['targets'] = ds.sa.all_ROIs

        # imported lazily so that sklearn is only required for the SGD variants
        from sklearn.linear_model import SGDClassifier

        clf = mv.SKLLearnerAdapter(SGDClassifier(loss='hinge',
                                                 penalty='l2',
                                                 class_weight='balanced'))
        if classifier == 'l-sgd':
            # 1-vs-1 decisions (instead of one-vs-all)
            clf = mv.MulticlassClassifier(clf)

    cv = mv.CrossValidation(clf, mv.NFoldPartitioner(attr='participant'),
                            errorfx=mv.mean_match_accuracy,
                            enable_ca=['stats'])
    # Run the cross-validation; callers read the outcome from ``cv.ca.stats``.
    cv(ds)
    return cv
Beispiel #2
0
 def __call__(self, valsTrain, labelsTrain, valsTest, doAncestralCV=True):
     """Train on the ancestral population and classify the admixed samples.

     Optionally also cross-validates the classifier on the ancestral
     (training) population.

     Arguments:
     - `valsTrain`: numpy array (nSamples x nFeatures) of training samples
     - `labelsTrain`: list of nSamples labels
     - `valsTest`: numpy array (nSamples2 x nFeatures) of test samples
     - `doAncestralCV`: if true, also cross-validate on the training data

     Returns:
     - if `doAncestralCV` is false: the predictions for `valsTest`
     - if `doAncestralCV` is true: a tuple `(cv, predictions)` where `cv`
       is the cross-validation confusion matrix (`cvte.ca.stats.matrix`)
       or, when training raised `pymvpa.DegenerateInputError`, the scalar
       chance level `1 / number of unique labels`.
       NOTE(review): the first tuple slot therefore has two different
       types; confirm that callers cope with both.
     """
     # Wrap the training samples in a dataset and attach the labels.
     ds = pymvpa.Dataset(valsTrain)
     ds.sa['targets'] = labelsTrain
     # Assign the samples round-robin (0, 1, 2, 0, 1, 2, ...) to three groups
     # that serve as the partitioning attribute for the cross-validation.
     ds.sa['runtype'] = (np.arange(valsTrain.shape[0]) % 3).astype(float)
     try:  # Train on ancestral
         self.classifier.train(ds)
         admixedClass = self.classifier.predict(valsTest)
     except pymvpa.DegenerateInputError:
         # valsTrain is too small to contain information
         print("WARNING: Window is degenerate; guessing ancestry")
         # Just assign every test sample to the first population
         admixedClass = np.zeros(valsTest.shape[0])
         if doAncestralCV:
             # Report chance level as the cross-validation success
             return 1. / len(np.unique(labelsTrain)), admixedClass
         return admixedClass
     if doAncestralCV:  # Cross-validate on the ancestral population
         hspl = pymvpa.NGroupPartitioner(3, attr='runtype')
         cvte = pymvpa.CrossValidation(self.classifier,
                                       hspl,
                                       enable_ca='stats')
         # Called for its side effect of filling ``cvte.ca.stats``.
         cvte(ds)
         return cvte.ca.stats.matrix, admixedClass
     return admixedClass
Beispiel #3
0
                              dtype='bool')]
    print('... and only %s cases of interest (Keep vs Switch Language)'
          % dataset.shape[0])
    dataset = M.datasets.miscfx.remove_invariant_features(dataset)
    print('saving as compressed file %s' % trimmedCache)
    # Cache the trimmed dataset as a gzip-compressed pickle (level 5).
    pickleFile = gzip.open(trimmedCache, 'wb', 5)
    try:
        pickle.dump(dataset, pickleFile)
    finally:
        # Close explicitly so the compressed stream is completely written
        # even if pickling fails half-way.
        pickleFile.close()

# Classifier with built-in feature selection: features are ranked with a
# one-way ANOVA and the 500 highest-scoring ones are kept before M.PLR() is
# trained on them.
# NOTE(review): the variable names say "SMLR" but the wrapped classifier is
# M.PLR() -- confirm which one is intended.
anovaSelectedSMLR = M.FeatureSelectionClassifier(
    M.PLR(),
    M.SensitivityBasedFeatureSelection(
        M.OneWayAnova(),
        M.FixedNElementTailSelector(500, mode='select', tail='upper')),
)
# Cross-validation around that classifier using the partitioner's defaults.
foldwiseCvedAnovaSelectedSMLR = M.CrossValidation(
    anovaSelectedSMLR,
    M.NFoldPartitioner(),
    enable_ca=['samples_error', 'stats', 'calling_time', 'confusion'])
# run classifier
print('learning on detrended, normalised, averaged, Keep vs Switch ... %s'
      % datetime.datetime.now())
results = foldwiseCvedAnovaSelectedSMLR(dataset)
print('... done %s' % datetime.datetime.now())

# The cross-validation yields errors; turn their mean into a percentage of
# correct predictions, rounded to one decimal.
precision = N.round(100 - N.mean(results) * 100, 1)
print('accuracy %s %% %s' % (precision, datetime.datetime.now()))

# Append "boldDelay,stimulusWidth,precision" as one line to the result CSV.
st = ','.join(str(v) for v in (boldDelay, stimulusWidth, precision)) + '\n'
# The context manager guarantees the file is flushed and closed (the former
# bare ``f.close`` never actually called close()).
with open("withinPredictionResult.csv", "a") as f:
    f.write(st)
Beispiel #4
0
#del ds_q2.sa['intents']
# Drop the 'stats' sample attribute, then z-score the samples chunk-wise
# (called for its effect on ds_q2; the return value is not used).
del ds_q2.sa['stats']
mv.zscore(ds_q2, chunks_attr='chunks')

# Expected number of medial-wall vertices per hemisphere.
n_medial = {'lh': 3486, 'rh': 3491}
# Number of exactly-zero samples per vertex, computed once and reused for
# both masks below.
# NOTE(review): 200 is presumably the number of samples in ds_q2 -- confirm.
zero_counts = np.sum(ds_q2.samples == 0, axis=0)
# Vertices that are zero in all 200 samples are treated as medial wall,
# everything else as cortex.
medial_wall = np.where(zero_counts == 200)[0].tolist()
cortical_vertices = np.where(zero_counts < 200)[0].tolist()
assert len(medial_wall) == n_medial[hemisphere]
n_vertices = ds_q2.fa.node_indices.shape[0]
assert len(medial_wall) + len(cortical_vertices) == n_vertices

# 2. cross validation __________________________________________________________________
# setting up classifier
clf = mv.LinearCSVMC(space='targets')
cv = mv.CrossValidation(clf, mv.NFoldPartitioner(attr='chunks'))
cv_within = cv(ds_q2)
# Report the results explicitly; bare expressions only echo a value in an
# interactive session and do nothing when this runs as a script.
# NOTE(review): no errorfx is passed, so these numbers come from
# CrossValidation's default error function (likely an error rate, not an
# accuracy) -- confirm; this may answer the question below.
print(cv_within)
print("mean of cross-validation results: {0}".format(np.mean(cv_within)))
# why is the mean lower?

# 3. searchlight _______________________________________________________________________
fsaverage_gii = os.path.join(main_dir, 'fs_templates',
                             hemisphere + '.pial.gii')
surf = mv.surf.read(fsaverage_gii)
# note: surf.vertices.shape (81920, 3) and surf.faces.shape (40962, 3) surface = surf,
# NOTE(review): the two shapes in the note above look swapped (more faces
# than vertices would be expected) -- confirm.
qe = mv.SurfaceQueryEngine(surf, radius=radii, distance_metric='dijkstra')
# Run the cross-validation in a searchlight centred on cortical vertices only.
sl = mv.Searchlight(cv, queryengine=qe, roi_ids=cortical_vertices)
sl_q2 = sl(ds_q2)

# 4. save output _______________________________________________________________________
# Hemisphere is taken from the second command-line argument.
hemisphere = sys.argv[2]
task_list = ['beh', 'tax']
# Searchlight radius handed to the surface query engine below.
radii = 10.0

# 1. create pymvpa dataset  ____________________________________________________________
ds_q3 = generate_dataset.create_dataset(sub_name, main_dir, task_list,
                                        hemisphere)
# Partition on the 'tax' attribute and classify the 'beh' attribute.
ds_q3.sa['chunks'] = ds_q3.sa['tax']
ds_q3.sa['targets'] = ds_q3.sa['beh']
del ds_q3.sa['intents']
# z-score chunk-wise (called for its effect on ds_q3; return value unused)
mv.zscore(ds_q3, chunks_attr='chunks')

# 2. cross validation __________________________________________________________________
# setting up classifier
clf = mv.LinearCSVMC()
cv = mv.CrossValidation(clf, mv.NFoldPartitioner())
cv_within = cv(ds_q3)
# Report the results explicitly; bare expressions only echo a value in an
# interactive session and do nothing when this runs as a script.
# NOTE(review): no errorfx is passed, so these numbers come from
# CrossValidation's default error function (likely an error rate, not an
# accuracy) -- confirm; this may answer the question below.
print(cv_within)
print("mean of cross-validation results: {0}".format(np.mean(cv_within)))
# why is the mean lower?

# 3. searchlight _______________________________________________________________________
fsaverage_gii = os.path.join(main_dir, 'fs_templates',
                             hemisphere + '.pial.gii')
surf = mv.surf.read(fsaverage_gii)
# note: surf.vertices.shape (81920, 3) and surf.faces.shape (40962, 3) surface = surf,
# NOTE(review): the two shapes in the note above look swapped (more faces
# than vertices would be expected) -- confirm.
qe = mv.SurfaceQueryEngine(surf, radius=radii, distance_metric='dijkstra')
# Run the cross-validation in a searchlight using four processes.
sl = mv.Searchlight(cv, queryengine=qe, nproc=4)
sl_q3 = sl(ds_q3)

# 4. save output _______________________________________________________________________
Beispiel #6
0
def dotheclassification(ds, bilateral, store_sens=True):
    """Cross-validate a linear SGD classifier and optionally keep sensitivities.

    Builds an sklearn ``SGDClassifier`` (hinge loss, l2 penalty, balanced
    class weights), wraps it for PyMVPA in a ``MulticlassClassifier`` and runs
    a cross-validation partitioned on the ``participant`` sample attribute.
    The classification statistics are appended to
    ``results_dir + 'avmovie_clf.txt'``.

    Parameters
    ----------
    ds : dataset
        Dataset to classify. The ROI labels (``bilat_ROIs`` or ``all_ROIs``)
        are copied into ``ds.sa['targets']``, i.e. ``ds`` is modified in
        place.
    bilateral : bool
        Whether the ROIs are combined across hemispheres.
    store_sens : bool
        If true, the sensitivities of every fold are collected and saved to
        ``results_dir + 'sensitivities_nfold.hdf5'``. If false, only the
        classification is performed.

    Returns
    -------
    (sensitivities, cv)
        ``sensitivities`` is a list with one sensitivity dataset per fold
        (empty when ``store_sens`` is false); ``cv`` is the
        ``mv.CrossValidation`` object after it has been run.
    """
    # The learner takes its labels from the sample attribute called
    # 'targets', so copy the relevant ROI labels there.
    if bilateral:
        ds.sa['targets'] = ds.sa.bilat_ROIs
    else:
        ds.sa['targets'] = ds.sa.all_ROIs

    from sklearn.linear_model import SGDClassifier

    # get a stochastic gradient descent into pymvpa by using the SKLLearnerAdapter.
    # Get it to perform 1 vs 1 decisions (instead of one vs all) with the MulticlassClassifier
    clf = mv.MulticlassClassifier(
        mv.SKLLearnerAdapter(
            SGDClassifier(loss='hinge', penalty='l2',
                          class_weight='balanced')))

    sensitivities = []

    # Callback for CrossValidation. It deliberately has a name of its own:
    # the previous version called it ``store_sens`` and thereby overwrote the
    # boolean parameter of the same name.
    def collect_sensitivities(data, node, result):
        # reuse the classifier as trained for this fold (no retraining)
        sens = node.measure.get_sensitivity_analyzer(
            force_train=False)(data)
        # we also need to manually append the time attributes to the sens ds
        sens.fa['time_coords'] = data.fa['time_coords']
        sens.fa['chunks'] = data.fa['chunks']
        sensitivities.append(sens)

    # Identical cross-validation setup in both modes; only the callback is
    # optional.
    cv_kwargs = dict(errorfx=mv.mean_match_accuracy, enable_ca=['stats'])
    if store_sens:
        cv_kwargs['callback'] = collect_sensitivities
    cv = mv.CrossValidation(clf,
                            mv.NFoldPartitioner(attr='participant'),
                            **cv_kwargs)
    # Run the cross-validation; the outcome is read from ``cv.ca.stats``.
    cv(ds)

    # save classification results
    with open(results_dir + 'avmovie_clf.txt', 'a') as f:
        f.write(cv.ca.stats.as_string(description=True))

    # Label order for the confusion matrix. Currently only needed by the
    # disabled plotting code below.
    if bilateral:
        desired_order = ['VIS', 'LOC', 'OFA', 'FFA', 'EBA', 'PPA']
    else:
        desired_order = [
            'brain', 'VIS', 'left LOC', 'right LOC', 'left OFA', 'right OFA',
            'left FFA', 'right FFA', 'left EBA', 'right EBA', 'left PPA',
            'right PPA'
        ]
    labels = get_known_labels(desired_order, cv.ca.stats.labels)

    # Plotting is disabled: the confusion matrix plot with pymvpas build-in
    # plot function currently fails. Re-enabling it requires
    # ``import matplotlib.pyplot as plt`` and keeping the return value of
    # ``cv(ds)`` as ``results``.
    #    cv.ca.stats.plot(labels=labels,
    #                     numbers=True,
    #                     cmap='gist_heat_r')
    #    plt.savefig(results_dir + 'confusion_matrix.png')
    #    if niceplot:
    #        ACC = cv.ca.stats.stats['mean(ACC)']
    #        plot_confusion(cv,
    #                       labels,
    #                       fn=results_dir + 'confusion_matrix_avmovie.svg',
    #                       figsize=(9, 9),
    #                       vmax=100,
    #                       cmap='Blues',
    #                       ACC='%.2f' % ACC)
    #    mv.h5save(results_dir + 'SGD_cv_classification_results.hdf5', results)
    print('Saved the crossvalidation results.')
    if store_sens:
        mv.h5save(results_dir + 'sensitivities_nfold.hdf5', sensitivities)
        print('Saved the sensitivities.')
    # sensitivities contains one dataset per fold with the sensitivities as
    # samples
    return sensitivities, cv
    d.sa['conditions'] = conditions
    d.sa['taxonomy'] = taxonomy
    d.sa['behavior'] = behavior
    if ds is None:
        ds = d
    else:
        ds = mv.vstack((ds, d))
# Give every feature (column) a running index as its node index.
ds.fa['node_indices'] = range(ds.shape[1])
# z-score all samples chunk-wise, as float32
mv.zscore(ds, chunks_attr='chunks', dtype='float32')

# Load the pial surface of this hemisphere and build the searchlight query
# engine on it. Radius and distance metric are arbitrary choices!
radius = 10
surface = mv.surf.read(join(data_path, '{0}.pial.gii'.format(hemi)))
query = mv.SurfaceQueryEngine(surface, radius, distance_metric='dijkstra')

# Classifier and cross-validation, based off the PyMVPA tutorial. The error
# function returns the fraction of predictions that equal their target.
clf = mv.LinearNuSVMC(space=predict)
cv = mv.CrossValidation(
    clf,
    mv.NFoldPartitioner(attr=train_on),
    errorfx=lambda p, t: np.mean(p == t),
    enable_ca=['stats'],
)

# Run the cross-validation in every searchlight (no ROI restriction) and
# post-process the result of each searchlight with mean_sample().
searchlights = mv.Searchlight(
    cv, queryengine=query, postproc=mv.mean_sample(), roi_ids=None)
sl_clf_results = searchlights(ds)

# Store the result as a plain numpy array; the file name encodes subject,
# predicted attribute and hemisphere.
outstr = ''.join(
    [save_path, 'results/sub', sub, '_sl_clf_', predict, '_', hemi])
res = np.array(sl_clf_results)
np.save(outstr, res)
Beispiel #8
0
def dotheclassification(ds, bilateral, store_sens=True):
    """Cross-validate a linear GNB classifier and optionally keep sensitivities.

    Builds a Gaussian naive Bayes classifier with common variance and runs a
    cross-validation partitioned on the ``participant`` sample attribute.
    The classification statistics are appended to
    ``results_dir + 'avmovie_clf.txt'`` and the cross-validation results are
    saved to ``results_dir + 'gnb_cv_classification_results.hdf5'``.

    Parameters
    ----------
    ds : dataset
        Dataset to classify; needs the sample attributes ``participant`` and
        ``bilat_ROIs`` (if ``bilateral``) or ``all_ROIs`` (otherwise).
    bilateral : bool
        Whether the ROIs are combined across hemispheres; selects which
        sample attribute holds the labels.
    store_sens : bool
        If true, the sensitivities of every fold are collected and saved to
        ``results_dir + 'sensitivities_nfold.hdf5'``. If false, only the
        classification is performed.

    Returns
    -------
    (sensitivities, cv)
        ``sensitivities`` is a list with one sensitivity dataset per fold
        (empty when ``store_sens`` is false); ``cv`` is the
        ``mv.CrossValidation`` object after it has been run.
    """
    # set up classifier; GNB reads its labels from the attribute in ``space``
    gnb = mv.GNB(common_variance=True,
                 prior='ratio',
                 space='bilat_ROIs' if bilateral else 'all_ROIs')

    sensitivities = []

    # Callback for CrossValidation. It deliberately has a name of its own:
    # the previous version called it ``store_sens`` and thereby overwrote the
    # boolean parameter of the same name.
    def collect_sensitivities(data, node, result):
        # reuse the classifier as trained for this fold (no retraining)
        sens = node.measure.get_sensitivity_analyzer(
            force_train=False)(data)
        # we also need to manually append the time attributes to the sens ds
        sens.fa['time_coords'] = data.fa['time_coords']
        sens.fa['chunks'] = data.fa['chunks']
        sensitivities.append(sens)

    # Identical cross-validation setup in both modes; only the callback is
    # optional.
    cv_kwargs = dict(errorfx=mv.mean_match_accuracy, enable_ca=['stats'])
    if store_sens:
        cv_kwargs['callback'] = collect_sensitivities
    cv = mv.CrossValidation(gnb,
                            mv.NFoldPartitioner(attr='participant'),
                            **cv_kwargs)
    results = cv(ds)

    # save classification results
    with open(results_dir + 'avmovie_clf.txt', 'a') as f:
        f.write(cv.ca.stats.as_string(description=True))

    # label order for the confusion matrix
    if bilateral:
        desired_order = ['VIS', 'LOC', 'OFA', 'FFA', 'EBA', 'PPA']
    else:
        desired_order = [
            'brain', 'VIS', 'left LOC', 'right LOC', 'left OFA', 'right OFA',
            'left FFA', 'right FFA', 'left EBA', 'right EBA', 'left PPA',
            'right PPA'
        ]
    labels = get_known_labels(desired_order, cv.ca.stats.labels)

    # Plotting the confusion matrix with pymvpas build-in plot function
    # currently fails, so it stays disabled (re-enabling it requires
    # ``import matplotlib.pyplot as plt``):
    # cv.ca.stats.plot(labels=labels,
    #                 numbers=True,
    #                 cmap='gist_heat_r')
    # plt.savefig(results_dir + 'confusion_matrix.png')
    if niceplot:
        ACC = cv.ca.stats.stats['mean(ACC)']
        plot_confusion(cv,
                       labels,
                       fn=results_dir + 'confusion_matrix_avmovie.svg',
                       figsize=(9, 9),
                       vmax=100,
                       cmap='Blues',
                       ACC='%.2f' % ACC)
    mv.h5save(results_dir + 'gnb_cv_classification_results.hdf5', results)
    print('Saved the crossvalidation results.')
    if store_sens:
        mv.h5save(results_dir + 'sensitivities_nfold.hdf5', sensitivities)
        print('Saved the sensitivities.')
    # sensitivities contains one dataset per fold with the sensitivities as
    # samples
    return sensitivities, cv
    np.sum(ds.samples == 0, axis=0) < n_conditions * 5)[0].tolist()
# Consistency checks on the vertex bookkeeping: the medial wall must have
# the expected size for this hemisphere, and medial wall plus cortex must
# account for every vertex.
assert len(medial_wall) == n_medial[hemi]
assert n_vertices == len(medial_wall) + len(cortical_vertices)

# Disabled: cache the cortical vertex list on disk / reload it from there.
#np.save(join(mvpa_dir, 'cortical_vertices_{0}.npy'.format(hemi)), cortical_vertices)
#cortical_vertices = np.load(join(mvpa_dir, 'cortical_vertices_{0}.npy'.format(hemi))).tolist()

# Standardize every sample (row) to zero mean and unit standard deviation
# over its features (axis=1); the chunk-wise mv.zscore call stays disabled.
# NOTE(review): the earlier comment said "z-score features across samples",
# which would be axis=0 -- confirm which normalization is intended.
#mv.zscore(ds, chunks_attr='runs')
ds.samples = ((ds.samples - np.mean(ds.samples, axis=1, keepdims=True)) /
              np.std(ds.samples, axis=1, keepdims=True))

# Linear SVM, cross-validated over the partitions defined by ``chunks`` and
# scored as the fraction of matching predictions.
clf = mv.LinearCSVMC(space=targets)
cv = mv.CrossValidation(
    clf, mv.NFoldPartitioner(attr=chunks), errorfx=mv.mean_match_accuracy)

# Run that cross-validation in a searchlight around every cortical vertex,
# in a single process, recording the size of each searchlight.
sl = mv.Searchlight(
    cv,
    queryengine=qe,
    roi_ids=cortical_vertices,
    nproc=1,
    enable_ca=['roi_sizes'],
)
# Alternative (disabled) setup for a cross-validated RSA searchlight:
#sl = mv.Searchlight(cv_rsa, queryengine=qe, enable_ca=['roi_sizes'],
#                    nproc=1, results_backend='native', roi_ids=cortical_vertices)
#tmp_prefix='/local/tmp/sam_sl_p{0}_{1}_'.format(participant_id, hemi)
# Switch on the 'SLC' debug target before running.
mv.debug.active += ['SLC']
sl_result = sl(ds)

# Report the mean number of features per searchlight.
mean_roi_size = np.mean(sl.ca.roi_sizes)
print("Average searchlight size = {0}".format(mean_roi_size))
def dotheclassification(ds_movie,
                        ds_loc,
                        classifier,
                        bilateral,
                        n_participants=15):
    """Classify two datasets and compare their per-voxel predictions.

    For each of the two datasets (movie, localizer) the chosen classifier is
    cross-validated with partitions on the ``participant`` sample attribute.
    In every fold the targets, predictions, partition labels and participant
    of all samples are recorded. The per-fold records are saved to
    ``results_dir + 'cv_classification_results_<movie|loc>.hdf5'`` and then
    combined into pandas data frames.

    Parameters
    ----------
    ds_movie, ds_loc : dataset
        The two datasets to classify. The ROI labels are copied into
        ``ds.sa['targets']``, i.e. both datasets are modified in place.
    classifier : str
        ``'gnb'``   -- GNB with common variance,
        ``'sgd'``   -- sklearn SGDClassifier (hinge loss, l2 penalty),
        ``'l-sgd'`` -- the same SGD learner wrapped in
        ``mv.MulticlassClassifier``.
    bilateral : bool
        Whether the ROIs are combined across hemispheres; selects
        ``bilat_ROIs`` instead of ``all_ROIs`` as the label source.
    n_participants : int, optional
        Number of distinct participants expected in the testing data
        (default 15, the previously hard-coded value).

    Returns
    -------
    (compare_exp, all_testing, ROIS)
        ``compare_exp``: one row per (voxel_indices, participant) with the
        summed hits over both experiments (0, 1 or 2), the target and the
        prediction of each experiment;
        ``all_testing``: all records of the testing partitions;
        ``ROIS``: the unique targets of ``ds_movie``.

    Raises
    ------
    ValueError
        If ``classifier`` is not one of the supported names.
    AssertionError
        If the testing data does not contain ``n_participants`` participants
        or the hit counts fall outside 0..2.
    """
    supported = ('gnb', 'sgd', 'l-sgd')
    if classifier not in supported:
        # Fail early and explicitly instead of hitting an unbound ``clf``.
        raise ValueError(
            "unknown classifier %r; expected one of %s"
            % (classifier, ', '.join(supported)))

    # names of the two experiments, in the order they are processed
    ds_type = ['movie', 'loc']

    dfs = []
    for idx, ds in enumerate([ds_movie, ds_loc]):
        if bilateral:
            ds.sa['targets'] = ds.sa.bilat_ROIs
        else:
            ds.sa['targets'] = ds.sa.all_ROIs

        if classifier == 'gnb':
            clf = mv.GNB(common_variance=True,
                         prior='ratio')
        else:
            # imported lazily so sklearn is only needed for the SGD variants
            from sklearn.linear_model import SGDClassifier
            # get a stochastic gradient descent into pymvpa by using the
            # SKLLearnerAdapter
            clf = mv.SKLLearnerAdapter(SGDClassifier(loss='hinge',
                                                     penalty='l2',
                                                     class_weight='balanced'))
            if classifier == 'l-sgd':
                # 1 vs 1 decisions (instead of one vs all)
                clf = mv.MulticlassClassifier(clf)

        # filled by the callback below, one dataset per fold
        classifications = []

        def store_class(data, node, result):
            # Record, for every sample of this fold, its voxel indices,
            # target, partition label, participant and the prediction of the
            # classifier as trained for this fold.
            class_ds = mv.Dataset(samples=data.sa.voxel_indices)
            class_ds.sa['targets'] = data.sa.targets
            class_ds.sa['partitions'] = data.sa.partitions
            class_ds.sa['predictions'] = clf.predict(data)
            class_ds.sa['participant'] = data.sa.participant
            classifications.append(class_ds)

        # do a crossvalidation classification and store the classification results
        cv = mv.CrossValidation(clf, mv.NFoldPartitioner(attr='participant'),
                                errorfx=mv.mean_match_accuracy,
                                enable_ca=['stats'],
                                callback=store_class)
        # run for the side effect of filling ``classifications``
        cv(ds)

        # save classification results as a Dataset
        mv.h5save(results_dir + 'cv_classification_results_{}.hdf5'.format(ds_type[idx]), classifications)
        print('Saved the classification results obtained during crossvalidation.')

        # get the classification list into pandas dataframes, one per fold
        for classification in classifications:
            df = pd.DataFrame(data={'voxel_indices': list(classification.samples),
                                    'targets': list(classification.sa.targets),
                                    'predictions': list(classification.sa.predictions),
                                    'partitions': list(classification.sa.partitions),
                                    'participants': list(classification.sa.participant),
                                    'ds_type': [ds_type[idx]] * len(classification.sa.predictions)
                                    }
                              )
            dfs.append(df)

    # get all folds into one dataframe, disregard the index
    all_classifications = pd.concat(dfs, ignore_index=True)
    # a hit (1) is a prediction that equals its target, otherwise 0
    all_classifications['hits'] = (
        all_classifications['predictions'] == all_classifications['targets']
    ).astype(int)
    # assign string labels to training and testing partitions (instead of
    # 1, 2); any other partition value stays unlabelled
    all_classifications['parts'] = all_classifications['partitions'].map(
        {1: 'train', 2: 'test'})
    # transform voxel coordinates from arrays (unhashable) into tuples
    all_classifications['voxel_indices'] = all_classifications['voxel_indices'].apply(tuple)

    # subset the dataset to contain only the testing data
    all_testing = all_classifications[all_classifications.parts == "test"]
    # check that every participant is in the data
    assert len(all_testing.participants.unique()) == n_participants
    # to check for correspondence between the sum of the two experiments confusion matrices,
    # do sth like this: len(all_testing[(all_testing['predictions'] == 'PPA') & (all_testing['targets'] == 'VIS')])

    # this counts hits per fold across experiments (2 if both experiments classified correctly,
    # 1 if 1 experiment classified correctly, 0 if none did). Also, append the targets per voxel.
    # we use 'min' here because aggregate needs any function, but targets are the same between
    # the experiments
    sort_keys = ['voxel_indices', 'participants']
    compare_exp = all_testing.groupby(sort_keys).agg(
        {'hits': 'sum', 'targets': 'min'}).reset_index().sort_values(sort_keys)
    all_testing_movie = all_testing[all_testing.ds_type == 'movie'].sort_values(
        sort_keys).reset_index()
    all_testing_loc = all_testing[all_testing.ds_type == 'loc'].sort_values(
        sort_keys).reset_index()
    # append movie and loc predictions to the dataframe; the assignment
    # aligns on the (freshly reset) row index of the identically sorted frames
    compare_exp['pred_movie'] = all_testing_movie.predictions
    compare_exp['pred_loc'] = all_testing_loc.predictions

    # get the ROIS from the classification
    ROIS = np.unique(ds_movie.sa.targets)

    # there can't be values greater than two or lower than zero
    assert compare_exp.hits.max() <= 2
    assert compare_exp.hits.min() >= 0
    return compare_exp, all_testing, ROIS
Beispiel #11
0
        if hyperalign:
            ds = mappers[i][participant].forward(ds)
            print("Hyperaligned participant {0}".format(participant))
            if zscore_features:
                mv.zscore(ds, chunks_attr=None)
            ds.fa['node_indices'] = range(ds.shape[1])
            ds.fa['center_ids'] = range(ds.shape[1])

    ds_all = mv.vstack((ds1, ds2, ds3, ds4), fa='update')
    rsa.PDist(**kwargs)
    #variant_ids = mv.remove_invariant_features(ds_both).fa.center_ids.tolist()

    # Set up cross-validated RSA
    cv_rsa_ = mv.CrossValidation(mv.CDist(pairwise_metric='correlation'),
                                 mv.HalfPartitioner(attr='sessions'),
                                 errorfx=None)

    # cv_rsa above would return all kinds of .sa which are important
    # but must be the same across searchlights. so we first apply it
    # to the entire ds to capture them
    cv_rsa_out = cv_rsa_(ds_all)
    target_sa = cv_rsa_out.sa.copy(deep=True)

    # And now create a postproc which would verify and strip them off
    # to just return samples
    from mvpa2.testing.tools import assert_collections_equal
    from mvpa2.base.collections import SampleAttributesCollection
    from mvpa2.base.node import Node
    def lean_errorfx(ds):#Node):
        #def __call__(self, ds):
Beispiel #12
0
def dotheclassification(ds, bilateral):
    """Run a one-vs-all SGD classification, cross-validated over participants.

    The ROI labels are copied into ``ds.sa['targets']`` (``ds`` is modified
    in place), an sklearn ``SGDClassifier`` is cross-validated with
    partitions on the ``participant`` sample attribute, and the statistics,
    confusion-matrix plots and results are written below ``results_dir``.
    Returns the ``mv.CrossValidation`` object after it has been run.

    Future TODO: Selection of alpha may be better performed via
    GridSearchCV. To quote sklearns documentation: 'Finding a reasonable
    regularization term is best done using GridSearchCV, usually in the range
    10.0**-np.arange(1,7).'
    """
    # The learner takes its labels from the sample attribute called
    # 'targets', so the relevant ROI labels are copied there.
    ds.sa['targets'] = ds.sa.bilat_ROIs if bilateral else ds.sa.all_ROIs

    sgd = SGDClassifier(loss='hinge', penalty='l2', class_weight='balanced')
    clf = mv.SKLLearnerAdapter(sgd)
    partitioner = mv.NFoldPartitioner(attr='participant')
    cv = mv.CrossValidation(
        clf,
        partitioner,
        errorfx=mv.mean_match_accuracy,
        enable_ca=['stats'],
    )
    results = cv(ds)

    # append the classification statistics to the text report
    with open(results_dir + 'SGD_clf.txt', 'a') as report:
        report.write(cv.ca.stats.as_string(description=True))

    # label order used for the confusion matrices
    unilateral_order = [
        'brain', 'VIS', 'left LOC', 'right LOC', 'left OFA', 'right OFA',
        'left FFA', 'right FFA', 'left EBA', 'right EBA', 'left PPA',
        'right PPA'
    ]
    bilateral_order = ['brain', 'VIS', 'LOC', 'OFA', 'FFA', 'EBA', 'PPA']
    desired_order = bilateral_order if bilateral else unilateral_order
    labels = get_known_labels(desired_order, cv.ca.stats.labels)

    # plot the confusion matrix with pymvpas build in function and save it
    cv.ca.stats.plot(labels=labels, numbers=True, cmap='gist_heat_r')
    plt.savefig(results_dir + 'confusion_matrix.png')

    # optionally plot a nicer confusion matrix with matplotlib
    if niceplot:
        ACC = cv.ca.stats.stats['mean(ACC)']
        plot_confusion(
            cv,
            labels,
            fn=results_dir + 'confusion_matrix_SGD.svg',
            figsize=(9, 9),
            vmax=100,
            cmap='Blues',
            ACC='%.2f' % ACC,
        )

    mv.h5save(results_dir + 'SGD_cv_classification_results.hdf5', results)
    print('Saved the crossvalidation results.')

    return cv