def __test_matthias_question(self):
    rfe_clf = LinearCSVMC(C=1)
    rfesvm_split = SplitClassifier(rfe_clf)
    clf = FeatureSelectionClassifier(
        clf=LinearCSVMC(C=1),
        feature_selection=RFE(
            sensitivity_analyzer=rfesvm_split.get_sensitivity_analyzer(
                combiner=first_axis_mean,
                transformer=np.abs),
            transfer_error=ConfusionBasedError(
                rfesvm_split, confusion_state="confusion"),
            stopping_criterion=FixedErrorThresholdStopCrit(0.20),
            feature_selector=FractionTailSelector(
                0.2, mode='discard', tail='lower'),
            update_sensitivity=True))

    no_permutations = 1000
    permutator = AttributePermutator('targets', count=no_permutations)

    cv = CrossValidation(clf, NFoldPartitioner(),
                         null_dist=MCNullDist(permutator, tail='left'),
                         enable_ca=['stats'])
    error = cv(datasets['uni2small'])
    self.assertTrue(error < 0.4)
    self.assertTrue(cv.ca.null_prob < 0.05)
def test_james_problem_multiclass(self):
    percent = 80
    dataset = datasets['uni4large']
    #dataset = dataset[:, dataset.a.nonbogus_features]

    rfesvm_split = LinearCSVMC()
    fs = RFE(rfesvm_split.get_sensitivity_analyzer(
                postproc=ChainMapper([
                    #FxMapper('features', l2_normed),
                    #FxMapper('samples', np.mean),
                    #FxMapper('samples', np.abs)
                    FxMapper('features', lambda x: np.argsort(np.abs(x))),
                    #maxofabs_sample()
                    mean_sample()])),
             ProxyMeasure(rfesvm_split,
                          postproc=BinaryFxNode(mean_mismatch_error,
                                                'targets')),
             Splitter('train'),
             fselector=FractionTailSelector(
                 percent / 100.0,
                 mode='select', tail='upper'),
             update_sensitivity=True)

    clf = FeatureSelectionClassifier(
        LinearCSVMC(),
        # on features selected via RFE
        fs)
    # update sensitivity at each step (since we're not using the
    # same CLF as sensitivity analyzer)

    class StoreResults(object):
        def __init__(self):
            self.storage = []

        def __call__(self, data, node, result):
            self.storage.append((node.measure.mapper.ca.history,
                                 node.measure.mapper.ca.errors))

    cv_storage = StoreResults()
    cv = CrossValidation(clf, NFoldPartitioner(), postproc=mean_sample(),
                         callback=cv_storage,
                         enable_ca=['stats'])
    #cv = SplitClassifier(clf)
    try:
        error = cv(dataset).samples.squeeze()
    except Exception as e:
        self.fail('CrossValidation cannot handle classifier with RFE '
                  'feature selection. Got exception: %s' % (e,))
def test_james_problem(self):
    percent = 80
    dataset = datasets['uni2small']
    rfesvm_split = LinearCSVMC()
    fs = RFE(rfesvm_split.get_sensitivity_analyzer(),
             ProxyMeasure(rfesvm_split,
                          postproc=BinaryFxNode(mean_mismatch_error,
                                                'targets')),
             Splitter('train'),
             fselector=FractionTailSelector(
                 percent / 100.0,
                 mode='select', tail='upper'),
             update_sensitivity=True)

    clf = FeatureSelectionClassifier(
        LinearCSVMC(),
        # on features selected via RFE
        fs)
    # update sensitivity at each step (since we're not using the
    # same CLF as sensitivity analyzer)

    class StoreResults(object):
        def __init__(self):
            self.storage = []

        def __call__(self, data, node, result):
            self.storage.append((node.measure.mapper.ca.history,
                                 node.measure.mapper.ca.errors))

    cv_storage = StoreResults()
    cv = CrossValidation(clf, NFoldPartitioner(), postproc=mean_sample(),
                         callback=cv_storage,
                         enable_ca=['confusion'])  # TODO -- it is stats
    #cv = SplitClassifier(clf)
    try:
        error = cv(dataset).samples.squeeze()
    except Exception as e:
        self.fail('CrossValidation cannot handle classifier with RFE '
                  'feature selection. Got exception: %s' % (e,))

    assert (len(cv_storage.storage) == len(dataset.sa['chunks'].unique))
    assert (len(cv_storage.storage[0]) == 2)
    assert (len(cv_storage.storage[0][0]) == dataset.nfeatures)

    self.assertTrue(error < 0.2)
def _test_edmund_chong_20120907():  # pragma: no cover
    # commented out to avoid syntax warnings while compiling
    # from mvpa2.suite import *
    from mvpa2.testing.datasets import datasets
    repeater = Repeater(count=20)

    partitioner = ChainNode([NFoldPartitioner(cvtype=1),
                             Balancer(attr='targets',
                                      count=1,  # for real data > 1
                                      limit='partitions',
                                      apply_selection=True)],
                            space='partitions')

    clf = LinearCSVMC()  # choice of classifier
    permutator = AttributePermutator('targets', limit={'partitions': 1},
                                     count=1)
    null_cv = CrossValidation(
        clf,
        ChainNode([partitioner, permutator], space=partitioner.get_space()),
        errorfx=mean_mismatch_error)
    distr_est = MCNullDist(repeater, tail='left', measure=null_cv,
                           enable_ca=['dist_samples'])
    cvte = CrossValidation(clf, partitioner,
                           errorfx=mean_mismatch_error,
                           null_dist=distr_est,
                           enable_ca=['stats'])
    errors = cvte(datasets['uni2small'])
def _test_mcasey20120222():  # pragma: no cover
    # http://lists.alioth.debian.org/pipermail/pkg-exppsy-pymvpa/2012q1/002034.html

    # This one is conditioned on allowing # of samples to be changed
    # by the mapper provided to MappedClassifier.  See
    # https://github.com/yarikoptic/PyMVPA/tree/_tent/allow_ch_nsamples

    import numpy as np
    from mvpa2.datasets.base import dataset_wizard
    from mvpa2.generators.partition import NFoldPartitioner
    from mvpa2.mappers.base import ChainMapper
    from mvpa2.mappers.svd import SVDMapper
    from mvpa2.mappers.fx import mean_group_sample
    from mvpa2.clfs.svm import LinearCSVMC
    from mvpa2.clfs.meta import MappedClassifier
    from mvpa2.measures.base import CrossValidation

    mapper = ChainMapper([mean_group_sample(['targets', 'chunks']),
                          SVDMapper()])
    clf = MappedClassifier(LinearCSVMC(), mapper)
    cvte = CrossValidation(clf, NFoldPartitioner(),
                           enable_ca=['repetition_results', 'stats'])

    ds = dataset_wizard(
        samples=np.arange(32).reshape((8, -1)),
        targets=[1, 1, 2, 2, 1, 1, 2, 2],
        chunks=[1, 1, 1, 1, 2, 2, 2, 2])

    errors = cvte(ds)
def test_sifter_superord_usecase():
    from mvpa2.misc.data_generators import normal_feature_dataset
    from mvpa2.clfs.svm import LinearCSVMC          # fast one to use for tests
    from mvpa2.measures.base import CrossValidation
    from mvpa2.base.node import ChainNode
    from mvpa2.generators.partition import NFoldPartitioner
    from mvpa2.generators.base import Sifter

    # Let's simulate the beast -- 6 categories total, grouped into 3
    # super-ordinate ones, and actually without any 'superordinate' effect
    # since the subordinate categories are independent
    ds = normal_feature_dataset(nlabels=6,
                                snr=100,   # pure signal! ;)
                                perlabel=30,
                                nfeatures=6,
                                nonbogus_features=range(6),
                                nchunks=5)
    ds.sa['subord'] = ds.sa.targets.copy()
    ds.sa['superord'] = ['super%d' % (int(i[1]) % 3,)
                         for i in ds.targets]   # 3 superord categories
    # let's override original targets just to be sure that we aren't
    # relying on them
    ds.targets[:] = 0

    npart = ChainNode([
        ## so we split based on superord
        NFoldPartitioner(len(ds.sa['superord'].unique),
                         attr='subord'),
        ## so it should select only those splits where we took 1 from
        ## each of the superord categories leaving things in balance
        Sifter([('partitions', 2),
                ('superord',
                 {'uvalues': ds.sa['superord'].unique,
                  'balanced': True})]),
    ], space='partitions')

    # and then do your normal where clf is space='superord'
    clf = LinearCSVMC(space='superord')
    cvte_regular = CrossValidation(clf, NFoldPartitioner(),
                                   errorfx=lambda p, t: np.mean(p == t))
    cvte_super = CrossValidation(clf, npart,
                                 errorfx=lambda p, t: np.mean(p == t))

    accs_regular = cvte_regular(ds)
    accs_super = cvte_super(ds)

    # With sifting we should get only 2^3 = 8 splits: each of the 3
    # superordinate categories contributes one of its 2 subordinate
    # members to the testing partition
    assert (len(accs_super) == 8)

    # I don't think that this would ever fail, so not marking it labile
    assert (np.mean(accs_regular) > .8)
    assert (np.mean(accs_super) < .6)
def test_searchlight_cross_decoding(path, subjects, conf_file, type, **kwargs):
    conf = read_configuration(path, conf_file, type)

    for arg in kwargs:
        conf[arg] = kwargs[arg]
        if arg == 'radius':
            radius = kwargs[arg]

    debug.active += ["SLC"]

    ds_merged = get_merged_ds(path, subjects, conf_file, type, **kwargs)

    clf = LinearCSVMC(C=1, probability=1, enable_ca=['probabilities'])
    cv = CrossValidation(clf, NFoldPartitioner(attr='task'))

    maps = []
    for ds in ds_merged:
        ds.targets[ds.targets == 'point'] = 'face'
        ds.targets[ds.targets == 'saccade'] = 'place'

        sl = sphere_searchlight(cv, radius, space='voxel_indices')
        sl_map = sl(ds)

        # flip the error map into an accuracy map (1 - error)
        sl_map.samples *= -1
        sl_map.samples += 1

        nif = map2nifti(sl_map, imghdr=ds.a.imghdr)
        maps.append(nif)

    datetime = get_time()
    analysis = 'cross_searchlight'
    mask = conf['mask_area']
    task = type

    new_dir = datetime + '_' + analysis + '_' + mask + '_' + task
    command = 'mkdir ' + os.path.join(path, '0_results', new_dir)
    os.system(command)

    parent_dir = os.path.join(path, '0_results', new_dir)

    for s, map_ in zip(subjects, maps):   # map_ avoids shadowing the builtin
        name = s
        command = 'mkdir ' + os.path.join(parent_dir, name)
        os.system(command)

        results_dir = os.path.join(parent_dir, name)
        fname = name + '_radius_' + str(radius) + '_searchlight_map.nii.gz'
        map_.to_filename(os.path.join(results_dir, fname))

    return maps
def _call(self, ds):
    # Overridden so that the results carry over
    # the input's sample attributes
    res = LinearCSVMC._call(self, ds)

    if isinstance(res, Dataset):
        for k in ds.sa.keys():
            res.sa[k] = ds.sa[k]
        return res
    else:
        return Dataset(res, sa=ds.sa)
def test_SplitRFE(self):
    # just a smoke test ATM
    from mvpa2.clfs.svm import LinearCSVMC
    from mvpa2.clfs.meta import MappedClassifier
    from mvpa2.misc.data_generators import normal_feature_dataset
    #import mvpa2.featsel.rfe
    #reload(mvpa2.featsel.rfe)
    from mvpa2.featsel.rfe import RFE, SplitRFE
    from mvpa2.generators.partition import NFoldPartitioner
    from mvpa2.featsel.helpers import FractionTailSelector
    from mvpa2.testing import ok_, assert_equal

    clf = LinearCSVMC(C=1)
    dataset = normal_feature_dataset(perlabel=20, nlabels=2, nfeatures=30,
                                     snr=1., nonbogus_features=[1, 5])
    # flip one of the meaningful features around to see
    # if we are still getting proper selection
    dataset.samples[:, dataset.a.nonbogus_features[1]] *= -1
    # 4 partitions should be enough for testing
    partitioner = NFoldPartitioner(count=4)

    rfeclf = MappedClassifier(
        clf, SplitRFE(clf,
                      partitioner,
                      fselector=FractionTailSelector(
                          0.2, mode='discard', tail='lower')))
    r0 = repr(rfeclf)

    ok_(rfeclf.mapper.nfeatures_min == 0)
    rfeclf.train(dataset)
    ok_(rfeclf.mapper.nfeatures_min > 0)
    predictions = rfeclf(dataset).samples

    # at least 1 of the nonbogus-features should be chosen
    ok_(len(set(dataset.a.nonbogus_features).intersection(
        rfeclf.mapper.slicearg)) > 0)

    # check repr to have all needed pieces
    r = repr(rfeclf)
    s = str(rfeclf)
    ok_(('partitioner=NFoldP' in r) or
        ('partitioner=mvpa2.generators.partition.NFoldPartitioner' in r))
    ok_('lrn=' in r)
    ok_(not 'slicearg=' in r)
    assert_equal(r, r0)
def test_sifter_superord_usecase():
    from mvpa2.misc.data_generators import normal_feature_dataset
    from mvpa2.clfs.svm import LinearCSVMC          # fast one to use for tests
    from mvpa2.measures.base import CrossValidation
    from mvpa2.base.node import ChainNode
    from mvpa2.generators.partition import NFoldPartitioner
    from mvpa2.generators.base import Sifter

    ds = _get_superord_dataset()

    npart = ChainNode([
        ## so we split based on superord
        NFoldPartitioner(len(ds.sa['superord'].unique),
                         attr='subord'),
        ## so it should select only those splits where we took 1 from
        ## each of the superord categories leaving things in balance
        Sifter([('partitions', 2),
                ('superord',
                 {'uvalues': ds.sa['superord'].unique,
                  'balanced': True})]),
    ], space='partitions')

    # and then do your normal where clf is space='superord'
    clf = LinearCSVMC(space='superord')
    cvte_regular = CrossValidation(clf, NFoldPartitioner(),
                                   errorfx=lambda p, t: np.mean(p == t))
    cvte_super = CrossValidation(clf, npart,
                                 errorfx=lambda p, t: np.mean(p == t))

    accs_regular = cvte_regular(ds)
    accs_super = cvte_super(ds)

    # With sifting we should get only 2^3 = 8 splits
    assert (len(accs_super) == 8)

    # I don't think that this would ever fail, so not marking it labile
    assert (np.mean(accs_regular) > .8)
    assert (np.mean(accs_super) < .6)
import numpy as np

from mvpa2.clfs.svm import LinearCSVMC
from mvpa2.generators.partition import OddEvenPartitioner
from mvpa2.measures.base import CrossValidation
from mvpa2.measures.searchlight import sphere_searchlight
from mvpa2.testing.datasets import datasets
from mvpa2.mappers.fx import mean_sample

"""For the sake of simplicity, let's use a small artificial dataset."""

# Lets just use our tiny 4D dataset from testing battery
dataset = datasets['3dlarge']

"""Now it only takes three lines for a searchlight analysis."""

# setup measure to be computed in each sphere (cross-validated
# generalization error on odd/even splits)
cv = CrossValidation(LinearCSVMC(), OddEvenPartitioner())

# setup searchlight with 2 voxels radius and measure configured above
sl = sphere_searchlight(cv, radius=2, space='myspace',
                        postproc=mean_sample())

# run searchlight on dataset
sl_map = sl(dataset)

print('Best performing sphere error:', np.min(sl_map.samples))

"""
If this analysis is done on a fMRI dataset using `NiftiDataset` the
resulting searchlight map (`sl_map`) can be mapped back into the original
dataspace and viewed as a brain overlay. :ref:`Another example
<example_searchlight>` shows a typical application of this algorithm.
"""
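# A minimal back-projection sketch, assuming the analysis ran on an fMRI
# dataset loaded with mvpa2.datasets.mri.fmri_dataset() so that
# dataset.a.imghdr exists (the artificial testing dataset above does not
# carry it); map2nifti is the same helper used by the searchlight
# functions elsewhere in this collection.
from mvpa2.datasets.mri import map2nifti

nimg = map2nifti(sl_map, imghdr=dataset.a.imghdr)
nimg.to_filename('searchlight_map.nii.gz')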
def test_rfe_sensmap():
    # http://lists.alioth.debian.org/pipermail/pkg-exppsy-pymvpa/2013q3/002538.html
    # just a smoke test. fails with
    from mvpa2.clfs.svm import LinearCSVMC
    from mvpa2.clfs.meta import FeatureSelectionClassifier
    from mvpa2.measures.base import CrossValidation, RepeatedMeasure
    from mvpa2.generators.splitters import Splitter
    from mvpa2.generators.partition import NFoldPartitioner
    from mvpa2.misc.errorfx import mean_mismatch_error
    from mvpa2.mappers.fx import mean_sample
    from mvpa2.mappers.fx import maxofabs_sample
    from mvpa2.generators.base import Repeater
    from mvpa2.featsel.rfe import RFE
    from mvpa2.featsel.helpers import FractionTailSelector, BestDetector
    from mvpa2.featsel.helpers import NBackHistoryStopCrit
    from mvpa2.datasets import vstack

    from mvpa2.misc.data_generators import normal_feature_dataset

    # Let's simulate the beast -- 3 categories with moderate signal
    # in the first 3 of 6 features
    fds = normal_feature_dataset(nlabels=3,
                                 snr=1,  # 100, # pure signal! ;)
                                 perlabel=9,
                                 nfeatures=6,
                                 nonbogus_features=range(3),
                                 nchunks=3)
    clfsvm = LinearCSVMC()

    rfesvm = RFE(clfsvm.get_sensitivity_analyzer(postproc=maxofabs_sample()),
                 CrossValidation(
                     clfsvm,
                     NFoldPartitioner(),
                     errorfx=mean_mismatch_error,
                     postproc=mean_sample()),
                 Repeater(2),
                 fselector=FractionTailSelector(0.70, mode='select',
                                                tail='upper'),
                 stopping_criterion=NBackHistoryStopCrit(BestDetector(), 10),
                 update_sensitivity=True)

    fclfsvm = FeatureSelectionClassifier(clfsvm, rfesvm)
    sensanasvm = fclfsvm.get_sensitivity_analyzer(postproc=maxofabs_sample())

    # manually repeating/splitting so we do both RFE sensitivity and
    # classification
    senses, errors = [], []
    for i, pset in enumerate(NFoldPartitioner().generate(fds)):
        # split partitioned dataset
        split = [d for d in Splitter('partitions').generate(pset)]
        # and it also should train the classifier so we would ask it
        # about error
        senses.append(sensanasvm(split[0]))
        errors.append(mean_mismatch_error(fclfsvm.predict(split[1]),
                                          split[1].targets))

    senses = vstack(senses)
    errors = vstack(errors)

    # Let's compare against rerunning the beast simply for classification
    # with CV
    errors_cv = CrossValidation(fclfsvm, NFoldPartitioner(),
                                errorfx=mean_mismatch_error)(fds)
    # and they should match
    assert_array_equal(errors, errors_cv)

    # buggy!
    cv_sensana_svm = RepeatedMeasure(sensanasvm, NFoldPartitioner())
    senses_rm = cv_sensana_svm(fds)

    #print senses.samples, senses_rm.samples
    #print errors, errors_cv.samples
    assert_raises(AssertionError,
                  assert_array_almost_equal,
                  senses.samples, senses_rm.samples)
    raise SkipTest("Known failure for repeated measures: "
                   "https://github.com/PyMVPA/PyMVPA/issues/117")
def test_multiclass_pairs_svm_searchlight():
    from mvpa2.measures.searchlight import sphere_searchlight
    import mvpa2.clfs.meta
    #reload(mvpa2.clfs.meta)
    from mvpa2.clfs.meta import MulticlassClassifier

    from mvpa2.datasets import Dataset
    from mvpa2.clfs.svm import LinearCSVMC
    #import mvpa2.testing.datasets
    #reload(mvpa2.testing.datasets)
    from mvpa2.testing.datasets import datasets
    from mvpa2.generators.partition import NFoldPartitioner, OddEvenPartitioner
    from mvpa2.measures.base import CrossValidation
    from mvpa2.testing import ok_, assert_equal, assert_array_equal
    from mvpa2.sandbox.multiclass import get_pairwise_accuracies

    # Some parameters used in the test below
    nproc = 1 + int(mvpa2.externals.exists('pprocess'))
    ntargets = 4                                   # number of targets
    npairs = ntargets * (ntargets - 1) // 2
    center_ids = [35, 55, 1]
    ds = datasets['3dsmall'].copy()

    # redefine C,T so we have a multiclass task
    nsamples = len(ds)
    ds.sa.targets = list(range(ntargets)) * (nsamples // ntargets)
    ds.sa.chunks = np.arange(nsamples) // ntargets
    # and add some obvious signal where it is due
    ds.samples[:, 55] += 15 * ds.sa.targets        # for all 4 targets
    ds.samples[:, 35] += 15 * (ds.sa.targets % 2)  # so we have conflicting labels
    # while 35 would still be just for 2 categories which would conflict

    mclf = MulticlassClassifier(LinearCSVMC(),
                                pass_attr=['sa.chunks',
                                           'ca.raw_predictions_ds'],
                                enable_ca=['raw_predictions_ds'])

    label_pairs = mclf._get_binary_pairs(ds)

    def place_sa_as_samples(ds):
        # add a degenerate dimension for the hstacking in the searchlight
        ds.samples = ds.sa.raw_predictions_ds[:, None]
        ds.sa.pop('raw_predictions_ds')    # no need to drag the copy
        return ds

    mcv = CrossValidation(mclf, OddEvenPartitioner(), errorfx=None,
                          postproc=place_sa_as_samples)
    sl = sphere_searchlight(mcv, nproc=nproc, radius=2, space='myspace',
                            center_ids=center_ids)
    slmap = sl(ds)

    ok_('chunks' in slmap.sa)
    ok_('cvfolds' in slmap.sa)
    ok_('targets' in slmap.sa)

    # so for each SL we got all pairwise tests
    assert_equal(slmap.shape, (nsamples, len(center_ids), npairs))
    assert_array_equal(np.unique(slmap.sa.cvfolds), [0, 1])

    # Verify that we got right labels in each 'pair':
    # all searchlights should have the same set of labels for a given
    # pair of targets
    label_pairs_ = np.apply_along_axis(
        np.unique, 0,
        ## reshape slmap so we have only simple pairs in the columns
        np.reshape(slmap, (-1, npairs))).T

    # need to prep that list of pairs obtained from MulticlassClassifier
    # and since it is 1-vs-1, they all should be just pairs of lists of
    # 1 element so should work
    assert_equal(len(label_pairs_), npairs)
    assert_array_equal(np.squeeze(np.array(label_pairs)), label_pairs_)
    assert_equal(label_pairs_.shape, (npairs, 2))  # for this particular case

    out = get_pairwise_accuracies(slmap)
    out123 = get_pairwise_accuracies(slmap, select=[1, 2, 3])
    # so we got at least correct targets
    assert_array_equal(np.unique(out123.T), np.arange(1, 4))
    # test that we extracted correct accuracies:
    # first 3 in out.T should have category 0, so skip them and compare
    # the rest
    assert_array_equal(out.samples[3:], out123.samples)

    ok_(np.all(out.samples[:, 1] == 1.),
        "This was with super-strong result")
def test_SplitRFE(self, fmeasure):
    # just a smoke test ATM
    from mvpa2.clfs.svm import LinearCSVMC
    from mvpa2.clfs.meta import MappedClassifier
    from mvpa2.misc.data_generators import normal_feature_dataset
    #import mvpa2.featsel.rfe
    #reload(mvpa2.featsel.rfe)
    from mvpa2.featsel.rfe import RFE, SplitRFE
    from mvpa2.generators.partition import NFoldPartitioner
    from mvpa2.featsel.helpers import FractionTailSelector
    from mvpa2.testing import ok_, assert_equal

    clf = LinearCSVMC(C=1)
    dataset = normal_feature_dataset(perlabel=20, nlabels=2, nfeatures=11,
                                     snr=1., nonbogus_features=[1, 5])
    # flip one of the meaningful features around to see
    # if we are still getting proper selection
    dataset.samples[:, dataset.a.nonbogus_features[1]] *= -1
    # 3 partitions should be enough for testing
    partitioner = NFoldPartitioner(count=3)

    rfeclf = MappedClassifier(
        clf, SplitRFE(clf,
                      partitioner,
                      fselector=FractionTailSelector(
                          0.5, mode='discard', tail='lower'),
                      fmeasure=fmeasure,
                      # need to update only when using clf's sens anal
                      update_sensitivity=fmeasure is None))
    r0 = repr(rfeclf)

    ok_(rfeclf.mapper.nfeatures_min == 0)
    rfeclf.train(dataset)
    ok_(rfeclf.mapper.nfeatures_min > 0)
    predictions = rfeclf(dataset).samples

    # at least 1 of the nonbogus-features should be chosen
    ok_(len(set(dataset.a.nonbogus_features).intersection(
        rfeclf.mapper.slicearg)) > 0)

    # check repr to have all needed pieces
    r = repr(rfeclf)
    s = str(rfeclf)
    ok_(('partitioner=NFoldP' in r) or
        ('partitioner=mvpa2.generators.partition.NFoldPartitioner' in r))
    ok_('lrn=' in r)
    ok_(not 'slicearg=' in r)
    assert_equal(r, r0)

    if externals.exists('joblib'):
        rfeclf.mapper.nproc = -1
        # compare results against the one ran in parallel
        _slicearg = rfeclf.mapper.slicearg
        _predictions = predictions
        rfeclf.train(dataset)
        predictions = rfeclf(dataset).samples
        assert_array_equal(predictions, _predictions)
        assert_array_equal(_slicearg, rfeclf.mapper.slicearg)

    # Test that we can collect stats from cas within cross-validation
    sensitivities = []
    nested_errors = []
    nested_nfeatures = []

    def store_me(data, node, result):
        sens = node.measure.get_sensitivity_analyzer(
            force_train=False)(data)
        sensitivities.append(sens)
        nested_errors.append(node.measure.mapper.ca.nested_errors)
        nested_nfeatures.append(node.measure.mapper.ca.nested_nfeatures)

    cv = CrossValidation(rfeclf, NFoldPartitioner(count=1),
                         callback=store_me, enable_ca=['stats'])
    _ = cv(dataset)
    # just to make sure we collected them
    assert_equal(len(sensitivities), 1)
    assert_equal(len(nested_errors), 1)
    assert_equal(len(nested_nfeatures), 1)
#fds2.samples = stats.zscore(fds2.samples, axis=None)
#fds = fds1.copy()
fds = fds1
#fds.samples = np.dstack((fds1.samples, fds2.samples))
fds.sa.accuracy = sequences.accuracy

# qe.ids - vertices
roi_ids = np.intersect1d(labels[0].ravel(), qe.ids)

seq_train = sequences.loc[:, ["seq_type", "seq_train"]].drop_duplicates()
seq_train = dict(zip(seq_train["seq_type"], seq_train["seq_train"]))

if True:
    # compute classification accuracy
    classifiers = {'svm': LinearCSVMC()}
    mycl = 'svm'
    myresults = []

    fds_acc = fds1[sequences.accuracy >= minacc, :].copy()

    for myseq in seq_train.keys():
        print(myseq)
        # per-class recall: fraction of trials of class `myseq`
        # predicted as `myseq`
        errorfx = lambda p, t: (np.sum(np.logical_and(p == t, t == myseq),
                                       dtype=float)
                                / np.sum(t == myseq, dtype=float))
        cv = CrossValidation(classifiers[mycl],
                             NFoldPartitioner(),
                             errorfx=errorfx,
                             enable_ca=['stats'])
                                 nlabels=2, nfeatures=2, snr=0,
                                 nonbogus_features=[0, 1])

# signal levels
sigs = [0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0]

"""

To mimic the behavior of a hard-margin SVM whenever the classes become
separable (which is easier to comprehend), we intentionally set a very
high C value.

"""

clf = LinearCSVMC(C=1000, enable_ca=['training_stats'])
cve = CrossValidation(clf, NFoldPartitioner(), enable_ca='stats')
sana = clf.get_sensitivity_analyzer(postproc=None)

rs = []
errors, training_errors = [], []

for sig in sigs:
    ds = ds_noise.copy()
    # introduce signal into the first feature
    ds.samples[ds.T == 'L1', 0] += sig

    error = np.mean(cve(ds))
    sa = sana(ds)

    training_error = 1 - clf.ca.training_stats.stats['ACC']
             np.array(ds.sa.evidence, dtype=np.str))
'''

ds.targets = ds.sa.memory_status

conf['label_dropped'] = 'None'
conf['label_included'] = 'all'

ds = preprocess_dataset(ds, data_type, **conf)

count_ = 1
field_ = 'memory'

balanc = Balancer(count=count_, apply_selection=True, limit=None)
gen = balanc.generate(ds)

cv_storage = StoreResults()

clf = LinearCSVMC(C=1)

# This is used for the sklearn cross-validation
y = np.zeros_like(ds.targets, dtype=np.int_)
y[ds.targets == ds.uniquetargets[0]] = 1

# We need to modify the chunks in order to use sklearn
ds.chunks = np.arange(len(ds.chunks))

permut_ = []

i = 3
partitioner = SKLCrossValidation(StratifiedKFold(y, n_folds=i))

cvte = CrossValidation(clf,
from mvpa2.clfs.svm import LinearCSVMC
from mvpa2.generators.partition import NFoldPartitioner
from mvpa2.measures.base import CrossValidation
from mvpa2.misc.errorfx import mean_match_accuracy
from mvpa2.mappers.fx import mean_sample

clf = LinearCSVMC(space='condition')
obj = CrossValidation(clf,
                      NFoldPartitioner(),
                      errorfx=mean_match_accuracy,
                      postproc=mean_sample(),
                      enable_ca=['stats'])
"""
(Linear voxel indices mean that each voxel is indexed by a value between
0 (inclusive) and N (exclusive), where N is the number of voxels in the
volume (N = NX * NY * NZ, where NX, NY and NZ are the number of voxels
in the three spatial dimensions). For certain analyses one may want to
index voxels by 'sub indices' (triples (i, j, k) with 0 <= i < NX,
0 <= j < NY, and 0 <= k < NZ) or by spatial coordinates; conversions
amongst linear indices, sub indices and spatial coordinates are provided
by functions in the VolGeom (volume geometry) instance stored in
'qe.voxsel.volgeom'. A toy sketch of the linear/sub index relation
follows the code below.)

From now on we follow the example as in doc/examples/searchlight.py.

First, cross-validation is defined using a (SVM) classifier.
"""

clf = LinearCSVMC()

cv = CrossValidation(clf, NFoldPartitioner(),
                     errorfx=lambda p, t: np.mean(p == t),
                     enable_ca=['stats'])

"""
Set the roi_ids, that is the node indices that serve as searchlight
center. In this example it is set to None, meaning that all nodes are
used as a searchlight center. It is also possible to restrict the nodes
that serve as a searchlight center: setting roi_ids=np.arange(400, 800)
means that only nodes in the range from 400 (inclusive) to 800
(exclusive) are used as a searchlight center, and the result would be a
partial brain map.
"""

roi_ids = None
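# A toy numpy sketch of the linear/sub index relation described above.
# Illustrative only: it assumes the x dimension varies fastest, whereas
# the authoritative conversions (and the ordering convention) come from
# the VolGeom instance in qe.voxsel.volgeom.
NX, NY, NZ = 4, 5, 6                 # hypothetical volume dimensions

def sub2lin(i, j, k):
    # sub indices (i, j, k) -> linear index in [0, NX * NY * NZ)
    return i + NX * (j + NY * k)

def lin2sub(lin):
    # linear index -> sub indices (i, j, k)
    return lin % NX, (lin // NX) % NY, lin // (NX * NY)

assert lin2sub(sub2lin(2, 3, 4)) == (2, 3, 4)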
mvpa2.seed(1)
ds_noise = normal_feature_dataset(perlabel=100, nlabels=2, nfeatures=2,
                                  snr=0, nonbogus_features=[0, 1])

# signal levels
sigs = [0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0]

"""

To mimic the behavior of a hard-margin SVM whenever the classes become
separable (which is easier to comprehend), we intentionally set a very
high C value.

"""

clf = LinearCSVMC(C=1000, enable_ca=['training_stats'])
cve = CrossValidation(clf, NFoldPartitioner(), enable_ca='stats')
sana = clf.get_sensitivity_analyzer(postproc=None)

rs = []
errors, training_errors = [], []

for sig in sigs:
    ds = ds_noise.copy()
    # introduce signal into the first feature
    ds.samples[ds.T == 'L1', 0] += sig

    error = np.mean(cve(ds))
    sa = sana(ds)

    training_error = 1 - clf.ca.training_stats.stats['ACC']
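# A hedged sketch of why the high C matters, reusing the
# normal_feature_dataset / LinearCSVMC imports assumed by the snippet
# above: on an easily separable problem a large C approximates a
# hard-margin SVM and drives the training error to (near) zero, while a
# small C tolerates margin violations.
ds_sep = normal_feature_dataset(perlabel=100, nlabels=2, nfeatures=2,
                                snr=10., nonbogus_features=[0, 1])
for C in (0.01, 1000):
    svm = LinearCSVMC(C=C, enable_ca=['training_stats'])
    svm.train(ds_sep)
    print('C=%g training ACC=%.3f'
          % (C, svm.ca.training_stats.stats['ACC']))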
def do_searchlight(glm_dataset, radius, output_basename, with_null_prob=False):
    clf = LinearCSVMC(space='condition')
    # clf = RbfCSVMC(C=5.0)
    splt = NFoldPartitioner()
    cv = CrossValidation(clf, splt,
                         errorfx=mean_match_accuracy,
                         enable_ca=['stats'],
                         postproc=mean_sample())

    distr_est = []
    if with_null_prob:
        permutator = AttributePermutator('condition', count=100,
                                         limit='chunks')
        distr_est = MCNullDist(permutator, tail='left',
                               enable_ca=['dist_samples'])
        """
        repeater = Repeater(count=100)
        permutator = AttributePermutator('condition',
                                         limit={'partitions': 1}, count=1)
        null_cv = CrossValidation(clf,
                                  ChainNode([splt, permutator],
                                            space=splt.get_space()),
                                  postproc=mean_sample())
        null_sl = sphere_searchlight(null_cv, radius=radius,
                                     space='voxel_indices',
                                     enable_ca=['roi_sizes'])
        distr_est = MCNullDist(repeater, tail='left', measure=null_sl,
                               enable_ca=['dist_samples'])
        """
        sl = sphere_searchlight(cv, radius=radius, space='voxel_indices',
                                null_dist=distr_est,
                                enable_ca=['roi_sizes', 'roi_feature_ids'])
    else:
        sl = sphere_searchlight(cv, radius=radius, space='voxel_indices',
                                enable_ca=['roi_sizes', 'roi_feature_ids'])

    #ds = glm_dataset.copy(deep=False,
    #                      sa=['condition', 'chunks'],
    #                      fa=['voxel_indices'],
    #                      a=['mapper'])
    #debug.active += ["SLC"]
    sl_map = sl(glm_dataset)

    # save the accuracy map, then flip it (1 - accuracy) and save the
    # error map
    accresults = map2nifti(sl_map, imghdr=glm_dataset.a.imghdr)
    accresults.to_filename('{}-acc.nii.gz'.format(output_basename))
    sl_map.samples *= -1
    sl_map.samples += 1
    niftiresults = map2nifti(sl_map, imghdr=glm_dataset.a.imghdr)
    niftiresults.to_filename('{}-err.nii.gz'.format(output_basename))

    #TODO: save p value map
    if with_null_prob:
        nullt_results = map2nifti(sl_map, data=sl.ca.null_t,
                                  imghdr=glm_dataset.a.imghdr)
        nullt_results.to_filename('{}-t.nii.gz'.format(output_basename))
        nullprob_results = map2nifti(sl_map, data=sl.ca.null_prob,
                                     imghdr=glm_dataset.a.imghdr)
        nullprob_results.to_filename('{}-prob.nii.gz'.format(output_basename))
        nullprob_results = map2nifti(sl_map,
                                     data=distr_est.cdf(sl_map.samples),
                                     imghdr=glm_dataset.a.imghdr)
        nullprob_results.to_filename('{}-cdf.nii.gz'.format(output_basename))
def _train(self, ds):
    avg_mapper = mean_group_sample([self._attribute])
    ds = ds.get_mapped(avg_mapper)
    return LinearCSVMC._train(self, ds)
def __init__(self, C=1, attr='trial'):
    LinearCSVMC.__init__(self, C=C)
    self._attribute = attr
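# The __init__ and _train fragments above, together with the _call
# override shown earlier in this collection, evidently belong to a single
# LinearCSVMC subclass. A minimal assembly sketch; the class name
# AverageLinearCSVMC is a hypothetical choice, not confirmed by the source.
from mvpa2.clfs.svm import LinearCSVMC
from mvpa2.mappers.fx import mean_group_sample
from mvpa2.datasets import Dataset

class AverageLinearCSVMC(LinearCSVMC):
    def __init__(self, C=1, attr='trial'):
        LinearCSVMC.__init__(self, C=C)
        self._attribute = attr

    def _train(self, ds):
        # average all samples sharing the same value of the attribute
        ds = ds.get_mapped(mean_group_sample([self._attribute]))
        return LinearCSVMC._train(self, ds)

    def _call(self, ds):
        # propagate sample attributes into the result dataset
        res = LinearCSVMC._call(self, ds)
        if isinstance(res, Dataset):
            for k in ds.sa.keys():
                res.sa[k] = ds.sa[k]
            return res
        return Dataset(res, sa=ds.sa)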
def plot_vertex(label, vertex_data):
    fdsz = vertex_data[label][0]
    fds_effects = vertex_data[label][1]
    print("#######")
    print(label)

    # clean accuracy
    # extract data and remove outside of valid trials
    NCOMPS = 10
    pca = PCA(n_components=NCOMPS, whiten=False)

    nels = len(np.unique(fds_effects.targets))
    meanRDMs = []
    meanRDMs_pca = []
    for chunkind, chunk in enumerate(np.unique(fds_effects.chunks)):
        fds_red = fds_effects[fds_effects.chunks == chunk]
        # use new class
        fds_pca = pca.fit_transform(fds_effects.samples)
        fds_pca_red = fds_pca[fds_effects.chunks == chunk]
        pl.plot(pca.explained_variance_ratio_)
        pl.xlim((0, NCOMPS))

        dist_matrix = rsa.pdist(fds_red[fds_red.sa.accuracy == 1, :],
                                metric='correlation')
        dist_matrix_pca = rsa.pdist(fds_pca_red[fds_red.sa.accuracy == 1, :],
                                    metric='correlation')

        meanRDM, within, between = get_RDM_metric(
            dist_matrix, 'correlation',
            fds_red[fds_red.sa.accuracy == 1, :].targets)

        meanRDM_pca, within_pca, between_pca = get_RDM_metric(
            dist_matrix_pca, 'correlation',
            fds_red[fds_red.sa.accuracy == 1, :].targets)

        meanRDMs.append(meanRDM)
        meanRDMs_pca.append(meanRDM_pca)

    meanRDMs = 1 - np.tanh(np.nanmean(np.dstack(meanRDMs), axis=2))
    meanRDMs_pca = 1 - np.tanh(np.nanmean(np.dstack(meanRDMs_pca), axis=2))

    pl.figure()
    pl.subplot(1, 2, 1)
    pl.imshow(meanRDMs)
    pl.subplot(1, 2, 2)
    pl.imshow(meanRDMs_pca)
    pl.savefig(os.path.join(datapath, 'results', 'matrix-%s.png' % (label)))

    RDM = meanRDMs_pca   # select one

    colors = {1: 'b', 2: 'g', 3: 'r', 4: 'k'}
    # markers = ['o', 'v', '+', 's']

    mymat = RDM.copy()

    # as barplot
    within_values = np.diag(RDM)
    np.fill_diagonal(mymat, 0)
    between_values = np.sum(mymat, axis=0) / (nels - 1)

    fig = pl.figure(figsize=(15, 10), dpi=300)
    ax = fig.gca()
    ind = np.unique(fds_effects.targets)
    p1 = ax.bar(ind - 0.2, within_values, color='green', width=0.4)
    p2 = ax.bar(ind + 0.2, between_values, color='red', width=0.4)
    ax.set_xticks(ind)
    ax.set_ylim((0, .7))
    fig.legend((p1[0], p2[0]), ('Within', 'Between'))
    fig.savefig(os.path.join(datapath, 'results', 'barplot%s.png' % (label)))

    pl.figure(figsize=(15, 10), dpi=300)
    for t, target in enumerate(np.unique(fds_red.targets)):
        sel = np.logical_and(fds_red.targets == target,
                             fds_red.sa.accuracy == 1)
        # sel = np.logical_and(fds_red.chunks == 2, sel)
        std = np.std(fds_pca_red[sel, :], axis=0)
        mean = np.mean(fds_pca_red[sel, :], axis=0)
        pl.plot(mean, color=colors[target])
        pl.plot(fds_pca_red[sel, :].T, color=colors[target], linewidth=0.2)
        pl.errorbar(x=np.arange(len(mean)), y=mean, yerr=std, fmt='o')
    pl.savefig(os.path.join(datapath, 'results', 'PCA-%s.png' % (label)))

    # ADD ALL
    # mtgs = mean_group_sample(['targets', 'chunks'])
    # fds_mean = mtgs(fds[fds.sa.trials > 0, fd_indices])
    X = fdsz.samples
    pl.figure(figsize=(15, 10), dpi=300)
    colors = {'1': 'b', '2': 'g', '3': 'r', '4': 'k'}
    # for chunk in np.unique(fds.chunks):
    #     pl.subplot(1, NRUNS, chunk + 1)
    # for trial in range(1, 6):
    for target in ['1', '2', '3', '4']:
        sel = fds.targets == target
        # np.logical_and(fds.chunks == chunk, fds.targets == target)
        if np.sum(sel):
            df = pd.DataFrame({
                't': np.round(fds.sa.trial_time[sel], 1),
                'x': np.mean(X[sel, :], axis=1)
            })
            xx = np.linspace(np.min(df.t), np.max(df.t), 20)
            yy = np.interp(xx, df.t, df.x)
            # df = df.groupby('t').mean()
            # pl.plot(df.index, df.x, color=colors[target])
            pl.plot(xx, yy, color=colors[target])
    pl.xlim((0, 25))
    pl.ylim((-1, 2))
    pl.axvline(x=1.0, linestyle='--', color='k')   # fixation
    pl.axvline(x=3.1, linestyle='--', color='k')   # execution
    pl.axvline(x=6.6, linestyle='--', color='k')   # end of execution
    pl.axvline(x=6.6 + 0.5 + 6.7, linestyle='--',
               color='r')                          # mean for next trial
    pl.axhline(y=0.0, linestyle='--', color='g')
    pl.savefig(
        os.path.join(datapath, 'results', 'timecourses-%s.png' % (label)))

    #########
    if True:
        svm = LinearCSVMC()
        # ridge = RidgeReg()
        NPERMS = 100
        permutator = AttributePermutator('targets', count=NPERMS)
        partitioner = NFoldPartitioner()
        distr_est = MCNullDist(permutator, tail='left',
                               enable_ca=['dist_samples'])

        cv_svm = CrossValidation(svm, partitioner,
                                 errorfx=lambda p, t: np.mean(p == t),
                                 enable_ca=['stats'])
        # cv_ridge = CrossValidation(ridge, partitioner,
        #                            errorfx=lambda p, t: np.mean(p == t),
        #                            enable_ca=['stats'])
        results_svm = cv_svm(fds_effects[fds_effects.sa.accuracy == 1, :])
        # results_ridge = cv_ridge(fds)

        cv_mc = CrossValidation(svm, partitioner,
                                postproc=mean_sample(),
                                null_dist=distr_est,
                                enable_ca=['stats'])
        # cv_mc = CrossValidation(svm, partitioner,
        #                         errorfx=mean_mismatch_error,
        #                         null_dist=distr_est,
        #                         enable_ca=['stats'])

        # run
        results_clf = cv_mc(fds_effects[fds_effects.sa.accuracy == 1, :])
        p = cv_mc.ca.null_prob

        print('Accuracy:', np.mean(results_svm))
        print('CV-errors:', np.ravel(results_clf))
        print('Corresponding p-values:', np.ravel(p))

    # pl.hist(fds.samples)

    if False:
        # compute matrix and aggregate across trials
        dist_matrix = squareform(
            rsa.pdist(fds_effects[fds_effects.sa.accuracy == 1, :],
                      metric='correlation'))
        # dist_matrix = squareform(rsa.pdist(fds, metric='mahalanobis'))
        meanRDM, elements, score, within, between = aggregate_matrix(
            dist_matrix, fds_effects.chunks, fds_effects.targets)
        print(label, score, within, between)

        # pl.figure(figsize=(15, 9), dpi=300)
        # pl.plot(within, between)
        # pl.xlim((0, 2))
        # pl.ylim((0, 2))
        # pl.savefig(os.path.join(datapath, 'results', 'score-%s.png' % (label)))

        pl.figure(figsize=(15, 9), dpi=300)
        for chunkind, chunk in enumerate(np.unique(fds_effects.chunks)):
            pl.subplot(1, 5, chunkind + 1)
            mtx = meanRDM[:, :, chunkind]
            pl.imshow(mtx, interpolation='nearest')
            pl.xticks(range(len(mtx)), elements, rotation=-45)
            pl.yticks(range(len(mtx)), elements)
            pl.title('Correlation distances')
            pl.clim((0, np.nanmax(mtx)))
            # if chunkind == 4:
            #     pl.colorbar()
        pl.savefig(
            os.path.join(datapath, 'results',
                         'target_distances-%s.png' % (label)))

        # MDS
        embedding = MDS(n_components=2, dissimilarity='precomputed')
        X_transformed = embedding.fit_transform(dist_matrix)

        colors = ['k', 'r', 'g', 'b']
        markers = ['o', 'v', 'P', 's']
        pl.figure(figsize=(27, 9), dpi=300)
        for c, chunk in enumerate(np.unique(fds_effects.chunks)):
            pl.subplot(1, 5, c + 1)
            xmin = np.min(X_transformed[fds_effects.chunks == chunk, 0]) * 1.1
            xmax = np.max(X_transformed[fds_effects.chunks == chunk, 0]) * 1.1
            ymin = np.min(X_transformed[fds_effects.chunks == chunk, 1]) * 1.1
            ymax = np.max(X_transformed[fds_effects.chunks == chunk, 1]) * 1.1
            for t, target in enumerate(np.unique(fds_effects.targets)):
                sel = np.logical_and(fds_effects.chunks == chunk,
                                     fds_effects.targets == target)
                pl.scatter(X_transformed[sel, 0], X_transformed[sel, 1],
                           marker=markers[t], color=colors[t],
                           s=5, alpha=0.8)
                pl.xlim((xmin, xmax))
                pl.ylim((ymin, ymax))
                pl.scatter(np.mean(X_transformed[sel, 0], axis=0),
                           np.mean(X_transformed[sel, 1], axis=0),
                           marker=markers[t], color=colors[t], s=50)
        pl.savefig(os.path.join(datapath, 'results', 'MDS-%s.png' % (label)))