def test_gnb_overflow():
    # https://github.com/PyMVPA/PyMVPA/issues/581
    gnb = GNB(enable_ca='estimates',
              #logprob=True,  # implemented only for True ATM
              normalize=True,
              # uncomment if interested to trigger:
              # guard_overflows=False,
              )
    # Having lots of features could trigger under/overflows
    ds = normal_feature_dataset(perlabel=4,
                                nlabels=2,
                                nfeatures=100000,
                                nchunks=2,
                                snr=5,
                                nonbogus_features=[0, 1])
    ds_train = ds[ds.chunks == ds.UC[0]]
    ds_test = ds[ds.chunks == ds.UC[1]]

    gnb.train(ds_train)
    res = gnb.predict(ds_test)
    res_est = gnb.ca.estimates

    probs = np.exp(res_est) if gnb.params.logprob else res_est

    assert np.all(np.isfinite(res_est))
    assert np.all(np.isfinite(probs))
    assert_equal(sorted(np.unique(probs)), [0, 1])  # quantized into 0, 1 given this many samples
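
# Hedged aside (not part of the original suite): a minimal numpy-only sketch of
# the under/overflow the test above guards against.  Multiplying ~1e5
# per-feature likelihoods in linear space underflows float64 to exactly 0.0,
# while accumulating their logs stays finite -- which is why GNB works in log
# space and offers normalize=True.
def _demo_naive_bayes_underflow():
    likelihoods = np.full(100000, 0.5)       # plausible per-feature likelihoods
    linear = np.prod(likelihoods)            # 0.5**100000 underflows to 0.0
    logspace = np.sum(np.log(likelihoods))   # finite: 100000 * log(0.5)
    assert linear == 0.0
    assert np.isfinite(logspace)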
def test_gnb_sensitivities():
    gnb = GNB(common_variance=True)
    ds = normal_feature_dataset(perlabel=4,
                                nlabels=3,
                                nfeatures=5,
                                nchunks=4,
                                snr=10,
                                nonbogus_features=[0, 1, 2])

    s = gnb.get_sensitivity_analyzer()(ds)
    assert_in('targets', s.sa)
    assert_equal(s.shape,
                 ((len(ds.uniquetargets) * (len(ds.uniquetargets) - 1)) / 2,
                  ds.nfeatures))

    # test zero variance case
    # set variance of feature to zero
    ds.samples[:, 3] = 0.3
    s_zerovar = gnb.get_sensitivity_analyzer()
    sens = s_zerovar(ds)
    assert_true(all(sens.samples[:, 3] == 0))

    # test whether tagging and untagging works
    assert 'has_sensitivity' in gnb.__tags__
    gnb.untrain()
    assert 'has_sensitivity' not in gnb.__tags__

    # test whether content of sensitivities makes rough sense
    # e.g.: sensitivity of first feature should be larger than of bogus last feature
    # (wrapped in all() -- a bare generator is always truthy, so the original
    # assertion could never fail)
    assert_true(all(abs(sens.samples[i, 0]) > abs(sens.samples[i, 4])
                    for i in range(np.shape(sens.samples)[0])))
def test_gnbsearchlight_matchaccuracy(self):
    # was not able to deal with custom errorfx collapsing samples
    # after 55e147e0bd30fbf4edede3faef3a15c6c65b33ea
    ds = datasets['3dmedium'].copy()
    ds.fa['voxel_indices'] = ds.fa.myspace
    sl_err = sphere_gnbsearchlight(GNB(), NFoldPartitioner(cvtype=1),
                                   radius=0)
    sl_acc = sphere_gnbsearchlight(GNB(), NFoldPartitioner(cvtype=1),
                                   radius=0,
                                   errorfx=mean_match_accuracy)
    assert_array_almost_equal(sl_err(ds), 1.0 - sl_acc(ds).samples)
def test_cached_qe_gnbsearchlight(self):
    ds1 = datasets['3dsmall'].copy(deep=True)
    qe = IndexQueryEngine(myspace=Sphere(2))
    cached_qe = CachedQueryEngine(qe)
    gnb_sl = GNBSearchlight(GNB(), NFoldPartitioner(), qe=cached_qe)
    res = gnb_sl(ds1)
    assert_false(cached_qe.ids is None)
def test_chained_crossvalidation_searchlight():
    from mvpa2.clfs.gnb import GNB
    from mvpa2.clfs.meta import MappedClassifier
    from mvpa2.generators.partition import NFoldPartitioner
    from mvpa2.mappers.base import ChainMapper
    from mvpa2.mappers.base import Mapper
    from mvpa2.measures.base import CrossValidation
    from mvpa2.measures.searchlight import sphere_searchlight
    from mvpa2.testing.datasets import datasets

    dataset = datasets['3dlarge'].copy()
    dataset.fa['voxel_indices'] = dataset.fa.myspace
    sample_clf = GNB()  # fast and deterministic

    class ZScoreFeaturesMapper(Mapper):
        """Very basic mapper which would take care about standardizing
        all features within each sample separately
        """
        def _forward_data(self, data):
            return (data - np.mean(data, axis=1)[:, None]) \
                / np.std(data, axis=1)[:, None]

    # only do partial to save time
    sl_kwargs = dict(radius=2, center_ids=[3, 50])

    clf_mapped = MappedClassifier(sample_clf, ZScoreFeaturesMapper())
    cv = CrossValidation(clf_mapped, NFoldPartitioner())
    sl = sphere_searchlight(cv, **sl_kwargs)
    results_mapped = sl(dataset)

    cv_chained = ChainMapper([ZScoreFeaturesMapper(auto_train=True),
                              CrossValidation(sample_clf, NFoldPartitioner())])
    sl_chained = sphere_searchlight(cv_chained, **sl_kwargs)
    results_chained = sl_chained(dataset)

    assert_array_equal(results_mapped, results_chained)
def test_gnbsearchlight_exclude_partition(self):
    # just a smoke test with a custom partitioner
    ds1 = datasets['3dsmall'].copy(deep=True)
    gnb_sl = GNBSearchlight(GNB(),
                            generator=CustomPartitioner([([0], [1])]),
                            qe=IndexQueryEngine(myspace=Sphere(2)),
                            errorfx=None)
    res = gnb_sl(ds1)
def test_searchlight_errors_per_trial():
    # To make sure that searchlight can return error/accuracy per trial
    from mvpa2.clfs.gnb import GNB
    from mvpa2.generators.partition import OddEvenPartitioner
    from mvpa2.measures.base import CrossValidation
    from mvpa2.measures.searchlight import sphere_searchlight
    from mvpa2.measures.gnbsearchlight import sphere_gnbsearchlight
    from mvpa2.testing.datasets import datasets
    from mvpa2.misc.errorfx import prediction_target_matches

    dataset = datasets['3dsmall'].copy()
    # randomly permute samples so we break any random correspondence
    # to strengthen tests below
    sample_idx = np.arange(len(dataset))
    dataset = dataset[np.random.permutation(sample_idx)]

    dataset.sa.targets = ['L%d' % l for l in dataset.sa.targets]
    dataset.fa['voxel_indices'] = dataset.fa.myspace
    sample_clf = GNB()  # fast and deterministic

    part = OddEvenPartitioner()
    # only do partial to save time
    cv = CrossValidation(sample_clf, part,
                         errorfx=None)  #prediction_target_matches)
    # Just to compare error
    cv_error = CrossValidation(sample_clf, part)

    # Large searchlight radius so we get entire ROI, 2 centers just to make sure
    # that all stacking works correctly
    sl = sphere_searchlight(cv, radius=10, center_ids=[0, 1])
    results = sl(dataset)

    sl_gnb = sphere_gnbsearchlight(sample_clf, part, radius=10,
                                   errorfx=None, center_ids=[0, 1])
    results_gnbsl = sl_gnb(dataset)

    # inspect both results
    # verify that partitioning was done correctly
    partitions = list(part.generate(dataset))
    for res in (results, results_gnbsl):
        assert('targets' in res.sa.keys())  # should carry targets
        assert('cvfolds' in res.sa.keys())  # should carry cvfolds
        for ipart in xrange(len(partitions)):
            assert_array_equal(
                dataset[partitions[ipart].sa.partitions == 2].targets,
                res.sa.targets[res.sa.cvfolds == ipart])

    assert_datasets_equal(results, results_gnbsl)

    # one "accuracy" per each trial
    assert_equal(results.shape, (len(dataset), 2))
    # with accuracies the same in both searchlights since the same
    # features were to be selected in both cases due to the large radii
    errors_dataset = cv(dataset)
    assert_array_equal(errors_dataset.samples[:, 0], results.samples[:, 0])
    assert_array_equal(errors_dataset.samples[:, 0], results.samples[:, 1])
    # and error matching (up to precision) the one if we run with default error function
    assert_array_almost_equal(
        np.mean(results.targets[:, None] != results.samples, axis=0)[0],
        np.mean(cv_error(dataset)))
def test_splitter_gnbsearchlight(self):
    ds1 = datasets['3dsmall'].copy(deep=True)
    gnb_sl = GNBSearchlight(GNB(),
                            generator=CustomPartitioner([([0], [1])]),
                            qe=IndexQueryEngine(myspace=Sphere(2)),
                            splitter=Splitter(attr='partitions',
                                              attr_values=[1, 2]),
                            errorfx=None)
    res = gnb_sl(ds1)
    assert_equal(res.nsamples, (ds1.chunks == 1).sum())
def test_gnbsearchlight_3partitions_and_splitter(self):
    ds = self.dataset[:, :20]
    # custom partitioner which provides 3 partitions
    part = CustomPartitioner([([2], [3], [1])])
    gnb_sl = sphere_gnbsearchlight(GNB(), part)
    res_gnb_sl = gnb_sl(ds)

    # compare results to full blown searchlight
    sl = sphere_searchlight(CrossValidation(GNB(), part))
    res_sl = sl(ds)

    assert_datasets_equal(res_gnb_sl, res_sl)

    # and theoretically for this simple single cross-validation we could
    # just use Splitter
    splitter = Splitter('chunks', [2, 3])
    # we have to pass an explicit None for the generator since it cannot
    # be given as a keyword argument here
    gnb_sl_ = sphere_gnbsearchlight(GNB(), None, splitter=splitter)
    res_gnb_sl_ = gnb_sl_(ds)
    assert_datasets_equal(res_gnb_sl, res_gnb_sl_)
def _test_gnb_overflow_haxby():  # pragma: no cover
    # example from https://github.com/PyMVPA/PyMVPA/issues/581
    # a heavier version of the above test
    import os
    import numpy as np
    from mvpa2.datasets.sources.native import load_tutorial_data
    from mvpa2.clfs.gnb import GNB
    from mvpa2.measures.base import CrossValidation
    from mvpa2.generators.partition import HalfPartitioner
    from mvpa2.mappers.zscore import zscore
    from mvpa2.mappers.detrend import poly_detrend
    from mvpa2.datasets.miscfx import remove_invariant_features
    from mvpa2.testing.datasets import *

    datapath = '/usr/share/data/pymvpa2-tutorial/'
    haxby = load_tutorial_data(
        datapath,
        roi='vt',
        add_fa={'vt_thr_glm': os.path.join(datapath, 'haxby2001', 'sub001',
                                           'masks', 'orig', 'vt.nii.gz')})
    # poly_detrend(haxby, polyord=1, chunks_attr='chunks')
    haxby = haxby[np.array([l in ['rest', 'scrambled']  # 'house', 'face']
                            for l in haxby.targets], dtype='bool')]
    #zscore(haxby, chunks_attr='chunks', param_est=('targets', ['rest']),
    #       dtype='float32')
    # haxby = haxby[haxby.sa.targets != 'rest']
    haxby = remove_invariant_features(haxby)

    clf = GNB(enable_ca='estimates', logprob=True, normalize=True)

    #clf.train(haxby)
    #clf.predict(haxby)
    # estimates a bit "overfit" to judge in the train/predict on the same data

    cv = CrossValidation(clf, HalfPartitioner(attr='chunks'),
                         postproc=None, enable_ca=['stats'])
    cv_results = cv(haxby)
    res1_est = clf.ca.estimates
    print "Estimates:\n", res1_est
    print "Exp(estimates):\n", np.round(np.exp(res1_est), 3)
    assert np.all(np.isfinite(res1_est))
def test_gnb_sensitivities():
    gnb = GNB(common_variance=True)
    ds = normal_feature_dataset(perlabel=4,
                                nlabels=3,
                                nfeatures=5,
                                nchunks=4,
                                snr=20,
                                nonbogus_features=[0, 1, 2])

    s = gnb.get_sensitivity_analyzer()(ds)
    assert_in('targets', s.sa)
    assert_equal(s.shape,
                 ((len(ds.uniquetargets) * (len(ds.uniquetargets) - 1)) / 2,
                  ds.nfeatures))

    # test zero variance case
    # set variance of feature to zero
    ds.samples[:, 3] = 0.3
    s_zerovar = gnb.get_sensitivity_analyzer()
    sens = s_zerovar(ds)
    assert_equal(sens.T.dtype, 'O')  # we store pairs
    assert_equal(sens.T[0], ('L0', 'L1'))
    assert_true(all(sens.samples[:, 3] == 0))
    gnb.untrain()

    # test whether content of sensitivities makes rough sense
    # First feature has information only about L0, so it would be of
    # no use for L1 -vs- L2 classification, so we will go through each pair
    # and make sure that signs etc all correct for each pair.
    # This in principle should be a generic test for multiclass sensitivities
    abssens = abs(sens.samples)
    for (t1, t2), t1t2sens in zip(sens.T, sens.samples):
        # go from the literal label ('L1' -> 1, 'L0' -> 0) to the index of
        # the feature carrying that label's signal
        i1 = int(t1[1])
        i2 = int(t2[1])
        assert t1t2sens[i1] < 0
        assert t1t2sens[i2] > 0
        assert t1t2sens[i2] > t1t2sens[4]
def test_confusionmatrix_nulldist(self):
    from mvpa2.clfs.gnb import GNB
    from mvpa2.clfs.transerror import ConfusionMatrixError
    from mvpa2.misc.data_generators import normal_feature_dataset
    for snr in [0., 2.]:
        ds = normal_feature_dataset(snr=snr, perlabel=42, nchunks=3,
                                    nonbogus_features=[0, 1], nfeatures=2)

        clf = GNB()
        num_perm = 50
        permutator = AttributePermutator('targets',
                                         limit='chunks',
                                         count=num_perm)
        cv = CrossValidation(
            clf, NFoldPartitioner(),
            errorfx=ConfusionMatrixError(labels=ds.sa['targets'].unique),
            postproc=mean_sample(),
            null_dist=MCNullDist(permutator,
                                 tail='right',  # because we now look at accuracy not error
                                 enable_ca=['dist_samples']),
            enable_ca=['stats'])
        cmatrix = cv(ds)
        #print "Result:\n", cmatrix.samples
        cvnp = cv.ca.null_prob.samples
        #print cvnp
        # assertEqual, not assertTrue -- with assertTrue the second
        # argument would silently become the failure message
        self.assertEqual(cvnp.shape, (2, 2))
        if cfg.getboolean('tests', 'labile', default='yes'):
            if snr == 0.:
                # all p should be high since no signal
                assert_array_less(0.05, cvnp)
            else:
                # diagonal p is low -- we have signal after all
                assert_array_less(np.diag(cvnp), 0.05)
                # off diagonals are high p since for them we would
                # need to look at the other tail
                assert_array_less(0.9,
                                  cvnp[(np.array([0, 1]), np.array([1, 0]))])
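
# Hedged aside (not from the original suite): the right-tail p-values checked
# above come from comparing the observed statistic against the permutation
# distribution.  A minimal sketch of that computation, with the common +1
# smoothing so p never reaches exactly 0 (MCNullDist's internals may differ):
def _mc_pvalue_right_tail(observed, null_samples):
    null_samples = np.asarray(null_samples)
    # fraction of permutations at least as extreme as the observed value
    return (1.0 + np.sum(null_samples >= observed)) \
        / (1.0 + len(null_samples))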
def test_gnb_sensitivities(logprob):
    gnb = GNB(common_variance=True, logprob=logprob)
    ds = normal_feature_dataset(perlabel=4,
                                nlabels=3,
                                nfeatures=5,
                                nchunks=4,
                                snr=20,
                                nonbogus_features=[0, 1, 2])

    s = gnb.get_sensitivity_analyzer()(ds)
    assert_in('targets', s.sa)
    assert_equal(s.shape,
                 ((len(ds.uniquetargets) * (len(ds.uniquetargets) - 1)) / 2,
                  ds.nfeatures))

    # test zero variance case
    # set variance of feature to zero
    ds.samples[:, 3] = 0.3
    s_zerovar = gnb.get_sensitivity_analyzer()
    sens = s_zerovar(ds)
    assert_equal(sens.T.dtype, 'O')  # we store pairs
    assert_equal(sens.T[0], ('L0', 'L1'))
    assert_true(all(sens.samples[:, 3] == 0))
    gnb.untrain()

    # test whether content of sensitivities makes rough sense
    # First feature has information only about L0, so it would be of
    # no use for L1 -vs- L2 classification, so we will go through each pair
    # and make sure that signs etc all correct for each pair.
    # This in principle should be a generic test for multiclass sensitivities
    abssens = abs(sens.samples)
    for (t1, t2), t1t2sens in zip(sens.T, sens.samples):
        # go from the literal label ('L1' -> 1, 'L0' -> 0) to the index of
        # the feature carrying that label's signal
        i1 = int(t1[1])
        i2 = int(t2[1])
        assert t1t2sens[i1] < 0
        assert t1t2sens[i2] > 0
        assert t1t2sens[i2] > t1t2sens[4]
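
# Hedged aside (not from the original suite): with common_variance=True the
# pairwise GNB log-odds is linear in the features, with per-feature weight
# proportional to the difference of class means over the pooled variance --
# which is why the sign checks above make sense.  A numpy-only sketch (the
# exact sign convention is whichever makes the checks above hold; here the
# second class's mean comes first):
def _pairwise_gnb_weights(X1, X2):
    pooled = np.concatenate((X1, X2), axis=0)
    var = np.var(pooled, axis=0)
    var[var == 0] = np.inf  # zero-variance features get exactly zero weight
    return (np.mean(X2, axis=0) - np.mean(X1, axis=0)) / var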
def test_voxel_selection(self):
    '''Compare surface and volume based searchlight'''
    '''
    Tests to see whether results are identical for surface-based
    searchlight (just one plane; Euclidean distance) and volume-based
    searchlight.

    Note that the current value is a float; if it were int, it would
    specify the number of voxels in each searchlight'''
    radius = 10.

    '''Define input filenames'''
    epi_fn = os.path.join(pymvpa_dataroot, 'bold.nii.gz')
    maskfn = os.path.join(pymvpa_dataroot, 'mask.nii.gz')

    '''
    Use the EPI datafile to define a surface.
    The surface has as many nodes as there are voxels
    and is parallel to the volume 'slice'
    '''
    vg = volgeom.from_any(maskfn, mask_volume=True)

    aff = vg.affine
    nx, ny, nz = vg.shape[:3]

    '''Plane goes in x and y direction, so we take these vectors
    from the affine transformation matrix of the volume'''
    plane = surf.generate_plane(aff[:3, 3], aff[:3, 0], aff[:3, 1], nx, ny)

    '''
    Simulate pial and white matter as just above and below
    the central plane
    '''
    normal_vec = aff[:3, 2]
    outer = plane + normal_vec
    inner = plane + -normal_vec

    '''
    Combine volume and surface information
    '''
    vsm = volsurf.VolSurfMaximalMapping(vg, outer, inner)

    '''
    Run voxel selection with specified radius (in mm), using
    Euclidean distance measure
    '''
    surf_voxsel = surf_voxel_selection.voxel_selection(vsm, radius,
                                                       distance_metric='e')

    '''Define the measure'''
    # run_slow=True would give an actual cross-validation with meaningful
    # accuracies. Because this is a unit-test only the number of voxels
    # in each searchlight is tested.
    run_slow = False

    if run_slow:
        meas = CrossValidation(GNB(), OddEvenPartitioner(),
                               errorfx=lambda p, t: np.mean(p == t))
        postproc = mean_sample
    else:
        meas = _Voxel_Count_Measure()
        postproc = lambda x: x

    '''
    Surface analysis: define the query engine, cross validation,
    and searchlight
    '''
    surf_qe = SurfaceVerticesQueryEngine(surf_voxsel)
    surf_sl = Searchlight(meas, queryengine=surf_qe, postproc=postproc)

    '''
    new (Sep 2012): also test 'simple' queryengine wrapper function
    '''
    surf_qe2 = disc_surface_queryengine(radius, maskfn, inner, outer,
                                        plane, volume_mask=True,
                                        distance_metric='euclidean')
    surf_sl2 = Searchlight(meas, queryengine=surf_qe2, postproc=postproc)

    '''
    Same for the volume analysis
    '''
    element_sizes = tuple(map(abs, (aff[0, 0], aff[1, 1], aff[2, 2])))
    sph = Sphere(radius, element_sizes=element_sizes)
    kwa = {'voxel_indices': sph}

    vol_qe = IndexQueryEngine(**kwa)
    vol_sl = Searchlight(meas, queryengine=vol_qe, postproc=postproc)

    '''The following steps are similar to start_easy.py'''
    attr = SampleAttributes(os.path.join(pymvpa_dataroot,
                                         'attributes_literal.txt'))

    mask = surf_voxsel.get_mask()

    dataset = fmri_dataset(samples=os.path.join(pymvpa_dataroot,
                                                'bold.nii.gz'),
                           targets=attr.targets,
                           chunks=attr.chunks,
                           mask=mask)

    if run_slow:
        # do chunkswise linear detrending on dataset
        poly_detrend(dataset, polyord=1, chunks_attr='chunks')

        # zscore dataset relative to baseline ('rest') mean
        zscore(dataset, chunks_attr='chunks',
               param_est=('targets', ['rest']))

    # select class face and house for this demo analysis
    # would work with full datasets (just a little slower)
    dataset = dataset[np.array([l in ['face', 'house']
                                for l in dataset.sa.targets],
                               dtype='bool')]

    '''Apply searchlight to datasets'''
    surf_dset = surf_sl(dataset)
    surf_dset2 = surf_sl2(dataset)
    vol_dset = vol_sl(dataset)

    surf_data = surf_dset.samples
    surf_data2 = surf_dset2.samples
    vol_data = vol_dset.samples

    assert_array_equal(surf_data, surf_data2)
    assert_array_equal(surf_data, vol_data)
def test_gnbsearchlight_permutations():
    import mvpa2
    from mvpa2.base.node import ChainNode
    from mvpa2.clfs.gnb import GNB
    from mvpa2.generators.base import Repeater
    from mvpa2.generators.partition import NFoldPartitioner, OddEvenPartitioner
    #import mvpa2.generators.permutation
    #reload(mvpa2.generators.permutation)
    from mvpa2.generators.permutation import AttributePermutator
    from mvpa2.testing.datasets import datasets
    from mvpa2.measures.base import CrossValidation
    from mvpa2.measures.gnbsearchlight import sphere_gnbsearchlight
    from mvpa2.measures.searchlight import sphere_searchlight
    from mvpa2.mappers.fx import mean_sample
    from mvpa2.misc.errorfx import mean_mismatch_error
    from mvpa2.clfs.stats import MCNullDist
    from mvpa2.testing.tools import assert_raises, ok_, assert_array_less

    # mvpa2.debug.active = ['APERM', 'SLC']  #, 'REPM']
    # mvpa2.debug.metrics += ['pid']
    count = 10
    nproc = 1 + int(mvpa2.externals.exists('pprocess'))
    ds = datasets['3dsmall'].copy()
    ds.fa['voxel_indices'] = ds.fa.myspace

    slkwargs = dict(radius=3, space='voxel_indices', enable_ca=['roi_sizes'],
                    center_ids=[1, 10, 70, 100])

    mvpa2.seed(mvpa2._random_seed)
    clf = GNB()
    splt = NFoldPartitioner(cvtype=2, attr='chunks')

    repeater = Repeater(count=count)
    permutator = AttributePermutator('targets',
                                     limit={'partitions': 1},
                                     count=1)

    null_sl = sphere_gnbsearchlight(clf,
                                    ChainNode([splt, permutator],
                                              space=splt.get_space()),
                                    postproc=mean_sample(),
                                    errorfx=mean_mismatch_error,
                                    **slkwargs)

    distr_est = MCNullDist(repeater, tail='left', measure=null_sl,
                           enable_ca=['dist_samples'])
    sl = sphere_gnbsearchlight(clf, splt,
                               reuse_neighbors=True,
                               null_dist=distr_est,
                               postproc=mean_sample(),
                               errorfx=mean_mismatch_error,
                               **slkwargs)
    if __debug__:
        # assert is done only without -O mode
        assert_raises(NotImplementedError, sl, ds)

    # "ad-hoc searchlights can't handle yet varying targets across partitions"
    if False:
        # after above limitation is removed -- enable
        sl_map = sl(ds)
        sl_null_prob = sl.ca.null_prob.samples.copy()

    mvpa2.seed(mvpa2._random_seed)
    ### 'normal' Searchlight
    clf = GNB()
    splt = NFoldPartitioner(cvtype=2, attr='chunks')
    repeater = Repeater(count=count)
    permutator = AttributePermutator('targets',
                                     limit={'partitions': 1},
                                     count=1)
    # rng=np.random.RandomState(0))
    # to trigger failure since the same np.random state
    # would be reused across all pprocesses
    null_cv = CrossValidation(clf,
                              ChainNode([splt, permutator],
                                        space=splt.get_space()),
                              postproc=mean_sample())
    null_sl_normal = sphere_searchlight(null_cv, nproc=nproc, **slkwargs)
    distr_est_normal = MCNullDist(repeater, tail='left',
                                  measure=null_sl_normal,
                                  enable_ca=['dist_samples'])

    cv = CrossValidation(clf, splt,
                         errorfx=mean_mismatch_error,
                         enable_ca=['stats'],
                         postproc=mean_sample())
    sl = sphere_searchlight(cv, nproc=nproc, null_dist=distr_est_normal,
                            **slkwargs)
    sl_map_normal = sl(ds)
    sl_null_prob_normal = sl.ca.null_prob.samples.copy()

    # For every feature -- we should get some variance in estimates.  In
    # case of failure they are all really close to each other (up to
    # numerical precision), so variance will be close to 0
    assert_array_less(-np.var(distr_est_normal.ca.dist_samples.samples[0],
                              axis=1),
                      -1e-5)
    for s in distr_est_normal.ca.dist_samples.samples[0]:
        ok_(len(np.unique(s)) > 1)
        kNN(),
        SensitivityBasedFeatureSelection(
            OneWayAnova(),
            FractionTailSelector(0.05, mode='select', tail='upper')),
        descr="kNN on 5%(ANOVA)")

clfswh += \
    FeatureSelectionClassifier(
        kNN(),
        SensitivityBasedFeatureSelection(
            OneWayAnova(),
            FixedNElementTailSelector(50, mode='select', tail='upper')),
        descr="kNN on 50(ANOVA)")

# GNB
clfswh += GNB(descr="GNB()")
clfswh += GNB(common_variance=True, descr="GNB(common_variance=True)")
clfswh += GNB(prior='uniform', descr="GNB(prior='uniform')")
clfswh += \
    FeatureSelectionClassifier(
        GNB(),
        SensitivityBasedFeatureSelection(
            OneWayAnova(),
            FractionTailSelector(0.05, mode='select', tail='upper')),
        descr="GNB on 5%(ANOVA)")

# GPR
if externals.exists('scipy'):
    from mvpa2.clfs.gpr import GPR

    regrswh += GPR(kernel=LinearKernel(), descr="GPR(kernel='linear')")
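
# Hedged usage aside (not part of the original warehouse module): classifiers
# registered via `clfswh +=` are normally retrieved back by tag.  Assuming the
# warehouse supports tag-based indexing (clfswh[<tag>]) and that 'gnb' is among
# GNB.__tags__, the variants registered above could be listed like this:
def _list_gnb_variants():
    from mvpa2.clfs.warehouse import clfswh
    for clf in clfswh['gnb']:
        print clf.descr  # e.g. "GNB()", "GNB(common_variance=True)"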
class SearchlightTests(unittest.TestCase):

    def setUp(self):
        self.dataset = datasets['3dlarge']
        # give the feature coord a more common name, matching the default of
        # the searchlight
        self.dataset.fa['voxel_indices'] = self.dataset.fa.myspace
        self._tested_pprocess = False

    # https://github.com/PyMVPA/PyMVPA/issues/67
    # https://github.com/PyMVPA/PyMVPA/issues/69
    def test_gnbsearchlight_doc(self):
        # Test whether we excluded nproc from the docstrings
        ok_(not 'nproc' in GNBSearchlight.__init__.__doc__)
        ok_(not 'nproc' in GNBSearchlight.__doc__)
        ok_(not 'nproc' in sphere_gnbsearchlight.__doc__)
        # but present elsewhere
        ok_('nproc' in sphere_searchlight.__doc__)
        ok_('nproc' in Searchlight.__init__.__doc__)

    # https://github.com/PyMVPA/PyMVPA/issues/106
    def test_searchlights_doc_qe(self):
        # queryengine should not be provided to sphere_* helpers
        for sl in (sphere_searchlight,
                   sphere_gnbsearchlight,
                   sphere_m1nnsearchlight):
            for kw in ('queryengine', 'qe'):
                ok_(not kw in sl.__doc__,
                    msg='There should be no %r in %s.__doc__' % (kw, sl))

        # queryengine should be provided in corresponding classes __doc__s
        for sl in (Searchlight, GNBSearchlight, M1NNSearchlight):
            for kw in ('queryengine', ):
                ok_(kw in sl.__init__.__doc__,
                    msg='There should be %r in %s.__init__.__doc__'
                        % (kw, sl))
            for kw in ('qe', ):
                ok_(not kw in sl.__init__.__doc__,
                    msg='There should be no %r in %s.__init__.__doc__'
                        % (kw, sl))

    #def _test_searchlights(self, ds, sls, roi_ids, result_all):  # pragma: no cover

    @sweepargs(
        lrn_sllrn_SL_partitioner=[
            (GNB(common_variance=v, descr='GNB'),
             None,
             sphere_gnbsearchlight,
             NFoldPartitioner(cvtype=1),
             0.)  # correction for the error range
            for v in (True, False)] +
        # Mean 1 NN searchlights
        [(ChainMapper([mean_group_sample(['targets', 'partitions']),
                       kNN(1)],
                      space='targets', descr='M1NN'),
          kNN(1),
          sphere_m1nnsearchlight,
          NFoldPartitioner(0.5, selection_strategy='random', count=20),
          0.05),
         # the same but with NFold(1) partitioner since it still should work
         (ChainMapper([mean_group_sample(['targets', 'partitions']),
                       kNN(1)],
                      space='targets', descr='NF-M1NN'),
          kNN(1),
          sphere_m1nnsearchlight,
          NFoldPartitioner(1),
          0.05),
         ])
    @sweepargs(do_roi=(False, True))
    @sweepargs(results_backend=('native', 'hdf5'))
    @reseed_rng()
    def test_spatial_searchlight(self, lrn_sllrn_SL_partitioner,
                                 do_roi=False, results_backend='native'):
        """Tests both generic and ad-hoc searchlights (e.g. GNBSearchlight)

        Testing an ad-hoc searchlight anyway requires a ground-truth
        comparison to the generic version, so we are doing sweepargs here
        """
        lrn, sllrn, SL, partitioner, correction = lrn_sllrn_SL_partitioner
        ## if results_backend == 'hdf5' and not common_variance:
        ##     # no need for full combination of all possible arguments here
        ##     return

        if __debug__ and 'ENFORCE_CA_ENABLED' in debug.active \
           and isinstance(lrn, ChainMapper):
            raise SkipTest("Known to fail while trying to enable "
                           "training_stats for the ChainMapper (M1NN here)")

        # e.g. for M1NN we need plain kNN(1) for m1nnsl, but to imitate m1nn
        # "learner" we must use a chainmapper atm
        if sllrn is None:
            sllrn = lrn
        ds = datasets['3dsmall'].copy()
        # Let's test multiclass here, so boost # of labels
        ds[6:18].T += 2
        ds.fa['voxel_indices'] = ds.fa.myspace

        # To assure that users do not run into incorrect operation due to overflows
        ds.samples += 5000
        ds.samples *= 1000
        ds.samples = ds.samples.astype(np.int16)

        # compute N-1 cross-validation for each sphere
        # YOH: unfortunately sample_clf_lin is not guaranteed
        #      to provide exactly the same results due to inherent
        #      iterative process.
        #      Therefore lets use something quick
        #      and pure Python
        cv = CrossValidation(lrn, partitioner)

        skwargs = dict(radius=1,
                       enable_ca=['roi_sizes', 'raw_results',
                                  'roi_feature_ids'])

        if do_roi:
            # select some random set of features
            nroi = rnd.randint(1, ds.nfeatures)
            # and lets compute the full one as well once again so we have a reference
            # which will be excluded itself from comparisons but values will be compared
            # for selected roi_id
            sl_all = SL(sllrn, partitioner, **skwargs)
            result_all = sl_all(ds)
            # select random features
            roi_ids = rnd.permutation(range(ds.nfeatures))[:nroi]
            skwargs['center_ids'] = roi_ids
        else:
            nroi = ds.nfeatures
            roi_ids = np.arange(nroi)
            result_all = None

        if results_backend == 'hdf5':
            skip_if_no_external('h5py')

        sls = [sphere_searchlight(cv, results_backend=results_backend,
                                  **skwargs),
               #GNBSearchlight(gnb, NFoldPartitioner(cvtype=1))
               SL(sllrn, partitioner, indexsum='fancy', **skwargs)
               ]

        if externals.exists('scipy'):
            sls += [SL(sllrn, partitioner, indexsum='sparse', **skwargs)]

        # Test nproc just once
        if externals.exists('pprocess') and not self._tested_pprocess:
            sls += [sphere_searchlight(cv, nproc=2, **skwargs)]
            self._tested_pprocess = True

        # Provide the dataset and all those searchlights for testing
        #self._test_searchlights(ds, sls, roi_ids, result_all)
        #nroi = len(roi_ids)
        #do_roi = nroi != ds.nfeatures
        all_results = []
        for sl in sls:
            # run searchlight
            mvpa2.seed()  # reseed rng again for m1nnsl
            results = sl(ds)
            all_results.append(results)
            #print `sl`
            # check for correct number of spheres
            self.assertTrue(results.nfeatures == nroi)
            # and measures (one per xfold)
            if partitioner.cvtype == 1:
                self.assertTrue(len(results) == len(ds.UC))
            elif partitioner.cvtype == 0.5:
                # here we had 4 unique chunks, so 6 combinations
                # even though 20 max was specified for NFold
                self.assertTrue(len(results) == 6)
            else:
                raise RuntimeError("Unknown yet type of partitioner to check")
            # check for chance-level performance across all spheres
            # makes sense only if number of features was big enough
            # to get some stable estimate of mean
            if not do_roi or nroi > 20:
                # correction here is for M1NN class which has wider distribution
                self.assertTrue(
                    0.67 - correction < results.samples.mean()
                    < 0.85 + correction,
                    msg="Out of range mean result: "
                        "lrn: %s sllrn: %s NROI: %d MEAN: %.3f"
                        % (lrn, sllrn, nroi, results.samples.mean(),))

            mean_errors = results.samples.mean(axis=0)
            # that we do get different errors ;)
            # (the closing paren of len() was misplaced originally, which
            # made this assertion vacuous)
            self.assertTrue(len(np.unique(mean_errors)) > 3)
            # check reasonable sphere sizes
            self.assertTrue(len(sl.ca.roi_sizes) == nroi)
            self.assertTrue(len(sl.ca.roi_feature_ids) == nroi)
            for i, fids in enumerate(sl.ca.roi_feature_ids):
                self.assertTrue(len(fids) == sl.ca.roi_sizes[i])
            if do_roi:
                # for roi we should relax conditions a bit
                self.assertTrue(max(sl.ca.roi_sizes) <= 7)
                self.assertTrue(min(sl.ca.roi_sizes) >= 4)
            else:
                self.assertTrue(max(sl.ca.roi_sizes) == 7)
                self.assertTrue(min(sl.ca.roi_sizes) == 4)

            # check base-class state
            self.assertEqual(sl.ca.raw_results.nfeatures, nroi)

            # Test if we got results correctly for 'selected' roi ids
            if do_roi:
                assert_array_equal(result_all[:, roi_ids], results)

        if len(all_results) > 1:
            # if we had multiple searchlights, we can check whether they all
            # gave the same result (they should have)
            aresults = np.array([a.samples for a in all_results])
            dresults = np.abs(aresults - aresults.mean(axis=0))
            dmax = np.max(dresults)
            self.assertTrue(dmax <= 1e-13)

        # Test the searchlight's reuse of neighbors
        for indexsum in ['fancy'] + (
                externals.exists('scipy') and ['sparse'] or []):
            # use the loop variable rather than a hardcoded 'fancy'
            sl = SL(sllrn, partitioner, indexsum=indexsum,
                    reuse_neighbors=True, **skwargs)
            mvpa2.seed()
            result1 = sl(ds)
            mvpa2.seed()
            result2 = sl(ds)  # must be faster
            assert_array_equal(result1, result2)

    def test_adhocsearchlight_perm_testing(self):
        # just a smoke test pretty much
        ds = datasets['3dmedium'].copy()
        #ds.samples += np.random.normal(size=ds.samples.shape)*10
        mvpa2.seed()
        ds.fa['voxel_indices'] = ds.fa.myspace
        from mvpa2.mappers.fx import mean_sample
        from mvpa2.clfs.stats import MCNullDist
        permutator = AttributePermutator('targets', count=8,
                                         limit='chunks')
        distr_est = MCNullDist(permutator, tail='left',
                               enable_ca=['dist_samples'])
        slargs = (kNN(1),
                  NFoldPartitioner(0.5,
                                   selection_strategy='random',
                                   count=9))
        slkwargs = dict(radius=1, postproc=mean_sample())

        sl_nodistr = sphere_m1nnsearchlight(*slargs, **slkwargs)
        skip_if_no_external('scipy')  # needed for null_t
        sl = sphere_m1nnsearchlight(*slargs,
                                    null_dist=distr_est,
                                    enable_ca=['null_t'],
                                    reuse_neighbors=True,
                                    **slkwargs)
        mvpa2.seed()
        res_nodistr = sl_nodistr(ds)
        mvpa2.seed()
        res = sl(ds)
        # verify that we at least got the same main result
        # ah (yoh) -- null dist is estimated before the main
        # estimate so we can't guarantee correspondence :-/
        # assert_array_equal(res_nodistr, res)
        # only resemblance (TODO, may be we want to get/setstate
        # for rng before null_dist.fit?)

        # and dimensions correspond
        assert_array_equal(distr_est.ca.dist_samples.shape,
                           (1, ds.nfeatures, 8))
        assert_array_equal(sl.ca.null_t.samples.shape,
                           (1, ds.nfeatures))

    def test_partial_searchlight_with_full_report(self):
        ds = self.dataset.copy()
        center_ids = np.zeros(ds.nfeatures, dtype='bool')
        center_ids[[3, 50]] = True
        ds.fa['center_ids'] = center_ids
        # compute N-1 cross-validation for each sphere
        cv = CrossValidation(GNB(), NFoldPartitioner())
        # construct diameter 1 (or just radius 0) searchlight
        # one time give center ids as a list, the other one takes it from the
        # dataset itself
        sls = (sphere_searchlight(cv, radius=0, center_ids=[3, 50]),
               sphere_searchlight(None, radius=0, center_ids=[3, 50]),
               sphere_searchlight(cv, radius=0, center_ids='center_ids'),
               )
        for sl in sls:
            # assure that we could set cv post constructor
            if sl.datameasure is None:
                sl.datameasure = cv
            # run searchlight
            results = sl(ds)
            # only two spheres but error for all CV-folds
            self.assertEqual(results.shape, (len(self.dataset.UC), 2))
            # Test if results hold if we "set" a "new" datameasure
            sl.datameasure = CrossValidation(GNB(), NFoldPartitioner())
            results2 = sl(ds)
            assert_array_almost_equal(results, results2)

        # test if we graciously puke if center_ids are out of bounds
        dataset0 = ds[:, :50]  # so we have no 50th feature
        self.assertRaises(IndexError, sls[0], dataset0)
        # but it should be fine on the one that gets the ids from the dataset
        # itself
        results = sl(dataset0)
        assert_equal(results.nfeatures, 1)

        # check whether roi_seeds are correct
        sl = sphere_searchlight(lambda x: np.vstack((x.fa.roi_seed,
                                                     x.samples)),
                                radius=1, add_center_fa=True,
                                center_ids=[12])
        res = sl(ds)
        assert_array_equal(
            res.samples[1:, res.samples[0].astype('bool')].squeeze(),
            ds.samples[:, 12])

    def test_partial_searchlight_with_confusion_matrix(self):
        ds = self.dataset
        from mvpa2.clfs.stats import MCNullDist
        from mvpa2.mappers.fx import mean_sample, sum_sample

        # compute N-1 cross-validation for each sphere
        cm = ConfusionMatrix(labels=ds.UT)
        cv = CrossValidation(
            sample_clf_lin, NFoldPartitioner(),
            # we have to assure that the matrix does not get flattened by
            # first vstack in cv and then hstack in searchlight --
            # thus 2 leading dimensions
            # TODO: RF? make searchlight/crossval smarter?
            errorfx=lambda *a: cm(*a)[None, None, :])
        # construct diameter 2 (or just radius 1) searchlight
        sl = sphere_searchlight(cv, radius=1, center_ids=[3, 5, 50])
        # our regular searchlight -- to compare results
        cv_gross = CrossValidation(sample_clf_lin, NFoldPartitioner())
        sl_gross = sphere_searchlight(cv_gross, radius=1,
                                      center_ids=[3, 5, 50])
        # run searchlights
        res = sl(ds)
        res_gross = sl_gross(ds)

        # only two spheres but error for all CV-folds and complete confusion matrix
        assert_equal(res.shape, (len(ds.UC), 3, len(ds.UT), len(ds.UT)))
        assert_equal(res_gross.shape, (len(ds.UC), 3))

        # briefly inspect the confusion matrices
        mat = res.samples
        # since input dataset is probably balanced (otherwise adjust
        # to be per label): sum within columns (thus axis=-2) should
        # be identical to per-class/chunk number of samples
        samples_per_classchunk = len(ds) / (len(ds.UT) * len(ds.UC))
        ok_(np.all(np.sum(mat, axis=-2) == samples_per_classchunk))
        # and if we compute accuracies manually -- they should
        # correspond to the one from sl_gross
        assert_array_almost_equal(
            res_gross.samples,
            # from accuracies to errors
            1 - (mat[..., 0, 0] + mat[..., 1, 1]).astype(float)
                / (2 * samples_per_classchunk))

        # and now for those who remained seated -- lets perform H0 MC
        # testing of this searchlight... just a silly one with minimal
        # number of permutations
        no_permutations = 10
        permutator = AttributePermutator('targets', count=no_permutations)

        # once again -- need explicit leading dimension to avoid
        # vstacking during cross-validation
        cv.postproc = lambda x: sum_sample()(x)[None, :]

        sl = sphere_searchlight(cv, radius=1, center_ids=[3, 5, 50],
                                null_dist=MCNullDist(permutator,
                                                     tail='right',
                                                     enable_ca=['dist_samples']))
        res_perm = sl(ds)
        # XXX all of the res_perm, sl.ca.null_prob and
        #     sl.null_dist.ca.dist_samples carry a degenerate leading
        #     dimension which was probably due to introduced new axis
        #     above within cv.postproc
        assert_equal(res_perm.shape, (1, 3, 2, 2))
        assert_equal(sl.null_dist.ca.dist_samples.shape,
                     res_perm.shape + (no_permutations,))
        assert_equal(sl.ca.null_prob.shape, res_perm.shape)
        # just to make sure ;)
        ok_(np.all(sl.ca.null_prob.samples >= 0))
        ok_(np.all(sl.ca.null_prob.samples <= 1))

        # we should have got sums of hits across the splits
        assert_array_equal(np.sum(mat, axis=0), res_perm.samples[0])

    def test_chi_square_searchlight(self):
        # only do partial to save time

        # Can't yet do this since test_searchlight isn't yet "under nose"
        #skip_if_no_external('scipy')
        if not externals.exists('scipy'):
            return

        from mvpa2.misc.stats import chisquare

        cv = CrossValidation(sample_clf_lin, NFoldPartitioner(),
                             enable_ca=['stats'])

        def getconfusion(data):
            cv(data)
            return chisquare(cv.ca.stats.matrix)[0]

        sl = sphere_searchlight(getconfusion, radius=0,
                                center_ids=[3, 50])

        # run searchlight
        results = sl(self.dataset)
        self.assertTrue(results.nfeatures == 2)

    def test_1d_multispace_searchlight(self):
        ds = Dataset([np.arange(6)])
        ds.fa['coord1'] = np.repeat(np.arange(3), 2)
        # add a second space to the dataset
        ds.fa['coord2'] = np.tile(np.arange(2), 3)
        measure = lambda x: "+".join([str(x) for x in x.samples[0]])
        # simply select each feature once
        res = Searchlight(measure,
                          IndexQueryEngine(coord1=Sphere(0),
                                           coord2=Sphere(0)),
                          nproc=1)(ds)
        assert_array_equal(res.samples, [['0', '1', '2', '3', '4', '5']])

        res = Searchlight(measure,
                          IndexQueryEngine(coord1=Sphere(0),
                                           coord2=Sphere(1)),
                          nproc=1)(ds)
        assert_array_equal(res.samples,
                           [['0+1', '0+1', '2+3', '2+3', '4+5', '4+5']])
        res = Searchlight(measure,
                          IndexQueryEngine(coord1=Sphere(1),
                                           coord2=Sphere(0)),
                          nproc=1)(ds)
        assert_array_equal(res.samples,
                           [['0+2', '1+3', '0+2+4', '1+3+5', '2+4', '3+5']])

    #@sweepargs(regr=regrswh[:])
    @reseed_rng()
    def test_regression_with_additional_sa(self):
        regr = regrswh[:][0]
        ds = datasets['3dsmall'].copy()
        ds.fa['voxel_indices'] = ds.fa.myspace

        # Create a new sample attribute which will be used along with
        # every searchlight
        ds.sa['beh'] = np.random.normal(size=(ds.nsamples, 2))

        # and now for fun -- lets create custom linear regression
        # targets out of some random feature and beh linearly combined
        rfeature = np.random.randint(ds.nfeatures)
        ds.sa.targets = np.dot(
            np.hstack((ds.sa.beh,
                       ds.samples[:, rfeature:rfeature + 1])),
            np.array([0.3, 0.2, 0.3]))

        class CrossValidationWithBeh(CrossValidation):
            """An adapter for regular CV which would hstack
            sa.beh to the searchlighting ds"""
            def _call(self, ds):
                return CrossValidation._call(
                    self,
                    Dataset(np.hstack((ds, ds.sa.beh)),
                            sa=ds.sa))

        cvbeh = CrossValidationWithBeh(regr, OddEvenPartitioner(),
                                       errorfx=corr_error)
        # regular cv
        cv = CrossValidation(regr, OddEvenPartitioner(),
                             errorfx=corr_error)

        slbeh = sphere_searchlight(cvbeh, radius=1)
        slmapbeh = slbeh(ds)
        sl = sphere_searchlight(cv, radius=1)
        slmap = sl(ds)

        assert_equal(slmap.shape, (2, ds.nfeatures))
        # SL which had access to beh should have got for sure better
        # results especially in the vicinity of the chosen feature...
        features = sl.queryengine.query_byid(rfeature)
        assert_array_lequal(slmapbeh.samples[:, features],
                            slmap.samples[:, features])
        # elsewhere they should tend to be better but not guaranteed

    @labile(5, 1)
    def test_usecase_concordancesl(self):
        import numpy as np
        from mvpa2.base.dataset import vstack
        from mvpa2.mappers.fx import mean_sample
        # Take our sample 3d dataset
        ds1 = datasets['3dsmall'].copy(deep=True)
        ds1.fa['voxel_indices'] = ds1.fa.myspace
        ds1.sa['subject'] = [1]  # not really necessary -- but let's for clarity
        ds1 = mean_sample()(ds1)  # so we get just a single representative sample

        def corr12(ds):
            corr = np.corrcoef(ds.samples)
            assert(corr.shape == (2, 2))  # for paranoid ones
            return corr[0, 1]

        for nsc, thr, thr_mean in ((0, 1.0, 1.0),
                                   (0.1, 0.3, 0.8)):  # just a bit of noise
            ds2 = ds1.copy(deep=True)  # make a copy for the 2nd subject
            ds2.sa['subject'] = [2]
            ds2.samples += nsc * np.random.normal(size=ds1.shape)

            # make sure that both have the same voxel indices
            assert(np.all(ds1.fa.voxel_indices == ds2.fa.voxel_indices))
            ds_both = vstack((ds1, ds2))  # join 2 images into a single dataset
                                          # with .sa.subject distinguishing both

            sl = sphere_searchlight(corr12, radius=2)
            slmap = sl(ds_both)
            ok_(np.all(slmap.samples >= thr))
            # compare the mean against thr_mean (which was otherwise unused)
            ok_(np.mean(slmap.samples) >= thr_mean)

    def test_swaroop_case(self):
        """Test hdf5 backend to pass results on Swaroop's usecase
        """
        skip_if_no_external('h5py')
        from mvpa2.measures.base import Measure

        class sw_measure(Measure):
            def __init__(self):
                Measure.__init__(self, auto_train=True)

            def _call(self, dataset):
                # For performance measures -- increase to 50-200
                # np.sum here is just to get some meaningful value in
                # them
                #return np.ones(shape=(2, 2))*np.sum(dataset)
                return Dataset(
                    np.array([{'d': np.ones(shape=(5, 5)) * np.sum(dataset)}],
                             dtype=object))

        results = []
        ds = datasets['3dsmall'].copy(deep=True)
        ds.fa['voxel_indices'] = ds.fa.myspace

        our_custom_prefix = tempfile.mktemp()
        for backend in ['native'] + \
                (externals.exists('h5py') and ['hdf5'] or []):
            sl = sphere_searchlight(sw_measure(),
                                    radius=1,
                                    tmp_prefix=our_custom_prefix,
                                    results_backend=backend)
            t0 = time.time()
            results.append(np.asanyarray(sl(ds)))
            # print "Done for backend %s in %d sec" % (backend, time.time() - t0)
        # because of swaroop's ad-hoc (who only could recommend such
        # a construct?) use case, and absent fancy working assert_objectarray_equal
        # let's compare manually
        #assert_objectarray_equal(*results)
        if not externals.exists('h5py'):
            self.assertRaises(RuntimeError,
                              sphere_searchlight,
                              sw_measure(),
                              results_backend='hdf5')
            raise SkipTest('h5py required for test of backend="hdf5"')
        assert_equal(results[0].shape, results[1].shape)
        results = [r.flatten() for r in results]
        for x, y in zip(*results):
            assert_equal(x.keys(), y.keys())
            assert_array_equal(x['d'], y['d'])
        # verify that no junk is left behind
        tempfiles = glob.glob(our_custom_prefix + '*')
        assert_equal(len(tempfiles), 0)

    def test_nblocks(self):
        skip_if_no_external('pprocess')
        # just a basic test to see that we are getting the same
        # results with different nblocks
        ds = datasets['3dsmall'].copy(deep=True)[:, :13]
        ds.fa['voxel_indices'] = ds.fa.myspace

        cv = CrossValidation(GNB(), OddEvenPartitioner())
        res1 = sphere_searchlight(cv, radius=1, nproc=2)(ds)
        res2 = sphere_searchlight(cv, radius=1, nproc=2, nblocks=5)(ds)

        assert_array_equal(res1, res2)

    def test_custom_results_fx_logic(self):
        # results_fx was introduced for the blow-up-the-memory-Swaroop
        # where keeping all intermediate results of the dark-magic SL
        # hyperalignment is not feasible.  So it is desired to split
        # searchlight computation in more blocks while composing the
        # target result "on-the-fly" from available so far results.
        #
        # Implementation relies on using generators feeding the
        # results_fx with fresh results whenever those become
        # available.
        #
        # This test/example's "measure" creates files which should be
        # handled by the results_fx function and removed in this case
        # to check if we indeed have desired high number of blocks while
        # only limited nproc.
        skip_if_no_external('pprocess')

        tfile = tempfile.mktemp('mvpa', 'test-sl')

        ds = datasets['3dsmall'].copy()[:, :25]  # smaller copy
        ds.fa['voxel_indices'] = ds.fa.myspace
        ds.fa['feature_id'] = np.arange(ds.nfeatures)

        nproc = 3  # it is not about computing -- so we can start more
                   # processes than there are CPUs just to test
        nblocks = nproc * 7
        # figure out max number of features to be given to any proc_block
        # yoh: not sure why I had to +1 here... but now it became more robust and
        # still seems to be doing what was demanded so be it
        max_block = int(ceil(ds.nfeatures / float(nblocks)) + 1)

        def print_(s, *args):
            """For local debugging"""
            #print s, args
            pass

        def results_fx(sl=None, dataset=None, roi_ids=None, results=None):
            """It will "process" the results by removing those files
            generated inside the measure
            """
            res = []
            print_("READY")
            for x in results:
                ok_(isinstance(x, list))
                res.append(x)
                print_("R: ", x)
                for r in x:
                    # Can happen if we requested those .ca's enabled
                    # -- then automagically _proc_block would wrap
                    # results in a dataset...
                    #    Originally detected by
                    #    running with MVPA_DEBUG=.* which triggered
                    #    enabling all ca's
                    if is_datasetlike(r):
                        r = np.asscalar(r.samples)
                    os.unlink(r)  # remove generated file
                print_("WAITING")

            results_ds = hstack(sum(res, []))

            # store the center ids as a feature attribute since we use
            # them for testing
            results_ds.fa['center_ids'] = roi_ids
            return results_ds

        def results_postproc_fx(results):
            for ds in results:
                ds.fa['test_postproc'] = np.atleast_1d(ds.a.roi_center_ids**2)
            return results

        def measure(ds):
            """The "measure" will check if a run with the same "index" from
            previous block has been processed by now
            """
            f = '%s+%03d' % (tfile,
                             ds.fa.feature_id[0] % (max_block * nproc))
            print_("FID:%d f:%s" % (ds.fa.feature_id[0], f))

            # allow for up to few seconds to wait for the file to
            # disappear -- i.e. its result from previous "block" was
            # processed
            t0 = time.time()
            while os.path.exists(f) and time.time() - t0 < 4.:
                time.sleep(0.5)  # so it does take time to compute the measure
                pass
            if os.path.exists(f):
                print_("ERROR: ", f)
                raise AssertionError("File %s must have been processed by now"
                                     % f)
            open(f, 'w').write('XXX')  # signal that we have computed this measure
            print_("RES: %s" % f)
            return f

        sl = sphere_searchlight(measure,
                                radius=0,
                                nproc=nproc,
                                nblocks=nblocks,
                                results_postproc_fx=results_postproc_fx,
                                results_fx=results_fx,
                                center_ids=np.arange(ds.nfeatures))
        assert_equal(len(glob.glob(tfile + '*')), 0)  # so no junk around
        try:
            res = sl(ds)
            assert_equal(res.nfeatures, ds.nfeatures)
            # verify that we did have results_postproc_fx called
            assert_array_equal(res.fa.test_postproc,
                               np.power(res.fa.center_ids, 2))
        finally:
            # remove those generated left-over files
            for f in glob.glob(tfile + '*'):
                os.unlink(f)
    assert_equal(set(training_stats.keys()),
                 set([('L0', 'L1'), ('L0', 'L2'), ('L1', 'L2')]))
    for pair, cm in training_stats.iteritems():
        assert_array_equal(cm.labels, ds.UT)
        # we should have no predictions for absent label
        assert_array_equal(cm.matrix[~np.in1d(ds.UT, pair)], 0)
        # while altogether all samples were processed once
        assert_array_equal(cm.stats['P'], len(ds))
        # and number of sets should be equal number of chunks here
        assert_equal(len(cm.sets), len(ds.UC))


# Sweep through some representative interesting classifiers
@sweepargs(clf=[LinearCSVMC(C=1),
                GNB(common_variance=True),
                ])
def test_multiclass_without_combiner_sens(clf):
    ds = datasets['uni3small'].copy()
    # do the clone since later we will compare sensitivities and need it
    # independently trained etc
    mclf = MulticlassClassifier(clf.clone(), combiner=None)

    # We have lots of sandwiching
    #   Multiclass.clfs -> [BinaryClassifier] -> clf
    # where BinaryClassifier's estimates are binarized.
    # Let's also check that we are getting sensitivities correctly.
    # With addition of MulticlassClassifierSensitivityAnalyzer we managed
    # to break it and no tests picked it up, so here we will test that
    # sensitivities are computed and labeled correctly
def test_confusion_as_node():
    from mvpa2.misc.data_generators import normal_feature_dataset
    from mvpa2.clfs.gnb import GNB
    from mvpa2.clfs.transerror import Confusion
    ds = normal_feature_dataset(snr=2.0, perlabel=42, nchunks=3,
                                nonbogus_features=[0, 1], nfeatures=2)
    clf = GNB()
    cv = CrossValidation(
        clf, NFoldPartitioner(),
        errorfx=None,
        postproc=Confusion(labels=ds.UT),
        enable_ca=['stats'])
    res = cv(ds)
    # needs to be identical to CA
    assert_array_equal(res.samples, cv.ca.stats.matrix)
    assert_array_equal(res.sa.predictions, ds.UT)
    assert_array_equal(res.fa.targets, ds.UT)

    skip_if_no_external('scipy')
    from mvpa2.clfs.transerror import BayesConfusionHypothesis
    from mvpa2.base.node import ChainNode
    # same again, but this time with Bayesian hypothesis testing at the end
    cv = CrossValidation(
        clf, NFoldPartitioner(),
        errorfx=None,
        postproc=ChainNode([Confusion(labels=ds.UT),
                            BayesConfusionHypothesis()]))
    res = cv(ds)
    # only two possible hypotheses with two classes
    assert_equals(len(res), 2)
    # the first hypothesis is that we can't discriminate anything
    assert_equal(len(res.sa.hypothesis[0]), 1)
    assert_equal(len(res.sa.hypothesis[0][0]), 2)
    # and that hypothesis is actually less likely than the other one
    # (both classes can be distinguished)
    assert(np.e**res.samples[0, 0] < np.e**res.samples[1, 0])

    # Let's see how well it would work within the searchlight when we also
    # would like to store the hypotheses per each voxel
    # Somewhat an ad-hoc solution for the answer posted on the ML
    #
    # run 1d searchlight of radii 0, for that just provide a .fa with coordinates
    ds.fa['voxel_indices'] = [[0], [1]]
    # and a custom Node which would collect .sa.hypothesis to place together along
    # with the posterior probabilities
    from mvpa2.base.node import Node
    from mvpa2.measures.searchlight import sphere_searchlight

    class KeepBothPosteriorAndHypothesis(Node):
        def _call(self, ds):
            out = np.zeros(1, dtype=object)
            out[0] = (ds.samples, ds.sa.hypothesis)
            return out
    cv.postproc.append(KeepBothPosteriorAndHypothesis())
    sl = sphere_searchlight(cv, radius=0, nproc=1)
    res = sl(ds)

    assert_equal(res.shape, (1, 2))
    assert_equal(len(res.samples[0, 0]), 2)
    assert_equal(res.samples[0, 0][0].shape, (2, 2))  # posteriors per 1st SL
    assert_equal(len(res.samples[0, 0][1]), 2)  # 2 of hypotheses
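
# Hedged aside (not from the original suite): res.samples above hold
# log-probabilities per hypothesis, which is why the final comparison
# exponentiates before comparing.  When actual posteriors are wanted, a
# numerically safer normalization uses the log-sum-exp shift, e.g.:
def _log_to_posterior(logp):
    logp = np.asarray(logp, dtype=float)
    shifted = np.exp(logp - logp.max())  # shift by max to avoid underflow
    return shifted / shifted.sum()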
def test_gnb(self):
    gnb = GNB()
    gnb_nc = GNB(common_variance=False)
    gnb_n = GNB(normalize=True)
    gnb_n_nc = GNB(normalize=True, common_variance=False)
    gnb_lin = GNB(common_variance=True)

    ds = datasets['uni2medium']

    # Store probabilities for further comparison
    probabilities = {}

    # Generic silly coverage just to assure that it works in all
    # possible scenarios:
    bools = (True, False)
    # There should be better way... heh
    for cv in bools:                # common_variance?
        for prior in ('uniform', 'laplacian_smoothing', 'ratio'):
            tp = None               # predictions -- all above should
                                    # result in the same predictions
            for n in bools:         # normalized?
                for ls in bools:    # logspace?
                    for es in ((), ('estimates')):
                        gnb_ = GNB(common_variance=cv,
                                   prior=prior,
                                   normalize=n,
                                   logprob=ls,
                                   enable_ca=es)
                        tm = TransferMeasure(gnb_, Splitter('train'))
                        predictions = tm(ds).samples[:, 0]
                        if tp is None:
                            tp = predictions
                        assert_array_equal(predictions, tp)
                        # if normalized -- check if estimates are such
                        if n and 'estimates' in es:
                            v = gnb_.ca.estimates
                            if ls:  # in log space -- take exp ;)
                                v = np.exp(v)
                            d1 = np.sum(v, axis=1) - 1.0
                            self.assertTrue(np.max(np.abs(d1)) < 1e-5)
                            probabilities[repr(gnb_)] = v
            # smoke test to see whether invocation of sensitivity analyser blows
            # if gnb classifier isn't linear, and to see whether it doesn't blow
            # when it is linear.
            if cv:
                assert 'has_sensitivity' in gnb_.__tags__
                gnb_.get_sensitivity_analyzer()
            if not cv:
                with self.assertRaises(NotImplementedError):
                    gnb_.get_sensitivity_analyzer()

    # Verify that probabilities are identical when we use logprob or not
    assert_array_almost_equal(
        probabilities["GNB(space='targets', normalize=True, logprob=False)"],
        probabilities["GNB(space='targets', normalize=True)"])
    assert_array_almost_equal(
        probabilities["GNB(space='targets', normalize=True, logprob=False, prior='uniform')"],
        probabilities["GNB(space='targets', normalize=True, prior='uniform')"])