def test_adhocsearchlight_perm_testing(self):
    """Smoke test: M1NN searchlight with a permutation-based null distribution.

    Two searchlights sharing the same learner and partitioner are run, one
    without and one with a Monte-Carlo null distribution.  Only the shapes
    of the collected null-distribution samples and of the ``null_t`` map
    are verified.
    """
    from mvpa2.mappers.fx import mean_sample
    from mvpa2.clfs.stats import MCNullDist

    n_perms = 8

    dataset = datasets['3dmedium'].copy()
    dataset.fa['voxel_indices'] = dataset.fa.myspace

    null_estimator = MCNullDist(
        AttributePermutator('targets', count=n_perms, limit='chunks'),
        tail='left',
        enable_ca=['dist_samples'])

    # A single learner and a single partitioner instance are shared by
    # both searchlights
    learner = kNN(1)
    partitioner = NFoldPartitioner(0.5, selection_strategy='random', count=9)
    common_kwargs = {'radius': 1, 'postproc': mean_sample()}

    plain_sl = sphere_m1nnsearchlight(learner, partitioner, **common_kwargs)
    skip_if_no_external('scipy')  # needed for null_t
    null_sl = sphere_m1nnsearchlight(
        learner, partitioner,
        null_dist=null_estimator,
        enable_ca=['null_t'],
        reuse_neighbors=True,
        **common_kwargs)

    # Each run starts from a freshly seeded RNG.  The two result maps are
    # nevertheless NOT compared: the null distribution is estimated before
    # the main estimate, so correspondence cannot be guaranteed (TODO: may
    # be get/setstate of the rng around null_dist.fit would allow it).
    mvpa2.seed()
    plain_sl(dataset)
    mvpa2.seed()
    null_sl(dataset)

    # ... so only the dimensions are checked
    assert_array_equal(null_estimator.ca.dist_samples.shape,
                       (1, dataset.nfeatures, n_perms))
    assert_array_equal(null_sl.ca.null_t.samples.shape,
                       (1, dataset.nfeatures))
def test_adhocsearchlight_perm_testing(self):
    """Smoke test: M1NN searchlight with a permutation-based null distribution.

    Two searchlights sharing the same learner and partitioner are run, one
    without and one with a Monte-Carlo null distribution.  Only the shapes
    of the collected null-distribution samples and of the ``null_t`` map
    are verified.
    """
    from mvpa2.mappers.fx import mean_sample
    from mvpa2.clfs.stats import MCNullDist

    n_perms = 8

    dataset = datasets['3dmedium'].copy()
    dataset.fa['voxel_indices'] = dataset.fa.myspace

    null_estimator = MCNullDist(
        AttributePermutator('targets', count=n_perms, limit='chunks'),
        tail='left',
        enable_ca=['dist_samples'])

    # A single learner and a single partitioner instance are shared by
    # both searchlights
    learner = kNN(1)
    partitioner = NFoldPartitioner(0.5, selection_strategy='random', count=9)
    common_kwargs = {'radius': 1, 'postproc': mean_sample()}

    plain_sl = sphere_m1nnsearchlight(learner, partitioner, **common_kwargs)
    skip_if_no_external('scipy')  # needed for null_t
    null_sl = sphere_m1nnsearchlight(
        learner, partitioner,
        null_dist=null_estimator,
        enable_ca=['null_t'],
        reuse_neighbors=True,
        **common_kwargs)

    # Each run starts from a freshly seeded RNG.  The two result maps are
    # nevertheless NOT compared: the null distribution is estimated before
    # the main estimate, so correspondence cannot be guaranteed (TODO: may
    # be get/setstate of the rng around null_dist.fit would allow it).
    mvpa2.seed()
    plain_sl(dataset)
    mvpa2.seed()
    null_sl(dataset)

    # ... so only the dimensions are checked
    assert_array_equal(null_estimator.ca.dist_samples.shape,
                       (1, dataset.nfeatures, n_perms))
    assert_array_equal(null_sl.ca.null_t.samples.shape,
                       (1, dataset.nfeatures))
TODO """ import numpy as np """ """ import mvpa2 from mvpa2.base import cfg from mvpa2.misc.data_generators import * from mvpa2.clfs.knn import kNN from mvpa2.misc.plot import * mvpa2.seed(0) # to reproduce the plot dataset_kwargs = dict(nfeatures=2, nchunks=10, snr=2, nlabels=4, means=[[0, 1], [1, 0], [1, 1], [0, 0]]) dataset_train = normal_feature_dataset(**dataset_kwargs) dataset_plot = normal_feature_dataset(**dataset_kwargs) # make a new figure pl.figure(figsize=(9, 9)) for i, k in enumerate((1, 3, 9, 20)): knn = kNN(k)
def test_gnbsearchlight_permutations():
    """Check permutation-based null distributions for searchlights.

    Two things are verified:

    * a GNB searchlight configured with a null distribution whose measure
      permutes targets within partitions raises ``NotImplementedError``
      (checked only when ``__debug__`` is on);
    * for the generic searchlight, the samples collected for the null
      distribution differ across repetitions for every center (non-trivial
      variance and more than one unique value).
    """
    import mvpa2
    from mvpa2.base.node import ChainNode
    from mvpa2.clfs.gnb import GNB
    from mvpa2.generators.base import Repeater
    from mvpa2.generators.partition import NFoldPartitioner, OddEvenPartitioner
    #import mvpa2.generators.permutation
    #reload(mvpa2.generators.permutation)
    from mvpa2.generators.permutation import AttributePermutator
    from mvpa2.testing.datasets import datasets
    from mvpa2.measures.base import CrossValidation
    from mvpa2.measures.gnbsearchlight import sphere_gnbsearchlight
    from mvpa2.measures.searchlight import sphere_searchlight
    from mvpa2.mappers.fx import mean_sample
    from mvpa2.misc.errorfx import mean_mismatch_error
    from mvpa2.clfs.stats import MCNullDist
    from mvpa2.testing.tools import assert_raises, ok_, assert_array_less

    # mvpa2.debug.active = ['APERM', 'SLC'] #, 'REPM']
    # mvpa2.debug.metrics += ['pid']
    count = 10
    # use a second process only if pprocess is available
    nproc = 1 + int(mvpa2.externals.exists('pprocess'))
    ds = datasets['3dsmall'].copy()
    ds.fa['voxel_indices'] = ds.fa.myspace

    # settings shared by all searchlights below; only four centers are used
    slkwargs = dict(radius=3, space='voxel_indices', enable_ca=['roi_sizes'],
                    center_ids=[1, 10, 70, 100])

    mvpa2.seed(mvpa2._random_seed)
    clf = GNB()
    splt = NFoldPartitioner(cvtype=2, attr='chunks')

    repeater = Repeater(count=count)
    permutator = AttributePermutator('targets', limit={'partitions': 1}, count=1)

    # searchlight used as the measure of the null distribution: partitions
    # are generated first and targets are then permuted by `permutator`
    null_sl = sphere_gnbsearchlight(clf, ChainNode([splt, permutator], space=splt.get_space()),
                                    postproc=mean_sample(), errorfx=mean_mismatch_error,
                                    **slkwargs)

    distr_est = MCNullDist(repeater, tail='left', measure=null_sl,
                           enable_ca=['dist_samples'])
    sl = sphere_gnbsearchlight(clf, splt,
                               reuse_neighbors=True,
                               null_dist=distr_est, postproc=mean_sample(),
                               errorfx=mean_mismatch_error,
                               **slkwargs)
    if __debug__:                         # assert is done only without -O mode
        assert_raises(NotImplementedError, sl, ds)

    # "ad-hoc searchlights can't handle yet varying targets across partitions"
    if False:
        # after above limitation is removed -- enable
        sl_map = sl(ds)
        sl_null_prob = sl.ca.null_prob.samples.copy()

    # reseed so the generic searchlight starts from the same RNG state
    mvpa2.seed(mvpa2._random_seed)
    ### 'normal' Searchlight
    clf = GNB()
    splt = NFoldPartitioner(cvtype=2, attr='chunks')
    repeater = Repeater(count=count)
    permutator = AttributePermutator('targets', limit={'partitions': 1}, count=1)
                                     # rng=np.random.RandomState(0)) # to trigger failure since the same np.random state
                                     # would be reused across all pprocesses
    null_cv = CrossValidation(clf, ChainNode([splt, permutator], space=splt.get_space()),
                              postproc=mean_sample())
    null_sl_normal = sphere_searchlight(null_cv, nproc=nproc, **slkwargs)
    distr_est_normal = MCNullDist(repeater, tail='left', measure=null_sl_normal,
                                  enable_ca=['dist_samples'])

    cv = CrossValidation(clf, splt, errorfx=mean_mismatch_error,
                         enable_ca=['stats'], postproc=mean_sample() )
    sl = sphere_searchlight(cv, nproc=nproc, null_dist=distr_est_normal, **slkwargs)
    sl_map_normal = sl(ds)
    sl_null_prob_normal = sl.ca.null_prob.samples.copy()

    # For every feature -- we should get some variance in estimates In
    # case of failure they are all really close to each other (up to
    # numerical precision), so variance will be close to 0
    # (written as -var < -1e-5, i.e. var > 1e-5, since only an
    # "array less" assertion is imported here)
    assert_array_less(-np.var(distr_est_normal.ca.dist_samples.samples[0],
                              axis=1), -1e-5)
    for s in distr_est_normal.ca.dist_samples.samples[0]:
        ok_(len(np.unique(s)) > 1)
def test_spatial_searchlight(self, lrn_sllrn_SL_partitioner, do_roi=False,
                             results_backend='native'):
    """Test both generic and ad-hoc searchlights (e.g. GNBSearchlight)

    Testing an ad-hoc searchlight requires a ground-truth comparison to
    the generic version anyway, so both are exercised here via sweepargs.

    Parameters
    ----------
    lrn_sllrn_SL_partitioner : tuple
      ``(lrn, sllrn, SL, partitioner, correction)``: learner for the
      generic cross-validated searchlight, learner for the ad-hoc
      searchlight (``None`` to reuse `lrn`), ad-hoc searchlight factory,
      partitioner, and a tolerance widening the accepted range of the
      mean error.
    do_roi : bool
      If True, run only on a random subset of centers and compare against
      the corresponding columns of a full ad-hoc searchlight run.
    results_backend : str
      Passed to the generic ``sphere_searchlight``.  ``'hdf5'`` requires
      h5py (test is skipped otherwise).
    """
    lrn, sllrn, SL, partitioner, correction = lrn_sllrn_SL_partitioner
    ## if results_backend == 'hdf5' and not common_variance:
    ##     # no need for full combination of all possible arguments here
    ##     return

    if __debug__ and 'ENFORCE_CA_ENABLED' in debug.active \
       and isinstance(lrn, ChainMapper):
        raise SkipTest("Known to fail while trying to enable "
                       "training_stats for the ChainMapper (M1NN here)")

    # e.g. for M1NN we need plain kNN(1) for m1nnsl, but to imitate m1nn
    #      "learner" we must use a chainmapper atm
    if sllrn is None:
        sllrn = lrn
    ds = datasets['3dsmall'].copy()
    # Let's test multiclass here, so boost # of labels
    ds[6:18].T += 2
    ds.fa['voxel_indices'] = ds.fa.myspace

    # To assure that users do not run into incorrect operation due to overflows
    ds.samples += 5000
    ds.samples *= 1000
    ds.samples = ds.samples.astype(np.int16)

    # compute N-1 cross-validation for each sphere
    # YOH: unfortunately sample_clf_lin is not guaranteed
    #      to provide exactly the same results due to inherent
    #      iterative process.  Therefore lets use something quick
    #      and pure Python
    cv = CrossValidation(lrn, partitioner)

    skwargs = dict(radius=1,
                   enable_ca=['roi_sizes', 'raw_results', 'roi_feature_ids'])

    if do_roi:
        # select some random set of features
        nroi = rnd.randint(1, ds.nfeatures)
        # and lets compute the full one as well once again so we have a
        # reference which will be excluded itself from comparisons but
        # values will be compared for selected roi_id
        sl_all = SL(sllrn, partitioner, **skwargs)
        result_all = sl_all(ds)
        # select random features
        roi_ids = rnd.permutation(range(ds.nfeatures))[:nroi]
        skwargs['center_ids'] = roi_ids
    else:
        nroi = ds.nfeatures
        roi_ids = np.arange(nroi)
        result_all = None

    if results_backend == 'hdf5':
        skip_if_no_external('h5py')

    have_scipy = externals.exists('scipy')

    sls = [sphere_searchlight(cv, results_backend=results_backend, **skwargs),
           #GNBSearchlight(gnb, NFoldPartitioner(cvtype=1))
           SL(sllrn, partitioner, indexsum='fancy', **skwargs)]

    if have_scipy:
        sls += [SL(sllrn, partitioner, indexsum='sparse', **skwargs)]

    # Test nproc just once
    if externals.exists('pprocess') and not self._tested_pprocess:
        sls += [sphere_searchlight(cv, nproc=2, **skwargs)]
        self._tested_pprocess = True

    # statistics over spheres make sense only if number of features was
    # big enough to get some stable estimate
    enough_spheres = not do_roi or nroi > 20

    # Provide the dataset and all those searchlights for testing
    #self._test_searchlights(ds, sls, roi_ids, result_all)
    #nroi = len(roi_ids)
    #do_roi = nroi != ds.nfeatures
    all_results = []
    for sl in sls:
        # run searchlight
        mvpa2.seed()                # reseed rng again for m1nnsl
        results = sl(ds)
        all_results.append(results)
        #print `sl`
        # check for correct number of spheres
        self.assertTrue(results.nfeatures == nroi)
        # and measures (one per xfold)
        if partitioner.cvtype == 1:
            self.assertTrue(len(results) == len(ds.UC))
        elif partitioner.cvtype == 0.5:
            # here we had 4 unique chunks, so 6 combinations
            # even though 20 max was specified for NFold
            self.assertTrue(len(results) == 6)
        else:
            raise RuntimeError("Unknown yet type of partitioner to check")

        if enough_spheres:
            # check for chance-level performance across all spheres
            # correction here is for M1NN class which has wider distribution
            self.assertTrue(
                0.67 - correction < results.samples.mean() < 0.85 + correction,
                msg="Out of range mean result: "
                "lrn: %s  sllrn: %s  NROI: %d  MEAN: %.3f"
                % (lrn, sllrn, nroi, results.samples.mean(),))

            # that we do get different errors ;)
            # The comparison must be applied to the NUMBER of unique values
            # (it used to be applied inside len(), which made the assertion
            # always pass).  Checked only with enough spheres: a small
            # random ROI may legitimately yield very few distinct values.
            mean_errors = results.samples.mean(axis=0)
            self.assertTrue(len(np.unique(mean_errors)) > 3)

        # check reasonable sphere sizes
        self.assertTrue(len(sl.ca.roi_sizes) == nroi)
        self.assertTrue(len(sl.ca.roi_feature_ids) == nroi)
        for i, fids in enumerate(sl.ca.roi_feature_ids):
            self.assertTrue(len(fids) == sl.ca.roi_sizes[i])
        if do_roi:
            # for roi we should relax conditions a bit
            self.assertTrue(max(sl.ca.roi_sizes) <= 7)
            self.assertTrue(min(sl.ca.roi_sizes) >= 4)
        else:
            self.assertTrue(max(sl.ca.roi_sizes) == 7)
            self.assertTrue(min(sl.ca.roi_sizes) == 4)

        # check base-class state
        self.assertEqual(sl.ca.raw_results.nfeatures, nroi)

        # Test if we got results correctly for 'selected' roi ids
        if do_roi:
            assert_array_equal(result_all[:, roi_ids], results)

    if len(all_results) > 1:
        # if we had multiple searchlights, we can check either they all
        # gave the same result (they should have)
        aresults = np.array([a.samples for a in all_results])
        dresults = np.abs(aresults - aresults.mean(axis=0))
        dmax = np.max(dresults)
        self.assertTrue(dmax <= 1e-13)

    # Test the searchlight's reuse of neighbors for every available
    # indexsum implementation (the loop variable used to be ignored, so
    # only 'fancy' was ever exercised)
    for indexsum in ['fancy'] + (['sparse'] if have_scipy else []):
        sl = SL(sllrn, partitioner, indexsum=indexsum,
                reuse_neighbors=True, **skwargs)
        mvpa2.seed()
        result1 = sl(ds)
        mvpa2.seed()
        result2 = sl(ds)                # must be faster
        assert_array_equal(result1, result2)
def test_spatial_searchlight(self, lrn_sllrn_SL_partitioner, do_roi=False,
                             results_backend='native'):
    """Test both generic and ad-hoc searchlights (e.g. GNBSearchlight)

    Testing an ad-hoc searchlight requires a ground-truth comparison to
    the generic version anyway, so both are exercised here via sweepargs.

    Parameters
    ----------
    lrn_sllrn_SL_partitioner : tuple
      ``(lrn, sllrn, SL, partitioner, correction)``: learner for the
      generic cross-validated searchlight, learner for the ad-hoc
      searchlight (``None`` to reuse `lrn`), ad-hoc searchlight factory,
      partitioner, and a tolerance widening the accepted range of the
      mean error.
    do_roi : bool
      If True, run only on a random subset of centers and compare against
      the corresponding columns of a full ad-hoc searchlight run.
    results_backend : str
      Passed to the generic ``sphere_searchlight``.  ``'hdf5'`` requires
      h5py (test is skipped otherwise).
    """
    lrn, sllrn, SL, partitioner, correction = lrn_sllrn_SL_partitioner
    ## if results_backend == 'hdf5' and not common_variance:
    ##     # no need for full combination of all possible arguments here
    ##     return

    if __debug__ and 'ENFORCE_CA_ENABLED' in debug.active \
       and isinstance(lrn, ChainMapper):
        raise SkipTest("Known to fail while trying to enable "
                       "training_stats for the ChainMapper (M1NN here)")

    # e.g. for M1NN we need plain kNN(1) for m1nnsl, but to imitate m1nn
    #      "learner" we must use a chainmapper atm
    if sllrn is None:
        sllrn = lrn
    ds = datasets['3dsmall'].copy()
    # Let's test multiclass here, so boost # of labels
    ds[6:18].T += 2
    ds.fa['voxel_indices'] = ds.fa.myspace

    # To assure that users do not run into incorrect operation due to overflows
    ds.samples += 5000
    ds.samples *= 1000
    ds.samples = ds.samples.astype(np.int16)

    # compute N-1 cross-validation for each sphere
    # YOH: unfortunately sample_clf_lin is not guaranteed
    #      to provide exactly the same results due to inherent
    #      iterative process.  Therefore lets use something quick
    #      and pure Python
    cv = CrossValidation(lrn, partitioner)

    skwargs = dict(radius=1,
                   enable_ca=['roi_sizes', 'raw_results', 'roi_feature_ids'])

    if do_roi:
        # select some random set of features
        nroi = rnd.randint(1, ds.nfeatures)
        # and lets compute the full one as well once again so we have a
        # reference which will be excluded itself from comparisons but
        # values will be compared for selected roi_id
        sl_all = SL(sllrn, partitioner, **skwargs)
        result_all = sl_all(ds)
        # select random features
        roi_ids = rnd.permutation(range(ds.nfeatures))[:nroi]
        skwargs['center_ids'] = roi_ids
    else:
        nroi = ds.nfeatures
        roi_ids = np.arange(nroi)
        result_all = None

    if results_backend == 'hdf5':
        skip_if_no_external('h5py')

    have_scipy = externals.exists('scipy')

    sls = [sphere_searchlight(cv, results_backend=results_backend, **skwargs),
           #GNBSearchlight(gnb, NFoldPartitioner(cvtype=1))
           SL(sllrn, partitioner, indexsum='fancy', **skwargs)]

    if have_scipy:
        sls += [SL(sllrn, partitioner, indexsum='sparse', **skwargs)]

    # Test nproc just once
    if externals.exists('pprocess') and not self._tested_pprocess:
        sls += [sphere_searchlight(cv, nproc=2, **skwargs)]
        self._tested_pprocess = True

    # statistics over spheres make sense only if number of features was
    # big enough to get some stable estimate
    enough_spheres = not do_roi or nroi > 20

    # Provide the dataset and all those searchlights for testing
    #self._test_searchlights(ds, sls, roi_ids, result_all)
    #nroi = len(roi_ids)
    #do_roi = nroi != ds.nfeatures
    all_results = []
    for sl in sls:
        # run searchlight
        mvpa2.seed()                # reseed rng again for m1nnsl
        results = sl(ds)
        all_results.append(results)
        #print `sl`
        # check for correct number of spheres
        self.assertTrue(results.nfeatures == nroi)
        # and measures (one per xfold)
        if partitioner.cvtype == 1:
            self.assertTrue(len(results) == len(ds.UC))
        elif partitioner.cvtype == 0.5:
            # here we had 4 unique chunks, so 6 combinations
            # even though 20 max was specified for NFold
            self.assertTrue(len(results) == 6)
        else:
            raise RuntimeError("Unknown yet type of partitioner to check")

        if enough_spheres:
            # check for chance-level performance across all spheres
            # correction here is for M1NN class which has wider distribution
            self.assertTrue(
                0.68 - correction < results.samples.mean() < 0.84 + correction,
                msg="Out of range mean result: "
                "lrn: %s  sllrn: %s  NROI: %d  MEAN: %.3f"
                % (lrn, sllrn, nroi, results.samples.mean(),))

            # that we do get different errors ;)
            # The comparison must be applied to the NUMBER of unique values
            # (it used to be applied inside len(), which made the assertion
            # always pass).  Checked only with enough spheres: a small
            # random ROI may legitimately yield very few distinct values.
            mean_errors = results.samples.mean(axis=0)
            self.assertTrue(len(np.unique(mean_errors)) > 3)

        # check reasonable sphere sizes
        self.assertTrue(len(sl.ca.roi_sizes) == nroi)
        self.assertTrue(len(sl.ca.roi_feature_ids) == nroi)
        for i, fids in enumerate(sl.ca.roi_feature_ids):
            self.assertTrue(len(fids) == sl.ca.roi_sizes[i])
        if do_roi:
            # for roi we should relax conditions a bit
            self.assertTrue(max(sl.ca.roi_sizes) <= 7)
            self.assertTrue(min(sl.ca.roi_sizes) >= 4)
        else:
            self.assertTrue(max(sl.ca.roi_sizes) == 7)
            self.assertTrue(min(sl.ca.roi_sizes) == 4)

        # check base-class state
        self.assertEqual(sl.ca.raw_results.nfeatures, nroi)

        # Test if we got results correctly for 'selected' roi ids
        if do_roi:
            assert_array_equal(result_all[:, roi_ids], results)

    if len(all_results) > 1:
        # if we had multiple searchlights, we can check either they all
        # gave the same result (they should have)
        aresults = np.array([a.samples for a in all_results])
        dresults = np.abs(aresults - aresults.mean(axis=0))
        dmax = np.max(dresults)
        self.assertTrue(dmax <= 1e-13)

    # Test the searchlight's reuse of neighbors for every available
    # indexsum implementation (the loop variable used to be ignored, so
    # only 'fancy' was ever exercised)
    for indexsum in ['fancy'] + (['sparse'] if have_scipy else []):
        sl = SL(sllrn, partitioner, indexsum=indexsum,
                reuse_neighbors=True, **skwargs)
        mvpa2.seed()
        result1 = sl(ds)
        mvpa2.seed()
        result2 = sl(ds)                # must be faster
        assert_array_equal(result1, result2)
""" import mvpa2 import pylab as pl import numpy as np from mvpa2.misc.data_generators import normal_feature_dataset from mvpa2.clfs.svm import LinearCSVMC from mvpa2.generators.partition import NFoldPartitioner from mvpa2.measures.base import CrossValidation from mvpa2.mappers.zscore import zscore """ Generate a binary dataset without any signal (snr=0). """ mvpa2.seed(1) ds_noise = normal_feature_dataset(perlabel=100, nlabels=2, nfeatures=2, snr=0, nonbogus_features=[0, 1]) # signal levels sigs = [0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0] """ To mimic behavior of hard-margin SVM whenever classes become separable, which is easier to comprehend, we are intentionally setting very high C value. """
def _proc_block(self, block, ds, measure, seed=None, iblock='main'):
    """Little helper to capture the parts of the computation that can be
    parallelized

    Parameters
    ----------
    block
      Sequence of center feature ids for which ROIs are queried and the
      measure is computed.
    ds
      Dataset which gets sliced feature-wise for every ROI.
    measure
      Callable applied to every ROI dataset.
    seed
      RNG seed.  Should be provided e.g. in child process invocations
      to guarantee that they all seed differently to not keep generating
      the same sequences due to reusing the same copy of numpy's RNG
    iblock
      Label included in the name of the temporary file in case of the
      hdf5 backend.  Critical for generating non-colliding temp
      filenames: otherwise RNGs of different processes might collide in
      their temporary file names leading to problems.

    Returns
    -------
    List of per-ROI results (possibly post-processed by
    ``results_postproc_fx``) for the 'native' backend, or the name of the
    file they were stored into for the 'hdf5' backend.
    """
    if seed is not None:
        mvpa2.seed(seed)
    if __debug__:
        debug_slc_ = 'SLC_' in debug.active
        debug('SLC',
              "Starting computing block for %i elements" % len(block))
    results = []
    store_roi_feature_ids = self.ca.is_enabled('roi_feature_ids')
    store_roi_sizes = self.ca.is_enabled('roi_sizes')
    store_roi_center_ids = self.ca.is_enabled('roi_center_ids')
    # attributes can be attached only to a dataset, so results must be
    # wrapped whenever any of them is requested
    assure_dataset = any([store_roi_feature_ids,
                          store_roi_sizes,
                          store_roi_center_ids])
    # put rois around all features in the dataset and compute the
    # measure within them
    for i, f in enumerate(block):
        # retrieve the feature ids of all features in the ROI from the query
        # engine
        roi_specs = self._queryengine[f]
        if __debug__ and debug_slc_:
            debug('SLC_', 'For %r query returned roi_specs %r'
                  % (f, roi_specs))
        specs_are_dataset = is_datasetlike(roi_specs)
        if specs_are_dataset:
            # TODO: unittest
            assert(len(roi_specs) == 1)
            roi_fids = roi_specs.samples[0]
        else:
            roi_fids = roi_specs
        # slice the dataset
        roi = ds[:, roi_fids]
        if specs_are_dataset:
            # carry over feature attributes provided by the query engine
            for n, v in roi_specs.fa.iteritems():
                roi.fa[n] = v
        if self.__add_center_fa:
            # add fa to indicate ROI seed if requested
            roi_seed = np.zeros(roi.nfeatures, dtype='bool')
            # roi_fids is not necessarily a list (it is a row of `samples`
            # when the query engine returned a dataset), so look the
            # center up in a list copy instead of relying on `.index`
            try:
                center_pos = list(roi_fids).index(f)
            except ValueError:
                warning("Center feature attribute id %s not found" % f)
            else:
                roi_seed[center_pos] = True
            roi.fa[self.__add_center_fa] = roi_seed
        # compute the datameasure and store in results
        res = measure(roi)
        if assure_dataset and not is_datasetlike(res):
            res = Dataset(np.atleast_1d(res))
        if store_roi_feature_ids:
            # add roi feature ids to intermediate result dataset for later
            # aggregation
            res.a['roi_feature_ids'] = roi_fids
        if store_roi_sizes:
            res.a['roi_sizes'] = roi.nfeatures
        if store_roi_center_ids:
            res.a['roi_center_ids'] = f
        results.append(res)
        if __debug__:
            debug('SLC', "Doing %i ROIs: %i (%i features) [%i%%]"
                  % (len(block),
                     f + 1,
                     roi.nfeatures,
                     float(i + 1) / len(block) * 100,), cr=True)
    if self.results_postproc_fx:
        if __debug__:
            debug('SLC', "Post-processing %d results in proc_block using %s"
                  % (len(results), self.results_postproc_fx))
        results = self.results_postproc_fx(results)
    if self.results_backend == 'native':
        pass                        # nothing special
    elif self.results_backend == 'hdf5':
        # store results in a temporary file and return a filename
        # NOTE(review): tempfile.mktemp only generates a name and is
        # documented as race-prone; kept because the file itself is
        # created by h5save
        results_file = tempfile.mktemp(prefix=self.tmp_prefix,
                                       suffix='-%s.hdf5' % iblock)
        if __debug__:
            debug('SLC', "Storing results into %s" % results_file)
        h5save(results_file, results)
        if __debug__:
            debug('SLC_', "Results stored")
        results = results_file
    else:
        raise RuntimeError("Must not reach this point")
    return results
def newfunc(*arg, **kwargs):
    # Re-seed the RNGs with the session-wide seed before every call of the
    # wrapped function, then delegate with all arguments untouched
    # (presumably so each wrapped call starts from the same RNG state).
    mvpa2.seed(mvpa2._random_seed)
    return func(*arg, **kwargs)
def _proc_block(self, block, datasets, featselhyper, queryengines,
                seed=None, iblock='main'):
    """Accumulate per-dataset projection matrices for one block of nodes.

    For every node in `block` the neighborhood is queried from each query
    engine, the corresponding features are selected in every dataset,
    `featselhyper` is called on those ROI datasets, and its per-dataset
    output is added into a sparse matrix kept for that dataset.

    Parameters
    ----------
    block
      Sequence of node ids (searchlight centers) to process.
    datasets
      Sequence of datasets, one per subject.
    featselhyper
      Callable taking the list of ROI datasets and returning one item per
      dataset.
    queryengines
      Sequence of query engines, indexable by node id.  If only one is
      given, its neighborhood is reused for all datasets.
    seed
      If not None, used to seed the RNGs before any processing.
    iblock
      Label included in the temporary file name for the hdf5 backend.

    Returns
    -------
    List of sparse (CSC) matrices, one per dataset, for the 'native'
    backend, or the name of the file they were saved to for 'hdf5'.
    """
    if seed is not None:
        mvpa2.seed(seed)
    if __debug__:
        debug('SLC',
              'Starting computing block for %i elements' % len(block))
    bar = ProgressBar()
    # one empty nfeatures x nfeatures accumulator per dataset
    projections = [
        csc_matrix((self.nfeatures, self.nfeatures),
                   dtype=self.params.dtype)
        for isub in range(self.ndatasets)
    ]
    for i, node_id in enumerate(block):
        # retrieve the feature ids of all features in the ROI from the query
        # engine

        # Find the neighborhood for that selected nearest node
        roi_feature_ids_all = [qe[node_id] for qe in queryengines]
        # handling queryengines that return AttrDatasets
        for isub in range(len(roi_feature_ids_all)):
            if is_datasetlike(roi_feature_ids_all[isub]):
                # making sure queryengine returned proper shaped output
                assert (roi_feature_ids_all[isub].nsamples == 1)
                roi_feature_ids_all[isub] = roi_feature_ids_all[
                    isub].samples[0, :].tolist()
        if len(roi_feature_ids_all) == 1:
            # just one was provided to be "broadcasted"
            roi_feature_ids_all *= len(datasets)
        # if qe returns zero-sized ROI for any subject, pass...
        if any(len(x) == 0 for x in roi_feature_ids_all):
            continue
        # selecting neighborhood for all subject for hyperalignment
        ds_temp = [
            sd[:, ids] for sd, ids in zip(datasets, roi_feature_ids_all)
        ]
        if self.force_roi_seed:
            # boolean mask marking the center node within the reference
            # dataset's ROI
            roi_seed = np.array(
                roi_feature_ids_all[self.params.ref_ds]) == node_id
            ds_temp[self.params.ref_ds].fa['roi_seed'] = roi_seed
        if __debug__:
            msg = 'ROI (%i/%i), %i features' % (
                i + 1, len(block), ds_temp[self.params.ref_ds].nfeatures)
            debug('SLC', bar(float(i + 1) / len(block), msg), cr=True)
        hmappers = featselhyper(ds_temp)
        assert (len(hmappers) == len(datasets))
        roi_feature_ids_ref_ds = roi_feature_ids_all[self.params.ref_ds]
        for isub, roi_feature_ids in enumerate(roi_feature_ids_all):
            # Build COO triplets: rows (I) are this dataset's ROI features,
            # columns (J) are target features, V are the values to add
            if not self.params.combine_neighbormappers:
                # single target column: the center node itself
                I = roi_feature_ids
                #J = [roi_feature_ids[node_id]] * len(roi_feature_ids)
                J = [node_id] * len(roi_feature_ids)
                V = hmappers[isub].tolist()
                if np.isscalar(V):
                    V = [V]
            else:
                # one target column per feature of the reference
                # dataset's ROI, taking column f2 of this dataset's item
                I, J, V = [], [], []
                for f2, roi_feature_id_ref_ds in enumerate(
                        roi_feature_ids_ref_ds):
                    I += roi_feature_ids
                    J += [roi_feature_id_ref_ds] * len(roi_feature_ids)
                    V += hmappers[isub][:, f2].tolist()
            # shape is enlarged if an index exceeds self.nfeatures
            proj = coo_matrix(
                (V, (I, J)),
                shape=(max(self.nfeatures, max(I) + 1),
                       max(self.nfeatures, max(J) + 1)),
                dtype=self.params.dtype)
            proj = proj.tocsc()
            # Cleaning up the current subject's projections to free up memory
            hmappers[isub] = [[] for _ in hmappers]
            projections[isub] = projections[isub] + proj

    if self.params.results_backend == 'native':
        return projections
    elif self.params.results_backend == 'hdf5':
        # store results in a temporary file and return a filename
        results_file = mktemp(prefix=self.params.tmp_prefix,
                              suffix='-%s.hdf5' % iblock)
        if __debug__:
            debug('SLC', "Storing results into %s" % results_file)
        h5save(results_file, projections)
        if __debug__:
            debug('SLC_', "Results stored")
        return results_file
    else:
        raise RuntimeError("Must not reach this point")
This brief example provides a demonstration. """ import mvpa2 import pylab as pl import numpy as np from mvpa2.misc.data_generators import normal_feature_dataset from mvpa2.clfs.svm import LinearCSVMC from mvpa2.generators.partition import NFoldPartitioner from mvpa2.measures.base import CrossValidation from mvpa2.mappers.zscore import zscore """ Generate a binary dataset without any signal (snr=0). """ mvpa2.seed(1); ds_noise = normal_feature_dataset(perlabel=100, nlabels=2, nfeatures=2, snr=0, nonbogus_features=[0,1]) # signal levels sigs = [0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0] """ To mimic behavior of hard-margin SVM whenever classes become separable, which is easier to comprehend, we are intentionally setting very high C value. """ clf = LinearCSVMC(C=1000, enable_ca=['training_stats'])
def _proc_block(self, block, datasets, featselhyper, queryengines,
                seed=None, iblock='main'):
    """Accumulate per-dataset projection matrices for one block of nodes.

    Every node of `block` has its neighborhood queried from the query
    engines; `featselhyper` is run on the ROI datasets selected that way
    and its per-dataset output is added to that dataset's sparse matrix.

    Returns the list of sparse matrices ('native' backend) or the name of
    the file they were saved to ('hdf5' backend).
    """
    if seed is not None:
        mvpa2.seed(seed)
    if __debug__:
        debug('SLC', 'Starting computing block for %i elements' % len(block))
    bar = ProgressBar()

    # one empty accumulator per dataset
    projections = []
    for isub in range(self.ndatasets):
        projections.append(
            csc_matrix((self.nfeatures, self.nfeatures),
                       dtype=self.params.dtype))

    for iroi, node_id in enumerate(block):
        # neighborhood of the node according to every query engine
        roi_ids_per_ds = [qe[node_id] for qe in queryengines]
        # query engines may return AttrDatasets -- reduce them to id lists
        for pos, neighbors in enumerate(roi_ids_per_ds):
            if is_datasetlike(neighbors):
                # making sure queryengine returned proper shaped output
                assert (neighbors.nsamples == 1)
                roi_ids_per_ds[pos] = neighbors.samples[0, :].tolist()
        if len(roi_ids_per_ds) == 1:
            # just one was provided to be "broadcasted"
            roi_ids_per_ds = roi_ids_per_ds * len(datasets)
        # skip the node if the ROI is empty for any subject
        if not all(len(ids) for ids in roi_ids_per_ds):
            continue

        # neighborhood of every subject to be hyperaligned
        roi_datasets = [sd[:, ids]
                        for sd, ids in zip(datasets, roi_ids_per_ds)]
        if self.force_roi_seed:
            ref_ids = np.array(roi_ids_per_ds[self.params.ref_ds])
            roi_datasets[self.params.ref_ds].fa['roi_seed'] = \
                (ref_ids == node_id)
        if __debug__:
            msg = 'ROI (%i/%i), %i features' % (
                iroi + 1,
                len(block),
                roi_datasets[self.params.ref_ds].nfeatures)
            debug('SLC', bar(float(iroi + 1) / len(block), msg), cr=True)

        hmappers = featselhyper(roi_datasets)
        assert (len(hmappers) == len(datasets))
        ref_roi_ids = roi_ids_per_ds[self.params.ref_ds]

        for isub, sub_roi_ids in enumerate(roi_ids_per_ds):
            # COO triplets: rows, columns and values to be added
            if self.params.combine_neighbormappers:
                rows, cols, vals = [], [], []
                for f2, ref_id in enumerate(ref_roi_ids):
                    rows.extend(sub_roi_ids)
                    cols.extend([ref_id] * len(sub_roi_ids))
                    vals.extend(hmappers[isub][:, f2].tolist())
            else:
                rows = sub_roi_ids
                cols = [node_id] * len(sub_roi_ids)
                vals = hmappers[isub].tolist()
                if np.isscalar(vals):
                    vals = [vals]

            nrows = max(self.nfeatures, max(rows) + 1)
            ncols = max(self.nfeatures, max(cols) + 1)
            proj = coo_matrix((vals, (rows, cols)),
                              shape=(nrows, ncols),
                              dtype=self.params.dtype).tocsc()
            # drop the consumed item of this subject to free up memory
            hmappers[isub] = [[] for _ in hmappers]
            projections[isub] = projections[isub] + proj

    backend = self.params.results_backend
    if backend == 'native':
        return projections
    if backend != 'hdf5':
        raise RuntimeError("Must not reach this point")

    # hdf5: store results in a temporary file and return its name
    results_file = mktemp(prefix=self.params.tmp_prefix,
                          suffix='-%s.hdf5' % iblock)
    if __debug__:
        debug('SLC', "Storing results into %s" % results_file)
    h5save(results_file, projections)
    if __debug__:
        debug('SLC_', "Results stored")
    return results_file
def _proc_block_inplace(self, block, ds, measure, seed=None, iblock='main'):
    """Little helper to capture the parts of the computation that can be
    parallelized. This method preallocates the output of the block,
    reducing the number of elements to be hstacked down the processing
    line.

    The first ROI of the block is processed up front; the shape and dtype
    of its result determine the preallocated output array, into which all
    subsequent results are written side by side.

    Parameters
    ----------
    block
      Sequence of center feature ids to process.  The first element is
      accessed unconditionally.
    ds
      Dataset handed to the per-ROI processing helper.
    measure
      Measure handed to the per-ROI processing helper.
    seed
      RNG seed.  Should be provided e.g. in child process invocations
      to guarantee that they all seed differently to not keep generating
      the same sequences due to reusing the same copy of numpy's RNG
    iblock
      Label included in the name of the temporary file in case of the
      hdf5 backend.  Critical for generating non-colliding temp
      filenames: otherwise RNGs of different processes might collide in
      their temporary file names leading to problems.

    Returns
    -------
    A single-element list with the combined result dataset (possibly
    post-processed by ``results_postproc_fx``) for the 'native' backend,
    or the name of the file it was stored into for the 'hdf5' backend.
    """
    if seed is not None:
        mvpa2.seed(seed)
    if __debug__:
        debug('SLC', "Starting computing block for %i elements"
              % len(block))
    store_roi_feature_ids = self.ca.is_enabled('roi_feature_ids')
    store_roi_sizes = self.ca.is_enabled('roi_sizes')
    store_roi_center_ids = self.ca.is_enabled('roi_center_ids')

    assure_dataset = any(
        [store_roi_feature_ids, store_roi_sizes, store_roi_center_ids])

    # compute first result in block to get estimate of output
    if __debug__:
        debug('SLC', "Computing measure for first ROI to preallocate "
                     "output")
    first_res, roi = self.__process_roi(ds, block[0], measure,
                                        assure_dataset)
    nsamples, nfeatures = first_res.shape
    # every ROI is given the same number of output columns as the first
    results = np.empty((nsamples, nfeatures * len(block)),
                       dtype=first_res.samples.dtype)
    if __debug__:
        debug('SLC', "Preallocated ouput of shape %s" % str(results.shape))
    results[:, :nfeatures] = first_res.samples

    # column window where the next result is going to be written
    start = nfeatures
    step = nfeatures

    # put rois around all features in the dataset and compute the
    # measure within them
    bar = ProgressBar()

    # initialize dictionaries to store fa and a
    # (list values are merged element-wise, anything else is appended as
    # a single item)
    fa = defaultdict(list)
    for first_res_fa in first_res.fa:
        val = first_res.fa[first_res_fa].value
        if isinstance(val, list):
            adder = fa[first_res_fa].extend
        else:
            adder = fa[first_res_fa].append
        adder(val)
    a = defaultdict(list)
    for first_res_a in first_res.a:
        val = first_res.a[first_res_a].value
        # roi_feature_ids is kept as one entry per ROI even if it is a list
        if first_res_a != 'roi_feature_ids' and isinstance(val, list):
            adder = a[first_res_a].extend
        else:
            adder = a[first_res_a].append
        adder(val)

    # NOTE(review): only `a` entries are extended for the remaining ROIs;
    # `fa` keeps the values of the first result only -- confirm intended
    for i, f in enumerate(block[1:]):
        res, roi = self.__process_roi(ds, f, measure, assure_dataset)
        if store_roi_feature_ids:
            # add roi feature ids to intermediate result dataset for later
            # aggregation
            a['roi_feature_ids'].append(res.a.roi_feature_ids)
        if store_roi_sizes:
            a['roi_sizes'].append(roi.nfeatures)
        if store_roi_center_ids:
            a['roi_center_ids'].append(f)

        # store results inplace
        end = start + step
        results[:, start:end] = res.samples
        start = end

        if __debug__:
            # i counts from 0 over block[1:], so the reported progress
            # stops at len(block) - 1
            msg = 'ROI %i (%i/%i), %i features' % \
                  (f + 1, i + 1, len(block), roi.nfeatures)
            debug('SLC', bar(float(i + 1) / len(block), msg), cr=True)

    if __debug__:
        # just to get to new line
        debug('SLC', '')

    # now make it a dataset and a list to make it compatible with the rest
    results = [Dataset(results, sa=first_res.sa, a=dict(a), fa=dict(fa))]

    if self.results_postproc_fx:
        if __debug__:
            debug(
                'SLC', "Post-processing %d results in proc_block using %s"
                % (len(results), self.results_postproc_fx))
        results = self.results_postproc_fx(results)
    if self.results_backend == 'native':
        pass  # nothing special
    elif self.results_backend == 'hdf5':
        # store results in a temporary file and return a filename
        results_file = tempfile.mktemp(prefix=self.tmp_prefix,
                                       suffix='-%s.hdf5' % iblock)
        if __debug__:
            debug('SLC', "Storing results into %s" % results_file)
        h5save(results_file, results)
        if __debug__:
            debug('SLC_', "Results stored")
        results = results_file
    else:
        raise RuntimeError("Must not reach this point")
    return results
def _proc_block(self, block, ds, measure, seed=None, iblock='main'):
    """Little helper to capture the parts of the computation that can be
    parallelized

    Parameters
    ----------
    block
      Sequence of center feature ids to process.
    ds
      Dataset handed to the per-ROI processing helper.
    measure
      Measure handed to the per-ROI processing helper.
    seed
      RNG seed.  Should be provided e.g. in child process invocations
      to guarantee that they all seed differently to not keep generating
      the same sequences due to reusing the same copy of numpy's RNG
    iblock
      Label included in the name of the temporary file in case of the
      hdf5 backend.  Critical for generating non-colliding temp
      filenames: otherwise RNGs of different processes might collide in
      their temporary file names leading to problems.

    Returns
    -------
    List of per-ROI results (possibly post-processed by
    ``results_postproc_fx``) for the 'native' backend, or the name of the
    file they were stored into for the 'hdf5' backend.
    """
    if seed is not None:
        mvpa2.seed(seed)
    if __debug__:
        debug('SLC', "Starting computing block for %i elements"
              % len(block))
    results = []
    store_roi_feature_ids = self.ca.is_enabled('roi_feature_ids')
    store_roi_sizes = self.ca.is_enabled('roi_sizes')
    store_roi_center_ids = self.ca.is_enabled('roi_center_ids')

    # passed to the per-ROI helper whenever any ROI attribute is requested
    assure_dataset = any(
        [store_roi_feature_ids, store_roi_sizes, store_roi_center_ids])

    # put rois around all features in the dataset and compute the
    # measure within them
    bar = ProgressBar()

    for i, f in enumerate(block):
        res, roi = self.__process_roi(ds, f, measure, assure_dataset)
        results.append(res)

        if __debug__:
            msg = 'ROI %i (%i/%i), %i features' % \
                  (f + 1, i + 1, len(block), roi.nfeatures)
            debug('SLC', bar(float(i + 1) / len(block), msg), cr=True)

    if __debug__:
        # just to get to new line
        debug('SLC', '')

    if self.results_postproc_fx:
        if __debug__:
            debug('SLC',
                  "Post-processing %d results in proc_block using %s"
                  % (len(results), self.results_postproc_fx))
        results = self.results_postproc_fx(results)
    if self.results_backend == 'native':
        pass  # nothing special
    elif self.results_backend == 'hdf5':
        # store results in a temporary file and return a filename
        results_file = tempfile.mktemp(prefix=self.tmp_prefix,
                                       suffix='-%s.hdf5' % iblock)
        if __debug__:
            debug('SLC', "Storing results into %s" % results_file)
        h5save(results_file, results)
        if __debug__:
            debug('SLC_', "Results stored")
        results = results_file
    else:
        raise RuntimeError("Must not reach this point")
    return results
""" import numpy as np """ """ import mvpa2 from mvpa2.base import cfg from mvpa2.misc.data_generators import * from mvpa2.clfs.knn import kNN from mvpa2.misc.plot import * mvpa2.seed(0) # to reproduce the plot dataset_kwargs = dict(nfeatures=2, nchunks=10, snr=2, nlabels=4, means=[ [0,1], [1,0], [1,1], [0,0] ]) dataset_train = normal_feature_dataset(**dataset_kwargs) dataset_plot = normal_feature_dataset(**dataset_kwargs) # make a new figure pl.figure(figsize=(9, 9)) for i,k in enumerate((1, 3, 9, 20)): knn = kNN(k) print "Processing kNN(%i) problem..." % k