Ejemplo n.º 1
0
    def test_adhocsearchlight_perm_testing(self):
        # just a smoke test pretty much
        ds = datasets['3dmedium'].copy()
        #ds.samples += np.random.normal(size=ds.samples.shape)*10
        ds.fa['voxel_indices'] = ds.fa.myspace
        from mvpa2.mappers.fx import mean_sample
        from mvpa2.clfs.stats import MCNullDist
        permutator = AttributePermutator('targets', count=8,
                                         limit='chunks')
        distr_est = MCNullDist(permutator, tail='left',
                               enable_ca=['dist_samples'])
        slargs = (kNN(1),
                  NFoldPartitioner(0.5,
                                   selection_strategy='random',
                                   count=9))
        slkwargs = dict(radius=1, postproc=mean_sample())

        sl_nodistr = sphere_m1nnsearchlight(*slargs, **slkwargs)
        skip_if_no_external('scipy')    # needed for null_t
        sl = sphere_m1nnsearchlight(
            *slargs,
            null_dist=distr_est,
            enable_ca=['null_t'],
            reuse_neighbors=True,
            **slkwargs
            )
        mvpa2.seed()
        res_nodistr = sl_nodistr(ds)
        mvpa2.seed()
        res = sl(ds)
        # verify that we at least got the same main result
        # ah (yoh) -- null dist is estimated before the main
        # estimate so we can't guarantee correspondence :-/
        # assert_array_equal(res_nodistr, res)
        # only resemblance (TODO, may be we want to get/setstate
        # for rng before null_dist.fit?)

        # and dimensions correspond
        assert_array_equal(distr_est.ca.dist_samples.shape,
                           (1, ds.nfeatures, 8))
        assert_array_equal(sl.ca.null_t.samples.shape,
                           (1, ds.nfeatures))
Ejemplo n.º 2
0
    def test_adhocsearchlight_perm_testing(self):
        # just a smoke test pretty much
        ds = datasets['3dmedium'].copy()
        #ds.samples += np.random.normal(size=ds.samples.shape)*10
        ds.fa['voxel_indices'] = ds.fa.myspace
        from mvpa2.mappers.fx import mean_sample
        from mvpa2.clfs.stats import MCNullDist
        permutator = AttributePermutator('targets', count=8, limit='chunks')
        distr_est = MCNullDist(permutator,
                               tail='left',
                               enable_ca=['dist_samples'])
        slargs = (kNN(1),
                  NFoldPartitioner(0.5, selection_strategy='random', count=9))
        slkwargs = dict(radius=1, postproc=mean_sample())

        sl_nodistr = sphere_m1nnsearchlight(*slargs, **slkwargs)
        skip_if_no_external('scipy')  # needed for null_t
        sl = sphere_m1nnsearchlight(*slargs,
                                    null_dist=distr_est,
                                    enable_ca=['null_t'],
                                    reuse_neighbors=True,
                                    **slkwargs)
        mvpa2.seed()
        res_nodistr = sl_nodistr(ds)
        mvpa2.seed()
        res = sl(ds)
        # verify that we at least got the same main result
        # ah (yoh) -- null dist is estimated before the main
        # estimate so we can't guarantee correspondence :-/
        # assert_array_equal(res_nodistr, res)
        # only resemblance (TODO, may be we want to get/setstate
        # for rng before null_dist.fit?)

        # and dimensions correspond
        assert_array_equal(distr_est.ca.dist_samples.shape,
                           (1, ds.nfeatures, 8))
        assert_array_equal(sl.ca.null_t.samples.shape, (1, ds.nfeatures))
Ejemplo n.º 3
0
TODO

"""

import numpy as np
"""

"""

import mvpa2
from mvpa2.base import cfg
from mvpa2.misc.data_generators import *
from mvpa2.clfs.knn import kNN
from mvpa2.misc.plot import *

mvpa2.seed(0)  # to reproduce the plot

dataset_kwargs = dict(nfeatures=2,
                      nchunks=10,
                      snr=2,
                      nlabels=4,
                      means=[[0, 1], [1, 0], [1, 1], [0, 0]])

dataset_train = normal_feature_dataset(**dataset_kwargs)
dataset_plot = normal_feature_dataset(**dataset_kwargs)

# make a new figure
pl.figure(figsize=(9, 9))

for i, k in enumerate((1, 3, 9, 20)):
    knn = kNN(k)
Ejemplo n.º 4
0
def test_gnbsearchlight_permutations():
    import mvpa2
    from mvpa2.base.node import ChainNode
    from mvpa2.clfs.gnb import GNB
    from mvpa2.generators.base import  Repeater
    from mvpa2.generators.partition import NFoldPartitioner, OddEvenPartitioner
    #import mvpa2.generators.permutation
    #reload(mvpa2.generators.permutation)
    from mvpa2.generators.permutation import AttributePermutator
    from mvpa2.testing.datasets import datasets
    from mvpa2.measures.base import CrossValidation
    from mvpa2.measures.gnbsearchlight import sphere_gnbsearchlight
    from mvpa2.measures.searchlight import sphere_searchlight
    from mvpa2.mappers.fx import mean_sample
    from mvpa2.misc.errorfx import mean_mismatch_error
    from mvpa2.clfs.stats import MCNullDist
    from mvpa2.testing.tools import assert_raises, ok_, assert_array_less

    # mvpa2.debug.active = ['APERM', 'SLC'] #, 'REPM']
    # mvpa2.debug.metrics += ['pid']
    count = 10
    nproc = 1 + int(mvpa2.externals.exists('pprocess'))
    ds = datasets['3dsmall'].copy()
    ds.fa['voxel_indices'] = ds.fa.myspace

    slkwargs = dict(radius=3, space='voxel_indices',  enable_ca=['roi_sizes'],
                    center_ids=[1, 10, 70, 100])

    mvpa2.seed(mvpa2._random_seed)
    clf  = GNB()
    splt = NFoldPartitioner(cvtype=2, attr='chunks')

    repeater   = Repeater(count=count)
    permutator = AttributePermutator('targets', limit={'partitions': 1}, count=1)

    null_sl = sphere_gnbsearchlight(clf, ChainNode([splt, permutator], space=splt.get_space()),
                                    postproc=mean_sample(), errorfx=mean_mismatch_error,
                                    **slkwargs)

    distr_est = MCNullDist(repeater, tail='left', measure=null_sl,
                           enable_ca=['dist_samples'])
    sl = sphere_gnbsearchlight(clf, splt,
                               reuse_neighbors=True,
                               null_dist=distr_est, postproc=mean_sample(),
                               errorfx=mean_mismatch_error,
                               **slkwargs)
    if __debug__:                         # assert is done only without -O mode
        assert_raises(NotImplementedError, sl, ds)

    # "ad-hoc searchlights can't handle yet varying targets across partitions"
    if False:
        # after above limitation is removed -- enable
        sl_map = sl(ds)
        sl_null_prob = sl.ca.null_prob.samples.copy()

    mvpa2.seed(mvpa2._random_seed)
    ### 'normal' Searchlight
    clf  = GNB()
    splt = NFoldPartitioner(cvtype=2, attr='chunks')
    repeater   = Repeater(count=count)
    permutator = AttributePermutator('targets', limit={'partitions': 1}, count=1)
    # rng=np.random.RandomState(0)) # to trigger failure since the same np.random state
    # would be reused across all pprocesses
    null_cv = CrossValidation(clf, ChainNode([splt, permutator], space=splt.get_space()),
                              postproc=mean_sample())
    null_sl_normal = sphere_searchlight(null_cv, nproc=nproc, **slkwargs)
    distr_est_normal = MCNullDist(repeater, tail='left', measure=null_sl_normal,
                           enable_ca=['dist_samples'])

    cv = CrossValidation(clf, splt, errorfx=mean_mismatch_error,
                         enable_ca=['stats'], postproc=mean_sample() )
    sl = sphere_searchlight(cv, nproc=nproc, null_dist=distr_est_normal, **slkwargs)
    sl_map_normal = sl(ds)
    sl_null_prob_normal = sl.ca.null_prob.samples.copy()

    # For every feature -- we should get some variance in estimates In
    # case of failure they are all really close to each other (up to
    # numerical precision), so variance will be close to 0
    assert_array_less(-np.var(distr_est_normal.ca.dist_samples.samples[0],
                              axis=1), -1e-5)
    for s in distr_est_normal.ca.dist_samples.samples[0]:
        ok_(len(np.unique(s)) > 1)
Ejemplo n.º 5
0
    def test_spatial_searchlight(self,
                                 lrn_sllrn_SL_partitioner,
                                 do_roi=False,
                                 results_backend='native'):
        """Tests both generic and ad-hoc searchlights (e.g. GNBSearchlight)
        Test of and adhoc searchlight anyways requires a ground-truth
        comparison to the generic version, so we are doing sweepargs here
        """
        lrn, sllrn, SL, partitioner, correction = lrn_sllrn_SL_partitioner
        ## if results_backend == 'hdf5' and not common_variance:
        ##     # no need for full combination of all possible arguments here
        ##     return

        if __debug__ and 'ENFORCE_CA_ENABLED' in debug.active \
           and  isinstance(lrn, ChainMapper):
            raise SkipTest("Known to fail while trying to enable "
                           "training_stats for the ChainMapper (M1NN here)")

        # e.g. for M1NN we need plain kNN(1) for m1nnsl, but to imitate m1nn
        #      "learner" we must use a chainmapper atm
        if sllrn is None:
            sllrn = lrn
        ds = datasets['3dsmall'].copy()
        # Let's test multiclass here, so boost # of labels
        ds[6:18].T += 2
        ds.fa['voxel_indices'] = ds.fa.myspace

        # To assure that users do not run into incorrect operation due to overflows
        ds.samples += 5000
        ds.samples *= 1000
        ds.samples = ds.samples.astype(np.int16)

        # compute N-1 cross-validation for each sphere
        # YOH: unfortunately sample_clf_lin is not guaranteed
        #      to provide exactly the same results due to inherent
        #      iterative process.  Therefore lets use something quick
        #      and pure Python
        cv = CrossValidation(lrn, partitioner)

        skwargs = dict(
            radius=1,
            enable_ca=['roi_sizes', 'raw_results', 'roi_feature_ids'])

        if do_roi:
            # select some random set of features
            nroi = rnd.randint(1, ds.nfeatures)
            # and lets compute the full one as well once again so we have a reference
            # which will be excluded itself from comparisons but values will be compared
            # for selected roi_id
            sl_all = SL(sllrn, partitioner, **skwargs)
            result_all = sl_all(ds)
            # select random features
            roi_ids = rnd.permutation(range(ds.nfeatures))[:nroi]
            skwargs['center_ids'] = roi_ids
        else:
            nroi = ds.nfeatures
            roi_ids = np.arange(nroi)
            result_all = None

        if results_backend == 'hdf5':
            skip_if_no_external('h5py')

        sls = [
            sphere_searchlight(cv, results_backend=results_backend, **skwargs),
            #GNBSearchlight(gnb, NFoldPartitioner(cvtype=1))
            SL(sllrn, partitioner, indexsum='fancy', **skwargs)
        ]

        if externals.exists('scipy'):
            sls += [SL(sllrn, partitioner, indexsum='sparse', **skwargs)]

        # Test nproc just once
        if externals.exists('pprocess') and not self._tested_pprocess:
            sls += [sphere_searchlight(cv, nproc=2, **skwargs)]
            self._tested_pprocess = True

        # Provide the dataset and all those searchlights for testing
        #self._test_searchlights(ds, sls, roi_ids, result_all)
        #nroi = len(roi_ids)
        #do_roi = nroi != ds.nfeatures
        all_results = []
        for sl in sls:
            # run searchlight
            mvpa2.seed()  # reseed rng again for m1nnsl
            results = sl(ds)
            all_results.append(results)
            #print `sl`
            # check for correct number of spheres
            self.assertTrue(results.nfeatures == nroi)
            # and measures (one per xfold)
            if partitioner.cvtype == 1:
                self.assertTrue(len(results) == len(ds.UC))
            elif partitioner.cvtype == 0.5:
                # here we had 4 unique chunks, so 6 combinations
                # even though 20 max was specified for NFold
                self.assertTrue(len(results) == 6)
            else:
                raise RuntimeError("Unknown yet type of partitioner to check")
            # check for chance-level performance across all spheres
            # makes sense only if number of features was big enough
            # to get some stable estimate of mean
            if not do_roi or nroi > 20:
                # correction here is for M1NN class which has wider distribution
                self.assertTrue(0.67 - correction < results.samples.mean() <
                                0.85 + correction,
                                msg="Out of range mean result: "
                                "lrn: %s  sllrn: %s  NROI: %d  MEAN: %.3f" % (
                                    lrn,
                                    sllrn,
                                    nroi,
                                    results.samples.mean(),
                                ))

            mean_errors = results.samples.mean(axis=0)
            # that we do get different errors ;)
            self.assertTrue(len(np.unique(mean_errors) > 3))

            # check resonable sphere sizes
            self.assertTrue(len(sl.ca.roi_sizes) == nroi)
            self.assertTrue(len(sl.ca.roi_feature_ids) == nroi)
            for i, fids in enumerate(sl.ca.roi_feature_ids):
                self.assertTrue(len(fids) == sl.ca.roi_sizes[i])
            if do_roi:
                # for roi we should relax conditions a bit
                self.assertTrue(max(sl.ca.roi_sizes) <= 7)
                self.assertTrue(min(sl.ca.roi_sizes) >= 4)
            else:
                self.assertTrue(max(sl.ca.roi_sizes) == 7)
                self.assertTrue(min(sl.ca.roi_sizes) == 4)

            # check base-class state
            self.assertEqual(sl.ca.raw_results.nfeatures, nroi)

            # Test if we got results correctly for 'selected' roi ids
            if do_roi:
                assert_array_equal(result_all[:, roi_ids], results)

        if len(all_results) > 1:
            # if we had multiple searchlights, we can check either they all
            # gave the same result (they should have)
            aresults = np.array([a.samples for a in all_results])
            dresults = np.abs(aresults - aresults.mean(axis=0))
            dmax = np.max(dresults)
            self.assertTrue(dmax <= 1e-13)

        # Test the searchlight's reuse of neighbors
        for indexsum in ['fancy'] + (externals.exists('scipy') and ['sparse']
                                     or []):
            sl = SL(sllrn,
                    partitioner,
                    indexsum='fancy',
                    reuse_neighbors=True,
                    **skwargs)
            mvpa2.seed()
            result1 = sl(ds)
            mvpa2.seed()
            result2 = sl(ds)  # must be faster
            assert_array_equal(result1, result2)
Ejemplo n.º 6
0
    def test_spatial_searchlight(self, lrn_sllrn_SL_partitioner, do_roi=False,
                                 results_backend='native'):
        """Tests both generic and ad-hoc searchlights (e.g. GNBSearchlight)
        Test of and adhoc searchlight anyways requires a ground-truth
        comparison to the generic version, so we are doing sweepargs here
        """
        lrn, sllrn, SL, partitioner, correction = lrn_sllrn_SL_partitioner
        ## if results_backend == 'hdf5' and not common_variance:
        ##     # no need for full combination of all possible arguments here
        ##     return

        if __debug__ and 'ENFORCE_CA_ENABLED' in debug.active \
           and  isinstance(lrn, ChainMapper):
            raise SkipTest("Known to fail while trying to enable "
                           "training_stats for the ChainMapper (M1NN here)")


        # e.g. for M1NN we need plain kNN(1) for m1nnsl, but to imitate m1nn
        #      "learner" we must use a chainmapper atm
        if sllrn is None:
            sllrn = lrn
        ds = datasets['3dsmall'].copy()
        # Let's test multiclass here, so boost # of labels
        ds[6:18].T += 2
        ds.fa['voxel_indices'] = ds.fa.myspace

        # To assure that users do not run into incorrect operation due to overflows
        ds.samples += 5000
        ds.samples *= 1000
        ds.samples = ds.samples.astype(np.int16)

        # compute N-1 cross-validation for each sphere
        # YOH: unfortunately sample_clf_lin is not guaranteed
        #      to provide exactly the same results due to inherent
        #      iterative process.  Therefore lets use something quick
        #      and pure Python
        cv = CrossValidation(lrn, partitioner)

        skwargs = dict(radius=1, enable_ca=['roi_sizes', 'raw_results',
                                            'roi_feature_ids'])

        if do_roi:
            # select some random set of features
            nroi = rnd.randint(1, ds.nfeatures)
            # and lets compute the full one as well once again so we have a reference
            # which will be excluded itself from comparisons but values will be compared
            # for selected roi_id
            sl_all = SL(sllrn, partitioner, **skwargs)
            result_all = sl_all(ds)
            # select random features
            roi_ids = rnd.permutation(range(ds.nfeatures))[:nroi]
            skwargs['center_ids'] = roi_ids
        else:
            nroi = ds.nfeatures
            roi_ids = np.arange(nroi)
            result_all = None

        if results_backend == 'hdf5':
            skip_if_no_external('h5py')

        sls = [sphere_searchlight(cv, results_backend=results_backend,
                                  **skwargs),
               #GNBSearchlight(gnb, NFoldPartitioner(cvtype=1))
               SL(sllrn, partitioner, indexsum='fancy', **skwargs)
               ]

        if externals.exists('scipy'):
            sls += [ SL(sllrn, partitioner, indexsum='sparse', **skwargs)]

        # Test nproc just once
        if externals.exists('pprocess') and not self._tested_pprocess:
            sls += [sphere_searchlight(cv, nproc=2, **skwargs)]
            self._tested_pprocess = True

        # Provide the dataset and all those searchlights for testing
        #self._test_searchlights(ds, sls, roi_ids, result_all)
        #nroi = len(roi_ids)
        #do_roi = nroi != ds.nfeatures
        all_results = []
        for sl in sls:
            # run searchlight
            mvpa2.seed()                # reseed rng again for m1nnsl
            results = sl(ds)
            all_results.append(results)
            #print `sl`
            # check for correct number of spheres
            self.assertTrue(results.nfeatures == nroi)
            # and measures (one per xfold)
            if partitioner.cvtype == 1:
                self.assertTrue(len(results) == len(ds.UC))
            elif partitioner.cvtype == 0.5:
                # here we had 4 unique chunks, so 6 combinations
                # even though 20 max was specified for NFold
                self.assertTrue(len(results) == 6)
            else:
                raise RuntimeError("Unknown yet type of partitioner to check")
            # check for chance-level performance across all spheres
            # makes sense only if number of features was big enough
            # to get some stable estimate of mean
            if not do_roi or nroi > 20:
                # correction here is for M1NN class which has wider distribution
                self.assertTrue(
                    0.68 - correction < results.samples.mean() < 0.84 + correction,
                    msg="Out of range mean result: "
                    "lrn: %s  sllrn: %s  NROI: %d  MEAN: %.3f"
                    % (lrn, sllrn, nroi, results.samples.mean(),))

            mean_errors = results.samples.mean(axis=0)
            # that we do get different errors ;)
            self.assertTrue(len(np.unique(mean_errors) > 3))

            # check resonable sphere sizes
            self.assertTrue(len(sl.ca.roi_sizes) == nroi)
            self.assertTrue(len(sl.ca.roi_feature_ids) == nroi)
            for i, fids in enumerate(sl.ca.roi_feature_ids):
                self.assertTrue(len(fids) == sl.ca.roi_sizes[i])
            if do_roi:
                # for roi we should relax conditions a bit
                self.assertTrue(max(sl.ca.roi_sizes) <= 7)
                self.assertTrue(min(sl.ca.roi_sizes) >= 4)
            else:
                self.assertTrue(max(sl.ca.roi_sizes) == 7)
                self.assertTrue(min(sl.ca.roi_sizes) == 4)

            # check base-class state
            self.assertEqual(sl.ca.raw_results.nfeatures, nroi)

            # Test if we got results correctly for 'selected' roi ids
            if do_roi:
                assert_array_equal(result_all[:, roi_ids], results)

        if len(all_results) > 1:
            # if we had multiple searchlights, we can check either they all
            # gave the same result (they should have)
            aresults = np.array([a.samples for a in all_results])
            dresults = np.abs(aresults - aresults.mean(axis=0))
            dmax = np.max(dresults)
            self.assertTrue(dmax <= 1e-13)

        # Test the searchlight's reuse of neighbors
        for indexsum in ['fancy'] + (
            externals.exists('scipy') and ['sparse'] or []):
            sl = SL(sllrn, partitioner, indexsum='fancy',
                    reuse_neighbors=True, **skwargs)
            mvpa2.seed()
            result1 = sl(ds)
            mvpa2.seed()
            result2 = sl(ds)                # must be faster
            assert_array_equal(result1, result2)
Ejemplo n.º 7
0
"""

import mvpa2
import pylab as pl
import numpy as np
from mvpa2.misc.data_generators import normal_feature_dataset
from mvpa2.clfs.svm import LinearCSVMC
from mvpa2.generators.partition import NFoldPartitioner
from mvpa2.measures.base import CrossValidation
from mvpa2.mappers.zscore import zscore
"""
Generate a binary dataset without any signal (snr=0).
"""

mvpa2.seed(1)
ds_noise = normal_feature_dataset(perlabel=100,
                                  nlabels=2,
                                  nfeatures=2,
                                  snr=0,
                                  nonbogus_features=[0, 1])

# signal levels
sigs = [0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0]
"""

To mimic behavior of hard-margin SVM whenever classes become
separable, which is easier to comprehend, we are intentionally setting
very high C value.

"""
Ejemplo n.º 8
0
    def _proc_block(self, block, ds, measure, seed=None, iblock='main'):
        """Little helper to capture the parts of the computation that can be
        parallelized

        Parameters
        ----------
        seed
          RNG seed.  Should be provided e.g. in child process invocations
          to guarantee that they all seed differently to not keep generating
          the same sequencies due to reusing the same copy of numpy's RNG
        block
          Critical for generating non-colliding temp filenames in case
          of hdf5 backend.  Otherwise RNGs of different processes might
          collide in their temporary file names leading to problems.
        """
        if seed is not None:
            mvpa2.seed(seed)
        if __debug__:
            debug_slc_ = 'SLC_' in debug.active
            debug('SLC',
                  "Starting computing block for %i elements" % len(block))
        results = []
        store_roi_feature_ids = self.ca.is_enabled('roi_feature_ids')
        store_roi_sizes = self.ca.is_enabled('roi_sizes')
        store_roi_center_ids = self.ca.is_enabled('roi_center_ids')

        assure_dataset = any([store_roi_feature_ids,
                              store_roi_sizes,
                              store_roi_center_ids])

        # put rois around all features in the dataset and compute the
        # measure within them
        for i, f in enumerate(block):
            # retrieve the feature ids of all features in the ROI from the query
            # engine
            roi_specs = self._queryengine[f]

            if __debug__ and  debug_slc_:
                debug('SLC_', 'For %r query returned roi_specs %r'
                      % (f, roi_specs))

            if is_datasetlike(roi_specs):
                # TODO: unittest
                assert(len(roi_specs) == 1)
                roi_fids = roi_specs.samples[0]
            else:
                roi_fids = roi_specs

            # slice the dataset
            roi = ds[:, roi_fids]

            if is_datasetlike(roi_specs):
                for n, v in roi_specs.fa.iteritems():
                    roi.fa[n] = v

            if self.__add_center_fa:
                # add fa to indicate ROI seed if requested
                roi_seed = np.zeros(roi.nfeatures, dtype='bool')
                if f in roi_fids:
                    roi_seed[roi_fids.index(f)] = True
                else:
                    warning("Center feature attribute id %s not found" % f)
                roi.fa[self.__add_center_fa] = roi_seed

            # compute the datameasure and store in results
            res = measure(roi)

            if assure_dataset and not is_datasetlike(res):
                res = Dataset(np.atleast_1d(res))
            if store_roi_feature_ids:
                # add roi feature ids to intermediate result dataset for later
                # aggregation
                res.a['roi_feature_ids'] = roi_fids
            if store_roi_sizes:
                res.a['roi_sizes'] = roi.nfeatures
            if store_roi_center_ids:
                res.a['roi_center_ids'] = f
            results.append(res)

            if __debug__:
                debug('SLC', "Doing %i ROIs: %i (%i features) [%i%%]" \
                    % (len(block),
                       f + 1,
                       roi.nfeatures,
                       float(i + 1) / len(block) * 100,), cr=True)

        if self.results_postproc_fx:
            if __debug__:
                debug('SLC', "Post-processing %d results in proc_block using %s"
                      % (len(results), self.results_postproc_fx))
            results = self.results_postproc_fx(results)
        if self.results_backend == 'native':
            pass                        # nothing special
        elif self.results_backend == 'hdf5':
            # store results in a temporary file and return a filename
            results_file = tempfile.mktemp(prefix=self.tmp_prefix,
                                           suffix='-%s.hdf5' % iblock)
            if __debug__:
                debug('SLC', "Storing results into %s" % results_file)
            h5save(results_file, results)
            if __debug__:
                debug('SLC_', "Results stored")
            results = results_file
        else:
            raise RuntimeError("Must not reach this point")
        return results
Ejemplo n.º 9
0
 def newfunc(*arg, **kwargs):
     mvpa2.seed(mvpa2._random_seed)
     return func(*arg, **kwargs)
Ejemplo n.º 10
0
 def newfunc(*arg, **kwargs):
     mvpa2.seed(mvpa2._random_seed)
     return func(*arg, **kwargs)
    def _proc_block(self,
                    block,
                    datasets,
                    featselhyper,
                    queryengines,
                    seed=None,
                    iblock='main'):
        if seed is not None:
            mvpa2.seed(seed)
        if __debug__:
            debug('SLC',
                  'Starting computing block for %i elements' % len(block))
        bar = ProgressBar()
        projections = [
            csc_matrix((self.nfeatures, self.nfeatures),
                       dtype=self.params.dtype)
            for isub in range(self.ndatasets)
        ]
        for i, node_id in enumerate(block):
            # retrieve the feature ids of all features in the ROI from the query
            # engine

            # Find the neighborhood for that selected nearest node
            roi_feature_ids_all = [qe[node_id] for qe in queryengines]
            # handling queryengines that return AttrDatasets
            for isub in range(len(roi_feature_ids_all)):
                if is_datasetlike(roi_feature_ids_all[isub]):
                    # making sure queryengine returned proper shaped output
                    assert (roi_feature_ids_all[isub].nsamples == 1)
                    roi_feature_ids_all[isub] = roi_feature_ids_all[
                        isub].samples[0, :].tolist()
            if len(roi_feature_ids_all) == 1:
                # just one was provided to be "broadcasted"
                roi_feature_ids_all *= len(datasets)
            # if qe returns zero-sized ROI for any subject, pass...
            if any(len(x) == 0 for x in roi_feature_ids_all):
                continue
            # selecting neighborhood for all subject for hyperalignment
            ds_temp = [
                sd[:, ids] for sd, ids in zip(datasets, roi_feature_ids_all)
            ]
            if self.force_roi_seed:
                roi_seed = np.array(
                    roi_feature_ids_all[self.params.ref_ds]) == node_id
                ds_temp[self.params.ref_ds].fa['roi_seed'] = roi_seed
            if __debug__:
                msg = 'ROI (%i/%i), %i features' % (
                    i + 1, len(block), ds_temp[self.params.ref_ds].nfeatures)
                debug('SLC', bar(float(i + 1) / len(block), msg), cr=True)
            hmappers = featselhyper(ds_temp)
            assert (len(hmappers) == len(datasets))
            roi_feature_ids_ref_ds = roi_feature_ids_all[self.params.ref_ds]
            for isub, roi_feature_ids in enumerate(roi_feature_ids_all):
                if not self.params.combine_neighbormappers:
                    I = roi_feature_ids
                    #J = [roi_feature_ids[node_id]] * len(roi_feature_ids)
                    J = [node_id] * len(roi_feature_ids)
                    V = hmappers[isub].tolist()
                    if np.isscalar(V):
                        V = [V]
                else:
                    I, J, V = [], [], []
                    for f2, roi_feature_id_ref_ds in enumerate(
                            roi_feature_ids_ref_ds):
                        I += roi_feature_ids
                        J += [roi_feature_id_ref_ds] * len(roi_feature_ids)
                        V += hmappers[isub][:, f2].tolist()
                proj = coo_matrix(
                    (V, (I, J)),
                    shape=(max(self.nfeatures,
                               max(I) + 1), max(self.nfeatures,
                                                max(J) + 1)),
                    dtype=self.params.dtype)
                proj = proj.tocsc()
                # Cleaning up the current subject's projections to free up memory
                hmappers[isub] = [[] for _ in hmappers]
                projections[isub] = projections[isub] + proj

        if self.params.results_backend == 'native':
            return projections
        elif self.params.results_backend == 'hdf5':
            # store results in a temporary file and return a filename
            results_file = mktemp(prefix=self.params.tmp_prefix,
                                  suffix='-%s.hdf5' % iblock)
            if __debug__:
                debug('SLC', "Storing results into %s" % results_file)
            h5save(results_file, projections)
            if __debug__:
                debug('SLC_', "Results stored")
            return results_file
        else:
            raise RuntimeError("Must not reach this point")
Ejemplo n.º 12
0
This brief examples provides a demonstration.

"""
import mvpa2
import pylab as pl
import numpy as np
from mvpa2.misc.data_generators import normal_feature_dataset
from mvpa2.clfs.svm import LinearCSVMC
from mvpa2.generators.partition import NFoldPartitioner
from mvpa2.measures.base import CrossValidation
from mvpa2.mappers.zscore import zscore

"""
Generate a binary dataset without any signal (snr=0).
"""
mvpa2.seed(1);
ds_noise = normal_feature_dataset(perlabel=100, nlabels=2, nfeatures=2, snr=0,
                                  nonbogus_features=[0,1])

# signal levels
sigs = [0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0]

"""

To mimic behavior of hard-margin SVM whenever classes become
separable, which is easier to comprehend, we are intentionally setting
very high C value.

"""

clf = LinearCSVMC(C=1000, enable_ca=['training_stats'])
Ejemplo n.º 13
0
    def _proc_block(self, block, datasets, featselhyper, queryengines, seed=None, iblock='main'):
        if seed is not None:
            mvpa2.seed(seed)
        if __debug__:
            debug('SLC', 'Starting computing block for %i elements' % len(block))
        bar = ProgressBar()
        projections = [csc_matrix((self.nfeatures, self.nfeatures),
                                  dtype=self.params.dtype)
                       for isub in range(self.ndatasets)]
        for i, node_id in enumerate(block):
            # retrieve the feature ids of all features in the ROI from the query
            # engine

            # Find the neighborhood for that selected nearest node
            roi_feature_ids_all = [qe[node_id] for qe in queryengines]
            # handling queryengines that return AttrDatasets
            for isub in range(len(roi_feature_ids_all)):
                if is_datasetlike(roi_feature_ids_all[isub]):
                    # making sure queryengine returned proper shaped output
                    assert(roi_feature_ids_all[isub].nsamples == 1)
                    roi_feature_ids_all[isub] = roi_feature_ids_all[isub].samples[0, :].tolist()
            if len(roi_feature_ids_all) == 1:
                # just one was provided to be "broadcasted"
                roi_feature_ids_all *= len(datasets)
            # if qe returns zero-sized ROI for any subject, pass...
            if any(len(x)==0 for x in roi_feature_ids_all):
                continue
            # selecting neighborhood for all subject for hyperalignment
            ds_temp = [sd[:, ids] for sd, ids in zip(datasets, roi_feature_ids_all)]
            if self.force_roi_seed:
                roi_seed = np.array(roi_feature_ids_all[self.params.ref_ds]) == node_id
                ds_temp[self.params.ref_ds].fa['roi_seed'] = roi_seed
            if __debug__:
                msg = 'ROI (%i/%i), %i features' % (i + 1, len(block),
                                                    ds_temp[self.params.ref_ds].nfeatures)
                debug('SLC', bar(float(i + 1) / len(block), msg), cr=True)
            hmappers = featselhyper(ds_temp)
            assert(len(hmappers) == len(datasets))
            roi_feature_ids_ref_ds = roi_feature_ids_all[self.params.ref_ds]
            for isub, roi_feature_ids in enumerate(roi_feature_ids_all):
                if not self.params.combine_neighbormappers:
                    I = roi_feature_ids
                    #J = [roi_feature_ids[node_id]] * len(roi_feature_ids)
                    J = [node_id] * len(roi_feature_ids)
                    V = hmappers[isub].tolist()
                    if np.isscalar(V):
                        V = [V]
                else:
                    I, J, V = [], [], []
                    for f2, roi_feature_id_ref_ds in enumerate(roi_feature_ids_ref_ds):
                        I += roi_feature_ids
                        J += [roi_feature_id_ref_ds] * len(roi_feature_ids)
                        V += hmappers[isub][:, f2].tolist()
                proj = coo_matrix(
                    (V, (I, J)),
                    shape=(max(self.nfeatures, max(I) + 1), max(self.nfeatures, max(J) + 1)),
                    dtype=self.params.dtype)
                proj = proj.tocsc()
                # Cleaning up the current subject's projections to free up memory
                hmappers[isub] = [[] for _ in hmappers]
                projections[isub] = projections[isub] + proj

        if self.params.results_backend == 'native':
            return projections
        elif self.params.results_backend == 'hdf5':
            # store results in a temporary file and return a filename
            results_file = mktemp(prefix=self.params.tmp_prefix,
                                  suffix='-%s.hdf5' % iblock)
            if __debug__:
                debug('SLC', "Storing results into %s" % results_file)
            h5save(results_file, projections)
            if __debug__:
                debug('SLC_', "Results stored")
            return results_file
        else:
            raise RuntimeError("Must not reach this point")
Ejemplo n.º 14
0
    def _proc_block_inplace(self,
                            block,
                            ds,
                            measure,
                            seed=None,
                            iblock='main'):
        """Little helper to capture the parts of the computation that can be
        parallelized. This method preallocates the output of the block,
        reducing the number of elementes to be hstacked down the processing
        line.

        Parameters
        ----------
        seed
          RNG seed.  Should be provided e.g. in child process invocations
          to guarantee that they all seed differently to not keep generating
          the same sequencies due to reusing the same copy of numpy's RNG
        block
          Critical for generating non-colliding temp filenames in case
          of hdf5 backend.  Otherwise RNGs of different processes might
          collide in their temporary file names leading to problems.
        """
        if seed is not None:
            mvpa2.seed(seed)
        if __debug__:
            debug('SLC',
                  "Starting computing block for %i elements" % len(block))

        store_roi_feature_ids = self.ca.is_enabled('roi_feature_ids')
        store_roi_sizes = self.ca.is_enabled('roi_sizes')
        store_roi_center_ids = self.ca.is_enabled('roi_center_ids')

        assure_dataset = any(
            [store_roi_feature_ids, store_roi_sizes, store_roi_center_ids])

        # compute first result in block to get estimate of output
        if __debug__:
            debug('SLC', "Computing measure for first ROI to preallocate "
                  "output")
        first_res, roi = self.__process_roi(ds, block[0], measure,
                                            assure_dataset)
        nsamples, nfeatures = first_res.shape
        results = np.empty((nsamples, nfeatures * len(block)),
                           dtype=first_res.samples.dtype)
        if __debug__:
            debug('SLC', "Preallocated ouput of shape %s" % str(results.shape))
        results[:, :nfeatures] = first_res.samples
        start = nfeatures
        step = nfeatures

        # put rois around all features in the dataset and compute the
        # measure within them
        bar = ProgressBar()

        # initialize dictionaries to store fa and a
        fa = defaultdict(list)
        for first_res_fa in first_res.fa:
            val = first_res.fa[first_res_fa].value
            if isinstance(val, list):
                adder = fa[first_res_fa].extend
            else:
                adder = fa[first_res_fa].append
            adder(val)

        a = defaultdict(list)
        for first_res_a in first_res.a:
            val = first_res.a[first_res_a].value
            if first_res_a != 'roi_feature_ids' and isinstance(val, list):
                adder = a[first_res_a].extend
            else:
                adder = a[first_res_a].append
            adder(val)

        for i, f in enumerate(block[1:]):
            res, roi = self.__process_roi(ds, f, measure, assure_dataset)
            if store_roi_feature_ids:
                # add roi feature ids to intermediate result dataset for later
                # aggregation
                a['roi_feature_ids'].append(res.a.roi_feature_ids)
            if store_roi_sizes:
                a['roi_sizes'].append(roi.nfeatures)
            if store_roi_center_ids:
                a['roi_center_ids'].append(f)
            # store results inplace
            end = start + step
            results[:, start:end] = res.samples
            start = end

            if __debug__:
                msg = 'ROI %i (%i/%i), %i features' % \
                      (f + 1, i + 1, len(block), roi.nfeatures)
                debug('SLC', bar(float(i + 1) / len(block), msg), cr=True)

        if __debug__:
            # just to get to new line
            debug('SLC', '')
        # now make it a dataset and a list to make it compatible with the rest
        results = [Dataset(results, sa=first_res.sa, a=dict(a), fa=dict(fa))]

        if self.results_postproc_fx:
            if __debug__:
                debug(
                    'SLC',
                    "Post-processing %d results in proc_block using %s" %
                    (len(results), self.results_postproc_fx))
            results = self.results_postproc_fx(results)
        if self.results_backend == 'native':
            pass  # nothing special
        elif self.results_backend == 'hdf5':
            # store results in a temporary file and return a filename
            results_file = tempfile.mktemp(prefix=self.tmp_prefix,
                                           suffix='-%s.hdf5' % iblock)
            if __debug__:
                debug('SLC', "Storing results into %s" % results_file)
            h5save(results_file, results)
            if __debug__:
                debug('SLC_', "Results stored")
            results = results_file
        else:
            raise RuntimeError("Must not reach this point")
        return results
Ejemplo n.º 15
0
    def _proc_block(self, block, ds, measure, seed=None, iblock='main'):
        """Little helper to capture the parts of the computation that can be
        parallelized

        Parameters
        ----------
        seed
          RNG seed.  Should be provided e.g. in child process invocations
          to guarantee that they all seed differently to not keep generating
          the same sequencies due to reusing the same copy of numpy's RNG
        block
          Critical for generating non-colliding temp filenames in case
          of hdf5 backend.  Otherwise RNGs of different processes might
          collide in their temporary file names leading to problems.
        """
        if seed is not None:
            mvpa2.seed(seed)
        if __debug__:
            debug('SLC',
                  "Starting computing block for %i elements" % len(block))
            start_time = time.time()
        results = []
        store_roi_feature_ids = self.ca.is_enabled('roi_feature_ids')
        store_roi_sizes = self.ca.is_enabled('roi_sizes')
        store_roi_center_ids = self.ca.is_enabled('roi_center_ids')

        assure_dataset = any(
            [store_roi_feature_ids, store_roi_sizes, store_roi_center_ids])

        # put rois around all features in the dataset and compute the
        # measure within them
        bar = ProgressBar()

        for i, f in enumerate(block):
            res, roi = self.__process_roi(ds, f, measure, assure_dataset)
            results.append(res)

            if __debug__:
                msg = 'ROI %i (%i/%i), %i features' % \
                            (f + 1, i + 1, len(block), roi.nfeatures)
                debug('SLC', bar(float(i + 1) / len(block), msg), cr=True)

        if __debug__:
            # just to get to new line
            debug('SLC', '')

        if self.results_postproc_fx:
            if __debug__:
                debug(
                    'SLC',
                    "Post-processing %d results in proc_block using %s" %
                    (len(results), self.results_postproc_fx))
            results = self.results_postproc_fx(results)
        if self.results_backend == 'native':
            pass  # nothing special
        elif self.results_backend == 'hdf5':
            # store results in a temporary file and return a filename
            results_file = tempfile.mktemp(prefix=self.tmp_prefix,
                                           suffix='-%s.hdf5' % iblock)
            if __debug__:
                debug('SLC', "Storing results into %s" % results_file)
            h5save(results_file, results)
            if __debug__:
                debug('SLC_', "Results stored")
            results = results_file
        else:
            raise RuntimeError("Must not reach this point")
        return results
Ejemplo n.º 16
0
"""

import numpy as np


"""

"""

import mvpa2
from mvpa2.base import cfg
from mvpa2.misc.data_generators import *
from mvpa2.clfs.knn import kNN
from mvpa2.misc.plot import *

mvpa2.seed(0)                            # to reproduce the plot

dataset_kwargs = dict(nfeatures=2, nchunks=10,
    snr=2, nlabels=4, means=[ [0,1], [1,0], [1,1], [0,0] ])

dataset_train = normal_feature_dataset(**dataset_kwargs)
dataset_plot = normal_feature_dataset(**dataset_kwargs)


# make a new figure
pl.figure(figsize=(9, 9))

for i,k in enumerate((1, 3, 9, 20)):
    knn = kNN(k)

    print "Processing kNN(%i) problem..." % k