def __init__(self, **kwargs):
     _shpaldebug("Initializing.")
     ClassWithCollections.__init__(self, **kwargs)
     self.ndatasets = 0
     self.nfeatures = 0
     self.projections = None
      # This option ensures that the roi_seed in each searchlight (SL) is always selected during feature selection
     self.force_roi_seed = True
     if self.params.nproc is not None and self.params.nproc > 1 \
             and not externals.exists('pprocess'):
         raise RuntimeError("The 'pprocess' module is required for "
                            "multiprocess searchlights. Please either "
                            "install python-pprocess, or reduce `nproc` "
                            "to 1 (got nproc=%i) or set to default None"
                            % self.params.nproc)
     if not externals.exists('scipy'):
         raise RuntimeError("The 'scipy' module is required for "
                            "searchlight hyperalignment.")
     if self.params.results_backend == 'native':
         raise NotImplementedError("'native' mode to handle results is still a "
                                   "work in progress.")
         #warning("results_backend is set to 'native'. This has been known"
         #        "to result in longer run time when working with big datasets.")
     if self.params.results_backend == 'hdf5' and \
             not externals.exists('h5py'):
         raise RuntimeError("The 'hdf5' module is required for "
                            "when results_backend is set to 'hdf5'")
Example 2
    def __init__(self, sd=0, distribution='rdist', fpp=None, nbins=400, **kwargs):
        """L2-Norm the values, convert them to p-values of a given distribution.

        Parameters
        ----------
        sd : int
          Samples dimension (if len(x.shape)>1) on which to operate
        distribution : string
          Which distribution to use. Currently only 'rdist' is known
          (a normal distribution may be added later).
        fpp : float
          If not None, the p-value (both tails) at which to control for false
          positives. The tails (tentative true positives) are pruned iteratively
          until the empirical p-value becomes less than or equal to the numerical one.
        nbins : int
          Number of bins for the iterative pruning of positives

        WARNING: Highly experimental and slow; no theoretical grounds have been
        presented in any paper, nor proven.
        """
        externals.exists('scipy', raise_=True)
        ClassWithCollections.__init__(self, **kwargs)

        self.sd = sd
        if distribution not in ('rdist',):
            raise ValueError, "Only 'rdist' is supported at the moment," \
                  " got %s" % distribution
        self.distribution = distribution
        self.fpp = fpp
        self.nbins = nbins
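
A hedged usage sketch for this constructor; the import of `DistPValue` is assumed (its module path varies across PyMVPA versions), and the input layout mirrors the unit test in Example 16 below:

import numpy as np

# DistPValue is assumed to be imported from its PyMVPA module
x = np.random.normal(size=(220, 2))          # same layout as the test in Example 16
dp = DistPValue(sd=0, distribution='rdist')  # defaults, written out explicitly
p = dp(x)                                    # per-feature p-values under the fitted rdist
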
Example 3
    def _acquire_externals(self, out):
        # Test and list all dependencies:
        sdeps = {True: [], False: [], 'Error': []}
        for dep in sorted(externals._KNOWN):
            try:
                sdeps[externals.exists(dep, force=False)] += [dep]
            except:
                sdeps['Error'] += [dep]
        out.write('EXTERNALS:\n')
        out.write(' Present:       %s\n' % ', '.join(sdeps[True]))
        out.write(' Absent:        %s\n' % ', '.join(sdeps[False]))
        if len(sdeps['Error']):
            out.write(' Errors in determining: %s\n' % ', '.join(sdeps['Error']))

        SV = ('.__version__', )              # standard versioning
        out.write(' Versions of critical externals:\n')
        # First the ones known to externals,
        for k, v in sorted(externals.versions.iteritems()):
            out.write('  %-12s: %s\n' % (k, str(v)))
        try:
            if externals.exists('matplotlib'):
                import matplotlib
                out.write(' Matplotlib backend: %s\n'
                          % matplotlib.get_backend())
        except Exception, exc:
            out.write(' Failed to determine backend of matplotlib due to "%s"'
                      % str(exc))
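
This method feeds PyMVPA's environment report. Assuming the usual top-level entry point, the same information can be printed interactively (a sketch, not a guaranteed API across versions):

import mvpa2

# prints the full report, including the EXTERNALS section assembled above
print(mvpa2.wtf())
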
Example 4
    def __init__(self, generator, queryengine, errorfx=mean_mismatch_error,
                 indexsum=None,
                 reuse_neighbors=False,
                 splitter=None,
                 **kwargs):
        """Initialize the base class for "naive" searchlight classifiers

        Parameters
        ----------
        generator : `Generator`
          Some `Generator` to prepare partitions for cross-validation.
          It must not change "targets", thus e.g. no AttributePermutator.
        errorfx : func, optional
          Functor that computes a scalar error value from the vectors of
          desired and predicted values (e.g. subclass of `ErrorFunction`).
        indexsum : ('sparse', 'fancy'), optional
          What to use to compute sums over arbitrary columns.  'fancy'
          corresponds to regular fancy indexing over columns, whereas
          'sparse' uses a product of sparse matrices (usually
          faster, so it is the default if `scipy` is available).
        reuse_neighbors : bool, optional
          Compute neighborhood information only once, allowing for
          efficient reuse on subsequent calls where the dataset's feature
          attributes remain the same (e.g. during permutation testing).
        splitter : Splitter, optional
          Splitter used to split partitioned datasets.  If None, the
          standard splitter operating on partitions is used.
        """

        # init base class first
        BaseSearchlight.__init__(self, queryengine, **kwargs)

        self._errorfx = errorfx
        self._generator = generator
        self._splitter = splitter

        # TODO: move into _call since resetting over default None
        #       obscures __repr__
        if indexsum is None:
            if externals.exists('scipy'):
                indexsum = 'sparse'
            else:
                indexsum = 'fancy'
        else:
            if indexsum == 'sparse' and not externals.exists('scipy'):
                warning("Scipy.sparse isn't available so taking 'fancy' as "
                        "'indexsum' method.")
                indexsum = 'fancy'
        self._indexsum = indexsum

        if not self.nproc in (None, 1):
            raise NotImplementedError, "For now only nproc=1 (or None for " \
                  "autodetection) is supported by GNBSearchlight"

        self.__pb = None            # statistics per each block/label
        self.__reuse_neighbors = reuse_neighbors

        # Storage to be used for neighborhood information
        self.__roi_fids = None
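
The 'sparse' versus 'fancy' choice for `indexsum` concerns how per-ROI sums over feature columns are computed. A standalone sketch of the two equivalent strategies (illustrative only, not PyMVPA's internal code path):

import numpy as np
from scipy import sparse

data = np.random.rand(10, 6)                    # samples x features
rois = [[0, 1], [2, 3, 4], [5]]                 # feature ids per ROI

# 'fancy': fancy-index the columns of each ROI and sum them
fancy = np.array([data[:, ids].sum(axis=1) for ids in rois]).T

# 'sparse': build a features x ROIs indicator matrix, then one product
indicator = sparse.lil_matrix((data.shape[1], len(rois)))
for j, ids in enumerate(rois):
    for i in ids:
        indicator[i, j] = 1
sparse_sum = indicator.tocsc().T.dot(data.T).T  # usually faster for many ROIs

assert np.allclose(fancy, sparse_sum)
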
Example 5
    def _postcall(self, dataset, result):
        """Some postprocessing on the result
        """
        if self.__null_dist is None:
            # do base-class postcall and be done
            result = super(Measure, self)._postcall(dataset, result)
        else:
            # don't do a full base-class postcall, only do the
            # postproc-application here, to gain result compatibility with the
            # fitted null distribution -- necessary to be able to use
            # a Node's 'pass_attr' to pick up ca.null_prob
            result = self._apply_postproc(dataset, result)

            if self.ca.is_enabled('null_t'):
                # get probability under the NULL hypothesis, but also request
                # whether it belongs to the right tail
                null_prob, null_right_tail = \
                           self.__null_dist.p(result, return_tails=True)
                self.ca.null_prob = null_prob

                externals.exists('scipy', raise_=True)
                from scipy.stats import norm

                # TODO: following logic should appear in NullDist,
                #       not here
                tail = self.null_dist.tail
                if tail == 'left':
                    acdf = np.abs(null_prob.samples)
                elif tail == 'right':
                    acdf = 1.0 - np.abs(null_prob.samples)
                elif tail in ['any', 'both']:
                    acdf = 1.0 - np.clip(np.abs(null_prob.samples), 0, 0.5)
                else:
                    raise RuntimeError, 'Unhandled tail %s' % tail
                # We need to clip to avoid non-informative inf's ;-)
                # that arise from the limited precision of the double
                # mantissa (52 bits). We could clip values around 0 at
                # as low as 1e-100 (corresponding to z ~= 21), but for
                # consistency let's clip at 1e-16, which still yields
                # distinguishable values around p=1 and a max z of 8.2.
                # That should be a sufficient range of z-values ;-)
                clip = 1e-16
                null_t = norm.ppf(np.clip(acdf, clip, 1.0 - clip))
                # assure that we deal with arrays:
                null_t = np.array(null_t, ndmin=1, copy=False)
                null_t[~null_right_tail] *= -1.0 # revert sign for negatives
                null_t_ds = null_prob.copy(deep=False)
                null_t_ds.samples = null_t
                self.ca.null_t = null_t_ds          # store as a Dataset
            else:
                # get probability of result under NULL hypothesis if available
                # and don't request tail information
                self.ca.null_prob = self.__null_dist.p(result)
            # now do the second half of postcall and invoke pass_attr
            result = self._pass_attr(dataset, result)
        return result
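
The tail handling above reduces to pushing a (possibly folded) p-value through the inverse normal CDF with clipping. A minimal numeric sketch, independent of PyMVPA:

import numpy as np
from scipy.stats import norm

p = np.array([1e-30, 0.01, 0.5, 0.99])            # right-tail probabilities
clip = 1e-16                                      # same bound as in the code above
z = norm.ppf(np.clip(1.0 - p, clip, 1.0 - clip))
# the 1e-30 entry is clipped, giving z of about 8.2 instead of +inf
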
Example 6
 def __init__(self, datameasure, queryengine, add_center_fa=False,
              results_backend='native',
              results_fx=None,
              tmp_prefix='tmpsl',
              nblocks=None,
              **kwargs):
     """
     Parameters
     ----------
     datameasure : callable
       Any object that takes a :class:`~mvpa2.datasets.base.Dataset`
       and returns some measure when called.
     add_center_fa : bool or str
       If True or a string, each searchlight ROI dataset will have a boolean
       vector as a feature attribute that indicates the feature that is the
       seed (e.g. sphere center) for the respective ROI. If True, the
       attribute is named 'roi_seed', the provided string is used as the name
       otherwise.
     results_backend : ('native', 'hdf5'), optional
       Specifies the way results are provided back from a processing block
       in case of nproc > 1. 'native' is pickling/unpickling of results by
       pprocess, while 'hdf5' would use h5save/h5load functionality.
       'hdf5' might be more time and memory efficient in some cases.
      results_fx : callable, optional
        Function to process/combine the results of each searchlight
        block run.  By default they are simply all appended into a
        list.  It receives as keyword arguments sl, dataset,
        roi_ids, and results (an iterable of lists), and is responsible
        for assigning the roi_* conditional attributes (ca's).
     tmp_prefix : str, optional
        If specified, serves as a prefix for temporary file storage
        when results_backend == 'hdf5'.  It can thus include the directory
        to use (a trailing path separator is not added automagically).
     nblocks : None or int
       Into how many blocks to split the computation (could be larger than
       nproc).  If None -- nproc is used.
     **kwargs
       In addition this class supports all keyword arguments of its
       base-class :class:`~mvpa2.measures.searchlight.BaseSearchlight`.
     """
     BaseSearchlight.__init__(self, queryengine, **kwargs)
     self.datameasure = datameasure
     self.results_backend = results_backend.lower()
     if self.results_backend == 'hdf5':
         # Assure having hdf5
         externals.exists('h5py', raise_=True)
     self.results_fx = Searchlight._concat_results \
                       if results_fx is None else results_fx
     self.tmp_prefix = tmp_prefix
     self.nblocks = nblocks
     if isinstance(add_center_fa, str):
         self.__add_center_fa = add_center_fa
     elif add_center_fa:
         self.__add_center_fa = 'roi_seed'
     else:
         self.__add_center_fa = False
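
A hedged usage sketch of these parameters through the `sphere_searchlight` convenience wrapper (the same entry point the tests in Examples 14 and 18 use); the measure, radius, and paths are illustrative only:

from mvpa2.measures.searchlight import sphere_searchlight

# my_measure: any callable accepting a Dataset, e.g. a cross-validation object
sl = sphere_searchlight(my_measure,
                        radius=3,
                        results_backend='hdf5',    # ship block results via h5save/h5load
                        tmp_prefix='/tmp/slres_',  # directory plus file prefix
                        nblocks=8)
res = sl(ds)                                       # ds: an existing fMRI Dataset
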
Example 7
 def plot(self):
     """Plot correlation coefficients
     """
     externals.exists('pylab', raise_=True)
     import pylab as pl
     pl.plot(self['corrcoef'])
     pl.title('Auto-correlation of the sequence')
     pl.xlabel('Offset')
     pl.ylabel('Correlation Coefficient')
     pl.show()
Example 8
    def __init__(self, source):
        """Reader MEG data from texfiles or file-like objects.

        Parameters
        ----------
        source : str or file-like
          Strings are assumed to be filenames (compressed if they carry a
          `.gz` suffix), while all other object types are treated as
          file-like objects.
        """
        self.ntimepoints = None
        self.timepoints = None
        self.nsamples = None
        self.channelids = []
        self.data = []
        self.samplingrate = None

        # open textfiles
        if isinstance(source, str):
            if source.endswith(".gz"):
                externals.exists("gzip", raise_=True)
                import gzip

                source = gzip.open(source, "r")
            else:
                source = open(source, "r")

        # read file
        for line in source:
            # split ID
            colon = line.find(":")

            # ignore lines without id
            if colon == -1:
                continue

            id = line[:colon]
            data = line[colon + 1 :].strip()
            if id == "Sample Number":
                timepoints = np.fromstring(data, dtype=int, sep="\t")
                # one more as it starts with zero
                self.ntimepoints = int(timepoints.max()) + 1
                self.nsamples = int(len(timepoints) / self.ntimepoints)
            elif id == "Time":
                self.timepoints = np.fromstring(data, dtype=float, count=self.ntimepoints, sep="\t")
                self.samplingrate = self.ntimepoints / (self.timepoints[-1] - self.timepoints[0])
            else:
                # load data
                self.data.append(np.fromstring(data, dtype=float, sep="\t").reshape(self.nsamples, self.ntimepoints))
                # store id
                self.channelids.append(id)

        # reshape data from (channels x samples x timepoints) to
        # (samples x channels x timepoints)
        self.data = np.swapaxes(np.array(self.data), 0, 1)
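
A small sketch of the text format this reader expects: colon-separated id/data lines with tab-separated values, where 'Sample Number' and 'Time' are special ids. The class name is not visible in the snippet, so the instantiation is only hinted at:

from StringIO import StringIO   # Python 2, matching the codebase

meg_txt = ("Sample Number:\t0\t1\t2\t0\t1\t2\n"
           "Time:\t0.000\t0.001\t0.002\n"
           "MEG 001:\t1.0\t2.0\t3.0\t4.0\t5.0\t6.0\n"
           "MEG 002:\t0.1\t0.2\t0.3\t0.4\t0.5\t0.6\n")
# reader = TheMEGReaderClass(StringIO(meg_txt))  # hypothetical name; the snippet omits it
# expected: ntimepoints == 3, nsamples == 2, data.shape == (2, 2, 3)
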
Example 9
    def test_externals_correct2nd_invocation(self):
        # always fails
        externals._KNOWN['checker2'] = 'raise ImportError'

        self.assertTrue(not externals.exists('checker2'),
                        msg="Should be False on 1st invocation")

        self.assertTrue(not externals.exists('checker2'),
                        msg="Should be False on 2nd invocation as well")

        externals._KNOWN.pop('checker2')
Example 10
    def plot(self):
        """Plot correlation coefficients
        """
        externals.exists("pylab", raise_=True)
        import pylab as pl

        pl.plot(self["corrcoef"])
        pl.title("Auto-correlation of the sequence")
        pl.xlabel("Offset")
        pl.ylabel("Correlation Coefficient")
        pl.show()
Example 11
File: rsa.py Project: PyMVPA/PyMVPA
    def _call(self, dataset):
        externals.exists('skl', raise_=True)
        from sklearn.linear_model import Lasso, Ridge
        from sklearn.preprocessing import scale

        # first run PDist
        compute_dsm = PDist(pairwise_metric=self.params.pairwise_metric,
                            center_data=self.params.center_data)
        dsm = compute_dsm(dataset)
        dsm_samples = dsm.samples

        if self.params.rank_data:
            dsm_samples = rankdata(dsm_samples)
            predictors = np.apply_along_axis(rankdata, 0, self.predictors)
        else:
            predictors = self.predictors

        if self.params.normalize:
            predictors = scale(predictors, axis=0)
            dsm_samples = scale(dsm_samples, axis=0)

        # keep only the item we want
        if self.keep_pairs is not None:
            dsm_samples = dsm_samples[self.keep_pairs]
            predictors = predictors[self.keep_pairs, :]

        # check that predictors and samples have the correct dimensions
        if dsm_samples.shape[0] != predictors.shape[0]:
            raise ValueError('computed dsm has {0} rows, while predictors have '
                             '{1} rows. Check that predictors have the right '
                             'shape'.format(dsm_samples.shape[0],
                                            predictors.shape[0]))

        # now fit the regression
        if self.params.method == 'lasso':
            reg = Lasso
        elif self.params.method == 'ridge':
            reg = Ridge
        else:
            raise ValueError('I do not know method {0}'.format(self.params.method))
        reg_ = reg(alpha=self.params.alpha, fit_intercept=self.params.fit_intercept)
        reg_.fit(predictors, dsm_samples)

        coefs = reg_.coef_.reshape(-1, 1)

        sa = ['coef' + str(i) for i in range(len(coefs))]

        if self.params.fit_intercept:
            coefs = np.vstack((coefs, reg_.intercept_))
            sa += ['intercept']

        return Dataset(coefs, sa={'coefs': sa})
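
A standalone sketch of the core computation with scikit-learn alone: regress a target dissimilarity vector on stacked predictor dissimilarities and read off the coefficients. This is not PyMVPA's `_call`, and the data below are random placeholders:

import numpy as np
from sklearn.linear_model import Ridge
from sklearn.preprocessing import scale

n_pairs = 45                                    # e.g. 10 conditions -> 45 pairs
target_dsm = np.random.rand(n_pairs)            # stand-in for PDist(dataset).samples
predictors = np.random.rand(n_pairs, 3)         # three model dissimilarity vectors

reg = Ridge(alpha=1.0, fit_intercept=True)
reg.fit(scale(predictors, axis=0), scale(target_dsm))
coefs = np.hstack((reg.coef_, reg.intercept_))  # one weight per model, plus intercept
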
Example 12
    def test_externals_no_double_invocation(self):
        # no external should be checked twice (unless explicitly
        # forced)

        class Checker(object):
            """Helper class to increment count of actual checks"""
            def __init__(self): self.checked = 0
            def check(self): self.checked += 1

        checker = Checker()

        externals._KNOWN['checker'] = 'checker.check()'
        externals.__dict__['checker'] = checker
        externals.exists('checker')
        self.assertEqual(checker.checked, 1)
        externals.exists('checker')
        self.assertEqual(checker.checked, 1)
        externals.exists('checker', force=True)
        self.assertEqual(checker.checked, 2)
        externals.exists('checker')
        self.assertEqual(checker.checked, 2)

        # restore original externals
        externals.__dict__.pop('checker')
        externals._KNOWN.pop('checker')
Example 13
    def _postcall(self, dataset, result):
        """Some postprocessing on the result
        """
        self.ca.raw_results = result

        # post-processing
        result = super(Measure, self)._postcall(dataset, result)
        if not self.__null_dist is None:
            if self.ca.is_enabled("null_t"):
                # get probability under the NULL hypothesis, but also request
                # whether it belongs to the right tail
                null_prob, null_right_tail = self.__null_dist.p(result, return_tails=True)
                self.ca.null_prob = null_prob

                externals.exists("scipy", raise_=True)
                from scipy.stats import norm

                # TODO: following logic should appear in NullDist,
                #       not here
                tail = self.null_dist.tail
                if tail == "left":
                    acdf = np.abs(null_prob.samples)
                elif tail == "right":
                    acdf = 1.0 - np.abs(null_prob.samples)
                elif tail in ["any", "both"]:
                    acdf = 1.0 - np.clip(np.abs(null_prob.samples), 0, 0.5)
                else:
                    raise RuntimeError, "Unhandled tail %s" % tail
                # We need to clip to avoid non-informative inf's ;-)
                # that arise from the limited precision of the double
                # mantissa (52 bits). We could clip values around 0 at
                # as low as 1e-100 (corresponding to z ~= 21), but for
                # consistency let's clip at 1e-16, which still yields
                # distinguishable values around p=1 and a max z of 8.2.
                # That should be a sufficient range of z-values ;-)
                clip = 1e-16
                null_t = norm.ppf(np.clip(acdf, clip, 1.0 - clip))
                # assure that we deal with arrays:
                null_t = np.array(null_t, ndmin=1, copy=False)
                null_t[~null_right_tail] *= -1.0  # revert sign for negatives
                null_t_ds = null_prob.copy(deep=False)
                null_t_ds.samples = null_t
                self.ca.null_t = null_t_ds  # store as a Dataset
            else:
                # get probability of result under NULL hypothesis if available
                # and don't request tail information
                self.ca.null_prob = self.__null_dist.p(result)

        return result
Example 14
    def test_swaroop_case(self, preallocate_output):
        """Test hdf5 backend to pass results on Swaroop's usecase
        """
        skip_if_no_external('h5py')
        from mvpa2.measures.base import Measure
        class sw_measure(Measure):
            def __init__(self):
                Measure.__init__(self, auto_train=True)
            def _call(self, dataset):
                # For performance measures -- increase to 50-200
                # np.sum here is just to get some meaningful value in
                # them
                #return np.ones(shape=(2, 2))*np.sum(dataset)
                return Dataset(
                    np.array([{'d': np.ones(shape=(5, 5)) * np.sum(dataset)}],
                             dtype=object))
        results = []
        ds = datasets['3dsmall'].copy(deep=True)
        ds.fa['voxel_indices'] = ds.fa.myspace

        our_custom_prefix = tempfile.mktemp()
        for backend in ['native'] + \
                (externals.exists('h5py') and ['hdf5'] or []):
            sl = sphere_searchlight(sw_measure(),
                                    radius=1,
                                    tmp_prefix=our_custom_prefix,
                                    results_backend=backend,
                                    preallocate_output=preallocate_output)
            t0 = time.time()
            results.append(np.asanyarray(sl(ds)))
            # print "Done for backend %s in %d sec" % (backend, time.time() - t0)
        # Because of Swaroop's ad-hoc use case, and lacking a working
        # assert_objectarray_equal, let's compare manually
        #assert_objectarray_equal(*results)
        if not externals.exists('h5py'):
            self.assertRaises(RuntimeError,
                              sphere_searchlight,
                              sw_measure(),
                              results_backend='hdf5')
            raise SkipTest('h5py required for test of backend="hdf5"')
        assert_equal(results[0].shape, results[1].shape)
        results = [r.flatten() for r in results]
        for x, y in zip(*results):
            assert_equal(x.keys(), y.keys())
            assert_array_equal(x['d'], y['d'])
        # verify that no junk is left behind
        tempfiles = glob.glob(our_custom_prefix + '*')
        assert_equal(len(tempfiles), 0)
Example 15
    def test_h5support(self):
        sh = (20, 20, 20)
        msk = np.zeros(sh)
        for i in xrange(0, sh[0], 2):
            msk[i, :, :] = 1
        vg = volgeom.VolGeom(sh, np.identity(4), mask=msk)

        density = 20

        outer = surf.generate_sphere(density) * 10. + 5
        inner = surf.generate_sphere(density) * 5. + 5

        intermediate = outer * .5 + inner * .5
        xyz = intermediate.vertices

        radius = 50

        backends = ['native', 'hdf5']

        for i, backend in enumerate(backends):
            if backend == 'hdf5' and not externals.exists('h5py'):
                continue

            sel = surf_voxel_selection.run_voxel_selection(radius, vg, inner,
                            outer, results_backend=backend)

            if i == 0:
                sel0 = sel
            else:
                assert_equal(sel0, sel)
Example 16
    def test_dist_p_value(self):
        """Basic testing of DistPValue"""
        if not externals.exists('scipy'):
            return
        ndb = 200
        ndu = 20
        nperd = 2
        pthr = 0.05
        Nbins = 400

        # Let's generate already normed data (on a sphere) and add some non-bogus features
        datau = (np.random.normal(size=(nperd, ndb)))
        dist = np.sqrt((datau * datau).sum(axis=1))

        datas = (datau.T / dist.T).T
        tn = datax = datas[0, :]
        dataxmax = np.max(np.abs(datax))

        # now lets add true positive features
        tp = [-dataxmax * 1.1] * (ndu//2) + [dataxmax * 1.1] * (ndu//2)
        x = np.hstack((datax, tp))

        # lets add just pure normal to it
        x = np.vstack((x, np.random.normal(size=x.shape))).T
        for distPValue in (DistPValue(), DistPValue(fpp=0.05)):
            result = distPValue(x)
            self.assertTrue((result >= 0).all())
            self.assertTrue((result <= 1).all())

        if cfg.getboolean('tests', 'labile', default='yes'):
            self.assertTrue(distPValue.ca.positives_recovered[0] > 10)
            self.assertTrue((np.array(distPValue.ca.positives_recovered) +
                             np.array(distPValue.ca.nulldist_number) == ndb + ndu).all())
            self.assertEqual(distPValue.ca.positives_recovered[1], 0)
Example 17
File: sg.py Project: Anhmike/PyMVPA
    def __init__(self, normalizer_cls=None, normalizer_args=None, **kwargs):
        """
        Parameters
        ----------
        normalizer_cls : sg.Kernel.CKernelNormalizer
          Class to use as a normalizer for the kernel.  Will be instantiated
          upon compute().  Only supported for shogun >= 0.6.5.
          By default (if left None) assigns IdentityKernelNormalizer to assure no
          normalization.
        normalizer_args : None or list
          If necessary, provide a list of arguments for the normalizer.
        """
        SGKernel.__init__(self, **kwargs)
        if (normalizer_cls is not None) and (versions['shogun:rev'] < 3377):
            raise ValueError, \
               "Normalizer specification is supported only for sg >= 0.6.5. " \
               "Please upgrade shogun python modular bindings."

        if normalizer_cls is None and exists('sg ge 0.6.5'):
            normalizer_cls = sgk.IdentityKernelNormalizer
        self._normalizer_cls = normalizer_cls

        if normalizer_args is None:
            normalizer_args = []
        self._normalizer_args = normalizer_args
Example 18
    def test_preallocate_output(self, nblocks):
        ds = datasets['3dsmall'].copy()[:, :25] # smaller copy
        ds.fa['voxel_indices'] = ds.fa.myspace
        ds.fa['feature_id'] = np.arange(ds.nfeatures)

        def measure(ds):
            # return more than one sample
            return np.repeat(ds.fa.feature_id, 10, axis=0)

        nprocs = [1, 2] if externals.exists('pprocess') else [1]
        enable_ca = ['roi_sizes', 'raw_results', 'roi_feature_ids']
        for nproc in nprocs:
            sl = sphere_searchlight(measure,
                                    radius=0,
                                    center_ids=np.arange(ds.nfeatures),
                                    nproc=nproc,
                                    enable_ca=enable_ca,
                                    nblocks=nblocks
                                    )
            sl_inplace = sphere_searchlight(measure,
                                    radius=0,
                                    preallocate_output=True,
                                    center_ids=np.arange(ds.nfeatures),
                                    nproc=nproc,
                                    enable_ca=enable_ca,
                                    nblocks=nblocks
                                    )
            out = sl(ds)
            out_inplace = sl_inplace(ds)

            for c in enable_ca:
                assert_array_equal(sl.ca[c].value, sl_inplace.ca[c].value)
            assert_array_equal(out.samples, out_inplace.samples)
            assert_array_equal(out.fa.center_ids, out_inplace.fa.center_ids)
Example 19
def save(dataset, destination, name=None, compression=None):
    """Save Dataset into HDF5 file

    Parameters
    ----------
    dataset : `Dataset`
    destination : `h5py.highlevel.File` or str
    name : str, optional
    compression : None or int or {'gzip', 'szip', 'lzf'}, optional
      Level of compression for gzip, or another compression strategy.
    """
    if not externals.exists('h5py'):
        raise RuntimeError("Missing 'h5py' package -- saving is not possible.")

    import h5py
    from mvpa2.base.hdf5 import obj2hdf

    # look if we got an hdf file instance already
    if isinstance(destination, h5py.highlevel.File):
        own_file = False
        hdf = destination
    else:
        own_file = True
        hdf = h5py.File(destination, 'w')

    obj2hdf(hdf, dataset, name, compression=compression)

    # if we opened the file ourselves we close it now
    if own_file:
        hdf.close()
    return
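
A hedged round-trip sketch for this helper together with `h5load` from `mvpa2.base.hdf5`; the file name is illustrative, and the default `name=None` is assumed to store the dataset at the file root where `h5load` finds it:

import numpy as np
from mvpa2.datasets.base import Dataset
from mvpa2.base.hdf5 import h5load

ds = Dataset(np.random.rand(4, 3))
save(ds, '/tmp/ds.hdf5', compression='gzip')    # the function defined above
restored = h5load('/tmp/ds.hdf5')
assert np.all(ds.samples == restored.samples)
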
Example 20
def skip_if_no_external(dep, ver_dep=None, min_version=None, max_version=None):
    """Raise SkipTest if external is missing

    Parameters
    ----------
    dep : string
      Name of the external
    ver_dep : string, optional
      If for version checking use some different key, e.g. shogun:rev.
      If not specified, `dep` will be used.
    min_version : None or string or tuple
      Minimal required version
    max_version : None or string or tuple
      Maximal required version
    """

    if not externals.exists(dep):
        raise SkipTest, "External %s is not present thus tests battery skipped" % dep

    if ver_dep is None:
        ver_dep = dep

    if min_version is not None and externals.versions[ver_dep] < min_version:
        raise SkipTest, "Minimal version %s of %s is required. Present version is %s" ". Test was skipped." % (
            min_version,
            ver_dep,
            externals.versions[ver_dep],
        )

    if max_version is not None and externals.versions[ver_dep] > max_version:
        raise SkipTest, "Maximal version %s of %s is required. Present version is %s" ". Test was skipped." % (
            max_version,
            ver_dep,
            externals.versions[ver_dep],
        )
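
Typical use at the top of a test method; the version keys follow `externals.versions`, e.g. the `shogun:rev` key mentioned in the docstring, and the concrete versions below are only illustrative:

def test_needs_recent_externals(self):
    # skip (rather than fail) when a dependency or version requirement is unmet
    skip_if_no_external('scipy', min_version='0.7.0')
    skip_if_no_external('shogun', ver_dep='shogun:rev', min_version=3377)
    pass  # actual test body would follow
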
Example 21
    def __init__(self, queryengine, roi_ids=None, nproc=None, **kwargs):
        """
        Parameters
        ----------
        queryengine : QueryEngine
          Engine to use to discover the "neighborhood" of each feature.
          See :class:`~mvpa2.misc.neighborhood.QueryEngine`.
        roi_ids : None or list(int) or str
          List of feature ids (not coordinates) that shall serve as ROI seeds
          (e.g. sphere centers). Alternatively, this can be the name of a
          feature attribute of the input dataset, whose non-zero values
          determine the feature ids. By default all features will be used.
        nproc : None or int
          How many processes to use for computation.  Requires `pprocess`
          external module.  If None -- all available cores will be used.
        **kwargs
          In addition this class supports all keyword arguments of its
          base-class :class:`~mvpa2.measures.base.Measure`.
      """
        Measure.__init__(self, **kwargs)

        if nproc is not None and nproc > 1 and not externals.exists('pprocess'):
            raise RuntimeError("The 'pprocess' module is required for "
                               "multiprocess searchlights. Please either "
                               "install python-pprocess, or reduce `nproc` "
                               "to 1 (got nproc=%i)" % nproc)

        self._queryengine = queryengine
        if roi_ids is not None and not isinstance(roi_ids, str) \
                and not len(roi_ids):
            raise ValueError, \
                  "Cannot run searchlight on an empty list of roi_ids"
        self.__roi_ids = roi_ids
        self.nproc = nproc
Example 22
def test_product_flatten():
    nsamples = 17
    product_name_values = [('chan', ['C1', 'C2']),
                         ('freq', np.arange(4, 20, 6)),
                         ('time', np.arange(-200, 800, 200))]

    shape = (nsamples,) + tuple(len(v) for _, v in product_name_values)

    sample_names = ['samp%d' % i for i in xrange(nsamples)]

    # generate random data in four dimensions
    data = np.random.normal(size=shape)
    ds = Dataset(data, sa=dict(sample_names=sample_names))

    # apply flattening to ds
    flattener = ProductFlattenMapper(product_name_values)

    # test I/O (only if h5py is available)
    if externals.exists('h5py'):
        from mvpa2.base.hdf5 import h5save, h5load
        import tempfile
        import os

        _, testfn = tempfile.mkstemp('mapper.h5py', 'test_product')
        h5save(testfn, flattener)
        flattener = h5load(testfn)
        os.unlink(testfn)

    mds = flattener(ds)

    prod = lambda x:reduce(operator.mul, x)

    # ensure the size is ok
    assert_equal(mds.shape, (nsamples,) + (prod(shape[1:]),))

    ndim = len(product_name_values)

    idxs = [range(len(v)) for _, v in product_name_values]
    for si in xrange(nsamples):
        for fi, p in enumerate(itertools.product(*idxs)):
            data_tup = (si,) + p

            x = mds[si, fi]

            # value should match
            assert_equal(data[data_tup], x.samples[0, 0])

            # indices should match as well
            all_idxs = tuple(x.fa['chan_freq_time_indices'].value.ravel())
            assert_equal(p, all_idxs)

            # values and indices in each dimension should match
            for i, (name, value) in enumerate(product_name_values):
                assert_equal(x.fa[name].value, value[p[i]])
                assert_equal(x.fa[name + '_indices'].value, p[i])

    product_name_values += [('foo', [1, 2, 3])]
    flattener = ProductFlattenMapper(product_name_values)
    assert_raises(ValueError, flattener, ds)
Example 23
    def _level3(self, datasets):
        params = self.params            # for quicker access ;)
        # create a mapper per dataset
        mappers = [deepcopy(params.alignment) for ds in datasets]

        # key different from level-2; the common space is uniform
        #temp_commonspace = commonspace
        # Fixing nproc=0
        if params.nproc == 0:
            from mvpa2.base import warning
            warning("nproc of 0 doesn't make sense. Setting nproc to 1.")
            params.nproc = 1
        # Checking for joblib, if not, set nproc to 1
        if params.nproc != 1:
            from mvpa2.base import externals, warning
            if not externals.exists('joblib'):
                warning("Setting nproc different from 1 requires joblib package, which "
                        "does not seem to exist. Setting nproc to 1.")
                params.nproc = 1

        # start from original input datasets again
        if params.nproc == 1:
            residuals = []
            for i, (m, ds_new) in enumerate(zip(mappers, datasets)):
                if __debug__:
                    debug('HPAL_', "Level 3: ds #%i" % i)
                m, residual = get_trained_mapper(ds_new, self.commonspace, m,
                                                 self.ca['residual_errors'].enabled)
                if self.ca['residual_errors'].enabled:
                    residuals.append(residual)
        else:
            if __debug__:
                debug('HPAL_', "Level 3: Using joblib with nproc = %d " % params.nproc)
            verbose_level_parallel = 20 \
                if (__debug__ and 'HPAL' in debug.active) else 0
            from joblib import Parallel, delayed
            import sys
            # joblib's 'multiprocessing' backend has known issues of failure on OSX
            # Tested with MacOS 10.12.13, python 2.7.13, joblib v0.10.3
            if params.joblib_backend is None:
                params.joblib_backend = 'threading' if sys.platform == 'darwin' \
                                        else 'multiprocessing'
            res = Parallel(
                    n_jobs=params.nproc, pre_dispatch=params.nproc,
                    backend=params.joblib_backend,
                    verbose=verbose_level_parallel
                    )(
                        delayed(get_trained_mapper)
                        (ds, self.commonspace, mapper, self.ca['residual_errors'].enabled)
                        for ds, mapper in zip(datasets, mappers)
                    )
            mappers = [m for m, r in res]
            if self.ca['residual_errors'].enabled:
                residuals = [r for m, r in res]

        if self.ca['residual_errors'].enabled:
            self.ca.residual_errors = Dataset(samples=np.array(residuals)[None, :])

        return mappers
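
The joblib pattern above, separated from the hyperalignment specifics; the backend choice mirrors the OSX workaround in the snippet, and the worker function is a stand-in for `get_trained_mapper`:

import sys
from joblib import Parallel, delayed

def _train_one(item):
    return item * item                # stand-in for get_trained_mapper(...)

backend = 'threading' if sys.platform == 'darwin' else 'multiprocessing'
results = Parallel(n_jobs=2, backend=backend, verbose=0)(
    delayed(_train_one)(i) for i in range(8))
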
Example 24
    def _call(self, dataset):
        """Perform the ROI search.
        """
        # local binding
        nproc = self.nproc

        if nproc is None and externals.exists('pprocess'):
            import pprocess
            try:
                nproc = pprocess.get_number_of_cores() or 1
            except AttributeError:
                warning("pprocess version %s has no API to figure out maximal "
                        "number of cores. Using 1"
                        % externals.versions['pprocess'])
                nproc = 1
        # train the queryengine
        self._queryengine.train(dataset)

        # decide whether to run on all possible center coords or just a provided
        # subset
        if isinstance(self.__roi_ids, str):
            roi_ids = dataset.fa[self.__roi_ids].value.nonzero()[0]
        elif self.__roi_ids is not None:
            roi_ids = self.__roi_ids
            # safeguard against stupidity
            if __debug__:
                if max(roi_ids) >= dataset.nfeatures:
                    raise IndexError, \
                          "Maximal center_id found is %s whenever given " \
                          "dataset has only %d features" \
                          % (max(roi_ids), dataset.nfeatures)
        else:
            roi_ids = np.arange(dataset.nfeatures)

        # pass to subclass
        results, roi_sizes = self._sl_call(dataset, roi_ids, nproc)

        if not roi_sizes is None:
            self.ca.roi_sizes = roi_sizes

        if 'mapper' in dataset.a:
            # since we know the space we can stick the original mapper into the
            # results as well
            if self.__roi_ids is None:
                results.a['mapper'] = copy.copy(dataset.a.mapper)
            else:
                # there is an additional selection step that needs to be
                # expressed by another mapper
                mapper = copy.copy(dataset.a.mapper)
                mapper.append(StaticFeatureSelection(roi_ids,
                                                     dshape=dataset.shape[1:]))
                results.a['mapper'] = mapper

        # charge state
        self.ca.raw_results = results

        # return raw results, base-class will take care of transformations
        return results
Example 25
    def _wm_reverse(self, data):
        if __debug__:
            debug('MAP', "Converting signal back using DWP")

        if self.__level is None:
            raise NotImplementedError
        else:
            if not externals.exists('pywt wp reconstruct'):
                raise NotImplementedError, \
                      "Reconstruction for a single level for versions of " \
                      "pywt < 0.1.7 (revision 103) is not supported"
            if not externals.exists('pywt wp reconstruct fixed'):
                warning("%s: Reverse mapping with this version of 'pywt' might "
                        "result in incorrect data in the tails of the signal. "
                        "Please check for an update of 'pywt', or be careful "
                        "when interpreting the edges of the reverse mapped "
                        "data." % self.__class__.__name__)
            return self.__reverse_single_level(data)
Example 26
    def _call(self, dataset):
        # This code is based on SciPy's stats.f_oneway()
        # Copyright (c) Gary Strangman.  All rights reserved
        # License: BSD
        #
        # However, it got tweaked and optimized to better fit into PyMVPA.

        # number of groups
        targets_sa = dataset.sa[self.get_space()]
        labels = targets_sa.value
        ul = targets_sa.unique

        na = len(ul)
        bign = float(dataset.nsamples)
        alldata = dataset.samples

        # total squares of sums
        sostot = np.sum(alldata, axis=0)
        sostot *= sostot
        sostot /= bign

        # total sum of squares
        sstot = np.sum(alldata * alldata, axis=0) - sostot

        # between group sum of squares
        ssbn = 0
        for l in ul:
            # all samples for the respective label
            d = alldata[labels == l]
            sos = np.sum(d, axis=0)
            sos *= sos
            ssbn += sos / float(len(d))

        ssbn -= sostot
        # within
        sswn = sstot - ssbn

        # degrees of freedom
        dfbn = na-1
        dfwn = bign - na

        # mean sums of squares
        msb = ssbn / float(dfbn)
        msw = sswn / float(dfwn)
        f = msb / msw
        # assure no NaNs -- otherwise it leads instead of
        # sane unittest failure (check of NaNs) to crazy
        #   File "mtrand.pyx", line 1661, in mtrand.shuffle
        #  TypeError: object of type 'numpy.int64' has no len()
        # without any sane backtrace
        f[np.isnan(f)] = 0

        if externals.exists('scipy'):
            from scipy.stats import fprob
            return Dataset(f[np.newaxis], fa={'fprob': fprob(dfbn, dfwn, f)})
        else:
            return Dataset(f[np.newaxis])
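
The hand-rolled F statistic can be checked per feature against `scipy.stats.f_oneway`; a small verification sketch with random data (note that `scipy.stats.fprob`, used above, was removed from later SciPy releases in favour of `scipy.stats.f.sf`):

import numpy as np
from scipy import stats

labels = np.repeat(['a', 'b', 'c'], 10)
data = np.random.normal(size=(30, 5))           # 30 samples, 5 features

f_manual = np.empty(5)
for fi in range(5):
    groups = [data[labels == l, fi] for l in ('a', 'b', 'c')]
    f_manual[fi] = stats.f_oneway(*groups)[0]
# these F values should match those computed by _call above;
# p-values then follow as stats.f.sf(f_manual, dfbn, dfwn)
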
Example 27
    def _acquire_externals(self, out):
        # Test and list all dependencies:
        sdeps = {True: [], False: [], 'Error': []}
        for dep in sorted(externals._KNOWN):
            try:
                sdeps[externals.exists(dep, force=False)] += [dep]
            except:
                sdeps['Error'] += [dep]
        out.write('EXTERNALS:\n')
        out.write(' Present:       %s\n' % ', '.join(sdeps[True]))
        out.write(' Absent:        %s\n' % ', '.join(sdeps[False]))
        if len(sdeps['Error']):
            out.write(' Errors in determining: %s\n' % ', '.join(sdeps['Error']))

        SV = ('.__version__', )              # standard versioning
        out.write(' Versions of critical externals:\n')
        # First the ones known to externals,
        # TODO: make all of them set in externals.versions
        for k, v in externals.versions.iteritems():
            out.write('  %-12s: %s\n' % (k, str(v)))
        for e, mname, fs in (
            ('ctypes', None, SV),
            ('matplotlib', None, SV),
            ('lxml', None, ('.etree.__version__',)),
            ('nifti', None, SV),
            ('numpy', None, SV),
            ('openopt', 'openopt', SV),
            ('openopt', 'scikits.openopt', ('.openopt.__version__',)),
            ('pywt', None, SV),
            #('rpy', None, ('.rpy_version',)),
            ('shogun', None, ('.Classifier.Version_get_version_release()',)),
            ):
            try:
                if not externals.exists(e):
                    continue #sver = 'not present'
                else:
                    if mname is None:
                        mname = e
                    m = __import__(mname)
                    svers = [eval('m%s' % (f,)) for f in fs]
                    sver = ' '.join(svers)
            except Exception, exc:
                sver = 'failed to query due to "%s"' % str(exc)
            out.write('  %-12s: %s\n' % (e, sver))
Example 28
    def __init__(self, space='targets', **kwargs):
        ProjectionMapper.__init__(self, space=space, **kwargs)

        self._scale = None
        """Estimated scale"""
        if self.params.svd == 'dgesvd' and not externals.exists('liblapack.so'):
            warning("Reverting choice of svd for ProcrusteanMapper to be default "
                    "'numpy' since liblapack.so seems not to be available for "
                    "'dgesvd'")
            self.params.svd = 'numpy'
Example 29
    def __init__(self, gnb, generator, qe, errorfx=mean_mismatch_error,
                 indexsum=None, **kwargs):
        """Initialize a GNBSearchlight

        Parameters
        ----------
        gnb : `GNB`
          `GNB` classifier as the specification of what GNB parameters
          to use. Instance itself isn't used.
        generator : `Generator`
          Some `Generator` to prepare partitions for cross-validation.
        errorfx : func, optional
          Functor that computes a scalar error value from the vectors of
          desired and predicted values (e.g. subclass of `ErrorFunction`).
        indexsum : ('sparse', 'fancy'), optional
          What to use to compute sums over arbitrary columns.  'fancy'
          corresponds to regular fancy indexing over columns, whereas
          'sparse' uses a product of sparse matrices (usually
          faster, so it is the default if `scipy` is available).
        """

        # init base class first
        BaseSearchlight.__init__(self, qe, **kwargs)

        self._errorfx = errorfx
        self._generator = generator
        self._gnb = gnb

        if indexsum is None:
            if externals.exists('scipy'):
                indexsum = 'sparse'
            else:
                indexsum = 'fancy'
        else:
            if indexsum == 'sparse' and not externals.exists('scipy'):
                warning("Scipy.sparse isn't available so taking 'fancy' as "
                        "'indexsum' method.")
                indexsum = 'fancy'
        self._indexsum = indexsum

        if not self.nproc in (None, 1):
            raise NotImplementedError, "For now only nproc=1 (or None for " \
                  "autodetection) is supported by GNBSearchlight"
Example 30
 def __init__(self, datameasure, queryengine, add_center_fa=False,
              results_backend='native',
              tmp_prefix='tmpsl',
              **kwargs):
     """
     Parameters
     ----------
     datameasure : callable
       Any object that takes a :class:`~mvpa2.datasets.base.Dataset`
       and returns some measure when called.
     add_center_fa : bool or str
       If True or a string, each searchlight ROI dataset will have a boolean
       vector as a feature attribute that indicates the feature that is the
       seed (e.g. sphere center) for the respective ROI. If True, the
       attribute is named 'roi_seed', the provided string is used as the name
       otherwise.
     results_backend : ('native', 'hdf5'), optional
       Specifies the way results are provided back from a processing block
       in case of nproc > 1. 'native' is pickling/unpickling of results by
       pprocess, while 'hdf5' would use h5save/h5load functionality.
       'hdf5' might be more time and memory efficient in some cases.
     tmp_prefix : str, optional
        If specified, serves as a prefix for temporary file storage
        when results_backend == 'hdf5'.  It can thus include the directory
        to use (a trailing path separator is not added automagically).
     **kwargs
       In addition this class supports all keyword arguments of its
       base-class :class:`~mvpa2.measures.searchlight.BaseSearchlight`.
     """
     BaseSearchlight.__init__(self, queryengine, **kwargs)
     self.datameasure = datameasure
     self.results_backend = results_backend.lower()
     if self.results_backend == 'hdf5':
         # Assure having hdf5
         externals.exists('h5py', raise_=True)
     self.tmp_prefix = tmp_prefix
     if isinstance(add_center_fa, str):
         self.__add_center_fa = add_center_fa
     elif add_center_fa:
         self.__add_center_fa = 'roi_seed'
     else:
         self.__add_center_fa = False
Example 31
#
# Testing
#

# import the main unittest interface
from mvpa2.tests import run as test

#
# Externals-dependent tune ups
#

# PyMVPA is useless without numpy
# Also, this check enforces the population of externals.versions
# for possible later version checks, hence don't remove it
externals.exists('numpy', force=True, raise_=True)
# We might need to suppress the warnings:

# If instructed -- no python or numpy warnings (like ctypes version
# for slmr), e.g. during doctests
if cfg.getboolean('warnings', 'suppress', default=False):
    import warnings
    warnings.simplefilter('ignore')
    # NumPy
    np.seterr(**dict([(x, 'ignore') for x in np.geterr()]))

if externals.exists('scipy'):
    externals._suppress_scipy_warnings()

# And check if we aren't under IPython so we could pacify completion
# a bit
Example 32
Provides interface to kernels defined in shogun toolbox.  Commonly
used kernels are provided with convenience classes: `LinearSGKernel`,
`RbfSGKernel`, `PolySGKernel`.  If you need to use some other shogun
kernel, use `CustomSGKernel` to define one.
"""

__docformat__ = 'restructuredtext'

import numpy as np

from mvpa2.base.externals import exists, versions
from mvpa2.kernels.base import Kernel
from mvpa2.base.param import Parameter

if exists('shogun', raise_=True):
    import shogun.Kernel as sgk
    from shogun.Features import RealFeatures
else:
    # Just to please sphinx documentation
    class Bogus(object):
        pass

    sgk = Bogus()
    sgk.LinearKernel = None
    sgk.GaussianKernel = None
    sgk.PolyKernel = None

if __debug__:
    from mvpa2.base import debug
Example 33
    def from_hdf5(cls, source, name=None):
        """Load a Dataset from HDF5 file

        Parameters
        ----------
        source : string or h5py.highlevel.File
          Filename or HDF5's File to load dataset from
        name : string, optional
          If file contains multiple entries at the 1st level, if
          provided, `name` specifies the group to be loaded as the
          AttrDataset.

        Returns
        -------
        AttrDataset

        Raises
        ------
        ValueError
        """
        if not externals.exists('h5py'):
            raise RuntimeError(
                "Missing 'h5py' package -- saving is not possible.")

        import h5py
        from mvpa2.base.hdf5 import hdf2obj

        # look if we got an hdf file instance already
        if isinstance(source, h5py.highlevel.File):
            own_file = False
            hdf = source
        else:
            own_file = True
            hdf = h5py.File(source, 'r')

        if not name is None:
            # some HDF5 subset is requested
            if not name in hdf:
                raise ValueError("Cannot find '%s' group in HDF file %s.  "
                                 "File contains groups: %s"
                                 % (name, source, hdf.keys()))

            # access the group that should contain the dataset
            dsgrp = hdf[name]
            res = hdf2obj(dsgrp)
            if not isinstance(res, AttrDataset):
                # TODO: unittest before committing
                raise ValueError, "%r in %s contains %s not a dataset.  " \
                      "File contains groups: %s." \
                      % (name, source, type(res), hdf.keys())
        else:
            # just consider the whole file
            res = hdf2obj(hdf)
            if not isinstance(res, AttrDataset):
                # TODO: unittest before committing
                raise ValueError, "Failed to load a dataset from %s.  " \
                      "Loaded %s instead." \
                      % (source, type(res))
        if own_file:
            hdf.close()
        return res
Example 34
import numpy as np

import os

from mvpa2.support.nibabel import afni_niml as niml
from mvpa2.support.nibabel import afni_niml_dset as niml_dset

from mvpa2.base.collections import SampleAttributesCollection, \
        FeatureAttributesCollection, DatasetAttributesCollection, \
        ArrayCollectable

from mvpa2.base import warning, debug, externals
from mvpa2.datasets.base import Dataset

if externals.exists('h5py'):
    from mvpa2.base.hdf5 import h5save, h5load

_PYMVPA_PREFIX = 'PYMVPA'
_PYMVPA_SEP = '_'


def from_niml_dset(dset, fa_labels=[], sa_labels=[], a_labels=[]):
    '''Convert a NIML dataset to a Dataset
    
    Parameters
    ----------
    dset: dict
        Dictionary with NIML key-value pairs, such as obtained from
        mvpa2.support.nibabel.afni_niml_dset.read()
    fa_labels: list
Example 35
#   copyright and license terms.
#
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""Fixer for rdist in scipy
"""
# For scipy import
from __future__ import absolute_import

__docformat__ = 'restructuredtext'

from mvpa2.base import externals, warning, cfg

if __debug__:
    from mvpa2.base import debug

if externals.exists('scipy', raise_=True):
    import scipy
    import scipy.stats
    import scipy.stats as stats

if not externals.exists('good scipy.stats.rdist'):
    if __debug__:
        debug("EXT", "Fixing up scipy.stats.rdist")
    # Let's fix it up; future imports of scipy.stats should carry the fixed
    # version -- isn't Python evil ;-)
    import numpy as np

    from scipy.stats.distributions import rv_continuous
    from scipy import special
    import scipy.integrate