def __init__(self, **kwargs):
    _shpaldebug("Initializing.")
    ClassWithCollections.__init__(self, **kwargs)
    self.ndatasets = 0
    self.nfeatures = 0
    self.projections = None
    # This option forces the roi_seed in each searchlight to be selected
    # during feature selection
    self.force_roi_seed = True
    if self.params.nproc is not None and self.params.nproc > 1 \
            and not externals.exists('pprocess'):
        raise RuntimeError("The 'pprocess' module is required for "
                           "multiprocess searchlights. Please either "
                           "install python-pprocess, reduce `nproc` "
                           "to 1 (got nproc=%i), or leave it at the "
                           "default of None" % self.params.nproc)
    if not externals.exists('scipy'):
        raise RuntimeError("The 'scipy' module is required for "
                           "searchlight hyperalignment.")
    if self.params.results_backend == 'native':
        raise NotImplementedError("'native' mode to handle results is still a "
                                  "work in progress.")
        #warning("results_backend is set to 'native'. This has been known "
        #        "to result in longer run time when working with big datasets.")
    if self.params.results_backend == 'hdf5' and \
            not externals.exists('h5py'):
        raise RuntimeError("The 'h5py' module is required when "
                           "results_backend is set to 'hdf5'.")
def __init__(self, sd=0, distribution='rdist', fpp=None, nbins=400, **kwargs): """L2-Norm the values, convert them to p-values of a given distribution. Parameters ---------- sd : int Samples dimension (if len(x.shape)>1) on which to operate distribution : string Which distribution to use. Known are: 'rdist' (later normal should be there as well) fpp : float At what p-value (both tails) if not None, to control for false positives. It would iteratively prune the tails (tentative real positives) until empirical p-value becomes less or equal to numerical. nbins : int Number of bins for the iterative pruning of positives WARNING: Highly experimental/slow/etc: no theoretical grounds have been presented in any paper, nor proven """ externals.exists('scipy', raise_=True) ClassWithCollections.__init__(self, **kwargs) self.sd = sd if not (distribution in ['rdist']): raise ValueError, "Actually only rdist supported at the moment" \ " got %s" % distribution self.distribution = distribution self.fpp = fpp self.nbins = nbins
def _acquire_externals(self, out): # Test and list all dependencies: sdeps = {True: [], False: [], 'Error': []} for dep in sorted(externals._KNOWN): try: sdeps[externals.exists(dep, force=False)] += [dep] except: sdeps['Error'] += [dep] out.write('EXTERNALS:\n') out.write(' Present: %s\n' % ', '.join(sdeps[True])) out.write(' Absent: %s\n' % ', '.join(sdeps[False])) if len(sdeps['Error']): out.write(' Errors in determining: %s\n' % ', '.join(sdeps['Error'])) SV = ('.__version__', ) # standard versioning out.write(' Versions of critical externals:\n') # First the ones known to externals, for k, v in sorted(externals.versions.iteritems()): out.write(' %-12s: %s\n' % (k, str(v))) try: if externals.exists('matplotlib'): import matplotlib out.write(' Matplotlib backend: %s\n' % matplotlib.get_backend()) except Exception, exc: out.write(' Failed to determine backend of matplotlib due to "%s"' % str(exc))
def __init__(self, generator, queryengine, errorfx=mean_mismatch_error,
             indexsum=None,
             reuse_neighbors=False,
             splitter=None,
             **kwargs):
    """Initialize the base class for "naive" searchlight classifiers

    Parameters
    ----------
    generator : `Generator`
      Some `Generator` to prepare partitions for cross-validation.
      It must not change "targets", thus e.g. no AttributePermutator's
    errorfx : func, optional
      Functor that computes a scalar error value from the vectors of
      desired and predicted values (e.g. subclass of `ErrorFunction`).
    indexsum : ('sparse', 'fancy'), optional
      What to use to compute sums over arbitrary columns.  'fancy'
      corresponds to regular fancy indexing over columns, whereas in
      'sparse' a product of sparse matrices is used (usually faster, so
      it is the default if `scipy` is available).
    reuse_neighbors : bool, optional
      Compute neighbors information only once, thus allowing for
      efficient reuse on subsequent calls where dataset's feature
      attributes remain the same (e.g. during permutation testing)
    splitter : Splitter, optional
      Splitter to use to split partitioned datasets.  If None, a
      standard splitter operating on partitions will be used.
    """

    # init base class first
    BaseSearchlight.__init__(self, queryengine, **kwargs)

    self._errorfx = errorfx
    self._generator = generator
    self._splitter = splitter

    # TODO: move into _call since resetting over default None
    #       obscures __repr__
    if indexsum is None:
        if externals.exists('scipy'):
            indexsum = 'sparse'
        else:
            indexsum = 'fancy'
    else:
        if indexsum == 'sparse' and not externals.exists('scipy'):
            warning("Scipy.sparse isn't available so taking 'fancy' as "
                    "'indexsum' method.")
            indexsum = 'fancy'
    self._indexsum = indexsum

    if not self.nproc in (None, 1):
        raise NotImplementedError, "For now only nproc=1 (or None for " \
              "autodetection) is supported by GNBSearchlight"

    self.__pb = None            # statistics per each block/label
    self.__reuse_neighbors = reuse_neighbors

    # Storage to be used for neighborhood information
    self.__roi_fids = None
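# A minimal, self-contained sketch (not PyMVPA code; all names here are
# illustrative) of the two 'indexsum' strategies referred to above: summing
# dataset columns over arbitrary groups of feature ids either by fancy
# indexing or by multiplying with a sparse 0/1 indicator matrix.
import numpy as np
import scipy.sparse as sps

def indexsum_fancy(data, roi_fids):
    """Sum columns of `data` for each list of feature ids via fancy indexing."""
    return np.array([data[:, fids].sum(axis=1) for fids in roi_fids]).T

def indexsum_sparse(data, roi_fids, nfeatures):
    """Same sums, but obtained as a product with a sparse indicator matrix."""
    rows, cols, vals = [], [], []
    for roi, fids in enumerate(roi_fids):
        rows.extend(fids)
        cols.extend([roi] * len(fids))
        vals.extend([1.0] * len(fids))
    indicator = sps.csr_matrix((vals, (rows, cols)),
                               shape=(nfeatures, len(roi_fids)))
    return np.asarray(indicator.T.dot(data.T)).T

data = np.arange(12.0).reshape(3, 4)          # 3 samples x 4 features
rois = [[0, 1], [1, 2, 3]]                    # two overlapping ROIs
assert np.allclose(indexsum_fancy(data, rois),
                   indexsum_sparse(data, rois, data.shape[1]))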
def _postcall(self, dataset, result): """Some postprocessing on the result """ if self.__null_dist is None: # do base-class postcall and be done result = super(Measure, self)._postcall(dataset, result) else: # don't do a full base-class postcall, only do the # postproc-application here, to gain result compatibility with the # fitted null distribution -- necessary to be able to use # a Node's 'pass_attr' to pick up ca.null_prob result = self._apply_postproc(dataset, result) if self.ca.is_enabled('null_t'): # get probability under NULL hyp, but also request # either it belong to the right tail null_prob, null_right_tail = \ self.__null_dist.p(result, return_tails=True) self.ca.null_prob = null_prob externals.exists('scipy', raise_=True) from scipy.stats import norm # TODO: following logic should appear in NullDist, # not here tail = self.null_dist.tail if tail == 'left': acdf = np.abs(null_prob.samples) elif tail == 'right': acdf = 1.0 - np.abs(null_prob.samples) elif tail in ['any', 'both']: acdf = 1.0 - np.clip(np.abs(null_prob.samples), 0, 0.5) else: raise RuntimeError, 'Unhandled tail %s' % tail # We need to clip to avoid non-informative inf's ;-) # that happens due to lack of precision in mantissa # which is 11 bits in double. We could clip values # around 0 at as low as 1e-100 (correspond to z~=21), # but for consistency lets clip at 1e-16 which leads # to distinguishable value around p=1 and max z=8.2. # Should be sufficient range of z-values ;-) clip = 1e-16 null_t = norm.ppf(np.clip(acdf, clip, 1.0 - clip)) # assure that we deal with arrays: null_t = np.array(null_t, ndmin=1, copy=False) null_t[~null_right_tail] *= -1.0 # revert sign for negatives null_t_ds = null_prob.copy(deep=False) null_t_ds.samples = null_t self.ca.null_t = null_t_ds # store as a Dataset else: # get probability of result under NULL hypothesis if available # and don't request tail information self.ca.null_prob = self.__null_dist.p(result) # now do the second half of postcall and invoke pass_attr result = self._pass_attr(dataset, result) return result
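# A small standalone illustration (assuming only numpy and scipy) of the
# p-value -> z-score conversion performed above for ca.null_t: clip the CDF
# values away from 0 and 1 to avoid infinities caused by double precision,
# map them through the normal quantile function, and flip the sign for
# results that fall into the left tail.
import numpy as np
from scipy.stats import norm

def p_to_z(null_prob, right_tail):
    """Convert tail probabilities to signed z-scores (right-tail convention)."""
    clip = 1e-16                          # keeps |z| <= ~8.2, still distinguishable
    acdf = 1.0 - np.clip(np.abs(null_prob), clip, 1.0 - clip)
    z = np.atleast_1d(norm.ppf(np.clip(acdf, clip, 1.0 - clip)))
    z[~np.asarray(right_tail)] *= -1.0    # negative z for left-tail results
    return z

p = np.array([0.001, 0.5, 0.001])
right = np.array([True, True, False])
print(p_to_z(p, right))                   # approx. [ 3.09,  0.0, -3.09]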
def __init__(self, datameasure, queryengine, add_center_fa=False, results_backend='native', results_fx=None, tmp_prefix='tmpsl', nblocks=None, **kwargs): """ Parameters ---------- datameasure : callable Any object that takes a :class:`~mvpa2.datasets.base.Dataset` and returns some measure when called. add_center_fa : bool or str If True or a string, each searchlight ROI dataset will have a boolean vector as a feature attribute that indicates the feature that is the seed (e.g. sphere center) for the respective ROI. If True, the attribute is named 'roi_seed', the provided string is used as the name otherwise. results_backend : ('native', 'hdf5'), optional Specifies the way results are provided back from a processing block in case of nproc > 1. 'native' is pickling/unpickling of results by pprocess, while 'hdf5' would use h5save/h5load functionality. 'hdf5' might be more time and memory efficient in some cases. results_fx : callable, optional Function to process/combine results of each searchlight block run. By default it would simply append them all into the list. It receives as keyword arguments sl, dataset, roi_ids, and results (iterable of lists). It is the one to take care of assigning roi_* ca's tmp_prefix : str, optional If specified -- serves as a prefix for temporary files storage if results_backend == 'hdf5'. Thus can specify the directory to use (trailing file path separator is not added automagically). nblocks : None or int Into how many blocks to split the computation (could be larger than nproc). If None -- nproc is used. **kwargs In addition this class supports all keyword arguments of its base-class :class:`~mvpa2.measures.searchlight.BaseSearchlight`. """ BaseSearchlight.__init__(self, queryengine, **kwargs) self.datameasure = datameasure self.results_backend = results_backend.lower() if self.results_backend == 'hdf5': # Assure having hdf5 externals.exists('h5py', raise_=True) self.results_fx = Searchlight._concat_results \ if results_fx is None else results_fx self.tmp_prefix = tmp_prefix self.nblocks = nblocks if isinstance(add_center_fa, str): self.__add_center_fa = add_center_fa elif add_center_fa: self.__add_center_fa = 'roi_seed' else: self.__add_center_fa = False
def plot(self): """Plot correlation coefficients """ externals.exists('pylab', raise_=True) import pylab as pl pl.plot(self['corrcoef']) pl.title('Auto-correlation of the sequence') pl.xlabel('Offset') pl.ylabel('Correlation Coefficient') pl.show()
def __init__(self, source):
    """Read MEG data from text files or file-like objects.

    Parameters
    ----------
    source : str or file-like
      Strings are assumed to be filenames (with `.gz` suffix
      compressed), while all other object types are treated as file-like
      objects.
    """
    self.ntimepoints = None
    self.timepoints = None
    self.nsamples = None
    self.channelids = []
    self.data = []
    self.samplingrate = None

    # open text files
    if isinstance(source, str):
        if source.endswith(".gz"):
            externals.exists("gzip", raise_=True)
            import gzip
            source = gzip.open(source, "r")
        else:
            source = open(source, "r")

    # read file
    for line in source:
        # split ID
        colon = line.find(":")

        # ignore lines without id
        if colon == -1:
            continue

        id = line[:colon]
        data = line[colon + 1:].strip()
        if id == "Sample Number":
            timepoints = np.fromstring(data, dtype=int, sep="\t")
            # one more as it starts with zero
            self.ntimepoints = int(timepoints.max()) + 1
            self.nsamples = int(len(timepoints) / self.ntimepoints)
        elif id == "Time":
            self.timepoints = np.fromstring(data, dtype=float,
                                            count=self.ntimepoints, sep="\t")
            self.samplingrate = self.ntimepoints \
                / (self.timepoints[-1] - self.timepoints[0])
        else:
            # load data
            self.data.append(
                np.fromstring(data, dtype=float, sep="\t"
                              ).reshape(self.nsamples, self.ntimepoints))
            # store id
            self.channelids.append(id)

    # reshape data from (channels x samples x timepoints) to
    # (samples x channels x timepoints)
    self.data = np.swapaxes(np.array(self.data), 0, 1)
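# A toy sketch (purely illustrative, not the reader above) of the same
# "id: tab-separated values" parsing idea: split each line at the first
# colon, parse the remainder with numpy, and infer the (samples, timepoints)
# layout from the "Sample Number" row.
import numpy as np

lines = ["Sample Number:\t0\t1\t2\t0\t1\t2",
         "Time:\t0.0\t0.1\t0.2",
         "MEG 001:\t1\t2\t3\t4\t5\t6"]

channels = {}
for line in lines:
    colon = line.find(":")
    if colon == -1:
        continue
    key, rest = line[:colon], line[colon + 1:].strip()
    values = np.fromstring(rest, dtype=float, sep="\t")
    if key == "Sample Number":
        ntimepoints = int(values.max()) + 1          # ids start at zero
        nsamples = len(values) // ntimepoints
    elif key != "Time":
        channels[key] = values.reshape(nsamples, ntimepoints)

print(channels["MEG 001"].shape)                     # (2, 3)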
def test_externals_correct2nd_invocation(self): # always fails externals._KNOWN['checker2'] = 'raise ImportError' self.assertTrue(not externals.exists('checker2'), msg="Should be False on 1st invocation") self.assertTrue(not externals.exists('checker2'), msg="Should be False on 2nd invocation as well") externals._KNOWN.pop('checker2')
def _call(self, dataset): externals.exists('skl', raise_=True) from sklearn.linear_model import Lasso, Ridge from sklearn.preprocessing import scale # first run PDist compute_dsm = PDist(pairwise_metric=self.params.pairwise_metric, center_data=self.params.center_data) dsm = compute_dsm(dataset) dsm_samples = dsm.samples if self.params.rank_data: dsm_samples = rankdata(dsm_samples) predictors = np.apply_along_axis(rankdata, 0, self.predictors) else: predictors = self.predictors if self.params.normalize: predictors = scale(predictors, axis=0) dsm_samples = scale(dsm_samples, axis=0) # keep only the item we want if self.keep_pairs is not None: dsm_samples = dsm_samples[self.keep_pairs] predictors = predictors[self.keep_pairs, :] # check that predictors and samples have the correct dimensions if dsm_samples.shape[0] != predictors.shape[0]: raise ValueError('computed dsm has {0} rows, while predictors have' '{1} rows. Check that predictors have the right' 'shape'.format(dsm_samples.shape[0], predictors.shape[0])) # now fit the regression if self.params.method == 'lasso': reg = Lasso elif self.params.method == 'ridge': reg = Ridge else: raise ValueError('I do not know method {0}'.format(self.params.method)) reg_ = reg(alpha=self.params.alpha, fit_intercept=self.params.fit_intercept) reg_.fit(predictors, dsm_samples) coefs = reg_.coef_.reshape(-1, 1) sa = ['coef' + str(i) for i in range(len(coefs))] if self.params.fit_intercept: coefs = np.vstack((coefs, reg_.intercept_)) sa += ['intercept'] return Dataset(coefs, sa={'coefs': sa})
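# Standalone sketch of the regression step above, under the assumption that
# scikit-learn is installed: z-score the predictor RDMs and the target
# dissimilarity vector, fit a Ridge model, and collect coefficients plus
# intercept.  The data here are random and only illustrate the shapes.
import numpy as np
from sklearn.linear_model import Ridge
from sklearn.preprocessing import scale

rng = np.random.RandomState(0)
npairs, npredictors = 45, 3            # e.g. 10 conditions -> 45 pairs
predictors = scale(rng.rand(npairs, npredictors), axis=0)
target_dsm = scale(rng.rand(npairs), axis=0)

reg = Ridge(alpha=1.0, fit_intercept=True)
reg.fit(predictors, target_dsm)

coefs = np.append(reg.coef_, reg.intercept_)
labels = ['coef%d' % i for i in range(npredictors)] + ['intercept']
print(dict(zip(labels, np.round(coefs, 3))))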
def test_externals_no_double_invocation(self): # no external should be checking twice (unless specified # explicitely) class Checker(object): """Helper class to increment count of actual checks""" def __init__(self): self.checked = 0 def check(self): self.checked += 1 checker = Checker() externals._KNOWN['checker'] = 'checker.check()' externals.__dict__['checker'] = checker externals.exists('checker') self.assertEqual(checker.checked, 1) externals.exists('checker') self.assertEqual(checker.checked, 1) externals.exists('checker', force=True) self.assertEqual(checker.checked, 2) externals.exists('checker') self.assertEqual(checker.checked, 2) # restore original externals externals.__dict__.pop('checker') externals._KNOWN.pop('checker')
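# The test above relies on externals.exists() caching its verdicts.  The
# sketch below shows the general memoization pattern it exercises -- not
# PyMVPA's actual implementation -- where a check callable runs once and its
# result is reused unless force=True is passed.
_checks = {'demo': lambda: True}        # name -> callable returning bool
_verdicts = {}                          # name -> cached result

def exists(name, force=False):
    """Return (and cache) whether an optional dependency check passes."""
    if force or name not in _verdicts:
        try:
            _verdicts[name] = bool(_checks[name]())
        except Exception:
            _verdicts[name] = False
    return _verdicts[name]

assert exists('demo') is True
assert exists('demo') is True               # second call served from the cache
assert exists('demo', force=True) is True   # force re-runs the check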
def _postcall(self, dataset, result): """Some postprocessing on the result """ self.ca.raw_results = result # post-processing result = super(Measure, self)._postcall(dataset, result) if not self.__null_dist is None: if self.ca.is_enabled("null_t"): # get probability under NULL hyp, but also request # either it belong to the right tail null_prob, null_right_tail = self.__null_dist.p(result, return_tails=True) self.ca.null_prob = null_prob externals.exists("scipy", raise_=True) from scipy.stats import norm # TODO: following logic should appear in NullDist, # not here tail = self.null_dist.tail if tail == "left": acdf = np.abs(null_prob.samples) elif tail == "right": acdf = 1.0 - np.abs(null_prob.samples) elif tail in ["any", "both"]: acdf = 1.0 - np.clip(np.abs(null_prob.samples), 0, 0.5) else: raise RuntimeError, "Unhandled tail %s" % tail # We need to clip to avoid non-informative inf's ;-) # that happens due to lack of precision in mantissa # which is 11 bits in double. We could clip values # around 0 at as low as 1e-100 (correspond to z~=21), # but for consistency lets clip at 1e-16 which leads # to distinguishable value around p=1 and max z=8.2. # Should be sufficient range of z-values ;-) clip = 1e-16 null_t = norm.ppf(np.clip(acdf, clip, 1.0 - clip)) # assure that we deal with arrays: null_t = np.array(null_t, ndmin=1, copy=False) null_t[~null_right_tail] *= -1.0 # revert sign for negatives null_t_ds = null_prob.copy(deep=False) null_t_ds.samples = null_t self.ca.null_t = null_t_ds # store as a Dataset else: # get probability of result under NULL hypothesis if available # and don't request tail information self.ca.null_prob = self.__null_dist.p(result) return result
def test_swaroop_case(self, preallocate_output): """Test hdf5 backend to pass results on Swaroop's usecase """ skip_if_no_external('h5py') from mvpa2.measures.base import Measure class sw_measure(Measure): def __init__(self): Measure.__init__(self, auto_train=True) def _call(self, dataset): # For performance measures -- increase to 50-200 # np.sum here is just to get some meaningful value in # them #return np.ones(shape=(2, 2))*np.sum(dataset) return Dataset( np.array([{'d': np.ones(shape=(5, 5)) * np.sum(dataset)}], dtype=object)) results = [] ds = datasets['3dsmall'].copy(deep=True) ds.fa['voxel_indices'] = ds.fa.myspace our_custom_prefix = tempfile.mktemp() for backend in ['native'] + \ (externals.exists('h5py') and ['hdf5'] or []): sl = sphere_searchlight(sw_measure(), radius=1, tmp_prefix=our_custom_prefix, results_backend=backend, preallocate_output=preallocate_output) t0 = time.time() results.append(np.asanyarray(sl(ds))) # print "Done for backend %s in %d sec" % (backend, time.time() - t0) # because of swaroop's ad-hoc (who only could recommend such # a construct?) use case, and absent fancy working assert_objectarray_equal # let's compare manually #assert_objectarray_equal(*results) if not externals.exists('h5py'): self.assertRaises(RuntimeError, sphere_searchlight, sw_measure(), results_backend='hdf5') raise SkipTest('h5py required for test of backend="hdf5"') assert_equal(results[0].shape, results[1].shape) results = [r.flatten() for r in results] for x, y in zip(*results): assert_equal(x.keys(), y.keys()) assert_array_equal(x['d'], y['d']) # verify that no junk is left behind tempfiles = glob.glob(our_custom_prefix + '*') assert_equal(len(tempfiles), 0)
def test_h5support(self): sh = (20, 20, 20) msk = np.zeros(sh) for i in xrange(0, sh[0], 2): msk[i, :, :] = 1 vg = volgeom.VolGeom(sh, np.identity(4), mask=msk) density = 20 outer = surf.generate_sphere(density) * 10. + 5 inner = surf.generate_sphere(density) * 5. + 5 intermediate = outer * .5 + inner * .5 xyz = intermediate.vertices radius = 50 backends = ['native', 'hdf5'] for i, backend in enumerate(backends): if backend == 'hdf5' and not externals.exists('h5py'): continue sel = surf_voxel_selection.run_voxel_selection(radius, vg, inner, outer, results_backend=backend) if i == 0: sel0 = sel else: assert_equal(sel0, sel)
def test_dist_p_value(self):
    """Basic testing of DistPValue"""
    if not externals.exists('scipy'):
        return
    ndb = 200
    ndu = 20
    nperd = 2
    pthr = 0.05
    Nbins = 400

    # Let's generate already normed data (on sphere) and add some nonbogus features
    datau = (np.random.normal(size=(nperd, ndb)))
    dist = np.sqrt((datau * datau).sum(axis=1))
    datas = (datau.T / dist.T).T
    tn = datax = datas[0, :]
    dataxmax = np.max(np.abs(datax))

    # now let's add true positive features
    tp = [-dataxmax * 1.1] * (ndu // 2) + [dataxmax * 1.1] * (ndu // 2)
    x = np.hstack((datax, tp))

    # let's add just pure normal to it
    x = np.vstack((x, np.random.normal(size=x.shape))).T
    for distPValue in (DistPValue(), DistPValue(fpp=0.05)):
        result = distPValue(x)
        self.assertTrue((result >= 0).all())
        self.assertTrue((result <= 1).all())

    if cfg.getboolean('tests', 'labile', default='yes'):
        self.assertTrue(distPValue.ca.positives_recovered[0] > 10)
        self.assertTrue((np.array(distPValue.ca.positives_recovered) +
                         np.array(distPValue.ca.nulldist_number) == ndb + ndu).all())
        self.assertEqual(distPValue.ca.positives_recovered[1], 0)
def __init__(self, normalizer_cls=None, normalizer_args=None, **kwargs): """ Parameters ---------- normalizer_cls : sg.Kernel.CKernelNormalizer Class to use as a normalizer for the kernel. Will be instantiated upon compute(). Only supported for shogun >= 0.6.5. By default (if left None) assigns IdentityKernelNormalizer to assure no normalization. normalizer_args : None or list If necessary, provide a list of arguments for the normalizer. """ SGKernel.__init__(self, **kwargs) if (normalizer_cls is not None) and (versions['shogun:rev'] < 3377): raise ValueError, \ "Normalizer specification is supported only for sg >= 0.6.5. " \ "Please upgrade shogun python modular bindings." if normalizer_cls is None and exists('sg ge 0.6.5'): normalizer_cls = sgk.IdentityKernelNormalizer self._normalizer_cls = normalizer_cls if normalizer_args is None: normalizer_args = [] self._normalizer_args = normalizer_args
def test_preallocate_output(self, nblocks): ds = datasets['3dsmall'].copy()[:, :25] # smaller copy ds.fa['voxel_indices'] = ds.fa.myspace ds.fa['feature_id'] = np.arange(ds.nfeatures) def measure(ds): # return more than one sample return np.repeat(ds.fa.feature_id, 10, axis=0) nprocs = [1, 2] if externals.exists('pprocess') else [1] enable_ca = ['roi_sizes', 'raw_results', 'roi_feature_ids'] for nproc in nprocs: sl = sphere_searchlight(measure, radius=0, center_ids=np.arange(ds.nfeatures), nproc=nproc, enable_ca=enable_ca, nblocks=nblocks ) sl_inplace = sphere_searchlight(measure, radius=0, preallocate_output=True, center_ids=np.arange(ds.nfeatures), nproc=nproc, enable_ca=enable_ca, nblocks=nblocks ) out = sl(ds) out_inplace = sl_inplace(ds) for c in enable_ca: assert_array_equal(sl.ca[c].value, sl_inplace.ca[c].value) assert_array_equal(out.samples, out_inplace.samples) assert_array_equal(out.fa.center_ids, out_inplace.fa.center_ids)
def save(dataset, destination, name=None, compression=None): """Save Dataset into HDF5 file Parameters ---------- dataset : `Dataset` destination : `h5py.highlevel.File` or str name : str, optional compression : None or int or {'gzip', 'szip', 'lzf'}, optional Level of compression for gzip, or another compression strategy. """ if not externals.exists('h5py'): raise RuntimeError("Missing 'h5py' package -- saving is not possible.") import h5py from mvpa2.base.hdf5 import obj2hdf # look if we got an hdf file instance already if isinstance(destination, h5py.highlevel.File): own_file = False hdf = destination else: own_file = True hdf = h5py.File(destination, 'w') obj2hdf(hdf, dataset, name, compression=compression) # if we opened the file ourselves we close it now if own_file: hdf.close() return
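# A reduced sketch of the "open File or filename" convention used above,
# assuming only h5py itself: open the file when given a path, write the data,
# and close the file only if this function opened it.  The file name and
# dataset name are arbitrary.
import h5py
import numpy as np

def save_array(destination, data, name='data', compression=None):
    """Write `data` into an HDF5 file given either as a path or an open File."""
    own_file = not isinstance(destination, h5py.File)
    hdf = h5py.File(destination, 'w') if own_file else destination
    hdf.create_dataset(name, data=np.asarray(data), compression=compression)
    if own_file:
        hdf.close()

save_array('example.h5', np.arange(10), compression='gzip')
with h5py.File('example.h5', 'r') as f:     # re-open to verify the round-trip
    assert f['data'][()].sum() == 45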
def skip_if_no_external(dep, ver_dep=None, min_version=None, max_version=None):
    """Raise SkipTest if external is missing

    Parameters
    ----------
    dep : string
      Name of the external
    ver_dep : string, optional
      If version checking should use a different key, e.g. shogun:rev.
      If not specified, `dep` will be used.
    min_version : None or string or tuple
      Minimal required version
    max_version : None or string or tuple
      Maximal required version
    """
    if not externals.exists(dep):
        raise SkipTest, \
              "External %s is not present thus tests battery skipped" % dep

    if ver_dep is None:
        ver_dep = dep

    if min_version is not None and externals.versions[ver_dep] < min_version:
        raise SkipTest, \
              "Minimal version %s of %s is required. Present version is %s" \
              ". Test was skipped." \
              % (min_version, ver_dep, externals.versions[ver_dep])

    if max_version is not None and externals.versions[ver_dep] > max_version:
        raise SkipTest, \
              "Maximal version %s of %s is required. Present version is %s" \
              ". Test was skipped." \
              % (max_version, ver_dep, externals.versions[ver_dep])
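# A generic version of the same guard, not tied to PyMVPA's externals module:
# skip a test when an optional module is missing or too old, using plain
# importlib and unittest.SkipTest.  Version parsing here is deliberately
# simplistic (major.minor only) and assumes the module exposes __version__.
import importlib
from unittest import SkipTest

def skip_if_no_module(name, min_version=None):
    """Raise SkipTest unless `name` imports and satisfies `min_version`."""
    try:
        mod = importlib.import_module(name)
    except ImportError:
        raise SkipTest("External %s is not present, test skipped" % name)
    if min_version is not None:
        have = tuple(int(x) for x in mod.__version__.split('.')[:2])
        want = tuple(int(x) for x in min_version.split('.')[:2])
        if have < want:
            raise SkipTest("%s >= %s is required, found %s"
                           % (name, min_version, mod.__version__))
    return mod

np = skip_if_no_module('numpy', min_version='1.0')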
def __init__(self, queryengine, roi_ids=None, nproc=None, **kwargs):
    """
    Parameters
    ----------
    queryengine : QueryEngine
      Engine to use to discover the "neighborhood" of each feature.
      See :class:`~mvpa2.misc.neighborhood.QueryEngine`.
    roi_ids : None or list(int) or str
      List of feature ids (not coordinates) that shall serve as ROI seeds
      (e.g. sphere centers).  Alternatively, this can be the name of a
      feature attribute of the input dataset, whose non-zero values
      determine the feature ids.  By default all features will be used.
    nproc : None or int
      How many processes to use for computation.  Requires `pprocess`
      external module.  If None -- all available cores will be used.
    **kwargs
      In addition this class supports all keyword arguments of its
      base-class :class:`~mvpa2.measures.base.Measure`.
    """
    Measure.__init__(self, **kwargs)

    if nproc is not None and nproc > 1 and not externals.exists('pprocess'):
        raise RuntimeError("The 'pprocess' module is required for "
                           "multiprocess searchlights. Please either "
                           "install python-pprocess, or reduce `nproc` "
                           "to 1 (got nproc=%i)" % nproc)

    self._queryengine = queryengine
    if roi_ids is not None and not isinstance(roi_ids, str) \
            and not len(roi_ids):
        raise ValueError, \
              "Cannot run searchlight on an empty list of roi_ids"
    self.__roi_ids = roi_ids
    self.nproc = nproc
def test_product_flatten(): nsamples = 17 product_name_values = [('chan', ['C1', 'C2']), ('freq', np.arange(4, 20, 6)), ('time', np.arange(-200, 800, 200))] shape = (nsamples,) + tuple(len(v) for _, v in product_name_values) sample_names = ['samp%d' % i for i in xrange(nsamples)] # generate random data in four dimensions data = np.random.normal(size=shape) ds = Dataset(data, sa=dict(sample_names=sample_names)) # apply flattening to ds flattener = ProductFlattenMapper(product_name_values) # test I/O (only if h5py is available) if externals.exists('h5py'): from mvpa2.base.hdf5 import h5save, h5load import tempfile import os _, testfn = tempfile.mkstemp('mapper.h5py', 'test_product') h5save(testfn, flattener) flattener = h5load(testfn) os.unlink(testfn) mds = flattener(ds) prod = lambda x:reduce(operator.mul, x) # ensure the size is ok assert_equal(mds.shape, (nsamples,) + (prod(shape[1:]),)) ndim = len(product_name_values) idxs = [range(len(v)) for _, v in product_name_values] for si in xrange(nsamples): for fi, p in enumerate(itertools.product(*idxs)): data_tup = (si,) + p x = mds[si, fi] # value should match assert_equal(data[data_tup], x.samples[0, 0]) # indices should match as well all_idxs = tuple(x.fa['chan_freq_time_indices'].value.ravel()) assert_equal(p, all_idxs) # values and indices in each dimension should match for i, (name, value) in enumerate(product_name_values): assert_equal(x.fa[name].value, value[p[i]]) assert_equal(x.fa[name + '_indices'].value, p[i]) product_name_values += [('foo', [1, 2, 3])] flattener = ProductFlattenMapper(product_name_values) assert_raises(ValueError, flattener, ds)
def _level3(self, datasets): params = self.params # for quicker access ;) # create a mapper per dataset mappers = [deepcopy(params.alignment) for ds in datasets] # key different from level-2; the common space is uniform #temp_commonspace = commonspace # Fixing nproc=0 if params.nproc == 0: from mvpa2.base import warning warning("nproc of 0 doesn't make sense. Setting nproc to 1.") params.nproc = 1 # Checking for joblib, if not, set nproc to 1 if params.nproc != 1: from mvpa2.base import externals, warning if not externals.exists('joblib'): warning("Setting nproc different from 1 requires joblib package, which " "does not seem to exist. Setting nproc to 1.") params.nproc = 1 # start from original input datasets again if params.nproc == 1: residuals = [] for i, (m, ds_new) in enumerate(zip(mappers, datasets)): if __debug__: debug('HPAL_', "Level 3: ds #%i" % i) m, residual = get_trained_mapper(ds_new, self.commonspace, m, self.ca['residual_errors'].enabled) if self.ca['residual_errors'].enabled: residuals.append(residual) else: if __debug__: debug('HPAL_', "Level 3: Using joblib with nproc = %d " % params.nproc) verbose_level_parallel = 20 \ if (__debug__ and 'HPAL' in debug.active) else 0 from joblib import Parallel, delayed import sys # joblib's 'multiprocessing' backend has known issues of failure on OSX # Tested with MacOS 10.12.13, python 2.7.13, joblib v0.10.3 if params.joblib_backend is None: params.joblib_backend = 'threading' if sys.platform == 'darwin' \ else 'multiprocessing' res = Parallel( n_jobs=params.nproc, pre_dispatch=params.nproc, backend=params.joblib_backend, verbose=verbose_level_parallel )( delayed(get_trained_mapper) (ds, self.commonspace, mapper, self.ca['residual_errors'].enabled) for ds, mapper in zip(datasets, mappers) ) mappers = [m for m, r in res] if self.ca['residual_errors'].enabled: residuals = [r for m, r in res] if self.ca['residual_errors'].enabled: self.ca.residual_errors = Dataset(samples=np.array(residuals)[None, :]) return mappers
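# Minimal joblib usage matching the parallel branch above, with the backend
# chosen the same way: 'threading' on macOS (where the 'multiprocessing'
# backend was reported problematic for this code base), 'multiprocessing'
# elsewhere.  The worker function is only a stand-in for the real
# get_trained_mapper call.
import sys
from joblib import Parallel, delayed

def train_one(i):
    """Placeholder for get_trained_mapper(ds, commonspace, mapper, ...)."""
    return i * i

backend = 'threading' if sys.platform == 'darwin' else 'multiprocessing'
results = Parallel(n_jobs=2, backend=backend, verbose=0)(
    delayed(train_one)(i) for i in range(4))
print(results)        # [0, 1, 4, 9]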
def _call(self, dataset): """Perform the ROI search. """ # local binding nproc = self.nproc if nproc is None and externals.exists('pprocess'): import pprocess try: nproc = pprocess.get_number_of_cores() or 1 except AttributeError: warning("pprocess version %s has no API to figure out maximal " "number of cores. Using 1" % externals.versions['pprocess']) nproc = 1 # train the queryengine self._queryengine.train(dataset) # decide whether to run on all possible center coords or just a provided # subset if isinstance(self.__roi_ids, str): roi_ids = dataset.fa[self.__roi_ids].value.nonzero()[0] elif self.__roi_ids is not None: roi_ids = self.__roi_ids # safeguard against stupidity if __debug__: if max(roi_ids) >= dataset.nfeatures: raise IndexError, \ "Maximal center_id found is %s whenever given " \ "dataset has only %d features" \ % (max(roi_ids), dataset.nfeatures) else: roi_ids = np.arange(dataset.nfeatures) # pass to subclass results, roi_sizes = self._sl_call(dataset, roi_ids, nproc) if not roi_sizes is None: self.ca.roi_sizes = roi_sizes if 'mapper' in dataset.a: # since we know the space we can stick the original mapper into the # results as well if self.__roi_ids is None: results.a['mapper'] = copy.copy(dataset.a.mapper) else: # there is an additional selection step that needs to be # expressed by another mapper mapper = copy.copy(dataset.a.mapper) mapper.append(StaticFeatureSelection(roi_ids, dshape=dataset.shape[1:])) results.a['mapper'] = mapper # charge state self.ca.raw_results = results # return raw results, base-class will take care of transformations return results
def _wm_reverse(self, data): if __debug__: debug('MAP', "Converting signal back using DWP") if self.__level is None: raise NotImplementedError else: if not externals.exists('pywt wp reconstruct'): raise NotImplementedError, \ "Reconstruction for a single level for versions of " \ "pywt < 0.1.7 (revision 103) is not supported" if not externals.exists('pywt wp reconstruct fixed'): warning("%s: Reverse mapping with this version of 'pywt' might " "result in incorrect data in the tails of the signal. " "Please check for an update of 'pywt', or be careful " "when interpreting the edges of the reverse mapped " "data." % self.__class__.__name__) return self.__reverse_single_level(data)
def _call(self, dataset): # This code is based on SciPy's stats.f_oneway() # Copyright (c) Gary Strangman. All rights reserved # License: BSD # # However, it got tweaked and optimized to better fit into PyMVPA. # number of groups targets_sa = dataset.sa[self.get_space()] labels = targets_sa.value ul = targets_sa.unique na = len(ul) bign = float(dataset.nsamples) alldata = dataset.samples # total squares of sums sostot = np.sum(alldata, axis=0) sostot *= sostot sostot /= bign # total sum of squares sstot = np.sum(alldata * alldata, axis=0) - sostot # between group sum of squares ssbn = 0 for l in ul: # all samples for the respective label d = alldata[labels == l] sos = np.sum(d, axis=0) sos *= sos ssbn += sos / float(len(d)) ssbn -= sostot # within sswn = sstot - ssbn # degrees of freedom dfbn = na-1 dfwn = bign - na # mean sums of squares msb = ssbn / float(dfbn) msw = sswn / float(dfwn) f = msb / msw # assure no NaNs -- otherwise it leads instead of # sane unittest failure (check of NaNs) to crazy # File "mtrand.pyx", line 1661, in mtrand.shuffle # TypeError: object of type 'numpy.int64' has no len() # without any sane backtrace f[np.isnan(f)] = 0 if externals.exists('scipy'): from scipy.stats import fprob return Dataset(f[np.newaxis], fa={'fprob': fprob(dfbn, dfwn, f)}) else: return Dataset(f[np.newaxis])
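# Cross-check of the vectorized one-way ANOVA above against scipy's reference
# implementation (scipy assumed available).  Note that scipy.stats.fprob used
# above is gone from modern scipy; the equivalent p-value is the F survival
# function.
import numpy as np
from scipy import stats

rng = np.random.RandomState(42)
groups = [rng.normal(loc=m, size=20) for m in (0.0, 0.0, 1.0)]

f_ref, p_ref = stats.f_oneway(*groups)

# same quantities from the sums-of-squares formulation used in _call()
alldata = np.concatenate(groups)
bign, na = float(len(alldata)), len(groups)
sostot = alldata.sum() ** 2 / bign
sstot = (alldata ** 2).sum() - sostot
ssbn = sum(g.sum() ** 2 / float(len(g)) for g in groups) - sostot
dfbn, dfwn = na - 1, bign - na
f = (ssbn / dfbn) / ((sstot - ssbn) / dfwn)
p = stats.f.sf(f, dfbn, dfwn)          # modern replacement for fprob()

assert np.allclose([f, p], [f_ref, p_ref])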
def _acquire_externals(self, out): # Test and list all dependencies: sdeps = {True: [], False: [], 'Error': []} for dep in sorted(externals._KNOWN): try: sdeps[externals.exists(dep, force=False)] += [dep] except: sdeps['Error'] += [dep] out.write('EXTERNALS:\n') out.write(' Present: %s\n' % ', '.join(sdeps[True])) out.write(' Absent: %s\n' % ', '.join(sdeps[False])) if len(sdeps['Error']): out.write(' Errors in determining: %s\n' % ', '.join(sdeps['Error'])) SV = ('.__version__', ) # standard versioning out.write(' Versions of critical externals:\n') # First the ones known to externals, # TODO: make all of them set in externals.versions for k, v in externals.versions.iteritems(): out.write(' %-12s: %s\n' % (k, str(v))) for e, mname, fs in ( ('ctypes', None, SV), ('matplotlib', None, SV), ('lxml', None, ('.etree.__version__',)), ('nifti', None, SV), ('numpy', None, SV), ('openopt', 'openopt', SV), ('openopt', 'scikits.openopt', ('.openopt.__version__',)), ('pywt', None, SV), #('rpy', None, ('.rpy_version',)), ('shogun', None, ('.Classifier.Version_get_version_release()',)), ): try: if not externals.exists(e): continue #sver = 'not present' else: if mname is None: mname = e m = __import__(mname) svers = [eval('m%s' % (f,)) for f in fs] sver = ' '.join(svers) except Exception, exc: sver = 'failed to query due to "%s"' % str(exc) out.write(' %-12s: %s\n' % (e, sver))
def __init__(self, space='targets', **kwargs): ProjectionMapper.__init__(self, space=space, **kwargs) self._scale = None """Estimated scale""" if self.params.svd == 'dgesvd' and not externals.exists('liblapack.so'): warning("Reverting choice of svd for ProcrusteanMapper to be default " "'numpy' since liblapack.so seems not to be available for " "'dgesvd'") self.params.svd = 'numpy'
def __init__(self, gnb, generator, qe, errorfx=mean_mismatch_error,
             indexsum=None, **kwargs):
    """Initialize a GNBSearchlight

    Parameters
    ----------
    gnb : `GNB`
      `GNB` classifier as the specification of what GNB parameters
      to use. Instance itself isn't used.
    generator : `Generator`
      Some `Generator` to prepare partitions for cross-validation.
    errorfx : func, optional
      Functor that computes a scalar error value from the vectors of
      desired and predicted values (e.g. subclass of `ErrorFunction`).
    indexsum : ('sparse', 'fancy'), optional
      What to use to compute sums over arbitrary columns.  'fancy'
      corresponds to regular fancy indexing over columns, whereas in
      'sparse' a product of sparse matrices is used (usually faster, so
      it is the default if `scipy` is available).
    """

    # init base class first
    BaseSearchlight.__init__(self, qe, **kwargs)

    self._errorfx = errorfx
    self._generator = generator
    self._gnb = gnb

    if indexsum is None:
        if externals.exists('scipy'):
            indexsum = 'sparse'
        else:
            indexsum = 'fancy'
    else:
        if indexsum == 'sparse' and not externals.exists('scipy'):
            warning("Scipy.sparse isn't available so taking 'fancy' as "
                    "'indexsum' method.")
            indexsum = 'fancy'
    self._indexsum = indexsum

    if not self.nproc in (None, 1):
        raise NotImplementedError, "For now only nproc=1 (or None for " \
              "autodetection) is supported by GNBSearchlight"
def __init__(self, datameasure, queryengine, add_center_fa=False, results_backend='native', tmp_prefix='tmpsl', **kwargs): """ Parameters ---------- datameasure : callable Any object that takes a :class:`~mvpa2.datasets.base.Dataset` and returns some measure when called. add_center_fa : bool or str If True or a string, each searchlight ROI dataset will have a boolean vector as a feature attribute that indicates the feature that is the seed (e.g. sphere center) for the respective ROI. If True, the attribute is named 'roi_seed', the provided string is used as the name otherwise. results_backend : ('native', 'hdf5'), optional Specifies the way results are provided back from a processing block in case of nproc > 1. 'native' is pickling/unpickling of results by pprocess, while 'hdf5' would use h5save/h5load functionality. 'hdf5' might be more time and memory efficient in some cases. tmp_prefix : str, optional If specified -- serves as a prefix for temporary files storage if results_backend == 'hdf5'. Thus can specify the directory to use (trailing file path separator is not added automagically). **kwargs In addition this class supports all keyword arguments of its base-class :class:`~mvpa2.measures.searchlight.BaseSearchlight`. """ BaseSearchlight.__init__(self, queryengine, **kwargs) self.datameasure = datameasure self.results_backend = results_backend.lower() if self.results_backend == 'hdf5': # Assure having hdf5 externals.exists('h5py', raise_=True) self.tmp_prefix = tmp_prefix if isinstance(add_center_fa, str): self.__add_center_fa = add_center_fa elif add_center_fa: self.__add_center_fa = 'roi_seed' else: self.__add_center_fa = False
# # Testing # # import the main unittest interface from mvpa2.tests import run as test # # Externals-dependent tune ups # # PyMVPA is useless without numpy # Also, this check enforcing population of externals.versions # for possible later version checks, hence don't remove externals.exists('numpy', force=True, raise_=True) # We might need to suppress the warnings: # If instructed -- no python or numpy warnings (like ctypes version # for slmr), e.g. for during doctests if cfg.getboolean('warnings', 'suppress', default=False): import warnings warnings.simplefilter('ignore') # NumPy np.seterr(**dict([(x, 'ignore') for x in np.geterr()])) if externals.exists('scipy'): externals._suppress_scipy_warnings() # And check if we aren't under IPython so we could pacify completion # a bit
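# The same warning-silencing idiom in isolation: both Python-level warnings
# and numpy's floating point error reporting are switched off, and numpy's
# previous settings can be restored later from the dict that np.seterr
# returns.
import warnings
import numpy as np

warnings.simplefilter('ignore')
old_settings = np.seterr(**dict((x, 'ignore') for x in np.geterr()))

print(np.array([1.0]) / 0.0)      # prints [inf] without a RuntimeWarning
np.seterr(**old_settings)         # restore previous behaviour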
Provides interface to kernels defined in shogun toolbox. Commonly used kernels are provided with convenience classes: `LinearSGKernel`, `RbfSGKernel`, `PolySGKernel`. If you need to use some other shogun kernel, use `CustomSGKernel` to define one. """ __docformat__ = 'restructuredtext' import numpy as np from mvpa2.base.externals import exists, versions from mvpa2.kernels.base import Kernel from mvpa2.base.param import Parameter if exists('shogun', raise_=True): import shogun.Kernel as sgk from shogun.Features import RealFeatures else: # Just to please sphinx documentation class Bogus(object): pass sgk = Bogus() sgk.LinearKernel = None sgk.GaussianKernel = None sgk.PolyKernel = None if __debug__: from mvpa2.base import debug
def from_hdf5(cls, source, name=None):
    """Load a Dataset from HDF5 file

    Parameters
    ----------
    source : string or h5py.highlevel.File
      Filename or HDF5's File to load dataset from
    name : string, optional
      If file contains multiple entries at the 1st level, if provided,
      `name` specifies the group to be loaded as the AttrDataset.

    Returns
    -------
    AttrDataset

    Raises
    ------
    ValueError
    """
    if not externals.exists('h5py'):
        raise RuntimeError(
            "Missing 'h5py' package -- loading is not possible.")

    import h5py
    from mvpa2.base.hdf5 import hdf2obj

    # look if we got an hdf file instance already
    if isinstance(source, h5py.highlevel.File):
        own_file = False
        hdf = source
    else:
        own_file = True
        hdf = h5py.File(source, 'r')

    if not name is None:
        # some HDF5 subset is requested
        if not name in hdf:
            raise ValueError("Cannot find '%s' group in HDF file %s. "
                             "File contains groups: %s"
                             % (name, source, hdf.keys()))

        # access the group that should contain the dataset
        dsgrp = hdf[name]
        res = hdf2obj(dsgrp)
        if not isinstance(res, AttrDataset):
            # TODO: unittest before committing
            raise ValueError, "%r in %s contains %s not a dataset. " \
                  "File contains groups: %s." \
                  % (name, source, type(res), hdf.keys())
    else:
        # just consider the whole file
        res = hdf2obj(hdf)
        if not isinstance(res, AttrDataset):
            # TODO: unittest before committing
            raise ValueError, "Failed to load a dataset from %s. " \
                  "Loaded %s instead." \
                  % (source, type(res))
    if own_file:
        hdf.close()
    return res
import numpy as np import os from mvpa2.support.nibabel import afni_niml as niml from mvpa2.support.nibabel import afni_niml_dset as niml_dset from mvpa2.base.collections import SampleAttributesCollection, \ FeatureAttributesCollection, DatasetAttributesCollection, \ ArrayCollectable from mvpa2.base import warning, debug, externals from mvpa2.datasets.base import Dataset if externals.exists('h5py'): from mvpa2.base.hdf5 import h5save, h5load _PYMVPA_PREFIX = 'PYMVPA' _PYMVPA_SEP = '_' def from_niml_dset(dset, fa_labels=[], sa_labels=[], a_labels=[]): '''Convert a NIML dataset to a Dataset Parameters ---------- dset: dict Dictionary with NIML key-value pairs, such as obtained from mvpa2.support.nibabel.afni_niml_dset.read() fa_labels: list
# copyright and license terms. # ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## """Fixer for rdist in scipy """ # For scipy import from __future__ import absolute_import __docformat__ = 'restructuredtext' from mvpa2.base import externals, warning, cfg if __debug__: from mvpa2.base import debug if externals.exists('scipy', raise_=True): import scipy import scipy.stats import scipy.stats as stats if not externals.exists('good scipy.stats.rdist'): if __debug__: debug("EXT", "Fixing up scipy.stats.rdist") # Lets fix it up, future imports of scipy.stats should carry fixed # version, isn't python is \emph{evil} ;-) import numpy as np from scipy.stats.distributions import rv_continuous from scipy import special import scipy.integrate
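# Sketch of the general fix-up pattern hinted at above (an assumption-laden
# illustration, not the actual rdist repair): define a replacement
# distribution as an rv_continuous subclass and attach it to scipy.stats so
# that later importers pick up the patched object.  The distribution below is
# a simple stand-in with pdf(x) = 2*x on [0, 1].
import numpy as np
from scipy import stats
from scipy.stats.distributions import rv_continuous

class _demo_gen(rv_continuous):
    """Toy distribution used only to illustrate the monkey-patch pattern."""
    def _pdf(self, x):
        return 2.0 * x

# monkey-patch style attachment, analogous to fixing up scipy.stats.rdist
stats.demo = _demo_gen(a=0.0, b=1.0, name='demo')
assert np.isclose(stats.demo.cdf(0.5), 0.25)   # integral of 2x from 0 to 0.5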