def apply_falign(ds, ha):
    subjects = ds.sa['subject_id'].unique
    rois = ds.fa['annotation'].unique  # FIXME: roi cannot be a fa
    hemis = ds.fa['hemi'].unique
    sds = []
    for subject in subjects:
        rds = []
        for roi in rois:
            hds = []
            for hemi in hemis:
                select = ({'subject_id': [subject]},
                          {'annotation': [roi], 'hemi': [hemi]})
                mds = ha[hemi][roi][subject].forward(ds[select])
                mds.fa['annotation'] = ds[select].fa['annotation']
                mds.fa['hemi'] = ds[select].fa['hemi']
                hds.append(mds)
            # hemispheres are joined along features within each ROI
            rds.append(hstack(hds))
        # ROIs are joined along features per subject
        sds.append(hstack(rds))
    # subjects are stacked along samples
    return vstack(sds)
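# Hedged sketch (not from the source) of the stacking order in apply_falign,
# using plain numpy arrays in place of PyMVPA datasets. `ha` is assumed to be
# a nested mapping hemi -> roi -> subject of trained mappers exposing
# .forward(); the names below are hypothetical placeholders.
import numpy as np
subjects_, rois_, hemis_ = ['s1', 's2'], ['V1'], ['lh', 'rh']
blocks = {(s, r, h): np.full((2, 3), hash((s, r, h)) % 7)
          for s in subjects_ for r in rois_ for h in hemis_}
aligned = np.vstack([                      # subjects stack as samples
    np.hstack([                            # ROIs stack as features
        np.hstack([blocks[(s, r, h)]       # hemispheres stack as features
                   for h in hemis_])
        for r in rois_])
    for s in subjects_])
assert aligned.shape == (2 * 2, 3 * 2)     # (nsubj * nsamples, nhemi * nfeat)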
def results_fx(sl=None, dataset=None, roi_ids=None, results=None):
    """It will "process" the results by removing those files
    generated inside the measure
    """
    res = []
    print_("READY")
    for x in results:
        ok_(isinstance(x, list))
        res.append(x)
        print_("R: ", x)
        for r in x:
            # Can happen if we requested those .ca's enabled
            # -- then automagically _proc_block would wrap
            # results in a dataset...  Originally detected by
            # running with MVPA_DEBUG=.* which triggered
            # enabling all ca's
            if is_datasetlike(r):
                r = np.asscalar(r.samples)
            os.unlink(r)  # remove generated file
        print_("WAITING")

    results_ds = hstack(sum(res, []))

    # store the center ids as a feature attribute since we use
    # them for testing
    results_ds.fa['center_ids'] = roi_ids
    return results_ds
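# Small aside on the idiom above: `sum(res, [])` flattens a list of lists
# before the hstack, e.g.:
nested = [['a.res', 'b.res'], ['c.res']]
assert sum(nested, []) == ['a.res', 'b.res', 'c.res']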
def _sl_call(self, dataset, roi_ids, nproc):
    """Classical generic searchlight implementation
    """
    assert(self.results_backend in ('native', 'hdf5'))
    # compute
    if nproc is not None and nproc > 1:
        # split all target ROI centers into `nproc` equally sized blocks
        nproc_needed = min(len(roi_ids), nproc)
        roi_blocks = np.array_split(roi_ids, nproc_needed)

        # the next block sets up the infrastructure for parallel computing
        # this can easily be changed into a ParallelPython loop, if we
        # decide to have a PP job server in PyMVPA
        import pprocess
        p_results = pprocess.Map(limit=nproc_needed)
        if __debug__:
            debug('SLC', "Starting off child processes for nproc=%i"
                  % nproc_needed)
        compute = p_results.manage(
            pprocess.MakeParallel(self._proc_block))
        for iblock, block in enumerate(roi_blocks):
            # should we maybe deepcopy the measure to have a unique and
            # independent one per process?
            compute(block, dataset, copy.copy(self.__datameasure),
                    iblock=iblock)

        # collect results
        results = []
        if self.ca.is_enabled('roi_sizes'):
            roi_sizes = []
        else:
            roi_sizes = None

        for r, rsizes in p_results:
            results += self.__handle_results(r)
            if roi_sizes is not None:
                roi_sizes += rsizes
    else:
        # otherwise collect the results in a list
        results, roi_sizes = \
            self._proc_block(roi_ids, dataset, self.__datameasure)
        results = self.__handle_results(results)

    if __debug__ and 'SLC' in debug.active:
        debug('SLC', '')            # just newline
        resshape = len(results) and np.asanyarray(results[0]).shape or 'N/A'
        debug('SLC', ' hstacking %d results of shape %s'
              % (len(results), resshape))

    # but be careful: this call also serves as conversion from parallel
    # maps to regular lists!
    # this uses the Dataset-hstack
    result_ds = hstack(results)
    if self.ca.is_enabled('roi_feature_ids'):
        self.ca.roi_feature_ids = [r.a.roi_feature_ids for r in results]

    if __debug__:
        debug('SLC', " hstacked shape %s" % (result_ds.shape,))

    return result_ds, roi_sizes
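# Illustration of the ROI blocking used above: np.array_split tolerates a
# block count that does not divide the number of ROI ids evenly, so every
# child process gets a nearly equal share.
import numpy as np
roi_ids_demo = np.arange(10)
blocks_demo = np.array_split(roi_ids_demo, 3)
assert [b.tolist() for b in blocks_demo] == [[0, 1, 2, 3], [4, 5, 6],
                                             [7, 8, 9]]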
def _concat_results(sl=None, dataset=None, roi_ids=None, results=None):
    """The simplest implementation for collecting the results --
    just put them into a list.

    This implementation simply collects the results into a list and uses
    only `sl` for assigning conditional attributes.  A custom
    implementation might make use of more (or fewer) of the arguments.

    Implemented as a @staticmethod just to emphasize that in principle
    it is independent of the actual searchlight instance.
    """
    # collect results
    results = sum(results, [])

    if __debug__ and 'SLC' in debug.active:
        debug('SLC', '')            # just newline
        resshape = len(results) and np.asanyarray(results[0]).shape or 'N/A'
        debug('SLC', ' hstacking %d results of shape %s'
              % (len(results), resshape))

    # but be careful: this call also serves as conversion from parallel
    # maps to regular lists!
    # this uses the Dataset-hstack
    result_ds = hstack(results)

    if __debug__:
        debug('SLC', " hstacked shape %s" % (result_ds.shape,))

    if sl.ca.is_enabled('roi_feature_ids'):
        sl.ca.roi_feature_ids = [r.a.roi_feature_ids for r in results]
    if sl.ca.is_enabled('roi_sizes'):
        sl.ca.roi_sizes = [r.a.roi_sizes for r in results]
    if sl.ca.is_enabled('roi_center_ids'):
        sl.ca.roi_center_ids = [r.a.roi_center_ids for r in results]

    if 'mapper' in dataset.a:
        # since we know the space we can stick the original mapper into
        # the results as well
        if roi_ids is None:
            result_ds.a['mapper'] = copy.copy(dataset.a.mapper)
        else:
            # there is an additional selection step that needs to be
            # expressed by another mapper
            mapper = copy.copy(dataset.a.mapper)

            # NNO: if the original mapper has no append (because it is not
            # a chainmapper, for example), we make our own chainmapper.
            feat_sel_mapper = StaticFeatureSelection(
                roi_ids, dshape=dataset.shape[1:])
            if hasattr(mapper, 'append'):
                mapper.append(feat_sel_mapper)
            else:
                mapper = ChainMapper([dataset.a.mapper, feat_sel_mapper])

            result_ds.a['mapper'] = mapper

    # store the center ids as a feature attribute
    result_ds.fa['center_ids'] = roi_ids

    return result_ds
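# Hedged sketch of the mapper fallback above: a ChainMapper can simply grow
# by append(), while anything else gets wrapped into a new two-stage chain.
# Duck typing on `append` keeps both cases uniform. The classes below are
# hypothetical minimal stand-ins, not PyMVPA classes.
class _FlatMapper(object):      # stand-in: has no append()
    pass

class _Chain(list):             # stand-in: list already has append()
    pass

def _extend_mapper(mapper, extra):
    if hasattr(mapper, 'append'):
        mapper.append(extra)
        return mapper
    return _Chain([mapper, extra])

assert isinstance(_extend_mapper(_FlatMapper(), 'sel'), _Chain)
assert _extend_mapper(_Chain(['m']), 'sel') == ['m', 'sel']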
def test_surf_ring_queryengine(self):
    s = surf.generate_plane((0, 0, 0), (0, 1, 0), (0, 0, 1), 4, 5)
    # add second layer
    s2 = surf.merge(s, (s + (.01, 0, 0)))
    ds = Dataset(samples=np.arange(20)[np.newaxis],
                 fa=dict(node_indices=np.arange(39, 0, -2)))
    # add more features (with shared node indices)
    ds3 = hstack((ds, ds, ds))
    radius = 2.5
    inner_radius = 1.0
    # Makes sure it raises error if inner_radius is >= radius
    assert_raises(ValueError,
                  lambda: queryengine.SurfaceRingQueryEngine(
                      surface=s2, inner_radius=2.5, radius=radius))
    distance_metrics = ('euclidean', 'dijkstra', 'euclidean', 'dijkstra')
    for distance_metric, include_center in zip(distance_metrics,
                                               [True, False] * 2):
        qe = queryengine.SurfaceRingQueryEngine(
            surface=s2, radius=radius, inner_radius=inner_radius,
            distance_metric=distance_metric,
            include_center=include_center)

        # untrained qe should give errors
        assert_raises(ValueError, lambda: qe.ids)
        assert_raises(ValueError, lambda: qe.query_byid(0))

        # node index out of bounds should give error
        ds_ = ds.copy()
        ds_.fa.node_indices[0] = 100
        assert_raises(ValueError, lambda: qe.train(ds_))

        # lack of node indices should give error
        ds_.fa.pop('node_indices')
        assert_raises(ValueError, lambda: qe.train(ds_))

        # train the qe
        qe.train(ds3)

        for node in np.arange(-1, s2.nvertices + 1):
            if node < 0 or node >= s2.nvertices:
                assert_raises(KeyError, lambda: qe.query_byid(node))
                continue

            feature_ids = np.asarray(qe.query_byid(node))

            # node indices relative to ds
            base_ids = feature_ids[feature_ids < 20]

            # should have multiples of 20
            assert_equal(set(feature_ids),
                         set((base_ids[np.newaxis].T
                              + [0, 20, 40]).ravel()))

            node_indices = s2.circlearound_n2d(
                node, radius, distance_metric or 'dijkstra')

            fa_indices = [fa_index
                          for fa_index, inode
                          in enumerate(ds3.fa.node_indices)
                          if inode in node_indices
                          and node_indices[inode] > inner_radius]
            if include_center and node in ds3.fa.node_indices:
                fa_indices += np.where(
                    ds3.fa.node_indices == node)[0].tolist()
            assert_equal(set(feature_ids), set(fa_indices))
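# Worked detail for the assertions above: ds has 20 features whose
# node_indices run [39, 37, ..., 1]; hstack((ds, ds, ds)) repeats those
# nodes at feature offsets 0, 20 and 40, which is why every base id must
# reappear shifted by multiples of 20. The base_ids values below are
# hypothetical hits chosen for illustration.
import numpy as np
node_indices_demo = np.arange(39, 0, -2)
assert len(node_indices_demo) == 20
base_ids_demo = np.array([3, 7])
tripled = (base_ids_demo[np.newaxis].T + [0, 20, 40]).ravel()
assert tripled.tolist() == [3, 23, 43, 7, 27, 47]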
def test_surf_queryengine(self, qefn):
    s = surf.generate_plane((0, 0, 0), (0, 1, 0), (0, 0, 1), 4, 5)
    # add second layer
    s2 = surf.merge(s, (s + (.01, 0, 0)))
    ds = Dataset(samples=np.arange(20)[np.newaxis],
                 fa=dict(node_indices=np.arange(39, 0, -2)))
    # add more features (with shared node indices)
    ds3 = hstack((ds, ds, ds))
    radius = 2.5

    # Note: sweepargs is not used to avoid re-generating the same
    # surface and dataset multiple times.
    for distance_metric in ('euclidean', 'dijkstra', '<illegal>', None):
        builder = lambda: queryengine.SurfaceQueryEngine(
            s2, radius, distance_metric)
        if distance_metric in ('<illegal>', None):
            assert_raises(ValueError, builder)
            continue

        qe = builder()

        # test i/o and ensure that the untrained instance is not trained
        if externals.exists('h5py'):
            h5save(qefn, qe)
            qe = h5load(qefn)

        # untrained qe should give errors
        assert_raises(ValueError, lambda: qe.ids)
        assert_raises(ValueError, lambda: qe.query_byid(0))

        # node index out of bounds should give error
        ds_ = ds.copy()
        ds_.fa.node_indices[0] = 100
        assert_raises(ValueError, lambda: qe.train(ds_))

        # lack of node indices should give error
        ds_.fa.pop('node_indices')
        assert_raises(ValueError, lambda: qe.train(ds_))

        # train the qe
        qe.train(ds3)

        # test i/o and ensure that the loaded instance is trained
        if externals.exists('h5py'):
            h5save(qefn, qe)
            qe = h5load(qefn)

        for node in np.arange(-1, s2.nvertices + 1):
            if node < 0 or node >= s2.nvertices:
                assert_raises(KeyError, lambda: qe.query_byid(node))
                continue

            feature_ids = np.asarray(qe.query_byid(node))

            # node indices relative to ds
            base_ids = feature_ids[feature_ids < 20]

            # should have multiples of 20
            assert_equal(set(feature_ids),
                         set((base_ids[np.newaxis].T
                              + [0, 20, 40]).ravel()))

            node_indices = list(s2.circlearound_n2d(
                node, radius, distance_metric or 'dijkstra'))

            fa_indices = [fa_index
                          for fa_index, inode
                          in enumerate(ds3.fa.node_indices)
                          if inode in node_indices]

            assert_equal(set(feature_ids), set(fa_indices))

    # smoke tests
    assert_true('SurfaceQueryEngine' in '%s' % qe)
    assert_true('SurfaceQueryEngine' in '%r' % qe)
def test_surf_voxel_selection(self):
    vol_shape = (10, 10, 10)
    vol_affine = np.identity(4)
    vol_affine[0, 0] = vol_affine[1, 1] = vol_affine[2, 2] = 5
    vg = volgeom.VolGeom(vol_shape, vol_affine)

    density = 10

    outer = surf.generate_sphere(density) * 25. + 15
    inner = surf.generate_sphere(density) * 20. + 15

    vs = volsurf.VolSurfMaximalMapping(vg, outer, inner)

    nv = outer.nvertices

    # select under a variety of parameters
    # parameters are distance metric (dijkstra or euclidean),
    # radius, and number of searchlight centers
    params = [('d', 1., 10), ('d', 1., 50), ('d', 1., 100),
              ('d', 2., 100), ('e', 2., 100), ('d', 2., 100),
              ('d', 20, 100), ('euclidean', 5, None),
              ('dijkstra', 10, None)]

    # function that indicates for which parameters the full test is run
    test_full = lambda x: len(x[0]) > 1 or x[2] == 100

    expected_labs = ['grey_matter_position', 'center_distances']

    voxcount = []
    tested_double_features = False

    for param in params:
        distance_metric, radius, ncenters = param
        srcs = range(0, nv, nv // (ncenters or nv))
        sel = surf_voxel_selection.voxel_selection(
            vs, radius, source_surf_nodes=srcs,
            distance_metric=distance_metric)

        # see how many voxels were selected
        vg = sel.volgeom
        datalin = np.zeros((vg.nvoxels, 1))

        mp = sel
        for k, idxs in mp.iteritems():
            if idxs is not None:
                datalin[idxs] = 1

        voxcount.append(np.sum(datalin))

        if test_full(param):
            assert_equal(np.sum(datalin), np.sum(sel.get_mask()))

            assert_true(len('%s%r' % (sel, sel)) > 0)

            # see if voxels containing inner and outer
            # nodes were selected
            for sf in [inner, outer]:
                for k, idxs in mp.iteritems():
                    xyz = np.reshape(sf.vertices[k, :], (1, 3))
                    linidx = vg.xyz2lin(xyz)

                    # only required if xyz is actually within the volume
                    assert_equal(linidx in idxs, vg.contains_lin(linidx))

            # check that it has all the attributes
            labs = sel.aux_keys()
            assert_true(all([lab in labs for lab in expected_labs]))

            if externals.exists('h5py'):
                # some I/O testing
                fd, fn = tempfile.mkstemp('.h5py', 'test')
                os.close(fd)
                h5save(fn, sel)

                sel2 = h5load(fn)
                os.remove(fn)

                assert_equal(sel, sel2)
            else:
                sel2 = sel

            # check that mask is OK even after I/O
            assert_array_equal(sel.get_mask(), sel2.get_mask())

            # test I/O with surfaces
            # XXX the @tempfile decorator only supports a single filename
            #     hence this method does not use it
            fd, outerfn = tempfile.mkstemp('outer.asc', 'test')
            os.close(fd)
            fd, innerfn = tempfile.mkstemp('inner.asc', 'test')
            os.close(fd)
            fd, volfn = tempfile.mkstemp('vol.nii', 'test')
            os.close(fd)

            surf.write(outerfn, outer, overwrite=True)
            surf.write(innerfn, inner, overwrite=True)

            img = sel.volgeom.get_empty_nifti_image()
            img.to_filename(volfn)

            sel3 = surf_voxel_selection.run_voxel_selection(
                radius, volfn, innerfn, outerfn,
                source_surf_nodes=srcs,
                distance_metric=distance_metric)

            outer4 = surf.read(outerfn)
            inner4 = surf.read(innerfn)
            vsm4 = vs = volsurf.VolSurfMaximalMapping(vg, inner4, outer4)

            # check that two ways of voxel selection match
            sel4 = surf_voxel_selection.voxel_selection(
                vsm4, radius, source_surf_nodes=srcs,
                distance_metric=distance_metric)

            assert_equal(sel3, sel4)

            os.remove(outerfn)
            os.remove(innerfn)
            os.remove(volfn)

            # compare sel3 with other selection results
            # NOTE: which voxels are precisely selected by sel can be quite
            #       off from those in sel3, as writing the surfaces imposes
            #       rounding errors and the sphere is very symmetric, which
            #       means that different neighboring nodes are selected
            #       to select a certain number of voxels.
            sel3cmp_difference_ratio = [(sel, .2), (sel4, 0.)]

            for selcmp, ratio in sel3cmp_difference_ratio:
                nunion = ndiff = 0

                for k in selcmp.keys():
                    p = set(sel3.get(k))
                    q = set(selcmp.get(k))
                    nunion += len(p.union(q))
                    ndiff += len(p.symmetric_difference(q))

                assert_true(float(ndiff) / float(nunion) <= ratio)

            # check searchlight call
            # as of late Aug 2012, this is with the fancy query engine
            # as implemented by Yarik
            mask = sel.get_mask()
            keys = None if ncenters is None else sel.keys()

            dset_data = np.reshape(np.arange(vg.nvoxels), vg.shape)
            dset_img = nb.Nifti1Image(dset_data, vg.affine)
            dset = fmri_dataset(samples=dset_img, mask=mask)

            qe = queryengine.SurfaceVerticesQueryEngine(
                sel,
                # you can optionally add additional
                # information about each near-disk voxel
                add_fa=['center_distances', 'grey_matter_position'])

            # test i/o ensuring that when loading it is still trained
            if externals.exists('h5py'):
                fd, qefn = tempfile.mkstemp('qe.hdf5', 'test')
                os.close(fd)
                h5save(qefn, qe)
                qe = h5load(qefn)
                os.remove(qefn)

            assert_false('ERROR' in repr(qe))  # to check if repr works
            voxelcounter = _Voxel_Count_Measure()
            searchlight = Searchlight(
                voxelcounter, queryengine=qe, roi_ids=keys, nproc=1,
                enable_ca=['roi_feature_ids', 'roi_center_ids'])
            sl_dset = searchlight(dset)

            selected_count = sl_dset.samples[0, :]
            mp = sel
            for i, k in enumerate(sel.keys()):
                # check that the number of selected voxels matches
                assert_equal(selected_count[i], len(mp[k]))

            assert_equal(searchlight.ca.roi_center_ids, sel.keys())

            assert_array_equal(sl_dset.fa['center_ids'], qe.ids)

            # check nearest node is *really* the nearest node
            allvx = sel.get_targets()
            intermediate = outer * .5 + inner * .5

            for vx in allvx:
                nearest = sel.target2nearest_source(vx)

                xyz = intermediate.vertices[nearest, :]
                sqsum = np.sum((xyz - intermediate.vertices) ** 2, 1)

                idx = np.argmin(sqsum)
                assert_equal(idx, nearest)

            if not tested_double_features:  # test only once
                # if we have multiple features for the same voxel, we
                # should get them all
                dset1 = dset.copy()
                dset1.fa['dset'] = [1]
                dset2 = dset.copy()
                dset2.fa['dset'] = [2]
                dset_ = hstack((dset1, dset2), 'drop_nonunique')
                dset_.sa = dset1.sa
                # dset_.a.imghdr = dset1.a.imghdr
                assert_true('imghdr' in dset_.a.keys())
                assert_equal(dset_.a['imghdr'].value,
                             dset1.a['imghdr'].value)
                roi_feature_ids = searchlight.ca.roi_feature_ids
                sl_dset_ = searchlight(dset_)
                # and we should get twice the counts
                assert_array_equal(sl_dset_.samples, sl_dset.samples * 2)

                # compare old and new roi_feature_ids
                assert(len(roi_feature_ids) ==
                       len(searchlight.ca.roi_feature_ids))
                nfeatures = dset.nfeatures
                for old, new in zip(roi_feature_ids,
                                    searchlight.ca.roi_feature_ids):
                    # each new set of ids should comprise the old ones
                    # plus (old + nfeatures), since we hstack'ed two
                    # datasets
                    assert_array_equal(
                        np.hstack([(x, x + nfeatures) for x in old]),
                        new)
                tested_double_features = True

    # check whether the number of voxels selected is as expected
    expected_voxcount = [22, 93, 183, 183, 183, 183, 183, 183, 183]

    assert_equal(voxcount, expected_voxcount)
def _call(self, ds):
    # local binding
    generator = self._generator
    node = self._node
    ca = self.ca
    space = self.get_space()
    concat_as = self._concat_as

    if self.ca.is_enabled("stats") and ("stats" not in node.ca
                                        or not node.ca.is_enabled("stats")):
        warning("'stats' conditional attribute was enabled, but "
                "the assigned node '%s' either doesn't support it, "
                "or it is disabled" % node)

    # precharge conditional attributes
    ca.datasets = []

    # run the node on all generated datasets
    results = []
    for i, sds in enumerate(generator.generate(ds)):
        if __debug__:
            debug('REPM', "%d-th iteration of %s on %s", (i, self, sds))
        if ca.is_enabled("datasets"):
            # store dataset in ca
            ca.datasets.append(sds)
        # run the beast
        result = node(sds)
        # callback
        if self._callback is not None:
            self._callback(data=sds, node=node, result=result)
        # subclass postprocessing
        result = self._repetition_postcall(sds, node, result)
        if space:
            # XXX maybe try to get something more informative from the
            # processing node (e.g. in 0.5 it used to be 'chunks'->'chunks'
            # to indicate what was trained and what was tested. Now it is
            # more tricky, because `node` could be anything
            result.set_attr(space, (i,))
        # store
        results.append(result)

        if ca.is_enabled("stats") and "stats" in node.ca \
           and node.ca.is_enabled("stats"):
            if not ca.is_set('stats'):
                # create empty stats container of matching type
                ca.stats = node.ca['stats'].value.__class__()
            # harvest summary stats
            ca['stats'].value.__iadd__(node.ca['stats'].value)

    # charge conditional attribute
    self.ca.repetition_results = results

    # stack all results into a single Dataset
    if concat_as == 'samples':
        results = vstack(results)
    elif concat_as == 'features':
        results = hstack(results)
    else:
        raise ValueError("Unknown concatenation mode '%s'" % concat_as)

    # no need to store the raw results, since the Measure class will
    # automatically store them in a CA
    return results
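# Numpy analogue of the two concatenation modes above (PyMVPA's Dataset
# vstack/hstack behave similarly for the samples array): per-repetition
# results become rows under 'samples' and columns under 'features'.
import numpy as np
per_rep = [np.full((1, 4), i) for i in range(3)]
assert np.vstack(per_rep).shape == (3, 4)    # concat_as == 'samples'
assert np.hstack(per_rep).shape == (1, 12)   # concat_as == 'features'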
def join_hemispheres(lhds, rhds):
    lhds.fa['hemi'] = ['lh'] * lhds.nfeatures
    rhds.fa['hemi'] = ['rh'] * rhds.nfeatures
    return hstack([lhds, rhds])
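# Hedged usage sketch for join_hemispheres, assuming PyMVPA Datasets with
# equal numbers of samples (feature counts may differ per hemisphere):
#
#   from mvpa2.datasets import Dataset
#   import numpy as np
#   lhds = Dataset(np.zeros((5, 10)))
#   rhds = Dataset(np.zeros((5, 12)))
#   both = join_hemispheres(lhds, rhds)
#   assert both.nfeatures == 22
#   assert set(both.fa.hemi) == set(['lh', 'rh'])
#
# Note that the function labels its inputs in place via fa['hemi'] before
# stacking, so lhds and rhds are modified as a side effect.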