def __getitem__(self, args):
    """Slice the dataset and keep an attached mapper (if any) in sync.

    Supports dict args for attribute-based "fancy" selection and pushes
    non-1D feature-selection arrays through the dataset's mapper first.
    A matching ``StaticFeatureSelection`` is appended to the result's
    mapper whenever features were sliced.
    """
    # uniformize for checks below; it is not a tuple if just single slicing
    # spec is passed
    if not isinstance(args, tuple):
        args = (args, )

    # if we get a slicing array for feature selection and it is *not* 1D
    # try feeding it through the mapper (if there is any)
    # NOTE: was `self.a.has_key('mapper')` -- py2-only and inconsistent
    # with the `'mapper' in ds.a` membership test used below
    if len(args) > 1 and isinstance(args[1], np.ndarray) \
       and len(args[1].shape) > 1 \
       and 'mapper' in self.a:
        args = list(args)
        args[1] = self.a.mapper.forward1(args[1])

    # check if any of the args is a dict, which would require fancy selection
    args_ = []
    for i, arg in enumerate(args):
        if isinstance(arg, dict):
            # position 0 selects on sample attributes, 1 on feature attributes
            col = (self.sa, self.fa)[i]
            args_.append(col.match(arg))
        else:
            args_.append(arg)
    args = tuple(args_)

    # let the base do the work
    ds = super(Dataset, self).__getitem__(args)

    # and adjusting the mapper (if any)
    if len(args) > 1 and 'mapper' in ds.a:
        # create matching mapper
        # the mapper is just appended to the dataset. It could also be
        # actually used to perform the slicing and prevent duplication of
        # functionality between the Dataset.__getitem__ and the mapper.
        # However, __getitem__ is sometimes more efficient, since it can
        # slice samples and feature axis at the same time. Moreover, the
        # mvpa2.base.dataset.Dataset has no clue about mappers and should
        # be fully functional without them.
        subsetmapper = StaticFeatureSelection(
            args[1], dshape=self.samples.shape[1:])
        # do not-act forward mapping to charge the output shape of the
        # slice mapper without having it to train on a full dataset (which
        # is most likely more expensive)
        subsetmapper.forward(np.zeros((1, ) + self.shape[1:], dtype='bool'))
        # mapper is ready to use -- simply store
        ds._append_mapper(subsetmapper)
    return ds
def test_repr():
    # a mapper carrying a full slice and an explicit space name
    mapper = StaticFeatureSelection(slice(None), space='myspace')
    # its repr must evaluate back to an equivalent object
    rebuilt = eval(repr(mapper))
    assert_equal(repr(rebuilt), repr(mapper))
def test_static_reverse_doesnt_work_after_feature_selection_tuneup_1():
    # any dataset would do here
    src = datasets['uni2small'].copy()
    sel = StaticFeatureSelection(np.arange(4))
    sel.train(src)
    ds = src.get_mapped(sel)
    # reversing a single mapped sample works at this point
    sample_rev = ds.a.mapper.reverse1(ds.samples[0])
    assert_equal(sample_rev.shape, (src.nfeatures,))
    # direct feature selection on the mapped dataset
    ds_sub = ds[:, [0, 2]]
    # reversing should still work but currently fails with
    # RuntimeError: Cannot reverse-map data since the original data shape
    # is unknown. Either set `dshape` in the constructor, or call train().
    sub_rev = ds_sub.a.mapper.reverse1(ds_sub.samples[0])
    #sub_rev = _verified_reverse1(ds_sub.a.mapper, ds_sub.samples[0])
    assert_equal(sub_rev.shape, (src.nfeatures,))
def test_remove_invariant_as_a_mapper():
    from mvpa2.featsel.helpers import RangeElementSelector
    from mvpa2.featsel.base import StaticFeatureSelection, \
        SensitivityBasedFeatureSelection
    from mvpa2.testing.datasets import datasets
    from mvpa2.datasets.miscfx import remove_invariant_features

    # keep every feature whose std across samples is strictly positive
    invariance_selector = SensitivityBasedFeatureSelection(
        lambda x: np.std(x, axis=0),
        RangeElementSelector(lower=0, inclusive=False),
        train_analyzer=False,
        auto_train=True)

    ds = datasets['uni2large'].copy()
    ds.a['mapper'] = StaticFeatureSelection(np.arange(ds.nfeatures))
    ds.fa['index'] = np.arange(ds.nfeatures)
    # force two features to be constant
    ds.samples[:, [1, 8]] = 10
    ds_out = invariance_selector(ds)

    # Validate that we are getting the same results as
    # remove_invariant_features
    ds_rifs = remove_invariant_features(ds)
    assert_array_equal(ds_out.samples, ds_rifs.samples)
    assert_array_equal(ds_out.fa.index, ds_rifs.fa.index)

    # dropping the two constant features shifts the retained indices
    assert_equal(ds_out.fa.index[1], 2)
    assert_equal(ds_out.fa.index[8], 10)
def _concat_results(sl=None, dataset=None, roi_ids=None, results=None): """The simplest implementation for collecting the results -- just put them into a list This this implementation simply collects them into a list and uses only sl. for assigning conditional attributes. But custom implementation might make use of more/less of them. Implemented as @staticmethod just to emphasize that in principle it is independent of the actual searchlight instance """ # collect results results = sum(results, []) if __debug__ and 'SLC' in debug.active: debug('SLC', '') # just newline resshape = len(results) and np.asanyarray(results[0]).shape or 'N/A' debug('SLC', ' hstacking %d results of shape %s' % (len(results), resshape)) # but be careful: this call also serves as conversion from parallel maps # to regular lists! # this uses the Dataset-hstack result_ds = hstack(results) if __debug__: debug('SLC', " hstacked shape %s" % (result_ds.shape,)) if sl.ca.is_enabled('roi_feature_ids'): sl.ca.roi_feature_ids = [r.a.roi_feature_ids for r in results] if sl.ca.is_enabled('roi_sizes'): sl.ca.roi_sizes = [r.a.roi_sizes for r in results] if sl.ca.is_enabled('roi_center_ids'): sl.ca.roi_center_ids = [r.a.roi_center_ids for r in results] if 'mapper' in dataset.a: # since we know the space we can stick the original mapper into the # results as well if roi_ids is None: result_ds.a['mapper'] = copy.copy(dataset.a.mapper) else: # there is an additional selection step that needs to be # expressed by another mapper mapper = copy.copy(dataset.a.mapper) # NNO if the orignal mapper has no append (because it's not a # chainmapper, for example), we make our own chainmapper. feat_sel_mapper = StaticFeatureSelection( roi_ids, dshape=dataset.shape[1:]) if hasattr(mapper, 'append'): mapper.append(feat_sel_mapper) else: mapper = ChainMapper([dataset.a.mapper, feat_sel_mapper]) result_ds.a['mapper'] = mapper # store the center ids as a feature attribute result_ds.fa['center_ids'] = roi_ids return result_ds
def __getitem__(self, args):
    """Slice the dataset while keeping an attached mapper (if any) in sync.

    Dict arguments trigger attribute-matching selection; non-1D feature
    selection arrays are first forward-mapped through ``self.a.mapper``.
    When features are sliced, a ``StaticFeatureSelection`` reflecting the
    selection is appended to the result dataset's mapper.
    """
    # uniformize for checks below; it is not a tuple if just single slicing
    # spec is passed
    if not isinstance(args, tuple):
        args = (args,)

    # if we get a slicing array for feature selection and it is *not* 1D
    # try feeding it through the mapper (if there is any)
    # NOTE: `self.a.has_key('mapper')` was py2-only and inconsistent with
    # the `'mapper' in ds.a` check below -- use the `in` operator
    if len(args) > 1 and isinstance(args[1], np.ndarray) \
       and len(args[1].shape) > 1 \
       and 'mapper' in self.a:
        args = list(args)
        args[1] = self.a.mapper.forward1(args[1])

    # check if any of the args is a dict, which would require fancy selection
    args_ = []
    for i, arg in enumerate(args):
        if isinstance(arg, dict):
            # index 0 -> sample attributes, index 1 -> feature attributes
            col = (self.sa, self.fa)[i]
            args_.append(col.match(arg))
        else:
            args_.append(arg)
    args = tuple(args_)

    # let the base do the work
    ds = super(Dataset, self).__getitem__(args)

    # and adjusting the mapper (if any)
    if len(args) > 1 and 'mapper' in ds.a:
        # create matching mapper
        # the mapper is just appended to the dataset. It could also be
        # actually used to perform the slicing and prevent duplication of
        # functionality between the Dataset.__getitem__ and the mapper.
        # However, __getitem__ is sometimes more efficient, since it can
        # slice samples and feature axis at the same time. Moreover, the
        # mvpa2.base.dataset.Dataset has no clue about mappers and should
        # be fully functional without them.
        subsetmapper = StaticFeatureSelection(args[1],
                                              dshape=self.samples.shape[1:])
        # do not-act forward mapping to charge the output shape of the
        # slice mapper without having it to train on a full dataset (which
        # is most likely more expensive)
        subsetmapper.forward(np.zeros((1,) + self.shape[1:], dtype='bool'))
        # mapper is ready to use -- simply store
        ds._append_mapper(subsetmapper)
    return ds
def _call(self, dataset):
    """Perform the ROI search.
    """
    # local binding
    nproc = self.nproc

    if nproc is None and externals.exists('pprocess'):
        import pprocess
        try:
            nproc = pprocess.get_number_of_cores() or 1
        except AttributeError:
            warning("pprocess version %s has no API to figure out maximal "
                    "number of cores. Using 1"
                    % externals.versions['pprocess'])
            nproc = 1
    # train the queryengine
    self._queryengine.train(dataset)

    # decide whether to run on all possible center coords or just a provided
    # subset
    if isinstance(self.__roi_ids, str):
        roi_ids = dataset.fa[self.__roi_ids].value.nonzero()[0]
    elif self.__roi_ids is not None:
        roi_ids = self.__roi_ids
        # safeguard against stupidity
        if __debug__:
            if max(roi_ids) >= dataset.nfeatures:
                # call-form raise: the old `raise IndexError, "..."`
                # statement form is a SyntaxError under Python 3
                raise IndexError(
                    "Maximal center_id found is %s whenever given "
                    "dataset has only %d features"
                    % (max(roi_ids), dataset.nfeatures))
    else:
        roi_ids = np.arange(dataset.nfeatures)

    # pass to subclass
    results = self._sl_call(dataset, roi_ids, nproc)

    if 'mapper' in dataset.a:
        # since we know the space we can stick the original mapper into the
        # results as well
        if self.__roi_ids is None:
            results.a['mapper'] = copy.copy(dataset.a.mapper)
        else:
            # there is an additional selection step that needs to be
            # expressed by another mapper
            mapper = copy.copy(dataset.a.mapper)
            mapper.append(
                StaticFeatureSelection(roi_ids, dshape=dataset.shape[1:]))
            results.a['mapper'] = mapper

    # charge state
    self.ca.raw_results = results

    # return raw results, base-class will take care of transformations
    return results
def test_selects():
    """Chained feature selections must compose, leaving the mask intact."""
    mask = np.ones((3, 2), dtype='bool')
    mask[1, 1] = 0
    mask0 = mask.copy()
    data = np.arange(6).reshape(mask.shape)
    map_ = mask_mapper(mask)

    # check if any exception is thrown if we get
    # out of the outIds
    #assert_raises(IndexError, map_.select_out, [0,1,2,6])

    # remove 1,2
    map_.append(StaticFeatureSelection([0, 3, 4]))
    assert_array_equal(map_.forward1(data), [0, 4, 5])
    # remove 1 more
    map_.append(StaticFeatureSelection([0, 2]))
    assert_array_equal(map_.forward1(data), [0, 5])

    # check if original mask wasn't perturbed
    # (the original carried this comment+assert twice verbatim -- the
    # duplicate was removed)
    assert_array_equal(mask, mask0)
def test_subset_filler():
    # one base mapper with the default filler plus variants with
    # explicit filler values
    base = StaticFeatureSelection(np.arange(3))
    variants = [StaticFeatureSelection(np.arange(3), filler=value)
                for value in (0, -1, np.nan)]
    samples = np.arange(12).astype(float).reshape((2, -1))

    base.train(samples)
    forwarded = base.forward(samples)
    # the filler value must not influence the forward direction
    for mapper in [base] + variants:
        mapper.train(samples)
        assert_array_equal(forwarded, mapper.forward(samples))

    # on reverse, deselected features are padded with the filler
    restored_neg = variants[1].reverse(forwarded)
    ok_(np.all(restored_neg[:, 3:] == -1))
    restored_nan = variants[2].reverse(forwarded)
    ok_(np.all(np.isnan(restored_nan[:, 3:])))
def mask_mapper(mask=None, shape=None, space=None):
    """Factory method to create a chain of Flatten+StaticFeatureSelection Mappers

    Parameters
    ----------
    mask : None or array
      an array in the original dataspace and its nonzero elements are
      used to define the features included in the dataset. Alternatively,
      the `shape` argument can be used to define the array dimensions.
    shape : None or tuple
      The shape of the array to be mapped. If `shape` is provided instead
      of `mask`, a full mask (all True) of the desired shape is
      constructed. If `shape` is specified in addition to `mask`, the
      provided mask is extended to have the same number of dimensions.
    space : None or str
      Provided to `FlattenMapper`
    """
    if mask is None:
        if shape is None:
            # call-form raise: the py2 `raise ValueError, "..."` statement
            # form is a SyntaxError under Python 3
            raise ValueError("Either `shape` or `mask` have to be specified.")
        # make full dataspace mask if nothing else is provided
        mask = np.ones(shape, dtype='bool')
    else:
        if shape is not None:
            # expand mask to span all dimensions but first one
            # necessary e.g. if only one slice from timeseries of volumes is
            # requested.
            mask = np.array(mask, copy=False, subok=True, ndmin=len(shape))
            # check for compatibility
            if shape != mask.shape:
                raise ValueError(
                    "The mask dataspace shape %s is not "
                    "compatible with the provided shape %s."
                    % (mask.shape, shape))

    fm = FlattenMapper(shape=mask.shape, space=space)
    flatmask = fm.forward1(mask)
    mapper = ChainMapper([
        fm,
        StaticFeatureSelection(
            flatmask,
            dshape=flatmask.shape,
            oshape=(len(flatmask.nonzero()[0]), ))
    ])
    return mapper
def _train(self, ds):
    """Run the incremental feature search on dataset `ds`.

    Repeatedly evaluates every remaining candidate feature in combination
    with the already selected ones, lets ``self._fselector`` stage the
    most promising candidates, and records a performance error for each
    enlarged selection until ``self._stopping_criterion`` fires.  Whenever
    ``self._bestdetector`` flags the current selection as the best so far,
    it is announced via ``_safe_assign_slicearg``.  The error history ends
    up in ``self.ca.errors``.
    """
    # local binding
    fmeasure = self._fmeasure
    fselector = self._fselector
    scriterion = self._stopping_criterion
    bestdetector = self._bestdetector

    # init
    # Computed error for each tested features set.
    errors = []
    # feature candidate are all features in the pattern object
    candidates = list(range(ds.nfeatures))
    # initially empty list of selected features
    selected = []
    # results in here please
    results = None

    # as long as there are candidates left
    # the loop will most likely get broken earlier if the stopping
    # criterion is reached
    while len(candidates):
        # measures for all candidates
        measures = []

        # for all possible candidates
        for i, candidate in enumerate(candidates):
            if __debug__:
                debug('IFSC', "Tested %i" % i, cr=True)

            # take the new candidate and all already selected features
            # select a new temporay feature subset from the dataset
            # slice the full dataset, because for the initial iteration
            # steps this will be much mure effecient than splitting the
            # full ds into train and test at first
            fslm = StaticFeatureSelection(selected + [candidate])
            fslm.train(ds)
            candidate_ds = fslm(ds)
            # activate the dataset splitter
            dsgen = self._splitter.generate(candidate_ds)
            # and derived the dataset part that is used for computing the
            # selection criterion
            trainds = next(dsgen)
            # compute data measure on the training part of this feature set
            measures.append(fmeasure(trainds))

        # relies on ds.item() to work properly
        measures = [np.asscalar(m) for m in measures]

        # Select promissing feature candidates (staging)
        # IDs are only applicable to the current set of feature candidates
        tmp_staging_ids = fselector(measures)

        # translate into real candidate ids
        staging_ids = [candidates[i] for i in tmp_staging_ids]

        # mark them as selected and remove from candidates
        selected += staging_ids
        for i in staging_ids:
            candidates.remove(i)

        # actually run the performance measure to estimate "quality" of
        # selection
        fslm = StaticFeatureSelection(selected)
        fslm.train(ds)
        selectedds = fslm(ds)

        # split into train and test part
        trainds, testds = self._get_traintest_ds(selectedds)
        # evaluate and store
        error = self._evaluate_pmeasure(trainds, testds)
        errors.append(np.asscalar(error))

        # intermediate cleanup, so the datasets do not hand around while
        # the next candidate evaluation is computed
        del trainds
        del testds

        # Check if it is time to stop and if we got
        # the best result
        stop = scriterion(errors)
        isthebest = bestdetector(errors)

        if __debug__:
            debug('IFSC',
                  "nselected %i; error: %.4f " \
                  "best/stop=%d/%d\n" \
                  % (len(selected), errors[-1], isthebest, stop),
                  cr=True, lf=True)

        if isthebest:
            # announce desired features to the underlying slice mapper
            # do copy to survive later selections
            self._safe_assign_slicearg(copy(selected))

        # leave the loop when the criterion is reached
        if stop:
            break

    # charge state
    self.ca.errors = errors
def test_flatten():
    # 3D per-sample shape, 4 samples -> full data shape (4, 2, 2, 4)
    samples_shape = (2, 2, 4)
    data_shape = (4,) + samples_shape
    data = np.arange(np.prod(data_shape)).reshape(data_shape).view(myarray)
    pristinedata = data.copy()
    # expected flat view: each sample becomes one row of 16 features
    target = [[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31],
              [32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47],
              [48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63]]
    target = np.array(target).view(myarray)
    # expected original (unflattened) coordinate of every flat feature
    index_target = np.array([[0, 0, 0], [0, 0, 1], [0, 0, 2], [0, 0, 3],
                             [0, 1, 0], [0, 1, 1], [0, 1, 2], [0, 1, 3],
                             [1, 0, 0], [1, 0, 1], [1, 0, 2], [1, 0, 3],
                             [1, 1, 0], [1, 1, 1], [1, 1, 2], [1, 1, 3]])

    # test only flattening the first two dimensions
    fm_max = FlattenMapper(maxdims=2)
    fm_max.train(data)
    assert_equal(fm_max(data).shape, (4, 4, 4))

    # array subclass survives
    ok_(isinstance(data, myarray))

    # actually, there should be no difference between a plain FlattenMapper and
    # a chain that only has a FlattenMapper as the one element
    for fm in [FlattenMapper(space='voxel'),
               ChainMapper([FlattenMapper(space='voxel'),
                            StaticFeatureSelection(slice(None))])]:
        # not working if untrained
        assert_raises(RuntimeError,
                      fm.forward1,
                      np.arange(np.sum(samples_shape) + 1))

        fm.train(data)

        ok_(isinstance(fm.forward(data), myarray))
        ok_(isinstance(fm.forward1(data[2]), myarray))
        assert_array_equal(fm.forward(data), target)
        assert_array_equal(fm.forward1(data[2]), target[2])
        assert_raises(ValueError, fm.forward, np.arange(4))

        # all of that leaves that data unmodified
        assert_array_equal(data, pristinedata)

        # reverse mapping
        ok_(isinstance(fm.reverse(target), myarray))
        ok_(isinstance(fm.reverse1(target[0]), myarray))
        ok_(isinstance(fm.reverse(target[1:2]), myarray))
        assert_array_equal(fm.reverse(target), data)
        assert_array_equal(fm.reverse1(target[0]), data[0])
        assert_array_equal(fm.reverse1(target[0]),
                           _verified_reverse1(fm, target[0]))
        assert_array_equal(fm.reverse(target[1:2]), data[1:2])
        assert_raises(ValueError, fm.reverse, np.arange(14))

        # check one dimensional data, treated as scalar samples
        oned = np.arange(5)
        fm.train(Dataset(oned))
        # needs 2D
        assert_raises(ValueError, fm.forward, oned)
        # doesn't match mapper, since Dataset turns `oned` into (5,1)
        assert_raises(ValueError, fm.forward, oned)
        assert_equal(Dataset(oned).nfeatures, 1)

        # try dataset mode, with some feature attribute
        fattr = np.arange(np.prod(samples_shape)).reshape(samples_shape)
        ds = Dataset(data, fa={'awesome': fattr.copy()})
        assert_equal(ds.samples.shape, data_shape)
        fm.train(ds)
        dsflat = fm.forward(ds)
        ok_(isinstance(dsflat, Dataset))
        ok_(isinstance(dsflat.samples, myarray))
        assert_array_equal(dsflat.samples, target)
        # feature attribute flattens alongside the samples
        assert_array_equal(dsflat.fa.awesome, np.arange(np.prod(samples_shape)))
        assert_true(isinstance(dsflat.fa['awesome'], ArrayCollectable))
        # test index creation
        assert_array_equal(index_target, dsflat.fa.voxel)

        # and back
        revds = fm.reverse(dsflat)
        ok_(isinstance(revds, Dataset))
        ok_(isinstance(revds.samples, myarray))
        assert_array_equal(revds.samples, data)
        assert_array_equal(revds.fa.awesome, fattr)
        assert_true(isinstance(revds.fa['awesome'], ArrayCollectable))
        # the generated space attribute does not survive un-flattening
        assert_false('voxel' in revds.fa)
def test_chainmapper():
    # the chain needs at least one mapper
    assert_raises(ValueError, ChainMapper, [])
    # a typical first mapper is to flatten
    cm = ChainMapper([FlattenMapper()])

    # few container checks
    assert_equal(len(cm), 1)
    assert_true(isinstance(cm[0], FlattenMapper))

    # now training
    # come up with data
    samples_shape = (2, 2, 4)
    data_shape = (4,) + samples_shape
    data = np.arange(np.prod(data_shape)).reshape(data_shape)
    # expected flat view after the FlattenMapper
    target = [[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31],
              [32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47],
              [48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63]]
    target = np.array(target)

    # if it is not trained it knows nothing
    cm.train(data)

    # a new mapper should appear when doing feature selection
    cm.append(StaticFeatureSelection(list(range(1, 16))))
    assert_equal(cm.forward1(data[0]).shape, (15,))
    assert_equal(len(cm), 2)
    # multiple slicing
    cm.append(StaticFeatureSelection([9, 14]))
    assert_equal(cm.forward1(data[0]).shape, (2,))
    assert_equal(len(cm), 3)

    # check reproduction
    if __debug__:
        # debug mode needs special test as it enhances the repr output
        # with module info and id() appendix for objects
        import mvpa2
        cm_clone = eval(repr(cm))
        assert_equal('#'.join(repr(cm_clone).split('#')[:-1]),
                     '#'.join(repr(cm).split('#')[:-1]))
    else:
        cm_clone = eval(repr(cm))
        assert_equal(repr(cm_clone), repr(cm))

    # what happens if we retrain the whole beast on same data as before
    cm.train(data)
    assert_equal(cm.forward1(data[0]).shape, (2,))
    assert_equal(len(cm), 3)

    # let's map something
    mdata = cm.forward(data)
    # chained selections [1:16] then [9, 14] pick flat features 10 and 15
    assert_array_equal(mdata, target[:, [10, 15]])
    # and back
    rdata = cm.reverse(mdata)
    # original shape
    assert_equal(rdata.shape, data.shape)
    # content as far it could be restored
    assert_array_equal(rdata[rdata > 0], data[rdata > 0])
    assert_equal(np.sum(rdata > 0), 8)

    # Lets construct a dataset with mapper assigned and see
    # if sub-selecting a feature adjusts trailing StaticFeatureSelection
    # appropriately
    ds_subsel = Dataset.from_wizard(data, mapper=cm)[:, 1]
    tail_sfs = ds_subsel.a.mapper[-1]
    assert_equal(repr(tail_sfs),
                 'StaticFeatureSelection(slicearg=array([14]))')
def test_subset_filler():
    # identical selections of the first three features, differing only in
    # the value used to pad removed features on reverse mapping
    sel_default = StaticFeatureSelection(np.arange(3))
    sel_zero = StaticFeatureSelection(np.arange(3), filler=0)
    sel_neg = StaticFeatureSelection(np.arange(3), filler=-1)
    sel_nan = StaticFeatureSelection(np.arange(3), filler=np.nan)
    samples = np.arange(12).astype(float).reshape((2, -1))

    sel_default.train(samples)
    forwarded = sel_default.forward(samples)
    # forward mapping is unaffected by the filler choice
    for sel in (sel_default, sel_zero, sel_neg, sel_nan):
        sel.train(samples)
        assert_array_equal(forwarded, sel.forward(samples))

    # reverse mapping pads the deselected features with the filler value
    restored = sel_neg.reverse(forwarded)
    ok_(np.all(restored[:, 3:] == -1))
    restored = sel_nan.reverse(forwarded)
    ok_(np.all(np.isnan(restored[:, 3:])))
def test_subset(): data = np.array( [[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], [32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], [48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63]]) # float array doesn't work sm = StaticFeatureSelection(np.ones(16)) assert_raises(IndexError, sm.forward, data) # full mask sm = StaticFeatureSelection(slice(None)) # should not change single samples assert_array_equal(sm.forward(data[0:1].copy()), data[0:1]) # or multi-samples assert_array_equal(sm.forward(data.copy()), data) sm.train(data) # same on reverse assert_array_equal(sm.reverse(data[0:1].copy()), data[0:1]) # or multi-samples assert_array_equal(sm.reverse(data.copy()), data) # identical mappers sm_none = StaticFeatureSelection(slice(None)) sm_int = StaticFeatureSelection(np.arange(16)) sm_bool = StaticFeatureSelection(np.ones(16, dtype='bool')) sms = [sm_none, sm_int, sm_bool] # test subsets sids = [3, 4, 5, 6] bsubset = np.zeros(16, dtype='bool') bsubset[sids] = True subsets = [sids, slice(3, 7), bsubset, [3, 3, 4, 4, 6, 6, 6, 5]] # all test subset result in equivalent masks, hence should do the same to # the mapper and result in identical behavior for st in sms: for i, sub in enumerate(subsets): # shallow copy orig = copy(st) subsm = StaticFeatureSelection(sub) # should do copy-on-write for all important stuff!! 
orig += subsm # test if selection did its job if i == 3: # special case of multiplying features assert_array_equal(orig.forward1(data[0].copy()), subsets[i]) else: assert_array_equal(orig.forward1(data[0].copy()), sids) ## all of the above shouldn't change the original mapper #assert_array_equal(sm.get_mask(), np.arange(16)) # check for some bug catcher # no 3D input #assert_raises(IndexError, sm.forward, np.ones((3,2,1))) # no input of wrong length if __debug__: # checked only in __debug__ assert_raises(ValueError, sm.forward, np.ones(4)) # same on reverse #assert_raises(ValueError, sm.reverse, np.ones(16)) # invalid ids #assert_false(subsm.is_valid_inid(-1)) #assert_false(subsm.is_valid_inid(16)) # intended merge failures fsm = StaticFeatureSelection(np.arange(16)) assert_equal(fsm.__iadd__(None), NotImplemented) assert_equal(fsm.__iadd__(Dataset([2, 3, 4])), NotImplemented)
def _train(self, ds):
    """Run the incremental feature search on dataset `ds`.

    Repeatedly evaluates every remaining candidate feature together with
    the already selected ones, stages the most promising candidates via
    ``self._fselector``, and records a performance error for each enlarged
    selection until ``self._stopping_criterion`` fires.  Best selections
    are announced via ``_safe_assign_slicearg``; the error history is
    stored in ``self.ca.errors``.
    """
    # local binding
    fmeasure = self._fmeasure
    fselector = self._fselector
    scriterion = self._stopping_criterion
    bestdetector = self._bestdetector

    # init
    # Computed error for each tested features set.
    errors = []
    # feature candidate are all features in the pattern object
    # NOTE: must be a real list (not a bare py2 `range`): it is mutated
    # with .remove() below, and py3 range objects do not support that
    candidates = list(range(ds.nfeatures))
    # initially empty list of selected features
    selected = []
    # results in here please
    results = None

    # as long as there are candidates left
    # the loop will most likely get broken earlier if the stopping
    # criterion is reached
    while len(candidates):
        # measures for all candidates
        measures = []

        # for all possible candidates
        for i, candidate in enumerate(candidates):
            if __debug__:
                debug('IFSC', "Tested %i" % i, cr=True)

            # take the new candidate and all already selected features
            # select a new temporay feature subset from the dataset
            # slice the full dataset, because for the initial iteration
            # steps this will be much mure effecient than splitting the
            # full ds into train and test at first
            fslm = StaticFeatureSelection(selected + [candidate])
            fslm.train(ds)
            candidate_ds = fslm(ds)
            # activate the dataset splitter
            dsgen = self._splitter.generate(candidate_ds)
            # and derive the dataset part that is used for computing the
            # selection criterion
            # (next() builtin instead of py2-only generator .next())
            trainds = next(dsgen)
            # compute data measure on the training part of this feature set
            measures.append(fmeasure(trainds))

        # relies on ds.item() to work properly
        measures = [np.asscalar(m) for m in measures]

        # Select promissing feature candidates (staging)
        # IDs are only applicable to the current set of feature candidates
        tmp_staging_ids = fselector(measures)

        # translate into real candidate ids
        staging_ids = [candidates[i] for i in tmp_staging_ids]

        # mark them as selected and remove from candidates
        selected += staging_ids
        for i in staging_ids:
            candidates.remove(i)

        # actually run the performance measure to estimate "quality" of
        # selection
        fslm = StaticFeatureSelection(selected)
        fslm.train(ds)
        selectedds = fslm(ds)

        # split into train and test part
        trainds, testds = self._get_traintest_ds(selectedds)
        # evaluate and store
        error = self._evaluate_pmeasure(trainds, testds)
        errors.append(np.asscalar(error))

        # intermediate cleanup, so the datasets do not hand around while
        # the next candidate evaluation is computed
        del trainds
        del testds

        # Check if it is time to stop and if we got
        # the best result
        stop = scriterion(errors)
        isthebest = bestdetector(errors)

        if __debug__:
            debug('IFSC',
                  "nselected %i; error: %.4f " \
                  "best/stop=%d/%d\n" \
                  % (len(selected), errors[-1], isthebest, stop),
                  cr=True, lf=True)

        if isthebest:
            # announce desired features to the underlying slice mapper
            # do copy to survive later selections
            self._safe_assign_slicearg(copy(selected))

        # leave the loop when the criterion is reached
        if stop:
            break

    # charge state
    self.ca.errors = errors
def _call(self, dataset):
    """Perform the ROI search.
    """
    # local binding
    nproc = self.nproc

    if nproc is None and externals.exists('pprocess'):
        import pprocess
        try:
            nproc = pprocess.get_number_of_cores() or 1
        except AttributeError:
            warning("pprocess version %s has no API to figure out maximal "
                    "number of cores. Using 1"
                    % externals.versions['pprocess'])
            nproc = 1
    # train the queryengine
    self._queryengine.train(dataset)

    # decide whether to run on all possible center coords or just a provided
    # subset
    if isinstance(self.__roi_ids, str):
        roi_ids = dataset.fa[self.__roi_ids].value.nonzero()[0]
    elif self.__roi_ids is not None:
        roi_ids = self.__roi_ids
        # safeguard against stupidity
        if __debug__:
            qe_ids = self._queryengine.ids  # known to qe
            if not set(qe_ids).issuperset(roi_ids):
                raise IndexError(
                    "Some roi_ids are not known to the query engine %s: %s"
                    % (self._queryengine,
                       set(roi_ids).difference(qe_ids)))
    else:
        roi_ids = self._queryengine.ids

    # pass to subclass
    results = self._sl_call(dataset, roi_ids, nproc)

    if 'mapper' in dataset.a:
        # since we know the space we can stick the original mapper into the
        # results as well
        if self.__roi_ids is None:
            results.a['mapper'] = copy.copy(dataset.a.mapper)
        else:
            # there is an additional selection step that needs to be
            # expressed by another mapper
            mapper = copy.copy(dataset.a.mapper)

            # NNO if the orignal mapper has no append (because it's not a
            # chainmapper, for example), we make our own chainmapper.
            #
            # THe original code was:
            # mapper.append(StaticFeatureSelection(roi_ids,
            #                                      dshape=dataset.shape[1:]))
            feat_sel_mapper = StaticFeatureSelection(
                roi_ids, dshape=dataset.shape[1:])
            # hasattr instead of `'append' in dir(mapper)` -- idiomatic and
            # consistent with the sibling _concat_results implementation
            if hasattr(mapper, 'append'):
                mapper.append(feat_sel_mapper)
            else:
                mapper = ChainMapper([dataset.a.mapper,
                                      feat_sel_mapper])

            results.a['mapper'] = mapper

    # charge state
    self.ca.raw_results = results

    # return raw results, base-class will take care of transformations
    return results