def selectFeatures(self, ids, plain=False, sort=False):
    """Return a dataset restricted to the features listed in `ids`.

    Works like Dataset.selectFeatures(), and in the non-plain case also
    trims the embedded mapper so that it matches the selected features.

    :Parameters:
      ids: sequence
        Iterable container with the ids of the features to keep
      plain: boolean
        If True, the selection is done on a plain Dataset wrapped around
        this dataset's data and attributes, and the mapper is not adjusted
      sort: boolean
        Whether the ids should be sorted. It is forwarded to the base
        selectFeatures() and, if True, the mapper is given the sorted ids
    """
    if plain:
        # Wrap the existing data/attributes without copying or checking
        # them and let the base implementation do the selection.
        bare = Dataset(self._data, self._dsattr,
                       check_data=False,
                       copy_samples=False,
                       copy_data=False,
                       copy_dsattr=False)
        return bare.selectFeatures(ids=ids, sort=sort)

    # Base method yields the selected feature subset.
    selected = Dataset.selectFeatures(self, ids=ids, sort=sort)

    # Give the new dataset its own deep copy of the mapper before
    # trimming it down to the selected features.
    selected._dsattr['mapper'] = copy.deepcopy(selected._dsattr['mapper'])
    mapper_ids = sorted(ids) if sort else ids
    selected._dsattr['mapper'].selectOut(mapper_ids)
    return selected
def testFeatureSelection(self):
    """Testing feature selection: sorted/not sorted, feature groups
    """
    # Work on a 10 x 20 slice of the "uni2large" fixture samples.
    source_samples = datasets["uni2large"].samples[:10, :20]
    data = Dataset(samples=source_samples, labels=2, chunks=2)

    # Four feature groups (0..3) with five consecutive features each.
    data.defineFeatureGroups(N.repeat(range(4), 5))
    reference = data.samples.copy()

    # Nothing has been selected yet, so all features must be present.
    self.failUnless(data.nfeatures == 20)

    wanted = [3, 0, 17]
    wanted_backup = list(wanted)
    wanted_sorted = sorted(wanted)

    # Boolean mask equivalent of the id list.
    mask = N.zeros(20, dtype=bool)
    mask[wanted] = True

    # Each entry pairs a selection result with a flag telling whether
    # the resulting features are expected in sorted order.
    candidates = [
        (data.selectFeatures(wanted, sort=False), False),
        (data.selectFeatures(wanted, sort=True), True),
        (data.select(slice(None), wanted), True),
        (data.select(slice(None), N.array(wanted)), True),
        (data.select(slice(None), mask), True),
    ]

    for selected, in_order in candidates:
        self.failUnless(selected.nfeatures == 3)

        # shape of the selected samples
        self.failUnless(selected.samples.shape == (10, 3))

        # the right columns, in the expected order, must be present
        if in_order:
            expected_ids = wanted_sorted
        else:
            expected_ids = wanted
        self.failUnless(
            (reference[:, expected_ids] == selected.samples).all())

        # group labels must follow the selected features
        self.failUnless(
            (selected._dsattr["featuregroups"] == [0, 0, 3]).all())

        # the id list handed in must not have been modified
        self.failUnless(wanted == wanted_backup)

    # selecting by feature group id
    by_group = data.selectFeatures(groups=[2, 3])
    self.failUnless(by_group.nfeatures == 10)
    self.failUnless(set(by_group._dsattr["featuregroups"]) == set([2, 3]))
def testCombinedPatternAndFeatureMasking(self):
    """Test feature selection followed by sample selection"""
    # 4 samples x 5 features holding the values 0..19 row by row.
    values = N.arange(20).reshape((4, 5))
    data = Dataset(samples=values, labels=1, chunks=1)

    self.failUnless(data.nsamples == 4)
    self.failUnless(data.nfeatures == 5)

    # First keep features 1 and 2, then samples 0 and 3 of the result.
    by_feature = data.selectFeatures([1, 2])
    by_both = by_feature.selectSamples([0, 3])

    self.failUnless(by_both.nsamples == 2)
    self.failUnless(by_both.nfeatures == 2)

    expected = [[1, 2], [16, 17]]
    self.failUnless((by_both.samples == expected).all())
def testApplyMapper(self):
    """Test creation of new dataset by applying a mapper"""
    mask_mapper = MaskMapper(N.array([1, 0, 1]))
    full = Dataset(samples=N.arange(12).reshape((4, 3)),
                   labels=1, chunks=1)

    mapped = full.applyMapper(featuresmapper=mask_mapper)

    # Applying the mask mapper must give the same samples as selecting
    # features 0 and 2 directly.
    direct = full.selectFeatures([0, 2])
    self.failUnless((direct.samples == mapped.samples).all())

    # Simple sanity test of the mask mapper with wrongly sized input.
    # The checks being exercised are only done in __debug__.
    if __debug__:
        # should fail since in mask we have just 2 features now
        self.failUnlessRaises(ValueError, mask_mapper.reverse,
                              [10, 20, 30])
        self.failUnlessRaises(ValueError, mask_mapper.forward, [10, 20])