예제 #1
0
파일: mapped.py 프로젝트: gorlins/PyMVPA
    def selectFeatures(self, ids, plain=False, sort=False):
        """Return a feature subset, keeping the embedded mapper in sync.

        Behaves like Dataset.selectFeatures(); in addition, unless `plain`
        is set, the mapper stored in the result's dataset attributes is
        replaced by a deep copy that has been restricted to the selected
        features via its selectOut() method.

        :Parameters:
          ids: sequence
            Ids of the features to select. The container is used twice
            in the non-plain case (once for the data, once for the mapper).
          plain: boolean
            If True, the selection is delegated to a plain Dataset built
            around this dataset's data and attributes, and no mapper
            adjustment is done by this method.
          sort: boolean
            Whether the ids should be sorted. Order matters: selectFeatures
            assumes incremental order, and (per the original notes) in
            non-optimized code it would verify the order and sort.
        """
        if plain:
            # Wrap the existing containers in a plain Dataset (all copy and
            # check flags disabled) and let it perform the selection.
            bare = Dataset(self._data, self._dsattr, check_data=False,
                           copy_samples=False, copy_data=False,
                           copy_dsattr=False)
            return bare.selectFeatures(ids=ids, sort=sort)

        # The base class implementation does the actual feature selection.
        subset = Dataset.selectFeatures(self, ids=ids, sort=sort)

        # The new dataset gets its own mapper instance before the mapper is
        # narrowed down, so the one referenced by `self` is not modified.
        attrs = subset._dsattr
        attrs['mapper'] = copy.deepcopy(attrs['mapper'])
        attrs['mapper'].selectOut(sorted(ids) if sort else ids)
        return subset
예제 #2
0
    def testFeatureSelection(self):
        """Testing feature selection: sorted/not sorted, feature groups

        Selects the same three features through several entry points
        (selectFeatures with and without sorting, and select() with a list,
        an index array and a boolean mask) and checks the resulting samples,
        the feature group information, and that the id list passed in is not
        modified. Finally checks selection by feature group id.
        """
        origdata = datasets["uni2large"].samples[:10, :20]
        data = Dataset(samples=origdata, labels=2, chunks=2)

        # define some feature groups: four groups of five consecutive
        # features each (group ids 0..3)
        data.defineFeatureGroups(N.repeat(range(4), 5))

        # reference copy of the full sample matrix to compare against
        unmasked = data.samples.copy()

        # default must be no mask
        self.assertEqual(data.nfeatures, 20)

        features_to_select = [3, 0, 17]
        # untouched copy, used to detect in-place modification of the argument
        features_to_select_copy = copy.deepcopy(features_to_select)
        features_to_select_sorted = sorted(features_to_select)

        # boolean mask equivalent of the id list
        bsel = N.array([False] * 20)
        bsel[features_to_select] = True

        # check selection with feature list; the second tuple element states
        # whether the selected features are expected in sorted order
        for sel, issorted in [
            (data.selectFeatures(features_to_select, sort=False), False),
            (data.selectFeatures(features_to_select, sort=True), True),
            (data.select(slice(None), features_to_select), True),
            (data.select(slice(None), N.array(features_to_select)), True),
            (data.select(slice(None), bsel), True),
        ]:
            self.assertEqual(sel.nfeatures, 3)

            # check size of the masked patterns
            self.assertEqual(sel.samples.shape, (10, 3))

            # check that the right features are selected
            if issorted:
                fts = features_to_select_sorted
            else:
                fts = features_to_select
            self.assertTrue((unmasked[:, fts] == sel.samples).all())

            # check grouping information: features 0 and 3 belong to group 0
            # and feature 17 to group 3, so the expectation is the same for
            # both orderings
            self.assertTrue(
                (sel._dsattr["featuregroups"] == [0, 0, 3]).all())

            # check side effect on features_to_select parameter:
            self.assertEqual(features_to_select, features_to_select_copy)

        # check selection by feature group id
        gsel = data.selectFeatures(groups=[2, 3])
        self.assertEqual(gsel.nfeatures, 10)
        self.assertEqual(set(gsel._dsattr["featuregroups"]), set([2, 3]))
예제 #3
0
    def testCombinedPatternAndFeatureMasking(self):
        """Test chaining a feature selection with a sample selection"""
        # 4 samples x 5 features holding the values 0..19 row by row
        data = Dataset(samples=N.arange(20).reshape((4, 5)), labels=1, chunks=1)

        self.assertEqual(data.nsamples, 4)
        self.assertEqual(data.nfeatures, 5)

        # first keep features 1 and 2, then samples 0 and 3 of that subset
        fsel = data.selectFeatures([1, 2])
        fpsel = fsel.selectSamples([0, 3])
        self.assertEqual(fpsel.nsamples, 2)
        self.assertEqual(fpsel.nfeatures, 2)

        # rows 0 and 3 of the original matrix, restricted to columns 1 and 2
        self.assertTrue((fpsel.samples == [[1, 2], [16, 17]]).all())
예제 #4
0
    def testApplyMapper(self):
        """Test creation of new dataset by applying a mapper"""
        mapper = MaskMapper(N.array([1, 0, 1]))
        dataset = Dataset(samples=N.arange(12).reshape((4, 3)), labels=1, chunks=1)
        seldataset = dataset.applyMapper(featuresmapper=mapper)

        # applying the mask mapper must give the same samples as selecting
        # features 0 and 2 directly
        self.assertTrue(
            (dataset.selectFeatures([0, 2]).samples == seldataset.samples).all())

        # Lets do simple test on maskmapper reverse since it seems to
        # do evil things. Those checks are done only in __debug__
        if __debug__:
            # should fail since in mask we have just 2 features now
            self.assertRaises(ValueError, mapper.reverse, [10, 20, 30])
            # likewise a 2-element input to forward is expected to be rejected
            self.assertRaises(ValueError, mapper.forward, [10, 20])