예제 #1
0
    def testEvilSelects(self):
        """Exercise unusual queries through select(), __getitem__ and where().

        A dataset with ten hand-picked labels/chunks is queried in several
        differently spelled ways that are expected to give the same result;
        malformed queries are expected to raise ValueError.
        """
        source = datasets["uni2large"].samples[:100, :10].T
        ds = Dataset(
            samples=source,
            # sample id: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 (left to right)
            labels=[8, 9, 4, 3, 3, 3, 3, 2, 8, 9],
            chunks=[1, 2, 3, 2, 3, 1, 5, 6, 3, 6],
        )

        # Malformed query; the original notes this check is only enforced
        # when running with __debug__.
        if __debug__:
            self.failUnlessRaises(ValueError, ds.__getitem__, "labels", "featu")

        # Too many indices.
        self.failUnlessRaises(ValueError, ds.__getitem__, 1, 1, 1)

        # Different spellings of one query: samples 3 and 7, all features.
        sample_only_queries = [
            ds.select("chunks", [2, 6], labels=[3, 2], features=slice(None)),
            ds.select("all", "all", labels=[2, 3], chunks=[2, 6]),
            ds["chunks", [2, 6], "labels", [3, 2]],
            ds[:, :, "chunks", [2, 6], "labels", [3, 2]],
            # triggers warnings, but should behave like the rest for now
            ds[3:8, "chunks", [2, 6, 2, 6], "labels", [3, 2]],
        ]
        expected_rows = source[[3, 7]]
        for subset in sample_only_queries:
            ids_match = N.all(subset.origids == [3, 7])
            self.failUnless(ids_match)
            self.failUnless(subset.nfeatures == 100)
            samples_match = N.all(subset.samples == expected_rows)
            self.failUnless(samples_match)

        # Same samples, now additionally restricted to features 1 and 3.
        target = expected_rows[:, [1, 3]]
        sample_and_feature_queries = [
            ds.select("all", [1, 3], "chunks", [2, 6], labels=[3, 2]),
            ds[:, [1, 3], "chunks", [2, 6], "labels", [3, 2]],
            # NOTE(review): identical to the previous entry -- possibly a
            # different spelling was intended; kept as in the original.
            ds[:, [1, 3], "chunks", [2, 6], "labels", [3, 2]],
            # triggers warnings, but should behave like the rest for now
            ds[3:8, [1, 1, 3, 1], "chunks", [2, 6, 2, 6], "labels", [3, 2]],
        ]
        for subset in sample_and_feature_queries:
            ids_match = N.all(subset.origids == [3, 7])
            self.failUnless(ids_match)
            self.failUnless(subset.nfeatures == 2)
            samples_match = N.all(subset.samples == target)
            self.failUnless(samples_match)

        # An impossible request must give an empty selection.
        nothing = ds.select(chunks=[23])
        self.failUnless(nothing.nsamples == 0)

        # .where() reports indices instead of building a dataset.
        by_chunks = ds.where(chunks=[2, 6])
        self.failUnless(N.all(by_chunks == [1, 3, 7, 9]))
        by_chunks_and_labels = ds.where(chunks=[2, 6], labels=[22, 3])
        self.failUnless(N.all(by_chunks_and_labels == [3]))

        # Querying both samples and features: element 0 is compared against
        # sample ids, element 1 against feature ids.
        both = ds.where("all", [1, 3, 10], labels=[2, 3, 4])
        self.failUnless(N.all(both[1] == [1, 3, 10]))
        self.failUnless(N.all(both[0] == range(2, 8)))

        # Empty query.
        self.failUnless(ds.where() is None)
        # Empty result.
        self.failUnless(ds.where(labels=[123]) == [])
예제 #2
0
    def testFeatureSelection(self):
        """Check feature selection by id list (sorted or not) and by group.
        """
        source = datasets["uni2large"].samples[:10, :20]
        ds = Dataset(samples=source, labels=2, chunks=2)

        # Four feature groups (ids 0..3), five consecutive features each.
        ds.defineFeatureGroups(N.repeat(range(4), 5))

        # Snapshot of the samples before any selection is made.
        pristine = ds.samples.copy()

        # By default no feature is masked out.
        self.failUnless(ds.nfeatures == 20)

        wanted = [3, 0, 17]
        wanted_before = list(wanted)
        wanted_sorted = sorted(wanted)

        # Boolean mask equivalent of `wanted`.
        mask = N.array([False] * 20)
        mask[wanted] = True

        # Each entry pairs a flag -- whether the result is expected in
        # sorted feature order -- with the selection to check.
        candidates = [
            (False, ds.selectFeatures(wanted, sort=False)),
            (True, ds.selectFeatures(wanted, sort=True)),
            (True, ds.select(slice(None), wanted)),
            (True, ds.select(slice(None), N.array(wanted))),
            (True, ds.select(slice(None), mask)),
        ]
        for expect_sorted, subset in candidates:
            self.failUnless(subset.nfeatures == 3)

            # shape of the reduced sample matrix
            self.failUnless(subset.samples.shape == (10, 3))

            # the right columns, in the expected order
            if expect_sorted:
                columns = wanted_sorted
            else:
                columns = wanted
            self.failUnless((pristine[:, columns] == subset.samples).all())

            # grouping information of the selected features
            groups = subset._dsattr["featuregroups"]
            self.failUnless((groups == [0, 0, 3]).all())

            # the caller's list must not have been modified
            self.failUnless(wanted == wanted_before)

        # Selection by feature group id.
        by_group = ds.selectFeatures(groups=[2, 3])
        self.failUnless(by_group.nfeatures == 10)
        self.failUnless(set(by_group._dsattr["featuregroups"]) == set([2, 3]))
예제 #3
0
    def testSampleSelection(self):
        """Check sample selection by index, with repeated ids, and by label.
        """
        origdata = datasets["uni2large"].samples[:100, :10].T
        data = Dataset(samples=origdata, labels=2, chunks=2)

        self.failUnless(data.nsamples == 10)

        # select a single sample; every spelling must yield exactly that
        # sample with all features kept
        for sel in [data.selectSamples(5), data.select(5), data.select(slice(5, 6))]:
            self.failUnless(sel.nsamples == 1)
            # inspect the selection itself rather than the loop-invariant
            # source dataset, otherwise the result is never verified
            self.failUnless(sel.nfeatures == 100)
            self.failUnless(sel.origids == [5])

        # check duplicate selections
        for sel in [
            data.selectSamples([5, 5]),
            # Following ones would fail since select removes
            # repetitions (XXX)
            # data.select([5,5]),
            # data.select([5,5], 'all'),
            # data.select([5,5], slice(None)),
        ]:
            self.failUnless(sel.nsamples == 2)
            # both rows must be copies of sample 5
            self.failUnless((sel.samples[0] == data.samples[5]).all())
            self.failUnless((sel.samples[0] == sel.samples[1]).all())
            self.failUnless(len(sel.labels) == 2)
            self.failUnless(len(sel.chunks) == 2)
            self.failUnless((sel.origids == [5, 5]).all())

            self.failUnless(sel.samples.shape == (2, 100))

        # check selection by labels: every sample carries label 2, so each
        # spelling must return the complete dataset
        for sel in [
            data.selectSamples(data.idsbylabels(2)),
            data.select(labels=2),
            data.select("labels", 2),
            data.select("labels", [2]),
            data["labels", [2]],
            data["labels":[2], "labels":2],
            data["labels":[2]],
        ]:
            self.failUnless(sel.nsamples == data.nsamples)
            self.failUnless(N.all(sel.samples == data.samples))

        # a label that is not present must give an empty selection
        for sel in [
            data.selectSamples(data.idsbylabels(3)),
            data.select(labels=3),
            data.select("labels", 3),
            data.select("labels", [3]),
        ]:
            self.failUnless(sel.nsamples == 0)

        # now with heterogeneous labels; labels 2 and 3 sit at ids 3, 4, 5, 7
        data = Dataset(samples=origdata, labels=[8, 9, 4, 3, 3, 3, 4, 2, 8, 9], chunks=2)
        for sel in [
            data.selectSamples(data.idsbylabels([2, 3])),
            data.select("labels", [2, 3]),
            data.select("labels", [2, 3], labels=[1, 2, 3, 4]),
            data.select("labels", [2, 3], chunks=[1, 2, 3, 4]),
            data["labels":[2, 3], "chunks":[1, 2, 3, 4]],
            data["chunks":[1, 2, 3, 4], "labels":[2, 3]],
        ]:
            self.failUnless(N.all(sel.origids == [3.0, 4.0, 5.0, 7.0]))

        # lets cause it to compute unique labels
        self.failUnless((data.uniquelabels == [2, 3, 4, 8, 9]).all())

        # select some samples removing some labels completely (label 2,
        # i.e. sample 7, is dropped)
        sel = data.selectSamples(data.idsbylabels([3, 4, 8, 9]))
        self.failUnlessEqual(Set(sel.uniquelabels), Set([3, 4, 8, 9]))
        self.failUnless((sel.origids == [0, 1, 2, 3, 4, 5, 6, 8, 9]).all())