def testEvilSelects(self):
    """Exercise unusual selections through select(), __getitem__ and where().

    Several differently spelled but equivalent selection requests are
    issued against the same dataset and each one is checked against the
    same expected outcome.
    """
    source = datasets["uni2large"].samples[:100, :10].T
    ds = Dataset(
        samples=source,
        # sample id: 0  1  2  3  4  5  6  7  8  9
        labels=[8, 9, 4, 3, 3, 3, 3, 2, 8, 9],
        chunks=[1, 2, 3, 2, 3, 1, 5, 6, 3, 6])

    # A malformed getitem is only rejected when running in __debug__ mode
    if __debug__:
        self.failUnlessRaises(
            ValueError, ds.__getitem__, "labels", "featu")

    # Too many indices must always be rejected
    self.failUnlessRaises(ValueError, ds.__getitem__, 1, 1, 1)

    # Samples 3 and 7 are the only ones with chunk in (2, 6) AND
    # label in (3, 2); every spelling below has to arrive at them.
    expected_ids = [3, 7]
    expected_rows = source[[3, 7]]
    row_only_selections = [
        ds.select("chunks", [2, 6], labels=[3, 2], features=slice(None)),
        ds.select("all", "all", labels=[2, 3], chunks=[2, 6]),
        ds["chunks", [2, 6], "labels", [3, 2]],
        ds[:, :, "chunks", [2, 6], "labels", [3, 2]],
        # triggers warnings, but for now has to behave like the others
        ds[3:8, "chunks", [2, 6, 2, 6], "labels", [3, 2]],
    ]
    for picked in row_only_selections:
        self.failUnless(N.all(picked.origids == expected_ids))
        self.failUnless(picked.nfeatures == 100)
        self.failUnless(N.all(picked.samples == expected_rows))

    # Same samples again, this time restricted to features 1 and 3
    expected_block = source[[3, 7]][:, [1, 3]]
    row_and_feature_selections = [
        ds.select("all", [1, 3], "chunks", [2, 6], labels=[3, 2]),
        ds[:, [1, 3], "chunks", [2, 6], "labels", [3, 2]],
        ds[:, [1, 3], "chunks", [2, 6], "labels", [3, 2]],
        # triggers warnings, but for now has to behave like the others
        ds[3:8, [1, 1, 3, 1], "chunks", [2, 6, 2, 6], "labels", [3, 2]],
    ]
    for picked in row_and_feature_selections:
        self.failUnless(N.all(picked.origids == expected_ids))
        self.failUnless(picked.nfeatures == 2)
        self.failUnless(N.all(picked.samples == expected_block))

    # An impossible request has to produce an empty selection
    nothing = ds.select(chunks=[23])
    self.failUnless(nothing.nsamples == 0)

    # where() reports indices instead of building a new dataset
    by_chunk = ds.where(chunks=[2, 6])
    self.failUnless(N.all(by_chunk == [1, 3, 7, 9]))
    by_chunk_and_label = ds.where(chunks=[2, 6], labels=[22, 3])
    self.failUnless(N.all(by_chunk_and_label == [3]))

    # Querying samples and features at once gives one entry per axis
    both = ds.where("all", [1, 3, 10], labels=[2, 3, 4])
    self.failUnless(N.all(both[1] == [1, 3, 10]))
    self.failUnless(N.all(both[0] == range(2, 8)))

    # A query without any criteria yields None
    self.failUnless(ds.where() is None)

    # A query that matches nothing yields an empty result
    self.failUnless(ds.where(labels=[123]) == [])
def testFeatureSelection(self):
    """Check feature selection by index list, array, mask and group id.

    Covers both the order-preserving and the sorting code paths and makes
    sure the feature-group attribute follows the selected features.
    """
    source = datasets["uni2large"].samples[:10, :20]
    ds = Dataset(samples=source, labels=2, chunks=2)

    # Four feature groups (ids 0..3), five consecutive features in each
    ds.defineFeatureGroups(N.repeat(range(4), 5))

    reference = ds.samples.copy()

    # By default no feature is masked out
    self.failUnless(ds.nfeatures == 20)

    wanted = [3, 0, 17]
    wanted_backup = copy.deepcopy(wanted)
    wanted_sorted = sorted(wanted)

    # Boolean mask that flags the very same features
    mask = N.array([False] * 20)
    mask[wanted] = True

    # Each selection is paired with the feature order it should deliver:
    # only selectFeatures(..., sort=False) keeps the requested order.
    cases = [
        (ds.selectFeatures(wanted, sort=False), wanted),
        (ds.selectFeatures(wanted, sort=True), wanted_sorted),
        (ds.select(slice(None), wanted), wanted_sorted),
        (ds.select(slice(None), N.array(wanted)), wanted_sorted),
        (ds.select(slice(None), mask), wanted_sorted),
    ]
    for picked, expected_order in cases:
        self.failUnless(picked.nfeatures == 3)

        # shape of the reduced sample matrix
        self.failUnless(picked.samples.shape == (10, 3))

        # the right columns, in the right order
        columns_match = reference[:, expected_order] == picked.samples
        self.failUnless(columns_match.all())

        # grouping information has to follow the selected features
        groups_match = picked._dsattr["featuregroups"] == [0, 0, 3]
        self.failUnless(groups_match.all())

        # the caller's index list must not have been modified
        self.failUnless(wanted == wanted_backup)

    # Selecting by group id: groups 2 and 3 hold five features each
    by_group = ds.selectFeatures(groups=[2, 3])
    self.failUnless(by_group.nfeatures == 10)
    self.failUnless(set(by_group._dsattr["featuregroups"]) == set([2, 3]))
def testSampleSelection(self):
    """Check sample selection by index, by duplicated index and by label.

    Equivalent spellings (selectSamples(), select() and __getitem__) are
    grouped together and each group is checked against the same
    expectation.
    """
    origdata = datasets["uni2large"].samples[:100, :10].T
    data = Dataset(samples=origdata, labels=2, chunks=2)

    self.failUnless(data.nsamples == 10)

    # select a single sample
    for sel in [
        data.selectSamples(5),
        data.select(5),
        data.select(slice(5, 6)),
    ]:
        self.failUnless(sel.nsamples == 1)
        # Check the selection itself: asserting on `data` here would be
        # loop-invariant and would never exercise the selected dataset.
        self.failUnless(sel.nfeatures == 100)
        self.failUnless(sel.origids == [5])

    # check duplicate selections
    for sel in [
        data.selectSamples([5, 5]),
        # Following ones would fail since select removes
        # repetitions (XXX)
        # data.select([5,5]),
        # data.select([5,5], 'all'),
        # data.select([5,5], slice(None)),
    ]:
        self.failUnless(sel.nsamples == 2)
        self.failUnless((sel.samples[0] == data.samples[5]).all())
        self.failUnless((sel.samples[0] == sel.samples[1]).all())
        self.failUnless(len(sel.labels) == 2)
        self.failUnless(len(sel.chunks) == 2)
        self.failUnless((sel.origids == [5, 5]).all())

        self.failUnless(sel.samples.shape == (2, 100))

    # check selection by labels: every sample carries label 2, so each
    # spelling has to return the complete dataset
    for sel in [
        data.selectSamples(data.idsbylabels(2)),
        data.select(labels=2),
        data.select("labels", 2),
        data.select("labels", [2]),
        data["labels", [2]],
        data["labels":[2], "labels":2],
        data["labels":[2]],
    ]:
        self.failUnless(sel.nsamples == data.nsamples)
        self.failUnless(N.all(sel.samples == data.samples))

    # not present label
    for sel in [
        data.selectSamples(data.idsbylabels(3)),
        data.select(labels=3),
        data.select("labels", 3),
        data.select("labels", [3]),
    ]:
        self.failUnless(sel.nsamples == 0)

    data = Dataset(
        samples=origdata,
        # sample id: 0  1  2  3  4  5  6  7  8  9
        labels=[8, 9, 4, 3, 3, 3, 4, 2, 8, 9],
        chunks=2)

    # labels 2 and 3 belong to samples 3, 4, 5 and 7; the additional
    # criteria below must not narrow that set any further
    for sel in [
        data.selectSamples(data.idsbylabels([2, 3])),
        data.select("labels", [2, 3]),
        data.select("labels", [2, 3], labels=[1, 2, 3, 4]),
        data.select("labels", [2, 3], chunks=[1, 2, 3, 4]),
        data["labels":[2, 3], "chunks":[1, 2, 3, 4]],
        data["chunks":[1, 2, 3, 4], "labels":[2, 3]],
    ]:
        self.failUnless(N.all(sel.origids == [3.0, 4.0, 5.0, 7.0]))

    # lets cause it to compute unique labels
    self.failUnless((data.uniquelabels == [2, 3, 4, 8, 9]).all())

    # select some samples removing some labels completely; the unique
    # labels of the selection must not include the dropped label 2
    sel = data.selectSamples(data.idsbylabels([3, 4, 8, 9]))
    # builtin set (as in testFeatureSelection) instead of sets.Set
    self.failUnlessEqual(set(sel.uniquelabels), set([3, 4, 8, 9]))
    self.failUnless((sel.origids == [0, 1, 2, 3, 4, 5, 6, 8, 9]).all())