def test_basic(self):
    dataset = data_generators.linear1d_gaussian_noise()
    k = GeneralizedLinearKernel()
    clf = GPR(k)
    clf.train(dataset)
    y = clf.predict(dataset.samples)
    assert_array_equal(y.shape, dataset.targets.shape)

def test_simple_n_minus_one_cv(self):
    data = get_mv_pattern(3)
    data.init_origids('samples')
    self.failUnless(data.nsamples == 120)
    self.failUnless(data.nfeatures == 2)
    self.failUnless(
        (data.sa.targets ==
         [0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 0, 0, 0, 0, 0] * 6).all())
    self.failUnless(
        (data.sa.chunks ==
         [k for k in range(1, 7) for i in range(20)]).all())
    assert_equal(len(np.unique(data.sa.origids)), data.nsamples)

    transerror = TransferError(sample_clf_nl)
    cv = CrossValidatedTransferError(
        transerror,
        NFoldSplitter(cvtype=1),
        enable_ca=['confusion', 'training_confusion', 'samples_error'])

    results = cv(data)
    self.failUnless((results.samples < 0.2).all()
                    and (results.samples >= 0.0).all())

    # TODO: test accessibility of {training_,}confusion{,s} of
    #       CrossValidatedTransferError

    self.failUnless(isinstance(cv.ca.samples_error, dict))
    self.failUnless(len(cv.ca.samples_error) == data.nsamples)
    # one value for each origid
    assert_array_equal(sorted(cv.ca.samples_error.keys()),
                       sorted(data.sa.origids))
    for k, v in cv.ca.samples_error.iteritems():
        self.failUnless(len(v) == 1)

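# Note on the assertions above: NFoldSplitter(cvtype=1) performs
# leave-one-chunk-out cross-validation, so with the 6 chunks the transfer
# error is estimated over 6 folds. Every sample is tested exactly once,
# which is why samples_error ends up with one single-element entry per
# origid.
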
def test_sphere_scaled():
    s1 = ne.Sphere(3)
    s = ne.Sphere(3, element_sizes=(1, 1))

    # Should give exactly the same results since element_sizes are 1s
    for p in ((0, 0), (-23, 1)):
        assert_array_equal(s1(p), s(p))
        ok_(len(s(p)) == len(set(s(p))))

    # Raise exception if query dimensionality does not match element_sizes
    assert_raises(ValueError, s, (1,))

    s = ne.Sphere(3, element_sizes=(1.5, 2))
    assert_array_equal(s((0, 0)),
                       [(-2, 0), (-1, -1), (-1, 0), (-1, 1),
                        (0, -1), (0, 0), (0, 1),
                        (1, -1), (1, 0), (1, 1), (2, 0)])

    s = ne.Sphere(1.5, element_sizes=(1.5, 1.5, 1.5))
    res = s((0, 0, 0))
    ok_(np.all([np.sqrt(np.sum(np.array(x) ** 2)) <= 1.5 for x in res]))
    ok_(len(res) == 7)

    # all neighbors are no more than 1 voxel away -- just a cube; for
    # some "sphere" effect the radius had to be 3.0 ;)
    td = np.sqrt(3 * 1.5 ** 2)
    s = ne.Sphere(td, element_sizes=(1.5, 1.5, 1.5))
    res = s((0, 0, 0))
    ok_(np.all([np.sqrt(np.sum(np.array(x) ** 2)) <= td for x in res]))
    ok_(np.all([np.sum(np.abs(x) > 1) == 0 for x in res]))
    ok_(len(res) == 27)

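# Where the counts above come from: with radius 1.5 and 1.5-unit elements,
# only the center and its six face neighbors fit (distance 1.5; the edge
# diagonal sqrt(2)*1.5 is already too far), hence 7. A radius of
# sqrt(3 * 1.5**2) exactly reaches the corner of the 3x3x3 cube of
# immediate neighbors, hence 27.
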
def test_aggregation(self):
    data = dataset_wizard(np.arange(20).reshape((4, 5)),
                          targets=1, chunks=1)
    ag_data = aggregate_features(data, np.mean)
    ok_(ag_data.nsamples == 4)
    ok_(ag_data.nfeatures == 1)
    assert_array_equal(ag_data.samples[:, 0], [2, 7, 12, 17])

def test_size_random_prototypes(self):
    self.build_vector_based_pm()
    fraction = 0.5
    prototype_number = max(int(len(self.samples) * fraction), 1)
    ## debug("MAP", "Generating " + str(prototype_number) + " random prototypes.")
    self.prototypes2 = np.array(random.sample(self.samples, prototype_number))
    self.pm2 = PrototypeMapper(similarities=self.similarities,
                               prototypes=self.prototypes2)
    self.pm2.train(self.samples)
    assert_array_equal(self.pm2.proj.shape,
                       (self.samples.shape[0],
                        self.pm2.prototypes.shape[0] * len(self.similarities)))

def test_sphere():
    # test sphere initialization
    s = ne.Sphere(1)
    center0 = (0, 0, 0)
    center1 = (1, 1, 1)
    assert_equal(len(s(center0)), 7)
    target = array([array([-1, 0, 0]),
                    array([0, -1, 0]),
                    array([0, 0, -1]),
                    array([0, 0, 0]),
                    array([0, 0, 1]),
                    array([0, 1, 0]),
                    array([1, 0, 0])])
    # test of internals -- no recomputation of increments should be done
    prev_increments = s._increments
    assert_array_equal(s(center0), target)
    ok_(prev_increments is s._increments)
    # query lower dimensionality
    _ = s((0, 0))
    ok_(not prev_increments is s._increments)

    # test Sphere call
    target = [array([0, 1, 1]),
              array([1, 0, 1]),
              array([1, 1, 0]),
              array([1, 1, 1]),
              array([1, 1, 2]),
              array([1, 2, 1]),
              array([2, 1, 1])]
    res = s(center1)
    assert_array_equal(array(res), target)
    # They all should be tuples
    ok_(np.all([isinstance(x, tuple) for x in res]))

    # test for larger diameter
    s = ne.Sphere(4)
    assert_equal(len(s(center1)), 257)

    # test extent keyword
    #s = ne.Sphere(4, extent=(1, 1, 1))
    #assert_array_equal(array(s((0, 0, 0))), array([[0, 0, 0]]))

    # test Errors during initialisation and call
    #assert_raises(ValueError, ne.Sphere, 2)
    #assert_raises(ValueError, ne.Sphere, 1.0)
    # no longer extent available
    assert_raises(TypeError, ne.Sphere, 1, extent=(1))
    assert_raises(TypeError, ne.Sphere, 1, extent=(1.0, 1.0, 1.0))

    s = ne.Sphere(1)
    #assert_raises(ValueError, s, (1))
    if __debug__:
        # No float coordinates allowed for now...
        # XXX might like to change that ;)
        assert_raises(ValueError, s, (1.0, 1.0, 1.0))

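# Several tests below (test_partitionmapper, test_attrpermute, test_splitter,
# test_balancer) rely on a module-level give_data() fixture that is not part
# of this excerpt. The sketch below is NOT the original helper, only a
# stand-in consistent with what those tests assert: 100 samples x 10
# features, chunks of 10 consecutive samples, and 4 target values.
def _give_data_sketch():
    return dataset_wizard(np.random.normal(size=(100, 10)),
                          targets=[i % 4 for i in range(100)],
                          chunks=[i // 10 for i in range(100)])
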
def test_partitionmapper():
    ds = give_data()
    oep = OddEvenPartitioner()
    parts = list(oep.generate(ds))
    assert_equal(len(parts), 2)
    for i, p in enumerate(parts):
        assert_array_equal(p.sa['partitions'].unique, [1, 2])
        assert_equal(p.a.partitions_set, i)
        assert_equal(len(p), len(ds))

def test_chainmapper():
    # the chain needs at least one mapper
    assert_raises(ValueError, ChainMapper, [])
    # a typical first mapper is to flatten
    cm = ChainMapper([FlattenMapper()])

    # a few container checks
    assert_equal(len(cm), 1)
    assert_true(isinstance(cm[0], FlattenMapper))

    # now training -- come up with data
    samples_shape = (2, 2, 4)
    data_shape = (4,) + samples_shape
    data = np.arange(np.prod(data_shape)).reshape(data_shape)
    pristinedata = data.copy()
    target = [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31],
              [32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47],
              [48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63]]
    target = np.array(target)

    # if it is not trained it knows nothing
    cm.train(data)

    # a new mapper should appear when doing feature selection
    cm.append(FeatureSliceMapper(range(1, 16)))
    assert_equal(cm.forward1(data[0]).shape, (15,))
    assert_equal(len(cm), 2)
    # multiple slicing
    cm.append(FeatureSliceMapper([9, 14]))
    assert_equal(cm.forward1(data[0]).shape, (2,))
    assert_equal(len(cm), 3)

    # check reproduction
    cm_clone = eval(repr(cm))
    assert_equal(repr(cm_clone), repr(cm))

    # what happens if we retrain the whole beast on the same data as before
    cm.train(data)
    assert_equal(cm.forward1(data[0]).shape, (2,))
    assert_equal(len(cm), 3)

    # let's map something
    mdata = cm.forward(data)
    assert_array_equal(mdata, target[:, [10, 15]])
    # and back
    rdata = cm.reverse(mdata)
    # original shape
    assert_equal(rdata.shape, data.shape)
    # content as far as it could be restored
    assert_array_equal(rdata[rdata > 0], data[rdata > 0])
    assert_equal(np.sum(rdata > 0), 8)

def test_attrpermute():
    ds = give_data()
    ds.sa['ids'] = range(len(ds))
    pristine_data = ds.samples.copy()
    permutation = AttributePermutator(['targets', 'ids'], assure=True)
    pds = permutation(ds)
    # should not touch the data
    assert_array_equal(pristine_data, pds.samples)
    # even keep the very same array
    assert_true(pds.samples.base is ds.samples)
    # there is no way that it can be the same attribute
    assert_false(np.all(pds.sa.ids == ds.sa.ids))
    # ids should reflect permutation setup
    assert_array_equal(pds.sa.targets, ds.sa.targets[pds.sa.ids])
    # other attributes should remain intact
    assert_array_equal(pds.sa.chunks, ds.sa.chunks)

    # now chunk-wise permutation
    permutation = AttributePermutator('ids', limit='chunks')
    pds = permutation(ds)
    # first ten should remain first ten
    assert_false(np.any(pds.sa.ids[:10] > 9))

    # same thing, but only permute a single chunk
    permutation = AttributePermutator('ids', limit={'chunks': 3})
    pds = permutation(ds)
    # one chunk should change
    assert_false(np.any(pds.sa.ids[30:40] > 39))
    assert_false(np.any(pds.sa.ids[30:40] < 30))
    # the rest not
    assert_array_equal(pds.sa.ids[:30], range(30))

    # or a list of chunks
    permutation = AttributePermutator('ids', limit={'chunks': [3, 4]})
    pds = permutation(ds)
    # two chunks should change
    assert_false(np.any(pds.sa.ids[30:50] > 49))
    assert_false(np.any(pds.sa.ids[30:50] < 30))
    # the rest not
    assert_array_equal(pds.sa.ids[:30], range(30))

    # and now try generating more permutations
    nruns = 2
    permutation = AttributePermutator(['targets', 'ids'],
                                      assure=True, count=nruns)
    pds = list(permutation.generate(ds))
    assert_equal(len(pds), nruns)
    for p in pds:
        assert_false(np.all(p.sa.ids == ds.sa.ids))

    # permute feature attrs
    ds.fa['ids'] = range(ds.shape[1])
    permutation = AttributePermutator('fa.ids', assure=True)
    pds = permutation(ds)
    assert_false(np.all(pds.fa.ids == ds.fa.ids))

def test_streamline_equal_mapper(self):
    self.build_streamline_things()

    self.prototypes_equal = self.dataset.samples
    self.pm = PrototypeMapper(similarities=self.similarities,
                              prototypes=self.prototypes_equal,
                              demean=False)
    self.pm.train(self.dataset.samples)
    ## debug("MAP", "projected data: " + str(self.pm.proj))
    # check size:
    assert_array_equal(self.pm.proj.shape,
                       (len(self.dataset.samples),
                        len(self.prototypes_equal) * len(self.similarities)))
    # test symmetry
    assert_array_almost_equal(self.pm.proj, self.pm.proj.T)

def test_glmnet_c():
    # define binary prob
    data = datasets['dumb2']

    # use GLMNET on binary problem
    clf = GLMNET_C()
    clf.ca.enable('estimates')

    clf.train(data)

    # test predictions
    pre = clf.predict(data.samples)

    assert_array_equal(pre, data.targets)

def test_glmnet_state():
    #data = datasets['dumb2']
    # for some reason the R code fails with the dumb data
    data = datasets['chirp_linear']

    clf = GLMNET_R()
    clf.train(data)

    clf.ca.enable('predictions')

    p = clf.predict(data.samples)

    assert_array_equal(p, clf.ca.predictions)

def test_custom_split(self):
    # simulate half splitter
    hs = CustomPartitioner([(None, [0, 1, 2, 3, 4]),
                            (None, [5, 6, 7, 8, 9])])
    spl = Splitter(attr='partitions')
    splits = [list(spl.generate(p)) for p in hs.generate(self.data)]
    self.failUnless(len(splits) == 2)

    for i, p in enumerate(splits):
        self.failUnless(len(p) == 2)
        self.failUnless(p[0].nsamples == 50)
        self.failUnless(p[1].nsamples == 50)

    assert_array_equal(splits[0][1].sa['chunks'].unique, [0, 1, 2, 3, 4])
    assert_array_equal(splits[0][0].sa['chunks'].unique, [5, 6, 7, 8, 9])
    assert_array_equal(splits[1][1].sa['chunks'].unique, [5, 6, 7, 8, 9])
    assert_array_equal(splits[1][0].sa['chunks'].unique, [0, 1, 2, 3, 4])

    # check fully customized split with working and validation set specified
    cs = CustomPartitioner([([0, 3, 4], [5, 9])])
    # we want to discard the unselected partition of the data,
    # hence attr_values
    spl = Splitter(attr='partitions', attr_values=[1, 2])
    splits = [list(spl.generate(p)) for p in cs.generate(self.data)]
    self.failUnless(len(splits) == 1)

    for i, p in enumerate(splits):
        self.failUnless(len(p) == 2)
        self.failUnless(p[0].nsamples == 30)
        self.failUnless(p[1].nsamples == 20)

    self.failUnless((splits[0][1].sa['chunks'].unique == [5, 9]).all())
    self.failUnless((splits[0][0].sa['chunks'].unique == [0, 3, 4]).all())

def test_forward_dense_array_mapper():
    mask = np.ones((3, 2), dtype='bool')
    map_ = mask_mapper(mask)

    # test shape reports
    assert_equal(map_.forward1(mask).shape, (6,))

    # test 1sample mapping
    assert_array_equal(map_.forward1(np.arange(6).reshape(3, 2)),
                       [0, 1, 2, 3, 4, 5])

    # test 4sample mapping
    foursample = map_.forward(np.arange(24).reshape(4, 3, 2))
    assert_array_equal(foursample,
                       [[0, 1, 2, 3, 4, 5],
                        [6, 7, 8, 9, 10, 11],
                        [12, 13, 14, 15, 16, 17],
                        [18, 19, 20, 21, 22, 23]])

    # check incomplete masks
    mask[1, 1] = 0
    map_ = mask_mapper(mask)
    assert_equal(map_.forward1(mask).shape, (5,))
    assert_array_equal(map_.forward1(np.arange(6).reshape(3, 2)),
                       [0, 1, 2, 4, 5])

    # check that it doesn't accept wrong dataspace
    assert_raises(ValueError, map_.forward, np.arange(4).reshape(2, 2))

    # check fail if neither mask nor shape
    assert_raises(ValueError, mask_mapper)

    # check that a full mask is automatically created when providing shape
    m = mask_mapper(shape=(2, 3, 4))
    mp = m.forward1(np.arange(24).reshape(2, 3, 4))
    assert_array_equal(mp, np.arange(24))

def test_streamline_random_mapper(self):
    self.build_streamline_things()
    # Add one more similarity to test multiple similarities in the
    # streamline case:
    self.similarities.append(StreamlineSimilarity(distance=corouge))

    fraction = 0.5
    prototype_number = max(int(len(self.dataset.samples) * fraction), 1)
    ## debug("MAP", "Generating " + str(prototype_number) + " random prototypes.")
    self.prototypes_random = self.dataset.samples[
        np.random.permutation(self.dataset.samples.size)][:prototype_number]
    ## debug("MAP", "prototypes: " + str(self.prototypes_random))

    self.pm = PrototypeMapper(similarities=self.similarities,
                              prototypes=self.prototypes_random,
                              demean=False)
    self.pm.train(self.dataset.samples)
    # test size:
    assert_array_equal(self.pm.proj.shape,
                       (len(self.dataset.samples),
                        len(self.prototypes_random) * len(self.similarities)))

def test_label_splitter(self):
    oes = OddEvenSplitter(attr='targets')

    splits = [(first, second) for (first, second) in oes(self.data)]

    assert_array_equal(splits[0][0].sa['targets'].unique, [0, 2])
    assert_array_equal(splits[0][1].sa['targets'].unique, [1, 3])
    assert_array_equal(splits[1][0].sa['targets'].unique, [1, 3])
    assert_array_equal(splits[1][1].sa['targets'].unique, [0, 2])

def test_splitter():
    ds = give_data()
    # split with defaults
    spl1 = Splitter('chunks')
    assert_raises(NotImplementedError, spl1, ds)

    splits = list(spl1.generate(ds))
    assert_equal(len(splits), len(ds.sa['chunks'].unique))

    for split in splits:
        # it should have performed basic slicing!
        assert_true(split.samples.base is ds.samples)
        assert_equal(len(split.sa['chunks'].unique), 1)
        assert_true('lastsplit' in split.a)
    assert_true(splits[-1].a.lastsplit)

    # now again, more customized
    spl2 = Splitter('targets', attr_values=[0, 1, 1, 2, 3, 3, 3], count=4,
                    noslicing=True)
    splits = list(spl2.generate(ds))
    assert_equal(len(splits), 4)
    for split in splits:
        # it should NOT have performed basic slicing!
        assert_false(split.samples.base is ds.samples)
        assert_equal(len(split.sa['targets'].unique), 1)
        assert_equal(len(split.sa['chunks'].unique), 10)
    assert_true(splits[-1].a.lastsplit)

    # two should be identical
    assert_array_equal(splits[1].samples, splits[2].samples)

    # now go wild and split by feature attribute
    ds.fa['roi'] = np.repeat([0, 1], 5)
    # splitter should auto-detect that this is a feature attribute
    spl3 = Splitter('roi')
    splits = list(spl3.generate(ds))
    assert_equal(len(splits), 2)
    for split in splits:
        assert_true(split.samples.base is ds.samples)
        assert_equal(len(split.fa['roi'].unique), 1)
        assert_equal(split.shape, (100, 5))

    # and finally test chained splitters
    cspl = ChainNode([spl2, spl3, spl1])
    splits = list(cspl.generate(ds))
    # 4 target splits and 2 roi splits each and 10 chunks each
    assert_equal(len(splits), 80)

def test_sifter():
    # somewhat duplicating the doctest
    ds = Dataset(samples=np.arange(8).reshape((4, 2)),
                 sa={'chunks': [0, 1, 2, 3],
                     'targets': ['c', 'c', 'p', 'p']})
    par = ChainNode([NFoldPartitioner(cvtype=2, attr='chunks'),
                     Sifter([('partitions', 2),
                             ('targets', ['c', 'p'])])
                     ])
    dss = list(par.generate(ds))
    assert_equal(len(dss), 4)
    for ds_ in dss:
        testing = ds[ds_.sa.partitions == 2]
        assert_array_equal(np.unique(testing.sa.targets), ['c', 'p'])
        # and we still have both targets present in training
        training = ds[ds_.sa.partitions == 1]
        assert_array_equal(np.unique(training.sa.targets), ['c', 'p'])

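# Why 4 datasets: NFoldPartitioner(cvtype=2) over the 4 chunks yields
# C(4, 2) = 6 candidate partitionings; the Sifter drops the two whose
# testing partition is single-target (chunks {0, 1} are both 'c',
# chunks {2, 3} are both 'p'), leaving 4.
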
def test_collections():
    sa = SampleAttributesCollection()
    assert_equal(len(sa), 0)

    assert_raises(ValueError, sa.__setitem__, 'test', 0)
    l = range(5)
    sa['test'] = l
    # auto-wrapped
    assert_true(isinstance(sa['test'], ArrayCollectable))
    assert_equal(len(sa), 1)

    # names which are already present in dict interface
    assert_raises(ValueError, sa.__setitem__, 'values', range(5))

    sa_c = copy.deepcopy(sa)
    assert_equal(len(sa), len(sa_c))
    assert_array_equal(sa.test, sa_c.test)

def test_label_splitter(self):
    oes = OddEvenPartitioner(attr='targets')
    spl = Splitter(attr='partitions')

    splits = [list(spl.generate(p)) for p in oes.generate(self.data)]

    assert_array_equal(splits[0][0].sa['targets'].unique, [0, 2])
    assert_array_equal(splits[0][1].sa['targets'].unique, [1, 3])
    assert_array_equal(splits[1][0].sa['targets'].unique, [1, 3])
    assert_array_equal(splits[1][1].sa['targets'].unique, [0, 2])

def test_balancer():
    ds = give_data()
    # only mark the selection in an attribute
    bal = Balancer()
    res = bal(ds)
    # we get a new dataset, with shared samples
    assert_false(ds is res)
    assert_true(ds.samples is res.samples.base)
    # should kick out 2 samples in each chunk of 10
    assert_almost_equal(np.mean(res.sa.balanced_set), 0.8)

    # same as above, but actually apply the selection
    bal = Balancer(apply_selection=True, count=5)
    # just run it once
    res = bal(ds)
    # we get a new dataset, with shared samples
    assert_false(ds is res)
    # should kick out 2 samples in each chunk of 10
    assert_equal(len(res), int(0.8 * len(ds)))

    # now use it as a generator
    dses = list(bal.generate(ds))
    assert_equal(len(dses), 5)

    # with limit
    bal = Balancer(limit={'chunks': 3}, apply_selection=True)
    res = bal(ds)
    assert_equal(res.sa['chunks'].unique, (3,))
    assert_equal(get_nelements_per_value(res.sa.targets).values(),
                 [2] * 4)
    # fixed amount
    bal = Balancer(amount=1, limit={'chunks': 3}, apply_selection=True)
    res = bal(ds)
    assert_equal(get_nelements_per_value(res.sa.targets).values(),
                 [1] * 4)
    # fraction
    bal = Balancer(amount=0.499, limit=None, apply_selection=True)
    res = bal(ds)
    assert_array_equal(
        np.round(np.array(get_nelements_per_value(ds.sa.targets).values())
                 * 0.5),
        np.array(get_nelements_per_value(res.sa.targets).values()))

    # check on feature attribute
    ds.fa['one'] = np.tile([1, 2], 5)
    ds.fa['chk'] = np.repeat([1, 2], 5)
    bal = Balancer(attr='one', amount=2, limit='chk', apply_selection=True)
    res = bal(ds)
    assert_equal(get_nelements_per_value(res.fa.one).values(),
                 [4] * 2)

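# The 0.8 above follows from the assumed give_data() layout (see the sketch
# earlier in this file): with targets cycling through 4 values, each chunk
# of 10 holds per-target counts of 3/3/2/2, so balancing down to the
# minority count of 2 per target keeps 8 of every 10 samples.
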
def test_subset_filler():
    sm = FeatureSliceMapper(np.arange(3))
    sm_f0 = FeatureSliceMapper(np.arange(3), filler=0)
    sm_fm1 = FeatureSliceMapper(np.arange(3), filler=-1)
    sm_fnan = FeatureSliceMapper(np.arange(3), filler=np.nan)
    data = np.arange(12).astype(float).reshape((2, -1))

    sm.train(data)
    data_forwarded = sm.forward(data)

    for m in (sm, sm_f0, sm_fm1, sm_fnan):
        m.train(data)
        assert_array_equal(data_forwarded, m.forward(data))

    data_back_fm1 = sm_fm1.reverse(data_forwarded)
    ok_(np.all(data_back_fm1[:, 3:] == -1))
    data_back_fnan = sm_fnan.reverse(data_forwarded)
    ok_(np.all(np.isnan(data_back_fnan[:, 3:])))

def test_subset_filler():
    sm = StaticFeatureSelection(np.arange(3))
    sm_f0 = StaticFeatureSelection(np.arange(3), filler=0)
    sm_fm1 = StaticFeatureSelection(np.arange(3), filler=-1)
    sm_fnan = StaticFeatureSelection(np.arange(3), filler=np.nan)
    data = np.arange(12).astype(float).reshape((2, -1))

    sm.train(data)
    data_forwarded = sm.forward(data)

    for m in (sm, sm_f0, sm_fm1, sm_fnan):
        m.train(data)
        assert_array_equal(data_forwarded, m.forward(data))

    data_back_fm1 = sm_fm1.reverse(data_forwarded)
    ok_(np.all(data_back_fm1[:, 3:] == -1))
    data_back_fnan = sm_fnan.reverse(data_forwarded)
    ok_(np.all(np.isnan(data_back_fnan[:, 3:])))

def test_discarded_boundaries(self):
    ds = datasets['hollow']
    # four runs
    ds.sa['chunks'] = np.repeat(np.arange(4), 10)
    # do odd even splitting for lots of boundaries in few splits
    part = ChainNode([OddEvenPartitioner(),
                      StripBoundariesSamples('chunks', 1, 2)])

    parts = [d.samples.sid for d in part.generate(ds)]

    # both datasets should have the same samples, because the boundaries
    # are identical and the same samples should be stripped
    assert_array_equal(parts[0], parts[1])

    # we strip 3 samples per boundary
    assert_equal(len(parts[0]), len(ds) - (3 * 3))

    for i in [9, 10, 11, 19, 20, 21, 29, 30, 31]:
        assert_false(i in parts[0])

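# The excluded indices follow from the chunk layout: with 4 runs of 10
# samples, the within-run boundaries fall between samples 9|10, 19|20 and
# 29|30; stripping 1 sample before and 2 after each boundary removes
# exactly {9, 10, 11, 19, 20, 21, 29, 30, 31} -- 3 boundaries x 3 samples.
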
def test_array_collectable():
    c = ArrayCollectable()

    # empty by default
    assert_equal(c.name, None)
    assert_equal(c.value, None)

    # late assignment
    c.name = 'somename'
    assert_raises(ValueError, c._set, 12345)
    assert_equal(c.value, None)
    c.value = np.arange(5)
    assert_equal(c.name, 'somename')
    assert_array_equal(c.value, np.arange(5))

    # immediate content
    data = np.random.random(size=(3, 10))
    c = ArrayCollectable(data.copy(), 'myname', "This is a test", length=3)
    assert_equal(c.name, 'myname')
    assert_array_equal(c.value, data)
    assert_equal(c.__doc__, "This is a test")
    assert_equal(str(c), 'myname')

    # repr
    from numpy import array
    e = eval(repr(c))
    assert_equal(e.name, 'myname')
    assert_array_almost_equal(e.value, data)
    assert_equal(e.__doc__, "This is a test")

    # cannot assign array of wrong length
    assert_raises(ValueError, c._set, np.arange(5))
    assert_equal(len(c), 3)

    # shallow copy DOES create a view of value array
    c.value = np.arange(3)
    d = copy.copy(c)
    assert_true(d.value.base is c.value)

    # names starting with _ are not allowed
    assert_raises(ValueError, c._set_name, "_underscore")

def test_mapper_vs_zscore():
    """Test by comparing to results of elderly z-score function"""
    # data: 40 sample feature line in 20d space (40x20; samples x features)
    dss = [
        dataset_wizard(np.concatenate(
            [np.arange(40) for i in range(20)]).reshape(20, -1).T,
            targets=1, chunks=1),
        ] + datasets.values()

    for ds in dss:
        ds1 = deepcopy(ds)
        ds2 = deepcopy(ds)

        zsm = ZScoreMapper(chunks_attr=None)
        assert_raises(RuntimeError, zsm.forward, ds1.samples)
        zsm.train(ds1)
        ds1z = zsm.forward(ds1.samples)

        zscore(ds2, chunks_attr=None)
        assert_array_almost_equal(ds1z, ds2.samples)
        assert_array_equal(ds1.samples, ds.samples)

def test_fxmapper():
    origdata = np.arange(24).reshape(3, 8)
    ds = Dataset(origdata.copy())
    ds.samples *= -1

    # test a mapper that doesn't change the shape;
    # it shouldn't matter along which axis it is applied
    m_s = FxMapper('samples', np.absolute)
    m_f = FxMapper('features', np.absolute)
    a_m = absolute_features()
    assert_array_equal(m_s.forward(ds), origdata)
    assert_array_equal(a_m.forward(ds), origdata)
    assert_array_equal(m_s.forward(ds), m_f.forward(ds))

def test_selects():
    mask = np.ones((3, 2), dtype='bool')
    mask[1, 1] = 0
    mask0 = mask.copy()
    data = np.arange(6).reshape(mask.shape)
    map_ = mask_mapper(mask)

    # check if any exception is thrown if we get out of the outIds
    #assert_raises(IndexError, map_.select_out, [0, 1, 2, 6])

    # remove 1, 2
    map_.append(StaticFeatureSelection([0, 3, 4]))
    assert_array_equal(map_.forward1(data), [0, 4, 5])
    # remove 1 more
    map_.append(StaticFeatureSelection([0, 2]))
    assert_array_equal(map_.forward1(data), [0, 5])

    # check if original mask wasn't perturbed
    assert_array_equal(mask, mask0)

def test_odd_even_split(self):
    oes = OddEvenSplitter()

    splits = [(train, test) for (train, test) in oes(self.data)]

    self.failUnless(len(splits) == 2)

    for i, p in enumerate(splits):
        self.failUnless(len(p) == 2)
        self.failUnless(p[0].nsamples == 50)
        self.failUnless(p[1].nsamples == 50)

    assert_array_equal(splits[0][1].sa['chunks'].unique, [1, 3, 5, 7, 9])
    assert_array_equal(splits[0][0].sa['chunks'].unique, [0, 2, 4, 6, 8])
    assert_array_equal(splits[1][0].sa['chunks'].unique, [1, 3, 5, 7, 9])
    assert_array_equal(splits[1][1].sa['chunks'].unique, [0, 2, 4, 6, 8])

    # check if it works on pure odd and even chunk ids
    moresplits = [(train, test) for (train, test) in oes(splits[0][0])]

    for split in moresplits:
        self.failUnless(split[0] != None)
        self.failUnless(split[1] != None)

def test_selects():
    mask = np.ones((3, 2), dtype='bool')
    mask[1, 1] = 0
    mask0 = mask.copy()
    data = np.arange(6).reshape(mask.shape)
    map_ = mask_mapper(mask)

    # check if any exception is thrown if we get out of the outIds
    #assert_raises(IndexError, map_.select_out, [0, 1, 2, 6])

    # remove 1, 2
    map_.append(FeatureSliceMapper([0, 3, 4]))
    assert_array_equal(map_.forward1(data), [0, 4, 5])
    # remove 1 more
    map_.append(FeatureSliceMapper([0, 2]))
    assert_array_equal(map_.forward1(data), [0, 5])

    # check if original mask wasn't perturbed
    assert_array_equal(mask, mask0)

def test_half_split(self):
    hs = HalfSplitter()

    splits = [(train, test) for (train, test) in hs(self.data)]

    self.failUnless(len(splits) == 2)

    for i, p in enumerate(splits):
        self.failUnless(len(p) == 2)
        self.failUnless(p[0].nsamples == 50)
        self.failUnless(p[1].nsamples == 50)

    assert_array_equal(splits[0][1].sa['chunks'].unique, [0, 1, 2, 3, 4])
    assert_array_equal(splits[0][0].sa['chunks'].unique, [5, 6, 7, 8, 9])
    assert_array_equal(splits[1][1].sa['chunks'].unique, [5, 6, 7, 8, 9])
    assert_array_equal(splits[1][0].sa['chunks'].unique, [0, 1, 2, 3, 4])

    # check if it works on pure odd and even chunk ids
    moresplits = [(train, test) for (train, test) in hs(splits[0][0])]

    for split in moresplits:
        self.failUnless(split[0] != None)
        self.failUnless(split[1] != None)

def test_featuregroup_mapper():
    ds = Dataset(np.arange(24).reshape(3, 8))
    ds.fa['roi'] = [0, 1] * 4
    # just to check
    ds.sa['chunks'] = np.arange(3)

    # correct results
    csamples = [[3, 4], [11, 12], [19, 20]]
    croi = [0, 1]
    cchunks = np.arange(3)

    m = mean_group_feature(['roi'])
    mds = m.forward(ds)
    assert_equal(mds.shape, (3, 2))
    assert_array_equal(mds.samples, csamples)
    assert_array_equal(mds.fa.roi, np.unique([0, 1] * 4))
    # SAs should simply remain the same
    assert_array_equal(mds.sa.chunks, np.arange(3))

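# The expected means follow directly: in row 0 ([0..7]) the roi-0 features
# are columns 0, 2, 4, 6 with mean (0 + 2 + 4 + 6) / 4 = 3, and the roi-1
# features are columns 1, 3, 5, 7 with mean 4; each subsequent row adds 8.
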
def test_symmetry(self):
    self.build_vector_based_pm()
    assert_array_almost_equal(self.pm.proj[:, self.samples.shape[0]],
                              self.pm.proj.T[self.samples.shape[0], :])
    assert_array_equal(self.pm.proj[:, self.samples.shape[0]],
                       self.pm.proj.T[self.samples.shape[0], :])

def test_subset():
    data = np.array(
        [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
         [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31],
         [32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47],
         [48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63]])
    # float array doesn't work
    sm = FeatureSliceMapper(np.ones(16))
    assert_raises(IndexError, sm.forward, data)

    # full mask
    sm = FeatureSliceMapper(slice(None))
    # should not change single samples
    assert_array_equal(sm.forward(data[0:1].copy()), data[0:1])
    # or multi-samples
    assert_array_equal(sm.forward(data.copy()), data)
    sm.train(data)
    # same on reverse
    assert_array_equal(sm.reverse(data[0:1].copy()), data[0:1])
    # or multi-samples
    assert_array_equal(sm.reverse(data.copy()), data)

    # identical mappers
    sm_none = FeatureSliceMapper(slice(None))
    sm_int = FeatureSliceMapper(np.arange(16))
    sm_bool = FeatureSliceMapper(np.ones(16, dtype='bool'))
    sms = [sm_none, sm_int, sm_bool]

    # test subsets
    sids = [3, 4, 5, 6]
    bsubset = np.zeros(16, dtype='bool')
    bsubset[sids] = True
    subsets = [sids, slice(3, 7), bsubset, [3, 3, 4, 4, 6, 6, 6, 5]]

    # all test subsets result in equivalent masks, hence should do the same
    # to the mapper and result in identical behavior
    for st in sms:
        for i, sub in enumerate(subsets):
            # shallow copy
            orig = copy(st)
            subsm = FeatureSliceMapper(sub)
            # should do copy-on-write for all important stuff!!
            assert_true(orig.is_mergable(subsm))
            orig += subsm
            # test if selection did its job
            if i == 3:
                # special case of multiplying features
                assert_array_equal(orig.forward1(data[0].copy()), subsets[i])
            else:
                assert_array_equal(orig.forward1(data[0].copy()), sids)

    ## all of the above shouldn't change the original mapper
    #assert_array_equal(sm.get_mask(), np.arange(16))

    # check for some bug catcher
    # no 3D input
    #assert_raises(IndexError, sm.forward, np.ones((3, 2, 1)))
    # no input of wrong length
    if __debug__:
        # checked only in __debug__
        assert_raises(ValueError, sm.forward, np.ones(4))

def test_datasetmapping():
    # 6 samples, 4 features
    data = np.arange(24).reshape(6, 4)
    ds = Dataset(data,
                 sa={'timepoints': np.arange(6),
                     'multidim': data.copy()},
                 fa={'fid': np.arange(4)})
    # with overlapping and non-overlapping boxcars
    startpoints = [0, 1, 4]
    boxlength = 2
    bm = BoxcarMapper(startpoints, boxlength, inspace='boxy')
    # train is critical
    bm.train(ds)
    mds = bm.forward(ds)
    assert_equal(len(mds), len(startpoints))
    assert_equal(mds.nfeatures, boxlength)

    # all sample attributes remain, but they can be rotated/compressed into
    # multidimensional attributes
    assert_equal(sorted(mds.sa.keys()),
                 ['boxy_onsetidx'] + sorted(ds.sa.keys()))
    assert_equal(mds.sa.multidim.shape,
                 (len(startpoints), boxlength, ds.nfeatures))
    assert_equal(mds.sa.timepoints.shape, (len(startpoints), boxlength))
    assert_array_equal(mds.sa.timepoints.flatten(),
                       np.array([(s, s + 1) for s in startpoints]).flatten())
    assert_array_equal(mds.sa.boxy_onsetidx, startpoints)
    # feature attributes also get rotated and broadcasted
    assert_array_equal(mds.fa.fid, [ds.fa.fid, ds.fa.fid])
    # and finally there is a new one
    assert_array_equal(mds.fa.boxy_offsetidx,
                       np.repeat(np.arange(boxlength), 4).reshape(2, -1))

    # now see how it works on reverse()
    rds = bm.reverse(mds)
    # we got at least something of all original attributes back
    assert_equal(sorted(rds.sa.keys()), sorted(ds.sa.keys()))
    assert_equal(sorted(rds.fa.keys()), sorted(ds.fa.keys()))
    # it is not possible to reconstruct the full samples array;
    # some samples might even show up multiple times (when there are
    # overlapping boxcars)
    assert_array_equal(rds.samples,
                       np.array([[0, 1, 2, 3],
                                 [4, 5, 6, 7],
                                 [4, 5, 6, 7],
                                 [8, 9, 10, 11],
                                 [16, 17, 18, 19],
                                 [20, 21, 22, 23]]))
    assert_array_equal(rds.sa.timepoints, [0, 1, 1, 2, 4, 5])
    assert_array_equal(rds.sa.multidim, ds.sa.multidim[rds.sa.timepoints])
    # but feature attributes should be fully recovered
    assert_array_equal(rds.fa.fid, ds.fa.fid)

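# The duplicated [4, 5, 6, 7] row in the reversed samples comes from the
# overlapping boxcars: startpoints [0, 1, 4] with boxlength 2 cover
# timepoints (0, 1), (1, 2) and (4, 5), so timepoint 1 is reconstructed
# twice while timepoint 3 is never covered.
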
def test_custom_split(self):
    # simulate half splitter
    hs = CustomSplitter([(None, [0, 1, 2, 3, 4]), (None, [5, 6, 7, 8, 9])])
    splits = list(hs(self.data))
    self.failUnless(len(splits) == 2)

    for i, p in enumerate(splits):
        self.failUnless(len(p) == 2)
        self.failUnless(p[0].nsamples == 50)
        self.failUnless(p[1].nsamples == 50)

    assert_array_equal(splits[0][1].sa['chunks'].unique, [0, 1, 2, 3, 4])
    assert_array_equal(splits[0][0].sa['chunks'].unique, [5, 6, 7, 8, 9])
    assert_array_equal(splits[1][1].sa['chunks'].unique, [5, 6, 7, 8, 9])
    assert_array_equal(splits[1][0].sa['chunks'].unique, [0, 1, 2, 3, 4])

    # check fully customized split with working and validation set specified
    cs = CustomSplitter([([0, 3, 4], [5, 9])])
    splits = list(cs(self.data))
    self.failUnless(len(splits) == 1)

    for i, p in enumerate(splits):
        self.failUnless(len(p) == 2)
        self.failUnless(p[0].nsamples == 30)
        self.failUnless(p[1].nsamples == 20)

    self.failUnless((splits[0][1].sa['chunks'].unique == [5, 9]).all())
    self.failUnless((splits[0][0].sa['chunks'].unique == [0, 3, 4]).all())

    # full test with additional sampling and 3 datasets per split
    cs = CustomSplitter([([0, 3, 4], [5, 9], [2])],
                        npertarget=[3, 4, 1],
                        nrunspersplit=3)
    splits = list(cs(self.data))
    self.failUnless(len(splits) == 3)

    for i, p in enumerate(splits):
        self.failUnless(len(p) == 3)
        self.failUnless(p[0].nsamples == 12)
        self.failUnless(p[1].nsamples == 16)
        self.failUnless(p[2].nsamples == 4)

    # lets test selection of samples by ratio and combined with
    # other ways
    cs = CustomSplitter([([0, 3, 4], [5, 9], [2])],
                        npertarget=[[0.3, 0.6, 1.0, 0.5], 0.5, 'all'],
                        nrunspersplit=3)
    csall = CustomSplitter([([0, 3, 4], [5, 9], [2])], nrunspersplit=3)
    # lets craft simpler dataset
    #ds = Dataset(samples=np.arange(12), targets=[1]*6 + [2]*6, chunks=1)
    splits = list(cs(self.data))
    splitsall = list(csall(self.data))

    self.failUnless(len(splits) == 3)
    ul = self.data.sa['targets'].unique

    assert_array_equal(
        (np.array(splitsall[0][0].get_nsamples_per_attr('targets').values())
         * [0.3, 0.6, 1.0, 0.5]).round().astype(int),
        np.array(splits[0][0].get_nsamples_per_attr('targets').values()))
    assert_array_equal(
        (np.array(splitsall[0][1].get_nsamples_per_attr('targets').values())
         * 0.5).round().astype(int),
        np.array(splits[0][1].get_nsamples_per_attr('targets').values()))
    assert_array_equal(
        np.array(splitsall[0][2].get_nsamples_per_attr('targets').values()),
        np.array(splits[0][2].get_nsamples_per_attr('targets').values()))

def test_zscore():
    """Test z-scoring transformation"""
    # dataset: mean=2, std=1
    samples = np.array((0, 1, 3, 4, 2, 2, 3, 1, 1, 3,
                        3, 1, 2, 2, 2, 2)).reshape((16, 1))
    data = dataset_wizard(samples.copy(), targets=range(16), chunks=[0] * 16)
    assert_equal(data.samples.mean(), 2.0)
    assert_equal(data.samples.std(), 1.0)
    data_samples = data.samples.copy()
    zscore(data, chunks_attr='chunks')

    # copy should stay intact
    assert_equal(data_samples.mean(), 2.0)
    assert_equal(data_samples.std(), 1.0)

    # we should be able to operate on ndarrays
    # But we can't change type inplace for an array, can't we?
    assert_raises(TypeError, zscore, data_samples, chunks_attr=None)
    # so let's do it manually
    data_samples = data_samples.astype(float)
    zscore(data_samples, chunks_attr=None)
    assert_array_equal(data.samples, data_samples)

    # check z-scoring
    check = np.array([-2, -1, 1, 2, 0, 0, 1, -1, -1, 1, 1, -1, 0, 0, 0, 0],
                     dtype='float64').reshape(16, 1)
    assert_array_equal(data.samples, check)

    data = dataset_wizard(samples.copy(), targets=range(16), chunks=[0] * 16)
    zscore(data, chunks_attr=None)
    assert_array_equal(data.samples, check)

    # check z-scoring taking a set of labels as a baseline
    data = dataset_wizard(samples.copy(),
                          targets=[0, 2, 2, 2, 1] + [2] * 11,
                          chunks=[0] * 16)
    zscore(data, param_est=('targets', [0, 1]))
    assert_array_equal(samples, data.samples + 1.0)

    # check that zscore modifies in-place; only guaranteed if no upcasting is
    # necessary
    samples = samples.astype('float')
    data = dataset_wizard(samples,
                          targets=[0, 2, 2, 2, 1] + [2] * 11,
                          chunks=[0] * 16)
    zscore(data, param_est=('targets', [0, 1]))
    assert_array_equal(samples, data.samples)

    # these might be duplicating code above -- but twice is better than nothing

    # dataset: mean=2, std=1
    raw = np.array((0, 1, 3, 4, 2, 2, 3, 1, 1, 3, 3, 1, 2, 2, 2, 2))
    # dataset: mean=12, std=1
    raw2 = np.array((0, 1, 3, 4, 2, 2, 3, 1, 1, 3, 3, 1, 2, 2, 2, 2)) + 10
    # zscore target
    check = [-2, -1, 1, 2, 0, 0, 1, -1, -1, 1, 1, -1, 0, 0, 0, 0]

    ds = dataset_wizard(raw.copy(), targets=range(16), chunks=[0] * 16)
    pristine = dataset_wizard(raw.copy(), targets=range(16), chunks=[0] * 16)

    zm = ZScoreMapper()
    # should do global zscore by default
    zm.train(ds)
    assert_array_almost_equal(zm.forward(ds), np.transpose([check]))
    # should not modify the source
    assert_array_equal(pristine, ds)

    # if we tell it a different mean it should obey the order
    zm = ZScoreMapper(params=(3, 1))
    zm.train(ds)
    assert_array_almost_equal(zm.forward(ds), np.transpose([check]) - 1)
    assert_array_equal(pristine, ds)

    # let's look at chunk-wise z-scoring
    ds = dataset_wizard(np.hstack((raw.copy(), raw2.copy())),
                        targets=range(32),
                        chunks=[0] * 16 + [1] * 16)
    # by default chunk-wise
    zm = ZScoreMapper()
    zm.train(ds)
    assert_array_almost_equal(zm.forward(ds), np.transpose([check + check]))
    # we should be able to do that same manually
    zm = ZScoreMapper(params={0: (2, 1), 1: (12, 1)})
    zm.train(ds)
    assert_array_almost_equal(zm.forward(ds), np.transpose([check + check]))

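# Hypothetical helper (not part of the original suite) making the arithmetic
# behind `check` explicit: the raw samples have mean 2 and std 1, so
# z-scoring reduces to subtracting 2.
def _zscore_by_hand_sketch():
    raw = np.array((0, 1, 3, 4, 2, 2, 3, 1, 1, 3,
                    3, 1, 2, 2, 2, 2), dtype=float)
    # (x - mean) / std with mean=2, std=1
    assert_array_equal((raw - raw.mean()) / raw.std(), raw - 2.0)
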
def test_size(self):
    self.build_vector_based_pm()
    assert_array_equal(self.pm.proj.shape,
                       (self.samples.shape[0],
                        self.prototypes.shape[0] * len(self.similarities)))

def test_query_engine():
    data = np.arange(54)
    # indices in 3D
    ind = np.transpose((np.ones((3, 3, 3)).nonzero()))
    # sphere generator for 3 elements diameter
    sphere = ne.Sphere(1)
    # dataset with just one "space"
    ds = Dataset([data, data], fa={'s_ind': np.concatenate((ind, ind))})
    # and the query engine attaching the generator to the "index-space"
    qe = ne.IndexQueryEngine(s_ind=sphere)
    # cannot train since the engine does not know about the second space
    assert_raises(ValueError, qe.train, ds)
    # now do it again with a full spec
    ds = Dataset([data, data], fa={'s_ind': np.concatenate((ind, ind)),
                                   't_ind': np.repeat([0, 1], 27)})
    qe = ne.IndexQueryEngine(s_ind=sphere, t_ind=None)
    qe.train(ds)
    # internal representation check
    # YOH: invalid for new implementation with lookup tables (dictionaries)
    #assert_array_equal(qe._searcharray,
    #                   np.arange(54).reshape(qe._searcharray.shape) + 1)
    # should give us one corner, collapsing the 't_ind'
    assert_array_equal(qe(s_ind=(0, 0, 0)),
                       [0, 1, 3, 9, 27, 28, 30, 36])
    # directly specifying an index for 't_ind' without having an ROI
    # generator, should give the same corner, but just once
    assert_array_equal(qe(s_ind=(0, 0, 0), t_ind=0), [0, 1, 3, 9])
    # just out of the mask -- no match
    assert_array_equal(qe(s_ind=(3, 3, 3)), [])
    # also out of the mask -- but single match
    assert_array_equal(qe(s_ind=(2, 2, 3), t_ind=1), [53])
    # query by id
    assert_array_equal(qe(s_ind=(0, 0, 0), t_ind=0), qe[0])
    assert_array_equal(qe(s_ind=(0, 0, 0), t_ind=[0, 1]),
                       qe(s_ind=(0, 0, 0)))
    # should not fail if t_ind is outside
    assert_array_equal(qe(s_ind=(0, 0, 0), t_ind=[0, 1, 10]),
                       qe(s_ind=(0, 0, 0)))
    # should fail if asked about some unknown thing
    assert_raises(ValueError, qe.__call__, s_ind=(0, 0, 0), buga=0)

    # Test by using some literal feature attribute
    ds.fa['lit'] = ['roi1', 'ro2', 'r3'] * 18
    # should work as well as before
    assert_array_equal(qe(s_ind=(0, 0, 0)), [0, 1, 3, 9, 27, 28, 30, 36])
    # should fail if asked about some unknown (yet) thing
    assert_raises(ValueError, qe.__call__, s_ind=(0, 0, 0), lit='roi1')

    # Create qe which can query literals as well
    qe_lit = ne.IndexQueryEngine(s_ind=sphere, t_ind=None, lit=None)
    qe_lit.train(ds)
    # should work as well as before
    assert_array_equal(qe_lit(s_ind=(0, 0, 0)), [0, 1, 3, 9, 27, 28, 30, 36])
    # and subselect nicely -- only /3 ones
    assert_array_equal(qe_lit(s_ind=(0, 0, 0), lit='roi1'),
                       [0, 3, 9, 27, 30, 36])
    assert_array_equal(qe_lit(s_ind=(0, 0, 0), lit=['roi1', 'ro2']),
                       [0, 1, 3, 9, 27, 28, 30, 36])

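# Reading the corner ids above: the 3x3x3 index space is flattened in C
# order, so from voxel (0, 0, 0) the radius-1 sphere hits feature 0 (the
# center) plus offsets 1 (+z), 3 (+y) and 9 (+x); the second t_ind copy
# repeats the same pattern shifted by 27 (27, 28, 30, 36).
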
def test_n_group_split(self):
    """Test NGroupSplitter along with the reversal of the order of the
    spit out datasets
    """
    # Test 2 groups like HalfSplitter first
    hs = NGroupSplitter(2)
    hs_reversed = NGroupSplitter(2, reverse=True)

    for isreversed, splitter in enumerate((hs, hs_reversed)):
        splits = list(splitter(self.data))
        self.failUnless(len(splits) == 2)

        for i, p in enumerate(splits):
            self.failUnless(len(p) == 2)
            self.failUnless(p[0].nsamples == 50)
            self.failUnless(p[1].nsamples == 50)

        assert_array_equal(splits[0][1 - isreversed].sa['chunks'].unique,
                           [0, 1, 2, 3, 4])
        assert_array_equal(splits[0][isreversed].sa['chunks'].unique,
                           [5, 6, 7, 8, 9])
        assert_array_equal(splits[1][1 - isreversed].sa['chunks'].unique,
                           [5, 6, 7, 8, 9])
        assert_array_equal(splits[1][isreversed].sa['chunks'].unique,
                           [0, 1, 2, 3, 4])

    # check if it works on pure odd and even chunk ids
    moresplits = list(hs(splits[0][0]))

    for split in moresplits:
        self.failUnless(split[0] != None)
        self.failUnless(split[1] != None)

    # now test more groups
    s5 = NGroupSplitter(5)
    s5_reversed = NGroupSplitter(5, reverse=True)

    # get the splits
    for isreversed, s5splitter in enumerate((s5, s5_reversed)):
        splits = list(s5splitter(self.data))

        # must have 5 splits
        self.failUnless(len(splits) == 5)

        # check split content
        assert_array_equal(splits[0][1 - isreversed].sa['chunks'].unique,
                           [0, 1])
        assert_array_equal(splits[0][isreversed].sa['chunks'].unique,
                           [2, 3, 4, 5, 6, 7, 8, 9])
        assert_array_equal(splits[1][1 - isreversed].sa['chunks'].unique,
                           [2, 3])
        assert_array_equal(splits[1][isreversed].sa['chunks'].unique,
                           [0, 1, 4, 5, 6, 7, 8, 9])
        # ...
        assert_array_equal(splits[4][1 - isreversed].sa['chunks'].unique,
                           [8, 9])
        assert_array_equal(splits[4][isreversed].sa['chunks'].unique,
                           [0, 1, 2, 3, 4, 5, 6, 7])

    # Test for too many groups
    def splitcall(spl, dat):
        return [(train, test) for (train, test) in spl(dat)]
    s20 = NGroupSplitter(20)
    self.assertRaises(ValueError, splitcall, s20, self.data)

def test_flatten():
    samples_shape = (2, 2, 4)
    data_shape = (4,) + samples_shape
    data = np.arange(np.prod(data_shape)).reshape(data_shape).view(myarray)
    pristinedata = data.copy()
    target = [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31],
              [32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47],
              [48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63]]
    target = np.array(target).view(myarray)
    index_target = np.array([[0, 0, 0], [0, 0, 1], [0, 0, 2], [0, 0, 3],
                             [0, 1, 0], [0, 1, 1], [0, 1, 2], [0, 1, 3],
                             [1, 0, 0], [1, 0, 1], [1, 0, 2], [1, 0, 3],
                             [1, 1, 0], [1, 1, 1], [1, 1, 2], [1, 1, 3]])

    # array subclass survives
    ok_(isinstance(data, myarray))

    # actually, there should be no difference between a plain FlattenMapper
    # and a chain that only has a FlattenMapper as the one element
    for fm in [FlattenMapper(inspace='voxel'),
               ChainMapper([FlattenMapper(inspace='voxel'),
                            FeatureSliceMapper(slice(None))])]:
        # not working if untrained
        assert_raises(RuntimeError,
                      fm.forward1,
                      np.arange(np.sum(samples_shape) + 1))

        fm.train(data)

        ok_(isinstance(fm.forward(data), myarray))
        ok_(isinstance(fm.forward1(data[2]), myarray))
        assert_array_equal(fm.forward(data), target)
        assert_array_equal(fm.forward1(data[2]), target[2])
        assert_raises(ValueError, fm.forward, np.arange(4))

        # all of that leaves that data unmodified
        assert_array_equal(data, pristinedata)

        # reverse mapping
        ok_(isinstance(fm.reverse(target), myarray))
        ok_(isinstance(fm.reverse1(target[0]), myarray))
        ok_(isinstance(fm.reverse(target[1:2]), myarray))
        assert_array_equal(fm.reverse(target), data)
        assert_array_equal(fm.reverse1(target[0]), data[0])
        assert_array_equal(fm.reverse(target[1:2]), data[1:2])
        assert_raises(ValueError, fm.reverse, np.arange(14))

        # check one dimensional data, treated as scalar samples
        oned = np.arange(5)
        fm.train(Dataset(oned))
        # needs 2D
        assert_raises(ValueError, fm.forward, oned)
        # doesn't match mapper, since Dataset turns `oned` into (5,1)
        assert_raises(ValueError, fm.forward, oned)
        assert_equal(Dataset(oned).nfeatures, 1)

        # try dataset mode, with some feature attribute
        fattr = np.arange(np.prod(samples_shape)).reshape(samples_shape)
        ds = Dataset(data, fa={'awesome': fattr.copy()})
        assert_equal(ds.samples.shape, data_shape)

        fm.train(ds)
        dsflat = fm.forward(ds)
        ok_(isinstance(dsflat, Dataset))
        ok_(isinstance(dsflat.samples, myarray))
        assert_array_equal(dsflat.samples, target)
        assert_array_equal(dsflat.fa.awesome,
                           np.arange(np.prod(samples_shape)))
        assert_true(isinstance(dsflat.fa['awesome'], ArrayCollectable))

        # test index creation
        assert_array_equal(index_target, dsflat.fa.voxel)

        # and back
        revds = fm.reverse(dsflat)
        ok_(isinstance(revds, Dataset))
        ok_(isinstance(revds.samples, myarray))
        assert_array_equal(revds.samples, data)
        assert_array_equal(revds.fa.awesome, fattr)
        assert_true(isinstance(revds.fa['awesome'], ArrayCollectable))
        assert_false('voxel' in revds.fa)

def test_mapper_aliases():
    mm = mask_mapper(np.ones((3, 4, 2), dtype='bool'))

    assert_array_equal(mm(np.ones((2, 3, 4, 2))),
                       mm.forward(np.ones((2, 3, 4, 2))))