def test_basic_collectable():
    c = Collectable()

    # empty by default
    assert_equal(c.name, None)
    assert_equal(c.value, None)
    assert_equal(c.__doc__, None)

    # late assignment
    c.name = 'somename'
    c.value = 12345
    assert_equal(c.name, 'somename')
    assert_equal(c.value, 12345)

    # immediate content
    c = Collectable('value', 'myname', "This is a test")
    assert_equal(c.name, 'myname')
    assert_equal(c.value, 'value')
    assert_equal(c.__doc__, "This is a test")
    assert_equal(str(c), 'myname')

    # repr
    e = eval(repr(c))
    assert_equal(e.name, 'myname')
    assert_equal(e.value, 'value')
    assert_equal(e.__doc__, "This is a test")

    # shallow copy does not create a view of value array
    c.value = np.arange(5)
    d = copy.copy(c)
    assert_false(d.value.base is c.value)

    # names starting with _ are not allowed
    assert_raises(ValueError, c._set_name, "_underscore")

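# NOTE: several generator/splitter tests below call a give_data() fixture
# that is defined elsewhere in the suite. The following is only a minimal
# sketch consistent with the assertions those tests make (100 samples x 10
# features, targets cycling over 4 labels so every chunk contains every
# target, 10 contiguous chunks of 10 samples); the actual fixture may be
# constructed differently. It assumes np and dataset_wizard are already
# imported in this module.
def give_data():
    # hypothetical reconstruction of the fixture, not the original code
    return dataset_wizard(np.random.normal(size=(100, 10)),
                          targets=[i % 4 for i in range(100)],
                          chunks=[i // 10 for i in range(100)])
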
def test_permute_chunks():

    def is_sorted(x):
        return np.array_equal(np.sort(x), x)

    ds = give_data()
    # reassign target labels: with the original targets every chunk carries
    # the identical target sequence, so permuting whole chunks would change
    # nothing and assure=True would raise
    ds.sa['targets'] = list(range(len(ds.sa.targets)))

    permutation = AttributePermutator(attr='targets',
                                      chunk_attr='chunks',
                                      strategy='chunks',
                                      assure=True)

    pds = permutation(ds)

    assert_false(is_sorted(pds.sa.targets))
    assert_true(np.array_equal(pds.samples, ds.samples))
    for chunk_id in np.unique(pds.sa.chunks):
        chunk_ds = pds[pds.sa.chunks == chunk_id]
        assert_true(is_sorted(chunk_ds.sa.targets))

    permutation = AttributePermutator(attr='targets', strategy='chunks')
    assert_raises(ValueError, permutation, ds)

def test_forward_dense_array_mapper():
    mask = np.ones((3, 2), dtype='bool')
    map_ = mask_mapper(mask)

    # test shape reports
    assert_equal(map_.forward1(mask).shape, (6,))

    # test 1sample mapping
    assert_array_equal(map_.forward1(np.arange(6).reshape(3, 2)),
                       [0, 1, 2, 3, 4, 5])

    # test 4sample mapping
    foursample = map_.forward(np.arange(24).reshape(4, 3, 2))
    assert_array_equal(foursample,
                       [[0, 1, 2, 3, 4, 5],
                        [6, 7, 8, 9, 10, 11],
                        [12, 13, 14, 15, 16, 17],
                        [18, 19, 20, 21, 22, 23]])

    # check incomplete masks
    mask[1, 1] = 0
    map_ = mask_mapper(mask)
    assert_equal(map_.forward1(mask).shape, (5,))
    assert_array_equal(map_.forward1(np.arange(6).reshape(3, 2)),
                       [0, 1, 2, 4, 5])

    # check that it doesn't accept wrong dataspace
    assert_raises(ValueError, map_.forward, np.arange(4).reshape(2, 2))

    # check fail if neither mask nor shape
    assert_raises(ValueError, mask_mapper)

    # check that a full mask is automatically created when providing shape
    m = mask_mapper(shape=(2, 3, 4))
    mp = m.forward1(np.arange(24).reshape(2, 3, 4))
    assert_array_equal(mp, np.arange(24))

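# Illustrative cross-check (an addition, not from the original suite) for
# the incomplete-mask case above: dropping element (1, 1) from a C-order
# flattening of a (3, 2) array removes flat index 3, leaving [0, 1, 2, 4, 5].
def test_incomplete_mask_flattening_crosscheck():
    mask = np.ones((3, 2), dtype=bool)
    mask[1, 1] = False
    assert_array_equal(np.arange(6).reshape(3, 2)[mask], [0, 1, 2, 4, 5])
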
def test_product_flatten():
    nsamples = 17
    product_name_values = [('chan', ['C1', 'C2']),
                           ('freq', np.arange(4, 20, 6)),
                           ('time', np.arange(-200, 800, 200))]
    shape = (nsamples,) + tuple(len(v) for _, v in product_name_values)

    sample_names = ['samp%d' % i for i in xrange(nsamples)]

    # generate random data in four dimensions
    data = np.random.normal(size=shape)
    ds = Dataset(data, sa=dict(sample_names=sample_names))

    # apply flattening to ds
    flattener = ProductFlattenMapper(product_name_values)

    # test I/O (only if h5py is available)
    if externals.exists('h5py'):
        from mvpa2.base.hdf5 import h5save, h5load
        import tempfile
        import os

        fd, testfn = tempfile.mkstemp('mapper.h5py', 'test_product')
        os.close(fd)
        h5save(testfn, flattener)
        flattener = h5load(testfn)
        os.unlink(testfn)

    mds = flattener(ds)

    prod = lambda x: reduce(operator.mul, x)

    # ensure the size is ok
    assert_equal(mds.shape, (nsamples,) + (prod(shape[1:]),))

    ndim = len(product_name_values)

    idxs = [range(len(v)) for _, v in product_name_values]
    for si in xrange(nsamples):
        for fi, p in enumerate(itertools.product(*idxs)):
            data_tup = (si,) + p

            x = mds[si, fi]

            # value should match
            assert_equal(data[data_tup], x.samples[0, 0])

            # indices should match as well
            all_idxs = tuple(x.fa['chan_freq_time_indices'].value.ravel())
            assert_equal(p, all_idxs)

            # values and indices in each dimension should match
            for i, (name, value) in enumerate(product_name_values):
                assert_equal(x.fa[name].value, value[p[i]])
                assert_equal(x.fa[name + '_indices'].value, p[i])

    product_name_values += [('foo', [1, 2, 3])]
    flattener = ProductFlattenMapper(product_name_values)
    assert_raises(ValueError, flattener, ds)

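# Illustrative shape arithmetic (an addition, not from the original suite):
# the factors above are 2 channels x 3 frequencies x 5 time points, so
# ProductFlattenMapper collapses each sample into 2 * 3 * 5 = 30 features.
def test_product_flatten_shape_arithmetic():
    assert_equal(len(np.arange(4, 20, 6)), 3)
    assert_equal(len(np.arange(-200, 800, 200)), 5)
    assert_equal(2 * 3 * 5, 30)
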
def test_corrstability_smoketest(ds):
    if 'chunks' not in ds.sa:
        return
    if len(ds.sa['targets'].unique) > 30:
        # was regression dataset
        return
    # very basic smoke testing
    cs = CorrStability()
    #ds = datasets['uni2small']
    out = cs(ds)

    assert_equal(out.shape, (ds.nfeatures,))
    ok_(np.all(out >= -1.001))  # it should be a correlation after all
    ok_(np.all(out <= 1.001))

    # and theoretically those nonbogus features should have higher values
    if 'nonbogus_targets' in ds.fa:
        bogus_features = np.array([x is None for x in ds.fa.nonbogus_targets])
        assert_array_less(np.mean(out[bogus_features]),
                          np.mean(out[~bogus_features]))

    # and if we move targets to an alternative location
    ds = ds.copy(deep=True)
    ds.sa['alt'] = ds.T
    ds.sa.pop('targets')

    assert_raises(KeyError, cs, ds)
    cs = CorrStability('alt')
    out_ = cs(ds)
    assert_array_equal(out, out_)

def test_sphere_scaled():
    s1 = ne.Sphere(3)
    s = ne.Sphere(3, element_sizes=(1, 1))

    # Should give exactly the same results since element_sizes are 1s
    for p in ((0, 0), (-23, 1)):
        assert_array_equal(s1(p), s(p))
        ok_(len(s(p)) == len(set(s(p))))

    # Raise exception if query dimensionality does not match element_sizes
    assert_raises(ValueError, s, (1,))

    s = ne.Sphere(3, element_sizes=(1.5, 2))
    assert_array_equal(s((0, 0)),
                       [(-2, 0), (-1, -1), (-1, 0), (-1, 1),
                        (0, -1), (0, 0), (0, 1),
                        (1, -1), (1, 0), (1, 1), (2, 0)])

    s = ne.Sphere(1.5, element_sizes=(1.5, 1.5, 1.5))
    res = s((0, 0, 0))
    ok_(np.all([np.sqrt(np.sum(np.array(x) ** 2)) <= 1.5 for x in res]))
    ok_(len(res) == 7)

    # all neighbors so no more than 1 voxel away -- just a cube, for
    # some "sphere" effect radius had to be 3.0 ;)
    td = np.sqrt(3 * 1.5 ** 2)
    s = ne.Sphere(td, element_sizes=(1.5, 1.5, 1.5))
    res = s((0, 0, 0))
    ok_(np.all([np.sqrt(np.sum(np.array(x) ** 2)) <= td for x in res]))
    ok_(np.all([np.sum(np.abs(x) > 1) == 0 for x in res]))
    ok_(len(res) == 27)

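# Illustrative cross-check (an addition, not from the original suite) for
# the 27-neighbor case above: with 1.5-unit elements the voxel body diagonal
# is sqrt(3 * 1.5**2) ~= 2.598, so all 3**3 offsets of the surrounding cube
# fall within that radius.
def test_sphere_scaled_cube_arithmetic():
    td = np.sqrt(3 * 1.5 ** 2)
    offsets = np.array(np.meshgrid(*[[-1, 0, 1]] * 3)).reshape(3, -1).T
    assert_equal(len(offsets), 27)
    ok_(np.all(np.sqrt(np.sum((offsets * 1.5) ** 2, axis=1)) <= td + 1e-12))
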
def test_mapper_vs_zscore():
    """Test by comparing to results of elderly z-score function
    """
    # data: 40 sample feature line in 20d space (40x20; samples x features)
    dss = [
        dataset_wizard(np.concatenate(
            [np.arange(40) for i in range(20)]).reshape(20, -1).T,
            targets=1, chunks=1),
        ] + datasets.values()

    for ds in dss:
        ds1 = deepcopy(ds)
        ds2 = deepcopy(ds)

        zsm = ZScoreMapper(chunks_attr=None)
        assert_raises(RuntimeError, zsm.forward, ds1.samples)
        idhashes = (idhash(ds1), idhash(ds1.samples))
        zsm.train(ds1)
        idhashes_train = (idhash(ds1), idhash(ds1.samples))
        assert_equal(idhashes, idhashes_train)

        # forward dataset
        ds1z_ds = zsm.forward(ds1)
        idhashes_forwardds = (idhash(ds1), idhash(ds1.samples))
        # must not modify samples in place!
        assert_equal(idhashes, idhashes_forwardds)

        # forward samples explicitly
        ds1z = zsm.forward(ds1.samples)
        idhashes_forward = (idhash(ds1), idhash(ds1.samples))
        assert_equal(idhashes, idhashes_forward)

        zscore(ds2, chunks_attr=None)
        assert_array_almost_equal(ds1z, ds2.samples)
        assert_array_equal(ds1.samples, ds.samples)

def test_sifter_with_balancing():
    # extended previous test which was already
    # "... somewhat duplicating the doctest"
    ds = Dataset(samples=np.arange(12).reshape((-1, 2)),
                 sa={'chunks':  [0, 1, 2, 3, 4, 5],
                     'targets': ['c', 'c', 'c', 'p', 'p', 'p']})

    # Without sifter -- just to assure that we do get all of them
    # i.e. 6*5*4*3/(4!) = 15
    par = ChainNode([NFoldPartitioner(cvtype=4, attr='chunks')])
    assert_equal(len(list(par.generate(ds))), 15)

    # so we will take 4 chunks out of the available 6, but would care only
    # about those partitions where we have a balanced number of 'c' and 'p'
    # entries
    assert_raises(ValueError,
                  lambda x: list(Sifter([('targets', dict(wrong=1))]).generate(x)),
                  ds)

    par = ChainNode([NFoldPartitioner(cvtype=4, attr='chunks'),
                     Sifter([('partitions', 2),
                             ('targets',
                              dict(uvalues=['c', 'p'],
                                   balanced=True))])
                     ])
    dss = list(par.generate(ds))
    # print [ x[x.sa.partitions==2].sa.targets for x in dss ]
    assert_equal(len(dss), 9)

    for ds_ in dss:
        testing = ds[ds_.sa.partitions == 2]
        assert_array_equal(np.unique(testing.sa.targets), ['c', 'p'])
        # and we still have both targets present in training
        training = ds[ds_.sa.partitions == 1]
        assert_array_equal(np.unique(training.sa.targets), ['c', 'p'])

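# Illustrative cross-check (an addition, not from the original suite) of the
# partition count asserted above: choosing 4 of the 6 chunks gives
# C(6, 4) = 6*5*4*3 / 4! = 15 distinct partitionings.
def test_sifter_partition_count_arithmetic():
    from math import factorial
    assert_equal(factorial(6) // (factorial(4) * factorial(2)), 15)
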
def test_assert_objectarray_equal():
    if versions['numpy'] < '1.4':
        raise SkipTest("Skipping because of known segfaults with numpy < 1.4")
    # explicit dtype so we could test with numpy < 1.6
    a = np.array([np.array([0, 1]), np.array(1)], dtype=object)
    b = np.array([np.array([0, 1]), np.array(1)], dtype=object)

    # they should be ok for both types of comparison
    for strict in True, False:
        # good with self
        assert_objectarray_equal(a, a, strict=strict)
        # good with a copy
        assert_objectarray_equal(a, a.copy(), strict=strict)
        # good while operating with an identical one
        # see http://projects.scipy.org/numpy/ticket/2117
        assert_objectarray_equal(a, b, strict=strict)

    # now check if we still fail for a good reason
    for value_equal, b in (
            (False, np.array(1)),
            (False, np.array([1])),
            (False, np.array([np.array([0, 1]), np.array((1, 2))],
                             dtype=object)),
            (False, np.array([np.array([0, 1]), np.array(1.1)], dtype=object)),
            (True, np.array([np.array([0, 1]), np.array(1.0)], dtype=object)),
            (True, np.array([np.array([0, 1]), np.array(1, dtype=object)],
                            dtype=object)),
            ):
        assert_raises(AssertionError, assert_objectarray_equal, a, b)
        if value_equal:
            # but should not raise for non-default strict=False
            assert_objectarray_equal(a, b, strict=False)
        else:
            assert_raises(AssertionError, assert_objectarray_equal, a, b,
                          strict=False)

def test_vector_alignment_find_rotation_illegal_inputs(self):
    arr = np.asarray
    illegal_args = [[arr([1, 2]), arr([1, 3])],
                    [arr([1, 2, 3]), arr([1, 3])],
                    [arr([1, 2, 3]), np.random.normal(size=(3, 3))]]
    for illegal_arg in illegal_args:
        assert_raises((ValueError, IndexError),
                      vector_alignment_find_rotation, *illegal_arg)

def test_attrmap_conflicts():
    am_n = AttributeMap({'a': 1, 'b': 2, 'c': 1})
    am_t = AttributeMap({'a': 1, 'b': 2, 'c': 1},
                        collisions_resolution='tuple')
    am_l = AttributeMap({'a': 1, 'b': 2, 'c': 1},
                        collisions_resolution='lucky')
    q_f = ['a', 'b', 'a', 'c']

    # should have no effect on forward mapping
    ok_(np.all(am_n.to_numeric(q_f) == am_t.to_numeric(q_f)))
    ok_(np.all(am_t.to_numeric(q_f) == am_l.to_numeric(q_f)))

    assert_raises(ValueError, am_n.to_literal, [2])
    r_t = am_t.to_literal([2, 1])
    r_l = am_l.to_literal([2, 1])

def test_splitter():
    ds = give_data()
    # split with defaults
    spl1 = Splitter('chunks')
    assert_raises(NotImplementedError, spl1, ds)

    splits = list(spl1.generate(ds))
    assert_equal(len(splits), len(ds.sa['chunks'].unique))

    for split in splits:
        # it should have performed basic slicing!
        assert_true(split.samples.base is ds.samples)
        assert_equal(len(split.sa['chunks'].unique), 1)
        assert_true('lastsplit' in split.a)
    assert_true(splits[-1].a.lastsplit)

    # now again, more customized
    spl2 = Splitter('targets', attr_values=[0, 1, 1, 2, 3, 3, 3], count=4,
                    noslicing=True)
    splits = list(spl2.generate(ds))
    assert_equal(len(splits), 4)
    for split in splits:
        # it should NOT have performed basic slicing!
        assert_false(split.samples.base is ds.samples)
        assert_equal(len(split.sa['targets'].unique), 1)
        assert_equal(len(split.sa['chunks'].unique), 10)
    assert_true(splits[-1].a.lastsplit)

    # two should be identical
    assert_array_equal(splits[1].samples, splits[2].samples)

    # now go wild and split by feature attribute
    ds.fa['roi'] = np.repeat([0, 1], 5)
    # splitter should auto-detect that this is a feature attribute
    spl3 = Splitter('roi')
    splits = list(spl3.generate(ds))
    assert_equal(len(splits), 2)
    for split in splits:
        assert_true(split.samples.base is ds.samples)
        assert_equal(len(split.fa['roi'].unique), 1)
        assert_equal(split.shape, (100, 5))

    # and finally test chained splitters
    cspl = ChainNode([spl2, spl3, spl1])
    splits = list(cspl.generate(ds))
    # 4 target splits and 2 roi splits each and 10 chunks each
    assert_equal(len(splits), 80)

def test_mean_tpr():
    # Let's test now on some imbalanced sets
    assert_raises(ValueError, mean_tpr, [1], [])
    assert_raises(ValueError, mean_tpr, [], [1])
    assert_raises(ValueError, mean_tpr, [], [])
    # now the interesting cases where a label shows up in the predictions
    # without ever being among the targets
    assert_raises(ValueError, mean_tpr, [1], [0])
    assert_raises(ValueError, mean_tpr, [0, 1], [0, 0])
    # but it should be ok to have some targets not present in the predictions
    assert_equal(mean_tpr([0, 0], [0, 1]), .5)
    # the same regardless of how many samples are in the 0-class, if all
    # are misclassified (winner by # of samples takes all)
    assert_equal(mean_tpr([0, 0, 0], [0, 0, 1]), .5)
    # whereas mean accuracy would be different
    assert_almost_equal(mean_match_accuracy([0, 0, 0], [0, 0, 1]), 2 / 3.)

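# For orientation, a minimal reference sketch of mean TPR consistent with
# the assertions above: the true positive rate per target class, averaged
# without weighting, with the (predicted, targets) argument order used
# throughout mvpa2.misc.errorfx. This is an illustrative addition, not
# mvpa2's actual implementation; the name mean_tpr_ref is hypothetical.
def mean_tpr_ref(predicted, targets):
    predicted = np.asarray(predicted)
    targets = np.asarray(targets)
    if not len(predicted) or not len(targets):
        raise ValueError("need non-empty sequences")
    if set(predicted) - set(targets):
        raise ValueError("predictions contain labels never seen among targets")
    # per-class recall, then the unweighted mean across classes
    return np.mean([np.mean(predicted[targets == t] == t)
                    for t in np.unique(targets)])
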
def test_collections():
    sa = SampleAttributesCollection()
    assert_equal(len(sa), 0)

    assert_raises(ValueError, sa.__setitem__, 'test', 0)
    l = range(5)
    sa['test'] = l
    # auto-wrapped
    assert_true(isinstance(sa['test'], ArrayCollectable))
    assert_equal(len(sa), 1)

    # names which are already present in dict interface
    assert_raises(ValueError, sa.__setitem__, 'values', range(5))

    sa_c = copy.deepcopy(sa)
    assert_equal(len(sa), len(sa_c))
    assert_array_equal(sa.test, sa_c.test)

def test_reverse_dense_array_mapper():
    mask = np.ones((3, 2), dtype='bool')
    mask[1, 1] = 0
    map_ = mask_mapper(mask)

    rmapped = map_.reverse1(np.arange(1, 6))
    assert_equal(rmapped.shape, (3, 2))
    assert_equal(rmapped[1, 1], 0)
    assert_equal(rmapped[2, 1], 5)

    # check that it doesn't accept wrong dataspace
    assert_raises(ValueError, map_.forward, np.arange(6))

    rmapped2 = map_.reverse(np.arange(1, 11).reshape(2, 5))
    assert_equal(rmapped2.shape, (2, 3, 2))
    assert_equal(rmapped2[0, 1, 1], 0)
    assert_equal(rmapped2[1, 1, 1], 0)
    assert_equal(rmapped2[0, 2, 1], 5)
    assert_equal(rmapped2[1, 2, 1], 10)

def test_cached_query_engine():
    """Test cached query engine
    """
    sphere = ne.Sphere(1)
    # dataset with just one "space"
    ds = datasets['3dlarge']
    qe0 = ne.IndexQueryEngine(myspace=sphere)
    qec = ne.CachedQueryEngine(qe0)
    # and ground truth one
    qe = ne.IndexQueryEngine(myspace=sphere)
    results_ind = []
    results_kw = []

    def cmp_res(res1, res2):
        comp = [x == y for x, y in zip(res1, res2)]
        ok_(np.all(comp))

    for iq, q in enumerate((qe, qec)):
        q.train(ds)
        # sequential train on the same should be ok in both cases
        q.train(ds)
        res_ind = [q[fid] for fid in xrange(ds.nfeatures)]
        res_kw = [q(myspace=x) for x in ds.fa.myspace]
        # test if results match
        cmp_res(res_ind, res_kw)

        results_ind.append(res_ind)
        results_kw.append(res_kw)

    # now check if results of cached were the same as of regular run
    cmp_res(results_ind[0], results_ind[1])

    # Now do sanity checks
    assert_raises(ValueError, qec.train, ds[:, :-1])
    assert_raises(ValueError, qec.train, ds.copy())
    ds2 = ds.copy()
    qec.untrain()
    qec.train(ds2)
    # should be the same results on the copy
    cmp_res(results_ind[0], [qec[fid] for fid in xrange(ds.nfeatures)])
    cmp_res(results_kw[0], [qec(myspace=x) for x in ds.fa.myspace])
    ok_(qec.train(ds2) is None)

def test_query_engine():
    data = np.arange(54)
    # indices in 3D
    ind = np.transpose((np.ones((3, 3, 3)).nonzero()))
    # sphere generator for 3 elements diameter
    sphere = ne.Sphere(1)
    # dataset with just one "space"
    ds = Dataset([data, data], fa={'s_ind': np.concatenate((ind, ind))})
    # and the query engine attaching the generator to the "index-space"
    qe = ne.IndexQueryEngine(s_ind=sphere)
    # cannot train since the engine does not know about the second space
    assert_raises(ValueError, qe.train, ds)
    # now do it again with a full spec
    ds = Dataset([data, data], fa={'s_ind': np.concatenate((ind, ind)),
                                   't_ind': np.repeat([0, 1], 27)})
    qe = ne.IndexQueryEngine(s_ind=sphere, t_ind=None)
    qe.train(ds)
    # internal representation check
    # YOH: invalid for new implementation with lookup tables (dictionaries)
    #assert_array_equal(qe._searcharray,
    #                   np.arange(54).reshape(qe._searcharray.shape) + 1)
    # should give us one corner, collapsing the 't_ind'
    assert_array_equal(qe(s_ind=(0, 0, 0)),
                       [0, 1, 3, 9, 27, 28, 30, 36])
    # directly specifying an index for 't_ind' without having an ROI
    # generator, should give the same corner, but just once
    assert_array_equal(qe(s_ind=(0, 0, 0), t_ind=0), [0, 1, 3, 9])
    # just out of the mask -- no match
    assert_array_equal(qe(s_ind=(3, 3, 3)), [])
    # also out of the mask -- but single match
    assert_array_equal(qe(s_ind=(2, 2, 3), t_ind=1), [53])
    # query by id
    assert_array_equal(qe(s_ind=(0, 0, 0), t_ind=0), qe[0])
    assert_array_equal(qe(s_ind=(0, 0, 0), t_ind=[0, 1]),
                       qe(s_ind=(0, 0, 0)))
    # should not fail if t_ind is outside
    assert_array_equal(qe(s_ind=(0, 0, 0), t_ind=[0, 1, 10]),
                       qe(s_ind=(0, 0, 0)))

    # should fail if asked about some unknown thing
    assert_raises(ValueError, qe.__call__, s_ind=(0, 0, 0), buga=0)

    # Test by using some literal feature attribute
    ds.fa['lit'] = ['roi1', 'ro2', 'r3'] * 18
    # should work as well as before
    assert_array_equal(qe(s_ind=(0, 0, 0)), [0, 1, 3, 9, 27, 28, 30, 36])
    # should fail if asked about some unknown (yet) thing
    assert_raises(ValueError, qe.__call__, s_ind=(0, 0, 0), lit='roi1')

    # Create qe which can query literals as well
    qe_lit = ne.IndexQueryEngine(s_ind=sphere, t_ind=None, lit=None)
    qe_lit.train(ds)
    # should work as well as before
    assert_array_equal(qe_lit(s_ind=(0, 0, 0)), [0, 1, 3, 9, 27, 28, 30, 36])
    # and subselect nicely -- only the 'roi1' (every third) ones
    assert_array_equal(qe_lit(s_ind=(0, 0, 0), lit='roi1'),
                       [0, 3, 9, 27, 30, 36])
    assert_array_equal(qe_lit(s_ind=(0, 0, 0), lit=['roi1', 'ro2']),
                       [0, 1, 3, 9, 27, 28, 30, 36])

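# Illustrative cross-check (an addition, not from the original suite) of the
# "corner" ids asserted above: Sphere(1) at (0, 0, 0), clipped to the 3x3x3
# grid, covers offsets (0,0,0), (0,0,1), (0,1,0) and (1,0,0), i.e. flat
# indices 0, 1, 3, 9 -- repeated at +27 for the second t_ind copy.
def test_query_engine_corner_arithmetic():
    corner = [np.ravel_multi_index(p, (3, 3, 3))
              for p in ((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0))]
    assert_array_equal(corner, [0, 1, 3, 9])
    assert_array_equal(sorted(corner + [c + 27 for c in corner]),
                       [0, 1, 3, 9, 27, 28, 30, 36])
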
def test_sphere():
    # test sphere initialization
    s = ne.Sphere(1)
    center0 = (0, 0, 0)
    center1 = (1, 1, 1)
    assert_equal(len(s(center0)), 7)
    target = array([array([-1, 0, 0]),
                    array([0, -1, 0]),
                    array([0, 0, -1]),
                    array([0, 0, 0]),
                    array([0, 0, 1]),
                    array([0, 1, 0]),
                    array([1, 0, 0])])
    # test of internals -- no recomputation of increments should be done
    prev_increments = s._increments
    assert_array_equal(s(center0), target)
    ok_(prev_increments is s._increments)
    # query lower dimensionality
    _ = s((0, 0))
    ok_(not prev_increments is s._increments)

    # test Sphere call
    target = [array([0, 1, 1]),
              array([1, 0, 1]),
              array([1, 1, 0]),
              array([1, 1, 1]),
              array([1, 1, 2]),
              array([1, 2, 1]),
              array([2, 1, 1])]
    res = s(center1)
    assert_array_equal(array(res), target)
    # They all should be tuples
    ok_(np.all([isinstance(x, tuple) for x in res]))

    # test for larger diameter
    s = ne.Sphere(4)
    assert_equal(len(s(center1)), 257)

    # test extent keyword
    #s = ne.Sphere(4, extent=(1, 1, 1))
    #assert_array_equal(array(s((0, 0, 0))), array([[0, 0, 0]]))

    # test Errors during initialisation and call
    #assert_raises(ValueError, ne.Sphere, 2)
    #assert_raises(ValueError, ne.Sphere, 1.0)

    # no longer extent available
    assert_raises(TypeError, ne.Sphere, 1, extent=(1))
    assert_raises(TypeError, ne.Sphere, 1, extent=(1.0, 1.0, 1.0))

    s = ne.Sphere(1)
    #assert_raises(ValueError, s, (1))
    if __debug__:
        # No float coordinates allowed for now...
        # XXX might like to change that ;)
        assert_raises(ValueError, s, (1.0, 1.0, 1.0))

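# Illustrative cross-check (an addition, not from the original suite) of the
# 257 asserted above: the number of integer offsets within Euclidean radius
# 4 in three dimensions.
def test_sphere_lattice_count_arithmetic():
    count = sum(1
                for x in range(-4, 5)
                for y in range(-4, 5)
                for z in range(-4, 5)
                if x * x + y * y + z * z <= 16)
    assert_equal(count, 257)
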
def test_array_collectable():
    c = ArrayCollectable()

    # empty by default
    assert_equal(c.name, None)
    assert_equal(c.value, None)

    # late assignment
    c.name = 'somename'
    assert_raises(ValueError, c._set, 12345)
    assert_equal(c.value, None)
    c.value = np.arange(5)
    assert_equal(c.name, 'somename')
    assert_array_equal(c.value, np.arange(5))

    # immediate content
    data = np.random.random(size=(3, 10))
    c = ArrayCollectable(data.copy(), 'myname', "This is a test", length=3)
    assert_equal(c.name, 'myname')
    assert_array_equal(c.value, data)
    assert_equal(c.__doc__, "This is a test")
    assert_equal(str(c), 'myname')

    # repr
    from numpy import array
    e = eval(repr(c))
    assert_equal(e.name, 'myname')
    assert_array_almost_equal(e.value, data)
    assert_equal(e.__doc__, "This is a test")

    # cannot assign array of wrong length
    assert_raises(ValueError, c._set, np.arange(5))
    assert_equal(len(c), 3)

    # shallow copy DOES create a view of value array
    c.value = np.arange(3)
    d = copy.copy(c)
    assert_true(d.value.base is c.value)

    # names starting with _ are not allowed
    assert_raises(ValueError, c._set_name, "_underscore")

def test_gifti_dataset(fn, format_, include_nodes):
    expected_ds = _get_test_dataset(include_nodes)

    expected_ds_sa = expected_ds.copy(deep=True)
    expected_ds_sa.sa['chunks'] = [4, 3, 2, 1, 3, 2]
    expected_ds_sa.sa['targets'] = ['t%d' % i for i in xrange(6)]

    # build GIFTI file from scratch
    gifti_string = _build_gifti_string(format_, include_nodes)
    with open(fn, 'w') as f:
        f.write(gifti_string)

    # reading GIFTI file
    ds = gifti_dataset(fn)
    assert_datasets_almost_equal(ds, expected_ds)

    # test GiftiImage input
    img = nb_giftiio.read(fn)
    ds2 = gifti_dataset(img)
    assert_datasets_almost_equal(ds2, expected_ds)

    # test using Nibabel's output from write
    nb_giftiio.write(img, fn)
    ds3 = gifti_dataset(fn)
    assert_datasets_almost_equal(ds3, expected_ds)

    # test targets and chunks arguments
    ds3_sa = gifti_dataset(fn, targets=expected_ds_sa.targets,
                           chunks=expected_ds_sa.chunks)
    assert_datasets_almost_equal(ds3_sa, expected_ds_sa)

    # test map2gifti
    img2 = map2gifti(ds)
    ds4 = gifti_dataset(img2)
    assert_datasets_almost_equal(ds4, expected_ds)

    # test float64 and int64, which must be converted to float32 and int32
    fa = dict()
    if include_nodes:
        fa['node_indices'] = ds.fa.node_indices.astype(np.int64)
    ds_float64 = Dataset(samples=ds.samples.astype(np.float64), fa=fa)
    ds_float64_again = gifti_dataset(map2gifti(ds_float64))
    assert_equal(ds_float64_again.samples.dtype, np.float32)
    if include_nodes:
        assert_equal(ds_float64_again.fa.node_indices.dtype, np.int32)

    # test contents of GIFTI image
    assert_true(isinstance(img2, nb_gifti.GiftiImage))
    nsamples = ds.samples.shape[0]
    if include_nodes:
        node_arr = img2.darrays[0]
        assert_equal(node_arr.intent,
                     intent_codes.code['NIFTI_INTENT_NODE_INDEX'])
        assert_equal(node_arr.coordsys, None)
        assert_equal(node_arr.data.dtype, np.int32)
        assert_equal(node_arr.datatype, data_type_codes['int32'])

        first_data_array_pos = 1
        narrays = nsamples + 1
    else:
        first_data_array_pos = 0
        narrays = nsamples
    assert_equal(len(img2.darrays), narrays)
    for i in xrange(nsamples):
        arr = img2.darrays[i + first_data_array_pos]

        # check intent code
        illegal_intents = ['NIFTI_INTENT_NODE_INDEX',
                           'NIFTI_INTENT_GENMATRIX',
                           'NIFTI_INTENT_POINTSET',
                           'NIFTI_INTENT_TRIANGLE']
        assert_true(arr.intent not in
                    [intent_codes.code[s] for s in illegal_intents])

        # although the GIFTI standard is not very clear about whether
        # arrays with other intent than NODE_INDEX can have a
        # GiftiCoordSystem, FreeSurfer's mris_convert
        # does not seem to like its presence. Thus we make sure that
        # it's not there.
        assert_equal(arr.coordsys, None)
        assert_equal(arr.data.dtype, np.float32)
        assert_equal(arr.datatype, data_type_codes['float32'])

    # another test for map2gifti, setting the encoding explicitly
    map2gifti(ds, fn, encoding=format_)
    ds5 = gifti_dataset(fn)
    assert_datasets_almost_equal(ds5, expected_ds)

    # test map2gifti with array input; nodes are not stored
    map2gifti(ds.samples, fn)
    ds6 = gifti_dataset(fn)
    if include_nodes:
        assert_raises(AssertionError, assert_datasets_almost_equal,
                      ds6, expected_ds)
    else:
        assert_datasets_almost_equal(ds6, expected_ds)

    assert_raises(TypeError, gifti_dataset, ds3_sa)
    assert_raises(TypeError, map2gifti, img, fn)

def test_attrpermute():
    # Was about to use @borrowkwargs, but it didn't work out; this check
    # doesn't hurt
    doc = AttributePermutator.__init__.__doc__
    assert_in('limit : ', doc)
    assert_not_in('collection : ', doc)

    ds = give_data()
    ds.sa['ids'] = range(len(ds))
    pristine_data = ds.samples.copy()
    permutation = AttributePermutator(['targets', 'ids'], assure=True)
    pds = permutation(ds)
    # should not touch the data
    assert_array_equal(pristine_data, pds.samples)
    # even keep the very same array
    assert_true(pds.samples.base is ds.samples)
    # there is no way that it can be the same attribute
    assert_false(np.all(pds.sa.ids == ds.sa.ids))
    # ids should reflect permutation setup
    assert_array_equal(pds.sa.targets, ds.sa.targets[pds.sa.ids])
    # other attribute should remain intact
    assert_array_equal(pds.sa.chunks, ds.sa.chunks)

    # now chunk-wise permutation
    permutation = AttributePermutator('ids', limit='chunks')
    pds = permutation(ds)
    # first ten should remain first ten
    assert_false(np.any(pds.sa.ids[:10] > 9))

    # verify that implausible assure=True would not work
    permutation = AttributePermutator('targets', limit='ids', assure=True)
    assert_raises(RuntimeError, permutation, ds)

    # same thing, but only permute single chunk
    permutation = AttributePermutator('ids', limit={'chunks': 3})
    pds = permutation(ds)
    # one chunk should change
    assert_false(np.any(pds.sa.ids[30:40] > 39))
    assert_false(np.any(pds.sa.ids[30:40] < 30))
    # the rest not
    assert_array_equal(pds.sa.ids[:30], range(30))

    # or a list of chunks
    permutation = AttributePermutator('ids', limit={'chunks': [3, 4]})
    pds = permutation(ds)
    # two chunks should change
    assert_false(np.any(pds.sa.ids[30:50] > 49))
    assert_false(np.any(pds.sa.ids[30:50] < 30))
    # the rest not
    assert_array_equal(pds.sa.ids[:30], range(30))

    # and now try generating more permutations
    nruns = 2
    permutation = AttributePermutator(['targets', 'ids'],
                                      assure=True, count=nruns)
    pds = list(permutation.generate(ds))
    assert_equal(len(pds), nruns)
    for p in pds:
        assert_false(np.all(p.sa.ids == ds.sa.ids))

    # permute feature attrs
    ds.fa['ids'] = range(ds.shape[1])
    permutation = AttributePermutator('fa.ids', assure=True)
    pds = permutation(ds)
    assert_false(np.all(pds.fa.ids == ds.fa.ids))

    # now chunk-wise uattrs strategy (reassignment)
    permutation = AttributePermutator('targets', limit='chunks',
                                      strategy='uattrs', assure=True)
    pds = permutation(ds)
    # Due to assure above -- we should have changed things
    assert_not_equal(zip(ds.targets), zip(pds.targets))
    # in each chunk we should have unique remappings
    for c in ds.UC:
        chunk_idx = ds.C == c
        otargets, ptargets = ds.targets[chunk_idx], pds.sa.targets[chunk_idx]
        # we still have the same targets
        assert_equal(set(ptargets), set(otargets))
        # we have only 1-to-1 mappings
        assert_equal(len(set(zip(otargets, ptargets))), len(set(otargets)))

    ds.sa['odds'] = ds.sa.ids % 2
    # test combinations
    permutation = AttributePermutator(['targets', 'odds'], limit='chunks',
                                      strategy='uattrs', assure=True)
    pds = permutation(ds)
    # Due to assure above -- we should have changed things
    assert_not_equal(zip(ds.targets, ds.sa.odds),
                     zip(pds.targets, pds.sa.odds))
    # In each chunk we should have unique remappings
    for c in ds.UC:
        chunk_idx = ds.C == c
        otargets, ptargets = ds.targets[chunk_idx], pds.sa.targets[chunk_idx]
        oodds, podds = ds.sa.odds[chunk_idx], pds.sa.odds[chunk_idx]
        # we still have the same targets
        assert_equal(set(ptargets), set(otargets))
        assert_equal(set(oodds), set(podds))
        # at the end we have the same mapping
        assert_equal(set(zip(otargets, oodds)), set(zip(ptargets, podds)))

def test_cosmo_exceptions():
    m = _create_small_mat_dataset_dict()
    m.pop('samples')
    assert_raises(KeyError, cosmo.cosmo_dataset, m)
    assert_raises(ValueError, cosmo.from_any, m)
    assert_raises(ValueError, cosmo.from_any, ['illegal input'])

    mapping = {1: arr([1, 2]), 2: arr([2, 0, 0])}
    qe = cosmo.CosmoQueryEngine(mapping)  # should be fine
    assert_raises(TypeError, cosmo.CosmoQueryEngine, [])

    mapping[1] = 1.5
    assert_raises(TypeError, cosmo.CosmoQueryEngine, mapping)

    mapping[1] = 'foo'
    assert_raises(TypeError, cosmo.CosmoQueryEngine, mapping)

    mapping[1] = -1
    assert_raises(TypeError, cosmo.CosmoQueryEngine, mapping)

    mapping[1] = arr([1.5, 2.1])
    assert_raises(ValueError, cosmo.CosmoQueryEngine, mapping)

    neighbors = _create_small_mat_nbrhood_dict()['neighbors']
    qe = cosmo.CosmoQueryEngine.from_mat(neighbors)  # should be fine
    neighbors[0, 0][0] = -1
    assert_raises(ValueError, cosmo.CosmoQueryEngine.from_mat, neighbors)
    neighbors[0, 0] = arr(1.5)
    assert_raises(ValueError, cosmo.CosmoQueryEngine.from_mat, neighbors)

    for illegal_nbrhood in (['fail'], cosmo.QueryEngineInterface):
        assert_raises((TypeError, ValueError),
                      lambda x: cosmo.CosmoSearchlight([], x),
                      illegal_nbrhood)

def test_flatten():
    samples_shape = (2, 2, 4)
    data_shape = (4,) + samples_shape
    data = np.arange(np.prod(data_shape)).reshape(data_shape).view(myarray)
    pristinedata = data.copy()
    target = [[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31],
              [32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47],
              [48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63]]
    target = np.array(target).view(myarray)
    index_target = np.array([[0, 0, 0], [0, 0, 1], [0, 0, 2], [0, 0, 3],
                             [0, 1, 0], [0, 1, 1], [0, 1, 2], [0, 1, 3],
                             [1, 0, 0], [1, 0, 1], [1, 0, 2], [1, 0, 3],
                             [1, 1, 0], [1, 1, 1], [1, 1, 2], [1, 1, 3]])

    # test only flattening the first two dimensions
    fm_max = FlattenMapper(maxdims=2)
    fm_max.train(data)
    assert_equal(fm_max(data).shape, (4, 4, 4))

    # array subclass survives
    ok_(isinstance(data, myarray))

    # actually, there should be no difference between a plain FlattenMapper and
    # a chain that only has a FlattenMapper as the one element
    for fm in [FlattenMapper(space='voxel'),
               ChainMapper([FlattenMapper(space='voxel'),
                            StaticFeatureSelection(slice(None))])]:
        # not working if untrained
        assert_raises(RuntimeError,
                      fm.forward1,
                      np.arange(np.sum(samples_shape) + 1))

        fm.train(data)

        ok_(isinstance(fm.forward(data), myarray))
        ok_(isinstance(fm.forward1(data[2]), myarray))
        assert_array_equal(fm.forward(data), target)
        assert_array_equal(fm.forward1(data[2]), target[2])
        assert_raises(ValueError, fm.forward, np.arange(4))

        # all of that leaves that data unmodified
        assert_array_equal(data, pristinedata)

        # reverse mapping
        ok_(isinstance(fm.reverse(target), myarray))
        ok_(isinstance(fm.reverse1(target[0]), myarray))
        ok_(isinstance(fm.reverse(target[1:2]), myarray))
        assert_array_equal(fm.reverse(target), data)
        assert_array_equal(fm.reverse1(target[0]), data[0])
        assert_array_equal(fm.reverse1(target[0]),
                           _verified_reverse1(fm, target[0]))
        assert_array_equal(fm.reverse(target[1:2]), data[1:2])
        assert_raises(ValueError, fm.reverse, np.arange(14))

        # check one dimensional data, treated as scalar samples
        oned = np.arange(5)
        fm.train(Dataset(oned))
        # needs 2D
        assert_raises(ValueError, fm.forward, oned)

        # doesn't match mapper, since Dataset turns `oned` into (5,1)
        assert_raises(ValueError, fm.forward, oned)
        assert_equal(Dataset(oned).nfeatures, 1)

    # try dataset mode, with some feature attribute
    fattr = np.arange(np.prod(samples_shape)).reshape(samples_shape)
    ds = Dataset(data, fa={'awesome': fattr.copy()})
    assert_equal(ds.samples.shape, data_shape)

    fm.train(ds)
    dsflat = fm.forward(ds)
    ok_(isinstance(dsflat, Dataset))
    ok_(isinstance(dsflat.samples, myarray))
    assert_array_equal(dsflat.samples, target)
    assert_array_equal(dsflat.fa.awesome, np.arange(np.prod(samples_shape)))
    assert_true(isinstance(dsflat.fa['awesome'], ArrayCollectable))

    # test index creation
    assert_array_equal(index_target, dsflat.fa.voxel)

    # and back
    revds = fm.reverse(dsflat)
    ok_(isinstance(revds, Dataset))
    ok_(isinstance(revds.samples, myarray))
    assert_array_equal(revds.samples, data)
    assert_array_equal(revds.fa.awesome, fattr)
    assert_true(isinstance(revds.fa['awesome'], ArrayCollectable))
    assert_false('voxel' in revds.fa)

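# Illustrative cross-check (an addition, not from the original suite) for
# the index_target table above: C-order flattening of a (2, 2, 4) feature
# block enumerates exactly those multi-indices.
def test_flatten_index_enumeration():
    idx = np.transpose(np.unravel_index(np.arange(2 * 2 * 4), (2, 2, 4)))
    assert_equal(len(idx), 16)
    assert_array_equal(idx[0], [0, 0, 0])
    assert_array_equal(idx[-1], [1, 1, 3])
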
def test_zscore():
    """Test z-scoring transformation
    """
    # dataset: mean=2, std=1
    samples = np.array((0, 1, 3, 4, 2, 2, 3, 1, 1, 3, 3, 1,
                        2, 2, 2, 2)).reshape((16, 1))
    data = dataset_wizard(samples.copy(), targets=range(16), chunks=[0] * 16)
    assert_equal(data.samples.mean(), 2.0)
    assert_equal(data.samples.std(), 1.0)
    data_samples = data.samples.copy()
    zscore(data, chunks_attr='chunks')

    # copy should stay intact
    assert_equal(data_samples.mean(), 2.0)
    assert_equal(data_samples.std(), 1.0)

    # we should be able to operate on ndarrays
    # But we can't change type inplace for an array, can't we?
    assert_raises(TypeError, zscore, data_samples, chunks_attr=None)
    # so let's do it manually
    data_samples = data_samples.astype(float)
    zscore(data_samples, chunks_attr=None)
    assert_array_equal(data.samples, data_samples)

    # check z-scoring
    check = np.array([-2, -1, 1, 2, 0, 0, 1, -1, -1, 1, 1, -1, 0, 0, 0, 0],
                     dtype='float64').reshape(16, 1)
    assert_array_equal(data.samples, check)

    data = dataset_wizard(samples.copy(), targets=range(16), chunks=[0] * 16)
    zscore(data, chunks_attr=None)
    assert_array_equal(data.samples, check)

    # check z-scoring taking set of labels as a baseline
    data = dataset_wizard(samples.copy(),
                          targets=[0, 2, 2, 2, 1] + [2] * 11,
                          chunks=[0] * 16)
    zscore(data, param_est=('targets', [0, 1]))
    assert_array_equal(samples, data.samples + 1.0)

    # check that zscore modifies in-place; only guaranteed if no upcasting is
    # necessary
    samples = samples.astype('float')
    data = dataset_wizard(samples,
                          targets=[0, 2, 2, 2, 1] + [2] * 11,
                          chunks=[0] * 16)
    zscore(data, param_est=('targets', [0, 1]))
    assert_array_equal(samples, data.samples)

    # these might be duplicating code above -- but twice is better than nothing

    # dataset: mean=2, std=1
    raw = np.array((0, 1, 3, 4, 2, 2, 3, 1, 1, 3, 3, 1, 2, 2, 2, 2))
    # dataset: mean=12, std=1
    raw2 = np.array((0, 1, 3, 4, 2, 2, 3, 1, 1, 3, 3, 1, 2, 2, 2, 2)) + 10
    # zscore target
    check = [-2, -1, 1, 2, 0, 0, 1, -1, -1, 1, 1, -1, 0, 0, 0, 0]

    ds = dataset_wizard(raw.copy(), targets=range(16), chunks=[0] * 16)
    pristine = dataset_wizard(raw.copy(), targets=range(16), chunks=[0] * 16)

    zm = ZScoreMapper()
    # should do global zscore by default
    zm.train(ds)                        # train
    assert_array_almost_equal(zm.forward(ds), np.transpose([check]))
    # should not modify the source
    assert_array_equal(pristine, ds)

    # if we tell it a different mean it should obey the order
    zm = ZScoreMapper(params=(3, 1))
    zm.train(ds)
    assert_array_almost_equal(zm.forward(ds), np.transpose([check]) - 1)
    assert_array_equal(pristine, ds)

    # let's look at chunk-wise z-scoring
    ds = dataset_wizard(np.hstack((raw.copy(), raw2.copy())),
                        targets=range(32),
                        chunks=[0] * 16 + [1] * 16)
    # by default chunk-wise
    zm = ZScoreMapper()
    zm.train(ds)                        # train
    assert_array_almost_equal(zm.forward(ds), np.transpose([check + check]))
    # we should be able to do that same manually
    zm = ZScoreMapper(params={0: (2, 1), 1: (12, 1)})
    zm.train(ds)                        # train
    assert_array_almost_equal(zm.forward(ds), np.transpose([check + check]))

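# Illustrative cross-check (an addition, not from the original suite): the
# `check` vector above is just (samples - mean) / std for the mean-2, std-1
# data used throughout test_zscore.
def test_zscore_arithmetic():
    samples = np.array((0, 1, 3, 4, 2, 2, 3, 1, 1, 3, 3, 1, 2, 2, 2, 2),
                       dtype=float)
    assert_equal(samples.mean(), 2.0)
    assert_equal(samples.std(), 1.0)
    assert_array_equal((samples - samples.mean()) / samples.std(),
                       [-2, -1, 1, 2, 0, 0, 1, -1, -1, 1, 1, -1,
                        0, 0, 0, 0])
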
def test_attrmap():
    map_default = {'eins': 0, 'zwei': 2, 'sieben': 1}
    map_custom = {'eins': 11, 'zwei': 22, 'sieben': 33}
    literal = ['eins', 'zwei', 'sieben', 'eins', 'sieben', 'eins']
    literal_nonmatching = ['uno', 'dos', 'tres']
    num_default = [0, 2, 1, 0, 1, 0]
    num_custom = [11, 22, 33, 11, 33, 11]

    # no custom mapping given
    am = AttributeMap()
    assert_false(am)
    ok_(len(am) == 0)
    assert_array_equal(am.to_numeric(literal), num_default)
    assert_array_equal(am.to_literal(num_default), literal)
    ok_(am)
    ok_(len(am) == 3)

    #
    # Tests for recursive mapping + preserving datatype
    class myarray(np.ndarray):
        pass

    assert_raises(KeyError, am.to_literal, [(1, 2), 2, 0])
    literal_fancy = [(1, 2), 2, [0], np.array([0, 1]).view(myarray)]
    literal_fancy_tuple = tuple(literal_fancy)
    literal_fancy_array = np.array(literal_fancy, dtype=object)

    for l in (literal_fancy, literal_fancy_tuple,
              literal_fancy_array):
        res = am.to_literal(l, recurse=True)
        assert_equal(res[0], ('sieben', 'zwei'))
        assert_equal(res[1], 'zwei')
        assert_equal(res[2], ['eins'])
        assert_array_equal(res[3], ['eins', 'sieben'])

        # types of result and subsequences should be preserved
        ok_(isinstance(res, l.__class__))
        ok_(isinstance(res[0], tuple))
        ok_(isinstance(res[1], str))
        ok_(isinstance(res[2], list))
        ok_(isinstance(res[3], myarray))

    # yet another example
    a = np.empty(1, dtype=object)
    a[0] = (0, 1)
    res = am.to_literal(a, recurse=True)
    ok_(isinstance(res[0], tuple))

    #
    # with custom mapping
    am = AttributeMap(map=map_custom)
    assert_array_equal(am.to_numeric(literal), num_custom)
    assert_array_equal(am.to_literal(num_custom), literal)

    # if not numeric nothing is mapped
    assert_array_equal(am.to_numeric(num_custom), num_custom)
    # even if the map doesn't fit
    assert_array_equal(am.to_numeric(num_default), num_default)

    # need to_numeric first
    am = AttributeMap()
    assert_raises(RuntimeError, am.to_literal, [1, 2, 3])
    # stupid args
    assert_raises(ValueError, AttributeMap, map=num_custom)

    # map mismatch
    am = AttributeMap(map=map_custom)
    if __debug__:
        # checked only in __debug__
        assert_raises(KeyError, am.to_numeric, literal_nonmatching)
    # needs reset and should work afterwards
    am.clear()
    assert_array_equal(am.to_numeric(literal_nonmatching), [2, 0, 1])
    # and now reverse
    am = AttributeMap(map=map_custom)
    assert_raises(KeyError, am.to_literal, num_default)

    # dict-like interface
    am = AttributeMap()
    ok_([(k, v) for k, v in am.iteritems()] == [])

def test_simpleboxcar():
    data = np.atleast_2d(np.arange(10)).T
    sp = np.arange(10)

    # check that a stupid thing doesn't work
    assert_raises(ValueError, BoxcarMapper, sp, 0)

    # now do an identity transformation
    bcm = BoxcarMapper(sp, 1)
    trans = bcm.forward(data)
    # ,0 is a feature below, so we get explicit 2D out of 1D
    assert_array_equal(trans[:, 0], data)

    # now check for illegal boxes
    if __debug__:
        # condition is checked only in __debug__
        assert_raises(ValueError, BoxcarMapper(sp, 2).train, data)

    # now something that should work
    nbox = 9
    boxlength = 2
    sp = np.arange(nbox)
    bcm = BoxcarMapper(sp, boxlength)
    trans = bcm.forward(data)
    # check that it properly upcasts the dimensionality
    assert_equal(trans.shape, (nbox, boxlength) + data.shape[1:])
    # check actual values, squeezing the last dim for simplicity
    assert_array_equal(trans.squeeze(),
                       np.vstack((np.arange(9), np.arange(9) + 1)).T)

    # now test for proper data shape
    data = np.ones((10, 3, 4, 2))
    sp = [2, 4, 3, 5]
    trans = BoxcarMapper(sp, 4).forward(data)
    assert_equal(trans.shape, (4, 4, 3, 4, 2))

    # test reverse
    data = np.arange(240).reshape(10, 3, 4, 2)
    sp = [2, 4, 3, 5]
    boxlength = 2
    m = BoxcarMapper(sp, boxlength)
    m.train(data)
    mp = m.forward(data)
    assert_equal(mp.shape, (4, 2, 3, 4, 2))

    # try full reconstruct
    mr = m.reverse(mp)
    # shape has to match
    assert_equal(mr.shape, (len(sp) * boxlength,) + data.shape[1:])
    # only known samples are part of the results
    assert_true((mr >= 24).all())
    assert_true((mr < 168).all())

    # check proper reconstruction of non-conflicting sample
    assert_array_equal(mr[0].ravel(), np.arange(48, 72))
    # check proper reconstruction of samples being part of multiple
    # mapped samples
    assert_array_equal(mr[1].ravel(), np.arange(72, 96))

    # test reverse of a single sample
    singlesample = np.arange(48).reshape(2, 3, 4, 2)
    assert_array_equal(singlesample, m.reverse1(singlesample))
    # now in a dataset
    ds = Dataset([singlesample])
    assert_equal(ds.shape, (1,) + singlesample.shape)
    # after reverse mapping the 'sample axis' should vanish and the original 3d
    # shape of the samples should be restored
    assert_equal(ds.shape[1:], m.reverse(ds).shape)
    # multiple samples should just be concatenated along the samples axis
    ds = Dataset([singlesample, singlesample])
    assert_equal((np.prod(ds.shape[:2]),) + singlesample.shape[1:],
                 m.reverse(ds).shape)
    # should not work for shape mismatch, but it does work and is useful when
    # reverse mapping sample attributes
    #assert_raises(ValueError, m.reverse, singlesample[0])

    # check broadcasting of 'raw' samples into proper boxcars on forward()
    bc = m.forward1(np.arange(24).reshape(3, 4, 2))
    assert_array_equal(bc, np.array(2 * [np.arange(24).reshape(3, 4, 2)]))

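# Illustrative cross-check (an addition, not from the original suite): the
# boxcar forward map just stacks, for each onset in `sp`, a window of
# `boxlength` consecutive samples -- plain numpy reproduces the
# (4, 2, 3, 4, 2) shape asserted above.
def test_simpleboxcar_shape_arithmetic():
    onsets, boxlength = [2, 4, 3, 5], 2
    data = np.arange(240).reshape(10, 3, 4, 2)
    boxcars = np.array([data[o:o + boxlength] for o in onsets])
    assert_equal(boxcars.shape, (4, 2, 3, 4, 2))
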
def test_gnbsearchlight_permutations():
    import mvpa2
    from mvpa2.base.node import ChainNode
    from mvpa2.clfs.gnb import GNB
    from mvpa2.generators.base import Repeater
    from mvpa2.generators.partition import NFoldPartitioner, OddEvenPartitioner
    #import mvpa2.generators.permutation
    #reload(mvpa2.generators.permutation)
    from mvpa2.generators.permutation import AttributePermutator
    from mvpa2.testing.datasets import datasets
    from mvpa2.measures.base import CrossValidation
    from mvpa2.measures.gnbsearchlight import sphere_gnbsearchlight
    from mvpa2.measures.searchlight import sphere_searchlight
    from mvpa2.mappers.fx import mean_sample
    from mvpa2.misc.errorfx import mean_mismatch_error
    from mvpa2.clfs.stats import MCNullDist
    from mvpa2.testing.tools import assert_raises, ok_, assert_array_less

    # mvpa2.debug.active = ['APERM', 'SLC'] #, 'REPM']
    # mvpa2.debug.metrics += ['pid']
    count = 10
    nproc = 1 + int(mvpa2.externals.exists('pprocess'))
    ds = datasets['3dsmall'].copy()
    ds.fa['voxel_indices'] = ds.fa.myspace

    slkwargs = dict(radius=3, space='voxel_indices', enable_ca=['roi_sizes'],
                    center_ids=[1, 10, 70, 100])

    mvpa2.seed(mvpa2._random_seed)
    clf = GNB()
    splt = NFoldPartitioner(cvtype=2, attr='chunks')

    repeater = Repeater(count=count)
    permutator = AttributePermutator('targets', limit={'partitions': 1},
                                     count=1)

    null_sl = sphere_gnbsearchlight(clf,
                                    ChainNode([splt, permutator],
                                              space=splt.get_space()),
                                    postproc=mean_sample(),
                                    errorfx=mean_mismatch_error,
                                    **slkwargs)

    distr_est = MCNullDist(repeater, tail='left', measure=null_sl,
                           enable_ca=['dist_samples'])
    sl = sphere_gnbsearchlight(clf, splt,
                               reuse_neighbors=True,
                               null_dist=distr_est, postproc=mean_sample(),
                               errorfx=mean_mismatch_error,
                               **slkwargs)
    if __debug__:                 # assert is done only without -O mode
        assert_raises(NotImplementedError, sl, ds)

    # "ad-hoc searchlights can't handle yet varying targets across partitions"
    if False:
        # after the above limitation is removed -- enable
        sl_map = sl(ds)
        sl_null_prob = sl.ca.null_prob.samples.copy()

    mvpa2.seed(mvpa2._random_seed)
    ### 'normal' Searchlight
    clf = GNB()
    splt = NFoldPartitioner(cvtype=2, attr='chunks')
    repeater = Repeater(count=count)
    permutator = AttributePermutator('targets', limit={'partitions': 1},
                                     count=1)
    # rng=np.random.RandomState(0))  # to trigger failure since the same
    # np.random state would be reused across all pprocesses
    null_cv = CrossValidation(clf,
                              ChainNode([splt, permutator],
                                        space=splt.get_space()),
                              postproc=mean_sample())
    null_sl_normal = sphere_searchlight(null_cv, nproc=nproc, **slkwargs)
    distr_est_normal = MCNullDist(repeater, tail='left',
                                  measure=null_sl_normal,
                                  enable_ca=['dist_samples'])

    cv = CrossValidation(clf, splt, errorfx=mean_mismatch_error,
                         enable_ca=['stats'], postproc=mean_sample())
    sl = sphere_searchlight(cv, nproc=nproc, null_dist=distr_est_normal,
                            **slkwargs)
    sl_map_normal = sl(ds)
    sl_null_prob_normal = sl.ca.null_prob.samples.copy()

    # For every feature we should get some variance in the estimates.  In
    # case of failure they are all really close to each other (up to
    # numerical precision), so the variance will be close to 0
    assert_array_less(-np.var(distr_est_normal.ca.dist_samples.samples[0],
                              axis=1), -1e-5)
    for s in distr_est_normal.ca.dist_samples.samples[0]:
        ok_(len(np.unique(s)) > 1)

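# Illustrative sketch (not part of the original suite): the MCNullDist setup
# above boils down to rebuilding the measure's null distribution under label
# permutations and locating the observed value in it. `_mc_null_p` is a
# hypothetical numpy helper, not the MCNullDist API; `measure` stands in for
# an error function of (samples, targets).
def _mc_null_p(measure, samples, targets, count=100, rng=None):
    if rng is None:
        rng = np.random.RandomState(0)
    observed = measure(samples, targets)
    null = np.array([measure(samples, rng.permutation(targets))
                     for _ in range(count)])
    # tail='left' as above: how often the null is at least as low as observed
    return np.mean(null <= observed)
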
def test_simpleboxcar():
    data = np.atleast_2d(np.arange(10)).T
    sp = np.arange(10)

    # check that the stupid thing doesn't work
    assert_raises(ValueError, BoxcarMapper, sp, 0)

    # now do an identity transformation
    bcm = BoxcarMapper(sp, 1)
    trans = bcm.forward(data)
    # ,0 is a feature below, so we get explicit 2D out of 1D
    assert_array_equal(trans[:, 0], data)

    # now check for illegal boxes
    if __debug__:
        # condition is checked only in __debug__
        assert_raises(ValueError, BoxcarMapper(sp, 2).train, data)

    # now something that should work
    nbox = 9
    boxlength = 2
    sp = np.arange(nbox)
    bcm = BoxcarMapper(sp, boxlength)
    trans = bcm.forward(data)
    # check that it properly upcasts the dimensionality
    assert_equal(trans.shape, (nbox, boxlength) + data.shape[1:])
    # check actual values, squeezing the last dim for simplicity
    assert_array_equal(trans.squeeze(),
                       np.vstack((np.arange(9), np.arange(9) + 1)).T)

    # now test for proper data shape
    data = np.ones((10, 3, 4, 2))
    sp = [2, 4, 3, 5]
    trans = BoxcarMapper(sp, 4).forward(data)
    assert_equal(trans.shape, (4, 4, 3, 4, 2))

    # test reverse
    data = np.arange(240).reshape(10, 3, 4, 2)
    sp = [2, 4, 3, 5]
    boxlength = 2
    m = BoxcarMapper(sp, boxlength)
    m.train(data)
    mp = m.forward(data)
    assert_equal(mp.shape, (4, 2, 3, 4, 2))

    # try a full reconstruct
    mr = m.reverse(mp)
    # shape has to match
    assert_equal(mr.shape, (len(sp) * boxlength,) + data.shape[1:])
    # only known samples are part of the results
    assert_true((mr >= 24).all())
    assert_true((mr < 168).all())
    # check proper reconstruction of a non-conflicting sample
    assert_array_equal(mr[0].ravel(), np.arange(48, 72))
    # check proper reconstruction of samples being part of multiple
    # mapped samples
    assert_array_equal(mr[1].ravel(), np.arange(72, 96))

    # test reverse of a single sample
    singlesample = np.arange(48).reshape(2, 3, 4, 2)
    assert_array_equal(singlesample, m.reverse1(singlesample))
    # now in a dataset
    ds = Dataset([singlesample])
    assert_equal(ds.shape, (1,) + singlesample.shape)
    # after reverse mapping the 'sample axis' should vanish and the original
    # 3d shape of the samples should be restored
    assert_equal(ds.shape[1:], m.reverse(ds).shape)
    # multiple samples should just be concatenated along the samples axis
    ds = Dataset([singlesample, singlesample])
    assert_equal((np.prod(ds.shape[:2]),) + singlesample.shape[1:],
                 m.reverse(ds).shape)
    # should not work for shape mismatch, but it does work and is useful when
    # reverse mapping sample attributes
    #assert_raises(ValueError, m.reverse, singlesample[0])

    # check broadcasting of 'raw' samples into proper boxcars on forward()
    bc = m.forward1(np.arange(24).reshape(3, 4, 2))
    assert_array_equal(bc, np.array(2 * [np.arange(24).reshape(3, 4, 2)]))

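# Illustrative sketch (not part of the original suite): BoxcarMapper.forward
# conceptually stacks, for each start position, `boxlength` consecutive
# samples into one "boxcar". `_boxcar_forward` is a hypothetical numpy
# equivalent of that semantics, not the mapper implementation.
def _boxcar_forward(data, startpoints, boxlength):
    return np.array([data[s:s + boxlength] for s in startpoints])

# e.g. startpoints [2, 4] with boxlength 2 over (10, 1) data yield (2, 2, 1)
_bc = _boxcar_forward(np.arange(10)[:, None], [2, 4], 2)
assert _bc.shape == (2, 2, 1)
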
def test_zscore():
    """Test z-scoring transformation
    """
    # dataset: mean=2, std=1
    samples = np.array((0, 1, 3, 4, 2, 2, 3, 1, 1, 3, 3, 1, 2, 2, 2, 2)).\
        reshape((16, 1))
    data = dataset_wizard(samples.copy(), targets=range(16), chunks=[0] * 16)
    assert_equal(data.samples.mean(), 2.0)
    assert_equal(data.samples.std(), 1.0)
    data_samples = data.samples.copy()
    zscore(data, chunks_attr='chunks')

    # the copy should stay intact
    assert_equal(data_samples.mean(), 2.0)
    assert_equal(data_samples.std(), 1.0)

    # we should be able to operate on ndarrays
    # but we can't change the dtype of an array in-place, can we?
    assert_raises(TypeError, zscore, data_samples, chunks_attr=None)
    # so let's do it manually
    data_samples = data_samples.astype(float)
    zscore(data_samples, chunks_attr=None)
    assert_array_equal(data.samples, data_samples)

    # check z-scoring
    check = np.array([-2, -1, 1, 2, 0, 0, 1, -1, -1, 1, 1, -1, 0, 0, 0, 0],
                     dtype='float64').reshape(16, 1)
    assert_array_equal(data.samples, check)

    data = dataset_wizard(samples.copy(), targets=range(16), chunks=[0] * 16)
    zscore(data, chunks_attr=None)
    assert_array_equal(data.samples, check)

    # check z-scoring taking a set of labels as a baseline
    data = dataset_wizard(samples.copy(),
                          targets=[0, 2, 2, 2, 1] + [2] * 11,
                          chunks=[0] * 16)
    zscore(data, param_est=('targets', [0, 1]))
    assert_array_equal(samples, data.samples + 1.0)

    # check that zscore modifies in-place; only guaranteed if no upcasting is
    # necessary
    samples = samples.astype('float')
    data = dataset_wizard(samples,
                          targets=[0, 2, 2, 2, 1] + [2] * 11,
                          chunks=[0] * 16)
    zscore(data, param_est=('targets', [0, 1]))
    assert_array_equal(samples, data.samples)

    # verify that if param_est is set but chunks_attr is None,
    # z-scoring is performed across the entire dataset correctly
    data = data.copy()
    data_01 = data.select({'targets': [0, 1]})
    zscore(data_01, chunks_attr=None)
    zscore(data, chunks_attr=None, param_est=('targets', [0, 1]))
    assert_array_equal(data_01.samples, data.select({'targets': [0, 1]}))

    # these might be duplicating code above -- but twice is better than nothing

    # dataset: mean=2, std=1
    raw = np.array((0, 1, 3, 4, 2, 2, 3, 1, 1, 3, 3, 1, 2, 2, 2, 2))
    # dataset: mean=12, std=1
    raw2 = np.array((0, 1, 3, 4, 2, 2, 3, 1, 1, 3, 3, 1, 2, 2, 2, 2)) + 10
    # zscore target
    check = [-2, -1, 1, 2, 0, 0, 1, -1, -1, 1, 1, -1, 0, 0, 0, 0]

    ds = dataset_wizard(raw.copy(), targets=range(16), chunks=[0] * 16)
    pristine = dataset_wizard(raw.copy(), targets=range(16), chunks=[0] * 16)

    zm = ZScoreMapper()
    # should do global zscore by default
    zm.train(ds)                        # train
    assert_array_almost_equal(zm.forward(ds), np.transpose([check]))
    # should not modify the source
    assert_array_equal(pristine, ds)

    # if we tell it a different mean it should obey the order
    zm = ZScoreMapper(params=(3, 1))
    zm.train(ds)
    assert_array_almost_equal(zm.forward(ds), np.transpose([check]) - 1)
    assert_array_equal(pristine, ds)

    # let's look at chunk-wise z-scoring
    ds = dataset_wizard(np.hstack((raw.copy(), raw2.copy())),
                        targets=range(32),
                        chunks=[0] * 16 + [1] * 16)
    # by default chunk-wise
    zm = ZScoreMapper()
    zm.train(ds)                        # train
    assert_array_almost_equal(zm.forward(ds), np.transpose([check + check]))
    # we should be able to do the same manually
    zm = ZScoreMapper(params={0: (2, 1), 1: (12, 1)})
    zm.train(ds)                        # train
    assert_array_almost_equal(zm.forward(ds), np.transpose([check + check]))

    # and just a smoke test for warnings reported whenever the number of
    # samples per chunk is low
    # on 1 sample per chunk
    zds1 = ZScoreMapper(chunks_attr='chunks', auto_train=True)(
        ds[[0, -1]])
    ok_(np.all(zds1.samples == 0))      # they all should be 0
    # on 2 samples per chunk
    zds2 = ZScoreMapper(chunks_attr='chunks', auto_train=True)(
        ds[[0, 1, -10, -1]])
    # they all should be -1 or 1
    assert_array_equal(np.unique(zds2.samples), [-1., 1])
    # on 3 samples per chunk -- different warning
    ZScoreMapper(chunks_attr='chunks', auto_train=True)(
        ds[[0, 1, 2, -3, -2, -1]])

    # test if std provided as a list, not as an array, is handled
    # properly -- should zscore all features (not just the first/none
    # as it did before)
    ds = dataset_wizard(np.arange(32).reshape((8, -1)),
                        targets=range(8), chunks=[0] * 8)
    means = [0, 1, -10, 10]
    std0 = np.std(ds[:, 0])             # std deviation of the first feature
    stds = [std0, 10, .1, 1]

    zm = ZScoreMapper(params=(means, stds), auto_train=True)
    dsz = zm(ds)

    assert_array_almost_equal(
        (np.mean(ds, axis=0) - np.asanyarray(means)) / np.array(stds),
        np.mean(dsz, axis=0))
    assert_array_almost_equal(np.std(ds, axis=0) / np.array(stds),
                              np.std(dsz, axis=0))

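# Illustrative sketch (not part of the original suite): chunk-wise z-scoring,
# as exercised above, applies (x - mean) / std separately per chunk.
# `_zscore_chunkwise` is a hypothetical numpy rendition of that semantics
# (it assumes a nonzero std within each chunk).
def _zscore_chunkwise(samples, chunks):
    out = samples.astype(float).copy()
    for c in np.unique(chunks):
        m = chunks == c
        out[m] = (out[m] - out[m].mean(axis=0)) / out[m].std(axis=0)
    return out

# reproduces the `check` vector used above for the mean=2, std=1 dataset
_raw = np.array([0., 1, 3, 4, 2, 2, 3, 1, 1, 3, 3, 1, 2, 2, 2, 2])[:, None]
_chk = np.array([-2., -1, 1, 2, 0, 0, 1, -1, -1, 1, 1, -1, 0, 0, 0, 0])[:, None]
assert np.allclose(_zscore_chunkwise(_raw, np.zeros(16)), _chk)
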
def test_flatten():
    samples_shape = (2, 2, 4)
    data_shape = (4,) + samples_shape
    data = np.arange(np.prod(data_shape)).reshape(data_shape).view(myarray)
    pristinedata = data.copy()
    target = [[ 0,  1,  2,  3,  4,  5,  6,  7,
                8,  9, 10, 11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20, 21, 22, 23,
               24, 25, 26, 27, 28, 29, 30, 31],
              [32, 33, 34, 35, 36, 37, 38, 39,
               40, 41, 42, 43, 44, 45, 46, 47],
              [48, 49, 50, 51, 52, 53, 54, 55,
               56, 57, 58, 59, 60, 61, 62, 63]]
    target = np.array(target).view(myarray)
    index_target = np.array([[0, 0, 0], [0, 0, 1], [0, 0, 2], [0, 0, 3],
                             [0, 1, 0], [0, 1, 1], [0, 1, 2], [0, 1, 3],
                             [1, 0, 0], [1, 0, 1], [1, 0, 2], [1, 0, 3],
                             [1, 1, 0], [1, 1, 1], [1, 1, 2], [1, 1, 3]])

    # test only flattening the first two dimensions
    fm_max = FlattenMapper(maxdims=2)
    fm_max.train(data)
    assert_equal(fm_max(data).shape, (4, 4, 4))

    # array subclass survives
    ok_(isinstance(data, myarray))

    # actually, there should be no difference between a plain FlattenMapper
    # and a chain that only has a FlattenMapper as the one element
    for fm in [FlattenMapper(space='voxel'),
               ChainMapper([FlattenMapper(space='voxel'),
                            StaticFeatureSelection(slice(None))])]:
        # not working if untrained
        assert_raises(RuntimeError,
                      fm.forward1,
                      np.arange(np.sum(samples_shape) + 1))

        fm.train(data)

        ok_(isinstance(fm.forward(data), myarray))
        ok_(isinstance(fm.forward1(data[2]), myarray))
        assert_array_equal(fm.forward(data), target)
        assert_array_equal(fm.forward1(data[2]), target[2])
        assert_raises(ValueError, fm.forward, np.arange(4))

        # all of that leaves the data unmodified
        assert_array_equal(data, pristinedata)

        # reverse mapping
        ok_(isinstance(fm.reverse(target), myarray))
        ok_(isinstance(fm.reverse1(target[0]), myarray))
        ok_(isinstance(fm.reverse(target[1:2]), myarray))
        assert_array_equal(fm.reverse(target), data)
        assert_array_equal(fm.reverse1(target[0]), data[0])
        assert_array_equal(fm.reverse(target[1:2]), data[1:2])
        assert_raises(ValueError, fm.reverse, np.arange(14))

        # check one dimensional data, treated as scalar samples
        oned = np.arange(5)
        fm.train(Dataset(oned))
        # needs 2D
        assert_raises(ValueError, fm.forward, oned)
        # doesn't match the mapper, since Dataset turns `oned` into (5,1)
        assert_raises(ValueError, fm.forward, oned)
        assert_equal(Dataset(oned).nfeatures, 1)

        # try dataset mode, with some feature attribute
        fattr = np.arange(np.prod(samples_shape)).reshape(samples_shape)
        ds = Dataset(data, fa={'awesome': fattr.copy()})
        assert_equal(ds.samples.shape, data_shape)

        fm.train(ds)
        dsflat = fm.forward(ds)
        ok_(isinstance(dsflat, Dataset))
        ok_(isinstance(dsflat.samples, myarray))
        assert_array_equal(dsflat.samples, target)
        assert_array_equal(dsflat.fa.awesome,
                           np.arange(np.prod(samples_shape)))
        assert_true(isinstance(dsflat.fa['awesome'], ArrayCollectable))
        # test index creation
        assert_array_equal(index_target, dsflat.fa.voxel)

        # and back
        revds = fm.reverse(dsflat)
        ok_(isinstance(revds, Dataset))
        ok_(isinstance(revds.samples, myarray))
        assert_array_equal(revds.samples, data)
        assert_array_equal(revds.fa.awesome, fattr)
        assert_true(isinstance(revds.fa['awesome'], ArrayCollectable))
        assert_false('voxel' in revds.fa)

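# Illustrative sketch (not part of the original suite): FlattenMapper's core
# behavior is a reshape to (nsamples, -1), and the 'voxel' feature attribute
# asserted above is simply the multi-index of each flattened feature in
# C order -- exactly what np.ndindex enumerates:
_coords = np.array(list(np.ndindex((2, 2, 4))))   # one (i, j, k) per feature
assert _coords.shape == (16, 3)
assert tuple(_coords[5]) == (0, 1, 1)   # same ordering as index_target above
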
def test_subset():
    data = np.array(
        [[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15],
         [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31],
         [32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47],
         [48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63]])
    # a float array doesn't work
    sm = StaticFeatureSelection(np.ones(16))
    assert_raises(IndexError, sm.forward, data)

    # full mask
    sm = StaticFeatureSelection(slice(None))
    # should not change single samples
    assert_array_equal(sm.forward(data[0:1].copy()), data[0:1])
    # or multi-samples
    assert_array_equal(sm.forward(data.copy()), data)
    sm.train(data)
    # same on reverse
    assert_array_equal(sm.reverse(data[0:1].copy()), data[0:1])
    # or multi-samples
    assert_array_equal(sm.reverse(data.copy()), data)

    # identical mappers
    sm_none = StaticFeatureSelection(slice(None))
    sm_int = StaticFeatureSelection(np.arange(16))
    sm_bool = StaticFeatureSelection(np.ones(16, dtype='bool'))
    sms = [sm_none, sm_int, sm_bool]

    # test subsets
    sids = [3, 4, 5, 6]
    bsubset = np.zeros(16, dtype='bool')
    bsubset[sids] = True
    subsets = [sids, slice(3, 7), bsubset, [3, 3, 4, 4, 6, 6, 6, 5]]
    # all test subsets result in equivalent masks, hence should do the same
    # to the mapper and result in identical behavior
    for st in sms:
        for i, sub in enumerate(subsets):
            # shallow copy
            orig = copy(st)
            subsm = StaticFeatureSelection(sub)
            # should do copy-on-write for all important stuff!!
            orig += subsm
            # test if the selection did its job
            if i == 3:
                # special case of multiplying features
                assert_array_equal(orig.forward1(data[0].copy()), subsets[i])
            else:
                assert_array_equal(orig.forward1(data[0].copy()), sids)

    ## all of the above shouldn't change the original mapper
    #assert_array_equal(sm.get_mask(), np.arange(16))

    # check for some bug catchers
    # no 3D input
    #assert_raises(IndexError, sm.forward, np.ones((3, 2, 1)))
    # no input of wrong length
    if __debug__:
        # checked only in __debug__
        assert_raises(ValueError, sm.forward, np.ones(4))
    # same on reverse
    #assert_raises(ValueError, sm.reverse, np.ones(16))
    # invalid ids
    #assert_false(subsm.is_valid_inid(-1))
    #assert_false(subsm.is_valid_inid(16))

    # intended merge failures
    fsm = StaticFeatureSelection(np.arange(16))
    assert_equal(fsm.__iadd__(None), NotImplemented)
    assert_equal(fsm.__iadd__(Dataset([2, 3, 4])), NotImplemented)

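# Illustrative sketch (not part of the original suite): the equivalence the
# loop above relies on -- an index list, a slice, and a boolean mask that
# pick the same features behave identically -- in plain numpy terms:
_sids = [3, 4, 5, 6]
_bmask = np.zeros(16, dtype=bool)
_bmask[_sids] = True
_full = np.arange(16)
assert np.array_equal(_full[_sids], _full[3:7])
assert np.array_equal(_full[_sids], _full[_bmask])
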
def test_product_flatten():
    nsamples = 17
    product_name_values = [('chan', ['C1', 'C2']),
                           ('freq', np.arange(4, 20, 6)),
                           ('time', np.arange(-200, 800, 200))]

    shape = (nsamples,) + tuple(len(v) for _, v in product_name_values)

    sample_names = ['samp%d' % i for i in range(nsamples)]

    # generate random data in four dimensions
    data = np.random.normal(size=shape)
    ds = Dataset(data, sa=dict(sample_names=sample_names))
    for n, v in product_name_values:
        ds.a[n] = v

    # apply flattening to ds
    names, values = zip(*product_name_values)

    flattened_ds = None
    # test both with explicit values for factor_values and without
    for with_values in (False, True):
        # the order of False and True is critical:
        # in the first iteration flattened_ds is set and then used in the
        # second iteration
        args = {}
        if with_values:
            factor_values = [v for n, v in product_name_values]
            args['factor_values'] = factor_values

        flattener = ProductFlattenMapper(names, **args)

        # test I/O (only if h5py is available)
        if externals.exists('h5py'):
            from mvpa2.base.hdf5 import h5save, h5load
            import tempfile
            import os

            fd, testfn = tempfile.mkstemp('mapper.h5py', 'test_product')
            os.close(fd)
            h5save(testfn, flattener)
            flattener = h5load(testfn)
            os.unlink(testfn)

        if flattened_ds is None:
            assert_raises(ValueError, flattener.reverse, ds)
        else:
            ds_ = flattener.reverse(flattened_ds)
            assert_array_equal(ds.samples, ds_.samples)

        mds = flattener(ds)

        prod = lambda x: reduce(operator.mul, x)

        # ensure the size is ok
        assert_equal(mds.shape, (nsamples,) + (prod(shape[1:]),))

        idxs = [list(range(len(v))) for v in values]

        for si in range(nsamples):
            for fi, p in enumerate(itertools.product(*idxs)):
                data_tup = (si,) + p
                x = mds[si, fi]
                # value should match
                assert_equal(data[data_tup], x.samples[0, 0])
                # indices should match as well
                all_idxs = tuple(x.fa['chan_freq_time_indices'].value.ravel())
                assert_equal(p, all_idxs)
                # values and indices in each dimension should match
                for i, (name, value) in enumerate(product_name_values):
                    assert_equal(x.fa[name].value, value[p[i]])
                    assert_equal(x.fa[name + '_indices'].value, p[i])

        dsr = flattener.reverse(mds)
        assert_equal(dsr.shape, ds.shape)

        # an unknown factor name must be rejected; use a local copy so the
        # second iteration still sees the original names
        names_bad = names + ('foo',)
        flattener_bad = ProductFlattenMapper(names_bad)
        assert_raises(KeyError, flattener_bad, ds)

        # for the next iteration
        flattened_ds = mds

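# Illustrative sketch (not part of the original suite): ProductFlattenMapper
# enumerates the flattened features as the Cartesian product of the factor
# values, in the same order as itertools.product -- which is exactly what the
# inner loop above iterates over (factor values here are made up):
_factors = [['C1', 'C2'], [4, 10, 16]]
_combos = list(itertools.product(*_factors))
assert len(_combos) == 2 * 3
assert _combos[0] == ('C1', 4) and _combos[-1] == ('C2', 16)
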
def test_rfe_sensmap():
    # http://lists.alioth.debian.org/pipermail/pkg-exppsy-pymvpa/2013q3/002538.html
    # just a smoke test. fails with
    from mvpa2.clfs.svm import LinearCSVMC
    from mvpa2.clfs.meta import FeatureSelectionClassifier
    from mvpa2.measures.base import CrossValidation, RepeatedMeasure
    from mvpa2.generators.splitters import Splitter
    from mvpa2.generators.partition import NFoldPartitioner
    from mvpa2.misc.errorfx import mean_mismatch_error
    from mvpa2.mappers.fx import mean_sample
    from mvpa2.mappers.fx import maxofabs_sample
    from mvpa2.generators.base import Repeater
    from mvpa2.featsel.rfe import RFE
    from mvpa2.featsel.helpers import FractionTailSelector, BestDetector
    from mvpa2.featsel.helpers import NBackHistoryStopCrit
    from mvpa2.datasets import vstack

    from mvpa2.misc.data_generators import normal_feature_dataset

    # Let's simulate the beast -- 6 categories total grouped into 3
    # super-ordinate, and actually without any 'superordinate' effect
    # since the subordinate categories are independent
    fds = normal_feature_dataset(nlabels=3,
                                 snr=1,  # 100,  # pure signal! ;)
                                 perlabel=9,
                                 nfeatures=6,
                                 nonbogus_features=range(3),
                                 nchunks=3)
    clfsvm = LinearCSVMC()

    rfesvm = RFE(clfsvm.get_sensitivity_analyzer(postproc=maxofabs_sample()),
                 CrossValidation(
                     clfsvm,
                     NFoldPartitioner(),
                     errorfx=mean_mismatch_error,
                     postproc=mean_sample()),
                 Repeater(2),
                 fselector=FractionTailSelector(0.70, mode='select',
                                                tail='upper'),
                 stopping_criterion=NBackHistoryStopCrit(BestDetector(), 10),
                 update_sensitivity=True)

    fclfsvm = FeatureSelectionClassifier(clfsvm, rfesvm)

    sensanasvm = fclfsvm.get_sensitivity_analyzer(postproc=maxofabs_sample())

    # manually repeating/splitting so we do both RFE sensitivity and
    # classification
    senses, errors = [], []
    for i, pset in enumerate(NFoldPartitioner().generate(fds)):
        # split the partitioned dataset
        split = [d for d in Splitter('partitions').generate(pset)]
        senses.append(sensanasvm(split[0]))
        # and it also should train the classifier, so we can ask it about
        # the error
        errors.append(mean_mismatch_error(fclfsvm.predict(split[1]),
                                          split[1].targets))

    senses = vstack(senses)
    errors = vstack(errors)

    # Let's compare against rerunning the beast simply for classification
    # with CV
    errors_cv = CrossValidation(fclfsvm, NFoldPartitioner(),
                                errorfx=mean_mismatch_error)(fds)
    # and they should match
    assert_array_equal(errors, errors_cv)

    # buggy!
    cv_sensana_svm = RepeatedMeasure(sensanasvm, NFoldPartitioner())
    senses_rm = cv_sensana_svm(fds)

    #print senses.samples, senses_rm.samples
    #print errors, errors_cv.samples
    assert_raises(AssertionError,
                  assert_array_almost_equal,
                  senses.samples, senses_rm.samples)
    raise SkipTest("Known failure for repeated measures: "
                   "https://github.com/PyMVPA/PyMVPA/issues/117")

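# Illustrative sketch (not part of the original suite): RFE's core loop keeps
# a surviving feature set and repeatedly drops the lowest-|sensitivity|
# fraction, akin to the FractionTailSelector(0.70, ...) configured above.
# `_rfe_select` is a hypothetical helper with a simplified stopping rule
# (a feature-count floor rather than a CV-error criterion).
def _rfe_select(sens, keep_fraction=0.7, min_features=1):
    sens = np.asarray(sens)
    ids = np.arange(len(sens))
    while len(ids) > min_features:
        keep = max(min_features, int(len(ids) * keep_fraction))
        order = np.argsort(np.abs(sens[ids]))[::-1]   # strongest first
        if keep == len(order):                        # nothing left to drop
            break
        ids = ids[order[:keep]]
    return np.sort(ids)
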
def test_chainmapper():
    # the chain needs at least one mapper
    assert_raises(ValueError, ChainMapper, [])
    # a typical first mapper is to flatten
    cm = ChainMapper([FlattenMapper()])

    # a few container checks
    assert_equal(len(cm), 1)
    assert_true(isinstance(cm[0], FlattenMapper))

    # now training
    # come up with data
    samples_shape = (2, 2, 4)
    data_shape = (4,) + samples_shape
    data = np.arange(np.prod(data_shape)).reshape(data_shape)
    target = [[ 0,  1,  2,  3,  4,  5,  6,  7,
                8,  9, 10, 11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20, 21, 22, 23,
               24, 25, 26, 27, 28, 29, 30, 31],
              [32, 33, 34, 35, 36, 37, 38, 39,
               40, 41, 42, 43, 44, 45, 46, 47],
              [48, 49, 50, 51, 52, 53, 54, 55,
               56, 57, 58, 59, 60, 61, 62, 63]]
    target = np.array(target)

    # if it is not trained it knows nothing
    cm.train(data)

    # a new mapper should appear when doing feature selection
    cm.append(StaticFeatureSelection(list(range(1, 16))))
    assert_equal(cm.forward1(data[0]).shape, (15,))
    assert_equal(len(cm), 2)
    # multiple slicing
    cm.append(StaticFeatureSelection([9, 14]))
    assert_equal(cm.forward1(data[0]).shape, (2,))
    assert_equal(len(cm), 3)

    # check reproduction
    if __debug__:
        # debug mode needs a special test as it enhances the repr output
        # with module info and an id() appendix for objects
        import mvpa2
        cm_clone = eval(repr(cm))
        assert_equal('#'.join(repr(cm_clone).split('#')[:-1]),
                     '#'.join(repr(cm).split('#')[:-1]))
    else:
        cm_clone = eval(repr(cm))
        assert_equal(repr(cm_clone), repr(cm))

    # what happens if we retrain the whole beast on the same data as before
    cm.train(data)
    assert_equal(cm.forward1(data[0]).shape, (2,))
    assert_equal(len(cm), 3)

    # let's map something
    mdata = cm.forward(data)
    assert_array_equal(mdata, target[:, [10, 15]])
    # and back
    rdata = cm.reverse(mdata)
    # original shape
    assert_equal(rdata.shape, data.shape)
    # content as far as it could be restored
    assert_array_equal(rdata[rdata > 0], data[rdata > 0])
    assert_equal(np.sum(rdata > 0), 8)

    # let's construct a dataset with the mapper assigned and see
    # if sub-selecting a feature adjusts the trailing StaticFeatureSelection
    # appropriately
    ds_subsel = Dataset.from_wizard(data, mapper=cm)[:, 1]
    tail_sfs = ds_subsel.a.mapper[-1]
    assert_equal(repr(tail_sfs),
                 'StaticFeatureSelection(slicearg=array([14]))')

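# Illustrative sketch (not part of the original suite): a ChainMapper forward
# is plain function composition, so the two StaticFeatureSelections above
# collapse into one index array -- which is why sub-selecting output
# feature 1 leaves a trailing slicearg of array([14]):
_sel1 = np.arange(1, 16)            # first selection in the chain
_sel2 = np.array([9, 14])           # second selection
assert np.array_equal(_sel1[_sel2], [10, 15])   # mdata columns above
# output feature 1 maps back through index 14 to original flat feature 15
assert _sel1[14] == 15
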
def raiser(*args, **kwargs):
    # helper: assert that calling `f` (expected to be bound in the
    # enclosing scope) raises an AssertionError
    assert_raises(AssertionError, f, *args, **kwargs)