def test_corrstability_smoketest(ds):
    if 'chunks' not in ds.sa:
        return
    if len(ds.sa['targets'].unique) > 30:
        # was regression dataset
        return
    # very basic testing since
    cs = CorrStability()
    #ds = datasets['uni2small']
    out = cs(ds)

    assert_equal(out.shape, (ds.nfeatures,))
    ok_(np.all(out >= -1.001))  # it should be a correlation after all
    ok_(np.all(out <= 1.001))

    # and theoretically those nonbogus features should have higher values
    if 'nonbogus_targets' in ds.fa:
        bogus_features = np.array([x == None for x in ds.fa.nonbogus_targets])
        assert_array_less(np.mean(out[bogus_features]),
                          np.mean(out[~bogus_features]))

    # and if we move targets to alternative location
    ds = ds.copy(deep=True)
    ds.sa['alt'] = ds.T
    ds.sa.pop('targets')
    assert_raises(KeyError, cs, ds)
    cs = CorrStability('alt')
    out_ = cs(ds)
    assert_array_equal(out, out_)
def test_exclude_targets_combinations_subjectchunks():
    partitioner = ChainNode([NFoldPartitioner(attr='subjects'),
                             ExcludeTargetsCombinationsPartitioner(
                                 k=1,
                                 targets_attr='chunks',
                                 space='partitions')],
                            space='partitions')
    # targets do not even need to be defined!
    ds = Dataset(np.arange(18).reshape(9, 2),
                 sa={'chunks': np.arange(9) // 3,
                     'subjects': np.arange(9) % 3})
    dss = list(partitioner.generate(ds))
    assert_equal(len(dss), 9)

    testing_subjs, testing_chunks = [], []
    for ds_ in dss:
        testing_partition = ds_.sa.partitions == 2
        training_partition = ds_.sa.partitions == 1
        # must be scalars -- so implicit test here
        # if not -- would be error
        testing_subj = np.asscalar(np.unique(ds_.sa.subjects[testing_partition]))
        testing_subjs.append(testing_subj)
        testing_chunk = np.asscalar(np.unique(ds_.sa.chunks[testing_partition]))
        testing_chunks.append(testing_chunk)
        # and those must not appear for training
        ok_(not testing_subj in ds_.sa.subjects[training_partition])
        ok_(not testing_chunk in ds_.sa.chunks[training_partition])
    # and we should have gone through all chunks/subjs pairs
    testing_pairs = set(zip(testing_subjs, testing_chunks))
    assert_equal(len(testing_pairs), 9)
    # yoh: equivalent to set(itertools.product(range(3), range(3)))
    # but .product is N/A for python2.5
    assert_equal(testing_pairs, set(zip(*np.where(np.ones((3, 3))))))
def test_zcore_repr():
    # Just basic test if everything is sane... no proper comparison
    for m in (ZScoreMapper(chunks_attr=None),
              ZScoreMapper(params=(3, 1)),
              ZScoreMapper()):
        mr = eval(repr(m))
        ok_(isinstance(mr, ZScoreMapper))
    def test_aggregation(self):
        data = dataset_wizard(np.arange(20).reshape((4, 5)),
                              targets=1, chunks=1)

        ag_data = aggregate_features(data, np.mean)

        ok_(ag_data.nsamples == 4)
        ok_(ag_data.nfeatures == 1)
        assert_array_equal(ag_data.samples[:, 0], [2, 7, 12, 17])
def test_attrmap_conflicts():
    am_n = AttributeMap({'a': 1, 'b': 2, 'c': 1})
    am_t = AttributeMap({'a': 1, 'b': 2, 'c': 1}, collisions_resolution='tuple')
    am_l = AttributeMap({'a': 1, 'b': 2, 'c': 1}, collisions_resolution='lucky')
    q_f = ['a', 'b', 'a', 'c']
    # should have no effect on forward mapping
    ok_(np.all(am_n.to_numeric(q_f) == am_t.to_numeric(q_f)))
    ok_(np.all(am_t.to_numeric(q_f) == am_l.to_numeric(q_f)))

    assert_raises(ValueError, am_n.to_literal, [2])
    r_t = am_t.to_literal([2, 1])
    r_l = am_l.to_literal([2, 1])
def test_subset_filler():
    sm = StaticFeatureSelection(np.arange(3))
    sm_f0 = StaticFeatureSelection(np.arange(3), filler=0)
    sm_fm1 = StaticFeatureSelection(np.arange(3), filler=-1)
    sm_fnan = StaticFeatureSelection(np.arange(3), filler=np.nan)
    data = np.arange(12).astype(float).reshape((2, -1))

    sm.train(data)
    data_forwarded = sm.forward(data)

    for m in (sm, sm_f0, sm_fm1, sm_fnan):
        m.train(data)
        assert_array_equal(data_forwarded, m.forward(data))

    data_back_fm1 = sm_fm1.reverse(data_forwarded)
    ok_(np.all(data_back_fm1[:, 3:] == -1))
    data_back_fnan = sm_fnan.reverse(data_forwarded)
    ok_(np.all(np.isnan(data_back_fnan[:, 3:])))
def test_cached_query_engine():
    """Test cached query engine
    """
    sphere = ne.Sphere(1)
    # dataset with just one "space"
    ds = datasets['3dlarge']
    qe0 = ne.IndexQueryEngine(myspace=sphere)
    qec = ne.CachedQueryEngine(qe0)
    # and ground truth one
    qe = ne.IndexQueryEngine(myspace=sphere)
    results_ind = []
    results_kw = []

    def cmp_res(res1, res2):
        comp = [x == y for x, y in zip(res1, res2)]
        ok_(np.all(comp))

    for iq, q in enumerate((qe, qec)):
        q.train(ds)
        # sequential train on the same should be ok in both cases
        q.train(ds)
        res_ind = [q[fid] for fid in xrange(ds.nfeatures)]
        res_kw = [q(myspace=x) for x in ds.fa.myspace]
        # test if results match
        cmp_res(res_ind, res_kw)

        results_ind.append(res_ind)
        results_kw.append(res_kw)

    # now check if results of cached were the same as of regular run
    cmp_res(results_ind[0], results_ind[1])

    # Now do sanity checks
    assert_raises(ValueError, qec.train, ds[:, :-1])
    assert_raises(ValueError, qec.train, ds.copy())
    ds2 = ds.copy()
    qec.untrain()
    qec.train(ds2)
    # should be the same results on the copy
    cmp_res(results_ind[0], [qec[fid] for fid in xrange(ds.nfeatures)])
    cmp_res(results_kw[0], [qec(myspace=x) for x in ds.fa.myspace])
    ok_(qec.train(ds2) is None)
def test_sphere():
    # test sphere initialization
    s = ne.Sphere(1)
    center0 = (0, 0, 0)
    center1 = (1, 1, 1)
    assert_equal(len(s(center0)), 7)
    target = array([array([-1, 0, 0]),
                    array([0, -1, 0]),
                    array([0, 0, -1]),
                    array([0, 0, 0]),
                    array([0, 0, 1]),
                    array([0, 1, 0]),
                    array([1, 0, 0])])
    # test of internals -- no recomputation of increments should be done
    prev_increments = s._increments
    assert_array_equal(s(center0), target)
    ok_(prev_increments is s._increments)
    # query lower dimensionality
    _ = s((0, 0))
    ok_(not prev_increments is s._increments)

    # test Sphere call
    target = [array([0, 1, 1]),
              array([1, 0, 1]),
              array([1, 1, 0]),
              array([1, 1, 1]),
              array([1, 1, 2]),
              array([1, 2, 1]),
              array([2, 1, 1])]
    res = s(center1)
    assert_array_equal(array(res), target)
    # They all should be tuples
    ok_(np.all([isinstance(x, tuple) for x in res]))

    # test for larger diameter
    s = ne.Sphere(4)
    assert_equal(len(s(center1)), 257)

    # test extent keyword
    #s = ne.Sphere(4, extent=(1, 1, 1))
    #assert_array_equal(array(s((0, 0, 0))), array([[0, 0, 0]]))

    # test Errors during initialisation and call
    #assert_raises(ValueError, ne.Sphere, 2)
    #assert_raises(ValueError, ne.Sphere, 1.0)

    # no longer extent available
    assert_raises(TypeError, ne.Sphere, 1, extent=(1))
    assert_raises(TypeError, ne.Sphere, 1, extent=(1.0, 1.0, 1.0))

    s = ne.Sphere(1)
    #assert_raises(ValueError, s, (1))
    if __debug__:
        # No float coordinates allowed for now...
        # XXX might like to change that ;)
        assert_raises(ValueError, s, (1.0, 1.0, 1.0))
    def test_samples_attributes(self):
        sa = SampleAttributes(os.path.join(pymvpa_dataroot,
                                           'attributes_literal.txt'),
                              literallabels=True)

        ok_(sa.nrows == 1452, msg='There should be 1452 samples')

        # convert to event list, with some custom attr
        ev = find_events(**sa)
        ok_(len(ev) == 17 * (max(sa.chunks) + 1),
            msg='Not all events got detected.')

        ok_(ev[0]['targets'] == ev[-1]['targets'] == 'rest',
            msg='First and last event are rest condition.')

        ok_(ev[-1]['onset'] + ev[-1]['duration'] == sa.nrows,
            msg='Something is wrong with the timing of the events')
def test_sphere_distance_func():
    # Test some other distance
    se = ne.Sphere(3)
    sm = ne.Sphere(3, distance_func=manhatten_distance)
    rese = se((10, 5))
    resm = sm((10, 5))
    for res in rese, resm:
        # basic test for duplicates (I think we had forgotten to test for them)
        ok_(len(res) == len(set(res)))

    # in manhatten distance we should all be no further than 3 "steps" away
    ok_(np.all([np.sum(np.abs(np.array(x) - (10, 5))) <= 3 for x in resm]))
    # in euclidean we are taking shortcuts ;)
    ok_(np.any([np.sum(np.abs(np.array(x) - (10, 5))) > 3 for x in rese]))
def test_zscore():
    """Test z-scoring transformation
    """
    # dataset: mean=2, std=1
    samples = np.array((0, 1, 3, 4, 2, 2, 3, 1, 1, 3, 3, 1, 2, 2, 2, 2)).\
        reshape((16, 1))
    data = dataset_wizard(samples.copy(), targets=range(16), chunks=[0] * 16)
    assert_equal(data.samples.mean(), 2.0)
    assert_equal(data.samples.std(), 1.0)
    data_samples = data.samples.copy()
    zscore(data, chunks_attr='chunks')

    # copy should stay intact
    assert_equal(data_samples.mean(), 2.0)
    assert_equal(data_samples.std(), 1.0)

    # we should be able to operate on ndarrays
    # But we can't change type inplace for an array, can we?
    assert_raises(TypeError, zscore, data_samples, chunks_attr=None)
    # so let's do it manually
    data_samples = data_samples.astype(float)
    zscore(data_samples, chunks_attr=None)
    assert_array_equal(data.samples, data_samples)

    # check z-scoring
    check = np.array([-2, -1, 1, 2, 0, 0, 1, -1, -1, 1, 1, -1, 0, 0, 0, 0],
                     dtype='float64').reshape(16, 1)
    assert_array_equal(data.samples, check)

    data = dataset_wizard(samples.copy(), targets=range(16), chunks=[0] * 16)
    zscore(data, chunks_attr=None)
    assert_array_equal(data.samples, check)

    # check z-scoring taking set of labels as a baseline
    data = dataset_wizard(samples.copy(),
                          targets=[0, 2, 2, 2, 1] + [2] * 11,
                          chunks=[0] * 16)
    zscore(data, param_est=('targets', [0, 1]))
    assert_array_equal(samples, data.samples + 1.0)

    # check that zscore modifies in-place; only guaranteed if no upcasting is
    # necessary
    samples = samples.astype('float')
    data = dataset_wizard(samples,
                          targets=[0, 2, 2, 2, 1] + [2] * 11,
                          chunks=[0] * 16)
    zscore(data, param_est=('targets', [0, 1]))
    assert_array_equal(samples, data.samples)

    # verify that if param_est is set but chunks_attr is None
    # performs zscoring across entire dataset correctly
    data = data.copy()
    data_01 = data.select({'targets': [0, 1]})
    zscore(data_01, chunks_attr=None)
    zscore(data, chunks_attr=None, param_est=('targets', [0, 1]))
    assert_array_equal(data_01.samples, data.select({'targets': [0, 1]}))

    # these might be duplicating code above -- but twice is better than nothing

    # dataset: mean=2, std=1
    raw = np.array((0, 1, 3, 4, 2, 2, 3, 1, 1, 3, 3, 1, 2, 2, 2, 2))
    # dataset: mean=12, std=1
    raw2 = np.array((0, 1, 3, 4, 2, 2, 3, 1, 1, 3, 3, 1, 2, 2, 2, 2)) + 10
    # zscore target
    check = [-2, -1, 1, 2, 0, 0, 1, -1, -1, 1, 1, -1, 0, 0, 0, 0]

    ds = dataset_wizard(raw.copy(), targets=range(16), chunks=[0] * 16)
    pristine = dataset_wizard(raw.copy(), targets=range(16), chunks=[0] * 16)

    zm = ZScoreMapper()
    # should do global zscore by default
    zm.train(ds)                        # train
    assert_array_almost_equal(zm.forward(ds), np.transpose([check]))
    # should not modify the source
    assert_array_equal(pristine, ds)

    # if we tell it a different mean it should obey the order
    zm = ZScoreMapper(params=(3, 1))
    zm.train(ds)
    assert_array_almost_equal(zm.forward(ds), np.transpose([check]) - 1)
    assert_array_equal(pristine, ds)

    # let's look at chunk-wise z-scoring
    ds = dataset_wizard(np.hstack((raw.copy(), raw2.copy())),
                        targets=range(32),
                        chunks=[0] * 16 + [1] * 16)
    # by default chunk-wise
    zm = ZScoreMapper()
    zm.train(ds)                        # train
    assert_array_almost_equal(zm.forward(ds), np.transpose([check + check]))
    # we should be able to do that same manually
    zm = ZScoreMapper(params={0: (2, 1), 1: (12, 1)})
    zm.train(ds)                        # train
    assert_array_almost_equal(zm.forward(ds), np.transpose([check + check]))

    # And just a smoke test for warnings reporting whenever # of
    # samples per chunk is low.
    # on 1 sample per chunk
    zds1 = ZScoreMapper(chunks_attr='chunks', auto_train=True)(ds[[0, -1]])
    ok_(np.all(zds1.samples == 0))  # they all should be 0
    # on 2 samples per chunk
    zds2 = ZScoreMapper(chunks_attr='chunks', auto_train=True)(ds[[0, 1, -10, -1]])
    assert_array_equal(np.unique(zds2.samples), [-1., 1])  # they all should be -1 or 1
    # on 3 samples per chunk -- different warning
    ZScoreMapper(chunks_attr='chunks', auto_train=True)(ds[[0, 1, 2, -3, -2, -1]])

    # test if std provided as a list not as an array is handled
    # properly -- should zscore all features (not just first/none
    # as it was before)
    ds = dataset_wizard(np.arange(32).reshape((8, -1)),
                        targets=range(8), chunks=[0] * 8)
    means = [0, 1, -10, 10]
    std0 = np.std(ds[:, 0])             # std deviation of first one
    stds = [std0, 10, .1, 1]

    zm = ZScoreMapper(params=(means, stds), auto_train=True)
    dsz = zm(ds)

    assert_array_almost_equal(
        (np.mean(ds, axis=0) - np.asanyarray(means)) / np.array(stds),
        np.mean(dsz, axis=0))
    assert_array_almost_equal(
        np.std(ds, axis=0) / np.array(stds),
        np.std(dsz, axis=0))
def test_attrmap():
    map_default = {'eins': 0, 'zwei': 2, 'sieben': 1}
    map_custom = {'eins': 11, 'zwei': 22, 'sieben': 33}
    literal = ['eins', 'zwei', 'sieben', 'eins', 'sieben', 'eins']
    literal_nonmatching = ['uno', 'dos', 'tres']
    num_default = [0, 2, 1, 0, 1, 0]
    num_custom = [11, 22, 33, 11, 33, 11]

    # no custom mapping given
    am = AttributeMap()
    assert_false(am)
    ok_(len(am) == 0)

    assert_array_equal(am.to_numeric(literal), num_default)
    assert_array_equal(am.to_literal(num_default), literal)
    ok_(am)
    ok_(len(am) == 3)

    #
    # Tests for recursive mapping + preserving datatype
    class myarray(np.ndarray):
        pass

    assert_raises(KeyError, am.to_literal, [(1, 2), 2, 0])
    literal_fancy = [(1, 2), 2, [0], np.array([0, 1]).view(myarray)]
    literal_fancy_tuple = tuple(literal_fancy)
    literal_fancy_array = np.array(literal_fancy, dtype=object)

    for l in (literal_fancy, literal_fancy_tuple,
              literal_fancy_array):
        res = am.to_literal(l, recurse=True)
        assert_equal(res[0], ('sieben', 'zwei'))
        assert_equal(res[1], 'zwei')
        assert_equal(res[2], ['eins'])
        assert_array_equal(res[3], ['eins', 'sieben'])

        # types of result and subsequences should be preserved
        ok_(isinstance(res, l.__class__))
        ok_(isinstance(res[0], tuple))
        ok_(isinstance(res[1], str))
        ok_(isinstance(res[2], list))
        ok_(isinstance(res[3], myarray))

    # yet another example
    a = np.empty(1, dtype=object)
    a[0] = (0, 1)
    res = am.to_literal(a, recurse=True)
    ok_(isinstance(res[0], tuple))

    #
    # with custom mapping
    am = AttributeMap(map=map_custom)
    assert_array_equal(am.to_numeric(literal), num_custom)
    assert_array_equal(am.to_literal(num_custom), literal)

    # if not numeric nothing is mapped
    assert_array_equal(am.to_numeric(num_custom), num_custom)
    # even if the map doesn't fit
    assert_array_equal(am.to_numeric(num_default), num_default)

    # need to_numeric first
    am = AttributeMap()
    assert_raises(RuntimeError, am.to_literal, [1, 2, 3])

    # stupid args
    assert_raises(ValueError, AttributeMap, map=num_custom)

    # map mismatch
    am = AttributeMap(map=map_custom)
    if __debug__:
        # checked only in __debug__
        assert_raises(KeyError, am.to_numeric, literal_nonmatching)
    # needs reset and should work afterwards
    am.clear()
    assert_array_equal(am.to_numeric(literal_nonmatching), [2, 0, 1])
    # and now reverse
    am = AttributeMap(map=map_custom)
    assert_raises(KeyError, am.to_literal, num_default)

    # dict-like interface
    am = AttributeMap()
    ok_([(k, v) for k, v in am.items()] == [])
def test_flatten():
    samples_shape = (2, 2, 4)
    data_shape = (4,) + samples_shape
    data = np.arange(np.prod(data_shape)).reshape(data_shape).view(myarray)
    pristinedata = data.copy()
    target = [[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31],
              [32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47],
              [48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63]]
    target = np.array(target).view(myarray)
    index_target = np.array([[0, 0, 0], [0, 0, 1], [0, 0, 2], [0, 0, 3],
                             [0, 1, 0], [0, 1, 1], [0, 1, 2], [0, 1, 3],
                             [1, 0, 0], [1, 0, 1], [1, 0, 2], [1, 0, 3],
                             [1, 1, 0], [1, 1, 1], [1, 1, 2], [1, 1, 3]])

    # test only flattening the first two dimensions
    fm_max = FlattenMapper(maxdims=2)
    fm_max.train(data)
    assert_equal(fm_max(data).shape, (4, 4, 4))

    # array subclass survives
    ok_(isinstance(data, myarray))

    # actually, there should be no difference between a plain FlattenMapper and
    # a chain that only has a FlattenMapper as the one element
    for fm in [FlattenMapper(space='voxel'),
               ChainMapper([FlattenMapper(space='voxel'),
                            StaticFeatureSelection(slice(None))])]:
        # not working if untrained
        assert_raises(RuntimeError,
                      fm.forward1,
                      np.arange(np.sum(samples_shape) + 1))

        fm.train(data)

        ok_(isinstance(fm.forward(data), myarray))
        ok_(isinstance(fm.forward1(data[2]), myarray))
        assert_array_equal(fm.forward(data), target)
        assert_array_equal(fm.forward1(data[2]), target[2])
        assert_raises(ValueError, fm.forward, np.arange(4))

        # all of that leaves that data unmodified
        assert_array_equal(data, pristinedata)

        # reverse mapping
        ok_(isinstance(fm.reverse(target), myarray))
        ok_(isinstance(fm.reverse1(target[0]), myarray))
        ok_(isinstance(fm.reverse(target[1:2]), myarray))
        assert_array_equal(fm.reverse(target), data)
        assert_array_equal(fm.reverse1(target[0]), data[0])
        assert_array_equal(fm.reverse1(target[0]),
                           _verified_reverse1(fm, target[0]))
        assert_array_equal(fm.reverse(target[1:2]), data[1:2])
        assert_raises(ValueError, fm.reverse, np.arange(14))

        # check one dimensional data, treated as scalar samples
        oned = np.arange(5)
        fm.train(Dataset(oned))
        # needs 2D
        assert_raises(ValueError, fm.forward, oned)
        # doesn't match mapper, since Dataset turns `oned` into (5,1)
        assert_raises(ValueError, fm.forward, oned)
        assert_equal(Dataset(oned).nfeatures, 1)

        # try dataset mode, with some feature attribute
        fattr = np.arange(np.prod(samples_shape)).reshape(samples_shape)
        ds = Dataset(data, fa={'awesome': fattr.copy()})
        assert_equal(ds.samples.shape, data_shape)

        fm.train(ds)
        dsflat = fm.forward(ds)
        ok_(isinstance(dsflat, Dataset))
        ok_(isinstance(dsflat.samples, myarray))
        assert_array_equal(dsflat.samples, target)
        assert_array_equal(dsflat.fa.awesome, np.arange(np.prod(samples_shape)))
        assert_true(isinstance(dsflat.fa['awesome'], ArrayCollectable))
        # test index creation
        assert_array_equal(index_target, dsflat.fa.voxel)

        # and back
        revds = fm.reverse(dsflat)
        ok_(isinstance(revds, Dataset))
        ok_(isinstance(revds.samples, myarray))
        assert_array_equal(revds.samples, data)
        assert_array_equal(revds.fa.awesome, fattr)
        assert_true(isinstance(revds.fa['awesome'], ArrayCollectable))
        assert_false('voxel' in revds.fa)
def test_sphere_scaled():
    s1 = ne.Sphere(3)
    s = ne.Sphere(3, element_sizes=(1, 1))

    # Should give exactly the same results since element_sizes are 1s
    for p in ((0, 0), (-23, 1)):
        assert_array_equal(s1(p), s(p))
        ok_(len(s(p)) == len(set(s(p))))

    # Raise exception if query dimensionality does not match element_sizes
    assert_raises(ValueError, s, (1,))

    s = ne.Sphere(3, element_sizes=(1.5, 2))
    assert_array_equal(s((0, 0)),
                       [(-2, 0),
                        (-1, -1), (-1, 0), (-1, 1),
                        (0, -1), (0, 0), (0, 1),
                        (1, -1), (1, 0), (1, 1),
                        (2, 0)])

    s = ne.Sphere(1.5, element_sizes=(1.5, 1.5, 1.5))
    res = s((0, 0, 0))
    ok_(np.all([np.sqrt(np.sum(np.array(x) ** 2)) <= 1.5 for x in res]))
    ok_(len(res) == 7)

    # all neighbors so no more than 1 voxel away -- just a cube, for
    # some "sphere" effect radius had to be 3.0 ;)
    td = np.sqrt(3 * 1.5 ** 2)
    s = ne.Sphere(td, element_sizes=(1.5, 1.5, 1.5))
    res = s((0, 0, 0))
    ok_(np.all([np.sqrt(np.sum(np.array(x) ** 2)) <= td for x in res]))
    ok_(np.all([np.sum(np.abs(x) > 1) == 0 for x in res]))
    ok_(len(res) == 27)
def test_gnbsearchlight_permutations():
    import mvpa2
    from mvpa2.base.node import ChainNode
    from mvpa2.clfs.gnb import GNB
    from mvpa2.generators.base import Repeater
    from mvpa2.generators.partition import NFoldPartitioner, OddEvenPartitioner
    #import mvpa2.generators.permutation
    #reload(mvpa2.generators.permutation)
    from mvpa2.generators.permutation import AttributePermutator
    from mvpa2.testing.datasets import datasets
    from mvpa2.measures.base import CrossValidation
    from mvpa2.measures.gnbsearchlight import sphere_gnbsearchlight
    from mvpa2.measures.searchlight import sphere_searchlight
    from mvpa2.mappers.fx import mean_sample
    from mvpa2.misc.errorfx import mean_mismatch_error
    from mvpa2.clfs.stats import MCNullDist
    from mvpa2.testing.tools import assert_raises, ok_, assert_array_less

    # mvpa2.debug.active = ['APERM', 'SLC'] #, 'REPM']
    # mvpa2.debug.metrics += ['pid']
    count = 10
    nproc = 1 + int(mvpa2.externals.exists('pprocess'))
    ds = datasets['3dsmall'].copy()
    ds.fa['voxel_indices'] = ds.fa.myspace

    slkwargs = dict(radius=3, space='voxel_indices', enable_ca=['roi_sizes'],
                    center_ids=[1, 10, 70, 100])

    mvpa2.seed(mvpa2._random_seed)
    clf = GNB()
    splt = NFoldPartitioner(cvtype=2, attr='chunks')

    repeater = Repeater(count=count)
    permutator = AttributePermutator('targets', limit={'partitions': 1},
                                     count=1)

    null_sl = sphere_gnbsearchlight(clf,
                                    ChainNode([splt, permutator],
                                              space=splt.get_space()),
                                    postproc=mean_sample(),
                                    errorfx=mean_mismatch_error,
                                    **slkwargs)

    distr_est = MCNullDist(repeater, tail='left',
                           measure=null_sl,
                           enable_ca=['dist_samples'])
    sl = sphere_gnbsearchlight(clf, splt,
                               reuse_neighbors=True,
                               null_dist=distr_est,
                               postproc=mean_sample(),
                               errorfx=mean_mismatch_error,
                               **slkwargs)
    if __debug__:                    # assert is done only without -O mode
        assert_raises(NotImplementedError, sl, ds)

    # "ad-hoc searchlights can't handle yet varying targets across partitions"
    if False:
        # after above limitation is removed -- enable
        sl_map = sl(ds)
        sl_null_prob = sl.ca.null_prob.samples.copy()

    mvpa2.seed(mvpa2._random_seed)
    ### 'normal' Searchlight
    clf = GNB()
    splt = NFoldPartitioner(cvtype=2, attr='chunks')
    repeater = Repeater(count=count)
    permutator = AttributePermutator('targets', limit={'partitions': 1},
                                     count=1)  # rng=np.random.RandomState(0))
    # to trigger failure since the same np.random state
    # would be reused across all pprocesses
    null_cv = CrossValidation(clf,
                              ChainNode([splt, permutator],
                                        space=splt.get_space()),
                              postproc=mean_sample())
    null_sl_normal = sphere_searchlight(null_cv, nproc=nproc, **slkwargs)
    distr_est_normal = MCNullDist(repeater, tail='left',
                                  measure=null_sl_normal,
                                  enable_ca=['dist_samples'])

    cv = CrossValidation(clf, splt,
                         errorfx=mean_mismatch_error,
                         enable_ca=['stats'],
                         postproc=mean_sample())
    sl = sphere_searchlight(cv, nproc=nproc, null_dist=distr_est_normal,
                            **slkwargs)
    sl_map_normal = sl(ds)
    sl_null_prob_normal = sl.ca.null_prob.samples.copy()

    # For every feature -- we should get some variance in estimates. In
    # case of failure they are all really close to each other (up to
    # numerical precision), so variance will be close to 0
    assert_array_less(-np.var(distr_est_normal.ca.dist_samples.samples[0],
                              axis=1), -1e-5)
    for s in distr_est_normal.ca.dist_samples.samples[0]:
        ok_(len(np.unique(s)) > 1)
def test_multiclass_pairs_svm_searchlight():
    from mvpa2.measures.searchlight import sphere_searchlight
    import mvpa2.clfs.meta
    #reload(mvpa2.clfs.meta)
    from mvpa2.clfs.meta import MulticlassClassifier

    from mvpa2.datasets import Dataset
    from mvpa2.clfs.svm import LinearCSVMC
    #import mvpa2.testing.datasets
    #reload(mvpa2.testing.datasets)
    from mvpa2.testing.datasets import datasets
    from mvpa2.generators.partition import NFoldPartitioner, OddEvenPartitioner
    from mvpa2.measures.base import CrossValidation

    from mvpa2.testing import ok_, assert_equal, assert_array_equal
    from mvpa2.sandbox.multiclass import get_pairwise_accuracies

    # Some parameters used in the test below
    nproc = 1 + int(mvpa2.externals.exists('pprocess'))
    ntargets = 4                              # number of targets
    npairs = ntargets * (ntargets - 1) / 2
    center_ids = [35, 55, 1]
    ds = datasets['3dsmall'].copy()

    # redefine C,T so we have a multiclass task
    nsamples = len(ds)
    ds.sa.targets = range(ntargets) * (nsamples // ntargets)
    ds.sa.chunks = np.arange(nsamples) // ntargets
    # and add some obvious signal where it is due
    ds.samples[:, 55] += 15 * ds.sa.targets        # for all 4 targets
    ds.samples[:, 35] += 15 * (ds.sa.targets % 2)  # so we have conflicting labels
    # while 35 would still be just for 2 categories which would conflict

    mclf = MulticlassClassifier(LinearCSVMC(),
                                pass_attr=['sa.chunks', 'ca.raw_predictions_ds'],
                                enable_ca=['raw_predictions_ds'])

    label_pairs = mclf._get_binary_pairs(ds)

    def place_sa_as_samples(ds):
        # add a degenerate dimension for the hstacking in the searchlight
        ds.samples = ds.sa.raw_predictions_ds[:, None]
        ds.sa.pop('raw_predictions_ds')    # no need to drag the copy
        return ds

    mcv = CrossValidation(mclf, OddEvenPartitioner(), errorfx=None,
                          postproc=place_sa_as_samples)
    sl = sphere_searchlight(mcv, nproc=nproc, radius=2, space='myspace',
                            center_ids=center_ids)
    slmap = sl(ds)

    ok_('chunks' in slmap.sa)
    ok_('cvfolds' in slmap.sa)
    ok_('targets' in slmap.sa)
    # so for each SL we got all pairwise tests
    assert_equal(slmap.shape, (nsamples, len(center_ids), npairs))
    assert_array_equal(np.unique(slmap.sa.cvfolds), [0, 1])

    # Verify that we got right labels in each 'pair'
    # all searchlights should have the same set of labels for a given
    # pair of targets
    label_pairs_ = np.apply_along_axis(
        np.unique, 0,
        ## reshape slmap so we have only simple pairs in the columns
        np.reshape(slmap, (-1, npairs))).T

    # need to prep that list of pairs obtained from MulticlassClassifier
    # and since it is 1-vs-1, they all should be just pairs of lists of
    # 1 element so should work
    assert_equal(len(label_pairs_), npairs)
    assert_array_equal(np.squeeze(np.array(label_pairs)), label_pairs_)
    assert_equal(label_pairs_.shape, (npairs, 2))  # for this particular case

    out = get_pairwise_accuracies(slmap)
    out123 = get_pairwise_accuracies(slmap, select=[1, 2, 3])
    assert_array_equal(np.unique(out123.T), np.arange(1, 4))  # so we got at least correct targets
    # test that we extracted correct accuracies
    # First 3 in out.T should have category 0, so skip them and compare otherwise
    assert_array_equal(out.samples[3:], out123.samples)

    ok_(np.all(out.samples[:, 1] == 1.), "This was with super-strong result")