def test_searchlight_errors_per_trial():
    # Make sure that searchlight can return error/accuracy per trial
    import numpy as np

    from mvpa2.clfs.gnb import GNB
    from mvpa2.generators.partition import OddEvenPartitioner
    from mvpa2.measures.base import CrossValidation
    from mvpa2.measures.searchlight import sphere_searchlight
    from mvpa2.measures.gnbsearchlight import sphere_gnbsearchlight
    from mvpa2.testing.datasets import datasets
    from mvpa2.testing.tools import assert_equal, assert_array_equal, \
        assert_array_almost_equal, assert_datasets_equal
    from mvpa2.misc.errorfx import prediction_target_matches

    dataset = datasets['3dsmall'].copy()
    # randomly permute samples so we break any accidental correspondence
    # to strengthen the tests below
    sample_idx = np.arange(len(dataset))
    dataset = dataset[np.random.permutation(sample_idx)]

    dataset.sa.targets = ['L%d' % l for l in dataset.sa.targets]
    dataset.fa['voxel_indices'] = dataset.fa.myspace
    sample_clf = GNB()               # fast and deterministic

    part = OddEvenPartitioner()      # only do partial to save time
    # alternative: errorfx=prediction_target_matches
    cv = CrossValidation(sample_clf, part, errorfx=None)
    # just to compare the resulting error
    cv_error = CrossValidation(sample_clf, part)

    # Large searchlight radius so we get the entire ROI; 2 centers just to
    # make sure that all stacking works correctly
    sl = sphere_searchlight(cv, radius=10, center_ids=[0, 1])
    results = sl(dataset)

    sl_gnb = sphere_gnbsearchlight(sample_clf, part, radius=10,
                                   errorfx=None, center_ids=[0, 1])
    results_gnbsl = sl_gnb(dataset)

    # inspect both results:
    # verify that partitioning was done correctly
    partitions = list(part.generate(dataset))
    for res in (results, results_gnbsl):
        assert('targets' in res.sa.keys())  # should carry targets
        assert('cvfolds' in res.sa.keys())  # should carry cvfolds
        for ipart in range(len(partitions)):
            assert_array_equal(
                dataset[partitions[ipart].sa.partitions == 2].targets,
                res.sa.targets[res.sa.cvfolds == ipart])

    assert_datasets_equal(results, results_gnbsl)

    # one "accuracy" per each trial
    assert_equal(results.shape, (len(dataset), 2))
    # with accuracies the same in both searchlights, since due to the large
    # radii the same features were selected in both cases
    errors_dataset = cv(dataset)
    assert_array_equal(errors_dataset.samples[:, 0], results.samples[:, 0])
    assert_array_equal(errors_dataset.samples[:, 0], results.samples[:, 1])
    # and the error matching (up to precision) the one we get when running
    # with the default error function
    assert_array_almost_equal(
        np.mean(results.targets[:, None] != results.samples, axis=0)[0],
        np.mean(cv_error(dataset)))

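
# A minimal sketch (not part of the test suite above) illustrating the
# per-trial mechanism the test exercises: with errorfx=None,
# CrossValidation returns the stacked per-trial predictions instead of a
# single error value, so a per-trial correctness vector can be derived by
# comparing predictions against the carried 'targets' sample attribute
# (which the test above verifies is present on the stacked output).
# The helper name is ours; everything it calls is standard mvpa2 API.
def _demo_per_trial_accuracy():
    import numpy as np
    from mvpa2.clfs.gnb import GNB
    from mvpa2.generators.partition import OddEvenPartitioner
    from mvpa2.measures.base import CrossValidation
    from mvpa2.testing.datasets import datasets

    ds = datasets['3dsmall'].copy()
    cv = CrossValidation(GNB(), OddEvenPartitioner(), errorfx=None)
    predictions = cv(ds)  # one row per trial, stacked across both folds
    # per-trial correctness, then the mean accuracy over all trials
    correct = predictions.samples[:, 0] == predictions.sa.targets
    return np.mean(correct)
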
def test_partitionmapper():
    ds = give_data()
    oep = OddEvenPartitioner()
    parts = list(oep.generate(ds))
    # two dataset copies, one per odd/even assignment
    assert_equal(len(parts), 2)
    for i, p in enumerate(parts):
        # each copy carries both a training (1) and a testing (2) partition
        assert_array_equal(p.sa['partitions'].unique, [1, 2])
        assert_equal(p.a.partitions_set, i)
        # partitioning only marks samples, it does not remove any
        assert_equal(len(p), len(ds))

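
# A hedged sketch, for readers of the test above, of what
# OddEvenPartitioner actually yields.  The toy dataset is invented for
# illustration; the partitioner itself is the real mvpa2 API.  generate()
# yields two dataset copies: each assigns one of the odd/even chunk groups
# to the testing partition (2) and the other to training (1).
def _demo_oddeven_partitioner():
    import numpy as np
    from mvpa2.datasets.base import dataset_wizard
    from mvpa2.generators.partition import OddEvenPartitioner

    ds = dataset_wizard(samples=np.random.randn(8, 4),
                        targets=[0, 1] * 4,
                        chunks=[0, 0, 1, 1, 2, 2, 3, 3])
    for part in OddEvenPartitioner().generate(ds):
        # 'partitions' marks every sample as training (1) or testing (2)
        print(part.a.partitions_set, part.sa.partitions)
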
def generate_testing_datasets(specs):
    # Let's permute upon each invocation of the test, so we could possibly
    # trigger some funny cases
    nonbogus_pool = np.random.permutation([0, 1, 3, 5])

    datasets = {}

    # use a partitioner to flag odd/even samples as training and test
    ttp = OddEvenPartitioner(space='train', count=1)

    for kind, spec in specs.items():
        # set of univariate datasets
        for nlabels in [2, 3, 4]:
            basename = 'uni%d%s' % (nlabels, kind)
            nonbogus_features = nonbogus_pool[:nlabels]
            dataset = normal_feature_dataset(
                nlabels=nlabels,
                nonbogus_features=nonbogus_features,
                **spec)
            # full dataset
            datasets[basename] = list(ttp.generate(dataset))[0]

        # sample 3D
        total = 2 * spec['perlabel']
        nchunks = spec['nchunks']
        data = np.random.standard_normal((total, 3, 6, 6))
        labels = np.concatenate((np.repeat(0, spec['perlabel']),
                                 np.repeat(1, spec['perlabel'])))
        data[:, 1, 0, 0] += 2 * labels  # add some signal
        chunks = np.asarray(list(range(nchunks)) * (total // nchunks))
        mask = np.ones((3, 6, 6), dtype='bool')
        mask[0, 0, 0] = 0
        mask[1, 3, 2] = 0
        ds = Dataset.from_wizard(samples=data, targets=labels, chunks=chunks,
                                 mask=mask, space='myspace')
        # and to stress-test manipulating sa/fa possibly containing
        # attributes of dtype object
        ds.sa['test_object'] = [['a'], [1, 2]] * (ds.nsamples // 2)
        datasets['3d%s' % kind] = ds

    # some additional datasets
    datasets['dumb2'] = dumb_feature_binary_dataset()
    datasets['dumb'] = dumb_feature_dataset()
    # dataset with a few invariant features
    _dsinv = dumb_feature_dataset()
    _dsinv.samples = np.hstack((_dsinv.samples,
                                np.zeros((_dsinv.nsamples, 1)),
                                np.ones((_dsinv.nsamples, 1))))
    datasets['dumbinv'] = _dsinv

    # datasets for regression testing
    datasets['sin_modulated'] = list(
        ttp.generate(multiple_chunks(sin_modulated, 4, 30, 1)))[0]
    # use the same full dataset for training
    datasets['sin_modulated_train'] = datasets['sin_modulated']
    datasets['sin_modulated_test'] = sin_modulated(30, 1, flat=True)

    # simple signal for linear regressors
    datasets['chirp_linear'] = multiple_chunks(chirp_linear,
                                               6, 50, 10, 2, 0.3, 0.1)
    datasets['chirp_linear_test'] = chirp_linear(20, 5, 2, 0.4, 0.1)

    datasets['wr1996'] = multiple_chunks(wr1996, 4, 50)
    datasets['wr1996_test'] = wr1996(50)

    datasets['hollow'] = Dataset(HollowSamples((40, 20)),
                                 sa={'targets': np.tile(['one', 'two'], 20)})

    return datasets

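
# A hedged usage sketch for the generator above.  The spec values here are
# hypothetical stand-ins (the real specs live in the module-level test
# setup), and the function itself assumes the module-level imports of
# mvpa2/testing/datasets.py: numpy as np, Dataset, OddEvenPartitioner, and
# the mvpa2.misc.data_generators helpers (normal_feature_dataset,
# dumb_feature_dataset, multiple_chunks, chirp_linear, wr1996, ...).
def _demo_generate_testing_datasets():
    specs = {'small': dict(perlabel=12, nchunks=4, nfeatures=6, snr=14)}
    testdata = generate_testing_datasets(specs)
    # per-spec keys ('uni2small', 'uni3small', 'uni4small', '3dsmall')
    # plus the fixed extras such as 'dumb', 'chirp_linear', 'hollow'
    print(sorted(testdata.keys()))
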