Example #1
0
def test_searchlight_errors_per_trial():
    # To make sure that searchlight can return error/accuracy per trial
    from mvpa2.clfs.gnb import GNB
    from mvpa2.generators.partition import OddEvenPartitioner
    from mvpa2.measures.base import CrossValidation
    from mvpa2.measures.searchlight import sphere_searchlight
    from mvpa2.measures.gnbsearchlight import sphere_gnbsearchlight
    from mvpa2.testing.datasets import datasets
    from mvpa2.misc.errorfx import prediction_target_matches

    dataset = datasets['3dsmall'].copy()
    # randomly permute samples so we break any random correspondence
    # to strengthen tests below
    sample_idx = np.arange(len(dataset))
    dataset = dataset[np.random.permutation(sample_idx)]

    dataset.sa.targets = ['L%d' % l for l in dataset.sa.targets]
    dataset.fa['voxel_indices'] = dataset.fa.myspace
    sample_clf = GNB()              # fast and deterministic

    part = OddEvenPartitioner()
    # only do partial to save time
    cv = CrossValidation(sample_clf, part, errorfx=None) #prediction_target_matches)
    # Just to compare error
    cv_error = CrossValidation(sample_clf, part)

    # Large searchlight radius so we get entire ROI, 2 centers just to make sure
    # that all stacking works correctly
    sl = sphere_searchlight(cv, radius=10, center_ids=[0, 1])
    results = sl(dataset)

    sl_gnb = sphere_gnbsearchlight(sample_clf, part, radius=10, errorfx=None,
                                   center_ids=[0, 1])
    results_gnbsl = sl_gnb(dataset)

    # inspect both results
    # verify that partitioning was done correctly
    partitions = list(part.generate(dataset))
    for res in (results, results_gnbsl):
        assert('targets' in res.sa.keys())  # should carry targets
        assert('cvfolds' in res.sa.keys())  # should carry cvfolds
        for ipart in xrange(len(partitions)):
            assert_array_equal(dataset[partitions[ipart].sa.partitions == 2].targets,
                               res.sa.targets[res.sa.cvfolds == ipart])

    assert_datasets_equal(results, results_gnbsl)

    # one "accuracy" per each trial
    assert_equal(results.shape, (len(dataset), 2))
    # with accuracies the same in both searchlights since the same
    # features were to be selected in both cases due too large radii
    errors_dataset = cv(dataset)
    assert_array_equal(errors_dataset.samples[:, 0], results.samples[:, 0])
    assert_array_equal(errors_dataset.samples[:, 0], results.samples[:, 1])
    # and error matching (up to precision) the one if we run with default error function
    assert_array_almost_equal(np.mean(results.targets[:, None] != results.samples, axis=0)[0],
                              np.mean(cv_error(dataset)))
Example #2
0
def test_partitionmapper():
    ds = give_data()
    oep = OddEvenPartitioner()
    parts = list(oep.generate(ds))
    assert_equal(len(parts), 2)
    for i, p in enumerate(parts):
        assert_array_equal(p.sa['partitions'].unique, [1, 2])
        assert_equal(p.a.partitions_set, i)
        assert_equal(len(p), len(ds))
Example #3
0
def test_partitionmapper():
    ds = give_data()
    oep = OddEvenPartitioner()
    parts = list(oep.generate(ds))
    assert_equal(len(parts), 2)
    for i, p in enumerate(parts):
        assert_array_equal(p.sa['partitions'].unique, [1, 2])
        assert_equal(p.a.partitions_set, i)
        assert_equal(len(p), len(ds))
Example #4
0
def generate_testing_datasets(specs):
    # Lets permute upon each invocation of test, so we could possibly
    # trigger some funny cases
    nonbogus_pool = np.random.permutation([0, 1, 3, 5])

    datasets = {}

    # use a partitioner to flag odd/even samples as training and test
    ttp = OddEvenPartitioner(space='train', count=1)

    for kind, spec in specs.iteritems():
        # set of univariate datasets
        for nlabels in [2, 3, 4]:
            basename = 'uni%d%s' % (nlabels, kind)
            nonbogus_features = nonbogus_pool[:nlabels]

            dataset = normal_feature_dataset(
                nlabels=nlabels, nonbogus_features=nonbogus_features, **spec)

            # full dataset
            datasets[basename] = list(ttp.generate(dataset))[0]

        # sample 3D
        total = 2 * spec['perlabel']
        nchunks = spec['nchunks']
        data = np.random.standard_normal((total, 3, 6, 6))
        labels = np.concatenate(
            (np.repeat(0, spec['perlabel']), np.repeat(1, spec['perlabel'])))
        data[:, 1, 0, 0] += 2 * labels  # add some signal
        chunks = np.asarray(range(nchunks) * (total // nchunks))
        mask = np.ones((3, 6, 6), dtype='bool')
        mask[0, 0, 0] = 0
        mask[1, 3, 2] = 0
        ds = Dataset.from_wizard(samples=data,
                                 targets=labels,
                                 chunks=chunks,
                                 mask=mask,
                                 space='myspace')
        # and to stress tests on manipulating sa/fa possibly containing
        # attributes of dtype object
        ds.sa['test_object'] = [['a'], [1, 2]] * (ds.nsamples // 2)
        datasets['3d%s' % kind] = ds

    # some additional datasets
    datasets['dumb2'] = dumb_feature_binary_dataset()
    datasets['dumb'] = dumb_feature_dataset()
    # dataset with few invariant features
    _dsinv = dumb_feature_dataset()
    _dsinv.samples = np.hstack((_dsinv.samples, np.zeros(
        (_dsinv.nsamples, 1)), np.ones((_dsinv.nsamples, 1))))
    datasets['dumbinv'] = _dsinv

    # Datasets for regressions testing
    datasets['sin_modulated'] = list(
        ttp.generate(multiple_chunks(sin_modulated, 4, 30, 1)))[0]
    # use the same full for training
    datasets['sin_modulated_train'] = datasets['sin_modulated']
    datasets['sin_modulated_test'] = sin_modulated(30, 1, flat=True)

    # simple signal for linear regressors
    datasets['chirp_linear'] = multiple_chunks(chirp_linear, 6, 50, 10, 2, 0.3,
                                               0.1)
    datasets['chirp_linear_test'] = chirp_linear(20, 5, 2, 0.4, 0.1)

    datasets['wr1996'] = multiple_chunks(wr1996, 4, 50)
    datasets['wr1996_test'] = wr1996(50)

    datasets['hollow'] = Dataset(HollowSamples((40, 20)),
                                 sa={'targets': np.tile(['one', 'two'], 20)})

    return datasets
Example #5
0
def generate_testing_datasets(specs):
    # Lets permute upon each invocation of test, so we could possibly
    # trigger some funny cases
    nonbogus_pool = np.random.permutation([0, 1, 3, 5])

    datasets = {}

    # use a partitioner to flag odd/even samples as training and test
    ttp = OddEvenPartitioner(space='train', count=1)

    for kind, spec in specs.iteritems():
        # set of univariate datasets
        for nlabels in [ 2, 3, 4 ]:
            basename = 'uni%d%s' % (nlabels, kind)
            nonbogus_features = nonbogus_pool[:nlabels]

            dataset = normal_feature_dataset(
                nlabels=nlabels,
                nonbogus_features=nonbogus_features,
                **spec)

            # full dataset
            datasets[basename] = list(ttp.generate(dataset))[0]

        # sample 3D
        total = 2*spec['perlabel']
        nchunks = spec['nchunks']
        data = np.random.standard_normal(( total, 3, 6, 6 ))
        labels = np.concatenate( ( np.repeat( 0, spec['perlabel'] ),
                                  np.repeat( 1, spec['perlabel'] ) ) )
        data[:, 1, 0, 0] += 2*labels           # add some signal
        chunks = np.asarray(range(nchunks)*(total/nchunks))
        mask = np.ones((3, 6, 6), dtype='bool')
        mask[0, 0, 0] = 0
        mask[1, 3, 2] = 0
        ds = Dataset.from_wizard(samples=data, targets=labels, chunks=chunks,
                                 mask=mask, space='myspace')
        # and to stress tests on manipulating sa/fa possibly containing
        # attributes of dtype object
        ds.sa['test_object'] = [['a'], [1, 2]] * (ds.nsamples/2)
        datasets['3d%s' % kind] = ds


    # some additional datasets
    datasets['dumb2'] = dumb_feature_binary_dataset()
    datasets['dumb'] = dumb_feature_dataset()
    # dataset with few invariant features
    _dsinv = dumb_feature_dataset()
    _dsinv.samples = np.hstack((_dsinv.samples,
                               np.zeros((_dsinv.nsamples, 1)),
                               np.ones((_dsinv.nsamples, 1))))
    datasets['dumbinv'] = _dsinv

    # Datasets for regressions testing
    datasets['sin_modulated'] = list(ttp.generate(multiple_chunks(sin_modulated, 4, 30, 1)))[0]
    # use the same full for training
    datasets['sin_modulated_train'] = datasets['sin_modulated']
    datasets['sin_modulated_test'] = sin_modulated(30, 1, flat=True)

    # simple signal for linear regressors
    datasets['chirp_linear'] = multiple_chunks(chirp_linear, 6, 50, 10, 2, 0.3, 0.1)
    datasets['chirp_linear_test'] = chirp_linear(20, 5, 2, 0.4, 0.1)

    datasets['wr1996'] = multiple_chunks(wr1996, 4, 50)
    datasets['wr1996_test'] = wr1996(50)

    datasets['hollow'] = Dataset(HollowSamples((40,20)),
                                 sa={'targets': np.tile(['one', 'two'], 20)})

    return datasets