Example #1
def test_factorialpartitioner_big():
    # just to see that we can cope with relatively large datasets/numbers
    ds = normal_feature_dataset(nlabels=6,
                                perlabel=66,
                                nfeatures=2,
                                nchunks=11)

    # and now let's do factorial partitioner

    def partition(ds_=ds, **kwargs):
        partitioner = FactorialPartitioner(
            partitioner=NFoldPartitioner(attr='targets'),
            attr='chunks',
            **kwargs)
        return [p.sa.partitions for p in partitioner.generate(ds_)]

    # prohibitively large
    # print len(partition(ds))
    t0 = time()
    assert_equal(len(partition(ds, count=2, selection_strategy='first')), 2)
    # Those time limits are really a stretch: on any reasonable box that is
    # not too busy this should be done in a fraction of a second, but they
    # allow us to catch a "naive" implementation
    assert(time() - t0 < 3)

    assert_equal(len(partition(ds, count=2, selection_strategy='random')), 2)
    assert(time() - t0 < 3)
Example #2
    def test_slicing(self):
        hs = HalfPartitioner()
        spl = Splitter(attr="partitions")
        splits = list(hs.generate(self.data))
        for s in splits:
            # partitioned dataset shares the data
            assert_true(s.samples.base is self.data.samples)
        splits = [list(spl.generate(p)) for p in hs.generate(self.data)]

        # with numpy 1.7.0b1 "chaining" was deprecated, so let's create a
        # check function appropriate for the given numpy version
        _a = np.arange(5)
        __a = _a[:4][:3]
        if __a.base is _a:
            # 1.7.0b1
            def is_the_same_base(x, base=self.data.samples):
                return x.base is base

        elif __a.base.base is _a:
            # prior 1.7.0b1
            def is_the_same_base(x, base=self.data.samples):
                return x.base.base is base

        else:
            raise RuntimeError("Uknown handling of .base by numpy")

        for s in splits:
            # we get slicing all the time
            assert_true(is_the_same_base(s[0].samples))
            assert_true(is_the_same_base(s[1].samples))
        spl = Splitter(attr="partitions", noslicing=True)
        splits = [list(spl.generate(p)) for p in hs.generate(self.data)]
        for s in splits:
            # no slicing at all
            assert_false(s[0].samples.base is self.data.samples)
            assert_false(s[1].samples.base is self.data.samples)
        nfs = NFoldPartitioner()
        spl = Splitter(attr="partitions")
        splits = [list(spl.generate(p)) for p in nfs.generate(self.data)]
        for i, s in enumerate(splits):
            # training only first and last split
            if i == 0 or i == len(splits) - 1:
                assert_true(is_the_same_base(s[0].samples))
            else:
                assert_true(s[0].samples.base is None)
            # we get slicing all the time
            assert_true(is_the_same_base(s[1].samples))
        step_ds = Dataset(np.random.randn(20, 2), sa={"chunks": np.tile([0, 1], 10)})
        oes = OddEvenPartitioner()
        spl = Splitter(attr="partitions")
        splits = list(oes.generate(step_ds))
        for s in splits:
            # partitioned dataset shares the data
            assert_true(s.samples.base is step_ds.samples)
        splits = [list(spl.generate(p)) for p in oes.generate(step_ds)]
        assert_equal(len(splits), 2)
        for s in splits:
            # we get slicing all the time
            assert_true(is_the_same_base(s[0].samples, step_ds.samples))
            assert_true(is_the_same_base(s[1].samples, step_ds.samples))
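A note on the version probe above: before numpy 1.7.0b1, slicing a slice produced a chain of views (so __a.base.base is _a), while 1.7.0b1 and later collapse the intermediate view (__a.base is _a). A minimal standalone illustration of the same probe; which branch holds depends on the installed numpy:

    import numpy as np

    a = np.arange(5)
    b = a[:4][:3]  # a view of a view
    # exactly one of these holds, depending on the numpy version
    assert (b.base is a) or (b.base is not None and b.base.base is a)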
Example #3
def test_product_flatten():
    nsamples = 17
    product_name_values = [('chan', ['C1', 'C2']),
                           ('freq', np.arange(4, 20, 6)),
                           ('time', np.arange(-200, 800, 200))]

    shape = (nsamples, ) + tuple(len(v) for _, v in product_name_values)

    sample_names = ['samp%d' % i for i in xrange(nsamples)]

    # generate random data in four dimensions
    data = np.random.normal(size=shape)
    ds = Dataset(data, sa=dict(sample_names=sample_names))

    # apply flattening to ds
    flattener = ProductFlattenMapper(product_name_values)

    # test I/O (only if h5py is available)
    if externals.exists('h5py'):
        from mvpa2.base.hdf5 import h5save, h5load
        import tempfile
        import os

        fd, testfn = tempfile.mkstemp('mapper.h5py', 'test_product')
        os.close(fd)
        h5save(testfn, flattener)
        flattener = h5load(testfn)
        os.unlink(testfn)

    mds = flattener(ds)

    prod = lambda x: reduce(operator.mul, x)

    # ensure the size is ok
    assert_equal(mds.shape, (nsamples, ) + (prod(shape[1:]), ))

    ndim = len(product_name_values)

    idxs = [range(len(v)) for _, v in product_name_values]
    for si in xrange(nsamples):
        for fi, p in enumerate(itertools.product(*idxs)):
            data_tup = (si, ) + p

            x = mds[si, fi]

            # value should match
            assert_equal(data[data_tup], x.samples[0, 0])

            # indices should match as well
            all_idxs = tuple(x.fa['chan_freq_time_indices'].value.ravel())
            assert_equal(p, all_idxs)

            # values and indices in each dimension should match
            for i, (name, value) in enumerate(product_name_values):
                assert_equal(x.fa[name].value, value[p[i]])
                assert_equal(x.fa[name + '_indices'].value, p[i])

    product_name_values += [('foo', [1, 2, 3])]
    flattener = ProductFlattenMapper(product_name_values)
    assert_raises(ValueError, flattener, ds)
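A quick size check on this test's expectations: 'freq' is np.arange(4, 20, 6), i.e. [4, 10, 16] (3 values), and 'time' is np.arange(-200, 800, 200), i.e. [-200, 0, 200, 400, 600] (5 values), so the flattened feature axis should have 2 * 3 * 5 = 30 entries and mds.shape should be (17, 30). A minimal standalone check with plain numpy:

    import numpy as np

    chan = ['C1', 'C2']
    freq = np.arange(4, 20, 6)        # [4, 10, 16]
    time = np.arange(-200, 800, 200)  # [-200, 0, 200, 400, 600]
    # the flattened feature count is the product of the per-dimension lengths
    assert len(chan) * len(freq) * len(time) == 30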
Example #4
def test_conditional_attr():
    import copy
    import cPickle
    for node in (TestNodeOnDefault(enable_ca=['test', 'stats']),
                 TestNodeOffDefault(enable_ca=['test', 'stats'])):
        node.ca.test = range(5)
        node.ca.stats = ConfusionMatrix(labels=['one', 'two'])
        node.ca.stats.add(('one', 'two', 'one', 'two'),
                          ('one', 'two', 'two', 'one'))
        node.ca.stats.compute()

        dc_node = copy.deepcopy(node)
        assert_equal(set(node.ca.enabled), set(dc_node.ca.enabled))
        assert (node.ca['test'].enabled)
        assert (node.ca['stats'].enabled)
        assert_array_equal(node.ca['test'].value, dc_node.ca['test'].value)
        assert_array_equal(node.ca['stats'].value.matrix,
                           dc_node.ca['stats'].value.matrix)

        # check whether values survive pickling
        pickled = cPickle.dumps(node)
        up_node = cPickle.loads(pickled)
        assert_array_equal(up_node.ca['test'].value, range(5))
        assert_array_equal(up_node.ca['stats'].value.matrix,
                           node.ca['stats'].value.matrix)
Example #5
def test_repeater():
    reps = 4
    r = Repeater(reps, space='OMG')
    dsl = [ds for ds in r.generate(Dataset([0,1]))]
    assert_equal(len(dsl), reps)
    for i, ds in enumerate(dsl):
        assert_equal(ds.a.OMG, i)
Example #6
def test_repeater():
    reps = 4
    r = Repeater(reps, space='OMG')
    dsl = [ds for ds in r.generate(Dataset([0, 1]))]
    assert_equal(len(dsl), reps)
    for i, ds in enumerate(dsl):
        assert_equal(ds.a.OMG, i)
Example #7
def test_sifter_with_balancing():
    # extended previous test which was already
    # "... somewhat duplicating the doctest"
    ds = Dataset(samples=np.arange(12).reshape((-1, 2)),
                 sa={'chunks':   [ 0 ,  1 ,  2 ,  3 ,  4,   5 ],
                     'targets':  ['c', 'c', 'c', 'p', 'p', 'p']})

    # Without sifter -- just to assure that we do get all of them
    # i.e. 6*5*4*3/(4!) = 15
    par = ChainNode([NFoldPartitioner(cvtype=4, attr='chunks')])
    assert_equal(len(list(par.generate(ds))), 15)

    # so we will take 4 chunks out of the available 6, but would care only
    # about those partitions where we have a balanced number of 'c' and 'p'
    # entries
    assert_raises(ValueError,
                  lambda x: list(Sifter([('targets', dict(wrong=1))]).generate(x)),
                  ds)

    par = ChainNode([NFoldPartitioner(cvtype=4, attr='chunks'),
                     Sifter([('partitions', 2),
                             ('targets',
                              dict(uvalues=['c', 'p'],
                                   balanced=True))])
                     ])
    dss = list(par.generate(ds))
    # print [ x[x.sa.partitions==2].sa.targets for x in dss ]
    assert_equal(len(dss), 9)
    for ds_ in dss:
        testing = ds[ds_.sa.partitions == 2]
        assert_array_equal(np.unique(testing.sa.targets), ['c', 'p'])
        # and we still have both targets  present in training
        training = ds[ds_.sa.partitions == 1]
        assert_array_equal(np.unique(training.sa.targets), ['c', 'p'])
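The asserted counts follow from simple combinatorics: choosing the 4 test chunks out of 6 gives C(6, 4) = 6*5*4*3/4! = 15 partition sets, and a balanced test set must take 2 of the 3 'c' chunks and 2 of the 3 'p' chunks, i.e. C(3, 2) * C(3, 2) = 9. A minimal standalone check of that arithmetic (standard library only):

    from math import factorial

    def comb(n, k):
        # binomial coefficient n! / (k! * (n - k)!)
        return factorial(n) // (factorial(k) * factorial(n - k))

    assert comb(6, 4) == 15       # all 4-of-6 chunk selections
    assert comb(3, 2) ** 2 == 9   # balanced: 2 'c' chunks and 2 'p' chunks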
Example #8
def test_sifter_with_balancing():
    # extended previous test which was already
    # "... somewhat duplicating the doctest"
    ds = Dataset(samples=np.arange(12).reshape((-1, 2)),
                 sa={
                     'chunks': [0, 1, 2, 3, 4, 5],
                     'targets': ['c', 'c', 'c', 'p', 'p', 'p']
                 })

    # Without sifter -- just to assure that we do get all of them
    # i.e. 6*5*4*3/(4!) = 15
    par = ChainNode([NFoldPartitioner(cvtype=4, attr='chunks')])
    assert_equal(len(list(par.generate(ds))), 15)

    # so we will take 4 chunks out of the available 6, but would care only
    # about those partitions where we have a balanced number of 'c' and 'p'
    # entries
    assert_raises(
        ValueError,
        lambda x: list(Sifter([('targets', dict(wrong=1))]).generate(x)), ds)

    par = ChainNode([
        NFoldPartitioner(cvtype=4, attr='chunks'),
        Sifter([('partitions', 2),
                ('targets', dict(uvalues=['c', 'p'], balanced=True))])
    ])
    dss = list(par.generate(ds))
    # print [ x[x.sa.partitions==2].sa.targets for x in dss ]
    assert_equal(len(dss), 9)
    for ds_ in dss:
        testing = ds[ds_.sa.partitions == 2]
        assert_array_equal(np.unique(testing.sa.targets), ['c', 'p'])
        # and we still have both targets  present in training
        training = ds[ds_.sa.partitions == 1]
        assert_array_equal(np.unique(training.sa.targets), ['c', 'p'])
Example #9
def test_remove_invariant_as_a_mapper():
    from mvpa2.featsel.helpers import RangeElementSelector
    from mvpa2.featsel.base import StaticFeatureSelection, SensitivityBasedFeatureSelection
    from mvpa2.testing.datasets import datasets
    from mvpa2.datasets.miscfx import remove_invariant_features

    mapper = SensitivityBasedFeatureSelection(
              lambda x: np.std(x, axis=0),
              RangeElementSelector(lower=0, inclusive=False),
              train_analyzer=False,
              auto_train=True)

    ds = datasets['uni2large'].copy()

    ds.a['mapper'] = StaticFeatureSelection(np.arange(ds.nfeatures))
    ds.fa['index'] = np.arange(ds.nfeatures)
    ds.samples[:, [1, 8]] = 10

    ds_out = mapper(ds)

    # Validate that we are getting the same results as remove_invariant_features
    ds_rifs = remove_invariant_features(ds)
    assert_array_equal(ds_out.samples, ds_rifs.samples)
    assert_array_equal(ds_out.fa.index, ds_rifs.fa.index)

    assert_equal(ds_out.fa.index[1], 2)
    assert_equal(ds_out.fa.index[8], 10)
Example #10
    def assert_coordinates_almost_equal_modulo_rotation(p_xyz, q_xyz,
                                                        max_difference):
        assert_equal(p_xyz.shape, q_xyz.shape)
        n, three = p_xyz.shape
        assert_equal(three, 3)

        n_pairs_to_test = 50

        get_random_int = lambda: int(random.uniform(0, n))
        get_distance = lambda x, y: np.linalg.norm(x - y)

        # ensure that we test for at least some distances, i.e.
        # that the presence of nans everywhere would not lead to a 'skipped'
        # test
        did_distance_test = False

        # compute some pairwise distances between nodes, and verify these
        # are more or less the same in p_xyz and q_xyz
        for _ in xrange(n_pairs_to_test):
            a = get_random_int()
            b = get_random_int()

            d_p = get_distance(p_xyz[a], p_xyz[b])
            d_q = get_distance(q_xyz[a], q_xyz[b])

            if not any(np.isnan([d_p, d_q])):
                assert (abs(d_p - d_q) < max_difference)
                did_distance_test = True

        assert (did_distance_test)
Example #11
def test_addaxis():
    from mvpa2.mappers.shape import AddAxisMapper
    ds = Dataset(np.arange(24).reshape(2, 3, 4),
                 sa={'testsa': np.arange(2)},
                 fa={'testfa': np.arange(3)})
    ds0 = AddAxisMapper(pos=0)(ds)
    assert_array_equal(ds0.shape, (1,) + ds.shape)
    # sas have extra dimension
    assert_array_equal(ds0.sa.testsa[0], ds.sa.testsa)
    # fas are duplicated
    assert_array_equal(ds0.fa.testfa[0], ds0.fa.testfa[1])
    ds1 = AddAxisMapper(pos=1)(ds)
    assert_array_equal(ds1.shape, (2, 1, 3, 4))
    # same sample attribute
    assert_equal(ds1.sa, ds.sa)
    # fas have extra dimension
    assert_array_equal(ds1.fa.testfa[0], ds.fa.testfa)
    ds2 = AddAxisMapper(pos=2)(ds)
    assert_array_equal(ds2.shape, (2, 3, 1, 4))
    # no change to attribute collections
    assert_equal(ds2.sa, ds.sa)
    assert_equal(ds2.fa, ds.fa)
    # append an axis
    ds3 = AddAxisMapper(pos=3)(ds)
    assert_array_equal(ds3.shape, ds.shape + (1,))
    # reverse indexing
    ds_1 = AddAxisMapper(pos=-1)(ds)
    assert_array_equal(ds3.samples, ds_1.samples)
    assert_equal(ds3.sa, ds_1.sa)
    assert_equal(ds3.fa, ds_1.fa)
    # add multiple axes
    ds4 = AddAxisMapper(pos=4)(ds)
    assert_array_equal(ds4.shape, ds.shape + (1, 1))
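For positions within the existing dimensionality, AddAxisMapper behaves like numpy's expand_dims; the pos=4 case above shows that asking for a position past the last axis pads with as many trailing singleton axes as needed. A small numpy-only sketch of the analogous shapes (the PyMVPA behavior itself is documented by the assertions above):

    import numpy as np

    x = np.arange(24).reshape(2, 3, 4)
    # inserting an axis at positions 0..3 matches np.expand_dims
    assert np.expand_dims(x, 0).shape == (1, 2, 3, 4)
    assert np.expand_dims(x, 3).shape == (2, 3, 4, 1)
    # position 4 on a 3-axis dataset yields shape (2, 3, 4, 1, 1) per the test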
Example #12
            def _predict(self, ds_):
                # also called for estimating training error
                assert(ds_ is not ds)  # we pass a shallow copy
                assert(len(ds_) < len(ds))
                assert_equal(len(ds_.sa['partitions'].unique), 1)

                return ['c', 'd']
Example #13
def test_forward_dense_array_mapper():
    mask = np.ones((3, 2), dtype='bool')
    map_ = mask_mapper(mask)

    # test shape reports
    assert_equal(map_.forward1(mask).shape, (6, ))

    # test 1sample mapping
    assert_array_equal(map_.forward1(np.arange(6).reshape(3, 2)),
                       [0, 1, 2, 3, 4, 5])

    # test 4sample mapping
    foursample = map_.forward(np.arange(24).reshape(4, 3, 2))
    assert_array_equal(foursample,
                       [[0, 1, 2, 3, 4, 5], [6, 7, 8, 9, 10, 11],
                        [12, 13, 14, 15, 16, 17], [18, 19, 20, 21, 22, 23]])

    # check incomplete masks
    mask[1, 1] = 0
    map_ = mask_mapper(mask)
    assert_equal(map_.forward1(mask).shape, (5, ))
    assert_array_equal(map_.forward1(np.arange(6).reshape(3, 2)),
                       [0, 1, 2, 4, 5])

    # check that it doesn't accept wrong dataspace
    assert_raises(ValueError, map_.forward, np.arange(4).reshape(2, 2))

    # check fail if neither mask nor shape
    assert_raises(ValueError, mask_mapper)

    # check that a full mask is automatically created when providing shape
    m = mask_mapper(shape=(2, 3, 4))
    mp = m.forward1(np.arange(24).reshape(2, 3, 4))
    assert_array_equal(mp, np.arange(24))
Example #14
            def _predict(self, ds_):
                # also called for estimating training error
                assert (ds_ is not ds)  # we pass a shallow copy
                assert (len(ds_) < len(ds))
                assert_equal(len(ds_.sa['partitions'].unique), 1)

                return ['c', 'd']
Example #15
def test_sampleslicemapper():
    # this does nothing but Dataset.__getitem__ which is tested elsewhere -- but
    # at least we run it
    ds = datasets['uni2small']
    ssm = SampleSliceMapper(slice(3, 8, 2))
    sds = ssm(ds)
    assert_equal(len(sds), 3)
Example #16
def test_repr():
    # this time give mask only by its target length
    sm = StaticFeatureSelection(slice(None), space='myspace')

    # check reproduction
    sm_clone = eval(repr(sm))
    assert_equal(repr(sm_clone), repr(sm))
Example #17
def test_corrstability_smoketest(ds):
    if 'chunks' not in ds.sa:
        return
    if len(ds.sa['targets'].unique) > 30:
        # was regression dataset
        return
    # very basic smoke testing
    cs = CorrStability()
    #ds = datasets['uni2small']
    out = cs(ds)
    assert_equal(out.shape, (ds.nfeatures,))
    ok_(np.all(out >= -1.001))  # it should be a correlation after all
    ok_(np.all(out <= 1.001))

    # and theoretically those nonbogus features should have higher values
    if 'nonbogus_targets' in ds.fa:
        bogus_features = np.array([x is None for x in ds.fa.nonbogus_targets])
        assert_array_less(np.mean(out[bogus_features]), np.mean(out[~bogus_features]))
    # and if we move targets to alternative location
    ds = ds.copy(deep=True)
    ds.sa['alt'] = ds.T
    ds.sa.pop('targets')
    assert_raises(KeyError, cs, ds)
    cs = CorrStability('alt')
    out_ = cs(ds)
    assert_array_equal(out, out_)
Example #18
def test_corrstability_smoketest(ds):
    if 'chunks' not in ds.sa:
        return
    if len(ds.sa['targets'].unique) > 30:
        # was regression dataset
        return
    # very basic smoke testing
    cs = CorrStability()
    #ds = datasets['uni2small']
    out = cs(ds)
    assert_equal(out.shape, (ds.nfeatures, ))
    ok_(np.all(out >= -1.001))  # it should be a correlation after all
    ok_(np.all(out <= 1.001))

    # and theoretically those nonbogus features should have higher values
    if 'nonbogus_targets' in ds.fa:
        bogus_features = np.array([x is None for x in ds.fa.nonbogus_targets])
        assert_array_less(np.mean(out[bogus_features]),
                          np.mean(out[~bogus_features]))
    # and if we move targets to alternative location
    ds = ds.copy(deep=True)
    ds.sa['alt'] = ds.T
    ds.sa.pop('targets')
    assert_raises(KeyError, cs, ds)
    cs = CorrStability('alt')
    out_ = cs(ds)
    assert_array_equal(out, out_)
Example #19
def test_addaxis():
    from mvpa2.mappers.shape import AddAxisMapper
    ds = Dataset(np.arange(24).reshape(2, 3, 4),
                 sa={'testsa': np.arange(2)},
                 fa={'testfa': np.arange(3)})
    ds0 = AddAxisMapper(pos=0)(ds)
    assert_array_equal(ds0.shape, (1,) + ds.shape)
    # sas have extra dimension
    assert_array_equal(ds0.sa.testsa[0], ds.sa.testsa)
    # fas are duplicated
    assert_array_equal(ds0.fa.testfa[0], ds0.fa.testfa[1])
    ds1 = AddAxisMapper(pos=1)(ds)
    assert_array_equal(ds1.shape, (2, 1, 3, 4))
    # same sample attribute
    assert_equal(ds1.sa, ds.sa)
    # fas have extra dimension
    assert_array_equal(ds1.fa.testfa[0], ds.fa.testfa)
    ds2 = AddAxisMapper(pos=2)(ds)
    assert_array_equal(ds2.shape, (2, 3, 1, 4))
    # no change to attribute collections
    assert_equal(ds2.sa, ds.sa)
    assert_equal(ds2.fa, ds.fa)
    # append an axis
    ds3 = AddAxisMapper(pos=3)(ds)
    assert_array_equal(ds3.shape, ds.shape + (1,))
    # reverse indexing
    ds_1 = AddAxisMapper(pos=-1)(ds)
    assert_array_equal(ds3.samples, ds_1.samples)
    assert_equal(ds3.sa, ds_1.sa)
    assert_equal(ds3.fa, ds_1.fa)
    # add multiple axes
    ds4 = AddAxisMapper(pos=4)(ds)
    assert_array_equal(ds4.shape, ds.shape + (1, 1))
Example #20
def test_repr():
    # this time give mask only by its target length
    sm = StaticFeatureSelection(slice(None), space='myspace')

    # check reproduction
    sm_clone = eval(repr(sm))
    assert_equal(repr(sm_clone), repr(sm))
Example #21
    def assert_coordinates_almost_equal_modulo_rotation(p_xyz, q_xyz,
                                                        max_difference):
        assert_equal(p_xyz.shape, q_xyz.shape)
        n, three = p_xyz.shape
        assert_equal(three, 3)

        n_pairs_to_test = 50

        get_random_int = lambda: int(random.uniform(0, n))
        get_distance = lambda x, y: np.linalg.norm(x - y)

        # ensure that we test for at least some distances, i.e.
        # that the presence of nans everywhere would not lead to a 'skipped'
        # test
        did_distance_test = False

        # compute some pairwise distances between nodes, and verify these
        # are more or less the same in p_xyz and q_xyz
        for _ in xrange(n_pairs_to_test):
            a = get_random_int()
            b = get_random_int()

            d_p = get_distance(p_xyz[a], p_xyz[b])
            d_q = get_distance(q_xyz[a], q_xyz[b])

            if not any(np.isnan([d_p, d_q])):
                assert (abs(d_p - d_q) < max_difference)
                did_distance_test = True

        assert (did_distance_test)
Example #22
def test_factorialpartitioner_big():
    # just to see that we can cope with relatively large datasets/numbers
    ds = normal_feature_dataset(nlabels=6,
                                perlabel=66,
                                nfeatures=2,
                                nchunks=11)

    # and now let's do factorial partitioner

    def partition(ds_=ds, **kwargs):
        partitioner = FactorialPartitioner(
            partitioner=NFoldPartitioner(attr='targets'),
            attr='chunks',
            **kwargs)
        return [p.sa.partitions for p in partitioner.generate(ds_)]

    # prohibitively large
    # print len(partition(ds))
    t0 = time()
    assert_equal(len(partition(ds, count=2, selection_strategy='first')), 2)
    # Those time limits are really a stretch: on any reasonable box that is
    # not too busy this should be done in a fraction of a second, but they
    # allow us to catch a "naive" implementation
    assert (time() - t0 < 3)

    assert_equal(len(partition(ds, count=2, selection_strategy='random')), 2)
    assert (time() - t0 < 3)
Example #23
def test_sampleslicemapper():
    # this does nothing but Dataset.__getitem__ which is tested elsewhere -- but
    # at least we run it
    ds = datasets['uni2small']
    ssm = SampleSliceMapper(slice(3, 8, 2))
    sds = ssm(ds)
    assert_equal(len(sds), 3)
Example #24
def test_product_flatten():
    nsamples = 17
    product_name_values = [('chan', ['C1', 'C2']),
                           ('freq', np.arange(4, 20, 6)),
                           ('time', np.arange(-200, 800, 200))]

    shape = (nsamples,) + tuple(len(v) for _, v in product_name_values)

    sample_names = ['samp%d' % i for i in xrange(nsamples)]

    # generate random data in four dimensions
    data = np.random.normal(size=shape)
    ds = Dataset(data, sa=dict(sample_names=sample_names))

    # apply flattening to ds
    flattener = ProductFlattenMapper(product_name_values)

    # test I/O (only if h5py is available)
    if externals.exists('h5py'):
        from mvpa2.base.hdf5 import h5save, h5load
        import tempfile
        import os

        fd, testfn = tempfile.mkstemp('mapper.h5py', 'test_product')
        os.close(fd)  # close the low-level handle so only the path is kept
        h5save(testfn, flattener)
        flattener = h5load(testfn)
        os.unlink(testfn)

    mds = flattener(ds)

    prod = lambda x: reduce(operator.mul, x)

    # ensure the size is ok
    assert_equal(mds.shape, (nsamples,) + (prod(shape[1:]),))

    ndim = len(product_name_values)

    idxs = [range(len(v)) for _, v in product_name_values]
    for si in xrange(nsamples):
        for fi, p in enumerate(itertools.product(*idxs)):
            data_tup = (si,) + p

            x = mds[si, fi]

            # value should match
            assert_equal(data[data_tup], x.samples[0, 0])

            # indices should match as well
            all_idxs = tuple(x.fa['chan_freq_time_indices'].value.ravel())
            assert_equal(p, all_idxs)

            # values and indices in each dimension should match
            for i, (name, value) in enumerate(product_name_values):
                assert_equal(x.fa[name].value, value[p[i]])
                assert_equal(x.fa[name + '_indices'].value, p[i])

    product_name_values += [('foo', [1, 2, 3])]
    flattener = ProductFlattenMapper(product_name_values)
    assert_raises(ValueError, flattener, ds)
Example #25
    def test_slicing(self):
        hs = HalfPartitioner()
        spl = Splitter(attr='partitions')
        splits = list(hs.generate(self.data))
        for s in splits:
            # partitioned dataset shares the data
            assert_true(s.samples.base is self.data.samples)
        splits = [list(spl.generate(p)) for p in hs.generate(self.data)]

        # with numpy 1.7.0b1 "chaining" was deprecated, so let's create a
        # check function appropriate for the given numpy version
        _a = np.arange(5)
        __a = _a[:4][:3]
        if __a.base is _a:
            # 1.7.0b1
            def is_the_same_base(x, base=self.data.samples):
                return x.base is base
        elif __a.base.base is _a:
            # prior 1.7.0b1
            def is_the_same_base(x, base=self.data.samples):
                return x.base.base is base
        else:
            raise RuntimeError("Uknown handling of .base by numpy")

        for s in splits:
            # we get slicing all the time
            assert_true(is_the_same_base(s[0].samples))
            assert_true(is_the_same_base(s[1].samples))
        spl = Splitter(attr='partitions', noslicing=True)
        splits = [list(spl.generate(p)) for p in hs.generate(self.data)]
        for s in splits:
            # no slicing at all
            assert_false(s[0].samples.base is self.data.samples)
            assert_false(s[1].samples.base is self.data.samples)
        nfs = NFoldPartitioner()
        spl = Splitter(attr='partitions')
        splits = [list(spl.generate(p)) for p in nfs.generate(self.data)]
        for i, s in enumerate(splits):
            # training only first and last split
            if i == 0 or i == len(splits) - 1:
                assert_true(is_the_same_base(s[0].samples))
            else:
                assert_true(s[0].samples.base is None)
            # we get slicing all the time
            assert_true(s[1].samples.base.base is self.data.samples)
        step_ds = Dataset(np.random.randn(20, 2),
                          sa={'chunks': np.tile([0, 1], 10)})
        oes = OddEvenPartitioner()
        spl = Splitter(attr='partitions')
        splits = list(oes.generate(step_ds))
        for s in splits:
            # partitioned dataset shares the data
            assert_true(s.samples.base is step_ds.samples)
        splits = [list(spl.generate(p)) for p in oes.generate(step_ds)]
        assert_equal(len(splits), 2)
        for s in splits:
            # we get slicing all the time
            assert_true(s[0].samples.base.base is step_ds.samples)
            assert_true(s[1].samples.base.base is step_ds.samples)
Example #26
def test_strip_boundary():
    ds = datasets['hollow']
    ds.sa['btest'] = np.repeat([0, 1], 20)
    sn = StripBoundariesSamples('btest', 1, 2)
    sds = sn(ds)
    assert_equal(len(sds), len(ds) - 3)
    for i in [19, 20, 21]:
        assert_false(i in sds.samples.sid)
Example #27
def test_strip_boundary():
    ds = datasets['hollow']
    ds.sa['btest'] = np.repeat([0, 1], 20)
    sn = StripBoundariesSamples('btest', 1, 2)
    sds = sn(ds)
    assert_equal(len(sds), len(ds) - 3)
    for i in [19, 20, 21]:
        assert_false(i in sds.samples.sid)
Example #28
def test_eep_bin():
    eb = EEPBin(os.path.join(pymvpa_dataroot, 'eep.bin'))

    assert_equal(eb.nchannels, 32)
    assert_equal(eb.nsamples, 2)
    assert_equal(eb.ntimepoints, 4)
    assert_true(eb.t0 - eb.dt < 0.00000001)
    assert_equal(len(eb.channels), 32)
    assert_equal(eb.data.shape, (2, 32, 4))
Example #29
def test_eep_bin():
    eb = EEPBin(os.path.join(pymvpa_dataroot, 'eep.bin'))

    assert_equal(eb.nchannels, 32)
    assert_equal(eb.nsamples, 2)
    assert_equal(eb.ntimepoints, 4)
    assert_true(eb.t0 - eb.dt < 0.00000001)
    assert_equal(len(eb.channels), 32)
    assert_equal(eb.data.shape, (2, 32, 4))
Example #30
 def test_nfold_random_counted_selection_partitioner_huge(self):
     # Just test that it completes in a reasonable time and does
     # not blow up as it would if it were not limited by count
     kwargs = dict(count=10)
     ds = dataset_wizard(np.arange(1000).reshape((-1, 1)), targets=range(1000), chunks=range(500) * 2)
     split_partitions_random = [
         tuple(x.sa.partitions) for x in NFoldPartitioner(100, selection_strategy="random", **kwargs).generate(ds)
     ]
     assert_equal(len(split_partitions_random), 10)  # we get just 10
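The count=10 cap is what makes this test feasible at all: with 500 distinct chunk values, NFoldPartitioner(100) has C(500, 100) possible chunk selections, a number with more than 100 decimal digits. A standalone illustration of the magnitude (standard library only; nothing ever enumerates these):

    from math import factorial

    n_choices = factorial(500) // (factorial(100) * factorial(400))
    # over a googol of candidate partitionings -- enumeration is hopeless
    assert len(str(n_choices)) > 100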
Example #31
def test_searchlight_errors_per_trial():
    # To make sure that searchlight can return error/accuracy per trial
    from mvpa2.clfs.gnb import GNB
    from mvpa2.generators.partition import OddEvenPartitioner
    from mvpa2.measures.base import CrossValidation
    from mvpa2.measures.searchlight import sphere_searchlight
    from mvpa2.measures.gnbsearchlight import sphere_gnbsearchlight
    from mvpa2.testing.datasets import datasets
    from mvpa2.misc.errorfx import prediction_target_matches

    dataset = datasets['3dsmall'].copy()
    # randomly permute samples so we break any random correspondence
    # to strengthen tests below
    sample_idx = np.arange(len(dataset))
    dataset = dataset[np.random.permutation(sample_idx)]

    dataset.sa.targets = ['L%d' % l for l in dataset.sa.targets]
    dataset.fa['voxel_indices'] = dataset.fa.myspace
    sample_clf = GNB()              # fast and deterministic

    part = OddEvenPartitioner()
    # only do partial to save time
    cv = CrossValidation(sample_clf, part, errorfx=None) #prediction_target_matches)
    # Just to compare error
    cv_error = CrossValidation(sample_clf, part)

    # Large searchlight radius so we get entire ROI, 2 centers just to make sure
    # that all stacking works correctly
    sl = sphere_searchlight(cv, radius=10, center_ids=[0, 1])
    results = sl(dataset)

    sl_gnb = sphere_gnbsearchlight(sample_clf, part, radius=10, errorfx=None,
                                   center_ids=[0, 1])
    results_gnbsl = sl_gnb(dataset)

    # inspect both results
    # verify that partitioning was done correctly
    partitions = list(part.generate(dataset))
    for res in (results, results_gnbsl):
        assert('targets' in res.sa.keys())  # should carry targets
        assert('cvfolds' in res.sa.keys())  # should carry cvfolds
        for ipart in xrange(len(partitions)):
            assert_array_equal(dataset[partitions[ipart].sa.partitions == 2].targets,
                               res.sa.targets[res.sa.cvfolds == ipart])

    assert_datasets_equal(results, results_gnbsl)

    # one "accuracy" per each trial
    assert_equal(results.shape, (len(dataset), 2))
    # with accuracies the same in both searchlights since the same
    # features were to be selected in both cases due to the large radii
    errors_dataset = cv(dataset)
    assert_array_equal(errors_dataset.samples[:, 0], results.samples[:, 0])
    assert_array_equal(errors_dataset.samples[:, 0], results.samples[:, 1])
    # and error matching (up to precision) the one we get with the default error function
    assert_array_almost_equal(np.mean(results.targets[:, None] != results.samples, axis=0)[0],
                              np.mean(cv_error(dataset)))
Example #32
def test_sphere():
    # test sphere initialization
    s = ne.Sphere(1)
    center0 = (0, 0, 0)
    center1 = (1, 1, 1)
    assert_equal(len(s(center0)), 7)
    target = array([array([-1,  0,  0]),
              array([ 0, -1,  0]),
              array([ 0,  0, -1]),
              array([0, 0, 0]),
              array([0, 0, 1]),
              array([0, 1, 0]),
              array([1, 0, 0])])
    # test of internals -- no recomputation of increments should be done
    prev_increments = s._increments
    assert_array_equal(s(center0), target)
    ok_(prev_increments is s._increments)
    # query lower dimensionality
    _ = s((0, 0))
    ok_(prev_increments is not s._increments)

    # test Sphere call
    target = [array([0, 1, 1]),
              array([1, 0, 1]),
              array([1, 1, 0]),
              array([1, 1, 1]),
              array([1, 1, 2]),
              array([1, 2, 1]),
              array([2, 1, 1])]
    res = s(center1)
    assert_array_equal(array(res), target)
    # They all should be tuples
    ok_(np.all([isinstance(x, tuple) for x in res]))

    # test for larger diameter
    s = ne.Sphere(4)
    assert_equal(len(s(center1)), 257)

    # test extent keyword
    #s = ne.Sphere(4,extent=(1,1,1))
    #assert_array_equal(array(s((0,0,0))), array([[0,0,0]]))

    # test Errors during initialisation and call
    #assert_raises(ValueError, ne.Sphere, 2)
    #assert_raises(ValueError, ne.Sphere, 1.0)

    # no longer extent available
    assert_raises(TypeError, ne.Sphere, 1, extent=(1))
    assert_raises(TypeError, ne.Sphere, 1, extent=(1.0, 1.0, 1.0))

    s = ne.Sphere(1)
    #assert_raises(ValueError, s, (1))
    if __debug__:
        # No float coordinates allowed for now...
        # XXX might like to change that ;)
        # 
        assert_raises(ValueError, s, (1.0, 1.0, 1.0))
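The neighborhood sizes asserted here are counts of integer offsets within the given radius: Sphere(1) yields the center plus its 6 axis neighbors (the 7-element target list above), and Sphere(4) yields the 257 integer points (i, j, k) with i**2 + j**2 + k**2 <= 16. A plain-numpy recount of the radius-4 case:

    import numpy as np

    r = 4
    grid = np.mgrid[-r:r + 1, -r:r + 1, -r:r + 1]
    # count integer offsets whose squared L2 norm is within r**2
    assert (np.sum(grid ** 2, axis=0) <= r ** 2).sum() == 257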
Example #33
def test_attrpermute():
    ds = give_data()
    ds.sa['ids'] = range(len(ds))
    pristine_data = ds.samples.copy()
    permutation = AttributePermutator(['targets', 'ids'], assure=True)
    pds = permutation(ds)
    # should not touch the data
    assert_array_equal(pristine_data, pds.samples)
    # even keep the very same array
    assert_true(pds.samples.base is ds.samples)
    # there is no way that it can be the same attribute
    assert_false(np.all(pds.sa.ids == ds.sa.ids))
    # ids should reflect permutation setup
    assert_array_equal(pds.sa.targets, ds.sa.targets[pds.sa.ids])
    # other attribute should remain intact
    assert_array_equal(pds.sa.chunks, ds.sa.chunks)

    # now chunk-wise permutation
    permutation = AttributePermutator('ids', limit='chunks')
    pds = permutation(ds)
    # first ten should remain first ten
    assert_false(np.any(pds.sa.ids[:10] > 9))

    # same thing, but only permute single chunk
    permutation = AttributePermutator('ids', limit={'chunks': 3})
    pds = permutation(ds)
    # one chunk should change
    assert_false(np.any(pds.sa.ids[30:40] > 39))
    assert_false(np.any(pds.sa.ids[30:40] < 30))
    # the rest not
    assert_array_equal(pds.sa.ids[:30], range(30))

    # or a list of chunks
    permutation = AttributePermutator('ids', limit={'chunks': [3,4]})
    pds = permutation(ds)
    # two chunks should change
    assert_false(np.any(pds.sa.ids[30:50] > 49))
    assert_false(np.any(pds.sa.ids[30:50] < 30))
    # the rest not
    assert_array_equal(pds.sa.ids[:30], range(30))

    # and now try generating more permutations
    nruns = 2
    permutation = AttributePermutator(['targets', 'ids'], assure=True, count=nruns)
    pds = list(permutation.generate(ds))
    assert_equal(len(pds), nruns)
    for p in pds:
        assert_false(np.all(p.sa.ids == ds.sa.ids))

    # permute feature attrs
    ds.fa['ids'] = range(ds.shape[1])
    permutation = AttributePermutator('fa.ids', assure=True)
    pds = permutation(ds)
    assert_false(np.all(pds.fa.ids == ds.fa.ids))
Example #34
 def test_nfold_random_counted_selection_partitioner_huge(self):
     # Just test that it completes in a reasonable time and does
     # not blow up as it would if it were not limited by count
     kwargs = dict(count=10)
     ds = dataset_wizard(np.arange(1000).reshape((-1, 1)),
                         targets=range(1000),
                         chunks=range(500) * 2)
     split_partitions_random = [
         tuple(x.sa.partitions) for x in NFoldPartitioner(
             100, selection_strategy='random', **kwargs).generate(ds)
     ]
     assert_equal(len(split_partitions_random), 10)  # we get just 10
Example #35
    def test_read_fsl_design(self):
        fname = os.path.join(pymvpa_dataroot,
                             'sample_design.fsf')
        # use our function
        design = read_fsl_design(fname)
        # and just load manually to see either we match fine
        set_lines = [x for x in open(fname).readlines()
                     if x.startswith('set ')]
        assert_equal(len(set_lines), len(design))

        # figure out which one is missing
        """TODO: would require the same special treatment for _files fields
Example #36
def test_glmnet_r_sensitivities():
    data = datasets['chirp_linear']

    clf = GLMNET_R()

    clf.train(data)

    # now ask for the sensitivities WITHOUT having to pass the dataset
    # again
    sens = clf.get_sensitivity_analyzer(force_train=False)(None)

    assert_equal(sens.shape, (1, data.nfeatures))
Example #37
def test_glmnet_r_sensitivities():
    data = datasets['chirp_linear']

    clf = GLMNET_R()

    clf.train(data)

    # now ask for the sensitivities WITHOUT having to pass the dataset
    # again
    sens = clf.get_sensitivity_analyzer(force_train=False)(None)

    assert_equal(sens.shape, (1, data.nfeatures))
Example #38
    def test_read_fsl_design(self):
        fname = os.path.join(pymvpa_dataroot,
                             'sample_design.fsf')
        # use our function
        design = read_fsl_design(fname)
        # and just load manually to see either we match fine
        set_lines = [x for x in open(fname).readlines()
                     if x.startswith('set ')]
        assert_equal(len(set_lines), len(design))

        # figure out which one is missing
        """TODO: would require the same special treatment for _files fields
Example #39
def test_glmnet_c_sensitivities():
    data = normal_feature_dataset(perlabel=10, nlabels=2, nfeatures=4)

    # use GLMNET on binary problem
    clf = GLMNET_C()
    clf.train(data)

    # now ask for the sensitivities WITHOUT having to pass the dataset
    # again
    sens = clf.get_sensitivity_analyzer(force_train=False)(None)

    #failUnless(sens.shape == (data.nfeatures,))
    assert_equal(sens.shape, (len(data.UT), data.nfeatures))
Example #40
def test_glmnet_c_sensitivities():
    data = normal_feature_dataset(perlabel=10, nlabels=2, nfeatures=4)

    # use GLMNET on binary problem
    clf = GLMNET_C()
    clf.train(data)

    # now ask for the sensitivities WITHOUT having to pass the dataset
    # again
    sens = clf.get_sensitivity_analyzer(force_train=False)(None)

    #failUnless(sens.shape == (data.nfeatures,))
    assert_equal(sens.shape, (len(data.UT), data.nfeatures))
Example #41
    def test_simple_n_minus_one_cv(self):
        data = get_mv_pattern(3)
        data.init_origids("samples")

        self.assertTrue(data.nsamples == 120)
        self.assertTrue(data.nfeatures == 2)
        self.assertTrue((data.sa.targets == [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0] * 6).all())
        self.assertTrue((data.sa.chunks == [k for k in range(1, 7) for i in range(20)]).all())
        assert_equal(len(np.unique(data.sa.origids)), data.nsamples)

        cv = CrossValidation(sample_clf_nl, NFoldPartitioner(), enable_ca=["stats", "training_stats"])
        #                               'samples_error'])

        results = cv(data)
        self.assertTrue((results.samples < 0.2).all() and (results.samples >= 0.0).all())
Example #42
def test_attrmap_repr():
    assert_equal(repr(AttributeMap()), "AttributeMap()")
    assert_equal(repr(AttributeMap(dict(a=2, b=1))),
                 "AttributeMap({'a': 2, 'b': 1})")
    assert_equal(repr(AttributeMap(dict(a=2, b=1), mapnumeric=True)),
                 "AttributeMap({'a': 2, 'b': 1}, mapnumeric=True)")
    assert_equal(repr(AttributeMap(dict(a=2, b=1), mapnumeric=True, collisions_resolution='tuple')),
                 "AttributeMap({'a': 2, 'b': 1}, mapnumeric=True, collisions_resolution='tuple')")
Example #43
def test_mean_tpr():
    # Let's test now on some imbalanced sets
    assert_raises(ValueError, mean_tpr, [1], [])
    assert_raises(ValueError, mean_tpr, [], [1])
    assert_raises(ValueError, mean_tpr, [], [])

    # now interesting one where there were no target when it was in predicted
    assert_raises(ValueError, mean_tpr, [1], [0])
    assert_raises(ValueError, mean_tpr, [0, 1], [0, 0])
    # but it should be ok to have some targets not present in prediction
    assert_equal(mean_tpr([0, 0], [0, 1]), .5)
    # the same regardless how many samples in 0-class, if all misclassified
    # (winner by # of samples takes all)
    assert_equal(mean_tpr([0, 0, 0], [0, 0, 1]), .5)
    # whenever mean-accuracy would be different
    assert_almost_equal(mean_match_accuracy([0, 0, 0], [0, 0, 1]), 2/3.)
Example #44
def test_mean_tpr():
    # Let's test now on some imbalanced sets
    assert_raises(ValueError, mean_tpr, [1], [])
    assert_raises(ValueError, mean_tpr, [], [1])
    assert_raises(ValueError, mean_tpr, [], [])

    # now interesting one where there were no target when it was in predicted
    assert_raises(ValueError, mean_tpr, [1], [0])
    assert_raises(ValueError, mean_tpr, [0, 1], [0, 0])
    # but it should be ok to have some targets not present in prediction
    assert_equal(mean_tpr([0, 0], [0, 1]), .5)
    # the same regardless how many samples in 0-class, if all misclassified
    # (winner by # of samples takes all)
    assert_equal(mean_tpr([0, 0, 0], [0, 0, 1]), .5)
    # whenever mean-accuracy would be different
    assert_almost_equal(mean_match_accuracy([0, 0, 0], [0, 0, 1]), 2 / 3.)
Example #45
def test_static_reverse_doesnt_work_after_feature_selection_tuneup_1():
    ds_orig = datasets['uni2small'].copy()  # doesn't matter which

    m = StaticFeatureSelection(np.arange(4))
    m.train(ds_orig)
    ds = ds_orig.get_mapped(m)
    ds0_rev = ds.a.mapper.reverse1(ds.samples[0])  # should work
    assert_equal(ds0_rev.shape, (ds_orig.nfeatures,))

    # direct feature selection
    ds_ = ds[:, [0, 2]]
    # should work but doesn't due to
    # RuntimeError: Cannot reverse-map data since the original data shape is
    # unknown. Either set `dshape` in the constructor, or call train().
    ds0_rev_ = ds_.a.mapper.reverse1(ds_.samples[0])
    #ds0_rev_ = _verified_reverse1(ds_.a.mapper, ds_.samples[0])
    assert_equal(ds0_rev_.shape, (ds_orig.nfeatures,))
Example #46
def test_distances():
    a = np.array([3,8])
    b = np.array([6,4])
    # test distances or yarik recalls unit testing ;)
    assert_equal(cartesian_distance(a, b), 5.0)
    assert_equal(manhattan_distance(a, b), 7)
    assert_equal(absmin_distance(a, b), 4)
    # test that fixing typo didn't impact results
    assert_equal(manhattan_distance(a, b), manhatten_distance(a, b))
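With a = (3, 8) and b = (6, 4) the per-coordinate differences are (3, 4), so this is the classic 3-4-5 triangle: Euclidean distance sqrt(3**2 + 4**2) = 5, Manhattan distance 3 + 4 = 7, and the asserted absmin_distance value equals the largest absolute per-coordinate difference, max(3, 4) = 4. The same numbers with plain numpy:

    import numpy as np

    d = np.abs(np.array([3, 8]) - np.array([6, 4]))  # -> [3, 4]
    assert np.sqrt((d ** 2).sum()) == 5.0  # Euclidean
    assert d.sum() == 7                    # Manhattan
    assert d.max() == 4                    # the absmin_distance value above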
Example #47
def test_distances():
    a = np.array([3, 8])
    b = np.array([6, 4])
    # test distances or yarik recalls unit testing ;)
    assert_equal(cartesian_distance(a, b), 5.0)
    assert_equal(manhattan_distance(a, b), 7)
    assert_equal(absmin_distance(a, b), 4)
    # test that fixing typo didn't impact results
    assert_equal(manhattan_distance(a, b), manhatten_distance(a, b))
Example #48
def test_attrmap_repr():
    assert_equal(repr(AttributeMap()), "AttributeMap()")
    d = dict(a=2, b=1)
    assert_equal(repr(AttributeMap(d)),
                 "AttributeMap(%r)" % (d,))
    assert_equal(repr(AttributeMap(dict(a=2, b=1), mapnumeric=True)),
                 "AttributeMap(%r, mapnumeric=True)" % (d,))
    assert_equal(repr(AttributeMap(dict(a=2, b=1), mapnumeric=True, collisions_resolution='tuple')),
                 "AttributeMap(%r, mapnumeric=True, collisions_resolution='tuple')" % (d,))
Example #49
def test_sifter():
    # somewhat duplicating the doctest
    ds = Dataset(samples=np.arange(8).reshape((4,2)),
                 sa={'chunks':   [ 0 ,  1 ,  2 ,  3 ],
                     'targets':  ['c', 'c', 'p', 'p']})
    par = ChainNode([NFoldPartitioner(cvtype=2, attr='chunks'),
                     Sifter([('partitions', 2),
                             ('targets', ['c', 'p'])])
                     ])
    dss = list(par.generate(ds))
    assert_equal(len(dss), 4)
    for ds_ in dss:
        testing = ds[ds_.sa.partitions == 2]
        assert_array_equal(np.unique(testing.sa.targets), ['c', 'p'])
        # and we still have both targets  present in training
        training = ds[ds_.sa.partitions == 1]
        assert_array_equal(np.unique(training.sa.targets), ['c', 'p'])
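Why 4 partition sets survive: NFoldPartitioner(cvtype=2) over the 4 chunks produces C(4, 2) = 6 candidate partitionings, and the Sifter keeps only those whose test partition (partitions == 2) contains both targets; with two 'c' chunks and two 'p' chunks, that leaves 2 * 2 = 4 mixed pairs. A standard-library recount:

    from itertools import combinations

    targets = {0: 'c', 1: 'c', 2: 'p', 3: 'p'}
    mixed = [pair for pair in combinations(sorted(targets), 2)
             if set(targets[c] for c in pair) == set(['c', 'p'])]
    assert len(mixed) == 4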
Example #50
    def test_discarded_boundaries(self):
        ds = datasets["hollow"]
        # four runs
        ds.sa["chunks"] = np.repeat(np.arange(4), 10)
        # do odd even splitting for lots of boundaries in few splits
        part = ChainNode([OddEvenPartitioner(), StripBoundariesSamples("chunks", 1, 2)])

        parts = [d.samples.sid for d in part.generate(ds)]

        # both datasets should have the same samples, because the boundaries
        # are identical and the same samples are stripped
        assert_array_equal(parts[0], parts[1])

        # we strip 3 samples per boundary
        assert_equal(len(parts[0]), len(ds) - (3 * 3))

        for i in [9, 10, 11, 19, 20, 21, 29, 30, 31]:
            assert_false(i in parts[0])
Example #51
def test_sifter():
    # somewhat duplicating the doctest
    ds = Dataset(samples=np.arange(8).reshape((4, 2)), sa={"chunks": [0, 1, 2, 3], "targets": ["c", "c", "p", "p"]})
    for sift_targets_definition in (["c", "p"], dict(uvalues=["c", "p"])):
        par = ChainNode(
            [
                NFoldPartitioner(cvtype=2, attr="chunks"),
                Sifter([("partitions", 2), ("targets", sift_targets_definition)]),
            ]
        )
        dss = list(par.generate(ds))
        assert_equal(len(dss), 4)
        for ds_ in dss:
            testing = ds[ds_.sa.partitions == 2]
            assert_array_equal(np.unique(testing.sa.targets), ["c", "p"])
            # and we still have both targets  present in training
            training = ds[ds_.sa.partitions == 1]
            assert_array_equal(np.unique(training.sa.targets), ["c", "p"])
Example #52
 def test_slicing(self):
     hs = HalfPartitioner()
     spl = Splitter(attr='partitions')
     splits = list(hs.generate(self.data))
     for s in splits:
         # partitioned dataset shares the data
         assert_true(s.samples.base is self.data.samples)
     splits = [ list(spl.generate(p)) for p in hs.generate(self.data) ]
     for s in splits:
         # we get slicing all the time
         assert_true(s[0].samples.base.base is self.data.samples)
         assert_true(s[1].samples.base.base is self.data.samples)
     spl = Splitter(attr='partitions', noslicing=True)
     splits = [ list(spl.generate(p)) for p in hs.generate(self.data) ]
     for s in splits:
         # no slicing at all
         assert_false(s[0].samples.base is self.data.samples)
         assert_false(s[1].samples.base is self.data.samples)
     nfs = NFoldPartitioner()
     spl = Splitter(attr='partitions')
     splits = [ list(spl.generate(p)) for p in nfs.generate(self.data) ]
     for i, s in enumerate(splits):
         # training only first and last split
         if i == 0 or i == len(splits) - 1:
             assert_true(s[0].samples.base.base is self.data.samples)
         else:
             assert_true(s[0].samples.base is None)
         # we get slicing all the time
         assert_true(s[1].samples.base.base is self.data.samples)
     step_ds = Dataset(np.random.randn(20,2),
                       sa={'chunks': np.tile([0,1], 10)})
     oes = OddEvenPartitioner()
     spl = Splitter(attr='partitions')
     splits = list(oes.generate(step_ds))
     for s in splits:
         # partitioned dataset shares the data
         assert_true(s.samples.base is step_ds.samples)
     splits = [ list(spl.generate(p)) for p in oes.generate(step_ds) ]
     assert_equal(len(splits), 2)
     for s in splits:
         # we get slicing all the time
         assert_true(s[0].samples.base.base is step_ds.samples)
         assert_true(s[1].samples.base.base is step_ds.samples)
Example #53
    def test_discarded_boundaries(self):
        ds = datasets['hollow']
        # four runs
        ds.sa['chunks'] = np.repeat(np.arange(4), 10)
        # do odd even splitting for lots of boundaries in few splits
        part = ChainNode([OddEvenPartitioner(),
                          StripBoundariesSamples('chunks', 1, 2)])

        parts = [d.samples.sid for d in part.generate(ds)]

        # both datasets should have the same samples, because the boundaries
        # are identical and the same samples are stripped
        assert_array_equal(parts[0], parts[1])

        # we strip 3 samples per boundary
        assert_equal(len(parts[0]), len(ds) - (3 * 3))

        for i in [9, 10, 11, 19, 20, 21, 29, 30, 31]:
            assert_false(i in parts[0])
Example #54
def test_sifter():
    # somewhat duplicating the doctest
    ds = Dataset(samples=np.arange(8).reshape((4,2)),
                 sa={'chunks':   [ 0 ,  1 ,  2 ,  3 ],
                     'targets':  ['c', 'c', 'p', 'p']})
    for sift_targets_definition in (['c', 'p'],
                                    dict(uvalues=['c', 'p'])):
        par = ChainNode([NFoldPartitioner(cvtype=2, attr='chunks'),
                         Sifter([('partitions', 2),
                                 ('targets', sift_targets_definition)])
                         ])
        dss = list(par.generate(ds))
        assert_equal(len(dss), 4)
        for ds_ in dss:
            testing = ds[ds_.sa.partitions == 2]
            assert_array_equal(np.unique(testing.sa.targets), ['c', 'p'])
            # and we still have both targets  present in training
            training = ds[ds_.sa.partitions == 1]
            assert_array_equal(np.unique(training.sa.targets), ['c', 'p'])
Example #55
def test_mean_tpr_balanced():
    # for balanced sets mean_tpr should match mean_match_accuracy
    for nclass in range(2, 4):
        for nsample in range(1, 3):
            target = np.repeat(np.arange(nclass), nsample)
            # perfect match
            assert_equal(mean_match_accuracy(target, target), 1.0)
            assert_equal(mean_tpr(target, target), 1.0)
            # perfect mismatch -- shift by nsample, so no target matches
            estimate = np.roll(target, nsample)
            assert_equal(mean_match_accuracy(target, estimate), 0)
            assert_equal(mean_tpr(target, estimate), 0)
            # do few permutations and see if both match
            for i in range(5):
                np.random.shuffle(estimate)
                assert_equal(mean_tpr(target, estimate),
                             mean_match_accuracy(target, estimate))
                assert_almost_equal(mean_tpr(target, estimate),
                                    1 - mean_fnr(target, estimate))
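The final assertion rests on a per-class identity: every positive of class c is either recovered or missed, so TPR_c + FNR_c = 1, and averaging both sides over the same set of classes gives mean_tpr = 1 - mean_fnr for any labeling, balanced or not (hence only almost-equal, to allow for floating-point rounding).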
Example #56
def test_attrmap_repr():
    assert_equal(repr(AttributeMap()), "AttributeMap()")
    d = dict(a=2, b=1)
    assert_equal(repr(AttributeMap(d)), "AttributeMap(%r)" % (d, ))
    assert_equal(repr(AttributeMap(dict(a=2, b=1), mapnumeric=True)),
                 "AttributeMap(%r, mapnumeric=True)" % (d, ))
    assert_equal(
        repr(
            AttributeMap(dict(a=2, b=1),
                         mapnumeric=True,
                         collisions_resolution='tuple')),
        "AttributeMap(%r, mapnumeric=True, collisions_resolution='tuple')" %
        (d, ))
Example #57
    def _assert_rotation_maps_vector(r, x, y):
        # rotation must be 3x3 numpy array
        assert_equal(r.shape, (3, 3))
        assert_is_instance(r, np.ndarray)

        # rotation applied to x must yield direction of y
        # (modulo rounding errors)
        def normed(v):
            n_v = np.linalg.norm(v)

            return 0 if n_v == 0 else v / n_v

        rx = r.dot(x)

        rx_normed = normed(rx)
        y_normed = normed(y)
        assert_vector_direction_almost_equal(rx_normed, y_normed)

        # since it is a rotation, the result must have the same
        # L2 norm as the input
        assert_almost_equal(np.linalg.norm(x), np.linalg.norm(rx))