Esempio n. 1
0
def test_permute_chunks():

    def is_sorted(x):
        return np.array_equal(np.sort(x), x)

    ds = give_data()
    # change targets labels
    # there is no target labels permuting within chunks,
    # assure = True would be error
    ds.sa['targets'] = range(len(ds.sa.targets))
    permutation = AttributePermutator(attr='targets',
                                      chunk_attr='chunks',
                                      strategy='chunks',
                                      assure=True)

    pds = permutation(ds)

    assert_false(is_sorted(pds.sa.targets))
    assert_true(np.array_equal(pds.samples, ds.samples))
    for chunk_id in np.unique(pds.sa.chunks):
        chunk_ds = pds[pds.sa.chunks == chunk_id]
        assert_true(is_sorted(chunk_ds.sa.targets))
        
    permutation = AttributePermutator(attr='targets',
                                      strategy='chunks')
    assert_raises(ValueError, permutation, ds)
Esempio n. 2
0
def test_basic_collectable():
    c = Collectable()

    # empty by default
    assert_equal(c.name, None)
    assert_equal(c.value, None)
    assert_equal(c.__doc__, None)

    # late assignment
    c.name = 'somename'
    c.value = 12345
    assert_equal(c.name, 'somename')
    assert_equal(c.value, 12345)

    # immediate content
    c = Collectable('value', 'myname', "This is a test")
    assert_equal(c.name, 'myname')
    assert_equal(c.value, 'value')
    assert_equal(c.__doc__, "This is a test")
    assert_equal(str(c), 'myname')

    # repr
    e = eval(repr(c))
    assert_equal(e.name, 'myname')
    assert_equal(e.value, 'value')
    assert_equal(e.__doc__, "This is a test")

    # shallow copy does not create a view of value array
    c.value = np.arange(5)
    d = copy.copy(c)
    assert_false(d.value.base is c.value)

    # names starting with _ are not allowed
    assert_raises(ValueError, c._set_name, "_underscore")
Esempio n. 3
0
    def test_slicing(self):
        hs = HalfPartitioner()
        spl = Splitter(attr="partitions")
        splits = list(hs.generate(self.data))
        for s in splits:
            # partitioned dataset shared the data
            assert_true(s.samples.base is self.data.samples)
        splits = [list(spl.generate(p)) for p in hs.generate(self.data)]

        # with numpy 1.7.0b1 "chaining" was deprecated so let's create
        # check function appropriate for the given numpy version
        _a = np.arange(5)
        __a = _a[:4][:3]
        if __a.base is _a:
            # 1.7.0b1
            def is_the_same_base(x, base=self.data.samples):
                return x.base is base

        elif __a.base.base is _a:
            # prior 1.7.0b1
            def is_the_same_base(x, base=self.data.samples):
                return x.base.base is base

        else:
            raise RuntimeError("Uknown handling of .base by numpy")

        for s in splits:
            # we get slicing all the time
            assert_true(is_the_same_base(s[0].samples))
            assert_true(is_the_same_base(s[1].samples))
        spl = Splitter(attr="partitions", noslicing=True)
        splits = [list(spl.generate(p)) for p in hs.generate(self.data)]
        for s in splits:
            # we no slicing at all
            assert_false(s[0].samples.base is self.data.samples)
            assert_false(s[1].samples.base is self.data.samples)
        nfs = NFoldPartitioner()
        spl = Splitter(attr="partitions")
        splits = [list(spl.generate(p)) for p in nfs.generate(self.data)]
        for i, s in enumerate(splits):
            # training only first and last split
            if i == 0 or i == len(splits) - 1:
                assert_true(is_the_same_base(s[0].samples))
            else:
                assert_true(s[0].samples.base is None)
            # we get slicing all the time
            assert_true(is_the_same_base(s[1].samples))
        step_ds = Dataset(np.random.randn(20, 2), sa={"chunks": np.tile([0, 1], 10)})
        oes = OddEvenPartitioner()
        spl = Splitter(attr="partitions")
        splits = list(oes.generate(step_ds))
        for s in splits:
            # partitioned dataset shared the data
            assert_true(s.samples.base is step_ds.samples)
        splits = [list(spl.generate(p)) for p in oes.generate(step_ds)]
        assert_equal(len(splits), 2)
        for s in splits:
            # we get slicing all the time
            assert_true(is_the_same_base(s[0].samples, step_ds.samples))
            assert_true(is_the_same_base(s[1].samples, step_ds.samples))
Esempio n. 4
0
def test_strip_boundary():
    ds = datasets['hollow']
    ds.sa['btest'] = np.repeat([0, 1], 20)
    sn = StripBoundariesSamples('btest', 1, 2)
    sds = sn(ds)
    assert_equal(len(sds), len(ds) - 3)
    for i in [19, 20, 21]:
        assert_false(i in sds.samples.sid)
Esempio n. 5
0
def test_balancer():
    ds = give_data()
    # only mark the selection in an attribute
    bal = Balancer()
    res = bal(ds)
    # we get a new dataset, with shared samples
    assert_false(ds is res)
    assert_true(ds.samples is res.samples.base)
    # should kick out 2 samples in each chunk of 10
    assert_almost_equal(np.mean(res.sa.balanced_set), 0.8)
    # same as above, but actually apply the selection
    bal = Balancer(apply_selection=True, count=5)
    # just run it once
    res = bal(ds)
    # we get a new dataset, with shared samples
    assert_false(ds is res)
    # should kick out 2 samples in each chunk of 10
    assert_equal(len(res), int(0.8 * len(ds)))
    # now use it as a generator
    dses = list(bal.generate(ds))
    assert_equal(len(dses), 5)
    # with limit
    bal = Balancer(limit={'chunks': 3}, apply_selection=True)
    res = bal(ds)
    assert_equal(res.sa['chunks'].unique, (3,))
    assert_equal(get_nelements_per_value(res.sa.targets).values(),
                 [2] * 4)
    # same but include all offlimit samples
    bal = Balancer(limit={'chunks': 3}, include_offlimit=True,
                   apply_selection=True)
    res = bal(ds)
    assert_array_equal(res.sa['chunks'].unique, range(10))
    # chunk three still balanced, but the rest is not, i.e. all samples included
    assert_equal(get_nelements_per_value(res[res.sa.chunks == 3].sa.targets).values(),
                 [2] * 4)
    assert_equal(get_nelements_per_value(res.sa.chunks).values(),
                 [10, 10, 10, 8, 10, 10, 10, 10, 10, 10])
    # fixed amount
    bal = Balancer(amount=1, limit={'chunks': 3}, apply_selection=True)
    res = bal(ds)
    assert_equal(get_nelements_per_value(res.sa.targets).values(),
                 [1] * 4)
    # fraction
    bal = Balancer(amount=0.499, limit=None, apply_selection=True)
    res = bal(ds)
    assert_array_equal(
            np.round(np.array(get_nelements_per_value(ds.sa.targets).values()) * 0.5),
            np.array(get_nelements_per_value(res.sa.targets).values()))
    # check on feature attribute
    ds.fa['one'] = np.tile([1,2], 5)
    ds.fa['chk'] = np.repeat([1,2], 5)
    bal = Balancer(attr='one', amount=2, limit='chk', apply_selection=True)
    res = bal(ds)
    assert_equal(get_nelements_per_value(res.fa.one).values(),
                 [4] * 2)
Esempio n. 6
0
 def test_reprs(self):
     # very very basic test to see that there is no errors in reprs
     # of partitioners
     import mvpa2.generators.partition as mgp
     for sclass in (x for x in dir(mgp) if x.endswith('Partitioner')):
         args = (1,)
         if sclass == 'ExcludeTargetsCombinationsPartitioner':
             args += (1,1)
         pclass = getattr(mgp, sclass)
         r = repr(pclass(*args))
         assert_false('ERROR' in r)
Esempio n. 7
0
def test_cosmo_do_not_store_unsupported_datatype():
    ds = Dataset(np.zeros((0, 0)))

    class ArbitraryClass(object):
        pass

    ds.a['unused'] = ArbitraryClass()
    c = cosmo.map2cosmo(ds)
    assert_false('a' in c.keys())

    ds.a['foo'] = np.zeros((1,))
    c = cosmo.map2cosmo(ds)
    assert_true('a' in c.keys())
Esempio n. 8
0
def test_splitter():
    ds = give_data()
    # split with defaults
    spl1 = Splitter('chunks')
    assert_raises(NotImplementedError, spl1, ds)

    splits = list(spl1.generate(ds))
    assert_equal(len(splits), len(ds.sa['chunks'].unique))

    for split in splits:
        # it should have perform basic slicing!
        assert_true(split.samples.base is ds.samples)
        assert_equal(len(split.sa['chunks'].unique), 1)
        assert_true('lastsplit' in split.a)
    assert_true(splits[-1].a.lastsplit)

    # now again, more customized
    spl2 = Splitter('targets', attr_values = [0,1,1,2,3,3,3], count=4,
                   noslicing=True)
    splits = list(spl2.generate(ds))
    assert_equal(len(splits), 4)
    for split in splits:
        # it should NOT have perform basic slicing!
        assert_false(split.samples.base is ds.samples)
        assert_equal(len(split.sa['targets'].unique), 1)
        assert_equal(len(split.sa['chunks'].unique), 10)
    assert_true(splits[-1].a.lastsplit)

    # two should be identical
    assert_array_equal(splits[1].samples, splits[2].samples)

    # now go wild and split by feature attribute
    ds.fa['roi'] = np.repeat([0,1], 5)
    # splitter should auto-detect that this is a feature attribute
    spl3 = Splitter('roi')
    splits = list(spl3.generate(ds))
    assert_equal(len(splits), 2)
    for split in splits:
        assert_true(split.samples.base is ds.samples)
        assert_equal(len(split.fa['roi'].unique), 1)
        assert_equal(split.shape, (100, 5))

    # and finally test chained splitters
    cspl = ChainNode([spl2, spl3, spl1])
    splits = list(cspl.generate(ds))
    # 4 target splits and 2 roi splits each and 10 chunks each
    assert_equal(len(splits), 80)
Esempio n. 9
0
    def test_discarded_boundaries(self):
        ds = datasets["hollow"]
        # four runs
        ds.sa["chunks"] = np.repeat(np.arange(4), 10)
        # do odd even splitting for lots of boundaries in few splits
        part = ChainNode([OddEvenPartitioner(), StripBoundariesSamples("chunks", 1, 2)])

        parts = [d.samples.sid for d in part.generate(ds)]

        # both dataset should have the same samples, because the boundaries are
        # identical and the same sample should be stripped
        assert_array_equal(parts[0], parts[1])

        # we strip 3 samples per boundary
        assert_equal(len(parts[0]), len(ds) - (3 * 3))

        for i in [9, 10, 11, 19, 20, 21, 29, 30, 31]:
            assert_false(i in parts[0])
Esempio n. 10
0
 def test_slicing(self):
     hs = HalfPartitioner()
     spl = Splitter(attr='partitions')
     splits = list(hs.generate(self.data))
     for s in splits:
         # partitioned dataset shared the data
         assert_true(s.samples.base is self.data.samples)
     splits = [ list(spl.generate(p)) for p in hs.generate(self.data) ]
     for s in splits:
         # we get slicing all the time
         assert_true(s[0].samples.base.base is self.data.samples)
         assert_true(s[1].samples.base.base is self.data.samples)
     spl = Splitter(attr='partitions', noslicing=True)
     splits = [ list(spl.generate(p)) for p in hs.generate(self.data) ]
     for s in splits:
         # we no slicing at all
         assert_false(s[0].samples.base is self.data.samples)
         assert_false(s[1].samples.base is self.data.samples)
     nfs = NFoldPartitioner()
     spl = Splitter(attr='partitions')
     splits = [ list(spl.generate(p)) for p in nfs.generate(self.data) ]
     for i, s in enumerate(splits):
         # training only first and last split
         if i == 0 or i == len(splits) - 1:
             assert_true(s[0].samples.base.base is self.data.samples)
         else:
             assert_true(s[0].samples.base is None)
         # we get slicing all the time
         assert_true(s[1].samples.base.base is self.data.samples)
     step_ds = Dataset(np.random.randn(20,2),
                       sa={'chunks': np.tile([0,1], 10)})
     oes = OddEvenPartitioner()
     spl = Splitter(attr='partitions')
     splits = list(oes.generate(step_ds))
     for s in splits:
         # partitioned dataset shared the data
         assert_true(s.samples.base is step_ds.samples)
     splits = [ list(spl.generate(p)) for p in oes.generate(step_ds) ]
     assert_equal(len(splits), 2)
     for s in splits:
         # we get slicing all the time
         assert_true(s[0].samples.base.base is step_ds.samples)
         assert_true(s[1].samples.base.base is step_ds.samples)
Esempio n. 11
0
def test_permute_chunks():
    def is_sorted(x):
        return np.array_equal(np.sort(x), x)

    ds = give_data()
    # change targets labels
    # there is no target labels permuting within chunks,
    # assure = True would be error
    ds.sa['targets'] = range(len(ds.sa.targets))
    permutation = AttributePermutator(attr='targets',
                                      chunk_attr='chunks',
                                      strategy='chunks',
                                      assure=True)

    pds = permutation(ds)

    assert_false(is_sorted(pds.sa.targets))
    assert_true(np.array_equal(pds.samples, ds.samples))
    for chunk_id in np.unique(pds.sa.chunks):
        chunk_ds = pds[pds.sa.chunks == chunk_id]
        assert_true(is_sorted(chunk_ds.sa.targets))
Esempio n. 12
0
def test_transpose():
    from mvpa2.mappers.shape import TransposeMapper

    ds = Dataset(np.arange(24).reshape(2, 3, 4), sa={"testsa": np.arange(2)}, fa={"testfa": np.arange(3)})
    tp = TransposeMapper()
    tds = tp(ds)
    assert_equal(tds.shape, (3, 2, 4))
    assert_true("testfa" in tds.sa)
    assert_true("testsa" in tds.fa)
    assert_false(tds.fa is tds.sa)
    # and back
    ttds = tp(tds)
    assert_array_equal(ttds.samples, ds.samples)
    assert_equal(ttds.sa, ds.sa)
    assert_equal(ttds.fa, ds.fa)
    # or this way
    rds = tp.reverse(tds)
    assert_array_equal(rds.samples, ds.samples)
    assert_equal(rds.sa, ds.sa)
    assert_equal(rds.fa, ds.fa)
    assert_array_equal(rds.samples, ttds.samples)
    assert_equal(rds.sa, ttds.sa)
    assert_equal(rds.fa, ttds.fa)
Esempio n. 13
0
def test_transpose():
    from mvpa2.mappers.shape import TransposeMapper
    ds = Dataset(np.arange(24).reshape(2, 3, 4),
                 sa={'testsa': np.arange(2)},
                 fa={'testfa': np.arange(3)})
    tp = TransposeMapper()
    tds = tp(ds)
    assert_equal(tds.shape, (3, 2, 4))
    assert_true('testfa' in tds.sa)
    assert_true('testsa' in tds.fa)
    assert_false(tds.fa is tds.sa)
    # and back
    ttds = tp(tds)
    assert_array_equal(ttds.samples, ds.samples)
    assert_equal(ttds.sa, ds.sa)
    assert_equal(ttds.fa, ds.fa)
    # or this way
    rds = tp.reverse(tds)
    assert_array_equal(rds.samples, ds.samples)
    assert_equal(rds.sa, ds.sa)
    assert_equal(rds.fa, ds.fa)
    assert_array_equal(rds.samples, ttds.samples)
    assert_equal(rds.sa, ttds.sa)
    assert_equal(rds.fa, ttds.fa)
Esempio n. 14
0
def test_attrpermute():

    # Was about to use borrowkwargs but didn't work out . Test doesn't hurt
    doc = AttributePermutator.__init__.__doc__
    assert_in('limit : ', doc)
    assert_not_in('collection : ', doc)

    ds = give_data()
    ds.sa['ids'] = range(len(ds))
    pristine_data = ds.samples.copy()
    permutation = AttributePermutator(['targets', 'ids'], assure=True)
    pds = permutation(ds)
    # should not touch the data
    assert_array_equal(pristine_data, pds.samples)
    # even keep the very same array
    assert_true(pds.samples.base is ds.samples)
    # there is no way that it can be the same attribute
    assert_false(np.all(pds.sa.ids == ds.sa.ids))
    # ids should reflect permutation setup
    assert_array_equal(pds.sa.targets, ds.sa.targets[pds.sa.ids])
    # other attribute should remain intact
    assert_array_equal(pds.sa.chunks, ds.sa.chunks)

    # now chunk-wise permutation
    permutation = AttributePermutator('ids', limit='chunks')
    pds = permutation(ds)
    # first ten should remain first ten
    assert_false(np.any(pds.sa.ids[:10] > 9))

    # verify that implausible assure=True would not work
    permutation = AttributePermutator('targets', limit='ids', assure=True)
    assert_raises(RuntimeError, permutation, ds)

    # same thing, but only permute single chunk
    permutation = AttributePermutator('ids', limit={'chunks': 3})
    pds = permutation(ds)
    # one chunk should change
    assert_false(np.any(pds.sa.ids[30:40] > 39))
    assert_false(np.any(pds.sa.ids[30:40] < 30))
    # the rest not
    assert_array_equal(pds.sa.ids[:30], range(30))

    # or a list of chunks
    permutation = AttributePermutator('ids', limit={'chunks': [3,4]})
    pds = permutation(ds)
    # two chunks should change
    assert_false(np.any(pds.sa.ids[30:50] > 49))
    assert_false(np.any(pds.sa.ids[30:50] < 30))
    # the rest not
    assert_array_equal(pds.sa.ids[:30], range(30))

    # and now try generating more permutations
    nruns = 2
    def assert_all_different_permutations(pds):
        assert_equal(len(pds), nruns)
        for i, p in enumerate(pds):
            assert_false(np.all(p.sa.ids == ds.sa.ids))
            for p_ in pds[i+1:]:
                assert_false(np.all(p.sa.ids == p_.sa.ids))

    permutation = AttributePermutator(['targets', 'ids'],
                                      assure=True, count=nruns)
    pds = list(permutation.generate(ds))
    assert_all_different_permutations(pds)

    # if we provide seeding, and generate, it should also return different datasets
    permutation = AttributePermutator(['targets', 'ids'],
                                      count=nruns, rng=1)
    pds1 = list(permutation.generate(ds))
    assert_all_different_permutations(pds)

    # but if we regenerate -- should all be the same to before
    pds2 = list(permutation.generate(ds))
    assert_equal(len(pds1), len(pds2))
    for p1, p2 in zip(pds1, pds2):
        assert_datasets_equal(p1, p2)

    # permute feature attrs
    ds.fa['ids'] = range(ds.shape[1])
    permutation = AttributePermutator('fa.ids', assure=True)
    pds = permutation(ds)
    assert_false(np.all(pds.fa.ids == ds.fa.ids))

    # now chunk-wise uattrs strategy (reassignment)
    permutation = AttributePermutator('targets', limit='chunks',
                                      strategy='uattrs', assure=True)
    pds = permutation(ds)
    # Due to assure above -- we should have changed things
    assert_not_equal(zip(ds.targets), zip(pds.targets))
    # in each chunk we should have unique remappings
    for c in ds.UC:
        chunk_idx = ds.C == c
        otargets, ptargets = ds.targets[chunk_idx], pds.sa.targets[chunk_idx]
        # we still have the same targets
        assert_equal(set(ptargets), set(otargets))
        # we have only 1-to-1 mappings
        assert_true(len(set(zip(otargets, ptargets))), len(set(otargets)))

    ds.sa['odds'] = ds.sa.ids % 2
    # test combinations
    permutation = AttributePermutator(['targets', 'odds'], limit='chunks',
                                       strategy='uattrs', assure=True)
    pds = permutation(ds)
    # Due to assure above -- we should have changed things
    assert_not_equal(zip(ds.targets,   ds.sa.odds),
                     zip(pds.targets, pds.sa.odds))
    # In each chunk we should have unique remappings
    for c in ds.UC:
        chunk_idx = ds.C == c
        otargets, ptargets = ds.targets[chunk_idx], pds.sa.targets[chunk_idx]
        oodds, podds = ds.sa.odds[chunk_idx], pds.sa.odds[chunk_idx]
        # we still have the same targets
        assert_equal(set(ptargets), set(otargets))
        assert_equal(set(oodds), set(podds))
        # at the end we have the same mapping
        assert_equal(set(zip(otargets, oodds)), set(zip(ptargets, podds)))
Esempio n. 15
0
def test_flatten():
    samples_shape = (2, 2, 4)
    data_shape = (4,) + samples_shape
    data = np.arange(np.prod(data_shape)).reshape(data_shape).view(myarray)
    pristinedata = data.copy()
    target = [[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31],
              [32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47],
              [48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63]]
    target = np.array(target).view(myarray)
    index_target = np.array([[0, 0, 0], [0, 0, 1], [0, 0, 2], [0, 0, 3],
                            [0, 1, 0], [0, 1, 1], [0, 1, 2], [0, 1, 3],
                            [1, 0, 0], [1, 0, 1], [1, 0, 2], [1, 0, 3],
                            [1, 1, 0], [1, 1, 1], [1, 1, 2], [1, 1, 3]])

    # test only flattening the first two dimensions
    fm_max = FlattenMapper(maxdims=2)
    fm_max.train(data)
    assert_equal(fm_max(data).shape, (4, 4, 4))

    # array subclass survives
    ok_(isinstance(data, myarray))

    # actually, there should be no difference between a plain FlattenMapper and
    # a chain that only has a FlattenMapper as the one element
    for fm in [FlattenMapper(space='voxel'),
               ChainMapper([FlattenMapper(space='voxel'),
                            StaticFeatureSelection(slice(None))])]:
        # not working if untrained
        assert_raises(RuntimeError,
                      fm.forward1,
                      np.arange(np.sum(samples_shape) + 1))

        fm.train(data)

        ok_(isinstance(fm.forward(data), myarray))
        ok_(isinstance(fm.forward1(data[2]), myarray))
        assert_array_equal(fm.forward(data), target)
        assert_array_equal(fm.forward1(data[2]), target[2])
        assert_raises(ValueError, fm.forward, np.arange(4))

        # all of that leaves that data unmodified
        assert_array_equal(data, pristinedata)

        # reverse mapping
        ok_(isinstance(fm.reverse(target), myarray))
        ok_(isinstance(fm.reverse1(target[0]), myarray))
        ok_(isinstance(fm.reverse(target[1:2]), myarray))
        assert_array_equal(fm.reverse(target), data)
        assert_array_equal(fm.reverse1(target[0]), data[0])
        assert_array_equal(fm.reverse1(target[0]),
                           _verified_reverse1(fm, target[0]))
        assert_array_equal(fm.reverse(target[1:2]), data[1:2])
        assert_raises(ValueError, fm.reverse, np.arange(14))

        # check one dimensional data, treated as scalar samples
        oned = np.arange(5)
        fm.train(Dataset(oned))
        # needs 2D
        assert_raises(ValueError, fm.forward, oned)
        # doesn't match mapper, since Dataset turns `oned` into (5,1)
        assert_raises(ValueError, fm.forward, oned)
        assert_equal(Dataset(oned).nfeatures, 1)

        # try dataset mode, with some feature attribute
        fattr = np.arange(np.prod(samples_shape)).reshape(samples_shape)
        ds = Dataset(data, fa={'awesome': fattr.copy()})
        assert_equal(ds.samples.shape, data_shape)
        fm.train(ds)
        dsflat = fm.forward(ds)
        ok_(isinstance(dsflat, Dataset))
        ok_(isinstance(dsflat.samples, myarray))
        assert_array_equal(dsflat.samples, target)
        assert_array_equal(dsflat.fa.awesome, np.arange(np.prod(samples_shape)))
        assert_true(isinstance(dsflat.fa['awesome'], ArrayCollectable))
        # test index creation
        assert_array_equal(index_target, dsflat.fa.voxel)

        # and back
        revds = fm.reverse(dsflat)
        ok_(isinstance(revds, Dataset))
        ok_(isinstance(revds.samples, myarray))
        assert_array_equal(revds.samples, data)
        assert_array_equal(revds.fa.awesome, fattr)
        assert_true(isinstance(revds.fa['awesome'], ArrayCollectable))
        assert_false('voxel' in revds.fa)
Esempio n. 16
0
def test_attrpermute():
    ds = give_data()
    ds.sa['ids'] = range(len(ds))
    pristine_data = ds.samples.copy()
    permutation = AttributePermutator(['targets', 'ids'], assure=True)
    pds = permutation(ds)
    # should not touch the data
    assert_array_equal(pristine_data, pds.samples)
    # even keep the very same array
    assert_true(pds.samples.base is ds.samples)
    # there is no way that it can be the same attribute
    assert_false(np.all(pds.sa.ids == ds.sa.ids))
    # ids should reflect permutation setup
    assert_array_equal(pds.sa.targets, ds.sa.targets[pds.sa.ids])
    # other attribute should remain intact
    assert_array_equal(pds.sa.chunks, ds.sa.chunks)

    # now chunk-wise permutation
    permutation = AttributePermutator('ids', limit='chunks')
    pds = permutation(ds)
    # first ten should remain first ten
    assert_false(np.any(pds.sa.ids[:10] > 9))

    # same thing, but only permute single chunk
    permutation = AttributePermutator('ids', limit={'chunks': 3})
    pds = permutation(ds)
    # one chunk should change
    assert_false(np.any(pds.sa.ids[30:40] > 39))
    assert_false(np.any(pds.sa.ids[30:40] < 30))
    # the rest not
    assert_array_equal(pds.sa.ids[:30], range(30))

    # or a list of chunks
    permutation = AttributePermutator('ids', limit={'chunks': [3,4]})
    pds = permutation(ds)
    # two chunks should change
    assert_false(np.any(pds.sa.ids[30:50] > 49))
    assert_false(np.any(pds.sa.ids[30:50] < 30))
    # the rest not
    assert_array_equal(pds.sa.ids[:30], range(30))

    # and now try generating more permutations
    nruns = 2
    permutation = AttributePermutator(['targets', 'ids'], assure=True, count=nruns)
    pds = list(permutation.generate(ds))
    assert_equal(len(pds), nruns)
    for p in pds:
        assert_false(np.all(p.sa.ids == ds.sa.ids))

    # permute feature attrs
    ds.fa['ids'] = range(ds.shape[1])
    permutation = AttributePermutator('fa.ids', assure=True)
    pds = permutation(ds)
    assert_false(np.all(pds.fa.ids == ds.fa.ids))
Esempio n. 17
0
 def assert_all_different_permutations(pds):
     assert_equal(len(pds), nruns)
     for i, p in enumerate(pds):
         assert_false(np.all(p.sa.ids == ds.sa.ids))
         for p_ in pds[i+1:]:
             assert_false(np.all(p.sa.ids == p_.sa.ids))
Esempio n. 18
0
def test_flatten():
    samples_shape = (2, 2, 4)
    data_shape = (4,) + samples_shape
    data = np.arange(np.prod(data_shape)).reshape(data_shape).view(myarray)
    pristinedata = data.copy()
    target = [[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31],
              [32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47],
              [48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63]]
    target = np.array(target).view(myarray)
    index_target = np.array([[0, 0, 0], [0, 0, 1], [0, 0, 2], [0, 0, 3],
                            [0, 1, 0], [0, 1, 1], [0, 1, 2], [0, 1, 3],
                            [1, 0, 0], [1, 0, 1], [1, 0, 2], [1, 0, 3],
                            [1, 1, 0], [1, 1, 1], [1, 1, 2], [1, 1, 3]])

    # test only flattening the first two dimensions
    fm_max = FlattenMapper(maxdims=2)
    fm_max.train(data)
    assert_equal(fm_max(data).shape, (4, 4, 4))

    # array subclass survives
    ok_(isinstance(data, myarray))

    # actually, there should be no difference between a plain FlattenMapper and
    # a chain that only has a FlattenMapper as the one element
    for fm in [FlattenMapper(space='voxel'),
               ChainMapper([FlattenMapper(space='voxel'),
                            StaticFeatureSelection(slice(None))])]:
        # not working if untrained
        assert_raises(RuntimeError,
                      fm.forward1,
                      np.arange(np.sum(samples_shape) + 1))

        fm.train(data)

        ok_(isinstance(fm.forward(data), myarray))
        ok_(isinstance(fm.forward1(data[2]), myarray))
        assert_array_equal(fm.forward(data), target)
        assert_array_equal(fm.forward1(data[2]), target[2])
        assert_raises(ValueError, fm.forward, np.arange(4))

        # all of that leaves that data unmodified
        assert_array_equal(data, pristinedata)

        # reverse mapping
        ok_(isinstance(fm.reverse(target), myarray))
        ok_(isinstance(fm.reverse1(target[0]), myarray))
        ok_(isinstance(fm.reverse(target[1:2]), myarray))
        assert_array_equal(fm.reverse(target), data)
        assert_array_equal(fm.reverse1(target[0]), data[0])
        assert_array_equal(fm.reverse(target[1:2]), data[1:2])
        assert_raises(ValueError, fm.reverse, np.arange(14))

        # check one dimensional data, treated as scalar samples
        oned = np.arange(5)
        fm.train(Dataset(oned))
        # needs 2D
        assert_raises(ValueError, fm.forward, oned)
        # doesn't match mapper, since Dataset turns `oned` into (5,1)
        assert_raises(ValueError, fm.forward, oned)
        assert_equal(Dataset(oned).nfeatures, 1)

        # try dataset mode, with some feature attribute
        fattr = np.arange(np.prod(samples_shape)).reshape(samples_shape)
        ds = Dataset(data, fa={'awesome': fattr.copy()})
        assert_equal(ds.samples.shape, data_shape)
        fm.train(ds)
        dsflat = fm.forward(ds)
        ok_(isinstance(dsflat, Dataset))
        ok_(isinstance(dsflat.samples, myarray))
        assert_array_equal(dsflat.samples, target)
        assert_array_equal(dsflat.fa.awesome, np.arange(np.prod(samples_shape)))
        assert_true(isinstance(dsflat.fa['awesome'], ArrayCollectable))
        # test index creation
        assert_array_equal(index_target, dsflat.fa.voxel)

        # and back
        revds = fm.reverse(dsflat)
        ok_(isinstance(revds, Dataset))
        ok_(isinstance(revds.samples, myarray))
        assert_array_equal(revds.samples, data)
        assert_array_equal(revds.fa.awesome, fattr)
        assert_true(isinstance(revds.fa['awesome'], ArrayCollectable))
        assert_false('voxel' in revds.fa)
Esempio n. 19
0
def test_balancer():
    ds = give_data()
    ds.sa['ids'] = np.arange(len(ds))  # some sa to ease tracking of samples

    # only mark the selection in an attribute
    bal = Balancer()
    res = bal(ds)
    # we get a new dataset, with shared samples
    assert_false(ds is res)
    assert_true(ds.samples is res.samples.base)
    # should kick out 2 samples in each chunk of 10
    assert_almost_equal(np.mean(res.sa.balanced_set), 0.8)
    # same as above, but actually apply the selection
    bal = Balancer(apply_selection=True, count=5)
    # just run it once
    res = bal(ds)
    # we get a new dataset, with shared samples
    assert_false(ds is res)
    # should kick out 2 samples in each chunk of 10
    assert_equal(len(res), int(0.8 * len(ds)))
    # now use it as a generator
    dses = list(bal.generate(ds))
    assert_equal(len(dses), 5)

    # if we rerun again, it would be a different selection
    res2 = bal(ds)
    assert_true(np.any(res.sa.ids != bal(ds).sa.ids))

    # but if we create a balancer providing seed rng int,
    # should be identical results
    bal = Balancer(apply_selection=True, count=5, rng=1)
    assert_false(np.any(bal(ds).sa.ids != bal(ds).sa.ids))

    # But results should differ if we use .generate to produce those multiple
    # balanced datasets
    b = Balancer(apply_selection=True, count=3, rng=1)
    balanced = list(b.generate(ds))
    assert_false(all(balanced[0].sa.ids == balanced[1].sa.ids))
    assert_false(all(balanced[0].sa.ids == balanced[2].sa.ids))
    assert_false(all(balanced[1].sa.ids == balanced[2].sa.ids))

    # And should be exactly the same
    for ds_a, ds_b in zip(balanced, b.generate(ds)):
        assert_datasets_equal(ds_a, ds_b)

    # Contribution by Chris Markiewicz
    # And interleaving __call__ and generator fetches
    gen1 = b.generate(ds)
    gen2 = b.generate(ds)

    seq1, seq2, seq3 = [], [], []

    for i in xrange(3):
        seq1.append(gen1.next())
        seq2.append(gen2.next())
        seq3.append(b(ds))

    # Produces expected sequences

    for i in xrange(3):
        assert_datasets_equal(balanced[i], seq1[i])
        assert_datasets_equal(balanced[i], seq2[i])

    # And all __call__s return the same result
    ds_a = seq3[0]
    for ds_b in seq3[1:]:
        assert_array_equal(ds_a.sa.ids, ds_b.sa.ids)

    # with limit
    bal = Balancer(limit={'chunks': 3}, apply_selection=True)
    res = bal(ds)
    assert_equal(res.sa['chunks'].unique, (3,))
    assert_equal(get_nelements_per_value(res.sa.targets).values(),
                 [2] * 4)
    # same but include all offlimit samples
    bal = Balancer(limit={'chunks': 3}, include_offlimit=True,
                   apply_selection=True)
    res = bal(ds)
    assert_array_equal(res.sa['chunks'].unique, range(10))
    # chunk three still balanced, but the rest is not, i.e. all samples included
    assert_equal(get_nelements_per_value(res[res.sa.chunks == 3].sa.targets).values(),
                 [2] * 4)
    assert_equal(get_nelements_per_value(res.sa.chunks).values(),
                 [10, 10, 10, 8, 10, 10, 10, 10, 10, 10])
    # fixed amount
    bal = Balancer(amount=1, limit={'chunks': 3}, apply_selection=True)
    res = bal(ds)
    assert_equal(get_nelements_per_value(res.sa.targets).values(),
                 [1] * 4)
    # fraction
    bal = Balancer(amount=0.499, limit=None, apply_selection=True)
    res = bal(ds)
    assert_array_equal(
            np.round(np.array(get_nelements_per_value(ds.sa.targets).values()) * 0.5),
            np.array(get_nelements_per_value(res.sa.targets).values()))
    # check on feature attribute
    ds.fa['one'] = np.tile([1, 2], 5)
    ds.fa['chk'] = np.repeat([1, 2], 5)
    bal = Balancer(attr='one', amount=2, limit='chk', apply_selection=True)
    res = bal(ds)
    assert_equal(get_nelements_per_value(res.fa.one).values(),
                 [4] * 2)
Esempio n. 20
0
def test_balancer():
    ds = give_data()
    ds.sa['ids'] = np.arange(len(ds))  # some sa to ease tracking of samples

    # only mark the selection in an attribute
    bal = Balancer()
    res = bal(ds)
    # we get a new dataset, with shared samples
    assert_false(ds is res)
    assert_true(ds.samples is res.samples.base)
    # should kick out 2 samples in each chunk of 10
    assert_almost_equal(np.mean(res.sa.balanced_set), 0.8)
    # same as above, but actually apply the selection
    bal = Balancer(apply_selection=True, count=5)
    # just run it once
    res = bal(ds)
    # we get a new dataset, with shared samples
    assert_false(ds is res)
    # should kick out 2 samples in each chunk of 10
    assert_equal(len(res), int(0.8 * len(ds)))
    # now use it as a generator
    dses = list(bal.generate(ds))
    assert_equal(len(dses), 5)

    # if we rerun again, it would be a different selection
    res2 = bal(ds)
    assert_true(np.any(res.sa.ids != bal(ds).sa.ids))

    # but if we create a balancer providing seed rng int,
    # should be identical results
    bal = Balancer(apply_selection=True, count=5, rng=1)
    assert_false(np.any(bal(ds).sa.ids != bal(ds).sa.ids))

    # But results should differ if we use .generate to produce those multiple
    # balanced datasets
    b = Balancer(apply_selection=True, count=3, rng=1)
    balanced = list(b.generate(ds))
    assert_false(all(balanced[0].sa.ids == balanced[1].sa.ids))
    assert_false(all(balanced[0].sa.ids == balanced[2].sa.ids))
    assert_false(all(balanced[1].sa.ids == balanced[2].sa.ids))

    # And should be exactly the same
    for ds_a, ds_b in zip(balanced, b.generate(ds)):
        assert_datasets_equal(ds_a, ds_b)

    # Contribution by Chris Markiewicz
    # And interleaving __call__ and generator fetches
    gen1 = b.generate(ds)
    gen2 = b.generate(ds)

    seq1, seq2, seq3 = [], [], []

    for i in xrange(3):
        seq1.append(gen1.next())
        seq2.append(gen2.next())
        seq3.append(b(ds))

    # Produces expected sequences

    for i in xrange(3):
        assert_datasets_equal(balanced[i], seq1[i])
        assert_datasets_equal(balanced[i], seq2[i])

    # And all __call__s return the same result
    ds_a = seq3[0]
    for ds_b in seq3[1:]:
        assert_array_equal(ds_a.sa.ids, ds_b.sa.ids)

    # with limit
    bal = Balancer(limit={'chunks': 3}, apply_selection=True)
    res = bal(ds)
    assert_equal(res.sa['chunks'].unique, (3, ))
    assert_equal(get_nelements_per_value(res.sa.targets).values(), [2] * 4)
    # same but include all offlimit samples
    bal = Balancer(limit={'chunks': 3},
                   include_offlimit=True,
                   apply_selection=True)
    res = bal(ds)
    assert_array_equal(res.sa['chunks'].unique, range(10))
    # chunk three still balanced, but the rest is not, i.e. all samples included
    assert_equal(
        get_nelements_per_value(res[res.sa.chunks == 3].sa.targets).values(),
        [2] * 4)
    assert_equal(
        get_nelements_per_value(res.sa.chunks).values(),
        [10, 10, 10, 8, 10, 10, 10, 10, 10, 10])
    # fixed amount
    bal = Balancer(amount=1, limit={'chunks': 3}, apply_selection=True)
    res = bal(ds)
    assert_equal(get_nelements_per_value(res.sa.targets).values(), [1] * 4)
    # fraction
    bal = Balancer(amount=0.499, limit=None, apply_selection=True)
    res = bal(ds)
    assert_array_equal(
        np.round(
            np.array(get_nelements_per_value(ds.sa.targets).values()) * 0.5),
        np.array(get_nelements_per_value(res.sa.targets).values()))
    # check on feature attribute
    ds.fa['one'] = np.tile([1, 2], 5)
    ds.fa['chk'] = np.repeat([1, 2], 5)
    bal = Balancer(attr='one', amount=2, limit='chk', apply_selection=True)
    res = bal(ds)
    assert_equal(get_nelements_per_value(res.fa.one).values(), [4] * 2)
Esempio n. 21
0
def test_attrpermute():
    ds = give_data()
    ds.sa['ids'] = range(len(ds))
    pristine_data = ds.samples.copy()
    permutation = AttributePermutator(['targets', 'ids'], assure=True)
    pds = permutation(ds)
    # should not touch the data
    assert_array_equal(pristine_data, pds.samples)
    # even keep the very same array
    assert_true(pds.samples.base is ds.samples)
    # there is no way that it can be the same attribute
    assert_false(np.all(pds.sa.ids == ds.sa.ids))
    # ids should reflect permutation setup
    assert_array_equal(pds.sa.targets, ds.sa.targets[pds.sa.ids])
    # other attribute should remain intact
    assert_array_equal(pds.sa.chunks, ds.sa.chunks)

    # now chunk-wise permutation
    permutation = AttributePermutator('ids', limit='chunks')
    pds = permutation(ds)
    # first ten should remain first ten
    assert_false(np.any(pds.sa.ids[:10] > 9))

    # verify that implausible assure=True would not work
    permutation = AttributePermutator('targets', limit='ids', assure=True)
    assert_raises(RuntimeError, permutation, ds)

    # same thing, but only permute single chunk
    permutation = AttributePermutator('ids', limit={'chunks': 3})
    pds = permutation(ds)
    # one chunk should change
    assert_false(np.any(pds.sa.ids[30:40] > 39))
    assert_false(np.any(pds.sa.ids[30:40] < 30))
    # the rest not
    assert_array_equal(pds.sa.ids[:30], range(30))

    # or a list of chunks
    permutation = AttributePermutator('ids', limit={'chunks': [3,4]})
    pds = permutation(ds)
    # two chunks should change
    assert_false(np.any(pds.sa.ids[30:50] > 49))
    assert_false(np.any(pds.sa.ids[30:50] < 30))
    # the rest not
    assert_array_equal(pds.sa.ids[:30], range(30))

    # and now try generating more permutations
    nruns = 2
    permutation = AttributePermutator(['targets', 'ids'],
                                      assure=True, count=nruns)
    pds = list(permutation.generate(ds))
    assert_equal(len(pds), nruns)
    for p in pds:
        assert_false(np.all(p.sa.ids == ds.sa.ids))

    # permute feature attrs
    ds.fa['ids'] = range(ds.shape[1])
    permutation = AttributePermutator('fa.ids', assure=True)
    pds = permutation(ds)
    assert_false(np.all(pds.fa.ids == ds.fa.ids))

    # now chunk-wise uattrs strategy (reassignment)
    permutation = AttributePermutator('targets', limit='chunks',
                                      strategy='uattrs', assure=True)
    pds = permutation(ds)
    # Due to assure above -- we should have changed things
    assert_not_equal(zip(ds.targets), zip(pds.targets))
    # in each chunk we should have unique remappings
    for c in ds.UC:
        chunk_idx = ds.C == c
        otargets, ptargets = ds.targets[chunk_idx], pds.sa.targets[chunk_idx]
        # we still have the same targets
        assert_equal(set(ptargets), set(otargets))
        # we have only 1-to-1 mappings
        assert_true(len(set(zip(otargets, ptargets))), len(set(otargets)))

    ds.sa['odds'] = ds.sa.ids % 2
    # test combinations
    permutation = AttributePermutator(['targets', 'odds'], limit='chunks',
                                       strategy='uattrs', assure=True)
    pds = permutation(ds)
    # Due to assure above -- we should have changed things
    assert_not_equal(zip(ds.targets,   ds.sa.odds),
                     zip(pds.targets, pds.sa.odds))
    # In each chunk we should have unique remappings
    for c in ds.UC:
        chunk_idx = ds.C == c
        otargets, ptargets = ds.targets[chunk_idx], pds.sa.targets[chunk_idx]
        oodds, podds = ds.sa.odds[chunk_idx], pds.sa.odds[chunk_idx]
        # we still have the same targets
        assert_equal(set(ptargets), set(otargets))
        assert_equal(set(oodds), set(podds))
        # at the end we have the same mapping
        assert_equal(set(zip(otargets, oodds)), set(zip(ptargets, podds)))
Esempio n. 22
0
def test_attrmap():
    map_default = {'eins': 0, 'zwei': 2, 'sieben': 1}
    map_custom = {'eins': 11, 'zwei': 22, 'sieben': 33}
    literal = ['eins', 'zwei', 'sieben', 'eins', 'sieben', 'eins']
    literal_nonmatching = ['uno', 'dos', 'tres']
    num_default = [0, 2, 1, 0, 1, 0]
    num_custom = [11, 22, 33, 11, 33, 11]

    # no custom mapping given
    am = AttributeMap()
    assert_false(am)
    ok_(len(am) == 0)
    assert_array_equal(am.to_numeric(literal), num_default)
    assert_array_equal(am.to_literal(num_default), literal)
    ok_(am)
    ok_(len(am) == 3)

    #
    # Tests for recursive mapping + preserving datatype
    class myarray(np.ndarray):
        pass

    assert_raises(KeyError, am.to_literal, [(1, 2), 2, 0])
    literal_fancy = [(1, 2), 2, [0], np.array([0, 1]).view(myarray)]
    literal_fancy_tuple = tuple(literal_fancy)
    literal_fancy_array = np.array(literal_fancy, dtype=object)

    for l in (literal_fancy, literal_fancy_tuple, literal_fancy_array):
        res = am.to_literal(l, recurse=True)
        assert_equal(res[0], ('sieben', 'zwei'))
        assert_equal(res[1], 'zwei')
        assert_equal(res[2], ['eins'])
        assert_array_equal(res[3], ['eins', 'sieben'])

        # types of result and subsequences should be preserved
        ok_(isinstance(res, l.__class__))
        ok_(isinstance(res[0], tuple))
        ok_(isinstance(res[1], str))
        ok_(isinstance(res[2], list))
        ok_(isinstance(res[3], myarray))

    # yet another example
    a = np.empty(1, dtype=object)
    a[0] = (0, 1)
    res = am.to_literal(a, recurse=True)
    ok_(isinstance(res[0], tuple))

    #
    # with custom mapping
    am = AttributeMap(map=map_custom)
    assert_array_equal(am.to_numeric(literal), num_custom)
    assert_array_equal(am.to_literal(num_custom), literal)

    # if not numeric nothing is mapped
    assert_array_equal(am.to_numeric(num_custom), num_custom)
    # even if the map doesn't fit
    assert_array_equal(am.to_numeric(num_default), num_default)

    # need to_numeric first
    am = AttributeMap()
    assert_raises(RuntimeError, am.to_literal, [1, 2, 3])
    # stupid args
    assert_raises(ValueError, AttributeMap, map=num_custom)

    # map mismatch
    am = AttributeMap(map=map_custom)
    if __debug__:
        # checked only in __debug__
        assert_raises(KeyError, am.to_numeric, literal_nonmatching)
    # needs reset and should work afterwards
    am.clear()
    assert_array_equal(am.to_numeric(literal_nonmatching), [2, 0, 1])
    # and now reverse
    am = AttributeMap(map=map_custom)
    assert_raises(KeyError, am.to_literal, num_default)

    # dict-like interface
    am = AttributeMap()

    ok_([(k, v) for k, v in am.items()] == [])
Esempio n. 23
0
def test_attrmap():
    map_default = {'eins': 0, 'zwei': 2, 'sieben': 1}
    map_custom = {'eins': 11, 'zwei': 22, 'sieben': 33}
    literal = ['eins', 'zwei', 'sieben', 'eins', 'sieben', 'eins']
    literal_nonmatching = ['uno', 'dos', 'tres']
    num_default = [0, 2, 1, 0, 1, 0]
    num_custom = [11, 22, 33, 11, 33, 11]

    # no custom mapping given
    am = AttributeMap()
    assert_false(am)
    ok_(len(am) == 0)
    assert_array_equal(am.to_numeric(literal), num_default)
    assert_array_equal(am.to_literal(num_default), literal)
    ok_(am)
    ok_(len(am) == 3)

    #
    # Tests for recursive mapping + preserving datatype
    class myarray(np.ndarray):
        pass

    assert_raises(KeyError, am.to_literal, [(1, 2), 2, 0])
    literal_fancy = [(1, 2), 2, [0], np.array([0, 1]).view(myarray)]
    literal_fancy_tuple = tuple(literal_fancy)
    literal_fancy_array = np.array(literal_fancy, dtype=object)

    for l in (literal_fancy, literal_fancy_tuple,
              literal_fancy_array):
        res = am.to_literal(l, recurse=True)
        assert_equal(res[0], ('sieben', 'zwei'))
        assert_equal(res[1], 'zwei')
        assert_equal(res[2], ['eins'])
        assert_array_equal(res[3], ['eins', 'sieben'])

        # types of result and subsequences should be preserved
        ok_(isinstance(res, l.__class__))
        ok_(isinstance(res[0], tuple))
        ok_(isinstance(res[1], str))
        ok_(isinstance(res[2], list))
        ok_(isinstance(res[3], myarray))

    # yet another example
    a = np.empty(1, dtype=object)
    a[0] = (0, 1)
    res = am.to_literal(a, recurse=True)
    ok_(isinstance(res[0], tuple))

    #
    # with custom mapping
    am = AttributeMap(map=map_custom)
    assert_array_equal(am.to_numeric(literal), num_custom)
    assert_array_equal(am.to_literal(num_custom), literal)

    # if not numeric nothing is mapped
    assert_array_equal(am.to_numeric(num_custom), num_custom)
    # even if the map doesn't fit
    assert_array_equal(am.to_numeric(num_default), num_default)

    # need to_numeric first
    am = AttributeMap()
    assert_raises(RuntimeError, am.to_literal, [1,2,3])
    # stupid args
    assert_raises(ValueError, AttributeMap, map=num_custom)

    # map mismatch
    am = AttributeMap(map=map_custom)
    if __debug__:
        # checked only in __debug__
        assert_raises(KeyError, am.to_numeric, literal_nonmatching)
    # needs reset and should work afterwards
    am.clear()
    assert_array_equal(am.to_numeric(literal_nonmatching), [2, 0, 1])
    # and now reverse
    am = AttributeMap(map=map_custom)
    assert_raises(KeyError, am.to_literal, num_default)

    # dict-like interface
    am = AttributeMap()

    ok_([(k, v) for k, v in am.iteritems()] == [])
Esempio n. 24
0
def test_attrpermute():

    # Was about to use borrowkwargs but didn't work out . Test doesn't hurt
    doc = AttributePermutator.__init__.__doc__
    assert_in('limit : ', doc)
    assert_not_in('collection : ', doc)

    ds = give_data()
    ds.sa['ids'] = range(len(ds))
    pristine_data = ds.samples.copy()
    permutation = AttributePermutator(['targets', 'ids'], assure=True)
    pds = permutation(ds)
    # should not touch the data
    assert_array_equal(pristine_data, pds.samples)
    # even keep the very same array
    assert_true(pds.samples.base is ds.samples)
    # there is no way that it can be the same attribute
    assert_false(np.all(pds.sa.ids == ds.sa.ids))
    # ids should reflect permutation setup
    assert_array_equal(pds.sa.targets, ds.sa.targets[pds.sa.ids])
    # other attribute should remain intact
    assert_array_equal(pds.sa.chunks, ds.sa.chunks)

    # now chunk-wise permutation
    permutation = AttributePermutator('ids', limit='chunks')
    pds = permutation(ds)
    # first ten should remain first ten
    assert_false(np.any(pds.sa.ids[:10] > 9))

    # verify that implausible assure=True would not work
    permutation = AttributePermutator('targets', limit='ids', assure=True)
    assert_raises(RuntimeError, permutation, ds)

    # same thing, but only permute single chunk
    permutation = AttributePermutator('ids', limit={'chunks': 3})
    pds = permutation(ds)
    # one chunk should change
    assert_false(np.any(pds.sa.ids[30:40] > 39))
    assert_false(np.any(pds.sa.ids[30:40] < 30))
    # the rest not
    assert_array_equal(pds.sa.ids[:30], range(30))

    # or a list of chunks
    permutation = AttributePermutator('ids', limit={'chunks': [3, 4]})
    pds = permutation(ds)
    # two chunks should change
    assert_false(np.any(pds.sa.ids[30:50] > 49))
    assert_false(np.any(pds.sa.ids[30:50] < 30))
    # the rest not
    assert_array_equal(pds.sa.ids[:30], range(30))

    # and now try generating more permutations
    nruns = 2
    permutation = AttributePermutator(['targets', 'ids'],
                                      assure=True,
                                      count=nruns)
    pds = list(permutation.generate(ds))
    assert_equal(len(pds), nruns)
    for p in pds:
        assert_false(np.all(p.sa.ids == ds.sa.ids))

    # permute feature attrs
    ds.fa['ids'] = range(ds.shape[1])
    permutation = AttributePermutator('fa.ids', assure=True)
    pds = permutation(ds)
    assert_false(np.all(pds.fa.ids == ds.fa.ids))

    # now chunk-wise uattrs strategy (reassignment)
    permutation = AttributePermutator('targets',
                                      limit='chunks',
                                      strategy='uattrs',
                                      assure=True)
    pds = permutation(ds)
    # Due to assure above -- we should have changed things
    assert_not_equal(zip(ds.targets), zip(pds.targets))
    # in each chunk we should have unique remappings
    for c in ds.UC:
        chunk_idx = ds.C == c
        otargets, ptargets = ds.targets[chunk_idx], pds.sa.targets[chunk_idx]
        # we still have the same targets
        assert_equal(set(ptargets), set(otargets))
        # we have only 1-to-1 mappings
        assert_true(len(set(zip(otargets, ptargets))), len(set(otargets)))

    ds.sa['odds'] = ds.sa.ids % 2
    # test combinations
    permutation = AttributePermutator(['targets', 'odds'],
                                      limit='chunks',
                                      strategy='uattrs',
                                      assure=True)
    pds = permutation(ds)
    # Due to assure above -- we should have changed things
    assert_not_equal(zip(ds.targets, ds.sa.odds), zip(pds.targets,
                                                      pds.sa.odds))
    # In each chunk we should have unique remappings
    for c in ds.UC:
        chunk_idx = ds.C == c
        otargets, ptargets = ds.targets[chunk_idx], pds.sa.targets[chunk_idx]
        oodds, podds = ds.sa.odds[chunk_idx], pds.sa.odds[chunk_idx]
        # we still have the same targets
        assert_equal(set(ptargets), set(otargets))
        assert_equal(set(oodds), set(podds))
        # at the end we have the same mapping
        assert_equal(set(zip(otargets, oodds)), set(zip(ptargets, podds)))
Esempio n. 25
0
 def assert_all_different_permutations(pds):
     assert_equal(len(pds), nruns)
     for i, p in enumerate(pds):
         assert_false(np.all(p.sa.ids == ds.sa.ids))
         for p_ in pds[i + 1:]:
             assert_false(np.all(p.sa.ids == p_.sa.ids))