Example #1
    def test_nfold_random_counted_selection_partitioner(self):
        # Let's generate the somewhat extensive but complete set and
        # check that everything is legit: 0.5 must correspond to 50%,
        # in our case 5 out of the 10 unique chunks
        split_partitions = [
            tuple(x.sa.partitions)
            for x in NFoldPartitioner(0.5).generate(self.data)]
        # 252 = C(10, 5), the number of ways to choose 5 chunks out of 10
        assert_equal(len(split_partitions), 252)

        # verify that all of them are unique
        assert_equal(len(set(split_partitions)), 252)

        # now let's limit our query
        kwargs = dict(count=10, selection_strategy='random')
        split10_partitions = [
            tuple(x.sa.partitions)
            for x in NFoldPartitioner(5, **kwargs).generate(self.data)]
        split10_partitions_ = [
            tuple(x.sa.partitions)
            for x in NFoldPartitioner(0.5, **kwargs).generate(self.data)]
        # to make sure that I deal with sets of tuples correctly:
        assert_equal(len(set(split10_partitions)), 10)
        assert_equal(len(split10_partitions), 10)
        assert_equal(len(split10_partitions_), 10)
        # and they must differ (identical draws are possible but very unlikely)
        assert_not_equal(split10_partitions, split10_partitions_)
        # but every one of them must be within the known exhaustive set
        assert_equal(set(split_partitions).intersection(split10_partitions),
                     set(split10_partitions))
        assert_equal(set(split_partitions).intersection(split10_partitions_),
                     set(split10_partitions_))
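
The test above leans on two NFoldPartitioner features: a float cvtype picks that fraction of the unique chunks per fold, and count with selection_strategy='random' caps how many of the exhaustive folds are yielded. The standalone sketch below illustrates the same semantics; the normal_feature_dataset helper and the mvpa2 module paths are assumptions based on PyMVPA's 2.x layout, not part of the test itself.

# Minimal sketch (not from the test suite); module paths and the
# normal_feature_dataset helper are assumed from PyMVPA's 2.x layout.
from itertools import combinations

from mvpa2.generators.partition import NFoldPartitioner
from mvpa2.misc.data_generators import normal_feature_dataset

# Stand-in for self.data: a dataset with 10 unique chunks.
ds = normal_feature_dataset(perlabel=10, nlabels=2, nchunks=10)

# A float cvtype selects that fraction of unique chunks per fold, so
# 0.5 of 10 chunks -> 5, and the exhaustive sweep yields C(10, 5) folds.
n_folds = sum(1 for _ in NFoldPartitioner(0.5).generate(ds))
assert n_folds == len(list(combinations(range(10), 5)))  # == 252

# count + selection_strategy='random' draws only 10 of those folds.
limited = NFoldPartitioner(0.5, count=10, selection_strategy='random')
assert sum(1 for _ in limited.generate(ds)) == 10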
Example #2
def test_attrpermute():

    # Was about to use borrowkwargs but it didn't work out; the test doesn't hurt.
    doc = AttributePermutator.__init__.__doc__
    assert_in('limit : ', doc)
    assert_not_in('collection : ', doc)

    ds = give_data()
    ds.sa['ids'] = range(len(ds))
    pristine_data = ds.samples.copy()
    permutation = AttributePermutator(['targets', 'ids'], assure=True)
    pds = permutation(ds)
    # should not touch the data
    assert_array_equal(pristine_data, pds.samples)
    # even keep the very same array
    assert_true(pds.samples.base is ds.samples)
    # there is no way that it can be the same attribute
    assert_false(np.all(pds.sa.ids == ds.sa.ids))
    # ids should reflect permutation setup
    assert_array_equal(pds.sa.targets, ds.sa.targets[pds.sa.ids])
    # other attribute should remain intact
    assert_array_equal(pds.sa.chunks, ds.sa.chunks)

    # now chunk-wise permutation
    permutation = AttributePermutator('ids', limit='chunks')
    pds = permutation(ds)
    # first ten should remain first ten
    assert_false(np.any(pds.sa.ids[:10] > 9))

    # verify that an unsatisfiable assure=True raises: limit='ids' yields
    # singleton groups, so nothing can ever be permuted
    permutation = AttributePermutator('targets', limit='ids', assure=True)
    assert_raises(RuntimeError, permutation, ds)

    # same thing, but only permute single chunk
    permutation = AttributePermutator('ids', limit={'chunks': 3})
    pds = permutation(ds)
    # one chunk should change
    assert_false(np.any(pds.sa.ids[30:40] > 39))
    assert_false(np.any(pds.sa.ids[30:40] < 30))
    # the rest not
    assert_array_equal(pds.sa.ids[:30], range(30))

    # or a list of chunks
    permutation = AttributePermutator('ids', limit={'chunks': [3, 4]})
    pds = permutation(ds)
    # two chunks should change
    assert_false(np.any(pds.sa.ids[30:50] > 49))
    assert_false(np.any(pds.sa.ids[30:50] < 30))
    # the rest not
    assert_array_equal(pds.sa.ids[:30], range(30))

    # and now try generating more permutations
    nruns = 2
    permutation = AttributePermutator(['targets', 'ids'],
                                      assure=True,
                                      count=nruns)
    pds = list(permutation.generate(ds))
    assert_equal(len(pds), nruns)
    for p in pds:
        assert_false(np.all(p.sa.ids == ds.sa.ids))

    # permute feature attrs
    ds.fa['ids'] = range(ds.shape[1])
    permutation = AttributePermutator('fa.ids', assure=True)
    pds = permutation(ds)
    assert_false(np.all(pds.fa.ids == ds.fa.ids))

    # now chunk-wise uattrs strategy (reassignment)
    permutation = AttributePermutator('targets',
                                      limit='chunks',
                                      strategy='uattrs',
                                      assure=True)
    pds = permutation(ds)
    # Due to assure above -- we should have changed things
    assert_not_equal(list(zip(ds.targets)), list(zip(pds.targets)))
    # in each chunk we should have unique remappings
    for c in ds.UC:
        chunk_idx = ds.C == c
        otargets, ptargets = ds.targets[chunk_idx], pds.sa.targets[chunk_idx]
        # we still have the same targets
        assert_equal(set(ptargets), set(otargets))
        # we have only 1-to-1 mappings
        assert_equal(len(set(zip(otargets, ptargets))), len(set(otargets)))

    ds.sa['odds'] = ds.sa.ids % 2
    # test combinations
    permutation = AttributePermutator(['targets', 'odds'],
                                      limit='chunks',
                                      strategy='uattrs',
                                      assure=True)
    pds = permutation(ds)
    # Due to assure above -- we should have changed things
    assert_not_equal(list(zip(ds.targets, ds.sa.odds)),
                     list(zip(pds.targets, pds.sa.odds)))
    # In each chunk we should have unique remappings
    for c in ds.UC:
        chunk_idx = ds.C == c
        otargets, ptargets = ds.targets[chunk_idx], pds.sa.targets[chunk_idx]
        oodds, podds = ds.sa.odds[chunk_idx], pds.sa.odds[chunk_idx]
        # we still have the same targets
        assert_equal(set(ptargets), set(otargets))
        assert_equal(set(oodds), set(podds))
        # at the end we have the same mapping
        assert_equal(set(zip(otargets, oodds)), set(zip(ptargets, podds)))
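
As a reference for the behaviours the test exercises, here is a minimal standalone sketch of the two permutation modes: within-chunk shuffling via limit='chunks' and 1-to-1 relabeling via strategy='uattrs'. The dataset helper and module paths are, again, assumptions based on PyMVPA's 2.x layout.

# Minimal sketch (assumed PyMVPA 2.x module paths, not from the test).
import numpy as np

from mvpa2.generators.permutation import AttributePermutator
from mvpa2.misc.data_generators import normal_feature_dataset

ds = normal_feature_dataset(perlabel=10, nlabels=2, nchunks=5)

# limit='chunks': samples are shuffled only within each chunk, so the
# multiset of targets inside every chunk is preserved.
pds = AttributePermutator('targets', limit='chunks')(ds)
for c in np.unique(ds.sa.chunks):
    mask = ds.sa.chunks == c
    assert sorted(pds.sa.targets[mask]) == sorted(ds.sa.targets[mask])

# strategy='uattrs' relabels unique values 1-to-1 instead of shuffling
# individual samples: equal targets stay equal after permutation.
rds = AttributePermutator('targets', strategy='uattrs')(ds)
assert len(set(zip(ds.sa.targets, rds.sa.targets))) == len(set(ds.sa.targets))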
Example #3
def test_attrpermute():

    # Was about to use borrowkwargs but it didn't work out; the test doesn't hurt.
    doc = AttributePermutator.__init__.__doc__
    assert_in('limit : ', doc)
    assert_not_in('collection : ', doc)

    ds = give_data()
    ds.sa['ids'] = range(len(ds))
    pristine_data = ds.samples.copy()
    permutation = AttributePermutator(['targets', 'ids'], assure=True)
    pds = permutation(ds)
    # should not touch the data
    assert_array_equal(pristine_data, pds.samples)
    # even keep the very same array
    assert_true(pds.samples.base is ds.samples)
    # there is no way that it can be the same attribute
    assert_false(np.all(pds.sa.ids == ds.sa.ids))
    # ids should reflect permutation setup
    assert_array_equal(pds.sa.targets, ds.sa.targets[pds.sa.ids])
    # other attribute should remain intact
    assert_array_equal(pds.sa.chunks, ds.sa.chunks)

    # now chunk-wise permutation
    permutation = AttributePermutator('ids', limit='chunks')
    pds = permutation(ds)
    # first ten should remain first ten
    assert_false(np.any(pds.sa.ids[:10] > 9))

    # verify that an unsatisfiable assure=True raises: limit='ids' yields
    # singleton groups, so nothing can ever be permuted
    permutation = AttributePermutator('targets', limit='ids', assure=True)
    assert_raises(RuntimeError, permutation, ds)

    # same thing, but only permute single chunk
    permutation = AttributePermutator('ids', limit={'chunks': 3})
    pds = permutation(ds)
    # one chunk should change
    assert_false(np.any(pds.sa.ids[30:40] > 39))
    assert_false(np.any(pds.sa.ids[30:40] < 30))
    # the rest not
    assert_array_equal(pds.sa.ids[:30], range(30))

    # or a list of chunks
    permutation = AttributePermutator('ids', limit={'chunks': [3, 4]})
    pds = permutation(ds)
    # two chunks should change
    assert_false(np.any(pds.sa.ids[30:50] > 49))
    assert_false(np.any(pds.sa.ids[30:50] < 30))
    # the rest not
    assert_array_equal(pds.sa.ids[:30], range(30))

    # and now try generating more permutations
    nruns = 2
    def assert_all_different_permutations(pds):
        assert_equal(len(pds), nruns)
        for i, p in enumerate(pds):
            assert_false(np.all(p.sa.ids == ds.sa.ids))
            for p_ in pds[i+1:]:
                assert_false(np.all(p.sa.ids == p_.sa.ids))

    permutation = AttributePermutator(['targets', 'ids'],
                                      assure=True, count=nruns)
    pds = list(permutation.generate(ds))
    assert_all_different_permutations(pds)

    # with seeding provided, generate() should still return different datasets
    permutation = AttributePermutator(['targets', 'ids'],
                                      count=nruns, rng=1)
    pds1 = list(permutation.generate(ds))
    assert_all_different_permutations(pds1)

    # but if we regenerate -- they should all be the same as before
    pds2 = list(permutation.generate(ds))
    assert_equal(len(pds1), len(pds2))
    for p1, p2 in zip(pds1, pds2):
        assert_datasets_equal(p1, p2)

    # permute feature attrs
    ds.fa['ids'] = range(ds.shape[1])
    permutation = AttributePermutator('fa.ids', assure=True)
    pds = permutation(ds)
    assert_false(np.all(pds.fa.ids == ds.fa.ids))

    # now chunk-wise uattrs strategy (reassignment)
    permutation = AttributePermutator('targets', limit='chunks',
                                      strategy='uattrs', assure=True)
    pds = permutation(ds)
    # Due to assure above -- we should have changed things
    assert_not_equal(list(zip(ds.targets)), list(zip(pds.targets)))
    # in each chunk we should have unique remappings
    for c in ds.UC:
        chunk_idx = ds.C == c
        otargets, ptargets = ds.targets[chunk_idx], pds.sa.targets[chunk_idx]
        # we still have the same targets
        assert_equal(set(ptargets), set(otargets))
        # we have only 1-to-1 mappings
        assert_equal(len(set(zip(otargets, ptargets))), len(set(otargets)))

    ds.sa['odds'] = ds.sa.ids % 2
    # test combinations
    permutation = AttributePermutator(['targets', 'odds'], limit='chunks',
                                      strategy='uattrs', assure=True)
    pds = permutation(ds)
    # Due to assure above -- we should have changed things
    assert_not_equal(list(zip(ds.targets, ds.sa.odds)),
                     list(zip(pds.targets, pds.sa.odds)))
    # In each chunk we should have unique remappings
    for c in ds.UC:
        chunk_idx = ds.C == c
        otargets, ptargets = ds.targets[chunk_idx], pds.sa.targets[chunk_idx]
        oodds, podds = ds.sa.odds[chunk_idx], pds.sa.odds[chunk_idx]
        # we still have the same targets
        assert_equal(set(ptargets), set(otargets))
        assert_equal(set(oodds), set(podds))
        # at the end we have the same mapping
        assert_equal(set(zip(otargets, oodds)), set(zip(ptargets, podds)))
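
The seeding behaviour checked above reduces to: with a fixed rng, repeated generate() sweeps reproduce the same datasets, while the permutations within a single sweep still differ. A minimal sketch, under the same PyMVPA 2.x layout assumptions:

# Minimal sketch of the seeding behaviour (assumed module paths).
import numpy as np

from mvpa2.generators.permutation import AttributePermutator
from mvpa2.misc.data_generators import normal_feature_dataset

ds = normal_feature_dataset(perlabel=10, nlabels=2, nchunks=5)
perm = AttributePermutator('targets', count=3, rng=1)

# Two sweeps of the generator with the same seed yield identical datasets.
run1 = [p.sa.targets.copy() for p in perm.generate(ds)]
run2 = [p.sa.targets.copy() for p in perm.generate(ds)]
for t1, t2 in zip(run1, run2):
    np.testing.assert_array_equal(t1, t2)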