Example #1
def test_chained_crossvalidation_searchlight():
    from mvpa2.clfs.gnb import GNB
    from mvpa2.clfs.meta import MappedClassifier
    from mvpa2.generators.partition import NFoldPartitioner
    from mvpa2.mappers.base import ChainMapper
    from mvpa2.mappers.base import Mapper
    from mvpa2.measures.base import CrossValidation
    from mvpa2.measures.searchlight import sphere_searchlight
    from mvpa2.testing.datasets import datasets

    dataset = datasets['3dlarge'].copy()
    dataset.fa['voxel_indices'] = dataset.fa.myspace
    sample_clf = GNB()              # fast and deterministic

    class ZScoreFeaturesMapper(Mapper):
        """Very basic mapper which would take care about standardizing
        all features within each sample separately
        """
        def _forward_data(self, data):
            return (data - np.mean(data, axis=1)[:, None])/np.std(data, axis=1)[:, None]

    # only do partial to save time
    sl_kwargs = dict(radius=2, center_ids=[3, 50])
    clf_mapped = MappedClassifier(sample_clf, ZScoreFeaturesMapper())
    cv = CrossValidation(clf_mapped, NFoldPartitioner())
    sl = sphere_searchlight(cv, **sl_kwargs)
    results_mapped = sl(dataset)

    cv_chained = ChainMapper([ZScoreFeaturesMapper(auto_train=True),
                              CrossValidation(sample_clf, NFoldPartitioner())])
    sl_chained = sphere_searchlight(cv_chained, **sl_kwargs)
    results_chained = sl_chained(dataset)

    assert_array_equal(results_mapped, results_chained)
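
For reference, a minimal standalone sketch (plain NumPy, not taken from the PyMVPA sources) of the per-sample standardization that ZScoreFeaturesMapper._forward_data implements above; only numpy is assumed:

import numpy as np
from numpy.testing import assert_array_almost_equal

data = np.random.normal(size=(10, 20))          # samples x features
zed = (data - data.mean(axis=1, keepdims=True)) / data.std(axis=1, keepdims=True)

# each sample (row) now has zero mean and unit standard deviation
assert_array_almost_equal(zed.mean(axis=1), np.zeros(10))
assert_array_almost_equal(zed.std(axis=1), np.ones(10))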
Example #2
def test_mapper_vs_zscore():
    """Test by comparing to results of elderly z-score function
    """
    # data: 40-sample feature line in 20d space (40x20; samples x features)
    dss = [
        dataset_wizard(np.concatenate(
            [np.arange(40) for i in range(20)]).reshape(20,-1).T,
                targets=1, chunks=1),
        ] + datasets.values()

    for ds in dss:
        ds1 = deepcopy(ds)
        ds2 = deepcopy(ds)

        zsm = ZScoreMapper(chunks_attr=None)
        assert_raises(RuntimeError, zsm.forward, ds1.samples)
        idhashes = (idhash(ds1), idhash(ds1.samples))
        zsm.train(ds1)
        idhashes_train = (idhash(ds1), idhash(ds1.samples))
        assert_equal(idhashes, idhashes_train)

        # forward dataset
        ds1z_ds = zsm.forward(ds1)
        idhashes_forwardds = (idhash(ds1), idhash(ds1.samples))
        # must not modify samples in place!
        assert_equal(idhashes, idhashes_forwardds)

        # forward samples explicitly
        ds1z = zsm.forward(ds1.samples)
        idhashes_forward = (idhash(ds1), idhash(ds1.samples))
        assert_equal(idhashes, idhashes_forward)

        zscore(ds2, chunks_attr=None)
        assert_array_almost_equal(ds1z, ds2.samples)
        assert_array_equal(ds1.samples, ds.samples)
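
For orientation, a standalone sketch of the feature-wise z-scoring being compared here (plain NumPy only; the actual ZScoreMapper additionally handles chunk-wise parameter estimation and dataset bookkeeping, which this sketch does not attempt):

import numpy as np
from numpy.testing import assert_array_almost_equal

samples = np.random.normal(loc=5.0, scale=2.0, size=(40, 20))
before = samples.copy()

# z-score each feature (column) across all samples, as with chunks_attr=None
zed = (samples - samples.mean(axis=0)) / samples.std(axis=0)

assert_array_almost_equal(zed.mean(axis=0), np.zeros(20))
assert_array_almost_equal(zed.std(axis=0), np.ones(20))
# the input array must not be modified in place
assert np.all(samples == before)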
Example #3
def test_remove_invariant_as_a_mapper():
    from mvpa2.featsel.helpers import RangeElementSelector
    from mvpa2.featsel.base import StaticFeatureSelection, SensitivityBasedFeatureSelection
    from mvpa2.testing.datasets import datasets
    from mvpa2.datasets.miscfx import remove_invariant_features

    mapper = SensitivityBasedFeatureSelection(
              lambda x: np.std(x, axis=0),
              RangeElementSelector(lower=0, inclusive=False),
              train_analyzer=False,
              auto_train=True)

    ds = datasets['uni2large'].copy()

    ds.a['mapper'] = StaticFeatureSelection(np.arange(ds.nfeatures))
    ds.fa['index'] = np.arange(ds.nfeatures)
    ds.samples[:, [1, 8]] = 10

    ds_out = mapper(ds)

    # Validate that we are getting the same results as remove_invariant_features
    ds_rifs = remove_invariant_features(ds)
    assert_array_equal(ds_out.samples, ds_rifs.samples)
    assert_array_equal(ds_out.fa.index, ds_rifs.fa.index)

    assert_equal(ds_out.fa.index[1], 2)
    assert_equal(ds_out.fa.index[8], 10)
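
A NumPy-only sketch of the same idea -- dropping constant features by masking out zero-variance columns. This is assumed to be equivalent in spirit to remove_invariant_features, not a copy of the PyMVPA implementation:

import numpy as np
from numpy.testing import assert_array_equal

samples = np.random.normal(size=(20, 5))
samples[:, [1, 3]] = 10                    # make two features invariant

keep = samples.std(axis=0) > 0             # keep non-zero variance features
reduced = samples[:, keep]

assert_array_equal(np.flatnonzero(keep), [0, 2, 4])
assert_array_equal(reduced, samples[:, [0, 2, 4]])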
Example #4
    def test_basic(self):
        dataset = data_generators.linear1d_gaussian_noise()
        k = GeneralizedLinearKernel()
        clf = GPR(k)
        clf.train(dataset)
        y = clf.predict(dataset.samples)
        assert_array_equal(y.shape, dataset.targets.shape)
Example #5
def test_corrstability_smoketest(ds):
    if not 'chunks' in ds.sa:
        return
    if len(ds.sa['targets'].unique) > 30:
        # was regression dataset
        return
    # very basic testing since
    cs = CorrStability()
    #ds = datasets['uni2small']
    out = cs(ds)
    assert_equal(out.shape, (ds.nfeatures, ))
    ok_(np.all(out >= -1.001))  # it should be a correlation after all
    ok_(np.all(out <= 1.001))

    # and theoretically those nonbogus features should have higher values
    if 'nonbogus_targets' in ds.fa:
        bogus_features = np.array([x == None for x in ds.fa.nonbogus_targets])
        assert_array_less(np.mean(out[bogus_features]),
                          np.mean(out[~bogus_features]))
    # and if we move targets to alternative location
    ds = ds.copy(deep=True)
    ds.sa['alt'] = ds.T
    ds.sa.pop('targets')
    assert_raises(KeyError, cs, ds)
    cs = CorrStability('alt')
    out_ = cs(ds)
    assert_array_equal(out, out_)
Example #6
def test_sphere_scaled():
    s1 = ne.Sphere(3)
    s = ne.Sphere(3, element_sizes=(1, 1))

    # Should give exactly the same results since element_sizes are 1s
    for p in ((0, 0), (-23, 1)):
        assert_array_equal(s1(p), s(p))
        ok_(len(s(p)) == len(set(s(p))))

    # Raise exception if query dimensionality does not match element_sizes
    assert_raises(ValueError, s, (1,))

    s = ne.Sphere(3, element_sizes=(1.5, 2))
    assert_array_equal(s((0, 0)),
                       [(-2, 0), (-1, -1), (-1, 0), (-1, 1),
                        (0, -1), (0, 0), (0, 1),
                        (1, -1), (1, 0), (1, 1), (2, 0)])

    s = ne.Sphere(1.5, element_sizes=(1.5, 1.5, 1.5))
    res = s((0, 0, 0))
    ok_(np.all([np.sqrt(np.sum(np.array(x)**2)) <= 1.5 for x in res]))
    ok_(len(res) == 7)

    # all neighbors are no more than 1 voxel away -- just a cube; for
    # the "sphere" effect the radius had to be sqrt(3) * 1.5 ;)
    td = np.sqrt(3*1.5**2)
    s = ne.Sphere(td, element_sizes=(1.5, 1.5, 1.5))
    res = s((0, 0, 0))
    ok_(np.all([np.sqrt(np.sum(np.array(x)**2)) <= td for x in res]))
    ok_(np.all([np.sum(np.abs(x) > 1) == 0 for x in res]))
    ok_(len(res) == 27)
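
The neighbor counts asserted above can be cross-checked with a brute-force sketch (plain Python/NumPy; it assumes, as the test does, that offsets are integer grid steps scaled by the element sizes):

import itertools
import numpy as np

def brute_force_sphere(radius, element_sizes):
    # enumerate grid offsets out to +/- (radius // element_size + 1) steps
    # per axis and keep those whose scaled Euclidean distance is within radius
    ranges = [range(-int(radius // es) - 1, int(radius // es) + 2)
              for es in element_sizes]
    return [offset for offset in itertools.product(*ranges)
            if np.sqrt(sum((o * es) ** 2
                           for o, es in zip(offset, element_sizes))) <= radius]

assert len(brute_force_sphere(1.5, (1.5, 1.5, 1.5))) == 7
assert len(brute_force_sphere(np.sqrt(3 * 1.5 ** 2), (1.5, 1.5, 1.5))) == 27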
Example #7
def test_identity():
    # IdentityNeighborhood() behaves like Sphere(0.5) without all of the
    # computation. Test on a few different coordinates.
    neighborhood = ne.IdentityNeighborhood()
    sphere = ne.Sphere(0.5)
    for center in ((0, 0, 0), (1, 1, 1), (0, 0), (0, )):
        assert_array_equal(neighborhood(center), sphere(center))
Example #8
def test_sifter_with_balancing():
    # extended previous test which was already
    # "... somewhat duplicating the doctest"
    ds = Dataset(samples=np.arange(12).reshape((-1, 2)),
                 sa={'chunks':   [ 0 ,  1 ,  2 ,  3 ,  4,   5 ],
                     'targets':  ['c', 'c', 'c', 'p', 'p', 'p']})

    # Without sifter -- just to ensure that we do get all of them
    # i.e. 6*5*4*3/(4!) = 15
    par = ChainNode([NFoldPartitioner(cvtype=4, attr='chunks')])
    assert_equal(len(list(par.generate(ds))), 15)

    # so we will take 4 chunks out of the available 6, but care only
    # about those partitions where we have a balanced number of 'c' and 'p'
    # entries
    assert_raises(ValueError,
                  lambda x: list(Sifter([('targets', dict(wrong=1))]).generate(x)),
                  ds)

    par = ChainNode([NFoldPartitioner(cvtype=4, attr='chunks'),
                     Sifter([('partitions', 2),
                             ('targets',
                              dict(uvalues=['c', 'p'],
                                   balanced=True))])
                     ])
    dss = list(par.generate(ds))
    # print [ x[x.sa.partitions==2].sa.targets for x in dss ]
    assert_equal(len(dss), 9)
    for ds_ in dss:
        testing = ds[ds_.sa.partitions == 2]
        assert_array_equal(np.unique(testing.sa.targets), ['c', 'p'])
        # and we still have both targets present in training
        training = ds[ds_.sa.partitions == 1]
        assert_array_equal(np.unique(training.sa.targets), ['c', 'p'])
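
The expected counts (15 partitionings overall, 9 of them with a balanced testing set) follow from simple combinatorics; a small standard-library sketch, assuming NFoldPartitioner(cvtype=4) marks the 4 selected chunks as the testing partition:

from itertools import combinations

chunks = [0, 1, 2, 3, 4, 5]
targets = {0: 'c', 1: 'c', 2: 'c', 3: 'p', 4: 'p', 5: 'p'}

# 4 chunks at a time go into the testing partition: C(6, 4) = 15
test_sets = list(combinations(chunks, 4))
assert len(test_sets) == 15

# a balanced testing partition holds two 'c' and two 'p' chunks:
# C(3, 2) * C(3, 2) = 9, which is what the Sifter leaves behind
balanced = [ts for ts in test_sets
            if [targets[c] for c in ts].count('c') == 2]
assert len(balanced) == 9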
Example #9
def test_corrstability_smoketest(ds):
    if not 'chunks' in ds.sa:
        return
    if len(ds.sa['targets'].unique) > 30:
        # was regression dataset
        return
    # very basic testing since
    cs = CorrStability()
    #ds = datasets['uni2small']
    out = cs(ds)
    assert_equal(out.shape, (ds.nfeatures,))
    ok_(np.all(out >= -1.001))  # it should be a correlation after all
    ok_(np.all(out <= 1.001))

    # and theoretically those nonbogus features should have higher values
    if 'nonbogus_targets' in ds.fa:
        bogus_features = np.array([x==None for x in  ds.fa.nonbogus_targets])
        assert_array_less(np.mean(out[bogus_features]), np.mean(out[~bogus_features]))
    # and if we move targets to alternative location
    ds = ds.copy(deep=True)
    ds.sa['alt'] = ds.T
    ds.sa.pop('targets')
    assert_raises(KeyError, cs, ds)
    cs = CorrStability('alt')
    out_ = cs(ds)
    assert_array_equal(out, out_)
Example #10
def test_identity():
    # IdentityNeighborhood() behaves like Sphere(0.5) without all of the
    # computation. Test on a few different coordinates.
    neighborhood = ne.IdentityNeighborhood()
    sphere = ne.Sphere(0.5)
    for center in ((0, 0, 0), (1, 1, 1), (0, 0), (0, )):
        assert_array_equal(neighborhood(center), sphere(center))
Example #11
    def test_cv_no_generator_custom_splitter(self):
        ds = Dataset(np.arange(4), sa={'category': ['to', 'to', 'from', 'from'],
                                       'targets': ['a', 'b', 'c', 'd']})

        class Measure(Classifier):

            def _train(self, ds_):
                assert_array_equal(ds_.samples, ds.samples[2:])
                assert_array_equal(ds_.sa.category, ['from'] * len(ds_))

            def _predict(self, ds_):
                assert(ds_ is not ds)  # we pass a shallow copy
                # could be called to predict training or testing data
                if np.all(ds_.sa.targets != ['c', 'd']):
                    assert_array_equal(ds_.samples, ds.samples[:2])
                    assert_array_equal(ds_.sa.category, ['to'] * len(ds_))
                else:
                    assert_array_equal(ds_.sa.category, ['from'] * len(ds_))

                return ['c', 'd']

        measure = Measure()
        cv = CrossValidation(measure, splitter=Splitter('category', ['from', 'to']))
        res = cv(ds)
        assert_array_equal(res, [[1]])  # failed perfectly ;-)
Example #12
def test_sphere_scaled():
    s1 = ne.Sphere(3)
    s = ne.Sphere(3, element_sizes=(1, 1))

    # Should give exactly the same results since element_sizes are 1s
    for p in ((0, 0), (-23, 1)):
        assert_array_equal(s1(p), s(p))
        ok_(len(s(p)) == len(set(s(p))))

    # Raise exception if query dimensionality does not match element_sizes
    assert_raises(ValueError, s, (1, ))

    s = ne.Sphere(3, element_sizes=(1.5, 2))
    assert_array_equal(s((0, 0)), [(-2, 0), (-1, -1), (-1, 0), (-1, 1),
                                   (0, -1), (0, 0), (0, 1), (1, -1), (1, 0),
                                   (1, 1), (2, 0)])

    s = ne.Sphere(1.5, element_sizes=(1.5, 1.5, 1.5))
    res = s((0, 0, 0))
    ok_(np.all([np.sqrt(np.sum(np.array(x)**2)) <= 1.5 for x in res]))
    ok_(len(res) == 7)

    # all neighbors are no more than 1 voxel away -- just a cube; for
    # the "sphere" effect the radius had to be sqrt(3) * 1.5 ;)
    td = np.sqrt(3 * 1.5**2)
    s = ne.Sphere(td, element_sizes=(1.5, 1.5, 1.5))
    res = s((0, 0, 0))
    ok_(np.all([np.sqrt(np.sum(np.array(x)**2)) <= td for x in res]))
    ok_(np.all([np.sum(np.abs(x) > 1) == 0 for x in res]))
    ok_(len(res) == 27)
Example #13
    def test_cv_no_generator_custom_splitter(self):
        ds = Dataset(np.arange(4),
                     sa={
                         'category': ['to', 'to', 'from', 'from'],
                         'targets': ['a', 'b', 'c', 'd']
                     })

        class Measure(Classifier):
            def _train(self, ds_):
                assert_array_equal(ds_.samples, ds.samples[2:])
                assert_array_equal(ds_.sa.category, ['from'] * len(ds_))

            def _predict(self, ds_):
                assert (ds_ is not ds)  # we pass a shallow copy
                # could be called to predict training or testing data
                if np.all(ds_.sa.targets != ['c', 'd']):
                    assert_array_equal(ds_.samples, ds.samples[:2])
                    assert_array_equal(ds_.sa.category, ['to'] * len(ds_))
                else:
                    assert_array_equal(ds_.sa.category, ['from'] * len(ds_))

                return ['c', 'd']

        measure = Measure()
        cv = CrossValidation(measure,
                             splitter=Splitter('category', ['from', 'to']))
        res = cv(ds)
        assert_array_equal(res, [[1]])  # failed perfectly ;-)
Example #14
def test_sifter_with_balancing():
    # extended previous test which was already
    # "... somewhat duplicating the doctest"
    ds = Dataset(samples=np.arange(12).reshape((-1, 2)),
                 sa={
                     'chunks': [0, 1, 2, 3, 4, 5],
                     'targets': ['c', 'c', 'c', 'p', 'p', 'p']
                 })

    # Without sifter -- just to ensure that we do get all of them
    # i.e. 6*5*4*3/(4!) = 15
    par = ChainNode([NFoldPartitioner(cvtype=4, attr='chunks')])
    assert_equal(len(list(par.generate(ds))), 15)

    # so we will take 4 chunks out of the available 6, but care only
    # about those partitions where we have a balanced number of 'c' and 'p'
    # entries
    assert_raises(
        ValueError,
        lambda x: list(Sifter([('targets', dict(wrong=1))]).generate(x)), ds)

    par = ChainNode([
        NFoldPartitioner(cvtype=4, attr='chunks'),
        Sifter([('partitions', 2),
                ('targets', dict(uvalues=['c', 'p'], balanced=True))])
    ])
    dss = list(par.generate(ds))
    # print [ x[x.sa.partitions==2].sa.targets for x in dss ]
    assert_equal(len(dss), 9)
    for ds_ in dss:
        testing = ds[ds_.sa.partitions == 2]
        assert_array_equal(np.unique(testing.sa.targets), ['c', 'p'])
        # and we still have both targets present in training
        training = ds[ds_.sa.partitions == 1]
        assert_array_equal(np.unique(training.sa.targets), ['c', 'p'])
Example #15
    def test_basic(self):
        dataset = data_generators.linear1d_gaussian_noise()
        k = GeneralizedLinearKernel()
        clf = GPR(k)
        clf.train(dataset)
        y = clf.predict(dataset.samples)
        assert_array_equal(y.shape, dataset.targets.shape)
Example #16
    def test_aggregation(self):
        data = dataset_wizard(np.arange( 20 ).reshape((4, 5)), targets=1, chunks=1)

        ag_data = aggregate_features(data, np.mean)

        ok_(ag_data.nsamples == 4)
        ok_(ag_data.nfeatures == 1)
        assert_array_equal(ag_data.samples[:, 0], [2, 7, 12, 17])
Example #17
    def test_aggregation(self):
        data = dataset_wizard(np.arange( 20 ).reshape((4, 5)), targets=1, chunks=1)

        ag_data = aggregate_features(data, np.mean)

        ok_(ag_data.nsamples == 4)
        ok_(ag_data.nfeatures == 1)
        assert_array_equal(ag_data.samples[:, 0], [2, 7, 12, 17])
Example #18
def _assert_ds_mat_attributes_equal(ds, m, attr_keys=('a', 'sa', 'fa')):
    # ds is a Dataset object, m a matlab-like dictionary
    for attr_k in attr_keys:
        attr_v = getattr(ds, attr_k)

        for k in attr_v.keys():
            v = attr_v[k].value
            assert_array_equal(m[attr_k][k][0, 0].ravel(), v)
Example #19
    def test_basic(self):
        skip_if_no_external('scipy')  # needed by GPR code
        dataset = data_generators.linear1d_gaussian_noise()
        k = GeneralizedLinearKernel()
        clf = GPR(k)
        clf.train(dataset)
        y = clf.predict(dataset.samples)
        assert_array_equal(y.shape, dataset.targets.shape)
Example #20
    def test_basic(self):
        skip_if_no_external('scipy')  # needed by GPR code
        dataset = data_generators.linear1d_gaussian_noise()
        k = GeneralizedLinearKernel()
        clf = GPR(k)
        clf.train(dataset)
        y = clf.predict(dataset.samples)
        assert_array_equal(y.shape, dataset.targets.shape)
Example #21
def _assert_ds_mat_attributes_equal(ds, m, attr_keys=('a', 'sa', 'fa')):
    # ds is a Dataset object, m a matlab-like dictionary
    for attr_k in attr_keys:
        attr_v = getattr(ds, attr_k)

        for k in attr_v.keys():
            v = attr_v[k].value
            assert_array_equal(m[attr_k][k][0, 0].ravel(), v)
Example #22
    def test_size_random_prototypes(self):
        self.build_vector_based_pm()
        fraction = 0.5
        prototype_number = max(int(len(self.samples) * fraction), 1)
        ## debug("MAP","Generating "+str(prototype_number)+" random prototypes.")
        self.prototypes2 = np.array(random.sample(list(self.samples), prototype_number))
        self.pm2 = PrototypeMapper(similarities=self.similarities,
                                   prototypes=self.prototypes2)
        self.pm2.train(self.samples)
        assert_array_equal(self.pm2.proj.shape,
                           (self.samples.shape[0],
                            self.pm2.prototypes.shape[0] * len(self.similarities)))
Example #23
def _assert_ds_less_or_equal(x, y):
    # x and y are a Dataset; x should contain a subset of
    # elements in .sa, fa, a and have the same samples as y
    # Note: no support for fancy objects such as mappers
    assert_array_equal(x.samples, y.samples)
    for label in ('a', 'fa', 'sa'):
        vx = getattr(x, label)
        vy = getattr(y, label)
        _assert_array_collectable_less_or_equal(vx, vy)
Example #24
def test_partitionmapper():
    ds = give_data()
    oep = OddEvenPartitioner()
    parts = list(oep.generate(ds))
    assert_equal(len(parts), 2)
    for i, p in enumerate(parts):
        assert_array_equal(p.sa['partitions'].unique, [1, 2])
        assert_equal(p.a.partitions_set, i)
        assert_equal(len(p), len(ds))
Example #25
def _assert_ds_less_or_equal(x, y):
    # x and y are a Dataset; x should contain a subset of
    # elements in .sa, fa, a and have the same samples as y
    # Note: no support for fancy objects such as mappers
    assert_array_equal(x.samples, y.samples)
    for label in ('a', 'fa', 'sa'):
        vx = getattr(x, label)
        vy = getattr(y, label)
        _assert_array_collectable_less_or_equal(vx, vy)
Example #26
    def test_size_random_prototypes(self):
        self.build_vector_based_pm()
        fraction = 0.5
        prototype_number = max(int(len(self.samples) * fraction), 1)
        ## debug("MAP","Generating "+str(prototype_number)+" random prototypes.")
        self.prototypes2 = np.array(random.sample(list(self.samples), prototype_number))
        self.pm2 = PrototypeMapper(similarities=self.similarities,
                                   prototypes=self.prototypes2)
        self.pm2.train(self.samples)
        assert_array_equal(self.pm2.proj.shape,
                           (self.samples.shape[0],
                            self.pm2.prototypes.shape[0] * len(self.similarities)))
Example #27
def test_sphere():
    # test sphere initialization
    s = ne.Sphere(1)
    center0 = (0, 0, 0)
    center1 = (1, 1, 1)
    assert_equal(len(s(center0)), 7)
    target = array([array([-1,  0,  0]),
              array([ 0, -1,  0]),
              array([ 0,  0, -1]),
              array([0, 0, 0]),
              array([0, 0, 1]),
              array([0, 1, 0]),
              array([1, 0, 0])])
    # test of internals -- no recomputation of increments should be done
    prev_increments = s._increments
    assert_array_equal(s(center0), target)
    ok_(prev_increments is s._increments)
    # query lower dimensionality
    _ = s((0, 0))
    ok_(not prev_increments is s._increments)

    # test Sphere call
    target = [array([0, 1, 1]),
              array([1, 0, 1]),
              array([1, 1, 0]),
              array([1, 1, 1]),
              array([1, 1, 2]),
              array([1, 2, 1]),
              array([2, 1, 1])]
    res = s(center1)
    assert_array_equal(array(res), target)
    # They all should be tuples
    ok_(np.all([isinstance(x, tuple) for x in res]))

    # test for larger diameter
    s = ne.Sphere(4)
    assert_equal(len(s(center1)), 257)

    # test extent keyword
    #s = ne.Sphere(4,extent=(1,1,1))
    #assert_array_equal(array(s((0,0,0))), array([[0,0,0]]))

    # test Errors during initialisation and call
    #assert_raises(ValueError, ne.Sphere, 2)
    #assert_raises(ValueError, ne.Sphere, 1.0)

    # no longer extent available
    assert_raises(TypeError, ne.Sphere, 1, extent=(1))
    assert_raises(TypeError, ne.Sphere, 1, extent=(1.0, 1.0, 1.0))

    s = ne.Sphere(1)
    #assert_raises(ValueError, s, (1))
    if __debug__:
        # No float coordinates allowed for now...
        # XXX might like to change that ;)
        # 
        assert_raises(ValueError, s, (1.0, 1.0, 1.0))
Example #28
def test_partitionmapper():
    ds = give_data()
    oep = OddEvenPartitioner()
    parts = list(oep.generate(ds))
    assert_equal(len(parts), 2)
    for i, p in enumerate(parts):
        assert_array_equal(p.sa['partitions'].unique, [1, 2])
        assert_equal(p.a.partitions_set, i)
        assert_equal(len(p), len(ds))
Example #29
def test_balancer():
    ds = give_data()
    # only mark the selection in an attribute
    bal = Balancer()
    res = bal(ds)
    # we get a new dataset, with shared samples
    assert_false(ds is res)
    assert_true(ds.samples is res.samples.base)
    # should kick out 2 samples in each chunk of 10
    assert_almost_equal(np.mean(res.sa.balanced_set), 0.8)
    # same as above, but actually apply the selection
    bal = Balancer(apply_selection=True, count=5)
    # just run it once
    res = bal(ds)
    # we get a new dataset, with shared samples
    assert_false(ds is res)
    # should kick out 2 samples in each chunk of 10
    assert_equal(len(res), int(0.8 * len(ds)))
    # now use it as a generator
    dses = list(bal.generate(ds))
    assert_equal(len(dses), 5)
    # with limit
    bal = Balancer(limit={'chunks': 3}, apply_selection=True)
    res = bal(ds)
    assert_equal(res.sa['chunks'].unique, (3, ))
    assert_equal(get_nelements_per_value(res.sa.targets).values(), [2] * 4)
    # same but include all offlimit samples
    bal = Balancer(limit={'chunks': 3},
                   include_offlimit=True,
                   apply_selection=True)
    res = bal(ds)
    assert_array_equal(res.sa['chunks'].unique, range(10))
    # chunk three still balanced, but the rest is not, i.e. all samples included
    assert_equal(
        get_nelements_per_value(res[res.sa.chunks == 3].sa.targets).values(),
        [2] * 4)
    assert_equal(
        get_nelements_per_value(res.sa.chunks).values(),
        [10, 10, 10, 8, 10, 10, 10, 10, 10, 10])
    # fixed amount
    bal = Balancer(amount=1, limit={'chunks': 3}, apply_selection=True)
    res = bal(ds)
    assert_equal(get_nelements_per_value(res.sa.targets).values(), [1] * 4)
    # fraction
    bal = Balancer(amount=0.499, limit=None, apply_selection=True)
    res = bal(ds)
    assert_array_equal(
        np.round(
            np.array(get_nelements_per_value(ds.sa.targets).values()) * 0.5),
        np.array(get_nelements_per_value(res.sa.targets).values()))
    # check on feature attribute
    ds.fa['one'] = np.tile([1, 2], 5)
    ds.fa['chk'] = np.repeat([1, 2], 5)
    bal = Balancer(attr='one', amount=2, limit='chk', apply_selection=True)
    res = bal(ds)
    assert_equal(get_nelements_per_value(res.fa.one).values(), [4] * 2)
Example #30
    def test_nonfinite_features_removal(self):
        r = np.random.normal(size=(4, 5))
        ds = dataset_wizard(r, targets=1, chunks=1)
        ds.samples[2,0]=np.NaN
        ds.samples[3,3]=np.Inf

        dsc = remove_nonfinite_features(ds)

        self.assertTrue(dsc.nfeatures == 3)
        assert_array_equal(ds[:, [1, 2, 4]].samples, dsc.samples)
Example #31
    def test_nonfinite_features_removal(self):
        r = np.random.normal(size=(4, 5))
        ds = dataset_wizard(r, targets=1, chunks=1)
        ds.samples[2,0]=np.NaN
        ds.samples[3,3]=np.Inf

        dsc = remove_nonfinite_features(ds)

        self.assertTrue(dsc.nfeatures == 3)
        assert_array_equal(ds[:, [1, 2, 4]].samples, dsc.samples)
Example #32
def test_balancer():
    ds = give_data()
    # only mark the selection in an attribute
    bal = Balancer()
    res = bal(ds)
    # we get a new dataset, with shared samples
    assert_false(ds is res)
    assert_true(ds.samples is res.samples.base)
    # should kick out 2 samples in each chunk of 10
    assert_almost_equal(np.mean(res.sa.balanced_set), 0.8)
    # same as above, but actually apply the selection
    bal = Balancer(apply_selection=True, count=5)
    # just run it once
    res = bal(ds)
    # we get a new dataset, with shared samples
    assert_false(ds is res)
    # should kick out 2 samples in each chunk of 10
    assert_equal(len(res), int(0.8 * len(ds)))
    # now use it as a generator
    dses = list(bal.generate(ds))
    assert_equal(len(dses), 5)
    # with limit
    bal = Balancer(limit={'chunks': 3}, apply_selection=True)
    res = bal(ds)
    assert_equal(res.sa['chunks'].unique, (3,))
    assert_equal(get_nelements_per_value(res.sa.targets).values(),
                 [2] * 4)
    # same but include all offlimit samples
    bal = Balancer(limit={'chunks': 3}, include_offlimit=True,
                   apply_selection=True)
    res = bal(ds)
    assert_array_equal(res.sa['chunks'].unique, range(10))
    # chunk three still balanced, but the rest is not, i.e. all samples included
    assert_equal(get_nelements_per_value(res[res.sa.chunks == 3].sa.targets).values(),
                 [2] * 4)
    assert_equal(get_nelements_per_value(res.sa.chunks).values(),
                 [10, 10, 10, 8, 10, 10, 10, 10, 10, 10])
    # fixed amount
    bal = Balancer(amount=1, limit={'chunks': 3}, apply_selection=True)
    res = bal(ds)
    assert_equal(get_nelements_per_value(res.sa.targets).values(),
                 [1] * 4)
    # fraction
    bal = Balancer(amount=0.499, limit=None, apply_selection=True)
    res = bal(ds)
    assert_array_equal(
            np.round(np.array(get_nelements_per_value(ds.sa.targets).values()) * 0.5),
            np.array(get_nelements_per_value(res.sa.targets).values()))
    # check on feature attribute
    ds.fa['one'] = np.tile([1,2], 5)
    ds.fa['chk'] = np.repeat([1,2], 5)
    bal = Balancer(attr='one', amount=2, limit='chk', apply_selection=True)
    res = bal(ds)
    assert_equal(get_nelements_per_value(res.fa.one).values(),
                 [4] * 2)
Example #33
def test_identity_mapper(s):
    idm = IdentityMapper()
    # doesn't matter what you throw at it
    assert_true(idm.forward(s) is s)
    assert_true(idm.forward1(s) is s)
    assert_true(idm.reverse(s) is s)
    assert_true(idm.reverse1(s) is s)
    # even like this it should work, but type conversion
    # can happen
    assert_array_equal(_verified_reverse1(idm, s), s)
    assert_array_equal(idm.reverse1(s), s)
Example #34
def test_attrpermute():
    ds = give_data()
    ds.sa['ids'] = range(len(ds))
    pristine_data = ds.samples.copy()
    permutation = AttributePermutator(['targets', 'ids'], assure=True)
    pds = permutation(ds)
    # should not touch the data
    assert_array_equal(pristine_data, pds.samples)
    # even keep the very same array
    assert_true(pds.samples.base is ds.samples)
    # there is no way that it can be the same attribute
    assert_false(np.all(pds.sa.ids == ds.sa.ids))
    # ids should reflect permutation setup
    assert_array_equal(pds.sa.targets, ds.sa.targets[pds.sa.ids])
    # other attribute should remain intact
    assert_array_equal(pds.sa.chunks, ds.sa.chunks)

    # now chunk-wise permutation
    permutation = AttributePermutator('ids', limit='chunks')
    pds = permutation(ds)
    # first ten should remain first ten
    assert_false(np.any(pds.sa.ids[:10] > 9))

    # same thing, but only permute single chunk
    permutation = AttributePermutator('ids', limit={'chunks': 3})
    pds = permutation(ds)
    # one chunk should change
    assert_false(np.any(pds.sa.ids[30:40] > 39))
    assert_false(np.any(pds.sa.ids[30:40] < 30))
    # the rest not
    assert_array_equal(pds.sa.ids[:30], range(30))

    # or a list of chunks
    permutation = AttributePermutator('ids', limit={'chunks': [3,4]})
    pds = permutation(ds)
    # two chunks should change
    assert_false(np.any(pds.sa.ids[30:50] > 49))
    assert_false(np.any(pds.sa.ids[30:50] < 30))
    # the rest not
    assert_array_equal(pds.sa.ids[:30], range(30))

    # and now try generating more permutations
    nruns = 2
    permutation = AttributePermutator(['targets', 'ids'], assure=True, count=nruns)
    pds = list(permutation.generate(ds))
    assert_equal(len(pds), nruns)
    for p in pds:
        assert_false(np.all(p.sa.ids == ds.sa.ids))

    # permute feature attrs
    ds.fa['ids'] = range(ds.shape[1])
    permutation = AttributePermutator('fa.ids', assure=True)
    pds = permutation(ds)
    assert_false(np.all(pds.fa.ids == ds.fa.ids))
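
A NumPy-only sketch (not the AttributePermutator implementation) of the "permute only within one chunk" behaviour checked above:

import numpy as np
from numpy.testing import assert_array_equal

rng = np.random.RandomState(0)
ids = np.arange(100)
chunks = np.repeat(np.arange(10), 10)

# permute the ids of chunk 3 only, leaving all other chunks untouched
pids = ids.copy()
mask = chunks == 3
pids[mask] = rng.permutation(ids[mask])

assert_array_equal(pids[~mask], ids[~mask])
assert_array_equal(np.sort(pids[mask]), np.arange(30, 40))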
Example #35
    def test_streamline_equal_mapper(self):
        self.build_streamline_things()

        self.prototypes_equal = self.dataset.samples
        self.pm = PrototypeMapper(similarities=self.similarities,
                                  prototypes=self.prototypes_equal,
                                  demean=False)
        self.pm.train(self.dataset.samples)
        ## debug("MAP","projected data: "+str(self.pm.proj))
        # check size:
        assert_array_equal(self.pm.proj.shape, (len(self.dataset.samples), len(self.prototypes_equal)*len(self.similarities)))
        # test symmetry
        assert_array_almost_equal(self.pm.proj, self.pm.proj.T)
Example #36
    def test_streamline_equal_mapper(self):
        self.build_streamline_things()

        self.prototypes_equal = self.dataset.samples
        self.pm = PrototypeMapper(similarities=self.similarities,
                                  prototypes=self.prototypes_equal,
                                  demean=False)
        self.pm.train(self.dataset.samples)
        ## debug("MAP","projected data: "+str(self.pm.proj))
        # check size:
        assert_array_equal(self.pm.proj.shape, (len(self.dataset.samples), len(self.prototypes_equal)*len(self.similarities)))
        # test symmetry
        assert_array_almost_equal(self.pm.proj, self.pm.proj.T)
Example #37
def test_glmnet_state():
    #data = datasets['dumb2']
    # for some reason the R code fails with the dumb data
    data = datasets['chirp_linear']

    clf = GLMNET_R()

    clf.train(data)

    clf.ca.enable('predictions')

    p = clf.predict(data.samples)

    assert_array_equal(p, clf.ca.predictions)
Example #38
def test_glmnet_c():
    # define binary prob
    data = datasets['dumb2']

    # use GLMNET on binary problem
    clf = GLMNET_C()
    clf.ca.enable('estimates')

    clf.train(data)

    # test predictions
    pre = clf.predict(data.samples)

    assert_array_equal(pre, data.targets)
Example #39
    def test_custom_split(self):
        # simulate half splitter
        hs = CustomPartitioner([(None,[0,1,2,3,4]),(None,[5,6,7,8,9])])
        spl = Splitter(attr='partitions')
        splits = [ list(spl.generate(p)) for p in hs.generate(self.data) ]
        self.failUnless(len(splits) == 2)

        for i,p in enumerate(splits):
            self.failUnless( len(p) == 2 )
            self.failUnless( p[0].nsamples == 50 )
            self.failUnless( p[1].nsamples == 50 )

        assert_array_equal(splits[0][1].sa['chunks'].unique, [0, 1, 2, 3, 4])
        assert_array_equal(splits[0][0].sa['chunks'].unique, [5, 6, 7, 8, 9])
        assert_array_equal(splits[1][1].sa['chunks'].unique, [5, 6, 7, 8, 9])
        assert_array_equal(splits[1][0].sa['chunks'].unique, [0, 1, 2, 3, 4])


        # check fully customized split with working and validation set specified
        cs = CustomPartitioner([([0,3,4],[5,9])])
        # we want to discard the unselected partition of the data, hence attr_values
        # these two splitters should do exactly the same thing
        splitters = (Splitter(attr='partitions', attr_values=[1,2]),
                     Splitter(attr='partitions', ignore_values=(0,)))
        for spl in splitters:
            splits = [ list(spl.generate(p)) for p in cs.generate(self.data) ]
            self.failUnless(len(splits) == 1)

            for i,p in enumerate(splits):
                self.failUnless( len(p) == 2 )
                self.failUnless( p[0].nsamples == 30 )
                self.failUnless( p[1].nsamples == 20 )

            self.failUnless((splits[0][1].sa['chunks'].unique == [5, 9]).all())
            self.failUnless((splits[0][0].sa['chunks'].unique == [0, 3, 4]).all())
Example #40
def test_conditional_attr():
    import copy
    import cPickle
    for node in (TestNodeOnDefault(enable_ca=['test', 'stats']),
                 TestNodeOffDefault(enable_ca=['test', 'stats'])):
        node.ca.test = range(5)
        node.ca.stats = ConfusionMatrix(labels=['one', 'two'])
        node.ca.stats.add(('one', 'two', 'one', 'two'),
                          ('one', 'two', 'two', 'one'))
        node.ca.stats.compute()

        dc_node = copy.deepcopy(node)
        assert_equal(set(node.ca.enabled), set(dc_node.ca.enabled))
        assert (node.ca['test'].enabled)
        assert (node.ca['stats'].enabled)
        assert_array_equal(node.ca['test'].value, dc_node.ca['test'].value)
        assert_array_equal(node.ca['stats'].value.matrix,
                           dc_node.ca['stats'].value.matrix)

        # check whether values survive pickling
        pickled = cPickle.dumps(node)
        up_node = cPickle.loads(pickled)
        assert_array_equal(up_node.ca['test'].value, range(5))
        assert_array_equal(up_node.ca['stats'].value.matrix,
                           node.ca['stats'].value.matrix)
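
The pickling round trip at the end of the test can be sketched with the standard pickle module (the test itself uses the Python 2 cPickle spelling); this only illustrates the value-survival idea, not the conditional-attribute machinery:

import pickle
import numpy as np
from numpy.testing import assert_array_equal

payload = {'test': list(range(5)), 'matrix': np.eye(2)}
restored = pickle.loads(pickle.dumps(payload))

assert restored['test'] == list(range(5))
assert_array_equal(restored['matrix'], payload['matrix'])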
Example #41
    def test_odd_even_split(self):
        oes = OddEvenPartitioner()
        spl = Splitter(attr='partitions')

        splits = [list(spl.generate(p)) for p in oes.generate(self.data)]

        self.assertTrue(len(splits) == 2)

        for i, p in enumerate(splits):
            self.assertTrue(len(p) == 2)
            self.assertTrue(p[0].nsamples == 50)
            self.assertTrue(p[1].nsamples == 50)

        assert_array_equal(splits[0][1].sa['chunks'].unique, [1, 3, 5, 7, 9])
        assert_array_equal(splits[0][0].sa['chunks'].unique, [0, 2, 4, 6, 8])
        assert_array_equal(splits[1][0].sa['chunks'].unique, [1, 3, 5, 7, 9])
        assert_array_equal(splits[1][1].sa['chunks'].unique, [0, 2, 4, 6, 8])

        # check if it works on pure odd and even chunk ids
        moresplits = [
            list(spl.generate(p)) for p in oes.generate(splits[0][0])
        ]

        for split in moresplits:
            self.assertTrue(split[0] != None)
            self.assertTrue(split[1] != None)
Example #42
def test_forward_dense_array_mapper():
    mask = np.ones((3, 2), dtype='bool')
    map_ = mask_mapper(mask)

    # test shape reports
    assert_equal(map_.forward1(mask).shape, (6, ))

    # test 1sample mapping
    assert_array_equal(map_.forward1(np.arange(6).reshape(3, 2)),
                       [0, 1, 2, 3, 4, 5])

    # test 4sample mapping
    foursample = map_.forward(np.arange(24).reshape(4, 3, 2))
    assert_array_equal(foursample,
                       [[0, 1, 2, 3, 4, 5], [6, 7, 8, 9, 10, 11],
                        [12, 13, 14, 15, 16, 17], [18, 19, 20, 21, 22, 23]])

    # check incomplete masks
    mask[1, 1] = 0
    map_ = mask_mapper(mask)
    assert_equal(map_.forward1(mask).shape, (5, ))
    assert_array_equal(map_.forward1(np.arange(6).reshape(3, 2)),
                       [0, 1, 2, 4, 5])

    # check that it doesn't accept wrong dataspace
    assert_raises(ValueError, map_.forward, np.arange(4).reshape(2, 2))

    # check fail if neither mask nor shape
    assert_raises(ValueError, mask_mapper)

    # check that a full mask is automatically created when providing shape
    m = mask_mapper(shape=(2, 3, 4))
    mp = m.forward1(np.arange(24).reshape(2, 3, 4))
    assert_array_equal(mp, np.arange(24))
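
What mask_mapper does in the forward direction can be sketched with plain boolean indexing (NumPy only, not the PyMVPA code path):

import numpy as np
from numpy.testing import assert_array_equal

mask = np.ones((3, 2), dtype=bool)
mask[1, 1] = False                       # drop one element from the mask

sample = np.arange(6).reshape(3, 2)
assert_array_equal(sample[mask], [0, 1, 2, 4, 5])

# the same indexing applied sample-wise flattens a 4-sample volume
volume = np.arange(24).reshape(4, 3, 2)
assert_array_equal(volume[:, mask],
                   [[0, 1, 2, 4, 5], [6, 7, 8, 10, 11],
                    [12, 13, 14, 16, 17], [18, 19, 20, 22, 23]])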
Example #43
def test_glmnet_c():
    # define binary prob
    data = datasets['dumb2']

    # use GLMNET on binary problem
    clf = GLMNET_C()
    clf.ca.enable('estimates')

    clf.train(data)

    # test predictions
    pre = clf.predict(data.samples)

    assert_array_equal(pre, data.targets)
Example #44
    def test_custom_split(self):
        # simulate half splitter
        hs = CustomPartitioner([(None,[0,1,2,3,4]),(None,[5,6,7,8,9])])
        spl = Splitter(attr='partitions')
        splits = [ list(spl.generate(p)) for p in hs.generate(self.data) ]
        self.assertTrue(len(splits) == 2)

        for i,p in enumerate(splits):
            self.assertTrue( len(p) == 2 )
            self.assertTrue( p[0].nsamples == 50 )
            self.assertTrue( p[1].nsamples == 50 )

        assert_array_equal(splits[0][1].sa['chunks'].unique, [0, 1, 2, 3, 4])
        assert_array_equal(splits[0][0].sa['chunks'].unique, [5, 6, 7, 8, 9])
        assert_array_equal(splits[1][1].sa['chunks'].unique, [5, 6, 7, 8, 9])
        assert_array_equal(splits[1][0].sa['chunks'].unique, [0, 1, 2, 3, 4])


        # check fully customized split with working and validation set specified
        cs = CustomPartitioner([([0,3,4],[5,9])])
        # we want to discard the unselected partition of the data, hence attr_values
        # these two splitters should do exactly the same thing
        splitters = (Splitter(attr='partitions', attr_values=[1,2]),
                     Splitter(attr='partitions', ignore_values=(0,)))
        for spl in splitters:
            splits = [ list(spl.generate(p)) for p in cs.generate(self.data) ]
            self.assertTrue(len(splits) == 1)

            for i,p in enumerate(splits):
                self.assertTrue( len(p) == 2 )
                self.assertTrue( p[0].nsamples == 30 )
                self.assertTrue( p[1].nsamples == 20 )

            self.assertTrue((splits[0][1].sa['chunks'].unique == [5, 9]).all())
            self.assertTrue((splits[0][0].sa['chunks'].unique == [0, 3, 4]).all())
Example #45
def test_glmnet_state():
    #data = datasets['dumb2']
    # for some reason the R code fails with the dumb data
    data = datasets['chirp_linear']

    clf = GLMNET_R()

    clf.train(data)

    clf.ca.enable('predictions')

    p = clf.predict(data.samples)

    assert_array_equal(p, clf.ca.predictions)
Example #46
def test_splitter():
    ds = give_data()
    # split with defaults
    spl1 = Splitter('chunks')
    assert_raises(NotImplementedError, spl1, ds)

    splits = list(spl1.generate(ds))
    assert_equal(len(splits), len(ds.sa['chunks'].unique))

    for split in splits:
        # it should have performed basic slicing!
        assert_true(split.samples.base is ds.samples)
        assert_equal(len(split.sa['chunks'].unique), 1)
        assert_true('lastsplit' in split.a)
    assert_true(splits[-1].a.lastsplit)

    # now again, more customized
    spl2 = Splitter('targets',
                    attr_values=[0, 1, 1, 2, 3, 3, 3],
                    count=4,
                    noslicing=True)
    splits = list(spl2.generate(ds))
    assert_equal(len(splits), 4)
    for split in splits:
        # it should NOT have performed basic slicing!
        assert_false(split.samples.base is ds.samples)
        assert_equal(len(split.sa['targets'].unique), 1)
        assert_equal(len(split.sa['chunks'].unique), 10)
    assert_true(splits[-1].a.lastsplit)

    # two should be identical
    assert_array_equal(splits[1].samples, splits[2].samples)

    # now go wild and split by feature attribute
    ds.fa['roi'] = np.repeat([0, 1], 5)
    # splitter should auto-detect that this is a feature attribute
    spl3 = Splitter('roi')
    splits = list(spl3.generate(ds))
    assert_equal(len(splits), 2)
    for split in splits:
        assert_true(split.samples.base is ds.samples)
        assert_equal(len(split.fa['roi'].unique), 1)
        assert_equal(split.shape, (100, 5))

    # and finally test chained splitters
    cspl = ChainNode([spl2, spl3, spl1])
    splits = list(cspl.generate(ds))
    # 4 target splits x 2 roi splits x 10 chunk splits each = 80
    assert_equal(len(splits), 80)
Example #47
def test_cosmo_queryengine(fn):
    skip_if_no_external('scipy', min_version='0.8')
    nbrhood_mat = _create_small_mat_nbrhood_dict()
    neighbors = nbrhood_mat['neighbors']
    savemat(fn, nbrhood_mat)

    # test dict in matlab form, filename, and through QueryEngine loader
    for input in (nbrhood_mat, fn, cosmo.CosmoQueryEngine.from_mat(neighbors)):
        qe = cosmo.from_any(input)
        assert_array_equal(qe.ids, [0, 1, 2, 3])

        for i in qe.ids:
            nbr_fids_base0 = neighbors[0, i][0] - 1
            assert_array_equal(qe.query_byid(i), nbr_fids_base0)

        _assert_ds_mat_attributes_equal(qe, nbrhood_mat, ('fa', 'a'))
Example #48
def test_cosmo_queryengine(fn):
    skip_if_no_external('scipy', min_version='0.8')
    nbrhood_mat = _create_small_mat_nbrhood_dict()
    neighbors = nbrhood_mat['neighbors']
    savemat(fn, nbrhood_mat)

    # test dict in matlab form, filename, and through QueryEngine loader
    for input in (nbrhood_mat, fn, cosmo.CosmoQueryEngine.from_mat(neighbors)):
        qe = cosmo.from_any(input)
        assert_array_equal(qe.ids, [0, 1, 2, 3])

        for i in qe.ids:
            nbr_fids_base0 = neighbors[0, i][0] - 1
            assert_array_equal(qe.query_byid(i), nbr_fids_base0)

        _assert_ds_mat_attributes_equal(qe, nbrhood_mat, ('fa', 'a'))
Example #49
    def test_streamline_random_mapper(self):
        self.build_streamline_things()

        # Adding one more similarity to test multiple similarities in the streamline case:
        self.similarities.append(StreamlineSimilarity(distance=corouge))

        fraction = 0.5
        prototype_number = max(int(len(self.dataset.samples)*fraction),1)
        ## debug("MAP","Generating "+str(prototype_number)+" random prototypes.")
        self.prototypes_random = self.dataset.samples[np.random.permutation(self.dataset.samples.size)][:prototype_number]
        ## debug("MAP","prototypes: "+str(self.prototypes_random))

        self.pm = PrototypeMapper(similarities=self.similarities, prototypes=self.prototypes_random, demean=False)
        self.pm.train(self.dataset.samples) # , fraction=1.0)
        # test size:
        assert_array_equal(self.pm.proj.shape, (len(self.dataset.samples), len(self.prototypes_random)*len(self.similarities)))
Example #50
    def test_streamline_random_mapper(self):
        self.build_streamline_things()

        # Adding one more similarity to test multiple similarities in the streamline case:
        self.similarities.append(StreamlineSimilarity(distance=corouge))

        fraction = 0.5
        prototype_number = max(int(len(self.dataset.samples)*fraction),1)
        ## debug("MAP","Generating "+str(prototype_number)+" random prototypes.")
        self.prototypes_random = self.dataset.samples[np.random.permutation(self.dataset.samples.size)][:prototype_number]
        ## debug("MAP","prototypes: "+str(self.prototypes_random))

        self.pm = PrototypeMapper(similarities=self.similarities, prototypes=self.prototypes_random, demean=False)
        self.pm.train(self.dataset.samples) # , fraction=1.0)
        # test size:
        assert_array_equal(self.pm.proj.shape, (len(self.dataset.samples), len(self.prototypes_random)*len(self.similarities)))
Example #51
def test_splitter():
    ds = give_data()
    # split with defaults
    spl1 = Splitter('chunks')
    assert_raises(NotImplementedError, spl1, ds)

    splits = list(spl1.generate(ds))
    assert_equal(len(splits), len(ds.sa['chunks'].unique))

    for split in splits:
        # it should have performed basic slicing!
        assert_true(split.samples.base is ds.samples)
        assert_equal(len(split.sa['chunks'].unique), 1)
        assert_true('lastsplit' in split.a)
    assert_true(splits[-1].a.lastsplit)

    # now again, more customized
    spl2 = Splitter('targets', attr_values = [0,1,1,2,3,3,3], count=4,
                   noslicing=True)
    splits = list(spl2.generate(ds))
    assert_equal(len(splits), 4)
    for split in splits:
        # it should NOT have performed basic slicing!
        assert_false(split.samples.base is ds.samples)
        assert_equal(len(split.sa['targets'].unique), 1)
        assert_equal(len(split.sa['chunks'].unique), 10)
    assert_true(splits[-1].a.lastsplit)

    # two should be identical
    assert_array_equal(splits[1].samples, splits[2].samples)

    # now go wild and split by feature attribute
    ds.fa['roi'] = np.repeat([0,1], 5)
    # splitter should auto-detect that this is a feature attribute
    spl3 = Splitter('roi')
    splits = list(spl3.generate(ds))
    assert_equal(len(splits), 2)
    for split in splits:
        assert_true(split.samples.base is ds.samples)
        assert_equal(len(split.fa['roi'].unique), 1)
        assert_equal(split.shape, (100, 5))

    # and finally test chained splitters
    cspl = ChainNode([spl2, spl3, spl1])
    splits = list(cspl.generate(ds))
    # 4 target splits x 2 roi splits x 10 chunk splits each = 80
    assert_equal(len(splits), 80)
Example #52
def test_collections():
    sa = SampleAttributesCollection()
    assert_equal(len(sa), 0)

    assert_raises(ValueError, sa.__setitem__, 'test', 0)
    l = range(5)
    sa['test'] = l
    # auto-wrapped
    assert_true(isinstance(sa['test'], ArrayCollectable))
    assert_equal(len(sa), 1)

    # names which are already present in dict interface
    assert_raises(ValueError, sa.__setitem__, 'values', range(5))

    sa_c = copy.deepcopy(sa)
    assert_equal(len(sa), len(sa_c))
    assert_array_equal(sa.test, sa_c.test)
Example #53
def test_sifter():
    # somewhat duplicating the doctest
    ds = Dataset(samples=np.arange(8).reshape((4,2)),
                 sa={'chunks':   [ 0 ,  1 ,  2 ,  3 ],
                     'targets':  ['c', 'c', 'p', 'p']})
    par = ChainNode([NFoldPartitioner(cvtype=2, attr='chunks'),
                     Sifter([('partitions', 2),
                             ('targets', ['c', 'p'])])
                     ])
    dss = list(par.generate(ds))
    assert_equal(len(dss), 4)
    for ds_ in dss:
        testing = ds[ds_.sa.partitions == 2]
        assert_array_equal(np.unique(testing.sa.targets), ['c', 'p'])
        # and we still have both targets present in training
        training = ds[ds_.sa.partitions == 1]
        assert_array_equal(np.unique(training.sa.targets), ['c', 'p'])
Example #54
def test_collections():
    sa = SampleAttributesCollection()
    assert_equal(len(sa), 0)

    assert_raises(ValueError, sa.__setitem__, 'test', 0)
    l = range(5)
    sa['test'] = l
    # auto-wrapped
    assert_true(isinstance(sa['test'], ArrayCollectable))
    assert_equal(len(sa), 1)

    # names which are already present in dict interface
    assert_raises(ValueError, sa.__setitem__, 'values', range(5))

    sa_c = copy.deepcopy(sa)
    assert_equal(len(sa), len(sa_c))
    assert_array_equal(sa.test, sa_c.test)
Example #55
    def test_label_splitter(self):
        oes = OddEvenPartitioner(attr='targets')
        spl = Splitter(attr='partitions')

        splits = [list(spl.generate(p)) for p in oes.generate(self.data)]

        assert_array_equal(splits[0][0].sa['targets'].unique, [0, 2])
        assert_array_equal(splits[0][1].sa['targets'].unique, [1, 3])
        assert_array_equal(splits[1][0].sa['targets'].unique, [1, 3])
        assert_array_equal(splits[1][1].sa['targets'].unique, [0, 2])
Example #56
    def test_label_splitter(self):
        oes = OddEvenPartitioner(attr='targets')
        spl = Splitter(attr='partitions')

        splits = [ list(spl.generate(p)) for p in oes.generate(self.data) ]

        assert_array_equal(splits[0][0].sa['targets'].unique, [0,2])
        assert_array_equal(splits[0][1].sa['targets'].unique, [1,3])
        assert_array_equal(splits[1][0].sa['targets'].unique, [1,3])
        assert_array_equal(splits[1][1].sa['targets'].unique, [0,2])
Example #57
def test_subset_filler():
    sm = StaticFeatureSelection(np.arange(3))
    sm_f0 = StaticFeatureSelection(np.arange(3), filler=0)
    sm_fm1 = StaticFeatureSelection(np.arange(3), filler=-1)
    sm_fnan = StaticFeatureSelection(np.arange(3), filler=np.nan)
    data = np.arange(12).astype(float).reshape((2, -1))

    sm.train(data)
    data_forwarded = sm.forward(data)

    for m in (sm, sm_f0, sm_fm1, sm_fnan):
        m.train(data)
        assert_array_equal(data_forwarded, m.forward(data))

    data_back_fm1 = sm_fm1.reverse(data_forwarded)
    ok_(np.all(data_back_fm1[:, 3:] == -1))
    data_back_fnan = sm_fnan.reverse(data_forwarded)
    ok_(np.all(np.isnan(data_back_fnan[:, 3:])))
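
A plain-NumPy sketch of the reverse-with-filler behaviour exercised above; reverse_with_filler is a hypothetical helper written for illustration, not part of StaticFeatureSelection:

import numpy as np

def reverse_with_filler(forwarded, selected, nfeatures, filler):
    # put the forwarded columns back at their original positions and
    # pad the removed ones with the filler value
    out = np.full((forwarded.shape[0], nfeatures), filler, dtype=float)
    out[:, selected] = forwarded
    return out

data = np.arange(12, dtype=float).reshape((2, -1))
forwarded = data[:, :3]

back = reverse_with_filler(forwarded, np.arange(3), 6, -1)
assert np.all(back[:, 3:] == -1)
assert np.all(back[:, :3] == forwarded)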
Example #58
def test_subset_filler():
    sm = StaticFeatureSelection(np.arange(3))
    sm_f0 = StaticFeatureSelection(np.arange(3), filler=0)
    sm_fm1 = StaticFeatureSelection(np.arange(3), filler= -1)
    sm_fnan = StaticFeatureSelection(np.arange(3), filler=np.nan)
    data = np.arange(12).astype(float).reshape((2, -1))

    sm.train(data)
    data_forwarded = sm.forward(data)

    for m in (sm, sm_f0, sm_fm1, sm_fnan):
        m.train(data)
        assert_array_equal(data_forwarded, m.forward(data))

    data_back_fm1 = sm_fm1.reverse(data_forwarded)
    ok_(np.all(data_back_fm1[:, 3:] == -1))
    data_back_fnan = sm_fnan.reverse(data_forwarded)
    ok_(np.all(np.isnan(data_back_fnan[:, 3:])))