Example #1
def test_stack_add_attributes():
    data0 = Dataset.from_wizard(np.ones((5, 5)), targets=1)
    data1 = Dataset.from_wizard(np.ones((5, 5)), targets=1)
    data0.fa['ok'] = data0.sa['ok'] = np.arange(5)
    data1.fa['ok'] = data1.sa['ok'] = np.arange(5)
    data0.fa['nok'] = data0.sa['nok'] = [0]
    data1.fa['nok'] = data1.sa['nok'] = np.arange(5)

    # function, collection name, the other collection name
    for xstack, colname, ocolname in ((vstack, 'fa', 'sa'),
                                      (hstack, 'sa', 'fa')):
        for add_param in None, 'update', 'drop_nonunique':

            kw = {colname: add_param} if add_param else {}
            r = xstack((data0, data1), **kw)
            COL = lambda x: getattr(x, colname)
            col = COL(r)
            ocol = getattr(r, ocolname)

            # in any scenario, the other collection should have got
            # both names and be just fine
            assert_array_equal(ocol['nok'].value, [0] * 5 + list(range(5)))
            assert_array_equal(ocol['ok'].value, list(range(5)) * 2)

            if add_param in ('update',):
                # will be of the last dataset
                assert_array_equal(col['nok'].value, COL(data1)['nok'].value)
                assert_array_equal(col['ok'].value, COL(data1)['ok'].value)
            elif add_param in (None, 'drop_nonunique'):
                assert('nok' not in col)  # must be dropped since not unique
                # both the same but let's check ;)
                assert_array_equal(col['ok'].value, COL(data0)['ok'].value)
                assert_array_equal(col['ok'].value, COL(data1)['ok'].value)
Example #3
def test_mergeds():
    data0 = Dataset.from_wizard(np.ones((5, 5)), targets=1)
    data0.fa['one'] = np.ones(5)
    data1 = Dataset.from_wizard(np.ones((5, 5)), targets=1, chunks=1)
    data1.fa['one'] = np.zeros(5)
    data2 = Dataset.from_wizard(np.ones((3, 5)), targets=2, chunks=1)
    data3 = Dataset.from_wizard(np.ones((4, 5)), targets=2)
    data4 = Dataset.from_wizard(np.ones((2, 5)), targets=3, chunks=2)
    data4.fa['test'] = np.arange(5)

    # cannot merge if there are attributes missing in one of the datasets
    assert_raises(DatasetError, data1.append, data0)

    merged = data1.copy()
    merged.append(data2)

    ok_(merged.nfeatures == 5)
    l12 = [1] * 5 + [2] * 3
    l1 = [1] * 8
    ok_((merged.targets == l12).all())
    ok_((merged.chunks == l1).all())

    data_append = data1.copy()
    data_append.append(data2)

    ok_(data_append.nfeatures == 5)
    ok_((data_append.targets == l12).all())
    ok_((data_append.chunks == l1).all())

    #
    # appending
    #

    # we need the same samples attributes in both datasets
    assert_raises(DatasetError, data2.append, data3)

    #
    # vstacking
    #
    if __debug__:
        # tested only in __debug__
        assert_raises(ValueError, vstack, (data0, data1, data2, data3))
    datasets = (data1, data2, data4)
    merged = vstack(datasets)
    assert_equal(merged.shape,
                 (np.sum([len(ds) for ds in datasets]), data1.nfeatures))
    assert_true('test' in merged.fa)
    assert_array_equal(merged.sa.targets, [1] * 5 + [2] * 3 + [3] * 2)

    #
    # hstacking
    #
    assert_raises(ValueError, hstack, datasets)
    datasets = (data0, data1)
    merged = hstack(datasets)
    assert_equal(merged.shape,
                 (len(data1), np.sum([ds.nfeatures for ds in datasets])))
    assert_true('chunks' in merged.sa)
    assert_array_equal(merged.fa.one, [1] * 5 + [0] * 5)
Example #5
def test_ex_from_masked():
    ds = Dataset.from_wizard(samples=np.atleast_2d(np.arange(5)).view(myarray),
                             targets=1,
                             chunks=1)
    # simple sequence has to be a single pattern
    assert_equal(ds.nsamples, 1)
    # array subclass survives
    ok_(isinstance(ds.samples, myarray))

    # check correct pattern layout (1x5)
    assert_array_equal(ds.samples, [[0, 1, 2, 3, 4]])

    # check for single label and origin
    assert_array_equal(ds.targets, [1])
    assert_array_equal(ds.chunks, [1])

    # now try adding pattern with wrong shape
    assert_raises(
        ValueError, vstack,
        (ds, Dataset.from_wizard(np.ones((2, 3)), targets=1, chunks=1)))

    # now add two real patterns
    ds = vstack((ds,
                 Dataset.from_wizard(np.random.standard_normal((2, 5)),
                                     targets=2,
                                     chunks=2)))
    assert_equal(ds.nsamples, 3)
    assert_array_equal(ds.targets, [1, 2, 2])
    assert_array_equal(ds.chunks, [1, 2, 2])

    # test unique class labels
    ds = vstack((ds,
                 Dataset.from_wizard(np.random.standard_normal((2, 5)),
                                     targets=3,
                                     chunks=5)))
    assert_array_equal(ds.sa['targets'].unique, [1, 2, 3])

    # test wrong attributes length
    assert_raises(ValueError,
                  Dataset.from_wizard,
                  np.random.standard_normal((4, 2, 3, 4)),
                  targets=[1, 2, 3],
                  chunks=2)
    assert_raises(ValueError,
                  Dataset.from_wizard,
                  np.random.standard_normal((4, 2, 3, 4)),
                  targets=[1, 2, 3, 4],
                  chunks=[2, 2, 2])

    # now test one that is using from_masked
    ds = datasets['3dlarge']
    for a in ds.sa:
        assert_equal(len(ds.sa[a].value), len(ds))
    for a in ds.fa:
        assert_equal(len(ds.fa[a].value), ds.nfeatures)
Example #6
def test_stack_add_dataset_attributes():
    data0 = Dataset.from_wizard(np.ones((5, 5)), targets=1)
    data0.a['one'] = np.ones(2)
    data0.a['two'] = 2
    data0.a['three'] = 'three'
    data0.a['common'] = range(10)
    data0.a['array'] = np.arange(10)
    data1 = Dataset.from_wizard(np.ones((5, 5)), targets=1)
    data1.a['one'] = np.ones(3)
    data1.a['two'] = 3
    data1.a['four'] = 'four'
    data1.a['common'] = range(10)
    data1.a['array'] = np.arange(10)

    vstacker = lambda x: vstack((data0, data1), a=x)
    hstacker = lambda x: hstack((data0, data1), a=x)

    add_params = (1, None, 'unique', 'uniques', 'all', 'drop_nonunique')

    for stacker in (vstacker, hstacker):
        for add_param in add_params:
            if add_param == 'unique':
                assert_raises(DatasetError, stacker, add_param)
                continue

            r = stacker(add_param)

            if add_param == 1:
                assert_array_equal(data1.a.one, r.a.one)
                assert_equal(r.a.two, 3)
                assert_equal(r.a.four, 'four')
                assert_true('three' not in r.a.keys())
                assert_true('array' in r.a.keys())
            elif add_param == 'uniques':
                assert_equal(set(r.a.keys()),
                             set(['one', 'two', 'three',
                                  'four', 'common', 'array']))
                assert_equal(r.a.two, (2, 3))
                assert_equal(r.a.four, ('four',))
            elif add_param == 'all':
                assert_equal(set(r.a.keys()),
                             set(['one', 'two', 'three',
                                  'four', 'common', 'array']))
                assert_equal(r.a.two, (2, 3))
                assert_equal(r.a.three, ('three', None))
            elif add_param == 'drop_nonunique':
                assert_equal(set(r.a.keys()),
                             set(['common', 'three', 'four', 'array']))
                assert_equal(r.a.three, 'three')
                assert_equal(r.a.four, 'four')
                assert_equal(r.a.common, range(10))
                assert_array_equal(r.a.array, np.arange(10))
Example #7
def test_labelpermutation_randomsampling():
    ds = Dataset.from_wizard(np.ones((5, 10)), targets=range(5), chunks=1)
    for i in range(1, 5):
        ds.append(Dataset.from_wizard(np.ones((5, 10)) + i,
                                      targets=range(5), chunks=i+1))
    # assign some feature attributes
    ds.fa['roi'] = np.repeat(np.arange(5), 2)
    ds.fa['lucky'] = np.arange(10) % 2
    # use subclass for testing if it would survive
    ds.samples = ds.samples.view(myarray)

    ok_(ds.get_nsamples_per_attr('targets') == {0:5, 1:5, 2:5, 3:5, 4:5})
    sample = ds.random_samples(2)
    ok_(sorted(sample.get_nsamples_per_attr('targets').values())
        == [2, 2, 2, 2, 2])
    ok_((ds.sa['chunks'].unique == list(range(1, 6))).all())
Example #9
def test_origmask_extraction():
    origdata = np.random.standard_normal((10, 2, 4, 3))
    data = Dataset.from_wizard(origdata, targets=2, chunks=2)

    # check selection of a single feature
    sel = data[:, 5]
    ok_(sel.samples.shape[1] == 1)
Example #10
def test_feature_masking():
    mask = np.zeros((5, 3), dtype='bool')
    mask[2, 1] = True
    mask[4, 0] = True
    data = Dataset.from_wizard(np.arange(60).reshape((4, 5, 3)),
                               targets=1, chunks=1, mask=mask)

    # check simple masking
    ok_(data.nfeatures == 2)

    # selection should be idempotent
    ok_(data[:, mask].nfeatures == data.nfeatures)
    # check that the correct feature gets selected
    assert_array_equal(data[:, 1].samples[:, 0], [12, 27, 42, 57])
    # XXX put back when coord -> fattr is implemented
    #ok_(tuple(data[:, 1].a.mapper.getInId(0)) == (4, 0))
    ok_(data[:, 1].a.mapper.forward1(mask).shape == (1,))

    # check sugarings
    # XXX put me back
    #self.failUnless(np.all(data.I == data.origids))
    assert_array_equal(data.C, data.chunks)
    assert_array_equal(data.UC, np.unique(data.chunks))
    assert_array_equal(data.T, data.targets)
    assert_array_equal(data.UT, np.unique(data.targets))
    assert_array_equal(data.S, data.samples)
    assert_array_equal(data.O, data.mapper.reverse(data.samples))
Example #11
def test_labelschunks_access():
    samples = np.arange(12).reshape((4, 3)).view(myarray)
    labels = range(4)
    chunks = [1, 1, 2, 2]
    ds = Dataset.from_wizard(samples, labels, chunks)

    # array subclass survives
    ok_(isinstance(ds.samples, myarray))

    assert_array_equal(ds.targets, labels)
    assert_array_equal(ds.chunks, chunks)

    # moreover they should point to the same thing
    ok_(ds.targets is ds.sa.targets)
    ok_(ds.targets is ds.sa['targets'].value)
    ok_(ds.chunks is ds.sa.chunks)
    ok_(ds.chunks is ds.sa['chunks'].value)

    # assignment should work at all levels including 1st
    ds.targets = chunks
    assert_array_equal(ds.targets, chunks)
    ok_(ds.targets is ds.sa.targets)
    ok_(ds.targets is ds.sa['targets'].value)

    # test broadcasting
    # but not for plain scalars
    assert_raises(ValueError, ds.set_attr, 'sa.bc', 5)
    # and not for a plain str
    assert_raises(TypeError, ds.set_attr, 'sa.bc', "mike")
    # but for any iterable of len == 1
    ds.set_attr('sa.bc', (5,))
    ds.set_attr('sa.dc', ["mike"])
    assert_array_equal(ds.sa.bc, [5] * len(ds))
    assert_array_equal(ds.sa.dc, ["mike"] * len(ds))
Example #12
 def get_data(self):
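     # 100 random samples in two balanced classes (50 each),
     # spread evenly across 5 chunks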
     data = np.random.standard_normal((100, 2, 2, 2))
     labels = np.concatenate((np.repeat(0, 50), np.repeat(1, 50)))
     chunks = np.repeat(range(5), 10)
     chunks = np.concatenate((chunks, chunks))
     return Dataset.from_wizard(samples=data, targets=labels, chunks=chunks)
Example #13
def test_feature_masking():
    mask = np.zeros((5, 3), dtype='bool')
    mask[2, 1] = True
    mask[4, 0] = True
    data = Dataset.from_wizard(np.arange(60).reshape((4, 5, 3)),
                               targets=1,
                               chunks=1,
                               mask=mask)

    # check simple masking
    ok_(data.nfeatures == 2)

    # selection should be idempotent
    ok_(data[:, mask].nfeatures == data.nfeatures)
    # check that the correct feature gets selected
    assert_array_equal(data[:, 1].samples[:, 0], [12, 27, 42, 57])
    # XXX put back when coord -> fattr is implemented
    #ok_(tuple(data[:, 1].a.mapper.getInId(0)) == (4, 0))
    ok_(data[:, 1].a.mapper.forward1(mask).shape == (1,))

    # check sugarings
    # XXX put me back
    #self.assertTrue(np.all(data.I == data.origids))
    assert_array_equal(data.C, data.chunks)
    assert_array_equal(data.UC, np.unique(data.chunks))
    assert_array_equal(data.T, data.targets)
    assert_array_equal(data.UT, np.unique(data.targets))
    assert_array_equal(data.S, data.samples)
    assert_array_equal(data.O, data.mapper.reverse(data.samples))
Example #14
def test_multidim_attrs():
    samples = np.arange(24).reshape(2, 3, 4)
    # have a dataset with two samples -- mapped from 2d into 1d
    # but have 2d labels and 3d chunks -- whatever that is
    ds = Dataset.from_wizard(samples.copy(),
                             targets=samples.copy(),
                             chunks=np.random.normal(size=(2, 10, 4, 2)))
    assert_equal(ds.nsamples, 2)
    assert_equal(ds.nfeatures, 12)
    assert_equal(ds.sa.targets.shape, (2, 3, 4))
    assert_equal(ds.sa.chunks.shape, (2, 10, 4, 2))

    # try slicing
    subds = ds[0]
    assert_equal(subds.nsamples, 1)
    assert_equal(subds.nfeatures, 12)
    assert_equal(subds.sa.targets.shape, (1, 3, 4))
    assert_equal(subds.sa.chunks.shape, (1, 10, 4, 2))

    # add multidim feature attr
    fattr = ds.mapper.forward(samples)
    assert_equal(fattr.shape, (2, 12))
    # should puke -- first axis is #samples
    assert_raises(ValueError, ds.fa.__setitem__, 'moresamples', fattr)
    # but that should be fine
    ds.fa['moresamples'] = fattr.T
    assert_equal(ds.fa.moresamples.shape, (12, 2))
Example #18
def test_samples_shape():
    ds = Dataset.from_wizard(np.ones((10, 2, 3, 4)), targets=1, chunks=1)
    ok_(ds.samples.shape == (10, 24))

    # what happens to 1D samples
    ds = Dataset(np.arange(5))
    assert_equal(ds.shape, (5, 1))
    assert_equal(ds.nfeatures, 1)
Example #20
def test_ex_from_masked():
    ds = Dataset.from_wizard(samples=np.atleast_2d(np.arange(5)).view(myarray),
                             targets=1, chunks=1)
    # simple sequence has to be a single pattern
    assert_equal(ds.nsamples, 1)
    # array subclass survives
    ok_(isinstance(ds.samples, myarray))

    # check correct pattern layout (1x5)
    assert_array_equal(ds.samples, [[0, 1, 2, 3, 4]])

    # check for single label and origin
    assert_array_equal(ds.targets, [1])
    assert_array_equal(ds.chunks, [1])

    # now try adding pattern with wrong shape
    assert_raises(DatasetError, ds.append,
                  Dataset.from_wizard(np.ones((2, 3)), targets=1, chunks=1))

    # now add two real patterns
    ds.append(Dataset.from_wizard(np.random.standard_normal((2, 5)),
                                  targets=2, chunks=2))
    assert_equal(ds.nsamples, 3)
    assert_array_equal(ds.targets, [1, 2, 2])
    assert_array_equal(ds.chunks, [1, 2, 2])

    # test unique class labels
    ds.append(Dataset.from_wizard(np.random.standard_normal((2, 5)),
                                  targets=3, chunks=5))
    assert_array_equal(ds.sa['targets'].unique, [1, 2, 3])

    # test wrong attributes length
    assert_raises(ValueError, Dataset.from_wizard,
                  np.random.standard_normal((4, 2, 3, 4)), targets=[1, 2, 3],
                  chunks=2)
    assert_raises(ValueError, Dataset.from_wizard,
                  np.random.standard_normal((4, 2, 3, 4)), targets=[1, 2, 3, 4],
                  chunks=[2, 2, 2])

    # now test one that is using from_masked
    ds = datasets['3dlarge']
    for a in ds.sa:
        assert_equal(len(ds.sa[a].value), len(ds))
    for a in ds.fa:
        assert_equal(len(ds.fa[a].value), ds.nfeatures)
Example #21
def test_shape_conversion():
    ds = Dataset.from_wizard(np.arange(24).reshape((2, 3, 4)).view(myarray),
                             targets=1, chunks=1)
    # array subclass survives
    ok_(isinstance(ds.samples, myarray))

    assert_equal(ds.nsamples, 2)
    assert_equal(ds.samples.shape, (2, 12))
    assert_array_equal(ds.samples, [range(12), range(12, 24)])
Example #22
 def setUp(self):
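     # 100 random 3x4x2 volumes in two balanced classes over 5 chunks;
     # the mask removes two of the 24 voxels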
     data = np.random.standard_normal((100, 3, 4, 2))
     labels = np.concatenate((np.repeat(0, 50), np.repeat(1, 50)))
     chunks = np.repeat(range(5), 10)
     chunks = np.concatenate((chunks, chunks))
     mask = np.ones((3, 4, 2), dtype="bool")
     mask[0, 0, 0] = 0
     mask[1, 3, 1] = 0
     self.dataset = Dataset.from_wizard(samples=data, targets=labels, chunks=chunks, mask=mask)
Example #24
    def transform(self, ds):
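        # array_to_matrix/copy_matrix (helpers from this project) rebuild a
        # full matrix from each vectorized sample; the stacked result is
        # flattened back into a 2D array whose rows become the samples of
        # the new Dataset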

        data = np.dstack([copy_matrix(array_to_matrix(a)) for a in ds.samples])
        data = np.hstack([d for d in data[:, :]]).T

        attr = self._edit_attr(ds, data.shape)

        ds_ = Dataset.from_wizard(data)
        ds_ = add_attributes(ds_, attr)

        return ds_
Example #26
def test_basic_datamapping():
    samples = np.arange(24).reshape((4, 3, 2)).view(myarray)

    ds = Dataset.from_wizard(samples)

    # array subclass survives
    ok_(isinstance(ds.samples, myarray))

    # mapper should end up in the dataset
    ok_('mapper' in ds.a)

    # check correct mapping
    ok_(ds.nsamples == 4)
    ok_(ds.nfeatures == 6)
Example #28
def load_mat_ds(path, subj, folder, **kwargs):

    data = load_mat_data(path, subj, folder, **kwargs)

    # load attributes
    attr = load_attributes(path, subj, folder, **kwargs)

    attr, labels = edit_attr(attr, data.shape)

    ds = Dataset.from_wizard(data, attr.targets)
    ds = add_subjectname(ds, subj)
    ds = add_attributes(ds, attr)

    #ds.fa['roi_labels'] = labels
    ds.fa['matrix_values'] = np.ones_like(data[0])

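    # derive integer chunk labels from the (string) sample names,
    # presumably via scikit-learn's LabelEncoder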
    ds.sa['chunks'] = LabelEncoder().fit_transform(ds.sa['name'])

    return ds
Example #30
def test_masked_featureselection():
    origdata = np.random.standard_normal((10, 2, 4, 3, 5)).view(myarray)
    data = Dataset.from_wizard(origdata, targets=2, chunks=2)

    unmasked = data.samples.copy()
    # array subclass survives
    ok_(isinstance(data.samples, myarray))

    # default must be no mask
    ok_(data.nfeatures == 120)
    ok_(data.a.mapper.forward1(origdata[0]).shape == (120,))

    # check that full mask uses all features
    # this uses auto-mapping of selection arrays in __getitem__
    sel = data[:, np.ones((2, 4, 3, 5), dtype='bool')]
    ok_(sel.nfeatures == data.samples.shape[1])
    ok_(data.nfeatures == 120)

    # check partial array mask
    partial_mask = np.zeros((2, 4, 3, 5), dtype='bool')
    partial_mask[0, 0, 2, 2] = 1
    partial_mask[1, 2, 2, 0] = 1

    sel = data[:, partial_mask]
    ok_(sel.nfeatures == 2)

    # check that feature selection does not change source data
    ok_(data.nfeatures == 120)
    ok_(data.a.mapper.forward1(origdata[0]).shape == (120,))

    # check selection with feature list
    sel = data[:, [0, 37, 119]]
    ok_(sel.nfeatures == 3)

    # check size of the masked samples
    ok_(sel.samples.shape == (10, 3))

    # check that the right features are selected
    assert_array_equal(unmasked[:, [0, 37, 119]], sel.samples)
Example #32
 def _update_ds(self, ds, X, y):
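     # regrow every sample attribute to match the new number of samples in y;
     # sample_generator is assumed to be a helper from this project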
     ds_ = Dataset.from_wizard(X)

     samples_difference = len(y) - len(ds.targets)

     for key in ds.sa.keys():
         values = ds.sa[key].value
         values_ = sample_generator(key, values, samples_difference, y)
         u, c = np.unique(values_, return_counts=True)
         logger.debug("%s - sample per key: %s" % (key, str([u, c])))
         logger.debug(values_)

         ds_.sa[key] = values_

     return ds_
Example #33
def generate_testing_datasets(specs):
    # Let's permute upon each invocation of the test, so we could possibly
    # trigger some funny cases
    nonbogus_pool = np.random.permutation([0, 1, 3, 5])

    datasets = {}

    # use a partitioner to flag odd/even samples as training and test
    ttp = OddEvenPartitioner(space='train', count=1)

    for kind, spec in specs.items():
        # set of univariate datasets
        for nlabels in [2, 3, 4]:
            basename = 'uni%d%s' % (nlabels, kind)
            nonbogus_features = nonbogus_pool[:nlabels]

            dataset = normal_feature_dataset(
                nlabels=nlabels, nonbogus_features=nonbogus_features, **spec)

            # full dataset
            datasets[basename] = list(ttp.generate(dataset))[0]

        # sample 3D
        total = 2 * spec['perlabel']
        nchunks = spec['nchunks']
        data = np.random.standard_normal((total, 3, 6, 6))
        labels = np.concatenate(
            (np.repeat(0, spec['perlabel']), np.repeat(1, spec['perlabel'])))
        data[:, 1, 0, 0] += 2 * labels  # add some signal
        chunks = np.asarray(list(range(nchunks)) * (total // nchunks))
        mask = np.ones((3, 6, 6), dtype='bool')
        mask[0, 0, 0] = 0
        mask[1, 3, 2] = 0
        ds = Dataset.from_wizard(samples=data,
                                 targets=labels,
                                 chunks=chunks,
                                 mask=mask,
                                 space='myspace')
        # and to stress tests on manipulating sa/fa possibly containing
        # attributes of dtype object
        ds.sa['test_object'] = [['a'], [1, 2]] * (ds.nsamples // 2)
        datasets['3d%s' % kind] = ds

    # some additional datasets
    datasets['dumb2'] = dumb_feature_binary_dataset()
    datasets['dumb'] = dumb_feature_dataset()
    # dataset with few invariant features
    _dsinv = dumb_feature_dataset()
    _dsinv.samples = np.hstack((_dsinv.samples, np.zeros(
        (_dsinv.nsamples, 1)), np.ones((_dsinv.nsamples, 1))))
    datasets['dumbinv'] = _dsinv

    # Datasets for regressions testing
    datasets['sin_modulated'] = list(
        ttp.generate(multiple_chunks(sin_modulated, 4, 30, 1)))[0]
    # use the same full for training
    datasets['sin_modulated_train'] = datasets['sin_modulated']
    datasets['sin_modulated_test'] = sin_modulated(30, 1, flat=True)

    # simple signal for linear regressors
    datasets['chirp_linear'] = multiple_chunks(chirp_linear, 6, 50, 10, 2, 0.3,
                                               0.1)
    datasets['chirp_linear_test'] = chirp_linear(20, 5, 2, 0.4, 0.1)

    datasets['wr1996'] = multiple_chunks(wr1996, 4, 50)
    datasets['wr1996_test'] = wr1996(50)

    datasets['hollow'] = Dataset(HollowSamples((40, 20)),
                                 sa={'targets': np.tile(['one', 'two'], 20)})

    return datasets
Example #34
def generate_testing_datasets(specs):
    # Let's permute upon each invocation of the test, so we could possibly
    # trigger some funny cases
    nonbogus_pool = np.random.permutation([0, 1, 3, 5])

    datasets = {}

    # use a partitioner to flag odd/even samples as training and test
    ttp = OddEvenPartitioner(space='train', count=1)

    for kind, spec in specs.items():
        # set of univariate datasets
        for nlabels in [ 2, 3, 4 ]:
            basename = 'uni%d%s' % (nlabels, kind)
            nonbogus_features = nonbogus_pool[:nlabels]

            dataset = normal_feature_dataset(
                nlabels=nlabels,
                nonbogus_features=nonbogus_features,
                **spec)

            # full dataset
            datasets[basename] = list(ttp.generate(dataset))[0]

        # sample 3D
        total = 2*spec['perlabel']
        nchunks = spec['nchunks']
        data = np.random.standard_normal(( total, 3, 6, 6 ))
        labels = np.concatenate( ( np.repeat( 0, spec['perlabel'] ),
                                  np.repeat( 1, spec['perlabel'] ) ) )
        data[:, 1, 0, 0] += 2*labels           # add some signal
        chunks = np.asarray(list(range(nchunks)) * (total // nchunks))
        mask = np.ones((3, 6, 6), dtype='bool')
        mask[0, 0, 0] = 0
        mask[1, 3, 2] = 0
        ds = Dataset.from_wizard(samples=data, targets=labels, chunks=chunks,
                                 mask=mask, space='myspace')
        # and to stress tests on manipulating sa/fa possibly containing
        # attributes of dtype object
        ds.sa['test_object'] = [['a'], [1, 2]] * (ds.nsamples // 2)
        datasets['3d%s' % kind] = ds


    # some additional datasets
    datasets['dumb2'] = dumb_feature_binary_dataset()
    datasets['dumb'] = dumb_feature_dataset()
    # dataset with few invariant features
    _dsinv = dumb_feature_dataset()
    _dsinv.samples = np.hstack((_dsinv.samples,
                               np.zeros((_dsinv.nsamples, 1)),
                               np.ones((_dsinv.nsamples, 1))))
    datasets['dumbinv'] = _dsinv

    # Datasets for regressions testing
    datasets['sin_modulated'] = list(ttp.generate(multiple_chunks(sin_modulated, 4, 30, 1)))[0]
    # use the same full for training
    datasets['sin_modulated_train'] = datasets['sin_modulated']
    datasets['sin_modulated_test'] = sin_modulated(30, 1, flat=True)

    # simple signal for linear regressors
    datasets['chirp_linear'] = multiple_chunks(chirp_linear, 6, 50, 10, 2, 0.3, 0.1)
    datasets['chirp_linear_test'] = chirp_linear(20, 5, 2, 0.4, 0.1)

    datasets['wr1996'] = multiple_chunks(wr1996, 4, 50)
    datasets['wr1996_test'] = wr1996(50)

    datasets['hollow'] = Dataset(HollowSamples((40,20)),
                                 sa={'targets': np.tile(['one', 'two'], 20)})

    return datasets
Example #35
def test_chainmapper():
    # the chain needs at least one mapper
    assert_raises(ValueError, ChainMapper, [])
    # a typical first mapper is to flatten
    cm = ChainMapper([FlattenMapper()])

    # few container checks
    assert_equal(len(cm), 1)
    assert_true(isinstance(cm[0], FlattenMapper))

    # now training
    # come up with data
    samples_shape = (2, 2, 4)
    data_shape = (4,) + samples_shape
    data = np.arange(np.prod(data_shape)).reshape(data_shape)
    pristinedata = data.copy()
    target = [[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31],
              [32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47],
              [48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63]]
    target = np.array(target)

    # if it is not trained it knows nothing
    cm.train(data)

    # a new mapper should appear when doing feature selection
    cm.append(StaticFeatureSelection(list(range(1, 16))))
    assert_equal(cm.forward1(data[0]).shape, (15,))
    assert_equal(len(cm), 2)
    # multiple slicing
    cm.append(StaticFeatureSelection([9, 14]))
    assert_equal(cm.forward1(data[0]).shape, (2,))
    assert_equal(len(cm), 3)

    # check reproduction
    if __debug__:
        # debug mode needs special test as it enhances the repr output
        # with module info and id() appendix for objects
        import mvpa2
        cm_clone = eval(repr(cm))
        assert_equal('#'.join(repr(cm_clone).split('#')[:-1]),
                     '#'.join(repr(cm).split('#')[:-1]))
    else:
        cm_clone = eval(repr(cm))
        assert_equal(repr(cm_clone), repr(cm))

    # what happens if we retrain the whole beast on the same data as before
    cm.train(data)
    assert_equal(cm.forward1(data[0]).shape, (2,))
    assert_equal(len(cm), 3)

    # let's map something
    mdata = cm.forward(data)
    assert_array_equal(mdata, target[:, [10, 15]])
    # and back
    rdata = cm.reverse(mdata)
    # original shape
    assert_equal(rdata.shape, data.shape)
    # content as far it could be restored
    assert_array_equal(rdata[rdata > 0], data[rdata > 0])
    assert_equal(np.sum(rdata > 0), 8)

    # Let's construct a dataset with a mapper assigned and see
    # if sub-selecting a feature adjusts trailing StaticFeatureSelection
    # appropriately
    ds_subsel = Dataset.from_wizard(data, mapper=cm)[:, 1]
    tail_sfs = ds_subsel.a.mapper[-1]
    assert_equal(repr(tail_sfs), 'StaticFeatureSelection(slicearg=array([14]))')
Example #36
def test_chainmapper():
    # the chain needs at least one mapper
    assert_raises(ValueError, ChainMapper, [])
    # a typical first mapper is to flatten
    cm = ChainMapper([FlattenMapper()])

    # few container checks
    assert_equal(len(cm), 1)
    assert_true(isinstance(cm[0], FlattenMapper))

    # now training
    # come up with data
    samples_shape = (2, 2, 4)
    data_shape = (4,) + samples_shape
    data = np.arange(np.prod(data_shape)).reshape(data_shape)
    target = [[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31],
              [32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47],
              [48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63]]
    target = np.array(target)

    # if it is not trained it knows nothing
    cm.train(data)

    # a new mapper should appear when doing feature selection
    cm.append(StaticFeatureSelection(list(range(1, 16))))
    assert_equal(cm.forward1(data[0]).shape, (15,))
    assert_equal(len(cm), 2)
    # multiple slicing
    cm.append(StaticFeatureSelection([9, 14]))
    assert_equal(cm.forward1(data[0]).shape, (2,))
    assert_equal(len(cm), 3)

    # check reproduction
    if __debug__:
        # debug mode needs special test as it enhances the repr output
        # with module info and id() appendix for objects
        import mvpa2
        cm_clone = eval(repr(cm))
        assert_equal('#'.join(repr(cm_clone).split('#')[:-1]),
                     '#'.join(repr(cm).split('#')[:-1]))
    else:
        cm_clone = eval(repr(cm))
        assert_equal(repr(cm_clone), repr(cm))

    # what happens if we retrain the whole beast on the same data as before
    cm.train(data)
    assert_equal(cm.forward1(data[0]).shape, (2,))
    assert_equal(len(cm), 3)

    # let's map something
    mdata = cm.forward(data)
    assert_array_equal(mdata, target[:, [10, 15]])
    # and back
    rdata = cm.reverse(mdata)
    # original shape
    assert_equal(rdata.shape, data.shape)
    # content as far it could be restored
    assert_array_equal(rdata[rdata > 0], data[rdata > 0])
    assert_equal(np.sum(rdata > 0), 8)

    # Let's construct a dataset with a mapper assigned and see
    # if sub-selecting a feature adjusts trailing StaticFeatureSelection
    # appropriately
    ds_subsel = Dataset.from_wizard(data, mapper=cm)[:, 1]
    tail_sfs = ds_subsel.a.mapper[-1]
    assert_equal(repr(tail_sfs), 'StaticFeatureSelection(slicearg=array([14]))')