Ejemplo n.º 1
0
def test_icamapper():
    """Round-trip a noisy 2-D line through a trained ICAMapper.

    Builds a 40x2 (samples x features) dataset, trains the mapper on a copy
    of it and verifies the shape of the projection, the shape of the
    forward-mapped data, and that ``reverse()`` restores the input.
    """
    n_samples, n_features = 40, 2

    # the same ramp in every feature column, i.e. a straight line in 2d space
    ramp = np.arange(float(n_samples))
    samples = np.vstack([ramp] * n_features).T
    # center on the overall mean, then jitter every value a little
    samples -= samples.mean()
    jitter = np.random.normal(size=samples.shape, scale=0.1)
    samples += jitter
    line_ds = Dataset(samples)

    mapper = ICAMapper()
    mapper.train(line_ds.copy())
    assert_equal(mapper.proj.shape, (n_features, n_features))

    mapped = mapper.forward(line_ds.copy())
    assert_equal(mapped.shape, (n_samples, n_features))
    # reverse-mapping the projection has to give back the original data
    assert_array_almost_equal(mapper.reverse(mapped), line_ds)
Ejemplo n.º 2
0
def test_icamapper():
    """Check training, forward mapping and reversal of an ICAMapper.

    The input is a 40x2 (samples x features) line with a small amount of
    normally distributed noise added to every value.
    """
    # two identical feature columns holding 0..39, shifted by the overall
    # mean and perturbed with noise of scale 0.1
    column = np.arange(40.)
    data = np.vstack((column, column)).T
    data -= data.mean()
    noise = np.random.normal(size=data.shape, scale=0.1)
    data += noise
    dataset = Dataset(data)

    ica = ICAMapper()
    training_copy = dataset.copy()
    ica.train(training_copy)
    # one projection vector per input feature
    assert_equal(ica.proj.shape, (2, 2))

    forward_input = dataset.copy()
    projected = ica.forward(forward_input)
    assert_equal(projected.shape, (40, 2))

    # 'reverse()' must fully recover the data that went into 'forward()'
    recovered = ica.reverse(projected)
    assert_array_almost_equal(recovered, dataset)
Ejemplo n.º 3
0
def test_icamapper():
    """Round-trip a noisy 2-D line through an ICAMapper, tolerating MDP errors.

    The data is 40x2 (samples x features). Any ``mdp.NodeException`` raised
    while training, mapping or reversing is swallowed, so such a failure
    inside MDP does not fail this test.
    """
    line = np.arange(40.)
    samples = np.vstack((line, line)).T
    samples -= samples.mean()
    noise = np.random.normal(size=samples.shape, scale=0.1)
    samples += noise
    dataset = Dataset(samples)

    mapper = ICAMapper()

    def _train_and_roundtrip():
        # train, then verify projection shape, forward shape and reversal
        mapper.train(dataset.copy())
        assert_equal(mapper.proj.shape, (2, 2))
        mapped = mapper.forward(dataset.copy())
        assert_equal(mapped.shape, (40, 2))
        # the mapped data has to be fully recoverable through 'reverse()'
        assert_array_almost_equal(mapper.reverse(mapped), dataset)

    try:
        _train_and_roundtrip()
    except mdp.NodeException:
        # Do not fail if the ICA did not converge at all -- that is MDP's
        # problem, not a defect of the mapper under test.
        pass
Ejemplo n.º 4
0
def test_pcamapper():
    """Check PCAMapper training failures, projection shape and reversal.

    The data is a 40x20 (samples x features) line: every feature column
    holds the values 0..39. Training is expected to raise
    ``mdp.NodeException`` on this data both before and after the samples are
    converted to float, and to succeed once random noise has been added.
    """
    n_samples, n_features = 40, 20
    ramps = np.concatenate([np.arange(n_samples)] * n_features)
    ndlin = Dataset(ramps.reshape(n_features, -1).T)

    mapper = PCAMapper()
    # training on the samples as created (before the float conversion) fails
    assert_raises(mdp.NodeException, mapper.train, ndlin)

    ndlin.samples = ndlin.samples.astype('float')
    noisy = ndlin.copy()
    jitter = np.random.random(size=ndlin.samples.shape)
    noisy.samples += jitter

    # the noise-free line has no variance for more than one PCA component,
    # hence just one actual non-zero eigenvalue -- training fails again
    assert_raises(mdp.NodeException, mapper.train, ndlin)

    # with noise added training goes through
    mapper.train(noisy)
    assert_equal(mapper.proj.shape, (n_features, n_features))

    # project the noise-free data into PCA space
    projected = mapper.forward(ndlin.samples)
    assert_equal(projected.shape, (n_samples, n_features))
    # 'reverse()' has to fully recover the mapped data
    assert_array_almost_equal(mapper.reverse(projected), ndlin)
Ejemplo n.º 5
0
def test_pcamapper():
    """Exercise PCAMapper on a 40x20 (samples x features) line dataset.

    Verifies that training raises ``mdp.NodeException`` on the noise-free
    data (before and after conversion to float), that training on a noisy
    copy yields a 20x20 projection, and that forward-mapped data is restored
    by ``reverse()``.
    """
    # 20 stacked copies of 0..39, transposed into samples x features
    stacked = np.concatenate([np.arange(40)] * 20).reshape(20, -1)
    line_ds = Dataset(stacked.T)

    pca = PCAMapper()

    # first attempt: samples exactly as created above
    assert_raises(mdp.NodeException, pca.train, line_ds)

    line_ds.samples = line_ds.samples.astype('float')
    noisy_ds = line_ds.copy()
    random_offsets = np.random.random(size=line_ds.samples.shape)
    noisy_ds.samples += random_offsets

    # second attempt: float samples, but there is no variance for more than
    # one PCA component, hence just one actual non-zero eigenvalue
    assert_raises(mdp.NodeException, pca.train, line_ds)

    # third attempt: the noisy copy can be trained on
    pca.train(noisy_ds)
    assert_equal(pca.proj.shape, (20, 20))

    # now project the (noise-free) samples into PCA space
    in_pca_space = pca.forward(line_ds.samples)
    assert_equal(in_pca_space.shape, (40, 20))

    # the projection must map back onto the original data
    restored = pca.reverse(in_pca_space)
    assert_array_almost_equal(restored, line_ds)
Ejemplo n.º 6
0
def test_from_wizard():
    """Check Dataset construction, attribute access, copying and validation.

    Builds a 4x3 dataset with ``targets`` and ``chunks`` sample attributes
    and verifies that:

    * ``init_origids('both')`` regenerates equal-valued origids on each call,
    * an ndarray subclass used for the samples survives,
    * attributes are reachable via ``ds.sa`` and via the dataset shortcuts,
    * ``copy()`` can keep all, none, or a selection of the attributes,
    * attributes with a mismatching shape are rejected with ``ValueError``.

    The key listings are wrapped in ``list()``/``sorted()`` and ``labels`` is
    a real list so that the checks behave the same on Python 2 and Python 3.
    """
    samples = np.arange(12).reshape((4, 3)).view(myarray)
    # needs to be a list: it is concatenated with itself further down, which
    # a Python 3 ``range`` object does not support
    labels = list(range(4))
    chunks = [1, 1, 2, 2]

    ds = Dataset(samples, sa={'targets': labels, 'chunks': chunks})
    ds.init_origids('both')
    first = ds.sa.origids
    # now do again and check that they get regenerated
    ds.init_origids('both')
    assert_false(first is ds.sa.origids)
    assert_array_equal(first, ds.sa.origids)

    ok_(is_datasetlike(ds))
    ok_(not is_datasetlike(labels))

    # array subclass survives
    ok_(isinstance(ds.samples, myarray))

    ## XXX stuff that needs thought:

    # ds.sa (empty) has this in the public namespace:
    #   add, get, getvalue, has_key, is_set, items, listing, name, names
    #   owner, remove, reset, setvalue, which_set
    # maybe we need some form of lightweight Collection?

    assert_array_equal(ds.samples, samples)
    assert_array_equal(ds.sa.targets, labels)
    assert_array_equal(ds.sa.chunks, chunks)

    # same should work for shortcuts
    assert_array_equal(ds.targets, labels)
    assert_array_equal(ds.chunks, chunks)

    ok_(sorted(ds.sa.keys()) == ['chunks', 'origids', 'targets'])
    ok_(sorted(ds.fa.keys()) == ['origids'])
    # add some more
    ds.a['random'] = 'blurb'

    # check stripping attributes from a copy
    cds = ds.copy()  # full copy
    ok_(sorted(cds.sa.keys()) == ['chunks', 'origids', 'targets'])
    ok_(sorted(cds.fa.keys()) == ['origids'])
    ok_(sorted(cds.a.keys()) == ['random'])
    # keys() may return a view instead of a list (Python 3); such a view
    # never compares equal to a list, so convert before comparing
    cds = ds.copy(sa=[], fa=[], a=[])  # plain copy
    ok_(list(cds.sa.keys()) == [])
    ok_(list(cds.fa.keys()) == [])
    ok_(list(cds.a.keys()) == [])
    cds = ds.copy(sa=['targets'], fa=None, a=['random'])  # partial copy
    ok_(list(cds.sa.keys()) == ['targets'])
    ok_(list(cds.fa.keys()) == ['origids'])
    ok_(list(cds.a.keys()) == ['random'])

    # there is not necessarily a mapper present
    # (membership test on keys() instead of the Python 2-only has_key())
    ok_('mapper' not in ds.a.keys())

    # has to complain about misshaped samples attributes
    assert_raises(ValueError, Dataset.from_wizard, samples, labels + labels)

    # check that we actually have attributes of the expected type
    ok_(isinstance(ds.sa['targets'], ArrayCollectable))

    # the dataset will take care of not adding stupid stuff
    assert_raises(ValueError, ds.sa.__setitem__, 'stupid', np.arange(3))
    assert_raises(ValueError, ds.fa.__setitem__, 'stupid', np.arange(4))
    # or change proper attributes to stupid shapes
    try:
        ds.sa.targets = np.arange(3)
    except ValueError:
        pass
    else:
        ok_(False, msg="Assigning value with improper shape to attribute "
                       "did not raise exception.")