# Excerpted PyMVPA unit tests. The import block below is a best-guess
# reconstruction to keep the excerpt self-contained: the module paths assume
# the PyMVPA 2.x ('mvpa2') layout and may need adjusting to the local tree.
import numpy as np
import mdp

from mvpa2.base.collections import ArrayCollectable
from mvpa2.base.dataset import is_datasetlike
from mvpa2.datasets.base import Dataset
from mvpa2.mappers.mdp_adaptor import ICAMapper, PCAMapper
from mvpa2.testing.tools import (ok_, assert_equal, assert_false,
                                 assert_raises, assert_array_equal,
                                 assert_array_almost_equal)


def test_icamapper():
    # data: 40 sample feature line in 2d space (40x2; samples x features)
    samples = np.vstack([np.arange(40.) for i in range(2)]).T
    samples -= samples.mean()
    samples += np.random.normal(size=samples.shape, scale=0.1)
    ndlin = Dataset(samples)

    pm = ICAMapper()
    try:
        pm.train(ndlin.copy())
        assert_equal(pm.proj.shape, (2, 2))

        p = pm.forward(ndlin.copy())
        assert_equal(p.shape, (40, 2))
        # check that the mapped data can be fully recovered by 'reverse()'
        assert_array_almost_equal(pm.reverse(p), ndlin)
    except mdp.NodeException:
        # do not puke if the ICA did not converge at all -- that is not our
        # fault but MDP's
        pass
def test_pcamapper():
    # data: 40 sample feature line in 20d space (40x20; samples x features)
    ndlin = Dataset(
        np.concatenate([np.arange(40) for i in range(20)]).reshape(20, -1).T)

    pm = PCAMapper()
    # train PCA -- must fail on integer input data
    assert_raises(mdp.NodeException, pm.train, ndlin)
    ndlin.samples = ndlin.samples.astype('float')
    ndlin_noise = ndlin.copy()
    ndlin_noise.samples += np.random.random(size=ndlin.samples.shape)
    # we have no variance for more than one PCA component, hence just one
    # actual non-zero eigenvalue
    assert_raises(mdp.NodeException, pm.train, ndlin)
    pm.train(ndlin_noise)
    assert_equal(pm.proj.shape, (20, 20))

    # now project data into PCA space
    p = pm.forward(ndlin.samples)
    assert_equal(p.shape, (40, 20))
    # check that the mapped data can be fully recovered by 'reverse()'
    assert_array_almost_equal(pm.reverse(p), ndlin)
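
# The test below references 'myarray', which in the original test module is a
# trivial np.ndarray subclass used to check that array subclasses survive
# dataset construction. A minimal stand-in (an assumption, not the original
# definition) is provided here so the excerpt remains self-contained.
class myarray(np.ndarray):
    pass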
def test_from_wizard():
    samples = np.arange(12).reshape((4, 3)).view(myarray)
    labels = range(4)
    chunks = [1, 1, 2, 2]

    ds = Dataset(samples, sa={'targets': labels, 'chunks': chunks})
    ds.init_origids('both')
    first = ds.sa.origids
    # now do again and check that they get regenerated
    ds.init_origids('both')
    assert_false(first is ds.sa.origids)
    assert_array_equal(first, ds.sa.origids)

    ok_(is_datasetlike(ds))
    ok_(not is_datasetlike(labels))

    # array subclass survives
    ok_(isinstance(ds.samples, myarray))

    ## XXX stuff that needs thought:
    # ds.sa (empty) has this in the public namespace:
    #   add, get, getvalue, has_key, is_set, items, listing, name, names
    #   owner, remove, reset, setvalue, which_set
    # maybe we need some form of lightweight Collection?

    assert_array_equal(ds.samples, samples)
    assert_array_equal(ds.sa.targets, labels)
    assert_array_equal(ds.sa.chunks, chunks)

    # same should work for shortcuts
    assert_array_equal(ds.targets, labels)
    assert_array_equal(ds.chunks, chunks)

    ok_(sorted(ds.sa.keys()) == ['chunks', 'origids', 'targets'])
    ok_(sorted(ds.fa.keys()) == ['origids'])
    # add some more
    ds.a['random'] = 'blurb'

    # check stripping attributes from a copy
    cds = ds.copy()  # full copy
    ok_(sorted(cds.sa.keys()) == ['chunks', 'origids', 'targets'])
    ok_(sorted(cds.fa.keys()) == ['origids'])
    ok_(sorted(cds.a.keys()) == ['random'])
    cds = ds.copy(sa=[], fa=[], a=[])  # plain copy
    ok_(cds.sa.keys() == [])
    ok_(cds.fa.keys() == [])
    ok_(cds.a.keys() == [])
    cds = ds.copy(sa=['targets'], fa=None, a=['random'])  # partial copy
    ok_(cds.sa.keys() == ['targets'])
    ok_(cds.fa.keys() == ['origids'])
    ok_(cds.a.keys() == ['random'])

    # there is not necessarily a mapper present
    ok_(not ds.a.has_key('mapper'))

    # has to complain about misshapen samples attributes
    assert_raises(ValueError, Dataset.from_wizard, samples, labels + labels)

    # check that we actually have attributes of the expected type
    ok_(isinstance(ds.sa['targets'], ArrayCollectable))

    # the dataset will take care of not adding stupid stuff
    assert_raises(ValueError, ds.sa.__setitem__, 'stupid', np.arange(3))
    assert_raises(ValueError, ds.fa.__setitem__, 'stupid', np.arange(4))
    # or change proper attributes to stupid shapes
    try:
        ds.sa.targets = np.arange(3)
    except ValueError:
        pass
    else:
        ok_(False, msg="Assigning value with improper shape to attribute "
                       "did not raise exception.")