Beispiel #1
0
    def setUp(self):
        """Create the two line-shaped datasets and a fresh PCAMapper."""
        def line_samples(n_samples, n_features):
            # every feature column is the ramp 0..n_samples-1, so row i of
            # the result holds the value i in each of its features
            ramps = [N.arange(n_samples) for _ in range(n_features)]
            return N.concatenate(ramps).reshape(n_features, -1).T

        # 40 samples in a 20d feature space (40x20; samples x features)
        self.ndlin = Dataset(samples=line_samples(40, 20),
                             labels=1, chunks=1)

        # 10 samples in a 40d feature space (10x40; samples x features)
        self.largefeat = Dataset(samples=line_samples(10, 40),
                                 labels=1, chunks=1)

        self.pm = PCAMapper()
Beispiel #2
0
class PCAMapperTests(unittest.TestCase):

    def setUp(self):
        # data: 40 sample feature line in 20d space (40x20; samples x features)
        self.ndlin = Dataset(samples=N.concatenate(
                        [N.arange(40) for i in range(20)]).reshape(20,-1).T, labels=1, chunks=1)

        # data: 10 sample feature line in 40d space
        #       (10x40; samples x features)
        self.largefeat = Dataset(samples=N.concatenate(
                        [N.arange(10) for i in range(40)]).reshape(40,-1).T, labels=1, chunks=1)

        self.pm = PCAMapper()


    def testSimplePCA(self):
        # train PCA
        self.pm.train(self.ndlin)

        self.failUnlessEqual(self.pm.mix.shape, (20, 20))

        # now project data into PCA space
        p = self.pm.forward(self.ndlin.samples)

        # only first eigenvalue significant
        self.failUnless(self.pm.sv[:1] > 1.0)
        self.failUnless((self.pm.sv[1:] < 0.0001).all())

        # only variance of first component significant
        var = p.var(axis=0)

        # test that only one component has variance
        self.failUnless(var[:1] > 1.0)
        self.failUnless((var[1:] < 0.0001).all())

        # check that the mapped data can be fully recovered by 'reverse()'
        self.failUnless((N.round(self.pm.reverse(p)) == self.ndlin.samples).all())


    def testAutoOptimizePCA(self):
        # train PCA
        self.pm.train(self.largefeat)

        # mixing matrix cannot be square
#        self.failUnlessEqual(self.pm.mix.shape, (10, 40))

        # only first eigenvalue significant
        self.failUnless(self.pm.sv[:1] > 10)
        self.failUnless((self.pm.sv[1:] < 10).all())

        # now project data into PCA space
        p = self.pm.forward(self.largefeat.samples)

        # only variance of first component significant
        var = p.var(axis=0)
        # test that only one component has variance
        self.failUnless(var[:1] > 1.0)
        self.failUnless((var[1:] < 0.0001).all())

        # check that the mapped data can be fully recovered by 'reverse()'
        rp = self.pm.reverse(p)
        self.failUnlessEqual(rp.shape, self.largefeat.samples.shape)
        self.failUnless((N.round(rp) == self.largefeat.samples).all())

        self.failUnlessEqual(self.pm.getInSize(), 40)
#        self.failUnlessEqual(self.pm.getOutSize(), 10)
        self.failUnlessEqual(self.pm.getOutSize(), 40)

        # copy mapper
        pm2 = deepcopy(self.pm)

        # now remove all but the first 2 components from the mapper
        pm2.selectOut([0,1])

        # sanity check
        self.failUnlessEqual(pm2.getInSize(), 40)
        self.failUnlessEqual(pm2.getOutSize(), 2)

        # but orginal mapper must be left intact
        self.failUnlessEqual(self.pm.getInSize(), 40)
#        self.failUnlessEqual(self.pm.getOutSize(), 10)
        self.failUnlessEqual(self.pm.getOutSize(), 40)

        # data should still be fully recoverable by 'reverse()'
        rp2 = pm2.reverse(p[:,[0,1]])
        self.failUnlessEqual(rp2.shape, self.largefeat.samples.shape)
        self.failUnless((N.round(rp2) == self.largefeat.samples).all())