def testCompareToZscore(self):
    """Cross-check ZScoreMapper against the older zscore() function.

    For every dataset in ``self.dss`` two independent deep copies are
    made.  A ZScoreMapper is trained on the first copy and used to
    forward-map that copy's samples; the second copy is passed to
    ``zscore(..., perchunk=False)`` and its samples are read back
    afterwards.  The test then checks that

    * both routes agree (norm of the difference below 1e-12),
    * the samples of the copy given to the mapper still equal those of
      the source dataset,
    * ``reverse()`` applied to the mapped samples reproduces the source
      samples (again within 1e-12).
    """
    tolerance = 1e-12
    for dataset in self.dss:
        for_mapper = deepcopy(dataset)
        for_function = deepcopy(dataset)

        # mapper route: train first, then transform the samples
        mapper = ZScoreMapper()
        mapper.train(for_mapper)
        mapped = mapper.forward(for_mapper.samples)

        # function route: the result is read back from the dataset itself
        zscore(for_function, perchunk=False)

        deviation = N.linalg.norm(mapped - for_function.samples)
        self.failUnless(deviation < tolerance)

        # training/forward-mapping must leave the input samples alone
        untouched = (for_mapper.samples == dataset.samples).all()
        self.failUnless(untouched,
                        msg="It seems we modified original dataset!")

        # round trip: z-scores -> original sample values
        restored = mapper.reverse(mapped)
        roundtrip_error = N.linalg.norm(restored - dataset.samples)
        self.failUnless(roundtrip_error < tolerance,
                        msg="Can't reconstruct from z-scores")
def testZScoring(self):
    """Verify zscore() on a small single-chunk dataset.

    The 16 one-feature samples have mean 2 and standard deviation 1
    (both asserted below), so the expected z-scores are simply the
    samples minus 2.  Three calls are exercised:

    * ``perchunk=True`` and ``perchunk=False`` -- all samples share
      chunk 0, and both calls are expected to produce the same result;
    * ``baselinelabels=[0, 1]`` -- only the samples labelled 0 or 1
      (values 0 and 2) form the baseline, and the test expects the
      outcome to be the original samples shifted down by one.
    """
    # dataset: mean=2, std=1
    raw_values = [0, 1, 3, 4, 2, 2, 3, 1, 1, 3, 3, 1, 2, 2, 2, 2]
    samples = N.array(raw_values).reshape((16, 1))

    # expected z-scores for the mean=2, std=1 data above
    expected_values = [-2, -1, 1, 2, 0, 0, 1, -1, -1, 1, 1, -1, 0, 0, 0, 0]
    check = N.array(expected_values, dtype="float64").reshape(16, 1)

    data = Dataset(samples=samples, labels=range(16), chunks=[0] * 16)
    observed_mean = data.samples.mean()
    self.failUnlessEqual(observed_mean, 2.0)
    observed_std = data.samples.std()
    self.failUnlessEqual(observed_std, 1.0)

    # check z-scoring, chunk-wise variant
    zscore(data, perchunk=True)
    chunkwise_ok = (data.samples == check).all()
    self.failUnless(chunkwise_ok)

    # same data again, this time z-scored across the whole dataset
    data = Dataset(samples=samples, labels=range(16), chunks=[0] * 16)
    zscore(data, perchunk=False)
    global_ok = (data.samples == check).all()
    self.failUnless(global_ok)

    # check z-scoring taking set of labels as a baseline
    labels_with_baseline = [0, 2, 2, 2, 1] + [2] * 11
    data = Dataset(samples=samples, labels=labels_with_baseline,
                   chunks=[0] * 16)
    zscore(data, baselinelabels=[0, 1])
    shifted_back = data.samples + 1.0
    self.failUnless((samples == shifted_back).all())
# respective labels, i.e. samples with the same label are plotted # in adjacent columns/rows. # Note that the first and largest group corresponds to the # 'rest' condition in the dataset P.subplot(122) plotSamplesDistance(ds, sortbyattr='labels') P.title('Sample distances (sorted by labels)') if cfg.getboolean('examples', 'interactive', True): P.show() # detrend the data per chunk, then z-score the features print 'Detrending data' detrend(ds, perchunk=True, model='regress', polyord=2) print 'Z-Scoring data' zscore(ds) P.subplot(121) plotSamplesDistance(ds, sortbyattr='chunks') P.title('Distances: z-scored, detrended (sorted by chunks)') P.subplot(122) plotSamplesDistance(ds, sortbyattr='labels') P.title('Distances: z-scored, detrended (sorted by labels)') if cfg.getboolean('examples', 'interactive', True): P.show() # XXX add some more, maybe show effect of preprocessing """ Outputs of the example script. Data prior to preprocessing