def test_reflection(self, rep=10):
    from mvpa2.testing.datasets import get_random_rotation
    for i in range(rep):
        d = np.random.random((100, 2))
        T = get_random_rotation(d.shape[1])
        d2 = np.dot(d, T)
        # scale it up a bit
        d2 *= 1.2
        # add a reflection by flipping the first dimension
        d2[:, 0] *= -1
        ds = dataset_wizard(samples=d, targets=d2)
        norm0 = np.linalg.norm(d - d2)
        eps = 1e-7

        mapper = ProcrusteanMapper(scaling=False, reflection=False)
        mapper.train(ds)
        norm1 = np.linalg.norm(d2 - mapper.forward(ds).samples)
        self.assertLess(norm1, norm0 + eps,
                        msg='Procrustes should reduce difference, '
                            'but %f > %f' % (norm1, norm0))

        mapper = ProcrusteanMapper(scaling=True, reflection=False)
        mapper.train(ds)
        norm2 = np.linalg.norm(d2 - mapper.forward(ds).samples)
        self.assertLess(norm2, norm1 + eps,
                        msg='Procrustes with scaling should work better, '
                            'but %f > %f' % (norm2, norm1))

        mapper = ProcrusteanMapper(scaling=False, reflection=True)
        mapper.train(ds)
        norm3 = np.linalg.norm(d2 - mapper.forward(ds).samples)
        self.assertLess(norm3, norm1 + eps,
                        msg='Procrustes with reflection should work better, '
                            'but %f > %f' % (norm3, norm1))

        mapper = ProcrusteanMapper(scaling=True, reflection=True)
        mapper.train(ds)
        norm4 = np.linalg.norm(d2 - mapper.forward(ds).samples)
        self.assertLess(norm4, norm3 + eps,
                        msg='Procrustes with scaling should work better, '
                            'but %f > %f' % (norm4, norm3))
        self.assertLess(norm4, norm2 + eps,
                        msg='Procrustes with reflection should work better, '
                            'but %f > %f' % (norm4, norm2))
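# The transformation exercised above can be sketched in a few lines of
# numpy.  NOTE: this is a minimal sketch of the classic SVD solution to
# the (scaled) orthogonal Procrustes problem, not PyMVPA's
# ProcrusteanMapper; the helper name solve_procrustes is made up for
# illustration, and it fits a pure linear map (a shift between X and Y
# would need demeaning first, as ProcrusteanMapper's demean flag suggests).
def solve_procrustes(X, Y, scaling=True, reflection=True):
    """Sketch: scale s and orthogonal T minimizing ||s * X.dot(T) - Y||."""
    # SVD of the cross-product matrix between source and target
    U, S, Vt = np.linalg.svd(np.dot(X.T, Y))
    d = np.ones(len(S))
    if not reflection and np.linalg.det(np.dot(U, Vt)) < 0:
        # flip the weakest direction so T stays a proper rotation
        d[-1] = -1
    T = np.dot(U * d, Vt)
    # optimal scale for the chosen T: trace(T' X' Y) / trace(X' X)
    s = (S * d).sum() / np.trace(np.dot(X.T, X)) if scaling else 1.0
    return s, T

# With d and d2 as generated in test_reflection (a rotation plus a
# reflection, scaled by 1.2, no shift), the recovered transform should be
# nearly exact:
#   s, T = solve_procrustes(d, d2)
#   np.linalg.norm(s * np.dot(d, T) - d2)   # ~0, well below norm0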
def test_basic_functioning(self, ref_ds, zscore_common):
    # get a dataset with some prominent trends in it
    ds4l = datasets['uni4large']
    # lets select for now only meaningful features
    ds_orig = ds4l[:, ds4l.a.nonbogus_features]
    nf = ds_orig.nfeatures
    n = 5  # # of datasets to generate
    Rs, dss_rotated, dss_rotated_clean, random_shifts, random_scales \
        = [], [], [], [], []
    # now lets compose derived datasets by using some random
    # rotation(s)
    for i in xrange(n):
        R = get_random_rotation(ds_orig.nfeatures)
        Rs.append(R)
        ds_ = ds_orig.copy()
        # reusing random data from dataset itself
        random_scales += [ds_orig.samples[i, 3] * 100]
        random_shifts += [ds_orig.samples[i + 10] * 10]
        random_noise = ds4l.samples[:, ds4l.a.bogus_features[:4]]
        ds_.samples = np.dot(ds_orig.samples, R) * random_scales[-1] \
                      + random_shifts[-1]
        dss_rotated_clean.append(ds_)
        ds_ = ds_.copy()
        ds_.samples = ds_.samples + 0.1 * random_noise
        dss_rotated.append(ds_)

    ha = Hyperalignment(ref_ds=ref_ds, zscore_common=zscore_common)
    if ref_ds is None:
        ref_ds = 0  # by default should be this one
    # Lets test two scenarios -- in one with no noise -- we should get
    # close to perfect reconstruction.  If noise was added -- not so good
    for noisy, dss in ((False, dss_rotated_clean),
                       (True, dss_rotated)):
        mappers = ha(dss)
        self.failUnlessEqual(ref_ds, ha.ca.choosen_ref_ds)
        # Map data back
        dss_clean_back = [m.forward(ds_)
                          for m, ds_ in zip(mappers, dss_rotated_clean)]
        ds_norm = np.linalg.norm(dss[ref_ds].samples)
        nddss = []
        ndcss = []
        ds_orig_Rref = np.dot(ds_orig.samples, Rs[ref_ds]) \
                       * random_scales[ref_ds] \
                       + random_shifts[ref_ds]
        for ds_back in dss_clean_back:
            # if we used zscoring of common, we cannot rely
            # that range/offset could be matched, so lets use
            # corrcoef
            ndcs = np.diag(np.corrcoef(ds_back.samples.T,
                                       ds_orig_Rref.T)[nf:, :nf], k=0)
            ndcss += [ndcs]
            dds = ds_back.samples - ds_orig_Rref
            ndds = np.linalg.norm(dds) / ds_norm
            nddss += [ndds]
        if not noisy or cfg.getboolean('tests', 'labile', default='yes'):
            # First compare correlations
            self.failUnless(
                np.all(np.array(ndcss) >= (0.9, 0.85)[int(noisy)]),
                msg="Should have reconstructed original dataset more or"
                    " less. Got correlations %s in %s case."
                    % (ndcss, ('clean', 'noisy')[int(noisy)]))
            if not zscore_common:
                # only reasonable without zscoring
                self.failUnless(
                    np.all(np.array(nddss) <= (1e-10, 1e-2)[int(noisy)]),
                    msg="Should have reconstructed original dataset more or"
                        " less. Got normed differences %s in %s case."
                        % (nddss, ('clean', 'noisy')[int(noisy)]))

    # Lets see how well we do if asked to compute residuals
    ha = Hyperalignment(ref_ds=ref_ds, level2_niter=2,
                        enable_ca=['residual_errors'])
    mappers = ha(dss_rotated_clean)
    self.failUnless(np.all(ha.ca.residual_errors.sa.levels ==
                           ['1', '2:0', '2:1', '3']))
    rerrors = ha.ca.residual_errors.samples
    # just basic tests:
    self.failUnlessEqual(rerrors[0, ref_ds], 0)
    self.failUnlessEqual(rerrors.shape, (4, n))
def test_simple(self, svd, oblique):
    d_orig = datasets['uni2large'].samples
    d_orig2 = datasets['uni4large'].samples
    for sdim, nf_s, nf_t, full_test \
            in (('Same 2D', 2, 2, True),
                ('Same 10D', 10, 10, True),
                ('2D -> 3D', 2, 3, True),
                ('3D -> 2D', 3, 2, False)):
        # figure out some "random" rotation
        d = max(nf_s, nf_t)
        R = get_random_rotation(nf_s, nf_t, d_orig)
        if nf_s == nf_t:
            adR = np.abs(1.0 - np.linalg.det(R))
            self.assertTrue(adR < 1e-10,
                            "Determinant of rotation matrix should "
                            "be 1. Got it 1+%g" % adR)
            self.assertTrue(norm(np.dot(R, R.T)
                                 - np.eye(R.shape[0])) < 1e-10)

        for (s, scaling), demean in itertools.product(
                ((0.3, True), (1.0, False)),
                (False, True)):
            pm = ProcrusteanMapper(scaling=scaling, oblique=oblique,
                                   svd=svd, demean=demean)
            # pm2 = ProcrusteanMapper(scaling=scaling, oblique=oblique)
            if demean:
                t1, t2 = d_orig[23, 1], d_orig[22, 1]
            else:
                t1, t2 = 0, 0
                full_test = False  # although runs, not intended to perform properly

            # Create source/target data
            d = d_orig[:, :nf_s]
            d_s = d + t1
            d_t = np.dot(s * d, R) + t2

            # train bloody mapper(s)
            ds = dataset_wizard(samples=d_s, targets=d_t)
            pm.train(ds)
            ## not possible with new interface
            #pm2.train(d_s, d_t)
            ## verify that both created the same transformation
            #npm2proj = norm(pm.proj - pm2.proj)
            #self.assertTrue(npm2proj <= 1e-10,
            #                msg="Got transformation different by norm %g."
            #                    " Had to be less than 1e-10" % npm2proj)
            #self.assertTrue(norm(pm._offset_in - pm2._offset_in) <= 1e-10)
            #self.assertTrue(norm(pm._offset_out - pm2._offset_out) <= 1e-10)

            # do forward transformation on the same source data
            d_s_f = pm.forward(d_s)

            self.assertEqual(d_s_f.shape, d_t.shape,
                             msg="Mapped shape should be identical to the d_t")

            dsf = d_s_f - d_t
            ndsf = norm(dsf) / norm(d_t)
            if full_test:
                dsR = norm(s * R - pm.proj)
                if not oblique:
                    self.assertTrue(
                        dsR <= 1e-12,
                        msg="We should have got reconstructed rotation+scaling "
                            "perfectly. Now got d scale*R=%g" % dsR)
                    self.assertTrue(
                        np.abs(s - pm._scale) < 1e-12,
                        msg="We should have got reconstructed scale "
                            "perfectly. Now got %g for %g" % (pm._scale, s))
                self.assertTrue(
                    ndsf <= 1e-12,
                    msg="%s: Failed to get to the target space correctly."
                        " normed error=%g" % (sdim, ndsf))

            # Test if we get back
            d_s_f_r = pm.reverse(d_s_f)

            # Test if recon proj is true inverse except for high->low projection
            if nf_s <= nf_t:
                assert_almost_equal(
                    np.dot(pm._proj, pm._recon),
                    np.eye(pm._proj.shape[0]),
                    err_msg="Deviation from identity matrix is too large")

            dsfr = d_s_f_r - d_s
            ndsfr = norm(dsfr) / norm(d_s)
            if full_test:
                self.assertTrue(
                    ndsfr <= 1e-12,
                    msg="%s: Failed to reconstruct into source space correctly."
                        " normed error=%g" % (sdim, ndsfr))
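# test_simple asserts that get_random_rotation yields an orthonormal
# matrix with determinant +1.  A minimal way to draw such a matrix,
# sketched here with numpy's QR decomposition; PyMVPA's actual helper
# also supports rectangular ns x nt outputs seeded from data, so this
# toy random_rotation is only an illustration of the square case.
def random_rotation(n):
    """Sketch: a random n x n proper rotation (orthonormal, det == +1)."""
    Q, r = np.linalg.qr(np.random.normal(size=(n, n)))
    # fix column signs to make the factorization unique
    Q *= np.sign(np.diag(r))
    if np.linalg.det(Q) < 0:
        # flip one axis to move from O(n) into SO(n)
        Q[:, 0] *= -1
    return Q

# e.g. R = random_rotation(10) passes the same checks as the test above:
#   np.abs(1.0 - np.linalg.det(R)) < 1e-10
#   norm(np.dot(R, R.T) - np.eye(10)) < 1e-10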
def test_basic_functioning(self, ref_ds, zscore_common, zscore_all):
    ha = Hyperalignment(ref_ds=ref_ds,
                        zscore_all=zscore_all,
                        zscore_common=zscore_common)
    if ref_ds is None:
        ref_ds = 0  # by default should be this one
    # get a dataset with some prominent trends in it
    ds4l = datasets['uni4large']
    # lets select for now only meaningful features
    ds_orig = ds4l[:, ds4l.a.nonbogus_features]
    nf = ds_orig.nfeatures
    n = 4  # # of datasets to generate
    Rs, dss_rotated, dss_rotated_clean, random_shifts, random_scales \
        = [], [], [], [], []
    # now lets compose derived datasets by using some random
    # rotation(s)
    for i in xrange(n):
        ## if False: # i == ref_ds:
        ##     # Do not rotate the target space so we could check later on
        ##     # if we transform back nicely
        ##     R = np.eye(ds_orig.nfeatures)
        ## else:
        R = get_random_rotation(ds_orig.nfeatures)
        Rs.append(R)
        ds_ = ds_orig.copy()
        # reusing random data from dataset itself
        random_scales += [ds_orig.samples[i, 3] * 100]
        random_shifts += [ds_orig.samples[i + 10] * 10]
        random_noise = ds4l.samples[:, ds4l.a.bogus_features[:4]]
        ds_.samples = np.dot(ds_orig.samples, R) * random_scales[-1] \
                      + random_shifts[-1]
        ## if (zscore_common or zscore_all):
        ##     # for later on testing of "precise" reconstruction
        ##     zscore(ds_, chunks_attr=None)
        dss_rotated_clean.append(ds_)
        ds_ = ds_.copy()
        ds_.samples = ds_.samples + 0.1 * random_noise
        dss_rotated.append(ds_)

    # Lets test two scenarios -- in one with no noise -- we should get
    # close to perfect reconstruction.  If noise was added -- not so good
    for noisy, dss in ((False, dss_rotated_clean),
                       (True, dss_rotated)):
        # to verify that original datasets didn't get changed by
        # Hyperalignment store their idhashes of samples
        idhashes = [idhash(ds.samples) for ds in dss]
        idhashes_targets = [idhash(ds.targets) for ds in dss]

        mappers = ha(dss)

        idhashes_ = [idhash(ds.samples) for ds in dss]
        idhashes_targets_ = [idhash(ds.targets) for ds in dss]
        self.assertEqual(idhashes, idhashes_,
                         msg="Hyperalignment must not change original data.")
        self.assertEqual(idhashes_targets, idhashes_targets_,
                         msg="Hyperalignment must not change original data targets.")

        self.assertEqual(ref_ds, ha.ca.choosen_ref_ds)
        # Map data back
        dss_clean_back = [m.forward(ds_)
                          for m, ds_ in zip(mappers, dss_rotated_clean)]

        ds_norm = np.linalg.norm(dss[ref_ds].samples)
        nddss = []
        ndcss = []
        ds_orig_Rref = np.dot(ds_orig.samples, Rs[ref_ds]) \
                       * random_scales[ref_ds] \
                       + random_shifts[ref_ds]
        if zscore_common or zscore_all:
            zscore(Dataset(ds_orig_Rref), chunks_attr=None)
        for ds_back in dss_clean_back:
            # if we used zscoring of common, we cannot rely
            # that range/offset could be matched, so lets use
            # corrcoef
            ndcs = np.diag(np.corrcoef(ds_back.samples.T,
                                       ds_orig_Rref.T)[nf:, :nf], k=0)
            ndcss += [ndcs]
            dds = ds_back.samples - ds_orig_Rref
            ndds = np.linalg.norm(dds) / ds_norm
            nddss += [ndds]
        snoisy = ('clean', 'noisy')[int(noisy)]
        do_labile = cfg.getboolean('tests', 'labile', default='yes')
        if not noisy or do_labile:
            # First compare correlations
            self.assertTrue(
                np.all(np.array(ndcss) >= (0.9, 0.85)[int(noisy)]),
                msg="Should have reconstructed original dataset more or"
                    " less. Got correlations %s in %s case." % (ndcss, snoisy))
            if not (zscore_all or zscore_common):
                # if we didn't zscore -- all of them should be really close
                self.assertTrue(
                    np.all(np.array(nddss) <= (1e-10, 1e-1)[int(noisy)]),
                    msg="Should have reconstructed original dataset well "
                        "without zscoring. Got normed differences %s in %s case."
                        % (nddss, snoisy))
            elif do_labile:
                # otherwise they all should be somewhat close
                #print snoisy, ref_ds, nddss
                self.assertTrue(
                    np.all(np.array(nddss) >= nddss[ref_ds]),
                    msg="Should have reconstructed orig_ds best of all. "
                        "Got normed differences %s in %s case with ref_ds=%d."
                        % (nddss, snoisy, ref_ds))
                self.assertTrue(
                    np.all(np.array(nddss) <= (.2, 3)[int(noisy)]),
                    msg="Should have reconstructed original dataset more or"
                        " less for all. Got normed differences %s in %s case."
                        % (nddss, snoisy))
                self.assertTrue(
                    np.all(nddss[ref_ds] <= .05),
                    msg="Should have reconstructed original dataset quite "
                        "well even with zscoring. Got normed differences %s "
                        "in %s case." % (nddss, snoisy))

    # Lets see how well we do if asked to compute residuals
    ha = Hyperalignment(ref_ds=ref_ds, level2_niter=2,
                        enable_ca=['residual_errors'])
    mappers = ha(dss_rotated_clean)
    self.assertTrue(np.all(ha.ca.residual_errors.sa.levels ==
                           ['1', '2:0', '2:1', '3']))
    rerrors = ha.ca.residual_errors.samples
    # just basic tests:
    self.assertEqual(rerrors[0, ref_ds], 0)
    self.assertEqual(rerrors.shape, (4, n))
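# For orientation, the alignment procedure exercised by
# test_basic_functioning can be sketched independently of PyMVPA.  This
# toy hyperalign follows the usual level-1/level-2 scheme (align each
# dataset to a reference, then iteratively refine a common template as
# the mean of the aligned data).  It reuses the hypothetical
# solve_procrustes helper sketched earlier and illustrates the idea
# only -- it is not Hyperalignment's implementation, which also handles
# zscoring, Dataset objects, and residual-error bookkeeping.
def hyperalign(dss, ref=0, level2_niter=2):
    """Sketch: one (scale, rotation) pair per dataset into a common space."""
    # level 1 uses the reference dataset itself as the initial template
    template = dss[ref]
    for _ in xrange(level2_niter + 1):
        # align every dataset to the current template
        aligned = []
        for X in dss:
            s, T = solve_procrustes(X, template)
            aligned.append(s * np.dot(X, T))
        # level 2: refine the template as the mean of the aligned datasets
        template = np.mean(aligned, axis=0)
    # level 3: final mappers into the common space
    return [solve_procrustes(X, template) for X in dss]

# Mapping each dss[i] forward with its returned (s, T) should land all
# datasets in a shared space up to the injected noise, mirroring the
# nddss / ndcss reconstruction checks in the test above.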