Example #1
    def test_reflection(self, rep=10):
        from mvpa2.testing.datasets import get_random_rotation
        for i in range(rep):
            d = np.random.random((100, 2))
            T = get_random_rotation(d.shape[1])
            d2 = np.dot(d, T)
            # scale it up a bit
            d2 *= 1.2
            # add a reflection by flipping the first dimension
            d2[:, 0] *= -1
            ds = dataset_wizard(samples=d, targets=d2)

            norm0 = np.linalg.norm(d - d2)

            mapper = ProcrusteanMapper(scaling=False, reflection=False)
            mapper.train(ds)
            norm1 = np.linalg.norm(d2 - mapper.forward(ds).samples)
            eps = 1e-7
            self.assertLess(norm1,
                            norm0 + eps,
                            msg='Procrustes should reduce difference, '
                            'but %f > %f' % (norm1, norm0))

            mapper = ProcrusteanMapper(scaling=True, reflection=False)
            mapper.train(ds)
            norm2 = np.linalg.norm(d2 - mapper.forward(ds).samples)
            self.assertLess(norm2,
                            norm1 + eps,
                            msg='Procrustes with scaling should work better, '
                            'but %f > %f' % (norm2, norm1))

            mapper = ProcrusteanMapper(scaling=False, reflection=True)
            mapper.train(ds)
            norm3 = np.linalg.norm(d2 - mapper.forward(ds).samples)
            self.assertLess(
                norm3,
                norm1 + eps,
                msg='Procrustes with reflection should work better, '
                'but %f > %f' % (norm3, norm1))

            mapper = ProcrusteanMapper(scaling=True, reflection=True)
            mapper.train(ds)
            norm4 = np.linalg.norm(d2 - mapper.forward(ds).samples)
            self.assertLess(norm4,
                            norm3 + eps,
                            msg='Procrustes with scaling should work better, '
                            'but %f > %f' % (norm4, norm3))
            self.assertLess(
                norm4,
                norm2 + eps,
                msg='Procrustes with reflection should work better, '
                'but %f > %f' % (norm4, norm2))
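The scaling/reflection flags above map onto the classical orthogonal Procrustes solution. Below is a minimal, self-contained sketch of that solution, assuming column-centered source X and target Y; procrustes_fit is a hypothetical helper for illustration, not the actual ProcrusteanMapper implementation.

import numpy as np

def procrustes_fit(X, Y, scaling=True, reflection=True):
    # SVD of the cross-product gives the best orthogonal map X @ T ~= Y
    U, s, Vt = np.linalg.svd(X.T @ Y)
    if not reflection and np.linalg.det(U @ Vt) < 0:
        # forbid improper rotations: flip the sign tied to the
        # smallest singular value before recomposing
        U[:, -1] *= -1
        s[-1] *= -1
    T = U @ Vt
    scale = s.sum() / (X ** 2).sum() if scaling else 1.0
    return scale * T

# a reflected, scaled target is recovered exactly only with reflection=True
X = np.random.random((100, 2))
X -= X.mean(axis=0)                             # center, as assumed above
Y = 1.2 * np.dot(X, [[-1.0, 0.0], [0.0, 1.0]])  # flip first axis, scale up
T = procrustes_fit(X, Y, scaling=True, reflection=True)
print(np.linalg.norm(X @ T - Y))                # ~0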
Example #2
    def test_basic_functioning(self, ref_ds, zscore_common):
        # get a dataset with some prominent trends in it
        ds4l = datasets['uni4large']
        # let's select only the meaningful features for now
        ds_orig = ds4l[:, ds4l.a.nonbogus_features]
        nf = ds_orig.nfeatures
        n = 5  # number of datasets to generate
        Rs, dss_rotated, dss_rotated_clean, random_shifts, random_scales \
            = [], [], [], [], []
        # now let's compose derived datasets using random rotations
        for i in range(n):
            R = get_random_rotation(ds_orig.nfeatures)
            Rs.append(R)
            ds_ = ds_orig.copy()
            # reusing random data from dataset itself
            random_scales += [ds_orig.samples[i, 3] * 100]
            random_shifts += [ds_orig.samples[i+10] * 10]
            random_noise = ds4l.samples[:, ds4l.a.bogus_features[:4]]
            ds_.samples = np.dot(ds_orig.samples, R) * random_scales[-1] \
                          + random_shifts[-1]
            dss_rotated_clean.append(ds_)

            ds_ = ds_.copy()
            ds_.samples = ds_.samples + 0.1 * random_noise
            dss_rotated.append(ds_)

        ha = Hyperalignment(ref_ds=ref_ds, zscore_common=zscore_common)
        if ref_ds is None:
            ref_ds = 0                      # by default should be this one
        # Let's test two scenarios: with no noise we should get close to
        # perfect reconstruction; with added noise, not so good.
        for noisy, dss in ((False, dss_rotated_clean),
                           (True, dss_rotated)):
            mappers = ha(dss)
            self.assertEqual(ref_ds, ha.ca.choosen_ref_ds)
            # Map data back

            dss_clean_back = [m.forward(ds_)
                              for m, ds_ in zip(mappers, dss_rotated_clean)]

            ds_norm = np.linalg.norm(dss[ref_ds].samples)
            nddss = []
            ndcss = []
            ds_orig_Rref = np.dot(ds_orig.samples, Rs[ref_ds]) \
                           * random_scales[ref_ds] \
                           + random_shifts[ref_ds]
            for ds_back in dss_clean_back:
                # if z-scoring of the common space was used, we cannot rely
                # on the range/offset being matched, so let's use corrcoef
                ndcs = np.diag(np.corrcoef(ds_back.samples.T,
                                           ds_orig_Rref.T)[nf:, :nf], k=0)
                ndcss += [ndcs]
                dds = ds_back.samples - ds_orig_Rref
                ndds = np.linalg.norm(dds) / ds_norm
                nddss += [ndds]
            if not noisy or cfg.getboolean('tests', 'labile', default='yes'):
                # First compare correlations
                self.assertTrue(np.all(np.array(ndcss)
                                       >= (0.9, 0.85)[int(noisy)]),
                        msg="Should have reconstructed original dataset more or"
                        " less. Got correlations %s in %s case."
                        % (ndcss, ('clean', 'noisy')[int(noisy)]))
                if not zscore_common:
                    # only reasonable without zscoring
                    self.assertTrue(np.all(np.array(nddss)
                                           <= (1e-10, 1e-2)[int(noisy)]),
                        msg="Should have reconstructed original dataset more or"
                        " less. Got normed differences %s in %s case."
                        % (nddss, ('clean', 'noisy')[int(noisy)]))

        # Let's see how well we do if asked to compute residuals
        ha = Hyperalignment(ref_ds=ref_ds, level2_niter=2,
                            enable_ca=['residual_errors'])
        mappers = ha(dss_rotated_clean)
        self.assertTrue(np.all(ha.ca.residual_errors.sa.levels ==
                               ['1', '2:0', '2:1', '3']))
        rerrors = ha.ca.residual_errors.samples
        # just basic tests:
        self.assertEqual(rerrors[0, ref_ds], 0)
        self.assertEqual(rerrors.shape, (4, n))
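The per-feature reconstruction check above uses a compact np.corrcoef idiom: stacking the two transposed arrays yields a (2*nf, 2*nf) matrix whose [nf:, :nf] block holds the cross-correlations, and the diagonal of that block pairs feature i of one array with feature i of the other. A self-contained illustration on synthetic data (not mvpa2 datasets):

import numpy as np

nf = 3
a = np.random.random((50, nf))
b = a + 0.01 * np.random.random((50, nf))   # near-copy of a

cross = np.corrcoef(a.T, b.T)[nf:, :nf]     # (nf, nf) cross-correlation block
per_feature = np.diag(cross)                # corr(b_i, a_i) for each feature i
assert np.all(per_feature > 0.9)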
Example #3
    def test_simple(self, svd, oblique):
        d_orig = datasets['uni2large'].samples
        d_orig2 = datasets['uni4large'].samples
        for sdim, nf_s, nf_t, full_test \
                in (('Same 2D',  2,  2,  True),
                    ('Same 10D', 10, 10, True),
                    ('2D -> 3D', 2,  3,  True),
                    ('3D -> 2D', 3,  2,  False)):
            # figure out some "random" rotation
            d = max(nf_s, nf_t)
            R = get_random_rotation(nf_s, nf_t, d_orig)
            if nf_s == nf_t:
                adR = np.abs(1.0 - np.linalg.det(R))
                self.assertTrue(adR < 1e-10,
                                "Determinant of rotation matrix should "
                                "be 1; got a deviation of %g" % adR)
                self.assertTrue(norm(np.dot(R, R.T)
                                     - np.eye(R.shape[0])) < 1e-10)
            for (s, scaling), demean in itertools.product(
                    ((0.3, True), (1.0, False)),
                    (False, True)):
                pm = ProcrusteanMapper(scaling=scaling, oblique=oblique,
                                       svd=svd, demean=demean)
                # pm2 = ProcrusteanMapper(scaling=scaling, oblique=oblique)
                if demean:
                    t1, t2 = d_orig[23, 1], d_orig[22, 1]
                else:
                    t1, t2 = 0, 0
                    full_test = False  # runs, but not expected to recover the transform exactly

                # Create source/target data
                d = d_orig[:, :nf_s]
                d_s = d + t1
                d_t = np.dot(s * d, R) + t2

                # train the mapper(s)
                ds = dataset_wizard(samples=d_s, targets=d_t)
                pm.train(ds)
                ## not possible with new interface
                #pm2.train(d_s, d_t)

                ## verify that both created the same transformation
                #npm2proj = norm(pm.proj - pm2.proj)
                #self.assertTrue(npm2proj <= 1e-10,
                #                msg="Got transformation different by norm %g."
                #                " Had to be less than 1e-10" % npm2proj)
                #self.assertTrue(norm(pm._offset_in - pm2._offset_in) <= 1e-10)
                #self.assertTrue(norm(pm._offset_out - pm2._offset_out) <= 1e-10)

                # do forward transformation on the same source data
                d_s_f = pm.forward(d_s)

                self.assertEqual(d_s_f.shape, d_t.shape,
                                 msg="Mapped shape should be identical to that of d_t")

                dsf = d_s_f - d_t
                ndsf = norm(dsf)/norm(d_t)
                if full_test:
                    dsR = norm(s*R - pm.proj)

                    if not oblique:
                        self.assertTrue(dsR <= 1e-12,
                                        msg="Should have reconstructed rotation+scaling "
                                            "perfectly; got norm(scale*R - proj)=%g" % dsR)

                        self.assertTrue(np.abs(s - pm._scale) < 1e-12,
                                        msg="Should have reconstructed the scale "
                                            "perfectly; got %g for %g" % (pm._scale, s))

                    self.assertTrue(ndsf <= 1e-12,
                                    msg="%s: Failed to get to the target space correctly."
                                        " normed error=%g" % (sdim, ndsf))

                # Test if we get back
                d_s_f_r = pm.reverse(d_s_f)
                # Test if recon proj is true inverse except for high->low projection
                if nf_s <= nf_t:
                    assert_almost_equal(np.dot(pm._proj, pm._recon),
                                        np.eye(pm._proj.shape[0]),
                                        err_msg="Deviation from identity matrix is too large")
                dsfr = d_s_f_r - d_s
                ndsfr = norm(dsfr)/norm(d_s)
                if full_test:
                    self.assertTrue(ndsfr <= 1e-12,
                                    msg="%s: Failed to reconstruct into source space correctly."
                                        " normed error=%g" % (sdim, ndsfr))
Example #4
    def test_basic_functioning(self, ref_ds, zscore_common, zscore_all):
        ha = Hyperalignment(ref_ds=ref_ds,
                            zscore_all=zscore_all,
                            zscore_common=zscore_common)
        if ref_ds is None:
            ref_ds = 0                      # by default should be this one

        # get a dataset with some prominent trends in it
        ds4l = datasets['uni4large']
        # let's select only the meaningful features for now
        ds_orig = ds4l[:, ds4l.a.nonbogus_features]
        nf = ds_orig.nfeatures
        n = 4  # number of datasets to generate
        Rs, dss_rotated, dss_rotated_clean, random_shifts, random_scales \
            = [], [], [], [], []

        # now let's compose derived datasets using random rotations
        for i in range(n):
            ## if False: # i == ref_ds:
            #     # Do not rotate the target space so we could check later on
            #     # if we transform back nicely
            #     R = np.eye(ds_orig.nfeatures)
            ## else:
            R = get_random_rotation(ds_orig.nfeatures)

            Rs.append(R)
            ds_ = ds_orig.copy()
            # reusing random data from dataset itself
            random_scales += [ds_orig.samples[i, 3] * 100]
            random_shifts += [ds_orig.samples[i+10] * 10]
            random_noise = ds4l.samples[:, ds4l.a.bogus_features[:4]]
            ds_.samples = np.dot(ds_orig.samples, R) * random_scales[-1] \
                          + random_shifts[-1]

            ## if (zscore_common or zscore_all):
            ##     # for later on testing of "precise" reconstruction
            ##     zscore(ds_, chunks_attr=None)

            dss_rotated_clean.append(ds_)

            ds_ = ds_.copy()
            ds_.samples = ds_.samples + 0.1 * random_noise
            dss_rotated.append(ds_)

        # Let's test two scenarios: with no noise we should get close to
        # perfect reconstruction; with added noise, not so good.
        for noisy, dss in ((False, dss_rotated_clean),
                           (True, dss_rotated)):
            # to verify that original datasets didn't get changed by
            # Hyperalignment store their idhashes of samples
            idhashes = [idhash(ds.samples) for ds in dss]
            idhashes_targets = [idhash(ds.targets) for ds in dss]

            mappers = ha(dss)

            idhashes_ = [idhash(ds.samples) for ds in dss]
            idhashes_targets_ = [idhash(ds.targets) for ds in dss]
            self.assertEqual(idhashes, idhashes_,
                msg="Hyperalignment must not change original data.")
            self.assertEqual(idhashes_targets, idhashes_targets_,
                msg="Hyperalignment must not change original data targets.")

            self.assertEqual(ref_ds, ha.ca.choosen_ref_ds)

            # Map data back

            dss_clean_back = [m.forward(ds_)
                              for m, ds_ in zip(mappers, dss_rotated_clean)]

            ds_norm = np.linalg.norm(dss[ref_ds].samples)
            nddss = []
            ndcss = []
            ds_orig_Rref = np.dot(ds_orig.samples, Rs[ref_ds]) \
                           * random_scales[ref_ds] \
                           + random_shifts[ref_ds]
            if zscore_common or zscore_all:
                zscore(Dataset(ds_orig_Rref), chunks_attr=None)
            for ds_back in dss_clean_back:
                # if z-scoring of the common space was used, we cannot rely
                # on the range/offset being matched, so let's use corrcoef
                ndcs = np.diag(np.corrcoef(ds_back.samples.T,
                                           ds_orig_Rref.T)[nf:, :nf], k=0)
                ndcss += [ndcs]
                dds = ds_back.samples - ds_orig_Rref
                ndds = np.linalg.norm(dds) / ds_norm
                nddss += [ndds]
            snoisy = ('clean', 'noisy')[int(noisy)]
            do_labile = cfg.getboolean('tests', 'labile', default='yes')
            if not noisy or do_labile:
                # First compare correlations
                self.assertTrue(np.all(np.array(ndcss)
                                       >= (0.9, 0.85)[int(noisy)]),
                        msg="Should have reconstructed original dataset more or"
                        " less. Got correlations %s in %s case."
                        % (ndcss, snoisy))
                if not (zscore_all or zscore_common):
                    # if we didn't zscore -- all of them should be really close
                    self.assertTrue(np.all(np.array(nddss)
                                       <= (1e-10, 1e-1)[int(noisy)]),
                        msg="Should have reconstructed original dataset well "
                        "without zscoring. Got normed differences %s in %s case."
                        % (nddss, snoisy))
                elif do_labile:
                    # otherwise they all should be somewhat close
                    #print snoisy, ref_ds,  nddss
                    self.assertTrue(np.all(np.array(nddss) >= nddss[ref_ds]),
                        msg="Should have reconstructed orig_ds best of all. "
                        "Got normed differences %s in %s case with ref_ds=%d."
                        % (nddss, snoisy, ref_ds))
                    self.assertTrue(np.all(np.array(nddss)
                                           <= (.2, 3)[int(noisy)]),
                        msg="Should have reconstructed original dataset more or"
                        " less for all. Got normed differences %s in %s case."
                        % (nddss, snoisy))
                    self.assertTrue(nddss[ref_ds] <= .05,
                        msg="Should have reconstructed original dataset quite "
                        "well even with zscoring. Got normed differences %s "
                        "in %s case." % (nddss, snoisy))

        # Let's see how well we do if asked to compute residuals
        ha = Hyperalignment(ref_ds=ref_ds, level2_niter=2,
                            enable_ca=['residual_errors'])
        mappers = ha(dss_rotated_clean)
        self.assertTrue(np.all(ha.ca.residual_errors.sa.levels ==
                               ['1', '2:0', '2:1', '3']))
        rerrors = ha.ca.residual_errors.samples
        # just basic tests:
        self.assertEqual(rerrors[0, ref_ds], 0)
        self.assertEqual(rerrors.shape, (4, n))
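When common-space z-scoring is enabled, the test also z-scores the reference-space target, since absolute scale and offset can then no longer match. A minimal stand-in for what zscore(ds, chunks_attr=None) does to a samples array (the helper below is illustrative, not the mvpa2 API):

import numpy as np

def zscore_columns(samples):
    # standardize each feature (column) to zero mean and unit variance
    return (samples - samples.mean(axis=0)) / samples.std(axis=0)

x = np.random.random((20, 4))
z = zscore_columns(x)
assert np.allclose(z.mean(axis=0), 0)
assert np.allclose(z.std(axis=0), 1)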