Exemple #1
0
 def test_hpal_svd_combo(self):
     """Check Hyperalignment's built-in dimensionality reduction.

     Four datasets are built from the SVD-projected non-bogus features of
     ``uni4large``, each extended with its own block of four bogus
     features and then distorted by a random affine transformation.  The
     test verifies that

     * mappers obtained with ``output_dim`` produce exactly that many
       features, even when trained on only two datasets;
     * the reduced output correlates (|r| >= 0.95) with running
       Hyperalignment and ``SVDMapper`` in sequence;
     * the reduced output correlates (|r| >= 0.9) with the original
       SVD-projected data.
     """
     # get seed dataset
     ds4l = datasets['uni4large']
     ds_orig = ds4l[:, ds4l.a.nonbogus_features]
     # XXX Is this SVD mapping required?
     svm = SVDMapper()
     svm.train(ds_orig)
     ds_svs = svm.forward(ds_orig)
     ds_orig.samples = ds_svs.samples
     nf_true = ds_orig.nfeatures
     n = 4  # # of datasets to generate
     # Adding non-shared dimensions for each subject: subject i gets
     # bogus features [4*i, 4*i + 4) appended to the shared ones.
     dss_stacked = [
         hstack((ds_orig, ds4l[:, ds4l.a.bogus_features[i * 4:i * 4 + 4]]))
         for i in range(n)
     ]
     # feature count is taken before the affine distortion is applied
     nf = dss_stacked[0].nfeatures
     # rotate data
     dss_rotated = [random_affine_transformation(ds_) for ds_ in dss_stacked]
     # Test if it is close to doing hpal+SVD in sequence outside hpal
     # First, as we do in sequence outside hpal
     ha = Hyperalignment()
     mappers_orig = ha(dss_rotated)
     dss_back = [
         m.forward(ds_) for m, ds_ in zip(mappers_orig, dss_rotated)
     ]
     dss_mean = np.mean([sd.samples for sd in dss_back], axis=0)
     svm = SVDMapper()
     svm.train(dss_mean)
     dss_sv = [svm.forward(sd) for sd in dss_back]
     # Test for SVD dimensionality reduction even with 2 training subjects
     for output_dim in [1, 4]:
         ha = Hyperalignment(output_dim=output_dim)
         ha.train(dss_rotated[:2])
         mappers = ha(dss_rotated)
         dss_back = [m.forward(ds_) for m, ds_ in zip(mappers, dss_rotated)]
         for sd in dss_back:
             self.assertEqual(sd.nfeatures, output_dim)
     # NOTE: the checks below deliberately use `dss_back` as left by the
     # last loop iteration, i.e. the output_dim=4 projection.
     # Check if combined hpal+SVD works as expected
     sv_corrs = []
     for sd1, sd2 in zip(dss_sv, dss_back):
         # cross-correlation block between the two feature sets; its
         # diagonal pairs the k-th reduced feature with the k-th SV
         ndcs = np.diag(np.corrcoef(sd1.samples.T, sd2.samples.T)[nf:, :nf],
                        k=0)
         sv_corrs.append(ndcs)
     self.assertTrue(
         np.all(np.abs(np.array(sv_corrs)) >= 0.95),
         msg="Hyperalignment with dimensionality reduction should have "
         "reconstructed SVD dataset. Got correlations %s." % sv_corrs)
     # Check if it recovers original SVs
     sv_corrs_orig = []
     for sd in dss_back:
         ndcs = np.diag(np.corrcoef(sd.samples.T,
                                    ds_orig.samples.T)[nf_true:, :nf_true],
                        k=0)
         sv_corrs_orig.append(ndcs)
     self.assertTrue(np.all(np.abs(np.array(sv_corrs_orig)) >= 0.9),
                     msg="Expected original dimensions after "
                     "SVD. Got correlations %s." % sv_corrs_orig)
 def get_testdata(self):
     """Build the shared fixture of randomly transformed datasets.

     Returns
     -------
     tuple
       ``(ds_orig, dss_rotated, dss_rotated_clean, Rs)`` where ``ds_orig``
       is the z-scored non-bogus part of ``uni4large``,
       ``dss_rotated_clean`` holds four z-scored random affine
       transformations of it, ``dss_rotated`` holds the same datasets
       with four bogus features appended (and z-scored again), and
       ``Rs`` are the rotations reported by the transformations.
     """
     # seed dataset with some prominent trends; keep only the
     # meaningful features as the common signal
     source = datasets['uni4large']
     ds_orig = source[:, source.a.nonbogus_features]
     zscore(ds_orig, chunks_attr=None)

     n_wanted = 4  # number of derived datasets to generate
     rotations = []
     with_bogus = []
     clean = []
     while len(clean) < n_wanted:
         candidate = random_affine_transformation(ds_orig,
                                                  scale_fac=1.0,
                                                  shift_fac=0.)
         # draw again whenever the reported scale is not positive
         if candidate.a.random_scale <= 0:
             continue
         rotations.append(candidate.a.random_rotation)
         zscore(candidate, chunks_attr=None)
         # position of this dataset selects its own block of 4 bogus columns
         start = len(clean) * 4
         clean.append(candidate)
         extended = hstack(
             [candidate, source[:, source.a.bogus_features[start:start + 4]]])
         zscore(extended, chunks_attr=None)
         with_bogus.append(extended)
     return ds_orig, with_bogus, clean, rotations
def test_random_affine_transformation():
    """Inverting the reported shift, scale and rotation restores the data."""
    original = Dataset.from_wizard(np.random.randn(8, 3, 2))
    distorted = random_affine_transformation(original)
    # undo the distortion step by step using the parameters stored in .a
    unshifted = distorted.samples - distorted.a.random_shift
    unscaled = unshifted / distorted.a.random_scale
    restored = np.dot(unscaled, distorted.a.random_rotation.T)
    assert_array_almost_equal(restored, original.samples)
Exemple #4
0
def test_random_affine_transformation():
    """Reported distortion parameters must allow recovering the input."""
    source_ds = Dataset.from_wizard(np.random.randn(8, 3, 2))
    warped = random_affine_transformation(source_ds)
    # remove shift and scale first, then rotate back with the transpose
    # of the reported rotation
    descaled = (warped.samples - warped.a.random_shift) / warped.a.random_scale
    recovered = np.dot(descaled, warped.a.random_rotation.T)
    assert_array_almost_equal(recovered, source_ds.samples)
Exemple #5
0
def test_timesegments_classification():
    """Exercise ``timesegments_classification`` on clean, noisy and rotated data.

    * Identical copies of the seed dataset must be classified without error
      and must not gain a ``subjects`` sample attribute.
    * Pure-noise datasets must give errors within [0.75, 1], and the mean
      error with non-overlapping windows must not exceed the mean error
      with overlapping windows.
    * Noise-free random affine transformations of the seed dataset, aligned
      with ``Hyperalignment``, must again be classified without error.
    """
    # TODO: RF our construction of fake datasets for testing hyperalignment
    # so we could reuse it here and test classification performance
    ds_orig = datasets['uni4large']
    n = 3
    dss = [ds_orig.copy(deep=True) for _ in range(n)]

    def nohyper(dss):
        # "alignment" that leaves every dataset untouched
        return [IdentityMapper() for ds in dss]

    # clean case, assume "nohyper" which would be by default
    errors = timesegments_classification(dss)
    for ds in dss:
        # must not add any attribute, such as subjects
        assert ('subjects' not in ds.sa)
    assert_array_equal(errors, 0)

    # very noisy case -- we must not be able to classify anything reasonably
    dss_noisy = [ds.copy() for ds in dss]
    for ds in dss_noisy:
        ds.samples = np.random.normal(size=ds.samples.shape)
    errors_nonoverlapping = timesegments_classification(
        dss_noisy, nohyper, overlapping_windows=False)
    assert (np.all(errors_nonoverlapping <= 1.))
    assert (np.all(0.75 <= errors_nonoverlapping))

    errors_overlapping = timesegments_classification(dss_noisy, nohyper)
    # non-overlapping error should be less for random result
    assert_array_lequal(np.mean(errors_nonoverlapping),
                        np.mean(errors_overlapping))

    # now the ultimate test with real hyperalignment on when we don't need much
    # of it anyways
    dss_rotated = [
        random_affine_transformation(ds_orig, scale_fac=100, shift_fac=10)
        for _ in dss
    ]
    errors_hyper = timesegments_classification(dss_rotated, Hyperalignment())
    # Hyperalignment must not screw up and rotated and classify perfectly
    # since we didn't add any noise whatsoever.  Check the errors obtained
    # WITH hyperalignment (previously the clean-case `errors` was
    # re-checked here, leaving this result unverified).
    assert_array_equal(errors_hyper, 0)
Exemple #6
0
 def test_hpal_joblib(self):
     """Serial and parallel Hyperalignment must give the same results.

     Trains one instance with ``nproc=1`` and one with ``nproc=2`` on the
     same two datasets and compares the projections of all resulting
     mappers as well as the ``residual_errors`` conditional attribute.
     Skipped when joblib is not available.
     """
     skip_if_no_external('joblib')
     # get seed dataset
     ds4l = datasets['uni4large']
     dss_rotated = [
         random_affine_transformation(ds4l, scale_fac=100, shift_fac=10)
         for _ in range(4)
     ]
     ha = Hyperalignment(nproc=1, enable_ca=['residual_errors'])
     ha.train(dss_rotated[:2])
     mappers = ha(dss_rotated)
     ha_proc = Hyperalignment(nproc=2, enable_ca=['residual_errors'])
     ha_proc.train(dss_rotated[:2])
     mappers_nproc = ha_proc(dss_rotated)
     # not sure yet why on windows only is not precise
     cmp_ = assert_array_almost_equal if on_windows else assert_array_equal
     # Mappers must not differ when using nproc>1.
     for m, mp in zip(mappers, mappers_nproc):
         cmp_(m.proj, mp.proj)
     cmp_(ha.ca.residual_errors.samples, ha_proc.ca.residual_errors.samples)
     # smoke test: nproc=0 only has to run without raising
     Hyperalignment(nproc=0)(dss_rotated)
Exemple #7
0
    def test_hypal_michael_caused_problem(self):
        """Regression test: a biased first subject must not leak into others.

        Three z-scored random affine transformations of a 20-feature
        dataset are hyperaligned for several ``alpha`` values.  Independent
        random test vectors (the first one carrying a large additive ramp)
        are pushed forward through their own mapper and back through the
        first mapper; the results for subjects 1 and 2 must not become
        (nearly) perfectly correlated.
        """
        from mvpa2.misc import data_generators
        from mvpa2.mappers.zscore import zscore
        # Fake data
        ds = data_generators.normal_feature_dataset(nfeatures=20)
        ds_all = [data_generators.random_affine_transformation(ds)
                  for _ in range(3)]
        # zscore is applied for its effect on each dataset; its return
        # value is not needed
        for sd in ds_all:
            zscore(sd, chunks_attr=None)
        # Making random data per subject for testing with bias added to first subject
        ds_test = [np.random.rand(1, ds.nfeatures) for _ in range(len(ds_all))]
        ds_test[0] += np.arange(1, ds.nfeatures + 1) * 100
        # that would have been ridiculous if it was
        assert(np.corrcoef(ds_test[2], ds_test[1])[0, 1] < 0.99)

        # Test with varying alpha so we for sure to not have that issue now
        for alpha in (0, 0.01, 0.5, 0.99, 1.0):
            hyper09 = Hyperalignment(alpha=alpha)
            # hand over a fresh list on every iteration
            mappers = hyper09(list(ds_all))
            ds_test_a = [m.forward(sd) for m, sd in zip(mappers, ds_test)]
            ds_test_a = [mappers[0].reverse(sd) for sd in ds_test_a]
            corr = np.corrcoef(ds_test_a[2], ds_test_a[1])[0, 1]
            assert(corr < 0.99)
 def test_hpal_joblib(self):
     """Hyperalignment with nproc=1 and nproc=2 must agree exactly.

     Compares the projections of all mappers and the ``residual_errors``
     conditional attribute of both runs.  Skipped without joblib.
     """
     skip_if_no_external('joblib')
     # seed dataset and four randomly distorted versions of it
     seed_ds = datasets['uni4large']
     dss_rotated = []
     for _ in range(4):
         dss_rotated.append(
             random_affine_transformation(seed_ds, scale_fac=100, shift_fac=10))

     # single-process reference run
     serial = Hyperalignment(nproc=1, enable_ca=['residual_errors'])
     serial.train(dss_rotated[:2])
     serial_mappers = serial(dss_rotated)

     # the same run spread over two processes
     parallel = Hyperalignment(nproc=2, enable_ca=['residual_errors'])
     parallel.train(dss_rotated[:2])
     parallel_mappers = parallel(dss_rotated)

     projections_match = [
         np.array_equal(one.proj, other.proj)
         for one, other in zip(serial_mappers, parallel_mappers)
     ]
     self.assertTrue(np.all(projections_match),
                     msg="Mappers differ when using nproc>1.")
     assert_array_equal(serial.ca.residual_errors.samples,
                        parallel.ca.residual_errors.samples)

     # smoke test
     smoke = Hyperalignment(nproc=0)
     smoke(dss_rotated)
    def test_hypal_michael_caused_problem(self):
        """Regression test: a biased first subject must not leak into others.

        Three z-scored random affine transformations of a 20-feature
        dataset are hyperaligned for several ``alpha`` values.  Independent
        random test vectors (the first one carrying a large additive ramp)
        are pushed forward through their own mapper and back through the
        first mapper; the results for subjects 1 and 2 must not become
        (nearly) perfectly correlated.
        """
        from mvpa2.misc import data_generators
        from mvpa2.mappers.zscore import zscore
        # Fake data
        ds = data_generators.normal_feature_dataset(nfeatures=20)
        ds_all = [data_generators.random_affine_transformation(ds)
                  for _ in range(3)]
        # zscore is applied for its effect on each dataset; its return
        # value is not needed
        for sd in ds_all:
            zscore(sd, chunks_attr=None)
        # Making random data per subject for testing with bias added to first subject
        ds_test = [np.random.rand(1, ds.nfeatures) for _ in range(len(ds_all))]
        ds_test[0] += np.arange(1, ds.nfeatures + 1) * 100
        # that would have been ridiculous if it was
        assert(np.corrcoef(ds_test[2], ds_test[1])[0, 1] < 0.99)

        # Test with varying alpha so we for sure to not have that issue now
        for alpha in (0, 0.01, 0.5, 0.99, 1.0):
            hyper09 = Hyperalignment(alpha=alpha)
            # hand over a fresh list on every iteration
            mappers = hyper09(list(ds_all))
            ds_test_a = [m.forward(sd) for m, sd in zip(mappers, ds_test)]
            ds_test_a = [mappers[0].reverse(sd) for sd in ds_test_a]
            corr = np.corrcoef(ds_test_a[2], ds_test_a[1])[0, 1]
            assert(corr < 0.99)
 def test_hpal_joblib(self):
     """Hyperalignment with nproc=1 and nproc=2 must agree exactly.

     Compares the projections of all mappers and the ``residual_errors``
     conditional attribute of both runs.  Skipped without joblib.
     """
     skip_if_no_external('joblib')
     # seed dataset and four randomly distorted versions of it
     seed_ds = datasets['uni4large']
     dss_rotated = []
     for _ in range(4):
         dss_rotated.append(
             random_affine_transformation(seed_ds, scale_fac=100, shift_fac=10))

     # single-process reference run
     serial = Hyperalignment(nproc=1, enable_ca=['residual_errors'])
     serial.train(dss_rotated[:2])
     serial_mappers = serial(dss_rotated)

     # the same run spread over two processes
     parallel = Hyperalignment(nproc=2, enable_ca=['residual_errors'])
     parallel.train(dss_rotated[:2])
     parallel_mappers = parallel(dss_rotated)

     projections_match = [
         np.array_equal(one.proj, other.proj)
         for one, other in zip(serial_mappers, parallel_mappers)
     ]
     self.assertTrue(np.all(projections_match),
                     msg="Mappers differ when using nproc>1.")
     assert_array_equal(serial.ca.residual_errors.samples,
                        parallel.ca.residual_errors.samples)

     # smoke test
     smoke = Hyperalignment(nproc=0)
     smoke(dss_rotated)
def test_timesegments_classification():
    """Exercise ``timesegments_classification`` on clean, noisy and rotated data.

    * Identical copies of the seed dataset must be classified without error
      and must not gain a ``subjects`` sample attribute.
    * Pure-noise datasets must give errors within [0.85, 1], and the mean
      error with non-overlapping windows must not exceed the mean error
      with overlapping windows.
    * Noise-free random affine transformations of the seed dataset, aligned
      with ``Hyperalignment``, must again be classified without error.
    """
    # TODO: RF our construction of fake datasets for testing hyperalignment
    # so we could reuse it here and test classification performance
    ds_orig = datasets['uni4large']
    n = 3
    dss = [ds_orig.copy(deep=True) for _ in range(n)]

    def nohyper(dss):
        # "alignment" that leaves every dataset untouched
        return [IdentityMapper() for ds in dss]

    # clean case, assume "nohyper" which would be by default
    errors = timesegments_classification(dss)
    for ds in dss:
        # must not add any attribute, such as subjects
        assert('subjects' not in ds.sa)
    assert_array_equal(errors, 0)

    # very noisy case -- we must not be able to classify anything reasonably
    dss_noisy = [ds.copy() for ds in dss]
    for ds in dss_noisy:
        ds.samples = np.random.normal(size=ds.samples.shape)
    errors_nonoverlapping = timesegments_classification(dss_noisy, nohyper,
                                                        overlapping_windows=False)
    assert(np.all(errors_nonoverlapping <= 1.))
    assert(np.all(0.85 <= errors_nonoverlapping))

    errors_overlapping = timesegments_classification(dss_noisy, nohyper)
    # non-overlapping error should be less for random result
    assert_array_lequal(np.mean(errors_nonoverlapping), np.mean(errors_overlapping))

    # now the ultimate test with real hyperalignment on when we don't need much
    # of it anyways
    dss_rotated = [random_affine_transformation(ds_orig, scale_fac=100, shift_fac=10)
                   for _ in dss]
    errors_hyper = timesegments_classification(dss_rotated, Hyperalignment())
    # Hyperalignment must not screw up and rotated and classify perfectly
    # since we didn't add any noise whatsoever.  Check the errors obtained
    # WITH hyperalignment (previously the clean-case `errors` was
    # re-checked here, leaving this result unverified).
    assert_array_equal(errors_hyper, 0)
 def get_testdata(self):
     """Build the shared fixture of randomly transformed datasets.

     Returns
     -------
     tuple
       ``(ds_orig, dss_rotated, dss_rotated_clean, Rs)`` where ``ds_orig``
       is the z-scored non-bogus part of ``uni4large``,
       ``dss_rotated_clean`` holds four z-scored random affine
       transformations of it, ``dss_rotated`` holds the same datasets
       with four bogus features appended (and z-scored again), and
       ``Rs`` are the rotations reported by the transformations.
     """
     # seed dataset with some prominent trends; keep only the
     # meaningful features as the common signal
     source = datasets['uni4large']
     ds_orig = source[:, source.a.nonbogus_features]
     zscore(ds_orig, chunks_attr=None)

     n_wanted = 4  # number of derived datasets to generate
     rotations = []
     with_bogus = []
     clean = []
     while len(clean) < n_wanted:
         candidate = random_affine_transformation(
             ds_orig, scale_fac=1.0, shift_fac=0.)
         # draw again whenever the reported scale is not positive
         if candidate.a.random_scale <= 0:
             continue
         rotations.append(candidate.a.random_rotation)
         zscore(candidate, chunks_attr=None)
         # position of this dataset selects its own block of 4 bogus columns
         start = len(clean) * 4
         clean.append(candidate)
         extended = hstack(
             [candidate, source[:, source.a.bogus_features[start:start + 4]]])
         zscore(extended, chunks_attr=None)
         with_bogus.append(extended)
     return ds_orig, with_bogus, clean, rotations
Exemple #13
0
 def test_hpal_joblib(self):
     """Serial and parallel Hyperalignment must give the same results.

     Trains one instance with ``nproc=1`` and one with ``nproc=2`` on the
     same two datasets and compares the projections of all resulting
     mappers as well as the ``residual_errors`` conditional attribute.
     Skipped when joblib is not available.
     """
     skip_if_no_external('joblib')
     # get seed dataset
     ds4l = datasets['uni4large']
     dss_rotated = [
         random_affine_transformation(ds4l, scale_fac=100, shift_fac=10)
         for _ in range(4)
     ]
     ha = Hyperalignment(nproc=1, enable_ca=['residual_errors'])
     ha.train(dss_rotated[:2])
     mappers = ha(dss_rotated)
     ha_proc = Hyperalignment(nproc=2, enable_ca=['residual_errors'])
     ha_proc.train(dss_rotated[:2])
     mappers_nproc = ha_proc(dss_rotated)
     # not sure yet why on windows only is not precise
     cmp_ = assert_array_almost_equal if on_windows else assert_array_equal
     # Mappers must not differ when using nproc>1.
     for m, mp in zip(mappers, mappers_nproc):
         cmp_(m.proj, mp.proj)
     cmp_(ha.ca.residual_errors.samples, ha_proc.ca.residual_errors.samples)
     # smoke test: nproc=0 only has to run without raising
     Hyperalignment(nproc=0)(dss_rotated)
Exemple #14
0
    def test_basic_functioning(self, ref_ds, zscore_common, zscore_all):
        """Check that Hyperalignment recovers randomly transformed datasets.

        Four random affine transformations of the non-bogus features of
        ``uni4large`` are generated, once clean and once with additive
        noise.  For both sets the test verifies that Hyperalignment leaves
        its inputs untouched, picks the expected reference dataset and maps
        the clean data back close to the reference-space version of the
        original.  Finally the shapes of the residual-error conditional
        attributes are checked.

        Parameters
        ----------
        ref_ds : int or None
          Index of the reference dataset handed to ``Hyperalignment``;
          ``None`` is expected to end up as dataset 0.
        zscore_common, zscore_all
          Passed straight through to ``Hyperalignment``; they also select
          which reconstruction tolerances are checked.

        NOTE(review): the arguments are presumably supplied by a
        parametrizing decorator that is not visible here -- confirm.
        """
        ha = Hyperalignment(ref_ds=ref_ds,
                            zscore_all=zscore_all,
                            zscore_common=zscore_common)
        if ref_ds is None:
            ref_ds = 0  # by default should be this one

        # get a dataset with some prominent trends in it
        ds4l = datasets['uni4large']
        # lets select for now only meaningful features
        ds_orig = ds4l[:, ds4l.a.nonbogus_features]
        nf = ds_orig.nfeatures
        n = 4  # # of datasets to generate
        Rs, dss_rotated, dss_rotated_clean, random_shifts, random_scales \
            = [], [], [], [], []

        # now lets compose derived datasets by using some random
        # rotation(s)
        for i in xrange(n):
            ## if False: # i == ref_ds:
            #     # Do not rotate the target space so we could check later on
            #     # if we transform back nicely
            #     R = np.eye(ds_orig.nfeatures)
            ## else:
            ds_ = random_affine_transformation(ds_orig,
                                               scale_fac=100,
                                               shift_fac=10)
            # remember the reported parameters so the expected result can
            # be rebuilt in the reference space further below
            Rs.append(ds_.a.random_rotation)
            # reusing random data from dataset itself
            random_scales += [ds_.a.random_scale]
            random_shifts += [ds_.a.random_shift]
            # same four bogus columns for every dataset
            # (assumes ds_orig also has 4 features so shapes match -- TODO confirm)
            random_noise = ds4l.samples[:, ds4l.a.bogus_features[:4]]

            ## if (zscore_common or zscore_all):
            ##     # for later on testing of "precise" reconstruction
            ##     zscore(ds_, chunks_attr=None)

            dss_rotated_clean.append(ds_)

            # noisy twin: a copy of the clean dataset plus scaled noise
            ds_ = ds_.copy()
            ds_.samples = ds_.samples + 0.1 * random_noise
            dss_rotated.append(ds_)

        # Lets test two scenarios -- in one with no noise -- we should get
        # close to perfect reconstruction.  If noise was added -- not so good
        for noisy, dss in ((False, dss_rotated_clean), (True, dss_rotated)):
            # to verify that original datasets didn't get changed by
            # Hyperalignment store their idhashes of samples
            idhashes = [idhash(ds.samples) for ds in dss]
            idhashes_targets = [idhash(ds.targets) for ds in dss]

            mappers = ha(dss)

            idhashes_ = [idhash(ds.samples) for ds in dss]
            idhashes_targets_ = [idhash(ds.targets) for ds in dss]
            self.assertEqual(
                idhashes,
                idhashes_,
                msg="Hyperalignment must not change original data.")
            self.assertEqual(
                idhashes_targets,
                idhashes_targets_,
                msg="Hyperalignment must not change original data targets.")

            self.assertEqual(ref_ds, ha.ca.chosen_ref_ds)

            # Map data back
            # (always the clean datasets, also when the mappers were
            # derived from the noisy ones)

            dss_clean_back = [
                m.forward(ds_) for m, ds_ in zip(mappers, dss_rotated_clean)
            ]

            ds_norm = np.linalg.norm(dss[ref_ds].samples)
            nddss = []  # normalized differences, one per dataset
            ndcss = []  # per-feature correlations, one array per dataset
            # expected result: the original data pushed through the
            # reference dataset's own rotation, scale and shift
            ds_orig_Rref = np.dot(ds_orig.samples, Rs[ref_ds]) \
                * random_scales[ref_ds] \
                + random_shifts[ref_ds]
            if zscore_common or zscore_all:
                # NOTE(review): the result is discarded, so this presumably
                # relies on zscore modifying ds_orig_Rref in place through
                # the Dataset wrapper -- confirm.
                zscore(Dataset(ds_orig_Rref), chunks_attr=None)
            for ds_back in dss_clean_back:
                # if we used zscoring of common, we cannot rely
                # that range/offset could be matched, so lets use
                # corrcoef
                ndcs = np.diag(np.corrcoef(ds_back.samples.T,
                                           ds_orig_Rref.T)[nf:, :nf],
                               k=0)
                ndcss += [ndcs]
                dds = ds_back.samples - ds_orig_Rref
                ndds = np.linalg.norm(dds) / ds_norm
                nddss += [ndds]
            snoisy = ('clean', 'noisy')[int(noisy)]
            # noisy-case checks run only when "labile" tests are enabled
            do_labile = cfg.getboolean('tests', 'labile', default='yes')
            if not noisy or do_labile:
                # First compare correlations
                self.assertTrue(
                    np.all(np.array(ndcss) >= (0.9, 0.85)[int(noisy)]),
                    msg="Should have reconstructed original dataset more or"
                    " less. Got correlations %s in %s case." % (ndcss, snoisy))
                if not (zscore_all or zscore_common):
                    # if we didn't zscore -- all of them should be really close
                    self.assertTrue(
                        np.all(np.array(nddss) <= (1e-10, 1e-1)[int(noisy)]),
                        msg="Should have reconstructed original dataset well "
                        "without zscoring. Got normed differences %s in %s case."
                        % (nddss, snoisy))
                elif do_labile:
                    # otherwise they all should be somewhat close
                    self.assertTrue(
                        np.all(np.array(nddss) <= (.2, 3)[int(noisy)]),
                        msg="Should have reconstructed original dataset more or"
                        " less for all. Got normed differences %s in %s case."
                        % (nddss, snoisy))
                    self.assertTrue(
                        np.all(nddss[ref_ds] <= .09),
                        msg="Should have reconstructed original dataset quite "
                        "well even with zscoring. Got normed differences %s "
                        "in %s case." % (nddss, snoisy))
                    # yoh: and leave 5% of difference for a chance and numerical
                    #      fluctuations ;)
                    self.assertTrue(
                        np.all(np.array(nddss) >= 0.95 * nddss[ref_ds]),
                        msg="Should have reconstructed orig_ds best of all. "
                        "Got normed differences %s in %s case with ref_ds=%d."
                        % (nddss, snoisy, ref_ds))

        # Lets see how well we do if asked to compute residuals
        ha = Hyperalignment(
            ref_ds=ref_ds,
            level2_niter=2,
            enable_ca=['training_residual_errors', 'residual_errors'])
        mappers = ha(dss_rotated_clean)
        # one row for level 1 plus one per level-2 iteration
        self.assertTrue(
            np.all(ha.ca.training_residual_errors.sa.levels ==
                   ['1', '2:0', '2:1']))
        rterrors = ha.ca.training_residual_errors.samples
        # just basic tests:
        self.assertEqual(rterrors[0, ref_ds], 0)
        self.assertEqual(rterrors.shape, (3, n))
        rerrors = ha.ca.residual_errors.samples
        self.assertEqual(rerrors.shape, (1, n))
Exemple #15
0
    def test_basic_functioning(self, ref_ds, zscore_common, zscore_all):
        """Check that Hyperalignment recovers randomly transformed datasets.

        Four random affine transformations of the non-bogus features of
        ``uni4large`` are generated, once clean and once with additive
        noise.  For both sets the test verifies that Hyperalignment leaves
        its inputs untouched, picks the expected reference dataset and maps
        the clean data back close to the reference-space version of the
        original.  Finally the shapes of the residual-error conditional
        attributes are checked.

        Parameters
        ----------
        ref_ds : int or None
          Index of the reference dataset handed to ``Hyperalignment``;
          ``None`` is expected to end up as dataset 0.
        zscore_common, zscore_all
          Passed straight through to ``Hyperalignment``; they also select
          which reconstruction tolerances are checked.

        NOTE(review): the arguments are presumably supplied by a
        parametrizing decorator that is not visible here -- confirm.
        """
        ha = Hyperalignment(ref_ds=ref_ds,
                            zscore_all=zscore_all,
                            zscore_common=zscore_common)
        if ref_ds is None:
            ref_ds = 0                      # by default should be this one

        # get a dataset with some prominent trends in it
        ds4l = datasets['uni4large']
        # lets select for now only meaningful features
        ds_orig = ds4l[:, ds4l.a.nonbogus_features]
        nf = ds_orig.nfeatures
        n = 4 # # of datasets to generate
        Rs, dss_rotated, dss_rotated_clean, random_shifts, random_scales \
            = [], [], [], [], []

        # now lets compose derived datasets by using some random
        # rotation(s)
        for i in xrange(n):
            ## if False: # i == ref_ds:
            #     # Do not rotate the target space so we could check later on
            #     # if we transform back nicely
            #     R = np.eye(ds_orig.nfeatures)
            ## else:
            ds_ = random_affine_transformation(ds_orig, scale_fac=100, shift_fac=10)
            # remember the reported parameters so the expected result can
            # be rebuilt in the reference space further below
            Rs.append(ds_.a.random_rotation)
            # reusing random data from dataset itself
            random_scales += [ds_.a.random_scale]
            random_shifts += [ds_.a.random_shift]
            # same four bogus columns for every dataset
            # (assumes ds_orig also has 4 features so shapes match -- TODO confirm)
            random_noise = ds4l.samples[:, ds4l.a.bogus_features[:4]]

            ## if (zscore_common or zscore_all):
            ##     # for later on testing of "precise" reconstruction
            ##     zscore(ds_, chunks_attr=None)

            dss_rotated_clean.append(ds_)

            # noisy twin: a copy of the clean dataset plus scaled noise
            ds_ = ds_.copy()
            ds_.samples = ds_.samples + 0.1 * random_noise
            dss_rotated.append(ds_)

        # Lets test two scenarios -- in one with no noise -- we should get
        # close to perfect reconstruction.  If noise was added -- not so good
        for noisy, dss in ((False, dss_rotated_clean),
                           (True, dss_rotated)):
            # to verify that original datasets didn't get changed by
            # Hyperalignment store their idhashes of samples
            idhashes = [idhash(ds.samples) for ds in dss]
            idhashes_targets = [idhash(ds.targets) for ds in dss]

            mappers = ha(dss)

            idhashes_ = [idhash(ds.samples) for ds in dss]
            idhashes_targets_ = [idhash(ds.targets) for ds in dss]
            self.assertEqual(idhashes, idhashes_,
                msg="Hyperalignment must not change original data.")
            self.assertEqual(idhashes_targets, idhashes_targets_,
                msg="Hyperalignment must not change original data targets.")

            self.assertEqual(ref_ds, ha.ca.chosen_ref_ds)

            # Map data back
            # (always the clean datasets, also when the mappers were
            # derived from the noisy ones)

            dss_clean_back = [m.forward(ds_)
                              for m, ds_ in zip(mappers, dss_rotated_clean)]

            ds_norm = np.linalg.norm(dss[ref_ds].samples)
            nddss = []  # normalized differences, one per dataset
            ndcss = []  # per-feature correlations, one array per dataset
            # expected result: the original data pushed through the
            # reference dataset's own rotation, scale and shift
            ds_orig_Rref = np.dot(ds_orig.samples, Rs[ref_ds]) \
                           * random_scales[ref_ds] \
                           + random_shifts[ref_ds]
            if zscore_common or zscore_all:
                # NOTE(review): the result is discarded, so this presumably
                # relies on zscore modifying ds_orig_Rref in place through
                # the Dataset wrapper -- confirm.
                zscore(Dataset(ds_orig_Rref), chunks_attr=None)
            for ds_back in dss_clean_back:
                # if we used zscoring of common, we cannot rely
                # that range/offset could be matched, so lets use
                # corrcoef
                ndcs = np.diag(np.corrcoef(ds_back.samples.T,
                                           ds_orig_Rref.T)[nf:, :nf], k=0)
                ndcss += [ndcs]
                dds = ds_back.samples - ds_orig_Rref
                ndds = np.linalg.norm(dds) / ds_norm
                nddss += [ndds]
            snoisy = ('clean', 'noisy')[int(noisy)]
            # noisy-case checks run only when "labile" tests are enabled
            do_labile = cfg.getboolean('tests', 'labile', default='yes')
            if not noisy or do_labile:
                # First compare correlations
                self.assertTrue(np.all(np.array(ndcss)
                                       >= (0.9, 0.85)[int(noisy)]),
                        msg="Should have reconstructed original dataset more or"
                        " less. Got correlations %s in %s case."
                        % (ndcss, snoisy))
                if not (zscore_all or zscore_common):
                    # if we didn't zscore -- all of them should be really close
                    self.assertTrue(np.all(np.array(nddss)
                                       <= (1e-10, 1e-1)[int(noisy)]),
                        msg="Should have reconstructed original dataset well "
                        "without zscoring. Got normed differences %s in %s case."
                        % (nddss, snoisy))
                elif do_labile:
                    # otherwise they all should be somewhat close
                    self.assertTrue(np.all(np.array(nddss)
                                           <= (.2, 3)[int(noisy)]),
                        msg="Should have reconstructed original dataset more or"
                        " less for all. Got normed differences %s in %s case."
                        % (nddss, snoisy))
                    self.assertTrue(np.all(nddss[ref_ds] <= .09),
                        msg="Should have reconstructed original dataset quite "
                        "well even with zscoring. Got normed differences %s "
                        "in %s case." % (nddss, snoisy))
                    # yoh: and leave 5% of difference for a chance and numerical
                    #      fluctuations ;)
                    self.assertTrue(np.all(np.array(nddss) >= 0.95*nddss[ref_ds]),
                        msg="Should have reconstructed orig_ds best of all. "
                        "Got normed differences %s in %s case with ref_ds=%d."
                        % (nddss, snoisy, ref_ds))

        # Lets see how well we do if asked to compute residuals
        ha = Hyperalignment(ref_ds=ref_ds, level2_niter=2,
                            enable_ca=['training_residual_errors',
                                       'residual_errors'])
        mappers = ha(dss_rotated_clean)
        # one row for level 1 plus one per level-2 iteration
        self.assertTrue(np.all(ha.ca.training_residual_errors.sa.levels ==
                              ['1', '2:0', '2:1']))
        rterrors = ha.ca.training_residual_errors.samples
        # just basic tests:
        self.assertEqual(rterrors[0, ref_ds], 0)
        self.assertEqual(rterrors.shape, (3, n))
        rerrors = ha.ca.residual_errors.samples
        self.assertEqual(rerrors.shape, (1, n))
 def test_hpal_svd_combo(self):
     """Check Hyperalignment's built-in dimensionality reduction.

     Four datasets are built from the SVD-projected non-bogus features of
     ``uni4large``, each extended with its own block of four bogus
     features and then distorted by a random affine transformation.  The
     test verifies that

     * mappers obtained with ``output_dim`` produce exactly that many
       features, even when trained on only two datasets;
     * the reduced output correlates (|r| >= 0.95) with running
       Hyperalignment and ``SVDMapper`` in sequence;
     * the reduced output correlates (|r| >= 0.9) with the original
       SVD-projected data.
     """
     # get seed dataset
     ds4l = datasets['uni4large']
     ds_orig = ds4l[:, ds4l.a.nonbogus_features]
     # XXX Is this SVD mapping required?
     svm = SVDMapper()
     svm.train(ds_orig)
     ds_svs = svm.forward(ds_orig)
     ds_orig.samples = ds_svs.samples
     nf_true = ds_orig.nfeatures
     n = 4  # # of datasets to generate
     # Adding non-shared dimensions for each subject: subject i gets
     # bogus features [4*i, 4*i + 4) appended to the shared ones.
     dss_stacked = [
         hstack((ds_orig, ds4l[:, ds4l.a.bogus_features[i * 4: i * 4 + 4]]))
         for i in range(n)
     ]
     # feature count is taken before the affine distortion is applied
     nf = dss_stacked[0].nfeatures
     # rotate data
     dss_rotated = [random_affine_transformation(ds_) for ds_ in dss_stacked]
     # Test if it is close to doing hpal+SVD in sequence outside hpal
     # First, as we do in sequence outside hpal
     ha = Hyperalignment()
     mappers_orig = ha(dss_rotated)
     dss_back = [m.forward(ds_)
                 for m, ds_ in zip(mappers_orig, dss_rotated)]
     dss_mean = np.mean([sd.samples for sd in dss_back], axis=0)
     svm = SVDMapper()
     svm.train(dss_mean)
     dss_sv = [svm.forward(sd) for sd in dss_back]
     # Test for SVD dimensionality reduction even with 2 training subjects
     for output_dim in [1, 4]:
         ha = Hyperalignment(output_dim=output_dim)
         ha.train(dss_rotated[:2])
         mappers = ha(dss_rotated)
         dss_back = [m.forward(ds_)
                     for m, ds_ in zip(mappers, dss_rotated)]
         for sd in dss_back:
             self.assertEqual(sd.nfeatures, output_dim)
     # NOTE: the checks below deliberately use `dss_back` as left by the
     # last loop iteration, i.e. the output_dim=4 projection.
     # Check if combined hpal+SVD works as expected
     sv_corrs = []
     for sd1, sd2 in zip(dss_sv, dss_back):
         # cross-correlation block between the two feature sets; its
         # diagonal pairs the k-th reduced feature with the k-th SV
         ndcs = np.diag(np.corrcoef(sd1.samples.T, sd2.samples.T)[nf:, :nf],
                        k=0)
         sv_corrs.append(ndcs)
     self.assertTrue(
         np.all(np.abs(np.array(sv_corrs)) >= 0.95),
         msg="Hyperalignment with dimensionality reduction should have "
             "reconstructed SVD dataset. Got correlations %s."
             % sv_corrs)
     # Check if it recovers original SVs
     sv_corrs_orig = []
     for sd in dss_back:
         ndcs = np.diag(
             np.corrcoef(sd.samples.T, ds_orig.samples.T)[nf_true:, :nf_true],
             k=0)
         sv_corrs_orig.append(ndcs)
     self.assertTrue(
         np.all(np.abs(np.array(sv_corrs_orig)) >= 0.9),
         msg="Expected original dimensions after "
             "SVD. Got correlations %s."
             % sv_corrs_orig)