def test_simple_svd(self):
    """Round-trip check of SVDMapper on a square (20-feature) dataset.

    Trains on ``self.ndlin`` (a 40x20 fixture with essentially one
    significant direction -- assumed from the assertions; set up outside
    this block), then verifies projection shape, singular-value spectrum,
    per-component variance, and lossless reconstruction via ``reverse()``.
    """
    pm = SVDMapper()
    # train SVD
    pm.train(self.ndlin)
    # FIX: failUnlessEqual/failUnless are deprecated unittest aliases,
    # removed in Python 3.12 -- use assertEqual/assertTrue throughout.
    self.assertEqual(pm.proj.shape, (20, 20))
    # now project data into PCA space
    p = pm.forward(self.ndlin)
    # only first eigenvalue significant
    self.assertTrue(pm.sv[:1] > 1.0)
    self.assertTrue((pm.sv[1:] < 0.0001).all())
    # only variance of first component significant
    var = p.var(axis=0)
    # test that only one component has variance
    self.assertTrue(var[:1] > 1.0)
    self.assertTrue((var[1:] < 0.0001).all())
    # check that the mapped data can be fully recovered by 'reverse()'
    pr = pm.reverse(p)
    self.assertEqual(pr.shape, (40, 20))
    self.assertTrue(np.abs(pm.reverse(p) - self.ndlin).sum() < 0.0001)
def test_hpal_svd_combo(self):
    """Hyperalignment with built-in ``output_dim`` reduction should match
    running Hyperalignment followed by a separate SVDMapper.

    Builds per-subject datasets by appending bogus features to a shared
    SVD-rotated seed and applying random affine transforms, then compares
    the two pipelines via feature-wise correlations.
    """
    # get seed dataset
    ds4l = datasets['uni4large']
    ds_orig = ds4l[:, ds4l.a.nonbogus_features]
    # XXX Is this SVD mapping required?
    svm = SVDMapper()
    svm.train(ds_orig)
    ds_svs = svm.forward(ds_orig)
    ds_orig.samples = ds_svs.samples
    nf_true = ds_orig.nfeatures
    n = 4  # # of datasets to generate
    # Adding non-shared dimensions for each subject
    dss_rotated = [[]] * n
    for i in range(n):
        dss_rotated[i] = hstack(
            (ds_orig, ds4l[:, ds4l.a.bogus_features[i * 4:i * 4 + 4]]))
    # rotate data
    nf = dss_rotated[0].nfeatures
    # FIX: xrange is Python 2-only (NameError on Python 3) -- use range
    dss_rotated = [random_affine_transformation(dss_rotated[i])
                   for i in range(n)]
    # Test if it is close to doing hpal+SVD in sequence outside hpal
    # First, as we do in sequence outside hpal
    ha = Hyperalignment()
    mappers_orig = ha(dss_rotated)
    dss_back = [m.forward(ds_)
                for m, ds_ in zip(mappers_orig, dss_rotated)]
    dss_mean = np.mean([sd.samples for sd in dss_back], axis=0)
    svm = SVDMapper()
    svm.train(dss_mean)
    dss_sv = [svm.forward(sd) for sd in dss_back]
    # Test for SVD dimensionality reduction even with 2 training subjects
    for output_dim in [1, 4]:
        ha = Hyperalignment(output_dim=output_dim)
        ha.train(dss_rotated[:2])
        mappers = ha(dss_rotated)
        dss_back = [m.forward(ds_)
                    for m, ds_ in zip(mappers, dss_rotated)]
        for sd in dss_back:
            # FIX: bare assert is stripped under `python -O` and gives a
            # poor failure message -- use the unittest assertion instead
            self.assertEqual(sd.nfeatures, output_dim)
    # Check if combined hpal+SVD works as expected
    # (uses dss_back from the last, output_dim=4, iteration)
    sv_corrs = []
    for sd1, sd2 in zip(dss_sv, dss_back):
        ndcs = np.diag(np.corrcoef(sd1.samples.T,
                                   sd2.samples.T)[nf:, :nf], k=0)
        sv_corrs.append(ndcs)
    self.assertTrue(
        np.all(np.abs(np.array(sv_corrs)) >= 0.95),
        msg="Hyperalignment with dimensionality reduction should have "
            "reconstructed SVD dataset. Got correlations %s." % sv_corrs)
    # Check if it recovers original SVs
    sv_corrs_orig = []
    for sd in dss_back:
        ndcs = np.diag(np.corrcoef(sd.samples.T,
                                   ds_orig.samples.T)[nf_true:, :nf_true],
                       k=0)
        sv_corrs_orig.append(ndcs)
    self.assertTrue(
        np.all(np.abs(np.array(sv_corrs_orig)) >= 0.9),
        msg="Expected original dimensions after "
            "SVD. Got correlations %s." % sv_corrs_orig)
def test_simple_svd(self):
    """Train/forward/reverse round-trip of SVDMapper on ``self.ndlin``.

    The fixture (set up outside this block) is presumably 40 samples by
    20 features with a single dominant direction -- confirmed here via
    the singular-value and variance assertions.
    """
    mapper = SVDMapper()
    mapper.train(self.ndlin)
    # projection matrix for a square problem is full 20x20
    self.assertEqual(mapper.proj.shape, (20, 20))

    # map samples into component space
    projected = mapper.forward(self.ndlin)

    # spectrum: first singular value dominates, the rest are ~zero
    self.assertTrue(mapper.sv[:1] > 1.0)
    self.assertTrue((mapper.sv[1:] < 0.0001).all())

    # variance is concentrated in the first component as well
    comp_var = projected.var(axis=0)
    self.assertTrue(comp_var[:1] > 1.0)
    self.assertTrue((comp_var[1:] < 0.0001).all())

    # reverse() must restore the original shape and values
    recovered = mapper.reverse(projected)
    self.assertEqual(recovered.shape, (40, 20))
    recon_err = np.abs(mapper.reverse(projected) - self.ndlin).sum()
    self.assertTrue(recon_err < 0.0001)
def _get_hypesvs(self, sl_connectomes, local_common_model=None):
    '''
    Hyperalign connectomes and return mappers and trained
    SVDMapper of common space.

    Parameters
    ----------
    sl_connectomes: a list of connectomes to hyperalign
    local_common_model: a reference common model to be used.

    Returns
    -------
    a tuple (sl_hmappers, svm, local_common_model)
    sl_hmappers: a list of mappers corresponding to input list in that
        order.
    svm: an SVDMapper trained on the input data. If given a common
        model, this is None.
    local_common_model: If local_common_model is provided as input,
        this will be None. Otherwise, local_common_model will be
        computed here and returned.
    '''
    # TODO Should we z-score sl_connectomes?
    # FIX (idiom): `False if x is None else True` simplified
    return_model = self.params.save_model is not None
    if local_common_model is not None:
        # align to the supplied reference; skip level-2 iterations
        ha = Hyperalignment(level2_niter=0)
        if not is_datasetlike(local_common_model):
            local_common_model = Dataset(samples=local_common_model)
        ha.train([local_common_model])
        sl_hmappers = ha(sl_connectomes)
        return sl_hmappers, None, None
    ha = Hyperalignment()
    sl_hmappers = ha(sl_connectomes)
    sl_connectomes = [slhm.forward(slc)
                      for slhm, slc in zip(sl_hmappers, sl_connectomes)]
    # FIX (idiom): zscore mutates in place; use a plain loop instead of a
    # throwaway list comprehension for the side effect
    for slc in sl_connectomes:
        zscore(slc, chunks_attr=None)
    # average the aligned, z-scored connectomes into one common matrix
    sl_connectomes = np.dstack(sl_connectomes).mean(axis=-1)
    svm = SVDMapper(force_train=True)
    svm.train(sl_connectomes)
    if return_model:
        local_common_model = svm.forward(sl_connectomes)
    else:
        local_common_model = None
    return sl_hmappers, svm, local_common_model
def _get_hypesvs(self, sl_connectomes, local_common_model=None):
    '''Hyperalign searchlight connectomes; return mappers plus a trained
    SVDMapper over the resulting common space.

    Parameters
    ----------
    sl_connectomes: list of connectomes to hyperalign.
    local_common_model: optional reference common model to align to.

    Returns
    -------
    tuple (sl_hmappers, svm, local_common_model)
        sl_hmappers -- mappers, one per input connectome, same order.
        svm -- SVDMapper trained on the mean aligned connectome, or None
            when a reference model was supplied.
        local_common_model -- common model computed here (only when
            ``self.params.save_model`` is set and no reference model was
            given), otherwise None.
    '''
    # TODO Should we z-score sl_connectomes?
    want_model = self.params.save_model is not None
    if local_common_model is not None:
        # Align everything onto the provided reference; no level-2 passes.
        hyper = Hyperalignment(level2_niter=0)
        if not is_datasetlike(local_common_model):
            local_common_model = Dataset(samples=local_common_model)
        hyper.train([local_common_model])
        return hyper(sl_connectomes), None, None

    hyper = Hyperalignment()
    hmappers = hyper(sl_connectomes)
    aligned = [m.forward(conn)
               for m, conn in zip(hmappers, sl_connectomes)]
    # zscore works in place -- we only care about the side effect
    for conn in aligned:
        zscore(conn, chunks_attr=None)
    mean_connectome = np.dstack(aligned).mean(axis=-1)
    svm = SVDMapper(force_train=True)
    svm.train(mean_connectome)
    common_model = svm.forward(mean_connectome) if want_model else None
    return hmappers, svm, common_model
def test_more_svd(self):
    """SVDMapper on ``self.largefeat`` (40 features, rank-10 projection).

    Checks the non-square projection shape, singular-value spectrum,
    reconstruction via ``reverse()``, deep-copyability, and that
    forward/reverse handle fresh data of matching feature count.
    """
    pm = SVDMapper()
    # train SVD
    pm.train(self.largefeat)
    # FIX: failUnless/failUnlessEqual are deprecated unittest aliases,
    # removed in Python 3.12 -- use assertTrue/assertEqual throughout.
    # mixing matrix cannot be square
    self.assertEqual(pm.proj.shape, (40, 10))
    # only first singular value significant
    self.assertTrue(pm.sv[:1] > 10)
    self.assertTrue((pm.sv[1:] < 10).all())
    # now project data into SVD space
    p = pm.forward(self.largefeat)
    # only variance of first component significant
    var = p.var(axis=0)
    # test that only one component has variance
    self.assertTrue(var[:1] > 1.0)
    self.assertTrue((var[1:] < 0.0001).all())
    # check that the mapped data can be fully recovered by 'reverse()'
    rp = pm.reverse(p)
    self.assertEqual(rp.shape, self.largefeat.shape)
    self.assertTrue((np.round(rp) == self.largefeat).all())
    # copy mapper -- deepcopy itself must not raise on a trained mapper
    pm2 = deepcopy(pm)
    # now make new random data and do forward->reverse check
    data = np.random.normal(size=(98, 40))
    data_f = pm.forward(data)
    self.assertEqual(data_f.shape, (98, 10))
    data_r = pm.reverse(data_f)
    self.assertEqual(data_r.shape, (98, 40))
def test_more_svd(self):
    """SVDMapper with a reduced-rank (40 -> 10) projection.

    Verifies the rectangular projection shape, that only the first
    singular value is large, round-trip reconstruction of the training
    data, deep-copyability of the trained mapper, and shape handling of
    new 40-feature data.
    """
    mapper = SVDMapper()
    mapper.train(self.largefeat)

    # projection cannot be square here: 40 input features, rank 10
    self.assertEqual(mapper.proj.shape, (40, 10))

    # spectrum: only the leading singular value is large
    self.assertTrue(mapper.sv[:1] > 10)
    self.assertTrue((mapper.sv[1:] < 10).all())

    # project into component space
    projected = mapper.forward(self.largefeat)

    # variance is carried by the first component alone
    comp_var = projected.var(axis=0)
    self.assertTrue(comp_var[:1] > 1.0)
    self.assertTrue((comp_var[1:] < 0.0001).all())

    # reverse() must reconstruct the training data (up to rounding)
    recovered = mapper.reverse(projected)
    self.assertEqual(recovered.shape, self.largefeat.shape)
    self.assertTrue((np.round(recovered) == self.largefeat).all())

    # a trained mapper must survive deep copying
    mapper_copy = deepcopy(mapper)

    # forward->reverse on fresh random data with matching feature count
    fresh = np.random.normal(size=(98, 40))
    fresh_fwd = mapper.forward(fresh)
    self.assertEqual(fresh_fwd.shape, (98, 10))
    fresh_rev = mapper.reverse(fresh_fwd)
    self.assertEqual(fresh_rev.shape, (98, 40))
def test_hpal_svd_combo(self):
    """Hyperalignment's ``output_dim`` reduction should reproduce a manual
    Hyperalignment-then-SVDMapper pipeline.

    Per-subject datasets are built by appending bogus features to a
    shared SVD-rotated seed and applying random affine transforms; the
    two pipelines are compared via feature-wise correlations.
    """
    # get seed dataset
    ds4l = datasets['uni4large']
    ds_orig = ds4l[:, ds4l.a.nonbogus_features]
    # XXX Is this SVD mapping required?
    svm = SVDMapper()
    svm.train(ds_orig)
    ds_svs = svm.forward(ds_orig)
    ds_orig.samples = ds_svs.samples
    nf_true = ds_orig.nfeatures
    n = 4  # # of datasets to generate
    # Adding non-shared dimensions for each subject
    dss_rotated = [[]] * n
    for i in range(n):
        dss_rotated[i] = hstack(
            (ds_orig, ds4l[:, ds4l.a.bogus_features[i * 4:i * 4 + 4]]))
    # rotate data
    nf = dss_rotated[0].nfeatures
    # FIX: xrange is Python 2-only (NameError on Python 3) -- use range
    dss_rotated = [random_affine_transformation(dss_rotated[i])
                   for i in range(n)]
    # Test if it is close to doing hpal+SVD in sequence outside hpal
    # First, as we do in sequence outside hpal
    ha = Hyperalignment()
    mappers_orig = ha(dss_rotated)
    dss_back = [m.forward(ds_)
                for m, ds_ in zip(mappers_orig, dss_rotated)]
    dss_mean = np.mean([sd.samples for sd in dss_back], axis=0)
    svm = SVDMapper()
    svm.train(dss_mean)
    dss_sv = [svm.forward(sd) for sd in dss_back]
    # Test for SVD dimensionality reduction even with 2 training subjects
    for output_dim in [1, 4]:
        ha = Hyperalignment(output_dim=output_dim)
        ha.train(dss_rotated[:2])
        mappers = ha(dss_rotated)
        dss_back = [m.forward(ds_)
                    for m, ds_ in zip(mappers, dss_rotated)]
        for sd in dss_back:
            # FIX: bare assert is stripped under `python -O` and gives a
            # poor failure message -- use the unittest assertion instead
            self.assertEqual(sd.nfeatures, output_dim)
    # Check if combined hpal+SVD works as expected
    # (uses dss_back from the last, output_dim=4, iteration)
    sv_corrs = []
    for sd1, sd2 in zip(dss_sv, dss_back):
        ndcs = np.diag(np.corrcoef(sd1.samples.T,
                                   sd2.samples.T)[nf:, :nf], k=0)
        sv_corrs.append(ndcs)
    self.assertTrue(
        np.all(np.abs(np.array(sv_corrs)) >= 0.95),
        msg="Hyperalignment with dimensionality reduction should have "
            "reconstructed SVD dataset. Got correlations %s." % sv_corrs)
    # Check if it recovers original SVs
    sv_corrs_orig = []
    for sd in dss_back:
        ndcs = np.diag(np.corrcoef(sd.samples.T,
                                   ds_orig.samples.T)[nf_true:, :nf_true],
                       k=0)
        sv_corrs_orig.append(ndcs)
    self.assertTrue(
        np.all(np.abs(np.array(sv_corrs_orig)) >= 0.9),
        msg="Expected original dimensions after "
            "SVD. Got correlations %s." % sv_corrs_orig)