    def __call__(self, datasets):
        """ estimate mappers for each dataset.
        Parameters
        ---------- 
        datasets : list of datasets
        
        Returns 
        -------
        mappers_iter1: mappers from the first HA iteration
        mappers_iter2: mappers from the second HA iteration
        
        """
        debug.active += ['SHPAL', 'SLC']

        mask_node_indices = np.concatenate(self.mask_node_indices)
        qe = self.queryengine
        nproc = self.nproc
        dtype = self.dtype
        nblocks = self.nblocks
                
        ha_iter1 = SearchlightHyperalignment(queryengine=qe,
                                             nproc=nproc,
                                             nblocks=nblocks,
                                             mask_node_ids=mask_node_indices,
                                             dtype=dtype)

        mappers_iter1 = ha_iter1(datasets)
        aligned_iter1_datasets = self._apply_mappers(datasets, mappers_iter1)

        h2a_input_data = self._prep_h2a_data(aligned_iter1_datasets, mask_node_indices)
        ha_iter2 = SearchlightHyperalignment(queryengine=qe,
                                             nproc=nproc,
                                             nblocks=nblocks,
                                             mask_node_ids=mask_node_indices,
                                             dtype=dtype)
        mappers_iter2 = ha_iter2(h2a_input_data)
        # push the original data through the trained model
        mappers_final = ha_iter2(datasets)
        
        if self.get_all_mappers:
            return mappers_iter1, mappers_iter2, mappers_final
        return mappers_final
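# Hedged sketch of the _apply_mappers helper used in __call__ above. Its
# original body is not shown here, so this assumes it simply forwards each
# dataset through its mapper and re-zscores the result before the second pass.
from mvpa2.mappers.zscore import zscore

def _apply_mappers(self, datasets, mappers):
    aligned = []
    for ds, mapper in zip(datasets, mappers):
        ads = mapper.forward(ds)       # project the dataset through its mapper
        zscore(ads, chunks_attr=None)  # renormalize before the second HA pass
        aligned.append(ads)
    return aligned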
def run_hyperalignment(subjects_to_analyze, out_dir):
    # Load subject data
    ds_all = []
    for subject_label in subjects_to_analyze:
        ds_all.append(h5load('%s/sub-%s_data.hdf5' % (out_dir, subject_label)))
    # Initialize searchlight hyperalignment
    slhyper = SearchlightHyperalignment(radius=2,
                                        nblocks=10,
                                        sparse_radius=5,
                                        dtype='float16')
    hmappers = slhyper(ds_all)
    return hmappers
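# A hedged follow-up sketch: applying the mappers returned by
# run_hyperalignment() to project each subject's data into the common space
# and saving the result; the output filename pattern is an assumption.
from mvpa2.base.hdf5 import h5load, h5save

def project_to_common_space(subjects_to_analyze, out_dir):
    hmappers = run_hyperalignment(subjects_to_analyze, out_dir)
    for subject_label, mapper in zip(subjects_to_analyze, hmappers):
        ds = h5load('%s/sub-%s_data.hdf5' % (out_dir, subject_label))
        ds_common = mapper.forward(ds)  # project into the common space
        h5save('%s/sub-%s_data_common.hdf5' % (out_dir, subject_label),
               ds_common, compression=9)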
    def test_searchlight_hyperalignment_warnings_and_exceptions(self):
        skip_if_no_external('scipy')
        skip_if_no_external('hdf5')  # needed for default results backend hdf5

        ds_orig = datasets['3dsmall'][:, :1]  # tiny dataset just to test exceptions
        ds_orig.fa['voxel_indices'] = ds_orig.fa.myspace
        slhyper = SearchlightHyperalignment()
        self.assertRaises(ValueError, slhyper, [ds_orig])  # need more than 1
        ds_orig.samples += 1.0  # not zscored for sure
        # TODO: we need assert_warnings to also capture our own warnings,
        # currently they are just suppressed :-/  So this is just a smoke test
        mappers = slhyper([ds_orig, ds_orig.copy()])
def align_one_target(sources_train,
                     sources_test,
                     target_train,
                     target_test,
                     method,
                     masker,
                     pairwise_method,
                     clustering,
                     n_pieces,
                     n_jobs,
                     decoding_dir=None,
                     srm_atlas=None,
                     srm_components=40,
                     ha_radius=5,
                     ha_sparse_radius=3,
                     smoothing_fwhm=6,
                     surface=False):
    overhead_time = 0
    aligned_sources_test = []

    if surface == "rh":

        clustering = clustering.replace("lh", "rh")
        # clustering = load_surf_data(
        #    "/storage/store2/tbazeill/schaeffer/FreeSurfer5.3/fsaverage/label/rh.Schaefer2018_700Parcels_17Networks_order.annot")
        sources_train = np.asarray(
            [t.replace("lh", "rh") for t in sources_train])
        sources_test = np.asarray(
            [t.replace("lh", "rh") for t in sources_test])
        target_train.replace("lh", "rh")
        target_test.replace("lh", "rh")
    if surface in ["rh", "lh"]:
        from nilearn.surface import load_surf_data
        clustering = load_surf_data(clustering)
    if method == "anat_inter_subject":
        fit_start = time.process_time()
        if surface in ["lh", "rh"]:
            aligned_sources_test = load_clean(sources_test, masker)
            aligned_target_test = load_clean(target_test, masker)
        else:
            aligned_sources_test = np.vstack(
                [masker.transform(s) for s in sources_test])
            aligned_target_test = masker.transform(target_test)

    elif method == "smoothing":
        fit_start = time.process_time()
        smoothing_masker = NiftiMasker(mask_img=masker.mask_img_,
                                       smoothing_fwhm=smoothing_fwhm).fit()
        aligned_sources_test = np.vstack(
            [smoothing_masker.transform(s) for s in sources_test])
        aligned_target_test = smoothing_masker.transform(target_test)
    elif method in ["pairwise", "intra_subject"]:
        fit_start = time.process_time()
        for source_train, source_test in zip(sources_train, sources_test):
            if method == "pairwise":
                if surface in ["lh", "rh"]:
                    source_align = SurfacePairwiseAlignment(
                        alignment_method=pairwise_method,
                        clustering=clustering,
                        n_jobs=n_jobs)
                else:
                    source_align = PairwiseAlignment(
                        alignment_method=pairwise_method,
                        clustering=clustering,
                        n_pieces=n_pieces,
                        mask=masker,
                        n_jobs=n_jobs)
                source_align.fit(source_train, target_train)
                aligned_sources_test.append(
                    source_align.transform(source_test))
            elif method == "intra_subject":
                source_align = IntraSubjectAlignment(
                    alignment_method="ridge_cv",
                    clustering=clustering,
                    n_pieces=n_pieces,
                    mask=masker,
                    n_jobs=n_jobs)
                source_align.fit(source_train, source_test)
                aligned_sources_test.append(
                    source_align.transform(target_train))

        if surface in ["lh", "rh"]:
            aligned_sources_test = np.vstack(aligned_sources_test)
            aligned_target_test = load_clean(target_test, masker)
        else:
            aligned_target_test = masker.transform(target_test)
            aligned_sources_test = np.vstack(
                [masker.transform(t) for t in aligned_sources_test])
    elif method == "srm":
        common_time = time.process_time()
        fastsrm = FastSRM(atlas=srm_atlas,
                          n_components=srm_components,
                          n_iter=1000,
                          n_jobs=n_jobs,
                          aggregate="mean",
                          temp_dir=decoding_dir)

        reduced_SR = fastsrm.fit_transform(
            [masker.transform(t).T for t in sources_train])
        overhead_time = time.process_time() - common_time

        fit_start = time.process_time()
        fastsrm.aggregate = None

        fastsrm.add_subjects([masker.transform(t).T for t in [target_train]],
                             reduced_SR)
        aligned_test = fastsrm.transform([
            masker.transform(t).T
            for t in np.hstack([sources_test, [target_test]])
        ])
        aligned_sources_test = np.hstack(aligned_test[:-1]).T
        aligned_target_test = aligned_test[-1].T
    elif method == "HA":
        overhead_time = 0
        fit_start = time.process_time()

        from mvpa2.algorithms.searchlight_hyperalignment import SearchlightHyperalignment
        from mvpa2.datasets.base import Dataset
        pymvpa_datasets = []

        flat_mask = load_img(masker.mask_img_).get_fdata().flatten()
        n_voxels = flat_mask.sum()
        flat_coord_grid = make_coordinates_grid(
            masker.mask_img_.shape).reshape((-1, 3))
        masked_coord_grid = flat_coord_grid[flat_mask != 0]
        for sub, sub_data in enumerate(
                np.hstack([[target_train], sources_train])):
            d = Dataset(masker.transform(sub_data))
            d.fa['voxel_indices'] = masked_coord_grid
            pymvpa_datasets.append(d)
        ha = SearchlightHyperalignment(radius=ha_radius,
                                       nproc=1,
                                       sparse_radius=ha_sparse_radius)
        ha(pymvpa_datasets)
        aligned_sources_test = []
        for j, source_test in enumerate(sources_test):
            if surface in ["lh", "rh"]:
                array_source = load_clean(source_test, masker)
            else:
                array_source = masker.transform(source_test)
            aligned_sources_test.append(
                array_source.dot(ha.projections[j + 1].proj.toarray()))
        aligned_sources_test = np.vstack(aligned_sources_test)
        aligned_target_test = masker.transform(target_test).dot(
            ha.projections[0].proj.toarray())

    fit_time = time.process_time() - fit_start

    return aligned_sources_test, aligned_target_test, fit_time, overhead_time
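# Hedged usage sketch for align_one_target(): every path, the mask image, and
# the parameter values below are placeholders for illustration, not values
# taken from the original benchmark.
from nilearn.input_data import NiftiMasker

masker = NiftiMasker(mask_img='group_mask.nii.gz').fit()  # placeholder mask image
aligned_sources_test, aligned_target_test, fit_time, overhead_time = align_one_target(
    sources_train=['sub-01_train.nii.gz', 'sub-02_train.nii.gz'],
    sources_test=['sub-01_test.nii.gz', 'sub-02_test.nii.gz'],
    target_train='sub-03_train.nii.gz',
    target_test='sub-03_test.nii.gz',
    method='pairwise',
    masker=masker,
    pairwise_method='scaled_orthogonal',
    clustering='kmeans',
    n_pieces=150,
    n_jobs=1)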
# where to write out intermediate files
os.environ['TMPDIR'] = '/dartfs-hpc/scratch/f002d44/temp'
os.environ['TEMP'] = '/dartfs-hpc/scratch/f002d44/temp'
os.environ['TMP'] = '/dartfs-hpc/scratch/f002d44/temp'

t0 = time.time()
print('-------- beginning hyperalignment at {t0} --------'.format(t0=t0))
debug.active += ['SHPAL', 'SLC']

N_PROCS = 16
N_BLOCKS = 128

slhyper = SearchlightHyperalignment(
    queryengine=qe,  # pass it our surface query engine
    nproc=N_PROCS,  # the number of processes we want to use
    nblocks=N_BLOCKS,  # the number of blocks we want to divide that into (the more you have, the less memory it takes)
    mask_node_ids=node_indices,  # tell it which nodes you are masking
    dtype='float64')

transformations = slhyper(dss)
elapsed = time.time() - t0
print('-------- time elapsed: {elapsed} --------'.format(elapsed=elapsed))
h5save(outdir + 'hyperalignment_mappers.hdf5.gz',
       transformations,
       compression=9)

# 7. You did it! Way to go. That saved an HDF5 file of each subject's transformation matrices into the common space.
# Now we save each individual's mapper as an npz file.

from scipy.sparse import save_npz, load_npz
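# A minimal sketch of that npz step, assuming `transformations` holds one
# projection mapper per subject (as returned by slhyper above) and that
# `subjects` and `outdir` are defined elsewhere in the script.
for subj, mapper in zip(subjects, transformations):
    # mapper.proj is a scipy sparse matrix; convert to CSR so save_npz accepts it
    save_npz('{outdir}{subj}_mapper.npz'.format(outdir=outdir, subj=subj),
             mapper.proj.tocsr())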
 def __init__(self, **kwargs):
     SearchlightHyperalignment.__init__(self, **kwargs)
    ds.fa['voxel_indices'] = range(ds.shape[1])
    #ds.sa['chunks'] = np.repeat(i,cnx_tx)
    mv.zscore(ds, chunks_attr=None)
    dss.append(ds)
    
    
print('Number of data sets in dss: ')
print(len(dss))
print('Size of data sets: ')
print(dss[0].shape)
    
# create SL hyperalignment instance
hyper = SearchlightHyperalignment(
    queryengine=qe,
    compute_recon=False,  # we don't need to project back from common space to subject space
    nproc=1,
    nblocks=N_BLOCKS,
    dtype='float64'
)

# start timer
t0 = time.time()

# hyperalign dss
mappers = hyper(dss)

# save mappers
try:
    h5save(toutdir, mappers)
    print('saved hdf5 mappers')
except Exception:
    # the original fallback is not shown; a minimal hedged handler
    print('could not save hdf5 mappers to', toutdir)
    def test_custom_qas(self):
        # Test if we could provide custom QEs per each of the datasets
        skip_if_no_external('scipy')
        skip_if_no_external('hdf5')  # needed for default results backend hdf5

        ns, nf = 10, 4  # # of samples/features -- a very BIG dataset ;)
        ds0 = Dataset(np.random.normal(size=(ns, nf)))
        zscore(ds0, chunks_attr=None)
        ds1 = ds0[:, [3, 0, 1, 2]]  # features circular shifted to the right

        qe0 = FancyQE([[0], [1], [2], [3]])  # does nothing
        qe1 = FancyQE([[1], [2], [3], [0]])  # knows to look into the right

        def apply_slhyper(queryengine,
                          dss=[ds0, ds1],
                          return_mappers=False,
                          **kw):
            """Helper for a common code to create/call slhyper"""
            slhyper = SearchlightHyperalignment(queryengine=queryengine, **kw)
            mappers = slhyper(dss)
            proj = [m.proj.todense() for m in mappers]
            return (proj, mappers) if return_mappers else proj

        # since this single qe resulted in trying to match non-matching time series
        # projections should be non-identity, but no offdiagonal elements
        assert_no_offdiag(apply_slhyper(qe0))

        # both are provided
        projs, mappers = apply_slhyper([qe0, qe1], return_mappers=True)
        tprojs_shifted = [np.eye(nf), np.roll(np.eye(nf), 1, axis=0)]
        assert_array_equal(
            projs[0],
            tprojs_shifted[0])  # must be identity since we made them so
        assert_array_equal(
            projs[1],
            tprojs_shifted[1])  # pretty much incorporating that shift

        # TODO -- not identity assert_array_equal(projs[0], np.eye(len(p)))  # must be identity since we made them so
        # and must restore data properly
        assert_array_almost_equal(mappers[0].forward(ds0),
                                  mappers[1].forward(ds1))

        # give more datasets than # of qes
        assert_raises(ValueError,
                      SearchlightHyperalignment(queryengine=[qe0, qe1]),
                      [ds0, ds1, ds0])

        # The one having no voxels for the "1st" id in "subj1"
        qe1_ = FancyQE([[1], [], [3], [0]])  # knows to look into the right

        projs = apply_slhyper(qe1_)
        assert_no_offdiag(projs)
        for proj in projs:
            # assess that both have '2nd' one 0
            # but not the others!
            assert_array_equal(
                np.diagonal(proj) != 0, [True, True, False, True])

        # smoke test whenever combine is False
        # In this case should work ok
        apply_slhyper(qe0, combine_neighbormappers=False)
        # this one ok as well since needs only matching ones in ref_ds
        apply_slhyper([qe0, qe1], combine_neighbormappers=False)
        # here since features do not match node_ids -- should raise ValueError
        assert_raises(ValueError,
                      apply_slhyper,
                      qe1,
                      combine_neighbormappers=False)
        assert_raises(ValueError,
                      apply_slhyper, [qe0, qe1],
                      ref_ds=1,
                      combine_neighbormappers=False)

        # and now only one qe lacking for that id
        projs = apply_slhyper([qe0, qe1_])
        tproj0 = np.eye(nf)
        tproj0[1, 1] = 0
        tprojs_shifted_1st0 = [tproj0, np.roll(tproj0, 1, axis=0)]
        for proj, tproj in zip(projs, tprojs_shifted_1st0):
            # assess that both have '2nd' one 0
            # but not the others!
            assert_array_equal(proj, tproj)

        # And now a test with varying number of selected fids, no shift
        qe0 = FancyQE([[0], [1, 2], [1, 2, 3], [0, 1, 2, 3]])
        projs = apply_slhyper(qe0)
        # Test that in general we get larger coefficients for "correct" transformation
        for p, tproj in zip(projs, tprojs_shifted):
            assert (np.all(np.asarray(p)[tproj > 0] >= 1.0))
            assert_array_lequal(np.mean(np.asarray(p)[tproj == 0]), 0.3)

        qe1 = FancyQE([[0, 1, 2, 3], [1, 2, 3], [2, 3], [3]])
        # Just a smoke test, for now TODO
        projs = apply_slhyper([qe0, qe1])
 def test_searchlight_hyperalignment(self):
     skip_if_no_external('scipy')
     skip_if_no_external('h5py')
     ds_orig = datasets['3dsmall'].copy()[:, :15]
     ds_orig.fa['voxel_indices'] = ds_orig.fa.myspace
     space = 'voxel_indices'
     # total number of datasets for the analysis
     nds = 5
     zscore(ds_orig, chunks_attr=None)
     dss = [ds_orig]
     # create a few distorted datasets to match the desired number of datasets
     # not sure if this truly mimics the real data, but at least we can test
     # implementation
     while len(dss) < nds - 1:
         sd = local_random_affine_transformations(
             ds_orig,
             scatter_neighborhoods(Sphere(1),
                                   ds_orig.fa[space].value,
                                   deterministic=True)[1],
             Sphere(2),
             space=space,
             scale_fac=1.0,
             shift_fac=0.0)
          # the function above sometimes returns a dataset with NaNs or Infs; we don't want that
         if np.sum(np.isnan(sd.samples)+np.isinf(sd.samples)) == 0 \
                 and np.all(sd.samples.std(0)):
             dss.append(sd)
     ds_orig_noisy = ds_orig.copy()
     ds_orig_noisy.samples += 0.1 * np.random.random(
         size=ds_orig_noisy.shape)
     dss.append(ds_orig_noisy)
     _ = [zscore(sd, chunks_attr=None) for sd in dss[1:]]
     # we should have some distortion
     for ds in dss[1:]:
         assert_false(np.all(ds_orig.samples == ds.samples))
     # testing checks
     slhyp = SearchlightHyperalignment(ref_ds=1, exclude_from_model=[1])
     self.assertRaises(ValueError, slhyp, dss[:3])
     slhyp = SearchlightHyperalignment(ref_ds=3)
     self.assertRaises(ValueError, slhyp, dss[:3])
     # explicit test of exclude_from_model
     slhyp = SearchlightHyperalignment(ref_ds=2,
                                       exclude_from_model=[1],
                                       featsel=0.7)
     projs1 = slhyp(dss)
     aligned1 = [proj.forward(ds) for proj, ds in zip(projs1, dss)]
     samples = dss[1].samples.copy()
     dss[1].samples += 0.1 * np.random.random(size=dss[1].shape)
     projs2 = slhyp(dss)
     aligned2 = [proj.forward(ds) for proj, ds in zip(projs1, dss)]
     for i in [0, 2, 3, 4]:
         assert_array_almost_equal(projs1[i].proj.todense(),
                                   projs2[i].proj.todense())
         assert_array_almost_equal(aligned1[i].samples, aligned2[i].samples)
     assert_false(
         np.all(projs1[1].proj.todense() == projs1[2].proj.todense()))
     assert_false(np.all(aligned1[1].samples == aligned2[1].samples))
     dss[1].samples = samples
     # store projections for each mapper separately
     projs = list()
     # run the algorithm with all combinations of the two major parameters
     # for projection calculation.
     for kwargs in [{
             'combine_neighbormappers': True,
             'nproc': 1 + int(externals.exists('pprocess'))
     }, {
             'combine_neighbormappers': True,
             'dtype': 'float64',
             'compute_recon': True
     }, {
             'combine_neighbormappers': True,
             'exclude_from_model': [2, 4]
     }, {
             'combine_neighbormappers': False
     }, {
             'combine_neighbormappers': False,
             'mask_node_ids': np.arange(dss[0].nfeatures).tolist()
     }, {
             'combine_neighbormappers': True,
             'sparse_radius': 1
     }, {
             'combine_neighbormappers': True,
             'nblocks': 2
     }]:
         slhyp = SearchlightHyperalignment(radius=2, **kwargs)
         mappers = slhyp(dss)
         # one mapper per input ds
         assert_equal(len(mappers), nds)
         projs.append(mappers)
     # some checks
     for midx in range(nds):
         # making sure mask_node_ids options works as expected
         assert_array_almost_equal(projs[3][midx].proj.todense(),
                                   projs[4][midx].proj.todense())
         # recon check
         assert_array_almost_equal(projs[0][midx].proj.todense(),
                                   projs[1][midx].recon.T.todense(),
                                   decimal=5)
         assert_equal(projs[1][midx].proj.dtype, 'float64')
         assert_equal(projs[0][midx].proj.dtype, 'float32')
     # making sure the projections make sense
     for proj in projs:
          # no .max on sparse matrices on older scipy (e.g. on precise), so convert to array first
         max_weight = proj[0].proj.toarray().max(1).squeeze()
         diag_weight = proj[0].proj.diagonal()
         # Check to make sure diagonal is the max weight, in almost all rows for reference subject
         assert (np.sum(max_weight == diag_weight) / float(len(diag_weight))
                 >= 0.80)
         # and not true for other subjects
         for i in range(1, nds - 1):
             assert (np.sum(proj[i].proj.toarray().max(1).squeeze() ==
                            proj[i].proj.diagonal()) /
                     float(proj[i].proj.shape[0]) < 0.80)
         # Check to make sure projection weights match across duplicate datasets
         max_weight = proj[-1].proj.toarray().max(1).squeeze()
         diag_weight = proj[-1].proj.diagonal()
         # Check to make sure diagonal is the max weight, in almost all rows for reference subject
         assert (np.sum(max_weight == diag_weight) / float(len(diag_weight))
                 >= 0.80)
     # project data
     dss_hyper = [hm.forward(sd) for hm, sd in zip(projs[0], dss)]
     _ = [zscore(sd, chunks_attr=None) for sd in dss_hyper]
     ndcss = []
     nf = ds_orig.nfeatures
     for ds_hyper in dss_hyper:
         ndcs = np.diag(np.corrcoef(ds_hyper.samples.T,
                                    ds_orig.samples.T)[nf:, :nf],
                        k=0)
         ndcss += [ndcs]
     assert_true(np.median(ndcss[0]) > 0.9)
     # noisy copy of original dataset should be similar to original after hyperalignment
     assert_true(np.median(ndcss[-1]) > 0.9)
     assert_true(np.all([np.median(ndcs) > 0.2 for ndcs in ndcss[1:-2]]))
        for nst, split in enumerate(splits):
            if nst == 0:
                runs = [1, 2, 3]
            elif nst == 1:
                runs = [4, 5]
            dss, node_indices = load_dss(lr, runs, subjects)
            for ds in dss:
                print(ds.shape)

            debug.active += ['SHPAL', 'SLC']
            ha = SearchlightHyperalignment(
                queryengine=SurfaceQueryEngine(surf, radius),
                nproc=N_JOBS,
                nblocks=128,
                compute_recon=False,
                featsel=1.0,
                mask_node_ids=node_indices,
                dtype='float64',
            )

            Ts = ha(dss)
            for T, subj in zip(Ts, subjects):
                out_fn = '/out_fn/split_{split}/{subj}_{lr}h_{radius}_{runs}.hdf5.gz'\
                         ''.format(split=split, subj=subj, lr=lr, radius=radius, runs='-'.join([str(_) for _ in runs]))
                h5save(out_fn, T, compression=9)

    for subj in subjects:
        for lr in 'lr':
            for nst, split in enumerate(splits):
                if nst == 0: