Example #1
 def test_hyper_ref_ds_range_checks(self):
     # If the supplied ref_ds can't be converted to a non-negative int,
     # it should throw an exception
     with self.assertRaises(ValueError):
         ha = Hyperalignment(ref_ds=-1.5)
     # But it should work if it can, e.g. int(0.5) == 0
     ha = Hyperalignment(ref_ds=0.5)
     # or int(3.5)=3
     ha = Hyperalignment(ref_ds=3.5)
     ds_all = [datasets['uni4small'] for i in range(3)]
     # Make sure it raises an error if ref_ds (3) is out of range for 3 datasets
     self.assertRaises(ValueError, ha, ds_all)
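
For orientation, here is a minimal, hypothetical usage sketch of the Hyperalignment API exercised by the examples on this page; it assumes PyMVPA is installed and that `dss` is a list of equally shaped, z-scored datasets, and is illustrative only, not taken from any of the listed projects.

# Minimal sketch, assuming PyMVPA is available and `dss` is a list of
# equally shaped, z-scored Dataset objects (an assumption for illustration).
from mvpa2.algorithms.hyperalignment import Hyperalignment

ha = Hyperalignment(ref_ds=0)                      # first dataset as reference
mappers = ha(dss)                                  # one mapper per dataset
dss_aligned = [m.forward(ds) for m, ds in zip(mappers, dss)]
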
Example #2
def hyperalignment(input_data, output_data, mask, output_suffix, training_runs,
                   testing_runs, **kwargs):
    """

    Parameters
    ----------
    input_data: Input data path.
    output_data: Output data path.
    mask: Path to mask file.
    output_suffix: Filename suffix for saving aligned data.
    training_runs: List of runs to be used for training.
    testing_runs: List of runs to be used for testing.
    kwargs: Passed on to Hyperalignment.

    Returns
    -------
    dict with keys 'train' and 'test', each a list of hyperaligned datasets.
    """
    # XXX TODO Use mask to load from nifti file
    dss_train = load_data(input_data, training_runs)  #, mask)
    dss_test = load_data(input_data, testing_runs)  #, mask)
    # Initialize hyperalignment
    ha = Hyperalignment(**kwargs)
    # Run hyperalignment on training data
    hmappers = ha(dss_train)
    # Align and save data
    dss_aligned = {}
    for split, dss in (('train', dss_train), ('test', dss_test)):
        dss_hyper = [hm.forward(sd) for hm, sd in zip(hmappers, dss)]
        if output_data is not None:
            save_data(dss_hyper, output_suffix + split)
        dss_aligned[split] = dss_hyper

    return dss_aligned
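
A hypothetical invocation of the helper above might look as follows; the paths, run lists, and the load_data/save_data helpers are placeholders from this example's own context, not a fixed API.

# Hypothetical invocation of the helper above; paths and run lists are
# illustrative placeholders, and ref_ds is forwarded to Hyperalignment
# through **kwargs.
aligned = hyperalignment(
    input_data='/data/study/bold',
    output_data='/data/study/aligned',
    mask='/data/study/mask.nii.gz',
    output_suffix='hyper_',
    training_runs=[0, 1, 2],
    testing_runs=[3, 4],
    ref_ds=0)
dss_train_aligned = aligned['train']
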
 def test_hyper_input_dataset_check(self):
     # If supplied with only one dataset during training,
     # make sure it doesn't run multiple levels and crap out
     ha = Hyperalignment()
     ds_all = [datasets['uni4small'] for i in range(3)]
     # Make sure it raises TypeError if a list is not passed
     self.assertRaises(TypeError, ha, ds_all[0])
     self.assertRaises(TypeError, ha.train, ds_all[0])
     # And it doesn't crap out with a single dataset for training
     ha.train([ds_all[0]])
     zscore(ds_all[0], chunks_attr=None)
     assert_array_equal(ha.commonspace, ds_all[0].samples)
     # make sure it also accepts a tuple of datasets and an object ndarray
     ha = Hyperalignment()
     m = ha(tuple(ds_all))
     ha = Hyperalignment()
     dss_arr = np.empty(len(ds_all), dtype=object)
     for i in range(len(ds_all)):
         dss_arr[i] = ds_all[i]
     m = ha(dss_arr)
Example #4
 def fa(s_list):
     try:
         return Hyperalignment(alpha=alpha)(s_list)
     except:
         logger.warning('Hyperalignment failed for {hemi} {roi}.'.format(
             hemi=s_list[0].fa.hemi[0], roi=s_list[0].fa.annotation[0]))
         logger.warning('Inserting identity mappers.')
         return [
             StaticProjectionMapper(numpy.eye(s.fa.attr_length))
             for s in s_list
         ]
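
The `fa` helper above relies on names from its enclosing scope (`alpha`, `logger`, `numpy`); a self-contained sketch of the same fallback pattern, under the assumption that the PyMVPA imports below are available, might look like this.

# Self-contained sketch of the same "hyperalign or fall back to identity"
# pattern; `alpha` is exposed as an argument here purely for illustration.
import logging
import numpy as np
from mvpa2.algorithms.hyperalignment import Hyperalignment
from mvpa2.mappers.staticprojection import StaticProjectionMapper

logger = logging.getLogger(__name__)

def hyperalign_or_identity(s_list, alpha=1.0):
    try:
        return Hyperalignment(alpha=alpha)(s_list)
    except Exception:
        logger.warning('Hyperalignment failed; inserting identity mappers.')
        return [StaticProjectionMapper(np.eye(s.nfeatures)) for s in s_list]
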
 def test_hpal_joblib(self):
     skip_if_no_external('joblib')
     # get seed dataset
     ds4l = datasets['uni4large']
     dss_rotated = [random_affine_transformation(ds4l, scale_fac=100, shift_fac=10)
                    for i in range(4)]
     ha = Hyperalignment(nproc=1, enable_ca=['residual_errors'])
     ha.train(dss_rotated[:2])
     mappers = ha(dss_rotated)
     ha_proc = Hyperalignment(nproc=2, enable_ca=['residual_errors'])
     ha_proc.train(dss_rotated[:2])
     mappers_nproc = ha_proc(dss_rotated)
     self.assertTrue(
         np.all([np.array_equal(m.proj, mp.proj)
                for m, mp in zip(mappers, mappers_nproc)]),
         msg="Mappers differ when using nproc>1.")
     assert_array_equal(ha.ca.residual_errors.samples, ha_proc.ca.residual_errors.samples)
     # smoke test
     ha = Hyperalignment(nproc=0)
     mappers = ha(dss_rotated)
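
As the test above shows, train() and the call itself may receive different subsets: the common space is built from the training datasets, and mappers are then derived for every dataset passed to the call. A minimal sketch, assuming `dss` is a list of z-scored datasets:

# Sketch: build the common space from two datasets, then derive mappers for
# all of them; nproc > 1 parallelizes via joblib when it is available.
ha = Hyperalignment(nproc=2)
ha.train(dss[:2])            # common space from the first two datasets only
mappers = ha(dss)            # one mapper per dataset in dss
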
Example #6
 def test_hpal_svd_combo(self):
     # get seed dataset
     ds4l = datasets['uni4large']
     ds_orig = ds4l[:, ds4l.a.nonbogus_features]
     # XXX Is this SVD mapping required?
     svm = SVDMapper()
     svm.train(ds_orig)
     ds_svs = svm.forward(ds_orig)
     ds_orig.samples = ds_svs.samples
     nf_true = ds_orig.nfeatures
     n = 4  # # of datasets to generate
     # Adding non-shared dimensions for each subject
     dss_rotated = [[]] * n
     for i in range(n):
         dss_rotated[i] = hstack(
             (ds_orig, ds4l[:, ds4l.a.bogus_features[i * 4:i * 4 + 4]]))
     # rotate data
     nf = dss_rotated[0].nfeatures
     dss_rotated = [
         random_affine_transformation(dss_rotated[i]) for i in xrange(n)
     ]
     # Test if it is close to doing hpal+SVD in sequence outside hpal
     # First, as we do in sequence outside hpal
     ha = Hyperalignment()
     mappers_orig = ha(dss_rotated)
     dss_back = [
         m.forward(ds_) for m, ds_ in zip(mappers_orig, dss_rotated)
     ]
     dss_mean = np.mean([sd.samples for sd in dss_back], axis=0)
     svm = SVDMapper()
     svm.train(dss_mean)
     dss_sv = [svm.forward(sd) for sd in dss_back]
     # Test for SVD dimensionality reduction even with 2 training subjects
     for output_dim in [1, 4]:
         ha = Hyperalignment(output_dim=output_dim)
         ha.train(dss_rotated[:2])
         mappers = ha(dss_rotated)
         dss_back = [m.forward(ds_) for m, ds_ in zip(mappers, dss_rotated)]
         for sd in dss_back:
             assert (sd.nfeatures == output_dim)
     # Check if combined hpal+SVD works as expected
     sv_corrs = []
     for sd1, sd2 in zip(dss_sv, dss_back):
         ndcs = np.diag(np.corrcoef(sd1.samples.T, sd2.samples.T)[nf:, :nf],
                        k=0)
         sv_corrs.append(ndcs)
     self.assertTrue(
         np.all(np.abs(np.array(sv_corrs)) >= 0.95),
         msg="Hyperalignment with dimensionality reduction should have "
         "reconstructed SVD dataset. Got correlations %s." % sv_corrs)
     # Check if it recovers original SVs
     sv_corrs_orig = []
     for sd in dss_back:
         ndcs = np.diag(np.corrcoef(sd.samples.T,
                                    ds_orig.samples.T)[nf_true:, :nf_true],
                        k=0)
         sv_corrs_orig.append(ndcs)
     self.assertTrue(np.all(np.abs(np.array(sv_corrs_orig)) >= 0.9),
                     msg="Expected original dimensions after "
                     "SVD. Got correlations %s." % sv_corrs_orig)
    def _get_hypesvs(self, sl_connectomes, local_common_model=None):
        '''
        Hyperalign connectomes and return mappers
        and trained SVDMapper of common space.

        Parameters
        ----------
        sl_connectomes: a list of connectomes to hyperalign
        local_common_model: a reference common model to be used.

        Returns
        -------
        a tuple (sl_hmappers, svm, local_common_model)
        sl_hmappers: a list of mappers corresponding to the input list, in that order.
        svm: an SVD mapper trained on the input data; None if a common model was given.
        local_common_model: None if local_common_model was provided as input.
            Otherwise, local_common_model is computed here and returned.
        '''
        # TODO Should we z-score sl_connectomes?
        return_model = self.params.save_model is not None
        if local_common_model is not None:
            ha = Hyperalignment(level2_niter=0)
            if not is_datasetlike(local_common_model):
                local_common_model = Dataset(samples=local_common_model)
            ha.train([local_common_model])
            sl_hmappers = ha(sl_connectomes)
            return sl_hmappers, None, None
        ha = Hyperalignment()
        sl_hmappers = ha(sl_connectomes)
        sl_connectomes = [slhm.forward(slc) for slhm, slc in zip(sl_hmappers, sl_connectomes)]
        _ = [zscore(slc, chunks_attr=None) for slc in sl_connectomes]
        sl_connectomes = np.dstack(sl_connectomes).mean(axis=-1)
        svm = SVDMapper(force_train=True)
        svm.train(sl_connectomes)
        if return_model:
            local_common_model = svm.forward(sl_connectomes)
        else:
            local_common_model = None
        return sl_hmappers, svm, local_common_model
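
For reference, a standalone restatement of the common-model branch of `_get_hypesvs` above, assuming the PyMVPA imports shown; it is illustrative only and omits the class context.

# Standalone sketch: hyperalign connectomes, z-score and average the aligned
# data, then train an SVD mapper on the mean connectome (mirrors the method
# above; assumes PyMVPA is installed).
import numpy as np
from mvpa2.algorithms.hyperalignment import Hyperalignment
from mvpa2.mappers.svd import SVDMapper
from mvpa2.mappers.zscore import zscore

def common_svd_model(connectomes):
    ha = Hyperalignment()
    hmappers = ha(connectomes)
    aligned = [hm.forward(c) for hm, c in zip(hmappers, connectomes)]
    for a in aligned:
        zscore(a, chunks_attr=None)
    mean_connectome = np.dstack([a.samples for a in aligned]).mean(axis=-1)
    svm = SVDMapper(force_train=True)
    svm.train(mean_connectome)
    return hmappers, svm
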
Example #8
 def test_hpal_joblib(self):
     skip_if_no_external('joblib')
     # get seed dataset
     ds4l = datasets['uni4large']
     dss_rotated = [
         random_affine_transformation(ds4l, scale_fac=100, shift_fac=10)
         for i in range(4)
     ]
     ha = Hyperalignment(nproc=1, enable_ca=['residual_errors'])
     ha.train(dss_rotated[:2])
     mappers = ha(dss_rotated)
     ha_proc = Hyperalignment(nproc=2, enable_ca=['residual_errors'])
     ha_proc.train(dss_rotated[:2])
     mappers_nproc = ha_proc(dss_rotated)
     # not sure yet why the results are not exact only on Windows
     cmp_ = assert_array_equal if (
         not on_windows) else assert_array_almost_equal
     [cmp_(m.proj, mp.proj) for m, mp in zip(mappers, mappers_nproc)
      ]  # "Mappers differ when using nproc>1."
     cmp_(ha.ca.residual_errors.samples, ha_proc.ca.residual_errors.samples)
     # smoke test
     ha = Hyperalignment(nproc=0)
     mappers = ha(dss_rotated)
 def __init__(self, ref_ds=0, hyperalignment=Hyperalignment(ref_ds=0),
              featsel=1.0, full_matrix=True, use_same_features=False,
              exclude_from_model=None, dtype='float32', **kwargs):
     """
     For description of parameters see :class:`SearchlightHyperalignment`
     """
     super(FeatureSelectionHyperalignment, self).__init__(**kwargs)
     self.ref_ds = ref_ds
     self.hyperalignment = hyperalignment
     self.featsel = featsel
     self.use_same_features = use_same_features
     self.exclude_from_model = exclude_from_model
     if self.exclude_from_model is None:
         self.exclude_from_model = []
     self.full_matrix = full_matrix
     self.dtype = dtype
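
A minimal construction sketch using only parameters visible in the signature above; the values are illustrative, and parameter semantics are documented on SearchlightHyperalignment further below.

# Illustrative construction; featsel=0.7 keeps 70% of the features selected
# in the reference dataset, as described in the SearchlightHyperalignment docs.
fsha = FeatureSelectionHyperalignment(
    ref_ds=0,
    hyperalignment=Hyperalignment(ref_ds=0),
    featsel=0.7,
    use_same_features=False,
    dtype='float32')
hmappers = fsha(dss_roi)     # dss_roi: list of per-subject ROI datasets (assumed)
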
Example #10
 def test_hyper_input_dataset_check(self):
     # If supplied with only one dataset during training,
     # make sure it doesn't run multiple levels and crap out
     ha = Hyperalignment()
     ds_all = [datasets['uni4small'] for i in range(3)]
     # Make sure it raises TypeError if a list is not passed
     self.assertRaises(TypeError, ha, ds_all[0])
     self.assertRaises(TypeError, ha.train, ds_all[0])
     # And it doesn't crap out with a single dataset for training
     ha.train([ds_all[0]])
     zscore(ds_all[0], chunks_attr=None)
     assert_array_equal(ha.commonspace, ds_all[0].samples)
     # make sure it also accepts a tuple of datasets and an object ndarray
     ha = Hyperalignment()
     m = ha(tuple(ds_all))
     ha = Hyperalignment()
     dss_arr = np.empty(len(ds_all), dtype=object)
     for i in range(len(ds_all)):
         dss_arr[i] = ds_all[i]
     m = ha(dss_arr)
Example #11
def test_timesegments_classification():
    # TODO: RF our construction of fake datasets for testing hyperalignment
    # so we could reuse it here and test classification performance
    ds_orig = datasets['uni4large']
    n = 3
    dss = [ds_orig.copy(deep=True) for i in range(n)]

    def nohyper(dss):
        return [IdentityMapper() for ds in dss]

    # clean case, assume "nohyper" which would be by default
    errors = timesegments_classification(dss)
    for ds in dss:
        # must not add any attribute, such as subjects
        assert ('subjects' not in ds.sa)
    assert_array_equal(errors, 0)

    # very noisy case -- we must not be able to classify anything reasonably
    dss_noisy = [ds.copy() for ds in dss]
    for ds in dss_noisy:
        ds.samples = np.random.normal(size=ds.samples.shape)
    errors_nonoverlapping = timesegments_classification(
        dss_noisy, nohyper, overlapping_windows=False)
    assert (np.all(errors_nonoverlapping <= 1.))
    assert (np.all(0.75 <= errors_nonoverlapping))

    errors_overlapping = timesegments_classification(dss_noisy, nohyper)
    # non-overlapping error should not exceed the overlapping error for random data
    assert_array_lequal(np.mean(errors_nonoverlapping),
                        np.mean(errors_overlapping))

    # now the ultimate test with real hyperalignment, even though we don't
    # need much of it here anyway

    dss_rotated = [
        random_affine_transformation(ds_orig, scale_fac=100, shift_fac=10)
        for _ in dss
    ]
    errors_hyper = timesegments_classification(dss_rotated, Hyperalignment())
    # Hyperalignment must not screw up the rotated data and should classify
    # perfectly since we didn't add any noise whatsoever
    assert_array_equal(errors_hyper, 0)
    def test_hypal_michael_caused_problem(self):
        from mvpa2.misc import data_generators
        from mvpa2.mappers.zscore import zscore
        # Fake data
        ds = data_generators.normal_feature_dataset(nfeatures=20)
        ds_all = [data_generators.random_affine_transformation(ds) for i in range(3)]
        _ = [zscore(sd, chunks_attr=None) for sd in ds_all]
        # Making random data per subject for testing with bias added to first subject
        ds_test = [np.random.rand(1, ds.nfeatures) for i in range(len(ds_all))]
        ds_test[0] += np.arange(1, ds.nfeatures + 1) * 100
        assert(np.corrcoef(ds_test[2], ds_test[1])[0, 1] < 0.99)  # that would have been ridiculous if it was

        # Test with varying alpha to make sure we no longer have that issue
        for alpha in (0, 0.01, 0.5, 0.99, 1.0):
            hyper09 = Hyperalignment(alpha=alpha)
            mappers = hyper09([sd for sd in ds_all])
            ds_test_a = [m.forward(sd) for m, sd in zip(mappers, ds_test)]
            ds_test_a = [mappers[0].reverse(sd) for sd in ds_test_a]
            corr = np.corrcoef(ds_test_a[2], ds_test_a[1])[0, 1]
            assert(corr < 0.99)
Example #13
 def test_hpal_joblib(self):
     skip_if_no_external('joblib')
     # get seed dataset
     ds4l = datasets['uni4large']
     dss_rotated = [random_affine_transformation(ds4l, scale_fac=100, shift_fac=10)
                    for i in range(4)]
     ha = Hyperalignment(nproc=1, enable_ca=['residual_errors'])
     ha.train(dss_rotated[:2])
     mappers = ha(dss_rotated)
     ha_proc = Hyperalignment(nproc=2, enable_ca=['residual_errors'])
     ha_proc.train(dss_rotated[:2])
     mappers_nproc = ha_proc(dss_rotated)
     # not sure yet why the results are not exact only on Windows
     cmp_ = assert_array_equal if (not on_windows) else assert_array_almost_equal
     [cmp_(m.proj, mp.proj) for m, mp in zip(mappers, mappers_nproc)]  # "Mappers differ when using nproc>1."
     cmp_(ha.ca.residual_errors.samples, ha_proc.ca.residual_errors.samples)
     # smoke test
     ha = Hyperalignment(nproc=0)
     mappers = ha(dss_rotated)
 def test_hpal_joblib(self):
     skip_if_no_external('joblib')
     # get seed dataset
     ds4l = datasets['uni4large']
     dss_rotated = [random_affine_transformation(ds4l, scale_fac=100, shift_fac=10)
                    for i in range(4)]
     ha = Hyperalignment(nproc=1, enable_ca=['residual_errors'])
     ha.train(dss_rotated[:2])
     mappers = ha(dss_rotated)
     ha_proc = Hyperalignment(nproc=2, enable_ca=['residual_errors'])
     ha_proc.train(dss_rotated[:2])
     mappers_nproc = ha_proc(dss_rotated)
     self.assertTrue(
         np.all([np.array_equal(m.proj, mp.proj)
                for m, mp in zip(mappers, mappers_nproc)]),
         msg="Mappers differ when using nproc>1.")
     assert_array_equal(ha.ca.residual_errors.samples, ha_proc.ca.residual_errors.samples)
     # smoke test
     ha = Hyperalignment(nproc=0)
     mappers = ha(dss_rotated)
    def _get_hypesvs(self, sl_connectomes, local_common_model=None):
        '''
        Hyperalign connectomes and return mappers
        and trained SVDMapper of common space.

        Parameters
        ----------
        sl_connectomes: a list of connectomes to hyperalign
        local_common_model: a reference common model to be used.

        Returns
        -------
        a tuple (sl_hmappers, svm, local_common_model)
        sl_hmappers: a list of mappers corresponding to the input list, in that order.
        svm: an SVD mapper trained on the input data; None if a common model was given.
        local_common_model: None if local_common_model was provided as input.
            Otherwise, local_common_model is computed here and returned.
        '''
        # TODO Should we z-score sl_connectomes?
        return_model = self.params.save_model is not None
        if local_common_model is not None:
            ha = Hyperalignment(level2_niter=0)
            if not is_datasetlike(local_common_model):
                local_common_model = Dataset(samples=local_common_model)
            ha.train([local_common_model])
            sl_hmappers = ha(sl_connectomes)
            return sl_hmappers, None, None
        ha = Hyperalignment()
        sl_hmappers = ha(sl_connectomes)
        sl_connectomes = [
            slhm.forward(slc) for slhm, slc in zip(sl_hmappers, sl_connectomes)
        ]
        _ = [zscore(slc, chunks_attr=None) for slc in sl_connectomes]
        sl_connectomes = np.dstack(sl_connectomes).mean(axis=-1)
        svm = SVDMapper(force_train=True)
        svm.train(sl_connectomes)
        if return_model:
            local_common_model = svm.forward(sl_connectomes)
        else:
            local_common_model = None
        return sl_hmappers, svm, local_common_model
class SearchlightHyperalignment(ClassWithCollections):
    """
    Given a list of datasets, provide a list of mappers
    into common space using searchlight based hyperalignment.
    :ref:`Guntupalli et al., Cerebral Cortex (2016)`

    1) Input datasets should all be of the same size in terms of
    nsamples and nfeatures, and be coarsely aligned (using anatomy).
    2) All features in all datasets should be zscored.
    3) Datasets should have feature attribute `voxel_indices`
    containing spatial coordinates of all features
    """

    # TODO: add {training_,}residual_errors .ca ?

    ## Parameters common with Hyperalignment but overridden

    ref_ds = Parameter(
        0,
        constraints=EnsureInt() & EnsureRange(min=0),
        doc="""Index of a dataset to use as a reference. First dataset is used
            as default. If you supply exclude_from_model list, you should supply
            the ref_ds index as index before you remove those excluded datasets.
            Note that unlike regular Hyperalignment, there is no automagic
            choosing of the "best" ref_ds by default.""")

    ## Parameters specific to SearchlightHyperalignment

    queryengine = Parameter(
        None,
        doc="""A single (or a list of query engines, one per each dataset) to be
        used.  If not provided, volumetric searchlight, with spherical
        neighborhood as instructed by radius parameter will be used.""")

    radius = Parameter(
        3,
        constraints=EnsureInt() & EnsureRange(min=1),
        doc="""Radius of a searchlight sphere in number of voxels to be used if
         no `queryengine` argument was provided.""")

    nproc = Parameter(1,
                      constraints=EnsureInt() & EnsureRange(min=1)
                      | EnsureNone(),
                      doc="""Number of cores to use.""")

    nblocks = Parameter(
        None,
        constraints=EnsureInt() & EnsureRange(min=1) | EnsureNone(),
        doc="""Number of blocks to divide to process. Higher number results in
            smaller memory consumption.""")

    sparse_radius = Parameter(
        None,
        constraints=(EnsureRange(min=1) & EnsureInt() | EnsureNone()),
        doc="""Radius supplied to scatter_neighborhoods in units of voxels.
            This is effectively the distance between the centers where
            hyperalignment is performed in searchlights.  ATM applicable only
            if no custom queryengine was provided.
            If None, hyperalignment is performed at every voxel (default).""")

    hyperalignment = Parameter(
        Hyperalignment(ref_ds=None),
        doc="""Hyperalignment instance to be used in each searchlight sphere.
            Default is just the Hyperalignment instance with default
            parameters. Its `ref_ds` parameter would be overridden by the
            `ref_ds` parameter of this SearchlightHyperalignment instance
            because we want to be consistent and only need one `ref_ds`.""")

    combine_neighbormappers = Parameter(
        True,
        constraints=EnsureBool(),
        doc="""This param determines whether to combine mappers for each voxel
            from its neighborhood searchlights or just use the mapper for which
            it is the center voxel. This will not be applicable for certain
            queryengines whose ids and neighborhoods are from different spaces,
            such as for SurfaceVerticesQueryEngine""")

    compute_recon = Parameter(
        True,
        constraints=EnsureBool(),
        doc="""This param determines whether to compute reverse mappers for each
            subject from common-space to subject space. These will be stored in
            the StaticProjectionMapper() and used when reverse() is called.
            Enabling it will double the size of the mappers returned.""")

    featsel = Parameter(
        1.0,
        constraints=EnsureFloat() & EnsureRange(min=0.0, max=1.0)
        | EnsureInt() & EnsureRange(min=2),
        doc=
        """Determines if feature selection will be performed in each searchlight.
            1.0: Use all features. < 1.0 is understood as selecting that
            proportion of features in each searchlight of ref_ds using feature scores;
            > 1.0 is understood as selecting at most that many features in each
            searchlight.""")

    # TODO: Should we get rid of this feature?
    use_same_features = Parameter(
        False,
        constraints=EnsureBool(),
        doc="""Select the same (best) features when doing feature selection for
            all datasets.""")

    exclude_from_model = Parameter(
        [],
        constraints=EnsureListOf(int),
        doc="""List of dataset indices that will not participate in building
            common model.  These will still get mappers back but they don't
            influence the model or voxel selection.""")

    mask_node_ids = Parameter(
        None,
        constraints=EnsureListOf(int) | EnsureNone(),
        doc="""You can specify a mask to compute searchlight hyperalignment only
            within this mask.  These would be a list of voxel indices.""")

    dtype = Parameter(
        'float32',
        constraints='str',
        doc="""dtype of elements transformation matrices to save on memory for
            big datasets""")

    results_backend = Parameter(
        'hdf5',
        constraints=EnsureChoice('hdf5', 'native'),
        doc="""'hdf5' or 'native'. See Searchlight documentation.""")

    tmp_prefix = Parameter(
        'tmpsl',
        constraints='str',
        doc="""Prefix for temporary files. See Searchlight documentation.""")

    def __init__(self, **kwargs):
        _shpaldebug("Initializing.")
        ClassWithCollections.__init__(self, **kwargs)
        self.ndatasets = 0
        self.nfeatures = 0
        self.projections = None
        # This option makes the roi_seed in each SL to be selected during feature selection
        self.force_roi_seed = True
        if self.params.nproc is not None and self.params.nproc > 1 \
                and not externals.exists('pprocess'):
            raise RuntimeError("The 'pprocess' module is required for "
                               "multiprocess searchlights. Please either "
                               "install python-pprocess, or reduce `nproc` "
                               "to 1 (got nproc=%i) or set to default None" %
                               self.params.nproc)
        if not externals.exists('scipy'):
            raise RuntimeError("The 'scipy' module is required for "
                               "searchlight hyperalignment.")
        if self.params.results_backend == 'native':
            raise NotImplementedError(
                "'native' mode to handle results is still a "
                "work in progress.")
            #warning("results_backend is set to 'native'. This has been known"
            #        "to result in longer run time when working with big datasets.")
        if self.params.results_backend == 'hdf5' and \
                not externals.exists('h5py'):
            raise RuntimeError("The 'hdf5' module is required for "
                               "when results_backend is set to 'hdf5'")

    def _proc_block(self,
                    block,
                    datasets,
                    featselhyper,
                    queryengines,
                    seed=None,
                    iblock='main'):
        if seed is not None:
            mvpa2.seed(seed)
        if __debug__:
            debug('SLC',
                  'Starting computing block for %i elements' % len(block))
        bar = ProgressBar()
        projections = [
            csc_matrix((self.nfeatures, self.nfeatures),
                       dtype=self.params.dtype)
            for isub in range(self.ndatasets)
        ]
        for i, node_id in enumerate(block):
            # retrieve the feature ids of all features in the ROI from the query
            # engine

            # Find the neighborhood for that selected nearest node
            roi_feature_ids_all = [qe[node_id] for qe in queryengines]
            # handling queryengines that return AttrDatasets
            for isub in range(len(roi_feature_ids_all)):
                if is_datasetlike(roi_feature_ids_all[isub]):
                    # making sure queryengine returned proper shaped output
                    assert (roi_feature_ids_all[isub].nsamples == 1)
                    roi_feature_ids_all[isub] = roi_feature_ids_all[
                        isub].samples[0, :].tolist()
            if len(roi_feature_ids_all) == 1:
                # just one was provided to be "broadcasted"
                roi_feature_ids_all *= len(datasets)
            # if qe returns zero-sized ROI for any subject, pass...
            if any(len(x) == 0 for x in roi_feature_ids_all):
                continue
            # selecting the neighborhood for all subjects for hyperalignment
            ds_temp = [
                sd[:, ids] for sd, ids in zip(datasets, roi_feature_ids_all)
            ]
            if self.force_roi_seed:
                roi_seed = np.array(
                    roi_feature_ids_all[self.params.ref_ds]) == node_id
                ds_temp[self.params.ref_ds].fa['roi_seed'] = roi_seed
            if __debug__:
                msg = 'ROI (%i/%i), %i features' % (
                    i + 1, len(block), ds_temp[self.params.ref_ds].nfeatures)
                debug('SLC', bar(float(i + 1) / len(block), msg), cr=True)
            hmappers = featselhyper(ds_temp)
            assert (len(hmappers) == len(datasets))
            roi_feature_ids_ref_ds = roi_feature_ids_all[self.params.ref_ds]
            for isub, roi_feature_ids in enumerate(roi_feature_ids_all):
                if not self.params.combine_neighbormappers:
                    I = roi_feature_ids
                    #J = [roi_feature_ids[node_id]] * len(roi_feature_ids)
                    J = [node_id] * len(roi_feature_ids)
                    V = hmappers[isub].tolist()
                    if np.isscalar(V):
                        V = [V]
                else:
                    I, J, V = [], [], []
                    for f2, roi_feature_id_ref_ds in enumerate(
                            roi_feature_ids_ref_ds):
                        I += roi_feature_ids
                        J += [roi_feature_id_ref_ds] * len(roi_feature_ids)
                        V += hmappers[isub][:, f2].tolist()
                proj = coo_matrix(
                    (V, (I, J)),
                    shape=(max(self.nfeatures,
                               max(I) + 1), max(self.nfeatures,
                                                max(J) + 1)),
                    dtype=self.params.dtype)
                proj = proj.tocsc()
                # Cleaning up the current subject's projections to free up memory
                hmappers[isub] = [[] for _ in hmappers]
                projections[isub] = projections[isub] + proj

        if self.params.results_backend == 'native':
            return projections
        elif self.params.results_backend == 'hdf5':
            # store results in a temporary file and return a filename
            results_file = mktemp(prefix=self.params.tmp_prefix,
                                  suffix='-%s.hdf5' % iblock)
            if __debug__:
                debug('SLC', "Storing results into %s" % results_file)
            h5save(results_file, projections)
            if __debug__:
                debug('SLC_', "Results stored")
            return results_file
        else:
            raise RuntimeError("Must not reach this point")

    def __handle_results(self, results):
        if self.params.results_backend == 'hdf5':
            # 'results' must be just a filename
            assert (isinstance(results, str))
            if __debug__:
                debug('SLC', "Loading results from %s" % results)
            results_data = h5load(results)
            os.unlink(results)
            if __debug__:
                debug('SLC_',
                      "Loaded results of len=%d from" % len(results_data))
            for isub, res in enumerate(results_data):
                self.projections[isub] = self.projections[isub] + res
            if __debug__:
                debug('SLC_', "Finished adding results")
            return

    def __handle_all_results(self, results):
        """Helper generator to decorate passing the results out to
        results_fx
        """
        for r in results:
            yield self.__handle_results(r)

    @due.dcite(
        Doi('10.1093/cercor/bhw068'),
        description="Full cortex hyperalignment of data to a common space",
        tags=["implementation"])
    def __call__(self, datasets):
        """Estimate mappers for each dataset using searchlight-based
        hyperalignment.

        Parameters
        ----------
          datasets : list or tuple of datasets

        Returns
        -------
        A list of trained StaticProjectionMappers of the same length as datasets
        """

        # Perform some checks first before modifying internal state
        params = self.params
        ndatasets = len(datasets)

        if len(datasets) <= 1:
            raise ValueError("SearchlightHyperalignment needs > 1 dataset to "
                             "operate on. Got: %d" % self.ndatasets)

        if params.ref_ds in params.exclude_from_model:
            raise ValueError("Requested reference dataset %i is also "
                             "in the exclude list." % params.ref_ds)

        if params.ref_ds >= ndatasets:
            raise ValueError("Requested reference dataset %i is out of "
                             "bounds. We have only %i datasets provided" %
                             (params.ref_ds, ndatasets))

        # The rest of the checks are just warnings
        self.ndatasets = ndatasets

        _shpaldebug("SearchlightHyperalignment %s for %i datasets" %
                    (self, self.ndatasets))

        selected = [
            _ for _ in range(ndatasets) if _ not in params.exclude_from_model
        ]
        ref_ds_train = selected.index(params.ref_ds)
        params.hyperalignment.params.ref_ds = ref_ds_train
        warning('Using %dth dataset as the reference dataset (%dth after '
                'excluding datasets)' % (params.ref_ds, ref_ds_train))
        if len(params.exclude_from_model) > 0:
            warning("These datasets will not participate in building common "
                    "model: %s" % params.exclude_from_model)

        if __debug__:
            # verify that datasets were zscored prior the alignment since it is
            # assumed/required preprocessing step
            for ids, ds in enumerate(datasets):
                for f, fname, tval in ((np.mean, 'means', 0), (np.std, 'stds',
                                                               1)):
                    vals = f(ds, axis=0)
                    vals_comp = np.abs(vals - tval) > 1e-5
                    if np.any(vals_comp):
                        warning(
                            '%d %s are too different (max diff=%g) from %d in '
                            'dataset %d to come from a zscored dataset. '
                            'Please zscore datasets first for correct operation '
                            '(unless it was intentional)' %
                            (np.sum(vals_comp), fname, np.max(
                                np.abs(vals)), tval, ids))

        # Setting up SearchlightHyperalignment
        # we need to know which original features were comprising the
        # individual SL ROIs
        _shpaldebug('Initializing FeatureSelectionHyperalignment.')
        hmeasure = FeatureSelectionHyperalignment(
            ref_ds=params.ref_ds,
            featsel=params.featsel,
            hyperalignment=params.hyperalignment,
            full_matrix=params.combine_neighbormappers,
            use_same_features=params.use_same_features,
            exclude_from_model=params.exclude_from_model,
            dtype=params.dtype)

        # Performing SL processing manually
        _shpaldebug("Setting up for searchlights")
        if params.nproc is None and externals.exists('pprocess'):
            import pprocess
            try:
                params.nproc = pprocess.get_number_of_cores() or 1
            except AttributeError:
                warning("pprocess version %s has no API to figure out maximal "
                        "number of cores. Using 1" %
                        externals.versions['pprocess'])
                params.nproc = 1

        # XXX I think this class should already accept a single dataset only.
        # It should have a ``space`` setting that names a sample attribute that
        # can be used to identify individual/original datasets.
        # Taking a single dataset as argument would be cleaner, because the
        # algorithm relies on the assumption that there is a coarse feature
        # alignment, i.e. the SL ROIs cover roughly the same area
        queryengines = self._get_trained_queryengines(datasets,
                                                      params.queryengine,
                                                      params.radius,
                                                      params.ref_ds)
        # For surface nodes to voxels queryengines, roi_seed hardly makes sense
        qe = queryengines[(0 if len(queryengines) == 1 else params.ref_ds)]
        if isinstance(qe, SurfaceVerticesQueryEngine):
            self.force_roi_seed = False
            if not self.params.combine_neighbormappers:
                raise NotImplementedError(
                    "Mapping from voxels to surface nodes is not "
                    "implmented yet. Try setting combine_neighbormappers to True."
                )
        self.nfeatures = datasets[params.ref_ds].nfeatures
        _shpaldebug("Performing Hyperalignment in searchlights")
        # Setting up centers for running SL Hyperalignment
        if params.sparse_radius is None:
            roi_ids = self._get_verified_ids(queryengines) \
                if params.mask_node_ids is None \
                else params.mask_node_ids
        else:
            if params.queryengine is not None:
                raise NotImplementedError(
                    "using sparse_radius whenever custom queryengine is "
                    "provided is not yet supported.")
            _shpaldebug("Setting up sparse neighborhood")
            from mvpa2.misc.neighborhood import scatter_neighborhoods
            if params.mask_node_ids is None:
                scoords, sidx = scatter_neighborhoods(
                    Sphere(params.sparse_radius),
                    datasets[params.ref_ds].fa.voxel_indices,
                    deterministic=True)
                roi_ids = sidx
            else:
                scoords, sidx = scatter_neighborhoods(
                    Sphere(params.sparse_radius),
                    datasets[params.ref_ds].fa.voxel_indices[
                        params.mask_node_ids],
                    deterministic=True)
                roi_ids = [params.mask_node_ids[sid] for sid in sidx]

        # Initialize projections
        _shpaldebug('Initializing projection matrices')
        self.projections = [
            csc_matrix((self.nfeatures, self.nfeatures), dtype=params.dtype)
            for isub in range(self.ndatasets)
        ]

        # compute
        if params.nproc is not None and params.nproc > 1:
            # split all target ROIs centers into `nproc` equally sized blocks
            nproc_needed = min(len(roi_ids), params.nproc)
            params.nblocks = nproc_needed \
                if params.nblocks is None else params.nblocks
            params.nblocks = min(len(roi_ids), params.nblocks)
            node_blocks = np.array_split(roi_ids, params.nblocks)
            # the next block sets up the infrastructure for parallel computing
            # this can easily be changed into a ParallelPython loop, if we
            # decide to have a PP job server in PyMVPA
            import pprocess
            p_results = pprocess.Map(limit=nproc_needed)
            if __debug__:
                debug(
                    'SLC', "Starting off %s child processes for nblocks=%i" %
                    (nproc_needed, params.nblocks))
            compute = p_results.manage(pprocess.MakeParallel(self._proc_block))
            seed = mvpa2.get_random_seed()
            for iblock, block in enumerate(node_blocks):
                # should we maybe deepcopy the measure to have a unique and
                # independent one per process?
                compute(block,
                        datasets,
                        copy.copy(hmeasure),
                        queryengines,
                        seed=seed,
                        iblock=iblock)
        else:
            # otherwise compute and collect the results sequentially in a list
            _shpaldebug('Using 1 process to compute mappers.')
            if params.nblocks is None:
                params.nblocks = 1
            params.nblocks = min(len(roi_ids), params.nblocks)
            node_blocks = np.array_split(roi_ids, params.nblocks)
            p_results = [
                self._proc_block(block, datasets, hmeasure, queryengines)
                for block in node_blocks
            ]
        results_ds = self.__handle_all_results(p_results)
        # Dummy iterator for, you know, iteration
        list(results_ds)

        _shpaldebug(
            'Wrapping projection matrices into StaticProjectionMappers')
        self.projections = [
            StaticProjectionMapper(proj=proj, recon=proj.T)
            if params.compute_recon else StaticProjectionMapper(proj=proj)
            for proj in self.projections
        ]
        return self.projections

    def _get_verified_ids(self, queryengines):
        """Helper to return ids of queryengines, verifying that they are the same"""
        qe0 = queryengines[0]
        roi_ids = qe0.ids
        for qe in queryengines:
            if qe is not qe0:
                # if a different query engine (so wasn't just replicated)
                if np.any(qe.ids != qe0.ids):
                    raise RuntimeError(
                        "Query engine %s provided different ids than %s. Not supported"
                        % (qe0, qe))
        return roi_ids

    def _get_trained_queryengines(self, datasets, queryengine, radius, ref_ds):
        """Helper to return trained query engine(s), either list of one or one per each dataset

        if queryengine is None then IndexQueryEngine based on radius is created
        """
        ndatasets = len(datasets)
        if queryengine:
            if isinstance(queryengine, (list, tuple)):
                queryengines = queryengine
                if len(queryengines) != ndatasets:
                    raise ValueError(
                        "%d query engines were specified although %d datasets "
                        "provided" % (len(queryengines), ndatasets))
                _shpaldebug("Training provided query engines")
                for qe, ds in zip(queryengines, datasets):
                    qe.train(ds)
            else:
                queryengine.train(datasets[ref_ds])
                queryengines = [queryengine]
        else:
            _shpaldebug(
                'No custom query engines were provided. Setting up the '
                'volumetric query engine on voxel_indices.')
            queryengine = IndexQueryEngine(voxel_indices=Sphere(radius))
            queryengine.train(datasets[ref_ds])
            queryengines = [queryengine]
        return queryengines
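
A minimal end-to-end sketch of the class above, under the assumptions stated in its docstring (equally shaped, z-scored datasets with a `voxel_indices` feature attribute); illustrative only.

# Sketch: full-cortex searchlight hyperalignment over a list of z-scored
# datasets carrying `voxel_indices` feature attributes.
slhyper = SearchlightHyperalignment(radius=3, sparse_radius=3, nproc=1)
slmappers = slhyper(dss)                       # list of StaticProjectionMappers
dss_aligned = [m.forward(ds) for m, ds in zip(slmappers, dss)]
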
Example #17
    def test_basic_functioning(self, ref_ds, zscore_common, zscore_all):
        ha = Hyperalignment(ref_ds=ref_ds,
                            zscore_all=zscore_all,
                            zscore_common=zscore_common)
        if ref_ds is None:
            ref_ds = 0  # by default should be this one

        # get a dataset with some prominent trends in it
        ds4l = datasets['uni4large']
        # lets select for now only meaningful features
        ds_orig = ds4l[:, ds4l.a.nonbogus_features]
        nf = ds_orig.nfeatures
        n = 4  # # of datasets to generate
        Rs, dss_rotated, dss_rotated_clean, random_shifts, random_scales \
            = [], [], [], [], []

        # now lets compose derived datasets by using some random
        # rotation(s)
        for i in xrange(n):
            ## if False: # i == ref_ds:
            #     # Do not rotate the target space so we could check later on
            #     # if we transform back nicely
            #     R = np.eye(ds_orig.nfeatures)
            ## else:
            ds_ = random_affine_transformation(ds_orig,
                                               scale_fac=100,
                                               shift_fac=10)
            Rs.append(ds_.a.random_rotation)
            # reusing random data from dataset itself
            random_scales += [ds_.a.random_scale]
            random_shifts += [ds_.a.random_shift]
            random_noise = ds4l.samples[:, ds4l.a.bogus_features[:4]]

            ## if (zscore_common or zscore_all):
            ##     # for later on testing of "precise" reconstruction
            ##     zscore(ds_, chunks_attr=None)

            dss_rotated_clean.append(ds_)

            ds_ = ds_.copy()
            ds_.samples = ds_.samples + 0.1 * random_noise
            dss_rotated.append(ds_)

        # Lets test two scenarios -- in one with no noise -- we should get
        # close to perfect reconstruction.  If noise was added -- not so good
        for noisy, dss in ((False, dss_rotated_clean), (True, dss_rotated)):
            # to verify that original datasets didn't get changed by
            # Hyperalignment store their idhashes of samples
            idhashes = [idhash(ds.samples) for ds in dss]
            idhashes_targets = [idhash(ds.targets) for ds in dss]

            mappers = ha(dss)

            idhashes_ = [idhash(ds.samples) for ds in dss]
            idhashes_targets_ = [idhash(ds.targets) for ds in dss]
            self.assertEqual(
                idhashes,
                idhashes_,
                msg="Hyperalignment must not change original data.")
            self.assertEqual(
                idhashes_targets,
                idhashes_targets_,
                msg="Hyperalignment must not change original data targets.")

            self.assertEqual(ref_ds, ha.ca.chosen_ref_ds)

            # Map data back

            dss_clean_back = [
                m.forward(ds_) for m, ds_ in zip(mappers, dss_rotated_clean)
            ]

            ds_norm = np.linalg.norm(dss[ref_ds].samples)
            nddss = []
            ndcss = []
            ds_orig_Rref = np.dot(ds_orig.samples, Rs[ref_ds]) \
                * random_scales[ref_ds] \
                + random_shifts[ref_ds]
            if zscore_common or zscore_all:
                zscore(Dataset(ds_orig_Rref), chunks_attr=None)
            for ds_back in dss_clean_back:
                # if we used zscoring of the common space, we cannot rely on
                # the range/offset being matched, so let's use corrcoef
                ndcs = np.diag(np.corrcoef(ds_back.samples.T,
                                           ds_orig_Rref.T)[nf:, :nf],
                               k=0)
                ndcss += [ndcs]
                dds = ds_back.samples - ds_orig_Rref
                ndds = np.linalg.norm(dds) / ds_norm
                nddss += [ndds]
            snoisy = ('clean', 'noisy')[int(noisy)]
            do_labile = cfg.getboolean('tests', 'labile', default='yes')
            if not noisy or do_labile:
                # First compare correlations
                self.assertTrue(
                    np.all(np.array(ndcss) >= (0.9, 0.85)[int(noisy)]),
                    msg="Should have reconstructed original dataset more or"
                    " less. Got correlations %s in %s case." % (ndcss, snoisy))
                if not (zscore_all or zscore_common):
                    # if we didn't zscore -- all of them should be really close
                    self.assertTrue(
                        np.all(np.array(nddss) <= (1e-10, 1e-1)[int(noisy)]),
                        msg="Should have reconstructed original dataset well "
                        "without zscoring. Got normed differences %s in %s case."
                        % (nddss, snoisy))
                elif do_labile:
                    # otherwise they all should be somewhat close
                    self.assertTrue(
                        np.all(np.array(nddss) <= (.2, 3)[int(noisy)]),
                        msg="Should have reconstructed original dataset more or"
                        " less for all. Got normed differences %s in %s case."
                        % (nddss, snoisy))
                    self.assertTrue(
                        np.all(nddss[ref_ds] <= .09),
                        msg="Should have reconstructed original dataset quite "
                        "well even with zscoring. Got normed differences %s "
                        "in %s case." % (nddss, snoisy))
                    # yoh: and leave 5% of difference for a chance and numerical
                    #      fluctuations ;)
                    self.assertTrue(
                        np.all(np.array(nddss) >= 0.95 * nddss[ref_ds]),
                        msg="Should have reconstructed orig_ds best of all. "
                        "Got normed differences %s in %s case with ref_ds=%d."
                        % (nddss, snoisy, ref_ds))

        # Lets see how well we do if asked to compute residuals
        ha = Hyperalignment(
            ref_ds=ref_ds,
            level2_niter=2,
            enable_ca=['training_residual_errors', 'residual_errors'])
        mappers = ha(dss_rotated_clean)
        self.assertTrue(
            np.all(ha.ca.training_residual_errors.sa.levels ==
                   ['1', '2:0', '2:1']))
        rterrors = ha.ca.training_residual_errors.samples
        # just basic tests:
        self.assertEqual(rterrors[0, ref_ds], 0)
        self.assertEqual(rterrors.shape, (3, n))
        rerrors = ha.ca.residual_errors.samples
        self.assertEqual(rerrors.shape, (1, n))
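
The tail of the test above reads Hyperalignment's conditional attributes; a short sketch of enabling and inspecting them, assuming `dss` is a list of z-scored datasets:

# Sketch: enable conditional attributes to inspect residual errors after
# alignment (attribute names taken from the test above).
ha = Hyperalignment(level2_niter=2,
                    enable_ca=['training_residual_errors', 'residual_errors'])
mappers = ha(dss)
print(ha.ca.training_residual_errors.samples)   # one row per training level
print(ha.ca.residual_errors.samples)            # residuals of the final mappers
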
Example #18
    def _test_on_swaroop_data(self):  # pragma: no cover
        #
        print "Running swaroops test on data we don't have"
        #from mvpa2.datasets.miscfx import zscore
        #from mvpa2.featsel.helpers import FixedNElementTailSelector
        #   or just for lazy ones like yarik atm
        #enable to test from mvpa2.suite import *
        subj = ['cb', 'dm', 'hj', 'kd', 'kl', 'mh', 'ph', 'rb', 'se', 'sm']
        ds = []
        for sub in subj:
            ds.append(
                fmri_dataset(samples=sub + '_movie.nii.gz',
                             mask=sub + '_mask_vt.nii.gz'))
        '''
        Compute feature ranks in each dataset
        based on correlation with other datasets
        '''
        feature_scores = [np.zeros(d.nfeatures) for d in ds]
        '''
        for i in range(len(subj)):
            ds_temp = ds[i].samples - np.mean(ds[i].samples, axis=0)
            ds_temp = ds_temp / np.sqrt( np.sum( np.square(ds_temp), axis=0) )
            for j in range(i+1,len(subj)):
            ds_temp2 = ds[j].samples - np.mean(ds[j].samples, axis=0)
            ds_temp2 = ds_temp2 / np.sqrt( np.sum( np.square(ds_temp2), axis=0) )
            corr_temp= np.asarray(np.mat(np.transpose(ds_temp))*np.mat(ds_temp2))
            feature_scores[i] = feature_scores[i] + np.max(corr_temp, axis = 1)
            feature_scores[j] = feature_scores[j] + np.max(corr_temp, axis = 0)
        '''
        for i, sd in enumerate(ds):
            ds_temp = sd.copy()
            zscore(ds_temp, chunks_attr=None)
            for j, sd2 in enumerate(ds[i + 1:]):
                ds_temp2 = sd2.copy()
                zscore(ds_temp2, chunks_attr=None)
                corr_temp = np.dot(ds_temp.samples.T, ds_temp2.samples)
                feature_scores[i] = feature_scores[i] + \
                                    np.max(corr_temp, axis = 1)
                feature_scores[j+i+1] = feature_scores[j+i+1] + \
                                        np.max(corr_temp, axis = 0)

        for i, sd in enumerate(ds):
            sd.fa['bsc_scores'] = feature_scores[i]

        fselector = FixedNElementTailSelector(2000,
                                              tail='upper',
                                              mode='select')

        ds_fs = [sd[:, fselector(sd.fa.bsc_scores)] for sd in ds]

        hyper = Hyperalignment()
        mapper_results = hyper(ds_fs)

        md_cd = ColumnData('labels.txt', header=['label'])
        md_labels = [int(x) for x in md_cd['label']]
        for run in range(8):
            md_labels[192 * run:192 * run + 3] = [-1] * 3

        mkdg_ds = []
        for sub in subj:
            mkdg_ds.append(
                fmri_dataset(samples=sub + '_mkdg.nii.gz',
                             targets=md_labels,
                             chunks=np.repeat(range(8), 192),
                             mask=sub + '_mask_vt.nii.gz'))

        m = mean_group_sample(['targets', 'chunks'])

        mkdg_ds = [ds_.get_mapped(m) for ds_ in mkdg_ds]
        mkdg_ds = [ds_[ds_.sa.targets != -1] for ds_ in mkdg_ds]
        [zscore(ds_, param_est=('targets', [0])) for ds_ in mkdg_ds]
        mkdg_ds = [ds_[ds_.sa.targets != 0] for ds_ in mkdg_ds]

        for i, sd in enumerate(mkdg_ds):
            sd.fa['bsc_scores'] = feature_scores[i]

        mkdg_ds_fs = [sd[:, fselector(sd.fa.bsc_scores)] for sd in mkdg_ds]
        mkdg_ds_mapped = [
            sd.get_mapped(mapper_results[i]) for i, sd in enumerate(mkdg_ds_fs)
        ]

        # within-subject classification
        within_acc = []
        clf = clfswh['multiclass', 'linear', 'NU_SVC'][0]
        cvterr = CrossValidation(clf,
                                 NFoldPartitioner(),
                                 enable_ca=['confusion'])
        for sd in mkdg_ds_fs:
            wsc = cvterr(sd)
            within_acc.append(1 - np.mean(wsc))

        within_acc_mapped = []
        for sd in mkdg_ds_mapped:
            wsc = cvterr(sd)
            within_acc_mapped.append(1 - np.mean(wsc))

        print np.mean(within_acc)
        print np.mean(within_acc_mapped)

        mkdg_ds_all = vstack(mkdg_ds_mapped)
        mkdg_ds_all.sa['subject'] = np.repeat(range(10), 56)
        mkdg_ds_all.sa['chunks'] = mkdg_ds_all.sa['subject']

        bsc = cvterr(mkdg_ds_all)
        print 1 - np.mean(bsc)
        mkdg_all = vstack(mkdg_ds_fs)
        mkdg_all.sa['chunks'] = np.repeat(range(10), 56)
        bsc_orig = cvterr(mkdg_all)
        print 1 - np.mean(bsc_orig)
        pass
    
    aligned_dirname = os.path.join(utils.common_space_dir, 'parcel_{n:03d}'.format(n=parcel_num))
    mapper_dirname = os.path.join(utils.trans_matrices, 'parcel_{n:03d}'.format(n=parcel_num))

    for d in [aligned_dirname, mapper_dirname]:
        if not os.path.exists(d):
            os.makedirs(d)


    train_dss = [utils.prep_parcelwise_data(sub, parcel_num, 'sponpain') for sub in sub_list]
    print('-------- size of training data sets {A} -------------'.format(A=train_dss[0].shape))
    print('-------- beginning hyperalignment parcel {A} --------'.format(A=parcel_num))

    # train hyperalignment model on all subject's sponpain data for this parcel
    print('-------- length of train subjects={A} '.format(A=str(len(train_dss))))
    ha = Hyperalignment(nproc=NPROC, joblib_backend='multiprocessing')
    debug.active += ['HPAL']
    t0 = time.time()
    ha.train(train_dss)
    mappers = ha(train_dss)
    t1 = time.time()
    print('-------- done training hyperalignment at {B} --------'.format(B=str(timedelta(seconds=t1-t0))))
    del train_dss

    pool = mp.Pool(NPROC)
    data_fns = [os.path.join(aligned_dirname,'{s}_aligned_cleaned_bladder_ts_noZ.hdf5'.format(s=s)) for s in sub_list]
    mapper_fns = [os.path.join(mapper_dirname,'{s}_trained_mapper.hdf5noZ.gz'.format(s=s)) for s in sub_list]
    iterable = zip(data_fns, mapper_fns, sub_list, mappers, np.repeat(parcel_num, len(mappers)))
    pool.map(apply_mappers, iterable)
    t2=time.time()
    print('-------- done aligning & saving test data at {B} --------'.format(B=str(timedelta(seconds=t2-t1))))
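
The script above relies on an `apply_mappers` helper that is not shown; a purely hypothetical per-subject version, using PyMVPA's `h5save` and the same `utils` module the script imports, might look like this (the file layout and task name are assumptions).

# Hypothetical stand-in for the apply_mappers helper used above; the real
# implementation is not part of this example.
from mvpa2.base.hdf5 import h5save

def apply_mappers(args):
    data_fn, mapper_fn, sub, mapper, parcel_num = args
    # load this subject's test data for the parcel (task name is an assumption)
    test_ds = utils.prep_parcelwise_data(sub, parcel_num, 'bladder')
    aligned = mapper.forward(test_ds)
    h5save(data_fn, aligned, compression=9)
    h5save(mapper_fn, mapper, compression=9)
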
    def __call__(self, datasets):
        """Estimate mappers for each dataset using searchlight-based
        hyperalignment.

        Parameters
        ----------
          datasets : list or tuple of datasets

        Returns
        -------
        A list of trained StaticProjectionMappers of the same length as datasets
        """

        # Perform some checks first before modifying internal state
        params = self.params
        ndatasets = len(datasets)

        if len(datasets) <= 1:
            raise ValueError("SearchlightHyperalignment needs > 1 dataset to "
                             "operate on. Got: %d" % self.ndatasets)

        if params.ref_ds in params.exclude_from_model:
            raise ValueError("Requested reference dataset %i is also "
                             "in the exclude list." % params.ref_ds)

        if params.ref_ds >= ndatasets:
            raise ValueError("Requested reference dataset %i is out of "
                             "bounds. We have only %i datasets provided" %
                             (params.ref_ds, ndatasets))

        # The rest of the checks are just warnings
        self.ndatasets = ndatasets

        _shpaldebug("SearchlightHyperalignment %s for %i datasets" %
                    (self, self.ndatasets))

        if params.ref_ds != params.hyperalignment.params.ref_ds:
            warning(
                'Supplied ref_ds & hyperalignment instance ref_ds:%d differ.' %
                params.hyperalignment.params.ref_ds)
            warning('Using default hyperalignment instance with ref_ds: %d' %
                    params.ref_ds)
            params.hyperalignment = Hyperalignment(ref_ds=params.ref_ds)
        if len(params.exclude_from_model) > 0:
            warning("These datasets will not participate in building common "
                    "model: %s" % params.exclude_from_model)

        if __debug__:
            # verify that datasets were zscored prior the alignment since it is
            # assumed/required preprocessing step
            for ids, ds in enumerate(datasets):
                for f, fname, tval in ((np.mean, 'means', 0), (np.std, 'stds',
                                                               1)):
                    vals = f(ds, axis=0)
                    vals_comp = np.abs(vals - tval) > 1e-5
                    if np.any(vals_comp):
                        warning(
                            '%d %s are too different (max diff=%g) from %d in '
                            'dataset %d to come from a zscored dataset. '
                            'Please zscore datasets first for correct operation '
                            '(unless it was intentional)' %
                            (np.sum(vals_comp), fname, np.max(
                                np.abs(vals)), tval, ids))

        # Setting up SearchlightHyperalignment
        # we need to know which original features comprise the
        # individual SL ROIs
        _shpaldebug('Initializing FeatureSelectionHyperalignment.')
        hmeasure = FeatureSelectionHyperalignment(
            featsel=params.featsel,
            hyperalignment=params.hyperalignment,
            full_matrix=params.combine_neighbormappers,
            use_same_features=params.use_same_features,
            exclude_from_model=params.exclude_from_model,
            dtype=params.dtype)

        # Performing SL processing manually
        _shpaldebug("Setting up for searchlights")
        if params.nproc is None and externals.exists('pprocess'):
            import pprocess
            try:
                params.nproc = pprocess.get_number_of_cores() or 1
            except AttributeError:
                warning("pprocess version %s has no API to figure out maximal "
                        "number of cores. Using 1" %
                        externals.versions['pprocess'])
                params.nproc = 1

        # XXX I think this class should already accept a single dataset only.
        # It should have a ``space`` setting that names a sample attribute that
        # can be used to identify individual/original datasets.
        # Taking a single dataset as argument would be cleaner, because the
        # algorithm relies on the assumption that there is a coarse feature
        # alignment, i.e. the SL ROIs cover roughly the same area
        queryengines = self._get_trained_queryengines(datasets,
                                                      params.queryengine,
                                                      params.radius,
                                                      params.ref_ds)
        # For surface-node-to-voxel queryengines, roi_seed hardly makes sense
        if isinstance(queryengines[params.ref_ds], SurfaceVerticesQueryEngine):
            self.force_roi_seed = False
            if not self.params.combine_neighbormappers:
                raise NotImplementedError(
                    "Mapping from voxels to surface nodes is not "
                    "implemented yet. Try setting combine_neighbormappers to True."
                )
        self.nfeatures = datasets[params.ref_ds].nfeatures
        _shpaldebug("Performing Hyperalignment in searchlights")
        # Setting up centers for running SL Hyperalignment
        if params.sparse_radius is None:
            roi_ids = self._get_verified_ids(queryengines) \
                if params.mask_node_ids is None \
                else params.mask_node_ids
        else:
            if params.queryengine is not None:
                raise NotImplementedError(
                    "Using sparse_radius together with a custom queryengine "
                    "is not yet supported.")
            _shpaldebug("Setting up sparse neighborhood")
            from mvpa2.misc.neighborhood import scatter_neighborhoods
            if params.mask_node_ids is None:
                scoords, sidx = scatter_neighborhoods(
                    Sphere(params.sparse_radius),
                    datasets[params.ref_ds].fa.voxel_indices,
                    deterministic=True)
                roi_ids = sidx
            else:
                scoords, sidx = scatter_neighborhoods(
                    Sphere(params.sparse_radius),
                    datasets[params.ref_ds].fa.voxel_indices[
                        params.mask_node_ids],
                    deterministic=True)
                roi_ids = [params.mask_node_ids[sid] for sid in sidx]

        # Initialize projections
        _shpaldebug('Initializing projection matrices')
        self.projections = [
            csc_matrix((self.nfeatures, self.nfeatures), dtype=params.dtype)
            for isub in range(self.ndatasets)
        ]

        # compute
        if params.nproc is not None and params.nproc > 1:
            # split all target ROI centers into `nblocks` blocks (one per
            # process by default)
            nproc_needed = min(len(roi_ids), params.nproc)
            params.nblocks = nproc_needed \
                if params.nblocks is None else params.nblocks
            params.nblocks = min(len(roi_ids), params.nblocks)
            node_blocks = np.array_split(roi_ids, params.nblocks)
            # the next block sets up the infrastructure for parallel computing
            # this can easily be changed into a ParallelPython loop, if we
            # decide to have a PP job server in PyMVPA
            import pprocess
            p_results = pprocess.Map(limit=nproc_needed)
            if __debug__:
                debug(
                    'SLC', "Starting off %s child processes for nblocks=%i" %
                    (nproc_needed, params.nblocks))
            compute = p_results.manage(pprocess.MakeParallel(self._proc_block))
            seed = mvpa2.get_random_seed()
            for iblock, block in enumerate(node_blocks):
                # should we maybe deepcopy the measure to have a unique and
                # independent one per process?
                compute(block,
                        datasets,
                        copy.copy(hmeasure),
                        queryengines,
                        seed=seed,
                        iblock=iblock)
        else:
            # otherwise compute the blocks serially and collect the results in a list
            _shpaldebug('Using 1 process to compute mappers.')
            if params.nblocks is None:
                params.nblocks = 1
            params.nblocks = min(len(roi_ids), params.nblocks)
            node_blocks = np.array_split(roi_ids, params.nblocks)
            p_results = [
                self._proc_block(block, datasets, hmeasure, queryengines)
                for block in node_blocks
            ]
        results_ds = self.__handle_all_results(p_results)
        # Consume the results generator so that every block's results are handled
        list(results_ds)

        _shpaldebug(
            'Wrapping projection matrices into StaticProjectionMappers')
        self.projections = [
            StaticProjectionMapper(proj=proj, recon=proj.T)
            if params.compute_recon else StaticProjectionMapper(proj=proj)
            for proj in self.projections
        ]
        return self.projections
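For context, this is how the method above is typically driven; a minimal usage sketch in which dss stands for a list of per-subject, pre-zscored datasets in a common voxel space (the parameter values are placeholders):

from mvpa2.algorithms.searchlight_hyperalignment import SearchlightHyperalignment

# dss: list of per-subject datasets in a common voxel space, zscored beforehand
# as the __debug__ check above expects; parameter values below are placeholders.
slhyper = SearchlightHyperalignment(radius=2, sparse_radius=3, nproc=4)
slmappers = slhyper(dss)              # one StaticProjectionMapper per dataset
dss_common = [m.forward(ds) for m, ds in zip(slmappers, dss)]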
    df_results.loc[parcel, 'Voxels_in_parcel'] = sum(indices[PARCEL_NUMBER])

    myvoxels = np.nonzero(indices[PARCEL_NUMBER])
    dss = []
    for sub in range(len(mats)):
        ds = mats[sub][:, myvoxels[0]]
        ds = mv.Dataset(ds)
        ds.fa['voxel_indices'] = range(ds.shape[1])
        mv.zscore(ds, chunks_attr=None)
        dss.append(ds)

    print('Size of Training data sets: {0}'.format(dss[0].shape))
    print('Beginning Hyperalignment.')

    # create hyperalignment instance
    hyper = Hyperalignment(nproc=1)
    hyper.train(dss)

    # get mappers to common space created by hyper.train (2x procrustes iteration)
    mappers = hyper(dss)

    # apply mappers back onto training data
    ds_hyper = [h.forward(sd) for h, sd in zip(mappers, dss)]

    train_aa_isc = compute_average_similarity(dss)
    train_ha_isc = compute_average_similarity(ds_hyper)

    df_results.loc[parcel, 'Train_AA_ISC'] = np.mean(train_aa_isc)
    df_results.loc[parcel, 'Train_HA_ISC'] = np.mean(train_ha_isc)

    # create test dss
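compute_average_similarity is defined elsewhere in this script. A hypothetical sketch of a per-voxel inter-subject correlation it could compute, assuming all datasets share the same samples-by-voxels shape (the name comes from the calls above; the body is an assumption):

import numpy as np

def compute_average_similarity(dss):
    """Hypothetical helper (the real one is defined elsewhere in the script):
    per-voxel inter-subject correlation, averaged over subjects."""
    data = np.array([ds.samples for ds in dss])    # (n_subjects, n_timepoints, n_voxels)
    n_subjects, _, n_voxels = data.shape
    sims = np.zeros(n_voxels)
    for s in range(n_subjects):
        # leave-one-subject-out average of everyone else's time series
        others = np.mean(np.delete(data, s, axis=0), axis=0)
        for v in range(n_voxels):
            sims[v] += np.corrcoef(data[s, :, v], others[:, v])[0, 1]
    return sims / n_subjects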
Example #22
0
def run(args):
    print(args.data)
    dss = [arg2ds(d)[:, :100] for d in args.data]
    verbose(1, "Loaded %i input datasets" % len(dss))
    if __debug__:
        for i, ds in enumerate(dss):
            debug('CMDLINE', "dataset %i: %s" % (i, str(ds)))
    # TODO at this point more checks could be done, e.g. whether ref_ds >= len(dss)
    # assemble parameters
    params = dict([(param, getattr(args, param))
                   for param in _supported_parameters])
    if __debug__:
        debug('CMDLINE', "configured parameters: '%s'" % params)
    # assemble CAs
    enabled_ca = [ca for ca in _supported_cas if getattr(args, ca)]
    if __debug__:
        debug('CMDLINE', "enabled conditional attributes: '%s'" % enabled_ca)
    hyper = Hyperalignment(enable_ca=enabled_ca,
                           alignment=ProcrusteanMapper(svd='dgesvd',
                                                       space='commonspace'),
                           **params)
    verbose(1, "Running hyperalignment")
    promappers = hyper(dss)
    verbose(2, "Alignment reference is dataset %i" % hyper.ca.chosen_ref_ds)
    verbose(1, "Writing output")
    # save on memory and remove the training data
    del dss
    if args.commonspace:
        if __debug__:
            debug('CMDLINE', "write commonspace as hdf5")
        h5save('%s%s.hdf5' % (args.output_prefix,
                              _output_specs['commonspace']['output_suffix']),
               hyper.commonspace,
               compression=args.hdf5_compression)
    for ca in _supported_cas:
        if __debug__:
            debug('CMDLINE', "check conditional attribute: '%s'" % ca)
        if getattr(args, ca):
            if __debug__:
                debug('CMDLINE', "store conditional attribute: '%s'" % ca)
            np.savetxt(
                '%s%s' %
                (args.output_prefix, _supported_cas[ca]['output_suffix']),
                hyper.ca[ca].value.samples)
    if args.store_transformation:
        for i, pm in enumerate(promappers):
            if __debug__:
                debug('CMDLINE', "store mapper %i: %s" % (i, str(pm)))
            h5save('%s%s.hdf5' % (args.output_prefix, '_map%.3i' % i),
                   pm,
                   compression=args.hdf5_compression)
    if args.transform:
        tdss, dss = _transform_dss(args.transform, promappers, args)
        del dss
        verbose(1, "Store transformed datasets")
        for i, td in enumerate(tdss):
            if __debug__:
                debug('CMDLINE',
                      "store transformed data %i: %s" % (i, str(td)))
            h5save('%s%s.hdf5' % (args.output_prefix, '_transformed%.3i' % i),
                   td,
                   compression=args.hdf5_compression)
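_transform_dss is not shown in this excerpt; a sketch of what it does, inferred from its usage above (arg2ds, verbose and debug are the same helpers already used in run; args is accepted only to mirror the call signature):

def _transform_dss(srcs, mappers, args):
    """Sketch inferred from the call above: load each dataset named in srcs and
    forward it through the corresponding projection mapper."""
    if __debug__:
        debug('CMDLINE', "loading to-be-transformed data from %s" % srcs)
    dss = [arg2ds(d) for d in srcs]
    verbose(1, "Loaded %i to-be-transformed datasets" % len(dss))
    tdss = [mappers[i].forward(td) for i, td in enumerate(dss)]
    return tdss, dss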
 def test_hpal_svd_combo(self):
     # get seed dataset
     ds4l = datasets['uni4large']
     ds_orig = ds4l[:, ds4l.a.nonbogus_features]
     # XXX Is this SVD mapping required?
     svm = SVDMapper()
     svm.train(ds_orig)
     ds_svs = svm.forward(ds_orig)
     ds_orig.samples = ds_svs.samples
     nf_true = ds_orig.nfeatures
     n = 4  # # of datasets to generate
     # Adding non-shared dimensions for each subject
     dss_rotated = [[]]*n
     for i in range(n):
         dss_rotated[i] = hstack(
             (ds_orig, ds4l[:, ds4l.a.bogus_features[i * 4: i * 4 + 4]]))
     # rotate data
     nf = dss_rotated[0].nfeatures
     dss_rotated = [random_affine_transformation(dss_rotated[i])
                    for i in range(n)]
     # Test if it is close to doing hpal+SVD in sequence outside hpal
     # First, as we do in sequence outside hpal
     ha = Hyperalignment()
     mappers_orig = ha(dss_rotated)
     dss_back = [m.forward(ds_)
                 for m, ds_ in zip(mappers_orig, dss_rotated)]
     dss_mean = np.mean([sd.samples for sd in dss_back], axis=0)
     svm = SVDMapper()
     svm.train(dss_mean)
     dss_sv = [svm.forward(sd) for sd in dss_back]
     # Test for SVD dimensionality reduction even with 2 training subjects
     for output_dim in [1, 4]:
         ha = Hyperalignment(output_dim=output_dim)
         ha.train(dss_rotated[:2])
         mappers = ha(dss_rotated)
         dss_back = [m.forward(ds_)
                     for m, ds_ in zip(mappers, dss_rotated)]
         for sd in dss_back:
             assert (sd.nfeatures == output_dim)
     # Check if combined hpal+SVD works as expected
     sv_corrs = []
     for sd1, sd2 in zip(dss_sv, dss_back):
         ndcs = np.diag(np.corrcoef(sd1.samples.T, sd2.samples.T)[nf:, :nf],
                        k=0)
         sv_corrs.append(ndcs)
     self.assertTrue(
         np.all(np.abs(np.array(sv_corrs)) >= 0.95),
         msg="Hyperalignment with dimensionality reduction should have "
             "reconstructed SVD dataset. Got correlations %s."
             % sv_corrs)
     # Check if it recovers original SVs
     sv_corrs_orig = []
     for sd in dss_back:
         ndcs = np.diag(
             np.corrcoef(sd.samples.T, ds_orig.samples.T)[nf_true:, :nf_true],
             k=0)
         sv_corrs_orig.append(ndcs)
     self.assertTrue(
         np.all(np.abs(np.array(sv_corrs_orig)) >= 0.9),
         msg="Expected original dimensions after "
             "SVD. Got correlations %s."
             % sv_corrs_orig)
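The combination exercised above can also be used directly outside a test; a minimal sketch, assuming dss_train and dss_all are lists of pre-zscored datasets with matching samples (both names are placeholders):

from mvpa2.algorithms.hyperalignment import Hyperalignment

ha = Hyperalignment(output_dim=20)     # common space reduced to 20 SVD dimensions
ha.train(dss_train)                    # estimate the common space from the training subjects
mappers = ha(dss_all)                  # one StaticProjectionMapper per dataset
dss_reduced = [m.forward(ds) for m, ds in zip(mappers, dss_all)]
assert all(sd.nfeatures == 20 for sd in dss_reduced)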