Example #1
def arg2neighbor(arg):
    # [[shape:]shape:]params
    comp = arg.split(':')
    if not len(comp):
        # need at least a radius
        raise ValueError("incomplete neighborhood specification")
    if len(comp) == 1:
        # [file|sphere radius]
        attr = 'voxel_indices'
        arg = comp[0]
        if os.path.isfile(arg) and arg.endswith('.py'):
            neighbor = script2obj(arg)
        else:
            from mvpa2.misc.neighborhood import Sphere
            neighbor = Sphere(int(arg))
    elif len(comp) == 2:
        # attr:[file|sphere radius]
        attr = comp[0]
        arg = comp[1]
        if os.path.isfile(arg) and arg.endswith('.py'):
            neighbor = script2obj(arg)
        else:
            from mvpa2.misc.neighborhood import Sphere
            neighbor = Sphere(int(arg))
    elif len(comp) > 2:
        attr = comp[0]
        shape = comp[1]
        params = [float(c) for c in comp[2:]]
        import mvpa2.misc.neighborhood as neighb
        neighbor = getattr(neighb, shape)(*params)
    return attr, neighbor
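A minimal usage sketch for the parser above (assumes ``os`` is imported, ``arg2neighbor`` is in scope, and mvpa2 is installed; the spec strings are only illustrative):

# radius only: attribute defaults to 'voxel_indices'
attr, nb = arg2neighbor('3')                        # ('voxel_indices', Sphere(3))
# attribute name plus radius
attr, nb = arg2neighbor('myspace:2')                # ('myspace', Sphere(2))
# full attr:shape:params form; shape is looked up in mvpa2.misc.neighborhood
attr, nb = arg2neighbor('voxel_indices:Sphere:2')   # ('voxel_indices', Sphere(2.0))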
Example #2
 def test_cached_qe_gnbsearchlight(self):
     ds1 = datasets['3dsmall'].copy(deep=True)
     qe = IndexQueryEngine(myspace=Sphere(2))
     cached_qe = CachedQueryEngine(qe)
     gnb_sl = GNBSearchlight(GNB(), NFoldPartitioner(), qe=cached_qe)
     res = gnb_sl(ds1)
     assert_false(cached_qe.ids is None)
    def _get_trained_queryengines(self, datasets, queryengine, radius, ref_ds):
        """Helper to return trained query engine(s), either list of one or one per each dataset

        if queryengine is None then IndexQueryEngine based on radius is created
        """
        ndatasets = len(datasets)
        if queryengine:
            if isinstance(queryengine, (list, tuple)):
                queryengines = queryengine
                if len(queryengines) != ndatasets:
                    raise ValueError(
                        "%d query engines were specified although %d datasets "
                        "provided" % (len(queryengines), ndatasets))
                _shpaldebug("Training provided query engines")
                for qe, ds in zip(queryengines, datasets):
                    qe.train(ds)
            else:
                queryengine.train(datasets[ref_ds])
                queryengines = [queryengine]
        else:
            _shpaldebug(
                'No custom query engines were provided. Setting up the '
                'volumetric query engine on voxel_indices.')
            queryengine = IndexQueryEngine(voxel_indices=Sphere(radius))
            queryengine.train(datasets[ref_ds])
            queryengines = [queryengine]
        return queryengines
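For reference, the default branch above amounts to the following (a sketch; ``ds`` stands for a PyMVPA dataset carrying a 'voxel_indices' feature attribute):

from mvpa2.misc.neighborhood import IndexQueryEngine, Sphere

qe = IndexQueryEngine(voxel_indices=Sphere(3))   # radius in grid units
qe.train(ds)                                     # the same call the helper issues
roi_ids = qe.query_byid(0)                       # feature ids of the sphere around feature #0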
Example #4
 def test_gnbsearghlight_exclude_partition(self):
     # just a smoke test with a custom partitioner
     ds1 = datasets['3dsmall'].copy(deep=True)
     gnb_sl = GNBSearchlight(GNB(),
                             generator=CustomPartitioner([([0], [1])]),
                             qe=IndexQueryEngine(myspace=Sphere(2)),
                             errorfx=None)
     res = gnb_sl(ds1)
Example #5
def test_simple_sim1_clean_per_subject():
    # no noise -- all must be clear
    dissims = [[0.9], [0.8], [0.5], [0.3]]
    args = (64, 64), dissims
    kwargs = dict(roi_neighborhood=Sphere(6), nruns=3, nsubjects=2)

    # clean case
    signal_clean, cluster_truth, dss = simple_sim1(*args,
                                                   noise_subject_std=0,
                                                   noise_independent_std=0,
                                                   noise_common_std=0,
                                                   **kwargs)
    # trailing 1 since we have only 1 dissimilarity value per ROI
    assert_equal(signal_clean.shape, (64, 64, 1))
    # all dss should be identical to a_clean
    for ds in dss:
        for samples in ds[0].a.mapper.reverse(ds).samples:
            assert_array_almost_equal(signal_clean[..., 0], samples)

    # Now let's generate common noise
    signal_clean, cluster_truth, dss = simple_sim1(*args,
                                                   noise_subject_std=0,
                                                   noise_independent_std=0,
                                                   noise_common_std=100,
                                                   **kwargs)

    # corr coeffs should be really high across all the runs and subjects
    all_subj_runs = np.corrcoef(np.vstack(dss))
    assert_true(
        np.all(
            np.abs(all_subj_runs[np.triu_indices(len(all_subj_runs))]) > 0.7))
    # but low to signal_clean
    assert_true(
        np.all(
            np.abs(np.corrcoef(signal_clean.flatten(), np.vstack(dss))[0, 1:])
            < 0.3))

    # Now let's generate per-subject common noise
    signal_clean, cluster_truth, dss = simple_sim1(*args,
                                                   noise_subject_std=100,
                                                   noise_independent_std=0,
                                                   noise_common_std=0,
                                                   **kwargs)

    # corr coeffs should be really high across all the runs within each
    # subject but otherwise having low correlation
    all_subj_runs = np.corrcoef(np.vstack(dss))
    # TODO: fix up
    # assert_true(np.all(np.abs(
    #         all_subj_runs[np.triu_indices(len(all_subj_runs))])
    #         > 0.8))  ## TODO: fix up
    # and low to signal_clean
    assert_true(
        np.all(
            np.abs(np.corrcoef(signal_clean.flatten(), np.vstack(dss))[0, 1:])
            < 0.3))
Example #6
    def test_splitter_gnbsearghlight(self):
        ds1 = datasets['3dsmall'].copy(deep=True)

        gnb_sl = GNBSearchlight(GNB(),
                                generator=CustomPartitioner([([0], [1])]),
                                qe=IndexQueryEngine(myspace=Sphere(2)),
                                splitter=Splitter(attr='partitions',
                                                  attr_values=[1, 2]),
                                errorfx=None)
        res = gnb_sl(ds1)
        assert_equal(res.nsamples, (ds1.chunks == 1).sum())
Example #7
def sphere_searchlight(datameasure,
                       radius=1,
                       center_ids=None,
                       space='voxel_indices',
                       **kwargs):
    """Creates a `Searchlight` to run a scalar `Measure` on
    all possible spheres of a certain size within a dataset.

    The idea for a searchlight algorithm stems from a paper by
    :ref:`Kriegeskorte et al. (2006) <KGB06>`.

    Parameters
    ----------
    datameasure : callable
      Any object that takes a :class:`~mvpa2.datasets.base.Dataset`
      and returns some measure when called.
    radius : int
      All features within this radius around the center will be part
      of a sphere. Radius is in grid-indices, i.e. ``1`` corresponds
      to all immediate neighbors, regardless of the physical distance.
    center_ids : list of int
      List of feature ids (not coordinates) that shall serve as sphere
      centers. Alternatively, this can be the name of a feature attribute
      of the input dataset, whose non-zero values determine the feature
      ids.  By default all features will be used (it is passed as the
      ``roi_ids`` argument of Searchlight).
    space : str
      Name of a feature attribute of the input dataset that defines the spatial
      coordinates of all features.
    **kwargs
      In addition this class supports all keyword arguments of its
      base-class :class:`~mvpa2.measures.base.Measure`.

    Notes
    -----
    If `Searchlight` is used as `SensitivityAnalyzer` one has to make
    sure that the specified scalar `Measure` returns large
    (absolute) values for high sensitivities and small (absolute) values
    for low sensitivities. Especially when using error functions, low
    values usually imply high performance and therefore high sensitivity.
    This would in turn result in sensitivity maps that have low
    (absolute) values indicating high sensitivities and this conflicts
    with the intended behavior of a `SensitivityAnalyzer`.
    """
    # build a matching query engine from the arguments
    kwa = {space: Sphere(radius)}
    qe = IndexQueryEngine(**kwa)
    # init the searchlight with the queryengine
    return Searchlight(datameasure,
                       queryengine=qe,
                       roi_ids=center_ids,
                       **kwargs)
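
A hedged usage sketch for the factory above (the dataset ``ds``, classifier, and partitioner are illustrative and not part of the original code; it assumes a PyMVPA dataset with targets, chunks, and a 'voxel_indices' feature attribute):

import numpy as np
from mvpa2.clfs.gnb import GNB
from mvpa2.generators.partition import NFoldPartitioner
from mvpa2.measures.base import CrossValidation

# sketch only: ds is assumed to exist (e.g. loaded via fmri_dataset)
cv = CrossValidation(GNB(), NFoldPartitioner(),
                     errorfx=lambda p, t: np.mean(p == t))
sl = sphere_searchlight(cv, radius=3, space='voxel_indices')
res = sl(ds)   # one sample per CV fold, one feature per sphere center
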
def sphere_gnbsearchlight(gnb,
                          generator,
                          radius=1,
                          center_ids=None,
                          space='voxel_indices',
                          *args,
                          **kwargs):
    """Creates a `GNBSearchlight` to assess :term:`cross-validation`
    classification performance of GNB on all possible spheres of a
    certain size within a dataset.

    The idea of taking advantage of naiveness of GNB for the sake of
    quick searchlight-ing stems from Francisco Pereira (paper under
    review).

    Parameters
    ----------
    radius : float
      All features within this radius around the center will be part
      of a sphere.
    center_ids : list of int
      List of feature ids (not coordinates) that shall serve as sphere
      centers. By default all features will be used (it is passed as the
      ``roi_ids`` argument of Searchlight).
    space : str
      Name of a feature attribute of the input dataset that defines the spatial
      coordinates of all features.
    **kwargs
      In addition this class supports all keyword arguments of
      :class:`~mvpa2.measures.gnbsearchlight.GNBSearchlight`.

    Notes
    -----
    If any `BaseSearchlight` is used as `SensitivityAnalyzer` one has to make
    sure that the specified scalar `Measure` returns large
    (absolute) values for high sensitivities and small (absolute) values
    for low sensitivities. Especially when using error functions, low
    values usually imply high performance and therefore high sensitivity.
    This would in turn result in sensitivity maps that have low
    (absolute) values indicating high sensitivities and this conflicts
    with the intended behavior of a `SensitivityAnalyzer`.
    """
    # build a matching query engine from the arguments
    kwa = {space: Sphere(radius)}
    qe = IndexQueryEngine(**kwa)
    # init the searchlight with the queryengine
    return GNBSearchlight(gnb,
                          generator,
                          qe,
                          roi_ids=center_ids,
                          *args,
                          **kwargs)
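
A matching sketch for the GNB-optimized variant (same assumptions about ``ds`` as in the sketch above; the radius choice is arbitrary):

# sketch only: GNB/NFoldPartitioner imports as above, ds assumed to exist
gnb_sl = sphere_gnbsearchlight(GNB(), NFoldPartitioner(),
                               radius=2, space='voxel_indices')
res = gnb_sl(ds)   # one result per sphere center
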
    def test_add_center_fa(self):
        # just a smoke test pretty much
        ds = datasets['3dsmall'].copy()

        # check that we do not mark anything as center whenever there is none
        def check_no_center(ds):
            assert (not np.any(ds.fa.center))
            return 1.0

        # or just a single center in our case
        def check_center(ds):
            assert (np.sum(ds.fa.center) == 1)
            return 1.0

        for n, check in [(HollowSphere(1, 0), check_no_center),
                         (Sphere(0), check_center), (Sphere(1), check_center)]:
            Searchlight(check,
                        IndexQueryEngine(myspace=n),
                        add_center_fa='center')(ds)
            # and no changes to original ds data, etc
            assert_array_equal(datasets['3dsmall'].fa.keys(), ds.fa.keys())
            assert_array_equal(datasets['3dsmall'].samples, ds.samples)
Example #10
 def test_1d_multispace_searchlight(self):
     ds = Dataset([np.arange(6)])
     ds.fa['coord1'] = np.repeat(np.arange(3), 2)
     # add a second space to the dataset
     ds.fa['coord2'] = np.tile(np.arange(2), 3)
     measure = lambda x: "+".join([str(x) for x in x.samples[0]])
     # simply select each feature once
     res = Searchlight(measure,
                       IndexQueryEngine(coord1=Sphere(0), coord2=Sphere(0)),
                       nproc=1)(ds)
     assert_array_equal(res.samples, [['0', '1', '2', '3', '4', '5']])
     res = Searchlight(measure,
                       IndexQueryEngine(coord1=Sphere(0), coord2=Sphere(1)),
                       nproc=1)(ds)
     assert_array_equal(res.samples,
                        [['0+1', '0+1', '2+3', '2+3', '4+5', '4+5']])
     res = Searchlight(measure,
                       IndexQueryEngine(coord1=Sphere(1), coord2=Sphere(0)),
                       nproc=1)(ds)
     assert_array_equal(res.samples,
                        [['0+2', '1+3', '0+2+4', '1+3+5', '2+4', '3+5']])
 def test_searchlight_hyperalignment(self):
     skip_if_no_external('scipy')
     skip_if_no_external('h5py')
     ds_orig = datasets['3dsmall'].copy()[:, :15]
     ds_orig.fa['voxel_indices'] = ds_orig.fa.myspace
     space = 'voxel_indices'
     # total number of datasets for the analysis
     nds = 5
     zscore(ds_orig, chunks_attr=None)
     dss = [ds_orig]
     # create a few distorted datasets to match the desired number of datasets
      # not sure if this truly mimics real data, but at least we can test
      # the implementation
     while len(dss) < nds - 1:
         sd = local_random_affine_transformations(
             ds_orig,
             scatter_neighborhoods(Sphere(1),
                                   ds_orig.fa[space].value,
                                   deterministic=True)[1],
             Sphere(2),
             space=space,
             scale_fac=1.0,
             shift_fac=0.0)
          # the function above sometimes returns a dataset with NaNs or Infs, which we don't want
         if np.sum(np.isnan(sd.samples)+np.isinf(sd.samples)) == 0 \
                 and np.all(sd.samples.std(0)):
             dss.append(sd)
     ds_orig_noisy = ds_orig.copy()
     ds_orig_noisy.samples += 0.1 * np.random.random(
         size=ds_orig_noisy.shape)
     dss.append(ds_orig_noisy)
     _ = [zscore(sd, chunks_attr=None) for sd in dss[1:]]
     # we should have some distortion
     for ds in dss[1:]:
         assert_false(np.all(ds_orig.samples == ds.samples))
     # testing checks
     slhyp = SearchlightHyperalignment(ref_ds=1, exclude_from_model=[1])
     self.assertRaises(ValueError, slhyp, dss[:3])
     slhyp = SearchlightHyperalignment(ref_ds=3)
     self.assertRaises(ValueError, slhyp, dss[:3])
     # explicit test of exclude_from_model
     slhyp = SearchlightHyperalignment(ref_ds=2,
                                       exclude_from_model=[1],
                                       featsel=0.7)
     projs1 = slhyp(dss)
     aligned1 = [proj.forward(ds) for proj, ds in zip(projs1, dss)]
     samples = dss[1].samples.copy()
     dss[1].samples += 0.1 * np.random.random(size=dss[1].shape)
     projs2 = slhyp(dss)
     aligned2 = [proj.forward(ds) for proj, ds in zip(projs1, dss)]
     for i in [0, 2, 3, 4]:
         assert_array_almost_equal(projs1[i].proj.todense(),
                                   projs2[i].proj.todense())
         assert_array_almost_equal(aligned1[i].samples, aligned2[i].samples)
     assert_false(
         np.all(projs1[1].proj.todense() == projs1[2].proj.todense()))
     assert_false(np.all(aligned1[1].samples == aligned2[1].samples))
     dss[1].samples = samples
     # store projections for each mapper separately
     projs = list()
     # run the algorithm with all combinations of the two major parameters
     # for projection calculation.
     for kwargs in [{
             'combine_neighbormappers': True,
             'nproc': 1 + int(externals.exists('pprocess'))
     }, {
             'combine_neighbormappers': True,
             'dtype': 'float64',
             'compute_recon': True
     }, {
             'combine_neighbormappers': True,
             'exclude_from_model': [2, 4]
     }, {
             'combine_neighbormappers': False
     }, {
             'combine_neighbormappers': False,
             'mask_node_ids': np.arange(dss[0].nfeatures).tolist()
     }, {
             'combine_neighbormappers': True,
             'sparse_radius': 1
     }, {
             'combine_neighbormappers': True,
             'nblocks': 2
     }]:
         slhyp = SearchlightHyperalignment(radius=2, **kwargs)
         mappers = slhyp(dss)
         # one mapper per input ds
         assert_equal(len(mappers), nds)
         projs.append(mappers)
     # some checks
     for midx in range(nds):
         # making sure mask_node_ids options works as expected
         assert_array_almost_equal(projs[3][midx].proj.todense(),
                                   projs[4][midx].proj.todense())
         # recon check
         assert_array_almost_equal(projs[0][midx].proj.todense(),
                                   projs[1][midx].recon.T.todense(),
                                   decimal=5)
         assert_equal(projs[1][midx].proj.dtype, 'float64')
         assert_equal(projs[0][midx].proj.dtype, 'float32')
     # making sure the projections make sense
     for proj in projs:
          # no .max on sparse matrices on older scipy (e.g. on precise) so convert to array first
         max_weight = proj[0].proj.toarray().max(1).squeeze()
         diag_weight = proj[0].proj.diagonal()
         # Check to make sure diagonal is the max weight, in almost all rows for reference subject
         assert (np.sum(max_weight == diag_weight) / float(len(diag_weight))
                 >= 0.80)
         # and not true for other subjects
         for i in range(1, nds - 1):
             assert (np.sum(proj[i].proj.toarray().max(1).squeeze() ==
                            proj[i].proj.diagonal()) /
                     float(proj[i].proj.shape[0]) < 0.80)
         # Check to make sure projection weights match across duplicate datasets
         max_weight = proj[-1].proj.toarray().max(1).squeeze()
         diag_weight = proj[-1].proj.diagonal()
         # Check to make sure diagonal is the max weight, in almost all rows for reference subject
         assert (np.sum(max_weight == diag_weight) / float(len(diag_weight))
                 >= 0.80)
     # project data
     dss_hyper = [hm.forward(sd) for hm, sd in zip(projs[0], dss)]
     _ = [zscore(sd, chunks_attr=None) for sd in dss_hyper]
     ndcss = []
     nf = ds_orig.nfeatures
     for ds_hyper in dss_hyper:
         ndcs = np.diag(np.corrcoef(ds_hyper.samples.T,
                                    ds_orig.samples.T)[nf:, :nf],
                        k=0)
         ndcss += [ndcs]
     assert_true(np.median(ndcss[0]) > 0.9)
     # noisy copy of original dataset should be similar to original after hyperalignment
     assert_true(np.median(ndcss[-1]) > 0.9)
     assert_true(np.all([np.median(ndcs) > 0.2 for ndcs in ndcss[1:-2]]))
Example #12
        ds.a['mapper'] = dss_subject[
            0].a.mapper  # .a are not transferred by vstack
        dss.append(ds)

    # Instrumental noise -- the most banal
    assert (len(dss) == nsubjects)
    assert (len(dss) == nsubjects)
    assert (len(dss[0]) == nruns * len(dissim))

    return np.tanh(signal_clean), cluster_truth, dss


if __name__ == '__main__':
    a_clean, cluster_truth, dss = simple_sim1((64, 64),
                                              [[1], [0.8], [0.5], [0.3]],
                                              roi_neighborhood=Sphere(6),
                                              nruns=3,
                                              nsubjects=2,
                                              noise_subject_n=1,
                                              noise_subject_std=5,
                                              noise_subject_smooth=5,
                                              noise_independent_std=4,
                                              noise_independent_smooth=1.5,
                                              noise_common_n=1,
                                              noise_common_std=3)

    # just a little helper
    def get2d(ds):
        return dss[0].a.mapper.reverse(ds)

    import pylab as pl
    def test_voxel_selection(self):
        '''Compare surface and volume based searchlight'''
        '''
        Tests to see whether results are identical for surface-based
        searchlight (just one plane; Euclidean distance) and volume-based
        searchlight.

        Note that the current value is a float; if it were int, it would
        specify the number of voxels in each searchlight'''

        radius = 10.
        '''Define input filenames'''
        epi_fn = os.path.join(pymvpa_dataroot, 'bold.nii.gz')
        maskfn = os.path.join(pymvpa_dataroot, 'mask.nii.gz')
        '''
        Use the EPI datafile to define a surface.
        The surface has as many nodes as there are voxels
        and is parallel to the volume 'slice'
        '''
        vg = volgeom.from_any(maskfn, mask_volume=True)

        aff = vg.affine
        nx, ny, nz = vg.shape[:3]
        '''Plane goes in x and y direction, so we take these vectors
        from the affine transformation matrix of the volume'''
        plane = surf.generate_plane(aff[:3, 3], aff[:3, 0], aff[:3, 1], nx, ny)
        '''
        Simulate pial and white matter as just above and below
        the central plane
        '''
        normal_vec = aff[:3, 2]
        outer = plane + normal_vec
        inner = plane + -normal_vec
        '''
        Combine volume and surface information
        '''
        vsm = volsurf.VolSurfMaximalMapping(vg, outer, inner)
        '''
        Run voxel selection with specified radius (in mm), using
        Euclidean distance measure
        '''
        surf_voxsel = surf_voxel_selection.voxel_selection(vsm,
                                                           radius,
                                                           distance_metric='e')
        '''Define the measure'''

        # run_slow=True would give an actual cross-validation with meaningful
        # accuracies. Because this is a unit-test only the number of voxels
        # in each searchlight is tested.
        run_slow = False

        if run_slow:
            meas = CrossValidation(GNB(),
                                   OddEvenPartitioner(),
                                   errorfx=lambda p, t: np.mean(p == t))
            postproc = mean_sample
        else:
            meas = _Voxel_Count_Measure()
            postproc = lambda x: x
        '''
        Surface analysis: define the query engine, cross validation,
        and searchlight
        '''
        surf_qe = SurfaceVerticesQueryEngine(surf_voxsel)
        surf_sl = Searchlight(meas, queryengine=surf_qe, postproc=postproc)
        '''
        new (Sep 2012): also test 'simple' queryengine wrapper function
        '''

        surf_qe2 = disc_surface_queryengine(radius,
                                            maskfn,
                                            inner,
                                            outer,
                                            plane,
                                            volume_mask=True,
                                            distance_metric='euclidean')
        surf_sl2 = Searchlight(meas, queryengine=surf_qe2, postproc=postproc)
        '''
        Same for the volume analysis
        '''
        element_sizes = tuple(map(abs, (aff[0, 0], aff[1, 1], aff[2, 2])))
        sph = Sphere(radius, element_sizes=element_sizes)
        kwa = {'voxel_indices': sph}

        vol_qe = IndexQueryEngine(**kwa)
        vol_sl = Searchlight(meas, queryengine=vol_qe, postproc=postproc)
        '''The following steps are similar to start_easy.py'''
        attr = SampleAttributes(
            os.path.join(pymvpa_dataroot, 'attributes_literal.txt'))

        mask = surf_voxsel.get_mask()

        dataset = fmri_dataset(samples=os.path.join(pymvpa_dataroot,
                                                    'bold.nii.gz'),
                               targets=attr.targets,
                               chunks=attr.chunks,
                               mask=mask)

        if run_slow:
            # do chunkswise linear detrending on dataset

            poly_detrend(dataset, polyord=1, chunks_attr='chunks')

            # zscore dataset relative to baseline ('rest') mean
            zscore(dataset,
                   chunks_attr='chunks',
                   param_est=('targets', ['rest']))

        # select class face and house for this demo analysis
        # would work with full datasets (just a little slower)
        dataset = dataset[np.array(
            [l in ['face', 'house'] for l in dataset.sa.targets],
            dtype='bool')]
        '''Apply searchlight to datasets'''
        surf_dset = surf_sl(dataset)
        surf_dset2 = surf_sl2(dataset)
        vol_dset = vol_sl(dataset)

        surf_data = surf_dset.samples
        surf_data2 = surf_dset2.samples
        vol_data = vol_dset.samples

        assert_array_equal(surf_data, surf_data2)
        assert_array_equal(surf_data, vol_data)
Example #14
def simple_sim1(
        shape,
        dissims,
        rois_arrangement='circle',
        roi_neighborhood=Sphere(5),
        nruns=1,
        nsubjects=1,
        # noise components -- we just add normal for now also with
        # spatial smoothing to possibly create difference in noise
        # characteristics across different kinds
        #
        # "Instrumental noise" -- generic nuisance
        noise_independent_std=0.4,
        noise_independent_smooth=3.,
        # "Intrinsic signal", specific per each subject (due to
        # motion, whatever) -- might be fun for someone to cluster,
        # but irrelevant for us
        noise_subject_n=1,
        noise_subject_std=0.4,
        noise_subject_smooth=1.5,
        # "Intrinsic common signal" -- probably generalizes across
        # subjects and fun for someone studying veins to get those
        # reproducible clusters.  It will be mixed in also with
        # different weights per each run.
        # Again -- might be fun for someone to cluster, but not for us
        # since it would not be representative of the original signal
        noise_common_n=1,
        noise_common_std=0.4,
        noise_common_smooth=2.):
    """Simulate "data" containing similarity matrices with 3 noise
    components for multiple subjects

    Noise components are:

    - random normal noise, also spatially smoothed (probably with a smaller
    smoothing sigma than for the intrinsic noise)

    - intrinsic noise which is composed from a set of random fields,
    generated by random normal noise with subsequent spatial filtering,
    which are then mixed into each run data with random weights.  They
    are to simulate subject-specific intrinsic signals such as artifacts
    due to motion, possible subject-specific physiological processes

    - intrinsic noise common across subjects (e.g. all of them
    have similar blood distribution networks and other physiological
    parameters, and some intrinsic networks, which although similar in
    space would have different mix-in coefficients across subjects/runs)

    Theoretically, decomposition methods (such as ICA, PCA, etc) should help to
    identify such common noise components and filter them out.  Also methods
    which iteratively remove non-informative projections (such as GLMdenoise)
    should be effective at identifying those mix-ins.

    TODO: currently the mix-in happens with purely normal random weights;
    ideally we should color those as well
    """
    ndissims = len(dissims)

    # first we fisher transform so we can add normal noise
    # check first that we don't have extreme values that might give infinity
    dissims = np.array(dissims)
    dissims = 1. - dissims
    dissims[dissims == 1] = 0.99
    dissims[dissims == -1] = -0.99
    # fisher
    dissims = np.arctanh(dissims)

    # generate target clean "picture"
    d = np.asanyarray(dissims[0])
    signal_clean = np.zeros(shape + (len(vector_form(d)), ))

    # generate ground truth for clustering
    cluster_truth = np.zeros(shape, dtype='int')

    if rois_arrangement == 'circle':
        radius = min(shape[:2]) / 4.
        center = np.array((radius * 2, ) * len(shape)).astype(int)
        # arrange at quarter distance from center
        for i, dissim in enumerate(dissims):
            dissim = vector_form(dissim)
            # that is kinda boring -- the same dissimilarity to each
            # voxel???
            #
            # TODO: come up with a better arrangement/idea, e.g. to
            # generate an MVPA pattern which would satisfy the
            # dissimilarity (not exactly but at least close).  That
            # would make more sense
            roi_center = center.copy()
            roi_center[0] += int(radius * np.cos(2 * np.pi * i / ndissims))
            roi_center[1] += int(radius * np.sin(2 * np.pi * i / ndissims))
            for coords in roi_neighborhood(roi_center):
                acoords = np.asanyarray(coords)
                if np.all(acoords >= [0]*len(coords)) and \
                   np.all(acoords < signal_clean.shape[:len(coords)]):
                    signal_clean.__setitem__(coords, dissim)
                    cluster_truth.__setitem__(coords, i + 1)
    else:
        raise ValueError("I know only circle")

    # generated randomly and will be mixed into subjects with different weights
    # TODO: static across runs within subject??  if so -- would be no different
    #       from having RSAs?
    common_noises = get_intrinsic_noises(signal_clean.shape,
                                         std=noise_common_std,
                                         sigma=noise_common_smooth,
                                         n=noise_common_n)
    assert common_noises[0].ndim == 3, "There should be no time comp"

    # Now lets generate per subject and per run data by adding some noise(s)
    # all_signals = []
    dss = []
    for isubject in xrange(nsubjects):
        # Interesting noise, simulating some underlying process which has nothing
        # to do with original design/similarity but having spatial structure which
        # repeats through runs with random weights (consider it to be a principal component)

        # generated randomly for each subject separately, but they should have
        # common structure across runs
        subj_specific_noises = get_intrinsic_noises(signal_clean.shape,
                                                    std=noise_subject_std,
                                                    sigma=noise_subject_smooth,
                                                    n=noise_subject_n)
        assert subj_specific_noises[
            0].ndim == 3, "There should be no time comp"
        # subject_signals = []
        dss_subject = []
        subj_common_noises = [
            noise * np.random.normal() for noise in common_noises
        ]

        subj_specific_mixins = generate_mixins(nruns)
        subj_common_mixins = generate_mixins(nruns)

        for run in range(nruns):
            signal_run = signal_clean.copy()
            for noise in subj_specific_noises:
                signal_run += noise * subj_specific_mixins[run]
            for noise in subj_common_noises:
                signal_run += noise * subj_common_mixins[run]
            # generic noise -- no common structure across subjects/runs
            signal_run += filter_each_2d(
                np.random.normal(size=signal_clean.shape) *
                noise_independent_std, noise_independent_smooth)

            # go back to correlations with inverse of fisher
            signal_run = np.tanh(signal_run)
            # rollaxis to bring similarities into leading dimension
            ds = Dataset(np.rollaxis(signal_run, 2, 0))
            ds.sa['chunks'] = [run]
            ds.sa['dissimilarity'] = np.arange(len(dissim))  # Lame one for now
            ds_flat = ds.get_mapped(
                FlattenMapper(shape=ds.shape[1:], space='pixel_indices'))
            dss_subject.append(ds_flat)
            #subject_signals.append(signal_run)
        #all_signals.append(subject_signals)
        ds = dsvstack(dss_subject)
        ds.a['mapper'] = dss_subject[
            0].a.mapper  # .a are not transferred by vstack
        dss.append(ds)

    # Instrumental noise -- the most banal
    assert (len(dss) == nsubjects)
    assert (len(dss) == nsubjects)
    assert (len(dss[0]) == nruns * len(dissim))

    return np.tanh(signal_clean), cluster_truth, dss
N_BLOCKS=128
cnx_tx = 489
toutdir = os.path.join(
    basedir, 'transformation_matrices',
    'iterative_cha_olp4cbp_mappers' + '_' + 'subs-' + str(nsubs) + '_' +
    'radius1-10_radius2-' + str(HYPERALIGNMENT_RADIUS) + '.hdf5.gz')
print(toutdir)

# load nifti as a pymvpa dataset and then use that as ref_ds in the queryengine definition
# mask with data in brainmask so only 170k (size of connectomes) voxels are included
ref_ds = fmri_dataset(os.path.join(helperfiles, 'newbrainmask.nii'),
                      mask=os.path.join(helperfiles, 'newbrainmask.nii'))
print('Size of brain mask:')
print(str(len(ref_ds.fa.voxel_indices)))

# set searchlight sphere radius
sl_radius = HYPERALIGNMENT_RADIUS

#create query engine
qe = IndexQueryEngine(voxel_indices=Sphere(sl_radius))
qe.train(ref_ds)

# load all subjects
nfiles = glob.glob(os.path.join(chamats, '*commonspace_subs*'))
print('Loading participant data from: ')
print(chamats)
mysubs = nfiles[0:nsubs]

# import connectomes into pymvpa dataset, zscore, then add chunks and voxel indices, append to list of datasets
dss = []
for sub in range(len(mysubs)):
    ds = mv.Dataset(np.load(mysubs[sub]))
    ds.fa['voxel_indices'] = range(ds.shape[1])
    #ds.sa['chunks'] = np.repeat(i,cnx_tx)
    mv.zscore(ds, chunks_attr=None)
    dss.append(ds)
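A hedged sketch of the step this script is presumably building toward (not part of the excerpt; it reuses the query engine and dataset list defined above, and assumes SearchlightHyperalignment is imported from mvpa2.algorithms.searchlight_hyperalignment and that mv exposes the usual mvpa2.suite names):

# sketch only: run searchlight hyperalignment with the trained query engine
sl_hyp = SearchlightHyperalignment(queryengine=qe, nproc=1)
mappers = sl_hyp(dss)
mv.h5save(toutdir, mappers, compression=9)   # toutdir ends in .hdf5.gz above
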
Example #16
    def get_masked_array(self, nt=None, dilate=None):
        '''Provides a masked numpy array
        
        Parameters
        ----------
        nt: int or None
            Number of timepoints (or samples). Each feature has the
            same value (1 if in the mask, 0 otherwise) for each
            sample. If nt is None, then the output is 3D; otherwise
            it is 4D with 'nt' values in the last dimension.
        dilate: callable or int or None
            Specification of mask dilation.
            If a callable, it should be a neighborhood function
            (like Sphere(..)) that can map a single voxel coordinate
            (represented as a triple of indices) to a list of voxel
            coordinates that define the neighborhood of that
            coordinate. For example, Sphere(3) can be used to dilate the
            original mask by 3 voxels. If an int, then it uses
            Sphere(dilate) to dilate the mask. If set to None,
            the mask is not dilated.
            
        Returns
        -------
        msk: numpy.ndarray
            an array with value 1. for voxels inside the mask
            and 0 elsewhere. If the instance has no mask,
            then all values are 1.
        '''

        data_vec = np.zeros((self.nvoxels, ), dtype=np.float32)
        if self.mask is None:
            data_vec[:] = 1
        else:
            data_vec[self.mask] = 1

        # see if the mask has to be dilated.
        # if all voxels are already in the mask this can be omitted
        if dilate is not None and \
                    self.nvoxels_mask != self.nvoxels:

            if type(dilate) is int:
                dilate = Sphere(dilate)

            # offsets
            deltas = dilate((0, 0, 0))

            # positions of nonzero voxels
            data_ijks = self.lin2ijk(np.nonzero(data_vec)[0])

            # helper function
            def add_tuple(x, y):
                return (x[0] + y[0], x[1] + y[1], x[2] + y[2])

            # gather all subindices here
            dilate_ijk = set()

            # all combinations of offsets and positions of voxels in the mask
            for delta in deltas:
                if delta != (0, 0, 0):
                    for data_ijk in data_ijks:
                        pos = add_tuple(delta, data_ijk)
                        dilate_ijk.add(pos)

            if dilate_ijk:
                dilate_lin = self._ijk2lin_unmasked(list(dilate_ijk))
                lin_mask = self.contains_lin(dilate_lin, apply_mask=False)
                data_vec[dilate_lin[lin_mask]] = 1

        sh = self.shape
        data_t1 = np.reshape(data_vec, sh[:3])

        if nt is not None:
            sh = (sh[0], sh[1], sh[2], nt)
            data = np.zeros(sh, data_vec.dtype)
            for t in xrange(nt):
                data[:, :, :, t] = data_t1
            return data
        else:
            return data_t1
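A hedged sketch of how the dilation parameter is meant to be used (``vg`` stands for an instance of the volume-geometry class this method belongs to; the radius and nt values are illustrative):

from mvpa2.misc.neighborhood import Sphere

msk = vg.get_masked_array()                       # plain 3D array of 0s and 1s
msk_dil = vg.get_masked_array(dilate=Sphere(2))   # mask grown by a 2-voxel sphere
msk_4d = vg.get_masked_array(nt=5)                # same mask replicated over 5 timepoints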
    def __call__(self, datasets):
        """Estimate mappers for each dataset using searchlight-based
        hyperalignment.

        Parameters
        ----------
          datasets : list or tuple of datasets

        Returns
        -------
        A list of trained StaticProjectionMappers of the same length as datasets
        """

        # Perform some checks first before modifying internal state
        params = self.params
        ndatasets = len(datasets)

        if len(datasets) <= 1:
            raise ValueError("SearchlightHyperalignment needs > 1 dataset to "
                             "operate on. Got: %d" % self.ndatasets)

        if params.ref_ds in params.exclude_from_model:
            raise ValueError("Requested reference dataset %i is also "
                             "in the exclude list." % params.ref_ds)

        if params.ref_ds >= ndatasets:
            raise ValueError("Requested reference dataset %i is out of "
                             "bounds. We have only %i datasets provided" %
                             (params.ref_ds, ndatasets))

        # The rest of the checks are just warnings
        self.ndatasets = ndatasets

        _shpaldebug("SearchlightHyperalignment %s for %i datasets" %
                    (self, self.ndatasets))

        selected = [
            _ for _ in range(ndatasets) if _ not in params.exclude_from_model
        ]
        ref_ds_train = selected.index(params.ref_ds)
        params.hyperalignment.params.ref_ds = ref_ds_train
        warning('Using %dth dataset as the reference dataset (%dth after '
                'excluding datasets)' % (params.ref_ds, ref_ds_train))
        if len(params.exclude_from_model) > 0:
            warning("These datasets will not participate in building common "
                    "model: %s" % params.exclude_from_model)

        if __debug__:
            # verify that datasets were zscored prior the alignment since it is
            # assumed/required preprocessing step
            for ids, ds in enumerate(datasets):
                for f, fname, tval in ((np.mean, 'means', 0),
                                       (np.std, 'stds', 1)):
                    vals = f(ds, axis=0)
                    vals_comp = np.abs(vals - tval) > 1e-5
                    if np.any(vals_comp):
                        warning(
                            '%d %s are too different (max diff=%g) from %d in '
                            'dataset %d to come from a zscored dataset. '
                            'Please zscore datasets first for correct operation '
                            '(unless it was intentional)' %
                            (np.sum(vals_comp), fname, np.max(
                                np.abs(vals)), tval, ids))

        # Setting up SearchlightHyperalignment
        # we need to know which original features comprised the
        # individual SL ROIs
        _shpaldebug('Initializing FeatureSelectionHyperalignment.')
        hmeasure = FeatureSelectionHyperalignment(
            ref_ds=params.ref_ds,
            featsel=params.featsel,
            hyperalignment=params.hyperalignment,
            full_matrix=params.combine_neighbormappers,
            use_same_features=params.use_same_features,
            exclude_from_model=params.exclude_from_model,
            dtype=params.dtype)

        # Performing SL processing manually
        _shpaldebug("Setting up for searchlights")
        if params.nproc is None and externals.exists('pprocess'):
            import pprocess
            try:
                params.nproc = pprocess.get_number_of_cores() or 1
            except AttributeError:
                warning("pprocess version %s has no API to figure out maximal "
                        "number of cores. Using 1" %
                        externals.versions['pprocess'])
                params.nproc = 1

        # XXX I think this class should already accept a single dataset only.
        # It should have a ``space`` setting that names a sample attribute that
        # can be used to identify individual/original datasets.
        # Taking a single dataset as argument would be cleaner, because the
        # algorithm relies on the assumption that there is a coarse feature
        # alignment, i.e. the SL ROIs cover roughly the same area
        queryengines = self._get_trained_queryengines(datasets,
                                                      params.queryengine,
                                                      params.radius,
                                                      params.ref_ds)
        # For surface-node-to-voxel query engines, roi_seed hardly makes sense
        qe = queryengines[(0 if len(queryengines) == 1 else params.ref_ds)]
        if isinstance(qe, SurfaceVerticesQueryEngine):
            self.force_roi_seed = False
            if not self.params.combine_neighbormappers:
                raise NotImplementedError(
                    "Mapping from voxels to surface nodes is not "
                    "implmented yet. Try setting combine_neighbormappers to True."
                )
        self.nfeatures = datasets[params.ref_ds].nfeatures
        _shpaldebug("Performing Hyperalignment in searchlights")
        # Setting up centers for running SL Hyperalignment
        if params.sparse_radius is None:
            roi_ids = self._get_verified_ids(queryengines) \
                if params.mask_node_ids is None \
                else params.mask_node_ids
        else:
            if params.queryengine is not None:
                raise NotImplementedError(
                    "using sparse_radius whenever custom queryengine is "
                    "provided is not yet supported.")
            _shpaldebug("Setting up sparse neighborhood")
            from mvpa2.misc.neighborhood import scatter_neighborhoods
            if params.mask_node_ids is None:
                scoords, sidx = scatter_neighborhoods(
                    Sphere(params.sparse_radius),
                    datasets[params.ref_ds].fa.voxel_indices,
                    deterministic=True)
                roi_ids = sidx
            else:
                scoords, sidx = scatter_neighborhoods(
                    Sphere(params.sparse_radius),
                    datasets[params.ref_ds].fa.voxel_indices[
                        params.mask_node_ids],
                    deterministic=True)
                roi_ids = [params.mask_node_ids[sid] for sid in sidx]

        # Initialize projections
        _shpaldebug('Initializing projection matrices')
        self.projections = [
            csc_matrix((self.nfeatures, self.nfeatures), dtype=params.dtype)
            for isub in range(self.ndatasets)
        ]

        # compute
        if params.nproc is not None and params.nproc > 1:
            # split all target ROIs centers into `nproc` equally sized blocks
            nproc_needed = min(len(roi_ids), params.nproc)
            params.nblocks = nproc_needed \
                if params.nblocks is None else params.nblocks
            params.nblocks = min(len(roi_ids), params.nblocks)
            node_blocks = np.array_split(roi_ids, params.nblocks)
            # the next block sets up the infrastructure for parallel computing
            # this can easily be changed into a ParallelPython loop, if we
            # decide to have a PP job server in PyMVPA
            import pprocess
            p_results = pprocess.Map(limit=nproc_needed)
            if __debug__:
                debug(
                    'SLC', "Starting off %s child processes for nblocks=%i" %
                    (nproc_needed, params.nblocks))
            compute = p_results.manage(pprocess.MakeParallel(self._proc_block))
            seed = mvpa2.get_random_seed()
            for iblock, block in enumerate(node_blocks):
                # should we maybe deepcopy the measure to have a unique and
                # independent one per process?
                compute(block,
                        datasets,
                        copy.copy(hmeasure),
                        queryengines,
                        seed=seed,
                        iblock=iblock)
        else:
            # otherwise collect the results in an 1-item list
            _shpaldebug('Using 1 process to compute mappers.')
            if params.nblocks is None:
                params.nblocks = 1
            params.nblocks = min(len(roi_ids), params.nblocks)
            node_blocks = np.array_split(roi_ids, params.nblocks)
            p_results = [
                self._proc_block(block, datasets, hmeasure, queryengines)
                for block in node_blocks
            ]
        results_ds = self.__handle_all_results(p_results)
        # Dummy iterator for, you know, iteration
        list(results_ds)

        _shpaldebug(
            'Wrapping projection matrices into StaticProjectionMappers')
        self.projections = [
            StaticProjectionMapper(proj=proj, recon=proj.T)
            if params.compute_recon else StaticProjectionMapper(proj=proj)
            for proj in self.projections
        ]
        return self.projections
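Putting it together, a hedged end-to-end sketch mirroring the test earlier in this collection (``dss`` is a list of z-scored datasets sharing a 'voxel_indices' feature attribute; assumes SearchlightHyperalignment comes from mvpa2.algorithms.searchlight_hyperalignment):

slhyp = SearchlightHyperalignment(radius=2, combine_neighbormappers=True)
mappers = slhyp(dss)                  # one StaticProjectionMapper per input dataset
aligned = [m.forward(sd) for m, sd in zip(mappers, dss)]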