Code example #1
File: sim.py Project: mvdoc/reprclust
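The listing relies on PyMVPA and on several helpers defined elsewhere in sim.py (vector_form, get_intrinsic_noises, generate_mixins, filter_each_2d) that this excerpt does not show. Below is a minimal sketch of plausible stand-ins so the example can run on its own: the import paths follow the PyMVPA 2.x layout, the smoothing is assumed to be Gaussian via scipy, and the helper bodies are illustrative reconstructions rather than the project's originals.

import numpy as np
from scipy.ndimage import gaussian_filter

# PyMVPA 2.x imports used by the listing
from mvpa2.base.dataset import vstack as dsvstack
from mvpa2.datasets.base import Dataset
from mvpa2.mappers.flatten import FlattenMapper
from mvpa2.mappers.fx import mean_group_sample
from mvpa2.misc.neighborhood import Sphere


def vector_form(dissim):
    # upper triangle of a square dissimilarity matrix as a 1-D vector;
    # inputs that are effectively 1-D are passed through unchanged
    dissim = np.atleast_2d(dissim)
    if dissim.shape[0] == 1:
        return np.asarray(dissim).ravel()
    return dissim[np.triu_indices(dissim.shape[0], k=1)]


def filter_each_2d(data, sigma):
    # smooth each 2-D slice along the last axis independently
    out = np.empty_like(data)
    for k in range(data.shape[-1]):
        out[..., k] = gaussian_filter(data[..., k], sigma)
    return out


def get_intrinsic_noises(shape, std, sigma, n=1):
    # n random fields: white normal noise, scaled, then spatially smoothed
    return [filter_each_2d(np.random.normal(size=shape) * std, sigma)
            for _ in range(n)]


def generate_mixins(nruns):
    # one mixing weight per run; plain normal draws (see the TODO about
    # "coloring" these in the docstring)
    return np.random.normal(size=nruns)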
def simple_sim1(
        shape,
        dissims,
        rois_arrangement='circle',
        roi_neighborhood=Sphere(5),
        nruns=1,
        nsubjects=1,
        # noise components -- for now we just add normal noise, also
        # with spatial smoothing, to possibly create differences in
        # noise characteristics across the different kinds
        #
        # "Instrumental noise" -- generic nuisance
        noise_independent_std=0.4,
        noise_independent_smooth=3.,
        # "Intrinsic signal", specific per each subject (due to
        # motion, whatever) -- might be fun for someone to cluster,
        # but irrelevant for us
        noise_subject_n=1,
        noise_subject_std=0.4,
        noise_subject_smooth=1.5,
        # "Intrinsic common signal" -- probably generalizes across
        # subjects and fun for someone studying veins to get those
        # reproducible clusters.  It will be mixed in also with
        # different weights per each run.
        # Again -- might be fun for someone to cluster, but not for us
        # since it would not be representative of the original signal
        noise_common_n=1,
        noise_common_std=0.4,
        noise_common_smooth=2.):
    """Simulate "data" containing similarity matrices with 3 noise
    components for multiple subjects

    Noise components are:

    - random normal noise, also spatially smoothed (should have smaller
    sigma for smoothing probably than for intrinsic noise)

    - intrinsic noise which is composed from a set of random fields,
    generated by random normal noise with subsequent spatial filtering,
    which are then mixed into each run data with random weights.  They
    are to simulate subject-specific intrinsic signals such as artifacts
    due to motion, possible subject-specific physiological processes

    - intrinsic common noise across subjects intrinsic noise (e.g. all of them
    have similar blood distribution networks and other physiological
    parameters, and some intrinsic networks, which although similar in
    space would have different mix-in coefficients across subject/runs)

    Theoretically, decomposition methods (such as ICA, PCA, etc) should help to
    identify such common noise components and filter them out.  Also methods
    which iteratively remove non-informative projections (such as GLMdenoise)
    should be effective to identify those mix-ins

    TODO: now mix-in happens with purely normal random weights,  ideally we
    should color those as well
    """
    ndissims = len(dissims)

    # First we convert back to correlations and Fisher-transform so we
    # can add normal noise; clip extreme values that arctanh would map
    # to +/- infinity
    dissims = np.array(dissims)
    dissims = 1. - dissims
    dissims[dissims == 1] = 0.99
    dissims[dissims == -1] = -0.99
    # Fisher transform
    dissims = np.arctanh(dissims)

    # generate target clean "picture"
    d = np.asanyarray(dissims[0])
    signal_clean = np.zeros(shape + (len(vector_form(d)), ))

    # generate ground truth for clustering
    cluster_truth = np.zeros(shape, dtype='int')

    if rois_arrangement == 'circle':
        radius = min(shape[:2]) / 4.
        center = np.array((radius * 2, ) * len(shape)).astype(int)
        # arrange at quarter distance from center
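        # ROI centers are spaced evenly on that circle, one per
        # dissimilarity matrix, at angle 2*pi*i/ndissims for ROI i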
        for i, dissim in enumerate(dissims):
            dissim = vector_form(dissim)
            # that is kinda boring -- the same dissimilarity to each
            # voxel???
            #
            # TODO: come up with a better arrangement/idea, e.g. to
            # generate an MVPA pattern which would satisfy the
            # dissimilarity (not exactly but at least close).  That
            # would make more sense
            roi_center = center.copy()
            roi_center[0] += int(radius * np.cos(2 * np.pi * i / ndissims))
            roi_center[1] += int(radius * np.sin(2 * np.pi * i / ndissims))
            for coords in roi_neighborhood(roi_center):
                acoords = np.asanyarray(coords)
                # skip neighborhood coordinates falling outside the volume
                if np.all(acoords >= 0) and \
                   np.all(acoords < signal_clean.shape[:len(coords)]):
                    signal_clean[tuple(coords)] = dissim
                    cluster_truth[tuple(coords)] = i + 1
    else:
        raise ValueError("Unknown rois_arrangement %r -- only 'circle' "
                         "is supported" % rois_arrangement)

    # generated randomly and will be mixed into subjects with different weights
    # TODO: static across runs within subject??  if so -- would be no different
    #       from having RSAs?
    common_noises = get_intrinsic_noises(signal_clean.shape,
                                         std=noise_common_std,
                                         sigma=noise_common_smooth,
                                         n=noise_common_n)
    assert common_noises[0].ndim == 3, "There should be no time comp"

    # Now let's generate per-subject, per-run data by adding some noise(s)
    # all_signals = []
    dss = []
    for isubject in range(nsubjects):
        # Interesting noise, simulating some underlying process which has
        # nothing to do with the original design/similarity but has spatial
        # structure that repeats through runs with random weights (think of
        # it as a principal component)

        # generated randomly for each subject separately, but they should have
        # common structure across runs
        subj_specific_noises = get_intrinsic_noises(signal_clean.shape,
                                                    std=noise_subject_std,
                                                    sigma=noise_subject_smooth,
                                                    n=noise_subject_n)
        assert subj_specific_noises[0].ndim == 3, "There should be no time comp"
        # subject_signals = []
        dss_subject = []
        subj_common_noises = [
            noise * np.random.normal() for noise in common_noises
        ]

        subj_specific_mixins = generate_mixins(nruns)
        subj_common_mixins = generate_mixins(nruns)

        for run in range(nruns):
            signal_run = signal_clean.copy()
            for noise in subj_specific_noises:
                signal_run += noise * subj_specific_mixins[run]
            for noise in subj_common_noises:
                signal_run += noise * subj_common_mixins[run]
            # generic noise -- no common structure across subjects/runs
            signal_run += filter_each_2d(
                np.random.normal(size=signal_clean.shape) * noise_independent_std,
                noise_independent_smooth)

            # go back to correlations with inverse of fisher
            signal_run = np.tanh(signal_run)
            # rollaxis to bring similarities into leading dimension
            ds = Dataset(np.rollaxis(signal_run, 2, 0))
            ds.sa['chunks'] = [run]
            ds.sa['dissimilarity'] = np.arange(len(dissim))  # Lame one for now
            ds_flat = ds.get_mapped(
                FlattenMapper(shape=ds.shape[1:], space='pixel_indices'))
            dss_subject.append(ds_flat)
            #subject_signals.append(signal_run)
        #all_signals.append(subject_signals)
        ds = dsvstack(dss_subject)
        ds.a['mapper'] = dss_subject[0].a.mapper  # .a attributes are not transferred by vstack
        dss.append(ds)

    # sanity checks on the assembled datasets
    assert len(dss) == nsubjects
    assert len(dss[0]) == nruns * len(dissim)

    return np.tanh(signal_clean), cluster_truth, dss
Code example #2
File: sim.py Project: mvdoc/reprclust
if __name__ == '__main__':
    a_clean, cluster_truth, dss = simple_sim1(
        (64, 64), [[1], [0.8], [0.5], [0.3]],
        roi_neighborhood=Sphere(6),
        nruns=3, nsubjects=2,
        noise_subject_n=1, noise_subject_std=5, noise_subject_smooth=5,
        noise_independent_std=4, noise_independent_smooth=1.5,
        noise_common_n=1, noise_common_std=3)

    # just a little helper: map flattened samples back to 2-D images
    # (all subjects share the same mapper, so dss[0]'s will do)
    def get2d(ds):
        return dss[0].a.mapper.reverse(ds)

    import pylab as pl
    pl.clf()
    # Sample plots
    for s in [0, 1]:
        ds2 = get2d(dss[s])
        for r in [0, 1]:
            pl.subplot(3, 3, 1 + r + s * 3)
            pl.imshow(ds2[ds2.sa.chunks == r].samples[0],
                      interpolation='nearest')
            pl.ylabel('subj%d' % s)
            pl.xlabel('run%d' % r)
        pl.subplot(3, 3, 3 + s * 3)
        pl.imshow(get2d(mean_group_sample(['dissimilarity'])(dss[s]).samples)[0],
                  interpolation='nearest')
        pl.xlabel('mean')

    ds = dsvstack(dss)
    ds.a['mapper'] = dss[0].a.mapper
    ds_mean = mean_group_sample(['dissimilarity', 'chunks'])(ds)
    for r in [0, 1]:
        ds_mean_run = ds.a.mapper.reverse(ds_mean[ds_mean.chunks == r])
        pl.subplot(3, 3, 1 + r + 2 * 3)
        pl.imshow(ds_mean_run.samples[0], interpolation='nearest')
        pl.ylabel('mean(subj)')
        pl.xlabel('run%d' % r)
    ds_global_mean = mean_group_sample(['dissimilarity'])(ds)
    pl.subplot(3, 3, 3 + 2 * 3)
    pl.imshow(get2d(ds_global_mean).samples[0], interpolation='nearest')
    pl.xlabel('mean')
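
As a quick visual sanity check, one can also plot the returned noiseless signal next to the clustering ground truth. A minimal sketch, assuming the a_clean and cluster_truth values returned by the simple_sim1 call above are in scope (the layout is illustrative, not part of the original script):

pl.figure()
pl.subplot(1, 2, 1)
pl.imshow(a_clean[..., 0], interpolation='nearest')
pl.xlabel('clean signal (first dissimilarity)')
pl.subplot(1, 2, 2)
pl.imshow(cluster_truth, interpolation='nearest')
pl.xlabel('cluster ground truth')
pl.show()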