コード例 #1
0
def test_featuregroup_mapper():
    ds = Dataset(np.arange(24).reshape(3, 8))
    ds.fa['roi'] = [0, 1] * 4
    # just to check
    ds.sa['chunks'] = np.arange(3)

    # correct results
    csamples = [[3, 4], [11, 12], [19, 20]]

    m = mean_group_feature(['roi'])
    mds = m.forward(ds)
    assert_equal(mds.shape, (3, 2))
    assert_array_equal(mds.samples, csamples)
    assert_array_equal(mds.fa.roi, np.unique([0, 1] * 4))
    # FAs should simply remain the same
    assert_array_equal(mds.sa.chunks, np.arange(3))

    # now without grouping
    m = mean_feature()
    # forwarding just the samples should yield the same result
    assert_array_equal(m.forward(ds.samples), m.forward(ds).samples)

    # And when operating on a dataset with >1D samples, then operate
    # only across "features", i.e. 1st dimension
    ds = Dataset(np.arange(24).reshape(3, 2, 2, 2))
    mapped = ds.get_mapped(m)
    assert_array_equal(m.forward(ds.samples), mapped.samples)
    assert_array_equal(mapped.samples.shape, (3, 2, 2))
    assert_array_equal(mapped.samples, np.mean(ds.samples, axis=1))
    # and still could map back? ;) not ATM, so just to ensure consistency
    assert_raises(NotImplementedError, mapped.a.mapper.reverse, mapped.samples)
    # but it should also work with standard 2d sample arrays
    ds = Dataset(np.arange(24).reshape(3, 8))
    mapped = ds.get_mapped(m)
    assert_array_equal(mapped.samples.shape, (3, 1))
コード例 #2
0
ファイル: test_fxmapper.py プロジェクト: schoeke/PyMVPA
def test_featuregroup_mapper():
    ds = Dataset(np.arange(24).reshape(3,8))
    ds.fa['roi'] = [0, 1] * 4
    # just to check
    ds.sa['chunks'] = np.arange(3)

    # correct results
    csamples = [[3, 4], [11, 12], [19, 20]]
    croi = [0, 1]
    cchunks = np.arange(3)

    m = mean_group_feature(['roi'])
    mds = m.forward(ds)
    assert_equal(mds.shape, (3, 2))
    assert_array_equal(mds.samples, csamples)
    assert_array_equal(mds.fa.roi, np.unique([0, 1] * 4))
    # FAs should simply remain the same
    assert_array_equal(mds.sa.chunks, np.arange(3))

    # now without grouping
    m = mean_feature()
    # forwarding just the samples should yield the same result
    assert_array_equal(m.forward(ds.samples),
                       m.forward(ds).samples)

    # And when operating on a dataset with >1D samples, then operate
    # only across "features", i.e. 1st dimension
    ds = Dataset(np.arange(24).reshape(3,2,2,2))
    mapped = ds.get_mapped(m)
    assert_array_equal(m.forward(ds.samples),
                       mapped.samples)
    assert_array_equal(mapped.samples.shape, (3, 2, 2))
    assert_array_equal(mapped.samples, np.mean(ds.samples, axis=1))
    # and still could map back? ;) not ATM, so just to ensure consistency
    assert_raises(NotImplementedError,
                  mapped.a.mapper.reverse, mapped.samples)
    # but it should also work with standard 2d sample arrays
    ds = Dataset(np.arange(24).reshape(3,8))
    mapped = ds.get_mapped(m)
    assert_array_equal(mapped.samples.shape, (3, 1))
コード例 #3
0
ファイル: mri.py プロジェクト: JohnGriffiths/nidata
def fmri_dataset(samples, targets=None, chunks=None, mask=None,
                 sprefix='voxel', tprefix='time', add_fa=None,):
    """Create a dataset from an fMRI timeseries image.

    The timeseries image serves as the samples data, with each volume becoming
    a sample. All 3D volume samples are flattened into one-dimensional feature
    vectors, optionally being masked (i.e. subset of voxels corresponding to
    non-zero elements in a mask image).

    In addition to (optional) samples attributes for targets and chunks the
    returned dataset contains a number of additional attributes:

    Samples attributes (per each volume):

      * volume index (time_indices)
      * volume acquisition time (time_coord)

    Feature attributes (per each voxel):

      * voxel indices (voxel_indices), sometimes referred to as ijk

    Dataset attributes:

      * dump of the image (e.g. NIfTI) header data (imghdr)
      * class of the image (e.g. Nifti1Image) (imgtype)
      * volume extent (voxel_dim)
      * voxel extent (voxel_eldim)

    The default attribute name is listed in parenthesis, but may be altered by
    the corresponding prefix arguments. The validity of the attribute values
    relies on correct settings in the NIfTI image header.

    Parameters
    ----------
    samples : str or NiftiImage or list
      fMRI timeseries, specified either as a filename (single file 4D image),
      an image instance (4D image), or a list of filenames or image instances
      (each list item corresponding to a 3D volume).
    targets : scalar or sequence
      Label attribute for each volume in the timeseries, or a scalar value that
      is assigned to all samples.
    chunks : scalar or sequence
      Chunk attribute for each volume in the timeseries, or a scalar value that
      is assigned to all samples.
    mask : str or NiftiImage
      Filename or image instance of a 3D volume mask. Voxels corresponding to
      non-zero elements in the mask will be selected. The mask has to be in the
      same space (orientation and dimensions) as the timeseries image
    sprefix : str or None
      Prefix for attribute names describing spatial properties of the
      timeseries. If None, no such attributes are stored in the dataset.
    tprefix : str or None
      Prefix for attribute names describing temporal properties of the
      timeseries. If None, no such attributes are stored in the dataset.
    add_fa : dict or None
      Optional dictionary with additional volumetric data that shall be stored
      as feature attributes in the dataset. The dictionary key serves as the
      feature attribute name. Each value might be of any type supported by the
      'mask' argument of this function.

    Returns
    -------
    Dataset
    """
    # load the samples
    imgdata, imghdr, img = _load_anyimg(samples, ensure=True, enforce_dim=4)

    # figure out what the mask is, but only handle known cases, the rest
    # goes directly into the mapper which maybe knows more
    maskimg = _load_anyimg(mask)
    if maskimg is None:
        pass
    else:
        # take just data and ignore the header
        mask = maskimg[0]

    # compile the samples attributes
    sa = {}
    if not targets is None:
        sa['targets'] = _expand_attribute(targets, imgdata.shape[0], 'targets')
    if not chunks is None:
        sa['chunks'] = _expand_attribute(chunks, imgdata.shape[0], 'chunks')

    # create a dataset
    ds = Dataset(imgdata, sa=sa)
    if sprefix is None:
        space = None
    else:
        space = sprefix + '_indices'
    ds = ds.get_mapped(FlattenMapper(shape=imgdata.shape[1:], space=space))

    # now apply the mask if any
    if not mask is None:
        flatmask = ds.a.mapper.forward1(mask)
        # direct slicing is possible, and it is potentially more efficient,
        # so let's use it
        #mapper = StaticFeatureSelection(flatmask)
        #ds = ds.get_mapped(StaticFeatureSelection(flatmask))
        ds = ds[:, flatmask != 0]

    # load and store additional feature attributes
    if not add_fa is None:
        for fattr in add_fa:
            value = _load_anyimg(add_fa[fattr], ensure=True)[0]
            ds.fa[fattr] = ds.a.mapper.forward1(value)

    # store interesting NIfTI props in the dataset in a more portable way
    ds.a['imgaffine'] = img.get_affine()
    ds.a['imgtype'] = img.__class__.__name__
    # stick the header instance in as is, and ...
    ds.a['imghdr'] = imghdr
    # ... let strip_nibabel() be the central place to take care of any header
    # conversion into non-NiBabel dtypes
    strip_nibabel(ds)

    # If there is a space assigned , store the extent of that space
    if sprefix is not None:
        ds.a[sprefix + '_dim'] = imgdata.shape[1:]
        # 'voxdim' is (x,y,z) while 'samples' are (t,z,y,x)
        ds.a[sprefix + '_eldim'] = _get_voxdim(imghdr)
        # TODO extend with the unit
    if tprefix is not None:
        ds.sa[tprefix + '_indices'] = np.arange(len(ds), dtype='int')
        ds.sa[tprefix + '_coords'] = \
            np.arange(len(ds), dtype='float') * _get_dt(imghdr)
        # TODO extend with the unit

    return ds
コード例 #4
0
def eeglab_dataset(samples):
    '''Make a Dataset instance from EEGLAB input data

    Parameters
    ----------
    samples: str
        Filename of EEGLAB text file

    Returns
    -------
    ds: mvpa2.base.dataset.Dataset
        Dataset with the contents of the input file
    '''
    if not isinstance(samples, basestring):
        raise ValueError("Samples should be a string")

    if _looks_like_filename(samples):
        if not os.path.exists(samples):
            raise ValueError("Input looks like a filename, but file"
                                " %s does not exist" % samples)
        with open(samples) as f:
            samples = f.read()

    lines = samples.split('\n')
    samples = []
    cur_sample = None

    for i, line in enumerate(lines):
        if not line:
            continue
        if i == 0:
            # first line contains the channel names
            channel_labels = line.split()
            n_channels = len(channel_labels)
        else:
            # first value is the time point, the remainders the value 
            # for each channel
            values = map(float, line.split())
            t = values[0]  # time 
            eeg = values[1:] # values for each electrode

            if len(eeg) != n_channels:
                raise ValueError("Line %d: expected %d values but found %d" %
                                    (n_channels, len(eeg)))

            if cur_sample is None or t < prev_t:
                # new sample
                cur_sample = []
                samples.append(cur_sample)

            cur_sample.append((t, eeg))
            prev_t = t

    # get and verify number of elements in each dimension
    n_samples = len(samples)
    n_timepoints_all = map(len, samples)

    n_timepoints_unique = set(n_timepoints_all)
    if len(n_timepoints_unique) != 1:
        raise ValueError("Different number of time points in different"
                            "samples: found %d different lengths" %
                            len(n_timepoints_unique))

    n_timepoints = n_timepoints_all[0]

    shape = (n_samples, n_timepoints, n_channels)

    # allocate space for data
    data = np.zeros(shape)

    # make a list of all channels and timepoints
    channel_array = np.asarray(channel_labels)
    timepoint_array = np.asarray([samples[0][i][0]
                                  for i in xrange(n_timepoints)])

    dts = timepoint_array[1:] - timepoint_array[:-1]
    if not np.all(dts == dts[0]):
        raise ValueError("Delta time points are different")

    # put the values in the data array
    for i, sample in enumerate(samples):
        for j, (t, values) in enumerate(sample):
            # check that the time is the same
            if i > 0 and timepoint_array[j] != t:
                raise ValueError("Sample %d, time point %s is different "
                                 "than the first sample (%s)" %
                                 (i, t, timepoint_array[j]))

            for k, value in enumerate(values):
                data[i, j, k] = value

    samples = None # and let gc do it's job

    # make a Dataset instance with the data
    ds = Dataset(data)

    # append a flatten_mapper to go from 3D (sample X time X channel)
    # to 2D (sample X (time X channel))
    flatten_mapper = FlattenMapper(shape=shape[1:], space='time_channel_indices')
    ds = ds.get_mapped(flatten_mapper)

    # make this a 3D array of the proper size
    channel_array_3D = np.tile(channel_array, (1, n_timepoints, 1))
    timepoint_array_3D = np.tile(np.reshape(timepoint_array, (-1, 1)),
                                            (1, 1, n_channels))

    # for consistency use the flattan_mapper defined above to 
    # flatten channel and timepoint names as well
    ds.fa['channelids'] = flatten_mapper.forward(channel_array_3D).ravel()
    ds.fa['timepoints'] = flatten_mapper.forward(timepoint_array_3D).ravel()

    # make some dynamic properties
    # XXX at the moment we don't have propert 'protection' in case
    # the feature space is sliced in a way so that some channels and/or
    # timepoints occur more often than others 
    _eeglab_set_attributes(ds)

    return ds
コード例 #5
0
def fmri_dataset(
    samples,
    targets=None,
    chunks=None,
    mask=None,
    sprefix='voxel',
    tprefix='time',
    add_fa=None,
):
    """Create a dataset from an fMRI timeseries image.

    The timeseries image serves as the samples data, with each volume becoming
    a sample. All 3D volume samples are flattened into one-dimensional feature
    vectors, optionally being masked (i.e. subset of voxels corresponding to
    non-zero elements in a mask image).

    In addition to (optional) samples attributes for targets and chunks the
    returned dataset contains a number of additional attributes:

    Samples attributes (per each volume):

      * volume index (time_indices)
      * volume acquisition time (time_coord)

    Feature attributes (per each voxel):

      * voxel indices (voxel_indices), sometimes referred to as ijk

    Dataset attributes:

      * dump of the image (e.g. NIfTI) header data (imghdr)
      * class of the image (e.g. Nifti1Image) (imgtype)
      * volume extent (voxel_dim)
      * voxel extent (voxel_eldim)

    The default attribute name is listed in parenthesis, but may be altered by
    the corresponding prefix arguments. The validity of the attribute values
    relies on correct settings in the NIfTI image header.

    Parameters
    ----------
    samples : str or NiftiImage or list
      fMRI timeseries, specified either as a filename (single file 4D image),
      an image instance (4D image), or a list of filenames or image instances
      (each list item corresponding to a 3D volume).
    targets : scalar or sequence
      Label attribute for each volume in the timeseries, or a scalar value that
      is assigned to all samples.
    chunks : scalar or sequence
      Chunk attribute for each volume in the timeseries, or a scalar value that
      is assigned to all samples.
    mask : str or NiftiImage
      Filename or image instance of a 3D volume mask. Voxels corresponding to
      non-zero elements in the mask will be selected. The mask has to be in the
      same space (orientation and dimensions) as the timeseries image
    sprefix : str or None
      Prefix for attribute names describing spatial properties of the
      timeseries. If None, no such attributes are stored in the dataset.
    tprefix : str or None
      Prefix for attribute names describing temporal properties of the
      timeseries. If None, no such attributes are stored in the dataset.
    add_fa : dict or None
      Optional dictionary with additional volumetric data that shall be stored
      as feature attributes in the dataset. The dictionary key serves as the
      feature attribute name. Each value might be of any type supported by the
      'mask' argument of this function.

    Returns
    -------
    Dataset
    """
    # load the samples
    imgdata, imghdr, img = _load_anyimg(samples, ensure=True, enforce_dim=4)

    # figure out what the mask is, but only handle known cases, the rest
    # goes directly into the mapper which maybe knows more
    maskimg = _load_anyimg(mask)
    if maskimg is None:
        pass
    else:
        # take just data and ignore the header
        mask = maskimg[0]

    # compile the samples attributes
    sa = {}
    if targets is not None:
        sa['targets'] = _expand_attribute(targets, imgdata.shape[0], 'targets')
    if chunks is not None:
        sa['chunks'] = _expand_attribute(chunks, imgdata.shape[0], 'chunks')

    # create a dataset
    ds = Dataset(imgdata, sa=sa)
    if sprefix is None:
        space = None
    else:
        space = sprefix + '_indices'
    ds = ds.get_mapped(FlattenMapper(shape=imgdata.shape[1:], space=space))

    # now apply the mask if any
    if mask is not None:
        flatmask = ds.a.mapper.forward1(mask)
        # direct slicing is possible, and it is potentially more efficient,
        # so let's use it
        #mapper = StaticFeatureSelection(flatmask)
        #ds = ds.get_mapped(StaticFeatureSelection(flatmask))
        ds = ds[:, flatmask != 0]

    # load and store additional feature attributes
    if add_fa is not None:
        for fattr in add_fa:
            value = _load_anyimg(add_fa[fattr], ensure=True)[0]
            ds.fa[fattr] = ds.a.mapper.forward1(value)

    # store interesting NIfTI props in the dataset in a more portable way
    ds.a['imgaffine'] = img.affine
    ds.a['imgtype'] = img.__class__.__name__
    # stick the header instance in as is, and ...
    ds.a['imghdr'] = imghdr
    # ... let strip_nibabel() be the central place to take care of any header
    # conversion into non-NiBabel dtypes
    strip_nibabel(ds)

    # If there is a space assigned , store the extent of that space
    if sprefix is not None:
        ds.a[sprefix + '_dim'] = imgdata.shape[1:]
        # 'voxdim' is (x,y,z) while 'samples' are (t,z,y,x)
        ds.a[sprefix + '_eldim'] = _get_voxdim(imghdr)
        # TODO extend with the unit
    if tprefix is not None:
        ds.sa[tprefix + '_indices'] = np.arange(len(ds), dtype='int')
        ds.sa[tprefix + '_coords'] = \
            np.arange(len(ds), dtype='float') * _get_dt(imghdr)
        # TODO extend with the unit

    return ds
コード例 #6
0
def simple_sim1(
        shape,
        dissims,
        rois_arrangement='circle',
        roi_neighborhood=Sphere(5),
        nruns=1,
        nsubjects=1,
        # noise components -- we just add normal for now also with
        # spatial smoothing to possibly create difference in noise
        # characteristics across different kinds
        #
        # "Instrumental noise" -- generic nuisance
        noise_independent_std=0.4,
        noise_independent_smooth=3.,
        # "Intrinsic signal", specific per each subject (due to
        # motion, whatever) -- might be fun for someone to cluster,
        # but irrelevant for us
        noise_subject_n=1,
        noise_subject_std=0.4,
        noise_subject_smooth=1.5,
        # "Intrinsic common signal" -- probably generalizes across
        # subjects and fun for someone studying veins to get those
        # reproducible clusters.  It will be mixed in also with
        # different weights per each run.
        # Again -- might be fun for someone to cluster, but not for us
        # since it would not be representative of the original signal
        noise_common_n=1,
        noise_common_std=0.4,
        noise_common_smooth=2.):
    """Simulate "data" containing similarity matrices with 3 noise
    components for multiple subjects

    Noise components are:

    - random normal noise, also spatially smoothed (should have smaller
    sigma for smoothing probably than for intrinsic noise)

    - intrinsic noise which is composed from a set of random fields,
    generated by random normal noise with subsequent spatial filtering,
    which are then mixed into each run data with random weights.  They
    are to simulate subject-specific intrinsic signals such as artifacts
    due to motion, possible subject-specific physiological processes

    - intrinsic common noise across subjects intrinsic noise (e.g. all of them
    have similar blood distribution networks and other physiological
    parameters, and some intrinsic networks, which although similar in
    space would have different mix-in coefficients across subject/runs)

    Theoretically, decomposition methods (such as ICA, PCA, etc) should help to
    identify such common noise components and filter them out.  Also methods
    which iteratively remove non-informative projections (such as GLMdenoise)
    should be effective to identify those mix-ins

    TODO: now mix-in happens with purely normal random weights,  ideally we
    should color those as well
    """
    ndissims = len(dissims)

    # first we fisher transform so we can add normal noise
    # check first that we don't have extreme values that might give infinity
    dissims = np.array(dissims)
    dissims = 1. - dissims
    dissims[dissims == 1] = 0.99
    dissims[dissims == -1] = -0.99
    # fisher
    dissims = np.arctanh(dissims)

    # generate target clean "picture"
    d = np.asanyarray(dissims[0])
    signal_clean = np.zeros(shape + (len(vector_form(d)), ))

    # generate ground truth for clustering
    cluster_truth = np.zeros(shape, dtype='int')

    if rois_arrangement == 'circle':
        radius = min(shape[:2]) / 4.
        center = np.array((radius * 2, ) * len(shape)).astype(int)
        # arrange at quarter distance from center
        for i, dissim in enumerate(dissims):
            dissim = vector_form(dissim)
            # that is kinda boring -- the same dissimilarity to each
            # voxel???
            #
            # TODO: come up with a better arrangement/idea, e.g. to
            # generate an MVPA pattern which would satisfy the
            # dissimilarity (not exactly but at least close).  That
            # would make more sense
            roi_center = center.copy()
            roi_center[0] += int(radius * np.cos(2 * np.pi * i / ndissims))
            roi_center[1] += int(radius * np.sin(2 * np.pi * i / ndissims))
            for coords in roi_neighborhood(roi_center):
                acoords = np.asanyarray(coords)
                if np.all(acoords >= [0]*len(coords)) and \
                   np.all(acoords < signal_clean.shape[:len(coords)]):
                    signal_clean.__setitem__(coords, dissim)
                    cluster_truth.__setitem__(coords, i + 1)
    else:
        raise ValueError("I know only circle")

    # generated randomly and will be mixed into subjects with different weights
    # TODO: static across runs within subject??  if so -- would be no different
    #       from having RSAs?
    common_noises = get_intrinsic_noises(signal_clean.shape,
                                         std=noise_common_std,
                                         sigma=noise_common_smooth,
                                         n=noise_common_n)
    assert common_noises[0].ndim == 3, "There should be no time comp"

    # Now lets generate per subject and per run data by adding some noise(s)
    # all_signals = []
    dss = []
    for isubject in xrange(nsubjects):
        # Interesting noise, simulating some underlying process which has nothing
        # to do with original design/similarity but having spatial structure which
        # repeats through runs with random weights (consider it to be a principal component)

        # generated randomly for each subject separately, but they should have
        # common structure across runs
        subj_specific_noises = get_intrinsic_noises(signal_clean.shape,
                                                    std=noise_subject_std,
                                                    sigma=noise_subject_smooth,
                                                    n=noise_subject_n)
        assert subj_specific_noises[
            0].ndim == 3, "There should be no time comp"
        # subject_signals = []
        dss_subject = []
        subj_common_noises = [
            noise * np.random.normal() for noise in common_noises
        ]

        subj_specific_mixins = generate_mixins(nruns)
        subj_common_mixins = generate_mixins(nruns)

        for run in range(nruns):
            signal_run = signal_clean.copy()
            for noise in subj_specific_noises:
                signal_run += noise * subj_specific_mixins[run]
            for noise in subj_common_noises:
                signal_run += noise * subj_common_mixins[run]
            # generic noise -- no common structure across subjects/runs
            signal_run += filter_each_2d(
                np.random.normal(size=signal_clean.shape) *
                noise_independent_std, noise_independent_smooth)

            # go back to correlations with inverse of fisher
            signal_run = np.tanh(signal_run)
            # rollaxis to bring similarities into leading dimension
            ds = Dataset(np.rollaxis(signal_run, 2, 0))
            ds.sa['chunks'] = [run]
            ds.sa['dissimilarity'] = np.arange(len(dissim))  # Lame one for now
            ds_flat = ds.get_mapped(
                FlattenMapper(shape=ds.shape[1:], space='pixel_indices'))
            dss_subject.append(ds_flat)
            #subject_signals.append(signal_run)
        #all_signals.append(subject_signals)
        ds = dsvstack(dss_subject)
        ds.a['mapper'] = dss_subject[
            0].a.mapper  # .a are not transferred by vstack
        dss.append(ds)

    # Instrumental noise -- the most banal
    assert (len(dss) == nsubjects)
    assert (len(dss) == nsubjects)
    assert (len(dss[0]) == nruns * len(dissim))

    return np.tanh(signal_clean), cluster_truth, dss
コード例 #7
0
ファイル: sim.py プロジェクト: mvdoc/reprclust
def simple_sim1(shape, dissims,
                rois_arrangement='circle',
                roi_neighborhood=Sphere(5),
                nruns=1, nsubjects=1,
                # noise components -- we just add normal for now also with
                # spatial smoothing to possibly create difference in noise
                # characteristics across different kinds
                #
                # "Instrumental noise" -- generic nuisance
                noise_independent_std=0.4, noise_independent_smooth=3.,
                # "Intrinsic signal", specific per each subject (due to
                # motion, whatever) -- might be fun for someone to cluster,
                # but irrelevant for us
                noise_subject_n=1, noise_subject_std=0.4, noise_subject_smooth=1.5,
                # "Intrinsic common signal" -- probably generalizes across
                # subjects and fun for someone studying veins to get those
                # reproducible clusters.  It will be mixed in also with
                # different weights per each run.
                # Again -- might be fun for someone to cluster, but not for us
                # since it would not be representative of the original signal
                noise_common_n=1, noise_common_std=0.4, noise_common_smooth=2.
                ):
    """Simulate "data" containing similarity matrices with 3 noise
    components for multiple subjects

    Noise components are:

    - random normal noise, also spatially smoothed (should have smaller
    sigma for smoothing probably than for intrinsic noise)

    - intrinsic noise which is composed from a set of random fields,
    generated by random normal noise with subsequent spatial filtering,
    which are then mixed into each run data with random weights.  They
    are to simulate subject-specific intrinsic signals such as artifacts
    due to motion, possible subject-specific physiological processes

    - intrinsic common noise across subjects intrinsic noise (e.g. all of them
    have similar blood distribution networks and other physiological
    parameters, and some intrinsic networks, which although similar in
    space would have different mix-in coefficients across subject/runs)

    Theoretically, decomposition methods (such as ICA, PCA, etc) should help to
    identify such common noise components and filter them out.  Also methods
    which iteratively remove non-informative projections (such as GLMdenoise)
    should be effective to identify those mix-ins

    TODO: now mix-in happens with purely normal random weights,  ideally we
    should color those as well
    """
    ndissims = len(dissims)

    # first we fisher transform so we can add normal noise
    # check first that we don't have extreme values that might give infinity
    dissims = np.array(dissims)
    dissims = 1. - dissims
    dissims[dissims==1] = 0.99
    dissims[dissims==-1] = -0.99
    # fisher
    dissims = np.arctanh(dissims)

    # generate target clean "picture"
    d = np.asanyarray(dissims[0])
    signal_clean = np.zeros(shape + (len(vector_form(d)),))

    # generate ground truth for clustering
    cluster_truth = np.zeros(shape, dtype='int')

    if rois_arrangement == 'circle':
        radius = min(shape[:2])/4.
        center = np.array((radius*2,) * len(shape)).astype(int)
        # arrange at quarter distance from center
        for i, dissim in enumerate(dissims):
            dissim = vector_form(dissim)
            # that is kinda boring -- the same dissimilarity to each
            # voxel???
            #
            # TODO: come up with a better arrangement/idea, e.g. to
            # generate an MVPA pattern which would satisfy the
            # dissimilarity (not exactly but at least close).  That
            # would make more sense
            roi_center = center.copy()
            roi_center[0] += int(radius * np.cos(2*np.pi*i/ndissims))
            roi_center[1] += int(radius * np.sin(2*np.pi*i/ndissims))
            for coords in roi_neighborhood(roi_center):
                acoords = np.asanyarray(coords)
                if np.all(acoords >= [0]*len(coords)) and \
                   np.all(acoords < signal_clean.shape[:len(coords)]):
                    signal_clean.__setitem__(coords, dissim)
                    cluster_truth.__setitem__(coords, i+1)
    else:
        raise ValueError("I know only circle")

    # generated randomly and will be mixed into subjects with different weights
    # TODO: static across runs within subject??  if so -- would be no different
    #       from having RSAs?
    common_noises = get_intrinsic_noises(
        signal_clean.shape,
        std=noise_common_std,
        sigma=noise_common_smooth,
        n=noise_common_n)
    assert common_noises[0].ndim == 3, "There should be no time comp"

    # Now lets generate per subject and per run data by adding some noise(s)
    # all_signals = []
    dss = []
    for isubject in xrange(nsubjects):
        # Interesting noise, simulating some underlying process which has nothing
        # to do with original design/similarity but having spatial structure which
        # repeats through runs with random weights (consider it to be a principal component)

        # generated randomly for each subject separately, but they should have
        # common structure across runs
        subj_specific_noises = get_intrinsic_noises(signal_clean.shape,
                                                std=noise_subject_std,
                                                sigma=noise_subject_smooth,
                                                n=noise_subject_n)
        assert subj_specific_noises[0].ndim == 3, "There should be no time comp"
        # subject_signals = []
        dss_subject = []
        subj_common_noises = [noise * np.random.normal()
                              for noise in common_noises]

        subj_specific_mixins = generate_mixins(nruns)
        subj_common_mixins = generate_mixins(nruns)

        for run in range(nruns):
            signal_run = signal_clean.copy()
            for noise in subj_specific_noises:
                signal_run += noise * subj_specific_mixins[run]
            for noise in subj_common_noises:
                signal_run += noise * subj_common_mixins[run]
            # generic noise -- no common structure across subjects/runs
            signal_run += filter_each_2d(
                np.random.normal(size=signal_clean.shape)*noise_independent_std,
                noise_independent_smooth)

            # go back to correlations with inverse of fisher
            signal_run = np.tanh(signal_run)
            # rollaxis to bring similarities into leading dimension
            ds = Dataset(np.rollaxis(signal_run, 2, 0))
            ds.sa['chunks'] = [run]
            ds.sa['dissimilarity'] = np.arange(len(dissim))  # Lame one for now
            ds_flat = ds.get_mapped(FlattenMapper(shape=ds.shape[1:],
                                                  space='pixel_indices'))
            dss_subject.append(ds_flat)
            #subject_signals.append(signal_run)
        #all_signals.append(subject_signals)
        ds = dsvstack(dss_subject)
        ds.a['mapper'] = dss_subject[0].a.mapper   # .a are not transferred by vstack
        dss.append(ds)

    # Instrumental noise -- the most banal
    assert(len(dss) == nsubjects)
    assert(len(dss) == nsubjects)
    assert(len(dss[0]) == nruns*len(dissim))

    return np.tanh(signal_clean), cluster_truth, dss