Example #1
def fmri_dataset(samples, targets=None, chunks=None, mask=None,
                 sprefix='voxel', tprefix='time', add_fa=None,):
    """Create a dataset from an fMRI timeseries image.

    The timeseries image serves as the samples data, with each volume becoming
    a sample. All 3D volume samples are flattened into one-dimensional feature
    vectors, optionally being masked (i.e. subset of voxels corresponding to
    non-zero elements in a mask image).

    In addition to (optional) samples attributes for targets and chunks the
    returned dataset contains a number of additional attributes:

    Samples attributes (per each volume):

      * volume index (time_indices)
      * volume acquisition time (time_coord)

    Feature attributes (per each voxel):

      * voxel indices (voxel_indices), sometimes referred to as ijk

    Dataset attributes:

      * dump of the image (e.g. NIfTI) header data (imghdr)
      * class of the image (e.g. Nifti1Image) (imgtype)
      * volume extent (voxel_dim)
      * voxel extent (voxel_eldim)

    The default attribute name is listed in parenthesis, but may be altered by
    the corresponding prefix arguments. The validity of the attribute values
    relies on correct settings in the NIfTI image header.

    samples : str or NiftiImage or list
      fMRI timeseries, specified either as a filename (single file 4D image),
      an image instance (4D image), or a list of filenames or image instances
      (each list item corresponding to a 3D volume).
    targets : scalar or sequence
      Label attribute for each volume in the timeseries, or a scalar value that
      is assigned to all samples.
    chunks : scalar or sequence
      Chunk attribute for each volume in the timeseries, or a scalar value that
      is assigned to all samples.
    mask : str or NiftiImage
      Filename or image instance of a 3D volume mask. Voxels corresponding to
      non-zero elements in the mask will be selected. The mask has to be in the
      same space (orientation and dimensions) as the timeseries image
    sprefix : str or None
      Prefix for attribute names describing spatial properties of the
      timeseries. If None, no such attributes are stored in the dataset.
    tprefix : str or None
      Prefix for attribute names describing temporal properties of the
      timeseries. If None, no such attributes are stored in the dataset.
    add_fa : dict or None
      Optional dictionary with additional volumetric data that shall be stored
      as feature attributes in the dataset. The dictionary key serves as the
      feature attribute name. Each value might be of any type supported by the
      'mask' argument of this function.

    # load the samples
    imgdata, imghdr, img = _load_anyimg(samples, ensure=True, enforce_dim=4)

    # figure out what the mask is, but only handle known cases, the rest
    # goes directly into the mapper which maybe knows more
    maskimg = _load_anyimg(mask)
    if maskimg is None:
        # take just data and ignore the header
        mask = maskimg[0]

    # compile the samples attributes
    sa = {}
    if not targets is None:
        sa['targets'] = _expand_attribute(targets, imgdata.shape[0], 'targets')
    if not chunks is None:
        sa['chunks'] = _expand_attribute(chunks, imgdata.shape[0], 'chunks')

    # create a dataset
    ds = Dataset(imgdata, sa=sa)
    if sprefix is None:
        space = None
        space = sprefix + '_indices'
    ds = ds.get_mapped(FlattenMapper(shape=imgdata.shape[1:], space=space))

    # now apply the mask if any
    if not mask is None:
        flatmask = ds.a.mapper.forward1(mask)
        # direct slicing is possible, and it is potentially more efficient,
        # so let's use it
        #mapper = StaticFeatureSelection(flatmask)
        #ds = ds.get_mapped(StaticFeatureSelection(flatmask))
        ds = ds[:, flatmask != 0]

    # load and store additional feature attributes
    if not add_fa is None:
        for fattr in add_fa:
            value = _load_anyimg(add_fa[fattr], ensure=True)[0]
            ds.fa[fattr] = ds.a.mapper.forward1(value)

    # store interesting NIfTI props in the dataset in a more portable way
    ds.a['imgaffine'] = img.get_affine()
    ds.a['imgtype'] = img.__class__.__name__
    # stick the header instance in as is, and ...
    ds.a['imghdr'] = imghdr
    # ... let strip_nibabel() be the central place to take care of any header
    # conversion into non-NiBabel dtypes

    # If there is a space assigned , store the extent of that space
    if sprefix is not None:
        ds.a[sprefix + '_dim'] = imgdata.shape[1:]
        # 'voxdim' is (x,y,z) while 'samples' are (t,z,y,x)
        ds.a[sprefix + '_eldim'] = _get_voxdim(imghdr)
        # TODO extend with the unit
    if tprefix is not None:
        ds.sa[tprefix + '_indices'] = np.arange(len(ds), dtype='int')
        ds.sa[tprefix + '_coords'] = \
            np.arange(len(ds), dtype='float') * _get_dt(imghdr)
        # TODO extend with the unit

    return ds
Example #2
    def from_wizard(cls, samples, targets=None, chunks=None, mask=None,
                    mapper=None, flatten=None, space=None):
        """Convenience method to create dataset.

        Datasets can be created from N-dimensional samples. Data arrays with
        more than two dimensions are going to be flattened, while preserving
        the first axis (separating the samples) and concatenating all other as
        the second axis. Optionally, it is possible to specify targets and
        chunk attributes for all samples, and masking of the input data (only
        selecting elements corresponding to non-zero mask elements

        samples : ndarray
          N-dimensional samples array. The first axis separates individual
        targets : scalar or ndarray, optional
          Labels for all samples. If a scalar is provided its values is assigned
          as label to all samples.
        chunks : scalar or ndarray, optional
          Chunks definition for all samples. If a scalar is provided its values
          is assigned as chunk of all samples.
        mask : ndarray, optional
          The shape of the array has to correspond to the shape of a single
          sample (shape(samples)[1:] == shape(mask)). Its non-zero elements
          are used to mask the input data.
        mapper : Mapper instance, optional
          A trained mapper instance that is used to forward-map
          possibly already flattened (see flatten) and masked samples
          upon construction of the dataset. The mapper must have a
          simple feature space (samples x features) as output. Use a
          `ChainMapper` to achieve that, if necessary.
        flatten : None or bool, optional
          If None (default) and no mapper provided, data would get flattened.
          Bool value would instruct explicitly either to flatten before
          possibly passing into the mapper if no mask is given.
        space : str, optional
          If provided it is assigned to the mapper instance that performs the
          initial flattening of the data.

        instance : Dataset
        # for all non-ndarray samples you need to go with the constructor
        samples = np.asanyarray(samples)

        # compile the necessary samples attributes collection
        sa_items = {}

        if not targets is None:
            sa_items['targets'] = _expand_attribute(targets,

        if not chunks is None:
            # unlike previous implementation, we do not do magic to do chunks
            # if there are none, there are none
            sa_items['chunks'] = _expand_attribute(chunks,

        # common checks should go into __init__
        ds = cls(samples, sa=sa_items)
        # apply mask through mapper
        if mask is None:
            # if we have multi-dim data
            if len(samples.shape) > 2 and \
                   ((flatten is None and mapper is None) # auto case
                    or flatten):                         # bool case
                fm = FlattenMapper(shape=samples.shape[1:], space=space)
                ds = ds.get_mapped(fm)
            mm = mask_mapper(mask, space=space)
            ds = ds.get_mapped(mm)

        # apply generic mapper
        if not mapper is None:
            ds = ds.get_mapped(mapper)
        return ds