def fmri_dataset(samples, targets=None, chunks=None, mask=None,
                 sprefix='voxel', tprefix='time', add_fa=None):
    """Create a dataset from an fMRI timeseries image.

    The timeseries image serves as the samples data, with each volume becoming
    a sample. All 3D volume samples are flattened into one-dimensional feature
    vectors, optionally being masked (i.e. subset of voxels corresponding to
    non-zero elements in a mask image).

    In addition to (optional) samples attributes for targets and chunks the
    returned dataset contains a number of additional attributes:

    Samples attributes (per each volume):

      * volume index (time_indices)
      * volume acquisition time (time_coord)

    Feature attributes (per each voxel):

      * voxel indices (voxel_indices), sometimes referred to as ijk

    Dataset attributes:

      * dump of the NIfTI image header data (imghdr)
      * volume extent (voxel_dim)
      * voxel extent (voxel_eldim)

    The default attribute name is listed in parenthesis, but may be altered by
    the corresponding prefix arguments. The validity of the attribute values
    relies on correct settings in the NIfTI image header.

    Parameters
    ----------
    samples : str or NiftiImage or list
      fMRI timeseries, specified either as a filename (single file 4D image),
      an image instance (4D image), or a list of filenames or image instances
      (each list item corresponding to a 3D volume).
    targets : scalar or sequence
      Label attribute for each volume in the timeseries, or a scalar value
      that is assigned to all samples.
    chunks : scalar or sequence
      Chunk attribute for each volume in the timeseries, or a scalar value
      that is assigned to all samples.
    mask : str or NiftiImage
      Filename or image instance of a 3D volume mask. Voxels corresponding to
      non-zero elements in the mask will be selected. The mask has to be in
      the same space (orientation and dimensions) as the timeseries image.
    sprefix : str or None
      Prefix for attribute names describing spatial properties of the
      timeseries. If None, no such attributes are stored in the dataset.
    tprefix : str or None
      Prefix for attribute names describing temporal properties of the
      timeseries. If None, no such attributes are stored in the dataset.
    add_fa : dict or None
      Optional dictionary with additional volumetric data that shall be stored
      as feature attributes in the dataset. The dictionary key serves as the
      feature attribute name. Each value might be of any type supported by the
      'mask' argument of this function.

    Returns
    -------
    Dataset
    """
    # load the samples; enforce a 4D image (time x z x y x x)
    imgdata, imghdr = _load_anyimg(samples, ensure=True, enforce_dim=4)

    # figure out what the mask is, but only handle known cases; the rest
    # goes directly into the mapper which maybe knows more
    maskimg = _load_anyimg(mask)
    if maskimg is not None:
        # take just the data array and ignore the header
        mask = maskimg[0]

    # compile the samples attributes
    sa = {}
    if targets is not None:
        sa['targets'] = _expand_attribute(targets, imgdata.shape[0], 'targets')
    if chunks is not None:
        sa['chunks'] = _expand_attribute(chunks, imgdata.shape[0], 'chunks')

    # create the initial dataset and flatten each volume into a feature vector
    ds = Dataset(imgdata, sa=sa)
    inspace = None if sprefix is None else sprefix + '_indices'
    ds = ds.get_mapped(FlattenMapper(shape=imgdata.shape[1:], inspace=inspace))

    # now apply the mask if any; direct slicing is possible and potentially
    # more efficient than stacking a FeatureSliceMapper onto the chain
    if mask is not None:
        flatmask = ds.a.mapper.forward1(mask)
        ds = ds[:, flatmask != 0]

    # load and store additional feature attributes, mapped through the same
    # flatten/mask chain as the samples
    if add_fa is not None:
        for fattr in add_fa:
            value = _load_anyimg(add_fa[fattr], ensure=True)[0]
            ds.fa[fattr] = ds.a.mapper.forward1(value)

    # store interesting props in the dataset
    ds.a['imghdr'] = imghdr
    # if there is a space assigned, store the extent of that space
    if sprefix is not None:
        ds.a[sprefix + '_dim'] = imgdata.shape[1:]
        # 'voxdim' is (x,y,z) while 'samples' are (t,z,y,x)
        ds.a[sprefix + '_eldim'] = _get_voxdim(imghdr)  # TODO extend with the unit
    if tprefix is not None:
        ds.sa[tprefix + '_indices'] = np.arange(len(ds), dtype='int')
        ds.sa[tprefix + '_coords'] = np.arange(len(ds), dtype='float') \
                                     * _get_dt(imghdr)  # TODO extend with the unit
    return ds
def from_wizard(cls, samples, targets=None, chunks=None, mask=None,
                mapper=None, space=None):
    """Convenience method to create dataset.

    Datasets can be created from N-dimensional samples. Data arrays with more
    than two dimensions are going to be flattened, while preserving the first
    axis (separating the samples) and concatenating all other as the second
    axis. Optionally, it is possible to specify targets and chunk attributes
    for all samples, and masking of the input data (only selecting elements
    corresponding to non-zero mask elements).

    Parameters
    ----------
    samples : ndarray
      N-dimensional samples array. The first axis separates individual
      samples.
    targets : scalar or ndarray, optional
      Labels for all samples. If a scalar is provided its values is assigned
      as label to all samples.
    chunks : scalar or ndarray, optional
      Chunks definition for all samples. If a scalar is provided its values is
      assigned as chunk of all samples.
    mask : ndarray, optional
      The shape of the array has to correspond to the shape of a single sample
      (shape(samples)[1:] == shape(mask)). Its non-zero elements are used to
      mask the input data.
    mapper : Mapper instance, optional
      A (potentially trained) mapper instance that is used to forward-map the
      already flattened and masked samples upon construction of the dataset.
      The mapper must have a simple feature space (samples x features) as
      output. Use a `ChainMapper` to achieve that, if necessary.
    space : str, optional
      If provided it is assigned to the mapper instance that performs the
      initial flattening of the data.

    Returns
    -------
    instance : Dataset
    """
    # for all non-ndarray samples you need to go with the constructor
    samples = np.asanyarray(samples)

    # compile the necessary samples attributes collection
    sa_items = {}
    if targets is not None:
        sa_items['targets'] = _expand_attribute(targets, samples.shape[0],
                                                'targets')
    if chunks is not None:
        # unlike previous implementation, we do not do magic to do chunks
        # if there are none, there are none
        sa_items['chunks'] = _expand_attribute(chunks, samples.shape[0],
                                               'chunks')

    # common checks should go into __init__
    ds = cls(samples, sa=sa_items)

    # apply mask through mapper
    if mask is None:
        # flatten only if we actually have multi-dim data
        if samples.ndim > 2:
            ds = ds.get_mapped(FlattenMapper(shape=samples.shape[1:],
                                             inspace=space))
    else:
        # a mask implies flattening + feature selection in one mapper
        ds = ds.get_mapped(mask_mapper(mask, inspace=space))

    # apply generic mapper
    if mapper is not None:
        ds = ds.get_mapped(mapper)
    return ds
def fmri_dataset(samples, targets=None, chunks=None, mask=None,
                 sprefix='voxel', tprefix='time', add_fa=None):
    """Create a dataset from an fMRI timeseries image.

    The timeseries image serves as the samples data, with each volume becoming
    a sample. All 3D volume samples are flattened into one-dimensional feature
    vectors, optionally being masked (i.e. subset of voxels corresponding to
    non-zero elements in a mask image).

    In addition to (optional) samples attributes for targets and chunks the
    returned dataset contains a number of additional attributes:

    Samples attributes (per each volume):

      * volume index (time_indices)
      * volume acquisition time (time_coord)

    Feature attributes (per each voxel):

      * voxel indices (voxel_indices), sometimes referred to as ijk

    Dataset attributes:

      * dump of the image (e.g. NIfTI) header data (imghdr)
      * class of the image (e.g. Nifti1Image) (imgtype)
      * volume extent (voxel_dim)
      * voxel extent (voxel_eldim)

    The default attribute name is listed in parenthesis, but may be altered by
    the corresponding prefix arguments. The validity of the attribute values
    relies on correct settings in the NIfTI image header.

    Parameters
    ----------
    samples : str or NiftiImage or list
      fMRI timeseries, specified either as a filename (single file 4D image),
      an image instance (4D image), or a list of filenames or image instances
      (each list item corresponding to a 3D volume).
    targets : scalar or sequence
      Label attribute for each volume in the timeseries, or a scalar value
      that is assigned to all samples.
    chunks : scalar or sequence
      Chunk attribute for each volume in the timeseries, or a scalar value
      that is assigned to all samples.
    mask : str or NiftiImage
      Filename or image instance of a 3D volume mask. Voxels corresponding to
      non-zero elements in the mask will be selected. The mask has to be in
      the same space (orientation and dimensions) as the timeseries image.
    sprefix : str or None
      Prefix for attribute names describing spatial properties of the
      timeseries. If None, no such attributes are stored in the dataset.
    tprefix : str or None
      Prefix for attribute names describing temporal properties of the
      timeseries. If None, no such attributes are stored in the dataset.
    add_fa : dict or None
      Optional dictionary with additional volumetric data that shall be stored
      as feature attributes in the dataset. The dictionary key serves as the
      feature attribute name. Each value might be of any type supported by the
      'mask' argument of this function.

    Returns
    -------
    Dataset
    """
    # load the samples; enforce a 4D image (time x z x y x x)
    imgdata, imghdr, imgtype = _load_anyimg(samples, ensure=True,
                                            enforce_dim=4)

    # figure out what the mask is, but only handle known cases; the rest
    # goes directly into the mapper which maybe knows more
    maskimg = _load_anyimg(mask)
    if maskimg is not None:
        # take just the data array and ignore the header
        mask = maskimg[0]

    # compile the samples attributes
    sa = {}
    if targets is not None:
        sa['targets'] = _expand_attribute(targets, imgdata.shape[0], 'targets')
    if chunks is not None:
        sa['chunks'] = _expand_attribute(chunks, imgdata.shape[0], 'chunks')

    # create the initial dataset and flatten each volume into a feature vector
    ds = Dataset(imgdata, sa=sa)
    space = None if sprefix is None else sprefix + '_indices'
    ds = ds.get_mapped(FlattenMapper(shape=imgdata.shape[1:], space=space))

    # now apply the mask if any; direct slicing is possible and potentially
    # more efficient than stacking a StaticFeatureSelection onto the chain
    if mask is not None:
        flatmask = ds.a.mapper.forward1(mask)
        ds = ds[:, flatmask != 0]

    # load and store additional feature attributes, mapped through the same
    # flatten/mask chain as the samples
    if add_fa is not None:
        for fattr in add_fa:
            value = _load_anyimg(add_fa[fattr], ensure=True)[0]
            ds.fa[fattr] = ds.a.mapper.forward1(value)

    # store interesting props in the dataset
    ds.a['imghdr'] = imghdr
    ds.a['imgtype'] = imgtype
    # if there is a space assigned, store the extent of that space
    if sprefix is not None:
        ds.a[sprefix + '_dim'] = imgdata.shape[1:]
        # 'voxdim' is (x,y,z) while 'samples' are (t,z,y,x)
        ds.a[sprefix + '_eldim'] = _get_voxdim(imghdr)  # TODO extend with the unit
    if tprefix is not None:
        ds.sa[tprefix + '_indices'] = np.arange(len(ds), dtype='int')
        ds.sa[tprefix + '_coords'] = np.arange(len(ds), dtype='float') \
                                     * _get_dt(imghdr)  # TODO extend with the unit
    return ds
def fmri_dataset(samples, targets=None, chunks=None, mask=None,
                 sprefix="voxel", tprefix="time", add_fa=None):
    """Create a dataset from an fMRI timeseries image.

    The timeseries image serves as the samples data, with each volume becoming
    a sample. All 3D volume samples are flattened into one-dimensional feature
    vectors, optionally being masked (i.e. subset of voxels corresponding to
    non-zero elements in a mask image).

    In addition to (optional) samples attributes for targets and chunks the
    returned dataset contains a number of additional attributes:

    Samples attributes (per each volume):

      * volume index (time_indices)
      * volume acquisition time (time_coord)

    Feature attributes (per each voxel):

      * voxel indices (voxel_indices), sometimes referred to as ijk

    Dataset attributes:

      * dump of the NIfTI image header data (imghdr)
      * volume extent (voxel_dim)
      * voxel extent (voxel_eldim)

    The default attribute name is listed in parenthesis, but may be altered by
    the corresponding prefix arguments. The validity of the attribute values
    relies on correct settings in the NIfTI image header.

    Parameters
    ----------
    samples : str or NiftiImage or list
      fMRI timeseries, specified either as a filename (single file 4D image),
      an image instance (4D image), or a list of filenames or image instances
      (each list item corresponding to a 3D volume).
    targets : scalar or sequence
      Label attribute for each volume in the timeseries, or a scalar value
      that is assigned to all samples.
    chunks : scalar or sequence
      Chunk attribute for each volume in the timeseries, or a scalar value
      that is assigned to all samples.
    mask : str or NiftiImage
      Filename or image instance of a 3D volume mask. Voxels corresponding to
      non-zero elements in the mask will be selected. The mask has to be in
      the same space (orientation and dimensions) as the timeseries image.
    sprefix : str or None
      Prefix for attribute names describing spatial properties of the
      timeseries. If None, no such attributes are stored in the dataset.
    tprefix : str or None
      Prefix for attribute names describing temporal properties of the
      timeseries. If None, no such attributes are stored in the dataset.
    add_fa : dict or None
      Optional dictionary with additional volumetric data that shall be stored
      as feature attributes in the dataset. The dictionary key serves as the
      feature attribute name. Each value might be of any type supported by the
      'mask' argument of this function.

    Returns
    -------
    Dataset
    """
    # load the samples; enforce a 4D image (time x z x y x x)
    niftisamples = _load_anynifti(samples, ensure=True, enforce_dim=4)
    samples = niftisamples.data

    # figure out what the mask is, but only handle known cases; the rest
    # goes directly into the mapper which maybe knows more
    niftimask = _load_anynifti(mask)
    if niftimask is None:
        pass
    elif isinstance(niftimask, np.ndarray):
        # already a plain array -- use as-is
        mask = niftimask
    else:
        # a NIfTI image -- take just the data and ignore the header
        mask = _get_nifti_data(niftimask)

    # compile the samples attributes
    sa = {}
    if targets is not None:
        sa["targets"] = _expand_attribute(targets, samples.shape[0], "targets")
    if chunks is not None:
        sa["chunks"] = _expand_attribute(chunks, samples.shape[0], "chunks")

    # create the initial dataset and flatten each volume into a feature vector
    ds = Dataset(samples, sa=sa)
    inspace = None if sprefix is None else sprefix + "_indices"
    ds = ds.get_mapped(FlattenMapper(shape=samples.shape[1:], inspace=inspace))

    # now apply the mask if any; direct slicing is possible and potentially
    # more efficient than stacking a FeatureSliceMapper onto the chain
    if mask is not None:
        flatmask = ds.a.mapper.forward1(mask)
        ds = ds[:, flatmask != 0]

    # load and store additional feature attributes, mapped through the same
    # flatten/mask chain as the samples
    if add_fa is not None:
        for fattr in add_fa:
            value = _get_nifti_data(_load_anynifti(add_fa[fattr]))
            ds.fa[fattr] = ds.a.mapper.forward1(value)

    # store interesting props in the dataset
    # do not put the whole NiftiImage in the dict as this will most likely be
    # deepcopy'ed at some point and ensuring data integrity of the complex
    # Python-C-Swig hybrid might be a tricky task. Only storing the header
    # dict should achieve the same and is more memory efficient and even
    # simpler
    ds.a["imghdr"] = niftisamples.header

    # if there is a space assigned, store the extent of that space
    if sprefix is not None:
        ds.a[sprefix + "_dim"] = samples.shape[1:]
        # 'voxdim' is (x,y,z) while 'samples' are (t,z,y,x)
        ds.a[sprefix + "_eldim"] = tuple(reversed(niftisamples.voxdim))
        # TODO extend with the unit
    if tprefix is not None:
        ds.sa[tprefix + "_indices"] = np.arange(len(ds), dtype="int")
        # pixdim[4] holds the temporal sampling interval (TR) in a NIfTI
        # header; assumes the header declares time units correctly
        ds.sa[tprefix + "_coords"] = (np.arange(len(ds), dtype="float")
                                      * niftisamples.header["pixdim"][4])
        # TODO extend with the unit
    return ds