def test_featuregroup_mapper():
    """Exercise ``mean_group_feature`` and ``mean_feature`` on small datasets.

    Covers three situations:

    * averaging features grouped by the ``roi`` feature attribute,
    * averaging across all features without grouping (dataset vs. bare
      array input must agree),
    * datasets whose samples have more than one feature dimension.
    """
    ds = Dataset(np.arange(24).reshape(3, 8))
    ds.fa['roi'] = [0, 1] * 4
    # sample attribute used to verify that SAs survive the mapping
    ds.sa['chunks'] = np.arange(3)

    # expected results
    csamples = [[3, 4], [11, 12], [19, 20]]
    croi = [0, 1]
    cchunks = np.arange(3)

    m = mean_group_feature(['roi'])
    mds = m.forward(ds)
    assert_equal(mds.shape, (3, 2))
    assert_array_equal(mds.samples, csamples)
    assert_array_equal(mds.fa.roi, croi)
    # SAs should simply remain the same
    assert_array_equal(mds.sa.chunks, cchunks)

    # now without grouping
    m = mean_feature()
    # forwarding just the samples should yield the same result
    assert_array_equal(m.forward(ds.samples), m.forward(ds).samples)

    # And when operating on a dataset with >1D samples, then operate
    # only across "features", i.e. 1st dimension
    ds = Dataset(np.arange(24).reshape(3, 2, 2, 2))
    mapped = ds.get_mapped(m)
    assert_array_equal(m.forward(ds.samples), mapped.samples)
    assert_array_equal(mapped.samples.shape, (3, 2, 2))
    assert_array_equal(mapped.samples, np.mean(ds.samples, axis=1))
    # and still could map back? ;) not ATM, so just to ensure consistency
    assert_raises(NotImplementedError,
                  mapped.a.mapper.reverse, mapped.samples)

    # but it should also work with standard 2d sample arrays
    ds = Dataset(np.arange(24).reshape(3, 8))
    mapped = ds.get_mapped(m)
    assert_array_equal(mapped.samples.shape, (3, 1))
def fmri_dataset(samples, targets=None, chunks=None, mask=None,
                 sprefix='voxel', tprefix='time', add_fa=None,):
    """Create a dataset from an fMRI timeseries image.

    The timeseries image serves as the samples data, with each volume
    becoming a sample. All 3D volume samples are flattened into
    one-dimensional feature vectors, optionally being masked (i.e. subset of
    voxels corresponding to non-zero elements in a mask image).

    In addition to (optional) samples attributes for targets and chunks the
    returned dataset contains a number of additional attributes:

    Samples attributes (per each volume):

      * volume index (time_indices)
      * volume acquisition time (time_coords)

    Feature attributes (per each voxel):

      * voxel indices (voxel_indices), sometimes referred to as ijk

    Dataset attributes:

      * dump of the NIfTI image header data (imghdr)
      * volume extent (voxel_dim)
      * voxel extent (voxel_eldim)

    The default attribute name is listed in parenthesis, but may be altered
    by the corresponding prefix arguments. The validity of the attribute
    values relies on correct settings in the NIfTI image header.

    Parameters
    ----------
    samples : str or NiftiImage or list
      fMRI timeseries, specified either as a filename (single file 4D image),
      an image instance (4D image), or a list of filenames or image instances
      (each list item corresponding to a 3D volume).
    targets : scalar or sequence
      Label attribute for each volume in the timeseries, or a scalar value
      that is assigned to all samples.
    chunks : scalar or sequence
      Chunk attribute for each volume in the timeseries, or a scalar value
      that is assigned to all samples.
    mask : str or NiftiImage
      Filename or image instance of a 3D volume mask. Voxels corresponding to
      non-zero elements in the mask will be selected. The mask has to be in
      the same space (orientation and dimensions) as the timeseries image.
    sprefix : str or None
      Prefix for attribute names describing spatial properties of the
      timeseries. If None, no such attributes are stored in the dataset.
    tprefix : str or None
      Prefix for attribute names describing temporal properties of the
      timeseries. If None, no such attributes are stored in the dataset.
    add_fa : dict or None
      Optional dictionary with additional volumetric data that shall be
      stored as feature attributes in the dataset. The dictionary key serves
      as the feature attribute name. Each value might be of any type
      supported by the 'mask' argument of this function.

    Returns
    -------
    Dataset
      One sample per volume, with the (optionally masked) voxels as
      flattened features.
    """
    # load the samples
    imgdata, imghdr = _load_anyimg(samples, ensure=True, enforce_dim=4)

    # figure out what the mask is, but only handle known cases, the rest
    # goes directly into the mapper which maybe knows more
    maskimg = _load_anyimg(mask)
    if maskimg is not None:
        # take just data and ignore the header
        mask = maskimg[0]

    # compile the samples attributes
    sa = {}
    if targets is not None:
        sa['targets'] = _expand_attribute(targets, imgdata.shape[0], 'targets')
    if chunks is not None:
        sa['chunks'] = _expand_attribute(chunks, imgdata.shape[0], 'chunks')

    # create a dataset
    ds = Dataset(imgdata, sa=sa)
    if sprefix is None:
        inspace = None
    else:
        inspace = sprefix + '_indices'
    ds = ds.get_mapped(FlattenMapper(shape=imgdata.shape[1:], inspace=inspace))

    # now apply the mask if any
    if mask is not None:
        flatmask = ds.a.mapper.forward1(mask)
        # direct slicing is possible, and it is potentially more efficient
        # than going through an explicit FeatureSliceMapper, so let's use it
        ds = ds[:, flatmask != 0]

    # load and store additional feature attributes
    if add_fa is not None:
        for fattr in add_fa:
            # only the data part of the loaded image is needed
            value = _load_anyimg(add_fa[fattr], ensure=True)[0]
            ds.fa[fattr] = ds.a.mapper.forward1(value)

    # store interesting props in the dataset
    ds.a['imghdr'] = imghdr
    # If there is a space assigned, store the extent of that space
    if sprefix is not None:
        ds.a[sprefix + '_dim'] = imgdata.shape[1:]
        # 'voxdim' is (x,y,z) while 'samples' are (t,z,y,x)
        ds.a[sprefix + '_eldim'] = _get_voxdim(imghdr)
        # TODO extend with the unit
    if tprefix is not None:
        ds.sa[tprefix + '_indices'] = np.arange(len(ds), dtype='int')
        ds.sa[tprefix + '_coords'] = np.arange(len(ds), dtype='float') \
                                     * _get_dt(imghdr)
        # TODO extend with the unit

    return ds
def fmri_dataset(samples, targets=None, chunks=None, mask=None, sprefix="voxel", tprefix="time", add_fa=None):
    """Create a dataset from an fMRI timeseries image.

    The timeseries image serves as the samples data, with each volume
    becoming a sample. All 3D volume samples are flattened into
    one-dimensional feature vectors, optionally being masked (i.e. subset of
    voxels corresponding to non-zero elements in a mask image).

    In addition to (optional) samples attributes for targets and chunks the
    returned dataset contains a number of additional attributes:

    Samples attributes (per each volume):

      * volume index (time_indices)
      * volume acquisition time (time_coords)

    Feature attributes (per each voxel):

      * voxel indices (voxel_indices), sometimes referred to as ijk

    Dataset attributes:

      * dump of the NIfTI image header data (imghdr)
      * volume extent (voxel_dim)
      * voxel extent (voxel_eldim)

    The default attribute name is listed in parenthesis, but may be altered
    by the corresponding prefix arguments. The validity of the attribute
    values relies on correct settings in the NIfTI image header.

    Parameters
    ----------
    samples : str or NiftiImage or list
      fMRI timeseries, specified either as a filename (single file 4D image),
      an image instance (4D image), or a list of filenames or image instances
      (each list item corresponding to a 3D volume).
    targets : scalar or sequence
      Label attribute for each volume in the timeseries, or a scalar value
      that is assigned to all samples.
    chunks : scalar or sequence
      Chunk attribute for each volume in the timeseries, or a scalar value
      that is assigned to all samples.
    mask : str or NiftiImage
      Filename or image instance of a 3D volume mask. Voxels corresponding to
      non-zero elements in the mask will be selected. The mask has to be in
      the same space (orientation and dimensions) as the timeseries image.
    sprefix : str or None
      Prefix for attribute names describing spatial properties of the
      timeseries. If None, no such attributes are stored in the dataset.
    tprefix : str or None
      Prefix for attribute names describing temporal properties of the
      timeseries. If None, no such attributes are stored in the dataset.
    add_fa : dict or None
      Optional dictionary with additional volumetric data that shall be
      stored as feature attributes in the dataset. The dictionary key serves
      as the feature attribute name. Each value might be of any type
      supported by the 'mask' argument of this function.

    Returns
    -------
    Dataset
      One sample per volume, with the (optionally masked) voxels as
      flattened features.
    """
    # load the samples; keep the image around for header access further down
    niftisamples = _load_anynifti(samples, ensure=True, enforce_dim=4)
    data = niftisamples.data

    # figure out what the mask is, but only handle known cases, the rest
    # goes directly into the mapper which maybe knows more
    niftimask = _load_anynifti(mask)
    if niftimask is None:
        pass
    elif isinstance(niftimask, np.ndarray):
        mask = niftimask
    else:
        mask = _get_nifti_data(niftimask)

    # compile the samples attributes
    sa = {}
    if targets is not None:
        sa["targets"] = _expand_attribute(targets, data.shape[0], "targets")
    if chunks is not None:
        sa["chunks"] = _expand_attribute(chunks, data.shape[0], "chunks")

    # create a dataset
    ds = Dataset(data, sa=sa)
    if sprefix is None:
        inspace = None
    else:
        inspace = sprefix + "_indices"
    ds = ds.get_mapped(FlattenMapper(shape=data.shape[1:], inspace=inspace))

    # now apply the mask if any
    if mask is not None:
        flatmask = ds.a.mapper.forward1(mask)
        # direct slicing is possible, and it is potentially more efficient
        # than going through an explicit FeatureSliceMapper, so let's use it
        ds = ds[:, flatmask != 0]

    # load and store additional feature attributes
    if add_fa is not None:
        for fattr in add_fa:
            # NOTE(review): unlike the mask handling above, a plain ndarray
            # (or None) coming back from _load_anynifti is not special-cased
            # here -- confirm _get_nifti_data copes with those inputs.
            value = _get_nifti_data(_load_anynifti(add_fa[fattr]))
            ds.fa[fattr] = ds.a.mapper.forward1(value)

    # store interesting props in the dataset
    # do not put the whole NiftiImage in the dict as this will most
    # likely be deepcopy'ed at some point and ensuring data integrity
    # of the complex Python-C-Swig hybrid might be a tricky task.
    # Only storing the header dict should achieve the same and is more
    # memory efficient and even simpler
    ds.a["imghdr"] = niftisamples.header
    # If there is a space assigned, store the extent of that space
    if sprefix is not None:
        ds.a[sprefix + "_dim"] = data.shape[1:]
        # 'voxdim' is (x,y,z) while 'samples' are (t,z,y,x), hence the
        # reversal
        ds.a[sprefix + "_eldim"] = tuple(reversed(niftisamples.voxdim))
        # TODO extend with the unit
    if tprefix is not None:
        ds.sa[tprefix + "_indices"] = np.arange(len(ds), dtype="int")
        # scale the volume index by the 5th 'pixdim' header entry
        ds.sa[tprefix + "_coords"] = (
            np.arange(len(ds), dtype="float") * niftisamples.header["pixdim"][4]
        )
        # TODO extend with the unit

    return ds
def fmri_dataset(samples, targets=None, chunks=None, mask=None,
                 sprefix='voxel', tprefix='time', add_fa=None,):
    """Create a dataset from an fMRI timeseries image.

    The timeseries image serves as the samples data, with each volume
    becoming a sample. All 3D volume samples are flattened into
    one-dimensional feature vectors, optionally being masked (i.e. subset of
    voxels corresponding to non-zero elements in a mask image).

    In addition to (optional) samples attributes for targets and chunks the
    returned dataset contains a number of additional attributes:

    Samples attributes (per each volume):

      * volume index (time_indices)
      * volume acquisition time (time_coords)

    Feature attributes (per each voxel):

      * voxel indices (voxel_indices), sometimes referred to as ijk

    Dataset attributes:

      * dump of the image (e.g. NIfTI) header data (imghdr)
      * class of the image (e.g. Nifti1Image) (imgtype)
      * volume extent (voxel_dim)
      * voxel extent (voxel_eldim)

    The default attribute name is listed in parenthesis, but may be altered
    by the corresponding prefix arguments. The validity of the attribute
    values relies on correct settings in the NIfTI image header.

    Parameters
    ----------
    samples : str or NiftiImage or list
      fMRI timeseries, specified either as a filename (single file 4D image),
      an image instance (4D image), or a list of filenames or image instances
      (each list item corresponding to a 3D volume).
    targets : scalar or sequence
      Label attribute for each volume in the timeseries, or a scalar value
      that is assigned to all samples.
    chunks : scalar or sequence
      Chunk attribute for each volume in the timeseries, or a scalar value
      that is assigned to all samples.
    mask : str or NiftiImage
      Filename or image instance of a 3D volume mask. Voxels corresponding to
      non-zero elements in the mask will be selected. The mask has to be in
      the same space (orientation and dimensions) as the timeseries image.
    sprefix : str or None
      Prefix for attribute names describing spatial properties of the
      timeseries. If None, no such attributes are stored in the dataset.
    tprefix : str or None
      Prefix for attribute names describing temporal properties of the
      timeseries. If None, no such attributes are stored in the dataset.
    add_fa : dict or None
      Optional dictionary with additional volumetric data that shall be
      stored as feature attributes in the dataset. The dictionary key serves
      as the feature attribute name. Each value might be of any type
      supported by the 'mask' argument of this function.

    Returns
    -------
    Dataset
      One sample per volume, with the (optionally masked) voxels as
      flattened features.
    """
    # load the samples
    imgdata, imghdr, imgtype = _load_anyimg(samples, ensure=True, enforce_dim=4)

    # figure out what the mask is, but only handle known cases, the rest
    # goes directly into the mapper which maybe knows more
    maskimg = _load_anyimg(mask)
    if maskimg is not None:
        # take just data and ignore the header
        mask = maskimg[0]

    # compile the samples attributes
    sa = {}
    if targets is not None:
        sa['targets'] = _expand_attribute(targets, imgdata.shape[0], 'targets')
    if chunks is not None:
        sa['chunks'] = _expand_attribute(chunks, imgdata.shape[0], 'chunks')

    # create a dataset
    ds = Dataset(imgdata, sa=sa)
    if sprefix is None:
        space = None
    else:
        space = sprefix + '_indices'
    ds = ds.get_mapped(FlattenMapper(shape=imgdata.shape[1:], space=space))

    # now apply the mask if any
    if mask is not None:
        flatmask = ds.a.mapper.forward1(mask)
        # direct slicing is possible, and it is potentially more efficient
        # than going through an explicit StaticFeatureSelection, so let's
        # use it
        ds = ds[:, flatmask != 0]

    # load and store additional feature attributes
    if add_fa is not None:
        for fattr in add_fa:
            # only the data part of the loaded image is needed
            value = _load_anyimg(add_fa[fattr], ensure=True)[0]
            ds.fa[fattr] = ds.a.mapper.forward1(value)

    # store interesting props in the dataset
    ds.a['imghdr'] = imghdr
    ds.a['imgtype'] = imgtype
    # If there is a space assigned, store the extent of that space
    if sprefix is not None:
        ds.a[sprefix + '_dim'] = imgdata.shape[1:]
        # 'voxdim' is (x,y,z) while 'samples' are (t,z,y,x)
        ds.a[sprefix + '_eldim'] = _get_voxdim(imghdr)
        # TODO extend with the unit
    if tprefix is not None:
        ds.sa[tprefix + '_indices'] = np.arange(len(ds), dtype='int')
        ds.sa[tprefix + '_coords'] = np.arange(len(ds), dtype='float') \
                                     * _get_dt(imghdr)
        # TODO extend with the unit

    return ds