def test_er_nifti_dataset():
    """Check event-related segmentation of a NIfTI time series.

    The BOLD time series is cut into one sample per event read from an
    FSL EV3 file -- first unmasked, then masked -- and the resulting
    samples, attributes and the mapping back into an image are verified.
    """
    # input files: time series, event definitions, mask
    bold_file = os.path.join(pymvpa_dataroot, 'bold.nii.gz')
    ev_file = os.path.join(pymvpa_dataroot, 'fslev3.txt')
    mask_file = os.path.join(pymvpa_dataroot, 'mask.nii.gz')
    evs = FslEV3(ev_file).to_events()

    # full time series first, then one sample per event
    ds_orig = fmri_dataset(bold_file)
    ds = eventrelated_dataset(ds_orig, evs, time_attr='time_coords')

    # every event is expected to span 4 volumes of 800 voxels each
    # (1 x 20 x 40 grid), i.e. 4 * 800 == 3200 features
    assert_equal(ds.nfeatures, 3200)
    assert_equal(len(ds), len(evs))
    # voxel indices stay 3D after boxcaring and re-flattening ...
    assert_equal(ds.fa.voxel_indices.shape, (ds.nfeatures, 3))
    # ... and repeat identically for each volume of the boxcar
    assert_array_equal(ds.fa.voxel_indices[:800],
                       ds.fa.voxel_indices[800:1600])
    # every feature knows its offset within the boxcar
    assert_array_equal(ds.fa.event_offsetidx,
                       np.repeat([0, 1, 2, 3], 800))
    # attributes of the events must show up as sample attributes
    assert_true('onset' in ds.sa)
    assert_true('duration' in ds.sa)
    assert_true('features' in ds.sa)

    # compare every event sample against the raw image data
    origsamples = _load_anyimg(bold_file)[0]
    onset_indices = [value2idx(e['onset'], ds_orig.sa.time_coords, 'floor')
                     for e in evs]
    for i, onset in enumerate(onset_indices):
        volumes = np.arange(onset, onset + 4)
        assert_array_equal(ds.samples[i],
                           origsamples[onset:onset + 4].ravel())
        assert_array_equal(ds.sa.time_indices[i], volumes)
        assert_array_equal(ds.sa.time_coords[i], volumes * 2.5)
        for evattr in ds.sa:
            # only look at 'event_attrs*' attributes, except for the
            # 'event_attrs_event*' ones
            if not evattr.count("event_attrs") \
               or evattr.count('event_attrs_event'):
                continue
            assert_array_equal(evs[i]['_'.join(evattr.split('_')[2:])],
                               ds.sa[evattr].value[i])
    # offsets between requested onsets and matched samples: only the last
    # event is expected to have none
    assert_array_equal(ds.sa.orig_offset, [1, 1, 0])

    # map back into voxel space, additional features should be ignored
    nim = map2nifti(ds)
    # expected: spatial shape of the raw data followed by all boxcar
    # volumes of all events
    assert_equal(nim.get_shape(), origsamples.shape[1:] + (len(ds) * 4,))
    # a single sample maps back to four volumes
    nim = map2nifti(ds, ds.samples[0])
    assert_equal(nim.get_shape(), (40, 20, 1, 4))

    # same procedure once more, this time with a mask
    ds = fmri_dataset(bold_file, mask=mask_file)
    ds = eventrelated_dataset(ds, evs, time_attr='time_coords')
    nnonzero = len(_load_anyimg(mask_file)[0].nonzero()[0])
    assert_equal(nnonzero, 530)
    # 4 volumes per event, but only the voxels inside the mask
    assert_equal(ds.nfeatures, 4 * 530)
    assert_equal(len(ds), len(evs))
    # voxel indices are again repeated for each volume of the boxcar
    assert_array_equal(ds.fa.voxel_indices[:nnonzero],
                       ds.fa.voxel_indices[nnonzero:2 * nnonzero])
def test_er_nifti_dataset():
    """Verify event-related datasets built from a NIfTI time series.

    Covers the unmasked and the masked case and checks the image obtained
    by mapping the event samples back, reading its shape via `get_shape()`
    if the 'nibabel' external is reported as present and via `.data.T.shape`
    otherwise.
    """
    # data sources
    ts_path = os.path.join(pymvpa_dataroot, 'bold.nii.gz')
    events_path = os.path.join(pymvpa_dataroot, 'fslev3.txt')
    mask_path = os.path.join(pymvpa_dataroot, 'mask.nii.gz')
    evs = FslEV3(events_path).to_events()
    # time series dataset and its event-wise segmentation
    ds_orig = fmri_dataset(ts_path)
    ds = eventrelated_dataset(ds_orig, evs, time_attr='time_coords')

    # 4 volumes per event times 800 voxels (1 x 20 x 40) gives 3200 features
    assert_equal(ds.nfeatures, 3200)
    assert_equal(len(ds), len(evs))
    # voxel indices remain three-dimensional after re-flattening
    assert_equal(ds.fa.voxel_indices.shape, (ds.nfeatures, 3))
    # and are replicated across the volumes of a boxcar
    first_volume = ds.fa.voxel_indices[:800]
    second_volume = ds.fa.voxel_indices[800:1600]
    assert_array_equal(first_volume, second_volume)
    # per-feature offset within the boxcar
    assert_array_equal(ds.fa.event_offsetidx, np.repeat([0, 1, 2, 3], 800))
    # event properties are available as sample attributes
    assert_true('onset' in ds.sa)
    assert_true('duration' in ds.sa)
    assert_true('features' in ds.sa)

    # sample-by-sample comparison with the raw data
    origsamples = _load_anyimg(ts_path)[0]
    # names of the 'event_attrs*' attributes to compare, leaving out the
    # 'event_attrs_event*' ones
    evattr_names = [a for a in ds.sa
                    if a.count("event_attrs")
                    and not a.count('event_attrs_event')]
    floored_onsets = [value2idx(e['onset'], ds_orig.sa.time_coords, 'floor')
                      for e in evs]
    for i, onset in enumerate(floored_onsets):
        stop = onset + 4
        assert_array_equal(ds.samples[i], origsamples[onset:stop].ravel())
        assert_array_equal(ds.sa.time_indices[i], np.arange(onset, stop))
        assert_array_equal(ds.sa.time_coords[i],
                           np.arange(onset, stop) * 2.5)
        for evattr in evattr_names:
            evkey = '_'.join(evattr.split('_')[2:])
            assert_array_equal(evs[i][evkey], ds.sa[evattr].value[i])
    # only the last event is expected to have no onset offset
    assert_array_equal(ds.sa.orig_offset, [1, 1, 0])

    # map back into voxel space, additional features should be ignored
    nim = map2nifti(ds)
    # pick how to read the image shape depending on the available backend
    if externals.exists('nibabel'):
        shape_of = lambda img: img.get_shape()
    else:
        # here the data array has to be transposed to be comparable
        shape_of = lambda img: img.data.T.shape
    # spatial shape of the raw data followed by all volumes of all events
    assert_equal(shape_of(nim), origsamples.shape[1:] + (len(ds) * 4, ))
    # a single sample maps back to four volumes
    nim = map2nifti(ds, ds.samples[0])
    assert_equal(shape_of(nim), (40, 20, 1, 4))

    # and now with masking
    ds = fmri_dataset(ts_path, mask=mask_path)
    ds = eventrelated_dataset(ds, evs, time_attr='time_coords')
    nnonzero = len(_load_anyimg(mask_path)[0].nonzero()[0])
    assert_equal(nnonzero, 530)
    # 4 volumes per event, restricted to the voxels inside the mask
    assert_equal(ds.nfeatures, 4 * 530)
    assert_equal(len(ds), len(evs))
    # voxel indices are replicated across the volumes of a boxcar
    assert_array_equal(ds.fa.voxel_indices[:nnonzero],
                       ds.fa.voxel_indices[nnonzero:2 * nnonzero])
def eventrelated_dataset(ds, events=None, time_attr=None, match="prev",
                         eprefix="event"):
    """Segment a dataset into a set of events.

    This function can be used to extract event-related samples from any
    time-series based dataset (actually, it does not have to be time series,
    but could also be any other type of ordered samples). Boxcar-shaped event
    samples, potentially spanning multiple input samples can be automatically
    extracted using :class:`~mvpa.misc.support.Event` definition lists.  For
    each event all samples covering that particular event are used to form the
    corresponding sample.

    An event definition is a dictionary that contains ``onset`` (as sample
    index in the input dataset), ``duration`` (as number of consecutive
    samples after the onset), as well as an arbitrary number of additional
    attributes.

    Alternatively, ``onset`` and ``duration`` may also be given as real time
    stamps (or durations). In this case a to be specified samples attribute in
    the input dataset will be used to convert these into sample indices.

    Parameters
    ----------
    ds : Dataset
      The samples of this input dataset have to be in whatever ascending
      order.
    events : list
      Each event definition has to specify ``onset`` and ``duration``. All
      other attributes will be passed on to the sample attributes collection
      of the returned dataset. At least one event has to be given.
    time_attr : str or None
      If not None, the ``onset`` and ``duration`` specs from the event list
      will be converted using information from this sample attribute. Its
      values will be treated as in-the-same-unit and are used to determine
      corresponding samples from real-value onset and duration definitions.
    match : {'prev', 'next', 'closest'}
      Strategy used to match real-value onsets to sample indices. 'prev'
      chooses the closest preceding sample, 'next' the closest following
      sample and 'closest' the absolute closest sample.
    eprefix : str or None
      If not None, this prefix is used to name additional attributes generated
      by the underlying `~mvpa.mappers.boxcar.BoxcarMapper`. If it is set to
      None, no additional attributes will be created.

    Returns
    -------
    Dataset
      The returned dataset has one sample per each event definition that has
      been passed to the function.

    Raises
    ------
    ValueError
      If no events are given, if a property of the first event is missing
      in any other event, or if ``onset`` or ``duration`` is not defined.
    KeyError
      If `match` is not one of the supported strategies.

    Examples
    --------
    The documentation also contains an :ref:`example script
    <example_eventrelated>` showing a spatio-temporal analysis of fMRI data
    that involves this function.

    >>> from mvpa.datasets import Dataset
    >>> ds = Dataset(np.random.randn(10, 25))
    >>> events = [{'onset': 2, 'duration': 4},
    ...           {'onset': 4, 'duration': 4}]
    >>> eds = eventrelated_dataset(ds, events)
    >>> len(eds)
    2
    >>> eds.nfeatures == ds.nfeatures * 4
    True
    >>> 'mapper' in ds.a
    False
    >>> print eds.a.mapper
    <ChainMapper: <Boxcar: bl=4>-<Flatten>>

    And now the same conversion, but with events specified as real time. This
    is only possible if the input dataset contains a sample attribute with the
    necessary information about the input samples.

    >>> ds.sa['record_time'] = np.linspace(0, 5, len(ds))
    >>> rt_events = [{'onset': 1.05, 'duration': 2.2},
    ...              {'onset': 2.3, 'duration': 2.12}]
    >>> rt_eds = eventrelated_dataset(ds, rt_events, time_attr='record_time',
    ...                               match='closest')
    >>> np.all(eds.samples == rt_eds.samples)
    True
    >>> # returned dataset e.g. has info from original samples
    >>> rt_eds.sa.record_time
    array([[ 1.11111111,  1.66666667,  2.22222222,  2.77777778],
           [ 2.22222222,  2.77777778,  3.33333333,  3.88888889]])
    """
    # relabel argument: translate `match` into the strategy name handed to
    # value2idx(); an unknown label fails right here with a KeyError
    conv_strategy = {"prev": "floor",
                     "next": "ceil",
                     "closest": "round"}[match]

    # Fail early with a clear message instead of an obscure error further
    # down. len() is used instead of plain truthiness so that array-like
    # event containers are not evaluated element-wise.
    if events is None or len(events) == 0:
        raise ValueError("No events specified: at least one event "
                         "definition with 'onset' and 'duration' is "
                         "required.")

    if time_attr is not None:
        tvec = ds.sa[time_attr].value
        # we are asked to convert onset time into sample ids
        descr_events = []
        for ev in events:
            # do not mess with the input data
            ev = copy.deepcopy(ev)
            # best matching sample
            idx = value2idx(ev["onset"], tvec, conv_strategy)
            # store offset of sample time and real onset
            ev["orig_offset"] = ev["onset"] - tvec[idx]
            # rescue the real onset into a new attribute
            ev["orig_onset"] = ev["onset"]
            ev["orig_duration"] = ev["duration"]
            # figure out how many samples we need: all samples from the
            # matched one onwards that start before the event ends
            tail = tvec[idx:]
            ev["duration"] = len(tail[tail < ev["onset"] + ev["duration"]])
            # new onset is sample index
            ev["onset"] = idx
            descr_events.append(ev)
    else:
        descr_events = events

    # convert the event specs into the format expected by BoxcarMapper
    # take the first event as an example of contained keys
    evvars = {}
    for k in descr_events[0]:
        try:
            evvars[k] = [e[k] for e in descr_events]
        except KeyError:
            raise ValueError("Each event property must be present for all "
                             "events (could not find '%s')" % k)
    # checks
    for p in ["onset", "duration"]:
        if p not in evvars:
            raise ValueError("'%s' is a required property for all events."
                             % p)
    durations = evvars["duration"]
    boxlength = max(durations)
    if __debug__:
        if boxlength != min(durations):
            warning("Boxcar mapper will use maximum boxlength (%i) of all "
                    "provided Events." % boxlength)

    # finally create, train and use the boxcar mapper
    bcm = BoxcarMapper(evvars["onset"], boxlength, inspace=eprefix)
    bcm.train(ds)
    ds = ds.get_mapped(bcm)
    # at last reflatten the dataset
    # could we add some meaningful attribute during this mapping, i.e. would
    # assigning 'inspace' do something good?
    ds = ds.get_mapped(FlattenMapper(shape=ds.samples.shape[1:]))
    # add samples attributes for the events, simply dump everything as a
    # samples attribute
    for a in evvars:
        if eprefix is not None and a in ds.sa:
            # if there is already a samples attribute like this, it got mapped
            # by BoxcarMapper (i.e. is multi-dimensional). We move it aside
            # under new `eprefix` name
            ds.sa[eprefix + "_" + a] = ds.sa[a]
        if a in ("onset", "duration"):
            # special case: we want the non-discrete, original onset and
            # duration
            if time_attr is not None:
                # but only if there was a conversion happening, since
                # otherwise we get the same info from BoxcarMapper
                ds.sa[a] = [e[a] for e in events]
        else:
            ds.sa[a] = evvars[a]
    return ds
def _events_to_sample_units(events, tvec, conv_strategy):
    """Return copies of `events` with onset/duration given in samples."""
    converted = []
    for ev in events:
        # do not mess with the input data
        ev = copy.deepcopy(ev)
        # best matching sample
        idx = value2idx(ev['onset'], tvec, conv_strategy)
        # store offset of sample time and real onset
        ev['orig_offset'] = ev['onset'] - tvec[idx]
        # rescue the real onset and duration into new attributes
        ev['orig_onset'] = ev['onset']
        ev['orig_duration'] = ev['duration']
        # figure out how many samples we need: all samples from the matched
        # one onwards that start before the event ends
        remaining = tvec[idx:]
        ev['duration'] = \
            len(remaining[remaining < ev['onset'] + ev['duration']])
        # new onset is sample index
        ev['onset'] = idx
        converted.append(ev)
    return converted


def _gather_event_properties(descr_events):
    """Return a dict mapping each property name to its per-event values."""
    evvars = {}
    # take the first event as an example of contained keys
    for k in descr_events[0]:
        try:
            evvars[k] = [e[k] for e in descr_events]
        except KeyError:
            raise ValueError("Each event property must be present for all "
                             "events (could not find '%s')" % k)
    for p in ['onset', 'duration']:
        if p not in evvars:
            raise ValueError("'%s' is a required property for all events."
                             % p)
    return evvars


def eventrelated_dataset(ds, events=None, time_attr=None, match='prev',
                         eprefix='event'):
    """Segment a dataset into a set of events.

    This function can be used to extract event-related samples from any
    time-series based dataset (actually, it does not have to be time series,
    but could also be any other type of ordered samples). Boxcar-shaped event
    samples, potentially spanning multiple input samples can be automatically
    extracted using :class:`~mvpa.misc.support.Event` definition lists.  For
    each event all samples covering that particular event are used to form the
    corresponding sample.

    An event definition is a dictionary that contains ``onset`` (as sample
    index in the input dataset), ``duration`` (as number of consecutive
    samples after the onset), as well as an arbitrary number of additional
    attributes.

    Alternatively, ``onset`` and ``duration`` may also be given as real time
    stamps (or durations). In this case a to be specified samples attribute in
    the input dataset will be used to convert these into sample indices.

    Parameters
    ----------
    ds : Dataset
      The samples of this input dataset have to be in whatever ascending
      order.
    events : list
      Each event definition has to specify ``onset`` and ``duration``. All
      other attributes will be passed on to the sample attributes collection
      of the returned dataset. At least one event has to be given.
    time_attr : str or None
      If not None, the ``onset`` and ``duration`` specs from the event list
      will be converted using information from this sample attribute. Its
      values will be treated as in-the-same-unit and are used to determine
      corresponding samples from real-value onset and duration definitions.
    match : {'prev', 'next', 'closest'}
      Strategy used to match real-value onsets to sample indices. 'prev'
      chooses the closest preceding sample, 'next' the closest following
      sample and 'closest' the absolute closest sample.
    eprefix : str or None
      If not None, this prefix is used to name additional attributes generated
      by the underlying `~mvpa.mappers.boxcar.BoxcarMapper`. If it is set to
      None, no additional attributes will be created.

    Returns
    -------
    Dataset
      The returned dataset has one sample per each event definition that has
      been passed to the function.

    Raises
    ------
    ValueError
      If no events are given, if a property of the first event is missing
      in any other event, or if ``onset`` or ``duration`` is not defined.
    KeyError
      If `match` is not one of the supported strategies.

    Examples
    --------
    The documentation also contains an :ref:`example script
    <example_eventrelated>` showing a spatio-temporal analysis of fMRI data
    that involves this function.

    >>> from mvpa.datasets import Dataset
    >>> ds = Dataset(np.random.randn(10, 25))
    >>> events = [{'onset': 2, 'duration': 4},
    ...           {'onset': 4, 'duration': 4}]
    >>> eds = eventrelated_dataset(ds, events)
    >>> len(eds)
    2
    >>> eds.nfeatures == ds.nfeatures * 4
    True
    >>> 'mapper' in ds.a
    False
    >>> print eds.a.mapper
    <ChainMapper: <Boxcar: bl=4>-<Flatten>>

    And now the same conversion, but with events specified as real time. This
    is only possible if the input dataset contains a sample attribute with the
    necessary information about the input samples.

    >>> ds.sa['record_time'] = np.linspace(0, 5, len(ds))
    >>> rt_events = [{'onset': 1.05, 'duration': 2.2},
    ...              {'onset': 2.3, 'duration': 2.12}]
    >>> rt_eds = eventrelated_dataset(ds, rt_events, time_attr='record_time',
    ...                               match='closest')
    >>> np.all(eds.samples == rt_eds.samples)
    True
    >>> # returned dataset e.g. has info from original samples
    >>> rt_eds.sa.record_time
    array([[ 1.11111111,  1.66666667,  2.22222222,  2.77777778],
           [ 2.22222222,  2.77777778,  3.33333333,  3.88888889]])
    """
    # relabel argument: translate `match` into the strategy name handed to
    # value2idx(); an unknown label fails right here with a KeyError
    conv_strategy = {
        'prev': 'floor',
        'next': 'ceil',
        'closest': 'round'
    }[match]

    # Without any event there is nothing to segment. Say so explicitly
    # instead of failing with an obscure error further down. len() is used
    # instead of plain truthiness so that array-like event containers are
    # not evaluated element-wise.
    if events is None or len(events) == 0:
        raise ValueError("No events specified: at least one event "
                         "definition with 'onset' and 'duration' is "
                         "required.")

    if time_attr is not None:
        # we are asked to convert onset time into sample ids
        descr_events = _events_to_sample_units(
            events, ds.sa[time_attr].value, conv_strategy)
    else:
        descr_events = events

    # convert the event specs into the format expected by BoxcarMapper
    evvars = _gather_event_properties(descr_events)

    durations = evvars['duration']
    boxlength = max(durations)
    if __debug__:
        if boxlength != min(durations):
            warning('Boxcar mapper will use maximum boxlength (%i) of all '
                    'provided Events.' % boxlength)

    # finally create, train and use the boxcar mapper
    bcm = BoxcarMapper(evvars['onset'], boxlength, inspace=eprefix)
    bcm.train(ds)
    ds = ds.get_mapped(bcm)
    # at last reflatten the dataset
    # could we add some meaningful attribute during this mapping, i.e. would
    # assigning 'inspace' do something good?
    ds = ds.get_mapped(FlattenMapper(shape=ds.samples.shape[1:]))
    # add samples attributes for the events, simply dump everything as a
    # samples attribute
    for a in evvars:
        if eprefix is not None and a in ds.sa:
            # if there is already a samples attribute like this, it got mapped
            # by BoxcarMapper (i.e. is multi-dimensional). We move it aside
            # under new `eprefix` name
            ds.sa[eprefix + '_' + a] = ds.sa[a]
        if a in ('onset', 'duration'):
            # special case: we want the non-discrete, original onset and
            # duration
            if time_attr is not None:
                # but only if there was a conversion happening, since
                # otherwise we get the same info from BoxcarMapper
                ds.sa[a] = [e[a] for e in events]
        else:
            ds.sa[a] = evvars[a]
    return ds