def _extract_boxcar_events(ds, events=None, time_attr=None, match="prev",
                           eprefix="event", event_mapper=None):
    """Extract one boxcar sample per event from a dataset.

    See eventrelated_dataset() for the description of the arguments.

    The dataset is mapped through a BoxcarMapper built from the event
    onsets, then through a FlattenMapper (if ``event_mapper`` is None) or
    through ``event_mapper``. The collected event properties are attached
    to the result via ``_evvars2ds()``.

    Raises ValueError if the collected event properties lack ``onset`` or
    ``duration``, and KeyError if ``match`` is not one of the known labels.
    """
    # translate the public `match` label into the strategy name handed to
    # value2idx()
    strategy = {"prev": "floor", "next": "ceil", "closest": "round"}[match]
    realtime = time_attr is not None

    if realtime:
        # onsets and durations are expressed in units of `time_attr`; turn
        # them into a sample index and a number of samples
        timestamps = ds.sa[time_attr].value
        descr_events = []
        for spec in events:
            # work on a private copy, the caller's events stay untouched
            spec = copy.deepcopy(spec)
            onset = spec["onset"]
            # best matching sample
            start = value2idx(onset, timestamps, strategy)
            # distance between the requested onset and the matched sample
            spec["orig_offset"] = onset - timestamps[start]
            # preserve the original specification under new names
            spec["orig_onset"] = onset
            duration = spec["duration"]
            spec["orig_duration"] = duration
            # count the samples from `start` on that lie before the event end
            tail = timestamps[start:]
            spec["duration"] = len(tail[tail < onset + duration])
            # from here on the onset is a sample index
            spec["onset"] = start
            descr_events.append(spec)
    else:
        descr_events = events

    # convert the event specs into per-property value lists
    evvars = _events2dict(descr_events)
    for required in ("onset", "duration"):
        if required not in evvars:
            raise ValueError(
                "'%s' is a required property for all events." % required)

    durations = evvars["duration"]
    boxlength = max(durations)
    if __debug__:
        if max(durations) != min(durations):
            warning("Boxcar mapper will use maximum boxlength (%i) of all "
                    "provided Events." % boxlength)

    # create, train and apply the boxcar mapper
    boxcar = BoxcarMapper(evvars["onset"], boxlength, space=eprefix)
    boxcar.train(ds)
    ds = ds.get_mapped(boxcar)

    # without a custom mapper the boxcar samples are simply flattened again
    if event_mapper is None:
        post_mapper = FlattenMapper(shape=ds.samples.shape[1:])
    else:
        post_mapper = event_mapper
    ds = ds.get_mapped(post_mapper)

    # after a time conversion report the original (non-discrete) onset and
    # duration rather than the sample-based ones
    if realtime:
        for attr in ("onset", "duration"):
            evvars[attr] = [e[attr] for e in events]

    # dump all event properties as sample attributes
    return _evvars2ds(ds, evvars, eprefix)
def test_er_nifti_dataset():
    """Check event-related segmentation of a NIfTI time series.

    Covers the unmasked and the masked dataset: number of features and
    samples, broadcast feature attributes, event sample attributes, the
    extracted sample values, and reverse mapping into NIfTI images.
    """
    def _img_shape(img):
        # newer nibabel images expose `shape`, while `get_shape()` is
        # deprecated/removed there; fall back for images lacking `shape`
        shape = getattr(img, "shape", None)
        if shape is None:
            shape = img.get_shape()
        return shape

    # setup data sources
    tssrc = os.path.join(pymvpa_dataroot, u"bold.nii.gz")
    evsrc = os.path.join(pymvpa_dataroot, "fslev3.txt")
    masrc = os.path.join(pymvpa_dataroot, "mask.nii.gz")
    evs = FslEV3(evsrc).to_events()
    # load timeseries
    ds_orig = fmri_dataset(tssrc)
    # segment into events
    ds = eventrelated_dataset(ds_orig, evs, time_attr="time_coords")

    # each boxcar is expected to span 4 volumes (per the original note: 9s
    # events at a TR of 2.5s), hence 4 * np.prod((1, 20, 40)) == 3200 features
    assert_equal(ds.nfeatures, 3200)
    assert_equal(len(ds), len(evs))
    # the voxel indices are reflattened after boxcaring, but still 3D
    assert_equal(ds.fa.voxel_indices.shape, (ds.nfeatures, 3))
    # and they have been broadcasted through all boxcars
    assert_array_equal(ds.fa.voxel_indices[:800],
                       ds.fa.voxel_indices[800:1600])
    # each feature got an event offset value
    assert_array_equal(ds.fa.event_offsetidx, np.repeat([0, 1, 2, 3], 800))
    # check for all event attributes
    assert_true("onset" in ds.sa)
    assert_true("duration" in ds.sa)
    assert_true("features" in ds.sa)

    # check samples
    origsamples = _load_anyimg(tssrc)[0]
    onsets = [value2idx(e["onset"], ds_orig.sa.time_coords, "floor")
              for e in evs]
    # sample attributes carrying event properties (loop invariant)
    evattrs = [a for a in ds.sa
               if "event_attrs" in a and "event_attrs_event" not in a]
    for i, onset in enumerate(onsets):
        assert_array_equal(ds.samples[i],
                           origsamples[onset:onset + 4].ravel())
        assert_array_equal(ds.sa.time_indices[i],
                           np.arange(onset, onset + 4))
        assert_array_equal(ds.sa.time_coords[i],
                           np.arange(onset, onset + 4) * 2.5)
        for evattr in evattrs:
            # strip the two leading name components to get the event key
            assert_array_equal(evs[i]["_".join(evattr.split("_")[2:])],
                               ds.sa[evattr].value[i])
    # check offset: only the last one exactly matches the tr
    assert_array_equal(ds.sa.orig_offset, [1, 1, 0])

    # map back into voxel space, should ignore additional features
    nim = map2nifti(ds)
    # origsamples has t,x,y,z
    assert_equal(_img_shape(nim), origsamples.shape[1:] + (len(ds) * 4,))
    # check shape of a single sample
    nim = map2nifti(ds, ds.samples[0])
    # pynifti image has [t,]z,y,x
    assert_equal(_img_shape(nim), (40, 20, 1, 4))

    # and now with masking
    ds = fmri_dataset(tssrc, mask=masrc)
    ds = eventrelated_dataset(ds, evs, time_attr="time_coords")
    nnonzero = len(_load_anyimg(masrc)[0].nonzero()[0])
    assert_equal(nnonzero, 530)
    # 4 volumes per boxcar times the 530 in-mask voxels
    assert_equal(ds.nfeatures, 4 * 530)
    assert_equal(len(ds), len(evs))
    # and they have been broadcasted through all boxcars
    assert_array_equal(ds.fa.voxel_indices[:nnonzero],
                       ds.fa.voxel_indices[nnonzero:2 * nnonzero])
def extract_boxcar_event_samples(
        ds, events=None, time_attr=None, match='prev',
        event_offset=None, event_duration=None,
        eprefix='event', event_mapper=None):
    """Segment a dataset by extracting boxcar events

    For each event a run of (multiple) consecutive samples is extracted.
    The runs are either returned in a flattened shape, or are subject to
    further processing by a custom mapper.

    Boxcar event model details
    --------------------------

    For each event all samples covering that particular event are used to
    form a corresponding sample. One sample for each event is returned.
    Event specification dictionaries must contain an ``onset`` attribute
    (as sample index in the input dataset) and a ``duration`` (as number of
    consecutive samples after the onset). Any number of additional
    attributes can be present in an event specification. Those attributes
    are included as sample attributes in the returned dataset.

    Alternatively, ``onset`` and ``duration`` may also be given in a
    non-discrete time specification. In this case a dataset attribute needs
    to be specified that contains time-stamps for each input data sample,
    and is used to convert times into discrete sample indices (see
    ``match`` argument).

    A mapper instance can be provided (see ``event_mapper``) to implement
    further processing of each event sample, for example in order to yield
    average samples.

    Parameters
    ----------
    ds : Dataset
      Input dataset.
    events : list
      Event definitions (dictionaries) with ``onset`` and ``duration``.
    time_attr : str or None
      Name of the sample attribute with the time-stamps of the input
      samples. If not None, onsets and durations are converted into sample
      indices and sample counts using this attribute.
    match : {'prev', 'next', 'closest'}
      Strategy used to match an onset to a sample index.
    event_offset : None or float
      If not None, it is added to the ``onset`` of (a copy of) each event.
    event_duration : None or float
      If not None, it replaces the ``duration`` of (a copy of) each event.
    eprefix : str or None
      Passed as ``space`` to the BoxcarMapper and to ``_evvars2ds()``.
    event_mapper : Mapper or None
      Mapper applied to the boxcar samples. If None, a FlattenMapper is
      used.

    Returns
    -------
    Dataset
      One sample per each event definition that has been passed to the
      function. Additional event attributes are included as sample
      attributes.

    Examples
    --------
    The documentation also contains an :ref:`example script
    <example_eventrelated>` showing a spatio-temporal analysis of fMRI data
    that involves this function.

    >>> from mvpa2.datasets import Dataset
    >>> ds = Dataset(np.random.randn(10, 25))
    >>> events = [{'onset': 2, 'duration': 4},
    ...           {'onset': 4, 'duration': 4}]
    >>> eds = eventrelated_dataset(ds, events)
    >>> len(eds)
    2
    >>> eds.nfeatures == ds.nfeatures * 4
    True
    >>> 'mapper' in ds.a
    False
    >>> print eds.a.mapper
    <Chain: <Boxcar: bl=4>-<Flatten>>

    And now the same conversion, but with events specified as real time.
    This is only possible if the input dataset contains a sample attribute
    with the necessary information about the input samples.

    >>> ds.sa['record_time'] = np.linspace(0, 5, len(ds))
    >>> rt_events = [{'onset': 1.05, 'duration': 2.2},
    ...              {'onset': 2.3, 'duration': 2.12}]
    >>> rt_eds = eventrelated_dataset(ds, rt_events, time_attr='record_time',
    ...                               match='closest')
    >>> np.all(eds.samples == rt_eds.samples)
    True
    >>> # returned dataset e.g. has info from original samples
    >>> rt_eds.sa.record_time
    array([[ 1.11111111,  1.66666667,  2.22222222,  2.77777778],
           [ 2.22222222,  2.77777778,  3.33333333,  3.88888889]])
    """
    # translate the `match` label into the strategy name for value2idx()
    strategy = {'prev': 'floor', 'next': 'ceil', 'closest': 'round'}[match]

    if event_offset is not None or event_duration is not None:
        # apply the global offset/duration to copies of the events, the
        # caller's list is left alone
        adjusted = []
        for spec in events:
            spec = copy.deepcopy(spec)
            if event_offset is not None:
                spec['onset'] += event_offset
            if event_duration is not None:
                spec['duration'] = event_duration
            adjusted.append(spec)
        events = adjusted

    realtime = time_attr is not None
    if realtime:
        # convert onset times into sample ids, durations into sample counts
        timestamps = ds.sa[time_attr].value
        descr_events = []
        for spec in events:
            # do not mess with the input data
            spec = copy.deepcopy(spec)
            onset = spec['onset']
            # best matching sample
            start = value2idx(onset, timestamps, strategy)
            # offset between the real onset and the matched sample time
            spec['orig_offset'] = onset - timestamps[start]
            # rescue the real onset and duration into new attributes
            spec['orig_onset'] = onset
            duration = spec['duration']
            spec['orig_duration'] = duration
            # number of samples from `start` on that precede the event end
            tail = timestamps[start:]
            spec['duration'] = len(tail[tail < onset + duration])
            # new onset is the sample index
            spec['onset'] = start
            descr_events.append(spec)
    else:
        descr_events = events

    # convert the event specs into per-property value lists
    evvars = _events2dict(descr_events)
    for required in ('onset', 'duration'):
        if required not in evvars:
            raise ValueError(
                "'%s' is a required property for all events." % required)

    durations = evvars['duration']
    boxlength = max(durations)
    if __debug__:
        if max(durations) != min(durations):
            warning('Boxcar mapper will use maximum boxlength (%i) of all '
                    'provided Events.' % boxlength)

    # create, train and use the boxcar mapper
    boxcar = BoxcarMapper(evvars['onset'], boxlength, space=eprefix)
    boxcar.train(ds)
    ds = ds.get_mapped(boxcar)

    # reflatten the dataset unless a custom event mapper was given
    if event_mapper is None:
        post_mapper = FlattenMapper(shape=ds.samples.shape[1:])
    else:
        post_mapper = event_mapper
    ds = ds.get_mapped(post_mapper)

    # special case: after a conversion into discrete time report onset and
    # duration as they were before that conversion
    if realtime:
        for attr in ('onset', 'duration'):
            evvars[attr] = [e[attr] for e in events]

    # dump everything as sample attributes
    return _evvars2ds(ds, evvars, eprefix)
def _extract_boxcar_events(ds, events=None, time_attr=None, match='prev',
                           eprefix='event', event_mapper=None):
    """Build a dataset with one boxcar sample per event.

    See eventrelated_dataset() for the documentation of the arguments.

    Raises ValueError if the collected event properties lack ``onset`` or
    ``duration``; an unknown ``match`` label raises KeyError.
    """
    # relabel argument for value2idx()
    conv_strategy = dict(prev='floor', next='ceil', closest='round')[match]

    def _to_sample_units(event, tvec):
        # Return a copy of `event` with onset/duration expressed in samples.
        event = copy.deepcopy(event)
        real_onset = event['onset']
        idx = value2idx(real_onset, tvec, conv_strategy)
        # keep the original timing information under new keys
        event['orig_offset'] = real_onset - tvec[idx]
        event['orig_onset'] = real_onset
        real_duration = event['duration']
        event['orig_duration'] = real_duration
        # samples from idx on whose time-stamp precedes the end of the event
        following = tvec[idx:]
        event['duration'] = len(following[following < real_onset + real_duration])
        event['onset'] = idx
        return event

    if time_attr is None:
        # events are already specified in sample units
        descr_events = events
    else:
        tvec = ds.sa[time_attr].value
        descr_events = [_to_sample_units(ev, tvec) for ev in events]

    # per-property lists in the format expected by BoxcarMapper
    evvars = _events2dict(descr_events)

    missing = [p for p in ('onset', 'duration') if p not in evvars]
    if missing:
        # report the first missing property
        raise ValueError("'%s' is a required property for all events."
                         % missing[0])

    boxlength = max(evvars['duration'])
    if __debug__:
        if boxlength != min(evvars['duration']):
            warning('Boxcar mapper will use maximum boxlength (%i) of all '
                    'provided Events.' % boxlength)

    # create, train and use the boxcar mapper
    bcm = BoxcarMapper(evvars['onset'], boxlength, space=eprefix)
    bcm.train(ds)
    boxed = ds.get_mapped(bcm)

    if event_mapper is not None:
        result = boxed.get_mapped(event_mapper)
    else:
        # default: reflatten the boxcar samples
        result = boxed.get_mapped(FlattenMapper(shape=boxed.samples.shape[1:]))

    if time_attr is not None:
        # onset and duration are reported as originally specified, not in
        # their discretized form
        evvars['onset'] = [e['onset'] for e in events]
        evvars['duration'] = [e['duration'] for e in events]

    # add all event properties as sample attributes
    return _evvars2ds(result, evvars, eprefix)
def test_er_nifti_dataset():
    """Check event-related segmentation of a NIfTI time series.

    Exercises the unmasked and the masked dataset and the reverse mapping
    of the event-related samples into NIfTI images.
    """
    def _volume_shape(img):
        # the way to query the image shape depends on the nibabel version
        if externals.versions['nibabel'] >= '1.2':
            return img.shape
        return img.get_shape()

    # setup data sources
    dataroot = pymvpa_dataroot
    tssrc = os.path.join(dataroot, u'bold.nii.gz')
    evsrc = os.path.join(dataroot, 'fslev3.txt')
    masrc = os.path.join(dataroot, 'mask.nii.gz')
    evs = FslEV3(evsrc).to_events()
    # load timeseries and segment it into events
    ds_orig = fmri_dataset(tssrc)
    ds = eventrelated_dataset(ds_orig, evs, time_attr='time_coords')

    # each boxcar is expected to span 4 volumes (per the original note: 9s
    # events at a TR of 2.5s), hence 4 * np.prod((1, 20, 40)) == 3200 features
    assert_equal(ds.nfeatures, 3200)
    assert_equal(len(ds), len(evs))
    # the voxel indices are reflattened after boxcaring, but still 3D
    voxel_indices = ds.fa.voxel_indices
    assert_equal(voxel_indices.shape, (ds.nfeatures, 3))
    # and they have been broadcasted through all boxcars
    assert_array_equal(voxel_indices[:800], voxel_indices[800:1600])
    # each feature got an event offset value
    assert_array_equal(ds.fa.event_offsetidx, np.repeat([0, 1, 2, 3], 800))
    # check for all event attributes
    for name in ('onset', 'duration', 'features'):
        assert_true(name in ds.sa)

    # check samples
    origsamples = _load_anyimg(tssrc)[0]
    sample_onsets = [value2idx(e['onset'], ds_orig.sa.time_coords, 'floor')
                     for e in evs]
    for i, onset in enumerate(sample_onsets):
        stop = onset + 4
        assert_array_equal(ds.samples[i], origsamples[onset:stop].ravel())
        assert_array_equal(ds.sa.time_indices[i], np.arange(onset, stop))
        assert_array_equal(ds.sa.time_coords[i], np.arange(onset, stop) * 2.5)
        for evattr in ds.sa:
            if not evattr.count("event_attrs"):
                continue
            if evattr.count('event_attrs_event'):
                continue
            # the event key is the attribute name minus its two leading
            # '_'-separated components
            key = '_'.join(evattr.split('_')[2:])
            assert_array_equal(evs[i][key], ds.sa[evattr].value[i])
    # check offset: only the last one exactly matches the tr
    assert_array_equal(ds.sa.orig_offset, [1, 1, 0])

    # map back into voxel space, should ignore additional features
    nim = map2nifti(ds)
    # origsamples has t,x,y,z
    assert_equal(_volume_shape(nim), origsamples.shape[1:] + (len(ds) * 4, ))
    # check shape of a single sample
    nim = map2nifti(ds, ds.samples[0])
    # pynifti image has [t,]z,y,x
    assert_equal(_volume_shape(nim), (40, 20, 1, 4))

    # and now with masking
    ds = fmri_dataset(tssrc, mask=masrc)
    ds = eventrelated_dataset(ds, evs, time_attr='time_coords')
    nnonzero = len(_load_anyimg(masrc)[0].nonzero()[0])
    assert_equal(nnonzero, 530)
    # 4 volumes per boxcar times the 530 in-mask voxels
    assert_equal(ds.nfeatures, 4 * 530)
    assert_equal(len(ds), len(evs))
    # and they have been broadcasted through all boxcars
    masked_indices = ds.fa.voxel_indices
    assert_array_equal(masked_indices[:nnonzero],
                       masked_indices[nnonzero:2 * nnonzero])
def extract_boxcar_event_samples(ds, events=None, time_attr=None,
                                 match='prev', event_offset=None,
                                 event_duration=None, eprefix='event',
                                 event_mapper=None):
    """Segment a dataset by extracting boxcar events

    (Multiple) consecutive samples are extracted for each event, and are
    either returned in a flattened shape, or subject to further processing.

    Events are specified as a list of dictionaries (see
    :class:`~mvpa2.misc.support.Event` for a helper class). Each dictionary
    contains all relevant attributes to describe an event. This is at least
    the ``onset`` time of an event, but can also comprise ``duration``,
    ``amplitude``, and arbitrary other attributes.

    Boxcar event model details
    --------------------------

    For each event all samples covering that particular event are used to
    form a corresponding sample. One sample for each event is returned.
    Event specification dictionaries must contain an ``onset`` attribute
    (as sample index in the input dataset) and a ``duration`` (as number of
    consecutive samples after the onset). Any number of additional
    attributes can be present in an event specification. Those attributes
    are included as sample attributes in the returned dataset.

    Alternatively, ``onset`` and ``duration`` may also be given in a
    non-discrete time specification. In this case a dataset attribute needs
    to be specified that contains time-stamps for each input data sample,
    and is used to convert times into discrete sample indices (see
    ``match`` argument).

    A mapper instance can be provided (see ``event_mapper``) to implement
    further processing of each event sample, for example in order to yield
    average samples.

    Parameters
    ----------
    ds : Dataset
      The samples of this input dataset have to be in whatever ascending
      order.
    events : list
      Each event definition has to specify ``onset`` and ``duration``. All
      other attributes will be passed on to the sample attributes collection
      of the returned dataset.
    time_attr : str or None
      Attribute with dataset sample time-stamps. If not None, the ``onset``
      and ``duration`` specs from the event list will be converted using
      information from this sample attribute. Its values will be treated as
      in-the-same-unit and are used to determine corresponding samples from
      real-value onset and duration definitions.
    match : {'prev', 'next', 'closest'}
      Strategy used to match real-value onsets to sample indices. 'prev'
      chooses the closest preceding sample, 'next' the closest following
      sample and 'closest' the absolute closest sample.
    event_offset : None or float
      If not None, all event ``onset`` specifications will be offset by this
      value before boxcar modeling is performed.
    event_duration : None or float
      If not None, all event ``duration`` specifications will be set to this
      value before boxcar modeling is done.
    eprefix : str or None
      If not None, this prefix is used to name additional attributes
      generated by the underlying `~mvpa2.mappers.boxcar.BoxcarMapper`. If
      it is set to None, no additional attributes will be created.
    event_mapper : Mapper
      This mapper is used to forward-map the dataset containing the boxcar
      event samples. If None (default) a FlattenMapper is employed to
      convert multi-dimensional sample matrices into simple one-dimensional
      sample vectors. This option can be used to implement temporal
      compression, by e.g. averaging samples within an event boxcar using an
      FxMapper. Any mapper needs to keep the sample axis unchanged, i.e.
      number and order of samples remain the same.

    Returns
    -------
    Dataset
      One sample per each event definition that has been passed to the
      function. Additional event attributes are included as sample
      attributes.

    Examples
    --------
    The documentation also contains an :ref:`example script
    <example_eventrelated>` showing a spatio-temporal analysis of fMRI data
    that involves this function.

    >>> from mvpa2.datasets import Dataset
    >>> ds = Dataset(np.random.randn(10, 25))
    >>> events = [{'onset': 2, 'duration': 4},
    ...           {'onset': 4, 'duration': 4}]
    >>> eds = eventrelated_dataset(ds, events)
    >>> len(eds)
    2
    >>> eds.nfeatures == ds.nfeatures * 4
    True
    >>> 'mapper' in ds.a
    False
    >>> print eds.a.mapper
    <Chain: <Boxcar: bl=4>-<Flatten>>

    And now the same conversion, but with events specified as real time.
    This is only possible if the input dataset contains a sample attribute
    with the necessary information about the input samples.

    >>> ds.sa['record_time'] = np.linspace(0, 5, len(ds))
    >>> rt_events = [{'onset': 1.05, 'duration': 2.2},
    ...              {'onset': 2.3, 'duration': 2.12}]
    >>> rt_eds = eventrelated_dataset(ds, rt_events, time_attr='record_time',
    ...                               match='closest')
    >>> np.all(eds.samples == rt_eds.samples)
    True
    >>> # returned dataset e.g. has info from original samples
    >>> rt_eds.sa.record_time
    array([[ 1.11111111,  1.66666667,  2.22222222,  2.77777778],
           [ 2.22222222,  2.77777778,  3.33333333,  3.88888889]])
    """
    # relabel argument for value2idx()
    conv_strategy = dict(prev='floor', next='ceil', closest='round')[match]

    def _with_overrides(event):
        # Copy of `event` with the global offset/duration applied.
        event = copy.deepcopy(event)
        if event_offset is not None:
            event['onset'] += event_offset
        if event_duration is not None:
            event['duration'] = event_duration
        return event

    def _to_sample_units(event, tvec):
        # Copy of `event` with onset/duration expressed in samples.
        event = copy.deepcopy(event)
        real_onset = event['onset']
        # best matching sample
        idx = value2idx(real_onset, tvec, conv_strategy)
        # keep the original timing information under new keys
        event['orig_offset'] = real_onset - tvec[idx]
        event['orig_onset'] = real_onset
        real_duration = event['duration']
        event['orig_duration'] = real_duration
        # samples from idx on whose time-stamp precedes the end of the event
        following = tvec[idx:]
        event['duration'] = len(following[following < real_onset + real_duration])
        # new onset is the sample index
        event['onset'] = idx
        return event

    if event_offset is not None or event_duration is not None:
        # the adjusted copies replace the event list from here on
        events = [_with_overrides(ev) for ev in events]

    if time_attr is None:
        descr_events = events
    else:
        # we are asked to convert onset times into sample ids
        tvec = ds.sa[time_attr].value
        descr_events = [_to_sample_units(ev, tvec) for ev in events]

    # convert the event specs into the format expected by BoxcarMapper
    evvars = _events2dict(descr_events)

    missing = [p for p in ('onset', 'duration') if p not in evvars]
    if missing:
        # report the first missing property
        raise ValueError("'%s' is a required property for all events."
                         % missing[0])

    boxlength = max(evvars['duration'])
    if __debug__:
        if boxlength != min(evvars['duration']):
            warning('Boxcar mapper will use maximum boxlength (%i) of all '
                    'provided Events.' % boxlength)

    # finally create, train and use the boxcar mapper
    bcm = BoxcarMapper(evvars['onset'], boxlength, space=eprefix)
    bcm.train(ds)
    boxed = ds.get_mapped(bcm)

    if event_mapper is not None:
        result = boxed.get_mapped(event_mapper)
    else:
        # at last reflatten the dataset
        result = boxed.get_mapped(FlattenMapper(shape=boxed.samples.shape[1:]))

    if time_attr is not None:
        # special case: after a conversion into discrete time, onset and
        # duration are reported as they were before that conversion
        evvars['onset'] = [e['onset'] for e in events]
        evvars['duration'] = [e['duration'] for e in events]

    # simply dump everything as sample attributes
    return _evvars2ds(result, evvars, eprefix)
def eventrelated_dataset(ds, events=None, time_attr=None, match='prev',
                         eprefix='event', event_mapper=None):
    """Segment a dataset into a set of events.

    This function can be used to extract event-related samples from any
    time-series based dataset (actually, it does not have to be a time
    series, but could also be any other type of ordered samples).
    Boxcar-shaped event samples, potentially spanning multiple input samples
    can be automatically extracted using :class:`~mvpa2.misc.support.Event`
    definition lists. For each event all samples covering that particular
    event are used to form the corresponding sample.

    An event definition is a dictionary that contains ``onset`` (as sample
    index in the input dataset), ``duration`` (as number of consecutive
    samples after the onset), as well as an arbitrary number of additional
    attributes.

    Alternatively, ``onset`` and ``duration`` may also be given as real time
    stamps (or durations). In this case a to be specified samples attribute
    in the input dataset will be used to convert these into sample indices.

    Parameters
    ----------
    ds : Dataset
      The samples of this input dataset have to be in whatever ascending
      order.
    events : list
      Each event definition has to specify ``onset`` and ``duration``. All
      other attributes will be passed on to the sample attributes collection
      of the returned dataset. Must not be None or empty.
    time_attr : str or None
      If not None, the ``onset`` and ``duration`` specs from the event list
      will be converted using information from this sample attribute. Its
      values will be treated as in-the-same-unit and are used to determine
      corresponding samples from real-value onset and duration definitions.
    match : {'prev', 'next', 'closest'}
      Strategy used to match real-value onsets to sample indices. 'prev'
      chooses the closest preceding sample, 'next' the closest following
      sample and 'closest' the absolute closest sample.
    eprefix : str or None
      If not None, this prefix is used to name additional attributes
      generated by the underlying `~mvpa2.mappers.boxcar.BoxcarMapper`. If
      it is set to None, no additional attributes will be created.
    event_mapper : Mapper
      This mapper is used to forward-map the dataset containing the boxcar
      event samples. If None (default) a FlattenMapper is employed to
      convert multi-dimensional sample matrices into simple one-dimensional
      sample vectors. This option can be used to implement temporal
      compression, by e.g. averaging samples within an event boxcar using an
      FxMapper. Any mapper needs to keep the sample axis unchanged, i.e.
      number and order of samples remain the same.

    Returns
    -------
    Dataset
      The returned dataset has one sample per each event definition that has
      been passed to the function.

    Raises
    ------
    ValueError
      If ``events`` is None or empty, if a property of the first event is
      missing in any other event, or if ``onset`` or ``duration`` is not
      specified.
    KeyError
      If ``match`` is not one of the supported strategies.

    Examples
    --------
    The documentation also contains an :ref:`example script
    <example_eventrelated>` showing a spatio-temporal analysis of fMRI data
    that involves this function.

    >>> from mvpa2.datasets import Dataset
    >>> ds = Dataset(np.random.randn(10, 25))
    >>> events = [{'onset': 2, 'duration': 4},
    ...           {'onset': 4, 'duration': 4}]
    >>> eds = eventrelated_dataset(ds, events)
    >>> len(eds)
    2
    >>> eds.nfeatures == ds.nfeatures * 4
    True
    >>> 'mapper' in ds.a
    False
    >>> print eds.a.mapper
    <Chain: <Boxcar: bl=4>-<Flatten>>

    And now the same conversion, but with events specified as real time.
    This is only possible if the input dataset contains a sample attribute
    with the necessary information about the input samples.

    >>> ds.sa['record_time'] = np.linspace(0, 5, len(ds))
    >>> rt_events = [{'onset': 1.05, 'duration': 2.2},
    ...              {'onset': 2.3, 'duration': 2.12}]
    >>> rt_eds = eventrelated_dataset(ds, rt_events, time_attr='record_time',
    ...                               match='closest')
    >>> np.all(eds.samples == rt_eds.samples)
    True
    >>> # returned dataset e.g. has info from original samples
    >>> rt_eds.sa.record_time
    array([[ 1.11111111,  1.66666667,  2.22222222,  2.77777778],
           [ 2.22222222,  2.77777778,  3.33333333,  3.88888889]])
    """
    # relabel argument
    conv_strategy = {'prev': 'floor',
                     'next': 'ceil',
                     'closest': 'round'}[match]

    # fail early with a clear message: without this check a missing or empty
    # event list surfaces as an opaque TypeError/IndexError further down
    if events is None or len(events) == 0:
        raise ValueError(
            "'events' must be a non-empty list of event definitions.")

    if time_attr is not None:
        tvec = ds.sa[time_attr].value
        # we are asked to convert onset time into sample ids
        descr_events = []
        for ev in events:
            # do not mess with the input data
            ev = copy.deepcopy(ev)
            # best matching sample
            idx = value2idx(ev['onset'], tvec, conv_strategy)
            # store offset of sample time and real onset
            ev['orig_offset'] = ev['onset'] - tvec[idx]
            # rescue the real onset into a new attribute
            ev['orig_onset'] = ev['onset']
            ev['orig_duration'] = ev['duration']
            # figure out how many samples we need: count those from idx on
            # whose time-stamp precedes the end of the event
            following = tvec[idx:]
            ev['duration'] = \
                len(following[following < ev['onset'] + ev['duration']])
            # new onset is sample index
            ev['onset'] = idx
            descr_events.append(ev)
    else:
        descr_events = events

    # convert the event specs into the format expected by BoxcarMapper
    # take the first event as an example of contained keys
    evvars = {}
    for k in descr_events[0]:
        try:
            evvars[k] = [e[k] for e in descr_events]
        except KeyError:
            raise ValueError("Each event property must be present for all "
                             "events (could not find '%s')" % k)
    # checks
    for p in ('onset', 'duration'):
        if p not in evvars:
            raise ValueError("'%s' is a required property for all events."
                             % p)
    boxlength = max(evvars['duration'])
    if __debug__:
        if boxlength != min(evvars['duration']):
            warning('Boxcar mapper will use maximum boxlength (%i) of all '
                    'provided Events.' % boxlength)

    # finally create, train and use the boxcar mapper
    bcm = BoxcarMapper(evvars['onset'], boxlength, space=eprefix)
    bcm.train(ds)
    ds = ds.get_mapped(bcm)
    if event_mapper is None:
        # at last reflatten the dataset
        # could we add some meaningful attribute during this mapping, i.e.
        # would assigning 'inspace' do something good?
        ds = ds.get_mapped(FlattenMapper(shape=ds.samples.shape[1:]))
    else:
        ds = ds.get_mapped(event_mapper)

    # add samples attributes for the events, simply dump everything as a
    # samples attribute
    for a in evvars:
        if eprefix is not None and a in ds.sa:
            # if there is already a samples attribute like this, it got
            # mapped by BoxcarMapper (i.e. is multi-dimensional). We move it
            # aside under new `eprefix` name
            ds.sa[eprefix + '_' + a] = ds.sa[a]
        if a in ('onset', 'duration'):
            # special case: we want the non-discrete, original onset and
            # duration
            if time_attr is not None:
                # but only if there was a conversion happening, since
                # otherwise we get the same info from BoxcarMapper
                ds.sa[a] = [e[a] for e in events]
        else:
            ds.sa[a] = evvars[a]
    return ds
def eventrelated_dataset(ds, events=None, time_attr=None, match='prev',
                         eprefix='event'):
    """Segment a dataset into a set of events.

    This function can be used to extract event-related samples from any
    time-series based dataset (actually, it does not have to be a time
    series, but could also be any other type of ordered samples).
    Boxcar-shaped event samples, potentially spanning multiple input samples
    can be automatically extracted using :class:`~mvpa2.misc.support.Event`
    definition lists. For each event all samples covering that particular
    event are used to form the corresponding sample.

    An event definition is a dictionary that contains ``onset`` (as sample
    index in the input dataset), ``duration`` (as number of consecutive
    samples after the onset), as well as an arbitrary number of additional
    attributes.

    Alternatively, ``onset`` and ``duration`` may also be given as real time
    stamps (or durations). In this case a to be specified samples attribute
    in the input dataset will be used to convert these into sample indices.

    Parameters
    ----------
    ds : Dataset
      The samples of this input dataset have to be in whatever ascending
      order.
    events : list
      Each event definition has to specify ``onset`` and ``duration``. All
      other attributes will be passed on to the sample attributes collection
      of the returned dataset. Must not be None or empty.
    time_attr : str or None
      If not None, the ``onset`` and ``duration`` specs from the event list
      will be converted using information from this sample attribute. Its
      values will be treated as in-the-same-unit and are used to determine
      corresponding samples from real-value onset and duration definitions.
    match : {'prev', 'next', 'closest'}
      Strategy used to match real-value onsets to sample indices. 'prev'
      chooses the closest preceding sample, 'next' the closest following
      sample and 'closest' the absolute closest sample.
    eprefix : str or None
      If not None, this prefix is used to name additional attributes
      generated by the underlying `~mvpa2.mappers.boxcar.BoxcarMapper`. If
      it is set to None, no additional attributes will be created.

    Returns
    -------
    Dataset
      The returned dataset has one sample per each event definition that has
      been passed to the function.

    Raises
    ------
    ValueError
      If ``events`` is None or empty, if a property of the first event is
      missing in any other event, or if ``onset`` or ``duration`` is not
      specified.
    KeyError
      If ``match`` is not one of the supported strategies.

    Examples
    --------
    The documentation also contains an :ref:`example script
    <example_eventrelated>` showing a spatio-temporal analysis of fMRI data
    that involves this function.

    >>> from mvpa2.datasets import Dataset
    >>> ds = Dataset(np.random.randn(10, 25))
    >>> events = [{'onset': 2, 'duration': 4},
    ...           {'onset': 4, 'duration': 4}]
    >>> eds = eventrelated_dataset(ds, events)
    >>> len(eds)
    2
    >>> eds.nfeatures == ds.nfeatures * 4
    True
    >>> 'mapper' in ds.a
    False
    >>> print eds.a.mapper
    <Chain: <Boxcar: bl=4>-<Flatten>>

    And now the same conversion, but with events specified as real time.
    This is only possible if the input dataset contains a sample attribute
    with the necessary information about the input samples.

    >>> ds.sa['record_time'] = np.linspace(0, 5, len(ds))
    >>> rt_events = [{'onset': 1.05, 'duration': 2.2},
    ...              {'onset': 2.3, 'duration': 2.12}]
    >>> rt_eds = eventrelated_dataset(ds, rt_events, time_attr='record_time',
    ...                               match='closest')
    >>> np.all(eds.samples == rt_eds.samples)
    True
    >>> # returned dataset e.g. has info from original samples
    >>> rt_eds.sa.record_time
    array([[ 1.11111111,  1.66666667,  2.22222222,  2.77777778],
           [ 2.22222222,  2.77777778,  3.33333333,  3.88888889]])
    """
    # relabel argument
    conv_strategy = {'prev': 'floor',
                     'next': 'ceil',
                     'closest': 'round'}[match]

    # fail early with a clear message: without this check a missing or empty
    # event list surfaces as an opaque TypeError/IndexError further down
    if events is None or len(events) == 0:
        raise ValueError(
            "'events' must be a non-empty list of event definitions.")

    if time_attr is not None:
        tvec = ds.sa[time_attr].value
        # we are asked to convert onset time into sample ids
        descr_events = []
        for ev in events:
            # do not mess with the input data
            ev = copy.deepcopy(ev)
            # best matching sample
            idx = value2idx(ev['onset'], tvec, conv_strategy)
            # store offset of sample time and real onset
            ev['orig_offset'] = ev['onset'] - tvec[idx]
            # rescue the real onset into a new attribute
            ev['orig_onset'] = ev['onset']
            ev['orig_duration'] = ev['duration']
            # figure out how many samples we need: count those from idx on
            # whose time-stamp precedes the end of the event
            following = tvec[idx:]
            ev['duration'] = \
                len(following[following < ev['onset'] + ev['duration']])
            # new onset is sample index
            ev['onset'] = idx
            descr_events.append(ev)
    else:
        descr_events = events

    # convert the event specs into the format expected by BoxcarMapper
    # take the first event as an example of contained keys
    evvars = {}
    for k in descr_events[0]:
        try:
            evvars[k] = [e[k] for e in descr_events]
        except KeyError:
            raise ValueError("Each event property must be present for all "
                             "events (could not find '%s')" % k)
    # checks
    for p in ('onset', 'duration'):
        if p not in evvars:
            raise ValueError("'%s' is a required property for all events."
                             % p)
    boxlength = max(evvars['duration'])
    if __debug__:
        if boxlength != min(evvars['duration']):
            warning('Boxcar mapper will use maximum boxlength (%i) of all '
                    'provided Events.' % boxlength)

    # finally create, train and use the boxcar mapper
    bcm = BoxcarMapper(evvars['onset'], boxlength, space=eprefix)
    bcm.train(ds)
    ds = ds.get_mapped(bcm)
    # at last reflatten the dataset
    # could we add some meaningful attribute during this mapping, i.e. would
    # assigning 'inspace' do something good?
    ds = ds.get_mapped(FlattenMapper(shape=ds.samples.shape[1:]))

    # add samples attributes for the events, simply dump everything as a
    # samples attribute
    for a in evvars:
        if eprefix is not None and a in ds.sa:
            # if there is already a samples attribute like this, it got
            # mapped by BoxcarMapper (i.e. is multi-dimensional). We move it
            # aside under new `eprefix` name
            ds.sa[eprefix + '_' + a] = ds.sa[a]
        if a in ('onset', 'duration'):
            # special case: we want the non-discrete, original onset and
            # duration
            if time_attr is not None:
                # but only if there was a conversion happening, since
                # otherwise we get the same info from BoxcarMapper
                ds.sa[a] = [e[a] for e in events]
        else:
            ds.sa[a] = evvars[a]
    return ds