def _test_compare_to_old(self):
    """Regression check: new wavelet mappers must reproduce the legacy ones.

    Boxcar-windowed data from the 'uni2medium' dataset is run through each
    current mapper and through its counterpart from the legacy ``wavelet_``
    module; the two outputs have to be element-wise equal.
    """
    import mvpa2.mappers.wavelet_ as legacy

    dataset = datasets['uni2medium']
    samples_2d = dataset.samples
    # size of timeline for wavelet
    window = 16
    # boxcar onsets, leaving a margin of `window` samples at both ends
    onsets = np.arange(dataset.nsamples - window * 2) + window

    # create 3D instance (samples x timepoints x channels)
    boxcar = BoxcarMapper(onsets, window)
    samples_3d = boxcar.forward(samples_2d)

    mapper_pairs = (
        (WaveletTransformationMapper(), legacy.WaveletTransformationMapper()),
        (WaveletPacketMapper(), legacy.WaveletPacketMapper()),
    )
    for current, old in mapper_pairs:
        out_current = current(samples_3d)
        out_old = old(samples_3d)
        identical = (out_current == out_old).all()
        self.assertTrue(
            identical,
            msg="We should have got same result with old and new code. "
                "Got %s and %s" % (out_current, out_old))
def _test_compare_to_old(self):
    """Regression test comparing the new wavelet mappers to the old ones.

    Boxcar-windowed data from the 'uni2medium' dataset is transformed by
    each current mapper and by the matching mapper of the legacy
    ``wavelet_`` module; both results must be element-wise identical.
    """
    import mvpa2.mappers.wavelet_ as wavelet_

    ds = datasets['uni2medium']
    d2d = ds.samples
    ws = 16                          # size of timeline for wavelet
    sp = np.arange(ds.nsamples - ws * 2) + ws

    # create 3D instance (samples x timepoints x channels)
    bcm = BoxcarMapper(sp, ws)
    d3d = bcm.forward(d2d)

    # use wavelet mapper
    for wdm, wdm_ in (
            (WaveletTransformationMapper(),
             wavelet_.WaveletTransformationMapper()),
            (WaveletPacketMapper(),
             wavelet_.WaveletPacketMapper()),):
        d3d_wd = wdm(d3d)
        d3d_wd_ = wdm_(d3d)

        # assertTrue replaces the deprecated failUnless alias, which no
        # longer exists in current unittest releases
        self.assertTrue(
            (d3d_wd == d3d_wd_).all(),
            msg="We should have got same result with old and new code. "
                "Got %s and %s" % (d3d_wd, d3d_wd_))
def _extract_boxcar_events(ds, events=None, time_attr=None, match="prev",
                           eprefix="event", event_mapper=None):
    """Extract one boxcar sample per event from ``ds``.

    Helper function; see eventrelated_dataset() for the parameter docs.
    """
    # translate the matching strategy into the conversion mode name that
    # is handed to value2idx()
    rounding = {"prev": "floor", "next": "ceil", "closest": "round"}[match]

    if time_attr is None:
        # onset/duration are used exactly as provided
        descr_events = events
    else:
        # onset/duration are real-valued times: convert them into sample
        # indices using the time stamps stored in the sample attribute
        tvec = ds.sa[time_attr].value
        descr_events = []
        for event in events:
            # work on a copy so the caller's event stays untouched
            event = copy.deepcopy(event)
            onset_time = event["onset"]
            # best matching sample
            sample_idx = value2idx(onset_time, tvec, rounding)
            # difference between the real onset and the matched sample time
            event["orig_offset"] = onset_time - tvec[sample_idx]
            # preserve the real-valued specification
            event["orig_onset"] = onset_time
            event["orig_duration"] = event["duration"]
            # number of samples, from the matched one onwards, whose time
            # stamp lies before the end of the event
            tail = tvec[sample_idx:]
            event["duration"] = len(
                tail[tail < onset_time + event["duration"]])
            # the onset is now expressed as a sample index
            event["onset"] = sample_idx
            descr_events.append(event)

    # convert the event specs into the format expected by BoxcarMapper
    evvars = _events2dict(descr_events)
    for required in ("onset", "duration"):
        if required not in evvars:
            raise ValueError("'%s' is a required property for all events."
                             % required)

    durations = evvars["duration"]
    boxlength = max(durations)
    if __debug__:
        if boxlength != min(durations):
            warning("Boxcar mapper will use maximum boxlength (%i) of all "
                    "provided Events." % boxlength)

    # finally create, train and use the boxcar mapper
    boxcar = BoxcarMapper(evvars["onset"], boxlength, space=eprefix)
    boxcar.train(ds)
    boxed = ds.get_mapped(boxcar)

    if event_mapper is None:
        # without a custom mapper the event samples get flattened
        post_mapper = FlattenMapper(shape=boxed.samples.shape[1:])
    else:
        post_mapper = event_mapper
    mapped = boxed.get_mapped(post_mapper)

    # All event properties are stored as sample attributes.  If onsets and
    # durations were converted into discrete time, the values of the
    # events passed in by the caller are the ones that get stored.
    if time_attr is not None:
        for attr in ("onset", "duration"):
            evvars[attr] = [e[attr] for e in events]
    return _evvars2ds(mapped, evvars, eprefix)
def test_simple_wdm(self):
    """Exercise wavelet mappers on 2D and 3D data, with and without ``dim``.

    Checks that invalid constructor arguments are rejected, that
    transforming along an explicitly given axis matches transforming the
    axis-swapped data, and that the DWT can be reversed with negligible
    error.
    """
    dataset = datasets['uni2medium']
    flat = dataset.samples
    # size of timeline for wavelet
    window = 15
    onsets = np.arange(dataset.nsamples - window * 2) + window

    # create 3D instance (samples x timepoints x channels)
    boxed = BoxcarMapper(onsets, window).forward(flat)

    # plain forward call; the result is not inspected
    WaveletTransformationMapper().forward(boxed)
    boxed_swapped = boxed.swapaxes(1, 2)

    # bogus settings have to be refused by the constructor
    for bad_kwargs in ({'wavelet': 'bogus'}, {'mode': 'bogus'}):
        self.assertRaises(ValueError, WaveletTransformationMapper,
                          **bad_kwargs)

    mapper_pairs = (
        (WaveletTransformationMapper(), WaveletTransformationMapper(dim=2)),
        (WaveletPacketMapper(), WaveletPacketMapper(dim=2)),
    )
    inputs = ((boxed, boxed_swapped), (flat, None))
    for mapper, mapper_dim2 in mapper_pairs:
        for arr, arr_swapped in inputs:
            transformed = mapper.forward(arr)
            if arr_swapped is not None:
                transformed_swapped = mapper_dim2.forward(arr_swapped)
                same = (transformed
                        == transformed_swapped.swapaxes(1, 2)).all()
                self.assertTrue(
                    same,
                    msg="We should have got same result with swapped "
                        "dimensions and explicit mentioining of it. "
                        "Got %s and %s" % (transformed, transformed_swapped))

            # some sanity checks
            self.assertTrue(transformed.shape[0] == arr.shape[0])

            if isinstance(mapper, WaveletPacketMapper):
                # we can do reverse only for DWT
                continue

            restored = mapper.reverse(transformed)
            # the inverse transform might not be exactly the input, but
            # it should be very close
            self.assertEqual(restored.shape, arr.shape,
                             msg="Shape should be the same after iDWT")
            residual = np.linalg.norm(arr - restored)
            reference = np.linalg.norm(arr)
            self.assertTrue(residual / reference < 1e-10)
def test_simple_wdm(self):
    """Exercise wavelet mappers on 2D and 3D data, with and without ``dim``.

    Verifies that bogus ``wavelet``/``mode`` arguments raise ValueError,
    that a mapper with ``dim=2`` applied to axis-swapped data gives the
    same result as the default mapper, and that the DWT can be reversed
    with a negligible relative error.
    """
    ds = datasets['uni2medium']
    d2d = ds.samples
    ws = 15                          # size of timeline for wavelet
    sp = np.arange(ds.nsamples - ws * 2) + ws

    # create 3D instance (samples x timepoints x channels)
    bcm = BoxcarMapper(sp, ws)
    d3d = bcm.forward(d2d)

    # use wavelet mapper (plain forward call; result is not inspected)
    wdm = WaveletTransformationMapper()
    wdm.forward(d3d)
    d3d_swap = d3d.swapaxes(1, 2)

    # assert* names replace the deprecated failUnless* aliases, which no
    # longer exist in current unittest releases
    self.assertRaises(ValueError, WaveletTransformationMapper,
                      wavelet='bogus')
    self.assertRaises(ValueError, WaveletTransformationMapper,
                      mode='bogus')

    # use wavelet mapper
    for wdm, wdm_swap in ((WaveletTransformationMapper(),
                           WaveletTransformationMapper(dim=2)),
                          (WaveletPacketMapper(),
                           WaveletPacketMapper(dim=2))):
        for dd, dd_swap in ((d3d, d3d_swap),
                            (d2d, None)):
            dd_wd = wdm.forward(dd)
            if dd_swap is not None:
                dd_wd_swap = wdm_swap.forward(dd_swap)

                self.assertTrue(
                    (dd_wd == dd_wd_swap.swapaxes(1, 2)).all(),
                    msg="We should have got same result with swapped "
                        "dimensions and explicit mentioining of it. "
                        "Got %s and %s" % (dd_wd, dd_wd_swap))

            # some sanity checks
            self.assertTrue(dd_wd.shape[0] == dd.shape[0])

            if not isinstance(wdm, WaveletPacketMapper):
                # we can do reverse only for DWT
                dd_rev = wdm.reverse(dd_wd)
                # inverse transform might be not exactly as the
                # input... but should be very close ;-)
                self.assertEqual(dd_rev.shape, dd.shape,
                                 msg="Shape should be the same after iDWT")

                diff = np.linalg.norm(dd - dd_rev)
                ornorm = np.linalg.norm(dd)
                self.assertTrue(diff / ornorm < 1e-10)
def test_state_cycle_with_custom_reduce(fname):
    """The 'space' of a BoxcarMapper must survive an h5save/h5load cycle.

    BoxcarMapper has a custom __reduce__ implementation.  The 'space'
    setting only survives a save/load cycle if the state is handled
    correctly for custom reduce implementations.
    """
    h5save(fname, BoxcarMapper([0], 1, space='boxy'))
    reloaded = h5load(fname)
    assert_equal(reloaded.get_space(), 'boxy')
def test_simple_wp1_level(self):
    """Check a level-2 'sym2' wavelet packet transform and its inverse.

    The forward transform has to add one axis while preserving the first
    and the last one.  Depending on the available pywt functionality the
    reverse mapping either reconstructs the input closely, or raises
    NotImplementedError.
    """
    dataset = datasets['uni2large']
    flat = dataset.samples
    # size of timeline for wavelet
    window = 50
    # only the first four boxcars are used
    onsets = (np.arange(dataset.nsamples - window * 2) + window)[:4]

    # create 3D instance (samples x timepoints x channels)
    boxed = BoxcarMapper(onsets, window).forward(flat)

    # use wavelet mapper
    packet_mapper = WaveletPacketMapper(level=2, wavelet='sym2')
    transformed = packet_mapper.forward(boxed)

    # Check dimensionality
    out_shape = transformed.shape
    in_shape = boxed.shape
    self.assertTrue(len(out_shape) == len(in_shape) + 1)
    self.assertTrue(out_shape[1] * out_shape[2] >= in_shape[1])
    self.assertTrue(out_shape[0] == in_shape[0])
    self.assertTrue(out_shape[-1] == in_shape[-1])

    if not externals.exists('pywt wp reconstruct'):
        # without reconstruction support reverse() has to refuse to work
        self.assertRaises(NotImplementedError,
                          packet_mapper.reverse, transformed)
        return

    # Test reverse -- should be identical
    restored = packet_mapper.reverse(transformed)

    # the inverse transform might not be exactly the input, but it
    # should be very close
    self.assertEqual(restored.shape, boxed.shape,
                     msg="Shape should be the same after iDWT")

    residual = np.linalg.norm(boxed - restored)
    reference = np.linalg.norm(boxed)
    # the precision check is only done if this external is available
    skip_if_no_external('pywt wp reconstruct fixed')
    self.assertTrue(residual / reference < 1e-10)
def test_simple_wp1_level(self):
    """Check a level-2 'sym2' wavelet packet transform and its inverse.

    The forward transform must add one axis and keep the first and last
    axis sizes.  If the 'pywt wp reconstruct' external is available the
    reverse mapping is checked for shape and (given 'pywt wp reconstruct
    fixed') precision; otherwise reverse() must raise NotImplementedError.
    """
    ds = datasets['uni2large']
    d2d = ds.samples
    ws = 50                          # size of timeline for wavelet
    sp = (np.arange(ds.nsamples - ws * 2) + ws)[:4]

    # create 3D instance (samples x timepoints x channels)
    bcm = BoxcarMapper(sp, ws)
    d3d = bcm.forward(d2d)

    # use wavelet mapper
    wdm = WaveletPacketMapper(level=2, wavelet='sym2')
    d3d_wd = wdm.forward(d3d)

    # Check dimensionality
    # (assert* names replace the deprecated failUnless* aliases, which no
    # longer exist in current unittest releases)
    d3d_wds, d3ds = d3d_wd.shape, d3d.shape
    self.assertTrue(len(d3d_wds) == len(d3ds) + 1)
    self.assertTrue(d3d_wds[1] * d3d_wds[2] >= d3ds[1])
    self.assertTrue(d3d_wds[0] == d3ds[0])
    self.assertTrue(d3d_wds[-1] == d3ds[-1])

    if externals.exists('pywt wp reconstruct'):
        # Test reverse -- should be identical
        d3d_rev = wdm.reverse(d3d_wd)

        # inverse transform might be not exactly as the
        # input... but should be very close ;-)
        self.assertEqual(d3d_rev.shape, d3d.shape,
                         msg="Shape should be the same after iDWT")

        diff = np.linalg.norm(d3d - d3d_rev)
        ornorm = np.linalg.norm(d3d)

        # the precision check is only done if this external is available
        skip_if_no_external('pywt wp reconstruct fixed')
        self.assertTrue(diff / ornorm < 1e-10)
    else:
        self.assertRaises(NotImplementedError, wdm.reverse, d3d_wd)
def test_simpleboxcar():
    """Basic forward/reverse behaviour of BoxcarMapper on plain arrays."""
    column = np.arange(10)[:, np.newaxis]
    onsets = np.arange(10)

    # a boxlength of zero has to be rejected
    assert_raises(ValueError, BoxcarMapper, onsets, 0)

    # boxlength one: identity transformation
    identity = BoxcarMapper(onsets, 1).forward(column)
    # selecting index 0 on the second axis leaves an explicit 2D array
    assert_array_equal(identity[:, 0], column)

    # now check for illegal boxes
    if __debug__:
        # condition is checked only in __debug__
        assert_raises(ValueError, BoxcarMapper(onsets, 2).train, column)

    # now something that should work
    nbox, boxlength = 9, 2
    boxed = BoxcarMapper(np.arange(nbox), boxlength).forward(column)
    # the dimensionality has to be upcast properly
    assert_equal(boxed.shape, (nbox, boxlength) + column.shape[1:])
    # check actual values, squeezing the last dim for simplicity
    expected = np.column_stack((np.arange(nbox), np.arange(nbox) + 1))
    assert_array_equal(boxed.squeeze(), expected)

    # now test for proper data shape
    onsets = [2, 4, 3, 5]
    ones = np.ones((10, 3, 4, 2))
    assert_equal(BoxcarMapper(onsets, 4).forward(ones).shape,
                 (4, 4, 3, 4, 2))

    # test reverse
    volume = np.arange(240).reshape(10, 3, 4, 2)
    boxlength = 2
    mapper = BoxcarMapper(onsets, boxlength)
    mapper.train(volume)
    mapped = mapper.forward(volume)
    assert_equal(mapped.shape, (4, 2, 3, 4, 2))

    # try full reconstruct
    restored = mapper.reverse(mapped)
    # shape has to match
    assert_equal(restored.shape,
                 (len(onsets) * boxlength,) + volume.shape[1:])
    # only known samples are part of the results
    assert_true((restored >= 24).all())
    assert_true((restored < 168).all())

    # check proper reconstruction of non-conflicting sample
    assert_array_equal(restored[0].ravel(), np.arange(48, 72))
    # check proper reconstruction of samples being part of multiple
    # mapped samples
    assert_array_equal(restored[1].ravel(), np.arange(72, 96))

    # test reverse of a single sample
    single = np.arange(48).reshape(2, 3, 4, 2)
    assert_array_equal(single, mapper.reverse1(single))

    # now in a dataset
    one_sample_ds = Dataset([single])
    assert_equal(one_sample_ds.shape, (1,) + single.shape)
    # after reverse mapping the 'sample axis' should vanish and the
    # original 3d shape of the samples should be restored
    assert_equal(one_sample_ds.shape[1:],
                 mapper.reverse(one_sample_ds).shape)

    # multiple samples should just be concatenated along the samples axis
    two_sample_ds = Dataset([single, single])
    assert_equal((np.prod(two_sample_ds.shape[:2]),) + single.shape[1:],
                 mapper.reverse(two_sample_ds).shape)

    # reverse() does not reject a shape mismatch; that is useful when
    # reverse mapping sample attributes, so no ValueError check is done
    # for mapper.reverse(single[0])

    # check broadcasting of 'raw' samples into proper boxcars on forward()
    raw = np.arange(24).reshape(3, 4, 2)
    broadcasted = mapper.forward1(raw)
    assert_array_equal(
        broadcasted,
        np.array(2 * [np.arange(24).reshape(3, 4, 2)]))
def extract_boxcar_event_samples(ds, events=None, time_attr=None,
                                 match='prev', event_offset=None,
                                 event_duration=None, eprefix='event',
                                 event_mapper=None):
    """Segment a dataset into one sample per boxcar event

    For every event a run of consecutive input samples is extracted.  The
    resulting event samples are returned either in flattened form, or
    after being passed through a user-supplied mapper.

    Events are given as a list of dictionaries (see
    :class:`~mvpa2.misc.support.Event` for a helper class).  Each
    dictionary holds the attributes describing one event: at least its
    ``onset``, and possibly ``duration``, ``amplitude`` and arbitrary
    further attributes.

    Boxcar event model details
    --------------------------

    All samples covering an event are combined into a single sample, so
    one sample per event is returned.  An event specification must provide
    ``onset`` (sample index in the input dataset) and ``duration`` (number
    of consecutive samples starting at the onset).  Any additional
    attributes of an event end up as sample attributes of the returned
    dataset.

    ``onset`` and ``duration`` may alternatively be expressed in
    non-discrete time.  In that case a sample attribute with a time stamp
    for every input sample has to be named (``time_attr``); it is used to
    translate times into discrete sample indices (see ``match``).

    An optional mapper (``event_mapper``) can post-process every event
    sample, for example to compute average samples.

    Parameters
    ----------
    ds : Dataset
      The samples of this input dataset have to be in whatever ascending
      order.
    events : list
      Each event definition has to specify ``onset`` and ``duration``.
      All other attributes are passed on to the sample attributes
      collection of the returned dataset.
    time_attr : str or None
      Name of the sample attribute holding the time stamps of the dataset
      samples.  If not None, ``onset`` and ``duration`` of all events are
      converted using this attribute.  Its values are treated as being in
      the same unit as the event specification and are used to determine
      the samples that correspond to real-valued onsets and durations.
    match : {'prev', 'next', 'closest'}
      Strategy for matching real-valued onsets to sample indices: 'prev'
      picks the closest preceding sample, 'next' the closest following
      sample, and 'closest' the absolute closest sample.
    event_offset : None or float
      If not None, this value is added to the ``onset`` of every event
      before boxcar modeling is performed.
    event_duration : None or float
      If not None, the ``duration`` of every event is replaced by this
      value before boxcar modeling is performed.
    eprefix : str or None
      If not None, this prefix is used to name additional attributes
      generated by the underlying `~mvpa2.mappers.boxcar.BoxcarMapper`.
      If it is None, no additional attributes are created.
    event_mapper : Mapper
      Mapper used to forward-map the dataset containing the boxcar event
      samples.  If None (default), a FlattenMapper converts the
      multi-dimensional sample matrices into one-dimensional sample
      vectors.  This option can be used to implement temporal compression,
      e.g. by averaging the samples of an event boxcar with an FxMapper.
      Any mapper has to leave the sample axis unchanged, i.e. number and
      order of samples stay the same.

    Returns
    -------
    Dataset
      One sample per event definition passed to the function.  Additional
      event attributes are included as sample attributes.

    Examples
    --------
    The documentation also contains an :ref:`example script
    <example_eventrelated>` showing a spatio-temporal analysis of fMRI
    data that involves this function.

    >>> from mvpa2.datasets import Dataset
    >>> ds = Dataset(np.random.randn(10, 25))
    >>> events = [{'onset': 2, 'duration': 4},
    ...           {'onset': 4, 'duration': 4}]
    >>> eds = eventrelated_dataset(ds, events)
    >>> len(eds)
    2
    >>> eds.nfeatures == ds.nfeatures * 4
    True
    >>> 'mapper' in ds.a
    False
    >>> print(eds.a.mapper)
    <Chain: <Boxcar: bl=4>-<Flatten>>

    And now the same conversion, but with events specified as real time.
    This is only possible if the input dataset contains a sample attribute
    with the necessary information about the input samples.

    >>> ds.sa['record_time'] = np.linspace(0, 5, len(ds))
    >>> rt_events = [{'onset': 1.05, 'duration': 2.2},
    ...              {'onset': 2.3, 'duration': 2.12}]
    >>> rt_eds = eventrelated_dataset(ds, rt_events, time_attr='record_time',
    ...                               match='closest')
    >>> np.all(eds.samples == rt_eds.samples)
    True
    >>> # returned dataset e.g. has info from original samples
    >>> rt_eds.sa.record_time
    array([[ 1.11111111,  1.66666667,  2.22222222,  2.77777778],
           [ 2.22222222,  2.77777778,  3.33333333,  3.88888889]])
    """
    # translate the matching strategy into the conversion mode name that
    # is handed to value2idx()
    rounding = {'prev': 'floor', 'next': 'ceil', 'closest': 'round'}[match]

    if event_offset is not None or event_duration is not None:
        # apply the global overrides on copies of the events, so the
        # caller's input is not modified
        adjusted = []
        for event in events:
            event = copy.deepcopy(event)
            if event_offset is not None:
                event['onset'] += event_offset
            if event_duration is not None:
                event['duration'] = event_duration
            adjusted.append(event)
        events = adjusted

    if time_attr is None:
        # onset/duration are used exactly as provided
        descr_events = events
    else:
        # onset/duration are real-valued times: convert them into sample
        # indices using the time stamps stored in the sample attribute
        tvec = ds.sa[time_attr].value
        descr_events = []
        for event in events:
            # again work on a copy only
            event = copy.deepcopy(event)
            onset_time = event['onset']
            # best matching sample
            sample_idx = value2idx(onset_time, tvec, rounding)
            # difference between the real onset and the matched sample time
            event['orig_offset'] = onset_time - tvec[sample_idx]
            # preserve the real-valued specification
            event['orig_onset'] = onset_time
            event['orig_duration'] = event['duration']
            # number of samples, from the matched one onwards, whose time
            # stamp lies before the end of the event
            tail = tvec[sample_idx:]
            event['duration'] = len(
                tail[tail < onset_time + event['duration']])
            # the onset is now expressed as a sample index
            event['onset'] = sample_idx
            descr_events.append(event)

    # convert the event specs into the format expected by BoxcarMapper
    evvars = _events2dict(descr_events)
    for required in ('onset', 'duration'):
        if required not in evvars:
            raise ValueError("'%s' is a required property for all events."
                             % required)

    durations = evvars['duration']
    boxlength = max(durations)
    if __debug__:
        if boxlength != min(durations):
            warning('Boxcar mapper will use maximum boxlength (%i) of all '
                    'provided Events.' % boxlength)

    # finally create, train and use the boxcar mapper
    boxcar = BoxcarMapper(evvars['onset'], boxlength, space=eprefix)
    boxcar.train(ds)
    boxed = ds.get_mapped(boxcar)

    if event_mapper is None:
        # without a custom mapper the event samples get flattened
        post_mapper = FlattenMapper(shape=boxed.samples.shape[1:])
    else:
        post_mapper = event_mapper
    mapped = boxed.get_mapped(post_mapper)

    # All event properties are stored as sample attributes.  If onsets and
    # durations were converted into discrete time, the values from
    # `events` (after offset/duration overrides, before the time
    # conversion) are the ones that get stored.
    if time_attr is not None:
        for attr in ('onset', 'duration'):
            evvars[attr] = [e[attr] for e in events]
    return _evvars2ds(mapped, evvars, eprefix)
def timesegments_classification(dss, window_size=6, overlapping_windows=True,
                                distance='correlation', do_zscore=True):
    """Time-segment classification across subjects

    Every dataset is cut into boxcar windows of `window_size` samples,
    which are flattened into one sample per window.  Partitions are
    generated with an ``NFoldPartitioner`` on the ``subjects`` attribute;
    within each partition the windows of the test part are matched against
    the training windows averaged per start point, and the fraction of
    mismatched windows is recorded.

    Parameters
    ----------
    dss : list of datasets
      Datasets to benchmark on.  Usually a single dataset per subject.
      Items which are not of type `Dataset` are wrapped into one.
    window_size : int, optional
      How many temporal points to consider for a classification sample
    overlapping_windows : bool, optional
      Strategy to how create and classify "samples" for classification.
      If True -- `window_size` samples from each time point (but trailing
      ones) constitute a sample, and upon "predict" `window_size` of
      samples around each test point is not considered.  If False --
      samples are just taken (with training and testing splits) at
      `window_size` step from one to another.
    distance : {'correlation'}, optional
      Distance measure used to match windows.  Only 'correlation'
      (1 - Pearson correlation) is implemented.
    do_zscore : bool, optional
      Perform zscoring (overall, not per-chunk) for each dataset.

    Returns
    -------
    ndarray
      Classification errors, one per partition generated by the
      subject-wise partitioner.

    Raises
    ------
    NotImplementedError
      If `distance` is anything but 'correlation'.

    Notes
    -----
    Items of `dss` which already are `Dataset` instances are used as is:
    they get a ``subjects`` sample attribute assigned, and `zscore` is
    called on them for its side effect (its return value is not used).
    """
    part2 = NFoldPartitioner(attr='subjects')

    # Plain arrays get wrapped; exact Dataset instances are used directly
    dss = [ds if type(ds) == Dataset else Dataset(ds) for ds in dss]

    # TODO:  allow for doing feature selection
    if do_zscore:
        for ds in dss:
            zscore(ds, chunks_attr=None)

    # assign .sa.subjects to those datasets
    for i, ds in enumerate(dss):
        # part2.attr is by default "subjects"
        ds.sa[part2.attr] = [i]

    dss_test_bc = []
    for ds in dss:
        if overlapping_windows:
            startpoints = range(len(ds) - window_size + 1)
        else:
            startpoints = _get_nonoverlapping_startpoints(len(ds),
                                                          window_size)
        bm = BoxcarMapper(startpoints, window_size)
        bm.train(ds)
        ds_ = bm.forward(ds)
        ds_.sa['startpoints'] = startpoints

        # Reassign subjects so they are scalars instead of arrays.
        # ndarray.item() is what np.asscalar() used to call; np.asscalar
        # itself is no longer available in current NumPy releases.
        ds_.sa[part2.attr] = [np.unique(x).item()
                              for x in ds_.sa[part2.attr].value]

        fm = FlattenMapper()
        fm.train(ds_)
        dss_test_bc.append(ds_.get_mapped(fm))

    ds_test = vstack(dss_test_bc)

    # Perform classification across subjects comparing against mean
    # spatio-temporal pattern of other subjects
    errors_across_subjects = []
    for ds_test_part in part2.generate(ds_test):
        ds_train_, ds_test_ = list(
            Splitter("partitions").generate(ds_test_part))
        # average across subjects to get a representative pattern per
        # timepoint
        ds_train_ = mean_group_sample(['startpoints'])(ds_train_)
        assert (ds_train_.shape == ds_test_.shape)

        if distance == 'correlation':
            # TODO: redo more efficiently since now we are creating full
            # corrcoef matrix.  Also we might better just take a name for
            # the pdist measure but then implement them efficiently
            # (i.e. without hstacking both pieces together first)
            dist = 1 - np.corrcoef(
                ds_train_, ds_test_)[len(ds_test_):, :len(ds_test_)]
        else:
            raise NotImplementedError(
                "distance=%r is not supported, only 'correlation' is"
                % (distance,))

        if overlapping_windows:
            dist = wipe_out_offdiag(dist, window_size)

        # a window counts as correct if its minimal distance is on the
        # diagonal of the distance matrix
        winners = np.argmin(dist, axis=1)
        error = np.mean(winners != np.arange(len(winners)))
        errors_across_subjects.append(error)

    errors_across_subjects = np.asarray(errors_across_subjects)
    if __debug__:
        debug("BM",
              "Finished with %s array of errors. Mean error %.2f"
              % (errors_across_subjects.shape,
                 np.mean(errors_across_subjects)))
    return errors_across_subjects
def test_datasetmapping():
    """Forward and reverse mapping of a Dataset through BoxcarMapper."""
    # 6 samples, 4X2 features
    raw = np.arange(48).reshape(6, 4, 2)
    sample_attrs = {'timepoints': np.arange(6), 'multidim': raw.copy()}
    feature_attrs = {'fid': np.arange(4)}
    ds = Dataset(raw, sa=sample_attrs, fa=feature_attrs)

    # with overlapping and non-overlapping boxcars
    startpoints = [0, 1, 4]
    boxlength = 2
    nboxes = len(startpoints)
    bm = BoxcarMapper(startpoints, boxlength, space='boxy')
    # train is critical
    bm.train(ds)
    mds = bm.forward(ds)

    assert_equal(len(mds), nboxes)
    assert_equal(mds.nfeatures, boxlength)
    # all sample attributes remain, but they can get rotated/compressed
    # into multidimensional attributes
    assert_equal(sorted(mds.sa.keys()),
                 ['boxy_onsetidx'] + sorted(ds.sa.keys()))
    assert_equal(mds.sa.multidim.shape, (nboxes, boxlength) + ds.shape[1:])
    assert_equal(mds.sa.timepoints.shape, (nboxes, boxlength))
    expected_timepoints = [t for s in startpoints for t in (s, s + 1)]
    assert_array_equal(mds.sa.timepoints.flatten(), expected_timepoints)
    assert_array_equal(mds.sa.boxy_onsetidx, startpoints)
    # feature attributes also get rotated and broadcasted
    assert_array_equal(mds.fa.fid, [ds.fa.fid, ds.fa.fid])
    # and finally there is a new one
    assert_array_equal(mds.fa.boxy_offsetidx, list(range(boxlength)))

    # now see how it works on reverse()
    rds = bm.reverse(mds)
    # we got at least something of all original attributes back
    for collection in ('sa', 'fa'):
        assert_equal(sorted(getattr(rds, collection).keys()),
                     sorted(getattr(ds, collection).keys()))
    # It is not possible to reconstruct the full samples array; some
    # samples even might show up multiple times (when there are
    # overlapping boxcars).  Expected are the original samples 0, 1, 1,
    # 2, 4 and 5, built here from a fresh array.
    expected_samples = np.arange(48).reshape(6, 4, 2)[[0, 1, 1, 2, 4, 5]]
    assert_array_equal(rds.samples, expected_samples)
    assert_array_equal(rds.sa.timepoints, [0, 1, 1, 2, 4, 5])
    assert_array_equal(rds.sa.multidim,
                       ds.sa.multidim[rds.sa.timepoints])
    # but feature attributes should be fully recovered
    assert_array_equal(rds.fa.fid, ds.fa.fid)

    # popular dataset configuration (double flatten + boxcar)
    chain = ChainMapper([FlattenMapper(), bm, FlattenMapper()])
    chain.train(ds)
    bflat = ds.get_mapped(chain)
    assert_equal(bflat.shape, (nboxes, boxlength * np.prod(ds.shape[1:])))
    # add attributes
    bflat.fa['testfa'] = np.arange(bflat.nfeatures)
    bflat.sa['testsa'] = np.arange(bflat.nsamples)

    # now try to go back
    bflatrev = bflat.mapper.reverse(bflat)
    # data should be same again, as far as the boxcars match
    for part in (slice(None, 2), slice(-2, None)):
        assert_array_equal(ds.samples[part], bflatrev.samples[part])
    # feature axis should match
    assert_equal(ds.shape[1:], bflatrev.shape[1:])
def test_datasetmapping():
    """Forward and reverse mapping of a Dataset through BoxcarMapper."""
    # 6 samples, 4X2 features
    values = np.arange(48).reshape(6, 4, 2)
    ds = Dataset(
        values,
        sa={'timepoints': np.arange(6), 'multidim': values.copy()},
        fa={'fid': np.arange(4)})

    # with overlapping and non-overlapping boxcars
    startpoints = [0, 1, 4]
    boxlength = 2
    n_start = len(startpoints)
    bm = BoxcarMapper(startpoints, boxlength, space='boxy')
    # train is critical
    bm.train(ds)
    mds = bm.forward(ds)

    assert_equal(len(mds), n_start)
    assert_equal(mds.nfeatures, boxlength)
    # all sample attributes remain, but they can get rotated/compressed
    # into multidimensional attributes
    original_sa_keys = sorted(ds.sa.keys())
    assert_equal(sorted(mds.sa.keys()), ['boxy_onsetidx'] + original_sa_keys)
    assert_equal(mds.sa.multidim.shape,
                 (n_start, boxlength) + ds.shape[1:])
    assert_equal(mds.sa.timepoints.shape, (n_start, boxlength))
    onset_and_next = np.array([(s, s + 1) for s in startpoints])
    assert_array_equal(mds.sa.timepoints.flatten(),
                       onset_and_next.flatten())
    assert_array_equal(mds.sa.boxy_onsetidx, startpoints)
    # feature attributes also get rotated and broadcasted
    assert_array_equal(mds.fa.fid, [ds.fa.fid, ds.fa.fid])
    # and finally there is a new one
    assert_array_equal(mds.fa.boxy_offsetidx, range(boxlength))

    # now see how it works on reverse()
    rds = bm.reverse(mds)
    # we got at least something of all original attributes back
    assert_equal(sorted(rds.sa.keys()), sorted(ds.sa.keys()))
    assert_equal(sorted(rds.fa.keys()), sorted(ds.fa.keys()))
    # It is not possible to reconstruct the full samples array; some
    # samples even might show up multiple times (when there are
    # overlapping boxcars).  Expected are the original samples 0, 1, 1,
    # 2, 4 and 5, built here from a fresh array.
    recovered_rows = [0, 1, 1, 2, 4, 5]
    assert_array_equal(rds.samples,
                       np.arange(48).reshape(6, 4, 2)[recovered_rows])
    assert_array_equal(rds.sa.timepoints, [0, 1, 1, 2, 4, 5])
    assert_array_equal(rds.sa.multidim,
                       ds.sa.multidim[rds.sa.timepoints])
    # but feature attributes should be fully recovered
    assert_array_equal(rds.fa.fid, ds.fa.fid)

    # popular dataset configuration (double flatten + boxcar)
    cm = ChainMapper([FlattenMapper(), bm, FlattenMapper()])
    cm.train(ds)
    bflat = ds.get_mapped(cm)
    assert_equal(bflat.shape,
                 (n_start, boxlength * np.prod(ds.shape[1:])))
    # add attributes
    bflat.fa['testfa'] = np.arange(bflat.nfeatures)
    bflat.sa['testsa'] = np.arange(bflat.nsamples)

    # now try to go back
    bflatrev = bflat.mapper.reverse(bflat)
    # data should be same again, as far as the boxcars match
    assert_array_equal(ds.samples[:2], bflatrev.samples[:2])
    assert_array_equal(ds.samples[-2:], bflatrev.samples[-2:])
    # feature axis should match
    assert_equal(ds.shape[1:], bflatrev.shape[1:])
def test_simpleboxcar():
    """Basic forward/reverse behaviour of BoxcarMapper on plain arrays."""
    series = np.arange(10).reshape(10, 1)
    starts = np.arange(10)

    # a boxlength of zero has to be rejected
    assert_raises(ValueError, BoxcarMapper, starts, 0)

    # boxlength one: identity transformation
    unit_mapper = BoxcarMapper(starts, 1)
    unit_boxes = unit_mapper.forward(series)
    # selecting index 0 on the second axis leaves an explicit 2D array
    assert_array_equal(unit_boxes[:, 0], series)

    # now check for illegal boxes
    if __debug__:
        # condition is checked only in __debug__
        too_long = BoxcarMapper(starts, 2)
        assert_raises(ValueError, too_long.train, series)

    # now something that should work
    nbox = 9
    boxlength = 2
    pair_mapper = BoxcarMapper(np.arange(nbox), boxlength)
    pairs = pair_mapper.forward(series)
    # the dimensionality has to be upcast properly
    assert_equal(pairs.shape, (nbox, boxlength) + series.shape[1:])
    # check actual values, squeezing the last dim for simplicity
    first = np.arange(9)
    assert_array_equal(pairs.squeeze(), np.vstack((first, first + 1)).T)

    # now test for proper data shape
    starts = [2, 4, 3, 5]
    shaped = BoxcarMapper(starts, 4).forward(np.ones((10, 3, 4, 2)))
    assert_equal(shaped.shape, (4, 4, 3, 4, 2))

    # test reverse
    full = np.arange(240).reshape(10, 3, 4, 2)
    boxlength = 2
    m = BoxcarMapper(starts, boxlength)
    m.train(full)
    forwarded = m.forward(full)
    assert_equal(forwarded.shape, (4, 2, 3, 4, 2))

    # try full reconstruct
    reversed_ = m.reverse(forwarded)
    # shape has to match
    assert_equal(reversed_.shape,
                 (len(starts) * boxlength,) + full.shape[1:])
    # only known samples are part of the results
    assert_true((reversed_ >= 24).all())
    assert_true((reversed_ < 168).all())

    # check proper reconstruction of non-conflicting sample
    assert_array_equal(reversed_[0].ravel(), np.arange(48, 72))
    # check proper reconstruction of samples being part of multiple
    # mapped samples
    assert_array_equal(reversed_[1].ravel(), np.arange(72, 96))

    # test reverse of a single sample
    singlesample = np.arange(48).reshape(2, 3, 4, 2)
    assert_array_equal(singlesample, m.reverse1(singlesample))

    # now in a dataset
    ds_single = Dataset([singlesample])
    assert_equal(ds_single.shape, (1,) + singlesample.shape)
    # after reverse mapping the 'sample axis' should vanish and the
    # original 3d shape of the samples should be restored
    assert_equal(ds_single.shape[1:], m.reverse(ds_single).shape)

    # multiple samples should just be concatenated along the samples axis
    ds_double = Dataset([singlesample, singlesample])
    expected_shape = ((np.prod(ds_double.shape[:2]),)
                      + singlesample.shape[1:])
    assert_equal(expected_shape, m.reverse(ds_double).shape)

    # reverse() does not reject a shape mismatch; that is useful when
    # reverse mapping sample attributes, so no ValueError check is done
    # for m.reverse(singlesample[0])

    # check broadcasting of 'raw' samples into proper boxcars on forward()
    bc = m.forward1(np.arange(24).reshape(3, 4, 2))
    twice = np.array(2 * [np.arange(24).reshape(3, 4, 2)])
    assert_array_equal(bc, twice)
def timesegments_classification(dss, hyper=None, part1=HalfPartitioner(),
                                part2=NFoldPartitioner(attr='subjects'),
                                window_size=6, overlapping_windows=True,
                                distance='correlation', do_zscore=True):
    """Time-segment classification across subjects using Hyperalignment

    Parameters
    ----------
    dss : list of datasets
      Datasets to benchmark on.  Usually a single dataset per subject.
    hyper : Hyperalignment-like, optional
      Beast which if called on a list of datasets should spit out trained
      mappers.  If not specified, `IdentityMapper`s will be used
    part1 : Partitioner, optional
      Partitioner to split data for hyperalignment "cross-validation".
      A deep copy of it is used for every dataset.
    part2 : Partitioner, optional
      Partitioner for CV within the hyperalignment test split
    window_size : int, optional
      How many temporal points to consider for a classification sample
    overlapping_windows : bool, optional
      Strategy to how create and classify "samples" for classification.
      If True -- `window_size` samples from each time point (but trailing
      ones) constitute a sample, and upon "predict" `window_size` of
      samples around each test point is not considered.  If False --
      samples are just taken (with training and testing splits) at
      `window_size` step from one to another.
    distance : {'correlation'}, optional
      Distance measure used to match windows.  Only 'correlation'
      (1 - Pearson correlation) is implemented.
    do_zscore : bool, optional
      Perform zscoring (overall, not per-chunk) for each dataset upon
      partitioning with part1

    Returns
    -------
    ndarray
      Classification errors: one row per `part1` partition, containing one
      error per `part2` partition.

    Raises
    ------
    NotImplementedError
      If `distance` is anything but 'correlation'.
    """
    # Generate outer-most partitioning ()
    parts = [copy.deepcopy(part1).generate(ds) for ds in dss]

    iteration = 1
    errors = []

    while True:
        try:
            # builtin next() works for Python 2 and 3 generators alike,
            # unlike the Python-2-only .next() method
            dss_partitioned = [next(p) for p in parts]
        except StopIteration:
            # we are done -- no more partitions
            break
        if __debug__:
            debug("BM", "Iteration %d", iteration)

        dss_train, dss_test = zip(*[
            list(Splitter("partitions").generate(ds))
            for ds in dss_partitioned
        ])

        # TODO:  allow for doing feature selection

        if do_zscore:
            for ds in dss_train + dss_test:
                zscore(ds, chunks_attr=None)

        if hyper is not None:
            # since otherwise it would remember previous loop dataset as
            # the "commonspace".  Now let's do hyperalignment but on a
            # copy in each loop iteration
            hyper_ = copy.deepcopy(hyper)
            mappers = hyper_(dss_train)
        else:
            mappers = [IdentityMapper() for _ in dss_train]

        dss_test_aligned = [mapper.forward(ds)
                            for mapper, ds in zip(mappers, dss_test)]

        # assign .sa.subjects to those datasets
        for i, ds in enumerate(dss_test_aligned):
            # part2.attr is by default "subjects"
            ds.sa[part2.attr] = [i]

        dss_test_bc = []
        for ds in dss_test_aligned:
            if overlapping_windows:
                startpoints = range(len(ds) - window_size + 1)
            else:
                startpoints = _get_nonoverlapping_startpoints(len(ds),
                                                              window_size)
            bm = BoxcarMapper(startpoints, window_size)
            bm.train(ds)
            ds_ = bm.forward(ds)
            ds_.sa['startpoints'] = startpoints

            # Reassign subjects so they are scalars instead of arrays.
            # ndarray.item() is what np.asscalar() used to call;
            # np.asscalar itself is no longer available in current NumPy
            # releases.
            ds_.sa[part2.attr] = [np.unique(x).item()
                                  for x in ds_.sa[part2.attr].value]

            fm = FlattenMapper()
            fm.train(ds_)
            dss_test_bc.append(ds_.get_mapped(fm))

        ds_test = vstack(dss_test_bc)

        # Perform classification across subjects comparing against mean
        # spatio-temporal pattern of other subjects
        errors_across_subjects = []
        for ds_test_part in part2.generate(ds_test):
            ds_train_, ds_test_ = list(
                Splitter("partitions").generate(ds_test_part))
            # average across subjects to get a representative pattern per
            # timepoint
            ds_train_ = mean_group_sample(['startpoints'])(ds_train_)
            assert (ds_train_.shape == ds_test_.shape)

            if distance == 'correlation':
                # TODO: redo more efficiently since now we are creating
                # full corrcoef matrix.  Also we might better just take a
                # name for the pdist measure but then implement them
                # efficiently (i.e. without hstacking both pieces together
                # first)
                dist = 1 - np.corrcoef(
                    ds_train_, ds_test_)[len(ds_test_):, :len(ds_test_)]
            else:
                raise NotImplementedError(
                    "distance=%r is not supported, only 'correlation' is"
                    % (distance,))

            if overlapping_windows:
                dist = wipe_out_offdiag(dist, window_size)

            # a window counts as correct if its minimal distance is on
            # the diagonal of the distance matrix
            winners = np.argmin(dist, axis=1)
            error = np.mean(winners != np.arange(len(winners)))
            errors_across_subjects.append(error)

        errors.append(errors_across_subjects)
        iteration += 1

    errors = np.array(errors)
    if __debug__:
        debug("BM",
              "Finished with %s array of errors. Mean error %.2f"
              % (errors.shape, np.mean(errors)))
    return errors
def plot_erp(data, SR=500, onsets=None,
             pre=0.2, pre_onset=None, post=None, pre_mean=None,
             color='r', errcolor=None, errtype=None, ax=pl,
             ymult=1.0, *args, **kwargs):
    """Plot single ERP on existing canvas

    The input `data` is never modified: scaling and baseline correction
    are carried out on a copy.

    Parameters
    ----------
    data : 1D or 2D ndarray
      The data array can either be 1D (samples over time) or 2D
      (trials x samples). In the first case a boxcar mapper is used to
      extract the respective trial timecourses given a list of trial
      onsets. In the latter case, each row of the data array is taken as
      the EEG signal timecourse of a particular trial.
    onsets : list(int)
      List of onsets (in samples not in seconds).
    SR : int, optional
      Sampling rate (1/s) of the signal.
    pre : float, optional
      Duration (in seconds) to be plotted prior to onset.
    pre_onset : float or None
      If data is already in epochs (2D) then pre_onset provides
      information on how many seconds pre-stimulus were used to
      generate them. If None, then pre_onset = pre
    post : float
      Duration (in seconds) to be plotted after the onset. Mandatory
      if `onsets` are given.
    pre_mean : float
      Duration (in seconds) at the beginning of the window which is
      used for deriving the mean of the signal. If None, pre_mean = pre.
      If 0, then the mean is not subtracted from the signal.
    errtype : None or 'ste' or 'std' or 'ci95' or list of previous three
      Type of error value to be computed per datapoint.  'ste' --
      standard error of the mean, 'std' -- standard deviation 'ci95'
      -- 95% confidence interval (1.96 * ste), None -- no error margin
      is plotted (default).
      Optionally, multiple error types can be specified in a list (or
      tuple). In that case all of them will be plotted.
    color : matplotlib color code, optional
      Color to be used for plotting the mean signal timecourse.
    errcolor : matplotlib color code
      Color to be used for plotting the error margin. If None, use main
      color but with weak alpha level
    ax :
      Target where to draw.
    ymult : float, optional
      Multiplier for the values. E.g. if negative-up ERP plot is needed:
      provide ymult=-1.0
    *args, **kwargs
      Additional arguments to `pylab.plot`.

    Returns
    -------
    array
      Mean ERP timeseries.

    Raises
    ------
    ValueError
      If `onsets` are given without `post`, or if an unknown error type
      is requested.
    RuntimeError
      If the trial data does not end up being 2D (trials x samples).
    """
    if pre_mean is None:
        pre_mean = pre

    # set default
    pre_discard = 0

    if onsets is not None:  # if we need to extract ERPs
        if post is None:
            raise ValueError(
                "Duration post onsets must be provided if onsets are given")
        # trial timecourse duration
        duration = pre + post

        # We are working with a full timeline
        bcm = BoxcarMapper(onsets,
                           boxlength=int(SR * duration),
                           offset=-int(SR * pre))
        erp_data = bcm(data)

        # override values since we are using Boxcar
        pre_onset = pre
    else:
        if pre_onset is None:
            pre_onset = pre

        if pre_onset < pre:
            # cannot plot more pre-stimulus signal than was captured
            warning("Pre-stimulus interval to plot %g is larger than provided "
                    "pre-stimulus captured interval %g, thus plot interval was "
                    "adjusted" % (pre, pre_onset))
            pre = pre_onset

        if post is None:
            # figure out post
            duration = float(data.shape[1]) / SR - pre_discard
            post = duration - pre
        else:
            duration = pre + post

        erp_data = data
        pre_discard = pre_onset - pre

    # Scale the data appropriately.
    # NOTE: deliberately NOT done in-place -- in the 2D case `erp_data`
    # is the caller's array, and in-place scaling would also fail for
    # integer data combined with a float multiplier.
    erp_data = erp_data * ymult

    # validity check -- we should have 2D matrix (trials x samples)
    if len(erp_data.shape) != 2:
        # shape is wrapped into a 1-tuple: a bare tuple would be unpacked
        # by the %-operator
        raise RuntimeError(
            "plot_erp() supports either 1D data with onsets, or 2D data "
            "(trials x sample_points). Shape of the data at the point "
            "is %s" % (erp_data.shape,))

    # pre_mean cannot be None anymore at this point
    if pre_mean != 0:
        # mean of pre-onset signal across trials
        erp_baseline = np.mean(
            erp_data[:, int((pre_onset - pre_mean) * SR):int(pre_onset * SR)])
        # center data on pre-onset mean (creates yet another new array)
        erp_data = erp_data - erp_baseline

    # generate timepoints and error ranges to plot filled error area
    # top ->
    # bottom <-
    time_points = np.arange(erp_data.shape[1]) * 1.0 / SR - pre_onset

    # if pre != pre_onset
    if pre_discard > 0:
        npoints = int(pre_discard * SR)
        time_points = time_points[npoints:]
        erp_data = erp_data[:, npoints:]

    # select only time points of interest (if post is provided)
    if post is not None:
        npoints = int(duration * SR)
        time_points = time_points[:npoints]
        erp_data = erp_data[:, :npoints]

    # compute mean signal timecourse across trials
    erp_mean = np.mean(erp_data, axis=0)

    # give sane default
    if errtype is None:
        errtype = []
    if not isinstance(errtype, (list, tuple)):
        errtype = [errtype]

    # the error margin defaults to the main color (drawn with weak alpha)
    if errcolor is None:
        errcolor = color

    for et in errtype:
        # compute error per datapoint
        if et in ('ste', 'ci95'):
            erp_stderr = erp_data.std(axis=0) / np.sqrt(len(erp_data))
            if et == 'ci95':
                erp_stderr *= 1.96
        elif et == 'std':
            erp_stderr = erp_data.std(axis=0)
        else:
            raise ValueError("Unknown error type '%s'" % (et,))

        # outline of the error band: forward along the top edge, backward
        # along the bottom edge
        time_points2w = np.hstack((time_points, time_points[::-1]))

        error_top = erp_mean + erp_stderr
        error_bottom = erp_mean - erp_stderr
        error2w = np.hstack((error_top, error_bottom[::-1]))

        # plot error margin
        ax.fill(time_points2w, error2w,
                edgecolor=errcolor, facecolor=errcolor,
                alpha=0.2, zorder=3)

    # plot mean signal timecourse
    ax.plot(time_points, erp_mean, lw=2, color=color, zorder=4,
            *args, **kwargs)
    # ax.xaxis.set_major_locator(pl.MaxNLocator(4))
    return erp_mean
def timesegments_classification(
        dss,
        hyper=None,
        part1=HalfPartitioner(),
        part2=NFoldPartitioner(attr='subjects'),
        window_size=6,
        overlapping_windows=True,
        distance='correlation',
        do_zscore=True):
    """Time-segment classification across subjects using Hyperalignment

    Parameters
    ----------
    dss : list of datasets
       Datasets to benchmark on.  Usually a single dataset per subject.
    hyper : Hyperalignment-like, optional
       Beast which if called on a list of datasets should spit out trained
       mappers.  If not specified, `IdentityMapper`s will be used
    part1 : Partitioner, optional
       Partitioner to split data for hyperalignment "cross-validation"
    part2 : Partitioner, optional
       Partitioner for CV within the hyperalignment test split
    window_size : int, optional
       How many temporal points to consider for a classification sample
    overlapping_windows : bool, optional
       Strategy to how create and classify "samples" for classification.  If
       True -- `window_size` samples from each time point (but trailing ones)
       constitute a sample, and upon "predict" `window_size` of samples around
       each test point is not considered.  If False -- samples are just taken
       (with training and testing splits) at `window_size` step from one to
       another.
    distance : {'correlation'}, optional
       Dissimilarity used to match test segments against the mean pattern
       of the training subjects.  Only 'correlation' (1 - Pearson
       correlation) is implemented.
    do_zscore : bool, optional
       Perform zscoring (overall, not per-chunk) for each dataset upon
       partitioning with part1

    Returns
    -------
    ndarray
       Classification errors, one row per `part1` partition and one column
       per `part2` fold.

    Raises
    ------
    NotImplementedError
       If `distance` is anything but 'correlation'.
    """
    # Generate outer-most partitioning ()
    # (a private copy of the partitioner per dataset)
    parts = [copy.deepcopy(part1).generate(ds) for ds in dss]

    iteration = 1
    errors = []

    while True:
        try:
            # builtin next() works for Python 2 and 3 generators alike
            dss_partitioned = [next(p) for p in parts]
        except StopIteration:
            # we are done -- no more partitions
            break
        if __debug__:
            debug("BM", "Iteration %d", iteration)

        dss_train, dss_test = zip(*[list(Splitter("partitions").generate(ds))
                                    for ds in dss_partitioned])

        # TODO:  allow for doing feature selection

        if do_zscore:
            for ds in dss_train + dss_test:
                zscore(ds, chunks_attr=None)

        if hyper is not None:
            # since otherwise it would remember previous loop dataset as the
            # "commonspace".
            # Now let's do hyperalignment but on a copy in each loop iteration
            hyper_ = copy.deepcopy(hyper)
            mappers = hyper_(dss_train)
        else:
            mappers = [IdentityMapper() for ds in dss_train]

        dss_test_aligned = [mapper.forward(ds)
                            for mapper, ds in zip(mappers, dss_test)]

        # assign .sa.subjects to those datasets
        for i, ds in enumerate(dss_test_aligned):
            # part2.attr is by default "subjects"
            ds.sa[part2.attr] = [i]

        dss_test_bc = []
        for ds in dss_test_aligned:
            if overlapping_windows:
                startpoints = range(len(ds) - window_size + 1)
            else:
                startpoints = _get_nonoverlapping_startpoints(len(ds),
                                                              window_size)
            bm = BoxcarMapper(startpoints, window_size)
            bm.train(ds)
            ds_ = bm.forward(ds)
            ds_.sa['startpoints'] = startpoints

            # reassign subjects so they are not arrays: collapse the
            # per-window values into their single unique value
            # (.item() fails, like the removed np.asscalar did, if a
            # window does not hold exactly one unique value)
            ds_.sa[part2.attr] = [np.unique(x).item()
                                  for x in ds_.sa[part2.attr].value]

            fm = FlattenMapper()
            fm.train(ds_)
            dss_test_bc.append(ds_.get_mapped(fm))

        ds_test = vstack(dss_test_bc)

        # Perform classification across subjects comparing against mean
        # spatio-temporal pattern of other subjects
        errors_across_subjects = []
        for ds_test_part in part2.generate(ds_test):
            ds_train_, ds_test_ = list(
                Splitter("partitions").generate(ds_test_part))
            # average across subjects to get a representative pattern per
            # timepoint
            ds_train_ = mean_group_sample(['startpoints'])(ds_train_)
            assert ds_train_.shape == ds_test_.shape

            if distance == 'correlation':
                # TODO: redo more efficiently since now we are creating full
                # corrcoef matrix.  Also we might better just take a name for
                # the pdist measure but then implement them efficiently
                # (i.e. without hstacking both pieces together first)
                dist = 1 - np.corrcoef(
                    ds_train_, ds_test_)[len(ds_test_):, :len(ds_test_)]
            else:
                raise NotImplementedError(
                    "Unsupported distance %r; only 'correlation' is "
                    "implemented" % (distance,))

            if overlapping_windows:
                dist = wipe_out_offdiag(dist, window_size)

            # a test segment is classified correctly if its closest
            # training pattern sits at the same position
            winners = np.argmin(dist, axis=1)
            error = np.mean(winners != np.arange(len(winners)))
            errors_across_subjects.append(error)
        errors.append(errors_across_subjects)
        iteration += 1

    errors = np.array(errors)
    if __debug__:
        debug("BM", "Finished with %s array of errors. Mean error %.2f"
              % (errors.shape, np.mean(errors)))
    return errors
def _extract_boxcar_events(
        ds, events=None, time_attr=None, match='prev',
        eprefix='event', event_mapper=None):
    """Extract one boxcar sample per event from a dataset.

    Events given in real time (``time_attr`` is not None) are first
    converted into sample indices, then a BoxcarMapper cuts out the
    samples, and finally either a FlattenMapper or the given
    ``event_mapper`` is applied.

    See eventrelated_dataset() for docs on the arguments.
    """
    # translate the user-facing matching mode into the rounding mode
    # handed to value2idx()
    rounding = {'prev': 'floor',
                'next': 'ceil',
                'closest': 'round'}[match]

    if time_attr is None:
        # onsets/durations are used as they are
        descr_events = events
    else:
        # convert real-valued onsets/durations into sample ids/counts
        timestamps = ds.sa[time_attr].value
        descr_events = []
        for event in events:
            # work on a private copy -- the input stays untouched
            spec = copy.deepcopy(event)
            onset_time = spec['onset']
            # best matching sample
            start = value2idx(onset_time, timestamps, rounding)
            # remember how far the matched sample is from the real onset,
            # and preserve the original timing information
            spec['orig_offset'] = onset_time - timestamps[start]
            spec['orig_onset'] = onset_time
            spec['orig_duration'] = spec['duration']
            # number of samples from the onset sample on, that start
            # before the event ends
            remaining = timestamps[start:]
            window_end = onset_time + spec['duration']
            spec['duration'] = len(remaining[remaining < window_end])
            # from now on the onset is a sample index
            spec['onset'] = start
            descr_events.append(spec)

    # per-property value lists, as needed for the BoxcarMapper
    evvars = _events2dict(descr_events)

    # sanity checks
    for required in ('onset', 'duration'):
        if required not in evvars:
            raise ValueError("'%s' is a required property for all events."
                             % required)

    durations = evvars['duration']
    boxlength = max(durations)
    if __debug__:
        if boxlength != min(durations):
            warning('Boxcar mapper will use maximum boxlength (%i) of all '
                    'provided Events.' % boxlength)

    # build, train and apply the boxcar mapper
    boxcar = BoxcarMapper(evvars['onset'], boxlength, space=eprefix)
    boxcar.train(ds)
    ds = ds.get_mapped(boxcar)

    if event_mapper is None:
        # by default simply flatten each event sample again
        # could we add some meaningful attribute during this mapping, i.e.
        # would assigning 'inspace' do something good?
        post_mapper = FlattenMapper(shape=ds.samples.shape[1:])
    else:
        post_mapper = event_mapper
    ds = ds.get_mapped(post_mapper)

    # all event properties become sample attributes; in case of a
    # conversion into discrete time, onset and duration are reported as
    # originally specified
    if time_attr is not None:
        for attr in ('onset', 'duration'):
            evvars[attr] = [e[attr] for e in events]
    return _evvars2ds(ds, evvars, eprefix)
def eventrelated_dataset(ds, events=None, time_attr=None, match='prev',
                         eprefix='event'):
    """Segment a dataset into a set of events.

    Event-related samples can be extracted from any time-series based
    dataset (or, in fact, from any dataset with ordered samples).
    Boxcar-shaped event samples, potentially spanning multiple input
    samples, are extracted using :class:`~mvpa2.misc.support.Event`
    definition lists. All samples covering a particular event are used to
    form the corresponding event sample.

    An event definition is a dictionary that contains ``onset`` (as sample
    index in the input dataset), ``duration`` (as number of consecutive
    samples after the onset), as well as an arbitrary number of additional
    attributes.

    Alternatively, ``onset`` and ``duration`` may be given as real time
    stamps (or durations). In this case a samples attribute of the input
    dataset has to be specified, which is used to convert them into sample
    indices.

    Parameters
    ----------
    ds : Dataset
      The samples of this input dataset have to be in whatever ascending
      order.
    events : list
      Each event definition has to specify ``onset`` and ``duration``. All
      other attributes will be passed on to the sample attributes
      collection of the returned dataset.
    time_attr : str or None
      If not None, the ``onset`` and ``duration`` specs from the event list
      will be converted using information from this sample attribute. Its
      values will be treated as in-the-same-unit and are used to determine
      corresponding samples from real-value onset and duration definitions.
    match : {'prev', 'next', 'closest'}
      Strategy used to match real-value onsets to sample indices. 'prev'
      chooses the closest preceding sample, 'next' the closest following
      sample and 'closest' the absolute closest sample.
    eprefix : str or None
      If not None, this prefix is used to name additional attributes
      generated by the underlying `~mvpa2.mappers.boxcar.BoxcarMapper`. If
      it is set to None, no additional attributes will be created.

    Returns
    -------
    Dataset
      The returned dataset has one sample per each event definition that
      has been passed to the function.

    Examples
    --------
    The documentation also contains an :ref:`example script
    <example_eventrelated>` showing a spatio-temporal analysis of fMRI data
    that involves this function.

    >>> from mvpa2.datasets import Dataset
    >>> ds = Dataset(np.random.randn(10, 25))
    >>> events = [{'onset': 2, 'duration': 4},
    ...           {'onset': 4, 'duration': 4}]
    >>> eds = eventrelated_dataset(ds, events)
    >>> len(eds)
    2
    >>> eds.nfeatures == ds.nfeatures * 4
    True
    >>> 'mapper' in ds.a
    False
    >>> print eds.a.mapper
    <Chain: <Boxcar: bl=4>-<Flatten>>

    And now the same conversion, but with events specified as real time.
    This is only possible if the input dataset contains a sample attribute
    with the necessary information about the input samples.

    >>> ds.sa['record_time'] = np.linspace(0, 5, len(ds))
    >>> rt_events = [{'onset': 1.05, 'duration': 2.2},
    ...              {'onset': 2.3, 'duration': 2.12}]
    >>> rt_eds = eventrelated_dataset(ds, rt_events, time_attr='record_time',
    ...                               match='closest')
    >>> np.all(eds.samples == rt_eds.samples)
    True
    >>> # returned dataset e.g. has info from original samples
    >>> rt_eds.sa.record_time
    array([[ 1.11111111,  1.66666667,  2.22222222,  2.77777778],
           [ 2.22222222,  2.77777778,  3.33333333,  3.88888889]])
    """
    # translate the user-facing matching mode into the rounding mode
    # handed to value2idx()
    rounding = {'prev': 'floor',
                'next': 'ceil',
                'closest': 'round'}[match]

    if time_attr is None:
        # onsets/durations are used as they are
        descr_events = events
    else:
        # convert real-valued onsets/durations into sample ids/counts
        timestamps = ds.sa[time_attr].value
        descr_events = []
        for event in events:
            # work on a private copy -- the input stays untouched
            spec = copy.deepcopy(event)
            onset_time = spec['onset']
            # best matching sample
            start = value2idx(onset_time, timestamps, rounding)
            # remember how far the matched sample is from the real onset,
            # and preserve the original timing information
            spec['orig_offset'] = onset_time - timestamps[start]
            spec['orig_onset'] = onset_time
            spec['orig_duration'] = spec['duration']
            # number of samples from the onset sample on, that start
            # before the event ends
            remaining = timestamps[start:]
            window_end = onset_time + spec['duration']
            spec['duration'] = len(remaining[remaining < window_end])
            # from now on the onset is a sample index
            spec['onset'] = start
            descr_events.append(spec)

    # collect per-property value lists as needed for the BoxcarMapper;
    # the first event serves as the reference for the set of properties
    evvars = {}
    for key in descr_events[0]:
        try:
            evvars[key] = [e[key] for e in descr_events]
        except KeyError:
            raise ValueError("Each event property must be present for all "
                             "events (could not find '%s')" % key)

    # sanity checks
    for required in ('onset', 'duration'):
        if required not in evvars:
            raise ValueError("'%s' is a required property for all events."
                             % required)

    durations = evvars['duration']
    boxlength = max(durations)
    if __debug__:
        if boxlength != min(durations):
            warning('Boxcar mapper will use maximum boxlength (%i) of all '
                    'provided Events.' % boxlength)

    # build, train and apply the boxcar mapper
    boxcar = BoxcarMapper(evvars['onset'], boxlength, space=eprefix)
    boxcar.train(ds)
    ds = ds.get_mapped(boxcar)

    # flatten each event sample again
    # could we add some meaningful attribute during this mapping, i.e. would
    # assigning 'inspace' do something good?
    ds = ds.get_mapped(FlattenMapper(shape=ds.samples.shape[1:]))

    # store every event property as a samples attribute
    for name, values in evvars.items():
        if eprefix is not None and name in ds.sa:
            # an attribute of that name already exists, i.e. it got mapped
            # by the BoxcarMapper (is multi-dimensional) -- keep it
            # available under a name prefixed with `eprefix`
            ds.sa[eprefix + '_' + name] = ds.sa[name]
        if name not in ('onset', 'duration'):
            ds.sa[name] = values
        elif time_attr is not None:
            # onset and duration are reported as originally given
            # (non-discrete), but only if a conversion took place --
            # otherwise the BoxcarMapper provides the same information
            ds.sa[name] = [e[name] for e in events]
    return ds
def extract_boxcar_event_samples(
        ds, events=None, time_attr=None, match='prev',
        event_offset=None, event_duration=None,
        eprefix='event', event_mapper=None):
    """Segment a dataset by extracting boxcar events

    (Multiple) consecutive samples are extracted for each event, and are
    either returned in a flattened shape, or subject to further processing.

    **Boxcar event model details**

    For each event all samples covering that particular event are used to
    form a corresponding sample. One sample for each event is returned.
    Event specification dictionaries must contain an ``onset`` attribute
    (as sample index in the input dataset), ``duration`` (as number of
    consecutive samples after the onset). Any number of additional
    attributes can be present in an event specification. Those attributes
    are included as sample attributes in the returned dataset.

    Alternatively, ``onset`` and ``duration`` may also be given in a
    non-discrete time specification. In this case a dataset attribute needs
    to be specified that contains time-stamps for each input data sample,
    and is used to convert times into discrete sample indices (see
    ``match`` argument).

    A mapper instance can be provided (see ``event_mapper``) to implement
    further processing of each event sample, for example in order to yield
    average samples.

    Parameters
    ----------
    ds : Dataset
      Input dataset to extract the event samples from.
    events : list
      Event specification dictionaries, each with ``onset`` and
      ``duration``. The passed specifications are not modified.
    time_attr : str or None
      If not None, name of the sample attribute with the time stamps that
      are used to convert ``onset`` and ``duration`` into sample indices.
    match : {'prev', 'next', 'closest'}
      Strategy used to match real-value onsets to sample indices.
    event_offset : None or number
      If not None, this value is added to the onset of every event (before
      any conversion into sample indices).
    event_duration : None or number
      If not None, this value replaces the duration of every event (before
      any conversion into sample indices).
    eprefix : str or None
      Passed to the underlying BoxcarMapper as its ``space``.
    event_mapper : Mapper or None
      Mapper used to forward-map the dataset with the boxcar event
      samples. If None, a FlattenMapper is used.

    Returns
    -------
    Dataset
      One sample per each event definition that has been passed to the
      function. Additional event attributes are included as sample
      attributes.

    Examples
    --------
    The documentation also contains an :ref:`example script
    <example_eventrelated>` showing a spatio-temporal analysis of fMRI data
    that involves this function.

    >>> from mvpa2.datasets import Dataset
    >>> ds = Dataset(np.random.randn(10, 25))
    >>> events = [{'onset': 2, 'duration': 4},
    ...           {'onset': 4, 'duration': 4}]
    >>> eds = extract_boxcar_event_samples(ds, events)
    >>> len(eds)
    2
    >>> eds.nfeatures == ds.nfeatures * 4
    True
    >>> 'mapper' in ds.a
    False
    >>> print eds.a.mapper
    <Chain: <Boxcar: bl=4>-<Flatten>>

    And now the same conversion, but with events specified as real time.
    This is only possible if the input dataset contains a sample attribute
    with the necessary information about the input samples.

    >>> ds.sa['record_time'] = np.linspace(0, 5, len(ds))
    >>> rt_events = [{'onset': 1.05, 'duration': 2.2},
    ...              {'onset': 2.3, 'duration': 2.12}]
    >>> rt_eds = extract_boxcar_event_samples(ds, rt_events,
    ...                                       time_attr='record_time',
    ...                                       match='closest')
    >>> np.all(eds.samples == rt_eds.samples)
    True
    >>> # returned dataset e.g. has info from original samples
    >>> rt_eds.sa.record_time
    array([[ 1.11111111,  1.66666667,  2.22222222,  2.77777778],
           [ 2.22222222,  2.77777778,  3.33333333,  3.88888889]])
    """
    if event_offset is not None or event_duration is not None:
        # apply the global onset shift / duration override
        adjusted_events = []
        for ev in events:
            # do not mess with the input data
            ev = copy.deepcopy(ev)
            if event_offset is not None:
                ev['onset'] += event_offset
            if event_duration is not None:
                ev['duration'] = event_duration
            adjusted_events.append(ev)
        events = adjusted_events

    # the actual extraction (time conversion, boxcar mapping, event mapper,
    # attribute assignment) is shared with other front-ends
    return _extract_boxcar_events(
        ds, events=events, time_attr=time_attr, match=match,
        eprefix=eprefix, event_mapper=event_mapper)
def eventrelated_dataset(ds, events=None, time_attr=None, match='prev',
                         eprefix='event', event_mapper=None):
    """Segment a dataset into a set of events.

    Event-related samples can be extracted from any time-series based
    dataset (or, in fact, from any dataset with ordered samples).
    Boxcar-shaped event samples, potentially spanning multiple input
    samples, are extracted using :class:`~mvpa2.misc.support.Event`
    definition lists. All samples covering a particular event are used to
    form the corresponding event sample.

    An event definition is a dictionary that contains ``onset`` (as sample
    index in the input dataset), ``duration`` (as number of consecutive
    samples after the onset), as well as an arbitrary number of additional
    attributes.

    Alternatively, ``onset`` and ``duration`` may be given as real time
    stamps (or durations). In this case a samples attribute of the input
    dataset has to be specified, which is used to convert them into sample
    indices.

    Parameters
    ----------
    ds : Dataset
      The samples of this input dataset have to be in whatever ascending
      order.
    events : list
      Each event definition has to specify ``onset`` and ``duration``. All
      other attributes will be passed on to the sample attributes
      collection of the returned dataset.
    time_attr : str or None
      If not None, the ``onset`` and ``duration`` specs from the event list
      will be converted using information from this sample attribute. Its
      values will be treated as in-the-same-unit and are used to determine
      corresponding samples from real-value onset and duration definitions.
    match : {'prev', 'next', 'closest'}
      Strategy used to match real-value onsets to sample indices. 'prev'
      chooses the closest preceding sample, 'next' the closest following
      sample and 'closest' the absolute closest sample.
    eprefix : str or None
      If not None, this prefix is used to name additional attributes
      generated by the underlying `~mvpa2.mappers.boxcar.BoxcarMapper`. If
      it is set to None, no additional attributes will be created.
    event_mapper : Mapper
      This mapper is used to forward-map the dataset containing the boxcar
      event samples. If None (default) a FlattenMapper is employed to
      convert multi-dimensional sample matrices into simple one-dimensional
      sample vectors. This option can be used to implement temporal
      compression, by e.g. averaging samples within an event boxcar using
      an FxMapper. Any mapper needs to keep the sample axis unchanged, i.e.
      number and order of samples remain the same.

    Returns
    -------
    Dataset
      The returned dataset has one sample per each event definition that
      has been passed to the function.

    Examples
    --------
    The documentation also contains an :ref:`example script
    <example_eventrelated>` showing a spatio-temporal analysis of fMRI data
    that involves this function.

    >>> from mvpa2.datasets import Dataset
    >>> ds = Dataset(np.random.randn(10, 25))
    >>> events = [{'onset': 2, 'duration': 4},
    ...           {'onset': 4, 'duration': 4}]
    >>> eds = eventrelated_dataset(ds, events)
    >>> len(eds)
    2
    >>> eds.nfeatures == ds.nfeatures * 4
    True
    >>> 'mapper' in ds.a
    False
    >>> print eds.a.mapper
    <Chain: <Boxcar: bl=4>-<Flatten>>

    And now the same conversion, but with events specified as real time.
    This is only possible if the input dataset contains a sample attribute
    with the necessary information about the input samples.

    >>> ds.sa['record_time'] = np.linspace(0, 5, len(ds))
    >>> rt_events = [{'onset': 1.05, 'duration': 2.2},
    ...              {'onset': 2.3, 'duration': 2.12}]
    >>> rt_eds = eventrelated_dataset(ds, rt_events, time_attr='record_time',
    ...                               match='closest')
    >>> np.all(eds.samples == rt_eds.samples)
    True
    >>> # returned dataset e.g. has info from original samples
    >>> rt_eds.sa.record_time
    array([[ 1.11111111,  1.66666667,  2.22222222,  2.77777778],
           [ 2.22222222,  2.77777778,  3.33333333,  3.88888889]])
    """
    # translate the user-facing matching mode into the rounding mode
    # handed to value2idx()
    rounding = {'prev': 'floor',
                'next': 'ceil',
                'closest': 'round'}[match]

    if time_attr is None:
        # onsets/durations are used as they are
        descr_events = events
    else:
        # convert real-valued onsets/durations into sample ids/counts
        timestamps = ds.sa[time_attr].value
        descr_events = []
        for event in events:
            # work on a private copy -- the input stays untouched
            spec = copy.deepcopy(event)
            onset_time = spec['onset']
            # best matching sample
            start = value2idx(onset_time, timestamps, rounding)
            # remember how far the matched sample is from the real onset,
            # and preserve the original timing information
            spec['orig_offset'] = onset_time - timestamps[start]
            spec['orig_onset'] = onset_time
            spec['orig_duration'] = spec['duration']
            # number of samples from the onset sample on, that start
            # before the event ends
            remaining = timestamps[start:]
            window_end = onset_time + spec['duration']
            spec['duration'] = len(remaining[remaining < window_end])
            # from now on the onset is a sample index
            spec['onset'] = start
            descr_events.append(spec)

    # collect per-property value lists as needed for the BoxcarMapper;
    # the first event serves as the reference for the set of properties
    evvars = {}
    for key in descr_events[0]:
        try:
            evvars[key] = [e[key] for e in descr_events]
        except KeyError:
            raise ValueError("Each event property must be present for all "
                             "events (could not find '%s')" % key)

    # sanity checks
    for required in ('onset', 'duration'):
        if required not in evvars:
            raise ValueError("'%s' is a required property for all events."
                             % required)

    durations = evvars['duration']
    boxlength = max(durations)
    if __debug__:
        if boxlength != min(durations):
            warning('Boxcar mapper will use maximum boxlength (%i) of all '
                    'provided Events.' % boxlength)

    # build, train and apply the boxcar mapper
    boxcar = BoxcarMapper(evvars['onset'], boxlength, space=eprefix)
    boxcar.train(ds)
    ds = ds.get_mapped(boxcar)

    if event_mapper is None:
        # by default simply flatten each event sample again
        # could we add some meaningful attribute during this mapping, i.e.
        # would assigning 'inspace' do something good?
        post_mapper = FlattenMapper(shape=ds.samples.shape[1:])
    else:
        post_mapper = event_mapper
    ds = ds.get_mapped(post_mapper)

    # store every event property as a samples attribute
    for name, values in evvars.items():
        if eprefix is not None and name in ds.sa:
            # an attribute of that name already exists, i.e. it got mapped
            # by the BoxcarMapper (is multi-dimensional) -- keep it
            # available under a name prefixed with `eprefix`
            ds.sa[eprefix + '_' + name] = ds.sa[name]
        if name not in ('onset', 'duration'):
            ds.sa[name] = values
        elif time_attr is not None:
            # onset and duration are reported as originally given
            # (non-discrete), but only if a conversion took place --
            # otherwise the BoxcarMapper provides the same information
            ds.sa[name] = [e[name] for e in events]
    return ds