Beispiel #1
0
    def _test_compare_to_old(self):
        """Good just to compare if I didn't screw up anything... treat
        it as a regression test
        """
        import mvpa2.mappers.wavelet_ as wavelet_

        ds = datasets['uni2medium']
        d2d = ds.samples
        ws = 16                          # size of timeline for wavelet
        sp = np.arange(ds.nsamples-ws*2) + ws

        # create 3D instance (samples x timepoints x channels)
        bcm = BoxcarMapper(sp, ws)
        d3d = bcm.forward(d2d)

        # use wavelet mapper
        for wdm, wdm_ in ((WaveletTransformationMapper(),
                           wavelet_.WaveletTransformationMapper()),
                          (WaveletPacketMapper(),
                           wavelet_.WaveletPacketMapper()),):
            d3d_wd = wdm(d3d)
            d3d_wd_ = wdm_(d3d)

            self.assertTrue((d3d_wd == d3d_wd_).all(),
                msg="We should have got same result with old and new code. "
                    "Got %s and %s" % (d3d_wd, d3d_wd_))
    def _test_compare_to_old(self):
        """Good just to compare if I didn't screw up anything... treat
        it as a regression test
        """
        import mvpa2.mappers.wavelet_ as wavelet_

        ds = datasets['uni2medium']
        d2d = ds.samples
        ws = 16                          # size of timeline for wavelet
        sp = np.arange(ds.nsamples-ws*2) + ws

        # create 3D instance (samples x timepoints x channels)
        bcm = BoxcarMapper(sp, ws)
        d3d = bcm.forward(d2d)

        # use wavelet mapper
        for wdm, wdm_ in ((WaveletTransformationMapper(),
                           wavelet_.WaveletTransformationMapper()),
                          (WaveletPacketMapper(),
                           wavelet_.WaveletPacketMapper()),):
            d3d_wd = wdm(d3d)
            d3d_wd_ = wdm_(d3d)

            self.failUnless((d3d_wd == d3d_wd_).all(),
                msg="We should have got same result with old and new code. "
                    "Got %s and %s" % (d3d_wd, d3d_wd_))
Beispiel #3
0
def _extract_boxcar_events(ds, events=None, time_attr=None, match="prev", eprefix="event", event_mapper=None):
    """see eventrelated_dataset() for docs"""
    # relabel argument
    conv_strategy = {"prev": "floor", "next": "ceil", "closest": "round"}[match]

    if not time_attr is None:
        tvec = ds.sa[time_attr].value
        # we are asked to convert onset time into sample ids
        descr_events = []
        for ev in events:
            # do not mess with the input data
            ev = copy.deepcopy(ev)
            # best matching sample
            idx = value2idx(ev["onset"], tvec, conv_strategy)
            # store offset of sample time and real onset
            ev["orig_offset"] = ev["onset"] - tvec[idx]
            # rescue the real onset into a new attribute
            ev["orig_onset"] = ev["onset"]
            ev["orig_duration"] = ev["duration"]
            # figure out how many samples we need
            ev["duration"] = len(tvec[idx:][tvec[idx:] < ev["onset"] + ev["duration"]])
            # new onset is sample index
            ev["onset"] = idx
            descr_events.append(ev)
    else:
        descr_events = events
    # convert the event specs into the format expected by BoxcarMapper
    # take the first event as an example of contained keys
    evvars = _events2dict(descr_events)
    # checks
    for p in ["onset", "duration"]:
        if not p in evvars:
            raise ValueError("'%s' is a required property for all events." % p)
    boxlength = max(evvars["duration"])
    if __debug__:
        if not max(evvars["duration"]) == min(evvars["duration"]):
            warning("Boxcar mapper will use maximum boxlength (%i) of all " "provided Events." % boxlength)

    # finally create, train und use the boxcar mapper
    bcm = BoxcarMapper(evvars["onset"], boxlength, space=eprefix)
    bcm.train(ds)
    ds = ds.get_mapped(bcm)
    if event_mapper is None:
        # at last reflatten the dataset
        # could we add some meaningful attribute during this mapping, i.e. would
        # assigning 'inspace' do something good?
        ds = ds.get_mapped(FlattenMapper(shape=ds.samples.shape[1:]))
    else:
        ds = ds.get_mapped(event_mapper)
    # add samples attributes for the events, simply dump everything as a samples
    # attribute
    # special case onset and duration in case of conversion into descrete time
    if not time_attr is None:
        for attr in ("onset", "duration"):
            evvars[attr] = [e[attr] for e in events]
    ds = _evvars2ds(ds, evvars, eprefix)

    return ds
Beispiel #4
0
    def test_simple_wdm(self):
        """
        """
        ds = datasets['uni2medium']
        d2d = ds.samples
        ws = 15  # size of timeline for wavelet
        sp = np.arange(ds.nsamples - ws * 2) + ws

        # create 3D instance (samples x timepoints x channels)
        bcm = BoxcarMapper(sp, ws)
        d3d = bcm.forward(d2d)

        # use wavelet mapper
        wdm = WaveletTransformationMapper()
        d3d_wd = wdm.forward(d3d)
        d3d_swap = d3d.swapaxes(1, 2)

        self.assertRaises(ValueError,
                          WaveletTransformationMapper,
                          wavelet='bogus')
        self.assertRaises(ValueError,
                          WaveletTransformationMapper,
                          mode='bogus')

        # use wavelet mapper
        for wdm, wdm_swap in ((WaveletTransformationMapper(),
                               WaveletTransformationMapper(dim=2)),
                              (WaveletPacketMapper(),
                               WaveletPacketMapper(dim=2))):
            for dd, dd_swap in ((d3d, d3d_swap), (d2d, None)):
                dd_wd = wdm.forward(dd)
                if dd_swap is not None:
                    dd_wd_swap = wdm_swap.forward(dd_swap)

                    self.assertTrue(
                        (dd_wd == dd_wd_swap.swapaxes(1, 2)).all(),
                        msg="We should have got same result with swapped "
                        "dimensions and explicit mentioining of it. "
                        "Got %s and %s" % (dd_wd, dd_wd_swap))

                # some sanity checks
                self.assertTrue(dd_wd.shape[0] == dd.shape[0])

                if not isinstance(wdm, WaveletPacketMapper):
                    # we can do reverse only for DWT
                    dd_rev = wdm.reverse(dd_wd)
                    # inverse transform might be not exactly as the
                    # input... but should be very close ;-)
                    self.assertEqual(dd_rev.shape,
                                     dd.shape,
                                     msg="Shape should be the same after iDWT")

                    diff = np.linalg.norm(dd - dd_rev)
                    ornorm = np.linalg.norm(dd)
                    self.assertTrue(diff / ornorm < 1e-10)
    def test_simple_wdm(self):
        """
        """
        ds = datasets['uni2medium']
        d2d = ds.samples
        ws = 15                          # size of timeline for wavelet
        sp = np.arange(ds.nsamples-ws*2) + ws

        # create 3D instance (samples x timepoints x channels)
        bcm = BoxcarMapper(sp, ws)
        d3d = bcm.forward(d2d)

        # use wavelet mapper
        wdm = WaveletTransformationMapper()
        d3d_wd = wdm.forward(d3d)
        d3d_swap = d3d.swapaxes(1,2)

        self.failUnlessRaises(ValueError, WaveletTransformationMapper,
                              wavelet='bogus')
        self.failUnlessRaises(ValueError, WaveletTransformationMapper,
                              mode='bogus')

        # use wavelet mapper
        for wdm, wdm_swap in ((WaveletTransformationMapper(),
                               WaveletTransformationMapper(dim=2)),
                              (WaveletPacketMapper(),
                               WaveletPacketMapper(dim=2))):
          for dd, dd_swap in ((d3d, d3d_swap),
                              (d2d, None)):
            dd_wd = wdm.forward(dd)
            if dd_swap is not None:
                dd_wd_swap = wdm_swap.forward(dd_swap)

                self.failUnless((dd_wd == dd_wd_swap.swapaxes(1,2)).all(),
                                msg="We should have got same result with swapped "
                                "dimensions and explicit mentioining of it. "
                                "Got %s and %s" % (dd_wd, dd_wd_swap))

            # some sanity checks
            self.failUnless(dd_wd.shape[0] == dd.shape[0])

            if not isinstance(wdm, WaveletPacketMapper):
                # we can do reverse only for DWT
                dd_rev = wdm.reverse(dd_wd)
                # inverse transform might be not exactly as the
                # input... but should be very close ;-)
                self.failUnlessEqual(dd_rev.shape, dd.shape,
                                     msg="Shape should be the same after iDWT")

                diff = np.linalg.norm(dd - dd_rev)
                ornorm = np.linalg.norm(dd)
                self.failUnless(diff/ornorm < 1e-10)
Beispiel #6
0
def test_state_cycle_with_custom_reduce(fname):
    # BoxcarMapper has a custom __reduce__ implementation . The 'space'
    # setting will only survive a svae/load cycle if the state is correctly
    # handle for custom reduce iplementations.
    bm = BoxcarMapper([0], 1, space='boxy')
    h5save(fname, bm)
    bm_rl = h5load(fname)
    assert_equal(bm_rl.get_space(), 'boxy')
Beispiel #7
0
    def test_simple_wp1_level(self):
        """
        """

        ds = datasets['uni2large']
        d2d = ds.samples
        ws = 50  # size of timeline for wavelet
        sp = (np.arange(ds.nsamples - ws * 2) + ws)[:4]

        # create 3D instance (samples x timepoints x channels)
        bcm = BoxcarMapper(sp, ws)
        d3d = bcm.forward(d2d)

        # use wavelet mapper
        wdm = WaveletPacketMapper(level=2, wavelet='sym2')
        d3d_wd = wdm.forward(d3d)

        # Check dimensionality
        d3d_wds, d3ds = d3d_wd.shape, d3d.shape
        self.assertTrue(len(d3d_wds) == len(d3ds) + 1)
        self.assertTrue(d3d_wds[1] * d3d_wds[2] >= d3ds[1])
        self.assertTrue(d3d_wds[0] == d3ds[0])
        self.assertTrue(d3d_wds[-1] == d3ds[-1])
        #print d2d.shape, d3d.shape, d3d_wd.shape

        if externals.exists('pywt wp reconstruct'):
            # Test reverse -- should be identical
            # we can do reverse only for DWT
            d3d_rev = wdm.reverse(d3d_wd)

            # inverse transform might be not exactly as the
            # input... but should be very close ;-)
            self.assertEqual(d3d_rev.shape,
                             d3d.shape,
                             msg="Shape should be the same after iDWT")

            diff = np.linalg.norm(d3d - d3d_rev)
            ornorm = np.linalg.norm(d3d)

            skip_if_no_external('pywt wp reconstruct fixed')
            self.assertTrue(diff / ornorm < 1e-10)
        else:
            self.assertRaises(NotImplementedError, wdm.reverse, d3d_wd)
    def test_simple_wp1_level(self):
        """
        """

        ds = datasets['uni2large']
        d2d = ds.samples
        ws = 50                          # size of timeline for wavelet
        sp = (np.arange(ds.nsamples - ws*2) + ws)[:4]

        # create 3D instance (samples x timepoints x channels)
        bcm = BoxcarMapper(sp, ws)
        d3d = bcm.forward(d2d)

        # use wavelet mapper
        wdm = WaveletPacketMapper(level=2, wavelet='sym2')
        d3d_wd = wdm.forward(d3d)

        # Check dimensionality
        d3d_wds, d3ds = d3d_wd.shape, d3d.shape
        self.failUnless(len(d3d_wds) == len(d3ds)+1)
        self.failUnless(d3d_wds[1] * d3d_wds[2] >= d3ds[1])
        self.failUnless(d3d_wds[0] == d3ds[0])
        self.failUnless(d3d_wds[-1] == d3ds[-1])
        #print d2d.shape, d3d.shape, d3d_wd.shape

        if externals.exists('pywt wp reconstruct'):
            # Test reverse -- should be identical
            # we can do reverse only for DWT
            d3d_rev = wdm.reverse(d3d_wd)

            # inverse transform might be not exactly as the
            # input... but should be very close ;-)
            self.failUnlessEqual(d3d_rev.shape, d3d.shape,
                                 msg="Shape should be the same after iDWT")

            diff = np.linalg.norm(d3d - d3d_rev)
            ornorm = np.linalg.norm(d3d)

            skip_if_no_external('pywt wp reconstruct fixed')
            self.failUnless(diff/ornorm < 1e-10)
        else:
            self.failUnlessRaises(NotImplementedError, wdm.reverse, d3d_wd)
Beispiel #9
0
def test_simpleboxcar():
    data = np.atleast_2d(np.arange(10)).T
    sp = np.arange(10)

    # check if stupid thing don't work
    assert_raises(ValueError, BoxcarMapper, sp, 0)

    # now do an identity transformation
    bcm = BoxcarMapper(sp, 1)
    trans = bcm.forward(data)
    # ,0 is a feature below, so we get explicit 2D out of 1D
    assert_array_equal(trans[:, 0], data)

    # now check for illegal boxes
    if __debug__:
        # condition is checked only in __debug__
        assert_raises(ValueError, BoxcarMapper(sp, 2).train, data)

    # now something that should work
    nbox = 9
    boxlength = 2
    sp = np.arange(nbox)
    bcm = BoxcarMapper(sp, boxlength)
    trans = bcm.forward(data)
    # check that is properly upcasts the dimensionality
    assert_equal(trans.shape, (nbox, boxlength) + data.shape[1:])
    # check actual values, squeezing the last dim for simplicity
    assert_array_equal(trans.squeeze(),
                       np.vstack((np.arange(9), np.arange(9) + 1)).T)

    # now test for proper data shape
    data = np.ones((10, 3, 4, 2))
    sp = [2, 4, 3, 5]
    trans = BoxcarMapper(sp, 4).forward(data)
    assert_equal(trans.shape, (4, 4, 3, 4, 2))

    # test reverse
    data = np.arange(240).reshape(10, 3, 4, 2)
    sp = [2, 4, 3, 5]
    boxlength = 2
    m = BoxcarMapper(sp, boxlength)
    m.train(data)
    mp = m.forward(data)
    assert_equal(mp.shape, (4, 2, 3, 4, 2))

    # try full reconstruct
    mr = m.reverse(mp)
    # shape has to match
    assert_equal(mr.shape, (len(sp) * boxlength, ) + data.shape[1:])
    # only known samples are part of the results
    assert_true((mr >= 24).all())
    assert_true((mr < 168).all())

    # check proper reconstruction of non-conflicting sample
    assert_array_equal(mr[0].ravel(), np.arange(48, 72))

    # check proper reconstruction of samples being part of multiple
    # mapped samples
    assert_array_equal(mr[1].ravel(), np.arange(72, 96))

    # test reverse of a single sample
    singlesample = np.arange(48).reshape(2, 3, 4, 2)
    assert_array_equal(singlesample, m.reverse1(singlesample))
    # now in a dataset
    ds = Dataset([singlesample])
    assert_equal(ds.shape, (1, ) + singlesample.shape)
    # after reverse mapping the 'sample axis' should vanish and the original 3d
    # shape of the samples should be restored
    assert_equal(ds.shape[1:], m.reverse(ds).shape)
    # multiple samples should just be concatenated along the samples axis
    ds = Dataset([singlesample, singlesample])
    assert_equal((np.prod(ds.shape[:2]), ) + singlesample.shape[1:],
                 m.reverse(ds).shape)
    # should not work for shape mismatch, but it does work and is useful when
    # reverse mapping sample attributes
    #assert_raises(ValueError, m.reverse, singlesample[0])

    # check broadcasting of 'raw' samples into proper boxcars on forward()
    bc = m.forward1(np.arange(24).reshape(3, 4, 2))
    assert_array_equal(bc, np.array(2 * [np.arange(24).reshape(3, 4, 2)]))
Beispiel #10
0
def extract_boxcar_event_samples(ds,
                                 events=None,
                                 time_attr=None,
                                 match='prev',
                                 event_offset=None,
                                 event_duration=None,
                                 eprefix='event',
                                 event_mapper=None):
    """Segment a dataset by extracting boxcar events

    (Multiple) consecutive samples are extracted for each event, and are either
    returned in a flattened shape, or subject to further processing.

    Events are specified as a list of dictionaries
    (see:class:`~mvpa2.misc.support.Event`) for a helper class. Each dictionary
    contains all relevant attributes to describe an event. This is at least the
    ``onset`` time of an event, but can also comprise of ``duration``,
    ``amplitude``, and arbitrary other attributes.

    Boxcar event model details
    --------------------------

    For each event all samples covering that particular event are used to form
    a corresponding sample. One sample for each event is returned. Event
    specification dictionaries must contain an ``onset`` attribute (as sample
    index in the input dataset), ``duration`` (as number of consecutive samples
    after the onset). Any number of additional attributes can be present in an
    event specification. Those attributes are included as sample attributes in
    the returned dataset.

    Alternatively, ``onset`` and ``duration`` may also be given in a
    non-discrete time specification. In this case a dataset attribute needs to
    be specified that contains time-stamps for each input data sample, and is
    used to convert times into discrete sample indices (see ``match``
    argument).

    A mapper instance can be provided (see ``event_mapper``) to implement
    futher processing of each event sample, for example in order to yield
    average samples.

    Parameters
    ----------
    ds : Dataset
      The samples of this input dataset have to be in whatever ascending order.
    events : list
      Each event definition has to specify ``onset`` and ``duration``. All
      other attributes will be passed on to the sample attributes collection of
      the returned dataset.
    time_attr : str or None
      Attribute with dataset sample time-stamps.
      If not None, the ``onset`` and ``duration`` specs
      from the event list will be converted using information from this sample
      attribute. Its values will be treated as in-the-same-unit and are used to
      determine corresponding samples from real-value onset and duration
      definitions.
      For HRF modeling this argument is mandatory.
    match : {'prev', 'next', 'closest'}
      Strategy used to match real-value onsets to sample
      indices. 'prev' chooses the closes preceding samples, 'next' the closest
      following sample and 'closest' to absolute closest sample.
    event_offset : None or float
      If not None, all event ``onset`` specifications will be offset by this
      value before boxcar modeling is performed.
    event_duration : None or float
      If not None, all event ``duration`` specifications will be set to this
      value before boxcar modeling is done.
    eprefix : str or None
      If not None, this prefix is used to name additional
      attributes generated by the underlying
      `~mvpa2.mappers.boxcar.BoxcarMapper`. If it is set to None, no additional
      attributes will be created.
    event_mapper : Mapper
      This mapper is used to forward-map the dataset containing the boxcar event
      samples. If None (default) a FlattenMapper is employed to convert
      multi-dimensional sample matrices into simple one-dimensional sample
      vectors. This option can be used to implement temporal compression, by
      e.g. averaging samples within an event boxcar using an FxMapper. Any
      mapper needs to keep the sample axis unchanged, i.e. number and order of
      samples remain the same.

    Returns
    -------
    Dataset
      One sample per each event definition that has been passed to the
      function. Additional event attributes are included as sample attributes.

    Examples
    --------
    The documentation also contains an :ref:`example script
    <example_eventrelated>` showing a spatio-temporal analysis of fMRI data
    that involves this function.

    >>> from mvpa2.datasets import Dataset
    >>> ds = Dataset(np.random.randn(10, 25))
    >>> events = [{'onset': 2, 'duration': 4},
    ...           {'onset': 4, 'duration': 4}]
    >>> eds = eventrelated_dataset(ds, events)
    >>> len(eds)
    2
    >>> eds.nfeatures == ds.nfeatures * 4
    True
    >>> 'mapper' in ds.a
    False
    >>> print eds.a.mapper
    <Chain: <Boxcar: bl=4>-<Flatten>>

    And now the same conversion, but with events specified as real time. This is
    on possible if the input dataset contains a sample attribute with the
    necessary information about the input samples.

    >>> ds.sa['record_time'] = np.linspace(0, 5, len(ds))
    >>> rt_events = [{'onset': 1.05, 'duration': 2.2},
    ...              {'onset': 2.3, 'duration': 2.12}]
    >>> rt_eds = eventrelated_dataset(ds, rt_events, time_attr='record_time',
    ...                               match='closest')
    >>> np.all(eds.samples == rt_eds.samples)
    True
    >>> # returned dataset e.g. has info from original samples
    >>> rt_eds.sa.record_time
    array([[ 1.11111111,  1.66666667,  2.22222222,  2.77777778],
           [ 2.22222222,  2.77777778,  3.33333333,  3.88888889]])
    """
    # relabel argument
    conv_strategy = {
        'prev': 'floor',
        'next': 'ceil',
        'closest': 'round'
    }[match]

    if not (event_offset is None and event_duration is None):
        descr_events = []
        for ev in events:
            # do not mess with the input data
            ev = copy.deepcopy(ev)
            if event_offset is not None:
                ev['onset'] += event_offset
            if event_duration is not None:
                ev['duration'] = event_duration
            descr_events.append(ev)
        events = descr_events

    if time_attr is not None:
        tvec = ds.sa[time_attr].value
        # we are asked to convert onset time into sample ids
        descr_events = []
        for ev in events:
            # do not mess with the input data
            ev = copy.deepcopy(ev)
            # best matching sample
            idx = value2idx(ev['onset'], tvec, conv_strategy)
            # store offset of sample time and real onset
            ev['orig_offset'] = ev['onset'] - tvec[idx]
            # rescue the real onset into a new attribute
            ev['orig_onset'] = ev['onset']
            ev['orig_duration'] = ev['duration']
            # figure out how many samples we need
            ev['duration'] = \
                    len(tvec[idx:][tvec[idx:] < ev['onset'] + ev['duration']])
            # new onset is sample index
            ev['onset'] = idx
            descr_events.append(ev)
    else:
        descr_events = events
    # convert the event specs into the format expected by BoxcarMapper
    # take the first event as an example of contained keys
    evvars = _events2dict(descr_events)
    # checks
    for p in ['onset', 'duration']:
        if not p in evvars:
            raise ValueError("'%s' is a required property for all events." % p)
    boxlength = max(evvars['duration'])
    if __debug__:
        if not max(evvars['duration']) == min(evvars['duration']):
            warning('Boxcar mapper will use maximum boxlength (%i) of all '
                    'provided Events.' % boxlength)

    # finally create, train und use the boxcar mapper
    bcm = BoxcarMapper(evvars['onset'], boxlength, space=eprefix)
    bcm.train(ds)
    ds = ds.get_mapped(bcm)
    if event_mapper is None:
        # at last reflatten the dataset
        # could we add some meaningful attribute during this mapping, i.e. would
        # assigning 'inspace' do something good?
        ds = ds.get_mapped(FlattenMapper(shape=ds.samples.shape[1:]))
    else:
        ds = ds.get_mapped(event_mapper)
    # add samples attributes for the events, simply dump everything as a samples
    # attribute
    # special case onset and duration in case of conversion into descrete time
    if time_attr is not None:
        for attr in ('onset', 'duration'):
            evvars[attr] = [e[attr] for e in events]
    ds = _evvars2ds(ds, evvars, eprefix)

    return ds
Beispiel #11
0
def timesegments_classification(dss,
                                window_size=6,
                                overlapping_windows=True,
                                distance='correlation',
                                do_zscore=True):
    """Time-segment classification across subjects using Hyperalignment

    Parameters
    ----------
    dss : list of datasets
       Datasets to benchmark on.  Usually a single dataset per subject.
    window_size : int, optional
       How many temporal points to consider for a classification sample
    overlapping_windows : bool, optional
       Strategy to how create and classify "samples" for classification.  If
       True -- `window_size` samples from each time point (but trailing ones)
       constitute a sample, and upon "predict" `window_size` of samples around
       each test point is not considered.  If False -- samples are just taken
       (with training and testing splits) at `window_size` step from one to
       another.
    do_zscore : bool, optional
       Perform zscoring (overall, not per-chunk) for each dataset upon
       partitioning with part1
    ...
    """
    part2 = NFoldPartitioner(attr='subjects')
    # Check if input list contains Datasets, ndarrays
    dss = [Dataset(ds) if not type(ds) == Dataset else ds for ds in dss]
    # TODO:  allow for doing feature selection
    if do_zscore:
        for ds in dss:
            zscore(ds, chunks_attr=None)

    # assign .sa.subjects to those datasets
    for i, ds in enumerate(dss):
        # part2.attr is by default "subjects"
        ds.sa[part2.attr] = [i]

    dss_test_bc = []
    for ds in dss:
        if overlapping_windows:
            startpoints = range(len(ds) - window_size + 1)
        else:
            startpoints = _get_nonoverlapping_startpoints(len(ds), window_size)
        bm = BoxcarMapper(startpoints, window_size)
        bm.train(ds)
        ds_ = bm.forward(ds)
        ds_.sa['startpoints'] = startpoints

        # reassign subjects so they are not arrays
        def assign_unique(ds, sa):
            ds.sa[sa] = [np.asscalar(np.unique(x)) for x in ds.sa[sa].value]

        assign_unique(ds_, part2.attr)

        fm = FlattenMapper()
        fm.train(ds_)
        dss_test_bc.append(ds_.get_mapped(fm))

    ds_test = vstack(dss_test_bc)
    # Perform classification across subjects comparing against mean
    # spatio-temporal pattern of other subjects
    errors_across_subjects = []
    for ds_test_part in part2.generate(ds_test):
        ds_train_, ds_test_ = list(
            Splitter("partitions").generate(ds_test_part))
        # average across subjects to get a representative pattern per timepoint
        ds_train_ = mean_group_sample(['startpoints'])(ds_train_)
        assert (ds_train_.shape == ds_test_.shape)

        if distance == 'correlation':
            # TODO: redo more efficiently since now we are creating full
            # corrcoef matrix.  Also we might better just take a name for
            # the pdist measure but then implement them efficiently
            # (i.e. without hstacking both pieces together first)
            dist = 1 - np.corrcoef(ds_train_,
                                   ds_test_)[len(ds_test_):, :len(ds_test_)]
        else:
            raise NotImplementedError

        if overlapping_windows:
            dist = wipe_out_offdiag(dist, window_size)

        winners = np.argmin(dist, axis=1)
        error = np.mean(winners != np.arange(len(winners)))
        errors_across_subjects.append(error)

    errors_across_subjects = np.asarray(errors_across_subjects)
    if __debug__:
        debug(
            "BM", "Finished with %s array of errors. Mean error %.2f" %
            (errors_across_subjects.shape, np.mean(errors_across_subjects)))
    return errors_across_subjects
Beispiel #12
0
def test_datasetmapping():
    # 6 samples, 4X2 features
    data = np.arange(48).reshape(6, 4, 2)
    ds = Dataset(data,
                 sa={
                     'timepoints': np.arange(6),
                     'multidim': data.copy()
                 },
                 fa={'fid': np.arange(4)})
    # with overlapping and non-overlapping boxcars
    startpoints = [0, 1, 4]
    boxlength = 2
    bm = BoxcarMapper(startpoints, boxlength, space='boxy')
    # train is critical
    bm.train(ds)
    mds = bm.forward(ds)
    assert_equal(len(mds), len(startpoints))
    assert_equal(mds.nfeatures, boxlength)
    # all samples attributes remain, but the can rotated/compressed into
    # multidimensional attributes
    assert_equal(sorted(mds.sa.keys()),
                 ['boxy_onsetidx'] + sorted(ds.sa.keys()))
    assert_equal(mds.sa.multidim.shape,
                 (len(startpoints), boxlength) + ds.shape[1:])
    assert_equal(mds.sa.timepoints.shape, (len(startpoints), boxlength))
    assert_array_equal(mds.sa.timepoints.flatten(),
                       np.array([(s, s + 1) for s in startpoints]).flatten())
    assert_array_equal(mds.sa.boxy_onsetidx, startpoints)
    # feature attributes also get rotated and broadcasted
    assert_array_equal(mds.fa.fid, [ds.fa.fid, ds.fa.fid])
    # and finally there is a new one
    assert_array_equal(mds.fa.boxy_offsetidx, list(range(boxlength)))

    # now see how it works on reverse()
    rds = bm.reverse(mds)
    # we got at least something of all original attributes back
    assert_equal(sorted(rds.sa.keys()), sorted(ds.sa.keys()))
    assert_equal(sorted(rds.fa.keys()), sorted(ds.fa.keys()))
    # it is not possible to reconstruct the full samples array
    # some samples even might show up multiple times (when there are overlapping
    # boxcars
    assert_array_equal(
        rds.samples,
        np.array([[[0, 1], [2, 3], [4, 5], [6, 7]],
                  [[8, 9], [10, 11], [12, 13], [14, 15]],
                  [[8, 9], [10, 11], [12, 13], [14, 15]],
                  [[16, 17], [18, 19], [20, 21], [22, 23]],
                  [[32, 33], [34, 35], [36, 37], [38, 39]],
                  [[40, 41], [42, 43], [44, 45], [46, 47]]]))
    assert_array_equal(rds.sa.timepoints, [0, 1, 1, 2, 4, 5])
    assert_array_equal(rds.sa.multidim, ds.sa.multidim[rds.sa.timepoints])
    # but feature attributes should be fully recovered
    assert_array_equal(rds.fa.fid, ds.fa.fid)

    # popular dataset configuration (double flatten + boxcar)
    cm = ChainMapper([FlattenMapper(), bm, FlattenMapper()])
    cm.train(ds)
    bflat = ds.get_mapped(cm)
    assert_equal(bflat.shape,
                 (len(startpoints), boxlength * np.prod(ds.shape[1:])))
    # add attributes
    bflat.fa['testfa'] = np.arange(bflat.nfeatures)
    bflat.sa['testsa'] = np.arange(bflat.nsamples)
    # now try to go back
    bflatrev = bflat.mapper.reverse(bflat)
    # data should be same again, as far as the boxcars match
    assert_array_equal(ds.samples[:2], bflatrev.samples[:2])
    assert_array_equal(ds.samples[-2:], bflatrev.samples[-2:])
    # feature axis should match
    assert_equal(ds.shape[1:], bflatrev.shape[1:])
Beispiel #13
0
def test_datasetmapping():
    # 6 samples, 4X2 features
    data = np.arange(48).reshape(6,4,2)
    ds = Dataset(data,
                 sa={'timepoints': np.arange(6),
                     'multidim': data.copy()},
                 fa={'fid': np.arange(4)})
    # with overlapping and non-overlapping boxcars
    startpoints = [0, 1, 4]
    boxlength = 2
    bm = BoxcarMapper(startpoints, boxlength, space='boxy')
    # train is critical
    bm.train(ds)
    mds = bm.forward(ds)
    assert_equal(len(mds), len(startpoints))
    assert_equal(mds.nfeatures, boxlength)
    # all samples attributes remain, but the can rotated/compressed into
    # multidimensional attributes
    assert_equal(sorted(mds.sa.keys()), ['boxy_onsetidx'] + sorted(ds.sa.keys()))
    assert_equal(mds.sa.multidim.shape,
            (len(startpoints), boxlength) + ds.shape[1:])
    assert_equal(mds.sa.timepoints.shape, (len(startpoints), boxlength))
    assert_array_equal(mds.sa.timepoints.flatten(),
                       np.array([(s, s+1) for s in startpoints]).flatten())
    assert_array_equal(mds.sa.boxy_onsetidx, startpoints)
    # feature attributes also get rotated and broadcasted
    assert_array_equal(mds.fa.fid, [ds.fa.fid, ds.fa.fid])
    # and finally there is a new one
    assert_array_equal(mds.fa.boxy_offsetidx, range(boxlength))

    # now see how it works on reverse()
    rds = bm.reverse(mds)
    # we got at least something of all original attributes back
    assert_equal(sorted(rds.sa.keys()), sorted(ds.sa.keys()))
    assert_equal(sorted(rds.fa.keys()), sorted(ds.fa.keys()))
    # it is not possible to reconstruct the full samples array
    # some samples even might show up multiple times (when there are overlapping
    # boxcars
    assert_array_equal(rds.samples,
                       np.array([[[ 0,  1], [ 2,  3], [ 4,  5], [ 6,  7]],
                                 [[ 8,  9], [10, 11], [12, 13], [14, 15]],
                                 [[ 8,  9], [10, 11], [12, 13], [14, 15]],
                                 [[16, 17], [18, 19], [20, 21], [22, 23]],
                                 [[32, 33], [34, 35], [36, 37], [38, 39]],
                                 [[40, 41], [42, 43], [44, 45], [46, 47]]]))
    assert_array_equal(rds.sa.timepoints, [0, 1, 1, 2, 4, 5])
    assert_array_equal(rds.sa.multidim, ds.sa.multidim[rds.sa.timepoints])
    # but feature attributes should be fully recovered
    assert_array_equal(rds.fa.fid, ds.fa.fid)

    # popular dataset configuration (double flatten + boxcar)
    cm= ChainMapper([FlattenMapper(), bm, FlattenMapper()])
    cm.train(ds)
    bflat = ds.get_mapped(cm)
    assert_equal(bflat.shape, (len(startpoints), boxlength * np.prod(ds.shape[1:])))
    # add attributes
    bflat.fa['testfa'] = np.arange(bflat.nfeatures)
    bflat.sa['testsa'] = np.arange(bflat.nsamples)
    # now try to go back
    bflatrev = bflat.mapper.reverse(bflat)
    # data should be same again, as far as the boxcars match
    assert_array_equal(ds.samples[:2], bflatrev.samples[:2])
    assert_array_equal(ds.samples[-2:], bflatrev.samples[-2:])
    # feature axis should match
    assert_equal(ds.shape[1:], bflatrev.shape[1:])
Beispiel #14
0
def test_simpleboxcar():
    data = np.atleast_2d(np.arange(10)).T
    sp = np.arange(10)

    # check if stupid thing don't work
    assert_raises(ValueError, BoxcarMapper, sp, 0)

    # now do an identity transformation
    bcm = BoxcarMapper(sp, 1)
    trans = bcm.forward(data)
    # ,0 is a feature below, so we get explicit 2D out of 1D
    assert_array_equal(trans[:,0], data)

    # now check for illegal boxes
    if __debug__:
        # condition is checked only in __debug__
        assert_raises(ValueError, BoxcarMapper(sp, 2).train, data)

    # now something that should work
    nbox = 9
    boxlength = 2
    sp = np.arange(nbox)
    bcm = BoxcarMapper(sp, boxlength)
    trans = bcm.forward(data)
    # check that is properly upcasts the dimensionality
    assert_equal(trans.shape, (nbox, boxlength) + data.shape[1:])
    # check actual values, squeezing the last dim for simplicity
    assert_array_equal(trans.squeeze(), np.vstack((np.arange(9), np.arange(9)+1)).T)


    # now test for proper data shape
    data = np.ones((10,3,4,2))
    sp = [ 2, 4, 3, 5 ]
    trans = BoxcarMapper(sp, 4).forward(data)
    assert_equal(trans.shape, (4,4,3,4,2))

    # test reverse
    data = np.arange(240).reshape(10, 3, 4, 2)
    sp = [ 2, 4, 3, 5 ]
    boxlength = 2
    m = BoxcarMapper(sp, boxlength)
    m.train(data)
    mp = m.forward(data)
    assert_equal(mp.shape, (4, 2, 3, 4, 2))

    # try full reconstruct
    mr = m.reverse(mp)
    # shape has to match
    assert_equal(mr.shape, (len(sp) * boxlength,) + data.shape[1:])
    # only known samples are part of the results
    assert_true((mr >= 24).all())
    assert_true((mr < 168).all())

    # check proper reconstruction of non-conflicting sample
    assert_array_equal(mr[0].ravel(), np.arange(48, 72))

    # check proper reconstruction of samples being part of multiple
    # mapped samples
    assert_array_equal(mr[1].ravel(), np.arange(72, 96))

    # test reverse of a single sample
    singlesample = np.arange(48).reshape(2, 3, 4, 2)
    assert_array_equal(singlesample, m.reverse1(singlesample))
    # now in a dataset
    ds = Dataset([singlesample])
    assert_equal(ds.shape, (1,) + singlesample.shape)
    # after reverse mapping the 'sample axis' should vanish and the original 3d
    # shape of the samples should be restored
    assert_equal(ds.shape[1:], m.reverse(ds).shape)
    # multiple samples should just be concatenated along the samples axis
    ds = Dataset([singlesample, singlesample])
    assert_equal((np.prod(ds.shape[:2]),) + singlesample.shape[1:],
                 m.reverse(ds).shape)
    # should not work for shape mismatch, but it does work and is useful when
    # reverse mapping sample attributes
    #assert_raises(ValueError, m.reverse, singlesample[0])

    # check broadcasting of 'raw' samples into proper boxcars on forward()
    bc = m.forward1(np.arange(24).reshape(3, 4, 2))
    assert_array_equal(bc, np.array(2 * [np.arange(24).reshape(3, 4, 2)]))
def timesegments_classification(dss,
                                hyper=None,
                                part1=HalfPartitioner(),
                                part2=NFoldPartitioner(attr='subjects'),
                                window_size=6,
                                overlapping_windows=True,
                                distance='correlation',
                                do_zscore=True):
    """Time-segment classification across subjects using Hyperalignment

    Parameters
    ----------
    dss : list of datasets
       Datasets to benchmark on.  Usually a single dataset per subject.
    hyper : Hyperalignment-like, optional
       Beast which if called on a list of datasets should spit out trained
       mappers.  If not specified, `IdentityMapper`s will be used
    part1 : Partitioner, optional
       Partitioner to split data for hyperalignment "cross-validation"
    part2 : Partitioner, optional
       Partitioner for CV within the hyperalignment test split
    window_size : int, optional
       How many temporal points to consider for a classification sample
    overlapping_windows : bool, optional
       Strategy to how create and classify "samples" for classification.  If
       True -- `window_size` samples from each time point (but trailing ones)
       constitute a sample, and upon "predict" `window_size` of samples around
       each test point is not considered.  If False -- samples are just taken
       (with training and testing splits) at `window_size` step from one to
       another.
    do_zscore : bool, optional
       Perform zscoring (overall, not per-chunk) for each dataset upon
       partitioning with part1
    ...
    """
    # Generate outer-most partitioning ()
    parts = [copy.deepcopy(part1).generate(ds) for ds in dss]

    iter = 1
    errors = []

    while True:
        try:
            dss_partitioned = [p.next() for p in parts]
        except StopIteration:
            # we are done -- no more partitions
            break
        if __debug__:
            debug("BM", "Iteration %d", iter)

        dss_train, dss_test = zip(*[
            list(Splitter("partitions").generate(ds)) for ds in dss_partitioned
        ])

        # TODO:  allow for doing feature selection

        if do_zscore:
            for ds in dss_train + dss_test:
                zscore(ds, chunks_attr=None)

        if hyper is not None:
            # since otherwise it would remember previous loop dataset as the "commonspace"
            # Now let's do hyperalignment but on a copy in each loop iteration
            hyper_ = copy.deepcopy(hyper)
            mappers = hyper_(dss_train)
        else:
            mappers = [IdentityMapper() for ds in dss_train]

        dss_test_aligned = [
            mapper.forward(ds) for mapper, ds in zip(mappers, dss_test)
        ]

        # assign .sa.subjects to those datasets
        for i, ds in enumerate(dss_test_aligned):
            # part2.attr is by default "subjects"
            ds.sa[part2.attr] = [i]

        dss_test_bc = []
        for ds in dss_test_aligned:
            if overlapping_windows:
                startpoints = range(len(ds) - window_size + 1)
            else:
                startpoints = _get_nonoverlapping_startpoints(
                    len(ds), window_size)
            bm = BoxcarMapper(startpoints, window_size)
            bm.train(ds)
            ds_ = bm.forward(ds)
            ds_.sa['startpoints'] = startpoints

            # reassign subjects so they are not arrays
            def assign_unique(ds, sa):
                ds.sa[sa] = [
                    np.asscalar(np.unique(x)) for x in ds.sa[sa].value
                ]

            assign_unique(ds_, part2.attr)

            fm = FlattenMapper()
            fm.train(ds_)
            dss_test_bc.append(ds_.get_mapped(fm))

        ds_test = vstack(dss_test_bc)
        # Perform classification across subjects comparing against mean
        # spatio-temporal pattern of other subjects
        errors_across_subjects = []
        for ds_test_part in part2.generate(ds_test):
            ds_train_, ds_test_ = list(
                Splitter("partitions").generate(ds_test_part))
            # average across subjects to get a representative pattern per timepoint
            ds_train_ = mean_group_sample(['startpoints'])(ds_train_)
            assert (ds_train_.shape == ds_test_.shape)

            if distance == 'correlation':
                # TODO: redo more efficiently since now we are creating full
                # corrcoef matrix.  Also we might better just take a name for
                # the pdist measure but then implement them efficiently
                # (i.e. without hstacking both pieces together first)
                dist = 1 - np.corrcoef(
                    ds_train_, ds_test_)[len(ds_test_):, :len(ds_test_)]
            else:
                raise NotImplementedError

            if overlapping_windows:
                dist = wipe_out_offdiag(dist, window_size)

            winners = np.argmin(dist, axis=1)
            error = np.mean(winners != np.arange(len(winners)))
            errors_across_subjects.append(error)
        errors.append(errors_across_subjects)
        iter += 1

    errors = np.array(errors)
    if __debug__:
        debug(
            "BM", "Finished with %s array of errors. Mean error %.2f" %
            (errors.shape, np.mean(errors)))
    return errors
Beispiel #16
0
def plot_erp(data,
             SR=500,
             onsets=None,
             pre=0.2,
             pre_onset=None,
             post=None,
             pre_mean=None,
             color='r',
             errcolor=None,
             errtype=None,
             ax=pl,
             ymult=1.0,
             *args,
             **kwargs):
    """Plot single ERP on existing canvas

    Parameters
    ----------
    data : 1D or 2D ndarray
      The data array can either be 1D (samples over time) or 2D
      (trials x samples). In the first case a boxcar mapper is used to
      extract the respective trial timecourses given a list of trial onsets.
      In the latter case, each row of the data array is taken as the EEG
      signal timecourse of a particular trial.
    onsets : list(int)
      List of onsets (in samples not in seconds).
    SR : int, optional
      Sampling rate (1/s) of the signal.
    pre : float, optional
      Duration (in seconds) to be plotted prior to onset.
    pre_onset : float or None
      If data is already in epochs (2D) then pre_onset provides information
      on how many seconds pre-stimulus were used to generate them. If None,
      then pre_onset = pre
    post : float
      Duration (in seconds) to be plotted after the onset.
    pre_mean : float
      Duration (in seconds) at the beginning of the window which is used
      for deriving the mean of the signal. If None, pre_mean = pre. If 0,
      then the mean is not subtracted from the signal.
    errtype : None or 'ste' or 'std' or 'ci95' or list of previous three
      Type of error value to be computed per datapoint.  'ste' --
      standard error of the mean, 'std' -- standard deviation 'ci95'
      -- 95% confidence interval (1.96 * ste), None -- no error margin
      is plotted (default)
      Optionally, multiple error types can be specified in a list. In that
      case all of them will be plotted.
    color : matplotlib color code, optional
      Color to be used for plotting the mean signal timecourse.
    errcolor : matplotlib color code
      Color to be used for plotting the error margin. If None, use main color
      but with weak alpha level
    ax :
      Target where to draw.
    ymult : float, optional
      Multiplier for the values. E.g. if negative-up ERP plot is needed:
      provide ymult=-1.0
    *args, **kwargs
      Additional arguments to `pylab.plot`.

    Returns
    -------
    array
      Mean ERP timeseries.
    """
    if pre_mean is None:
        pre_mean = pre

    # set default
    pre_discard = 0

    if onsets is not None:  # if we need to extract ERPs
        if post is None:
            raise ValueError, \
                  "Duration post onsets must be provided if onsets are given"
        # trial timecourse duration
        duration = pre + post

        # We are working with a full timeline
        bcm = BoxcarMapper(onsets,
                           boxlength=int(SR * duration),
                           offset=-int(SR * pre))
        erp_data = bcm(data)

        # override values since we are using Boxcar
        pre_onset = pre
    else:
        if pre_onset is None:
            pre_onset = pre

        if pre_onset < pre:
            warning(
                "Pre-stimulus interval to plot %g is smaller than provided "
                "pre-stimulus captured interval %g, thus plot interval was "
                "adjusted" % (pre, pre_onset))
            pre = pre_onset

        if post is None:
            # figure out post
            duration = float(data.shape[1]) / SR - pre_discard
            post = duration - pre
        else:
            duration = pre + post

        erp_data = data
        pre_discard = pre_onset - pre

    # Scale the data appropriately
    erp_data *= ymult

    # validity check -- we should have 2D matrix (trials x samples)
    if len(erp_data.shape) != 2:
        raise RuntimeError, \
              "plot_erp() supports either 1D data with onsets, or 2D data " \
              "(trials x sample_points). Shape of the data at the point " \
              "is %s" % erp_data.shape

    if not (pre_mean == 0 or pre_mean is None):
        # mean of pre-onset signal accross trials
        erp_baseline = np.mean(erp_data[:,
                                        int((pre_onset - pre_mean) *
                                            SR):int(pre_onset * SR)])
        # center data on pre-onset mean
        # NOTE: make sure that we make a copy of the data to don't
        #       alter the original. Better be safe than sorry
        erp_data = erp_data - erp_baseline

    # generate timepoints and error ranges to plot filled error area
    # top ->
    # bottom <-
    time_points = np.arange(erp_data.shape[1]) * 1.0 / SR - pre_onset

    # if pre != pre_onset
    if pre_discard > 0:
        npoints = int(pre_discard * SR)
        time_points = time_points[npoints:]
        erp_data = erp_data[:, npoints:]

    # select only time points of interest (if post is provided)
    if post is not None:
        npoints = int(duration * SR)
        time_points = time_points[:npoints]
        erp_data = erp_data[:, :npoints]

    # compute mean signal timecourse accross trials
    erp_mean = np.mean(erp_data, axis=0)

    # give sane default
    if errtype is None:
        errtype = []
    if not isinstance(errtype, list):
        errtype = [errtype]

    for et in errtype:
        # compute error per datapoint
        if et in ['ste', 'ci95']:
            erp_stderr = erp_data.std(axis=0) / np.sqrt(len(erp_data))
            if et == 'ci95':
                erp_stderr *= 1.96
        elif et == 'std':
            erp_stderr = erp_data.std(axis=0)
        else:
            raise ValueError, "Unknown error type '%s'" % errtype

        time_points2w = np.hstack((time_points, time_points[::-1]))

        error_top = erp_mean + erp_stderr
        error_bottom = erp_mean - erp_stderr
        error2w = np.hstack((error_top, error_bottom[::-1]))

        if errcolor is None:
            errcolor = color

        # plot error margin
        pfill = ax.fill(time_points2w,
                        error2w,
                        edgecolor=errcolor,
                        facecolor=errcolor,
                        alpha=0.2,
                        zorder=3)

    # plot mean signal timecourse
    ax.plot(time_points,
            erp_mean,
            lw=2,
            color=color,
            zorder=4,
            *args,
            **kwargs)
    #    ax.xaxis.set_major_locator(pl.MaxNLocator(4))
    return erp_mean
Beispiel #17
0
def timesegments_classification(
        dss,
        hyper=None,
        part1=HalfPartitioner(),
        part2=NFoldPartitioner(attr='subjects'),
        window_size=6,
        overlapping_windows=True,
        distance='correlation',
        do_zscore=True):
    """Time-segment classification across subjects using Hyperalignment

    Parameters
    ----------
    dss : list of datasets
       Datasets to benchmark on.  Usually a single dataset per subject.
    hyper : Hyperalignment-like, optional
       Beast which if called on a list of datasets should spit out trained
       mappers.  If not specified, `IdentityMapper`s will be used
    part1 : Partitioner, optional
       Partitioner to split data for hyperalignment "cross-validation"
    part2 : Partitioner, optional
       Partitioner for CV within the hyperalignment test split
    window_size : int, optional
       How many temporal points to consider for a classification sample
    overlapping_windows : bool, optional
       Strategy to how create and classify "samples" for classification.  If
       True -- `window_size` samples from each time point (but trailing ones)
       constitute a sample, and upon "predict" `window_size` of samples around
       each test point is not considered.  If False -- samples are just taken
       (with training and testing splits) at `window_size` step from one to
       another.
    do_zscore : bool, optional
       Perform zscoring (overall, not per-chunk) for each dataset upon
       partitioning with part1
    ...
    """
    # Generate outer-most partitioning ()
    parts = [copy.deepcopy(part1).generate(ds) for ds in dss]

    iter = 1
    errors = []

    while True:
        try:
            dss_partitioned = [p.next() for p in parts]
        except StopIteration:
            # we are done -- no more partitions
            break
        if __debug__:
            debug("BM", "Iteration %d", iter)

        dss_train, dss_test = zip(*[list(Splitter("partitions").generate(ds))
                                    for ds in dss_partitioned])

        # TODO:  allow for doing feature selection

        if do_zscore:
            for ds in dss_train + dss_test:
                zscore(ds, chunks_attr=None)

        if hyper is not None:
            # since otherwise it would remember previous loop dataset as the "commonspace"
            # Now let's do hyperalignment but on a copy in each loop iteration
            hyper_ = copy.deepcopy(hyper)
            mappers = hyper_(dss_train)
        else:
            mappers = [IdentityMapper() for ds in dss_train]

        dss_test_aligned = [mapper.forward(ds) for mapper, ds in zip(mappers, dss_test)]

        # assign .sa.subjects to those datasets
        for i, ds in enumerate(dss_test_aligned):
            # part2.attr is by default "subjects"
            ds.sa[part2.attr] = [i]

        dss_test_bc = []
        for ds in dss_test_aligned:
            if overlapping_windows:
                startpoints = range(len(ds) - window_size + 1)
            else:
                startpoints = _get_nonoverlapping_startpoints(len(ds), window_size)
            bm = BoxcarMapper(startpoints, window_size)
            bm.train(ds)
            ds_ = bm.forward(ds)
            ds_.sa['startpoints'] = startpoints
            # reassign subjects so they are not arrays
            def assign_unique(ds, sa):
                ds.sa[sa] = [np.asscalar(np.unique(x)) for x in ds.sa[sa].value]
            assign_unique(ds_, part2.attr)

            fm = FlattenMapper()
            fm.train(ds_)
            dss_test_bc.append(ds_.get_mapped(fm))

        ds_test = vstack(dss_test_bc)
        # Perform classification across subjects comparing against mean
        # spatio-temporal pattern of other subjects
        errors_across_subjects = []
        for ds_test_part in part2.generate(ds_test):
            ds_train_, ds_test_ = list(Splitter("partitions").generate(ds_test_part))
            # average across subjects to get a representative pattern per timepoint
            ds_train_ = mean_group_sample(['startpoints'])(ds_train_)
            assert(ds_train_.shape == ds_test_.shape)

            if distance == 'correlation':
                # TODO: redo more efficiently since now we are creating full
                # corrcoef matrix.  Also we might better just take a name for
                # the pdist measure but then implement them efficiently
                # (i.e. without hstacking both pieces together first)
                dist = 1 - np.corrcoef(ds_train_, ds_test_)[len(ds_test_):, :len(ds_test_)]
            else:
                raise NotImplementedError

            if overlapping_windows:
                dist = wipe_out_offdiag(dist, window_size)

            winners = np.argmin(dist, axis=1)
            error = np.mean(winners != np.arange(len(winners)))
            errors_across_subjects.append(error)
        errors.append(errors_across_subjects)
        iter += 1

    errors = np.array(errors)
    if __debug__:
        debug("BM", "Finished with %s array of errors. Mean error %.2f"
              % (errors.shape, np.mean(errors)))
    return errors
Beispiel #18
0
def _extract_boxcar_events(ds,
                           events=None,
                           time_attr=None,
                           match='prev',
                           eprefix='event',
                           event_mapper=None):
    """see eventrelated_dataset() for docs"""
    # relabel argument
    conv_strategy = {
        'prev': 'floor',
        'next': 'ceil',
        'closest': 'round'
    }[match]

    if not time_attr is None:
        tvec = ds.sa[time_attr].value
        # we are asked to convert onset time into sample ids
        descr_events = []
        for ev in events:
            # do not mess with the input data
            ev = copy.deepcopy(ev)
            # best matching sample
            idx = value2idx(ev['onset'], tvec, conv_strategy)
            # store offset of sample time and real onset
            ev['orig_offset'] = ev['onset'] - tvec[idx]
            # rescue the real onset into a new attribute
            ev['orig_onset'] = ev['onset']
            ev['orig_duration'] = ev['duration']
            # figure out how many samples we need
            ev['duration'] = \
                    len(tvec[idx:][tvec[idx:] < ev['onset'] + ev['duration']])
            # new onset is sample index
            ev['onset'] = idx
            descr_events.append(ev)
    else:
        descr_events = events
    # convert the event specs into the format expected by BoxcarMapper
    # take the first event as an example of contained keys
    evvars = _events2dict(descr_events)
    # checks
    for p in ['onset', 'duration']:
        if not p in evvars:
            raise ValueError("'%s' is a required property for all events." % p)
    boxlength = max(evvars['duration'])
    if __debug__:
        if not max(evvars['duration']) == min(evvars['duration']):
            warning('Boxcar mapper will use maximum boxlength (%i) of all '
                    'provided Events.' % boxlength)

    # finally create, train und use the boxcar mapper
    bcm = BoxcarMapper(evvars['onset'], boxlength, space=eprefix)
    bcm.train(ds)
    ds = ds.get_mapped(bcm)
    if event_mapper is None:
        # at last reflatten the dataset
        # could we add some meaningful attribute during this mapping, i.e. would
        # assigning 'inspace' do something good?
        ds = ds.get_mapped(FlattenMapper(shape=ds.samples.shape[1:]))
    else:
        ds = ds.get_mapped(event_mapper)
    # add samples attributes for the events, simply dump everything as a samples
    # attribute
    # special case onset and duration in case of conversion into descrete time
    if not time_attr is None:
        for attr in ('onset', 'duration'):
            evvars[attr] = [e[attr] for e in events]
    ds = _evvars2ds(ds, evvars, eprefix)

    return ds
Beispiel #19
0
def eventrelated_dataset(ds,
                         events=None,
                         time_attr=None,
                         match='prev',
                         eprefix='event'):
    """Segment a dataset into a set of events.

    This function can be used to extract event-related samples from any
    time-series based dataset (actually, it don't have to be time series, but
    could also be any other type of ordered samples). Boxcar-shaped event
    samples, potentially spanning multiple input samples can be automatically
    extracted using :class:`~mvpa2.misc.support.Event` definition lists.  For
    each event all samples covering that particular event are used to form the
    corresponding sample.

    An event definition is a dictionary that contains ``onset`` (as sample index
    in the input dataset), ``duration`` (as number of consecutive samples after
    the onset), as well as an arbitrary number of additional attributes.

    Alternatively, ``onset`` and ``duration`` may also be given as real time
    stamps (or durations). In this case a to be specified samples attribute in
    the input dataset will be used to convert these into sample indices.

    Parameters
    ----------
    ds : Dataset
      The samples of this input dataset have to be in whatever ascending order.
    events : list
      Each event definition has to specify ``onset`` and ``duration``. All other
      attributes will be passed on to the sample attributes collection of the
      returned dataset.
    time_attr : str or None
      If not None, the ``onset`` and ``duration`` specs from the event list will
      be converted using information from this sample attribute. Its values will
      be treated as in-the-same-unit and are used to determine corresponding
      samples from real-value onset and duration definitions.
    match : {'prev', 'next', 'closest'}
      Strategy used to match real-value onsets to sample indices. 'prev' chooses
      the closes preceding samples, 'next' the closest following sample and
      'closest' to absolute closest sample.
    eprefix : str or None
      If not None, this prefix is used to name additional attributes generated
      by the underlying `~mvpa2.mappers.boxcar.BoxcarMapper`. If it is set to
      None, no additional attributes will be created.

    Returns
    -------
    Dataset
      The returned dataset has one sample per each event definition that has
      been passed to the function.

    Examples
    --------
    The documentation also contains an :ref:`example script
    <example_eventrelated>` showing a spatio-temporal analysis of fMRI data
    that involves this function.

    >>> from mvpa2.datasets import Dataset
    >>> ds = Dataset(np.random.randn(10, 25))
    >>> events = [{'onset': 2, 'duration': 4},
    ...           {'onset': 4, 'duration': 4}]
    >>> eds = eventrelated_dataset(ds, events)
    >>> len(eds)
    2
    >>> eds.nfeatures == ds.nfeatures * 4
    True
    >>> 'mapper' in ds.a
    False
    >>> print eds.a.mapper
    <Chain: <Boxcar: bl=4>-<Flatten>>

    And now the same conversion, but with events specified as real time. This is
    on possible if the input dataset contains a sample attribute with the
    necessary information about the input samples.

    >>> ds.sa['record_time'] = np.linspace(0, 5, len(ds))
    >>> rt_events = [{'onset': 1.05, 'duration': 2.2},
    ...              {'onset': 2.3, 'duration': 2.12}]
    >>> rt_eds = eventrelated_dataset(ds, rt_events, time_attr='record_time',
    ...                               match='closest')
    >>> np.all(eds.samples == rt_eds.samples)
    True
    >>> # returned dataset e.g. has info from original samples
    >>> rt_eds.sa.record_time
    array([[ 1.11111111,  1.66666667,  2.22222222,  2.77777778],
           [ 2.22222222,  2.77777778,  3.33333333,  3.88888889]])
    """
    # relabel argument
    conv_strategy = {
        'prev': 'floor',
        'next': 'ceil',
        'closest': 'round'
    }[match]

    if not time_attr is None:
        tvec = ds.sa[time_attr].value
        # we are asked to convert onset time into sample ids
        descr_events = []
        for ev in events:
            # do not mess with the input data
            ev = copy.deepcopy(ev)
            # best matching sample
            idx = value2idx(ev['onset'], tvec, conv_strategy)
            # store offset of sample time and real onset
            ev['orig_offset'] = ev['onset'] - tvec[idx]
            # rescue the real onset into a new attribute
            ev['orig_onset'] = ev['onset']
            ev['orig_duration'] = ev['duration']
            # figure out how many samples we need
            ev['duration'] = \
                    len(tvec[idx:][tvec[idx:] < ev['onset'] + ev['duration']])
            # new onset is sample index
            ev['onset'] = idx
            descr_events.append(ev)
    else:
        descr_events = events
    # convert the event specs into the format expected by BoxcarMapper
    # take the first event as an example of contained keys
    evvars = {}
    for k in descr_events[0]:
        try:
            evvars[k] = [e[k] for e in descr_events]
        except KeyError:
            raise ValueError("Each event property must be present for all "
                             "events (could not find '%s')" % k)
    # checks
    for p in ['onset', 'duration']:
        if not p in evvars:
            raise ValueError("'%s' is a required property for all events." % p)
    boxlength = max(evvars['duration'])
    if __debug__:
        if not max(evvars['duration']) == min(evvars['duration']):
            warning('Boxcar mapper will use maximum boxlength (%i) of all '
                    'provided Events.' % boxlength)

    # finally create, train und use the boxcar mapper
    bcm = BoxcarMapper(evvars['onset'], boxlength, space=eprefix)
    bcm.train(ds)
    ds = ds.get_mapped(bcm)
    # at last reflatten the dataset
    # could we add some meaningful attribute during this mapping, i.e. would
    # assigning 'inspace' do something good?
    ds = ds.get_mapped(FlattenMapper(shape=ds.samples.shape[1:]))
    # add samples attributes for the events, simply dump everything as a samples
    # attribute
    for a in evvars:
        if not eprefix is None and a in ds.sa:
            # if there is already a samples attribute like this, it got mapped
            # by BoxcarMapper (i.e. is multi-dimensional). We move it aside
            # under new `eprefix` name
            ds.sa[eprefix + '_' + a] = ds.sa[a]
        if a in ['onset', 'duration']:
            # special case: we want the non-discrete, original onset and
            # duration
            if not time_attr is None:
                # but only if there was a conversion happining, since otherwise
                # we get the same info from BoxcarMapper
                ds.sa[a] = [e[a] for e in events]
        else:
            ds.sa[a] = evvars[a]
    return ds
Beispiel #20
0
def extract_boxcar_event_samples(
        ds, events=None, time_attr=None, match='prev',
        event_offset=None, event_duration=None,
        eprefix='event', event_mapper=None):
    """Segment a dataset by extracting boxcar events

    (Multiple) consecutive samples are extracted for each event, and are either
    returned in a flattened shape, or subject to further processing.


    Boxcar event model details
    --------------------------

    For each event all samples covering that particular event are used to form
    a corresponding sample. One sample for each event is returned. Event
    specification dictionaries must contain an ``onset`` attribute (as sample
    index in the input dataset), ``duration`` (as number of consecutive samples
    after the onset). Any number of additional attributes can be present in an
    event specification. Those attributes are included as sample attributes in
    the returned dataset.

    Alternatively, ``onset`` and ``duration`` may also be given in a
    non-discrete time specification. In this case a dataset attribute needs to
    be specified that contains time-stamps for each input data sample, and is
    used to convert times into discrete sample indices (see ``match``
    argument).

    A mapper instance can be provided (see ``event_mapper``) to implement
    futher processing of each event sample, for example in order to yield
    average samples.

    Returns
    -------
    Dataset
      One sample per each event definition that has been passed to the
      function. Additional event attributes are included as sample attributes.

    Examples
    --------
    The documentation also contains an :ref:`example script
    <example_eventrelated>` showing a spatio-temporal analysis of fMRI data
    that involves this function.

    >>> from mvpa2.datasets import Dataset
    >>> ds = Dataset(np.random.randn(10, 25))
    >>> events = [{'onset': 2, 'duration': 4},
    ...           {'onset': 4, 'duration': 4}]
    >>> eds = eventrelated_dataset(ds, events)
    >>> len(eds)
    2
    >>> eds.nfeatures == ds.nfeatures * 4
    True
    >>> 'mapper' in ds.a
    False
    >>> print eds.a.mapper
    <Chain: <Boxcar: bl=4>-<Flatten>>

    And now the same conversion, but with events specified as real time. This is
    on possible if the input dataset contains a sample attribute with the
    necessary information about the input samples.

    >>> ds.sa['record_time'] = np.linspace(0, 5, len(ds))
    >>> rt_events = [{'onset': 1.05, 'duration': 2.2},
    ...              {'onset': 2.3, 'duration': 2.12}]
    >>> rt_eds = eventrelated_dataset(ds, rt_events, time_attr='record_time',
    ...                               match='closest')
    >>> np.all(eds.samples == rt_eds.samples)
    True
    >>> # returned dataset e.g. has info from original samples
    >>> rt_eds.sa.record_time
    array([[ 1.11111111,  1.66666667,  2.22222222,  2.77777778],
           [ 2.22222222,  2.77777778,  3.33333333,  3.88888889]])
    """
    # relabel argument
    conv_strategy = {'prev': 'floor',
                     'next': 'ceil',
                     'closest': 'round'}[match]

    if not (event_offset is None and event_duration is None):
        descr_events = []
        for ev in events:
            # do not mess with the input data
            ev = copy.deepcopy(ev)
            if not event_offset is None:
                ev['onset'] += event_offset
            if not event_duration is None:
                ev['duration'] = event_duration
            descr_events.append(ev)
        events = descr_events

    if not time_attr is None:
        tvec = ds.sa[time_attr].value
        # we are asked to convert onset time into sample ids
        descr_events = []
        for ev in events:
            # do not mess with the input data
            ev = copy.deepcopy(ev)
            # best matching sample
            idx = value2idx(ev['onset'], tvec, conv_strategy)
            # store offset of sample time and real onset
            ev['orig_offset'] = ev['onset'] - tvec[idx]
            # rescue the real onset into a new attribute
            ev['orig_onset'] = ev['onset']
            ev['orig_duration'] = ev['duration']
            # figure out how many samples we need
            ev['duration'] = \
                    len(tvec[idx:][tvec[idx:] < ev['onset'] + ev['duration']])
            # new onset is sample index
            ev['onset'] = idx
            descr_events.append(ev)
    else:
        descr_events = events
    # convert the event specs into the format expected by BoxcarMapper
    # take the first event as an example of contained keys
    evvars = _events2dict(descr_events)
    # checks
    for p in ['onset', 'duration']:
        if not p in evvars:
            raise ValueError("'%s' is a required property for all events."
                             % p)
    boxlength = max(evvars['duration'])
    if __debug__:
        if not max(evvars['duration']) == min(evvars['duration']):
            warning('Boxcar mapper will use maximum boxlength (%i) of all '
                    'provided Events.'% boxlength)

    # finally create, train und use the boxcar mapper
    bcm = BoxcarMapper(evvars['onset'], boxlength, space=eprefix)
    bcm.train(ds)
    ds = ds.get_mapped(bcm)
    if event_mapper is None:
        # at last reflatten the dataset
        # could we add some meaningful attribute during this mapping, i.e. would
        # assigning 'inspace' do something good?
        ds = ds.get_mapped(FlattenMapper(shape=ds.samples.shape[1:]))
    else:
        ds = ds.get_mapped(event_mapper)
    # add samples attributes for the events, simply dump everything as a samples
    # attribute
    # special case onset and duration in case of conversion into descrete time
    if not time_attr is None:
        for attr in ('onset', 'duration'):
            evvars[attr] = [e[attr] for e in events]
    ds = _evvars2ds(ds, evvars, eprefix)

    return ds
Beispiel #21
0
def eventrelated_dataset(ds, events=None, time_attr=None, match='prev',
                         eprefix='event', event_mapper=None):
    """Segment a dataset into a set of events.

    This function can be used to extract event-related samples from any
    time-series based dataset (actually, it don't have to be time series, but
    could also be any other type of ordered samples). Boxcar-shaped event
    samples, potentially spanning multiple input samples can be automatically
    extracted using :class:`~mvpa2.misc.support.Event` definition lists.  For
    each event all samples covering that particular event are used to form the
    corresponding sample.

    An event definition is a dictionary that contains ``onset`` (as sample index
    in the input dataset), ``duration`` (as number of consecutive samples after
    the onset), as well as an arbitrary number of additional attributes.

    Alternatively, ``onset`` and ``duration`` may also be given as real time
    stamps (or durations). In this case a to be specified samples attribute in
    the input dataset will be used to convert these into sample indices.

    Parameters
    ----------
    ds : Dataset
      The samples of this input dataset have to be in whatever ascending order.
    events : list
      Each event definition has to specify ``onset`` and ``duration``. All other
      attributes will be passed on to the sample attributes collection of the
      returned dataset.
    time_attr : str or None
      If not None, the ``onset`` and ``duration`` specs from the event list will
      be converted using information from this sample attribute. Its values will
      be treated as in-the-same-unit and are used to determine corresponding
      samples from real-value onset and duration definitions.
    match : {'prev', 'next', 'closest'}
      Strategy used to match real-value onsets to sample indices. 'prev' chooses
      the closes preceding samples, 'next' the closest following sample and
      'closest' to absolute closest sample.
    eprefix : str or None
      If not None, this prefix is used to name additional attributes generated
      by the underlying `~mvpa2.mappers.boxcar.BoxcarMapper`. If it is set to
      None, no additional attributes will be created.
    event_mapper : Mapper
      This mapper is used to forward-map the dataset containing the boxcar event
      samples. If None (default) a FlattenMapper is employed to convert
      multi-dimensional sample matrices into simple one-dimensional sample
      vectors. This option can be used to implement temporal compression, by
      e.g. averaging samples within an event boxcar using an FxMapper. Any
      mapper needs to keep the sample axis unchanged, i.e. number and order of
      samples remain the same.

    Returns
    -------
    Dataset
      The returned dataset has one sample per each event definition that has
      been passed to the function.

    Examples
    --------
    The documentation also contains an :ref:`example script
    <example_eventrelated>` showing a spatio-temporal analysis of fMRI data
    that involves this function.

    >>> from mvpa2.datasets import Dataset
    >>> ds = Dataset(np.random.randn(10, 25))
    >>> events = [{'onset': 2, 'duration': 4},
    ...           {'onset': 4, 'duration': 4}]
    >>> eds = eventrelated_dataset(ds, events)
    >>> len(eds)
    2
    >>> eds.nfeatures == ds.nfeatures * 4
    True
    >>> 'mapper' in ds.a
    False
    >>> print eds.a.mapper
    <Chain: <Boxcar: bl=4>-<Flatten>>

    And now the same conversion, but with events specified as real time. This is
    on possible if the input dataset contains a sample attribute with the
    necessary information about the input samples.

    >>> ds.sa['record_time'] = np.linspace(0, 5, len(ds))
    >>> rt_events = [{'onset': 1.05, 'duration': 2.2},
    ...              {'onset': 2.3, 'duration': 2.12}]
    >>> rt_eds = eventrelated_dataset(ds, rt_events, time_attr='record_time',
    ...                               match='closest')
    >>> np.all(eds.samples == rt_eds.samples)
    True
    >>> # returned dataset e.g. has info from original samples
    >>> rt_eds.sa.record_time
    array([[ 1.11111111,  1.66666667,  2.22222222,  2.77777778],
           [ 2.22222222,  2.77777778,  3.33333333,  3.88888889]])
    """
    # relabel argument
    conv_strategy = {'prev': 'floor',
                     'next': 'ceil',
                     'closest': 'round'}[match]

    if not time_attr is None:
        tvec = ds.sa[time_attr].value
        # we are asked to convert onset time into sample ids
        descr_events = []
        for ev in events:
            # do not mess with the input data
            ev = copy.deepcopy(ev)
            # best matching sample
            idx = value2idx(ev['onset'], tvec, conv_strategy)
            # store offset of sample time and real onset
            ev['orig_offset'] = ev['onset'] - tvec[idx]
            # rescue the real onset into a new attribute
            ev['orig_onset'] = ev['onset']
            ev['orig_duration'] = ev['duration']
            # figure out how many samples we need
            ev['duration'] = \
                    len(tvec[idx:][tvec[idx:] < ev['onset'] + ev['duration']])
            # new onset is sample index
            ev['onset'] = idx
            descr_events.append(ev)
    else:
        descr_events = events
    # convert the event specs into the format expected by BoxcarMapper
    # take the first event as an example of contained keys
    evvars = {}
    for k in descr_events[0]:
        try:
            evvars[k] = [e[k] for e in descr_events]
        except KeyError:
            raise ValueError("Each event property must be present for all "
                             "events (could not find '%s')" % k)
    # checks
    for p in ['onset', 'duration']:
        if not p in evvars:
            raise ValueError("'%s' is a required property for all events."
                             % p)
    boxlength = max(evvars['duration'])
    if __debug__:
        if not max(evvars['duration']) == min(evvars['duration']):
            warning('Boxcar mapper will use maximum boxlength (%i) of all '
                    'provided Events.'% boxlength)

    # finally create, train und use the boxcar mapper
    bcm = BoxcarMapper(evvars['onset'], boxlength, space=eprefix)
    bcm.train(ds)
    ds = ds.get_mapped(bcm)
    if event_mapper is None:
        # at last reflatten the dataset
        # could we add some meaningful attribute during this mapping, i.e. would
        # assigning 'inspace' do something good?
        ds = ds.get_mapped(FlattenMapper(shape=ds.samples.shape[1:]))
    else:
        ds = ds.get_mapped(event_mapper)
    # add samples attributes for the events, simply dump everything as a samples
    # attribute
    for a in evvars:
        if not eprefix is None and a in ds.sa:
            # if there is already a samples attribute like this, it got mapped
            # by BoxcarMapper (i.e. is multi-dimensional). We move it aside
            # under new `eprefix` name
            ds.sa[eprefix + '_' + a] = ds.sa[a]
        if a in ['onset', 'duration']:
            # special case: we want the non-discrete, original onset and
            # duration
            if not time_attr is None:
                # but only if there was a conversion happining, since otherwise
                # we get the same info from BoxcarMapper
                ds.sa[a] = [e[a] for e in events]
        else:
            ds.sa[a] = evvars[a]
    return ds