Esempio n. 1
0
def _extract_boxcar_events(ds, events=None, time_attr=None, match="prev", eprefix="event", event_mapper=None):
    """Build one boxcar sample per event; see eventrelated_dataset() for docs."""
    # translate the user-facing label into the rounding mode for value2idx()
    strategies = {"prev": "floor", "next": "ceil", "closest": "round"}
    conv_strategy = strategies[match]

    timed = time_attr is not None
    if timed:
        # onsets/durations are given as times: convert them into sample ids
        timestamps = ds.sa[time_attr].value
        descr_events = []
        for event in events:
            # work on a private copy so the caller's event stays untouched
            spec = copy.deepcopy(event)
            onset_time = spec["onset"]
            # best matching sample
            idx = value2idx(onset_time, timestamps, conv_strategy)
            # keep the distance between the real onset and the sample time
            spec["orig_offset"] = onset_time - timestamps[idx]
            # preserve the real-valued specification under new keys
            spec["orig_onset"] = onset_time
            duration_time = spec["duration"]
            spec["orig_duration"] = duration_time
            # count samples from the onset sample on that start before the
            # event's end time
            tail = timestamps[idx:]
            spec["duration"] = len(tail[tail < onset_time + duration_time])
            # from here on the onset is a sample index
            spec["onset"] = idx
            descr_events.append(spec)
    else:
        descr_events = events

    # reshape the event specs into the layout consumed by BoxcarMapper
    evvars = _events2dict(descr_events)
    for required in ("onset", "duration"):
        if required not in evvars:
            raise ValueError("'%s' is a required property for all events." % required)
    durations = evvars["duration"]
    boxlength = max(durations)
    if __debug__:
        if boxlength != min(durations):
            warning("Boxcar mapper will use maximum boxlength (%i) of all " "provided Events." % boxlength)

    # create, train and apply the boxcar mapper
    bcm = BoxcarMapper(evvars["onset"], boxlength, space=eprefix)
    bcm.train(ds)
    boxed = ds.get_mapped(bcm)
    # without a custom mapper the boxcar samples are simply flattened again
    if event_mapper is None:
        post_mapper = FlattenMapper(shape=boxed.samples.shape[1:])
    else:
        post_mapper = event_mapper
    mapped = boxed.get_mapped(post_mapper)

    # after a time conversion, report onset and duration as they were passed
    # in rather than as the derived sample indices/counts
    if timed:
        for attr in ("onset", "duration"):
            evvars[attr] = [e[attr] for e in events]
    # every event variable ends up as a sample attribute
    return _evvars2ds(mapped, evvars, eprefix)
Esempio n. 2
0
def extract_boxcar_event_samples(
        ds, events=None, time_attr=None, match='prev',
        event_offset=None, event_duration=None,
        eprefix='event', event_mapper=None):
    """Segment a dataset by extracting boxcar events

    (Multiple) consecutive samples are extracted for each event, and are either
    returned in a flattened shape, or subject to further processing.


    Boxcar event model details
    --------------------------

    For each event all samples covering that particular event are used to form
    a corresponding sample. One sample for each event is returned. Event
    specification dictionaries must contain an ``onset`` attribute (as sample
    index in the input dataset), ``duration`` (as number of consecutive samples
    after the onset). Any number of additional attributes can be present in an
    event specification. Those attributes are included as sample attributes in
    the returned dataset.

    Alternatively, ``onset`` and ``duration`` may also be given in a
    non-discrete time specification. In this case a dataset attribute needs to
    be specified that contains time-stamps for each input data sample, and is
    used to convert times into discrete sample indices (see ``match``
    argument).

    A mapper instance can be provided (see ``event_mapper``) to implement
    further processing of each event sample, for example in order to yield
    average samples.

    Parameters
    ----------
    ds : Dataset
      Input dataset the boxcar samples are extracted from.
    events : list
      Event specification dictionaries; each needs ``onset`` and ``duration``.
    time_attr : str or None
      Name of the sample attribute holding time-stamps. If not None, event
      onsets and durations are converted into sample indices and counts.
    match : {'prev', 'next', 'closest'}
      Strategy for matching a real-valued onset to a sample index.
    event_offset : None or number
      If not None, added to the ``onset`` of every event.
    event_duration : None or number
      If not None, replaces the ``duration`` of every event.
    eprefix : str or None
      Passed as ``space`` to the BoxcarMapper and used when attaching the
      event attributes to the output.
    event_mapper : Mapper or None
      Applied to the boxcar dataset; if None a FlattenMapper is used.

    Returns
    -------
    Dataset
      One sample per each event definition that has been passed to the
      function. Additional event attributes are included as sample attributes.

    Raises
    ------
    ValueError
      If ``onset`` or ``duration`` is missing from the event variables.

    Examples
    --------
    The documentation also contains an :ref:`example script
    <example_eventrelated>` showing a spatio-temporal analysis of fMRI data
    that involves this function.

    >>> from mvpa2.datasets import Dataset
    >>> ds = Dataset(np.random.randn(10, 25))
    >>> events = [{'onset': 2, 'duration': 4},
    ...           {'onset': 4, 'duration': 4}]
    >>> eds = eventrelated_dataset(ds, events)
    >>> len(eds)
    2
    >>> eds.nfeatures == ds.nfeatures * 4
    True
    >>> 'mapper' in ds.a
    False
    >>> print eds.a.mapper
    <Chain: <Boxcar: bl=4>-<Flatten>>

    And now the same conversion, but with events specified as real time. This is
    only possible if the input dataset contains a sample attribute with the
    necessary information about the input samples.

    >>> ds.sa['record_time'] = np.linspace(0, 5, len(ds))
    >>> rt_events = [{'onset': 1.05, 'duration': 2.2},
    ...              {'onset': 2.3, 'duration': 2.12}]
    >>> rt_eds = eventrelated_dataset(ds, rt_events, time_attr='record_time',
    ...                               match='closest')
    >>> np.all(eds.samples == rt_eds.samples)
    True
    >>> # returned dataset e.g. has info from original samples
    >>> rt_eds.sa.record_time
    array([[ 1.11111111,  1.66666667,  2.22222222,  2.77777778],
           [ 2.22222222,  2.77777778,  3.33333333,  3.88888889]])
    """
    # translate the user-facing label into the rounding mode for value2idx()
    strategies = {'prev': 'floor', 'next': 'ceil', 'closest': 'round'}
    conv_strategy = strategies[match]

    if event_offset is not None or event_duration is not None:
        # apply the global overrides on copies, never on the caller's events
        adjusted = []
        for event in events:
            spec = copy.deepcopy(event)
            if event_offset is not None:
                spec['onset'] += event_offset
            if event_duration is not None:
                spec['duration'] = event_duration
            adjusted.append(spec)
        events = adjusted

    timed = time_attr is not None
    if timed:
        # onsets/durations are given as times: convert them into sample ids
        timestamps = ds.sa[time_attr].value
        descr_events = []
        for event in events:
            spec = copy.deepcopy(event)
            onset_time = spec['onset']
            # best matching sample
            idx = value2idx(onset_time, timestamps, conv_strategy)
            # keep the distance between the real onset and the sample time
            spec['orig_offset'] = onset_time - timestamps[idx]
            # preserve the real-valued specification under new keys
            spec['orig_onset'] = onset_time
            duration_time = spec['duration']
            spec['orig_duration'] = duration_time
            # count samples from the onset sample on that start before the
            # event's end time
            tail = timestamps[idx:]
            spec['duration'] = len(tail[tail < onset_time + duration_time])
            # from here on the onset is a sample index
            spec['onset'] = idx
            descr_events.append(spec)
    else:
        descr_events = events

    # reshape the event specs into the layout consumed by BoxcarMapper
    evvars = _events2dict(descr_events)
    for required in ('onset', 'duration'):
        if required not in evvars:
            raise ValueError("'%s' is a required property for all events."
                             % required)
    durations = evvars['duration']
    boxlength = max(durations)
    if __debug__:
        if boxlength != min(durations):
            warning('Boxcar mapper will use maximum boxlength (%i) of all '
                    'provided Events.'% boxlength)

    # create, train and apply the boxcar mapper
    bcm = BoxcarMapper(evvars['onset'], boxlength, space=eprefix)
    bcm.train(ds)
    boxed = ds.get_mapped(bcm)
    # without a custom mapper the boxcar samples are simply flattened again
    if event_mapper is None:
        post_mapper = FlattenMapper(shape=boxed.samples.shape[1:])
    else:
        post_mapper = event_mapper
    mapped = boxed.get_mapped(post_mapper)

    # after a time conversion, report onset and duration as specified in
    # ``events`` rather than as the derived sample indices/counts
    if timed:
        for attr in ('onset', 'duration'):
            evvars[attr] = [e[attr] for e in events]
    # every event variable ends up as a sample attribute
    return _evvars2ds(mapped, evvars, eprefix)
Esempio n. 3
0
def _extract_boxcar_events(ds,
                           events=None,
                           time_attr=None,
                           match='prev',
                           eprefix='event',
                           event_mapper=None):
    """Build one boxcar sample per event; see eventrelated_dataset() for docs."""
    # map the public matching label onto the rounding mode of value2idx()
    rounding_modes = {'prev': 'floor', 'next': 'ceil', 'closest': 'round'}
    conv_strategy = rounding_modes[match]

    convert_times = time_attr is not None
    if not convert_times:
        descr_events = events
    else:
        # events are specified in time units -> translate into sample ids
        tvec = ds.sa[time_attr].value
        descr_events = []
        for original in events:
            # never modify the event handed in by the caller
            ev = copy.deepcopy(original)
            real_onset = ev['onset']
            # best matching sample
            idx = value2idx(real_onset, tvec, conv_strategy)
            # difference between real onset and the matched sample's time
            ev['orig_offset'] = real_onset - tvec[idx]
            # rescue the real-valued specs into new attributes
            ev['orig_onset'] = real_onset
            real_duration = ev['duration']
            ev['orig_duration'] = real_duration
            # number of samples, starting at the onset sample, whose
            # time-stamp lies before the end of the event
            remaining = tvec[idx:]
            ev['duration'] = len(remaining[remaining < real_onset + real_duration])
            # new onset is the sample index
            ev['onset'] = idx
            descr_events.append(ev)

    # convert the event specs into the format expected by BoxcarMapper
    evvars = _events2dict(descr_events)
    for key in ('onset', 'duration'):
        if key not in evvars:
            raise ValueError("'%s' is a required property for all events." % key)
    all_durations = evvars['duration']
    boxlength = max(all_durations)
    if __debug__:
        if boxlength != min(all_durations):
            warning('Boxcar mapper will use maximum boxlength (%i) of all '
                    'provided Events.' % boxlength)

    # create, train and use the boxcar mapper
    bcm = BoxcarMapper(evvars['onset'], boxlength, space=eprefix)
    bcm.train(ds)
    ds = ds.get_mapped(bcm)
    # default post-processing is to flatten each boxcar sample again
    final_mapper = event_mapper
    if final_mapper is None:
        final_mapper = FlattenMapper(shape=ds.samples.shape[1:])
    ds = ds.get_mapped(final_mapper)

    # when times were converted, expose onset/duration as originally given
    if convert_times:
        for attr in ('onset', 'duration'):
            evvars[attr] = [e[attr] for e in events]
    # dump all event variables as sample attributes
    return _evvars2ds(ds, evvars, eprefix)
Esempio n. 4
0
def extract_boxcar_event_samples(ds,
                                 events=None,
                                 time_attr=None,
                                 match='prev',
                                 event_offset=None,
                                 event_duration=None,
                                 eprefix='event',
                                 event_mapper=None):
    """Segment a dataset by extracting boxcar events

    (Multiple) consecutive samples are extracted for each event, and are either
    returned in a flattened shape, or subject to further processing.

    Events are specified as a list of dictionaries
    (see :class:`~mvpa2.misc.support.Event` for a helper class). Each
    dictionary contains all relevant attributes to describe an event. This is
    at least the ``onset`` time of an event, but can also comprise
    ``duration``, ``amplitude``, and arbitrary other attributes.

    Boxcar event model details
    --------------------------

    For each event all samples covering that particular event are used to form
    a corresponding sample. One sample for each event is returned. Event
    specification dictionaries must contain an ``onset`` attribute (as sample
    index in the input dataset), ``duration`` (as number of consecutive samples
    after the onset). Any number of additional attributes can be present in an
    event specification. Those attributes are included as sample attributes in
    the returned dataset.

    Alternatively, ``onset`` and ``duration`` may also be given in a
    non-discrete time specification. In this case a dataset attribute needs to
    be specified that contains time-stamps for each input data sample, and is
    used to convert times into discrete sample indices (see ``match``
    argument).

    A mapper instance can be provided (see ``event_mapper``) to implement
    further processing of each event sample, for example in order to yield
    average samples.

    Parameters
    ----------
    ds : Dataset
      The samples of this input dataset have to be in whatever ascending order.
    events : list
      Each event definition has to specify ``onset`` and ``duration``. All
      other attributes will be passed on to the sample attributes collection of
      the returned dataset.
    time_attr : str or None
      Attribute with dataset sample time-stamps.
      If not None, the ``onset`` and ``duration`` specs
      from the event list will be converted using information from this sample
      attribute. Its values will be treated as in-the-same-unit and are used to
      determine corresponding samples from real-value onset and duration
      definitions.
    match : {'prev', 'next', 'closest'}
      Strategy used to match real-value onsets to sample
      indices. 'prev' chooses the closest preceding sample, 'next' the closest
      following sample and 'closest' the absolute closest sample.
    event_offset : None or float
      If not None, all event ``onset`` specifications will be offset by this
      value before boxcar modeling is performed.
    event_duration : None or float
      If not None, all event ``duration`` specifications will be set to this
      value before boxcar modeling is done.
    eprefix : str or None
      If not None, this prefix is used to name additional
      attributes generated by the underlying
      `~mvpa2.mappers.boxcar.BoxcarMapper`. If it is set to None, no additional
      attributes will be created.
    event_mapper : Mapper
      This mapper is used to forward-map the dataset containing the boxcar event
      samples. If None (default) a FlattenMapper is employed to convert
      multi-dimensional sample matrices into simple one-dimensional sample
      vectors. This option can be used to implement temporal compression, by
      e.g. averaging samples within an event boxcar using an FxMapper. Any
      mapper needs to keep the sample axis unchanged, i.e. number and order of
      samples remain the same.

    Returns
    -------
    Dataset
      One sample per each event definition that has been passed to the
      function. Additional event attributes are included as sample attributes.

    Examples
    --------
    The documentation also contains an :ref:`example script
    <example_eventrelated>` showing a spatio-temporal analysis of fMRI data
    that involves this function.

    >>> from mvpa2.datasets import Dataset
    >>> ds = Dataset(np.random.randn(10, 25))
    >>> events = [{'onset': 2, 'duration': 4},
    ...           {'onset': 4, 'duration': 4}]
    >>> eds = eventrelated_dataset(ds, events)
    >>> len(eds)
    2
    >>> eds.nfeatures == ds.nfeatures * 4
    True
    >>> 'mapper' in ds.a
    False
    >>> print eds.a.mapper
    <Chain: <Boxcar: bl=4>-<Flatten>>

    And now the same conversion, but with events specified as real time. This is
    only possible if the input dataset contains a sample attribute with the
    necessary information about the input samples.

    >>> ds.sa['record_time'] = np.linspace(0, 5, len(ds))
    >>> rt_events = [{'onset': 1.05, 'duration': 2.2},
    ...              {'onset': 2.3, 'duration': 2.12}]
    >>> rt_eds = eventrelated_dataset(ds, rt_events, time_attr='record_time',
    ...                               match='closest')
    >>> np.all(eds.samples == rt_eds.samples)
    True
    >>> # returned dataset e.g. has info from original samples
    >>> rt_eds.sa.record_time
    array([[ 1.11111111,  1.66666667,  2.22222222,  2.77777778],
           [ 2.22222222,  2.77777778,  3.33333333,  3.88888889]])
    """
    # map the public matching label onto the rounding mode of value2idx()
    rounding_modes = {'prev': 'floor', 'next': 'ceil', 'closest': 'round'}
    conv_strategy = rounding_modes[match]

    shift_onsets = event_offset is not None
    fix_durations = event_duration is not None
    if shift_onsets or fix_durations:
        # global overrides are applied to copies of the event specs
        overridden = []
        for original in events:
            ev = copy.deepcopy(original)
            if shift_onsets:
                ev['onset'] += event_offset
            if fix_durations:
                ev['duration'] = event_duration
            overridden.append(ev)
        events = overridden

    convert_times = time_attr is not None
    if not convert_times:
        descr_events = events
    else:
        # events are specified in time units -> translate into sample ids
        tvec = ds.sa[time_attr].value
        descr_events = []
        for original in events:
            # never modify the event specs themselves
            ev = copy.deepcopy(original)
            real_onset = ev['onset']
            # best matching sample
            idx = value2idx(real_onset, tvec, conv_strategy)
            # difference between real onset and the matched sample's time
            ev['orig_offset'] = real_onset - tvec[idx]
            # rescue the real-valued specs into new attributes
            ev['orig_onset'] = real_onset
            real_duration = ev['duration']
            ev['orig_duration'] = real_duration
            # number of samples, starting at the onset sample, whose
            # time-stamp lies before the end of the event
            remaining = tvec[idx:]
            ev['duration'] = len(remaining[remaining < real_onset + real_duration])
            # new onset is the sample index
            ev['onset'] = idx
            descr_events.append(ev)

    # convert the event specs into the format expected by BoxcarMapper
    evvars = _events2dict(descr_events)
    for key in ('onset', 'duration'):
        if key not in evvars:
            raise ValueError("'%s' is a required property for all events." % key)
    all_durations = evvars['duration']
    boxlength = max(all_durations)
    if __debug__:
        if boxlength != min(all_durations):
            warning('Boxcar mapper will use maximum boxlength (%i) of all '
                    'provided Events.' % boxlength)

    # create, train and use the boxcar mapper
    bcm = BoxcarMapper(evvars['onset'], boxlength, space=eprefix)
    bcm.train(ds)
    ds = ds.get_mapped(bcm)
    # default post-processing is to flatten each boxcar sample again
    final_mapper = event_mapper
    if final_mapper is None:
        final_mapper = FlattenMapper(shape=ds.samples.shape[1:])
    ds = ds.get_mapped(final_mapper)

    # when times were converted, expose onset/duration as they appear in
    # ``events`` instead of the derived sample indices/counts
    if convert_times:
        for attr in ('onset', 'duration'):
            evvars[attr] = [e[attr] for e in events]
    # dump all event variables as sample attributes
    return _evvars2ds(ds, evvars, eprefix)
Esempio n. 5
0
def test_simpleboxcar():
    """Exercise BoxcarMapper forward/reverse mapping on plain arrays."""
    column = np.atleast_2d(np.arange(10)).T
    onsets = np.arange(10)

    # a box of length zero has to be refused by the constructor
    assert_raises(ValueError, BoxcarMapper, onsets, 0)

    # boxes of length one amount to an identity transformation
    identity = BoxcarMapper(onsets, 1)
    boxed = identity.forward(column)
    # index 0 is a feature below, so we get explicit 2D out of 1D
    assert_array_equal(boxed[:, 0], column)

    # now check for illegal boxes
    if __debug__:
        # condition is checked only in __debug__
        oversized = BoxcarMapper(onsets, 2)
        assert_raises(ValueError, oversized.train, column)

    # now something that should work
    nbox, boxlength = 9, 2
    mapper = BoxcarMapper(np.arange(nbox), boxlength)
    boxed = mapper.forward(column)
    # the dimensionality must have been upcast properly
    assert_equal(boxed.shape, (nbox, boxlength) + column.shape[1:])
    # compare actual values, squeezing the last dim for simplicity
    expected = np.vstack((np.arange(9), np.arange(9) + 1)).T
    assert_array_equal(boxed.squeeze(), expected)

    # now test for proper data shape
    onsets = [2, 4, 3, 5]
    boxed = BoxcarMapper(onsets, 4).forward(np.ones((10, 3, 4, 2)))
    assert_equal(boxed.shape, (4, 4, 3, 4, 2))

    # test reverse
    volume = np.arange(240).reshape(10, 3, 4, 2)
    boxlength = 2
    m = BoxcarMapper(onsets, boxlength)
    m.train(volume)
    forwarded = m.forward(volume)
    assert_equal(forwarded.shape, (4, 2, 3, 4, 2))

    # try full reconstruct
    reconstructed = m.reverse(forwarded)
    # shape has to match
    expected_shape = (len(onsets) * boxlength, ) + volume.shape[1:]
    assert_equal(reconstructed.shape, expected_shape)
    # only known samples are part of the results
    assert_true((reconstructed >= 24).all())
    assert_true((reconstructed < 168).all())

    # check proper reconstruction of non-conflicting sample
    assert_array_equal(reconstructed[0].ravel(), np.arange(48, 72))

    # check proper reconstruction of samples being part of multiple
    # mapped samples
    assert_array_equal(reconstructed[1].ravel(), np.arange(72, 96))

    # test reverse of a single sample
    single = np.arange(48).reshape(2, 3, 4, 2)
    assert_array_equal(single, m.reverse1(single))
    # now in a dataset
    ds = Dataset([single])
    assert_equal(ds.shape, (1, ) + single.shape)
    # after reverse mapping the 'sample axis' should vanish and the original 3d
    # shape of the samples should be restored
    assert_equal(ds.shape[1:], m.reverse(ds).shape)
    # multiple samples should just be concatenated along the samples axis
    ds = Dataset([single, single])
    concatenated_shape = (np.prod(ds.shape[:2]), ) + single.shape[1:]
    assert_equal(concatenated_shape, m.reverse(ds).shape)
    # a shape mismatch on reverse() is deliberately not asserted to fail: it
    # works and is useful when reverse mapping sample attributes

    # check broadcasting of 'raw' samples into proper boxcars on forward()
    raw = np.arange(24).reshape(3, 4, 2)
    bc = m.forward1(raw)
    assert_array_equal(bc, np.array(2 * [np.arange(24).reshape(3, 4, 2)]))
Esempio n. 6
0
def test_datasetmapping():
    """Check how BoxcarMapper treats dataset attributes on forward/reverse."""
    # 6 samples, 4X2 features
    samples = np.arange(48).reshape(6, 4, 2)
    sample_attrs = {'timepoints': np.arange(6), 'multidim': samples.copy()}
    feature_attrs = {'fid': np.arange(4)}
    ds = Dataset(samples, sa=sample_attrs, fa=feature_attrs)
    # with overlapping and non-overlapping boxcars
    onsets = [0, 1, 4]
    boxlength = 2
    nboxes = len(onsets)
    bm = BoxcarMapper(onsets, boxlength, space='boxy')
    # train is critical
    bm.train(ds)
    mds = bm.forward(ds)
    assert_equal(len(mds), nboxes)
    assert_equal(mds.nfeatures, boxlength)
    # all samples attributes remain, but they can get rotated/compressed into
    # multidimensional attributes
    expected_sa_keys = ['boxy_onsetidx'] + sorted(ds.sa.keys())
    assert_equal(sorted(mds.sa.keys()), expected_sa_keys)
    assert_equal(mds.sa.multidim.shape, (nboxes, boxlength) + ds.shape[1:])
    assert_equal(mds.sa.timepoints.shape, (nboxes, boxlength))
    expected_timepoints = np.array([(s, s + 1) for s in onsets]).flatten()
    assert_array_equal(mds.sa.timepoints.flatten(), expected_timepoints)
    assert_array_equal(mds.sa.boxy_onsetidx, onsets)
    # feature attributes also get rotated and broadcasted
    assert_array_equal(mds.fa.fid, [ds.fa.fid, ds.fa.fid])
    # and finally there is a new one
    assert_array_equal(mds.fa.boxy_offsetidx, list(range(boxlength)))

    # now see how it works on reverse()
    rds = bm.reverse(mds)
    # we got at least something of all original attributes back
    assert_equal(sorted(rds.sa.keys()), sorted(ds.sa.keys()))
    assert_equal(sorted(rds.fa.keys()), sorted(ds.fa.keys()))
    # it is not possible to reconstruct the full samples array
    # some samples even might show up multiple times (when there are
    # overlapping boxcars)
    expected_samples = np.array([
        [[0, 1], [2, 3], [4, 5], [6, 7]],
        [[8, 9], [10, 11], [12, 13], [14, 15]],
        [[8, 9], [10, 11], [12, 13], [14, 15]],
        [[16, 17], [18, 19], [20, 21], [22, 23]],
        [[32, 33], [34, 35], [36, 37], [38, 39]],
        [[40, 41], [42, 43], [44, 45], [46, 47]],
    ])
    assert_array_equal(rds.samples, expected_samples)
    assert_array_equal(rds.sa.timepoints, [0, 1, 1, 2, 4, 5])
    assert_array_equal(rds.sa.multidim, ds.sa.multidim[rds.sa.timepoints])
    # but feature attributes should be fully recovered
    assert_array_equal(rds.fa.fid, ds.fa.fid)

    # popular dataset configuration (double flatten + boxcar)
    cm = ChainMapper([FlattenMapper(), bm, FlattenMapper()])
    cm.train(ds)
    bflat = ds.get_mapped(cm)
    expected_flat_shape = (nboxes, boxlength * np.prod(ds.shape[1:]))
    assert_equal(bflat.shape, expected_flat_shape)
    # add attributes
    bflat.fa['testfa'] = np.arange(bflat.nfeatures)
    bflat.sa['testsa'] = np.arange(bflat.nsamples)
    # now try to go back
    bflatrev = bflat.mapper.reverse(bflat)
    # data should be same again, as far as the boxcars match
    assert_array_equal(ds.samples[:2], bflatrev.samples[:2])
    assert_array_equal(ds.samples[-2:], bflatrev.samples[-2:])
    # feature axis should match
    assert_equal(ds.shape[1:], bflatrev.shape[1:])
Esempio n. 7
0
def timesegments_classification(dss,
                                window_size=6,
                                overlapping_windows=True,
                                distance='correlation',
                                do_zscore=True):
    """Time-segment classification across subjects

    Every dataset is cut into boxcar windows of `window_size` consecutive
    samples.  For each left-out subject, every window is matched against the
    windows averaged over the remaining subjects; a window counts as
    misclassified if its best match is not the window with the same index.

    Parameters
    ----------
    dss : list of datasets
       Datasets to benchmark on.  Usually a single dataset per subject.
       Items that are not exactly of type `Dataset` are wrapped into one.
       Items that already are Datasets are modified in place (z-scoring,
       added 'subjects' sample attribute).
    window_size : int, optional
       How many temporal points to consider for a classification sample
    overlapping_windows : bool, optional
       Strategy to how create and classify "samples" for classification.  If
       True -- `window_size` samples from each time point (but trailing ones)
       constitute a sample, and upon "predict" `window_size` of samples around
       each test point is not considered.  If False -- samples are just taken
       (with training and testing splits) at `window_size` step from one to
       another.
    distance : str, optional
       Dissimilarity between training and testing windows.  Only
       'correlation' (1 - Pearson correlation) is implemented.
    do_zscore : bool, optional
       Perform zscoring (overall, not per-chunk) for each dataset before
       windows are extracted.

    Returns
    -------
    ndarray
       One classification error (fraction of mismatched windows) per
       partition generated over the 'subjects' attribute.

    Raises
    ------
    NotImplementedError
       If `distance` is anything but 'correlation'.
    """
    part2 = NFoldPartitioner(attr='subjects')
    # Check if input list contains Datasets, ndarrays
    dss = [Dataset(ds) if not type(ds) == Dataset else ds for ds in dss]
    # TODO:  allow for doing feature selection
    if do_zscore:
        for ds in dss:
            zscore(ds, chunks_attr=None)

    # assign .sa.subjects to those datasets
    for i, ds in enumerate(dss):
        # part2.attr is by default "subjects"
        ds.sa[part2.attr] = [i]

    dss_test_bc = []
    for ds in dss:
        if overlapping_windows:
            startpoints = range(len(ds) - window_size + 1)
        else:
            startpoints = _get_nonoverlapping_startpoints(len(ds), window_size)
        bm = BoxcarMapper(startpoints, window_size)
        bm.train(ds)
        ds_ = bm.forward(ds)
        ds_.sa['startpoints'] = startpoints

        # reassign subjects so they are not arrays: after boxcar mapping each
        # sample carries one subject id per window element.  ndarray.item()
        # replaces np.asscalar(), which was removed in NumPy 1.23; like
        # asscalar it raises if a window holds more than one unique id.
        ds_.sa[part2.attr] = [np.unique(x).item()
                              for x in ds_.sa[part2.attr].value]

        fm = FlattenMapper()
        fm.train(ds_)
        dss_test_bc.append(ds_.get_mapped(fm))

    ds_test = vstack(dss_test_bc)
    # Perform classification across subjects comparing against mean
    # spatio-temporal pattern of other subjects
    errors_across_subjects = []
    for ds_test_part in part2.generate(ds_test):
        ds_train_, ds_test_ = list(
            Splitter("partitions").generate(ds_test_part))
        # average across subjects to get a representative pattern per timepoint
        ds_train_ = mean_group_sample(['startpoints'])(ds_train_)
        assert (ds_train_.shape == ds_test_.shape)

        if distance == 'correlation':
            # TODO: redo more efficiently since now we are creating full
            # corrcoef matrix.  Also we might better just take a name for
            # the pdist measure but then implement them efficiently
            # (i.e. without hstacking both pieces together first)
            # corrcoef stacks train rows on top of test rows; keep only the
            # off-diagonal block relating the two sets
            dist = 1 - np.corrcoef(ds_train_,
                                   ds_test_)[len(ds_test_):, :len(ds_test_)]
        else:
            raise NotImplementedError

        if overlapping_windows:
            dist = wipe_out_offdiag(dist, window_size)

        # a window is correctly classified if its closest match has the same
        # index
        winners = np.argmin(dist, axis=1)
        error = np.mean(winners != np.arange(len(winners)))
        errors_across_subjects.append(error)

    errors_across_subjects = np.asarray(errors_across_subjects)
    if __debug__:
        debug(
            "BM", "Finished with %s array of errors. Mean error %.2f" %
            (errors_across_subjects.shape, np.mean(errors_across_subjects)))
    return errors_across_subjects
Esempio n. 8
0
def test_simpleboxcar():
    """Verify BoxcarMapper behavior on bare numpy arrays."""
    vector = np.atleast_2d(np.arange(10)).T
    starts = np.arange(10)

    # constructing a mapper with boxlength 0 must fail
    assert_raises(ValueError, BoxcarMapper, starts, 0)

    # with boxlength 1 the mapping is an identity transformation
    unit_mapper = BoxcarMapper(starts, 1)
    unit_boxes = unit_mapper.forward(vector)
    # index 0 is a feature below, so we get explicit 2D out of 1D
    assert_array_equal(unit_boxes[:, 0], vector)

    # now check for illegal boxes
    if __debug__:
        # condition is checked only in __debug__
        bad_mapper = BoxcarMapper(starts, 2)
        assert_raises(ValueError, bad_mapper.train, vector)

    # now something that should work
    nbox = 9
    boxlength = 2
    pair_mapper = BoxcarMapper(np.arange(nbox), boxlength)
    pairs = pair_mapper.forward(vector)
    # dimensionality has to be upcast properly
    assert_equal(pairs.shape, (nbox, boxlength) + vector.shape[1:])
    # check actual values, squeezing the last dim for simplicity
    consecutive = np.vstack((np.arange(9), np.arange(9) + 1)).T
    assert_array_equal(pairs.squeeze(), consecutive)

    # now test for proper data shape
    starts = [2, 4, 3, 5]
    ones_boxed = BoxcarMapper(starts, 4).forward(np.ones((10, 3, 4, 2)))
    assert_equal(ones_boxed.shape, (4, 4, 3, 4, 2))

    # test reverse
    data = np.arange(240).reshape(10, 3, 4, 2)
    boxlength = 2
    m = BoxcarMapper(starts, boxlength)
    m.train(data)
    mp = m.forward(data)
    assert_equal(mp.shape, (4, 2, 3, 4, 2))

    # try full reconstruct
    mr = m.reverse(mp)
    # shape has to match
    full_shape = (len(starts) * boxlength,) + data.shape[1:]
    assert_equal(mr.shape, full_shape)
    # only known samples are part of the results
    lower_ok = (mr >= 24).all()
    upper_ok = (mr < 168).all()
    assert_true(lower_ok)
    assert_true(upper_ok)

    # check proper reconstruction of non-conflicting sample
    assert_array_equal(mr[0].ravel(), np.arange(48, 72))

    # check proper reconstruction of samples being part of multiple
    # mapped samples
    assert_array_equal(mr[1].ravel(), np.arange(72, 96))

    # test reverse of a single sample
    one_box = np.arange(48).reshape(2, 3, 4, 2)
    assert_array_equal(one_box, m.reverse1(one_box))
    # now in a dataset
    ds = Dataset([one_box])
    assert_equal(ds.shape, (1,) + one_box.shape)
    # after reverse mapping the 'sample axis' should vanish and the original 3d
    # shape of the samples should be restored
    assert_equal(ds.shape[1:], m.reverse(ds).shape)
    # multiple samples should just be concatenated along the samples axis
    ds = Dataset([one_box, one_box])
    stacked_shape = (np.prod(ds.shape[:2]),) + one_box.shape[1:]
    assert_equal(stacked_shape, m.reverse(ds).shape)
    # reverse() with a mismatching shape is intentionally not expected to
    # raise: it works and is useful when reverse mapping sample attributes

    # check broadcasting of 'raw' samples into proper boxcars on forward()
    raw_sample = np.arange(24).reshape(3, 4, 2)
    bc = m.forward1(raw_sample)
    assert_array_equal(bc, np.array(2 * [np.arange(24).reshape(3, 4, 2)]))
Esempio n. 9
0
def test_datasetmapping():
    # Exercise BoxcarMapper forward()/reverse() on a Dataset carrying sample
    # and feature attributes, alone and inside a flatten/boxcar/flatten chain.

    # six samples, each a 4x2 block of features
    raw = np.arange(48).reshape(6, 4, 2)
    sample_attrs = {'timepoints': np.arange(6), 'multidim': raw.copy()}
    feature_attrs = {'fid': np.arange(4)}
    orig = Dataset(raw, sa=sample_attrs, fa=feature_attrs)

    # boxcars starting at 0 and 1 overlap, the one starting at 4 does not
    onsets = [0, 1, 4]
    width = 2
    n_boxes = len(onsets)
    boxcar = BoxcarMapper(onsets, width, space='boxy')
    # the mapper has to be trained before it is used
    boxcar.train(orig)
    boxed = boxcar.forward(orig)
    assert_equal(len(boxed), n_boxes)
    assert_equal(boxed.nfeatures, width)

    # every sample attribute is kept, but they can be rotated/compressed into
    # multi-dimensional attributes; an onset index attribute is added on top
    expected_sa_names = ['boxy_onsetidx'] + sorted(orig.sa.keys())
    assert_equal(sorted(boxed.sa.keys()), expected_sa_names)
    assert_equal(boxed.sa.multidim.shape, (n_boxes, width) + orig.shape[1:])
    assert_equal(boxed.sa.timepoints.shape, (n_boxes, width))
    expected_times = np.array([(start, start + 1) for start in onsets])
    assert_array_equal(boxed.sa.timepoints.flatten(),
                       expected_times.flatten())
    assert_array_equal(boxed.sa.boxy_onsetidx, onsets)
    # feature attributes get rotated and broadcast as well
    assert_array_equal(boxed.fa.fid, [orig.fa.fid, orig.fa.fid])
    # ... and a new one enumerates the positions within a boxcar
    assert_array_equal(boxed.fa.boxy_offsetidx, range(width))

    # now the way back through reverse()
    restored = boxcar.reverse(boxed)
    # the names of all original attributes are back
    assert_equal(sorted(restored.sa.keys()), sorted(orig.sa.keys()))
    assert_equal(sorted(restored.fa.keys()), sorted(orig.fa.keys()))
    # the full samples array cannot be reconstructed: sample 3 is in no
    # boxcar, while sample 1 is part of two overlapping boxcars and therefore
    # shows up twice
    kept_rows = [0, 1, 1, 2, 4, 5]
    assert_array_equal(restored.samples,
                       np.arange(48).reshape(6, 4, 2)[kept_rows])
    assert_array_equal(restored.sa.timepoints, kept_rows)
    assert_array_equal(restored.sa.multidim,
                       orig.sa.multidim[restored.sa.timepoints])
    # feature attributes, however, are recovered completely
    assert_array_equal(restored.fa.fid, orig.fa.fid)

    # popular dataset configuration (flatten, boxcar, flatten again)
    chain = ChainMapper([FlattenMapper(), boxcar, FlattenMapper()])
    chain.train(orig)
    flat = orig.get_mapped(chain)
    n_flat_features = width * np.prod(orig.shape[1:])
    assert_equal(flat.shape, (n_boxes, n_flat_features))
    # attach additional attributes before mapping back
    flat.fa['testfa'] = np.arange(flat.nfeatures)
    flat.sa['testsa'] = np.arange(flat.nsamples)
    # reverse through the mapper stored in the dataset
    flat_back = flat.mapper.reverse(flat)
    # samples agree wherever the boxcars line up with the original data
    assert_array_equal(orig.samples[:2], flat_back.samples[:2])
    assert_array_equal(orig.samples[-2:], flat_back.samples[-2:])
    # the feature axes have the original shape again
    assert_equal(orig.shape[1:], flat_back.shape[1:])
Esempio n. 10
0
def eventrelated_dataset(ds, events=None, time_attr=None, match='prev',
                         eprefix='event', event_mapper=None):
    """Segment a dataset into a set of events.

    This function can be used to extract event-related samples from any
    time-series based dataset (actually, it doesn't have to be time series, but
    could also be any other type of ordered samples). Boxcar-shaped event
    samples, potentially spanning multiple input samples can be automatically
    extracted using :class:`~mvpa2.misc.support.Event` definition lists.  For
    each event all samples covering that particular event are used to form the
    corresponding sample.

    An event definition is a dictionary that contains ``onset`` (as sample index
    in the input dataset), ``duration`` (as number of consecutive samples after
    the onset), as well as an arbitrary number of additional attributes.

    Alternatively, ``onset`` and ``duration`` may also be given as real time
    stamps (or durations). In this case a to be specified samples attribute in
    the input dataset will be used to convert these into sample indices.

    Parameters
    ----------
    ds : Dataset
      The samples of this input dataset have to be in whatever ascending order.
    events : list
      Each event definition has to specify ``onset`` and ``duration``. All other
      attributes will be passed on to the sample attributes collection of the
      returned dataset. At least one event is required. The keys of the first
      event determine which properties are collected, and each of them has to
      be present in all events.
    time_attr : str or None
      If not None, the ``onset`` and ``duration`` specs from the event list will
      be converted using information from this sample attribute. Its values will
      be treated as in-the-same-unit and are used to determine corresponding
      samples from real-value onset and duration definitions.
    match : {'prev', 'next', 'closest'}
      Strategy used to match real-value onsets to sample indices. 'prev' chooses
      the closest preceding sample, 'next' the closest following sample and
      'closest' the absolute closest sample.
    eprefix : str or None
      If not None, this prefix is used to name additional attributes generated
      by the underlying `~mvpa2.mappers.boxcar.BoxcarMapper`. If it is set to
      None, no additional attributes will be created.
    event_mapper : Mapper or None
      This mapper is used to forward-map the dataset containing the boxcar event
      samples. If None (default) a FlattenMapper is employed to convert
      multi-dimensional sample matrices into simple one-dimensional sample
      vectors. This option can be used to implement temporal compression, by
      e.g. averaging samples within an event boxcar using an FxMapper. Any
      mapper needs to keep the sample axis unchanged, i.e. number and order of
      samples remain the same.

    Returns
    -------
    Dataset
      The returned dataset has one sample per each event definition that has
      been passed to the function.

    Raises
    ------
    ValueError
      If `events` is None or empty, if a property of the first event is
      missing from any other event, or if ``onset`` or ``duration`` is not
      among the event properties.
    KeyError
      If `match` is not one of the supported strategies.

    Examples
    --------
    The documentation also contains an :ref:`example script
    <example_eventrelated>` showing a spatio-temporal analysis of fMRI data
    that involves this function.

    >>> from mvpa2.datasets import Dataset
    >>> ds = Dataset(np.random.randn(10, 25))
    >>> events = [{'onset': 2, 'duration': 4},
    ...           {'onset': 4, 'duration': 4}]
    >>> eds = eventrelated_dataset(ds, events)
    >>> len(eds)
    2
    >>> eds.nfeatures == ds.nfeatures * 4
    True
    >>> 'mapper' in ds.a
    False
    >>> print eds.a.mapper
    <Chain: <Boxcar: bl=4>-<Flatten>>

    And now the same conversion, but with events specified as real time. This is
    only possible if the input dataset contains a sample attribute with the
    necessary information about the input samples.

    >>> ds.sa['record_time'] = np.linspace(0, 5, len(ds))
    >>> rt_events = [{'onset': 1.05, 'duration': 2.2},
    ...              {'onset': 2.3, 'duration': 2.12}]
    >>> rt_eds = eventrelated_dataset(ds, rt_events, time_attr='record_time',
    ...                               match='closest')
    >>> np.all(eds.samples == rt_eds.samples)
    True
    >>> # returned dataset e.g. has info from original samples
    >>> rt_eds.sa.record_time
    array([[ 1.11111111,  1.66666667,  2.22222222,  2.77777778],
           [ 2.22222222,  2.77777778,  3.33333333,  3.88888889]])
    """
    # relabel argument
    conv_strategy = {'prev': 'floor',
                     'next': 'ceil',
                     'closest': 'round'}[match]

    # without this check a missing or empty event list surfaces much later as
    # an unrelated TypeError/IndexError/AttributeError
    if events is None or len(events) == 0:
        raise ValueError("'events' must be a non-empty list of event "
                         "definitions.")

    if time_attr is not None:
        tvec = ds.sa[time_attr].value
        # we are asked to convert onset time into sample ids
        descr_events = []
        for ev in events:
            # do not mess with the input data
            ev = copy.deepcopy(ev)
            # best matching sample
            idx = value2idx(ev['onset'], tvec, conv_strategy)
            # store offset of sample time and real onset
            ev['orig_offset'] = ev['onset'] - tvec[idx]
            # rescue the real onset and duration into new attributes
            ev['orig_onset'] = ev['onset']
            ev['orig_duration'] = ev['duration']
            # figure out how many samples we need: all samples from the
            # matched one onwards whose time lies before the event's end
            remaining = tvec[idx:]
            ev['duration'] = \
                len(remaining[remaining < ev['onset'] + ev['duration']])
            # new onset is sample index
            ev['onset'] = idx
            descr_events.append(ev)
    else:
        descr_events = events
    # convert the event specs into the format expected by BoxcarMapper
    # take the first event as an example of contained keys
    evvars = {}
    for k in descr_events[0]:
        try:
            evvars[k] = [e[k] for e in descr_events]
        except KeyError:
            raise ValueError("Each event property must be present for all "
                             "events (could not find '%s')" % k)
    # checks
    for p in ['onset', 'duration']:
        if p not in evvars:
            raise ValueError("'%s' is a required property for all events."
                             % p)
    boxlength = max(evvars['duration'])
    if __debug__:
        if not max(evvars['duration']) == min(evvars['duration']):
            warning('Boxcar mapper will use maximum boxlength (%i) of all '
                    'provided Events.'% boxlength)

    # finally create, train and use the boxcar mapper
    bcm = BoxcarMapper(evvars['onset'], boxlength, space=eprefix)
    bcm.train(ds)
    ds = ds.get_mapped(bcm)
    if event_mapper is None:
        # at last reflatten the dataset
        # could we add some meaningful attribute during this mapping, i.e. would
        # assigning 'inspace' do something good?
        ds = ds.get_mapped(FlattenMapper(shape=ds.samples.shape[1:]))
    else:
        ds = ds.get_mapped(event_mapper)
    # add samples attributes for the events, simply dump everything as a samples
    # attribute
    for a in evvars:
        if eprefix is not None and a in ds.sa:
            # if there is already a samples attribute like this, it got mapped
            # by BoxcarMapper (i.e. is multi-dimensional). We move it aside
            # under new `eprefix` name
            ds.sa[eprefix + '_' + a] = ds.sa[a]
        if a in ['onset', 'duration']:
            # special case: we want the non-discrete, original onset and
            # duration
            if time_attr is not None:
                # but only if there was a conversion happening, since otherwise
                # we get the same info from BoxcarMapper
                ds.sa[a] = [e[a] for e in events]
        else:
            ds.sa[a] = evvars[a]
    return ds
Esempio n. 11
0
def eventrelated_dataset(ds,
                         events=None,
                         time_attr=None,
                         match='prev',
                         eprefix='event'):
    """Segment a dataset into a set of events.

    This function can be used to extract event-related samples from any
    time-series based dataset (actually, it doesn't have to be time series, but
    could also be any other type of ordered samples). Boxcar-shaped event
    samples, potentially spanning multiple input samples can be automatically
    extracted using :class:`~mvpa2.misc.support.Event` definition lists.  For
    each event all samples covering that particular event are used to form the
    corresponding sample.

    An event definition is a dictionary that contains ``onset`` (as sample index
    in the input dataset), ``duration`` (as number of consecutive samples after
    the onset), as well as an arbitrary number of additional attributes.

    Alternatively, ``onset`` and ``duration`` may also be given as real time
    stamps (or durations). In this case a to be specified samples attribute in
    the input dataset will be used to convert these into sample indices.

    Parameters
    ----------
    ds : Dataset
      The samples of this input dataset have to be in whatever ascending order.
    events : list
      Each event definition has to specify ``onset`` and ``duration``. All other
      attributes will be passed on to the sample attributes collection of the
      returned dataset. At least one event is required. The keys of the first
      event determine which properties are collected, and each of them has to
      be present in all events.
    time_attr : str or None
      If not None, the ``onset`` and ``duration`` specs from the event list will
      be converted using information from this sample attribute. Its values will
      be treated as in-the-same-unit and are used to determine corresponding
      samples from real-value onset and duration definitions.
    match : {'prev', 'next', 'closest'}
      Strategy used to match real-value onsets to sample indices. 'prev' chooses
      the closest preceding sample, 'next' the closest following sample and
      'closest' the absolute closest sample.
    eprefix : str or None
      If not None, this prefix is used to name additional attributes generated
      by the underlying `~mvpa2.mappers.boxcar.BoxcarMapper`. If it is set to
      None, no additional attributes will be created.

    Returns
    -------
    Dataset
      The returned dataset has one sample per each event definition that has
      been passed to the function.

    Raises
    ------
    ValueError
      If `events` is None or empty, if a property of the first event is
      missing from any other event, or if ``onset`` or ``duration`` is not
      among the event properties.
    KeyError
      If `match` is not one of the supported strategies.

    Examples
    --------
    The documentation also contains an :ref:`example script
    <example_eventrelated>` showing a spatio-temporal analysis of fMRI data
    that involves this function.

    >>> from mvpa2.datasets import Dataset
    >>> ds = Dataset(np.random.randn(10, 25))
    >>> events = [{'onset': 2, 'duration': 4},
    ...           {'onset': 4, 'duration': 4}]
    >>> eds = eventrelated_dataset(ds, events)
    >>> len(eds)
    2
    >>> eds.nfeatures == ds.nfeatures * 4
    True
    >>> 'mapper' in ds.a
    False
    >>> print eds.a.mapper
    <Chain: <Boxcar: bl=4>-<Flatten>>

    And now the same conversion, but with events specified as real time. This is
    only possible if the input dataset contains a sample attribute with the
    necessary information about the input samples.

    >>> ds.sa['record_time'] = np.linspace(0, 5, len(ds))
    >>> rt_events = [{'onset': 1.05, 'duration': 2.2},
    ...              {'onset': 2.3, 'duration': 2.12}]
    >>> rt_eds = eventrelated_dataset(ds, rt_events, time_attr='record_time',
    ...                               match='closest')
    >>> np.all(eds.samples == rt_eds.samples)
    True
    >>> # returned dataset e.g. has info from original samples
    >>> rt_eds.sa.record_time
    array([[ 1.11111111,  1.66666667,  2.22222222,  2.77777778],
           [ 2.22222222,  2.77777778,  3.33333333,  3.88888889]])
    """
    # relabel argument
    conv_strategy = {
        'prev': 'floor',
        'next': 'ceil',
        'closest': 'round'
    }[match]

    # without this check a missing or empty event list surfaces much later as
    # an unrelated TypeError/IndexError/AttributeError
    if events is None or len(events) == 0:
        raise ValueError("'events' must be a non-empty list of event "
                         "definitions.")

    if time_attr is not None:
        tvec = ds.sa[time_attr].value
        # we are asked to convert onset time into sample ids
        descr_events = []
        for ev in events:
            # do not mess with the input data
            ev = copy.deepcopy(ev)
            # best matching sample
            idx = value2idx(ev['onset'], tvec, conv_strategy)
            # store offset of sample time and real onset
            ev['orig_offset'] = ev['onset'] - tvec[idx]
            # rescue the real onset and duration into new attributes
            ev['orig_onset'] = ev['onset']
            ev['orig_duration'] = ev['duration']
            # figure out how many samples we need: all samples from the
            # matched one onwards whose time lies before the event's end
            remaining = tvec[idx:]
            ev['duration'] = \
                len(remaining[remaining < ev['onset'] + ev['duration']])
            # new onset is sample index
            ev['onset'] = idx
            descr_events.append(ev)
    else:
        descr_events = events
    # convert the event specs into the format expected by BoxcarMapper
    # take the first event as an example of contained keys
    evvars = {}
    for k in descr_events[0]:
        try:
            evvars[k] = [e[k] for e in descr_events]
        except KeyError:
            raise ValueError("Each event property must be present for all "
                             "events (could not find '%s')" % k)
    # checks
    for p in ['onset', 'duration']:
        if p not in evvars:
            raise ValueError("'%s' is a required property for all events." % p)
    boxlength = max(evvars['duration'])
    if __debug__:
        if not max(evvars['duration']) == min(evvars['duration']):
            warning('Boxcar mapper will use maximum boxlength (%i) of all '
                    'provided Events.' % boxlength)

    # finally create, train and use the boxcar mapper
    bcm = BoxcarMapper(evvars['onset'], boxlength, space=eprefix)
    bcm.train(ds)
    ds = ds.get_mapped(bcm)
    # at last reflatten the dataset
    # could we add some meaningful attribute during this mapping, i.e. would
    # assigning 'inspace' do something good?
    ds = ds.get_mapped(FlattenMapper(shape=ds.samples.shape[1:]))
    # add samples attributes for the events, simply dump everything as a samples
    # attribute
    for a in evvars:
        if eprefix is not None and a in ds.sa:
            # if there is already a samples attribute like this, it got mapped
            # by BoxcarMapper (i.e. is multi-dimensional). We move it aside
            # under new `eprefix` name
            ds.sa[eprefix + '_' + a] = ds.sa[a]
        if a in ['onset', 'duration']:
            # special case: we want the non-discrete, original onset and
            # duration
            if time_attr is not None:
                # but only if there was a conversion happening, since otherwise
                # we get the same info from BoxcarMapper
                ds.sa[a] = [e[a] for e in events]
        else:
            ds.sa[a] = evvars[a]
    return ds
Esempio n. 12
0
def timesegments_classification(
        dss,
        hyper=None,
        part1=HalfPartitioner(),
        part2=NFoldPartitioner(attr='subjects'),
        window_size=6,
        overlapping_windows=True,
        distance='correlation',
        do_zscore=True):
    """Time-segment classification across subjects using Hyperalignment

    Parameters
    ----------
    dss : list of datasets
       Datasets to benchmark on.  Usually a single dataset per subject.
    hyper : Hyperalignment-like, optional
       Beast which if called on a list of datasets should spit out trained
       mappers.  If not specified, `IdentityMapper`s will be used
    part1 : Partitioner, optional
       Partitioner to split data for hyperalignment "cross-validation"
    part2 : Partitioner, optional
       Partitioner for CV within the hyperalignment test split
    window_size : int, optional
       How many temporal points to consider for a classification sample
    overlapping_windows : bool, optional
       Strategy for how to create and classify "samples" for classification.
       If True -- `window_size` samples from each time point (but trailing
       ones) constitute a sample, and upon "predict" `window_size` of samples
       around each test point is not considered.  If False -- samples are just
       taken (with training and testing splits) at `window_size` step from one
       to another.
    distance : {'correlation'}, optional
       Distance used to compare a test sample against the mean training
       patterns.  Only 'correlation' (1 - Pearson correlation) is implemented.
    do_zscore : bool, optional
       Perform zscoring (overall, not per-chunk) for each dataset upon
       partitioning with part1

    Returns
    -------
    np.ndarray
       Classification errors: one row per `part1` partition, one entry per
       `part2` partition within it.

    Raises
    ------
    NotImplementedError
       If `distance` is anything but 'correlation'.
    """
    # Fail before any partitioning or hyperalignment is done, rather than
    # after the expensive part of the first iteration
    if distance != 'correlation':
        raise NotImplementedError(
            "Unsupported distance %r; only 'correlation' is implemented"
            % (distance,))

    # Generate outer-most partitioning ()
    parts = [copy.deepcopy(part1).generate(ds) for ds in dss]

    iteration = 1
    errors = []

    while True:
        try:
            # builtin next() works on Python 2.6+ and 3, unlike p.next()
            dss_partitioned = [next(p) for p in parts]
        except StopIteration:
            # we are done -- no more partitions
            break
        if __debug__:
            debug("BM", "Iteration %d", iteration)

        dss_train, dss_test = zip(*[list(Splitter("partitions").generate(ds))
                                    for ds in dss_partitioned])

        # TODO:  allow for doing feature selection

        if do_zscore:
            for ds in dss_train + dss_test:
                zscore(ds, chunks_attr=None)

        if hyper is not None:
            # since otherwise it would remember previous loop dataset as the "commonspace"
            # Now let's do hyperalignment but on a copy in each loop iteration
            hyper_ = copy.deepcopy(hyper)
            mappers = hyper_(dss_train)
        else:
            mappers = [IdentityMapper() for _ in dss_train]

        dss_test_aligned = [mapper.forward(ds) for mapper, ds in zip(mappers, dss_test)]

        # assign .sa.subjects to those datasets
        for i, ds in enumerate(dss_test_aligned):
            # part2.attr is by default "subjects"
            ds.sa[part2.attr] = [i]

        dss_test_bc = []
        for ds in dss_test_aligned:
            if overlapping_windows:
                # a real list, so it can be stored as a sample attribute on
                # Python 3 too
                startpoints = list(range(len(ds) - window_size + 1))
            else:
                startpoints = _get_nonoverlapping_startpoints(len(ds), window_size)
            bm = BoxcarMapper(startpoints, window_size)
            bm.train(ds)
            ds_ = bm.forward(ds)
            ds_.sa['startpoints'] = startpoints
            # reassign subjects so they are not arrays: collapse each boxcar's
            # values to their single unique value (np.asscalar is gone from
            # current NumPy, .item() is its direct replacement)
            ds_.sa[part2.attr] = [np.unique(x).item()
                                  for x in ds_.sa[part2.attr].value]

            fm = FlattenMapper()
            fm.train(ds_)
            dss_test_bc.append(ds_.get_mapped(fm))

        ds_test = vstack(dss_test_bc)
        # Perform classification across subjects comparing against mean
        # spatio-temporal pattern of other subjects
        errors_across_subjects = []
        for ds_test_part in part2.generate(ds_test):
            ds_train_, ds_test_ = list(Splitter("partitions").generate(ds_test_part))
            # average across subjects to get a representative pattern per timepoint
            ds_train_ = mean_group_sample(['startpoints'])(ds_train_)
            assert(ds_train_.shape == ds_test_.shape)

            # TODO: redo more efficiently since now we are creating full
            # corrcoef matrix.  Also we might better just take a name for
            # the pdist measure but then implement them efficiently
            # (i.e. without hstacking both pieces together first)
            dist = 1 - np.corrcoef(ds_train_, ds_test_)[len(ds_test_):, :len(ds_test_)]

            if overlapping_windows:
                dist = wipe_out_offdiag(dist, window_size)

            # a test sample counts as correct when its smallest distance is
            # to the training pattern with the same index
            winners = np.argmin(dist, axis=1)
            error = np.mean(winners != np.arange(len(winners)))
            errors_across_subjects.append(error)
        errors.append(errors_across_subjects)
        iteration += 1

    errors = np.array(errors)
    if __debug__:
        debug("BM", "Finished with %s array of errors. Mean error %.2f"
              % (errors.shape, np.mean(errors)))
    return errors
Esempio n. 13
0
def timesegments_classification(dss,
                                hyper=None,
                                part1=HalfPartitioner(),
                                part2=NFoldPartitioner(attr='subjects'),
                                window_size=6,
                                overlapping_windows=True,
                                distance='correlation',
                                do_zscore=True):
    """Time-segment classification across subjects using Hyperalignment

    Parameters
    ----------
    dss : list of datasets
       Datasets to benchmark on.  Usually a single dataset per subject.
    hyper : Hyperalignment-like, optional
       Beast which if called on a list of datasets should spit out trained
       mappers.  If not specified, `IdentityMapper`s will be used
    part1 : Partitioner, optional
       Partitioner to split data for hyperalignment "cross-validation"
    part2 : Partitioner, optional
       Partitioner for CV within the hyperalignment test split
    window_size : int, optional
       How many temporal points to consider for a classification sample
    overlapping_windows : bool, optional
       Strategy for how to create and classify "samples" for classification.
       If True -- `window_size` samples from each time point (but trailing
       ones) constitute a sample, and upon "predict" `window_size` of samples
       around each test point is not considered.  If False -- samples are just
       taken (with training and testing splits) at `window_size` step from one
       to another.
    distance : {'correlation'}, optional
       Distance used to compare a test sample against the mean training
       patterns.  Only 'correlation' (1 - Pearson correlation) is implemented.
    do_zscore : bool, optional
       Perform zscoring (overall, not per-chunk) for each dataset upon
       partitioning with part1

    Returns
    -------
    np.ndarray
       Classification errors: one row per `part1` partition, one entry per
       `part2` partition within it.

    Raises
    ------
    NotImplementedError
       If `distance` is anything but 'correlation'.
    """
    # Fail before any partitioning or hyperalignment is done, rather than
    # after the expensive part of the first iteration
    if distance != 'correlation':
        raise NotImplementedError(
            "Unsupported distance %r; only 'correlation' is implemented" %
            (distance, ))

    # Generate outer-most partitioning ()
    parts = [copy.deepcopy(part1).generate(ds) for ds in dss]

    iteration = 1
    errors = []

    while True:
        try:
            # builtin next() works on Python 2.6+ and 3, unlike p.next()
            dss_partitioned = [next(p) for p in parts]
        except StopIteration:
            # we are done -- no more partitions
            break
        if __debug__:
            debug("BM", "Iteration %d", iteration)

        dss_train, dss_test = zip(*[
            list(Splitter("partitions").generate(ds)) for ds in dss_partitioned
        ])

        # TODO:  allow for doing feature selection

        if do_zscore:
            for ds in dss_train + dss_test:
                zscore(ds, chunks_attr=None)

        if hyper is not None:
            # since otherwise it would remember previous loop dataset as the "commonspace"
            # Now let's do hyperalignment but on a copy in each loop iteration
            hyper_ = copy.deepcopy(hyper)
            mappers = hyper_(dss_train)
        else:
            mappers = [IdentityMapper() for _ in dss_train]

        dss_test_aligned = [
            mapper.forward(ds) for mapper, ds in zip(mappers, dss_test)
        ]

        # assign .sa.subjects to those datasets
        for i, ds in enumerate(dss_test_aligned):
            # part2.attr is by default "subjects"
            ds.sa[part2.attr] = [i]

        dss_test_bc = []
        for ds in dss_test_aligned:
            if overlapping_windows:
                # a real list, so it can be stored as a sample attribute on
                # Python 3 too
                startpoints = list(range(len(ds) - window_size + 1))
            else:
                startpoints = _get_nonoverlapping_startpoints(
                    len(ds), window_size)
            bm = BoxcarMapper(startpoints, window_size)
            bm.train(ds)
            ds_ = bm.forward(ds)
            ds_.sa['startpoints'] = startpoints

            # reassign subjects so they are not arrays: collapse each boxcar's
            # values to their single unique value (np.asscalar is gone from
            # current NumPy, .item() is its direct replacement)
            ds_.sa[part2.attr] = [
                np.unique(x).item() for x in ds_.sa[part2.attr].value
            ]

            fm = FlattenMapper()
            fm.train(ds_)
            dss_test_bc.append(ds_.get_mapped(fm))

        ds_test = vstack(dss_test_bc)
        # Perform classification across subjects comparing against mean
        # spatio-temporal pattern of other subjects
        errors_across_subjects = []
        for ds_test_part in part2.generate(ds_test):
            ds_train_, ds_test_ = list(
                Splitter("partitions").generate(ds_test_part))
            # average across subjects to get a representative pattern per timepoint
            ds_train_ = mean_group_sample(['startpoints'])(ds_train_)
            assert (ds_train_.shape == ds_test_.shape)

            # TODO: redo more efficiently since now we are creating full
            # corrcoef matrix.  Also we might better just take a name for
            # the pdist measure but then implement them efficiently
            # (i.e. without hstacking both pieces together first)
            dist = 1 - np.corrcoef(
                ds_train_, ds_test_)[len(ds_test_):, :len(ds_test_)]

            if overlapping_windows:
                dist = wipe_out_offdiag(dist, window_size)

            # a test sample counts as correct when its smallest distance is
            # to the training pattern with the same index
            winners = np.argmin(dist, axis=1)
            error = np.mean(winners != np.arange(len(winners)))
            errors_across_subjects.append(error)
        errors.append(errors_across_subjects)
        iteration += 1

    errors = np.array(errors)
    if __debug__:
        debug(
            "BM", "Finished with %s array of errors. Mean error %.2f" %
            (errors.shape, np.mean(errors)))
    return errors