def _test_compare_to_old(self):
        """Good just to compare if I didn't screw up anything... treat
        it as a regression test
        """
        import mvpa2.mappers.wavelet_ as wavelet_

        ds = datasets['uni2medium']
        d2d = ds.samples
        ws = 16                          # size of timeline for wavelet
        sp = np.arange(ds.nsamples-ws*2) + ws

        # create 3D instance (samples x timepoints x channels)
        bcm = BoxcarMapper(sp, ws)
        d3d = bcm.forward(d2d)

        # use wavelet mapper
        for wdm, wdm_ in ((WaveletTransformationMapper(),
                           wavelet_.WaveletTransformationMapper()),
                          (WaveletPacketMapper(),
                           wavelet_.WaveletPacketMapper()),):
            d3d_wd = wdm(d3d)
            d3d_wd_ = wdm_(d3d)

            self.failUnless((d3d_wd == d3d_wd_).all(),
                msg="We should have got same result with old and new code. "
                    "Got %s and %s" % (d3d_wd, d3d_wd_))
Exemple #2
0
    def _test_compare_to_old(self):
        """Good just to compare if I didn't screw up anything... treat
        it as a regression test
        """
        import mvpa2.mappers.wavelet_ as wavelet_

        ds = datasets['uni2medium']
        d2d = ds.samples
        ws = 16                          # size of timeline for wavelet
        sp = np.arange(ds.nsamples-ws*2) + ws

        # create 3D instance (samples x timepoints x channels)
        bcm = BoxcarMapper(sp, ws)
        d3d = bcm.forward(d2d)

        # use wavelet mapper
        for wdm, wdm_ in ((WaveletTransformationMapper(),
                           wavelet_.WaveletTransformationMapper()),
                          (WaveletPacketMapper(),
                           wavelet_.WaveletPacketMapper()),):
            d3d_wd = wdm(d3d)
            d3d_wd_ = wdm_(d3d)

            self.assertTrue((d3d_wd == d3d_wd_).all(),
                msg="We should have got same result with old and new code. "
                    "Got %s and %s" % (d3d_wd, d3d_wd_))
Exemple #3
0
    def test_simple_wdm(self):
        """
        """
        ds = datasets['uni2medium']
        d2d = ds.samples
        ws = 15  # size of timeline for wavelet
        sp = np.arange(ds.nsamples - ws * 2) + ws

        # create 3D instance (samples x timepoints x channels)
        bcm = BoxcarMapper(sp, ws)
        d3d = bcm.forward(d2d)

        # use wavelet mapper
        wdm = WaveletTransformationMapper()
        d3d_wd = wdm.forward(d3d)
        d3d_swap = d3d.swapaxes(1, 2)

        self.assertRaises(ValueError,
                          WaveletTransformationMapper,
                          wavelet='bogus')
        self.assertRaises(ValueError,
                          WaveletTransformationMapper,
                          mode='bogus')

        # use wavelet mapper
        for wdm, wdm_swap in ((WaveletTransformationMapper(),
                               WaveletTransformationMapper(dim=2)),
                              (WaveletPacketMapper(),
                               WaveletPacketMapper(dim=2))):
            for dd, dd_swap in ((d3d, d3d_swap), (d2d, None)):
                dd_wd = wdm.forward(dd)
                if dd_swap is not None:
                    dd_wd_swap = wdm_swap.forward(dd_swap)

                    self.assertTrue(
                        (dd_wd == dd_wd_swap.swapaxes(1, 2)).all(),
                        msg="We should have got same result with swapped "
                        "dimensions and explicit mentioining of it. "
                        "Got %s and %s" % (dd_wd, dd_wd_swap))

                # some sanity checks
                self.assertTrue(dd_wd.shape[0] == dd.shape[0])

                if not isinstance(wdm, WaveletPacketMapper):
                    # we can do reverse only for DWT
                    dd_rev = wdm.reverse(dd_wd)
                    # inverse transform might be not exactly as the
                    # input... but should be very close ;-)
                    self.assertEqual(dd_rev.shape,
                                     dd.shape,
                                     msg="Shape should be the same after iDWT")

                    diff = np.linalg.norm(dd - dd_rev)
                    ornorm = np.linalg.norm(dd)
                    self.assertTrue(diff / ornorm < 1e-10)
    def test_simple_wdm(self):
        """
        """
        ds = datasets['uni2medium']
        d2d = ds.samples
        ws = 15                          # size of timeline for wavelet
        sp = np.arange(ds.nsamples-ws*2) + ws

        # create 3D instance (samples x timepoints x channels)
        bcm = BoxcarMapper(sp, ws)
        d3d = bcm.forward(d2d)

        # use wavelet mapper
        wdm = WaveletTransformationMapper()
        d3d_wd = wdm.forward(d3d)
        d3d_swap = d3d.swapaxes(1,2)

        self.failUnlessRaises(ValueError, WaveletTransformationMapper,
                              wavelet='bogus')
        self.failUnlessRaises(ValueError, WaveletTransformationMapper,
                              mode='bogus')

        # use wavelet mapper
        for wdm, wdm_swap in ((WaveletTransformationMapper(),
                               WaveletTransformationMapper(dim=2)),
                              (WaveletPacketMapper(),
                               WaveletPacketMapper(dim=2))):
          for dd, dd_swap in ((d3d, d3d_swap),
                              (d2d, None)):
            dd_wd = wdm.forward(dd)
            if dd_swap is not None:
                dd_wd_swap = wdm_swap.forward(dd_swap)

                self.failUnless((dd_wd == dd_wd_swap.swapaxes(1,2)).all(),
                                msg="We should have got same result with swapped "
                                "dimensions and explicit mentioining of it. "
                                "Got %s and %s" % (dd_wd, dd_wd_swap))

            # some sanity checks
            self.failUnless(dd_wd.shape[0] == dd.shape[0])

            if not isinstance(wdm, WaveletPacketMapper):
                # we can do reverse only for DWT
                dd_rev = wdm.reverse(dd_wd)
                # inverse transform might be not exactly as the
                # input... but should be very close ;-)
                self.failUnlessEqual(dd_rev.shape, dd.shape,
                                     msg="Shape should be the same after iDWT")

                diff = np.linalg.norm(dd - dd_rev)
                ornorm = np.linalg.norm(dd)
                self.failUnless(diff/ornorm < 1e-10)
Exemple #5
0
    def test_simple_wp1_level(self):
        """
        """

        ds = datasets['uni2large']
        d2d = ds.samples
        ws = 50  # size of timeline for wavelet
        sp = (np.arange(ds.nsamples - ws * 2) + ws)[:4]

        # create 3D instance (samples x timepoints x channels)
        bcm = BoxcarMapper(sp, ws)
        d3d = bcm.forward(d2d)

        # use wavelet mapper
        wdm = WaveletPacketMapper(level=2, wavelet='sym2')
        d3d_wd = wdm.forward(d3d)

        # Check dimensionality
        d3d_wds, d3ds = d3d_wd.shape, d3d.shape
        self.assertTrue(len(d3d_wds) == len(d3ds) + 1)
        self.assertTrue(d3d_wds[1] * d3d_wds[2] >= d3ds[1])
        self.assertTrue(d3d_wds[0] == d3ds[0])
        self.assertTrue(d3d_wds[-1] == d3ds[-1])
        #print d2d.shape, d3d.shape, d3d_wd.shape

        if externals.exists('pywt wp reconstruct'):
            # Test reverse -- should be identical
            # we can do reverse only for DWT
            d3d_rev = wdm.reverse(d3d_wd)

            # inverse transform might be not exactly as the
            # input... but should be very close ;-)
            self.assertEqual(d3d_rev.shape,
                             d3d.shape,
                             msg="Shape should be the same after iDWT")

            diff = np.linalg.norm(d3d - d3d_rev)
            ornorm = np.linalg.norm(d3d)

            skip_if_no_external('pywt wp reconstruct fixed')
            self.assertTrue(diff / ornorm < 1e-10)
        else:
            self.assertRaises(NotImplementedError, wdm.reverse, d3d_wd)
    def test_simple_wp1_level(self):
        """
        """

        ds = datasets['uni2large']
        d2d = ds.samples
        ws = 50                          # size of timeline for wavelet
        sp = (np.arange(ds.nsamples - ws*2) + ws)[:4]

        # create 3D instance (samples x timepoints x channels)
        bcm = BoxcarMapper(sp, ws)
        d3d = bcm.forward(d2d)

        # use wavelet mapper
        wdm = WaveletPacketMapper(level=2, wavelet='sym2')
        d3d_wd = wdm.forward(d3d)

        # Check dimensionality
        d3d_wds, d3ds = d3d_wd.shape, d3d.shape
        self.failUnless(len(d3d_wds) == len(d3ds)+1)
        self.failUnless(d3d_wds[1] * d3d_wds[2] >= d3ds[1])
        self.failUnless(d3d_wds[0] == d3ds[0])
        self.failUnless(d3d_wds[-1] == d3ds[-1])
        #print d2d.shape, d3d.shape, d3d_wd.shape

        if externals.exists('pywt wp reconstruct'):
            # Test reverse -- should be identical
            # we can do reverse only for DWT
            d3d_rev = wdm.reverse(d3d_wd)

            # inverse transform might be not exactly as the
            # input... but should be very close ;-)
            self.failUnlessEqual(d3d_rev.shape, d3d.shape,
                                 msg="Shape should be the same after iDWT")

            diff = np.linalg.norm(d3d - d3d_rev)
            ornorm = np.linalg.norm(d3d)

            skip_if_no_external('pywt wp reconstruct fixed')
            self.failUnless(diff/ornorm < 1e-10)
        else:
            self.failUnlessRaises(NotImplementedError, wdm.reverse, d3d_wd)
def test_simpleboxcar():
    data = np.atleast_2d(np.arange(10)).T
    sp = np.arange(10)

    # check if stupid thing don't work
    assert_raises(ValueError, BoxcarMapper, sp, 0)

    # now do an identity transformation
    bcm = BoxcarMapper(sp, 1)
    trans = bcm.forward(data)
    # ,0 is a feature below, so we get explicit 2D out of 1D
    assert_array_equal(trans[:, 0], data)

    # now check for illegal boxes
    if __debug__:
        # condition is checked only in __debug__
        assert_raises(ValueError, BoxcarMapper(sp, 2).train, data)

    # now something that should work
    nbox = 9
    boxlength = 2
    sp = np.arange(nbox)
    bcm = BoxcarMapper(sp, boxlength)
    trans = bcm.forward(data)
    # check that is properly upcasts the dimensionality
    assert_equal(trans.shape, (nbox, boxlength) + data.shape[1:])
    # check actual values, squeezing the last dim for simplicity
    assert_array_equal(trans.squeeze(),
                       np.vstack((np.arange(9), np.arange(9) + 1)).T)

    # now test for proper data shape
    data = np.ones((10, 3, 4, 2))
    sp = [2, 4, 3, 5]
    trans = BoxcarMapper(sp, 4).forward(data)
    assert_equal(trans.shape, (4, 4, 3, 4, 2))

    # test reverse
    data = np.arange(240).reshape(10, 3, 4, 2)
    sp = [2, 4, 3, 5]
    boxlength = 2
    m = BoxcarMapper(sp, boxlength)
    m.train(data)
    mp = m.forward(data)
    assert_equal(mp.shape, (4, 2, 3, 4, 2))

    # try full reconstruct
    mr = m.reverse(mp)
    # shape has to match
    assert_equal(mr.shape, (len(sp) * boxlength, ) + data.shape[1:])
    # only known samples are part of the results
    assert_true((mr >= 24).all())
    assert_true((mr < 168).all())

    # check proper reconstruction of non-conflicting sample
    assert_array_equal(mr[0].ravel(), np.arange(48, 72))

    # check proper reconstruction of samples being part of multiple
    # mapped samples
    assert_array_equal(mr[1].ravel(), np.arange(72, 96))

    # test reverse of a single sample
    singlesample = np.arange(48).reshape(2, 3, 4, 2)
    assert_array_equal(singlesample, m.reverse1(singlesample))
    # now in a dataset
    ds = Dataset([singlesample])
    assert_equal(ds.shape, (1, ) + singlesample.shape)
    # after reverse mapping the 'sample axis' should vanish and the original 3d
    # shape of the samples should be restored
    assert_equal(ds.shape[1:], m.reverse(ds).shape)
    # multiple samples should just be concatenated along the samples axis
    ds = Dataset([singlesample, singlesample])
    assert_equal((np.prod(ds.shape[:2]), ) + singlesample.shape[1:],
                 m.reverse(ds).shape)
    # should not work for shape mismatch, but it does work and is useful when
    # reverse mapping sample attributes
    #assert_raises(ValueError, m.reverse, singlesample[0])

    # check broadcasting of 'raw' samples into proper boxcars on forward()
    bc = m.forward1(np.arange(24).reshape(3, 4, 2))
    assert_array_equal(bc, np.array(2 * [np.arange(24).reshape(3, 4, 2)]))
def test_datasetmapping():
    # 6 samples, 4X2 features
    data = np.arange(48).reshape(6, 4, 2)
    ds = Dataset(data,
                 sa={
                     'timepoints': np.arange(6),
                     'multidim': data.copy()
                 },
                 fa={'fid': np.arange(4)})
    # with overlapping and non-overlapping boxcars
    startpoints = [0, 1, 4]
    boxlength = 2
    bm = BoxcarMapper(startpoints, boxlength, space='boxy')
    # train is critical
    bm.train(ds)
    mds = bm.forward(ds)
    assert_equal(len(mds), len(startpoints))
    assert_equal(mds.nfeatures, boxlength)
    # all samples attributes remain, but the can rotated/compressed into
    # multidimensional attributes
    assert_equal(sorted(mds.sa.keys()),
                 ['boxy_onsetidx'] + sorted(ds.sa.keys()))
    assert_equal(mds.sa.multidim.shape,
                 (len(startpoints), boxlength) + ds.shape[1:])
    assert_equal(mds.sa.timepoints.shape, (len(startpoints), boxlength))
    assert_array_equal(mds.sa.timepoints.flatten(),
                       np.array([(s, s + 1) for s in startpoints]).flatten())
    assert_array_equal(mds.sa.boxy_onsetidx, startpoints)
    # feature attributes also get rotated and broadcasted
    assert_array_equal(mds.fa.fid, [ds.fa.fid, ds.fa.fid])
    # and finally there is a new one
    assert_array_equal(mds.fa.boxy_offsetidx, list(range(boxlength)))

    # now see how it works on reverse()
    rds = bm.reverse(mds)
    # we got at least something of all original attributes back
    assert_equal(sorted(rds.sa.keys()), sorted(ds.sa.keys()))
    assert_equal(sorted(rds.fa.keys()), sorted(ds.fa.keys()))
    # it is not possible to reconstruct the full samples array
    # some samples even might show up multiple times (when there are overlapping
    # boxcars
    assert_array_equal(
        rds.samples,
        np.array([[[0, 1], [2, 3], [4, 5], [6, 7]],
                  [[8, 9], [10, 11], [12, 13], [14, 15]],
                  [[8, 9], [10, 11], [12, 13], [14, 15]],
                  [[16, 17], [18, 19], [20, 21], [22, 23]],
                  [[32, 33], [34, 35], [36, 37], [38, 39]],
                  [[40, 41], [42, 43], [44, 45], [46, 47]]]))
    assert_array_equal(rds.sa.timepoints, [0, 1, 1, 2, 4, 5])
    assert_array_equal(rds.sa.multidim, ds.sa.multidim[rds.sa.timepoints])
    # but feature attributes should be fully recovered
    assert_array_equal(rds.fa.fid, ds.fa.fid)

    # popular dataset configuration (double flatten + boxcar)
    cm = ChainMapper([FlattenMapper(), bm, FlattenMapper()])
    cm.train(ds)
    bflat = ds.get_mapped(cm)
    assert_equal(bflat.shape,
                 (len(startpoints), boxlength * np.prod(ds.shape[1:])))
    # add attributes
    bflat.fa['testfa'] = np.arange(bflat.nfeatures)
    bflat.sa['testsa'] = np.arange(bflat.nsamples)
    # now try to go back
    bflatrev = bflat.mapper.reverse(bflat)
    # data should be same again, as far as the boxcars match
    assert_array_equal(ds.samples[:2], bflatrev.samples[:2])
    assert_array_equal(ds.samples[-2:], bflatrev.samples[-2:])
    # feature axis should match
    assert_equal(ds.shape[1:], bflatrev.shape[1:])
Exemple #9
0
def timesegments_classification(dss,
                                window_size=6,
                                overlapping_windows=True,
                                distance='correlation',
                                do_zscore=True):
    """Time-segment classification across subjects using Hyperalignment

    Parameters
    ----------
    dss : list of datasets
       Datasets to benchmark on.  Usually a single dataset per subject.
    window_size : int, optional
       How many temporal points to consider for a classification sample
    overlapping_windows : bool, optional
       Strategy to how create and classify "samples" for classification.  If
       True -- `window_size` samples from each time point (but trailing ones)
       constitute a sample, and upon "predict" `window_size` of samples around
       each test point is not considered.  If False -- samples are just taken
       (with training and testing splits) at `window_size` step from one to
       another.
    do_zscore : bool, optional
       Perform zscoring (overall, not per-chunk) for each dataset upon
       partitioning with part1
    ...
    """
    part2 = NFoldPartitioner(attr='subjects')
    # Check if input list contains Datasets, ndarrays
    dss = [Dataset(ds) if not type(ds) == Dataset else ds for ds in dss]
    # TODO:  allow for doing feature selection
    if do_zscore:
        for ds in dss:
            zscore(ds, chunks_attr=None)

    # assign .sa.subjects to those datasets
    for i, ds in enumerate(dss):
        # part2.attr is by default "subjects"
        ds.sa[part2.attr] = [i]

    dss_test_bc = []
    for ds in dss:
        if overlapping_windows:
            startpoints = range(len(ds) - window_size + 1)
        else:
            startpoints = _get_nonoverlapping_startpoints(len(ds), window_size)
        bm = BoxcarMapper(startpoints, window_size)
        bm.train(ds)
        ds_ = bm.forward(ds)
        ds_.sa['startpoints'] = startpoints

        # reassign subjects so they are not arrays
        def assign_unique(ds, sa):
            ds.sa[sa] = [np.asscalar(np.unique(x)) for x in ds.sa[sa].value]

        assign_unique(ds_, part2.attr)

        fm = FlattenMapper()
        fm.train(ds_)
        dss_test_bc.append(ds_.get_mapped(fm))

    ds_test = vstack(dss_test_bc)
    # Perform classification across subjects comparing against mean
    # spatio-temporal pattern of other subjects
    errors_across_subjects = []
    for ds_test_part in part2.generate(ds_test):
        ds_train_, ds_test_ = list(
            Splitter("partitions").generate(ds_test_part))
        # average across subjects to get a representative pattern per timepoint
        ds_train_ = mean_group_sample(['startpoints'])(ds_train_)
        assert (ds_train_.shape == ds_test_.shape)

        if distance == 'correlation':
            # TODO: redo more efficiently since now we are creating full
            # corrcoef matrix.  Also we might better just take a name for
            # the pdist measure but then implement them efficiently
            # (i.e. without hstacking both pieces together first)
            dist = 1 - np.corrcoef(ds_train_,
                                   ds_test_)[len(ds_test_):, :len(ds_test_)]
        else:
            raise NotImplementedError

        if overlapping_windows:
            dist = wipe_out_offdiag(dist, window_size)

        winners = np.argmin(dist, axis=1)
        error = np.mean(winners != np.arange(len(winners)))
        errors_across_subjects.append(error)

    errors_across_subjects = np.asarray(errors_across_subjects)
    if __debug__:
        debug(
            "BM", "Finished with %s array of errors. Mean error %.2f" %
            (errors_across_subjects.shape, np.mean(errors_across_subjects)))
    return errors_across_subjects
Exemple #10
0
def test_simpleboxcar():
    data = np.atleast_2d(np.arange(10)).T
    sp = np.arange(10)

    # check if stupid thing don't work
    assert_raises(ValueError, BoxcarMapper, sp, 0)

    # now do an identity transformation
    bcm = BoxcarMapper(sp, 1)
    trans = bcm.forward(data)
    # ,0 is a feature below, so we get explicit 2D out of 1D
    assert_array_equal(trans[:,0], data)

    # now check for illegal boxes
    if __debug__:
        # condition is checked only in __debug__
        assert_raises(ValueError, BoxcarMapper(sp, 2).train, data)

    # now something that should work
    nbox = 9
    boxlength = 2
    sp = np.arange(nbox)
    bcm = BoxcarMapper(sp, boxlength)
    trans = bcm.forward(data)
    # check that is properly upcasts the dimensionality
    assert_equal(trans.shape, (nbox, boxlength) + data.shape[1:])
    # check actual values, squeezing the last dim for simplicity
    assert_array_equal(trans.squeeze(), np.vstack((np.arange(9), np.arange(9)+1)).T)


    # now test for proper data shape
    data = np.ones((10,3,4,2))
    sp = [ 2, 4, 3, 5 ]
    trans = BoxcarMapper(sp, 4).forward(data)
    assert_equal(trans.shape, (4,4,3,4,2))

    # test reverse
    data = np.arange(240).reshape(10, 3, 4, 2)
    sp = [ 2, 4, 3, 5 ]
    boxlength = 2
    m = BoxcarMapper(sp, boxlength)
    m.train(data)
    mp = m.forward(data)
    assert_equal(mp.shape, (4, 2, 3, 4, 2))

    # try full reconstruct
    mr = m.reverse(mp)
    # shape has to match
    assert_equal(mr.shape, (len(sp) * boxlength,) + data.shape[1:])
    # only known samples are part of the results
    assert_true((mr >= 24).all())
    assert_true((mr < 168).all())

    # check proper reconstruction of non-conflicting sample
    assert_array_equal(mr[0].ravel(), np.arange(48, 72))

    # check proper reconstruction of samples being part of multiple
    # mapped samples
    assert_array_equal(mr[1].ravel(), np.arange(72, 96))

    # test reverse of a single sample
    singlesample = np.arange(48).reshape(2, 3, 4, 2)
    assert_array_equal(singlesample, m.reverse1(singlesample))
    # now in a dataset
    ds = Dataset([singlesample])
    assert_equal(ds.shape, (1,) + singlesample.shape)
    # after reverse mapping the 'sample axis' should vanish and the original 3d
    # shape of the samples should be restored
    assert_equal(ds.shape[1:], m.reverse(ds).shape)
    # multiple samples should just be concatenated along the samples axis
    ds = Dataset([singlesample, singlesample])
    assert_equal((np.prod(ds.shape[:2]),) + singlesample.shape[1:],
                 m.reverse(ds).shape)
    # should not work for shape mismatch, but it does work and is useful when
    # reverse mapping sample attributes
    #assert_raises(ValueError, m.reverse, singlesample[0])

    # check broadcasting of 'raw' samples into proper boxcars on forward()
    bc = m.forward1(np.arange(24).reshape(3, 4, 2))
    assert_array_equal(bc, np.array(2 * [np.arange(24).reshape(3, 4, 2)]))
Exemple #11
0
def test_datasetmapping():
    # 6 samples, 4X2 features
    data = np.arange(48).reshape(6,4,2)
    ds = Dataset(data,
                 sa={'timepoints': np.arange(6),
                     'multidim': data.copy()},
                 fa={'fid': np.arange(4)})
    # with overlapping and non-overlapping boxcars
    startpoints = [0, 1, 4]
    boxlength = 2
    bm = BoxcarMapper(startpoints, boxlength, space='boxy')
    # train is critical
    bm.train(ds)
    mds = bm.forward(ds)
    assert_equal(len(mds), len(startpoints))
    assert_equal(mds.nfeatures, boxlength)
    # all samples attributes remain, but the can rotated/compressed into
    # multidimensional attributes
    assert_equal(sorted(mds.sa.keys()), ['boxy_onsetidx'] + sorted(ds.sa.keys()))
    assert_equal(mds.sa.multidim.shape,
            (len(startpoints), boxlength) + ds.shape[1:])
    assert_equal(mds.sa.timepoints.shape, (len(startpoints), boxlength))
    assert_array_equal(mds.sa.timepoints.flatten(),
                       np.array([(s, s+1) for s in startpoints]).flatten())
    assert_array_equal(mds.sa.boxy_onsetidx, startpoints)
    # feature attributes also get rotated and broadcasted
    assert_array_equal(mds.fa.fid, [ds.fa.fid, ds.fa.fid])
    # and finally there is a new one
    assert_array_equal(mds.fa.boxy_offsetidx, range(boxlength))

    # now see how it works on reverse()
    rds = bm.reverse(mds)
    # we got at least something of all original attributes back
    assert_equal(sorted(rds.sa.keys()), sorted(ds.sa.keys()))
    assert_equal(sorted(rds.fa.keys()), sorted(ds.fa.keys()))
    # it is not possible to reconstruct the full samples array
    # some samples even might show up multiple times (when there are overlapping
    # boxcars
    assert_array_equal(rds.samples,
                       np.array([[[ 0,  1], [ 2,  3], [ 4,  5], [ 6,  7]],
                                 [[ 8,  9], [10, 11], [12, 13], [14, 15]],
                                 [[ 8,  9], [10, 11], [12, 13], [14, 15]],
                                 [[16, 17], [18, 19], [20, 21], [22, 23]],
                                 [[32, 33], [34, 35], [36, 37], [38, 39]],
                                 [[40, 41], [42, 43], [44, 45], [46, 47]]]))
    assert_array_equal(rds.sa.timepoints, [0, 1, 1, 2, 4, 5])
    assert_array_equal(rds.sa.multidim, ds.sa.multidim[rds.sa.timepoints])
    # but feature attributes should be fully recovered
    assert_array_equal(rds.fa.fid, ds.fa.fid)

    # popular dataset configuration (double flatten + boxcar)
    cm= ChainMapper([FlattenMapper(), bm, FlattenMapper()])
    cm.train(ds)
    bflat = ds.get_mapped(cm)
    assert_equal(bflat.shape, (len(startpoints), boxlength * np.prod(ds.shape[1:])))
    # add attributes
    bflat.fa['testfa'] = np.arange(bflat.nfeatures)
    bflat.sa['testsa'] = np.arange(bflat.nsamples)
    # now try to go back
    bflatrev = bflat.mapper.reverse(bflat)
    # data should be same again, as far as the boxcars match
    assert_array_equal(ds.samples[:2], bflatrev.samples[:2])
    assert_array_equal(ds.samples[-2:], bflatrev.samples[-2:])
    # feature axis should match
    assert_equal(ds.shape[1:], bflatrev.shape[1:])
Exemple #12
0
def timesegments_classification(
        dss,
        hyper=None,
        part1=HalfPartitioner(),
        part2=NFoldPartitioner(attr='subjects'),
        window_size=6,
        overlapping_windows=True,
        distance='correlation',
        do_zscore=True):
    """Time-segment classification across subjects using Hyperalignment

    Parameters
    ----------
    dss : list of datasets
       Datasets to benchmark on.  Usually a single dataset per subject.
    hyper : Hyperalignment-like, optional
       Beast which if called on a list of datasets should spit out trained
       mappers.  If not specified, `IdentityMapper`s will be used
    part1 : Partitioner, optional
       Partitioner to split data for hyperalignment "cross-validation"
    part2 : Partitioner, optional
       Partitioner for CV within the hyperalignment test split
    window_size : int, optional
       How many temporal points to consider for a classification sample
    overlapping_windows : bool, optional
       Strategy to how create and classify "samples" for classification.  If
       True -- `window_size` samples from each time point (but trailing ones)
       constitute a sample, and upon "predict" `window_size` of samples around
       each test point is not considered.  If False -- samples are just taken
       (with training and testing splits) at `window_size` step from one to
       another.
    do_zscore : bool, optional
       Perform zscoring (overall, not per-chunk) for each dataset upon
       partitioning with part1
    ...
    """
    # Generate outer-most partitioning ()
    parts = [copy.deepcopy(part1).generate(ds) for ds in dss]

    iter = 1
    errors = []

    while True:
        try:
            dss_partitioned = [p.next() for p in parts]
        except StopIteration:
            # we are done -- no more partitions
            break
        if __debug__:
            debug("BM", "Iteration %d", iter)

        dss_train, dss_test = zip(*[list(Splitter("partitions").generate(ds))
                                    for ds in dss_partitioned])

        # TODO:  allow for doing feature selection

        if do_zscore:
            for ds in dss_train + dss_test:
                zscore(ds, chunks_attr=None)

        if hyper is not None:
            # since otherwise it would remember previous loop dataset as the "commonspace"
            # Now let's do hyperalignment but on a copy in each loop iteration
            hyper_ = copy.deepcopy(hyper)
            mappers = hyper_(dss_train)
        else:
            mappers = [IdentityMapper() for ds in dss_train]

        dss_test_aligned = [mapper.forward(ds) for mapper, ds in zip(mappers, dss_test)]

        # assign .sa.subjects to those datasets
        for i, ds in enumerate(dss_test_aligned):
            # part2.attr is by default "subjects"
            ds.sa[part2.attr] = [i]

        dss_test_bc = []
        for ds in dss_test_aligned:
            if overlapping_windows:
                startpoints = range(len(ds) - window_size + 1)
            else:
                startpoints = _get_nonoverlapping_startpoints(len(ds), window_size)
            bm = BoxcarMapper(startpoints, window_size)
            bm.train(ds)
            ds_ = bm.forward(ds)
            ds_.sa['startpoints'] = startpoints
            # reassign subjects so they are not arrays
            def assign_unique(ds, sa):
                ds.sa[sa] = [np.asscalar(np.unique(x)) for x in ds.sa[sa].value]
            assign_unique(ds_, part2.attr)

            fm = FlattenMapper()
            fm.train(ds_)
            dss_test_bc.append(ds_.get_mapped(fm))

        ds_test = vstack(dss_test_bc)
        # Perform classification across subjects comparing against mean
        # spatio-temporal pattern of other subjects
        errors_across_subjects = []
        for ds_test_part in part2.generate(ds_test):
            ds_train_, ds_test_ = list(Splitter("partitions").generate(ds_test_part))
            # average across subjects to get a representative pattern per timepoint
            ds_train_ = mean_group_sample(['startpoints'])(ds_train_)
            assert(ds_train_.shape == ds_test_.shape)

            if distance == 'correlation':
                # TODO: redo more efficiently since now we are creating full
                # corrcoef matrix.  Also we might better just take a name for
                # the pdist measure but then implement them efficiently
                # (i.e. without hstacking both pieces together first)
                dist = 1 - np.corrcoef(ds_train_, ds_test_)[len(ds_test_):, :len(ds_test_)]
            else:
                raise NotImplementedError

            if overlapping_windows:
                dist = wipe_out_offdiag(dist, window_size)

            winners = np.argmin(dist, axis=1)
            error = np.mean(winners != np.arange(len(winners)))
            errors_across_subjects.append(error)
        errors.append(errors_across_subjects)
        iter += 1

    errors = np.array(errors)
    if __debug__:
        debug("BM", "Finished with %s array of errors. Mean error %.2f"
              % (errors.shape, np.mean(errors)))
    return errors
def timesegments_classification(dss,
                                hyper=None,
                                part1=HalfPartitioner(),
                                part2=NFoldPartitioner(attr='subjects'),
                                window_size=6,
                                overlapping_windows=True,
                                distance='correlation',
                                do_zscore=True):
    """Time-segment classification across subjects using Hyperalignment

    Parameters
    ----------
    dss : list of datasets
       Datasets to benchmark on.  Usually a single dataset per subject.
    hyper : Hyperalignment-like, optional
       Beast which if called on a list of datasets should spit out trained
       mappers.  If not specified, `IdentityMapper`s will be used
    part1 : Partitioner, optional
       Partitioner to split data for hyperalignment "cross-validation"
    part2 : Partitioner, optional
       Partitioner for CV within the hyperalignment test split
    window_size : int, optional
       How many temporal points to consider for a classification sample
    overlapping_windows : bool, optional
       Strategy to how create and classify "samples" for classification.  If
       True -- `window_size` samples from each time point (but trailing ones)
       constitute a sample, and upon "predict" `window_size` of samples around
       each test point is not considered.  If False -- samples are just taken
       (with training and testing splits) at `window_size` step from one to
       another.
    do_zscore : bool, optional
       Perform zscoring (overall, not per-chunk) for each dataset upon
       partitioning with part1
    ...
    """
    # Generate outer-most partitioning ()
    parts = [copy.deepcopy(part1).generate(ds) for ds in dss]

    iter = 1
    errors = []

    while True:
        try:
            dss_partitioned = [p.next() for p in parts]
        except StopIteration:
            # we are done -- no more partitions
            break
        if __debug__:
            debug("BM", "Iteration %d", iter)

        dss_train, dss_test = zip(*[
            list(Splitter("partitions").generate(ds)) for ds in dss_partitioned
        ])

        # TODO:  allow for doing feature selection

        if do_zscore:
            for ds in dss_train + dss_test:
                zscore(ds, chunks_attr=None)

        if hyper is not None:
            # since otherwise it would remember previous loop dataset as the "commonspace"
            # Now let's do hyperalignment but on a copy in each loop iteration
            hyper_ = copy.deepcopy(hyper)
            mappers = hyper_(dss_train)
        else:
            mappers = [IdentityMapper() for ds in dss_train]

        dss_test_aligned = [
            mapper.forward(ds) for mapper, ds in zip(mappers, dss_test)
        ]

        # assign .sa.subjects to those datasets
        for i, ds in enumerate(dss_test_aligned):
            # part2.attr is by default "subjects"
            ds.sa[part2.attr] = [i]

        dss_test_bc = []
        for ds in dss_test_aligned:
            if overlapping_windows:
                startpoints = range(len(ds) - window_size + 1)
            else:
                startpoints = _get_nonoverlapping_startpoints(
                    len(ds), window_size)
            bm = BoxcarMapper(startpoints, window_size)
            bm.train(ds)
            ds_ = bm.forward(ds)
            ds_.sa['startpoints'] = startpoints

            # reassign subjects so they are not arrays
            def assign_unique(ds, sa):
                ds.sa[sa] = [
                    np.asscalar(np.unique(x)) for x in ds.sa[sa].value
                ]

            assign_unique(ds_, part2.attr)

            fm = FlattenMapper()
            fm.train(ds_)
            dss_test_bc.append(ds_.get_mapped(fm))

        ds_test = vstack(dss_test_bc)
        # Perform classification across subjects comparing against mean
        # spatio-temporal pattern of other subjects
        errors_across_subjects = []
        for ds_test_part in part2.generate(ds_test):
            ds_train_, ds_test_ = list(
                Splitter("partitions").generate(ds_test_part))
            # average across subjects to get a representative pattern per timepoint
            ds_train_ = mean_group_sample(['startpoints'])(ds_train_)
            assert (ds_train_.shape == ds_test_.shape)

            if distance == 'correlation':
                # TODO: redo more efficiently since now we are creating full
                # corrcoef matrix.  Also we might better just take a name for
                # the pdist measure but then implement them efficiently
                # (i.e. without hstacking both pieces together first)
                dist = 1 - np.corrcoef(
                    ds_train_, ds_test_)[len(ds_test_):, :len(ds_test_)]
            else:
                raise NotImplementedError

            if overlapping_windows:
                dist = wipe_out_offdiag(dist, window_size)

            winners = np.argmin(dist, axis=1)
            error = np.mean(winners != np.arange(len(winners)))
            errors_across_subjects.append(error)
        errors.append(errors_across_subjects)
        iter += 1

    errors = np.array(errors)
    if __debug__:
        debug(
            "BM", "Finished with %s array of errors. Mean error %.2f" %
            (errors.shape, np.mean(errors)))
    return errors