def _test_compare_to_old(self):
    """Good just to compare if I didn't screw up anything... treat
    it as a regression test
    """
    import mvpa2.mappers.wavelet_ as wavelet_
    ds = datasets['uni2medium']
    d2d = ds.samples
    ws = 16                          # size of timeline for wavelet
    sp = np.arange(ds.nsamples - ws * 2) + ws

    # create 3D instance (samples x timepoints x channels)
    bcm = BoxcarMapper(sp, ws)
    d3d = bcm.forward(d2d)

    # use wavelet mapper
    for wdm, wdm_ in ((WaveletTransformationMapper(),
                       wavelet_.WaveletTransformationMapper()),
                      (WaveletPacketMapper(),
                       wavelet_.WaveletPacketMapper()),):
        d3d_wd = wdm(d3d)
        d3d_wd_ = wdm_(d3d)
        self.assertTrue(
            (d3d_wd == d3d_wd_).all(),
            msg="We should have got the same result with old and new code. "
                "Got %s and %s" % (d3d_wd, d3d_wd_))
def test_simple_wdm(self):
    """Basic forward/reverse checks for the wavelet mappers"""
    ds = datasets['uni2medium']
    d2d = ds.samples
    ws = 15                          # size of timeline for wavelet
    sp = np.arange(ds.nsamples - ws * 2) + ws

    # create 3D instance (samples x timepoints x channels)
    bcm = BoxcarMapper(sp, ws)
    d3d = bcm.forward(d2d)

    # use wavelet mapper
    wdm = WaveletTransformationMapper()
    d3d_wd = wdm.forward(d3d)
    d3d_swap = d3d.swapaxes(1, 2)

    self.assertRaises(ValueError, WaveletTransformationMapper,
                      wavelet='bogus')
    self.assertRaises(ValueError, WaveletTransformationMapper,
                      mode='bogus')

    # use wavelet mapper
    for wdm, wdm_swap in ((WaveletTransformationMapper(),
                           WaveletTransformationMapper(dim=2)),
                          (WaveletPacketMapper(),
                           WaveletPacketMapper(dim=2))):
        for dd, dd_swap in ((d3d, d3d_swap), (d2d, None)):
            dd_wd = wdm.forward(dd)
            if dd_swap is not None:
                dd_wd_swap = wdm_swap.forward(dd_swap)
                self.assertTrue(
                    (dd_wd == dd_wd_swap.swapaxes(1, 2)).all(),
                    msg="We should have got the same result with swapped "
                        "dimensions and explicit mentioning of it. "
                        "Got %s and %s" % (dd_wd, dd_wd_swap))

            # some sanity checks
            self.assertTrue(dd_wd.shape[0] == dd.shape[0])

            if not isinstance(wdm, WaveletPacketMapper):
                # we can do reverse only for DWT
                dd_rev = wdm.reverse(dd_wd)
                # inverse transform might be not exactly as the
                # input... but should be very close ;-)
                self.assertEqual(dd_rev.shape, dd.shape,
                                 msg="Shape should be the same after iDWT")

                diff = np.linalg.norm(dd - dd_rev)
                ornorm = np.linalg.norm(dd)
                self.assertTrue(diff / ornorm < 1e-10)
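# A minimal illustration (not part of the original tests) of the DWT
# round-trip property that test_simple_wdm checks through
# WaveletTransformationMapper.  It is a sketch using plain PyWavelets
# directly, assuming `pywt` is importable; the helper name below is
# hypothetical and only for illustration, not the mapper's implementation.
def _sketch_dwt_roundtrip_with_pywt():
    import pywt
    # samples x timepoints x channels, decomposed along the time axis
    x = np.random.randn(8, 16, 3)
    coefs = pywt.wavedec(x, 'db1', axis=1)
    x_rec = pywt.waverec(coefs, 'db1', axis=1)
    # reconstruction should match the input up to numerical precision
    assert np.linalg.norm(x - x_rec) / np.linalg.norm(x) < 1e-10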
def test_simple_wp1_level(self):
    """Check a level-2 wavelet packet decomposition and its reconstruction"""
    ds = datasets['uni2large']
    d2d = ds.samples
    ws = 50                          # size of timeline for wavelet
    sp = (np.arange(ds.nsamples - ws * 2) + ws)[:4]

    # create 3D instance (samples x timepoints x channels)
    bcm = BoxcarMapper(sp, ws)
    d3d = bcm.forward(d2d)

    # use wavelet mapper
    wdm = WaveletPacketMapper(level=2, wavelet='sym2')
    d3d_wd = wdm.forward(d3d)

    # Check dimensionality
    d3d_wds, d3ds = d3d_wd.shape, d3d.shape
    self.assertTrue(len(d3d_wds) == len(d3ds) + 1)
    self.assertTrue(d3d_wds[1] * d3d_wds[2] >= d3ds[1])
    self.assertTrue(d3d_wds[0] == d3ds[0])
    self.assertTrue(d3d_wds[-1] == d3ds[-1])
    #print d2d.shape, d3d.shape, d3d_wd.shape

    if externals.exists('pywt wp reconstruct'):
        # Test reverse -- should be identical
        # we can do reverse only for DWT
        d3d_rev = wdm.reverse(d3d_wd)

        # inverse transform might be not exactly as the
        # input... but should be very close ;-)
        self.assertEqual(d3d_rev.shape, d3d.shape,
                         msg="Shape should be the same after iDWT")

        diff = np.linalg.norm(d3d - d3d_rev)
        ornorm = np.linalg.norm(d3d)

        skip_if_no_external('pywt wp reconstruct fixed')
        self.assertTrue(diff / ornorm < 1e-10)
    else:
        self.assertRaises(NotImplementedError, wdm.reverse, d3d_wd)
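# A rough sketch (not from the original test suite) of what a level-2 wavelet
# packet decomposition looks like with plain PyWavelets, assuming `pywt` is
# available.  It motivates the shape checks above: the packet decomposition
# adds one axis holding the 2**level frequency bands, and bands x coefficients
# can exceed the original number of timepoints because of boundary handling.
# The helper name is hypothetical and for illustration only.
def _sketch_wavelet_packet_bands():
    import pywt
    signal = np.random.randn(50)     # one channel, ws=50 timepoints
    wp = pywt.WaveletPacket(data=signal, wavelet='sym2', maxlevel=2)
    bands = np.array([node.data for node in wp.get_level(2, order='natural')])
    # 2**2 = 4 bands; together they hold at least as many coefficients
    # as there were input timepoints
    assert bands.shape[0] == 4
    assert bands.shape[0] * bands.shape[1] >= signal.shape[0]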
def test_simpleboxcar():
    data = np.atleast_2d(np.arange(10)).T
    sp = np.arange(10)

    # check that the stupid thing doesn't work
    assert_raises(ValueError, BoxcarMapper, sp, 0)

    # now do an identity transformation
    bcm = BoxcarMapper(sp, 1)
    trans = bcm.forward(data)
    # ,0 is a feature below, so we get explicit 2D out of 1D
    assert_array_equal(trans[:, 0], data)

    # now check for illegal boxes
    if __debug__:
        # condition is checked only in __debug__
        assert_raises(ValueError, BoxcarMapper(sp, 2).train, data)

    # now something that should work
    nbox = 9
    boxlength = 2
    sp = np.arange(nbox)
    bcm = BoxcarMapper(sp, boxlength)
    trans = bcm.forward(data)
    # check that it properly upcasts the dimensionality
    assert_equal(trans.shape, (nbox, boxlength) + data.shape[1:])
    # check actual values, squeezing the last dim for simplicity
    assert_array_equal(trans.squeeze(),
                       np.vstack((np.arange(9), np.arange(9) + 1)).T)

    # now test for proper data shape
    data = np.ones((10, 3, 4, 2))
    sp = [2, 4, 3, 5]
    trans = BoxcarMapper(sp, 4).forward(data)
    assert_equal(trans.shape, (4, 4, 3, 4, 2))

    # test reverse
    data = np.arange(240).reshape(10, 3, 4, 2)
    sp = [2, 4, 3, 5]
    boxlength = 2
    m = BoxcarMapper(sp, boxlength)
    m.train(data)
    mp = m.forward(data)
    assert_equal(mp.shape, (4, 2, 3, 4, 2))

    # try full reconstruct
    mr = m.reverse(mp)
    # shape has to match
    assert_equal(mr.shape, (len(sp) * boxlength,) + data.shape[1:])
    # only known samples are part of the results
    assert_true((mr >= 24).all())
    assert_true((mr < 168).all())

    # check proper reconstruction of non-conflicting sample
    assert_array_equal(mr[0].ravel(), np.arange(48, 72))
    # check proper reconstruction of samples being part of multiple
    # mapped samples
    assert_array_equal(mr[1].ravel(), np.arange(72, 96))

    # test reverse of a single sample
    singlesample = np.arange(48).reshape(2, 3, 4, 2)
    assert_array_equal(singlesample, m.reverse1(singlesample))
    # now in a dataset
    ds = Dataset([singlesample])
    assert_equal(ds.shape, (1,) + singlesample.shape)
    # after reverse mapping the 'sample axis' should vanish and the original 3d
    # shape of the samples should be restored
    assert_equal(ds.shape[1:], m.reverse(ds).shape)
    # multiple samples should just be concatenated along the samples axis
    ds = Dataset([singlesample, singlesample])
    assert_equal((np.prod(ds.shape[:2]),) + singlesample.shape[1:],
                 m.reverse(ds).shape)
    # should not work for shape mismatch, but it does work and is useful when
    # reverse mapping sample attributes
    #assert_raises(ValueError, m.reverse, singlesample[0])

    # check broadcasting of 'raw' samples into proper boxcars on forward()
    bc = m.forward1(np.arange(24).reshape(3, 4, 2))
    assert_array_equal(bc, np.array(2 * [np.arange(24).reshape(3, 4, 2)]))
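# A small numpy-only sketch (not part of the original tests) of how the boxcar
# forward mapping can be expressed with fancy indexing; it reproduces the
# (nboxes, boxlength) + data.shape[1:] shape asserted above.  Names here are
# illustrative only, not BoxcarMapper's actual implementation.
def _sketch_boxcar_with_indexing():
    data = np.arange(240).reshape(10, 3, 4, 2)
    sp = np.array([2, 4, 3, 5])
    boxlength = 2
    # index matrix of shape (nboxes, boxlength): each row selects one boxcar
    idx = sp[:, None] + np.arange(boxlength)
    boxed = data[idx]
    assert boxed.shape == (len(sp), boxlength) + data.shape[1:]
    # first boxcar starts at sample 2, i.e. flat values 48..95
    assert (boxed[0].ravel() == np.arange(48, 96)).all()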
def test_datasetmapping():
    # 6 samples, 4x2 features
    data = np.arange(48).reshape(6, 4, 2)
    ds = Dataset(data,
                 sa={'timepoints': np.arange(6),
                     'multidim': data.copy()},
                 fa={'fid': np.arange(4)})
    # with overlapping and non-overlapping boxcars
    startpoints = [0, 1, 4]
    boxlength = 2
    bm = BoxcarMapper(startpoints, boxlength, space='boxy')
    # train is critical
    bm.train(ds)
    mds = bm.forward(ds)
    assert_equal(len(mds), len(startpoints))
    assert_equal(mds.nfeatures, boxlength)
    # all sample attributes remain, but they can be rotated/compressed into
    # multidimensional attributes
    assert_equal(sorted(mds.sa.keys()),
                 ['boxy_onsetidx'] + sorted(ds.sa.keys()))
    assert_equal(mds.sa.multidim.shape,
                 (len(startpoints), boxlength) + ds.shape[1:])
    assert_equal(mds.sa.timepoints.shape, (len(startpoints), boxlength))
    assert_array_equal(mds.sa.timepoints.flatten(),
                       np.array([(s, s + 1) for s in startpoints]).flatten())
    assert_array_equal(mds.sa.boxy_onsetidx, startpoints)
    # feature attributes also get rotated and broadcasted
    assert_array_equal(mds.fa.fid, [ds.fa.fid, ds.fa.fid])
    # and finally there is a new one
    assert_array_equal(mds.fa.boxy_offsetidx, list(range(boxlength)))

    # now see how it works on reverse()
    rds = bm.reverse(mds)
    # we got at least something of all original attributes back
    assert_equal(sorted(rds.sa.keys()), sorted(ds.sa.keys()))
    assert_equal(sorted(rds.fa.keys()), sorted(ds.fa.keys()))
    # it is not possible to reconstruct the full samples array;
    # some samples might even show up multiple times (when there are
    # overlapping boxcars)
    assert_array_equal(rds.samples,
                       np.array([[[0, 1], [2, 3], [4, 5], [6, 7]],
                                 [[8, 9], [10, 11], [12, 13], [14, 15]],
                                 [[8, 9], [10, 11], [12, 13], [14, 15]],
                                 [[16, 17], [18, 19], [20, 21], [22, 23]],
                                 [[32, 33], [34, 35], [36, 37], [38, 39]],
                                 [[40, 41], [42, 43], [44, 45], [46, 47]]]))
    assert_array_equal(rds.sa.timepoints, [0, 1, 1, 2, 4, 5])
    assert_array_equal(rds.sa.multidim, ds.sa.multidim[rds.sa.timepoints])
    # but feature attributes should be fully recovered
    assert_array_equal(rds.fa.fid, ds.fa.fid)

    # popular dataset configuration (double flatten + boxcar)
    cm = ChainMapper([FlattenMapper(), bm, FlattenMapper()])
    cm.train(ds)
    bflat = ds.get_mapped(cm)
    assert_equal(bflat.shape,
                 (len(startpoints), boxlength * np.prod(ds.shape[1:])))
    # add attributes
    bflat.fa['testfa'] = np.arange(bflat.nfeatures)
    bflat.sa['testsa'] = np.arange(bflat.nsamples)
    # now try to go back
    bflatrev = bflat.mapper.reverse(bflat)
    # data should be same again, as far as the boxcars match
    assert_array_equal(ds.samples[:2], bflatrev.samples[:2])
    assert_array_equal(ds.samples[-2:], bflatrev.samples[-2:])
    # feature axis should match
    assert_equal(ds.shape[1:], bflatrev.shape[1:])
def timesegments_classification(dss,
                                hyper=None,
                                part1=HalfPartitioner(),
                                part2=NFoldPartitioner(attr='subjects'),
                                window_size=6,
                                overlapping_windows=True,
                                distance='correlation',
                                do_zscore=True):
    """Time-segment classification across subjects using Hyperalignment

    Parameters
    ----------
    dss : list of datasets
       Datasets to benchmark on.  Usually a single dataset per subject.
    hyper : Hyperalignment-like, optional
       Beast which if called on a list of datasets should spit out trained
       mappers.  If not specified, `IdentityMapper`s will be used
    part1 : Partitioner, optional
       Partitioner to split data for hyperalignment "cross-validation"
    part2 : Partitioner, optional
       Partitioner for CV within the hyperalignment test split
    window_size : int, optional
       How many temporal points to consider for a classification sample
    overlapping_windows : bool, optional
       Strategy for how to create and classify "samples" for classification.
       If True -- `window_size` samples from each time point (but trailing
       ones) constitute a sample, and upon "predict" `window_size` of samples
       around each test point is not considered.  If False -- samples are
       just taken (with training and testing splits) at `window_size` step
       from one to another.
    do_zscore : bool, optional
       Perform zscoring (overall, not per-chunk) for each dataset upon
       partitioning with part1
    """
    # Generate outer-most partitioning
    parts = [copy.deepcopy(part1).generate(ds) for ds in dss]

    iter = 1
    errors = []

    while True:
        try:
            dss_partitioned = [next(p) for p in parts]
        except StopIteration:
            # we are done -- no more partitions
            break
        if __debug__:
            debug("BM", "Iteration %d", iter)

        dss_train, dss_test = zip(*[list(Splitter("partitions").generate(ds))
                                    for ds in dss_partitioned])

        # TODO: allow for doing feature selection

        if do_zscore:
            for ds in dss_train + dss_test:
                zscore(ds, chunks_attr=None)

        if hyper is not None:
            # do hyperalignment on a copy in each loop iteration, since
            # otherwise it would remember the previous loop's dataset as the
            # "commonspace"
            hyper_ = copy.deepcopy(hyper)
            mappers = hyper_(dss_train)
        else:
            mappers = [IdentityMapper() for ds in dss_train]

        dss_test_aligned = [mapper.forward(ds)
                            for mapper, ds in zip(mappers, dss_test)]

        # assign .sa.subjects to those datasets
        for i, ds in enumerate(dss_test_aligned):
            # part2.attr is by default "subjects"
            ds.sa[part2.attr] = [i]

        dss_test_bc = []
        for ds in dss_test_aligned:
            if overlapping_windows:
                startpoints = range(len(ds) - window_size + 1)
            else:
                startpoints = _get_nonoverlapping_startpoints(len(ds),
                                                              window_size)
            bm = BoxcarMapper(startpoints, window_size)
            bm.train(ds)
            ds_ = bm.forward(ds)
            ds_.sa['startpoints'] = startpoints

            # reassign subjects so they are not arrays
            def assign_unique(ds, sa):
                ds.sa[sa] = [np.asscalar(np.unique(x))
                             for x in ds.sa[sa].value]
            assign_unique(ds_, part2.attr)

            fm = FlattenMapper()
            fm.train(ds_)
            dss_test_bc.append(ds_.get_mapped(fm))

        ds_test = vstack(dss_test_bc)
        # Perform classification across subjects comparing against mean
        # spatio-temporal pattern of other subjects
        errors_across_subjects = []
        for ds_test_part in part2.generate(ds_test):
            ds_train_, ds_test_ = list(
                Splitter("partitions").generate(ds_test_part))
            # average across subjects to get a representative pattern per
            # timepoint
            ds_train_ = mean_group_sample(['startpoints'])(ds_train_)
            assert ds_train_.shape == ds_test_.shape

            if distance == 'correlation':
                # TODO: redo more efficiently since now we are creating a full
                # corrcoef matrix.  Also we might better just take a name for
                # the pdist measure but then implement them efficiently
                # (i.e. without hstacking both pieces together first)
                dist = 1 - np.corrcoef(
                    ds_train_, ds_test_)[len(ds_test_):, :len(ds_test_)]
            else:
                raise NotImplementedError

            if overlapping_windows:
                dist = wipe_out_offdiag(dist, window_size)

            winners = np.argmin(dist, axis=1)
            error = np.mean(winners != np.arange(len(winners)))
            errors_across_subjects.append(error)

        errors.append(errors_across_subjects)
        iter += 1

    errors = np.array(errors)
    if __debug__:
        debug("BM",
              "Finished with %s array of errors. Mean error %.2f"
              % (errors.shape, np.mean(errors)))
    return errors
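# The helpers `_get_nonoverlapping_startpoints` and `wipe_out_offdiag` used
# above are defined elsewhere in mvpa2.  The sketch below (hypothetical name,
# illustration only) shows the startpoint pattern the non-overlapping branch
# presumably relies on: windows taken every `window_size` timepoints.
def _sketch_nonoverlapping_startpoints(n_timepoints, window_size):
    return list(range(0, n_timepoints - window_size + 1, window_size))

# e.g. _sketch_nonoverlapping_startpoints(20, 6) -> [0, 6, 12],
# whereas the overlapping branch above uses range(20 - 6 + 1) -> 0..14

# Hedged usage sketch of the benchmark itself (assuming `dss` is a list of
# per-subject datasets in a common feature space and `hyper` is e.g. a
# Hyperalignment instance):
#
#   errors = timesegments_classification(dss, hyper=hyper, window_size=6)
#   print("mean error: %.3f" % errors.mean())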