Example #1
    def test_feature_selection_pipeline(self):
        """Chain two sensitivity-based feature selections and check their sizes.

        Expectations are expressed relative to the dataset's feature count so
        the test does not depend on a hard-coded number of features.
        """
        sens_ana = SillySensitivityAnalyzer()

        data = self.get_data()
        data_nfeatures = data.nfeatures

        # test silly one first ;-)
        self.assertEqual(
            sens_ana(data).samples[0, 0], -int(data_nfeatures / 2))

        # OLD: first remove 25% == 6, and then 4, total removing 10
        # NOW: test should be independent of the numerical number of features
        # NOTE(review): the body of this list was missing; the two stages are
        # rebuilt from the comment above and from the sizes asserted at the
        # end (keep 75%, then drop 4 more) -- confirm the selector classes.
        feature_selections = [
            SensitivityBasedFeatureSelection(
                sens_ana, FractionTailSelector(0.25)),
            SensitivityBasedFeatureSelection(
                sens_ana, FixedNElementTailSelector(4)),
        ]

        # create a FeatureSelection pipeline
        feat_sel_pipeline = ChainMapper(feature_selections)

        # NOTE(review): presumably the chain has to be trained before it can
        # be applied and before `_oshape` is populated -- confirm.
        feat_sel_pipeline.train(data)
        # the forward pass through the whole chain has to run cleanly
        feat_sel_pipeline(data)

        # the chain must expose exactly the mappers it was built from
        self.assertEqual(len(feat_sel_pipeline),
                         len(feature_selections),
                         msg="Test the property feature_selections")

        # stage 1 keeps 75% of the features, stage 2 drops 4 more
        desired_nfeatures = int(np.ceil(data_nfeatures * 0.75))
        self.assertEqual([fe._oshape[0] for fe in feat_sel_pipeline],
                         [desired_nfeatures, desired_nfeatures - 4])
Example #2
    def test_feature_selection_pipeline(self):
        """Chain two sensitivity-based feature selections and check their sizes.

        Expectations are expressed relative to the dataset's feature count so
        the test does not depend on a hard-coded number of features.
        """
        sens_ana = SillySensitivityAnalyzer()

        data = self.get_data()
        data_nfeatures = data.nfeatures

        # test silly one first ;-)
        self.assertEqual(sens_ana(data).samples[0, 0],
                         -int(data_nfeatures / 2))

        # OLD: first remove 25% == 6, and then 4, total removing 10
        # NOW: test should be independent of the numerical number of features
        # NOTE(review): the arguments of these selections were missing; they
        # are rebuilt from the comment above and from the sizes asserted at
        # the end (keep 75%, then drop 4 more) -- confirm the selector classes.
        feature_selections = [
            SensitivityBasedFeatureSelection(
                sens_ana, FractionTailSelector(0.25)),
            SensitivityBasedFeatureSelection(
                sens_ana, FixedNElementTailSelector(4)),
        ]

        # create a FeatureSelection pipeline
        feat_sel_pipeline = ChainMapper(feature_selections)

        # NOTE(review): presumably the chain has to be trained before it can
        # be applied and before `_oshape` is populated -- confirm.
        feat_sel_pipeline.train(data)
        # the forward pass through the whole chain has to run cleanly
        feat_sel_pipeline(data)

        # the chain must expose exactly the mappers it was built from
        self.assertEqual(len(feat_sel_pipeline),
                         len(feature_selections),
                         msg="Test the property feature_selections")

        # stage 1 keeps 75% of the features, stage 2 drops 4 more
        desired_nfeatures = int(np.ceil(data_nfeatures * 0.75))
        self.assertEqual([fe._oshape[0] for fe in feat_sel_pipeline],
                         [desired_nfeatures, desired_nfeatures - 4])
Example #3
def test_chainmapper():
    """Exercise ChainMapper: construction, growth, repr round-trip, mapping."""
    # the chain needs at least one mapper
    assert_raises(ValueError, ChainMapper, [])
    # a typical first mapper is to flatten
    cm = ChainMapper([FlattenMapper()])

    # few container checks
    assert_equal(len(cm), 1)
    assert_true(isinstance(cm[0], FlattenMapper))

    # now training
    # come up with data: 4 samples of shape (2, 2, 4), numbered consecutively
    samples_shape = (2, 2, 4)
    data_shape = (4,) + samples_shape
    data = np.arange(np.prod(data_shape)).reshape(data_shape)
    # the same values with each sample flattened into one row of 16
    target = [[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31],
              [32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47],
              [48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63]]
    target = np.array(target)

    # if it is not trained it knows nothing
    # NOTE(review): the train call announced by this comment was missing and
    # has been restored -- confirm.
    cm.train(data)

    # a new mapper should appear when doing feature selection
    cm.append(StaticFeatureSelection(list(range(1, 16))))
    assert_equal(cm.forward1(data[0]).shape, (15,))
    assert_equal(len(cm), 2)
    # multiple slicing
    cm.append(StaticFeatureSelection([9, 14]))
    assert_equal(cm.forward1(data[0]).shape, (2,))
    assert_equal(len(cm), 3)

    # check reproduction
    if __debug__:
        # debug mode needs special test as it enhances the repr output
        # with module info and id() appendix for objects
        # NOTE(review): given the id() appendix mentioned above, an exact
        # repr comparison may be too strict in debug mode -- confirm.
        # presumably imported so eval() can resolve module-qualified names
        import mvpa2  # noqa: F401
        cm_clone = eval(repr(cm))
        assert_equal(repr(cm_clone), repr(cm))

    # what happens if we retrain the whole beast on the same data as before
    cm.train(data)
    assert_equal(cm.forward1(data[0]).shape, (2,))
    assert_equal(len(cm), 3)

    # let's map something
    mdata = cm.forward(data)
    # indices 9 and 14 of features 1..15 are original features 10 and 15
    assert_array_equal(mdata, target[:, [10, 15]])
    # and back
    rdata = cm.reverse(mdata)
    # original shape
    assert_equal(rdata.shape, data.shape)
    # content as far as it could be restored
    assert_array_equal(rdata[rdata > 0], data[rdata > 0])
    # 4 samples x 2 surviving features
    assert_equal(np.sum(rdata > 0), 8)

    # Let's construct a dataset with mapper assigned and see
    # if sub-selecting a feature adjusts trailing StaticFeatureSelection
    # appropriately
    ds_subsel = Dataset.from_wizard(data, mapper=cm)[:, 1]
    tail_sfs = ds_subsel.a.mapper[-1]
    assert_equal(repr(tail_sfs), 'StaticFeatureSelection(slicearg=array([14]))')
Example #4
def test_chainmapper():
    """Exercise ChainMapper: construction, growth, repr round-trip, mapping."""
    # the chain needs at least one mapper
    assert_raises(ValueError, ChainMapper, [])
    # a typical first mapper is to flatten
    cm = ChainMapper([FlattenMapper()])

    # few container checks
    assert_equal(len(cm), 1)
    assert_true(isinstance(cm[0], FlattenMapper))

    # now training
    # come up with data: 4 samples of shape (2, 2, 4), numbered consecutively
    samples_shape = (2, 2, 4)
    data_shape = (4,) + samples_shape
    data = np.arange(np.prod(data_shape)).reshape(data_shape)
    # the same values with each sample flattened into one row of 16
    target = [[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31],
              [32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47],
              [48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63]]
    target = np.array(target)

    # if it is not trained it knows nothing
    # NOTE(review): the train call announced by this comment was missing and
    # has been restored -- confirm.
    cm.train(data)

    # a new mapper should appear when doing feature selection
    # (an explicit list keeps the slicearg a real sequence on Python 3)
    cm.append(StaticFeatureSelection(list(range(1, 16))))
    assert_equal(cm.forward1(data[0]).shape, (15,))
    assert_equal(len(cm), 2)
    # multiple slicing
    cm.append(StaticFeatureSelection([9, 14]))
    assert_equal(cm.forward1(data[0]).shape, (2,))
    assert_equal(len(cm), 3)

    # check reproduction
    if __debug__:
        # debug mode needs special test as it enhances the repr output
        # with module info and id() appendix for objects
        # NOTE(review): given the id() appendix mentioned above, an exact
        # repr comparison may be too strict in debug mode -- confirm.
        # presumably imported so eval() can resolve module-qualified names
        import mvpa2  # noqa: F401
        cm_clone = eval(repr(cm))
        assert_equal(repr(cm_clone), repr(cm))

    # what happens if we retrain the whole beast on the same data as before
    cm.train(data)
    assert_equal(cm.forward1(data[0]).shape, (2,))
    assert_equal(len(cm), 3)

    # let's map something
    mdata = cm.forward(data)
    # indices 9 and 14 of features 1..15 are original features 10 and 15
    assert_array_equal(mdata, target[:, [10, 15]])
    # and back
    rdata = cm.reverse(mdata)
    # original shape
    assert_equal(rdata.shape, data.shape)
    # content as far as it could be restored
    assert_array_equal(rdata[rdata > 0], data[rdata > 0])
    # 4 samples x 2 surviving features
    assert_equal(np.sum(rdata > 0), 8)

    # Let's construct a dataset with mapper assigned and see
    # if sub-selecting a feature adjusts trailing StaticFeatureSelection
    # appropriately
    ds_subsel = Dataset.from_wizard(data, mapper=cm)[:, 1]
    tail_sfs = ds_subsel.a.mapper[-1]
    assert_equal(repr(tail_sfs), 'StaticFeatureSelection(slicearg=array([14]))')
Example #5
def test_datasetmapping():
    """Check BoxcarMapper forward/reverse on a Dataset, alone and in a chain."""
    # 6 samples, 4X2 features
    data = np.arange(48).reshape(6, 4, 2)
    ds = Dataset(data,
                 sa={'timepoints': np.arange(6),
                     'multidim': data.copy()},
                 fa={'fid': np.arange(4)})
    # with overlapping and non-overlapping boxcars
    startpoints = [0, 1, 4]
    boxlength = 2
    bm = BoxcarMapper(startpoints, boxlength, space='boxy')
    # train is critical
    # NOTE(review): the train call announced by this comment was missing and
    # has been restored -- confirm.
    bm.train(ds)
    mds = bm.forward(ds)
    assert_equal(len(mds), len(startpoints))
    assert_equal(mds.nfeatures, boxlength)
    # all samples attributes remain, but they can be rotated/compressed into
    # multidimensional attributes
    assert_equal(sorted(mds.sa.keys()),
                 ['boxy_onsetidx'] + sorted(ds.sa.keys()))
    # NOTE(review): the subject of this shape check was missing; 'multidim'
    # is assumed because 'timepoints' is checked on the next line -- confirm.
    assert_equal(mds.sa.multidim.shape,
                 (len(startpoints), boxlength) + ds.shape[1:])
    assert_equal(mds.sa.timepoints.shape, (len(startpoints), boxlength))
    # each boxcar covers its onset and the following timepoint
    assert_array_equal(mds.sa.timepoints.flatten(),
                       np.array([(s, s + 1) for s in startpoints]).flatten())
    assert_array_equal(mds.sa.boxy_onsetidx, startpoints)
    # feature attributes also get rotated and broadcasted
    assert_array_equal(mds.fa.fid, [ds.fa.fid, ds.fa.fid])
    # and finally there is a new one
    assert_array_equal(mds.fa.boxy_offsetidx, list(range(boxlength)))

    # now see how it works on reverse()
    rds = bm.reverse(mds)
    # we got at least something of all original attributes back
    assert_equal(sorted(rds.sa.keys()), sorted(ds.sa.keys()))
    assert_equal(sorted(rds.fa.keys()), sorted(ds.fa.keys()))
    # it is not possible to reconstruct the full samples array
    # some samples even might show up multiple times (when there are
    # overlapping boxcars); expected rows are original samples 0, 1, 1, 2, 4, 5
    assert_array_equal(
        rds.samples,
        np.array([[[0, 1], [2, 3], [4, 5], [6, 7]],
                  [[8, 9], [10, 11], [12, 13], [14, 15]],
                  [[8, 9], [10, 11], [12, 13], [14, 15]],
                  [[16, 17], [18, 19], [20, 21], [22, 23]],
                  [[32, 33], [34, 35], [36, 37], [38, 39]],
                  [[40, 41], [42, 43], [44, 45], [46, 47]]]))
    assert_array_equal(rds.sa.timepoints, [0, 1, 1, 2, 4, 5])
    assert_array_equal(rds.sa.multidim, ds.sa.multidim[rds.sa.timepoints])
    # but feature attributes should be fully recovered
    assert_array_equal(rds.fa.fid, ds.fa.fid)

    # popular dataset configuration (double flatten + boxcar)
    cm = ChainMapper([FlattenMapper(), bm, FlattenMapper()])
    bflat = ds.get_mapped(cm)
    assert_equal(bflat.shape,
                 (len(startpoints), boxlength * np.prod(ds.shape[1:])))
    # add attributes
    bflat.fa['testfa'] = np.arange(bflat.nfeatures)
    bflat.sa['testsa'] = np.arange(bflat.nsamples)
    # now try to go back
    bflatrev = bflat.mapper.reverse(bflat)
    # data should be same again, as far as the boxcars match
    assert_array_equal(ds.samples[:2], bflatrev.samples[:2])
    assert_array_equal(ds.samples[-2:], bflatrev.samples[-2:])
    # feature axis should match
    assert_equal(ds.shape[1:], bflatrev.shape[1:])
Example #6
def test_datasetmapping():
    """Check BoxcarMapper forward/reverse on a Dataset, alone and in a chain."""
    # 6 samples, 4X2 features
    data = np.arange(48).reshape(6, 4, 2)
    ds = Dataset(data,
                 sa={'timepoints': np.arange(6),
                     'multidim': data.copy()},
                 fa={'fid': np.arange(4)})
    # with overlapping and non-overlapping boxcars
    startpoints = [0, 1, 4]
    boxlength = 2
    bm = BoxcarMapper(startpoints, boxlength, space='boxy')
    # train is critical
    # NOTE(review): the train call announced by this comment was missing and
    # has been restored -- confirm.
    bm.train(ds)
    mds = bm.forward(ds)
    assert_equal(len(mds), len(startpoints))
    assert_equal(mds.nfeatures, boxlength)
    # all samples attributes remain, but they can be rotated/compressed into
    # multidimensional attributes
    assert_equal(sorted(mds.sa.keys()),
                 ['boxy_onsetidx'] + sorted(ds.sa.keys()))
    # NOTE(review): the subject of this shape check was missing; 'multidim'
    # is assumed because 'timepoints' is checked on the next line -- confirm.
    assert_equal(mds.sa.multidim.shape,
                 (len(startpoints), boxlength) + ds.shape[1:])
    assert_equal(mds.sa.timepoints.shape, (len(startpoints), boxlength))
    # each boxcar covers its onset and the following timepoint
    assert_array_equal(mds.sa.timepoints.flatten(),
                       np.array([(s, s + 1) for s in startpoints]).flatten())
    assert_array_equal(mds.sa.boxy_onsetidx, startpoints)
    # feature attributes also get rotated and broadcasted
    assert_array_equal(mds.fa.fid, [ds.fa.fid, ds.fa.fid])
    # and finally there is a new one
    assert_array_equal(mds.fa.boxy_offsetidx, list(range(boxlength)))

    # now see how it works on reverse()
    rds = bm.reverse(mds)
    # we got at least something of all original attributes back
    assert_equal(sorted(rds.sa.keys()), sorted(ds.sa.keys()))
    assert_equal(sorted(rds.fa.keys()), sorted(ds.fa.keys()))
    # it is not possible to reconstruct the full samples array
    # some samples even might show up multiple times (when there are
    # overlapping boxcars); expected rows are original samples 0, 1, 1, 2, 4, 5
    assert_array_equal(
        rds.samples,
        np.array([[[ 0,  1], [ 2,  3], [ 4,  5], [ 6,  7]],
                  [[ 8,  9], [10, 11], [12, 13], [14, 15]],
                  [[ 8,  9], [10, 11], [12, 13], [14, 15]],
                  [[16, 17], [18, 19], [20, 21], [22, 23]],
                  [[32, 33], [34, 35], [36, 37], [38, 39]],
                  [[40, 41], [42, 43], [44, 45], [46, 47]]]))
    assert_array_equal(rds.sa.timepoints, [0, 1, 1, 2, 4, 5])
    assert_array_equal(rds.sa.multidim, ds.sa.multidim[rds.sa.timepoints])
    # but feature attributes should be fully recovered
    assert_array_equal(rds.fa.fid, ds.fa.fid)

    # popular dataset configuration (double flatten + boxcar)
    cm = ChainMapper([FlattenMapper(), bm, FlattenMapper()])
    bflat = ds.get_mapped(cm)
    assert_equal(bflat.shape,
                 (len(startpoints), boxlength * np.prod(ds.shape[1:])))
    # add attributes
    bflat.fa['testfa'] = np.arange(bflat.nfeatures)
    bflat.sa['testsa'] = np.arange(bflat.nsamples)
    # now try to go back
    bflatrev = bflat.mapper.reverse(bflat)
    # data should be same again, as far as the boxcars match
    assert_array_equal(ds.samples[:2], bflatrev.samples[:2])
    assert_array_equal(ds.samples[-2:], bflatrev.samples[-2:])
    # feature axis should match
    assert_equal(ds.shape[1:], bflatrev.shape[1:])