def test_feature_selection_pipeline(self):
    """Chain two sensitivity-based feature selections and verify the result.

    The first step drops a fraction (25%) of the features, the second a
    fixed count (4), so the expected output shape can be computed for any
    number of input features.
    """
    analyzer = SillySensitivityAnalyzer()
    ds = self.get_data()
    nfeat = ds.nfeatures
    # sanity-check the silly analyzer itself before using it
    self.failUnlessEqual(analyzer(ds).samples[0, 0], -int(nfeat / 2))
    # fraction-based first, fixed-count second -- keeps the expectation
    # independent of the numerical number of features
    selectors = [
        SensitivityBasedFeatureSelection(analyzer,
                                         FractionTailSelector(0.25)),
        SensitivityBasedFeatureSelection(analyzer,
                                         FixedNElementTailSelector(4)),
    ]
    # assemble both selections into a single pipeline
    pipeline = ChainMapper(selectors)
    pipeline.train(ds)
    # exercise the forward mapping (result itself not inspected here)
    resds = pipeline(ds)
    self.failUnlessEqual(len(pipeline), len(selectors),
        msg="Test the property feature_selections")
    after_fraction = int(np.ceil(nfeat * 0.75))
    # each chain element must report the expected output feature count
    self.failUnlessEqual([step._oshape[0] for step in pipeline],
                         [after_fraction, after_fraction - 4])
def testChainMapper(self):
    """Boxcar extraction chained with dense-array flattening."""
    samples = N.array([N.arange(24).reshape(3, 4, 2) + (i * 100)
                       for i in range(10)])
    boxcar = BoxcarMapper([2, 4, 3, 5], 2)
    chain = ChainMapper([boxcar,
                         DenseArrayMapper(mask=N.ones((2, 3, 4, 2)))])
    forwarded = chain.forward(samples)
    # 4 startpoints, each yielding two samples of shape (3, 4, 2),
    # flattened to 48 features
    self.failUnless(forwarded.shape == (4, 48))
    self.failUnless(chain.reverse(N.arange(48)).shape == (2, 3, 4, 2))
    # neighborhood queries should behave like a DenseArrayMapper alone
    neighbors = N.array(list(chain.getNeighbor(24, radius=1.1)))
    self.failUnless((neighbors == N.array((0, 24, 25, 26, 32))).all())
def mask_mapper(mask=None, shape=None, inspace=None):
    """Factory method to create a chain of Flatten+FeatureSlice Mappers

    Parameters
    ----------
    mask : None or array
      an array in the original dataspace and its nonzero elements are
      used to define the features included in the dataset. Alternatively,
      the `shape` argument can be used to define the array dimensions.
    shape : None or tuple
      The shape of the array to be mapped. If `shape` is provided instead
      of `mask`, a full mask (all True) of the desired shape is
      constructed. If `shape` is specified in addition to `mask`, the
      provided mask is extended to have the same number of dimensions.
    inspace
      Provided to `FlattenMapper`

    Returns
    -------
    ChainMapper
      A FlattenMapper followed by a FeatureSliceMapper that selects the
      nonzero elements of the (flattened) mask.

    Raises
    ------
    ValueError
      If neither `mask` nor `shape` is given, or if both are given but
      their shapes are incompatible.
    """
    if mask is None:
        if shape is None:
            # NOTE: function-call raise syntax works under both Python 2
            # and 3, unlike the former `raise ValueError, msg` statement
            raise ValueError(
                "Either `shape` or `mask` have to be specified.")
        # make full dataspace mask if nothing else is provided
        mask = np.ones(shape, dtype='bool')
    elif shape is not None:
        # expand mask to span all dimensions but first one
        # necessary e.g. if only one slice from timeseries of volumes is
        # requested.
        mask = np.array(mask, copy=False, subok=True, ndmin=len(shape))
        # check for compatibility
        if shape != mask.shape:
            raise ValueError("The mask dataspace shape %s is not "
                             "compatible with the provided shape %s."
                             % (mask.shape, shape))
    fm = FlattenMapper(shape=mask.shape, inspace=inspace)
    # flatten the mask itself to determine which flat features survive
    flatmask = fm.forward1(mask)
    mapper = ChainMapper([fm,
                          FeatureSliceMapper(
                              flatmask,
                              dshape=flatmask.shape,
                              oshape=(len(flatmask.nonzero()[0]),))])
    return mapper
def _append_mapper(self, mapper):
    """Attach `mapper` to this dataset's mapper attribute.

    If no mapper is stored yet, `mapper` becomes it. Otherwise the stored
    mapper is promoted to a ChainMapper (if it is not one already) and
    `mapper` is either merged into a compatible trailing slicer or
    appended to the chain.
    """
    # nothing stored yet -- just keep the new mapper as-is
    if 'mapper' not in self.a:
        self.a['mapper'] = mapper
        return
    existing = self.a.mapper
    # wrap a non-chain mapper into a chain so appending is possible
    if not isinstance(existing, ChainMapper):
        self.a.mapper = ChainMapper([existing])
    tail = self.a.mapper[-1]
    # fold compatible slicers together instead of growing the chain
    if isinstance(tail, FeatureSliceMapper) and tail.is_mergable(mapper):
        tail += mapper
    else:
        self.a.mapper.append(mapper)
def test_flatten():
    """Exercise FlattenMapper both standalone and as the sole element of a
    ChainMapper: forward/reverse mapping, array-subclass preservation,
    1D-input handling, and dataset mode with feature attributes.
    """
    samples_shape = (2, 2, 4)
    data_shape = (4, ) + samples_shape
    # `myarray` is an ndarray subclass (defined elsewhere in this module);
    # used to verify the mapper preserves array subclasses
    data = np.arange(np.prod(data_shape)).reshape(data_shape).view(myarray)
    pristinedata = data.copy()
    target = [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31],
              [32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47],
              [48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63]]
    target = np.array(target).view(myarray)
    # expected per-feature index coordinates in the original dataspace
    index_target = np.array([[0, 0, 0], [0, 0, 1], [0, 0, 2], [0, 0, 3],
                             [0, 1, 0], [0, 1, 1], [0, 1, 2], [0, 1, 3],
                             [1, 0, 0], [1, 0, 1], [1, 0, 2], [1, 0, 3],
                             [1, 1, 0], [1, 1, 1], [1, 1, 2], [1, 1, 3]])
    # array subclass survives
    ok_(isinstance(data, myarray))

    # actually, there should be no difference between a plain FlattenMapper and
    # a chain that only has a FlattenMapper as the one element
    for fm in [FlattenMapper(inspace='voxel'),
               ChainMapper([FlattenMapper(inspace='voxel'),
                            FeatureSliceMapper(slice(None))])]:
        # not working if untrained
        assert_raises(RuntimeError,
                      fm.forward1,
                      np.arange(np.sum(samples_shape) + 1))

        fm.train(data)

        ok_(isinstance(fm.forward(data), myarray))
        ok_(isinstance(fm.forward1(data[2]), myarray))
        assert_array_equal(fm.forward(data), target)
        assert_array_equal(fm.forward1(data[2]), target[2])
        # wrong-shaped input must be rejected
        assert_raises(ValueError, fm.forward, np.arange(4))

        # all of that leaves that data unmodified
        assert_array_equal(data, pristinedata)

        # reverse mapping
        ok_(isinstance(fm.reverse(target), myarray))
        ok_(isinstance(fm.reverse1(target[0]), myarray))
        ok_(isinstance(fm.reverse(target[1:2]), myarray))
        assert_array_equal(fm.reverse(target), data)
        assert_array_equal(fm.reverse1(target[0]), data[0])
        assert_array_equal(fm.reverse(target[1:2]), data[1:2])
        assert_raises(ValueError, fm.reverse, np.arange(14))

        # check one dimensional data, treated as scalar samples
        oned = np.arange(5)
        fm.train(Dataset(oned))
        # needs 2D
        assert_raises(ValueError, fm.forward, oned)
        # doesn't match mapper, since Dataset turns `oned` into (5,1)
        assert_raises(ValueError, fm.forward, oned)
        assert_equal(Dataset(oned).nfeatures, 1)

        # try dataset mode, with some feature attribute
        fattr = np.arange(np.prod(samples_shape)).reshape(samples_shape)
        ds = Dataset(data, fa={'awesome': fattr.copy()})
        assert_equal(ds.samples.shape, data_shape)

        fm.train(ds)
        dsflat = fm.forward(ds)
        ok_(isinstance(dsflat, Dataset))
        ok_(isinstance(dsflat.samples, myarray))
        assert_array_equal(dsflat.samples, target)
        # feature attribute must get flattened along with the samples
        assert_array_equal(dsflat.fa.awesome, np.arange(np.prod(samples_shape)))
        assert_true(isinstance(dsflat.fa['awesome'], ArrayCollectable))
        # test index creation
        assert_array_equal(index_target, dsflat.fa.voxel)

        # and back
        revds = fm.reverse(dsflat)
        ok_(isinstance(revds, Dataset))
        ok_(isinstance(revds.samples, myarray))
        assert_array_equal(revds.samples, data)
        assert_array_equal(revds.fa.awesome, fattr)
        assert_true(isinstance(revds.fa['awesome'], ArrayCollectable))
        # the synthesized index attribute must not survive the reverse trip
        assert_false('voxel' in revds.fa)
def test_chainmapper():
    """Container operations, incremental slicing, repr round-trip and
    reverse mapping of a ChainMapper built on a FlattenMapper.
    """
    # an empty chain is not allowed
    assert_raises(ValueError, ChainMapper, [])
    # flattening is the typical first element of a chain
    chain = ChainMapper([FlattenMapper()])
    # basic container protocol
    assert_equal(len(chain), 1)
    assert_true(isinstance(chain[0], FlattenMapper))
    # make some data to train on
    samples_shape = (2, 2, 4)
    data_shape = (4, ) + samples_shape
    data = np.arange(np.prod(data_shape)).reshape(data_shape)
    pristinedata = data.copy()
    # flattened view of the same values: 4 samples x 16 features
    target = np.arange(np.prod(data_shape)).reshape(4, 16)
    # an untrained chain knows nothing -- train it now
    chain.train(data)
    # appending a slicer grows the chain by one mapper
    chain.append(FeatureSliceMapper(range(1, 16)))
    assert_equal(chain.forward1(data[0]).shape, (15, ))
    assert_equal(len(chain), 2)
    # slice once more on top of the previous selection
    chain.append(FeatureSliceMapper([9, 14]))
    assert_equal(chain.forward1(data[0]).shape, (2, ))
    assert_equal(len(chain), 3)
    # repr must evaluate back into an equivalent chain
    clone = eval(repr(chain))
    assert_equal(repr(clone), repr(chain))
    # retraining on the very same data must not change anything
    chain.train(data)
    assert_equal(chain.forward1(data[0]).shape, (2, ))
    assert_equal(len(chain), 3)
    # forward-map everything through the full chain
    mapped = chain.forward(data)
    assert_array_equal(mapped, target[:, [10, 15]])
    # and reverse again
    restored = chain.reverse(mapped)
    # the shape comes back completely ...
    assert_equal(restored.shape, data.shape)
    # ... the content only where features survived the slicing
    assert_array_equal(restored[restored > 0], data[restored > 0])
    assert_equal(np.sum(restored > 0), 8)
def test_chainmapper():
    """Container operations, incremental feature selection, repr
    round-trip and reverse mapping of a ChainMapper.
    """
    # the chain needs at least one mapper
    assert_raises(ValueError, ChainMapper, [])
    # a typical first mapper is to flatten
    cm = ChainMapper([FlattenMapper()])
    # few container checks
    assert_equal(len(cm), 1)
    assert_true(isinstance(cm[0], FlattenMapper))
    # now training
    # come up with data
    samples_shape = (2, 2, 4)
    data_shape = (4,) + samples_shape
    data = np.arange(np.prod(data_shape)).reshape(data_shape)
    pristinedata = data.copy()
    target = [[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31],
              [32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47],
              [48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63]]
    target = np.array(target)
    # if it is not trained it knows nothing
    cm.train(data)
    # a new mapper should appear when doing feature selection
    cm.append(StaticFeatureSelection(range(1,16)))
    assert_equal(cm.forward1(data[0]).shape, (15,))
    assert_equal(len(cm), 2)
    # multiple slicing
    cm.append(StaticFeatureSelection([9,14]))
    assert_equal(cm.forward1(data[0]).shape, (2,))
    assert_equal(len(cm), 3)

    # check reproduction via eval(repr(...))
    if __debug__:
        # debug mode needs special test as it enhances the repr output
        # with module info and id() appendix for objects
        import mvpa
        cm_clone = eval(repr(cm))
        # compare reprs with the trailing '#<id>' appendix stripped
        assert_equal('#'.join(repr(cm_clone).split('#')[:-1]),
                     '#'.join(repr(cm).split('#')[:-1]))
    else:
        cm_clone = eval(repr(cm))
        assert_equal(repr(cm_clone), repr(cm))

    # what happens if we retrain the whole beast on same data as before
    cm.train(data)
    assert_equal(cm.forward1(data[0]).shape, (2,))
    assert_equal(len(cm), 3)

    # let's map something
    mdata = cm.forward(data)
    assert_array_equal(mdata, target[:,[10,15]])
    # and back
    rdata = cm.reverse(mdata)
    # original shape
    assert_equal(rdata.shape, data.shape)
    # content as far as it could be restored
    assert_array_equal(rdata[rdata > 0], data[rdata > 0])
    assert_equal(np.sum(rdata > 0), 8)