def test_chainmapper():
    """Exercise ChainMapper construction, growth, retraining and round-trips.

    A chain is started with a single FlattenMapper, trained on a 4-sample
    dataset of shape (2, 2, 4) per sample, and then extended twice with
    FeatureSliceMapper instances. After each step the length of the chain and
    the shape of a forward-mapped sample are verified; finally forward and
    reverse mapping of the full dataset are compared with the expected values.
    """
    # the chain needs at least one mapper
    assert_raises(ValueError, ChainMapper, [])
    # a typical first mapper is to flatten
    cm = ChainMapper([FlattenMapper()])

    # few container checks
    assert_equal(len(cm), 1)
    assert_true(isinstance(cm[0], FlattenMapper))

    # now training
    # come up with data
    samples_shape = (2, 2, 4)
    data_shape = (4, ) + samples_shape
    data = np.arange(np.prod(data_shape)).reshape(data_shape)
    # expected result of flattening `data`: one row of 16 features per sample
    target = [[0, 1, 2, 3, 4, 5, 6, 7,
               8, 9, 10, 11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20, 21, 22, 23,
               24, 25, 26, 27, 28, 29, 30, 31],
              [32, 33, 34, 35, 36, 37, 38, 39,
               40, 41, 42, 43, 44, 45, 46, 47],
              [48, 49, 50, 51, 52, 53, 54, 55,
               56, 57, 58, 59, 60, 61, 62, 63]]
    target = np.array(target)

    # train the chain before it is used for any mapping
    cm.train(data)

    # a new mapper should appear when doing feature selection
    cm.append(FeatureSliceMapper(range(1, 16)))
    assert_equal(cm.forward1(data[0]).shape, (15, ))
    assert_equal(len(cm), 2)
    # multiple slicing
    cm.append(FeatureSliceMapper([9, 14]))
    assert_equal(cm.forward1(data[0]).shape, (2, ))
    assert_equal(len(cm), 3)

    # check reproduction: the repr must evaluate to an equivalent chain
    cm_clone = eval(repr(cm))
    assert_equal(repr(cm_clone), repr(cm))

    # what happens if we retrain the whole beast on the same data as before
    cm.train(data)
    assert_equal(cm.forward1(data[0]).shape, (2, ))
    assert_equal(len(cm), 3)

    # let's map something
    # (indices 9 and 14 of the 1..15 slice are flat features 10 and 15)
    mdata = cm.forward(data)
    assert_array_equal(mdata, target[:, [10, 15]])
    # and back
    rdata = cm.reverse(mdata)
    # original shape
    assert_equal(rdata.shape, data.shape)
    # content as far as it could be restored
    assert_array_equal(rdata[rdata > 0], data[rdata > 0])
    # 4 samples x 2 surviving features
    assert_equal(np.sum(rdata > 0), 8)
def test_subset():
    """Check FeatureSliceMapper forward/reverse mapping and mapper merging.

    Covers three things: a float-valued selection is rejected on forward
    mapping, a full-range slice leaves data untouched in both directions, and
    merging equivalent selections (index list, slice, boolean mask, repeated
    indices) into equivalent full-range mappers yields the expected features.
    """
    # four samples with sixteen consecutive integer features each
    data = np.arange(64).reshape((4, 16))

    # float array doesn't work
    sm = FeatureSliceMapper(np.ones(16))
    assert_raises(IndexError, sm.forward, data)

    # a full-range slice must act as identity ...
    sm = FeatureSliceMapper(slice(None))
    # ... for a single sample
    assert_array_equal(sm.forward(data[0:1].copy()), data[0:1])
    # ... and for several samples
    assert_array_equal(sm.forward(data.copy()), data)
    sm.train(data)
    # the reverse direction behaves the same way
    assert_array_equal(sm.reverse(data[0:1].copy()), data[0:1])
    assert_array_equal(sm.reverse(data.copy()), data)

    # three ways of expressing "all sixteen features"
    sm_none = FeatureSliceMapper(slice(None))
    sm_int = FeatureSliceMapper(np.arange(16))
    sm_bool = FeatureSliceMapper(np.ones(16, dtype='bool'))

    # selections to merge in; the last one repeats features on purpose
    sids = [3, 4, 5, 6]
    bsubset = np.zeros(16, dtype='bool')
    bsubset[sids] = True
    subsets = [sids, slice(3, 7), bsubset, [3, 3, 4, 4, 6, 6, 6, 5]]

    # every combination of base mapper and selection is merged in-place into
    # a shallow copy of the base mapper
    for base_mapper in (sm_none, sm_int, sm_bool):
        for position, selection in enumerate(subsets):
            merged = copy(base_mapper)
            selector = FeatureSliceMapper(selection)
            # should do copy-on-write for all important stuff!!
            assert_true(merged.is_mergable(selector))
            merged += selector
            # the repeated-index selection reproduces its own index list,
            # every other selection boils down to `sids`
            expected = subsets[position] if position == 3 else sids
            assert_array_equal(merged.forward1(data[0].copy()), expected)

    ## all of the above shouldn't change the original mapper
    #assert_array_equal(sm.get_mask(), np.arange(16))

    # check for some bug catcher
    # no 3D input
    #assert_raises(IndexError, sm.forward, np.ones((3,2,1)))
    # no input of wrong length
    if __debug__:
        # checked only in __debug__
        assert_raises(ValueError, sm.forward, np.ones(4))
def test_repr():
    """A FeatureSliceMapper must be reconstructible from its own repr()."""
    # full-range selection with a named input space
    mapper = FeatureSliceMapper(slice(None), inspace='myspace')
    # evaluate the textual form and compare the reprs of both instances
    rebuilt = eval(repr(mapper))
    assert_equal(repr(rebuilt), repr(mapper))
def __getitem__(self, args):
    """Slice the dataset and keep an attached mapper (if any) in sync.

    Parameters
    ----------
    args : slicing spec or tuple of slicing specs
      A single spec is wrapped into a 1-tuple. If a second element is
      present it is used as the feature selection. A feature selection
      given as an array with more than one dimension is first passed
      through ``self.a.mapper.forward1()`` when a 'mapper' attribute exists.

    Returns
    -------
    The sliced dataset produced by the base class ``__getitem__``. If
    features were selected and the result carries a 'mapper' attribute, a
    matching FeatureSliceMapper has been appended to it.
    """
    # uniformize for checks below; it is not a tuple if just single slicing
    # spec is passed
    if not isinstance(args, tuple):
        args = (args, )

    # if we get a slicing array for feature selection and it is *not* 1D
    # try feeding it through the mapper (if there is any).
    # Membership is tested with `in` (not the Python-2-only `has_key`),
    # matching the check on the sliced result further down.
    if len(args) > 1 and isinstance(args[1], np.ndarray) \
       and len(args[1].shape) > 1 \
       and 'mapper' in self.a:
        args = list(args)
        args[1] = self.a.mapper.forward1(args[1])
        args = tuple(args)

    # let the base do the work
    ds = super(Dataset, self).__getitem__(args)

    # and adjusting the mapper (if any)
    if len(args) > 1 and 'mapper' in ds.a:
        # create matching mapper
        # the mapper is just appended to the dataset. It could also be
        # actually used to perform the slicing and prevent duplication of
        # functionality between the Dataset.__getitem__ and the mapper.
        # However, __getitem__ is sometimes more efficient, since it can
        # slice samples and feature axis at the same time. Moreover, the
        # mvpa.base.dataset.Dataset has no clue about mappers and should
        # be fully functional without them.
        subsetmapper = FeatureSliceMapper(args[1],
                                          dshape=self.samples.shape[1:])
        # do not-act forward mapping to charge the output shape of the
        # slice mapper without having it to train on a full dataset (which
        # is most likely more expensive)
        subsetmapper.forward(np.zeros((1, ) + self.shape[1:], dtype='bool'))
        # mapper is ready to use -- simply store
        ds._append_mapper(subsetmapper)

    return ds
def __getitem__(self, args):
    """Slice the dataset and keep an attached mapper (if any) in sync.

    Parameters
    ----------
    args : slicing spec or tuple of slicing specs
      A single spec is wrapped into a 1-tuple. If a second element is
      present it is used as the feature selection. A feature selection
      given as an array with more than one dimension is first passed
      through ``self.a.mapper.forward1()`` when a 'mapper' attribute exists.

    Returns
    -------
    The sliced dataset produced by the base class ``__getitem__``. If
    features were selected and the result carries a 'mapper' attribute, a
    matching FeatureSliceMapper has been appended to it.
    """
    # uniformize for checks below; it is not a tuple if just single slicing
    # spec is passed
    if not isinstance(args, tuple):
        args = (args,)

    # if we get a slicing array for feature selection and it is *not* 1D
    # try feeding it through the mapper (if there is any).
    # Membership is tested with `in` (not the Python-2-only `has_key`),
    # matching the check on the sliced result further down.
    if len(args) > 1 and isinstance(args[1], np.ndarray) \
       and len(args[1].shape) > 1 \
       and 'mapper' in self.a:
        args = list(args)
        args[1] = self.a.mapper.forward1(args[1])
        args = tuple(args)

    # let the base do the work
    ds = super(Dataset, self).__getitem__(args)

    # and adjusting the mapper (if any)
    if len(args) > 1 and 'mapper' in ds.a:
        # create matching mapper
        # the mapper is just appended to the dataset. It could also be
        # actually used to perform the slicing and prevent duplication of
        # functionality between the Dataset.__getitem__ and the mapper.
        # However, __getitem__ is sometimes more efficient, since it can
        # slice samples and feature axis at the same time. Moreover, the
        # mvpa.base.dataset.Dataset has no clue about mappers and should
        # be fully functional without them.
        subsetmapper = FeatureSliceMapper(args[1],
                                          dshape=self.samples.shape[1:])
        # do not-act forward mapping to charge the output shape of the
        # slice mapper without having it to train on a full dataset (which
        # is most likely more expensive)
        subsetmapper.forward(np.zeros((1,) + self.shape[1:], dtype='bool'))
        # mapper is ready to use -- simply store
        ds._append_mapper(subsetmapper)

    return ds
def test_selects():
    """Chain successive feature selections onto a mask mapper.

    A (3, 2) boolean mask with one element switched off is turned into a
    mapper via ``mask_mapper()``. Two FeatureSliceMapper instances are then
    appended and the forward-mapped data is checked after each step. Finally
    the mask that was passed in is verified to be unchanged.
    """
    mask = np.ones((3, 2), dtype='bool')
    mask[1, 1] = 0
    mask0 = mask.copy()
    data = np.arange(6).reshape(mask.shape)
    map_ = mask_mapper(mask)

    # check if any exception is thrown if we get
    # out of the outIds
    #assert_raises(IndexError, map_.select_out, [0,1,2,6])

    # remove 1,2
    map_.append(FeatureSliceMapper([0, 3, 4]))
    assert_array_equal(map_.forward1(data), [0, 4, 5])
    # remove 1 more
    map_.append(FeatureSliceMapper([0, 2]))
    assert_array_equal(map_.forward1(data), [0, 5])

    # check if original mask wasn't perturbed
    assert_array_equal(mask, mask0)
def test_subset_filler():
    """Check the `filler` argument of FeatureSliceMapper on reverse mapping.

    Four mappers selecting the first three features are built: one without a
    filler and one each with filler 0, -1 and NaN. All must forward-map
    identically; for the -1 and NaN variants the columns beyond the selected
    ones must hold the filler value after reverse mapping.
    """
    plain = FeatureSliceMapper(np.arange(3))
    zero_filled = FeatureSliceMapper(np.arange(3), filler=0)
    minus_one_filled = FeatureSliceMapper(np.arange(3), filler=-1)
    nan_filled = FeatureSliceMapper(np.arange(3), filler=np.nan)

    # two samples of six float features
    data = np.arange(12, dtype=float).reshape((2, -1))

    # reference output comes from the mapper without an explicit filler
    plain.train(data)
    reference = plain.forward(data)

    # the filler must not influence forward mapping
    for mapper in (plain, zero_filled, minus_one_filled, nan_filled):
        mapper.train(data)
        assert_array_equal(reference, mapper.forward(data))

    # features that were not selected come back as the filler value
    restored_minus_one = minus_one_filled.reverse(reference)
    ok_(np.all(restored_minus_one[:, 3:] == -1))
    restored_nan = nan_filled.reverse(reference)
    ok_(np.all(np.isnan(restored_nan[:, 3:])))
def mask_mapper(mask=None, shape=None, inspace=None):
    """Factory method to create a chain of Flatten+FeatureSlice Mappers

    Parameters
    ----------
    mask : None or array
      an array in the original dataspace and its nonzero elements are used
      to define the features included in the dataset. Alternatively, the
      `shape` argument can be used to define the array dimensions.
    shape : None or tuple
      The shape of the array to be mapped. If `shape` is provided instead
      of `mask`, a full mask (all True) of the desired shape is
      constructed. If `shape` is specified in addition to `mask`, the
      provided mask is extended to have the same number of dimensions.
    inspace
      Provided to `FlattenMapper`

    Returns
    -------
    ChainMapper
      A chain of a FlattenMapper (built for the mask's shape) followed by a
      FeatureSliceMapper built from the flattened mask.

    Raises
    ------
    ValueError
      If neither `mask` nor `shape` is given, or if the (dimension-extended)
      mask does not have exactly the requested `shape`.
    """
    if mask is None:
        if shape is None:
            raise ValueError(
                "Either `shape` or `mask` have to be specified.")
        # make full dataspace mask if nothing else is provided
        mask = np.ones(shape, dtype='bool')
    else:
        # accept any array-like; arrays and array subclasses pass through
        # untouched (no copy)
        mask = np.asanyarray(mask)
        if shape is not None:
            # allow any sequence (e.g. a list) to describe the shape
            shape = tuple(shape)
            # expand mask to span all dimensions but first one
            # necessary e.g. if only one slice from timeseries of volumes is
            # requested. Missing leading axes of length one are prepended,
            # which never requires copying the data.
            missing = len(shape) - mask.ndim
            if missing > 0:
                mask = mask.reshape((1,) * missing + mask.shape)
            # check for compatibility
            if shape != mask.shape:
                raise ValueError(
                    "The mask dataspace shape %s is not "
                    "compatible with the provided shape %s."
                    % (mask.shape, shape))

    fm = FlattenMapper(shape=mask.shape, inspace=inspace)
    flatmask = fm.forward1(mask)
    # the slice mapper selects the nonzero mask elements of the flattened
    # space; its output size is the number of those elements
    mapper = ChainMapper([fm,
                          FeatureSliceMapper(
                              flatmask,
                              dshape=flatmask.shape,
                              oshape=(len(flatmask.nonzero()[0]),))])
    return mapper
def test_flatten():
    """Check FlattenMapper alone and as the first element of a ChainMapper.

    Both variants are trained on four samples of shape (2, 2, 4) stored in a
    `myarray` instance. Forward and reverse mapping are verified for plain
    arrays (including survival of the array subclass) and for Dataset
    instances carrying a feature attribute.
    """
    samples_shape = (2, 2, 4)
    data_shape = (4, ) + samples_shape
    data = np.arange(np.prod(data_shape)).reshape(data_shape).view(myarray)
    pristinedata = data.copy()
    # expected flattened samples: one row of 16 consecutive values per sample
    target = np.arange(64).reshape((4, 16)).view(myarray)
    # expected per-feature indices into the original sample shape, in C order
    index_target = np.array(list(np.ndindex(*samples_shape)))

    # array subclass survives
    ok_(isinstance(data, myarray))

    # actually, there should be no difference between a plain FlattenMapper and
    # a chain that only has a FlattenMapper as the one element
    candidates = (
        FlattenMapper(inspace='voxel'),
        ChainMapper([FlattenMapper(inspace='voxel'),
                     FeatureSliceMapper(slice(None))]),
    )
    for mapper in candidates:
        # not working if untrained
        assert_raises(RuntimeError,
                      mapper.forward1,
                      np.arange(np.sum(samples_shape) + 1))

        mapper.train(data)

        # forward mapping keeps the array subclass ...
        ok_(isinstance(mapper.forward(data), myarray))
        ok_(isinstance(mapper.forward1(data[2]), myarray))
        # ... and yields the expected values
        assert_array_equal(mapper.forward(data), target)
        assert_array_equal(mapper.forward1(data[2]), target[2])
        assert_raises(ValueError, mapper.forward, np.arange(4))

        # all of that leaves that data unmodified
        assert_array_equal(data, pristinedata)

        # reverse mapping
        ok_(isinstance(mapper.reverse(target), myarray))
        ok_(isinstance(mapper.reverse1(target[0]), myarray))
        ok_(isinstance(mapper.reverse(target[1:2]), myarray))
        assert_array_equal(mapper.reverse(target), data)
        assert_array_equal(mapper.reverse1(target[0]), data[0])
        assert_array_equal(mapper.reverse(target[1:2]), data[1:2])
        assert_raises(ValueError, mapper.reverse, np.arange(14))

        # check one dimensional data, treated as scalar samples
        scalars = np.arange(5)
        mapper.train(Dataset(scalars))
        # needs 2D
        assert_raises(ValueError, mapper.forward, scalars)
        # doesn't match mapper, since Dataset turns `scalars` into (5,1)
        assert_raises(ValueError, mapper.forward, scalars)
        assert_equal(Dataset(scalars).nfeatures, 1)

        # try dataset mode, with some feature attribute
        feature_attr = np.arange(np.prod(samples_shape)).reshape(samples_shape)
        dataset = Dataset(data, fa={'awesome': feature_attr.copy()})
        assert_equal(dataset.samples.shape, data_shape)
        mapper.train(dataset)
        flat_ds = mapper.forward(dataset)
        ok_(isinstance(flat_ds, Dataset))
        ok_(isinstance(flat_ds.samples, myarray))
        assert_array_equal(flat_ds.samples, target)
        assert_array_equal(flat_ds.fa.awesome,
                           np.arange(np.prod(samples_shape)))
        assert_true(isinstance(flat_ds.fa['awesome'], ArrayCollectable))
        # test index creation
        assert_array_equal(index_target, flat_ds.fa.voxel)

        # and back
        restored_ds = mapper.reverse(flat_ds)
        ok_(isinstance(restored_ds, Dataset))
        ok_(isinstance(restored_ds.samples, myarray))
        assert_array_equal(restored_ds.samples, data)
        assert_array_equal(restored_ds.fa.awesome, feature_attr)
        assert_true(isinstance(restored_ds.fa['awesome'], ArrayCollectable))
        assert_false('voxel' in restored_ds.fa)
def test_subset():
    """Check FeatureSliceMapper forward/reverse mapping and mapper merging.

    Covers three things: a float-valued selection is rejected on forward
    mapping, a full-range slice leaves data untouched in both directions, and
    merging equivalent selections (index list, slice, boolean mask, repeated
    indices) into equivalent full-range mappers yields the expected features.
    """
    # four samples with sixteen consecutive integer features each
    data = np.arange(64).reshape((4, 16))

    # float array doesn't work
    sm = FeatureSliceMapper(np.ones(16))
    assert_raises(IndexError, sm.forward, data)

    # a full-range slice must act as identity ...
    sm = FeatureSliceMapper(slice(None))
    # ... for a single sample
    assert_array_equal(sm.forward(data[0:1].copy()), data[0:1])
    # ... and for several samples
    assert_array_equal(sm.forward(data.copy()), data)
    sm.train(data)
    # the reverse direction behaves the same way
    assert_array_equal(sm.reverse(data[0:1].copy()), data[0:1])
    assert_array_equal(sm.reverse(data.copy()), data)

    # three ways of expressing "all sixteen features"
    sm_none = FeatureSliceMapper(slice(None))
    sm_int = FeatureSliceMapper(np.arange(16))
    sm_bool = FeatureSliceMapper(np.ones(16, dtype='bool'))

    # selections to merge in; the last one repeats features on purpose
    sids = [3, 4, 5, 6]
    bsubset = np.zeros(16, dtype='bool')
    bsubset[sids] = True
    subsets = [sids, slice(3, 7), bsubset, [3, 3, 4, 4, 6, 6, 6, 5]]

    # every combination of base mapper and selection is merged in-place into
    # a shallow copy of the base mapper
    for base_mapper in (sm_none, sm_int, sm_bool):
        for position, selection in enumerate(subsets):
            merged = copy(base_mapper)
            selector = FeatureSliceMapper(selection)
            # should do copy-on-write for all important stuff!!
            assert_true(merged.is_mergable(selector))
            merged += selector
            # the repeated-index selection reproduces its own index list,
            # every other selection boils down to `sids`
            expected = subsets[position] if position == 3 else sids
            assert_array_equal(merged.forward1(data[0].copy()), expected)

    ## all of the above shouldn't change the original mapper
    #assert_array_equal(sm.get_mask(), np.arange(16))

    # check for some bug catcher
    # no 3D input
    #assert_raises(IndexError, sm.forward, np.ones((3,2,1)))
    # no input of wrong length
    if __debug__:
        # checked only in __debug__
        assert_raises(ValueError, sm.forward, np.ones(4))