def __getitem__(self, args):
    """Slice the dataset and keep an attached mapper (if any) in sync.

    Supports dict args for attribute-based "fancy" selection and pushes
    non-1D feature-selection arrays through the dataset's mapper first.
    A matching ``StaticFeatureSelection`` is appended to the result's
    mapper whenever features were sliced.
    """
    # uniformize for checks below; it is not a tuple if just single slicing
    # spec is passed
    if not isinstance(args, tuple):
        args = (args, )

    # if we get a slicing array for feature selection and it is *not* 1D
    # try feeding it through the mapper (if there is any)
    # NOTE: was `self.a.has_key('mapper')` -- py2-only and inconsistent
    # with the `'mapper' in ds.a` membership test used below
    if len(args) > 1 and isinstance(args[1], np.ndarray) \
       and len(args[1].shape) > 1 \
       and 'mapper' in self.a:
        args = list(args)
        args[1] = self.a.mapper.forward1(args[1])

    # check if any of the args is a dict, which would require fancy selection
    args_ = []
    for i, arg in enumerate(args):
        if isinstance(arg, dict):
            # position 0 selects on sample attributes, 1 on feature attributes
            col = (self.sa, self.fa)[i]
            args_.append(col.match(arg))
        else:
            args_.append(arg)
    args = tuple(args_)

    # let the base do the work
    ds = super(Dataset, self).__getitem__(args)

    # and adjusting the mapper (if any)
    if len(args) > 1 and 'mapper' in ds.a:
        # create matching mapper
        # the mapper is just appended to the dataset. It could also be
        # actually used to perform the slicing and prevent duplication of
        # functionality between the Dataset.__getitem__ and the mapper.
        # However, __getitem__ is sometimes more efficient, since it can
        # slice samples and feature axis at the same time. Moreover, the
        # mvpa2.base.dataset.Dataset has no clue about mappers and should
        # be fully functional without them.
        subsetmapper = StaticFeatureSelection(
            args[1], dshape=self.samples.shape[1:])
        # do not-act forward mapping to charge the output shape of the
        # slice mapper without having it to train on a full dataset (which
        # is most likely more expensive)
        subsetmapper.forward(np.zeros((1, ) + self.shape[1:], dtype='bool'))
        # mapper is ready to use -- simply store
        ds._append_mapper(subsetmapper)
    return ds
def test_repr():
    # a mapper carrying a full slice and an explicit space name
    mapper = StaticFeatureSelection(slice(None), space='myspace')
    # its repr must evaluate back to an equivalent object
    rebuilt = eval(repr(mapper))
    assert_equal(repr(rebuilt), repr(mapper))
def test_static_reverse_doesnt_work_after_feature_selection_tuneup_1():
    # any dataset would do here
    src = datasets['uni2small'].copy()
    sel = StaticFeatureSelection(np.arange(4))
    sel.train(src)
    ds = src.get_mapped(sel)
    # reversing a single mapped sample works at this point
    sample_rev = ds.a.mapper.reverse1(ds.samples[0])
    assert_equal(sample_rev.shape, (src.nfeatures,))
    # direct feature selection on the mapped dataset
    ds_sub = ds[:, [0, 2]]
    # reversing should still work but currently fails with
    # RuntimeError: Cannot reverse-map data since the original data shape
    # is unknown. Either set `dshape` in the constructor, or call train().
    sub_rev = ds_sub.a.mapper.reverse1(ds_sub.samples[0])
    #sub_rev = _verified_reverse1(ds_sub.a.mapper, ds_sub.samples[0])
    assert_equal(sub_rev.shape, (src.nfeatures,))
def test_remove_invariant_as_a_mapper():
    from mvpa2.featsel.helpers import RangeElementSelector
    from mvpa2.featsel.base import StaticFeatureSelection, \
        SensitivityBasedFeatureSelection
    from mvpa2.testing.datasets import datasets
    from mvpa2.datasets.miscfx import remove_invariant_features

    # keep every feature whose std across samples is strictly positive
    invariance_selector = SensitivityBasedFeatureSelection(
        lambda x: np.std(x, axis=0),
        RangeElementSelector(lower=0, inclusive=False),
        train_analyzer=False,
        auto_train=True)

    ds = datasets['uni2large'].copy()
    ds.a['mapper'] = StaticFeatureSelection(np.arange(ds.nfeatures))
    ds.fa['index'] = np.arange(ds.nfeatures)
    # force two features to be constant
    ds.samples[:, [1, 8]] = 10
    ds_out = invariance_selector(ds)

    # Validate that we are getting the same results as
    # remove_invariant_features
    ds_rifs = remove_invariant_features(ds)
    assert_array_equal(ds_out.samples, ds_rifs.samples)
    assert_array_equal(ds_out.fa.index, ds_rifs.fa.index)

    # dropping the two constant features shifts the retained indices
    assert_equal(ds_out.fa.index[1], 2)
    assert_equal(ds_out.fa.index[8], 10)
def _concat_results(sl=None, dataset=None, roi_ids=None, results=None): """The simplest implementation for collecting the results -- just put them into a list This this implementation simply collects them into a list and uses only sl. for assigning conditional attributes. But custom implementation might make use of more/less of them. Implemented as @staticmethod just to emphasize that in principle it is independent of the actual searchlight instance """ # collect results results = sum(results, []) if __debug__ and 'SLC' in debug.active: debug('SLC', '') # just newline resshape = len(results) and np.asanyarray(results[0]).shape or 'N/A' debug('SLC', ' hstacking %d results of shape %s' % (len(results), resshape)) # but be careful: this call also serves as conversion from parallel maps # to regular lists! # this uses the Dataset-hstack result_ds = hstack(results) if __debug__: debug('SLC', " hstacked shape %s" % (result_ds.shape,)) if sl.ca.is_enabled('roi_feature_ids'): sl.ca.roi_feature_ids = [r.a.roi_feature_ids for r in results] if sl.ca.is_enabled('roi_sizes'): sl.ca.roi_sizes = [r.a.roi_sizes for r in results] if sl.ca.is_enabled('roi_center_ids'): sl.ca.roi_center_ids = [r.a.roi_center_ids for r in results] if 'mapper' in dataset.a: # since we know the space we can stick the original mapper into the # results as well if roi_ids is None: result_ds.a['mapper'] = copy.copy(dataset.a.mapper) else: # there is an additional selection step that needs to be # expressed by another mapper mapper = copy.copy(dataset.a.mapper) # NNO if the orignal mapper has no append (because it's not a # chainmapper, for example), we make our own chainmapper. feat_sel_mapper = StaticFeatureSelection( roi_ids, dshape=dataset.shape[1:]) if hasattr(mapper, 'append'): mapper.append(feat_sel_mapper) else: mapper = ChainMapper([dataset.a.mapper, feat_sel_mapper]) result_ds.a['mapper'] = mapper # store the center ids as a feature attribute result_ds.fa['center_ids'] = roi_ids return result_ds
def __getitem__(self, args):
    """Slice the dataset while keeping an attached mapper (if any) in sync.

    Dict arguments trigger attribute-matching selection; non-1D feature
    selection arrays are first forward-mapped through ``self.a.mapper``.
    When features are sliced, a ``StaticFeatureSelection`` reflecting the
    selection is appended to the result dataset's mapper.
    """
    # uniformize for checks below; it is not a tuple if just single slicing
    # spec is passed
    if not isinstance(args, tuple):
        args = (args,)

    # if we get a slicing array for feature selection and it is *not* 1D
    # try feeding it through the mapper (if there is any)
    # NOTE: `self.a.has_key('mapper')` was py2-only and inconsistent with
    # the `'mapper' in ds.a` check below -- use the `in` operator
    if len(args) > 1 and isinstance(args[1], np.ndarray) \
       and len(args[1].shape) > 1 \
       and 'mapper' in self.a:
        args = list(args)
        args[1] = self.a.mapper.forward1(args[1])

    # check if any of the args is a dict, which would require fancy selection
    args_ = []
    for i, arg in enumerate(args):
        if isinstance(arg, dict):
            # index 0 -> sample attributes, index 1 -> feature attributes
            col = (self.sa, self.fa)[i]
            args_.append(col.match(arg))
        else:
            args_.append(arg)
    args = tuple(args_)

    # let the base do the work
    ds = super(Dataset, self).__getitem__(args)

    # and adjusting the mapper (if any)
    if len(args) > 1 and 'mapper' in ds.a:
        # create matching mapper
        # the mapper is just appended to the dataset. It could also be
        # actually used to perform the slicing and prevent duplication of
        # functionality between the Dataset.__getitem__ and the mapper.
        # However, __getitem__ is sometimes more efficient, since it can
        # slice samples and feature axis at the same time. Moreover, the
        # mvpa2.base.dataset.Dataset has no clue about mappers and should
        # be fully functional without them.
        subsetmapper = StaticFeatureSelection(args[1],
                                              dshape=self.samples.shape[1:])
        # do not-act forward mapping to charge the output shape of the
        # slice mapper without having it to train on a full dataset (which
        # is most likely more expensive)
        subsetmapper.forward(np.zeros((1,) + self.shape[1:], dtype='bool'))
        # mapper is ready to use -- simply store
        ds._append_mapper(subsetmapper)
    return ds
def _call(self, dataset):
    """Perform the ROI search.
    """
    # local binding
    nproc = self.nproc

    if nproc is None and externals.exists('pprocess'):
        import pprocess
        try:
            nproc = pprocess.get_number_of_cores() or 1
        except AttributeError:
            warning("pprocess version %s has no API to figure out maximal "
                    "number of cores. Using 1"
                    % externals.versions['pprocess'])
            nproc = 1
    # train the queryengine
    self._queryengine.train(dataset)

    # decide whether to run on all possible center coords or just a provided
    # subset
    if isinstance(self.__roi_ids, str):
        roi_ids = dataset.fa[self.__roi_ids].value.nonzero()[0]
    elif self.__roi_ids is not None:
        roi_ids = self.__roi_ids
        # safeguard against stupidity
        if __debug__:
            if max(roi_ids) >= dataset.nfeatures:
                # call-form raise: the old `raise IndexError, "..."`
                # statement form is a SyntaxError under Python 3
                raise IndexError(
                    "Maximal center_id found is %s whenever given "
                    "dataset has only %d features"
                    % (max(roi_ids), dataset.nfeatures))
    else:
        roi_ids = np.arange(dataset.nfeatures)

    # pass to subclass
    results = self._sl_call(dataset, roi_ids, nproc)

    if 'mapper' in dataset.a:
        # since we know the space we can stick the original mapper into the
        # results as well
        if self.__roi_ids is None:
            results.a['mapper'] = copy.copy(dataset.a.mapper)
        else:
            # there is an additional selection step that needs to be
            # expressed by another mapper
            mapper = copy.copy(dataset.a.mapper)
            mapper.append(
                StaticFeatureSelection(roi_ids, dshape=dataset.shape[1:]))
            results.a['mapper'] = mapper

    # charge state
    self.ca.raw_results = results

    # return raw results, base-class will take care of transformations
    return results
def test_selects():
    """Chained feature selections must compose, leaving the mask intact."""
    mask = np.ones((3, 2), dtype='bool')
    mask[1, 1] = 0
    mask0 = mask.copy()
    data = np.arange(6).reshape(mask.shape)
    map_ = mask_mapper(mask)

    # check if any exception is thrown if we get
    # out of the outIds
    #assert_raises(IndexError, map_.select_out, [0,1,2,6])

    # remove 1,2
    map_.append(StaticFeatureSelection([0, 3, 4]))
    assert_array_equal(map_.forward1(data), [0, 4, 5])
    # remove 1 more
    map_.append(StaticFeatureSelection([0, 2]))
    assert_array_equal(map_.forward1(data), [0, 5])

    # check if original mask wasn't perturbed
    # (the original carried this comment+assert twice verbatim -- the
    # duplicate was removed)
    assert_array_equal(mask, mask0)
def test_subset_filler():
    # one base mapper with the default filler plus variants with
    # explicit filler values
    base = StaticFeatureSelection(np.arange(3))
    variants = [StaticFeatureSelection(np.arange(3), filler=value)
                for value in (0, -1, np.nan)]
    samples = np.arange(12).astype(float).reshape((2, -1))

    base.train(samples)
    forwarded = base.forward(samples)
    # the filler value must not influence the forward direction
    for mapper in [base] + variants:
        mapper.train(samples)
        assert_array_equal(forwarded, mapper.forward(samples))

    # on reverse, deselected features are padded with the filler
    restored_neg = variants[1].reverse(forwarded)
    ok_(np.all(restored_neg[:, 3:] == -1))
    restored_nan = variants[2].reverse(forwarded)
    ok_(np.all(np.isnan(restored_nan[:, 3:])))
def mask_mapper(mask=None, shape=None, space=None):
    """Factory method to create a chain of Flatten+StaticFeatureSelection Mappers

    Parameters
    ----------
    mask : None or array
      an array in the original dataspace and its nonzero elements are
      used to define the features included in the dataset. Alternatively,
      the `shape` argument can be used to define the array dimensions.
    shape : None or tuple
      The shape of the array to be mapped. If `shape` is provided instead
      of `mask`, a full mask (all True) of the desired shape is
      constructed. If `shape` is specified in addition to `mask`, the
      provided mask is extended to have the same number of dimensions.
    space : None or str
      Provided to `FlattenMapper`
    """
    if mask is None:
        if shape is None:
            # call-form raise: the py2 `raise ValueError, "..."` statement
            # form is a SyntaxError under Python 3
            raise ValueError("Either `shape` or `mask` have to be specified.")
        # make full dataspace mask if nothing else is provided
        mask = np.ones(shape, dtype='bool')
    else:
        if shape is not None:
            # expand mask to span all dimensions but first one
            # necessary e.g. if only one slice from timeseries of volumes is
            # requested.
            mask = np.array(mask, copy=False, subok=True, ndmin=len(shape))
            # check for compatibility
            if shape != mask.shape:
                raise ValueError(
                    "The mask dataspace shape %s is not "
                    "compatible with the provided shape %s."
                    % (mask.shape, shape))

    fm = FlattenMapper(shape=mask.shape, space=space)
    flatmask = fm.forward1(mask)
    mapper = ChainMapper([
        fm,
        StaticFeatureSelection(
            flatmask,
            dshape=flatmask.shape,
            oshape=(len(flatmask.nonzero()[0]), ))
    ])
    return mapper
def _train(self, ds):
    """Run the incremental feature search on dataset `ds`.

    Repeatedly evaluates every remaining candidate feature in combination
    with the already selected ones, lets ``self._fselector`` stage the
    most promising candidates, and records a performance error for each
    enlarged selection until ``self._stopping_criterion`` fires.  Whenever
    ``self._bestdetector`` flags the current selection as the best so far,
    it is announced via ``_safe_assign_slicearg``.  The error history ends
    up in ``self.ca.errors``.
    """
    # local binding
    fmeasure = self._fmeasure
    fselector = self._fselector
    scriterion = self._stopping_criterion
    bestdetector = self._bestdetector

    # init
    # Computed error for each tested features set.
    errors = []
    # feature candidate are all features in the pattern object
    candidates = list(range(ds.nfeatures))
    # initially empty list of selected features
    selected = []
    # results in here please
    results = None

    # as long as there are candidates left
    # the loop will most likely get broken earlier if the stopping
    # criterion is reached
    while len(candidates):
        # measures for all candidates
        measures = []

        # for all possible candidates
        for i, candidate in enumerate(candidates):
            if __debug__:
                debug('IFSC', "Tested %i" % i, cr=True)

            # take the new candidate and all already selected features
            # select a new temporay feature subset from the dataset
            # slice the full dataset, because for the initial iteration
            # steps this will be much mure effecient than splitting the
            # full ds into train and test at first
            fslm = StaticFeatureSelection(selected + [candidate])
            fslm.train(ds)
            candidate_ds = fslm(ds)
            # activate the dataset splitter
            dsgen = self._splitter.generate(candidate_ds)
            # and derived the dataset part that is used for computing the
            # selection criterion
            trainds = next(dsgen)
            # compute data measure on the training part of this feature set
            measures.append(fmeasure(trainds))

        # relies on ds.item() to work properly
        measures = [np.asscalar(m) for m in measures]

        # Select promissing feature candidates (staging)
        # IDs are only applicable to the current set of feature candidates
        tmp_staging_ids = fselector(measures)

        # translate into real candidate ids
        staging_ids = [candidates[i] for i in tmp_staging_ids]

        # mark them as selected and remove from candidates
        selected += staging_ids
        for i in staging_ids:
            candidates.remove(i)

        # actually run the performance measure to estimate "quality" of
        # selection
        fslm = StaticFeatureSelection(selected)
        fslm.train(ds)
        selectedds = fslm(ds)

        # split into train and test part
        trainds, testds = self._get_traintest_ds(selectedds)
        # evaluate and store
        error = self._evaluate_pmeasure(trainds, testds)
        errors.append(np.asscalar(error))

        # intermediate cleanup, so the datasets do not hand around while
        # the next candidate evaluation is computed
        del trainds
        del testds

        # Check if it is time to stop and if we got
        # the best result
        stop = scriterion(errors)
        isthebest = bestdetector(errors)

        if __debug__:
            debug('IFSC',
                  "nselected %i; error: %.4f " \
                  "best/stop=%d/%d\n" \
                  % (len(selected), errors[-1], isthebest, stop),
                  cr=True, lf=True)

        if isthebest:
            # announce desired features to the underlying slice mapper
            # do copy to survive later selections
            self._safe_assign_slicearg(copy(selected))

        # leave the loop when the criterion is reached
        if stop:
            break

    # charge state
    self.ca.errors = errors
def test_flatten():
    # 3D per-sample shape, 4 samples -> full data shape (4, 2, 2, 4)
    samples_shape = (2, 2, 4)
    data_shape = (4,) + samples_shape
    data = np.arange(np.prod(data_shape)).reshape(data_shape).view(myarray)
    pristinedata = data.copy()
    # expected flat view: each sample becomes one row of 16 features
    target = [[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31],
              [32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47],
              [48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63]]
    target = np.array(target).view(myarray)
    # expected original (unflattened) coordinate of every flat feature
    index_target = np.array([[0, 0, 0], [0, 0, 1], [0, 0, 2], [0, 0, 3],
                             [0, 1, 0], [0, 1, 1], [0, 1, 2], [0, 1, 3],
                             [1, 0, 0], [1, 0, 1], [1, 0, 2], [1, 0, 3],
                             [1, 1, 0], [1, 1, 1], [1, 1, 2], [1, 1, 3]])

    # test only flattening the first two dimensions
    fm_max = FlattenMapper(maxdims=2)
    fm_max.train(data)
    assert_equal(fm_max(data).shape, (4, 4, 4))

    # array subclass survives
    ok_(isinstance(data, myarray))

    # actually, there should be no difference between a plain FlattenMapper and
    # a chain that only has a FlattenMapper as the one element
    for fm in [FlattenMapper(space='voxel'),
               ChainMapper([FlattenMapper(space='voxel'),
                            StaticFeatureSelection(slice(None))])]:
        # not working if untrained
        assert_raises(RuntimeError,
                      fm.forward1,
                      np.arange(np.sum(samples_shape) + 1))

        fm.train(data)

        ok_(isinstance(fm.forward(data), myarray))
        ok_(isinstance(fm.forward1(data[2]), myarray))
        assert_array_equal(fm.forward(data), target)
        assert_array_equal(fm.forward1(data[2]), target[2])
        assert_raises(ValueError, fm.forward, np.arange(4))

        # all of that leaves that data unmodified
        assert_array_equal(data, pristinedata)

        # reverse mapping
        ok_(isinstance(fm.reverse(target), myarray))
        ok_(isinstance(fm.reverse1(target[0]), myarray))
        ok_(isinstance(fm.reverse(target[1:2]), myarray))
        assert_array_equal(fm.reverse(target), data)
        assert_array_equal(fm.reverse1(target[0]), data[0])
        assert_array_equal(fm.reverse1(target[0]),
                           _verified_reverse1(fm, target[0]))
        assert_array_equal(fm.reverse(target[1:2]), data[1:2])
        assert_raises(ValueError, fm.reverse, np.arange(14))

        # check one dimensional data, treated as scalar samples
        oned = np.arange(5)
        fm.train(Dataset(oned))
        # needs 2D
        assert_raises(ValueError, fm.forward, oned)
        # doesn't match mapper, since Dataset turns `oned` into (5,1)
        assert_raises(ValueError, fm.forward, oned)
        assert_equal(Dataset(oned).nfeatures, 1)

        # try dataset mode, with some feature attribute
        fattr = np.arange(np.prod(samples_shape)).reshape(samples_shape)
        ds = Dataset(data, fa={'awesome': fattr.copy()})
        assert_equal(ds.samples.shape, data_shape)
        fm.train(ds)
        dsflat = fm.forward(ds)
        ok_(isinstance(dsflat, Dataset))
        ok_(isinstance(dsflat.samples, myarray))
        assert_array_equal(dsflat.samples, target)
        # feature attribute flattens alongside the samples
        assert_array_equal(dsflat.fa.awesome, np.arange(np.prod(samples_shape)))
        assert_true(isinstance(dsflat.fa['awesome'], ArrayCollectable))
        # test index creation
        assert_array_equal(index_target, dsflat.fa.voxel)

        # and back
        revds = fm.reverse(dsflat)
        ok_(isinstance(revds, Dataset))
        ok_(isinstance(revds.samples, myarray))
        assert_array_equal(revds.samples, data)
        assert_array_equal(revds.fa.awesome, fattr)
        assert_true(isinstance(revds.fa['awesome'], ArrayCollectable))
        # the generated space attribute does not survive un-flattening
        assert_false('voxel' in revds.fa)
def test_chainmapper():
    # the chain needs at least one mapper
    assert_raises(ValueError, ChainMapper, [])
    # a typical first mapper is to flatten
    cm = ChainMapper([FlattenMapper()])

    # few container checks
    assert_equal(len(cm), 1)
    assert_true(isinstance(cm[0], FlattenMapper))

    # now training
    # come up with data
    samples_shape = (2, 2, 4)
    data_shape = (4,) + samples_shape
    data = np.arange(np.prod(data_shape)).reshape(data_shape)
    # expected flat view after the FlattenMapper
    target = [[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31],
              [32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47],
              [48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63]]
    target = np.array(target)

    # if it is not trained it knows nothing
    cm.train(data)

    # a new mapper should appear when doing feature selection
    cm.append(StaticFeatureSelection(list(range(1, 16))))
    assert_equal(cm.forward1(data[0]).shape, (15,))
    assert_equal(len(cm), 2)
    # multiple slicing
    cm.append(StaticFeatureSelection([9, 14]))
    assert_equal(cm.forward1(data[0]).shape, (2,))
    assert_equal(len(cm), 3)

    # check reproduction
    if __debug__:
        # debug mode needs special test as it enhances the repr output
        # with module info and id() appendix for objects
        import mvpa2
        cm_clone = eval(repr(cm))
        assert_equal('#'.join(repr(cm_clone).split('#')[:-1]),
                     '#'.join(repr(cm).split('#')[:-1]))
    else:
        cm_clone = eval(repr(cm))
        assert_equal(repr(cm_clone), repr(cm))

    # what happens if we retrain the whole beast on same data as before
    cm.train(data)
    assert_equal(cm.forward1(data[0]).shape, (2,))
    assert_equal(len(cm), 3)

    # let's map something
    mdata = cm.forward(data)
    # chained selections [1:16] then [9, 14] pick flat features 10 and 15
    assert_array_equal(mdata, target[:, [10, 15]])
    # and back
    rdata = cm.reverse(mdata)
    # original shape
    assert_equal(rdata.shape, data.shape)
    # content as far it could be restored
    assert_array_equal(rdata[rdata > 0], data[rdata > 0])
    assert_equal(np.sum(rdata > 0), 8)

    # Lets construct a dataset with mapper assigned and see
    # if sub-selecting a feature adjusts trailing StaticFeatureSelection
    # appropriately
    ds_subsel = Dataset.from_wizard(data, mapper=cm)[:, 1]
    tail_sfs = ds_subsel.a.mapper[-1]
    assert_equal(repr(tail_sfs),
                 'StaticFeatureSelection(slicearg=array([14]))')
def test_subset_filler():
    # identical selections of the first three features, differing only in
    # the value used to pad removed features on reverse mapping
    sel_default = StaticFeatureSelection(np.arange(3))
    sel_zero = StaticFeatureSelection(np.arange(3), filler=0)
    sel_neg = StaticFeatureSelection(np.arange(3), filler=-1)
    sel_nan = StaticFeatureSelection(np.arange(3), filler=np.nan)
    samples = np.arange(12).astype(float).reshape((2, -1))

    sel_default.train(samples)
    forwarded = sel_default.forward(samples)
    # forward mapping is unaffected by the filler choice
    for sel in (sel_default, sel_zero, sel_neg, sel_nan):
        sel.train(samples)
        assert_array_equal(forwarded, sel.forward(samples))

    # reverse mapping pads the deselected features with the filler value
    restored = sel_neg.reverse(forwarded)
    ok_(np.all(restored[:, 3:] == -1))
    restored = sel_nan.reverse(forwarded)
    ok_(np.all(np.isnan(restored[:, 3:])))
def test_subset(): data = np.array( [[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], [32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], [48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63]]) # float array doesn't work sm = StaticFeatureSelection(np.ones(16)) assert_raises(IndexError, sm.forward, data) # full mask sm = StaticFeatureSelection(slice(None)) # should not change single samples assert_array_equal(sm.forward(data[0:1].copy()), data[0:1]) # or multi-samples assert_array_equal(sm.forward(data.copy()), data) sm.train(data) # same on reverse assert_array_equal(sm.reverse(data[0:1].copy()), data[0:1]) # or multi-samples assert_array_equal(sm.reverse(data.copy()), data) # identical mappers sm_none = StaticFeatureSelection(slice(None)) sm_int = StaticFeatureSelection(np.arange(16)) sm_bool = StaticFeatureSelection(np.ones(16, dtype='bool')) sms = [sm_none, sm_int, sm_bool] # test subsets sids = [3, 4, 5, 6] bsubset = np.zeros(16, dtype='bool') bsubset[sids] = True subsets = [sids, slice(3, 7), bsubset, [3, 3, 4, 4, 6, 6, 6, 5]] # all test subset result in equivalent masks, hence should do the same to # the mapper and result in identical behavior for st in sms: for i, sub in enumerate(subsets): # shallow copy orig = copy(st) subsm = StaticFeatureSelection(sub) # should do copy-on-write for all important stuff!! 
orig += subsm # test if selection did its job if i == 3: # special case of multiplying features assert_array_equal(orig.forward1(data[0].copy()), subsets[i]) else: assert_array_equal(orig.forward1(data[0].copy()), sids) ## all of the above shouldn't change the original mapper #assert_array_equal(sm.get_mask(), np.arange(16)) # check for some bug catcher # no 3D input #assert_raises(IndexError, sm.forward, np.ones((3,2,1))) # no input of wrong length if __debug__: # checked only in __debug__ assert_raises(ValueError, sm.forward, np.ones(4)) # same on reverse #assert_raises(ValueError, sm.reverse, np.ones(16)) # invalid ids #assert_false(subsm.is_valid_inid(-1)) #assert_false(subsm.is_valid_inid(16)) # intended merge failures fsm = StaticFeatureSelection(np.arange(16)) assert_equal(fsm.__iadd__(None), NotImplemented) assert_equal(fsm.__iadd__(Dataset([2, 3, 4])), NotImplemented)
def _train(self, ds):
    """Run the incremental feature search on dataset `ds`.

    Repeatedly evaluates every remaining candidate feature together with
    the already selected ones, stages the most promising candidates via
    ``self._fselector``, and records a performance error for each enlarged
    selection until ``self._stopping_criterion`` fires.  Best selections
    are announced via ``_safe_assign_slicearg``; the error history is
    stored in ``self.ca.errors``.
    """
    # local binding
    fmeasure = self._fmeasure
    fselector = self._fselector
    scriterion = self._stopping_criterion
    bestdetector = self._bestdetector

    # init
    # Computed error for each tested features set.
    errors = []
    # feature candidate are all features in the pattern object
    # NOTE: must be a real list (not a bare py2 `range`): it is mutated
    # with .remove() below, and py3 range objects do not support that
    candidates = list(range(ds.nfeatures))
    # initially empty list of selected features
    selected = []
    # results in here please
    results = None

    # as long as there are candidates left
    # the loop will most likely get broken earlier if the stopping
    # criterion is reached
    while len(candidates):
        # measures for all candidates
        measures = []

        # for all possible candidates
        for i, candidate in enumerate(candidates):
            if __debug__:
                debug('IFSC', "Tested %i" % i, cr=True)

            # take the new candidate and all already selected features
            # select a new temporay feature subset from the dataset
            # slice the full dataset, because for the initial iteration
            # steps this will be much mure effecient than splitting the
            # full ds into train and test at first
            fslm = StaticFeatureSelection(selected + [candidate])
            fslm.train(ds)
            candidate_ds = fslm(ds)
            # activate the dataset splitter
            dsgen = self._splitter.generate(candidate_ds)
            # and derive the dataset part that is used for computing the
            # selection criterion
            # (next() builtin instead of py2-only generator .next())
            trainds = next(dsgen)
            # compute data measure on the training part of this feature set
            measures.append(fmeasure(trainds))

        # relies on ds.item() to work properly
        measures = [np.asscalar(m) for m in measures]

        # Select promissing feature candidates (staging)
        # IDs are only applicable to the current set of feature candidates
        tmp_staging_ids = fselector(measures)

        # translate into real candidate ids
        staging_ids = [candidates[i] for i in tmp_staging_ids]

        # mark them as selected and remove from candidates
        selected += staging_ids
        for i in staging_ids:
            candidates.remove(i)

        # actually run the performance measure to estimate "quality" of
        # selection
        fslm = StaticFeatureSelection(selected)
        fslm.train(ds)
        selectedds = fslm(ds)

        # split into train and test part
        trainds, testds = self._get_traintest_ds(selectedds)
        # evaluate and store
        error = self._evaluate_pmeasure(trainds, testds)
        errors.append(np.asscalar(error))

        # intermediate cleanup, so the datasets do not hand around while
        # the next candidate evaluation is computed
        del trainds
        del testds

        # Check if it is time to stop and if we got
        # the best result
        stop = scriterion(errors)
        isthebest = bestdetector(errors)

        if __debug__:
            debug('IFSC',
                  "nselected %i; error: %.4f " \
                  "best/stop=%d/%d\n" \
                  % (len(selected), errors[-1], isthebest, stop),
                  cr=True, lf=True)

        if isthebest:
            # announce desired features to the underlying slice mapper
            # do copy to survive later selections
            self._safe_assign_slicearg(copy(selected))

        # leave the loop when the criterion is reached
        if stop:
            break

    # charge state
    self.ca.errors = errors
def _call(self, dataset):
    """Perform the ROI search.
    """
    # local binding
    nproc = self.nproc

    if nproc is None and externals.exists('pprocess'):
        import pprocess
        try:
            nproc = pprocess.get_number_of_cores() or 1
        except AttributeError:
            warning("pprocess version %s has no API to figure out maximal "
                    "number of cores. Using 1"
                    % externals.versions['pprocess'])
            nproc = 1
    # train the queryengine
    self._queryengine.train(dataset)

    # decide whether to run on all possible center coords or just a provided
    # subset
    if isinstance(self.__roi_ids, str):
        roi_ids = dataset.fa[self.__roi_ids].value.nonzero()[0]
    elif self.__roi_ids is not None:
        roi_ids = self.__roi_ids
        # safeguard against stupidity
        if __debug__:
            qe_ids = self._queryengine.ids  # known to qe
            if not set(qe_ids).issuperset(roi_ids):
                raise IndexError(
                    "Some roi_ids are not known to the query engine %s: %s"
                    % (self._queryengine,
                       set(roi_ids).difference(qe_ids)))
    else:
        roi_ids = self._queryengine.ids

    # pass to subclass
    results = self._sl_call(dataset, roi_ids, nproc)

    if 'mapper' in dataset.a:
        # since we know the space we can stick the original mapper into the
        # results as well
        if self.__roi_ids is None:
            results.a['mapper'] = copy.copy(dataset.a.mapper)
        else:
            # there is an additional selection step that needs to be
            # expressed by another mapper
            mapper = copy.copy(dataset.a.mapper)

            # NNO if the orignal mapper has no append (because it's not a
            # chainmapper, for example), we make our own chainmapper.
            #
            # THe original code was:
            # mapper.append(StaticFeatureSelection(roi_ids,
            #                                      dshape=dataset.shape[1:]))
            feat_sel_mapper = StaticFeatureSelection(
                roi_ids, dshape=dataset.shape[1:])
            # hasattr instead of `'append' in dir(mapper)` -- idiomatic and
            # consistent with the sibling _concat_results implementation
            if hasattr(mapper, 'append'):
                mapper.append(feat_sel_mapper)
            else:
                mapper = ChainMapper([dataset.a.mapper,
                                      feat_sel_mapper])

            results.a['mapper'] = mapper

    # charge state
    self.ca.raw_results = results

    # return raw results, base-class will take care of transformations
    return results