Python StaticFeatureSelection.train 예제들, mvpa2.featsel.base.StaticFeatureSelection.train Python 예제들

예제 #1

0

파일 보기

def test_static_reverse_doesnt_work_after_feature_selection_tuneup_1():
    ds_orig = datasets['uni2small'].copy()  # doesn't matter which

    m = StaticFeatureSelection(np.arange(4))
    m.train(ds_orig)
    ds = ds_orig.get_mapped(m)
    ds0_rev = ds.a.mapper.reverse1(ds.samples[0])  # should work
    assert_equal(ds0_rev.shape, (ds_orig.nfeatures,))

    # direct feature selection
    ds_ = ds[:, [0, 2]]
    # should work but doesn't due to
    # RuntimeError: Cannot reverse-map data since the original data shape is unknown. Either set `dshape` in the constructor, or call train().
    ds0_rev_ = ds_.a.mapper.reverse1(ds_.samples[0])
    #ds0_rev_ = _verified_reverse1(ds_.a.mapper, ds_.samples[0])
    assert_equal(ds0_rev_.shape, (ds_orig.nfeatures,))

예제 #2

0

파일 보기

def test_subset_filler():
    sm = StaticFeatureSelection(np.arange(3))
    sm_f0 = StaticFeatureSelection(np.arange(3), filler=0)
    sm_fm1 = StaticFeatureSelection(np.arange(3), filler=-1)
    sm_fnan = StaticFeatureSelection(np.arange(3), filler=np.nan)
    data = np.arange(12).astype(float).reshape((2, -1))

    sm.train(data)
    data_forwarded = sm.forward(data)

    for m in (sm, sm_f0, sm_fm1, sm_fnan):
        m.train(data)
        assert_array_equal(data_forwarded, m.forward(data))

    data_back_fm1 = sm_fm1.reverse(data_forwarded)
    ok_(np.all(data_back_fm1[:, 3:] == -1))
    data_back_fnan = sm_fnan.reverse(data_forwarded)
    ok_(np.all(np.isnan(data_back_fnan[:, 3:])))

예제 #3

0

파일 보기

파일: test_mapper.py 프로젝트: pckillerbrici/PyMVPA

def test_subset_filler():
    sm = StaticFeatureSelection(np.arange(3))
    sm_f0 = StaticFeatureSelection(np.arange(3), filler=0)
    sm_fm1 = StaticFeatureSelection(np.arange(3), filler= -1)
    sm_fnan = StaticFeatureSelection(np.arange(3), filler=np.nan)
    data = np.arange(12).astype(float).reshape((2, -1))

    sm.train(data)
    data_forwarded = sm.forward(data)

    for m in (sm, sm_f0, sm_fm1, sm_fnan):
        m.train(data)
        assert_array_equal(data_forwarded, m.forward(data))

    data_back_fm1 = sm_fm1.reverse(data_forwarded)
    ok_(np.all(data_back_fm1[:, 3:] == -1))
    data_back_fnan = sm_fnan.reverse(data_forwarded)
    ok_(np.all(np.isnan(data_back_fnan[:, 3:])))

예제 #4

0

파일 보기

def test_subset():
    data = np.array(
            [[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
            [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31],
            [32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47],
            [48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63]])
    # float array doesn't work
    sm = StaticFeatureSelection(np.ones(16))
    assert_raises(IndexError, sm.forward, data)

    # full mask
    sm = StaticFeatureSelection(slice(None))
    # should not change single samples
    assert_array_equal(sm.forward(data[0:1].copy()), data[0:1])
    # or multi-samples
    assert_array_equal(sm.forward(data.copy()), data)
    sm.train(data)
    # same on reverse
    assert_array_equal(sm.reverse(data[0:1].copy()), data[0:1])
    # or multi-samples
    assert_array_equal(sm.reverse(data.copy()), data)

    # identical mappers
    sm_none = StaticFeatureSelection(slice(None))
    sm_int = StaticFeatureSelection(np.arange(16))
    sm_bool = StaticFeatureSelection(np.ones(16, dtype='bool'))
    sms = [sm_none, sm_int, sm_bool]

    # test subsets
    sids = [3, 4, 5, 6]
    bsubset = np.zeros(16, dtype='bool')
    bsubset[sids] = True
    subsets = [sids, slice(3, 7), bsubset, [3, 3, 4, 4, 6, 6, 6, 5]]
    # all test subset result in equivalent masks, hence should do the same to
    # the mapper and result in identical behavior
    for st in sms:
        for i, sub in enumerate(subsets):
            # shallow copy
            orig = copy(st)
            subsm = StaticFeatureSelection(sub)
            # should do copy-on-write for all important stuff!!
            orig += subsm
            # test if selection did its job
            if i == 3:
                # special case of multiplying features
                assert_array_equal(orig.forward1(data[0].copy()), subsets[i])
            else:
                assert_array_equal(orig.forward1(data[0].copy()), sids)

    ## all of the above shouldn't change the original mapper
    #assert_array_equal(sm.get_mask(), np.arange(16))

    # check for some bug catcher
    # no 3D input
    #assert_raises(IndexError, sm.forward, np.ones((3,2,1)))
    # no input of wrong length
    if __debug__:
        # checked only in __debug__
        assert_raises(ValueError, sm.forward, np.ones(4))
    # same on reverse
    #assert_raises(ValueError, sm.reverse, np.ones(16))
    # invalid ids
    #assert_false(subsm.is_valid_inid(-1))
    #assert_false(subsm.is_valid_inid(16))

    # intended merge failures
    fsm = StaticFeatureSelection(np.arange(16))
    assert_equal(fsm.__iadd__(None), NotImplemented)
    assert_equal(fsm.__iadd__(Dataset([2, 3, 4])), NotImplemented)

예제 #5

0

파일 보기

파일: test_mapper.py 프로젝트: pckillerbrici/PyMVPA

def test_subset():
    data = np.array(
            [[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
            [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31],
            [32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47],
            [48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63]])
    # float array doesn't work
    sm = StaticFeatureSelection(np.ones(16))
    assert_raises(IndexError, sm.forward, data)

    # full mask
    sm = StaticFeatureSelection(slice(None))
    # should not change single samples
    assert_array_equal(sm.forward(data[0:1].copy()), data[0:1])
    # or multi-samples
    assert_array_equal(sm.forward(data.copy()), data)
    sm.train(data)
    # same on reverse
    assert_array_equal(sm.reverse(data[0:1].copy()), data[0:1])
    # or multi-samples
    assert_array_equal(sm.reverse(data.copy()), data)

    # identical mappers
    sm_none = StaticFeatureSelection(slice(None))
    sm_int = StaticFeatureSelection(np.arange(16))
    sm_bool = StaticFeatureSelection(np.ones(16, dtype='bool'))
    sms = [sm_none, sm_int, sm_bool]

    # test subsets
    sids = [3, 4, 5, 6]
    bsubset = np.zeros(16, dtype='bool')
    bsubset[sids] = True
    subsets = [sids, slice(3, 7), bsubset, [3, 3, 4, 4, 6, 6, 6, 5]]
    # all test subset result in equivalent masks, hence should do the same to
    # the mapper and result in identical behavior
    for st in sms:
        for i, sub in enumerate(subsets):
            # shallow copy
            orig = copy(st)
            subsm = StaticFeatureSelection(sub)
            # should do copy-on-write for all important stuff!!
            orig += subsm
            # test if selection did its job
            if i == 3:
                # special case of multiplying features
                assert_array_equal(orig.forward1(data[0].copy()), subsets[i])
            else:
                assert_array_equal(orig.forward1(data[0].copy()), sids)

    ## all of the above shouldn't change the original mapper
    #assert_array_equal(sm.get_mask(), np.arange(16))

    # check for some bug catcher
    # no 3D input
    #assert_raises(IndexError, sm.forward, np.ones((3,2,1)))
    # no input of wrong length
    if __debug__:
        # checked only in __debug__
        assert_raises(ValueError, sm.forward, np.ones(4))
    # same on reverse
    #assert_raises(ValueError, sm.reverse, np.ones(16))
    # invalid ids
    #assert_false(subsm.is_valid_inid(-1))
    #assert_false(subsm.is_valid_inid(16))

    # intended merge failures
    fsm = StaticFeatureSelection(np.arange(16))
    assert_equal(fsm.__iadd__(None), NotImplemented)
    assert_equal(fsm.__iadd__(Dataset([2, 3, 4])), NotImplemented)

예제 #6

0

파일 보기

파일: ifs.py 프로젝트: Python3pkg/PyMVPA

    def _train(self, ds):
        # local binding
        fmeasure = self._fmeasure
        fselector = self._fselector
        scriterion = self._stopping_criterion
        bestdetector = self._bestdetector

        # init
        # Computed error for each tested features set.
        errors = []
        # feature candidate are all features in the pattern object
        candidates = list(range(ds.nfeatures))
        # initially empty list of selected features
        selected = []
        # results in here please
        results = None

        # as long as there are candidates left
        # the loop will most likely get broken earlier if the stopping
        # criterion is reached
        while len(candidates):
            # measures for all candidates
            measures = []

            # for all possible candidates
            for i, candidate in enumerate(candidates):
                if __debug__:
                    debug('IFSC', "Tested %i" % i, cr=True)

                # take the new candidate and all already selected features
                # select a new temporay feature subset from the dataset
                # slice the full dataset, because for the initial iteration
                # steps this will be much mure effecient than splitting the
                # full ds into train and test at first
                fslm = StaticFeatureSelection(selected + [candidate])
                fslm.train(ds)
                candidate_ds = fslm(ds)
                # activate the dataset splitter
                dsgen = self._splitter.generate(candidate_ds)
                # and derived the dataset part that is used for computing the selection
                # criterion
                trainds = next(dsgen)
                # compute data measure on the training part of this feature set
                measures.append(fmeasure(trainds))

            # relies on ds.item() to work properly
            measures = [np.asscalar(m) for m in measures]

            # Select promissing feature candidates (staging)
            # IDs are only applicable to the current set of feature candidates
            tmp_staging_ids = fselector(measures)

            # translate into real candidate ids
            staging_ids = [candidates[i] for i in tmp_staging_ids]

            # mark them as selected and remove from candidates
            selected += staging_ids
            for i in staging_ids:
                candidates.remove(i)

            # actually run the performance measure to estimate "quality" of
            # selection
            fslm = StaticFeatureSelection(selected)
            fslm.train(ds)
            selectedds = fslm(ds)
            # split into train and test part
            trainds, testds = self._get_traintest_ds(selectedds)
            # evaluate and store
            error = self._evaluate_pmeasure(trainds, testds)
            errors.append(np.asscalar(error))
            # intermediate cleanup, so the datasets do not hand around while
            # the next candidate evaluation is computed
            del trainds
            del testds

            # Check if it is time to stop and if we got
            # the best result
            stop = scriterion(errors)
            isthebest = bestdetector(errors)

            if __debug__:
                debug('IFSC',
                      "nselected %i; error: %.4f " \
                      "best/stop=%d/%d\n" \
                      % (len(selected), errors[-1], isthebest, stop),
                      cr=True, lf=True)

            if isthebest:
                # announce desired features to the underlying slice mapper
                # do copy to survive later selections
                self._safe_assign_slicearg(copy(selected))

            # leave the loop when the criterion is reached
            if stop:
                break

        # charge state
        self.ca.errors = errors

예제 #7

0

파일 보기

파일: ifs.py 프로젝트: Anhmike/PyMVPA

    def _train(self, ds):
        # local binding
        fmeasure = self._fmeasure
        fselector = self._fselector
        scriterion = self._stopping_criterion
        bestdetector = self._bestdetector

        # init
        # Computed error for each tested features set.
        errors = []
        # feature candidate are all features in the pattern object
        candidates = range(ds.nfeatures)
        # initially empty list of selected features
        selected = []
        # results in here please
        results = None

        # as long as there are candidates left
        # the loop will most likely get broken earlier if the stopping
        # criterion is reached
        while len(candidates):
            # measures for all candidates
            measures = []

            # for all possible candidates
            for i, candidate in enumerate(candidates):
                if __debug__:
                    debug('IFSC', "Tested %i" % i, cr=True)

                # take the new candidate and all already selected features
                # select a new temporay feature subset from the dataset
                # slice the full dataset, because for the initial iteration
                # steps this will be much mure effecient than splitting the
                # full ds into train and test at first
                fslm = StaticFeatureSelection(selected + [candidate])
                fslm.train(ds)
                candidate_ds = fslm(ds)
                # activate the dataset splitter
                dsgen = self._splitter.generate(candidate_ds)
                # and derived the dataset part that is used for computing the selection
                # criterion
                trainds = dsgen.next()
                # compute data measure on the training part of this feature set
                measures.append(fmeasure(trainds))

            # relies on ds.item() to work properly
            measures = [np.asscalar(m) for m in measures]

            # Select promissing feature candidates (staging)
            # IDs are only applicable to the current set of feature candidates
            tmp_staging_ids = fselector(measures)

            # translate into real candidate ids
            staging_ids = [candidates[i] for i in tmp_staging_ids]

            # mark them as selected and remove from candidates
            selected += staging_ids
            for i in staging_ids:
                candidates.remove(i)

            # actually run the performance measure to estimate "quality" of
            # selection
            fslm = StaticFeatureSelection(selected)
            fslm.train(ds)
            selectedds = fslm(ds)
            # split into train and test part
            trainds, testds = self._get_traintest_ds(selectedds)
            # evaluate and store
            error = self._evaluate_pmeasure(trainds, testds)
            errors.append(np.asscalar(error))
            # intermediate cleanup, so the datasets do not hand around while
            # the next candidate evaluation is computed
            del trainds
            del testds

            # Check if it is time to stop and if we got
            # the best result
            stop = scriterion(errors)
            isthebest = bestdetector(errors)

            if __debug__:
                debug('IFSC',
                      "nselected %i; error: %.4f " \
                      "best/stop=%d/%d\n" \
                      % (len(selected), errors[-1], isthebest, stop),
                      cr=True, lf=True)

            if isthebest:
                # announce desired features to the underlying slice mapper
                # do copy to survive later selections
                self._safe_assign_slicearg(copy(selected))

            # leave the loop when the criterion is reached
            if stop:
                break

        # charge state
        self.ca.errors = errors