Example #1
0
def test_BinaryFxFeatureMeasure(ds):
    if not isinstance(ds.samples, np.ndarray):
        return
    # some simple function
    f = lambda x, y: np.sum((x.T*y).T, axis=0)
    fx = BinaryFxFeaturewiseMeasure(f, uni=False, numeric=True)
    fx_uni = BinaryFxFeaturewiseMeasure(f, uni=True, numeric=True)
    out = fx(ds)
    out_uni = fx_uni(ds)
    assert(len(out) == 1)
    assert_array_almost_equal(out.samples, out_uni)
    assert_equal(out.fa, out_uni.fa)
    ok_(str(fx).startswith("<BinaryFxFeaturewiseMeasure: lambda x, y:"))
Example #2
0
    def test_SplitRFE(self):
        # just a smoke test ATM
        from mvpa2.clfs.svm import LinearCSVMC
        from mvpa2.clfs.meta import MappedClassifier
        from mvpa2.misc.data_generators import normal_feature_dataset
        #import mvpa2.featsel.rfe
        #reload(mvpa2.featsel.rfe)
        from mvpa2.featsel.rfe import RFE, SplitRFE
        from mvpa2.generators.partition import NFoldPartitioner
        from mvpa2.featsel.helpers import FractionTailSelector
        from mvpa2.testing import ok_, assert_equal

        clf = LinearCSVMC(C=1)
        dataset = normal_feature_dataset(perlabel=20, nlabels=2, nfeatures=30,
                                         snr=1., nonbogus_features=[1,5])
        # flip one of the meaningful features around to see
        # if we are still getting proper selection
        dataset.samples[:, dataset.a.nonbogus_features[1]] *= -1
        # 4 partitions should be enough for testing
        partitioner = NFoldPartitioner(count=4)

        rfeclf = MappedClassifier(
            clf, SplitRFE(clf,
                          partitioner,
                          fselector=FractionTailSelector(
                              0.2, mode='discard', tail='lower')))
        r0 = repr(rfeclf)

        ok_(rfeclf.mapper.nfeatures_min == 0)
        rfeclf.train(dataset)
        ok_(rfeclf.mapper.nfeatures_min > 0)
        predictions = rfeclf(dataset).samples

        # at least 1 of the nonbogus-features should be chosen
        ok_(len(set(dataset.a.nonbogus_features).intersection(
                rfeclf.mapper.slicearg)) > 0)
        # check repr to have all needed pieces
        r = repr(rfeclf)
        s = str(rfeclf)
        ok_(('partitioner=NFoldP' in r) or
            ('partitioner=mvpa2.generators.partition.NFoldPartitioner' in r))
        ok_('lrn=' in r)
        ok_(not 'slicearg=' in r)
        assert_equal(r, r0)
Example #3
0
    def test_SplitRFE(self):
        # just a smoke test ATM
        from mvpa2.clfs.svm import LinearCSVMC
        from mvpa2.clfs.meta import MappedClassifier
        from mvpa2.misc.data_generators import normal_feature_dataset
        #import mvpa2.featsel.rfe
        #reload(mvpa2.featsel.rfe)
        from mvpa2.featsel.rfe import RFE, SplitRFE
        from mvpa2.generators.partition import NFoldPartitioner
        from mvpa2.featsel.helpers import FractionTailSelector
        from mvpa2.testing import ok_, assert_equal

        clf = LinearCSVMC(C=1)
        dataset = normal_feature_dataset(perlabel=20,
                                         nlabels=2,
                                         nfeatures=30,
                                         snr=1.,
                                         nonbogus_features=[1, 5])
        # flip one of the meaningful features around to see
        # if we are still getting proper selection
        dataset.samples[:, dataset.a.nonbogus_features[1]] *= -1
        # 4 partitions should be enough for testing
        partitioner = NFoldPartitioner(count=4)

        rfeclf = MappedClassifier(
            clf,
            SplitRFE(clf,
                     partitioner,
                     fselector=FractionTailSelector(0.2,
                                                    mode='discard',
                                                    tail='lower')))
        r0 = repr(rfeclf)

        ok_(rfeclf.mapper.nfeatures_min == 0)
        rfeclf.train(dataset)
        ok_(rfeclf.mapper.nfeatures_min > 0)
        predictions = rfeclf(dataset).samples

        # at least 1 of the nonbogus-features should be chosen
        ok_(
            len(
                set(dataset.a.nonbogus_features).intersection(
                    rfeclf.mapper.slicearg)) > 0)
        # check repr to have all needed pieces
        r = repr(rfeclf)
        s = str(rfeclf)
        ok_(('partitioner=NFoldP' in r) or
            ('partitioner=mvpa2.generators.partition.NFoldPartitioner' in r))
        ok_('lrn=' in r)
        ok_(not 'slicearg=' in r)
        assert_equal(r, r0)
Example #4
0
    def test_SplitRFE(self, fmeasure):
        # just a smoke test ATM
        from mvpa2.clfs.svm import LinearCSVMC
        from mvpa2.clfs.meta import MappedClassifier
        from mvpa2.misc.data_generators import normal_feature_dataset
        #import mvpa2.featsel.rfe
        #reload(mvpa2.featsel.rfe)
        from mvpa2.featsel.rfe import RFE, SplitRFE
        from mvpa2.generators.partition import NFoldPartitioner
        from mvpa2.featsel.helpers import FractionTailSelector
        from mvpa2.testing import ok_, assert_equal

        clf = LinearCSVMC(C=1)
        dataset = normal_feature_dataset(perlabel=20, nlabels=2, nfeatures=11,
                                         snr=1., nonbogus_features=[1, 5])
        # flip one of the meaningful features around to see
        # if we are still getting proper selection
        dataset.samples[:, dataset.a.nonbogus_features[1]] *= -1
        # 3 partitions should be enough for testing
        partitioner = NFoldPartitioner(count=3)

        rfeclf = MappedClassifier(
            clf, SplitRFE(clf,
                          partitioner,
                          fselector=FractionTailSelector(
                              0.5, mode='discard', tail='lower'),
                          fmeasure=fmeasure,
                           # need to update only when using clf's sens anal
                          update_sensitivity=fmeasure is None))
        r0 = repr(rfeclf)

        ok_(rfeclf.mapper.nfeatures_min == 0)
        rfeclf.train(dataset)
        ok_(rfeclf.mapper.nfeatures_min > 0)
        predictions = rfeclf(dataset).samples

        # at least 1 of the nonbogus-features should be chosen
        ok_(len(set(dataset.a.nonbogus_features).intersection(
                rfeclf.mapper.slicearg)) > 0)

        # check repr to have all needed pieces
        r = repr(rfeclf)
        s = str(rfeclf)
        ok_(('partitioner=NFoldP' in r) or
            ('partitioner=mvpa2.generators.partition.NFoldPartitioner' in r))
        ok_('lrn=' in r)
        ok_(not 'slicearg=' in r)
        assert_equal(r, r0)

        if externals.exists('joblib'):
            rfeclf.mapper.nproc = -1
            # compare results against the one ran in parallel
            _slicearg = rfeclf.mapper.slicearg
            _predictions = predictions
            rfeclf.train(dataset)
            predictions = rfeclf(dataset).samples
            assert_array_equal(predictions, _predictions)
            assert_array_equal(_slicearg, rfeclf.mapper.slicearg)

        # Test that we can collect stats from cas within cross-validation
        sensitivities = []
        nested_errors = []
        nested_nfeatures = []
        def store_me(data, node, result):
            sens = node.measure.get_sensitivity_analyzer(force_train=False)(data)
            sensitivities.append(sens)
            nested_errors.append(node.measure.mapper.ca.nested_errors)
            nested_nfeatures.append(node.measure.mapper.ca.nested_nfeatures)
        cv = CrossValidation(rfeclf, NFoldPartitioner(count=1), callback=store_me,
                             enable_ca=['stats'])
        _ = cv(dataset)
        # just to make sure we collected them
        assert_equal(len(sensitivities), 1)
        assert_equal(len(nested_errors), 1)
        assert_equal(len(nested_nfeatures), 1)
Example #5
0
    def test_SplitRFE(self, fmeasure):
        # just a smoke test ATM
        from mvpa2.clfs.svm import LinearCSVMC
        from mvpa2.clfs.meta import MappedClassifier
        from mvpa2.misc.data_generators import normal_feature_dataset
        #import mvpa2.featsel.rfe
        #reload(mvpa2.featsel.rfe)
        from mvpa2.featsel.rfe import RFE, SplitRFE
        from mvpa2.generators.partition import NFoldPartitioner
        from mvpa2.featsel.helpers import FractionTailSelector
        from mvpa2.testing import ok_, assert_equal

        clf = LinearCSVMC(C=1)
        dataset = normal_feature_dataset(perlabel=20,
                                         nlabels=2,
                                         nfeatures=11,
                                         snr=1.,
                                         nonbogus_features=[1, 5])
        # flip one of the meaningful features around to see
        # if we are still getting proper selection
        dataset.samples[:, dataset.a.nonbogus_features[1]] *= -1
        # 3 partitions should be enough for testing
        partitioner = NFoldPartitioner(count=3)

        rfeclf = MappedClassifier(
            clf,
            SplitRFE(
                clf,
                partitioner,
                fselector=FractionTailSelector(0.5,
                                               mode='discard',
                                               tail='lower'),
                fmeasure=fmeasure,
                # need to update only when using clf's sens anal
                update_sensitivity=fmeasure is None))
        r0 = repr(rfeclf)

        ok_(rfeclf.mapper.nfeatures_min == 0)
        rfeclf.train(dataset)
        ok_(rfeclf.mapper.nfeatures_min > 0)
        predictions = rfeclf(dataset).samples

        # at least 1 of the nonbogus-features should be chosen
        ok_(
            len(
                set(dataset.a.nonbogus_features).intersection(
                    rfeclf.mapper.slicearg)) > 0)

        # check repr to have all needed pieces
        r = repr(rfeclf)
        s = str(rfeclf)
        ok_(('partitioner=NFoldP' in r) or
            ('partitioner=mvpa2.generators.partition.NFoldPartitioner' in r))
        ok_('lrn=' in r)
        ok_(not 'slicearg=' in r)
        assert_equal(r, r0)

        if externals.exists('joblib'):
            rfeclf.mapper.nproc = -1
            # compare results against the one ran in parallel
            _slicearg = rfeclf.mapper.slicearg
            _predictions = predictions
            rfeclf.train(dataset)
            predictions = rfeclf(dataset).samples
            assert_array_equal(predictions, _predictions)
            assert_array_equal(_slicearg, rfeclf.mapper.slicearg)

        # Test that we can collect stats from cas within cross-validation
        sensitivities = []
        nested_errors = []
        nested_nfeatures = []

        def store_me(data, node, result):
            sens = node.measure.get_sensitivity_analyzer(
                force_train=False)(data)
            sensitivities.append(sens)
            nested_errors.append(node.measure.mapper.ca.nested_errors)
            nested_nfeatures.append(node.measure.mapper.ca.nested_nfeatures)

        cv = CrossValidation(rfeclf,
                             NFoldPartitioner(count=1),
                             callback=store_me,
                             enable_ca=['stats'])
        _ = cv(dataset)
        # just to make sure we collected them
        assert_equal(len(sensitivities), 1)
        assert_equal(len(nested_errors), 1)
        assert_equal(len(nested_nfeatures), 1)