Example no. 1
    def test_multiclass_classifier(self, clf):
        oldC = None
        # XXX somewhat ugly way to force non-dataspecific C value.
        # Otherwise multiclass libsvm builtin and our MultiClass would differ
        # in results
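        # (in PyMVPA a negative C requests |C| times a data-derived default
        #  value; cf. the C=-10.0 -> "C=10*def" warehouse entry further below)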
        if 'C' in clf.params and clf.params.C < 0:
            oldC = clf.params.C
            clf.params.C = 1.0  # reset C to be 1

        svm, svm2 = clf, clf.clone()
        svm2.ca.enable(['training_stats'])

        mclf = MulticlassClassifier(clf=svm,
                                    enable_ca=['training_stats'])

        svm2.train(datasets['uni2small'])
        mclf.train(datasets['uni2small'])
        s1 = str(mclf.ca.training_stats)
        s2 = str(svm2.ca.training_stats)
        self.failUnlessEqual(s1, s2,
            msg="Multiclass clf should provide same results as built-in "
                "libsvm's %s. Got %s and %s" % (svm2, s1, s2))

        svm2.untrain()

        self.failUnless(not svm2.trained,
            msg="Un-Trained SVM should be untrained")

        self.failUnless(np.array([x.trained for x in mclf.clfs]).all(),
            msg="Trained Boosted classifier should have all primary classifiers trained")
        self.failUnless(mclf.trained,
            msg="Trained Boosted classifier should be marked as trained")

        mclf.untrain()

        self.failUnless(not mclf.trained,
                        msg="UnTrained Boosted classifier should not be trained")
        self.failUnless(not np.array([x.trained for x in mclf.clfs]).any(),
            msg="UnTrained Boosted classifier should have no primary classifiers trained")

        if oldC is not None:
            clf.params.C = oldC
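
For reference, a minimal standalone sketch of the pattern this test exercises: wrap a binary-capable SVM in MulticlassClassifier and compare training stats against libsvm's built-in multiclass handling. Module paths assume the mvpa2-era layout, and normal_feature_dataset is only used here to fabricate a toy dataset:

from mvpa2.clfs.svm import LinearCSVMC
from mvpa2.clfs.meta import MulticlassClassifier
from mvpa2.misc.data_generators import normal_feature_dataset

# small, well-separated 3-class toy dataset
ds = normal_feature_dataset(perlabel=10, nlabels=3, nfeatures=4, nchunks=2)

# libsvm's built-in multiclass handling vs. explicit pairwise wrapping
svm = LinearCSVMC(enable_ca=['training_stats'])
mclf = MulticlassClassifier(clf=LinearCSVMC(), enable_ca=['training_stats'])

svm.train(ds)
mclf.train(ds)

# on such an easy problem the two should agree
print(svm.ca.training_stats.stats['ACC'])
print(mclf.ca.training_stats.stats['ACC'])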
Example no. 2
    def test_multiclass_classifier(self, clf):
        oldC = None
        # XXX somewhat ugly way to force non-dataspecific C value.
        # Otherwise multiclass libsvm builtin and our MultiClass would differ
        # in results
        if 'C' in clf.params and clf.params.C < 0:
            oldC = clf.params.C
            clf.params.C = 1.0  # reset C to be 1

        svm, svm2 = clf, clf.clone()
        svm2.ca.enable(['training_confusion'])

        mclf = MulticlassClassifier(clf=svm, enable_ca=['training_confusion'])

        svm2.train(datasets['uni2small_train'])
        mclf.train(datasets['uni2small_train'])
        s1 = str(mclf.ca.training_confusion)
        s2 = str(svm2.ca.training_confusion)
        self.failUnlessEqual(s1, s2,
            msg="Multiclass clf should provide same results as built-in "
                "libsvm's %s. Got %s and %s" % (svm2, s1, s2))

        svm2.untrain()

        self.failUnless(not svm2.trained,
                        msg="Un-Trained SVM should be untrained")

        self.failUnless(
            np.array([x.trained for x in mclf.clfs]).all(),
            msg="Trained Boosted classifier should have all primary "
                "classifiers trained")
        self.failUnless(
            mclf.trained,
            msg="Trained Boosted classifier should be marked as trained")

        mclf.untrain()

        self.failUnless(
            not mclf.trained,
            msg="UnTrained Boosted classifier should not be trained")
        self.failUnless(
            not np.array([x.trained for x in mclf.clfs]).any(),
            msg="UnTrained Boosted classifier should have no primary "
                "classifiers trained")

        if oldC is not None:
            clf.params.C = oldC
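
Note: this variant predates the conditional-attribute rename, so training_confusion here corresponds to training_stats in the surrounding examples, and datasets['uni2small_train'] refers to the training-split counterpart of the uni2small dataset used above.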
Example no. 3
    def __test_fspipeline_with_split_classifier(self, basic_clf):
        #basic_clf = LinearNuSVMC()
        multi_clf = MulticlassClassifier(clf=basic_clf)
        #svm_weights = LinearSVMWeights(svm)

        # Proper RFE: aggregate sensitivities across multiple splits,
        # but also due to multi class those need to be aggregated
        # somehow. Transfer error here should be 'leave-1-out' error
        # of split classifier itself
        sclf = SplitClassifier(clf=basic_clf)
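        # NB: trans_error is assumed to be defined elsewhere in this test
        #     class; its construction is not shown in this excerpt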
        rfe = RFE(sensitivity_analyzer=sclf.get_sensitivity_analyzer(
            enable_ca=["sensitivities"]),
                  transfer_error=trans_error,
                  feature_selector=FeatureSelectionPipeline([
                      FractionTailSelector(0.5),
                      FixedNElementTailSelector(1)
                  ]),
                  train_clf=True)

        # this yields a sensitivity analyzer that operates across the splits
        # and aggregates their sensitivities
        selected_features = rfe(self.dataset)
Example no. 4
#  - Nu-classifiers are turned off since for the haxby DS the default nu
#    is an infeasible one
#  - Python's SMLR is turned off for the duration of development
#    since it is slow and its results should match the C version's
#
clfswh += [
    SMLR(lm=0.1, implementation="C", descr="SMLR(lm=0.1)"),
    SMLR(lm=1.0, implementation="C", descr="SMLR(lm=1.0)"),
    #SMLR(lm=10.0, implementation="C", descr="SMLR(lm=10.0)"),
    #SMLR(lm=100.0, implementation="C", descr="SMLR(lm=100.0)"),
    #SMLR(implementation="Python", descr="SMLR(Python)")
]

clfswh += \
     [ MulticlassClassifier(clfswh['smlr'][0],
                            descr='Pairs+maxvote multiclass on ' + \
                            clfswh['smlr'][0].descr) ]

if externals.exists('libsvm'):
    from mvpa.clfs import libsvmc as libsvm
    clfswh._known_tags.union_update(libsvm.SVM._KNOWN_IMPLEMENTATIONS.keys())
    clfswh += [
        libsvm.SVM(descr="libsvm.LinSVM(C=def)", probability=1),
        libsvm.SVM(C=-10.0, descr="libsvm.LinSVM(C=10*def)", probability=1),
        libsvm.SVM(C=1.0, descr="libsvm.LinSVM(C=1)", probability=1),
        libsvm.SVM(svm_impl='NU_SVC',
                   descr="libsvm.LinNuSVM(nu=def)",
                   probability=1)
    ]
    clfswh += [
        libsvm.SVM(kernel=RbfLSKernel(), descr="libsvm.RbfSVM()"),
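
The excerpt above is cut off mid-list. For context, entries registered this way can later be pulled back out of the warehouse by tag, which is how the clfswh['smlr'][0] lookup above works. A minimal sketch (the 'linear'/'svm' tag combination is an assumption based on the warehouse's usual tagging):

from mvpa.clfs.warehouse import clfswh

# index the warehouse by a single tag...
print([c.descr for c in clfswh['smlr']])

# ...or by several tags at once, which intersects the selections
print([c.descr for c in clfswh['linear', 'svm']])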
Example no. 5
    def test_multiclass_classifier(self, clf):
        oldC = None
        # XXX somewhat ugly way to force non-dataspecific C value.
        # Otherwise multiclass libsvm builtin and our MultiClass would differ
        # in results
        if 'C' in clf.params and clf.params.C < 0:
            oldC = clf.params.C
            clf.params.C = 1.0  # reset C to be 1

        svm, svm2 = clf, clf.clone()
        svm2.ca.enable(['training_stats'])

        mclf = MulticlassClassifier(clf=svm,
                                    enable_ca=['training_stats'])

        # with explicit MaximalVote with the conditional attributes
        # enabled
        mclf_mv = MulticlassClassifier(clf=svm,
                                       combiner=MaximalVote(enable_ca=['estimates', 'predictions']),
                                       enable_ca=['training_stats'])

        for clf_ in svm2, mclf, mclf_mv:
            clf_.train(datasets['uni2small'])
        s1 = str(mclf.ca.training_stats)
        s2 = str(svm2.ca.training_stats)
        s3 = str(mclf_mv.ca.training_stats)
        self.failUnlessEqual(s1, s2,
            msg="Multiclass clf should provide same results as built-in "
                "libsvm's %s. Got %s and %s" % (svm2, s1, s2))
        self.failUnlessEqual(s1, s3,
            msg="%s should have used maxvote resolver by default, "
                "so results should have been identical. Got %s and %s"
                % (mclf, s1, s3))

        assert_equal(len(mclf_mv.combiner.ca.estimates),
                     len(mclf_mv.combiner.ca.predictions))

        # Those should have come from assessing the training data while the
        # training_stats ca was enabled; recompute the accuracy from the
        # predictions so it can be compared against training_stats
        training_acc = np.sum(mclf_mv.combiner.ca.predictions ==
                              datasets['uni2small'].targets) \
                              / float(len(datasets['uni2small']))
        # should match
        assert_equal(mclf_mv.ca.training_stats.stats['ACC'], training_acc)

        svm2.untrain()

        self.failUnless(not svm2.trained,
            msg="Un-Trained SVM should be untrained")

        self.failUnless(np.array([x.trained for x in mclf.clfs]).all(),
            msg="Trained Boosted classifier should have all primary classifiers trained")
        self.failUnless(mclf.trained,
            msg="Trained Boosted classifier should be marked as trained")

        mclf.untrain()

        self.failUnless(not mclf.trained,
                        msg="UnTrained Boosted classifier should not be trained")
        self.failUnless(not np.array([x.trained for x in mclf.clfs]).any(),
            msg="UnTrained Boosted classifier should have no primary classifiers trained")

        if oldC is not None:
            clf.params.C = oldC
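
A closing note on the decomposition itself: per the maxvote check above, the default combiner is MaximalVote over one-vs-one ("pairs") binary problems, so mclf.clfs should contain one primary classifier per unordered pair of targets. A small sketch of that invariant, under the same mvpa2-era module-path assumptions as the sketch after Example no. 1:

from mvpa2.clfs.svm import LinearCSVMC
from mvpa2.clfs.meta import MulticlassClassifier
from mvpa2.misc.data_generators import normal_feature_dataset

ds = normal_feature_dataset(perlabel=10, nlabels=4, nfeatures=4, nchunks=2)
mclf = MulticlassClassifier(clf=LinearCSVMC())
mclf.train(ds)

# pairs decomposition: 4 targets -> 4*3/2 == 6 binary classifiers
assert len(mclf.clfs) == 6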