Example #1
    def test_multiclass_classifier(self, clf):
        # Force non-dataspecific C value.
        # Otherwise multiclass libsvm builtin and our MultiClass would differ
        # in results
        svm = clf.clone()  # operate on clone to avoid side-effects
        if 'C' in svm.params and svm.params.C < 0:
            svm.params.C = 1.0  # reset C to be 1
        svm2 = svm.clone()
        svm2.ca.enable(['training_stats'])

        mclf = MulticlassClassifier(clf=svm, enable_ca=['training_stats'])

        # with explicit MaximalVote with the conditional attributes
        # enabled
        mclf_mv = MulticlassClassifier(clf=svm,
                                       combiner=MaximalVote(enable_ca=['estimates', 'predictions']),
                                       enable_ca=['training_stats'])

        ds_train = datasets['uni2small']
        for clf_ in svm2, mclf, mclf_mv:
            clf_.train(ds_train)
        s1 = str(mclf.ca.training_stats)
        s2 = str(svm2.ca.training_stats)
        s3 = str(mclf_mv.ca.training_stats)
        self.assertEqual(s1, s2,
            msg="Multiclass clf should provide same results as built-in "
                "libsvm's %s. Got %s and %s" % (svm2, s1, s2))
        self.assertEqual(s1, s3,
            msg="%s should have used maxvote resolver by default"
                "so results should have been identical. Got %s and %s"
                % (mclf, s1, s3))

        assert_equal(len(mclf_mv.combiner.ca.estimates),
                     len(mclf_mv.combiner.ca.predictions))

        # They should have come from assessing training_stats ca being
        # enabled
        # recompute accuracy on predictions for training_stats
        training_acc = np.sum(mclf_mv.combiner.ca.predictions ==
                              ds_train.targets) / float(len(ds_train))
        # should match
        assert_equal(mclf_mv.ca.training_stats.stats['ACC'], training_acc)

        svm2.untrain()

        self.assertFalse(svm2.trained,
            msg="Untrained SVM should be marked as untrained")

        self.assertTrue(np.array([x.trained for x in mclf.clfs]).all(),
            msg="Trained Boosted classifier should have all primary classifiers trained")
        self.assertTrue(mclf.trained,
            msg="Trained Boosted classifier should be marked as trained")

        mclf.untrain()

        self.assertFalse(mclf.trained,
                         msg="Untrained Boosted classifier should not be trained")
        self.assertFalse(np.array([x.trained for x in mclf.clfs]).any(),
            msg="Untrained Boosted classifier should have no primary classifiers trained")
Example #2
    def __test_fspipeline_with_split_classifier(self, basic_clf):
        #basic_clf = LinearNuSVMC()
        multi_clf = MulticlassClassifier(clf=basic_clf)
        #svm_weights = LinearSVMWeights(svm)

        # Proper RFE: aggregate sensitivities across multiple splits,
        # but, because this is multiclass, those also need to be aggregated
        # somehow across the pairwise classifiers. The transfer error here
        # should be the 'leave-1-out' error of the split classifier itself
        sclf = SplitClassifier(clf=basic_clf)
        rfe = RFE(sensitivity_analyzer=sclf.get_sensitivity_analyzer(
            enable_ca=["sensitivities"]),
                  transfer_error=trans_error,
                  feature_selector=FeatureSelectionPipeline([
                      FractionTailSelector(0.5),
                      FixedNElementTailSelector(1)
                  ]),
                  train_pmeasure=True)

        # and we get a sensitivity analyzer which works on splits and uses
        # sensitivities
        selected_features = rfe(self.dataset)
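Note that the snippet references trans_error without ever defining it (the double-underscore prefix keeps this test disabled). In the spirit of the comment about the transfer error being a 'leave-1-out' error, here is a hedged sketch of one way such a measure could be assembled; the cross-validation setup and mean_mismatch_error below are assumptions, not taken from the original test:

from mvpa2.generators.partition import NFoldPartitioner
from mvpa2.measures.base import CrossValidation
from mvpa2.misc.errorfx import mean_mismatch_error

# hypothetical transfer error: leave-one-chunk-out misclassification error
# of the basic classifier itself
trans_error = CrossValidation(basic_clf, NFoldPartitioner(),
                              errorfx=mean_mismatch_error)
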
Example #3
def test_multiclass_pairs_svm_searchlight():
    import numpy as np
    from mvpa2.measures.searchlight import sphere_searchlight
    import mvpa2.clfs.meta
    #reload(mvpa2.clfs.meta)
    from mvpa2.clfs.meta import MulticlassClassifier

    from mvpa2.datasets import Dataset
    from mvpa2.clfs.svm import LinearCSVMC
    #import mvpa2.testing.datasets
    #reload(mvpa2.testing.datasets)
    from mvpa2.testing.datasets import datasets
    from mvpa2.generators.partition import NFoldPartitioner, OddEvenPartitioner
    from mvpa2.measures.base import CrossValidation

    from mvpa2.testing import ok_, assert_equal, assert_array_equal
    from mvpa2.sandbox.multiclass import get_pairwise_accuracies

    # Some parameters used in the test below
    nproc = 1 + int(mvpa2.externals.exists('pprocess'))
    ntargets = 4                                # number of targets
    npairs = ntargets * (ntargets - 1) // 2
    center_ids = [35, 55, 1]
    ds = datasets['3dsmall'].copy()

    # redefine chunks and targets so we have a multiclass task
    nsamples = len(ds)
    ds.sa.targets = list(range(ntargets)) * (nsamples // ntargets)
    ds.sa.chunks = np.arange(nsamples) // ntargets
    # and add some obvious signal where it is due
    ds.samples[:, 55] += 15*ds.sa.targets       # separates all 4 targets
    ds.samples[:, 35] += 15*(ds.sa.targets % 2) # separates only 2 groups of targets,
    # so feature 35 carries labels that conflict across the 4 categories

    mclf = MulticlassClassifier(LinearCSVMC(),
                                pass_attr=['sa.chunks', 'ca.raw_predictions_ds'],
                                enable_ca=['raw_predictions_ds'])

    label_pairs = mclf._get_binary_pairs(ds)

    def place_sa_as_samples(ds):
        # add a degenerate dimension for the hstacking in the searchlight
        ds.samples = ds.sa.raw_predictions_ds[:, None]
        ds.sa.pop('raw_predictions_ds')   # no need to drag the copy
        return ds

    mcv = CrossValidation(mclf, OddEvenPartitioner(), errorfx=None,
                          postproc=place_sa_as_samples)
    sl = sphere_searchlight(mcv, nproc=nproc, radius=2, space='myspace',
                            center_ids=center_ids)
    slmap = sl(ds)


    ok_('chunks' in slmap.sa)
    ok_('cvfolds' in slmap.sa)
    ok_('targets' in slmap.sa)
    # so for each SL we got all pairwise tests
    assert_equal(slmap.shape, (nsamples, len(center_ids), npairs))
    assert_array_equal(np.unique(slmap.sa.cvfolds), [0, 1])

    # Verify that we got the right labels in each 'pair':
    # all searchlights should have the same set of labels for a given
    # pair of targets
    label_pairs_ = np.apply_along_axis(
        np.unique, 0,
        ## reshape slmap so we have only simple pairs in the columns
        np.reshape(slmap, (-1, npairs))).T

    # the list of pairs obtained from MulticlassClassifier needs a bit of prep:
    # since it is 1-vs-1, each entry is a pair of single-element lists, so
    # squeezing them should work
    assert_equal(len(label_pairs_), npairs)
    assert_array_equal(np.squeeze(np.array(label_pairs)), label_pairs_)
    assert_equal(label_pairs_.shape, (npairs, 2))   # for this particular case


    out    = get_pairwise_accuracies(slmap)
    out123 = get_pairwise_accuracies(slmap, select=[1, 2, 3])

    assert_array_equal(np.unique(out123.T), np.arange(1, 4))   # at least the correct targets were selected
    # test that we extracted the correct accuracies:
    # the first 3 entries in out.T should have category 0, so skip them and compare the rest
    assert_array_equal(out.samples[3:], out123.samples)

    ok_(np.all(out.samples[:, 1] == 1.),
        "Accuracies should be perfect given the super-strong signal")
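To make the slmap layout above tangible: each entry is a raw prediction for one test sample at one searchlight center for one target pair, so a single pairwise accuracy can be recomputed by hand. A rough sketch under those assumptions (not part of the original test):

    # pick one pair and one searchlight center
    pair_idx, center_idx = 0, 0
    t1, t2 = label_pairs_[pair_idx]
    targets = np.asarray(slmap.sa.targets)
    # only samples whose true target belongs to the pair are meaningful for it
    mask = np.in1d(targets, [t1, t2])
    manual_acc = np.mean(slmap.samples[mask, center_idx, pair_idx] == targets[mask])
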
Example #4
def test_multiclass_without_combiner_sens(clf):
    ds = datasets['uni3small'].copy()
    # clone since later we will compare sensitivities and need an
    # independently trained copy
    mclf = MulticlassClassifier(clf.clone(), combiner=None)

    # We have lots of sandwiching
    #    Multiclass.clfs -> [BinaryClassifier] -> clf
    # where BinaryClassifier's estimates are binarized.
    # Let's also check that we are getting sensitivities correctly.
    # With the addition of MulticlassClassifierSensitivityAnalyzer we managed to break
    # it and no tests picked it up, so here we will test that sensitivities
    # are computed and labeled correctly

    # verify that all kinds of results on two classes are identical to the ones
    # obtained without the MulticlassClassifier wrapping
    # ds = ds[:, 0]  # uncomment to ease/speed up troubleshooting
    ds2 = ds.select(sadict=dict(targets=['L1', 'L2']))
    # we will train only on one chunk so we could get "realistic" (not just
    # overfit) predictions
    ds2_train = ds2.select(sadict=dict(chunks=ds.UC[:1]))

    # also consider a simpler BinaryClassifier to more easily pinpoint the problem,
    # and be explicit about what is the positive and what is the negative label(s)
    bclf = BinaryClassifier(clf.clone(), poslabels=['L2'], neglabels=['L1'])

    predictions = []
    clfs = [clf, bclf, mclf]
    for c in clfs:
        c.ca.enable('all')
        c.train(ds2_train)
        predictions.append(c.predict(ds2))
    p1, bp1, mp1 = predictions

    assert_equal(p1, bp1)

    # ATM mclf.predict returns a dataset (with fa.targets apparently listing the
    # pairs of targets used) while p1 is just a list.
    def assert_list_equal_to_ds(l, ds):
        assert_equal(ds.shape, (len(l), 1))
        assert_array_equal(l, ds.samples[:, 0])

    assert_list_equal_to_ds(p1, mp1)

    # but if we look at sensitivities
    s1, bs1, ms1 = [c.get_sensitivity_analyzer()(ds2) for c in clfs]
    # Do ground checks for s1
    nonbogus_target = ds2.fa.nonbogus_targets[0]

    # if that feature carries signal, we know what to expect!
    # (such assignments are randomized, so the single feature we chose to
    # test with might carry no signal at all)
    if nonbogus_target and nonbogus_target in ds2.UT:
        # in the pair of labels it should be the 2nd one if the sensitivity
        # is positive, or the 1st one if it is negative;
        # with the classifier we try (SVM), targets should be pairs of labels
        assert isinstance(s1.T[0], tuple)
        assert_equal(len(s1), 1)
        assert_equal(s1.T[0][int(s1.samples[0, 0] > 0)], nonbogus_target)

    # In either case we can check that we are getting identical results!
    # lrn_index is unique to ms1, and "ignore_sa" of assert_datasets_equal still
    # requires the keys to be present in both, so it does not help here
    ms1.sa.pop('lrn_index')

    assert_datasets_equal(s1, bs1)
    # and here we get a "problem"!
    assert_datasets_equal(s1, ms1)
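With combiner=None the wrapper's predict returns a dataset with one column per 1-vs-1 pair rather than a flat list. A short sketch on the full three-target dataset, under the assumptions stated in the comments above (e.g. that fa.targets labels the columns); this is an illustration, not part of the test:

    # hedged sketch: three targets give three 1-vs-1 pairs, hence three columns,
    # and the vote/combination step is left entirely to the caller
    mclf3 = MulticlassClassifier(clf.clone(), combiner=None)
    mclf3.train(ds[ds.sa.chunks == ds.UC[0]])   # train on a single chunk, as above
    raw = mclf3.predict(ds)
    print(raw.shape)        # expected (nsamples, 3): one column of raw predictions per pair
    print(raw.fa.targets)   # presumably the pair of targets behind each column
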
Example #5
    def test_multiclass_classifier(self, clf):
        # Force non-dataspecific C value.
        # Otherwise multiclass libsvm builtin and our MultiClass would differ
        # in results
        svm = clf.clone()  # operate on clone to avoid side-effects
        if 'C' in svm.params and svm.params.C < 0:
            svm.params.C = 1.0  # reset C to be 1
        svm2 = svm.clone()
        svm2.ca.enable(['training_stats'])

        mclf = MulticlassClassifier(clf=svm, enable_ca=['training_stats'])

        # with explicit MaximalVote with the conditional attributes
        # enabled
        mclf_mv = MulticlassClassifier(
            clf=svm,
            combiner=MaximalVote(enable_ca=['estimates', 'predictions']),
            enable_ca=['training_stats'])

        ds_train = datasets['uni2small']
        for clf_ in svm2, mclf, mclf_mv:
            clf_.train(ds_train)
        s1 = str(mclf.ca.training_stats)
        s2 = str(svm2.ca.training_stats)
        s3 = str(mclf_mv.ca.training_stats)
        self.assertEqual(
            s1,
            s2,
            msg="Multiclass clf should provide same results as built-in "
            "libsvm's %s. Got %s and %s" % (svm2, s1, s2))
        self.assertEqual(
            s1,
            s3,
            msg="%s should have used maxvote resolver by default"
            "so results should have been identical. Got %s and %s" %
            (mclf, s1, s3))

        assert_equal(len(mclf_mv.combiner.ca.estimates),
                     len(mclf_mv.combiner.ca.predictions))

        # They should have come from assessing training_stats ca being
        # enabled
        # recompute accuracy on predictions for training_stats
        training_acc = np.sum(
            mclf_mv.combiner.ca.predictions == ds_train.targets) / float(
                len(ds_train))
        # should match
        assert_equal(mclf_mv.ca.training_stats.stats['ACC'], training_acc)

        svm2.untrain()

        self.assertFalse(svm2.trained,
                         msg="Untrained SVM should be marked as untrained")

        self.assertTrue(
            np.array([x.trained for x in mclf.clfs]).all(),
            msg="Trained Boosted classifier should have all primary "
                "classifiers trained")
        self.assertTrue(
            mclf.trained,
            msg="Trained Boosted classifier should be marked as trained")

        mclf.untrain()

        self.assertFalse(
            mclf.trained,
            msg="Untrained Boosted classifier should not be trained")
        self.assertFalse(
            np.array([x.trained for x in mclf.clfs]).any(),
            msg="Untrained Boosted classifier should have no primary "
                "classifiers trained")
Example #6
# NB:
#  - Nu-classifiers are turned off since for the haxby DS the default nu
#    is an 'infeasible' one
#  - Python's SMLR is turned off for the duration of development
#    since it is slow and its results should be the same as the C version's
#
clfswh += [
    SMLR(lm=0.1, implementation="C", descr="SMLR(lm=0.1)"),
    SMLR(lm=1.0, implementation="C", descr="SMLR(lm=1.0)"),
    #SMLR(lm=10.0, implementation="C", descr="SMLR(lm=10.0)"),
    #SMLR(lm=100.0, implementation="C", descr="SMLR(lm=100.0)"),
    #SMLR(implementation="Python", descr="SMLR(Python)")
]

clfswh += [
    MulticlassClassifier(SMLR(lm=0.1),
                         descr='Pairs+maxvote multiclass on SMLR(lm=0.1)'),
]

clfswh += [
    RandomClassifier(descr="Random"),
    RandomClassifier(same=True, descr="RandomSame"),
]
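Classifiers registered this way are typically retrieved from the warehouse by tag rather than by their descr string. A brief usage sketch (the 'smlr' tag is an illustrative assumption):

from mvpa2.clfs.warehouse import clfswh

# select all registered classifiers carrying the given tag(s)
for c in clfswh['smlr']:
    print(c.descr)
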

if externals.exists('libsvm'):
    from mvpa2.clfs.libsvmc import svm as libsvm
    clfswh._known_tags.update(list(libsvm.SVM._KNOWN_IMPLEMENTATIONS.keys()))
    clfswh += [
        libsvm.SVM(descr="libsvm.LinSVM(C=def)", probability=1),
        libsvm.SVM(C=-10.0, descr="libsvm.LinSVM(C=10*def)", probability=1),
        libsvm.SVM(C=1.0, descr="libsvm.LinSVM(C=1)", probability=1),
        libsvm.SVM(svm_impl='NU_SVC',
                   descr="libsvm.LinNuSVM(nu=def)",