def test_multiclass_classifier(self, clf):
    # Force non-data-specific C value.
    # Otherwise multiclass libsvm builtin and our MultiClass would differ
    # in results
    svm = clf.clone()                     # operate on clone to avoid side-effects
    if 'C' in svm.params and svm.params.C < 0:
        svm.params.C = 1.0                # reset C to be 1
    svm2 = svm.clone()
    svm2.ca.enable(['training_stats'])

    mclf = MulticlassClassifier(clf=svm, enable_ca=['training_stats'])
    # with explicit MaximalVote with the conditional attributes enabled
    mclf_mv = MulticlassClassifier(
        clf=svm,
        combiner=MaximalVote(enable_ca=['estimates', 'predictions']),
        enable_ca=['training_stats'])

    ds_train = datasets['uni2small']
    for clf_ in svm2, mclf, mclf_mv:
        clf_.train(ds_train)
    s1 = str(mclf.ca.training_stats)
    s2 = str(svm2.ca.training_stats)
    s3 = str(mclf_mv.ca.training_stats)
    self.assertEqual(
        s1, s2,
        msg="Multiclass clf should provide same results as built-in "
            "libsvm's %s. Got %s and %s" % (svm2, s1, s2))
    self.assertEqual(
        s1, s3,
        msg="%s should have used maxvote resolver by default, "
            "so results should have been identical. Got %s and %s"
            % (mclf, s1, s3))

    assert_equal(len(mclf_mv.combiner.ca.estimates),
                 len(mclf_mv.combiner.ca.predictions))

    # They should have come from assessing the training_stats ca being enabled;
    # recompute accuracy on predictions for training_stats
    training_acc = np.sum(mclf_mv.combiner.ca.predictions ==
                          ds_train.targets) / float(len(ds_train))
    # should match
    assert_equal(mclf_mv.ca.training_stats.stats['ACC'], training_acc)

    svm2.untrain()

    self.assertTrue(svm2.trained == False,
                    msg="Untrained SVM should be untrained")

    self.assertTrue(np.array([x.trained for x in mclf.clfs]).all(),
                    msg="Trained boosted classifier should have all "
                        "primary classifiers trained")
    self.assertTrue(mclf.trained,
                    msg="Trained boosted classifier should be marked as trained")

    mclf.untrain()
    self.assertTrue(not mclf.trained,
                    msg="Untrained boosted classifier should not be trained")
    self.assertTrue(not np.array([x.trained for x in mclf.clfs]).any(),
                    msg="Untrained boosted classifier should have no "
                        "primary classifiers trained")
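# Illustrative sketch (not part of the test suite): a minimal standalone use of
# MulticlassClassifier with an explicit MaximalVote combiner, mirroring the
# setup exercised by test_multiclass_classifier above.  The helper name
# _example_multiclass_maxvote is hypothetical; the import of MaximalVote from
# mvpa2.clfs.meta is an assumption based on it being used unqualified in the
# test, and 'uni2small' is the warehouse dataset the test itself uses.
def _example_multiclass_maxvote():
    from mvpa2.clfs.svm import LinearCSVMC
    from mvpa2.clfs.meta import MulticlassClassifier, MaximalVote
    from mvpa2.testing.datasets import datasets

    ds = datasets['uni2small']
    mclf = MulticlassClassifier(
        clf=LinearCSVMC(),
        combiner=MaximalVote(enable_ca=['predictions']),
        enable_ca=['training_stats'])
    mclf.train(ds)
    # training_stats is a confusion matrix; its 'ACC' entry is the training-set
    # accuracy the test above recomputes from the combiner's predictions
    return mclf.ca.training_stats.stats['ACC']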
def __test_fspipeline_with_split_classifier(self, basic_clf):
    #basic_clf = LinearNuSVMC()
    multi_clf = MulticlassClassifier(clf=basic_clf)
    #svm_weights = LinearSVMWeights(svm)

    # Proper RFE: aggregate sensitivities across multiple splits, but also,
    # due to the multiclass setup, those need to be aggregated somehow.
    # Transfer error here should be the 'leave-1-out' error of the split
    # classifier itself
    sclf = SplitClassifier(clf=basic_clf)
    rfe = RFE(sensitivity_analyzer=sclf.get_sensitivity_analyzer(
                  enable_ca=["sensitivities"]),
              transfer_error=trans_error,
              feature_selector=FeatureSelectionPipeline(
                  [FractionTailSelector(0.5),
                   FixedNElementTailSelector(1)]),
              train_pmeasure=True)
    # and we get a sensitivity analyzer which works on splits and uses
    # sensitivity
    selected_features = rfe(self.dataset)
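# Illustrative sketch (not part of the test suite): a numpy-only picture of
# what the two-step FeatureSelectionPipeline above composes -- first keep a
# fraction of the features by (absolute) sensitivity, then keep a fixed number
# of the strongest ones.  This is a conceptual sketch only; whether the real
# selectors keep or discard a tail depends on their mode/tail parameters, and
# the helper name is hypothetical.
def _example_tail_selection_sketch():
    import numpy as np

    sens = np.array([0.1, 0.9, 0.3, 0.7, 0.2, 0.8])
    # step 1 (fraction tail): keep the upper half by absolute sensitivity
    order = np.argsort(np.abs(sens))
    kept = order[len(order) // 2:]
    # step 2 (fixed-N tail): of those, keep the single strongest feature
    best = kept[np.argmax(np.abs(sens[kept]))]
    return kept, int(best)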
def test_multiclass_pairs_svm_searchlight():
    from mvpa2.measures.searchlight import sphere_searchlight
    import mvpa2.clfs.meta
    #reload(mvpa2.clfs.meta)
    from mvpa2.clfs.meta import MulticlassClassifier

    from mvpa2.datasets import Dataset
    from mvpa2.clfs.svm import LinearCSVMC
    #import mvpa2.testing.datasets
    #reload(mvpa2.testing.datasets)
    from mvpa2.testing.datasets import datasets

    from mvpa2.generators.partition import NFoldPartitioner, OddEvenPartitioner
    from mvpa2.measures.base import CrossValidation
    from mvpa2.testing import ok_, assert_equal, assert_array_equal
    from mvpa2.sandbox.multiclass import get_pairwise_accuracies

    # Some parameters used in the test below
    nproc = 1 + int(mvpa2.externals.exists('pprocess'))
    ntargets = 4                              # number of targets
    npairs = ntargets * (ntargets - 1) // 2
    center_ids = [35, 55, 1]
    ds = datasets['3dsmall'].copy()

    # redefine targets/chunks so we have a multiclass task
    nsamples = len(ds)
    ds.sa.targets = list(range(ntargets)) * (nsamples // ntargets)
    ds.sa.chunks = np.arange(nsamples) // ntargets
    # and add some obvious signal where it is due
    ds.samples[:, 55] += 15 * ds.sa.targets        # for all 4 targets
    ds.samples[:, 35] += 15 * (ds.sa.targets % 2)  # so we have conflicting labels
    # while 35 would still be just for 2 categories which would conflict

    mclf = MulticlassClassifier(LinearCSVMC(),
                                pass_attr=['sa.chunks', 'ca.raw_predictions_ds'],
                                enable_ca=['raw_predictions_ds'])

    label_pairs = mclf._get_binary_pairs(ds)

    def place_sa_as_samples(ds):
        # add a degenerate dimension for the hstacking in the searchlight
        ds.samples = ds.sa.raw_predictions_ds[:, None]
        ds.sa.pop('raw_predictions_ds')    # no need to drag the copy
        return ds

    mcv = CrossValidation(mclf, OddEvenPartitioner(), errorfx=None,
                          postproc=place_sa_as_samples)
    sl = sphere_searchlight(mcv, nproc=nproc, radius=2, space='myspace',
                            center_ids=center_ids)
    slmap = sl(ds)

    ok_('chunks' in slmap.sa)
    ok_('cvfolds' in slmap.sa)
    ok_('targets' in slmap.sa)

    # so for each searchlight we got all pairwise tests
    assert_equal(slmap.shape, (nsamples, len(center_ids), npairs))
    assert_array_equal(np.unique(slmap.sa.cvfolds), [0, 1])

    # Verify that we got the right labels in each 'pair':
    # all searchlights should have the same set of labels for a given
    # pair of targets
    label_pairs_ = np.apply_along_axis(
        np.unique, 0,
        ## reshape slmap so we have only simple pairs in the columns
        np.reshape(slmap, (-1, npairs))).T

    # need to prep that list of pairs obtained from MulticlassClassifier;
    # since it is 1-vs-1, they all should be just pairs of 1-element lists,
    # so this should work
    assert_equal(len(label_pairs_), npairs)
    assert_array_equal(np.squeeze(np.array(label_pairs)), label_pairs_)
    assert_equal(label_pairs_.shape, (npairs, 2))  # for this particular case

    out = get_pairwise_accuracies(slmap)
    out123 = get_pairwise_accuracies(slmap, select=[1, 2, 3])
    assert_array_equal(np.unique(out123.T), np.arange(1, 4))
    # so we got at least the correct targets
    # test that we extracted correct accuracies:
    # the first 3 in out.T should have category 0, so skip them and compare the rest
    assert_array_equal(out.samples[3:], out123.samples)
    ok_(np.all(out.samples[:, 1] == 1.), "This was with super-strong result")
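# Illustrative sketch (not part of the test suite): the 1-vs-1 pair bookkeeping
# the searchlight assertions above rely on -- ntargets classes yield
# ntargets*(ntargets-1)//2 label pairs, one column per pair in the reshaped
# searchlight map.  The helper name is hypothetical; itertools.combinations is
# used only to enumerate the pairs.
def _example_pairwise_bookkeeping(ntargets=4):
    from itertools import combinations

    pairs = list(combinations(range(ntargets), 2))
    # same count as the npairs used when checking slmap.shape above
    assert len(pairs) == ntargets * (ntargets - 1) // 2
    return pairs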
def test_multiclass_without_combiner_sens(clf):
    ds = datasets['uni3small'].copy()
    # do the clone since later we will compare sensitivities and need it
    # independently trained etc
    mclf = MulticlassClassifier(clf.clone(), combiner=None)

    # We have lots of sandwiching:
    #   Multiclass.clfs -> [BinaryClassifier] -> clf
    # where each BinaryClassifier's estimates are binarized.
    # Let's also check that we are getting sensitivities correctly.
    # With the addition of MulticlassClassifierSensitivityAnalyzer we managed
    # to break it and no tests picked it up, so here we test that sensitivities
    # are computed and labeled correctly.

    # verify that all kinds of results on two classes are identical to the ones
    # obtained when running without MulticlassClassifier
    # ds = ds[:, 0]  # uncomment to ease/speed up troubleshooting
    ds2 = ds.select(sadict=dict(targets=['L1', 'L2']))

    # we will train only on one chunk so we could get "realistic" (not just
    # overfit) predictions
    ds2_train = ds2.select(sadict=dict(chunks=ds.UC[:1]))

    # also consider a simpler BinaryClassifier to more easily pinpoint the
    # problem and be explicit about what is the positive and what is the
    # negative label(s)
    bclf = BinaryClassifier(clf.clone(), poslabels=['L2'], neglabels=['L1'])

    predictions = []
    clfs = [clf, bclf, mclf]
    for c in clfs:
        c.ca.enable('all')
        c.train(ds2_train)
        predictions.append(c.predict(ds2))

    p1, bp1, mp1 = predictions
    assert_equal(p1, bp1)

    # ATM mclf.predict returns a dataset (with fa.targets listing the pairs of
    # targets used, I guess) while p1 is just a list.
    def assert_list_equal_to_ds(l, ds):
        assert_equal(ds.shape, (len(l), 1))
        assert_array_equal(l, ds.samples[:, 0])
    assert_list_equal_to_ds(p1, mp1)

    # but if we look at sensitivities
    s1, bs1, ms1 = [c.get_sensitivity_analyzer()(ds2) for c in clfs]

    # Do ground checks for s1
    nonbogus_target = ds2.fa.nonbogus_targets[0]
    # if there was a feature with signal, we know what to expect!
    # such assignments are randomized, so we might not have signal in the
    # single feature we chose to test with
    if nonbogus_target and nonbogus_target in ds2.UT:
        # in the pair of labels it would be the 2nd one if the sensitivity is
        # positive, or the 1st one if negative;
        # with the classifier we try (SVM), .T should be pairs of labels
        assert isinstance(s1.T[0], tuple)
        assert_equal(len(s1), 1)
        assert_equal(s1.T[0][int(s1.samples[0, 0] > 0)], nonbogus_target)

    # And in either case we could check that we are getting identical results!
    # lrn_index is unique to ms1, and "ignore_sa" of assert_datasets_equal still
    # compares for the keys to be present in both, so does not help
    ms1.sa.pop('lrn_index')
    assert_datasets_equal(s1, bs1)
    # and here we get a "problem"!
    assert_datasets_equal(s1, ms1)
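# Illustrative sketch (not part of the test suite): the sign convention encoded
# by the ground check above.  For a binary problem whose sensitivity is labeled
# with the pair (neglabel, poslabel), a positive weight points at the positive
# label and a negative weight at the negative one -- exactly the
# s1.T[0][int(s1.samples[0, 0] > 0)] expression in the test.  The helper name
# and its default arguments are hypothetical.
def _example_sensitivity_sign(pair=('L1', 'L2'), sensitivity=0.42):
    # index 1 (positive label) when the sensitivity is positive, else index 0
    return pair[int(sensitivity > 0)]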
# NB:
#  - Nu-classifiers are turned off since for the haxby DS the default nu
#    is an infeasible one
#  - Python's SMLR is turned off for the duration of development
#    since it is slow and results should be the same as those of the C version
#
clfswh += [ SMLR(lm=0.1, implementation="C", descr="SMLR(lm=0.1)"),
            SMLR(lm=1.0, implementation="C", descr="SMLR(lm=1.0)"),
            #SMLR(lm=10.0, implementation="C", descr="SMLR(lm=10.0)"),
            #SMLR(lm=100.0, implementation="C", descr="SMLR(lm=100.0)"),
            #SMLR(implementation="Python", descr="SMLR(Python)")
            ]

clfswh += \
     [ MulticlassClassifier(SMLR(lm=0.1),
                            descr='Pairs+maxvote multiclass on SMLR(lm=0.1)') ]

clfswh += [
    RandomClassifier(descr="Random"),
    RandomClassifier(same=True, descr="RandomSame"),
    ]

if externals.exists('libsvm'):
    from mvpa2.clfs.libsvmc import svm as libsvm
    clfswh._known_tags.update(list(libsvm.SVM._KNOWN_IMPLEMENTATIONS.keys()))
    clfswh += [libsvm.SVM(descr="libsvm.LinSVM(C=def)", probability=1),
               libsvm.SVM(C=-10.0, descr="libsvm.LinSVM(C=10*def)",
                          probability=1),
               libsvm.SVM(C=1.0, descr="libsvm.LinSVM(C=1)", probability=1),
               libsvm.SVM(svm_impl='NU_SVC', descr="libsvm.LinNuSVM(nu=def)",