def test_multiclass_classifier(self, clf):
    """Compare MulticlassClassifier (1-vs-1 + vote) against the learner's
    own built-in multiclass handling, and check (un)training state flags.

    Trains three classifiers on the same dataset -- the bare clone,
    a default MulticlassClassifier, and one with an explicit MaximalVote
    combiner -- and requires their training_stats to render identically.
    """
    # Force non-dataspecific C value.
    # Otherwise multiclass libsvm builtin and our MultiClass would differ
    # in results
    svm = clf.clone()  # operate on clone to avoid side-effects
    # NOTE(review): .has_key() is Python-2-only; 'C' in svm.params would be
    # the portable spelling -- confirm params supports __contains__ before changing
    if svm.params.has_key('C') and svm.params.C < 0:
        svm.params.C = 1.0  # reset C to be 1
    svm2 = svm.clone()
    svm2.ca.enable(['training_stats'])
    mclf = MulticlassClassifier(clf=svm,
                                enable_ca=['training_stats'])
    # with explicit MaximalVote with the conditional attributes enabled
    mclf_mv = MulticlassClassifier(
        clf=svm,
        combiner=MaximalVote(enable_ca=['estimates', 'predictions']),
        enable_ca=['training_stats'])
    ds_train = datasets['uni2small']
    # train all three on the identical dataset
    for clf_ in svm2, mclf, mclf_mv:
        clf_.train(ds_train)
    # compare confusion tables via their string rendering
    s1 = str(mclf.ca.training_stats)
    s2 = str(svm2.ca.training_stats)
    s3 = str(mclf_mv.ca.training_stats)
    self.assertEqual(s1, s2,
                     msg="Multiclass clf should provide same results as built-in "
                         "libsvm's %s. Got %s and %s" % (svm2, s1, s2))
    self.assertEqual(s1, s3,
                     msg="%s should have used maxvote resolver by default"
                         "so results should have been identical. Got %s and %s"
                         % (mclf, s1, s3))
    # combiner must have produced one estimate per prediction
    assert_equal(len(mclf_mv.combiner.ca.estimates),
                 len(mclf_mv.combiner.ca.predictions))
    # They should have came from assessing training_stats ca being enabled

    # recompute accuracy on predictions for training_stats
    training_acc = np.sum(mclf_mv.combiner.ca.predictions ==
                          ds_train.targets) / float(len(ds_train))
    # should match
    assert_equal(mclf_mv.ca.training_stats.stats['ACC'], training_acc)

    # untraining must propagate to the wrapped classifiers
    svm2.untrain()
    self.assertTrue(svm2.trained == False,
                    msg="Un-Trained SVM should be untrained")
    self.assertTrue(np.array([x.trained for x in mclf.clfs]).all(),
                    msg="Trained Boosted classifier should have all primary classifiers trained")
    self.assertTrue(mclf.trained,
                    msg="Trained Boosted classifier should be marked as trained")
    mclf.untrain()
    self.assertTrue(not mclf.trained,
                    msg="UnTrained Boosted classifier should not be trained")
    self.assertTrue(not np.array([x.trained for x in mclf.clfs]).any(),
                    msg="UnTrained Boosted classifier should have no primary classifiers trained")
def test_multiclass_without_combiner():
    """Verify MulticlassClassifier(combiner=None) returns raw pairwise results.

    The goal is to obtain all pairwise results as the resultant dataset,
    avoiding even calling any combiner, and to check that per-pair
    training confusion matrices are shaped/labeled consistently.
    """
    clf = LinearCSVMC(C=1)
    ds = datasets['uni3small'].copy()
    ds.sa['ids'] = np.arange(len(ds))
    mclf = MulticlassClassifier(clf, combiner=None)

    # without combining results at all
    mcv = CrossValidation(mclf, NFoldPartitioner(), errorfx=None)
    res = mcv(ds)
    assert_equal(len(res), len(ds))
    assert_equal(res.nfeatures, 3)  # 3 pairs for 3 classes
    assert_array_equal(res.UT, ds.UT)
    assert_array_equal(np.unique(np.array(res.fa.targets.tolist())), ds.UT)
    # TODO -- check that we have all the pairs?
    assert_array_equal(res.sa['cvfolds'].unique, np.arange(len(ds.UC)))
    if mcv.ca.is_enabled('training_stats'):
        # we must have received a dictionary per each pair
        training_stats = mcv.ca.training_stats
        assert_equal(set(training_stats.keys()),
                     set([('L0', 'L1'), ('L0', 'L2'), ('L1', 'L2')]))
        # .items() instead of Python-2-only .iteritems() -- works on both 2 and 3
        for pair, cm in training_stats.items():
            assert_array_equal(cm.labels, ds.UT)
            # we should have no predictions for absent label
            assert_array_equal(cm.matrix[~np.in1d(ds.UT, pair)], 0)
            # while altogether all samples were processed once
            assert_array_equal(cm.stats['P'], len(ds))
            # and number of sets should be equal number of chunks here
            assert_equal(len(cm.sets), len(ds.UC))
def test_multiclass_classifier_pass_ds_attributes():
    """Check that pass_attr forwards sa/a/ca attributes into the CV result."""
    # TODO: replicate/extend basic testing of pass_attr
    #       in some more "basic" test_*
    clf = LinearCSVMC(C=1)
    ds = datasets['uni3small'].copy()
    ds.sa['ids'] = np.arange(len(ds))
    mclf = MulticlassClassifier(
        clf,
        pass_attr=['ids', 'sa.chunks', 'a.bogus_features',
                   # 'ca.raw_estimates' # this one is binary_clf x samples list ATM
                   # that is why raw_predictions_ds was born
                   'ca.raw_predictions_ds',
                   'ca.estimates',  # this one is ok
                   'ca.predictions',
                   ],
        enable_ca=['all'])
    mcv = CrossValidation(mclf, NFoldPartitioner(), errorfx=None)
    res = mcv(ds)
    assert_array_equal(sorted(res.sa.ids), ds.sa.ids)
    assert_array_equal(res.chunks, ds.chunks[res.sa.ids])
    assert_array_equal(res.sa.predictions, res.samples[:, 0])
    # use floor division so the repeat count stays an int under Python 3
    # (true division would hand np.repeat a float; identical result on Python 2)
    assert_array_equal(res.sa.cvfolds,
                       np.repeat(range(len(ds.UC)), len(ds) // len(ds.UC)))
def test_multiclass_classifier_cv(clf, ds):
    """Cross-validated comparison of MulticlassClassifier vs native multiclass.

    Extends test_clf.py:ClassifiersTests.test_multiclass_classifier --
    performance with our MaximalVote must match what e.g. LIBSVM does
    natively.
    """
    base = clf.clone()
    base.params.C = 1  # pin C so it doesn't auto-adjust to the data
    multi = MulticlassClassifier(clf=base.clone())

    partitioner = NFoldPartitioner()
    cv_native = CrossValidation(base, partitioner,
                                enable_ca=['stats', 'training_stats'])
    cv_multi = CrossValidation(multi, partitioner,
                               enable_ca=['stats', 'training_stats'])

    errors_native = cv_native(ds)
    errors_multi = cv_multi(ds)

    # errors should be the same
    assert_array_equal(errors_native, errors_multi)
    assert_equal(str(cv_native.ca.training_stats),
                 str(cv_multi.ca.training_stats))

    # if it was a binary task, cv.ca.stats would also have AUC column
    # while mcv would not :-/ TODO
    if len(ds.UT) != 2:
        assert_equal(str(cv_native.ca.stats), str(cv_multi.ca.stats))
    else:
        # so just compare the matrix and ACC
        assert_array_equal(cv_native.ca.stats.matrix, cv_multi.ca.stats.matrix)
        assert_equal(cv_native.ca.stats.stats['ACC'],
                     cv_multi.ca.stats.stats['ACC'])
def __test_fspipeline_with_split_classifier(self, basic_clf):
    """Disabled (name-mangled ``__``-prefixed) sketch: RFE driven by a
    SplitClassifier's sensitivities through a FeatureSelectionPipeline.

    NOTE(review): ``trans_error`` is not defined anywhere in the visible
    scope -- presumably a leftover from an older API; confirm before
    re-enabling this test.
    """
    #basic_clf = LinearNuSVMC()
    multi_clf = MulticlassClassifier(clf=basic_clf)
    #svm_weigths = LinearSVMWeights(svm)

    # Proper RFE: aggregate sensitivities across multiple splits,
    # but also due to multi class those need to be aggregated
    # somehow. Transfer error here should be 'leave-1-out' error
    # of split classifier itself
    sclf = SplitClassifier(clf=basic_clf)
    rfe = RFE(sensitivity_analyzer=sclf.get_sensitivity_analyzer(
                  enable_ca=["sensitivities"]),
              transfer_error=trans_error,
              feature_selector=FeatureSelectionPipeline(
                  [FractionTailSelector(0.5),
                   FixedNElementTailSelector(1)]),
              train_pmeasure=True)

    # and we get sensitivity analyzer which works on splits and uses
    # sensitivity
    selected_features = rfe(self.dataset)
def test_multiclass_pairs_svm_searchlight():
    """Searchlight over a combiner-less MulticlassClassifier: each sphere
    must yield all 1-vs-1 pairwise predictions, with correct pair labels
    and accuracies recoverable via get_pairwise_accuracies."""
    from mvpa2.measures.searchlight import sphere_searchlight
    import mvpa2.clfs.meta
    #reload(mvpa2.clfs.meta)
    from mvpa2.clfs.meta import MulticlassClassifier

    from mvpa2.datasets import Dataset
    from mvpa2.clfs.svm import LinearCSVMC
    #import mvpa2.testing.datasets
    #reload(mvpa2.testing.datasets)
    from mvpa2.testing.datasets import datasets
    from mvpa2.generators.partition import NFoldPartitioner, OddEvenPartitioner
    from mvpa2.measures.base import CrossValidation
    from mvpa2.testing import ok_, assert_equal, assert_array_equal
    from mvpa2.sandbox.multiclass import get_pairwise_accuracies

    # Some parameters used in the test below
    nproc = 1 + int(mvpa2.externals.exists('pprocess'))
    ntargets = 4  # number of targets
    # floor division: npairs is used in a shape tuple and np.reshape,
    # where a Python-3 float from true division would fail
    npairs = ntargets * (ntargets - 1) // 2
    center_ids = [35, 55, 1]

    ds = datasets['3dsmall'].copy()
    # redefine C,T so we have a multiclass task
    nsamples = len(ds)
    # list(range(...)) so sequence repetition also works on Python 3
    # (bare range * int is Python-2-only)
    ds.sa.targets = list(range(ntargets)) * (nsamples // ntargets)
    ds.sa.chunks = np.arange(nsamples) // ntargets
    # and add some obvious signal where it is due
    ds.samples[:, 55] += 15 * ds.sa.targets  # for all 4 targets
    ds.samples[:, 35] += 15 * (ds.sa.targets % 2)  # so we have conflicting labels
    # while 35 would still be just for 2 categories which would conflict

    mclf = MulticlassClassifier(LinearCSVMC(),
                                pass_attr=['sa.chunks', 'ca.raw_predictions_ds'],
                                enable_ca=['raw_predictions_ds'])

    label_pairs = mclf._get_binary_pairs(ds)

    def place_sa_as_samples(ds):
        # add a degenerate dimension for the hstacking in the searchlight
        ds.samples = ds.sa.raw_predictions_ds[:, None]
        ds.sa.pop('raw_predictions_ds')  # no need to drag the copy
        return ds

    mcv = CrossValidation(mclf, OddEvenPartitioner(), errorfx=None,
                          postproc=place_sa_as_samples)
    sl = sphere_searchlight(mcv, nproc=nproc, radius=2, space='myspace',
                            center_ids=center_ids)
    slmap = sl(ds)

    ok_('chunks' in slmap.sa)
    ok_('cvfolds' in slmap.sa)
    ok_('targets' in slmap.sa)

    # so for each SL we got all pairwise tests
    assert_equal(slmap.shape, (nsamples, len(center_ids), npairs))
    assert_array_equal(np.unique(slmap.sa.cvfolds), [0, 1])

    # Verify that we got right labels in each 'pair'
    # all searchlights should have the same set of labels for a given
    # pair of targets
    label_pairs_ = np.apply_along_axis(
        np.unique, 0,
        ## reshape slmap so we have only simple pairs in the columns
        np.reshape(slmap, (-1, npairs))).T

    # need to prep that list of pairs obtained from MulticlassClassifier
    # and since it is 1-vs-1, they all should be just pairs of lists of
    # 1 element so should work
    assert_equal(len(label_pairs_), npairs)
    assert_array_equal(np.squeeze(np.array(label_pairs)), label_pairs_)
    assert_equal(label_pairs_.shape, (npairs, 2))  # for this particular case

    out = get_pairwise_accuracies(slmap)
    out123 = get_pairwise_accuracies(slmap, select=[1, 2, 3])
    # so we got at least correct targets
    assert_array_equal(np.unique(out123.T), np.arange(1, 4))
    # test that we extracted correct accuracies
    # First 3 in out.T should have category 0, so skip them and compare otherwise
    assert_array_equal(out.samples[3:], out123.samples)
    ok_(np.all(out.samples[:, 1] == 1.), "This was with super-strong result")
def test_multiclass_without_combiner_sens(clf):
    """Check sensitivities coming out of a combiner-less MulticlassClassifier.

    Compares predictions and sensitivities on a 2-class sub-problem across
    three equivalent setups: the bare classifier, an explicit
    BinaryClassifier wrapper, and MulticlassClassifier(combiner=None).
    """
    ds = datasets['uni3small'].copy()
    # do the clone since later we will compare sensitivities and need it
    # independently trained etc
    mclf = MulticlassClassifier(clf.clone(), combiner=None)

    # We have lots of sandwiching
    #  Multiclass.clfs -> [BinaryClassifier] -> clf
    # where BinaryClassifier's estimates are binarized.
    # Let's also check that we are getting sensitivities correctly.
    # With addition of MulticlassClassifierSensitivityAnalyzer we managed to break
    # it and none tests picked it up, so here we will test that sensitivities
    # are computed and labeled correctly

    # verify that all kinds of results on two classes are identical to the ones
    # if obtained running it without MulticlassClassifier
    # ds = ds[:, 0]  # uncomment out to ease/speed up troubleshooting
    ds2 = ds.select(sadict=dict(targets=['L1', 'L2']))

    # we will train only on one chunk so we could get "realistic" (not just
    # overfit) predictions
    ds2_train = ds2.select(sadict=dict(chunks=ds.UC[:1]))

    # also consider simpler BinaryClassifier to easier pin point the problem
    # and be explicit about what is positive and what is negative label(s)
    bclf = BinaryClassifier(clf.clone(), poslabels=['L2'], neglabels=['L1'])

    # train all three variants on the same chunk, predict on full 2-class ds
    predictions = []
    clfs = [clf, bclf, mclf]
    for c in clfs:
        c.ca.enable('all')
        c.train(ds2_train)
        predictions.append(c.predict(ds2))
    p1, bp1, mp1 = predictions
    assert_equal(p1, bp1)

    # ATM mclf.predict returns dataset (with fa.targets to list pairs of targets
    # used I guess) while p1 is just a list.
    def assert_list_equal_to_ds(l, ds):
        # helper: a plain prediction list must equal the single-column dataset
        assert_equal(ds.shape, (len(l), 1))
        assert_array_equal(l, ds.samples[:, 0])

    assert_list_equal_to_ds(p1, mp1)

    # but if we look at sensitivities
    s1, bs1, ms1 = [c.get_sensitivity_analyzer()(ds2) for c in clfs]

    # Do ground checks for s1
    nonbogus_target = ds2.fa.nonbogus_targets[0]
    # if there was a feature with signal, we know what to expect!:
    # such assignments are randomized, so we might not have signal in that
    # single feature we chose to test with
    if nonbogus_target and nonbogus_target in ds2.UT:
        # that in the pair of labels it would be 2nd one if positive sensitivity
        # or 1st one is negative
        # with classifier we try (SVM) should be pairs of labels
        assert isinstance(s1.T[0], tuple)
        assert_equal(len(s1), 1)
        assert_equal(s1.T[0][int(s1.samples[0, 0] > 0)], nonbogus_target)

    # And in either case we could check that we are getting identical results!
    # lrn_index is unique to ms1 and "ignore_sa" to assert_datasets_equal still
    # compares for the keys to be present in both, so does not help
    ms1.sa.pop('lrn_index')
    assert_datasets_equal(s1, bs1)
    # and here we get a "problem"!
    assert_datasets_equal(s1, ms1)
# NB: # - Nu-classifiers are turned off since for haxby DS default nu # is an 'infisible' one # - Python's SMLR is turned off for the duration of development # since it is slow and results should be the same as of C version # clfswh += [ SMLR(lm=0.1, implementation="C", descr="SMLR(lm=0.1)"), SMLR(lm=1.0, implementation="C", descr="SMLR(lm=1.0)"), #SMLR(lm=10.0, implementation="C", descr="SMLR(lm=10.0)"), #SMLR(lm=100.0, implementation="C", descr="SMLR(lm=100.0)"), #SMLR(implementation="Python", descr="SMLR(Python)") ] clfswh += \ [ MulticlassClassifier(SMLR(lm=0.1), descr='Pairs+maxvote multiclass on SMLR(lm=0.1)') ] clfswh += [ RandomClassifier(descr="Random"), RandomClassifier(same=True, descr="RandomSame"), ] if externals.exists('libsvm'): from mvpa2.clfs.libsvmc import svm as libsvm clfswh._known_tags.update(list(libsvm.SVM._KNOWN_IMPLEMENTATIONS.keys())) clfswh += [ libsvm.SVM(descr="libsvm.LinSVM(C=def)", probability=1), libsvm.SVM(C=-10.0, descr="libsvm.LinSVM(C=10*def)", probability=1), libsvm.SVM(C=1.0, descr="libsvm.LinSVM(C=1)", probability=1), libsvm.SVM(svm_impl='NU_SVC', descr="libsvm.LinNuSVM(nu=def)",