def test_diff_len_labels_str_treeclassifier(self):
    """Check that TreeClassifier handles string labels of variable length.

    Was failing on TreeClassifier due to an np.str dtype being assumed
    from the first returned value, which truncated longer labels.

    NOTE(review): this test method appears twice in this file; the later
    duplicate definition shadows this one inside the class.
    """
    ds = datasets['uni4small'].copy()
    # Give the li-th unique target li trailing underscores so every
    # label ends up with a different string length
    newlabels = {l: l + '_' * li for li, l in enumerate(ds.uniquetargets)}
    ds.targets = [newlabels[l] for l in ds.targets]
    clf = TreeClassifier(
        mvpa2.testing.clfs.SVM(),
        {'group1': (ds.uniquetargets[:2], mvpa2.testing.clfs.SVM()),
         'group2': (ds.uniquetargets[2:], mvpa2.testing.clfs.SVM())})
    clf.train(ds)
    predictions = clf.predict(ds)
    # Predictions on the training dataset itself should yield the same
    # set of labels.  self.assertTrue (not a bare assert, which is
    # stripped under python -O) for consistency with the other tests.
    self.assertTrue(
        np.all(np.unique(predictions) == ds.uniquetargets),
        msg="Predictions on the training data must carry the "
            "full-length labels")
def test_diff_len_labels_str_treeclassifier(self):
    """Check that TreeClassifier copes with string labels of varying length.

    Was failing on TreeClassifier because an np.str dtype was assumed
    from the first returned value, truncating any longer label.
    """
    ds = datasets['uni4small'].copy()
    # Rename each unique target so that label lengths all differ:
    # the li-th target gets li underscores appended
    newlabels = {old: old + '_' * li
                 for li, old in enumerate(ds.uniquetargets)}
    ds.targets = [newlabels[old] for old in ds.targets]
    clf = TreeClassifier(
        mvpa2.testing.clfs.SVM(),
        {'group1': (ds.uniquetargets[:2], mvpa2.testing.clfs.SVM()),
         'group2': (ds.uniquetargets[2:], mvpa2.testing.clfs.SVM())})
    clf.train(ds)
    predictions = clf.predict(ds)
    # Predicting on the training dataset must produce the same label
    # set; use self.assertTrue rather than a bare assert (which would
    # be stripped under python -O)
    self.assertTrue(
        np.all(np.unique(predictions) == ds.uniquetargets),
        msg="Predictions must preserve the variable-length labels")
def test_tree_classifier(self):
    """Basic tests for TreeClassifier
    """
    ds = datasets['uni4medium']
    # make it simple for the beast -- take only informative features,
    # because classifiers for the tree are selected randomly, so
    # performance varies a lot and we just need to check on correct
    # operation
    ds = ds[:, ds.fa.nonbogus_targets != [None]]

    clfs = clfswh['binary']             # pool of classifiers
    # Permute so each time we try some different combination of the
    # classifiers, but exclude those operating on a % of features
    # since we might not have enough features for that
    clfs = [clfs[i] for i in np.random.permutation(len(clfs))
            if '%' not in str(clfs[i])]
    # NB: It is necessary that the same classifier is not used at
    # different nodes, since it would be re-trained for a new set of
    # targets, thus leading to incorrect behavior/high error.
    #
    # Clone only those few leading ones which we will use throughout
    # the test
    clfs = [clf.clone() for clf in clfs[:4]]

    # Test conflicting definition
    tclf = TreeClassifier(clfs[0], {
        'L0+2': (('L0', 'L2'), clfs[1]),
        'L2+3': (('L2', 'L3'), clfs[2])})
    # Should raise an exception since label L2 is in both groups
    self.assertRaises(ValueError, tclf.train, ds)

    # Test insufficient definition
    tclf = TreeClassifier(clfs[0], {
        'L0+5': (('L0', 'L5'), clfs[1]),
        'L2+3': (('L2', 'L3'), clfs[2])})
    # Should raise an exception since no group covers L1
    self.assertRaises(ValueError, tclf.train, ds)

    # proper definition now
    tclf = TreeClassifier(clfs[0], {
        'L0+1': (('L0', 'L1'), clfs[1]),
        'L2+3': (('L2', 'L3'), clfs[2])})

    # Lets test train/test cycle using CVTE
    cv = CrossValidation(tclf, OddEvenPartitioner(),
                         postproc=mean_sample(),
                         enable_ca=['stats', 'training_stats'])
    cverror = cv(ds).samples.squeeze()
    try:
        rtclf = repr(tclf)
    except Exception:
        # narrow from a bare 'except:' which also swallowed
        # KeyboardInterrupt/SystemExit
        self.fail(msg="Could not obtain repr for TreeClassifier")

    # Test accessibility of .clfs
    self.assertTrue(tclf.clfs['L0+1'] is clfs[1])
    self.assertTrue(tclf.clfs['L2+3'] is clfs[2])

    cvtrc = cv.ca.training_stats
    cvtc = cv.ca.stats
    if cfg.getboolean('tests', 'labile', default='yes'):
        # just a dummy check to make sure everything is working
        self.assertTrue(cvtrc != cvtc)
        self.assertTrue(cverror < 0.3,
                        msg="Got too high error = %s using %s"
                            % (cverror, tclf))

    # Test trailing nodes with no classifier.  That is why we use a
    # separate pool of classifiers here (probably old/not-needed since
    # we switched to using clones)
    clfs_mc = clfswh['multiclass']      # pool of classifiers
    clfs_mc = [clfs_mc[i] for i in np.random.permutation(len(clfs_mc))
               if '%' not in str(clfs_mc[i])]
    clfs_mc = [clf.clone() for clf in clfs_mc[:4]]  # and clones again

    tclf = TreeClassifier(clfs_mc[0], {
        'L0': (('L0',), None),
        'L1+2+3': (('L1', 'L2', 'L3'), clfs_mc[1])})
    cv = CrossValidation(tclf, OddEvenPartitioner(),
                         postproc=mean_sample(),
                         enable_ca=['stats', 'training_stats'])
    # np.asscalar was deprecated in numpy 1.16 and removed in 1.23;
    # ndarray.item() is the documented replacement (.samples is the
    # ndarray, as used above)
    cverror = cv(ds).samples.item()
    if cfg.getboolean('tests', 'labile', default='yes'):
        self.assertTrue(cverror < 0.3,
                        msg="Got too high error = %s using %s"
                            % (cverror, tclf))