def test_split_classifier(self):
    ds = self.data_bin_1
    clf = SplitClassifier(
        clf=SameSignClassifier(),
        splitter=NFoldSplitter(1),
        enable_ca=['confusion', 'training_confusion', 'feature_ids'])
    clf.train(ds)                   # train the beast
    error = clf.ca.confusion.error
    tr_error = clf.ca.training_confusion.error

    clf2 = clf.clone()
    cv = CrossValidatedTransferError(
        TransferError(clf2),
        NFoldSplitter(),
        postproc=mean_sample(),
        enable_ca=['confusion', 'training_confusion'])
    cverror = cv(ds).samples.squeeze()
    tr_cverror = cv.ca.training_confusion.error

    self.failUnlessEqual(
        error, cverror,
        msg="We should get the same error using split classifier as"
            " using CrossValidatedTransferError. Got %s and %s"
            % (error, cverror))

    self.failUnlessEqual(
        tr_error, tr_cverror,
        msg="We should get the same training error using split classifier as"
            " using CrossValidatedTransferError. Got %s and %s"
            % (tr_error, tr_cverror))

    self.failUnlessEqual(clf.ca.confusion.percent_correct, 100,
                         msg="Dummy clf should train perfectly")
    self.failUnlessEqual(len(clf.ca.confusion.sets), len(ds.UC),
                         msg="Should have 1 confusion per each split")
    self.failUnlessEqual(len(clf.clfs), len(ds.UC),
                         msg="Should have number of classifiers equal # of epochs")
    self.failUnlessEqual(clf.predict(ds.samples), list(ds.targets),
                         msg="Should classify correctly")

    # feature_ids must be list of lists, and since it is not
    # feature-selecting classifier used - we expect all features
    # to be utilized
    #  NOT ANYMORE -- for BoostedClassifier we have now union of all
    #  used features across slave classifiers. That makes
    #  semantics clear. If you need to get deeper -- use upcoming
    #  harvesting facility ;-)
    # self.failUnlessEqual(len(clf.feature_ids), len(ds.uniquechunks))
    # self.failUnless(np.array([len(ids)==ds.nfeatures
    #                           for ids in clf.feature_ids]).all())

    # Just check if we get it at all ;-)
    summary = clf.summary()
def test_split_classifier_extended(self, clf_):
    clf2 = clf_.clone()
    ds = datasets['uni2medium']     #self.data_bin_1
    clf = SplitClassifier(
        clf=clf_,                   #SameSignClassifier(),
        splitter=NFoldSplitter(1),
        enable_ca=['confusion', 'feature_ids'])
    clf.train(ds)                   # train the beast
    error = clf.ca.confusion.error

    cv = CrossValidatedTransferError(
        TransferError(clf2),
        NFoldSplitter(),
        postproc=mean_sample(),
        enable_ca=['confusion', 'training_confusion'])
    cverror = cv(ds).samples.squeeze()

    self.failUnless(
        abs(error - cverror) < 0.01,
        msg="We should get the same error using split classifier as"
            " using CrossValidatedTransferError. Got %s and %s"
            % (error, cverror))

    if cfg.getboolean('tests', 'labile', default='yes'):
        self.failUnless(error < 0.25,
                        msg="clf should generalize more or less fine. "
                            "Got error %s" % error)
    self.failUnlessEqual(len(clf.ca.confusion.sets), len(ds.UC),
                         msg="Should have 1 confusion per each split")
    self.failUnlessEqual(len(clf.clfs), len(ds.UC),
                         msg="Should have number of classifiers equal # of epochs")
def test_custom_targets(self, lrn):
    """Simple test that a learner can cope with a custom sa instead of 'targets'
    """
    # Since we are comparing performances of two learners, we need
    # to assure that if they depend on some random seed -- they
    # would use the same value.  Currently we have such stochastic
    # behavior in SMLR
    if 'seed' in lrn.params:
        from mvpa import _random_seed
        lrn = lrn.clone()               # clone the beast
        lrn.params.seed = _random_seed  # reuse the same seed
    lrn_ = lrn.clone()
    lrn_.params.targets_attr = 'custom'

    te = CrossValidatedTransferError(TransferError(lrn),
                                     NFoldSplitter())
    te_ = CrossValidatedTransferError(TransferError(lrn_),
                                      NFoldSplitter())
    nclasses = 2 * (1 + int('multiclass' in lrn.__tags__))
    dsname = ('uni%dsmall' % nclasses,
              'sin_modulated')[int(lrn.__is_regression__)]
    ds = datasets[dsname]
    ds_ = ds.copy()
    ds_.sa['custom'] = ds_.sa['targets']
    ds_.sa.pop('targets')
    self.failUnless('targets' in ds.sa,
                    msg="'targets' should remain in original ds")

    try:
        cve = te(ds)
        cve_ = te_(ds_)
    except Exception, e:
        self.fail("Failed with %r" % e)
def test_chi_square_searchlight(self):
    # only do partial to save time

    # Can't yet do this since test_searchlight isn't yet "under nose"
    #skip_if_no_external('scipy')
    if not externals.exists('scipy'):
        return

    from mvpa.misc.stats import chisquare

    transerror = TransferError(sample_clf_lin)
    cv = CrossValidatedTransferError(
        transerror,
        NFoldSplitter(cvtype=1),
        enable_ca=['confusion'])

    def getconfusion(data):
        cv(data)
        return chisquare(cv.ca.confusion.matrix)[0]

    sl = sphere_searchlight(getconfusion, radius=0,
                            center_ids=[3, 50])

    # run searchlight
    results = sl(self.dataset)
    self.failUnless(results.nfeatures == 2)
def test_split_featurewise_dataset_measure(self):
    ds = datasets['uni3small']
    sana = SplitFeaturewiseDatasetMeasure(
        analyzer=SMLR(fit_all_weights=True).get_sensitivity_analyzer(),
        splitter=NFoldSplitter(),
        )

    sens = sana(ds)
    # a sensitivity for each chunk and each label combination
    assert_equal(sens.shape,
                 (len(ds.sa['chunks'].unique)
                  * len(ds.sa['targets'].unique),
                  ds.nfeatures))

    # Lets try more complex example with 'boosting'
    ds = datasets['uni3medium']
    ds.init_origids('samples')
    sana = SplitFeaturewiseDatasetMeasure(
        analyzer=SMLR(fit_all_weights=True).get_sensitivity_analyzer(),
        splitter=NoneSplitter(npertarget=0.25, mode='first',
                              nrunspersplit=2),
        enable_ca=['splits', 'sensitivities'])
    sens = sana(ds)

    assert_equal(sens.shape, (2 * len(ds.sa['targets'].unique),
                              ds.nfeatures))
    splits = sana.ca.splits
    self.failUnlessEqual(len(splits), 2)
    self.failUnless(np.all([s[0].nsamples == ds.nsamples / 4
                            for s in splits]))
    # should have used different samples
    self.failUnless(np.any([splits[0][0].sa.origids
                            != splits[1][0].sa.origids]))
    # and should have got different sensitivities
    self.failUnless(np.any(sens[0] != sens[1]))
def __test_matthias_question(self):
    rfe_clf = LinearCSVMC(C=1)

    rfesvm_split = SplitClassifier(rfe_clf)
    clf = \
        FeatureSelectionClassifier(
            clf=LinearCSVMC(C=1),
            feature_selection=RFE(
                sensitivity_analyzer=rfesvm_split.get_sensitivity_analyzer(
                    combiner=first_axis_mean,
                    transformer=np.abs),
                transfer_error=ConfusionBasedError(
                    rfesvm_split,
                    confusion_state="confusion"),
                stopping_criterion=FixedErrorThresholdStopCrit(0.20),
                feature_selector=FractionTailSelector(
                    0.2, mode='discard', tail='lower'),
                update_sensitivity=True))

    splitter = NFoldSplitter(cvtype=1)
    no_permutations = 1000

    cv = CrossValidatedTransferError(
        TransferError(clf),
        splitter,
        null_dist=MCNullDist(permutations=no_permutations,
                             tail='left'),
        enable_ca=['confusion'])
    error = cv(datasets['uni2small'])
    self.failUnless(error < 0.4)
    self.failUnless(cv.ca.null_prob < 0.05)
def test_slicing(self):
    spl = HalfSplitter()
    splits = [(train, test) for (train, test) in spl(self.data)]
    for s in splits:
        # we get slicing all the time
        assert_true(s[0].samples.base is self.data.samples)
        assert_true(s[1].samples.base is self.data.samples)
    spl = HalfSplitter(noslicing=True)
    splits = [(train, test) for (train, test) in spl(self.data)]
    for s in splits:
        # we get no slicing at all
        assert_false(s[0].samples.base is self.data.samples)
        assert_false(s[1].samples.base is self.data.samples)
    spl = NFoldSplitter()
    splits = [(train, test) for (train, test) in spl(self.data)]
    for i, s in enumerate(splits):
        # training data is a slice only for the first and last split
        if i == 0 or i == len(splits) - 1:
            assert_true(s[0].samples.base is self.data.samples)
        else:
            assert_false(s[0].samples.base is self.data.samples)
        # testing data gets slicing all the time
        assert_true(s[1].samples.base is self.data.samples)
    step_ds = Dataset(np.random.randn(20, 2),
                      sa={'chunks': np.tile([0, 1], 10)})
    spl = OddEvenSplitter()
    splits = [(train, test) for (train, test) in spl(step_ds)]
    assert_equal(len(splits), 2)
    for s in splits:
        # we get slicing all the time
        assert_true(s[0].samples.base is step_ds.samples)
        assert_true(s[1].samples.base is step_ds.samples)
def test_james_problem(self):
    percent = 80
    dataset = datasets['uni2small']
    rfesvm_split = LinearCSVMC()
    fs = \
        RFE(sensitivity_analyzer=rfesvm_split.get_sensitivity_analyzer(),
            transfer_error=TransferError(rfesvm_split),
            feature_selector=FractionTailSelector(
                percent / 100.0, mode='select', tail='upper'),
            update_sensitivity=True)

    clf = FeatureSelectionClassifier(
        clf=LinearCSVMC(),
        # on features selected via RFE
        feature_selection=fs)
    # update sensitivity at each step (since we're not using the
    # same CLF as sensitivity analyzer)
    clf.ca.enable('feature_ids')

    cv = CrossValidatedTransferError(
        TransferError(clf),
        NFoldSplitter(cvtype=1),
        postproc=mean_sample(),
        enable_ca=['confusion'],
        expose_testdataset=True)
    #cv = SplitClassifier(clf)
    try:
        error = cv(dataset).samples.squeeze()
    except Exception, e:
        self.fail('CrossValidation cannot handle classifier with RFE '
                  'feature selection. Got exception: %s' % (e,))
def test_simple_n_minus_one_cv(self):
    data = get_mv_pattern(3)
    data.init_origids('samples')

    self.failUnless(data.nsamples == 120)
    self.failUnless(data.nfeatures == 2)
    self.failUnless(
        (data.sa.targets ==
         [0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 0, 0, 0, 0, 0] * 6).all())
    self.failUnless(
        (data.sa.chunks ==
         [k for k in range(1, 7) for i in range(20)]).all())
    assert_equal(len(np.unique(data.sa.origids)), data.nsamples)

    transerror = TransferError(sample_clf_nl)
    cv = CrossValidatedTransferError(
        transerror,
        NFoldSplitter(cvtype=1),
        enable_ca=['confusion', 'training_confusion',
                   'samples_error'])

    results = cv(data)
    self.failUnless((results.samples < 0.2).all()
                    and (results.samples >= 0.0).all())

    # TODO: test accessibility of {training_,}confusion{,s} of
    #       CrossValidatedTransferError

    self.failUnless(isinstance(cv.ca.samples_error, dict))
    self.failUnless(len(cv.ca.samples_error) == data.nsamples)
    # one value for each origid
    assert_array_equal(sorted(cv.ca.samples_error.keys()),
                       sorted(data.sa.origids))
    for k, v in cv.ca.samples_error.iteritems():
        self.failUnless(len(v) == 1)
def test_counted_splitting(self):
    # count > #chunks, should result in 10 splits
    nchunks = len(self.data.sa['chunks'].unique)
    for strategy in NFoldSplitter._STRATEGIES:
        for count, target in [(nchunks * 2, nchunks),
                              (nchunks, nchunks),
                              (nchunks - 1, nchunks - 1),
                              (3, 3),
                              (0, 0),
                              (1, 1)]:
            nfs = NFoldSplitter(cvtype=1, count=count,
                                strategy=strategy)
            splits = [(train, test) for (train, test) in nfs(self.data)]
            self.failUnless(len(splits) == target)
            chosenchunks = [int(s[1].uniquechunks) for s in splits]

            # Test if configuration matches as well
            nsplits_cfg = len(nfs.splitcfg(self.data))
            self.failUnlessEqual(nsplits_cfg, target)

            # Check if "lastsplit" dsattr was assigned appropriately
            nsplits = len(splits)
            if nsplits > 0:
                # dummy-proof testing of last split
                for ds_ in splits[-1]:
                    self.failUnless(ds_.a.lastsplit)
                # test all now
                for isplit, split in enumerate(splits):
                    for ds_ in split:
                        self.failUnless(ds_.a.lastsplit ==
                                        (isplit == nsplits - 1))

            # Check results of different strategies
            if strategy == 'first':
                self.failUnlessEqual(chosenchunks, range(target))
            elif strategy == 'equidistant':
                if target == 3:
                    self.failUnlessEqual(chosenchunks, [0, 3, 7])
            elif strategy == 'random':
                # none is selected twice
                self.failUnless(
                    len(set(chosenchunks)) == len(chosenchunks))
                self.failUnless(target == len(chosenchunks))
            else:
                raise RuntimeError, "Add unittest for strategy %s" \
                      % strategy
def test_cache_speedup(self):
    skip_if_no_external('shogun', ver_dep='shogun:rev', min_version=4455)

    ck = sgSVM(kernel=CachedKernel(kernel=RbfSGKernel(sigma=2)), C=1)
    sk = sgSVM(kernel=RbfSGKernel(sigma=2), C=1)

    cv_c = CrossValidatedTransferError(TransferError(ck),
                                       splitter=NFoldSplitter())
    cv_s = CrossValidatedTransferError(TransferError(sk),
                                       splitter=NFoldSplitter())

    #data = datasets['uni4large']
    P = 5000
    data = normal_feature_dataset(snr=2, perlabel=200, nchunks=10,
                                  means=np.random.randn(2, P),
                                  nfeatures=P)

    t0 = time()
    ck.params.kernel.compute(data)
    cachetime = time() - t0

    t0 = time()
    cached_err = cv_c(data)
    ccv_time = time() - t0

    t0 = time()
    norm_err = cv_s(data)
    ncv_time = time() - t0

    assert_almost_equal(np.asanyarray(cached_err),
                        np.asanyarray(norm_err))
    ok_(cachetime < ncv_time)
    ok_(ccv_time < ncv_time)
    #print 'Regular CV time: %s seconds'%ncv_time
    #print 'Caching time: %s seconds'%cachetime
    #print 'Cached CV time: %s seconds'%ccv_time
    speedup = ncv_time / (ccv_time + cachetime)
    #print 'Speedup factor: %s'%speedup

    # Speedup ideally should be 10, though it's not purely linear
    self.failIf(speedup < 2, 'Problem caching data - too slow!')
def test_cached_kernel_different_datasets(self):
    skip_if_no_external('shogun', ver_dep='shogun:rev', min_version=4455)

    # Inspired by the problem Swaroop ran into
    k = LinearSGKernel(normalizer_cls=False)
    k_ = LinearSGKernel(normalizer_cls=False)   # to be cached
    ck = CachedKernel(k_)

    clf = sgSVM(svm_impl='libsvm', kernel=k, C=-1)
    clf_ = sgSVM(svm_impl='libsvm', kernel=ck, C=-1)

    cvte = CrossValidatedTransferError(TransferError(clf),
                                       NFoldSplitter())
    cvte_ = CrossValidatedTransferError(TransferError(clf_),
                                        NFoldSplitter())

    te = TransferError(clf)
    te_ = TransferError(clf_)

    for r in xrange(2):
        ds1 = datasets['uni2medium']
        errs1 = cvte(ds1)
        ck.compute(ds1)
        ok_(ck._recomputed)
        errs1_ = cvte_(ds1)
        ok_(~ck._recomputed)
        assert_array_equal(errs1, errs1_)

        ds2 = datasets['uni3small']
        errs2 = cvte(ds2)
        ck.compute(ds2)
        ok_(ck._recomputed)
        errs2_ = cvte_(ds2)
        ok_(~ck._recomputed)
        assert_array_equal(errs2, errs2_)

        ssel = np.round(datasets['uni2large'].samples[:5, 0]).astype(int)
        terr = te(datasets['uni3small_test'][ssel],
                  datasets['uni3small_train'][::2])
        terr_ = te_(datasets['uni3small_test'][ssel],
                    datasets['uni3small_train'][::2])
        ok_(~ck._recomputed)
        ok_(terr == terr_)
def test_discarded_boundaries(self):
    splitters = [NFoldSplitter(),
                 NFoldSplitter(discard_boundary=(0, 1)),  # discard testing
                 NFoldSplitter(discard_boundary=(1, 0)),  # discard training
                 NFoldSplitter(discard_boundary=(2, 0)),  # discard 2 from training
                 NFoldSplitter(discard_boundary=1),       # discard from both
                 OddEvenSplitter(discard_boundary=(1, 0)),
                 OddEvenSplitter(discard_boundary=(0, 1)),
                 HalfSplitter(discard_boundary=(1, 0)),
                 ]

    split_sets = [list(s(self.data)) for s in splitters]
    counts = [[(len(s[0].chunks), len(s[1].chunks)) for s in split_set]
              for split_set in split_sets]

    nodiscard_tr = [c[0] for c in counts[0]]
    nodiscard_te = [c[1] for c in counts[0]]

    # Discarding in testing:
    self.failUnless(nodiscard_tr == [c[0] for c in counts[1]])
    self.failUnless(nodiscard_te[1:-1] ==
                    [c[1] + 2 for c in counts[1][1:-1]])
    # at the beginning/end chunks, just a single element
    self.failUnless(nodiscard_te[0] == counts[1][0][1] + 1)
    self.failUnless(nodiscard_te[-1] == counts[1][-1][1] + 1)

    # Discarding in training
    for d in [1, 2]:
        self.failUnless(nodiscard_te == [c[1] for c in counts[1 + d]])
        self.failUnless(nodiscard_tr[0] == counts[1 + d][0][0] + d)
        self.failUnless(nodiscard_tr[-1] == counts[1 + d][-1][0] + d)
        self.failUnless(nodiscard_tr[1:-1] ==
                        [c[0] + d * 2 for c in counts[1 + d][1:-1]])

    # Discarding in both -- should be eq min from counts[1] and [2]
    counts_min = [(min(c1[0], c2[0]), min(c1[1], c2[1]))
                  for c1, c2 in zip(counts[1], counts[2])]
    self.failUnless(counts_min == counts[4])
def test_harvesting(self):
    # get a dataset with a very high SNR
    data = get_mv_pattern(10)
    # do crossval with default errorfx and 'mean' combiner
    transerror = TransferError(clfswh['linear'][0])
    cv = CrossValidatedTransferError(
        transerror,
        NFoldSplitter(cvtype=1),
        harvest_attribs=['transerror.clf.ca.training_time'])
    result = cv(data)
    ok_(cv.ca.harvested.has_key('transerror.clf.ca.training_time'))
    assert_equal(len(cv.ca.harvested['transerror.clf.ca.training_time']),
                 len(data.UC))
def test_noise_classification(self):
    # get a dataset with a very high SNR
    data = get_mv_pattern(10)

    # do crossval with default errorfx and 'mean' combiner
    transerror = TransferError(sample_clf_nl)
    cv = CrossValidatedTransferError(transerror,
                                     NFoldSplitter(cvtype=1))

    # must return a scalar value
    result = cv(data)
    # must be perfect
    self.failUnless((result.samples < 0.05).all())

    # do crossval with permuted regressors
    cv = CrossValidatedTransferError(
        transerror,
        NFoldSplitter(cvtype=1, permute_attr='targets',
                      nrunspersplit=10))
    results = cv(data)

    # must be at chance level
    pmean = np.array(results).mean()
    self.failUnless(pmean < 0.58 and pmean > 0.42)
def test_simplest_cv_pat_gen(self):
    # create the generator
    nfs = NFoldSplitter(cvtype=1)

    # now get the xval pattern sets (One-Fold CV)
    xvpat = [(train, test) for (train, test) in nfs(self.data)]

    self.failUnless(len(xvpat) == 10)

    for i, p in enumerate(xvpat):
        self.failUnless(len(p) == 2)
        self.failUnless(p[0].nsamples == 90)
        self.failUnless(p[1].nsamples == 10)
        self.failUnless(p[1].chunks[0] == i)
def test_vstack_and_origids_issue(self):
    # That is actually what swaroop hit
    skip_if_no_external('shogun', ver_dep='shogun:rev', min_version=4455)

    # Inspired by the problem Swaroop ran into
    k = LinearSGKernel(normalizer_cls=False)
    k_ = LinearSGKernel(normalizer_cls=False)   # to be cached
    ck = CachedKernel(k_)

    clf = sgSVM(svm_impl='libsvm', kernel=k, C=-1)
    clf_ = sgSVM(svm_impl='libsvm', kernel=ck, C=-1)

    cvte = CrossValidatedTransferError(TransferError(clf),
                                       NFoldSplitter())
    cvte_ = CrossValidatedTransferError(TransferError(clf_),
                                        NFoldSplitter())

    ds = datasets['uni2large_test'].copy(deep=True)
    ok_(~('origids' in ds.sa))          # assure that there are none
    ck.compute(ds)                      # so we initialize origids
    ok_('origids' in ds.sa)
    ds2 = ds.copy(deep=True)
    ds2.samples = np.zeros(ds2.shape)
    from mvpa.base.dataset import vstack
    ds_vstacked = vstack((ds2, ds))
    # should complain now since there would not be unique
    # samples' origids
    if __debug__:
        assert_raises(ValueError, ck.compute, ds_vstacked)
    ds_vstacked.init_origids('samples')  # reset origids
    ck.compute(ds_vstacked)

    errs = cvte(ds_vstacked)
    errs_ = cvte_(ds_vstacked)
    # Following test would have failed since origids
    # were just ints, and then non-unique after vstack
    assert_array_equal(errs.samples, errs_.samples)
def test_classifier_generalization(self, clf):
    """Simple test if classifiers can generalize ok on simple data
    """
    te = CrossValidatedTransferError(TransferError(clf),
                                     NFoldSplitter(),
                                     postproc=mean_sample())
    # check the default
    self.failUnless(isinstance(te.transerror.errorfx,
                               MeanMismatchErrorFx))

    nclasses = 2 * (1 + int('multiclass' in clf.__tags__))

    ds = datasets['uni%dmedium' % nclasses]
    try:
        cve = te(ds).samples.squeeze()
    except Exception, e:
        self.fail("Failed with %s" % e)
def test_harvesting(self):
    """Basic testing of harvesting based on SplitClassifier
    """
    ds = self.data_bin_1
    clf = SplitClassifier(
        clf=SameSignClassifier(),
        splitter=NFoldSplitter(1),
        enable_ca=['confusion', 'training_confusion', 'feature_ids'],
        harvest_attribs=['clf.ca.feature_ids', 'clf.ca.training_time'],
        descr="DESCR")
    clf.train(ds)                   # train the beast
    # Number of harvested items should be equal to number of chunks
    self.failUnlessEqual(len(clf.ca.harvested['clf.ca.feature_ids']),
                         len(ds.UC))
    # if we can blame multiple inheritance and ClassWithCollections.__init__
    self.failUnlessEqual(clf.descr, "DESCR")
def test_regressions_classifiers(self, clf):
    """Simple tests on regressions being used as classifiers
    """
    # check if we get values set correctly
    clf.ca.change_temporarily(enable_ca=['estimates'])
    self.failUnlessRaises(UnknownStateError,
                          clf.ca['estimates']._get)
    cv = CrossValidatedTransferError(
        TransferError(clf),
        NFoldSplitter(),
        enable_ca=['confusion', 'training_confusion'])
    ds = datasets['uni2small'].copy()
    # we want numeric labels to maintain the previous behavior, especially
    # since we deal with regressions here
    ds.sa.targets = AttributeMap().to_numeric(ds.targets)
    cverror = cv(ds)

    self.failUnless(len(clf.ca.estimates) == ds[ds.chunks == 1].nsamples)
    clf.ca.reset_changed_temporarily()
def test_partial_searchlight_with_full_report(self):
    # compute N-1 cross-validation for each sphere
    transerror = TransferError(sample_clf_lin)
    cv = CrossValidatedTransferError(
        transerror,
        NFoldSplitter(cvtype=1))
    # construct diameter 1 (or just radius 0) searchlight
    sl = sphere_searchlight(cv, radius=0,
                            center_ids=[3, 50])

    # run searchlight
    results = sl(self.dataset)

    # only two spheres but error for all CV-folds
    self.failUnlessEqual(results.shape, (len(self.dataset.UC), 2))

    # test if we graciously puke if center_ids are out of bounds
    dataset0 = self.dataset[:, :50]     # so we have no 50th feature
    self.failUnlessRaises(IndexError, sl, dataset0)
def test_ifs(self, svm):

    # data measure and transfer error quantifier use the SAME clf!
    trans_error = TransferError(svm)
    data_measure = CrossValidatedTransferError(trans_error,
                                               NFoldSplitter(1),
                                               postproc=mean_sample())

    # go for lower tail selection as data_measure will return
    # errors -> low is good
    ifs = IFS(data_measure,
              trans_error,
              feature_selector=FixedNElementTailSelector(
                  1, tail='lower', mode='select'),
              )
    wdata = self.get_data()
    wdata_nfeatures = wdata.nfeatures
    tdata = self.get_data()
    tdata_nfeatures = tdata.nfeatures

    sdata, stdata = ifs(wdata, tdata)

    # fail if orig datasets are changed
    self.failUnless(wdata.nfeatures == wdata_nfeatures)
    self.failUnless(tdata.nfeatures == tdata_nfeatures)

    # check that the features set with the least error is selected
    self.failUnless(len(ifs.ca.errors))
    e = np.array(ifs.ca.errors)
    self.failUnless(sdata.nfeatures == e.argmin() + 1)

    # repeat with a dataset where the selection order is known
    signal = datasets['dumb2']
    sdata, stdata = ifs(signal, signal)
    self.failUnless((sdata.samples[:, 0] == signal.samples[:, 0]).all())
def test_regressions(self, regr):
    """Simple tests on regressions
    """
    ds = datasets['chirp_linear']
    # we want numeric labels to maintain the previous behavior, especially
    # since we deal with regressions here
    ds.sa.targets = AttributeMap().to_numeric(ds.targets)

    cve = CrossValidatedTransferError(
        TransferError(regr),
        splitter=NFoldSplitter(),
        postproc=mean_sample(),
        enable_ca=['training_confusion', 'confusion'])
    # check the default
    self.failUnless(isinstance(cve.transerror.errorfx, CorrErrorFx))
    corr = np.asscalar(cve(ds).samples)

    # Our CorrErrorFx should never return NaN
    self.failUnless(not np.isnan(corr))
    self.failUnless(corr == cve.ca.confusion.stats['CCe'])

    splitregr = SplitClassifier(
        regr,
        splitter=OddEvenSplitter(),
        enable_ca=['training_confusion', 'confusion'])
    splitregr.train(ds)
    split_corr = splitregr.ca.confusion.stats['CCe']
    split_corr_tr = splitregr.ca.training_confusion.stats['CCe']

    for confusion, error in (
        (cve.ca.confusion, corr),
        (splitregr.ca.confusion, split_corr),
        (splitregr.ca.training_confusion, split_corr_tr),
        ):
        #TODO: test confusion statistics
        # Part of it for now -- CCe
        for conf in confusion.summaries:
            stats = conf.stats
            if cfg.getboolean('tests', 'labile', default='yes'):
                self.failUnless(stats['CCe'] < 0.5)
            self.failUnlessEqual(stats['CCe'], stats['Summary CCe'])

        s0 = confusion.as_string(short=True)
        s1 = confusion.as_string(short=False)

        for s in [s0, s1]:
            self.failUnless(len(s) > 10,
                            msg="We should get some string representation "
                                "of regression summary. Got %s" % s)
        if cfg.getboolean('tests', 'labile', default='yes'):
            self.failUnless(
                error < 0.2,
                msg="Regressions should perform well on a simple "
                    "dataset. Got correlation error of %s " % error)

        # Test access to summary statistics
        # YOH: lets start making testing more reliable.
        #      p-value for such accident to have is verrrry tiny,
        #      so if regression works -- it better has at least 0.5 ;)
        #      otherwise fix it! ;)
        # YOH: not now -- issues with libsvr in SG and linear kernel
        if cfg.getboolean('tests', 'labile', default='yes'):
            self.failUnless(confusion.stats['CCe'] < 0.5)

    # just to check if it works fine
    split_predictions = splitregr.predict(ds.samples)
def test_cper_class(self, clf):
    if not clf.params.has_key('C'):
        # skip those without C
        return

    ds = datasets['uni2medium'].copy()
    ds__ = datasets['uni2medium'].copy()
    #
    # balanced set
    # Lets add a bit of noise to drive classifier nuts. same
    # should be done for the imbalanced set
    ds__.samples = ds__.samples + \
                   0.5 * np.random.normal(size=(ds__.samples.shape))
    #
    # imbalanced set
    # lets overpopulate label 0
    times = 20
    ds_ = ds[(range(ds.nsamples) + range(ds.nsamples / 2) * times)]
    ds_.samples = ds_.samples + \
                  0.5 * np.random.normal(size=(ds_.samples.shape))
    spl = get_nsamples_per_attr(ds_, 'targets')  #_.samplesperlabel
    #print ds_.targets, ds_.chunks

    cve = CrossValidatedTransferError(TransferError(clf), NFoldSplitter(),
                                      enable_ca='confusion')
    # on balanced
    e = cve(ds__)
    tpr_1 = cve.ca.confusion.stats["TPR"][1]

    # on imbalanced
    e = cve(ds_)
    tpr_2 = cve.ca.confusion.stats["TPR"][1]

    # Set '1 C per label'
    # recreate cvte since previous might have operated on copies
    cve = CrossValidatedTransferError(TransferError(clf), NFoldSplitter(),
                                      enable_ca='confusion')
    oldC = clf.params.C
    # TODO: provide clf.params.C not with a tuple but dictionary
    #       with C per label (now order is deduced in a cruel way)
    ratio = np.sqrt(float(spl[ds_.UT[0]]) / spl[ds_.UT[1]])
    clf.params.C = (-1 / ratio, -1 * ratio)
    try:
        # on imbalanced but with balanced C
        e_ = cve(ds_)
        # reassign C
        clf.params.C = oldC
    except:
        clf.params.C = oldC
        raise
    tpr_3 = cve.ca.confusion.stats["TPR"][1]

    # Actual tests
    if cfg.getboolean('tests', 'labile', default='yes'):
        self.failUnless(tpr_1 > 0.25,
                        msg="Without imbalance we should have some "
                            "hits, but got TPR=%.3f" % tpr_1)

        self.failUnless(tpr_2 < 0.25,
                        msg="With imbalance we should have almost no "
                            "hits for the minor class, but got TPR=%.3f"
                            % tpr_2)

        self.failUnless(tpr_3 > 0.25,
                        msg="With imbalanced data but ratio-based Cs "
                            "we should have some hits for the minor class, "
                            "but got TPR=%.3f" % tpr_3)
def test_spatial_searchlight(self, common_variance):
    """Tests both generic and GNBSearchlight

    Test of GNBSearchlight anyways requires a ground-truth comparison
    to the generic version, so we are doing sweepargs here
    """
    # compute N-1 cross-validation for each sphere
    # YOH: unfortunately sample_clf_lin is not guaranteed
    #      to provide exactly the same results due to inherent
    #      iterative process.  Therefore lets use something quick
    #      and pure Python
    gnb = GNB(common_variance=common_variance)
    transerror = TransferError(gnb)
    cv = CrossValidatedTransferError(
        transerror,
        NFoldSplitter(cvtype=1))

    skwargs = dict(radius=1, enable_ca=['roi_sizes', 'raw_results'])
    sls = [sphere_searchlight(cv, **skwargs),
           #GNBSearchlight(gnb, NFoldSplitter(cvtype=1))
           sphere_gnbsearchlight(gnb, NFoldSplitter(cvtype=1),
                                 indexsum='fancy', **skwargs)
           ]

    if externals.exists('scipy'):
        sls += [sphere_gnbsearchlight(gnb, NFoldSplitter(cvtype=1),
                                      indexsum='sparse', **skwargs)]

    # Just test nproc whenever common_variance is True
    if externals.exists('pprocess') and common_variance:
        sls += [sphere_searchlight(cv, nproc=2, **skwargs)]

    all_results = []
    ds = datasets['3dsmall'].copy()
    ds.fa['voxel_indices'] = ds.fa.myspace
    for sl in sls:
        # run searchlight
        results = sl(ds)
        all_results.append(results)

        # check for correct number of spheres
        self.failUnless(results.nfeatures == 106)
        # and measures (one per xfold)
        self.failUnless(len(results) == len(ds.UC))

        # check for chance-level performance across all spheres
        self.failUnless(0.4 < results.samples.mean() < 0.6)

        mean_errors = results.samples.mean(axis=0)
        # that we do get different errors ;)
        self.failUnless(len(np.unique(mean_errors)) > 3)

        # check reasonable sphere sizes
        self.failUnless(len(sl.ca.roi_sizes) == 106)
        self.failUnless(max(sl.ca.roi_sizes) == 7)
        self.failUnless(min(sl.ca.roi_sizes) == 4)

        # check base-class state
        self.failUnlessEqual(sl.ca.raw_results.nfeatures, 106)

    if len(all_results) > 1:
        # if we had multiple searchlights, check whether they all
        # gave the same result (they should have)
        aresults = np.array([a.samples for a in all_results])
        dresults = np.abs(aresults - aresults.mean(axis=0))
        dmax = np.max(dresults)
        self.failUnless(dmax <= 1e-13)
def plot_feature_hist(dataset, xlim=None, noticks=True,
                      targets_attr='targets', chunks_attr=None,
                      **kwargs):
    """Plot histograms of feature values for each label.

    Parameters
    ----------
    dataset : Dataset
    xlim : None or 2-tuple
      Common x-axis limits for all histograms.
    noticks : bool
      If True, no axis ticks will be plotted. This is useful to save
      space in large plots.
    targets_attr : string, optional
      Name of the samples attribute to be used as targets.
    chunks_attr : None or string
      If a string, a histogram will be plotted per each target and each
      chunk (as defined in the sa named `chunks_attr`), resulting in a
      histogram grid (targets x chunks).
    **kwargs
      Any additional arguments are passed to matplotlib's hist().
    """
    lsplit = NFoldSplitter(1, attr=targets_attr)
    csplit = NFoldSplitter(1, attr=chunks_attr)

    nrows = len(dataset.sa[targets_attr].unique)
    ncols = len(dataset.sa[chunks_attr].unique)

    def doplot(data):
        """Just a little helper which plots the histogram and removes
        ticks etc"""
        pl.hist(data, **kwargs)

        if xlim is not None:
            pl.xlim(xlim)

        if noticks:
            pl.yticks([])
            pl.xticks([])

    fig = 1

    # for all labels
    for row, (_, ds) in enumerate(lsplit(dataset)):
        if chunks_attr:
            for col, (_, d) in enumerate(csplit(ds)):
                pl.subplot(nrows, ncols, fig)
                doplot(d.samples.ravel())

                if row == 0:
                    pl.title('C:' + str(d.sa[chunks_attr].unique[0]))
                if col == 0:
                    pl.ylabel('L:' + str(d.sa[targets_attr].unique[0]))

                fig += 1
        else:
            pl.subplot(1, nrows, fig)
            doplot(ds.samples)

            pl.title('L:' + str(ds.sa[targets_attr].unique[0]))

            fig += 1
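# A minimal usage sketch for plot_feature_hist, kept as a comment so it is
# clearly not part of the module. It reuses normal_feature_dataset (already
# used in the caching test above); the keyword values below are illustrative
# assumptions, not values prescribed by this module:
#
#   ds = normal_feature_dataset(perlabel=20, nfeatures=4, nchunks=4)
#   plot_feature_hist(ds, xlim=(-5, 5), chunks_attr='chunks', bins=20)
#   pl.show()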
def test_analyzer_with_split_classifier(self, clfds):
    """Test analyzers in split classifier
    """
    clf, ds = clfds                 # unroll the tuple
    # We need to skip some LARSes here
    _sclf = str(clf)
    if 'LARS(' in _sclf and "type='stepwise'" in _sclf:
        # ADD KnownToFail thingie from NiPy
        return

    # To not waste too much time testing, lets limit to 3 splits
    nsplits = 3
    splitter = NFoldSplitter(count=nsplits)
    mclf = SplitClassifier(clf=clf,
                           splitter=splitter,
                           enable_ca=['training_confusion', 'confusion'])
    sana = mclf.get_sensitivity_analyzer(
        # postproc=absolute_features(),
        enable_ca=["sensitivities"])

    ulabels = ds.uniquetargets
    nlabels = len(ulabels)
    # Can't rely on splitcfg since count-limit is done in __call__
    assert(nsplits == len(list(splitter(ds))))
    sens = sana(ds)

    # It should return either ...
    #  nlabels * nsplits
    req_nsamples = [nlabels * nsplits]
    if nlabels == 2:
        # A single sensitivity in case of binary
        req_nsamples += [nsplits]
    else:
        # and for pairs in case of multiclass
        req_nsamples += [(nlabels * (nlabels - 1) / 2) * nsplits]
        # and for 1-vs-1 embedded within Multiclass operating on
        # pairs (e.g. SMLR)
        req_nsamples += [req_nsamples[-1] * 2]

        # Also for regression_based -- they can do multiclass
        # but only 1 sensitivity is provided
        if 'regression_based' in clf.__tags__:
            req_nsamples += [nsplits]

    # # of features should correspond
    self.failUnlessEqual(sens.shape[1], ds.nfeatures)
    # # of samples/sensitivities should also be reasonable
    self.failUnless(sens.shape[0] in req_nsamples)

    # Check if labels are present
    self.failUnless('splits' in sens.sa)
    self.failUnless('targets' in sens.sa)
    # should be 1D -- otherwise dtype object
    self.failUnless(sens.sa.targets.ndim == 1)

    sens_ulabels = sens.sa['targets'].unique
    # Some labels might be pairs (tuples) so ndarray would be of
    # dtype object and we would need to get them all
    if sens_ulabels.dtype is np.dtype('object'):
        sens_ulabels = np.unique(
            reduce(lambda x, y: x + y, [list(x) for x in sens_ulabels]))

    assert_array_equal(sens_ulabels, ds.sa['targets'].unique)

    errors = [x.percent_correct
              for x in sana.clf.ca.confusion.matrices]

    # lets go through all sensitivities and see if we selected the right
    # features
    #if 'meta' in clf.__tags__ and len(sens.samples[0].nonzero()[0])<2:
    if '5%' in clf.descr \
           or (nlabels > 2 and 'regression_based' in clf.__tags__):
        # Some meta classifiers (5% of ANOVA) are too harsh ;-)
        # if we get less than 2 features with non-zero sensitivities we
        # cannot really test
        # Also -- regression based classifiers performance for multiclass
        # is expected to suck in general
        return

    if cfg.getboolean('tests', 'labile', default='yes'):
        for conf_matrix in [sana.clf.ca.training_confusion] \
                           + sana.clf.ca.confusion.matrices:
            self.failUnless(
                conf_matrix.percent_correct >= 70,
                msg="We must have trained on each one more or "
                    "less correctly. Got %f%% correct on %d labels"
                    % (conf_matrix.percent_correct, nlabels))

    # Since now we have per split and possibly per label -- lets just find
    # mean per each feature per label across splits
    sensm = FxMapper('samples', lambda x: np.sum(x),
                     uattrs=['targets'])(sens)
    sensgm = maxofabs_sample()(sensm)   # global max of abs of means

    assert_equal(sensgm.shape[0], 1)
    assert_equal(sensgm.shape[1], ds.nfeatures)

    selected = FixedNElementTailSelector(
        len(ds.a.bogus_features))(sensgm.samples[0])

    if cfg.getboolean('tests', 'labile', default='yes'):
        self.failUnlessEqual(
            set(selected), set(ds.a.nonbogus_features),
            msg="At the end we should have selected the right features. "
" "Chose %s whenever nonbogus are %s" % (selected, ds.a.nonbogus_features)) # Now test each one per label # TODO: collect all failures and spit them out at once -- # that would make it easy to see if the sensitivity # just has incorrect order of labels assigned for sens1 in sensm: labels1 = sens1.targets # labels (1) for this sensitivity lndim = labels1.ndim label = labels1[0] # current label # XXX whole lndim comparison should be gone after # things get fixed and we arrive here with a tuple! if lndim == 1: # just a single label self.failUnless(label in ulabels) ilabel_all = np.where(ds.fa.targets == label)[0] # should have just 1 feature for the label self.failUnlessEqual(len(ilabel_all), 1) ilabel = ilabel_all[0] maxsensi = np.argmax(sens1) # index of max sensitivity self.failUnlessEqual( maxsensi, ilabel, "Maximal sensitivity for %s was found in %i whenever" " original feature was %i for nonbogus features %s" % (labels1, maxsensi, ilabel, ds.a.nonbogus_features)) elif lndim == 2 and labels1.shape[1] == 2: # pair of labels # we should have highest (in abs) coefficients in # those two labels maxsensi2 = np.argsort(np.abs(sens1))[0][-2:] ilabel2 = [ np.where(ds.fa.targets == l)[0][0] for l in label ] self.failUnlessEqual( set(maxsensi2), set(ilabel2), "Maximal sensitivity for %s was found in %s whenever" " original features were %s for nonbogus features %s" % (labels1, maxsensi2, ilabel2, ds.a.nonbogus_features)) """ # Now test for the sign of each one in pair ;) in # all binary problems L1 (-1) -> L2(+1), then # weights for L2 should be positive. to test for # L1 -- invert the sign # We already know (if we haven't failed in previous test), # that those 2 were the strongest -- so check only signs """ self.failUnless( sens1.samples[0, ilabel2[0]] < 0, "With %i classes in pair %s got feature %i for %r >= 0" % (nlabels, label, ilabel2[0], label[0])) self.failUnless( sens1.samples[0, ilabel2[1]] > 0, "With %i classes in pair %s got feature %i for %r <= 0" % (nlabels, label, ilabel2[1], label[1])) else: # yoh could be wrong at this assumption... time will show self.fail("Got unknown number labels per sensitivity: %s." " Should be either a single label or a pair" % labels1)