def test_clf_transfer_measure(self):
    # and now on a classifier
    clf = SMLR()
    enode = BinaryFxNode(mean_mismatch_error, 'targets')
    tm = TransferMeasure(clf, Splitter('chunks', count=2),
                         enable_ca=['stats'])
    res = tm(self.dataset)
    manual_error = np.mean(res.samples.squeeze() != res.sa.targets)
    postproc_error = enode(res)
    tm_err = TransferMeasure(clf, Splitter('chunks', count=2),
                             postproc=enode)
    auto_error = tm_err(self.dataset)
    ok_(manual_error == postproc_error.samples[0, 0])

def test_confusion_based_error(self, l_clf):
    train = datasets['uni2medium']
    train = train[train.sa.train == 1]
    # to check if we fail to classify for 3 labels
    test3 = datasets['uni3medium']
    test3 = test3[test3.sa.train == 1]

    err = ConfusionBasedError(clf=l_clf)
    terr = TransferMeasure(l_clf, Splitter('train', attr_values=[1, 1]),
                           postproc=BinaryFxNode(mean_mismatch_error,
                                                 'targets'))

    self.assertRaises(UnknownStateError, err, None)
    """Shouldn't be able to access the state yet"""

    l_clf.train(train)
    e, te = err(None), terr(train)
    te = np.asscalar(te)
    self.assertTrue(abs(e - te) < 1e-10,
        msg="ConfusionBasedError (%.2g) should be equal to TransferError "
            "(%.2g) on traindataset" % (e, te))

    # this will print nasty WARNING but it is ok -- it is just checking code
    # NB warnings are not printed while doing whole testing
    warning("Don't worry about the following warning.")
    if 'multiclass' in l_clf.__tags__:
        self.assertFalse(terr(test3) is None)

    # try copying the beast
    terr_copy = copy(terr)

def test_null_dist_prob(self, l_clf):
    train = datasets['uni2medium']

    num_perm = 10
    permutator = AttributePermutator('targets', count=num_perm,
                                     limit='chunks')
    # define class to estimate NULL distribution of errors
    # use left tail of the distribution since we use MeanMatchFx as error
    # function and lower is better
    terr = TransferMeasure(l_clf,
                           Repeater(count=2),
                           postproc=BinaryFxNode(mean_mismatch_error,
                                                 'targets'),
                           null_dist=MCNullDist(permutator, tail='left'))

    # check reasonable error range
    err = terr(train)
    self.assertTrue(np.mean(err) < 0.4)

    # Lets do the same for CVTE
    cvte = CrossValidation(l_clf, OddEvenPartitioner(),
                           null_dist=MCNullDist(permutator, tail='left',
                                                enable_ca=['dist_samples']),
                           postproc=mean_sample())
    cv_err = cvte(train)

    # check that the result is highly significant since we know that the
    # data has signal
    null_prob = np.asscalar(terr.ca.null_prob)

    if cfg.getboolean('tests', 'labile', default='yes'):
        self.assertTrue(null_prob <= 0.1,
            msg="Failed to check that the result is highly significant "
                "(got %f) since we know that the data has signal"
                % null_prob)

        self.assertTrue(np.asscalar(cvte.ca.null_prob) <= 0.1,
            msg="Failed to check that the result is highly significant "
                "(got p(cvte)=%f) since we know that the data has signal"
                % np.asscalar(cvte.ca.null_prob))

    # we should be able to access the actual samples of the distribution
    # yoh: why it is 3D really?
    # mih: because these are the distribution samples for the ONE error
    #      collapsed into ONE value across all folds. It will also be
    #      3d if the return value of the measure isn't a scalar and it is
    #      not collapsed across folds. it simply corresponds to the shape
    #      of the output dataset of the respective measure (+1 axis)
    # Some permutations could have been skipped since classifier failed
    # to train due to degenerate situation etc, thus accounting for them
    self.assertEqual(cvte.null_dist.ca.dist_samples.shape[2],
                     num_perm - cvte.null_dist.ca.skipped)
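    # Note on shapes, assuming the CVTE setup above: dist_samples carries the
    # measure's output shape plus one trailing axis for the permutations that
    # actually completed, so with postproc=mean_sample() (a single sample with
    # a single feature) the expectation exercised by the assertEqual amounts to
    #   cvte.null_dist.ca.dist_samples.shape \
    #       == (1, 1, num_perm - cvte.null_dist.ca.skipped)
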
def test_transfer_measure(self):
    # come up with my own measure that only checks if training data
    # and test data are the same
    class MyMeasure(Measure):
        def _train(self, ds):
            self._tds = ds
        def _call(self, ds):
            return Dataset(ds.samples == self._tds.samples)
    tm = TransferMeasure(MyMeasure(), Splitter('chunks', count=2))
    # result should not be all True (== identical)
    assert_true((tm(self.dataset).samples == False).any())
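    # Sketch of the contract exercised above (only the behaviour this test
    # relies on): TransferMeasure feeds the first portion produced by the
    # Splitter to the wrapped measure's _train() and the second portion to its
    # _call(), so a custom measure needs just those two methods to take part
    # in a transfer analysis.
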
def test_pseudo_cv_measure(self):
    clf = SMLR()
    enode = BinaryFxNode(mean_mismatch_error, 'targets')
    tm = TransferMeasure(clf, Splitter('partitions'), postproc=enode)
    cvgen = NFoldPartitioner()
    rm = RepeatedMeasure(tm, cvgen)
    res = rm(self.dataset)
    # one error per fold
    assert_equal(res.shape, (len(self.dataset.sa['chunks'].unique), 1))

    # we can do the same with Crossvalidation
    cv = CrossValidation(clf, cvgen,
                         enable_ca=['stats', 'training_stats', 'datasets'])
    res = cv(self.dataset)
    assert_equal(res.shape, (len(self.dataset.sa['chunks'].unique), 1))
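    # Note: the CrossValidation call above is effectively the convenience form
    # of the manual RepeatedMeasure(TransferMeasure(...), partitioner) chain
    # built first; both are expected to yield one error sample per chunk/fold,
    # which is what the two assert_equal checks verify.
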
def test_single_class(self, clf):
    """Test if binary and multiclass can handle single class training/testing
    """
    ds = datasets['uni2small']
    ds = ds[ds.sa.targets == 'L0']  # only 1 label
    assert(ds.sa['targets'].unique == ['L0'])

    ds_ = list(OddEvenPartitioner().generate(ds))[0]
    # Here is our "nice" 0.6 substitute for TransferError:
    trerr = TransferMeasure(clf, Splitter('train'),
                            postproc=BinaryFxNode(mean_mismatch_error,
                                                  'targets'))
    try:
        err = np.asscalar(trerr(ds_))
    except Exception, e:
        self.fail(str(e))

def test_gnb(self):
    gnb = GNB()
    gnb_nc = GNB(common_variance=False)
    gnb_n = GNB(normalize=True)
    gnb_n_nc = GNB(normalize=True, common_variance=False)
    gnb_lin = GNB(common_variance=True)

    ds = datasets['uni2medium']

    # Generic silly coverage just to assure that it works in all
    # possible scenarios:
    bools = (True, False)
    # There should be better way... heh
    for cv in bools:                # common_variance?
      for prior in ('uniform', 'laplacian_smoothing', 'ratio'):
        tp = None                   # predictions -- all above should
                                    # result in the same predictions
        for n in bools:             # normalized?
          for ls in bools:          # logspace?
            for es in ((), ('estimates')):
                gnb_ = GNB(common_variance=cv,
                           prior=prior,
                           normalize=n,
                           logprob=ls,
                           enable_ca=es)
                tm = TransferMeasure(gnb_, Splitter('train'))
                predictions = tm(ds).samples[:, 0]
                if tp is None:
                    tp = predictions
                assert_array_equal(predictions, tp)
                # if normalized -- check if estimates are such
                if n and 'estimates' in es:
                    v = gnb_.ca.estimates
                    if ls:          # in log space -- take exp ;)
                        v = np.exp(v)
                    d1 = np.sum(v, axis=1) - 1.0
                    self.assertTrue(np.max(np.abs(d1)) < 1e-5)

                # smoke test to see whether invocation of sensitivity analyser
                # blows if gnb classifier isn't linear, and to see whether it
                # doesn't blow when it is linear.
                if cv:
                    assert 'has_sensitivity' in gnb_.__tags__
                    gnb_.get_sensitivity_analyzer()
                if not cv:
                    with self.assertRaises(NotImplementedError):
                        gnb_.get_sensitivity_analyzer()
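    # Note, restating what the smoke test above checks: GNB advertises
    # 'has_sensitivity' (and hence a working get_sensitivity_analyzer()) only
    # for common_variance=True, where the classifier is linear; for the
    # non-linear common_variance=False case NotImplementedError is the
    # expected outcome.
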
def test_multiclass_ties(clf):
    if 'lars' in clf.__tags__:
        raise SkipTest("Known to crash while running this test")
    ds = _dsties1

    # reassign data between ties, so we know that decision is data,
    # not order, driven
    ds_ = ds.copy(deep=True)
    ds_.samples[ds.a.ties_idx[1]] = ds.samples[ds.a.ties_idx[0]]
    ds_.samples[ds.a.ties_idx[0]] = ds.samples[ds.a.ties_idx[1]]
    ok_(np.any(ds_.samples != ds.samples))

    clf_ = clf.clone()
    clf = clf.clone()
    clf.ca.enable(['estimates', 'predictions'])
    clf_.ca.enable(['estimates', 'predictions'])
    te = TransferMeasure(clf, Splitter('train'),
                         postproc=BinaryFxNode(mean_mismatch_error,
                                               'targets'),
                         enable_ca=['stats'])
    te_ = TransferMeasure(clf_, Splitter('train'),
                          postproc=BinaryFxNode(mean_mismatch_error,
                                                'targets'),
                          enable_ca=['stats'])
    # NB the TransferMeasure variants above are immediately superseded by the
    #    cross-validation based ones below
    te = CrossValidation(clf, NFoldPartitioner(), postproc=mean_sample(),
                         enable_ca=['stats'])
    te_ = CrossValidation(clf_, NFoldPartitioner(), postproc=mean_sample(),
                          enable_ca=['stats'])
    error = te(ds)
    matrix = te.ca.stats.matrix
    # if ties were broken randomly we should have got nearly the same
    # number of hits for tied targets
    ties_indices = [te.ca.stats.labels.index(c) for c in ds.a.ties]
    hits = np.diag(te.ca.stats.matrix)[ties_indices]

    # First check is to see if we swap data between tied labels we
    # are getting the same results if we permute labels accordingly,
    # i.e. that tie resolution is not dependent on the labels order
    # but rather on the data
    te_(ds_)
    matrix_swapped = te_.ca.stats.matrix

    if False: #0 in hits:
        print clf, matrix, matrix_swapped
        print clf.ca.estimates[:, 2] - clf.ca.estimates[:, 0]
        #print clf.ca.estimates

    # TODO: for now disabled all the non-compliant ones to pass the
    #       tests.  For visibility decided to skip them instead of just
    #       exclusion, and skipping only here to possibly catch crashes
    #       which might happen before
    if len(set(('libsvm', 'sg', 'skl', 'gpr',
                'blr')).intersection(clf.__tags__)):
        raise SkipTest("Skipped %s because it is known to fail" % clf)
    ok_(not (np.array_equal(matrix, matrix_swapped)
             and 0 in hits))

    # this check is valid only if ties are not broken randomly
    # like it is the case with SMLR
    if not ('random_tie_breaking' in clf.__tags__
            or  # since __tags__ would not go that high up e.g. in
                # <knn on SMLR non-0>
            'SMLR' in str(clf)):
        assert_array_equal(hits,
                           np.diag(matrix_swapped)[ties_indices[::-1]])

    # Second check is to just see if we didn't get an obvious bias and
    # got 0 in one of the hits, although it is labile
    if cfg.getboolean('tests', 'labile', default='yes'):
        ok_(not 0 in hits)


              "Failed to load due to %r" % (e,)
    ok_(isinstance(lrn_, Classifier))

    # Verify that we have the same ca enabled
    # XXX FAILS atm!
    #ok_(set(lrn.ca.enabled) == set(lrn_.ca.enabled))

    # lets choose a dataset
    dsname, errorfx = \
        {False: ('uni2large', mean_mismatch_error),
         True: ('sin_modulated', corr_error)}['regression' in lrn.__tags__]
    ds = datasets[dsname]

    splitter = Splitter('train')
    postproc = BinaryFxNode(errorfx, 'targets')
    te = TransferMeasure(lrn, splitter, postproc=postproc)
    te_ = TransferMeasure(lrn_, splitter, postproc=postproc)
    error = te(ds)
    error_ = te_(ds)

    if len(set(['swig', 'rpy2']).intersection(lrn.__tags__)):
        raise SkipTest("Trained swigged and R-interfaced classifiers can't "
                       "be stored/reloaded yet")

    # now lets store/reload the trained one
    try:
        h5save(fname, lrn_)
    except Exception, e:
        raise AssertionError, \
              "Failed to store trained lrn due to %r" % (e,)

def test_retrainables(self, clf):
    # XXX we agreed to not worry about this for the initial 0.6 release
    raise SkipTest
    # we need a copy since will tune its internals later on
    clf = clf.clone()
    clf.ca.change_temporarily(enable_ca=['estimates'],
                              # ensure that it does do predictions
                              # while training
                              disable_ca=['training_stats'])
    clf_re = clf.clone()
    # TODO: .retrainable must have a callback to call smth like
    # _set_retrainable
    clf_re._set_retrainable(True)

    # need to have high snr so we don't 'cope' with problematic
    # datasets since otherwise unittests would fail.
    dsargs = {'perlabel': 50, 'nlabels': 2, 'nfeatures': 5,
              'nchunks': 1, 'nonbogus_features': [2, 4], 'snr': 5.0}

    ## !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # NB datasets will be changed by the end of testing, so if
    # are to change to use generic datasets - make sure to copy
    # them here
    ds = deepcopy(datasets['uni2large'])
    clf.untrain()
    clf_re.untrain()
    trerr = TransferMeasure(clf, Splitter('train'),
                            postproc=BinaryFxNode(mean_mismatch_error,
                                                  'targets'))
    trerr_re = TransferMeasure(clf_re, Splitter('train'),
                               disable_ca=['training_stats'],
                               postproc=BinaryFxNode(mean_mismatch_error,
                                                     'targets'))

    # Just check for correctness of retraining
    err_1 = np.asscalar(trerr(ds))
    self.assertTrue(err_1 < 0.3,
        msg="We should test here on easy dataset. Got error of %s" % err_1)
    values_1 = clf.ca.estimates[:]
    # sometimes retraining gets into deeper optimization ;-)
    eps = 0.05
    corrcoef_eps = 0.85             # just to get no failures... usually > 0.95

    def batch_test(retrain=True, retest=True, closer=True):
        err = np.asscalar(trerr(ds))
        err_re = np.asscalar(trerr_re(ds))
        corr = np.corrcoef(clf.ca.estimates, clf_re.ca.estimates)[0, 1]
        corr_old = np.corrcoef(values_1, clf_re.ca.estimates)[0, 1]
        if __debug__:
            debug('TEST', "Retraining stats: errors %g %g corr %g "
                          "with old error %g corr %g"
                          % (err, err_re, corr, err_1, corr_old))
        self.assertTrue(clf_re.ca.retrained == retrain,
                        ("Must fully train",
                         "Must retrain instead of full training")[retrain])
        self.assertTrue(clf_re.ca.repredicted == retest,
                        ("Must fully test",
                         "Must retest instead of full testing")[retest])
        self.assertTrue(corr > corrcoef_eps,
            msg="Result must be close to the one without retraining."
                " Got corrcoef=%s" % (corr))
        if closer:
            self.assertTrue(corr >= corr_old,
                msg="Result must be closer to current without retraining"
                    " than to old one. Got corrcoef=%s" % (corr_old))

    # Check sequential retraining/retesting
    for i in xrange(3):
        flag = bool(i != 0)
        # ok - on 1st call we should train/test, then retrain/retest
        # and we can't compare for closeness to old result since
        # we are working on the same data/classifier
        batch_test(retrain=flag, retest=flag, closer=False)

    # should retrain nicely if we change a parameter
    if 'C' in clf.params:
        clf.params.C *= 0.1
        clf_re.params.C *= 0.1
        batch_test()
    elif 'sigma_noise' in clf.params:
        clf.params.sigma_noise *= 100
        clf_re.params.sigma_noise *= 100
        batch_test()
    else:
        raise RuntimeError, \
              'Please implement testing while changing some of the ' \
              'params for clf %s' % clf

    # should retrain nicely if we change kernel parameter
    if hasattr(clf, 'kernel_params') and len(clf.kernel_params):
        clf.kernel_params.gamma = 0.1
        clf_re.kernel_params.gamma = 0.1
        # retest is false since kernel got recomputed thus
        # can't expect to use the same kernel
        batch_test(retest=not ('gamma' in clf.kernel_params))

    # should retrain nicely if we change labels
    permute = AttributePermutator('targets', assure=True)
    oldlabels = dstrain.targets[:]
    dstrain = permute(dstrain)
    self.assertTrue((oldlabels != dstrain.targets).any(),
        msg="We should succeed at permuting -- now got the same targets")
    ds = vstack((dstrain, dstest))
    batch_test()

    # Change labels in testing
    oldlabels = dstest.targets[:]
    dstest = permute(dstest)
    self.assertTrue((oldlabels != dstest.targets).any(),
        msg="We should succeed at permuting -- now got the same targets")
    ds = vstack((dstrain, dstest))
    batch_test()

    # should re-train if we change data
    # reuse trained SVM and its 'final' optimization point
    if not clf.__class__.__name__ in ['GPR']:
        # on GPR everything depends on the data ;-)
        oldsamples = dstrain.samples.copy()
        dstrain.samples[:] += dstrain.samples * 0.05
        self.assertTrue((oldsamples != dstrain.samples).any())
        ds = vstack((dstrain, dstest))
        batch_test(retest=False)

    clf.ca.reset_changed_temporarily()

    # test retrain()
    # TODO XXX -- check validity
    clf_re.retrain(dstrain)
    self.assertTrue(clf_re.ca.retrained)
    clf_re.retrain(dstrain, labels=True)
    self.assertTrue(clf_re.ca.retrained)
    clf_re.retrain(dstrain, traindataset=True)
    self.assertTrue(clf_re.ca.retrained)

    # test repredict()
    clf_re.repredict(dstest.samples)
    self.assertTrue(clf_re.ca.repredicted)
    self.assertRaises(RuntimeError, clf_re.repredict,
                      dstest.samples, labels=True)
    """for now retesting with anything changed makes no sense"""
    clf_re._set_retrainable(False)

def test_gideon_weird_case(self):
    """Test if MappedClassifier can handle a mapper altering the number of samples

    'The utter collapse' -- communicated by Peter J. Kohler

    The desire is to collapse all samples per category in the training and
    testing sets, resulting in only a single sample per category for
    training and for testing.

    It is a peculiar scenario which pinpoints the problem that, so far,
    mappers were assumed not to change the number of samples.
    """
    from mvpa2.mappers.fx import mean_group_sample
    from mvpa2.clfs.knn import kNN
    from mvpa2.mappers.base import ChainMapper
    ds = datasets['uni2large'].copy()
    #ds = ds[ds.sa.chunks < 9]
    accs = []
    k = 1                           # for kNN
    nf = 1                          # for NFoldPartitioner
    for i in xrange(1):             # # of random runs
        ds.samples = np.random.randn(*ds.shape)
        #
        # There are 3 ways to accomplish needed goal
        #

        # 0. Hard way: overcome the problem by manually
        #    pre-splitting/meaning in a loop
        from mvpa2.clfs.transerror import ConfusionMatrix
        partitioner = NFoldPartitioner(nf)
        meaner = mean_group_sample(['targets', 'partitions'])
        cm = ConfusionMatrix()
        te = TransferMeasure(kNN(k), Splitter('partitions'),
                             postproc=BinaryFxNode(mean_mismatch_error,
                                                   'targets'),
                             enable_ca=['stats'])
        errors = []
        for part in partitioner.generate(ds):
            ds_meaned = meaner(part)
            errors.append(np.asscalar(te(ds_meaned)))
            cm += te.ca.stats
        #print i, cm.stats['ACC']
        accs.append(cm.stats['ACC'])

        if False:
            # not yet working -- see _tent/allow_ch_nsamples
            # branch for attempt to make it work
            # 1. This is a "native way" IF we allow change of number
            #    of samples via _call to be done by MappedClassifier
            #    while operating solely on the mapped dataset
            clf2 = MappedClassifier(
                clf=kNN(k),  # clf,
                mapper=mean_group_sample(['targets', 'partitions']))
            cv = CrossValidation(clf2, NFoldPartitioner(nf), postproc=None,
                                 enable_ca=['stats'])
            # meaning all should be ok since we should have balanced
            # sets across all chunks here
            errors_native = cv(ds)

            self.assertEqual(np.max(np.abs(errors_native.samples[:, 0]
                                           - errors)),
                             0)

        # 2. Work without fixes to MappedClassifier allowing
        #    change of # of samples
        #
        # CrossValidation will operate on a chain mapper which
        # would perform necessary meaning first before dealing with
        # kNN cons: .stats would not be exposed since ChainMapper
        # doesn't expose them from ChainMapper (yet)
        if __debug__ and 'ENFORCE_CA_ENABLED' in debug.active:
            raise SkipTest("Known to fail while trying to enable "
                           "training_stats for the ChainMapper")
        cv2 = CrossValidation(ChainMapper([mean_group_sample(['targets',
                                                              'partitions']),
                                           kNN(k)],
                                          space='targets'),
                              NFoldPartitioner(nf),
                              postproc=None)
        errors_native2 = cv2(ds)

        self.assertEqual(np.max(np.abs(errors_native2.samples[:, 0]
                                       - errors)),
                         0)

        # All of the ways should provide the same results
        #print i, np.max(np.abs(errors_native.samples[:,0] - errors)), \
        #      np.max(np.abs(errors_native2.samples[:,0] - errors))

    if False:
        # just to investigate the distribution if we have enough iterations
        import pylab as pl
        uaccs = np.unique(accs)
        step = np.asscalar(np.unique(np.round(uaccs[1:] - uaccs[:-1], 4)))
        bins = np.linspace(0., 1., np.round(1. / step + 1))
        xx = pl.hist(accs, bins=bins, align='left')
        pl.xlim((0. - step / 2, 1. + step / 2))