def test_split_featurewise_dataset_measure(self):
    ds = datasets['uni3small']
    sana = RepeatedMeasure(
        SMLR(fit_all_weights=True).get_sensitivity_analyzer(),
        ChainNode([NFoldPartitioner(),
                   Splitter('partitions', attr_values=[1])]))

    sens = sana(ds)
    # a sensitivity for each chunk and each label combination
    assert_equal(sens.shape,
                 (len(ds.sa['chunks'].unique) * len(ds.sa['targets'].unique),
                  ds.nfeatures))

    # Let's try a more complex example with 'boosting'
    ds = datasets['uni3medium']
    ds.init_origids('samples')
    sana = RepeatedMeasure(
        SMLR(fit_all_weights=True).get_sensitivity_analyzer(),
        Balancer(amount=0.25, count=2, apply_selection=True),
        enable_ca=['datasets', 'repetition_results'])
    sens = sana(ds)

    assert_equal(sens.shape,
                 (2 * len(ds.sa['targets'].unique), ds.nfeatures))
    splits = sana.ca.datasets
    self.assertEqual(len(splits), 2)
    self.assertTrue(
        np.all([s.nsamples == ds.nsamples // 4 for s in splits]))
    # should have used different samples
    self.assertTrue(np.any([splits[0].sa.origids != splits[1].sa.origids]))
    # and should have got different sensitivities
    self.assertTrue(np.any(sens[0] != sens[3]))
def test_smlr_sensitivities(self):
    data = normal_feature_dataset(perlabel=10, nlabels=2, nfeatures=4)

    # use SMLR on binary problem, but not fitting all weights
    clf = SMLR(fit_all_weights=False)
    clf.train(data)

    # now ask for the sensitivities WITHOUT having to pass the dataset
    # again
    sens = clf.get_sensitivity_analyzer(force_train=False)(None)
    self.assertTrue(sens.shape == (len(data.UT) - 1, data.nfeatures))
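# A standalone sketch, not part of the original suite, summarizing the shape
# behaviour the surrounding tests rely on: with fit_all_weights=False SMLR
# reports len(UT) - 1 sensitivity rows, while fit_all_weights=True (as used in
# the boosting test above) yields one row per class.  Only APIs already used
# in these tests are assumed; the helper name itself is hypothetical.
def _smlr_sensitivity_shape_sketch():
    data = normal_feature_dataset(perlabel=10, nlabels=3, nfeatures=4)
    for fit_all, nrows in ((False, len(data.UT) - 1), (True, len(data.UT))):
        clf = SMLR(fit_all_weights=fit_all)
        clf.train(data)
        # the trained classifier can report sensitivities without seeing
        # the dataset again
        sens = clf.get_sensitivity_analyzer(force_train=False)(None)
        assert sens.shape == (nrows, data.nfeatures)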
def test_smlr_state(self):
    data = datasets["dumb"]

    clf = SMLR()
    clf.train(data)

    clf.ca.enable("estimates")
    clf.ca.enable("predictions")

    p = np.asarray(clf.predict(data.samples))

    self.assertTrue((p == clf.ca.predictions).all())
    self.assertTrue(np.array(clf.ca.estimates).shape[0] == np.array(p).shape[0])
def test_smlr_state():
    data = datasets['dumb']

    clf = SMLR()
    clf.train(data)

    clf.ca.enable('estimates')
    clf.ca.enable('predictions')

    p = np.asarray(clf.predict(data.samples))

    assert_array_equal(p, clf.ca.predictions)
    assert_equal(np.array(clf.ca.estimates).shape[0], np.array(p).shape[0])
def test_smlr_state(self):
    data = datasets['dumb']

    clf = SMLR()
    clf.train(data)

    clf.ca.enable('estimates')
    clf.ca.enable('predictions')

    p = np.asarray(clf.predict(data.samples))

    self.failUnless((p == clf.ca.predictions).all())
    self.failUnless(np.array(clf.ca.estimates).shape[0] == np.array(p).shape[0])
def train_readout_mnlogit(stimset, samples):
    (ds_train, ds_valid) = to_mvpa_dataset(stimset, samples)

    clf = SMLR()
    clf.train(ds_train)

    preds = clf.predict(ds_valid)
    actual = ds_valid.sa['targets']
    zeq = np.array([a == p for (a, p) in zip(actual, preds)])
    nc = float(len((zeq == True).nonzero()[0]))
    #print '%d correct out of %d' % (nc, len(preds))
    percent_correct = nc / float(len(preds))
    #print 'SMLogit Percent Correct: %0.3f' % percent_correct

    return percent_correct
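# A hedged aside (hypothetical helper, not in the original code): assuming
# `actual` and `preds` are plain 1-d label sequences as above, the same
# fraction-correct can be computed in a single numpy expression instead of
# the explicit comparison loop.
def _percent_correct_sketch(actual, preds):
    # mean of a boolean equality array == fraction of matching labels
    return np.mean(np.asarray(actual) == np.asarray(preds))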
def test_splitclf_sensitivities():
    datasets = [normal_feature_dataset(perlabel=100, nlabels=2,
                                       nfeatures=4,
                                       nonbogus_features=[0, i + 1],
                                       snr=1, nchunks=2)
                for i in range(2)]

    sclf = SplitClassifier(SMLR(),
                           NFoldPartitioner())
    analyzer = sclf.get_sensitivity_analyzer()

    senses1 = analyzer(datasets[0])
    senses2 = analyzer(datasets[1])

    for senses in senses1, senses2:
        # This should be False when comparing two folds
        assert_false(np.allclose(senses.samples[0], senses.samples[2]))
        assert_false(np.allclose(senses.samples[1], senses.samples[3]))

    # Moreover, with new data we should have got different results
    # (i.e. it must have retrained correctly)
    for s1, s2 in zip(senses1, senses2):
        assert_false(np.allclose(s1, s2))

    # and we should have "selected" the "correct" voxels
    for i, senses in enumerate((senses1, senses2)):
        assert_equal(set(np.argsort(np.max(np.abs(senses), axis=0))[-2:]),
                     set((0, i + 1)))
def test_smlr(self):
    data = datasets["dumb"]

    clf = SMLR()
    clf.train(data)

    # prediction has to be perfect
    #
    # XXX yoh: who said that?? ;-)
    #
    # There is always a tradeoff between learning and generalization
    # errors, so... but in this case the problem is more interesting:
    # the absent bias disallows learning the data you have here -- there
    # is no solution which would pass through (0,0)
    predictions = clf.predict(data.samples)
    self.assertTrue((predictions == data.targets).all())
def test_clf_transfer_measure(self):
    # and now on a classifier
    clf = SMLR()
    enode = BinaryFxNode(mean_mismatch_error, 'targets')
    tm = TransferMeasure(clf, Splitter('chunks', count=2),
                         enable_ca=['stats'])
    res = tm(self.dataset)
    manual_error = np.mean(res.samples.squeeze() != res.sa.targets)
    postproc_error = enode(res)

    tm_err = TransferMeasure(clf, Splitter('chunks', count=2),
                             postproc=enode)
    auto_error = tm_err(self.dataset)

    ok_(manual_error == postproc_error.samples[0, 0])
def test_pseudo_cv_measure(self):
    clf = SMLR()
    enode = BinaryFxNode(mean_mismatch_error, 'targets')
    tm = TransferMeasure(clf, Splitter('partitions'), postproc=enode)
    cvgen = NFoldPartitioner()
    rm = RepeatedMeasure(tm, cvgen)
    res = rm(self.dataset)
    # one error per fold
    assert_equal(res.shape, (len(self.dataset.sa['chunks'].unique), 1))

    # we can do the same with CrossValidation
    cv = CrossValidation(clf, cvgen,
                         enable_ca=['stats', 'training_stats', 'datasets'])
    res = cv(self.dataset)
    assert_equal(res.shape, (len(self.dataset.sa['chunks'].unique), 1))
def test_union_feature_selection(self):
    # two methods: 5% highest F-scores, non-zero SMLR weights
    fss = [SensitivityBasedFeatureSelection(
               OneWayAnova(),
               FractionTailSelector(0.05, mode='select', tail='upper')),
           SensitivityBasedFeatureSelection(
               SMLRWeights(SMLR(lm=1, implementation="C"),
                           postproc=sumofabs_sample()),
               RangeElementSelector(mode='select'))]

    fs = CombinedFeatureSelection(fss, method='union')

    od_union = fs(self.dataset)

    self.assertTrue(fs.method == 'union')
    # check output dataset
    self.assertTrue(od_union.nfeatures <= self.dataset.nfeatures)

    # again for intersection
    fs = CombinedFeatureSelection(fss, method='intersection')
    od_intersect = fs(self.dataset)
    assert_true(od_intersect.nfeatures < od_union.nfeatures)
    def descriptions(self):
        """Descriptions of registered items"""
        return list(self.__descriptions.keys())


clfswh = Warehouse(known_tags=_KNOWN_INTERNALS)  # classifiers
regrswh = Warehouse(known_tags=_KNOWN_INTERNALS)  # regressions

# NB:
#  - Nu-classifiers are turned off since for the haxby DS the default nu
#    is an 'infeasible' one
#  - Python's SMLR is turned off for the duration of development
#    since it is slow and results should be the same as the C version's
#
clfswh += [SMLR(lm=0.1, implementation="C", descr="SMLR(lm=0.1)"),
           SMLR(lm=1.0, implementation="C", descr="SMLR(lm=1.0)"),
           #SMLR(lm=10.0, implementation="C", descr="SMLR(lm=10.0)"),
           #SMLR(lm=100.0, implementation="C", descr="SMLR(lm=100.0)"),
           #SMLR(implementation="Python", descr="SMLR(Python)")
           ]

clfswh += [MulticlassClassifier(SMLR(lm=0.1),
                                descr='Pairs+maxvote multiclass on SMLR(lm=0.1)')]

clfswh += [RandomClassifier(descr="Random"),
           RandomClassifier(same=True, descr="RandomSame")]
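# A hedged usage sketch (hypothetical helper, not part of the original module):
# the warehouse behaves as a tag-indexed collection, so the SMLR instances
# registered above can later be retrieved by tag combinations, in the same
# style as the clfswh['linear', 'regression_based', 'has_sensitivity'] query
# used further below.
def _list_linear_sensitivity_clfs_sketch():
    # every registered classifier tagged both 'linear' and 'has_sensitivity';
    # the SMLR instances above are expected to be among them
    return list(clfswh['linear', 'has_sensitivity'])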
#   See COPYING file distributed along with the PyMVPA package for the
#   copyright and license terms.
#
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""Unit tests for PyMVPA sparse multinomial logistic regression classifier"""

import numpy as np

from mvpa2.testing import *
from mvpa2.testing.datasets import datasets

from mvpa2.clfs.smlr import SMLR
from mvpa2.misc.data_generators import normal_feature_dataset


@sweepargs(clf=(SMLR(), SMLR(implementation='Python')))
def test_smlr(clf):
    data = datasets['dumb']

    clf.train(data)

    # prediction has to be perfect
    #
    # XXX yoh: who said that?? ;-)
    #
    # There is always a tradeoff between learning and generalization
    # errors, so... but in this case the problem is more interesting:
    # the absent bias disallows learning the data you have here -- there
    # is no solution which would pass through (0,0)
    predictions = clf.predict(data.samples)
    assert_array_equal(predictions, data.targets)
        FeaturewiseMeasure.__init__(self, **kwargs)
        self.__mult = mult

    def _call(self, dataset):
        """Return a fixed ramp of pseudo-sensitivities for `dataset`
        (no classifier is actually trained here).
        """
        sens = self.__mult * (np.arange(dataset.nfeatures) -
                              int(dataset.nfeatures / 2))
        return Dataset(sens[np.newaxis])


# Sample universal classifiers (linear and non-linear) which should be
# used whenever it doesn't matter what classifier it is for testing
# some higher level creations -- chosen so it is the fastest universal
# one. Also it should not punch state.py in the face as it is
# happening with kNN...
sample_clf_lin = SMLR(lm=0.1)  # sg.svm.LinearCSVMC(svm_impl='libsvm')

#if externals.exists('shogun'):
#    sample_clf_nl = sg.SVM(kernel_type='RBF', svm_impl='libsvm')
#else:
# classical one which was used for a while
# and surprisingly it is not bad at all for the unittests
sample_clf_nl = kNN(k=5)

# and also a regression-based classifier
r = clfswh['linear', 'regression_based', 'has_sensitivity']
if len(r) > 0:
    sample_clf_reg = r[0]
else:
    sample_clf_reg = None