def test_split_featurewise_dataset_measure(self):
    """Check sensitivities computed per split: shape, split bookkeeping,
    and that distinct splits yield distinct samples/sensitivities.

    Uses the deprecation-free ``assertTrue``/``assertEqual`` spellings
    (the ``failUnless*`` aliases were deprecated in Python 2.7 and
    removed in 3.12).
    """
    ds = datasets['uni3small']
    sana = SplitFeaturewiseDatasetMeasure(
        analyzer=SMLR(fit_all_weights=True).get_sensitivity_analyzer(),
        splitter=NFoldSplitter(),
        )
    sens = sana(ds)
    # a sensitivity for each chunk and each label combination
    assert_equal(sens.shape,
                 (len(ds.sa['chunks'].unique) * len(ds.sa['targets'].unique),
                  ds.nfeatures))

    # Lets try more complex example with 'boosting'
    ds = datasets['uni3medium']
    ds.init_origids('samples')
    sana = SplitFeaturewiseDatasetMeasure(
        analyzer=SMLR(fit_all_weights=True).get_sensitivity_analyzer(),
        splitter=NoneSplitter(npertarget=0.25, mode='first',
                              nrunspersplit=2),
        enable_ca=['splits', 'sensitivities'])
    sens = sana(ds)

    # 2 runs per split, one sensitivity per label combination
    assert_equal(sens.shape, (2 * len(ds.sa['targets'].unique),
                              ds.nfeatures))
    splits = sana.ca.splits
    self.assertEqual(len(splits), 2)
    # npertarget=0.25 keeps a quarter of the samples per split
    # NOTE(review): `ds.nsamples / 4` assumes integer division (Python 2
    # semantics or nsamples divisible by 4) — confirm under Python 3.
    self.assertTrue(
        np.all([s[0].nsamples == ds.nsamples / 4 for s in splits]))
    # should have used different samples
    self.assertTrue(
        np.any([splits[0][0].sa.origids != splits[1][0].sa.origids]))
    # and should have got different sensitivities
    self.assertTrue(np.any(sens[0] != sens[1]))
def test_smlr_sensitivities(self):
    """SMLR without all-weights fitting must still expose sensitivities
    of shape (#labels - 1, #features) via its analyzer, reusing the
    training data (``force_training=False``).
    """
    data = normal_feature_dataset(perlabel=10, nlabels=2, nfeatures=4)

    # use SMLR on binary problem, but not fitting all weights
    clf = SMLR(fit_all_weights=False)
    clf.train(data)

    # now ask for the sensitivities WITHOUT having to pass the dataset
    # again
    sens = clf.get_sensitivity_analyzer(force_training=False)()
    # binary problem with fit_all_weights=False yields one weight vector
    # per non-reference label, i.e. len(UT) - 1 rows
    self.assertTrue(sens.shape == (len(data.UT) - 1, data.nfeatures))
def test_smlr_state(self):
    """Enabled conditional attributes ('estimates', 'predictions') must
    mirror what ``predict`` returns, one estimate row per prediction.
    """
    data = datasets['dumb']

    clf = SMLR()
    clf.train(data)

    # enable state reporting before predicting so the attributes are
    # populated by the predict call below
    clf.ca.enable('estimates')
    clf.ca.enable('predictions')

    p = np.asarray(clf.predict(data.samples))

    # stored predictions must match the returned ones exactly
    self.assertTrue((p == clf.ca.predictions).all())
    # one estimates entry per predicted sample
    self.assertTrue(
        np.array(clf.ca.estimates).shape[0] == np.array(p).shape[0])
def test_smlr(self):
    """SMLR must perfectly reproduce the labels of the 'dumb' dataset
    it was trained on.
    """
    data = datasets['dumb']

    clf = SMLR()
    clf.train(data)

    # prediction has to be perfect
    #
    # XXX yoh: whos said that?? ;-)
    #
    # There is always a tradeoff between learning and
    # generalization errors so... but in this case the problem is
    # more interesting: absent bias disallows to learn data you
    # have here -- there is no solution which would pass through
    # (0,0)
    predictions = clf.predict(data.samples)
    self.assertTrue((predictions == data.targets).all())
def test_union_feature_selection(self):
    """Combine two feature-selection methods via 'union' and
    'intersection', checking per-method selections, the combined id set,
    and that the output dataset contains exactly the selected columns.
    """
    # two methods: 5% highest F-scores, non-zero SMLR weights
    fss = [
        SensitivityBasedFeatureSelection(
            OneWayAnova(),
            FractionTailSelector(0.05, mode='select', tail='upper')),
        SensitivityBasedFeatureSelection(
            SMLRWeights(SMLR(lm=1, implementation="C"),
                        postproc=sumofabs_sample()),
            RangeElementSelector(mode='select'))
        ]

    fs = CombinedFeatureSelection(
        fss, combiner='union',
        enable_ca=['selected_ids', 'selections_ids'])

    od = fs(self.dataset)

    self.assertTrue(fs.combiner == 'union')
    self.assertTrue(len(fs.ca.selections_ids))
    self.assertTrue(len(fs.ca.selections_ids) <= self.dataset.nfeatures)
    # should store one set per method
    self.assertTrue(len(fs.ca.selections_ids) == len(fss))
    # no individual can be larger than union
    for s in fs.ca.selections_ids:
        self.assertTrue(len(s) <= len(fs.ca.selected_ids))
    # check output dataset: column i of the output is the selected
    # feature fid of the input (avoid shadowing builtin `id`)
    self.assertTrue(od.nfeatures == len(fs.ca.selected_ids))
    for i, fid in enumerate(fs.ca.selected_ids):
        self.assertTrue(
            (od.samples[:, i] == self.dataset.samples[:, fid]).all())

    # again for intersection
    fs = CombinedFeatureSelection(
        fss, combiner='intersection',
        enable_ca=['selected_ids', 'selections_ids'])
    # simply run it for now -- can't think of additional tests
    od = fs(self.dataset)
# NOTE(review): the next three lines are the tail of a method whose
# `def` lies above this chunk (presumably an `items` accessor of
# Warehouse) -- indentation reconstructed, confirm against full file.
        """Registered items
        """
        return self.__items

# Module-level warehouses: one for classifiers, one for regressions.
clfswh = Warehouse(known_tags=_KNOWN_INTERNALS)  # classifiers
regrswh = Warehouse(known_tags=_KNOWN_INTERNALS)  # regressions

# NB:
#  - Nu-classifiers are turned off since for haxby DS default nu
#    is an 'infisible' one
#  - Python's SMLR is turned off for the duration of development
#    since it is slow and results should be the same as of C version
#
# Register a small family of SMLR classifiers with different
# sparsity penalties (lm); the heavier penalties stay disabled.
clfswh += [SMLR(lm=0.1, implementation="C", descr="SMLR(lm=0.1)"),
           SMLR(lm=1.0, implementation="C", descr="SMLR(lm=1.0)"),
           #SMLR(lm=10.0, implementation="C", descr="SMLR(lm=10.0)"),
           #SMLR(lm=100.0, implementation="C", descr="SMLR(lm=100.0)"),
           #SMLR(implementation="Python", descr="SMLR(Python)")
           ]

# Wrap the first registered SMLR into a pairwise multiclass voter.
clfswh += \
     [MulticlassClassifier(clfswh['smlr'][0],
                           descr='Pairs+maxvote multiclass on ' + \
                           clfswh['smlr'][0].descr)]

# Register libsvm-backed SVMs only when the binding is available;
# also teach the warehouse about libsvm's implementation tags.
if externals.exists('libsvm'):
    from mvpa.clfs import libsvmc as libsvm
    clfswh._known_tags.union_update(libsvm.SVM._KNOWN_IMPLEMENTATIONS.keys())
    # NOTE(review): list continues past this chunk boundary
    clfswh += [
# NOTE(review): the next two lines are the tail of an `__init__` whose
# `def` lies above this chunk -- indentation reconstructed; `mult`
# presumably is an __init__ parameter, confirm against full file.
        FeaturewiseDatasetMeasure.__init__(self, **kwargs)
        # scaling factor applied to the synthetic sensitivity below
        self.__mult = mult

    def _call(self, dataset):
        """Train linear SVM on `dataset` and extract weights from classifier.
        """
        # deterministic ramp centered on the middle feature, scaled by
        # the stored multiplier -- no actual training happens here
        sens = self.__mult * (np.arange(dataset.nfeatures) -
                              int(dataset.nfeatures / 2))
        return Dataset(sens[np.newaxis])


# Sample universal classifiers (linear and non-linear) which should be
# used whenever it doesn't matter what classifier it is for testing
# some higher level creations -- chosen so it is the fastest universal
# one. Also it should not punch state.py in the face how it is
# happening with kNN...
sample_clf_lin = SMLR(lm=0.1)  # sg.svm.LinearCSVMC(svm_impl='libsvm')

#if externals.exists('shogun'):
#    sample_clf_nl = sg.SVM(kernel_type='RBF', svm_impl='libsvm')
#else:
#classical one which was used for a while
#and surprisingly it is not bad at all for the unittests
sample_clf_nl = kNN(k=5)

# and also a regression-based classifier
r = clfswh['linear', 'regression_based', 'has_sensitivity']
if len(r) > 0:
    sample_clf_reg = r[0]
else:
    sample_clf_reg = None