def test_smlr_state(self):
    """SMLR must expose predictions/estimates via conditional attributes."""
    data = datasets['dumb']
    clf = SMLR()
    clf.train(data)
    for attr in ('estimates', 'predictions'):
        clf.ca.enable(attr)
    predicted = np.asarray(clf.predict(data.samples))
    # the stored predictions must be exactly what predict() returned
    self.failUnless((predicted == clf.ca.predictions).all())
    # one row of estimates per predicted sample
    self.failUnless(np.array(clf.ca.estimates).shape[0]
                    == np.array(predicted).shape[0])
def testSMLRState(self):
    """SMLR must expose predictions/values through its state variables."""
    data = datasets['dumb']
    clf = SMLR()
    clf.train(data)
    for state in ('values', 'predictions'):
        clf.states.enable(state)
    predicted = N.asarray(clf.predict(data.samples))
    # stored predictions must agree with the return value of predict()
    self.failUnless((predicted == clf.predictions).all())
    # values must carry one entry per predicted sample
    self.failUnless(N.array(clf.values).shape[0]
                    == N.array(predicted).shape[0])
def test_smlr_state(self):
    # NOTE(review): this file defines an identically named
    # test_smlr_state earlier; with both at the same class scope the
    # later definition shadows the earlier one -- confirm the duplicate
    # is intentional (looks like a merge/refactor leftover).
    # Checks that enabled conditional attributes ('estimates',
    # 'predictions') are populated consistently with predict()'s output.
    data = datasets['dumb']
    clf = SMLR()
    clf.train(data)
    clf.ca.enable('estimates')
    clf.ca.enable('predictions')
    p = np.asarray(clf.predict(data.samples))
    # stored predictions must match the returned predictions exactly
    self.failUnless((p == clf.ca.predictions).all())
    # one estimates row per predicted sample
    self.failUnless(
        np.array(clf.ca.estimates).shape[0] == np.array(p).shape[0])
def test_smlr(self):
    """Train SMLR on the 'dumb' dataset and expect perfect recall.

    Perfect prediction on the training data is expected here even
    though, in general, the absence of a bias term can make some
    datasets unlearnable (no separating solution through the origin);
    this particular dataset is learnable.
    """
    data = datasets['dumb']
    clf = SMLR()
    clf.train(data)
    for predicted, target in zip(clf.predict(data.samples), data.targets):
        self.failUnless(predicted == target)
def compute(self): feat = self.feat lab = self.lab opt = self.opt trainLabels = opt.trainLabels thresh = opt.thresh if opt.checkNull: if opt.mode == "train": nrnd.shuffle(lab[0]) elif opt.mode == "test": nrnd.shuffle(lab[2]) else: raise "Null hypothesis checking is undefined for this mode: ",opt.mode maxLab = max([ max(l) for l in lab ]) if "svm" in opt.method: if opt.kernel == "lin": svmFact = SvmShogLin.factory(C=opt.C) elif opt.kernel == "rbf": kernel=SparseGaussianKernel(100,opt.rbfWidth) #kernel must know its lhs for classification, and #it must be the same as it was for training kernel.init(feat[0],feat[0]) svmFact = SvmShogKern.factory(C=opt.C,kernel=kernel) if opt.method == "svm": modStore = SvmModFileStore(opt.modelRoot,svmFact=svmFact) if opt.mode == "train": if opt.method == "svm": svmMul = SvmOneVsAll(maxLabel=maxLab) svmMul.setLab(lab[0]) svmMul.setFeat(feat[0]) svmMul.setSvmStore(modStore) svmMul.trainMany(trainLabels=trainLabels) elif opt.method == "smlr": import mvpa.datasets from mvpa.clfs.smlr import SMLR mv_data = mvpa.datasets.Dataset(samples=feat[0].get_full_feature_matrix().transpose(),labels=lab[0]) clf = SMLR(lm=opt.smlrLm,convergence_tol=opt.smlrConvergenceTol) clf.train(mv_data) makedir(opt.modelRoot) dumpObj(clf,pjoin(opt.modelRoot,"smlr")) elif opt.mode in ("test","predict"): if opt.method == "svm": if trainLabels is None: labLoad = None maxLabel = modStore.getMaxLabel() else: labLoad = trainLabels maxLabel = max(trainLabels) svms = SvmModMemStore(svmFact) svms.fromOther(modStore,labels=labLoad) svmMul = SvmOneVsAll(maxLabel=maxLabel) svmMul.setSvmStore(svms) if opt.useSrm: svmMul.setFeat(feat[1]) svmMul.classifyBin() svmMul.setLab(lab[1]) svmMul.computeSrm() srm = svmMul.getSrm() #srm[:] = 1. 
#svmMul.setSrm(srm) print "SRM = %s" % (svmMul.getSrm(),) svmMul.setLab(None) svmMul.setFeat(feat[2]) svmMul.classifyBin() labPred = n.zeros((len(thresh),len(lab[2])),dtype='i4') for iThresh in xrange(len(thresh)): t = thresh[iThresh] labPred[iThresh] = svmMul.classify(thresh=t,useSrm=opt.useSrm) print "Threshold %.3f" % t return Struct(labPred=labPred,param=n.rec.fromarrays([thresh],names="thresh")) elif opt.method == "smlr": clf = loadObj(pjoin(opt.modelRoot,"smlr")) labPred = n.asarray(clf.predict(feat[2].get_full_feature_matrix().transpose()),dtype='i4') labPred.shape = (1,len(labPred)) return Struct(labPred=labPred,param=n.rec.fromarrays([thresh[0:1]],names="thresh")) elif opt.method == "knn": dist = SparseEuclidianDistance(feat[0],feat[1]) mod = KNN(opt.knnK,dist,Labels(lab[0].astype('f8'))) if opt.knnMaxDist is not None: mod.set_max_dist(opt.knnMaxDist) mod.train() labPred = mod.classify().get_labels() labUnclass = mod.get_unclass_label() labPred[labPred==labUnclass] = opt.labUnclass labPred.shape = (1,len(labPred)) return Struct(labPred=labPred,param=None) elif opt.method == "knn-svm": assert len(thresh) == 1,"multiple SVM decision thresholds not implemented for knn-svm" dist = SparseEuclidianDistance(feat[0],feat[2]) knn = KNN(opt.knnK,dist,Labels(lab[0].astype('f8'))) knn.train() n_test = feat[2].get_num_vectors() ind_neighb = numpy.zeros((n_test,opt.knnK),dtype='i4') dist_neighb = numpy.zeros((n_test,opt.knnK),dtype='f8') print "Computing KNN list..." knn.get_neighbours(ind_neighb,dist_neighb) labPred = numpy.zeros(n_test,dtype='i4') print "Training neighbours' SVMs..." 
for iTest in xrange(n_test): samp_ind_neighb = ind_neighb[iTest] samp_dist_neighb = dist_neighb[iTest] if opt.knnMaxDist is not None: samp_in_dist = samp_dist_neighb < opt.knnMaxDist samp_ind_neighb = samp_ind_neighb[samp_in_dist] samp_dist_neighb = samp_dist_neighb[samp_in_dist] if len(samp_ind_neighb) > 0: svmTrFeat = feat[0].subsample(samp_ind_neighb) svmTrLab = lab[0][samp_ind_neighb] if (svmTrLab == svmTrLab[0]).all(): labPred[iTest] = svmTrLab[0] if iTest % 100 == 0: print "All %s neighbours have one label %i for samp %i" % (len(samp_ind_neighb),labPred[iTest],iTest) else: svmTsFeat = feat[2].subsample(numpy.asarray([iTest],dtype='i4')) labPred[iTest] = svmOneVsAllOneStep(feat=(svmTrFeat,svmTsFeat), lab=(svmTrLab,), opt=Struct(C=opt.C,thresh=thresh[0],useSrm=False)) if iTest % 100 == 0: print "SVM selected label %i from %s for samp %i" % (labPred[iTest],svmTrLab,iTest) else: labPred[iTest] = opt.labUnclass if iTest % 100 == 0: print "No training samples are within cutoff distance found for samp %i" % (iTest,) labPred.shape = (1,len(labPred)) return Struct(labPred=labPred,param=None)