Beispiel #1
0
    def test_smlr_sensitivities(self):
        """Check the shape of the sensitivities of a binary SMLR.

        The classifier is created with ``fit_all_weights=False`` and trained
        once; the sensitivity analyzer is then called without a dataset and
        with ``force_training=False``.  The result is expected to have
        ``len(data.UT) - 1`` rows and ``data.nfeatures`` columns.
        """
        data = normal_feature_dataset(perlabel=10, nlabels=2, nfeatures=4)

        # use SMLR on binary problem, but not fitting all weights
        clf = SMLR(fit_all_weights=False)
        clf.train(data)

        # now ask for the sensitivities WITHOUT having to pass the dataset
        # again
        sens = clf.get_sensitivity_analyzer(force_training=False)()

        # assertEqual replaces the deprecated failUnless alias and shows
        # both shapes when the check fails.
        self.assertEqual(sens.shape, (len(data.UT) - 1, data.nfeatures))
Beispiel #2
0
    def test_smlr_sensitivities(self):
        """Verify the sensitivity shape reported by an already trained SMLR.

        An SMLR built with ``fit_all_weights=False`` is trained on a
        two-label dataset.  Its sensitivity analyzer is invoked with
        ``force_training=False`` and no dataset argument, and the returned
        shape must equal ``(len(data.UT) - 1, data.nfeatures)``.
        """
        data = normal_feature_dataset(perlabel=10, nlabels=2, nfeatures=4)

        # use SMLR on binary problem, but not fitting all weights
        clf = SMLR(fit_all_weights=False)
        clf.train(data)

        # now ask for the sensitivities WITHOUT having to pass the dataset
        # again
        analyzer = clf.get_sensitivity_analyzer(force_training=False)
        sens = analyzer()

        # failUnless is a deprecated unittest alias; assertEqual also prints
        # the two shapes on mismatch.
        expected_shape = (len(data.UT) - 1, data.nfeatures)
        self.assertEqual(sens.shape, expected_shape)
Beispiel #3
0
    def test_smlr_state(self):
        """Check that predict() fills the enabled conditional attributes.

        After training, the ``estimates`` and ``predictions`` conditional
        attributes are enabled.  A subsequent ``predict`` call must store
        predictions equal to the returned ones, and the stored estimates
        must have as many entries along the first axis as there are
        predictions.
        """
        data = datasets['dumb']

        clf = SMLR()

        clf.train(data)

        clf.ca.enable('estimates')
        clf.ca.enable('predictions')

        p = np.asarray(clf.predict(data.samples))

        # assertTrue/assertEqual replace the deprecated failUnless alias.
        self.assertTrue((p == clf.ca.predictions).all())
        # p is already an ndarray, so no extra np.array() copy is needed.
        self.assertEqual(np.array(clf.ca.estimates).shape[0], p.shape[0])
Beispiel #4
0
    def testSMLRState(self):
        """Check that predict() fills the enabled ``values``/``predictions`` states.

        After training, the ``values`` and ``predictions`` states are
        enabled.  A subsequent ``predict`` call must store predictions equal
        to the returned ones, and the stored values must have as many
        entries along the first axis as there are predictions.
        """
        data = datasets['dumb']

        clf = SMLR()

        clf.train(data)

        clf.states.enable('values')
        clf.states.enable('predictions')

        p = N.asarray(clf.predict(data.samples))

        # assertTrue/assertEqual replace the deprecated failUnless alias.
        self.assertTrue((p == clf.predictions).all())
        # p is already an ndarray, so no extra N.array() copy is needed.
        self.assertEqual(N.array(clf.values).shape[0], p.shape[0])
Beispiel #5
0
    def test_smlr_state(self):
        """Verify the conditional attributes recorded by SMLR.predict().

        ``estimates`` and ``predictions`` are enabled after training.  The
        test then predicts on the training samples and requires that the
        stored predictions match the returned ones element-wise and that
        the stored estimates have one leading-axis entry per prediction.
        """
        data = datasets['dumb']

        clf = SMLR()

        clf.train(data)

        clf.ca.enable('estimates')
        clf.ca.enable('predictions')

        p = np.asarray(clf.predict(data.samples))

        # failUnless is a deprecated unittest alias; use the assert* names.
        self.assertTrue((p == clf.ca.predictions).all())

        # p is already an ndarray, so its shape can be read directly.
        n_estimates = np.array(clf.ca.estimates).shape[0]
        self.assertEqual(n_estimates, p.shape[0])
Beispiel #6
0
    def test_smlr(self):
        """Train SMLR on the 'dumb' dataset and require perfect predictions.

        The classifier is trained and then asked to predict the very samples
        it was trained on; every prediction must equal the matching target.
        """
        data = datasets['dumb']

        clf = SMLR()

        clf.train(data)

        # prediction has to be perfect
        #
        # XXX yoh: who said that?? ;-)
        #
        # There is always a tradeoff between learning and
        # generalization errors so...  but in this case the problem is
        # more interesting: the absence of a bias term makes it impossible
        # to learn the data you have here -- there is no solution which
        # would pass through (0,0)
        predictions = clf.predict(data.samples)

        # assertTrue replaces the deprecated failUnless alias.
        self.assertTrue((predictions == data.targets).all())
Beispiel #7
0
    def test_smlr(self):
        """Require SMLR to reproduce the targets of its own training data.

        Uses the 'dumb' dataset: the classifier is trained on it and then
        predicts ``data.samples``; all predictions must equal
        ``data.targets``.
        """
        data = datasets['dumb']

        clf = SMLR()

        clf.train(data)

        # prediction has to be perfect
        #
        # XXX yoh: who said that?? ;-)
        #
        # There is always a tradeoff between learning and
        # generalization errors so...  but in this case the problem is
        # more interesting: the absence of a bias term makes it impossible
        # to learn the data you have here -- there is no solution which
        # would pass through (0,0)
        predictions = clf.predict(data.samples)
        all_correct = (predictions == data.targets).all()

        # failUnless is a deprecated unittest alias; assertTrue is the
        # supported spelling.
        self.assertTrue(all_correct)
Beispiel #8
0
    def compute(self):
        """Train or apply the classifier selected by ``self.opt``.

        Reads ``self.feat``, ``self.lab`` and ``self.opt``.  Within this
        method index 0 of ``feat``/``lab`` is used for training, index 1 for
        the SRM computation of the SVM path (and as the second argument of
        the plain KNN distance), and index 2 for the samples being predicted.

        Behaviour by ``opt.mode``:

        * ``"train"`` -- for ``opt.method == "svm"`` trains one-vs-all SVMs
          through a model store rooted at ``opt.modelRoot``; for ``"smlr"``
          trains an SMLR classifier and dumps it under ``opt.modelRoot``.
          Nothing is returned.
        * ``"test"`` / ``"predict"`` -- returns a ``Struct`` with

          - ``labPred``: 2-D label array; one row per entry of
            ``opt.thresh`` for ``"svm"``, a single row for ``"smlr"``,
            ``"knn"`` and ``"knn-svm"``;
          - ``param``: a record array with field ``thresh`` for ``"svm"``
            and ``"smlr"``, ``None`` for ``"knn"`` and ``"knn-svm"``.

        Any other mode/method combination falls through and returns ``None``.

        If ``opt.checkNull`` is set, the training labels (mode ``"train"``)
        or the test labels (mode ``"test"``) are shuffled in place first.

        Raises:
            ValueError: if ``opt.checkNull`` is set for a mode other than
                ``"train"``/``"test"``, or if ``opt.method == "svm"`` is
                combined with a kernel other than ``"lin"``/``"rbf"``.
        """
        feat = self.feat
        lab = self.lab
        opt = self.opt

        trainLabels = opt.trainLabels
        thresh = opt.thresh

        if opt.checkNull:
            if opt.mode == "train":
                nrnd.shuffle(lab[0])
            elif opt.mode == "test":
                nrnd.shuffle(lab[2])
            else:
                # String exceptions are not valid; raise a real exception
                # that carries the offending mode in its message.
                raise ValueError(
                    "Null hypothesis checking is undefined for this mode: %s"
                    % (opt.mode,))

        maxLab = max(max(labels) for labels in lab)

        svmFact = None
        if "svm" in opt.method:
            if opt.kernel == "lin":
                svmFact = SvmShogLin.factory(C=opt.C)
            elif opt.kernel == "rbf":
                kernel = SparseGaussianKernel(100, opt.rbfWidth)
                #kernel must know its lhs for classification, and
                #it must be the same as it was for training
                kernel.init(feat[0], feat[0])
                svmFact = SvmShogKern.factory(C=opt.C, kernel=kernel)
            if opt.method == "svm":
                if svmFact is None:
                    # Fail with a clear message instead of an
                    # UnboundLocalError on the next line.
                    raise ValueError(
                        "Unsupported SVM kernel: %s" % (opt.kernel,))
                modStore = SvmModFileStore(opt.modelRoot, svmFact=svmFact)

        if opt.mode == "train":
            if opt.method == "svm":
                svmMul = SvmOneVsAll(maxLabel=maxLab)
                svmMul.setLab(lab[0])
                svmMul.setFeat(feat[0])
                svmMul.setSvmStore(modStore)
                svmMul.trainMany(trainLabels=trainLabels)
            elif opt.method == "smlr":
                # Imported lazily so mvpa is only needed on the SMLR path.
                import mvpa.datasets
                from mvpa.clfs.smlr import SMLR
                mv_data = mvpa.datasets.Dataset(
                    samples=feat[0].get_full_feature_matrix().transpose(),
                    labels=lab[0])
                clf = SMLR(lm=opt.smlrLm,
                           convergence_tol=opt.smlrConvergenceTol)
                clf.train(mv_data)
                makedir(opt.modelRoot)
                dumpObj(clf, pjoin(opt.modelRoot, "smlr"))

        elif opt.mode in ("test", "predict"):
            if opt.method == "svm":
                if trainLabels is None:
                    labLoad = None
                    maxLabel = modStore.getMaxLabel()
                else:
                    labLoad = trainLabels
                    maxLabel = max(trainLabels)
                svms = SvmModMemStore(svmFact)
                svms.fromOther(modStore, labels=labLoad)
                svmMul = SvmOneVsAll(maxLabel=maxLabel)
                svmMul.setSvmStore(svms)
                if opt.useSrm:
                    # SRM is computed from feat[1]/lab[1] before the labels
                    # are cleared for the actual prediction below.
                    svmMul.setFeat(feat[1])
                    svmMul.classifyBin()
                    svmMul.setLab(lab[1])
                    svmMul.computeSrm()
                    srm = svmMul.getSrm()
                    print("SRM = %s" % (srm,))
                svmMul.setLab(None)
                svmMul.setFeat(feat[2])
                svmMul.classifyBin()
                # One row of predicted labels per decision threshold.
                labPred = n.zeros((len(thresh), len(lab[2])), dtype='i4')
                for iThresh, t in enumerate(thresh):
                    labPred[iThresh] = svmMul.classify(thresh=t,
                                                       useSrm=opt.useSrm)
                    print("Threshold %.3f" % t)
                return Struct(labPred=labPred,
                              param=n.rec.fromarrays([thresh], names="thresh"))
            elif opt.method == "smlr":
                clf = loadObj(pjoin(opt.modelRoot, "smlr"))
                labPred = n.asarray(
                    clf.predict(feat[2].get_full_feature_matrix().transpose()),
                    dtype='i4')
                labPred.shape = (1, len(labPred))
                # Only the first threshold is reported for SMLR.
                return Struct(labPred=labPred,
                              param=n.rec.fromarrays([thresh[0:1]],
                                                     names="thresh"))
            elif opt.method == "knn":
                # NOTE(review): this branch measures distances against
                # feat[1] while every other prediction path in this method
                # uses feat[2] -- confirm that feat[1] is intended here.
                dist = SparseEuclidianDistance(feat[0], feat[1])
                mod = KNN(opt.knnK, dist, Labels(lab[0].astype('f8')))
                if opt.knnMaxDist is not None:
                    mod.set_max_dist(opt.knnMaxDist)
                mod.train()
                labPred = mod.classify().get_labels()
                # Map the classifier's own "unclassified" marker onto the
                # label configured in the options.
                labUnclass = mod.get_unclass_label()
                labPred[labPred == labUnclass] = opt.labUnclass
                labPred.shape = (1, len(labPred))
                return Struct(labPred=labPred, param=None)
            elif opt.method == "knn-svm":
                assert len(thresh) == 1,"multiple SVM decision thresholds not implemented for knn-svm"
                dist = SparseEuclidianDistance(feat[0], feat[2])
                knn = KNN(opt.knnK, dist, Labels(lab[0].astype('f8')))
                knn.train()
                n_test = feat[2].get_num_vectors()
                # Output buffers filled by knn.get_neighbours(): one row per
                # test sample, one column per neighbour.
                ind_neighb = numpy.zeros((n_test, opt.knnK), dtype='i4')
                dist_neighb = numpy.zeros((n_test, opt.knnK), dtype='f8')
                print("Computing KNN list...")
                knn.get_neighbours(ind_neighb, dist_neighb)
                labPred = numpy.zeros(n_test, dtype='i4')
                print("Training neighbours' SVMs...")
                for iTest in range(n_test):
                    samp_ind_neighb = ind_neighb[iTest]
                    samp_dist_neighb = dist_neighb[iTest]
                    if opt.knnMaxDist is not None:
                        # Keep only neighbours closer than the cutoff.
                        samp_in_dist = samp_dist_neighb < opt.knnMaxDist
                        samp_ind_neighb = samp_ind_neighb[samp_in_dist]
                        samp_dist_neighb = samp_dist_neighb[samp_in_dist]
                    if len(samp_ind_neighb) > 0:
                        svmTrFeat = feat[0].subsample(samp_ind_neighb)
                        svmTrLab = lab[0][samp_ind_neighb]
                        if (svmTrLab == svmTrLab[0]).all():
                            # All neighbours agree: no SVM needed.
                            labPred[iTest] = svmTrLab[0]
                            if iTest % 100 == 0:
                                print("All %s neighbours have one label %i for samp %i" % (len(samp_ind_neighb),labPred[iTest],iTest))
                        else:
                            # Train an SVM on the neighbours only and use it
                            # to label this single test sample.
                            svmTsFeat = feat[2].subsample(
                                numpy.asarray([iTest], dtype='i4'))
                            labPred[iTest] = svmOneVsAllOneStep(
                                feat=(svmTrFeat, svmTsFeat),
                                lab=(svmTrLab,),
                                opt=Struct(C=opt.C, thresh=thresh[0],
                                           useSrm=False))
                            if iTest % 100 == 0:
                                print("SVM selected label %i from %s for samp %i" % (labPred[iTest],svmTrLab,iTest))
                    else:
                        labPred[iTest] = opt.labUnclass
                        if iTest % 100 == 0:
                            print("No training samples are within cutoff distance found for samp %i" % (iTest,))

                labPred.shape = (1, len(labPred))
                return Struct(labPred=labPred, param=None)