def sixClasses(numImgs):
    """
    Train a per-class codebook classifier on the six-class object set,
    evaluate it on the big test images and store the result.

    Training images are read from C{img/train/6classes}, test images from
    C{img/test_big_images}. The settings used and the resulting confusion
    matrix / accuracies are written to C{rec.logger}.

    @param numImgs: Forwarded to C{im.loadImages} and C{im.loadLabels} for
        the training set (presumably the number of images taken per class;
        confirm against the C{im} module).
    """
    trainDir = "img/train/6classes"
    testDir = "img/test_big_images"

    # Object classes taking part in this experiment
    classlist = [
        "005.volvic",
        "006.barillabox",
        "004.fantabottle",
        "007.kinderschokoladetafel",
        "003.pringles",
        "002.cokecan",
    ]

    # Codebook settings
    numCluster = 5000
    fDist = 0.0015

    # SIFT thresholds: one pair for training, one for classification
    et = 7.5
    pt = 5.0
    etClassify = 7.5
    ptClassify = 5.0

    # SVM settings and confidence threshold
    nu = 0.5
    gamma = 1.0
    confThr = 0.00

    logger = rec.logger
    logger.info("Training gleissenthalls six classes")
    logger.info("SIFT parameters: edge threshold=%f, peak threshold=%f", et, pt)
    logger.info("Number of clusters: %d", numCluster)
    logger.info("SVM parameter: nu=%f, sigma(RBF-Kernel)=%f", nu, gamma)

    # Load images and labels from the folder structure
    trainImgs = im.loadImages(trainDir, classlist, numImgs)
    testImgs = im.loadImages(testDir, classlist)
    trainLabels = im.loadLabels(trainDir, classlist, numImgs)
    testLabels = im.loadLabels(testDir, classlist)

    # Train the learner and wrap it in a classifier
    learner = cl.Learner()
    learner.trainPerClass(trainImgs, trainLabels, numCluster, fDist, pt, et)
    classifier = cl.Classifier(learner)

    # Quantize the test descriptors against the learned codebook
    testDesc, _ = im.extractFeaturesPerClass(testImgs, testLabels, ptClassify, etClassify)
    testHistograms = cl._buildHistograms(learner.codebook, testDesc)
    testData = cl._convertToOrangeDataSet(testHistograms, learner.domain, testLabels)

    # Evaluate and persist
    classifier.classificationAccuracy(testData, None, confThr, ptClassify, etClassify, nu, gamma, False)
    classifier.saveToDB()

    logger.info("Classes: %s", learner.strClasses)
    logger.info("\n" + str(classifier.confusion))
    logger.info("\nAccuracy: %f CV: %f", classifier.clAccuracy, classifier.cvAccuracy)
def calltech(numTrain, numTest, masterCBs, pNorm, numCluster):
    """
    Train a per-class codebook classifier on the C{256_ObjectCategories}
    folder, evaluate it on images from the same folder and store the result.

    The settings used and the resulting confusion matrix / accuracies are
    written to C{rec.logger}.

    @param numTrain: Forwarded to C{im.loadImages} / C{im.loadLabels} for the
        training set.
    @param numTest: Forwarded to C{im.loadImages} / C{im.loadLabels} for the
        test set. Test images are requested with C{rev=True}.
    @param masterCBs: Forwarded to C{Learner.trainPerClass}.
    @param pNorm: Forwarded to C{Learner.trainPerClass}.
    @param numCluster: Number of clusters handed to
        C{Learner.trainPerClass}.
    """
    dataDir = "256_ObjectCategories"

    # No restriction on the classes that are loaded
    classlist = None

    # Minimum centroid distance used while building the codebook
    fDist = 0.0001

    # SIFT thresholds: one pair for training, one for classification
    et = 5.0
    pt = 0.0
    etClassify = 10.0
    ptClassify = 2.5

    # SVM settings and confidence threshold
    nu = 0.5
    gamma = 2.0
    confThr = 0.00

    logger = rec.logger
    logger.info("Training calltech")
    logger.info("SIFT parameters: edge threshold=%f, peak threshold=%f", et, pt)
    logger.info("Number of clusters: %d", numCluster)
    logger.info("SVM parameter: nu=%f, sigma(RBF-Kernel)=%f", nu, gamma)

    # Load images and labels from the folder structure.
    # NOTE(review): the test images are loaded with rev=True but the test
    # labels are not -- confirm that both still line up image by image.
    trainImgs = im.loadImages(dataDir, classlist, numTrain)
    testImgs = im.loadImages(dataDir, classlist, numTest, rev=True)
    trainLabels = im.loadLabels(dataDir, classlist, numTrain)
    testLabels = im.loadLabels(dataDir, classlist, numTest)

    # Train the learner and wrap it in a classifier
    learner = cl.Learner()
    learner.trainPerClass(trainImgs, trainLabels, numCluster, fDist, pt, et, masterCBs, pNorm)
    classifier = cl.Classifier(learner)

    # Quantize the test descriptors against the learned codebook
    testDesc, _ = im.extractFeaturesPerClass(testImgs, testLabels, ptClassify, etClassify)
    testHistograms = cl._buildHistograms(learner.codebook, testDesc)
    testData = cl._convertToOrangeDataSet(testHistograms, learner.domain, testLabels)

    # Evaluate and persist
    classifier.classificationAccuracy(testData, None, confThr, ptClassify, etClassify, nu, gamma, False)
    classifier.saveToDB()

    logger.info("Classes: %s", learner.strClasses)
    logger.info("\n" + str(classifier.confusion))
    logger.info("\nAccuracy: %f CV: %f", classifier.clAccuracy, classifier.cvAccuracy)
def googleImages(numTrainImgs):
    """
    Train a per-class codebook classifier on the C{googleSimilarImages}
    training folder, evaluate it on the big test images and store the result.

    Training images are read from C{img/train/googleSimilarImages}, test
    images from C{img/test_big_images}. The settings used and the resulting
    confusion matrix / accuracies are written to C{rec.logger}.

    @param numTrainImgs: Forwarded to C{im.loadImages} and C{im.loadLabels}
        for the training set; also reported in the first log message.
    """
    trainDir = "img/train/googleSimilarImages"
    testDir = "img/test_big_images"

    # Count argument used for the test images and the test labels
    numTestImgs = 30

    # No restriction on the classes that are loaded
    classlist = None

    # Codebook settings
    numCluster = 5000
    fDist = 0.0009

    # SIFT thresholds: one pair for training, one for classification
    et = 5.0
    pt = 7.5
    etClassify = 7.5
    ptClassify = 5.0

    # SVM settings and confidence threshold
    nu = 0.5
    gamma = 2.0
    confThr = 0.00

    logger = rec.logger
    logger.info("Training %i googleSimilarImages", numTrainImgs)
    logger.info("SIFT parameters: edge threshold=%f, peak threshold=%f", et, pt)
    logger.info("Number of clusters: %d", numCluster)
    logger.info("SVM parameter: nu=%f, sigma(RBF-Kernel)=%f", nu, gamma)

    # Load images and labels from the folder structure
    trainImgs = im.loadImages(trainDir, classlist, numTrainImgs)
    testImgs = im.loadImages(testDir, classlist, numTestImgs)
    trainLabels = im.loadLabels(trainDir, classlist, numTrainImgs)
    testLabels = im.loadLabels(testDir, classlist, numTestImgs)

    # Train the learner and wrap it in a classifier
    learner = cl.Learner()
    learner.trainPerClass(trainImgs, trainLabels, numCluster, fDist, pt, et)
    classifier = cl.Classifier(learner)

    # Quantize the test descriptors against the learned codebook
    testDesc, _ = im.extractFeaturesPerClass(testImgs, testLabels, ptClassify, etClassify)
    testHistograms = cl._buildHistograms(learner.codebook, testDesc)
    testData = cl._convertToOrangeDataSet(testHistograms, learner.domain, testLabels)

    # Evaluate and persist
    classifier.classificationAccuracy(testData, None, confThr, ptClassify, etClassify, nu, gamma, False)
    classifier.saveToDB()

    logger.info("Classes: %s", learner.strClasses)
    logger.info("\n" + str(classifier.confusion))
    logger.info("\nAccuracy: %f CV: %f", classifier.clAccuracy, classifier.cvAccuracy)
def trainPerClass(self, descOrImgs, labels, numCluster, fisherThreshold, peakThreshold=None, edgeThreshold=None, masterCBs=1, pNorm=2.0, maxiter=0, numruns=20):
    """
    Build a codebook by clustering the descriptors of each class and
    vector quantize all descriptors against it.

    The codebook, the orange domain and the histograms of visual words are
    stored as members (C{codebook}, C{domain}, C{histograms}), together with
    C{strClasses}, C{fisherThreshold} and C{trainingTime}. If L{descOrImgs}
    is an iterator, it is handed to C{im.extractFeaturesPerClass} first and
    L{peakThreshold} / L{edgeThreshold} are stored as members as well.

    @param descOrImgs: Either a dictionary of descriptors whose keys are
        passed to C{_getAllClasses}, or an iterator of images.
    @type descOrImgs: dict or iterator
    @param labels: The labels belonging to the images. Used for feature
        extraction and for building the orange data set.
    @param numCluster: The total number of clusters. Each class gets
        C{numCluster / len(classes)} of them.
    @type numCluster: int
    @param fisherThreshold: The minimum distance two centroids need to have
        after clustering.
    @type fisherThreshold: double
    @param peakThreshold: The peak threshold used during SIFT feature
        extraction. Ignored unless L{descOrImgs} is an iterator.
    @type peakThreshold: float
    @param edgeThreshold: The edge threshold used during SIFT feature
        extraction. Ignored unless L{descOrImgs} is an iterator.
    @type edgeThreshold: float
    @param masterCBs: Forwarded to C{_clusterPerClass}.
    @param pNorm: Accepted but not read by this method.
    @param maxiter: The maximum iterations of one k-means run. "0" means
        there is no iteration limit.
    @type maxiter: int
    @param numruns: The number of k-means runs.
    @type numruns: int
    @raise TypeError: If L{descOrImgs} is neither an iterator nor a
        dictionary.
    """
    if isinstance(descOrImgs, collections.Iterator):
        # Images were handed in: remember the SIFT settings and extract
        # the descriptors before anything else happens.
        self.peakThreshold = peakThreshold
        self.edgeThreshold = edgeThreshold
        descPerClass, _ = im.extractFeaturesPerClass(descOrImgs, labels, peakThreshold, edgeThreshold)
    elif isinstance(descOrImgs, dict):
        descPerClass = descOrImgs
    else:
        raise TypeError("'descOrImgs' must be either an iterator or a dictionary.")

    # Feature extraction is deliberately not part of the measured time
    starttime = time.time()
    self.fisherThreshold = fisherThreshold

    # Compute the codebook from the SIFT descriptors of all classes
    classes = _getAllClasses(descPerClass.keys())
    self.strClasses = str(classes)
    clustersPerClass = numCluster / len(classes)
    self.codebook = self._clusterPerClass(
        descPerClass, clustersPerClass, maxiter, numruns, fisherThreshold, masterCBs
    )

    # Histograms of visual words as a list of numpy arrays
    rawHistograms = _buildHistograms(self.codebook, descPerClass)

    # Orange domain: one float attribute per codebook row plus the class
    numWords = numpy.size(self.codebook, 0)
    attributes = [orange.FloatVariable('a%i' % idx) for idx in xrange(numWords)]
    attributes.append(orange.EnumVariable("class", values=orange.StringList(classes)))
    self.domain = orange.Domain(attributes)

    # Orange example table holding the training histograms
    self.histograms = _convertToOrangeDataSet(rawHistograms, self.domain, labels)

    self.trainingTime = time.time() - starttime
def parameterGridSearch(
    trainPath,
    trainClasses,
    trainCount,
    testPath,
    testCount,
    clusterRange,
    ptRange,
    etRange,
    fisherRange,
    nuRange,
    gammaRange,
    ctRange,
    ptTestRg=None,
    etTestRg=None,
    numMastCB=1,
):
    """
    Train and evaluate a classifier for every combination of the given
    parameter ranges and store each result via C{Classifier.saveToDB}.

    The loops are nested from the most expensive step to the cheapest one:
    SIFT extraction of the training images, codebook training, SIFT
    extraction of the test images and finally the SVM evaluation.

    @param trainPath: Folder the training images and labels are loaded from.
    @param trainClasses: Class selection forwarded to C{im.loadImages} /
        C{im.loadLabels} for the training set.
    @param trainCount: Count forwarded to C{im.loadImages} /
        C{im.loadLabels} for the training set.
    @param testPath: Folder the test images and labels are loaded from. The
        classes found in the training labels are used as class selection.
    @param testCount: Count forwarded to C{im.loadImages} /
        C{im.loadLabels} for the test set.
    @param clusterRange: Cluster counts handed to C{Learner.trainPerClass}.
    @param ptRange: SIFT peak thresholds used for the training images.
    @param etRange: SIFT edge thresholds used for the training images.
    @param fisherRange: Minimum centroid distances handed to
        C{Learner.trainPerClass}.
    @param nuRange: Nu values handed to C{Classifier.classificationAccuracy}.
    @param gammaRange: Gamma values handed to
        C{Classifier.classificationAccuracy}.
    @param ctRange: Confidence thresholds handed to
        C{Classifier.classificationAccuracy}.
    @param ptTestRg: SIFT peak thresholds used for the test images. C{None}
        (or an entry of C{-1}) means "use the current training value".
    @param etTestRg: SIFT edge thresholds used for the test images. C{None}
        (or an entry of C{-1}) means "use the current training value".
    @param numMastCB: Forwarded to C{Learner.trainPerClass} as C{masterCBs}.
    """
    trainLabels = im.loadLabels(trainPath, trainClasses, trainCount)
    trClasses = cl._getAllClasses(trainLabels)
    testLabels = im.loadLabels(testPath, trClasses, testCount)

    # Default each test range on its own. Defaulting them only when BOTH are
    # missing made a call with just one of them fail with a TypeError while
    # iterating over None -- after the training had already been done.
    if ptTestRg is None:
        ptTestRg = [-1]
    if etTestRg is None:
        etTestRg = [-1]

    for peakVal in ptRange:
        for edgeVal in etRange:
            # Extract SIFT features
            rec.logger.info("SIFT Training Parameter. Peak: %f Edge: %f", peakVal, edgeVal)
            trainImgs = im.loadImages(trainPath, trainClasses, trainCount)
            trainDescPerClass, numTrainDesc = im.extractFeaturesPerClass(
                trainImgs, trainLabels, peakVal, edgeVal
            )

            for clusterParam in clusterRange:
                for fDist in fisherRange:
                    learner = cl.Learner()
                    learner.peakThreshold = peakVal
                    learner.edgeThreshold = edgeVal
                    learner.numTrainingDesc = numTrainDesc
                    rec.logger.info("Cluster parameter. Cluster: %i MinDist %f", clusterParam, fDist)
                    try:
                        learner.trainPerClass(
                            trainDescPerClass, trainLabels, clusterParam, fDist, masterCBs=numMastCB
                        )
                    except ValueError:
                        rec.logger.warning("Skipped this paramter set because there are not enough features.")
                        # Leaves the fisherRange loop: the remaining minimum
                        # distances for this cluster count are skipped too.
                        break

                    for testPeak in ptTestRg:
                        for testEdge in etTestRg:
                            # Resolve the -1 sentinel per parameter into fresh
                            # names, so the loop variables are never
                            # overwritten and one iteration cannot leak its
                            # substitution into the next.
                            p = peakVal if testPeak == -1 else testPeak
                            e = edgeVal if testEdge == -1 else testEdge

                            rec.logger.info("SIFT Test Parameter. Peak: %f Edge: %f", p, e)
                            testImgs = im.loadImages(testPath, trClasses, testCount, False)
                            testDesc, numTestDesc = im.extractFeaturesPerClass(testImgs, testLabels, p, e)
                            tmpHistograms = cl._buildHistograms(learner.codebook, testDesc)
                            orngHistograms = cl._convertToOrangeDataSet(
                                tmpHistograms, learner.domain, testLabels
                            )

                            for n in nuRange:
                                for g in gammaRange:
                                    for ct in ctRange:
                                        c = cl.Classifier(learner)
                                        c.numTestDesc = numTestDesc
                                        c.peakThreshold = p
                                        c.edgeThreshold = e
                                        rec.logger.info(
                                            "SVM parameter. Nu: %f gamma %f," "Confidence Threshold: %f", n, g, ct
                                        )
                                        _, _ = c.classificationAccuracy(orngHistograms, None, ct, p, e, n, g)
                                        c.saveToDB()
                                        # Debug aid for tracking object lifetimes
                                        rec.logger.info(
                                            "Ref count of classifier: %i, learner: %i",
                                            sys.getrefcount(c),
                                            sys.getrefcount(learner),
                                        )