def evaluate( contender, runset, printVerbose=False ):
    '''Run the contender's detector on a runset and tally the outcome.

    contender: object providing getDetector(), detectorData, detectorProps,
        foundMap and name.
    runset: a cvac.RunSet, or a dict whose 'runset' entry holds one.
    printVerbose: forwarded to easy.printRunSetInfo as printArtifacts.

    Returns an EvaluationResult created with 0 folds, the first element of
    the confusion table as the result and its second element as nores.

    Raises RuntimeError("no proper runset") when no cvac.RunSet with
    purposedLists can be obtained from the runset argument.
    '''
    # Unwrap the dictionary form.  A dict without a usable 'runset' entry is
    # left as is, so that it is rejected by the RuntimeError below rather
    # than failing with a KeyError on the lookup.
    if isinstance(runset, dict):
        wrapped = runset.get('runset')
        if wrapped is not None and isinstance(wrapped, cvac.RunSet):
            runset = wrapped
    if not runset or not isinstance(runset, cvac.RunSet) or not runset.purposedLists:
        raise RuntimeError("no proper runset")
    evalset = runset

    print( "---- evaluation:" )
    easy.printRunSetInfo( evalset, printArtifacts=printVerbose )
    detector = contender.getDetector()
    detections = easy.detect( detector, contender.detectorData, evalset,
                              detectorProperties=contender.detectorProps )
    ct = getConfusionTable( detections, origSet=evalset, foundMap=contender.foundMap )

    # create result structure; ct is indexed as (result, nores)
    r = EvaluationResult( 0, ct[0], nores=ct[1], detail=None, name=contender.name )
    return r
'''
Easy!  mini tutorial
Create a RunSet in several ways, use it for detection and evaluation
matz 6/18/2013
'''

import easy
import cvac

# The simplest RunSet: a single image without a label.
# Keep in mind that paths are relative to CVAC.DataDir.
rs1 = easy.createRunSet( "testImg/italia.jpg" )
print("=== RunSet 1: ===")
easy.printRunSetInfo( rs1, printLabels=True )

# Samples get a purpose by stating it when the RunSet is created:
rs2 = easy.createRunSet( "testImg/italia.jpg", "POSITIVE" )
print("\n=== RunSet 2: ===")
easy.printRunSetInfo( rs2, printLabels=True )

# More samples can be appended to an existing runset; anything starting
# with "pos" will be added into the POSITIVE sequence of labeled items.
easy.addToRunSet( rs2, "testImg/TestKrFlag.jpg", "POS" )
easy.addToRunSet( rs2, "testImg/TestCaFlag.jpg", "neg" )
easy.addToRunSet( rs2, "testImg/TestUsFlag.jpg", "0" )
print("\n=== RunSet 2, after appending: ===")
easy.printRunSetInfo( rs2, printLabels=True )

# A runset can also be created from a folder with sub-folders.
rs3 = easy.createRunSet( "trainImg" )
print("\n=== RunSet 3: ===")
import time  # used to measure the computation time
import os    # used for chdir

# NOTE(review): time.clock() was removed in Python 3.8.  It is kept here
# because the matching end-of-run measurement is not part of this section;
# switch both calls together (e.g. to time.perf_counter()) when porting.
stime = time.clock()

''' Making training data '''
# trainImg_roc_simple
# trainImg_roc
trainsetPos = easy.createRunSet( 'corporate_logos' )
trainsetNeg = easy.createRunSet( 'trainImg' )

# Merge both sets into a single RunSet, one purpose per source set.
runset = cvac.RunSet()
easy.addToRunSet( runset, trainsetPos, 'pos' )
easy.addToRunSet( runset, trainsetNeg, 'neg' )
easy.printRunSetInfo( runset, printLabels=True )

# Service names and the list of word counts used further below.
strTrainer = "BOW_Trainer"
strDetector = "BOW_Detector"
list_nWord = [5, 10, 15, 20]

doWithNegativeSample = True
if doWithNegativeSample:
    ###############################################################
    # With background data
    ''' Execute jousting for generating ROC points '''
print("\n=== Corpus 2: ===")
print('Obtained {0} labeled artifact{1} from trainImg directory:'.format(
    len(lablist2),
    "" if len(lablist2) == 1 else "s" ))   # plural "s" unless exactly one
easy.printCategoryInfo( categories2 )

# Both corpora contain flag images, but under different labels.  To use
# them for evaluation, the same purpose is assigned to labels that are
# syntactically different but semantically identical.
# Because we don't specify it, this guesses the specific Purpose that
# is assigned to the labels.
# Also obtain this mapping from Purpose to label name, called "classmap."
rs1 = easy.createRunSet( categories1['CA_flag'] + categories2['ca'], "0" )
easy.addToRunSet( rs1, categories1['KO_flag'] + categories2['kr'], "1" )
easy.addToRunSet( rs1, categories1['US_flag'] + categories2['us'], "2" )
print("\n=== The Corpora combined into one RunSet: ===")
easy.printRunSetInfo( rs1 )

# A runset can be used for training or for testing; here it is handed
# to a detector together with a model file.
print("------- Bag of Words results for corporate logos: -------")
detector = easy.getDetector( "BOW_Detector" )
modelfile = "detectors/bowUSKOCA.zip"
results1 = easy.detect( detector, modelfile, rs1 )
print("Note that both original and found labels are printed:")
easy.printResults( results1 )

# Print again, this time replacing the found labels with a double
# mapping from foundLabel -> guessed Purpose -> classmap label;
# Note that this fails if multiple original labels mapped to the same
# Purpose.
wait()
print("------- Same results, but found labels replaced with guessed original labels: -------")
else: c = ev.Contender("DPM") c.detectorString = "DPM_Detector" c.detectorData = "detectors/dpmStarbucksLogo.zip" c.foundMap = {'Positive':easy.getPurpose('pos'), 'Negative':easy.getPurpose('neg')} contenders.append( c ); # OpenCVCascade, with special settings for anticipated poor performance if (easy.getTrainer("OpenCVCascadeTrainer")==None): print("Cascade service(s) are insufficiently configured, skipping.") else: c = ev.Contender("cascade") c.trainerString = "OpenCVCascadeTrainer" c.detectorString = "OpenCVCascadeDetector" # c.foundMap = {'any':easy.getPurpose('pos')} c.foundMap = {'positive':posPurpose, 'negative':negPurpose} detector = easy.getDetector(c.detectorString) detectorProps = easy.getDetectorProperties(detector) c.detectorProps = detectorProps; c.detectorProps.props["maxRectangles"] = "200" c.detectorProps.minNeighbors = 0; # This prevents hang up in evaluator when training has too few samples contenders.append( c ); runset = easy.createRunSet( "trainImg/kr", "pos" ) easy.addToRunSet( runset, "trainImg/ca", "neg" ) easy.printRunSetInfo( runset, printArtifacts=False, printLabels=True ) perfdata = ev.joust( contenders, runset, folds=3 ) ev.printEvaluationResults(perfdata[0])
def crossValidate( contender, runset, folds=10, printVerbose=False ):
    '''Train and evaluate the contender in a k-fold cross-validation.

    contender: object providing getTrainer(), getDetector(), trainerProps,
        detectorProps, foundMap and name.
    runset: runset whose items all have either the "pos" or the "neg"
        purpose, with at least two items of each.
    folds: requested number of folds; reduced (with a printed warning) to
        the size of the smaller purpose list if that is lower.
    printVerbose: forwarded to easy.printRunSetInfo as printArtifacts.

    Returns an EvaluationResult built from the number of folds performed,
    a TestResult holding the tp, tn, fp and fn counts summed over all
    folds, and the sum of the second entries of the per-fold confusion
    tables.  The detail argument is passed as None.

    Raises RuntimeError if the runset has items with other purposes or
    fewer than two items for either purpose.
    '''
    # sanity checks:
    # only positive and negative purposes,
    # count number of entries for each purpose
    runset_pos = asList( runset, purpose="pos" )
    runset_neg = asList( runset, purpose="neg" )
    num_items = ( len(runset_pos), len(runset_neg) )
    # check that there are no other purposes
    all_items = len( asList( runset ) )
    if sum(num_items) != all_items:
        raise RuntimeError("crossValidate can only handle Positive and Negative purposes")
    if min(num_items) < 2:
        raise RuntimeError("need more than 1 labeled item per purpose to cross validate")

    # make sure there are enough items for xval to make sense
    if folds > min(num_items):
        # format() instead of "+": folds is a number and cannot be
        # concatenated to a string directly
        print("warning: cannot do {0}-fold validation with only {1} labeled items"
              .format( folds, num_items ))
        folds = min(num_items)

    # calculate the size of the training and evaluation sets.
    # if the number of labeled items in the runset divided
    # by the number of folds isn't an even
    # division, use more items for the evaluation
    chunksize = ( num_items[0] // folds, num_items[1] // folds )
    trainsize = ( chunksize[0] * (folds-1), chunksize[1] * (folds-1) )
    evalsize = ( num_items[0]-trainsize[0], num_items[1]-trainsize[1] )
    print( "Will perform a {0}-fold cross-validation with {1} training samples and "
           "{2} evaluation samples".format( folds, trainsize, evalsize ) )

    # randomize the order of the elements in the runset, once and for all
    # folds; the indices are materialized as lists because random.shuffle
    # needs a mutable sequence
    rndidx = ( list(range( num_items[0] )), list(range( num_items[1] )) )
    random.shuffle( rndidx[0] )  # shuffles items in place
    random.shuffle( rndidx[1] )  # shuffles items in place

    confusionTables = []
    for fold in range( folds ):
        # split the runset
        trainset, evalset = splitRunSet( runset_pos, runset_neg, fold,
                                         chunksize, evalsize, rndidx )
        print( "-------- fold number {0} --------".format(fold) )

        # training
        print( "---- training:" )
        easy.printRunSetInfo( trainset, printArtifacts=printVerbose )
        trainer = contender.getTrainer()
        model = easy.train( trainer, trainset,
                            trainerProperties=contender.trainerProps )

        # detection
        print( "---- evaluation:" )
        easy.printRunSetInfo( evalset, printArtifacts=printVerbose )
        detector = contender.getDetector()
        detections = easy.detect( detector, model, evalset,
                                  detectorProperties=contender.detectorProps )
        confusionTables.append(
            getConfusionTable( detections, origSet=evalset,
                               foundMap=contender.foundMap ))

    # calculate statistics of our tuple TestResult,nores
    sumTestResult = TestResult()
    sumNoRes = 0
    for entry in confusionTables:
        sumTestResult.tp += entry[0].tp
        sumTestResult.tn += entry[0].tn
        sumTestResult.fp += entry[0].fp
        sumTestResult.fn += entry[0].fn
        sumNoRes += entry[1]
    r = EvaluationResult( folds, sumTestResult, sumNoRes, detail=None,
                          name=contender.name )
    return r
# First subset: everything filed under the 'license plate' category.
license_plates = categories['license plate']
print("There are {0} license plate labels.".format( len(license_plates) ))

# Second subset: one entry per category whose name starts with "car".
# NOTE(review): each entry is the whole categories[key] value, so
# len(cars) counts matching categories - confirm this is what the
# message below and createRunSet further down expect.
cars = [ categories[key] for key in categories.keys()
         if key.startswith("car") ]
print("There are {0} car-related labels.".format( len(cars) ))

# Note that Labels are cached in the CorpusServer, but the corpus currently
# needs to re-mirror if the CorpusServer is restarted because Labels are
# not stored to disk.  Images are stored to disk.

# done for now; nothing below this call is executed as written
quit()

# Train a detector on license plates
trainer = easy.getTrainer( "BOW_Trainer:default -p 10103 ")
trainset = easy.createRunSet( license_plates, "pos" )
easy.printRunSetInfo( trainset )
licenseplateModel = easy.train( trainer, trainset )

# Test the license plate detector on the known locations of cars;
# this will only try to detect within the boundaries of cars.
testset = easy.createRunSet( cars, "unpurposed" )
detector = easy.getDetector( "BOW_Detector:default -p 10104" )
results = easy.detect( detector, licenseplateModel, testset )
# NOTE(review): printResults is called unqualified while every other call
# here goes through the easy module - confirm the name is in scope.
printResults( results )
c.detectorData = "detectors/dpmStarbucksLogo.zip" c.foundMap = { 'Positive': easy.getPurpose('pos'), 'Negative': easy.getPurpose('neg') } contenders.append(c) # OpenCVCascade, with special settings for anticipated poor performance if (easy.getTrainer("OpenCVCascadeTrainer") == None): print("Cascade service(s) are insufficiently configured, skipping.") else: c = ev.Contender("cascade") c.trainerString = "OpenCVCascadeTrainer" c.detectorString = "OpenCVCascadeDetector" # c.foundMap = {'any':easy.getPurpose('pos')} c.foundMap = {'positive': posPurpose, 'negative': negPurpose} detector = easy.getDetector(c.detectorString) detectorProps = easy.getDetectorProperties(detector) c.detectorProps = detectorProps c.detectorProps.props["maxRectangles"] = "200" c.detectorProps.minNeighbors = 0 # This prevents hang up in evaluator when training has too few samples contenders.append(c) runset = easy.createRunSet("trainImg/kr", "pos") easy.addToRunSet(runset, "trainImg/ca", "neg") easy.printRunSetInfo(runset, printArtifacts=False, printLabels=True) perfdata = ev.joust(contenders, runset, folds=3) ev.printEvaluationResults(perfdata[0])
import os
import easy

#
# Build the training set: one sample each of 9 corporate logos
#
trainset1 = easy.createRunSet( "corporate_logos" )

# Round 1 of training
trainer = easy.getTrainer( "BOW_Trainer" )
model1 = easy.train( trainer, trainset1 )

# Evaluate the model on a separate test set, images and videos
# in DataDir/testImg
testset1 = easy.createRunSet( "testImg", "UNPURPOSED" )
easy.printRunSetInfo( testset1 )
detector = easy.getDetector( "BOW_Detector" )
result1 = easy.detect( detector, model1, testset1 )
easy.printResults( result1 )

# Sort the images of the test set into subfolders of "testresults1"
# that correspond to the found labels; if multiple labels were found
# per original, consider only the label with the highest confidence.
easy.sortIntoFolders( result1, outfolder="testresults1", multi="highest" )

# Now manually sort through the created testresults1 and move
# _incorrectly_ classified samples into correctly labeled subfolders
# of a new folder "corporate_logos_round2".  Found labels on locations
# that are not one of the 9 logos have to be sorted into a 10th class
# (subfolder), generally called the "reject" class.
import os
import easy

#
# Build the training set: one sample each of 9 corporate logos
#
trainset1 = easy.createRunSet( "corporate_logos" )

# Round 1 of training
trainer = easy.getTrainer( "BOW_Trainer" )
model1 = easy.train( trainer, trainset1 )

# Evaluate the model on a separate test set, images and videos
# in DataDir/testImg
testset1 = easy.createRunSet( "testImg", "UNPURPOSED" )
easy.printRunSetInfo( testset1 )
detector = easy.getDetector( "BOW_Detector" )
result1 = easy.detect( detector, model1, testset1 )
easy.printResults( result1 )

# Sort the images of the test set into subfolders of "testresults1"
# that correspond to the found labels; if multiple labels were found
# per original, consider only the label with the highest confidence.
easy.sortIntoFolders( result1, outfolder="testresults1", multi="highest" )

# Now manually sort through the created testresults1 and move
# _incorrectly_ classified samples into correctly labeled subfolders
# of a new folder "corporate_logos_round2".  Found labels on locations
# that are not one of the 9 logos have to be sorted into a 10th class
# (subfolder), generally called the "reject" class.
# First subset: everything filed under the 'license plate' category.
license_plates = categories['license plate']
print("There are {0} license plate labels.".format( len(license_plates) ))

# Second subset: one entry per category whose name starts with "car".
# NOTE(review): each entry is the whole categories[key] value, so
# len(cars) counts matching categories - confirm this is what the
# message below and createRunSet further down expect.
cars = [ categories[key] for key in categories.keys()
         if key.startswith("car") ]
print("There are {0} car-related labels.".format( len(cars) ))

# Note that Labels are cached in the CorpusServer, but the corpus currently
# needs to re-mirror if the CorpusServer is restarted because Labels are
# not stored to disk.  Images are stored to disk.

# done for now; nothing below this call is executed as written
quit()

# Train a detector on license plates
trainer = easy.getTrainer( "BOW_Trainer:default -p 10103 ")
trainset = easy.createRunSet( license_plates, "pos" )
easy.printRunSetInfo( trainset )
licenseplateModel = easy.train( trainer, trainset )

# Test the license plate detector on the known locations of cars;
# this will only try to detect within the boundaries of cars.
testset = easy.createRunSet( cars, "unpurposed" )
detector = easy.getDetector( "BOW_Detector:default -p 10104" )
results = easy.detect( detector, licenseplateModel, testset )
# NOTE(review): printResults is called unqualified while every other call
# here goes through the easy module - confirm the name is in scope.
printResults( results )
import time  # used to measure the computation time
import os    # used for chdir

# NOTE(review): time.clock() was removed in Python 3.8.  It is kept here
# because the matching end-of-run measurement is not part of this section;
# switch both calls together (e.g. to time.perf_counter()) when porting.
stime = time.clock()

''' Making training data '''
# trainImg_roc_simple
# trainImg_roc
trainsetPos = easy.createRunSet( 'corporate_logos' )
trainsetNeg = easy.createRunSet( 'trainImg' )

# Merge both sets into a single RunSet, one purpose per source set.
runset = cvac.RunSet()
easy.addToRunSet( runset, trainsetPos, 'pos' )
easy.addToRunSet( runset, trainsetNeg, 'neg' )
easy.printRunSetInfo( runset, printLabels=True )

# Service names and the list of word counts used further below.
strTrainer = "BOW_Trainer"
strDetector = "BOW_Detector"
list_nWord = [5, 10, 15, 20]

doWithNegativeSample = True
if doWithNegativeSample:
    ###############################################################
    # With background data
    ''' Execute jousting for generating ROC points '''
print("\n=== Corpus 2: ===") print('Obtained {0} labeled artifact{1} from trainImg directory:'.format( len(lablist2), ("s", "")[len(lablist2) == 1])) easy.printCategoryInfo(categories2) # Note how both corpora contain flag images, but they have different # labels. To use them for evaluation, let's assign the same purpose # to syntactically different but semantically identical labels. # Because we don't specify it, this guesses the specific Purpose that # is assigned to the labels. # Also obtain this mapping from Purpose to label name, called "classmap." rs1 = easy.createRunSet(categories1['CA_flag'] + categories2['ca'], "0") easy.addToRunSet(rs1, categories1['KO_flag'] + categories2['kr'], "1") easy.addToRunSet(rs1, categories1['US_flag'] + categories2['us'], "2") print("\n=== The Corpora combined into one RunSet: ===") easy.printRunSetInfo(rs1) # A runset can be used for training or for testing print("------- Bag of Words results for corporate logos: -------") detector = easy.getDetector("BOW_Detector") modelfile = "detectors/bowUSKOCA.zip" results1 = easy.detect(detector, modelfile, rs1) print("Note that both original and found labels are printed:") easy.printResults(results1) # Print again, this time replacing the found labels with a double # mapping from foundLabel -> guessed Purpose -> classmap label; # Note that this fails if multiple original labels mapped to the same # Purpose. wait() print(