def test_getLabelableList(self):
    """Exercise easy.getLabelableList with and without recursive=False.

    Checks performed:
      * the default call on "easyTestData" only yields entries whose label
        name is "easyTestData", "kitchen" or "MITmountain";
      * every "kitchen" entry contains the properties "easyTestData",
        "inside" and "house";
      * the call with recursive=False only yields entries named
        "easyTestData" that have no properties;
      * each of the two lists, added to a new cvac.RunSet with the 'pos'
        purpose, gives a RunSet that easy.isProperRunSet accepts.

    Raises:
        RuntimeError: if any of the checks above fails.
    """
    print("testing getLabelableList")
    sys.stdout.flush()

    # Default call: verify label names and the "kitchen" properties.
    labelList = easy.getLabelableList("easyTestData")
    validNames = ("easyTestData", "kitchen", "MITmountain")
    for entry in labelList:
        name = entry.lab.name
        print("label name " + name)
        if name not in validNames:
            raise RuntimeError(
                "Not a valid label name in test getLabelableList")
        if name == "kitchen":
            # should find easyTestData, inside, house properties
            for prop in ("easyTestData", "inside", "house"):
                if prop not in entry.lab.properties:
                    raise RuntimeError(prop + " not in properties")

    # Verify that we can make a runset and use it with the labelableList.
    runset = cvac.RunSet()
    posPurpose = easy.getPurpose('pos')
    easy.addToRunSet(runset, labelList, posPurpose)
    if not easy.isProperRunSet(runset):
        raise RuntimeError(
            "test getLabelableList failed with an invalid runset")

    # Non-recursive call: should only have the one label "easyTestData".
    labelList = easy.getLabelableList("easyTestData", recursive=False)
    for entry in labelList:
        if entry.lab.name != "easyTestData":
            raise RuntimeError(
                "recursive option failed in test getLabelableList")
        if len(entry.lab.properties) > 0:
            raise RuntimeError("labelable should not have any properties")

    # Validate a RunSet built from the non-recursive list.  Re-checking the
    # RunSet created earlier would only repeat the previous validation and
    # could never detect a problem with this second list.
    runset = cvac.RunSet()
    easy.addToRunSet(runset, labelList, posPurpose)
    if not easy.isProperRunSet(runset):
        raise RuntimeError(
            "test getLabelableList failed with an invalid runset with non-recursive call"
        )
def test_getLabelableList(self):
    """Test easy.getLabelableList, by default and with recursive=False.

    The default listing of "easyTestData" may only contain the label names
    "easyTestData", "kitchen" and "MITmountain", and each "kitchen" entry
    must contain the properties "easyTestData", "inside" and "house".  The
    recursive=False listing may only contain the label "easyTestData", with
    no properties.  Both listings are added to their own cvac.RunSet (using
    the 'pos' purpose), which must pass easy.isProperRunSet.

    Raises:
        RuntimeError: on the first check that fails.
    """
    print("testing getLabelableList")
    sys.stdout.flush()

    labelList = easy.getLabelableList("easyTestData")
    # Expected labels: the root directory "easyTestData", "kitchen" and
    # "MITmountain".
    for entry in labelList:
        labelName = entry.lab.name
        print("label name " + labelName)
        if not (labelName == "easyTestData"
                or labelName == "kitchen"
                or labelName == "MITmountain"):
            raise RuntimeError("Not a valid label name in test getLabelableList")
        if labelName == "kitchen":
            # should find easyTestData, inside, house properties
            props = entry.lab.properties
            if "easyTestData" not in props:
                raise RuntimeError("easyTestData not in properties")
            if "inside" not in props:
                raise RuntimeError("inside not in properties")
            if "house" not in props:
                raise RuntimeError("house not in properties")

    # A RunSet filled from the list must be a proper RunSet.
    posPurpose = None
    runset = cvac.RunSet()
    posPurpose = easy.getPurpose('pos')
    easy.addToRunSet(runset, labelList, posPurpose)
    if not easy.isProperRunSet(runset):
        raise RuntimeError("test getLabelableList failed with an invalid runset")

    labelList = easy.getLabelableList("easyTestData", recursive=False)
    # Should only have the one label "easyTestData", without properties.
    for entry in labelList:
        if entry.lab.name != "easyTestData":
            raise RuntimeError("recursive option failed in test getLabelableList")
        if len(entry.lab.properties) > 0:
            raise RuntimeError("labelable should not have any properties")

    # Fill a new RunSet from the non-recursive list before validating it;
    # checking the earlier RunSet again would not involve this list at all.
    nonRecursiveRunset = cvac.RunSet()
    easy.addToRunSet(nonRecursiveRunset, labelList, posPurpose)
    if not easy.isProperRunSet(nonRecursiveRunset):
        raise RuntimeError("test getLabelableList failed with an invalid runset with non-recursive call")
def getLabels(self):
    """Download the corpus archive, unpack it and return its labelables.

    The file at ``self.main_location`` is saved as "deleteme.tar.gz" in the
    current working directory and unpacked into the directory returned by
    ``self.getFSPath("deleteme_tmpdir")``; that directory is also stored in
    ``self.dataSetFolder``.

    todo: this is a very preliminary implementation that doesn't create
    proper temp directories and does little error checking; it mainly just
    works with a remote tar.gz type corpus file.

    Returns:
        The result of ``easy.getLabelableList`` for the unpacked directory.

    Raises:
        RuntimeError: if an archive member's path would resolve to a
            location outside the extraction directory.
    """
    import tarfile
    try:
        # Python 3: URLopener is not available as urllib.URLopener.
        from urllib.request import urlretrieve
    except ImportError:
        # Python 2: keep using the opener this method has always used.
        import urllib
        urlretrieve = urllib.URLopener().retrieve

    archiveName = "deleteme.tar.gz"
    urlretrieve(self.main_location, archiveName)

    # extract the tar into a hardcoded dir path
    self.dataSetFolder = self.getFSPath("deleteme_tmpdir")
    if not os.path.exists(self.dataSetFolder):
        os.makedirs(self.dataSetFolder)

    # The context manager closes the archive even if extraction fails.
    with tarfile.open(archiveName) as tar:
        # The archive comes from a remote location, so reject member names
        # (absolute paths, "..") that would be written outside the target
        # directory.  This is a name-based check only; link members are not
        # inspected any further.
        root = os.path.realpath(self.dataSetFolder)
        rootPrefix = os.path.join(root, "")
        for member in tar.getmembers():
            target = os.path.realpath(os.path.join(root, member.name))
            if not os.path.join(target, "").startswith(rootPrefix):
                raise RuntimeError(
                    "unsafe path in corpus archive: " + member.name)
        tar.extractall(path=self.dataSetFolder)

    # obtain labelables from extracted tar directory
    return easy.getLabelableList(self.dataSetFolder)
# Open the corpus described by the CvacCorpusTest properties file.
corpus1 = easy.openCorpus("corpus/CvacCorpusTest.properties")

# Fetch the categories and the labeled artifacts of this corpus.  For this
# corpus the labels are only available when the image files are local to
# the CorpusServer (here the default, local server).  With createMirror set
# the images are downloaded automatically, which needs a network
# connection.
categories1, lablist1 = easy.getDataSet(corpus1, createMirror=True)
print("=== Corpus 1: ===")
print('Obtained {0} labeled artifact{1} from corpus1 "{2}":'.format(
    len(lablist1),
    "" if len(lablist1) == 1 else "s",  # plural suffix unless exactly one
    corpus1.name))
easy.printCategoryInfo(categories1)

# Second data set: the labelable files found under the "trainImg" directory.
lablist2 = easy.getLabelableList("trainImg")
categories2 = easy.getCategories(lablist2)
print("\n=== Corpus 2: ===")
print('Obtained {0} labeled artifact{1} from trainImg directory:'.format(
    len(lablist2),
    "" if len(lablist2) == 1 else "s"))
easy.printCategoryInfo(categories2)

# Both corpora contain flag images, but under different label names
# ('CA_flag' vs 'ca', 'KO_flag' vs 'kr', 'US_flag' vs 'us').  To use them
# together for evaluation, each pair of syntactically different but
# semantically identical labels goes into the same run set under one
# shared purpose string ("0", "1", "2").
# NOTE(review): the earlier comment said the run set also provides the
# mapping from Purpose to label name ("classmap") - confirm against
# easy.createRunSet.
rs1 = easy.createRunSet(categories1['CA_flag'] + categories2['ca'], "0")
easy.addToRunSet(rs1, categories1['KO_flag'] + categories2['kr'], "1")
easy.addToRunSet(rs1, categories1['US_flag'] + categories2['us'], "2")
# --- Corpus 1: opened from a corpus properties file -----------------------
corpus1 = easy.openCorpus(
    "corpus/CvacCorpusTest.properties"
)

# Get the labels of the corpus.  They are only available if the image files
# are local to the CorpusServer, which here is the default, local server.
# createMirror=True lets the images be downloaded automatically; this
# requires a network connection.
categories1, lablist1 = easy.getDataSet(
    corpus1,
    createMirror=True,
)
print("=== Corpus 1: ===")
print(
    'Obtained {0} labeled artifact{1} from corpus1 "{2}":'.format(
        len(lablist1),
        "s" if len(lablist1) != 1 else "",
        corpus1.name,
    )
)
easy.printCategoryInfo(categories1)

# --- Corpus 2: labelable files below a directory --------------------------
lablist2 = easy.getLabelableList("trainImg")
categories2 = easy.getCategories(lablist2)
print("\n=== Corpus 2: ===")
print(
    'Obtained {0} labeled artifact{1} from trainImg directory:'.format(
        len(lablist2),
        "s" if len(lablist2) != 1 else "",
    )
)
easy.printCategoryInfo(categories2)

# --- Combined run set -----------------------------------------------------
# The two corpora label their flag images differently.  For evaluation the
# matching categories of both corpora are concatenated and registered under
# a common purpose string: "0" for CA, "1" for KO/kr, "2" for US.
# NOTE(review): an earlier comment mentioned that a Purpose-to-label-name
# mapping ("classmap") is obtained here as well - verify in easy.
rs1 = easy.createRunSet(
    categories1['CA_flag'] + categories2['ca'], "0"
)
easy.addToRunSet(
    rs1, categories1['KO_flag'] + categories2['kr'], "1"
)
easy.addToRunSet(
    rs1, categories1['US_flag'] + categories2['us'], "2"
)