class TaggerHandler:
    """Run a Tagger over every train/test file pair and log the results.

    One tab-separated row is appended to the results table per pair.  The
    table is opened by the constructor and closed by run(), so an instance
    supports a single call to run().
    """

    def __init__(self, dataDir, table):
        """Create the tagger and open the results table for writing.

        dataDir -- string prepended verbatim to every file name read from
                   the list files (no path separator is inserted).
        table   -- path of the results file; opened in 'w' mode.
        """
        sys.stderr.write("TaggerHandler: Constructor\n")
        self.__TaggerInstance = Tagger()
        self.__dataDir = dataDir
        self.__tableFile = open(table, 'w')

    def __updateTable(self, setting, accuracies):
        """Append one row: setting followed by each accuracy, tab-separated."""
        cells = [setting]
        cells.extend(map(str, accuracies))
        self.__tableFile.write('\t'.join(cells) + '\n')

    def __runTagger(self, trainFile, testFile):
        """Train on trainFile, evaluate on testFile and log the outcome."""
        self.__TaggerInstance.loadData(trainFile, testFile)
        self.__TaggerInstance.train()
        accuracies = self.__TaggerInstance.test()
        # The row label is everything before the first underscore of the
        # full train path, i.e. it includes the dataDir prefix.
        setting = trainFile.split("_")[0]
        self.__updateTable(setting, accuracies)

    def __readFileList(self, listFile):
        """Return dataDir-prefixed names from listFile, one per line.

        Blank lines are skipped: they would otherwise turn into a bogus
        entry equal to the bare dataDir.
        """
        with open(listFile) as handle:
            names = [line.strip() for line in handle]
        return [self.__dataDir + name for name in names if name]

    def run(self, trainFiles, testFiles):
        """Evaluate every (train, test) combination, then close the table.

        trainFiles -- path of a text file listing training files.
        testFiles  -- path of a text file listing test files.

        The table is closed even when reading a list or running the tagger
        raises, so rows already written are not left in an open handle.
        """
        try:
            trainPaths = self.__readFileList(trainFiles)
            testPaths = self.__readFileList(testFiles)
            for trainFile in trainPaths:
                for testFile in testPaths:
                    self.__runTagger(trainFile, testFile)
        finally:
            self.__tableFile.close()
class TaggerHandler:
    """Run a Tagger over train/test file pairs and log results to a table.

    The constructor opens the results table and writes its header; run() and
    run2() each append rows and then close the table, so an instance supports
    a single call to one of them.

    File names are parsed for experiment metadata and are expected to look
    like ``...TrainCS<type>CS<cs>Pure<pure>Total<size>_...`` with an optional
    ``.uni`` and/or ``.uniq`` suffix (see __getSetting and __tagset).
    """

    def __init__(self, dataDir, table):
        """Create the tagger, open the results table and write its header.

        dataDir -- string prepended verbatim to file names by run2() (no
                   path separator is inserted); run() does not use it.
        table   -- path of the results file; opened in 'w' mode.
        """
        sys.stderr.write("TaggerHandler: Constructor\n")
        self.__TaggerInstance = Tagger()
        self.__dataDir = dataDir
        # Buffering argument 1 requests line buffering.
        self.__tableFile = open(table, 'w', 1)
        headerColumns = (
            'TrainCSType',
            'TrainPureCSSplit',
            'TrainSize',
            'ExperimentType',
            'TestCSType',
            'TestPureCSSplit',
            'TestSize',
            'Tagset',
            'OverallAccuracy',
            'SameContextAccuracy',
            'DifferentContextAccuracy',
            'PrevWordDifferentAccuracy',
            'PrePrevWordDifferentAccuracy',
            'Unknowns',
        )
        self.__tableFile.write('\t'.join(headerColumns) + '\n')

    def __updateTable(self, setting, accuracies):
        """Append one row: setting followed by each accuracy, tab-separated."""
        cells = [setting]
        cells.extend(map(str, accuracies))
        self.__tableFile.write('\t'.join(cells) + '\n')

    def __getSetting(self, string):
        """Extract 'type<TAB>pure-cs<TAB>size' from a data file name.

        Only the part after the last '/' is parsed.  Raises IndexError when
        the name lacks one of the 'TrainCS', 'CS', 'Pure' or 'Total' markers.
        NOTE(review): this is applied to test files as well, so their names
        must also contain 'TrainCS' -- confirm against the data layout.
        """
        descriptor = string.split("/")[-1].split("TrainCS")[1]
        cstype = descriptor.split("CS")[0]
        csSplit = descriptor.split("CS")[1].split("Pure")[0]
        pureSplit = descriptor.split("Pure")[1].split("Total")[0]
        totalSize = descriptor.split("Total")[1].split("_")[0]
        return '\t'.join([cstype, pureSplit + '-' + csSplit, totalSize])

    def __tagset(self, string):
        """Return the tagset label encoded in a file name.

        'Universal' when the second dot-separated field of the file name is
        'uni', otherwise 'Mixed'; '.uniq' is appended when the name contains
        '.uniq'.  Only the part after the last '/' is inspected, matching
        __getSetting, so dots in directory names (e.g. './data/') cannot
        shift the field that is tested.
        """
        baseName = string.split("/")[-1]
        fields = baseName.split(".")
        tagset = "Mixed"
        if len(fields) > 1 and fields[1] == "uni":
            tagset = 'Universal'
        if ".uniq" in baseName:
            tagset += ".uniq"
        return tagset

    def __evaluate(self, trainFile, testFile, expType=None):
        """Train, test and log one row; expType adds the ExperimentType cell."""
        self.__TaggerInstance.loadData(trainFile, testFile)
        self.__TaggerInstance.train()
        accuracies = self.__TaggerInstance.test()
        cells = [self.__getSetting(trainFile)]
        if expType is not None:
            cells.append(expType)
        cells.append(self.__getSetting(testFile))
        cells.append(self.__tagset(trainFile))
        self.__updateTable('\t'.join(cells), accuracies)

    def __runTagger(self, trainFile, testFile):
        """Evaluate one pair using the file names exactly as given.

        NOTE(review): rows written here have no ExperimentType cell although
        the header declares that column, so they are shifted by one relative
        to the header -- confirm whether downstream readers rely on this.
        """
        self.__evaluate(trainFile, testFile)

    def __runTagger2(self, trainFile, testFile, expType):
        """Evaluate one pair under dataDir and tag the row with expType."""
        self.__evaluate(self.__dataDir + trainFile,
                        self.__dataDir + testFile,
                        expType)

    def __readFileList(self, listFile):
        """Return the non-blank, stripped lines of listFile.

        Blank lines are skipped because an empty name can never refer to a
        data file.
        """
        with open(listFile) as handle:
            names = [line.strip() for line in handle]
        return [name for name in names if name]

    def run(self, trainFiles, testFiles):
        """Evaluate every pair in which both files use the 'Mixed' tagset.

        trainFiles -- path of a text file listing training files.
        testFiles  -- path of a text file listing test files.

        Names are passed to the tagger as listed (dataDir is not prepended).
        The table is closed afterwards, also when an error is raised.
        """
        try:
            trainPaths = self.__readFileList(trainFiles)
            testPaths = self.__readFileList(testFiles)
            for trainFile in trainPaths:
                trainTagset = self.__tagset(trainFile)
                # Only 'Mixed' training files take part in this run.
                if trainTagset != "Mixed":
                    continue
                for testFile in testPaths:
                    if self.__tagset(testFile) != trainTagset:
                        continue
                    self.__runTagger(trainFile, testFile)
        finally:
            self.__tableFile.close()

    def run2(self, trainFiles, testFiles):
        """Evaluate each tagset-matched pair as 'Experiment' and 'Control'.

        trainFiles -- path of a text file listing training files.
        testFiles  -- path of a text file listing test files.

        The control run trains on the same name with '_Control' appended;
        for 'Universal' files the marker goes before the '.uni' suffix.
        Names are resolved under dataDir.  The table is closed afterwards,
        also when an error is raised.
        """
        try:
            trainPaths = self.__readFileList(trainFiles)
            testPaths = self.__readFileList(testFiles)
            for trainFile in trainPaths:
                trainTagset = self.__tagset(trainFile)
                if trainTagset == "Universal":
                    controlTrainFile = trainFile.split(".uni")[0] + "_Control" + ".uni"
                else:
                    controlTrainFile = trainFile + "_Control"
                for testFile in testPaths:
                    if self.__tagset(testFile) != trainTagset:
                        continue
                    self.__runTagger2(trainFile, testFile, "Experiment")
                    self.__runTagger2(controlTrainFile, testFile, "Control")
        finally:
            self.__tableFile.close()