def eval(self):
    """Evaluate every partition and write a summary table of the scores.

    For each entry of ``self.dirPartitions``:

    * the labeled output ('testEstCRF_Wapiti.txt', read from the result
      directory) and the reference data ('evaldata_CRF_Wapiti.txt', read
      from the training directory) are loaded with ``self._getFile``;
    * both are harmonized by ``prepareEval.prepareEval`` and the harmonized
      versions are saved in the result directory as 'annotatedEval.txt'
      and 'desiredEval.txt';
    * ``TokenAccuracyEval.evaluate`` scores them and its text report is
      saved as 'evaluation.txt' under the partition directory.

    Afterwards 'evaluation.tsv' is saved under
    ``self.partitions.getDirPercentName()``: a tab-separated header of the
    labels, one row of values per partition, and the column-wise average
    as the last row.

    If ``self.dirPartitions`` is empty there is nothing to summarize, so
    the method returns without writing any file (previously this raised
    ``UnboundLocalError`` because ``labels`` was never assigned).
    """
    labels = []
    allValues = []
    for dirPartition in self.dirPartitions:
        # Only the training and result directories are used here; the
        # other three names are unpacked to keep the 5-tuple contract.
        (_annotateDir, _testDir, trainDir,
         _modelDir, resultDir) = self.partitions.getDirTestNames(dirPartition)

        labeledContent = self._getFile(resultDir, 'testEstCRF_Wapiti.txt')
        # tmpFiles from training of testDir are saved in trainDir !
        desiredContent = self._getFile(trainDir, 'evaldata_CRF_Wapiti.txt')

        # Harmonize the two lists, they are not tokenized the same way.
        desiredContentHarmonized, labeledContentHarmonized = prepareEval.prepareEval(
            desiredContent, labeledContent)
        self._saveFile(labeledContentHarmonized, resultDir, 'annotatedEval.txt')
        self._saveFile(desiredContentHarmonized, resultDir, 'desiredEval.txt')

        evalText, labels, values = TokenAccuracyEval.evaluate(
            labeledContentHarmonized, desiredContentHarmonized)
        allValues.append(values)
        # NOTE(review): the per-partition report goes to dirPartition, not
        # resultDir -- kept as in the original; confirm this is intended.
        self._saveFile(evalText, dirPartition, 'evaluation.txt')

    if not allValues:
        # No partition was evaluated: there are no labels and no rows.
        return

    # Column-wise average of the results of all partitions. float() keeps
    # the division a true division on Python 2 as well.
    average = [float(sum(col)) / len(col) for col in zip(*allValues)]
    allValues.append(average)

    # All per-partition results, with the average on the last line. The
    # header uses the labels returned for the last evaluated partition.
    rows = ["\t".join(['{:f}'.format(v) for v in row]) for row in allValues]
    finalEval = "\t".join(labels) + "\n"
    finalEval += "\n".join(rows)
    self._saveFile(finalEval, self.partitions.getDirPercentName(), 'evaluation.tsv')
def tokenEval(self):
    """Score the labeled output in ``self.dirResult`` and save the report.

    Reads 'evaldata_CRF_Wapiti.txt' (reference) and 'testEstCRF_Wapiti.txt'
    (labeled output) from ``self.dirResult``, harmonizes both with
    ``prepareEval.prepareEval`` and scores them with
    ``TokenAccuracyEval.evaluate``.

    The saved 'evaluation.txt' (also in ``self.dirResult``) consists of a
    tab-separated line of labels, a tab-separated line of the values
    formatted with ``'{:f}'``, and then the evaluator's text report.
    """
    reference = self._getFile(self.dirResult, 'evaldata_CRF_Wapiti.txt')
    predicted = self._getFile(self.dirResult, 'testEstCRF_Wapiti.txt')

    # prepareEval returns the pair in (reference, labeled) order, while
    # the evaluator takes the labeled content first.
    harmonized = prepareEval.prepareEval(reference, predicted)
    referenceAligned, predictedAligned = harmonized
    report, header, scores = TokenAccuracyEval.evaluate(predictedAligned, referenceAligned)

    headerLine = "\t".join(header)
    scoreLine = "\t".join(['{:f}'.format(score) for score in scores])
    output = headerLine + "\n" + scoreLine + "\n" + report

    self._saveFile(output, self.dirResult, 'evaluation.txt')