Example #1
0
	def eval(self):
		"""Evaluate every partition and write per-partition and summary reports.

		For each entry of ``self.dirPartitions`` the labeled output
		('testEstCRF_Wapiti.txt' in the result directory) and the desired
		output ('evaldata_CRF_Wapiti.txt' in the train directory) are loaded,
		harmonized with ``prepareEval.prepareEval`` and scored with
		``TokenAccuracyEval.evaluate``. The harmonized lists are saved in the
		result directory as 'annotatedEval.txt' and 'desiredEval.txt', and the
		evaluation text is saved as 'evaluation.txt' in the partition directory.

		Finally 'evaluation.tsv' is written to the directory returned by
		``self.partitions.getDirPercentName()``: a tab-separated header of the
		labels, one row of values per partition, and a last row holding the
		column-wise average. When there are no partitions the file contains
		only the (empty) header line instead of the method failing on an
		unbound ``labels`` variable.
		"""
		allValues = []
		# Bound up front so the summary can still be built when the loop below
		# never runs; otherwise it holds the labels of the last partition.
		labels = []
		for dirPartition in self.dirPartitions:
			# Only the train and result directories are needed here.
			(_annotateDir, _testDir, trainDir, _modelDir, resultDir) = self.partitions.getDirTestNames(dirPartition)

			labeledContent = self._getFile(resultDir, 'testEstCRF_Wapiti.txt')
			# tmpFiles from training of testDir are saved in trainDir !
			desiredContent = self._getFile(trainDir, 'evaldata_CRF_Wapiti.txt')

			# harmonize the two lists, they are not tokenized the same way
			desiredContentHarmonized, labeledContentHarmonized = prepareEval.prepareEval(desiredContent, labeledContent)

			self._saveFile(labeledContentHarmonized, resultDir, 'annotatedEval.txt')
			self._saveFile(desiredContentHarmonized, resultDir, 'desiredEval.txt')

			evalText, labels, values = TokenAccuracyEval.evaluate(labeledContentHarmonized, desiredContentHarmonized)
			allValues.append(values)
			self._saveFile(evalText, dirPartition, 'evaluation.txt')

		# calculate average of results for all partitions (column by column;
		# zip stops at the shortest row)
		if allValues:
			average = [float(sum(col)) / len(col) for col in zip(*allValues)]
			allValues.append(average)

		# print all results and average on the last line
		finalEval = "\t".join(labels) + "\n"
		finalEval += "\n".join(["\t".join(['{:f}'.format(v) for v in row]) for row in allValues])
		self._saveFile(finalEval, self.partitions.getDirPercentName(), 'evaluation.tsv')
Example #2
0
	def tokenEval(self):
		"""Score the labeled output against the desired output and save a report.

		Loads 'evaldata_CRF_Wapiti.txt' and 'testEstCRF_Wapiti.txt' from
		``self.dirResult``, harmonizes them with ``prepareEval.prepareEval``,
		evaluates them with ``TokenAccuracyEval.evaluate`` and writes
		'evaluation.txt' to ``self.dirResult``. The report consists of the
		tab-separated labels, the tab-separated values, then the evaluation
		text returned by the evaluator.
		"""
		resultDir = self.dirResult
		expected = self._getFile(resultDir, 'evaldata_CRF_Wapiti.txt')
		predicted = self._getFile(resultDir, 'testEstCRF_Wapiti.txt')

		expectedHarmonized, predictedHarmonized = prepareEval.prepareEval(expected, predicted)

		# The evaluator takes the labeled (predicted) content first.
		evalText, labels, values = TokenAccuracyEval.evaluate(predictedHarmonized, expectedHarmonized)

		headerLine = "\t".join(labels)
		valueLine = "\t".join('{:f}'.format(score) for score in values)
		report = "\n".join([headerLine, valueLine, evalText])

		self._saveFile(report, self.dirResult, 'evaluation.txt')