Python FormatEval Beispiele

Programmiersprache: Python

Namespace / Paketname: formatEval

Klasse / Typ: FormatEval

Beispiele auf hotexamples.com: 5

Python FormatEval – 5 Beispiele gefunden. Dies sind die am besten bewerteten Python-Beispiele für die Klasse formatEval.FormatEval, die aus Open-Source-Projekten extrahiert wurden. Sie können die Beispiele bewerten, um ihre Qualität zu verbessern.

Häufig verwendete Methoden

Anzeigen Verbergen

getShuffledCorpus(2)

copy_files_for_eval(1)

getBiblFromDir(1)

getBiblList(1)

get_list_of_tag_from_dir(1)

stripTags(1)

strip_tags(1)

Beispiel #1

Datei anzeigen

Datei: simpleEval.py Projekt: OpenEdition/bilbo

	def extractAndCleanCorpus(self):
		"""Strip the corpus tag from each matching file and save the result.

		Every path in ``self.dirCorpus`` that matches the glob pattern is read
		as UTF-8, passed through ``FormatEval.strip_tags`` together with
		``self.corpusTag``, and handed to ``self._saveFile`` along with
		``self.dirLabel`` and the file's base name.
		"""
		# NOTE(review): the pattern is "*xml" (no dot), so it matches any name
		# that merely ends in "xml" -- confirm this is intended.
		pattern = os.path.join(self.dirCorpus, "*xml")
		for sourcePath in glob.glob(pattern):
			with open(sourcePath, 'r', encoding='utf-8') as handle:
				rawXml = handle.read()
			cleanedXml = FormatEval.strip_tags(rawXml, self.corpusTag)
			targetName = os.path.basename(sourcePath)
			self._saveFile(cleanedXml, self.dirLabel, targetName)

Beispiel #2

Datei anzeigen

Datei: partition.py Projekt: OpenEdition/bilbo

	def partition(self):
		"""Create the partition folders, then build the evaluation files.

		The folders are created first from ``self.dirCorpus``,
		``self.testPercentage`` and ``self.numberOfPartition``; the value
		returned by ``FormatEval.get_list_of_tag_from_dir`` is then forwarded
		to ``self.createEvaluationfiles``.
		"""
		self.createPartitionFolders(self.dirCorpus, self.testPercentage, self.numberOfPartition)
		# Author's outline of the intended procedure (translated from French):
		#   - build a list of every bibl found in the files [(file_name, bibl_index)]
		#   - shuffle that list into label/train
		#   - sort the list by file
		#   - for each file, delete the bibl entries that are not part of the index
		self.createEvaluationfiles(
			self.dirCorpus,
			self.testPercentage,
			self.numberOfPartition,
			FormatEval.get_list_of_tag_from_dir(self.dirCorpus),
		)

Beispiel #3

Datei anzeigen

Datei: partition.py Projekt: ansdma/bilbo

	def createEvaluationfiles(self, dirCorpus, testPercentage, numberOfPartition, allBibl):
		"""Write train.xml, test_clean.xml and test.xml for every partition.

		For each partition directory, ``allBibl`` is split with
		``FormatEval.getShuffledCorpus`` using ``testPercentage``. The training
		part goes to ``train.xml``, the tag-stripped test part to
		``test_clean.xml`` and the re-wrapped test part to ``test.xml``.

		``dirCorpus`` and ``numberOfPartition`` are accepted but not read here.
		"""
		for partitionDir in self.getDirPartitionNames():
			annotateDir, testDir, trainDir, _modelDir, _ = self.getDirTestNames(partitionDir)
			heldOut, training = FormatEval.getShuffledCorpus(allBibl, testPercentage)

			# Training data is saved exactly as returned by the shuffle.
			self.saveListToFile(training, os.path.join(trainDir, 'train.xml'))

			# Test data with tags stripped, saved in the annotate directory.
			stripped = FormatEval.stripTags(heldOut)
			self.saveListToFile(stripped, os.path.join(annotateDir, 'test_clean.xml'))

			# test.xml needs <bibl> duplicated inside <bibl> so that evaluation
			# sees the same data: Bilbo does not format the "same" data equally
			# between training and annotation.
			evalPath = os.path.join(testDir, 'test.xml')
			rewrapped = FormatEval.getBiblList("\n".join(heldOut))
			self.saveListToFile(rewrapped, evalPath)

Beispiel #4

Datei anzeigen

Datei: partition.py Projekt: OpenEdition/bilbo

	def createEvaluationfiles(self, dirCorpus, testPercentage, numberOfPartition, bibl_list):
		"""Copy the train, test and to-be-annotated files into every partition.

		For each partition directory, ``bibl_list`` is split with
		``FormatEval.getShuffledCorpus`` using ``testPercentage``, and
		``FormatEval.copy_files_for_eval`` is called three times with
		``self.dirCorpus`` as the source.

		The ``dirCorpus`` and ``numberOfPartition`` arguments are accepted but
		not read here; the source directory comes from ``self.dirCorpus``.
		"""
		for partitionDir in self.getDirPartitionNames():
			annotateDir, testDir, trainDir, _modelDir, _ = self.getDirTestNames(partitionDir)
			heldOut, training = FormatEval.getShuffledCorpus(bibl_list, testPercentage)

			# Training files: (100 - testPercentage) % of the corpus.
			FormatEval.copy_files_for_eval(self.dirCorpus, trainDir, training)
			# Evaluation files that keep their annotations: testPercentage % of the corpus.
			FormatEval.copy_files_for_eval(self.dirCorpus, testDir, heldOut)
			# Evaluation files with the annotations stripped; Bilbo labels these.
			FormatEval.copy_files_for_eval(self.dirCorpus, annotateDir, heldOut, 'bibl', strip=True)

Beispiel #5

Datei anzeigen

Datei: partition.py Projekt: ansdma/bilbo

	def getAndSaveAllBibl(self, dirCorpus):
		"""Collect the bibl entries of ``dirCorpus``, save them, and return them.

		The value returned by ``FormatEval.getBiblFromDir`` is written to
		``all_bibl.xml`` inside the directory given by ``self.getDirEvalName()``
		and then returned unchanged.
		"""
		collected = FormatEval.getBiblFromDir(dirCorpus)
		self.saveListToFile(collected, os.path.join(self.getDirEvalName(), 'all_bibl.xml'))
		return collected