def createEvaluationfiles(self, dirCorpus, testPercentage, numberOfPartition, allBibl):
    """Write the train, annotation and test files for every partition.

    For each name returned by ``self.getDirPartitionNames()``, a fresh
    test/train split is requested from
    ``FormatEval.getShuffledCorpus(allBibl, testPercentage)`` and three
    files are saved through ``self.saveListToFile``:

    * ``train.xml`` in the train directory: the train part of the split;
    * ``test_clean.xml`` in the annotate directory: the test part after
      ``FormatEval.stripTags``;
    * ``test.xml`` in the test directory: the test part joined with
      newlines and passed through ``FormatEval.getBiblList``.

    The directories come from ``self.getDirTestNames(partition)``.

    ``dirCorpus`` and ``numberOfPartition`` are accepted but not read by
    this method.
    """
    for partitionDir in self.getDirPartitionNames():
        # getDirTestNames yields five values; the last two are not needed here.
        annotateDir, testDir, trainDir, _, _ = self.getDirTestNames(partitionDir)

        # A new split is drawn for every partition.
        testPart, trainPart = FormatEval.getShuffledCorpus(allBibl, testPercentage)

        self.saveListToFile(trainPart, os.path.join(trainDir, 'train.xml'))

        self.saveListToFile(
            FormatEval.stripTags(testPart),
            os.path.join(annotateDir, 'test_clean.xml'),
        )

        # In test.xml we need to duplicate <bibl> inside <bibl> so that the
        # evaluation is presented with the same data: Bilbo does not format
        # the "same" data equally between training and annotation.
        evalPath = os.path.join(testDir, 'test.xml')
        evalBibls = FormatEval.getBiblList("\n".join(testPart))
        self.saveListToFile(evalBibls, evalPath)