def testSort(self):
    """
    Check that sort() leaves a file with its lines in sorted order.

    For each of ``self.testNo`` repetitions a fresh input file is generated with
    makeFileToSort(), the lines returned by ``self._loadFile`` are sorted in memory as the
    reference, and the file is then passed to sort() and re-read for comparison.
    """
    # range() instead of xrange() so the test also runs on Python 3.
    for _ in range(self.testNo):
        tempFile1 = os.path.join(self.tempDir, "fileToSort.txt")
        makeFileToSort(tempFile1)
        # Build the expected result before sort() is applied to the file.
        lines1 = self._loadFile(tempFile1)
        lines1.sort()
        sort(tempFile1)
        with open(tempFile1, 'r') as f:
            lines2 = f.readlines()
        # assertEqual: the assertEquals alias is deprecated and removed in Python 3.12.
        self.assertEqual(lines1, lines2)
def testSort(self):
    """
    Check that sort() leaves a file with its lines in sorted order.

    Repeats ``self.testNo`` times: generate an input file with makeFileToSort(), sort the
    lines returned by ``self._loadFile`` in memory as the reference, run sort() on the file
    and compare the file's lines with the reference.
    """
    for _ in range(self.testNo):
        tempFile1 = os.path.join(self.tempDir, "fileToSort.txt")
        makeFileToSort(tempFile1)
        # The reference must be taken before sort() is applied to the file.
        lines1 = self._loadFile(tempFile1)
        lines1.sort()
        sort(tempFile1)
        with open(tempFile1, 'r') as f:
            lines2 = f.readlines()
        # assertEqual: the assertEquals alias is deprecated and removed in Python 3.12.
        self.assertEqual(lines1, lines2)
def testGetMidPoint(self):
    """
    Check that getMidPoint() returns an in-range offset that lands on a newline.

    Repeats ``self.testNo`` times on a freshly generated file, calling
    ``getMidPoint(path, 0, fileSize)`` and checking the character at the returned offset.
    """
    # range() instead of xrange() so the test also runs on Python 3.
    for _ in range(self.testNo):
        tempFile = os.path.join(self.tempDir, "fileToSort.txt")
        makeFileToSort(tempFile)
        # Use a context manager so the handle is closed deterministically.
        with open(tempFile, 'r') as f:
            contents = f.read()
        fileSize = os.path.getsize(tempFile)
        midPoint = getMidPoint(tempFile, 0, fileSize)
        print("the mid point is %i of a file of %i bytes" % (midPoint, fileSize))
        # Validate both bounds before indexing: a negative offset would otherwise wrap
        # around and index from the end of the string.
        assert midPoint >= 0
        assert midPoint < fileSize
        # NOTE(review): this indexes decoded text with a byte offset; it presumably relies
        # on the generated file being single-byte characters -- confirm.
        assert contents[midPoint] == '\n'
def testGetMidPoint(self):
    """
    Exercise getMidPoint() on ``self.testNo`` freshly generated copies of ``self.inputFile``.

    Each round asserts that the returned offset is below the file size, that the character at
    that offset is a newline, and that the offset is not negative.
    """
    for _ in range(self.testNo):
        makeFileToSort(self.inputFile)

        with open(self.inputFile, 'r') as handle:
            text = handle.read()

        fileSize = os.path.getsize(self.inputFile)
        midPoint = getMidPoint(self.inputFile, 0, fileSize)

        report = "the mid point is %i of a file of %i bytes" % (midPoint, fileSize)
        print(report)

        # The checks run in the same order as before: upper bound, newline, lower bound.
        assert midPoint < fileSize
        charAtMidPoint = text[midPoint]
        assert charAtMidPoint == '\n'
        assert midPoint >= 0
def testGetMidPoint(self):
    """
    Exercise getMidPoint() on ``self.testNo`` freshly generated copies of ``self.inputFile``.

    Each round asserts that the returned offset is below the file size, that the character at
    that offset is a newline, and that the offset is not negative.
    """
    for _ in range(self.testNo):
        makeFileToSort(self.inputFile)

        with open(self.inputFile) as handle:
            text = handle.read()

        fileSize = os.path.getsize(self.inputFile)
        midPoint = getMidPoint(self.inputFile, 0, fileSize)

        report = f"The mid point is {midPoint} of a file of {fileSize} bytes."
        print(report)

        # The checks run in the same order as before: upper bound, newline, lower bound.
        assert midPoint < fileSize
        charAtMidPoint = text[midPoint]
        assert charAtMidPoint == '\n'
        assert midPoint >= 0
def testGetMidPoint(self):
    """
    Check that getMidPoint() returns an in-range offset that lands on a newline.

    Repeats ``self.testNo`` times on a freshly generated file, calling
    ``getMidPoint(path, 0, fileSize)`` and checking the character at the returned offset.
    """
    for _ in range(self.testNo):
        tempFile = os.path.join(self.tempDir, "fileToSort.txt")
        makeFileToSort(tempFile)
        # Use a context manager so the handle is closed deterministically.
        with open(tempFile, 'r') as f:
            contents = f.read()
        fileSize = os.path.getsize(tempFile)
        midPoint = getMidPoint(tempFile, 0, fileSize)
        print("the mid point is %i of a file of %i bytes" % (midPoint, fileSize))
        # Validate both bounds before indexing: a negative offset would otherwise wrap
        # around and index from the end of the string.
        assert midPoint >= 0
        assert midPoint < fileSize
        # NOTE(review): this indexes decoded text with a byte offset; it presumably relies
        # on the generated file being single-byte characters -- confirm.
        assert contents[midPoint] == '\n'
def testCopySubRangeOfFile(self):
    """
    Check that copySubRangeOfFile() writes exactly the requested slice of a file.

    Repeats ``self.testNo`` times: generate a 10-line input file, pick a random
    ``fileStart <= fileEnd`` inside it, copy that range into an output file through an open
    handle, and compare the output with the same slice of the input.
    """
    # range() instead of xrange() so the test also runs on Python 3.
    for _ in range(self.testNo):
        tempFile = os.path.join(self.tempDir, "fileToSort1.txt")
        outputFile = os.path.join(self.tempDir, "outputFileToSort1.txt")
        makeFileToSort(tempFile, lines=10, lineLen=defaultLineLen)
        fileSize = os.path.getsize(tempFile)
        assert fileSize > 0
        # 0 <= fileStart <= fileEnd < fileSize
        fileStart = random.randrange(fileSize)
        fileEnd = random.randrange(fileStart, fileSize)
        # Context managers close every handle, even if the copy raises.
        with open(outputFile, 'w') as fileHandle:
            copySubRangeOfFile(tempFile, fileStart, fileEnd, fileHandle)
        with open(outputFile, 'r') as f:
            copied = f.read()
        with open(tempFile, 'r') as f:
            expected = f.read()[fileStart:fileEnd]
        # assertEqual: the assertEquals alias is deprecated and removed in Python 3.12.
        self.assertEqual(copied, expected)
def testCopySubRangeOfFile(self):
    """
    Check that copySubRangeOfFile() writes exactly the requested slice of a file.

    Repeats ``self.testNo`` times: generate a 10-line input file, pick a random
    ``fileStart <= fileEnd`` inside it, copy that range into an output file through an open
    handle, and compare the output with the same slice of the input.
    """
    for _ in range(self.testNo):
        tempFile = os.path.join(self.tempDir, "fileToSort1.txt")
        outputFile = os.path.join(self.tempDir, "outputFileToSort1.txt")
        makeFileToSort(tempFile, lines=10, lineLen=defaultLineLen)
        fileSize = os.path.getsize(tempFile)
        assert fileSize > 0
        # 0 <= fileStart <= fileEnd < fileSize
        fileStart = random.randrange(fileSize)
        fileEnd = random.randrange(fileStart, fileSize)
        # Context managers close every handle, even if the copy raises.
        with open(outputFile, 'w') as fileHandle:
            copySubRangeOfFile(tempFile, fileStart, fileEnd, fileHandle)
        with open(outputFile, 'r') as f:
            copied = f.read()
        with open(tempFile, 'r') as f:
            expected = f.read()[fileStart:fileEnd]
        # assertEqual: the assertEquals alias is deprecated and removed in Python 3.12.
        self.assertEqual(copied, expected)
def testCopySubRangeOfFile(self):
    """
    Compare the value returned by copySubRangeOfFile() with a direct slice of the input.

    For each of ``self.testNo`` rounds a 10-line file is generated, a random range
    ``[fileStart, fileEnd)`` within it is chosen, the value returned by copySubRangeOfFile()
    is written to an output file, and that file's contents are compared with the same slice
    read straight from the input.
    """
    for _ in range(self.testNo):
        sourcePath = os.path.join(self.tempDir, "fileToSort1.txt")
        copyPath = os.path.join(self.tempDir, "outputFileToSort1.txt")
        makeFileToSort(sourcePath, lines=10, lineLen=defaultLineLen)

        fileSize = os.path.getsize(sourcePath)
        assert fileSize > 0

        # Draw the start first, then an end that is no smaller than the start.
        startCandidates = range(0, fileSize)
        fileStart = random.choice(startCandidates)
        endCandidates = range(fileStart, fileSize)
        fileEnd = random.choice(endCandidates)

        with open(copyPath, 'w') as sink:
            chunk = copySubRangeOfFile(sourcePath, fileStart, fileEnd)
            sink.write(chunk)

        with open(copyPath, 'r') as copyHandle:
            actual = copyHandle.read()

        with open(sourcePath, 'r') as sourceHandle:
            wholeFile = sourceHandle.read()
            expected = wholeFile[fileStart:fileEnd]

        self.assertEqual(actual, expected)
def testMerge(self):
    """
    Check that merge() combines two sorted files into one sorted file.

    Repeats ``self.testNo`` times: generate two input files, sort each with sort(), merge
    them into a third file with merge(), and compare the merged lines with the sorted
    concatenation of both inputs' lines as returned by ``self._loadFile``.
    """
    # range() instead of xrange() so the test also runs on Python 3.
    for _ in range(self.testNo):
        tempFile1 = os.path.join(self.tempDir, "fileToSort1.txt")
        tempFile2 = os.path.join(self.tempDir, "fileToSort2.txt")
        tempFile3 = os.path.join(self.tempDir, "mergedFile.txt")
        makeFileToSort(tempFile1)
        makeFileToSort(tempFile2)
        sort(tempFile1)
        sort(tempFile2)
        # The output is opened first, as before; all three handles close on exit.
        with open(tempFile3, 'w') as fileHandle, \
                open(tempFile1) as tempFileHandle1, \
                open(tempFile2) as tempFileHandle2:
            merge(tempFileHandle1, tempFileHandle2, fileHandle)
        lines1 = self._loadFile(tempFile1) + self._loadFile(tempFile2)
        lines1.sort()
        with open(tempFile3, 'r') as f:
            lines2 = f.readlines()
        # assertEqual: the assertEquals alias is deprecated and removed in Python 3.12.
        self.assertEqual(lines1, lines2)
def testMerge(self):
    """
    Check that merge() combines two sorted files into one sorted file.

    Repeats ``self.testNo`` times: generate two input files, sort each with sort(), merge
    them into a third file with merge(), and compare the merged lines with the sorted
    concatenation of both inputs' lines as returned by ``self._loadFile``.
    """
    for _ in range(self.testNo):
        tempFile1 = os.path.join(self.tempDir, "fileToSort1.txt")
        tempFile2 = os.path.join(self.tempDir, "fileToSort2.txt")
        tempFile3 = os.path.join(self.tempDir, "mergedFile.txt")
        makeFileToSort(tempFile1)
        makeFileToSort(tempFile2)
        sort(tempFile1)
        sort(tempFile2)
        # The output is opened first, as before; all three handles close on exit.
        with open(tempFile3, 'w') as fileHandle, \
                open(tempFile1) as tempFileHandle1, \
                open(tempFile2) as tempFileHandle2:
            merge(tempFileHandle1, tempFileHandle2, fileHandle)
        lines1 = self._loadFile(tempFile1) + self._loadFile(tempFile2)
        lines1.sort()
        with open(tempFile3, 'r') as f:
            lines2 = f.readlines()
        # assertEqual: the assertEquals alias is deprecated and removed in Python 3.12.
        self.assertEqual(lines1, lines2)
def _toilSort(self, jobStoreLocator, batchSystem,
              lines=defaultLines, N=defaultN, testNo=1, lineLen=defaultLineLen,
              retryCount=2, badWorker=0.5, downCheckpoints=False, disableCaching=False):
    """
    Generate a file consisting of the given number of random lines, each line of the given
    length. Sort the file with Toil by splitting the file recursively until each part is less
    than the given number of bytes, sorting each part and merging them back together. Then
    verify the result.

    Along the way the restart behaviour is checked: restarting a non-existent job store,
    re-running on an existing store without restart, and restarting a finished workflow
    must each raise.

    :param jobStoreLocator: The location of the job store.
    :param batchSystem: the name of the batch system
    :param lines: the number of random lines to generate
    :param N: the size in bytes of each split
    :param testNo: the number of repeats of this test
    :param lineLen: the length of each random line in the file
    :param retryCount: value assigned to ``options.retryCount``
    :param badWorker: value assigned to ``options.badWorker``
    :param downCheckpoints: value assigned to ``options.downCheckpoints``
    :param disableCaching: value assigned to ``options.disableCaching``
    """
    # range() instead of xrange() so the test also runs on Python 3.
    for _ in range(testNo):
        try:
            # Specify options
            options = Job.Runner.getDefaultOptions(jobStoreLocator)
            options.logLevel = getLogLevelString()
            options.retryCount = retryCount
            options.batchSystem = batchSystem
            options.clean = "never"
            options.badWorker = badWorker
            options.badWorkerFailInterval = 0.05
            options.disableCaching = disableCaching  # FIXME maybe this line should be deleted
            options.downCheckpoints = downCheckpoints
            options.N = N

            # Make the file to sort
            tempSortFile = os.path.join(self.tempDir, "fileToSort.txt")
            makeFileToSort(tempSortFile, lines=lines, lineLen=lineLen)
            options.fileToSort = tempSortFile

            # First make our own sorted version
            with open(tempSortFile, 'r') as fileHandle:
                expectedLines = fileHandle.readlines()
            expectedLines.sort()

            # Check we get an exception if we try to restart a workflow that doesn't exist
            options.restart = True
            with self.assertRaises(NoSuchJobStoreException):
                main(options)

            options.restart = False

            # Now actually run the workflow
            try:
                main(options)
            except FailedJobsException as e:
                failedJobs = e.numberOfFailedJobs
            else:
                failedJobs = 0

            # Check we get an exception if we try to run without restart on an existing store
            with self.assertRaises(JobStoreExistsException):
                main(options)

            options.restart = True

            # This loop tests the restart behavior
            totalTrys = 1
            while failedJobs != 0:
                options.useExistingOptions = random.random() > 0.5
                try:
                    main(options)
                    failedJobs = 0
                except FailedJobsException as e:
                    failedJobs = e.numberOfFailedJobs
                    if totalTrys > 32:  # p(fail after this many restarts) = 0.5**32
                        self.fail('Exceeded a reasonable number of restarts')
                    totalTrys += 1

            # Now check that if you try to restart from here it will raise an exception
            # indicating that there are no jobs remaining in the workflow.
            with self.assertRaises(JobException):
                main(options)

            # Now check the file is properly sorted..
            with open(tempSortFile, 'r') as fileHandle:
                sortedLines = fileHandle.readlines()
            # assertEqual: the assertEquals alias is deprecated and removed in Python 3.12.
            self.assertEqual(expectedLines, sortedLines)
        finally:
            # Always clean the job store so the next repetition starts without one.
            subprocess.check_call([resolveEntryPoint('toil'), 'clean', jobStoreLocator])
def _toilSort(self, jobStoreLocator, batchSystem,
              lines=defaultLines, N=defaultN, testNo=1, lineLen=defaultLineLen,
              retryCount=2, badWorker=0.5, downCheckpoints=False, disableCaching=False):
    """
    Generate a file consisting of the given number of random lines, each line of the given
    length. Sort the file with Toil by splitting the file recursively until each part is less
    than the given number of bytes, sorting each part and merging them back together. Then
    verify the result.

    Along the way the restart behaviour is checked: restarting a non-existent job store and
    re-running on an existing store without restart must each raise. After cleaning, the
    job store must no longer be resumable.

    :param jobStoreLocator: The location of the job store.
    :param batchSystem: the name of the batch system
    :param lines: the number of random lines to generate
    :param N: the size in bytes of each split
    :param testNo: the number of repeats of this test
    :param lineLen: the length of each random line in the file
    :param retryCount: value assigned to ``options.retryCount``
    :param badWorker: value assigned to ``options.badWorker``
    :param downCheckpoints: value assigned to ``options.downCheckpoints``
    :param disableCaching: value assigned to ``options.disableCaching``
    """
    for _ in range(testNo):
        try:
            # Specify options
            options = Job.Runner.getDefaultOptions(jobStoreLocator)
            options.logLevel = getLogLevelString()
            options.retryCount = retryCount
            options.batchSystem = batchSystem
            options.clean = "never"
            options.badWorker = badWorker
            options.badWorkerFailInterval = 0.05
            options.disableCaching = disableCaching
            # This is required because mesosMasterAddress now defaults to the IP of the machine
            # that is starting the workflow while the mesos *tests* run locally.
            if batchSystem == 'mesos':
                options.mesosMasterAddress = 'localhost:5050'
            options.downCheckpoints = downCheckpoints
            options.N = N
            options.outputFile = self.outputFile
            options.fileToSort = self.inputFile
            options.overwriteOutput = True
            options.realTimeLogging = True

            # Make the file to sort
            makeFileToSort(options.fileToSort, lines=lines, lineLen=lineLen)

            # First make our own sorted version
            with open(options.fileToSort, 'r') as fileHandle:
                expectedLines = fileHandle.readlines()
            expectedLines.sort()

            # Check we get an exception if we try to restart a workflow that doesn't exist
            options.restart = True
            with self.assertRaises(NoSuchJobStoreException):
                with runMain(options):
                    # Now check the file is properly sorted..
                    # NOTE(review): if runMain raises on entry, as the enclosing
                    # assertRaises expects, this comparison never runs -- confirm whether
                    # the sortedness check should live inside a successful run instead.
                    with open(options.outputFile, 'r') as fileHandle:
                        sortedLines = fileHandle.readlines()
                    # assertEqual: the assertEquals alias is deprecated and removed in
                    # Python 3.12.
                    self.assertEqual(expectedLines, sortedLines)

            options.restart = False

            # Now actually run the workflow
            try:
                with runMain(options):
                    pass
                failedJobs = 0
            except FailedJobsException as e:
                failedJobs = e.numberOfFailedJobs

            # Check we get an exception if we try to run without restart on an existing store
            with self.assertRaises(JobStoreExistsException):
                with runMain(options):
                    pass

            options.restart = True

            # This loop tests the restart behavior
            totalTrys = 1
            while failedJobs != 0:
                options.useExistingOptions = random.random() > 0.5
                try:
                    with runMain(options):
                        pass
                    failedJobs = 0
                except FailedJobsException as e:
                    failedJobs = e.numberOfFailedJobs
                    if totalTrys > 32:  # p(fail after this many restarts) = 0.5**32
                        self.fail('Exceeded a reasonable number of restarts')
                    totalTrys += 1
        finally:
            subprocess.check_call([resolveEntryPoint('toil'), 'clean', jobStoreLocator])
            # final test to make sure the jobStore was actually deleted
            self.assertRaises(NoSuchJobStoreException, Toil.resumeJobStore, jobStoreLocator)
def _toilSort(self, jobStoreLocator, batchSystem,
              lines=defaultLines, N=defaultN, testNo=1, lineLen=defaultLineLen,
              retryCount=2, badWorker=0.5, downCheckpoints=False, disableCaching=False):
    """
    Generate a file consisting of the given number of random lines, each line of the given
    length. Sort the file with Toil by splitting the file recursively until each part is less
    than the given number of bytes, sorting each part and merging them back together. Then
    verify the result.

    Along the way the restart behaviour is checked: restarting a non-existent job store,
    re-running on an existing store without restart, and restarting a finished workflow
    must each raise.

    :param jobStoreLocator: The location of the job store.
    :param batchSystem: the name of the batch system
    :param lines: the number of random lines to generate
    :param N: the size in bytes of each split
    :param testNo: the number of repeats of this test
    :param lineLen: the length of each random line in the file
    :param retryCount: value assigned to ``options.retryCount``
    :param badWorker: value assigned to ``options.badWorker``
    :param downCheckpoints: value assigned to ``options.downCheckpoints``
    :param disableCaching: value assigned to ``options.disableCaching``
    """
    # range() instead of xrange() so the test also runs on Python 3.
    for _ in range(testNo):
        try:
            # Specify options
            options = Job.Runner.getDefaultOptions(jobStoreLocator)
            options.logLevel = getLogLevelString()
            options.retryCount = retryCount
            options.batchSystem = batchSystem
            options.clean = "never"
            options.badWorker = badWorker
            options.badWorkerFailInterval = 0.05
            options.disableCaching = disableCaching  # FIXME maybe this line should be deleted
            options.downCheckpoints = downCheckpoints
            options.N = N

            # Make the file to sort
            tempSortFile = os.path.join(self.tempDir, "fileToSort.txt")
            makeFileToSort(tempSortFile, lines=lines, lineLen=lineLen)
            options.fileToSort = tempSortFile

            # First make our own sorted version
            with open(tempSortFile, 'r') as fileHandle:
                expectedLines = fileHandle.readlines()
            expectedLines.sort()

            # Check we get an exception if we try to restart a workflow that doesn't exist
            options.restart = True
            with self.assertRaises(NoSuchJobStoreException):
                main(options)

            options.restart = False

            # Now actually run the workflow
            try:
                main(options)
            except FailedJobsException as e:
                failedJobs = e.numberOfFailedJobs
            else:
                failedJobs = 0

            # Check we get an exception if we try to run without restart on an existing store
            with self.assertRaises(JobStoreExistsException):
                main(options)

            options.restart = True

            # This loop tests the restart behavior
            totalTrys = 1
            while failedJobs != 0:
                options.useExistingOptions = random.random() > 0.5
                try:
                    main(options)
                    failedJobs = 0
                except FailedJobsException as e:
                    failedJobs = e.numberOfFailedJobs
                    if totalTrys > 32:  # p(fail after this many restarts) = 0.5**32
                        self.fail('Exceeded a reasonable number of restarts')
                    totalTrys += 1

            # Now check that if you try to restart from here it will raise an exception
            # indicating that there are no jobs remaining in the workflow.
            with self.assertRaises(JobException):
                main(options)

            # Now check the file is properly sorted..
            with open(tempSortFile, 'r') as fileHandle:
                sortedLines = fileHandle.readlines()
            # assertEqual: the assertEquals alias is deprecated and removed in Python 3.12.
            self.assertEqual(expectedLines, sortedLines)
        finally:
            # Always clean the job store so the next repetition starts without one.
            subprocess.check_call([resolveEntryPoint('toil'), 'clean', jobStoreLocator])