def testServiceParallelRecursive(self, checkpoint=True):
    """
    Tests the creation of a Job.Service, creating parallel chains of services
    and accessing jobs. Randomly fails the worker.
    """
    for test in range(1):
        # Temporary files
        outFiles = [get_temp_file(rootDir=self._createTempDir()) for j in range(2)]
        messageBundles = [[random.randint(1, sys.maxsize) for i in range(3)]
                          for j in range(2)]
        try:
            # Wire up the services/jobs
            t = Job.wrapJobFn(serviceTestParallelRecursive, outFiles, messageBundles,
                              checkpoint=checkpoint)

            # Run the workflow repeatedly until success
            self.runToil(t, retryCount=2)

            # Check output
            for (messages, outFile) in zip(messageBundles, outFiles):
                self.assertEqual(list(map(int, open(outFile, 'r').readlines())),
                                 messages)
        finally:
            list(map(os.remove, outFiles))

def testServiceDeadlock(self):
    """
    Creates a job with more services than maxServices, checks that deadlock is detected.
    """
    outFile = get_temp_file(rootDir=self._createTempDir())
    try:
        def makeWorkflow():
            job = Job()
            r1 = job.addService(ToySerializableService("woot1"))
            r2 = job.addService(ToySerializableService("woot2"))
            r3 = job.addService(ToySerializableService("woot3"))
            job.addChildFn(fnTest, [r1, r2, r3], outFile)
            return job

        # This should fail, as too few services are available
        try:
            self.runToil(makeWorkflow(), badWorker=0.0, maxServiceJobs=2, deadlockWait=5)
        except DeadlockException:
            print("Got expected deadlock exception")
        else:
            assert 0

        # This should pass, as adequate services are available
        self.runToil(makeWorkflow(), maxServiceJobs=3)
        # Check we get expected output
        assert open(outFile, 'r').read() == "woot1 woot2 woot3"
    finally:
        os.remove(outFile)

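# For context, ToySerializableService and fnTest are referenced above but
# defined elsewhere. A minimal sketch of what they plausibly look like,
# assuming the standard Job.Service interface (start/stop/check) and the
# "woot1 woot2 woot3" output checked above; the real definitions may differ:

class ToySerializableService(Job.Service):
    """A trivially serializable service that just hands back its word."""
    def __init__(self, word):
        super().__init__()
        self.word = word

    def start(self, job):
        # The value returned here is what addService() exposes to other
        # jobs as a promise.
        return self.word

    def check(self):
        return True

    def stop(self, job):
        pass


def fnTest(strings, outputFile):
    """Join the (resolved) service return values and write them out."""
    with open(outputFile, 'w') as fH:
        fH.write(" ".join(strings))
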
def testServiceRecursive(self, checkpoint=True):
    """
    Tests the creation of a Job.Service, creating a chain of services and
    accessing jobs. Randomly fails the worker.
    """
    for test in range(1):
        # Temporary file
        outFile = get_temp_file(rootDir=self._createTempDir())
        messages = [random.randint(1, sys.maxsize) for i in range(3)]
        try:
            # Wire up the services/jobs
            t = Job.wrapJobFn(serviceTestRecursive, outFile, messages,
                              checkpoint=checkpoint)

            # Run the workflow repeatedly until success
            self.runToil(t)

            # Check output
            self.assertEqual(list(map(int, open(outFile, 'r').readlines())), messages)
        finally:
            os.remove(outFile)

def testEncapsulation(self):
    """
    Tests the Job.encapsulate method, which uses the EncapsulatedJob class.
    """
    # Temporary file
    outFile = get_temp_file(rootDir=self._createTempDir())
    try:
        # Encapsulate a job graph
        a = Job.wrapJobFn(encapsulatedJobFn, "A", outFile, name="a")
        a = a.encapsulate(name="a-encap")
        # Now add children/follow-ons to the encapsulated graph
        d = Job.wrapFn(fn1Test, a.rv(), outFile, name="d")
        e = Job.wrapFn(fn1Test, d.rv(), outFile, name="e")
        a.addChild(d)
        a.addFollowOn(e)
        # Create the runner for the workflow.
        options = Job.Runner.getDefaultOptions(self._getTestJobStorePath())
        options.logLevel = "INFO"
        # Run the workflow, the return value being the number of failed jobs
        Job.Runner.startToil(a, options)
        # Check output
        self.assertEqual(open(outFile).readline(), "ABCDE")
    finally:
        os.remove(outFile)

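# fn1Test is used here and in testStatic below but defined elsewhere. A
# minimal sketch consistent with the expected outputs ("ABCDE", "ABCDEFG"):
# it appends the character after the last one in its input, writes the
# result to the output file, and returns it, so chained .rv() promises
# build up the string one letter per job. The real helper may differ.

def fn1Test(string, outputFile):
    rV = string + chr(ord(string[-1]) + 1)
    with open(outputFile, 'w') as fH:
        fH.write(rV)
    return rV
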
def issueBatchJob(self, jobDesc, job_environment: Optional[Dict[str, str]] = None):
    """
    Issues parasol with job commands.
    """
    self.checkResourceRequest(jobDesc.memory, jobDesc.cores, jobDesc.disk)

    MiB = 1 << 20
    truncatedMemory = jobDesc.memory // MiB * MiB
    # Look for a batch for jobs with these resource requirements, with
    # the memory rounded down to the nearest megabyte. Rounding down
    # means the new job can't ever decrease the memory requirements
    # of jobs already in the batch.
    if len(self.resultsFiles) >= self.maxBatches:
        raise RuntimeError('Number of batches reached limit of %i' % self.maxBatches)
    try:
        results = self.resultsFiles[(truncatedMemory, jobDesc.cores)]
    except KeyError:
        results = get_temp_file(rootDir=self.parasolResultsDir)
        self.resultsFiles[(truncatedMemory, jobDesc.cores)] = results

    # Prefix the command with environment overrides, optionally looking them up from the
    # current environment if the value is None
    command = ' '.join(concat('env', self.__environment(job_environment), jobDesc.command))
    parasolCommand = ['-verbose',
                      '-ram=%i' % jobDesc.memory,
                      '-cpu=%i' % jobDesc.cores,
                      '-results=' + results,
                      'add', 'job', command]

    # Deal with the CPUs
    self.usedCpus += jobDesc.cores
    while True:  # Process finished results with no wait
        try:
            jobID = self.cpuUsageQueue.get_nowait()
        except Empty:
            break
        if jobID in list(self.jobIDsToCpu.keys()):
            self.usedCpus -= self.jobIDsToCpu.pop(jobID)
        assert self.usedCpus >= 0
    while self.usedCpus > self.maxCores:  # If we are still waiting
        jobID = self.cpuUsageQueue.get()
        if jobID in list(self.jobIDsToCpu.keys()):
            self.usedCpus -= self.jobIDsToCpu.pop(jobID)
        assert self.usedCpus >= 0

    # Now keep going
    while True:
        line = self._runParasol(parasolCommand)[1][0]
        match = self.parasolOutputPattern.match(line)
        if match is None:
            # This is because parasol add job will return success, even if the job was not
            # properly issued!
            logger.debug('We failed to properly add the job, we will try again after 5s.')
            time.sleep(5)
        else:
            jobID = int(match.group(1))
            self.jobIDsToCpu[jobID] = jobDesc.cores
            self.runningJobs.add(jobID)
            logger.debug("Got the parasol job id: %s from line: %s" % (jobID, line))
            return jobID

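# Illustrative example (hypothetical values): a job requesting 2 GiB of
# memory and 4 cores would make issueBatchJob build an argument list of
# roughly this shape, with the results-file path generated at runtime:
#
#   ['-verbose', '-ram=2147483648', '-cpu=4',
#    '-results=/tmp/<batch-results-file>', 'add', 'job',
#    'env FOO=bar <job command>']
#
# which _runParasol then passes to the parasol binary. Jobs sharing the same
# (memory rounded down to a MiB, cores) pair share one results file, i.e.
# one parasol batch.
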
def testService(self, checkpoint=False):
    """
    Tests the creation of a Job.Service with random failures of the worker.
    """
    for test in range(2):
        # Temporary file
        outFile = get_temp_file(rootDir=self._createTempDir())
        messageInt = random.randint(1, sys.maxsize)
        try:
            # Wire up the services/jobs
            t = Job.wrapJobFn(serviceTest, outFile, messageInt, checkpoint=checkpoint)

            # Run the workflow repeatedly until success
            self.runToil(t)

            # Check output
            self.assertEqual(int(open(outFile, 'r').readline()), messageInt)
        finally:
            os.remove(outFile)

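# serviceTest (and the recursive variants above) are defined elsewhere. A
# minimal sketch of the pattern being tested, reusing the Service shape
# sketched after testServiceDeadlock; names here are illustrative, and the
# real helper also adds randomness so a checkpoint restart can't pair an
# old service with a new accessor:

def serviceTest(job, outFile, messageInt):
    # addService() returns a promise for the value the service's start()
    # method returns; a child job resolves it and writes it to outFile.
    handle = job.addService(ToySerializableService(str(messageInt)))
    job.addChildFn(serviceAccessor, handle, outFile)


def serviceAccessor(message, outFile):
    with open(outFile, 'w') as fH:
        fH.write(str(message))
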
def testStatic(self):
    r"""
    Create a DAG of jobs non-dynamically and run it. DAG is:

    A -> F
    \-------
    B -> D
     \ \
      \ ------- C -> E

    Follow on is marked by ->
    """
    outFile = get_temp_file(rootDir=self._createTempDir())
    try:
        # Create the jobs
        A = Job.wrapFn(fn1Test, "A", outFile)
        B = Job.wrapFn(fn1Test, A.rv(), outFile)
        C = Job.wrapFn(fn1Test, B.rv(), outFile)
        D = Job.wrapFn(fn1Test, C.rv(), outFile)
        E = Job.wrapFn(fn1Test, D.rv(), outFile)
        F = Job.wrapFn(fn1Test, E.rv(), outFile)
        # Connect them into a workflow
        A.addChild(B)
        A.addChild(C)
        B.addChild(C)
        B.addFollowOn(E)
        C.addFollowOn(D)
        A.addFollowOn(F)
        # Create the runner for the workflow.
        options = Job.Runner.getDefaultOptions(self._getTestJobStorePath())
        options.logLevel = "INFO"
        options.retryCount = 100
        options.badWorker = 0.5
        options.badWorkerFailInterval = 0.01
        # Run the workflow, the return value being the number of failed jobs
        Job.Runner.startToil(A, options)
        # Check output
        self.assertEqual(open(outFile, 'r').readline(), "ABCDEFG")
    finally:
        os.remove(outFile)

def setUp(self):
    super(UtilsTest, self).setUp()
    self.tempDir = self._createTempDir()
    self.tempFile = get_temp_file(rootDir=self.tempDir)
    self.outputFile = 'someSortedStuff.txt'
    self.toilDir = os.path.join(self.tempDir, "jobstore")
    self.assertFalse(os.path.exists(self.toilDir))
    self.lines = 1000
    self.lineLen = 10
    self.N = 1000
    makeFileToSort(self.tempFile, self.lines, self.lineLen)
    # First make our own sorted version
    with open(self.tempFile, 'r') as fileHandle:
        self.correctSort = fileHandle.readlines()
        self.correctSort.sort()

    self.sort_workflow_cmd = [
        python, '-m', 'toil.test.sort.sort',
        f'file:{self.toilDir}',
        '--clean=never',
        '--numLines=1', '--lineLength=1'
    ]

    self.restart_sort_workflow_cmd = [
        python, '-m', 'toil.test.sort.restart_sort',
        f'file:{self.toilDir}'
    ]

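# makeFileToSort comes from the toil.test.sort package and is not defined in
# this section. A minimal sketch, assuming it just writes `lines` random
# lines of `lineLen` characters each; the exact alphabet is an assumption:

import random
import string

def makeFileToSort(fileName, lines, lineLen):
    with open(fileName, 'w') as f:
        for _ in range(lines):
            f.write("".join(random.choice(string.ascii_lowercase)
                            for _ in range(lineLen)) + "\n")
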
def setUp(self):
    self.exportPath = get_temp_file()

def getTempFile(suffix="", rootDir=None):
    logger.warning('Deprecated toil method. Please use "toil.test.get_temp_file()" instead.')
    return get_temp_file(suffix, rootDir)