def test_hello_world(self):
    dir = os.path.dirname(os.path.abspath(__file__))
    subprocess.check_call("python {dir}/helloWorld.py "
                          "--batchSystem=mesos "
                          "--logLevel={logLevel}".format(dir=dir,
                                                         logLevel=getLogLevelString()),
                          shell=True)
def __init__(self):
    # Core options
    self.jobStore = os.path.abspath("./toil")
    self.logLevel = getLogLevelString()
    self.workDir = None
    self.stats = False
    self.clean = "never"
    # Restarting the workflow options
    self.restart = False
    # Batch system options
    self.batchSystem = "singleMachine"
    self.scale = 1
    self.masterIP = '127.0.0.1:5050'
    self.parasolCommand = "parasol"
    # Resource requirements
    self.defaultMemory = 2147483648
    self.defaultCores = 1
    self.defaultDisk = 2147483648
    self.maxCores = sys.maxint
    self.maxMemory = sys.maxint
    self.maxDisk = sys.maxint
    # Retrying/rescuing jobs
    self.retryCount = 0
    self.maxJobDuration = sys.maxint
    self.rescueJobsFrequency = 3600
    # Misc
    self.maxLogFileSize = 50120
    self.sseKey = None
def createConfig(options):
    """
    Creates a config object from the options object.

    TODO: Make the config object a proper class
    """
    logger.info("Starting to create the toil setup for the first time")
    config = ET.Element("config")
    config.attrib["log_level"] = getLogLevelString()
    config.attrib["master_ip"] = options.masterIP
    config.attrib["job_store"] = (os.path.abspath(options.toil)
                                  if options.toil.startswith('.') else options.toil)
    config.attrib["parasol_command"] = options.parasolCommand
    config.attrib["try_count"] = str(int(options.retryCount) + 1)
    config.attrib["max_job_duration"] = str(float(options.maxJobDuration))
    config.attrib["batch_system"] = options.batchSystem
    config.attrib["job_time"] = str(float(options.jobTime))
    config.attrib["max_log_file_size"] = str(int(options.maxLogFileSize))
    config.attrib["default_memory"] = str(int(options.defaultMemory))
    config.attrib["default_cpu"] = str(int(options.defaultCpu))
    config.attrib["default_disk"] = str(int(options.defaultDisk))
    config.attrib["max_cpus"] = str(int(options.maxCpus))
    config.attrib["max_memory"] = str(int(options.maxMemory))
    config.attrib["max_disk"] = str(int(options.maxDisk))
    config.attrib["scale"] = str(float(options.scale))
    if options.bigBatchSystem is not None:
        config.attrib["big_batch_system"] = options.bigBatchSystem
        config.attrib["big_memory_threshold"] = str(int(options.bigMemoryThreshold))
        config.attrib["big_cpu_threshold"] = str(int(options.bigCpuThreshold))
        config.attrib["big_max_cpus"] = str(int(options.bigMaxCpus))
        config.attrib["big_max_memory"] = str(int(options.bigMaxMemory))
    if options.stats:
        config.attrib["stats"] = ""
    return config
def test_hello_world(self):
    system([sys.executable, '-m', helloWorld.__name__,
            'file:./toilTest',
            '--batchSystem', 'mesos',
            '--mesosMaster', 'localhost:5050',
            '--logLevel', getLogLevelString()])
def test_hello_world(self):
    system([sys.executable, "-m", helloWorld.__name__,
            "./toilTest",
            "--batchSystem=mesos",
            "--logLevel", getLogLevelString()])
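# For comparison with the shell-string version of this test above: a minimal
# sketch of the same invocation using the standard library directly, which
# avoids shell=True and its quoting pitfalls. This assumes helloWorld,
# subprocess, sys and getLogLevelString are imported as in the surrounding
# tests; it is an illustration, not a version that appeared in the history.
def test_hello_world_no_shell(self):
    subprocess.check_call([sys.executable, "-m", helloWorld.__name__,
                           "./toilTest",
                           "--batchSystem=mesos",
                           "--logLevel", getLogLevelString()])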
def toilSortTest(self, testNo=1, batchSystem="singleMachine", jobStore='file',
                 lines=10000, maxLineLength=10, N=10000):
    """
    Tests toil by sorting a file in parallel.
    """
    for test in xrange(testNo):
        options = Job.Runner.getDefaultOptions()
        # toil
        if jobStore == 'file':
            options.toil = self.toilDir
        else:
            options.toil = jobStore
        # Specify options
        options.logLevel = getLogLevelString()
        options.retryCount = 32
        options.batchSystem = batchSystem
        # Make the file to sort
        tempSortFile = os.path.join(self.tempDir, "fileToSort.txt")
        makeFileToSort(tempSortFile, lines=lines, maxLineLength=maxLineLength)
        # First make our own sorted version
        with open(tempSortFile, 'r') as fileHandle:
            l = fileHandle.readlines()
        l.sort()
        # Run the toil
        firstJob = Job.wrapJobFn(setup, tempSortFile, N, memory=5000)
        i = Job.Runner.startToil(firstJob, options)
        # There should be no failed jobs
        self.assertEquals(i, 0)
        # Now get the sorted file and check it is properly sorted
        with open(tempSortFile, 'r') as fileHandle:
            l2 = fileHandle.readlines()
        checkEqual(l, l2)
def __init__(self):
    # Core options
    self.workflowID = None
    """This attribute uniquely identifies the job store and therefore the workflow. It is
    necessary in order to distinguish between two consecutive workflows for which
    self.jobStore is the same, e.g. when a job store name is reused after a previous run
    has finished successfully and its job store has been cleaned up."""
    self.jobStore = os.path.abspath("./toil")
    self.logLevel = getLogLevelString()
    self.workDir = None
    self.stats = False
    # Because the stats option needs the jobStore to persist past the end of the run,
    # the clean default value depends on the specified stats option and is determined
    # in setOptions
    self.clean = None
    self.cleanWorkDir = None
    # Restarting the workflow options
    self.restart = False
    # Batch system options
    self.batchSystem = "singleMachine"
    self.scale = 1
    self.mesosMasterAddress = 'localhost:5050'
    self.parasolCommand = "parasol"
    self.parasolMaxBatches = 10000
    self.environment = {}
    # Resource requirements
    self.defaultMemory = 2147483648
    self.defaultCores = 1
    self.defaultDisk = 2147483648
    self.defaultCache = self.defaultDisk
    self.maxCores = sys.maxint
    self.maxMemory = sys.maxint
    self.maxDisk = sys.maxint
    # Retrying/rescuing jobs
    self.retryCount = 0
    self.maxJobDuration = sys.maxint
    self.rescueJobsFrequency = 3600
    # Misc
    self.maxLogFileSize = 50120
    self.sseKey = None
    self.cseKey = None
    self.servicePollingInterval = 60
    # Debug options
    self.badWorker = 0.0
    self.badWorkerFailInterval = 0.01
def __init__(self):
    # Core options
    self.workflowID = None
    """This attribute uniquely identifies the job store and therefore the workflow. It is
    necessary in order to distinguish between two consecutive workflows for which
    self.jobStore is the same, e.g. when a job store name is reused after a previous run
    has finished successfully and its job store has been cleaned up."""
    self.jobStore = os.path.abspath("./toil")
    self.logLevel = getLogLevelString()
    self.workDir = None
    self.stats = False
    # Because the stats option needs the jobStore to persist past the end of the run,
    # the clean default value depends on the specified stats option and is determined
    # in setOptions
    self.clean = None
    # Restarting the workflow options
    self.restart = False
    # Batch system options
    self.batchSystem = "singleMachine"
    self.scale = 1
    self.mesosMasterAddress = 'localhost:5050'
    self.parasolCommand = "parasol"
    self.parasolMaxBatches = 10000
    self.environment = {}
    # Resource requirements
    self.defaultMemory = 2147483648
    self.defaultCores = 1
    self.defaultDisk = 2147483648
    self.defaultCache = self.defaultDisk
    self.maxCores = sys.maxint
    self.maxMemory = sys.maxint
    self.maxDisk = sys.maxint
    # Retrying/rescuing jobs
    self.retryCount = 0
    self.maxJobDuration = sys.maxint
    self.rescueJobsFrequency = 3600
    # Misc
    self.maxLogFileSize = 50120
    self.sseKey = None
    self.cseKey = None
    # Debug options
    self.badWorker = 0.0
    self.badWorkerFailInterval = 0.01
def __init__(self):
    # Core options
    self.jobStore = os.path.abspath("./toil")
    self.logLevel = getLogLevelString()
    self.workDir = None
    self.stats = False
    # Because the stats option needs the jobStore to persist past the end of the run,
    # the clean default value depends on the specified stats option and is determined
    # in setOptions
    self.clean = None
    # Restarting the workflow options
    self.restart = False
    # Batch system options
    self.batchSystem = "singleMachine"
    self.scale = 1
    self.mesosMasterAddress = "localhost:5050"
    self.parasolCommand = "parasol"
    self.parasolMaxBatches = 10000
    self.environment = {}
    # Resource requirements
    self.defaultMemory = 2147483648
    self.defaultCores = 1
    self.defaultDisk = 2147483648
    self.defaultCache = self.defaultDisk
    self.maxCores = sys.maxint
    self.maxMemory = sys.maxint
    self.maxDisk = sys.maxint
    # Retrying/rescuing jobs
    self.retryCount = 0
    self.maxJobDuration = sys.maxint
    self.rescueJobsFrequency = 3600
    # Misc
    self.maxLogFileSize = 50120
    self.sseKey = None
    self.cseKey = None
    # Debug options
    self.badWorker = 0.0
    self.badWorkerFailInterval = 0.01
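# A minimal usage sketch of these defaults, following the pattern the sort
# tests in this section use: obtain an options object seeded with the Config
# defaults, then override only what the run needs. The job store locator
# "./sortTestStore" is a hypothetical example value.
options = Job.Runner.getDefaultOptions("./sortTestStore")
options.logLevel = getLogLevelString()
options.batchSystem = "singleMachine"
options.retryCount = 2
options.clean = "never"  # keep the job store so the workflow can be restarted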
def _toilSort(self, jobStore, batchSystem, lines=defaultLines, N=defaultN,
              testNo=1, lineLen=defaultLineLen):
    """
    Generate a file consisting of the given number of random lines, each line of the given
    length. Sort the file with Toil by splitting the file recursively until each part is less
    than the given number of bytes, sorting each part and merging them back together. Then
    verify the result.

    :param jobStore: a job store string
    :param batchSystem: the name of the batch system
    :param lines: the number of random lines to generate
    :param N: the size in bytes of each split
    :param testNo: the number of repeats of this test
    :param lineLen: the length of each random line in the file
    """
    for test in xrange(testNo):
        try:
            # Specify options
            options = Job.Runner.getDefaultOptions(jobStore)
            options.logLevel = getLogLevelString()
            options.retryCount = 2
            options.batchSystem = batchSystem
            options.clean = "never"
            options.badWorker = 0.5
            options.badWorkerFailInterval = 0.05
            # Make the file to sort
            tempSortFile = os.path.join(self.tempDir, "fileToSort.txt")
            makeFileToSort(tempSortFile, lines=lines, lineLen=lineLen)
            # First make our own sorted version
            with open(tempSortFile, 'r') as fileHandle:
                l = fileHandle.readlines()
            l.sort()
            # Make the first job
            firstJob = Job.wrapJobFn(setup, tempSortFile, N, memory=sortMemory)
            # Check we get an exception if we try to restart a workflow that doesn't exist
            options.restart = True
            try:
                Job.Runner.startToil(firstJob, options)
                self.fail()
            except JobStoreCreationException:
                pass
            options.restart = False
            # Now actually run the workflow
            try:
                Job.Runner.startToil(firstJob, options)
                i = 0
            except FailedJobsException as e:
                i = e.numberOfFailedJobs
            # Check we get an exception if we try to run without restart on an existing store
            try:
                Job.Runner.startToil(firstJob, options)
                self.fail()
            except JobStoreCreationException:
                pass
            options.restart = True
            # This loop tests the restart behavior
            totalTrys = 1
            while i != 0:
                options.useExistingOptions = random.random() > 0.5
                try:
                    Job.Runner.startToil(firstJob, options)
                    i = 0
                except FailedJobsException as e:
                    i = e.numberOfFailedJobs
                if totalTrys > 16:  # p(fail after this many restarts) = 0.5**16
                    self.fail()  # Exceeded a reasonable number of restarts
                totalTrys += 1
            # Now check that if you try to restart from here it will raise an exception
            # indicating that there are no jobs remaining in the workflow.
            try:
                Job.Runner.startToil(firstJob, options)
                self.fail()
            except JobException:
                pass
            # Now check the file is properly sorted
            with open(tempSortFile, 'r') as fileHandle:
                l2 = fileHandle.readlines()
            self.assertEquals(l, l2)
        finally:
            subprocess.check_call([resolveEntryPoint('toil'), 'clean', jobStore])
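# The docstring above describes the sort strategy, but the actual Toil job
# graph is built by the imported setup function, which is not shown in this
# section. As a plain-Python illustration of that strategy (independent of
# Toil): split the lines recursively until a chunk is under N bytes, sort each
# chunk, then merge the sorted chunks back together.
def sortLines(lines, N):
    if len(lines) < 2 or sum(len(line) for line in lines) <= N:
        return sorted(lines)
    mid = len(lines) // 2
    return mergeSorted(sortLines(lines[:mid], N), sortLines(lines[mid:], N))

def mergeSorted(a, b):
    # Standard two-way merge of sorted lists
    out, i, j = [], 0, 0
    while i < len(a) and j < len(b):
        if a[i] <= b[j]:
            out.append(a[i])
            i += 1
        else:
            out.append(b[j])
            j += 1
    return out + a[i:] + b[j:]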
def test_hello_world(self):
    system([sys.executable, '-m', helloWorld.__name__,
            './toilTest',
            '--batchSystem=mesos',
            '--logLevel', getLogLevelString()])
def _toilSort(self, jobStoreLocator, batchSystem, lines=defaultLines, N=defaultN,
              testNo=1, lineLen=defaultLineLen, retryCount=2, badWorker=0.5,
              downCheckpoints=False, disableCaching=False):
    """
    Generate a file consisting of the given number of random lines, each line of the given
    length. Sort the file with Toil by splitting the file recursively until each part is less
    than the given number of bytes, sorting each part and merging them back together. Then
    verify the result.

    :param jobStoreLocator: The location of the job store.
    :param batchSystem: the name of the batch system
    :param lines: the number of random lines to generate
    :param N: the size in bytes of each split
    :param testNo: the number of repeats of this test
    :param lineLen: the length of each random line in the file
    """
    for test in xrange(testNo):
        try:
            # Specify options
            options = Job.Runner.getDefaultOptions(jobStoreLocator)
            options.logLevel = getLogLevelString()
            options.retryCount = retryCount
            options.batchSystem = batchSystem
            options.clean = "never"
            options.badWorker = badWorker
            options.badWorkerFailInterval = 0.05
            options.disableCaching = disableCaching  # FIXME maybe this line should be deleted
            options.downCheckpoints = downCheckpoints
            options.N = N
            # Make the file to sort
            tempSortFile = os.path.join(self.tempDir, "fileToSort.txt")
            makeFileToSort(tempSortFile, lines=lines, lineLen=lineLen)
            options.fileToSort = tempSortFile
            # First make our own sorted version
            with open(tempSortFile, 'r') as fileHandle:
                l = fileHandle.readlines()
            l.sort()
            # Check we get an exception if we try to restart a workflow that doesn't exist
            options.restart = True
            with self.assertRaises(NoSuchJobStoreException):
                main(options)
            options.restart = False
            # Now actually run the workflow
            try:
                main(options)
                i = 0
            except FailedJobsException as e:
                i = e.numberOfFailedJobs
            # Check we get an exception if we try to run without restart on an existing store
            with self.assertRaises(JobStoreExistsException):
                main(options)
            options.restart = True
            # This loop tests the restart behavior
            totalTrys = 1
            while i != 0:
                options.useExistingOptions = random.random() > 0.5
                try:
                    main(options)
                    i = 0
                except FailedJobsException as e:
                    i = e.numberOfFailedJobs
                if totalTrys > 32:  # p(fail after this many restarts) = 0.5**32
                    self.fail('Exceeded a reasonable number of restarts')
                totalTrys += 1
            # Now check that if you try to restart from here it will raise an exception
            # indicating that there are no jobs remaining in the workflow.
            with self.assertRaises(JobException):
                main(options)
            # Now check the file is properly sorted
            with open(tempSortFile, 'r') as fileHandle:
                l2 = fileHandle.readlines()
            self.assertEquals(l, l2)
        finally:
            subprocess.check_call([resolveEntryPoint('toil'), 'clean', jobStoreLocator])
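# Arithmetic behind the comment in the restart loop above: with badWorker at
# 0.5, each attempt fails independently with probability about one half, so 32
# consecutive failed restarts occur with probability 0.5**32, roughly 2.3e-10.
assert 0.5 ** 32 < 1e-9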
def __init__(self):
    # Core options
    self.workflowID = None
    """This attribute uniquely identifies the job store and therefore the workflow. It is
    necessary in order to distinguish between two consecutive workflows for which
    self.jobStore is the same, e.g. when a job store name is reused after a previous run
    has finished successfully and its job store has been cleaned up."""
    self.workflowAttemptNumber = None
    self.jobStore = None
    self.logLevel = getLogLevelString()
    self.workDir = None
    self.stats = False
    # Because the stats option needs the jobStore to persist past the end of the run,
    # the clean default value depends on the specified stats option and is determined
    # in setOptions
    self.clean = None
    self.cleanWorkDir = None
    # Restarting the workflow options
    self.restart = False
    # Batch system options
    self.batchSystem = "singleMachine"
    self.scale = 1
    self.mesosMasterAddress = 'localhost:5050'
    self.parasolCommand = "parasol"
    self.parasolMaxBatches = 10000
    self.environment = {}
    # Autoscaling options
    self.provisioner = None
    self.nodeType = None
    self.nodeOptions = None
    self.minNodes = 0
    self.maxNodes = 10
    self.preemptableNodeType = None
    self.preemptableNodeOptions = None
    self.minPreemptableNodes = 0
    self.maxPreemptableNodes = 0
    self.alphaPacking = 0.8
    self.betaInertia = 1.2
    self.scaleInterval = 10
    self.preemptableCompensation = 0.0
    # Resource requirements
    self.defaultMemory = 2147483648
    self.defaultCores = 1
    self.defaultDisk = 2147483648
    self.readGlobalFileMutableByDefault = False
    self.defaultPreemptable = False
    self.maxCores = sys.maxint
    self.maxMemory = sys.maxint
    self.maxDisk = sys.maxint
    # Retrying/rescuing jobs
    self.retryCount = 0
    self.maxJobDuration = sys.maxint
    self.rescueJobsFrequency = 3600
    # Misc
    self.disableCaching = False
    self.maxLogFileSize = 50120
    self.sseKey = None
    self.cseKey = None
    self.servicePollingInterval = 60
    self.useAsync = True
    # Debug options
    self.badWorker = 0.0
    self.badWorkerFailInterval = 0.01
def __init__(self):
    # Core options
    self.workflowID = None
    """This attribute uniquely identifies the job store and therefore the workflow. It is
    necessary in order to distinguish between two consecutive workflows for which
    self.jobStore is the same, e.g. when a job store name is reused after a previous run
    has finished successfully and its job store has been cleaned up."""
    self.workflowAttemptNumber = None
    self.jobStore = None
    self.logLevel = getLogLevelString()
    self.workDir = None
    self.stats = False
    # Because the stats option needs the jobStore to persist past the end of the run,
    # the clean default value depends on the specified stats option and is determined
    # in setOptions
    self.clean = None
    self.cleanWorkDir = None
    # Restarting the workflow options
    self.restart = False
    # Batch system options
    self.batchSystem = "singleMachine"
    self.scale = 1
    self.mesosMasterAddress = 'localhost:5050'
    self.parasolCommand = "parasol"
    self.parasolMaxBatches = 10000
    self.environment = {}
    # Autoscaling options
    self.provisioner = None
    self.nodeType = None
    self.nodeOptions = None
    self.minNodes = 0
    self.maxNodes = 10
    self.preemptableNodeType = None
    self.preemptableNodeOptions = None
    self.minPreemptableNodes = 0
    self.maxPreemptableNodes = 0
    self.alphaPacking = 0.8
    self.betaInertia = 1.2
    self.scaleInterval = 10
    self.preemptableCompensation = 0.0
    # Parameters to limit service jobs, preventing deadlock scheduling scenarios
    self.maxPreemptableServiceJobs = sys.maxint
    self.maxServiceJobs = sys.maxint
    self.deadlockWait = 60  # Wait one minute before declaring a deadlock
    # Resource requirements
    self.defaultMemory = 2147483648
    self.defaultCores = 1
    self.defaultDisk = 2147483648
    self.readGlobalFileMutableByDefault = False
    self.defaultPreemptable = False
    self.maxCores = sys.maxint
    self.maxMemory = sys.maxint
    self.maxDisk = sys.maxint
    # Retrying/rescuing jobs
    self.retryCount = 0
    self.maxJobDuration = sys.maxint
    self.rescueJobsFrequency = 3600
    # Misc
    self.disableCaching = False
    self.maxLogFileSize = 50120
    self.sseKey = None
    self.cseKey = None
    self.servicePollingInterval = 60
    self.useAsync = True
    # Debug options
    self.badWorker = 0.0
    self.badWorkerFailInterval = 0.01
def _toilSort(self, jobStoreLocator, batchSystem, lines=defaultLines, N=defaultN,
              testNo=1, lineLen=defaultLineLen, retryCount=2, badWorker=0.5,
              downCheckpoints=False, disableCaching=False):
    """
    Generate a file consisting of the given number of random lines, each line of the given
    length. Sort the file with Toil by splitting the file recursively until each part is less
    than the given number of bytes, sorting each part and merging them back together. Then
    verify the result.

    :param jobStoreLocator: The location of the job store.
    :param batchSystem: the name of the batch system
    :param lines: the number of random lines to generate
    :param N: the size in bytes of each split
    :param testNo: the number of repeats of this test
    :param lineLen: the length of each random line in the file
    """
    for test in range(testNo):
        try:
            # Specify options
            options = Job.Runner.getDefaultOptions(jobStoreLocator)
            options.logLevel = getLogLevelString()
            options.retryCount = retryCount
            options.batchSystem = batchSystem
            options.clean = "never"
            options.badWorker = badWorker
            options.badWorkerFailInterval = 0.05
            options.disableCaching = disableCaching
            # This is required because mesosMasterAddress now defaults to the IP of the machine
            # that is starting the workflow while the mesos *tests* run locally.
            if batchSystem == 'mesos':
                options.mesosMasterAddress = 'localhost:5050'
            options.downCheckpoints = downCheckpoints
            options.N = N
            options.outputFile = self.outputFile
            options.fileToSort = self.inputFile
            options.overwriteOutput = True
            options.realTimeLogging = True
            # Make the file to sort
            makeFileToSort(options.fileToSort, lines=lines, lineLen=lineLen)
            # First make our own sorted version
            with open(options.fileToSort, 'r') as fileHandle:
                l = fileHandle.readlines()
            l.sort()
            # Check we get an exception if we try to restart a workflow that doesn't exist
            options.restart = True
            with self.assertRaises(NoSuchJobStoreException):
                with runMain(options):
                    # Now check the file is properly sorted
                    with open(options.outputFile, 'r') as fileHandle:
                        l2 = fileHandle.readlines()
                    self.assertEquals(l, l2)
            options.restart = False
            # Now actually run the workflow
            try:
                with runMain(options):
                    pass
                i = 0
            except FailedJobsException as e:
                i = e.numberOfFailedJobs
            # Check we get an exception if we try to run without restart on an existing store
            with self.assertRaises(JobStoreExistsException):
                with runMain(options):
                    pass
            options.restart = True
            # This loop tests the restart behavior
            totalTrys = 1
            while i != 0:
                options.useExistingOptions = random.random() > 0.5
                try:
                    with runMain(options):
                        pass
                    i = 0
                except FailedJobsException as e:
                    i = e.numberOfFailedJobs
                if totalTrys > 32:  # p(fail after this many restarts) = 0.5**32
                    self.fail('Exceeded a reasonable number of restarts')
                totalTrys += 1
        finally:
            subprocess.check_call([resolveEntryPoint('toil'), 'clean', jobStoreLocator])
            # Final test to make sure the jobStore was actually deleted
            self.assertRaises(NoSuchJobStoreException, Toil.resumeJobStore, jobStoreLocator)
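# makeFileToSort is imported from the sort example and its body is not shown
# in this section. A hypothetical stand-in matching the lines/lineLen keywords
# used above (illustration only, not the project's implementation): write
# `lines` random lowercase lines, each `lineLen` characters long.
import random
import string

def makeFileToSort(fileName, lines, lineLen):
    with open(fileName, 'w') as f:
        for _ in range(lines):
            f.write(''.join(random.choice(string.ascii_lowercase)
                            for _ in range(lineLen)) + '\n')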
def toilSortTest(self, jobStore, batchSystem, lines, N, testNo=1, maxLineLength=10):
    """
    Tests toil by sorting a file in parallel.
    """
    for test in xrange(testNo):
        # Specify options
        options = Job.Runner.getDefaultOptions()
        options.jobStore = jobStore
        options.logLevel = getLogLevelString()
        options.retryCount = 2
        options.batchSystem = batchSystem
        # Make the file to sort
        tempSortFile = os.path.join(self.tempDir, "fileToSort.txt")
        makeFileToSort(tempSortFile, lines=lines, maxLineLength=maxLineLength)
        # First make our own sorted version
        with open(tempSortFile, 'r') as fileHandle:
            l = fileHandle.readlines()
        l.sort()
        # Make the first job
        firstJob = Job.wrapJobFn(setup, tempSortFile, N, memory=5000)
        # Check we get an exception if we try to restart a workflow that doesn't exist
        options.restart = True
        try:
            Job.Runner.startToil(firstJob, options)
            self.fail()
        except JobStoreCreationException:
            pass
        options.restart = False
        # Now actually run the workflow
        try:
            Job.Runner.startToil(firstJob, options)
            i = 0
        except FailedJobsException as e:
            i = e.numberOfFailedJobs
        # Check we get an exception if we try to run without restart on an existing job store
        try:
            Job.Runner.startToil(firstJob, options)
            self.fail()
        except JobStoreCreationException:
            pass
        options.restart = True
        # This loop tests the restart behavior
        while i != 0:
            options.useExistingOptions = random.random() > 0.5
            try:
                Job.Runner.startToil(firstJob, options)
                i = 0
            except FailedJobsException as e:
                i = e.numberOfFailedJobs
        # Now check that if you try to restart from here it will raise an exception
        # indicating that there are no jobs remaining in the workflow.
        try:
            Job.Runner.startToil(firstJob, options)
            self.fail()
        except JobException:
            pass
        # self.assertTrue(e.message.endswith(
        #     'left in toil workflow (workflow has finished successfully?)'))
        # Now check the file is properly sorted
        with open(tempSortFile, 'r') as fileHandle:
            l2 = fileHandle.readlines()
        checkEqual(l, l2)