def testPreemptableDeficitIsSet(self):
    """
    Check that updateClusterSize records a preemptable deficit when fewer
    preemptable nodes are launched than were asked for, so that the
    shortfall is available to the next run of estimateNodeCount.
    """
    nodeTypeName = 'c4.8xlarge'

    # addNodes is mocked so that a request for 5 nodes only yields 3.
    self.provisioner.addNodes = MagicMock(return_value=3)
    # The cluster starts out empty.
    self.provisioner.getProvisionedWorkers = MagicMock(return_value=[])

    # preemptableCompensation only applies when a preemptable and a
    # non-preemptable node share the same type, so the config is set up
    # explicitly to contain both flavours of one type.
    self.config.nodeTypes = ['c4.8xlarge:0.6', 'c4.8xlarge']
    self.provisioner.nodeTypes = [nodeTypeName, nodeTypeName]
    self.provisioner.nodeShapes = [c4_8xlarge_preemptable, c4_8xlarge]

    scaler = ClusterScaler(self.provisioner, self.leader, self.config)
    requestedCounts = {c4_8xlarge_preemptable: 5, c4_8xlarge: 0}

    # First pass: 5 requested, 3 delivered -> deficit of 2.
    scaler.updateClusterSize(requestedCounts)
    self.assertEqual(scaler.preemptableNodeDeficit[nodeTypeName], 2)
    self.provisioner.addNodes.assert_called_once()

    # Second pass, some time later: the launch now succeeds in full, so
    # the deficit should vanish.
    self.provisioner.addNodes = MagicMock(return_value=5)
    scaler.updateClusterSize(requestedCounts)
    self.assertEqual(scaler.preemptableNodeDeficit[nodeTypeName], 0)
def testNoLaunchingIfDeltaAlreadyMet(self):
    """
    When the requested delta can be satisfied purely by unignoring nodes,
    the scaler must not ask the provisioner to launch "0" more instances.
    """
    workerAddress = '127.0.0.1'

    # A single node type keeps the scenario simple.
    self.provisioner.nodeTypes = ['c4.8xlarge']
    self.provisioner.nodeShapes = [c4_8xlarge]
    scaler = ClusterScaler(self.provisioner, self.leader, self.config)

    # The cluster contains exactly one worker, and the scaler is
    # currently ignoring it.
    launchTime = datetime.datetime.now().isoformat()
    ignoredWorker = Node(workerAddress, workerAddress, 'testNode',
                         launchTime,
                         nodeType='c4.8xlarge',
                         preemptable=True)
    self.provisioner.getProvisionedWorkers = MagicMock(
        return_value=[ignoredWorker])
    scaler.ignoredNodes.add(workerAddress)

    # Ask for one node and observe what the scaler does.
    self.provisioner.addNodes = MagicMock()
    scaler.updateClusterSize({c4_8xlarge: 1})

    self.assertFalse(
        self.provisioner.addNodes.called,
        "addNodes was called when no new nodes were needed")
    remainingIgnored = len(scaler.ignoredNodes)
    self.assertEqual(
        remainingIgnored, 0,
        "The scaler didn't unignore an ignored node when "
        "scaling up")
def testRounding(self):
    """
    Exercise ClusterScaler._round on integers, ordinary decimals and
    exact halves.
    """
    # Build a ClusterScaler to call _round on.
    self.config.targetTime = 1
    self.config.betaInertia = 0.0
    self.config.maxNodes = [2, 3]
    scaler = ClusterScaler(self.provisioner, self.leader, self.config)

    # (input, expected) pairs, checked in order.
    cases = [
        # Exact integers round to themselves.
        (0.0, 0),
        (1.0, 1),
        (-1.0, -1),
        (123456789101112.13, 123456789101112),
        # Decimals other than X.5 go to the nearer integer.
        (1E-10, 0),
        (0.5 + 1E-15, 1),
        (-0.9, -1),
        (-0.4, 0),
        # Exactly X.5 rounds away from zero.
        (0.5, 1),
        (-0.5, -1),
        (2.5, 3),
        (-2.5, -3),
        (15.5, 16),
        (-15.5, -16),
        (123456789101112.5, 123456789101113),
    ]
    for value, expected in cases:
        self.assertEqual(scaler._round(value), expected)
def testMaxNodes(self):
    """
    With an aggressive configuration and a very large backlog of jobs,
    the estimated node counts must still be capped at maxNodes.
    """
    self.config.targetTime = 1
    self.config.betaInertia = 0.0
    self.config.maxNodes = [2, 3]
    scaler = ClusterScaler(self.provisioner, self.leader, self.config)

    def thousandJobs(preemptable):
        # One job shape, repeated 1000 times.
        shape = Shape(wallTime=3600,
                      cores=2,
                      memory=h2b('1G'),
                      disk=h2b('2G'),
                      preemptable=preemptable)
        return [shape] * 1000

    # 1000 preemptable jobs followed by 1000 non-preemptable ones.
    jobShapes = thousandJobs(True)
    jobShapes.extend(thousandJobs(False))

    counts = scaler.getEstimatedNodeCounts(jobShapes, defaultdict(int))
    self.assertEqual(counts[r3_8xlarge], 2)
    self.assertEqual(counts[c4_8xlarge_preemptable], 3)
def _testClusterScaling(self, config, numJobs, numPreemptableJobs):
    """
    Test the ClusterScaler class with different patterns of job creation.
    Tests ascertain that autoscaling occurs and that all the jobs are run.

    :param config: configuration handed to both the mock batch
        system/provisioner and the ClusterScaler.
    :param int numJobs: number of non-preemptable jobs to queue.
    :param int numPreemptableJobs: number of preemptable jobs to queue.
    """
    # Queue the requested jobs, seed the scaler with completed-job
    # history, wait for the queues and the cluster to drain, then log the
    # worker time expended and the number of worker nodes used.
    logger.info("Creating dummy batch system and scalar")
    mock = MockBatchSystemAndProvisioner(config, secondsPerJob=2.0)
    clusterScaler = ClusterScaler(mock, mock, config)

    # Everything after the scaler exists runs under try/finally so the
    # scaler is shut down even if queueing or waiting fails.
    try:
        # Queue the jobs. Explicit loops are used instead of map() because
        # map() is lazy on Python 3 and would silently add nothing.
        logger.info("Creating test jobs")
        for _ in range(numJobs):
            mock.addJob()
        for _ in range(numPreemptableJobs):
            mock.addJob(preemptable=True)

        # Add some completed jobs for each kind that was actually queued.
        for preemptable in (True, False):
            queued = numPreemptableJobs if preemptable else numJobs
            if queued > 0:
                # Add 1000 random jobs, each strictly smaller than the
                # node shape in every dimension.
                for _ in range(1000):
                    x = mock.getNodeShape(preemptable)
                    iJ = IssuedJob(1,
                                   memory=random.randrange(1, x.memory),
                                   cores=random.randrange(1, x.cores),
                                   disk=random.randrange(1, x.disk),
                                   preemptable=preemptable)
                    clusterScaler.addCompletedJob(
                        iJ, random.randrange(1, x.wallTime))

        logger.info("Waiting for jobs to be processed")
        startTime = time.time()
        # Wait while the cluster chunks through the jobs: loop until both
        # queues are empty and no nodes of either kind remain.
        while (mock.getNumberOfJobsIssued(preemptable=False) > 0
               or mock.getNumberOfJobsIssued(preemptable=True) > 0
               or mock.getNumberOfNodes() > 0
               or mock.getNumberOfNodes(preemptable=True) > 0):
            logger.info(
                "Running, non-preemptable queue size: %s, non-preemptable workers: %s, "
                "preemptable queue size: %s, preemptable workers: %s",
                mock.getNumberOfJobsIssued(preemptable=False),
                mock.getNumberOfNodes(preemptable=False),
                mock.getNumberOfJobsIssued(preemptable=True),
                mock.getNumberOfNodes(preemptable=True))
            time.sleep(0.5)
        logger.info("We waited %s for cluster to finish" %
                    (time.time() - startTime))
    finally:
        clusterScaler.shutdown()

    # Print some info about the autoscaling. The delegate index is used
    # as the preemptable flag (index 0 -> False, anything else -> True).
    for i, bs in enumerate(mock.delegates):
        preemptable = bool(i)
        timePerJob = (bs.totalWorkerTime / bs.totalJobs
                      if bs.totalJobs > 0 else 0.0)
        logger.info("Preemptable: %s, Total-jobs: %s: Max-workers: %s,"
                    " Total-worker-time: %s, Worker-time-per-job: %s",
                    preemptable, bs.totalJobs, bs.maxWorkers,
                    bs.totalWorkerTime, timePerJob)
def testMinNodes(self):
    """
    Even with an empty job queue, the scaler's estimate should be
    "minNodes" nodes for each node type.
    """
    self.config.betaInertia = 0.0
    self.config.minNodes = [2, 3]
    scaler = ClusterScaler(self.provisioner, self.leader, self.config)

    # No queued jobs at all.
    noJobs = []
    counts = scaler.getEstimatedNodeCounts(noJobs, defaultdict(int))

    self.assertEqual(counts[r3_8xlarge], 2)
    self.assertEqual(counts[c4_8xlarge_preemptable], 3)
def testBetaInertia(self):
    """
    With betaInertia at 0.5, repeated calls to smoothEstimate for a fixed
    raw estimate should step half-way towards it each time and finally
    reach it.
    """
    # An unrealistically high inertia, chosen because it makes the
    # expected values easy to work out.
    self.config.betaInertia = 0.5
    scaler = ClusterScaler(self.provisioner, self.leader, self.config)

    nodeShape = c4_8xlarge_preemptable
    rawEstimate = 100

    # First call: 50% of the way to 100.
    self.assertEqual(scaler.smoothEstimate(nodeShape, rawEstimate), 50)
    # Second call: 75% of the way.
    self.assertEqual(scaler.smoothEstimate(nodeShape, rawEstimate), 75)

    # As long as betaInertia is below 1 the smoothed value should
    # eventually converge on the raw estimate.
    for _ in range(1000):
        scaler.smoothEstimate(nodeShape, rawEstimate)
    self.assertEqual(scaler.smoothEstimate(nodeShape, rawEstimate), 100)
def testPreemptableDeficitResponse(self):
    """
    If an earlier pass of the loop left a preemptable deficit, the scaler
    should compensate by requesting non-preemptable nodes in proportion
    to preemptableCompensation.
    """
    self.config.targetTime = 1
    self.config.betaInertia = 0.0
    self.config.maxNodes = [10, 10]
    # 0.5 means one non-preemptable node per two "missing" preemptable
    # nodes.
    self.config.preemptableCompensation = 0.5

    # preemptableCompensation only applies when preemptable and
    # non-preemptable nodes of the same type coexist, so the config is
    # set up explicitly to contain both.
    self.config.nodeTypes = [c4_8xlarge_preemptable, c4_8xlarge]
    autoscaledTypes = []
    for nodeShape in self.config.nodeTypes:
        autoscaledTypes.append(({nodeShape}, None))
    self.provisioner.setAutoscaledNodeTypes(autoscaledTypes)

    scaler = ClusterScaler(self.provisioner, self.leader, self.config)

    # Pretend a previous run ended 5 preemptable nodes short (for
    # example because a spot bid was lost).
    scaler.preemptableNodeDeficit[c4_8xlarge] = 5

    # Only preemptable jobs are queued, so bin-packing alone should
    # still estimate 0 non-preemptable nodes.
    preemptableJob = Shape(wallTime=3600,
                           cores=2,
                           memory=h2b('1G'),
                           disk=h2b('2G'),
                           preemptable=True)
    jobShapes = [preemptableJob] * 1000

    counts = scaler.getEstimatedNodeCounts(jobShapes, defaultdict(int))

    # The preemptable estimate is irrelevant here. What matters is the
    # response to the deficit:
    # 0.5 * 5 (preemptableCompensation * the deficit) = 3 (rounded up).
    nonPreemptableShape = self.provisioner.node_shapes_for_testing[1]
    self.assertEqual(counts[nonPreemptableShape], 3)
def mainLoop(config, batchSystem, provisioner, jobStore, rootJobWrapper, jobCache=None):
    """
    This is the main loop from which jobs are issued and processed.

    If jobCache is passed, it must be a dict from job ID to pre-existing
    JobWrapper objects. Jobs will be loaded from the cache (which can be
    downloaded from the jobStore in a batch).

    :param config: run configuration; ``config.jobStore`` is used when
        reporting failures.
    :param batchSystem: batch system to issue jobs to; it must have no
        issued jobs on entry.
    :param provisioner: node provisioner, or None. A ClusterScaler is only
        created when this is not None.
    :param jobStore: job store holding the workflow state.
    :param rootJobWrapper: the root job of the workflow.
    :param jobCache: optional dict of job ID to JobWrapper, see above.

    :raises: toil.leader.FailedJobsException if at the end of the function
        there remain failed jobs, or if the root job's return value cannot
        be unpickled.

    :return: The return value of the root job's run function.
    :rtype: Any
    """
    # Get a snap shot of the current state of the jobs in the jobStore
    toilState = ToilState(jobStore, rootJobWrapper, jobCache=jobCache)

    # Create a service manager to start and terminate services. Each
    # resource below is constructed *before* the try block that shuts it
    # down, so a failing constructor surfaces its own exception instead of
    # an unbound-name error raised from the finally clause.
    serviceManager = ServiceManager(jobStore)
    try:
        assert len(batchSystem.getIssuedBatchJobIDs()) == 0  # Batch system must start with no active jobs!
        logger.info(
            "Checked batch system has no running jobs and no updated jobs")

        # Load the jobBatcher class - used to track jobs submitted to the
        # batch-system
        jobBatcher = JobBatcher(config, batchSystem, jobStore, toilState,
                                serviceManager)
        logger.info(
            "Found %s jobs to start and %i jobs with successors to run",
            len(toilState.updatedJobs), len(toilState.successorCounts))

        # Start the stats/logging aggregation process
        statsAndLogging = StatsAndLogging(jobStore)
        try:
            # Create cluster scaling processes if the provisioner is not None
            clusterScaler = None
            if provisioner is not None:
                clusterScaler = ClusterScaler(provisioner, jobBatcher, config)
            try:
                jobBatcher.clusterScaler = clusterScaler
                innerLoop(jobStore, config, batchSystem, toilState,
                          jobBatcher, serviceManager, statsAndLogging)
            finally:
                if clusterScaler is not None:
                    logger.info('Waiting for workers to shutdown')
                    startTime = time.time()
                    clusterScaler.shutdown()
                    logger.info('Worker shutdown complete in %s seconds',
                                time.time() - startTime)
        finally:
            # Shutdown the stats and logging process
            statsAndLogging.shutdown()
    finally:
        serviceManager.shutdown()

    # Filter the failed jobs down to those that still exist in the job store
    toilState.totalFailedJobs = set(
        filter(jobStore.exists, toilState.totalFailedJobs))
    numFailedJobs = len(toilState.totalFailedJobs)

    logger.info("Finished toil run %s" %
                ("successfully" if numFailedJobs == 0 else
                 ("with %s failed jobs" % numFailedJobs)))

    if numFailedJobs > 0:
        logger.info("Failed jobs at end of the run: %s",
                    toilState.totalFailedJobs)
        raise FailedJobsException(config.jobStore, numFailedJobs)

    # Parse out the return value from the root job. The shared file
    # "rootJobReturnValue" holds the ID of the file containing the pickle.
    # NOTE(review): cPickle.load trusts the job store contents; this is
    # only safe if the job store cannot be written by untrusted parties.
    with jobStore.readSharedFileStream("rootJobReturnValue") as jobStoreFileID:
        with jobStore.readFileStream(jobStoreFileID.read()) as fH:
            try:
                return cPickle.load(fH)  # rootJobReturnValue
            except EOFError:
                logger.exception("Failed to unpickle root job return value")
                # Use the same (job store locator, count) arguments as the
                # raise above, rather than a stream object and a set.
                raise FailedJobsException(config.jobStore,
                                          len(toilState.totalFailedJobs))
def _testClusterScaling(self, config, numJobs, numPreemptableJobs, jobShape):
    """
    Drive a ClusterScaler against the mock batch system/provisioner with
    a given mix of jobs, and check by waiting that every job gets run and
    that the cluster scales back down afterwards.
    """
    # Queue the requested preemptable and non-preemptable jobs, wait for
    # them to complete, then log the worker time expended and the total
    # number of worker nodes used.
    logger.info("Creating dummy batch system and scalar")
    mock = MockBatchSystemAndProvisioner(config, secondsPerJob=2.0)
    mock.start()
    clusterScaler = ClusterScaler(mock, mock, config)
    clusterScaler.start()
    try:
        # Queue the jobs to complete
        logger.info("Creating test jobs")
        for _ in range(numJobs):
            mock.addJob(jobShape=jobShape)
        for _ in range(numPreemptableJobs):
            mock.addJob(jobShape=jobShape, preemptable=True)

        # Feed the scaler some completed-job history for every kind of
        # job that was actually queued.
        for preemptable in (True, False):
            queued = numPreemptableJobs if preemptable else numJobs
            if queued > 0:
                # 1000 random jobs, each drawn from 1 up to (but not
                # including) the node shape's value in every dimension.
                for _ in range(1000):
                    nodeShape = mock.getNodeShape(nodeType=jobShape)
                    requirements = dict(
                        memory=random.choice(range(1, nodeShape.memory)),
                        cores=random.choice(range(1, nodeShape.cores)),
                        disk=random.choice(range(1, nodeShape.disk)),
                        preemptable=preemptable)
                    completedJob = JobNode(jobStoreID=1,
                                           requirements=requirements,
                                           command=None,
                                           jobName='testClusterScaling',
                                           unitName='')
                    wallTime = random.choice(range(1, nodeShape.wallTime))
                    clusterScaler.addCompletedJob(completedJob, wallTime)

        logger.info("Waiting for jobs to be processed")
        startTime = time.time()
        # Keep polling until both queues are empty and no nodes remain.
        while (mock.getNumberOfJobsIssued(preemptable=False) > 0
               or mock.getNumberOfJobsIssued(preemptable=True) > 0
               or mock.getNumberOfNodes() > 0
               or mock.getNumberOfNodes(preemptable=True) > 0):
            status = (mock.getNumberOfJobsIssued(preemptable=False),
                      mock.getNumberOfNodes(preemptable=False),
                      mock.getNumberOfJobsIssued(preemptable=True),
                      mock.getNumberOfNodes(preemptable=True))
            logger.info(
                "Running, non-preemptable queue size: %s, non-preemptable workers: %s, "
                "preemptable queue size: %s, preemptable workers: %s" % status)
            clusterScaler.check()
            time.sleep(0.5)
        elapsed = time.time() - startTime
        logger.info("We waited %s for cluster to finish" % elapsed)
    finally:
        clusterScaler.shutdown()
        mock.shutDown()

    # Print some info about the autoscaling
    if mock.totalJobs > 0:
        timePerJob = old_div(mock.totalWorkerTime, mock.totalJobs)
    else:
        timePerJob = 0.0
    logger.info("Total-jobs: %s: Max-workers: %s,"
                " Total-worker-time: %s, Worker-time-per-job: %s" %
                (mock.totalJobs, sum(mock.maxWorkers.values()),
                 mock.totalWorkerTime, timePerJob))
def testClusterScalingMultipleNodeTypes(self):
    """
    Run the scaler against three node types and check that jobs end up
    on the small and medium types while no large node is ever used.
    """
    smallNode = Shape(20, 5, 10, 10, False)
    mediumNode = Shape(20, 10, 10, 10, False)
    largeNode = Shape(20, 20, 10, 10, False)
    numJobs = 100

    config = Config()

    # Make defaults dummy values
    config.defaultMemory = 1
    config.defaultCores = 1
    config.defaultDisk = 1

    # No preemptable nodes/jobs
    config.preemptableNodeTypes = []
    config.minPreemptableNodes = []
    config.maxPreemptableNodes = []

    # Non-preemptable node types, deliberately listed out of size order
    # to make sure the node types don't have to be ordered.
    config.nodeTypes = [largeNode, smallNode, mediumNode]
    config.minNodes = [0, 0, 0]
    config.maxNodes = [10, 10]  # test expansion of this list

    # Algorithm parameters
    config.alphaPacking = 0.8
    config.betaInertia = 1.2
    config.scaleInterval = 3

    mock = MockBatchSystemAndProvisioner(config, secondsPerJob=2.0)
    clusterScaler = ClusterScaler(mock, mock, config)
    clusterScaler.start()
    mock.start()

    try:
        # Queue numJobs small jobs, then numJobs medium jobs.
        for _ in range(numJobs):
            mock.addJob(jobShape=smallNode)
        for _ in range(numJobs):
            mock.addJob(jobShape=mediumNode)

        # Add medium completed jobs
        for _ in range(1000):
            # NOTE(review): disk is taken from largeNode.cores, not
            # largeNode.disk; presumably harmless because the shapes
            # above use the same value for both -- confirm intent.
            requirements = dict(
                memory=random.choice(
                    range(smallNode.memory, mediumNode.memory)),
                cores=mediumNode.cores,
                disk=largeNode.cores,
                preemptable=False)
            completedJob = JobNode(jobStoreID=1,
                                   requirements=requirements,
                                   command=None,
                                   jobName='testClusterScaling',
                                   unitName='')
            wallTime = random.choice(range(1, 10))
            clusterScaler.addCompletedJob(completedJob, wallTime)

        # Poll until the queue is empty and every node has gone away.
        while (mock.getNumberOfJobsIssued() > 0
               or mock.getNumberOfNodes() > 0):
            logger.info("%i nodes currently provisioned" %
                        mock.getNumberOfNodes())
            # Make sure there are no large nodes
            largeNodeCount = mock.getNumberOfNodes(nodeType=largeNode)
            self.assertEqual(largeNodeCount, 0)
            clusterScaler.check()
            time.sleep(0.5)
    finally:
        clusterScaler.shutdown()
        mock.shutDown()

    # Make sure jobs ran on both the small and medium node types
    self.assertGreater(mock.totalJobs, 0)
    self.assertGreater(mock.maxWorkers[smallNode], 0)
    self.assertGreater(mock.maxWorkers[mediumNode], 0)
    self.assertEqual(mock.maxWorkers[largeNode], 0)
def __init__(self, config, batchSystem, provisioner, jobStore, rootJob, jobCache=None):
    """
    Set up the leader's state for a run.

    :param toil.common.Config config:
    :param toil.batchSystems.abstractBatchSystem.AbstractBatchSystem batchSystem:
    :param toil.provisioners.abstractProvisioner.AbstractProvisioner provisioner:
    :param toil.jobStores.abstractJobStore.AbstractJobStore jobStore:
    :param toil.jobGraph.JobGraph rootJob:

    If jobCache is passed, it must be a dict from job ID to pre-existing
    JobGraph objects. Jobs will be loaded from the cache (which can be
    downloaded from the jobStore in a batch) during the construction of
    the ToilState object.
    """
    # Run parameters
    self.config = config

    # Job store and the locator it was configured with
    self.jobStore = jobStore
    self.jobStoreLocator = config.jobStore

    # Snapshot of the current state of the jobs in the jobStore
    self.toilState = ToilState(jobStore, rootJob, jobCache=jobCache)
    updatedJobCount = len(self.toilState.updatedJobs)
    successorCount = len(self.toilState.successorCounts)
    logger.info("Found %s jobs to start and %i jobs with successors to run",
                updatedJobCount, successorCount)

    # Batch system; it must start with no active jobs!
    self.batchSystem = batchSystem
    assert len(self.batchSystem.getIssuedBatchJobIDs()) == 0
    logger.info("Checked batch system has no running jobs and no updated jobs")

    # Map of batch system IDs to IssuedJob tuples
    self.jobBatchSystemIDToIssuedJob = {}

    # Number of preemptable jobs currently being run by the batch system
    self.preemptableJobsIssued = 0

    # Count of service jobs issued, used to limit the number of services
    # issued to the batch system, plus the queue of service jobs that
    # await scheduling.
    self.serviceJobsIssued = 0
    self.serviceJobsToBeIssued = []

    # Equivalents for service jobs to be run on preemptable nodes
    self.preemptableServiceJobsIssued = 0
    self.preemptableServiceJobsToBeIssued = []

    # Number of times each job has been lost by the batch system, used to
    # decide whether to reissue an apparently missing job
    self.reissueMissingJobs_missingHash = {}

    # Class used to create/destroy nodes in the cluster; may be None when
    # using a statically defined cluster
    self.provisioner = provisioner

    # Cluster scaling thread, created only when there is a provisioner
    if self.provisioner is None:
        self.clusterScaler = None
    else:
        self.clusterScaler = ClusterScaler(self.provisioner, self, self.config)

    # A service manager thread to start and terminate services
    self.serviceManager = ServiceManager(jobStore, self.toilState)

    # A thread to manage the aggregation of statistics and logging from
    # the run
    self.statsAndLogging = StatsAndLogging(self.jobStore, self.config)

    # Bookkeeping used to monitor deadlocked jobs
    self.potentialDeadlockedJobs = set()
    self.potentialDeadlockTime = 0