Example #1
 def testNoLaunchingIfDeltaAlreadyMet(self):
     """
     Check that the scaler doesn't try to launch "0" more instances if
     the delta was able to be met by unignoring nodes.
     """
     # We have only one node type for simplicity
     self.provisioner.nodeTypes = ['c4.8xlarge']
     self.provisioner.nodeShapes = [c4_8xlarge]
     scaler = ClusterScaler(self.provisioner, self.leader, self.config)
     # Pretend there is one ignored worker in the cluster
     self.provisioner.getProvisionedWorkers = MagicMock(return_value=[
         Node('127.0.0.1',
              '127.0.0.1',
              'testNode',
              datetime.datetime.now().isoformat(),
              nodeType='c4.8xlarge',
              preemptable=True)
     ])
     scaler.ignoredNodes.add('127.0.0.1')
     # Exercise the updateClusterSize logic
     self.provisioner.addNodes = MagicMock()
     scaler.updateClusterSize({c4_8xlarge: 1})
     self.assertFalse(self.provisioner.addNodes.called,
                      "addNodes was called when no new nodes were needed")
     self.assertEqual(
         len(scaler.ignoredNodes), 0,
         "The scaler didn't unignore an ignored node when "
         "scaling up")
Example #2
 def testMaxNodes(self):
     """
     Set the scaler to be very aggressive, give it a ton of jobs, and
     make sure it doesn't go over maxNodes.
     """
     self.config.targetTime = 1
     self.config.betaInertia = 0.0
     self.config.maxNodes = [2, 3]
     scaler = ClusterScaler(self.provisioner, self.leader, self.config)
     jobShapes = [
         Shape(wallTime=3600,
               cores=2,
               memory=h2b('1G'),
               disk=h2b('2G'),
               preemptable=True)
     ] * 1000
     jobShapes.extend([
         Shape(wallTime=3600,
               cores=2,
               memory=h2b('1G'),
               disk=h2b('2G'),
               preemptable=False)
     ] * 1000)
     estimatedNodeCounts = scaler.getEstimatedNodeCounts(
         jobShapes, defaultdict(int))
     self.assertEqual(estimatedNodeCounts[r3_8xlarge], 2)
     self.assertEqual(estimatedNodeCounts[c4_8xlarge_preemptable], 3)
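
Note: h2b converts a human-readable size string such as '1G' into bytes; the tests presumably import it from a Toil helper module. A minimal stand-in with the same intent (binary multiples assumed) might look like this:

def h2b(size):
    """Hypothetical stand-in for h2b: '2G' -> 2 * 1024**3 bytes."""
    units = {'K': 1024, 'M': 1024 ** 2, 'G': 1024 ** 3, 'T': 1024 ** 4}
    size = size.strip()
    suffix = size[-1].upper()
    if suffix in units:
        return int(float(size[:-1]) * units[suffix])
    return int(size)

assert h2b('1G') == 1024 ** 3
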
Example #3
    def _testClusterScaling(self, config, numJobs, numPreemptableJobs):
        """
        Test the ClusterScaler class with different patterns of job creation. Tests ascertain
        that autoscaling occurs and that all the jobs are run.
        """
        # First do a simple test of creating 100 preemptable and non-preemptable jobs and check the
        # jobs are completed okay, then print the amount of worker time expended and the total
        # number of worker nodes used.

        logger.info("Creating dummy batch system and scalar")

        mock = MockBatchSystemAndProvisioner(config, secondsPerJob=2.0)
        clusterScaler = ClusterScaler(mock, mock, config)

        # Add 100 jobs to complete 
        logger.info("Creating test jobs")
        map(lambda x: mock.addJob(), range(numJobs))
        map(lambda x: mock.addJob(preemptable=True), range(numPreemptableJobs))

        # Add some completed jobs
        for preemptable in (True, False):
            if preemptable and numPreemptableJobs > 0 or not preemptable and numJobs > 0:
                # Add 1000 random jobs
                for i in xrange(1000):
                    x = mock.getNodeShape(preemptable)
                    iJ = IssuedJob(1, memory=random.choice(range(1, x.memory)),
                                   cores=random.choice(range(1, x.cores)),
                                   disk=random.choice(range(1, x.disk)),
                                   preemptable=preemptable)
                    clusterScaler.addCompletedJob(iJ, random.choice(range(1, x.wallTime)))

        logger.info("Waiting for jobs to be processed")
        startTime = time.time()
        # Wait while the cluster processes the jobs
        while (mock.getNumberOfJobsIssued(preemptable=False) > 0 or
               mock.getNumberOfJobsIssued(preemptable=True) > 0 or
               mock.getNumberOfNodes() > 0 or
               mock.getNumberOfNodes(preemptable=True) > 0):
            logger.info("Running, non-preemptable queue size: %s, non-preemptable workers: %s, "
                        "preemptable queue size: %s, preemptable workers: %s",
                        mock.getNumberOfJobsIssued(preemptable=False),
                        mock.getNumberOfNodes(preemptable=False),
                        mock.getNumberOfJobsIssued(preemptable=True),
                        mock.getNumberOfNodes(preemptable=True))
            time.sleep(0.5)
        logger.info("We waited %s for cluster to finish" % (time.time() - startTime))
        clusterScaler.shutdown()

        # Print some info about the autoscaling
        for i, bs in enumerate(mock.delegates):
            preemptable = bool(i)
            logger.info("Preemptable: %s, Total-jobs: %s: Max-workers: %s,"
                        " Total-worker-time: %s, Worker-time-per-job: %s" %
                        (preemptable, bs.totalJobs, bs.maxWorkers,
                         bs.totalWorkerTime,
                         bs.totalWorkerTime / bs.totalJobs if bs.totalJobs > 0 else 0.0))
Example #4
 def testMinNodes(self):
     """
     Without any jobs queued, the scaler should still estimate "minNodes" nodes.
     """
     self.config.betaInertia = 0.0
     self.config.minNodes = [2, 3]
     scaler = ClusterScaler(self.provisioner, self.leader, self.config)
     jobShapes = []
     estimatedNodeCounts = scaler.getEstimatedNodeCounts(jobShapes, defaultdict(int))
     self.assertEqual(estimatedNodeCounts[r3_8xlarge], 2)
     self.assertEqual(estimatedNodeCounts[c4_8xlarge_preemptable], 3)
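
Note: together with testMaxNodes above, this shows that the final per-shape estimate is clamped into the configured [minNodes, maxNodes] range. A hedged sketch of just that clamping step (clamp_estimate is an illustrative name, not the scaler's real method):

def clamp_estimate(raw_estimate, min_nodes, max_nodes):
    """Clamp a bin-packing estimate to the configured per-type bounds."""
    return max(min_nodes, min(raw_estimate, max_nodes))

# No queued jobs: the raw estimate is 0, but minNodes=2 keeps two nodes around.
assert clamp_estimate(0, 2, 10) == 2
# A huge backlog is capped at maxNodes=3.
assert clamp_estimate(500, 0, 3) == 3
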
Example #5
    def testPreemptableDeficitResponse(self):
        """
        When a preemptable deficit was detected by a previous run of the
        loop, the scaler should add non-preemptable nodes to
        compensate in proportion to preemptableCompensation.
        """
        self.config.targetTime = 1
        self.config.betaInertia = 0.0
        self.config.maxNodes = [10, 10]
        # This should mean that one non-preemptable node is launched
        # for every two preemptable nodes "missing".
        self.config.preemptableCompensation = 0.5
        # In this case, we want to explicitly set up the config so
        # that we can have preemptable and non-preemptable nodes of
        # the same type. That is the only situation where
        # preemptableCompensation applies.
        self.config.nodeTypes = [c4_8xlarge_preemptable, c4_8xlarge]
        self.provisioner.setAutoscaledNodeTypes([
            ({t}, None) for t in self.config.nodeTypes
        ])

        scaler = ClusterScaler(self.provisioner, self.leader, self.config)
        # Simulate a situation where a previous run caused a
        # "deficit" of 5 preemptable nodes (e.g. a spot bid was lost)
        scaler.preemptableNodeDeficit[c4_8xlarge] = 5
        # Add a bunch of preemptable jobs (so the bin-packing
        # estimate for the non-preemptable node should still be 0)
        jobShapes = [
            Shape(wallTime=3600,
                  cores=2,
                  memory=h2b('1G'),
                  disk=h2b('2G'),
                  preemptable=True)
        ] * 1000
        estimatedNodeCounts = scaler.getEstimatedNodeCounts(
            jobShapes, defaultdict(int))
        # We don't care about the estimated size of the preemptable
        # nodes. All we want to know is if we responded to the deficit
        # properly: 0.5 * 5 (preemptableCompensation * the deficit) = 3 (rounded up).
        self.assertEqual(
            estimatedNodeCounts[self.provisioner.node_shapes_for_testing[1]],
            3)
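
Note: the expected value of 3 follows from the rule described in the comments: the number of non-preemptable nodes added is the deficit scaled by preemptableCompensation, rounded up. A small sketch of just that arithmetic (compensation_nodes is an illustrative name):

import math

def compensation_nodes(deficit, preemptable_compensation):
    """Non-preemptable nodes to add for a given preemptable-node deficit."""
    return int(math.ceil(deficit * preemptable_compensation))

# 5 missing preemptable nodes at a 0.5 compensation rate -> 3 nodes (2.5 rounded up).
assert compensation_nodes(5, 0.5) == 3
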
Example #6
    def testPreemptableDeficitIsSet(self):
        """
        Make sure that updateClusterSize sets the preemptable deficit if
        it can't launch preemptable nodes properly. That way, the
        deficit can be communicated to the next run of
        estimateNodeCount.
        """
        # Mock out addNodes. We want to pretend it had trouble
        # launching all 5 nodes, and could only launch 3.
        self.provisioner.addNodes = MagicMock(return_value=3)
        # Pretend there are no nodes in the cluster right now
        self.provisioner.getProvisionedWorkers = MagicMock(return_value=[])
        # In this case, we want to explicitly set up the config so
        # that we can have preemptable and non-preemptable nodes of
        # the same type. That is the only situation where
        # preemptableCompensation applies.
        self.config.nodeTypes = ['c4.8xlarge:0.6', 'c4.8xlarge']
        self.provisioner.nodeTypes = ['c4.8xlarge', 'c4.8xlarge']
        self.provisioner.nodeShapes = [c4_8xlarge_preemptable, c4_8xlarge]
        scaler = ClusterScaler(self.provisioner, self.leader, self.config)
        estimatedNodeCounts = {c4_8xlarge_preemptable: 5, c4_8xlarge: 0}
        scaler.updateClusterSize(estimatedNodeCounts)
        self.assertEqual(scaler.preemptableNodeDeficit['c4.8xlarge'], 2)
        self.provisioner.addNodes.assert_called_once()

        # OK, now pretend this is a while later and the nodes were actually
        # launched properly. The deficit should disappear.
        self.provisioner.addNodes = MagicMock(return_value=5)
        scaler.updateClusterSize(estimatedNodeCounts)
        self.assertEqual(scaler.preemptableNodeDeficit['c4.8xlarge'], 0)
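
Note: a minimal sketch of the bookkeeping this test expects from updateClusterSize (record_deficit is an illustrative helper; the real method does considerably more): the deficit for a node type is the difference between the nodes requested and the nodes the provisioner actually delivered, recomputed on every pass.

def record_deficit(wanted, launched):
    """Preemptable-node deficit left after one provisioning attempt."""
    return max(wanted - launched, 0)

# First pass: asked for 5, got 3 -> a deficit of 2 is recorded.
assert record_deficit(5, 3) == 2
# A later pass launches all 5 -> the deficit disappears.
assert record_deficit(5, 5) == 0
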
Example #7
    def _testClusterScaling(self, config, numJobs, numPreemptableJobs):
        """
        Test the ClusterScaler class with different patterns of job creation. Tests ascertain
        that autoscaling occurs and that all the jobs are run.
        """
        # First do a simple test of creating 100 preemptable and non-preemptable jobs and check the
        # jobs are completed okay, then print the amount of worker time expended and the total
        # number of worker nodes used.

        logger.info("Creating dummy batch system and scalar")

        mock = MockBatchSystemAndProvisioner(config, secondsPerJob=2.0)
        clusterScaler = ClusterScaler(mock, mock, config)

        # Add 100 jobs to complete
        logger.info("Creating test jobs")
        map(lambda x: mock.addJob(), range(numJobs))
        map(lambda x: mock.addJob(preemptable=True), range(numPreemptableJobs))

        # Add some completed jobs
        for preemptable in (True, False):
            if preemptable and numPreemptableJobs > 0 or not preemptable and numJobs > 0:
                # Add 1000 random jobs
                for i in xrange(1000):
                    x = mock.getNodeShape(preemptable)
                    iJ = IssuedJob(1,
                                   memory=random.choice(range(1, x.memory)),
                                   cores=random.choice(range(1, x.cores)),
                                   disk=random.choice(range(1, x.disk)),
                                   preemptable=preemptable)
                    clusterScaler.addCompletedJob(
                        iJ, random.choice(range(1, x.wallTime)))

        logger.info("Waiting for jobs to be processed")
        startTime = time.time()
        # Wait while the cluster processes the jobs
        while (mock.getNumberOfJobsIssued(preemptable=False) > 0
               or mock.getNumberOfJobsIssued(preemptable=True) > 0
               or mock.getNumberOfNodes() > 0
               or mock.getNumberOfNodes(preemptable=True) > 0):
            logger.info(
                "Running, non-preemptable queue size: %s, non-preemptable workers: %s, "
                "preemptable queue size: %s, preemptable workers: %s",
                mock.getNumberOfJobsIssued(preemptable=False),
                mock.getNumberOfNodes(preemptable=False),
                mock.getNumberOfJobsIssued(preemptable=True),
                mock.getNumberOfNodes(preemptable=True))
            time.sleep(0.5)
        logger.info("We waited %s for cluster to finish" %
                    (time.time() - startTime))
        clusterScaler.shutdown()

        # Print some info about the autoscaling
        for i, bs in enumerate(mock.delegates):
            preemptable = bool(i)
            logger.info("Preemptable: %s, Total-jobs: %s: Max-workers: %s,"
                        " Total-worker-time: %s, Worker-time-per-job: %s" %
                        (preemptable, bs.totalJobs, bs.maxWorkers,
                         bs.totalWorkerTime, bs.totalWorkerTime /
                         bs.totalJobs if bs.totalJobs > 0 else 0.0))
Example #8
 def testBetaInertia(self):
     # This is really high, but makes things easy to calculate.
     self.config.betaInertia = 0.5
     scaler = ClusterScaler(self.provisioner, self.leader, self.config)
     # OK, smoothing things this much should get us 50% of the way to 100.
     self.assertEqual(scaler.smoothEstimate(c4_8xlarge_preemptable, 100), 50)
     # Now we should be at 75%.
     self.assertEqual(scaler.smoothEstimate(c4_8xlarge_preemptable, 100), 75)
     # We should eventually converge on our estimate as long as betaInertia is below 1.
     for _ in range(1000):
         scaler.smoothEstimate(c4_8xlarge_preemptable, 100)
     self.assertEqual(scaler.smoothEstimate(c4_8xlarge_preemptable, 100), 100)
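
Note: the expected values follow directly from exponential smoothing. A standalone sketch of the update rule assumed here (the real smoothEstimate presumably keeps per-shape state on the scaler object):

def smooth(previous, estimate, beta_inertia):
    """One exponential-smoothing step: keep betaInertia of the old value."""
    return beta_inertia * previous + (1.0 - beta_inertia) * estimate

value = 0.0
value = smooth(value, 100, 0.5)   # 50.0: halfway to the new estimate
value = smooth(value, 100, 0.5)   # 75.0: halfway again
for _ in range(1000):
    value = smooth(value, 100, 0.5)
assert round(value) == 100        # converges whenever betaInertia < 1
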
Example #9
def mainLoop(config,
             batchSystem,
             provisioner,
             jobStore,
             rootJobWrapper,
             jobCache=None):
    """
    This is the main loop from which jobs are issued and processed.
    
    If jobCache is passed, it must be a dict from job ID to pre-existing
    JobWrapper objects. Jobs will be loaded from the cache (which can be
    downloaded from the jobStore in a batch).

    :raises: toil.leader.FailedJobsException if at the end of the function there remain \
    failed jobs
    
    :return: The return value of the root job's run function.
    :rtype: Any
    """

    # Get a snapshot of the current state of the jobs in the jobStore
    toilState = ToilState(jobStore, rootJobWrapper, jobCache=jobCache)

    # Create a service manager to start and terminate services
    try:
        serviceManager = ServiceManager(jobStore)

        # Batch system must start with no active jobs!
        assert len(batchSystem.getIssuedBatchJobIDs()) == 0
        logger.info(
            "Checked batch system has no running jobs and no updated jobs")

        # Load the jobBatcher class - used to track jobs submitted to the batch-system
        jobBatcher = JobBatcher(config, batchSystem, jobStore, toilState,
                                serviceManager)
        logger.info(
            "Found %s jobs to start and %i jobs with successors to run",
            len(toilState.updatedJobs), len(toilState.successorCounts))

        try:
            # Start the stats/logging aggregation process
            statsAndLogging = StatsAndLogging(jobStore)

            try:
                # Create cluster scaling processes if the provisioner is not None
                if provisioner is None:
                    clusterScaler = None
                else:
                    clusterScaler = ClusterScaler(provisioner, jobBatcher,
                                                  config)
                    jobBatcher.clusterScaler = clusterScaler
                innerLoop(jobStore, config, batchSystem, toilState, jobBatcher,
                          serviceManager, statsAndLogging)
            finally:
                if provisioner is not None:
                    logger.info('Waiting for workers to shutdown')
                    startTime = time.time()
                    clusterScaler.shutdown()
                    logger.info('Worker shutdown complete in %s seconds',
                                time.time() - startTime)
        finally:
            # Shutdown the stats and logging process
            statsAndLogging.shutdown()
    finally:
        serviceManager.shutdown()

    # Filter the failed jobs
    toilState.totalFailedJobs = set(
        filter(jobStore.exists, toilState.totalFailedJobs))

    logger.info("Finished toil run %s" %
                ("successfully" if len(toilState.totalFailedJobs) == 0 else
                 ("with %s failed jobs" % len(toilState.totalFailedJobs))))
    if len(toilState.totalFailedJobs):
        logger.info("Failed jobs at end of the run: %s",
                    toilState.totalFailedJobs)

    # Cleanup
    if len(toilState.totalFailedJobs) > 0:
        raise FailedJobsException(config.jobStore,
                                  len(toilState.totalFailedJobs))

    # Parse out the return value from the root job
    with jobStore.readSharedFileStream("rootJobReturnValue") as jobStoreFileID:
        with jobStore.readFileStream(jobStoreFileID.read()) as fH:
            try:
                return cPickle.load(fH)  # rootJobReturnValue
            except EOFError:
                logger.exception("Failed to unpickle root job return value")
                raise FailedJobsException(jobStoreFileID,
                                          toilState.totalFailedJobs)
Example #10
    def testRounding(self):
        """
        Test that the ClusterScaler's _round method rounds to the nearest
        integer, with ties (X.5) rounded away from zero.
        """

        # Get a ClusterScaler
        self.config.targetTime = 1
        self.config.betaInertia = 0.0
        self.config.maxNodes = [2, 3]
        scaler = ClusterScaler(self.provisioner, self.leader, self.config)

        # Exact integers round to themselves
        self.assertEqual(scaler._round(0.0), 0)
        self.assertEqual(scaler._round(1.0), 1)
        self.assertEqual(scaler._round(-1.0), -1)
        self.assertEqual(scaler._round(123456789101112.13), 123456789101112)

        # Decimals other than X.5 round to the side they are closer to
        self.assertEqual(scaler._round(1E-10), 0)
        self.assertEqual(scaler._round(0.5 + 1E-15), 1)
        self.assertEqual(scaler._round(-0.9), -1)
        self.assertEqual(scaler._round(-0.4), 0)

        # Decimals at exactly X.5 round away from 0
        self.assertEqual(scaler._round(0.5), 1)
        self.assertEqual(scaler._round(-0.5), -1)
        self.assertEqual(scaler._round(2.5), 3)
        self.assertEqual(scaler._round(-2.5), -3)
        self.assertEqual(scaler._round(15.5), 16)
        self.assertEqual(scaler._round(-15.5), -16)
        self.assertEqual(scaler._round(123456789101112.5), 123456789101113)
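
Note: the behaviour asserted here is "round half away from zero", which differs from Python 3's built-in round (banker's rounding gives round(2.5) == 2). A self-contained sketch with the semantics the test expects (not necessarily the scaler's actual implementation):

import math

def round_half_away_from_zero(x):
    """Round to the nearest integer, breaking .5 ties away from zero."""
    return int(math.floor(x + 0.5)) if x >= 0 else int(math.ceil(x - 0.5))

assert round_half_away_from_zero(2.5) == 3
assert round_half_away_from_zero(-2.5) == -3
assert round_half_away_from_zero(0.5 + 1e-15) == 1
assert round_half_away_from_zero(-0.4) == 0
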
Example #11
    def _testClusterScaling(self, config, numJobs, numPreemptableJobs,
                            jobShape):
        """
        Test the ClusterScaler class with different patterns of job creation. Tests ascertain
        that autoscaling occurs and that all the jobs are run.
        """
        # First do a simple test of creating 100 preemptable and non-preemptable jobs and check the
        # jobs are completed okay, then print the amount of worker time expended and the total
        # number of worker nodes used.

        logger.info("Creating dummy batch system and scalar")

        mock = MockBatchSystemAndProvisioner(config, secondsPerJob=2.0)
        mock.start()
        clusterScaler = ClusterScaler(mock, mock, config)
        clusterScaler.start()
        try:
            # Add 100 jobs to complete
            logger.info("Creating test jobs")
            list(
                map(lambda x: mock.addJob(jobShape=jobShape),
                    list(range(numJobs))))
            list(
                map(lambda x: mock.addJob(jobShape=jobShape, preemptable=True),
                    list(range(numPreemptableJobs))))

            # Add some completed jobs
            for preemptable in (True, False):
                if preemptable and numPreemptableJobs > 0 or not preemptable and numJobs > 0:
                    # Add 1000 random jobs
                    for i in range(1000):
                        x = mock.getNodeShape(nodeType=jobShape)
                        iJ = JobNode(
                            jobStoreID=1,
                            requirements=dict(
                                memory=random.choice(list(range(1, x.memory))),
                                cores=random.choice(list(range(1, x.cores))),
                                disk=random.choice(list(range(1, x.disk))),
                                preemptable=preemptable),
                            command=None,
                            jobName='testClusterScaling',
                            unitName='')
                        clusterScaler.addCompletedJob(
                            iJ, random.choice(list(range(1, x.wallTime))))

            logger.info("Waiting for jobs to be processed")
            startTime = time.time()
            # Wait while the cluster processes the jobs
            while (mock.getNumberOfJobsIssued(preemptable=False) > 0
                   or mock.getNumberOfJobsIssued(preemptable=True) > 0
                   or mock.getNumberOfNodes() > 0
                   or mock.getNumberOfNodes(preemptable=True) > 0):
                logger.info(
                    "Running, non-preemptable queue size: %s, non-preemptable workers: %s, "
                    "preemptable queue size: %s, preemptable workers: %s" %
                    (mock.getNumberOfJobsIssued(preemptable=False),
                     mock.getNumberOfNodes(preemptable=False),
                     mock.getNumberOfJobsIssued(preemptable=True),
                     mock.getNumberOfNodes(preemptable=True)))
                clusterScaler.check()
                time.sleep(0.5)
            logger.info("We waited %s for cluster to finish" %
                        (time.time() - startTime))
        finally:
            clusterScaler.shutdown()
            mock.shutDown()

        # Print some info about the autoscaling
        logger.info("Total-jobs: %s: Max-workers: %s,"
                    " Total-worker-time: %s, Worker-time-per-job: %s" %
                    (mock.totalJobs, sum(
                        mock.maxWorkers.values()), mock.totalWorkerTime,
                     old_div(mock.totalWorkerTime, mock.totalJobs)
                     if mock.totalJobs > 0 else 0.0))
Example #12
    def testClusterScalingMultipleNodeTypes(self):

        smallNode = Shape(20, 5, 10, 10, False)
        mediumNode = Shape(20, 10, 10, 10, False)
        largeNode = Shape(20, 20, 10, 10, False)

        numJobs = 100

        config = Config()

        # Make defaults dummy values
        config.defaultMemory = 1
        config.defaultCores = 1
        config.defaultDisk = 1

        # No preemptable nodes/jobs
        config.preemptableNodeTypes = []
        config.minPreemptableNodes = []
        config.maxPreemptableNodes = []  # No preemptable nodes

        #Make sure the node types don't have to be ordered
        config.nodeTypes = [largeNode, smallNode, mediumNode]
        config.minNodes = [0, 0, 0]
        config.maxNodes = [10, 10]  # test expansion of this list

        # Algorithm parameters
        config.alphaPacking = 0.8
        config.betaInertia = 1.2
        config.scaleInterval = 3

        mock = MockBatchSystemAndProvisioner(config, secondsPerJob=2.0)
        clusterScaler = ClusterScaler(mock, mock, config)
        clusterScaler.start()
        mock.start()

        try:
            #Add small jobs
            list(
                map(lambda x: mock.addJob(jobShape=smallNode),
                    list(range(numJobs))))
            list(
                map(lambda x: mock.addJob(jobShape=mediumNode),
                    list(range(numJobs))))

            #Add medium completed jobs
            for i in range(1000):
                iJ = JobNode(jobStoreID=1,
                             requirements=dict(memory=random.choice(
                                 range(smallNode.memory, mediumNode.memory)),
                                               cores=mediumNode.cores,
                                               disk=largeNode.disk,
                                               preemptable=False),
                             command=None,
                             jobName='testClusterScaling',
                             unitName='')
                clusterScaler.addCompletedJob(iJ, random.choice(range(1, 10)))

            while mock.getNumberOfJobsIssued() > 0 or mock.getNumberOfNodes() > 0:
                logger.info("%i nodes currently provisioned" %
                            mock.getNumberOfNodes())
                #Make sure there are no large nodes
                self.assertEqual(mock.getNumberOfNodes(nodeType=largeNode), 0)
                clusterScaler.check()
                time.sleep(0.5)
        finally:
            clusterScaler.shutdown()
            mock.shutDown()

        #Make sure jobs ran on both the small and medium node types
        self.assertTrue(mock.totalJobs > 0)
        self.assertTrue(mock.maxWorkers[smallNode] > 0)
        self.assertTrue(mock.maxWorkers[mediumNode] > 0)

        self.assertEqual(mock.maxWorkers[largeNode], 0)
Example #13
def mainLoop(config, batchSystem, provisioner, jobStore, rootJobWrapper, jobCache=None):
    """
    This is the main loop from which jobs are issued and processed.
    
    If jobCache is passed, it must be a dict from job ID to pre-existing
    JobWrapper objects. Jobs will be loaded from the cache (which can be
    downloaded from the jobStore in a batch).

    :raises: toil.leader.FailedJobsException if at the end of the function there remain \
    failed jobs
    
    :return: The return value of the root job's run function.
    :rtype: Any
    """

    # Get a snapshot of the current state of the jobs in the jobStore
    toilState = ToilState(jobStore, rootJobWrapper, jobCache=jobCache)

    # Create a service manager to start and terminate services
    try:
        serviceManager = ServiceManager(jobStore)
    
        assert len(batchSystem.getIssuedBatchJobIDs()) == 0 #Batch system must start with no active jobs!
        logger.info("Checked batch system has no running jobs and no updated jobs")
    
        # Load the jobBatcher class - used to track jobs submitted to the batch-system
        jobBatcher = JobBatcher(config, batchSystem, jobStore, toilState, serviceManager)
        logger.info("Found %s jobs to start and %i jobs with successors to run",
                    len(toilState.updatedJobs), len(toilState.successorCounts))
    
        try:
            # Start the stats/logging aggregation process
            statsAndLogging = StatsAndLogging(jobStore)
            
            try:
                # Create cluster scaling processes if the provisioner is not None
                if provisioner is None:
                    clusterScaler = None
                else:
                    clusterScaler = ClusterScaler(provisioner, jobBatcher, config)
                    jobBatcher.clusterScaler = clusterScaler
                innerLoop(jobStore, config, batchSystem, toilState, jobBatcher, serviceManager, statsAndLogging)
            finally:
                if provisioner is not None:
                    logger.info('Waiting for workers to shutdown')
                    startTime = time.time()
                    clusterScaler.shutdown()
                    logger.info('Worker shutdown complete in %s seconds', time.time() - startTime)
        finally:
            # Shutdown the stats and logging process
            statsAndLogging.shutdown()
    finally:
        serviceManager.shutdown()


    # Filter the failed jobs
    toilState.totalFailedJobs = set(filter(jobStore.exists, toilState.totalFailedJobs))

    logger.info("Finished toil run %s" %
                 ("successfully" if len(toilState.totalFailedJobs) == 0 else ("with %s failed jobs" % len(toilState.totalFailedJobs))))
    if len(toilState.totalFailedJobs):
        logger.info("Failed jobs at end of the run: %s", toilState.totalFailedJobs)

    # Cleanup
    if len(toilState.totalFailedJobs) > 0:
        raise FailedJobsException(config.jobStore, len(toilState.totalFailedJobs))

    # Parse out the return value from the root job
    with jobStore.readSharedFileStream("rootJobReturnValue") as jobStoreFileID:
        with jobStore.readFileStream(jobStoreFileID.read()) as fH:
            try:
                return cPickle.load(fH)  # rootJobReturnValue
            except EOFError:
                logger.exception("Failed to unpickle root job return value")
                raise FailedJobsException(jobStoreFileID, toilState.totalFailedJobs)
Example #14
    def __init__(self, config, batchSystem, provisioner, jobStore, rootJob, jobCache=None):
        """
        :param toil.common.Config config:
        :param toil.batchSystems.abstractBatchSystem.AbstractBatchSystem batchSystem:
        :param toil.provisioners.abstractProvisioner.AbstractProvisioner provisioner:
        :param toil.jobStores.abstractJobStore.AbstractJobStore jobStore:
        :param toil.jobGraph.JobGraph rootJob:

        If jobCache is passed, it must be a dict from job ID to pre-existing
        JobGraph objects. Jobs will be loaded from the cache (which can be
        downloaded from the jobStore in a batch) during the construction of the ToilState object.
        """
        # Object containing parameters for the run
        self.config = config

        # The job store
        self.jobStore = jobStore
        self.jobStoreLocator = config.jobStore

        # Get a snapshot of the current state of the jobs in the jobStore
        self.toilState = ToilState(jobStore, rootJob, jobCache=jobCache)
        logger.info("Found %s jobs to start and %i jobs with successors to run",
                        len(self.toilState.updatedJobs), len(self.toilState.successorCounts))

        # Batch system
        self.batchSystem = batchSystem
        assert len(self.batchSystem.getIssuedBatchJobIDs()) == 0 #Batch system must start with no active jobs!
        logger.info("Checked batch system has no running jobs and no updated jobs")

        # Map of batch system IDs to IssuedJob tuples
        self.jobBatchSystemIDToIssuedJob = {}

        # Number of preemptable jobs currently being run by the batch system
        self.preemptableJobsIssued = 0

        # Track the number of service jobs issued;
        # this is used to limit the number of services issued to the batch system
        self.serviceJobsIssued = 0
        self.serviceJobsToBeIssued = [] # A queue of service jobs that await scheduling
        #Equivalents for service jobs to be run on preemptable nodes
        self.preemptableServiceJobsIssued = 0
        self.preemptableServiceJobsToBeIssued = []

        # Hash to store the number of times a job is lost by the batch system,
        # used to decide whether to reissue an apparently missing job
        self.reissueMissingJobs_missingHash = {}

        # Class used to create/destroy nodes in the cluster, may be None if
        # using a statically defined cluster
        self.provisioner = provisioner

        # Create cluster scaling thread if the provisioner is not None
        self.clusterScaler = None if self.provisioner is None else ClusterScaler(self.provisioner, self, self.config)

        # A service manager thread to start and terminate services
        self.serviceManager = ServiceManager(jobStore, self.toilState)

        # A thread to manage the aggregation of statistics and logging from the run
        self.statsAndLogging = StatsAndLogging(self.jobStore, self.config)

        # Set used to monitor deadlocked jobs
        self.potentialDeadlockedJobs = set()
        self.potentialDeadlockTime = 0
Example #15
class Leader(object):
    """ Class that encapsulates the logic of the leader.
    """
    def __init__(self, config, batchSystem, provisioner, jobStore, rootJob, jobCache=None):
        """
        :param toil.common.Config config:
        :param toil.batchSystems.abstractBatchSystem.AbstractBatchSystem batchSystem:
        :param toil.provisioners.abstractProvisioner.AbstractProvisioner provisioner:
        :param toil.jobStores.abstractJobStore.AbstractJobStore jobStore:
        :param toil.jobGraph.JobGraph rootJob:

        If jobCache is passed, it must be a dict from job ID to pre-existing
        JobGraph objects. Jobs will be loaded from the cache (which can be
        downloaded from the jobStore in a batch) during the construction of the ToilState object.
        """
        # Object containing parameters for the run
        self.config = config

        # The job store
        self.jobStore = jobStore
        self.jobStoreLocator = config.jobStore

        # Get a snapshot of the current state of the jobs in the jobStore
        self.toilState = ToilState(jobStore, rootJob, jobCache=jobCache)
        logger.info("Found %s jobs to start and %i jobs with successors to run",
                        len(self.toilState.updatedJobs), len(self.toilState.successorCounts))

        # Batch system
        self.batchSystem = batchSystem
        assert len(self.batchSystem.getIssuedBatchJobIDs()) == 0 #Batch system must start with no active jobs!
        logger.info("Checked batch system has no running jobs and no updated jobs")

        # Map of batch system IDs to IssuedJob tuples
        self.jobBatchSystemIDToIssuedJob = {}

        # Number of preemptable jobs currently being run by the batch system
        self.preemptableJobsIssued = 0

        # Track the number of service jobs issued;
        # this is used to limit the number of services issued to the batch system
        self.serviceJobsIssued = 0
        self.serviceJobsToBeIssued = [] # A queue of service jobs that await scheduling
        #Equivalents for service jobs to be run on preemptable nodes
        self.preemptableServiceJobsIssued = 0
        self.preemptableServiceJobsToBeIssued = []

        # Timing of the jobGraph rescuing method
        self.timeSinceJobsLastRescued = None

        # Hash to store the number of times a job is lost by the batch system,
        # used to decide whether to reissue an apparently missing job
        self.reissueMissingJobs_missingHash = {}

        # Class used to create/destroy nodes in the cluster, may be None if
        # using a statically defined cluster
        self.provisioner = provisioner

        # Create cluster scaling thread if the provisioner is not None
        self.clusterScaler = None
        if self.provisioner is not None and len(self.provisioner.nodeTypes) > 0:
            self.clusterScaler = ClusterScaler(self.provisioner, self, self.config)

        # A service manager thread to start and terminate services
        self.serviceManager = ServiceManager(jobStore, self.toilState)

        # A thread to manage the aggregation of statistics and logging from the run
        self.statsAndLogging = StatsAndLogging(self.jobStore, self.config)

        # Set used to monitor deadlocked jobs
        self.potentialDeadlockedJobs = set()
        self.potentialDeadlockTime = 0

        # A dashboard that runs on the leader node in AWS clusters to track the state
        # of the cluster
        self.toilMetrics = None

        # internal jobs we should not expose at top level debugging
        self.debugJobNames = ("CWLJob", "CWLWorkflow", "CWLScatter", "CWLGather",
                              "ResolveIndirect")

    def run(self):
        """
        This runs the leader process to issue and manage jobs.

        :raises: toil.leader.FailedJobsException if at the end of the function there remain \
        failed jobs

        :return: The return value of the root job's run function.
        :rtype: Any
        """
        # Start the stats/logging aggregation thread
        self.statsAndLogging.start()
        if self.config.metrics:
            self.toilMetrics = ToilMetrics(provisioner=self.provisioner)

        try:

            # Start service manager thread
            self.serviceManager.start()
            try:

                # Start the cluster scaling thread if autoscaling is enabled
                if self.clusterScaler is not None:
                    self.clusterScaler.start()

                try:
                    # Run the main loop
                    self.innerLoop()
                finally:
                    if self.clusterScaler is not None:
                        logger.info('Waiting for workers to shutdown')
                        startTime = time.time()
                        self.clusterScaler.shutdown()
                        logger.info('Worker shutdown complete in %s seconds', time.time() - startTime)

            finally:
                # Ensure service manager thread is properly shutdown
                self.serviceManager.shutdown()

        finally:
            # Ensure the stats and logging thread is properly shutdown
            self.statsAndLogging.shutdown()
            if self.toilMetrics:
                self.toilMetrics.shutdown()


        # Filter the failed jobs
        self.toilState.totalFailedJobs = [j for j in self.toilState.totalFailedJobs if self.jobStore.exists(j.jobStoreID)]

        logger.info("Finished toil run %s" %
                     ("successfully" if len(self.toilState.totalFailedJobs) == 0 else ("with %s failed jobs" % len(self.toilState.totalFailedJobs))))

        if len(self.toilState.totalFailedJobs):
            logger.info("Failed jobs at end of the run: %s", ' '.join(str(job) for job in self.toilState.totalFailedJobs))
        # Cleanup
        if len(self.toilState.totalFailedJobs) > 0:
            raise FailedJobsException(self.config.jobStore, self.toilState.totalFailedJobs, self.jobStore)


        return self.jobStore.getRootJobReturnValue()

    def _handledFailedSuccessor(self, jobNode, jobGraph, successorJobStoreID):
        """Deal with the successor having failed. Return True if there are
        still active successors. Return False if all successors have failed
        and the job is queued to run to handle the failed successors."""
        logger.debug("Successor job: %s of job: %s has failed """
                     "predecessors", jobNode, jobGraph)

        # Add the job to the set having failed successors
        self.toilState.hasFailedSuccessors.add(jobGraph.jobStoreID)

        # Reduce active successor count and remove the successor as an active successor of the job
        self.toilState.successorCounts[jobGraph.jobStoreID] -= 1
        assert self.toilState.successorCounts[jobGraph.jobStoreID] >= 0
        self.toilState.successorJobStoreIDToPredecessorJobs[successorJobStoreID].remove(jobGraph)
        if len(self.toilState.successorJobStoreIDToPredecessorJobs[successorJobStoreID]) == 0:
            self.toilState.successorJobStoreIDToPredecessorJobs.pop(successorJobStoreID)

        # If the job now has no active successors add to active jobs
        # so it can be processed as a job with failed successors
        if self.toilState.successorCounts[jobGraph.jobStoreID] == 0:
            logger.debug("Job: %s has no successors to run "
                         "and some are failed, adding to list of jobs "
                         "with failed successors", jobGraph)
            self.toilState.successorCounts.pop(jobGraph.jobStoreID)
            self.toilState.updatedJobs.add((jobGraph, 0))
            return False


    def _checkSuccssorReadyToRunMultiplePredecessors(self, jobGraph, jobNode, successorJobStoreID):
        """Handle the special cases of checking if a successor job is
        ready to run when there are multiple predecessors"""
        # See implementation note at the top of this file for discussion of multiple predecessors
        logger.debug("Successor job: %s of job: %s has multiple "
                     "predecessors", jobNode, jobGraph)

        # Get the successor job graph, which is cached
        if successorJobStoreID not in self.toilState.jobsToBeScheduledWithMultiplePredecessors:
            self.toilState.jobsToBeScheduledWithMultiplePredecessors[successorJobStoreID] = self.jobStore.load(successorJobStoreID)
        successorJobGraph = self.toilState.jobsToBeScheduledWithMultiplePredecessors[successorJobStoreID]

        # Add the jobGraph as a finished predecessor to the successor
        successorJobGraph.predecessorsFinished.add(jobGraph.jobStoreID)

        # If the successor is in the set of successors of failed jobs
        if successorJobStoreID in self.toilState.failedSuccessors:
            if not self._handledFailedSuccessor(jobNode, jobGraph, successorJobStoreID):
                return False

        # If the successor job's predecessors have not all completed then
        # ignore the jobGraph as it is not yet ready to run
        assert len(successorJobGraph.predecessorsFinished) <= successorJobGraph.predecessorNumber
        if len(successorJobGraph.predecessorsFinished) < successorJobGraph.predecessorNumber:
            return False
        else:
            # Remove the successor job from the cache
            self.toilState.jobsToBeScheduledWithMultiplePredecessors.pop(successorJobStoreID)
            return True

    def _makeJobSuccssorReadyToRun(self, jobGraph, jobNode):
        """make a successor job ready to run, returning False if they should
        not yet be run"""
        successorJobStoreID = jobNode.jobStoreID
        #Build map from successor to predecessors.
        if successorJobStoreID not in self.toilState.successorJobStoreIDToPredecessorJobs:
            self.toilState.successorJobStoreIDToPredecessorJobs[successorJobStoreID] = []
        self.toilState.successorJobStoreIDToPredecessorJobs[successorJobStoreID].append(jobGraph)

        if jobNode.predecessorNumber > 1:
            return self._checkSuccssorReadyToRunMultiplePredecessors(jobGraph, jobNode, successorJobStoreID)
        else:
            return True

    def _runJobSuccessors(self, jobGraph):
        assert len(jobGraph.stack[-1]) > 0
        logger.debug("Job: %s has %i successors to schedule",
                     jobGraph.jobStoreID, len(jobGraph.stack[-1]))
        #Record the number of successors that must be completed before
        #the jobGraph can be considered again
        assert jobGraph.jobStoreID not in self.toilState.successorCounts
        self.toilState.successorCounts[jobGraph.jobStoreID] = len(jobGraph.stack[-1])

        # For each successor schedule if all predecessors have been completed
        successors = []
        for jobNode in jobGraph.stack[-1]:
            if self._makeJobSuccssorReadyToRun(jobGraph, jobNode):
                successors.append(jobNode)
        self.issueJobs(successors)

    def _processFailedSuccessors(self, jobGraph):
        """Some of the jobs successors failed then either fail the job
        or restart it if it has retries left and is a checkpoint job"""

        if jobGraph.jobStoreID in self.toilState.servicesIssued:
            # The job has services running; signal for them to be killed.
            # Once they are killed the jobGraph will be re-added to
            # the updatedJobs set and then scheduled to be removed
            logger.debug("Telling job: %s to terminate its services due to successor failure",
                         jobGraph.jobStoreID)
            self.serviceManager.killServices(self.toilState.servicesIssued[jobGraph.jobStoreID],
                                             error=True)
        elif jobGraph.jobStoreID in self.toilState.successorCounts:
            # The job has non-service jobs running; wait for them to finish.
            # The job will be re-added to the updated jobs when these jobs
            # are done
            logger.debug("Job %s with ID: %s with failed successors still has successor jobs running",
                         jobGraph, jobGraph.jobStoreID)
        elif jobGraph.checkpoint is not None and jobGraph.remainingRetryCount > 1:
            # If the job is a checkpoint and has remaining retries then reissue it.
            # The logic behind using > 1 rather than > 0 here: Since this job has
            # been tried once (without decreasing its retry count as the job
            # itself was successful), and its subtree failed, it shouldn't be retried
            # unless it has more than 1 try.
            logger.warn('Job: %s is being restarted as a checkpoint after the total '
                        'failure of jobs in its subtree.', jobGraph.jobStoreID)
            self.issueJob(JobNode.fromJobGraph(jobGraph))
        else:
            # Mark it totally failed
            logger.debug("Job %s is being processed as completely failed", jobGraph.jobStoreID)
            self.processTotallyFailedJob(jobGraph)

    def _processReadyJob(self, jobGraph, resultStatus):
        logger.debug('Updating status of job %s with ID %s: with result status: %s',
                     jobGraph, jobGraph.jobStoreID, resultStatus)

        if jobGraph in self.serviceManager.jobGraphsWithServicesBeingStarted:
            # This stops a job with services being issued by the serviceManager from
            # being considered further in this loop. This catch is necessary because
            # the job's services can fail while being issued, causing the job to be
            # added to updated jobs.
            logger.debug("Got a job to update which is still owned by the service "
                         "manager: %s", jobGraph.jobStoreID)
        elif jobGraph.jobStoreID in self.toilState.hasFailedSuccessors:
            self._processFailedSuccessors(jobGraph)
        elif jobGraph.command is not None or resultStatus != 0:
            # The jobGraph has a command; it must be run before any successors.
            # Similarly, if the job previously failed we rerun it, even if it doesn't have a
            # command to run, to eliminate any parts of the stack now completed.
            isServiceJob = jobGraph.jobStoreID in self.toilState.serviceJobStoreIDToPredecessorJob

            # If the job has run out of retries or is a service job whose error flag has
            # been indicated, fail the job.
            if (jobGraph.remainingRetryCount == 0
                or isServiceJob and not self.jobStore.fileExists(jobGraph.errorJobStoreID)):
                self.processTotallyFailedJob(jobGraph)
                logger.warn("Job %s with ID %s is completely failed",
                            jobGraph, jobGraph.jobStoreID)
            else:
                # Otherwise try the job again
                self.issueJob(JobNode.fromJobGraph(jobGraph))
        elif len(jobGraph.services) > 0:
            # the job has services to run, which have not been started, start them
            # Build a map from the service jobs to the job and a map
            # of the services created for the job
            assert jobGraph.jobStoreID not in self.toilState.servicesIssued
            self.toilState.servicesIssued[jobGraph.jobStoreID] = {}
            for serviceJobList in jobGraph.services:
                for serviceTuple in serviceJobList:
                    serviceID = serviceTuple.jobStoreID
                    assert serviceID not in self.toilState.serviceJobStoreIDToPredecessorJob
                    self.toilState.serviceJobStoreIDToPredecessorJob[serviceID] = jobGraph
                    self.toilState.servicesIssued[jobGraph.jobStoreID][serviceID] = serviceTuple

            # Use the service manager to start the services
            self.serviceManager.scheduleServices(jobGraph)

            logger.debug("Giving job: %s to service manager to schedule its jobs", jobGraph.jobStoreID)
        elif len(jobGraph.stack) > 0:
            # There exist successors to run
            self._runJobSuccessors(jobGraph)
        elif jobGraph.jobStoreID in self.toilState.servicesIssued:
            logger.debug("Telling job: %s to terminate its services due to the "
                         "successful completion of its successor jobs",
                         jobGraph)
            self.serviceManager.killServices(self.toilState.servicesIssued[jobGraph.jobStoreID], error=False)
        else:
            #There are no remaining tasks to schedule within the jobGraph, but
            #we schedule it anyway to allow it to be removed and deleted

            #TODO: An alternative would be to simply delete it here and add it to the
            #list of jobs to process, or (better) to create an asynchronous
            #process that deletes jobs and then feeds them back into the set
            #of jobs to be processed
            if jobGraph.remainingRetryCount > 0:
                self.issueJob(JobNode.fromJobGraph(jobGraph))
                logger.debug("Job: %s is empty, we are scheduling to clean it up", jobGraph.jobStoreID)
            else:
                self.processTotallyFailedJob(jobGraph)
                logger.warn("Job: %s is empty but completely failed - something is very wrong", jobGraph.jobStoreID)

    def _processReadyJobs(self):
        """Process jobs that are ready to be scheduled/have successors to schedule"""
        logger.debug('Built the jobs list, currently have %i jobs to update and %i jobs issued',
                     len(self.toilState.updatedJobs), self.getNumberOfJobsIssued())

        updatedJobs = self.toilState.updatedJobs # The updated jobs to consider below
        self.toilState.updatedJobs = set() # Resetting the list for the next set

        for jobGraph, resultStatus in updatedJobs:
            self._processReadyJob(jobGraph, resultStatus)

    def _startServiceJobs(self):
        """Start any service jobs available from the service manager"""
        self.issueQueingServiceJobs()
        while True:
            serviceJob = self.serviceManager.getServiceJobsToStart(0)
            # Stop trying to get jobs when function returns None
            if serviceJob is None:
                break
            logger.debug('Launching service job: %s', serviceJob)
            self.issueServiceJob(serviceJob)

    def _processJobsWithRunningServices(self):
        """Get jobs whose services have started"""
        while True:
            jobGraph = self.serviceManager.getJobGraphWhoseServicesAreRunning(0)
            if jobGraph is None: # Stop trying to get jobs when function returns None
                break
            logger.debug('Job: %s has established its services.', jobGraph.jobStoreID)
            jobGraph.services = []
            self.toilState.updatedJobs.add((jobGraph, 0))

    def _gatherUpdatedJobs(self, updatedJobTuple):
        """Gather any new, updated jobGraph from the batch system"""
        jobID, result, wallTime = updatedJobTuple
        # easy, track different state
        try:
            updatedJob = self.jobBatchSystemIDToIssuedJob[jobID]
        except KeyError:
            logger.warn("A result seems to already have been processed "
                        "for job %s", jobID)
        else:
            if result == 0:
                cur_logger = (logger.debug if str(updatedJob.jobName).startswith(CWL_INTERNAL_JOBS)
                              else logger.info)
                cur_logger('Job ended successfully: %s', updatedJob)
                if self.toilMetrics:
                    self.toilMetrics.logCompletedJob(updatedJob)
            else:
                logger.warn('Job failed with exit value %i: %s',
                            result, updatedJob)
            self.processFinishedJob(jobID, result, wallTime=wallTime)

    def _processLostJobs(self):
        """Process jobs that have gone awry"""
        # In the case that there is nothing happening (no updated jobs to
        # gather for rescueJobsFrequency seconds) check if there are any jobs
        # that have run too long (see self.reissueOverLongJobs) or which have
        # gone missing from the batch system (see self.reissueMissingJobs)
        if ((time.time() - self.timeSinceJobsLastRescued) >= self.config.rescueJobsFrequency):
            # We only rescue jobs every N seconds, and when we have apparently
            # exhausted the current jobGraph supply
            self.reissueOverLongJobs()
            logger.info("Reissued any over long jobs")

            hasNoMissingJobs = self.reissueMissingJobs()
            if hasNoMissingJobs:
                self.timeSinceJobsLastRescued = time.time()
            else:
                # This means we'll try again in a minute, providing things are quiet
                self.timeSinceJobsLastRescued += 60
            logger.info("Rescued any (long) missing jobs")


    def innerLoop(self):
        """
        The main loop for processing jobs by the leader.
        """
        logger.info("Starting the main loop")
        self.timeSinceJobsLastRescued = time.time()

        while self._anythingLeftToDo():
            if len(self.toilState.updatedJobs) > 0:
                self._processReadyJobs()

            # deal with service-related jobs
            self._startServiceJobs()
            self._processJobsWithRunningServices()

            # check in with the batch system
            updatedJobTuple = self.batchSystem.getUpdatedBatchJob(2)
            if updatedJobTuple is not None:
                self._gatherUpdatedJobs(updatedJobTuple)
            else:
                self._processLostJobs()

            # Check on the associated threads and exit if a failure is detected
            self.statsAndLogging.check()
            self.serviceManager.check()
            # the cluster scaler object will only be instantiated if autoscaling is enabled
            if self.clusterScaler is not None:
                self.clusterScaler.check()

            # Check for deadlocks
            self.checkForDeadlocks()

        logger.info("Finished the main loop: no jobs left to run")

        # Consistency check the toil state
        assert self.toilState.updatedJobs == set()
        assert self.toilState.successorCounts == {}
        assert self.toilState.successorJobStoreIDToPredecessorJobs == {}
        assert self.toilState.serviceJobStoreIDToPredecessorJob == {}
        assert self.toilState.servicesIssued == {}
        # assert self.toilState.jobsToBeScheduledWithMultiplePredecessors # These are not properly emptied yet
        # assert self.toilState.hasFailedSuccessors == set() # These are not properly emptied yet

    def checkForDeadlocks(self):
        """
        Checks if the system is deadlocked running service jobs.
        """
        totalRunningJobs = len(self.batchSystem.getRunningBatchJobIDs())
        totalServicesIssued = self.serviceJobsIssued + self.preemptableServiceJobsIssued
        # If there are no updated jobs and at least some jobs running
        if totalServicesIssued >= totalRunningJobs and len(self.toilState.updatedJobs) == 0 and totalRunningJobs > 0:
            serviceJobs = [x for x in list(self.jobBatchSystemIDToIssuedJob.values()) if isinstance(x, ServiceJobNode)]
            runningServiceJobs = set([x for x in serviceJobs if self.serviceManager.isRunning(x)])
            assert len(runningServiceJobs) <= totalRunningJobs

            # If all the running jobs are active services then we have a potential deadlock
            if len(runningServiceJobs) == totalRunningJobs:
                # We wait self.config.deadlockWait seconds before declaring the system deadlocked
                if self.potentialDeadlockedJobs != runningServiceJobs:
                    self.potentialDeadlockedJobs = runningServiceJobs
                    self.potentialDeadlockTime = time.time()
                elif time.time() - self.potentialDeadlockTime >= self.config.deadlockWait:
                    raise DeadlockException("The system is service deadlocked - all %d running jobs are active services" % totalRunningJobs)
            else:
                # We have observed non-service jobs running, so reset the potential deadlock
                self.potentialDeadlockedJobs = set()
                self.potentialDeadlockTime = 0
        else:
            # We have observed non-service jobs running, so reset the potential deadlock
            self.potentialDeadlockedJobs = set()
            self.potentialDeadlockTime = 0

    def _anythingLeftToDo(self):
        return (len(self.toilState.updatedJobs) > 0) or (self.getNumberOfJobsIssued() > 0) or (self.serviceManager.jobsIssuedToServiceManager > 0)

    def issueJob(self, jobNode):
        """
        Add a job to the queue of jobs
        """
        jobNode.command = ' '.join((resolveEntryPoint('_toil_worker'),
                                    jobNode.jobName, self.jobStoreLocator, jobNode.jobStoreID))
        jobBatchSystemID = self.batchSystem.issueBatchJob(jobNode)
        self.jobBatchSystemIDToIssuedJob[jobBatchSystemID] = jobNode
        if jobNode.preemptable:
            # len(jobBatchSystemIDToIssuedJob) should always be greater than or equal to preemptableJobsIssued,
            # so increment this value after the job is added to the issuedJob dict
            self.preemptableJobsIssued += 1
        cur_logger = (logger.debug if jobNode.jobName.startswith(CWL_INTERNAL_JOBS)
                      else logger.info)
        cur_logger("Issued job %s with job batch system ID: "
                   "%s and cores: %s, disk: %s, and memory: %s",
                   jobNode, str(jobBatchSystemID), int(jobNode.cores),
                   bytes2human(jobNode.disk), bytes2human(jobNode.memory))
        if self.toilMetrics:
            self.toilMetrics.logIssuedJob(jobNode)
            self.toilMetrics.logQueueSize(self.getNumberOfJobsIssued())

    def issueJobs(self, jobs):
        """
        Issue a list of jobs, each represented as a jobNode object.
        """
        for job in jobs:
            self.issueJob(job)

    def issueServiceJob(self, jobNode):
        """
        Issue a service job. The job is added to a queue and only handed to the
        batch system once the number of issued service jobs is below the configured maximum.
        """
        if jobNode.preemptable:
            self.preemptableServiceJobsToBeIssued.append(jobNode)
        else:
            self.serviceJobsToBeIssued.append(jobNode)
        self.issueQueingServiceJobs()

    def issueQueingServiceJobs(self):
        """
        Issues any queued service jobs, up to the configured maximum allowed.
        """
        while len(self.serviceJobsToBeIssued) > 0 and self.serviceJobsIssued < self.config.maxServiceJobs:
            self.issueJob(self.serviceJobsToBeIssued.pop())
            self.serviceJobsIssued += 1
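        # Issue queued preemptable service jobs, up to their own configured maximum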
        while len(self.preemptableServiceJobsToBeIssued) > 0 and self.preemptableServiceJobsIssued < self.config.maxPreemptableServiceJobs:
            self.issueJob(self.preemptableServiceJobsToBeIssued.pop())
            self.preemptableServiceJobsIssued += 1

    def getNumberOfJobsIssued(self, preemptable=None):
        """
        Gets the number of jobs that have been added by issueJob(s) and not
        removed by removeJob.

        :param None or boolean preemptable: If None, return the total number of
          jobs. If True, return just the number of preemptable jobs. If False,
          return just the number of non-preemptable jobs.
        """
        if preemptable is None:
            return len(self.jobBatchSystemIDToIssuedJob)
        elif preemptable:
            return self.preemptableJobsIssued
        else:
            assert len(self.jobBatchSystemIDToIssuedJob) >= self.preemptableJobsIssued
            return len(self.jobBatchSystemIDToIssuedJob) - self.preemptableJobsIssued


    def getJobStoreID(self, jobBatchSystemID):
        """
        Gets the job store ID associated with the given batch system ID.
        """
        return self.jobBatchSystemIDToIssuedJob[jobBatchSystemID].jobStoreID

    def removeJob(self, jobBatchSystemID):
        """
        Removes a job from the system.
        """
        assert jobBatchSystemID in self.jobBatchSystemIDToIssuedJob
        jobNode = self.jobBatchSystemIDToIssuedJob[jobBatchSystemID]
        if jobNode.preemptable:
            # len(jobBatchSystemIDToIssuedJob) should always be greater than or equal to preemptableJobsIssued,
            # so decrement this value before removing the job from the issuedJob map
            assert self.preemptableJobsIssued > 0
            self.preemptableJobsIssued -= 1
        del self.jobBatchSystemIDToIssuedJob[jobBatchSystemID]
        # If service job
        if jobNode.jobStoreID in self.toilState.serviceJobStoreIDToPredecessorJob:
            # Decrement the number of services
            if jobNode.preemptable:
                self.preemptableServiceJobsIssued -= 1
            else:
                self.serviceJobsIssued -= 1

        return jobNode

    def getJobs(self, preemptable=None):
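        """
        Gets the jobs that are currently issued, optionally filtered by whether
        they are preemptable.
        """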
        jobs = self.jobBatchSystemIDToIssuedJob.values()
        if preemptable is not None:
            jobs = [job for job in jobs if job.preemptable == preemptable]
        return jobs

    def getJobIDs(self):
        """
        Gets the list of batch system IDs of the jobs currently issued.
        """
        return list(self.jobBatchSystemIDToIssuedJob.keys())

    def killJobs(self, jobsToKill):
        """
        Kills the given set of jobs and then sends them for processing
        """
        if len(jobsToKill) > 0:
            self.batchSystem.killBatchJobs(jobsToKill)
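            # Treat each killed job as having finished with a non-zero exit status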
            for jobBatchSystemID in jobsToKill:
                self.processFinishedJob(jobBatchSystemID, 1)

    # The following functions handle error cases for jobs that have gone awry in the batch system.

    def reissueOverLongJobs(self):
        """
        Check each issued job - if it is running for longer than desirable
        issue a kill instruction.
        Wait for the job to die then we pass the job to processFinishedJob.
        """
        maxJobDuration = self.config.maxJobDuration
        jobsToKill = []
        # We won't bother doing anything if the maximum job duration is longer than
        # 10000000 seconds (roughly 16 weeks)
        if maxJobDuration < 10000000:
            runningJobs = self.batchSystem.getRunningBatchJobIDs()
            for jobBatchSystemID in list(runningJobs.keys()):
                if runningJobs[jobBatchSystemID] > maxJobDuration:
                    logger.warn("The job: %s has been running for: %s seconds, more than the "
                                "max job duration: %s, we'll kill it",
                                str(self.getJobStoreID(jobBatchSystemID)),
                                str(runningJobs[jobBatchSystemID]),
                                str(maxJobDuration))
                    jobsToKill.append(jobBatchSystemID)
            self.killJobs(jobsToKill)

    def reissueMissingJobs(self, killAfterNTimesMissing=3):
        """
        Check that all the current job ids are in the list of currently issued batch system jobs.
        If a job is missing, mark it as such; if it remains missing for killAfterNTimesMissing
        runs of this function, kill it (it is probably lost), wait for it to die, and
        then pass it to processFinishedJob.
        """
        runningJobs = set(self.batchSystem.getIssuedBatchJobIDs())
        jobBatchSystemIDsSet = set(self.getJobIDs())
        #Clean up the reissueMissingJobs_missingHash hash, getting rid of jobs that have turned up
        missingJobIDsSet = set(self.reissueMissingJobs_missingHash.keys())
        for jobBatchSystemID in missingJobIDsSet.difference(jobBatchSystemIDsSet):
            self.reissueMissingJobs_missingHash.pop(jobBatchSystemID)
            logger.warn("Batch system id: %s is no longer missing", str(jobBatchSystemID))
        # Assert that we have no unexpected jobs running
        assert runningJobs.issubset(jobBatchSystemIDsSet)
        jobsToKill = []
        for jobBatchSystemID in set(jobBatchSystemIDsSet.difference(runningJobs)):
            jobStoreID = self.getJobStoreID(jobBatchSystemID)
            if jobBatchSystemID in self.reissueMissingJobs_missingHash:
                self.reissueMissingJobs_missingHash[jobBatchSystemID] += 1
            else:
                self.reissueMissingJobs_missingHash[jobBatchSystemID] = 1
            timesMissing = self.reissueMissingJobs_missingHash[jobBatchSystemID]
            logger.warn("Job store ID %s with batch system id %s is missing for the %i time",
                        jobStoreID, str(jobBatchSystemID), timesMissing)
            if self.toilMetrics:
                self.toilMetrics.logMissingJob()
            if timesMissing == killAfterNTimesMissing:
                self.reissueMissingJobs_missingHash.pop(jobBatchSystemID)
                jobsToKill.append(jobBatchSystemID)
        self.killJobs(jobsToKill)
        # Returns True if there are no longer any missing jobs
        return len(self.reissueMissingJobs_missingHash) == 0

    def processFinishedJob(self, batchSystemID, resultStatus, wallTime=None):
        """
        Reads a processed jobGraph file and updates its state.
        """
        def processRemovedJob(issuedJob):
            if resultStatus != 0:
                logger.warn("Despite the batch system claiming failure the "
                            "job %s seems to have finished and been removed", issuedJob)
            self._updatePredecessorStatus(issuedJob.jobStoreID)
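        # Remove the job from the leader's bookkeeping of issued jobs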
        jobNode = self.removeJob(batchSystemID)
        jobStoreID = jobNode.jobStoreID
        if wallTime is not None and self.clusterScaler is not None:
            self.clusterScaler.addCompletedJob(jobNode, wallTime)
        if self.jobStore.exists(jobStoreID):
            logger.debug("Job %s continues to exist (i.e. has more to do)", jobNode)
            try:
                jobGraph = self.jobStore.load(jobStoreID)
            except NoSuchJobException:
                # Avoid importing AWSJobStore as the corresponding extra might be missing
                if self.jobStore.__class__.__name__ == 'AWSJobStore':
                    # We have a ghost job - the job has been deleted but a stale read from
                    # SDB gave us a false positive when we checked for its existence.
                    # Process the job from here as any other job removed from the job store.
                    # This is a temporary work around until https://github.com/BD2KGenomics/toil/issues/1091
                    # is completed
                    logger.warn('Got a stale read from SDB for job %s', jobNode)
                    processRemovedJob(jobNode)
                    return
                else:
                    raise
            if jobGraph.logJobStoreFileID is not None:
                with jobGraph.getLogFileHandle(self.jobStore) as logFileStream:
                    # More memory efficient than read().splitlines(), while leaving off
                    # the trailing \n left when using readlines()
                    # http://stackoverflow.com/a/15233739
                    StatsAndLogging.logWithFormatting(jobStoreID, logFileStream, method=logger.warn,
                                                      message='The job seems to have left a log file, indicating failure: %s' % jobGraph)
                if self.config.writeLogs or self.config.writeLogsGzip:
                    with jobGraph.getLogFileHandle(self.jobStore) as logFileStream:
                        StatsAndLogging.writeLogFiles(jobGraph.chainedJobs, logFileStream, self.config)
            if resultStatus != 0:
                # If the batch system returned a non-zero exit code then the worker
                # is assumed not to have captured the failure of the job, so we
                # reduce the retry count here.
                if jobGraph.logJobStoreFileID is None:
                    logger.warn("No log file is present, despite job failing: %s", jobNode)
                jobGraph.setupJobAfterFailure(self.config)
                self.jobStore.update(jobGraph)
            elif jobStoreID in self.toilState.hasFailedSuccessors:
                # If the job has completed okay, we can remove it from the list of jobs with failed successors
                self.toilState.hasFailedSuccessors.remove(jobStoreID)

            # Now that we know the jobGraph has finished, add it to the set of updated jobs
            self.toilState.updatedJobs.add((jobGraph, resultStatus))
            logger.debug("Added job: %s to the updated jobs set", jobGraph)
        else:  #The jobGraph is done
            processRemovedJob(jobNode)

    @staticmethod
    def getSuccessors(jobGraph, alreadySeenSuccessors, jobStore):
        """
        Gets successors of the given job by walking the job graph recursively.
        Any successor in alreadySeenSuccessors is ignored and not traversed.
        Returns the set of found successors. This set is added to alreadySeenSuccessors.
        """
        successors = set()

        def successorRecursion(jobGraph):
            # For lists of successors
            for successorList in jobGraph.stack:

                # For each successor in list of successors
                for successorJobNode in successorList:

                    # Id of the successor
                    successorJobStoreID = successorJobNode.jobStoreID

                    # If successor not already visited
                    if successorJobStoreID not in alreadySeenSuccessors:

                        # Add to set of successors
                        successors.add(successorJobStoreID)
                        alreadySeenSuccessors.add(successorJobStoreID)

                        # Recurse if job exists
                        # (job may not exist if already completed)
                        if jobStore.exists(successorJobStoreID):
                            successorRecursion(jobStore.load(successorJobStoreID))

        successorRecursion(jobGraph) # Recurse from jobGraph

        return successors

    def processTotallyFailedJob(self, jobGraph):
        """
        Processes a totally failed job.
        """
        # Mark job as a totally failed job
        self.toilState.totalFailedJobs.add(JobNode.fromJobGraph(jobGraph))
        if self.toilMetrics:
            self.toilMetrics.logFailedJob(jobGraph)

        # Check whether this is a service job
        if jobGraph.jobStoreID in self.toilState.serviceJobStoreIDToPredecessorJob:
            logger.debug("Service job is being processed as a totally failed job: %s", jobGraph)

            predecessorJobGraph = self.toilState.serviceJobStoreIDToPredecessorJob[jobGraph.jobStoreID]

            # This removes the service job as a service of the predecessor
            # and potentially makes the predecessor active
            self._updatePredecessorStatus(jobGraph.jobStoreID)

            # Remove the start flag, if it still exists. This indicates to the
            # service manager that the job has "started", which prevents the
            # service manager from deadlocking while waiting for it to do so
            self.jobStore.deleteFile(jobGraph.startJobStoreID)

            # Signal to any other services in the group that they should
            # terminate. We do this to prevent other services in the set
            # of services from deadlocking waiting for this service to start properly
            if predecessorJobGraph.jobStoreID in self.toilState.servicesIssued:
                self.serviceManager.killServices(self.toilState.servicesIssued[predecessorJobGraph.jobStoreID], error=True)
                logger.debug("Job: %s is instructing all the services of its parent job to quit", jobGraph)

            # This ensures that the predecessor job will not attempt to run
            # any of its successors on the stack
            self.toilState.hasFailedSuccessors.add(predecessorJobGraph.jobStoreID)
        else:
            # Is a non-service job
            assert jobGraph.jobStoreID not in self.toilState.servicesIssued

            # Traverse failed job's successor graph and get the jobStoreID of new successors.
            # Any successor already in toilState.failedSuccessors will not be traversed
            # All successors traversed will be added to toilState.failedSuccessors and returned
            # as a set (unseenSuccessors).
            unseenSuccessors = self.getSuccessors(jobGraph, self.toilState.failedSuccessors,
                                                  self.jobStore)
            logger.debug("Found new failed successors: %s of job: %s", " ".join(
                         unseenSuccessors), jobGraph)

            # For each newly found successor
            for successorJobStoreID in unseenSuccessors:

                # If the successor is a successor of other jobs that have already tried to schedule it
                if successorJobStoreID in self.toilState.successorJobStoreIDToPredecessorJobs:

                    # For each such predecessor job
                    # (we remove the successor from toilState.successorJobStoreIDToPredecessorJobs to avoid doing
                    # this multiple times for each failed predecessor)
                    for predecessorJob in self.toilState.successorJobStoreIDToPredecessorJobs.pop(successorJobStoreID):

                        # Reduce the predecessor job's successor count.
                        self.toilState.successorCounts[predecessorJob.jobStoreID] -= 1

                        # Indicate that it has failed jobs.
                        self.toilState.hasFailedSuccessors.add(predecessorJob.jobStoreID)
                        logger.debug("Marking job: %s as having failed successors (found by "
                                     "reading successors failed job)", predecessorJob)

                        # If the predecessor now has no remaining successors, add it to the set of updated jobs
                        assert self.toilState.successorCounts[predecessorJob.jobStoreID] >= 0
                        if self.toilState.successorCounts[predecessorJob.jobStoreID] == 0:
                            self.toilState.updatedJobs.add((predecessorJob, 0))

                            # Remove the predecessor job from the set of jobs with successors.
                            self.toilState.successorCounts.pop(predecessorJob.jobStoreID)

            # If the job has predecessor(s)
            if jobGraph.jobStoreID in self.toilState.successorJobStoreIDToPredecessorJobs:

                # For each predecessor of the job
                for predecessorJobGraph in self.toilState.successorJobStoreIDToPredecessorJobs[jobGraph.jobStoreID]:

                    # Mark the predecessor as failed
                    self.toilState.hasFailedSuccessors.add(predecessorJobGraph.jobStoreID)
                    logger.debug("Totally failed job: %s is marking direct predecessor: %s "
                                 "as having failed jobs", jobGraph, predecessorJobGraph)

                self._updatePredecessorStatus(jobGraph.jobStoreID)

    def _updatePredecessorStatus(self, jobStoreID):
        """
        Update status of predecessors for finished successor job.
        """
        if jobStoreID in self.toilState.serviceJobStoreIDToPredecessorJob:
            # Is a service job
            predecessorJob = self.toilState.serviceJobStoreIDToPredecessorJob.pop(jobStoreID)
            self.toilState.servicesIssued[predecessorJob.jobStoreID].pop(jobStoreID)
            if len(self.toilState.servicesIssued[predecessorJob.jobStoreID]) == 0:
                # The predecessor job has had all of its services terminated
                self.toilState.servicesIssued.pop(predecessorJob.jobStoreID)
                # Now that we know the job is done, add it to the set of updated jobs
                self.toilState.updatedJobs.add((predecessorJob, 0))

        elif jobStoreID not in self.toilState.successorJobStoreIDToPredecessorJobs:
            # We have reached the root job
            assert len(self.toilState.updatedJobs) == 0
            assert len(self.toilState.successorJobStoreIDToPredecessorJobs) == 0
            assert len(self.toilState.successorCounts) == 0
            logger.debug("Reached root job %s so no predecessors to clean up" % jobStoreID)

        else:
            # Is a non-root, non-service job
            logger.debug("Cleaning the predecessors of %s" % jobStoreID)

            # For each predecessor
            for predecessorJob in self.toilState.successorJobStoreIDToPredecessorJobs.pop(jobStoreID):

                # Reduce the predecessor's number of successors by one to indicate the
                # completion of the jobStoreID job
                self.toilState.successorCounts[predecessorJob.jobStoreID] -= 1

                # If all of the predecessor's successors are now complete
                if self.toilState.successorCounts[predecessorJob.jobStoreID] == 0:

                    # Remove it from the set of jobs with active successors
                    self.toilState.successorCounts.pop(predecessorJob.jobStoreID)

                    if predecessorJob.jobStoreID not in self.toilState.hasFailedSuccessors:
                        # Pop stack at this point, as we can get rid of its successors
                        predecessorJob.stack.pop()

                    # Now we know the job is done we can add it to the list of updated job files
                    assert predecessorJob not in self.toilState.updatedJobs
                    self.toilState.updatedJobs.add((predecessorJob, 0))

                    logger.debug('Job %s has all its non-service successors completed or totally '
                                 'failed', predecessorJob)