Ejemplo n.º 1
0
    def testConcurrencyWithDisk(self):
        """
        Tests that the batch system is allocating disk resources properly
        """
        tempDir = self._createTempDir('testFiles')

        options = Job.Runner.getDefaultOptions(self._getTestJobStorePath())
        options.workDir = tempDir
        from toil import physicalDisk
        availableDisk = physicalDisk(options.workDir)
        logger.info('Testing disk concurrency limits with %s disk space',
                    availableDisk)
        # More disk might become available by the time Toil starts, so we limit it here
        options.maxDisk = availableDisk
        options.batchSystem = self.batchSystemName

        counterPath = os.path.join(tempDir, 'counter')
        resetCounters(counterPath)
        value, maxValue = getCounters(counterPath)
        assert (value, maxValue) == (0, 0)

        half_disk = availableDisk // 2
        more_than_half_disk = half_disk + 500
        logger.info('Dividing into parts of %s and %s', half_disk,
                    more_than_half_disk)

        root = Job()
        # Physically, we're asking for 50% of disk and 50% of disk + 500bytes in the two jobs. The
        # batchsystem should not allow the 2 child jobs to run concurrently.
        root.addChild(
            Job.wrapFn(measureConcurrency,
                       counterPath,
                       self.sleepTime,
                       cores=1,
                       memory='1M',
                       disk=half_disk))
        root.addChild(
            Job.wrapFn(measureConcurrency,
                       counterPath,
                       self.sleepTime,
                       cores=1,
                       memory='1M',
                       disk=more_than_half_disk))
        Job.Runner.startToil(root, options)
        _, maxValue = getCounters(counterPath)

        logger.info('After run: %s disk space', physicalDisk(options.workDir))

        self.assertEqual(maxValue, 1)
Ejemplo n.º 2
0
    def testConcurrencyWithDisk(self):
        """
        Tests that the batch system is allocating disk resources properly
        """
        tempDir = self._createTempDir('testFiles')

        options = Job.Runner.getDefaultOptions(self._getTestJobStorePath())
        options.workDir = tempDir
        from toil import physicalDisk
        availableDisk = physicalDisk('', toilWorkflowDir=options.workDir)
        options.batchSystem = self.batchSystemName

        counterPath = os.path.join(tempDir, 'counter')
        resetCounters(counterPath)
        value, maxValue = getCounters(counterPath)
        assert (value, maxValue) == (0, 0)

        root = Job()
        # Physically, we're asking for 50% of disk and 50% of disk + 500bytes in the two jobs. The
        # batchsystem should not allow the 2 child jobs to run concurrently.
        root.addChild(Job.wrapFn(measureConcurrency, counterPath, self.sleepTime, cores=1,
                                    memory='1M', disk=old_div(availableDisk,2)))
        root.addChild(Job.wrapFn(measureConcurrency, counterPath, self.sleepTime, cores=1,
                                 memory='1M', disk=(old_div(availableDisk, 2)) + 500))
        Job.Runner.startToil(root, options)
        _, maxValue = getCounters(counterPath)
        self.assertEqual(maxValue, 1)
Ejemplo n.º 3
0
    def testConcurrencyWithDisk(self):
        """
        Tests that the batch system is allocating disk resources properly
        """
        tempDir = self._createTempDir('testFiles')

        options = Job.Runner.getDefaultOptions(self._getTestJobStorePath())
        options.workDir = tempDir
        from toil import physicalDisk
        availableDisk = physicalDisk('', toilWorkflowDir=options.workDir)
        options.batchSystem = self.batchSystemName

        counterPath = os.path.join(tempDir, 'counter')
        resetCounters(counterPath)
        value, maxValue = getCounters(counterPath)
        assert (value, maxValue) == (0, 0)

        root = Job()
        # Physically, we're asking for 50% of disk and 50% of disk + 500bytes in the two jobs. The
        # batchsystem should not allow the 2 child jobs to run concurrently.
        root.addChild(Job.wrapFn(measureConcurrency, counterPath, self.sleepTime, cores=1,
                                    memory='1M', disk=availableDisk/2))
        root.addChild(Job.wrapFn(measureConcurrency, counterPath, self.sleepTime, cores=1,
                                 memory='1M', disk=(availableDisk / 2) + 500))
        Job.Runner.startToil(root, options)
        _, maxValue = getCounters(counterPath)
        self.assertEqual(maxValue, 1)
Ejemplo n.º 4
0
    def __init__(self, config, maxCores, maxMemory, maxDisk):
        if maxCores > self.numCores:
            log.warn('Limiting maxCores to CPU count of system (%i).', self.numCores)
            maxCores = self.numCores
        if maxMemory > self.physicalMemory:
            log.warn('Limiting maxMemory to physically available memory (%i).', self.physicalMemory)
            maxMemory = self.physicalMemory
        self.physicalDisk = toil.physicalDisk(config)
        if maxDisk > self.physicalDisk:
            log.warn('Limiting maxDisk to physically available disk (%i).', self.physicalDisk)
            maxDisk = self.physicalDisk
        super(SingleMachineBatchSystem, self).__init__(config, maxCores, maxMemory, maxDisk)
        assert self.maxCores >= self.minCores
        assert self.maxMemory >= 1

        # The scale allows the user to apply a factor to each task's cores requirement, thereby
        # squeezing more tasks onto each core (scale < 1) or stretching tasks over more cores
        # (scale > 1).
        self.scale = config.scale
        # Number of worker threads that will be started
        self.numWorkers = int(old_div(self.maxCores, self.minCores))
        # A counter to generate job IDs and a lock to guard it
        self.jobIndex = 0
        self.jobIndexLock = Lock()
        # A dictionary mapping IDs of submitted jobs to the command line
        self.jobs = {}
        """
        :type: dict[str,toil.job.JobNode]
        """
        # A queue of jobs waiting to be executed. Consumed by the workers.
        self.inputQueue = Queue()
        # A queue of finished jobs. Produced by the workers.
        self.outputQueue = Queue()
        # A dictionary mapping IDs of currently running jobs to their Info objects
        self.runningJobs = {}
        """
        :type: dict[str,Info]
        """
        # The list of worker threads
        self.workerThreads = []
        """
        :type list[Thread]
        """
        # Variables involved with non-blocking resource acquisition
        self.acquisitionTimeout = 5
        self.acquisitionRetryDelay = 10
        self.aquisitionCondition = Condition()

        # A pool representing available CPU in units of minCores
        self.coreFractions = ResourcePool(self.numWorkers, 'cores', self.acquisitionTimeout)
        # A lock to work around the lack of thread-safety in Python's subprocess module
        self.popenLock = Lock()
        # A pool representing available memory in bytes
        self.memory = ResourcePool(self.maxMemory, 'memory', self.acquisitionTimeout)
        # A pool representing the available space in bytes
        self.disk = ResourcePool(self.maxDisk, 'disk', self.acquisitionTimeout)

        log.debug('Setting up the thread pool with %i workers, '
                 'given a minimum CPU fraction of %f '
                 'and a maximum CPU value of %i.', self.numWorkers, self.minCores, maxCores)
        for i in range(self.numWorkers):
            worker = Thread(target=self.worker, args=(self.inputQueue,))
            self.workerThreads.append(worker)
            worker.start()
Ejemplo n.º 5
0
    def __init__(self, config, maxCores, maxMemory, maxDisk):
        self.config = config
        # Limit to the smaller of the user-imposed limit and what we actually
        # have on this machine for each resource.
        #
        # If we don't have up to the limit of the resource (and the resource
        # isn't the inlimited sentinel), warn.
        if maxCores > self.numCores:
            if maxCores != sys.maxsize:
                # We have an actually specified limit and not the default
                log.warning(
                    'Not enough cores! User limited to %i but we only have %i.',
                    maxCores, self.numCores)
            maxCores = self.numCores
        if maxMemory > self.physicalMemory:
            if maxMemory != sys.maxsize:
                # We have an actually specified limit and not the default
                log.warning(
                    'Not enough memory! User limited to %i bytes but we only have %i bytes.',
                    maxMemory, self.physicalMemory)
            maxMemory = self.physicalMemory

        workdir = Toil.getLocalWorkflowDir(
            config.workflowID, config.workDir
        )  # config.workDir may be None; this sets a real directory
        self.physicalDisk = toil.physicalDisk(workdir)
        if maxDisk > self.physicalDisk:
            if maxDisk != sys.maxsize:
                # We have an actually specified limit and not the default
                log.warning(
                    'Not enough disk space! User limited to %i bytes but we only have %i bytes.',
                    maxDisk, self.physicalDisk)
            maxDisk = self.physicalDisk

        super(SingleMachineBatchSystem, self).__init__(config, maxCores,
                                                       maxMemory, maxDisk)
        assert self.maxCores >= self.minCores
        assert self.maxMemory >= 1

        # The scale allows the user to apply a factor to each task's cores requirement, thereby
        # squeezing more tasks onto each core (scale < 1) or stretching tasks over more cores
        # (scale > 1).
        self.scale = config.scale

        if config.badWorker > 0 and config.debugWorker:
            # We can't throw SIGUSR1 at the worker because it is also going to
            # be the leader and/or test harness.
            raise RuntimeError(
                "Cannot use badWorker and debugWorker together; "
                "worker would have to kill the leader")

        self.debugWorker = config.debugWorker

        # A counter to generate job IDs and a lock to guard it
        self.jobIndex = 0
        self.jobIndexLock = Lock()

        # A dictionary mapping IDs of submitted jobs to the command line
        self.jobs: Dict[str, toil.job.JobDescription] = {}

        # A queue of jobs waiting to be executed. Consumed by the daddy thread.
        self.inputQueue = Queue()

        # A queue of finished jobs. Produced by the daddy thread.
        self.outputQueue = Queue()

        # A dictionary mapping IDs of currently running jobs to their Info objects
        self.runningJobs: Dict[str, Info] = {}

        # These next two are only used outside debug-worker mode

        # A dict mapping PIDs to Popen objects for running jobs.
        # Jobs that don't fork are executed one at a time in the main thread.
        self.children: Dict[int, subprocess.Popen] = {}
        # A dict mapping child PIDs to the Job IDs they are supposed to be running.
        self.childToJob: Dict[int, str] = {}

        # A pool representing available CPU in units of minCores
        self.coreFractions = ResourcePool(int(self.maxCores / self.minCores),
                                          'cores')
        # A pool representing available memory in bytes
        self.memory = ResourcePool(self.maxMemory, 'memory')
        # A pool representing the available space in bytes
        self.disk = ResourcePool(self.maxDisk, 'disk')

        # If we can't schedule something, we fill this in with a reason why
        self.schedulingStatusMessage = None

        # We use this event to signal shutdown
        self.shuttingDown = Event()

        # A thread in charge of managing all our child processes.
        # Also takes care of resource accounting.
        self.daddyThread = None
        # If it breaks it will fill this in
        self.daddyException: Optional[Exception] = None

        if self.debugWorker:
            log.debug('Started batch system %s in worker debug mode.',
                      id(self))
        else:
            self.daddyThread = Thread(target=self.daddy, daemon=True)
            self.daddyThread.start()
            log.debug('Started batch system %s in normal mode.', id(self))
Ejemplo n.º 6
0
    def __init__(self, config, maxCores, maxMemory, maxDisk):
        if maxCores > self.numCores:
            log.warn('Limiting maxCores to CPU count of system (%i).', self.numCores)
            maxCores = self.numCores
        if maxMemory > self.physicalMemory:
            log.warn('Limiting maxMemory to physically available memory (%i).', self.physicalMemory)
            maxMemory = self.physicalMemory
        self.physicalDisk = toil.physicalDisk(config)
        if maxDisk > self.physicalDisk:
            log.warn('Limiting maxDisk to physically available disk (%i).', self.physicalDisk)
            maxDisk = self.physicalDisk
        super(SingleMachineBatchSystem, self).__init__(config, maxCores, maxMemory, maxDisk)
        assert self.maxCores >= self.minCores
        assert self.maxMemory >= 1

        # The scale allows the user to apply a factor to each task's cores requirement, thereby
        # squeezing more tasks onto each core (scale < 1) or stretching tasks over more cores
        # (scale > 1).
        self.scale = config.scale
        # Number of worker threads that will be started
        self.debugWorker = config.debugWorker
        self.numWorkers = int(old_div(self.maxCores, self.minCores))
        # A counter to generate job IDs and a lock to guard it
        self.jobIndex = 0
        self.jobIndexLock = Lock()
        # A dictionary mapping IDs of submitted jobs to the command line
        self.jobs = {}
        """
        :type: dict[str,toil.job.JobNode]
        """
        # A queue of jobs waiting to be executed. Consumed by the workers.
        self.inputQueue = Queue()
        # A queue of finished jobs. Produced by the workers.
        self.outputQueue = Queue()
        # A dictionary mapping IDs of currently running jobs to their Info objects
        self.runningJobs = {}
        """
        :type: dict[str,Info]
        """
        # The list of worker threads
        self.workerThreads = []
        """
        :type list[Thread]
        """
        # Variables involved with non-blocking resource acquisition
        self.acquisitionTimeout = 5
        self.acquisitionRetryDelay = 10
        self.aquisitionCondition = Condition()

        # A pool representing available CPU in units of minCores
        self.coreFractions = ResourcePool(self.numWorkers, 'cores', self.acquisitionTimeout)
        # A lock to work around the lack of thread-safety in Python's subprocess module
        self.popenLock = Lock()
        # A pool representing available memory in bytes
        self.memory = ResourcePool(self.maxMemory, 'memory', self.acquisitionTimeout)
        # A pool representing the available space in bytes
        self.disk = ResourcePool(self.maxDisk, 'disk', self.acquisitionTimeout)

        if not self.debugWorker:
            log.debug('Setting up the thread pool with %i workers, '
                     'given a minimum CPU fraction of %f '
                     'and a maximum CPU value of %i.', self.numWorkers, self.minCores, maxCores)
            for i in range(self.numWorkers):
                worker = Thread(target=self.worker, args=(self.inputQueue,))
                self.workerThreads.append(worker)
                worker.start()
        else:
            log.debug('Started in worker debug mode.')
Ejemplo n.º 7
0
    def __init__(self, config, maxCores, maxMemory, maxDisk):
        if maxCores > self.numCores:
            log.warning('Limiting maxCores to CPU count of system (%i).',
                        self.numCores)
            maxCores = self.numCores
        if maxMemory > self.physicalMemory:
            log.warning(
                'Limiting maxMemory to physically available memory (%i).',
                self.physicalMemory)
            maxMemory = self.physicalMemory
        self.physicalDisk = toil.physicalDisk(config)
        if maxDisk > self.physicalDisk:
            log.warning('Limiting maxDisk to physically available disk (%i).',
                        self.physicalDisk)
            maxDisk = self.physicalDisk
        super(SingleMachineBatchSystem, self).__init__(config, maxCores,
                                                       maxMemory, maxDisk)
        assert self.maxCores >= self.minCores
        assert self.maxMemory >= 1

        # The scale allows the user to apply a factor to each task's cores requirement, thereby
        # squeezing more tasks onto each core (scale < 1) or stretching tasks over more cores
        # (scale > 1).
        self.scale = config.scale

        if config.badWorker > 0 and config.debugWorker:
            # We can't throw SIGUSR1 at the worker because it is also going to
            # be the leader and/or test harness.
            raise RuntimeError(
                "Cannot use badWorker and debugWorker together; "
                "worker would have to kill the leader")

        self.debugWorker = config.debugWorker

        # A counter to generate job IDs and a lock to guard it
        self.jobIndex = 0
        self.jobIndexLock = Lock()

        # A dictionary mapping IDs of submitted jobs to the command line
        self.jobs = {}
        """
        :type: dict[str,toil.job.JobNode]
        """

        # A queue of jobs waiting to be executed. Consumed by the daddy thread.
        self.inputQueue = Queue()

        # A queue of finished jobs. Produced by the daddy thread.
        self.outputQueue = Queue()

        # A dictionary mapping IDs of currently running jobs to their Info objects
        self.runningJobs = {}
        """
        :type: dict[str,Info]
        """

        # These next two are only used outside debug-worker mode

        # A dict mapping PIDs to Popen objects for running jobs.
        # Jobs that don't fork are executed one at a time in the main thread.
        self.children = {}
        """
        :type: dict[int,subprocess.Popen]
        """
        # A dict mapping child PIDs to the Job IDs they are supposed to be running.
        self.childToJob = {}
        """
        :type: dict[int,str]
        """

        # A pool representing available CPU in units of minCores
        self.coreFractions = ResourcePool(
            int(old_div(self.maxCores, self.minCores)), 'cores')
        # A pool representing available memory in bytes
        self.memory = ResourcePool(self.maxMemory, 'memory')
        # A pool representing the available space in bytes
        self.disk = ResourcePool(self.maxDisk, 'disk')

        # We use this event to signal shutdown
        self.shuttingDown = Event()

        # A thread in charge of managing all our child processes.
        # Also takes care of resource accounting.
        self.daddyThread = None
        # If it breaks it will fill this in
        self.daddyException = None

        if self.debugWorker:
            log.debug('Started in worker debug mode.')
        else:
            self.daddyThread = Thread(target=self.daddy, daemon=True)
            self.daddyThread.start()
            log.debug('Started in normal mode.')