Example 1
    def _startParasol(self, numCores=None, memory=None):
        if numCores is None:
            numCores = cpu_count()
        if memory is None:
            memory = physicalMemory()
        self.numCores = numCores
        self.memory = memory
        self.leader = self.ParasolLeaderThread()
        self.leader.start()
        self.worker = self.ParasolWorkerThread()
        self.worker.start()
        while self.leader.popen is None or self.worker.popen is None:
            log.info('Waiting for leader and worker processes')
            time.sleep(.1)
Example 2
    def _startParasol(self, numCores=None, memory=None):
        if numCores is None:
            numCores = multiprocessing.cpu_count()
        if memory is None:
            memory = physicalMemory()
        self.numCores = numCores
        self.memory = memory
        self.leader = self.ParasolLeaderThread()
        self.leader.start()
        self.worker = self.ParasolWorkerThread()
        self.worker.start()
        while self.leader.popen is None or self.worker.popen is None:
            log.info('Waiting for leader and worker processes')
            time.sleep(.1)
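Both variants of _startParasol above poll self.leader.popen and self.worker.popen until the background threads have launched their subprocesses. The thread classes themselves are not shown in this excerpt; the following is only a minimal sketch, assuming each thread wraps a subprocess and publishes its handle via a popen attribute (the command line is purely illustrative).

import subprocess
import threading


class _PopenThread(threading.Thread):
    """Illustrative stand-in for ParasolLeaderThread / ParasolWorkerThread."""

    def __init__(self, command):
        super().__init__(daemon=True)
        self.command = command  # e.g. ['paraHub', 'machineList'] (hypothetical)
        self.popen = None       # polled by _startParasol until it is set

    def run(self):
        # Launch the child process, publish the handle, then wait for it to exit.
        self.popen = subprocess.Popen(self.command)
        self.popen.wait()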
Example 3
def prepare_input(job, sample, config, enqueue_consolidation=True):

    # job prep
    config = argparse.Namespace(**vars(config))
    uuid, url, contig_name, reference_url, params_url = sample
    config.uuid = uuid
    config.contig_name = contig_name
    config.reference_url = reference_url
    config.params_url = params_url
    if config.intermediate_file_location is not None:
        config.intermediate_file_location = os.path.join(
            config.intermediate_file_location, uuid)
        mkdir_p(config.intermediate_file_location)
    work_dir = job.fileStore.getLocalTempDir()
    start = time.time()
    log(job, "{}".format(datetime.datetime.now()), config.uuid, 'START')
    log(
        job,
        "Preparing input with URL:{}, contig:{}, reference_url:{}, params_url:{}"
        .format(url, contig_name, reference_url,
                params_url), uuid, 'prepare_input')

    # todo global resource estimation
    config.maxCores = min(config.maxCores, multiprocessing.cpu_count())
    config.defaultCores = min(MP_CPU, config.maxCores)
    config.maxMemory = min(config.maxMemory, int(physicalMemory() * .95))
    #config.disk

    # download references - TOIL_JOBSTORE_PROTOCOL queries are supported so this function can be imported

    #ref fasta
    if reference_url.startswith(TOIL_JOBSTORE_PROTOCOL):
        ref_genome_fileid = reference_url.replace(TOIL_JOBSTORE_PROTOCOL, '',
                                                  1)
        ref_genome_filename = "{}.reference.{}.fa".format(uuid, contig_name)
        job.fileStore.readGlobalFile(
            ref_genome_fileid, os.path.join(work_dir, ref_genome_filename))
    else:
        download_url(reference_url, work_dir=work_dir)
        ref_genome_filename = os.path.basename(reference_url)
        ref_genome_fileid = job.fileStore.writeGlobalFile(
            os.path.join(work_dir, ref_genome_filename))
    ref_genome_size = os.stat(os.path.join(work_dir,
                                           ref_genome_filename)).st_size
    config.reference_genome_fileid = ref_genome_fileid

    #params
    if params_url.startswith(TOIL_JOBSTORE_PROTOCOL):
        params_fileid = params_url.replace(TOIL_JOBSTORE_PROTOCOL, '', 1)
    else:
        download_url(params_url, work_dir=work_dir)
        params_filename = os.path.basename(params_url)
        params_fileid = job.fileStore.writeGlobalFile(
            os.path.join(work_dir, params_filename))
    config.params_fileid = params_fileid

    # download bam
    if url.startswith(TOIL_JOBSTORE_PROTOCOL):
        bam_filename = "{}.input.{}.bam".format(uuid, contig_name)
        job.fileStore.readGlobalFile(
            url.replace(TOIL_JOBSTORE_PROTOCOL, '', 1),
            os.path.join(work_dir, bam_filename))
    else:
        download_url(url, work_dir=work_dir)
        bam_filename = os.path.basename(url)
    data_bam_location = os.path.join("/data", bam_filename)
    workdir_bam_location = os.path.join(work_dir, bam_filename)

    # index the bam
    _index_bam(job, config, work_dir, bam_filename)

    # sanity check
    workdir_bai_location = os.path.join(work_dir, bam_filename + ".bai")
    if not os.path.isfile(workdir_bai_location):
        raise UserError("BAM index file not created for {}: {}".format(
            bam_filename, workdir_bai_location))

    # get start and end location
    start_idx = sys.maxsize
    end_idx = 0
    with closing(
            pysam.AlignmentFile(
                workdir_bam_location,
                'rb' if bam_filename.endswith("bam") else 'r')) as aln:
        for read in aln.fetch():
            align_start = read.reference_start
            align_end = read.reference_end
            start_idx = min([start_idx, align_start])
            end_idx = max([end_idx, align_end])
    log(job, "start_pos:{}, end_pos:{}".format(config.uuid, start_idx,
                                               end_idx), uuid, 'prepare_input')

    # get reads from positions
    chunk_infos = list()
    idx = start_idx
    while idx < end_idx:
        ci = {CI_UUID: uuid}
        ci[CI_CHUNK_BOUNDARY_START] = idx
        chunk_start = idx - config.partition_margin
        ci[CI_CHUNK_START] = chunk_start
        idx += config.partition_size
        ci[CI_CHUNK_BOUNDARY_END] = idx
        chunk_end = idx + config.partition_margin
        ci[CI_CHUNK_END] = chunk_end
        chunk_infos.append(ci)

    # enqueue jobs
    log(job, "Enqueueing {} jobs".format(len(chunk_infos)), uuid,
        'prepare_input')
    idx = 0
    enqueued_jobs = 0
    returned_tarballs = list()
    for ci in chunk_infos:
        #prep
        ci[CI_CHUNK_INDEX] = idx
        chunk_start = ci[CI_CHUNK_START]
        chunk_end = ci[CI_CHUNK_END]
        chunk_position_description = "{}:{}-{}".format(config.contig_name,
                                                       chunk_start, chunk_end)
        bam_split_command = [
            "view", "-b", data_bam_location, chunk_position_description
        ]
        chunk_name = "{}.{}.bam".format(config.uuid, idx)

        #write chunk
        chunk_location = os.path.join(work_dir, chunk_name)
        with open(chunk_location, 'w') as out:
            docker_call(job,
                        config,
                        work_dir,
                        bam_split_command,
                        DOCKER_SAMTOOLS_IMG,
                        DOCKER_SAMTOOLS_TAG,
                        outfile=out)

        #document read count
        chunk_size = os.stat(chunk_location).st_size
        ci[CI_CHUNK_SIZE] = chunk_size
        ci[CI_REF_FA_SIZE] = ref_genome_size
        read_count = prepare_input__get_bam_read_count(job, work_dir,
                                                       chunk_name)
        ci[CI_READ_COUNT] = read_count
        log(
            job,
            "chunk from {} for idx {} is {}b ({}mb) and has {} reads".format(
                chunk_position_description, idx, chunk_size,
                int(chunk_size / 1024 / 1024),
                read_count), uuid, 'prepare_input')
        if config.intermediate_file_location is not None:
            copy_files(file_paths=[chunk_location],
                       output_dir=config.intermediate_file_location)

        # enqueue marginPhase job
        if read_count > 0:
            chunk_fileid = job.fileStore.writeGlobalFile(chunk_location)
            mp_cores = config.defaultCores
            mp_mem = int(
                min(
                    int(chunk_size * MP_MEM_BAM_FACTOR +
                        ref_genome_size * MP_MEM_REF_FACTOR),
                    config.maxMemory))
            mp_disk = int(
                min(
                    int(chunk_size * MP_DSK_BAM_FACTOR +
                        ref_genome_size * MP_DSK_REF_FACTOR +
                        (0 if config.cpecan_probabilities else
                         MP_DSK_CPECAN_FACTOR) * chunk_size), config.maxDisk))
            log(
                job,
                "requesting {} cores, {}b ({}mb) disk, {}b ({}gb) mem".format(
                    mp_cores, mp_disk, int(mp_disk / 1024 / 1024), mp_mem,
                    int(mp_mem / 1024 / 1024 / 1024)),
                "{}.{}".format(uuid, idx), 'prepare_input')
            mp_mem = str(int(mp_mem / 1024)) + "K"
            mp_disk = str(int(mp_disk) / 1024) + "K"
            margin_phase_job = job.addChildJobFn(run_margin_phase,
                                                 config,
                                                 chunk_fileid,
                                                 ci,
                                                 memory=mp_mem,
                                                 cores=mp_cores,
                                                 disk=mp_disk)
            returned_tarballs.append(margin_phase_job.rv())
            enqueued_jobs += 1
        idx += 1

    log(job, "Enqueued {} jobs".format(enqueued_jobs), uuid, 'prepare_input')

    # enqueue merging and consolidation job
    merge_job = job.addFollowOnJobFn(merge_chunks, config, returned_tarballs)
    final_return_value = merge_job.rv()
    if enqueue_consolidation:
        consolidation_job = merge_job.addFollowOnJobFn(consolidate_output,
                                                       config, merge_job.rv())
        final_return_value = consolidation_job.rv()

    # log
    log_generic_job_debug(job, config.uuid, 'prepare_input', work_dir=work_dir)
    log_time(job, "prepare_input", start, config.uuid)

    # return appropriate output
    return final_return_value
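The chunking loop above builds overlapping windows over the alignment: chunk boundaries advance by config.partition_size, and each chunk extends config.partition_margin bases beyond its boundaries on both sides. Below is a standalone sketch of that arithmetic with illustrative numbers (the dictionary keys stand in for the CI_* constants used above).

def compute_chunk_windows(start_idx, end_idx, partition_size, partition_margin):
    """Sketch of the boundary/margin arithmetic used in prepare_input."""
    chunks = []
    idx = start_idx
    while idx < end_idx:
        chunks.append({
            'boundary_start': idx,
            'chunk_start': idx - partition_margin,                  # margin before
            'boundary_end': idx + partition_size,
            'chunk_end': idx + partition_size + partition_margin,   # margin after
        })
        idx += partition_size
    return chunks


# Example: a 2 Mb region with 1 Mb partitions and a 10 kb margin yields two
# chunks whose ends overlap the neighbouring partition by the margin.
for chunk in compute_chunk_windows(0, 2000000, 1000000, 10000):
    print(chunk)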
Example 4
    def testNestedResourcesDoNotBlock(self):
        """
        Resources are requested in the order Memory > Cpu > Disk.
        Test that unavailability of CPUs for one scheduled job does not block
        another job that can run.
        """
        tempDir = self._createTempDir('testFiles')

        options = Job.Runner.getDefaultOptions(self._getTestJobStorePath())
        options.workDir = tempDir
        options.maxCores = 4
        from toil import physicalMemory
        availableMemory = physicalMemory()
        options.batchSystem = self.batchSystemName

        outFile = os.path.join(tempDir, 'counter')
        open(outFile, 'w').close()

        root = Job()

        blocker = Job.wrapFn(_resourceBlockTestAuxFn, outFile=outFile, sleepTime=30, writeVal='b',
                             cores=2, memory='1M', disk='1M')
        firstJob = Job.wrapFn(_resourceBlockTestAuxFn, outFile=outFile, sleepTime=5, writeVal='fJ',
                              cores=1, memory='1M', disk='1M')
        secondJob = Job.wrapFn(_resourceBlockTestAuxFn, outFile=outFile, sleepTime=10,
                               writeVal='sJ', cores=1, memory='1M', disk='1M')

        # Should block off 50% of memory while waiting for its 3 cores
        firstJobChild = Job.wrapFn(_resourceBlockTestAuxFn, outFile=outFile, sleepTime=0,
                                   writeVal='fJC', cores=3, memory=int(old_div(availableMemory,2)), disk='1M')

        # These two shouldn't be able to run before B because there should be only
        # (50% of memory - 1M) available (firstJobChild should be blocking 50%)
        secondJobChild = Job.wrapFn(_resourceBlockTestAuxFn, outFile=outFile, sleepTime=5,
                                    writeVal='sJC', cores=2, memory=int(old_div(availableMemory,1.5)),
                                    disk='1M')
        secondJobGrandChild = Job.wrapFn(_resourceBlockTestAuxFn, outFile=outFile, sleepTime=5,
                                         writeVal='sJGC', cores=2, memory=int(old_div(availableMemory,1.5)),
                                         disk='1M')

        root.addChild(blocker)
        root.addChild(firstJob)
        root.addChild(secondJob)

        firstJob.addChild(firstJobChild)
        secondJob.addChild(secondJobChild)

        secondJobChild.addChild(secondJobGrandChild)
        """
        The tree is:
                    root
                  /   |   \
                 b    fJ   sJ
                      |    |
                      fJC  sJC
                           |
                           sJGC
        But the order of execution should be
        root > b , fJ, sJ > sJC > sJGC > fJC
        since fJC cannot run till b finishes but sJC and sJGC can (fJC is blocked on cores). If the
        resource acquisition is written properly, then fJC, which is scheduled before sJC and sJGC,
        should not block them, and should only run after they finish.
        """
        Job.Runner.startToil(root, options)
        with open(outFile) as oFH:
            outString = oFH.read()
        # The ordering of b, fJ and sJ is non-deterministic since they are scheduled at the same
        # time. We look for all possible permutations.
        possibleStarts = tuple([''.join(x) for x in itertools.permutations(['b', 'fJ', 'sJ'])])
        assert outString.startswith(possibleStarts)
        assert outString.endswith('sJCsJGCfJC')
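The test relies on a helper _resourceBlockTestAuxFn that is not shown in this excerpt. Based only on how it is called, it presumably sleeps for sleepTime seconds and then appends writeVal to outFile; the version below is a hypothetical reconstruction, not the actual helper.

import time


def _resourceBlockTestAuxFn(outFile, sleepTime, writeVal):
    """Hypothetical sketch: wait, then record that this job ran.

    The assertions only depend on the order in which writeVal strings are
    appended to outFile, so the file must be opened in append mode.
    """
    time.sleep(sleepTime)
    with open(outFile, 'a') as fH:
        fH.write(writeVal)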
Example 5
class SingleMachineBatchSystem(BatchSystemSupport):
    """
    The interface for running jobs on a single machine. Runs all the jobs you
    give it as they come in, but in parallel.

    Uses a single "daddy" thread to manage a fleet of child processes.

    Communication with the daddy thread happens via two queues: one queue of
    jobs waiting to be run (the input queue), and one queue of jobs that are
    finished/stopped and need to be returned by getUpdatedBatchJob (the output
    queue).

    When the batch system is shut down, the daddy thread is stopped.

    If running in debug-worker mode, jobs are run immediately as they are sent
    to the batch system, in the sending thread, and the daddy thread is not
    run. But the queues are still used.
    """
    @classmethod
    def supportsAutoDeployment(cls):
        return False

    @classmethod
    def supportsWorkerCleanup(cls):
        return True

    numCores = cpu_count()

    minCores = 0.1
    """
    The minimal fractional CPU. Tasks with a smaller core requirement will be rounded up to this
    value.
    """
    physicalMemory = toil.physicalMemory()

    def __init__(self, config, maxCores, maxMemory, maxDisk):
        self.config = config
        # Limit to the smaller of the user-imposed limit and what we actually
        # have on this machine for each resource.
        #
        # If we have less than the limit of a resource (and the limit isn't
        # the unlimited sentinel), warn.
        if maxCores > self.numCores:
            if maxCores != sys.maxsize:
                # We have an actually specified limit and not the default
                log.warning(
                    'Not enough cores! User limited to %i but we only have %i.',
                    maxCores, self.numCores)
            maxCores = self.numCores
        if maxMemory > self.physicalMemory:
            if maxMemory != sys.maxsize:
                # We have an actually specified limit and not the default
                log.warning(
                    'Not enough memory! User limited to %i bytes but we only have %i bytes.',
                    maxMemory, self.physicalMemory)
            maxMemory = self.physicalMemory

        workdir = Toil.getLocalWorkflowDir(
            config.workflowID, config.workDir
        )  # config.workDir may be None; this sets a real directory
        self.physicalDisk = toil.physicalDisk(workdir)
        if maxDisk > self.physicalDisk:
            if maxDisk != sys.maxsize:
                # We have an actually specified limit and not the default
                log.warning(
                    'Not enough disk space! User limited to %i bytes but we only have %i bytes.',
                    maxDisk, self.physicalDisk)
            maxDisk = self.physicalDisk

        super(SingleMachineBatchSystem, self).__init__(config, maxCores,
                                                       maxMemory, maxDisk)
        assert self.maxCores >= self.minCores
        assert self.maxMemory >= 1

        # The scale allows the user to apply a factor to each task's cores requirement, thereby
        # squeezing more tasks onto each core (scale < 1) or stretching tasks over more cores
        # (scale > 1).
        self.scale = config.scale

        if config.badWorker > 0 and config.debugWorker:
            # We can't throw SIGUSR1 at the worker because it is also going to
            # be the leader and/or test harness.
            raise RuntimeError(
                "Cannot use badWorker and debugWorker together; "
                "worker would have to kill the leader")

        self.debugWorker = config.debugWorker

        # A counter to generate job IDs and a lock to guard it
        self.jobIndex = 0
        self.jobIndexLock = Lock()

        # A dictionary mapping IDs of submitted jobs to the command line
        self.jobs: Dict[str, toil.job.JobDescription] = {}

        # A queue of jobs waiting to be executed. Consumed by the daddy thread.
        self.inputQueue = Queue()

        # A queue of finished jobs. Produced by the daddy thread.
        self.outputQueue = Queue()

        # A dictionary mapping IDs of currently running jobs to their Info objects
        self.runningJobs: Dict[str, Info] = {}

        # These next two are only used outside debug-worker mode

        # A dict mapping PIDs to Popen objects for running jobs.
        # Jobs that don't fork are executed one at a time in the main thread.
        self.children: Dict[int, subprocess.Popen] = {}
        # A dict mapping child PIDs to the Job IDs they are supposed to be running.
        self.childToJob: Dict[int, str] = {}

        # A pool representing available CPU in units of minCores
        self.coreFractions = ResourcePool(int(self.maxCores / self.minCores),
                                          'cores')
        # A pool representing available memory in bytes
        self.memory = ResourcePool(self.maxMemory, 'memory')
        # A pool representing the available space in bytes
        self.disk = ResourcePool(self.maxDisk, 'disk')

        # If we can't schedule something, we fill this in with a reason why
        self.schedulingStatusMessage = None

        # We use this event to signal shutdown
        self.shuttingDown = Event()

        # A thread in charge of managing all our child processes.
        # Also takes care of resource accounting.
        self.daddyThread = None
        # If it breaks it will fill this in
        self.daddyException: Optional[Exception] = None

        if self.debugWorker:
            log.debug('Started batch system %s in worker debug mode.',
                      id(self))
        else:
            self.daddyThread = Thread(target=self.daddy, daemon=True)
            self.daddyThread.start()
            log.debug('Started batch system %s in normal mode.', id(self))

    def daddy(self):
        """
        Be the "daddy" thread.

        Our job is to look at jobs from the input queue.

        If a job fits in the available resources, we allocate resources for it
        and kick off a child process.

        We also check on our children.

        When a child finishes, we reap it, release its resources, and put its
        information in the output queue.
        """

        try:
            log.debug('Started daddy thread for batch system %s.', id(self))

            while not self.shuttingDown.is_set():
                # Main loop

                while not self.shuttingDown.is_set():
                    # Try to start as many jobs as we can try to start
                    try:
                        # Grab something from the input queue if available.
                        args = self.inputQueue.get_nowait()
                        jobCommand, jobID, jobCores, jobMemory, jobDisk, environment = args

                        coreFractions = int(jobCores / self.minCores)

                        # Try to start the child
                        result = self._startChild(jobCommand, jobID,
                                                  coreFractions, jobMemory,
                                                  jobDisk, environment)

                        if result is None:
                            # We did not get the resources to run this job.
                            # Requeue last, so we can look at the next job.
                            # TODO: Have some kind of condition the job can wait on,
                            # but without threads (queues for jobs needing
                            # cores/memory/disk individually)?
                            self.inputQueue.put(args)
                            break

                        # Otherwise it's a PID if it succeeded, or False if it couldn't
                        # start. But we don't care either way here.

                    except Empty:
                        # Nothing to run. Stop looking in the queue.
                        break

                # Now check on our children.
                for done_pid in self._pollForDoneChildrenIn(self.children):
                    # A child has actually finished.
                    # Clean up after it.
                    self._handleChild(done_pid)

                # Then loop again: start and collect more jobs.
                # TODO: It would be good to be able to wait on a new job or a finished child, whichever comes first.
                # For now we just sleep and loop.
                time.sleep(0.01)

            # When we get here, we are shutting down.
            log.debug(
                'Daddy thread cleaning up %d remaining children for batch system %s...',
                len(self.children), id(self))

            self._stop_and_wait(self.children.values())

            log.debug(
                'Daddy thread for batch system %s finishing because no children should now exist',
                id(self))

            # Then exit the thread.
            return
        except Exception as e:
            log.critical(
                'Unhandled exception in daddy thread for batch system %s: %s',
                id(self), traceback.format_exc())
            # Pass the exception back to the main thread so it can stop the next person who calls into us.
            self.daddyException = e
            raise

    def _checkOnDaddy(self):
        if self.daddyException is not None:
            # The daddy thread broke and we cannot do our job
            log.critical(
                'Propagating unhandled exception in daddy thread to main thread'
            )
            exc = self.daddyException
            self.daddyException = None
            if isinstance(exc, Exception):
                raise exc
            else:
                raise TypeError(
                    f'Daddy thread failed with non-exception: {exc}')

    def _stop_now(self, popens: Sequence[subprocess.Popen]) -> List[int]:
        """
        Stop the given child processes and all their children. Does not reap them.

        Returns a list of PGIDs killed, where processes may exist that have not
        yet received their kill signals.
        """

        # We will potentially need to poll these PGIDs to ensure that all
        # processes in them are gone.
        pgids = []

        for popen in popens:
            # Kill all the children

            if popen.returncode is None:
                # Process is not known to be dead. Try and grab its group.
                try:
                    pgid = os.getpgid(popen.pid)
                except OSError:
                    # It just died. Assume the pgid was its PID.
                    pgid = popen.pid
            else:
                # It is dead. Try its PID as a PGID and hope we didn't re-use it.
                pgid = popen.pid

            if pgid != os.getpgrp():
                # The child process really is in its own group, and not ours.

                # Kill the group, which hopefully hasn't been reused
                log.debug(
                    'Send shutdown kill to process group %s known to batch system %s',
                    pgid, id(self))
                try:
                    os.killpg(pgid, signal.SIGKILL)
                    pgids.append(pgid)
                except ProcessLookupError:
                    # It is dead already
                    pass
                except PermissionError:
                    # It isn't ours actually. Ours is dead.
                    pass
            else:
                # Kill the subprocess again through popen in case it somehow
                # never managed to make the group.
                popen.kill()

        return pgids

    def _stop_and_wait(self, popens: Sequence[subprocess.Popen]) -> None:
        """
        Stop the given child processes and all their children. Blocks until the
        processes are gone.
        """

        pgids = self._stop_now(popens)

        for popen in popens:
            # Wait on all the children
            popen.wait()

            log.debug(
                'Process %s known to batch system %s is stopped; it returned %s',
                popen.pid, id(self), popen.returncode)

        for pgid in pgids:
            try:
                while True:
                    # Send a kill to the group again, to see if anything in it
                    # is still alive. Our first kill might not have been
                    # delivered yet.
                    os.killpg(pgid, signal.SIGKILL)
                    # If that worked it is still alive, so wait for the kernel
                    # to stop fooling around and kill it.
                    log.warning(
                        'Sent redundant shutdown kill to surviving process group %s known to batch system %s',
                        pgid, id(self))
                    time.sleep(0.1)
            except ProcessLookupError:
                # The group is actually gone now.
                pass
            except PermissionError:
                # The group is not only gone but reused
                pass

    def _pollForDoneChildrenIn(self, pid_to_popen):
        """
        See if any children represented in the given dict from PID to Popen
        object have finished.

        Return a collection of their PIDs.

        Guarantees that each child's exit code will be gettable via wait() on
        the child's Popen object (i.e. does not reap the child, unless via
        Popen).
        """

        # We keep our found PIDs in a set so we can work around waitid showing
        # us the same one repeatedly.
        ready = set()

        # Find the waitid function
        waitid = getattr(os, 'waitid', None)

        if callable(waitid):
            # waitid exists (not Mac)

            while True:
                # Poll for any child to have exit, but don't reap it. Leave reaping
                # to the Popen.
                # TODO: What if someone else in Toil wants to do this syscall?
                # TODO: Is this one-notification-per-done-child with WNOHANG? Or
                # can we miss some? Or do we see the same one repeatedly until it
                # is reaped?
                try:
                    siginfo = waitid(os.P_ALL, -1,
                                     os.WEXITED | os.WNOWAIT | os.WNOHANG)
                except ChildProcessError:
                    # This happens when there is nothing to wait on right now,
                    # instead of the weird C behavior of overwriting a field in
                    # a pointed-to struct.
                    siginfo = None
                if siginfo is not None and siginfo.si_pid in pid_to_popen and siginfo.si_pid not in ready:
                    # Something new finished
                    ready.add(siginfo.si_pid)
                else:
                    # Nothing we own that we haven't seen before has finished.
                    return ready
        else:
            # On Mac there's no waitid and no way to wait and not reap.
            # Fall back on polling all the Popen objects.
            # To make this vaguely efficient we have to return done children in
            # batches.
            for pid, popen in pid_to_popen.items():
                if popen.poll() is not None:
                    # Process is done
                    ready.add(pid)
                    log.debug('Child %d has stopped', pid)

            # Return all the done processes we found
            return ready

    def _runDebugJob(self, jobCommand, jobID, environment):
        """
        Run the jobCommand right now, in the current thread.
        May only be called in debug-worker mode.
        Assumes resources are available.
        """
        assert self.debugWorker
        # TODO: It is not possible to kill running jobs in forkless mode,
        # because they are run immediately in the main thread.
        info = Info(time.time(), None, None, killIntended=False)
        self.runningJobs[jobID] = info

        if jobCommand.startswith("_toil_worker "):
            # We can actually run in this thread
            jobName, jobStoreLocator, jobStoreID = jobCommand.split()[
                1:4]  # Parse command
            jobStore = Toil.resumeJobStore(jobStoreLocator)
            toil_worker.workerScript(
                jobStore,
                jobStore.config,
                jobName,
                jobStoreID,
                redirectOutputToLogFile=not self.debugWorker
            )  # Call the worker
        else:
            # Run synchronously. If starting or running the command fails, let the exception stop us.
            subprocess.check_call(jobCommand,
                                  shell=True,
                                  env=dict(os.environ, **environment))

        self.runningJobs.pop(jobID)
        if not info.killIntended:
            self.outputQueue.put(
                UpdatedBatchJobInfo(jobID=jobID,
                                    exitStatus=0,
                                    wallTime=time.time() - info.time,
                                    exitReason=None))

    def getSchedulingStatusMessage(self):
        # Implement the abstractBatchSystem's scheduling status message API
        return self.schedulingStatusMessage

    def _setSchedulingStatusMessage(self, message):
        """
        If we can't run a job, we record a short message about why not. If the
        leader wants to know what is up with us (for example, to diagnose a
        deadlock), it can ask us for the message.
        """

        self.schedulingStatusMessage = message

    def _startChild(self, jobCommand, jobID, coreFractions, jobMemory, jobDisk,
                    environment):
        """
        Start a child process for the given job.

        Allocate its required resources and save it in our bookkeeping structures.

        If the job is started, returns its PID.
        If the job fails to start, reports it as failed and returns False.
        If the job cannot get the resources it needs to start, returns None.
        """

        # We fill this in if we manage to actually start the child.
        popen = None

        # This is when we started working on the job.
        startTime = time.time()

        # See if we can fit the job in our resource pools right now.
        if self.coreFractions.acquireNow(coreFractions):
            # We got some cores
            if self.memory.acquireNow(jobMemory):
                # We got some memory
                if self.disk.acquireNow(jobDisk):
                    # We got the final resource, disk.
                    # Actually run the job.
                    # When it finishes we will release what it was using.
                    # So it is important to not lose track of the child process.

                    try:
                        # Launch the job.
                        # Make sure it is in its own session (and thus its own
                        # process group) so that, if the user signals the
                        # workflow, Toil will be responsible for killing the
                        # job. This also makes sure that we can signal the job
                        # and all its children together. We assume that the
                        # process group ID will equal the PID of the process we
                        # are starting.
                        popen = subprocess.Popen(jobCommand,
                                                 shell=True,
                                                 env=dict(
                                                     os.environ,
                                                     **environment),
                                                 start_new_session=True)
                    except Exception:
                        # If the job can't start, make sure we release resources now
                        self.coreFractions.release(coreFractions)
                        self.memory.release(jobMemory)
                        self.disk.release(jobDisk)

                        log.error('Could not start job %s: %s', jobID,
                                  traceback.format_exc())

                        # Report as failed.
                        self.outputQueue.put(
                            UpdatedBatchJobInfo(
                                jobID=jobID,
                                exitStatus=EXIT_STATUS_UNAVAILABLE_VALUE,
                                wallTime=0,
                                exitReason=None))

                        # Complain it broke.
                        return False
                    else:
                        # If the job did start, record it
                        self.children[popen.pid] = popen
                        # Make sure we can look it up by PID later
                        self.childToJob[popen.pid] = jobID
                        # Record that the job is running, and the resources it is using
                        info = Info(startTime,
                                    popen, (coreFractions, jobMemory, jobDisk),
                                    killIntended=False)
                        self.runningJobs[jobID] = info

                        log.debug('Launched job %s as child %d', jobID,
                                  popen.pid)

                        # Report success starting the job
                        # Note that if a PID were somehow 0 it would look like False
                        assert popen.pid != 0
                        return popen.pid
                else:
                    # We can't get disk, so free cores and memory
                    self.coreFractions.release(coreFractions)
                    self.memory.release(jobMemory)
                    self._setSchedulingStatusMessage(
                        'Not enough disk to run job %s' % jobID)
            else:
                # Free cores, since we can't get memory
                self.coreFractions.release(coreFractions)
                self._setSchedulingStatusMessage(
                    'Not enough memory to run job %s' % jobID)
        else:
            self._setSchedulingStatusMessage('Not enough cores to run job %s' %
                                             jobID)

        # If we get here, we didn't succeed or fail starting the job.
        # We didn't manage to get the resources.
        # Report that.
        return None

    def _handleChild(self, pid: int) -> None:
        """
        Handle a child process PID that has finished.
        The PID must be for a child job we started.
        Not thread safe to run at the same time as we are making more children.

        Remove the child from our bookkeeping structures and free its resources.
        """

        # Look up the child
        popen = self.children[pid]
        jobID = self.childToJob[pid]
        info = self.runningJobs[jobID]

        # Unpack the job resources
        (coreFractions, jobMemory, jobDisk) = info.resources

        # Clean up our records of the job.
        self.runningJobs.pop(jobID)
        self.childToJob.pop(pid)
        self.children.pop(pid)

        if popen.returncode is None or not callable(getattr(
                os, 'waitid', None)):
            # It isn't reaped yet, or we have to reap all children to see if they're done.
            # Before we reap it (if possible), kill its PID as a PGID to make sure
            # it isn't leaving children behind.
            # TODO: This is a PGID re-use risk on Mac because the process is
            # reaped already and the PGID may have been reused.
            try:
                os.killpg(pid, signal.SIGKILL)
            except ProcessLookupError:
                # It is dead already
                pass
            except PermissionError:
                # It isn't ours actually. Ours is dead.
                pass

        # See how the child did, and reap it.
        statusCode = popen.wait()
        if statusCode != 0 and not info.killIntended:
            log.error("Got exit code %i (indicating failure) "
                      "from job %s.", statusCode, self.jobs[jobID])
        if not info.killIntended:
            # Report if the job failed and we didn't kill it.
            # If we killed it then it shouldn't show up in the queue.
            self.outputQueue.put(
                UpdatedBatchJobInfo(jobID=jobID,
                                    exitStatus=statusCode,
                                    wallTime=time.time() - info.time,
                                    exitReason=None))

        # Make absolutely sure all processes in the group have received their
        # kill signals and been cleaned up.
        # TODO: this opens a PGID reuse risk; we reaped the process and its
        # PGID may have been re-used. But it probably hasn't been and we
        # definitely want to make sure all its children died before saying the
        # job is done. Some might not be dead yet if we don't do this.
        # TODO: can we safely do this before reaping? Or would we sit forever
        # signaling a dead but unreaped process?
        try:
            while True:
                # Send a kill to the group again, to see if anything in it
                # is still alive. Our first kill might not have been
                # delivered yet.
                os.killpg(pid, signal.SIGKILL)
                # If that worked it is still alive, so wait for the kernel
                # to stop fooling around and kill it.
                log.warning(
                    'Sent redundant job completion kill to surviving process group %s known to batch system %s',
                    pid, id(self))
                time.sleep(0.1)
        except ProcessLookupError:
            # It is dead already
            pass
        except PermissionError:
            # It isn't ours actually. Ours is dead.
            pass

        # Free up the job's resources.
        self.coreFractions.release(coreFractions)
        self.memory.release(jobMemory)
        self.disk.release(jobDisk)

        log.debug('Child %d for job %s succeeded', pid, jobID)

    def issueBatchJob(self, jobDesc):
        """Adds the command and resources to a queue to be run."""

        self._checkOnDaddy()

        # Round cores to minCores and apply scale.
        # Make sure to give minCores even if asked for 0 cores, or negative or something.
        cores = max(
            math.ceil(jobDesc.cores * self.scale / self.minCores) *
            self.minCores, self.minCores)

        # Don't do our own assertions about job size vs. our configured size.
        # The abstract batch system can handle it.
        self.checkResourceRequest(jobDesc.memory,
                                  cores,
                                  jobDesc.disk,
                                  job_name=jobDesc.jobName,
                                  detail=f'Scale is set to {self.scale}.')
        log.debug(
            f"Issuing the command: {jobDesc.command} with "
            f"memory: {jobDesc.memory}, cores: {cores}, disk: {jobDesc.disk}")
        with self.jobIndexLock:
            jobID = self.jobIndex
            self.jobIndex += 1
        self.jobs[jobID] = jobDesc.command

        if self.debugWorker:
            # Run immediately, blocking for return.
            # Ignore resource requirements; we run one job at a time
            self._runDebugJob(jobDesc.command, jobID, self.environment.copy())
        else:
            # Queue the job for later
            self.inputQueue.put(
                (jobDesc.command, jobID, cores, jobDesc.memory, jobDesc.disk,
                 self.environment.copy()))

        return jobID

    def killBatchJobs(self, jobIDs: Sequence[str]) -> None:
        """Kills jobs by ID."""

        self._checkOnDaddy()

        log.debug('Killing jobs: {}'.format(jobIDs))

        # Collect the popen handles for the jobs we have to stop
        popens: List[subprocess.Popen] = []

        for jobID in jobIDs:
            if jobID in self.runningJobs:
                info = self.runningJobs[jobID]
                info.killIntended = True
                if info.popen is not None:
                    popens.append(info.popen)
                else:
                    # No popen if running in forkless mode currently
                    assert self.debugWorker
                    log.critical("Can't kill job: %s in debug mode" % jobID)

        # Stop them all in a batch. Don't reap, because we need the daddy
        # thread to reap them to mark the jobs as not running anymore.
        self._stop_now(popens)

        for jobID in jobIDs:
            while jobID in self.runningJobs:
                # Wait for the daddy thread to collect them.
                time.sleep(0.01)

    def getIssuedBatchJobIDs(self):
        """Just returns all the jobs that have been run, but not yet returned as updated."""

        self._checkOnDaddy()

        return list(self.jobs.keys())

    def getRunningBatchJobIDs(self):

        self._checkOnDaddy()

        now = time.time()
        return {
            jobID: now - info.time
            for jobID, info in list(self.runningJobs.items())
        }

    def shutdown(self):
        """
        Cleanly terminate and join daddy thread.
        """

        if self.daddyThread is not None:
            # Tell the daddy thread to stop.
            self.shuttingDown.set()
            # Wait for it to stop.
            self.daddyThread.join()

        BatchSystemSupport.workerCleanup(self.workerCleanupInfo)

    def getUpdatedBatchJob(self, maxWait):
        """Returns a tuple of a no-longer-running job, the return value of its process, and its runtime, or None."""

        self._checkOnDaddy()

        try:
            item = self.outputQueue.get(timeout=maxWait)
        except Empty:
            return None
        self.jobs.pop(item.jobID)
        log.debug("Ran jobID: %s with exit value: %i", item.jobID,
                  item.exitStatus)
        return item

    @classmethod
    def setOptions(cls, setOption):
        setOption("scale", default=1)
Example 6
    def testNestedResourcesDoNotBlock(self):
        """
        Resources are requested in the order Memory > Cpu > Disk.
        Test that unavailability of CPUs for one scheduled job does not block
        another job that can run.
        """
        tempDir = self._createTempDir('testFiles')

        options = Job.Runner.getDefaultOptions(self._getTestJobStorePath())
        options.workDir = tempDir
        options.maxCores = 4
        from toil import physicalMemory
        availableMemory = physicalMemory()
        options.batchSystem = self.batchSystemName

        outFile = os.path.join(tempDir, 'counter')
        open(outFile, 'w').close()

        root = Job()

        blocker = Job.wrapFn(_resourceBlockTestAuxFn, outFile=outFile, sleepTime=30, writeVal='b',
                             cores=2, memory='1M', disk='1M')
        firstJob = Job.wrapFn(_resourceBlockTestAuxFn, outFile=outFile, sleepTime=5, writeVal='fJ',
                              cores=1, memory='1M', disk='1M')
        secondJob = Job.wrapFn(_resourceBlockTestAuxFn, outFile=outFile, sleepTime=10,
                               writeVal='sJ', cores=1, memory='1M', disk='1M')

        # Should block off 50% of memory while waiting for its 3 cores
        firstJobChild = Job.wrapFn(_resourceBlockTestAuxFn, outFile=outFile, sleepTime=0,
                                   writeVal='fJC', cores=3, memory=int(availableMemory/2), disk='1M')

        # These two shouldn't be able to run before B because there should be only
        # (50% of memory - 1M) available (firstJobChild should be blocking 50%)
        secondJobChild = Job.wrapFn(_resourceBlockTestAuxFn, outFile=outFile, sleepTime=5,
                                    writeVal='sJC', cores=2, memory=int(availableMemory/1.5),
                                    disk='1M')
        secondJobGrandChild = Job.wrapFn(_resourceBlockTestAuxFn, outFile=outFile, sleepTime=5,
                                         writeVal='sJGC', cores=2, memory=int(availableMemory/1.5),
                                         disk='1M')

        root.addChild(blocker)
        root.addChild(firstJob)
        root.addChild(secondJob)

        firstJob.addChild(firstJobChild)
        secondJob.addChild(secondJobChild)

        secondJobChild.addChild(secondJobGrandChild)
        """
        The tree is:
                    root
                  /   |   \
                 b    fJ   sJ
                      |    |
                      fJC  sJC
                           |
                           sJGC
        But the order of execution should be
        root > b , fJ, sJ > sJC > sJGC > fJC
        since fJC cannot run till b finishes but sJC and sJGC can (fJC is blocked on cores). If the
        resource acquisition is written properly, then fJC, which is scheduled before sJC and sJGC,
        should not block them, and should only run after they finish.
        """
        Job.Runner.startToil(root, options)
        with open(outFile) as oFH:
            outString = oFH.read()
        # The ordering of b, fJ and sJ is non-deterministic since they are scheduled at the same
        # time. We look for all possible permutations.
        possibleStarts = tuple([''.join(x) for x in itertools.permutations(['b', 'fJ', 'sJ'])])
        assert outString.startswith(possibleStarts)
        assert outString.endswith('sJCsJGCfJC')
Example 7
class SingleMachineBatchSystem(BatchSystemSupport):
    """
    The interface for running jobs on a single machine. Runs all the jobs you give it as they
    come in, but in parallel.
    """

    @classmethod
    def supportsAutoDeployment(cls):
        return False

    @classmethod
    def supportsWorkerCleanup(cls):
        return True

    numCores = multiprocessing.cpu_count()

    minCores = 0.1
    """
    The minimal fractional CPU. Tasks with a smaller core requirement will be rounded up to this
    value. One important invariant of this class is that each worker thread represents a CPU
    requirement of minCores, meaning that we can never run more than numCores / minCores jobs
    concurrently.
    """
    physicalMemory = toil.physicalMemory()

    def __init__(self, config, maxCores, maxMemory, maxDisk):
        if maxCores > self.numCores:
            log.warn('Limiting maxCores to CPU count of system (%i).', self.numCores)
            maxCores = self.numCores
        if maxMemory > self.physicalMemory:
            log.warn('Limiting maxMemory to physically available memory (%i).', self.physicalMemory)
            maxMemory = self.physicalMemory
        self.physicalDisk = toil.physicalDisk(config)
        if maxDisk > self.physicalDisk:
            log.warn('Limiting maxDisk to physically available disk (%i).', self.physicalDisk)
            maxDisk = self.physicalDisk
        super(SingleMachineBatchSystem, self).__init__(config, maxCores, maxMemory, maxDisk)
        assert self.maxCores >= self.minCores
        assert self.maxMemory >= 1

        # The scale allows the user to apply a factor to each task's cores requirement, thereby
        # squeezing more tasks onto each core (scale < 1) or stretching tasks over more cores
        # (scale > 1).
        self.scale = config.scale
        # Number of worker threads that will be started
        self.debugWorker = config.debugWorker
        self.numWorkers = int(old_div(self.maxCores, self.minCores))
        # A counter to generate job IDs and a lock to guard it
        self.jobIndex = 0
        self.jobIndexLock = Lock()
        # A dictionary mapping IDs of submitted jobs to the command line
        self.jobs = {}
        """
        :type: dict[str,toil.job.JobNode]
        """
        # A queue of jobs waiting to be executed. Consumed by the workers.
        self.inputQueue = Queue()
        # A queue of finished jobs. Produced by the workers.
        self.outputQueue = Queue()
        # A dictionary mapping IDs of currently running jobs to their Info objects
        self.runningJobs = {}
        """
        :type: dict[str,Info]
        """
        # The list of worker threads
        self.workerThreads = []
        """
        :type list[Thread]
        """
        # Variables involved with non-blocking resource acquisition
        self.acquisitionTimeout = 5
        self.acquisitionRetryDelay = 10
        self.aquisitionCondition = Condition()

        # A pool representing available CPU in units of minCores
        self.coreFractions = ResourcePool(self.numWorkers, 'cores', self.acquisitionTimeout)
        # A lock to work around the lack of thread-safety in Python's subprocess module
        self.popenLock = Lock()
        # A pool representing available memory in bytes
        self.memory = ResourcePool(self.maxMemory, 'memory', self.acquisitionTimeout)
        # A pool representing the available space in bytes
        self.disk = ResourcePool(self.maxDisk, 'disk', self.acquisitionTimeout)

        if not self.debugWorker:
            log.debug('Setting up the thread pool with %i workers, '
                     'given a minimum CPU fraction of %f '
                     'and a maximum CPU value of %i.', self.numWorkers, self.minCores, maxCores)
            for i in range(self.numWorkers):
                worker = Thread(target=self.worker, args=(self.inputQueue,))
                self.workerThreads.append(worker)
                worker.start()
        else:
            log.debug('Started in worker debug mode.')

    def _runWorker(self, jobCommand, jobID, environment):
        """
        Run the jobCommand using the worker and wait for it to finish.
        The worker is forked unless it is a '_toil_worker' job and
        debugWorker is True.
        """
        startTime = time.time()  # Time job is started
        if self.debugWorker and "_toil_worker" in jobCommand:
            # Run the worker without forking
            jobName, jobStoreLocator, jobStoreID = jobCommand.split()[1:] # Parse command
            jobStore = Toil.resumeJobStore(jobStoreLocator)
            # TODO: The following does not yet properly populate self.runningJobs so it is not possible to kill
            # running jobs in forkless mode - see the "None" value in place of popen
            info = Info(time.time(), None, killIntended=False)
            try:
                self.runningJobs[jobID] = info
                try:
                    toil_worker.workerScript(jobStore, jobStore.config, jobName, jobStoreID, 
                                             redirectOutputToLogFile=not self.debugWorker) # Call the worker
                finally:
                    self.runningJobs.pop(jobID)
            finally:
                if not info.killIntended:
                    self.outputQueue.put((jobID, 0, time.time() - startTime))
        else:
            with self.popenLock:
                popen = subprocess.Popen(jobCommand,
                                         shell=True,
                                         env=dict(os.environ, **environment))
            info = Info(time.time(), popen, killIntended=False)
            try:
                self.runningJobs[jobID] = info
                try:
                    statusCode = popen.wait()
                    if statusCode != 0 and not info.killIntended:
                        log.error("Got exit code %i (indicating failure) "
                                  "from job %s.", statusCode, self.jobs[jobID])
                finally:
                    self.runningJobs.pop(jobID)
            finally:
                if not info.killIntended:
                    self.outputQueue.put((jobID, statusCode, time.time() - startTime))
        
    # Note: The input queue is passed as an argument because the corresponding attribute is reset
    # to None in shutdown()

    def worker(self, inputQueue):
        while True:
            if self.debugWorker and inputQueue.empty():
                return
            args = inputQueue.get()
            if args is None:
                break
            jobCommand, jobID, jobCores, jobMemory, jobDisk, environment = args
            while True:
                try:
                    coreFractions = int(old_div(jobCores, self.minCores))
                    log.debug('Acquiring %i bytes of memory from a pool of %s.', jobMemory,
                              self.memory)
                    with self.memory.acquisitionOf(jobMemory):
                        log.debug('Acquiring %i fractional cores from a pool of %s to satisfy a '
                                  'request of %f cores', coreFractions, self.coreFractions,
                                  jobCores)
                        with self.coreFractions.acquisitionOf(coreFractions):
                            with self.disk.acquisitionOf(jobDisk):
                                self._runWorker(jobCommand, jobID, environment)

                except ResourcePool.AcquisitionTimeoutException as e:
                    log.debug('Could not acquire enough (%s) to run job (%s). Requested: (%s), '
                              'Available: %s. Sleeping for 10s.', e.resource, jobID, e.requested,
                              e.available)
                    with self.aquisitionCondition:
                        # Make threads sleep for the given delay, or until another job finishes.
                        # Whichever is sooner.
                        self.aquisitionCondition.wait(timeout=self.acquisitionRetryDelay)
                    continue
                else:
                    log.debug('Finished job. self.coreFractions ~ %s and self.memory ~ %s',
                              self.coreFractions.value, self.memory.value)
                    with self.aquisitionCondition:
                        # Wake up sleeping threads
                        self.aquisitionCondition.notifyAll()
                    break

    def issueBatchJob(self, jobNode):
        """
        Adds the command and resources to a queue to be run.
        """
        # Round cores to minCores and apply scale
        cores = math.ceil(jobNode.cores * self.scale / self.minCores) * self.minCores
        assert cores <= self.maxCores, ('The job {} is requesting {} cores, more than the maximum of '
                                        '{} cores this batch system was configured with. Scale is '
                                        'set to {}.'.format(jobNode.jobName, cores, self.maxCores, self.scale))
        assert cores >= self.minCores
        assert jobNode.memory <= self.maxMemory, ('The job {} is requesting {} bytes of memory, more than '
                                          'the maximum of {} this batch system was configured '
                                          'with.'.format(jobNode.jobName, jobNode.memory, self.maxMemory))

        self.checkResourceRequest(jobNode.memory, cores, jobNode.disk)
        log.debug("Issuing the command: %s with memory: %i, cores: %i, disk: %i" % (
            jobNode.command, jobNode.memory, cores, jobNode.disk))
        with self.jobIndexLock:
            jobID = self.jobIndex
            self.jobIndex += 1
        self.jobs[jobID] = jobNode.command
        self.inputQueue.put((jobNode.command, jobID, cores, jobNode.memory,
                             jobNode.disk, self.environment.copy()))
        if self.debugWorker:  # then run immediately, blocking for return
            self.worker(self.inputQueue)
        return jobID

    def killBatchJobs(self, jobIDs):
        """
        Kills jobs by ID
        """
        log.debug('Killing jobs: {}'.format(jobIDs))
        for jobID in jobIDs:
            if jobID in self.runningJobs:
                info = self.runningJobs[jobID]
                info.killIntended = True
                if info.popen is not None:
                    os.kill(info.popen.pid, 9)
                else:
                    # No popen if running in forkless mode currently 
                    assert self.debugWorker
                    log.critical("Can't kill job: %s in debug mode" % jobID)
                while jobID in self.runningJobs:
                    pass

    def getIssuedBatchJobIDs(self):
        """
        Just returns all the jobs that have been run, but not yet returned as updated.
        """
        return list(self.jobs.keys())

    def getRunningBatchJobIDs(self):
        now = time.time()
        return {jobID: now - info.time for jobID, info in list(self.runningJobs.items())}

    def shutdown(self):
        """
        Cleanly terminate worker threads. Add one sentinel to inputQueue per worker thread, then join
        all worker threads.
        """
        # Remove reference to inputQueue (raises exception if inputQueue is used after method call)
        inputQueue = self.inputQueue
        self.inputQueue = None
        for i in range(self.numWorkers):
            inputQueue.put(None)
        for thread in self.workerThreads:
            thread.join()
        BatchSystemSupport.workerCleanup(self.workerCleanupInfo)

    def getUpdatedBatchJob(self, maxWait):
        """
        Returns a tuple of a finished job's ID, the exit value of its process, and its wall-clock
        runtime, or None if no job finished within maxWait.
        """
        try:
            item = self.outputQueue.get(timeout=maxWait)
        except Empty:
            return None
        jobID, exitValue, wallTime = item
        jobCommand = self.jobs.pop(jobID)
        log.debug("Ran jobID: %s with exit value: %i", jobID, exitValue)
        return jobID, exitValue, wallTime

    @classmethod
    def setOptions(cls, setOption):
        setOption("scale", default=1)
Example n. 8
0
class SingleMachineBatchSystem(BatchSystemSupport):
    """
    The interface for running jobs on a single machine. Runs all the jobs you give it as they
    come in, but in parallel.
    """
    @classmethod
    def supportsHotDeployment(cls):
        return False

    @classmethod
    def supportsWorkerCleanup(cls):
        return True

    numCores = multiprocessing.cpu_count()

    minCores = 0.1
    """
    The minimal fractional CPU. Tasks with a smaller core requirement will be rounded up to this
    value. One important invariant of this class is that each worker thread represents a CPU
    requirement of minCores, meaning that we can never run more than numCores / minCores jobs
    concurrently.
    """
    physicalMemory = toil.physicalMemory()

    def __init__(self, config, maxCores, maxMemory, maxDisk):
        if maxCores > self.numCores:
            log.warn('Limiting maxCores to CPU count of system (%i).',
                     self.numCores)
            maxCores = self.numCores
        if maxMemory > self.physicalMemory:
            log.warn('Limiting maxMemory to physically available memory (%i).',
                     self.physicalMemory)
            maxMemory = self.physicalMemory
        self.physicalDisk = toil.physicalDisk(config)
        if maxDisk > self.physicalDisk:
            log.warn('Limiting maxDisk to physically available disk (%i).',
                     self.physicalDisk)
            maxDisk = self.physicalDisk
        super(SingleMachineBatchSystem, self).__init__(config, maxCores,
                                                       maxMemory, maxDisk)
        assert self.maxCores >= self.minCores
        assert self.maxMemory >= 1

        # The scale allows the user to apply a factor to each task's cores requirement, thereby
        # squeezing more tasks onto each core (scale < 1) or stretching tasks over more cores
        # (scale > 1).
        self.scale = config.scale
        # Number of worker threads that will be started
        self.numWorkers = int(self.maxCores / self.minCores)
        # A counter to generate job IDs and a lock to guard it
        self.jobIndex = 0
        self.jobIndexLock = Lock()
        # A dictionary mapping IDs of submitted jobs to the command line
        self.jobs = {}
        """
        :type: dict[str,toil.job.JobNode]
        """
        # A queue of jobs waiting to be executed. Consumed by the workers.
        self.inputQueue = Queue()
        # A queue of finished jobs. Produced by the workers.
        self.outputQueue = Queue()
        # A dictionary mapping IDs of currently running jobs to their Info objects
        self.runningJobs = {}
        """
        :type: dict[str,Info]
        """
        # The list of worker threads
        self.workerThreads = []
        """
        :type list[Thread]
        """
        # Variables involved with non-blocking resource acquisition
        self.acquisitionTimeout = 5
        self.acquisitionRetryDelay = 10
        self.aquisitionCondition = Condition()

        # A pool representing available CPU in units of minCores
        self.coreFractions = ResourcePool(self.numWorkers, 'cores',
                                          self.acquisitionTimeout)
        # A lock to work around the lack of thread-safety in Python's subprocess module
        self.popenLock = Lock()
        # A pool representing available memory in bytes
        self.memory = ResourcePool(self.maxMemory, 'memory',
                                   self.acquisitionTimeout)
        # A pool representing the available space in bytes
        self.disk = ResourcePool(self.maxDisk, 'disk', self.acquisitionTimeout)

        log.debug(
            'Setting up the thread pool with %i workers, '
            'given a minimum CPU fraction of %f '
            'and a maximum CPU value of %i.', self.numWorkers, self.minCores,
            maxCores)
        for i in xrange(self.numWorkers):
            worker = Thread(target=self.worker, args=(self.inputQueue, ))
            self.workerThreads.append(worker)
            worker.start()

    def _getDebugCmd(self, jobCommand):
        """Calculate useful debugging command line, handling CWL runs.

        Tries to print out underlying CWL base command being run if possible,
        otherwise defaulting to the toil jobCommand.
        """
        debug_cmd = jobCommand
        cmd_args = jobCommand.split()
        if len(cmd_args) == 3 and cmd_args[0] == "_toil_worker":
            _, job_store_locator, job_store_id = cmd_args
            if job_store_locator.startswith("file:") or os.path.exists(
                    job_store_locator):
                import cPickle
                import marshal as pickler
                job_store_locator = job_store_locator.replace("file:", "")
                job_cmd_file = os.path.join(job_store_locator, "tmp",
                                            job_store_id, "job")
                with open(job_cmd_file) as in_handle:
                    job = pickler.load(in_handle)
                if job.get("command"):
                    command_parts = job["command"].split()
                    input_pickle_file = os.path.join(job_store_locator, "tmp",
                                                     command_parts[1])
                    with open(input_pickle_file) as in_handle:
                        input = cPickle.load(in_handle)
                    if (hasattr(input, "cwltool")
                            and hasattr(input.cwltool, "tool")
                            and "baseCommand" in input.cwltool.tool):
                        debug_cmd = " ".join(input.cwltool.tool["baseCommand"])
        return debug_cmd

    # Note: The input queue is passed as an argument because the corresponding attribute is reset
    # to None in shutdown()

    def worker(self, inputQueue):
        while True:
            args = inputQueue.get()
            if args is None:
                log.debug('Received queue sentinel.')
                break
            jobCommand, jobID, jobCores, jobMemory, jobDisk, environment = args
            while True:
                try:
                    coreFractions = int(jobCores / self.minCores)
                    log.debug(
                        'Acquiring %i bytes of memory from a pool of %s.',
                        jobMemory, self.memory)
                    with self.memory.acquisitionOf(jobMemory):
                        log.debug(
                            'Acquiring %i fractional cores from a pool of %s to satisfy a '
                            'request of %f cores', coreFractions,
                            self.coreFractions, jobCores)
                        with self.coreFractions.acquisitionOf(coreFractions):
                            with self.disk.acquisitionOf(jobDisk):
                                log.debug("Executing command: '%s'.",
                                          self._getDebugCmd(jobCommand))
                                startTime = time.time()  #Time job is started
                                with self.popenLock:
                                    popen = subprocess.Popen(
                                        jobCommand,
                                        shell=True,
                                        env=dict(os.environ, **environment))
                                statusCode = None
                                info = Info(time.time(),
                                            popen,
                                            killIntended=False)
                                try:
                                    self.runningJobs[jobID] = info
                                    try:
                                        statusCode = popen.wait()
                                        if 0 != statusCode:
                                            if statusCode != -9 or not info.killIntended:
                                                log.error(
                                                    "Got exit code %i (indicating failure) "
                                                    "from job %s.", statusCode,
                                                    self.jobs[jobID])
                                    finally:
                                        self.runningJobs.pop(jobID)
                                finally:
                                    if statusCode is not None and not info.killIntended:
                                        self.outputQueue.put(
                                            (jobID, statusCode,
                                             time.time() - startTime))
                except ResourcePool.AcquisitionTimeoutException as e:
                    log.debug(
                        'Could not acquire enough (%s) to run job. Requested: (%s), '
                        'Available: %s. Sleeping for 10s.', e.resource,
                        e.requested, e.available)
                    with self.aquisitionCondition:
                        # Make threads sleep for the given delay, or until another job finishes.
                        # Whichever is sooner.
                        self.aquisitionCondition.wait(
                            timeout=self.acquisitionRetryDelay)
                    continue
                else:
                    log.debug(
                        'Finished job. self.coreFractions ~ %s and self.memory ~ %s',
                        self.coreFractions.value, self.memory.value)
                    with self.aquisitionCondition:
                        # Wake up sleeping threads
                        self.aquisitionCondition.notifyAll()
                    break
        log.debug('Exiting worker thread normally.')

    def issueBatchJob(self, jobNode):
        """
        Adds the command and resources to a queue to be run.
        """
        # Round cores to minCores and apply scale
        cores = math.ceil(
            jobNode.cores * self.scale / self.minCores) * self.minCores
        assert cores <= self.maxCores, (
            'The job is requesting {} cores, more than the maximum of '
            '{} cores this batch system was configured with. Scale is '
            'set to {}.'.format(cores, self.maxCores, self.scale))
        assert cores >= self.minCores
        assert jobNode.memory <= self.maxMemory, (
            'The job is requesting {} bytes of memory, more than '
            'the maximum of {} this batch system was configured '
            'with.'.format(jobNode.memory, self.maxMemory))

        self.checkResourceRequest(jobNode.memory, cores, jobNode.disk)
        log.debug(
            "Issuing the command: %s with memory: %i, cores: %i, disk: %i" %
            (jobNode.command, jobNode.memory, cores, jobNode.disk))
        with self.jobIndexLock:
            jobID = self.jobIndex
            self.jobIndex += 1
        self.jobs[jobID] = jobNode.command
        self.inputQueue.put(
            (jobNode.command, jobID, cores, jobNode.memory, jobNode.disk,
             self.environment.copy()))
        return jobID

    def killBatchJobs(self, jobIDs):
        """
        Kills jobs by ID
        """
        log.debug('Killing jobs: {}'.format(jobIDs))
        for jobID in jobIDs:
            if jobID in self.runningJobs:
                info = self.runningJobs[jobID]
                info.killIntended = True
                os.kill(info.popen.pid, 9)
                while jobID in self.runningJobs:
                    pass

    def getIssuedBatchJobIDs(self):
        """
        Just returns all the jobs that have been run, but not yet returned as updated.
        """
        return self.jobs.keys()

    def getRunningBatchJobIDs(self):
        now = time.time()
        return {
            jobID: now - info.time
            for jobID, info in self.runningJobs.iteritems()
        }

    def shutdown(self):
        """
        Cleanly terminate worker threads. Add one sentinel to inputQueue per worker thread, then join
        all worker threads.
        """
        # Remove reference to inputQueue (raises exception if inputQueue is used after method call)
        inputQueue = self.inputQueue
        self.inputQueue = None
        for i in xrange(self.numWorkers):
            inputQueue.put(None)
        for thread in self.workerThreads:
            thread.join()
        BatchSystemSupport.workerCleanup(self.workerCleanupInfo)

    def getUpdatedBatchJob(self, maxWait):
        """
        Returns a tuple of a finished job's ID, the exit value of its process, and its wall-clock
        runtime, or None if no job finished within maxWait.
        """
        try:
            item = self.outputQueue.get(timeout=maxWait)
        except Empty:
            return None
        jobID, exitValue, wallTime = item
        jobCommand = self.jobs.pop(jobID)
        log.debug("Ran jobID: %s with exit value: %i", jobID, exitValue)
        return jobID, exitValue, wallTime

    @classmethod
    def getRescueBatchJobFrequency(cls):
        """
        This should not really occur without an error. To exercise the system we allow it every 90 minutes.
        """
        return 5400
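
The worker loop above relies on ResourcePool.acquisitionOf() and ResourcePool.AcquisitionTimeoutException, which are not reproduced in these excerpts. Below is a minimal sketch of a pool with that interface; it only assumes the semantics implied by the calls above (a value attribute, a blocking acquire with a timeout, and a context manager that releases on exit) and is not the original class:

import threading
import time
from contextlib import contextmanager


class ResourcePool(object):
    """Hypothetical counting pool of an abstract resource (cores, memory bytes, disk bytes)."""

    class AcquisitionTimeoutException(Exception):
        """Raised when the requested amount cannot be acquired within the timeout."""
        def __init__(self, resource, requested, available):
            self.resource = resource
            self.requested = requested
            self.available = available

    def __init__(self, initialValue, resourceType, timeout=5):
        self.value = initialValue
        self.resourceType = resourceType
        self.timeout = timeout
        self.condition = threading.Condition()

    def acquire(self, amount):
        # Block until `amount` is available, or raise after the timeout.
        with self.condition:
            deadline = time.time() + self.timeout
            while amount > self.value:
                remaining = deadline - time.time()
                if remaining <= 0:
                    raise self.AcquisitionTimeoutException(self.resourceType, amount, self.value)
                self.condition.wait(remaining)
            self.value -= amount

    def release(self, amount):
        with self.condition:
            self.value += amount
            self.condition.notify_all()

    @contextmanager
    def acquisitionOf(self, amount):
        # Used as `with pool.acquisitionOf(n): ...` in the worker methods above.
        self.acquire(amount)
        try:
            yield
        finally:
            self.release(amount)
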
Example n. 9
0
class SingleMachineBatchSystem(BatchSystemSupport):
    """
    The interface for running jobs on a single machine. Runs all the jobs you
    give it as they come in, but in parallel.

    Uses a single "daddy" thread to manage a fleet of child processes.
    
    Communication with the daddy thread happens via two queues: one queue of
    jobs waiting to be run (the input queue), and one queue of jobs that are
    finished/stopped and need to be returned by getUpdatedBatchJob (the output
    queue).

    When the batch system is shut down, the daddy thread is stopped.

    If running in debug-worker mode, jobs are run immediately as they are sent
    to the batch system, in the sending thread, and the daddy thread is not
    run. But the queues are still used.
    """
    @classmethod
    def supportsAutoDeployment(cls):
        return False

    @classmethod
    def supportsWorkerCleanup(cls):
        return True

    numCores = cpu_count()

    minCores = 0.1
    """
    The minimal fractional CPU. Tasks with a smaller core requirement will be rounded up to this
    value. 
    """
    physicalMemory = toil.physicalMemory()

    def __init__(self, config, maxCores, maxMemory, maxDisk):
        if maxCores > self.numCores:
            log.warning('Limiting maxCores to CPU count of system (%i).',
                        self.numCores)
            maxCores = self.numCores
        if maxMemory > self.physicalMemory:
            log.warning(
                'Limiting maxMemory to physically available memory (%i).',
                self.physicalMemory)
            maxMemory = self.physicalMemory
        self.physicalDisk = toil.physicalDisk(config)
        if maxDisk > self.physicalDisk:
            log.warning('Limiting maxDisk to physically available disk (%i).',
                        self.physicalDisk)
            maxDisk = self.physicalDisk
        super(SingleMachineBatchSystem, self).__init__(config, maxCores,
                                                       maxMemory, maxDisk)
        assert self.maxCores >= self.minCores
        assert self.maxMemory >= 1

        # The scale allows the user to apply a factor to each task's cores requirement, thereby
        # squeezing more tasks onto each core (scale < 1) or stretching tasks over more cores
        # (scale > 1).
        self.scale = config.scale

        if config.badWorker > 0 and config.debugWorker:
            # We can't throw SIGUSR1 at the worker because it is also going to
            # be the leader and/or test harness.
            raise RuntimeError(
                "Cannot use badWorker and debugWorker together; "
                "worker would have to kill the leader")

        self.debugWorker = config.debugWorker

        # A counter to generate job IDs and a lock to guard it
        self.jobIndex = 0
        self.jobIndexLock = Lock()

        # A dictionary mapping IDs of submitted jobs to the command line
        self.jobs = {}
        """
        :type: dict[str,toil.job.JobNode]
        """

        # A queue of jobs waiting to be executed. Consumed by the daddy thread.
        self.inputQueue = Queue()

        # A queue of finished jobs. Produced by the daddy thread.
        self.outputQueue = Queue()

        # A dictionary mapping IDs of currently running jobs to their Info objects
        self.runningJobs = {}
        """
        :type: dict[str,Info]
        """

        # These next two are only used outside debug-worker mode

        # A dict mapping PIDs to Popen objects for running jobs.
        # Jobs that don't fork are executed one at a time in the main thread.
        self.children = {}
        """
        :type: dict[int,subprocess.Popen]
        """
        # A dict mapping child PIDs to the Job IDs they are supposed to be running.
        self.childToJob = {}
        """
        :type: dict[int,str]
        """

        # A pool representing available CPU in units of minCores
        self.coreFractions = ResourcePool(
            int(old_div(self.maxCores, self.minCores)), 'cores')
        # A pool representing available memory in bytes
        self.memory = ResourcePool(self.maxMemory, 'memory')
        # A pool representing the available space in bytes
        self.disk = ResourcePool(self.maxDisk, 'disk')

        # We use this event to signal shutdown
        self.shuttingDown = Event()

        # A thread in charge of managing all our child processes.
        # Also takes care of resource accounting.
        self.daddyThread = None
        # If it breaks it will fill this in
        self.daddyException = None

        if self.debugWorker:
            log.debug('Started in worker debug mode.')
        else:
            self.daddyThread = Thread(target=self.daddy, daemon=True)
            self.daddyThread.start()
            log.debug('Started in normal mode.')

    def daddy(self):
        """
        Be the "daddy" thread.

        Our job is to look at jobs from the input queue.
        
        If a job fits in the available resources, we allocate resources for it
        and kick off a child process.

        We also check on our children.

        When a child finishes, we reap it, release its resources, and put its
        information in the output queue.
        """

        try:
            log.debug('Started daddy thread.')

            while not self.shuttingDown.is_set():
                # Main loop

                while not self.shuttingDown.is_set():
                    # Try to start as many jobs as we can try to start
                    try:
                        # Grab something from the input queue if available.
                        args = self.inputQueue.get_nowait()
                        jobCommand, jobID, jobCores, jobMemory, jobDisk, environment = args

                        coreFractions = int(old_div(jobCores, self.minCores))

                        # Try to start the child
                        result = self._startChild(jobCommand, jobID,
                                                  coreFractions, jobMemory,
                                                  jobDisk, environment)

                        log.debug('Tried to start job %s and got: %s', jobID,
                                  str(result))

                        if result is None:
                            # We did not get the resources to run this job.
                            # Requeue last, so we can look at the next job.
                            # TODO: Have some kind of condition the job can wait on,
                            # but without threads (queues for jobs needing
                            # cores/memory/disk individually)?
                            self.inputQueue.put(args)
                            break

                        # Otherwise it's a PID if it succeeded, or False if it couldn't
                        # start. But we don't care either way here.

                    except Empty:
                        # Nothing to run. Stop looking in the queue.
                        break

                # Now check on our children.
                for done_pid in self._pollForDoneChildrenIn(self.children):
                    # A child has actually finished.
                    # Clean up after it.
                    self._handleChild(done_pid)

                # Then loop again: start and collect more jobs.
                # TODO: It would be good to be able to wait on a new job or a finished child, whichever comes first.
                # For now we just sleep and loop.
                time.sleep(0.01)

            # When we get here, we are shutting down.

            for popen in self.children.values():
                # Kill all the children, going through popen to avoid signaling re-used PIDs.
                popen.kill()
            for popen in self.children.values():
                # Reap all the children
                popen.wait()

            # Then exit the thread.
            return
        except Exception as e:
            log.critical('Unhandled exception in daddy thread: %s',
                         traceback.format_exc())
            # Pass the exception back to the main thread so it can stop the next person who calls into us.
            self.daddyException = e
            raise

    def _checkOnDaddy(self):
        if self.daddyException is not None:
            # The daddy thread broke and we cannot do our job
            log.critical(
                'Propagating unhandled exception in daddy thread to main thread'
            )
            exc = self.daddyException
            self.daddyException = None
            raise exc

    def _pollForDoneChildrenIn(self, pid_to_popen):
        """
        See if any children represented in the given dict from PID to Popen
        object have finished.
        
        Return a collection of their PIDs.
        
        Guarantees that each child's exit code will be gettable via wait() on
        the child's Popen object (i.e. does not reap the child, unless via
        Popen).
        """

        # We keep our found PIDs in a set so we can work around waitid showing
        # us the same one repeatedly.
        ready = set()

        # Find the waitid function
        waitid = getattr(os, 'waitid', None)

        if callable(waitid):
            # waitid exists (not Mac)

            while True:
                # Poll for any child to have exit, but don't reap it. Leave reaping
                # to the Popen.
                # TODO: What if someone else in Toil wants to do this syscall?
                # TODO: Is this one-notification-per-done-child with WNOHANG? Or
                # can we miss some? Or do we see the same one repeatedly until it
                # is reaped?
                try:
                    siginfo = waitid(os.P_ALL, -1,
                                     os.WEXITED | os.WNOWAIT | os.WNOHANG)
                except ChildProcessError:
                    # This happens when there is nothing to wait on right now,
                    # instead of the weird C behavior of overwriting a field in
                    # a pointed-to struct.
                    siginfo = None
                if siginfo is not None and siginfo.si_pid in pid_to_popen and siginfo.si_pid not in ready:
                    # Something new finished
                    ready.add(siginfo.si_pid)
                else:
                    # Nothing we own that we haven't seen before has finished.
                    return ready
        else:
            # On Mac there's no waitid and no way to wait and not reap.
            # Fall back on polling all the Popen objects.
            # To make this vaguely efficient we have to return done children in
            # batches.
            for pid, popen in pid_to_popen.items():
                if popen.poll() is not None:
                    # Process is done
                    ready.add(pid)
                    log.debug('Child %d has stopped', pid)

            # Return all the done processes we found
            return ready

    def _runDebugJob(self, jobCommand, jobID, environment):
        """
        Run the jobCommand right now, in the current thread.
        May only be called in debug-worker mode.
        Assumes resources are available.
        """

        assert self.debugWorker

        # TODO: It is not possible to kill running jobs in forkless mode,
        # because they are run immediately in the main thread.
        info = Info(time.time(), None, None, killIntended=False)
        self.runningJobs[jobID] = info

        if jobCommand.startswith("_toil_worker "):
            # We can actually run in this thread
            jobName, jobStoreLocator, jobStoreID = jobCommand.split()[
                1:]  # Parse command
            jobStore = Toil.resumeJobStore(jobStoreLocator)
            toil_worker.workerScript(
                jobStore,
                jobStore.config,
                jobName,
                jobStoreID,
                redirectOutputToLogFile=not self.debugWorker
            )  # Call the worker
        else:
            # Run synchronously. If starting or running the command fails, let the exception stop us.
            subprocess.check_call(jobCommand,
                                  shell=True,
                                  env=dict(os.environ, **environment))

        self.runningJobs.pop(jobID)
        if not info.killIntended:
            self.outputQueue.put((jobID, 0, time.time() - info.time))

    def _startChild(self, jobCommand, jobID, coreFractions, jobMemory, jobDisk,
                    environment):
        """
        Start a child process for the given job.
        
        Allocate its required resources and save it in our bookkeeping structures.

        If the job is started, returns its PID.
        If the job fails to start, reports it as failed and returns False.
        If the job cannot get the resources it needs to start, returns None.
        """

        # We fill this in if we manage to actually start the child.
        popen = None

        # This is when we started working on the job.
        startTime = time.time()

        # See if we can fit the job in our resource pools right now.
        if self.coreFractions.acquireNow(coreFractions):
            # We got some cores
            if self.memory.acquireNow(jobMemory):
                # We got some memory
                if self.disk.acquireNow(jobDisk):
                    # We got the final resource, disk.
                    # Actually run the job.
                    # When it finishes we will release what it was using.
                    # So it is important to not lose track of the child process.

                    try:
                        # Launch the job
                        popen = subprocess.Popen(jobCommand,
                                                 shell=True,
                                                 env=dict(
                                                     os.environ,
                                                     **environment))
                    except Exception:
                        # If the job can't start, make sure we release resources now
                        self.coreFractions.release(coreFractions)
                        self.memory.release(jobMemory)
                        self.disk.release(jobDisk)

                        log.error('Could not start job %s: %s', jobID,
                                  traceback.format_exc())

                        # Report as failed.
                        # TODO: what should the exit code be?
                        self.outputQueue.put((jobID, -1, 0))

                        # Complain it broke.
                        return False
                    else:
                        # If the job did start, record it
                        self.children[popen.pid] = popen
                        # Make sure we can look it up by PID later
                        self.childToJob[popen.pid] = jobID
                        # Record that the job is running, and the resources it is using
                        info = Info(startTime,
                                    popen, (coreFractions, jobMemory, jobDisk),
                                    killIntended=False)
                        self.runningJobs[jobID] = info

                        log.debug('Launched job %s as child %d', jobID,
                                  popen.pid)

                        # Report success starting the job
                        # Note that if a PID were somehow 0 it would look like False
                        assert popen.pid != 0
                        return popen.pid
                else:
                    # We can't get disk, so free cores and memory
                    self.coreFractions.release(coreFractions)
                    self.memory.release(jobMemory)
                    log.debug('Not enough disk to run job %s', jobID)
            else:
                # Free cores, since we can't get memory
                self.coreFractions.release(coreFractions)
                log.debug('Not enough memory to run job %s', jobID)
        else:
            log.debug('Not enough cores to run job %s', jobID)

        # If we get here, we didn't succeed or fail starting the job.
        # We didn't manage to get the resources.
        # Report that.
        return None

    def _handleChild(self, pid):
        """
        Handle a child process PID that has finished.
        The PID must be for a child job we started.
        Not thread safe to run at the same time as we are making more children.

        Remove the child from our bookkeeping structures and free its resources.
        """

        # Look up the child
        popen = self.children[pid]
        jobID = self.childToJob[pid]
        info = self.runningJobs[jobID]

        # Unpack the job resources
        (coreFractions, jobMemory, jobDisk) = info.resources

        # Clean up our records of the job.
        self.runningJobs.pop(jobID)
        self.childToJob.pop(pid)
        self.children.pop(pid)

        # See how the child did, and reap it.
        statusCode = popen.wait()
        if statusCode != 0 and not info.killIntended:
            log.error("Got exit code %i (indicating failure) "
                      "from job %s.", statusCode, self.jobs[jobID])
        if not info.killIntended:
            # Report if the job failed and we didn't kill it.
            # If we killed it then it shouldn't show up in the queue.
            self.outputQueue.put((jobID, statusCode, time.time() - info.time))

        # Free up the job's resources.
        self.coreFractions.release(coreFractions)
        self.memory.release(jobMemory)
        self.disk.release(jobDisk)

        log.debug('Child %d for job %s succeeded', pid, jobID)

    def issueBatchJob(self, jobNode):
        """Adds the command and resources to a queue to be run."""

        self._checkOnDaddy()

        # Round cores to minCores and apply scale
        cores = math.ceil(
            jobNode.cores * self.scale / self.minCores) * self.minCores
        assert cores <= self.maxCores, (
            'The job {} is requesting {} cores, more than the maximum of '
            '{} cores this batch system was configured with. Scale is '
            'set to {}.'.format(jobNode.jobName, cores, self.maxCores,
                                self.scale))
        assert cores >= self.minCores
        assert jobNode.memory <= self.maxMemory, (
            'The job {} is requesting {} bytes of memory, more than '
            'the maximum of {} this batch system was configured '
            'with.'.format(jobNode.jobName, jobNode.memory, self.maxMemory))

        self.checkResourceRequest(jobNode.memory, cores, jobNode.disk)
        log.debug(
            "Issuing the command: %s with memory: %i, cores: %i, disk: %i" %
            (jobNode.command, jobNode.memory, cores, jobNode.disk))
        with self.jobIndexLock:
            jobID = self.jobIndex
            self.jobIndex += 1
        self.jobs[jobID] = jobNode.command

        if self.debugWorker:
            # Run immediately, blocking for return.
            # Ignore resource requirements; we run one job at a time
            self._runDebugJob(jobNode.command, jobID, self.environment.copy())
        else:
            # Queue the job for later
            self.inputQueue.put(
                (jobNode.command, jobID, cores, jobNode.memory, jobNode.disk,
                 self.environment.copy()))

        return jobID

    def killBatchJobs(self, jobIDs):
        """Kills jobs by ID."""

        self._checkOnDaddy()

        log.debug('Killing jobs: {}'.format(jobIDs))
        for jobID in jobIDs:
            if jobID in self.runningJobs:
                info = self.runningJobs[jobID]
                info.killIntended = True
                if info.popen is not None:
                    log.debug('Send kill to PID %s', info.popen.pid)
                    info.popen.kill()
                    log.debug('Sent kill to PID %s', info.popen.pid)
                else:
                    # No popen if running in forkless mode currently
                    assert self.debugWorker
                    log.critical("Can't kill job: %s in debug mode" % jobID)
                while jobID in self.runningJobs:
                    pass

    def getIssuedBatchJobIDs(self):
        """Just returns all the jobs that have been run, but not yet returned as updated."""

        self._checkOnDaddy()

        return list(self.jobs.keys())

    def getRunningBatchJobIDs(self):

        self._checkOnDaddy()

        now = time.time()
        return {
            jobID: now - info.time
            for jobID, info in list(self.runningJobs.items())
        }

    def shutdown(self):
        """
        Cleanly terminate and join daddy thread.
        """

        if self.daddyThread is not None:
            # Tell the daddy thread to stop.
            self.shuttingDown.set()
            # Wait for it to stop.
            self.daddyThread.join()

        BatchSystemSupport.workerCleanup(self.workerCleanupInfo)

    def getUpdatedBatchJob(self, maxWait):
        """Returns a tuple of a no-longer-running job, the return value of its process, and its runtime, or None."""

        self._checkOnDaddy()

        try:
            item = self.outputQueue.get(timeout=maxWait)
        except Empty:
            return None
        jobID, exitValue, wallTime = item
        jobCommand = self.jobs.pop(jobID)
        log.debug("Ran jobID: %s with exit value: %i", jobID, exitValue)
        return jobID, exitValue, wallTime

    @classmethod
    def setOptions(cls, setOption):
        setOption("scale", default=1)
Example n. 10
0
class SingleMachineBatchSystem(BatchSystemSupport):
    """
    The interface for running jobs on a single machine. Runs all the jobs you give it as they
    come in, but in parallel.
    """

    @classmethod
    def supportsHotDeployment(cls):
        return False

    @classmethod
    def supportsWorkerCleanup(cls):
        return True

    numCores = multiprocessing.cpu_count()

    minCores = 0.1
    """
    The minimal fractional CPU. Tasks with a smaller core requirement will be rounded up to this
    value. One important invariant of this class is that each worker thread represents a CPU
    requirement of minCores, meaning that we can never run more than numCores / minCores jobs
    concurrently.
    """
    physicalMemory = toil.physicalMemory()

    def __init__(self, config, maxCores, maxMemory, maxDisk):
        if maxCores > self.numCores:
            log.warn('Limiting maxCores to CPU count of system (%i).', self.numCores)
            maxCores = self.numCores
        if maxMemory > self.physicalMemory:
            log.warn('Limiting maxMemory to physically available memory (%i).', self.physicalMemory)
            maxMemory = self.physicalMemory
        super(SingleMachineBatchSystem, self).__init__(config, maxCores, maxMemory, maxDisk)
        assert self.maxCores >= self.minCores
        assert self.maxMemory >= 1

        # The scale allows the user to apply a factor to each task's cores requirement, thereby
        # squeezing more tasks onto each core (scale < 1) or stretching tasks over more cores
        # (scale > 1).
        self.scale = config.scale
        # Number of worker threads that will be started
        self.numWorkers = int(self.maxCores / self.minCores)
        # A counter to generate job IDs and a lock to guard it
        self.jobIndex = 0
        self.jobIndexLock = Lock()
        # A dictionary mapping IDs of submitted jobs to the command line
        self.jobs = {}
        """
        :type: dict[str,str]
        """
        # A queue of jobs waiting to be executed. Consumed by the workers.
        self.inputQueue = Queue()
        # A queue of finished jobs. Produced by the workers.
        self.outputQueue = Queue()
        # A dictionary mapping IDs of currently running jobs to their Info objects
        self.runningJobs = {}
        """
        :type: dict[str,Info]
        """
        # The list of worker threads
        self.workerThreads = []
        """
        :type list[Thread]
        """
        # A pool representing available CPU in units of minCores
        self.coreFractions = ResourcePool(self.numWorkers)
        # A lock to work around the lack of thread-safety in Python's subprocess module
        self.popenLock = Lock()
        # A pool representing available memory in bytes
        self.memory = ResourcePool(self.maxMemory)
        log.info('Setting up the thread pool with %i workers, '
                 'given a minimum CPU fraction of %f '
                 'and a maximum CPU value of %i.', self.numWorkers, self.minCores, maxCores)
        for i in xrange(self.numWorkers):
            worker = Thread(target=self.worker, args=(self.inputQueue,))
            self.workerThreads.append(worker)
            worker.start()

    # Note: The input queue is passed as an argument because the corresponding attribute is reset
    # to None in shutdown()

    def worker(self, inputQueue):
        while True:
            args = inputQueue.get()
            if args is None:
                log.debug('Received queue sentinel.')
                break
            jobCommand, jobID, jobCores, jobMemory, jobDisk, environment = args
            try:
                coreFractions = int(jobCores / self.minCores)
                log.debug('Acquiring %i bytes of memory from a pool of %s.', jobMemory, self.memory)
                with self.memory.acquisitionOf(jobMemory):
                    log.debug('Acquiring %i fractional cores from a pool of %s to satisfy a '
                              'request of %f cores', coreFractions, self.coreFractions, jobCores)
                    with self.coreFractions.acquisitionOf(coreFractions):
                        log.info("Executing command: '%s'.", jobCommand)
                        startTime = time.time() #Time job is started
                        with self.popenLock:
                            popen = subprocess.Popen(jobCommand,
                                                     shell=True,
                                                     env=dict(os.environ, **environment))
                        statusCode = None
                        info = Info(time.time(), popen, killIntended=False)
                        try:
                            self.runningJobs[jobID] = info
                            try:
                                statusCode = popen.wait()
                                if 0 != statusCode:
                                    if statusCode != -9 or not info.killIntended:
                                        log.error("Got exit code %i (indicating failure) from "
                                                  "command '%s'.", statusCode, jobCommand)
                            finally:
                                self.runningJobs.pop(jobID)
                        finally:
                            if statusCode is not None and not info.killIntended:
                                self.outputQueue.put((jobID, statusCode, time.time() - startTime))
            finally:
                log.debug('Finished job. self.coreFractions ~ %s and self.memory ~ %s',
                          self.coreFractions.value, self.memory.value)
        log.debug('Exiting worker thread normally.')

    def issueBatchJob(self, command, memory, cores, disk, preemptable):
        """
        Adds the command and resources to a queue to be run.
        """
        # Round cores to minCores and apply scale
        cores = math.ceil(cores * self.scale / self.minCores) * self.minCores
        assert cores <= self.maxCores, ('The job is requesting {} cores, more than the maximum of '
                                        '{} cores this batch system was configured with. Scale is '
                                        'set to {}.'.format(cores, self.maxCores, self.scale))
        assert cores >= self.minCores
        assert memory <= self.maxMemory, ('The job is requesting {} bytes of memory, more than '
                                          'the maximum of {} this batch system was configured '
                                          'with.'.format(memory, self.maxMemory))

        self.checkResourceRequest(memory, cores, disk)
        log.debug("Issuing the command: %s with memory: %i, cores: %i, disk: %i" % (
            command, memory, cores, disk))
        with self.jobIndexLock:
            jobID = self.jobIndex
            self.jobIndex += 1
        self.jobs[jobID] = command
        self.inputQueue.put((command, jobID, cores, memory, disk, self.environment.copy()))
        return jobID

    def killBatchJobs(self, jobIDs):
        """
        Kills jobs by ID
        """
        log.debug('Killing jobs: {}'.format(jobIDs))
        for jobID in jobIDs:
            if jobID in self.runningJobs:
                info = self.runningJobs[jobID]
                info.killIntended = True
                os.kill(info.popen.pid, 9)
                while jobID in self.runningJobs:
                    pass

    def getIssuedBatchJobIDs(self):
        """
        Just returns all the jobs that have been run, but not yet returned as updated.
        """
        return self.jobs.keys()

    def getRunningBatchJobIDs(self):
        now = time.time()
        return {jobID: now - info.time for jobID, info in self.runningJobs.iteritems()}

    def shutdown(self):
        """
        Cleanly terminate worker threads. Add one sentinel to inputQueue per worker thread, then join
        all worker threads.
        """
        # Remove reference to inputQueue (raises exception if inputQueue is used after method call)
        inputQueue = self.inputQueue
        self.inputQueue = None
        for i in xrange(self.numWorkers):
            inputQueue.put(None)
        for thread in self.workerThreads:
            thread.join()
        BatchSystemSupport.workerCleanup(self.workerCleanupInfo)

    def getUpdatedBatchJob(self, maxWait):
        """
        Returns a tuple of a finished job's ID, the exit value of its process, and its wall-clock
        runtime, or None if no job finished within maxWait.
        """
        try:
            item = self.outputQueue.get(timeout=maxWait)
        except Empty:
            return None
        jobID, exitValue, wallTime = item
        self.jobs.pop(jobID)
        log.debug("Ran jobID: %s with exit value: %i", jobID, exitValue)
        return jobID, exitValue, wallTime

    @classmethod
    def getRescueBatchJobFrequency(cls):
        """
        This should not really occur without an error. To exercise the system we allow it every 90 minutes.
        """
        return 5400
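
The shutdown() methods of the worker-thread variants stop the pool by pushing one None sentinel per worker and joining the threads. A self-contained sketch of that sentinel pattern, detached from the batch system; the worker logic and work items are placeholders:

import threading
from queue import Queue  # Python 2: from Queue import Queue


def worker(inputQueue, results):
    while True:
        item = inputQueue.get()
        if item is None:
            # Sentinel: no more work, exit the thread (cf. the worker loops above).
            break
        results.append(item * 2)  # stand-in for running a job


def runPool(numWorkers, workItems):
    inputQueue = Queue()
    results = []
    threads = [threading.Thread(target=worker, args=(inputQueue, results))
               for _ in range(numWorkers)]
    for t in threads:
        t.start()
    for item in workItems:
        inputQueue.put(item)
    # One sentinel per worker, mirroring shutdown() above.
    for _ in range(numWorkers):
        inputQueue.put(None)
    for t in threads:
        t.join()
    return results


print(runPool(4, range(10)))  # order of results is nondeterministic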