def _executeQuantaMP(self, iterable, butler, taskFactory):
        """Execute all Quanta in separate process pool.

        Parameters
        ----------
        iterable : iterable of `~lsst.pipe.base.QuantumIterData`
            Sequence of Quanta to execute. It is guaranteed that pre-requisites
            for a given Quantum will always appear before that Quantum.
        butler : `lsst.daf.butler.Butler`
            Data butler instance.
        taskFactory : `~lsst.pipe.base.TaskFactory`
            Task factory.
        """

        disableImplicitThreading()  # To prevent thread contention

        pool = multiprocessing.Pool(processes=self.numProc, maxtasksperchild=1)

        # map quantum id to AsyncResult
        results = {}

        # Add each Quantum to the pool, waiting until its pre-requisites have
        # completed.
        # TODO: This is not super-efficient as it stops at the first Quantum
        # that cannot be executed (yet) and does not check other Quanta.
        for qdata in iterable:

            # check that task can run in sub-process
            taskDef = qdata.taskDef
            if not taskDef.taskClass.canMultiprocess:
                raise MPGraphExecutorError(
                    f"Task {taskDef.taskName} does not support multiprocessing;"
                    " use single process")

            # Wait for all dependencies
            for dep in qdata.dependencies:
                # Wait up to the timeout for this result to be ready.
                # This can raise on timeout or if the remote call raised.
                _LOG.debug("Check dependency %s for %s", dep, qdata)
                results[dep].get(self.timeout)
                _LOG.debug("Result %s is ready", dep)

            # Add it to the pool and remember its result
            _LOG.debug("Sumbitting %s", qdata)
            args = (taskDef.taskClass, taskDef.config, qdata.quantum, butler,
                    taskFactory)
            results[qdata.quantumId] = pool.apply_async(
                self._executePipelineTask, args)

        # Everything is submitted; wait until it's complete
        _LOG.debug("Wait for all tasks")
        for qid, res in results.items():
            if res.ready():
                _LOG.debug("Result %d is ready", qid)
            else:
                _LOG.debug("Waiting for result %d", qid)
                res.get(self.timeout)
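# ----------------------------------------------------------------------
# A minimal, self-contained sketch of the submission pattern above:
# quanta are submitted with apply_async in dependency order, and each
# submission first blocks on the AsyncResult of its prerequisites via
# get(timeout). The graph and worker below are made up for illustration;
# only the pool mechanics mirror the method above.
import multiprocessing


def _work(qid):
    return qid * qid  # stand-in for executing one quantum


if __name__ == "__main__":
    deps = {1: [], 2: [1], 3: [1, 2]}  # quantum id -> prerequisite ids
    pool = multiprocessing.Pool(processes=2, maxtasksperchild=1)
    results = {}
    for qid in sorted(deps):
        for dep in deps[qid]:
            results[dep].get(30)  # raises on timeout or remote error
        results[qid] = pool.apply_async(_work, (qid,))
    for qid, res in results.items():
        print(qid, "->", res.get(30))
    pool.close()
    pool.join()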
    def __call__(self, function, iterable):
        """Apply function to every item of iterable.

        Wrapper around pool.map_async to handle the timeout. This is required
        so that a KeyboardInterrupt (Ctrl-C) triggers an immediate interrupt
        instead of being swallowed while waiting; see
        http://stackoverflow.com/questions/1408356/keyboard-interrupts-with-pythons-multiprocessing-pool

        Further wraps the function in _poolFunctionWrapper to catch exceptions
        that don't inherit from Exception.
        """
        disableImplicitThreading()  # To prevent thread contention
        pool = multiprocessing.Pool(processes=self.numProc, maxtasksperchild=1)
        result = pool.map_async(function, iterable)
        return result.get(self.timeout)
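# ----------------------------------------------------------------------
# Why map_async(...).get(timeout) instead of a plain pool.map: blocking in
# pool.map can swallow Ctrl-C in the parent process, while waiting on the
# AsyncResult with an explicit timeout keeps the main thread interruptible
# (see the Stack Overflow link in the docstring above). A minimal
# illustration with a made-up worker:
import multiprocessing


def _square(x):
    return x * x


if __name__ == "__main__":
    pool = multiprocessing.Pool(processes=2, maxtasksperchild=1)
    try:
        # same result as pool.map, but Ctrl-C interrupts promptly
        print(pool.map_async(_square, range(10)).get(9999))
    finally:
        pool.close()
        pool.join()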
    def run(self, parsedCmd):
        """Run the task on all targets.

        Parameters
        ----------
        parsedCmd : `argparse.Namespace`
            Parsed command-line arguments.

        Returns
        -------
        resultList : `list`
            A list of results returned by `TaskRunner.__call__`, or an empty list if `TaskRunner.__call__`
            is not called (e.g. if `TaskRunner.precall` returns `False`). See `TaskRunner.__call__`
            for details.

        Notes
        -----
        The task is run under multiprocessing if `TaskRunner.numProcesses` is more than 1; otherwise
        processing is serial.
        """
        resultList = []
        disableImplicitThreading()  # To prevent thread contention
        if self.numProcesses > 1:
            import multiprocessing
            self.prepareForMultiProcessing()
            pool = multiprocessing.Pool(processes=self.numProcesses, maxtasksperchild=1)
            mapFunc = functools.partial(_runPool, pool, self.timeout)
        else:
            pool = None
            mapFunc = map

        if self.precall(parsedCmd):
            profileName = parsedCmd.profile if hasattr(parsedCmd, "profile") else None
            log = parsedCmd.log
            targetList = self.getTargetList(parsedCmd)
            if len(targetList) > 0:
                with profile(profileName, log):
                    # Run the task using self.__call__
                    resultList = list(mapFunc(self, targetList))
            else:
                log.warn("Not running the task because there is no data to process; "
                         "you may preview data using \"--show data\"")

        if pool is not None:
            pool.close()
            pool.join()

        return resultList
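# ----------------------------------------------------------------------
# _runPool is referenced above but not defined in this excerpt. Given how
# mapFunc is built with functools.partial(_runPool, pool, self.timeout) and
# then invoked as mapFunc(self, targetList), a minimal implementation
# consistent with that call shape would be (a sketch, not necessarily the
# exact upstream code):
def _runPool(pool, timeout, function, iterable):
    """Run `function` over `iterable` in `pool` with an interruptible wait."""
    return pool.map_async(function, iterable).get(timeout)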
    def testApi(self):
        from lsst.base import haveThreads
        self.assertIsInstance(haveThreads(), bool)

        from lsst.base import setNumThreads  # noqa: F401
        # Raises lsst::base::NoThreadsException which is untranslated
        # when the threading library is missing; this needs fixing!
        # self.assertIs(setNumThreads(4), None)

        from lsst.base import getNumThreads
        self.assertIsInstance(getNumThreads(), int)

        from lsst.base import disableImplicitThreading
        self.assertIsInstance(disableImplicitThreading(), bool)
    def _executeQuantaMP(self, graph, butler):
        """Execute all Quanta in separate processes.

        Parameters
        ----------
        graph : `QuantumGraph`
            `QuantumGraph` that is to be executed.
        butler : `lsst.daf.butler.Butler`
            Data butler instance.
        """

        disableImplicitThreading()  # To prevent thread contention

        _LOG.debug("Using %r for multiprocessing start method", self.startMethod)

        # re-pack input quantum data into jobs list
        jobs = _JobList(graph)

        # check that all tasks can run in sub-process
        for job in jobs.jobs:
            taskDef = job.qnode.taskDef
            if not taskDef.taskClass.canMultiprocess:
                raise MPGraphExecutorError(f"Task {taskDef.taskName} does not support multiprocessing;"
                                           " use single process")

        finishedCount, failedCount = 0, 0
        while jobs.pending or jobs.running:

            _LOG.debug("#pendingJobs: %s", len(jobs.pending))
            _LOG.debug("#runningJobs: %s", len(jobs.running))

            # See if any jobs have finished
            for job in jobs.running:
                if not job.process.is_alive():
                    _LOG.debug("finished: %s", job)
                    # finished
                    exitcode = job.process.exitcode
                    if exitcode == 0:
                        jobs.setJobState(job, JobState.FINISHED)
                        job.cleanup()
                        _LOG.debug("success: %s took %.3f seconds", job, time.time() - job.started)
                    else:
                        jobs.setJobState(job, JobState.FAILED)
                        job.cleanup()
                        _LOG.debug("failed: %s", job)
                        if self.failFast:
                            for stopJob in jobs.running:
                                if stopJob is not job:
                                    stopJob.stop()
                            raise MPGraphExecutorError(
                                f"Task {job} failed, exit code={exitcode}."
                            )
                        else:
                            _LOG.error(
                                "Task %s failed; processing will continue for remaining tasks.", job
                            )
                else:
                    # check for timeout
                    now = time.time()
                    if now - job.started > self.timeout:
                        jobs.setJobState(job, JobState.TIMED_OUT)
                        _LOG.debug("Terminating job %s due to timeout", job)
                        job.stop()
                        job.cleanup()
                        if self.failFast:
                            raise MPTimeoutError(f"Timeout ({self.timeout} sec) for task {job}.")
                        else:
                            _LOG.error(
                                "Timeout (%s sec) for task %s; task is killed, processing continues "
                                "for remaining tasks.", self.timeout, job
                            )

            # Fail jobs whose inputs failed. This may need several iterations
            # if the ordering is not right; leftover cases are picked up on
            # the next pass through the loop.
            if jobs.failedNodes:
                for job in jobs.pending:
                    jobInputNodes = graph.determineInputsToQuantumNode(job.qnode)
                    if jobInputNodes & jobs.failedNodes:
                        jobs.setJobState(job, JobState.FAILED_DEP)
                        _LOG.error("Upstream job failed for task %s, skipping this task.", job)

            # see if we can start more jobs
            if len(jobs.running) < self.numProc:
                for job in jobs.pending:
                    jobInputNodes = graph.determineInputsToQuantumNode(job.qnode)
                    if jobInputNodes <= jobs.finishedNodes:
                        # all dependencies have completed, can start new job
                        if len(jobs.running) < self.numProc:
                            _LOG.debug("Submitting %s", job)
                            jobs.submit(job, butler, self.quantumExecutor, self.startMethod)
                        if len(jobs.running) >= self.numProc:
                            # cannot start any more jobs, wait until something finishes
                            break

            # Do cleanup for timed out jobs if necessary.
            jobs.cleanup()

            # Print progress message if something changed.
            newFinished, newFailed = len(jobs.finishedNodes), len(jobs.failedNodes)
            if (finishedCount, failedCount) != (newFinished, newFailed):
                finishedCount, failedCount = newFinished, newFailed
                totalCount = len(jobs.jobs)
                _LOG.info("Executed %d quanta successfully, %d failed and %d remain out of total %d quanta.",
                          finishedCount, failedCount, totalCount - finishedCount - failedCount, totalCount)

            # Ideally we would block until one of the running jobs completes,
            # but multiprocessing provides no API for that, so for now just
            # sleep briefly and poll again on the next loop iteration.
            if jobs.running:
                time.sleep(0.1)

        if jobs.failedNodes:
            # print list of failed jobs
            _LOG.error("Failed jobs:")
            for job in jobs.jobs:
                if job.state != JobState.FINISHED:
                    _LOG.error("  - %s: %s", job.state.name, job)

            # if any job failed raise an exception
            if jobs.failedNodes == jobs.timedOutNodes:
                raise MPTimeoutError("One or more tasks timed out during execution.")
            else:
                raise MPGraphExecutorError("One or more tasks failed or timed out during execution.")
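# ----------------------------------------------------------------------
# A minimal sketch of the per-job process management used above: one
# multiprocessing.Process per quantum, polled via is_alive()/exitcode in a
# loop, with processes terminated once they exceed a timeout. The worker
# and timeout below are illustrative only.
import multiprocessing
import time


def _quantum_worker(qid):
    time.sleep(0.2)  # stand-in for real quantum execution


if __name__ == "__main__":
    procs = {qid: multiprocessing.Process(target=_quantum_worker, args=(qid,))
             for qid in (1, 2, 3)}
    started = {}
    for qid, proc in procs.items():
        proc.start()
        started[qid] = time.time()
    while procs:
        for qid, proc in list(procs.items()):
            if not proc.is_alive():
                print(qid, "finished" if proc.exitcode == 0 else "failed")
                del procs[qid]
            elif time.time() - started[qid] > 5.0:
                proc.terminate()  # analogous to job.stop() on timeout
                proc.join()
                del procs[qid]
        # multiprocessing has no "wait for any child" API; poll instead,
        # mirroring the sleep at the bottom of the executor loop above.
        time.sleep(0.1)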