def _executeQuantaMP(self, iterable, butler, taskFactory):
    """Execute all Quanta in a separate process pool.

    Parameters
    ----------
    iterable : iterable of `~lsst.pipe.base.QuantumIterData`
        Sequence of Quanta to execute. It is guaranteed that pre-requisites
        for a given Quantum will always appear before that Quantum.
    butler : `lsst.daf.butler.Butler`
        Data butler instance
    taskFactory : `~lsst.pipe.base.TaskFactory`
        Task factory.

    Raises
    ------
    MPGraphExecutorError
        Raised if a task class does not support multiprocessing.
    multiprocessing.TimeoutError
        Raised if a result is not ready within ``self.timeout`` seconds.
    Exception
        Any exception raised by a remote call is re-raised when its result
        is retrieved.
    """
    disableImplicitThreading()  # To prevent thread contention

    pool = multiprocessing.Pool(processes=self.numProc, maxtasksperchild=1)
    try:
        # map quantum id to AsyncResult
        results = {}

        # Add each Quantum to a pool, wait until its pre-requisites completed.
        # TODO: This is not super-efficient as it stops at the first Quantum
        # that cannot be executed (yet) and does not check other Quanta.
        for qdata in iterable:

            # check that task can run in sub-process
            taskDef = qdata.taskDef
            if not taskDef.taskClass.canMultiprocess:
                raise MPGraphExecutorError(
                    f"Task {taskDef.taskName} does not support multiprocessing;"
                    " use single process")

            # Wait for all dependencies
            for dep in qdata.dependencies:
                # Wait for max. timeout for this result to be ready.
                # This can raise on timeout or if remote call raises.
                _LOG.debug("Check dependency %s for %s", dep, qdata)
                results[dep].get(self.timeout)
                _LOG.debug("Result %s is ready", dep)

            # Add it to the pool and remember its result
            _LOG.debug("Submitting %s", qdata)
            args = (taskDef.taskClass, taskDef.config, qdata.quantum, butler, taskFactory)
            results[qdata.quantumId] = pool.apply_async(
                self._executePipelineTask, args)

        # Everything is submitted, wait until it's complete
        _LOG.debug("Wait for all tasks")
        for qid, res in results.items():
            if res.ready():
                _LOG.debug("Result %d is ready", qid)
            else:
                _LOG.debug("Waiting for result %d", qid)
            # Always retrieve the result: for a finished task this returns
            # immediately, and it is the only place where an exception raised
            # by a task that nothing depends on gets propagated.
            res.get(self.timeout)
    except BaseException:
        # Do not leave workers running after a failure, a timeout or Ctrl-C.
        pool.terminate()
        raise
    else:
        # All results were collected, let the workers exit normally.
        pool.close()
    finally:
        # Legal after either close() or terminate(); reaps worker processes.
        pool.join()
def __call__(self, function, iterable):
    """Apply function to every item of iterable using a process pool.

    Wrapper around ``pool.map_async``, to handle timeout. This is required
    so as to trigger an immediate interrupt on the KeyboardInterrupt
    (Ctrl-C); see
    http://stackoverflow.com/questions/1408356/keyboard-interrupts-with-pythons-multiprocessing-pool

    Parameters
    ----------
    function : callable
        Function applied to each item; it is handed to the pool unchanged
        (this method does not wrap it, e.g. in ``_poolFunctionWrapper``).
    iterable : iterable
        Items to process.

    Returns
    -------
    results : `list`
        Return values of ``function``, as produced by
        ``pool.map_async(...).get(self.timeout)``.

    Raises
    ------
    multiprocessing.TimeoutError
        Raised if the results are not ready within ``self.timeout`` seconds.
    Exception
        An exception raised by ``function`` in a worker is re-raised here.
    """
    disableImplicitThreading()  # To prevent thread contention
    pool = multiprocessing.Pool(processes=self.numProc, maxtasksperchild=1)
    try:
        output = pool.map_async(function, iterable).get(self.timeout)
    except BaseException:
        # Timeout, worker failure or Ctrl-C: stop the workers immediately
        # instead of leaving them running in the background.
        pool.terminate()
        raise
    else:
        pool.close()
    finally:
        # Legal after either close() or terminate(); reaps worker processes.
        pool.join()
    return output
def run(self, parsedCmd):
    """Run the task on all targets.

    Parameters
    ----------
    parsedCmd : `argparse.Namespace`
        Parsed command `argparse.Namespace`.

    Returns
    -------
    resultList : `list`
        A list of results returned by `TaskRunner.__call__`, or an empty
        list if `TaskRunner.__call__` is not called (e.g. if
        `TaskRunner.precall` returns `False`). See `TaskRunner.__call__`
        for details.

    Notes
    -----
    The task is run under multiprocessing if `TaskRunner.numProcesses` is
    more than 1; otherwise processing is serial.

    When a process pool is used it is always released before this method
    returns or raises: closed after a normal run, terminated if an
    exception propagates.
    """
    resultList = []
    disableImplicitThreading()  # To prevent thread contention
    if self.numProcesses > 1:
        import multiprocessing
        self.prepareForMultiProcessing()
        pool = multiprocessing.Pool(processes=self.numProcesses, maxtasksperchild=1)
        mapFunc = functools.partial(_runPool, pool, self.timeout)
    else:
        pool = None
        mapFunc = map

    try:
        if self.precall(parsedCmd):
            profileName = parsedCmd.profile if hasattr(parsedCmd, "profile") else None
            log = parsedCmd.log
            targetList = self.getTargetList(parsedCmd)
            if len(targetList) > 0:
                with profile(profileName, log):
                    # Run the task using self.__call__
                    resultList = list(mapFunc(self, targetList))
            else:
                log.warn("Not running the task because there is no data to process; "
                         "you may preview data using \"--show data\"")
    except BaseException:
        # Failure, timeout or Ctrl-C: do not wait for outstanding work,
        # stop the workers right away.
        if pool is not None:
            pool.terminate()
        raise
    else:
        if pool is not None:
            pool.close()
    finally:
        # Legal after either close() or terminate(); reaps worker processes.
        if pool is not None:
            pool.join()

    return resultList
def testApi(self):
    """Check that the threading helpers can be imported from `lsst.base`
    and that the ones called here return values of the expected type.
    """
    from lsst.base import haveThreads
    threadsAvailable = haveThreads()
    self.assertIsInstance(threadsAvailable, bool)

    # Import check only. Calling setNumThreads raises
    # lsst::base::NoThreadsException, which is untranslated when the
    # threading library is missing (this needs fixing!), so the check
    # ``self.assertIs(setNumThreads(4), None)`` stays disabled.
    from lsst.base import setNumThreads  # noqa F401

    from lsst.base import getNumThreads
    threadCount = getNumThreads()
    self.assertIsInstance(threadCount, int)

    from lsst.base import disableImplicitThreading
    disableOutcome = disableImplicitThreading()
    self.assertIsInstance(disableOutcome, bool)
def testApi(self):
    """Check that the threading helpers can be imported from `lsst.base`
    and that the ones called here return values of the expected type.
    """
    # ``long`` only exists on Python 2; numbers.Integral accepts the
    # integer types of both Python 2 (int, long) and Python 3 (int).
    import numbers

    from lsst.base import haveThreads
    self.assertIsInstance(haveThreads(), bool)

    # Import check only. Calling setNumThreads raises
    # lsst::base::NoThreadsException, which is untranslated when the
    # threading library is missing (this needs fixing!), so the check
    # ``self.assertIs(setNumThreads(4), None)`` stays disabled.
    from lsst.base import setNumThreads  # noqa F401

    from lsst.base import getNumThreads
    self.assertIsInstance(getNumThreads(), numbers.Integral)

    from lsst.base import disableImplicitThreading
    self.assertIsInstance(disableImplicitThreading(), bool)
def _executeQuantaMP(self, graph, butler):
    """Execute all Quanta in separate processes.

    Parameters
    ----------
    graph : `QuantumGraph`
        `QuantumGraph` that is to be executed.
    butler : `lsst.daf.butler.Butler`
        Data butler instance

    Raises
    ------
    MPGraphExecutorError
        Raised if a task class does not support multiprocessing, if a task
        fails while ``self.failFast`` is set, or at the end of execution if
        any task failed.
    MPTimeoutError
        Raised if a task exceeds ``self.timeout`` while ``self.failFast`` is
        set, or at the end of execution if the set of failed nodes equals
        the set of timed out nodes.
    """
    disableImplicitThreading()  # To prevent thread contention

    _LOG.debug("Using %r for multiprocessing start method", self.startMethod)

    # re-pack input quantum data into jobs list
    jobs = _JobList(graph)

    # check that all tasks can run in sub-process
    for job in jobs.jobs:
        taskDef = job.qnode.taskDef
        if not taskDef.taskClass.canMultiprocess:
            raise MPGraphExecutorError(f"Task {taskDef.taskName} does not support multiprocessing;"
                                       " use single process")

    finishedCount, failedCount = 0, 0
    while jobs.pending or jobs.running:

        _LOG.debug("#pendingJobs: %s", len(jobs.pending))
        _LOG.debug("#runningJobs: %s", len(jobs.running))

        # See if any jobs have finished.
        # Iterate over a snapshot: setJobState() is defined elsewhere and
        # presumably moves the job out of ``jobs.running``; mutating a list
        # while iterating over it would silently skip the next entry.
        for job in list(jobs.running):
            if not job.process.is_alive():
                _LOG.debug("finished: %s", job)
                # finished
                exitcode = job.process.exitcode
                if exitcode == 0:
                    jobs.setJobState(job, JobState.FINISHED)
                    job.cleanup()
                    _LOG.debug("success: %s took %.3f seconds", job, time.time() - job.started)
                else:
                    jobs.setJobState(job, JobState.FAILED)
                    job.cleanup()
                    _LOG.debug("failed: %s", job)
                    if self.failFast:
                        # stop everything else that is still running
                        for stopJob in jobs.running:
                            if stopJob is not job:
                                stopJob.stop()
                        raise MPGraphExecutorError(
                            f"Task {job} failed, exit code={exitcode}."
                        )
                    else:
                        _LOG.error(
                            "Task %s failed; processing will continue for remaining tasks.", job
                        )
            else:
                # check for timeout
                now = time.time()
                if now - job.started > self.timeout:
                    jobs.setJobState(job, JobState.TIMED_OUT)
                    _LOG.debug("Terminating job %s due to timeout", job)
                    job.stop()
                    job.cleanup()
                    if self.failFast:
                        raise MPTimeoutError(f"Timeout ({self.timeout} sec) for task {job}.")
                    else:
                        _LOG.error(
                            "Timeout (%s sec) for task %s; task is killed, processing continues "
                            "for remaining tasks.", self.timeout, job
                        )

        # Fail jobs whose inputs failed, this may need several iterations
        # if the order is not right, will be done in the next loop.
        if jobs.failedNodes:
            # Snapshot for the same reason as above (setJobState() presumably
            # removes the job from ``jobs.pending``).
            for job in list(jobs.pending):
                jobInputNodes = graph.determineInputsToQuantumNode(job.qnode)
                if jobInputNodes & jobs.failedNodes:
                    jobs.setJobState(job, JobState.FAILED_DEP)
                    _LOG.error("Upstream job failed for task %s, skipping this task.", job)

        # see if we can start more jobs
        if len(jobs.running) < self.numProc:
            # Snapshot: submit() is defined elsewhere and presumably moves the
            # job from ``jobs.pending`` to ``jobs.running``.
            for job in list(jobs.pending):
                jobInputNodes = graph.determineInputsToQuantumNode(job.qnode)
                if jobInputNodes <= jobs.finishedNodes:
                    # all dependencies have completed, can start new job
                    if len(jobs.running) < self.numProc:
                        _LOG.debug("Submitting %s", job)
                        jobs.submit(job, butler, self.quantumExecutor, self.startMethod)
                    if len(jobs.running) >= self.numProc:
                        # cannot start any more jobs, wait until something finishes
                        break

        # Do cleanup for timed out jobs if necessary.
        jobs.cleanup()

        # Print progress message if something changed.
        newFinished, newFailed = len(jobs.finishedNodes), len(jobs.failedNodes)
        if (finishedCount, failedCount) != (newFinished, newFailed):
            finishedCount, failedCount = newFinished, newFailed
            totalCount = len(jobs.jobs)
            _LOG.info("Executed %d quanta successfully, %d failed and %d remain out of total %d quanta.",
                      finishedCount, failedCount, totalCount - finishedCount - failedCount, totalCount)

        # Here we want to wait until one of the running jobs completes
        # but multiprocessing does not provide an API for that, for now
        # just sleep a little bit and go back to the loop.
        if jobs.running:
            time.sleep(0.1)

    if jobs.failedNodes:
        # print list of failed jobs
        _LOG.error("Failed jobs:")
        for job in jobs.jobs:
            if job.state != JobState.FINISHED:
                _LOG.error(" - %s: %s", job.state.name, job)

        # if any job failed raise an exception
        if jobs.failedNodes == jobs.timedOutNodes:
            raise MPTimeoutError("One or more tasks timed out during execution.")
        else:
            raise MPGraphExecutorError("One or more tasks failed or timed out during execution.")