def parallel_map(
    func,
    iterable,
    iterable_type=single_argument,
    params=None,
    processes=1,
    method="multiprocessing",
    qsub_command=None,
    asynchronous=True,
    callback=None,
    preserve_order=True,
    preserve_exception_message=False,
    use_manager=False,
    stacktrace_handling="ignore"):
    """
    Generic parallel map() implementation for a variety of platforms, including
    the multiprocessing module and supported queuing systems, via the module
    libtbx.scheduling.  This is less flexible than pool_map above, since it
    does not provide a way to use a non-pickleable target function, but it
    provides a consistent API for programs where multiple execution methods are
    desired.  It will also work on Windows (if the method is multiprocessing or
    threading).

    Note that for most applications, the threading method will be constrained
    by the Global Interpreter Lock, therefore multiprocessing is preferred for
    parallelizing across a single multi-core system.

    See Computational Crystallography Newsletter 3:37-42 (2012) for details of
    the underlying method.

    :param func: target function (must be pickleable)
    :param iterable: list of arguments for func
    :param iterable_type: adapter factory; it is called once with func and the
      object it returns is invoked with each element of iterable as its single
      positional argument
    :param params: optional object whose technology, nproc and qsub_command
      attributes override method, processes and qsub_command respectively
    :param processes: number of processes/threads to start.  When this is 1
      and LIBTBX_FORCE_PARALLEL is not set in the environment, the
      libtbx.scheduling.mainthread creator is used instead of a job scheduler.
      For queuing systems, Auto or None selects unlimited capacity.
    :param method: parallelization method
      (multiprocessing|threading|sge|lsf|pbs)
    :param qsub_command: command to submit queue jobs (optional)
    :param asynchronous: run queue jobs asynchronously
    :param callback: optional function called with each result right after it
      has been appended to the result list
    :param preserve_order: forwarded to parallel_for.iterator as
      keep_input_order
    :param preserve_exception_message: keeps original exception message
    :param use_manager: for the multiprocessing method, select
      philgen.mp_managed_queue instead of philgen.mp_fifo_queue
    :param stacktrace_handling: one of "ignore", "excepthook" or "decorate"
    :returns: a list of result objects
    :raises Sorry: if the scheduling layer raises SetupError, or if
      stacktrace_handling is not one of the recognised values
    :raises AssertionError: if method is not threading, multiprocessing or one
      of the platforms known to philgen.cluster
    """
    # A params object, when supplied, takes precedence over the keywords.
    if params is not None:
        method = params.technology
        processes = params.nproc
        qsub_command = params.qsub_command

    from libtbx.utils import Sorry
    from libtbx.scheduling import SetupError

    if processes == 1 and "LIBTBX_FORCE_PARALLEL" not in os.environ:
        from libtbx.scheduling import mainthread
        creator = mainthread.creator
    else:
        from libtbx.scheduling import philgen
        from libtbx.scheduling import job_scheduler

        if method == "threading":
            technology = philgen.threading(
                capture_exception=preserve_exception_message,
            )
            jfactory = technology.jfactory()
            qfactory = technology.qfactory()[0]
            capacity = job_scheduler.limited(
                njobs=get_processes(processes=processes)
            )
        elif method == "multiprocessing":
            technology = philgen.multiprocessing(
                capture_stderr=preserve_exception_message,
                qtype=(
                    philgen.mp_managed_queue
                    if use_manager
                    else philgen.mp_fifo_queue
                ),
            )
            jfactory = technology.jfactory()
            qfactory = technology.qfactory()[0]
            capacity = job_scheduler.limited(
                njobs=get_processes(processes=processes)
            )
        else:
            # Anything else is treated as a queuing-system platform name.
            technology = philgen.cluster(
                asynchronous=asynchronous,
                capture_stderr=preserve_exception_message,
            )
            assert method in technology.platforms  # perhaps something less intrusive

            try:
                jfactory = technology.jfactory(
                    platform=method, command=qsub_command
                )
            except SetupError as e:
                # Call form of raise: valid on both Python 2 and Python 3.
                raise Sorry(e)

            from libtbx.scheduling import file_queue
            qfactory = file_queue.qfactory()

            if processes is Auto or processes is None:
                capacity = job_scheduler.unlimited
            else:
                capacity = job_scheduler.limited(njobs=processes)

        creator = job_scheduler.creator(
            job_factory=jfactory,
            queue_factory=qfactory,
            capacity=capacity,
        )

    import libtbx.scheduling

    if stacktrace_handling == "ignore":
        sthandler = libtbx.scheduling.ignore
    elif stacktrace_handling == "excepthook":
        sthandler = libtbx.scheduling.excepthook
    elif stacktrace_handling == "decorate":
        sthandler = libtbx.scheduling.decorate
    else:
        raise Sorry(
            "Unknown stacktrace handling method: %s" % stacktrace_handling
        )

    from libtbx.scheduling import parallel_for

    if callback is None:
        callback = lambda r: None

    results = []

    with libtbx.scheduling.holder(creator=creator, stacktrace=sthandler) as manager:
        adfunc = iterable_type(func)

        try:
            pfi = parallel_for.iterator(
                calculations=((adfunc, (args,), {}) for args in iterable),
                manager=manager,
                keep_input_order=preserve_order,
            )

            for (_calc, res) in pfi:
                # res is a callable wrapper; calling it yields the result.
                result = res()
                results.append(result)
                callback(result)
        except SetupError as e:
            raise Sorry(e)

        manager.shutdown()
        manager.join()

    return results
def parallel_map ( func, iterable, iterable_type = single_argument, params=None, processes=1, method="multiprocessing", qsub_command=None, asynchronous=True, callback=None, preserve_order=True, preserve_exception_message=False, use_manager=False, stacktrace_handling = "ignore") : """ Generic parallel map() implementation for a variety of platforms, including the multiprocessing module and supported queuing systems, via the module libtbx.queuing_system_utils.scheduling. This is less flexible than pool_map above, since it does not provide a way to use a non-pickleable target function, but it provides a consistent API for programs where multiple execution methods are desired. It will also work on Windows (if the method is multiprocessing or threading). Note that for most applications, the threading method will be constrained by the Global Interpreter Lock, therefore multiprocessing is prefered for parallelizing across a single multi-core system. See Computational Crystallography Newsletter 3:37-42 (2012) for details of the underlying method. 
:param func: target function (must be pickleable) :param iterable: list of arguments for func :param processes: number of processes/threads to start :param method: parallelization method (multiprocessing|threading|sge|lsf|pbs) :param qsub_command: command to submit queue jobs (optional) :param asynchronous: run queue jobs asynchronously :param preserve_exception_message: keeps original exception message :returns: a list of result objects """ if (params is not None) : method = params.technology processes = params.nproc qsub_command = params.qsub_command from libtbx.utils import Sorry from libtbx.scheduling import SetupError if processes == 1 and "LIBTBX_FORCE_PARALLEL" not in os.environ: from libtbx.scheduling import mainthread creator = mainthread.creator else: from libtbx.scheduling import philgen from libtbx.scheduling import job_scheduler if method == "threading": technology = philgen.threading( capture_exception = preserve_exception_message, ) jfactory = technology.jfactory() qfactory = technology.qfactory()[0] capacity = job_scheduler.limited( njobs = get_processes( processes = processes ) ) elif method == "multiprocessing": technology = philgen.multiprocessing( capture_stderr = preserve_exception_message, qtype = philgen.mp_managed_queue if use_manager else philgen.mp_fifo_queue, ) jfactory = technology.jfactory() qfactory = technology.qfactory()[0] capacity = job_scheduler.limited( njobs = get_processes( processes = processes ) ) else: technology = philgen.cluster( asynchronous = asynchronous, capture_stderr = preserve_exception_message, ) assert method in technology.platforms # perhaps something less intrusive try: jfactory = technology.jfactory( platform = method, command = qsub_command ) except SetupError, e: raise Sorry, e from libtbx.scheduling import file_queue qfactory = file_queue.qfactory() if processes is Auto or processes is None: capacity = job_scheduler.unlimited else: capacity = job_scheduler.limited( njobs = processes ) creator = 
job_scheduler.creator( job_factory = jfactory, queue_factory = qfactory, capacity = capacity, )