def multi_core_run( myfunction, argstuples, nproc ):
  """
  Run myfunction on many cpu cores using multiprocessing.
  A simplified version of parallel_map() below.

  myfunction: name of the function to be parallelised,
  argstuples: list of tuples of associated input arguments,
  nproc: number of cores to run on.
  Both myfunction and its input arguments must be pickleable.

  Output is an iterator where each element is a tuple that contains:
    a tuple of arguments for one particular calculation with myfunction,
    the result of this calculation,
    the stacktrace if myfunction crashed.

  Example:

  # define RunMyJob() in a file testjob.py
  def RunMyJob( foo, bar ):
    import math
    return math.sqrt(foo)/bar

  # then one can start RunMyJob in parallel like:

  >>> import testjob
  >>> from libtbx import easy_mp
  >>>
  >>> argstuples = [ (3, 4), (2, 3) ] # define tuples of arguments
  >>>
  >>> for args, res, errstr in easy_mp.multi_core_run( testjob.RunMyJob, argstuples, 2 ):
  ...   print("arguments: %s \nresult: %s \nerrorstring: %s\n" %(args, res, errstr))
  ...
  arguments: (2, 3)
  result: 0.471404520791
  errorstring: None

  arguments: (3, 4)
  result: 0.433012701892
  errorstring: None

  >>>
  """
  from libtbx.scheduling import philgen
  from libtbx.scheduling import job_scheduler
  from libtbx.scheduling import parallel_for
  import libtbx.scheduling
  from libtbx.scheduling import stacktrace

  technology = philgen.multiprocessing(
    capture_stderr = True, # catch each individual error message and stack trace
    qtype = philgen.mp_managed_queue,
    )

  jfactory = technology.jfactory()
  qfactory = technology.qfactory()[0]
  capacity = job_scheduler.limited( njobs = get_processes( processes = nproc ) )

  creator = job_scheduler.creator(
    job_factory = jfactory,
    queue_factory = qfactory,
    capacity = capacity,
    )

  manager = creator.create()

  pfi = parallel_for.iterator(
    calculations = ( ( myfunction, args, {} ) for args in argstuples ),
    manager = manager,
    keep_input_order = False,
    )

  for i, ( calc, res ) in enumerate( pfi ):
    result = None
    errstr = None
    try:
      result = res()
    except Exception as e:
      tracestr = ""
      if stacktrace.exc_info()[1]:
        for inf in stacktrace.exc_info()[1]:
          tracestr += inf
      errstr = str(e) + "\n" + tracestr
    # calc[0] is the function name, calc[1] is the tuple of function arguments
    parmres = ( calc[1], result, errstr )

    if i >= len( argstuples ) - 1:
      manager.shutdown() # clean up once the last calculation has returned
      manager.join()
      creator.destroy( manager = manager )

    yield parmres # spit out results as they emerge
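# Illustrative usage sketch for multi_core_run(): how a crash inside the worker
# surfaces through the errstr element of each yielded tuple.  The names
# _example_divide and _example_multi_core_run_usage are hypothetical helpers used
# only for illustration; as documented above, the worker and its arguments must
# be pickleable, so the worker should be defined at module level in an
# importable file.

def _example_divide( foo, bar ):
  # Raises ZeroDivisionError when bar == 0, which exercises the errstr path.
  return foo / bar

def _example_multi_core_run_usage():
  argstuples = [ (4, 2), (1, 0) ]
  for args, res, errstr in multi_core_run( _example_divide, argstuples, 2 ):
    if errstr is None:
      print( "arguments %s gave result %s" % (args, res) )
    else:
      print( "arguments %s crashed:\n%s" % (args, errstr) )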
def parallel_map(
    func,
    iterable,
    iterable_type = single_argument,
    params = None,
    processes = 1,
    method = "multiprocessing",
    qsub_command = None,
    asynchronous = True,
    callback = None,
    preserve_order = True,
    preserve_exception_message = False,
    use_manager = False,
    stacktrace_handling = "ignore",
    ):
  """
  Generic parallel map() implementation for a variety of platforms, including
  the multiprocessing module and supported queuing systems, via the module
  libtbx.queuing_system_utils.scheduling.  This is less flexible than pool_map
  above, since it does not provide a way to use a non-pickleable target
  function, but it provides a consistent API for programs where multiple
  execution methods are desired.  It will also work on Windows (if the method
  is multiprocessing or threading).

  Note that for most applications, the threading method will be constrained by
  the Global Interpreter Lock, therefore multiprocessing is preferred for
  parallelizing across a single multi-core system.

  See Computational Crystallography Newsletter 3:37-42 (2012) for details of
  the underlying method.

  :param func: target function (must be pickleable)
  :param iterable: list of arguments for func
  :param processes: number of processes/threads to start
  :param method: parallelization method (multiprocessing|threading|sge|lsf|pbs)
  :param qsub_command: command to submit queue jobs (optional)
  :param asynchronous: run queue jobs asynchronously
  :param preserve_exception_message: keeps original exception message
  :returns: a list of result objects
  """
  if params is not None:
    method = params.technology
    processes = params.nproc
    qsub_command = params.qsub_command

  from libtbx.utils import Sorry
  from libtbx.scheduling import SetupError

  if processes == 1 and "LIBTBX_FORCE_PARALLEL" not in os.environ:
    from libtbx.scheduling import mainthread
    creator = mainthread.creator

  else:
    from libtbx.scheduling import philgen
    from libtbx.scheduling import job_scheduler

    if method == "threading":
      technology = philgen.threading(
        capture_exception = preserve_exception_message,
        )
      jfactory = technology.jfactory()
      qfactory = technology.qfactory()[0]
      capacity = job_scheduler.limited( njobs = get_processes( processes = processes ) )

    elif method == "multiprocessing":
      technology = philgen.multiprocessing(
        capture_stderr = preserve_exception_message,
        qtype = philgen.mp_managed_queue if use_manager else philgen.mp_fifo_queue,
        )
      jfactory = technology.jfactory()
      qfactory = technology.qfactory()[0]
      capacity = job_scheduler.limited( njobs = get_processes( processes = processes ) )

    else:
      technology = philgen.cluster(
        asynchronous = asynchronous,
        capture_stderr = preserve_exception_message,
        )
      assert method in technology.platforms # perhaps something less intrusive

      try:
        jfactory = technology.jfactory( platform = method, command = qsub_command )

      except SetupError as e:
        raise Sorry( e )

      from libtbx.scheduling import file_queue
      qfactory = file_queue.qfactory()

      if processes is Auto or processes is None:
        capacity = job_scheduler.unlimited

      else:
        capacity = job_scheduler.limited( njobs = processes )

    creator = job_scheduler.creator(
      job_factory = jfactory,
      queue_factory = qfactory,
      capacity = capacity,
      )

  import libtbx.scheduling

  if stacktrace_handling == "ignore":
    sthandler = libtbx.scheduling.ignore

  elif stacktrace_handling == "excepthook":
    sthandler = libtbx.scheduling.excepthook

  elif stacktrace_handling == "decorate":
    sthandler = libtbx.scheduling.decorate

  else:
    raise Sorry( "Unknown stacktrace handling method: %s" % stacktrace_handling )

  from libtbx.scheduling import parallel_for

  if callback is None:
    callback = lambda r: None

  results = []

  with libtbx.scheduling.holder( creator = creator, stacktrace = sthandler ) as manager:
    adfunc = iterable_type( func )

    try:
      pfi = parallel_for.iterator(
        calculations = ( ( adfunc, ( args, ), {} ) for args in iterable ),
        manager = manager,
        keep_input_order = preserve_order,
        )

      for ( calc, res ) in pfi:
        result = res()
        results.append( result )
        callback( result )

    except SetupError as e:
      raise Sorry( e )

    manager.shutdown()
    manager.join()

  return results
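# Illustrative usage sketch for parallel_map() with the multiprocessing method
# and a progress callback.  The names _example_square and
# _example_parallel_map_usage are hypothetical helpers used only for
# illustration; the worker must be pickleable, so it should be defined at module
# level in an importable file.

def _example_square( x ):
  # With the default iterable_type (single_argument), each element of the
  # iterable is passed to the worker as a single positional argument.
  return x * x

def _example_parallel_map_usage():
  collected = []
  results = parallel_map(
    func = _example_square,
    iterable = range( 10 ),
    processes = 4,
    method = "multiprocessing",
    preserve_order = True,        # results come back in input order
    callback = collected.append,  # called once per result as it is yielded
    )
  # results should be the squares of 0..9; collected receives the same values
  # through the callback as each result arrives.
  return results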
raise Sorry, "Unknown stacktrace handling method: %s" % stacktrace_handling from libtbx.scheduling import parallel_for if callback is None: callback = lambda r: None results = [] with libtbx.scheduling.holder( creator = creator, stacktrace = sthandler ) as manager: adfunc = iterable_type( func ) try: pfi = parallel_for.iterator( calculations = ( ( adfunc, ( args, ), {} ) for args in iterable ), manager = manager, keep_input_order = preserve_order, ) for ( calc, res ) in pfi: result = res() results.append( result ) callback( result ) except SetupError, e: raise Sorry, e manager.shutdown() manager.join() return results