コード例 #1
0
def parallel_map (
    func,
    iterable,
    iterable_type = single_argument,
    params=None,
    processes=1,
    method="multiprocessing",
    qsub_command=None,
    asynchronous=True,
    callback=None,
    preserve_order=True,
    preserve_exception_message=False,
    use_manager=False,
    stacktrace_handling = "ignore") :
  """
  Generic parallel map() implementation for a variety of platforms, including
  the multiprocessing module and supported queuing systems, via the module
  libtbx.queuing_system_utils.scheduling.  This is less flexible than pool_map
  above, since it does not provide a way to use a non-pickleable target
  function, but it provides a consistent API for programs where multiple
  execution methods are desired.  It will also work on Windows (if the method
  is multiprocessing or threading).

  Note that for most applications, the threading method will be constrained
  by the Global Interpreter Lock, therefore multiprocessing is prefered for
  parallelizing across a single multi-core system.

  See Computational Crystallography Newsletter 3:37-42 (2012) for details of
  the underlying method.

  :param func: target function (must be pickleable)
  :param iterable: list of arguments for func
  :param processes: number of processes/threads to start
  :param method: parallelization method (multiprocessing|threading|sge|lsf|pbs)
  :param qsub_command: command to submit queue jobs (optional)
  :param asynchronous: run queue jobs asynchronously
  :param preserve_exception_message: keeps original exception message
  :returns: a list of result objects
  """
  if (params is not None) :
    method = params.technology
    processes = params.nproc
    qsub_command = params.qsub_command

  from libtbx.utils import Sorry
  from libtbx.scheduling import SetupError

  if processes == 1 and "LIBTBX_FORCE_PARALLEL" not in os.environ:
    from libtbx.scheduling import mainthread
    creator = mainthread.creator

  else:
    from libtbx.scheduling import philgen
    from libtbx.scheduling import job_scheduler

    if method == "threading":
      technology = philgen.threading(
        capture_exception = preserve_exception_message,
        )
      jfactory = technology.jfactory()
      qfactory = technology.qfactory()[0]
      capacity = job_scheduler.limited(
        njobs = get_processes( processes = processes )
        )

    elif method == "multiprocessing":
      technology = philgen.multiprocessing(
        capture_stderr = preserve_exception_message,
        qtype = philgen.mp_managed_queue if use_manager else philgen.mp_fifo_queue,
        )
      jfactory = technology.jfactory()
      qfactory = technology.qfactory()[0]
      capacity = job_scheduler.limited(
        njobs = get_processes( processes = processes )
        )

    else:
      technology = philgen.cluster(
        asynchronous = asynchronous,
        capture_stderr = preserve_exception_message,
        )

      assert method in technology.platforms # perhaps something less intrusive

      try:
        jfactory = technology.jfactory( platform = method, command = qsub_command )

      except SetupError as e:
        raise Sorry, e

      from libtbx.scheduling import file_queue
      qfactory = file_queue.qfactory()

      if processes is Auto or processes is None:
        capacity = job_scheduler.unlimited

      else:
        capacity = job_scheduler.limited( njobs = processes )

    creator = job_scheduler.creator(
      job_factory = jfactory,
      queue_factory = qfactory,
      capacity = capacity,
      )

  import libtbx.scheduling

  if stacktrace_handling == "ignore":
    sthandler = libtbx.scheduling.ignore

  elif stacktrace_handling == "excepthook":
    sthandler = libtbx.scheduling.excepthook

  elif stacktrace_handling == "decorate":
    sthandler = libtbx.scheduling.decorate

  else:
    raise Sorry, "Unknown stacktrace handling method: %s" % stacktrace_handling

  from libtbx.scheduling import parallel_for

  if callback is None:
    callback = lambda r: None

  results = []

  with libtbx.scheduling.holder( creator = creator, stacktrace = sthandler ) as manager:
    adfunc = iterable_type( func )

    try:
      pfi = parallel_for.iterator(
        calculations = ( ( adfunc, ( args, ), {} ) for args in iterable ),
        manager = manager,
        keep_input_order = preserve_order,
        )

      for ( calc, res ) in pfi:
        result = res()
        results.append( result )
        callback( result )

    except SetupError as e:
      raise Sorry, e

    manager.shutdown()
    manager.join()

  return results
コード例 #2
0
ファイル: easy_mp.py プロジェクト: cctbx/cctbx-playground
def parallel_map (
    func,
    iterable,
    iterable_type = single_argument,
    params=None,
    processes=1,
    method="multiprocessing",
    qsub_command=None,
    asynchronous=True,
    callback=None,
    preserve_order=True,
    preserve_exception_message=False,
    use_manager=False,
    stacktrace_handling = "ignore") :
  """
  Generic parallel map() implementation for a variety of platforms, including
  the multiprocessing module and supported queuing systems, via the module
  libtbx.queuing_system_utils.scheduling.  This is less flexible than pool_map
  above, since it does not provide a way to use a non-pickleable target
  function, but it provides a consistent API for programs where multiple
  execution methods are desired.  It will also work on Windows (if the method
  is multiprocessing or threading).

  Note that for most applications, the threading method will be constrained
  by the Global Interpreter Lock, therefore multiprocessing is prefered for
  parallelizing across a single multi-core system.

  See Computational Crystallography Newsletter 3:37-42 (2012) for details of
  the underlying method.

  :param func: target function (must be pickleable)
  :param iterable: list of arguments for func
  :param processes: number of processes/threads to start
  :param method: parallelization method (multiprocessing|threading|sge|lsf|pbs)
  :param qsub_command: command to submit queue jobs (optional)
  :param asynchronous: run queue jobs asynchronously
  :param preserve_exception_message: keeps original exception message
  :returns: a list of result objects
  """
  if (params is not None) :
    method = params.technology
    processes = params.nproc
    qsub_command = params.qsub_command

  from libtbx.utils import Sorry
  from libtbx.scheduling import SetupError

  if processes == 1 and "LIBTBX_FORCE_PARALLEL" not in os.environ:
    from libtbx.scheduling import mainthread
    creator = mainthread.creator

  else:
    from libtbx.scheduling import philgen
    from libtbx.scheduling import job_scheduler

    if method == "threading":
      technology = philgen.threading(
        capture_exception = preserve_exception_message,
        )
      jfactory = technology.jfactory()
      qfactory = technology.qfactory()[0]
      capacity = job_scheduler.limited(
        njobs = get_processes( processes = processes )
        )

    elif method == "multiprocessing":
      technology = philgen.multiprocessing(
        capture_stderr = preserve_exception_message,
        qtype = philgen.mp_managed_queue if use_manager else philgen.mp_fifo_queue,
        )
      jfactory = technology.jfactory()
      qfactory = technology.qfactory()[0]
      capacity = job_scheduler.limited(
        njobs = get_processes( processes = processes )
        )

    else:
      technology = philgen.cluster(
        asynchronous = asynchronous,
        capture_stderr = preserve_exception_message,
        )

      assert method in technology.platforms # perhaps something less intrusive

      try:
        jfactory = technology.jfactory( platform = method, command = qsub_command )

      except SetupError, e:
        raise Sorry, e

      from libtbx.scheduling import file_queue
      qfactory = file_queue.qfactory()

      if processes is Auto or processes is None:
        capacity = job_scheduler.unlimited

      else:
        capacity = job_scheduler.limited( njobs = processes )

    creator = job_scheduler.creator(
      job_factory = jfactory,
      queue_factory = qfactory,
      capacity = capacity,
      )