def multi_core_run( myfunction, argstuples, nproc ):
  """
  Run myfunction on many cpu cores using multiprocessing.
  A simplified version of parallel_map() below.

  myfunction: name of the function to be parallelised,
  argstuples: list of tuples of associated input arguments,
  nproc: number of cores to run on.
  Both myfunction and its input arguments must be pickleable.

  Output is an iterator where each element is a tuple that contains:
    a tuple of arguments for one particular calculation with myfunction,
    the result of this calculation,
    the stacktrace if myfunction crashed.

  Example:

  # define RunMyJob() in a file testjob.py
  def RunMyJob( foo, bar ):
    import math
    return math.sqrt(foo)/bar

  # then one can start RunMyJob in parallel like:

  >>> import testjob
  >>> from libtbx import easy_mp
  >>>
  >>> argstuples = [ (3, 4), (2, 3) ] # define tuples of arguments
  >>>
  >>> for args, res, errstr in easy_mp.multi_core_run( testjob.RunMyJob, argstuples, 2 ):
  ...   print("arguments: %s \nresult: %s \nerrorstring: %s\n" %(args, res, errstr))
  ...
  arguments: (2, 3)
  result: 0.471404520791
  errorstring: None

  arguments: (3, 4)
  result: 0.433012701892
  errorstring: None

  >>>
  """
  from libtbx.scheduling import philgen
  from libtbx.scheduling import job_scheduler
  from libtbx.scheduling import parallel_for
  import libtbx.scheduling
  from libtbx.scheduling import stacktrace

  technology = philgen.multiprocessing(
    capture_stderr = True, # catch each individual error message and stack trace
    qtype = philgen.mp_managed_queue,
    )

  jfactory = technology.jfactory()
  qfactory = technology.qfactory()[0]
  capacity = job_scheduler.limited( njobs = get_processes( processes = nproc ) )

  creator = job_scheduler.creator(
    job_factory = jfactory,
    queue_factory = qfactory,
    capacity = capacity,
    )

  manager = creator.create()

  pfi = parallel_for.iterator(
    calculations = ( ( myfunction, args, {} ) for args in argstuples ),
    manager = manager,
    keep_input_order = False,
    )

  for i, ( calc, res ) in enumerate( pfi ):
    result = None
    errstr = None
    try:
      result = res()
    except Exception as e:
      tracestr = ""
      if stacktrace.exc_info()[1]:
        for inf in stacktrace.exc_info()[1]:
          tracestr += inf
      errstr = str(e) + "\n" + tracestr
    # calc[0] is the function name, calc[1] is the tuple of function arguments
    parmres = ( calc[1], result, errstr )

    if i >= len( argstuples ) - 1:
      manager.shutdown() # clean up once the last calculation has returned
      manager.join()
      creator.destroy( manager = manager )

    yield parmres # spit out results as they emerge
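# Illustrative usage sketch for multi_core_run(): how a crash inside the worker
# surfaces through the errstr element of each yielded tuple.  The names
# _example_divide and _example_multi_core_run_usage are hypothetical helpers used
# only for illustration; as documented above, the worker and its arguments must
# be pickleable, so the worker should be defined at module level in an
# importable file.

def _example_divide( foo, bar ):
  # Raises ZeroDivisionError when bar == 0, which exercises the errstr path.
  return foo / bar

def _example_multi_core_run_usage():
  argstuples = [ (4, 2), (1, 0) ]
  for args, res, errstr in multi_core_run( _example_divide, argstuples, 2 ):
    if errstr is None:
      print( "arguments %s gave result %s" % (args, res) )
    else:
      print( "arguments %s crashed:\n%s" % (args, errstr) )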
def parallel_map(
    func,
    iterable,
    iterable_type = single_argument,
    params = None,
    processes = 1,
    method = "multiprocessing",
    qsub_command = None,
    asynchronous = True,
    callback = None,
    preserve_order = True,
    preserve_exception_message = False,
    use_manager = False,
    stacktrace_handling = "ignore",
    ):
  """
  Generic parallel map() implementation for a variety of platforms, including
  the multiprocessing module and supported queuing systems, via the module
  libtbx.queuing_system_utils.scheduling.  This is less flexible than pool_map
  above, since it does not provide a way to use a non-pickleable target
  function, but it provides a consistent API for programs where multiple
  execution methods are desired.  It will also work on Windows (if the method
  is multiprocessing or threading).

  Note that for most applications, the threading method will be constrained by
  the Global Interpreter Lock, therefore multiprocessing is preferred for
  parallelizing across a single multi-core system.

  See Computational Crystallography Newsletter 3:37-42 (2012) for details of
  the underlying method.

  :param func: target function (must be pickleable)
  :param iterable: list of arguments for func
  :param processes: number of processes/threads to start
  :param method: parallelization method (multiprocessing|threading|sge|lsf|pbs)
  :param qsub_command: command to submit queue jobs (optional)
  :param asynchronous: run queue jobs asynchronously
  :param preserve_exception_message: keeps original exception message
  :returns: a list of result objects
  """
  if params is not None:
    method = params.technology
    processes = params.nproc
    qsub_command = params.qsub_command

  from libtbx.utils import Sorry
  from libtbx.scheduling import SetupError

  if processes == 1 and "LIBTBX_FORCE_PARALLEL" not in os.environ:
    from libtbx.scheduling import mainthread
    creator = mainthread.creator

  else:
    from libtbx.scheduling import philgen
    from libtbx.scheduling import job_scheduler

    if method == "threading":
      technology = philgen.threading(
        capture_exception = preserve_exception_message,
        )
      jfactory = technology.jfactory()
      qfactory = technology.qfactory()[0]
      capacity = job_scheduler.limited( njobs = get_processes( processes = processes ) )

    elif method == "multiprocessing":
      technology = philgen.multiprocessing(
        capture_stderr = preserve_exception_message,
        qtype = philgen.mp_managed_queue if use_manager else philgen.mp_fifo_queue,
        )
      jfactory = technology.jfactory()
      qfactory = technology.qfactory()[0]
      capacity = job_scheduler.limited( njobs = get_processes( processes = processes ) )

    else:
      technology = philgen.cluster(
        asynchronous = asynchronous,
        capture_stderr = preserve_exception_message,
        )
      assert method in technology.platforms # perhaps something less intrusive

      try:
        jfactory = technology.jfactory( platform = method, command = qsub_command )

      except SetupError as e:
        raise Sorry( e )

      from libtbx.scheduling import file_queue
      qfactory = file_queue.qfactory()

      if processes is Auto or processes is None:
        capacity = job_scheduler.unlimited

      else:
        capacity = job_scheduler.limited( njobs = processes )

    creator = job_scheduler.creator(
      job_factory = jfactory,
      queue_factory = qfactory,
      capacity = capacity,
      )

  import libtbx.scheduling

  if stacktrace_handling == "ignore":
    sthandler = libtbx.scheduling.ignore

  elif stacktrace_handling == "excepthook":
    sthandler = libtbx.scheduling.excepthook

  elif stacktrace_handling == "decorate":
    sthandler = libtbx.scheduling.decorate

  else:
    raise Sorry( "Unknown stacktrace handling method: %s" % stacktrace_handling )

  from libtbx.scheduling import parallel_for

  if callback is None:
    callback = lambda r: None

  results = []

  with libtbx.scheduling.holder( creator = creator, stacktrace = sthandler ) as manager:
    adfunc = iterable_type( func )

    try:
      pfi = parallel_for.iterator(
        calculations = ( ( adfunc, ( args, ), {} ) for args in iterable ),
        manager = manager,
        keep_input_order = preserve_order,
        )

      for ( calc, res ) in pfi:
        result = res()
        results.append( result )
        callback( result )

    except SetupError as e:
      raise Sorry( e )

    manager.shutdown()
    manager.join()

  return results
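# Illustrative usage sketch for parallel_map() with the multiprocessing method
# and a progress callback.  The names _example_square and
# _example_parallel_map_usage are hypothetical helpers used only for
# illustration; the worker must be pickleable, so it should be defined at module
# level in an importable file.

def _example_square( x ):
  # With the default iterable_type (single_argument), each element of the
  # iterable is passed to the worker as a single positional argument.
  return x * x

def _example_parallel_map_usage():
  collected = []
  results = parallel_map(
    func = _example_square,
    iterable = range( 10 ),
    processes = 4,
    method = "multiprocessing",
    preserve_order = True,        # results come back in input order
    callback = collected.append,  # called once per result as it is yielded
    )
  # results should be the squares of 0..9; collected receives the same values
  # through the callback as each result arrives.
  return results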
raise Sorry, "Unknown stacktrace handling method: %s" % stacktrace_handling from libtbx.scheduling import parallel_for if callback is None: callback = lambda r: None results = [] with libtbx.scheduling.holder( creator = creator, stacktrace = sthandler ) as manager: adfunc = iterable_type( func ) try: pfi = parallel_for.iterator( calculations = ( ( adfunc, ( args, ), {} ) for args in iterable ), manager = manager, keep_input_order = preserve_order, ) for ( calc, res ) in pfi: result = res() results.append( result ) callback( result ) except SetupError, e: raise Sorry, e manager.shutdown() manager.join() return results