Python SubprocPool Examples, pants.base.worker_pool.SubprocPool Python Examples

Example #1

0

Show file

File: run_tracker.py Project: megaserg/pants

  def end(self):
    """This pants run is over, so stop tracking it.

    Note: If end() has been called once, subsequent calls are no-ops.
    """
    if self._background_worker_pool:
      if self._aborted:
        self.log(Report.INFO, "Aborting background workers.")
        self._background_worker_pool.abort()
      else:
        self.log(Report.INFO, "Waiting for background workers to finish.")
        self._background_worker_pool.shutdown()
      self.end_workunit(self._background_root_workunit)

    SubprocPool.shutdown(self._aborted)

    # Run a dummy work unit to write out one last timestamp.
    with self.new_workunit("complete"):
      pass

    self.end_workunit(self._main_root_workunit)

    outcome = self._main_root_workunit.outcome()
    if self._background_root_workunit:
      outcome = min(outcome, self._background_root_workunit.outcome())
    outcome_str = WorkUnit.outcome_string(outcome)
    log_level = RunTracker._log_levels[outcome]
    self.log(log_level, outcome_str)

    if self.run_info.get_info('outcome') is None:
      # If the goal is clean-all then the run info dir no longer exists, so ignore that error.
      self.run_info.add_info('outcome', outcome_str, ignore_errors=True)

    self.report.close()
    self.store_stats()

Example #2

0

Show file

File: run_tracker.py Project: benjyw/pants

  def __init__(self, *args, **kwargs):
    """
    :API: public
    """
    super(RunTracker, self).__init__(*args, **kwargs)
    self._run_timestamp = time.time()
    self._cmd_line = ' '.join(['pants'] + sys.argv[1:])

    # Initialized in `initialize()`.
    self.run_info_dir = None
    self.run_info = None
    self.cumulative_timings = None
    self.self_timings = None
    self.artifact_cache_stats = None
    self.pantsd_stats = None

    # Initialized in `start()`.
    self.report = None
    self._main_root_workunit = None

    # A lock to ensure that adding to stats at the end of a workunit
    # operates thread-safely.
    self._stats_lock = threading.Lock()

    # Log of success/failure/aborted for each workunit.
    self.outcomes = {}

    # Number of threads for foreground work.
    self._num_foreground_workers = self.get_options().num_foreground_workers

    # Number of threads for background work.
    self._num_background_workers = self.get_options().num_background_workers

    # self._threadlocal.current_workunit contains the current workunit for the calling thread.
    # Note that multiple threads may share a name (e.g., all the threads in a pool).
    self._threadlocal = threading.local()

    # For background work.  Created lazily if needed.
    self._background_worker_pool = None
    self._background_root_workunit = None

    # Trigger subproc pool init while our memory image is still clean (see SubprocPool docstring).
    SubprocPool.set_num_processes(self._num_foreground_workers)
    SubprocPool.foreground()

    self._aborted = False

    # Data will be organized first by target and then scope.
    # Eg:
    # {
    #   'target/address:name': {
    #     'running_scope': {
    #       'run_duration': 356.09
    #     },
    #     'GLOBAL': {
    #       'target_type': 'pants.test'
    #     }
    #   }
    # }
    self._target_to_data = {}

Example #3

0

Show file

File: run_tracker.py Project: Gointer/pants

  def shutdown_worker_pool(self):
    """Shuts down the SubprocPool.

    N.B. This exists only for internal use and to afford for fork()-safe operation in pantsd.

    :API: public
    """
    SubprocPool.shutdown(self._aborted)

Example #4

0

Show file

File: run_tracker.py Project: cburroughs/pants

  def __init__(self, *args, **kwargs):
    super(RunTracker, self).__init__(*args, **kwargs)
    run_timestamp = time.time()
    cmd_line = ' '.join(['pants'] + sys.argv[1:])

    # run_id is safe for use in paths.
    millis = int((run_timestamp * 1000) % 1000)
    run_id = 'pants_run_{}_{}_{}'.format(
               time.strftime('%Y_%m_%d_%H_%M_%S', time.localtime(run_timestamp)), millis,
               uuid.uuid4().hex)

    info_dir = os.path.join(self.get_options().pants_workdir, self.options_scope)
    self.run_info_dir = os.path.join(info_dir, run_id)
    self.run_info = RunInfo(os.path.join(self.run_info_dir, 'info'))
    self.run_info.add_basic_info(run_id, run_timestamp)
    self.run_info.add_info('cmd_line', cmd_line)

    # Create a 'latest' symlink, after we add_infos, so we're guaranteed that the file exists.
    link_to_latest = os.path.join(os.path.dirname(self.run_info_dir), 'latest')

    relative_symlink(self.run_info_dir, link_to_latest)

    # Time spent in a workunit, including its children.
    self.cumulative_timings = AggregatedTimings(os.path.join(self.run_info_dir,
                                                             'cumulative_timings'))

    # Time spent in a workunit, not including its children.
    self.self_timings = AggregatedTimings(os.path.join(self.run_info_dir, 'self_timings'))

    # Hit/miss stats for the artifact cache.
    self.artifact_cache_stats = \
      ArtifactCacheStats(os.path.join(self.run_info_dir, 'artifact_cache_stats'))

    # Number of threads for foreground work.
    self._num_foreground_workers = self.get_options().num_foreground_workers

    # Number of threads for background work.
    self._num_background_workers = self.get_options().num_background_workers

    # We report to this Report.
    self.report = None

    # self._threadlocal.current_workunit contains the current workunit for the calling thread.
    # Note that multiple threads may share a name (e.g., all the threads in a pool).
    self._threadlocal = threading.local()

    # For main thread work. Created on start().
    self._main_root_workunit = None

    # For background work.  Created lazily if needed.
    self._background_worker_pool = None
    self._background_root_workunit = None

    # Trigger subproc pool init while our memory image is still clean (see SubprocPool docstring).
    SubprocPool.set_num_processes(self._num_foreground_workers)
    SubprocPool.foreground()

    self._aborted = False

Example #5

0

Show file

File: run_tracker.py Project: dominichamon/pants

  def end(self):
    """This pants run is over, so stop tracking it.

    Note: If end() has been called once, subsequent calls are no-ops.
    """

    if self._background_worker_pool:
      if self._aborted:
        self.log(Report.INFO, "Aborting background workers.")
        self._background_worker_pool.abort()
      else:
        self.log(Report.INFO, "Waiting for background workers to finish.")
        self._background_worker_pool.shutdown()
      self.report.end_workunit(self._background_root_workunit)
      self._background_root_workunit.end()

    if self._foreground_worker_pool:
      if self._aborted:
        self.log(Report.INFO, "Aborting foreground workers.")
        self._foreground_worker_pool.abort()
      else:
        self.log(Report.INFO, "Waiting for foreground workers to finish.")
        self._foreground_worker_pool.shutdown()

    SubprocPool.shutdown(self._aborted)

    self.report.end_workunit(self._main_root_workunit)
    self._main_root_workunit.end()

    outcome = self._main_root_workunit.outcome()
    if self._background_root_workunit:
      outcome = min(outcome, self._background_root_workunit.outcome())
    outcome_str = WorkUnit.outcome_string(outcome)
    log_level = WorkUnit.choose_for_outcome(outcome, Report.ERROR, Report.ERROR,
                                            Report.WARN, Report.INFO, Report.INFO)
    self.log(log_level, outcome_str)

    if self.run_info.get_info('outcome') is None:
      try:
        self.run_info.add_info('outcome', outcome_str)
      except IOError:
        pass  # If the goal is clean-all then the run info dir no longer exists...

    self.report.close()
    self.upload_stats()

Example #6

0

Show file

File: context.py Project: amedina/pants

  def subproc_map(self, f, items):
    """Map function `f` over `items` in subprocesses and return the result.

      :param f: A multiproc-friendly (importable) work function.
      :param args: A iterable of pickleable arguments to f.
    """
    try:
      # Pool.map (and async_map().get() w/o timeout) can miss SIGINT.
      # See: http://stackoverflow.com/a/1408476, http://bugs.python.org/issue8844
      # Instead, we map_async(...), wait *with a timeout* until ready, then .get()
      # NB: in 2.x, wait() with timeout wakes up often to check, burning CPU. Oh well.
      res = SubprocPool.foreground().map_async(f, items)
      while not res.ready():
        res.wait(60) # Repeatedly wait for up to a minute.
        if not res.ready():
          self.log.debug('subproc_map result still not ready...')
      return res.get()
    except KeyboardInterrupt:
      SubprocPool.shutdown(True)
      raise

Example #7

0

Show file

File: context.py Project: amedina/pants

  def exec_on_subproc(self, f, args):
    """Send work to a subprocess and block on it.

       This can be used by existing background Work in a ThreadPool to sidestep the GIL:
       The Thread calls this method and still blocks until the work is complete, so
       existing reporting and accounting is unchanged, but the actual work is executed
       in a subprocess, avoiding lock contention.

       :param f: A multiproc-friendly (importable) work function.
       :param args: Multiproc-friendly (pickleable) arguments to f.
    """
    return SubprocPool.background().apply(f, args)

Example #8

0

Show file

File: run_tracker.py Project: dominichamon/pants

  def __init__(self,
               info_dir,
               stats_upload_url=None,
               stats_upload_timeout=2,
               num_foreground_workers=8,
               num_background_workers=8):
    self.run_timestamp = time.time()  # A double, so we get subsecond precision for ids.
    cmd_line = ' '.join(['./pants'] + sys.argv[1:])

    # run_id is safe for use in paths.
    millis = (self.run_timestamp * 1000) % 1000
    run_id = 'pants_run_%s_%d' % \
             (time.strftime('%Y_%m_%d_%H_%M_%S', time.localtime(self.run_timestamp)), millis)

    self.run_info_dir = os.path.join(info_dir, run_id)
    self.run_info = RunInfo(os.path.join(self.run_info_dir, 'info'))
    self.run_info.add_basic_info(run_id, self.run_timestamp)
    self.run_info.add_info('cmd_line', cmd_line)
    self.stats_url = stats_upload_url
    self.stats_timeout = stats_upload_timeout

    # Create a 'latest' symlink, after we add_infos, so we're guaranteed that the file exists.
    link_to_latest = os.path.join(os.path.dirname(self.run_info_dir), 'latest')

    try:
      if os.path.lexists(link_to_latest):
        os.unlink(link_to_latest)
      os.symlink(self.run_info_dir, link_to_latest)
    except OSError as e:
      # Another run may beat us to deletion or creation.
      if not (e.errno == errno.EEXIST or e.errno == errno.ENOENT):
        raise

    # Time spent in a workunit, including its children.
    self.cumulative_timings = AggregatedTimings(os.path.join(self.run_info_dir,
                                                             'cumulative_timings'))

    # Time spent in a workunit, not including its children.
    self.self_timings = AggregatedTimings(os.path.join(self.run_info_dir, 'self_timings'))

    # Hit/miss stats for the artifact cache.
    self.artifact_cache_stats = \
      ArtifactCacheStats(os.path.join(self.run_info_dir, 'artifact_cache_stats'))

    # Number of threads for foreground work.
    self._num_foreground_workers = num_foreground_workers

    # Number of threads for background work.
    self._num_background_workers = num_background_workers

    # We report to this Report.
    self.report = None

    # self._threadlocal.current_workunit contains the current workunit for the calling thread.
    # Note that multiple threads may share a name (e.g., all the threads in a pool).
    self._threadlocal = threading.local()

    # For main thread work. Created on start().
    self._main_root_workunit = None

    # For concurrent foreground work.  Created lazily if needed.
    # Associated with the main thread's root workunit.
    self._foreground_worker_pool = None

    # For background work.  Created lazily if needed.
    self._background_worker_pool = None
    self._background_root_workunit = None

    # Trigger subproc pool init while our memory image is still clean (see SubprocPool docstring)
    SubprocPool.foreground()

    self._aborted = False

Example #9

0

Show file

File: run_tracker.py Project: jakubbujny/pants

  def shutdown_worker_pool(self):
    """Shuts down the SubprocPool.

    N.B. This exists only for internal use and to afford for fork()-safe operation in pantsd.
    """
    SubprocPool.shutdown(self._aborted)

Example #10

0

Show file

File: run_tracker.py Project: pall-valmundsson/pants

    def __init__(self, *args, **kwargs):
        """
    :API: public
    """
        super(RunTracker, self).__init__(*args, **kwargs)
        self._run_timestamp = time.time()
        self._cmd_line = ' '.join(['pants'] + sys.argv[1:])
        self._sorted_goal_infos = tuple()

        # Initialized in `initialize()`.
        self.run_info_dir = None
        self.run_info = None
        self.cumulative_timings = None
        self.self_timings = None
        self.artifact_cache_stats = None
        self.pantsd_stats = None

        # Initialized in `start()`.
        self.report = None
        self._main_root_workunit = None
        self._all_options = None

        # A lock to ensure that adding to stats at the end of a workunit
        # operates thread-safely.
        self._stats_lock = threading.Lock()

        # Log of success/failure/aborted for each workunit.
        self.outcomes = {}

        # Number of threads for foreground work.
        self._num_foreground_workers = self.get_options(
        ).num_foreground_workers

        # Number of threads for background work.
        self._num_background_workers = self.get_options(
        ).num_background_workers

        # self._threadlocal.current_workunit contains the current workunit for the calling thread.
        # Note that multiple threads may share a name (e.g., all the threads in a pool).
        self._threadlocal = threading.local()

        # A logger facade that logs into this RunTracker.
        self._logger = RunTrackerLogger(self)

        # For background work.  Created lazily if needed.
        self._background_worker_pool = None
        self._background_root_workunit = None

        # Trigger subproc pool init while our memory image is still clean (see SubprocPool docstring).
        SubprocPool.set_num_processes(self._num_foreground_workers)
        SubprocPool.foreground()

        self._aborted = False

        # Data will be organized first by target and then scope.
        # Eg:
        # {
        #   'target/address:name': {
        #     'running_scope': {
        #       'run_duration': 356.09
        #     },
        #     'GLOBAL': {
        #       'target_type': 'pants.test'
        #     }
        #   }
        # }
        self._target_to_data = {}